summaryrefslogtreecommitdiff
path: root/tools/arch/s390/include/uapi
diff options
context:
space:
mode:
Diffstat (limited to 'tools/arch/s390/include/uapi')
0 files changed, 0 insertions, 0 deletions
it/linux-arm.git/log/drivers/infiniband'>
path: root/drivers/infiniband
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband')
-rw-r--r--drivers/infiniband/Kconfig69
-rw-r--r--drivers/infiniband/Makefile1
-rw-r--r--drivers/infiniband/core/Makefile37
-rw-r--r--drivers/infiniband/core/addr.c699
-rw-r--r--drivers/infiniband/core/agent.c39
-rw-r--r--drivers/infiniband/core/cache.c1756
-rw-r--r--drivers/infiniband/core/cgroup.c53
-rw-r--r--drivers/infiniband/core/cm.c3494
-rw-r--r--drivers/infiniband/core/cm_msgs.h798
-rw-r--r--drivers/infiniband/core/cm_trace.c15
-rw-r--r--drivers/infiniband/core/cm_trace.h414
-rw-r--r--drivers/infiniband/core/cma.c3727
-rw-r--r--drivers/infiniband/core/cma_configfs.c120
-rw-r--r--drivers/infiniband/core/cma_priv.h141
-rw-r--r--drivers/infiniband/core/cma_trace.c16
-rw-r--r--drivers/infiniband/core/cma_trace.h361
-rw-r--r--drivers/infiniband/core/core_priv.h314
-rw-r--r--drivers/infiniband/core/counters.c681
-rw-r--r--drivers/infiniband/core/cq.c398
-rw-r--r--drivers/infiniband/core/device.c2922
-rw-r--r--drivers/infiniband/core/fmr_pool.c520
-rw-r--r--drivers/infiniband/core/ib_core_uverbs.c367
-rw-r--r--drivers/infiniband/core/iwcm.c186
-rw-r--r--drivers/infiniband/core/iwcm.h2
-rw-r--r--drivers/infiniband/core/iwpm_msg.c308
-rw-r--r--drivers/infiniband/core/iwpm_util.c208
-rw-r--r--drivers/infiniband/core/iwpm_util.h40
-rw-r--r--drivers/infiniband/core/lag.c135
-rw-r--r--drivers/infiniband/core/mad.c1226
-rw-r--r--drivers/infiniband/core/mad_priv.h98
-rw-r--r--drivers/infiniband/core/mad_rmpp.c99
-rw-r--r--drivers/infiniband/core/mr_pool.c18
-rw-r--r--drivers/infiniband/core/multicast.c119
-rw-r--r--drivers/infiniband/core/netlink.c366
-rw-r--r--drivers/infiniband/core/nldev.c2920
-rw-r--r--drivers/infiniband/core/opa_smi.h8
-rw-r--r--drivers/infiniband/core/rdma_core.c1050
-rw-r--r--drivers/infiniband/core/rdma_core.h192
-rw-r--r--drivers/infiniband/core/restrack.c314
-rw-r--r--drivers/infiniband/core/restrack.h36
-rw-r--r--drivers/infiniband/core/roce_gid_mgmt.c465
-rw-r--r--drivers/infiniband/core/rw.c343
-rw-r--r--drivers/infiniband/core/sa.h8
-rw-r--r--drivers/infiniband/core/sa_query.c1671
-rw-r--r--drivers/infiniband/core/security.c750
-rw-r--r--drivers/infiniband/core/smi.c12
-rw-r--r--drivers/infiniband/core/smi.h8
-rw-r--r--drivers/infiniband/core/sysfs.c1479
-rw-r--r--drivers/infiniband/core/trace.c12
-rw-r--r--drivers/infiniband/core/ucaps.c267
-rw-r--r--drivers/infiniband/core/ucm.c1375
-rw-r--r--drivers/infiniband/core/ucma.c1105
-rw-r--r--drivers/infiniband/core/ud_header.c85
-rw-r--r--drivers/infiniband/core/umem.c392
-rw-r--r--drivers/infiniband/core/umem_dmabuf.c275
-rw-r--r--drivers/infiniband/core/umem_odp.c885
-rw-r--r--drivers/infiniband/core/umem_rbtree.c94
-rw-r--r--drivers/infiniband/core/user_mad.c655
-rw-r--r--drivers/infiniband/core/uverbs.h261
-rw-r--r--drivers/infiniband/core/uverbs_cmd.c4562
-rw-r--r--drivers/infiniband/core/uverbs_ioctl.c849
-rw-r--r--drivers/infiniband/core/uverbs_main.c1569
-rw-r--r--drivers/infiniband/core/uverbs_marshall.c128
-rw-r--r--drivers/infiniband/core/uverbs_std_types.c269
-rw-r--r--drivers/infiniband/core/uverbs_std_types_async_fd.c79
-rw-r--r--drivers/infiniband/core/uverbs_std_types_counters.c163
-rw-r--r--drivers/infiniband/core/uverbs_std_types_cq.c298
-rw-r--r--drivers/infiniband/core/uverbs_std_types_device.c508
-rw-r--r--drivers/infiniband/core/uverbs_std_types_dm.c116
-rw-r--r--drivers/infiniband/core/uverbs_std_types_dmah.c145
-rw-r--r--drivers/infiniband/core/uverbs_std_types_flow_action.c (renamed from drivers/infiniband/hw/qib/qib_pio_copy.c)58
-rw-r--r--drivers/infiniband/core/uverbs_std_types_mr.c553
-rw-r--r--drivers/infiniband/core/uverbs_std_types_qp.c380
-rw-r--r--drivers/infiniband/core/uverbs_std_types_srq.c234
-rw-r--r--drivers/infiniband/core/uverbs_std_types_wq.c194
-rw-r--r--drivers/infiniband/core/uverbs_uapi.c735
-rw-r--r--drivers/infiniband/core/verbs.c2446
-rw-r--r--drivers/infiniband/hw/Makefile14
-rw-r--r--drivers/infiniband/hw/bng_re/Kconfig10
-rw-r--r--drivers/infiniband/hw/bng_re/Makefile8
-rw-r--r--drivers/infiniband/hw/bng_re/bng_debugfs.c39
-rw-r--r--drivers/infiniband/hw/bng_re/bng_debugfs.h12
-rw-r--r--drivers/infiniband/hw/bng_re/bng_dev.c534
-rw-r--r--drivers/infiniband/hw/bng_re/bng_fw.c767
-rw-r--r--drivers/infiniband/hw/bng_re/bng_fw.h211
-rw-r--r--drivers/infiniband/hw/bng_re/bng_re.h85
-rw-r--r--drivers/infiniband/hw/bng_re/bng_res.c279
-rw-r--r--drivers/infiniband/hw/bng_re/bng_res.h215
-rw-r--r--drivers/infiniband/hw/bng_re/bng_sp.c131
-rw-r--r--drivers/infiniband/hw/bng_re/bng_sp.h47
-rw-r--r--drivers/infiniband/hw/bng_re/bng_tlv.h128
-rw-r--r--drivers/infiniband/hw/bnxt_re/Kconfig9
-rw-r--r--drivers/infiniband/hw/bnxt_re/Makefile8
-rw-r--r--drivers/infiniband/hw/bnxt_re/bnxt_re.h292
-rw-r--r--drivers/infiniband/hw/bnxt_re/debugfs.c524
-rw-r--r--drivers/infiniband/hw/bnxt_re/debugfs.h55
-rw-r--r--drivers/infiniband/hw/bnxt_re/hw_counters.c433
-rw-r--r--drivers/infiniband/hw/bnxt_re/hw_counters.h165
-rw-r--r--drivers/infiniband/hw/bnxt_re/ib_verbs.c4938
-rw-r--r--drivers/infiniband/hw/bnxt_re/ib_verbs.h296
-rw-r--r--drivers/infiniband/hw/bnxt_re/main.c2614
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_fp.c3226
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_fp.h689
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_rcfw.c1202
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_rcfw.h294
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_res.c955
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_res.h626
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_sp.c1145
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_sp.h373
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_tlv.h162
-rw-r--r--drivers/infiniband/hw/bnxt_re/roce_hsi.h4729
-rw-r--r--drivers/infiniband/hw/cxgb3/Kconfig27
-rw-r--r--drivers/infiniband/hw/cxgb3/Makefile8
-rw-r--r--drivers/infiniband/hw/cxgb3/cxio_dbg.c203
-rw-r--r--drivers/infiniband/hw/cxgb3/cxio_hal.c1343
-rw-r--r--drivers/infiniband/hw/cxgb3/cxio_hal.h211
-rw-r--r--drivers/infiniband/hw/cxgb3/cxio_resource.c343
-rw-r--r--drivers/infiniband/hw/cxgb3/cxio_resource.h69
-rw-r--r--drivers/infiniband/hw/cxgb3/cxio_wr.h802
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch.c292
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch.h180
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_cm.c2272
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_cm.h233
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_cq.c229
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_ev.c232
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_mem.c101
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_provider.c1487
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_provider.h347
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_qp.c1082
-rw-r--r--drivers/infiniband/hw/cxgb3/tcb.h632
-rw-r--r--drivers/infiniband/hw/cxgb4/Kconfig6
-rw-r--r--drivers/infiniband/hw/cxgb4/Makefile8
-rw-r--r--drivers/infiniband/hw/cxgb4/cm.c1068
-rw-r--r--drivers/infiniband/hw/cxgb4/cq.c547
-rw-r--r--drivers/infiniband/hw/cxgb4/device.c657
-rw-r--r--drivers/infiniband/hw/cxgb4/ev.c76
-rw-r--r--drivers/infiniband/hw/cxgb4/id_table.c22
-rw-r--r--drivers/infiniband/hw/cxgb4/iw_cxgb4.h375
-rw-r--r--drivers/infiniband/hw/cxgb4/mem.c425
-rw-r--r--drivers/infiniband/hw/cxgb4/provider.c568
-rw-r--r--drivers/infiniband/hw/cxgb4/qp.c1598
-rw-r--r--drivers/infiniband/hw/cxgb4/resource.c159
-rw-r--r--drivers/infiniband/hw/cxgb4/restrack.c487
-rw-r--r--drivers/infiniband/hw/cxgb4/t4.h233
-rw-r--r--drivers/infiniband/hw/cxgb4/t4fw_ri_api.h98
-rw-r--r--drivers/infiniband/hw/efa/Kconfig15
-rw-r--r--drivers/infiniband/hw/efa/Makefile9
-rw-r--r--drivers/infiniband/hw/efa/efa.h198
-rw-r--r--drivers/infiniband/hw/efa/efa_admin_cmds_defs.h1140
-rw-r--r--drivers/infiniband/hw/efa/efa_admin_defs.h175
-rw-r--r--drivers/infiniband/hw/efa/efa_com.c1255
-rw-r--r--drivers/infiniband/hw/efa/efa_com.h180
-rw-r--r--drivers/infiniband/hw/efa/efa_com_cmd.c838
-rw-r--r--drivers/infiniband/hw/efa/efa_com_cmd.h353
-rw-r--r--drivers/infiniband/hw/efa/efa_common_defs.h29
-rw-r--r--drivers/infiniband/hw/efa/efa_io_defs.h391
-rw-r--r--drivers/infiniband/hw/efa/efa_main.c706
-rw-r--r--drivers/infiniband/hw/efa/efa_regs_defs.h101
-rw-r--r--drivers/infiniband/hw/efa/efa_verbs.c2348
-rw-r--r--drivers/infiniband/hw/erdma/Kconfig12
-rw-r--r--drivers/infiniband/hw/erdma/Makefile4
-rw-r--r--drivers/infiniband/hw/erdma/erdma.h283
-rw-r--r--drivers/infiniband/hw/erdma/erdma_cm.c1431
-rw-r--r--drivers/infiniband/hw/erdma/erdma_cm.h167
-rw-r--r--drivers/infiniband/hw/erdma/erdma_cmdq.c452
-rw-r--r--drivers/infiniband/hw/erdma/erdma_cq.c268
-rw-r--r--drivers/infiniband/hw/erdma/erdma_eq.c326
-rw-r--r--drivers/infiniband/hw/erdma/erdma_hw.h753
-rw-r--r--drivers/infiniband/hw/erdma/erdma_main.c684
-rw-r--r--drivers/infiniband/hw/erdma/erdma_qp.c757
-rw-r--r--drivers/infiniband/hw/erdma/erdma_verbs.c2300
-rw-r--r--drivers/infiniband/hw/erdma/erdma_verbs.h491
-rw-r--r--drivers/infiniband/hw/hfi1/Kconfig20
-rw-r--r--drivers/infiniband/hw/hfi1/Makefile61
-rw-r--r--drivers/infiniband/hw/hfi1/affinity.c718
-rw-r--r--drivers/infiniband/hw/hfi1/affinity.h68
-rw-r--r--drivers/infiniband/hw/hfi1/aspm.c270
-rw-r--r--drivers/infiniband/hw/hfi1/aspm.h296
-rw-r--r--drivers/infiniband/hw/hfi1/chip.c2957
-rw-r--r--drivers/infiniband/hw/hfi1/chip.h257
-rw-r--r--drivers/infiniband/hw/hfi1/chip_registers.h77
-rw-r--r--drivers/infiniband/hw/hfi1/common.h149
-rw-r--r--drivers/infiniband/hw/hfi1/debugfs.c536
-rw-r--r--drivers/infiniband/hw/hfi1/debugfs.h90
-rw-r--r--drivers/infiniband/hw/hfi1/device.c123
-rw-r--r--drivers/infiniband/hw/hfi1/device.h49
-rw-r--r--drivers/infiniband/hw/hfi1/dma.c183
-rw-r--r--drivers/infiniband/hw/hfi1/driver.c1324
-rw-r--r--drivers/infiniband/hw/hfi1/efivar.c74
-rw-r--r--drivers/infiniband/hw/hfi1/efivar.h45
-rw-r--r--drivers/infiniband/hw/hfi1/eprom.c56
-rw-r--r--drivers/infiniband/hw/hfi1/eprom.h44
-rw-r--r--drivers/infiniband/hw/hfi1/exp_rcv.c78
-rw-r--r--drivers/infiniband/hw/hfi1/exp_rcv.h171
-rw-r--r--drivers/infiniband/hw/hfi1/fault.c319
-rw-r--r--drivers/infiniband/hw/hfi1/fault.h69
-rw-r--r--drivers/infiniband/hw/hfi1/file_ops.c1336
-rw-r--r--drivers/infiniband/hw/hfi1/firmware.c206
-rw-r--r--drivers/infiniband/hw/hfi1/hfi.h1450
-rw-r--r--drivers/infiniband/hw/hfi1/init.c1091
-rw-r--r--drivers/infiniband/hw/hfi1/intr.c152
-rw-r--r--drivers/infiniband/hw/hfi1/iowait.c128
-rw-r--r--drivers/infiniband/hw/hfi1/iowait.h355
-rw-r--r--drivers/infiniband/hw/hfi1/ipoib.h171
-rw-r--r--drivers/infiniband/hw/hfi1/ipoib_main.c250
-rw-r--r--drivers/infiniband/hw/hfi1/ipoib_rx.c92
-rw-r--r--drivers/infiniband/hw/hfi1/ipoib_tx.c868
-rw-r--r--drivers/infiniband/hw/hfi1/mad.c1432
-rw-r--r--drivers/infiniband/hw/hfi1/mad.h102
-rw-r--r--drivers/infiniband/hw/hfi1/mmu_rb.c325
-rw-r--r--drivers/infiniband/hw/hfi1/mmu_rb.h90
-rw-r--r--drivers/infiniband/hw/hfi1/msix.c348
-rw-r--r--drivers/infiniband/hw/hfi1/msix.h24
-rw-r--r--drivers/infiniband/hw/hfi1/netdev.h105
-rw-r--r--drivers/infiniband/hw/hfi1/netdev_rx.c487
-rw-r--r--drivers/infiniband/hw/hfi1/opa_compat.h69
-rw-r--r--drivers/infiniband/hw/hfi1/opfn.c323
-rw-r--r--drivers/infiniband/hw/hfi1/opfn.h87
-rw-r--r--drivers/infiniband/hw/hfi1/pcie.c589
-rw-r--r--drivers/infiniband/hw/hfi1/pin_system.c474
-rw-r--r--drivers/infiniband/hw/hfi1/pinning.h20
-rw-r--r--drivers/infiniband/hw/hfi1/pio.c295
-rw-r--r--drivers/infiniband/hw/hfi1/pio.h90
-rw-r--r--drivers/infiniband/hw/hfi1/pio_copy.c60
-rw-r--r--drivers/infiniband/hw/hfi1/platform.c163
-rw-r--r--drivers/infiniband/hw/hfi1/platform.h45
-rw-r--r--drivers/infiniband/hw/hfi1/qp.c704
-rw-r--r--drivers/infiniband/hw/hfi1/qp.h159
-rw-r--r--drivers/infiniband/hw/hfi1/qsfp.c74
-rw-r--r--drivers/infiniband/hw/hfi1/qsfp.h46
-rw-r--r--drivers/infiniband/hw/hfi1/rc.c2084
-rw-r--r--drivers/infiniband/hw/hfi1/rc.h59
-rw-r--r--drivers/infiniband/hw/hfi1/ruc.c1105
-rw-r--r--drivers/infiniband/hw/hfi1/sdma.c456
-rw-r--r--drivers/infiniband/hw/hfi1/sdma.h204
-rw-r--r--drivers/infiniband/hw/hfi1/sdma_txreq.h48
-rw-r--r--drivers/infiniband/hw/hfi1/sysfs.c725
-rw-r--r--drivers/infiniband/hw/hfi1/tid_rdma.c5534
-rw-r--r--drivers/infiniband/hw/hfi1/tid_rdma.h319
-rw-r--r--drivers/infiniband/hw/hfi1/trace.c429
-rw-r--r--drivers/infiniband/hw/hfi1/trace.h59
-rw-r--r--drivers/infiniband/hw/hfi1/trace_ctxts.h87
-rw-r--r--drivers/infiniband/hw/hfi1/trace_dbg.h69
-rw-r--r--drivers/infiniband/hw/hfi1/trace_ibhdrs.h560
-rw-r--r--drivers/infiniband/hw/hfi1/trace_iowait.h54
-rw-r--r--drivers/infiniband/hw/hfi1/trace_misc.h115
-rw-r--r--drivers/infiniband/hw/hfi1/trace_mmu.h72
-rw-r--r--drivers/infiniband/hw/hfi1/trace_rc.h100
-rw-r--r--drivers/infiniband/hw/hfi1/trace_rx.h204
-rw-r--r--drivers/infiniband/hw/hfi1/trace_tid.h1642
-rw-r--r--drivers/infiniband/hw/hfi1/trace_tx.h523
-rw-r--r--drivers/infiniband/hw/hfi1/uc.c172
-rw-r--r--drivers/infiniband/hw/hfi1/ud.c710
-rw-r--r--drivers/infiniband/hw/hfi1/user_exp_rcv.c824
-rw-r--r--drivers/infiniband/hw/hfi1/user_exp_rcv.h127
-rw-r--r--drivers/infiniband/hw/hfi1/user_pages.c129
-rw-r--r--drivers/infiniband/hw/hfi1/user_sdma.c1155
-rw-r--r--drivers/infiniband/hw/hfi1/user_sdma.h225
-rw-r--r--drivers/infiniband/hw/hfi1/verbs.c1239
-rw-r--r--drivers/infiniband/hw/hfi1/verbs.h313
-rw-r--r--drivers/infiniband/hw/hfi1/verbs_txreq.c59
-rw-r--r--drivers/infiniband/hw/hfi1/verbs_txreq.h71
-rw-r--r--drivers/infiniband/hw/hfi1/vnic.h126
-rw-r--r--drivers/infiniband/hw/hfi1/vnic_main.c615
-rw-r--r--drivers/infiniband/hw/hfi1/vnic_sdma.c282
-rw-r--r--drivers/infiniband/hw/hns/Kconfig19
-rw-r--r--drivers/infiniband/hw/hns/Makefile14
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_ah.c152
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_alloc.c303
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_bond.c1012
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_bond.h95
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_cmd.c303
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_cmd.h106
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_common.h304
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_cq.c711
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_db.c180
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_debugfs.c111
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_debugfs.h33
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_device.h1197
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_eq.c758
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_eq.h134
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hem.c1525
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hem.h119
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hw_v1.c3841
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hw_v1.h1062
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hw_v2.c7409
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hw_v2.h1501
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_main.c1366
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_mr.c1282
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_pd.c164
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_qp.c1680
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_restrack.c234
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_srq.c549
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_trace.h216
-rw-r--r--drivers/infiniband/hw/i40iw/Kconfig7
-rw-r--r--drivers/infiniband/hw/i40iw/Makefile9
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw.h594
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_cm.c4329
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_cm.h453
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_ctrl.c5047
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_d.h1727
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_hmc.c821
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_hmc.h241
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_hw.c768
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_main.c2031
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_osdep.h218
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_p.h121
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_pble.c615
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_pble.h131
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_puda.c1493
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_puda.h189
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_register.h1030
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_status.h101
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_type.h1350
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_ucontext.h107
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_uk.c1198
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_user.h446
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_utils.c1447
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_verbs.c2909
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_verbs.h179
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_vf.c85
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_vf.h62
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_virtchnl.c759
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_virtchnl.h124
-rw-r--r--drivers/infiniband/hw/ionic/Kconfig15
-rw-r--r--drivers/infiniband/hw/ionic/Makefile9
-rw-r--r--drivers/infiniband/hw/ionic/ionic_admin.c1229
-rw-r--r--drivers/infiniband/hw/ionic/ionic_controlpath.c2679
-rw-r--r--drivers/infiniband/hw/ionic/ionic_datapath.c1399
-rw-r--r--drivers/infiniband/hw/ionic/ionic_fw.h1029
-rw-r--r--drivers/infiniband/hw/ionic/ionic_hw_stats.c484
-rw-r--r--drivers/infiniband/hw/ionic/ionic_ibdev.c440
-rw-r--r--drivers/infiniband/hw/ionic/ionic_ibdev.h517
-rw-r--r--drivers/infiniband/hw/ionic/ionic_lif_cfg.c111
-rw-r--r--drivers/infiniband/hw/ionic/ionic_lif_cfg.h66
-rw-r--r--drivers/infiniband/hw/ionic/ionic_pgtbl.c143
-rw-r--r--drivers/infiniband/hw/ionic/ionic_queue.c52
-rw-r--r--drivers/infiniband/hw/ionic/ionic_queue.h234
-rw-r--r--drivers/infiniband/hw/ionic/ionic_res.h154
-rw-r--r--drivers/infiniband/hw/irdma/Kconfig14
-rw-r--r--drivers/infiniband/hw/irdma/Makefile31
-rw-r--r--drivers/infiniband/hw/irdma/cm.c4434
-rw-r--r--drivers/infiniband/hw/irdma/cm.h416
-rw-r--r--drivers/infiniband/hw/irdma/ctrl.c6602
-rw-r--r--drivers/infiniband/hw/irdma/defs.h1184
-rw-r--r--drivers/infiniband/hw/irdma/hmc.c709
-rw-r--r--drivers/infiniband/hw/irdma/hmc.h186
-rw-r--r--drivers/infiniband/hw/irdma/hw.c2823
-rw-r--r--drivers/infiniband/hw/irdma/i40iw_hw.c261
-rw-r--r--drivers/infiniband/hw/irdma/i40iw_hw.h162
-rw-r--r--drivers/infiniband/hw/irdma/i40iw_if.c220
-rw-r--r--drivers/infiniband/hw/irdma/icrdma_hw.c205
-rw-r--r--drivers/infiniband/hw/irdma/icrdma_hw.h73
-rw-r--r--drivers/infiniband/hw/irdma/icrdma_if.c347
-rw-r--r--drivers/infiniband/hw/irdma/ig3rdma_hw.c170
-rw-r--r--drivers/infiniband/hw/irdma/ig3rdma_hw.h32
-rw-r--r--drivers/infiniband/hw/irdma/ig3rdma_if.c236
-rw-r--r--drivers/infiniband/hw/irdma/irdma.h174
-rw-r--r--drivers/infiniband/hw/irdma/main.c211
-rw-r--r--drivers/infiniband/hw/irdma/main.h578
-rw-r--r--drivers/infiniband/hw/irdma/osdep.h74
-rw-r--r--drivers/infiniband/hw/irdma/pble.c519
-rw-r--r--drivers/infiniband/hw/irdma/pble.h132
-rw-r--r--drivers/infiniband/hw/irdma/protos.h97
-rw-r--r--drivers/infiniband/hw/irdma/puda.c1718
-rw-r--r--drivers/infiniband/hw/irdma/puda.h182
-rw-r--r--drivers/infiniband/hw/irdma/trace.c112
-rw-r--r--drivers/infiniband/hw/irdma/trace.h3
-rw-r--r--drivers/infiniband/hw/irdma/trace_cm.h460
-rw-r--r--drivers/infiniband/hw/irdma/type.h1674
-rw-r--r--drivers/infiniband/hw/irdma/uda.c265
-rw-r--r--drivers/infiniband/hw/irdma/uda.h87
-rw-r--r--drivers/infiniband/hw/irdma/uda_d.h127
-rw-r--r--drivers/infiniband/hw/irdma/uk.c1930
-rw-r--r--drivers/infiniband/hw/irdma/user.h676
-rw-r--r--drivers/infiniband/hw/irdma/utils.c2508
-rw-r--r--drivers/infiniband/hw/irdma/verbs.c5517
-rw-r--r--drivers/infiniband/hw/irdma/verbs.h341
-rw-r--r--drivers/infiniband/hw/irdma/virtchnl.c618
-rw-r--r--drivers/infiniband/hw/irdma/virtchnl.h176
-rw-r--r--drivers/infiniband/hw/irdma/ws.c406
-rw-r--r--drivers/infiniband/hw/irdma/ws.h41
-rw-r--r--drivers/infiniband/hw/mana/Kconfig10
-rw-r--r--drivers/infiniband/hw/mana/Makefile4
-rw-r--r--drivers/infiniband/hw/mana/ah.c58
-rw-r--r--drivers/infiniband/hw/mana/counters.c179
-rw-r--r--drivers/infiniband/hw/mana/counters.h62
-rw-r--r--drivers/infiniband/hw/mana/cq.c342
-rw-r--r--drivers/infiniband/hw/mana/device.c261
-rw-r--r--drivers/infiniband/hw/mana/main.c1134
-rw-r--r--drivers/infiniband/hw/mana/mana_ib.h738
-rw-r--r--drivers/infiniband/hw/mana/mr.c319
-rw-r--r--drivers/infiniband/hw/mana/qp.c921
-rw-r--r--drivers/infiniband/hw/mana/shadow_queue.h115
-rw-r--r--drivers/infiniband/hw/mana/wq.c92
-rw-r--r--drivers/infiniband/hw/mana/wr.c168
-rw-r--r--drivers/infiniband/hw/mlx4/Kconfig4
-rw-r--r--drivers/infiniband/hw/mlx4/Makefile1
-rw-r--r--drivers/infiniband/hw/mlx4/ah.c253
-rw-r--r--drivers/infiniband/hw/mlx4/alias_GUID.c40
-rw-r--r--drivers/infiniband/hw/mlx4/cm.c248
-rw-r--r--drivers/infiniband/hw/mlx4/cq.c148
-rw-r--r--drivers/infiniband/hw/mlx4/doorbell.c12
-rw-r--r--drivers/infiniband/hw/mlx4/mad.c381
-rw-r--r--drivers/infiniband/hw/mlx4/main.c1366
-rw-r--r--drivers/infiniband/hw/mlx4/mcg.c115
-rw-r--r--drivers/infiniband/hw/mlx4/mlx4_ib.h249
-rw-r--r--drivers/infiniband/hw/mlx4/mr.c259
-rw-r--r--drivers/infiniband/hw/mlx4/qp.c2255
-rw-r--r--drivers/infiniband/hw/mlx4/srq.c121
-rw-r--r--drivers/infiniband/hw/mlx4/sysfs.c91
-rw-r--r--drivers/infiniband/hw/mlx5/Kconfig5
-rw-r--r--drivers/infiniband/hw/mlx5/Makefile33
-rw-r--r--drivers/infiniband/hw/mlx5/ah.c145
-rw-r--r--drivers/infiniband/hw/mlx5/cmd.c268
-rw-r--r--drivers/infiniband/hw/mlx5/cmd.h63
-rw-r--r--drivers/infiniband/hw/mlx5/cong.c491
-rw-r--r--drivers/infiniband/hw/mlx5/counters.c1279
-rw-r--r--drivers/infiniband/hw/mlx5/counters.h19
-rw-r--r--drivers/infiniband/hw/mlx5/cq.c668
-rw-r--r--drivers/infiniband/hw/mlx5/data_direct.c227
-rw-r--r--drivers/infiniband/hw/mlx5/data_direct.h23
-rw-r--r--drivers/infiniband/hw/mlx5/devx.c3242
-rw-r--r--drivers/infiniband/hw/mlx5/devx.h50
-rw-r--r--drivers/infiniband/hw/mlx5/dm.c612
-rw-r--r--drivers/infiniband/hw/mlx5/dm.h68
-rw-r--r--drivers/infiniband/hw/mlx5/dmah.c54
-rw-r--r--drivers/infiniband/hw/mlx5/dmah.h23
-rw-r--r--drivers/infiniband/hw/mlx5/doorbell.c15
-rw-r--r--drivers/infiniband/hw/mlx5/fs.c3516
-rw-r--r--drivers/infiniband/hw/mlx5/fs.h36
-rw-r--r--drivers/infiniband/hw/mlx5/gsi.c231
-rw-r--r--drivers/infiniband/hw/mlx5/ib_rep.c346
-rw-r--r--drivers/infiniband/hw/mlx5/ib_rep.h40
-rw-r--r--drivers/infiniband/hw/mlx5/ib_virt.c46
-rw-r--r--drivers/infiniband/hw/mlx5/macsec.c364
-rw-r--r--drivers/infiniband/hw/mlx5/macsec.h29
-rw-r--r--drivers/infiniband/hw/mlx5/mad.c411
-rw-r--r--drivers/infiniband/hw/mlx5/main.c5831
-rw-r--r--drivers/infiniband/hw/mlx5/mem.c231
-rw-r--r--drivers/infiniband/hw/mlx5/mlx5_ib.h1627
-rw-r--r--drivers/infiniband/hw/mlx5/mr.c3351
-rw-r--r--drivers/infiniband/hw/mlx5/odp.c2175
-rw-r--r--drivers/infiniband/hw/mlx5/qos.c133
-rw-r--r--drivers/infiniband/hw/mlx5/qp.c6152
-rw-r--r--drivers/infiniband/hw/mlx5/qp.h60
-rw-r--r--drivers/infiniband/hw/mlx5/qpc.c697
-rw-r--r--drivers/infiniband/hw/mlx5/restrack.c217
-rw-r--r--drivers/infiniband/hw/mlx5/restrack.h13
-rw-r--r--drivers/infiniband/hw/mlx5/srq.c271
-rw-r--r--drivers/infiniband/hw/mlx5/srq.h69
-rw-r--r--drivers/infiniband/hw/mlx5/srq_cmd.c774
-rw-r--r--drivers/infiniband/hw/mlx5/std_types.c277
-rw-r--r--drivers/infiniband/hw/mlx5/umr.c1129
-rw-r--r--drivers/infiniband/hw/mlx5/umr.h113
-rw-r--r--drivers/infiniband/hw/mlx5/wr.c1284
-rw-r--r--drivers/infiniband/hw/mlx5/wr.h136
-rw-r--r--drivers/infiniband/hw/mthca/Kconfig5
-rw-r--r--drivers/infiniband/hw/mthca/Makefile1
-rw-r--r--drivers/infiniband/hw/mthca/mthca_allocator.c31
-rw-r--r--drivers/infiniband/hw/mthca/mthca_av.c90
-rw-r--r--drivers/infiniband/hw/mthca/mthca_catas.c10
-rw-r--r--drivers/infiniband/hw/mthca/mthca_cmd.c44
-rw-r--r--drivers/infiniband/hw/mthca/mthca_cq.c19
-rw-r--r--drivers/infiniband/hw/mthca/mthca_dev.h70
-rw-r--r--drivers/infiniband/hw/mthca/mthca_eq.c43
-rw-r--r--drivers/infiniband/hw/mthca/mthca_mad.c103
-rw-r--r--drivers/infiniband/hw/mthca/mthca_main.c66
-rw-r--r--drivers/infiniband/hw/mthca/mthca_memfree.c52
-rw-r--r--drivers/infiniband/hw/mthca/mthca_memfree.h2
-rw-r--r--drivers/infiniband/hw/mthca/mthca_mr.c291
-rw-r--r--drivers/infiniband/hw/mthca/mthca_profile.c4
-rw-r--r--drivers/infiniband/hw/mthca/mthca_provider.c818
-rw-r--r--drivers/infiniband/hw/mthca/mthca_provider.h50
-rw-r--r--drivers/infiniband/hw/mthca/mthca_qp.c268
-rw-r--r--drivers/infiniband/hw/mthca/mthca_srq.c58
-rw-r--r--drivers/infiniband/hw/mthca/mthca_user.h112
-rw-r--r--drivers/infiniband/hw/nes/Kconfig15
-rw-r--r--drivers/infiniband/hw/nes/Makefile3
-rw-r--r--drivers/infiniband/hw/nes/nes.c1243
-rw-r--r--drivers/infiniband/hw/nes/nes.h582
-rw-r--r--drivers/infiniband/hw/nes/nes_cm.c3998
-rw-r--r--drivers/infiniband/hw/nes/nes_cm.h471
-rw-r--r--drivers/infiniband/hw/nes/nes_context.h193
-rw-r--r--drivers/infiniband/hw/nes/nes_hw.c3893
-rw-r--r--drivers/infiniband/hw/nes/nes_hw.h1379
-rw-r--r--drivers/infiniband/hw/nes/nes_mgt.c1156
-rw-r--r--drivers/infiniband/hw/nes/nes_mgt.h97
-rw-r--r--drivers/infiniband/hw/nes/nes_nic.c1874
-rw-r--r--drivers/infiniband/hw/nes/nes_utils.c918
-rw-r--r--drivers/infiniband/hw/nes/nes_verbs.c3922
-rw-r--r--drivers/infiniband/hw/nes/nes_verbs.h199
-rw-r--r--drivers/infiniband/hw/ocrdma/Kconfig3
-rw-r--r--drivers/infiniband/hw/ocrdma/Makefile3
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma.h8
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_ah.c151
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_ah.h20
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_hw.c147
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_hw.h8
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_main.c299
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_sli.h2
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_stats.c112
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_stats.h3
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_verbs.c608
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_verbs.h74
-rw-r--r--drivers/infiniband/hw/qedr/Kconfig5
-rw-r--r--drivers/infiniband/hw/qedr/Makefile3
-rw-r--r--drivers/infiniband/hw/qedr/main.c588
-rw-r--r--drivers/infiniband/hw/qedr/qedr.h215
-rw-r--r--drivers/infiniband/hw/qedr/qedr_hsi.h56
-rw-r--r--drivers/infiniband/hw/qedr/qedr_hsi_rdma.h205
-rw-r--r--drivers/infiniband/hw/qedr/qedr_iw_cm.c819
-rw-r--r--drivers/infiniband/hw/qedr/qedr_iw_cm.h (renamed from drivers/infiniband/hw/qib/qib_debugfs.h)32
-rw-r--r--drivers/infiniband/hw/qedr/qedr_roce_cm.c (renamed from drivers/infiniband/hw/qedr/qedr_cm.c)345
-rw-r--r--drivers/infiniband/hw/qedr/qedr_roce_cm.h (renamed from drivers/infiniband/hw/qedr/qedr_cm.h)15
-rw-r--r--drivers/infiniband/hw/qedr/verbs.c2633
-rw-r--r--drivers/infiniband/hw/qedr/verbs.h83
-rw-r--r--drivers/infiniband/hw/qib/Kconfig15
-rw-r--r--drivers/infiniband/hw/qib/Makefile16
-rw-r--r--drivers/infiniband/hw/qib/qib.h1552
-rw-r--r--drivers/infiniband/hw/qib/qib_6120_regs.h977
-rw-r--r--drivers/infiniband/hw/qib/qib_7220.h149
-rw-r--r--drivers/infiniband/hw/qib/qib_7220_regs.h1496
-rw-r--r--drivers/infiniband/hw/qib/qib_7322_regs.h3163
-rw-r--r--drivers/infiniband/hw/qib/qib_common.h809
-rw-r--r--drivers/infiniband/hw/qib/qib_debugfs.c289
-rw-r--r--drivers/infiniband/hw/qib/qib_diag.c912
-rw-r--r--drivers/infiniband/hw/qib/qib_dma.c169
-rw-r--r--drivers/infiniband/hw/qib/qib_driver.c825
-rw-r--r--drivers/infiniband/hw/qib/qib_eeprom.c272
-rw-r--r--drivers/infiniband/hw/qib/qib_file_ops.c2428
-rw-r--r--drivers/infiniband/hw/qib/qib_fs.c605
-rw-r--r--drivers/infiniband/hw/qib/qib_iba6120.c3594
-rw-r--r--drivers/infiniband/hw/qib/qib_iba7220.c4651
-rw-r--r--drivers/infiniband/hw/qib/qib_iba7322.c8545
-rw-r--r--drivers/infiniband/hw/qib/qib_init.c1827
-rw-r--r--drivers/infiniband/hw/qib/qib_intr.c240
-rw-r--r--drivers/infiniband/hw/qib/qib_keys.c238
-rw-r--r--drivers/infiniband/hw/qib/qib_mad.c2504
-rw-r--r--drivers/infiniband/hw/qib/qib_mad.h300
-rw-r--r--drivers/infiniband/hw/qib/qib_pcie.c705
-rw-r--r--drivers/infiniband/hw/qib/qib_qp.c630
-rw-r--r--drivers/infiniband/hw/qib/qib_qsfp.c559
-rw-r--r--drivers/infiniband/hw/qib/qib_qsfp.h189
-rw-r--r--drivers/infiniband/hw/qib/qib_rc.c2236
-rw-r--r--drivers/infiniband/hw/qib/qib_ruc.c823
-rw-r--r--drivers/infiniband/hw/qib/qib_sd7220.c1454
-rw-r--r--drivers/infiniband/hw/qib/qib_sdma.c1044
-rw-r--r--drivers/infiniband/hw/qib/qib_sysfs.c871
-rw-r--r--drivers/infiniband/hw/qib/qib_twsi.c501
-rw-r--r--drivers/infiniband/hw/qib/qib_tx.c572
-rw-r--r--drivers/infiniband/hw/qib/qib_uc.c533
-rw-r--r--drivers/infiniband/hw/qib/qib_ud.c600
-rw-r--r--drivers/infiniband/hw/qib/qib_user_pages.c157
-rw-r--r--drivers/infiniband/hw/qib/qib_user_sdma.c1465
-rw-r--r--drivers/infiniband/hw/qib/qib_user_sdma.h52
-rw-r--r--drivers/infiniband/hw/qib/qib_verbs.c1801
-rw-r--r--drivers/infiniband/hw/qib/qib_verbs.h428
-rw-r--r--drivers/infiniband/hw/qib/qib_wc_ppc64.c62
-rw-r--r--drivers/infiniband/hw/qib/qib_wc_x86_64.c150
-rw-r--r--drivers/infiniband/hw/usnic/Kconfig5
-rw-r--r--drivers/infiniband/hw/usnic/Makefile3
-rw-r--r--drivers/infiniband/hw/usnic/usnic_abi.h2
-rw-r--r--drivers/infiniband/hw/usnic/usnic_common_util.h38
-rw-r--r--drivers/infiniband/hw/usnic/usnic_debugfs.c30
-rw-r--r--drivers/infiniband/hw/usnic/usnic_fwd.c22
-rw-r--r--drivers/infiniband/hw/usnic/usnic_fwd.h6
-rw-r--r--drivers/infiniband/hw/usnic/usnic_ib.h6
-rw-r--r--drivers/infiniband/hw/usnic/usnic_ib_main.c329
-rw-r--r--drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c41
-rw-r--r--drivers/infiniband/hw/usnic/usnic_ib_qp_grp.h35
-rw-r--r--drivers/infiniband/hw/usnic/usnic_ib_sysfs.c202
-rw-r--r--drivers/infiniband/hw/usnic/usnic_ib_sysfs.h2
-rw-r--r--drivers/infiniband/hw/usnic/usnic_ib_verbs.c360
-rw-r--r--drivers/infiniband/hw/usnic/usnic_ib_verbs.h51
-rw-r--r--drivers/infiniband/hw/usnic/usnic_transport.c12
-rw-r--r--drivers/infiniband/hw/usnic/usnic_uiom.c168
-rw-r--r--drivers/infiniband/hw/usnic/usnic_uiom.h15
-rw-r--r--drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.c18
-rw-r--r--drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.h16
-rw-r--r--drivers/infiniband/hw/usnic/usnic_vnic.c3
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/Kconfig3
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/Makefile3
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma.h149
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c140
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h125
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_doorbell.c10
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c623
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_misc.c70
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c32
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c324
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_ring.h17
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c303
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c248
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h110
-rw-r--r--drivers/infiniband/sw/Makefile2
-rw-r--r--drivers/infiniband/sw/rdmavt/Kconfig7
-rw-r--r--drivers/infiniband/sw/rdmavt/Makefile5
-rw-r--r--drivers/infiniband/sw/rdmavt/ah.c127
-rw-r--r--drivers/infiniband/sw/rdmavt/ah.h60
-rw-r--r--drivers/infiniband/sw/rdmavt/cq.c383
-rw-r--r--drivers/infiniband/sw/rdmavt/cq.h64
-rw-r--r--drivers/infiniband/sw/rdmavt/dma.c198
-rw-r--r--drivers/infiniband/sw/rdmavt/dma.h53
-rw-r--r--drivers/infiniband/sw/rdmavt/mad.c65
-rw-r--r--drivers/infiniband/sw/rdmavt/mad.h52
-rw-r--r--drivers/infiniband/sw/rdmavt/mcast.c125
-rw-r--r--drivers/infiniband/sw/rdmavt/mcast.h50
-rw-r--r--drivers/infiniband/sw/rdmavt/mmap.c69
-rw-r--r--drivers/infiniband/sw/rdmavt/mmap.h56
-rw-r--r--drivers/infiniband/sw/rdmavt/mr.c518
-rw-r--r--drivers/infiniband/sw/rdmavt/mr.h71
-rw-r--r--drivers/infiniband/sw/rdmavt/pd.c75
-rw-r--r--drivers/infiniband/sw/rdmavt/pd.h56
-rw-r--r--drivers/infiniband/sw/rdmavt/qp.c2122
-rw-r--r--drivers/infiniband/sw/rdmavt/qp.h75
-rw-r--r--drivers/infiniband/sw/rdmavt/rc.c172
-rw-r--r--drivers/infiniband/sw/rdmavt/srq.c167
-rw-r--r--drivers/infiniband/sw/rdmavt/srq.h57
-rw-r--r--drivers/infiniband/sw/rdmavt/trace.c44
-rw-r--r--drivers/infiniband/sw/rdmavt/trace.h52
-rw-r--r--drivers/infiniband/sw/rdmavt/trace_cq.h124
-rw-r--r--drivers/infiniband/sw/rdmavt/trace_mr.h162
-rw-r--r--drivers/infiniband/sw/rdmavt/trace_qp.h90
-rw-r--r--drivers/infiniband/sw/rdmavt/trace_rc.h67
-rw-r--r--drivers/infiniband/sw/rdmavt/trace_rvt.h46
-rw-r--r--drivers/infiniband/sw/rdmavt/trace_tx.h131
-rw-r--r--drivers/infiniband/sw/rdmavt/vt.c539
-rw-r--r--drivers/infiniband/sw/rdmavt/vt.h78
-rw-r--r--drivers/infiniband/sw/rxe/Kconfig11
-rw-r--r--drivers/infiniband/sw/rxe/Makefile7
-rw-r--r--drivers/infiniband/sw/rxe/rxe.c351
-rw-r--r--drivers/infiniband/sw/rxe/rxe.h126
-rw-r--r--drivers/infiniband/sw/rxe/rxe_av.c187
-rw-r--r--drivers/infiniband/sw/rxe/rxe_comp.c470
-rw-r--r--drivers/infiniband/sw/rxe/rxe_cq.c115
-rw-r--r--drivers/infiniband/sw/rxe/rxe_dma.c183
-rw-r--r--drivers/infiniband/sw/rxe/rxe_hdr.h259
-rw-r--r--drivers/infiniband/sw/rxe/rxe_hw_counters.c51
-rw-r--r--drivers/infiniband/sw/rxe/rxe_hw_counters.h37
-rw-r--r--drivers/infiniband/sw/rxe/rxe_icrc.c111
-rw-r--r--drivers/infiniband/sw/rxe/rxe_loc.h331
-rw-r--r--drivers/infiniband/sw/rxe/rxe_mcast.c569
-rw-r--r--drivers/infiniband/sw/rxe/rxe_mmap.c59
-rw-r--r--drivers/infiniband/sw/rxe/rxe_mr.c887
-rw-r--r--drivers/infiniband/sw/rxe/rxe_mw.c338
-rw-r--r--drivers/infiniband/sw/rxe/rxe_net.c590
-rw-r--r--drivers/infiniband/sw/rxe/rxe_net.h35
-rw-r--r--drivers/infiniband/sw/rxe/rxe_odp.c577
-rw-r--r--drivers/infiniband/sw/rxe/rxe_opcode.c816
-rw-r--r--drivers/infiniband/sw/rxe/rxe_opcode.h58
-rw-r--r--drivers/infiniband/sw/rxe/rxe_param.h113
-rw-r--r--drivers/infiniband/sw/rxe/rxe_pool.c574
-rw-r--r--drivers/infiniband/sw/rxe/rxe_pool.h145
-rw-r--r--drivers/infiniband/sw/rxe/rxe_qp.c824
-rw-r--r--drivers/infiniband/sw/rxe/rxe_queue.c135
-rw-r--r--drivers/infiniband/sw/rxe/rxe_queue.h326
-rw-r--r--drivers/infiniband/sw/rxe/rxe_recv.c337
-rw-r--r--drivers/infiniband/sw/rxe/rxe_req.c551
-rw-r--r--drivers/infiniband/sw/rxe/rxe_resp.c1185
-rw-r--r--drivers/infiniband/sw/rxe/rxe_srq.c255
-rw-r--r--drivers/infiniband/sw/rxe/rxe_sysfs.c157
-rw-r--r--drivers/infiniband/sw/rxe/rxe_task.c334
-rw-r--r--drivers/infiniband/sw/rxe/rxe_task.h78
-rw-r--r--drivers/infiniband/sw/rxe/rxe_verbs.c1816
-rw-r--r--drivers/infiniband/sw/rxe/rxe_verbs.h319
-rw-r--r--drivers/infiniband/sw/siw/Kconfig20
-rw-r--r--drivers/infiniband/sw/siw/Makefile11
-rw-r--r--drivers/infiniband/sw/siw/iwarp.h367
-rw-r--r--drivers/infiniband/sw/siw/siw.h729
-rw-r--r--drivers/infiniband/sw/siw/siw_cm.c2009
-rw-r--r--drivers/infiniband/sw/siw/siw_cm.h133
-rw-r--r--drivers/infiniband/sw/siw/siw_cq.c122
-rw-r--r--drivers/infiniband/sw/siw/siw_main.c508
-rw-r--r--drivers/infiniband/sw/siw/siw_mem.c400
-rw-r--r--drivers/infiniband/sw/siw/siw_mem.h69
-rw-r--r--drivers/infiniband/sw/siw/siw_qp.c1318
-rw-r--r--drivers/infiniband/sw/siw/siw_qp_rx.c1455
-rw-r--r--drivers/infiniband/sw/siw/siw_qp_tx.c1306
-rw-r--r--drivers/infiniband/sw/siw/siw_verbs.c1891
-rw-r--r--drivers/infiniband/sw/siw/siw_verbs.h91
-rw-r--r--drivers/infiniband/ulp/Makefile3
-rw-r--r--drivers/infiniband/ulp/ipoib/Kconfig11
-rw-r--r--drivers/infiniband/ulp/ipoib/Makefile1
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib.h135
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_cm.c322
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_ethtool.c113
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_fs.c74
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_ib.c652
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_main.c1560
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_multicast.c222
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_netlink.c78
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_verbs.c118
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_vlan.c267
-rw-r--r--drivers/infiniband/ulp/iser/Kconfig7
-rw-r--r--drivers/infiniband/ulp/iser/Makefile1
-rw-r--r--drivers/infiniband/ulp/iser/iscsi_iser.c272
-rw-r--r--drivers/infiniband/ulp/iser/iscsi_iser.h259
-rw-r--r--drivers/infiniband/ulp/iser/iser_initiator.c260
-rw-r--r--drivers/infiniband/ulp/iser/iser_memory.c443
-rw-r--r--drivers/infiniband/ulp/iser/iser_verbs.c683
-rw-r--r--drivers/infiniband/ulp/isert/Kconfig3
-rw-r--r--drivers/infiniband/ulp/isert/Makefile2
-rw-r--r--drivers/infiniband/ulp/isert/ib_isert.c703
-rw-r--r--drivers/infiniband/ulp/isert/ib_isert.h72
-rw-r--r--drivers/infiniband/ulp/opa_vnic/Kconfig9
-rw-r--r--drivers/infiniband/ulp/opa_vnic/Makefile9
-rw-r--r--drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c513
-rw-r--r--drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.h524
-rw-r--r--drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c183
-rw-r--r--drivers/infiniband/ulp/opa_vnic/opa_vnic_internal.h329
-rw-r--r--drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c400
-rw-r--r--drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c1056
-rw-r--r--drivers/infiniband/ulp/opa_vnic/opa_vnic_vema_iface.c390
-rw-r--r--drivers/infiniband/ulp/rtrs/Kconfig27
-rw-r--r--drivers/infiniband/ulp/rtrs/Makefile21
-rw-r--r--drivers/infiniband/ulp/rtrs/README213
-rw-r--r--drivers/infiniband/ulp/rtrs/rtrs-clt-stats.c198
-rw-r--r--drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c514
-rw-r--r--drivers/infiniband/ulp/rtrs/rtrs-clt-trace.c15
-rw-r--r--drivers/infiniband/ulp/rtrs/rtrs-clt-trace.h86
-rw-r--r--drivers/infiniband/ulp/rtrs/rtrs-clt.c3207
-rw-r--r--drivers/infiniband/ulp/rtrs/rtrs-clt.h252
-rw-r--r--drivers/infiniband/ulp/rtrs/rtrs-log.h28
-rw-r--r--drivers/infiniband/ulp/rtrs/rtrs-pri.h408
-rw-r--r--drivers/infiniband/ulp/rtrs/rtrs-srv-stats.c51
-rw-r--r--drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c319
-rw-r--r--drivers/infiniband/ulp/rtrs/rtrs-srv-trace.c16
-rw-r--r--drivers/infiniband/ulp/rtrs/rtrs-srv-trace.h88
-rw-r--r--drivers/infiniband/ulp/rtrs/rtrs-srv.c2346
-rw-r--r--drivers/infiniband/ulp/rtrs/rtrs-srv.h156
-rw-r--r--drivers/infiniband/ulp/rtrs/rtrs.c645
-rw-r--r--drivers/infiniband/ulp/rtrs/rtrs.h188
-rw-r--r--drivers/infiniband/ulp/srp/Kbuild1
-rw-r--r--drivers/infiniband/ulp/srp/Kconfig5
-rw-r--r--drivers/infiniband/ulp/srp/ib_srp.c2087
-rw-r--r--drivers/infiniband/ulp/srp/ib_srp.h124
-rw-r--r--drivers/infiniband/ulp/srpt/Kconfig5
-rw-r--r--drivers/infiniband/ulp/srpt/Makefile2
-rw-r--r--drivers/infiniband/ulp/srpt/ib_srpt.c2424
-rw-r--r--drivers/infiniband/ulp/srpt/ib_srpt.h226
741 files changed, 251240 insertions, 177028 deletions
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index 670917387eda..794b9778816b 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -1,12 +1,14 @@
+# SPDX-License-Identifier: GPL-2.0-only
menuconfig INFINIBAND
tristate "InfiniBand support"
- depends on PCI || BROKEN
- depends on HAS_IOMEM
+ depends on HAS_IOMEM && HAS_DMA
depends on NET
depends on INET
depends on m || IPV6 != m
+ depends on !ALPHA
select IRQ_POLL
- ---help---
+ select DIMLIB
+ help
Core support for InfiniBand (IB). Make sure to also select
any protocols you wish to use as well as drivers for your
InfiniBand hardware.
@@ -16,66 +18,89 @@ if INFINIBAND
config INFINIBAND_USER_MAD
tristate "InfiniBand userspace MAD support"
depends on INFINIBAND
- ---help---
+ help
Userspace InfiniBand Management Datagram (MAD) support. This
is the kernel side of the userspace MAD support, which allows
userspace processes to send and receive MADs. You will also
- need libibumad from <http://www.openfabrics.org/downloads/management/>.
+ need libibumad from rdma-core
+ <https://github.com/linux-rdma/rdma-core>.
config INFINIBAND_USER_ACCESS
tristate "InfiniBand userspace access (verbs and CM)"
- select ANON_INODES
- ---help---
+ depends on MMU
+ help
Userspace InfiniBand access support. This enables the
kernel side of userspace verbs and the userspace
communication manager (CM). This allows userspace processes
to set up connections and directly access InfiniBand
hardware for fast-path operations. You will also need
libibverbs, libibcm and a hardware driver library from
- <http://www.openfabrics.org/git/>.
+ rdma-core <https://github.com/linux-rdma/rdma-core>.
config INFINIBAND_USER_MEM
bool
depends on INFINIBAND_USER_ACCESS != n
+ depends on MMU
+ select DMA_SHARED_BUFFER
default y
config INFINIBAND_ON_DEMAND_PAGING
bool "InfiniBand on-demand paging support"
depends on INFINIBAND_USER_MEM
select MMU_NOTIFIER
+ select INTERVAL_TREE
+ select HMM_MIRROR
default y
- ---help---
+ help
On demand paging support for the InfiniBand subsystem.
Together with driver support this allows registration of
memory regions without pinning their pages, fetching the
pages on demand instead.
config INFINIBAND_ADDR_TRANS
- bool
+ bool "RDMA/CM"
depends on INFINIBAND
default y
+ help
+ Support for RDMA communication manager (CM).
+ This allows for a generic connection abstraction over RDMA.
config INFINIBAND_ADDR_TRANS_CONFIGFS
bool
depends on INFINIBAND_ADDR_TRANS && CONFIGFS_FS && !(INFINIBAND=y && CONFIGFS_FS=m)
default y
- ---help---
+ help
ConfigFS support for RDMA communication manager (CM).
This allows the user to config the default GID type that the CM
uses for each device, when initiaing new connections.
-source "drivers/infiniband/hw/mthca/Kconfig"
-source "drivers/infiniband/hw/qib/Kconfig"
-source "drivers/infiniband/hw/cxgb3/Kconfig"
+config INFINIBAND_VIRT_DMA
+ def_bool !HIGHMEM
+
+if INFINIBAND_USER_ACCESS || !INFINIBAND_USER_ACCESS
+if !UML
+source "drivers/infiniband/hw/bnxt_re/Kconfig"
+source "drivers/infiniband/hw/bng_re/Kconfig"
source "drivers/infiniband/hw/cxgb4/Kconfig"
-source "drivers/infiniband/hw/i40iw/Kconfig"
+source "drivers/infiniband/hw/efa/Kconfig"
+source "drivers/infiniband/hw/erdma/Kconfig"
+source "drivers/infiniband/hw/hfi1/Kconfig"
+source "drivers/infiniband/hw/hns/Kconfig"
+source "drivers/infiniband/hw/ionic/Kconfig"
+source "drivers/infiniband/hw/irdma/Kconfig"
+source "drivers/infiniband/hw/mana/Kconfig"
source "drivers/infiniband/hw/mlx4/Kconfig"
source "drivers/infiniband/hw/mlx5/Kconfig"
-source "drivers/infiniband/hw/nes/Kconfig"
+source "drivers/infiniband/hw/mthca/Kconfig"
source "drivers/infiniband/hw/ocrdma/Kconfig"
-source "drivers/infiniband/hw/vmw_pvrdma/Kconfig"
+source "drivers/infiniband/hw/qedr/Kconfig"
source "drivers/infiniband/hw/usnic/Kconfig"
-source "drivers/infiniband/hw/hns/Kconfig"
+source "drivers/infiniband/hw/vmw_pvrdma/Kconfig"
+source "drivers/infiniband/sw/rdmavt/Kconfig"
+endif # !UML
+source "drivers/infiniband/sw/rxe/Kconfig"
+source "drivers/infiniband/sw/siw/Kconfig"
+endif
source "drivers/infiniband/ulp/ipoib/Kconfig"
@@ -84,12 +109,8 @@ source "drivers/infiniband/ulp/srpt/Kconfig"
source "drivers/infiniband/ulp/iser/Kconfig"
source "drivers/infiniband/ulp/isert/Kconfig"
+source "drivers/infiniband/ulp/rtrs/Kconfig"
-source "drivers/infiniband/sw/rdmavt/Kconfig"
-source "drivers/infiniband/sw/rxe/Kconfig"
-
-source "drivers/infiniband/hw/hfi1/Kconfig"
-
-source "drivers/infiniband/hw/qedr/Kconfig"
+source "drivers/infiniband/ulp/opa_vnic/Kconfig"
endif # INFINIBAND
diff --git a/drivers/infiniband/Makefile b/drivers/infiniband/Makefile
index fad0b44c356f..8603cdfcfdcb 100644
--- a/drivers/infiniband/Makefile
+++ b/drivers/infiniband/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
obj-$(CONFIG_INFINIBAND) += core/
obj-$(CONFIG_INFINIBAND) += hw/
obj-$(CONFIG_INFINIBAND) += ulp/
diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile
index edaae9f9853c..f483e0c12444 100644
--- a/drivers/infiniband/core/Makefile
+++ b/drivers/infiniband/core/Makefile
@@ -1,24 +1,28 @@
+# SPDX-License-Identifier: GPL-2.0
infiniband-$(CONFIG_INFINIBAND_ADDR_TRANS) := rdma_cm.o
user_access-$(CONFIG_INFINIBAND_ADDR_TRANS) := rdma_ucm.o
obj-$(CONFIG_INFINIBAND) += ib_core.o ib_cm.o iw_cm.o \
$(infiniband-y)
obj-$(CONFIG_INFINIBAND_USER_MAD) += ib_umad.o
-obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o ib_ucm.o \
- $(user_access-y)
+obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o $(user_access-y)
ib_core-y := packer.o ud_header.o verbs.o cq.o rw.o sysfs.o \
- device.o fmr_pool.o cache.o netlink.o \
+ device.o cache.o netlink.o \
roce_gid_mgmt.o mr_pool.o addr.o sa_query.o \
- multicast.o mad.o smi.o agent.o mad_rmpp.o
-ib_core-$(CONFIG_INFINIBAND_USER_MEM) += umem.o
-ib_core-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += umem_odp.o umem_rbtree.o
+ multicast.o mad.o smi.o agent.o mad_rmpp.o \
+ nldev.o restrack.o counters.o ib_core_uverbs.o \
+ trace.o lag.o
-ib_cm-y := cm.o
+ib_core-$(CONFIG_SECURITY_INFINIBAND) += security.o
+ib_core-$(CONFIG_CGROUP_RDMA) += cgroup.o
+
+ib_cm-y := cm.o cm_trace.o
iw_cm-y := iwcm.o iwpm_util.o iwpm_msg.o
-rdma_cm-y := cma.o
+CFLAGS_cma_trace.o += -I$(src)
+rdma_cm-y := cma.o cma_trace.o
rdma_cm-$(CONFIG_INFINIBAND_ADDR_TRANS_CONFIGFS) += cma_configfs.o
@@ -26,6 +30,17 @@ rdma_ucm-y := ucma.o
ib_umad-y := user_mad.o
-ib_ucm-y := ucm.o
-
-ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o
+ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o \
+ rdma_core.o uverbs_std_types.o uverbs_ioctl.o \
+ uverbs_std_types_cq.o \
+ uverbs_std_types_dmah.o \
+ uverbs_std_types_flow_action.o uverbs_std_types_dm.o \
+ uverbs_std_types_mr.o uverbs_std_types_counters.o \
+ uverbs_uapi.o uverbs_std_types_device.o \
+ uverbs_std_types_async_fd.o \
+ uverbs_std_types_srq.o \
+ uverbs_std_types_wq.o \
+ uverbs_std_types_qp.o \
+ ucaps.o
+ib_uverbs-$(CONFIG_INFINIBAND_USER_MEM) += umem.o umem_dmabuf.o
+ib_uverbs-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += umem_odp.o
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index 0f58f46dbad7..61596cda2b65 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -37,14 +37,15 @@
#include <linux/inetdevice.h>
#include <linux/slab.h>
#include <linux/workqueue.h>
-#include <linux/module.h>
#include <net/arp.h>
#include <net/neighbour.h>
#include <net/route.h>
#include <net/netevent.h>
-#include <net/addrconf.h>
+#include <net/ipv6_stubs.h>
#include <net/ip6_route.h>
#include <rdma/ib_addr.h>
+#include <rdma/ib_cache.h>
+#include <rdma/ib_sa.h>
#include <rdma/ib.h>
#include <rdma/rdma_netlink.h>
#include <net/netlink.h>
@@ -56,27 +57,27 @@ struct addr_req {
struct sockaddr_storage src_addr;
struct sockaddr_storage dst_addr;
struct rdma_dev_addr *addr;
- struct rdma_addr_client *client;
void *context;
void (*callback)(int status, struct sockaddr *src_addr,
struct rdma_dev_addr *addr, void *context);
unsigned long timeout;
+ struct delayed_work work;
+ bool resolve_by_gid_attr; /* Consider gid attr in resolve phase */
int status;
u32 seq;
};
static atomic_t ib_nl_addr_request_seq = ATOMIC_INIT(0);
-static void process_req(struct work_struct *work);
-
-static DEFINE_MUTEX(lock);
+static DEFINE_SPINLOCK(lock);
static LIST_HEAD(req_list);
-static DECLARE_DELAYED_WORK(work, process_req);
static struct workqueue_struct *addr_wq;
static const struct nla_policy ib_nl_addr_policy[LS_NLA_TYPE_MAX] = {
[LS_NLA_TYPE_DGID] = {.type = NLA_BINARY,
- .len = sizeof(struct rdma_nla_ls_gid)},
+ .len = sizeof(struct rdma_nla_ls_gid),
+ .validation_type = NLA_VALIDATE_MIN,
+ .min = sizeof(struct rdma_nla_ls_gid)},
};
static inline bool ib_nl_is_good_ip_resp(const struct nlmsghdr *nlh)
@@ -87,8 +88,8 @@ static inline bool ib_nl_is_good_ip_resp(const struct nlmsghdr *nlh)
if (nlh->nlmsg_flags & RDMA_NL_LS_F_ERR)
return false;
- ret = nla_parse(tb, LS_NLA_TYPE_MAX - 1, nlmsg_data(nlh),
- nlmsg_len(nlh), ib_nl_addr_policy);
+ ret = nla_parse_deprecated(tb, LS_NLA_TYPE_MAX - 1, nlmsg_data(nlh),
+ nlmsg_len(nlh), ib_nl_addr_policy, NULL);
if (ret)
return false;
@@ -111,7 +112,7 @@ static void ib_nl_process_good_ip_rsep(const struct nlmsghdr *nlh)
memcpy(&gid, nla_data(curr), nla_len(curr));
}
- mutex_lock(&lock);
+ spin_lock_bh(&lock);
list_for_each_entry(req, &req_list, list) {
if (nlh->nlmsg_seq != req->seq)
continue;
@@ -121,7 +122,7 @@ static void ib_nl_process_good_ip_rsep(const struct nlmsghdr *nlh)
found = 1;
break;
}
- mutex_unlock(&lock);
+ spin_unlock_bh(&lock);
if (!found)
pr_info("Couldn't find request waiting for DGID: %pI6\n",
@@ -129,19 +130,17 @@ static void ib_nl_process_good_ip_rsep(const struct nlmsghdr *nlh)
}
int ib_nl_handle_ip_res_resp(struct sk_buff *skb,
- struct netlink_callback *cb)
+ struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
- const struct nlmsghdr *nlh = (struct nlmsghdr *)cb->nlh;
-
if ((nlh->nlmsg_flags & NLM_F_REQUEST) ||
- !(NETLINK_CB(skb).sk) ||
- !netlink_capable(skb, CAP_NET_ADMIN))
+ !(NETLINK_CB(skb).sk))
return -EPERM;
if (ib_nl_is_good_ip_resp(nlh))
ib_nl_process_good_ip_rsep(nlh);
- return skb->len;
+ return 0;
}
static int ib_nl_ip_send_msg(struct rdma_dev_addr *dev_addr,
@@ -179,14 +178,13 @@ static int ib_nl_ip_send_msg(struct rdma_dev_addr *dev_addr,
}
/* Construct the family header first */
- header = (struct rdma_ls_ip_resolve_header *)
- skb_put(skb, NLMSG_ALIGN(sizeof(*header)));
+ header = skb_put(skb, NLMSG_ALIGN(sizeof(*header)));
header->ifindex = dev_addr->bound_dev_if;
nla_put(skb, attrtype, size, daddr);
/* Repair the nlmsg header length */
nlmsg_end(skb, nlh);
- ibnl_multicast(skb, nlh, RDMA_NL_GROUP_LS, GFP_KERNEL);
+ rdma_nl_multicast(&init_net, skb, RDMA_NL_GROUP_LS, GFP_KERNEL);
/* Make the request retry, so when we get the response from userspace
* we will have something.
@@ -194,7 +192,7 @@ static int ib_nl_ip_send_msg(struct rdma_dev_addr *dev_addr,
return -ENODATA;
}
-int rdma_addr_size(struct sockaddr *addr)
+int rdma_addr_size(const struct sockaddr *addr)
{
switch (addr->sa_family) {
case AF_INET:
@@ -209,92 +207,95 @@ int rdma_addr_size(struct sockaddr *addr)
}
EXPORT_SYMBOL(rdma_addr_size);
-static struct rdma_addr_client self;
-
-void rdma_addr_register_client(struct rdma_addr_client *client)
+int rdma_addr_size_in6(struct sockaddr_in6 *addr)
{
- atomic_set(&client->refcount, 1);
- init_completion(&client->comp);
-}
-EXPORT_SYMBOL(rdma_addr_register_client);
+ int ret = rdma_addr_size((struct sockaddr *) addr);
-static inline void put_client(struct rdma_addr_client *client)
-{
- if (atomic_dec_and_test(&client->refcount))
- complete(&client->comp);
+ return ret <= sizeof(*addr) ? ret : 0;
}
+EXPORT_SYMBOL(rdma_addr_size_in6);
-void rdma_addr_unregister_client(struct rdma_addr_client *client)
+int rdma_addr_size_kss(struct __kernel_sockaddr_storage *addr)
{
- put_client(client);
- wait_for_completion(&client->comp);
+ int ret = rdma_addr_size((struct sockaddr *) addr);
+
+ return ret <= sizeof(*addr) ? ret : 0;
}
-EXPORT_SYMBOL(rdma_addr_unregister_client);
+EXPORT_SYMBOL(rdma_addr_size_kss);
-int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
- const unsigned char *dst_dev_addr)
+/**
+ * rdma_copy_src_l2_addr - Copy netdevice source addresses
+ * @dev_addr: Destination address pointer where to copy the addresses
+ * @dev: Netdevice whose source addresses to copy
+ *
+ * rdma_copy_src_l2_addr() copies source addresses from the specified netdevice.
+ * This includes unicast address, broadcast address, device type and
+ * interface index.
+ */
+void rdma_copy_src_l2_addr(struct rdma_dev_addr *dev_addr,
+ const struct net_device *dev)
{
dev_addr->dev_type = dev->type;
memcpy(dev_addr->src_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
memcpy(dev_addr->broadcast, dev->broadcast, MAX_ADDR_LEN);
- if (dst_dev_addr)
- memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN);
dev_addr->bound_dev_if = dev->ifindex;
- return 0;
}
-EXPORT_SYMBOL(rdma_copy_addr);
+EXPORT_SYMBOL(rdma_copy_src_l2_addr);
-int rdma_translate_ip(const struct sockaddr *addr,
- struct rdma_dev_addr *dev_addr,
- u16 *vlan_id)
+static struct net_device *
+rdma_find_ndev_for_src_ip_rcu(struct net *net, const struct sockaddr *src_in)
{
- struct net_device *dev;
+ struct net_device *dev = NULL;
int ret = -EADDRNOTAVAIL;
- if (dev_addr->bound_dev_if) {
- dev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
- if (!dev)
- return -ENODEV;
- ret = rdma_copy_addr(dev_addr, dev, NULL);
- dev_put(dev);
- return ret;
- }
-
- switch (addr->sa_family) {
+ switch (src_in->sa_family) {
case AF_INET:
- dev = ip_dev_find(dev_addr->net,
- ((const struct sockaddr_in *)addr)->sin_addr.s_addr);
-
- if (!dev)
- return ret;
-
- ret = rdma_copy_addr(dev_addr, dev, NULL);
- if (vlan_id)
- *vlan_id = rdma_vlan_dev_vlan_id(dev);
- dev_put(dev);
+ dev = __ip_dev_find(net,
+ ((const struct sockaddr_in *)src_in)->sin_addr.s_addr,
+ false);
+ if (dev)
+ ret = 0;
break;
#if IS_ENABLED(CONFIG_IPV6)
case AF_INET6:
- rcu_read_lock();
- for_each_netdev_rcu(dev_addr->net, dev) {
- if (ipv6_chk_addr(dev_addr->net,
- &((const struct sockaddr_in6 *)addr)->sin6_addr,
+ for_each_netdev_rcu(net, dev) {
+ if (ipv6_chk_addr(net,
+ &((const struct sockaddr_in6 *)src_in)->sin6_addr,
dev, 1)) {
- ret = rdma_copy_addr(dev_addr, dev, NULL);
- if (vlan_id)
- *vlan_id = rdma_vlan_dev_vlan_id(dev);
+ ret = 0;
break;
}
}
- rcu_read_unlock();
break;
#endif
}
- return ret;
+ return ret ? ERR_PTR(ret) : dev;
+}
+
+int rdma_translate_ip(const struct sockaddr *addr,
+ struct rdma_dev_addr *dev_addr)
+{
+ struct net_device *dev;
+
+ if (dev_addr->bound_dev_if) {
+ dev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
+ if (!dev)
+ return -ENODEV;
+ rdma_copy_src_l2_addr(dev_addr, dev);
+ dev_put(dev);
+ return 0;
+ }
+
+ rcu_read_lock();
+ dev = rdma_find_ndev_for_src_ip_rcu(dev_addr->net, addr);
+ if (!IS_ERR(dev))
+ rdma_copy_src_l2_addr(dev_addr, dev);
+ rcu_read_unlock();
+ return PTR_ERR_OR_ZERO(dev);
}
EXPORT_SYMBOL(rdma_translate_ip);
-static void set_timeout(unsigned long time)
+static void set_timeout(struct addr_req *req, unsigned long time)
{
unsigned long delay;
@@ -302,76 +303,58 @@ static void set_timeout(unsigned long time)
if ((long)delay < 0)
delay = 0;
- mod_delayed_work(addr_wq, &work, delay);
+ mod_delayed_work(addr_wq, &req->work, delay);
}
static void queue_req(struct addr_req *req)
{
- struct addr_req *temp_req;
-
- mutex_lock(&lock);
- list_for_each_entry_reverse(temp_req, &req_list, list) {
- if (time_after_eq(req->timeout, temp_req->timeout))
- break;
- }
-
- list_add(&req->list, &temp_req->list);
-
- if (req_list.next == &req->list)
- set_timeout(req->timeout);
- mutex_unlock(&lock);
+ spin_lock_bh(&lock);
+ list_add_tail(&req->list, &req_list);
+ set_timeout(req, req->timeout);
+ spin_unlock_bh(&lock);
}
-static int ib_nl_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
+static int ib_nl_fetch_ha(struct rdma_dev_addr *dev_addr,
const void *daddr, u32 seq, u16 family)
{
- if (ibnl_chk_listeners(RDMA_NL_GROUP_LS))
+ if (!rdma_nl_chk_listeners(RDMA_NL_GROUP_LS))
return -EADDRNOTAVAIL;
- /* We fill in what we can, the response will fill the rest */
- rdma_copy_addr(dev_addr, dst->dev, NULL);
return ib_nl_ip_send_msg(dev_addr, daddr, seq, family);
}
-static int dst_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
+static int dst_fetch_ha(const struct dst_entry *dst,
+ struct rdma_dev_addr *dev_addr,
const void *daddr)
{
struct neighbour *n;
- int ret;
+ int ret = 0;
n = dst_neigh_lookup(dst, daddr);
+ if (!n)
+ return -ENODATA;
- rcu_read_lock();
- if (!n || !(n->nud_state & NUD_VALID)) {
- if (n)
- neigh_event_send(n, NULL);
+ if (!(n->nud_state & NUD_VALID)) {
+ neigh_event_send(n, NULL);
ret = -ENODATA;
} else {
- ret = rdma_copy_addr(dev_addr, dst->dev, n->ha);
+ neigh_ha_snapshot(dev_addr->dst_dev_addr, n, dst->dev);
}
- rcu_read_unlock();
- if (n)
- neigh_release(n);
+ neigh_release(n);
return ret;
}
-static bool has_gateway(struct dst_entry *dst, sa_family_t family)
+static bool has_gateway(const struct dst_entry *dst, sa_family_t family)
{
- struct rtable *rt;
- struct rt6_info *rt6;
+ if (family == AF_INET)
+ return dst_rtable(dst)->rt_uses_gateway;
- if (family == AF_INET) {
- rt = container_of(dst, struct rtable, dst);
- return rt->rt_uses_gateway;
- }
-
- rt6 = container_of(dst, struct rt6_info, dst);
- return rt6->rt6i_flags & RTF_GATEWAY;
+ return dst_rt6_info(dst)->rt6i_flags & RTF_GATEWAY;
}
-static int fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
+static int fetch_ha(const struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
const struct sockaddr *dst_in, u32 seq)
{
const struct sockaddr_in *dst_in4 =
@@ -383,18 +366,24 @@ static int fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
(const void *)&dst_in6->sin6_addr;
sa_family_t family = dst_in->sa_family;
- /* Gateway + ARPHRD_INFINIBAND -> IB router */
- if (has_gateway(dst, family) && dst->dev->type == ARPHRD_INFINIBAND)
- return ib_nl_fetch_ha(dst, dev_addr, daddr, seq, family);
+ might_sleep();
+
+ /* If we have a gateway in IB mode then it must be an IB network */
+ if (has_gateway(dst, family) && dev_addr->network == RDMA_NETWORK_IB)
+ return ib_nl_fetch_ha(dev_addr, daddr, seq, family);
else
return dst_fetch_ha(dst, dev_addr, daddr);
}
-static int addr4_resolve(struct sockaddr_in *src_in,
- const struct sockaddr_in *dst_in,
+static int addr4_resolve(struct sockaddr *src_sock,
+ const struct sockaddr *dst_sock,
struct rdma_dev_addr *addr,
struct rtable **prt)
{
+ struct sockaddr_in *src_in = (struct sockaddr_in *)src_sock;
+ const struct sockaddr_in *dst_in =
+ (const struct sockaddr_in *)dst_sock;
+
__be32 src_ip = src_in->sin_addr.s_addr;
__be32 dst_ip = dst_in->sin_addr.s_addr;
struct rtable *rt;
@@ -406,78 +395,50 @@ static int addr4_resolve(struct sockaddr_in *src_in,
fl4.saddr = src_ip;
fl4.flowi4_oif = addr->bound_dev_if;
rt = ip_route_output_key(addr->net, &fl4);
- if (IS_ERR(rt)) {
- ret = PTR_ERR(rt);
- goto out;
- }
- src_in->sin_family = AF_INET;
- src_in->sin_addr.s_addr = fl4.saddr;
+ ret = PTR_ERR_OR_ZERO(rt);
+ if (ret)
+ return ret;
- /* If there's a gateway and type of device not ARPHRD_INFINIBAND, we're
- * definitely in RoCE v2 (as RoCE v1 isn't routable) set the network
- * type accordingly.
- */
- if (rt->rt_uses_gateway && rt->dst.dev->type != ARPHRD_INFINIBAND)
- addr->network = RDMA_NETWORK_IPV4;
+ src_in->sin_addr.s_addr = fl4.saddr;
addr->hoplimit = ip4_dst_hoplimit(&rt->dst);
*prt = rt;
return 0;
-out:
- return ret;
}
#if IS_ENABLED(CONFIG_IPV6)
-static int addr6_resolve(struct sockaddr_in6 *src_in,
- const struct sockaddr_in6 *dst_in,
+static int addr6_resolve(struct sockaddr *src_sock,
+ const struct sockaddr *dst_sock,
struct rdma_dev_addr *addr,
struct dst_entry **pdst)
{
+ struct sockaddr_in6 *src_in = (struct sockaddr_in6 *)src_sock;
+ const struct sockaddr_in6 *dst_in =
+ (const struct sockaddr_in6 *)dst_sock;
struct flowi6 fl6;
struct dst_entry *dst;
- struct rt6_info *rt;
- int ret;
memset(&fl6, 0, sizeof fl6);
fl6.daddr = dst_in->sin6_addr;
fl6.saddr = src_in->sin6_addr;
fl6.flowi6_oif = addr->bound_dev_if;
- dst = ip6_route_output(addr->net, NULL, &fl6);
- if ((ret = dst->error))
- goto put;
-
- rt = (struct rt6_info *)dst;
- if (ipv6_addr_any(&fl6.saddr)) {
- ret = ipv6_dev_get_saddr(addr->net, ip6_dst_idev(dst)->dev,
- &fl6.daddr, 0, &fl6.saddr);
- if (ret)
- goto put;
+ dst = ipv6_stub->ipv6_dst_lookup_flow(addr->net, NULL, &fl6, NULL);
+ if (IS_ERR(dst))
+ return PTR_ERR(dst);
- src_in->sin6_family = AF_INET6;
+ if (ipv6_addr_any(&src_in->sin6_addr))
src_in->sin6_addr = fl6.saddr;
- }
-
- /* If there's a gateway and type of device not ARPHRD_INFINIBAND, we're
- * definitely in RoCE v2 (as RoCE v1 isn't routable) set the network
- * type accordingly.
- */
- if (rt->rt6i_flags & RTF_GATEWAY &&
- ip6_dst_idev(dst)->dev->type != ARPHRD_INFINIBAND)
- addr->network = RDMA_NETWORK_IPV6;
addr->hoplimit = ip6_dst_hoplimit(dst);
*pdst = dst;
return 0;
-put:
- dst_release(dst);
- return ret;
}
#else
-static int addr6_resolve(struct sockaddr_in6 *src_in,
- const struct sockaddr_in6 *dst_in,
+static int addr6_resolve(struct sockaddr *src_sock,
+ const struct sockaddr *dst_sock,
struct rdma_dev_addr *addr,
struct dst_entry **pdst)
{
@@ -485,126 +446,214 @@ static int addr6_resolve(struct sockaddr_in6 *src_in,
}
#endif
-static int addr_resolve_neigh(struct dst_entry *dst,
+static bool is_dst_local(const struct dst_entry *dst)
+{
+ if (dst->ops->family == AF_INET)
+ return !!(dst_rtable(dst)->rt_type & RTN_LOCAL);
+ else if (dst->ops->family == AF_INET6)
+ return !!(dst_rt6_info(dst)->rt6i_flags & RTF_LOCAL);
+ else
+ return false;
+}
+
+static int addr_resolve_neigh(const struct dst_entry *dst,
const struct sockaddr *dst_in,
struct rdma_dev_addr *addr,
u32 seq)
{
- if (dst->dev->flags & IFF_LOOPBACK) {
- int ret;
+ if (is_dst_local(dst)) {
+ /* When the destination is local entry, source and destination
+ * are same. Skip the neighbour lookup.
+ */
+ memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN);
+ return 0;
+ }
- ret = rdma_translate_ip(dst_in, addr, NULL);
- if (!ret)
- memcpy(addr->dst_dev_addr, addr->src_dev_addr,
- MAX_ADDR_LEN);
+ return fetch_ha(dst, addr, dst_in, seq);
+}
- return ret;
+static int rdma_set_src_addr_rcu(struct rdma_dev_addr *dev_addr,
+ const struct sockaddr *dst_in,
+ const struct dst_entry *dst)
+{
+ struct net_device *ndev = READ_ONCE(dst->dev);
+
+ /* A physical device must be the RDMA device to use */
+ if (is_dst_local(dst)) {
+ int ret;
+ /*
+ * RDMA (IB/RoCE, iWarp) doesn't run on lo interface or
+ * loopback IP address. So if route is resolved to loopback
+ * interface, translate that to a real ndev based on non
+ * loopback IP address.
+ */
+ ndev = rdma_find_ndev_for_src_ip_rcu(dev_net(ndev), dst_in);
+ if (IS_ERR(ndev))
+ return -ENODEV;
+ ret = rdma_translate_ip(dst_in, dev_addr);
+ if (ret)
+ return ret;
+ } else {
+ rdma_copy_src_l2_addr(dev_addr, dst->dev);
}
- /* If the device doesn't do ARP internally */
- if (!(dst->dev->flags & IFF_NOARP))
- return fetch_ha(dst, addr, dst_in, seq);
+ /*
+ * If there's a gateway and type of device not ARPHRD_INFINIBAND,
+ * we're definitely in RoCE v2 (as RoCE v1 isn't routable) set the
+ * network type accordingly.
+ */
+ if (has_gateway(dst, dst_in->sa_family) &&
+ ndev->type != ARPHRD_INFINIBAND)
+ dev_addr->network = dst_in->sa_family == AF_INET ?
+ RDMA_NETWORK_IPV4 :
+ RDMA_NETWORK_IPV6;
+ else
+ dev_addr->network = RDMA_NETWORK_IB;
+
+ return 0;
+}
+
+static int set_addr_netns_by_gid_rcu(struct rdma_dev_addr *addr)
+{
+ struct net_device *ndev;
+
+ ndev = rdma_read_gid_attr_ndev_rcu(addr->sgid_attr);
+ if (IS_ERR(ndev))
+ return PTR_ERR(ndev);
- return rdma_copy_addr(addr, dst->dev, NULL);
+ /*
+ * Since we are holding the rcu, reading net and ifindex
+ * are safe without any additional reference; because
+ * change_net_namespace() in net/core/dev.c does rcu sync
+ * after it changes the state to IFF_DOWN and before
+ * updating netdev fields {net, ifindex}.
+ */
+ addr->net = dev_net(ndev);
+ addr->bound_dev_if = ndev->ifindex;
+ return 0;
+}
+
+static void rdma_addr_set_net_defaults(struct rdma_dev_addr *addr)
+{
+ addr->net = &init_net;
+ addr->bound_dev_if = 0;
}
static int addr_resolve(struct sockaddr *src_in,
const struct sockaddr *dst_in,
struct rdma_dev_addr *addr,
bool resolve_neigh,
+ bool resolve_by_gid_attr,
u32 seq)
{
- struct net_device *ndev;
- struct dst_entry *dst;
+ struct dst_entry *dst = NULL;
+ struct rtable *rt = NULL;
int ret;
- if (src_in->sa_family == AF_INET) {
- struct rtable *rt = NULL;
- const struct sockaddr_in *dst_in4 =
- (const struct sockaddr_in *)dst_in;
+ if (!addr->net) {
+ pr_warn_ratelimited("%s: missing namespace\n", __func__);
+ return -EINVAL;
+ }
- ret = addr4_resolve((struct sockaddr_in *)src_in,
- dst_in4, addr, &rt);
- if (ret)
+ rcu_read_lock();
+ if (resolve_by_gid_attr) {
+ if (!addr->sgid_attr) {
+ rcu_read_unlock();
+ pr_warn_ratelimited("%s: missing gid_attr\n", __func__);
+ return -EINVAL;
+ }
+ /*
+ * If the request is for a specific gid attribute of the
+ * rdma_dev_addr, derive net from the netdevice of the
+ * GID attribute.
+ */
+ ret = set_addr_netns_by_gid_rcu(addr);
+ if (ret) {
+ rcu_read_unlock();
return ret;
-
- if (resolve_neigh)
- ret = addr_resolve_neigh(&rt->dst, dst_in, addr, seq);
-
- ndev = rt->dst.dev;
- dev_hold(ndev);
-
- ip_rt_put(rt);
+ }
+ }
+ if (src_in->sa_family == AF_INET) {
+ ret = addr4_resolve(src_in, dst_in, addr, &rt);
+ dst = &rt->dst;
} else {
- const struct sockaddr_in6 *dst_in6 =
- (const struct sockaddr_in6 *)dst_in;
-
- ret = addr6_resolve((struct sockaddr_in6 *)src_in,
- dst_in6, addr,
- &dst);
- if (ret)
- return ret;
-
- if (resolve_neigh)
- ret = addr_resolve_neigh(dst, dst_in, addr, seq);
-
- ndev = dst->dev;
- dev_hold(ndev);
-
- dst_release(dst);
+ ret = addr6_resolve(src_in, dst_in, addr, &dst);
}
+ if (ret) {
+ rcu_read_unlock();
+ goto done;
+ }
+ ret = rdma_set_src_addr_rcu(addr, dst_in, dst);
+ rcu_read_unlock();
- addr->bound_dev_if = ndev->ifindex;
- addr->net = dev_net(ndev);
- dev_put(ndev);
+ /*
+ * Resolve neighbor destination address if requested and
+ * only if src addr translation didn't fail.
+ */
+ if (!ret && resolve_neigh)
+ ret = addr_resolve_neigh(dst, dst_in, addr, seq);
+ if (src_in->sa_family == AF_INET)
+ ip_rt_put(rt);
+ else
+ dst_release(dst);
+done:
+ /*
+ * Clear the addr net to go back to its original state, only if it was
+ * derived from GID attribute in this context.
+ */
+ if (resolve_by_gid_attr)
+ rdma_addr_set_net_defaults(addr);
return ret;
}
-static void process_req(struct work_struct *work)
+static void process_one_req(struct work_struct *_work)
{
- struct addr_req *req, *temp_req;
+ struct addr_req *req;
struct sockaddr *src_in, *dst_in;
- struct list_head done_list;
- INIT_LIST_HEAD(&done_list);
-
- mutex_lock(&lock);
- list_for_each_entry_safe(req, temp_req, &req_list, list) {
- if (req->status == -ENODATA) {
- src_in = (struct sockaddr *) &req->src_addr;
- dst_in = (struct sockaddr *) &req->dst_addr;
- req->status = addr_resolve(src_in, dst_in, req->addr,
- true, req->seq);
- if (req->status && time_after_eq(jiffies, req->timeout))
- req->status = -ETIMEDOUT;
- else if (req->status == -ENODATA)
- continue;
+ req = container_of(_work, struct addr_req, work.work);
+
+ if (req->status == -ENODATA) {
+ src_in = (struct sockaddr *)&req->src_addr;
+ dst_in = (struct sockaddr *)&req->dst_addr;
+ req->status = addr_resolve(src_in, dst_in, req->addr,
+ true, req->resolve_by_gid_attr,
+ req->seq);
+ if (req->status && time_after_eq(jiffies, req->timeout)) {
+ req->status = -ETIMEDOUT;
+ } else if (req->status == -ENODATA) {
+ /* requeue the work for retrying again */
+ spin_lock_bh(&lock);
+ if (!list_empty(&req->list))
+ set_timeout(req, req->timeout);
+ spin_unlock_bh(&lock);
+ return;
}
- list_move_tail(&req->list, &done_list);
}
- if (!list_empty(&req_list)) {
- req = list_entry(req_list.next, struct addr_req, list);
- set_timeout(req->timeout);
- }
- mutex_unlock(&lock);
+ req->callback(req->status, (struct sockaddr *)&req->src_addr,
+ req->addr, req->context);
+ req->callback = NULL;
- list_for_each_entry_safe(req, temp_req, &done_list, list) {
- list_del(&req->list);
- req->callback(req->status, (struct sockaddr *) &req->src_addr,
- req->addr, req->context);
- put_client(req->client);
+ spin_lock_bh(&lock);
+ /*
+ * Although the work will normally have been canceled by the workqueue,
+ * it can still be requeued as long as it is on the req_list.
+ */
+ cancel_delayed_work(&req->work);
+ if (!list_empty(&req->list)) {
+ list_del_init(&req->list);
kfree(req);
}
+ spin_unlock_bh(&lock);
}
-int rdma_resolve_ip(struct rdma_addr_client *client,
- struct sockaddr *src_addr, struct sockaddr *dst_addr,
- struct rdma_dev_addr *addr, int timeout_ms,
+int rdma_resolve_ip(struct sockaddr *src_addr, const struct sockaddr *dst_addr,
+ struct rdma_dev_addr *addr, unsigned long timeout_ms,
void (*callback)(int status, struct sockaddr *src_addr,
struct rdma_dev_addr *addr, void *context),
- void *context)
+ bool resolve_by_gid_attr, void *context)
{
struct sockaddr *src_in, *dst_in;
struct addr_req *req;
@@ -632,11 +681,12 @@ int rdma_resolve_ip(struct rdma_addr_client *client,
req->addr = addr;
req->callback = callback;
req->context = context;
- req->client = client;
- atomic_inc(&client->refcount);
+ req->resolve_by_gid_attr = resolve_by_gid_attr;
+ INIT_DELAYED_WORK(&req->work, process_one_req);
req->seq = (u32)atomic_inc_return(&ib_nl_addr_request_seq);
- req->status = addr_resolve(src_in, dst_in, addr, true, req->seq);
+ req->status = addr_resolve(src_in, dst_in, addr, true,
+ req->resolve_by_gid_attr, req->seq);
switch (req->status) {
case 0:
req->timeout = jiffies;
@@ -648,7 +698,6 @@ int rdma_resolve_ip(struct rdma_addr_client *client,
break;
default:
ret = req->status;
- atomic_dec(&client->refcount);
goto err;
}
return ret;
@@ -658,46 +707,87 @@ err:
}
EXPORT_SYMBOL(rdma_resolve_ip);
-int rdma_resolve_ip_route(struct sockaddr *src_addr,
- const struct sockaddr *dst_addr,
- struct rdma_dev_addr *addr)
+int roce_resolve_route_from_path(struct sa_path_rec *rec,
+ const struct ib_gid_attr *attr)
{
- struct sockaddr_storage ssrc_addr = {};
- struct sockaddr *src_in = (struct sockaddr *)&ssrc_addr;
+ union {
+ struct sockaddr _sockaddr;
+ struct sockaddr_in _sockaddr_in;
+ struct sockaddr_in6 _sockaddr_in6;
+ } sgid, dgid;
+ struct rdma_dev_addr dev_addr = {};
+ int ret;
- if (src_addr) {
- if (src_addr->sa_family != dst_addr->sa_family)
- return -EINVAL;
+ might_sleep();
- memcpy(src_in, src_addr, rdma_addr_size(src_addr));
- } else {
- src_in->sa_family = dst_addr->sa_family;
- }
+ if (rec->roce.route_resolved)
+ return 0;
- return addr_resolve(src_in, dst_addr, addr, false, 0);
+ rdma_gid2ip((struct sockaddr *)&sgid, &rec->sgid);
+ rdma_gid2ip((struct sockaddr *)&dgid, &rec->dgid);
+
+ if (sgid._sockaddr.sa_family != dgid._sockaddr.sa_family)
+ return -EINVAL;
+
+ if (!attr || !attr->ndev)
+ return -EINVAL;
+
+ dev_addr.net = &init_net;
+ dev_addr.sgid_attr = attr;
+
+ ret = addr_resolve((struct sockaddr *)&sgid, (struct sockaddr *)&dgid,
+ &dev_addr, false, true, 0);
+ if (ret)
+ return ret;
+
+ if ((dev_addr.network == RDMA_NETWORK_IPV4 ||
+ dev_addr.network == RDMA_NETWORK_IPV6) &&
+ rec->rec_type != SA_PATH_REC_TYPE_ROCE_V2)
+ return -EINVAL;
+
+ rec->roce.route_resolved = true;
+ return 0;
}
-EXPORT_SYMBOL(rdma_resolve_ip_route);
+/**
+ * rdma_addr_cancel - Cancel resolve ip request
+ * @addr: Pointer to address structure given previously
+ * during rdma_resolve_ip().
+ * rdma_addr_cancel() is synchronous function which cancels any pending
+ * request if there is any.
+ */
void rdma_addr_cancel(struct rdma_dev_addr *addr)
{
struct addr_req *req, *temp_req;
+ struct addr_req *found = NULL;
- mutex_lock(&lock);
+ spin_lock_bh(&lock);
list_for_each_entry_safe(req, temp_req, &req_list, list) {
if (req->addr == addr) {
- req->status = -ECANCELED;
- req->timeout = jiffies;
- list_move(&req->list, &req_list);
- set_timeout(req->timeout);
+ /*
+ * Removing from the list means we take ownership of
+ * the req
+ */
+ list_del_init(&req->list);
+ found = req;
break;
}
}
- mutex_unlock(&lock);
+ spin_unlock_bh(&lock);
+
+ if (!found)
+ return;
+
+ /*
+ * sync canceling the work after removing it from the req_list
+ * guarentees no work is running and none will be started.
+ */
+ cancel_delayed_work_sync(&found->work);
+ kfree(found);
}
EXPORT_SYMBOL(rdma_addr_cancel);
struct resolve_cb_context {
- struct rdma_dev_addr *addr;
struct completion comp;
int status;
};
@@ -705,42 +795,34 @@ struct resolve_cb_context {
static void resolve_cb(int status, struct sockaddr *src_addr,
struct rdma_dev_addr *addr, void *context)
{
- if (!status)
- memcpy(((struct resolve_cb_context *)context)->addr,
- addr, sizeof(struct rdma_dev_addr));
((struct resolve_cb_context *)context)->status = status;
complete(&((struct resolve_cb_context *)context)->comp);
}
int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
const union ib_gid *dgid,
- u8 *dmac, u16 *vlan_id, int *if_index,
+ u8 *dmac, const struct ib_gid_attr *sgid_attr,
int *hoplimit)
{
- int ret = 0;
struct rdma_dev_addr dev_addr;
struct resolve_cb_context ctx;
- struct net_device *dev;
-
union {
- struct sockaddr _sockaddr;
struct sockaddr_in _sockaddr_in;
struct sockaddr_in6 _sockaddr_in6;
} sgid_addr, dgid_addr;
+ int ret;
-
- rdma_gid2ip(&sgid_addr._sockaddr, sgid);
- rdma_gid2ip(&dgid_addr._sockaddr, dgid);
+ rdma_gid2ip((struct sockaddr *)&sgid_addr, sgid);
+ rdma_gid2ip((struct sockaddr *)&dgid_addr, dgid);
memset(&dev_addr, 0, sizeof(dev_addr));
- if (if_index)
- dev_addr.bound_dev_if = *if_index;
dev_addr.net = &init_net;
+ dev_addr.sgid_attr = sgid_attr;
- ctx.addr = &dev_addr;
init_completion(&ctx.comp);
- ret = rdma_resolve_ip(&self, &sgid_addr._sockaddr, &dgid_addr._sockaddr,
- &dev_addr, 1000, resolve_cb, &ctx);
+ ret = rdma_resolve_ip((struct sockaddr *)&sgid_addr,
+ (struct sockaddr *)&dgid_addr, &dev_addr, 1000,
+ resolve_cb, true, &ctx);
if (ret)
return ret;
@@ -751,51 +833,23 @@ int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
return ret;
memcpy(dmac, dev_addr.dst_dev_addr, ETH_ALEN);
- dev = dev_get_by_index(&init_net, dev_addr.bound_dev_if);
- if (!dev)
- return -ENODEV;
- if (if_index)
- *if_index = dev_addr.bound_dev_if;
- if (vlan_id)
- *vlan_id = rdma_vlan_dev_vlan_id(dev);
- if (hoplimit)
- *hoplimit = dev_addr.hoplimit;
- dev_put(dev);
- return ret;
-}
-EXPORT_SYMBOL(rdma_addr_find_l2_eth_by_grh);
-
-int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id)
-{
- int ret = 0;
- struct rdma_dev_addr dev_addr;
- union {
- struct sockaddr _sockaddr;
- struct sockaddr_in _sockaddr_in;
- struct sockaddr_in6 _sockaddr_in6;
- } gid_addr;
-
- rdma_gid2ip(&gid_addr._sockaddr, sgid);
-
- memset(&dev_addr, 0, sizeof(dev_addr));
- dev_addr.net = &init_net;
- ret = rdma_translate_ip(&gid_addr._sockaddr, &dev_addr, vlan_id);
- if (ret)
- return ret;
-
- memcpy(smac, dev_addr.src_dev_addr, ETH_ALEN);
- return ret;
+ *hoplimit = dev_addr.hoplimit;
+ return 0;
}
-EXPORT_SYMBOL(rdma_addr_find_smac_by_sgid);
static int netevent_callback(struct notifier_block *self, unsigned long event,
void *ctx)
{
+ struct addr_req *req;
+
if (event == NETEVENT_NEIGH_UPDATE) {
struct neighbour *neigh = ctx;
if (neigh->nud_state & NUD_VALID) {
- set_timeout(jiffies);
+ spin_lock_bh(&lock);
+ list_for_each_entry(req, &req_list, list)
+ set_timeout(req, jiffies);
+ spin_unlock_bh(&lock);
}
}
return 0;
@@ -807,19 +861,18 @@ static struct notifier_block nb = {
int addr_init(void)
{
- addr_wq = alloc_workqueue("ib_addr", WQ_MEM_RECLAIM, 0);
+ addr_wq = alloc_ordered_workqueue("ib_addr", 0);
if (!addr_wq)
return -ENOMEM;
register_netevent_notifier(&nb);
- rdma_addr_register_client(&self);
return 0;
}
void addr_cleanup(void)
{
- rdma_addr_unregister_client(&self);
unregister_netevent_notifier(&nb);
destroy_workqueue(addr_wq);
+ WARN_ON(!list_empty(&req_list));
}
diff --git a/drivers/infiniband/core/agent.c b/drivers/infiniband/core/agent.c
index 11dacd97a667..25a060a28301 100644
--- a/drivers/infiniband/core/agent.c
+++ b/drivers/infiniband/core/agent.c
@@ -59,7 +59,16 @@ __ib_get_agent_port(const struct ib_device *device, int port_num)
struct ib_agent_port_private *entry;
list_for_each_entry(entry, &ib_agent_port_list, port_list) {
- if (entry->agent[1]->device == device &&
+ /* Need to check both agent[0] and agent[1], as an agent port
+ * may only have one of them
+ */
+ if (entry->agent[0] &&
+ entry->agent[0]->device == device &&
+ entry->agent[0]->port_num == port_num)
+ return entry;
+
+ if (entry->agent[1] &&
+ entry->agent[1]->device == device &&
entry->agent[1]->port_num == port_num)
return entry;
}
@@ -101,8 +110,7 @@ void agent_send_response(const struct ib_mad_hdr *mad_hdr, const struct ib_grh *
agent = port_priv->agent[qpn];
ah = ib_create_ah_from_wc(agent->qp->pd, wc, grh, port_num);
if (IS_ERR(ah)) {
- dev_err(&device->dev, "ib_create_ah_from_wc error %ld\n",
- PTR_ERR(ah));
+ dev_err(&device->dev, "ib_create_ah_from_wc error %pe\n", ah);
return;
}
@@ -137,13 +145,13 @@ void agent_send_response(const struct ib_mad_hdr *mad_hdr, const struct ib_grh *
err2:
ib_free_send_mad(send_buf);
err1:
- ib_destroy_ah(ah);
+ rdma_destroy_ah(ah, RDMA_DESTROY_AH_SLEEPABLE);
}
static void agent_send_handler(struct ib_mad_agent *mad_agent,
struct ib_mad_send_wc *mad_send_wc)
{
- ib_destroy_ah(mad_send_wc->send_buf->ah);
+ rdma_destroy_ah(mad_send_wc->send_buf->ah, RDMA_DESTROY_AH_SLEEPABLE);
ib_free_send_mad(mad_send_wc->send_buf);
}
@@ -172,14 +180,16 @@ int ib_agent_port_open(struct ib_device *device, int port_num)
}
}
- /* Obtain send only MAD agent for GSI QP */
- port_priv->agent[1] = ib_register_mad_agent(device, port_num,
- IB_QPT_GSI, NULL, 0,
- &agent_send_handler,
- NULL, NULL, 0);
- if (IS_ERR(port_priv->agent[1])) {
- ret = PTR_ERR(port_priv->agent[1]);
- goto error3;
+ if (rdma_cap_ib_cm(device, port_num)) {
+ /* Obtain send only MAD agent for GSI QP */
+ port_priv->agent[1] = ib_register_mad_agent(device, port_num,
+ IB_QPT_GSI, NULL, 0,
+ &agent_send_handler,
+ NULL, NULL, 0);
+ if (IS_ERR(port_priv->agent[1])) {
+ ret = PTR_ERR(port_priv->agent[1]);
+ goto error3;
+ }
}
spin_lock_irqsave(&ib_agent_port_list_lock, flags);
@@ -212,7 +222,8 @@ int ib_agent_port_close(struct ib_device *device, int port_num)
list_del(&port_priv->port_list);
spin_unlock_irqrestore(&ib_agent_port_list_lock, flags);
- ib_unregister_mad_agent(port_priv->agent[1]);
+ if (port_priv->agent[1])
+ ib_unregister_mad_agent(port_priv->agent[1]);
if (port_priv->agent[0])
ib_unregister_mad_agent(port_priv->agent[0]);
diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index ae04826e82fc..81cf3c902e81 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -33,7 +33,7 @@
* SOFTWARE.
*/
-#include <linux/module.h>
+#include <linux/if_vlan.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/workqueue.h>
@@ -46,20 +46,18 @@
struct ib_pkey_cache {
int table_len;
- u16 table[0];
+ u16 table[] __counted_by(table_len);
};
struct ib_update_work {
struct work_struct work;
- struct ib_device *device;
- u8 port_num;
+ struct ib_event event;
+ bool enforce_security;
};
union ib_gid zgid;
EXPORT_SYMBOL(zgid);
-static const struct ib_gid_attr zattr;
-
enum gid_attr_find_mask {
GID_ATTR_FIND_MASK_GID = 1UL << 0,
GID_ATTR_FIND_MASK_NETDEV = 1UL << 1,
@@ -67,29 +65,39 @@ enum gid_attr_find_mask {
GID_ATTR_FIND_MASK_GID_TYPE = 1UL << 3,
};
-enum gid_table_entry_props {
- GID_TABLE_ENTRY_INVALID = 1UL << 0,
- GID_TABLE_ENTRY_DEFAULT = 1UL << 1,
+enum gid_table_entry_state {
+ GID_TABLE_ENTRY_INVALID = 1,
+ GID_TABLE_ENTRY_VALID = 2,
+ /*
+ * Indicates that entry is pending to be removed, there may
+ * be active users of this GID entry.
+ * When last user of the GID entry releases reference to it,
+ * GID entry is detached from the table.
+ */
+ GID_TABLE_ENTRY_PENDING_DEL = 3,
};
-enum gid_table_write_action {
- GID_TABLE_WRITE_ACTION_ADD,
- GID_TABLE_WRITE_ACTION_DEL,
- /* MODIFY only updates the GID table. Currently only used by
- * ib_cache_update.
- */
- GID_TABLE_WRITE_ACTION_MODIFY
+struct roce_gid_ndev_storage {
+ struct rcu_head rcu_head;
+ struct net_device *ndev;
};
struct ib_gid_table_entry {
- unsigned long props;
- union ib_gid gid;
- struct ib_gid_attr attr;
- void *context;
+ struct kref kref;
+ struct work_struct del_work;
+ struct ib_gid_attr attr;
+ void *context;
+ /* Store the ndev pointer to release reference later on in
+ * call_rcu context because by that time gid_table_entry
+ * and attr might be already freed. So keep a copy of it.
+ * ndev_storage is freed by rcu callback.
+ */
+ struct roce_gid_ndev_storage *ndev_storage;
+ enum gid_table_entry_state state;
};
struct ib_gid_table {
- int sz;
+ int sz;
/* In RoCE, adding a GID to the table requires:
* (a) Find if this GID is already exists.
* (b) Find a free space.
@@ -99,35 +107,37 @@ struct ib_gid_table {
* (a) Find the GID
* (b) Delete it.
*
- * Add/delete should be carried out atomically.
- * This is done by locking this mutex from multiple
- * writers. We don't need this lock for IB, as the MAD
- * layer replaces all entries. All data_vec entries
- * are locked by this lock.
**/
- struct mutex lock;
- /* This lock protects the table entries from being
- * read and written simultaneously.
+ /* Any writer to data_vec must hold this lock and the write side of
+ * rwlock. Readers must hold only rwlock. All writers must be in a
+ * sleepable context.
+ */
+ struct mutex lock;
+ /* rwlock protects data_vec[ix]->state and entry pointer.
*/
- rwlock_t rwlock;
- struct ib_gid_table_entry *data_vec;
+ rwlock_t rwlock;
+ struct ib_gid_table_entry **data_vec;
+ /* bit field, each bit indicates the index of default GID */
+ u32 default_gid_indices;
};
-static void dispatch_gid_change_event(struct ib_device *ib_dev, u8 port)
+static void dispatch_gid_change_event(struct ib_device *ib_dev, u32 port)
{
- if (rdma_cap_roce_gid_table(ib_dev, port)) {
- struct ib_event event;
+ struct ib_event event;
- event.device = ib_dev;
- event.element.port_num = port;
- event.event = IB_EVENT_GID_CHANGE;
+ event.device = ib_dev;
+ event.element.port_num = port;
+ event.event = IB_EVENT_GID_CHANGE;
- ib_dispatch_event(&event);
- }
+ ib_dispatch_event_clients(&event);
}
static const char * const gid_type_str[] = {
+ /* IB/RoCE v1 value is set for IB_GID_TYPE_IB and IB_GID_TYPE_ROCE for
+ * user space compatibility reasons.
+ */
[IB_GID_TYPE_IB] = "IB/RoCE v1",
+ [IB_GID_TYPE_ROCE] = "IB/RoCE v1",
[IB_GID_TYPE_ROCE_UDP_ENCAP] = "RoCE v2",
};
@@ -140,6 +150,29 @@ const char *ib_cache_gid_type_str(enum ib_gid_type gid_type)
}
EXPORT_SYMBOL(ib_cache_gid_type_str);
+/** rdma_is_zero_gid - Check if given GID is zero or not.
+ * @gid: GID to check
+ * Returns true if given GID is zero, returns false otherwise.
+ */
+bool rdma_is_zero_gid(const union ib_gid *gid)
+{
+ return !memcmp(gid, &zgid, sizeof(*gid));
+}
+EXPORT_SYMBOL(rdma_is_zero_gid);
+
+/** is_gid_index_default - Check if a given index belongs to
+ * reserved default GIDs or not.
+ * @table: GID table pointer
+ * @index: Index to check in GID table
+ * Returns true if index is one of the reserved default GID index otherwise
+ * returns false.
+ */
+static bool is_gid_index_default(const struct ib_gid_table *table,
+ unsigned int index)
+{
+ return index < 32 && (BIT(index) & table->default_gid_indices);
+}
+
int ib_cache_gid_parse_type_str(const char *buf)
{
unsigned int i;
@@ -164,94 +197,272 @@ int ib_cache_gid_parse_type_str(const char *buf)
}
EXPORT_SYMBOL(ib_cache_gid_parse_type_str);
-/* This function expects that rwlock will be write locked in all
- * scenarios and that lock will be locked in sleep-able (RoCE)
- * scenarios.
- */
-static int write_gid(struct ib_device *ib_dev, u8 port,
- struct ib_gid_table *table, int ix,
- const union ib_gid *gid,
- const struct ib_gid_attr *attr,
- enum gid_table_write_action action,
- bool default_gid)
- __releases(&table->rwlock) __acquires(&table->rwlock)
+static struct ib_gid_table *rdma_gid_table(struct ib_device *device, u32 port)
{
- int ret = 0;
- struct net_device *old_net_dev;
- enum ib_gid_type old_gid_type;
+ return device->port_data[port].cache.gid;
+}
+
+static bool is_gid_entry_free(const struct ib_gid_table_entry *entry)
+{
+ return !entry;
+}
+
+static bool is_gid_entry_valid(const struct ib_gid_table_entry *entry)
+{
+ return entry && entry->state == GID_TABLE_ENTRY_VALID;
+}
- /* in rdma_cap_roce_gid_table, this funciton should be protected by a
- * sleep-able lock.
+static void schedule_free_gid(struct kref *kref)
+{
+ struct ib_gid_table_entry *entry =
+ container_of(kref, struct ib_gid_table_entry, kref);
+
+ queue_work(ib_wq, &entry->del_work);
+}
+
+static void put_gid_ndev(struct rcu_head *head)
+{
+ struct roce_gid_ndev_storage *storage =
+ container_of(head, struct roce_gid_ndev_storage, rcu_head);
+
+ WARN_ON(!storage->ndev);
+ /* At this point its safe to release netdev reference,
+ * as all callers working on gid_attr->ndev are done
+ * using this netdev.
*/
+ dev_put(storage->ndev);
+ kfree(storage);
+}
- if (rdma_cap_roce_gid_table(ib_dev, port)) {
- table->data_vec[ix].props |= GID_TABLE_ENTRY_INVALID;
- write_unlock_irq(&table->rwlock);
- /* GID_TABLE_WRITE_ACTION_MODIFY currently isn't supported by
- * RoCE providers and thus only updates the cache.
- */
- if (action == GID_TABLE_WRITE_ACTION_ADD)
- ret = ib_dev->add_gid(ib_dev, port, ix, gid, attr,
- &table->data_vec[ix].context);
- else if (action == GID_TABLE_WRITE_ACTION_DEL)
- ret = ib_dev->del_gid(ib_dev, port, ix,
- &table->data_vec[ix].context);
- write_lock_irq(&table->rwlock);
- }
+static void free_gid_entry_locked(struct ib_gid_table_entry *entry)
+{
+ struct ib_device *device = entry->attr.device;
+ u32 port_num = entry->attr.port_num;
+ struct ib_gid_table *table = rdma_gid_table(device, port_num);
- old_net_dev = table->data_vec[ix].attr.ndev;
- old_gid_type = table->data_vec[ix].attr.gid_type;
- if (old_net_dev && old_net_dev != attr->ndev)
- dev_put(old_net_dev);
- /* if modify_gid failed, just delete the old gid */
- if (ret || action == GID_TABLE_WRITE_ACTION_DEL) {
- gid = &zgid;
- attr = &zattr;
- table->data_vec[ix].context = NULL;
- }
+ dev_dbg(&device->dev, "%s port=%u index=%u gid %pI6\n", __func__,
+ port_num, entry->attr.index, entry->attr.gid.raw);
+
+ write_lock_irq(&table->rwlock);
+
+ /*
+ * The only way to avoid overwriting NULL in table is
+ * by comparing if it is same entry in table or not!
+ * If new entry in table is added by the time we free here,
+ * don't overwrite the table entry.
+ */
+ if (entry == table->data_vec[entry->attr.index])
+ table->data_vec[entry->attr.index] = NULL;
+ /* Now this index is ready to be allocated */
+ write_unlock_irq(&table->rwlock);
+
+ if (entry->ndev_storage)
+ call_rcu(&entry->ndev_storage->rcu_head, put_gid_ndev);
+ kfree(entry);
+}
+
+static void free_gid_entry(struct kref *kref)
+{
+ struct ib_gid_table_entry *entry =
+ container_of(kref, struct ib_gid_table_entry, kref);
+
+ free_gid_entry_locked(entry);
+}
+
+/**
+ * free_gid_work - Release reference to the GID entry
+ * @work: Work structure to refer to GID entry which needs to be
+ * deleted.
+ *
+ * free_gid_work() frees the entry from the HCA's hardware table
+ * if provider supports it. It releases reference to netdevice.
+ */
+static void free_gid_work(struct work_struct *work)
+{
+ struct ib_gid_table_entry *entry =
+ container_of(work, struct ib_gid_table_entry, del_work);
+ struct ib_device *device = entry->attr.device;
+ u32 port_num = entry->attr.port_num;
+ struct ib_gid_table *table = rdma_gid_table(device, port_num);
+
+ mutex_lock(&table->lock);
+ free_gid_entry_locked(entry);
+ mutex_unlock(&table->lock);
+}
- memcpy(&table->data_vec[ix].gid, gid, sizeof(*gid));
- memcpy(&table->data_vec[ix].attr, attr, sizeof(*attr));
- if (default_gid) {
- table->data_vec[ix].props |= GID_TABLE_ENTRY_DEFAULT;
- if (action == GID_TABLE_WRITE_ACTION_DEL)
- table->data_vec[ix].attr.gid_type = old_gid_type;
+static struct ib_gid_table_entry *
+alloc_gid_entry(const struct ib_gid_attr *attr)
+{
+ struct ib_gid_table_entry *entry;
+ struct net_device *ndev;
+
+ entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+ if (!entry)
+ return NULL;
+
+ ndev = rcu_dereference_protected(attr->ndev, 1);
+ if (ndev) {
+ entry->ndev_storage = kzalloc(sizeof(*entry->ndev_storage),
+ GFP_KERNEL);
+ if (!entry->ndev_storage) {
+ kfree(entry);
+ return NULL;
+ }
+ dev_hold(ndev);
+ entry->ndev_storage->ndev = ndev;
}
- if (table->data_vec[ix].attr.ndev &&
- table->data_vec[ix].attr.ndev != old_net_dev)
- dev_hold(table->data_vec[ix].attr.ndev);
+ kref_init(&entry->kref);
+ memcpy(&entry->attr, attr, sizeof(*attr));
+ INIT_WORK(&entry->del_work, free_gid_work);
+ entry->state = GID_TABLE_ENTRY_INVALID;
+ return entry;
+}
- table->data_vec[ix].props &= ~GID_TABLE_ENTRY_INVALID;
+static void store_gid_entry(struct ib_gid_table *table,
+ struct ib_gid_table_entry *entry)
+{
+ entry->state = GID_TABLE_ENTRY_VALID;
- return ret;
+ dev_dbg(&entry->attr.device->dev, "%s port=%u index=%u gid %pI6\n",
+ __func__, entry->attr.port_num, entry->attr.index,
+ entry->attr.gid.raw);
+
+ lockdep_assert_held(&table->lock);
+ write_lock_irq(&table->rwlock);
+ table->data_vec[entry->attr.index] = entry;
+ write_unlock_irq(&table->rwlock);
+}
+
+static void get_gid_entry(struct ib_gid_table_entry *entry)
+{
+ kref_get(&entry->kref);
}
-static int add_gid(struct ib_device *ib_dev, u8 port,
- struct ib_gid_table *table, int ix,
- const union ib_gid *gid,
- const struct ib_gid_attr *attr,
- bool default_gid) {
- return write_gid(ib_dev, port, table, ix, gid, attr,
- GID_TABLE_WRITE_ACTION_ADD, default_gid);
+static void put_gid_entry(struct ib_gid_table_entry *entry)
+{
+ kref_put(&entry->kref, schedule_free_gid);
}
-static int modify_gid(struct ib_device *ib_dev, u8 port,
- struct ib_gid_table *table, int ix,
- const union ib_gid *gid,
- const struct ib_gid_attr *attr,
- bool default_gid) {
- return write_gid(ib_dev, port, table, ix, gid, attr,
- GID_TABLE_WRITE_ACTION_MODIFY, default_gid);
+static void put_gid_entry_locked(struct ib_gid_table_entry *entry)
+{
+ kref_put(&entry->kref, free_gid_entry);
+}
+
+static int add_roce_gid(struct ib_gid_table_entry *entry)
+{
+ const struct ib_gid_attr *attr = &entry->attr;
+ int ret;
+
+ if (!attr->ndev) {
+ dev_err(&attr->device->dev, "%s NULL netdev port=%u index=%u\n",
+ __func__, attr->port_num, attr->index);
+ return -EINVAL;
+ }
+ if (rdma_cap_roce_gid_table(attr->device, attr->port_num)) {
+ ret = attr->device->ops.add_gid(attr, &entry->context);
+ if (ret) {
+ dev_err(&attr->device->dev,
+ "%s GID add failed port=%u index=%u\n",
+ __func__, attr->port_num, attr->index);
+ return ret;
+ }
+ }
+ return 0;
+}
+
+/**
+ * del_gid - Delete GID table entry
+ *
+ * @ib_dev: IB device whose GID entry to be deleted
+ * @port: Port number of the IB device
+ * @table: GID table of the IB device for a port
+ * @ix: GID entry index to delete
+ *
+ */
+static void del_gid(struct ib_device *ib_dev, u32 port,
+ struct ib_gid_table *table, int ix)
+{
+ struct roce_gid_ndev_storage *ndev_storage;
+ struct ib_gid_table_entry *entry;
+
+ lockdep_assert_held(&table->lock);
+
+ dev_dbg(&ib_dev->dev, "%s port=%u index=%d gid %pI6\n", __func__, port,
+ ix, table->data_vec[ix]->attr.gid.raw);
+
+ write_lock_irq(&table->rwlock);
+ entry = table->data_vec[ix];
+ entry->state = GID_TABLE_ENTRY_PENDING_DEL;
+ /*
+ * For non RoCE protocol, GID entry slot is ready to use.
+ */
+ if (!rdma_protocol_roce(ib_dev, port))
+ table->data_vec[ix] = NULL;
+ write_unlock_irq(&table->rwlock);
+
+ if (rdma_cap_roce_gid_table(ib_dev, port))
+ ib_dev->ops.del_gid(&entry->attr, &entry->context);
+
+ ndev_storage = entry->ndev_storage;
+ if (ndev_storage) {
+ entry->ndev_storage = NULL;
+ rcu_assign_pointer(entry->attr.ndev, NULL);
+ call_rcu(&ndev_storage->rcu_head, put_gid_ndev);
+ }
+
+ put_gid_entry_locked(entry);
}
-static int del_gid(struct ib_device *ib_dev, u8 port,
- struct ib_gid_table *table, int ix,
- bool default_gid) {
- return write_gid(ib_dev, port, table, ix, &zgid, &zattr,
- GID_TABLE_WRITE_ACTION_DEL, default_gid);
+/**
+ * add_modify_gid - Add or modify GID table entry
+ *
+ * @table: GID table in which GID to be added or modified
+ * @attr: Attributes of the GID
+ *
+ * Returns 0 on success or appropriate error code. It accepts zero
+ * GID addition for non RoCE ports for HCA's who report them as valid
+ * GID. However such zero GIDs are not added to the cache.
+ */
+static int add_modify_gid(struct ib_gid_table *table,
+ const struct ib_gid_attr *attr)
+{
+ struct ib_gid_table_entry *entry;
+ int ret = 0;
+
+ /*
+ * Invalidate any old entry in the table to make it safe to write to
+ * this index.
+ */
+ if (is_gid_entry_valid(table->data_vec[attr->index]))
+ del_gid(attr->device, attr->port_num, table, attr->index);
+
+ /*
+ * Some HCA's report multiple GID entries with only one valid GID, and
+ * leave other unused entries as the zero GID. Convert zero GIDs to
+ * empty table entries instead of storing them.
+ */
+ if (rdma_is_zero_gid(&attr->gid))
+ return 0;
+
+ entry = alloc_gid_entry(attr);
+ if (!entry)
+ return -ENOMEM;
+
+ if (rdma_protocol_roce(attr->device, attr->port_num)) {
+ ret = add_roce_gid(entry);
+ if (ret)
+ goto done;
+ }
+
+ store_gid_entry(table, entry);
+ return 0;
+
+done:
+ put_gid_entry(entry);
+ return ret;
}
-/* rwlock should be read locked */
+/* rwlock should be read locked, or lock should be held */
static int find_gid(struct ib_gid_table *table, const union ib_gid *gid,
const struct ib_gid_attr *val, bool default_gid,
unsigned long mask, int *pempty)
@@ -261,30 +472,52 @@ static int find_gid(struct ib_gid_table *table, const union ib_gid *gid,
int empty = pempty ? -1 : 0;
while (i < table->sz && (found < 0 || empty < 0)) {
- struct ib_gid_table_entry *data = &table->data_vec[i];
- struct ib_gid_attr *attr = &data->attr;
+ struct ib_gid_table_entry *data = table->data_vec[i];
+ struct ib_gid_attr *attr;
int curr_index = i;
i++;
- if (data->props & GID_TABLE_ENTRY_INVALID)
- continue;
-
- if (empty < 0)
- if (!memcmp(&data->gid, &zgid, sizeof(*gid)) &&
- !memcmp(attr, &zattr, sizeof(*attr)) &&
- !data->props)
+ /* find_gid() is used during GID addition where it is expected
+ * to return a free entry slot which is not duplicate.
+ * Free entry slot is requested and returned if pempty is set,
+ * so lookup free slot only if requested.
+ */
+ if (pempty && empty < 0) {
+ if (is_gid_entry_free(data) &&
+ default_gid ==
+ is_gid_index_default(table, curr_index)) {
+ /*
+ * Found an invalid (free) entry; allocate it.
+ * If default GID is requested, then our
+ * found slot must be one of the DEFAULT
+ * reserved slots or we fail.
+ * This ensures that only DEFAULT reserved
+ * slots are used for default property GIDs.
+ */
empty = curr_index;
+ }
+ }
+
+ /*
+ * Additionally find_gid() is used to find valid entry during
+ * lookup operation; so ignore the entries which are marked as
+ * pending for removal and the entries which are marked as
+ * invalid.
+ */
+ if (!is_gid_entry_valid(data))
+ continue;
if (found >= 0)
continue;
+ attr = &data->attr;
if (mask & GID_ATTR_FIND_MASK_GID_TYPE &&
attr->gid_type != val->gid_type)
continue;
if (mask & GID_ATTR_FIND_MASK_GID &&
- memcmp(gid, &data->gid, sizeof(*gid)))
+ memcmp(gid, &data->attr.gid, sizeof(*gid)))
continue;
if (mask & GID_ATTR_FIND_MASK_NETDEV &&
@@ -292,8 +525,7 @@ static int find_gid(struct ib_gid_table *table, const union ib_gid *gid,
continue;
if (mask & GID_ATTR_FIND_MASK_DEFAULT &&
- !!(data->props & GID_TABLE_ENTRY_DEFAULT) !=
- default_gid)
+ is_gid_index_default(table, curr_index) != default_gid)
continue;
found = curr_index;
@@ -311,43 +543,27 @@ static void make_default_gid(struct net_device *dev, union ib_gid *gid)
addrconf_ifid_eui48(&gid->raw[8], dev);
}
-int ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
- union ib_gid *gid, struct ib_gid_attr *attr)
+static int __ib_cache_gid_add(struct ib_device *ib_dev, u32 port,
+ union ib_gid *gid, struct ib_gid_attr *attr,
+ unsigned long mask, bool default_gid)
{
- struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
struct ib_gid_table *table;
- int ix;
int ret = 0;
- struct net_device *idev;
int empty;
+ int ix;
- table = ports_table[port - rdma_start_port(ib_dev)];
-
- if (!memcmp(gid, &zgid, sizeof(*gid)))
+ /* Do not allow adding zero GID in support of
+ * IB spec version 1.3 section 4.1.1 point (6) and
+ * section 12.7.10 and section 12.7.20
+ */
+ if (rdma_is_zero_gid(gid))
return -EINVAL;
- if (ib_dev->get_netdev) {
- idev = ib_dev->get_netdev(ib_dev, port);
- if (idev && attr->ndev != idev) {
- union ib_gid default_gid;
-
- /* Adding default GIDs in not permitted */
- make_default_gid(idev, &default_gid);
- if (!memcmp(gid, &default_gid, sizeof(*gid))) {
- dev_put(idev);
- return -EPERM;
- }
- }
- if (idev)
- dev_put(idev);
- }
+ table = rdma_gid_table(ib_dev, port);
mutex_lock(&table->lock);
- write_lock_irq(&table->rwlock);
- ix = find_gid(table, gid, attr, false, GID_ATTR_FIND_MASK_GID |
- GID_ATTR_FIND_MASK_GID_TYPE |
- GID_ATTR_FIND_MASK_NETDEV, &empty);
+ ix = find_gid(table, gid, attr, default_gid, mask, &empty);
if (ix >= 0)
goto out_unlock;
@@ -355,68 +571,92 @@ int ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
ret = -ENOSPC;
goto out_unlock;
}
-
- ret = add_gid(ib_dev, port, table, empty, gid, attr, false);
+ attr->device = ib_dev;
+ attr->index = empty;
+ attr->port_num = port;
+ attr->gid = *gid;
+ ret = add_modify_gid(table, attr);
if (!ret)
dispatch_gid_change_event(ib_dev, port);
out_unlock:
- write_unlock_irq(&table->rwlock);
mutex_unlock(&table->lock);
+ if (ret)
+ pr_warn_ratelimited("%s: unable to add gid %pI6 error=%d\n",
+ __func__, gid->raw, ret);
return ret;
}
-int ib_cache_gid_del(struct ib_device *ib_dev, u8 port,
+int ib_cache_gid_add(struct ib_device *ib_dev, u32 port,
union ib_gid *gid, struct ib_gid_attr *attr)
{
- struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
+ unsigned long mask = GID_ATTR_FIND_MASK_GID |
+ GID_ATTR_FIND_MASK_GID_TYPE |
+ GID_ATTR_FIND_MASK_NETDEV;
+
+ return __ib_cache_gid_add(ib_dev, port, gid, attr, mask, false);
+}
+
+static int
+_ib_cache_gid_del(struct ib_device *ib_dev, u32 port,
+ union ib_gid *gid, struct ib_gid_attr *attr,
+ unsigned long mask, bool default_gid)
+{
struct ib_gid_table *table;
+ int ret = 0;
int ix;
- table = ports_table[port - rdma_start_port(ib_dev)];
+ table = rdma_gid_table(ib_dev, port);
mutex_lock(&table->lock);
- write_lock_irq(&table->rwlock);
- ix = find_gid(table, gid, attr, false,
- GID_ATTR_FIND_MASK_GID |
- GID_ATTR_FIND_MASK_GID_TYPE |
- GID_ATTR_FIND_MASK_NETDEV |
- GID_ATTR_FIND_MASK_DEFAULT,
- NULL);
- if (ix < 0)
+ ix = find_gid(table, gid, attr, default_gid, mask, NULL);
+ if (ix < 0) {
+ ret = -EINVAL;
goto out_unlock;
+ }
- if (!del_gid(ib_dev, port, table, ix, false))
- dispatch_gid_change_event(ib_dev, port);
+ del_gid(ib_dev, port, table, ix);
+ dispatch_gid_change_event(ib_dev, port);
out_unlock:
- write_unlock_irq(&table->rwlock);
mutex_unlock(&table->lock);
- return 0;
+ if (ret)
+ pr_debug("%s: can't delete gid %pI6 error=%d\n",
+ __func__, gid->raw, ret);
+ return ret;
+}
+
+int ib_cache_gid_del(struct ib_device *ib_dev, u32 port,
+ union ib_gid *gid, struct ib_gid_attr *attr)
+{
+ unsigned long mask = GID_ATTR_FIND_MASK_GID |
+ GID_ATTR_FIND_MASK_GID_TYPE |
+ GID_ATTR_FIND_MASK_DEFAULT |
+ GID_ATTR_FIND_MASK_NETDEV;
+
+ return _ib_cache_gid_del(ib_dev, port, gid, attr, mask, false);
}
-int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port,
+int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u32 port,
struct net_device *ndev)
{
- struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
struct ib_gid_table *table;
int ix;
bool deleted = false;
- table = ports_table[port - rdma_start_port(ib_dev)];
+ table = rdma_gid_table(ib_dev, port);
mutex_lock(&table->lock);
- write_lock_irq(&table->rwlock);
- for (ix = 0; ix < table->sz; ix++)
- if (table->data_vec[ix].attr.ndev == ndev)
- if (!del_gid(ib_dev, port, table, ix,
- !!(table->data_vec[ix].props &
- GID_TABLE_ENTRY_DEFAULT)))
- deleted = true;
+ for (ix = 0; ix < table->sz; ix++) {
+ if (is_gid_entry_valid(table->data_vec[ix]) &&
+ table->data_vec[ix]->attr.ndev == ndev) {
+ del_gid(ib_dev, port, table, ix);
+ deleted = true;
+ }
+ }
- write_unlock_irq(&table->rwlock);
mutex_unlock(&table->lock);
if (deleted)
@@ -425,96 +665,38 @@ int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port,
return 0;
}
-static int __ib_cache_gid_get(struct ib_device *ib_dev, u8 port, int index,
- union ib_gid *gid, struct ib_gid_attr *attr)
-{
- struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
- struct ib_gid_table *table;
-
- table = ports_table[port - rdma_start_port(ib_dev)];
-
- if (index < 0 || index >= table->sz)
- return -EINVAL;
-
- if (table->data_vec[index].props & GID_TABLE_ENTRY_INVALID)
- return -EAGAIN;
-
- memcpy(gid, &table->data_vec[index].gid, sizeof(*gid));
- if (attr) {
- memcpy(attr, &table->data_vec[index].attr, sizeof(*attr));
- if (attr->ndev)
- dev_hold(attr->ndev);
- }
-
- return 0;
-}
-
-static int _ib_cache_gid_table_find(struct ib_device *ib_dev,
- const union ib_gid *gid,
- const struct ib_gid_attr *val,
- unsigned long mask,
- u8 *port, u16 *index)
-{
- struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
- struct ib_gid_table *table;
- u8 p;
- int local_index;
- unsigned long flags;
-
- for (p = 0; p < ib_dev->phys_port_cnt; p++) {
- table = ports_table[p];
- read_lock_irqsave(&table->rwlock, flags);
- local_index = find_gid(table, gid, val, false, mask, NULL);
- if (local_index >= 0) {
- if (index)
- *index = local_index;
- if (port)
- *port = p + rdma_start_port(ib_dev);
- read_unlock_irqrestore(&table->rwlock, flags);
- return 0;
- }
- read_unlock_irqrestore(&table->rwlock, flags);
- }
-
- return -ENOENT;
-}
-
-static int ib_cache_gid_find(struct ib_device *ib_dev,
- const union ib_gid *gid,
- enum ib_gid_type gid_type,
- struct net_device *ndev, u8 *port,
- u16 *index)
-{
- unsigned long mask = GID_ATTR_FIND_MASK_GID |
- GID_ATTR_FIND_MASK_GID_TYPE;
- struct ib_gid_attr gid_attr_val = {.ndev = ndev, .gid_type = gid_type};
-
- if (ndev)
- mask |= GID_ATTR_FIND_MASK_NETDEV;
-
- return _ib_cache_gid_table_find(ib_dev, gid, &gid_attr_val,
- mask, port, index);
-}
-
-int ib_find_cached_gid_by_port(struct ib_device *ib_dev,
- const union ib_gid *gid,
- enum ib_gid_type gid_type,
- u8 port, struct net_device *ndev,
- u16 *index)
+/**
+ * rdma_find_gid_by_port - Returns the GID entry attributes when it finds
+ * a valid GID entry for given search parameters. It searches for the specified
+ * GID value in the local software cache.
+ * @ib_dev: The device to query.
+ * @gid: The GID value to search for.
+ * @gid_type: The GID type to search for.
+ * @port: The port number of the device where the GID value should be searched.
+ * @ndev: In RoCE, the net device of the device. NULL means ignore.
+ *
+ * Returns sgid attributes if the GID is found with valid reference or
+ * returns ERR_PTR for the error.
+ * The caller must invoke rdma_put_gid_attr() to release the reference.
+ */
+const struct ib_gid_attr *
+rdma_find_gid_by_port(struct ib_device *ib_dev,
+ const union ib_gid *gid,
+ enum ib_gid_type gid_type,
+ u32 port, struct net_device *ndev)
{
int local_index;
- struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
struct ib_gid_table *table;
unsigned long mask = GID_ATTR_FIND_MASK_GID |
GID_ATTR_FIND_MASK_GID_TYPE;
struct ib_gid_attr val = {.ndev = ndev, .gid_type = gid_type};
+ const struct ib_gid_attr *attr;
unsigned long flags;
- if (port < rdma_start_port(ib_dev) ||
- port > rdma_end_port(ib_dev))
- return -ENOENT;
+ if (!rdma_is_port_valid(ib_dev, port))
+ return ERR_PTR(-ENOENT);
- table = ports_table[port - rdma_start_port(ib_dev)];
+ table = rdma_gid_table(ib_dev, port);
if (ndev)
mask |= GID_ATTR_FIND_MASK_NETDEV;
@@ -522,95 +704,74 @@ int ib_find_cached_gid_by_port(struct ib_device *ib_dev,
read_lock_irqsave(&table->rwlock, flags);
local_index = find_gid(table, gid, &val, false, mask, NULL);
if (local_index >= 0) {
- if (index)
- *index = local_index;
+ get_gid_entry(table->data_vec[local_index]);
+ attr = &table->data_vec[local_index]->attr;
read_unlock_irqrestore(&table->rwlock, flags);
- return 0;
+ return attr;
}
read_unlock_irqrestore(&table->rwlock, flags);
- return -ENOENT;
+ return ERR_PTR(-ENOENT);
}
-EXPORT_SYMBOL(ib_find_cached_gid_by_port);
+EXPORT_SYMBOL(rdma_find_gid_by_port);
/**
- * ib_find_gid_by_filter - Returns the GID table index where a specified
- * GID value occurs
- * @device: The device to query.
+ * rdma_find_gid_by_filter - Returns the GID table attribute where a
+ * specified GID value occurs
+ * @ib_dev: The device to query.
* @gid: The GID value to search for.
- * @port_num: The port number of the device where the GID value could be
+ * @port: The port number of the device where the GID value could be
* searched.
* @filter: The filter function is executed on any matching GID in the table.
* If the filter function returns true, the corresponding index is returned,
* otherwise, we continue searching the GID table. It's guaranteed that
* while filter is executed, ndev field is valid and the structure won't
* change. filter is executed in an atomic context. filter must not be NULL.
- * @index: The index into the cached GID table where the GID was found. This
- * parameter may be NULL.
+ * @context: Private data to pass into the call-back.
*
- * ib_cache_gid_find_by_filter() searches for the specified GID value
+ * rdma_find_gid_by_filter() searches for the specified GID value
* of which the filter function returns true in the port's GID table.
- * This function is only supported on RoCE ports.
*
*/
-static int ib_cache_gid_find_by_filter(struct ib_device *ib_dev,
- const union ib_gid *gid,
- u8 port,
- bool (*filter)(const union ib_gid *,
- const struct ib_gid_attr *,
- void *),
- void *context,
- u16 *index)
+const struct ib_gid_attr *rdma_find_gid_by_filter(
+ struct ib_device *ib_dev, const union ib_gid *gid, u32 port,
+ bool (*filter)(const union ib_gid *gid, const struct ib_gid_attr *,
+ void *),
+ void *context)
{
- struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
+ const struct ib_gid_attr *res = ERR_PTR(-ENOENT);
struct ib_gid_table *table;
- unsigned int i;
unsigned long flags;
- bool found = false;
-
- if (!ports_table)
- return -EOPNOTSUPP;
+ unsigned int i;
- if (port < rdma_start_port(ib_dev) ||
- port > rdma_end_port(ib_dev) ||
- !rdma_protocol_roce(ib_dev, port))
- return -EPROTONOSUPPORT;
+ if (!rdma_is_port_valid(ib_dev, port))
+ return ERR_PTR(-EINVAL);
- table = ports_table[port - rdma_start_port(ib_dev)];
+ table = rdma_gid_table(ib_dev, port);
read_lock_irqsave(&table->rwlock, flags);
for (i = 0; i < table->sz; i++) {
- struct ib_gid_attr attr;
-
- if (table->data_vec[i].props & GID_TABLE_ENTRY_INVALID)
- goto next;
-
- if (memcmp(gid, &table->data_vec[i].gid, sizeof(*gid)))
- goto next;
+ struct ib_gid_table_entry *entry = table->data_vec[i];
- memcpy(&attr, &table->data_vec[i].attr, sizeof(attr));
+ if (!is_gid_entry_valid(entry))
+ continue;
- if (filter(gid, &attr, context))
- found = true;
+ if (memcmp(gid, &entry->attr.gid, sizeof(*gid)))
+ continue;
-next:
- if (found)
+ if (filter(gid, &entry->attr, context)) {
+ get_gid_entry(entry);
+ res = &entry->attr;
break;
+ }
}
read_unlock_irqrestore(&table->rwlock, flags);
-
- if (!found)
- return -ENOENT;
-
- if (index)
- *index = i;
- return 0;
+ return res;
}
static struct ib_gid_table *alloc_gid_table(int sz)
{
- struct ib_gid_table *table =
- kzalloc(sizeof(struct ib_gid_table), GFP_KERNEL);
+ struct ib_gid_table *table = kzalloc(sizeof(*table), GFP_KERNEL);
if (!table)
return NULL;
@@ -623,7 +784,6 @@ static struct ib_gid_table *alloc_gid_table(int sz)
table->sz = sz;
rwlock_init(&table->rwlock);
-
return table;
err_free_table:
@@ -631,206 +791,131 @@ err_free_table:
return NULL;
}
-static void release_gid_table(struct ib_gid_table *table)
+static void release_gid_table(struct ib_device *device,
+ struct ib_gid_table *table)
{
- if (table) {
- kfree(table->data_vec);
- kfree(table);
+ int i;
+
+ if (!table)
+ return;
+
+ for (i = 0; i < table->sz; i++) {
+ if (is_gid_entry_free(table->data_vec[i]))
+ continue;
+
+ WARN_ONCE(true,
+ "GID entry ref leak for dev %s index %d ref=%u\n",
+ dev_name(&device->dev), i,
+ kref_read(&table->data_vec[i]->kref));
}
+
+ mutex_destroy(&table->lock);
+ kfree(table->data_vec);
+ kfree(table);
}
-static void cleanup_gid_table_port(struct ib_device *ib_dev, u8 port,
+static void cleanup_gid_table_port(struct ib_device *ib_dev, u32 port,
struct ib_gid_table *table)
{
int i;
- bool deleted = false;
if (!table)
return;
- write_lock_irq(&table->rwlock);
+ mutex_lock(&table->lock);
for (i = 0; i < table->sz; ++i) {
- if (memcmp(&table->data_vec[i].gid, &zgid,
- sizeof(table->data_vec[i].gid)))
- if (!del_gid(ib_dev, port, table, i,
- table->data_vec[i].props &
- GID_ATTR_FIND_MASK_DEFAULT))
- deleted = true;
+ if (is_gid_entry_valid(table->data_vec[i]))
+ del_gid(ib_dev, port, table, i);
}
- write_unlock_irq(&table->rwlock);
-
- if (deleted)
- dispatch_gid_change_event(ib_dev, port);
+ mutex_unlock(&table->lock);
}
-void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port,
+void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u32 port,
struct net_device *ndev,
unsigned long gid_type_mask,
enum ib_cache_gid_default_mode mode)
{
- struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
- union ib_gid gid;
+ union ib_gid gid = { };
struct ib_gid_attr gid_attr;
- struct ib_gid_attr zattr_type = zattr;
- struct ib_gid_table *table;
unsigned int gid_type;
+ unsigned long mask;
- table = ports_table[port - rdma_start_port(ib_dev)];
-
- make_default_gid(ndev, &gid);
+ mask = GID_ATTR_FIND_MASK_GID_TYPE |
+ GID_ATTR_FIND_MASK_DEFAULT |
+ GID_ATTR_FIND_MASK_NETDEV;
memset(&gid_attr, 0, sizeof(gid_attr));
gid_attr.ndev = ndev;
for (gid_type = 0; gid_type < IB_GID_TYPE_SIZE; ++gid_type) {
- int ix;
- union ib_gid current_gid;
- struct ib_gid_attr current_gid_attr = {};
-
if (1UL << gid_type & ~gid_type_mask)
continue;
gid_attr.gid_type = gid_type;
- mutex_lock(&table->lock);
- write_lock_irq(&table->rwlock);
- ix = find_gid(table, NULL, &gid_attr, true,
- GID_ATTR_FIND_MASK_GID_TYPE |
- GID_ATTR_FIND_MASK_DEFAULT,
- NULL);
-
- /* Coudn't find default GID location */
- if (WARN_ON(ix < 0))
- goto release;
-
- zattr_type.gid_type = gid_type;
-
- if (!__ib_cache_gid_get(ib_dev, port, ix,
- &current_gid, &current_gid_attr) &&
- mode == IB_CACHE_GID_DEFAULT_MODE_SET &&
- !memcmp(&gid, &current_gid, sizeof(gid)) &&
- !memcmp(&gid_attr, &current_gid_attr, sizeof(gid_attr)))
- goto release;
-
- if (memcmp(&current_gid, &zgid, sizeof(current_gid)) ||
- memcmp(&current_gid_attr, &zattr_type,
- sizeof(current_gid_attr))) {
- if (del_gid(ib_dev, port, table, ix, true)) {
- pr_warn("ib_cache_gid: can't delete index %d for default gid %pI6\n",
- ix, gid.raw);
- goto release;
- } else {
- dispatch_gid_change_event(ib_dev, port);
- }
- }
-
if (mode == IB_CACHE_GID_DEFAULT_MODE_SET) {
- if (add_gid(ib_dev, port, table, ix, &gid, &gid_attr, true))
- pr_warn("ib_cache_gid: unable to add default gid %pI6\n",
- gid.raw);
- else
- dispatch_gid_change_event(ib_dev, port);
+ make_default_gid(ndev, &gid);
+ __ib_cache_gid_add(ib_dev, port, &gid,
+ &gid_attr, mask, true);
+ } else if (mode == IB_CACHE_GID_DEFAULT_MODE_DELETE) {
+ _ib_cache_gid_del(ib_dev, port, &gid,
+ &gid_attr, mask, true);
}
-
-release:
- if (current_gid_attr.ndev)
- dev_put(current_gid_attr.ndev);
- write_unlock_irq(&table->rwlock);
- mutex_unlock(&table->lock);
}
}
-static int gid_table_reserve_default(struct ib_device *ib_dev, u8 port,
- struct ib_gid_table *table)
+static void gid_table_reserve_default(struct ib_device *ib_dev, u32 port,
+ struct ib_gid_table *table)
{
unsigned int i;
unsigned long roce_gid_type_mask;
unsigned int num_default_gids;
- unsigned int current_gid = 0;
roce_gid_type_mask = roce_gid_type_mask_support(ib_dev, port);
num_default_gids = hweight_long(roce_gid_type_mask);
- for (i = 0; i < num_default_gids && i < table->sz; i++) {
- struct ib_gid_table_entry *entry =
- &table->data_vec[i];
-
- entry->props |= GID_TABLE_ENTRY_DEFAULT;
- current_gid = find_next_bit(&roce_gid_type_mask,
- BITS_PER_LONG,
- current_gid);
- entry->attr.gid_type = current_gid++;
- }
-
- return 0;
+ /* Reserve starting indices for default GIDs */
+ for (i = 0; i < num_default_gids && i < table->sz; i++)
+ table->default_gid_indices |= BIT(i);
}
-static int _gid_table_setup_one(struct ib_device *ib_dev)
+
+static void gid_table_release_one(struct ib_device *ib_dev)
{
- u8 port;
- struct ib_gid_table **table;
- int err = 0;
+ u32 p;
- table = kcalloc(ib_dev->phys_port_cnt, sizeof(*table), GFP_KERNEL);
- if (!table)
- return -ENOMEM;
+ rdma_for_each_port (ib_dev, p) {
+ release_gid_table(ib_dev, ib_dev->port_data[p].cache.gid);
+ ib_dev->port_data[p].cache.gid = NULL;
+ }
+}
- for (port = 0; port < ib_dev->phys_port_cnt; port++) {
- u8 rdma_port = port + rdma_start_port(ib_dev);
+static int _gid_table_setup_one(struct ib_device *ib_dev)
+{
+ struct ib_gid_table *table;
+ u32 rdma_port;
- table[port] =
- alloc_gid_table(
- ib_dev->port_immutable[rdma_port].gid_tbl_len);
- if (!table[port]) {
- err = -ENOMEM;
+ rdma_for_each_port (ib_dev, rdma_port) {
+ table = alloc_gid_table(
+ ib_dev->port_data[rdma_port].immutable.gid_tbl_len);
+ if (!table)
goto rollback_table_setup;
- }
- err = gid_table_reserve_default(ib_dev,
- port + rdma_start_port(ib_dev),
- table[port]);
- if (err)
- goto rollback_table_setup;
+ gid_table_reserve_default(ib_dev, rdma_port, table);
+ ib_dev->port_data[rdma_port].cache.gid = table;
}
-
- ib_dev->cache.gid_cache = table;
return 0;
rollback_table_setup:
- for (port = 0; port < ib_dev->phys_port_cnt; port++) {
- cleanup_gid_table_port(ib_dev, port + rdma_start_port(ib_dev),
- table[port]);
- release_gid_table(table[port]);
- }
-
- kfree(table);
- return err;
-}
-
-static void gid_table_release_one(struct ib_device *ib_dev)
-{
- struct ib_gid_table **table = ib_dev->cache.gid_cache;
- u8 port;
-
- if (!table)
- return;
-
- for (port = 0; port < ib_dev->phys_port_cnt; port++)
- release_gid_table(table[port]);
-
- kfree(table);
- ib_dev->cache.gid_cache = NULL;
+ gid_table_release_one(ib_dev);
+ return -ENOMEM;
}
static void gid_table_cleanup_one(struct ib_device *ib_dev)
{
- struct ib_gid_table **table = ib_dev->cache.gid_cache;
- u8 port;
+ u32 p;
- if (!table)
- return;
-
- for (port = 0; port < ib_dev->phys_port_cnt; port++)
- cleanup_gid_table_port(ib_dev, port + rdma_start_port(ib_dev),
- table[port]);
+ rdma_for_each_port (ib_dev, p)
+ cleanup_gid_table_port(ib_dev, p,
+ ib_dev->port_data[p].cache.gid);
}
static int gid_table_setup_one(struct ib_device *ib_dev)
@@ -842,69 +927,125 @@ static int gid_table_setup_one(struct ib_device *ib_dev)
if (err)
return err;
- err = roce_rescan_device(ib_dev);
-
- if (err) {
- gid_table_cleanup_one(ib_dev);
- gid_table_release_one(ib_dev);
- }
+ rdma_roce_rescan_device(ib_dev);
return err;
}
-int ib_get_cached_gid(struct ib_device *device,
- u8 port_num,
- int index,
- union ib_gid *gid,
- struct ib_gid_attr *gid_attr)
+/**
+ * rdma_query_gid - Read the GID content from the GID software cache
+ * @device: Device to query the GID
+ * @port_num: Port number of the device
+ * @index: Index of the GID table entry to read
+ * @gid: Pointer to GID where to store the entry's GID
+ *
+ * rdma_query_gid() only reads the GID entry content for requested device,
+ * port and index. It reads for IB, RoCE and iWarp link layers. It doesn't
+ * hold any reference to the GID table entry in the HCA or software cache.
+ *
+ * Returns 0 on success or appropriate error code.
+ *
+ */
+int rdma_query_gid(struct ib_device *device, u32 port_num,
+ int index, union ib_gid *gid)
{
- int res;
+ struct ib_gid_table *table;
unsigned long flags;
- struct ib_gid_table **ports_table = device->cache.gid_cache;
- struct ib_gid_table *table = ports_table[port_num - rdma_start_port(device)];
+ int res;
- if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
+ if (!rdma_is_port_valid(device, port_num))
return -EINVAL;
+ table = rdma_gid_table(device, port_num);
read_lock_irqsave(&table->rwlock, flags);
- res = __ib_cache_gid_get(device, port_num, index, gid, gid_attr);
- read_unlock_irqrestore(&table->rwlock, flags);
+ if (index < 0 || index >= table->sz) {
+ res = -EINVAL;
+ goto done;
+ }
+
+ if (!is_gid_entry_valid(table->data_vec[index])) {
+ res = -ENOENT;
+ goto done;
+ }
+
+ memcpy(gid, &table->data_vec[index]->attr.gid, sizeof(*gid));
+ res = 0;
+
+done:
+ read_unlock_irqrestore(&table->rwlock, flags);
return res;
}
-EXPORT_SYMBOL(ib_get_cached_gid);
-
-int ib_find_cached_gid(struct ib_device *device,
- const union ib_gid *gid,
- enum ib_gid_type gid_type,
- struct net_device *ndev,
- u8 *port_num,
- u16 *index)
+EXPORT_SYMBOL(rdma_query_gid);
+
+/**
+ * rdma_read_gid_hw_context - Read the HW GID context from GID attribute
+ * @attr: Potinter to the GID attribute
+ *
+ * rdma_read_gid_hw_context() reads the drivers GID HW context corresponding
+ * to the SGID attr. Callers are required to already be holding the reference
+ * to an existing GID entry.
+ *
+ * Returns the HW GID context
+ *
+ */
+void *rdma_read_gid_hw_context(const struct ib_gid_attr *attr)
{
- return ib_cache_gid_find(device, gid, gid_type, ndev, port_num, index);
+ return container_of(attr, struct ib_gid_table_entry, attr)->context;
}
-EXPORT_SYMBOL(ib_find_cached_gid);
-
-int ib_find_gid_by_filter(struct ib_device *device,
- const union ib_gid *gid,
- u8 port_num,
- bool (*filter)(const union ib_gid *gid,
- const struct ib_gid_attr *,
- void *),
- void *context, u16 *index)
+EXPORT_SYMBOL(rdma_read_gid_hw_context);
+
+/**
+ * rdma_find_gid - Returns SGID attributes if the matching GID is found.
+ * @device: The device to query.
+ * @gid: The GID value to search for.
+ * @gid_type: The GID type to search for.
+ * @ndev: In RoCE, the net device of the device. NULL means ignore.
+ *
+ * rdma_find_gid() searches for the specified GID value in the software cache.
+ *
+ * Returns GID attributes if a valid GID is found or returns ERR_PTR for the
+ * error. The caller must invoke rdma_put_gid_attr() to release the reference.
+ *
+ */
+const struct ib_gid_attr *rdma_find_gid(struct ib_device *device,
+ const union ib_gid *gid,
+ enum ib_gid_type gid_type,
+ struct net_device *ndev)
{
- /* Only RoCE GID table supports filter function */
- if (!rdma_cap_roce_gid_table(device, port_num) && filter)
- return -EPROTONOSUPPORT;
+ unsigned long mask = GID_ATTR_FIND_MASK_GID |
+ GID_ATTR_FIND_MASK_GID_TYPE;
+ struct ib_gid_attr gid_attr_val = {.ndev = ndev, .gid_type = gid_type};
+ u32 p;
+
+ if (ndev)
+ mask |= GID_ATTR_FIND_MASK_NETDEV;
+
+ rdma_for_each_port(device, p) {
+ struct ib_gid_table *table;
+ unsigned long flags;
+ int index;
+
+ table = device->port_data[p].cache.gid;
+ read_lock_irqsave(&table->rwlock, flags);
+ index = find_gid(table, gid, &gid_attr_val, false, mask, NULL);
+ if (index >= 0) {
+ const struct ib_gid_attr *attr;
+
+ get_gid_entry(table->data_vec[index]);
+ attr = &table->data_vec[index]->attr;
+ read_unlock_irqrestore(&table->rwlock, flags);
+ return attr;
+ }
+ read_unlock_irqrestore(&table->rwlock, flags);
+ }
- return ib_cache_gid_find_by_filter(device, gid,
- port_num, filter,
- context, index);
+ return ERR_PTR(-ENOENT);
}
-EXPORT_SYMBOL(ib_find_gid_by_filter);
+EXPORT_SYMBOL(rdma_find_gid);
int ib_get_cached_pkey(struct ib_device *device,
- u8 port_num,
+ u32 port_num,
int index,
u16 *pkey)
{
@@ -912,28 +1053,37 @@ int ib_get_cached_pkey(struct ib_device *device,
unsigned long flags;
int ret = 0;
- if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
+ if (!rdma_is_port_valid(device, port_num))
return -EINVAL;
- read_lock_irqsave(&device->cache.lock, flags);
+ read_lock_irqsave(&device->cache_lock, flags);
- cache = device->cache.pkey_cache[port_num - rdma_start_port(device)];
+ cache = device->port_data[port_num].cache.pkey;
- if (index < 0 || index >= cache->table_len)
+ if (!cache || index < 0 || index >= cache->table_len)
ret = -EINVAL;
else
*pkey = cache->table[index];
- read_unlock_irqrestore(&device->cache.lock, flags);
+ read_unlock_irqrestore(&device->cache_lock, flags);
return ret;
}
EXPORT_SYMBOL(ib_get_cached_pkey);
-int ib_find_cached_pkey(struct ib_device *device,
- u8 port_num,
- u16 pkey,
- u16 *index)
+void ib_get_cached_subnet_prefix(struct ib_device *device, u32 port_num,
+ u64 *sn_pfx)
+{
+ unsigned long flags;
+
+ read_lock_irqsave(&device->cache_lock, flags);
+ *sn_pfx = device->port_data[port_num].cache.subnet_prefix;
+ read_unlock_irqrestore(&device->cache_lock, flags);
+}
+EXPORT_SYMBOL(ib_get_cached_subnet_prefix);
+
+int ib_find_cached_pkey(struct ib_device *device, u32 port_num,
+ u16 pkey, u16 *index)
{
struct ib_pkey_cache *cache;
unsigned long flags;
@@ -941,12 +1091,16 @@ int ib_find_cached_pkey(struct ib_device *device,
int ret = -ENOENT;
int partial_ix = -1;
- if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
+ if (!rdma_is_port_valid(device, port_num))
return -EINVAL;
- read_lock_irqsave(&device->cache.lock, flags);
+ read_lock_irqsave(&device->cache_lock, flags);
- cache = device->cache.pkey_cache[port_num - rdma_start_port(device)];
+ cache = device->port_data[port_num].cache.pkey;
+ if (!cache) {
+ ret = -EINVAL;
+ goto err;
+ }
*index = -1;
@@ -956,8 +1110,9 @@ int ib_find_cached_pkey(struct ib_device *device,
*index = i;
ret = 0;
break;
- } else
+ } else {
partial_ix = i;
+ }
}
if (ret && partial_ix >= 0) {
@@ -965,237 +1120,509 @@ int ib_find_cached_pkey(struct ib_device *device,
ret = 0;
}
- read_unlock_irqrestore(&device->cache.lock, flags);
+err:
+ read_unlock_irqrestore(&device->cache_lock, flags);
return ret;
}
EXPORT_SYMBOL(ib_find_cached_pkey);
-int ib_find_exact_cached_pkey(struct ib_device *device,
- u8 port_num,
- u16 pkey,
- u16 *index)
+int ib_get_cached_lmc(struct ib_device *device, u32 port_num, u8 *lmc)
{
- struct ib_pkey_cache *cache;
unsigned long flags;
- int i;
- int ret = -ENOENT;
+ int ret = 0;
- if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
+ if (!rdma_is_port_valid(device, port_num))
return -EINVAL;
- read_lock_irqsave(&device->cache.lock, flags);
+ read_lock_irqsave(&device->cache_lock, flags);
+ *lmc = device->port_data[port_num].cache.lmc;
+ read_unlock_irqrestore(&device->cache_lock, flags);
- cache = device->cache.pkey_cache[port_num - rdma_start_port(device)];
+ return ret;
+}
+EXPORT_SYMBOL(ib_get_cached_lmc);
- *index = -1;
+int ib_get_cached_port_state(struct ib_device *device, u32 port_num,
+ enum ib_port_state *port_state)
+{
+ unsigned long flags;
+ int ret = 0;
- for (i = 0; i < cache->table_len; ++i)
- if (cache->table[i] == pkey) {
- *index = i;
- ret = 0;
- break;
- }
+ if (!rdma_is_port_valid(device, port_num))
+ return -EINVAL;
+
+ read_lock_irqsave(&device->cache_lock, flags);
+ *port_state = device->port_data[port_num].cache.port_state;
+ read_unlock_irqrestore(&device->cache_lock, flags);
+
+ return ret;
+}
+EXPORT_SYMBOL(ib_get_cached_port_state);
+
+/**
+ * rdma_get_gid_attr - Returns GID attributes for a port of a device
+ * at a requested gid_index, if a valid GID entry exists.
+ * @device: The device to query.
+ * @port_num: The port number on the device where the GID value
+ * is to be queried.
+ * @index: Index of the GID table entry whose attributes are to
+ * be queried.
+ *
+ * rdma_get_gid_attr() acquires reference count of gid attributes from the
+ * cached GID table. Caller must invoke rdma_put_gid_attr() to release
+ * reference to gid attribute regardless of link layer.
+ *
+ * Returns pointer to valid gid attribute or ERR_PTR for the appropriate error
+ * code.
+ */
+const struct ib_gid_attr *
+rdma_get_gid_attr(struct ib_device *device, u32 port_num, int index)
+{
+ const struct ib_gid_attr *attr = ERR_PTR(-ENODATA);
+ struct ib_gid_table *table;
+ unsigned long flags;
+
+ if (!rdma_is_port_valid(device, port_num))
+ return ERR_PTR(-EINVAL);
+
+ table = rdma_gid_table(device, port_num);
+ if (index < 0 || index >= table->sz)
+ return ERR_PTR(-EINVAL);
+
+ read_lock_irqsave(&table->rwlock, flags);
+ if (!is_gid_entry_valid(table->data_vec[index]))
+ goto done;
+
+ get_gid_entry(table->data_vec[index]);
+ attr = &table->data_vec[index]->attr;
+done:
+ read_unlock_irqrestore(&table->rwlock, flags);
+ return attr;
+}
+EXPORT_SYMBOL(rdma_get_gid_attr);
+
+/**
+ * rdma_query_gid_table - Reads GID table entries of all the ports of a device up to max_entries.
+ * @device: The device to query.
+ * @entries: Entries where GID entries are returned.
+ * @max_entries: Maximum number of entries that can be returned.
+ * Entries array must be allocated to hold max_entries number of entries.
+ *
+ * Returns number of entries on success or appropriate error code.
+ */
+ssize_t rdma_query_gid_table(struct ib_device *device,
+ struct ib_uverbs_gid_entry *entries,
+ size_t max_entries)
+{
+ const struct ib_gid_attr *gid_attr;
+ ssize_t num_entries = 0, ret;
+ struct ib_gid_table *table;
+ u32 port_num, i;
+ struct net_device *ndev;
+ unsigned long flags;
+
+ rdma_for_each_port(device, port_num) {
+ table = rdma_gid_table(device, port_num);
+ read_lock_irqsave(&table->rwlock, flags);
+ for (i = 0; i < table->sz; i++) {
+ if (!is_gid_entry_valid(table->data_vec[i]))
+ continue;
+ if (num_entries >= max_entries) {
+ ret = -EINVAL;
+ goto err;
+ }
- read_unlock_irqrestore(&device->cache.lock, flags);
+ gid_attr = &table->data_vec[i]->attr;
+
+ memcpy(&entries->gid, &gid_attr->gid,
+ sizeof(gid_attr->gid));
+ entries->gid_index = gid_attr->index;
+ entries->port_num = gid_attr->port_num;
+ entries->gid_type = gid_attr->gid_type;
+ ndev = rcu_dereference_protected(
+ gid_attr->ndev,
+ lockdep_is_held(&table->rwlock));
+ if (ndev)
+ entries->netdev_ifindex = ndev->ifindex;
+
+ num_entries++;
+ entries++;
+ }
+ read_unlock_irqrestore(&table->rwlock, flags);
+ }
+ return num_entries;
+err:
+ read_unlock_irqrestore(&table->rwlock, flags);
return ret;
}
-EXPORT_SYMBOL(ib_find_exact_cached_pkey);
+EXPORT_SYMBOL(rdma_query_gid_table);
+
+/**
+ * rdma_put_gid_attr - Release reference to the GID attribute
+ * @attr: Pointer to the GID attribute whose reference
+ * needs to be released.
+ *
+ * rdma_put_gid_attr() must be used to release reference whose
+ * reference is acquired using rdma_get_gid_attr() or any APIs
+ * which returns a pointer to the ib_gid_attr regardless of link layer
+ * of IB or RoCE.
+ *
+ */
+void rdma_put_gid_attr(const struct ib_gid_attr *attr)
+{
+ struct ib_gid_table_entry *entry =
+ container_of(attr, struct ib_gid_table_entry, attr);
+
+ put_gid_entry(entry);
+}
+EXPORT_SYMBOL(rdma_put_gid_attr);
+
+/**
+ * rdma_hold_gid_attr - Get reference to existing GID attribute
+ *
+ * @attr: Pointer to the GID attribute whose reference
+ * needs to be taken.
+ *
+ * Increase the reference count to a GID attribute to keep it from being
+ * freed. Callers are required to already be holding a reference to attribute.
+ *
+ */
+void rdma_hold_gid_attr(const struct ib_gid_attr *attr)
+{
+ struct ib_gid_table_entry *entry =
+ container_of(attr, struct ib_gid_table_entry, attr);
+
+ get_gid_entry(entry);
+}
+EXPORT_SYMBOL(rdma_hold_gid_attr);
-int ib_get_cached_lmc(struct ib_device *device,
- u8 port_num,
- u8 *lmc)
+/**
+ * rdma_read_gid_attr_ndev_rcu - Read GID attribute netdevice
+ * which must be in UP state.
+ *
+ * @attr:Pointer to the GID attribute
+ *
+ * Returns pointer to netdevice if the netdevice was attached to GID and
+ * netdevice is in UP state. Caller must hold RCU lock as this API
+ * reads the netdev flags which can change while netdevice migrates to
+ * different net namespace. Returns ERR_PTR with error code otherwise.
+ *
+ */
+struct net_device *rdma_read_gid_attr_ndev_rcu(const struct ib_gid_attr *attr)
{
+ struct ib_gid_table_entry *entry =
+ container_of(attr, struct ib_gid_table_entry, attr);
+ struct ib_device *device = entry->attr.device;
+ struct net_device *ndev = ERR_PTR(-EINVAL);
+ u32 port_num = entry->attr.port_num;
+ struct ib_gid_table *table;
unsigned long flags;
+ bool valid;
+
+ table = rdma_gid_table(device, port_num);
+
+ read_lock_irqsave(&table->rwlock, flags);
+ valid = is_gid_entry_valid(table->data_vec[attr->index]);
+ if (valid) {
+ ndev = rcu_dereference(attr->ndev);
+ if (!ndev)
+ ndev = ERR_PTR(-ENODEV);
+ }
+ read_unlock_irqrestore(&table->rwlock, flags);
+ return ndev;
+}
+EXPORT_SYMBOL(rdma_read_gid_attr_ndev_rcu);
+
+static int get_lower_dev_vlan(struct net_device *lower_dev,
+ struct netdev_nested_priv *priv)
+{
+ u16 *vlan_id = (u16 *)priv->data;
+
+ if (is_vlan_dev(lower_dev))
+ *vlan_id = vlan_dev_vlan_id(lower_dev);
+
+ /* We are interested only in first level vlan device, so
+ * always return 1 to stop iterating over next level devices.
+ */
+ return 1;
+}
+
+/**
+ * rdma_read_gid_l2_fields - Read the vlan ID and source MAC address
+ * of a GID entry.
+ *
+ * @attr: GID attribute pointer whose L2 fields to be read
+ * @vlan_id: Pointer to vlan id to fill up if the GID entry has
+ * vlan id. It is optional.
+ * @smac: Pointer to smac to fill up for a GID entry. It is optional.
+ *
+ * rdma_read_gid_l2_fields() returns 0 on success and returns vlan id
+ * (if gid entry has vlan) and source MAC, or returns error.
+ */
+int rdma_read_gid_l2_fields(const struct ib_gid_attr *attr,
+ u16 *vlan_id, u8 *smac)
+{
+ struct netdev_nested_priv priv = {
+ .data = (void *)vlan_id,
+ };
+ struct net_device *ndev;
+
+ rcu_read_lock();
+ ndev = rcu_dereference(attr->ndev);
+ if (!ndev) {
+ rcu_read_unlock();
+ return -ENODEV;
+ }
+ if (smac)
+ ether_addr_copy(smac, ndev->dev_addr);
+ if (vlan_id) {
+ *vlan_id = 0xffff;
+ if (is_vlan_dev(ndev)) {
+ *vlan_id = vlan_dev_vlan_id(ndev);
+ } else {
+ /* If the netdev is upper device and if it's lower
+ * device is vlan device, consider vlan id of
+ * the lower vlan device for this gid entry.
+ */
+ netdev_walk_all_lower_dev_rcu(attr->ndev,
+ get_lower_dev_vlan, &priv);
+ }
+ }
+ rcu_read_unlock();
+ return 0;
+}
+EXPORT_SYMBOL(rdma_read_gid_l2_fields);
+
+static int config_non_roce_gid_cache(struct ib_device *device,
+ u32 port, struct ib_port_attr *tprops)
+{
+ struct ib_gid_attr gid_attr = {};
+ struct ib_gid_table *table;
int ret = 0;
+ int i;
- if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
- return -EINVAL;
+ gid_attr.device = device;
+ gid_attr.port_num = port;
+ table = rdma_gid_table(device, port);
+
+ mutex_lock(&table->lock);
+ for (i = 0; i < tprops->gid_tbl_len; ++i) {
+ if (!device->ops.query_gid)
+ continue;
+ ret = device->ops.query_gid(device, port, i, &gid_attr.gid);
+ if (ret) {
+ dev_warn(&device->dev,
+ "query_gid failed (%d) for index %d\n", ret,
+ i);
+ goto err;
+ }
- read_lock_irqsave(&device->cache.lock, flags);
- *lmc = device->cache.lmc_cache[port_num - rdma_start_port(device)];
- read_unlock_irqrestore(&device->cache.lock, flags);
+ if (rdma_protocol_iwarp(device, port)) {
+ struct net_device *ndev;
+ ndev = ib_device_get_netdev(device, port);
+ if (!ndev)
+ continue;
+ RCU_INIT_POINTER(gid_attr.ndev, ndev);
+ dev_put(ndev);
+ }
+
+ gid_attr.index = i;
+ tprops->subnet_prefix =
+ be64_to_cpu(gid_attr.gid.global.subnet_prefix);
+ add_modify_gid(table, &gid_attr);
+ }
+err:
+ mutex_unlock(&table->lock);
return ret;
}
-EXPORT_SYMBOL(ib_get_cached_lmc);
-static void ib_cache_update(struct ib_device *device,
- u8 port)
+static int
+ib_cache_update(struct ib_device *device, u32 port, bool update_gids,
+ bool update_pkeys, bool enforce_security)
{
struct ib_port_attr *tprops = NULL;
- struct ib_pkey_cache *pkey_cache = NULL, *old_pkey_cache;
- struct ib_gid_cache {
- int table_len;
- union ib_gid table[0];
- } *gid_cache = NULL;
+ struct ib_pkey_cache *pkey_cache = NULL;
+ struct ib_pkey_cache *old_pkey_cache = NULL;
int i;
int ret;
- struct ib_gid_table *table;
- struct ib_gid_table **ports_table = device->cache.gid_cache;
- bool use_roce_gid_table =
- rdma_cap_roce_gid_table(device, port);
-
- if (port < rdma_start_port(device) || port > rdma_end_port(device))
- return;
- table = ports_table[port - rdma_start_port(device)];
+ if (!rdma_is_port_valid(device, port))
+ return -EINVAL;
tprops = kmalloc(sizeof *tprops, GFP_KERNEL);
if (!tprops)
- return;
+ return -ENOMEM;
ret = ib_query_port(device, port, tprops);
if (ret) {
- pr_warn("ib_query_port failed (%d) for %s\n",
- ret, device->name);
+ dev_warn(&device->dev, "ib_query_port failed (%d)\n", ret);
goto err;
}
- pkey_cache = kmalloc(sizeof *pkey_cache + tprops->pkey_tbl_len *
- sizeof *pkey_cache->table, GFP_KERNEL);
- if (!pkey_cache)
- goto err;
-
- pkey_cache->table_len = tprops->pkey_tbl_len;
-
- if (!use_roce_gid_table) {
- gid_cache = kmalloc(sizeof(*gid_cache) + tprops->gid_tbl_len *
- sizeof(*gid_cache->table), GFP_KERNEL);
- if (!gid_cache)
+ if (!rdma_protocol_roce(device, port) && update_gids) {
+ ret = config_non_roce_gid_cache(device, port,
+ tprops);
+ if (ret)
goto err;
-
- gid_cache->table_len = tprops->gid_tbl_len;
}
- for (i = 0; i < pkey_cache->table_len; ++i) {
- ret = ib_query_pkey(device, port, i, pkey_cache->table + i);
- if (ret) {
- pr_warn("ib_query_pkey failed (%d) for %s (index %d)\n",
- ret, device->name, i);
+ update_pkeys &= !!tprops->pkey_tbl_len;
+
+ if (update_pkeys) {
+ pkey_cache = kmalloc(struct_size(pkey_cache, table,
+ tprops->pkey_tbl_len),
+ GFP_KERNEL);
+ if (!pkey_cache) {
+ ret = -ENOMEM;
goto err;
}
- }
- if (!use_roce_gid_table) {
- for (i = 0; i < gid_cache->table_len; ++i) {
- ret = ib_query_gid(device, port, i,
- gid_cache->table + i, NULL);
+ pkey_cache->table_len = tprops->pkey_tbl_len;
+
+ for (i = 0; i < pkey_cache->table_len; ++i) {
+ ret = ib_query_pkey(device, port, i,
+ pkey_cache->table + i);
if (ret) {
- pr_warn("ib_query_gid failed (%d) for %s (index %d)\n",
- ret, device->name, i);
+ dev_warn(&device->dev,
+ "ib_query_pkey failed (%d) for index %d\n",
+ ret, i);
goto err;
}
}
}
- write_lock_irq(&device->cache.lock);
-
- old_pkey_cache = device->cache.pkey_cache[port - rdma_start_port(device)];
+ write_lock_irq(&device->cache_lock);
- device->cache.pkey_cache[port - rdma_start_port(device)] = pkey_cache;
- if (!use_roce_gid_table) {
- write_lock(&table->rwlock);
- for (i = 0; i < gid_cache->table_len; i++) {
- modify_gid(device, port, table, i, gid_cache->table + i,
- &zattr, false);
- }
- write_unlock(&table->rwlock);
+ if (update_pkeys) {
+ old_pkey_cache = device->port_data[port].cache.pkey;
+ device->port_data[port].cache.pkey = pkey_cache;
}
+ device->port_data[port].cache.lmc = tprops->lmc;
+
+ if (device->port_data[port].cache.port_state != IB_PORT_NOP &&
+ device->port_data[port].cache.port_state != tprops->state)
+ ibdev_info(device, "Port: %d Link %s\n", port,
+ ib_port_state_to_str(tprops->state));
- device->cache.lmc_cache[port - rdma_start_port(device)] = tprops->lmc;
+ device->port_data[port].cache.port_state = tprops->state;
- write_unlock_irq(&device->cache.lock);
+ device->port_data[port].cache.subnet_prefix = tprops->subnet_prefix;
+ write_unlock_irq(&device->cache_lock);
+
+ if (enforce_security)
+ ib_security_cache_change(device,
+ port,
+ tprops->subnet_prefix);
- kfree(gid_cache);
kfree(old_pkey_cache);
kfree(tprops);
- return;
+ return 0;
err:
kfree(pkey_cache);
- kfree(gid_cache);
kfree(tprops);
+ return ret;
+}
+
+static void ib_cache_event_task(struct work_struct *_work)
+{
+ struct ib_update_work *work =
+ container_of(_work, struct ib_update_work, work);
+ int ret;
+
+ /* Before distributing the cache update event, first sync
+ * the cache.
+ */
+ ret = ib_cache_update(work->event.device, work->event.element.port_num,
+ work->event.event == IB_EVENT_GID_CHANGE,
+ work->event.event == IB_EVENT_PKEY_CHANGE,
+ work->enforce_security);
+
+ /* GID event is notified already for individual GID entries by
+ * dispatch_gid_change_event(). Hence, notifiy for rest of the
+ * events.
+ */
+ if (!ret && work->event.event != IB_EVENT_GID_CHANGE)
+ ib_dispatch_event_clients(&work->event);
+
+ kfree(work);
}
-static void ib_cache_task(struct work_struct *_work)
+static void ib_generic_event_task(struct work_struct *_work)
{
struct ib_update_work *work =
container_of(_work, struct ib_update_work, work);
- ib_cache_update(work->device, work->port_num);
+ ib_dispatch_event_clients(&work->event);
kfree(work);
}
-static void ib_cache_event(struct ib_event_handler *handler,
- struct ib_event *event)
+static bool is_cache_update_event(const struct ib_event *event)
+{
+ return (event->event == IB_EVENT_PORT_ERR ||
+ event->event == IB_EVENT_PORT_ACTIVE ||
+ event->event == IB_EVENT_LID_CHANGE ||
+ event->event == IB_EVENT_PKEY_CHANGE ||
+ event->event == IB_EVENT_CLIENT_REREGISTER ||
+ event->event == IB_EVENT_GID_CHANGE);
+}
+
+/**
+ * ib_dispatch_event - Dispatch an asynchronous event
+ * @event:Event to dispatch
+ *
+ * Low-level drivers must call ib_dispatch_event() to dispatch the
+ * event to all registered event handlers when an asynchronous event
+ * occurs.
+ */
+void ib_dispatch_event(const struct ib_event *event)
{
struct ib_update_work *work;
- if (event->event == IB_EVENT_PORT_ERR ||
- event->event == IB_EVENT_PORT_ACTIVE ||
- event->event == IB_EVENT_LID_CHANGE ||
- event->event == IB_EVENT_PKEY_CHANGE ||
- event->event == IB_EVENT_SM_CHANGE ||
- event->event == IB_EVENT_CLIENT_REREGISTER ||
- event->event == IB_EVENT_GID_CHANGE) {
- work = kmalloc(sizeof *work, GFP_ATOMIC);
- if (work) {
- INIT_WORK(&work->work, ib_cache_task);
- work->device = event->device;
- work->port_num = event->element.port_num;
- queue_work(ib_wq, &work->work);
- }
- }
+ work = kzalloc(sizeof(*work), GFP_ATOMIC);
+ if (!work)
+ return;
+
+ if (is_cache_update_event(event))
+ INIT_WORK(&work->work, ib_cache_event_task);
+ else
+ INIT_WORK(&work->work, ib_generic_event_task);
+
+ work->event = *event;
+ if (event->event == IB_EVENT_PKEY_CHANGE ||
+ event->event == IB_EVENT_GID_CHANGE)
+ work->enforce_security = true;
+
+ queue_work(ib_wq, &work->work);
}
+EXPORT_SYMBOL(ib_dispatch_event);
int ib_cache_setup_one(struct ib_device *device)
{
- int p;
+ u32 p;
int err;
- rwlock_init(&device->cache.lock);
-
- device->cache.pkey_cache =
- kzalloc(sizeof *device->cache.pkey_cache *
- (rdma_end_port(device) - rdma_start_port(device) + 1), GFP_KERNEL);
- device->cache.lmc_cache = kmalloc(sizeof *device->cache.lmc_cache *
- (rdma_end_port(device) -
- rdma_start_port(device) + 1),
- GFP_KERNEL);
- if (!device->cache.pkey_cache ||
- !device->cache.lmc_cache) {
- err = -ENOMEM;
- goto free;
- }
-
err = gid_table_setup_one(device);
if (err)
- goto free;
-
- for (p = 0; p <= rdma_end_port(device) - rdma_start_port(device); ++p)
- ib_cache_update(device, p + rdma_start_port(device));
+ return err;
- INIT_IB_EVENT_HANDLER(&device->cache.event_handler,
- device, ib_cache_event);
- err = ib_register_event_handler(&device->cache.event_handler);
- if (err)
- goto err;
+ rdma_for_each_port (device, p) {
+ err = ib_cache_update(device, p, true, true, true);
+ if (err) {
+ gid_table_cleanup_one(device);
+ return err;
+ }
+ }
return 0;
-
-err:
- gid_table_cleanup_one(device);
-free:
- kfree(device->cache.pkey_cache);
- kfree(device->cache.lmc_cache);
- return err;
}
void ib_cache_release_one(struct ib_device *device)
{
- int p;
+ u32 p;
/*
* The release function frees all the cache elements.
@@ -1203,36 +1630,25 @@ void ib_cache_release_one(struct ib_device *device)
* all the device's resources when the cache could no
* longer be accessed.
*/
- if (device->cache.pkey_cache)
- for (p = 0;
- p <= rdma_end_port(device) - rdma_start_port(device); ++p)
- kfree(device->cache.pkey_cache[p]);
+ rdma_for_each_port (device, p)
+ kfree(device->port_data[p].cache.pkey);
gid_table_release_one(device);
- kfree(device->cache.pkey_cache);
- kfree(device->cache.lmc_cache);
}
void ib_cache_cleanup_one(struct ib_device *device)
{
- /* The cleanup function unregisters the event handler,
- * waits for all in-progress workqueue elements and cleans
- * up the GID cache. This function should be called after
- * the device was removed from the devices list and all
- * clients were removed, so the cache exists but is
+ /* The cleanup function waits for all in-progress workqueue
+ * elements and cleans up the GID cache. This function should be
+ * called after the device was removed from the devices list and
+ * all clients were removed, so the cache exists but is
* non-functional and shouldn't be updated anymore.
*/
- ib_unregister_event_handler(&device->cache.event_handler);
flush_workqueue(ib_wq);
gid_table_cleanup_one(device);
-}
-
-void __init ib_cache_setup(void)
-{
- roce_gid_mgmt_init();
-}
-void __exit ib_cache_cleanup(void)
-{
- roce_gid_mgmt_cleanup();
+ /*
+ * Flush the wq second time for any pending GID delete work.
+ */
+ flush_workqueue(ib_wq);
}
diff --git a/drivers/infiniband/core/cgroup.c b/drivers/infiniband/core/cgroup.c
new file mode 100644
index 000000000000..1f037fe01450
--- /dev/null
+++ b/drivers/infiniband/core/cgroup.c
@@ -0,0 +1,53 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2016 Parav Pandit <pandit.parav@gmail.com>
+ */
+
+#include "core_priv.h"
+
+/**
+ * ib_device_register_rdmacg - register with rdma cgroup.
+ * @device: device to register to participate in resource
+ * accounting by rdma cgroup.
+ *
+ * Register with the rdma cgroup. Should be called before
+ * exposing rdma device to user space applications to avoid
+ * resource accounting leak.
+ */
+void ib_device_register_rdmacg(struct ib_device *device)
+{
+ device->cg_device.name = device->name;
+ rdmacg_register_device(&device->cg_device);
+}
+
+/**
+ * ib_device_unregister_rdmacg - unregister with rdma cgroup.
+ * @device: device to unregister.
+ *
+ * Unregister with the rdma cgroup. Should be called after
+ * all the resources are deallocated, and after a stage when any
+ * other resource allocation by user application cannot be done
+ * for this device to avoid any leak in accounting.
+ */
+void ib_device_unregister_rdmacg(struct ib_device *device)
+{
+ rdmacg_unregister_device(&device->cg_device);
+}
+
+int ib_rdmacg_try_charge(struct ib_rdmacg_object *cg_obj,
+ struct ib_device *device,
+ enum rdmacg_resource_type resource_index)
+{
+ return rdmacg_try_charge(&cg_obj->cg, &device->cg_device,
+ resource_index);
+}
+EXPORT_SYMBOL(ib_rdmacg_try_charge);
+
+void ib_rdmacg_uncharge(struct ib_rdmacg_object *cg_obj,
+ struct ib_device *device,
+ enum rdmacg_resource_type resource_index)
+{
+ rdmacg_uncharge(cg_obj->cg, &device->cg_device,
+ resource_index);
+}
+EXPORT_SYMBOL(ib_rdmacg_uncharge);
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index cf1edfa1cbac..024df6ee239d 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -1,36 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
* Copyright (c) 2004-2007 Intel Corporation. All rights reserved.
* Copyright (c) 2004 Topspin Corporation. All rights reserved.
* Copyright (c) 2004, 2005 Voltaire Corporation. All rights reserved.
* Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
+ * Copyright (c) 2019, Mellanox Technologies inc. All rights reserved.
*/
#include <linux/completion.h>
@@ -51,12 +25,18 @@
#include <rdma/ib_cache.h>
#include <rdma/ib_cm.h>
+#include <rdma/ib_sysfs.h>
#include "cm_msgs.h"
+#include "core_priv.h"
+#include "cm_trace.h"
MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("InfiniBand CM");
MODULE_LICENSE("Dual BSD/GPL");
+#define CM_DIRECT_RETRY_CTX ((void *) 1UL)
+#define CM_MRA_SETTING 24 /* 4.096us * 2^24 = ~68.7 seconds */
+
static const char * const ibcm_rej_reason_strs[] = {
[IB_CM_REJ_NO_QP] = "no QP",
[IB_CM_REJ_NO_EEC] = "no EEC",
@@ -91,6 +71,8 @@ static const char * const ibcm_rej_reason_strs[] = {
[IB_CM_REJ_INVALID_CLASS_VERSION] = "invalid class version",
[IB_CM_REJ_INVALID_FLOW_LABEL] = "invalid flow label",
[IB_CM_REJ_INVALID_ALT_FLOW_LABEL] = "invalid alt flow label",
+ [IB_CM_REJ_VENDOR_OPTION_NOT_SUPPORTED] =
+ "vendor option is not supported",
};
const char *__attribute_const__ ibcm_reject_msg(int reason)
@@ -105,8 +87,21 @@ const char *__attribute_const__ ibcm_reject_msg(int reason)
}
EXPORT_SYMBOL(ibcm_reject_msg);
-static void cm_add_one(struct ib_device *device);
+struct cm_id_private;
+struct cm_work;
+static int cm_add_one(struct ib_device *device);
static void cm_remove_one(struct ib_device *device, void *client_data);
+static void cm_process_work(struct cm_id_private *cm_id_priv,
+ struct cm_work *work);
+static int cm_send_sidr_rep_locked(struct cm_id_private *cm_id_priv,
+ struct ib_cm_sidr_rep_param *param);
+static void cm_issue_dreq(struct cm_id_private *cm_id_priv);
+static int cm_send_drep_locked(struct cm_id_private *cm_id_priv,
+ void *private_data, u8 private_data_len);
+static int cm_send_rej_locked(struct cm_id_private *cm_id_priv,
+ enum ib_cm_rej_reason reason, void *ari,
+ u8 ari_length, const void *private_data,
+ u8 private_data_len);
static struct ib_client cm_client = {
.name = "cm",
@@ -124,12 +119,11 @@ static struct ib_cm {
struct rb_root remote_qp_table;
struct rb_root remote_id_table;
struct rb_root remote_sidr_table;
- struct idr local_id_table;
+ struct xarray local_id_table;
+ u32 local_id_next;
__be32 random_id_operand;
struct list_head timewait_list;
struct workqueue_struct *wq;
- /* Sync on cm change port state */
- spinlock_t state_lock;
} cm;
/* Counter indexes ordered by attribute ID */
@@ -157,78 +151,34 @@ enum {
CM_COUNTER_GROUPS
};
-static char const counter_group_names[CM_COUNTER_GROUPS]
- [sizeof("cm_rx_duplicates")] = {
- "cm_tx_msgs", "cm_tx_retries",
- "cm_rx_msgs", "cm_rx_duplicates"
-};
-
-struct cm_counter_group {
- struct kobject obj;
- atomic_long_t counter[CM_ATTR_COUNT];
-};
-
struct cm_counter_attribute {
- struct attribute attr;
- int index;
-};
-
-#define CM_COUNTER_ATTR(_name, _index) \
-struct cm_counter_attribute cm_##_name##_counter_attr = { \
- .attr = { .name = __stringify(_name), .mode = 0444 }, \
- .index = _index \
-}
-
-static CM_COUNTER_ATTR(req, CM_REQ_COUNTER);
-static CM_COUNTER_ATTR(mra, CM_MRA_COUNTER);
-static CM_COUNTER_ATTR(rej, CM_REJ_COUNTER);
-static CM_COUNTER_ATTR(rep, CM_REP_COUNTER);
-static CM_COUNTER_ATTR(rtu, CM_RTU_COUNTER);
-static CM_COUNTER_ATTR(dreq, CM_DREQ_COUNTER);
-static CM_COUNTER_ATTR(drep, CM_DREP_COUNTER);
-static CM_COUNTER_ATTR(sidr_req, CM_SIDR_REQ_COUNTER);
-static CM_COUNTER_ATTR(sidr_rep, CM_SIDR_REP_COUNTER);
-static CM_COUNTER_ATTR(lap, CM_LAP_COUNTER);
-static CM_COUNTER_ATTR(apr, CM_APR_COUNTER);
-
-static struct attribute *cm_counter_default_attrs[] = {
- &cm_req_counter_attr.attr,
- &cm_mra_counter_attr.attr,
- &cm_rej_counter_attr.attr,
- &cm_rep_counter_attr.attr,
- &cm_rtu_counter_attr.attr,
- &cm_dreq_counter_attr.attr,
- &cm_drep_counter_attr.attr,
- &cm_sidr_req_counter_attr.attr,
- &cm_sidr_rep_counter_attr.attr,
- &cm_lap_counter_attr.attr,
- &cm_apr_counter_attr.attr,
- NULL
+ struct ib_port_attribute attr;
+ unsigned short group;
+ unsigned short index;
};
struct cm_port {
struct cm_device *cm_dev;
struct ib_mad_agent *mad_agent;
- struct kobject port_obj;
- u8 port_num;
- struct list_head cm_priv_prim_list;
- struct list_head cm_priv_altr_list;
- struct cm_counter_group counter_group[CM_COUNTER_GROUPS];
+ struct ib_mad_agent *rep_agent;
+ u32 port_num;
+ atomic_long_t counters[CM_COUNTER_GROUPS][CM_ATTR_COUNT];
};
struct cm_device {
+ struct kref kref;
struct list_head list;
+ rwlock_t mad_agent_lock;
struct ib_device *ib_device;
- struct device *device;
u8 ack_delay;
int going_down;
- struct cm_port *port[0];
+ struct cm_port *port[];
};
struct cm_av {
struct cm_port *port;
- union ib_gid dgid;
- struct ib_ah_attr ah_attr;
+ struct rdma_ah_attr ah_attr;
+ u16 dlid_datapath;
u16 pkey_index;
u8 timeout;
};
@@ -241,11 +191,11 @@ struct cm_work {
__be32 local_id; /* Established / timewait */
__be32 remote_id;
struct ib_cm_event cm_event;
- struct ib_sa_path_rec path[0];
+ struct sa_path_rec path[];
};
struct cm_timewait_info {
- struct cm_work work; /* Must be first. */
+ struct cm_work work;
struct list_head list;
struct rb_node remote_qp_node;
struct rb_node remote_id_node;
@@ -260,12 +210,15 @@ struct cm_id_private {
struct rb_node service_node;
struct rb_node sidr_id_node;
+ u32 sidr_slid;
spinlock_t lock; /* Do not acquire inside cm.lock */
struct completion comp;
- atomic_t refcount;
+ refcount_t refcount;
/* Number of clients sharing this ib_cm_id. Only valid for listeners.
- * Protected by the cm.lock spinlock. */
+ * Protected by the cm.lock spinlock.
+ */
int listen_sharecount;
+ struct rcu_head rcu;
struct ib_mad_send_buf *msg;
struct cm_timewait_info *timewait_info;
@@ -285,99 +238,161 @@ struct cm_id_private {
__be16 pkey;
u8 private_data_len;
u8 max_cm_retries;
- u8 peer_to_peer;
u8 responder_resources;
u8 initiator_depth;
u8 retry_count;
u8 rnr_retry_count;
- u8 service_timeout;
u8 target_ack_delay;
- struct list_head prim_list;
- struct list_head altr_list;
- /* Indicates that the send port mad is registered and av is set */
- int prim_send_port_not_ready;
- int altr_send_port_not_ready;
-
struct list_head work_list;
atomic_t work_count;
+
+ struct rdma_ucm_ece ece;
};
+static void cm_dev_release(struct kref *kref)
+{
+ struct cm_device *cm_dev = container_of(kref, struct cm_device, kref);
+ u32 i;
+
+ rdma_for_each_port(cm_dev->ib_device, i)
+ kfree(cm_dev->port[i - 1]);
+
+ kfree(cm_dev);
+}
+
+static void cm_device_put(struct cm_device *cm_dev)
+{
+ kref_put(&cm_dev->kref, cm_dev_release);
+}
+
static void cm_work_handler(struct work_struct *work);
static inline void cm_deref_id(struct cm_id_private *cm_id_priv)
{
- if (atomic_dec_and_test(&cm_id_priv->refcount))
+ if (refcount_dec_and_test(&cm_id_priv->refcount))
complete(&cm_id_priv->comp);
}
-static int cm_alloc_msg(struct cm_id_private *cm_id_priv,
- struct ib_mad_send_buf **msg)
+static struct ib_mad_send_buf *
+cm_alloc_msg_agent(struct cm_id_private *cm_id_priv, bool rep_agent)
{
struct ib_mad_agent *mad_agent;
struct ib_mad_send_buf *m;
struct ib_ah *ah;
- struct cm_av *av;
- unsigned long flags, flags2;
- int ret = 0;
- /* don't let the port to be released till the agent is down */
- spin_lock_irqsave(&cm.state_lock, flags2);
- spin_lock_irqsave(&cm.lock, flags);
- if (!cm_id_priv->prim_send_port_not_ready)
- av = &cm_id_priv->av;
- else if (!cm_id_priv->altr_send_port_not_ready &&
- (cm_id_priv->alt_av.port))
- av = &cm_id_priv->alt_av;
- else {
- pr_info("%s: not valid CM id\n", __func__);
- ret = -ENODEV;
- spin_unlock_irqrestore(&cm.lock, flags);
- goto out;
- }
- spin_unlock_irqrestore(&cm.lock, flags);
- /* Make sure the port haven't released the mad yet */
- mad_agent = cm_id_priv->av.port->mad_agent;
+ lockdep_assert_held(&cm_id_priv->lock);
+
+ if (!cm_id_priv->av.port)
+ return ERR_PTR(-EINVAL);
+
+ read_lock(&cm_id_priv->av.port->cm_dev->mad_agent_lock);
+ mad_agent = rep_agent ? cm_id_priv->av.port->rep_agent :
+ cm_id_priv->av.port->mad_agent;
if (!mad_agent) {
- pr_info("%s: not a valid MAD agent\n", __func__);
- ret = -ENODEV;
+ m = ERR_PTR(-EINVAL);
goto out;
}
- ah = ib_create_ah(mad_agent->qp->pd, &av->ah_attr);
+
+ ah = rdma_create_ah(mad_agent->qp->pd, &cm_id_priv->av.ah_attr, 0);
if (IS_ERR(ah)) {
- ret = PTR_ERR(ah);
+ m = ERR_CAST(ah);
goto out;
}
m = ib_create_send_mad(mad_agent, cm_id_priv->id.remote_cm_qpn,
- av->pkey_index,
+ cm_id_priv->av.pkey_index,
0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
GFP_ATOMIC,
IB_MGMT_BASE_VERSION);
if (IS_ERR(m)) {
- ib_destroy_ah(ah);
- ret = PTR_ERR(m);
+ rdma_destroy_ah(ah, 0);
goto out;
}
- /* Timeout set by caller if response is expected. */
m->ah = ah;
- m->retries = cm_id_priv->max_cm_retries;
-
- atomic_inc(&cm_id_priv->refcount);
- m->context[0] = cm_id_priv;
- *msg = m;
out:
- spin_unlock_irqrestore(&cm.state_lock, flags2);
- return ret;
+ read_unlock(&cm_id_priv->av.port->cm_dev->mad_agent_lock);
+ return m;
}
-static int cm_alloc_response_msg(struct cm_port *port,
- struct ib_mad_recv_wc *mad_recv_wc,
- struct ib_mad_send_buf **msg)
+static struct ib_mad_send_buf *cm_alloc_msg(struct cm_id_private *cm_id_priv)
+{
+ return cm_alloc_msg_agent(cm_id_priv, false);
+}
+
+static void cm_free_msg(struct ib_mad_send_buf *msg)
+{
+ if (msg->ah)
+ rdma_destroy_ah(msg->ah, 0);
+ ib_free_send_mad(msg);
+}
+
+static struct ib_mad_send_buf *
+cm_alloc_priv_msg_rep(struct cm_id_private *cm_id_priv, enum ib_cm_state state,
+ bool rep_agent)
+{
+ struct ib_mad_send_buf *msg;
+
+ lockdep_assert_held(&cm_id_priv->lock);
+
+ msg = cm_alloc_msg_agent(cm_id_priv, rep_agent);
+ if (IS_ERR(msg))
+ return msg;
+
+ cm_id_priv->msg = msg;
+ refcount_inc(&cm_id_priv->refcount);
+ msg->context[0] = cm_id_priv;
+ msg->context[1] = (void *) (unsigned long) state;
+
+ msg->retries = cm_id_priv->max_cm_retries;
+ msg->timeout_ms = cm_id_priv->timeout_ms;
+
+ return msg;
+}
+
+static struct ib_mad_send_buf *
+cm_alloc_priv_msg(struct cm_id_private *cm_id_priv, enum ib_cm_state state)
+{
+ return cm_alloc_priv_msg_rep(cm_id_priv, state, false);
+}
+
+static void cm_free_priv_msg(struct ib_mad_send_buf *msg)
+{
+ struct cm_id_private *cm_id_priv = msg->context[0];
+
+ lockdep_assert_held(&cm_id_priv->lock);
+
+ if (!WARN_ON(cm_id_priv->msg != msg))
+ cm_id_priv->msg = NULL;
+
+ if (msg->ah)
+ rdma_destroy_ah(msg->ah, 0);
+ cm_deref_id(cm_id_priv);
+ ib_free_send_mad(msg);
+}
+
+static struct ib_mad_send_buf *
+cm_alloc_response_msg_no_ah(struct cm_port *port,
+ struct ib_mad_recv_wc *mad_recv_wc,
+ bool direct_retry)
{
struct ib_mad_send_buf *m;
+
+ m = ib_create_send_mad(port->mad_agent, 1, mad_recv_wc->wc->pkey_index,
+ 0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
+ GFP_ATOMIC, IB_MGMT_BASE_VERSION);
+ if (!IS_ERR(m))
+ m->context[0] = direct_retry ? CM_DIRECT_RETRY_CTX : NULL;
+
+ return m;
+}
+
+static int cm_create_response_msg_ah(struct cm_port *port,
+ struct ib_mad_recv_wc *mad_recv_wc,
+ struct ib_mad_send_buf *msg)
+{
struct ib_ah *ah;
ah = ib_create_ah_from_wc(port->mad_agent->qp->pd, mad_recv_wc->wc,
@@ -385,29 +400,33 @@ static int cm_alloc_response_msg(struct cm_port *port,
if (IS_ERR(ah))
return PTR_ERR(ah);
- m = ib_create_send_mad(port->mad_agent, 1, mad_recv_wc->wc->pkey_index,
- 0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
- GFP_ATOMIC,
- IB_MGMT_BASE_VERSION);
- if (IS_ERR(m)) {
- ib_destroy_ah(ah);
- return PTR_ERR(m);
- }
- m->ah = ah;
- *msg = m;
+ msg->ah = ah;
return 0;
}
-static void cm_free_msg(struct ib_mad_send_buf *msg)
+static int cm_alloc_response_msg(struct cm_port *port,
+ struct ib_mad_recv_wc *mad_recv_wc,
+ bool direct_retry,
+ struct ib_mad_send_buf **msg)
{
- ib_destroy_ah(msg->ah);
- if (msg->context[0])
- cm_deref_id(msg->context[0]);
- ib_free_send_mad(msg);
+ struct ib_mad_send_buf *m;
+ int ret;
+
+ m = cm_alloc_response_msg_no_ah(port, mad_recv_wc, direct_retry);
+ if (IS_ERR(m))
+ return PTR_ERR(m);
+
+ ret = cm_create_response_msg_ah(port, mad_recv_wc, m);
+ if (ret) {
+ ib_free_send_mad(m);
+ return ret;
+ }
+
+ *msg = m;
+ return 0;
}
-static void * cm_copy_private_data(const void *private_data,
- u8 private_data_len)
+static void *cm_copy_private_data(const void *private_data, u8 private_data_len)
{
void *data;
@@ -431,112 +450,150 @@ static void cm_set_private_data(struct cm_id_private *cm_id_priv,
cm_id_priv->private_data_len = private_data_len;
}
-static void cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc,
- struct ib_grh *grh, struct cm_av *av)
+static void cm_set_av_port(struct cm_av *av, struct cm_port *port)
{
+ struct cm_port *old_port = av->port;
+
+ if (old_port == port)
+ return;
+
av->port = port;
+ if (old_port)
+ cm_device_put(old_port->cm_dev);
+ if (port)
+ kref_get(&port->cm_dev->kref);
+}
+
+static void cm_init_av_for_lap(struct cm_port *port, struct ib_wc *wc,
+ struct rdma_ah_attr *ah_attr, struct cm_av *av)
+{
+ cm_set_av_port(av, port);
+ av->pkey_index = wc->pkey_index;
+ rdma_move_ah_attr(&av->ah_attr, ah_attr);
+}
+
+static int cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc,
+ struct ib_grh *grh, struct cm_av *av)
+{
+ cm_set_av_port(av, port);
av->pkey_index = wc->pkey_index;
- ib_init_ah_from_wc(port->cm_dev->ib_device, port->port_num, wc,
- grh, &av->ah_attr);
+ return ib_init_ah_attr_from_wc(port->cm_dev->ib_device,
+ port->port_num, wc,
+ grh, &av->ah_attr);
}
-static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av,
- struct cm_id_private *cm_id_priv)
+static struct cm_port *
+get_cm_port_from_path(struct sa_path_rec *path, const struct ib_gid_attr *attr)
{
struct cm_device *cm_dev;
struct cm_port *port = NULL;
unsigned long flags;
- int ret;
- u8 p;
- struct net_device *ndev = ib_get_ndev_from_path(path);
-
- read_lock_irqsave(&cm.device_lock, flags);
- list_for_each_entry(cm_dev, &cm.device_list, list) {
- if (!ib_find_cached_gid(cm_dev->ib_device, &path->sgid,
- path->gid_type, ndev, &p, NULL)) {
- port = cm_dev->port[p-1];
- break;
+
+ if (attr) {
+ read_lock_irqsave(&cm.device_lock, flags);
+ list_for_each_entry(cm_dev, &cm.device_list, list) {
+ if (cm_dev->ib_device == attr->device) {
+ port = cm_dev->port[attr->port_num - 1];
+ break;
+ }
+ }
+ read_unlock_irqrestore(&cm.device_lock, flags);
+ } else {
+ /* SGID attribute can be NULL in following
+ * conditions.
+ * (a) Alternative path
+ * (b) IB link layer without GRH
+ * (c) LAP send messages
+ */
+ read_lock_irqsave(&cm.device_lock, flags);
+ list_for_each_entry(cm_dev, &cm.device_list, list) {
+ attr = rdma_find_gid(cm_dev->ib_device,
+ &path->sgid,
+ sa_conv_pathrec_to_gid_type(path),
+ NULL);
+ if (!IS_ERR(attr)) {
+ port = cm_dev->port[attr->port_num - 1];
+ break;
+ }
}
+ read_unlock_irqrestore(&cm.device_lock, flags);
+ if (port)
+ rdma_put_gid_attr(attr);
}
- read_unlock_irqrestore(&cm.device_lock, flags);
+ return port;
+}
- if (ndev)
- dev_put(ndev);
+static int cm_init_av_by_path(struct sa_path_rec *path,
+ const struct ib_gid_attr *sgid_attr,
+ struct cm_av *av)
+{
+ struct rdma_ah_attr new_ah_attr;
+ struct cm_device *cm_dev;
+ struct cm_port *port;
+ int ret;
+ port = get_cm_port_from_path(path, sgid_attr);
if (!port)
return -EINVAL;
+ cm_dev = port->cm_dev;
ret = ib_find_cached_pkey(cm_dev->ib_device, port->port_num,
be16_to_cpu(path->pkey), &av->pkey_index);
if (ret)
return ret;
- av->port = port;
- ib_init_ah_from_path(cm_dev->ib_device, port->port_num, path,
- &av->ah_attr);
- av->timeout = path->packet_life_time + 1;
-
- spin_lock_irqsave(&cm.lock, flags);
- if (&cm_id_priv->av == av)
- list_add_tail(&cm_id_priv->prim_list, &port->cm_priv_prim_list);
- else if (&cm_id_priv->alt_av == av)
- list_add_tail(&cm_id_priv->altr_list, &port->cm_priv_altr_list);
- else
- ret = -EINVAL;
+ cm_set_av_port(av, port);
- spin_unlock_irqrestore(&cm.lock, flags);
+ /*
+ * av->ah_attr might be initialized based on wc or during
+ * request processing time which might have reference to sgid_attr.
+ * So initialize a new ah_attr on stack.
+ * If initialization fails, old ah_attr is used for sending any
+ * responses. If initialization is successful, than new ah_attr
+ * is used by overwriting the old one. So that right ah_attr
+ * can be used to return an error response.
+ */
+ ret = ib_init_ah_attr_from_path(cm_dev->ib_device, port->port_num, path,
+ &new_ah_attr, sgid_attr);
+ if (ret)
+ return ret;
- return ret;
+ av->timeout = path->packet_life_time + 1;
+ rdma_move_ah_attr(&av->ah_attr, &new_ah_attr);
+ return 0;
}
-static int cm_alloc_id(struct cm_id_private *cm_id_priv)
+/* Move av created by cm_init_av_by_path(), so av.dgid is not moved */
+static void cm_move_av_from_path(struct cm_av *dest, struct cm_av *src)
{
- unsigned long flags;
- int id;
-
- idr_preload(GFP_KERNEL);
- spin_lock_irqsave(&cm.lock, flags);
-
- id = idr_alloc_cyclic(&cm.local_id_table, cm_id_priv, 0, 0, GFP_NOWAIT);
-
- spin_unlock_irqrestore(&cm.lock, flags);
- idr_preload_end();
-
- cm_id_priv->id.local_id = (__force __be32)id ^ cm.random_id_operand;
- return id < 0 ? id : 0;
+ cm_set_av_port(dest, src->port);
+ cm_set_av_port(src, NULL);
+ dest->pkey_index = src->pkey_index;
+ rdma_move_ah_attr(&dest->ah_attr, &src->ah_attr);
+ dest->timeout = src->timeout;
}
-static void cm_free_id(__be32 local_id)
+static void cm_destroy_av(struct cm_av *av)
{
- spin_lock_irq(&cm.lock);
- idr_remove(&cm.local_id_table,
- (__force int) (local_id ^ cm.random_id_operand));
- spin_unlock_irq(&cm.lock);
+ rdma_destroy_ah_attr(&av->ah_attr);
+ cm_set_av_port(av, NULL);
}
-static struct cm_id_private * cm_get_id(__be32 local_id, __be32 remote_id)
+static u32 cm_local_id(__be32 local_id)
{
- struct cm_id_private *cm_id_priv;
-
- cm_id_priv = idr_find(&cm.local_id_table,
- (__force int) (local_id ^ cm.random_id_operand));
- if (cm_id_priv) {
- if (cm_id_priv->id.remote_id == remote_id)
- atomic_inc(&cm_id_priv->refcount);
- else
- cm_id_priv = NULL;
- }
-
- return cm_id_priv;
+ return (__force u32) (local_id ^ cm.random_id_operand);
}
-static struct cm_id_private * cm_acquire_id(__be32 local_id, __be32 remote_id)
+static struct cm_id_private *cm_acquire_id(__be32 local_id, __be32 remote_id)
{
struct cm_id_private *cm_id_priv;
- spin_lock_irq(&cm.lock);
- cm_id_priv = cm_get_id(local_id, remote_id);
- spin_unlock_irq(&cm.lock);
+ rcu_read_lock();
+ cm_id_priv = xa_load(&cm.local_id_table, cm_local_id(local_id));
+ if (!cm_id_priv || cm_id_priv->id.remote_id != remote_id ||
+ !refcount_inc_not_zero(&cm_id_priv->refcount))
+ cm_id_priv = NULL;
+ rcu_read_unlock();
return cm_id_priv;
}
@@ -566,22 +623,25 @@ static int be64_gt(__be64 a, __be64 b)
return (__force u64) a > (__force u64) b;
}
-static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv)
+/*
+ * Inserts a new cm_id_priv into the listen_service_table. Returns cm_id_priv
+ * if the new ID was inserted, NULL if it could not be inserted due to a
+ * collision, or the existing cm_id_priv ready for shared usage.
+ */
+static struct cm_id_private *cm_insert_listen(struct cm_id_private *cm_id_priv,
+ ib_cm_handler shared_handler)
{
struct rb_node **link = &cm.listen_service_table.rb_node;
struct rb_node *parent = NULL;
struct cm_id_private *cur_cm_id_priv;
__be64 service_id = cm_id_priv->id.service_id;
- __be64 service_mask = cm_id_priv->id.service_mask;
+ unsigned long flags;
+ spin_lock_irqsave(&cm.lock, flags);
while (*link) {
parent = *link;
cur_cm_id_priv = rb_entry(parent, struct cm_id_private,
service_node);
- if ((cur_cm_id_priv->id.service_mask & service_id) ==
- (service_mask & cur_cm_id_priv->id.service_id) &&
- (cm_id_priv->id.device == cur_cm_id_priv->id.device))
- return cur_cm_id_priv;
if (cm_id_priv->id.device < cur_cm_id_priv->id.device)
link = &(*link)->rb_left;
@@ -591,26 +651,38 @@ static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv)
link = &(*link)->rb_left;
else if (be64_gt(service_id, cur_cm_id_priv->id.service_id))
link = &(*link)->rb_right;
- else
- link = &(*link)->rb_right;
+ else {
+ /*
+ * Sharing an ib_cm_id with different handlers is not
+ * supported
+ */
+ if (cur_cm_id_priv->id.cm_handler != shared_handler ||
+ cur_cm_id_priv->id.context ||
+ WARN_ON(!cur_cm_id_priv->id.cm_handler)) {
+ spin_unlock_irqrestore(&cm.lock, flags);
+ return NULL;
+ }
+ refcount_inc(&cur_cm_id_priv->refcount);
+ cur_cm_id_priv->listen_sharecount++;
+ spin_unlock_irqrestore(&cm.lock, flags);
+ return cur_cm_id_priv;
+ }
}
+ cm_id_priv->listen_sharecount++;
rb_link_node(&cm_id_priv->service_node, parent, link);
rb_insert_color(&cm_id_priv->service_node, &cm.listen_service_table);
- return NULL;
+ spin_unlock_irqrestore(&cm.lock, flags);
+ return cm_id_priv;
}
-static struct cm_id_private * cm_find_listen(struct ib_device *device,
- __be64 service_id)
+static struct cm_id_private *cm_find_listen(struct ib_device *device,
+ __be64 service_id)
{
struct rb_node *node = cm.listen_service_table.rb_node;
struct cm_id_private *cm_id_priv;
while (node) {
cm_id_priv = rb_entry(node, struct cm_id_private, service_node);
- if ((cm_id_priv->id.service_mask & service_id) ==
- cm_id_priv->id.service_id &&
- (cm_id_priv->id.device == device))
- return cm_id_priv;
if (device < cm_id_priv->id.device)
node = node->rb_left;
@@ -620,14 +692,16 @@ static struct cm_id_private * cm_find_listen(struct ib_device *device,
node = node->rb_left;
else if (be64_gt(service_id, cm_id_priv->id.service_id))
node = node->rb_right;
- else
- node = node->rb_right;
+ else {
+ refcount_inc(&cm_id_priv->refcount);
+ return cm_id_priv;
+ }
}
return NULL;
}
-static struct cm_timewait_info * cm_insert_remote_id(struct cm_timewait_info
- *timewait_info)
+static struct cm_timewait_info *
+cm_insert_remote_id(struct cm_timewait_info *timewait_info)
{
struct rb_node **link = &cm.remote_id_table.rb_node;
struct rb_node *parent = NULL;
@@ -656,12 +730,14 @@ static struct cm_timewait_info * cm_insert_remote_id(struct cm_timewait_info
return NULL;
}
-static struct cm_timewait_info * cm_find_remote_id(__be64 remote_ca_guid,
- __be32 remote_id)
+static struct cm_id_private *cm_find_remote_id(__be64 remote_ca_guid,
+ __be32 remote_id)
{
struct rb_node *node = cm.remote_id_table.rb_node;
struct cm_timewait_info *timewait_info;
+ struct cm_id_private *res = NULL;
+ spin_lock_irq(&cm.lock);
while (node) {
timewait_info = rb_entry(node, struct cm_timewait_info,
remote_id_node);
@@ -673,14 +749,18 @@ static struct cm_timewait_info * cm_find_remote_id(__be64 remote_ca_guid,
node = node->rb_left;
else if (be64_gt(remote_ca_guid, timewait_info->remote_ca_guid))
node = node->rb_right;
- else
- return timewait_info;
+ else {
+ res = cm_acquire_id(timewait_info->work.local_id,
+ timewait_info->work.remote_id);
+ break;
+ }
}
- return NULL;
+ spin_unlock_irq(&cm.lock);
+ return res;
}
-static struct cm_timewait_info * cm_insert_remote_qpn(struct cm_timewait_info
- *timewait_info)
+static struct cm_timewait_info *
+cm_insert_remote_qpn(struct cm_timewait_info *timewait_info)
{
struct rb_node **link = &cm.remote_qp_table.rb_node;
struct rb_node *parent = NULL;
@@ -709,13 +789,12 @@ static struct cm_timewait_info * cm_insert_remote_qpn(struct cm_timewait_info
return NULL;
}
-static struct cm_id_private * cm_insert_remote_sidr(struct cm_id_private
- *cm_id_priv)
+static struct cm_id_private *
+cm_insert_remote_sidr(struct cm_id_private *cm_id_priv)
{
struct rb_node **link = &cm.remote_sidr_table.rb_node;
struct rb_node *parent = NULL;
struct cm_id_private *cur_cm_id_priv;
- union ib_gid *port_gid = &cm_id_priv->av.dgid;
__be32 remote_id = cm_id_priv->id.remote_id;
while (*link) {
@@ -727,12 +806,9 @@ static struct cm_id_private * cm_insert_remote_sidr(struct cm_id_private
else if (be32_gt(remote_id, cur_cm_id_priv->id.remote_id))
link = &(*link)->rb_right;
else {
- int cmp;
- cmp = memcmp(port_gid, &cur_cm_id_priv->av.dgid,
- sizeof *port_gid);
- if (cmp < 0)
+ if (cur_cm_id_priv->sidr_slid < cm_id_priv->sidr_slid)
link = &(*link)->rb_left;
- else if (cmp > 0)
+ else if (cur_cm_id_priv->sidr_slid > cm_id_priv->sidr_slid)
link = &(*link)->rb_right;
else
return cur_cm_id_priv;
@@ -743,21 +819,12 @@ static struct cm_id_private * cm_insert_remote_sidr(struct cm_id_private
return NULL;
}
-static void cm_reject_sidr_req(struct cm_id_private *cm_id_priv,
- enum ib_cm_sidr_status status)
-{
- struct ib_cm_sidr_rep_param param;
-
- memset(&param, 0, sizeof param);
- param.status = status;
- ib_send_cm_sidr_rep(&cm_id_priv->id, &param);
-}
-
-struct ib_cm_id *ib_create_cm_id(struct ib_device *device,
- ib_cm_handler cm_handler,
- void *context)
+static struct cm_id_private *cm_alloc_id_priv(struct ib_device *device,
+ ib_cm_handler cm_handler,
+ void *context)
{
struct cm_id_private *cm_id_priv;
+ u32 id;
int ret;
cm_id_priv = kzalloc(sizeof *cm_id_priv, GFP_KERNEL);
@@ -769,26 +836,54 @@ struct ib_cm_id *ib_create_cm_id(struct ib_device *device,
cm_id_priv->id.cm_handler = cm_handler;
cm_id_priv->id.context = context;
cm_id_priv->id.remote_cm_qpn = 1;
- ret = cm_alloc_id(cm_id_priv);
- if (ret)
- goto error;
+ RB_CLEAR_NODE(&cm_id_priv->service_node);
+ RB_CLEAR_NODE(&cm_id_priv->sidr_id_node);
spin_lock_init(&cm_id_priv->lock);
init_completion(&cm_id_priv->comp);
INIT_LIST_HEAD(&cm_id_priv->work_list);
- INIT_LIST_HEAD(&cm_id_priv->prim_list);
- INIT_LIST_HEAD(&cm_id_priv->altr_list);
atomic_set(&cm_id_priv->work_count, -1);
- atomic_set(&cm_id_priv->refcount, 1);
- return &cm_id_priv->id;
+ refcount_set(&cm_id_priv->refcount, 1);
+
+ ret = xa_alloc_cyclic(&cm.local_id_table, &id, NULL, xa_limit_32b,
+ &cm.local_id_next, GFP_KERNEL);
+ if (ret < 0)
+ goto error;
+ cm_id_priv->id.local_id = (__force __be32)id ^ cm.random_id_operand;
+
+ return cm_id_priv;
error:
kfree(cm_id_priv);
- return ERR_PTR(-ENOMEM);
+ return ERR_PTR(ret);
+}
+
+/*
+ * Make the ID visible to the MAD handlers and other threads that use the
+ * xarray.
+ */
+static void cm_finalize_id(struct cm_id_private *cm_id_priv)
+{
+ xa_store(&cm.local_id_table, cm_local_id(cm_id_priv->id.local_id),
+ cm_id_priv, GFP_ATOMIC);
+}
+
+struct ib_cm_id *ib_create_cm_id(struct ib_device *device,
+ ib_cm_handler cm_handler,
+ void *context)
+{
+ struct cm_id_private *cm_id_priv;
+
+ cm_id_priv = cm_alloc_id_priv(device, cm_handler, context);
+ if (IS_ERR(cm_id_priv))
+ return ERR_CAST(cm_id_priv);
+
+ cm_finalize_id(cm_id_priv);
+ return &cm_id_priv->id;
}
EXPORT_SYMBOL(ib_create_cm_id);
-static struct cm_work * cm_dequeue_work(struct cm_id_private *cm_id_priv)
+static struct cm_work *cm_dequeue_work(struct cm_id_private *cm_id_priv)
{
struct cm_work *work;
@@ -807,6 +902,36 @@ static void cm_free_work(struct cm_work *work)
kfree(work);
}
+static void cm_queue_work_unlock(struct cm_id_private *cm_id_priv,
+ struct cm_work *work)
+ __releases(&cm_id_priv->lock)
+{
+ bool immediate;
+
+ /*
+ * To deliver the event to the user callback we have the drop the
+ * spinlock, however, we need to ensure that the user callback is single
+ * threaded and receives events in the temporal order. If there are
+ * already events being processed then thread new events onto a list,
+ * the thread currently processing will pick them up.
+ */
+ immediate = atomic_inc_and_test(&cm_id_priv->work_count);
+ if (!immediate) {
+ list_add_tail(&work->list, &cm_id_priv->work_list);
+ /*
+ * This routine always consumes incoming reference. Once queued
+ * to the work_list then a reference is held by the thread
+ * currently running cm_process_work() and this reference is not
+ * needed.
+ */
+ cm_deref_id(cm_id_priv);
+ }
+ spin_unlock_irq(&cm_id_priv->lock);
+
+ if (immediate)
+ cm_process_work(cm_id_priv, work);
+}
+
static inline int cm_convert_to_ms(int iba_time)
{
/* approximate conversion to ms from 4.096us x 2^iba_time */
@@ -832,8 +957,10 @@ static u8 cm_ack_timeout(u8 ca_ack_delay, u8 packet_life_time)
return min(31, ack_timeout);
}
-static void cm_cleanup_timewait(struct cm_timewait_info *timewait_info)
+static void cm_remove_remote(struct cm_id_private *cm_id_priv)
{
+ struct cm_timewait_info *timewait_info = cm_id_priv->timewait_info;
+
if (timewait_info->inserted_remote_id) {
rb_erase(&timewait_info->remote_id_node, &cm.remote_id_table);
timewait_info->inserted_remote_id = 0;
@@ -845,7 +972,7 @@ static void cm_cleanup_timewait(struct cm_timewait_info *timewait_info)
}
}
-static struct cm_timewait_info * cm_create_timewait_info(__be32 local_id)
+static struct cm_timewait_info *cm_create_timewait_info(__be32 local_id)
{
struct cm_timewait_info *timewait_info;
@@ -865,12 +992,14 @@ static void cm_enter_timewait(struct cm_id_private *cm_id_priv)
unsigned long flags;
struct cm_device *cm_dev;
+ lockdep_assert_held(&cm_id_priv->lock);
+
cm_dev = ib_get_client_data(cm_id_priv->id.device, &cm_client);
if (!cm_dev)
return;
spin_lock_irqsave(&cm.lock, flags);
- cm_cleanup_timewait(cm_id_priv->timewait_info);
+ cm_remove_remote(cm_id_priv);
list_add_tail(&cm_id_priv->timewait_info->list, &cm.timewait_list);
spin_unlock_irqrestore(&cm.lock, flags);
@@ -889,6 +1018,11 @@ static void cm_enter_timewait(struct cm_id_private *cm_id_priv)
msecs_to_jiffies(wait_time));
spin_unlock_irqrestore(&cm.lock, flags);
+ /*
+ * The timewait_info is converted into a work and gets freed during
+ * cm_free_work() in cm_timewait_handler().
+ */
+ BUILD_BUG_ON(offsetof(struct cm_timewait_info, work) != 0);
cm_id_priv->timewait_info = NULL;
}
@@ -896,119 +1030,157 @@ static void cm_reset_to_idle(struct cm_id_private *cm_id_priv)
{
unsigned long flags;
+ lockdep_assert_held(&cm_id_priv->lock);
+
cm_id_priv->id.state = IB_CM_IDLE;
if (cm_id_priv->timewait_info) {
spin_lock_irqsave(&cm.lock, flags);
- cm_cleanup_timewait(cm_id_priv->timewait_info);
+ cm_remove_remote(cm_id_priv);
spin_unlock_irqrestore(&cm.lock, flags);
kfree(cm_id_priv->timewait_info);
cm_id_priv->timewait_info = NULL;
}
}
+static noinline void cm_destroy_id_wait_timeout(struct ib_cm_id *cm_id,
+ enum ib_cm_state old_state)
+{
+ struct cm_id_private *cm_id_priv;
+
+ cm_id_priv = container_of(cm_id, struct cm_id_private, id);
+ pr_err_ratelimited("%s: cm_id=%p timed out. state %d -> %d, refcnt=%d\n", __func__,
+ cm_id, old_state, cm_id->state, refcount_read(&cm_id_priv->refcount));
+}
+
static void cm_destroy_id(struct ib_cm_id *cm_id, int err)
{
struct cm_id_private *cm_id_priv;
+ enum ib_cm_state old_state;
+ unsigned long timeout;
struct cm_work *work;
+ int ret;
cm_id_priv = container_of(cm_id, struct cm_id_private, id);
-retest:
spin_lock_irq(&cm_id_priv->lock);
+ old_state = cm_id->state;
+retest:
switch (cm_id->state) {
case IB_CM_LISTEN:
- spin_unlock_irq(&cm_id_priv->lock);
-
- spin_lock_irq(&cm.lock);
+ spin_lock(&cm.lock);
if (--cm_id_priv->listen_sharecount > 0) {
/* The id is still shared. */
+ WARN_ON(refcount_read(&cm_id_priv->refcount) == 1);
+ spin_unlock(&cm.lock);
+ spin_unlock_irq(&cm_id_priv->lock);
cm_deref_id(cm_id_priv);
- spin_unlock_irq(&cm.lock);
return;
}
+ cm_id->state = IB_CM_IDLE;
rb_erase(&cm_id_priv->service_node, &cm.listen_service_table);
- spin_unlock_irq(&cm.lock);
+ RB_CLEAR_NODE(&cm_id_priv->service_node);
+ spin_unlock(&cm.lock);
break;
case IB_CM_SIDR_REQ_SENT:
cm_id->state = IB_CM_IDLE;
- ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
- spin_unlock_irq(&cm_id_priv->lock);
+ ib_cancel_mad(cm_id_priv->msg);
break;
case IB_CM_SIDR_REQ_RCVD:
- spin_unlock_irq(&cm_id_priv->lock);
- cm_reject_sidr_req(cm_id_priv, IB_SIDR_REJECT);
- spin_lock_irq(&cm.lock);
- if (!RB_EMPTY_NODE(&cm_id_priv->sidr_id_node))
- rb_erase(&cm_id_priv->sidr_id_node,
- &cm.remote_sidr_table);
- spin_unlock_irq(&cm.lock);
+ cm_send_sidr_rep_locked(cm_id_priv,
+ &(struct ib_cm_sidr_rep_param){
+ .status = IB_SIDR_REJECT });
+ /* cm_send_sidr_rep_locked will not move to IDLE if it fails */
+ cm_id->state = IB_CM_IDLE;
break;
case IB_CM_REQ_SENT:
case IB_CM_MRA_REQ_RCVD:
- ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
- spin_unlock_irq(&cm_id_priv->lock);
- ib_send_cm_rej(cm_id, IB_CM_REJ_TIMEOUT,
- &cm_id_priv->id.device->node_guid,
- sizeof cm_id_priv->id.device->node_guid,
- NULL, 0);
+ ib_cancel_mad(cm_id_priv->msg);
+ cm_send_rej_locked(cm_id_priv, IB_CM_REJ_TIMEOUT,
+ &cm_id_priv->id.device->node_guid,
+ sizeof(cm_id_priv->id.device->node_guid),
+ NULL, 0);
break;
case IB_CM_REQ_RCVD:
if (err == -ENOMEM) {
/* Do not reject to allow future retries. */
cm_reset_to_idle(cm_id_priv);
- spin_unlock_irq(&cm_id_priv->lock);
} else {
- spin_unlock_irq(&cm_id_priv->lock);
- ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED,
- NULL, 0, NULL, 0);
+ cm_send_rej_locked(cm_id_priv,
+ IB_CM_REJ_CONSUMER_DEFINED, NULL, 0,
+ NULL, 0);
}
break;
case IB_CM_REP_SENT:
case IB_CM_MRA_REP_RCVD:
- ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
- /* Fall through */
+ ib_cancel_mad(cm_id_priv->msg);
+ cm_send_rej_locked(cm_id_priv, IB_CM_REJ_CONSUMER_DEFINED, NULL,
+ 0, NULL, 0);
+ goto retest;
case IB_CM_MRA_REQ_SENT:
case IB_CM_REP_RCVD:
case IB_CM_MRA_REP_SENT:
- spin_unlock_irq(&cm_id_priv->lock);
- ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED,
- NULL, 0, NULL, 0);
+ cm_send_rej_locked(cm_id_priv, IB_CM_REJ_CONSUMER_DEFINED, NULL,
+ 0, NULL, 0);
break;
case IB_CM_ESTABLISHED:
- spin_unlock_irq(&cm_id_priv->lock);
- if (cm_id_priv->qp_type == IB_QPT_XRC_TGT)
+ if (cm_id_priv->qp_type == IB_QPT_XRC_TGT) {
+ cm_id->state = IB_CM_IDLE;
break;
- ib_send_cm_dreq(cm_id, NULL, 0);
+ }
+ cm_issue_dreq(cm_id_priv);
+ cm_enter_timewait(cm_id_priv);
goto retest;
case IB_CM_DREQ_SENT:
- ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
+ ib_cancel_mad(cm_id_priv->msg);
cm_enter_timewait(cm_id_priv);
- spin_unlock_irq(&cm_id_priv->lock);
- break;
+ goto retest;
case IB_CM_DREQ_RCVD:
- spin_unlock_irq(&cm_id_priv->lock);
- ib_send_cm_drep(cm_id, NULL, 0);
+ cm_send_drep_locked(cm_id_priv, NULL, 0);
+ WARN_ON(cm_id->state != IB_CM_TIMEWAIT);
+ goto retest;
+ case IB_CM_TIMEWAIT:
+ /*
+ * The cm_acquire_id in cm_timewait_handler will stop working
+ * once we do xa_erase below, so just move to idle here for
+ * consistency.
+ */
+ cm_id->state = IB_CM_IDLE;
break;
- default:
- spin_unlock_irq(&cm_id_priv->lock);
+ case IB_CM_IDLE:
break;
}
+ WARN_ON(cm_id->state != IB_CM_IDLE);
- spin_lock_irq(&cm.lock);
- if (!list_empty(&cm_id_priv->altr_list) &&
- (!cm_id_priv->altr_send_port_not_ready))
- list_del(&cm_id_priv->altr_list);
- if (!list_empty(&cm_id_priv->prim_list) &&
- (!cm_id_priv->prim_send_port_not_ready))
- list_del(&cm_id_priv->prim_list);
- spin_unlock_irq(&cm.lock);
+ spin_lock(&cm.lock);
+ /* Required for cleanup paths related cm_req_handler() */
+ if (cm_id_priv->timewait_info) {
+ cm_remove_remote(cm_id_priv);
+ kfree(cm_id_priv->timewait_info);
+ cm_id_priv->timewait_info = NULL;
+ }
+
+ WARN_ON(cm_id_priv->listen_sharecount);
+ WARN_ON(!RB_EMPTY_NODE(&cm_id_priv->service_node));
+ if (!RB_EMPTY_NODE(&cm_id_priv->sidr_id_node))
+ rb_erase(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table);
+ spin_unlock(&cm.lock);
+ spin_unlock_irq(&cm_id_priv->lock);
- cm_free_id(cm_id->local_id);
+ xa_erase(&cm.local_id_table, cm_local_id(cm_id->local_id));
cm_deref_id(cm_id_priv);
- wait_for_completion(&cm_id_priv->comp);
+ timeout = msecs_to_jiffies((cm_id_priv->max_cm_retries * cm_id_priv->timeout_ms * 5) / 4);
+ do {
+ ret = wait_for_completion_timeout(&cm_id_priv->comp, timeout);
+ if (!ret) /* timeout happened */
+ cm_destroy_id_wait_timeout(cm_id, old_state);
+ } while (!ret);
+
while ((work = cm_dequeue_work(cm_id_priv)) != NULL)
cm_free_work(work);
+
+ cm_destroy_av(&cm_id_priv->av);
+ cm_destroy_av(&cm_id_priv->alt_av);
kfree(cm_id_priv->private_data);
- kfree(cm_id_priv);
+ kfree_rcu(cm_id_priv, rcu);
}
void ib_destroy_cm_id(struct ib_cm_id *cm_id)
@@ -1017,70 +1189,63 @@ void ib_destroy_cm_id(struct ib_cm_id *cm_id)
}
EXPORT_SYMBOL(ib_destroy_cm_id);
+static int cm_init_listen(struct cm_id_private *cm_id_priv, __be64 service_id)
+{
+ if ((service_id & IB_SERVICE_ID_AGN_MASK) == IB_CM_ASSIGN_SERVICE_ID &&
+ (service_id != IB_CM_ASSIGN_SERVICE_ID))
+ return -EINVAL;
+
+ if (service_id == IB_CM_ASSIGN_SERVICE_ID)
+ cm_id_priv->id.service_id = cpu_to_be64(cm.listen_service_id++);
+ else
+ cm_id_priv->id.service_id = service_id;
+
+ return 0;
+}
+
/**
- * __ib_cm_listen - Initiates listening on the specified service ID for
+ * ib_cm_listen - Initiates listening on the specified service ID for
* connection and service ID resolution requests.
* @cm_id: Connection identifier associated with the listen request.
* @service_id: Service identifier matched against incoming connection
* and service ID resolution requests. The service ID should be specified
* network-byte order. If set to IB_CM_ASSIGN_SERVICE_ID, the CM will
* assign a service ID to the caller.
- * @service_mask: Mask applied to service ID used to listen across a
- * range of service IDs. If set to 0, the service ID is matched
- * exactly. This parameter is ignored if %service_id is set to
- * IB_CM_ASSIGN_SERVICE_ID.
*/
-static int __ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id,
- __be64 service_mask)
+int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id)
{
- struct cm_id_private *cm_id_priv, *cur_cm_id_priv;
- int ret = 0;
-
- service_mask = service_mask ? service_mask : ~cpu_to_be64(0);
- service_id &= service_mask;
- if ((service_id & IB_SERVICE_ID_AGN_MASK) == IB_CM_ASSIGN_SERVICE_ID &&
- (service_id != IB_CM_ASSIGN_SERVICE_ID))
- return -EINVAL;
-
- cm_id_priv = container_of(cm_id, struct cm_id_private, id);
- if (cm_id->state != IB_CM_IDLE)
- return -EINVAL;
-
- cm_id->state = IB_CM_LISTEN;
- ++cm_id_priv->listen_sharecount;
+ struct cm_id_private *cm_id_priv =
+ container_of(cm_id, struct cm_id_private, id);
+ unsigned long flags;
+ int ret;
- if (service_id == IB_CM_ASSIGN_SERVICE_ID) {
- cm_id->service_id = cpu_to_be64(cm.listen_service_id++);
- cm_id->service_mask = ~cpu_to_be64(0);
- } else {
- cm_id->service_id = service_id;
- cm_id->service_mask = service_mask;
+ spin_lock_irqsave(&cm_id_priv->lock, flags);
+ if (cm_id_priv->id.state != IB_CM_IDLE) {
+ ret = -EINVAL;
+ goto out;
}
- cur_cm_id_priv = cm_insert_listen(cm_id_priv);
- if (cur_cm_id_priv) {
- cm_id->state = IB_CM_IDLE;
- --cm_id_priv->listen_sharecount;
+ ret = cm_init_listen(cm_id_priv, service_id);
+ if (ret)
+ goto out;
+
+ if (!cm_insert_listen(cm_id_priv, NULL)) {
ret = -EBUSY;
+ goto out;
}
- return ret;
-}
-int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, __be64 service_mask)
-{
- unsigned long flags;
- int ret;
-
- spin_lock_irqsave(&cm.lock, flags);
- ret = __ib_cm_listen(cm_id, service_id, service_mask);
- spin_unlock_irqrestore(&cm.lock, flags);
+ cm_id_priv->id.state = IB_CM_LISTEN;
+ ret = 0;
+out:
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
return ret;
}
EXPORT_SYMBOL(ib_cm_listen);
/**
- * Create a new listening ib_cm_id and listen on the given service ID.
+ * ib_cm_insert_listen - Create a new listening ib_cm_id and listen on
+ * the given service ID.
*
* If there's an existing ID listening on that same device and service ID,
* return it.
@@ -1099,61 +1264,57 @@ struct ib_cm_id *ib_cm_insert_listen(struct ib_device *device,
ib_cm_handler cm_handler,
__be64 service_id)
{
+ struct cm_id_private *listen_id_priv;
struct cm_id_private *cm_id_priv;
- struct ib_cm_id *cm_id;
- unsigned long flags;
int err = 0;
/* Create an ID in advance, since the creation may sleep */
- cm_id = ib_create_cm_id(device, cm_handler, NULL);
- if (IS_ERR(cm_id))
- return cm_id;
+ cm_id_priv = cm_alloc_id_priv(device, cm_handler, NULL);
+ if (IS_ERR(cm_id_priv))
+ return ERR_CAST(cm_id_priv);
- spin_lock_irqsave(&cm.lock, flags);
+ err = cm_init_listen(cm_id_priv, service_id);
+ if (err) {
+ ib_destroy_cm_id(&cm_id_priv->id);
+ return ERR_PTR(err);
+ }
- if (service_id == IB_CM_ASSIGN_SERVICE_ID)
- goto new_id;
-
- /* Find an existing ID */
- cm_id_priv = cm_find_listen(device, service_id);
- if (cm_id_priv) {
- if (cm_id->cm_handler != cm_handler || cm_id->context) {
- /* Sharing an ib_cm_id with different handlers is not
- * supported */
- spin_unlock_irqrestore(&cm.lock, flags);
+ spin_lock_irq(&cm_id_priv->lock);
+ listen_id_priv = cm_insert_listen(cm_id_priv, cm_handler);
+ if (listen_id_priv != cm_id_priv) {
+ spin_unlock_irq(&cm_id_priv->lock);
+ ib_destroy_cm_id(&cm_id_priv->id);
+ if (!listen_id_priv)
return ERR_PTR(-EINVAL);
- }
- atomic_inc(&cm_id_priv->refcount);
- ++cm_id_priv->listen_sharecount;
- spin_unlock_irqrestore(&cm.lock, flags);
-
- ib_destroy_cm_id(cm_id);
- cm_id = &cm_id_priv->id;
- return cm_id;
+ return &listen_id_priv->id;
}
+ cm_id_priv->id.state = IB_CM_LISTEN;
+ spin_unlock_irq(&cm_id_priv->lock);
-new_id:
- /* Use newly created ID */
- err = __ib_cm_listen(cm_id, service_id, 0);
-
- spin_unlock_irqrestore(&cm.lock, flags);
+ /*
+ * A listen ID does not need to be in the xarray since it does not
+ * receive mads, is not placed in the remote_id or remote_qpn rbtree,
+ * and does not enter timewait.
+ */
- if (err) {
- ib_destroy_cm_id(cm_id);
- return ERR_PTR(err);
- }
- return cm_id;
+ return &cm_id_priv->id;
}
EXPORT_SYMBOL(ib_cm_insert_listen);
-static __be64 cm_form_tid(struct cm_id_private *cm_id_priv,
- enum cm_msg_sequence msg_seq)
+static __be64 cm_form_tid(struct cm_id_private *cm_id_priv)
{
- u64 hi_tid, low_tid;
+ u64 hi_tid = 0, low_tid;
+
+ lockdep_assert_held(&cm_id_priv->lock);
- hi_tid = ((u64) cm_id_priv->av.port->mad_agent->hi_tid) << 32;
- low_tid = (u64) ((__force u32)cm_id_priv->id.local_id |
- (msg_seq << 30));
+ low_tid = (u64)cm_id_priv->id.local_id;
+ if (!cm_id_priv->av.port)
+ return cpu_to_be64(low_tid);
+
+ read_lock(&cm_id_priv->av.port->cm_dev->mad_agent_lock);
+ if (cm_id_priv->av.port->mad_agent)
+ hi_tid = ((u64)cm_id_priv->av.port->mad_agent->hi_tid) << 32;
+ read_unlock(&cm_id_priv->av.port->cm_dev->mad_agent_lock);
return cpu_to_be64(hi_tid | low_tid);
}
@@ -1168,92 +1329,166 @@ static void cm_format_mad_hdr(struct ib_mad_hdr *hdr,
hdr->tid = tid;
}
+static void cm_format_mad_ece_hdr(struct ib_mad_hdr *hdr, __be16 attr_id,
+ __be64 tid, u32 attr_mod)
+{
+ cm_format_mad_hdr(hdr, attr_id, tid);
+ hdr->attr_mod = cpu_to_be32(attr_mod);
+}
+
static void cm_format_req(struct cm_req_msg *req_msg,
struct cm_id_private *cm_id_priv,
struct ib_cm_req_param *param)
{
- struct ib_sa_path_rec *pri_path = param->primary_path;
- struct ib_sa_path_rec *alt_path = param->alternate_path;
-
- cm_format_mad_hdr(&req_msg->hdr, CM_REQ_ATTR_ID,
- cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_REQ));
-
- req_msg->local_comm_id = cm_id_priv->id.local_id;
- req_msg->service_id = param->service_id;
- req_msg->local_ca_guid = cm_id_priv->id.device->node_guid;
- cm_req_set_local_qpn(req_msg, cpu_to_be32(param->qp_num));
- cm_req_set_init_depth(req_msg, param->initiator_depth);
- cm_req_set_remote_resp_timeout(req_msg,
- param->remote_cm_response_timeout);
+ struct sa_path_rec *pri_path = param->primary_path;
+ struct sa_path_rec *alt_path = param->alternate_path;
+ bool pri_ext = false;
+ __be16 lid;
+
+ if (pri_path->rec_type == SA_PATH_REC_TYPE_OPA)
+ pri_ext = opa_is_extended_lid(pri_path->opa.dlid,
+ pri_path->opa.slid);
+
+ cm_format_mad_ece_hdr(&req_msg->hdr, CM_REQ_ATTR_ID,
+ cm_form_tid(cm_id_priv), param->ece.attr_mod);
+
+ IBA_SET(CM_REQ_LOCAL_COMM_ID, req_msg,
+ be32_to_cpu(cm_id_priv->id.local_id));
+ IBA_SET(CM_REQ_SERVICE_ID, req_msg, be64_to_cpu(param->service_id));
+ IBA_SET(CM_REQ_LOCAL_CA_GUID, req_msg,
+ be64_to_cpu(cm_id_priv->id.device->node_guid));
+ IBA_SET(CM_REQ_LOCAL_QPN, req_msg, param->qp_num);
+ IBA_SET(CM_REQ_INITIATOR_DEPTH, req_msg, param->initiator_depth);
+ IBA_SET(CM_REQ_REMOTE_CM_RESPONSE_TIMEOUT, req_msg,
+ param->remote_cm_response_timeout);
cm_req_set_qp_type(req_msg, param->qp_type);
- cm_req_set_flow_ctrl(req_msg, param->flow_control);
- cm_req_set_starting_psn(req_msg, cpu_to_be32(param->starting_psn));
- cm_req_set_local_resp_timeout(req_msg,
- param->local_cm_response_timeout);
- req_msg->pkey = param->primary_path->pkey;
- cm_req_set_path_mtu(req_msg, param->primary_path->mtu);
- cm_req_set_max_cm_retries(req_msg, param->max_cm_retries);
+ IBA_SET(CM_REQ_END_TO_END_FLOW_CONTROL, req_msg, param->flow_control);
+ IBA_SET(CM_REQ_STARTING_PSN, req_msg, param->starting_psn);
+ IBA_SET(CM_REQ_LOCAL_CM_RESPONSE_TIMEOUT, req_msg,
+ param->local_cm_response_timeout);
+ IBA_SET(CM_REQ_PARTITION_KEY, req_msg,
+ be16_to_cpu(param->primary_path->pkey));
+ IBA_SET(CM_REQ_PATH_PACKET_PAYLOAD_MTU, req_msg,
+ param->primary_path->mtu);
+ IBA_SET(CM_REQ_MAX_CM_RETRIES, req_msg, param->max_cm_retries);
if (param->qp_type != IB_QPT_XRC_INI) {
- cm_req_set_resp_res(req_msg, param->responder_resources);
- cm_req_set_retry_count(req_msg, param->retry_count);
- cm_req_set_rnr_retry_count(req_msg, param->rnr_retry_count);
- cm_req_set_srq(req_msg, param->srq);
+ IBA_SET(CM_REQ_RESPONDER_RESOURCES, req_msg,
+ param->responder_resources);
+ IBA_SET(CM_REQ_RETRY_COUNT, req_msg, param->retry_count);
+ IBA_SET(CM_REQ_RNR_RETRY_COUNT, req_msg,
+ param->rnr_retry_count);
+ IBA_SET(CM_REQ_SRQ, req_msg, param->srq);
}
+ *IBA_GET_MEM_PTR(CM_REQ_PRIMARY_LOCAL_PORT_GID, req_msg) =
+ pri_path->sgid;
+ *IBA_GET_MEM_PTR(CM_REQ_PRIMARY_REMOTE_PORT_GID, req_msg) =
+ pri_path->dgid;
+ if (pri_ext) {
+ IBA_GET_MEM_PTR(CM_REQ_PRIMARY_LOCAL_PORT_GID, req_msg)
+ ->global.interface_id =
+ OPA_MAKE_ID(be32_to_cpu(pri_path->opa.slid));
+ IBA_GET_MEM_PTR(CM_REQ_PRIMARY_REMOTE_PORT_GID, req_msg)
+ ->global.interface_id =
+ OPA_MAKE_ID(be32_to_cpu(pri_path->opa.dlid));
+ }
if (pri_path->hop_limit <= 1) {
- req_msg->primary_local_lid = pri_path->slid;
- req_msg->primary_remote_lid = pri_path->dlid;
+ IBA_SET(CM_REQ_PRIMARY_LOCAL_PORT_LID, req_msg,
+ be16_to_cpu(pri_ext ? 0 :
+ htons(ntohl(sa_path_get_slid(
+ pri_path)))));
+ IBA_SET(CM_REQ_PRIMARY_REMOTE_PORT_LID, req_msg,
+ be16_to_cpu(pri_ext ? 0 :
+ htons(ntohl(sa_path_get_dlid(
+ pri_path)))));
} else {
+
+ if (param->primary_path_inbound) {
+ lid = param->primary_path_inbound->ib.dlid;
+ IBA_SET(CM_REQ_PRIMARY_LOCAL_PORT_LID, req_msg,
+ be16_to_cpu(lid));
+ } else
+ IBA_SET(CM_REQ_PRIMARY_LOCAL_PORT_LID, req_msg,
+ be16_to_cpu(IB_LID_PERMISSIVE));
+
/* Work-around until there's a way to obtain remote LID info */
- req_msg->primary_local_lid = IB_LID_PERMISSIVE;
- req_msg->primary_remote_lid = IB_LID_PERMISSIVE;
+ IBA_SET(CM_REQ_PRIMARY_REMOTE_PORT_LID, req_msg,
+ be16_to_cpu(IB_LID_PERMISSIVE));
}
- req_msg->primary_local_gid = pri_path->sgid;
- req_msg->primary_remote_gid = pri_path->dgid;
- cm_req_set_primary_flow_label(req_msg, pri_path->flow_label);
- cm_req_set_primary_packet_rate(req_msg, pri_path->rate);
- req_msg->primary_traffic_class = pri_path->traffic_class;
- req_msg->primary_hop_limit = pri_path->hop_limit;
- cm_req_set_primary_sl(req_msg, pri_path->sl);
- cm_req_set_primary_subnet_local(req_msg, (pri_path->hop_limit <= 1));
- cm_req_set_primary_local_ack_timeout(req_msg,
+ IBA_SET(CM_REQ_PRIMARY_FLOW_LABEL, req_msg,
+ be32_to_cpu(pri_path->flow_label));
+ IBA_SET(CM_REQ_PRIMARY_PACKET_RATE, req_msg, pri_path->rate);
+ IBA_SET(CM_REQ_PRIMARY_TRAFFIC_CLASS, req_msg, pri_path->traffic_class);
+ IBA_SET(CM_REQ_PRIMARY_HOP_LIMIT, req_msg, pri_path->hop_limit);
+ IBA_SET(CM_REQ_PRIMARY_SL, req_msg, pri_path->sl);
+ IBA_SET(CM_REQ_PRIMARY_SUBNET_LOCAL, req_msg,
+ (pri_path->hop_limit <= 1));
+ IBA_SET(CM_REQ_PRIMARY_LOCAL_ACK_TIMEOUT, req_msg,
cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay,
pri_path->packet_life_time));
if (alt_path) {
+ bool alt_ext = false;
+
+ if (alt_path->rec_type == SA_PATH_REC_TYPE_OPA)
+ alt_ext = opa_is_extended_lid(alt_path->opa.dlid,
+ alt_path->opa.slid);
+
+ *IBA_GET_MEM_PTR(CM_REQ_ALTERNATE_LOCAL_PORT_GID, req_msg) =
+ alt_path->sgid;
+ *IBA_GET_MEM_PTR(CM_REQ_ALTERNATE_REMOTE_PORT_GID, req_msg) =
+ alt_path->dgid;
+ if (alt_ext) {
+ IBA_GET_MEM_PTR(CM_REQ_ALTERNATE_LOCAL_PORT_GID,
+ req_msg)
+ ->global.interface_id =
+ OPA_MAKE_ID(be32_to_cpu(alt_path->opa.slid));
+ IBA_GET_MEM_PTR(CM_REQ_ALTERNATE_REMOTE_PORT_GID,
+ req_msg)
+ ->global.interface_id =
+ OPA_MAKE_ID(be32_to_cpu(alt_path->opa.dlid));
+ }
if (alt_path->hop_limit <= 1) {
- req_msg->alt_local_lid = alt_path->slid;
- req_msg->alt_remote_lid = alt_path->dlid;
+ IBA_SET(CM_REQ_ALTERNATE_LOCAL_PORT_LID, req_msg,
+ be16_to_cpu(
+ alt_ext ? 0 :
+ htons(ntohl(sa_path_get_slid(
+ alt_path)))));
+ IBA_SET(CM_REQ_ALTERNATE_REMOTE_PORT_LID, req_msg,
+ be16_to_cpu(
+ alt_ext ? 0 :
+ htons(ntohl(sa_path_get_dlid(
+ alt_path)))));
} else {
- req_msg->alt_local_lid = IB_LID_PERMISSIVE;
- req_msg->alt_remote_lid = IB_LID_PERMISSIVE;
+ IBA_SET(CM_REQ_ALTERNATE_LOCAL_PORT_LID, req_msg,
+ be16_to_cpu(IB_LID_PERMISSIVE));
+ IBA_SET(CM_REQ_ALTERNATE_REMOTE_PORT_LID, req_msg,
+ be16_to_cpu(IB_LID_PERMISSIVE));
}
- req_msg->alt_local_gid = alt_path->sgid;
- req_msg->alt_remote_gid = alt_path->dgid;
- cm_req_set_alt_flow_label(req_msg,
- alt_path->flow_label);
- cm_req_set_alt_packet_rate(req_msg, alt_path->rate);
- req_msg->alt_traffic_class = alt_path->traffic_class;
- req_msg->alt_hop_limit = alt_path->hop_limit;
- cm_req_set_alt_sl(req_msg, alt_path->sl);
- cm_req_set_alt_subnet_local(req_msg, (alt_path->hop_limit <= 1));
- cm_req_set_alt_local_ack_timeout(req_msg,
+ IBA_SET(CM_REQ_ALTERNATE_FLOW_LABEL, req_msg,
+ be32_to_cpu(alt_path->flow_label));
+ IBA_SET(CM_REQ_ALTERNATE_PACKET_RATE, req_msg, alt_path->rate);
+ IBA_SET(CM_REQ_ALTERNATE_TRAFFIC_CLASS, req_msg,
+ alt_path->traffic_class);
+ IBA_SET(CM_REQ_ALTERNATE_HOP_LIMIT, req_msg,
+ alt_path->hop_limit);
+ IBA_SET(CM_REQ_ALTERNATE_SL, req_msg, alt_path->sl);
+ IBA_SET(CM_REQ_ALTERNATE_SUBNET_LOCAL, req_msg,
+ (alt_path->hop_limit <= 1));
+ IBA_SET(CM_REQ_ALTERNATE_LOCAL_ACK_TIMEOUT, req_msg,
cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay,
alt_path->packet_life_time));
}
+ IBA_SET(CM_REQ_VENDOR_ID, req_msg, param->ece.vendor_id);
if (param->private_data && param->private_data_len)
- memcpy(req_msg->private_data, param->private_data,
- param->private_data_len);
+ IBA_SET_MEM(CM_REQ_PRIVATE_DATA, req_msg, param->private_data,
+ param->private_data_len);
}
static int cm_validate_req_param(struct ib_cm_req_param *param)
{
- /* peer-to-peer not supported */
- if (param->peer_to_peer)
- return -EINVAL;
-
if (!param->primary_path)
return -EINVAL;
@@ -1276,7 +1511,9 @@ static int cm_validate_req_param(struct ib_cm_req_param *param)
int ib_send_cm_req(struct ib_cm_id *cm_id,
struct ib_cm_req_param *param)
{
+ struct cm_av av = {}, alt_av = {};
struct cm_id_private *cm_id_priv;
+ struct ib_mad_send_buf *msg;
struct cm_req_msg *req_msg;
unsigned long flags;
int ret;
@@ -1288,10 +1525,9 @@ int ib_send_cm_req(struct ib_cm_id *cm_id,
/* Verify that we're not in timewait. */
cm_id_priv = container_of(cm_id, struct cm_id_private, id);
spin_lock_irqsave(&cm_id_priv->lock, flags);
- if (cm_id->state != IB_CM_IDLE) {
+ if (cm_id->state != IB_CM_IDLE || WARN_ON(cm_id_priv->timewait_info)) {
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
- ret = -EINVAL;
- goto out;
+ return -EINVAL;
}
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
@@ -1299,21 +1535,23 @@ int ib_send_cm_req(struct ib_cm_id *cm_id,
id.local_id);
if (IS_ERR(cm_id_priv->timewait_info)) {
ret = PTR_ERR(cm_id_priv->timewait_info);
- goto out;
+ cm_id_priv->timewait_info = NULL;
+ return ret;
}
- ret = cm_init_av_by_path(param->primary_path, &cm_id_priv->av,
- cm_id_priv);
+ ret = cm_init_av_by_path(param->primary_path,
+ param->ppath_sgid_attr, &av);
if (ret)
- goto error1;
+ return ret;
if (param->alternate_path) {
- ret = cm_init_av_by_path(param->alternate_path,
- &cm_id_priv->alt_av, cm_id_priv);
- if (ret)
- goto error1;
+ ret = cm_init_av_by_path(param->alternate_path, NULL,
+ &alt_av);
+ if (ret) {
+ cm_destroy_av(&av);
+ return ret;
+ }
}
cm_id->service_id = param->service_id;
- cm_id->service_mask = ~cpu_to_be64(0);
cm_id_priv->timeout_ms = cm_convert_to_ms(
param->primary_path->packet_life_time) * 2 +
cm_convert_to_ms(
@@ -1326,33 +1564,42 @@ int ib_send_cm_req(struct ib_cm_id *cm_id,
cm_id_priv->pkey = param->primary_path->pkey;
cm_id_priv->qp_type = param->qp_type;
- ret = cm_alloc_msg(cm_id_priv, &cm_id_priv->msg);
- if (ret)
- goto error1;
+ spin_lock_irqsave(&cm_id_priv->lock, flags);
+
+ cm_move_av_from_path(&cm_id_priv->av, &av);
+ if (param->primary_path_outbound)
+ cm_id_priv->av.dlid_datapath =
+ be16_to_cpu(param->primary_path_outbound->ib.dlid);
+
+ if (param->alternate_path)
+ cm_move_av_from_path(&cm_id_priv->alt_av, &alt_av);
- req_msg = (struct cm_req_msg *) cm_id_priv->msg->mad;
+ msg = cm_alloc_priv_msg(cm_id_priv, IB_CM_REQ_SENT);
+ if (IS_ERR(msg)) {
+ ret = PTR_ERR(msg);
+ goto out_unlock;
+ }
+
+ req_msg = (struct cm_req_msg *)msg->mad;
cm_format_req(req_msg, cm_id_priv, param);
cm_id_priv->tid = req_msg->hdr.tid;
- cm_id_priv->msg->timeout_ms = cm_id_priv->timeout_ms;
- cm_id_priv->msg->context[1] = (void *) (unsigned long) IB_CM_REQ_SENT;
- cm_id_priv->local_qpn = cm_req_get_local_qpn(req_msg);
- cm_id_priv->rq_psn = cm_req_get_starting_psn(req_msg);
+ cm_id_priv->local_qpn = cpu_to_be32(IBA_GET(CM_REQ_LOCAL_QPN, req_msg));
+ cm_id_priv->rq_psn = cpu_to_be32(IBA_GET(CM_REQ_STARTING_PSN, req_msg));
- spin_lock_irqsave(&cm_id_priv->lock, flags);
- ret = ib_post_send_mad(cm_id_priv->msg, NULL);
- if (ret) {
- spin_unlock_irqrestore(&cm_id_priv->lock, flags);
- goto error2;
- }
+ trace_icm_send_req(&cm_id_priv->id);
+ ret = ib_post_send_mad(msg, NULL);
+ if (ret)
+ goto out_free;
BUG_ON(cm_id->state != IB_CM_IDLE);
cm_id->state = IB_CM_REQ_SENT;
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
return 0;
-
-error2: cm_free_msg(cm_id_priv->msg);
-error1: kfree(cm_id_priv->timewait_info);
-out: return ret;
+out_free:
+ cm_free_priv_msg(msg);
+out_unlock:
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+ return ret;
}
EXPORT_SYMBOL(ib_send_cm_req);
@@ -1366,7 +1613,7 @@ static int cm_issue_rej(struct cm_port *port,
struct cm_rej_msg *rej_msg, *rcv_msg;
int ret;
- ret = cm_alloc_response_msg(port, mad_recv_wc, &msg);
+ ret = cm_alloc_response_msg(port, mad_recv_wc, false, &msg);
if (ret)
return ret;
@@ -1375,16 +1622,21 @@ static int cm_issue_rej(struct cm_port *port,
rej_msg = (struct cm_rej_msg *) msg->mad;
cm_format_mad_hdr(&rej_msg->hdr, CM_REJ_ATTR_ID, rcv_msg->hdr.tid);
- rej_msg->remote_comm_id = rcv_msg->local_comm_id;
- rej_msg->local_comm_id = rcv_msg->remote_comm_id;
- cm_rej_set_msg_rejected(rej_msg, msg_rejected);
- rej_msg->reason = cpu_to_be16(reason);
+ IBA_SET(CM_REJ_REMOTE_COMM_ID, rej_msg,
+ IBA_GET(CM_REJ_LOCAL_COMM_ID, rcv_msg));
+ IBA_SET(CM_REJ_LOCAL_COMM_ID, rej_msg,
+ IBA_GET(CM_REJ_REMOTE_COMM_ID, rcv_msg));
+ IBA_SET(CM_REJ_MESSAGE_REJECTED, rej_msg, msg_rejected);
+ IBA_SET(CM_REJ_REASON, rej_msg, reason);
if (ari && ari_length) {
- cm_rej_set_reject_info_len(rej_msg, ari_length);
- memcpy(rej_msg->ari, ari, ari_length);
+ IBA_SET(CM_REJ_REJECTED_INFO_LENGTH, rej_msg, ari_length);
+ IBA_SET_MEM(CM_REJ_ARI, rej_msg, ari, ari_length);
}
+ trace_icm_issue_rej(
+ IBA_GET(CM_REJ_LOCAL_COMM_ID, rcv_msg),
+ IBA_GET(CM_REJ_REMOTE_COMM_ID, rcv_msg));
ret = ib_post_send_mad(msg, NULL);
if (ret)
cm_free_msg(msg);
@@ -1392,74 +1644,141 @@ static int cm_issue_rej(struct cm_port *port,
return ret;
}
-static inline int cm_is_active_peer(__be64 local_ca_guid, __be64 remote_ca_guid,
- __be32 local_qpn, __be32 remote_qpn)
+static bool cm_req_has_alt_path(struct cm_req_msg *req_msg)
+{
+ return ((cpu_to_be16(
+ IBA_GET(CM_REQ_ALTERNATE_LOCAL_PORT_LID, req_msg))) ||
+ (ib_is_opa_gid(IBA_GET_MEM_PTR(CM_REQ_ALTERNATE_LOCAL_PORT_GID,
+ req_msg))));
+}
+
+static void cm_path_set_rec_type(struct ib_device *ib_device, u32 port_num,
+ struct sa_path_rec *path, union ib_gid *gid)
{
- return (be64_to_cpu(local_ca_guid) > be64_to_cpu(remote_ca_guid) ||
- ((local_ca_guid == remote_ca_guid) &&
- (be32_to_cpu(local_qpn) > be32_to_cpu(remote_qpn))));
+ if (ib_is_opa_gid(gid) && rdma_cap_opa_ah(ib_device, port_num))
+ path->rec_type = SA_PATH_REC_TYPE_OPA;
+ else
+ path->rec_type = SA_PATH_REC_TYPE_IB;
+}
+
+static void cm_format_path_lid_from_req(struct cm_req_msg *req_msg,
+ struct sa_path_rec *primary_path,
+ struct sa_path_rec *alt_path,
+ struct ib_wc *wc)
+{
+ u32 lid;
+
+ if (primary_path->rec_type != SA_PATH_REC_TYPE_OPA) {
+ sa_path_set_dlid(primary_path, wc->slid);
+ sa_path_set_slid(primary_path,
+ IBA_GET(CM_REQ_PRIMARY_REMOTE_PORT_LID,
+ req_msg));
+ } else {
+ lid = opa_get_lid_from_gid(IBA_GET_MEM_PTR(
+ CM_REQ_PRIMARY_LOCAL_PORT_GID, req_msg));
+ sa_path_set_dlid(primary_path, lid);
+
+ lid = opa_get_lid_from_gid(IBA_GET_MEM_PTR(
+ CM_REQ_PRIMARY_REMOTE_PORT_GID, req_msg));
+ sa_path_set_slid(primary_path, lid);
+ }
+
+ if (!cm_req_has_alt_path(req_msg))
+ return;
+
+ if (alt_path->rec_type != SA_PATH_REC_TYPE_OPA) {
+ sa_path_set_dlid(alt_path,
+ IBA_GET(CM_REQ_ALTERNATE_LOCAL_PORT_LID,
+ req_msg));
+ sa_path_set_slid(alt_path,
+ IBA_GET(CM_REQ_ALTERNATE_REMOTE_PORT_LID,
+ req_msg));
+ } else {
+ lid = opa_get_lid_from_gid(IBA_GET_MEM_PTR(
+ CM_REQ_ALTERNATE_LOCAL_PORT_GID, req_msg));
+ sa_path_set_dlid(alt_path, lid);
+
+ lid = opa_get_lid_from_gid(IBA_GET_MEM_PTR(
+ CM_REQ_ALTERNATE_REMOTE_PORT_GID, req_msg));
+ sa_path_set_slid(alt_path, lid);
+ }
}
static void cm_format_paths_from_req(struct cm_req_msg *req_msg,
- struct ib_sa_path_rec *primary_path,
- struct ib_sa_path_rec *alt_path)
-{
- memset(primary_path, 0, sizeof *primary_path);
- primary_path->dgid = req_msg->primary_local_gid;
- primary_path->sgid = req_msg->primary_remote_gid;
- primary_path->dlid = req_msg->primary_local_lid;
- primary_path->slid = req_msg->primary_remote_lid;
- primary_path->flow_label = cm_req_get_primary_flow_label(req_msg);
- primary_path->hop_limit = req_msg->primary_hop_limit;
- primary_path->traffic_class = req_msg->primary_traffic_class;
+ struct sa_path_rec *primary_path,
+ struct sa_path_rec *alt_path,
+ struct ib_wc *wc)
+{
+ primary_path->dgid =
+ *IBA_GET_MEM_PTR(CM_REQ_PRIMARY_LOCAL_PORT_GID, req_msg);
+ primary_path->sgid =
+ *IBA_GET_MEM_PTR(CM_REQ_PRIMARY_REMOTE_PORT_GID, req_msg);
+ primary_path->flow_label =
+ cpu_to_be32(IBA_GET(CM_REQ_PRIMARY_FLOW_LABEL, req_msg));
+ primary_path->hop_limit = IBA_GET(CM_REQ_PRIMARY_HOP_LIMIT, req_msg);
+ primary_path->traffic_class =
+ IBA_GET(CM_REQ_PRIMARY_TRAFFIC_CLASS, req_msg);
primary_path->reversible = 1;
- primary_path->pkey = req_msg->pkey;
- primary_path->sl = cm_req_get_primary_sl(req_msg);
+ primary_path->pkey =
+ cpu_to_be16(IBA_GET(CM_REQ_PARTITION_KEY, req_msg));
+ primary_path->sl = IBA_GET(CM_REQ_PRIMARY_SL, req_msg);
primary_path->mtu_selector = IB_SA_EQ;
- primary_path->mtu = cm_req_get_path_mtu(req_msg);
+ primary_path->mtu = IBA_GET(CM_REQ_PATH_PACKET_PAYLOAD_MTU, req_msg);
primary_path->rate_selector = IB_SA_EQ;
- primary_path->rate = cm_req_get_primary_packet_rate(req_msg);
+ primary_path->rate = IBA_GET(CM_REQ_PRIMARY_PACKET_RATE, req_msg);
primary_path->packet_life_time_selector = IB_SA_EQ;
primary_path->packet_life_time =
- cm_req_get_primary_local_ack_timeout(req_msg);
+ IBA_GET(CM_REQ_PRIMARY_LOCAL_ACK_TIMEOUT, req_msg);
primary_path->packet_life_time -= (primary_path->packet_life_time > 0);
- primary_path->service_id = req_msg->service_id;
-
- if (req_msg->alt_local_lid) {
- memset(alt_path, 0, sizeof *alt_path);
- alt_path->dgid = req_msg->alt_local_gid;
- alt_path->sgid = req_msg->alt_remote_gid;
- alt_path->dlid = req_msg->alt_local_lid;
- alt_path->slid = req_msg->alt_remote_lid;
- alt_path->flow_label = cm_req_get_alt_flow_label(req_msg);
- alt_path->hop_limit = req_msg->alt_hop_limit;
- alt_path->traffic_class = req_msg->alt_traffic_class;
+ primary_path->service_id =
+ cpu_to_be64(IBA_GET(CM_REQ_SERVICE_ID, req_msg));
+ if (sa_path_is_roce(primary_path))
+ primary_path->roce.route_resolved = false;
+
+ if (cm_req_has_alt_path(req_msg)) {
+ alt_path->dgid = *IBA_GET_MEM_PTR(
+ CM_REQ_ALTERNATE_LOCAL_PORT_GID, req_msg);
+ alt_path->sgid = *IBA_GET_MEM_PTR(
+ CM_REQ_ALTERNATE_REMOTE_PORT_GID, req_msg);
+ alt_path->flow_label = cpu_to_be32(
+ IBA_GET(CM_REQ_ALTERNATE_FLOW_LABEL, req_msg));
+ alt_path->hop_limit =
+ IBA_GET(CM_REQ_ALTERNATE_HOP_LIMIT, req_msg);
+ alt_path->traffic_class =
+ IBA_GET(CM_REQ_ALTERNATE_TRAFFIC_CLASS, req_msg);
alt_path->reversible = 1;
- alt_path->pkey = req_msg->pkey;
- alt_path->sl = cm_req_get_alt_sl(req_msg);
+ alt_path->pkey =
+ cpu_to_be16(IBA_GET(CM_REQ_PARTITION_KEY, req_msg));
+ alt_path->sl = IBA_GET(CM_REQ_ALTERNATE_SL, req_msg);
alt_path->mtu_selector = IB_SA_EQ;
- alt_path->mtu = cm_req_get_path_mtu(req_msg);
+ alt_path->mtu =
+ IBA_GET(CM_REQ_PATH_PACKET_PAYLOAD_MTU, req_msg);
alt_path->rate_selector = IB_SA_EQ;
- alt_path->rate = cm_req_get_alt_packet_rate(req_msg);
+ alt_path->rate = IBA_GET(CM_REQ_ALTERNATE_PACKET_RATE, req_msg);
alt_path->packet_life_time_selector = IB_SA_EQ;
alt_path->packet_life_time =
- cm_req_get_alt_local_ack_timeout(req_msg);
+ IBA_GET(CM_REQ_ALTERNATE_LOCAL_ACK_TIMEOUT, req_msg);
alt_path->packet_life_time -= (alt_path->packet_life_time > 0);
- alt_path->service_id = req_msg->service_id;
+ alt_path->service_id =
+ cpu_to_be64(IBA_GET(CM_REQ_SERVICE_ID, req_msg));
+
+ if (sa_path_is_roce(alt_path))
+ alt_path->roce.route_resolved = false;
}
+ cm_format_path_lid_from_req(req_msg, primary_path, alt_path, wc);
}
static u16 cm_get_bth_pkey(struct cm_work *work)
{
struct ib_device *ib_dev = work->port->cm_dev->ib_device;
- u8 port_num = work->port->port_num;
+ u32 port_num = work->port->port_num;
u16 pkey_index = work->mad_recv_wc->wc->pkey_index;
u16 pkey;
int ret;
ret = ib_get_cached_pkey(ib_dev, port_num, pkey_index, &pkey);
if (ret) {
- dev_warn_ratelimited(&ib_dev->dev, "ib_cm: Couldn't retrieve pkey for incoming request (port %d, pkey index %d). %d\n",
+ dev_warn_ratelimited(&ib_dev->dev, "ib_cm: Couldn't retrieve pkey for incoming request (port %u, pkey index %u). %d\n",
port_num, pkey_index, ret);
return 0;
}
@@ -1467,6 +1786,35 @@ static u16 cm_get_bth_pkey(struct cm_work *work)
return pkey;
}
+/**
+ * cm_opa_to_ib_sgid - Convert OPA SGID to IB SGID
+ * ULPs (such as IPoIB) do not understand OPA GIDs and will
+ * reject them as the local_gid will not match the sgid. Therefore,
+ * change the pathrec's SGID to an IB SGID.
+ *
+ * @work: Work completion
+ * @path: Path record
+ */
+static void cm_opa_to_ib_sgid(struct cm_work *work,
+ struct sa_path_rec *path)
+{
+ struct ib_device *dev = work->port->cm_dev->ib_device;
+ u32 port_num = work->port->port_num;
+
+ if (rdma_cap_opa_ah(dev, port_num) &&
+ (ib_is_opa_gid(&path->sgid))) {
+ union ib_gid sgid;
+
+ if (rdma_query_gid(dev, port_num, 0, &sgid)) {
+ dev_warn(&dev->dev,
+ "Error updating sgid in CM request\n");
+ return;
+ }
+
+ path->sgid = sgid;
+ }
+}
+
static void cm_format_req_event(struct cm_work *work,
struct cm_id_private *cm_id_priv,
struct ib_cm_id *listen_id)
@@ -1480,26 +1828,35 @@ static void cm_format_req_event(struct cm_work *work,
param->bth_pkey = cm_get_bth_pkey(work);
param->port = cm_id_priv->av.port->port_num;
param->primary_path = &work->path[0];
- if (req_msg->alt_local_lid)
+ cm_opa_to_ib_sgid(work, param->primary_path);
+ if (cm_req_has_alt_path(req_msg)) {
param->alternate_path = &work->path[1];
- else
+ cm_opa_to_ib_sgid(work, param->alternate_path);
+ } else {
param->alternate_path = NULL;
- param->remote_ca_guid = req_msg->local_ca_guid;
- param->remote_qkey = be32_to_cpu(req_msg->local_qkey);
- param->remote_qpn = be32_to_cpu(cm_req_get_local_qpn(req_msg));
+ }
+ param->remote_ca_guid =
+ cpu_to_be64(IBA_GET(CM_REQ_LOCAL_CA_GUID, req_msg));
+ param->remote_qkey = IBA_GET(CM_REQ_LOCAL_Q_KEY, req_msg);
+ param->remote_qpn = IBA_GET(CM_REQ_LOCAL_QPN, req_msg);
param->qp_type = cm_req_get_qp_type(req_msg);
- param->starting_psn = be32_to_cpu(cm_req_get_starting_psn(req_msg));
- param->responder_resources = cm_req_get_init_depth(req_msg);
- param->initiator_depth = cm_req_get_resp_res(req_msg);
+ param->starting_psn = IBA_GET(CM_REQ_STARTING_PSN, req_msg);
+ param->responder_resources = IBA_GET(CM_REQ_INITIATOR_DEPTH, req_msg);
+ param->initiator_depth = IBA_GET(CM_REQ_RESPONDER_RESOURCES, req_msg);
param->local_cm_response_timeout =
- cm_req_get_remote_resp_timeout(req_msg);
- param->flow_control = cm_req_get_flow_ctrl(req_msg);
+ IBA_GET(CM_REQ_REMOTE_CM_RESPONSE_TIMEOUT, req_msg);
+ param->flow_control = IBA_GET(CM_REQ_END_TO_END_FLOW_CONTROL, req_msg);
param->remote_cm_response_timeout =
- cm_req_get_local_resp_timeout(req_msg);
- param->retry_count = cm_req_get_retry_count(req_msg);
- param->rnr_retry_count = cm_req_get_rnr_retry_count(req_msg);
- param->srq = cm_req_get_srq(req_msg);
- work->cm_event.private_data = &req_msg->private_data;
+ IBA_GET(CM_REQ_LOCAL_CM_RESPONSE_TIMEOUT, req_msg);
+ param->retry_count = IBA_GET(CM_REQ_RETRY_COUNT, req_msg);
+ param->rnr_retry_count = IBA_GET(CM_REQ_RNR_RETRY_COUNT, req_msg);
+ param->srq = IBA_GET(CM_REQ_SRQ, req_msg);
+ param->ppath_sgid_attr = cm_id_priv->av.ah_attr.grh.sgid_attr;
+ param->ece.vendor_id = IBA_GET(CM_REQ_VENDOR_ID, req_msg);
+ param->ece.attr_mod = be32_to_cpu(req_msg->hdr.attr_mod);
+
+ work->cm_event.private_data =
+ IBA_GET_MEM_PTR(CM_REQ_PRIVATE_DATA, req_msg);
}
static void cm_process_work(struct cm_id_private *cm_id_priv,
@@ -1515,7 +1872,9 @@ static void cm_process_work(struct cm_id_private *cm_id_priv,
spin_lock_irq(&cm_id_priv->lock);
work = cm_dequeue_work(cm_id_priv);
spin_unlock_irq(&cm_id_priv->lock);
- BUG_ON(!work);
+ if (!work)
+ return;
+
ret = cm_id_priv->id.cm_handler(&cm_id_priv->id,
&work->cm_event);
cm_free_work(work);
@@ -1527,58 +1886,67 @@ static void cm_process_work(struct cm_id_private *cm_id_priv,
static void cm_format_mra(struct cm_mra_msg *mra_msg,
struct cm_id_private *cm_id_priv,
- enum cm_msg_response msg_mraed, u8 service_timeout,
+ enum cm_msg_response msg_mraed,
const void *private_data, u8 private_data_len)
{
cm_format_mad_hdr(&mra_msg->hdr, CM_MRA_ATTR_ID, cm_id_priv->tid);
- cm_mra_set_msg_mraed(mra_msg, msg_mraed);
- mra_msg->local_comm_id = cm_id_priv->id.local_id;
- mra_msg->remote_comm_id = cm_id_priv->id.remote_id;
- cm_mra_set_service_timeout(mra_msg, service_timeout);
+ IBA_SET(CM_MRA_MESSAGE_MRAED, mra_msg, msg_mraed);
+ IBA_SET(CM_MRA_LOCAL_COMM_ID, mra_msg,
+ be32_to_cpu(cm_id_priv->id.local_id));
+ IBA_SET(CM_MRA_REMOTE_COMM_ID, mra_msg,
+ be32_to_cpu(cm_id_priv->id.remote_id));
+ IBA_SET(CM_MRA_SERVICE_TIMEOUT, mra_msg, CM_MRA_SETTING);
if (private_data && private_data_len)
- memcpy(mra_msg->private_data, private_data, private_data_len);
+ IBA_SET_MEM(CM_MRA_PRIVATE_DATA, mra_msg, private_data,
+ private_data_len);
}
static void cm_format_rej(struct cm_rej_msg *rej_msg,
struct cm_id_private *cm_id_priv,
- enum ib_cm_rej_reason reason,
- void *ari,
- u8 ari_length,
- const void *private_data,
- u8 private_data_len)
+ enum ib_cm_rej_reason reason, void *ari,
+ u8 ari_length, const void *private_data,
+ u8 private_data_len, enum ib_cm_state state)
{
+ lockdep_assert_held(&cm_id_priv->lock);
+
cm_format_mad_hdr(&rej_msg->hdr, CM_REJ_ATTR_ID, cm_id_priv->tid);
- rej_msg->remote_comm_id = cm_id_priv->id.remote_id;
+ IBA_SET(CM_REJ_REMOTE_COMM_ID, rej_msg,
+ be32_to_cpu(cm_id_priv->id.remote_id));
- switch(cm_id_priv->id.state) {
+ switch (state) {
case IB_CM_REQ_RCVD:
- rej_msg->local_comm_id = 0;
- cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_REQ);
+ IBA_SET(CM_REJ_LOCAL_COMM_ID, rej_msg, be32_to_cpu(0));
+ IBA_SET(CM_REJ_MESSAGE_REJECTED, rej_msg, CM_MSG_RESPONSE_REQ);
break;
case IB_CM_MRA_REQ_SENT:
- rej_msg->local_comm_id = cm_id_priv->id.local_id;
- cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_REQ);
+ IBA_SET(CM_REJ_LOCAL_COMM_ID, rej_msg,
+ be32_to_cpu(cm_id_priv->id.local_id));
+ IBA_SET(CM_REJ_MESSAGE_REJECTED, rej_msg, CM_MSG_RESPONSE_REQ);
break;
case IB_CM_REP_RCVD:
case IB_CM_MRA_REP_SENT:
- rej_msg->local_comm_id = cm_id_priv->id.local_id;
- cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_REP);
+ IBA_SET(CM_REJ_LOCAL_COMM_ID, rej_msg,
+ be32_to_cpu(cm_id_priv->id.local_id));
+ IBA_SET(CM_REJ_MESSAGE_REJECTED, rej_msg, CM_MSG_RESPONSE_REP);
break;
default:
- rej_msg->local_comm_id = cm_id_priv->id.local_id;
- cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_OTHER);
+ IBA_SET(CM_REJ_LOCAL_COMM_ID, rej_msg,
+ be32_to_cpu(cm_id_priv->id.local_id));
+ IBA_SET(CM_REJ_MESSAGE_REJECTED, rej_msg,
+ CM_MSG_RESPONSE_OTHER);
break;
}
- rej_msg->reason = cpu_to_be16(reason);
+ IBA_SET(CM_REJ_REASON, rej_msg, reason);
if (ari && ari_length) {
- cm_rej_set_reject_info_len(rej_msg, ari_length);
- memcpy(rej_msg->ari, ari, ari_length);
+ IBA_SET(CM_REJ_REJECTED_INFO_LENGTH, rej_msg, ari_length);
+ IBA_SET_MEM(CM_REJ_ARI, rej_msg, ari, ari_length);
}
if (private_data && private_data_len)
- memcpy(rej_msg->private_data, private_data, private_data_len);
+ IBA_SET_MEM(CM_REJ_PRIVATE_DATA, rej_msg, private_data,
+ private_data_len);
}
static void cm_dup_req_handler(struct cm_work *work,
@@ -1587,14 +1955,18 @@ static void cm_dup_req_handler(struct cm_work *work,
struct ib_mad_send_buf *msg = NULL;
int ret;
- atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
- counter[CM_REQ_COUNTER]);
+ atomic_long_inc(
+ &work->port->counters[CM_RECV_DUPLICATES][CM_REQ_COUNTER]);
/* Quick state check to discard duplicate REQs. */
- if (cm_id_priv->id.state == IB_CM_REQ_RCVD)
+ spin_lock_irq(&cm_id_priv->lock);
+ if (cm_id_priv->id.state == IB_CM_REQ_RCVD) {
+ spin_unlock_irq(&cm_id_priv->lock);
return;
+ }
+ spin_unlock_irq(&cm_id_priv->lock);
- ret = cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg);
+ ret = cm_alloc_response_msg(work->port, work->mad_recv_wc, true, &msg);
if (ret)
return;
@@ -1602,19 +1974,21 @@ static void cm_dup_req_handler(struct cm_work *work,
switch (cm_id_priv->id.state) {
case IB_CM_MRA_REQ_SENT:
cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
- CM_MSG_RESPONSE_REQ, cm_id_priv->service_timeout,
+ CM_MSG_RESPONSE_REQ,
cm_id_priv->private_data,
cm_id_priv->private_data_len);
break;
case IB_CM_TIMEWAIT:
- cm_format_rej((struct cm_rej_msg *) msg->mad, cm_id_priv,
- IB_CM_REJ_STALE_CONN, NULL, 0, NULL, 0);
+ cm_format_rej((struct cm_rej_msg *)msg->mad, cm_id_priv,
+ IB_CM_REJ_STALE_CONN, NULL, 0, NULL, 0,
+ IB_CM_TIMEWAIT);
break;
default:
goto unlock;
}
spin_unlock_irq(&cm_id_priv->lock);
+ trace_icm_send_dup_req(&cm_id_priv->id);
ret = ib_post_send_mad(msg, NULL);
if (ret)
goto free;
@@ -1624,13 +1998,12 @@ unlock: spin_unlock_irq(&cm_id_priv->lock);
free: cm_free_msg(msg);
}
-static struct cm_id_private * cm_match_req(struct cm_work *work,
- struct cm_id_private *cm_id_priv)
+static struct cm_id_private *cm_match_req(struct cm_work *work,
+ struct cm_id_private *cm_id_priv)
{
struct cm_id_private *listen_cm_id_priv, *cur_cm_id_priv;
struct cm_timewait_info *timewait_info;
struct cm_req_msg *req_msg;
- struct ib_cm_id *cm_id;
req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
@@ -1638,7 +2011,7 @@ static struct cm_id_private * cm_match_req(struct cm_work *work,
spin_lock_irq(&cm.lock);
timewait_info = cm_insert_remote_id(cm_id_priv->timewait_info);
if (timewait_info) {
- cur_cm_id_priv = cm_get_id(timewait_info->work.local_id,
+ cur_cm_id_priv = cm_acquire_id(timewait_info->work.local_id,
timewait_info->work.remote_id);
spin_unlock_irq(&cm.lock);
if (cur_cm_id_priv) {
@@ -1651,8 +2024,8 @@ static struct cm_id_private * cm_match_req(struct cm_work *work,
/* Check for stale connections. */
timewait_info = cm_insert_remote_qpn(cm_id_priv->timewait_info);
if (timewait_info) {
- cm_cleanup_timewait(cm_id_priv->timewait_info);
- cur_cm_id_priv = cm_get_id(timewait_info->work.local_id,
+ cm_remove_remote(cm_id_priv);
+ cur_cm_id_priv = cm_acquire_id(timewait_info->work.local_id,
timewait_info->work.remote_id);
spin_unlock_irq(&cm.lock);
@@ -1660,30 +2033,25 @@ static struct cm_id_private * cm_match_req(struct cm_work *work,
IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REQ,
NULL, 0);
if (cur_cm_id_priv) {
- cm_id = &cur_cm_id_priv->id;
- ib_send_cm_dreq(cm_id, NULL, 0);
+ ib_send_cm_dreq(&cur_cm_id_priv->id, NULL, 0);
cm_deref_id(cur_cm_id_priv);
}
return NULL;
}
/* Find matching listen request. */
- listen_cm_id_priv = cm_find_listen(cm_id_priv->id.device,
- req_msg->service_id);
+ listen_cm_id_priv = cm_find_listen(
+ cm_id_priv->id.device,
+ cpu_to_be64(IBA_GET(CM_REQ_SERVICE_ID, req_msg)));
if (!listen_cm_id_priv) {
- cm_cleanup_timewait(cm_id_priv->timewait_info);
+ cm_remove_remote(cm_id_priv);
spin_unlock_irq(&cm.lock);
cm_issue_rej(work->port, work->mad_recv_wc,
IB_CM_REJ_INVALID_SERVICE_ID, CM_MSG_RESPONSE_REQ,
NULL, 0);
- goto out;
+ return NULL;
}
- atomic_inc(&listen_cm_id_priv->refcount);
- atomic_inc(&cm_id_priv->refcount);
- cm_id_priv->id.state = IB_CM_REQ_RCVD;
- atomic_inc(&cm_id_priv->work_count);
spin_unlock_irq(&cm.lock);
-out:
return listen_cm_id_priv;
}
@@ -1694,138 +2062,187 @@ out:
*/
static void cm_process_routed_req(struct cm_req_msg *req_msg, struct ib_wc *wc)
{
- if (!cm_req_get_primary_subnet_local(req_msg)) {
- if (req_msg->primary_local_lid == IB_LID_PERMISSIVE) {
- req_msg->primary_local_lid = cpu_to_be16(wc->slid);
- cm_req_set_primary_sl(req_msg, wc->sl);
+ if (!IBA_GET(CM_REQ_PRIMARY_SUBNET_LOCAL, req_msg)) {
+ if (cpu_to_be16(IBA_GET(CM_REQ_PRIMARY_LOCAL_PORT_LID,
+ req_msg)) == IB_LID_PERMISSIVE) {
+ IBA_SET(CM_REQ_PRIMARY_LOCAL_PORT_LID, req_msg,
+ be16_to_cpu(ib_lid_be16(wc->slid)));
+ IBA_SET(CM_REQ_PRIMARY_SL, req_msg, wc->sl);
}
- if (req_msg->primary_remote_lid == IB_LID_PERMISSIVE)
- req_msg->primary_remote_lid = cpu_to_be16(wc->dlid_path_bits);
+ if (cpu_to_be16(IBA_GET(CM_REQ_PRIMARY_REMOTE_PORT_LID,
+ req_msg)) == IB_LID_PERMISSIVE)
+ IBA_SET(CM_REQ_PRIMARY_REMOTE_PORT_LID, req_msg,
+ wc->dlid_path_bits);
}
- if (!cm_req_get_alt_subnet_local(req_msg)) {
- if (req_msg->alt_local_lid == IB_LID_PERMISSIVE) {
- req_msg->alt_local_lid = cpu_to_be16(wc->slid);
- cm_req_set_alt_sl(req_msg, wc->sl);
+ if (!IBA_GET(CM_REQ_ALTERNATE_SUBNET_LOCAL, req_msg)) {
+ if (cpu_to_be16(IBA_GET(CM_REQ_ALTERNATE_LOCAL_PORT_LID,
+ req_msg)) == IB_LID_PERMISSIVE) {
+ IBA_SET(CM_REQ_ALTERNATE_LOCAL_PORT_LID, req_msg,
+ be16_to_cpu(ib_lid_be16(wc->slid)));
+ IBA_SET(CM_REQ_ALTERNATE_SL, req_msg, wc->sl);
}
- if (req_msg->alt_remote_lid == IB_LID_PERMISSIVE)
- req_msg->alt_remote_lid = cpu_to_be16(wc->dlid_path_bits);
+ if (cpu_to_be16(IBA_GET(CM_REQ_ALTERNATE_REMOTE_PORT_LID,
+ req_msg)) == IB_LID_PERMISSIVE)
+ IBA_SET(CM_REQ_ALTERNATE_REMOTE_PORT_LID, req_msg,
+ wc->dlid_path_bits);
}
}
static int cm_req_handler(struct cm_work *work)
{
- struct ib_cm_id *cm_id;
struct cm_id_private *cm_id_priv, *listen_cm_id_priv;
struct cm_req_msg *req_msg;
- union ib_gid gid;
- struct ib_gid_attr gid_attr;
+ const struct ib_global_route *grh;
+ const struct ib_gid_attr *gid_attr;
int ret;
req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
- cm_id = ib_create_cm_id(work->port->cm_dev->ib_device, NULL, NULL);
- if (IS_ERR(cm_id))
- return PTR_ERR(cm_id);
+ cm_id_priv =
+ cm_alloc_id_priv(work->port->cm_dev->ib_device, NULL, NULL);
+ if (IS_ERR(cm_id_priv))
+ return PTR_ERR(cm_id_priv);
- cm_id_priv = container_of(cm_id, struct cm_id_private, id);
- cm_id_priv->id.remote_id = req_msg->local_comm_id;
- cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
- work->mad_recv_wc->recv_buf.grh,
- &cm_id_priv->av);
+ cm_id_priv->id.remote_id =
+ cpu_to_be32(IBA_GET(CM_REQ_LOCAL_COMM_ID, req_msg));
+ cm_id_priv->id.service_id =
+ cpu_to_be64(IBA_GET(CM_REQ_SERVICE_ID, req_msg));
+ cm_id_priv->tid = req_msg->hdr.tid;
+ cm_id_priv->timeout_ms = cm_convert_to_ms(
+ IBA_GET(CM_REQ_LOCAL_CM_RESPONSE_TIMEOUT, req_msg));
+ cm_id_priv->max_cm_retries = IBA_GET(CM_REQ_MAX_CM_RETRIES, req_msg);
+ cm_id_priv->remote_qpn =
+ cpu_to_be32(IBA_GET(CM_REQ_LOCAL_QPN, req_msg));
+ cm_id_priv->initiator_depth =
+ IBA_GET(CM_REQ_RESPONDER_RESOURCES, req_msg);
+ cm_id_priv->responder_resources =
+ IBA_GET(CM_REQ_INITIATOR_DEPTH, req_msg);
+ cm_id_priv->path_mtu = IBA_GET(CM_REQ_PATH_PACKET_PAYLOAD_MTU, req_msg);
+ cm_id_priv->pkey = cpu_to_be16(IBA_GET(CM_REQ_PARTITION_KEY, req_msg));
+ cm_id_priv->sq_psn = cpu_to_be32(IBA_GET(CM_REQ_STARTING_PSN, req_msg));
+ cm_id_priv->retry_count = IBA_GET(CM_REQ_RETRY_COUNT, req_msg);
+ cm_id_priv->rnr_retry_count = IBA_GET(CM_REQ_RNR_RETRY_COUNT, req_msg);
+ cm_id_priv->qp_type = cm_req_get_qp_type(req_msg);
+
+ ret = cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
+ work->mad_recv_wc->recv_buf.grh,
+ &cm_id_priv->av);
+ if (ret)
+ goto destroy;
cm_id_priv->timewait_info = cm_create_timewait_info(cm_id_priv->
id.local_id);
if (IS_ERR(cm_id_priv->timewait_info)) {
ret = PTR_ERR(cm_id_priv->timewait_info);
+ cm_id_priv->timewait_info = NULL;
goto destroy;
}
- cm_id_priv->timewait_info->work.remote_id = req_msg->local_comm_id;
- cm_id_priv->timewait_info->remote_ca_guid = req_msg->local_ca_guid;
- cm_id_priv->timewait_info->remote_qpn = cm_req_get_local_qpn(req_msg);
+ cm_id_priv->timewait_info->work.remote_id = cm_id_priv->id.remote_id;
+ cm_id_priv->timewait_info->remote_ca_guid =
+ cpu_to_be64(IBA_GET(CM_REQ_LOCAL_CA_GUID, req_msg));
+ cm_id_priv->timewait_info->remote_qpn = cm_id_priv->remote_qpn;
+
+ /*
+ * Note that the ID pointer is not in the xarray at this point,
+ * so this set is only visible to the local thread.
+ */
+ cm_id_priv->id.state = IB_CM_REQ_RCVD;
listen_cm_id_priv = cm_match_req(work, cm_id_priv);
if (!listen_cm_id_priv) {
+ trace_icm_no_listener_err(&cm_id_priv->id);
+ cm_id_priv->id.state = IB_CM_IDLE;
ret = -EINVAL;
- kfree(cm_id_priv->timewait_info);
goto destroy;
}
- cm_id_priv->id.cm_handler = listen_cm_id_priv->id.cm_handler;
- cm_id_priv->id.context = listen_cm_id_priv->id.context;
- cm_id_priv->id.service_id = req_msg->service_id;
- cm_id_priv->id.service_mask = ~cpu_to_be64(0);
-
- cm_process_routed_req(req_msg, work->mad_recv_wc->wc);
- cm_format_paths_from_req(req_msg, &work->path[0], &work->path[1]);
-
- memcpy(work->path[0].dmac, cm_id_priv->av.ah_attr.dmac, ETH_ALEN);
- work->path[0].hop_limit = cm_id_priv->av.ah_attr.grh.hop_limit;
- ret = ib_get_cached_gid(work->port->cm_dev->ib_device,
- work->port->port_num,
- cm_id_priv->av.ah_attr.grh.sgid_index,
- &gid, &gid_attr);
- if (!ret) {
- if (gid_attr.ndev) {
- work->path[0].ifindex = gid_attr.ndev->ifindex;
- work->path[0].net = dev_net(gid_attr.ndev);
- dev_put(gid_attr.ndev);
- }
- work->path[0].gid_type = gid_attr.gid_type;
- ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av,
- cm_id_priv);
+ memset(&work->path[0], 0, sizeof(work->path[0]));
+ if (cm_req_has_alt_path(req_msg))
+ memset(&work->path[1], 0, sizeof(work->path[1]));
+ grh = rdma_ah_read_grh(&cm_id_priv->av.ah_attr);
+ gid_attr = grh->sgid_attr;
+
+ if (cm_id_priv->av.ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE) {
+ work->path[0].rec_type =
+ sa_conv_gid_to_pathrec_type(gid_attr->gid_type);
+ } else {
+ cm_process_routed_req(req_msg, work->mad_recv_wc->wc);
+ cm_path_set_rec_type(
+ work->port->cm_dev->ib_device, work->port->port_num,
+ &work->path[0],
+ IBA_GET_MEM_PTR(CM_REQ_PRIMARY_LOCAL_PORT_GID,
+ req_msg));
}
+ if (cm_req_has_alt_path(req_msg))
+ work->path[1].rec_type = work->path[0].rec_type;
+ cm_format_paths_from_req(req_msg, &work->path[0],
+ &work->path[1], work->mad_recv_wc->wc);
+ if (cm_id_priv->av.ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE)
+ sa_path_set_dmac(&work->path[0],
+ cm_id_priv->av.ah_attr.roce.dmac);
+ work->path[0].hop_limit = grh->hop_limit;
+
+ /* This destroy call is needed to pair with cm_init_av_for_response */
+ cm_destroy_av(&cm_id_priv->av);
+ ret = cm_init_av_by_path(&work->path[0], gid_attr, &cm_id_priv->av);
if (ret) {
- int err = ib_get_cached_gid(work->port->cm_dev->ib_device,
- work->port->port_num, 0,
- &work->path[0].sgid,
- &gid_attr);
- if (!err && gid_attr.ndev) {
- work->path[0].ifindex = gid_attr.ndev->ifindex;
- work->path[0].net = dev_net(gid_attr.ndev);
- dev_put(gid_attr.ndev);
- }
- work->path[0].gid_type = gid_attr.gid_type;
- ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID,
- &work->path[0].sgid, sizeof work->path[0].sgid,
- NULL, 0);
+ int err;
+
+ err = rdma_query_gid(work->port->cm_dev->ib_device,
+ work->port->port_num, 0,
+ &work->path[0].sgid);
+ if (err)
+ ib_send_cm_rej(&cm_id_priv->id, IB_CM_REJ_INVALID_GID,
+ NULL, 0, NULL, 0);
+ else
+ ib_send_cm_rej(&cm_id_priv->id, IB_CM_REJ_INVALID_GID,
+ &work->path[0].sgid,
+ sizeof(work->path[0].sgid),
+ NULL, 0);
goto rejected;
}
- if (req_msg->alt_local_lid) {
- ret = cm_init_av_by_path(&work->path[1], &cm_id_priv->alt_av,
- cm_id_priv);
+ if (cm_id_priv->av.ah_attr.type == RDMA_AH_ATTR_TYPE_IB)
+ cm_id_priv->av.dlid_datapath =
+ IBA_GET(CM_REQ_PRIMARY_LOCAL_PORT_LID, req_msg);
+
+ if (cm_req_has_alt_path(req_msg)) {
+ ret = cm_init_av_by_path(&work->path[1], NULL,
+ &cm_id_priv->alt_av);
if (ret) {
- ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_ALT_GID,
+ ib_send_cm_rej(&cm_id_priv->id,
+ IB_CM_REJ_INVALID_ALT_GID,
&work->path[0].sgid,
- sizeof work->path[0].sgid, NULL, 0);
+ sizeof(work->path[0].sgid), NULL, 0);
goto rejected;
}
}
- cm_id_priv->tid = req_msg->hdr.tid;
- cm_id_priv->timeout_ms = cm_convert_to_ms(
- cm_req_get_local_resp_timeout(req_msg));
- cm_id_priv->max_cm_retries = cm_req_get_max_cm_retries(req_msg);
- cm_id_priv->remote_qpn = cm_req_get_local_qpn(req_msg);
- cm_id_priv->initiator_depth = cm_req_get_resp_res(req_msg);
- cm_id_priv->responder_resources = cm_req_get_init_depth(req_msg);
- cm_id_priv->path_mtu = cm_req_get_path_mtu(req_msg);
- cm_id_priv->pkey = req_msg->pkey;
- cm_id_priv->sq_psn = cm_req_get_starting_psn(req_msg);
- cm_id_priv->retry_count = cm_req_get_retry_count(req_msg);
- cm_id_priv->rnr_retry_count = cm_req_get_rnr_retry_count(req_msg);
- cm_id_priv->qp_type = cm_req_get_qp_type(req_msg);
+ cm_id_priv->id.cm_handler = listen_cm_id_priv->id.cm_handler;
+ cm_id_priv->id.context = listen_cm_id_priv->id.context;
cm_format_req_event(work, cm_id_priv, &listen_cm_id_priv->id);
- cm_process_work(cm_id_priv, work);
+
+ /* Now MAD handlers can see the new ID */
+ spin_lock_irq(&cm_id_priv->lock);
+ cm_finalize_id(cm_id_priv);
+
+ /* Refcount belongs to the event, pairs with cm_process_work() */
+ refcount_inc(&cm_id_priv->refcount);
+ cm_queue_work_unlock(cm_id_priv, work);
+ /*
+ * Since this ID was just created and was not made visible to other MAD
+ * handlers until the cm_finalize_id() above we know that the
+ * cm_process_work() will deliver the event and the listen_cm_id
+ * embedded in the event can be derefed here.
+ */
cm_deref_id(listen_cm_id_priv);
return 0;
rejected:
- atomic_dec(&cm_id_priv->refcount);
cm_deref_id(listen_cm_id_priv);
destroy:
- ib_destroy_cm_id(cm_id);
+ ib_destroy_cm_id(&cm_id_priv->id);
return ret;
}
@@ -1833,30 +2250,41 @@ static void cm_format_rep(struct cm_rep_msg *rep_msg,
struct cm_id_private *cm_id_priv,
struct ib_cm_rep_param *param)
{
- cm_format_mad_hdr(&rep_msg->hdr, CM_REP_ATTR_ID, cm_id_priv->tid);
- rep_msg->local_comm_id = cm_id_priv->id.local_id;
- rep_msg->remote_comm_id = cm_id_priv->id.remote_id;
- cm_rep_set_starting_psn(rep_msg, cpu_to_be32(param->starting_psn));
- rep_msg->resp_resources = param->responder_resources;
- cm_rep_set_target_ack_delay(rep_msg,
- cm_id_priv->av.port->cm_dev->ack_delay);
- cm_rep_set_failover(rep_msg, param->failover_accepted);
- cm_rep_set_rnr_retry_count(rep_msg, param->rnr_retry_count);
- rep_msg->local_ca_guid = cm_id_priv->id.device->node_guid;
+ cm_format_mad_ece_hdr(&rep_msg->hdr, CM_REP_ATTR_ID, cm_id_priv->tid,
+ param->ece.attr_mod);
+ IBA_SET(CM_REP_LOCAL_COMM_ID, rep_msg,
+ be32_to_cpu(cm_id_priv->id.local_id));
+ IBA_SET(CM_REP_REMOTE_COMM_ID, rep_msg,
+ be32_to_cpu(cm_id_priv->id.remote_id));
+ IBA_SET(CM_REP_STARTING_PSN, rep_msg, param->starting_psn);
+ IBA_SET(CM_REP_RESPONDER_RESOURCES, rep_msg,
+ param->responder_resources);
+ IBA_SET(CM_REP_TARGET_ACK_DELAY, rep_msg,
+ cm_id_priv->av.port->cm_dev->ack_delay);
+ IBA_SET(CM_REP_FAILOVER_ACCEPTED, rep_msg, param->failover_accepted);
+ IBA_SET(CM_REP_RNR_RETRY_COUNT, rep_msg, param->rnr_retry_count);
+ IBA_SET(CM_REP_LOCAL_CA_GUID, rep_msg,
+ be64_to_cpu(cm_id_priv->id.device->node_guid));
if (cm_id_priv->qp_type != IB_QPT_XRC_TGT) {
- rep_msg->initiator_depth = param->initiator_depth;
- cm_rep_set_flow_ctrl(rep_msg, param->flow_control);
- cm_rep_set_srq(rep_msg, param->srq);
- cm_rep_set_local_qpn(rep_msg, cpu_to_be32(param->qp_num));
+ IBA_SET(CM_REP_INITIATOR_DEPTH, rep_msg,
+ param->initiator_depth);
+ IBA_SET(CM_REP_END_TO_END_FLOW_CONTROL, rep_msg,
+ param->flow_control);
+ IBA_SET(CM_REP_SRQ, rep_msg, param->srq);
+ IBA_SET(CM_REP_LOCAL_QPN, rep_msg, param->qp_num);
} else {
- cm_rep_set_srq(rep_msg, 1);
- cm_rep_set_local_eecn(rep_msg, cpu_to_be32(param->qp_num));
+ IBA_SET(CM_REP_SRQ, rep_msg, 1);
+ IBA_SET(CM_REP_LOCAL_EE_CONTEXT_NUMBER, rep_msg, param->qp_num);
}
+ IBA_SET(CM_REP_VENDOR_ID_L, rep_msg, param->ece.vendor_id);
+ IBA_SET(CM_REP_VENDOR_ID_M, rep_msg, param->ece.vendor_id >> 8);
+ IBA_SET(CM_REP_VENDOR_ID_H, rep_msg, param->ece.vendor_id >> 16);
+
if (param->private_data && param->private_data_len)
- memcpy(rep_msg->private_data, param->private_data,
- param->private_data_len);
+ IBA_SET_MEM(CM_REP_PRIVATE_DATA, rep_msg, param->private_data,
+ param->private_data_len);
}
int ib_send_cm_rep(struct ib_cm_id *cm_id,
@@ -1876,34 +2304,40 @@ int ib_send_cm_rep(struct ib_cm_id *cm_id,
spin_lock_irqsave(&cm_id_priv->lock, flags);
if (cm_id->state != IB_CM_REQ_RCVD &&
cm_id->state != IB_CM_MRA_REQ_SENT) {
+ trace_icm_send_rep_err(cm_id_priv->id.local_id, cm_id->state);
ret = -EINVAL;
goto out;
}
- ret = cm_alloc_msg(cm_id_priv, &msg);
- if (ret)
+ msg = cm_alloc_priv_msg_rep(cm_id_priv, IB_CM_REP_SENT, true);
+ if (IS_ERR(msg)) {
+ ret = PTR_ERR(msg);
goto out;
+ }
rep_msg = (struct cm_rep_msg *) msg->mad;
cm_format_rep(rep_msg, cm_id_priv, param);
- msg->timeout_ms = cm_id_priv->timeout_ms;
- msg->context[1] = (void *) (unsigned long) IB_CM_REP_SENT;
+ trace_icm_send_rep(cm_id);
ret = ib_post_send_mad(msg, NULL);
- if (ret) {
- spin_unlock_irqrestore(&cm_id_priv->lock, flags);
- cm_free_msg(msg);
- return ret;
- }
+ if (ret)
+ goto out_free;
cm_id->state = IB_CM_REP_SENT;
- cm_id_priv->msg = msg;
cm_id_priv->initiator_depth = param->initiator_depth;
cm_id_priv->responder_resources = param->responder_resources;
- cm_id_priv->rq_psn = cm_rep_get_starting_psn(rep_msg);
+ cm_id_priv->rq_psn = cpu_to_be32(IBA_GET(CM_REP_STARTING_PSN, rep_msg));
+ WARN_ONCE(param->qp_num & 0xFF000000,
+ "IBTA declares QPN to be 24 bits, but it is 0x%X\n",
+ param->qp_num);
cm_id_priv->local_qpn = cpu_to_be32(param->qp_num & 0xFFFFFF);
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+ return 0;
-out: spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+out_free:
+ cm_free_priv_msg(msg);
+out:
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
return ret;
}
EXPORT_SYMBOL(ib_send_cm_rep);
@@ -1914,11 +2348,14 @@ static void cm_format_rtu(struct cm_rtu_msg *rtu_msg,
u8 private_data_len)
{
cm_format_mad_hdr(&rtu_msg->hdr, CM_RTU_ATTR_ID, cm_id_priv->tid);
- rtu_msg->local_comm_id = cm_id_priv->id.local_id;
- rtu_msg->remote_comm_id = cm_id_priv->id.remote_id;
+ IBA_SET(CM_RTU_LOCAL_COMM_ID, rtu_msg,
+ be32_to_cpu(cm_id_priv->id.local_id));
+ IBA_SET(CM_RTU_REMOTE_COMM_ID, rtu_msg,
+ be32_to_cpu(cm_id_priv->id.remote_id));
if (private_data && private_data_len)
- memcpy(rtu_msg->private_data, private_data, private_data_len);
+ IBA_SET_MEM(CM_RTU_PRIVATE_DATA, rtu_msg, private_data,
+ private_data_len);
}
int ib_send_cm_rtu(struct ib_cm_id *cm_id,
@@ -1942,17 +2379,21 @@ int ib_send_cm_rtu(struct ib_cm_id *cm_id,
spin_lock_irqsave(&cm_id_priv->lock, flags);
if (cm_id->state != IB_CM_REP_RCVD &&
cm_id->state != IB_CM_MRA_REP_SENT) {
+ trace_icm_send_cm_rtu_err(cm_id);
ret = -EINVAL;
goto error;
}
- ret = cm_alloc_msg(cm_id_priv, &msg);
- if (ret)
+ msg = cm_alloc_msg(cm_id_priv);
+ if (IS_ERR(msg)) {
+ ret = PTR_ERR(msg);
goto error;
+ }
cm_format_rtu((struct cm_rtu_msg *) msg->mad, cm_id_priv,
private_data, private_data_len);
+ trace_icm_send_rtu(cm_id);
ret = ib_post_send_mad(msg, NULL);
if (ret) {
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
@@ -1979,18 +2420,25 @@ static void cm_format_rep_event(struct cm_work *work, enum ib_qp_type qp_type)
rep_msg = (struct cm_rep_msg *)work->mad_recv_wc->recv_buf.mad;
param = &work->cm_event.param.rep_rcvd;
- param->remote_ca_guid = rep_msg->local_ca_guid;
- param->remote_qkey = be32_to_cpu(rep_msg->local_qkey);
+ param->remote_ca_guid =
+ cpu_to_be64(IBA_GET(CM_REP_LOCAL_CA_GUID, rep_msg));
+ param->remote_qkey = IBA_GET(CM_REP_LOCAL_Q_KEY, rep_msg);
param->remote_qpn = be32_to_cpu(cm_rep_get_qpn(rep_msg, qp_type));
- param->starting_psn = be32_to_cpu(cm_rep_get_starting_psn(rep_msg));
- param->responder_resources = rep_msg->initiator_depth;
- param->initiator_depth = rep_msg->resp_resources;
- param->target_ack_delay = cm_rep_get_target_ack_delay(rep_msg);
- param->failover_accepted = cm_rep_get_failover(rep_msg);
- param->flow_control = cm_rep_get_flow_ctrl(rep_msg);
- param->rnr_retry_count = cm_rep_get_rnr_retry_count(rep_msg);
- param->srq = cm_rep_get_srq(rep_msg);
- work->cm_event.private_data = &rep_msg->private_data;
+ param->starting_psn = IBA_GET(CM_REP_STARTING_PSN, rep_msg);
+ param->responder_resources = IBA_GET(CM_REP_INITIATOR_DEPTH, rep_msg);
+ param->initiator_depth = IBA_GET(CM_REP_RESPONDER_RESOURCES, rep_msg);
+ param->target_ack_delay = IBA_GET(CM_REP_TARGET_ACK_DELAY, rep_msg);
+ param->failover_accepted = IBA_GET(CM_REP_FAILOVER_ACCEPTED, rep_msg);
+ param->flow_control = IBA_GET(CM_REP_END_TO_END_FLOW_CONTROL, rep_msg);
+ param->rnr_retry_count = IBA_GET(CM_REP_RNR_RETRY_COUNT, rep_msg);
+ param->srq = IBA_GET(CM_REP_SRQ, rep_msg);
+ param->ece.vendor_id = IBA_GET(CM_REP_VENDOR_ID_H, rep_msg) << 16;
+ param->ece.vendor_id |= IBA_GET(CM_REP_VENDOR_ID_M, rep_msg) << 8;
+ param->ece.vendor_id |= IBA_GET(CM_REP_VENDOR_ID_L, rep_msg);
+ param->ece.attr_mod = be32_to_cpu(rep_msg->hdr.attr_mod);
+
+ work->cm_event.private_data =
+ IBA_GET_MEM_PTR(CM_REP_PRIVATE_DATA, rep_msg);
}
static void cm_dup_rep_handler(struct cm_work *work)
@@ -2001,14 +2449,15 @@ static void cm_dup_rep_handler(struct cm_work *work)
int ret;
rep_msg = (struct cm_rep_msg *) work->mad_recv_wc->recv_buf.mad;
- cm_id_priv = cm_acquire_id(rep_msg->remote_comm_id,
- rep_msg->local_comm_id);
+ cm_id_priv = cm_acquire_id(
+ cpu_to_be32(IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg)),
+ cpu_to_be32(IBA_GET(CM_REP_LOCAL_COMM_ID, rep_msg)));
if (!cm_id_priv)
return;
- atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
- counter[CM_REP_COUNTER]);
- ret = cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg);
+ atomic_long_inc(
+ &work->port->counters[CM_RECV_DUPLICATES][CM_REP_COUNTER]);
+ ret = cm_alloc_response_msg(work->port, work->mad_recv_wc, true, &msg);
if (ret)
goto deref;
@@ -2019,13 +2468,14 @@ static void cm_dup_rep_handler(struct cm_work *work)
cm_id_priv->private_data_len);
else if (cm_id_priv->id.state == IB_CM_MRA_REP_SENT)
cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
- CM_MSG_RESPONSE_REP, cm_id_priv->service_timeout,
+ CM_MSG_RESPONSE_REP,
cm_id_priv->private_data,
cm_id_priv->private_data_len);
else
goto unlock;
spin_unlock_irq(&cm_id_priv->lock);
+ trace_icm_send_dup_rep(&cm_id_priv->id);
ret = ib_post_send_mad(msg, NULL);
if (ret)
goto free;
@@ -2042,13 +2492,15 @@ static int cm_rep_handler(struct cm_work *work)
struct cm_rep_msg *rep_msg;
int ret;
struct cm_id_private *cur_cm_id_priv;
- struct ib_cm_id *cm_id;
struct cm_timewait_info *timewait_info;
rep_msg = (struct cm_rep_msg *)work->mad_recv_wc->recv_buf.mad;
- cm_id_priv = cm_acquire_id(rep_msg->remote_comm_id, 0);
+ cm_id_priv = cm_acquire_id(
+ cpu_to_be32(IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg)), 0);
if (!cm_id_priv) {
cm_dup_rep_handler(work);
+ trace_icm_remote_no_priv_err(
+ IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg));
return -EINVAL;
}
@@ -2060,13 +2512,19 @@ static int cm_rep_handler(struct cm_work *work)
case IB_CM_MRA_REQ_RCVD:
break;
default:
- spin_unlock_irq(&cm_id_priv->lock);
ret = -EINVAL;
+ trace_icm_rep_unknown_err(
+ IBA_GET(CM_REP_LOCAL_COMM_ID, rep_msg),
+ IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg),
+ cm_id_priv->id.state);
+ spin_unlock_irq(&cm_id_priv->lock);
goto error;
}
- cm_id_priv->timewait_info->work.remote_id = rep_msg->local_comm_id;
- cm_id_priv->timewait_info->remote_ca_guid = rep_msg->local_ca_guid;
+ cm_id_priv->timewait_info->work.remote_id =
+ cpu_to_be32(IBA_GET(CM_REP_LOCAL_COMM_ID, rep_msg));
+ cm_id_priv->timewait_info->remote_ca_guid =
+ cpu_to_be64(IBA_GET(CM_REP_LOCAL_CA_GUID, rep_msg));
cm_id_priv->timewait_info->remote_qpn = cm_rep_get_qpn(rep_msg, cm_id_priv->qp_type);
spin_lock(&cm.lock);
@@ -2075,15 +2533,15 @@ static int cm_rep_handler(struct cm_work *work)
spin_unlock(&cm.lock);
spin_unlock_irq(&cm_id_priv->lock);
ret = -EINVAL;
+ trace_icm_insert_failed_err(
+ IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg));
goto error;
}
/* Check for a stale connection. */
timewait_info = cm_insert_remote_qpn(cm_id_priv->timewait_info);
if (timewait_info) {
- rb_erase(&cm_id_priv->timewait_info->remote_id_node,
- &cm.remote_id_table);
- cm_id_priv->timewait_info->inserted_remote_id = 0;
- cur_cm_id_priv = cm_get_id(timewait_info->work.local_id,
+ cm_remove_remote(cm_id_priv);
+ cur_cm_id_priv = cm_acquire_id(timewait_info->work.local_id,
timewait_info->work.remote_id);
spin_unlock(&cm.lock);
@@ -2092,9 +2550,12 @@ static int cm_rep_handler(struct cm_work *work)
IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REP,
NULL, 0);
ret = -EINVAL;
+ trace_icm_staleconn_err(
+ IBA_GET(CM_REP_LOCAL_COMM_ID, rep_msg),
+ IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg));
+
if (cur_cm_id_priv) {
- cm_id = &cur_cm_id_priv->id;
- ib_send_cm_dreq(cm_id, NULL, 0);
+ ib_send_cm_dreq(&cur_cm_id_priv->id, NULL, 0);
cm_deref_id(cur_cm_id_priv);
}
@@ -2103,13 +2564,17 @@ static int cm_rep_handler(struct cm_work *work)
spin_unlock(&cm.lock);
cm_id_priv->id.state = IB_CM_REP_RCVD;
- cm_id_priv->id.remote_id = rep_msg->local_comm_id;
+ cm_id_priv->id.remote_id =
+ cpu_to_be32(IBA_GET(CM_REP_LOCAL_COMM_ID, rep_msg));
cm_id_priv->remote_qpn = cm_rep_get_qpn(rep_msg, cm_id_priv->qp_type);
- cm_id_priv->initiator_depth = rep_msg->resp_resources;
- cm_id_priv->responder_resources = rep_msg->initiator_depth;
- cm_id_priv->sq_psn = cm_rep_get_starting_psn(rep_msg);
- cm_id_priv->rnr_retry_count = cm_rep_get_rnr_retry_count(rep_msg);
- cm_id_priv->target_ack_delay = cm_rep_get_target_ack_delay(rep_msg);
+ cm_id_priv->initiator_depth =
+ IBA_GET(CM_REP_RESPONDER_RESOURCES, rep_msg);
+ cm_id_priv->responder_resources =
+ IBA_GET(CM_REP_INITIATOR_DEPTH, rep_msg);
+ cm_id_priv->sq_psn = cpu_to_be32(IBA_GET(CM_REP_STARTING_PSN, rep_msg));
+ cm_id_priv->rnr_retry_count = IBA_GET(CM_REP_RNR_RETRY_COUNT, rep_msg);
+ cm_id_priv->target_ack_delay =
+ IBA_GET(CM_REP_TARGET_ACK_DELAY, rep_msg);
cm_id_priv->av.timeout =
cm_ack_timeout(cm_id_priv->target_ack_delay,
cm_id_priv->av.timeout - 1);
@@ -2117,18 +2582,8 @@ static int cm_rep_handler(struct cm_work *work)
cm_ack_timeout(cm_id_priv->target_ack_delay,
cm_id_priv->alt_av.timeout - 1);
- /* todo: handle peer_to_peer */
-
- ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
- ret = atomic_inc_and_test(&cm_id_priv->work_count);
- if (!ret)
- list_add_tail(&work->list, &cm_id_priv->work_list);
- spin_unlock_irq(&cm_id_priv->lock);
-
- if (ret)
- cm_process_work(cm_id_priv, work);
- else
- cm_deref_id(cm_id_priv);
+ ib_cancel_mad(cm_id_priv->msg);
+ cm_queue_work_unlock(cm_id_priv, work);
return 0;
error:
@@ -2139,7 +2594,6 @@ error:
static int cm_establish_handler(struct cm_work *work)
{
struct cm_id_private *cm_id_priv;
- int ret;
/* See comment in cm_establish about lookup. */
cm_id_priv = cm_acquire_id(work->local_id, work->remote_id);
@@ -2152,16 +2606,8 @@ static int cm_establish_handler(struct cm_work *work)
goto out;
}
- ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
- ret = atomic_inc_and_test(&cm_id_priv->work_count);
- if (!ret)
- list_add_tail(&work->list, &cm_id_priv->work_list);
- spin_unlock_irq(&cm_id_priv->lock);
-
- if (ret)
- cm_process_work(cm_id_priv, work);
- else
- cm_deref_id(cm_id_priv);
+ ib_cancel_mad(cm_id_priv->msg);
+ cm_queue_work_unlock(cm_id_priv, work);
return 0;
out:
cm_deref_id(cm_id_priv);
@@ -2172,36 +2618,29 @@ static int cm_rtu_handler(struct cm_work *work)
{
struct cm_id_private *cm_id_priv;
struct cm_rtu_msg *rtu_msg;
- int ret;
rtu_msg = (struct cm_rtu_msg *)work->mad_recv_wc->recv_buf.mad;
- cm_id_priv = cm_acquire_id(rtu_msg->remote_comm_id,
- rtu_msg->local_comm_id);
+ cm_id_priv = cm_acquire_id(
+ cpu_to_be32(IBA_GET(CM_RTU_REMOTE_COMM_ID, rtu_msg)),
+ cpu_to_be32(IBA_GET(CM_RTU_LOCAL_COMM_ID, rtu_msg)));
if (!cm_id_priv)
return -EINVAL;
- work->cm_event.private_data = &rtu_msg->private_data;
+ work->cm_event.private_data =
+ IBA_GET_MEM_PTR(CM_RTU_PRIVATE_DATA, rtu_msg);
spin_lock_irq(&cm_id_priv->lock);
if (cm_id_priv->id.state != IB_CM_REP_SENT &&
cm_id_priv->id.state != IB_CM_MRA_REP_RCVD) {
spin_unlock_irq(&cm_id_priv->lock);
- atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
- counter[CM_RTU_COUNTER]);
+ atomic_long_inc(&work->port->counters[CM_RECV_DUPLICATES]
+ [CM_RTU_COUNTER]);
goto out;
}
cm_id_priv->id.state = IB_CM_ESTABLISHED;
- ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
- ret = atomic_inc_and_test(&cm_id_priv->work_count);
- if (!ret)
- list_add_tail(&work->list, &cm_id_priv->work_list);
- spin_unlock_irq(&cm_id_priv->lock);
-
- if (ret)
- cm_process_work(cm_id_priv, work);
- else
- cm_deref_id(cm_id_priv);
+ ib_cancel_mad(cm_id_priv->msg);
+ cm_queue_work_unlock(cm_id_priv, work);
return 0;
out:
cm_deref_id(cm_id_priv);
@@ -2214,20 +2653,43 @@ static void cm_format_dreq(struct cm_dreq_msg *dreq_msg,
u8 private_data_len)
{
cm_format_mad_hdr(&dreq_msg->hdr, CM_DREQ_ATTR_ID,
- cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_DREQ));
- dreq_msg->local_comm_id = cm_id_priv->id.local_id;
- dreq_msg->remote_comm_id = cm_id_priv->id.remote_id;
- cm_dreq_set_remote_qpn(dreq_msg, cm_id_priv->remote_qpn);
+ cm_form_tid(cm_id_priv));
+ IBA_SET(CM_DREQ_LOCAL_COMM_ID, dreq_msg,
+ be32_to_cpu(cm_id_priv->id.local_id));
+ IBA_SET(CM_DREQ_REMOTE_COMM_ID, dreq_msg,
+ be32_to_cpu(cm_id_priv->id.remote_id));
+ IBA_SET(CM_DREQ_REMOTE_QPN_EECN, dreq_msg,
+ be32_to_cpu(cm_id_priv->remote_qpn));
if (private_data && private_data_len)
- memcpy(dreq_msg->private_data, private_data, private_data_len);
+ IBA_SET_MEM(CM_DREQ_PRIVATE_DATA, dreq_msg, private_data,
+ private_data_len);
}
-int ib_send_cm_dreq(struct ib_cm_id *cm_id,
- const void *private_data,
+static void cm_issue_dreq(struct cm_id_private *cm_id_priv)
+{
+ struct ib_mad_send_buf *msg;
+ int ret;
+
+ lockdep_assert_held(&cm_id_priv->lock);
+
+ msg = cm_alloc_msg(cm_id_priv);
+ if (IS_ERR(msg))
+ return;
+
+ cm_format_dreq((struct cm_dreq_msg *) msg->mad, cm_id_priv, NULL, 0);
+
+ trace_icm_send_dreq(&cm_id_priv->id);
+ ret = ib_post_send_mad(msg, NULL);
+ if (ret)
+ cm_free_msg(msg);
+}
+
+int ib_send_cm_dreq(struct ib_cm_id *cm_id, const void *private_data,
u8 private_data_len)
{
- struct cm_id_private *cm_id_priv;
+ struct cm_id_private *cm_id_priv =
+ container_of(cm_id, struct cm_id_private, id);
struct ib_mad_send_buf *msg;
unsigned long flags;
int ret;
@@ -2235,39 +2697,38 @@ int ib_send_cm_dreq(struct ib_cm_id *cm_id,
if (private_data && private_data_len > IB_CM_DREQ_PRIVATE_DATA_SIZE)
return -EINVAL;
- cm_id_priv = container_of(cm_id, struct cm_id_private, id);
spin_lock_irqsave(&cm_id_priv->lock, flags);
- if (cm_id->state != IB_CM_ESTABLISHED) {
+ if (cm_id_priv->id.state != IB_CM_ESTABLISHED) {
+ trace_icm_dreq_skipped(&cm_id_priv->id);
ret = -EINVAL;
- goto out;
+ goto unlock;
}
- if (cm_id->lap_state == IB_CM_LAP_SENT ||
- cm_id->lap_state == IB_CM_MRA_LAP_RCVD)
- ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
+ if (cm_id_priv->id.lap_state == IB_CM_LAP_SENT ||
+ cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD)
+ ib_cancel_mad(cm_id_priv->msg);
- ret = cm_alloc_msg(cm_id_priv, &msg);
- if (ret) {
+ msg = cm_alloc_priv_msg(cm_id_priv, IB_CM_DREQ_SENT);
+ if (IS_ERR(msg)) {
cm_enter_timewait(cm_id_priv);
- goto out;
+ ret = PTR_ERR(msg);
+ goto unlock;
}
cm_format_dreq((struct cm_dreq_msg *) msg->mad, cm_id_priv,
private_data, private_data_len);
- msg->timeout_ms = cm_id_priv->timeout_ms;
- msg->context[1] = (void *) (unsigned long) IB_CM_DREQ_SENT;
+ trace_icm_send_dreq(&cm_id_priv->id);
ret = ib_post_send_mad(msg, NULL);
if (ret) {
cm_enter_timewait(cm_id_priv);
- spin_unlock_irqrestore(&cm_id_priv->lock, flags);
- cm_free_msg(msg);
- return ret;
+ cm_free_priv_msg(msg);
+ goto unlock;
}
- cm_id->state = IB_CM_DREQ_SENT;
- cm_id_priv->msg = msg;
-out: spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+ cm_id_priv->id.state = IB_CM_DREQ_SENT;
+unlock:
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
return ret;
}
EXPORT_SYMBOL(ib_send_cm_dreq);
@@ -2278,56 +2739,68 @@ static void cm_format_drep(struct cm_drep_msg *drep_msg,
u8 private_data_len)
{
cm_format_mad_hdr(&drep_msg->hdr, CM_DREP_ATTR_ID, cm_id_priv->tid);
- drep_msg->local_comm_id = cm_id_priv->id.local_id;
- drep_msg->remote_comm_id = cm_id_priv->id.remote_id;
+ IBA_SET(CM_DREP_LOCAL_COMM_ID, drep_msg,
+ be32_to_cpu(cm_id_priv->id.local_id));
+ IBA_SET(CM_DREP_REMOTE_COMM_ID, drep_msg,
+ be32_to_cpu(cm_id_priv->id.remote_id));
if (private_data && private_data_len)
- memcpy(drep_msg->private_data, private_data, private_data_len);
+ IBA_SET_MEM(CM_DREP_PRIVATE_DATA, drep_msg, private_data,
+ private_data_len);
}
-int ib_send_cm_drep(struct ib_cm_id *cm_id,
- const void *private_data,
- u8 private_data_len)
+static int cm_send_drep_locked(struct cm_id_private *cm_id_priv,
+ void *private_data, u8 private_data_len)
{
- struct cm_id_private *cm_id_priv;
struct ib_mad_send_buf *msg;
- unsigned long flags;
- void *data;
int ret;
+ lockdep_assert_held(&cm_id_priv->lock);
+
if (private_data && private_data_len > IB_CM_DREP_PRIVATE_DATA_SIZE)
return -EINVAL;
- data = cm_copy_private_data(private_data, private_data_len);
- if (IS_ERR(data))
- return PTR_ERR(data);
-
- cm_id_priv = container_of(cm_id, struct cm_id_private, id);
- spin_lock_irqsave(&cm_id_priv->lock, flags);
- if (cm_id->state != IB_CM_DREQ_RCVD) {
- spin_unlock_irqrestore(&cm_id_priv->lock, flags);
- kfree(data);
+ if (cm_id_priv->id.state != IB_CM_DREQ_RCVD) {
+ trace_icm_send_drep_err(&cm_id_priv->id);
+ kfree(private_data);
return -EINVAL;
}
- cm_set_private_data(cm_id_priv, data, private_data_len);
+ cm_set_private_data(cm_id_priv, private_data, private_data_len);
cm_enter_timewait(cm_id_priv);
- ret = cm_alloc_msg(cm_id_priv, &msg);
- if (ret)
- goto out;
+ msg = cm_alloc_msg(cm_id_priv);
+ if (IS_ERR(msg))
+ return PTR_ERR(msg);
cm_format_drep((struct cm_drep_msg *) msg->mad, cm_id_priv,
private_data, private_data_len);
+ trace_icm_send_drep(&cm_id_priv->id);
ret = ib_post_send_mad(msg, NULL);
if (ret) {
- spin_unlock_irqrestore(&cm_id_priv->lock, flags);
cm_free_msg(msg);
return ret;
}
+ return 0;
+}
-out: spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+int ib_send_cm_drep(struct ib_cm_id *cm_id, const void *private_data,
+ u8 private_data_len)
+{
+ struct cm_id_private *cm_id_priv =
+ container_of(cm_id, struct cm_id_private, id);
+ unsigned long flags;
+ void *data;
+ int ret;
+
+ data = cm_copy_private_data(private_data, private_data_len);
+ if (IS_ERR(data))
+ return PTR_ERR(data);
+
+ spin_lock_irqsave(&cm_id_priv->lock, flags);
+ ret = cm_send_drep_locked(cm_id_priv, data, private_data_len);
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
return ret;
}
EXPORT_SYMBOL(ib_send_cm_drep);
@@ -2340,7 +2813,7 @@ static int cm_issue_drep(struct cm_port *port,
struct cm_drep_msg *drep_msg;
int ret;
- ret = cm_alloc_response_msg(port, mad_recv_wc, &msg);
+ ret = cm_alloc_response_msg(port, mad_recv_wc, true, &msg);
if (ret)
return ret;
@@ -2348,9 +2821,14 @@ static int cm_issue_drep(struct cm_port *port,
drep_msg = (struct cm_drep_msg *) msg->mad;
cm_format_mad_hdr(&drep_msg->hdr, CM_DREP_ATTR_ID, dreq_msg->hdr.tid);
- drep_msg->remote_comm_id = dreq_msg->local_comm_id;
- drep_msg->local_comm_id = dreq_msg->remote_comm_id;
-
+ IBA_SET(CM_DREP_REMOTE_COMM_ID, drep_msg,
+ IBA_GET(CM_DREQ_LOCAL_COMM_ID, dreq_msg));
+ IBA_SET(CM_DREP_LOCAL_COMM_ID, drep_msg,
+ IBA_GET(CM_DREQ_REMOTE_COMM_ID, dreq_msg));
+
+ trace_icm_issue_drep(
+ IBA_GET(CM_DREQ_LOCAL_COMM_ID, dreq_msg),
+ IBA_GET(CM_DREQ_REMOTE_COMM_ID, dreq_msg));
ret = ib_post_send_mad(msg, NULL);
if (ret)
cm_free_msg(msg);
@@ -2363,40 +2841,46 @@ static int cm_dreq_handler(struct cm_work *work)
struct cm_id_private *cm_id_priv;
struct cm_dreq_msg *dreq_msg;
struct ib_mad_send_buf *msg = NULL;
- int ret;
dreq_msg = (struct cm_dreq_msg *)work->mad_recv_wc->recv_buf.mad;
- cm_id_priv = cm_acquire_id(dreq_msg->remote_comm_id,
- dreq_msg->local_comm_id);
+ cm_id_priv = cm_acquire_id(
+ cpu_to_be32(IBA_GET(CM_DREQ_REMOTE_COMM_ID, dreq_msg)),
+ cpu_to_be32(IBA_GET(CM_DREQ_LOCAL_COMM_ID, dreq_msg)));
if (!cm_id_priv) {
- atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
- counter[CM_DREQ_COUNTER]);
+ atomic_long_inc(&work->port->counters[CM_RECV_DUPLICATES]
+ [CM_DREQ_COUNTER]);
cm_issue_drep(work->port, work->mad_recv_wc);
+ trace_icm_no_priv_err(
+ IBA_GET(CM_DREQ_LOCAL_COMM_ID, dreq_msg),
+ IBA_GET(CM_DREQ_REMOTE_COMM_ID, dreq_msg));
return -EINVAL;
}
- work->cm_event.private_data = &dreq_msg->private_data;
+ work->cm_event.private_data =
+ IBA_GET_MEM_PTR(CM_DREQ_PRIVATE_DATA, dreq_msg);
spin_lock_irq(&cm_id_priv->lock);
- if (cm_id_priv->local_qpn != cm_dreq_get_remote_qpn(dreq_msg))
+ if (cm_id_priv->local_qpn !=
+ cpu_to_be32(IBA_GET(CM_DREQ_REMOTE_QPN_EECN, dreq_msg)))
goto unlock;
switch (cm_id_priv->id.state) {
case IB_CM_REP_SENT:
case IB_CM_DREQ_SENT:
- ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
+ case IB_CM_MRA_REP_RCVD:
+ ib_cancel_mad(cm_id_priv->msg);
break;
case IB_CM_ESTABLISHED:
if (cm_id_priv->id.lap_state == IB_CM_LAP_SENT ||
cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD)
- ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
- break;
- case IB_CM_MRA_REP_RCVD:
+ ib_cancel_mad(cm_id_priv->msg);
break;
case IB_CM_TIMEWAIT:
- atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
- counter[CM_DREQ_COUNTER]);
- if (cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg))
+ atomic_long_inc(&work->port->counters[CM_RECV_DUPLICATES]
+ [CM_DREQ_COUNTER]);
+ msg = cm_alloc_response_msg_no_ah(work->port, work->mad_recv_wc,
+ true);
+ if (IS_ERR(msg))
goto unlock;
cm_format_drep((struct cm_drep_msg *) msg->mad, cm_id_priv,
@@ -2404,27 +2888,21 @@ static int cm_dreq_handler(struct cm_work *work)
cm_id_priv->private_data_len);
spin_unlock_irq(&cm_id_priv->lock);
- if (ib_post_send_mad(msg, NULL))
+ if (cm_create_response_msg_ah(work->port, work->mad_recv_wc, msg) ||
+ ib_post_send_mad(msg, NULL))
cm_free_msg(msg);
goto deref;
case IB_CM_DREQ_RCVD:
- atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
- counter[CM_DREQ_COUNTER]);
+ atomic_long_inc(&work->port->counters[CM_RECV_DUPLICATES]
+ [CM_DREQ_COUNTER]);
goto unlock;
default:
+ trace_icm_dreq_unknown_err(&cm_id_priv->id);
goto unlock;
}
cm_id_priv->id.state = IB_CM_DREQ_RCVD;
cm_id_priv->tid = dreq_msg->hdr.tid;
- ret = atomic_inc_and_test(&cm_id_priv->work_count);
- if (!ret)
- list_add_tail(&work->list, &cm_id_priv->work_list);
- spin_unlock_irq(&cm_id_priv->lock);
-
- if (ret)
- cm_process_work(cm_id_priv, work);
- else
- cm_deref_id(cm_id_priv);
+ cm_queue_work_unlock(cm_id_priv, work);
return 0;
unlock: spin_unlock_irq(&cm_id_priv->lock);
@@ -2436,15 +2914,16 @@ static int cm_drep_handler(struct cm_work *work)
{
struct cm_id_private *cm_id_priv;
struct cm_drep_msg *drep_msg;
- int ret;
drep_msg = (struct cm_drep_msg *)work->mad_recv_wc->recv_buf.mad;
- cm_id_priv = cm_acquire_id(drep_msg->remote_comm_id,
- drep_msg->local_comm_id);
+ cm_id_priv = cm_acquire_id(
+ cpu_to_be32(IBA_GET(CM_DREP_REMOTE_COMM_ID, drep_msg)),
+ cpu_to_be32(IBA_GET(CM_DREP_LOCAL_COMM_ID, drep_msg)));
if (!cm_id_priv)
return -EINVAL;
- work->cm_event.private_data = &drep_msg->private_data;
+ work->cm_event.private_data =
+ IBA_GET_MEM_PTR(CM_DREP_PRIVATE_DATA, drep_msg);
spin_lock_irq(&cm_id_priv->lock);
if (cm_id_priv->id.state != IB_CM_DREQ_SENT &&
@@ -2454,79 +2933,83 @@ static int cm_drep_handler(struct cm_work *work)
}
cm_enter_timewait(cm_id_priv);
- ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
- ret = atomic_inc_and_test(&cm_id_priv->work_count);
- if (!ret)
- list_add_tail(&work->list, &cm_id_priv->work_list);
- spin_unlock_irq(&cm_id_priv->lock);
-
- if (ret)
- cm_process_work(cm_id_priv, work);
- else
- cm_deref_id(cm_id_priv);
+ ib_cancel_mad(cm_id_priv->msg);
+ cm_queue_work_unlock(cm_id_priv, work);
return 0;
out:
cm_deref_id(cm_id_priv);
return -EINVAL;
}
-int ib_send_cm_rej(struct ib_cm_id *cm_id,
- enum ib_cm_rej_reason reason,
- void *ari,
- u8 ari_length,
- const void *private_data,
- u8 private_data_len)
+static int cm_send_rej_locked(struct cm_id_private *cm_id_priv,
+ enum ib_cm_rej_reason reason, void *ari,
+ u8 ari_length, const void *private_data,
+ u8 private_data_len)
{
- struct cm_id_private *cm_id_priv;
+ enum ib_cm_state state = cm_id_priv->id.state;
struct ib_mad_send_buf *msg;
- unsigned long flags;
int ret;
+ lockdep_assert_held(&cm_id_priv->lock);
+
if ((private_data && private_data_len > IB_CM_REJ_PRIVATE_DATA_SIZE) ||
(ari && ari_length > IB_CM_REJ_ARI_LENGTH))
return -EINVAL;
- cm_id_priv = container_of(cm_id, struct cm_id_private, id);
+ trace_icm_send_rej(&cm_id_priv->id, reason);
- spin_lock_irqsave(&cm_id_priv->lock, flags);
- switch (cm_id->state) {
+ switch (state) {
case IB_CM_REQ_SENT:
case IB_CM_MRA_REQ_RCVD:
case IB_CM_REQ_RCVD:
case IB_CM_MRA_REQ_SENT:
case IB_CM_REP_RCVD:
case IB_CM_MRA_REP_SENT:
- ret = cm_alloc_msg(cm_id_priv, &msg);
- if (!ret)
- cm_format_rej((struct cm_rej_msg *) msg->mad,
- cm_id_priv, reason, ari, ari_length,
- private_data, private_data_len);
-
cm_reset_to_idle(cm_id_priv);
+ msg = cm_alloc_msg(cm_id_priv);
+ if (IS_ERR(msg))
+ return PTR_ERR(msg);
+ cm_format_rej((struct cm_rej_msg *)msg->mad, cm_id_priv, reason,
+ ari, ari_length, private_data, private_data_len,
+ state);
break;
case IB_CM_REP_SENT:
case IB_CM_MRA_REP_RCVD:
- ret = cm_alloc_msg(cm_id_priv, &msg);
- if (!ret)
- cm_format_rej((struct cm_rej_msg *) msg->mad,
- cm_id_priv, reason, ari, ari_length,
- private_data, private_data_len);
-
cm_enter_timewait(cm_id_priv);
+ msg = cm_alloc_msg(cm_id_priv);
+ if (IS_ERR(msg))
+ return PTR_ERR(msg);
+ cm_format_rej((struct cm_rej_msg *)msg->mad, cm_id_priv, reason,
+ ari, ari_length, private_data, private_data_len,
+ state);
break;
default:
- ret = -EINVAL;
- goto out;
+ trace_icm_send_unknown_rej_err(&cm_id_priv->id);
+ return -EINVAL;
}
- if (ret)
- goto out;
-
ret = ib_post_send_mad(msg, NULL);
- if (ret)
+ if (ret) {
cm_free_msg(msg);
+ return ret;
+ }
-out: spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+ return 0;
+}
+
+int ib_send_cm_rej(struct ib_cm_id *cm_id, enum ib_cm_rej_reason reason,
+ void *ari, u8 ari_length, const void *private_data,
+ u8 private_data_len)
+{
+ struct cm_id_private *cm_id_priv =
+ container_of(cm_id, struct cm_id_private, id);
+ unsigned long flags;
+ int ret;
+
+ spin_lock_irqsave(&cm_id_priv->lock, flags);
+ ret = cm_send_rej_locked(cm_id_priv, reason, ari, ari_length,
+ private_data, private_data_len);
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
return ret;
}
EXPORT_SYMBOL(ib_send_cm_rej);
@@ -2538,42 +3021,33 @@ static void cm_format_rej_event(struct cm_work *work)
rej_msg = (struct cm_rej_msg *)work->mad_recv_wc->recv_buf.mad;
param = &work->cm_event.param.rej_rcvd;
- param->ari = rej_msg->ari;
- param->ari_length = cm_rej_get_reject_info_len(rej_msg);
- param->reason = __be16_to_cpu(rej_msg->reason);
- work->cm_event.private_data = &rej_msg->private_data;
+ param->ari = IBA_GET_MEM_PTR(CM_REJ_ARI, rej_msg);
+ param->ari_length = IBA_GET(CM_REJ_REJECTED_INFO_LENGTH, rej_msg);
+ param->reason = IBA_GET(CM_REJ_REASON, rej_msg);
+ work->cm_event.private_data =
+ IBA_GET_MEM_PTR(CM_REJ_PRIVATE_DATA, rej_msg);
}
-static struct cm_id_private * cm_acquire_rejected_id(struct cm_rej_msg *rej_msg)
+static struct cm_id_private *cm_acquire_rejected_id(struct cm_rej_msg *rej_msg)
{
- struct cm_timewait_info *timewait_info;
struct cm_id_private *cm_id_priv;
__be32 remote_id;
- remote_id = rej_msg->local_comm_id;
-
- if (__be16_to_cpu(rej_msg->reason) == IB_CM_REJ_TIMEOUT) {
- spin_lock_irq(&cm.lock);
- timewait_info = cm_find_remote_id( *((__be64 *) rej_msg->ari),
- remote_id);
- if (!timewait_info) {
- spin_unlock_irq(&cm.lock);
- return NULL;
- }
- cm_id_priv = idr_find(&cm.local_id_table, (__force int)
- (timewait_info->work.local_id ^
- cm.random_id_operand));
- if (cm_id_priv) {
- if (cm_id_priv->id.remote_id == remote_id)
- atomic_inc(&cm_id_priv->refcount);
- else
- cm_id_priv = NULL;
- }
- spin_unlock_irq(&cm.lock);
- } else if (cm_rej_get_msg_rejected(rej_msg) == CM_MSG_RESPONSE_REQ)
- cm_id_priv = cm_acquire_id(rej_msg->remote_comm_id, 0);
+ remote_id = cpu_to_be32(IBA_GET(CM_REJ_LOCAL_COMM_ID, rej_msg));
+
+ if (IBA_GET(CM_REJ_REASON, rej_msg) == IB_CM_REJ_TIMEOUT) {
+ cm_id_priv = cm_find_remote_id(
+ *((__be64 *)IBA_GET_MEM_PTR(CM_REJ_ARI, rej_msg)),
+ remote_id);
+ } else if (IBA_GET(CM_REJ_MESSAGE_REJECTED, rej_msg) ==
+ CM_MSG_RESPONSE_REQ)
+ cm_id_priv = cm_acquire_id(
+ cpu_to_be32(IBA_GET(CM_REJ_REMOTE_COMM_ID, rej_msg)),
+ 0);
else
- cm_id_priv = cm_acquire_id(rej_msg->remote_comm_id, remote_id);
+ cm_id_priv = cm_acquire_id(
+ cpu_to_be32(IBA_GET(CM_REJ_REMOTE_COMM_ID, rej_msg)),
+ remote_id);
return cm_id_priv;
}
@@ -2582,7 +3056,6 @@ static int cm_rej_handler(struct cm_work *work)
{
struct cm_id_private *cm_id_priv;
struct cm_rej_msg *rej_msg;
- int ret;
rej_msg = (struct cm_rej_msg *)work->mad_recv_wc->recv_buf.mad;
cm_id_priv = cm_acquire_rejected_id(rej_msg);
@@ -2597,18 +3070,18 @@ static int cm_rej_handler(struct cm_work *work)
case IB_CM_MRA_REQ_RCVD:
case IB_CM_REP_SENT:
case IB_CM_MRA_REP_RCVD:
- ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
- /* fall through */
+ ib_cancel_mad(cm_id_priv->msg);
+ fallthrough;
case IB_CM_REQ_RCVD:
case IB_CM_MRA_REQ_SENT:
- if (__be16_to_cpu(rej_msg->reason) == IB_CM_REJ_STALE_CONN)
+ if (IBA_GET(CM_REJ_REASON, rej_msg) == IB_CM_REJ_STALE_CONN)
cm_enter_timewait(cm_id_priv);
else
cm_reset_to_idle(cm_id_priv);
break;
case IB_CM_DREQ_SENT:
- ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
- /* fall through */
+ ib_cancel_mad(cm_id_priv->msg);
+ fallthrough;
case IB_CM_REP_RCVD:
case IB_CM_MRA_REP_SENT:
cm_enter_timewait(cm_id_priv);
@@ -2617,120 +3090,79 @@ static int cm_rej_handler(struct cm_work *work)
if (cm_id_priv->id.lap_state == IB_CM_LAP_UNINIT ||
cm_id_priv->id.lap_state == IB_CM_LAP_SENT) {
if (cm_id_priv->id.lap_state == IB_CM_LAP_SENT)
- ib_cancel_mad(cm_id_priv->av.port->mad_agent,
- cm_id_priv->msg);
+ ib_cancel_mad(cm_id_priv->msg);
cm_enter_timewait(cm_id_priv);
break;
}
- /* fall through */
+ fallthrough;
default:
+ trace_icm_rej_unknown_err(&cm_id_priv->id);
spin_unlock_irq(&cm_id_priv->lock);
- ret = -EINVAL;
goto out;
}
- ret = atomic_inc_and_test(&cm_id_priv->work_count);
- if (!ret)
- list_add_tail(&work->list, &cm_id_priv->work_list);
- spin_unlock_irq(&cm_id_priv->lock);
-
- if (ret)
- cm_process_work(cm_id_priv, work);
- else
- cm_deref_id(cm_id_priv);
+ cm_queue_work_unlock(cm_id_priv, work);
return 0;
out:
cm_deref_id(cm_id_priv);
return -EINVAL;
}
-int ib_send_cm_mra(struct ib_cm_id *cm_id,
- u8 service_timeout,
- const void *private_data,
- u8 private_data_len)
+int ib_prepare_cm_mra(struct ib_cm_id *cm_id)
{
struct cm_id_private *cm_id_priv;
- struct ib_mad_send_buf *msg;
enum ib_cm_state cm_state;
enum ib_cm_lap_state lap_state;
- enum cm_msg_response msg_response;
- void *data;
unsigned long flags;
- int ret;
-
- if (private_data && private_data_len > IB_CM_MRA_PRIVATE_DATA_SIZE)
- return -EINVAL;
-
- data = cm_copy_private_data(private_data, private_data_len);
- if (IS_ERR(data))
- return PTR_ERR(data);
+ int ret = 0;
cm_id_priv = container_of(cm_id, struct cm_id_private, id);
spin_lock_irqsave(&cm_id_priv->lock, flags);
- switch(cm_id_priv->id.state) {
+ switch (cm_id_priv->id.state) {
case IB_CM_REQ_RCVD:
cm_state = IB_CM_MRA_REQ_SENT;
lap_state = cm_id->lap_state;
- msg_response = CM_MSG_RESPONSE_REQ;
break;
case IB_CM_REP_RCVD:
cm_state = IB_CM_MRA_REP_SENT;
lap_state = cm_id->lap_state;
- msg_response = CM_MSG_RESPONSE_REP;
break;
case IB_CM_ESTABLISHED:
if (cm_id->lap_state == IB_CM_LAP_RCVD) {
cm_state = cm_id->state;
lap_state = IB_CM_MRA_LAP_SENT;
- msg_response = CM_MSG_RESPONSE_OTHER;
break;
}
+ fallthrough;
default:
+ trace_icm_prepare_mra_unknown_err(&cm_id_priv->id);
ret = -EINVAL;
- goto error1;
- }
-
- if (!(service_timeout & IB_CM_MRA_FLAG_DELAY)) {
- ret = cm_alloc_msg(cm_id_priv, &msg);
- if (ret)
- goto error1;
-
- cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
- msg_response, service_timeout,
- private_data, private_data_len);
- ret = ib_post_send_mad(msg, NULL);
- if (ret)
- goto error2;
+ goto error_unlock;
}
cm_id->state = cm_state;
cm_id->lap_state = lap_state;
- cm_id_priv->service_timeout = service_timeout;
- cm_set_private_data(cm_id_priv, data, private_data_len);
- spin_unlock_irqrestore(&cm_id_priv->lock, flags);
- return 0;
+ cm_set_private_data(cm_id_priv, NULL, 0);
-error1: spin_unlock_irqrestore(&cm_id_priv->lock, flags);
- kfree(data);
- return ret;
-
-error2: spin_unlock_irqrestore(&cm_id_priv->lock, flags);
- kfree(data);
- cm_free_msg(msg);
+error_unlock:
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
return ret;
}
-EXPORT_SYMBOL(ib_send_cm_mra);
+EXPORT_SYMBOL(ib_prepare_cm_mra);
-static struct cm_id_private * cm_acquire_mraed_id(struct cm_mra_msg *mra_msg)
+static struct cm_id_private *cm_acquire_mraed_id(struct cm_mra_msg *mra_msg)
{
- switch (cm_mra_get_msg_mraed(mra_msg)) {
+ switch (IBA_GET(CM_MRA_MESSAGE_MRAED, mra_msg)) {
case CM_MSG_RESPONSE_REQ:
- return cm_acquire_id(mra_msg->remote_comm_id, 0);
+ return cm_acquire_id(
+ cpu_to_be32(IBA_GET(CM_MRA_REMOTE_COMM_ID, mra_msg)),
+ 0);
case CM_MSG_RESPONSE_REP:
case CM_MSG_RESPONSE_OTHER:
- return cm_acquire_id(mra_msg->remote_comm_id,
- mra_msg->local_comm_id);
+ return cm_acquire_id(
+ cpu_to_be32(IBA_GET(CM_MRA_REMOTE_COMM_ID, mra_msg)),
+ cpu_to_be32(IBA_GET(CM_MRA_LOCAL_COMM_ID, mra_msg)));
default:
return NULL;
}
@@ -2740,68 +3172,62 @@ static int cm_mra_handler(struct cm_work *work)
{
struct cm_id_private *cm_id_priv;
struct cm_mra_msg *mra_msg;
- int timeout, ret;
+ int timeout;
mra_msg = (struct cm_mra_msg *)work->mad_recv_wc->recv_buf.mad;
cm_id_priv = cm_acquire_mraed_id(mra_msg);
if (!cm_id_priv)
return -EINVAL;
- work->cm_event.private_data = &mra_msg->private_data;
+ work->cm_event.private_data =
+ IBA_GET_MEM_PTR(CM_MRA_PRIVATE_DATA, mra_msg);
work->cm_event.param.mra_rcvd.service_timeout =
- cm_mra_get_service_timeout(mra_msg);
- timeout = cm_convert_to_ms(cm_mra_get_service_timeout(mra_msg)) +
+ IBA_GET(CM_MRA_SERVICE_TIMEOUT, mra_msg);
+ timeout = cm_convert_to_ms(IBA_GET(CM_MRA_SERVICE_TIMEOUT, mra_msg)) +
cm_convert_to_ms(cm_id_priv->av.timeout);
spin_lock_irq(&cm_id_priv->lock);
switch (cm_id_priv->id.state) {
case IB_CM_REQ_SENT:
- if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_REQ ||
- ib_modify_mad(cm_id_priv->av.port->mad_agent,
- cm_id_priv->msg, timeout))
+ if (IBA_GET(CM_MRA_MESSAGE_MRAED, mra_msg) !=
+ CM_MSG_RESPONSE_REQ ||
+ ib_modify_mad(cm_id_priv->msg, timeout))
goto out;
cm_id_priv->id.state = IB_CM_MRA_REQ_RCVD;
break;
case IB_CM_REP_SENT:
- if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_REP ||
- ib_modify_mad(cm_id_priv->av.port->mad_agent,
- cm_id_priv->msg, timeout))
+ if (IBA_GET(CM_MRA_MESSAGE_MRAED, mra_msg) !=
+ CM_MSG_RESPONSE_REP ||
+ ib_modify_mad(cm_id_priv->msg, timeout))
goto out;
cm_id_priv->id.state = IB_CM_MRA_REP_RCVD;
break;
case IB_CM_ESTABLISHED:
- if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_OTHER ||
+ if (IBA_GET(CM_MRA_MESSAGE_MRAED, mra_msg) !=
+ CM_MSG_RESPONSE_OTHER ||
cm_id_priv->id.lap_state != IB_CM_LAP_SENT ||
- ib_modify_mad(cm_id_priv->av.port->mad_agent,
- cm_id_priv->msg, timeout)) {
+ ib_modify_mad(cm_id_priv->msg, timeout)) {
if (cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD)
- atomic_long_inc(&work->port->
- counter_group[CM_RECV_DUPLICATES].
- counter[CM_MRA_COUNTER]);
+ atomic_long_inc(
+ &work->port->counters[CM_RECV_DUPLICATES]
+ [CM_MRA_COUNTER]);
goto out;
}
cm_id_priv->id.lap_state = IB_CM_MRA_LAP_RCVD;
break;
case IB_CM_MRA_REQ_RCVD:
case IB_CM_MRA_REP_RCVD:
- atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
- counter[CM_MRA_COUNTER]);
- /* fall through */
+ atomic_long_inc(&work->port->counters[CM_RECV_DUPLICATES]
+ [CM_MRA_COUNTER]);
+ fallthrough;
default:
+ trace_icm_mra_unknown_err(&cm_id_priv->id);
goto out;
}
cm_id_priv->msg->context[1] = (void *) (unsigned long)
cm_id_priv->id.state;
- ret = atomic_inc_and_test(&cm_id_priv->work_count);
- if (!ret)
- list_add_tail(&work->list, &cm_id_priv->work_list);
- spin_unlock_irq(&cm_id_priv->lock);
-
- if (ret)
- cm_process_work(cm_id_priv, work);
- else
- cm_deref_id(cm_id_priv);
+ cm_queue_work_unlock(cm_id_priv, work);
return 0;
out:
spin_unlock_irq(&cm_id_priv->lock);
@@ -2809,113 +3235,50 @@ out:
return -EINVAL;
}
-static void cm_format_lap(struct cm_lap_msg *lap_msg,
- struct cm_id_private *cm_id_priv,
- struct ib_sa_path_rec *alternate_path,
- const void *private_data,
- u8 private_data_len)
+static void cm_format_path_lid_from_lap(struct cm_lap_msg *lap_msg,
+ struct sa_path_rec *path)
{
- cm_format_mad_hdr(&lap_msg->hdr, CM_LAP_ATTR_ID,
- cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_LAP));
- lap_msg->local_comm_id = cm_id_priv->id.local_id;
- lap_msg->remote_comm_id = cm_id_priv->id.remote_id;
- cm_lap_set_remote_qpn(lap_msg, cm_id_priv->remote_qpn);
- /* todo: need remote CM response timeout */
- cm_lap_set_remote_resp_timeout(lap_msg, 0x1F);
- lap_msg->alt_local_lid = alternate_path->slid;
- lap_msg->alt_remote_lid = alternate_path->dlid;
- lap_msg->alt_local_gid = alternate_path->sgid;
- lap_msg->alt_remote_gid = alternate_path->dgid;
- cm_lap_set_flow_label(lap_msg, alternate_path->flow_label);
- cm_lap_set_traffic_class(lap_msg, alternate_path->traffic_class);
- lap_msg->alt_hop_limit = alternate_path->hop_limit;
- cm_lap_set_packet_rate(lap_msg, alternate_path->rate);
- cm_lap_set_sl(lap_msg, alternate_path->sl);
- cm_lap_set_subnet_local(lap_msg, 1); /* local only... */
- cm_lap_set_local_ack_timeout(lap_msg,
- cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay,
- alternate_path->packet_life_time));
+ u32 lid;
- if (private_data && private_data_len)
- memcpy(lap_msg->private_data, private_data, private_data_len);
-}
-
-int ib_send_cm_lap(struct ib_cm_id *cm_id,
- struct ib_sa_path_rec *alternate_path,
- const void *private_data,
- u8 private_data_len)
-{
- struct cm_id_private *cm_id_priv;
- struct ib_mad_send_buf *msg;
- unsigned long flags;
- int ret;
-
- if (private_data && private_data_len > IB_CM_LAP_PRIVATE_DATA_SIZE)
- return -EINVAL;
-
- cm_id_priv = container_of(cm_id, struct cm_id_private, id);
- spin_lock_irqsave(&cm_id_priv->lock, flags);
- if (cm_id->state != IB_CM_ESTABLISHED ||
- (cm_id->lap_state != IB_CM_LAP_UNINIT &&
- cm_id->lap_state != IB_CM_LAP_IDLE)) {
- ret = -EINVAL;
- goto out;
- }
-
- ret = cm_init_av_by_path(alternate_path, &cm_id_priv->alt_av,
- cm_id_priv);
- if (ret)
- goto out;
- cm_id_priv->alt_av.timeout =
- cm_ack_timeout(cm_id_priv->target_ack_delay,
- cm_id_priv->alt_av.timeout - 1);
-
- ret = cm_alloc_msg(cm_id_priv, &msg);
- if (ret)
- goto out;
-
- cm_format_lap((struct cm_lap_msg *) msg->mad, cm_id_priv,
- alternate_path, private_data, private_data_len);
- msg->timeout_ms = cm_id_priv->timeout_ms;
- msg->context[1] = (void *) (unsigned long) IB_CM_ESTABLISHED;
+ if (path->rec_type != SA_PATH_REC_TYPE_OPA) {
+ sa_path_set_dlid(path, IBA_GET(CM_LAP_ALTERNATE_LOCAL_PORT_LID,
+ lap_msg));
+ sa_path_set_slid(path, IBA_GET(CM_LAP_ALTERNATE_REMOTE_PORT_LID,
+ lap_msg));
+ } else {
+ lid = opa_get_lid_from_gid(IBA_GET_MEM_PTR(
+ CM_LAP_ALTERNATE_LOCAL_PORT_GID, lap_msg));
+ sa_path_set_dlid(path, lid);
- ret = ib_post_send_mad(msg, NULL);
- if (ret) {
- spin_unlock_irqrestore(&cm_id_priv->lock, flags);
- cm_free_msg(msg);
- return ret;
+ lid = opa_get_lid_from_gid(IBA_GET_MEM_PTR(
+ CM_LAP_ALTERNATE_REMOTE_PORT_GID, lap_msg));
+ sa_path_set_slid(path, lid);
}
-
- cm_id->lap_state = IB_CM_LAP_SENT;
- cm_id_priv->msg = msg;
-
-out: spin_unlock_irqrestore(&cm_id_priv->lock, flags);
- return ret;
}
-EXPORT_SYMBOL(ib_send_cm_lap);
static void cm_format_path_from_lap(struct cm_id_private *cm_id_priv,
- struct ib_sa_path_rec *path,
+ struct sa_path_rec *path,
struct cm_lap_msg *lap_msg)
{
- memset(path, 0, sizeof *path);
- path->dgid = lap_msg->alt_local_gid;
- path->sgid = lap_msg->alt_remote_gid;
- path->dlid = lap_msg->alt_local_lid;
- path->slid = lap_msg->alt_remote_lid;
- path->flow_label = cm_lap_get_flow_label(lap_msg);
- path->hop_limit = lap_msg->alt_hop_limit;
- path->traffic_class = cm_lap_get_traffic_class(lap_msg);
+ path->dgid = *IBA_GET_MEM_PTR(CM_LAP_ALTERNATE_LOCAL_PORT_GID, lap_msg);
+ path->sgid =
+ *IBA_GET_MEM_PTR(CM_LAP_ALTERNATE_REMOTE_PORT_GID, lap_msg);
+ path->flow_label =
+ cpu_to_be32(IBA_GET(CM_LAP_ALTERNATE_FLOW_LABEL, lap_msg));
+ path->hop_limit = IBA_GET(CM_LAP_ALTERNATE_HOP_LIMIT, lap_msg);
+ path->traffic_class = IBA_GET(CM_LAP_ALTERNATE_TRAFFIC_CLASS, lap_msg);
path->reversible = 1;
path->pkey = cm_id_priv->pkey;
- path->sl = cm_lap_get_sl(lap_msg);
+ path->sl = IBA_GET(CM_LAP_ALTERNATE_SL, lap_msg);
path->mtu_selector = IB_SA_EQ;
path->mtu = cm_id_priv->path_mtu;
path->rate_selector = IB_SA_EQ;
- path->rate = cm_lap_get_packet_rate(lap_msg);
+ path->rate = IBA_GET(CM_LAP_ALTERNATE_PACKET_RATE, lap_msg);
path->packet_life_time_selector = IB_SA_EQ;
- path->packet_life_time = cm_lap_get_local_ack_timeout(lap_msg);
+ path->packet_life_time =
+ IBA_GET(CM_LAP_ALTERNATE_LOCAL_ACK_TIMEOUT, lap_msg);
path->packet_life_time -= (path->packet_life_time > 0);
+ cm_format_path_lid_from_lap(lap_msg, path);
}
static int cm_lap_handler(struct cm_work *work)
@@ -2924,21 +3287,55 @@ static int cm_lap_handler(struct cm_work *work)
struct cm_lap_msg *lap_msg;
struct ib_cm_lap_event_param *param;
struct ib_mad_send_buf *msg = NULL;
+ struct rdma_ah_attr ah_attr;
+ struct cm_av alt_av = {};
int ret;
+ /* Currently Alternate path messages are not supported for
+ * RoCE link layer.
+ */
+ if (rdma_protocol_roce(work->port->cm_dev->ib_device,
+ work->port->port_num))
+ return -EINVAL;
+
/* todo: verify LAP request and send reject APR if invalid. */
lap_msg = (struct cm_lap_msg *)work->mad_recv_wc->recv_buf.mad;
- cm_id_priv = cm_acquire_id(lap_msg->remote_comm_id,
- lap_msg->local_comm_id);
+ cm_id_priv = cm_acquire_id(
+ cpu_to_be32(IBA_GET(CM_LAP_REMOTE_COMM_ID, lap_msg)),
+ cpu_to_be32(IBA_GET(CM_LAP_LOCAL_COMM_ID, lap_msg)));
if (!cm_id_priv)
return -EINVAL;
param = &work->cm_event.param.lap_rcvd;
+ memset(&work->path[0], 0, sizeof(work->path[1]));
+ cm_path_set_rec_type(work->port->cm_dev->ib_device,
+ work->port->port_num, &work->path[0],
+ IBA_GET_MEM_PTR(CM_LAP_ALTERNATE_LOCAL_PORT_GID,
+ lap_msg));
param->alternate_path = &work->path[0];
cm_format_path_from_lap(cm_id_priv, param->alternate_path, lap_msg);
- work->cm_event.private_data = &lap_msg->private_data;
+ work->cm_event.private_data =
+ IBA_GET_MEM_PTR(CM_LAP_PRIVATE_DATA, lap_msg);
+
+ ret = ib_init_ah_attr_from_wc(work->port->cm_dev->ib_device,
+ work->port->port_num,
+ work->mad_recv_wc->wc,
+ work->mad_recv_wc->recv_buf.grh,
+ &ah_attr);
+ if (ret)
+ goto deref;
+
+ ret = cm_init_av_by_path(param->alternate_path, NULL, &alt_av);
+ if (ret) {
+ rdma_destroy_ah_attr(&ah_attr);
+ goto deref;
+ }
spin_lock_irq(&cm_id_priv->lock);
+ cm_init_av_for_lap(work->port, work->mad_recv_wc->wc,
+ &ah_attr, &cm_id_priv->av);
+ cm_move_av_from_path(&cm_id_priv->alt_av, &alt_av);
+
if (cm_id_priv->id.state != IB_CM_ESTABLISHED)
goto unlock;
@@ -2947,24 +3344,26 @@ static int cm_lap_handler(struct cm_work *work)
case IB_CM_LAP_IDLE:
break;
case IB_CM_MRA_LAP_SENT:
- atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
- counter[CM_LAP_COUNTER]);
- if (cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg))
+ atomic_long_inc(&work->port->counters[CM_RECV_DUPLICATES]
+ [CM_LAP_COUNTER]);
+ msg = cm_alloc_response_msg_no_ah(work->port, work->mad_recv_wc,
+ true);
+ if (IS_ERR(msg))
goto unlock;
cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
CM_MSG_RESPONSE_OTHER,
- cm_id_priv->service_timeout,
cm_id_priv->private_data,
cm_id_priv->private_data_len);
spin_unlock_irq(&cm_id_priv->lock);
- if (ib_post_send_mad(msg, NULL))
+ if (cm_create_response_msg_ah(work->port, work->mad_recv_wc, msg) ||
+ ib_post_send_mad(msg, NULL))
cm_free_msg(msg);
goto deref;
case IB_CM_LAP_RCVD:
- atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
- counter[CM_LAP_COUNTER]);
+ atomic_long_inc(&work->port->counters[CM_RECV_DUPLICATES]
+ [CM_LAP_COUNTER]);
goto unlock;
default:
goto unlock;
@@ -2972,20 +3371,7 @@ static int cm_lap_handler(struct cm_work *work)
cm_id_priv->id.lap_state = IB_CM_LAP_RCVD;
cm_id_priv->tid = lap_msg->hdr.tid;
- cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
- work->mad_recv_wc->recv_buf.grh,
- &cm_id_priv->av);
- cm_init_av_by_path(param->alternate_path, &cm_id_priv->alt_av,
- cm_id_priv);
- ret = atomic_inc_and_test(&cm_id_priv->work_count);
- if (!ret)
- list_add_tail(&work->list, &cm_id_priv->work_list);
- spin_unlock_irq(&cm_id_priv->lock);
-
- if (ret)
- cm_process_work(cm_id_priv, work);
- else
- cm_deref_id(cm_id_priv);
+ cm_queue_work_unlock(cm_id_priv, work);
return 0;
unlock: spin_unlock_irq(&cm_id_priv->lock);
@@ -2993,88 +3379,33 @@ deref: cm_deref_id(cm_id_priv);
return -EINVAL;
}
-static void cm_format_apr(struct cm_apr_msg *apr_msg,
- struct cm_id_private *cm_id_priv,
- enum ib_cm_apr_status status,
- void *info,
- u8 info_length,
- const void *private_data,
- u8 private_data_len)
-{
- cm_format_mad_hdr(&apr_msg->hdr, CM_APR_ATTR_ID, cm_id_priv->tid);
- apr_msg->local_comm_id = cm_id_priv->id.local_id;
- apr_msg->remote_comm_id = cm_id_priv->id.remote_id;
- apr_msg->ap_status = (u8) status;
-
- if (info && info_length) {
- apr_msg->info_length = info_length;
- memcpy(apr_msg->info, info, info_length);
- }
-
- if (private_data && private_data_len)
- memcpy(apr_msg->private_data, private_data, private_data_len);
-}
-
-int ib_send_cm_apr(struct ib_cm_id *cm_id,
- enum ib_cm_apr_status status,
- void *info,
- u8 info_length,
- const void *private_data,
- u8 private_data_len)
-{
- struct cm_id_private *cm_id_priv;
- struct ib_mad_send_buf *msg;
- unsigned long flags;
- int ret;
-
- if ((private_data && private_data_len > IB_CM_APR_PRIVATE_DATA_SIZE) ||
- (info && info_length > IB_CM_APR_INFO_LENGTH))
- return -EINVAL;
-
- cm_id_priv = container_of(cm_id, struct cm_id_private, id);
- spin_lock_irqsave(&cm_id_priv->lock, flags);
- if (cm_id->state != IB_CM_ESTABLISHED ||
- (cm_id->lap_state != IB_CM_LAP_RCVD &&
- cm_id->lap_state != IB_CM_MRA_LAP_SENT)) {
- ret = -EINVAL;
- goto out;
- }
-
- ret = cm_alloc_msg(cm_id_priv, &msg);
- if (ret)
- goto out;
-
- cm_format_apr((struct cm_apr_msg *) msg->mad, cm_id_priv, status,
- info, info_length, private_data, private_data_len);
- ret = ib_post_send_mad(msg, NULL);
- if (ret) {
- spin_unlock_irqrestore(&cm_id_priv->lock, flags);
- cm_free_msg(msg);
- return ret;
- }
-
- cm_id->lap_state = IB_CM_LAP_IDLE;
-out: spin_unlock_irqrestore(&cm_id_priv->lock, flags);
- return ret;
-}
-EXPORT_SYMBOL(ib_send_cm_apr);
-
static int cm_apr_handler(struct cm_work *work)
{
struct cm_id_private *cm_id_priv;
struct cm_apr_msg *apr_msg;
- int ret;
+
+ /* Currently Alternate path messages are not supported for
+ * RoCE link layer.
+ */
+ if (rdma_protocol_roce(work->port->cm_dev->ib_device,
+ work->port->port_num))
+ return -EINVAL;
apr_msg = (struct cm_apr_msg *)work->mad_recv_wc->recv_buf.mad;
- cm_id_priv = cm_acquire_id(apr_msg->remote_comm_id,
- apr_msg->local_comm_id);
+ cm_id_priv = cm_acquire_id(
+ cpu_to_be32(IBA_GET(CM_APR_REMOTE_COMM_ID, apr_msg)),
+ cpu_to_be32(IBA_GET(CM_APR_LOCAL_COMM_ID, apr_msg)));
if (!cm_id_priv)
return -EINVAL; /* Unmatched reply. */
- work->cm_event.param.apr_rcvd.ap_status = apr_msg->ap_status;
- work->cm_event.param.apr_rcvd.apr_info = &apr_msg->info;
- work->cm_event.param.apr_rcvd.info_len = apr_msg->info_length;
- work->cm_event.private_data = &apr_msg->private_data;
+ work->cm_event.param.apr_rcvd.ap_status =
+ IBA_GET(CM_APR_AR_STATUS, apr_msg);
+ work->cm_event.param.apr_rcvd.apr_info =
+ IBA_GET_MEM_PTR(CM_APR_ADDITIONAL_INFORMATION, apr_msg);
+ work->cm_event.param.apr_rcvd.info_len =
+ IBA_GET(CM_APR_ADDITIONAL_INFORMATION_LENGTH, apr_msg);
+ work->cm_event.private_data =
+ IBA_GET_MEM_PTR(CM_APR_PRIVATE_DATA, apr_msg);
spin_lock_irq(&cm_id_priv->lock);
if (cm_id_priv->id.state != IB_CM_ESTABLISHED ||
@@ -3084,18 +3415,8 @@ static int cm_apr_handler(struct cm_work *work)
goto out;
}
cm_id_priv->id.lap_state = IB_CM_LAP_IDLE;
- ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
- cm_id_priv->msg = NULL;
-
- ret = atomic_inc_and_test(&cm_id_priv->work_count);
- if (!ret)
- list_add_tail(&work->list, &cm_id_priv->work_list);
- spin_unlock_irq(&cm_id_priv->lock);
-
- if (ret)
- cm_process_work(cm_id_priv, work);
- else
- cm_deref_id(cm_id_priv);
+ ib_cancel_mad(cm_id_priv->msg);
+ cm_queue_work_unlock(cm_id_priv, work);
return 0;
out:
cm_deref_id(cm_id_priv);
@@ -3106,9 +3427,8 @@ static int cm_timewait_handler(struct cm_work *work)
{
struct cm_timewait_info *timewait_info;
struct cm_id_private *cm_id_priv;
- int ret;
- timewait_info = (struct cm_timewait_info *)work;
+ timewait_info = container_of(work, struct cm_timewait_info, work);
spin_lock_irq(&cm.lock);
list_del(&timewait_info->list);
spin_unlock_irq(&cm.lock);
@@ -3125,15 +3445,7 @@ static int cm_timewait_handler(struct cm_work *work)
goto out;
}
cm_id_priv->id.state = IB_CM_IDLE;
- ret = atomic_inc_and_test(&cm_id_priv->work_count);
- if (!ret)
- list_add_tail(&work->list, &cm_id_priv->work_list);
- spin_unlock_irq(&cm_id_priv->lock);
-
- if (ret)
- cm_process_work(cm_id_priv, work);
- else
- cm_deref_id(cm_id_priv);
+ cm_queue_work_unlock(cm_id_priv, work);
return 0;
out:
cm_deref_id(cm_id_priv);
@@ -3145,14 +3457,17 @@ static void cm_format_sidr_req(struct cm_sidr_req_msg *sidr_req_msg,
struct ib_cm_sidr_req_param *param)
{
cm_format_mad_hdr(&sidr_req_msg->hdr, CM_SIDR_REQ_ATTR_ID,
- cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_SIDR));
- sidr_req_msg->request_id = cm_id_priv->id.local_id;
- sidr_req_msg->pkey = param->path->pkey;
- sidr_req_msg->service_id = param->service_id;
+ cm_form_tid(cm_id_priv));
+ IBA_SET(CM_SIDR_REQ_REQUESTID, sidr_req_msg,
+ be32_to_cpu(cm_id_priv->id.local_id));
+ IBA_SET(CM_SIDR_REQ_PARTITION_KEY, sidr_req_msg,
+ be16_to_cpu(param->path->pkey));
+ IBA_SET(CM_SIDR_REQ_SERVICEID, sidr_req_msg,
+ be64_to_cpu(param->service_id));
if (param->private_data && param->private_data_len)
- memcpy(sidr_req_msg->private_data, param->private_data,
- param->private_data_len);
+ IBA_SET_MEM(CM_SIDR_REQ_PRIVATE_DATA, sidr_req_msg,
+ param->private_data, param->private_data_len);
}
int ib_send_cm_sidr_req(struct ib_cm_id *cm_id,
@@ -3160,6 +3475,7 @@ int ib_send_cm_sidr_req(struct ib_cm_id *cm_id,
{
struct cm_id_private *cm_id_priv;
struct ib_mad_send_buf *msg;
+ struct cm_av av = {};
unsigned long flags;
int ret;
@@ -3168,43 +3484,46 @@ int ib_send_cm_sidr_req(struct ib_cm_id *cm_id,
return -EINVAL;
cm_id_priv = container_of(cm_id, struct cm_id_private, id);
- ret = cm_init_av_by_path(param->path, &cm_id_priv->av, cm_id_priv);
+ ret = cm_init_av_by_path(param->path, param->sgid_attr, &av);
if (ret)
- goto out;
+ return ret;
+ spin_lock_irqsave(&cm_id_priv->lock, flags);
+ cm_move_av_from_path(&cm_id_priv->av, &av);
cm_id->service_id = param->service_id;
- cm_id->service_mask = ~cpu_to_be64(0);
cm_id_priv->timeout_ms = param->timeout_ms;
cm_id_priv->max_cm_retries = param->max_cm_retries;
- ret = cm_alloc_msg(cm_id_priv, &msg);
- if (ret)
- goto out;
-
- cm_format_sidr_req((struct cm_sidr_req_msg *) msg->mad, cm_id_priv,
- param);
- msg->timeout_ms = cm_id_priv->timeout_ms;
- msg->context[1] = (void *) (unsigned long) IB_CM_SIDR_REQ_SENT;
-
- spin_lock_irqsave(&cm_id_priv->lock, flags);
- if (cm_id->state == IB_CM_IDLE)
- ret = ib_post_send_mad(msg, NULL);
- else
+ if (cm_id->state != IB_CM_IDLE) {
ret = -EINVAL;
+ goto out_unlock;
+ }
- if (ret) {
- spin_unlock_irqrestore(&cm_id_priv->lock, flags);
- cm_free_msg(msg);
- goto out;
+ msg = cm_alloc_priv_msg(cm_id_priv, IB_CM_SIDR_REQ_SENT);
+ if (IS_ERR(msg)) {
+ ret = PTR_ERR(msg);
+ goto out_unlock;
}
+
+ cm_format_sidr_req((struct cm_sidr_req_msg *)msg->mad, cm_id_priv,
+ param);
+
+ trace_icm_send_sidr_req(&cm_id_priv->id);
+ ret = ib_post_send_mad(msg, NULL);
+ if (ret)
+ goto out_free;
cm_id->state = IB_CM_SIDR_REQ_SENT;
- cm_id_priv->msg = msg;
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
-out:
+ return 0;
+out_free:
+ cm_free_priv_msg(msg);
+out_unlock:
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
return ret;
}
EXPORT_SYMBOL(ib_send_cm_sidr_req);
static void cm_format_sidr_req_event(struct cm_work *work,
+ const struct cm_id_private *rx_cm_id,
struct ib_cm_id *listen_id)
{
struct cm_sidr_req_msg *sidr_req_msg;
@@ -3213,67 +3532,86 @@ static void cm_format_sidr_req_event(struct cm_work *work,
sidr_req_msg = (struct cm_sidr_req_msg *)
work->mad_recv_wc->recv_buf.mad;
param = &work->cm_event.param.sidr_req_rcvd;
- param->pkey = __be16_to_cpu(sidr_req_msg->pkey);
+ param->pkey = IBA_GET(CM_SIDR_REQ_PARTITION_KEY, sidr_req_msg);
param->listen_id = listen_id;
- param->service_id = sidr_req_msg->service_id;
+ param->service_id =
+ cpu_to_be64(IBA_GET(CM_SIDR_REQ_SERVICEID, sidr_req_msg));
param->bth_pkey = cm_get_bth_pkey(work);
param->port = work->port->port_num;
- work->cm_event.private_data = &sidr_req_msg->private_data;
+ param->sgid_attr = rx_cm_id->av.ah_attr.grh.sgid_attr;
+ work->cm_event.private_data =
+ IBA_GET_MEM_PTR(CM_SIDR_REQ_PRIVATE_DATA, sidr_req_msg);
}
static int cm_sidr_req_handler(struct cm_work *work)
{
- struct ib_cm_id *cm_id;
- struct cm_id_private *cm_id_priv, *cur_cm_id_priv;
+ struct cm_id_private *cm_id_priv, *listen_cm_id_priv;
struct cm_sidr_req_msg *sidr_req_msg;
struct ib_wc *wc;
+ int ret;
- cm_id = ib_create_cm_id(work->port->cm_dev->ib_device, NULL, NULL);
- if (IS_ERR(cm_id))
- return PTR_ERR(cm_id);
- cm_id_priv = container_of(cm_id, struct cm_id_private, id);
+ cm_id_priv =
+ cm_alloc_id_priv(work->port->cm_dev->ib_device, NULL, NULL);
+ if (IS_ERR(cm_id_priv))
+ return PTR_ERR(cm_id_priv);
/* Record SGID/SLID and request ID for lookup. */
sidr_req_msg = (struct cm_sidr_req_msg *)
work->mad_recv_wc->recv_buf.mad;
- wc = work->mad_recv_wc->wc;
- cm_id_priv->av.dgid.global.subnet_prefix = cpu_to_be64(wc->slid);
- cm_id_priv->av.dgid.global.interface_id = 0;
- cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
- work->mad_recv_wc->recv_buf.grh,
- &cm_id_priv->av);
- cm_id_priv->id.remote_id = sidr_req_msg->request_id;
+
+ cm_id_priv->id.remote_id =
+ cpu_to_be32(IBA_GET(CM_SIDR_REQ_REQUESTID, sidr_req_msg));
+ cm_id_priv->id.service_id =
+ cpu_to_be64(IBA_GET(CM_SIDR_REQ_SERVICEID, sidr_req_msg));
cm_id_priv->tid = sidr_req_msg->hdr.tid;
- atomic_inc(&cm_id_priv->work_count);
+
+ wc = work->mad_recv_wc->wc;
+ cm_id_priv->sidr_slid = wc->slid;
+ ret = cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
+ work->mad_recv_wc->recv_buf.grh,
+ &cm_id_priv->av);
+ if (ret)
+ goto out;
spin_lock_irq(&cm.lock);
- cur_cm_id_priv = cm_insert_remote_sidr(cm_id_priv);
- if (cur_cm_id_priv) {
+ listen_cm_id_priv = cm_insert_remote_sidr(cm_id_priv);
+ if (listen_cm_id_priv) {
spin_unlock_irq(&cm.lock);
- atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
- counter[CM_SIDR_REQ_COUNTER]);
+ atomic_long_inc(&work->port->counters[CM_RECV_DUPLICATES]
+ [CM_SIDR_REQ_COUNTER]);
goto out; /* Duplicate message. */
}
cm_id_priv->id.state = IB_CM_SIDR_REQ_RCVD;
- cur_cm_id_priv = cm_find_listen(cm_id->device,
- sidr_req_msg->service_id);
- if (!cur_cm_id_priv) {
+ listen_cm_id_priv = cm_find_listen(cm_id_priv->id.device,
+ cm_id_priv->id.service_id);
+ if (!listen_cm_id_priv) {
spin_unlock_irq(&cm.lock);
- cm_reject_sidr_req(cm_id_priv, IB_SIDR_UNSUPPORTED);
+ ib_send_cm_sidr_rep(&cm_id_priv->id,
+ &(struct ib_cm_sidr_rep_param){
+ .status = IB_SIDR_UNSUPPORTED });
goto out; /* No match. */
}
- atomic_inc(&cur_cm_id_priv->refcount);
- atomic_inc(&cm_id_priv->refcount);
spin_unlock_irq(&cm.lock);
- cm_id_priv->id.cm_handler = cur_cm_id_priv->id.cm_handler;
- cm_id_priv->id.context = cur_cm_id_priv->id.context;
- cm_id_priv->id.service_id = sidr_req_msg->service_id;
- cm_id_priv->id.service_mask = ~cpu_to_be64(0);
+ cm_id_priv->id.cm_handler = listen_cm_id_priv->id.cm_handler;
+ cm_id_priv->id.context = listen_cm_id_priv->id.context;
+
+ /*
+ * A SIDR ID does not need to be in the xarray since it does not receive
+ * mads, is not placed in the remote_id or remote_qpn rbtree, and does
+ * not enter timewait.
+ */
- cm_format_sidr_req_event(work, &cur_cm_id_priv->id);
- cm_process_work(cm_id_priv, work);
- cm_deref_id(cur_cm_id_priv);
+ cm_format_sidr_req_event(work, cm_id_priv, &listen_cm_id_priv->id);
+ ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, &work->cm_event);
+ cm_free_work(work);
+ /*
+ * A pointer to the listen_cm_id is held in the event, so this deref
+ * must be after the event is delivered above.
+ */
+ cm_deref_id(listen_cm_id_priv);
+ if (ret)
+ cm_destroy_id(&cm_id_priv->id, ret);
return 0;
out:
ib_destroy_cm_id(&cm_id_priv->id);
@@ -3284,57 +3622,59 @@ static void cm_format_sidr_rep(struct cm_sidr_rep_msg *sidr_rep_msg,
struct cm_id_private *cm_id_priv,
struct ib_cm_sidr_rep_param *param)
{
- cm_format_mad_hdr(&sidr_rep_msg->hdr, CM_SIDR_REP_ATTR_ID,
- cm_id_priv->tid);
- sidr_rep_msg->request_id = cm_id_priv->id.remote_id;
- sidr_rep_msg->status = param->status;
- cm_sidr_rep_set_qpn(sidr_rep_msg, cpu_to_be32(param->qp_num));
- sidr_rep_msg->service_id = cm_id_priv->id.service_id;
- sidr_rep_msg->qkey = cpu_to_be32(param->qkey);
+ cm_format_mad_ece_hdr(&sidr_rep_msg->hdr, CM_SIDR_REP_ATTR_ID,
+ cm_id_priv->tid, param->ece.attr_mod);
+ IBA_SET(CM_SIDR_REP_REQUESTID, sidr_rep_msg,
+ be32_to_cpu(cm_id_priv->id.remote_id));
+ IBA_SET(CM_SIDR_REP_STATUS, sidr_rep_msg, param->status);
+ IBA_SET(CM_SIDR_REP_QPN, sidr_rep_msg, param->qp_num);
+ IBA_SET(CM_SIDR_REP_SERVICEID, sidr_rep_msg,
+ be64_to_cpu(cm_id_priv->id.service_id));
+ IBA_SET(CM_SIDR_REP_Q_KEY, sidr_rep_msg, param->qkey);
+ IBA_SET(CM_SIDR_REP_VENDOR_ID_L, sidr_rep_msg,
+ param->ece.vendor_id & 0xFF);
+ IBA_SET(CM_SIDR_REP_VENDOR_ID_H, sidr_rep_msg,
+ (param->ece.vendor_id >> 8) & 0xFF);
if (param->info && param->info_length)
- memcpy(sidr_rep_msg->info, param->info, param->info_length);
+ IBA_SET_MEM(CM_SIDR_REP_ADDITIONAL_INFORMATION, sidr_rep_msg,
+ param->info, param->info_length);
if (param->private_data && param->private_data_len)
- memcpy(sidr_rep_msg->private_data, param->private_data,
- param->private_data_len);
+ IBA_SET_MEM(CM_SIDR_REP_PRIVATE_DATA, sidr_rep_msg,
+ param->private_data, param->private_data_len);
}
-int ib_send_cm_sidr_rep(struct ib_cm_id *cm_id,
- struct ib_cm_sidr_rep_param *param)
+static int cm_send_sidr_rep_locked(struct cm_id_private *cm_id_priv,
+ struct ib_cm_sidr_rep_param *param)
{
- struct cm_id_private *cm_id_priv;
struct ib_mad_send_buf *msg;
unsigned long flags;
int ret;
+ lockdep_assert_held(&cm_id_priv->lock);
+
if ((param->info && param->info_length > IB_CM_SIDR_REP_INFO_LENGTH) ||
(param->private_data &&
param->private_data_len > IB_CM_SIDR_REP_PRIVATE_DATA_SIZE))
return -EINVAL;
- cm_id_priv = container_of(cm_id, struct cm_id_private, id);
- spin_lock_irqsave(&cm_id_priv->lock, flags);
- if (cm_id->state != IB_CM_SIDR_REQ_RCVD) {
- ret = -EINVAL;
- goto error;
- }
+ if (cm_id_priv->id.state != IB_CM_SIDR_REQ_RCVD)
+ return -EINVAL;
- ret = cm_alloc_msg(cm_id_priv, &msg);
- if (ret)
- goto error;
+ msg = cm_alloc_msg(cm_id_priv);
+ if (IS_ERR(msg))
+ return PTR_ERR(msg);
cm_format_sidr_rep((struct cm_sidr_rep_msg *) msg->mad, cm_id_priv,
param);
+ trace_icm_send_sidr_rep(&cm_id_priv->id);
ret = ib_post_send_mad(msg, NULL);
if (ret) {
- spin_unlock_irqrestore(&cm_id_priv->lock, flags);
cm_free_msg(msg);
return ret;
}
- cm_id->state = IB_CM_IDLE;
- spin_unlock_irqrestore(&cm_id_priv->lock, flags);
-
+ cm_id_priv->id.state = IB_CM_IDLE;
spin_lock_irqsave(&cm.lock, flags);
if (!RB_EMPTY_NODE(&cm_id_priv->sidr_id_node)) {
rb_erase(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table);
@@ -3342,13 +3682,25 @@ int ib_send_cm_sidr_rep(struct ib_cm_id *cm_id,
}
spin_unlock_irqrestore(&cm.lock, flags);
return 0;
+}
-error: spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+int ib_send_cm_sidr_rep(struct ib_cm_id *cm_id,
+ struct ib_cm_sidr_rep_param *param)
+{
+ struct cm_id_private *cm_id_priv =
+ container_of(cm_id, struct cm_id_private, id);
+ unsigned long flags;
+ int ret;
+
+ spin_lock_irqsave(&cm_id_priv->lock, flags);
+ ret = cm_send_sidr_rep_locked(cm_id_priv, param);
+ spin_unlock_irqrestore(&cm_id_priv->lock, flags);
return ret;
}
EXPORT_SYMBOL(ib_send_cm_sidr_rep);
-static void cm_format_sidr_rep_event(struct cm_work *work)
+static void cm_format_sidr_rep_event(struct cm_work *work,
+ const struct cm_id_private *cm_id_priv)
{
struct cm_sidr_rep_msg *sidr_rep_msg;
struct ib_cm_sidr_rep_event_param *param;
@@ -3356,12 +3708,16 @@ static void cm_format_sidr_rep_event(struct cm_work *work)
sidr_rep_msg = (struct cm_sidr_rep_msg *)
work->mad_recv_wc->recv_buf.mad;
param = &work->cm_event.param.sidr_rep_rcvd;
- param->status = sidr_rep_msg->status;
- param->qkey = be32_to_cpu(sidr_rep_msg->qkey);
- param->qpn = be32_to_cpu(cm_sidr_rep_get_qpn(sidr_rep_msg));
- param->info = &sidr_rep_msg->info;
- param->info_len = sidr_rep_msg->info_length;
- work->cm_event.private_data = &sidr_rep_msg->private_data;
+ param->status = IBA_GET(CM_SIDR_REP_STATUS, sidr_rep_msg);
+ param->qkey = IBA_GET(CM_SIDR_REP_Q_KEY, sidr_rep_msg);
+ param->qpn = IBA_GET(CM_SIDR_REP_QPN, sidr_rep_msg);
+ param->info = IBA_GET_MEM_PTR(CM_SIDR_REP_ADDITIONAL_INFORMATION,
+ sidr_rep_msg);
+ param->info_len = IBA_GET(CM_SIDR_REP_ADDITIONAL_INFORMATION_LENGTH,
+ sidr_rep_msg);
+ param->sgid_attr = cm_id_priv->av.ah_attr.grh.sgid_attr;
+ work->cm_event.private_data =
+ IBA_GET_MEM_PTR(CM_SIDR_REP_PRIVATE_DATA, sidr_rep_msg);
}
static int cm_sidr_rep_handler(struct cm_work *work)
@@ -3371,7 +3727,8 @@ static int cm_sidr_rep_handler(struct cm_work *work)
sidr_rep_msg = (struct cm_sidr_rep_msg *)
work->mad_recv_wc->recv_buf.mad;
- cm_id_priv = cm_acquire_id(sidr_rep_msg->request_id, 0);
+ cm_id_priv = cm_acquire_id(
+ cpu_to_be32(IBA_GET(CM_SIDR_REP_REQUESTID, sidr_rep_msg)), 0);
if (!cm_id_priv)
return -EINVAL; /* Unmatched reply. */
@@ -3381,10 +3738,10 @@ static int cm_sidr_rep_handler(struct cm_work *work)
goto out;
}
cm_id_priv->id.state = IB_CM_IDLE;
- ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
+ ib_cancel_mad(cm_id_priv->msg);
spin_unlock_irq(&cm_id_priv->lock);
- cm_format_sidr_rep_event(work);
+ cm_format_sidr_rep_event(work, cm_id_priv);
cm_process_work(cm_id_priv, work);
return 0;
out:
@@ -3392,23 +3749,29 @@ out:
return -EINVAL;
}
-static void cm_process_send_error(struct ib_mad_send_buf *msg,
+static void cm_process_send_error(struct cm_id_private *cm_id_priv,
+ struct ib_mad_send_buf *msg,
enum ib_wc_status wc_status)
{
- struct cm_id_private *cm_id_priv;
- struct ib_cm_event cm_event;
- enum ib_cm_state state;
+ enum ib_cm_state state = (unsigned long) msg->context[1];
+ struct ib_cm_event cm_event = {};
int ret;
- memset(&cm_event, 0, sizeof cm_event);
- cm_id_priv = msg->context[0];
-
- /* Discard old sends or ones without a response. */
+ /* Discard old sends. */
spin_lock_irq(&cm_id_priv->lock);
- state = (enum ib_cm_state) (unsigned long) msg->context[1];
- if (msg != cm_id_priv->msg || state != cm_id_priv->id.state)
- goto discard;
+ if (msg != cm_id_priv->msg) {
+ spin_unlock_irq(&cm_id_priv->lock);
+ cm_free_msg(msg);
+ cm_deref_id(cm_id_priv);
+ return;
+ }
+ cm_free_priv_msg(msg);
+
+ if (state != cm_id_priv->id.state || wc_status == IB_WC_SUCCESS ||
+ wc_status == IB_WC_WR_FLUSH_ERR)
+ goto out_unlock;
+ trace_icm_mad_send_err(state, wc_status);
switch (state) {
case IB_CM_REQ_SENT:
case IB_CM_MRA_REQ_RCVD:
@@ -3429,26 +3792,25 @@ static void cm_process_send_error(struct ib_mad_send_buf *msg,
cm_event.event = IB_CM_SIDR_REQ_ERROR;
break;
default:
- goto discard;
+ goto out_unlock;
}
spin_unlock_irq(&cm_id_priv->lock);
cm_event.param.send_status = wc_status;
/* No other events can occur on the cm_id at this point. */
ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, &cm_event);
- cm_free_msg(msg);
if (ret)
ib_destroy_cm_id(&cm_id_priv->id);
return;
-discard:
+out_unlock:
spin_unlock_irq(&cm_id_priv->lock);
- cm_free_msg(msg);
}
static void cm_send_handler(struct ib_mad_agent *mad_agent,
struct ib_mad_send_wc *mad_send_wc)
{
struct ib_mad_send_buf *msg = mad_send_wc->send_buf;
+ struct cm_id_private *cm_id_priv;
struct cm_port *port;
u16 attr_index;
@@ -3456,33 +3818,22 @@ static void cm_send_handler(struct ib_mad_agent *mad_agent,
attr_index = be16_to_cpu(((struct ib_mad_hdr *)
msg->mad)->attr_id) - CM_ATTR_ID_OFFSET;
- /*
- * If the send was in response to a received message (context[0] is not
- * set to a cm_id), and is not a REJ, then it is a send that was
- * manually retried.
- */
- if (!msg->context[0] && (attr_index != CM_REJ_COUNTER))
+ if (msg->context[0] == CM_DIRECT_RETRY_CTX) {
msg->retries = 1;
+ cm_id_priv = NULL;
+ } else {
+ cm_id_priv = msg->context[0];
+ }
- atomic_long_add(1 + msg->retries,
- &port->counter_group[CM_XMIT].counter[attr_index]);
+ atomic_long_add(1 + msg->retries, &port->counters[CM_XMIT][attr_index]);
if (msg->retries)
atomic_long_add(msg->retries,
- &port->counter_group[CM_XMIT_RETRIES].
- counter[attr_index]);
+ &port->counters[CM_XMIT_RETRIES][attr_index]);
- switch (mad_send_wc->status) {
- case IB_WC_SUCCESS:
- case IB_WC_WR_FLUSH_ERR:
+ if (cm_id_priv)
+ cm_process_send_error(cm_id_priv, msg, mad_send_wc->status);
+ else
cm_free_msg(msg);
- break;
- default:
- if (msg->context[0] && msg->context[1])
- cm_process_send_error(msg, mad_send_wc->status);
- else
- cm_free_msg(msg);
- break;
- }
}
static void cm_work_handler(struct work_struct *_work)
@@ -3531,6 +3882,7 @@ static void cm_work_handler(struct work_struct *_work)
ret = cm_timewait_handler(work);
break;
default:
+ trace_icm_handler_err(work->cm_event.event);
ret = -EINVAL;
break;
}
@@ -3556,8 +3908,7 @@ static int cm_establish(struct ib_cm_id *cm_id)
cm_id_priv = container_of(cm_id, struct cm_id_private, id);
spin_lock_irqsave(&cm_id_priv->lock, flags);
- switch (cm_id->state)
- {
+ switch (cm_id->state) {
case IB_CM_REP_SENT:
case IB_CM_MRA_REP_RCVD:
cm_id->state = IB_CM_ESTABLISHED;
@@ -3566,6 +3917,7 @@ static int cm_establish(struct ib_cm_id *cm_id)
ret = -EISCONN;
break;
default:
+ trace_icm_establish_err(cm_id);
ret = -EINVAL;
break;
}
@@ -3605,9 +3957,7 @@ out:
static int cm_migrate(struct ib_cm_id *cm_id)
{
struct cm_id_private *cm_id_priv;
- struct cm_av tmp_av;
unsigned long flags;
- int tmp_send_port_not_ready;
int ret = 0;
cm_id_priv = container_of(cm_id, struct cm_id_private, id);
@@ -3616,14 +3966,7 @@ static int cm_migrate(struct ib_cm_id *cm_id)
(cm_id->lap_state == IB_CM_LAP_UNINIT ||
cm_id->lap_state == IB_CM_LAP_IDLE)) {
cm_id->lap_state = IB_CM_LAP_IDLE;
- /* Swap address vector */
- tmp_av = cm_id_priv->av;
cm_id_priv->av = cm_id_priv->alt_av;
- cm_id_priv->alt_av = tmp_av;
- /* Swap port send ready state */
- tmp_send_port_not_ready = cm_id_priv->prim_send_port_not_ready;
- cm_id_priv->prim_send_port_not_ready = cm_id_priv->altr_send_port_not_ready;
- cm_id_priv->altr_send_port_not_ready = tmp_send_port_not_ready;
} else
ret = -EINVAL;
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
@@ -3656,14 +3999,16 @@ static void cm_recv_handler(struct ib_mad_agent *mad_agent,
struct cm_port *port = mad_agent->context;
struct cm_work *work;
enum ib_cm_event_type event;
+ bool alt_path = false;
u16 attr_id;
int paths = 0;
int going_down = 0;
switch (mad_recv_wc->recv_buf.mad->mad_hdr.attr_id) {
case CM_REQ_ATTR_ID:
- paths = 1 + (((struct cm_req_msg *) mad_recv_wc->recv_buf.mad)->
- alt_local_lid != 0);
+ alt_path = cm_req_has_alt_path((struct cm_req_msg *)
+ mad_recv_wc->recv_buf.mad);
+ paths = 1 + (alt_path != 0);
event = IB_CM_REQ_RECEIVED;
break;
case CM_MRA_ATTR_ID:
@@ -3703,11 +4048,9 @@ static void cm_recv_handler(struct ib_mad_agent *mad_agent,
}
attr_id = be16_to_cpu(mad_recv_wc->recv_buf.mad->mad_hdr.attr_id);
- atomic_long_inc(&port->counter_group[CM_RECV].
- counter[attr_id - CM_ATTR_ID_OFFSET]);
+ atomic_long_inc(&port->counters[CM_RECV][attr_id - CM_ATTR_ID_OFFSET]);
- work = kmalloc(sizeof *work + sizeof(struct ib_sa_path_rec) * paths,
- GFP_KERNEL);
+ work = kmalloc(struct_size(work, path, paths), GFP_KERNEL);
if (!work) {
ib_free_recv_mad(mad_recv_wc);
return;
@@ -3753,14 +4096,25 @@ static int cm_init_qp_init_attr(struct cm_id_private *cm_id_priv,
*qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS |
IB_QP_PKEY_INDEX | IB_QP_PORT;
qp_attr->qp_access_flags = IB_ACCESS_REMOTE_WRITE;
- if (cm_id_priv->responder_resources)
+ if (cm_id_priv->responder_resources) {
+ struct ib_device *ib_dev = cm_id_priv->id.device;
+ u64 support_flush = ib_dev->attrs.device_cap_flags &
+ (IB_DEVICE_FLUSH_GLOBAL | IB_DEVICE_FLUSH_PERSISTENT);
+ u32 flushable = support_flush ?
+ (IB_ACCESS_FLUSH_GLOBAL |
+ IB_ACCESS_FLUSH_PERSISTENT) : 0;
+
qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_READ |
- IB_ACCESS_REMOTE_ATOMIC;
+ IB_ACCESS_REMOTE_ATOMIC |
+ flushable;
+ }
qp_attr->pkey_index = cm_id_priv->av.pkey_index;
- qp_attr->port_num = cm_id_priv->av.port->port_num;
+ if (cm_id_priv->av.port)
+ qp_attr->port_num = cm_id_priv->av.port->port_num;
ret = 0;
break;
default:
+ trace_icm_qp_init_err(&cm_id_priv->id);
ret = -EINVAL;
break;
}
@@ -3787,6 +4141,10 @@ static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv,
*qp_attr_mask = IB_QP_STATE | IB_QP_AV | IB_QP_PATH_MTU |
IB_QP_DEST_QPN | IB_QP_RQ_PSN;
qp_attr->ah_attr = cm_id_priv->av.ah_attr;
+ if ((qp_attr->ah_attr.type == RDMA_AH_ATTR_TYPE_IB) &&
+ cm_id_priv->av.dlid_datapath &&
+ (cm_id_priv->av.dlid_datapath != 0xffff))
+ qp_attr->ah_attr.ib.dlid = cm_id_priv->av.dlid_datapath;
qp_attr->path_mtu = cm_id_priv->path_mtu;
qp_attr->dest_qp_num = be32_to_cpu(cm_id_priv->remote_qpn);
qp_attr->rq_psn = be32_to_cpu(cm_id_priv->rq_psn);
@@ -3798,7 +4156,8 @@ static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv,
cm_id_priv->responder_resources;
qp_attr->min_rnr_timer = 0;
}
- if (cm_id_priv->alt_av.ah_attr.dlid) {
+ if (rdma_ah_get_dlid(&cm_id_priv->alt_av.ah_attr) &&
+ cm_id_priv->alt_av.port) {
*qp_attr_mask |= IB_QP_ALT_PATH;
qp_attr->alt_port_num = cm_id_priv->alt_av.port->port_num;
qp_attr->alt_pkey_index = cm_id_priv->alt_av.pkey_index;
@@ -3808,6 +4167,7 @@ static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv,
ret = 0;
break;
default:
+ trace_icm_qp_rtr_err(&cm_id_priv->id);
ret = -EINVAL;
break;
}
@@ -3844,7 +4204,7 @@ static int cm_init_qp_rts_attr(struct cm_id_private *cm_id_priv,
qp_attr->retry_cnt = cm_id_priv->retry_count;
qp_attr->rnr_retry = cm_id_priv->rnr_retry_count;
qp_attr->max_rd_atomic = cm_id_priv->initiator_depth;
- /* fall through */
+ fallthrough;
case IB_QPT_XRC_TGT:
*qp_attr_mask |= IB_QP_TIMEOUT;
qp_attr->timeout = cm_id_priv->av.timeout;
@@ -3852,13 +4212,15 @@ static int cm_init_qp_rts_attr(struct cm_id_private *cm_id_priv,
default:
break;
}
- if (cm_id_priv->alt_av.ah_attr.dlid) {
+ if (rdma_ah_get_dlid(&cm_id_priv->alt_av.ah_attr)) {
*qp_attr_mask |= IB_QP_PATH_MIG_STATE;
qp_attr->path_mig_state = IB_MIG_REARM;
}
} else {
*qp_attr_mask = IB_QP_ALT_PATH | IB_QP_PATH_MIG_STATE;
- qp_attr->alt_port_num = cm_id_priv->alt_av.port->port_num;
+ if (cm_id_priv->alt_av.port)
+ qp_attr->alt_port_num =
+ cm_id_priv->alt_av.port->port_num;
qp_attr->alt_pkey_index = cm_id_priv->alt_av.pkey_index;
qp_attr->alt_timeout = cm_id_priv->alt_av.timeout;
qp_attr->alt_ah_attr = cm_id_priv->alt_av.ah_attr;
@@ -3867,6 +4229,7 @@ static int cm_init_qp_rts_attr(struct cm_id_private *cm_id_priv,
ret = 0;
break;
default:
+ trace_icm_qp_rts_err(&cm_id_priv->id);
ret = -EINVAL;
break;
}
@@ -3900,96 +4263,76 @@ int ib_cm_init_qp_attr(struct ib_cm_id *cm_id,
}
EXPORT_SYMBOL(ib_cm_init_qp_attr);
-static ssize_t cm_show_counter(struct kobject *obj, struct attribute *attr,
- char *buf)
+static ssize_t cm_show_counter(struct ib_device *ibdev, u32 port_num,
+ struct ib_port_attribute *attr, char *buf)
{
- struct cm_counter_group *group;
- struct cm_counter_attribute *cm_attr;
-
- group = container_of(obj, struct cm_counter_group, obj);
- cm_attr = container_of(attr, struct cm_counter_attribute, attr);
-
- return sprintf(buf, "%ld\n",
- atomic_long_read(&group->counter[cm_attr->index]));
-}
-
-static const struct sysfs_ops cm_counter_ops = {
- .show = cm_show_counter
-};
-
-static struct kobj_type cm_counter_obj_type = {
- .sysfs_ops = &cm_counter_ops,
- .default_attrs = cm_counter_default_attrs
-};
+ struct cm_counter_attribute *cm_attr =
+ container_of(attr, struct cm_counter_attribute, attr);
+ struct cm_device *cm_dev = ib_get_client_data(ibdev, &cm_client);
-static void cm_release_port_obj(struct kobject *obj)
-{
- struct cm_port *cm_port;
-
- cm_port = container_of(obj, struct cm_port, port_obj);
- kfree(cm_port);
-}
-
-static struct kobj_type cm_port_obj_type = {
- .release = cm_release_port_obj
-};
+ if (WARN_ON(!cm_dev))
+ return -EINVAL;
-static char *cm_devnode(struct device *dev, umode_t *mode)
-{
- if (mode)
- *mode = 0666;
- return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev));
+ return sysfs_emit(
+ buf, "%ld\n",
+ atomic_long_read(
+ &cm_dev->port[port_num - 1]
+ ->counters[cm_attr->group][cm_attr->index]));
}
-struct class cm_class = {
- .owner = THIS_MODULE,
- .name = "infiniband_cm",
- .devnode = cm_devnode,
-};
-EXPORT_SYMBOL(cm_class);
-
-static int cm_create_port_fs(struct cm_port *port)
-{
- int i, ret;
-
- ret = kobject_init_and_add(&port->port_obj, &cm_port_obj_type,
- &port->cm_dev->device->kobj,
- "%d", port->port_num);
- if (ret) {
- kfree(port);
- return ret;
- }
-
- for (i = 0; i < CM_COUNTER_GROUPS; i++) {
- ret = kobject_init_and_add(&port->counter_group[i].obj,
- &cm_counter_obj_type,
- &port->port_obj,
- "%s", counter_group_names[i]);
- if (ret)
- goto error;
+#define CM_COUNTER_ATTR(_name, _group, _index) \
+ { \
+ .attr = __ATTR(_name, 0444, cm_show_counter, NULL), \
+ .group = _group, .index = _index \
}
- return 0;
-
-error:
- while (i--)
- kobject_put(&port->counter_group[i].obj);
- kobject_put(&port->port_obj);
- return ret;
-
-}
-
-static void cm_remove_port_fs(struct cm_port *port)
-{
- int i;
-
- for (i = 0; i < CM_COUNTER_GROUPS; i++)
- kobject_put(&port->counter_group[i].obj);
+#define CM_COUNTER_GROUP(_group, _name) \
+ static struct cm_counter_attribute cm_counter_attr_##_group[] = { \
+ CM_COUNTER_ATTR(req, _group, CM_REQ_COUNTER), \
+ CM_COUNTER_ATTR(mra, _group, CM_MRA_COUNTER), \
+ CM_COUNTER_ATTR(rej, _group, CM_REJ_COUNTER), \
+ CM_COUNTER_ATTR(rep, _group, CM_REP_COUNTER), \
+ CM_COUNTER_ATTR(rtu, _group, CM_RTU_COUNTER), \
+ CM_COUNTER_ATTR(dreq, _group, CM_DREQ_COUNTER), \
+ CM_COUNTER_ATTR(drep, _group, CM_DREP_COUNTER), \
+ CM_COUNTER_ATTR(sidr_req, _group, CM_SIDR_REQ_COUNTER), \
+ CM_COUNTER_ATTR(sidr_rep, _group, CM_SIDR_REP_COUNTER), \
+ CM_COUNTER_ATTR(lap, _group, CM_LAP_COUNTER), \
+ CM_COUNTER_ATTR(apr, _group, CM_APR_COUNTER), \
+ }; \
+ static struct attribute *cm_counter_attrs_##_group[] = { \
+ &cm_counter_attr_##_group[0].attr.attr, \
+ &cm_counter_attr_##_group[1].attr.attr, \
+ &cm_counter_attr_##_group[2].attr.attr, \
+ &cm_counter_attr_##_group[3].attr.attr, \
+ &cm_counter_attr_##_group[4].attr.attr, \
+ &cm_counter_attr_##_group[5].attr.attr, \
+ &cm_counter_attr_##_group[6].attr.attr, \
+ &cm_counter_attr_##_group[7].attr.attr, \
+ &cm_counter_attr_##_group[8].attr.attr, \
+ &cm_counter_attr_##_group[9].attr.attr, \
+ &cm_counter_attr_##_group[10].attr.attr, \
+ NULL, \
+ }; \
+ static const struct attribute_group cm_counter_group_##_group = { \
+ .name = _name, \
+ .attrs = cm_counter_attrs_##_group, \
+ };
- kobject_put(&port->port_obj);
-}
+CM_COUNTER_GROUP(CM_XMIT, "cm_tx_msgs")
+CM_COUNTER_GROUP(CM_XMIT_RETRIES, "cm_tx_retries")
+CM_COUNTER_GROUP(CM_RECV, "cm_rx_msgs")
+CM_COUNTER_GROUP(CM_RECV_DUPLICATES, "cm_rx_duplicates")
+
+static const struct attribute_group *cm_counter_groups[] = {
+ &cm_counter_group_CM_XMIT,
+ &cm_counter_group_CM_XMIT_RETRIES,
+ &cm_counter_group_CM_RECV,
+ &cm_counter_group_CM_RECV_DUPLICATES,
+ NULL,
+};
-static void cm_add_one(struct ib_device *ib_device)
+static int cm_add_one(struct ib_device *ib_device)
{
struct cm_device *cm_dev;
struct cm_port *port;
@@ -4003,41 +4346,38 @@ static void cm_add_one(struct ib_device *ib_device)
unsigned long flags;
int ret;
int count = 0;
- u8 i;
+ u32 i;
- cm_dev = kzalloc(sizeof(*cm_dev) + sizeof(*port) *
- ib_device->phys_port_cnt, GFP_KERNEL);
+ cm_dev = kzalloc(struct_size(cm_dev, port, ib_device->phys_port_cnt),
+ GFP_KERNEL);
if (!cm_dev)
- return;
+ return -ENOMEM;
+ kref_init(&cm_dev->kref);
+ rwlock_init(&cm_dev->mad_agent_lock);
cm_dev->ib_device = ib_device;
cm_dev->ack_delay = ib_device->attrs.local_ca_ack_delay;
cm_dev->going_down = 0;
- cm_dev->device = device_create(&cm_class, &ib_device->dev,
- MKDEV(0, 0), NULL,
- "%s", ib_device->name);
- if (IS_ERR(cm_dev->device)) {
- kfree(cm_dev);
- return;
- }
+
+ ib_set_client_data(ib_device, &cm_client, cm_dev);
set_bit(IB_MGMT_METHOD_SEND, reg_req.method_mask);
- for (i = 1; i <= ib_device->phys_port_cnt; i++) {
+ rdma_for_each_port (ib_device, i) {
if (!rdma_cap_ib_cm(ib_device, i))
continue;
port = kzalloc(sizeof *port, GFP_KERNEL);
- if (!port)
+ if (!port) {
+ ret = -ENOMEM;
goto error1;
+ }
cm_dev->port[i-1] = port;
port->cm_dev = cm_dev;
port->port_num = i;
- INIT_LIST_HEAD(&port->cm_priv_prim_list);
- INIT_LIST_HEAD(&port->cm_priv_altr_list);
-
- ret = cm_create_port_fs(port);
+ ret = ib_port_register_client_groups(ib_device, i,
+ cm_counter_groups);
if (ret)
goto error1;
@@ -4049,30 +4389,47 @@ static void cm_add_one(struct ib_device *ib_device)
cm_recv_handler,
port,
0);
- if (IS_ERR(port->mad_agent))
+ if (IS_ERR(port->mad_agent)) {
+ ret = PTR_ERR(port->mad_agent);
goto error2;
+ }
+
+ port->rep_agent = ib_register_mad_agent(ib_device, i,
+ IB_QPT_GSI,
+ NULL,
+ 0,
+ cm_send_handler,
+ NULL,
+ port,
+ 0);
+ if (IS_ERR(port->rep_agent)) {
+ ret = PTR_ERR(port->rep_agent);
+ goto error3;
+ }
ret = ib_modify_port(ib_device, i, 0, &port_modify);
if (ret)
- goto error3;
+ goto error4;
count++;
}
- if (!count)
+ if (!count) {
+ ret = -EOPNOTSUPP;
goto free;
-
- ib_set_client_data(ib_device, &cm_client, cm_dev);
+ }
write_lock_irqsave(&cm.device_lock, flags);
list_add_tail(&cm_dev->list, &cm.device_list);
write_unlock_irqrestore(&cm.device_lock, flags);
- return;
+ return 0;
+error4:
+ ib_unregister_mad_agent(port->rep_agent);
error3:
ib_unregister_mad_agent(port->mad_agent);
error2:
- cm_remove_port_fs(port);
+ ib_port_unregister_client_groups(ib_device, i, cm_counter_groups);
error1:
port_modify.set_port_cap_mask = 0;
port_modify.clr_port_cap_mask = IB_PORT_CM_SUP;
@@ -4082,28 +4439,25 @@ error1:
port = cm_dev->port[i-1];
ib_modify_port(ib_device, port->port_num, 0, &port_modify);
+ ib_unregister_mad_agent(port->rep_agent);
ib_unregister_mad_agent(port->mad_agent);
- cm_remove_port_fs(port);
+ ib_port_unregister_client_groups(ib_device, i,
+ cm_counter_groups);
}
free:
- device_unregister(cm_dev->device);
- kfree(cm_dev);
+ cm_device_put(cm_dev);
+ return ret;
}
static void cm_remove_one(struct ib_device *ib_device, void *client_data)
{
struct cm_device *cm_dev = client_data;
struct cm_port *port;
- struct cm_id_private *cm_id_priv;
- struct ib_mad_agent *cur_mad_agent;
struct ib_port_modify port_modify = {
.clr_port_cap_mask = IB_PORT_CM_SUP
};
unsigned long flags;
- int i;
-
- if (!cm_dev)
- return;
+ u32 i;
write_lock_irqsave(&cm.device_lock, flags);
list_del(&cm_dev->list);
@@ -4113,62 +4467,57 @@ static void cm_remove_one(struct ib_device *ib_device, void *client_data)
cm_dev->going_down = 1;
spin_unlock_irq(&cm.lock);
- for (i = 1; i <= ib_device->phys_port_cnt; i++) {
+ rdma_for_each_port (ib_device, i) {
+ struct ib_mad_agent *mad_agent;
+ struct ib_mad_agent *rep_agent;
+
if (!rdma_cap_ib_cm(ib_device, i))
continue;
port = cm_dev->port[i-1];
+ mad_agent = port->mad_agent;
+ rep_agent = port->rep_agent;
ib_modify_port(ib_device, port->port_num, 0, &port_modify);
- /* Mark all the cm_id's as not valid */
- spin_lock_irq(&cm.lock);
- list_for_each_entry(cm_id_priv, &port->cm_priv_altr_list, altr_list)
- cm_id_priv->altr_send_port_not_ready = 1;
- list_for_each_entry(cm_id_priv, &port->cm_priv_prim_list, prim_list)
- cm_id_priv->prim_send_port_not_ready = 1;
- spin_unlock_irq(&cm.lock);
/*
* We flush the queue here after the going_down set, this
* verify that no new works will be queued in the recv handler,
* after that we can call the unregister_mad_agent
*/
flush_workqueue(cm.wq);
- spin_lock_irq(&cm.state_lock);
- cur_mad_agent = port->mad_agent;
+ /*
+ * The above ensures no call paths from the work are running,
+ * the remaining paths all take the mad_agent_lock.
+ */
+ write_lock(&cm_dev->mad_agent_lock);
port->mad_agent = NULL;
- spin_unlock_irq(&cm.state_lock);
- ib_unregister_mad_agent(cur_mad_agent);
- cm_remove_port_fs(port);
+ port->rep_agent = NULL;
+ write_unlock(&cm_dev->mad_agent_lock);
+ ib_unregister_mad_agent(mad_agent);
+ ib_unregister_mad_agent(rep_agent);
+ ib_port_unregister_client_groups(ib_device, i,
+ cm_counter_groups);
}
- device_unregister(cm_dev->device);
- kfree(cm_dev);
+ cm_device_put(cm_dev);
}
static int __init ib_cm_init(void)
{
int ret;
- memset(&cm, 0, sizeof cm);
INIT_LIST_HEAD(&cm.device_list);
rwlock_init(&cm.device_lock);
spin_lock_init(&cm.lock);
- spin_lock_init(&cm.state_lock);
cm.listen_service_table = RB_ROOT;
cm.listen_service_id = be64_to_cpu(IB_CM_ASSIGN_SERVICE_ID);
cm.remote_id_table = RB_ROOT;
cm.remote_qp_table = RB_ROOT;
cm.remote_sidr_table = RB_ROOT;
- idr_init(&cm.local_id_table);
+ xa_init_flags(&cm.local_id_table, XA_FLAGS_ALLOC);
get_random_bytes(&cm.random_id_operand, sizeof cm.random_id_operand);
INIT_LIST_HEAD(&cm.timewait_list);
- ret = class_register(&cm_class);
- if (ret) {
- ret = -ENOMEM;
- goto error1;
- }
-
- cm.wq = create_workqueue("ib_cm");
+ cm.wq = alloc_workqueue("ib_cm", WQ_PERCPU, 1);
if (!cm.wq) {
ret = -ENOMEM;
goto error2;
@@ -4182,9 +4531,6 @@ static int __init ib_cm_init(void)
error3:
destroy_workqueue(cm.wq);
error2:
- class_unregister(&cm_class);
-error1:
- idr_destroy(&cm.local_id_table);
return ret;
}
@@ -4205,10 +4551,8 @@ static void __exit ib_cm_cleanup(void)
kfree(timewait_info);
}
- class_unregister(&cm_class);
- idr_destroy(&cm.local_id_table);
+ WARN_ON(!xa_empty(&cm.local_id_table));
}
module_init(ib_cm_init);
module_exit(ib_cm_cleanup);
-
diff --git a/drivers/infiniband/core/cm_msgs.h b/drivers/infiniband/core/cm_msgs.h
index 8b76f0ef965e..8462de7ca26e 100644
--- a/drivers/infiniband/core/cm_msgs.h
+++ b/drivers/infiniband/core/cm_msgs.h
@@ -1,39 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/*
* Copyright (c) 2004, 2011 Intel Corporation. All rights reserved.
* Copyright (c) 2004 Topspin Corporation. All rights reserved.
* Copyright (c) 2004 Voltaire Corporation. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING the madirectory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use source and binary forms, with or
- * withmodification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retathe above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHWARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS THE
- * SOFTWARE.
+ * Copyright (c) 2019, Mellanox Technologies inc. All rights reserved.
*/
-#if !defined(CM_MSGS_H)
+#ifndef CM_MSGS_H
#define CM_MSGS_H
+#include <rdma/ibta_vol1_c12.h>
#include <rdma/ib_mad.h>
#include <rdma/ib_cm.h>
@@ -44,127 +19,14 @@
#define IB_CM_CLASS_VERSION 2 /* IB specification 1.2 */
-enum cm_msg_sequence {
- CM_MSG_SEQUENCE_REQ,
- CM_MSG_SEQUENCE_LAP,
- CM_MSG_SEQUENCE_DREQ,
- CM_MSG_SEQUENCE_SIDR
-};
-
-struct cm_req_msg {
- struct ib_mad_hdr hdr;
-
- __be32 local_comm_id;
- __be32 rsvd4;
- __be64 service_id;
- __be64 local_ca_guid;
- __be32 rsvd24;
- __be32 local_qkey;
- /* local QPN:24, responder resources:8 */
- __be32 offset32;
- /* local EECN:24, initiator depth:8 */
- __be32 offset36;
- /*
- * remote EECN:24, remote CM response timeout:5,
- * transport service type:2, end-to-end flow control:1
- */
- __be32 offset40;
- /* starting PSN:24, local CM response timeout:5, retry count:3 */
- __be32 offset44;
- __be16 pkey;
- /* path MTU:4, RDC exists:1, RNR retry count:3. */
- u8 offset50;
- /* max CM Retries:4, SRQ:1, extended transport type:3 */
- u8 offset51;
-
- __be16 primary_local_lid;
- __be16 primary_remote_lid;
- union ib_gid primary_local_gid;
- union ib_gid primary_remote_gid;
- /* flow label:20, rsvd:6, packet rate:6 */
- __be32 primary_offset88;
- u8 primary_traffic_class;
- u8 primary_hop_limit;
- /* SL:4, subnet local:1, rsvd:3 */
- u8 primary_offset94;
- /* local ACK timeout:5, rsvd:3 */
- u8 primary_offset95;
-
- __be16 alt_local_lid;
- __be16 alt_remote_lid;
- union ib_gid alt_local_gid;
- union ib_gid alt_remote_gid;
- /* flow label:20, rsvd:6, packet rate:6 */
- __be32 alt_offset132;
- u8 alt_traffic_class;
- u8 alt_hop_limit;
- /* SL:4, subnet local:1, rsvd:3 */
- u8 alt_offset138;
- /* local ACK timeout:5, rsvd:3 */
- u8 alt_offset139;
-
- u32 private_data[IB_CM_REQ_PRIVATE_DATA_SIZE / sizeof(u32)];
-
-} __attribute__ ((packed));
-
-static inline __be32 cm_req_get_local_qpn(struct cm_req_msg *req_msg)
-{
- return cpu_to_be32(be32_to_cpu(req_msg->offset32) >> 8);
-}
-
-static inline void cm_req_set_local_qpn(struct cm_req_msg *req_msg, __be32 qpn)
-{
- req_msg->offset32 = cpu_to_be32((be32_to_cpu(qpn) << 8) |
- (be32_to_cpu(req_msg->offset32) &
- 0x000000FF));
-}
-
-static inline u8 cm_req_get_resp_res(struct cm_req_msg *req_msg)
-{
- return (u8) be32_to_cpu(req_msg->offset32);
-}
-
-static inline void cm_req_set_resp_res(struct cm_req_msg *req_msg, u8 resp_res)
-{
- req_msg->offset32 = cpu_to_be32(resp_res |
- (be32_to_cpu(req_msg->offset32) &
- 0xFFFFFF00));
-}
-
-static inline u8 cm_req_get_init_depth(struct cm_req_msg *req_msg)
-{
- return (u8) be32_to_cpu(req_msg->offset36);
-}
-
-static inline void cm_req_set_init_depth(struct cm_req_msg *req_msg,
- u8 init_depth)
-{
- req_msg->offset36 = cpu_to_be32(init_depth |
- (be32_to_cpu(req_msg->offset36) &
- 0xFFFFFF00));
-}
-
-static inline u8 cm_req_get_remote_resp_timeout(struct cm_req_msg *req_msg)
-{
- return (u8) ((be32_to_cpu(req_msg->offset40) & 0xF8) >> 3);
-}
-
-static inline void cm_req_set_remote_resp_timeout(struct cm_req_msg *req_msg,
- u8 resp_timeout)
-{
- req_msg->offset40 = cpu_to_be32((resp_timeout << 3) |
- (be32_to_cpu(req_msg->offset40) &
- 0xFFFFFF07));
-}
-
static inline enum ib_qp_type cm_req_get_qp_type(struct cm_req_msg *req_msg)
{
- u8 transport_type = (u8) (be32_to_cpu(req_msg->offset40) & 0x06) >> 1;
- switch(transport_type) {
+ u8 transport_type = IBA_GET(CM_REQ_TRANSPORT_SERVICE_TYPE, req_msg);
+ switch (transport_type) {
case 0: return IB_QPT_RC;
case 1: return IB_QPT_UC;
case 3:
- switch (req_msg->offset51 & 0x7) {
+ switch (IBA_GET(CM_REQ_EXTENDED_TRANSPORT_TYPE, req_msg)) {
case 1: return IB_QPT_XRC_TGT;
default: return 0;
}
@@ -175,242 +37,19 @@ static inline enum ib_qp_type cm_req_get_qp_type(struct cm_req_msg *req_msg)
static inline void cm_req_set_qp_type(struct cm_req_msg *req_msg,
enum ib_qp_type qp_type)
{
- switch(qp_type) {
+ switch (qp_type) {
case IB_QPT_UC:
- req_msg->offset40 = cpu_to_be32((be32_to_cpu(
- req_msg->offset40) &
- 0xFFFFFFF9) | 0x2);
+ IBA_SET(CM_REQ_TRANSPORT_SERVICE_TYPE, req_msg, 1);
break;
case IB_QPT_XRC_INI:
- req_msg->offset40 = cpu_to_be32((be32_to_cpu(
- req_msg->offset40) &
- 0xFFFFFFF9) | 0x6);
- req_msg->offset51 = (req_msg->offset51 & 0xF8) | 1;
+ IBA_SET(CM_REQ_TRANSPORT_SERVICE_TYPE, req_msg, 3);
+ IBA_SET(CM_REQ_EXTENDED_TRANSPORT_TYPE, req_msg, 1);
break;
default:
- req_msg->offset40 = cpu_to_be32(be32_to_cpu(
- req_msg->offset40) &
- 0xFFFFFFF9);
+ IBA_SET(CM_REQ_TRANSPORT_SERVICE_TYPE, req_msg, 0);
}
}
-static inline u8 cm_req_get_flow_ctrl(struct cm_req_msg *req_msg)
-{
- return be32_to_cpu(req_msg->offset40) & 0x1;
-}
-
-static inline void cm_req_set_flow_ctrl(struct cm_req_msg *req_msg,
- u8 flow_ctrl)
-{
- req_msg->offset40 = cpu_to_be32((flow_ctrl & 0x1) |
- (be32_to_cpu(req_msg->offset40) &
- 0xFFFFFFFE));
-}
-
-static inline __be32 cm_req_get_starting_psn(struct cm_req_msg *req_msg)
-{
- return cpu_to_be32(be32_to_cpu(req_msg->offset44) >> 8);
-}
-
-static inline void cm_req_set_starting_psn(struct cm_req_msg *req_msg,
- __be32 starting_psn)
-{
- req_msg->offset44 = cpu_to_be32((be32_to_cpu(starting_psn) << 8) |
- (be32_to_cpu(req_msg->offset44) & 0x000000FF));
-}
-
-static inline u8 cm_req_get_local_resp_timeout(struct cm_req_msg *req_msg)
-{
- return (u8) ((be32_to_cpu(req_msg->offset44) & 0xF8) >> 3);
-}
-
-static inline void cm_req_set_local_resp_timeout(struct cm_req_msg *req_msg,
- u8 resp_timeout)
-{
- req_msg->offset44 = cpu_to_be32((resp_timeout << 3) |
- (be32_to_cpu(req_msg->offset44) & 0xFFFFFF07));
-}
-
-static inline u8 cm_req_get_retry_count(struct cm_req_msg *req_msg)
-{
- return (u8) (be32_to_cpu(req_msg->offset44) & 0x7);
-}
-
-static inline void cm_req_set_retry_count(struct cm_req_msg *req_msg,
- u8 retry_count)
-{
- req_msg->offset44 = cpu_to_be32((retry_count & 0x7) |
- (be32_to_cpu(req_msg->offset44) & 0xFFFFFFF8));
-}
-
-static inline u8 cm_req_get_path_mtu(struct cm_req_msg *req_msg)
-{
- return req_msg->offset50 >> 4;
-}
-
-static inline void cm_req_set_path_mtu(struct cm_req_msg *req_msg, u8 path_mtu)
-{
- req_msg->offset50 = (u8) ((req_msg->offset50 & 0xF) | (path_mtu << 4));
-}
-
-static inline u8 cm_req_get_rnr_retry_count(struct cm_req_msg *req_msg)
-{
- return req_msg->offset50 & 0x7;
-}
-
-static inline void cm_req_set_rnr_retry_count(struct cm_req_msg *req_msg,
- u8 rnr_retry_count)
-{
- req_msg->offset50 = (u8) ((req_msg->offset50 & 0xF8) |
- (rnr_retry_count & 0x7));
-}
-
-static inline u8 cm_req_get_max_cm_retries(struct cm_req_msg *req_msg)
-{
- return req_msg->offset51 >> 4;
-}
-
-static inline void cm_req_set_max_cm_retries(struct cm_req_msg *req_msg,
- u8 retries)
-{
- req_msg->offset51 = (u8) ((req_msg->offset51 & 0xF) | (retries << 4));
-}
-
-static inline u8 cm_req_get_srq(struct cm_req_msg *req_msg)
-{
- return (req_msg->offset51 & 0x8) >> 3;
-}
-
-static inline void cm_req_set_srq(struct cm_req_msg *req_msg, u8 srq)
-{
- req_msg->offset51 = (u8) ((req_msg->offset51 & 0xF7) |
- ((srq & 0x1) << 3));
-}
-
-static inline __be32 cm_req_get_primary_flow_label(struct cm_req_msg *req_msg)
-{
- return cpu_to_be32(be32_to_cpu(req_msg->primary_offset88) >> 12);
-}
-
-static inline void cm_req_set_primary_flow_label(struct cm_req_msg *req_msg,
- __be32 flow_label)
-{
- req_msg->primary_offset88 = cpu_to_be32(
- (be32_to_cpu(req_msg->primary_offset88) &
- 0x00000FFF) |
- (be32_to_cpu(flow_label) << 12));
-}
-
-static inline u8 cm_req_get_primary_packet_rate(struct cm_req_msg *req_msg)
-{
- return (u8) (be32_to_cpu(req_msg->primary_offset88) & 0x3F);
-}
-
-static inline void cm_req_set_primary_packet_rate(struct cm_req_msg *req_msg,
- u8 rate)
-{
- req_msg->primary_offset88 = cpu_to_be32(
- (be32_to_cpu(req_msg->primary_offset88) &
- 0xFFFFFFC0) | (rate & 0x3F));
-}
-
-static inline u8 cm_req_get_primary_sl(struct cm_req_msg *req_msg)
-{
- return (u8) (req_msg->primary_offset94 >> 4);
-}
-
-static inline void cm_req_set_primary_sl(struct cm_req_msg *req_msg, u8 sl)
-{
- req_msg->primary_offset94 = (u8) ((req_msg->primary_offset94 & 0x0F) |
- (sl << 4));
-}
-
-static inline u8 cm_req_get_primary_subnet_local(struct cm_req_msg *req_msg)
-{
- return (u8) ((req_msg->primary_offset94 & 0x08) >> 3);
-}
-
-static inline void cm_req_set_primary_subnet_local(struct cm_req_msg *req_msg,
- u8 subnet_local)
-{
- req_msg->primary_offset94 = (u8) ((req_msg->primary_offset94 & 0xF7) |
- ((subnet_local & 0x1) << 3));
-}
-
-static inline u8 cm_req_get_primary_local_ack_timeout(struct cm_req_msg *req_msg)
-{
- return (u8) (req_msg->primary_offset95 >> 3);
-}
-
-static inline void cm_req_set_primary_local_ack_timeout(struct cm_req_msg *req_msg,
- u8 local_ack_timeout)
-{
- req_msg->primary_offset95 = (u8) ((req_msg->primary_offset95 & 0x07) |
- (local_ack_timeout << 3));
-}
-
-static inline __be32 cm_req_get_alt_flow_label(struct cm_req_msg *req_msg)
-{
- return cpu_to_be32(be32_to_cpu(req_msg->alt_offset132) >> 12);
-}
-
-static inline void cm_req_set_alt_flow_label(struct cm_req_msg *req_msg,
- __be32 flow_label)
-{
- req_msg->alt_offset132 = cpu_to_be32(
- (be32_to_cpu(req_msg->alt_offset132) &
- 0x00000FFF) |
- (be32_to_cpu(flow_label) << 12));
-}
-
-static inline u8 cm_req_get_alt_packet_rate(struct cm_req_msg *req_msg)
-{
- return (u8) (be32_to_cpu(req_msg->alt_offset132) & 0x3F);
-}
-
-static inline void cm_req_set_alt_packet_rate(struct cm_req_msg *req_msg,
- u8 rate)
-{
- req_msg->alt_offset132 = cpu_to_be32(
- (be32_to_cpu(req_msg->alt_offset132) &
- 0xFFFFFFC0) | (rate & 0x3F));
-}
-
-static inline u8 cm_req_get_alt_sl(struct cm_req_msg *req_msg)
-{
- return (u8) (req_msg->alt_offset138 >> 4);
-}
-
-static inline void cm_req_set_alt_sl(struct cm_req_msg *req_msg, u8 sl)
-{
- req_msg->alt_offset138 = (u8) ((req_msg->alt_offset138 & 0x0F) |
- (sl << 4));
-}
-
-static inline u8 cm_req_get_alt_subnet_local(struct cm_req_msg *req_msg)
-{
- return (u8) ((req_msg->alt_offset138 & 0x08) >> 3);
-}
-
-static inline void cm_req_set_alt_subnet_local(struct cm_req_msg *req_msg,
- u8 subnet_local)
-{
- req_msg->alt_offset138 = (u8) ((req_msg->alt_offset138 & 0xF7) |
- ((subnet_local & 0x1) << 3));
-}
-
-static inline u8 cm_req_get_alt_local_ack_timeout(struct cm_req_msg *req_msg)
-{
- return (u8) (req_msg->alt_offset139 >> 3);
-}
-
-static inline void cm_req_set_alt_local_ack_timeout(struct cm_req_msg *req_msg,
- u8 local_ack_timeout)
-{
- req_msg->alt_offset139 = (u8) ((req_msg->alt_offset139 & 0x07) |
- (local_ack_timeout << 3));
-}
-
/* Message REJected or MRAed */
enum cm_msg_response {
CM_MSG_RESPONSE_REQ = 0x0,
@@ -418,419 +57,12 @@ enum cm_msg_response {
CM_MSG_RESPONSE_OTHER = 0x2
};
- struct cm_mra_msg {
- struct ib_mad_hdr hdr;
-
- __be32 local_comm_id;
- __be32 remote_comm_id;
- /* message MRAed:2, rsvd:6 */
- u8 offset8;
- /* service timeout:5, rsvd:3 */
- u8 offset9;
-
- u8 private_data[IB_CM_MRA_PRIVATE_DATA_SIZE];
-
-} __attribute__ ((packed));
-
-static inline u8 cm_mra_get_msg_mraed(struct cm_mra_msg *mra_msg)
-{
- return (u8) (mra_msg->offset8 >> 6);
-}
-
-static inline void cm_mra_set_msg_mraed(struct cm_mra_msg *mra_msg, u8 msg)
-{
- mra_msg->offset8 = (u8) ((mra_msg->offset8 & 0x3F) | (msg << 6));
-}
-
-static inline u8 cm_mra_get_service_timeout(struct cm_mra_msg *mra_msg)
-{
- return (u8) (mra_msg->offset9 >> 3);
-}
-
-static inline void cm_mra_set_service_timeout(struct cm_mra_msg *mra_msg,
- u8 service_timeout)
-{
- mra_msg->offset9 = (u8) ((mra_msg->offset9 & 0x07) |
- (service_timeout << 3));
-}
-
-struct cm_rej_msg {
- struct ib_mad_hdr hdr;
-
- __be32 local_comm_id;
- __be32 remote_comm_id;
- /* message REJected:2, rsvd:6 */
- u8 offset8;
- /* reject info length:7, rsvd:1. */
- u8 offset9;
- __be16 reason;
- u8 ari[IB_CM_REJ_ARI_LENGTH];
-
- u8 private_data[IB_CM_REJ_PRIVATE_DATA_SIZE];
-
-} __attribute__ ((packed));
-
-static inline u8 cm_rej_get_msg_rejected(struct cm_rej_msg *rej_msg)
-{
- return (u8) (rej_msg->offset8 >> 6);
-}
-
-static inline void cm_rej_set_msg_rejected(struct cm_rej_msg *rej_msg, u8 msg)
-{
- rej_msg->offset8 = (u8) ((rej_msg->offset8 & 0x3F) | (msg << 6));
-}
-
-static inline u8 cm_rej_get_reject_info_len(struct cm_rej_msg *rej_msg)
-{
- return (u8) (rej_msg->offset9 >> 1);
-}
-
-static inline void cm_rej_set_reject_info_len(struct cm_rej_msg *rej_msg,
- u8 len)
-{
- rej_msg->offset9 = (u8) ((rej_msg->offset9 & 0x1) | (len << 1));
-}
-
-struct cm_rep_msg {
- struct ib_mad_hdr hdr;
-
- __be32 local_comm_id;
- __be32 remote_comm_id;
- __be32 local_qkey;
- /* local QPN:24, rsvd:8 */
- __be32 offset12;
- /* local EECN:24, rsvd:8 */
- __be32 offset16;
- /* starting PSN:24 rsvd:8 */
- __be32 offset20;
- u8 resp_resources;
- u8 initiator_depth;
- /* target ACK delay:5, failover accepted:2, end-to-end flow control:1 */
- u8 offset26;
- /* RNR retry count:3, SRQ:1, rsvd:5 */
- u8 offset27;
- __be64 local_ca_guid;
-
- u8 private_data[IB_CM_REP_PRIVATE_DATA_SIZE];
-
-} __attribute__ ((packed));
-
-static inline __be32 cm_rep_get_local_qpn(struct cm_rep_msg *rep_msg)
-{
- return cpu_to_be32(be32_to_cpu(rep_msg->offset12) >> 8);
-}
-
-static inline void cm_rep_set_local_qpn(struct cm_rep_msg *rep_msg, __be32 qpn)
-{
- rep_msg->offset12 = cpu_to_be32((be32_to_cpu(qpn) << 8) |
- (be32_to_cpu(rep_msg->offset12) & 0x000000FF));
-}
-
-static inline __be32 cm_rep_get_local_eecn(struct cm_rep_msg *rep_msg)
-{
- return cpu_to_be32(be32_to_cpu(rep_msg->offset16) >> 8);
-}
-
-static inline void cm_rep_set_local_eecn(struct cm_rep_msg *rep_msg, __be32 eecn)
-{
- rep_msg->offset16 = cpu_to_be32((be32_to_cpu(eecn) << 8) |
- (be32_to_cpu(rep_msg->offset16) & 0x000000FF));
-}
-
static inline __be32 cm_rep_get_qpn(struct cm_rep_msg *rep_msg, enum ib_qp_type qp_type)
{
return (qp_type == IB_QPT_XRC_INI) ?
- cm_rep_get_local_eecn(rep_msg) : cm_rep_get_local_qpn(rep_msg);
-}
-
-static inline __be32 cm_rep_get_starting_psn(struct cm_rep_msg *rep_msg)
-{
- return cpu_to_be32(be32_to_cpu(rep_msg->offset20) >> 8);
-}
-
-static inline void cm_rep_set_starting_psn(struct cm_rep_msg *rep_msg,
- __be32 starting_psn)
-{
- rep_msg->offset20 = cpu_to_be32((be32_to_cpu(starting_psn) << 8) |
- (be32_to_cpu(rep_msg->offset20) & 0x000000FF));
-}
-
-static inline u8 cm_rep_get_target_ack_delay(struct cm_rep_msg *rep_msg)
-{
- return (u8) (rep_msg->offset26 >> 3);
-}
-
-static inline void cm_rep_set_target_ack_delay(struct cm_rep_msg *rep_msg,
- u8 target_ack_delay)
-{
- rep_msg->offset26 = (u8) ((rep_msg->offset26 & 0x07) |
- (target_ack_delay << 3));
-}
-
-static inline u8 cm_rep_get_failover(struct cm_rep_msg *rep_msg)
-{
- return (u8) ((rep_msg->offset26 & 0x06) >> 1);
-}
-
-static inline void cm_rep_set_failover(struct cm_rep_msg *rep_msg, u8 failover)
-{
- rep_msg->offset26 = (u8) ((rep_msg->offset26 & 0xF9) |
- ((failover & 0x3) << 1));
-}
-
-static inline u8 cm_rep_get_flow_ctrl(struct cm_rep_msg *rep_msg)
-{
- return (u8) (rep_msg->offset26 & 0x01);
-}
-
-static inline void cm_rep_set_flow_ctrl(struct cm_rep_msg *rep_msg,
- u8 flow_ctrl)
-{
- rep_msg->offset26 = (u8) ((rep_msg->offset26 & 0xFE) |
- (flow_ctrl & 0x1));
-}
-
-static inline u8 cm_rep_get_rnr_retry_count(struct cm_rep_msg *rep_msg)
-{
- return (u8) (rep_msg->offset27 >> 5);
-}
-
-static inline void cm_rep_set_rnr_retry_count(struct cm_rep_msg *rep_msg,
- u8 rnr_retry_count)
-{
- rep_msg->offset27 = (u8) ((rep_msg->offset27 & 0x1F) |
- (rnr_retry_count << 5));
-}
-
-static inline u8 cm_rep_get_srq(struct cm_rep_msg *rep_msg)
-{
- return (u8) ((rep_msg->offset27 >> 4) & 0x1);
-}
-
-static inline void cm_rep_set_srq(struct cm_rep_msg *rep_msg, u8 srq)
-{
- rep_msg->offset27 = (u8) ((rep_msg->offset27 & 0xEF) |
- ((srq & 0x1) << 4));
-}
-
-struct cm_rtu_msg {
- struct ib_mad_hdr hdr;
-
- __be32 local_comm_id;
- __be32 remote_comm_id;
-
- u8 private_data[IB_CM_RTU_PRIVATE_DATA_SIZE];
-
-} __attribute__ ((packed));
-
-struct cm_dreq_msg {
- struct ib_mad_hdr hdr;
-
- __be32 local_comm_id;
- __be32 remote_comm_id;
- /* remote QPN/EECN:24, rsvd:8 */
- __be32 offset8;
-
- u8 private_data[IB_CM_DREQ_PRIVATE_DATA_SIZE];
-
-} __attribute__ ((packed));
-
-static inline __be32 cm_dreq_get_remote_qpn(struct cm_dreq_msg *dreq_msg)
-{
- return cpu_to_be32(be32_to_cpu(dreq_msg->offset8) >> 8);
-}
-
-static inline void cm_dreq_set_remote_qpn(struct cm_dreq_msg *dreq_msg, __be32 qpn)
-{
- dreq_msg->offset8 = cpu_to_be32((be32_to_cpu(qpn) << 8) |
- (be32_to_cpu(dreq_msg->offset8) & 0x000000FF));
-}
-
-struct cm_drep_msg {
- struct ib_mad_hdr hdr;
-
- __be32 local_comm_id;
- __be32 remote_comm_id;
-
- u8 private_data[IB_CM_DREP_PRIVATE_DATA_SIZE];
-
-} __attribute__ ((packed));
-
-struct cm_lap_msg {
- struct ib_mad_hdr hdr;
-
- __be32 local_comm_id;
- __be32 remote_comm_id;
-
- __be32 rsvd8;
- /* remote QPN/EECN:24, remote CM response timeout:5, rsvd:3 */
- __be32 offset12;
- __be32 rsvd16;
-
- __be16 alt_local_lid;
- __be16 alt_remote_lid;
- union ib_gid alt_local_gid;
- union ib_gid alt_remote_gid;
- /* flow label:20, rsvd:4, traffic class:8 */
- __be32 offset56;
- u8 alt_hop_limit;
- /* rsvd:2, packet rate:6 */
- u8 offset61;
- /* SL:4, subnet local:1, rsvd:3 */
- u8 offset62;
- /* local ACK timeout:5, rsvd:3 */
- u8 offset63;
-
- u8 private_data[IB_CM_LAP_PRIVATE_DATA_SIZE];
-} __attribute__ ((packed));
-
-static inline __be32 cm_lap_get_remote_qpn(struct cm_lap_msg *lap_msg)
-{
- return cpu_to_be32(be32_to_cpu(lap_msg->offset12) >> 8);
-}
-
-static inline void cm_lap_set_remote_qpn(struct cm_lap_msg *lap_msg, __be32 qpn)
-{
- lap_msg->offset12 = cpu_to_be32((be32_to_cpu(qpn) << 8) |
- (be32_to_cpu(lap_msg->offset12) &
- 0x000000FF));
-}
-
-static inline u8 cm_lap_get_remote_resp_timeout(struct cm_lap_msg *lap_msg)
-{
- return (u8) ((be32_to_cpu(lap_msg->offset12) & 0xF8) >> 3);
-}
-
-static inline void cm_lap_set_remote_resp_timeout(struct cm_lap_msg *lap_msg,
- u8 resp_timeout)
-{
- lap_msg->offset12 = cpu_to_be32((resp_timeout << 3) |
- (be32_to_cpu(lap_msg->offset12) &
- 0xFFFFFF07));
-}
-
-static inline __be32 cm_lap_get_flow_label(struct cm_lap_msg *lap_msg)
-{
- return cpu_to_be32(be32_to_cpu(lap_msg->offset56) >> 12);
-}
-
-static inline void cm_lap_set_flow_label(struct cm_lap_msg *lap_msg,
- __be32 flow_label)
-{
- lap_msg->offset56 = cpu_to_be32(
- (be32_to_cpu(lap_msg->offset56) & 0x00000FFF) |
- (be32_to_cpu(flow_label) << 12));
-}
-
-static inline u8 cm_lap_get_traffic_class(struct cm_lap_msg *lap_msg)
-{
- return (u8) be32_to_cpu(lap_msg->offset56);
-}
-
-static inline void cm_lap_set_traffic_class(struct cm_lap_msg *lap_msg,
- u8 traffic_class)
-{
- lap_msg->offset56 = cpu_to_be32(traffic_class |
- (be32_to_cpu(lap_msg->offset56) &
- 0xFFFFFF00));
-}
-
-static inline u8 cm_lap_get_packet_rate(struct cm_lap_msg *lap_msg)
-{
- return lap_msg->offset61 & 0x3F;
-}
-
-static inline void cm_lap_set_packet_rate(struct cm_lap_msg *lap_msg,
- u8 packet_rate)
-{
- lap_msg->offset61 = (packet_rate & 0x3F) | (lap_msg->offset61 & 0xC0);
-}
-
-static inline u8 cm_lap_get_sl(struct cm_lap_msg *lap_msg)
-{
- return lap_msg->offset62 >> 4;
-}
-
-static inline void cm_lap_set_sl(struct cm_lap_msg *lap_msg, u8 sl)
-{
- lap_msg->offset62 = (sl << 4) | (lap_msg->offset62 & 0x0F);
-}
-
-static inline u8 cm_lap_get_subnet_local(struct cm_lap_msg *lap_msg)
-{
- return (lap_msg->offset62 >> 3) & 0x1;
-}
-
-static inline void cm_lap_set_subnet_local(struct cm_lap_msg *lap_msg,
- u8 subnet_local)
-{
- lap_msg->offset62 = ((subnet_local & 0x1) << 3) |
- (lap_msg->offset61 & 0xF7);
-}
-static inline u8 cm_lap_get_local_ack_timeout(struct cm_lap_msg *lap_msg)
-{
- return lap_msg->offset63 >> 3;
-}
-
-static inline void cm_lap_set_local_ack_timeout(struct cm_lap_msg *lap_msg,
- u8 local_ack_timeout)
-{
- lap_msg->offset63 = (local_ack_timeout << 3) |
- (lap_msg->offset63 & 0x07);
-}
-
-struct cm_apr_msg {
- struct ib_mad_hdr hdr;
-
- __be32 local_comm_id;
- __be32 remote_comm_id;
-
- u8 info_length;
- u8 ap_status;
- __be16 rsvd;
- u8 info[IB_CM_APR_INFO_LENGTH];
-
- u8 private_data[IB_CM_APR_PRIVATE_DATA_SIZE];
-} __attribute__ ((packed));
-
-struct cm_sidr_req_msg {
- struct ib_mad_hdr hdr;
-
- __be32 request_id;
- __be16 pkey;
- __be16 rsvd;
- __be64 service_id;
-
- u32 private_data[IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE / sizeof(u32)];
-} __attribute__ ((packed));
-
-struct cm_sidr_rep_msg {
- struct ib_mad_hdr hdr;
-
- __be32 request_id;
- u8 status;
- u8 info_length;
- __be16 rsvd;
- /* QPN:24, rsvd:8 */
- __be32 offset8;
- __be64 service_id;
- __be32 qkey;
- u8 info[IB_CM_SIDR_REP_INFO_LENGTH];
-
- u8 private_data[IB_CM_SIDR_REP_PRIVATE_DATA_SIZE];
-} __attribute__ ((packed));
-
-static inline __be32 cm_sidr_rep_get_qpn(struct cm_sidr_rep_msg *sidr_rep_msg)
-{
- return cpu_to_be32(be32_to_cpu(sidr_rep_msg->offset8) >> 8);
-}
-
-static inline void cm_sidr_rep_set_qpn(struct cm_sidr_rep_msg *sidr_rep_msg,
- __be32 qpn)
-{
- sidr_rep_msg->offset8 = cpu_to_be32((be32_to_cpu(qpn) << 8) |
- (be32_to_cpu(sidr_rep_msg->offset8) &
- 0x000000FF));
+ cpu_to_be32(IBA_GET(CM_REP_LOCAL_EE_CONTEXT_NUMBER,
+ rep_msg)) :
+ cpu_to_be32(IBA_GET(CM_REP_LOCAL_QPN, rep_msg));
}
#endif /* CM_MSGS_H */
diff --git a/drivers/infiniband/core/cm_trace.c b/drivers/infiniband/core/cm_trace.c
new file mode 100644
index 000000000000..8f3482f66338
--- /dev/null
+++ b/drivers/infiniband/core/cm_trace.c
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Trace points for the IB Connection Manager.
+ *
+ * Author: Chuck Lever <chuck.lever@oracle.com>
+ *
+ * Copyright (c) 2020, Oracle and/or its affiliates.
+ */
+
+#include <rdma/rdma_cm.h>
+#include "cma_priv.h"
+
+#define CREATE_TRACE_POINTS
+
+#include "cm_trace.h"
diff --git a/drivers/infiniband/core/cm_trace.h b/drivers/infiniband/core/cm_trace.h
new file mode 100644
index 000000000000..4a4987da69d4
--- /dev/null
+++ b/drivers/infiniband/core/cm_trace.h
@@ -0,0 +1,414 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Trace point definitions for the RDMA Connect Manager.
+ *
+ * Author: Chuck Lever <chuck.lever@oracle.com>
+ *
+ * Copyright (c) 2020 Oracle and/or its affiliates.
+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM ib_cma
+
+#if !defined(_TRACE_IB_CMA_H) || defined(TRACE_HEADER_MULTI_READ)
+
+#define _TRACE_IB_CMA_H
+
+#include <linux/tracepoint.h>
+#include <rdma/ib_cm.h>
+#include <trace/misc/rdma.h>
+
+/*
+ * enum ib_cm_state, from include/rdma/ib_cm.h
+ */
+#define IB_CM_STATE_LIST \
+ ib_cm_state(IDLE) \
+ ib_cm_state(LISTEN) \
+ ib_cm_state(REQ_SENT) \
+ ib_cm_state(REQ_RCVD) \
+ ib_cm_state(MRA_REQ_SENT) \
+ ib_cm_state(MRA_REQ_RCVD) \
+ ib_cm_state(REP_SENT) \
+ ib_cm_state(REP_RCVD) \
+ ib_cm_state(MRA_REP_SENT) \
+ ib_cm_state(MRA_REP_RCVD) \
+ ib_cm_state(ESTABLISHED) \
+ ib_cm_state(DREQ_SENT) \
+ ib_cm_state(DREQ_RCVD) \
+ ib_cm_state(TIMEWAIT) \
+ ib_cm_state(SIDR_REQ_SENT) \
+ ib_cm_state_end(SIDR_REQ_RCVD)
+
+#undef ib_cm_state
+#undef ib_cm_state_end
+#define ib_cm_state(x) TRACE_DEFINE_ENUM(IB_CM_##x);
+#define ib_cm_state_end(x) TRACE_DEFINE_ENUM(IB_CM_##x);
+
+IB_CM_STATE_LIST
+
+#undef ib_cm_state
+#undef ib_cm_state_end
+#define ib_cm_state(x) { IB_CM_##x, #x },
+#define ib_cm_state_end(x) { IB_CM_##x, #x }
+
+#define show_ib_cm_state(x) \
+ __print_symbolic(x, IB_CM_STATE_LIST)
+
+/*
+ * enum ib_cm_lap_state, from include/rdma/ib_cm.h
+ */
+#define IB_CM_LAP_STATE_LIST \
+ ib_cm_lap_state(LAP_UNINIT) \
+ ib_cm_lap_state(LAP_IDLE) \
+ ib_cm_lap_state(LAP_SENT) \
+ ib_cm_lap_state(LAP_RCVD) \
+ ib_cm_lap_state(MRA_LAP_SENT) \
+ ib_cm_lap_state_end(MRA_LAP_RCVD)
+
+#undef ib_cm_lap_state
+#undef ib_cm_lap_state_end
+#define ib_cm_lap_state(x) TRACE_DEFINE_ENUM(IB_CM_##x);
+#define ib_cm_lap_state_end(x) TRACE_DEFINE_ENUM(IB_CM_##x);
+
+IB_CM_LAP_STATE_LIST
+
+#undef ib_cm_lap_state
+#undef ib_cm_lap_state_end
+#define ib_cm_lap_state(x) { IB_CM_##x, #x },
+#define ib_cm_lap_state_end(x) { IB_CM_##x, #x }
+
+#define show_ib_cm_lap_state(x) \
+ __print_symbolic(x, IB_CM_LAP_STATE_LIST)
+
+/*
+ * enum ib_cm_rej_reason, from include/rdma/ib_cm.h
+ */
+#define IB_CM_REJ_REASON_LIST \
+ ib_cm_rej_reason(REJ_NO_QP) \
+ ib_cm_rej_reason(REJ_NO_EEC) \
+ ib_cm_rej_reason(REJ_NO_RESOURCES) \
+ ib_cm_rej_reason(REJ_TIMEOUT) \
+ ib_cm_rej_reason(REJ_UNSUPPORTED) \
+ ib_cm_rej_reason(REJ_INVALID_COMM_ID) \
+ ib_cm_rej_reason(REJ_INVALID_COMM_INSTANCE) \
+ ib_cm_rej_reason(REJ_INVALID_SERVICE_ID) \
+ ib_cm_rej_reason(REJ_INVALID_TRANSPORT_TYPE) \
+ ib_cm_rej_reason(REJ_STALE_CONN) \
+ ib_cm_rej_reason(REJ_RDC_NOT_EXIST) \
+ ib_cm_rej_reason(REJ_INVALID_GID) \
+ ib_cm_rej_reason(REJ_INVALID_LID) \
+ ib_cm_rej_reason(REJ_INVALID_SL) \
+ ib_cm_rej_reason(REJ_INVALID_TRAFFIC_CLASS) \
+ ib_cm_rej_reason(REJ_INVALID_HOP_LIMIT) \
+ ib_cm_rej_reason(REJ_INVALID_PACKET_RATE) \
+ ib_cm_rej_reason(REJ_INVALID_ALT_GID) \
+ ib_cm_rej_reason(REJ_INVALID_ALT_LID) \
+ ib_cm_rej_reason(REJ_INVALID_ALT_SL) \
+ ib_cm_rej_reason(REJ_INVALID_ALT_TRAFFIC_CLASS) \
+ ib_cm_rej_reason(REJ_INVALID_ALT_HOP_LIMIT) \
+ ib_cm_rej_reason(REJ_INVALID_ALT_PACKET_RATE) \
+ ib_cm_rej_reason(REJ_PORT_CM_REDIRECT) \
+ ib_cm_rej_reason(REJ_PORT_REDIRECT) \
+ ib_cm_rej_reason(REJ_INVALID_MTU) \
+ ib_cm_rej_reason(REJ_INSUFFICIENT_RESP_RESOURCES) \
+ ib_cm_rej_reason(REJ_CONSUMER_DEFINED) \
+ ib_cm_rej_reason(REJ_INVALID_RNR_RETRY) \
+ ib_cm_rej_reason(REJ_DUPLICATE_LOCAL_COMM_ID) \
+ ib_cm_rej_reason(REJ_INVALID_CLASS_VERSION) \
+ ib_cm_rej_reason(REJ_INVALID_FLOW_LABEL) \
+ ib_cm_rej_reason(REJ_INVALID_ALT_FLOW_LABEL) \
+ ib_cm_rej_reason_end(REJ_VENDOR_OPTION_NOT_SUPPORTED)
+
+#undef ib_cm_rej_reason
+#undef ib_cm_rej_reason_end
+#define ib_cm_rej_reason(x) TRACE_DEFINE_ENUM(IB_CM_##x);
+#define ib_cm_rej_reason_end(x) TRACE_DEFINE_ENUM(IB_CM_##x);
+
+IB_CM_REJ_REASON_LIST
+
+#undef ib_cm_rej_reason
+#undef ib_cm_rej_reason_end
+#define ib_cm_rej_reason(x) { IB_CM_##x, #x },
+#define ib_cm_rej_reason_end(x) { IB_CM_##x, #x }
+
+#define show_ib_cm_rej_reason(x) \
+ __print_symbolic(x, IB_CM_REJ_REASON_LIST)
+
+DECLARE_EVENT_CLASS(icm_id_class,
+ TP_PROTO(
+ const struct ib_cm_id *cm_id
+ ),
+
+ TP_ARGS(cm_id),
+
+ TP_STRUCT__entry(
+ __field(const void *, cm_id) /* for eBPF scripts */
+ __field(unsigned int, local_id)
+ __field(unsigned int, remote_id)
+ __field(unsigned long, state)
+ __field(unsigned long, lap_state)
+ ),
+
+ TP_fast_assign(
+ __entry->cm_id = cm_id;
+ __entry->local_id = be32_to_cpu(cm_id->local_id);
+ __entry->remote_id = be32_to_cpu(cm_id->remote_id);
+ __entry->state = cm_id->state;
+ __entry->lap_state = cm_id->lap_state;
+ ),
+
+ TP_printk("local_id=%u remote_id=%u state=%s lap_state=%s",
+ __entry->local_id, __entry->remote_id,
+ show_ib_cm_state(__entry->state),
+ show_ib_cm_lap_state(__entry->lap_state)
+ )
+);
+
+#define DEFINE_CM_SEND_EVENT(name) \
+ DEFINE_EVENT(icm_id_class, \
+ icm_send_##name, \
+ TP_PROTO( \
+ const struct ib_cm_id *cm_id \
+ ), \
+ TP_ARGS(cm_id))
+
+DEFINE_CM_SEND_EVENT(req);
+DEFINE_CM_SEND_EVENT(rep);
+DEFINE_CM_SEND_EVENT(dup_req);
+DEFINE_CM_SEND_EVENT(dup_rep);
+DEFINE_CM_SEND_EVENT(rtu);
+DEFINE_CM_SEND_EVENT(mra);
+DEFINE_CM_SEND_EVENT(sidr_req);
+DEFINE_CM_SEND_EVENT(sidr_rep);
+DEFINE_CM_SEND_EVENT(dreq);
+DEFINE_CM_SEND_EVENT(drep);
+
+TRACE_EVENT(icm_send_rej,
+ TP_PROTO(
+ const struct ib_cm_id *cm_id,
+ enum ib_cm_rej_reason reason
+ ),
+
+ TP_ARGS(cm_id, reason),
+
+ TP_STRUCT__entry(
+ __field(const void *, cm_id)
+ __field(u32, local_id)
+ __field(u32, remote_id)
+ __field(unsigned long, state)
+ __field(unsigned long, reason)
+ ),
+
+ TP_fast_assign(
+ __entry->cm_id = cm_id;
+ __entry->local_id = be32_to_cpu(cm_id->local_id);
+ __entry->remote_id = be32_to_cpu(cm_id->remote_id);
+ __entry->state = cm_id->state;
+ __entry->reason = reason;
+ ),
+
+ TP_printk("local_id=%u remote_id=%u state=%s reason=%s",
+ __entry->local_id, __entry->remote_id,
+ show_ib_cm_state(__entry->state),
+ show_ib_cm_rej_reason(__entry->reason)
+ )
+);
+
+#define DEFINE_CM_ERR_EVENT(name) \
+ DEFINE_EVENT(icm_id_class, \
+ icm_##name##_err, \
+ TP_PROTO( \
+ const struct ib_cm_id *cm_id \
+ ), \
+ TP_ARGS(cm_id))
+
+DEFINE_CM_ERR_EVENT(send_cm_rtu);
+DEFINE_CM_ERR_EVENT(establish);
+DEFINE_CM_ERR_EVENT(no_listener);
+DEFINE_CM_ERR_EVENT(send_drep);
+DEFINE_CM_ERR_EVENT(dreq_unknown);
+DEFINE_CM_ERR_EVENT(send_unknown_rej);
+DEFINE_CM_ERR_EVENT(rej_unknown);
+DEFINE_CM_ERR_EVENT(prepare_mra_unknown);
+DEFINE_CM_ERR_EVENT(mra_unknown);
+DEFINE_CM_ERR_EVENT(qp_init);
+DEFINE_CM_ERR_EVENT(qp_rtr);
+DEFINE_CM_ERR_EVENT(qp_rts);
+
+DEFINE_EVENT(icm_id_class, \
+ icm_dreq_skipped, \
+ TP_PROTO( \
+ const struct ib_cm_id *cm_id \
+ ), \
+ TP_ARGS(cm_id) \
+);
+
+DECLARE_EVENT_CLASS(icm_local_class,
+ TP_PROTO(
+ unsigned int local_id,
+ unsigned int remote_id
+ ),
+
+ TP_ARGS(local_id, remote_id),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, local_id)
+ __field(unsigned int, remote_id)
+ ),
+
+ TP_fast_assign(
+ __entry->local_id = local_id;
+ __entry->remote_id = remote_id;
+ ),
+
+ TP_printk("local_id=%u remote_id=%u",
+ __entry->local_id, __entry->remote_id
+ )
+);
+
+#define DEFINE_CM_LOCAL_EVENT(name) \
+ DEFINE_EVENT(icm_local_class, \
+ icm_##name, \
+ TP_PROTO( \
+ unsigned int local_id, \
+ unsigned int remote_id \
+ ), \
+ TP_ARGS(local_id, remote_id))
+
+DEFINE_CM_LOCAL_EVENT(issue_rej);
+DEFINE_CM_LOCAL_EVENT(issue_drep);
+DEFINE_CM_LOCAL_EVENT(staleconn_err);
+DEFINE_CM_LOCAL_EVENT(no_priv_err);
+
+DECLARE_EVENT_CLASS(icm_remote_class,
+ TP_PROTO(
+ u32 remote_id
+ ),
+
+ TP_ARGS(remote_id),
+
+ TP_STRUCT__entry(
+ __field(u32, remote_id)
+ ),
+
+ TP_fast_assign(
+ __entry->remote_id = remote_id;
+ ),
+
+ TP_printk("remote_id=%u",
+ __entry->remote_id
+ )
+);
+
+#define DEFINE_CM_REMOTE_EVENT(name) \
+ DEFINE_EVENT(icm_remote_class, \
+ icm_##name, \
+ TP_PROTO( \
+ u32 remote_id \
+ ), \
+ TP_ARGS(remote_id))
+
+DEFINE_CM_REMOTE_EVENT(remote_no_priv_err);
+DEFINE_CM_REMOTE_EVENT(insert_failed_err);
+
+TRACE_EVENT(icm_send_rep_err,
+ TP_PROTO(
+ __be32 local_id,
+ enum ib_cm_state state
+ ),
+
+ TP_ARGS(local_id, state),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, local_id)
+ __field(unsigned long, state)
+ ),
+
+ TP_fast_assign(
+ __entry->local_id = be32_to_cpu(local_id);
+ __entry->state = state;
+ ),
+
+ TP_printk("local_id=%u state=%s",
+ __entry->local_id, show_ib_cm_state(__entry->state)
+ )
+);
+
+TRACE_EVENT(icm_rep_unknown_err,
+ TP_PROTO(
+ unsigned int local_id,
+ unsigned int remote_id,
+ enum ib_cm_state state
+ ),
+
+ TP_ARGS(local_id, remote_id, state),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, local_id)
+ __field(unsigned int, remote_id)
+ __field(unsigned long, state)
+ ),
+
+ TP_fast_assign(
+ __entry->local_id = local_id;
+ __entry->remote_id = remote_id;
+ __entry->state = state;
+ ),
+
+ TP_printk("local_id=%u remote_id=%u state=%s",
+ __entry->local_id, __entry->remote_id,
+ show_ib_cm_state(__entry->state)
+ )
+);
+
+TRACE_EVENT(icm_handler_err,
+ TP_PROTO(
+ enum ib_cm_event_type event
+ ),
+
+ TP_ARGS(event),
+
+ TP_STRUCT__entry(
+ __field(unsigned long, event)
+ ),
+
+ TP_fast_assign(
+ __entry->event = event;
+ ),
+
+ TP_printk("unhandled event=%s",
+ rdma_show_ib_cm_event(__entry->event)
+ )
+);
+
+TRACE_EVENT(icm_mad_send_err,
+ TP_PROTO(
+ enum ib_cm_state state,
+ enum ib_wc_status wc_status
+ ),
+
+ TP_ARGS(state, wc_status),
+
+ TP_STRUCT__entry(
+ __field(unsigned long, state)
+ __field(unsigned long, wc_status)
+ ),
+
+ TP_fast_assign(
+ __entry->state = state;
+ __entry->wc_status = wc_status;
+ ),
+
+ TP_printk("state=%s completion status=%s",
+ show_ib_cm_state(__entry->state),
+ rdma_show_wc_status(__entry->wc_status)
+ )
+);
+
+#endif /* _TRACE_IB_CMA_H */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH ../../drivers/infiniband/core
+#define TRACE_INCLUDE_FILE cm_trace
+
+#include <trace/define_trace.h>
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index e7dcfac877ca..95e89f5c147c 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -1,36 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
* Copyright (c) 2005 Voltaire Inc. All rights reserved.
* Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
- * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
+ * Copyright (c) 1999-2019, Mellanox Technologies, Inc. All rights reserved.
* Copyright (c) 2005-2006 Intel Corporation. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
*/
#include <linux/completion.h>
@@ -38,8 +11,9 @@
#include <linux/in6.h>
#include <linux/mutex.h>
#include <linux/random.h>
+#include <linux/rbtree.h>
#include <linux/igmp.h>
-#include <linux/idr.h>
+#include <linux/xarray.h>
#include <linux/inetdevice.h>
#include <linux/slab.h>
#include <linux/module.h>
@@ -47,6 +21,7 @@
#include <net/net_namespace.h>
#include <net/netns/generic.h>
+#include <net/netevent.h>
#include <net/tcp.h>
#include <net/ipv6.h>
#include <net/ip_fib.h>
@@ -62,16 +37,17 @@
#include <rdma/iw_cm.h>
#include "core_priv.h"
+#include "cma_priv.h"
+#include "cma_trace.h"
MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("Generic RDMA CM Agent");
MODULE_LICENSE("Dual BSD/GPL");
#define CMA_CM_RESPONSE_TIMEOUT 20
-#define CMA_QUERY_CLASSPORT_INFO_TIMEOUT 3000
#define CMA_MAX_CM_RETRIES 15
-#define CMA_CM_MRA_SETTING (IB_CM_MRA_FLAG_DELAY | 24)
-#define CMA_IBOE_PACKET_LIFETIME 18
+#define CMA_IBOE_PACKET_LIFETIME 16
+#define CMA_PREFERRED_ROCE_GID_TYPE IB_GID_TYPE_ROCE_UDP_ENCAP
static const char * const cma_events[] = {
[RDMA_CM_EVENT_ADDR_RESOLVED] = "address resolved",
@@ -92,6 +68,11 @@ static const char * const cma_events[] = {
[RDMA_CM_EVENT_TIMEWAIT_EXIT] = "timewait exit",
};
+static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid,
+ enum ib_gid_type gid_type);
+
+static void cma_netevent_work_handler(struct work_struct *_work);
+
const char *__attribute_const__ rdma_event_msg(enum rdma_cm_event_type event)
{
size_t index = event;
@@ -115,7 +96,13 @@ const char *__attribute_const__ rdma_reject_msg(struct rdma_cm_id *id,
}
EXPORT_SYMBOL(rdma_reject_msg);
-bool rdma_is_consumer_reject(struct rdma_cm_id *id, int reason)
+/**
+ * rdma_is_consumer_reject - return true if the consumer rejected the connect
+ * request.
+ * @id: Communication identifier that received the REJECT event.
+ * @reason: Value returned in the REJECT event status field.
+ */
+static bool rdma_is_consumer_reject(struct rdma_cm_id *id, int reason)
{
if (rdma_ib_or_roce(id->device, id->port_num))
return reason == IB_CM_REJ_CONSUMER_DEFINED;
@@ -126,7 +113,6 @@ bool rdma_is_consumer_reject(struct rdma_cm_id *id, int reason)
WARN_ON_ONCE(1);
return false;
}
-EXPORT_SYMBOL(rdma_is_consumer_reject);
const void *rdma_consumer_reject_data(struct rdma_cm_id *id,
struct rdma_cm_event *ev, u8 *data_len)
@@ -144,7 +130,22 @@ const void *rdma_consumer_reject_data(struct rdma_cm_id *id,
}
EXPORT_SYMBOL(rdma_consumer_reject_data);
-static void cma_add_one(struct ib_device *device);
+/**
+ * rdma_iw_cm_id() - return the iw_cm_id pointer for this cm_id.
+ * @id: Communication Identifier
+ */
+struct iw_cm_id *rdma_iw_cm_id(struct rdma_cm_id *id)
+{
+ struct rdma_id_private *id_priv;
+
+ id_priv = container_of(id, struct rdma_id_private, id);
+ if (id->device->node_type == RDMA_NODE_RNIC)
+ return id_priv->cm_id.iw;
+ return NULL;
+}
+EXPORT_SYMBOL(rdma_iw_cm_id);
+
+static int cma_add_one(struct ib_device *device);
static void cma_remove_one(struct ib_device *device, void *client_data);
static struct ib_client cma_client = {
@@ -154,18 +155,20 @@ static struct ib_client cma_client = {
};
static struct ib_sa_client sa_client;
-static struct rdma_addr_client addr_client;
static LIST_HEAD(dev_list);
static LIST_HEAD(listen_any_list);
static DEFINE_MUTEX(lock);
+static struct rb_root id_table = RB_ROOT;
+/* Serialize operations of id_table tree */
+static DEFINE_SPINLOCK(id_table_lock);
static struct workqueue_struct *cma_wq;
static unsigned int cma_pernet_id;
struct cma_pernet {
- struct idr tcp_ps;
- struct idr udp_ps;
- struct idr ipoib_ps;
- struct idr ib_ps;
+ struct xarray tcp_ps;
+ struct xarray udp_ps;
+ struct xarray ipoib_ps;
+ struct xarray ib_ps;
};
static struct cma_pernet *cma_pernet(struct net *net)
@@ -173,7 +176,8 @@ static struct cma_pernet *cma_pernet(struct net *net)
return net_generic(net, cma_pernet_id);
}
-static struct idr *cma_pernet_idr(struct net *net, enum rdma_port_space ps)
+static
+struct xarray *cma_pernet_xa(struct net *net, enum rdma_ucm_port_space ps)
{
struct cma_pernet *pernet = cma_pernet(net);
@@ -191,59 +195,64 @@ static struct idr *cma_pernet_idr(struct net *net, enum rdma_port_space ps)
}
}
+struct id_table_entry {
+ struct list_head id_list;
+ struct rb_node rb_node;
+};
+
struct cma_device {
struct list_head list;
struct ib_device *device;
struct completion comp;
- atomic_t refcount;
+ refcount_t refcount;
struct list_head id_list;
enum ib_gid_type *default_gid_type;
+ u8 *default_roce_tos;
};
struct rdma_bind_list {
- enum rdma_port_space ps;
+ enum rdma_ucm_port_space ps;
struct hlist_head owners;
unsigned short port;
};
-struct class_port_info_context {
- struct ib_class_port_info *class_port_info;
- struct ib_device *device;
- struct completion done;
- struct ib_sa_query *sa_query;
- u8 port_num;
-};
-
-static int cma_ps_alloc(struct net *net, enum rdma_port_space ps,
+static int cma_ps_alloc(struct net *net, enum rdma_ucm_port_space ps,
struct rdma_bind_list *bind_list, int snum)
{
- struct idr *idr = cma_pernet_idr(net, ps);
+ struct xarray *xa = cma_pernet_xa(net, ps);
- return idr_alloc(idr, bind_list, snum, snum + 1, GFP_KERNEL);
+ return xa_insert(xa, snum, bind_list, GFP_KERNEL);
}
static struct rdma_bind_list *cma_ps_find(struct net *net,
- enum rdma_port_space ps, int snum)
+ enum rdma_ucm_port_space ps, int snum)
{
- struct idr *idr = cma_pernet_idr(net, ps);
+ struct xarray *xa = cma_pernet_xa(net, ps);
- return idr_find(idr, snum);
+ return xa_load(xa, snum);
}
-static void cma_ps_remove(struct net *net, enum rdma_port_space ps, int snum)
+static void cma_ps_remove(struct net *net, enum rdma_ucm_port_space ps,
+ int snum)
{
- struct idr *idr = cma_pernet_idr(net, ps);
+ struct xarray *xa = cma_pernet_xa(net, ps);
- idr_remove(idr, snum);
+ xa_erase(xa, snum);
}
enum {
CMA_OPTION_AFONLY,
};
-void cma_ref_dev(struct cma_device *cma_dev)
+void cma_dev_get(struct cma_device *cma_dev)
+{
+ refcount_inc(&cma_dev->refcount);
+}
+
+void cma_dev_put(struct cma_device *cma_dev)
{
- atomic_inc(&cma_dev->refcount);
+ if (refcount_dec_and_test(&cma_dev->refcount))
+ complete(&cma_dev->comp);
}
struct cma_device *cma_enum_devices_by_ibdev(cma_device_filter filter,
@@ -261,31 +270,33 @@ struct cma_device *cma_enum_devices_by_ibdev(cma_device_filter filter,
}
if (found_cma_dev)
- cma_ref_dev(found_cma_dev);
+ cma_dev_get(found_cma_dev);
mutex_unlock(&lock);
return found_cma_dev;
}
int cma_get_default_gid_type(struct cma_device *cma_dev,
- unsigned int port)
+ u32 port)
{
- if (port < rdma_start_port(cma_dev->device) ||
- port > rdma_end_port(cma_dev->device))
+ if (!rdma_is_port_valid(cma_dev->device, port))
return -EINVAL;
return cma_dev->default_gid_type[port - rdma_start_port(cma_dev->device)];
}
int cma_set_default_gid_type(struct cma_device *cma_dev,
- unsigned int port,
+ u32 port,
enum ib_gid_type default_gid_type)
{
unsigned long supported_gids;
- if (port < rdma_start_port(cma_dev->device) ||
- port > rdma_end_port(cma_dev->device))
+ if (!rdma_is_port_valid(cma_dev->device, port))
return -EINVAL;
+ if (default_gid_type == IB_GID_TYPE_IB &&
+ rdma_protocol_roce_eth_encap(cma_dev->device, port))
+ default_gid_type = IB_GID_TYPE_ROCE;
+
supported_gids = roce_gid_type_mask_support(cma_dev->device, port);
if (!(supported_gids & 1 << default_gid_type))
@@ -297,6 +308,25 @@ int cma_set_default_gid_type(struct cma_device *cma_dev,
return 0;
}
+int cma_get_default_roce_tos(struct cma_device *cma_dev, u32 port)
+{
+ if (!rdma_is_port_valid(cma_dev->device, port))
+ return -EINVAL;
+
+ return cma_dev->default_roce_tos[port - rdma_start_port(cma_dev->device)];
+}
+
+int cma_set_default_roce_tos(struct cma_device *cma_dev, u32 port,
+ u8 default_roce_tos)
+{
+ if (!rdma_is_port_valid(cma_dev->device, port))
+ return -EINVAL;
+
+ cma_dev->default_roce_tos[port - rdma_start_port(cma_dev->device)] =
+ default_roce_tos;
+
+ return 0;
+}
struct ib_device *cma_get_ib_dev(struct cma_device *cma_dev)
{
return cma_dev->device;
@@ -308,56 +338,19 @@ struct ib_device *cma_get_ib_dev(struct cma_device *cma_dev)
* We do this by disabling removal notification while a callback is in process,
* and reporting it after the callback completes.
*/
-struct rdma_id_private {
- struct rdma_cm_id id;
-
- struct rdma_bind_list *bind_list;
- struct hlist_node node;
- struct list_head list; /* listen_any_list or cma_device.list */
- struct list_head listen_list; /* per device listens */
- struct cma_device *cma_dev;
- struct list_head mc_list;
-
- int internal_id;
- enum rdma_cm_state state;
- spinlock_t lock;
- struct mutex qp_mutex;
-
- struct completion comp;
- atomic_t refcount;
- struct mutex handler_mutex;
-
- int backlog;
- int timeout_ms;
- struct ib_sa_query *query;
- int query_id;
- union {
- struct ib_cm_id *ib;
- struct iw_cm_id *iw;
- } cm_id;
-
- u32 seq_num;
- u32 qkey;
- u32 qp_num;
- pid_t owner;
- u32 options;
- u8 srq;
- u8 tos;
- u8 reuseaddr;
- u8 afonly;
- enum ib_gid_type gid_type;
-};
struct cma_multicast {
struct rdma_id_private *id_priv;
union {
- struct ib_sa_multicast *ib;
- } multicast;
+ struct ib_sa_multicast *sa_mc;
+ struct {
+ struct work_struct work;
+ struct rdma_cm_event event;
+ } iboe_join;
+ };
struct list_head list;
void *context;
struct sockaddr_storage addr;
- struct kref mcref;
- bool igmp_joined;
u8 join_state;
};
@@ -369,18 +362,6 @@ struct cma_work {
struct rdma_cm_event event;
};
-struct cma_ndev_work {
- struct work_struct work;
- struct rdma_id_private *id;
- struct rdma_cm_event event;
-};
-
-struct iboe_mcast_work {
- struct work_struct work;
- struct rdma_id_private *id;
- struct cma_multicast *mc;
-};
-
union cma_ip_addr {
struct in6_addr ip6;
struct {
@@ -400,31 +381,31 @@ struct cma_hdr {
#define CMA_VERSION 0x00
struct cma_req_info {
+ struct sockaddr_storage listen_addr_storage;
+ struct sockaddr_storage src_addr_storage;
struct ib_device *device;
- int port;
union ib_gid local_gid;
__be64 service_id;
+ int port;
+ bool has_gid;
u16 pkey;
- bool has_gid:1;
};
-static int cma_comp(struct rdma_id_private *id_priv, enum rdma_cm_state comp)
-{
- unsigned long flags;
- int ret;
-
- spin_lock_irqsave(&id_priv->lock, flags);
- ret = (id_priv->state == comp);
- spin_unlock_irqrestore(&id_priv->lock, flags);
- return ret;
-}
-
static int cma_comp_exch(struct rdma_id_private *id_priv,
enum rdma_cm_state comp, enum rdma_cm_state exch)
{
unsigned long flags;
int ret;
+ /*
+ * The FSM uses a funny double locking where state is protected by both
+ * the handler_mutex and the spinlock. State is not allowed to change
+ * to/from a handler_mutex protected value without also holding
+ * handler_mutex.
+ */
+ if (comp == RDMA_CM_CONNECT || exch == RDMA_CM_CONNECT)
+ lockdep_assert_held(&id_priv->handler_mutex);
+
spin_lock_irqsave(&id_priv->lock, flags);
if ((ret = (id_priv->state == comp)))
id_priv->state = exch;
@@ -432,27 +413,24 @@ static int cma_comp_exch(struct rdma_id_private *id_priv,
return ret;
}
-static enum rdma_cm_state cma_exch(struct rdma_id_private *id_priv,
- enum rdma_cm_state exch)
+static inline u8 cma_get_ip_ver(const struct cma_hdr *hdr)
{
- unsigned long flags;
- enum rdma_cm_state old;
+ return hdr->ip_version >> 4;
+}
- spin_lock_irqsave(&id_priv->lock, flags);
- old = id_priv->state;
- id_priv->state = exch;
- spin_unlock_irqrestore(&id_priv->lock, flags);
- return old;
+static void cma_set_ip_ver(struct cma_hdr *hdr, u8 ip_ver)
+{
+ hdr->ip_version = (ip_ver << 4) | (hdr->ip_version & 0xF);
}
-static inline u8 cma_get_ip_ver(const struct cma_hdr *hdr)
+static struct sockaddr *cma_src_addr(struct rdma_id_private *id_priv)
{
- return hdr->ip_version >> 4;
+ return (struct sockaddr *)&id_priv->id.route.addr.src_addr;
}
-static inline void cma_set_ip_ver(struct cma_hdr *hdr, u8 ip_ver)
+static inline struct sockaddr *cma_dst_addr(struct rdma_id_private *id_priv)
{
- hdr->ip_version = (ip_ver << 4) | (hdr->ip_version & 0xF);
+ return (struct sockaddr *)&id_priv->id.route.addr.dst_addr;
}
static int cma_igmp_send(struct net_device *ndev, union ib_gid *mgid, bool join)
@@ -475,16 +453,135 @@ static int cma_igmp_send(struct net_device *ndev, union ib_gid *mgid, bool join)
return (in_dev) ? 0 : -ENODEV;
}
+static int compare_netdev_and_ip(int ifindex_a, struct sockaddr *sa,
+ struct id_table_entry *entry_b)
+{
+ struct rdma_id_private *id_priv = list_first_entry(
+ &entry_b->id_list, struct rdma_id_private, id_list_entry);
+ int ifindex_b = id_priv->id.route.addr.dev_addr.bound_dev_if;
+ struct sockaddr *sb = cma_dst_addr(id_priv);
+
+ if (ifindex_a != ifindex_b)
+ return (ifindex_a > ifindex_b) ? 1 : -1;
+
+ if (sa->sa_family != sb->sa_family)
+ return sa->sa_family - sb->sa_family;
+
+ if (sa->sa_family == AF_INET &&
+ __builtin_object_size(sa, 0) >= sizeof(struct sockaddr_in)) {
+ return memcmp(&((struct sockaddr_in *)sa)->sin_addr,
+ &((struct sockaddr_in *)sb)->sin_addr,
+ sizeof(((struct sockaddr_in *)sa)->sin_addr));
+ }
+
+ if (sa->sa_family == AF_INET6 &&
+ __builtin_object_size(sa, 0) >= sizeof(struct sockaddr_in6)) {
+ return ipv6_addr_cmp(&((struct sockaddr_in6 *)sa)->sin6_addr,
+ &((struct sockaddr_in6 *)sb)->sin6_addr);
+ }
+
+ return -1;
+}
+
+static int cma_add_id_to_tree(struct rdma_id_private *node_id_priv)
+{
+ struct rb_node **new, *parent = NULL;
+ struct id_table_entry *this, *node;
+ unsigned long flags;
+ int result;
+
+ node = kzalloc(sizeof(*node), GFP_KERNEL);
+ if (!node)
+ return -ENOMEM;
+
+ spin_lock_irqsave(&id_table_lock, flags);
+ new = &id_table.rb_node;
+ while (*new) {
+ this = container_of(*new, struct id_table_entry, rb_node);
+ result = compare_netdev_and_ip(
+ node_id_priv->id.route.addr.dev_addr.bound_dev_if,
+ cma_dst_addr(node_id_priv), this);
+
+ parent = *new;
+ if (result < 0)
+ new = &((*new)->rb_left);
+ else if (result > 0)
+ new = &((*new)->rb_right);
+ else {
+ list_add_tail(&node_id_priv->id_list_entry,
+ &this->id_list);
+ kfree(node);
+ goto unlock;
+ }
+ }
+
+ INIT_LIST_HEAD(&node->id_list);
+ list_add_tail(&node_id_priv->id_list_entry, &node->id_list);
+
+ rb_link_node(&node->rb_node, parent, new);
+ rb_insert_color(&node->rb_node, &id_table);
+
+unlock:
+ spin_unlock_irqrestore(&id_table_lock, flags);
+ return 0;
+}
+
+static struct id_table_entry *
+node_from_ndev_ip(struct rb_root *root, int ifindex, struct sockaddr *sa)
+{
+ struct rb_node *node = root->rb_node;
+ struct id_table_entry *data;
+ int result;
+
+ while (node) {
+ data = container_of(node, struct id_table_entry, rb_node);
+ result = compare_netdev_and_ip(ifindex, sa, data);
+ if (result < 0)
+ node = node->rb_left;
+ else if (result > 0)
+ node = node->rb_right;
+ else
+ return data;
+ }
+
+ return NULL;
+}
+
+static void cma_remove_id_from_tree(struct rdma_id_private *id_priv)
+{
+ struct id_table_entry *data;
+ unsigned long flags;
+
+ spin_lock_irqsave(&id_table_lock, flags);
+ if (list_empty(&id_priv->id_list_entry))
+ goto out;
+
+ data = node_from_ndev_ip(&id_table,
+ id_priv->id.route.addr.dev_addr.bound_dev_if,
+ cma_dst_addr(id_priv));
+ if (!data)
+ goto out;
+
+ list_del_init(&id_priv->id_list_entry);
+ if (list_empty(&data->id_list)) {
+ rb_erase(&data->rb_node, &id_table);
+ kfree(data);
+ }
+out:
+ spin_unlock_irqrestore(&id_table_lock, flags);
+}
+
static void _cma_attach_to_dev(struct rdma_id_private *id_priv,
struct cma_device *cma_dev)
{
- cma_ref_dev(cma_dev);
+ cma_dev_get(cma_dev);
id_priv->cma_dev = cma_dev;
- id_priv->gid_type = 0;
id_priv->id.device = cma_dev->device;
id_priv->id.route.addr.dev_addr.transport =
rdma_node_get_transport(cma_dev->device->node_type);
- list_add_tail(&id_priv->list, &cma_dev->id_list);
+ list_add_tail(&id_priv->device_item, &cma_dev->id_list);
+
+ trace_cm_id_attach(id_priv, cma_dev->device);
}
static void cma_attach_to_dev(struct rdma_id_private *id_priv,
@@ -496,60 +593,30 @@ static void cma_attach_to_dev(struct rdma_id_private *id_priv,
rdma_start_port(cma_dev->device)];
}
-void cma_deref_dev(struct cma_device *cma_dev)
-{
- if (atomic_dec_and_test(&cma_dev->refcount))
- complete(&cma_dev->comp);
-}
-
-static inline void release_mc(struct kref *kref)
-{
- struct cma_multicast *mc = container_of(kref, struct cma_multicast, mcref);
-
- kfree(mc->multicast.ib);
- kfree(mc);
-}
-
static void cma_release_dev(struct rdma_id_private *id_priv)
{
mutex_lock(&lock);
- list_del(&id_priv->list);
- cma_deref_dev(id_priv->cma_dev);
+ list_del_init(&id_priv->device_item);
+ cma_dev_put(id_priv->cma_dev);
id_priv->cma_dev = NULL;
+ id_priv->id.device = NULL;
+ if (id_priv->id.route.addr.dev_addr.sgid_attr) {
+ rdma_put_gid_attr(id_priv->id.route.addr.dev_addr.sgid_attr);
+ id_priv->id.route.addr.dev_addr.sgid_attr = NULL;
+ }
mutex_unlock(&lock);
}
-static inline struct sockaddr *cma_src_addr(struct rdma_id_private *id_priv)
-{
- return (struct sockaddr *) &id_priv->id.route.addr.src_addr;
-}
-
-static inline struct sockaddr *cma_dst_addr(struct rdma_id_private *id_priv)
-{
- return (struct sockaddr *) &id_priv->id.route.addr.dst_addr;
-}
-
static inline unsigned short cma_family(struct rdma_id_private *id_priv)
{
return id_priv->id.route.addr.src_addr.ss_family;
}
-static int cma_set_qkey(struct rdma_id_private *id_priv, u32 qkey)
+static int cma_set_default_qkey(struct rdma_id_private *id_priv)
{
struct ib_sa_mcmember_rec rec;
int ret = 0;
- if (id_priv->qkey) {
- if (qkey && id_priv->qkey != qkey)
- return -EINVAL;
- return 0;
- }
-
- if (qkey) {
- id_priv->qkey = qkey;
- return 0;
- }
-
switch (id_priv->id.ps) {
case RDMA_PS_UDP:
case RDMA_PS_IB:
@@ -569,6 +636,16 @@ static int cma_set_qkey(struct rdma_id_private *id_priv, u32 qkey)
return ret;
}
+static int cma_set_qkey(struct rdma_id_private *id_priv, u32 qkey)
+{
+ if (!qkey ||
+ (id_priv->qkey && (id_priv->qkey != qkey)))
+ return -EINVAL;
+
+ id_priv->qkey = qkey;
+ return 0;
+}
+
static void cma_translate_ib(struct sockaddr_ib *sib, struct rdma_dev_addr *dev_addr)
{
dev_addr->dev_type = ARPHRD_INFINIBAND;
@@ -581,7 +658,7 @@ static int cma_translate_addr(struct sockaddr *addr, struct rdma_dev_addr *dev_a
int ret;
if (addr->sa_family != AF_IB) {
- ret = rdma_translate_ip(addr, dev_addr, NULL);
+ ret = rdma_translate_ip(addr, dev_addr);
} else {
cma_translate_ib((struct sockaddr_ib *) addr, dev_addr);
ret = 0;
@@ -590,110 +667,257 @@ static int cma_translate_addr(struct sockaddr *addr, struct rdma_dev_addr *dev_a
return ret;
}
-static inline int cma_validate_port(struct ib_device *device, u8 port,
- enum ib_gid_type gid_type,
- union ib_gid *gid, int dev_type,
- int bound_if_index)
+static const struct ib_gid_attr *
+cma_validate_port(struct ib_device *device, u32 port,
+ enum ib_gid_type gid_type,
+ union ib_gid *gid,
+ struct rdma_id_private *id_priv)
{
- int ret = -ENODEV;
+ struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
+ const struct ib_gid_attr *sgid_attr = ERR_PTR(-ENODEV);
+ int bound_if_index = dev_addr->bound_dev_if;
+ int dev_type = dev_addr->dev_type;
struct net_device *ndev = NULL;
+ struct net_device *pdev = NULL;
+
+ if (!rdma_dev_access_netns(device, id_priv->id.route.addr.dev_addr.net))
+ goto out;
if ((dev_type == ARPHRD_INFINIBAND) && !rdma_protocol_ib(device, port))
- return ret;
+ goto out;
if ((dev_type != ARPHRD_INFINIBAND) && rdma_protocol_ib(device, port))
- return ret;
+ goto out;
- if (dev_type == ARPHRD_ETHER && rdma_protocol_roce(device, port)) {
- ndev = dev_get_by_index(&init_net, bound_if_index);
- if (ndev && ndev->flags & IFF_LOOPBACK) {
- pr_info("detected loopback device\n");
- dev_put(ndev);
+ /*
+ * For drivers that do not associate more than one net device with
+ * their gid tables, such as iWARP drivers, it is sufficient to
+ * return the first table entry.
+ *
+ * Other driver classes might be included in the future.
+ */
+ if (rdma_protocol_iwarp(device, port)) {
+ sgid_attr = rdma_get_gid_attr(device, port, 0);
+ if (IS_ERR(sgid_attr))
+ goto out;
- if (!device->get_netdev)
- return -EOPNOTSUPP;
+ rcu_read_lock();
+ ndev = rcu_dereference(sgid_attr->ndev);
+ if (ndev->ifindex != bound_if_index) {
+ pdev = dev_get_by_index_rcu(dev_addr->net, bound_if_index);
+ if (pdev) {
+ if (is_vlan_dev(pdev)) {
+ pdev = vlan_dev_real_dev(pdev);
+ if (ndev->ifindex == pdev->ifindex)
+ bound_if_index = pdev->ifindex;
+ }
+ if (is_vlan_dev(ndev)) {
+ pdev = vlan_dev_real_dev(ndev);
+ if (bound_if_index == pdev->ifindex)
+ bound_if_index = ndev->ifindex;
+ }
+ }
+ }
+ if (!net_eq(dev_net(ndev), dev_addr->net) ||
+ ndev->ifindex != bound_if_index) {
+ rdma_put_gid_attr(sgid_attr);
+ sgid_attr = ERR_PTR(-ENODEV);
+ }
+ rcu_read_unlock();
+ goto out;
+ }
- ndev = device->get_netdev(device, port);
+ /*
+ * For a RXE device, it should work with TUN device and normal ethernet
+ * devices. Use driver_id to check if a device is a RXE device or not.
+ * ARPHDR_NONE means a TUN device.
+ */
+ if (device->ops.driver_id == RDMA_DRIVER_RXE) {
+ if ((dev_type == ARPHRD_NONE || dev_type == ARPHRD_ETHER)
+ && rdma_protocol_roce(device, port)) {
+ ndev = dev_get_by_index(dev_addr->net, bound_if_index);
if (!ndev)
- return -ENODEV;
+ goto out;
}
} else {
- gid_type = IB_GID_TYPE_IB;
+ if (dev_type == ARPHRD_ETHER && rdma_protocol_roce(device, port)) {
+ ndev = dev_get_by_index(dev_addr->net, bound_if_index);
+ if (!ndev)
+ goto out;
+ } else {
+ gid_type = IB_GID_TYPE_IB;
+ }
}
- ret = ib_find_cached_gid_by_port(device, gid, gid_type, port,
- ndev, NULL);
-
- if (ndev)
- dev_put(ndev);
+ sgid_attr = rdma_find_gid_by_port(device, gid, gid_type, port, ndev);
+ dev_put(ndev);
+out:
+ return sgid_attr;
+}
- return ret;
+static void cma_bind_sgid_attr(struct rdma_id_private *id_priv,
+ const struct ib_gid_attr *sgid_attr)
+{
+ WARN_ON(id_priv->id.route.addr.dev_addr.sgid_attr);
+ id_priv->id.route.addr.dev_addr.sgid_attr = sgid_attr;
}
-static int cma_acquire_dev(struct rdma_id_private *id_priv,
- struct rdma_id_private *listen_id_priv)
+/**
+ * cma_acquire_dev_by_src_ip - Acquire cma device, port, gid attribute
+ * based on source ip address.
+ * @id_priv: cm_id which should be bound to cma device
+ *
+ * cma_acquire_dev_by_src_ip() binds cm id to cma device, port and GID attribute
+ * based on source IP address. It returns 0 on success or error code otherwise.
+ * It is applicable to active and passive side cm_id.
+ */
+static int cma_acquire_dev_by_src_ip(struct rdma_id_private *id_priv)
{
struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
- struct cma_device *cma_dev;
+ const struct ib_gid_attr *sgid_attr;
union ib_gid gid, iboe_gid, *gidp;
+ struct cma_device *cma_dev;
+ enum ib_gid_type gid_type;
int ret = -ENODEV;
- u8 port;
+ u32 port;
if (dev_addr->dev_type != ARPHRD_INFINIBAND &&
id_priv->id.ps == RDMA_PS_IPOIB)
return -EINVAL;
- mutex_lock(&lock);
rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
&iboe_gid);
memcpy(&gid, dev_addr->src_dev_addr +
- rdma_addr_gid_offset(dev_addr), sizeof gid);
-
- if (listen_id_priv) {
- cma_dev = listen_id_priv->cma_dev;
- port = listen_id_priv->id.port_num;
- gidp = rdma_protocol_roce(cma_dev->device, port) ?
- &iboe_gid : &gid;
-
- ret = cma_validate_port(cma_dev->device, port,
- rdma_protocol_ib(cma_dev->device, port) ?
- IB_GID_TYPE_IB :
- listen_id_priv->gid_type, gidp,
- dev_addr->dev_type,
- dev_addr->bound_dev_if);
- if (!ret) {
- id_priv->id.port_num = port;
- goto out;
+ rdma_addr_gid_offset(dev_addr), sizeof(gid));
+
+ mutex_lock(&lock);
+ list_for_each_entry(cma_dev, &dev_list, list) {
+ rdma_for_each_port (cma_dev->device, port) {
+ gidp = rdma_protocol_roce(cma_dev->device, port) ?
+ &iboe_gid : &gid;
+ gid_type = cma_dev->default_gid_type[port - 1];
+ sgid_attr = cma_validate_port(cma_dev->device, port,
+ gid_type, gidp, id_priv);
+ if (!IS_ERR(sgid_attr)) {
+ id_priv->id.port_num = port;
+ cma_bind_sgid_attr(id_priv, sgid_attr);
+ cma_attach_to_dev(id_priv, cma_dev);
+ ret = 0;
+ goto out;
+ }
}
}
+out:
+ mutex_unlock(&lock);
+ return ret;
+}
+
+/**
+ * cma_ib_acquire_dev - Acquire cma device, port and SGID attribute
+ * @id_priv: cm id to bind to cma device
+ * @listen_id_priv: listener cm id to match against
+ * @req: Pointer to req structure containaining incoming
+ * request information
+ * cma_ib_acquire_dev() acquires cma device, port and SGID attribute when
+ * rdma device matches for listen_id and incoming request. It also verifies
+ * that a GID table entry is present for the source address.
+ * Returns 0 on success, or returns error code otherwise.
+ */
+static int cma_ib_acquire_dev(struct rdma_id_private *id_priv,
+ const struct rdma_id_private *listen_id_priv,
+ struct cma_req_info *req)
+{
+ struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
+ const struct ib_gid_attr *sgid_attr;
+ enum ib_gid_type gid_type;
+ union ib_gid gid;
+
+ if (dev_addr->dev_type != ARPHRD_INFINIBAND &&
+ id_priv->id.ps == RDMA_PS_IPOIB)
+ return -EINVAL;
+
+ if (rdma_protocol_roce(req->device, req->port))
+ rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
+ &gid);
+ else
+ memcpy(&gid, dev_addr->src_dev_addr +
+ rdma_addr_gid_offset(dev_addr), sizeof(gid));
+
+ gid_type = listen_id_priv->cma_dev->default_gid_type[req->port - 1];
+ sgid_attr = cma_validate_port(req->device, req->port,
+ gid_type, &gid, id_priv);
+ if (IS_ERR(sgid_attr))
+ return PTR_ERR(sgid_attr);
+
+ id_priv->id.port_num = req->port;
+ cma_bind_sgid_attr(id_priv, sgid_attr);
+ /* Need to acquire lock to protect against reader
+ * of cma_dev->id_list such as cma_netdev_callback() and
+ * cma_process_remove().
+ */
+ mutex_lock(&lock);
+ cma_attach_to_dev(id_priv, listen_id_priv->cma_dev);
+ mutex_unlock(&lock);
+ rdma_restrack_add(&id_priv->res);
+ return 0;
+}
+
+static int cma_iw_acquire_dev(struct rdma_id_private *id_priv,
+ const struct rdma_id_private *listen_id_priv)
+{
+ struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
+ const struct ib_gid_attr *sgid_attr;
+ struct cma_device *cma_dev;
+ enum ib_gid_type gid_type;
+ int ret = -ENODEV;
+ union ib_gid gid;
+ u32 port;
+
+ if (dev_addr->dev_type != ARPHRD_INFINIBAND &&
+ id_priv->id.ps == RDMA_PS_IPOIB)
+ return -EINVAL;
+
+ memcpy(&gid, dev_addr->src_dev_addr +
+ rdma_addr_gid_offset(dev_addr), sizeof(gid));
+
+ mutex_lock(&lock);
+
+ cma_dev = listen_id_priv->cma_dev;
+ port = listen_id_priv->id.port_num;
+ gid_type = listen_id_priv->gid_type;
+ sgid_attr = cma_validate_port(cma_dev->device, port,
+ gid_type, &gid, id_priv);
+ if (!IS_ERR(sgid_attr)) {
+ id_priv->id.port_num = port;
+ cma_bind_sgid_attr(id_priv, sgid_attr);
+ ret = 0;
+ goto out;
+ }
list_for_each_entry(cma_dev, &dev_list, list) {
- for (port = 1; port <= cma_dev->device->phys_port_cnt; ++port) {
- if (listen_id_priv &&
- listen_id_priv->cma_dev == cma_dev &&
+ rdma_for_each_port (cma_dev->device, port) {
+ if (listen_id_priv->cma_dev == cma_dev &&
listen_id_priv->id.port_num == port)
continue;
- gidp = rdma_protocol_roce(cma_dev->device, port) ?
- &iboe_gid : &gid;
-
- ret = cma_validate_port(cma_dev->device, port,
- rdma_protocol_ib(cma_dev->device, port) ?
- IB_GID_TYPE_IB :
- cma_dev->default_gid_type[port - 1],
- gidp, dev_addr->dev_type,
- dev_addr->bound_dev_if);
- if (!ret) {
+ gid_type = cma_dev->default_gid_type[port - 1];
+ sgid_attr = cma_validate_port(cma_dev->device, port,
+ gid_type, &gid, id_priv);
+ if (!IS_ERR(sgid_attr)) {
id_priv->id.port_num = port;
+ cma_bind_sgid_attr(id_priv, sgid_attr);
+ ret = 0;
goto out;
}
}
}
out:
- if (!ret)
+ if (!ret) {
cma_attach_to_dev(id_priv, cma_dev);
+ rdma_restrack_add(&id_priv->res);
+ }
mutex_unlock(&lock);
return ret;
@@ -707,8 +931,10 @@ static int cma_resolve_ib_dev(struct rdma_id_private *id_priv)
struct cma_device *cma_dev, *cur_dev;
struct sockaddr_ib *addr;
union ib_gid gid, sgid, *dgid;
+ unsigned int p;
u16 pkey, index;
- u8 p;
+ enum ib_port_state port_state;
+ int ret;
int i;
cma_dev = NULL;
@@ -716,17 +942,25 @@ static int cma_resolve_ib_dev(struct rdma_id_private *id_priv)
dgid = (union ib_gid *) &addr->sib_addr;
pkey = ntohs(addr->sib_pkey);
+ mutex_lock(&lock);
list_for_each_entry(cur_dev, &dev_list, list) {
- for (p = 1; p <= cur_dev->device->phys_port_cnt; ++p) {
+ rdma_for_each_port (cur_dev->device, p) {
if (!rdma_cap_af_ib(cur_dev->device, p))
continue;
if (ib_find_cached_pkey(cur_dev->device, p, pkey, &index))
continue;
- for (i = 0; !ib_get_cached_gid(cur_dev->device, p, i,
- &gid, NULL);
- i++) {
+ if (ib_get_cached_port_state(cur_dev->device, p, &port_state))
+ continue;
+
+ for (i = 0; i < cur_dev->device->port_data[p].immutable.gid_tbl_len;
+ ++i) {
+ ret = rdma_query_gid(cur_dev->device, p, i,
+ &gid);
+ if (ret)
+ continue;
+
if (!memcmp(&gid, dgid, sizeof(gid))) {
cma_dev = cur_dev;
sgid = gid;
@@ -735,36 +969,44 @@ static int cma_resolve_ib_dev(struct rdma_id_private *id_priv)
}
if (!cma_dev && (gid.global.subnet_prefix ==
- dgid->global.subnet_prefix)) {
+ dgid->global.subnet_prefix) &&
+ port_state == IB_PORT_ACTIVE) {
cma_dev = cur_dev;
sgid = gid;
id_priv->id.port_num = p;
+ goto found;
}
}
}
}
-
- if (!cma_dev)
- return -ENODEV;
+ mutex_unlock(&lock);
+ return -ENODEV;
found:
cma_attach_to_dev(id_priv, cma_dev);
- addr = (struct sockaddr_ib *) cma_src_addr(id_priv);
- memcpy(&addr->sib_addr, &sgid, sizeof sgid);
+ rdma_restrack_add(&id_priv->res);
+ mutex_unlock(&lock);
+ addr = (struct sockaddr_ib *)cma_src_addr(id_priv);
+ memcpy(&addr->sib_addr, &sgid, sizeof(sgid));
cma_translate_ib(addr, &id_priv->id.route.addr.dev_addr);
return 0;
}
-static void cma_deref_id(struct rdma_id_private *id_priv)
+static void cma_id_get(struct rdma_id_private *id_priv)
+{
+ refcount_inc(&id_priv->refcount);
+}
+
+static void cma_id_put(struct rdma_id_private *id_priv)
{
- if (atomic_dec_and_test(&id_priv->refcount))
+ if (refcount_dec_and_test(&id_priv->refcount))
complete(&id_priv->comp);
}
-struct rdma_cm_id *rdma_create_id(struct net *net,
- rdma_cm_event_handler event_handler,
- void *context, enum rdma_port_space ps,
- enum ib_qp_type qp_type)
+static struct rdma_id_private *
+__rdma_create_id(struct net *net, rdma_cm_event_handler event_handler,
+ void *context, enum rdma_ucm_port_space ps,
+ enum ib_qp_type qp_type, const struct rdma_id_private *parent)
{
struct rdma_id_private *id_priv;
@@ -772,25 +1014,68 @@ struct rdma_cm_id *rdma_create_id(struct net *net,
if (!id_priv)
return ERR_PTR(-ENOMEM);
- id_priv->owner = task_pid_nr(current);
id_priv->state = RDMA_CM_IDLE;
id_priv->id.context = context;
id_priv->id.event_handler = event_handler;
id_priv->id.ps = ps;
id_priv->id.qp_type = qp_type;
+ id_priv->tos_set = false;
+ id_priv->timeout_set = false;
+ id_priv->min_rnr_timer_set = false;
+ id_priv->gid_type = IB_GID_TYPE_IB;
spin_lock_init(&id_priv->lock);
mutex_init(&id_priv->qp_mutex);
init_completion(&id_priv->comp);
- atomic_set(&id_priv->refcount, 1);
+ refcount_set(&id_priv->refcount, 1);
mutex_init(&id_priv->handler_mutex);
+ INIT_LIST_HEAD(&id_priv->device_item);
+ INIT_LIST_HEAD(&id_priv->id_list_entry);
INIT_LIST_HEAD(&id_priv->listen_list);
INIT_LIST_HEAD(&id_priv->mc_list);
get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num);
id_priv->id.route.addr.dev_addr.net = get_net(net);
+ id_priv->seq_num &= 0x00ffffff;
+ INIT_WORK(&id_priv->id.net_work, cma_netevent_work_handler);
- return &id_priv->id;
+ rdma_restrack_new(&id_priv->res, RDMA_RESTRACK_CM_ID);
+ if (parent)
+ rdma_restrack_parent_name(&id_priv->res, &parent->res);
+
+ return id_priv;
+}
+
+struct rdma_cm_id *
+__rdma_create_kernel_id(struct net *net, rdma_cm_event_handler event_handler,
+ void *context, enum rdma_ucm_port_space ps,
+ enum ib_qp_type qp_type, const char *caller)
+{
+ struct rdma_id_private *ret;
+
+ ret = __rdma_create_id(net, event_handler, context, ps, qp_type, NULL);
+ if (IS_ERR(ret))
+ return ERR_CAST(ret);
+
+ rdma_restrack_set_name(&ret->res, caller);
+ return &ret->id;
}
-EXPORT_SYMBOL(rdma_create_id);
+EXPORT_SYMBOL(__rdma_create_kernel_id);
+
+struct rdma_cm_id *rdma_create_user_id(rdma_cm_event_handler event_handler,
+ void *context,
+ enum rdma_ucm_port_space ps,
+ enum ib_qp_type qp_type)
+{
+ struct rdma_id_private *ret;
+
+ ret = __rdma_create_id(current->nsproxy->net_ns, event_handler, context,
+ ps, qp_type, NULL);
+ if (IS_ERR(ret))
+ return ERR_CAST(ret);
+
+ rdma_restrack_set_name(&ret->res, NULL);
+ return &ret->id;
+}
+EXPORT_SYMBOL(rdma_create_user_id);
static int cma_init_ud_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
{
@@ -839,27 +1124,34 @@ int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd,
int ret;
id_priv = container_of(id, struct rdma_id_private, id);
- if (id->device != pd->device)
- return -EINVAL;
+ if (id->device != pd->device) {
+ ret = -EINVAL;
+ goto out_err;
+ }
qp_init_attr->port_num = id->port_num;
qp = ib_create_qp(pd, qp_init_attr);
- if (IS_ERR(qp))
- return PTR_ERR(qp);
+ if (IS_ERR(qp)) {
+ ret = PTR_ERR(qp);
+ goto out_err;
+ }
if (id->qp_type == IB_QPT_UD)
ret = cma_init_ud_qp(id_priv, qp);
else
ret = cma_init_conn_qp(id_priv, qp);
if (ret)
- goto err;
+ goto out_destroy;
id->qp = qp;
id_priv->qp_num = qp->qp_num;
id_priv->srq = (qp->srq != NULL);
+ trace_cm_qp_create(id_priv, pd, qp_init_attr, 0);
return 0;
-err:
+out_destroy:
ib_destroy_qp(qp);
+out_err:
+ trace_cm_qp_create(id_priv, pd, qp_init_attr, ret);
return ret;
}
EXPORT_SYMBOL(rdma_create_qp);
@@ -869,6 +1161,7 @@ void rdma_destroy_qp(struct rdma_cm_id *id)
struct rdma_id_private *id_priv;
id_priv = container_of(id, struct rdma_id_private, id);
+ trace_cm_qp_destroy(id_priv);
mutex_lock(&id_priv->qp_mutex);
ib_destroy_qp(id_priv->id.qp);
id_priv->id.qp = NULL;
@@ -881,7 +1174,6 @@ static int cma_modify_qp_rtr(struct rdma_id_private *id_priv,
{
struct ib_qp_attr qp_attr;
int qp_attr_mask, ret;
- union ib_gid sgid;
mutex_lock(&id_priv->qp_mutex);
if (!id_priv->id.qp) {
@@ -904,11 +1196,6 @@ static int cma_modify_qp_rtr(struct rdma_id_private *id_priv,
if (ret)
goto out;
- ret = ib_query_gid(id_priv->id.device, id_priv->id.port_num,
- qp_attr.ah_attr.grh.sgid_index, &sgid, NULL);
- if (ret)
- goto out;
-
BUG_ON(id_priv->cma_dev->device != id_priv->id.device);
if (conn_param)
@@ -983,7 +1270,7 @@ static int cma_ib_init_qp_attr(struct rdma_id_private *id_priv,
*qp_attr_mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT;
if (id_priv->id.qp_type == IB_QPT_UD) {
- ret = cma_set_qkey(id_priv, 0);
+ ret = cma_set_default_qkey(id_priv);
if (ret)
return ret;
@@ -1019,65 +1306,90 @@ int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr,
} else
ret = iw_cm_init_qp_attr(id_priv->cm_id.iw, qp_attr,
qp_attr_mask);
- } else
+ qp_attr->port_num = id_priv->id.port_num;
+ *qp_attr_mask |= IB_QP_PORT;
+ } else {
ret = -ENOSYS;
+ }
+
+ if ((*qp_attr_mask & IB_QP_TIMEOUT) && id_priv->timeout_set)
+ qp_attr->timeout = id_priv->timeout;
+
+ if ((*qp_attr_mask & IB_QP_MIN_RNR_TIMER) && id_priv->min_rnr_timer_set)
+ qp_attr->min_rnr_timer = id_priv->min_rnr_timer;
return ret;
}
EXPORT_SYMBOL(rdma_init_qp_attr);
-static inline int cma_zero_addr(struct sockaddr *addr)
+static inline bool cma_zero_addr(const struct sockaddr *addr)
{
switch (addr->sa_family) {
case AF_INET:
return ipv4_is_zeronet(((struct sockaddr_in *)addr)->sin_addr.s_addr);
case AF_INET6:
- return ipv6_addr_any(&((struct sockaddr_in6 *) addr)->sin6_addr);
+ return ipv6_addr_any(&((struct sockaddr_in6 *)addr)->sin6_addr);
case AF_IB:
- return ib_addr_any(&((struct sockaddr_ib *) addr)->sib_addr);
+ return ib_addr_any(&((struct sockaddr_ib *)addr)->sib_addr);
default:
- return 0;
+ return false;
}
}
-static inline int cma_loopback_addr(struct sockaddr *addr)
+static inline bool cma_loopback_addr(const struct sockaddr *addr)
{
switch (addr->sa_family) {
case AF_INET:
- return ipv4_is_loopback(((struct sockaddr_in *) addr)->sin_addr.s_addr);
+ return ipv4_is_loopback(
+ ((struct sockaddr_in *)addr)->sin_addr.s_addr);
case AF_INET6:
- return ipv6_addr_loopback(&((struct sockaddr_in6 *) addr)->sin6_addr);
+ return ipv6_addr_loopback(
+ &((struct sockaddr_in6 *)addr)->sin6_addr);
case AF_IB:
- return ib_addr_loopback(&((struct sockaddr_ib *) addr)->sib_addr);
+ return ib_addr_loopback(
+ &((struct sockaddr_ib *)addr)->sib_addr);
default:
- return 0;
+ return false;
}
}
-static inline int cma_any_addr(struct sockaddr *addr)
+static inline bool cma_any_addr(const struct sockaddr *addr)
{
return cma_zero_addr(addr) || cma_loopback_addr(addr);
}
-static int cma_addr_cmp(struct sockaddr *src, struct sockaddr *dst)
+static int cma_addr_cmp(const struct sockaddr *src, const struct sockaddr *dst)
{
if (src->sa_family != dst->sa_family)
return -1;
switch (src->sa_family) {
case AF_INET:
- return ((struct sockaddr_in *) src)->sin_addr.s_addr !=
- ((struct sockaddr_in *) dst)->sin_addr.s_addr;
- case AF_INET6:
- return ipv6_addr_cmp(&((struct sockaddr_in6 *) src)->sin6_addr,
- &((struct sockaddr_in6 *) dst)->sin6_addr);
+ return ((struct sockaddr_in *)src)->sin_addr.s_addr !=
+ ((struct sockaddr_in *)dst)->sin_addr.s_addr;
+ case AF_INET6: {
+ struct sockaddr_in6 *src_addr6 = (struct sockaddr_in6 *)src;
+ struct sockaddr_in6 *dst_addr6 = (struct sockaddr_in6 *)dst;
+ bool link_local;
+
+ if (ipv6_addr_cmp(&src_addr6->sin6_addr,
+ &dst_addr6->sin6_addr))
+ return 1;
+ link_local = ipv6_addr_type(&dst_addr6->sin6_addr) &
+ IPV6_ADDR_LINKLOCAL;
+ /* Link local must match their scope_ids */
+ return link_local ? (src_addr6->sin6_scope_id !=
+ dst_addr6->sin6_scope_id) :
+ 0;
+ }
+
default:
return ib_addr_cmp(&((struct sockaddr_ib *) src)->sib_addr,
&((struct sockaddr_ib *) dst)->sib_addr);
}
}
-static __be16 cma_port(struct sockaddr *addr)
+static __be16 cma_port(const struct sockaddr *addr)
{
struct sockaddr_ib *sib;
@@ -1095,15 +1407,15 @@ static __be16 cma_port(struct sockaddr *addr)
}
}
-static inline int cma_any_port(struct sockaddr *addr)
+static inline int cma_any_port(const struct sockaddr *addr)
{
return !cma_port(addr);
}
static void cma_save_ib_info(struct sockaddr *src_addr,
struct sockaddr *dst_addr,
- struct rdma_cm_id *listen_id,
- struct ib_sa_path_rec *path)
+ const struct rdma_cm_id *listen_id,
+ const struct sa_path_rec *path)
{
struct sockaddr_ib *listen_ib, *ib;
@@ -1188,7 +1500,7 @@ static u16 cma_port_from_service_id(__be64 service_id)
static int cma_save_ip_info(struct sockaddr *src_addr,
struct sockaddr *dst_addr,
- struct ib_cm_event *ib_event,
+ const struct ib_cm_event *ib_event,
__be64 service_id)
{
struct cma_hdr *hdr;
@@ -1218,8 +1530,8 @@ static int cma_save_ip_info(struct sockaddr *src_addr,
static int cma_save_net_info(struct sockaddr *src_addr,
struct sockaddr *dst_addr,
- struct rdma_cm_id *listen_id,
- struct ib_cm_event *ib_event,
+ const struct rdma_cm_id *listen_id,
+ const struct ib_cm_event *ib_event,
sa_family_t sa_family, __be64 service_id)
{
if (sa_family == AF_IB) {
@@ -1249,7 +1561,7 @@ static int cma_save_req_info(const struct ib_cm_event *ib_event,
memcpy(&req->local_gid, &req_param->primary_path->sgid,
sizeof(req->local_gid));
req->has_gid = true;
- req->service_id = req_param->primary_path->service_id;
+ req->service_id = req_param->primary_path->service_id;
req->pkey = be16_to_cpu(req_param->primary_path->pkey);
if (req->pkey != req_param->bth_pkey)
pr_warn_ratelimited("RDMA CMA: got different BTH P_Key (0x%x) and primary path P_Key (0x%x)\n"
@@ -1292,7 +1604,7 @@ static bool validate_ipv4_net_dev(struct net_device *net_dev,
return false;
memset(&fl4, 0, sizeof(fl4));
- fl4.flowi4_iif = net_dev->ifindex;
+ fl4.flowi4_oif = net_dev->ifindex;
fl4.daddr = daddr;
fl4.saddr = saddr;
@@ -1313,7 +1625,7 @@ static bool validate_ipv6_net_dev(struct net_device *net_dev,
IPV6_ADDR_LINKLOCAL;
struct rt6_info *rt = rt6_lookup(dev_net(net_dev), &dst_addr->sin6_addr,
&src_addr->sin6_addr, net_dev->ifindex,
- strict);
+ NULL, strict);
bool ret;
if (!rt)
@@ -1351,12 +1663,36 @@ static bool validate_net_dev(struct net_device *net_dev,
}
}
-static struct net_device *cma_get_net_dev(struct ib_cm_event *ib_event,
- const struct cma_req_info *req)
+static struct net_device *
+roce_get_net_dev_by_cm_event(const struct ib_cm_event *ib_event)
{
- struct sockaddr_storage listen_addr_storage, src_addr_storage;
- struct sockaddr *listen_addr = (struct sockaddr *)&listen_addr_storage,
- *src_addr = (struct sockaddr *)&src_addr_storage;
+ const struct ib_gid_attr *sgid_attr = NULL;
+ struct net_device *ndev;
+
+ if (ib_event->event == IB_CM_REQ_RECEIVED)
+ sgid_attr = ib_event->param.req_rcvd.ppath_sgid_attr;
+ else if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED)
+ sgid_attr = ib_event->param.sidr_req_rcvd.sgid_attr;
+
+ if (!sgid_attr)
+ return NULL;
+
+ rcu_read_lock();
+ ndev = rdma_read_gid_attr_ndev_rcu(sgid_attr);
+ if (IS_ERR(ndev))
+ ndev = NULL;
+ else
+ dev_hold(ndev);
+ rcu_read_unlock();
+ return ndev;
+}
+
+static struct net_device *cma_get_net_dev(const struct ib_cm_event *ib_event,
+ struct cma_req_info *req)
+{
+ struct sockaddr *listen_addr =
+ (struct sockaddr *)&req->listen_addr_storage;
+ struct sockaddr *src_addr = (struct sockaddr *)&req->src_addr_storage;
struct net_device *net_dev;
const union ib_gid *gid = req->has_gid ? &req->local_gid : NULL;
int err;
@@ -1366,20 +1702,19 @@ static struct net_device *cma_get_net_dev(struct ib_cm_event *ib_event,
if (err)
return ERR_PTR(err);
- net_dev = ib_get_net_dev_by_params(req->device, req->port, req->pkey,
- gid, listen_addr);
+ if (rdma_protocol_roce(req->device, req->port))
+ net_dev = roce_get_net_dev_by_cm_event(ib_event);
+ else
+ net_dev = ib_get_net_dev_by_params(req->device, req->port,
+ req->pkey,
+ gid, listen_addr);
if (!net_dev)
return ERR_PTR(-ENODEV);
- if (!validate_net_dev(net_dev, listen_addr, src_addr)) {
- dev_put(net_dev);
- return ERR_PTR(-EHOSTUNREACH);
- }
-
return net_dev;
}
-static enum rdma_port_space rdma_ps_from_service_id(__be64 service_id)
+static enum rdma_ucm_port_space rdma_ps_from_service_id(__be64 service_id)
{
return (be64_to_cpu(service_id) >> 16) & 0xffff;
}
@@ -1420,38 +1755,52 @@ static bool cma_match_private_data(struct rdma_id_private *id_priv,
return true;
}
-static bool cma_protocol_roce_dev_port(struct ib_device *device, int port_num)
+static bool cma_protocol_roce(const struct rdma_cm_id *id)
{
- enum rdma_link_layer ll = rdma_port_get_link_layer(device, port_num);
- enum rdma_transport_type transport =
- rdma_node_get_transport(device->node_type);
+ struct ib_device *device = id->device;
+ const u32 port_num = id->port_num ?: rdma_start_port(device);
- return ll == IB_LINK_LAYER_ETHERNET && transport == RDMA_TRANSPORT_IB;
+ return rdma_protocol_roce(device, port_num);
}
-static bool cma_protocol_roce(const struct rdma_cm_id *id)
+static bool cma_is_req_ipv6_ll(const struct cma_req_info *req)
{
- struct ib_device *device = id->device;
- const int port_num = id->port_num ?: rdma_start_port(device);
+ const struct sockaddr *daddr =
+ (const struct sockaddr *)&req->listen_addr_storage;
+ const struct sockaddr_in6 *daddr6 = (const struct sockaddr_in6 *)daddr;
- return cma_protocol_roce_dev_port(device, port_num);
+ /* Returns true if the req is for IPv6 link local */
+ return (daddr->sa_family == AF_INET6 &&
+ (ipv6_addr_type(&daddr6->sin6_addr) & IPV6_ADDR_LINKLOCAL));
}
static bool cma_match_net_dev(const struct rdma_cm_id *id,
const struct net_device *net_dev,
- u8 port_num)
+ const struct cma_req_info *req)
{
const struct rdma_addr *addr = &id->route.addr;
if (!net_dev)
- /* This request is an AF_IB request or a RoCE request */
- return (!id->port_num || id->port_num == port_num) &&
- (addr->src_addr.ss_family == AF_IB ||
- cma_protocol_roce_dev_port(id->device, port_num));
+ /* This request is an AF_IB request */
+ return (!id->port_num || id->port_num == req->port) &&
+ (addr->src_addr.ss_family == AF_IB);
- return !addr->dev_addr.bound_dev_if ||
- (net_eq(dev_net(net_dev), addr->dev_addr.net) &&
- addr->dev_addr.bound_dev_if == net_dev->ifindex);
+ /*
+ * If the request is not for IPv6 link local, allow matching
+ * request to any netdevice of the one or multiport rdma device.
+ */
+ if (!cma_is_req_ipv6_ll(req))
+ return true;
+ /*
+ * Net namespaces must match, and if the listner is listening
+ * on a specific netdevice than netdevice must match as well.
+ */
+ if (net_eq(dev_net(net_dev), addr->dev_addr.net) &&
+ (!!addr->dev_addr.bound_dev_if ==
+ (addr->dev_addr.bound_dev_if == net_dev->ifindex)))
+ return true;
+ else
+ return false;
}
static struct rdma_id_private *cma_find_listener(
@@ -1463,19 +1812,22 @@ static struct rdma_id_private *cma_find_listener(
{
struct rdma_id_private *id_priv, *id_priv_dev;
+ lockdep_assert_held(&lock);
+
if (!bind_list)
return ERR_PTR(-EINVAL);
hlist_for_each_entry(id_priv, &bind_list->owners, node) {
if (cma_match_private_data(id_priv, ib_event->private_data)) {
if (id_priv->id.device == cm_id->device &&
- cma_match_net_dev(&id_priv->id, net_dev, req->port))
+ cma_match_net_dev(&id_priv->id, net_dev, req))
return id_priv;
list_for_each_entry(id_priv_dev,
&id_priv->listen_list,
- listen_list) {
+ listen_item) {
if (id_priv_dev->id.device == cm_id->device &&
- cma_match_net_dev(&id_priv_dev->id, net_dev, req->port))
+ cma_match_net_dev(&id_priv_dev->id,
+ net_dev, req))
return id_priv_dev;
}
}
@@ -1484,46 +1836,81 @@ static struct rdma_id_private *cma_find_listener(
return ERR_PTR(-EINVAL);
}
-static struct rdma_id_private *cma_id_from_event(struct ib_cm_id *cm_id,
- struct ib_cm_event *ib_event,
- struct net_device **net_dev)
+static struct rdma_id_private *
+cma_ib_id_from_event(struct ib_cm_id *cm_id,
+ const struct ib_cm_event *ib_event,
+ struct cma_req_info *req,
+ struct net_device **net_dev)
{
- struct cma_req_info req;
struct rdma_bind_list *bind_list;
struct rdma_id_private *id_priv;
int err;
- err = cma_save_req_info(ib_event, &req);
+ err = cma_save_req_info(ib_event, req);
if (err)
return ERR_PTR(err);
- *net_dev = cma_get_net_dev(ib_event, &req);
+ *net_dev = cma_get_net_dev(ib_event, req);
if (IS_ERR(*net_dev)) {
if (PTR_ERR(*net_dev) == -EAFNOSUPPORT) {
/* Assuming the protocol is AF_IB */
*net_dev = NULL;
- } else if (cma_protocol_roce_dev_port(req.device, req.port)) {
- /* TODO find the net dev matching the request parameters
- * through the RoCE GID table */
- *net_dev = NULL;
} else {
return ERR_CAST(*net_dev);
}
}
+ mutex_lock(&lock);
+ /*
+ * Net namespace might be getting deleted while route lookup,
+ * cm_id lookup is in progress. Therefore, perform netdevice
+ * validation, cm_id lookup under rcu lock.
+ * RCU lock along with netdevice state check, synchronizes with
+ * netdevice migrating to different net namespace and also avoids
+ * case where net namespace doesn't get deleted while lookup is in
+ * progress.
+ * If the device state is not IFF_UP, its properties such as ifindex
+ * and nd_net cannot be trusted to remain valid without rcu lock.
+ * net/core/dev.c change_net_namespace() ensures to synchronize with
+ * ongoing operations on net device after device is closed using
+ * synchronize_net().
+ */
+ rcu_read_lock();
+ if (*net_dev) {
+ /*
+ * If netdevice is down, it is likely that it is administratively
+ * down or it might be migrating to different namespace.
+ * In that case avoid further processing, as the net namespace
+ * or ifindex may change.
+ */
+ if (((*net_dev)->flags & IFF_UP) == 0) {
+ id_priv = ERR_PTR(-EHOSTUNREACH);
+ goto err;
+ }
+
+ if (!validate_net_dev(*net_dev,
+ (struct sockaddr *)&req->src_addr_storage,
+ (struct sockaddr *)&req->listen_addr_storage)) {
+ id_priv = ERR_PTR(-EHOSTUNREACH);
+ goto err;
+ }
+ }
+
bind_list = cma_ps_find(*net_dev ? dev_net(*net_dev) : &init_net,
- rdma_ps_from_service_id(req.service_id),
- cma_port_from_service_id(req.service_id));
- id_priv = cma_find_listener(bind_list, cm_id, ib_event, &req, *net_dev);
+ rdma_ps_from_service_id(req->service_id),
+ cma_port_from_service_id(req->service_id));
+ id_priv = cma_find_listener(bind_list, cm_id, ib_event, req, *net_dev);
+err:
+ rcu_read_unlock();
+ mutex_unlock(&lock);
if (IS_ERR(id_priv) && *net_dev) {
dev_put(*net_dev);
*net_dev = NULL;
}
-
return id_priv;
}
-static inline int cma_user_data_offset(struct rdma_id_private *id_priv)
+static inline u8 cma_user_data_offset(struct rdma_id_private *id_priv)
{
return cma_family(id_priv) == AF_IB ? 0 : sizeof(struct cma_hdr);
}
@@ -1536,28 +1923,36 @@ static void cma_cancel_route(struct rdma_id_private *id_priv)
}
}
-static void cma_cancel_listens(struct rdma_id_private *id_priv)
+static void _cma_cancel_listens(struct rdma_id_private *id_priv)
{
struct rdma_id_private *dev_id_priv;
+ lockdep_assert_held(&lock);
+
/*
* Remove from listen_any_list to prevent added devices from spawning
* additional listen requests.
*/
- mutex_lock(&lock);
- list_del(&id_priv->list);
+ list_del_init(&id_priv->listen_any_item);
while (!list_empty(&id_priv->listen_list)) {
- dev_id_priv = list_entry(id_priv->listen_list.next,
- struct rdma_id_private, listen_list);
+ dev_id_priv =
+ list_first_entry(&id_priv->listen_list,
+ struct rdma_id_private, listen_item);
/* sync with device removal to avoid duplicate destruction */
- list_del_init(&dev_id_priv->list);
- list_del(&dev_id_priv->listen_list);
+ list_del_init(&dev_id_priv->device_item);
+ list_del_init(&dev_id_priv->listen_item);
mutex_unlock(&lock);
rdma_destroy_id(&dev_id_priv->id);
mutex_lock(&lock);
}
+}
+
+static void cma_cancel_listens(struct rdma_id_private *id_priv)
+{
+ mutex_lock(&lock);
+ _cma_cancel_listens(id_priv);
mutex_unlock(&lock);
}
@@ -1566,6 +1961,14 @@ static void cma_cancel_operation(struct rdma_id_private *id_priv,
{
switch (state) {
case RDMA_CM_ADDR_QUERY:
+ /*
+ * We can avoid doing the rdma_addr_cancel() based on state,
+ * only RDMA_CM_ADDR_QUERY has a work that could still execute.
+ * Notice that the addr_handler work could still be exiting
+ * outside this state, however due to the interaction with the
+ * handler_mutex the work is guaranteed not to touch id_priv
+ * during exit.
+ */
rdma_addr_cancel(&id_priv->id.route.addr.dev_addr);
break;
case RDMA_CM_ROUTE_QUERY:
@@ -1597,55 +2000,60 @@ static void cma_release_port(struct rdma_id_private *id_priv)
mutex_unlock(&lock);
}
+static void destroy_mc(struct rdma_id_private *id_priv,
+ struct cma_multicast *mc)
+{
+ bool send_only = mc->join_state == BIT(SENDONLY_FULLMEMBER_JOIN);
+
+ if (rdma_cap_ib_mcast(id_priv->id.device, id_priv->id.port_num))
+ ib_sa_free_multicast(mc->sa_mc);
+
+ if (rdma_protocol_roce(id_priv->id.device, id_priv->id.port_num)) {
+ struct rdma_dev_addr *dev_addr =
+ &id_priv->id.route.addr.dev_addr;
+ struct net_device *ndev = NULL;
+
+ if (dev_addr->bound_dev_if)
+ ndev = dev_get_by_index(dev_addr->net,
+ dev_addr->bound_dev_if);
+ if (ndev && !send_only) {
+ enum ib_gid_type gid_type;
+ union ib_gid mgid;
+
+ gid_type = id_priv->cma_dev->default_gid_type
+ [id_priv->id.port_num -
+ rdma_start_port(
+ id_priv->cma_dev->device)];
+ cma_iboe_set_mgid((struct sockaddr *)&mc->addr, &mgid,
+ gid_type);
+ cma_igmp_send(ndev, &mgid, false);
+ }
+ dev_put(ndev);
+
+ cancel_work_sync(&mc->iboe_join.work);
+ }
+ kfree(mc);
+}
+
static void cma_leave_mc_groups(struct rdma_id_private *id_priv)
{
struct cma_multicast *mc;
while (!list_empty(&id_priv->mc_list)) {
- mc = container_of(id_priv->mc_list.next,
- struct cma_multicast, list);
+ mc = list_first_entry(&id_priv->mc_list, struct cma_multicast,
+ list);
list_del(&mc->list);
- if (rdma_cap_ib_mcast(id_priv->cma_dev->device,
- id_priv->id.port_num)) {
- ib_sa_free_multicast(mc->multicast.ib);
- kfree(mc);
- } else {
- if (mc->igmp_joined) {
- struct rdma_dev_addr *dev_addr =
- &id_priv->id.route.addr.dev_addr;
- struct net_device *ndev = NULL;
-
- if (dev_addr->bound_dev_if)
- ndev = dev_get_by_index(&init_net,
- dev_addr->bound_dev_if);
- if (ndev) {
- cma_igmp_send(ndev,
- &mc->multicast.ib->rec.mgid,
- false);
- dev_put(ndev);
- }
- }
- kref_put(&mc->mcref, release_mc);
- }
+ destroy_mc(id_priv, mc);
}
}
-void rdma_destroy_id(struct rdma_cm_id *id)
+static void _destroy_id(struct rdma_id_private *id_priv,
+ enum rdma_cm_state state)
{
- struct rdma_id_private *id_priv;
- enum rdma_cm_state state;
-
- id_priv = container_of(id, struct rdma_id_private, id);
- state = cma_exch(id_priv, RDMA_CM_DESTROYING);
cma_cancel_operation(id_priv, state);
- /*
- * Wait for any active callback to finish. New callbacks will find
- * the id_priv state set to destroying and abort.
- */
- mutex_lock(&id_priv->handler_mutex);
- mutex_unlock(&id_priv->handler_mutex);
-
+ rdma_restrack_del(&id_priv->res);
+ cma_remove_id_from_tree(id_priv);
if (id_priv->cma_dev) {
if (rdma_cap_ib_cm(id_priv->id.device, 1)) {
if (id_priv->cm_id.ib)
@@ -1659,16 +2067,56 @@ void rdma_destroy_id(struct rdma_cm_id *id)
}
cma_release_port(id_priv);
- cma_deref_id(id_priv);
+ cma_id_put(id_priv);
wait_for_completion(&id_priv->comp);
if (id_priv->internal_id)
- cma_deref_id(id_priv->id.context);
+ cma_id_put(id_priv->id.context);
kfree(id_priv->id.route.path_rec);
+ kfree(id_priv->id.route.path_rec_inbound);
+ kfree(id_priv->id.route.path_rec_outbound);
+ kfree(id_priv->id.route.service_recs);
+
put_net(id_priv->id.route.addr.dev_addr.net);
kfree(id_priv);
}
+
+/*
+ * destroy an ID from within the handler_mutex. This ensures that no other
+ * handlers can start running concurrently.
+ */
+static void destroy_id_handler_unlock(struct rdma_id_private *id_priv)
+ __releases(&idprv->handler_mutex)
+{
+ enum rdma_cm_state state;
+ unsigned long flags;
+
+ trace_cm_id_destroy(id_priv);
+
+ /*
+ * Setting the state to destroyed under the handler mutex provides a
+ * fence against calling handler callbacks. If this is invoked due to
+ * the failure of a handler callback then it guarentees that no future
+ * handlers will be called.
+ */
+ lockdep_assert_held(&id_priv->handler_mutex);
+ spin_lock_irqsave(&id_priv->lock, flags);
+ state = id_priv->state;
+ id_priv->state = RDMA_CM_DESTROYING;
+ spin_unlock_irqrestore(&id_priv->lock, flags);
+ mutex_unlock(&id_priv->handler_mutex);
+ _destroy_id(id_priv, state);
+}
+
+void rdma_destroy_id(struct rdma_cm_id *id)
+{
+ struct rdma_id_private *id_priv =
+ container_of(id, struct rdma_id_private, id);
+
+ mutex_lock(&id_priv->handler_mutex);
+ destroy_id_handler_unlock(id_priv);
+}
EXPORT_SYMBOL(rdma_destroy_id);
static int cma_rep_recv(struct rdma_id_private *id_priv)
@@ -1683,20 +2131,23 @@ static int cma_rep_recv(struct rdma_id_private *id_priv)
if (ret)
goto reject;
+ trace_cm_send_rtu(id_priv);
ret = ib_send_cm_rtu(id_priv->cm_id.ib, NULL, 0);
if (ret)
goto reject;
return 0;
reject:
+ pr_debug_ratelimited("RDMA CM: CONNECT_ERROR: failed to handle reply. status %d\n", ret);
cma_modify_qp_err(id_priv);
+ trace_cm_send_rej(id_priv);
ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED,
NULL, 0, NULL, 0);
return ret;
}
static void cma_set_rep_event_data(struct rdma_cm_event *event,
- struct ib_cm_rep_event_param *rep_data,
+ const struct ib_cm_rep_event_param *rep_data,
void *private_data)
{
event->param.conn.private_data = private_data;
@@ -1707,22 +2158,40 @@ static void cma_set_rep_event_data(struct rdma_cm_event *event,
event->param.conn.rnr_retry_count = rep_data->rnr_retry_count;
event->param.conn.srq = rep_data->srq;
event->param.conn.qp_num = rep_data->remote_qpn;
+
+ event->ece.vendor_id = rep_data->ece.vendor_id;
+ event->ece.attr_mod = rep_data->ece.attr_mod;
+}
+
+static int cma_cm_event_handler(struct rdma_id_private *id_priv,
+ struct rdma_cm_event *event)
+{
+ int ret;
+
+ lockdep_assert_held(&id_priv->handler_mutex);
+
+ trace_cm_event_handler(id_priv, event);
+ ret = id_priv->id.event_handler(&id_priv->id, event);
+ trace_cm_event_done(id_priv, event, ret);
+ return ret;
}
-static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
+static int cma_ib_handler(struct ib_cm_id *cm_id,
+ const struct ib_cm_event *ib_event)
{
struct rdma_id_private *id_priv = cm_id->context;
- struct rdma_cm_event event;
- int ret = 0;
+ struct rdma_cm_event event = {};
+ enum rdma_cm_state state;
+ int ret;
mutex_lock(&id_priv->handler_mutex);
+ state = READ_ONCE(id_priv->state);
if ((ib_event->event != IB_CM_TIMEWAIT_EXIT &&
- id_priv->state != RDMA_CM_CONNECT) ||
+ state != RDMA_CM_CONNECT) ||
(ib_event->event == IB_CM_TIMEWAIT_EXIT &&
- id_priv->state != RDMA_CM_DISCONNECT))
+ state != RDMA_CM_DISCONNECT))
goto out;
- memset(&event, 0, sizeof event);
switch (ib_event->event) {
case IB_CM_REQ_ERROR:
case IB_CM_REP_ERROR:
@@ -1730,6 +2199,11 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
event.status = -ETIMEDOUT;
break;
case IB_CM_REP_RECEIVED:
+ if (state == RDMA_CM_CONNECT &&
+ (id_priv->id.qp_type != IB_QPT_UD)) {
+ trace_cm_prepare_mra(id_priv);
+ ib_prepare_cm_mra(cm_id);
+ }
if (id_priv->id.qp) {
event.status = cma_rep_recv(id_priv);
event.event = event.status ? RDMA_CM_EVENT_CONNECT_ERROR :
@@ -1745,7 +2219,8 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
event.event = RDMA_CM_EVENT_ESTABLISHED;
break;
case IB_CM_DREQ_ERROR:
- event.status = -ETIMEDOUT; /* fall through */
+ event.status = -ETIMEDOUT;
+ fallthrough;
case IB_CM_DREQ_RECEIVED:
case IB_CM_DREP_RECEIVED:
if (!cma_comp_exch(id_priv, RDMA_CM_CONNECT,
@@ -1760,6 +2235,8 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
/* ignore event */
goto out;
case IB_CM_REJ_RECEIVED:
+ pr_debug_ratelimited("RDMA CM: REJECTED: %s\n", rdma_reject_msg(&id_priv->id,
+ ib_event->param.rej_rcvd.reason));
cma_modify_qp_err(id_priv);
event.status = ib_event->param.rej_rcvd.reason;
event.event = RDMA_CM_EVENT_REJECTED;
@@ -1772,59 +2249,61 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
goto out;
}
- ret = id_priv->id.event_handler(&id_priv->id, &event);
+ ret = cma_cm_event_handler(id_priv, &event);
if (ret) {
/* Destroy the CM ID by returning a non-zero value. */
id_priv->cm_id.ib = NULL;
- cma_exch(id_priv, RDMA_CM_DESTROYING);
- mutex_unlock(&id_priv->handler_mutex);
- rdma_destroy_id(&id_priv->id);
+ destroy_id_handler_unlock(id_priv);
return ret;
}
out:
mutex_unlock(&id_priv->handler_mutex);
- return ret;
+ return 0;
}
-static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id,
- struct ib_cm_event *ib_event,
- struct net_device *net_dev)
+static struct rdma_id_private *
+cma_ib_new_conn_id(const struct rdma_cm_id *listen_id,
+ const struct ib_cm_event *ib_event,
+ struct net_device *net_dev)
{
+ struct rdma_id_private *listen_id_priv;
struct rdma_id_private *id_priv;
struct rdma_cm_id *id;
struct rdma_route *rt;
const sa_family_t ss_family = listen_id->route.addr.src_addr.ss_family;
+ struct sa_path_rec *path = ib_event->param.req_rcvd.primary_path;
const __be64 service_id =
- ib_event->param.req_rcvd.primary_path->service_id;
+ ib_event->param.req_rcvd.primary_path->service_id;
int ret;
- id = rdma_create_id(listen_id->route.addr.dev_addr.net,
- listen_id->event_handler, listen_id->context,
- listen_id->ps, ib_event->param.req_rcvd.qp_type);
- if (IS_ERR(id))
+ listen_id_priv = container_of(listen_id, struct rdma_id_private, id);
+ id_priv = __rdma_create_id(listen_id->route.addr.dev_addr.net,
+ listen_id->event_handler, listen_id->context,
+ listen_id->ps,
+ ib_event->param.req_rcvd.qp_type,
+ listen_id_priv);
+ if (IS_ERR(id_priv))
return NULL;
- id_priv = container_of(id, struct rdma_id_private, id);
+ id = &id_priv->id;
if (cma_save_net_info((struct sockaddr *)&id->route.addr.src_addr,
(struct sockaddr *)&id->route.addr.dst_addr,
listen_id, ib_event, ss_family, service_id))
goto err;
rt = &id->route;
- rt->num_paths = ib_event->param.req_rcvd.alternate_path ? 2 : 1;
- rt->path_rec = kmalloc(sizeof *rt->path_rec * rt->num_paths,
- GFP_KERNEL);
+ rt->num_pri_alt_paths = ib_event->param.req_rcvd.alternate_path ? 2 : 1;
+ rt->path_rec = kmalloc_array(rt->num_pri_alt_paths,
+ sizeof(*rt->path_rec), GFP_KERNEL);
if (!rt->path_rec)
goto err;
- rt->path_rec[0] = *ib_event->param.req_rcvd.primary_path;
- if (rt->num_paths == 2)
+ rt->path_rec[0] = *path;
+ if (rt->num_pri_alt_paths == 2)
rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path;
if (net_dev) {
- ret = rdma_copy_addr(&rt->addr.dev_addr, net_dev, NULL);
- if (ret)
- goto err;
+ rdma_copy_src_l2_addr(&rt->addr.dev_addr, net_dev);
} else {
if (!cma_protocol_roce(listen_id) &&
cma_any_addr(cma_src_addr(id_priv))) {
@@ -1847,22 +2326,26 @@ err:
return NULL;
}
-static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id,
- struct ib_cm_event *ib_event,
- struct net_device *net_dev)
+static struct rdma_id_private *
+cma_ib_new_udp_id(const struct rdma_cm_id *listen_id,
+ const struct ib_cm_event *ib_event,
+ struct net_device *net_dev)
{
+ const struct rdma_id_private *listen_id_priv;
struct rdma_id_private *id_priv;
struct rdma_cm_id *id;
const sa_family_t ss_family = listen_id->route.addr.src_addr.ss_family;
struct net *net = listen_id->route.addr.dev_addr.net;
int ret;
- id = rdma_create_id(net, listen_id->event_handler, listen_id->context,
- listen_id->ps, IB_QPT_UD);
- if (IS_ERR(id))
+ listen_id_priv = container_of(listen_id, struct rdma_id_private, id);
+ id_priv = __rdma_create_id(net, listen_id->event_handler,
+ listen_id->context, listen_id->ps, IB_QPT_UD,
+ listen_id_priv);
+ if (IS_ERR(id_priv))
return NULL;
- id_priv = container_of(id, struct rdma_id_private, id);
+ id = &id_priv->id;
if (cma_save_net_info((struct sockaddr *)&id->route.addr.src_addr,
(struct sockaddr *)&id->route.addr.dst_addr,
listen_id, ib_event, ss_family,
@@ -1870,9 +2353,7 @@ static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id,
goto err;
if (net_dev) {
- ret = rdma_copy_addr(&id->route.addr.dev_addr, net_dev, NULL);
- if (ret)
- goto err;
+ rdma_copy_src_l2_addr(&id->route.addr.dev_addr, net_dev);
} else {
if (!cma_any_addr(cma_src_addr(id_priv))) {
ret = cma_translate_addr(cma_src_addr(id_priv),
@@ -1890,7 +2371,7 @@ err:
}
static void cma_set_req_event_data(struct rdma_cm_event *event,
- struct ib_cm_req_event_param *req_data,
+ const struct ib_cm_req_event_param *req_data,
void *private_data, int offset)
{
event->param.conn.private_data = private_data + offset;
@@ -1902,9 +2383,13 @@ static void cma_set_req_event_data(struct rdma_cm_event *event,
event->param.conn.rnr_retry_count = req_data->rnr_retry_count;
event->param.conn.srq = req_data->srq;
event->param.conn.qp_num = req_data->remote_qpn;
+
+ event->ece.vendor_id = req_data->ece.vendor_id;
+ event->ece.attr_mod = req_data->ece.attr_mod;
}
-static int cma_check_req_qp_type(struct rdma_cm_id *id, struct ib_cm_event *ib_event)
+static int cma_ib_check_req_qp_type(const struct rdma_cm_id *id,
+ const struct ib_cm_event *ib_event)
{
return (((ib_event->event == IB_CM_REQ_RECEIVED) &&
(ib_event->param.req_rcvd.qp_type == id->qp_type)) ||
@@ -1913,94 +2398,81 @@ static int cma_check_req_qp_type(struct rdma_cm_id *id, struct ib_cm_event *ib_e
(!id->qp_type));
}
-static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
+static int cma_ib_req_handler(struct ib_cm_id *cm_id,
+ const struct ib_cm_event *ib_event)
{
struct rdma_id_private *listen_id, *conn_id = NULL;
- struct rdma_cm_event event;
+ struct rdma_cm_event event = {};
+ struct cma_req_info req = {};
struct net_device *net_dev;
- int offset, ret;
+ u8 offset;
+ int ret;
- listen_id = cma_id_from_event(cm_id, ib_event, &net_dev);
+ listen_id = cma_ib_id_from_event(cm_id, ib_event, &req, &net_dev);
if (IS_ERR(listen_id))
return PTR_ERR(listen_id);
- if (!cma_check_req_qp_type(&listen_id->id, ib_event)) {
+ trace_cm_req_handler(listen_id, ib_event->event);
+ if (!cma_ib_check_req_qp_type(&listen_id->id, ib_event)) {
ret = -EINVAL;
goto net_dev_put;
}
mutex_lock(&listen_id->handler_mutex);
- if (listen_id->state != RDMA_CM_LISTEN) {
+ if (READ_ONCE(listen_id->state) != RDMA_CM_LISTEN) {
ret = -ECONNABORTED;
- goto err1;
+ goto err_unlock;
}
- memset(&event, 0, sizeof event);
offset = cma_user_data_offset(listen_id);
event.event = RDMA_CM_EVENT_CONNECT_REQUEST;
if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED) {
- conn_id = cma_new_udp_id(&listen_id->id, ib_event, net_dev);
+ conn_id = cma_ib_new_udp_id(&listen_id->id, ib_event, net_dev);
event.param.ud.private_data = ib_event->private_data + offset;
event.param.ud.private_data_len =
IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE - offset;
} else {
- conn_id = cma_new_conn_id(&listen_id->id, ib_event, net_dev);
+ conn_id = cma_ib_new_conn_id(&listen_id->id, ib_event, net_dev);
cma_set_req_event_data(&event, &ib_event->param.req_rcvd,
ib_event->private_data, offset);
}
if (!conn_id) {
ret = -ENOMEM;
- goto err1;
+ goto err_unlock;
}
mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING);
- ret = cma_acquire_dev(conn_id, listen_id);
- if (ret)
- goto err2;
+ ret = cma_ib_acquire_dev(conn_id, listen_id, &req);
+ if (ret) {
+ destroy_id_handler_unlock(conn_id);
+ goto err_unlock;
+ }
conn_id->cm_id.ib = cm_id;
cm_id->context = conn_id;
cm_id->cm_handler = cma_ib_handler;
- /*
- * Protect against the user destroying conn_id from another thread
- * until we're done accessing it.
- */
- atomic_inc(&conn_id->refcount);
- ret = conn_id->id.event_handler(&conn_id->id, &event);
- if (ret)
- goto err3;
- /*
- * Acquire mutex to prevent user executing rdma_destroy_id()
- * while we're accessing the cm_id.
- */
- mutex_lock(&lock);
- if (cma_comp(conn_id, RDMA_CM_CONNECT) &&
- (conn_id->id.qp_type != IB_QPT_UD))
- ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
- mutex_unlock(&lock);
- mutex_unlock(&conn_id->handler_mutex);
- mutex_unlock(&listen_id->handler_mutex);
- cma_deref_id(conn_id);
- if (net_dev)
- dev_put(net_dev);
- return 0;
+ ret = cma_cm_event_handler(conn_id, &event);
+ if (ret) {
+ /* Destroy the CM ID by returning a non-zero value. */
+ conn_id->cm_id.ib = NULL;
+ mutex_unlock(&listen_id->handler_mutex);
+ destroy_id_handler_unlock(conn_id);
+ goto net_dev_put;
+ }
-err3:
- cma_deref_id(conn_id);
- /* Destroy the CM ID by returning a non-zero value. */
- conn_id->cm_id.ib = NULL;
-err2:
- cma_exch(conn_id, RDMA_CM_DESTROYING);
+ if (READ_ONCE(conn_id->state) == RDMA_CM_CONNECT &&
+ conn_id->id.qp_type != IB_QPT_UD) {
+ trace_cm_prepare_mra(cm_id->context);
+ ib_prepare_cm_mra(cm_id);
+ }
mutex_unlock(&conn_id->handler_mutex);
-err1:
+
+err_unlock:
mutex_unlock(&listen_id->handler_mutex);
- if (conn_id)
- rdma_destroy_id(&conn_id->id);
net_dev_put:
- if (net_dev)
- dev_put(net_dev);
+ dev_put(net_dev);
return ret;
}
@@ -2014,19 +2486,45 @@ __be64 rdma_get_service_id(struct rdma_cm_id *id, struct sockaddr *addr)
}
EXPORT_SYMBOL(rdma_get_service_id);
+void rdma_read_gids(struct rdma_cm_id *cm_id, union ib_gid *sgid,
+ union ib_gid *dgid)
+{
+ struct rdma_addr *addr = &cm_id->route.addr;
+
+ if (!cm_id->device) {
+ if (sgid)
+ memset(sgid, 0, sizeof(*sgid));
+ if (dgid)
+ memset(dgid, 0, sizeof(*dgid));
+ return;
+ }
+
+ if (rdma_protocol_roce(cm_id->device, cm_id->port_num)) {
+ if (sgid)
+ rdma_ip2gid((struct sockaddr *)&addr->src_addr, sgid);
+ if (dgid)
+ rdma_ip2gid((struct sockaddr *)&addr->dst_addr, dgid);
+ } else {
+ if (sgid)
+ rdma_addr_get_sgid(&addr->dev_addr, sgid);
+ if (dgid)
+ rdma_addr_get_dgid(&addr->dev_addr, dgid);
+ }
+}
+EXPORT_SYMBOL(rdma_read_gids);
+
static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
{
struct rdma_id_private *id_priv = iw_id->context;
- struct rdma_cm_event event;
+ struct rdma_cm_event event = {};
int ret = 0;
struct sockaddr *laddr = (struct sockaddr *)&iw_event->local_addr;
struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr;
mutex_lock(&id_priv->handler_mutex);
- if (id_priv->state != RDMA_CM_CONNECT)
+ if (READ_ONCE(id_priv->state) != RDMA_CM_CONNECT)
goto out;
- memset(&event, 0, sizeof event);
switch (iw_event->event) {
case IW_CM_EVENT_CLOSE:
event.event = RDMA_CM_EVENT_DISCONNECTED;
@@ -2060,19 +2558,17 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
event.param.conn.responder_resources = iw_event->ord;
break;
default:
- BUG_ON(1);
+ goto out;
}
event.status = iw_event->status;
event.param.conn.private_data = iw_event->private_data;
event.param.conn.private_data_len = iw_event->private_data_len;
- ret = id_priv->id.event_handler(&id_priv->id, &event);
+ ret = cma_cm_event_handler(id_priv, &event);
if (ret) {
/* Destroy the CM ID by returning a non-zero value. */
id_priv->cm_id.iw = NULL;
- cma_exch(id_priv, RDMA_CM_DESTROYING);
- mutex_unlock(&id_priv->handler_mutex);
- rdma_destroy_id(&id_priv->id);
+ destroy_id_handler_unlock(id_priv);
return ret;
}
@@ -2084,44 +2580,48 @@ out:
static int iw_conn_req_handler(struct iw_cm_id *cm_id,
struct iw_cm_event *iw_event)
{
- struct rdma_cm_id *new_cm_id;
struct rdma_id_private *listen_id, *conn_id;
- struct rdma_cm_event event;
+ struct rdma_cm_event event = {};
int ret = -ECONNABORTED;
struct sockaddr *laddr = (struct sockaddr *)&iw_event->local_addr;
struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr;
+ event.event = RDMA_CM_EVENT_CONNECT_REQUEST;
+ event.param.conn.private_data = iw_event->private_data;
+ event.param.conn.private_data_len = iw_event->private_data_len;
+ event.param.conn.initiator_depth = iw_event->ird;
+ event.param.conn.responder_resources = iw_event->ord;
+
listen_id = cm_id->context;
mutex_lock(&listen_id->handler_mutex);
- if (listen_id->state != RDMA_CM_LISTEN)
+ if (READ_ONCE(listen_id->state) != RDMA_CM_LISTEN)
goto out;
/* Create a new RDMA id for the new IW CM ID */
- new_cm_id = rdma_create_id(listen_id->id.route.addr.dev_addr.net,
+ conn_id = __rdma_create_id(listen_id->id.route.addr.dev_addr.net,
listen_id->id.event_handler,
- listen_id->id.context,
- RDMA_PS_TCP, IB_QPT_RC);
- if (IS_ERR(new_cm_id)) {
+ listen_id->id.context, RDMA_PS_TCP,
+ IB_QPT_RC, listen_id);
+ if (IS_ERR(conn_id)) {
ret = -ENOMEM;
goto out;
}
- conn_id = container_of(new_cm_id, struct rdma_id_private, id);
mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING);
conn_id->state = RDMA_CM_CONNECT;
- ret = rdma_translate_ip(laddr, &conn_id->id.route.addr.dev_addr, NULL);
+ ret = rdma_translate_ip(laddr, &conn_id->id.route.addr.dev_addr);
if (ret) {
- mutex_unlock(&conn_id->handler_mutex);
- rdma_destroy_id(new_cm_id);
- goto out;
+ mutex_unlock(&listen_id->handler_mutex);
+ destroy_id_handler_unlock(conn_id);
+ return ret;
}
- ret = cma_acquire_dev(conn_id, listen_id);
+ ret = cma_iw_acquire_dev(conn_id, listen_id);
if (ret) {
- mutex_unlock(&conn_id->handler_mutex);
- rdma_destroy_id(new_cm_id);
- goto out;
+ mutex_unlock(&listen_id->handler_mutex);
+ destroy_id_handler_unlock(conn_id);
+ return ret;
}
conn_id->cm_id.iw = cm_id;
@@ -2131,31 +2631,16 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
memcpy(cma_src_addr(conn_id), laddr, rdma_addr_size(laddr));
memcpy(cma_dst_addr(conn_id), raddr, rdma_addr_size(raddr));
- memset(&event, 0, sizeof event);
- event.event = RDMA_CM_EVENT_CONNECT_REQUEST;
- event.param.conn.private_data = iw_event->private_data;
- event.param.conn.private_data_len = iw_event->private_data_len;
- event.param.conn.initiator_depth = iw_event->ird;
- event.param.conn.responder_resources = iw_event->ord;
-
- /*
- * Protect against the user destroying conn_id from another thread
- * until we're done accessing it.
- */
- atomic_inc(&conn_id->refcount);
- ret = conn_id->id.event_handler(&conn_id->id, &event);
+ ret = cma_cm_event_handler(conn_id, &event);
if (ret) {
/* User wants to destroy the CM ID */
conn_id->cm_id.iw = NULL;
- cma_exch(conn_id, RDMA_CM_DESTROYING);
- mutex_unlock(&conn_id->handler_mutex);
- cma_deref_id(conn_id);
- rdma_destroy_id(&conn_id->id);
- goto out;
+ mutex_unlock(&listen_id->handler_mutex);
+ destroy_id_handler_unlock(conn_id);
+ return ret;
}
mutex_unlock(&conn_id->handler_mutex);
- cma_deref_id(conn_id);
out:
mutex_unlock(&listen_id->handler_mutex);
@@ -2170,7 +2655,8 @@ static int cma_ib_listen(struct rdma_id_private *id_priv)
addr = cma_src_addr(id_priv);
svc_id = rdma_get_service_id(&id_priv->id, addr);
- id = ib_cm_insert_listen(id_priv->id.device, cma_req_handler, svc_id);
+ id = ib_cm_insert_listen(id_priv->id.device,
+ cma_ib_req_handler, svc_id);
if (IS_ERR(id))
return PTR_ERR(id);
id_priv->cm_id.ib = id;
@@ -2189,7 +2675,11 @@ static int cma_iw_listen(struct rdma_id_private *id_priv, int backlog)
if (IS_ERR(id))
return PTR_ERR(id);
+ mutex_lock(&id_priv->qp_mutex);
id->tos = id_priv->tos;
+ id->tos_set = id_priv->tos_set;
+ mutex_unlock(&id_priv->qp_mutex);
+ id->afonly = id_priv->afonly;
id_priv->cm_id.iw = id;
memcpy(&id_priv->cm_id.iw->local_addr, cma_src_addr(id_priv),
@@ -2210,54 +2700,88 @@ static int cma_listen_handler(struct rdma_cm_id *id,
{
struct rdma_id_private *id_priv = id->context;
+ /* Listening IDs are always destroyed on removal */
+ if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL)
+ return -1;
+
id->context = id_priv->id.context;
id->event_handler = id_priv->id.event_handler;
+ trace_cm_event_handler(id_priv, event);
return id_priv->id.event_handler(id, event);
}
-static void cma_listen_on_dev(struct rdma_id_private *id_priv,
- struct cma_device *cma_dev)
+static int cma_listen_on_dev(struct rdma_id_private *id_priv,
+ struct cma_device *cma_dev,
+ struct rdma_id_private **to_destroy)
{
struct rdma_id_private *dev_id_priv;
- struct rdma_cm_id *id;
struct net *net = id_priv->id.route.addr.dev_addr.net;
int ret;
- if (cma_family(id_priv) == AF_IB && !rdma_cap_ib_cm(cma_dev->device, 1))
- return;
+ lockdep_assert_held(&lock);
- id = rdma_create_id(net, cma_listen_handler, id_priv, id_priv->id.ps,
- id_priv->id.qp_type);
- if (IS_ERR(id))
- return;
+ *to_destroy = NULL;
+ if (cma_family(id_priv) == AF_IB && !rdma_cap_ib_cm(cma_dev->device, 1))
+ return 0;
- dev_id_priv = container_of(id, struct rdma_id_private, id);
+ dev_id_priv =
+ __rdma_create_id(net, cma_listen_handler, id_priv,
+ id_priv->id.ps, id_priv->id.qp_type, id_priv);
+ if (IS_ERR(dev_id_priv))
+ return PTR_ERR(dev_id_priv);
dev_id_priv->state = RDMA_CM_ADDR_BOUND;
memcpy(cma_src_addr(dev_id_priv), cma_src_addr(id_priv),
rdma_addr_size(cma_src_addr(id_priv)));
_cma_attach_to_dev(dev_id_priv, cma_dev);
- list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list);
- atomic_inc(&id_priv->refcount);
+ rdma_restrack_add(&dev_id_priv->res);
+ cma_id_get(id_priv);
dev_id_priv->internal_id = 1;
dev_id_priv->afonly = id_priv->afonly;
+ mutex_lock(&id_priv->qp_mutex);
+ dev_id_priv->tos_set = id_priv->tos_set;
+ dev_id_priv->tos = id_priv->tos;
+ mutex_unlock(&id_priv->qp_mutex);
- ret = rdma_listen(id, id_priv->backlog);
+ ret = rdma_listen(&dev_id_priv->id, id_priv->backlog);
if (ret)
- pr_warn("RDMA CMA: cma_listen_on_dev, error %d, listening on device %s\n",
- ret, cma_dev->device->name);
+ goto err_listen;
+ list_add_tail(&dev_id_priv->listen_item, &id_priv->listen_list);
+ return 0;
+err_listen:
+ /* Caller must destroy this after releasing lock */
+ *to_destroy = dev_id_priv;
+ dev_warn(&cma_dev->device->dev, "RDMA CMA: %s, error %d\n", __func__, ret);
+ return ret;
}
-static void cma_listen_on_all(struct rdma_id_private *id_priv)
+static int cma_listen_on_all(struct rdma_id_private *id_priv)
{
+ struct rdma_id_private *to_destroy;
struct cma_device *cma_dev;
+ int ret;
mutex_lock(&lock);
- list_add_tail(&id_priv->list, &listen_any_list);
- list_for_each_entry(cma_dev, &dev_list, list)
- cma_listen_on_dev(id_priv, cma_dev);
+ list_add_tail(&id_priv->listen_any_item, &listen_any_list);
+ list_for_each_entry(cma_dev, &dev_list, list) {
+ ret = cma_listen_on_dev(id_priv, cma_dev, &to_destroy);
+ if (ret) {
+ /* Prevent racing with cma_process_remove() */
+ if (to_destroy)
+ list_del_init(&to_destroy->device_item);
+ goto err_listen;
+ }
+ }
mutex_unlock(&lock);
+ return 0;
+
+err_listen:
+ _cma_cancel_listens(id_priv);
+ mutex_unlock(&lock);
+ if (to_destroy)
+ rdma_destroy_id(&to_destroy->id);
+ return ret;
}
void rdma_set_service_type(struct rdma_cm_id *id, int tos)
@@ -2265,47 +2789,179 @@ void rdma_set_service_type(struct rdma_cm_id *id, int tos)
struct rdma_id_private *id_priv;
id_priv = container_of(id, struct rdma_id_private, id);
+ mutex_lock(&id_priv->qp_mutex);
id_priv->tos = (u8) tos;
+ id_priv->tos_set = true;
+ mutex_unlock(&id_priv->qp_mutex);
}
EXPORT_SYMBOL(rdma_set_service_type);
-static void cma_query_handler(int status, struct ib_sa_path_rec *path_rec,
- void *context)
+/**
+ * rdma_set_ack_timeout() - Set the ack timeout of QP associated
+ * with a connection identifier.
+ * @id: Communication identifier to associated with service type.
+ * @timeout: Ack timeout to set a QP, expressed as 4.096 * 2^(timeout) usec.
+ *
+ * This function should be called before rdma_connect() on active side,
+ * and on passive side before rdma_accept(). It is applicable to primary
+ * path only. The timeout will affect the local side of the QP, it is not
+ * negotiated with remote side and zero disables the timer. In case it is
+ * set before rdma_resolve_route, the value will also be used to determine
+ * PacketLifeTime for RoCE.
+ *
+ * Return: 0 for success
+ */
+int rdma_set_ack_timeout(struct rdma_cm_id *id, u8 timeout)
+{
+ struct rdma_id_private *id_priv;
+
+ if (id->qp_type != IB_QPT_RC && id->qp_type != IB_QPT_XRC_INI)
+ return -EINVAL;
+
+ id_priv = container_of(id, struct rdma_id_private, id);
+ mutex_lock(&id_priv->qp_mutex);
+ id_priv->timeout = timeout;
+ id_priv->timeout_set = true;
+ mutex_unlock(&id_priv->qp_mutex);
+
+ return 0;
+}
+EXPORT_SYMBOL(rdma_set_ack_timeout);
+
+/**
+ * rdma_set_min_rnr_timer() - Set the minimum RNR Retry timer of the
+ * QP associated with a connection identifier.
+ * @id: Communication identifier to associated with service type.
+ * @min_rnr_timer: 5-bit value encoded as Table 45: "Encoding for RNR NAK
+ * Timer Field" in the IBTA specification.
+ *
+ * This function should be called before rdma_connect() on active
+ * side, and on passive side before rdma_accept(). The timer value
+ * will be associated with the local QP. When it receives a send it is
+ * not read to handle, typically if the receive queue is empty, an RNR
+ * Retry NAK is returned to the requester with the min_rnr_timer
+ * encoded. The requester will then wait at least the time specified
+ * in the NAK before retrying. The default is zero, which translates
+ * to a minimum RNR Timer value of 655 ms.
+ *
+ * Return: 0 for success
+ */
+int rdma_set_min_rnr_timer(struct rdma_cm_id *id, u8 min_rnr_timer)
+{
+ struct rdma_id_private *id_priv;
+
+ /* It is a five-bit value */
+ if (min_rnr_timer & 0xe0)
+ return -EINVAL;
+
+ if (WARN_ON(id->qp_type != IB_QPT_RC && id->qp_type != IB_QPT_XRC_TGT))
+ return -EINVAL;
+
+ id_priv = container_of(id, struct rdma_id_private, id);
+ mutex_lock(&id_priv->qp_mutex);
+ id_priv->min_rnr_timer = min_rnr_timer;
+ id_priv->min_rnr_timer_set = true;
+ mutex_unlock(&id_priv->qp_mutex);
+
+ return 0;
+}
+EXPORT_SYMBOL(rdma_set_min_rnr_timer);
+
+static int route_set_path_rec_inbound(struct cma_work *work,
+ struct sa_path_rec *path_rec)
+{
+ struct rdma_route *route = &work->id->id.route;
+
+ if (!route->path_rec_inbound) {
+ route->path_rec_inbound =
+ kzalloc(sizeof(*route->path_rec_inbound), GFP_KERNEL);
+ if (!route->path_rec_inbound)
+ return -ENOMEM;
+ }
+
+ *route->path_rec_inbound = *path_rec;
+ return 0;
+}
+
+static int route_set_path_rec_outbound(struct cma_work *work,
+ struct sa_path_rec *path_rec)
+{
+ struct rdma_route *route = &work->id->id.route;
+
+ if (!route->path_rec_outbound) {
+ route->path_rec_outbound =
+ kzalloc(sizeof(*route->path_rec_outbound), GFP_KERNEL);
+ if (!route->path_rec_outbound)
+ return -ENOMEM;
+ }
+
+ *route->path_rec_outbound = *path_rec;
+ return 0;
+}
+
+static void cma_query_handler(int status, struct sa_path_rec *path_rec,
+ unsigned int num_prs, void *context)
{
struct cma_work *work = context;
struct rdma_route *route;
+ int i;
route = &work->id->id.route;
- if (!status) {
- route->num_paths = 1;
- *route->path_rec = *path_rec;
- } else {
- work->old_state = RDMA_CM_ROUTE_QUERY;
- work->new_state = RDMA_CM_ADDR_RESOLVED;
- work->event.event = RDMA_CM_EVENT_ROUTE_ERROR;
- work->event.status = status;
+ if (status)
+ goto fail;
+
+ for (i = 0; i < num_prs; i++) {
+ if (!path_rec[i].flags || (path_rec[i].flags & IB_PATH_GMP))
+ *route->path_rec = path_rec[i];
+ else if (path_rec[i].flags & IB_PATH_INBOUND)
+ status = route_set_path_rec_inbound(work, &path_rec[i]);
+ else if (path_rec[i].flags & IB_PATH_OUTBOUND)
+ status = route_set_path_rec_outbound(work,
+ &path_rec[i]);
+ else
+ status = -EINVAL;
+
+ if (status)
+ goto fail;
}
+ route->num_pri_alt_paths = 1;
+ queue_work(cma_wq, &work->work);
+ return;
+
+fail:
+ work->old_state = RDMA_CM_ROUTE_QUERY;
+ work->new_state = RDMA_CM_ADDR_RESOLVED;
+ work->event.event = RDMA_CM_EVENT_ROUTE_ERROR;
+ work->event.status = status;
+ pr_debug_ratelimited("RDMA CM: ROUTE_ERROR: failed to query path. status %d\n",
+ status);
queue_work(cma_wq, &work->work);
}
-static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms,
- struct cma_work *work)
+static int cma_query_ib_route(struct rdma_id_private *id_priv,
+ unsigned long timeout_ms, struct cma_work *work)
{
struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
- struct ib_sa_path_rec path_rec;
+ struct sa_path_rec path_rec;
ib_sa_comp_mask comp_mask;
struct sockaddr_in6 *sin6;
struct sockaddr_ib *sib;
memset(&path_rec, 0, sizeof path_rec);
+
+ if (rdma_cap_opa_ah(id_priv->id.device, id_priv->id.port_num))
+ path_rec.rec_type = SA_PATH_REC_TYPE_OPA;
+ else
+ path_rec.rec_type = SA_PATH_REC_TYPE_IB;
rdma_addr_get_sgid(dev_addr, &path_rec.sgid);
rdma_addr_get_dgid(dev_addr, &path_rec.dgid);
path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
path_rec.numb_path = 1;
path_rec.reversible = 1;
- path_rec.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv));
+ path_rec.service_id = rdma_get_service_id(&id_priv->id,
+ cma_dst_addr(id_priv));
comp_mask = IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID |
IB_SA_PATH_REC_PKEY | IB_SA_PATH_REC_NUMB_PATH |
@@ -2337,53 +2993,84 @@ static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms,
return (id_priv->query_id < 0) ? id_priv->query_id : 0;
}
-static void cma_work_handler(struct work_struct *_work)
+static void cma_iboe_join_work_handler(struct work_struct *work)
{
- struct cma_work *work = container_of(_work, struct cma_work, work);
- struct rdma_id_private *id_priv = work->id;
- int destroy = 0;
+ struct cma_multicast *mc =
+ container_of(work, struct cma_multicast, iboe_join.work);
+ struct rdma_cm_event *event = &mc->iboe_join.event;
+ struct rdma_id_private *id_priv = mc->id_priv;
+ int ret;
mutex_lock(&id_priv->handler_mutex);
- if (!cma_comp_exch(id_priv, work->old_state, work->new_state))
- goto out;
+ if (READ_ONCE(id_priv->state) == RDMA_CM_DESTROYING ||
+ READ_ONCE(id_priv->state) == RDMA_CM_DEVICE_REMOVAL)
+ goto out_unlock;
- if (id_priv->id.event_handler(&id_priv->id, &work->event)) {
- cma_exch(id_priv, RDMA_CM_DESTROYING);
- destroy = 1;
- }
-out:
+ ret = cma_cm_event_handler(id_priv, event);
+ WARN_ON(ret);
+
+out_unlock:
mutex_unlock(&id_priv->handler_mutex);
- cma_deref_id(id_priv);
- if (destroy)
- rdma_destroy_id(&id_priv->id);
- kfree(work);
+ if (event->event == RDMA_CM_EVENT_MULTICAST_JOIN)
+ rdma_destroy_ah_attr(&event->param.ud.ah_attr);
}
-static void cma_ndev_work_handler(struct work_struct *_work)
+static void cma_work_handler(struct work_struct *_work)
{
- struct cma_ndev_work *work = container_of(_work, struct cma_ndev_work, work);
+ struct cma_work *work = container_of(_work, struct cma_work, work);
struct rdma_id_private *id_priv = work->id;
- int destroy = 0;
mutex_lock(&id_priv->handler_mutex);
- if (id_priv->state == RDMA_CM_DESTROYING ||
- id_priv->state == RDMA_CM_DEVICE_REMOVAL)
- goto out;
+ if (READ_ONCE(id_priv->state) == RDMA_CM_DESTROYING ||
+ READ_ONCE(id_priv->state) == RDMA_CM_DEVICE_REMOVAL)
+ goto out_unlock;
+ if (work->old_state != 0 || work->new_state != 0) {
+ if (!cma_comp_exch(id_priv, work->old_state, work->new_state))
+ goto out_unlock;
+ }
- if (id_priv->id.event_handler(&id_priv->id, &work->event)) {
- cma_exch(id_priv, RDMA_CM_DESTROYING);
- destroy = 1;
+ if (cma_cm_event_handler(id_priv, &work->event)) {
+ cma_id_put(id_priv);
+ destroy_id_handler_unlock(id_priv);
+ goto out_free;
}
-out:
+out_unlock:
mutex_unlock(&id_priv->handler_mutex);
- cma_deref_id(id_priv);
- if (destroy)
- rdma_destroy_id(&id_priv->id);
+ cma_id_put(id_priv);
+out_free:
+ if (work->event.event == RDMA_CM_EVENT_MULTICAST_JOIN)
+ rdma_destroy_ah_attr(&work->event.param.ud.ah_attr);
kfree(work);
}
-static int cma_resolve_ib_route(struct rdma_id_private *id_priv, int timeout_ms)
+static void cma_init_resolve_route_work(struct cma_work *work,
+ struct rdma_id_private *id_priv)
+{
+ work->id = id_priv;
+ INIT_WORK(&work->work, cma_work_handler);
+ work->old_state = RDMA_CM_ROUTE_QUERY;
+ work->new_state = RDMA_CM_ROUTE_RESOLVED;
+ work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
+}
+
+static void enqueue_resolve_addr_work(struct cma_work *work,
+ struct rdma_id_private *id_priv)
+{
+ /* Balances with cma_id_put() in cma_work_handler */
+ cma_id_get(id_priv);
+
+ work->id = id_priv;
+ INIT_WORK(&work->work, cma_work_handler);
+ work->old_state = RDMA_CM_ADDR_QUERY;
+ work->new_state = RDMA_CM_ADDR_RESOLVED;
+ work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
+
+ queue_work(cma_wq, &work->work);
+}
+
+static int cma_resolve_ib_route(struct rdma_id_private *id_priv,
+ unsigned long timeout_ms)
{
struct rdma_route *route = &id_priv->id.route;
struct cma_work *work;
@@ -2393,13 +3080,10 @@ static int cma_resolve_ib_route(struct rdma_id_private *id_priv, int timeout_ms)
if (!work)
return -ENOMEM;
- work->id = id_priv;
- INIT_WORK(&work->work, cma_work_handler);
- work->old_state = RDMA_CM_ROUTE_QUERY;
- work->new_state = RDMA_CM_ROUTE_RESOLVED;
- work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
+ cma_init_resolve_route_work(work, id_priv);
- route->path_rec = kmalloc(sizeof *route->path_rec, GFP_KERNEL);
+ if (!route->path_rec)
+ route->path_rec = kmalloc(sizeof *route->path_rec, GFP_KERNEL);
if (!route->path_rec) {
ret = -ENOMEM;
goto err1;
@@ -2418,10 +3102,62 @@ err1:
return ret;
}
-int rdma_set_ib_paths(struct rdma_cm_id *id,
- struct ib_sa_path_rec *path_rec, int num_paths)
+static enum ib_gid_type cma_route_gid_type(enum rdma_network_type network_type,
+ unsigned long supported_gids,
+ enum ib_gid_type default_gid)
+{
+ if ((network_type == RDMA_NETWORK_IPV4 ||
+ network_type == RDMA_NETWORK_IPV6) &&
+ test_bit(IB_GID_TYPE_ROCE_UDP_ENCAP, &supported_gids))
+ return IB_GID_TYPE_ROCE_UDP_ENCAP;
+
+ return default_gid;
+}
+
+/*
+ * cma_iboe_set_path_rec_l2_fields() is helper function which sets
+ * path record type based on GID type.
+ * It also sets up other L2 fields which includes destination mac address
+ * netdev ifindex, of the path record.
+ * It returns the netdev of the bound interface for this path record entry.
+ */
+static struct net_device *
+cma_iboe_set_path_rec_l2_fields(struct rdma_id_private *id_priv)
+{
+ struct rdma_route *route = &id_priv->id.route;
+ enum ib_gid_type gid_type = IB_GID_TYPE_ROCE;
+ struct rdma_addr *addr = &route->addr;
+ unsigned long supported_gids;
+ struct net_device *ndev;
+
+ if (!addr->dev_addr.bound_dev_if)
+ return NULL;
+
+ ndev = dev_get_by_index(addr->dev_addr.net,
+ addr->dev_addr.bound_dev_if);
+ if (!ndev)
+ return NULL;
+
+ supported_gids = roce_gid_type_mask_support(id_priv->id.device,
+ id_priv->id.port_num);
+ gid_type = cma_route_gid_type(addr->dev_addr.network,
+ supported_gids,
+ id_priv->gid_type);
+ /* Use the hint from IP Stack to select GID Type */
+ if (gid_type < ib_network_to_gid_type(addr->dev_addr.network))
+ gid_type = ib_network_to_gid_type(addr->dev_addr.network);
+ route->path_rec->rec_type = sa_conv_gid_to_pathrec_type(gid_type);
+
+ route->path_rec->roce.route_resolved = true;
+ sa_path_set_dmac(route->path_rec, addr->dev_addr.dst_dev_addr);
+ return ndev;
+}
+
+int rdma_set_ib_path(struct rdma_cm_id *id,
+ struct sa_path_rec *path_rec)
{
struct rdma_id_private *id_priv;
+ struct net_device *ndev;
int ret;
id_priv = container_of(id, struct rdma_id_private, id);
@@ -2429,22 +3165,35 @@ int rdma_set_ib_paths(struct rdma_cm_id *id,
RDMA_CM_ROUTE_RESOLVED))
return -EINVAL;
- id->route.path_rec = kmemdup(path_rec, sizeof *path_rec * num_paths,
+ id->route.path_rec = kmemdup(path_rec, sizeof(*path_rec),
GFP_KERNEL);
if (!id->route.path_rec) {
ret = -ENOMEM;
goto err;
}
- id->route.num_paths = num_paths;
+ if (rdma_protocol_roce(id->device, id->port_num)) {
+ ndev = cma_iboe_set_path_rec_l2_fields(id_priv);
+ if (!ndev) {
+ ret = -ENODEV;
+ goto err_free;
+ }
+ dev_put(ndev);
+ }
+
+ id->route.num_pri_alt_paths = 1;
return 0;
+
+err_free:
+ kfree(id->route.path_rec);
+ id->route.path_rec = NULL;
err:
cma_comp_exch(id_priv, RDMA_CM_ROUTE_RESOLVED, RDMA_CM_ADDR_RESOLVED);
return ret;
}
-EXPORT_SYMBOL(rdma_set_ib_paths);
+EXPORT_SYMBOL(rdma_set_ib_path);
-static int cma_resolve_iw_route(struct rdma_id_private *id_priv, int timeout_ms)
+static int cma_resolve_iw_route(struct rdma_id_private *id_priv)
{
struct cma_work *work;
@@ -2452,45 +3201,91 @@ static int cma_resolve_iw_route(struct rdma_id_private *id_priv, int timeout_ms)
if (!work)
return -ENOMEM;
- work->id = id_priv;
- INIT_WORK(&work->work, cma_work_handler);
- work->old_state = RDMA_CM_ROUTE_QUERY;
- work->new_state = RDMA_CM_ROUTE_RESOLVED;
- work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
+ cma_init_resolve_route_work(work, id_priv);
queue_work(cma_wq, &work->work);
return 0;
}
-static int iboe_tos_to_sl(struct net_device *ndev, int tos)
+static int get_vlan_ndev_tc(struct net_device *vlan_ndev, int prio)
{
- int prio;
struct net_device *dev;
- prio = rt_tos2priority(tos);
- dev = ndev->priv_flags & IFF_802_1Q_VLAN ?
- vlan_dev_real_dev(ndev) : ndev;
-
+ dev = vlan_dev_real_dev(vlan_ndev);
if (dev->num_tc)
return netdev_get_prio_tc_map(dev, prio);
-#if IS_ENABLED(CONFIG_VLAN_8021Q)
- if (ndev->priv_flags & IFF_802_1Q_VLAN)
- return (vlan_dev_get_egress_qos_mask(ndev, prio) &
- VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
-#endif
- return 0;
+ return (vlan_dev_get_egress_qos_mask(vlan_ndev, prio) &
+ VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
}
-static enum ib_gid_type cma_route_gid_type(enum rdma_network_type network_type,
- unsigned long supported_gids,
- enum ib_gid_type default_gid)
+struct iboe_prio_tc_map {
+ int input_prio;
+ int output_tc;
+ bool found;
+};
+
+static int get_lower_vlan_dev_tc(struct net_device *dev,
+ struct netdev_nested_priv *priv)
{
- if ((network_type == RDMA_NETWORK_IPV4 ||
- network_type == RDMA_NETWORK_IPV6) &&
- test_bit(IB_GID_TYPE_ROCE_UDP_ENCAP, &supported_gids))
- return IB_GID_TYPE_ROCE_UDP_ENCAP;
+ struct iboe_prio_tc_map *map = (struct iboe_prio_tc_map *)priv->data;
- return default_gid;
+ if (is_vlan_dev(dev))
+ map->output_tc = get_vlan_ndev_tc(dev, map->input_prio);
+ else if (dev->num_tc)
+ map->output_tc = netdev_get_prio_tc_map(dev, map->input_prio);
+ else
+ map->output_tc = 0;
+ /* We are interested only in first level VLAN device, so always
+ * return 1 to stop iterating over next level devices.
+ */
+ map->found = true;
+ return 1;
+}
+
+static int iboe_tos_to_sl(struct net_device *ndev, int tos)
+{
+ struct iboe_prio_tc_map prio_tc_map = {};
+ int prio = rt_tos2priority(tos);
+ struct netdev_nested_priv priv;
+
+ /* If VLAN device, get it directly from the VLAN netdev */
+ if (is_vlan_dev(ndev))
+ return get_vlan_ndev_tc(ndev, prio);
+
+ prio_tc_map.input_prio = prio;
+ priv.data = (void *)&prio_tc_map;
+ rcu_read_lock();
+ netdev_walk_all_lower_dev_rcu(ndev,
+ get_lower_vlan_dev_tc,
+ &priv);
+ rcu_read_unlock();
+ /* If map is found from lower device, use it; Otherwise
+ * continue with the current netdevice to get priority to tc map.
+ */
+ if (prio_tc_map.found)
+ return prio_tc_map.output_tc;
+ else if (ndev->num_tc)
+ return netdev_get_prio_tc_map(ndev, prio);
+ else
+ return 0;
+}
+
+static __be32 cma_get_roce_udp_flow_label(struct rdma_id_private *id_priv)
+{
+ struct sockaddr_in6 *addr6;
+ u16 dport, sport;
+ u32 hash, fl;
+
+ addr6 = (struct sockaddr_in6 *)cma_src_addr(id_priv);
+ fl = be32_to_cpu(addr6->sin6_flowinfo) & IB_GRH_FLOWLABEL_MASK;
+ if ((cma_family(id_priv) != AF_INET6) || !fl) {
+ dport = be16_to_cpu(cma_port(cma_dst_addr(id_priv)));
+ sport = be16_to_cpu(cma_port(cma_src_addr(id_priv)));
+ hash = (u32)sport * 31 + dport;
+ fl = hash & IB_GRH_FLOWLABEL_MASK;
+ }
+
+ return cpu_to_be32(fl);
}
static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
@@ -2499,72 +3294,39 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
struct rdma_addr *addr = &route->addr;
struct cma_work *work;
int ret;
- struct net_device *ndev = NULL;
+ struct net_device *ndev;
+ u8 default_roce_tos = id_priv->cma_dev->default_roce_tos[id_priv->id.port_num -
+ rdma_start_port(id_priv->cma_dev->device)];
+ u8 tos;
+
+ mutex_lock(&id_priv->qp_mutex);
+ tos = id_priv->tos_set ? id_priv->tos : default_roce_tos;
+ mutex_unlock(&id_priv->qp_mutex);
work = kzalloc(sizeof *work, GFP_KERNEL);
if (!work)
return -ENOMEM;
- work->id = id_priv;
- INIT_WORK(&work->work, cma_work_handler);
-
route->path_rec = kzalloc(sizeof *route->path_rec, GFP_KERNEL);
if (!route->path_rec) {
ret = -ENOMEM;
goto err1;
}
- route->num_paths = 1;
-
- if (addr->dev_addr.bound_dev_if) {
- unsigned long supported_gids;
-
- ndev = dev_get_by_index(&init_net, addr->dev_addr.bound_dev_if);
- if (!ndev) {
- ret = -ENODEV;
- goto err2;
- }
-
- if (ndev->flags & IFF_LOOPBACK) {
- dev_put(ndev);
- if (!id_priv->id.device->get_netdev) {
- ret = -EOPNOTSUPP;
- goto err2;
- }
-
- ndev = id_priv->id.device->get_netdev(id_priv->id.device,
- id_priv->id.port_num);
- if (!ndev) {
- ret = -ENODEV;
- goto err2;
- }
- }
+ route->num_pri_alt_paths = 1;
- route->path_rec->net = &init_net;
- route->path_rec->ifindex = ndev->ifindex;
- supported_gids = roce_gid_type_mask_support(id_priv->id.device,
- id_priv->id.port_num);
- route->path_rec->gid_type =
- cma_route_gid_type(addr->dev_addr.network,
- supported_gids,
- id_priv->gid_type);
- }
+ ndev = cma_iboe_set_path_rec_l2_fields(id_priv);
if (!ndev) {
ret = -ENODEV;
goto err2;
}
- memcpy(route->path_rec->dmac, addr->dev_addr.dst_dev_addr, ETH_ALEN);
-
rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
&route->path_rec->sgid);
rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.dst_addr,
&route->path_rec->dgid);
- /* Use the hint from IP Stack to select GID Type */
- if (route->path_rec->gid_type < ib_network_to_gid_type(addr->dev_addr.network))
- route->path_rec->gid_type = ib_network_to_gid_type(addr->dev_addr.network);
if (((struct sockaddr *)&id_priv->id.route.addr.dst_addr)->sa_family != AF_IB)
/* TODO: get the hoplimit from the inet/inet6 device */
route->path_rec->hop_limit = addr->dev_addr.hoplimit;
@@ -2573,23 +3335,38 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
route->path_rec->reversible = 1;
route->path_rec->pkey = cpu_to_be16(0xffff);
route->path_rec->mtu_selector = IB_SA_EQ;
- route->path_rec->sl = iboe_tos_to_sl(ndev, id_priv->tos);
+ route->path_rec->sl = iboe_tos_to_sl(ndev, tos);
+ route->path_rec->traffic_class = tos;
route->path_rec->mtu = iboe_get_mtu(ndev->mtu);
route->path_rec->rate_selector = IB_SA_EQ;
- route->path_rec->rate = iboe_get_rate(ndev);
+ route->path_rec->rate = IB_RATE_PORT_CURRENT;
dev_put(ndev);
route->path_rec->packet_life_time_selector = IB_SA_EQ;
- route->path_rec->packet_life_time = CMA_IBOE_PACKET_LIFETIME;
+ /* In case ACK timeout is set, use this value to calculate
+ * PacketLifeTime. As per IBTA 12.7.34,
+ * local ACK timeout = (2 * PacketLifeTime + Local CA’s ACK delay).
+ * Assuming a negligible local ACK delay, we can use
+ * PacketLifeTime = local ACK timeout/2
+ * as a reasonable approximation for RoCE networks.
+ */
+ mutex_lock(&id_priv->qp_mutex);
+ if (id_priv->timeout_set && id_priv->timeout)
+ route->path_rec->packet_life_time = id_priv->timeout - 1;
+ else
+ route->path_rec->packet_life_time = CMA_IBOE_PACKET_LIFETIME;
+ mutex_unlock(&id_priv->qp_mutex);
+
if (!route->path_rec->mtu) {
ret = -EINVAL;
goto err2;
}
- work->old_state = RDMA_CM_ROUTE_QUERY;
- work->new_state = RDMA_CM_ROUTE_RESOLVED;
- work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
- work->event.status = 0;
+ if (rdma_protocol_roce_udp_encap(id_priv->id.device,
+ id_priv->id.port_num))
+ route->path_rec->flow_label =
+ cma_get_roce_udp_flow_label(id_priv);
+ cma_init_resolve_route_work(work, id_priv);
queue_work(cma_wq, &work->work);
return 0;
@@ -2597,27 +3374,39 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
err2:
kfree(route->path_rec);
route->path_rec = NULL;
+ route->num_pri_alt_paths = 0;
err1:
kfree(work);
return ret;
}
-int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms)
+int rdma_resolve_route(struct rdma_cm_id *id, unsigned long timeout_ms)
{
struct rdma_id_private *id_priv;
+ enum rdma_cm_state state;
int ret;
+ if (!timeout_ms)
+ return -EINVAL;
+
id_priv = container_of(id, struct rdma_id_private, id);
- if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED, RDMA_CM_ROUTE_QUERY))
+ state = id_priv->state;
+ if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED,
+ RDMA_CM_ROUTE_QUERY) &&
+ !cma_comp_exch(id_priv, RDMA_CM_ADDRINFO_RESOLVED,
+ RDMA_CM_ROUTE_QUERY))
return -EINVAL;
- atomic_inc(&id_priv->refcount);
+ cma_id_get(id_priv);
if (rdma_cap_ib_sa(id->device, id->port_num))
ret = cma_resolve_ib_route(id_priv, timeout_ms);
- else if (rdma_protocol_roce(id->device, id->port_num))
+ else if (rdma_protocol_roce(id->device, id->port_num)) {
ret = cma_resolve_iboe_route(id_priv);
+ if (!ret)
+ cma_add_id_to_tree(id_priv);
+ }
else if (rdma_protocol_iwarp(id->device, id->port_num))
- ret = cma_resolve_iw_route(id_priv, timeout_ms);
+ ret = cma_resolve_iw_route(id_priv);
else
ret = -ENOSYS;
@@ -2626,8 +3415,8 @@ int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms)
return 0;
err:
- cma_comp_exch(id_priv, RDMA_CM_ROUTE_QUERY, RDMA_CM_ADDR_RESOLVED);
- cma_deref_id(id_priv);
+ cma_comp_exch(id_priv, RDMA_CM_ROUTE_QUERY, state);
+ cma_id_put(id_priv);
return ret;
}
EXPORT_SYMBOL(rdma_resolve_route);
@@ -2652,11 +3441,11 @@ static void cma_set_loopback(struct sockaddr *addr)
static int cma_bind_loopback(struct rdma_id_private *id_priv)
{
struct cma_device *cma_dev, *cur_dev;
- struct ib_port_attr port_attr;
union ib_gid gid;
+ enum ib_port_state port_state;
+ unsigned int p;
u16 pkey;
int ret;
- u8 p;
cma_dev = NULL;
mutex_lock(&lock);
@@ -2668,9 +3457,9 @@ static int cma_bind_loopback(struct rdma_id_private *id_priv)
if (!cma_dev)
cma_dev = cur_dev;
- for (p = 1; p <= cur_dev->device->phys_port_cnt; ++p) {
- if (!ib_query_port(cur_dev->device, p, &port_attr) &&
- port_attr.state == IB_PORT_ACTIVE) {
+ rdma_for_each_port (cur_dev->device, p) {
+ if (!ib_get_cached_port_state(cur_dev->device, p, &port_state) &&
+ port_state == IB_PORT_ACTIVE) {
cma_dev = cur_dev;
goto port_found;
}
@@ -2685,7 +3474,7 @@ static int cma_bind_loopback(struct rdma_id_private *id_priv)
p = 1;
port_found:
- ret = ib_get_cached_gid(cma_dev->device, p, 0, &gid, NULL);
+ ret = rdma_query_gid(cma_dev->device, p, 0, &gid);
if (ret)
goto out;
@@ -2701,6 +3490,7 @@ port_found:
ib_addr_set_pkey(&id_priv->id.route.addr.dev_addr, pkey);
id_priv->id.port_num = p;
cma_attach_to_dev(id_priv, cma_dev);
+ rdma_restrack_add(&id_priv->res);
cma_set_loopback(cma_src_addr(id_priv));
out:
mutex_unlock(&lock);
@@ -2711,19 +3501,36 @@ static void addr_handler(int status, struct sockaddr *src_addr,
struct rdma_dev_addr *dev_addr, void *context)
{
struct rdma_id_private *id_priv = context;
- struct rdma_cm_event event;
+ struct rdma_cm_event event = {};
+ struct sockaddr *addr;
+ struct sockaddr_storage old_addr;
- memset(&event, 0, sizeof event);
mutex_lock(&id_priv->handler_mutex);
if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY,
RDMA_CM_ADDR_RESOLVED))
goto out;
- memcpy(cma_src_addr(id_priv), src_addr, rdma_addr_size(src_addr));
- if (!status && !id_priv->cma_dev)
- status = cma_acquire_dev(id_priv, NULL);
+ /*
+ * Store the previous src address, so that if we fail to acquire
+ * matching rdma device, old address can be restored back, which helps
+ * to cancel the cma listen operation correctly.
+ */
+ addr = cma_src_addr(id_priv);
+ memcpy(&old_addr, addr, rdma_addr_size(addr));
+ memcpy(addr, src_addr, rdma_addr_size(src_addr));
+ if (!status && !id_priv->cma_dev) {
+ status = cma_acquire_dev_by_src_ip(id_priv);
+ if (status)
+ pr_debug_ratelimited("RDMA CM: ADDR_ERROR: failed to acquire device. status %d\n",
+ status);
+ rdma_restrack_add(&id_priv->res);
+ } else if (status) {
+ pr_debug_ratelimited("RDMA CM: ADDR_ERROR: failed to resolve IP. status %d\n", status);
+ }
if (status) {
+ memcpy(addr, &old_addr,
+ rdma_addr_size((struct sockaddr *)&old_addr));
if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED,
RDMA_CM_ADDR_BOUND))
goto out;
@@ -2732,16 +3539,12 @@ static void addr_handler(int status, struct sockaddr *src_addr,
} else
event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
- if (id_priv->id.event_handler(&id_priv->id, &event)) {
- cma_exch(id_priv, RDMA_CM_DESTROYING);
- mutex_unlock(&id_priv->handler_mutex);
- cma_deref_id(id_priv);
- rdma_destroy_id(&id_priv->id);
+ if (cma_cm_event_handler(id_priv, &event)) {
+ destroy_id_handler_unlock(id_priv);
return;
}
out:
mutex_unlock(&id_priv->handler_mutex);
- cma_deref_id(id_priv);
}
static int cma_resolve_loopback(struct rdma_id_private *id_priv)
@@ -2763,12 +3566,7 @@ static int cma_resolve_loopback(struct rdma_id_private *id_priv)
rdma_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid);
rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, &gid);
- work->id = id_priv;
- INIT_WORK(&work->work, cma_work_handler);
- work->old_state = RDMA_CM_ADDR_QUERY;
- work->new_state = RDMA_CM_ADDR_RESOLVED;
- work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
- queue_work(cma_wq, &work->work);
+ enqueue_resolve_addr_work(work, id_priv);
return 0;
err:
kfree(work);
@@ -2793,81 +3591,13 @@ static int cma_resolve_ib_addr(struct rdma_id_private *id_priv)
rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, (union ib_gid *)
&(((struct sockaddr_ib *) &id_priv->id.route.addr.dst_addr)->sib_addr));
- work->id = id_priv;
- INIT_WORK(&work->work, cma_work_handler);
- work->old_state = RDMA_CM_ADDR_QUERY;
- work->new_state = RDMA_CM_ADDR_RESOLVED;
- work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
- queue_work(cma_wq, &work->work);
+ enqueue_resolve_addr_work(work, id_priv);
return 0;
err:
kfree(work);
return ret;
}
-static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
- struct sockaddr *dst_addr)
-{
- if (!src_addr || !src_addr->sa_family) {
- src_addr = (struct sockaddr *) &id->route.addr.src_addr;
- src_addr->sa_family = dst_addr->sa_family;
- if (dst_addr->sa_family == AF_INET6) {
- struct sockaddr_in6 *src_addr6 = (struct sockaddr_in6 *) src_addr;
- struct sockaddr_in6 *dst_addr6 = (struct sockaddr_in6 *) dst_addr;
- src_addr6->sin6_scope_id = dst_addr6->sin6_scope_id;
- if (ipv6_addr_type(&dst_addr6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
- id->route.addr.dev_addr.bound_dev_if = dst_addr6->sin6_scope_id;
- } else if (dst_addr->sa_family == AF_IB) {
- ((struct sockaddr_ib *) src_addr)->sib_pkey =
- ((struct sockaddr_ib *) dst_addr)->sib_pkey;
- }
- }
- return rdma_bind_addr(id, src_addr);
-}
-
-int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
- struct sockaddr *dst_addr, int timeout_ms)
-{
- struct rdma_id_private *id_priv;
- int ret;
-
- id_priv = container_of(id, struct rdma_id_private, id);
- if (id_priv->state == RDMA_CM_IDLE) {
- ret = cma_bind_addr(id, src_addr, dst_addr);
- if (ret)
- return ret;
- }
-
- if (cma_family(id_priv) != dst_addr->sa_family)
- return -EINVAL;
-
- if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY))
- return -EINVAL;
-
- atomic_inc(&id_priv->refcount);
- memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr));
- if (cma_any_addr(dst_addr)) {
- ret = cma_resolve_loopback(id_priv);
- } else {
- if (dst_addr->sa_family == AF_IB) {
- ret = cma_resolve_ib_addr(id_priv);
- } else {
- ret = rdma_resolve_ip(&addr_client, cma_src_addr(id_priv),
- dst_addr, &id->route.addr.dev_addr,
- timeout_ms, addr_handler, id_priv);
- }
- }
- if (ret)
- goto err;
-
- return 0;
-err:
- cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND);
- cma_deref_id(id_priv);
- return ret;
-}
-EXPORT_SYMBOL(rdma_resolve_addr);
-
int rdma_set_reuseaddr(struct rdma_cm_id *id, int reuse)
{
struct rdma_id_private *id_priv;
@@ -2876,7 +3606,8 @@ int rdma_set_reuseaddr(struct rdma_cm_id *id, int reuse)
id_priv = container_of(id, struct rdma_id_private, id);
spin_lock_irqsave(&id_priv->lock, flags);
- if (reuse || id_priv->state == RDMA_CM_IDLE) {
+ if ((reuse && id_priv->state != RDMA_CM_LISTEN) ||
+ id_priv->state == RDMA_CM_IDLE) {
id_priv->reuseaddr = reuse;
ret = 0;
} else {
@@ -2915,6 +3646,8 @@ static void cma_bind_port(struct rdma_bind_list *bind_list,
u64 sid, mask;
__be16 port;
+ lockdep_assert_held(&lock);
+
addr = cma_src_addr(id_priv);
port = htons(bind_list->port);
@@ -2937,12 +3670,14 @@ static void cma_bind_port(struct rdma_bind_list *bind_list,
hlist_add_head(&id_priv->node, &bind_list->owners);
}
-static int cma_alloc_port(enum rdma_port_space ps,
+static int cma_alloc_port(enum rdma_ucm_port_space ps,
struct rdma_id_private *id_priv, unsigned short snum)
{
struct rdma_bind_list *bind_list;
int ret;
+ lockdep_assert_held(&lock);
+
bind_list = kzalloc(sizeof *bind_list, GFP_KERNEL);
if (!bind_list)
return -ENOMEM;
@@ -2953,7 +3688,7 @@ static int cma_alloc_port(enum rdma_port_space ps,
goto err;
bind_list->ps = ps;
- bind_list->port = (unsigned short)ret;
+ bind_list->port = snum;
cma_bind_port(bind_list, id_priv);
return 0;
err:
@@ -2961,7 +3696,48 @@ err:
return ret == -ENOSPC ? -EADDRNOTAVAIL : ret;
}
-static int cma_alloc_any_port(enum rdma_port_space ps,
+static int cma_port_is_unique(struct rdma_bind_list *bind_list,
+ struct rdma_id_private *id_priv)
+{
+ struct rdma_id_private *cur_id;
+ struct sockaddr *daddr = cma_dst_addr(id_priv);
+ struct sockaddr *saddr = cma_src_addr(id_priv);
+ __be16 dport = cma_port(daddr);
+
+ lockdep_assert_held(&lock);
+
+ hlist_for_each_entry(cur_id, &bind_list->owners, node) {
+ struct sockaddr *cur_daddr = cma_dst_addr(cur_id);
+ struct sockaddr *cur_saddr = cma_src_addr(cur_id);
+ __be16 cur_dport = cma_port(cur_daddr);
+
+ if (id_priv == cur_id)
+ continue;
+
+ /* different dest port -> unique */
+ if (!cma_any_port(daddr) &&
+ !cma_any_port(cur_daddr) &&
+ (dport != cur_dport))
+ continue;
+
+ /* different src address -> unique */
+ if (!cma_any_addr(saddr) &&
+ !cma_any_addr(cur_saddr) &&
+ cma_addr_cmp(saddr, cur_saddr))
+ continue;
+
+ /* different dst address -> unique */
+ if (!cma_any_addr(daddr) &&
+ !cma_any_addr(cur_daddr) &&
+ cma_addr_cmp(daddr, cur_daddr))
+ continue;
+
+ return -EADDRNOTAVAIL;
+ }
+ return 0;
+}
+
+static int cma_alloc_any_port(enum rdma_ucm_port_space ps,
struct rdma_id_private *id_priv)
{
static unsigned int last_used_port;
@@ -2969,13 +3745,25 @@ static int cma_alloc_any_port(enum rdma_port_space ps,
unsigned int rover;
struct net *net = id_priv->id.route.addr.dev_addr.net;
+ lockdep_assert_held(&lock);
+
inet_get_local_port_range(net, &low, &high);
remaining = (high - low) + 1;
- rover = prandom_u32() % remaining + low;
+ rover = get_random_u32_inclusive(low, remaining + low - 1);
retry:
- if (last_used_port != rover &&
- !cma_ps_find(net, ps, (unsigned short)rover)) {
- int ret = cma_alloc_port(ps, id_priv, rover);
+ if (last_used_port != rover) {
+ struct rdma_bind_list *bind_list;
+ int ret;
+
+ bind_list = cma_ps_find(net, ps, (unsigned short)rover);
+
+ if (!bind_list) {
+ ret = cma_alloc_port(ps, id_priv, rover);
+ } else {
+ ret = cma_port_is_unique(bind_list, id_priv);
+ if (!ret)
+ cma_bind_port(bind_list, id_priv);
+ }
/*
* Remember previously used port number in order to avoid
* re-using same port immediately after it is closed.
@@ -3006,13 +3794,14 @@ static int cma_check_port(struct rdma_bind_list *bind_list,
struct rdma_id_private *cur_id;
struct sockaddr *addr, *cur_addr;
+ lockdep_assert_held(&lock);
+
addr = cma_src_addr(id_priv);
hlist_for_each_entry(cur_id, &bind_list->owners, node) {
if (id_priv == cur_id)
continue;
- if ((cur_id->state != RDMA_CM_LISTEN) && reuseaddr &&
- cur_id->reuseaddr)
+ if (reuseaddr && cur_id->reuseaddr)
continue;
cur_addr = cma_src_addr(cur_id);
@@ -3029,13 +3818,15 @@ static int cma_check_port(struct rdma_bind_list *bind_list,
return 0;
}
-static int cma_use_port(enum rdma_port_space ps,
+static int cma_use_port(enum rdma_ucm_port_space ps,
struct rdma_id_private *id_priv)
{
struct rdma_bind_list *bind_list;
unsigned short snum;
int ret;
+ lockdep_assert_held(&lock);
+
snum = ntohs(cma_port(cma_src_addr(id_priv)));
if (snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE))
return -EACCES;
@@ -3051,20 +3842,8 @@ static int cma_use_port(enum rdma_port_space ps,
return ret;
}
-static int cma_bind_listen(struct rdma_id_private *id_priv)
-{
- struct rdma_bind_list *bind_list = id_priv->bind_list;
- int ret = 0;
-
- mutex_lock(&lock);
- if (bind_list->owners.first->next)
- ret = cma_check_port(bind_list, id_priv, 0);
- mutex_unlock(&lock);
- return ret;
-}
-
-static enum rdma_port_space cma_select_inet_ps(
- struct rdma_id_private *id_priv)
+static enum rdma_ucm_port_space
+cma_select_inet_ps(struct rdma_id_private *id_priv)
{
switch (id_priv->id.ps) {
case RDMA_PS_TCP:
@@ -3078,9 +3857,10 @@ static enum rdma_port_space cma_select_inet_ps(
}
}
-static enum rdma_port_space cma_select_ib_ps(struct rdma_id_private *id_priv)
+static enum rdma_ucm_port_space
+cma_select_ib_ps(struct rdma_id_private *id_priv)
{
- enum rdma_port_space ps = 0;
+ enum rdma_ucm_port_space ps = 0;
struct sockaddr_ib *sib;
u64 sid_ps, mask, sid;
@@ -3111,7 +3891,7 @@ static enum rdma_port_space cma_select_ib_ps(struct rdma_id_private *id_priv)
static int cma_get_port(struct rdma_id_private *id_priv)
{
- enum rdma_port_space ps;
+ enum rdma_ucm_port_space ps;
int ret;
if (cma_family(id_priv) != AF_IB)
@@ -3155,28 +3935,41 @@ static int cma_check_linklocal(struct rdma_dev_addr *dev_addr,
int rdma_listen(struct rdma_cm_id *id, int backlog)
{
- struct rdma_id_private *id_priv;
+ struct rdma_id_private *id_priv =
+ container_of(id, struct rdma_id_private, id);
int ret;
- id_priv = container_of(id, struct rdma_id_private, id);
- if (id_priv->state == RDMA_CM_IDLE) {
- id->route.addr.src_addr.ss_family = AF_INET;
- ret = rdma_bind_addr(id, cma_src_addr(id_priv));
+ if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_LISTEN)) {
+ struct sockaddr_in any_in = {
+ .sin_family = AF_INET,
+ .sin_addr.s_addr = htonl(INADDR_ANY),
+ };
+
+ /* For a well behaved ULP state will be RDMA_CM_IDLE */
+ ret = rdma_bind_addr(id, (struct sockaddr *)&any_in);
if (ret)
return ret;
+ if (WARN_ON(!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND,
+ RDMA_CM_LISTEN)))
+ return -EINVAL;
}
- if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_LISTEN))
- return -EINVAL;
-
+ /*
+ * Once the ID reaches RDMA_CM_LISTEN it is not allowed to be reusable
+ * any more, and has to be unique in the bind list.
+ */
if (id_priv->reuseaddr) {
- ret = cma_bind_listen(id_priv);
+ mutex_lock(&lock);
+ ret = cma_check_port(id_priv->bind_list, id_priv, 0);
+ if (!ret)
+ id_priv->reuseaddr = 0;
+ mutex_unlock(&lock);
if (ret)
goto err;
}
id_priv->backlog = backlog;
- if (id->device) {
+ if (id_priv->cma_dev) {
if (rdma_cap_ib_cm(id->device, 1)) {
ret = cma_ib_listen(id_priv);
if (ret)
@@ -3189,41 +3982,48 @@ int rdma_listen(struct rdma_cm_id *id, int backlog)
ret = -ENOSYS;
goto err;
}
- } else
- cma_listen_on_all(id_priv);
+ } else {
+ ret = cma_listen_on_all(id_priv);
+ if (ret)
+ goto err;
+ }
return 0;
err:
id_priv->backlog = 0;
+ /*
+ * All the failure paths that lead here will not allow the req_handler's
+ * to have run.
+ */
cma_comp_exch(id_priv, RDMA_CM_LISTEN, RDMA_CM_ADDR_BOUND);
return ret;
}
EXPORT_SYMBOL(rdma_listen);
-int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
+static int rdma_bind_addr_dst(struct rdma_id_private *id_priv,
+ struct sockaddr *addr, const struct sockaddr *daddr)
{
- struct rdma_id_private *id_priv;
+ struct sockaddr *id_daddr;
int ret;
if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6 &&
addr->sa_family != AF_IB)
return -EAFNOSUPPORT;
- id_priv = container_of(id, struct rdma_id_private, id);
if (!cma_comp_exch(id_priv, RDMA_CM_IDLE, RDMA_CM_ADDR_BOUND))
return -EINVAL;
- ret = cma_check_linklocal(&id->route.addr.dev_addr, addr);
+ ret = cma_check_linklocal(&id_priv->id.route.addr.dev_addr, addr);
if (ret)
goto err1;
memcpy(cma_src_addr(id_priv), addr, rdma_addr_size(addr));
if (!cma_any_addr(addr)) {
- ret = cma_translate_addr(addr, &id->route.addr.dev_addr);
+ ret = cma_translate_addr(addr, &id_priv->id.route.addr.dev_addr);
if (ret)
goto err1;
- ret = cma_acquire_dev(id_priv, NULL);
+ ret = cma_acquire_dev_by_src_ip(id_priv);
if (ret)
goto err1;
}
@@ -3239,10 +4039,17 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
}
#endif
}
+ id_daddr = cma_dst_addr(id_priv);
+ if (daddr != id_daddr)
+ memcpy(id_daddr, daddr, rdma_addr_size(addr));
+ id_daddr->sa_family = addr->sa_family;
+
ret = cma_get_port(id_priv);
if (ret)
goto err2;
+ if (!cma_any_addr(addr))
+ rdma_restrack_add(&id_priv->res);
return 0;
err2:
if (id_priv->cma_dev)
@@ -3251,6 +4058,129 @@ err1:
cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_IDLE);
return ret;
}
+
+static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
+ const struct sockaddr *dst_addr)
+{
+ struct rdma_id_private *id_priv =
+ container_of(id, struct rdma_id_private, id);
+ struct sockaddr_storage zero_sock = {};
+
+ if (src_addr && src_addr->sa_family)
+ return rdma_bind_addr_dst(id_priv, src_addr, dst_addr);
+
+ /*
+ * When the src_addr is not specified, automatically supply an any addr
+ */
+ zero_sock.ss_family = dst_addr->sa_family;
+ if (IS_ENABLED(CONFIG_IPV6) && dst_addr->sa_family == AF_INET6) {
+ struct sockaddr_in6 *src_addr6 =
+ (struct sockaddr_in6 *)&zero_sock;
+ struct sockaddr_in6 *dst_addr6 =
+ (struct sockaddr_in6 *)dst_addr;
+
+ src_addr6->sin6_scope_id = dst_addr6->sin6_scope_id;
+ if (ipv6_addr_type(&dst_addr6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
+ id->route.addr.dev_addr.bound_dev_if =
+ dst_addr6->sin6_scope_id;
+ } else if (dst_addr->sa_family == AF_IB) {
+ ((struct sockaddr_ib *)&zero_sock)->sib_pkey =
+ ((struct sockaddr_ib *)dst_addr)->sib_pkey;
+ }
+ return rdma_bind_addr_dst(id_priv, (struct sockaddr *)&zero_sock, dst_addr);
+}
+
+/*
+ * If required, resolve the source address for bind and leave the id_priv in
+ * state RDMA_CM_ADDR_BOUND. This oddly uses the state to determine the prior
+ * calls made by ULP, a previously bound ID will not be re-bound and src_addr is
+ * ignored.
+ */
+static int resolve_prepare_src(struct rdma_id_private *id_priv,
+ struct sockaddr *src_addr,
+ const struct sockaddr *dst_addr)
+{
+ int ret;
+
+ if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY)) {
+ /* For a well behaved ULP state will be RDMA_CM_IDLE */
+ ret = cma_bind_addr(&id_priv->id, src_addr, dst_addr);
+ if (ret)
+ return ret;
+ if (WARN_ON(!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND,
+ RDMA_CM_ADDR_QUERY)))
+ return -EINVAL;
+
+ } else {
+ memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr));
+ }
+
+ if (cma_family(id_priv) != dst_addr->sa_family) {
+ ret = -EINVAL;
+ goto err_state;
+ }
+ return 0;
+
+err_state:
+ cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND);
+ return ret;
+}
+
+int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
+ const struct sockaddr *dst_addr, unsigned long timeout_ms)
+{
+ struct rdma_id_private *id_priv =
+ container_of(id, struct rdma_id_private, id);
+ int ret;
+
+ ret = resolve_prepare_src(id_priv, src_addr, dst_addr);
+ if (ret)
+ return ret;
+
+ if (cma_any_addr(dst_addr)) {
+ ret = cma_resolve_loopback(id_priv);
+ } else {
+ if (dst_addr->sa_family == AF_IB) {
+ ret = cma_resolve_ib_addr(id_priv);
+ } else {
+ /*
+ * The FSM can return back to RDMA_CM_ADDR_BOUND after
+ * rdma_resolve_ip() is called, eg through the error
+ * path in addr_handler(). If this happens the existing
+ * request must be canceled before issuing a new one.
+ * Since canceling a request is a bit slow and this
+ * oddball path is rare, keep track once a request has
+ * been issued. The track turns out to be a permanent
+ * state since this is the only cancel as it is
+ * immediately before rdma_resolve_ip().
+ */
+ if (id_priv->used_resolve_ip)
+ rdma_addr_cancel(&id->route.addr.dev_addr);
+ else
+ id_priv->used_resolve_ip = 1;
+ ret = rdma_resolve_ip(cma_src_addr(id_priv), dst_addr,
+ &id->route.addr.dev_addr,
+ timeout_ms, addr_handler,
+ false, id_priv);
+ }
+ }
+ if (ret)
+ goto err;
+
+ return 0;
+err:
+ cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND);
+ return ret;
+}
+EXPORT_SYMBOL(rdma_resolve_addr);
+
+int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
+{
+ struct rdma_id_private *id_priv =
+ container_of(id, struct rdma_id_private, id);
+
+ return rdma_bind_addr_dst(id_priv, addr, cma_dst_addr(id_priv));
+}
EXPORT_SYMBOL(rdma_bind_addr);
static int cma_format_hdr(void *hdr, struct rdma_id_private *id_priv)
@@ -3284,18 +4214,18 @@ static int cma_format_hdr(void *hdr, struct rdma_id_private *id_priv)
}
static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
- struct ib_cm_event *ib_event)
+ const struct ib_cm_event *ib_event)
{
struct rdma_id_private *id_priv = cm_id->context;
- struct rdma_cm_event event;
- struct ib_cm_sidr_rep_event_param *rep = &ib_event->param.sidr_rep_rcvd;
- int ret = 0;
+ struct rdma_cm_event event = {};
+ const struct ib_cm_sidr_rep_event_param *rep =
+ &ib_event->param.sidr_rep_rcvd;
+ int ret;
mutex_lock(&id_priv->handler_mutex);
- if (id_priv->state != RDMA_CM_CONNECT)
+ if (READ_ONCE(id_priv->state) != RDMA_CM_CONNECT)
goto out;
- memset(&event, 0, sizeof event);
switch (ib_event->event) {
case IB_CM_SIDR_REQ_ERROR:
event.event = RDMA_CM_EVENT_UNREACHABLE;
@@ -3307,17 +4237,22 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
if (rep->status != IB_SIDR_SUCCESS) {
event.event = RDMA_CM_EVENT_UNREACHABLE;
event.status = ib_event->param.sidr_rep_rcvd.status;
+ pr_debug_ratelimited("RDMA CM: UNREACHABLE: bad SIDR reply. status %d\n",
+ event.status);
break;
}
ret = cma_set_qkey(id_priv, rep->qkey);
if (ret) {
+ pr_debug_ratelimited("RDMA CM: ADDR_ERROR: failed to set qkey. status %d\n", ret);
event.event = RDMA_CM_EVENT_ADDR_ERROR;
event.status = ret;
break;
}
- ib_init_ah_from_path(id_priv->id.device, id_priv->id.port_num,
- id_priv->id.route.path_rec,
- &event.param.ud.ah_attr);
+ ib_init_ah_attr_from_path(id_priv->id.device,
+ id_priv->id.port_num,
+ id_priv->id.route.path_rec,
+ &event.param.ud.ah_attr,
+ rep->sgid_attr);
event.param.ud.qp_num = rep->qpn;
event.param.ud.qkey = rep->qkey;
event.event = RDMA_CM_EVENT_ESTABLISHED;
@@ -3329,18 +4264,18 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
goto out;
}
- ret = id_priv->id.event_handler(&id_priv->id, &event);
+ ret = cma_cm_event_handler(id_priv, &event);
+
+ rdma_destroy_ah_attr(&event.param.ud.ah_attr);
if (ret) {
/* Destroy the CM ID by returning a non-zero value. */
id_priv->cm_id.ib = NULL;
- cma_exch(id_priv, RDMA_CM_DESTROYING);
- mutex_unlock(&id_priv->handler_mutex);
- rdma_destroy_id(&id_priv->id);
+ destroy_id_handler_unlock(id_priv);
return ret;
}
out:
mutex_unlock(&id_priv->handler_mutex);
- return ret;
+ return 0;
}
static int cma_resolve_ib_udp(struct rdma_id_private *id_priv,
@@ -3349,12 +4284,12 @@ static int cma_resolve_ib_udp(struct rdma_id_private *id_priv,
struct ib_cm_sidr_req_param req;
struct ib_cm_id *id;
void *private_data;
- int offset, ret;
+ u8 offset;
+ int ret;
memset(&req, 0, sizeof req);
offset = cma_user_data_offset(id_priv);
- req.private_data_len = offset + conn_param->private_data_len;
- if (req.private_data_len < conn_param->private_data_len)
+ if (check_add_overflow(offset, conn_param->private_data_len, &req.private_data_len))
return -EINVAL;
if (req.private_data_len) {
@@ -3385,10 +4320,12 @@ static int cma_resolve_ib_udp(struct rdma_id_private *id_priv,
id_priv->cm_id.ib = id;
req.path = id_priv->id.route.path_rec;
+ req.sgid_attr = id_priv->id.route.addr.dev_addr.sgid_attr;
req.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv));
req.timeout_ms = 1 << (CMA_CM_RESPONSE_TIMEOUT - 8);
req.max_cm_retries = CMA_MAX_CM_RETRIES;
+ trace_cm_send_sidr_req(id_priv);
ret = ib_send_cm_sidr_req(id_priv->cm_id.ib, &req);
if (ret) {
ib_destroy_cm_id(id_priv->cm_id.ib);
@@ -3406,12 +4343,12 @@ static int cma_connect_ib(struct rdma_id_private *id_priv,
struct rdma_route *route;
void *private_data;
struct ib_cm_id *id;
- int offset, ret;
+ u8 offset;
+ int ret;
memset(&req, 0, sizeof req);
offset = cma_user_data_offset(id_priv);
- req.private_data_len = offset + conn_param->private_data_len;
- if (req.private_data_len < conn_param->private_data_len)
+ if (check_add_overflow(offset, conn_param->private_data_len, &req.private_data_len))
return -EINVAL;
if (req.private_data_len) {
@@ -3442,9 +4379,13 @@ static int cma_connect_ib(struct rdma_id_private *id_priv,
}
req.primary_path = &route->path_rec[0];
- if (route->num_paths == 2)
+ req.primary_path_inbound = route->path_rec_inbound;
+ req.primary_path_outbound = route->path_rec_outbound;
+ if (route->num_pri_alt_paths == 2)
req.alternate_path = &route->path_rec[1];
+ req.ppath_sgid_attr = id_priv->id.route.addr.dev_addr.sgid_attr;
+ /* Alternate path SGID attribute currently unsupported */
req.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv));
req.qp_num = id_priv->qp_num;
req.qp_type = id_priv->id.qp_type;
@@ -3458,7 +4399,10 @@ static int cma_connect_ib(struct rdma_id_private *id_priv,
req.local_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT;
req.max_cm_retries = CMA_MAX_CM_RETRIES;
req.srq = id_priv->srq ? 1 : 0;
+ req.ece.vendor_id = id_priv->ece.vendor_id;
+ req.ece.attr_mod = id_priv->ece.attr_mod;
+ trace_cm_send_req(id_priv);
ret = ib_send_cm_req(id_priv->cm_id.ib, &req);
out:
if (ret && !IS_ERR(id)) {
@@ -3481,7 +4425,11 @@ static int cma_connect_iw(struct rdma_id_private *id_priv,
if (IS_ERR(cm_id))
return PTR_ERR(cm_id);
+ mutex_lock(&id_priv->qp_mutex);
cm_id->tos = id_priv->tos;
+ cm_id->tos_set = id_priv->tos_set;
+ mutex_unlock(&id_priv->qp_mutex);
+
id_priv->cm_id.iw = cm_id;
memcpy(&cm_id->local_addr, cma_src_addr(id_priv),
@@ -3512,12 +4460,23 @@ out:
return ret;
}
-int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
+/**
+ * rdma_connect_locked - Initiate an active connection request.
+ * @id: Connection identifier to connect.
+ * @conn_param: Connection information used for connected QPs.
+ *
+ * Same as rdma_connect() but can only be called from the
+ * RDMA_CM_EVENT_ROUTE_RESOLVED handler callback.
+ */
+int rdma_connect_locked(struct rdma_cm_id *id,
+ struct rdma_conn_param *conn_param)
{
- struct rdma_id_private *id_priv;
+ struct rdma_id_private *id_priv =
+ container_of(id, struct rdma_id_private, id);
int ret;
- id_priv = container_of(id, struct rdma_id_private, id);
+ lockdep_assert_held(&id_priv->handler_mutex);
+
if (!cma_comp_exch(id_priv, RDMA_CM_ROUTE_RESOLVED, RDMA_CM_CONNECT))
return -EINVAL;
@@ -3531,20 +4490,66 @@ int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
ret = cma_resolve_ib_udp(id_priv, conn_param);
else
ret = cma_connect_ib(id_priv, conn_param);
- } else if (rdma_cap_iw_cm(id->device, id->port_num))
+ } else if (rdma_cap_iw_cm(id->device, id->port_num)) {
ret = cma_connect_iw(id_priv, conn_param);
- else
+ } else {
ret = -ENOSYS;
+ }
if (ret)
- goto err;
-
+ goto err_state;
return 0;
-err:
+err_state:
cma_comp_exch(id_priv, RDMA_CM_CONNECT, RDMA_CM_ROUTE_RESOLVED);
return ret;
}
+EXPORT_SYMBOL(rdma_connect_locked);
+
+/**
+ * rdma_connect - Initiate an active connection request.
+ * @id: Connection identifier to connect.
+ * @conn_param: Connection information used for connected QPs.
+ *
+ * Users must have resolved a route for the rdma_cm_id to connect with by having
+ * called rdma_resolve_route before calling this routine.
+ *
+ * This call will either connect to a remote QP or obtain remote QP information
+ * for unconnected rdma_cm_id's. The actual operation is based on the
+ * rdma_cm_id's port space.
+ */
+int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
+{
+ struct rdma_id_private *id_priv =
+ container_of(id, struct rdma_id_private, id);
+ int ret;
+
+ mutex_lock(&id_priv->handler_mutex);
+ ret = rdma_connect_locked(id, conn_param);
+ mutex_unlock(&id_priv->handler_mutex);
+ return ret;
+}
EXPORT_SYMBOL(rdma_connect);
+/**
+ * rdma_connect_ece - Initiate an active connection request with ECE data.
+ * @id: Connection identifier to connect.
+ * @conn_param: Connection information used for connected QPs.
+ * @ece: ECE parameters
+ *
+ * See rdma_connect() explanation.
+ */
+int rdma_connect_ece(struct rdma_cm_id *id, struct rdma_conn_param *conn_param,
+ struct rdma_ucm_ece *ece)
+{
+ struct rdma_id_private *id_priv =
+ container_of(id, struct rdma_id_private, id);
+
+ id_priv->ece.vendor_id = ece->vendor_id;
+ id_priv->ece.attr_mod = ece->attr_mod;
+
+ return rdma_connect(id, conn_param);
+}
+EXPORT_SYMBOL(rdma_connect_ece);
+
static int cma_accept_ib(struct rdma_id_private *id_priv,
struct rdma_conn_param *conn_param)
{
@@ -3570,7 +4575,10 @@ static int cma_accept_ib(struct rdma_id_private *id_priv,
rep.flow_control = conn_param->flow_control;
rep.rnr_retry_count = min_t(u8, 7, conn_param->rnr_retry_count);
rep.srq = id_priv->srq ? 1 : 0;
+ rep.ece.vendor_id = id_priv->ece.vendor_id;
+ rep.ece.attr_mod = id_priv->ece.attr_mod;
+ trace_cm_send_rep(id_priv);
ret = ib_send_cm_rep(id_priv->cm_id.ib, &rep);
out:
return ret;
@@ -3582,6 +4590,9 @@ static int cma_accept_iw(struct rdma_id_private *id_priv,
struct iw_cm_conn_param iw_param;
int ret;
+ if (!conn_param)
+ return -EINVAL;
+
ret = cma_modify_qp_rtr(id_priv, conn_param);
if (ret)
return ret;
@@ -3590,9 +4601,9 @@ static int cma_accept_iw(struct rdma_id_private *id_priv,
iw_param.ird = conn_param->responder_resources;
iw_param.private_data = conn_param->private_data;
iw_param.private_data_len = conn_param->private_data_len;
- if (id_priv->id.qp) {
+ if (id_priv->id.qp)
iw_param.qpn = id_priv->qp_num;
- } else
+ else
iw_param.qpn = conn_param->qp_num;
return iw_cm_accept(id_priv->cm_id.iw, &iw_param);
@@ -3608,28 +4619,53 @@ static int cma_send_sidr_rep(struct rdma_id_private *id_priv,
memset(&rep, 0, sizeof rep);
rep.status = status;
if (status == IB_SIDR_SUCCESS) {
- ret = cma_set_qkey(id_priv, qkey);
+ if (qkey)
+ ret = cma_set_qkey(id_priv, qkey);
+ else
+ ret = cma_set_default_qkey(id_priv);
if (ret)
return ret;
rep.qp_num = id_priv->qp_num;
rep.qkey = id_priv->qkey;
+
+ rep.ece.vendor_id = id_priv->ece.vendor_id;
+ rep.ece.attr_mod = id_priv->ece.attr_mod;
}
+
rep.private_data = private_data;
rep.private_data_len = private_data_len;
+ trace_cm_send_sidr_rep(id_priv);
return ib_send_cm_sidr_rep(id_priv->cm_id.ib, &rep);
}
+/**
+ * rdma_accept - Called to accept a connection request or response.
+ * @id: Connection identifier associated with the request.
+ * @conn_param: Information needed to establish the connection. This must be
+ * provided if accepting a connection request. If accepting a connection
+ * response, this parameter must be NULL.
+ *
+ * Typically, this routine is only called by the listener to accept a connection
+ * request. It must also be called on the active side of a connection if the
+ * user is performing their own QP transitions.
+ *
+ * In the case of error, a reject message is sent to the remote side and the
+ * state of the qp associated with the id is modified to error, such that any
+ * previously posted receive buffers would be flushed.
+ *
+ * This function is for use by kernel ULPs and must be called from under the
+ * handler callback.
+ */
int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
{
- struct rdma_id_private *id_priv;
+ struct rdma_id_private *id_priv =
+ container_of(id, struct rdma_id_private, id);
int ret;
- id_priv = container_of(id, struct rdma_id_private, id);
-
- id_priv->owner = task_pid_nr(current);
+ lockdep_assert_held(&id_priv->handler_mutex);
- if (!cma_comp(id_priv, RDMA_CM_CONNECT))
+ if (READ_ONCE(id_priv->state) != RDMA_CM_CONNECT)
return -EINVAL;
if (!id->qp && conn_param) {
@@ -3653,22 +4689,53 @@ int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
else
ret = cma_rep_recv(id_priv);
}
- } else if (rdma_cap_iw_cm(id->device, id->port_num))
+ } else if (rdma_cap_iw_cm(id->device, id->port_num)) {
ret = cma_accept_iw(id_priv, conn_param);
- else
+ } else {
ret = -ENOSYS;
-
+ }
if (ret)
goto reject;
return 0;
reject:
cma_modify_qp_err(id_priv);
- rdma_reject(id, NULL, 0);
+ rdma_reject(id, NULL, 0, IB_CM_REJ_CONSUMER_DEFINED);
return ret;
}
EXPORT_SYMBOL(rdma_accept);
+int rdma_accept_ece(struct rdma_cm_id *id, struct rdma_conn_param *conn_param,
+ struct rdma_ucm_ece *ece)
+{
+ struct rdma_id_private *id_priv =
+ container_of(id, struct rdma_id_private, id);
+
+ id_priv->ece.vendor_id = ece->vendor_id;
+ id_priv->ece.attr_mod = ece->attr_mod;
+
+ return rdma_accept(id, conn_param);
+}
+EXPORT_SYMBOL(rdma_accept_ece);
+
+void rdma_lock_handler(struct rdma_cm_id *id)
+{
+ struct rdma_id_private *id_priv =
+ container_of(id, struct rdma_id_private, id);
+
+ mutex_lock(&id_priv->handler_mutex);
+}
+EXPORT_SYMBOL(rdma_lock_handler);
+
+void rdma_unlock_handler(struct rdma_cm_id *id)
+{
+ struct rdma_id_private *id_priv =
+ container_of(id, struct rdma_id_private, id);
+
+ mutex_unlock(&id_priv->handler_mutex);
+}
+EXPORT_SYMBOL(rdma_unlock_handler);
+
int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event)
{
struct rdma_id_private *id_priv;
@@ -3691,7 +4758,7 @@ int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event)
EXPORT_SYMBOL(rdma_notify);
int rdma_reject(struct rdma_cm_id *id, const void *private_data,
- u8 private_data_len)
+ u8 private_data_len, u8 reason)
{
struct rdma_id_private *id_priv;
int ret;
@@ -3701,18 +4768,20 @@ int rdma_reject(struct rdma_cm_id *id, const void *private_data,
return -EINVAL;
if (rdma_cap_ib_cm(id->device, id->port_num)) {
- if (id->qp_type == IB_QPT_UD)
+ if (id->qp_type == IB_QPT_UD) {
ret = cma_send_sidr_rep(id_priv, IB_SIDR_REJECT, 0,
private_data, private_data_len);
- else
- ret = ib_send_cm_rej(id_priv->cm_id.ib,
- IB_CM_REJ_CONSUMER_DEFINED, NULL,
- 0, private_data, private_data_len);
+ } else {
+ trace_cm_send_rej(id_priv);
+ ret = ib_send_cm_rej(id_priv->cm_id.ib, reason, NULL, 0,
+ private_data, private_data_len);
+ }
} else if (rdma_cap_iw_cm(id->device, id->port_num)) {
ret = iw_cm_reject(id_priv->cm_id.iw,
private_data, private_data_len);
- } else
+ } else {
ret = -ENOSYS;
+ }
return ret;
}
@@ -3732,8 +4801,13 @@ int rdma_disconnect(struct rdma_cm_id *id)
if (ret)
goto out;
/* Initiate or respond to a disconnect. */
- if (ib_send_cm_dreq(id_priv->cm_id.ib, NULL, 0))
- ib_send_cm_drep(id_priv->cm_id.ib, NULL, 0);
+ trace_cm_disconnect(id_priv);
+ if (ib_send_cm_dreq(id_priv->cm_id.ib, NULL, 0)) {
+ if (!ib_send_cm_drep(id_priv->cm_id.ib, NULL, 0))
+ trace_cm_sent_drep(id_priv);
+ } else {
+ trace_cm_sent_dreq(id_priv);
+ }
} else if (rdma_cap_iw_cm(id->device, id->port_num)) {
ret = iw_cm_disconnect(id_priv->cm_id.iw, 0);
} else
@@ -3744,58 +4818,68 @@ out:
}
EXPORT_SYMBOL(rdma_disconnect);
+static void cma_make_mc_event(int status, struct rdma_id_private *id_priv,
+ struct ib_sa_multicast *multicast,
+ struct rdma_cm_event *event,
+ struct cma_multicast *mc)
+{
+ struct rdma_dev_addr *dev_addr;
+ enum ib_gid_type gid_type;
+ struct net_device *ndev;
+
+ if (status)
+ pr_debug_ratelimited("RDMA CM: MULTICAST_ERROR: failed to join multicast. status %d\n",
+ status);
+
+ event->status = status;
+ event->param.ud.private_data = mc->context;
+ if (status) {
+ event->event = RDMA_CM_EVENT_MULTICAST_ERROR;
+ return;
+ }
+
+ dev_addr = &id_priv->id.route.addr.dev_addr;
+ ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
+ gid_type =
+ id_priv->cma_dev
+ ->default_gid_type[id_priv->id.port_num -
+ rdma_start_port(
+ id_priv->cma_dev->device)];
+
+ event->event = RDMA_CM_EVENT_MULTICAST_JOIN;
+ if (ib_init_ah_from_mcmember(id_priv->id.device, id_priv->id.port_num,
+ &multicast->rec, ndev, gid_type,
+ &event->param.ud.ah_attr)) {
+ event->event = RDMA_CM_EVENT_MULTICAST_ERROR;
+ goto out;
+ }
+
+ event->param.ud.qp_num = 0xFFFFFF;
+ event->param.ud.qkey = id_priv->qkey;
+
+out:
+ dev_put(ndev);
+}
+
static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
{
- struct rdma_id_private *id_priv;
struct cma_multicast *mc = multicast->context;
- struct rdma_cm_event event;
+ struct rdma_id_private *id_priv = mc->id_priv;
+ struct rdma_cm_event event = {};
int ret = 0;
- id_priv = mc->id_priv;
mutex_lock(&id_priv->handler_mutex);
- if (id_priv->state != RDMA_CM_ADDR_BOUND &&
- id_priv->state != RDMA_CM_ADDR_RESOLVED)
+ if (READ_ONCE(id_priv->state) == RDMA_CM_DEVICE_REMOVAL ||
+ READ_ONCE(id_priv->state) == RDMA_CM_DESTROYING)
goto out;
- if (!status)
- status = cma_set_qkey(id_priv, be32_to_cpu(multicast->rec.qkey));
- mutex_lock(&id_priv->qp_mutex);
- if (!status && id_priv->id.qp)
- status = ib_attach_mcast(id_priv->id.qp, &multicast->rec.mgid,
- be16_to_cpu(multicast->rec.mlid));
- mutex_unlock(&id_priv->qp_mutex);
-
- memset(&event, 0, sizeof event);
- event.status = status;
- event.param.ud.private_data = mc->context;
- if (!status) {
- struct rdma_dev_addr *dev_addr =
- &id_priv->id.route.addr.dev_addr;
- struct net_device *ndev =
- dev_get_by_index(&init_net, dev_addr->bound_dev_if);
- enum ib_gid_type gid_type =
- id_priv->cma_dev->default_gid_type[id_priv->id.port_num -
- rdma_start_port(id_priv->cma_dev->device)];
-
- event.event = RDMA_CM_EVENT_MULTICAST_JOIN;
- ib_init_ah_from_mcmember(id_priv->id.device,
- id_priv->id.port_num, &multicast->rec,
- ndev, gid_type,
- &event.param.ud.ah_attr);
- event.param.ud.qp_num = 0xFFFFFF;
- event.param.ud.qkey = be32_to_cpu(multicast->rec.qkey);
- if (ndev)
- dev_put(ndev);
- } else
- event.event = RDMA_CM_EVENT_MULTICAST_ERROR;
-
- ret = id_priv->id.event_handler(&id_priv->id, &event);
- if (ret) {
- cma_exch(id_priv, RDMA_CM_DESTROYING);
- mutex_unlock(&id_priv->handler_mutex);
- rdma_destroy_id(&id_priv->id);
- return 0;
+ ret = cma_set_qkey(id_priv, be32_to_cpu(multicast->rec.qkey));
+ if (!ret) {
+ cma_make_mc_event(status, id_priv, multicast, &event, mc);
+ ret = cma_cm_event_handler(id_priv, &event);
}
+ rdma_destroy_ah_attr(&event.param.ud.ah_attr);
+ WARN_ON(ret);
out:
mutex_unlock(&id_priv->handler_mutex);
@@ -3819,7 +4903,7 @@ static void cma_set_mgid(struct rdma_id_private *id_priv,
memcpy(mgid, &sin6->sin6_addr, sizeof *mgid);
} else if (addr->sa_family == AF_IB) {
memcpy(mgid, &((struct sockaddr_ib *) addr)->sib_addr, sizeof *mgid);
- } else if ((addr->sa_family == AF_INET6)) {
+ } else if (addr->sa_family == AF_INET6) {
ipv6_ib_mc_map(&sin6->sin6_addr, dev_addr->broadcast, mc_map);
if (id_priv->id.ps == RDMA_PS_UDP)
mc_map[7] = 0x01; /* Use RDMA CM signature */
@@ -3832,63 +4916,10 @@ static void cma_set_mgid(struct rdma_id_private *id_priv,
}
}
-static void cma_query_sa_classport_info_cb(int status,
- struct ib_class_port_info *rec,
- void *context)
-{
- struct class_port_info_context *cb_ctx = context;
-
- WARN_ON(!context);
-
- if (status || !rec) {
- pr_debug("RDMA CM: %s port %u failed query ClassPortInfo status: %d\n",
- cb_ctx->device->name, cb_ctx->port_num, status);
- goto out;
- }
-
- memcpy(cb_ctx->class_port_info, rec, sizeof(struct ib_class_port_info));
-
-out:
- complete(&cb_ctx->done);
-}
-
-static int cma_query_sa_classport_info(struct ib_device *device, u8 port_num,
- struct ib_class_port_info *class_port_info)
-{
- struct class_port_info_context *cb_ctx;
- int ret;
-
- cb_ctx = kmalloc(sizeof(*cb_ctx), GFP_KERNEL);
- if (!cb_ctx)
- return -ENOMEM;
-
- cb_ctx->device = device;
- cb_ctx->class_port_info = class_port_info;
- cb_ctx->port_num = port_num;
- init_completion(&cb_ctx->done);
-
- ret = ib_sa_classport_info_rec_query(&sa_client, device, port_num,
- CMA_QUERY_CLASSPORT_INFO_TIMEOUT,
- GFP_KERNEL, cma_query_sa_classport_info_cb,
- cb_ctx, &cb_ctx->sa_query);
- if (ret < 0) {
- pr_err("RDMA CM: %s port %u failed to send ClassPortInfo query, ret: %d\n",
- device->name, port_num, ret);
- goto out;
- }
-
- wait_for_completion(&cb_ctx->done);
-
-out:
- kfree(cb_ctx);
- return ret;
-}
-
static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
struct cma_multicast *mc)
{
struct ib_sa_mcmember_rec rec;
- struct ib_class_port_info class_port_info;
struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
ib_sa_comp_mask comp_mask;
int ret;
@@ -3899,9 +4930,11 @@ static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
if (ret)
return ret;
- ret = cma_set_qkey(id_priv, 0);
- if (ret)
- return ret;
+ if (!id_priv->qkey) {
+ ret = cma_set_default_qkey(id_priv);
+ if (ret)
+ return ret;
+ }
cma_set_mgid(id_priv, (struct sockaddr *) &mc->addr, &rec.mgid);
rec.qkey = cpu_to_be32(id_priv->qkey);
@@ -3909,23 +4942,6 @@ static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
rec.join_state = mc->join_state;
- if (rec.join_state == BIT(SENDONLY_FULLMEMBER_JOIN)) {
- ret = cma_query_sa_classport_info(id_priv->id.device,
- id_priv->id.port_num,
- &class_port_info);
-
- if (ret)
- return ret;
-
- if (!(ib_get_cpi_capmask2(&class_port_info) &
- IB_SA_CAP_MASK2_SENDONLY_FULL_MEM_SUPPORT)) {
- pr_warn("RDMA CM: %s port %u Unable to multicast join\n"
- "RDMA CM: SM doesn't support Send Only Full Member option\n",
- id_priv->id.device->name, id_priv->id.port_num);
- return -EOPNOTSUPP;
- }
- }
-
comp_mask = IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID |
IB_SA_MCMEMBER_REC_PKEY | IB_SA_MCMEMBER_REC_JOIN_STATE |
IB_SA_MCMEMBER_REC_QKEY | IB_SA_MCMEMBER_REC_SL |
@@ -3939,26 +4955,14 @@ static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
IB_SA_MCMEMBER_REC_MTU |
IB_SA_MCMEMBER_REC_HOP_LIMIT;
- mc->multicast.ib = ib_sa_join_multicast(&sa_client, id_priv->id.device,
- id_priv->id.port_num, &rec,
- comp_mask, GFP_KERNEL,
- cma_ib_mc_handler, mc);
- return PTR_ERR_OR_ZERO(mc->multicast.ib);
+ mc->sa_mc = ib_sa_join_multicast(&sa_client, id_priv->id.device,
+ id_priv->id.port_num, &rec, comp_mask,
+ GFP_KERNEL, cma_ib_mc_handler, mc);
+ return PTR_ERR_OR_ZERO(mc->sa_mc);
}
-static void iboe_mcast_work_handler(struct work_struct *work)
-{
- struct iboe_mcast_work *mw = container_of(work, struct iboe_mcast_work, work);
- struct cma_multicast *mc = mw->mc;
- struct ib_sa_multicast *m = mc->multicast.ib;
-
- mc->multicast.ib->context = mc;
- cma_ib_mc_handler(0, m);
- kref_put(&mc->mcref, release_mc);
- kfree(mw);
-}
-
-static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid)
+static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid,
+ enum ib_gid_type gid_type)
{
struct sockaddr_in *sin = (struct sockaddr_in *)addr;
struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)addr;
@@ -3968,8 +4972,10 @@ static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid)
} else if (addr->sa_family == AF_INET6) {
memcpy(mgid, &sin6->sin6_addr, sizeof *mgid);
} else {
- mgid->raw[0] = 0xff;
- mgid->raw[1] = 0x0e;
+ mgid->raw[0] =
+ (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) ? 0 : 0xff;
+ mgid->raw[1] =
+ (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) ? 0 : 0x0e;
mgid->raw[2] = 0;
mgid->raw[3] = 0;
mgid->raw[4] = 0;
@@ -3987,55 +4993,39 @@ static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid)
static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
struct cma_multicast *mc)
{
- struct iboe_mcast_work *work;
struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
int err = 0;
struct sockaddr *addr = (struct sockaddr *)&mc->addr;
struct net_device *ndev = NULL;
+ struct ib_sa_multicast ib = {};
enum ib_gid_type gid_type;
bool send_only;
send_only = mc->join_state == BIT(SENDONLY_FULLMEMBER_JOIN);
- if (cma_zero_addr((struct sockaddr *)&mc->addr))
+ if (cma_zero_addr(addr))
return -EINVAL;
- work = kzalloc(sizeof *work, GFP_KERNEL);
- if (!work)
- return -ENOMEM;
-
- mc->multicast.ib = kzalloc(sizeof(struct ib_sa_multicast), GFP_KERNEL);
- if (!mc->multicast.ib) {
- err = -ENOMEM;
- goto out1;
- }
-
- cma_iboe_set_mgid(addr, &mc->multicast.ib->rec.mgid);
-
- mc->multicast.ib->rec.pkey = cpu_to_be16(0xffff);
- if (id_priv->id.ps == RDMA_PS_UDP)
- mc->multicast.ib->rec.qkey = cpu_to_be32(RDMA_UDP_QKEY);
+ gid_type = id_priv->cma_dev->default_gid_type[id_priv->id.port_num -
+ rdma_start_port(id_priv->cma_dev->device)];
+ cma_iboe_set_mgid(addr, &ib.rec.mgid, gid_type);
+ ib.rec.pkey = cpu_to_be16(0xffff);
if (dev_addr->bound_dev_if)
- ndev = dev_get_by_index(&init_net, dev_addr->bound_dev_if);
- if (!ndev) {
- err = -ENODEV;
- goto out2;
- }
- mc->multicast.ib->rec.rate = iboe_get_rate(ndev);
- mc->multicast.ib->rec.hop_limit = 1;
- mc->multicast.ib->rec.mtu = iboe_get_mtu(ndev->mtu);
+ ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
+ if (!ndev)
+ return -ENODEV;
+
+ ib.rec.rate = IB_RATE_PORT_CURRENT;
+ ib.rec.hop_limit = 1;
+ ib.rec.mtu = iboe_get_mtu(ndev->mtu);
- gid_type = id_priv->cma_dev->default_gid_type[id_priv->id.port_num -
- rdma_start_port(id_priv->cma_dev->device)];
if (addr->sa_family == AF_INET) {
if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) {
- mc->multicast.ib->rec.hop_limit = IPV6_DEFAULT_HOPLIMIT;
+ ib.rec.hop_limit = IPV6_DEFAULT_HOPLIMIT;
if (!send_only) {
- err = cma_igmp_send(ndev, &mc->multicast.ib->rec.mgid,
+ err = cma_igmp_send(ndev, &ib.rec.mgid,
true);
- if (!err)
- mc->igmp_joined = true;
}
}
} else {
@@ -4043,67 +5033,69 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
err = -ENOTSUPP;
}
dev_put(ndev);
- if (err || !mc->multicast.ib->rec.mtu) {
- if (!err)
- err = -EINVAL;
- goto out2;
- }
- rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
- &mc->multicast.ib->rec.port_gid);
- work->id = id_priv;
- work->mc = mc;
- INIT_WORK(&work->work, iboe_mcast_work_handler);
- kref_get(&mc->mcref);
- queue_work(cma_wq, &work->work);
+ if (err || !ib.rec.mtu)
+ return err ?: -EINVAL;
- return 0;
+ if (!id_priv->qkey)
+ cma_set_default_qkey(id_priv);
-out2:
- kfree(mc->multicast.ib);
-out1:
- kfree(work);
- return err;
+ rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
+ &ib.rec.port_gid);
+ INIT_WORK(&mc->iboe_join.work, cma_iboe_join_work_handler);
+ cma_make_mc_event(0, id_priv, &ib, &mc->iboe_join.event, mc);
+ queue_work(cma_wq, &mc->iboe_join.work);
+ return 0;
}
int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
u8 join_state, void *context)
{
- struct rdma_id_private *id_priv;
+ struct rdma_id_private *id_priv =
+ container_of(id, struct rdma_id_private, id);
struct cma_multicast *mc;
int ret;
- id_priv = container_of(id, struct rdma_id_private, id);
- if (!cma_comp(id_priv, RDMA_CM_ADDR_BOUND) &&
- !cma_comp(id_priv, RDMA_CM_ADDR_RESOLVED))
+ /* Not supported for kernel QPs */
+ if (WARN_ON(id->qp))
return -EINVAL;
- mc = kmalloc(sizeof *mc, GFP_KERNEL);
+ /* ULP is calling this wrong. */
+ if (!id->device || (READ_ONCE(id_priv->state) != RDMA_CM_ADDR_BOUND &&
+ READ_ONCE(id_priv->state) != RDMA_CM_ADDR_RESOLVED))
+ return -EINVAL;
+
+ if (id_priv->id.qp_type != IB_QPT_UD)
+ return -EINVAL;
+
+ mc = kzalloc(sizeof(*mc), GFP_KERNEL);
if (!mc)
return -ENOMEM;
memcpy(&mc->addr, addr, rdma_addr_size(addr));
mc->context = context;
mc->id_priv = id_priv;
- mc->igmp_joined = false;
mc->join_state = join_state;
- spin_lock(&id_priv->lock);
- list_add(&mc->list, &id_priv->mc_list);
- spin_unlock(&id_priv->lock);
if (rdma_protocol_roce(id->device, id->port_num)) {
- kref_init(&mc->mcref);
ret = cma_iboe_join_multicast(id_priv, mc);
- } else if (rdma_cap_ib_mcast(id->device, id->port_num))
+ if (ret)
+ goto out_err;
+ } else if (rdma_cap_ib_mcast(id->device, id->port_num)) {
ret = cma_join_ib_multicast(id_priv, mc);
- else
+ if (ret)
+ goto out_err;
+ } else {
ret = -ENOSYS;
-
- if (ret) {
- spin_lock_irq(&id_priv->lock);
- list_del(&mc->list);
- spin_unlock_irq(&id_priv->lock);
- kfree(mc);
+ goto out_err;
}
+
+ spin_lock(&id_priv->lock);
+ list_add(&mc->list, &id_priv->mc_list);
+ spin_unlock(&id_priv->lock);
+
+ return 0;
+out_err:
+ kfree(mc);
return ret;
}
EXPORT_SYMBOL(rdma_join_multicast);
@@ -4116,41 +5108,14 @@ void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr)
id_priv = container_of(id, struct rdma_id_private, id);
spin_lock_irq(&id_priv->lock);
list_for_each_entry(mc, &id_priv->mc_list, list) {
- if (!memcmp(&mc->addr, addr, rdma_addr_size(addr))) {
- list_del(&mc->list);
- spin_unlock_irq(&id_priv->lock);
-
- if (id->qp)
- ib_detach_mcast(id->qp,
- &mc->multicast.ib->rec.mgid,
- be16_to_cpu(mc->multicast.ib->rec.mlid));
-
- BUG_ON(id_priv->cma_dev->device != id->device);
-
- if (rdma_cap_ib_mcast(id->device, id->port_num)) {
- ib_sa_free_multicast(mc->multicast.ib);
- kfree(mc);
- } else if (rdma_protocol_roce(id->device, id->port_num)) {
- if (mc->igmp_joined) {
- struct rdma_dev_addr *dev_addr =
- &id->route.addr.dev_addr;
- struct net_device *ndev = NULL;
-
- if (dev_addr->bound_dev_if)
- ndev = dev_get_by_index(&init_net,
- dev_addr->bound_dev_if);
- if (ndev) {
- cma_igmp_send(ndev,
- &mc->multicast.ib->rec.mgid,
- false);
- dev_put(ndev);
- }
- mc->igmp_joined = false;
- }
- kref_put(&mc->mcref, release_mc);
- }
- return;
- }
+ if (memcmp(&mc->addr, addr, rdma_addr_size(addr)) != 0)
+ continue;
+ list_del(&mc->list);
+ spin_unlock_irq(&id_priv->lock);
+
+ WARN_ON(id_priv->cma_dev->device != id->device);
+ destroy_mc(id_priv, mc);
+ return;
}
spin_unlock_irq(&id_priv->lock);
}
@@ -4159,7 +5124,7 @@ EXPORT_SYMBOL(rdma_leave_multicast);
static int cma_netdev_change(struct net_device *ndev, struct rdma_id_private *id_priv)
{
struct rdma_dev_addr *dev_addr;
- struct cma_ndev_work *work;
+ struct cma_work *work;
dev_addr = &id_priv->id.route.addr.dev_addr;
@@ -4172,10 +5137,10 @@ static int cma_netdev_change(struct net_device *ndev, struct rdma_id_private *id
if (!work)
return -ENOMEM;
- INIT_WORK(&work->work, cma_ndev_work_handler);
+ INIT_WORK(&work->work, cma_work_handler);
work->id = id_priv;
work->event.event = RDMA_CM_EVENT_ADDR_CHANGE;
- atomic_inc(&id_priv->refcount);
+ cma_id_get(id_priv);
queue_work(cma_wq, &work->work);
}
@@ -4193,12 +5158,12 @@ static int cma_netdev_callback(struct notifier_block *self, unsigned long event,
if (event != NETDEV_BONDING_FAILOVER)
return NOTIFY_DONE;
- if (!(ndev->flags & IFF_MASTER) || !(ndev->priv_flags & IFF_BONDING))
+ if (!netif_is_bond_master(ndev))
return NOTIFY_DONE;
mutex_lock(&lock);
list_for_each_entry(cma_dev, &dev_list, list)
- list_for_each_entry(id_priv, &cma_dev->id_list, list) {
+ list_for_each_entry(id_priv, &cma_dev->id_list, device_item) {
ret = cma_netdev_change(ndev, id_priv);
if (ret)
goto out;
@@ -4209,205 +5174,261 @@ out:
return ret;
}
-static struct notifier_block cma_nb = {
- .notifier_call = cma_netdev_callback
-};
-
-static void cma_add_one(struct ib_device *device)
+static void cma_netevent_work_handler(struct work_struct *_work)
{
- struct cma_device *cma_dev;
- struct rdma_id_private *id_priv;
- unsigned int i;
- unsigned long supported_gids = 0;
+ struct rdma_id_private *id_priv =
+ container_of(_work, struct rdma_id_private, id.net_work);
+ struct rdma_cm_event event = {};
- cma_dev = kmalloc(sizeof *cma_dev, GFP_KERNEL);
- if (!cma_dev)
- return;
+ mutex_lock(&id_priv->handler_mutex);
- cma_dev->device = device;
- cma_dev->default_gid_type = kcalloc(device->phys_port_cnt,
- sizeof(*cma_dev->default_gid_type),
- GFP_KERNEL);
- if (!cma_dev->default_gid_type) {
- kfree(cma_dev);
+ if (READ_ONCE(id_priv->state) == RDMA_CM_DESTROYING ||
+ READ_ONCE(id_priv->state) == RDMA_CM_DEVICE_REMOVAL)
+ goto out_unlock;
+
+ event.event = RDMA_CM_EVENT_UNREACHABLE;
+ event.status = -ETIMEDOUT;
+
+ if (cma_cm_event_handler(id_priv, &event)) {
+ __acquire(&id_priv->handler_mutex);
+ id_priv->cm_id.ib = NULL;
+ cma_id_put(id_priv);
+ destroy_id_handler_unlock(id_priv);
return;
}
- for (i = rdma_start_port(device); i <= rdma_end_port(device); i++) {
- supported_gids = roce_gid_type_mask_support(device, i);
- WARN_ON(!supported_gids);
- cma_dev->default_gid_type[i - rdma_start_port(device)] =
- find_first_bit(&supported_gids, BITS_PER_LONG);
- }
- init_completion(&cma_dev->comp);
- atomic_set(&cma_dev->refcount, 1);
- INIT_LIST_HEAD(&cma_dev->id_list);
- ib_set_client_data(device, &cma_client, cma_dev);
-
- mutex_lock(&lock);
- list_add_tail(&cma_dev->list, &dev_list);
- list_for_each_entry(id_priv, &listen_any_list, list)
- cma_listen_on_dev(id_priv, cma_dev);
- mutex_unlock(&lock);
+out_unlock:
+ mutex_unlock(&id_priv->handler_mutex);
+ cma_id_put(id_priv);
}
-static int cma_remove_id_dev(struct rdma_id_private *id_priv)
+static int cma_netevent_callback(struct notifier_block *self,
+ unsigned long event, void *ctx)
{
- struct rdma_cm_event event;
- enum rdma_cm_state state;
- int ret = 0;
+ struct id_table_entry *ips_node = NULL;
+ struct rdma_id_private *current_id;
+ struct neighbour *neigh = ctx;
+ unsigned long flags;
- /* Record that we want to remove the device */
- state = cma_exch(id_priv, RDMA_CM_DEVICE_REMOVAL);
- if (state == RDMA_CM_DESTROYING)
- return 0;
+ if (event != NETEVENT_NEIGH_UPDATE)
+ return NOTIFY_DONE;
- cma_cancel_operation(id_priv, state);
- mutex_lock(&id_priv->handler_mutex);
+ spin_lock_irqsave(&id_table_lock, flags);
+ if (neigh->tbl->family == AF_INET6) {
+ struct sockaddr_in6 neigh_sock_6;
+
+ neigh_sock_6.sin6_family = AF_INET6;
+ neigh_sock_6.sin6_addr = *(struct in6_addr *)neigh->primary_key;
+ ips_node = node_from_ndev_ip(&id_table, neigh->dev->ifindex,
+ (struct sockaddr *)&neigh_sock_6);
+ } else if (neigh->tbl->family == AF_INET) {
+ struct sockaddr_in neigh_sock_4;
+
+ neigh_sock_4.sin_family = AF_INET;
+ neigh_sock_4.sin_addr.s_addr = *(__be32 *)(neigh->primary_key);
+ ips_node = node_from_ndev_ip(&id_table, neigh->dev->ifindex,
+ (struct sockaddr *)&neigh_sock_4);
+ } else
+ goto out;
- /* Check for destruction from another callback. */
- if (!cma_comp(id_priv, RDMA_CM_DEVICE_REMOVAL))
+ if (!ips_node)
goto out;
- memset(&event, 0, sizeof event);
- event.event = RDMA_CM_EVENT_DEVICE_REMOVAL;
- ret = id_priv->id.event_handler(&id_priv->id, &event);
+ list_for_each_entry(current_id, &ips_node->id_list, id_list_entry) {
+ if (!memcmp(current_id->id.route.addr.dev_addr.dst_dev_addr,
+ neigh->ha, ETH_ALEN))
+ continue;
+ cma_id_get(current_id);
+ if (!queue_work(cma_wq, &current_id->id.net_work))
+ cma_id_put(current_id);
+ }
out:
+ spin_unlock_irqrestore(&id_table_lock, flags);
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block cma_nb = {
+ .notifier_call = cma_netdev_callback
+};
+
+static struct notifier_block cma_netevent_cb = {
+ .notifier_call = cma_netevent_callback
+};
+
+static void cma_send_device_removal_put(struct rdma_id_private *id_priv)
+{
+ struct rdma_cm_event event = { .event = RDMA_CM_EVENT_DEVICE_REMOVAL };
+ enum rdma_cm_state state;
+ unsigned long flags;
+
+ mutex_lock(&id_priv->handler_mutex);
+ /* Record that we want to remove the device */
+ spin_lock_irqsave(&id_priv->lock, flags);
+ state = id_priv->state;
+ if (state == RDMA_CM_DESTROYING || state == RDMA_CM_DEVICE_REMOVAL) {
+ spin_unlock_irqrestore(&id_priv->lock, flags);
+ mutex_unlock(&id_priv->handler_mutex);
+ cma_id_put(id_priv);
+ return;
+ }
+ id_priv->state = RDMA_CM_DEVICE_REMOVAL;
+ spin_unlock_irqrestore(&id_priv->lock, flags);
+
+ if (cma_cm_event_handler(id_priv, &event)) {
+ /*
+ * At this point the ULP promises it won't call
+ * rdma_destroy_id() concurrently
+ */
+ cma_id_put(id_priv);
+ mutex_unlock(&id_priv->handler_mutex);
+ trace_cm_id_destroy(id_priv);
+ _destroy_id(id_priv, state);
+ return;
+ }
mutex_unlock(&id_priv->handler_mutex);
- return ret;
+
+ /*
+ * If this races with destroy then the thread that first assigns state
+ * to a destroying does the cancel.
+ */
+ cma_cancel_operation(id_priv, state);
+ cma_id_put(id_priv);
}
static void cma_process_remove(struct cma_device *cma_dev)
{
- struct rdma_id_private *id_priv;
- int ret;
-
mutex_lock(&lock);
while (!list_empty(&cma_dev->id_list)) {
- id_priv = list_entry(cma_dev->id_list.next,
- struct rdma_id_private, list);
+ struct rdma_id_private *id_priv = list_first_entry(
+ &cma_dev->id_list, struct rdma_id_private, device_item);
- list_del(&id_priv->listen_list);
- list_del_init(&id_priv->list);
- atomic_inc(&id_priv->refcount);
+ list_del_init(&id_priv->listen_item);
+ list_del_init(&id_priv->device_item);
+ cma_id_get(id_priv);
mutex_unlock(&lock);
- ret = id_priv->internal_id ? 1 : cma_remove_id_dev(id_priv);
- cma_deref_id(id_priv);
- if (ret)
- rdma_destroy_id(&id_priv->id);
+ cma_send_device_removal_put(id_priv);
mutex_lock(&lock);
}
mutex_unlock(&lock);
- cma_deref_dev(cma_dev);
+ cma_dev_put(cma_dev);
wait_for_completion(&cma_dev->comp);
}
-static void cma_remove_one(struct ib_device *device, void *client_data)
+static bool cma_supported(struct ib_device *device)
{
- struct cma_device *cma_dev = client_data;
-
- if (!cma_dev)
- return;
+ u32 i;
- mutex_lock(&lock);
- list_del(&cma_dev->list);
- mutex_unlock(&lock);
-
- cma_process_remove(cma_dev);
- kfree(cma_dev->default_gid_type);
- kfree(cma_dev);
+ rdma_for_each_port(device, i) {
+ if (rdma_cap_ib_cm(device, i) || rdma_cap_iw_cm(device, i))
+ return true;
+ }
+ return false;
}
-static int cma_get_id_stats(struct sk_buff *skb, struct netlink_callback *cb)
+static int cma_add_one(struct ib_device *device)
{
- struct nlmsghdr *nlh;
- struct rdma_cm_id_stats *id_stats;
- struct rdma_id_private *id_priv;
- struct rdma_cm_id *id = NULL;
+ struct rdma_id_private *to_destroy;
struct cma_device *cma_dev;
- int i_dev = 0, i_id = 0;
-
- /*
- * We export all of the IDs as a sequence of messages. Each
- * ID gets its own netlink message.
- */
- mutex_lock(&lock);
+ struct rdma_id_private *id_priv;
+ unsigned long supported_gids = 0;
+ int ret;
+ u32 i;
- list_for_each_entry(cma_dev, &dev_list, list) {
- if (i_dev < cb->args[0]) {
- i_dev++;
- continue;
- }
+ if (!cma_supported(device))
+ return -EOPNOTSUPP;
- i_id = 0;
- list_for_each_entry(id_priv, &cma_dev->id_list, list) {
- if (i_id < cb->args[1]) {
- i_id++;
- continue;
- }
+ cma_dev = kmalloc(sizeof(*cma_dev), GFP_KERNEL);
+ if (!cma_dev)
+ return -ENOMEM;
- id_stats = ibnl_put_msg(skb, &nlh, cb->nlh->nlmsg_seq,
- sizeof *id_stats, RDMA_NL_RDMA_CM,
- RDMA_NL_RDMA_CM_ID_STATS,
- NLM_F_MULTI);
- if (!id_stats)
- goto out;
+ cma_dev->device = device;
+ cma_dev->default_gid_type = kcalloc(device->phys_port_cnt,
+ sizeof(*cma_dev->default_gid_type),
+ GFP_KERNEL);
+ if (!cma_dev->default_gid_type) {
+ ret = -ENOMEM;
+ goto free_cma_dev;
+ }
- memset(id_stats, 0, sizeof *id_stats);
- id = &id_priv->id;
- id_stats->node_type = id->route.addr.dev_addr.dev_type;
- id_stats->port_num = id->port_num;
- id_stats->bound_dev_if =
- id->route.addr.dev_addr.bound_dev_if;
-
- if (ibnl_put_attr(skb, nlh,
- rdma_addr_size(cma_src_addr(id_priv)),
- cma_src_addr(id_priv),
- RDMA_NL_RDMA_CM_ATTR_SRC_ADDR))
- goto out;
- if (ibnl_put_attr(skb, nlh,
- rdma_addr_size(cma_src_addr(id_priv)),
- cma_dst_addr(id_priv),
- RDMA_NL_RDMA_CM_ATTR_DST_ADDR))
- goto out;
+ cma_dev->default_roce_tos = kcalloc(device->phys_port_cnt,
+ sizeof(*cma_dev->default_roce_tos),
+ GFP_KERNEL);
+ if (!cma_dev->default_roce_tos) {
+ ret = -ENOMEM;
+ goto free_gid_type;
+ }
- id_stats->pid = id_priv->owner;
- id_stats->port_space = id->ps;
- id_stats->cm_state = id_priv->state;
- id_stats->qp_num = id_priv->qp_num;
- id_stats->qp_type = id->qp_type;
+ rdma_for_each_port (device, i) {
+ supported_gids = roce_gid_type_mask_support(device, i);
+ WARN_ON(!supported_gids);
+ if (supported_gids & (1 << CMA_PREFERRED_ROCE_GID_TYPE))
+ cma_dev->default_gid_type[i - rdma_start_port(device)] =
+ CMA_PREFERRED_ROCE_GID_TYPE;
+ else
+ cma_dev->default_gid_type[i - rdma_start_port(device)] =
+ find_first_bit(&supported_gids, BITS_PER_LONG);
+ cma_dev->default_roce_tos[i - rdma_start_port(device)] = 0;
+ }
- i_id++;
- }
+ init_completion(&cma_dev->comp);
+ refcount_set(&cma_dev->refcount, 1);
+ INIT_LIST_HEAD(&cma_dev->id_list);
+ ib_set_client_data(device, &cma_client, cma_dev);
- cb->args[1] = 0;
- i_dev++;
+ mutex_lock(&lock);
+ list_add_tail(&cma_dev->list, &dev_list);
+ list_for_each_entry(id_priv, &listen_any_list, listen_any_item) {
+ ret = cma_listen_on_dev(id_priv, cma_dev, &to_destroy);
+ if (ret)
+ goto free_listen;
}
+ mutex_unlock(&lock);
-out:
+ trace_cm_add_one(device);
+ return 0;
+
+free_listen:
+ list_del(&cma_dev->list);
mutex_unlock(&lock);
- cb->args[0] = i_dev;
- cb->args[1] = i_id;
- return skb->len;
+ /* cma_process_remove() will delete to_destroy */
+ cma_process_remove(cma_dev);
+ kfree(cma_dev->default_roce_tos);
+free_gid_type:
+ kfree(cma_dev->default_gid_type);
+
+free_cma_dev:
+ kfree(cma_dev);
+ return ret;
}
-static const struct ibnl_client_cbs cma_cb_table[] = {
- [RDMA_NL_RDMA_CM_ID_STATS] = { .dump = cma_get_id_stats,
- .module = THIS_MODULE },
-};
+static void cma_remove_one(struct ib_device *device, void *client_data)
+{
+ struct cma_device *cma_dev = client_data;
+
+ trace_cm_remove_one(device);
+
+ mutex_lock(&lock);
+ list_del(&cma_dev->list);
+ mutex_unlock(&lock);
+
+ cma_process_remove(cma_dev);
+ kfree(cma_dev->default_roce_tos);
+ kfree(cma_dev->default_gid_type);
+ kfree(cma_dev);
+}
static int cma_init_net(struct net *net)
{
struct cma_pernet *pernet = cma_pernet(net);
- idr_init(&pernet->tcp_ps);
- idr_init(&pernet->udp_ps);
- idr_init(&pernet->ipoib_ps);
- idr_init(&pernet->ib_ps);
+ xa_init(&pernet->tcp_ps);
+ xa_init(&pernet->udp_ps);
+ xa_init(&pernet->ipoib_ps);
+ xa_init(&pernet->ib_ps);
return 0;
}
@@ -4416,10 +5437,10 @@ static void cma_exit_net(struct net *net)
{
struct cma_pernet *pernet = cma_pernet(net);
- idr_destroy(&pernet->tcp_ps);
- idr_destroy(&pernet->udp_ps);
- idr_destroy(&pernet->ipoib_ps);
- idr_destroy(&pernet->ib_ps);
+ WARN_ON(!xa_empty(&pernet->tcp_ps));
+ WARN_ON(!xa_empty(&pernet->udp_ps));
+ WARN_ON(!xa_empty(&pernet->ipoib_ps));
+ WARN_ON(!xa_empty(&pernet->ib_ps));
}
static struct pernet_operations cma_pernet_operations = {
@@ -4433,6 +5454,19 @@ static int __init cma_init(void)
{
int ret;
+ /*
+ * There is a rare lock ordering dependency in cma_netdev_callback()
+ * that only happens when bonding is enabled. Teach lockdep that rtnl
+ * must never be nested under lock so it can find these without having
+ * to test with bonding.
+ */
+ if (IS_ENABLED(CONFIG_LOCKDEP)) {
+ rtnl_lock();
+ mutex_lock(&lock);
+ mutex_unlock(&lock);
+ rtnl_unlock();
+ }
+
cma_wq = alloc_ordered_workqueue("rdma_cm", WQ_MEM_RECLAIM);
if (!cma_wq)
return -ENOMEM;
@@ -4442,24 +5476,26 @@ static int __init cma_init(void)
goto err_wq;
ib_sa_register_client(&sa_client);
- rdma_addr_register_client(&addr_client);
register_netdevice_notifier(&cma_nb);
+ register_netevent_notifier(&cma_netevent_cb);
ret = ib_register_client(&cma_client);
if (ret)
goto err;
- if (ibnl_add_client(RDMA_NL_RDMA_CM, ARRAY_SIZE(cma_cb_table),
- cma_cb_table))
- pr_warn("RDMA CMA: failed to add netlink callback\n");
- cma_configfs_init();
+ ret = cma_configfs_init();
+ if (ret)
+ goto err_ib;
return 0;
+err_ib:
+ ib_unregister_client(&cma_client);
err:
+ unregister_netevent_notifier(&cma_netevent_cb);
unregister_netdevice_notifier(&cma_nb);
- rdma_addr_unregister_client(&addr_client);
ib_sa_unregister_client(&sa_client);
+ unregister_pernet_subsys(&cma_pernet_operations);
err_wq:
destroy_workqueue(cma_wq);
return ret;
@@ -4468,10 +5504,9 @@ err_wq:
static void __exit cma_cleanup(void)
{
cma_configfs_exit();
- ibnl_remove_client(RDMA_NL_RDMA_CM);
ib_unregister_client(&cma_client);
+ unregister_netevent_notifier(&cma_netevent_cb);
unregister_netdevice_notifier(&cma_nb);
- rdma_addr_unregister_client(&addr_client);
ib_sa_unregister_client(&sa_client);
unregister_pernet_subsys(&cma_pernet_operations);
destroy_workqueue(cma_wq);
@@ -4479,3 +5514,129 @@ static void __exit cma_cleanup(void)
module_init(cma_init);
module_exit(cma_cleanup);
+
+static void cma_query_ib_service_handler(int status,
+ struct sa_service_rec *recs,
+ unsigned int num_recs, void *context)
+{
+ struct cma_work *work = context;
+ struct rdma_id_private *id_priv = work->id;
+ struct sockaddr_ib *addr;
+
+ if (status)
+ goto fail;
+
+ if (!num_recs) {
+ status = -ENOENT;
+ goto fail;
+ }
+
+ if (id_priv->id.route.service_recs) {
+ status = -EALREADY;
+ goto fail;
+ }
+
+ id_priv->id.route.service_recs =
+ kmalloc_array(num_recs, sizeof(*recs), GFP_KERNEL);
+ if (!id_priv->id.route.service_recs) {
+ status = -ENOMEM;
+ goto fail;
+ }
+
+ id_priv->id.route.num_service_recs = num_recs;
+ memcpy(id_priv->id.route.service_recs, recs, sizeof(*recs) * num_recs);
+
+ addr = (struct sockaddr_ib *)&id_priv->id.route.addr.dst_addr;
+ addr->sib_family = AF_IB;
+ addr->sib_addr = *(struct ib_addr *)&recs->gid;
+ addr->sib_pkey = recs->pkey;
+ addr->sib_sid = recs->id;
+ rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr,
+ (union ib_gid *)&addr->sib_addr);
+ ib_addr_set_pkey(&id_priv->id.route.addr.dev_addr,
+ ntohs(addr->sib_pkey));
+
+ queue_work(cma_wq, &work->work);
+ return;
+
+fail:
+ work->old_state = RDMA_CM_ADDRINFO_QUERY;
+ work->new_state = RDMA_CM_ADDR_BOUND;
+ work->event.event = RDMA_CM_EVENT_ADDRINFO_ERROR;
+ work->event.status = status;
+ pr_debug_ratelimited(
+ "RDMA CM: SERVICE_ERROR: failed to query service record. status %d\n",
+ status);
+ queue_work(cma_wq, &work->work);
+}
+
+static int cma_resolve_ib_service(struct rdma_id_private *id_priv,
+ struct rdma_ucm_ib_service *ibs)
+{
+ struct sa_service_rec sr = {};
+ ib_sa_comp_mask mask = 0;
+ struct cma_work *work;
+
+ work = kzalloc(sizeof(*work), GFP_KERNEL);
+ if (!work)
+ return -ENOMEM;
+
+ cma_id_get(id_priv);
+
+ work->id = id_priv;
+ INIT_WORK(&work->work, cma_work_handler);
+ work->old_state = RDMA_CM_ADDRINFO_QUERY;
+ work->new_state = RDMA_CM_ADDRINFO_RESOLVED;
+ work->event.event = RDMA_CM_EVENT_ADDRINFO_RESOLVED;
+
+ if (ibs->flags & RDMA_USER_CM_IB_SERVICE_FLAG_ID) {
+ sr.id = cpu_to_be64(ibs->service_id);
+ mask |= IB_SA_SERVICE_REC_SERVICE_ID;
+ }
+ if (ibs->flags & RDMA_USER_CM_IB_SERVICE_FLAG_NAME) {
+ strscpy(sr.name, ibs->service_name, sizeof(sr.name));
+ mask |= IB_SA_SERVICE_REC_SERVICE_NAME;
+ }
+
+ id_priv->query_id = ib_sa_service_rec_get(&sa_client,
+ id_priv->id.device,
+ id_priv->id.port_num,
+ &sr, mask,
+ 2000, GFP_KERNEL,
+ cma_query_ib_service_handler,
+ work, &id_priv->query);
+
+ if (id_priv->query_id < 0) {
+ cma_id_put(id_priv);
+ kfree(work);
+ return id_priv->query_id;
+ }
+
+ return 0;
+}
+
+int rdma_resolve_ib_service(struct rdma_cm_id *id,
+ struct rdma_ucm_ib_service *ibs)
+{
+ struct rdma_id_private *id_priv;
+ int ret;
+
+ id_priv = container_of(id, struct rdma_id_private, id);
+ if (!id_priv->cma_dev ||
+ !cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDRINFO_QUERY))
+ return -EINVAL;
+
+ if (rdma_cap_ib_sa(id->device, id->port_num))
+ ret = cma_resolve_ib_service(id_priv, ibs);
+ else
+ ret = -EOPNOTSUPP;
+
+ if (ret)
+ goto err;
+
+ return 0;
+err:
+ cma_comp_exch(id_priv, RDMA_CM_ADDRINFO_QUERY, RDMA_CM_ADDR_BOUND);
+ return ret;
+}
+EXPORT_SYMBOL(rdma_resolve_ib_service);
diff --git a/drivers/infiniband/core/cma_configfs.c b/drivers/infiniband/core/cma_configfs.c
index 41573df1d9fc..f2fb2d8a6597 100644
--- a/drivers/infiniband/core/cma_configfs.c
+++ b/drivers/infiniband/core/cma_configfs.c
@@ -30,17 +30,19 @@
* SOFTWARE.
*/
-#include <linux/module.h>
#include <linux/configfs.h>
#include <rdma/ib_verbs.h>
+#include <rdma/rdma_cm.h>
+
#include "core_priv.h"
+#include "cma_priv.h"
struct cma_device;
struct cma_dev_group;
struct cma_dev_port_group {
- unsigned int port_num;
+ u32 port_num;
struct cma_dev_group *cma_dev_group;
struct config_group group;
};
@@ -65,7 +67,7 @@ static struct cma_dev_port_group *to_dev_port_group(struct config_item *item)
static bool filter_by_name(struct ib_device *ib_dev, void *cookie)
{
- return !strcmp(ib_dev->name, cookie);
+ return !strcmp(dev_name(&ib_dev->dev), cookie);
}
static int cma_configfs_params_get(struct config_item *item,
@@ -91,7 +93,7 @@ static int cma_configfs_params_get(struct config_item *item,
static void cma_configfs_params_put(struct cma_device *cma_dev)
{
- cma_deref_dev(cma_dev);
+ cma_dev_put(cma_dev);
}
static ssize_t default_roce_mode_show(struct config_item *item,
@@ -112,7 +114,7 @@ static ssize_t default_roce_mode_show(struct config_item *item,
if (gid_type < 0)
return gid_type;
- return sprintf(buf, "%s\n", ib_cache_gid_type_str(gid_type));
+ return sysfs_emit(buf, "%s\n", ib_cache_gid_type_str(gid_type));
}
static ssize_t default_roce_mode_store(struct config_item *item,
@@ -120,16 +122,19 @@ static ssize_t default_roce_mode_store(struct config_item *item,
{
struct cma_device *cma_dev;
struct cma_dev_port_group *group;
- int gid_type = ib_cache_gid_parse_type_str(buf);
+ int gid_type;
ssize_t ret;
- if (gid_type < 0)
- return -EINVAL;
-
ret = cma_configfs_params_get(item, &cma_dev, &group);
if (ret)
return ret;
+ gid_type = ib_cache_gid_parse_type_str(buf);
+ if (gid_type < 0) {
+ cma_configfs_params_put(cma_dev);
+ return -EINVAL;
+ }
+
ret = cma_set_default_gid_type(cma_dev, group->port_num, gid_type);
cma_configfs_params_put(cma_dev);
@@ -139,12 +144,54 @@ static ssize_t default_roce_mode_store(struct config_item *item,
CONFIGFS_ATTR(, default_roce_mode);
+static ssize_t default_roce_tos_show(struct config_item *item, char *buf)
+{
+ struct cma_device *cma_dev;
+ struct cma_dev_port_group *group;
+ ssize_t ret;
+ u8 tos;
+
+ ret = cma_configfs_params_get(item, &cma_dev, &group);
+ if (ret)
+ return ret;
+
+ tos = cma_get_default_roce_tos(cma_dev, group->port_num);
+ cma_configfs_params_put(cma_dev);
+
+ return sysfs_emit(buf, "%u\n", tos);
+}
+
+static ssize_t default_roce_tos_store(struct config_item *item,
+ const char *buf, size_t count)
+{
+ struct cma_device *cma_dev;
+ struct cma_dev_port_group *group;
+ ssize_t ret;
+ u8 tos;
+
+ ret = kstrtou8(buf, 0, &tos);
+ if (ret)
+ return ret;
+
+ ret = cma_configfs_params_get(item, &cma_dev, &group);
+ if (ret)
+ return ret;
+
+ ret = cma_set_default_roce_tos(cma_dev, group->port_num, tos);
+ cma_configfs_params_put(cma_dev);
+
+ return ret ? ret : strnlen(buf, count);
+}
+
+CONFIGFS_ATTR(, default_roce_tos);
+
static struct configfs_attribute *cma_configfs_attributes[] = {
&attr_default_roce_mode,
+ &attr_default_roce_tos,
NULL,
};
-static struct config_item_type cma_port_group_type = {
+static const struct config_item_type cma_port_group_type = {
.ct_attrs = cma_configfs_attributes,
.ct_owner = THIS_MODULE
};
@@ -152,11 +199,10 @@ static struct config_item_type cma_port_group_type = {
static int make_cma_ports(struct cma_dev_group *cma_dev_group,
struct cma_device *cma_dev)
{
- struct ib_device *ibdev;
- unsigned int i;
- unsigned int ports_num;
struct cma_dev_port_group *ports;
- int err;
+ struct ib_device *ibdev;
+ u32 ports_num;
+ u32 i;
ibdev = cma_get_ib_dev(cma_dev);
@@ -167,13 +213,11 @@ static int make_cma_ports(struct cma_dev_group *cma_dev_group,
ports = kcalloc(ports_num, sizeof(*cma_dev_group->ports),
GFP_KERNEL);
- if (!ports) {
- err = -ENOMEM;
- goto free;
- }
+ if (!ports)
+ return -ENOMEM;
for (i = 0; i < ports_num; i++) {
- char port_str[10];
+ char port_str[11];
ports[i].port_num = i + 1;
snprintf(port_str, sizeof(port_str), "%u", i + 1);
@@ -186,12 +230,7 @@ static int make_cma_ports(struct cma_dev_group *cma_dev_group,
}
cma_dev_group->ports = ports;
-
return 0;
-free:
- kfree(ports);
- cma_dev_group->ports = NULL;
- return err;
}
static void release_cma_dev(struct config_item *item)
@@ -221,7 +260,7 @@ static struct configfs_item_operations cma_ports_item_ops = {
.release = release_cma_ports_group
};
-static struct config_item_type cma_ports_group_type = {
+static const struct config_item_type cma_ports_group_type = {
.ct_item_ops = &cma_ports_item_ops,
.ct_owner = THIS_MODULE
};
@@ -230,7 +269,7 @@ static struct configfs_item_operations cma_device_item_ops = {
.release = release_cma_dev
};
-static struct config_item_type cma_device_group_type = {
+static const struct config_item_type cma_device_group_type = {
.ct_item_ops = &cma_device_item_ops,
.ct_owner = THIS_MODULE
};
@@ -253,7 +292,7 @@ static struct config_group *make_cma_dev(struct config_group *group,
goto fail;
}
- strncpy(cma_dev_group->name, name, sizeof(cma_dev_group->name));
+ strscpy(cma_dev_group->name, name, sizeof(cma_dev_group->name));
config_group_init_type_name(&cma_dev_group->ports_group, "ports",
&cma_ports_group_type);
@@ -267,21 +306,34 @@ static struct config_group *make_cma_dev(struct config_group *group,
configfs_add_default_group(&cma_dev_group->ports_group,
&cma_dev_group->device_group);
- cma_deref_dev(cma_dev);
+ cma_dev_put(cma_dev);
return &cma_dev_group->device_group;
fail:
if (cma_dev)
- cma_deref_dev(cma_dev);
+ cma_dev_put(cma_dev);
kfree(cma_dev_group);
return ERR_PTR(err);
}
+static void drop_cma_dev(struct config_group *cgroup, struct config_item *item)
+{
+ struct config_group *group =
+ container_of(item, struct config_group, cg_item);
+ struct cma_dev_group *cma_dev_group =
+ container_of(group, struct cma_dev_group, device_group);
+
+ configfs_remove_default_groups(&cma_dev_group->ports_group);
+ configfs_remove_default_groups(&cma_dev_group->device_group);
+ config_item_put(item);
+}
+
static struct configfs_group_operations cma_subsys_group_ops = {
.make_group = make_cma_dev,
+ .drop_item = drop_cma_dev,
};
-static struct config_item_type cma_subsys_type = {
+static const struct config_item_type cma_subsys_type = {
.ct_group_ops = &cma_subsys_group_ops,
.ct_owner = THIS_MODULE,
};
@@ -297,12 +349,18 @@ static struct configfs_subsystem cma_subsys = {
int __init cma_configfs_init(void)
{
+ int ret;
+
config_group_init(&cma_subsys.su_group);
mutex_init(&cma_subsys.su_mutex);
- return configfs_register_subsystem(&cma_subsys);
+ ret = configfs_register_subsystem(&cma_subsys);
+ if (ret)
+ mutex_destroy(&cma_subsys.su_mutex);
+ return ret;
}
void __exit cma_configfs_exit(void)
{
configfs_unregister_subsystem(&cma_subsys);
+ mutex_destroy(&cma_subsys.su_mutex);
}
diff --git a/drivers/infiniband/core/cma_priv.h b/drivers/infiniband/core/cma_priv.h
new file mode 100644
index 000000000000..c604b601f4d9
--- /dev/null
+++ b/drivers/infiniband/core/cma_priv.h
@@ -0,0 +1,141 @@
+/*
+ * Copyright (c) 2005 Voltaire Inc. All rights reserved.
+ * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
+ * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
+ * Copyright (c) 2005-2006 Intel Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _CMA_PRIV_H
+#define _CMA_PRIV_H
+
+enum rdma_cm_state {
+ RDMA_CM_IDLE,
+ RDMA_CM_ADDR_QUERY,
+ RDMA_CM_ADDR_RESOLVED,
+ RDMA_CM_ROUTE_QUERY,
+ RDMA_CM_ROUTE_RESOLVED,
+ RDMA_CM_CONNECT,
+ RDMA_CM_DISCONNECT,
+ RDMA_CM_ADDR_BOUND,
+ RDMA_CM_LISTEN,
+ RDMA_CM_DEVICE_REMOVAL,
+ RDMA_CM_DESTROYING,
+ RDMA_CM_ADDRINFO_QUERY,
+ RDMA_CM_ADDRINFO_RESOLVED
+};
+
+struct rdma_id_private {
+ struct rdma_cm_id id;
+
+ struct rdma_bind_list *bind_list;
+ struct hlist_node node;
+ union {
+ struct list_head device_item; /* On cma_device->id_list */
+ struct list_head listen_any_item; /* On listen_any_list */
+ };
+ union {
+ /* On rdma_id_private->listen_list */
+ struct list_head listen_item;
+ struct list_head listen_list;
+ };
+ struct list_head id_list_entry;
+ struct cma_device *cma_dev;
+ struct list_head mc_list;
+
+ int internal_id;
+ enum rdma_cm_state state;
+ spinlock_t lock;
+ struct mutex qp_mutex;
+
+ struct completion comp;
+ refcount_t refcount;
+ struct mutex handler_mutex;
+
+ int backlog;
+ int timeout_ms;
+ struct ib_sa_query *query;
+ int query_id;
+ union {
+ struct ib_cm_id *ib;
+ struct iw_cm_id *iw;
+ } cm_id;
+
+ u32 seq_num;
+ u32 qkey;
+ u32 qp_num;
+ u32 options;
+ u8 srq;
+ u8 tos;
+ u8 tos_set:1;
+ u8 timeout_set:1;
+ u8 min_rnr_timer_set:1;
+ u8 reuseaddr;
+ u8 afonly;
+ u8 timeout;
+ u8 min_rnr_timer;
+ u8 used_resolve_ip;
+ enum ib_gid_type gid_type;
+
+ /*
+ * Internal to RDMA/core, don't use in the drivers
+ */
+ struct rdma_restrack_entry res;
+ struct rdma_ucm_ece ece;
+};
+
+#if IS_ENABLED(CONFIG_INFINIBAND_ADDR_TRANS_CONFIGFS)
+int cma_configfs_init(void);
+void cma_configfs_exit(void);
+#else
+static inline int cma_configfs_init(void)
+{
+ return 0;
+}
+
+static inline void cma_configfs_exit(void)
+{
+}
+#endif
+
+void cma_dev_get(struct cma_device *dev);
+void cma_dev_put(struct cma_device *dev);
+typedef bool (*cma_device_filter)(struct ib_device *, void *);
+struct cma_device *cma_enum_devices_by_ibdev(cma_device_filter filter,
+ void *cookie);
+int cma_get_default_gid_type(struct cma_device *dev, u32 port);
+int cma_set_default_gid_type(struct cma_device *dev, u32 port,
+ enum ib_gid_type default_gid_type);
+int cma_get_default_roce_tos(struct cma_device *dev, u32 port);
+int cma_set_default_roce_tos(struct cma_device *dev, u32 port,
+ u8 default_roce_tos);
+struct ib_device *cma_get_ib_dev(struct cma_device *dev);
+
+#endif /* _CMA_PRIV_H */
diff --git a/drivers/infiniband/core/cma_trace.c b/drivers/infiniband/core/cma_trace.c
new file mode 100644
index 000000000000..b314a281e10e
--- /dev/null
+++ b/drivers/infiniband/core/cma_trace.c
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Trace points for the RDMA Connection Manager.
+ *
+ * Author: Chuck Lever <chuck.lever@oracle.com>
+ *
+ * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved.
+ */
+
+#define CREATE_TRACE_POINTS
+
+#include <rdma/rdma_cm.h>
+#include <rdma/ib_cm.h>
+#include "cma_priv.h"
+
+#include "cma_trace.h"
diff --git a/drivers/infiniband/core/cma_trace.h b/drivers/infiniband/core/cma_trace.h
new file mode 100644
index 000000000000..3456d5f3aa47
--- /dev/null
+++ b/drivers/infiniband/core/cma_trace.h
@@ -0,0 +1,361 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Trace point definitions for the RDMA Connect Manager.
+ *
+ * Author: Chuck Lever <chuck.lever@oracle.com>
+ *
+ * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved.
+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM rdma_cma
+
+#if !defined(_TRACE_RDMA_CMA_H) || defined(TRACE_HEADER_MULTI_READ)
+
+#define _TRACE_RDMA_CMA_H
+
+#include <linux/tracepoint.h>
+#include <trace/misc/rdma.h>
+
+
+DECLARE_EVENT_CLASS(cma_fsm_class,
+ TP_PROTO(
+ const struct rdma_id_private *id_priv
+ ),
+
+ TP_ARGS(id_priv),
+
+ TP_STRUCT__entry(
+ __field(u32, cm_id)
+ __field(u32, tos)
+ __array(unsigned char, srcaddr, sizeof(struct sockaddr_in6))
+ __array(unsigned char, dstaddr, sizeof(struct sockaddr_in6))
+ ),
+
+ TP_fast_assign(
+ __entry->cm_id = id_priv->res.id;
+ __entry->tos = id_priv->tos;
+ memcpy(__entry->srcaddr, &id_priv->id.route.addr.src_addr,
+ sizeof(struct sockaddr_in6));
+ memcpy(__entry->dstaddr, &id_priv->id.route.addr.dst_addr,
+ sizeof(struct sockaddr_in6));
+ ),
+
+ TP_printk("cm.id=%u src=%pISpc dst=%pISpc tos=%u",
+ __entry->cm_id, __entry->srcaddr, __entry->dstaddr, __entry->tos
+ )
+);
+
+#define DEFINE_CMA_FSM_EVENT(name) \
+ DEFINE_EVENT(cma_fsm_class, cm_##name, \
+ TP_PROTO( \
+ const struct rdma_id_private *id_priv \
+ ), \
+ TP_ARGS(id_priv))
+
+DEFINE_CMA_FSM_EVENT(send_rtu);
+DEFINE_CMA_FSM_EVENT(send_rej);
+DEFINE_CMA_FSM_EVENT(prepare_mra);
+DEFINE_CMA_FSM_EVENT(send_sidr_req);
+DEFINE_CMA_FSM_EVENT(send_sidr_rep);
+DEFINE_CMA_FSM_EVENT(disconnect);
+DEFINE_CMA_FSM_EVENT(sent_drep);
+DEFINE_CMA_FSM_EVENT(sent_dreq);
+DEFINE_CMA_FSM_EVENT(id_destroy);
+
+TRACE_EVENT(cm_id_attach,
+ TP_PROTO(
+ const struct rdma_id_private *id_priv,
+ const struct ib_device *device
+ ),
+
+ TP_ARGS(id_priv, device),
+
+ TP_STRUCT__entry(
+ __field(u32, cm_id)
+ __array(unsigned char, srcaddr, sizeof(struct sockaddr_in6))
+ __array(unsigned char, dstaddr, sizeof(struct sockaddr_in6))
+ __string(devname, device->name)
+ ),
+
+ TP_fast_assign(
+ __entry->cm_id = id_priv->res.id;
+ memcpy(__entry->srcaddr, &id_priv->id.route.addr.src_addr,
+ sizeof(struct sockaddr_in6));
+ memcpy(__entry->dstaddr, &id_priv->id.route.addr.dst_addr,
+ sizeof(struct sockaddr_in6));
+ __assign_str(devname);
+ ),
+
+ TP_printk("cm.id=%u src=%pISpc dst=%pISpc device=%s",
+ __entry->cm_id, __entry->srcaddr, __entry->dstaddr,
+ __get_str(devname)
+ )
+);
+
+DECLARE_EVENT_CLASS(cma_qp_class,
+ TP_PROTO(
+ const struct rdma_id_private *id_priv
+ ),
+
+ TP_ARGS(id_priv),
+
+ TP_STRUCT__entry(
+ __field(u32, cm_id)
+ __field(u32, tos)
+ __field(u32, qp_num)
+ __array(unsigned char, srcaddr, sizeof(struct sockaddr_in6))
+ __array(unsigned char, dstaddr, sizeof(struct sockaddr_in6))
+ ),
+
+ TP_fast_assign(
+ __entry->cm_id = id_priv->res.id;
+ __entry->tos = id_priv->tos;
+ __entry->qp_num = id_priv->qp_num;
+ memcpy(__entry->srcaddr, &id_priv->id.route.addr.src_addr,
+ sizeof(struct sockaddr_in6));
+ memcpy(__entry->dstaddr, &id_priv->id.route.addr.dst_addr,
+ sizeof(struct sockaddr_in6));
+ ),
+
+ TP_printk("cm.id=%u src=%pISpc dst=%pISpc tos=%u qp_num=%u",
+ __entry->cm_id, __entry->srcaddr, __entry->dstaddr, __entry->tos,
+ __entry->qp_num
+ )
+);
+
+#define DEFINE_CMA_QP_EVENT(name) \
+ DEFINE_EVENT(cma_qp_class, cm_##name, \
+ TP_PROTO( \
+ const struct rdma_id_private *id_priv \
+ ), \
+ TP_ARGS(id_priv))
+
+DEFINE_CMA_QP_EVENT(send_req);
+DEFINE_CMA_QP_EVENT(send_rep);
+DEFINE_CMA_QP_EVENT(qp_destroy);
+
+/*
+ * enum ib_wp_type, from include/rdma/ib_verbs.h
+ */
+#define IB_QP_TYPE_LIST \
+ ib_qp_type(SMI) \
+ ib_qp_type(GSI) \
+ ib_qp_type(RC) \
+ ib_qp_type(UC) \
+ ib_qp_type(UD) \
+ ib_qp_type(RAW_IPV6) \
+ ib_qp_type(RAW_ETHERTYPE) \
+ ib_qp_type(RAW_PACKET) \
+ ib_qp_type(XRC_INI) \
+ ib_qp_type_end(XRC_TGT)
+
+#undef ib_qp_type
+#undef ib_qp_type_end
+
+#define ib_qp_type(x) TRACE_DEFINE_ENUM(IB_QPT_##x);
+#define ib_qp_type_end(x) TRACE_DEFINE_ENUM(IB_QPT_##x);
+
+IB_QP_TYPE_LIST
+
+#undef ib_qp_type
+#undef ib_qp_type_end
+
+#define ib_qp_type(x) { IB_QPT_##x, #x },
+#define ib_qp_type_end(x) { IB_QPT_##x, #x }
+
+#define rdma_show_qp_type(x) \
+ __print_symbolic(x, IB_QP_TYPE_LIST)
+
+
+TRACE_EVENT(cm_qp_create,
+ TP_PROTO(
+ const struct rdma_id_private *id_priv,
+ const struct ib_pd *pd,
+ const struct ib_qp_init_attr *qp_init_attr,
+ int rc
+ ),
+
+ TP_ARGS(id_priv, pd, qp_init_attr, rc),
+
+ TP_STRUCT__entry(
+ __field(u32, cm_id)
+ __field(u32, pd_id)
+ __field(u32, tos)
+ __field(u32, qp_num)
+ __field(u32, send_wr)
+ __field(u32, recv_wr)
+ __field(int, rc)
+ __field(unsigned long, qp_type)
+ __array(unsigned char, srcaddr, sizeof(struct sockaddr_in6))
+ __array(unsigned char, dstaddr, sizeof(struct sockaddr_in6))
+ ),
+
+ TP_fast_assign(
+ __entry->cm_id = id_priv->res.id;
+ __entry->pd_id = pd->res.id;
+ __entry->tos = id_priv->tos;
+ __entry->send_wr = qp_init_attr->cap.max_send_wr;
+ __entry->recv_wr = qp_init_attr->cap.max_recv_wr;
+ __entry->rc = rc;
+ if (!rc) {
+ __entry->qp_num = id_priv->qp_num;
+ __entry->qp_type = id_priv->id.qp_type;
+ } else {
+ __entry->qp_num = 0;
+ __entry->qp_type = 0;
+ }
+ memcpy(__entry->srcaddr, &id_priv->id.route.addr.src_addr,
+ sizeof(struct sockaddr_in6));
+ memcpy(__entry->dstaddr, &id_priv->id.route.addr.dst_addr,
+ sizeof(struct sockaddr_in6));
+ ),
+
+ TP_printk("cm.id=%u src=%pISpc dst=%pISpc tos=%u pd.id=%u qp_type=%s"
+ " send_wr=%u recv_wr=%u qp_num=%u rc=%d",
+ __entry->cm_id, __entry->srcaddr, __entry->dstaddr,
+ __entry->tos, __entry->pd_id,
+ rdma_show_qp_type(__entry->qp_type), __entry->send_wr,
+ __entry->recv_wr, __entry->qp_num, __entry->rc
+ )
+);
+
+TRACE_EVENT(cm_req_handler,
+ TP_PROTO(
+ const struct rdma_id_private *id_priv,
+ int event
+ ),
+
+ TP_ARGS(id_priv, event),
+
+ TP_STRUCT__entry(
+ __field(u32, cm_id)
+ __field(u32, tos)
+ __field(unsigned long, event)
+ __array(unsigned char, srcaddr, sizeof(struct sockaddr_in6))
+ __array(unsigned char, dstaddr, sizeof(struct sockaddr_in6))
+ ),
+
+ TP_fast_assign(
+ __entry->cm_id = id_priv->res.id;
+ __entry->tos = id_priv->tos;
+ __entry->event = event;
+ memcpy(__entry->srcaddr, &id_priv->id.route.addr.src_addr,
+ sizeof(struct sockaddr_in6));
+ memcpy(__entry->dstaddr, &id_priv->id.route.addr.dst_addr,
+ sizeof(struct sockaddr_in6));
+ ),
+
+ TP_printk("cm.id=%u src=%pISpc dst=%pISpc tos=%u %s (%lu)",
+ __entry->cm_id, __entry->srcaddr, __entry->dstaddr, __entry->tos,
+ rdma_show_ib_cm_event(__entry->event), __entry->event
+ )
+);
+
+TRACE_EVENT(cm_event_handler,
+ TP_PROTO(
+ const struct rdma_id_private *id_priv,
+ const struct rdma_cm_event *event
+ ),
+
+ TP_ARGS(id_priv, event),
+
+ TP_STRUCT__entry(
+ __field(u32, cm_id)
+ __field(u32, tos)
+ __field(unsigned long, event)
+ __field(int, status)
+ __array(unsigned char, srcaddr, sizeof(struct sockaddr_in6))
+ __array(unsigned char, dstaddr, sizeof(struct sockaddr_in6))
+ ),
+
+ TP_fast_assign(
+ __entry->cm_id = id_priv->res.id;
+ __entry->tos = id_priv->tos;
+ __entry->event = event->event;
+ __entry->status = event->status;
+ memcpy(__entry->srcaddr, &id_priv->id.route.addr.src_addr,
+ sizeof(struct sockaddr_in6));
+ memcpy(__entry->dstaddr, &id_priv->id.route.addr.dst_addr,
+ sizeof(struct sockaddr_in6));
+ ),
+
+ TP_printk("cm.id=%u src=%pISpc dst=%pISpc tos=%u %s (%lu/%d)",
+ __entry->cm_id, __entry->srcaddr, __entry->dstaddr, __entry->tos,
+ rdma_show_cm_event(__entry->event), __entry->event,
+ __entry->status
+ )
+);
+
+TRACE_EVENT(cm_event_done,
+ TP_PROTO(
+ const struct rdma_id_private *id_priv,
+ const struct rdma_cm_event *event,
+ int result
+ ),
+
+ TP_ARGS(id_priv, event, result),
+
+ TP_STRUCT__entry(
+ __field(u32, cm_id)
+ __field(u32, tos)
+ __field(unsigned long, event)
+ __field(int, result)
+ __array(unsigned char, srcaddr, sizeof(struct sockaddr_in6))
+ __array(unsigned char, dstaddr, sizeof(struct sockaddr_in6))
+ ),
+
+ TP_fast_assign(
+ __entry->cm_id = id_priv->res.id;
+ __entry->tos = id_priv->tos;
+ __entry->event = event->event;
+ __entry->result = result;
+ memcpy(__entry->srcaddr, &id_priv->id.route.addr.src_addr,
+ sizeof(struct sockaddr_in6));
+ memcpy(__entry->dstaddr, &id_priv->id.route.addr.dst_addr,
+ sizeof(struct sockaddr_in6));
+ ),
+
+ TP_printk("cm.id=%u src=%pISpc dst=%pISpc tos=%u %s consumer returns %d",
+ __entry->cm_id, __entry->srcaddr, __entry->dstaddr, __entry->tos,
+ rdma_show_cm_event(__entry->event), __entry->result
+ )
+);
+
+DECLARE_EVENT_CLASS(cma_client_class,
+ TP_PROTO(
+ const struct ib_device *device
+ ),
+
+ TP_ARGS(device),
+
+ TP_STRUCT__entry(
+ __string(name, device->name)
+ ),
+
+ TP_fast_assign(
+ __assign_str(name);
+ ),
+
+ TP_printk("device name=%s",
+ __get_str(name)
+ )
+);
+
+#define DEFINE_CMA_CLIENT_EVENT(name) \
+ DEFINE_EVENT(cma_client_class, cm_##name, \
+ TP_PROTO( \
+ const struct ib_device *device \
+ ), \
+ TP_ARGS(device))
+
+DEFINE_CMA_CLIENT_EVENT(add_one);
+DEFINE_CMA_CLIENT_EVENT(remove_one);
+
+#endif /* _TRACE_RDMA_CMA_H */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE cma_trace
+
+#include <trace/define_trace.h>
diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h
index d29372624f3a..05102769a918 100644
--- a/drivers/infiniband/core/core_priv.h
+++ b/drivers/infiniband/core/core_priv.h
@@ -35,48 +35,60 @@
#include <linux/list.h>
#include <linux/spinlock.h>
+#include <linux/cgroup_rdma.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
#include <rdma/ib_verbs.h>
+#include <rdma/opa_addr.h>
+#include <rdma/ib_mad.h>
+#include <rdma/restrack.h>
+#include "mad_priv.h"
+#include "restrack.h"
-#if IS_ENABLED(CONFIG_INFINIBAND_ADDR_TRANS_CONFIGFS)
-int cma_configfs_init(void);
-void cma_configfs_exit(void);
-#else
-static inline int cma_configfs_init(void)
-{
- return 0;
-}
+/* Total number of ports combined across all struct ib_devices's */
+#define RDMA_MAX_PORTS 8192
-static inline void cma_configfs_exit(void)
+struct pkey_index_qp_list {
+ struct list_head pkey_index_list;
+ u16 pkey_index;
+ /* Lock to hold while iterating the qp_list. */
+ spinlock_t qp_list_lock;
+ struct list_head qp_list;
+};
+
+/**
+ * struct rdma_dev_net - rdma net namespace metadata for a net
+ * @nl_sock: Pointer to netlink socket
+ * @net: Pointer to owner net namespace
+ * @id: xarray id to identify the net namespace.
+ */
+struct rdma_dev_net {
+ struct sock *nl_sock;
+ possible_net_t net;
+ u32 id;
+};
+
+extern const struct attribute_group ib_dev_attr_group;
+extern bool ib_devices_shared_netns;
+extern unsigned int rdma_dev_net_id;
+
+static inline struct rdma_dev_net *rdma_net_to_dev_net(struct net *net)
{
+ return net_generic(net, rdma_dev_net_id);
}
-#endif
-struct cma_device;
-void cma_ref_dev(struct cma_device *cma_dev);
-void cma_deref_dev(struct cma_device *cma_dev);
-typedef bool (*cma_device_filter)(struct ib_device *, void *);
-struct cma_device *cma_enum_devices_by_ibdev(cma_device_filter filter,
- void *cookie);
-int cma_get_default_gid_type(struct cma_device *cma_dev,
- unsigned int port);
-int cma_set_default_gid_type(struct cma_device *cma_dev,
- unsigned int port,
- enum ib_gid_type default_gid_type);
-struct ib_device *cma_get_ib_dev(struct cma_device *cma_dev);
-
-int ib_device_register_sysfs(struct ib_device *device,
- int (*port_callback)(struct ib_device *,
- u8, struct kobject *));
-void ib_device_unregister_sysfs(struct ib_device *device);
-
-void ib_cache_setup(void);
-void ib_cache_cleanup(void);
-
-typedef void (*roce_netdev_callback)(struct ib_device *device, u8 port,
+
+int ib_device_rename(struct ib_device *ibdev, const char *name);
+int ib_device_set_dim(struct ib_device *ibdev, u8 use_dim);
+
+typedef void (*roce_netdev_callback)(struct ib_device *device, u32 port,
struct net_device *idev, void *cookie);
-typedef int (*roce_netdev_filter)(struct ib_device *device, u8 port,
- struct net_device *idev, void *cookie);
+typedef bool (*roce_netdev_filter)(struct ib_device *device, u32 port,
+ struct net_device *idev, void *cookie);
+
+struct net_device *ib_device_get_netdev(struct ib_device *ib_dev,
+ u32 port);
void ib_enum_roce_netdev(struct ib_device *ib_dev,
roce_netdev_filter filter,
@@ -88,6 +100,23 @@ void ib_enum_all_roce_netdevs(roce_netdev_filter filter,
roce_netdev_callback cb,
void *cookie);
+typedef int (*nldev_callback)(struct ib_device *device,
+ struct sk_buff *skb,
+ struct netlink_callback *cb,
+ unsigned int idx);
+
+int ib_enum_all_devs(nldev_callback nldev_cb, struct sk_buff *skb,
+ struct netlink_callback *cb);
+
+struct ib_client_nl_info {
+ struct sk_buff *nl_msg;
+ struct device *cdev;
+ u32 port;
+ u64 abi;
+};
+int ib_get_client_nl_info(struct ib_device *ibdev, const char *client_name,
+ struct ib_client_nl_info *res);
+
enum ib_cache_gid_default_mode {
IB_CACHE_GID_DEFAULT_MODE_SET,
IB_CACHE_GID_DEFAULT_MODE_DELETE
@@ -97,29 +126,63 @@ int ib_cache_gid_parse_type_str(const char *buf);
const char *ib_cache_gid_type_str(enum ib_gid_type gid_type);
-void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port,
+void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u32 port,
struct net_device *ndev,
unsigned long gid_type_mask,
enum ib_cache_gid_default_mode mode);
-int ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
+int ib_cache_gid_add(struct ib_device *ib_dev, u32 port,
union ib_gid *gid, struct ib_gid_attr *attr);
-int ib_cache_gid_del(struct ib_device *ib_dev, u8 port,
+int ib_cache_gid_del(struct ib_device *ib_dev, u32 port,
union ib_gid *gid, struct ib_gid_attr *attr);
-int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port,
+int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u32 port,
struct net_device *ndev);
int roce_gid_mgmt_init(void);
void roce_gid_mgmt_cleanup(void);
-int roce_rescan_device(struct ib_device *ib_dev);
-unsigned long roce_gid_type_mask_support(struct ib_device *ib_dev, u8 port);
+unsigned long roce_gid_type_mask_support(struct ib_device *ib_dev, u32 port);
int ib_cache_setup_one(struct ib_device *device);
void ib_cache_cleanup_one(struct ib_device *device);
void ib_cache_release_one(struct ib_device *device);
+void ib_dispatch_event_clients(struct ib_event *event);
+
+#ifdef CONFIG_CGROUP_RDMA
+void ib_device_register_rdmacg(struct ib_device *device);
+void ib_device_unregister_rdmacg(struct ib_device *device);
+
+int ib_rdmacg_try_charge(struct ib_rdmacg_object *cg_obj,
+ struct ib_device *device,
+ enum rdmacg_resource_type resource_index);
+
+void ib_rdmacg_uncharge(struct ib_rdmacg_object *cg_obj,
+ struct ib_device *device,
+ enum rdmacg_resource_type resource_index);
+#else
+static inline void ib_device_register_rdmacg(struct ib_device *device)
+{
+}
+
+static inline void ib_device_unregister_rdmacg(struct ib_device *device)
+{
+}
+
+static inline int ib_rdmacg_try_charge(struct ib_rdmacg_object *cg_obj,
+ struct ib_device *device,
+ enum rdmacg_resource_type resource_index)
+{
+ return 0;
+}
+
+static inline void ib_rdmacg_uncharge(struct ib_rdmacg_object *cg_obj,
+ struct ib_device *device,
+ enum rdmacg_resource_type resource_index)
+{
+}
+#endif
static inline bool rdma_is_upper_dev_rcu(struct net_device *dev,
struct net_device *upper)
@@ -136,11 +199,176 @@ void ib_mad_cleanup(void);
int ib_sa_init(void);
void ib_sa_cleanup(void);
+void rdma_nl_init(void);
+void rdma_nl_exit(void);
+
int ib_nl_handle_resolve_resp(struct sk_buff *skb,
- struct netlink_callback *cb);
+ struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack);
int ib_nl_handle_set_timeout(struct sk_buff *skb,
- struct netlink_callback *cb);
+ struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack);
int ib_nl_handle_ip_res_resp(struct sk_buff *skb,
- struct netlink_callback *cb);
+ struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack);
+
+void ib_get_cached_subnet_prefix(struct ib_device *device,
+ u32 port_num,
+ u64 *sn_pfx);
+
+#ifdef CONFIG_SECURITY_INFINIBAND
+void ib_security_release_port_pkey_list(struct ib_device *device);
+
+void ib_security_cache_change(struct ib_device *device,
+ u32 port_num,
+ u64 subnet_prefix);
+
+int ib_security_modify_qp(struct ib_qp *qp,
+ struct ib_qp_attr *qp_attr,
+ int qp_attr_mask,
+ struct ib_udata *udata);
+
+int ib_create_qp_security(struct ib_qp *qp, struct ib_device *dev);
+void ib_destroy_qp_security_begin(struct ib_qp_security *sec);
+void ib_destroy_qp_security_abort(struct ib_qp_security *sec);
+void ib_destroy_qp_security_end(struct ib_qp_security *sec);
+int ib_open_shared_qp_security(struct ib_qp *qp, struct ib_device *dev);
+void ib_close_shared_qp_security(struct ib_qp_security *sec);
+int ib_mad_agent_security_setup(struct ib_mad_agent *agent,
+ enum ib_qp_type qp_type);
+void ib_mad_agent_security_cleanup(struct ib_mad_agent *agent);
+int ib_mad_enforce_security(struct ib_mad_agent_private *map, u16 pkey_index);
+void ib_mad_agent_security_change(void);
+#else
+static inline void ib_security_release_port_pkey_list(struct ib_device *device)
+{
+}
+
+static inline void ib_security_cache_change(struct ib_device *device,
+ u32 port_num,
+ u64 subnet_prefix)
+{
+}
+
+static inline int ib_security_modify_qp(struct ib_qp *qp,
+ struct ib_qp_attr *qp_attr,
+ int qp_attr_mask,
+ struct ib_udata *udata)
+{
+ return qp->device->ops.modify_qp(qp->real_qp,
+ qp_attr,
+ qp_attr_mask,
+ udata);
+}
+
+static inline int ib_create_qp_security(struct ib_qp *qp,
+ struct ib_device *dev)
+{
+ return 0;
+}
+
+static inline void ib_destroy_qp_security_begin(struct ib_qp_security *sec)
+{
+}
+
+static inline void ib_destroy_qp_security_abort(struct ib_qp_security *sec)
+{
+}
+
+static inline void ib_destroy_qp_security_end(struct ib_qp_security *sec)
+{
+}
+
+static inline int ib_open_shared_qp_security(struct ib_qp *qp,
+ struct ib_device *dev)
+{
+ return 0;
+}
+
+static inline void ib_close_shared_qp_security(struct ib_qp_security *sec)
+{
+}
+
+static inline int ib_mad_agent_security_setup(struct ib_mad_agent *agent,
+ enum ib_qp_type qp_type)
+{
+ return 0;
+}
+
+static inline void ib_mad_agent_security_cleanup(struct ib_mad_agent *agent)
+{
+}
+
+static inline int ib_mad_enforce_security(struct ib_mad_agent_private *map,
+ u16 pkey_index)
+{
+ return 0;
+}
+
+static inline void ib_mad_agent_security_change(void)
+{
+}
+#endif
+
+struct ib_device *ib_device_get_by_index(const struct net *net, u32 index);
+
+/* RDMA device netlink */
+void nldev_init(void);
+void nldev_exit(void);
+
+struct ib_qp *ib_create_qp_user(struct ib_device *dev, struct ib_pd *pd,
+ struct ib_qp_init_attr *attr,
+ struct ib_udata *udata,
+ struct ib_uqp_object *uobj, const char *caller);
+
+void ib_qp_usecnt_inc(struct ib_qp *qp);
+void ib_qp_usecnt_dec(struct ib_qp *qp);
+
+struct rdma_dev_addr;
+
+int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
+ const union ib_gid *dgid,
+ u8 *dmac, const struct ib_gid_attr *sgid_attr,
+ int *hoplimit);
+void rdma_copy_src_l2_addr(struct rdma_dev_addr *dev_addr,
+ const struct net_device *dev);
+
+struct sa_path_rec;
+int roce_resolve_route_from_path(struct sa_path_rec *rec,
+ const struct ib_gid_attr *attr);
+
+struct net_device *rdma_read_gid_attr_ndev_rcu(const struct ib_gid_attr *attr);
+
+void ib_free_port_attrs(struct ib_core_device *coredev);
+int ib_setup_port_attrs(struct ib_core_device *coredev);
+struct rdma_hw_stats *ib_get_hw_stats_port(struct ib_device *ibdev, u32 port_num);
+void ib_device_release_hw_stats(struct hw_stats_device_data *data);
+int ib_setup_device_attrs(struct ib_device *ibdev);
+
+int rdma_compatdev_set(u8 enable);
+
+int ib_port_register_client_groups(struct ib_device *ibdev, u32 port_num,
+ const struct attribute_group **groups);
+void ib_port_unregister_client_groups(struct ib_device *ibdev, u32 port_num,
+ const struct attribute_group **groups);
+
+int ib_device_set_netns_put(struct sk_buff *skb,
+ struct ib_device *dev, u32 ns_fd);
+
+int rdma_nl_net_init(struct rdma_dev_net *rnet);
+void rdma_nl_net_exit(struct rdma_dev_net *rnet);
+
+struct rdma_umap_priv {
+ struct vm_area_struct *vma;
+ struct list_head list;
+ struct rdma_user_mmap_entry *entry;
+};
+
+void rdma_umap_priv_init(struct rdma_umap_priv *priv,
+ struct vm_area_struct *vma,
+ struct rdma_user_mmap_entry *entry);
+
+void ib_cq_pool_cleanup(struct ib_device *dev);
+bool rdma_nl_get_privileged_qkey(void);
#endif /* _CORE_PRIV_H */
diff --git a/drivers/infiniband/core/counters.c b/drivers/infiniband/core/counters.c
new file mode 100644
index 000000000000..c3aa6d7fc66b
--- /dev/null
+++ b/drivers/infiniband/core/counters.c
@@ -0,0 +1,681 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2019 Mellanox Technologies. All rights reserved.
+ */
+#include <rdma/ib_verbs.h>
+#include <rdma/rdma_counter.h>
+
+#include "core_priv.h"
+#include "restrack.h"
+
+#define ALL_AUTO_MODE_MASKS (RDMA_COUNTER_MASK_QP_TYPE | RDMA_COUNTER_MASK_PID)
+
+static int __counter_set_mode(struct rdma_port_counter *port_counter,
+ enum rdma_nl_counter_mode new_mode,
+ enum rdma_nl_counter_mask new_mask,
+ bool bind_opcnt)
+{
+ if (new_mode == RDMA_COUNTER_MODE_AUTO) {
+ if (new_mask & (~ALL_AUTO_MODE_MASKS))
+ return -EINVAL;
+ if (port_counter->num_counters)
+ return -EBUSY;
+ }
+
+ port_counter->mode.mode = new_mode;
+ port_counter->mode.mask = new_mask;
+ port_counter->mode.bind_opcnt = bind_opcnt;
+ return 0;
+}
+
+/*
+ * rdma_counter_set_auto_mode() - Turn on/off per-port auto mode
+ *
+ * @dev: Device to operate
+ * @port: Port to use
+ * @mask: Mask to configure
+ * @extack: Message to the user
+ *
+ * Return 0 on success. If counter mode wasn't changed then it is considered
+ * as success as well.
+ * Return -EBUSY when changing to auto mode while there are bounded counters.
+ *
+ */
+int rdma_counter_set_auto_mode(struct ib_device *dev, u32 port,
+ enum rdma_nl_counter_mask mask,
+ bool bind_opcnt,
+ struct netlink_ext_ack *extack)
+{
+ struct rdma_port_counter *port_counter;
+ enum rdma_nl_counter_mode mode;
+ int ret;
+
+ port_counter = &dev->port_data[port].port_counter;
+ if (!port_counter->hstats)
+ return -EOPNOTSUPP;
+
+ mutex_lock(&port_counter->lock);
+ if (mask)
+ mode = RDMA_COUNTER_MODE_AUTO;
+ else
+ mode = (port_counter->num_counters) ? RDMA_COUNTER_MODE_MANUAL :
+ RDMA_COUNTER_MODE_NONE;
+
+ if (port_counter->mode.mode == mode &&
+ port_counter->mode.mask == mask &&
+ port_counter->mode.bind_opcnt == bind_opcnt) {
+ ret = 0;
+ goto out;
+ }
+
+ ret = __counter_set_mode(port_counter, mode, mask, bind_opcnt);
+
+out:
+ mutex_unlock(&port_counter->lock);
+ if (ret == -EBUSY)
+ NL_SET_ERR_MSG(
+ extack,
+ "Modifying auto mode is not allowed when there is a bound QP");
+ return ret;
+}
+
+static void auto_mode_init_counter(struct rdma_counter *counter,
+ const struct ib_qp *qp,
+ enum rdma_nl_counter_mask new_mask)
+{
+ struct auto_mode_param *param = &counter->mode.param;
+
+ counter->mode.mode = RDMA_COUNTER_MODE_AUTO;
+ counter->mode.mask = new_mask;
+
+ if (new_mask & RDMA_COUNTER_MASK_QP_TYPE)
+ param->qp_type = qp->qp_type;
+}
+
+static int __rdma_counter_bind_qp(struct rdma_counter *counter,
+ struct ib_qp *qp, u32 port)
+{
+ int ret;
+
+ if (qp->counter)
+ return -EINVAL;
+
+ if (!qp->device->ops.counter_bind_qp)
+ return -EOPNOTSUPP;
+
+ mutex_lock(&counter->lock);
+ ret = qp->device->ops.counter_bind_qp(counter, qp, port);
+ mutex_unlock(&counter->lock);
+
+ return ret;
+}
+
+int rdma_counter_modify(struct ib_device *dev, u32 port,
+ unsigned int index, bool enable)
+{
+ struct rdma_hw_stats *stats;
+ int ret = 0;
+
+ if (!dev->ops.modify_hw_stat)
+ return -EOPNOTSUPP;
+
+ stats = ib_get_hw_stats_port(dev, port);
+ if (!stats || index >= stats->num_counters ||
+ !(stats->descs[index].flags & IB_STAT_FLAG_OPTIONAL))
+ return -EINVAL;
+
+ mutex_lock(&stats->lock);
+
+ if (enable != test_bit(index, stats->is_disabled))
+ goto out;
+
+ ret = dev->ops.modify_hw_stat(dev, port, index, enable);
+ if (ret)
+ goto out;
+
+ if (enable)
+ clear_bit(index, stats->is_disabled);
+ else
+ set_bit(index, stats->is_disabled);
+out:
+ mutex_unlock(&stats->lock);
+ return ret;
+}
+
+static struct rdma_counter *alloc_and_bind(struct ib_device *dev, u32 port,
+ struct ib_qp *qp,
+ enum rdma_nl_counter_mode mode,
+ bool bind_opcnt)
+{
+ struct rdma_port_counter *port_counter;
+ struct rdma_counter *counter;
+ int ret;
+
+ if (!dev->ops.counter_dealloc || !dev->ops.counter_alloc_stats)
+ return NULL;
+
+ counter = rdma_zalloc_drv_obj(dev, rdma_counter);
+ if (!counter)
+ return NULL;
+
+ counter->device = dev;
+ counter->port = port;
+
+ dev->ops.counter_init(counter);
+
+ rdma_restrack_new(&counter->res, RDMA_RESTRACK_COUNTER);
+ counter->stats = dev->ops.counter_alloc_stats(counter);
+ if (!counter->stats)
+ goto err_stats;
+
+ port_counter = &dev->port_data[port].port_counter;
+ mutex_lock(&port_counter->lock);
+ switch (mode) {
+ case RDMA_COUNTER_MODE_MANUAL:
+ ret = __counter_set_mode(port_counter, RDMA_COUNTER_MODE_MANUAL,
+ 0, bind_opcnt);
+ if (ret) {
+ mutex_unlock(&port_counter->lock);
+ goto err_mode;
+ }
+ break;
+ case RDMA_COUNTER_MODE_AUTO:
+ auto_mode_init_counter(counter, qp, port_counter->mode.mask);
+ break;
+ default:
+ ret = -EOPNOTSUPP;
+ mutex_unlock(&port_counter->lock);
+ goto err_mode;
+ }
+
+ port_counter->num_counters++;
+ mutex_unlock(&port_counter->lock);
+
+ counter->mode.mode = mode;
+ counter->mode.bind_opcnt = bind_opcnt;
+ kref_init(&counter->kref);
+ mutex_init(&counter->lock);
+
+ ret = __rdma_counter_bind_qp(counter, qp, port);
+ if (ret)
+ goto err_mode;
+
+ rdma_restrack_parent_name(&counter->res, &qp->res);
+ rdma_restrack_add(&counter->res);
+ return counter;
+
+err_mode:
+ rdma_free_hw_stats_struct(counter->stats);
+err_stats:
+ rdma_restrack_put(&counter->res);
+ kfree(counter);
+ return NULL;
+}
+
+static void rdma_counter_free(struct rdma_counter *counter)
+{
+ struct rdma_port_counter *port_counter;
+
+ port_counter = &counter->device->port_data[counter->port].port_counter;
+ mutex_lock(&port_counter->lock);
+ port_counter->num_counters--;
+ if (!port_counter->num_counters &&
+ (port_counter->mode.mode == RDMA_COUNTER_MODE_MANUAL))
+ __counter_set_mode(port_counter, RDMA_COUNTER_MODE_NONE, 0,
+ false);
+
+ mutex_unlock(&port_counter->lock);
+
+ rdma_restrack_del(&counter->res);
+ rdma_free_hw_stats_struct(counter->stats);
+ kfree(counter);
+}
+
+static bool auto_mode_match(struct ib_qp *qp, struct rdma_counter *counter,
+ enum rdma_nl_counter_mask auto_mask)
+{
+ struct auto_mode_param *param = &counter->mode.param;
+ bool match = true;
+
+ if (auto_mask & RDMA_COUNTER_MASK_QP_TYPE)
+ match &= (param->qp_type == qp->qp_type);
+
+ if (auto_mask & RDMA_COUNTER_MASK_PID)
+ match &= (task_pid_nr(counter->res.task) ==
+ task_pid_nr(qp->res.task));
+
+ return match;
+}
+
+static int __rdma_counter_unbind_qp(struct ib_qp *qp, u32 port)
+{
+ struct rdma_counter *counter = qp->counter;
+ int ret;
+
+ if (!qp->device->ops.counter_unbind_qp)
+ return -EOPNOTSUPP;
+
+ mutex_lock(&counter->lock);
+ ret = qp->device->ops.counter_unbind_qp(qp, port);
+ mutex_unlock(&counter->lock);
+
+ return ret;
+}
+
+static void counter_history_stat_update(struct rdma_counter *counter)
+{
+ struct ib_device *dev = counter->device;
+ struct rdma_port_counter *port_counter;
+ int i;
+
+ port_counter = &dev->port_data[counter->port].port_counter;
+ if (!port_counter->hstats)
+ return;
+
+ rdma_counter_query_stats(counter);
+
+ for (i = 0; i < counter->stats->num_counters; i++)
+ port_counter->hstats->value[i] += counter->stats->value[i];
+}
+
+/*
+ * rdma_get_counter_auto_mode - Find the counter that @qp should be bound
+ * with in auto mode
+ *
+ * Return: The counter (with ref-count increased) if found
+ */
+static struct rdma_counter *rdma_get_counter_auto_mode(struct ib_qp *qp,
+ u32 port)
+{
+ struct rdma_port_counter *port_counter;
+ struct rdma_counter *counter = NULL;
+ struct ib_device *dev = qp->device;
+ struct rdma_restrack_entry *res;
+ struct rdma_restrack_root *rt;
+ unsigned long id = 0;
+
+ port_counter = &dev->port_data[port].port_counter;
+ rt = &dev->res[RDMA_RESTRACK_COUNTER];
+ xa_lock(&rt->xa);
+ xa_for_each(&rt->xa, id, res) {
+ counter = container_of(res, struct rdma_counter, res);
+ if ((counter->device != qp->device) || (counter->port != port))
+ goto next;
+
+ if (auto_mode_match(qp, counter, port_counter->mode.mask))
+ break;
+next:
+ counter = NULL;
+ }
+
+ if (counter && !kref_get_unless_zero(&counter->kref))
+ counter = NULL;
+
+ xa_unlock(&rt->xa);
+ return counter;
+}
+
+static void counter_release(struct kref *kref)
+{
+ struct rdma_counter *counter;
+
+ counter = container_of(kref, struct rdma_counter, kref);
+ counter_history_stat_update(counter);
+ counter->device->ops.counter_dealloc(counter);
+ rdma_counter_free(counter);
+}
+
+/*
+ * rdma_counter_bind_qp_auto - Check and bind the QP to a counter base on
+ * the auto-mode rule
+ */
+int rdma_counter_bind_qp_auto(struct ib_qp *qp, u32 port)
+{
+ struct rdma_port_counter *port_counter;
+ struct ib_device *dev = qp->device;
+ struct rdma_counter *counter;
+ int ret;
+
+ if (!rdma_restrack_is_tracked(&qp->res) || rdma_is_kernel_res(&qp->res))
+ return 0;
+
+ if (!rdma_is_port_valid(dev, port))
+ return -EINVAL;
+
+ port_counter = &dev->port_data[port].port_counter;
+ if (port_counter->mode.mode != RDMA_COUNTER_MODE_AUTO)
+ return 0;
+
+ counter = rdma_get_counter_auto_mode(qp, port);
+ if (counter) {
+ ret = __rdma_counter_bind_qp(counter, qp, port);
+ if (ret) {
+ kref_put(&counter->kref, counter_release);
+ return ret;
+ }
+ } else {
+ counter = alloc_and_bind(dev, port, qp, RDMA_COUNTER_MODE_AUTO,
+ port_counter->mode.bind_opcnt);
+ if (!counter)
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+/*
+ * rdma_counter_unbind_qp - Unbind a qp from a counter
+ * @force:
+ * true - Decrease the counter ref-count anyway (e.g., qp destroy)
+ */
+int rdma_counter_unbind_qp(struct ib_qp *qp, u32 port, bool force)
+{
+ struct rdma_counter *counter = qp->counter;
+ int ret;
+
+ if (!counter)
+ return -EINVAL;
+
+ ret = __rdma_counter_unbind_qp(qp, port);
+ if (ret && !force)
+ return ret;
+
+ kref_put(&counter->kref, counter_release);
+ return 0;
+}
+
+int rdma_counter_query_stats(struct rdma_counter *counter)
+{
+ struct ib_device *dev = counter->device;
+ int ret;
+
+ if (!dev->ops.counter_update_stats)
+ return -EINVAL;
+
+ mutex_lock(&counter->lock);
+ ret = dev->ops.counter_update_stats(counter);
+ mutex_unlock(&counter->lock);
+
+ return ret;
+}
+
+static u64 get_running_counters_hwstat_sum(struct ib_device *dev,
+ u32 port, u32 index)
+{
+ struct rdma_restrack_entry *res;
+ struct rdma_restrack_root *rt;
+ struct rdma_counter *counter;
+ unsigned long id = 0;
+ u64 sum = 0;
+
+ rt = &dev->res[RDMA_RESTRACK_COUNTER];
+ xa_lock(&rt->xa);
+ xa_for_each(&rt->xa, id, res) {
+ if (!rdma_restrack_get(res))
+ continue;
+
+ xa_unlock(&rt->xa);
+
+ counter = container_of(res, struct rdma_counter, res);
+ if ((counter->device != dev) || (counter->port != port) ||
+ rdma_counter_query_stats(counter))
+ goto next;
+
+ sum += counter->stats->value[index];
+
+next:
+ xa_lock(&rt->xa);
+ rdma_restrack_put(res);
+ }
+
+ xa_unlock(&rt->xa);
+ return sum;
+}
+
+/*
+ * rdma_counter_get_hwstat_value() - Get the sum value of all counters on a
+ * specific port, including the running ones and history data
+ */
+u64 rdma_counter_get_hwstat_value(struct ib_device *dev, u32 port, u32 index)
+{
+ struct rdma_port_counter *port_counter;
+ u64 sum;
+
+ port_counter = &dev->port_data[port].port_counter;
+ if (!port_counter->hstats)
+ return 0;
+
+ sum = get_running_counters_hwstat_sum(dev, port, index);
+ sum += port_counter->hstats->value[index];
+
+ return sum;
+}
+
+static struct ib_qp *rdma_counter_get_qp(struct ib_device *dev, u32 qp_num)
+{
+ struct rdma_restrack_entry *res = NULL;
+ struct ib_qp *qp = NULL;
+
+ res = rdma_restrack_get_byid(dev, RDMA_RESTRACK_QP, qp_num);
+ if (IS_ERR(res))
+ return NULL;
+
+ qp = container_of(res, struct ib_qp, res);
+ if (qp->qp_type == IB_QPT_RAW_PACKET && !rdma_dev_has_raw_cap(dev))
+ goto err;
+
+ return qp;
+
+err:
+ rdma_restrack_put(res);
+ return NULL;
+}
+
+static struct rdma_counter *rdma_get_counter_by_id(struct ib_device *dev,
+ u32 counter_id)
+{
+ struct rdma_restrack_entry *res;
+ struct rdma_counter *counter;
+
+ res = rdma_restrack_get_byid(dev, RDMA_RESTRACK_COUNTER, counter_id);
+ if (IS_ERR(res))
+ return NULL;
+
+ counter = container_of(res, struct rdma_counter, res);
+ kref_get(&counter->kref);
+ rdma_restrack_put(res);
+
+ return counter;
+}
+
+/*
+ * rdma_counter_bind_qpn() - Bind QP @qp_num to counter @counter_id
+ */
+int rdma_counter_bind_qpn(struct ib_device *dev, u32 port,
+ u32 qp_num, u32 counter_id)
+{
+ struct rdma_port_counter *port_counter;
+ struct rdma_counter *counter;
+ struct ib_qp *qp;
+ int ret;
+
+ port_counter = &dev->port_data[port].port_counter;
+ if (port_counter->mode.mode == RDMA_COUNTER_MODE_AUTO)
+ return -EINVAL;
+
+ qp = rdma_counter_get_qp(dev, qp_num);
+ if (!qp)
+ return -ENOENT;
+
+ counter = rdma_get_counter_by_id(dev, counter_id);
+ if (!counter) {
+ ret = -ENOENT;
+ goto err;
+ }
+
+ if (rdma_is_kernel_res(&counter->res) != rdma_is_kernel_res(&qp->res)) {
+ ret = -EINVAL;
+ goto err_task;
+ }
+
+ if ((counter->device != qp->device) || (counter->port != qp->port)) {
+ ret = -EINVAL;
+ goto err_task;
+ }
+
+ ret = __rdma_counter_bind_qp(counter, qp, port);
+ if (ret)
+ goto err_task;
+
+ rdma_restrack_put(&qp->res);
+ return 0;
+
+err_task:
+ kref_put(&counter->kref, counter_release);
+err:
+ rdma_restrack_put(&qp->res);
+ return ret;
+}
+
+/*
+ * rdma_counter_bind_qpn_alloc() - Alloc a counter and bind QP @qp_num to it
+ * The id of new counter is returned in @counter_id
+ */
+int rdma_counter_bind_qpn_alloc(struct ib_device *dev, u32 port,
+ u32 qp_num, u32 *counter_id)
+{
+ struct rdma_port_counter *port_counter;
+ struct rdma_counter *counter;
+ struct ib_qp *qp;
+ int ret;
+
+ if (!rdma_is_port_valid(dev, port))
+ return -EINVAL;
+
+ port_counter = &dev->port_data[port].port_counter;
+ if (!port_counter->hstats)
+ return -EOPNOTSUPP;
+
+ if (port_counter->mode.mode == RDMA_COUNTER_MODE_AUTO)
+ return -EINVAL;
+
+ qp = rdma_counter_get_qp(dev, qp_num);
+ if (!qp)
+ return -ENOENT;
+
+ if (rdma_is_port_valid(dev, qp->port) && (qp->port != port)) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ counter = alloc_and_bind(dev, port, qp, RDMA_COUNTER_MODE_MANUAL, true);
+ if (!counter) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ if (counter_id)
+ *counter_id = counter->id;
+
+ rdma_restrack_put(&qp->res);
+ return 0;
+
+err:
+ rdma_restrack_put(&qp->res);
+ return ret;
+}
+
+/*
+ * rdma_counter_unbind_qpn() - Unbind QP @qp_num from a counter
+ */
+int rdma_counter_unbind_qpn(struct ib_device *dev, u32 port,
+ u32 qp_num, u32 counter_id)
+{
+ struct rdma_port_counter *port_counter;
+ struct ib_qp *qp;
+ int ret;
+
+ if (!rdma_is_port_valid(dev, port))
+ return -EINVAL;
+
+ qp = rdma_counter_get_qp(dev, qp_num);
+ if (!qp)
+ return -ENOENT;
+
+ if (rdma_is_port_valid(dev, qp->port) && (qp->port != port)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ port_counter = &dev->port_data[port].port_counter;
+ if (!qp->counter || qp->counter->id != counter_id ||
+ port_counter->mode.mode != RDMA_COUNTER_MODE_MANUAL) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ ret = rdma_counter_unbind_qp(qp, port, false);
+
+out:
+ rdma_restrack_put(&qp->res);
+ return ret;
+}
+
+int rdma_counter_get_mode(struct ib_device *dev, u32 port,
+ enum rdma_nl_counter_mode *mode,
+ enum rdma_nl_counter_mask *mask,
+ bool *opcnt)
+{
+ struct rdma_port_counter *port_counter;
+
+ port_counter = &dev->port_data[port].port_counter;
+ *mode = port_counter->mode.mode;
+ *mask = port_counter->mode.mask;
+ *opcnt = port_counter->mode.bind_opcnt;
+
+ return 0;
+}
+
+void rdma_counter_init(struct ib_device *dev)
+{
+ struct rdma_port_counter *port_counter;
+ u32 port, i;
+
+ if (!dev->port_data)
+ return;
+
+ rdma_for_each_port(dev, port) {
+ port_counter = &dev->port_data[port].port_counter;
+ port_counter->mode.mode = RDMA_COUNTER_MODE_NONE;
+ mutex_init(&port_counter->lock);
+
+ if (!dev->ops.alloc_hw_port_stats)
+ continue;
+
+ port_counter->hstats = dev->ops.alloc_hw_port_stats(dev, port);
+ if (!port_counter->hstats)
+ goto fail;
+ }
+
+ return;
+
+fail:
+ for (i = port; i >= rdma_start_port(dev); i--) {
+ port_counter = &dev->port_data[port].port_counter;
+ rdma_free_hw_stats_struct(port_counter->hstats);
+ port_counter->hstats = NULL;
+ mutex_destroy(&port_counter->lock);
+ }
+}
+
+void rdma_counter_release(struct ib_device *dev)
+{
+ struct rdma_port_counter *port_counter;
+ u32 port;
+
+ rdma_for_each_port(dev, port) {
+ port_counter = &dev->port_data[port].port_counter;
+ rdma_free_hw_stats_struct(port_counter->hstats);
+ mutex_destroy(&port_counter->lock);
+ }
+}
diff --git a/drivers/infiniband/core/cq.c b/drivers/infiniband/core/cq.c
index a754fc727de5..584537c71545 100644
--- a/drivers/infiniband/core/cq.c
+++ b/drivers/infiniband/core/cq.c
@@ -1,22 +1,20 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2015 HGST, a Western Digital Company.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
*/
-#include <linux/module.h>
#include <linux/err.h>
#include <linux/slab.h>
#include <rdma/ib_verbs.h>
+#include "core_priv.h"
+
+#include <trace/events/rdma_core.h>
+/* Max size for shared CQ, may require tuning */
+#define IB_MAX_SHARED_CQ_SZ 4096U
+
/* # of WCs to poll for with a single call to ib_poll_cq */
#define IB_POLL_BATCH 16
+#define IB_POLL_BATCH_DIRECT 8
/* # of WCs to iterate over before yielding */
#define IB_POLL_BUDGET_IRQ 256
@@ -25,13 +23,88 @@
#define IB_POLL_FLAGS \
(IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS)
-static int __ib_process_cq(struct ib_cq *cq, int budget)
+static const struct dim_cq_moder
+rdma_dim_prof[RDMA_DIM_PARAMS_NUM_PROFILES] = {
+ {1, 0, 1, 0},
+ {1, 0, 4, 0},
+ {2, 0, 4, 0},
+ {2, 0, 8, 0},
+ {4, 0, 8, 0},
+ {16, 0, 8, 0},
+ {16, 0, 16, 0},
+ {32, 0, 16, 0},
+ {32, 0, 32, 0},
+};
+
+static void ib_cq_rdma_dim_work(struct work_struct *w)
+{
+ struct dim *dim = container_of(w, struct dim, work);
+ struct ib_cq *cq = dim->priv;
+
+ u16 usec = rdma_dim_prof[dim->profile_ix].usec;
+ u16 comps = rdma_dim_prof[dim->profile_ix].comps;
+
+ dim->state = DIM_START_MEASURE;
+
+ trace_cq_modify(cq, comps, usec);
+ cq->device->ops.modify_cq(cq, comps, usec);
+}
+
+static void rdma_dim_init(struct ib_cq *cq)
+{
+ struct dim *dim;
+
+ if (!cq->device->ops.modify_cq || !cq->device->use_cq_dim ||
+ cq->poll_ctx == IB_POLL_DIRECT)
+ return;
+
+ dim = kzalloc(sizeof(struct dim), GFP_KERNEL);
+ if (!dim)
+ return;
+
+ dim->state = DIM_START_MEASURE;
+ dim->tune_state = DIM_GOING_RIGHT;
+ dim->profile_ix = RDMA_DIM_START_PROFILE;
+ dim->priv = cq;
+ cq->dim = dim;
+
+ INIT_WORK(&dim->work, ib_cq_rdma_dim_work);
+}
+
+static void rdma_dim_destroy(struct ib_cq *cq)
+{
+ if (!cq->dim)
+ return;
+
+ cancel_work_sync(&cq->dim->work);
+ kfree(cq->dim);
+}
+
+static int __poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc)
+{
+ int rc;
+
+ rc = ib_poll_cq(cq, num_entries, wc);
+ trace_cq_poll(cq, num_entries, rc);
+ return rc;
+}
+
+static int __ib_process_cq(struct ib_cq *cq, int budget, struct ib_wc *wcs,
+ int batch)
{
int i, n, completed = 0;
- while ((n = ib_poll_cq(cq, IB_POLL_BATCH, cq->wc)) > 0) {
+ trace_cq_process(cq);
+
+ /*
+ * budget might be (-1) if the caller does not
+ * want to bound this call, thus we need unsigned
+ * minimum here.
+ */
+ while ((n = __poll_cq(cq, min_t(u32, batch,
+ budget - completed), wcs)) > 0) {
for (i = 0; i < n; i++) {
- struct ib_wc *wc = &cq->wc[i];
+ struct ib_wc *wc = &wcs[i];
if (wc->wr_cqe)
wc->wr_cqe->done(cq, wc);
@@ -41,8 +114,7 @@ static int __ib_process_cq(struct ib_cq *cq, int budget)
completed += n;
- if (n != IB_POLL_BATCH ||
- (budget != -1 && completed >= budget))
+ if (n != batch || (budget != -1 && completed >= budget))
break;
}
@@ -50,22 +122,24 @@ static int __ib_process_cq(struct ib_cq *cq, int budget)
}
/**
- * ib_process_direct_cq - process a CQ in caller context
+ * ib_process_cq_direct - process a CQ in caller context
* @cq: CQ to process
* @budget: number of CQEs to poll for
*
- * This function is used to process all outstanding CQ entries on a
- * %IB_POLL_DIRECT CQ. It does not offload CQ processing to a different
- * context and does not ask for completion interrupts from the HCA.
+ * This function is used to process all outstanding CQ entries.
+ * It does not offload CQ processing to a different context and does
+ * not ask for completion interrupts from the HCA.
+ * Using direct processing on CQ with non IB_POLL_DIRECT type may trigger
+ * concurrent processing.
*
- * Note: for compatibility reasons -1 can be passed in %budget for unlimited
- * polling. Do not use this feature in new code, it will be removed soon.
+ * Note: do not pass -1 as %budget unless it is guaranteed that the number
+ * of completions that will be processed is small.
*/
int ib_process_cq_direct(struct ib_cq *cq, int budget)
{
- WARN_ON_ONCE(cq->poll_ctx != IB_POLL_DIRECT);
+ struct ib_wc wcs[IB_POLL_BATCH_DIRECT];
- return __ib_process_cq(cq, budget);
+ return __ib_process_cq(cq, budget, wcs, IB_POLL_BATCH_DIRECT);
}
EXPORT_SYMBOL(ib_process_cq_direct);
@@ -77,20 +151,27 @@ static void ib_cq_completion_direct(struct ib_cq *cq, void *private)
static int ib_poll_handler(struct irq_poll *iop, int budget)
{
struct ib_cq *cq = container_of(iop, struct ib_cq, iop);
+ struct dim *dim = cq->dim;
int completed;
- completed = __ib_process_cq(cq, budget);
+ completed = __ib_process_cq(cq, budget, cq->wc, IB_POLL_BATCH);
if (completed < budget) {
irq_poll_complete(&cq->iop);
- if (ib_req_notify_cq(cq, IB_POLL_FLAGS) > 0)
+ if (ib_req_notify_cq(cq, IB_POLL_FLAGS) > 0) {
+ trace_cq_reschedule(cq);
irq_poll_sched(&cq->iop);
+ }
}
+ if (dim)
+ rdma_dim(dim, completed);
+
return completed;
}
static void ib_cq_completion_softirq(struct ib_cq *cq, void *private)
{
+ trace_cq_schedule(cq);
irq_poll_sched(&cq->iop);
}
@@ -99,32 +180,38 @@ static void ib_cq_poll_work(struct work_struct *work)
struct ib_cq *cq = container_of(work, struct ib_cq, work);
int completed;
- completed = __ib_process_cq(cq, IB_POLL_BUDGET_WORKQUEUE);
+ completed = __ib_process_cq(cq, IB_POLL_BUDGET_WORKQUEUE, cq->wc,
+ IB_POLL_BATCH);
if (completed >= IB_POLL_BUDGET_WORKQUEUE ||
ib_req_notify_cq(cq, IB_POLL_FLAGS) > 0)
- queue_work(ib_comp_wq, &cq->work);
+ queue_work(cq->comp_wq, &cq->work);
+ else if (cq->dim)
+ rdma_dim(cq->dim, completed);
}
static void ib_cq_completion_workqueue(struct ib_cq *cq, void *private)
{
- queue_work(ib_comp_wq, &cq->work);
+ trace_cq_schedule(cq);
+ queue_work(cq->comp_wq, &cq->work);
}
/**
- * ib_alloc_cq - allocate a completion queue
+ * __ib_alloc_cq - allocate a completion queue
* @dev: device to allocate the CQ for
* @private: driver private data, accessible from cq->cq_context
* @nr_cqe: number of CQEs to allocate
* @comp_vector: HCA completion vectors for this CQ
* @poll_ctx: context to poll the CQ from.
+ * @caller: module owner name.
*
* This is the proper interface to allocate a CQ for in-kernel users. A
* CQ allocated with this interface will automatically be polled from the
- * specified context. The ULP needs must use wr->wr_cqe instead of wr->wr_id
+ * specified context. The ULP must use wr->wr_cqe instead of wr->wr_id
* to use this CQ abstraction.
*/
-struct ib_cq *ib_alloc_cq(struct ib_device *dev, void *private,
- int nr_cqe, int comp_vector, enum ib_poll_context poll_ctx)
+struct ib_cq *__ib_alloc_cq(struct ib_device *dev, void *private, int nr_cqe,
+ int comp_vector, enum ib_poll_context poll_ctx,
+ const char *caller)
{
struct ib_cq_init_attr cq_attr = {
.cqe = nr_cqe,
@@ -133,20 +220,28 @@ struct ib_cq *ib_alloc_cq(struct ib_device *dev, void *private,
struct ib_cq *cq;
int ret = -ENOMEM;
- cq = dev->create_cq(dev, &cq_attr, NULL, NULL);
- if (IS_ERR(cq))
- return cq;
+ cq = rdma_zalloc_drv_obj(dev, ib_cq);
+ if (!cq)
+ return ERR_PTR(ret);
cq->device = dev;
- cq->uobject = NULL;
- cq->event_handler = NULL;
cq->cq_context = private;
cq->poll_ctx = poll_ctx;
atomic_set(&cq->usecnt, 0);
+ cq->comp_vector = comp_vector;
cq->wc = kmalloc_array(IB_POLL_BATCH, sizeof(*cq->wc), GFP_KERNEL);
if (!cq->wc)
- goto out_destroy_cq;
+ goto out_free_cq;
+
+ rdma_restrack_new(&cq->res, RDMA_RESTRACK_CQ);
+ rdma_restrack_set_name(&cq->res, caller);
+
+ ret = dev->ops.create_cq(cq, &cq_attr, NULL);
+ if (ret)
+ goto out_free_wc;
+
+ rdma_dim_init(cq);
switch (cq->poll_ctx) {
case IB_POLL_DIRECT:
@@ -159,24 +254,62 @@ struct ib_cq *ib_alloc_cq(struct ib_device *dev, void *private,
ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
break;
case IB_POLL_WORKQUEUE:
+ case IB_POLL_UNBOUND_WORKQUEUE:
cq->comp_handler = ib_cq_completion_workqueue;
INIT_WORK(&cq->work, ib_cq_poll_work);
ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
+ cq->comp_wq = (cq->poll_ctx == IB_POLL_WORKQUEUE) ?
+ ib_comp_wq : ib_comp_unbound_wq;
break;
default:
ret = -EINVAL;
- goto out_free_wc;
+ goto out_destroy_cq;
}
+ rdma_restrack_add(&cq->res);
+ trace_cq_alloc(cq, nr_cqe, comp_vector, poll_ctx);
return cq;
+out_destroy_cq:
+ rdma_dim_destroy(cq);
+ cq->device->ops.destroy_cq(cq, NULL);
out_free_wc:
+ rdma_restrack_put(&cq->res);
kfree(cq->wc);
-out_destroy_cq:
- cq->device->destroy_cq(cq);
+out_free_cq:
+ kfree(cq);
+ trace_cq_alloc_error(nr_cqe, comp_vector, poll_ctx, ret);
return ERR_PTR(ret);
}
-EXPORT_SYMBOL(ib_alloc_cq);
+EXPORT_SYMBOL(__ib_alloc_cq);
+
+/**
+ * __ib_alloc_cq_any - allocate a completion queue
+ * @dev: device to allocate the CQ for
+ * @private: driver private data, accessible from cq->cq_context
+ * @nr_cqe: number of CQEs to allocate
+ * @poll_ctx: context to poll the CQ from
+ * @caller: module owner name
+ *
+ * Attempt to spread ULP Completion Queues over each device's interrupt
+ * vectors. A simple best-effort mechanism is used.
+ */
+struct ib_cq *__ib_alloc_cq_any(struct ib_device *dev, void *private,
+ int nr_cqe, enum ib_poll_context poll_ctx,
+ const char *caller)
+{
+ static atomic_t counter;
+ int comp_vector = 0;
+
+ if (dev->num_comp_vectors > 1)
+ comp_vector =
+ atomic_inc_return(&counter) %
+ min_t(int, dev->num_comp_vectors, num_online_cpus());
+
+ return __ib_alloc_cq(dev, private, nr_cqe, comp_vector, poll_ctx,
+ caller);
+}
+EXPORT_SYMBOL(__ib_alloc_cq_any);
/**
* ib_free_cq - free a completion queue
@@ -184,10 +317,17 @@ EXPORT_SYMBOL(ib_alloc_cq);
*/
void ib_free_cq(struct ib_cq *cq)
{
- int ret;
+ int ret = 0;
if (WARN_ON_ONCE(atomic_read(&cq->usecnt)))
return;
+ if (WARN_ON_ONCE(cq->cqe_used))
+ return;
+
+ if (cq->device->ops.pre_destroy_cq) {
+ ret = cq->device->ops.pre_destroy_cq(cq);
+ WARN_ONCE(ret, "Disable of kernel CQ shouldn't fail");
+ }
switch (cq->poll_ctx) {
case IB_POLL_DIRECT:
@@ -196,14 +336,180 @@ void ib_free_cq(struct ib_cq *cq)
irq_poll_disable(&cq->iop);
break;
case IB_POLL_WORKQUEUE:
- flush_work(&cq->work);
+ case IB_POLL_UNBOUND_WORKQUEUE:
+ cancel_work_sync(&cq->work);
break;
default:
WARN_ON_ONCE(1);
}
+ rdma_dim_destroy(cq);
+ trace_cq_free(cq);
+ if (cq->device->ops.post_destroy_cq)
+ cq->device->ops.post_destroy_cq(cq);
+ else
+ ret = cq->device->ops.destroy_cq(cq, NULL);
+ WARN_ONCE(ret, "Destroy of kernel CQ shouldn't fail");
+ rdma_restrack_del(&cq->res);
kfree(cq->wc);
- ret = cq->device->destroy_cq(cq);
- WARN_ON_ONCE(ret);
+ kfree(cq);
}
EXPORT_SYMBOL(ib_free_cq);
+
+void ib_cq_pool_cleanup(struct ib_device *dev)
+{
+ struct ib_cq *cq, *n;
+ unsigned int i;
+
+ for (i = 0; i < ARRAY_SIZE(dev->cq_pools); i++) {
+ list_for_each_entry_safe(cq, n, &dev->cq_pools[i],
+ pool_entry) {
+ WARN_ON(cq->cqe_used);
+ list_del(&cq->pool_entry);
+ cq->shared = false;
+ ib_free_cq(cq);
+ }
+ }
+}
+
+static int ib_alloc_cqs(struct ib_device *dev, unsigned int nr_cqes,
+ enum ib_poll_context poll_ctx)
+{
+ LIST_HEAD(tmp_list);
+ unsigned int nr_cqs, i;
+ struct ib_cq *cq, *n;
+ int ret;
+
+ if (poll_ctx > IB_POLL_LAST_POOL_TYPE) {
+ WARN_ON_ONCE(poll_ctx > IB_POLL_LAST_POOL_TYPE);
+ return -EINVAL;
+ }
+
+ /*
+ * Allocate at least as many CQEs as requested, and otherwise
+ * a reasonable batch size so that we can share CQs between
+ * multiple users instead of allocating a larger number of CQs.
+ */
+ nr_cqes = min_t(unsigned int, dev->attrs.max_cqe,
+ max(nr_cqes, IB_MAX_SHARED_CQ_SZ));
+ nr_cqs = min_t(unsigned int, dev->num_comp_vectors, num_online_cpus());
+ for (i = 0; i < nr_cqs; i++) {
+ cq = ib_alloc_cq(dev, NULL, nr_cqes, i, poll_ctx);
+ if (IS_ERR(cq)) {
+ ret = PTR_ERR(cq);
+ goto out_free_cqs;
+ }
+ cq->shared = true;
+ list_add_tail(&cq->pool_entry, &tmp_list);
+ }
+
+ spin_lock_irq(&dev->cq_pools_lock);
+ list_splice(&tmp_list, &dev->cq_pools[poll_ctx]);
+ spin_unlock_irq(&dev->cq_pools_lock);
+
+ return 0;
+
+out_free_cqs:
+ list_for_each_entry_safe(cq, n, &tmp_list, pool_entry) {
+ cq->shared = false;
+ ib_free_cq(cq);
+ }
+ return ret;
+}
+
+/**
+ * ib_cq_pool_get() - Find the least used completion queue that matches
+ * a given cpu hint (or least used for wild card affinity) and fits
+ * nr_cqe.
+ * @dev: rdma device
+ * @nr_cqe: number of needed cqe entries
+ * @comp_vector_hint: completion vector hint (-1) for the driver to assign
+ * a comp vector based on internal counter
+ * @poll_ctx: cq polling context
+ *
+ * Finds a cq that satisfies @comp_vector_hint and @nr_cqe requirements and
+ * claim entries in it for us. In case there is no available cq, allocate
+ * a new cq with the requirements and add it to the device pool.
+ * IB_POLL_DIRECT cannot be used for shared cqs so it is not a valid value
+ * for @poll_ctx.
+ */
+struct ib_cq *ib_cq_pool_get(struct ib_device *dev, unsigned int nr_cqe,
+ int comp_vector_hint,
+ enum ib_poll_context poll_ctx)
+{
+ static unsigned int default_comp_vector;
+ unsigned int vector, num_comp_vectors;
+ struct ib_cq *cq, *found = NULL;
+ int ret;
+
+ if (poll_ctx > IB_POLL_LAST_POOL_TYPE) {
+ WARN_ON_ONCE(poll_ctx > IB_POLL_LAST_POOL_TYPE);
+ return ERR_PTR(-EINVAL);
+ }
+
+ num_comp_vectors =
+ min_t(unsigned int, dev->num_comp_vectors, num_online_cpus());
+ /* Project the affinty to the device completion vector range */
+ if (comp_vector_hint < 0) {
+ comp_vector_hint =
+ (READ_ONCE(default_comp_vector) + 1) % num_comp_vectors;
+ WRITE_ONCE(default_comp_vector, comp_vector_hint);
+ }
+ vector = comp_vector_hint % num_comp_vectors;
+
+ /*
+ * Find the least used CQ with correct affinity and
+ * enough free CQ entries
+ */
+ while (!found) {
+ spin_lock_irq(&dev->cq_pools_lock);
+ list_for_each_entry(cq, &dev->cq_pools[poll_ctx],
+ pool_entry) {
+ /*
+ * Check to see if we have found a CQ with the
+ * correct completion vector
+ */
+ if (vector != cq->comp_vector)
+ continue;
+ if (cq->cqe_used + nr_cqe > cq->cqe)
+ continue;
+ found = cq;
+ break;
+ }
+
+ if (found) {
+ found->cqe_used += nr_cqe;
+ spin_unlock_irq(&dev->cq_pools_lock);
+
+ return found;
+ }
+ spin_unlock_irq(&dev->cq_pools_lock);
+
+ /*
+ * Didn't find a match or ran out of CQs in the device
+ * pool, allocate a new array of CQs.
+ */
+ ret = ib_alloc_cqs(dev, nr_cqe, poll_ctx);
+ if (ret)
+ return ERR_PTR(ret);
+ }
+
+ return found;
+}
+EXPORT_SYMBOL(ib_cq_pool_get);
+
+/**
+ * ib_cq_pool_put - Return a CQ taken from a shared pool.
+ * @cq: The CQ to return.
+ * @nr_cqe: The max number of cqes that the user had requested.
+ */
+void ib_cq_pool_put(struct ib_cq *cq, unsigned int nr_cqe)
+{
+ if (WARN_ON_ONCE(nr_cqe > cq->cqe_used))
+ return;
+
+ spin_lock_irq(&cq->device->cq_pools_lock);
+ cq->cqe_used -= nr_cqe;
+ spin_unlock_irq(&cq->device->cq_pools_lock);
+}
+EXPORT_SYMBOL(ib_cq_pool_put);
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 571974cd3919..13e8a1714bbd 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -37,67 +37,254 @@
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/init.h>
-#include <linux/mutex.h>
#include <linux/netdevice.h>
+#include <net/net_namespace.h>
+#include <linux/security.h>
+#include <linux/notifier.h>
+#include <linux/hashtable.h>
#include <rdma/rdma_netlink.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_cache.h>
+#include <rdma/rdma_counter.h>
#include "core_priv.h"
+#include "restrack.h"
MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("core kernel InfiniBand API");
MODULE_LICENSE("Dual BSD/GPL");
-struct ib_client_data {
- struct list_head list;
- struct ib_client *client;
- void * data;
- /* The device or client is going down. Do not call client or device
- * callbacks other than remove(). */
- bool going_down;
-};
-
struct workqueue_struct *ib_comp_wq;
+struct workqueue_struct *ib_comp_unbound_wq;
struct workqueue_struct *ib_wq;
EXPORT_SYMBOL_GPL(ib_wq);
+static struct workqueue_struct *ib_unreg_wq;
-/* The device_list and client_list contain devices and clients after their
- * registration has completed, and the devices and clients are removed
- * during unregistration. */
-static LIST_HEAD(device_list);
-static LIST_HEAD(client_list);
+/*
+ * Each of the three rwsem locks (devices, clients, client_data) protects the
+ * xarray of the same name. Specifically it allows the caller to assert that
+ * the MARK will/will not be changing under the lock, and for devices and
+ * clients, that the value in the xarray is still a valid pointer. Change of
+ * the MARK is linked to the object state, so holding the lock and testing the
+ * MARK also asserts that the contained object is in a certain state.
+ *
+ * This is used to build a two stage register/unregister flow where objects
+ * can continue to be in the xarray even though they are still in progress to
+ * register/unregister.
+ *
+ * The xarray itself provides additional locking, and restartable iteration,
+ * which is also relied on.
+ *
+ * Locks should not be nested, with the exception of client_data, which is
+ * allowed to nest under the read side of the other two locks.
+ *
+ * The devices_rwsem also protects the device name list, any change or
+ * assignment of device name must also hold the write side to guarantee unique
+ * names.
+ */
/*
- * device_mutex and lists_rwsem protect access to both device_list and
- * client_list. device_mutex protects writer access by device and client
- * registration / de-registration. lists_rwsem protects reader access to
- * these lists. Iterators of these lists must lock it for read, while updates
- * to the lists must be done with a write lock. A special case is when the
- * device_mutex is locked. In this case locking the lists for read access is
- * not necessary as the device_mutex implies it.
+ * devices contains devices that have had their names assigned. The
+ * devices may not be registered. Users that care about the registration
+ * status need to call ib_device_try_get() on the device to ensure it is
+ * registered, and keep it registered, for the required duration.
*
- * lists_rwsem also protects access to the client data list.
*/
-static DEFINE_MUTEX(device_mutex);
-static DECLARE_RWSEM(lists_rwsem);
+static DEFINE_XARRAY_FLAGS(devices, XA_FLAGS_ALLOC);
+static DECLARE_RWSEM(devices_rwsem);
+#define DEVICE_REGISTERED XA_MARK_1
+
+static u32 highest_client_id;
+#define CLIENT_REGISTERED XA_MARK_1
+static DEFINE_XARRAY_FLAGS(clients, XA_FLAGS_ALLOC);
+static DECLARE_RWSEM(clients_rwsem);
+
+static void ib_client_put(struct ib_client *client)
+{
+ if (refcount_dec_and_test(&client->uses))
+ complete(&client->uses_zero);
+}
+
+/*
+ * If client_data is registered then the corresponding client must also still
+ * be registered.
+ */
+#define CLIENT_DATA_REGISTERED XA_MARK_1
+unsigned int rdma_dev_net_id;
-static int ib_device_check_mandatory(struct ib_device *device)
+/*
+ * A list of net namespaces is maintained in an xarray. This is necessary
+ * because we can't get the locking right using the existing net ns list. We
+ * would require a init_net callback after the list is updated.
+ */
+static DEFINE_XARRAY_FLAGS(rdma_nets, XA_FLAGS_ALLOC);
+/*
+ * rwsem to protect accessing the rdma_nets xarray entries.
+ */
+static DECLARE_RWSEM(rdma_nets_rwsem);
+
+bool ib_devices_shared_netns = true;
+module_param_named(netns_mode, ib_devices_shared_netns, bool, 0444);
+MODULE_PARM_DESC(netns_mode,
+ "Share device among net namespaces; default=1 (shared)");
+/**
+ * rdma_dev_access_netns() - Return whether an rdma device can be accessed
+ * from a specified net namespace or not.
+ * @dev: Pointer to rdma device which needs to be checked
+ * @net: Pointer to net namesapce for which access to be checked
+ *
+ * When the rdma device is in shared mode, it ignores the net namespace.
+ * When the rdma device is exclusive to a net namespace, rdma device net
+ * namespace is checked against the specified one.
+ */
+bool rdma_dev_access_netns(const struct ib_device *dev, const struct net *net)
{
-#define IB_MANDATORY_FUNC(x) { offsetof(struct ib_device, x), #x }
+ return (ib_devices_shared_netns ||
+ net_eq(read_pnet(&dev->coredev.rdma_net), net));
+}
+EXPORT_SYMBOL(rdma_dev_access_netns);
+
+/**
+ * rdma_dev_has_raw_cap() - Returns whether a specified rdma device has
+ * CAP_NET_RAW capability or not.
+ *
+ * @dev: Pointer to rdma device whose capability to be checked
+ *
+ * Returns true if a rdma device's owning user namespace has CAP_NET_RAW
+ * capability, otherwise false. When rdma subsystem is in legacy shared network,
+ * namespace mode, the default net namespace is considered.
+ */
+bool rdma_dev_has_raw_cap(const struct ib_device *dev)
+{
+ const struct net *net;
+
+ /* Network namespace is the resource whose user namespace
+ * to be considered. When in shared mode, there is no reliable
+ * network namespace resource, so consider the default net namespace.
+ */
+ if (ib_devices_shared_netns)
+ net = &init_net;
+ else
+ net = read_pnet(&dev->coredev.rdma_net);
+
+ return ns_capable(net->user_ns, CAP_NET_RAW);
+}
+EXPORT_SYMBOL(rdma_dev_has_raw_cap);
+
+/*
+ * xarray has this behavior where it won't iterate over NULL values stored in
+ * allocated arrays. So we need our own iterator to see all values stored in
+ * the array. This does the same thing as xa_for_each except that it also
+ * returns NULL valued entries if the array is allocating. Simplified to only
+ * work on simple xarrays.
+ */
+static void *xan_find_marked(struct xarray *xa, unsigned long *indexp,
+ xa_mark_t filter)
+{
+ XA_STATE(xas, xa, *indexp);
+ void *entry;
+
+ rcu_read_lock();
+ do {
+ entry = xas_find_marked(&xas, ULONG_MAX, filter);
+ if (xa_is_zero(entry))
+ break;
+ } while (xas_retry(&xas, entry));
+ rcu_read_unlock();
+
+ if (entry) {
+ *indexp = xas.xa_index;
+ if (xa_is_zero(entry))
+ return NULL;
+ return entry;
+ }
+ return XA_ERROR(-ENOENT);
+}
+#define xan_for_each_marked(xa, index, entry, filter) \
+ for (index = 0, entry = xan_find_marked(xa, &(index), filter); \
+ !xa_is_err(entry); \
+ (index)++, entry = xan_find_marked(xa, &(index), filter))
+
+/* RCU hash table mapping netdevice pointers to struct ib_port_data */
+static DEFINE_SPINLOCK(ndev_hash_lock);
+static DECLARE_HASHTABLE(ndev_hash, 5);
+
+static void free_netdevs(struct ib_device *ib_dev);
+static void ib_unregister_work(struct work_struct *work);
+static void __ib_unregister_device(struct ib_device *device);
+static int ib_security_change(struct notifier_block *nb, unsigned long event,
+ void *lsm_data);
+static void ib_policy_change_task(struct work_struct *work);
+static DECLARE_WORK(ib_policy_change_work, ib_policy_change_task);
+
+static void __ibdev_printk(const char *level, const struct ib_device *ibdev,
+ struct va_format *vaf)
+{
+ if (ibdev && ibdev->dev.parent)
+ dev_printk_emit(level[1] - '0',
+ ibdev->dev.parent,
+ "%s %s %s: %pV",
+ dev_driver_string(ibdev->dev.parent),
+ dev_name(ibdev->dev.parent),
+ dev_name(&ibdev->dev),
+ vaf);
+ else if (ibdev)
+ printk("%s%s: %pV",
+ level, dev_name(&ibdev->dev), vaf);
+ else
+ printk("%s(NULL ib_device): %pV", level, vaf);
+}
+
+#define define_ibdev_printk_level(func, level) \
+void func(const struct ib_device *ibdev, const char *fmt, ...) \
+{ \
+ struct va_format vaf; \
+ va_list args; \
+ \
+ va_start(args, fmt); \
+ \
+ vaf.fmt = fmt; \
+ vaf.va = &args; \
+ \
+ __ibdev_printk(level, ibdev, &vaf); \
+ \
+ va_end(args); \
+} \
+EXPORT_SYMBOL(func);
+
+define_ibdev_printk_level(ibdev_emerg, KERN_EMERG);
+define_ibdev_printk_level(ibdev_alert, KERN_ALERT);
+define_ibdev_printk_level(ibdev_crit, KERN_CRIT);
+define_ibdev_printk_level(ibdev_err, KERN_ERR);
+define_ibdev_printk_level(ibdev_warn, KERN_WARNING);
+define_ibdev_printk_level(ibdev_notice, KERN_NOTICE);
+define_ibdev_printk_level(ibdev_info, KERN_INFO);
+
+static struct notifier_block ibdev_lsm_nb = {
+ .notifier_call = ib_security_change,
+};
+
+static int rdma_dev_change_netns(struct ib_device *device, struct net *cur_net,
+ struct net *net);
+
+/* Pointer to the RCU head at the start of the ib_port_data array */
+struct ib_port_data_rcu {
+ struct rcu_head rcu_head;
+ struct ib_port_data pdata[];
+};
+
+static void ib_device_check_mandatory(struct ib_device *device)
+{
+#define IB_MANDATORY_FUNC(x) { offsetof(struct ib_device_ops, x), #x }
static const struct {
size_t offset;
char *name;
} mandatory_table[] = {
IB_MANDATORY_FUNC(query_device),
IB_MANDATORY_FUNC(query_port),
- IB_MANDATORY_FUNC(query_pkey),
- IB_MANDATORY_FUNC(query_gid),
IB_MANDATORY_FUNC(alloc_pd),
IB_MANDATORY_FUNC(dealloc_pd),
- IB_MANDATORY_FUNC(create_ah),
- IB_MANDATORY_FUNC(destroy_ah),
IB_MANDATORY_FUNC(create_qp),
IB_MANDATORY_FUNC(modify_qp),
IB_MANDATORY_FUNC(destroy_qp),
@@ -108,81 +295,238 @@ static int ib_device_check_mandatory(struct ib_device *device)
IB_MANDATORY_FUNC(poll_cq),
IB_MANDATORY_FUNC(req_notify_cq),
IB_MANDATORY_FUNC(get_dma_mr),
+ IB_MANDATORY_FUNC(reg_user_mr),
IB_MANDATORY_FUNC(dereg_mr),
IB_MANDATORY_FUNC(get_port_immutable)
};
int i;
+ device->kverbs_provider = true;
for (i = 0; i < ARRAY_SIZE(mandatory_table); ++i) {
- if (!*(void **) ((void *) device + mandatory_table[i].offset)) {
- pr_warn("Device %s is missing mandatory function %s\n",
- device->name, mandatory_table[i].name);
- return -EINVAL;
+ if (!*(void **) ((void *) &device->ops +
+ mandatory_table[i].offset)) {
+ device->kverbs_provider = false;
+ break;
}
}
+}
- return 0;
+/*
+ * Caller must perform ib_device_put() to return the device reference count
+ * when ib_device_get_by_index() returns valid device pointer.
+ */
+struct ib_device *ib_device_get_by_index(const struct net *net, u32 index)
+{
+ struct ib_device *device;
+
+ down_read(&devices_rwsem);
+ device = xa_load(&devices, index);
+ if (device) {
+ if (!rdma_dev_access_netns(device, net)) {
+ device = NULL;
+ goto out;
+ }
+
+ if (!ib_device_try_get(device))
+ device = NULL;
+ }
+out:
+ up_read(&devices_rwsem);
+ return device;
+}
+
+/**
+ * ib_device_put - Release IB device reference
+ * @device: device whose reference to be released
+ *
+ * ib_device_put() releases reference to the IB device to allow it to be
+ * unregistered and eventually free.
+ */
+void ib_device_put(struct ib_device *device)
+{
+ if (refcount_dec_and_test(&device->refcount))
+ complete(&device->unreg_completion);
}
+EXPORT_SYMBOL(ib_device_put);
static struct ib_device *__ib_device_get_by_name(const char *name)
{
struct ib_device *device;
+ unsigned long index;
- list_for_each_entry(device, &device_list, core_list)
- if (!strncmp(name, device->name, IB_DEVICE_NAME_MAX))
+ xa_for_each (&devices, index, device)
+ if (!strcmp(name, dev_name(&device->dev)))
return device;
return NULL;
}
+/**
+ * ib_device_get_by_name - Find an IB device by name
+ * @name: The name to look for
+ * @driver_id: The driver ID that must match (RDMA_DRIVER_UNKNOWN matches all)
+ *
+ * Find and hold an ib_device by its name. The caller must call
+ * ib_device_put() on the returned pointer.
+ */
+struct ib_device *ib_device_get_by_name(const char *name,
+ enum rdma_driver_id driver_id)
+{
+ struct ib_device *device;
+
+ down_read(&devices_rwsem);
+ device = __ib_device_get_by_name(name);
+ if (device && driver_id != RDMA_DRIVER_UNKNOWN &&
+ device->ops.driver_id != driver_id)
+ device = NULL;
+
+ if (device) {
+ if (!ib_device_try_get(device))
+ device = NULL;
+ }
+ up_read(&devices_rwsem);
+ return device;
+}
+EXPORT_SYMBOL(ib_device_get_by_name);
+
+static int rename_compat_devs(struct ib_device *device)
+{
+ struct ib_core_device *cdev;
+ unsigned long index;
+ int ret = 0;
+
+ mutex_lock(&device->compat_devs_mutex);
+ xa_for_each (&device->compat_devs, index, cdev) {
+ ret = device_rename(&cdev->dev, dev_name(&device->dev));
+ if (ret) {
+ dev_warn(&cdev->dev,
+ "Fail to rename compatdev to new name %s\n",
+ dev_name(&device->dev));
+ break;
+ }
+ }
+ mutex_unlock(&device->compat_devs_mutex);
+ return ret;
+}
-static int alloc_name(char *name)
+int ib_device_rename(struct ib_device *ibdev, const char *name)
+{
+ unsigned long index;
+ void *client_data;
+ int ret;
+
+ down_write(&devices_rwsem);
+ if (!strcmp(name, dev_name(&ibdev->dev))) {
+ up_write(&devices_rwsem);
+ return 0;
+ }
+
+ if (__ib_device_get_by_name(name)) {
+ up_write(&devices_rwsem);
+ return -EEXIST;
+ }
+
+ ret = device_rename(&ibdev->dev, name);
+ if (ret) {
+ up_write(&devices_rwsem);
+ return ret;
+ }
+
+ strscpy(ibdev->name, name, IB_DEVICE_NAME_MAX);
+ ret = rename_compat_devs(ibdev);
+
+ downgrade_write(&devices_rwsem);
+ down_read(&ibdev->client_data_rwsem);
+ xan_for_each_marked(&ibdev->client_data, index, client_data,
+ CLIENT_DATA_REGISTERED) {
+ struct ib_client *client = xa_load(&clients, index);
+
+ if (!client || !client->rename)
+ continue;
+
+ client->rename(ibdev, client_data);
+ }
+ up_read(&ibdev->client_data_rwsem);
+ rdma_nl_notify_event(ibdev, 0, RDMA_RENAME_EVENT);
+ up_read(&devices_rwsem);
+ return 0;
+}
+
+int ib_device_set_dim(struct ib_device *ibdev, u8 use_dim)
+{
+ if (use_dim > 1)
+ return -EINVAL;
+ ibdev->use_cq_dim = use_dim;
+
+ return 0;
+}
+
+static int alloc_name(struct ib_device *ibdev, const char *name)
{
- unsigned long *inuse;
- char buf[IB_DEVICE_NAME_MAX];
struct ib_device *device;
+ unsigned long index;
+ struct ida inuse;
+ int rc;
int i;
- inuse = (unsigned long *) get_zeroed_page(GFP_KERNEL);
- if (!inuse)
- return -ENOMEM;
+ lockdep_assert_held_write(&devices_rwsem);
+ ida_init(&inuse);
+ xa_for_each (&devices, index, device) {
+ char buf[IB_DEVICE_NAME_MAX];
- list_for_each_entry(device, &device_list, core_list) {
- if (!sscanf(device->name, name, &i))
+ if (sscanf(dev_name(&device->dev), name, &i) != 1)
continue;
- if (i < 0 || i >= PAGE_SIZE * 8)
+ if (i < 0 || i >= INT_MAX)
continue;
snprintf(buf, sizeof buf, name, i);
- if (!strncmp(buf, device->name, IB_DEVICE_NAME_MAX))
- set_bit(i, inuse);
- }
+ if (strcmp(buf, dev_name(&device->dev)) != 0)
+ continue;
- i = find_first_zero_bit(inuse, PAGE_SIZE * 8);
- free_page((unsigned long) inuse);
- snprintf(buf, sizeof buf, name, i);
+ rc = ida_alloc_range(&inuse, i, i, GFP_KERNEL);
+ if (rc < 0)
+ goto out;
+ }
- if (__ib_device_get_by_name(buf))
- return -ENFILE;
+ rc = ida_alloc(&inuse, GFP_KERNEL);
+ if (rc < 0)
+ goto out;
- strlcpy(name, buf, IB_DEVICE_NAME_MAX);
- return 0;
+ rc = dev_set_name(&ibdev->dev, name, rc);
+out:
+ ida_destroy(&inuse);
+ return rc;
}
static void ib_device_release(struct device *device)
{
struct ib_device *dev = container_of(device, struct ib_device, dev);
- ib_cache_release_one(dev);
- kfree(dev->port_immutable);
- kfree(dev);
+ free_netdevs(dev);
+ WARN_ON(refcount_read(&dev->refcount));
+ if (dev->hw_stats_data)
+ ib_device_release_hw_stats(dev->hw_stats_data);
+ if (dev->port_data) {
+ ib_cache_release_one(dev);
+ ib_security_release_port_pkey_list(dev);
+ rdma_counter_release(dev);
+ kfree_rcu(container_of(dev->port_data, struct ib_port_data_rcu,
+ pdata[0]),
+ rcu_head);
+ }
+
+ mutex_destroy(&dev->subdev_lock);
+ mutex_destroy(&dev->unregistration_lock);
+ mutex_destroy(&dev->compat_devs_mutex);
+
+ xa_destroy(&dev->compat_devs);
+ xa_destroy(&dev->client_data);
+ kfree_rcu(dev, rcu_head);
}
-static int ib_device_uevent(struct device *device,
+static int ib_device_uevent(const struct device *device,
struct kobj_uevent_env *env)
{
- struct ib_device *dev = container_of(device, struct ib_device, dev);
-
- if (add_uevent_var(env, "NAME=%s", dev->name))
+ if (add_uevent_var(env, "NAME=%s", dev_name(device)))
return -ENOMEM;
/*
@@ -192,15 +536,56 @@ static int ib_device_uevent(struct device *device,
return 0;
}
+static const void *net_namespace(const struct device *d)
+{
+ const struct ib_core_device *coredev =
+ container_of(d, struct ib_core_device, dev);
+
+ return read_pnet(&coredev->rdma_net);
+}
+
static struct class ib_class = {
.name = "infiniband",
.dev_release = ib_device_release,
.dev_uevent = ib_device_uevent,
+ .ns_type = &net_ns_type_operations,
+ .namespace = net_namespace,
};
+static void rdma_init_coredev(struct ib_core_device *coredev,
+ struct ib_device *dev, struct net *net)
+{
+ bool is_full_dev = &dev->coredev == coredev;
+
+ /* This BUILD_BUG_ON is intended to catch layout change
+ * of union of ib_core_device and device.
+ * dev must be the first element as ib_core and providers
+ * driver uses it. Adding anything in ib_core_device before
+ * device will break this assumption.
+ */
+ BUILD_BUG_ON(offsetof(struct ib_device, coredev.dev) !=
+ offsetof(struct ib_device, dev));
+
+ coredev->dev.class = &ib_class;
+ coredev->dev.groups = dev->groups;
+
+ /*
+ * Don't expose hw counters outside of the init namespace.
+ */
+ if (!is_full_dev && dev->hw_stats_attr_index)
+ coredev->dev.groups[dev->hw_stats_attr_index] = NULL;
+
+ device_initialize(&coredev->dev);
+ coredev->owner = dev;
+ INIT_LIST_HEAD(&coredev->port_list);
+ write_pnet(&coredev->rdma_net, net);
+}
+
/**
- * ib_alloc_device - allocate an IB device struct
+ * _ib_alloc_device - allocate an IB device struct
* @size:size of structure to allocate
+ * @net: network namespace device should be located in, namespace
+ * must stay valid until ib_register_device() is completed.
*
* Low-level drivers should use ib_alloc_device() to allocate &struct
* ib_device. @size is the size of the structure to be allocated,
@@ -208,9 +593,10 @@ static struct class ib_class = {
* ib_dealloc_device() must be used to free structures allocated with
* ib_alloc_device().
*/
-struct ib_device *ib_alloc_device(size_t size)
+struct ib_device *_ib_alloc_device(size_t size, struct net *net)
{
struct ib_device *device;
+ unsigned int i;
if (WARN_ON(size < sizeof(struct ib_device)))
return NULL;
@@ -219,20 +605,81 @@ struct ib_device *ib_alloc_device(size_t size)
if (!device)
return NULL;
- device->dev.class = &ib_class;
- device_initialize(&device->dev);
+ if (rdma_restrack_init(device)) {
+ kfree(device);
+ return NULL;
+ }
- dev_set_drvdata(&device->dev, device);
+ /* ib_devices_shared_netns can't change while we have active namespaces
+ * in the system which means either init_net is passed or the user has
+ * no idea what they are doing.
+ *
+ * To avoid breaking backward compatibility, when in shared mode,
+ * force to init the device in the init_net.
+ */
+ net = ib_devices_shared_netns ? &init_net : net;
+ rdma_init_coredev(&device->coredev, device, net);
INIT_LIST_HEAD(&device->event_handler_list);
- spin_lock_init(&device->event_handler_lock);
- spin_lock_init(&device->client_data_lock);
- INIT_LIST_HEAD(&device->client_data_list);
- INIT_LIST_HEAD(&device->port_list);
+ spin_lock_init(&device->qp_open_list_lock);
+ init_rwsem(&device->event_handler_rwsem);
+ mutex_init(&device->unregistration_lock);
+ /*
+ * client_data needs to be alloc because we don't want our mark to be
+ * destroyed if the user stores NULL in the client data.
+ */
+ xa_init_flags(&device->client_data, XA_FLAGS_ALLOC);
+ init_rwsem(&device->client_data_rwsem);
+ xa_init_flags(&device->compat_devs, XA_FLAGS_ALLOC);
+ mutex_init(&device->compat_devs_mutex);
+ init_completion(&device->unreg_completion);
+ INIT_WORK(&device->unregistration_work, ib_unregister_work);
+
+ spin_lock_init(&device->cq_pools_lock);
+ for (i = 0; i < ARRAY_SIZE(device->cq_pools); i++)
+ INIT_LIST_HEAD(&device->cq_pools[i]);
+
+ rwlock_init(&device->cache_lock);
+
+ device->uverbs_cmd_mask =
+ BIT_ULL(IB_USER_VERBS_CMD_ALLOC_MW) |
+ BIT_ULL(IB_USER_VERBS_CMD_ALLOC_PD) |
+ BIT_ULL(IB_USER_VERBS_CMD_ATTACH_MCAST) |
+ BIT_ULL(IB_USER_VERBS_CMD_CLOSE_XRCD) |
+ BIT_ULL(IB_USER_VERBS_CMD_CREATE_AH) |
+ BIT_ULL(IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
+ BIT_ULL(IB_USER_VERBS_CMD_CREATE_CQ) |
+ BIT_ULL(IB_USER_VERBS_CMD_CREATE_QP) |
+ BIT_ULL(IB_USER_VERBS_CMD_CREATE_SRQ) |
+ BIT_ULL(IB_USER_VERBS_CMD_CREATE_XSRQ) |
+ BIT_ULL(IB_USER_VERBS_CMD_DEALLOC_MW) |
+ BIT_ULL(IB_USER_VERBS_CMD_DEALLOC_PD) |
+ BIT_ULL(IB_USER_VERBS_CMD_DEREG_MR) |
+ BIT_ULL(IB_USER_VERBS_CMD_DESTROY_AH) |
+ BIT_ULL(IB_USER_VERBS_CMD_DESTROY_CQ) |
+ BIT_ULL(IB_USER_VERBS_CMD_DESTROY_QP) |
+ BIT_ULL(IB_USER_VERBS_CMD_DESTROY_SRQ) |
+ BIT_ULL(IB_USER_VERBS_CMD_DETACH_MCAST) |
+ BIT_ULL(IB_USER_VERBS_CMD_GET_CONTEXT) |
+ BIT_ULL(IB_USER_VERBS_CMD_MODIFY_QP) |
+ BIT_ULL(IB_USER_VERBS_CMD_MODIFY_SRQ) |
+ BIT_ULL(IB_USER_VERBS_CMD_OPEN_QP) |
+ BIT_ULL(IB_USER_VERBS_CMD_OPEN_XRCD) |
+ BIT_ULL(IB_USER_VERBS_CMD_QUERY_DEVICE) |
+ BIT_ULL(IB_USER_VERBS_CMD_QUERY_PORT) |
+ BIT_ULL(IB_USER_VERBS_CMD_QUERY_QP) |
+ BIT_ULL(IB_USER_VERBS_CMD_QUERY_SRQ) |
+ BIT_ULL(IB_USER_VERBS_CMD_REG_MR) |
+ BIT_ULL(IB_USER_VERBS_CMD_REREG_MR) |
+ BIT_ULL(IB_USER_VERBS_CMD_RESIZE_CQ);
+
+ mutex_init(&device->subdev_lock);
+ INIT_LIST_HEAD(&device->subdev_list_head);
+ INIT_LIST_HEAD(&device->subdev_list);
return device;
}
-EXPORT_SYMBOL(ib_alloc_device);
+EXPORT_SYMBOL(_ib_alloc_device);
/**
* ib_dealloc_device - free an IB device struct
@@ -242,63 +689,200 @@ EXPORT_SYMBOL(ib_alloc_device);
*/
void ib_dealloc_device(struct ib_device *device)
{
- WARN_ON(device->reg_state != IB_DEV_UNREGISTERED &&
- device->reg_state != IB_DEV_UNINITIALIZED);
- kobject_put(&device->dev.kobj);
+ if (device->ops.dealloc_driver)
+ device->ops.dealloc_driver(device);
+
+ /*
+ * ib_unregister_driver() requires all devices to remain in the xarray
+ * while their ops are callable. The last op we call is dealloc_driver
+ * above. This is needed to create a fence on op callbacks prior to
+ * allowing the driver module to unload.
+ */
+ down_write(&devices_rwsem);
+ if (xa_load(&devices, device->index) == device)
+ xa_erase(&devices, device->index);
+ up_write(&devices_rwsem);
+
+ /* Expedite releasing netdev references */
+ free_netdevs(device);
+
+ WARN_ON(!xa_empty(&device->compat_devs));
+ WARN_ON(!xa_empty(&device->client_data));
+ WARN_ON(refcount_read(&device->refcount));
+ rdma_restrack_clean(device);
+ /* Balances with device_initialize */
+ put_device(&device->dev);
}
EXPORT_SYMBOL(ib_dealloc_device);
-static int add_client_context(struct ib_device *device, struct ib_client *client)
+/*
+ * add_client_context() and remove_client_context() must be safe against
+ * parallel calls on the same device - registration/unregistration of both the
+ * device and client can be occurring in parallel.
+ *
+ * The routines need to be a fence, any caller must not return until the add
+ * or remove is fully completed.
+ */
+static int add_client_context(struct ib_device *device,
+ struct ib_client *client)
{
- struct ib_client_data *context;
- unsigned long flags;
+ int ret = 0;
- context = kmalloc(sizeof *context, GFP_KERNEL);
- if (!context)
- return -ENOMEM;
+ if (!device->kverbs_provider && !client->no_kverbs_req)
+ return 0;
- context->client = client;
- context->data = NULL;
- context->going_down = false;
+ down_write(&device->client_data_rwsem);
+ /*
+ * So long as the client is registered hold both the client and device
+ * unregistration locks.
+ */
+ if (!refcount_inc_not_zero(&client->uses))
+ goto out_unlock;
+ refcount_inc(&device->refcount);
- down_write(&lists_rwsem);
- spin_lock_irqsave(&device->client_data_lock, flags);
- list_add(&context->list, &device->client_data_list);
- spin_unlock_irqrestore(&device->client_data_lock, flags);
- up_write(&lists_rwsem);
+ /*
+ * Another caller to add_client_context got here first and has already
+ * completely initialized context.
+ */
+ if (xa_get_mark(&device->client_data, client->client_id,
+ CLIENT_DATA_REGISTERED))
+ goto out;
+ ret = xa_err(xa_store(&device->client_data, client->client_id, NULL,
+ GFP_KERNEL));
+ if (ret)
+ goto out;
+ downgrade_write(&device->client_data_rwsem);
+ if (client->add) {
+ if (client->add(device)) {
+ /*
+ * If a client fails to add then the error code is
+ * ignored, but we won't call any more ops on this
+ * client.
+ */
+ xa_erase(&device->client_data, client->client_id);
+ up_read(&device->client_data_rwsem);
+ ib_device_put(device);
+ ib_client_put(client);
+ return 0;
+ }
+ }
+
+ /* Readers shall not see a client until add has been completed */
+ xa_set_mark(&device->client_data, client->client_id,
+ CLIENT_DATA_REGISTERED);
+ up_read(&device->client_data_rwsem);
return 0;
+
+out:
+ ib_device_put(device);
+ ib_client_put(client);
+out_unlock:
+ up_write(&device->client_data_rwsem);
+ return ret;
}
-static int verify_immutable(const struct ib_device *dev, u8 port)
+static void remove_client_context(struct ib_device *device,
+ unsigned int client_id)
{
- return WARN_ON(!rdma_cap_ib_mad(dev, port) &&
- rdma_max_mad_size(dev, port) != 0);
+ struct ib_client *client;
+ void *client_data;
+
+ down_write(&device->client_data_rwsem);
+ if (!xa_get_mark(&device->client_data, client_id,
+ CLIENT_DATA_REGISTERED)) {
+ up_write(&device->client_data_rwsem);
+ return;
+ }
+ client_data = xa_load(&device->client_data, client_id);
+ xa_clear_mark(&device->client_data, client_id, CLIENT_DATA_REGISTERED);
+ client = xa_load(&clients, client_id);
+ up_write(&device->client_data_rwsem);
+
+ /*
+ * Notice we cannot be holding any exclusive locks when calling the
+ * remove callback as the remove callback can recurse back into any
+ * public functions in this module and thus try for any locks those
+ * functions take.
+ *
+ * For this reason clients and drivers should not call the
+ * unregistration functions will holdling any locks.
+ */
+ if (client->remove)
+ client->remove(device, client_data);
+
+ xa_erase(&device->client_data, client_id);
+ ib_device_put(device);
+ ib_client_put(client);
}
-static int read_port_immutable(struct ib_device *device)
+static int alloc_port_data(struct ib_device *device)
{
- int ret;
- u8 start_port = rdma_start_port(device);
- u8 end_port = rdma_end_port(device);
- u8 port;
+ struct ib_port_data_rcu *pdata_rcu;
+ u32 port;
- /**
- * device->port_immutable is indexed directly by the port number to make
+ if (device->port_data)
+ return 0;
+
+ /* This can only be called once the physical port range is defined */
+ if (WARN_ON(!device->phys_port_cnt))
+ return -EINVAL;
+
+ /* Reserve U32_MAX so the logic to go over all the ports is sane */
+ if (WARN_ON(device->phys_port_cnt == U32_MAX))
+ return -EINVAL;
+
+ /*
+ * device->port_data is indexed directly by the port number to make
* access to this data as efficient as possible.
*
- * Therefore port_immutable is declared as a 1 based array with
- * potential empty slots at the beginning.
+ * Therefore port_data is declared as a 1 based array with potential
+ * empty slots at the beginning.
*/
- device->port_immutable = kzalloc(sizeof(*device->port_immutable)
- * (end_port + 1),
- GFP_KERNEL);
- if (!device->port_immutable)
+ pdata_rcu = kzalloc(struct_size(pdata_rcu, pdata,
+ size_add(rdma_end_port(device), 1)),
+ GFP_KERNEL);
+ if (!pdata_rcu)
return -ENOMEM;
+ /*
+ * The rcu_head is put in front of the port data array and the stored
+ * pointer is adjusted since we never need to see that member until
+ * kfree_rcu.
+ */
+ device->port_data = pdata_rcu->pdata;
+
+ rdma_for_each_port (device, port) {
+ struct ib_port_data *pdata = &device->port_data[port];
+
+ pdata->ib_dev = device;
+ spin_lock_init(&pdata->pkey_list_lock);
+ INIT_LIST_HEAD(&pdata->pkey_list);
+ spin_lock_init(&pdata->netdev_lock);
+ INIT_HLIST_NODE(&pdata->ndev_hash_link);
+ }
+ return 0;
+}
+
+static int verify_immutable(const struct ib_device *dev, u32 port)
+{
+ return WARN_ON(!rdma_cap_ib_mad(dev, port) &&
+ rdma_max_mad_size(dev, port) != 0);
+}
- for (port = start_port; port <= end_port; ++port) {
- ret = device->get_port_immutable(device, port,
- &device->port_immutable[port]);
+static int setup_port_data(struct ib_device *device)
+{
+ u32 port;
+ int ret;
+
+ ret = alloc_port_data(device);
+ if (ret)
+ return ret;
+
+ rdma_for_each_port (device, port) {
+ struct ib_port_data *pdata = &device->port_data[port];
+
+ ret = device->ops.get_port_immutable(device, port,
+ &pdata->immutable);
if (ret)
return ret;
@@ -308,132 +892,950 @@ static int read_port_immutable(struct ib_device *device)
return 0;
}
-void ib_get_device_fw_str(struct ib_device *dev, char *str, size_t str_len)
+/**
+ * ib_port_immutable_read() - Read rdma port's immutable data
+ * @dev: IB device
+ * @port: port number whose immutable data to read. It starts with index 1 and
+ * valid upto including rdma_end_port().
+ */
+const struct ib_port_immutable*
+ib_port_immutable_read(struct ib_device *dev, unsigned int port)
{
- if (dev->get_dev_fw_str)
- dev->get_dev_fw_str(dev, str, str_len);
+ WARN_ON(!rdma_is_port_valid(dev, port));
+ return &dev->port_data[port].immutable;
+}
+EXPORT_SYMBOL(ib_port_immutable_read);
+
+void ib_get_device_fw_str(struct ib_device *dev, char *str)
+{
+ if (dev->ops.get_dev_fw_str)
+ dev->ops.get_dev_fw_str(dev, str);
else
str[0] = '\0';
}
EXPORT_SYMBOL(ib_get_device_fw_str);
-/**
- * ib_register_device - Register an IB device with IB core
- * @device:Device to register
- *
- * Low-level drivers use ib_register_device() to register their
- * devices with the IB core. All registered clients will receive a
- * callback for each device that is added. @device must be allocated
- * with ib_alloc_device().
+static void ib_policy_change_task(struct work_struct *work)
+{
+ struct ib_device *dev;
+ unsigned long index;
+
+ down_read(&devices_rwsem);
+ xa_for_each_marked (&devices, index, dev, DEVICE_REGISTERED) {
+ unsigned int i;
+
+ rdma_for_each_port (dev, i) {
+ u64 sp;
+ ib_get_cached_subnet_prefix(dev, i, &sp);
+ ib_security_cache_change(dev, i, sp);
+ }
+ }
+ up_read(&devices_rwsem);
+}
+
+static int ib_security_change(struct notifier_block *nb, unsigned long event,
+ void *lsm_data)
+{
+ if (event != LSM_POLICY_CHANGE)
+ return NOTIFY_DONE;
+
+ schedule_work(&ib_policy_change_work);
+ ib_mad_agent_security_change();
+
+ return NOTIFY_OK;
+}
+
+static void compatdev_release(struct device *dev)
+{
+ struct ib_core_device *cdev =
+ container_of(dev, struct ib_core_device, dev);
+
+ kfree(cdev);
+}
+
+static int add_one_compat_dev(struct ib_device *device,
+ struct rdma_dev_net *rnet)
+{
+ struct ib_core_device *cdev;
+ int ret;
+
+ lockdep_assert_held(&rdma_nets_rwsem);
+ if (!ib_devices_shared_netns)
+ return 0;
+
+ /*
+ * Create and add compat device in all namespaces other than where it
+ * is currently bound to.
+ */
+ if (net_eq(read_pnet(&rnet->net),
+ read_pnet(&device->coredev.rdma_net)))
+ return 0;
+
+ /*
+ * The first of init_net() or ib_register_device() to take the
+ * compat_devs_mutex wins and gets to add the device. Others will wait
+ * for completion here.
+ */
+ mutex_lock(&device->compat_devs_mutex);
+ cdev = xa_load(&device->compat_devs, rnet->id);
+ if (cdev) {
+ ret = 0;
+ goto done;
+ }
+ ret = xa_reserve(&device->compat_devs, rnet->id, GFP_KERNEL);
+ if (ret)
+ goto done;
+
+ cdev = kzalloc(sizeof(*cdev), GFP_KERNEL);
+ if (!cdev) {
+ ret = -ENOMEM;
+ goto cdev_err;
+ }
+
+ cdev->dev.parent = device->dev.parent;
+ rdma_init_coredev(cdev, device, read_pnet(&rnet->net));
+ cdev->dev.release = compatdev_release;
+ ret = dev_set_name(&cdev->dev, "%s", dev_name(&device->dev));
+ if (ret)
+ goto add_err;
+
+ ret = device_add(&cdev->dev);
+ if (ret)
+ goto add_err;
+ ret = ib_setup_port_attrs(cdev);
+ if (ret)
+ goto port_err;
+
+ ret = xa_err(xa_store(&device->compat_devs, rnet->id,
+ cdev, GFP_KERNEL));
+ if (ret)
+ goto insert_err;
+
+ mutex_unlock(&device->compat_devs_mutex);
+ return 0;
+
+insert_err:
+ ib_free_port_attrs(cdev);
+port_err:
+ device_del(&cdev->dev);
+add_err:
+ put_device(&cdev->dev);
+cdev_err:
+ xa_release(&device->compat_devs, rnet->id);
+done:
+ mutex_unlock(&device->compat_devs_mutex);
+ return ret;
+}
+
+static void remove_one_compat_dev(struct ib_device *device, u32 id)
+{
+ struct ib_core_device *cdev;
+
+ mutex_lock(&device->compat_devs_mutex);
+ cdev = xa_erase(&device->compat_devs, id);
+ mutex_unlock(&device->compat_devs_mutex);
+ if (cdev) {
+ ib_free_port_attrs(cdev);
+ device_del(&cdev->dev);
+ put_device(&cdev->dev);
+ }
+}
+
+static void remove_compat_devs(struct ib_device *device)
+{
+ struct ib_core_device *cdev;
+ unsigned long index;
+
+ xa_for_each (&device->compat_devs, index, cdev)
+ remove_one_compat_dev(device, index);
+}
+
+static int add_compat_devs(struct ib_device *device)
+{
+ struct rdma_dev_net *rnet;
+ unsigned long index;
+ int ret = 0;
+
+ lockdep_assert_held(&devices_rwsem);
+
+ down_read(&rdma_nets_rwsem);
+ xa_for_each (&rdma_nets, index, rnet) {
+ ret = add_one_compat_dev(device, rnet);
+ if (ret)
+ break;
+ }
+ up_read(&rdma_nets_rwsem);
+ return ret;
+}
+
+static void remove_all_compat_devs(void)
+{
+ struct ib_compat_device *cdev;
+ struct ib_device *dev;
+ unsigned long index;
+
+ down_read(&devices_rwsem);
+ xa_for_each (&devices, index, dev) {
+ unsigned long c_index = 0;
+
+ /* Hold nets_rwsem so that any other thread modifying this
+ * system param can sync with this thread.
+ */
+ down_read(&rdma_nets_rwsem);
+ xa_for_each (&dev->compat_devs, c_index, cdev)
+ remove_one_compat_dev(dev, c_index);
+ up_read(&rdma_nets_rwsem);
+ }
+ up_read(&devices_rwsem);
+}
+
+static int add_all_compat_devs(void)
+{
+ struct rdma_dev_net *rnet;
+ struct ib_device *dev;
+ unsigned long index;
+ int ret = 0;
+
+ down_read(&devices_rwsem);
+ xa_for_each_marked (&devices, index, dev, DEVICE_REGISTERED) {
+ unsigned long net_index = 0;
+
+ /* Hold nets_rwsem so that any other thread modifying this
+ * system param can sync with this thread.
+ */
+ down_read(&rdma_nets_rwsem);
+ xa_for_each (&rdma_nets, net_index, rnet) {
+ ret = add_one_compat_dev(dev, rnet);
+ if (ret)
+ break;
+ }
+ up_read(&rdma_nets_rwsem);
+ }
+ up_read(&devices_rwsem);
+ if (ret)
+ remove_all_compat_devs();
+ return ret;
+}
+
+int rdma_compatdev_set(u8 enable)
+{
+ struct rdma_dev_net *rnet;
+ unsigned long index;
+ int ret = 0;
+
+ down_write(&rdma_nets_rwsem);
+ if (ib_devices_shared_netns == enable) {
+ up_write(&rdma_nets_rwsem);
+ return 0;
+ }
+
+ /* enable/disable of compat devices is not supported
+ * when more than default init_net exists.
+ */
+ xa_for_each (&rdma_nets, index, rnet) {
+ ret++;
+ break;
+ }
+ if (!ret)
+ ib_devices_shared_netns = enable;
+ up_write(&rdma_nets_rwsem);
+ if (ret)
+ return -EBUSY;
+
+ if (enable)
+ ret = add_all_compat_devs();
+ else
+ remove_all_compat_devs();
+ return ret;
+}
+
+static void rdma_dev_exit_net(struct net *net)
+{
+ struct rdma_dev_net *rnet = rdma_net_to_dev_net(net);
+ struct ib_device *dev;
+ unsigned long index;
+ int ret;
+
+ down_write(&rdma_nets_rwsem);
+ /*
+ * Prevent the ID from being re-used and hide the id from xa_for_each.
+ */
+ ret = xa_err(xa_store(&rdma_nets, rnet->id, NULL, GFP_KERNEL));
+ WARN_ON(ret);
+ up_write(&rdma_nets_rwsem);
+
+ down_read(&devices_rwsem);
+ xa_for_each (&devices, index, dev) {
+ get_device(&dev->dev);
+ /*
+ * Release the devices_rwsem so that pontentially blocking
+ * device_del, doesn't hold the devices_rwsem for too long.
+ */
+ up_read(&devices_rwsem);
+
+ remove_one_compat_dev(dev, rnet->id);
+
+ /*
+ * If the real device is in the NS then move it back to init.
+ */
+ rdma_dev_change_netns(dev, net, &init_net);
+
+ put_device(&dev->dev);
+ down_read(&devices_rwsem);
+ }
+ up_read(&devices_rwsem);
+
+ rdma_nl_net_exit(rnet);
+ xa_erase(&rdma_nets, rnet->id);
+}
+
+static __net_init int rdma_dev_init_net(struct net *net)
+{
+ struct rdma_dev_net *rnet = rdma_net_to_dev_net(net);
+ unsigned long index;
+ struct ib_device *dev;
+ int ret;
+
+ write_pnet(&rnet->net, net);
+
+ ret = rdma_nl_net_init(rnet);
+ if (ret)
+ return ret;
+
+ /* No need to create any compat devices in default init_net. */
+ if (net_eq(net, &init_net))
+ return 0;
+
+ ret = xa_alloc(&rdma_nets, &rnet->id, rnet, xa_limit_32b, GFP_KERNEL);
+ if (ret) {
+ rdma_nl_net_exit(rnet);
+ return ret;
+ }
+
+ down_read(&devices_rwsem);
+ xa_for_each_marked (&devices, index, dev, DEVICE_REGISTERED) {
+ /* Hold nets_rwsem so that netlink command cannot change
+ * system configuration for device sharing mode.
+ */
+ down_read(&rdma_nets_rwsem);
+ ret = add_one_compat_dev(dev, rnet);
+ up_read(&rdma_nets_rwsem);
+ if (ret)
+ break;
+ }
+ up_read(&devices_rwsem);
+
+ if (ret)
+ rdma_dev_exit_net(net);
+
+ return ret;
+}
+
+/*
+ * Assign the unique string device name and the unique device index. This is
+ * undone by ib_dealloc_device.
*/
-int ib_register_device(struct ib_device *device,
- int (*port_callback)(struct ib_device *,
- u8, struct kobject *))
+static int assign_name(struct ib_device *device, const char *name)
{
+ static u32 last_id;
int ret;
- struct ib_client *client;
+
+ down_write(&devices_rwsem);
+ /* Assign a unique name to the device */
+ if (strchr(name, '%'))
+ ret = alloc_name(device, name);
+ else
+ ret = dev_set_name(&device->dev, name);
+ if (ret)
+ goto out;
+
+ if (__ib_device_get_by_name(dev_name(&device->dev))) {
+ ret = -ENFILE;
+ goto out;
+ }
+ strscpy(device->name, dev_name(&device->dev), IB_DEVICE_NAME_MAX);
+
+ ret = xa_alloc_cyclic(&devices, &device->index, device, xa_limit_31b,
+ &last_id, GFP_KERNEL);
+ if (ret > 0)
+ ret = 0;
+
+out:
+ up_write(&devices_rwsem);
+ return ret;
+}
+
+/*
+ * setup_device() allocates memory and sets up data that requires calling the
+ * device ops, this is the only reason these actions are not done during
+ * ib_alloc_device. It is undone by ib_dealloc_device().
+ */
+static int setup_device(struct ib_device *device)
+{
struct ib_udata uhw = {.outlen = 0, .inlen = 0};
+ int ret;
+
+ ib_device_check_mandatory(device);
+
+ ret = setup_port_data(device);
+ if (ret) {
+ dev_warn(&device->dev, "Couldn't create per-port data\n");
+ return ret;
+ }
+
+ memset(&device->attrs, 0, sizeof(device->attrs));
+ ret = device->ops.query_device(device, &device->attrs, &uhw);
+ if (ret) {
+ dev_warn(&device->dev,
+ "Couldn't query the device attributes\n");
+ return ret;
+ }
+
+ return 0;
+}
+
+static void disable_device(struct ib_device *device)
+{
+ u32 cid;
+
+ WARN_ON(!refcount_read(&device->refcount));
+
+ down_write(&devices_rwsem);
+ xa_clear_mark(&devices, device->index, DEVICE_REGISTERED);
+ up_write(&devices_rwsem);
+
+ /*
+ * Remove clients in LIFO order, see assign_client_id. This could be
+ * more efficient if xarray learns to reverse iterate. Since no new
+ * clients can be added to this ib_device past this point we only need
+ * the maximum possible client_id value here.
+ */
+ down_read(&clients_rwsem);
+ cid = highest_client_id;
+ up_read(&clients_rwsem);
+ while (cid) {
+ cid--;
+ remove_client_context(device, cid);
+ }
+
+ ib_cq_pool_cleanup(device);
+
+ /* Pairs with refcount_set in enable_device */
+ ib_device_put(device);
+ wait_for_completion(&device->unreg_completion);
+
+ /*
+ * compat devices must be removed after device refcount drops to zero.
+ * Otherwise init_net() may add more compatdevs after removing compat
+ * devices and before device is disabled.
+ */
+ remove_compat_devs(device);
+}
+
+/*
+ * An enabled device is visible to all clients and to all the public facing
+ * APIs that return a device pointer. This always returns with a new get, even
+ * if it fails.
+ */
+static int enable_device_and_get(struct ib_device *device)
+{
+ struct ib_client *client;
+ unsigned long index;
+ int ret = 0;
+
+ /*
+ * One ref belongs to the xa and the other belongs to this
+ * thread. This is needed to guard against parallel unregistration.
+ */
+ refcount_set(&device->refcount, 2);
+ down_write(&devices_rwsem);
+ xa_set_mark(&devices, device->index, DEVICE_REGISTERED);
- mutex_lock(&device_mutex);
+ /*
+ * By using downgrade_write() we ensure that no other thread can clear
+ * DEVICE_REGISTERED while we are completing the client setup.
+ */
+ downgrade_write(&devices_rwsem);
- if (strchr(device->name, '%')) {
- ret = alloc_name(device->name);
+ if (device->ops.enable_driver) {
+ ret = device->ops.enable_driver(device);
if (ret)
goto out;
}
- if (ib_device_check_mandatory(device)) {
- ret = -EINVAL;
- goto out;
+ down_read(&clients_rwsem);
+ xa_for_each_marked (&clients, index, client, CLIENT_REGISTERED) {
+ ret = add_client_context(device, client);
+ if (ret)
+ break;
}
+ up_read(&clients_rwsem);
+ if (!ret)
+ ret = add_compat_devs(device);
+out:
+ up_read(&devices_rwsem);
+ return ret;
+}
- ret = read_port_immutable(device);
- if (ret) {
- pr_warn("Couldn't create per port immutable data %s\n",
- device->name);
+static void prevent_dealloc_device(struct ib_device *ib_dev)
+{
+}
+
+static void ib_device_notify_register(struct ib_device *device)
+{
+ struct net_device *netdev;
+ u32 port;
+ int ret;
+
+ down_read(&devices_rwsem);
+
+ /* Mark for userspace that device is ready */
+ kobject_uevent(&device->dev.kobj, KOBJ_ADD);
+
+ ret = rdma_nl_notify_event(device, 0, RDMA_REGISTER_EVENT);
+ if (ret)
goto out;
+
+ rdma_for_each_port(device, port) {
+ netdev = ib_device_get_netdev(device, port);
+ if (!netdev)
+ continue;
+
+ ret = rdma_nl_notify_event(device, port,
+ RDMA_NETDEV_ATTACH_EVENT);
+ dev_put(netdev);
+ if (ret)
+ goto out;
}
+out:
+ up_read(&devices_rwsem);
+}
+
+/**
+ * ib_register_device - Register an IB device with IB core
+ * @device: Device to register
+ * @name: unique string device name. This may include a '%' which will
+ * cause a unique index to be added to the passed device name.
+ * @dma_device: pointer to a DMA-capable device. If %NULL, then the IB
+ * device will be used. In this case the caller should fully
+ * setup the ibdev for DMA. This usually means using dma_virt_ops.
+ *
+ * Low-level drivers use ib_register_device() to register their
+ * devices with the IB core. All registered clients will receive a
+ * callback for each device that is added. @device must be allocated
+ * with ib_alloc_device().
+ *
+ * If the driver uses ops.dealloc_driver and calls any ib_unregister_device()
+ * asynchronously then the device pointer may become freed as soon as this
+ * function returns.
+ */
+int ib_register_device(struct ib_device *device, const char *name,
+ struct device *dma_device)
+{
+ int ret;
+
+ ret = assign_name(device, name);
+ if (ret)
+ return ret;
+
+ /*
+ * If the caller does not provide a DMA capable device then the IB core
+ * will set up ib_sge and scatterlist structures that stash the kernel
+ * virtual address into the address field.
+ */
+ WARN_ON(dma_device && !dma_device->dma_parms);
+ device->dma_device = dma_device;
+
+ ret = setup_device(device);
+ if (ret)
+ return ret;
+
ret = ib_cache_setup_one(device);
if (ret) {
- pr_warn("Couldn't set up InfiniBand P_Key/GID cache\n");
- goto out;
+ dev_warn(&device->dev,
+ "Couldn't set up InfiniBand P_Key/GID cache\n");
+ return ret;
}
- memset(&device->attrs, 0, sizeof(device->attrs));
- ret = device->query_device(device, &device->attrs, &uhw);
+ device->groups[0] = &ib_dev_attr_group;
+ device->groups[1] = device->ops.device_group;
+ ret = ib_setup_device_attrs(device);
+ if (ret)
+ goto cache_cleanup;
+
+ ib_device_register_rdmacg(device);
+
+ rdma_counter_init(device);
+
+ /*
+ * Ensure that ADD uevent is not fired because it
+ * is too early amd device is not initialized yet.
+ */
+ dev_set_uevent_suppress(&device->dev, true);
+ ret = device_add(&device->dev);
+ if (ret)
+ goto cg_cleanup;
+
+ ret = ib_setup_port_attrs(&device->coredev);
if (ret) {
- pr_warn("Couldn't query the device attributes\n");
- ib_cache_cleanup_one(device);
- goto out;
+ dev_warn(&device->dev,
+ "Couldn't register device with driver model\n");
+ goto dev_cleanup;
}
- ret = ib_device_register_sysfs(device, port_callback);
+ ret = enable_device_and_get(device);
if (ret) {
- pr_warn("Couldn't register device %s with driver model\n",
- device->name);
- ib_cache_cleanup_one(device);
- goto out;
+ void (*dealloc_fn)(struct ib_device *);
+
+ /*
+ * If we hit this error flow then we don't want to
+ * automatically dealloc the device since the caller is
+ * expected to call ib_dealloc_device() after
+ * ib_register_device() fails. This is tricky due to the
+ * possibility for a parallel unregistration along with this
+ * error flow. Since we have a refcount here we know any
+ * parallel flow is stopped in disable_device and will see the
+ * special dealloc_driver pointer, causing the responsibility to
+ * ib_dealloc_device() to revert back to this thread.
+ */
+ dealloc_fn = device->ops.dealloc_driver;
+ device->ops.dealloc_driver = prevent_dealloc_device;
+ ib_device_put(device);
+ __ib_unregister_device(device);
+ device->ops.dealloc_driver = dealloc_fn;
+ dev_set_uevent_suppress(&device->dev, false);
+ return ret;
}
+ dev_set_uevent_suppress(&device->dev, false);
- device->reg_state = IB_DEV_REGISTERED;
+ ib_device_notify_register(device);
- list_for_each_entry(client, &client_list, list)
- if (client->add && !add_client_context(device, client))
- client->add(device);
+ ib_device_put(device);
- down_write(&lists_rwsem);
- list_add_tail(&device->core_list, &device_list);
- up_write(&lists_rwsem);
-out:
- mutex_unlock(&device_mutex);
+ return 0;
+
+dev_cleanup:
+ device_del(&device->dev);
+cg_cleanup:
+ dev_set_uevent_suppress(&device->dev, false);
+ ib_device_unregister_rdmacg(device);
+cache_cleanup:
+ ib_cache_cleanup_one(device);
return ret;
}
EXPORT_SYMBOL(ib_register_device);
+/* Callers must hold a get on the device. */
+static void __ib_unregister_device(struct ib_device *ib_dev)
+{
+ struct ib_device *sub, *tmp;
+
+ mutex_lock(&ib_dev->subdev_lock);
+ list_for_each_entry_safe_reverse(sub, tmp,
+ &ib_dev->subdev_list_head,
+ subdev_list) {
+ list_del(&sub->subdev_list);
+ ib_dev->ops.del_sub_dev(sub);
+ ib_device_put(ib_dev);
+ }
+ mutex_unlock(&ib_dev->subdev_lock);
+
+ /*
+ * We have a registration lock so that all the calls to unregister are
+ * fully fenced, once any unregister returns the device is truly
+ * unregistered even if multiple callers are unregistering it at the
+ * same time. This also interacts with the registration flow and
+ * provides sane semantics if register and unregister are racing.
+ */
+ mutex_lock(&ib_dev->unregistration_lock);
+ if (!refcount_read(&ib_dev->refcount))
+ goto out;
+
+ disable_device(ib_dev);
+ rdma_nl_notify_event(ib_dev, 0, RDMA_UNREGISTER_EVENT);
+
+ /* Expedite removing unregistered pointers from the hash table */
+ free_netdevs(ib_dev);
+
+ ib_free_port_attrs(&ib_dev->coredev);
+ device_del(&ib_dev->dev);
+ ib_device_unregister_rdmacg(ib_dev);
+ ib_cache_cleanup_one(ib_dev);
+
+ /*
+ * Drivers using the new flow may not call ib_dealloc_device except
+ * in error unwind prior to registration success.
+ */
+ if (ib_dev->ops.dealloc_driver &&
+ ib_dev->ops.dealloc_driver != prevent_dealloc_device) {
+ WARN_ON(kref_read(&ib_dev->dev.kobj.kref) <= 1);
+ ib_dealloc_device(ib_dev);
+ }
+out:
+ mutex_unlock(&ib_dev->unregistration_lock);
+}
+
/**
* ib_unregister_device - Unregister an IB device
- * @device:Device to unregister
+ * @ib_dev: The device to unregister
*
* Unregister an IB device. All clients will receive a remove callback.
+ *
+ * Callers should call this routine only once, and protect against races with
+ * registration. Typically it should only be called as part of a remove
+ * callback in an implementation of driver core's struct device_driver and
+ * related.
+ *
+ * If ops.dealloc_driver is used then ib_dev will be freed upon return from
+ * this function.
*/
-void ib_unregister_device(struct ib_device *device)
+void ib_unregister_device(struct ib_device *ib_dev)
{
- struct ib_client_data *context, *tmp;
- unsigned long flags;
+ get_device(&ib_dev->dev);
+ __ib_unregister_device(ib_dev);
+ put_device(&ib_dev->dev);
+}
+EXPORT_SYMBOL(ib_unregister_device);
- mutex_lock(&device_mutex);
+/**
+ * ib_unregister_device_and_put - Unregister a device while holding a 'get'
+ * @ib_dev: The device to unregister
+ *
+ * This is the same as ib_unregister_device(), except it includes an internal
+ * ib_device_put() that should match a 'get' obtained by the caller.
+ *
+ * It is safe to call this routine concurrently from multiple threads while
+ * holding the 'get'. When the function returns the device is fully
+ * unregistered.
+ *
+ * Drivers using this flow MUST use the driver_unregister callback to clean up
+ * their resources associated with the device and dealloc it.
+ */
+void ib_unregister_device_and_put(struct ib_device *ib_dev)
+{
+ WARN_ON(!ib_dev->ops.dealloc_driver);
+ get_device(&ib_dev->dev);
+ ib_device_put(ib_dev);
+ __ib_unregister_device(ib_dev);
+ put_device(&ib_dev->dev);
+}
+EXPORT_SYMBOL(ib_unregister_device_and_put);
- down_write(&lists_rwsem);
- list_del(&device->core_list);
- spin_lock_irqsave(&device->client_data_lock, flags);
- list_for_each_entry_safe(context, tmp, &device->client_data_list, list)
- context->going_down = true;
- spin_unlock_irqrestore(&device->client_data_lock, flags);
- downgrade_write(&lists_rwsem);
+/**
+ * ib_unregister_driver - Unregister all IB devices for a driver
+ * @driver_id: The driver to unregister
+ *
+ * This implements a fence for device unregistration. It only returns once all
+ * devices associated with the driver_id have fully completed their
+ * unregistration and returned from ib_unregister_device*().
+ *
+ * If device's are not yet unregistered it goes ahead and starts unregistering
+ * them.
+ *
+ * This does not block creation of new devices with the given driver_id, that
+ * is the responsibility of the caller.
+ */
+void ib_unregister_driver(enum rdma_driver_id driver_id)
+{
+ struct ib_device *ib_dev;
+ unsigned long index;
+
+ down_read(&devices_rwsem);
+ xa_for_each (&devices, index, ib_dev) {
+ if (ib_dev->ops.driver_id != driver_id)
+ continue;
+
+ get_device(&ib_dev->dev);
+ up_read(&devices_rwsem);
- list_for_each_entry_safe(context, tmp, &device->client_data_list,
- list) {
- if (context->client->remove)
- context->client->remove(device, context->data);
+ WARN_ON(!ib_dev->ops.dealloc_driver);
+ __ib_unregister_device(ib_dev);
+
+ put_device(&ib_dev->dev);
+ down_read(&devices_rwsem);
}
- up_read(&lists_rwsem);
+ up_read(&devices_rwsem);
+}
+EXPORT_SYMBOL(ib_unregister_driver);
+
+static void ib_unregister_work(struct work_struct *work)
+{
+ struct ib_device *ib_dev =
+ container_of(work, struct ib_device, unregistration_work);
- mutex_unlock(&device_mutex);
+ __ib_unregister_device(ib_dev);
+ put_device(&ib_dev->dev);
+}
- ib_device_unregister_sysfs(device);
- ib_cache_cleanup_one(device);
+/**
+ * ib_unregister_device_queued - Unregister a device using a work queue
+ * @ib_dev: The device to unregister
+ *
+ * This schedules an asynchronous unregistration using a WQ for the device. A
+ * driver should use this to avoid holding locks while doing unregistration,
+ * such as holding the RTNL lock.
+ *
+ * Drivers using this API must use ib_unregister_driver before module unload
+ * to ensure that all scheduled unregistrations have completed.
+ */
+void ib_unregister_device_queued(struct ib_device *ib_dev)
+{
+ WARN_ON(!refcount_read(&ib_dev->refcount));
+ WARN_ON(!ib_dev->ops.dealloc_driver);
+ get_device(&ib_dev->dev);
+ if (!queue_work(ib_unreg_wq, &ib_dev->unregistration_work))
+ put_device(&ib_dev->dev);
+}
+EXPORT_SYMBOL(ib_unregister_device_queued);
- down_write(&lists_rwsem);
- spin_lock_irqsave(&device->client_data_lock, flags);
- list_for_each_entry_safe(context, tmp, &device->client_data_list, list)
- kfree(context);
- spin_unlock_irqrestore(&device->client_data_lock, flags);
- up_write(&lists_rwsem);
+/*
+ * The caller must pass in a device that has the kref held and the refcount
+ * released. If the device is in cur_net and still registered then it is moved
+ * into net.
+ */
+static int rdma_dev_change_netns(struct ib_device *device, struct net *cur_net,
+ struct net *net)
+{
+ int ret2 = -EINVAL;
+ int ret;
+
+ mutex_lock(&device->unregistration_lock);
+
+ /*
+ * If a device not under ib_device_get() or if the unregistration_lock
+ * is not held, the namespace can be changed, or it can be unregistered.
+ * Check again under the lock.
+ */
+ if (refcount_read(&device->refcount) == 0 ||
+ !net_eq(cur_net, read_pnet(&device->coredev.rdma_net))) {
+ ret = -ENODEV;
+ goto out;
+ }
+
+ kobject_uevent(&device->dev.kobj, KOBJ_REMOVE);
+ disable_device(device);
+
+ /*
+ * At this point no one can be using the device, so it is safe to
+ * change the namespace.
+ */
+ write_pnet(&device->coredev.rdma_net, net);
+
+ down_read(&devices_rwsem);
+ /*
+ * Currently rdma devices are system wide unique. So the device name
+ * is guaranteed free in the new namespace. Publish the new namespace
+ * at the sysfs level.
+ */
+ ret = device_rename(&device->dev, dev_name(&device->dev));
+ up_read(&devices_rwsem);
+ if (ret) {
+ dev_warn(&device->dev,
+ "%s: Couldn't rename device after namespace change\n",
+ __func__);
+ /* Try and put things back and re-enable the device */
+ write_pnet(&device->coredev.rdma_net, cur_net);
+ }
- device->reg_state = IB_DEV_UNREGISTERED;
+ ret2 = enable_device_and_get(device);
+ if (ret2) {
+ /*
+ * This shouldn't really happen, but if it does, let the user
+ * retry at later point. So don't disable the device.
+ */
+ dev_warn(&device->dev,
+ "%s: Couldn't re-enable device after namespace change\n",
+ __func__);
+ }
+ kobject_uevent(&device->dev.kobj, KOBJ_ADD);
+
+ ib_device_put(device);
+out:
+ mutex_unlock(&device->unregistration_lock);
+ if (ret)
+ return ret;
+ return ret2;
+}
+
+int ib_device_set_netns_put(struct sk_buff *skb,
+ struct ib_device *dev, u32 ns_fd)
+{
+ struct net *net;
+ int ret;
+
+ net = get_net_ns_by_fd(ns_fd);
+ if (IS_ERR(net)) {
+ ret = PTR_ERR(net);
+ goto net_err;
+ }
+
+ if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) {
+ ret = -EPERM;
+ goto ns_err;
+ }
+
+ /*
+ * All the ib_clients, including uverbs, are reset when the namespace is
+ * changed and this cannot be blocked waiting for userspace to do
+ * something, so disassociation is mandatory.
+ */
+ if (!dev->ops.disassociate_ucontext || ib_devices_shared_netns) {
+ ret = -EOPNOTSUPP;
+ goto ns_err;
+ }
+
+ get_device(&dev->dev);
+ ib_device_put(dev);
+ ret = rdma_dev_change_netns(dev, current->nsproxy->net_ns, net);
+ put_device(&dev->dev);
+
+ put_net(net);
+ return ret;
+
+ns_err:
+ put_net(net);
+net_err:
+ ib_device_put(dev);
+ return ret;
+}
+
+static struct pernet_operations rdma_dev_net_ops = {
+ .init = rdma_dev_init_net,
+ .exit = rdma_dev_exit_net,
+ .id = &rdma_dev_net_id,
+ .size = sizeof(struct rdma_dev_net),
+};
+
+static int assign_client_id(struct ib_client *client)
+{
+ int ret;
+
+ lockdep_assert_held(&clients_rwsem);
+ /*
+ * The add/remove callbacks must be called in FIFO/LIFO order. To
+ * achieve this we assign client_ids so they are sorted in
+ * registration order.
+ */
+ client->client_id = highest_client_id;
+ ret = xa_insert(&clients, client->client_id, client, GFP_KERNEL);
+ if (ret)
+ return ret;
+
+ highest_client_id++;
+ xa_set_mark(&clients, client->client_id, CLIENT_REGISTERED);
+ return 0;
+}
+
+static void remove_client_id(struct ib_client *client)
+{
+ down_write(&clients_rwsem);
+ xa_erase(&clients, client->client_id);
+ for (; highest_client_id; highest_client_id--)
+ if (xa_load(&clients, highest_client_id - 1))
+ break;
+ up_write(&clients_rwsem);
}
-EXPORT_SYMBOL(ib_unregister_device);
/**
* ib_register_client - Register an IB client
@@ -451,20 +1853,36 @@ EXPORT_SYMBOL(ib_unregister_device);
int ib_register_client(struct ib_client *client)
{
struct ib_device *device;
+ unsigned long index;
+ bool need_unreg = false;
+ int ret;
- mutex_lock(&device_mutex);
-
- list_for_each_entry(device, &device_list, core_list)
- if (client->add && !add_client_context(device, client))
- client->add(device);
-
- down_write(&lists_rwsem);
- list_add_tail(&client->list, &client_list);
- up_write(&lists_rwsem);
+ refcount_set(&client->uses, 1);
+ init_completion(&client->uses_zero);
- mutex_unlock(&device_mutex);
+ /*
+ * The devices_rwsem is held in write mode to ensure that a racing
+ * ib_register_device() sees a consisent view of clients and devices.
+ */
+ down_write(&devices_rwsem);
+ down_write(&clients_rwsem);
+ ret = assign_client_id(client);
+ if (ret)
+ goto out;
- return 0;
+ need_unreg = true;
+ xa_for_each_marked (&devices, index, device, DEVICE_REGISTERED) {
+ ret = add_client_context(device, client);
+ if (ret)
+ goto out;
+ }
+ ret = 0;
+out:
+ up_write(&clients_rwsem);
+ up_write(&devices_rwsem);
+ if (need_unreg && ret)
+ ib_unregister_client(client);
+ return ret;
}
EXPORT_SYMBOL(ib_register_client);
@@ -475,80 +1893,140 @@ EXPORT_SYMBOL(ib_register_client);
* Upper level users use ib_unregister_client() to remove their client
* registration. When ib_unregister_client() is called, the client
* will receive a remove callback for each IB device still registered.
+ *
+ * This is a full fence, once it returns no client callbacks will be called,
+ * or are running in another thread.
*/
void ib_unregister_client(struct ib_client *client)
{
- struct ib_client_data *context, *tmp;
struct ib_device *device;
- unsigned long flags;
+ unsigned long index;
- mutex_lock(&device_mutex);
+ down_write(&clients_rwsem);
+ ib_client_put(client);
+ xa_clear_mark(&clients, client->client_id, CLIENT_REGISTERED);
+ up_write(&clients_rwsem);
- down_write(&lists_rwsem);
- list_del(&client->list);
- up_write(&lists_rwsem);
+ /* We do not want to have locks while calling client->remove() */
+ rcu_read_lock();
+ xa_for_each (&devices, index, device) {
+ if (!ib_device_try_get(device))
+ continue;
+ rcu_read_unlock();
- list_for_each_entry(device, &device_list, core_list) {
- struct ib_client_data *found_context = NULL;
+ remove_client_context(device, client->client_id);
- down_write(&lists_rwsem);
- spin_lock_irqsave(&device->client_data_lock, flags);
- list_for_each_entry_safe(context, tmp, &device->client_data_list, list)
- if (context->client == client) {
- context->going_down = true;
- found_context = context;
- break;
- }
- spin_unlock_irqrestore(&device->client_data_lock, flags);
- up_write(&lists_rwsem);
+ ib_device_put(device);
+ rcu_read_lock();
+ }
+ rcu_read_unlock();
- if (client->remove)
- client->remove(device, found_context ?
- found_context->data : NULL);
+ /*
+ * remove_client_context() is not a fence, it can return even though a
+ * removal is ongoing. Wait until all removals are completed.
+ */
+ wait_for_completion(&client->uses_zero);
+ remove_client_id(client);
+}
+EXPORT_SYMBOL(ib_unregister_client);
+
+static int __ib_get_global_client_nl_info(const char *client_name,
+ struct ib_client_nl_info *res)
+{
+ struct ib_client *client;
+ unsigned long index;
+ int ret = -ENOENT;
- if (!found_context) {
- pr_warn("No client context found for %s/%s\n",
- device->name, client->name);
+ down_read(&clients_rwsem);
+ xa_for_each_marked (&clients, index, client, CLIENT_REGISTERED) {
+ if (strcmp(client->name, client_name) != 0)
continue;
+ if (!client->get_global_nl_info) {
+ ret = -EOPNOTSUPP;
+ break;
}
+ ret = client->get_global_nl_info(res);
+ if (WARN_ON(ret == -ENOENT))
+ ret = -EINVAL;
+ if (!ret && res->cdev)
+ get_device(res->cdev);
+ break;
+ }
+ up_read(&clients_rwsem);
+ return ret;
+}
+
+static int __ib_get_client_nl_info(struct ib_device *ibdev,
+ const char *client_name,
+ struct ib_client_nl_info *res)
+{
+ unsigned long index;
+ void *client_data;
+ int ret = -ENOENT;
+
+ down_read(&ibdev->client_data_rwsem);
+ xan_for_each_marked (&ibdev->client_data, index, client_data,
+ CLIENT_DATA_REGISTERED) {
+ struct ib_client *client = xa_load(&clients, index);
- down_write(&lists_rwsem);
- spin_lock_irqsave(&device->client_data_lock, flags);
- list_del(&found_context->list);
- kfree(found_context);
- spin_unlock_irqrestore(&device->client_data_lock, flags);
- up_write(&lists_rwsem);
+ if (!client || strcmp(client->name, client_name) != 0)
+ continue;
+ if (!client->get_nl_info) {
+ ret = -EOPNOTSUPP;
+ break;
+ }
+ ret = client->get_nl_info(ibdev, client_data, res);
+ if (WARN_ON(ret == -ENOENT))
+ ret = -EINVAL;
+
+ /*
+ * The cdev is guaranteed valid as long as we are inside the
+ * client_data_rwsem as remove_one can't be called. Keep it
+ * valid for the caller.
+ */
+ if (!ret && res->cdev)
+ get_device(res->cdev);
+ break;
}
+ up_read(&ibdev->client_data_rwsem);
- mutex_unlock(&device_mutex);
+ return ret;
}
-EXPORT_SYMBOL(ib_unregister_client);
/**
- * ib_get_client_data - Get IB client context
- * @device:Device to get context for
- * @client:Client to get context for
- *
- * ib_get_client_data() returns client context set with
- * ib_set_client_data().
+ * ib_get_client_nl_info - Fetch the nl_info from a client
+ * @ibdev: IB device
+ * @client_name: Name of the client
+ * @res: Result of the query
*/
-void *ib_get_client_data(struct ib_device *device, struct ib_client *client)
+int ib_get_client_nl_info(struct ib_device *ibdev, const char *client_name,
+ struct ib_client_nl_info *res)
{
- struct ib_client_data *context;
- void *ret = NULL;
- unsigned long flags;
+ int ret;
- spin_lock_irqsave(&device->client_data_lock, flags);
- list_for_each_entry(context, &device->client_data_list, list)
- if (context->client == client) {
- ret = context->data;
- break;
- }
- spin_unlock_irqrestore(&device->client_data_lock, flags);
+ if (ibdev)
+ ret = __ib_get_client_nl_info(ibdev, client_name, res);
+ else
+ ret = __ib_get_global_client_nl_info(client_name, res);
+#ifdef CONFIG_MODULES
+ if (ret == -ENOENT) {
+ request_module("rdma-client-%s", client_name);
+ if (ibdev)
+ ret = __ib_get_client_nl_info(ibdev, client_name, res);
+ else
+ ret = __ib_get_global_client_nl_info(client_name, res);
+ }
+#endif
+ if (ret) {
+ if (ret == -ENOENT)
+ return -EOPNOTSUPP;
+ return ret;
+ }
- return ret;
+ if (WARN_ON(!res->cdev))
+ return -EINVAL;
+ return 0;
}
-EXPORT_SYMBOL(ib_get_client_data);
/**
* ib_set_client_data - Set IB client context
@@ -556,27 +2034,22 @@ EXPORT_SYMBOL(ib_get_client_data);
* @client:Client to set context for
* @data:Context to set
*
- * ib_set_client_data() sets client context that can be retrieved with
- * ib_get_client_data().
+ * ib_set_client_data() sets client context data that can be retrieved with
+ * ib_get_client_data(). This can only be called while the client is
+ * registered to the device, once the ib_client remove() callback returns this
+ * cannot be called.
*/
void ib_set_client_data(struct ib_device *device, struct ib_client *client,
void *data)
{
- struct ib_client_data *context;
- unsigned long flags;
+ void *rc;
- spin_lock_irqsave(&device->client_data_lock, flags);
- list_for_each_entry(context, &device->client_data_list, list)
- if (context->client == client) {
- context->data = data;
- goto out;
- }
+ if (WARN_ON(IS_ERR(data)))
+ data = NULL;
- pr_warn("No client context found for %s/%s\n",
- device->name, client->name);
-
-out:
- spin_unlock_irqrestore(&device->client_data_lock, flags);
+ rc = xa_store(&device->client_data, client->client_id, data,
+ GFP_KERNEL);
+ WARN_ON(xa_is_err(rc));
}
EXPORT_SYMBOL(ib_set_client_data);
@@ -586,19 +2059,15 @@ EXPORT_SYMBOL(ib_set_client_data);
*
* ib_register_event_handler() registers an event handler that will be
* called back when asynchronous IB events occur (as defined in
- * chapter 11 of the InfiniBand Architecture Specification). This
- * callback may occur in interrupt context.
+ * chapter 11 of the InfiniBand Architecture Specification). This
+ * callback occurs in workqueue context.
*/
-int ib_register_event_handler (struct ib_event_handler *event_handler)
+void ib_register_event_handler(struct ib_event_handler *event_handler)
{
- unsigned long flags;
-
- spin_lock_irqsave(&event_handler->device->event_handler_lock, flags);
+ down_write(&event_handler->device->event_handler_rwsem);
list_add_tail(&event_handler->list,
&event_handler->device->event_handler_list);
- spin_unlock_irqrestore(&event_handler->device->event_handler_lock, flags);
-
- return 0;
+ up_write(&event_handler->device->event_handler_rwsem);
}
EXPORT_SYMBOL(ib_register_event_handler);
@@ -609,39 +2078,85 @@ EXPORT_SYMBOL(ib_register_event_handler);
* Unregister an event handler registered with
* ib_register_event_handler().
*/
-int ib_unregister_event_handler(struct ib_event_handler *event_handler)
+void ib_unregister_event_handler(struct ib_event_handler *event_handler)
{
- unsigned long flags;
-
- spin_lock_irqsave(&event_handler->device->event_handler_lock, flags);
+ down_write(&event_handler->device->event_handler_rwsem);
list_del(&event_handler->list);
- spin_unlock_irqrestore(&event_handler->device->event_handler_lock, flags);
-
- return 0;
+ up_write(&event_handler->device->event_handler_rwsem);
}
EXPORT_SYMBOL(ib_unregister_event_handler);
-/**
- * ib_dispatch_event - Dispatch an asynchronous event
- * @event:Event to dispatch
- *
- * Low-level drivers must call ib_dispatch_event() to dispatch the
- * event to all registered event handlers when an asynchronous event
- * occurs.
- */
-void ib_dispatch_event(struct ib_event *event)
+void ib_dispatch_event_clients(struct ib_event *event)
{
- unsigned long flags;
struct ib_event_handler *handler;
- spin_lock_irqsave(&event->device->event_handler_lock, flags);
+ down_read(&event->device->event_handler_rwsem);
list_for_each_entry(handler, &event->device->event_handler_list, list)
handler->handler(handler, event);
- spin_unlock_irqrestore(&event->device->event_handler_lock, flags);
+ up_read(&event->device->event_handler_rwsem);
+}
+
+static int iw_query_port(struct ib_device *device,
+ u32 port_num,
+ struct ib_port_attr *port_attr)
+{
+ struct in_device *inetdev;
+ struct net_device *netdev;
+
+ memset(port_attr, 0, sizeof(*port_attr));
+
+ netdev = ib_device_get_netdev(device, port_num);
+ if (!netdev)
+ return -ENODEV;
+
+ port_attr->max_mtu = IB_MTU_4096;
+ port_attr->active_mtu = ib_mtu_int_to_enum(netdev->mtu);
+
+ if (!netif_carrier_ok(netdev)) {
+ port_attr->state = IB_PORT_DOWN;
+ port_attr->phys_state = IB_PORT_PHYS_STATE_DISABLED;
+ } else {
+ rcu_read_lock();
+ inetdev = __in_dev_get_rcu(netdev);
+
+ if (inetdev && inetdev->ifa_list) {
+ port_attr->state = IB_PORT_ACTIVE;
+ port_attr->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
+ } else {
+ port_attr->state = IB_PORT_INIT;
+ port_attr->phys_state =
+ IB_PORT_PHYS_STATE_PORT_CONFIGURATION_TRAINING;
+ }
+
+ rcu_read_unlock();
+ }
+
+ dev_put(netdev);
+ return device->ops.query_port(device, port_num, port_attr);
+}
+
+static int __ib_query_port(struct ib_device *device,
+ u32 port_num,
+ struct ib_port_attr *port_attr)
+{
+ int err;
+
+ memset(port_attr, 0, sizeof(*port_attr));
+
+ err = device->ops.query_port(device, port_num, port_attr);
+ if (err || port_attr->subnet_prefix)
+ return err;
+
+ if (rdma_port_get_link_layer(device, port_num) !=
+ IB_LINK_LAYER_INFINIBAND)
+ return 0;
+
+ ib_get_cached_subnet_prefix(device, port_num,
+ &port_attr->subnet_prefix);
+ return 0;
}
-EXPORT_SYMBOL(ib_dispatch_event);
/**
* ib_query_port - Query IB port attributes
@@ -653,56 +2168,227 @@ EXPORT_SYMBOL(ib_dispatch_event);
* @port_attr pointer.
*/
int ib_query_port(struct ib_device *device,
- u8 port_num,
+ u32 port_num,
struct ib_port_attr *port_attr)
{
- union ib_gid gid;
- int err;
+ if (!rdma_is_port_valid(device, port_num))
+ return -EINVAL;
- if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
+ if (rdma_protocol_iwarp(device, port_num))
+ return iw_query_port(device, port_num, port_attr);
+ else
+ return __ib_query_port(device, port_num, port_attr);
+}
+EXPORT_SYMBOL(ib_query_port);
+
+static void add_ndev_hash(struct ib_port_data *pdata)
+{
+ unsigned long flags;
+
+ might_sleep();
+
+ spin_lock_irqsave(&ndev_hash_lock, flags);
+ if (hash_hashed(&pdata->ndev_hash_link)) {
+ hash_del_rcu(&pdata->ndev_hash_link);
+ spin_unlock_irqrestore(&ndev_hash_lock, flags);
+ /*
+ * We cannot do hash_add_rcu after a hash_del_rcu until the
+ * grace period
+ */
+ synchronize_rcu();
+ spin_lock_irqsave(&ndev_hash_lock, flags);
+ }
+ if (pdata->netdev)
+ hash_add_rcu(ndev_hash, &pdata->ndev_hash_link,
+ (uintptr_t)pdata->netdev);
+ spin_unlock_irqrestore(&ndev_hash_lock, flags);
+}
+
+/**
+ * ib_device_set_netdev - Associate the ib_dev with an underlying net_device
+ * @ib_dev: Device to modify
+ * @ndev: net_device to affiliate, may be NULL
+ * @port: IB port the net_device is connected to
+ *
+ * Drivers should use this to link the ib_device to a netdev so the netdev
+ * shows up in interfaces like ib_enum_roce_netdev. Only one netdev may be
+ * affiliated with any port.
+ *
+ * The caller must ensure that the given ndev is not unregistered or
+ * unregistering, and that either the ib_device is unregistered or
+ * ib_device_set_netdev() is called with NULL when the ndev sends a
+ * NETDEV_UNREGISTER event.
+ */
+int ib_device_set_netdev(struct ib_device *ib_dev, struct net_device *ndev,
+ u32 port)
+{
+ enum rdma_nl_notify_event_type etype;
+ struct net_device *old_ndev;
+ struct ib_port_data *pdata;
+ unsigned long flags;
+ int ret;
+
+ if (!rdma_is_port_valid(ib_dev, port))
return -EINVAL;
- memset(port_attr, 0, sizeof(*port_attr));
- err = device->query_port(device, port_num, port_attr);
- if (err || port_attr->subnet_prefix)
- return err;
+ /*
+ * Drivers wish to call this before ib_register_driver, so we have to
+ * setup the port data early.
+ */
+ ret = alloc_port_data(ib_dev);
+ if (ret)
+ return ret;
+
+ pdata = &ib_dev->port_data[port];
+ spin_lock_irqsave(&pdata->netdev_lock, flags);
+ old_ndev = rcu_dereference_protected(
+ pdata->netdev, lockdep_is_held(&pdata->netdev_lock));
+ if (old_ndev == ndev) {
+ spin_unlock_irqrestore(&pdata->netdev_lock, flags);
+ return 0;
+ }
+
+ rcu_assign_pointer(pdata->netdev, ndev);
+ netdev_put(old_ndev, &pdata->netdev_tracker);
+ netdev_hold(ndev, &pdata->netdev_tracker, GFP_ATOMIC);
+ spin_unlock_irqrestore(&pdata->netdev_lock, flags);
+
+ add_ndev_hash(pdata);
- if (rdma_port_get_link_layer(device, port_num) != IB_LINK_LAYER_INFINIBAND)
+ /* Make sure that the device is registered before we send events */
+ if (xa_load(&devices, ib_dev->index) != ib_dev)
return 0;
- err = ib_query_gid(device, port_num, 0, &gid, NULL);
- if (err)
- return err;
+ etype = ndev ? RDMA_NETDEV_ATTACH_EVENT : RDMA_NETDEV_DETACH_EVENT;
+ rdma_nl_notify_event(ib_dev, port, etype);
- port_attr->subnet_prefix = be64_to_cpu(gid.global.subnet_prefix);
return 0;
}
-EXPORT_SYMBOL(ib_query_port);
+EXPORT_SYMBOL(ib_device_set_netdev);
+
+static void free_netdevs(struct ib_device *ib_dev)
+{
+ unsigned long flags;
+ u32 port;
+
+ if (!ib_dev->port_data)
+ return;
+
+ rdma_for_each_port (ib_dev, port) {
+ struct ib_port_data *pdata = &ib_dev->port_data[port];
+ struct net_device *ndev;
+
+ spin_lock_irqsave(&pdata->netdev_lock, flags);
+ ndev = rcu_dereference_protected(
+ pdata->netdev, lockdep_is_held(&pdata->netdev_lock));
+ if (ndev) {
+ spin_lock(&ndev_hash_lock);
+ hash_del_rcu(&pdata->ndev_hash_link);
+ spin_unlock(&ndev_hash_lock);
+
+ /*
+ * If this is the last dev_put there is still a
+ * synchronize_rcu before the netdev is kfreed, so we
+ * can continue to rely on unlocked pointer
+ * comparisons after the put
+ */
+ rcu_assign_pointer(pdata->netdev, NULL);
+ netdev_put(ndev, &pdata->netdev_tracker);
+ }
+ spin_unlock_irqrestore(&pdata->netdev_lock, flags);
+ }
+}
+
+struct net_device *ib_device_get_netdev(struct ib_device *ib_dev,
+ u32 port)
+{
+ struct ib_port_data *pdata;
+ struct net_device *res;
+
+ if (!rdma_is_port_valid(ib_dev, port))
+ return NULL;
+
+ if (!ib_dev->port_data)
+ return NULL;
+
+ pdata = &ib_dev->port_data[port];
+
+ /*
+ * New drivers should use ib_device_set_netdev() not the legacy
+ * get_netdev().
+ */
+ if (ib_dev->ops.get_netdev)
+ res = ib_dev->ops.get_netdev(ib_dev, port);
+ else {
+ spin_lock(&pdata->netdev_lock);
+ res = rcu_dereference_protected(
+ pdata->netdev, lockdep_is_held(&pdata->netdev_lock));
+ dev_hold(res);
+ spin_unlock(&pdata->netdev_lock);
+ }
+
+ return res;
+}
+EXPORT_SYMBOL(ib_device_get_netdev);
/**
- * ib_query_gid - Get GID table entry
- * @device:Device to query
- * @port_num:Port number to query
- * @index:GID table index to query
- * @gid:Returned GID
- * @attr: Returned GID attributes related to this GID index (only in RoCE).
- * NULL means ignore.
- *
- * ib_query_gid() fetches the specified GID table entry.
+ * ib_query_netdev_port - Query the port number of a net_device
+ * associated with an ibdev
+ * @ibdev: IB device
+ * @ndev: Network device
+ * @port: IB port the net_device is connected to
*/
-int ib_query_gid(struct ib_device *device,
- u8 port_num, int index, union ib_gid *gid,
- struct ib_gid_attr *attr)
+int ib_query_netdev_port(struct ib_device *ibdev, struct net_device *ndev,
+ u32 *port)
{
- if (rdma_cap_roce_gid_table(device, port_num))
- return ib_get_cached_gid(device, port_num, index, gid, attr);
+ struct net_device *ib_ndev;
+ u32 port_num;
+
+ rdma_for_each_port(ibdev, port_num) {
+ ib_ndev = ib_device_get_netdev(ibdev, port_num);
+ if (ndev == ib_ndev) {
+ *port = port_num;
+ dev_put(ib_ndev);
+ return 0;
+ }
+ dev_put(ib_ndev);
+ }
- if (attr)
- return -EINVAL;
+ return -ENOENT;
+}
+EXPORT_SYMBOL(ib_query_netdev_port);
- return device->query_gid(device, port_num, index, gid);
+/**
+ * ib_device_get_by_netdev - Find an IB device associated with a netdev
+ * @ndev: netdev to locate
+ * @driver_id: The driver ID that must match (RDMA_DRIVER_UNKNOWN matches all)
+ *
+ * Find and hold an ib_device that is associated with a netdev via
+ * ib_device_set_netdev(). The caller must call ib_device_put() on the
+ * returned pointer.
+ */
+struct ib_device *ib_device_get_by_netdev(struct net_device *ndev,
+ enum rdma_driver_id driver_id)
+{
+ struct ib_device *res = NULL;
+ struct ib_port_data *cur;
+
+ rcu_read_lock();
+ hash_for_each_possible_rcu (ndev_hash, cur, ndev_hash_link,
+ (uintptr_t)ndev) {
+ if (rcu_access_pointer(cur->netdev) == ndev &&
+ (driver_id == RDMA_DRIVER_UNKNOWN ||
+ cur->ib_dev->ops.driver_id == driver_id) &&
+ ib_device_try_get(cur->ib_dev)) {
+ res = cur->ib_dev;
+ break;
+ }
+ }
+ rcu_read_unlock();
+
+ return res;
}
-EXPORT_SYMBOL(ib_query_gid);
+EXPORT_SYMBOL(ib_device_get_by_netdev);
/**
* ib_enum_roce_netdev - enumerate all RoCE ports
@@ -722,27 +2408,16 @@ void ib_enum_roce_netdev(struct ib_device *ib_dev,
roce_netdev_callback cb,
void *cookie)
{
- u8 port;
+ u32 port;
- for (port = rdma_start_port(ib_dev); port <= rdma_end_port(ib_dev);
- port++)
+ rdma_for_each_port (ib_dev, port)
if (rdma_protocol_roce(ib_dev, port)) {
- struct net_device *idev = NULL;
-
- if (ib_dev->get_netdev)
- idev = ib_dev->get_netdev(ib_dev, port);
-
- if (idev &&
- idev->reg_state >= NETREG_UNREGISTERED) {
- dev_put(idev);
- idev = NULL;
- }
+ struct net_device *idev =
+ ib_device_get_netdev(ib_dev, port);
if (filter(ib_dev, port, idev, filter_cookie))
cb(ib_dev, port, idev, cookie);
-
- if (idev)
- dev_put(idev);
+ dev_put(idev);
}
}
@@ -763,11 +2438,40 @@ void ib_enum_all_roce_netdevs(roce_netdev_filter filter,
void *cookie)
{
struct ib_device *dev;
+ unsigned long index;
- down_read(&lists_rwsem);
- list_for_each_entry(dev, &device_list, core_list)
+ down_read(&devices_rwsem);
+ xa_for_each_marked (&devices, index, dev, DEVICE_REGISTERED)
ib_enum_roce_netdev(dev, filter, filter_cookie, cb, cookie);
- up_read(&lists_rwsem);
+ up_read(&devices_rwsem);
+}
+
+/*
+ * ib_enum_all_devs - enumerate all ib_devices
+ * @cb: Callback to call for each found ib_device
+ *
+ * Enumerates all ib_devices and calls callback() on each device.
+ */
+int ib_enum_all_devs(nldev_callback nldev_cb, struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ unsigned long index;
+ struct ib_device *dev;
+ unsigned int idx = 0;
+ int ret = 0;
+
+ down_read(&devices_rwsem);
+ xa_for_each_marked (&devices, index, dev, DEVICE_REGISTERED) {
+ if (!rdma_dev_access_netns(dev, sock_net(skb->sk)))
+ continue;
+
+ ret = nldev_cb(dev, skb, cb, idx);
+ if (ret)
+ break;
+ idx++;
+ }
+ up_read(&devices_rwsem);
+ return ret;
}
/**
@@ -780,9 +2484,15 @@ void ib_enum_all_roce_netdevs(roce_netdev_filter filter,
* ib_query_pkey() fetches the specified P_Key table entry.
*/
int ib_query_pkey(struct ib_device *device,
- u8 port_num, u16 index, u16 *pkey)
+ u32 port_num, u16 index, u16 *pkey)
{
- return device->query_pkey(device, port_num, index, pkey);
+ if (!rdma_is_port_valid(device, port_num))
+ return -EINVAL;
+
+ if (!device->ops.query_pkey)
+ return -EOPNOTSUPP;
+
+ return device->ops.query_pkey(device, port_num, index, pkey);
}
EXPORT_SYMBOL(ib_query_pkey);
@@ -799,11 +2509,11 @@ int ib_modify_device(struct ib_device *device,
int device_modify_mask,
struct ib_device_modify *device_modify)
{
- if (!device->modify_device)
- return -ENOSYS;
+ if (!device->ops.modify_device)
+ return -EOPNOTSUPP;
- return device->modify_device(device, device_modify_mask,
- device_modify);
+ return device->ops.modify_device(device, device_modify_mask,
+ device_modify);
}
EXPORT_SYMBOL(ib_modify_device);
@@ -819,54 +2529,54 @@ EXPORT_SYMBOL(ib_modify_device);
* @port_modify_mask and @port_modify structure.
*/
int ib_modify_port(struct ib_device *device,
- u8 port_num, int port_modify_mask,
+ u32 port_num, int port_modify_mask,
struct ib_port_modify *port_modify)
{
- if (!device->modify_port)
- return -ENOSYS;
+ int rc;
- if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
+ if (!rdma_is_port_valid(device, port_num))
return -EINVAL;
- return device->modify_port(device, port_num, port_modify_mask,
- port_modify);
+ if (device->ops.modify_port)
+ rc = device->ops.modify_port(device, port_num,
+ port_modify_mask,
+ port_modify);
+ else if (rdma_protocol_roce(device, port_num) &&
+ ((port_modify->set_port_cap_mask & ~IB_PORT_CM_SUP) == 0 ||
+ (port_modify->clr_port_cap_mask & ~IB_PORT_CM_SUP) == 0))
+ rc = 0;
+ else
+ rc = -EOPNOTSUPP;
+ return rc;
}
EXPORT_SYMBOL(ib_modify_port);
/**
* ib_find_gid - Returns the port number and GID table index where
- * a specified GID value occurs.
+ * a specified GID value occurs. Its searches only for IB link layer.
* @device: The device to query.
* @gid: The GID value to search for.
- * @gid_type: Type of GID.
- * @ndev: The ndev related to the GID to search for.
* @port_num: The port number of the device where the GID value was found.
* @index: The index into the GID table where the GID was found. This
* parameter may be NULL.
*/
int ib_find_gid(struct ib_device *device, union ib_gid *gid,
- enum ib_gid_type gid_type, struct net_device *ndev,
- u8 *port_num, u16 *index)
+ u32 *port_num, u16 *index)
{
union ib_gid tmp_gid;
- int ret, port, i;
-
- for (port = rdma_start_port(device); port <= rdma_end_port(device); ++port) {
- if (rdma_cap_roce_gid_table(device, port)) {
- if (!ib_find_cached_gid_by_port(device, gid, gid_type, port,
- ndev, index)) {
- *port_num = port;
- return 0;
- }
- }
+ u32 port;
+ int ret, i;
- if (gid_type != IB_GID_TYPE_IB)
+ rdma_for_each_port (device, port) {
+ if (!rdma_protocol_ib(device, port))
continue;
- for (i = 0; i < device->port_immutable[port].gid_tbl_len; ++i) {
- ret = ib_query_gid(device, port, i, &tmp_gid, NULL);
+ for (i = 0; i < device->port_data[port].immutable.gid_tbl_len;
+ ++i) {
+ ret = rdma_query_gid(device, port, i, &tmp_gid);
if (ret)
- return ret;
+ continue;
+
if (!memcmp(&tmp_gid, gid, sizeof *gid)) {
*port_num = port;
if (index)
@@ -889,13 +2599,14 @@ EXPORT_SYMBOL(ib_find_gid);
* @index: The index into the PKey table where the PKey was found.
*/
int ib_find_pkey(struct ib_device *device,
- u8 port_num, u16 pkey, u16 *index)
+ u32 port_num, u16 pkey, u16 *index)
{
int ret, i;
u16 tmp_pkey;
int partial_ix = -1;
- for (i = 0; i < device->port_immutable[port_num].pkey_tbl_len; ++i) {
+ for (i = 0; i < device->port_data[port_num].immutable.pkey_tbl_len;
+ ++i) {
ret = ib_query_pkey(device, port_num, i, &tmp_pkey);
if (ret)
return ret;
@@ -928,96 +2639,420 @@ EXPORT_SYMBOL(ib_find_pkey);
* @gid: A GID that the net_dev uses to communicate.
* @addr: Contains the IP address that the request specified as its
* destination.
+ *
*/
struct net_device *ib_get_net_dev_by_params(struct ib_device *dev,
- u8 port,
+ u32 port,
u16 pkey,
const union ib_gid *gid,
const struct sockaddr *addr)
{
struct net_device *net_dev = NULL;
- struct ib_client_data *context;
+ unsigned long index;
+ void *client_data;
if (!rdma_protocol_ib(dev, port))
return NULL;
- down_read(&lists_rwsem);
-
- list_for_each_entry(context, &dev->client_data_list, list) {
- struct ib_client *client = context->client;
+ /*
+ * Holding the read side guarantees that the client will not become
+ * unregistered while we are calling get_net_dev_by_params()
+ */
+ down_read(&dev->client_data_rwsem);
+ xan_for_each_marked (&dev->client_data, index, client_data,
+ CLIENT_DATA_REGISTERED) {
+ struct ib_client *client = xa_load(&clients, index);
- if (context->going_down)
+ if (!client || !client->get_net_dev_by_params)
continue;
- if (client->get_net_dev_by_params) {
- net_dev = client->get_net_dev_by_params(dev, port, pkey,
- gid, addr,
- context->data);
- if (net_dev)
- break;
- }
+ net_dev = client->get_net_dev_by_params(dev, port, pkey, gid,
+ addr, client_data);
+ if (net_dev)
+ break;
}
-
- up_read(&lists_rwsem);
+ up_read(&dev->client_data_rwsem);
return net_dev;
}
EXPORT_SYMBOL(ib_get_net_dev_by_params);
-static struct ibnl_client_cbs ibnl_ls_cb_table[] = {
+void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
+{
+ struct ib_device_ops *dev_ops = &dev->ops;
+#define SET_DEVICE_OP(ptr, name) \
+ do { \
+ if (ops->name) \
+ if (!((ptr)->name)) \
+ (ptr)->name = ops->name; \
+ } while (0)
+
+#define SET_OBJ_SIZE(ptr, name) SET_DEVICE_OP(ptr, size_##name)
+
+ if (ops->driver_id != RDMA_DRIVER_UNKNOWN) {
+ WARN_ON(dev_ops->driver_id != RDMA_DRIVER_UNKNOWN &&
+ dev_ops->driver_id != ops->driver_id);
+ dev_ops->driver_id = ops->driver_id;
+ }
+ if (ops->owner) {
+ WARN_ON(dev_ops->owner && dev_ops->owner != ops->owner);
+ dev_ops->owner = ops->owner;
+ }
+ if (ops->uverbs_abi_ver)
+ dev_ops->uverbs_abi_ver = ops->uverbs_abi_ver;
+
+ dev_ops->uverbs_no_driver_id_binding |=
+ ops->uverbs_no_driver_id_binding;
+
+ SET_DEVICE_OP(dev_ops, add_gid);
+ SET_DEVICE_OP(dev_ops, add_sub_dev);
+ SET_DEVICE_OP(dev_ops, advise_mr);
+ SET_DEVICE_OP(dev_ops, alloc_dm);
+ SET_DEVICE_OP(dev_ops, alloc_dmah);
+ SET_DEVICE_OP(dev_ops, alloc_hw_device_stats);
+ SET_DEVICE_OP(dev_ops, alloc_hw_port_stats);
+ SET_DEVICE_OP(dev_ops, alloc_mr);
+ SET_DEVICE_OP(dev_ops, alloc_mr_integrity);
+ SET_DEVICE_OP(dev_ops, alloc_mw);
+ SET_DEVICE_OP(dev_ops, alloc_pd);
+ SET_DEVICE_OP(dev_ops, alloc_rdma_netdev);
+ SET_DEVICE_OP(dev_ops, alloc_ucontext);
+ SET_DEVICE_OP(dev_ops, alloc_xrcd);
+ SET_DEVICE_OP(dev_ops, attach_mcast);
+ SET_DEVICE_OP(dev_ops, check_mr_status);
+ SET_DEVICE_OP(dev_ops, counter_alloc_stats);
+ SET_DEVICE_OP(dev_ops, counter_bind_qp);
+ SET_DEVICE_OP(dev_ops, counter_dealloc);
+ SET_DEVICE_OP(dev_ops, counter_init);
+ SET_DEVICE_OP(dev_ops, counter_unbind_qp);
+ SET_DEVICE_OP(dev_ops, counter_update_stats);
+ SET_DEVICE_OP(dev_ops, create_ah);
+ SET_DEVICE_OP(dev_ops, create_counters);
+ SET_DEVICE_OP(dev_ops, create_cq);
+ SET_DEVICE_OP(dev_ops, create_cq_umem);
+ SET_DEVICE_OP(dev_ops, create_flow);
+ SET_DEVICE_OP(dev_ops, create_qp);
+ SET_DEVICE_OP(dev_ops, create_rwq_ind_table);
+ SET_DEVICE_OP(dev_ops, create_srq);
+ SET_DEVICE_OP(dev_ops, create_user_ah);
+ SET_DEVICE_OP(dev_ops, create_wq);
+ SET_DEVICE_OP(dev_ops, dealloc_dm);
+ SET_DEVICE_OP(dev_ops, dealloc_dmah);
+ SET_DEVICE_OP(dev_ops, dealloc_driver);
+ SET_DEVICE_OP(dev_ops, dealloc_mw);
+ SET_DEVICE_OP(dev_ops, dealloc_pd);
+ SET_DEVICE_OP(dev_ops, dealloc_ucontext);
+ SET_DEVICE_OP(dev_ops, dealloc_xrcd);
+ SET_DEVICE_OP(dev_ops, del_gid);
+ SET_DEVICE_OP(dev_ops, del_sub_dev);
+ SET_DEVICE_OP(dev_ops, dereg_mr);
+ SET_DEVICE_OP(dev_ops, destroy_ah);
+ SET_DEVICE_OP(dev_ops, destroy_counters);
+ SET_DEVICE_OP(dev_ops, destroy_cq);
+ SET_DEVICE_OP(dev_ops, destroy_flow);
+ SET_DEVICE_OP(dev_ops, destroy_flow_action);
+ SET_DEVICE_OP(dev_ops, destroy_qp);
+ SET_DEVICE_OP(dev_ops, destroy_rwq_ind_table);
+ SET_DEVICE_OP(dev_ops, destroy_srq);
+ SET_DEVICE_OP(dev_ops, destroy_wq);
+ SET_DEVICE_OP(dev_ops, device_group);
+ SET_DEVICE_OP(dev_ops, detach_mcast);
+ SET_DEVICE_OP(dev_ops, disassociate_ucontext);
+ SET_DEVICE_OP(dev_ops, drain_rq);
+ SET_DEVICE_OP(dev_ops, drain_sq);
+ SET_DEVICE_OP(dev_ops, enable_driver);
+ SET_DEVICE_OP(dev_ops, fill_res_cm_id_entry);
+ SET_DEVICE_OP(dev_ops, fill_res_cq_entry);
+ SET_DEVICE_OP(dev_ops, fill_res_cq_entry_raw);
+ SET_DEVICE_OP(dev_ops, fill_res_mr_entry);
+ SET_DEVICE_OP(dev_ops, fill_res_mr_entry_raw);
+ SET_DEVICE_OP(dev_ops, fill_res_qp_entry);
+ SET_DEVICE_OP(dev_ops, fill_res_qp_entry_raw);
+ SET_DEVICE_OP(dev_ops, fill_res_srq_entry);
+ SET_DEVICE_OP(dev_ops, fill_res_srq_entry_raw);
+ SET_DEVICE_OP(dev_ops, fill_stat_mr_entry);
+ SET_DEVICE_OP(dev_ops, get_dev_fw_str);
+ SET_DEVICE_OP(dev_ops, get_dma_mr);
+ SET_DEVICE_OP(dev_ops, get_hw_stats);
+ SET_DEVICE_OP(dev_ops, get_link_layer);
+ SET_DEVICE_OP(dev_ops, get_netdev);
+ SET_DEVICE_OP(dev_ops, get_numa_node);
+ SET_DEVICE_OP(dev_ops, get_port_immutable);
+ SET_DEVICE_OP(dev_ops, get_vector_affinity);
+ SET_DEVICE_OP(dev_ops, get_vf_config);
+ SET_DEVICE_OP(dev_ops, get_vf_guid);
+ SET_DEVICE_OP(dev_ops, get_vf_stats);
+ SET_DEVICE_OP(dev_ops, iw_accept);
+ SET_DEVICE_OP(dev_ops, iw_add_ref);
+ SET_DEVICE_OP(dev_ops, iw_connect);
+ SET_DEVICE_OP(dev_ops, iw_create_listen);
+ SET_DEVICE_OP(dev_ops, iw_destroy_listen);
+ SET_DEVICE_OP(dev_ops, iw_get_qp);
+ SET_DEVICE_OP(dev_ops, iw_reject);
+ SET_DEVICE_OP(dev_ops, iw_rem_ref);
+ SET_DEVICE_OP(dev_ops, map_mr_sg);
+ SET_DEVICE_OP(dev_ops, map_mr_sg_pi);
+ SET_DEVICE_OP(dev_ops, mmap);
+ SET_DEVICE_OP(dev_ops, mmap_free);
+ SET_DEVICE_OP(dev_ops, modify_ah);
+ SET_DEVICE_OP(dev_ops, modify_cq);
+ SET_DEVICE_OP(dev_ops, modify_device);
+ SET_DEVICE_OP(dev_ops, modify_hw_stat);
+ SET_DEVICE_OP(dev_ops, modify_port);
+ SET_DEVICE_OP(dev_ops, modify_qp);
+ SET_DEVICE_OP(dev_ops, modify_srq);
+ SET_DEVICE_OP(dev_ops, modify_wq);
+ SET_DEVICE_OP(dev_ops, peek_cq);
+ SET_DEVICE_OP(dev_ops, pre_destroy_cq);
+ SET_DEVICE_OP(dev_ops, poll_cq);
+ SET_DEVICE_OP(dev_ops, port_groups);
+ SET_DEVICE_OP(dev_ops, post_destroy_cq);
+ SET_DEVICE_OP(dev_ops, post_recv);
+ SET_DEVICE_OP(dev_ops, post_send);
+ SET_DEVICE_OP(dev_ops, post_srq_recv);
+ SET_DEVICE_OP(dev_ops, process_mad);
+ SET_DEVICE_OP(dev_ops, query_ah);
+ SET_DEVICE_OP(dev_ops, query_device);
+ SET_DEVICE_OP(dev_ops, query_gid);
+ SET_DEVICE_OP(dev_ops, query_pkey);
+ SET_DEVICE_OP(dev_ops, query_port);
+ SET_DEVICE_OP(dev_ops, query_qp);
+ SET_DEVICE_OP(dev_ops, query_srq);
+ SET_DEVICE_OP(dev_ops, query_ucontext);
+ SET_DEVICE_OP(dev_ops, rdma_netdev_get_params);
+ SET_DEVICE_OP(dev_ops, read_counters);
+ SET_DEVICE_OP(dev_ops, reg_dm_mr);
+ SET_DEVICE_OP(dev_ops, reg_user_mr);
+ SET_DEVICE_OP(dev_ops, reg_user_mr_dmabuf);
+ SET_DEVICE_OP(dev_ops, req_notify_cq);
+ SET_DEVICE_OP(dev_ops, rereg_user_mr);
+ SET_DEVICE_OP(dev_ops, resize_cq);
+ SET_DEVICE_OP(dev_ops, set_vf_guid);
+ SET_DEVICE_OP(dev_ops, set_vf_link_state);
+ SET_DEVICE_OP(dev_ops, ufile_hw_cleanup);
+ SET_DEVICE_OP(dev_ops, report_port_event);
+
+ SET_OBJ_SIZE(dev_ops, ib_ah);
+ SET_OBJ_SIZE(dev_ops, ib_counters);
+ SET_OBJ_SIZE(dev_ops, ib_cq);
+ SET_OBJ_SIZE(dev_ops, ib_dmah);
+ SET_OBJ_SIZE(dev_ops, ib_mw);
+ SET_OBJ_SIZE(dev_ops, ib_pd);
+ SET_OBJ_SIZE(dev_ops, ib_qp);
+ SET_OBJ_SIZE(dev_ops, ib_rwq_ind_table);
+ SET_OBJ_SIZE(dev_ops, ib_srq);
+ SET_OBJ_SIZE(dev_ops, ib_ucontext);
+ SET_OBJ_SIZE(dev_ops, ib_xrcd);
+ SET_OBJ_SIZE(dev_ops, rdma_counter);
+}
+EXPORT_SYMBOL(ib_set_device_ops);
+
+int ib_add_sub_device(struct ib_device *parent,
+ enum rdma_nl_dev_type type,
+ const char *name)
+{
+ struct ib_device *sub;
+ int ret = 0;
+
+ if (!parent->ops.add_sub_dev || !parent->ops.del_sub_dev)
+ return -EOPNOTSUPP;
+
+ if (!ib_device_try_get(parent))
+ return -EINVAL;
+
+ sub = parent->ops.add_sub_dev(parent, type, name);
+ if (IS_ERR(sub)) {
+ ib_device_put(parent);
+ return PTR_ERR(sub);
+ }
+
+ sub->type = type;
+ sub->parent = parent;
+
+ mutex_lock(&parent->subdev_lock);
+ list_add_tail(&parent->subdev_list_head, &sub->subdev_list);
+ mutex_unlock(&parent->subdev_lock);
+
+ return ret;
+}
+EXPORT_SYMBOL(ib_add_sub_device);
+
+int ib_del_sub_device_and_put(struct ib_device *sub)
+{
+ struct ib_device *parent = sub->parent;
+
+ if (!parent)
+ return -EOPNOTSUPP;
+
+ mutex_lock(&parent->subdev_lock);
+ list_del(&sub->subdev_list);
+ mutex_unlock(&parent->subdev_lock);
+
+ ib_device_put(sub);
+ parent->ops.del_sub_dev(sub);
+ ib_device_put(parent);
+
+ return 0;
+}
+EXPORT_SYMBOL(ib_del_sub_device_and_put);
+
+#ifdef CONFIG_INFINIBAND_VIRT_DMA
+int ib_dma_virt_map_sg(struct ib_device *dev, struct scatterlist *sg, int nents)
+{
+ struct scatterlist *s;
+ int i;
+
+ for_each_sg(sg, s, nents, i) {
+ sg_dma_address(s) = (uintptr_t)sg_virt(s);
+ sg_dma_len(s) = s->length;
+ }
+ return nents;
+}
+EXPORT_SYMBOL(ib_dma_virt_map_sg);
+#endif /* CONFIG_INFINIBAND_VIRT_DMA */
+
+static const struct rdma_nl_cbs ibnl_ls_cb_table[RDMA_NL_LS_NUM_OPS] = {
[RDMA_NL_LS_OP_RESOLVE] = {
- .dump = ib_nl_handle_resolve_resp,
- .module = THIS_MODULE },
+ .doit = ib_nl_handle_resolve_resp,
+ .flags = RDMA_NL_ADMIN_PERM,
+ },
[RDMA_NL_LS_OP_SET_TIMEOUT] = {
- .dump = ib_nl_handle_set_timeout,
- .module = THIS_MODULE },
+ .doit = ib_nl_handle_set_timeout,
+ .flags = RDMA_NL_ADMIN_PERM,
+ },
[RDMA_NL_LS_OP_IP_RESOLVE] = {
- .dump = ib_nl_handle_ip_res_resp,
- .module = THIS_MODULE },
+ .doit = ib_nl_handle_ip_res_resp,
+ .flags = RDMA_NL_ADMIN_PERM,
+ },
};
-static int ib_add_ibnl_clients(void)
+void ib_dispatch_port_state_event(struct ib_device *ibdev, struct net_device *ndev)
{
- return ibnl_add_client(RDMA_NL_LS, ARRAY_SIZE(ibnl_ls_cb_table),
- ibnl_ls_cb_table);
+ enum ib_port_state curr_state;
+ struct ib_event ibevent = {};
+ u32 port;
+
+ if (ib_query_netdev_port(ibdev, ndev, &port))
+ return;
+
+ curr_state = ib_get_curr_port_state(ndev);
+
+ write_lock_irq(&ibdev->cache_lock);
+ if (ibdev->port_data[port].cache.last_port_state == curr_state) {
+ write_unlock_irq(&ibdev->cache_lock);
+ return;
+ }
+ ibdev->port_data[port].cache.last_port_state = curr_state;
+ write_unlock_irq(&ibdev->cache_lock);
+
+ ibevent.event = (curr_state == IB_PORT_DOWN) ?
+ IB_EVENT_PORT_ERR : IB_EVENT_PORT_ACTIVE;
+ ibevent.device = ibdev;
+ ibevent.element.port_num = port;
+ ib_dispatch_event(&ibevent);
}
+EXPORT_SYMBOL(ib_dispatch_port_state_event);
-static void ib_remove_ibnl_clients(void)
+static void handle_port_event(struct net_device *ndev, unsigned long event)
{
- ibnl_remove_client(RDMA_NL_LS);
+ struct ib_device *ibdev;
+
+ /* Currently, link events in bonding scenarios are still
+ * reported by drivers that support bonding.
+ */
+ if (netif_is_lag_master(ndev) || netif_is_lag_port(ndev))
+ return;
+
+ ibdev = ib_device_get_by_netdev(ndev, RDMA_DRIVER_UNKNOWN);
+ if (!ibdev)
+ return;
+
+ if (ibdev->ops.report_port_event) {
+ ibdev->ops.report_port_event(ibdev, ndev, event);
+ goto put_ibdev;
+ }
+
+ ib_dispatch_port_state_event(ibdev, ndev);
+
+put_ibdev:
+ ib_device_put(ibdev);
+};
+
+static int ib_netdevice_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
+ struct ib_device *ibdev;
+ u32 port;
+
+ switch (event) {
+ case NETDEV_CHANGENAME:
+ ibdev = ib_device_get_by_netdev(ndev, RDMA_DRIVER_UNKNOWN);
+ if (!ibdev)
+ return NOTIFY_DONE;
+
+ if (ib_query_netdev_port(ibdev, ndev, &port)) {
+ ib_device_put(ibdev);
+ break;
+ }
+
+ rdma_nl_notify_event(ibdev, port, RDMA_NETDEV_RENAME_EVENT);
+ ib_device_put(ibdev);
+ break;
+
+ case NETDEV_UP:
+ case NETDEV_CHANGE:
+ case NETDEV_DOWN:
+ handle_port_event(ndev, event);
+ break;
+
+ default:
+ break;
+ }
+
+ return NOTIFY_DONE;
}
+static struct notifier_block nb_netdevice = {
+ .notifier_call = ib_netdevice_event,
+};
+
static int __init ib_core_init(void)
{
- int ret;
+ int ret = -ENOMEM;
- ib_wq = alloc_workqueue("infiniband", 0, 0);
+ ib_wq = alloc_workqueue("infiniband", WQ_PERCPU, 0);
if (!ib_wq)
return -ENOMEM;
- ib_comp_wq = alloc_workqueue("ib-comp-wq",
- WQ_UNBOUND | WQ_HIGHPRI | WQ_MEM_RECLAIM,
- WQ_UNBOUND_MAX_ACTIVE);
- if (!ib_comp_wq) {
- ret = -ENOMEM;
+ ib_unreg_wq = alloc_workqueue("ib-unreg-wq", WQ_UNBOUND,
+ WQ_UNBOUND_MAX_ACTIVE);
+ if (!ib_unreg_wq)
goto err;
- }
+
+ ib_comp_wq = alloc_workqueue("ib-comp-wq",
+ WQ_HIGHPRI | WQ_MEM_RECLAIM | WQ_SYSFS | WQ_PERCPU, 0);
+ if (!ib_comp_wq)
+ goto err_unbound;
+
+ ib_comp_unbound_wq =
+ alloc_workqueue("ib-comp-unb-wq",
+ WQ_UNBOUND | WQ_HIGHPRI | WQ_MEM_RECLAIM |
+ WQ_SYSFS, WQ_UNBOUND_MAX_ACTIVE);
+ if (!ib_comp_unbound_wq)
+ goto err_comp;
ret = class_register(&ib_class);
if (ret) {
pr_warn("Couldn't create InfiniBand device class\n");
- goto err_comp;
+ goto err_comp_unbound;
}
- ret = ibnl_init();
- if (ret) {
- pr_warn("Couldn't init IB netlink interface\n");
- goto err_sysfs;
- }
+ rdma_nl_init();
ret = addr_init();
if (ret) {
- pr_warn("Could't init IB address resolution\n");
+ pr_warn("Couldn't init IB address resolution\n");
goto err_ibnl;
}
@@ -1033,16 +3068,36 @@ static int __init ib_core_init(void)
goto err_mad;
}
- ret = ib_add_ibnl_clients();
+ ret = register_blocking_lsm_notifier(&ibdev_lsm_nb);
if (ret) {
- pr_warn("Couldn't register ibnl clients\n");
+ pr_warn("Couldn't register LSM notifier. ret %d\n", ret);
goto err_sa;
}
- ib_cache_setup();
+ ret = register_pernet_device(&rdma_dev_net_ops);
+ if (ret) {
+ pr_warn("Couldn't init compat dev. ret %d\n", ret);
+ goto err_compat;
+ }
+
+ nldev_init();
+ rdma_nl_register(RDMA_NL_LS, ibnl_ls_cb_table);
+ ret = roce_gid_mgmt_init();
+ if (ret) {
+ pr_warn("Couldn't init RoCE GID management\n");
+ goto err_parent;
+ }
+
+ register_netdevice_notifier(&nb_netdevice);
return 0;
+err_parent:
+ rdma_nl_unregister(RDMA_NL_LS);
+ nldev_exit();
+ unregister_pernet_device(&rdma_dev_net_ops);
+err_compat:
+ unregister_blocking_lsm_notifier(&ibdev_lsm_nb);
err_sa:
ib_sa_cleanup();
err_mad:
@@ -1050,11 +3105,13 @@ err_mad:
err_addr:
addr_cleanup();
err_ibnl:
- ibnl_cleanup();
-err_sysfs:
class_unregister(&ib_class);
+err_comp_unbound:
+ destroy_workqueue(ib_comp_unbound_wq);
err_comp:
destroy_workqueue(ib_comp_wq);
+err_unbound:
+ destroy_workqueue(ib_unreg_wq);
err:
destroy_workqueue(ib_wq);
return ret;
@@ -1062,17 +3119,30 @@ err:
static void __exit ib_core_cleanup(void)
{
- ib_cache_cleanup();
- ib_remove_ibnl_clients();
+ unregister_netdevice_notifier(&nb_netdevice);
+ roce_gid_mgmt_cleanup();
+ rdma_nl_unregister(RDMA_NL_LS);
+ nldev_exit();
+ unregister_pernet_device(&rdma_dev_net_ops);
+ unregister_blocking_lsm_notifier(&ibdev_lsm_nb);
ib_sa_cleanup();
ib_mad_cleanup();
addr_cleanup();
- ibnl_cleanup();
+ rdma_nl_exit();
class_unregister(&ib_class);
+ destroy_workqueue(ib_comp_unbound_wq);
destroy_workqueue(ib_comp_wq);
/* Make sure that any pending umem accounting work is done. */
destroy_workqueue(ib_wq);
+ destroy_workqueue(ib_unreg_wq);
+ WARN_ON(!xa_empty(&clients));
+ WARN_ON(!xa_empty(&devices));
}
-module_init(ib_core_init);
+MODULE_ALIAS_RDMA_NETLINK(RDMA_NL_LS, 4);
+
+/* ib core relies on netdev stack to first register net_ns_type_operations
+ * ns kobject type before ib_core initialization.
+ */
+fs_initcall(ib_core_init);
module_exit(ib_core_cleanup);
diff --git a/drivers/infiniband/core/fmr_pool.c b/drivers/infiniband/core/fmr_pool.c
deleted file mode 100644
index cdfad5f26212..000000000000
--- a/drivers/infiniband/core/fmr_pool.c
+++ /dev/null
@@ -1,520 +0,0 @@
-/*
- * Copyright (c) 2004 Topspin Communications. All rights reserved.
- * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/errno.h>
-#include <linux/spinlock.h>
-#include <linux/export.h>
-#include <linux/slab.h>
-#include <linux/jhash.h>
-#include <linux/kthread.h>
-
-#include <rdma/ib_fmr_pool.h>
-
-#include "core_priv.h"
-
-#define PFX "fmr_pool: "
-
-enum {
- IB_FMR_MAX_REMAPS = 32,
-
- IB_FMR_HASH_BITS = 8,
- IB_FMR_HASH_SIZE = 1 << IB_FMR_HASH_BITS,
- IB_FMR_HASH_MASK = IB_FMR_HASH_SIZE - 1
-};
-
-/*
- * If an FMR is not in use, then the list member will point to either
- * its pool's free_list (if the FMR can be mapped again; that is,
- * remap_count < pool->max_remaps) or its pool's dirty_list (if the
- * FMR needs to be unmapped before being remapped). In either of
- * these cases it is a bug if the ref_count is not 0. In other words,
- * if ref_count is > 0, then the list member must not be linked into
- * either free_list or dirty_list.
- *
- * The cache_node member is used to link the FMR into a cache bucket
- * (if caching is enabled). This is independent of the reference
- * count of the FMR. When a valid FMR is released, its ref_count is
- * decremented, and if ref_count reaches 0, the FMR is placed in
- * either free_list or dirty_list as appropriate. However, it is not
- * removed from the cache and may be "revived" if a call to
- * ib_fmr_register_physical() occurs before the FMR is remapped. In
- * this case we just increment the ref_count and remove the FMR from
- * free_list/dirty_list.
- *
- * Before we remap an FMR from free_list, we remove it from the cache
- * (to prevent another user from obtaining a stale FMR). When an FMR
- * is released, we add it to the tail of the free list, so that our
- * cache eviction policy is "least recently used."
- *
- * All manipulation of ref_count, list and cache_node is protected by
- * pool_lock to maintain consistency.
- */
-
-struct ib_fmr_pool {
- spinlock_t pool_lock;
-
- int pool_size;
- int max_pages;
- int max_remaps;
- int dirty_watermark;
- int dirty_len;
- struct list_head free_list;
- struct list_head dirty_list;
- struct hlist_head *cache_bucket;
-
- void (*flush_function)(struct ib_fmr_pool *pool,
- void * arg);
- void *flush_arg;
-
- struct task_struct *thread;
-
- atomic_t req_ser;
- atomic_t flush_ser;
-
- wait_queue_head_t force_wait;
-};
-
-static inline u32 ib_fmr_hash(u64 first_page)
-{
- return jhash_2words((u32) first_page, (u32) (first_page >> 32), 0) &
- (IB_FMR_HASH_SIZE - 1);
-}
-
-/* Caller must hold pool_lock */
-static inline struct ib_pool_fmr *ib_fmr_cache_lookup(struct ib_fmr_pool *pool,
- u64 *page_list,
- int page_list_len,
- u64 io_virtual_address)
-{
- struct hlist_head *bucket;
- struct ib_pool_fmr *fmr;
-
- if (!pool->cache_bucket)
- return NULL;
-
- bucket = pool->cache_bucket + ib_fmr_hash(*page_list);
-
- hlist_for_each_entry(fmr, bucket, cache_node)
- if (io_virtual_address == fmr->io_virtual_address &&
- page_list_len == fmr->page_list_len &&
- !memcmp(page_list, fmr->page_list,
- page_list_len * sizeof *page_list))
- return fmr;
-
- return NULL;
-}
-
-static void ib_fmr_batch_release(struct ib_fmr_pool *pool)
-{
- int ret;
- struct ib_pool_fmr *fmr;
- LIST_HEAD(unmap_list);
- LIST_HEAD(fmr_list);
-
- spin_lock_irq(&pool->pool_lock);
-
- list_for_each_entry(fmr, &pool->dirty_list, list) {
- hlist_del_init(&fmr->cache_node);
- fmr->remap_count = 0;
- list_add_tail(&fmr->fmr->list, &fmr_list);
-
-#ifdef DEBUG
- if (fmr->ref_count !=0) {
- pr_warn(PFX "Unmapping FMR 0x%08x with ref count %d\n",
- fmr, fmr->ref_count);
- }
-#endif
- }
-
- list_splice_init(&pool->dirty_list, &unmap_list);
- pool->dirty_len = 0;
-
- spin_unlock_irq(&pool->pool_lock);
-
- if (list_empty(&unmap_list)) {
- return;
- }
-
- ret = ib_unmap_fmr(&fmr_list);
- if (ret)
- pr_warn(PFX "ib_unmap_fmr returned %d\n", ret);
-
- spin_lock_irq(&pool->pool_lock);
- list_splice(&unmap_list, &pool->free_list);
- spin_unlock_irq(&pool->pool_lock);
-}
-
-static int ib_fmr_cleanup_thread(void *pool_ptr)
-{
- struct ib_fmr_pool *pool = pool_ptr;
-
- do {
- if (atomic_read(&pool->flush_ser) - atomic_read(&pool->req_ser) < 0) {
- ib_fmr_batch_release(pool);
-
- atomic_inc(&pool->flush_ser);
- wake_up_interruptible(&pool->force_wait);
-
- if (pool->flush_function)
- pool->flush_function(pool, pool->flush_arg);
- }
-
- set_current_state(TASK_INTERRUPTIBLE);
- if (atomic_read(&pool->flush_ser) - atomic_read(&pool->req_ser) >= 0 &&
- !kthread_should_stop())
- schedule();
- __set_current_state(TASK_RUNNING);
- } while (!kthread_should_stop());
-
- return 0;
-}
-
-/**
- * ib_create_fmr_pool - Create an FMR pool
- * @pd:Protection domain for FMRs
- * @params:FMR pool parameters
- *
- * Create a pool of FMRs. Return value is pointer to new pool or
- * error code if creation failed.
- */
-struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd,
- struct ib_fmr_pool_param *params)
-{
- struct ib_device *device;
- struct ib_fmr_pool *pool;
- int i;
- int ret;
- int max_remaps;
-
- if (!params)
- return ERR_PTR(-EINVAL);
-
- device = pd->device;
- if (!device->alloc_fmr || !device->dealloc_fmr ||
- !device->map_phys_fmr || !device->unmap_fmr) {
- pr_info(PFX "Device %s does not support FMRs\n", device->name);
- return ERR_PTR(-ENOSYS);
- }
-
- if (!device->attrs.max_map_per_fmr)
- max_remaps = IB_FMR_MAX_REMAPS;
- else
- max_remaps = device->attrs.max_map_per_fmr;
-
- pool = kmalloc(sizeof *pool, GFP_KERNEL);
- if (!pool)
- return ERR_PTR(-ENOMEM);
-
- pool->cache_bucket = NULL;
- pool->flush_function = params->flush_function;
- pool->flush_arg = params->flush_arg;
-
- INIT_LIST_HEAD(&pool->free_list);
- INIT_LIST_HEAD(&pool->dirty_list);
-
- if (params->cache) {
- pool->cache_bucket =
- kmalloc(IB_FMR_HASH_SIZE * sizeof *pool->cache_bucket,
- GFP_KERNEL);
- if (!pool->cache_bucket) {
- ret = -ENOMEM;
- goto out_free_pool;
- }
-
- for (i = 0; i < IB_FMR_HASH_SIZE; ++i)
- INIT_HLIST_HEAD(pool->cache_bucket + i);
- }
-
- pool->pool_size = 0;
- pool->max_pages = params->max_pages_per_fmr;
- pool->max_remaps = max_remaps;
- pool->dirty_watermark = params->dirty_watermark;
- pool->dirty_len = 0;
- spin_lock_init(&pool->pool_lock);
- atomic_set(&pool->req_ser, 0);
- atomic_set(&pool->flush_ser, 0);
- init_waitqueue_head(&pool->force_wait);
-
- pool->thread = kthread_run(ib_fmr_cleanup_thread,
- pool,
- "ib_fmr(%s)",
- device->name);
- if (IS_ERR(pool->thread)) {
- pr_warn(PFX "couldn't start cleanup thread\n");
- ret = PTR_ERR(pool->thread);
- goto out_free_pool;
- }
-
- {
- struct ib_pool_fmr *fmr;
- struct ib_fmr_attr fmr_attr = {
- .max_pages = params->max_pages_per_fmr,
- .max_maps = pool->max_remaps,
- .page_shift = params->page_shift
- };
- int bytes_per_fmr = sizeof *fmr;
-
- if (pool->cache_bucket)
- bytes_per_fmr += params->max_pages_per_fmr * sizeof (u64);
-
- for (i = 0; i < params->pool_size; ++i) {
- fmr = kmalloc(bytes_per_fmr, GFP_KERNEL);
- if (!fmr)
- goto out_fail;
-
- fmr->pool = pool;
- fmr->remap_count = 0;
- fmr->ref_count = 0;
- INIT_HLIST_NODE(&fmr->cache_node);
-
- fmr->fmr = ib_alloc_fmr(pd, params->access, &fmr_attr);
- if (IS_ERR(fmr->fmr)) {
- pr_warn(PFX "fmr_create failed for FMR %d\n",
- i);
- kfree(fmr);
- goto out_fail;
- }
-
- list_add_tail(&fmr->list, &pool->free_list);
- ++pool->pool_size;
- }
- }
-
- return pool;
-
- out_free_pool:
- kfree(pool->cache_bucket);
- kfree(pool);
-
- return ERR_PTR(ret);
-
- out_fail:
- ib_destroy_fmr_pool(pool);
-
- return ERR_PTR(-ENOMEM);
-}
-EXPORT_SYMBOL(ib_create_fmr_pool);
-
-/**
- * ib_destroy_fmr_pool - Free FMR pool
- * @pool:FMR pool to free
- *
- * Destroy an FMR pool and free all associated resources.
- */
-void ib_destroy_fmr_pool(struct ib_fmr_pool *pool)
-{
- struct ib_pool_fmr *fmr;
- struct ib_pool_fmr *tmp;
- LIST_HEAD(fmr_list);
- int i;
-
- kthread_stop(pool->thread);
- ib_fmr_batch_release(pool);
-
- i = 0;
- list_for_each_entry_safe(fmr, tmp, &pool->free_list, list) {
- if (fmr->remap_count) {
- INIT_LIST_HEAD(&fmr_list);
- list_add_tail(&fmr->fmr->list, &fmr_list);
- ib_unmap_fmr(&fmr_list);
- }
- ib_dealloc_fmr(fmr->fmr);
- list_del(&fmr->list);
- kfree(fmr);
- ++i;
- }
-
- if (i < pool->pool_size)
- pr_warn(PFX "pool still has %d regions registered\n",
- pool->pool_size - i);
-
- kfree(pool->cache_bucket);
- kfree(pool);
-}
-EXPORT_SYMBOL(ib_destroy_fmr_pool);
-
-/**
- * ib_flush_fmr_pool - Invalidate all unmapped FMRs
- * @pool:FMR pool to flush
- *
- * Ensure that all unmapped FMRs are fully invalidated.
- */
-int ib_flush_fmr_pool(struct ib_fmr_pool *pool)
-{
- int serial;
- struct ib_pool_fmr *fmr, *next;
-
- /*
- * The free_list holds FMRs that may have been used
- * but have not been remapped enough times to be dirty.
- * Put them on the dirty list now so that the cleanup
- * thread will reap them too.
- */
- spin_lock_irq(&pool->pool_lock);
- list_for_each_entry_safe(fmr, next, &pool->free_list, list) {
- if (fmr->remap_count > 0)
- list_move(&fmr->list, &pool->dirty_list);
- }
- spin_unlock_irq(&pool->pool_lock);
-
- serial = atomic_inc_return(&pool->req_ser);
- wake_up_process(pool->thread);
-
- if (wait_event_interruptible(pool->force_wait,
- atomic_read(&pool->flush_ser) - serial >= 0))
- return -EINTR;
-
- return 0;
-}
-EXPORT_SYMBOL(ib_flush_fmr_pool);
-
-/**
- * ib_fmr_pool_map_phys -
- * @pool:FMR pool to allocate FMR from
- * @page_list:List of pages to map
- * @list_len:Number of pages in @page_list
- * @io_virtual_address:I/O virtual address for new FMR
- *
- * Map an FMR from an FMR pool.
- */
-struct ib_pool_fmr *ib_fmr_pool_map_phys(struct ib_fmr_pool *pool_handle,
- u64 *page_list,
- int list_len,
- u64 io_virtual_address)
-{
- struct ib_fmr_pool *pool = pool_handle;
- struct ib_pool_fmr *fmr;
- unsigned long flags;
- int result;
-
- if (list_len < 1 || list_len > pool->max_pages)
- return ERR_PTR(-EINVAL);
-
- spin_lock_irqsave(&pool->pool_lock, flags);
- fmr = ib_fmr_cache_lookup(pool,
- page_list,
- list_len,
- io_virtual_address);
- if (fmr) {
- /* found in cache */
- ++fmr->ref_count;
- if (fmr->ref_count == 1) {
- list_del(&fmr->list);
- }
-
- spin_unlock_irqrestore(&pool->pool_lock, flags);
-
- return fmr;
- }
-
- if (list_empty(&pool->free_list)) {
- spin_unlock_irqrestore(&pool->pool_lock, flags);
- return ERR_PTR(-EAGAIN);
- }
-
- fmr = list_entry(pool->free_list.next, struct ib_pool_fmr, list);
- list_del(&fmr->list);
- hlist_del_init(&fmr->cache_node);
- spin_unlock_irqrestore(&pool->pool_lock, flags);
-
- result = ib_map_phys_fmr(fmr->fmr, page_list, list_len,
- io_virtual_address);
-
- if (result) {
- spin_lock_irqsave(&pool->pool_lock, flags);
- list_add(&fmr->list, &pool->free_list);
- spin_unlock_irqrestore(&pool->pool_lock, flags);
-
- pr_warn(PFX "fmr_map returns %d\n", result);
-
- return ERR_PTR(result);
- }
-
- ++fmr->remap_count;
- fmr->ref_count = 1;
-
- if (pool->cache_bucket) {
- fmr->io_virtual_address = io_virtual_address;
- fmr->page_list_len = list_len;
- memcpy(fmr->page_list, page_list, list_len * sizeof(*page_list));
-
- spin_lock_irqsave(&pool->pool_lock, flags);
- hlist_add_head(&fmr->cache_node,
- pool->cache_bucket + ib_fmr_hash(fmr->page_list[0]));
- spin_unlock_irqrestore(&pool->pool_lock, flags);
- }
-
- return fmr;
-}
-EXPORT_SYMBOL(ib_fmr_pool_map_phys);
-
-/**
- * ib_fmr_pool_unmap - Unmap FMR
- * @fmr:FMR to unmap
- *
- * Unmap an FMR. The FMR mapping may remain valid until the FMR is
- * reused (or until ib_flush_fmr_pool() is called).
- */
-int ib_fmr_pool_unmap(struct ib_pool_fmr *fmr)
-{
- struct ib_fmr_pool *pool;
- unsigned long flags;
-
- pool = fmr->pool;
-
- spin_lock_irqsave(&pool->pool_lock, flags);
-
- --fmr->ref_count;
- if (!fmr->ref_count) {
- if (fmr->remap_count < pool->max_remaps) {
- list_add_tail(&fmr->list, &pool->free_list);
- } else {
- list_add_tail(&fmr->list, &pool->dirty_list);
- if (++pool->dirty_len >= pool->dirty_watermark) {
- atomic_inc(&pool->req_ser);
- wake_up_process(pool->thread);
- }
- }
- }
-
-#ifdef DEBUG
- if (fmr->ref_count < 0)
- pr_warn(PFX "FMR %p has ref count %d < 0\n",
- fmr, fmr->ref_count);
-#endif
-
- spin_unlock_irqrestore(&pool->pool_lock, flags);
-
- return 0;
-}
-EXPORT_SYMBOL(ib_fmr_pool_unmap);
diff --git a/drivers/infiniband/core/ib_core_uverbs.c b/drivers/infiniband/core/ib_core_uverbs.c
new file mode 100644
index 000000000000..b51bd7087a88
--- /dev/null
+++ b/drivers/infiniband/core/ib_core_uverbs.c
@@ -0,0 +1,367 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
+ * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved.
+ * Copyright 2019 Marvell. All rights reserved.
+ */
+#include <linux/xarray.h>
+#include "uverbs.h"
+#include "core_priv.h"
+
+/**
+ * rdma_umap_priv_init() - Initialize the private data of a vma
+ *
+ * @priv: The already allocated private data
+ * @vma: The vm area struct that needs private data
+ * @entry: entry into the mmap_xa that needs to be linked with
+ * this vma
+ *
+ * Each time we map IO memory into user space this keeps track of the
+ * mapping. When the device is hot-unplugged we 'zap' the mmaps in user space
+ * to point to the zero page and allow the hot unplug to proceed.
+ *
+ * This is necessary for cases like PCI physical hot unplug as the actual BAR
+ * memory may vanish after this and access to it from userspace could MCE.
+ *
+ * RDMA drivers supporting disassociation must have their user space designed
+ * to cope in some way with their IO pages going to the zero page.
+ *
+ */
+void rdma_umap_priv_init(struct rdma_umap_priv *priv,
+ struct vm_area_struct *vma,
+ struct rdma_user_mmap_entry *entry)
+{
+ struct ib_uverbs_file *ufile = vma->vm_file->private_data;
+
+ priv->vma = vma;
+ if (entry) {
+ kref_get(&entry->ref);
+ priv->entry = entry;
+ }
+ vma->vm_private_data = priv;
+ /* vm_ops is setup in ib_uverbs_mmap() to avoid module dependencies */
+
+ mutex_lock(&ufile->umap_lock);
+ list_add(&priv->list, &ufile->umaps);
+ mutex_unlock(&ufile->umap_lock);
+}
+EXPORT_SYMBOL(rdma_umap_priv_init);
+
+/**
+ * rdma_user_mmap_io() - Map IO memory into a process
+ *
+ * @ucontext: associated user context
+ * @vma: the vma related to the current mmap call
+ * @pfn: pfn to map
+ * @size: size to map
+ * @prot: pgprot to use in remap call
+ * @entry: mmap_entry retrieved from rdma_user_mmap_entry_get(), or NULL
+ * if mmap_entry is not used by the driver
+ *
+ * This is to be called by drivers as part of their mmap() functions if they
+ * wish to send something like PCI-E BAR memory to userspace.
+ *
+ * Return -EINVAL on wrong flags or size, -EAGAIN on failure to map. 0 on
+ * success.
+ */
+int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma,
+ unsigned long pfn, unsigned long size, pgprot_t prot,
+ struct rdma_user_mmap_entry *entry)
+{
+ struct ib_uverbs_file *ufile = ucontext->ufile;
+ struct rdma_umap_priv *priv;
+
+ if (!(vma->vm_flags & VM_SHARED))
+ return -EINVAL;
+
+ if (vma->vm_end - vma->vm_start != size)
+ return -EINVAL;
+
+ /* Driver is using this wrong, must be called by ib_uverbs_mmap */
+ if (WARN_ON(!vma->vm_file ||
+ vma->vm_file->private_data != ufile))
+ return -EINVAL;
+ lockdep_assert_held(&ufile->device->disassociate_srcu);
+
+ priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return -ENOMEM;
+
+ vma->vm_page_prot = prot;
+ if (io_remap_pfn_range(vma, vma->vm_start, pfn, size, prot)) {
+ kfree(priv);
+ return -EAGAIN;
+ }
+
+ rdma_umap_priv_init(priv, vma, entry);
+ return 0;
+}
+EXPORT_SYMBOL(rdma_user_mmap_io);
+
+/**
+ * rdma_user_mmap_entry_get_pgoff() - Get an entry from the mmap_xa
+ *
+ * @ucontext: associated user context
+ * @pgoff: The mmap offset >> PAGE_SHIFT
+ *
+ * This function is called when a user tries to mmap with an offset (returned
+ * by rdma_user_mmap_get_offset()) it initially received from the driver. The
+ * rdma_user_mmap_entry was created by the function
+ * rdma_user_mmap_entry_insert(). This function increases the refcnt of the
+ * entry so that it won't be deleted from the xarray in the meantime.
+ *
+ * Return an reference to an entry if exists or NULL if there is no
+ * match. rdma_user_mmap_entry_put() must be called to put the reference.
+ */
+struct rdma_user_mmap_entry *
+rdma_user_mmap_entry_get_pgoff(struct ib_ucontext *ucontext,
+ unsigned long pgoff)
+{
+ struct rdma_user_mmap_entry *entry;
+
+ if (pgoff > U32_MAX)
+ return NULL;
+
+ xa_lock(&ucontext->mmap_xa);
+
+ entry = xa_load(&ucontext->mmap_xa, pgoff);
+
+ /*
+ * If refcount is zero, entry is already being deleted, driver_removed
+ * indicates that the no further mmaps are possible and we waiting for
+ * the active VMAs to be closed.
+ */
+ if (!entry || entry->start_pgoff != pgoff || entry->driver_removed ||
+ !kref_get_unless_zero(&entry->ref))
+ goto err;
+
+ xa_unlock(&ucontext->mmap_xa);
+
+ ibdev_dbg(ucontext->device, "mmap: pgoff[%#lx] npages[%#zx] returned\n",
+ pgoff, entry->npages);
+
+ return entry;
+
+err:
+ xa_unlock(&ucontext->mmap_xa);
+ return NULL;
+}
+EXPORT_SYMBOL(rdma_user_mmap_entry_get_pgoff);
+
+/**
+ * rdma_user_mmap_entry_get() - Get an entry from the mmap_xa
+ *
+ * @ucontext: associated user context
+ * @vma: the vma being mmap'd into
+ *
+ * This function is like rdma_user_mmap_entry_get_pgoff() except that it also
+ * checks that the VMA is correct.
+ */
+struct rdma_user_mmap_entry *
+rdma_user_mmap_entry_get(struct ib_ucontext *ucontext,
+ struct vm_area_struct *vma)
+{
+ struct rdma_user_mmap_entry *entry;
+
+ if (!(vma->vm_flags & VM_SHARED))
+ return NULL;
+ entry = rdma_user_mmap_entry_get_pgoff(ucontext, vma->vm_pgoff);
+ if (!entry)
+ return NULL;
+ if (entry->npages * PAGE_SIZE != vma->vm_end - vma->vm_start) {
+ rdma_user_mmap_entry_put(entry);
+ return NULL;
+ }
+ return entry;
+}
+EXPORT_SYMBOL(rdma_user_mmap_entry_get);
+
+static void rdma_user_mmap_entry_free(struct kref *kref)
+{
+ struct rdma_user_mmap_entry *entry =
+ container_of(kref, struct rdma_user_mmap_entry, ref);
+ struct ib_ucontext *ucontext = entry->ucontext;
+ unsigned long i;
+
+ /*
+ * Erase all entries occupied by this single entry, this is deferred
+ * until all VMA are closed so that the mmap offsets remain unique.
+ */
+ xa_lock(&ucontext->mmap_xa);
+ for (i = 0; i < entry->npages; i++)
+ __xa_erase(&ucontext->mmap_xa, entry->start_pgoff + i);
+ xa_unlock(&ucontext->mmap_xa);
+
+ ibdev_dbg(ucontext->device, "mmap: pgoff[%#lx] npages[%#zx] removed\n",
+ entry->start_pgoff, entry->npages);
+
+ if (ucontext->device->ops.mmap_free)
+ ucontext->device->ops.mmap_free(entry);
+}
+
+/**
+ * rdma_user_mmap_entry_put() - Drop reference to the mmap entry
+ *
+ * @entry: an entry in the mmap_xa
+ *
+ * This function is called when the mapping is closed if it was
+ * an io mapping or when the driver is done with the entry for
+ * some other reason.
+ * Should be called after rdma_user_mmap_entry_get was called
+ * and entry is no longer needed. This function will erase the
+ * entry and free it if its refcnt reaches zero.
+ */
+void rdma_user_mmap_entry_put(struct rdma_user_mmap_entry *entry)
+{
+ kref_put(&entry->ref, rdma_user_mmap_entry_free);
+}
+EXPORT_SYMBOL(rdma_user_mmap_entry_put);
+
+/**
+ * rdma_user_mmap_entry_remove() - Drop reference to entry and
+ * mark it as unmmapable
+ *
+ * @entry: the entry to insert into the mmap_xa
+ *
+ * Drivers can call this to prevent userspace from creating more mappings for
+ * entry, however existing mmaps continue to exist and ops->mmap_free() will
+ * not be called until all user mmaps are destroyed.
+ */
+void rdma_user_mmap_entry_remove(struct rdma_user_mmap_entry *entry)
+{
+ if (!entry)
+ return;
+
+ xa_lock(&entry->ucontext->mmap_xa);
+ entry->driver_removed = true;
+ xa_unlock(&entry->ucontext->mmap_xa);
+ kref_put(&entry->ref, rdma_user_mmap_entry_free);
+}
+EXPORT_SYMBOL(rdma_user_mmap_entry_remove);
+
+/**
+ * rdma_user_mmap_entry_insert_range() - Insert an entry to the mmap_xa
+ * in a given range.
+ *
+ * @ucontext: associated user context.
+ * @entry: the entry to insert into the mmap_xa
+ * @length: length of the address that will be mmapped
+ * @min_pgoff: minimum pgoff to be returned
+ * @max_pgoff: maximum pgoff to be returned
+ *
+ * This function should be called by drivers that use the rdma_user_mmap
+ * interface for implementing their mmap syscall A database of mmap offsets is
+ * handled in the core and helper functions are provided to insert entries
+ * into the database and extract entries when the user calls mmap with the
+ * given offset. The function allocates a unique page offset in a given range
+ * that should be provided to user, the user will use the offset to retrieve
+ * information such as address to be mapped and how.
+ *
+ * Return: 0 on success and -ENOMEM on failure
+ */
+int rdma_user_mmap_entry_insert_range(struct ib_ucontext *ucontext,
+ struct rdma_user_mmap_entry *entry,
+ size_t length, u32 min_pgoff,
+ u32 max_pgoff)
+{
+ struct ib_uverbs_file *ufile = ucontext->ufile;
+ XA_STATE(xas, &ucontext->mmap_xa, min_pgoff);
+ u32 xa_first, xa_last, npages;
+ int err;
+ u32 i;
+
+ if (!entry)
+ return -EINVAL;
+
+ kref_init(&entry->ref);
+ entry->ucontext = ucontext;
+
+ /*
+ * We want the whole allocation to be done without interruption from a
+ * different thread. The allocation requires finding a free range and
+ * storing. During the xa_insert the lock could be released, possibly
+ * allowing another thread to choose the same range.
+ */
+ mutex_lock(&ufile->umap_lock);
+
+ xa_lock(&ucontext->mmap_xa);
+
+ /* We want to find an empty range */
+ npages = (u32)DIV_ROUND_UP(length, PAGE_SIZE);
+ entry->npages = npages;
+ while (true) {
+ /* First find an empty index */
+ xas_find_marked(&xas, max_pgoff, XA_FREE_MARK);
+ if (xas.xa_node == XAS_RESTART)
+ goto err_unlock;
+
+ xa_first = xas.xa_index;
+
+ /* Is there enough room to have the range? */
+ if (check_add_overflow(xa_first, npages, &xa_last))
+ goto err_unlock;
+
+ /*
+ * Now look for the next present entry. If an entry doesn't
+ * exist, we found an empty range and can proceed.
+ */
+ xas_next_entry(&xas, xa_last - 1);
+ if (xas.xa_node == XAS_BOUNDS || xas.xa_index >= xa_last)
+ break;
+ }
+
+ for (i = xa_first; i < xa_last; i++) {
+ err = __xa_insert(&ucontext->mmap_xa, i, entry, GFP_KERNEL);
+ if (err)
+ goto err_undo;
+ }
+
+ /*
+ * Internally the kernel uses a page offset, in libc this is a byte
+ * offset. Drivers should not return pgoff to userspace.
+ */
+ entry->start_pgoff = xa_first;
+ xa_unlock(&ucontext->mmap_xa);
+ mutex_unlock(&ufile->umap_lock);
+
+ ibdev_dbg(ucontext->device, "mmap: pgoff[%#lx] npages[%#x] inserted\n",
+ entry->start_pgoff, npages);
+
+ return 0;
+
+err_undo:
+ for (; i > xa_first; i--)
+ __xa_erase(&ucontext->mmap_xa, i - 1);
+
+err_unlock:
+ xa_unlock(&ucontext->mmap_xa);
+ mutex_unlock(&ufile->umap_lock);
+ return -ENOMEM;
+}
+EXPORT_SYMBOL(rdma_user_mmap_entry_insert_range);
+
+/**
+ * rdma_user_mmap_entry_insert() - Insert an entry to the mmap_xa.
+ *
+ * @ucontext: associated user context.
+ * @entry: the entry to insert into the mmap_xa
+ * @length: length of the address that will be mmapped
+ *
+ * This function should be called by drivers that use the rdma_user_mmap
+ * interface for handling user mmapped addresses. The database is handled in
+ * the core and helper functions are provided to insert entries into the
+ * database and extract entries when the user calls mmap with the given offset.
+ * The function allocates a unique page offset that should be provided to user,
+ * the user will use the offset to retrieve information such as address to
+ * be mapped and how.
+ *
+ * Return: 0 on success and -ENOMEM on failure
+ */
+int rdma_user_mmap_entry_insert(struct ib_ucontext *ucontext,
+ struct rdma_user_mmap_entry *entry,
+ size_t length)
+{
+ return rdma_user_mmap_entry_insert_range(ucontext, entry, length, 0,
+ U32_MAX);
+}
+EXPORT_SYMBOL(rdma_user_mmap_entry_insert);
diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c
index 31661b5c1743..62410578dec3 100644
--- a/drivers/infiniband/core/iwcm.c
+++ b/drivers/infiniband/core/iwcm.c
@@ -80,14 +80,15 @@ const char *__attribute_const__ iwcm_reject_msg(int reason)
}
EXPORT_SYMBOL(iwcm_reject_msg);
-static struct ibnl_client_cbs iwcm_nl_cb_table[] = {
+static struct rdma_nl_cbs iwcm_nl_cb_table[RDMA_NL_IWPM_NUM_OPS] = {
[RDMA_NL_IWPM_REG_PID] = {.dump = iwpm_register_pid_cb},
[RDMA_NL_IWPM_ADD_MAPPING] = {.dump = iwpm_add_mapping_cb},
[RDMA_NL_IWPM_QUERY_MAPPING] = {.dump = iwpm_add_and_query_mapping_cb},
[RDMA_NL_IWPM_REMOTE_INFO] = {.dump = iwpm_remote_info_cb},
[RDMA_NL_IWPM_HANDLE_ERR] = {.dump = iwpm_mapping_error_cb},
[RDMA_NL_IWPM_MAPINFO] = {.dump = iwpm_mapping_info_cb},
- [RDMA_NL_IWPM_MAPINFO_NUM] = {.dump = iwpm_ack_mapping_info_cb}
+ [RDMA_NL_IWPM_MAPINFO_NUM] = {.dump = iwpm_ack_mapping_info_cb},
+ [RDMA_NL_IWPM_HELLO] = {.dump = iwpm_hello_cb}
};
static struct workqueue_struct *iwcm_wq;
@@ -108,9 +109,10 @@ static struct ctl_table iwcm_ctl_table[] = {
.data = &default_backlog,
.maxlen = sizeof(default_backlog),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_INT_MAX,
},
- { }
};
/*
@@ -143,8 +145,8 @@ static struct iwcm_work *get_work(struct iwcm_id_private *cm_id_priv)
if (list_empty(&cm_id_priv->work_free_list))
return NULL;
- work = list_entry(cm_id_priv->work_free_list.next, struct iwcm_work,
- free_list);
+ work = list_first_entry(&cm_id_priv->work_free_list, struct iwcm_work,
+ free_list);
list_del_init(&work->free_list);
return work;
}
@@ -158,8 +160,10 @@ static void dealloc_work_entries(struct iwcm_id_private *cm_id_priv)
{
struct list_head *e, *tmp;
- list_for_each_safe(e, tmp, &cm_id_priv->work_free_list)
+ list_for_each_safe(e, tmp, &cm_id_priv->work_free_list) {
+ list_del(e);
kfree(list_entry(e, struct iwcm_work, free_list));
+ }
}
static int alloc_work_entries(struct iwcm_id_private *cm_id_priv, int count)
@@ -204,25 +208,24 @@ static void free_cm_id(struct iwcm_id_private *cm_id_priv)
/*
* Release a reference on cm_id. If the last reference is being
- * released, free the cm_id and return 1.
+ * released, free the cm_id and return 'true'.
*/
-static int iwcm_deref_id(struct iwcm_id_private *cm_id_priv)
+static bool iwcm_deref_id(struct iwcm_id_private *cm_id_priv)
{
- BUG_ON(atomic_read(&cm_id_priv->refcount)==0);
- if (atomic_dec_and_test(&cm_id_priv->refcount)) {
+ if (refcount_dec_and_test(&cm_id_priv->refcount)) {
BUG_ON(!list_empty(&cm_id_priv->work_list));
free_cm_id(cm_id_priv);
- return 1;
+ return true;
}
- return 0;
+ return false;
}
static void add_ref(struct iw_cm_id *cm_id)
{
struct iwcm_id_private *cm_id_priv;
cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
- atomic_inc(&cm_id_priv->refcount);
+ refcount_inc(&cm_id_priv->refcount);
}
static void rem_ref(struct iw_cm_id *cm_id)
@@ -254,7 +257,7 @@ struct iw_cm_id *iw_create_cm_id(struct ib_device *device,
cm_id_priv->id.add_ref = add_ref;
cm_id_priv->id.rem_ref = rem_ref;
spin_lock_init(&cm_id_priv->lock);
- atomic_set(&cm_id_priv->refcount, 1);
+ refcount_set(&cm_id_priv->refcount, 1);
init_waitqueue_head(&cm_id_priv->connect_wait);
init_completion(&cm_id_priv->destroy_comp);
INIT_LIST_HEAD(&cm_id_priv->work_list);
@@ -365,12 +368,12 @@ EXPORT_SYMBOL(iw_cm_disconnect);
/*
* CM_ID <-- DESTROYING
*
- * Clean up all resources associated with the connection and release
- * the initial reference taken by iw_create_cm_id.
+ * Clean up all resources associated with the connection.
*/
static void destroy_cm_id(struct iw_cm_id *cm_id)
{
struct iwcm_id_private *cm_id_priv;
+ struct ib_qp *qp;
unsigned long flags;
cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
@@ -388,19 +391,22 @@ static void destroy_cm_id(struct iw_cm_id *cm_id)
set_bit(IWCM_F_DROP_EVENTS, &cm_id_priv->flags);
spin_lock_irqsave(&cm_id_priv->lock, flags);
+ qp = cm_id_priv->qp;
+ cm_id_priv->qp = NULL;
+
switch (cm_id_priv->state) {
case IW_CM_STATE_LISTEN:
cm_id_priv->state = IW_CM_STATE_DESTROYING;
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
/* destroy the listening endpoint */
- cm_id->device->iwcm->destroy_listen(cm_id);
+ cm_id->device->ops.iw_destroy_listen(cm_id);
spin_lock_irqsave(&cm_id_priv->lock, flags);
break;
case IW_CM_STATE_ESTABLISHED:
cm_id_priv->state = IW_CM_STATE_DESTROYING;
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
/* Abrupt close of the connection */
- (void)iwcm_modify_qp_err(cm_id_priv->qp);
+ (void)iwcm_modify_qp_err(qp);
spin_lock_irqsave(&cm_id_priv->lock, flags);
break;
case IW_CM_STATE_IDLE:
@@ -416,7 +422,7 @@ static void destroy_cm_id(struct iw_cm_id *cm_id)
*/
cm_id_priv->state = IW_CM_STATE_DESTROYING;
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
- cm_id->device->iwcm->reject(cm_id, NULL, 0);
+ cm_id->device->ops.iw_reject(cm_id, NULL, 0);
spin_lock_irqsave(&cm_id_priv->lock, flags);
break;
case IW_CM_STATE_CONN_SENT:
@@ -425,25 +431,20 @@ static void destroy_cm_id(struct iw_cm_id *cm_id)
BUG();
break;
}
- if (cm_id_priv->qp) {
- cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp);
- cm_id_priv->qp = NULL;
- }
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+ if (qp)
+ cm_id_priv->id.device->ops.iw_rem_ref(qp);
if (cm_id->mapped) {
iwpm_remove_mapinfo(&cm_id->local_addr, &cm_id->m_local_addr);
iwpm_remove_mapping(&cm_id->local_addr, RDMA_NL_IWCM);
}
-
- (void)iwcm_deref_id(cm_id_priv);
}
/*
- * This function is only called by the application thread and cannot
- * be called by the event thread. The function will wait for all
- * references to be released on the cm_id and then kfree the cm_id
- * object.
+ * Destroy cm_id. If the cm_id still has other references, wait for all
+ * references to be released on the cm_id and then release the initial
+ * reference taken by iw_create_cm_id.
*/
void iw_destroy_cm_id(struct iw_cm_id *cm_id)
{
@@ -451,6 +452,9 @@ void iw_destroy_cm_id(struct iw_cm_id *cm_id)
cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
destroy_cm_id(cm_id);
+ if (refcount_read(&cm_id_priv->refcount) > 1)
+ flush_workqueue(iwcm_wq);
+ iwcm_deref_id(cm_id_priv);
}
EXPORT_SYMBOL(iw_destroy_cm_id);
@@ -505,17 +509,21 @@ static void iw_cm_check_wildcard(struct sockaddr_storage *pm_addr,
*/
static int iw_cm_map(struct iw_cm_id *cm_id, bool active)
{
- struct iwpm_dev_data pm_reg_msg;
+ const char *devname = dev_name(&cm_id->device->dev);
+ const char *ifname = cm_id->device->iw_ifname;
+ struct iwpm_dev_data pm_reg_msg = {};
struct iwpm_sa_data pm_msg;
int status;
+ if (strlen(devname) >= sizeof(pm_reg_msg.dev_name) ||
+ strlen(ifname) >= sizeof(pm_reg_msg.if_name))
+ return -EINVAL;
+
cm_id->m_local_addr = cm_id->local_addr;
cm_id->m_remote_addr = cm_id->remote_addr;
- memcpy(pm_reg_msg.dev_name, cm_id->device->name,
- sizeof(pm_reg_msg.dev_name));
- memcpy(pm_reg_msg.if_name, cm_id->device->iwcm->ifname,
- sizeof(pm_reg_msg.if_name));
+ strcpy(pm_reg_msg.dev_name, devname);
+ strcpy(pm_reg_msg.if_name, ifname);
if (iwpm_register_pid(&pm_reg_msg, RDMA_NL_IWCM) ||
!iwpm_valid_pid())
@@ -524,6 +532,8 @@ static int iw_cm_map(struct iw_cm_id *cm_id, bool active)
cm_id->mapped = true;
pm_msg.loc_addr = cm_id->local_addr;
pm_msg.rem_addr = cm_id->remote_addr;
+ pm_msg.flags = (cm_id->device->iw_driver_flags & IW_F_NO_PORT_MAP) ?
+ IWPM_FLAGS_NO_PORT_MAP : 0;
if (active)
status = iwpm_add_and_query_mapping(&pm_msg,
RDMA_NL_IWCM);
@@ -542,7 +552,7 @@ static int iw_cm_map(struct iw_cm_id *cm_id, bool active)
return iwpm_create_mapinfo(&cm_id->local_addr,
&cm_id->m_local_addr,
- RDMA_NL_IWCM);
+ RDMA_NL_IWCM, pm_msg.flags);
}
/*
@@ -573,7 +583,8 @@ int iw_cm_listen(struct iw_cm_id *cm_id, int backlog)
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
ret = iw_cm_map(cm_id, false);
if (!ret)
- ret = cm_id->device->iwcm->create_listen(cm_id, backlog);
+ ret = cm_id->device->ops.iw_create_listen(cm_id,
+ backlog);
if (ret)
cm_id_priv->state = IW_CM_STATE_IDLE;
spin_lock_irqsave(&cm_id_priv->lock, flags);
@@ -613,7 +624,7 @@ int iw_cm_reject(struct iw_cm_id *cm_id,
cm_id_priv->state = IW_CM_STATE_IDLE;
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
- ret = cm_id->device->iwcm->reject(cm_id, private_data,
+ ret = cm_id->device->ops.iw_reject(cm_id, private_data,
private_data_len);
clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
@@ -649,28 +660,28 @@ int iw_cm_accept(struct iw_cm_id *cm_id,
return -EINVAL;
}
/* Get the ib_qp given the QPN */
- qp = cm_id->device->iwcm->get_qp(cm_id->device, iw_param->qpn);
+ qp = cm_id->device->ops.iw_get_qp(cm_id->device, iw_param->qpn);
if (!qp) {
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
wake_up_all(&cm_id_priv->connect_wait);
return -EINVAL;
}
- cm_id->device->iwcm->add_ref(qp);
+ cm_id->device->ops.iw_add_ref(qp);
cm_id_priv->qp = qp;
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
- ret = cm_id->device->iwcm->accept(cm_id, iw_param);
+ ret = cm_id->device->ops.iw_accept(cm_id, iw_param);
if (ret) {
/* An error on accept precludes provider events */
BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_RECV);
cm_id_priv->state = IW_CM_STATE_IDLE;
spin_lock_irqsave(&cm_id_priv->lock, flags);
- if (cm_id_priv->qp) {
- cm_id->device->iwcm->rem_ref(qp);
- cm_id_priv->qp = NULL;
- }
+ qp = cm_id_priv->qp;
+ cm_id_priv->qp = NULL;
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+ if (qp)
+ cm_id->device->ops.iw_rem_ref(qp);
clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
wake_up_all(&cm_id_priv->connect_wait);
}
@@ -691,7 +702,7 @@ int iw_cm_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
struct iwcm_id_private *cm_id_priv;
int ret;
unsigned long flags;
- struct ib_qp *qp;
+ struct ib_qp *qp = NULL;
cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
@@ -708,30 +719,30 @@ int iw_cm_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
}
/* Get the ib_qp given the QPN */
- qp = cm_id->device->iwcm->get_qp(cm_id->device, iw_param->qpn);
+ qp = cm_id->device->ops.iw_get_qp(cm_id->device, iw_param->qpn);
if (!qp) {
ret = -EINVAL;
goto err;
}
- cm_id->device->iwcm->add_ref(qp);
+ cm_id->device->ops.iw_add_ref(qp);
cm_id_priv->qp = qp;
cm_id_priv->state = IW_CM_STATE_CONN_SENT;
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
ret = iw_cm_map(cm_id, true);
if (!ret)
- ret = cm_id->device->iwcm->connect(cm_id, iw_param);
+ ret = cm_id->device->ops.iw_connect(cm_id, iw_param);
if (!ret)
return 0; /* success */
spin_lock_irqsave(&cm_id_priv->lock, flags);
- if (cm_id_priv->qp) {
- cm_id->device->iwcm->rem_ref(qp);
- cm_id_priv->qp = NULL;
- }
+ qp = cm_id_priv->qp;
+ cm_id_priv->qp = NULL;
cm_id_priv->state = IW_CM_STATE_IDLE;
err:
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+ if (qp)
+ cm_id->device->ops.iw_rem_ref(qp);
clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
wake_up_all(&cm_id_priv->connect_wait);
return ret;
@@ -873,6 +884,7 @@ static int cm_conn_est_handler(struct iwcm_id_private *cm_id_priv,
static int cm_conn_rep_handler(struct iwcm_id_private *cm_id_priv,
struct iw_cm_event *iw_event)
{
+ struct ib_qp *qp = NULL;
unsigned long flags;
int ret;
@@ -891,11 +903,13 @@ static int cm_conn_rep_handler(struct iwcm_id_private *cm_id_priv,
cm_id_priv->state = IW_CM_STATE_ESTABLISHED;
} else {
/* REJECTED or RESET */
- cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp);
+ qp = cm_id_priv->qp;
cm_id_priv->qp = NULL;
cm_id_priv->state = IW_CM_STATE_IDLE;
}
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+ if (qp)
+ cm_id_priv->id.device->ops.iw_rem_ref(qp);
ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event);
if (iw_event->private_data_len)
@@ -937,21 +951,18 @@ static void cm_disconnect_handler(struct iwcm_id_private *cm_id_priv,
static int cm_close_handler(struct iwcm_id_private *cm_id_priv,
struct iw_cm_event *iw_event)
{
+ struct ib_qp *qp;
unsigned long flags;
- int ret = 0;
+ int ret = 0, notify_event = 0;
spin_lock_irqsave(&cm_id_priv->lock, flags);
+ qp = cm_id_priv->qp;
+ cm_id_priv->qp = NULL;
- if (cm_id_priv->qp) {
- cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp);
- cm_id_priv->qp = NULL;
- }
switch (cm_id_priv->state) {
case IW_CM_STATE_ESTABLISHED:
case IW_CM_STATE_CLOSING:
cm_id_priv->state = IW_CM_STATE_IDLE;
- spin_unlock_irqrestore(&cm_id_priv->lock, flags);
- ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event);
- spin_lock_irqsave(&cm_id_priv->lock, flags);
+ notify_event = 1;
break;
case IW_CM_STATE_DESTROYING:
break;
@@ -960,6 +971,10 @@ static int cm_close_handler(struct iwcm_id_private *cm_id_priv,
}
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+ if (qp)
+ cm_id_priv->id.device->ops.iw_rem_ref(qp);
+ if (notify_event)
+ ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event);
return ret;
}
@@ -1006,30 +1021,27 @@ static void cm_work_handler(struct work_struct *_work)
struct iw_cm_event levent;
struct iwcm_id_private *cm_id_priv = work->cm_id;
unsigned long flags;
- int empty;
int ret = 0;
spin_lock_irqsave(&cm_id_priv->lock, flags);
- empty = list_empty(&cm_id_priv->work_list);
- while (!empty) {
- work = list_entry(cm_id_priv->work_list.next,
- struct iwcm_work, list);
+ while (!list_empty(&cm_id_priv->work_list)) {
+ work = list_first_entry(&cm_id_priv->work_list,
+ struct iwcm_work, list);
list_del_init(&work->list);
- empty = list_empty(&cm_id_priv->work_list);
levent = work->event;
put_work(work);
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
if (!test_bit(IWCM_F_DROP_EVENTS, &cm_id_priv->flags)) {
ret = process_event(cm_id_priv, &levent);
- if (ret)
+ if (ret) {
destroy_cm_id(&cm_id_priv->id);
+ WARN_ON_ONCE(iwcm_deref_id(cm_id_priv));
+ }
} else
pr_debug("dropping event %d\n", levent.event);
if (iwcm_deref_id(cm_id_priv))
return;
- if (empty)
- return;
spin_lock_irqsave(&cm_id_priv->lock, flags);
}
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
@@ -1081,12 +1093,9 @@ static int cm_event_handler(struct iw_cm_id *cm_id,
}
}
- atomic_inc(&cm_id_priv->refcount);
- if (list_empty(&cm_id_priv->work_list)) {
- list_add_tail(&work->list, &cm_id_priv->work_list);
- queue_work(iwcm_wq, &work->work);
- } else
- list_add_tail(&work->list, &cm_id_priv->work_list);
+ refcount_inc(&cm_id_priv->refcount);
+ list_add_tail(&work->list, &cm_id_priv->work_list);
+ queue_work(iwcm_wq, &work->work);
out:
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
return ret;
@@ -1174,35 +1183,38 @@ static int __init iw_cm_init(void)
ret = iwpm_init(RDMA_NL_IWCM);
if (ret)
- pr_err("iw_cm: couldn't init iwpm\n");
-
- ret = ibnl_add_client(RDMA_NL_IWCM, ARRAY_SIZE(iwcm_nl_cb_table),
- iwcm_nl_cb_table);
- if (ret)
- pr_err("iw_cm: couldn't register netlink callbacks\n");
+ return ret;
iwcm_wq = alloc_ordered_workqueue("iw_cm_wq", WQ_MEM_RECLAIM);
if (!iwcm_wq)
- return -ENOMEM;
+ goto err_alloc;
iwcm_ctl_table_hdr = register_net_sysctl(&init_net, "net/iw_cm",
iwcm_ctl_table);
if (!iwcm_ctl_table_hdr) {
pr_err("iw_cm: couldn't register sysctl paths\n");
- destroy_workqueue(iwcm_wq);
- return -ENOMEM;
+ goto err_sysctl;
}
+ rdma_nl_register(RDMA_NL_IWCM, iwcm_nl_cb_table);
return 0;
+
+err_sysctl:
+ destroy_workqueue(iwcm_wq);
+err_alloc:
+ iwpm_exit(RDMA_NL_IWCM);
+ return -ENOMEM;
}
static void __exit iw_cm_cleanup(void)
{
+ rdma_nl_unregister(RDMA_NL_IWCM);
unregister_net_sysctl_table(iwcm_ctl_table_hdr);
destroy_workqueue(iwcm_wq);
- ibnl_remove_client(RDMA_NL_IWCM);
iwpm_exit(RDMA_NL_IWCM);
}
+MODULE_ALIAS_RDMA_NETLINK(RDMA_NL_IWCM, 2);
+
module_init(iw_cm_init);
module_exit(iw_cm_cleanup);
diff --git a/drivers/infiniband/core/iwcm.h b/drivers/infiniband/core/iwcm.h
index 82c2cd1b0a80..bf74639be128 100644
--- a/drivers/infiniband/core/iwcm.h
+++ b/drivers/infiniband/core/iwcm.h
@@ -52,7 +52,7 @@ struct iwcm_id_private {
wait_queue_head_t connect_wait;
struct list_head work_list;
spinlock_t lock;
- atomic_t refcount;
+ refcount_t refcount;
struct list_head work_free_list;
};
diff --git a/drivers/infiniband/core/iwpm_msg.c b/drivers/infiniband/core/iwpm_msg.c
index a0e7c16d8bd8..3c9a9869212b 100644
--- a/drivers/infiniband/core/iwpm_msg.c
+++ b/drivers/infiniband/core/iwpm_msg.c
@@ -34,19 +34,25 @@
#include "iwpm_util.h"
static const char iwpm_ulib_name[IWPM_ULIBNAME_SIZE] = "iWarpPortMapperUser";
-static int iwpm_ulib_version = 3;
+u16 iwpm_ulib_version = IWPM_UABI_VERSION_MIN;
static int iwpm_user_pid = IWPM_PID_UNDEFINED;
static atomic_t echo_nlmsg_seq;
+/**
+ * iwpm_valid_pid - Check if the userspace iwarp port mapper pid is valid
+ *
+ * Returns true if the pid is greater than zero, otherwise returns false
+ */
int iwpm_valid_pid(void)
{
return iwpm_user_pid > 0;
}
-EXPORT_SYMBOL(iwpm_valid_pid);
-/*
- * iwpm_register_pid - Send a netlink query to user space
- * for the iwarp port mapper pid
+/**
+ * iwpm_register_pid - Send a netlink query to userspace
+ * to get the iwarp port mapper pid
+ * @pm_msg: Contains driver info to send to the userspace port mapper
+ * @nl_client: The index of the netlink client
*
* nlmsg attributes:
* [IWPM_NLA_REG_PID_SEQ]
@@ -63,10 +69,6 @@ int iwpm_register_pid(struct iwpm_dev_data *pm_msg, u8 nl_client)
const char *err_str = "";
int ret = -EINVAL;
- if (!iwpm_valid_client(nl_client)) {
- err_str = "Invalid port mapper client";
- goto pid_query_error;
- }
if (iwpm_check_registration(nl_client, IWPM_REG_VALID) ||
iwpm_user_pid == IWPM_PID_UNAVAILABLE)
return 0;
@@ -101,10 +103,12 @@ int iwpm_register_pid(struct iwpm_dev_data *pm_msg, u8 nl_client)
if (ret)
goto pid_query_error;
+ nlmsg_end(skb, nlh);
+
pr_debug("%s: Multicasting a nlmsg (dev = %s ifname = %s iwpm = %s)\n",
__func__, pm_msg->dev_name, pm_msg->if_name, iwpm_ulib_name);
- ret = ibnl_multicast(skb, nlh, RDMA_NL_GROUP_IWPM, GFP_KERNEL);
+ ret = rdma_nl_multicast(&init_net, skb, RDMA_NL_GROUP_IWPM, GFP_KERNEL);
if (ret) {
skb = NULL; /* skb is freed in the netlink send-op handling */
iwpm_user_pid = IWPM_PID_UNAVAILABLE;
@@ -115,21 +119,26 @@ int iwpm_register_pid(struct iwpm_dev_data *pm_msg, u8 nl_client)
ret = iwpm_wait_complete_req(nlmsg_request);
return ret;
pid_query_error:
- pr_info("%s: %s (client = %d)\n", __func__, err_str, nl_client);
- if (skb)
- dev_kfree_skb(skb);
+ pr_info("%s: %s (client = %u)\n", __func__, err_str, nl_client);
+ dev_kfree_skb(skb);
if (nlmsg_request)
iwpm_free_nlmsg_request(&nlmsg_request->kref);
return ret;
}
-EXPORT_SYMBOL(iwpm_register_pid);
-/*
- * iwpm_add_mapping - Send a netlink add mapping message
- * to the port mapper
+/**
+ * iwpm_add_mapping - Send a netlink add mapping request to
+ * the userspace port mapper
+ * @pm_msg: Contains the local ip/tcp address info to send
+ * @nl_client: The index of the netlink client
+ *
* nlmsg attributes:
* [IWPM_NLA_MANAGE_MAPPING_SEQ]
* [IWPM_NLA_MANAGE_ADDR]
+ * [IWPM_NLA_MANAGE_FLAGS]
+ *
+ * If the request is successful, the pm_msg stores
+ * the port mapper response (mapped address info)
*/
int iwpm_add_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client)
{
@@ -140,10 +149,6 @@ int iwpm_add_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client)
const char *err_str = "";
int ret = -EINVAL;
- if (!iwpm_valid_client(nl_client)) {
- err_str = "Invalid port mapper client";
- goto add_mapping_error;
- }
if (!iwpm_valid_pid())
return 0;
if (!iwpm_check_registration(nl_client, IWPM_REG_VALID)) {
@@ -172,9 +177,23 @@ int iwpm_add_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client)
&pm_msg->loc_addr, IWPM_NLA_MANAGE_ADDR);
if (ret)
goto add_mapping_error;
+
+ /* If flags are required and we're not V4, then return a quiet error */
+ if (pm_msg->flags && iwpm_ulib_version == IWPM_UABI_VERSION_MIN) {
+ ret = -EINVAL;
+ goto add_mapping_error_nowarn;
+ }
+ if (iwpm_ulib_version > IWPM_UABI_VERSION_MIN) {
+ ret = ibnl_put_attr(skb, nlh, sizeof(u32), &pm_msg->flags,
+ IWPM_NLA_MANAGE_FLAGS);
+ if (ret)
+ goto add_mapping_error;
+ }
+
+ nlmsg_end(skb, nlh);
nlmsg_request->req_buffer = pm_msg;
- ret = ibnl_unicast(skb, nlh, iwpm_user_pid);
+ ret = rdma_nl_unicast_wait(&init_net, skb, iwpm_user_pid);
if (ret) {
skb = NULL; /* skb is freed in the netlink send-op handling */
iwpm_user_pid = IWPM_PID_UNDEFINED;
@@ -184,22 +203,25 @@ int iwpm_add_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client)
ret = iwpm_wait_complete_req(nlmsg_request);
return ret;
add_mapping_error:
- pr_info("%s: %s (client = %d)\n", __func__, err_str, nl_client);
- if (skb)
- dev_kfree_skb(skb);
+ pr_info("%s: %s (client = %u)\n", __func__, err_str, nl_client);
+add_mapping_error_nowarn:
+ dev_kfree_skb(skb);
if (nlmsg_request)
iwpm_free_nlmsg_request(&nlmsg_request->kref);
return ret;
}
-EXPORT_SYMBOL(iwpm_add_mapping);
-/*
- * iwpm_add_and_query_mapping - Send a netlink add and query
- * mapping message to the port mapper
+/**
+ * iwpm_add_and_query_mapping - Process the port mapper response to
+ * iwpm_add_and_query_mapping request
+ * @pm_msg: Contains the local ip/tcp address info to send
+ * @nl_client: The index of the netlink client
+ *
* nlmsg attributes:
* [IWPM_NLA_QUERY_MAPPING_SEQ]
* [IWPM_NLA_QUERY_LOCAL_ADDR]
* [IWPM_NLA_QUERY_REMOTE_ADDR]
+ * [IWPM_NLA_QUERY_FLAGS]
*/
int iwpm_add_and_query_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client)
{
@@ -210,10 +232,6 @@ int iwpm_add_and_query_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client)
const char *err_str = "";
int ret = -EINVAL;
- if (!iwpm_valid_client(nl_client)) {
- err_str = "Invalid port mapper client";
- goto query_mapping_error;
- }
if (!iwpm_valid_pid())
return 0;
if (!iwpm_check_registration(nl_client, IWPM_REG_VALID)) {
@@ -249,9 +267,23 @@ int iwpm_add_and_query_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client)
&pm_msg->rem_addr, IWPM_NLA_QUERY_REMOTE_ADDR);
if (ret)
goto query_mapping_error;
+
+ /* If flags are required and we're not V4, then return a quite error */
+ if (pm_msg->flags && iwpm_ulib_version == IWPM_UABI_VERSION_MIN) {
+ ret = -EINVAL;
+ goto query_mapping_error_nowarn;
+ }
+ if (iwpm_ulib_version > IWPM_UABI_VERSION_MIN) {
+ ret = ibnl_put_attr(skb, nlh, sizeof(u32), &pm_msg->flags,
+ IWPM_NLA_QUERY_FLAGS);
+ if (ret)
+ goto query_mapping_error;
+ }
+
+ nlmsg_end(skb, nlh);
nlmsg_request->req_buffer = pm_msg;
- ret = ibnl_unicast(skb, nlh, iwpm_user_pid);
+ ret = rdma_nl_unicast_wait(&init_net, skb, iwpm_user_pid);
if (ret) {
skb = NULL; /* skb is freed in the netlink send-op handling */
err_str = "Unable to send a nlmsg";
@@ -260,18 +292,21 @@ int iwpm_add_and_query_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client)
ret = iwpm_wait_complete_req(nlmsg_request);
return ret;
query_mapping_error:
- pr_info("%s: %s (client = %d)\n", __func__, err_str, nl_client);
- if (skb)
- dev_kfree_skb(skb);
+ pr_info("%s: %s (client = %u)\n", __func__, err_str, nl_client);
+query_mapping_error_nowarn:
+ dev_kfree_skb(skb);
if (nlmsg_request)
iwpm_free_nlmsg_request(&nlmsg_request->kref);
return ret;
}
-EXPORT_SYMBOL(iwpm_add_and_query_mapping);
-/*
- * iwpm_remove_mapping - Send a netlink remove mapping message
- * to the port mapper
+/**
+ * iwpm_remove_mapping - Send a netlink remove mapping request
+ * to the userspace port mapper
+ *
+ * @local_addr: Local ip/tcp address to remove
+ * @nl_client: The index of the netlink client
+ *
* nlmsg attributes:
* [IWPM_NLA_MANAGE_MAPPING_SEQ]
* [IWPM_NLA_MANAGE_ADDR]
@@ -284,10 +319,6 @@ int iwpm_remove_mapping(struct sockaddr_storage *local_addr, u8 nl_client)
const char *err_str = "";
int ret = -EINVAL;
- if (!iwpm_valid_client(nl_client)) {
- err_str = "Invalid port mapper client";
- goto remove_mapping_error;
- }
if (!iwpm_valid_pid())
return 0;
if (iwpm_check_registration(nl_client, IWPM_REG_UNDEF)) {
@@ -312,7 +343,9 @@ int iwpm_remove_mapping(struct sockaddr_storage *local_addr, u8 nl_client)
if (ret)
goto remove_mapping_error;
- ret = ibnl_unicast(skb, nlh, iwpm_user_pid);
+ nlmsg_end(skb, nlh);
+
+ ret = rdma_nl_unicast_wait(&init_net, skb, iwpm_user_pid);
if (ret) {
skb = NULL; /* skb is freed in the netlink send-op handling */
iwpm_user_pid = IWPM_PID_UNDEFINED;
@@ -323,12 +356,11 @@ int iwpm_remove_mapping(struct sockaddr_storage *local_addr, u8 nl_client)
"remove_mapping: Local sockaddr:");
return 0;
remove_mapping_error:
- pr_info("%s: %s (client = %d)\n", __func__, err_str, nl_client);
+ pr_info("%s: %s (client = %u)\n", __func__, err_str, nl_client);
if (skb)
dev_kfree_skb_any(skb);
return ret;
}
-EXPORT_SYMBOL(iwpm_remove_mapping);
/* netlink attribute policy for the received response to register pid request */
static const struct nla_policy resp_reg_policy[IWPM_NLA_RREG_PID_MAX] = {
@@ -341,9 +373,14 @@ static const struct nla_policy resp_reg_policy[IWPM_NLA_RREG_PID_MAX] = {
[IWPM_NLA_RREG_PID_ERR] = { .type = NLA_U16 }
};
-/*
- * iwpm_register_pid_cb - Process a port mapper response to
- * iwpm_register_pid()
+/**
+ * iwpm_register_pid_cb - Process the port mapper response to
+ * iwpm_register_pid query
+ * @skb: The socket buffer
+ * @cb: Contains the received message (payload and netlink header)
+ *
+ * If successful, the function receives the userspace port mapper pid
+ * which is used in future communication with the port mapper
*/
int iwpm_register_pid_cb(struct sk_buff *skb, struct netlink_callback *cb)
{
@@ -376,19 +413,22 @@ int iwpm_register_pid_cb(struct sk_buff *skb, struct netlink_callback *cb)
/* check device name, ulib name and version */
if (strcmp(pm_msg->dev_name, dev_name) ||
strcmp(iwpm_ulib_name, iwpm_name) ||
- iwpm_version != iwpm_ulib_version) {
+ iwpm_version < IWPM_UABI_VERSION_MIN) {
- pr_info("%s: Incorrect info (dev = %s name = %s version = %d)\n",
+ pr_info("%s: Incorrect info (dev = %s name = %s version = %u)\n",
__func__, dev_name, iwpm_name, iwpm_version);
nlmsg_request->err_code = IWPM_USER_LIB_INFO_ERR;
goto register_pid_response_exit;
}
iwpm_user_pid = cb->nlh->nlmsg_pid;
+ iwpm_ulib_version = iwpm_version;
+ if (iwpm_ulib_version < IWPM_UABI_VERSION)
+ pr_warn_once("%s: Down level iwpmd/pid %d. Continuing...",
+ __func__, iwpm_user_pid);
atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq);
pr_debug("%s: iWarp Port Mapper (pid = %d) is available!\n",
__func__, iwpm_user_pid);
- if (iwpm_valid_client(nl_client))
- iwpm_set_registration(nl_client, IWPM_REG_VALID);
+ iwpm_set_registration(nl_client, IWPM_REG_VALID);
register_pid_response_exit:
nlmsg_request->request_done = 1;
/* always for found nlmsg_request */
@@ -397,19 +437,22 @@ register_pid_response_exit:
up(&nlmsg_request->sem);
return 0;
}
-EXPORT_SYMBOL(iwpm_register_pid_cb);
/* netlink attribute policy for the received response to add mapping request */
static const struct nla_policy resp_add_policy[IWPM_NLA_RMANAGE_MAPPING_MAX] = {
- [IWPM_NLA_MANAGE_MAPPING_SEQ] = { .type = NLA_U32 },
- [IWPM_NLA_MANAGE_ADDR] = { .len = sizeof(struct sockaddr_storage) },
- [IWPM_NLA_MANAGE_MAPPED_LOC_ADDR] = { .len = sizeof(struct sockaddr_storage) },
- [IWPM_NLA_RMANAGE_MAPPING_ERR] = { .type = NLA_U16 }
+ [IWPM_NLA_RMANAGE_MAPPING_SEQ] = { .type = NLA_U32 },
+ [IWPM_NLA_RMANAGE_ADDR] = {
+ .len = sizeof(struct sockaddr_storage) },
+ [IWPM_NLA_RMANAGE_MAPPED_LOC_ADDR] = {
+ .len = sizeof(struct sockaddr_storage) },
+ [IWPM_NLA_RMANAGE_MAPPING_ERR] = { .type = NLA_U16 }
};
-/*
- * iwpm_add_mapping_cb - Process a port mapper response to
- * iwpm_add_mapping()
+/**
+ * iwpm_add_mapping_cb - Process the port mapper response to
+ * iwpm_add_mapping request
+ * @skb: The socket buffer
+ * @cb: Contains the received message (payload and netlink header)
*/
int iwpm_add_mapping_cb(struct sk_buff *skb, struct netlink_callback *cb)
{
@@ -428,7 +471,7 @@ int iwpm_add_mapping_cb(struct sk_buff *skb, struct netlink_callback *cb)
atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq);
- msg_seq = nla_get_u32(nltb[IWPM_NLA_MANAGE_MAPPING_SEQ]);
+ msg_seq = nla_get_u32(nltb[IWPM_NLA_RMANAGE_MAPPING_SEQ]);
nlmsg_request = iwpm_find_nlmsg_request(msg_seq);
if (!nlmsg_request) {
pr_info("%s: Could not find a matching request (seq = %u)\n",
@@ -437,9 +480,9 @@ int iwpm_add_mapping_cb(struct sk_buff *skb, struct netlink_callback *cb)
}
pm_msg = nlmsg_request->req_buffer;
local_sockaddr = (struct sockaddr_storage *)
- nla_data(nltb[IWPM_NLA_MANAGE_ADDR]);
+ nla_data(nltb[IWPM_NLA_RMANAGE_ADDR]);
mapped_sockaddr = (struct sockaddr_storage *)
- nla_data(nltb[IWPM_NLA_MANAGE_MAPPED_LOC_ADDR]);
+ nla_data(nltb[IWPM_NLA_RMANAGE_MAPPED_LOC_ADDR]);
if (iwpm_compare_sockaddr(local_sockaddr, &pm_msg->loc_addr)) {
nlmsg_request->err_code = IWPM_USER_LIB_INFO_ERR;
@@ -466,22 +509,28 @@ add_mapping_response_exit:
up(&nlmsg_request->sem);
return 0;
}
-EXPORT_SYMBOL(iwpm_add_mapping_cb);
/* netlink attribute policy for the response to add and query mapping request
- * and response with remote address info */
+ * and response with remote address info
+ */
static const struct nla_policy resp_query_policy[IWPM_NLA_RQUERY_MAPPING_MAX] = {
- [IWPM_NLA_QUERY_MAPPING_SEQ] = { .type = NLA_U32 },
- [IWPM_NLA_QUERY_LOCAL_ADDR] = { .len = sizeof(struct sockaddr_storage) },
- [IWPM_NLA_QUERY_REMOTE_ADDR] = { .len = sizeof(struct sockaddr_storage) },
- [IWPM_NLA_RQUERY_MAPPED_LOC_ADDR] = { .len = sizeof(struct sockaddr_storage) },
- [IWPM_NLA_RQUERY_MAPPED_REM_ADDR] = { .len = sizeof(struct sockaddr_storage) },
+ [IWPM_NLA_RQUERY_MAPPING_SEQ] = { .type = NLA_U32 },
+ [IWPM_NLA_RQUERY_LOCAL_ADDR] = {
+ .len = sizeof(struct sockaddr_storage) },
+ [IWPM_NLA_RQUERY_REMOTE_ADDR] = {
+ .len = sizeof(struct sockaddr_storage) },
+ [IWPM_NLA_RQUERY_MAPPED_LOC_ADDR] = {
+ .len = sizeof(struct sockaddr_storage) },
+ [IWPM_NLA_RQUERY_MAPPED_REM_ADDR] = {
+ .len = sizeof(struct sockaddr_storage) },
[IWPM_NLA_RQUERY_MAPPING_ERR] = { .type = NLA_U16 }
};
-/*
- * iwpm_add_and_query_mapping_cb - Process a port mapper response to
- * iwpm_add_and_query_mapping()
+/**
+ * iwpm_add_and_query_mapping_cb - Process the port mapper response to
+ * iwpm_add_and_query_mapping request
+ * @skb: The socket buffer
+ * @cb: Contains the received message (payload and netlink header)
*/
int iwpm_add_and_query_mapping_cb(struct sk_buff *skb,
struct netlink_callback *cb)
@@ -501,7 +550,7 @@ int iwpm_add_and_query_mapping_cb(struct sk_buff *skb,
return -EINVAL;
atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq);
- msg_seq = nla_get_u32(nltb[IWPM_NLA_QUERY_MAPPING_SEQ]);
+ msg_seq = nla_get_u32(nltb[IWPM_NLA_RQUERY_MAPPING_SEQ]);
nlmsg_request = iwpm_find_nlmsg_request(msg_seq);
if (!nlmsg_request) {
pr_info("%s: Could not find a matching request (seq = %u)\n",
@@ -510,9 +559,9 @@ int iwpm_add_and_query_mapping_cb(struct sk_buff *skb,
}
pm_msg = nlmsg_request->req_buffer;
local_sockaddr = (struct sockaddr_storage *)
- nla_data(nltb[IWPM_NLA_QUERY_LOCAL_ADDR]);
+ nla_data(nltb[IWPM_NLA_RQUERY_LOCAL_ADDR]);
remote_sockaddr = (struct sockaddr_storage *)
- nla_data(nltb[IWPM_NLA_QUERY_REMOTE_ADDR]);
+ nla_data(nltb[IWPM_NLA_RQUERY_REMOTE_ADDR]);
mapped_loc_sockaddr = (struct sockaddr_storage *)
nla_data(nltb[IWPM_NLA_RQUERY_MAPPED_LOC_ADDR]);
mapped_rem_sockaddr = (struct sockaddr_storage *)
@@ -558,11 +607,14 @@ query_mapping_response_exit:
up(&nlmsg_request->sem);
return 0;
}
-EXPORT_SYMBOL(iwpm_add_and_query_mapping_cb);
-/*
- * iwpm_remote_info_cb - Process a port mapper message, containing
- * the remote connecting peer address info
+/**
+ * iwpm_remote_info_cb - Process remote connecting peer address info, which
+ * the port mapper has received from the connecting peer
+ * @skb: The socket buffer
+ * @cb: Contains the received message (payload and netlink header)
+ *
+ * Stores the IPv4/IPv6 address info in a hash table
*/
int iwpm_remote_info_cb(struct sk_buff *skb, struct netlink_callback *cb)
{
@@ -580,17 +632,12 @@ int iwpm_remote_info_cb(struct sk_buff *skb, struct netlink_callback *cb)
return ret;
nl_client = RDMA_NL_GET_CLIENT(cb->nlh->nlmsg_type);
- if (!iwpm_valid_client(nl_client)) {
- pr_info("%s: Invalid port mapper client = %d\n",
- __func__, nl_client);
- return ret;
- }
atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq);
local_sockaddr = (struct sockaddr_storage *)
- nla_data(nltb[IWPM_NLA_QUERY_LOCAL_ADDR]);
+ nla_data(nltb[IWPM_NLA_RQUERY_LOCAL_ADDR]);
remote_sockaddr = (struct sockaddr_storage *)
- nla_data(nltb[IWPM_NLA_QUERY_REMOTE_ADDR]);
+ nla_data(nltb[IWPM_NLA_RQUERY_REMOTE_ADDR]);
mapped_loc_sockaddr = (struct sockaddr_storage *)
nla_data(nltb[IWPM_NLA_RQUERY_MAPPED_LOC_ADDR]);
mapped_rem_sockaddr = (struct sockaddr_storage *)
@@ -627,7 +674,6 @@ int iwpm_remote_info_cb(struct sk_buff *skb, struct netlink_callback *cb)
"remote_info: Mapped remote sockaddr:");
return ret;
}
-EXPORT_SYMBOL(iwpm_remote_info_cb);
/* netlink attribute policy for the received request for mapping info */
static const struct nla_policy resp_mapinfo_policy[IWPM_NLA_MAPINFO_REQ_MAX] = {
@@ -636,8 +682,14 @@ static const struct nla_policy resp_mapinfo_policy[IWPM_NLA_MAPINFO_REQ_MAX] = {
[IWPM_NLA_MAPINFO_ULIB_VER] = { .type = NLA_U16 }
};
-/*
- * iwpm_mapping_info_cb - Process a port mapper request for mapping info
+/**
+ * iwpm_mapping_info_cb - Process a notification that the userspace
+ * port mapper daemon is started
+ * @skb: The socket buffer
+ * @cb: Contains the received message (payload and netlink header)
+ *
+ * Using the received port mapper pid, send all the local mapping
+ * info records to the userspace port mapper
*/
int iwpm_mapping_info_cb(struct sk_buff *skb, struct netlink_callback *cb)
{
@@ -656,20 +708,20 @@ int iwpm_mapping_info_cb(struct sk_buff *skb, struct netlink_callback *cb)
iwpm_name = (char *)nla_data(nltb[IWPM_NLA_MAPINFO_ULIB_NAME]);
iwpm_version = nla_get_u16(nltb[IWPM_NLA_MAPINFO_ULIB_VER]);
if (strcmp(iwpm_ulib_name, iwpm_name) ||
- iwpm_version != iwpm_ulib_version) {
- pr_info("%s: Invalid port mapper name = %s version = %d\n",
+ iwpm_version < IWPM_UABI_VERSION_MIN) {
+ pr_info("%s: Invalid port mapper name = %s version = %u\n",
__func__, iwpm_name, iwpm_version);
return ret;
}
nl_client = RDMA_NL_GET_CLIENT(cb->nlh->nlmsg_type);
- if (!iwpm_valid_client(nl_client)) {
- pr_info("%s: Invalid port mapper client = %d\n",
- __func__, nl_client);
- return ret;
- }
iwpm_set_registration(nl_client, IWPM_REG_INCOMPL);
atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq);
iwpm_user_pid = cb->nlh->nlmsg_pid;
+
+ if (iwpm_ulib_version < IWPM_UABI_VERSION)
+ pr_warn_once("%s: Down level iwpmd/pid %d. Continuing...",
+ __func__, iwpm_user_pid);
+
if (!iwpm_mapinfo_available())
return 0;
pr_debug("%s: iWarp Port Mapper (pid = %d) is available!\n",
@@ -677,7 +729,6 @@ int iwpm_mapping_info_cb(struct sk_buff *skb, struct netlink_callback *cb)
ret = iwpm_send_mapinfo(nl_client, iwpm_user_pid);
return ret;
}
-EXPORT_SYMBOL(iwpm_mapping_info_cb);
/* netlink attribute policy for the received mapping info ack */
static const struct nla_policy ack_mapinfo_policy[IWPM_NLA_MAPINFO_NUM_MAX] = {
@@ -686,9 +737,11 @@ static const struct nla_policy ack_mapinfo_policy[IWPM_NLA_MAPINFO_NUM_MAX] = {
[IWPM_NLA_MAPINFO_ACK_NUM] = { .type = NLA_U32 }
};
-/*
- * iwpm_ack_mapping_info_cb - Process a port mapper ack for
- * the provided mapping info records
+/**
+ * iwpm_ack_mapping_info_cb - Process the port mapper ack for
+ * the provided local mapping info records
+ * @skb: The socket buffer
+ * @cb: Contains the received message (payload and netlink header)
*/
int iwpm_ack_mapping_info_cb(struct sk_buff *skb, struct netlink_callback *cb)
{
@@ -707,7 +760,6 @@ int iwpm_ack_mapping_info_cb(struct sk_buff *skb, struct netlink_callback *cb)
atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq);
return 0;
}
-EXPORT_SYMBOL(iwpm_ack_mapping_info_cb);
/* netlink attribute policy for the received port mapper error message */
static const struct nla_policy map_error_policy[IWPM_NLA_ERR_MAX] = {
@@ -715,8 +767,11 @@ static const struct nla_policy map_error_policy[IWPM_NLA_ERR_MAX] = {
[IWPM_NLA_ERR_CODE] = { .type = NLA_U16 },
};
-/*
- * iwpm_mapping_error_cb - Process a port mapper error message
+/**
+ * iwpm_mapping_error_cb - Process port mapper notification for error
+ *
+ * @skb: The socket buffer
+ * @cb: Contains the received message (payload and netlink header)
*/
int iwpm_mapping_error_cb(struct sk_buff *skb, struct netlink_callback *cb)
{
@@ -751,4 +806,41 @@ int iwpm_mapping_error_cb(struct sk_buff *skb, struct netlink_callback *cb)
up(&nlmsg_request->sem);
return 0;
}
-EXPORT_SYMBOL(iwpm_mapping_error_cb);
+
+/* netlink attribute policy for the received hello request */
+static const struct nla_policy hello_policy[IWPM_NLA_HELLO_MAX] = {
+ [IWPM_NLA_HELLO_ABI_VERSION] = { .type = NLA_U16 }
+};
+
+/**
+ * iwpm_hello_cb - Process a hello message from iwpmd
+ *
+ * @skb: The socket buffer
+ * @cb: Contains the received message (payload and netlink header)
+ *
+ * Using the received port mapper pid, send the kernel's abi_version
+ * after adjusting it to support the iwpmd version.
+ */
+int iwpm_hello_cb(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct nlattr *nltb[IWPM_NLA_HELLO_MAX];
+ const char *msg_type = "Hello request";
+ u8 nl_client;
+ u16 abi_version;
+ int ret = -EINVAL;
+
+ if (iwpm_parse_nlmsg(cb, IWPM_NLA_HELLO_MAX, hello_policy, nltb,
+ msg_type)) {
+ pr_info("%s: Unable to parse nlmsg\n", __func__);
+ return ret;
+ }
+ abi_version = nla_get_u16(nltb[IWPM_NLA_HELLO_ABI_VERSION]);
+ nl_client = RDMA_NL_GET_CLIENT(cb->nlh->nlmsg_type);
+ iwpm_set_registration(nl_client, IWPM_REG_INCOMPL);
+ atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq);
+ iwpm_ulib_version = min_t(u16, IWPM_UABI_VERSION, abi_version);
+ pr_debug("Using ABI version %u\n", iwpm_ulib_version);
+ iwpm_user_pid = cb->nlh->nlmsg_pid;
+ ret = iwpm_send_hello(nl_client, iwpm_user_pid, iwpm_ulib_version);
+ return ret;
+}
diff --git a/drivers/infiniband/core/iwpm_util.c b/drivers/infiniband/core/iwpm_util.c
index 3ef51a96bbf1..eecb369898f5 100644
--- a/drivers/infiniband/core/iwpm_util.c
+++ b/drivers/infiniband/core/iwpm_util.c
@@ -48,83 +48,71 @@ static DEFINE_SPINLOCK(iwpm_mapinfo_lock);
static struct hlist_head *iwpm_reminfo_bucket;
static DEFINE_SPINLOCK(iwpm_reminfo_lock);
-static DEFINE_MUTEX(iwpm_admin_lock);
static struct iwpm_admin_data iwpm_admin;
+/**
+ * iwpm_init - Allocate resources for the iwarp port mapper
+ * @nl_client: The index of the netlink client
+ *
+ * Should be called when network interface goes up.
+ */
int iwpm_init(u8 nl_client)
{
- int ret = 0;
- if (iwpm_valid_client(nl_client))
- return -EINVAL;
- mutex_lock(&iwpm_admin_lock);
- if (atomic_read(&iwpm_admin.refcount) == 0) {
- iwpm_hash_bucket = kzalloc(IWPM_MAPINFO_HASH_SIZE *
- sizeof(struct hlist_head), GFP_KERNEL);
- if (!iwpm_hash_bucket) {
- ret = -ENOMEM;
- goto init_exit;
- }
- iwpm_reminfo_bucket = kzalloc(IWPM_REMINFO_HASH_SIZE *
- sizeof(struct hlist_head), GFP_KERNEL);
- if (!iwpm_reminfo_bucket) {
- kfree(iwpm_hash_bucket);
- ret = -ENOMEM;
- goto init_exit;
- }
- }
- atomic_inc(&iwpm_admin.refcount);
-init_exit:
- mutex_unlock(&iwpm_admin_lock);
- if (!ret) {
- iwpm_set_valid(nl_client, 1);
- iwpm_set_registration(nl_client, IWPM_REG_UNDEF);
- pr_debug("%s: Mapinfo and reminfo tables are created\n",
- __func__);
+ iwpm_hash_bucket = kcalloc(IWPM_MAPINFO_HASH_SIZE,
+ sizeof(struct hlist_head), GFP_KERNEL);
+ if (!iwpm_hash_bucket)
+ return -ENOMEM;
+
+ iwpm_reminfo_bucket = kcalloc(IWPM_REMINFO_HASH_SIZE,
+ sizeof(struct hlist_head), GFP_KERNEL);
+ if (!iwpm_reminfo_bucket) {
+ kfree(iwpm_hash_bucket);
+ return -ENOMEM;
}
- return ret;
+
+ iwpm_set_registration(nl_client, IWPM_REG_UNDEF);
+ pr_debug("%s: Mapinfo and reminfo tables are created\n", __func__);
+ return 0;
}
-EXPORT_SYMBOL(iwpm_init);
static void free_hash_bucket(void);
static void free_reminfo_bucket(void);
+/**
+ * iwpm_exit - Deallocate resources for the iwarp port mapper
+ * @nl_client: The index of the netlink client
+ *
+ * Should be called when network interface goes down.
+ */
int iwpm_exit(u8 nl_client)
{
-
- if (!iwpm_valid_client(nl_client))
- return -EINVAL;
- mutex_lock(&iwpm_admin_lock);
- if (atomic_read(&iwpm_admin.refcount) == 0) {
- mutex_unlock(&iwpm_admin_lock);
- pr_err("%s Incorrect usage - negative refcount\n", __func__);
- return -EINVAL;
- }
- if (atomic_dec_and_test(&iwpm_admin.refcount)) {
- free_hash_bucket();
- free_reminfo_bucket();
- pr_debug("%s: Resources are destroyed\n", __func__);
- }
- mutex_unlock(&iwpm_admin_lock);
- iwpm_set_valid(nl_client, 0);
+ free_hash_bucket();
+ free_reminfo_bucket();
+ pr_debug("%s: Resources are destroyed\n", __func__);
iwpm_set_registration(nl_client, IWPM_REG_UNDEF);
return 0;
}
-EXPORT_SYMBOL(iwpm_exit);
static struct hlist_head *get_mapinfo_hash_bucket(struct sockaddr_storage *,
struct sockaddr_storage *);
+/**
+ * iwpm_create_mapinfo - Store local and mapped IPv4/IPv6 address
+ * info in a hash table
+ * @local_sockaddr: Local ip/tcp address
+ * @mapped_sockaddr: Mapped local ip/tcp address
+ * @nl_client: The index of the netlink client
+ * @map_flags: IWPM mapping flags
+ */
int iwpm_create_mapinfo(struct sockaddr_storage *local_sockaddr,
struct sockaddr_storage *mapped_sockaddr,
- u8 nl_client)
+ u8 nl_client, u32 map_flags)
{
- struct hlist_head *hash_bucket_head;
+ struct hlist_head *hash_bucket_head = NULL;
struct iwpm_mapping_info *map_info;
unsigned long flags;
int ret = -EINVAL;
- if (!iwpm_valid_client(nl_client))
- return ret;
map_info = kzalloc(sizeof(struct iwpm_mapping_info), GFP_KERNEL);
if (!map_info)
return -ENOMEM;
@@ -134,6 +122,7 @@ int iwpm_create_mapinfo(struct sockaddr_storage *local_sockaddr,
memcpy(&map_info->mapped_sockaddr, mapped_sockaddr,
sizeof(struct sockaddr_storage));
map_info->nl_client = nl_client;
+ map_info->map_flags = map_flags;
spin_lock_irqsave(&iwpm_mapinfo_lock, flags);
if (iwpm_hash_bucket) {
@@ -146,10 +135,21 @@ int iwpm_create_mapinfo(struct sockaddr_storage *local_sockaddr,
}
}
spin_unlock_irqrestore(&iwpm_mapinfo_lock, flags);
+
+ if (!hash_bucket_head)
+ kfree(map_info);
return ret;
}
-EXPORT_SYMBOL(iwpm_create_mapinfo);
+/**
+ * iwpm_remove_mapinfo - Remove local and mapped IPv4/IPv6 address
+ * info from the hash table
+ * @local_sockaddr: Local ip/tcp address
+ * @mapped_local_addr: Mapped local ip/tcp address
+ *
+ * Returns err code if mapping info is not found in the hash table,
+ * otherwise returns 0
+ */
int iwpm_remove_mapinfo(struct sockaddr_storage *local_sockaddr,
struct sockaddr_storage *mapped_local_addr)
{
@@ -184,7 +184,6 @@ remove_mapinfo_exit:
spin_unlock_irqrestore(&iwpm_mapinfo_lock, flags);
return ret;
}
-EXPORT_SYMBOL(iwpm_remove_mapinfo);
static void free_hash_bucket(void)
{
@@ -251,6 +250,17 @@ void iwpm_add_remote_info(struct iwpm_remote_info *rem_info)
spin_unlock_irqrestore(&iwpm_reminfo_lock, flags);
}
+/**
+ * iwpm_get_remote_info - Get the remote connecting peer address info
+ *
+ * @mapped_loc_addr: Mapped local address of the listening peer
+ * @mapped_rem_addr: Mapped remote address of the connecting peer
+ * @remote_addr: To store the remote address of the connecting peer
+ * @nl_client: The index of the netlink client
+ *
+ * The remote address info is retrieved and provided to the client in
+ * the remote_addr. After that it is removed from the hash table
+ */
int iwpm_get_remote_info(struct sockaddr_storage *mapped_loc_addr,
struct sockaddr_storage *mapped_rem_addr,
struct sockaddr_storage *remote_addr,
@@ -262,10 +272,6 @@ int iwpm_get_remote_info(struct sockaddr_storage *mapped_loc_addr,
unsigned long flags;
int ret = -EINVAL;
- if (!iwpm_valid_client(nl_client)) {
- pr_info("%s: Invalid client = %d\n", __func__, nl_client);
- return ret;
- }
spin_lock_irqsave(&iwpm_reminfo_lock, flags);
if (iwpm_reminfo_bucket) {
hash_bucket_head = get_reminfo_hash_bucket(
@@ -297,12 +303,11 @@ get_remote_info_exit:
spin_unlock_irqrestore(&iwpm_reminfo_lock, flags);
return ret;
}
-EXPORT_SYMBOL(iwpm_get_remote_info);
struct iwpm_nlmsg_request *iwpm_get_nlmsg_request(__u32 nlmsg_seq,
u8 nl_client, gfp_t gfp)
{
- struct iwpm_nlmsg_request *nlmsg_request = NULL;
+ struct iwpm_nlmsg_request *nlmsg_request;
unsigned long flags;
nlmsg_request = kzalloc(sizeof(struct iwpm_nlmsg_request), gfp);
@@ -381,20 +386,6 @@ int iwpm_get_nlmsg_seq(void)
return atomic_inc_return(&iwpm_admin.nlmsg_seq);
}
-int iwpm_valid_client(u8 nl_client)
-{
- if (nl_client >= RDMA_NL_NUM_CLIENTS)
- return 0;
- return iwpm_admin.client_list[nl_client];
-}
-
-void iwpm_set_valid(u8 nl_client, int valid)
-{
- if (nl_client >= RDMA_NL_NUM_CLIENTS)
- return;
- iwpm_admin.client_list[nl_client] = valid;
-}
-
/* valid client */
u32 iwpm_get_registration(u8 nl_client)
{
@@ -450,10 +441,9 @@ struct sk_buff *iwpm_create_nlmsg(u32 nl_op, struct nlmsghdr **nlh,
struct sk_buff *skb = NULL;
skb = dev_alloc_skb(IWPM_MSG_SIZE);
- if (!skb) {
- pr_err("%s Unable to allocate skb\n", __func__);
+ if (!skb)
goto create_nlmsg_exit;
- }
+
if (!(ibnl_put_msg(skb, nlh, 0, 0, nl_client, nl_op,
NLM_F_REQUEST))) {
pr_warn("%s: Unable to put the nlmsg header\n", __func__);
@@ -472,12 +462,14 @@ int iwpm_parse_nlmsg(struct netlink_callback *cb, int policy_max,
int ret;
const char *err_str = "";
- ret = nlmsg_validate(cb->nlh, nlh_len, policy_max-1, nlmsg_policy);
+ ret = nlmsg_validate_deprecated(cb->nlh, nlh_len, policy_max - 1,
+ nlmsg_policy, NULL);
if (ret) {
err_str = "Invalid attribute";
goto parse_nlmsg_error;
}
- ret = nlmsg_parse(cb->nlh, nlh_len, nltb, policy_max-1, nlmsg_policy);
+ ret = nlmsg_parse_deprecated(cb->nlh, nlh_len, nltb, policy_max - 1,
+ nlmsg_policy, NULL);
if (ret) {
err_str = "Unable to parse the nlmsg";
goto parse_nlmsg_error;
@@ -606,18 +598,20 @@ static int send_mapinfo_num(u32 mapping_num, u8 nl_client, int iwpm_pid)
&mapping_num, IWPM_NLA_MAPINFO_SEND_NUM);
if (ret)
goto mapinfo_num_error;
- ret = ibnl_unicast(skb, nlh, iwpm_pid);
+
+ nlmsg_end(skb, nlh);
+
+ ret = rdma_nl_unicast(&init_net, skb, iwpm_pid);
if (ret) {
skb = NULL;
err_str = "Unable to send a nlmsg";
goto mapinfo_num_error;
}
- pr_debug("%s: Sent mapping number = %d\n", __func__, mapping_num);
+ pr_debug("%s: Sent mapping number = %u\n", __func__, mapping_num);
return 0;
mapinfo_num_error:
pr_info("%s: %s\n", __func__, err_str);
- if (skb)
- dev_kfree_skb(skb);
+ dev_kfree_skb(skb);
return ret;
}
@@ -635,7 +629,7 @@ static int send_nlmsg_done(struct sk_buff *skb, u8 nl_client, int iwpm_pid)
return -ENOMEM;
}
nlh->nlmsg_type = NLMSG_DONE;
- ret = ibnl_unicast(skb, (struct nlmsghdr *)skb->data, iwpm_pid);
+ ret = rdma_nl_unicast(&init_net, skb, iwpm_pid);
if (ret)
pr_warn("%s Unable to send a nlmsg\n", __func__);
return ret;
@@ -660,6 +654,7 @@ int iwpm_send_mapinfo(u8 nl_client, int iwpm_pid)
}
skb_num++;
spin_lock_irqsave(&iwpm_mapinfo_lock, flags);
+ ret = -EINVAL;
for (i = 0; i < IWPM_MAPINFO_HASH_SIZE; i++) {
hlist_for_each_entry(map_info, &iwpm_hash_bucket[i],
hlist_node) {
@@ -687,6 +682,16 @@ int iwpm_send_mapinfo(u8 nl_client, int iwpm_pid)
if (ret)
goto send_mapping_info_unlock;
+ if (iwpm_ulib_version > IWPM_UABI_VERSION_MIN) {
+ ret = ibnl_put_attr(skb, nlh, sizeof(u32),
+ &map_info->map_flags,
+ IWPM_NLA_MAPINFO_FLAGS);
+ if (ret)
+ goto send_mapping_info_unlock;
+ }
+
+ nlmsg_end(skb, nlh);
+
iwpm_print_sockaddr(&map_info->local_sockaddr,
"send_mapping_info: Local sockaddr:");
iwpm_print_sockaddr(&map_info->mapped_sockaddr,
@@ -728,8 +733,7 @@ send_mapping_info_unlock:
send_mapping_info_exit:
if (ret) {
pr_warn("%s: %s (ret = %d)\n", __func__, err_str, ret);
- if (skb)
- dev_kfree_skb(skb);
+ dev_kfree_skb(skb);
return ret;
}
send_nlmsg_done(skb, nl_client, iwpm_pid);
@@ -753,3 +757,37 @@ int iwpm_mapinfo_available(void)
spin_unlock_irqrestore(&iwpm_mapinfo_lock, flags);
return full_bucket;
}
+
+int iwpm_send_hello(u8 nl_client, int iwpm_pid, u16 abi_version)
+{
+ struct sk_buff *skb = NULL;
+ struct nlmsghdr *nlh;
+ const char *err_str;
+ int ret = -EINVAL;
+
+ skb = iwpm_create_nlmsg(RDMA_NL_IWPM_HELLO, &nlh, nl_client);
+ if (!skb) {
+ err_str = "Unable to create a nlmsg";
+ goto hello_num_error;
+ }
+ nlh->nlmsg_seq = iwpm_get_nlmsg_seq();
+ err_str = "Unable to put attribute of abi_version into nlmsg";
+ ret = ibnl_put_attr(skb, nlh, sizeof(u16), &abi_version,
+ IWPM_NLA_HELLO_ABI_VERSION);
+ if (ret)
+ goto hello_num_error;
+ nlmsg_end(skb, nlh);
+
+ ret = rdma_nl_unicast(&init_net, skb, iwpm_pid);
+ if (ret) {
+ skb = NULL;
+ err_str = "Unable to send a nlmsg";
+ goto hello_num_error;
+ }
+ pr_debug("%s: Sent hello abi_version = %u\n", __func__, abi_version);
+ return 0;
+hello_num_error:
+ pr_info("%s: %s\n", __func__, err_str);
+ dev_kfree_skb(skb);
+ return ret;
+}
diff --git a/drivers/infiniband/core/iwpm_util.h b/drivers/infiniband/core/iwpm_util.h
index af1fc14a0d3d..d6fc8402158a 100644
--- a/drivers/infiniband/core/iwpm_util.h
+++ b/drivers/infiniband/core/iwpm_util.h
@@ -33,7 +33,6 @@
#ifndef _IWPM_UTIL_H
#define _IWPM_UTIL_H
-#include <linux/module.h>
#include <linux/io.h>
#include <linux/in.h>
#include <linux/in6.h>
@@ -78,6 +77,7 @@ struct iwpm_mapping_info {
struct sockaddr_storage local_sockaddr;
struct sockaddr_storage mapped_sockaddr;
u8 nl_client;
+ u32 map_flags;
};
struct iwpm_remote_info {
@@ -89,9 +89,7 @@ struct iwpm_remote_info {
};
struct iwpm_admin_data {
- atomic_t refcount;
atomic_t nlmsg_seq;
- int client_list[RDMA_NL_NUM_CLIENTS];
u32 reg_list[RDMA_NL_NUM_CLIENTS];
};
@@ -140,29 +138,13 @@ int iwpm_wait_complete_req(struct iwpm_nlmsg_request *nlmsg_request);
int iwpm_get_nlmsg_seq(void);
/**
- * iwpm_add_reminfo - Add remote address info of the connecting peer
+ * iwpm_add_remote_info - Add remote address info of the connecting peer
* to the remote info hash table
* @reminfo: The remote info to be added
*/
void iwpm_add_remote_info(struct iwpm_remote_info *reminfo);
/**
- * iwpm_valid_client - Check if the port mapper client is valid
- * @nl_client: The index of the netlink client
- *
- * Valid clients need to call iwpm_init() before using
- * the port mapper
- */
-int iwpm_valid_client(u8 nl_client);
-
-/**
- * iwpm_set_valid - Set the port mapper client to valid or not
- * @nl_client: The index of the netlink client
- * @valid: 1 if valid or 0 if invalid
- */
-void iwpm_set_valid(u8 nl_client, int valid);
-
-/**
* iwpm_check_registration - Check if the client registration
* matches the given one
* @nl_client: The index of the netlink client
@@ -182,7 +164,7 @@ u32 iwpm_check_registration(u8 nl_client, u32 reg);
void iwpm_set_registration(u8 nl_client, u32 reg);
/**
- * iwpm_get_registration
+ * iwpm_get_registration - Get the client registration
* @nl_client: The index of the netlink client
*
* Returns the client registration type
@@ -209,8 +191,10 @@ int iwpm_mapinfo_available(void);
/**
* iwpm_compare_sockaddr - Compare two sockaddr storage structs
+ * @a_sockaddr: first sockaddr to compare
+ * @b_sockaddr: second sockaddr to compare
*
- * Returns 0 if they are holding the same ip/tcp address info,
+ * Return: 0 if they are holding the same ip/tcp address info,
* otherwise returns 1
*/
int iwpm_compare_sockaddr(struct sockaddr_storage *a_sockaddr,
@@ -266,4 +250,16 @@ int iwpm_parse_nlmsg(struct netlink_callback *cb, int policy_max,
* @msg: Message to print
*/
void iwpm_print_sockaddr(struct sockaddr_storage *sockaddr, char *msg);
+
+/**
+ * iwpm_send_hello - Send hello response to iwpmd
+ *
+ * @nl_client: The index of the netlink client
+ * @iwpm_pid: The pid of the user space port mapper
+ * @abi_version: The kernel's abi_version
+ *
+ * Returns 0 on success or a negative error code
+ */
+int iwpm_send_hello(u8 nl_client, int iwpm_pid, u16 abi_version);
+extern u16 iwpm_ulib_version;
#endif
diff --git a/drivers/infiniband/core/lag.c b/drivers/infiniband/core/lag.c
new file mode 100644
index 000000000000..8fd80adfe833
--- /dev/null
+++ b/drivers/infiniband/core/lag.c
@@ -0,0 +1,135 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2020 Mellanox Technologies. All rights reserved.
+ */
+
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_cache.h>
+#include <rdma/lag.h>
+
+static struct sk_buff *rdma_build_skb(struct net_device *netdev,
+ struct rdma_ah_attr *ah_attr,
+ gfp_t flags)
+{
+ struct ipv6hdr *ip6h;
+ struct sk_buff *skb;
+ struct ethhdr *eth;
+ struct iphdr *iph;
+ struct udphdr *uh;
+ u8 smac[ETH_ALEN];
+ bool is_ipv4;
+ int hdr_len;
+
+ is_ipv4 = ipv6_addr_v4mapped((struct in6_addr *)ah_attr->grh.dgid.raw);
+ hdr_len = ETH_HLEN + sizeof(struct udphdr) + LL_RESERVED_SPACE(netdev);
+ hdr_len += is_ipv4 ? sizeof(struct iphdr) : sizeof(struct ipv6hdr);
+
+ skb = alloc_skb(hdr_len, flags);
+ if (!skb)
+ return NULL;
+
+ skb->dev = netdev;
+ skb_reserve(skb, hdr_len);
+ skb_push(skb, sizeof(struct udphdr));
+ skb_reset_transport_header(skb);
+ uh = udp_hdr(skb);
+ uh->source =
+ htons(rdma_flow_label_to_udp_sport(ah_attr->grh.flow_label));
+ uh->dest = htons(ROCE_V2_UDP_DPORT);
+ uh->len = htons(sizeof(struct udphdr));
+
+ if (is_ipv4) {
+ skb_push(skb, sizeof(struct iphdr));
+ skb_reset_network_header(skb);
+ iph = ip_hdr(skb);
+ iph->frag_off = 0;
+ iph->version = 4;
+ iph->protocol = IPPROTO_UDP;
+ iph->ihl = 0x5;
+ iph->tot_len = htons(sizeof(struct udphdr) + sizeof(struct
+ iphdr));
+ memcpy(&iph->saddr, ah_attr->grh.sgid_attr->gid.raw + 12,
+ sizeof(struct in_addr));
+ memcpy(&iph->daddr, ah_attr->grh.dgid.raw + 12,
+ sizeof(struct in_addr));
+ } else {
+ skb_push(skb, sizeof(struct ipv6hdr));
+ skb_reset_network_header(skb);
+ ip6h = ipv6_hdr(skb);
+ ip6h->version = 6;
+ ip6h->nexthdr = IPPROTO_UDP;
+ memcpy(&ip6h->flow_lbl, &ah_attr->grh.flow_label,
+ sizeof(*ip6h->flow_lbl));
+ memcpy(&ip6h->saddr, ah_attr->grh.sgid_attr->gid.raw,
+ sizeof(struct in6_addr));
+ memcpy(&ip6h->daddr, ah_attr->grh.dgid.raw,
+ sizeof(struct in6_addr));
+ }
+
+ skb_push(skb, sizeof(struct ethhdr));
+ skb_reset_mac_header(skb);
+ eth = eth_hdr(skb);
+ skb->protocol = eth->h_proto = htons(is_ipv4 ? ETH_P_IP : ETH_P_IPV6);
+ rdma_read_gid_l2_fields(ah_attr->grh.sgid_attr, NULL, smac);
+ memcpy(eth->h_source, smac, ETH_ALEN);
+ memcpy(eth->h_dest, ah_attr->roce.dmac, ETH_ALEN);
+
+ return skb;
+}
+
+static struct net_device *rdma_get_xmit_slave_udp(struct ib_device *device,
+ struct net_device *master,
+ struct rdma_ah_attr *ah_attr,
+ gfp_t flags)
+{
+ struct net_device *slave;
+ struct sk_buff *skb;
+
+ skb = rdma_build_skb(master, ah_attr, flags);
+ if (!skb)
+ return ERR_PTR(-ENOMEM);
+
+ rcu_read_lock();
+ slave = netdev_get_xmit_slave(master, skb,
+ !!(device->lag_flags &
+ RDMA_LAG_FLAGS_HASH_ALL_SLAVES));
+ dev_hold(slave);
+ rcu_read_unlock();
+ kfree_skb(skb);
+ return slave;
+}
+
+void rdma_lag_put_ah_roce_slave(struct net_device *xmit_slave)
+{
+ dev_put(xmit_slave);
+}
+
+struct net_device *rdma_lag_get_ah_roce_slave(struct ib_device *device,
+ struct rdma_ah_attr *ah_attr,
+ gfp_t flags)
+{
+ struct net_device *slave = NULL;
+ struct net_device *master;
+
+ if (!(ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE &&
+ ah_attr->grh.sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP &&
+ ah_attr->grh.flow_label))
+ return NULL;
+
+ rcu_read_lock();
+ master = rdma_read_gid_attr_ndev_rcu(ah_attr->grh.sgid_attr);
+ if (IS_ERR(master)) {
+ rcu_read_unlock();
+ return master;
+ }
+ dev_hold(master);
+ rcu_read_unlock();
+
+ if (!netif_is_bond_master(master))
+ goto put;
+
+ slave = rdma_get_xmit_slave_udp(device, master, ah_attr, flags);
+put:
+ dev_put(master);
+ return slave;
+}
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index a009f7132c73..8f26bfb69586 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -3,7 +3,7 @@
* Copyright (c) 2005 Intel Corporation. All rights reserved.
* Copyright (c) 2005 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2009 HNR Consulting. All rights reserved.
- * Copyright (c) 2014 Intel Corporation. All rights reserved.
+ * Copyright (c) 2014,2018 Intel Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -40,14 +40,37 @@
#include <linux/dma-mapping.h>
#include <linux/slab.h>
#include <linux/module.h>
+#include <linux/security.h>
+#include <linux/xarray.h>
#include <rdma/ib_cache.h>
#include "mad_priv.h"
+#include "core_priv.h"
#include "mad_rmpp.h"
#include "smi.h"
#include "opa_smi.h"
#include "agent.h"
-#include "core_priv.h"
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/ib_mad.h>
+
+#ifdef CONFIG_TRACEPOINTS
+static void create_mad_addr_info(struct ib_mad_send_wr_private *mad_send_wr,
+ struct ib_mad_qp_info *qp_info,
+ struct trace_event_raw_ib_mad_send_template *entry)
+{
+ struct ib_ud_wr *wr = &mad_send_wr->send_wr;
+ struct rdma_ah_attr attr = {};
+
+ rdma_query_ah(wr->ah, &attr);
+
+ /* These are common */
+ entry->sl = attr.sl;
+ entry->rqpn = wr->remote_qpn;
+ entry->rqkey = wr->remote_qkey;
+ entry->dlid = rdma_ah_get_dlid(&attr);
+}
+#endif
static int mad_sendq_size = IB_MAD_QP_SEND_SIZE;
static int mad_recvq_size = IB_MAD_QP_RECV_SIZE;
@@ -57,8 +80,9 @@ MODULE_PARM_DESC(send_queue_size, "Size of send queue in number of work requests
module_param_named(recv_queue_size, mad_recvq_size, int, 0444);
MODULE_PARM_DESC(recv_queue_size, "Size of receive queue in number of work requests");
+static DEFINE_XARRAY_ALLOC1(ib_mad_clients);
+static u32 ib_mad_client_next;
static struct list_head ib_mad_port_list;
-static u32 ib_mad_client_id = 0;
/* Port list lock */
static DEFINE_SPINLOCK(ib_mad_port_list_lock);
@@ -89,7 +113,7 @@ static void ib_mad_send_done(struct ib_cq *cq, struct ib_wc *wc);
* Assumes ib_mad_port_list_lock is being held
*/
static inline struct ib_mad_port_private *
-__ib_get_mad_port(struct ib_device *device, int port_num)
+__ib_get_mad_port(struct ib_device *device, u32 port_num)
{
struct ib_mad_port_private *entry;
@@ -105,7 +129,7 @@ __ib_get_mad_port(struct ib_device *device, int port_num)
* for a device/port
*/
static inline struct ib_mad_port_private *
-ib_get_mad_port(struct ib_device *device, int port_num)
+ib_get_mad_port(struct ib_device *device, u32 port_num)
{
struct ib_mad_port_private *entry;
unsigned long flags;
@@ -126,8 +150,7 @@ static inline u8 convert_mgmt_class(u8 mgmt_class)
static int get_spl_qp_index(enum ib_qp_type qp_type)
{
- switch (qp_type)
- {
+ switch (qp_type) {
case IB_QPT_SMI:
return 0;
case IB_QPT_GSI:
@@ -187,11 +210,36 @@ int ib_response_mad(const struct ib_mad_hdr *hdr)
}
EXPORT_SYMBOL(ib_response_mad);
+#define SOL_FC_MAX_DEFAULT_FRAC 4
+#define SOL_FC_MAX_SA_FRAC 32
+
+static int get_sol_fc_max_outstanding(struct ib_mad_reg_req *mad_reg_req)
+{
+ if (!mad_reg_req)
+ /* Send only agent */
+ return mad_recvq_size / SOL_FC_MAX_DEFAULT_FRAC;
+
+ switch (mad_reg_req->mgmt_class) {
+ case IB_MGMT_CLASS_CM:
+ return mad_recvq_size / SOL_FC_MAX_DEFAULT_FRAC;
+ case IB_MGMT_CLASS_SUBN_ADM:
+ return mad_recvq_size / SOL_FC_MAX_SA_FRAC;
+ case IB_MGMT_CLASS_SUBN_LID_ROUTED:
+ case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE:
+ return min(mad_recvq_size, IB_MAD_QP_RECV_SIZE) /
+ SOL_FC_MAX_DEFAULT_FRAC;
+ default:
+ return 0;
+ }
+}
+
/*
* ib_register_mad_agent - Register to send/receive MADs
+ *
+ * Context: Process context.
*/
struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
- u8 port_num,
+ u32 port_num,
enum ib_qp_type qp_type,
struct ib_mad_reg_req *mad_reg_req,
u8 rmpp_version,
@@ -209,36 +257,39 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
struct ib_mad_mgmt_vendor_class *vendor_class;
struct ib_mad_mgmt_method_table *method;
int ret2, qpn;
- unsigned long flags;
u8 mgmt_class, vclass;
+ if ((qp_type == IB_QPT_SMI && !rdma_cap_ib_smi(device, port_num)) ||
+ (qp_type == IB_QPT_GSI && !rdma_cap_ib_cm(device, port_num)))
+ return ERR_PTR(-EPROTONOSUPPORT);
+
/* Validate parameters */
qpn = get_spl_qp_index(qp_type);
if (qpn == -1) {
- dev_notice(&device->dev,
- "ib_register_mad_agent: invalid QP Type %d\n",
- qp_type);
+ dev_dbg_ratelimited(&device->dev, "%s: invalid QP Type %d\n",
+ __func__, qp_type);
goto error1;
}
if (rmpp_version && rmpp_version != IB_MGMT_RMPP_VERSION) {
- dev_notice(&device->dev,
- "ib_register_mad_agent: invalid RMPP Version %u\n",
- rmpp_version);
+ dev_dbg_ratelimited(&device->dev,
+ "%s: invalid RMPP Version %u\n",
+ __func__, rmpp_version);
goto error1;
}
/* Validate MAD registration request if supplied */
if (mad_reg_req) {
if (mad_reg_req->mgmt_class_version >= MAX_MGMT_VERSION) {
- dev_notice(&device->dev,
- "ib_register_mad_agent: invalid Class Version %u\n",
- mad_reg_req->mgmt_class_version);
+ dev_dbg_ratelimited(&device->dev,
+ "%s: invalid Class Version %u\n",
+ __func__,
+ mad_reg_req->mgmt_class_version);
goto error1;
}
if (!recv_handler) {
- dev_notice(&device->dev,
- "ib_register_mad_agent: no recv_handler\n");
+ dev_dbg_ratelimited(&device->dev,
+ "%s: no recv_handler\n", __func__);
goto error1;
}
if (mad_reg_req->mgmt_class >= MAX_MGMT_CLASS) {
@@ -248,9 +299,9 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
*/
if (mad_reg_req->mgmt_class !=
IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
- dev_notice(&device->dev,
- "ib_register_mad_agent: Invalid Mgmt Class 0x%x\n",
- mad_reg_req->mgmt_class);
+ dev_dbg_ratelimited(&device->dev,
+ "%s: Invalid Mgmt Class 0x%x\n",
+ __func__, mad_reg_req->mgmt_class);
goto error1;
}
} else if (mad_reg_req->mgmt_class == 0) {
@@ -258,8 +309,9 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
* Class 0 is reserved in IBA and is used for
* aliasing of IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE
*/
- dev_notice(&device->dev,
- "ib_register_mad_agent: Invalid Mgmt Class 0\n");
+ dev_dbg_ratelimited(&device->dev,
+ "%s: Invalid Mgmt Class 0\n",
+ __func__);
goto error1;
} else if (is_vendor_class(mad_reg_req->mgmt_class)) {
/*
@@ -267,18 +319,19 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
* ensure supplied OUI is not zero
*/
if (!is_vendor_oui(mad_reg_req->oui)) {
- dev_notice(&device->dev,
- "ib_register_mad_agent: No OUI specified for class 0x%x\n",
- mad_reg_req->mgmt_class);
+ dev_dbg_ratelimited(&device->dev,
+ "%s: No OUI specified for class 0x%x\n",
+ __func__,
+ mad_reg_req->mgmt_class);
goto error1;
}
}
/* Make sure class supplied is consistent with RMPP */
if (!ib_is_mad_class_rmpp(mad_reg_req->mgmt_class)) {
if (rmpp_version) {
- dev_notice(&device->dev,
- "ib_register_mad_agent: RMPP version for non-RMPP class 0x%x\n",
- mad_reg_req->mgmt_class);
+ dev_dbg_ratelimited(&device->dev,
+ "%s: RMPP version for non-RMPP class 0x%x\n",
+ __func__, mad_reg_req->mgmt_class);
goto error1;
}
}
@@ -289,9 +342,9 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
IB_MGMT_CLASS_SUBN_LID_ROUTED) &&
(mad_reg_req->mgmt_class !=
IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) {
- dev_notice(&device->dev,
- "ib_register_mad_agent: Invalid SM QP type: class 0x%x\n",
- mad_reg_req->mgmt_class);
+ dev_dbg_ratelimited(&device->dev,
+ "%s: Invalid SM QP type: class 0x%x\n",
+ __func__, mad_reg_req->mgmt_class);
goto error1;
}
} else {
@@ -299,9 +352,9 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
IB_MGMT_CLASS_SUBN_LID_ROUTED) ||
(mad_reg_req->mgmt_class ==
IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) {
- dev_notice(&device->dev,
- "ib_register_mad_agent: Invalid GS QP type: class 0x%x\n",
- mad_reg_req->mgmt_class);
+ dev_dbg_ratelimited(&device->dev,
+ "%s: Invalid GS QP type: class 0x%x\n",
+ __func__, mad_reg_req->mgmt_class);
goto error1;
}
}
@@ -316,16 +369,18 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
/* Validate device and port */
port_priv = ib_get_mad_port(device, port_num);
if (!port_priv) {
- dev_notice(&device->dev, "ib_register_mad_agent: Invalid port\n");
+ dev_dbg_ratelimited(&device->dev, "%s: Invalid port %u\n",
+ __func__, port_num);
ret = ERR_PTR(-ENODEV);
goto error1;
}
- /* Verify the QP requested is supported. For example, Ethernet devices
- * will not have QP0 */
+ /* Verify the QP requested is supported. For example, Ethernet devices
+ * will not have QP0.
+ */
if (!port_priv->qp_info[qpn].qp) {
- dev_notice(&device->dev,
- "ib_register_mad_agent: QP %d not supported\n", qpn);
+ dev_dbg_ratelimited(&device->dev, "%s: QP %d not supported\n",
+ __func__, qpn);
ret = ERR_PTR(-EPROTONOSUPPORT);
goto error1;
}
@@ -359,21 +414,41 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
spin_lock_init(&mad_agent_priv->lock);
INIT_LIST_HEAD(&mad_agent_priv->send_list);
INIT_LIST_HEAD(&mad_agent_priv->wait_list);
- INIT_LIST_HEAD(&mad_agent_priv->done_list);
INIT_LIST_HEAD(&mad_agent_priv->rmpp_list);
+ INIT_LIST_HEAD(&mad_agent_priv->backlog_list);
INIT_DELAYED_WORK(&mad_agent_priv->timed_work, timeout_sends);
INIT_LIST_HEAD(&mad_agent_priv->local_list);
INIT_WORK(&mad_agent_priv->local_work, local_completions);
- atomic_set(&mad_agent_priv->refcount, 1);
+ refcount_set(&mad_agent_priv->refcount, 1);
init_completion(&mad_agent_priv->comp);
+ mad_agent_priv->sol_fc_send_count = 0;
+ mad_agent_priv->sol_fc_wait_count = 0;
+ mad_agent_priv->sol_fc_max =
+ recv_handler ? get_sol_fc_max_outstanding(mad_reg_req) : 0;
+
+ ret2 = ib_mad_agent_security_setup(&mad_agent_priv->agent, qp_type);
+ if (ret2) {
+ ret = ERR_PTR(ret2);
+ goto error4;
+ }
- spin_lock_irqsave(&port_priv->reg_lock, flags);
- mad_agent_priv->agent.hi_tid = ++ib_mad_client_id;
+ /*
+ * The mlx4 driver uses the top byte to distinguish which virtual
+ * function generated the MAD, so we must avoid using it.
+ */
+ ret2 = xa_alloc_cyclic(&ib_mad_clients, &mad_agent_priv->agent.hi_tid,
+ mad_agent_priv, XA_LIMIT(0, (1 << 24) - 1),
+ &ib_mad_client_next, GFP_KERNEL);
+ if (ret2 < 0) {
+ ret = ERR_PTR(ret2);
+ goto error5;
+ }
/*
* Make sure MAD registration (if supplied)
* is non overlapping with any existing ones
*/
+ spin_lock_irq(&port_priv->reg_lock);
if (mad_reg_req) {
mgmt_class = convert_mgmt_class(mad_reg_req->mgmt_class);
if (!is_vendor_class(mgmt_class)) {
@@ -384,7 +459,7 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
if (method) {
if (method_in_use(&method,
mad_reg_req))
- goto error4;
+ goto error6;
}
}
ret2 = add_nonoui_reg_req(mad_reg_req, mad_agent_priv,
@@ -400,25 +475,26 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
if (is_vendor_method_in_use(
vendor_class,
mad_reg_req))
- goto error4;
+ goto error6;
}
}
ret2 = add_oui_reg_req(mad_reg_req, mad_agent_priv);
}
if (ret2) {
ret = ERR_PTR(ret2);
- goto error4;
+ goto error6;
}
}
+ spin_unlock_irq(&port_priv->reg_lock);
- /* Add mad agent into port's agent list */
- list_add_tail(&mad_agent_priv->agent_list, &port_priv->agent_list);
- spin_unlock_irqrestore(&port_priv->reg_lock, flags);
-
+ trace_ib_mad_create_agent(mad_agent_priv);
return &mad_agent_priv->agent;
-
+error6:
+ spin_unlock_irq(&port_priv->reg_lock);
+ xa_erase(&ib_mad_clients, mad_agent_priv->agent.hi_tid);
+error5:
+ ib_mad_agent_security_cleanup(&mad_agent_priv->agent);
error4:
- spin_unlock_irqrestore(&port_priv->reg_lock, flags);
kfree(reg_req);
error3:
kfree(mad_agent_priv);
@@ -427,138 +503,18 @@ error1:
}
EXPORT_SYMBOL(ib_register_mad_agent);
-static inline int is_snooping_sends(int mad_snoop_flags)
-{
- return (mad_snoop_flags &
- (/*IB_MAD_SNOOP_POSTED_SENDS |
- IB_MAD_SNOOP_RMPP_SENDS |*/
- IB_MAD_SNOOP_SEND_COMPLETIONS /*|
- IB_MAD_SNOOP_RMPP_SEND_COMPLETIONS*/));
-}
-
-static inline int is_snooping_recvs(int mad_snoop_flags)
-{
- return (mad_snoop_flags &
- (IB_MAD_SNOOP_RECVS /*|
- IB_MAD_SNOOP_RMPP_RECVS*/));
-}
-
-static int register_snoop_agent(struct ib_mad_qp_info *qp_info,
- struct ib_mad_snoop_private *mad_snoop_priv)
-{
- struct ib_mad_snoop_private **new_snoop_table;
- unsigned long flags;
- int i;
-
- spin_lock_irqsave(&qp_info->snoop_lock, flags);
- /* Check for empty slot in array. */
- for (i = 0; i < qp_info->snoop_table_size; i++)
- if (!qp_info->snoop_table[i])
- break;
-
- if (i == qp_info->snoop_table_size) {
- /* Grow table. */
- new_snoop_table = krealloc(qp_info->snoop_table,
- sizeof mad_snoop_priv *
- (qp_info->snoop_table_size + 1),
- GFP_ATOMIC);
- if (!new_snoop_table) {
- i = -ENOMEM;
- goto out;
- }
-
- qp_info->snoop_table = new_snoop_table;
- qp_info->snoop_table_size++;
- }
- qp_info->snoop_table[i] = mad_snoop_priv;
- atomic_inc(&qp_info->snoop_count);
-out:
- spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
- return i;
-}
-
-struct ib_mad_agent *ib_register_mad_snoop(struct ib_device *device,
- u8 port_num,
- enum ib_qp_type qp_type,
- int mad_snoop_flags,
- ib_mad_snoop_handler snoop_handler,
- ib_mad_recv_handler recv_handler,
- void *context)
-{
- struct ib_mad_port_private *port_priv;
- struct ib_mad_agent *ret;
- struct ib_mad_snoop_private *mad_snoop_priv;
- int qpn;
-
- /* Validate parameters */
- if ((is_snooping_sends(mad_snoop_flags) && !snoop_handler) ||
- (is_snooping_recvs(mad_snoop_flags) && !recv_handler)) {
- ret = ERR_PTR(-EINVAL);
- goto error1;
- }
- qpn = get_spl_qp_index(qp_type);
- if (qpn == -1) {
- ret = ERR_PTR(-EINVAL);
- goto error1;
- }
- port_priv = ib_get_mad_port(device, port_num);
- if (!port_priv) {
- ret = ERR_PTR(-ENODEV);
- goto error1;
- }
- /* Allocate structures */
- mad_snoop_priv = kzalloc(sizeof *mad_snoop_priv, GFP_KERNEL);
- if (!mad_snoop_priv) {
- ret = ERR_PTR(-ENOMEM);
- goto error1;
- }
-
- /* Now, fill in the various structures */
- mad_snoop_priv->qp_info = &port_priv->qp_info[qpn];
- mad_snoop_priv->agent.device = device;
- mad_snoop_priv->agent.recv_handler = recv_handler;
- mad_snoop_priv->agent.snoop_handler = snoop_handler;
- mad_snoop_priv->agent.context = context;
- mad_snoop_priv->agent.qp = port_priv->qp_info[qpn].qp;
- mad_snoop_priv->agent.port_num = port_num;
- mad_snoop_priv->mad_snoop_flags = mad_snoop_flags;
- init_completion(&mad_snoop_priv->comp);
- mad_snoop_priv->snoop_index = register_snoop_agent(
- &port_priv->qp_info[qpn],
- mad_snoop_priv);
- if (mad_snoop_priv->snoop_index < 0) {
- ret = ERR_PTR(mad_snoop_priv->snoop_index);
- goto error2;
- }
-
- atomic_set(&mad_snoop_priv->refcount, 1);
- return &mad_snoop_priv->agent;
-
-error2:
- kfree(mad_snoop_priv);
-error1:
- return ret;
-}
-EXPORT_SYMBOL(ib_register_mad_snoop);
-
static inline void deref_mad_agent(struct ib_mad_agent_private *mad_agent_priv)
{
- if (atomic_dec_and_test(&mad_agent_priv->refcount))
+ if (refcount_dec_and_test(&mad_agent_priv->refcount))
complete(&mad_agent_priv->comp);
}
-static inline void deref_snoop_agent(struct ib_mad_snoop_private *mad_snoop_priv)
-{
- if (atomic_dec_and_test(&mad_snoop_priv->refcount))
- complete(&mad_snoop_priv->comp);
-}
-
static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv)
{
struct ib_mad_port_private *port_priv;
- unsigned long flags;
/* Note that we could still be handling received MADs */
+ trace_ib_mad_unregister_agent(mad_agent_priv);
/*
* Canceling all sends results in dropping received response
@@ -568,59 +524,36 @@ static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv)
port_priv = mad_agent_priv->qp_info->port_priv;
cancel_delayed_work(&mad_agent_priv->timed_work);
- spin_lock_irqsave(&port_priv->reg_lock, flags);
+ spin_lock_irq(&port_priv->reg_lock);
remove_mad_reg_req(mad_agent_priv);
- list_del(&mad_agent_priv->agent_list);
- spin_unlock_irqrestore(&port_priv->reg_lock, flags);
+ spin_unlock_irq(&port_priv->reg_lock);
+ xa_erase(&ib_mad_clients, mad_agent_priv->agent.hi_tid);
flush_workqueue(port_priv->wq);
- ib_cancel_rmpp_recvs(mad_agent_priv);
deref_mad_agent(mad_agent_priv);
wait_for_completion(&mad_agent_priv->comp);
+ ib_cancel_rmpp_recvs(mad_agent_priv);
- kfree(mad_agent_priv->reg_req);
- kfree(mad_agent_priv);
-}
-
-static void unregister_mad_snoop(struct ib_mad_snoop_private *mad_snoop_priv)
-{
- struct ib_mad_qp_info *qp_info;
- unsigned long flags;
-
- qp_info = mad_snoop_priv->qp_info;
- spin_lock_irqsave(&qp_info->snoop_lock, flags);
- qp_info->snoop_table[mad_snoop_priv->snoop_index] = NULL;
- atomic_dec(&qp_info->snoop_count);
- spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
-
- deref_snoop_agent(mad_snoop_priv);
- wait_for_completion(&mad_snoop_priv->comp);
+ ib_mad_agent_security_cleanup(&mad_agent_priv->agent);
- kfree(mad_snoop_priv);
+ kfree(mad_agent_priv->reg_req);
+ kfree_rcu(mad_agent_priv, rcu);
}
/*
* ib_unregister_mad_agent - Unregisters a client from using MAD services
+ *
+ * Context: Process context.
*/
-int ib_unregister_mad_agent(struct ib_mad_agent *mad_agent)
+void ib_unregister_mad_agent(struct ib_mad_agent *mad_agent)
{
struct ib_mad_agent_private *mad_agent_priv;
- struct ib_mad_snoop_private *mad_snoop_priv;
-
- /* If the TID is zero, the agent can only snoop. */
- if (mad_agent->hi_tid) {
- mad_agent_priv = container_of(mad_agent,
- struct ib_mad_agent_private,
- agent);
- unregister_mad_agent(mad_agent_priv);
- } else {
- mad_snoop_priv = container_of(mad_agent,
- struct ib_mad_snoop_private,
- agent);
- unregister_mad_snoop(mad_snoop_priv);
- }
- return 0;
+
+ mad_agent_priv = container_of(mad_agent,
+ struct ib_mad_agent_private,
+ agent);
+ unregister_mad_agent(mad_agent_priv);
}
EXPORT_SYMBOL(ib_unregister_mad_agent);
@@ -629,7 +562,6 @@ static void dequeue_mad(struct ib_mad_list_head *mad_list)
struct ib_mad_queue *mad_queue;
unsigned long flags;
- BUG_ON(!mad_list->mad_queue);
mad_queue = mad_list->mad_queue;
spin_lock_irqsave(&mad_queue->lock, flags);
list_del(&mad_list->list);
@@ -637,59 +569,8 @@ static void dequeue_mad(struct ib_mad_list_head *mad_list)
spin_unlock_irqrestore(&mad_queue->lock, flags);
}
-static void snoop_send(struct ib_mad_qp_info *qp_info,
- struct ib_mad_send_buf *send_buf,
- struct ib_mad_send_wc *mad_send_wc,
- int mad_snoop_flags)
-{
- struct ib_mad_snoop_private *mad_snoop_priv;
- unsigned long flags;
- int i;
-
- spin_lock_irqsave(&qp_info->snoop_lock, flags);
- for (i = 0; i < qp_info->snoop_table_size; i++) {
- mad_snoop_priv = qp_info->snoop_table[i];
- if (!mad_snoop_priv ||
- !(mad_snoop_priv->mad_snoop_flags & mad_snoop_flags))
- continue;
-
- atomic_inc(&mad_snoop_priv->refcount);
- spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
- mad_snoop_priv->agent.snoop_handler(&mad_snoop_priv->agent,
- send_buf, mad_send_wc);
- deref_snoop_agent(mad_snoop_priv);
- spin_lock_irqsave(&qp_info->snoop_lock, flags);
- }
- spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
-}
-
-static void snoop_recv(struct ib_mad_qp_info *qp_info,
- struct ib_mad_recv_wc *mad_recv_wc,
- int mad_snoop_flags)
-{
- struct ib_mad_snoop_private *mad_snoop_priv;
- unsigned long flags;
- int i;
-
- spin_lock_irqsave(&qp_info->snoop_lock, flags);
- for (i = 0; i < qp_info->snoop_table_size; i++) {
- mad_snoop_priv = qp_info->snoop_table[i];
- if (!mad_snoop_priv ||
- !(mad_snoop_priv->mad_snoop_flags & mad_snoop_flags))
- continue;
-
- atomic_inc(&mad_snoop_priv->refcount);
- spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
- mad_snoop_priv->agent.recv_handler(&mad_snoop_priv->agent, NULL,
- mad_recv_wc);
- deref_snoop_agent(mad_snoop_priv);
- spin_lock_irqsave(&qp_info->snoop_lock, flags);
- }
- spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
-}
-
static void build_smp_wc(struct ib_qp *qp, struct ib_cqe *cqe, u16 slid,
- u16 pkey_index, u8 port_num, struct ib_wc *wc)
+ u16 pkey_index, u32 port_num, struct ib_wc *wc)
{
memset(wc, 0, sizeof *wc);
wc->wr_cqe = cqe;
@@ -748,7 +629,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
struct ib_mad_port_private *port_priv;
struct ib_mad_agent_private *recv_mad_agent = NULL;
struct ib_device *device = mad_agent_priv->agent.device;
- u8 port_num;
+ u32 port_num;
struct ib_wc mad_wc;
struct ib_ud_wr *send_wr = &mad_send_wr->send_wr;
size_t mad_size = port_mad_size(mad_agent_priv->qp_info->port_priv);
@@ -772,6 +653,8 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
if (opa && smp->class_version == OPA_SM_CLASS_VERSION) {
u32 opa_drslid;
+ trace_ib_mad_handle_out_opa_smi(opa_smp);
+
if ((opa_get_smp_direction(opa_smp)
? opa_smp->route.dr.dr_dlid : opa_smp->route.dr.dr_slid) ==
OPA_LID_PERMISSIVE &&
@@ -797,6 +680,8 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
opa_smi_check_local_returning_smp(opa_smp, device) == IB_SMI_DISCARD)
goto out;
} else {
+ trace_ib_mad_handle_out_ib_smi(smp);
+
if ((ib_get_smp_direction(smp) ? smp->dr_dlid : smp->dr_slid) ==
IB_LID_PERMISSIVE &&
smi_handle_dr_smp_send(smp, rdma_cap_ib_switch(device), port_num) ==
@@ -839,12 +724,11 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
}
/* No GRH for DR SMP */
- ret = device->process_mad(device, 0, port_num, &mad_wc, NULL,
- (const struct ib_mad_hdr *)smp, mad_size,
- (struct ib_mad_hdr *)mad_priv->mad,
- &mad_size, &out_mad_pkey_index);
- switch (ret)
- {
+ ret = device->ops.process_mad(device, 0, port_num, &mad_wc, NULL,
+ (const struct ib_mad *)smp,
+ (struct ib_mad *)mad_priv->mad, &mad_size,
+ &out_mad_pkey_index);
+ switch (ret) {
case IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY:
if (ib_response_mad((const struct ib_mad_hdr *)mad_priv->mad) &&
mad_agent_priv->agent.recv_handler) {
@@ -854,7 +738,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
* Reference MAD agent until receive
* side of local completion handled
*/
- atomic_inc(&mad_agent_priv->refcount);
+ refcount_inc(&mad_agent_priv->refcount);
} else
kfree(mad_priv);
break;
@@ -894,7 +778,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
local->return_wc_byte_len = mad_size;
}
/* Reference MAD agent until send side of local completion handled */
- atomic_inc(&mad_agent_priv->refcount);
+ refcount_inc(&mad_agent_priv->refcount);
/* Queue local completion to local list */
spin_lock_irqsave(&mad_agent_priv->lock, flags);
list_add_tail(&local->completion_list, &mad_agent_priv->local_list);
@@ -943,7 +827,7 @@ static int alloc_send_rmpp_list(struct ib_mad_send_wr_private *send_wr,
/* Allocate data segments. */
for (left = send_buf->data_len + pad; left > 0; left -= seg_size) {
- seg = kmalloc(sizeof (*seg) + seg_size, gfp_mask);
+ seg = kmalloc(sizeof(*seg) + seg_size, gfp_mask);
if (!seg) {
free_send_rmpp_list(send_wr);
return -ENOMEM;
@@ -973,12 +857,11 @@ int ib_mad_kernel_rmpp_agent(const struct ib_mad_agent *agent)
}
EXPORT_SYMBOL(ib_mad_kernel_rmpp_agent);
-struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent,
- u32 remote_qpn, u16 pkey_index,
- int rmpp_active,
- int hdr_len, int data_len,
- gfp_t gfp_mask,
- u8 base_version)
+struct ib_mad_send_buf *ib_create_send_mad(struct ib_mad_agent *mad_agent,
+ u32 remote_qpn, u16 pkey_index,
+ int rmpp_active, int hdr_len,
+ int data_len, gfp_t gfp_mask,
+ u8 base_version)
{
struct ib_mad_agent_private *mad_agent_priv;
struct ib_mad_send_wr_private *mad_send_wr;
@@ -1052,7 +935,7 @@ struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent,
}
mad_send_wr->send_buf.mad_agent = mad_agent;
- atomic_inc(&mad_agent_priv->refcount);
+ refcount_inc(&mad_agent_priv->refcount);
return &mad_send_wr->send_buf;
}
EXPORT_SYMBOL(ib_create_send_mad);
@@ -1138,7 +1021,6 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr)
{
struct ib_mad_qp_info *qp_info;
struct list_head *list;
- struct ib_send_wr *bad_send_wr;
struct ib_mad_agent *mad_agent;
struct ib_sge *sge;
unsigned long flags;
@@ -1175,8 +1057,9 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr)
spin_lock_irqsave(&qp_info->send_queue.lock, flags);
if (qp_info->send_queue.count < qp_info->send_queue.max_active) {
+ trace_ib_mad_ib_send_mad(mad_send_wr, qp_info);
ret = ib_post_send(mad_agent->qp, &mad_send_wr->send_wr.wr,
- &bad_send_wr);
+ NULL);
list = &qp_info->send_queue.list;
} else {
ret = 0;
@@ -1199,6 +1082,180 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr)
return ret;
}
+static void handle_queued_state(struct ib_mad_send_wr_private *mad_send_wr,
+ struct ib_mad_agent_private *mad_agent_priv)
+{
+ if (mad_send_wr->state == IB_MAD_STATE_WAIT_RESP) {
+ mad_agent_priv->sol_fc_wait_count--;
+ list_move_tail(&mad_send_wr->agent_list,
+ &mad_agent_priv->backlog_list);
+ } else {
+ expect_mad_state(mad_send_wr, IB_MAD_STATE_INIT);
+ list_add_tail(&mad_send_wr->agent_list,
+ &mad_agent_priv->backlog_list);
+ }
+}
+
+static void handle_send_state(struct ib_mad_send_wr_private *mad_send_wr,
+ struct ib_mad_agent_private *mad_agent_priv)
+{
+ if (mad_send_wr->state == IB_MAD_STATE_INIT) {
+ list_add_tail(&mad_send_wr->agent_list,
+ &mad_agent_priv->send_list);
+ } else {
+ expect_mad_state2(mad_send_wr, IB_MAD_STATE_WAIT_RESP,
+ IB_MAD_STATE_QUEUED);
+ list_move_tail(&mad_send_wr->agent_list,
+ &mad_agent_priv->send_list);
+ }
+
+ if (mad_send_wr->is_solicited_fc) {
+ if (mad_send_wr->state == IB_MAD_STATE_WAIT_RESP)
+ mad_agent_priv->sol_fc_wait_count--;
+ mad_agent_priv->sol_fc_send_count++;
+ }
+}
+
+static void handle_wait_state(struct ib_mad_send_wr_private *mad_send_wr,
+ struct ib_mad_agent_private *mad_agent_priv)
+{
+ struct ib_mad_send_wr_private *temp_mad_send_wr;
+ struct list_head *list_item;
+ unsigned long delay;
+
+ expect_mad_state3(mad_send_wr, IB_MAD_STATE_SEND_START,
+ IB_MAD_STATE_WAIT_RESP, IB_MAD_STATE_CANCELED);
+ if (mad_send_wr->state == IB_MAD_STATE_SEND_START &&
+ mad_send_wr->is_solicited_fc) {
+ mad_agent_priv->sol_fc_send_count--;
+ mad_agent_priv->sol_fc_wait_count++;
+ }
+
+ list_del_init(&mad_send_wr->agent_list);
+ delay = mad_send_wr->timeout;
+ mad_send_wr->timeout += jiffies;
+
+ if (delay) {
+ list_for_each_prev(list_item,
+ &mad_agent_priv->wait_list) {
+ temp_mad_send_wr = list_entry(
+ list_item,
+ struct ib_mad_send_wr_private,
+ agent_list);
+ if (time_after(mad_send_wr->timeout,
+ temp_mad_send_wr->timeout))
+ break;
+ }
+ } else {
+ list_item = &mad_agent_priv->wait_list;
+ }
+
+ list_add(&mad_send_wr->agent_list, list_item);
+}
+
+static void handle_early_resp_state(struct ib_mad_send_wr_private *mad_send_wr,
+ struct ib_mad_agent_private *mad_agent_priv)
+{
+ expect_mad_state(mad_send_wr, IB_MAD_STATE_SEND_START);
+ mad_agent_priv->sol_fc_send_count -= mad_send_wr->is_solicited_fc;
+}
+
+static void handle_canceled_state(struct ib_mad_send_wr_private *mad_send_wr,
+ struct ib_mad_agent_private *mad_agent_priv)
+{
+ not_expect_mad_state(mad_send_wr, IB_MAD_STATE_DONE);
+ if (mad_send_wr->is_solicited_fc) {
+ if (mad_send_wr->state == IB_MAD_STATE_SEND_START)
+ mad_agent_priv->sol_fc_send_count--;
+ else if (mad_send_wr->state == IB_MAD_STATE_WAIT_RESP)
+ mad_agent_priv->sol_fc_wait_count--;
+ }
+}
+
+static void handle_done_state(struct ib_mad_send_wr_private *mad_send_wr,
+ struct ib_mad_agent_private *mad_agent_priv)
+{
+ if (mad_send_wr->is_solicited_fc) {
+ if (mad_send_wr->state == IB_MAD_STATE_SEND_START)
+ mad_agent_priv->sol_fc_send_count--;
+ else if (mad_send_wr->state == IB_MAD_STATE_WAIT_RESP)
+ mad_agent_priv->sol_fc_wait_count--;
+ }
+
+ list_del_init(&mad_send_wr->agent_list);
+}
+
+void change_mad_state(struct ib_mad_send_wr_private *mad_send_wr,
+ enum ib_mad_state new_state)
+{
+ struct ib_mad_agent_private *mad_agent_priv =
+ mad_send_wr->mad_agent_priv;
+
+ switch (new_state) {
+ case IB_MAD_STATE_INIT:
+ break;
+ case IB_MAD_STATE_QUEUED:
+ handle_queued_state(mad_send_wr, mad_agent_priv);
+ break;
+ case IB_MAD_STATE_SEND_START:
+ handle_send_state(mad_send_wr, mad_agent_priv);
+ break;
+ case IB_MAD_STATE_WAIT_RESP:
+ handle_wait_state(mad_send_wr, mad_agent_priv);
+ if (mad_send_wr->state == IB_MAD_STATE_CANCELED)
+ return;
+ break;
+ case IB_MAD_STATE_EARLY_RESP:
+ handle_early_resp_state(mad_send_wr, mad_agent_priv);
+ break;
+ case IB_MAD_STATE_CANCELED:
+ handle_canceled_state(mad_send_wr, mad_agent_priv);
+ break;
+ case IB_MAD_STATE_DONE:
+ handle_done_state(mad_send_wr, mad_agent_priv);
+ break;
+ }
+
+ mad_send_wr->state = new_state;
+}
+
+static bool is_solicited_fc_mad(struct ib_mad_send_wr_private *mad_send_wr)
+{
+ struct ib_rmpp_mad *rmpp_mad;
+ u8 mgmt_class;
+
+ if (!mad_send_wr->timeout)
+ return 0;
+
+ rmpp_mad = mad_send_wr->send_buf.mad;
+ if (mad_send_wr->mad_agent_priv->agent.rmpp_version &&
+ (ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & IB_MGMT_RMPP_FLAG_ACTIVE))
+ return 0;
+
+ mgmt_class =
+ ((struct ib_mad_hdr *)mad_send_wr->send_buf.mad)->mgmt_class;
+ return mgmt_class == IB_MGMT_CLASS_CM ||
+ mgmt_class == IB_MGMT_CLASS_SUBN_ADM ||
+ mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED ||
+ mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE;
+}
+
+static bool mad_is_for_backlog(struct ib_mad_send_wr_private *mad_send_wr)
+{
+ struct ib_mad_agent_private *mad_agent_priv =
+ mad_send_wr->mad_agent_priv;
+
+ if (!mad_send_wr->is_solicited_fc || !mad_agent_priv->sol_fc_max)
+ return false;
+
+ if (!list_empty(&mad_agent_priv->backlog_list))
+ return true;
+
+ return mad_agent_priv->sol_fc_send_count +
+ mad_agent_priv->sol_fc_wait_count >=
+ mad_agent_priv->sol_fc_max;
+}
+
/*
* ib_post_send_mad - Posts MAD(s) to the send queue of the QP associated
* with the registered client
@@ -1214,15 +1271,17 @@ int ib_post_send_mad(struct ib_mad_send_buf *send_buf,
/* Walk list of send WRs and post each on send list */
for (; send_buf; send_buf = next_send_buf) {
-
mad_send_wr = container_of(send_buf,
struct ib_mad_send_wr_private,
send_buf);
mad_agent_priv = mad_send_wr->mad_agent_priv;
- if (!send_buf->mad_agent->send_handler ||
- (send_buf->timeout_ms &&
- !send_buf->mad_agent->recv_handler)) {
+ ret = ib_mad_enforce_security(mad_agent_priv,
+ mad_send_wr->send_wr.pkey_index);
+ if (ret)
+ goto error;
+
+ if (!send_buf->mad_agent->send_handler) {
ret = -EINVAL;
goto error;
}
@@ -1258,15 +1317,19 @@ int ib_post_send_mad(struct ib_mad_send_buf *send_buf,
mad_send_wr->max_retries = send_buf->retries;
mad_send_wr->retries_left = send_buf->retries;
send_buf->retries = 0;
- /* Reference for work request to QP + response */
- mad_send_wr->refcount = 1 + (mad_send_wr->timeout > 0);
- mad_send_wr->status = IB_WC_SUCCESS;
+ change_mad_state(mad_send_wr, IB_MAD_STATE_INIT);
/* Reference MAD agent until send completes */
- atomic_inc(&mad_agent_priv->refcount);
+ refcount_inc(&mad_agent_priv->refcount);
spin_lock_irqsave(&mad_agent_priv->lock, flags);
- list_add_tail(&mad_send_wr->agent_list,
- &mad_agent_priv->send_list);
+ mad_send_wr->is_solicited_fc = is_solicited_fc_mad(mad_send_wr);
+ if (mad_is_for_backlog(mad_send_wr)) {
+ change_mad_state(mad_send_wr, IB_MAD_STATE_QUEUED);
+ spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
+ return 0;
+ }
+
+ change_mad_state(mad_send_wr, IB_MAD_STATE_SEND_START);
spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
if (ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent)) {
@@ -1278,9 +1341,9 @@ int ib_post_send_mad(struct ib_mad_send_buf *send_buf,
if (ret < 0) {
/* Fail send request */
spin_lock_irqsave(&mad_agent_priv->lock, flags);
- list_del(&mad_send_wr->agent_list);
+ change_mad_state(mad_send_wr, IB_MAD_STATE_DONE);
spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
- atomic_dec(&mad_agent_priv->refcount);
+ deref_mad_agent(mad_agent_priv);
goto error;
}
}
@@ -1320,25 +1383,6 @@ void ib_free_recv_mad(struct ib_mad_recv_wc *mad_recv_wc)
}
EXPORT_SYMBOL(ib_free_recv_mad);
-struct ib_mad_agent *ib_redirect_mad_qp(struct ib_qp *qp,
- u8 rmpp_version,
- ib_mad_send_handler send_handler,
- ib_mad_recv_handler recv_handler,
- void *context)
-{
- return ERR_PTR(-EINVAL); /* XXX: for now */
-}
-EXPORT_SYMBOL(ib_redirect_mad_qp);
-
-int ib_process_mad_wc(struct ib_mad_agent *mad_agent,
- struct ib_wc *wc)
-{
- dev_err(&mad_agent->device->dev,
- "ib_process_mad_wc() not implemented yet\n");
- return 0;
-}
-EXPORT_SYMBOL(ib_process_mad_wc);
-
static int method_in_use(struct ib_mad_mgmt_method_table **method,
struct ib_mad_reg_req *mad_reg_req)
{
@@ -1426,11 +1470,9 @@ static void remove_methods_mad_agent(struct ib_mad_mgmt_method_table *method,
int i;
/* Remove any methods for this mad agent */
- for (i = 0; i < IB_MGMT_MAX_METHODS; i++) {
- if (method->agent[i] == agent) {
+ for (i = 0; i < IB_MGMT_MAX_METHODS; i++)
+ if (method->agent[i] == agent)
method->agent[i] = NULL;
- }
- }
}
static int add_nonoui_reg_req(struct ib_mad_reg_req *mad_reg_req,
@@ -1531,7 +1573,8 @@ static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req,
mad_reg_req->oui, 3)) {
method = &(*vendor_table)->vendor_class[
vclass]->method_table[i];
- BUG_ON(!*method);
+ if (!*method)
+ goto error3;
goto check_in_use;
}
}
@@ -1541,10 +1584,12 @@ static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req,
vclass]->oui[i])) {
method = &(*vendor_table)->vendor_class[
vclass]->method_table[i];
- BUG_ON(*method);
/* Allocate method table for this OUI */
- if ((ret = allocate_method_table(method)))
- goto error3;
+ if (!*method) {
+ ret = allocate_method_table(method);
+ if (ret)
+ goto error3;
+ }
memcpy((*vendor_table)->vendor_class[vclass]->oui[i],
mad_reg_req->oui, 3);
goto check_in_use;
@@ -1602,9 +1647,8 @@ static void remove_mad_reg_req(struct ib_mad_agent_private *agent_priv)
* Was MAD registration request supplied
* with original registration ?
*/
- if (!agent_priv->reg_req) {
+ if (!agent_priv->reg_req)
goto out;
- }
port_priv = agent_priv->qp_info->port_priv;
mgmt_class = convert_mgmt_class(agent_priv->reg_req->mgmt_class);
@@ -1692,22 +1736,19 @@ find_mad_agent(struct ib_mad_port_private *port_priv,
struct ib_mad_agent_private *mad_agent = NULL;
unsigned long flags;
- spin_lock_irqsave(&port_priv->reg_lock, flags);
if (ib_response_mad(mad_hdr)) {
u32 hi_tid;
- struct ib_mad_agent_private *entry;
/*
* Routing is based on high 32 bits of transaction ID
* of MAD.
*/
hi_tid = be64_to_cpu(mad_hdr->tid) >> 32;
- list_for_each_entry(entry, &port_priv->agent_list, agent_list) {
- if (entry->agent.hi_tid == hi_tid) {
- mad_agent = entry;
- break;
- }
- }
+ rcu_read_lock();
+ mad_agent = xa_load(&ib_mad_clients, hi_tid);
+ if (mad_agent && !refcount_inc_not_zero(&mad_agent->refcount))
+ mad_agent = NULL;
+ rcu_read_unlock();
} else {
struct ib_mad_mgmt_class_table *class;
struct ib_mad_mgmt_method_table *method;
@@ -1716,6 +1757,7 @@ find_mad_agent(struct ib_mad_port_private *port_priv,
const struct ib_vendor_mad *vendor_mad;
int index;
+ spin_lock_irqsave(&port_priv->reg_lock, flags);
/*
* Routing is based on version, class, and method
* For "newer" vendor MADs, also based on OUI
@@ -1755,20 +1797,19 @@ find_mad_agent(struct ib_mad_port_private *port_priv,
~IB_MGMT_METHOD_RESP];
}
}
+ if (mad_agent)
+ refcount_inc(&mad_agent->refcount);
+out:
+ spin_unlock_irqrestore(&port_priv->reg_lock, flags);
}
- if (mad_agent) {
- if (mad_agent->agent.recv_handler)
- atomic_inc(&mad_agent->refcount);
- else {
- dev_notice(&port_priv->device->dev,
- "No receive handler for client %p on port %d\n",
- &mad_agent->agent, port_priv->port_num);
- mad_agent = NULL;
- }
+ if (mad_agent && !mad_agent->agent.recv_handler) {
+ dev_notice(&port_priv->device->dev,
+ "No receive handler for client %p on port %u\n",
+ &mad_agent->agent, port_priv->port_num);
+ deref_mad_agent(mad_agent);
+ mad_agent = NULL;
}
-out:
- spin_unlock_irqrestore(&port_priv->reg_lock, flags);
return mad_agent;
}
@@ -1783,7 +1824,7 @@ static int validate_mad(const struct ib_mad_hdr *mad_hdr,
/* Make sure MAD base version is understood */
if (mad_hdr->base_version != IB_MGMT_BASE_VERSION &&
(!opa || mad_hdr->base_version != OPA_MGMT_BASE_VERSION)) {
- pr_err("MAD received with unsupported base version %d %s\n",
+ pr_err("MAD received with unsupported base version %u %s\n",
mad_hdr->base_version, opa ? "(opa)" : "");
goto out;
}
@@ -1828,16 +1869,18 @@ static inline int rcv_has_same_class(const struct ib_mad_send_wr_private *wr,
rwc->recv_buf.mad->mad_hdr.mgmt_class;
}
-static inline int rcv_has_same_gid(const struct ib_mad_agent_private *mad_agent_priv,
- const struct ib_mad_send_wr_private *wr,
- const struct ib_mad_recv_wc *rwc )
+static inline int
+rcv_has_same_gid(const struct ib_mad_agent_private *mad_agent_priv,
+ const struct ib_mad_send_wr_private *wr,
+ const struct ib_mad_recv_wc *rwc)
{
- struct ib_ah_attr attr;
+ struct rdma_ah_attr attr;
u8 send_resp, rcv_resp;
union ib_gid sgid;
struct ib_device *device = mad_agent_priv->agent.device;
- u8 port_num = mad_agent_priv->agent.port_num;
+ u32 port_num = mad_agent_priv->agent.port_num;
u8 lmc;
+ bool has_grh;
send_resp = ib_response_mad((struct ib_mad_hdr *)wr->send_buf.mad);
rcv_resp = ib_response_mad(&rwc->recv_buf.mad->mad_hdr);
@@ -1846,36 +1889,40 @@ static inline int rcv_has_same_gid(const struct ib_mad_agent_private *mad_agent_
/* both requests, or both responses. GIDs different */
return 0;
- if (ib_query_ah(wr->send_buf.ah, &attr))
+ if (rdma_query_ah(wr->send_buf.ah, &attr))
/* Assume not equal, to avoid false positives. */
return 0;
- if (!!(attr.ah_flags & IB_AH_GRH) !=
- !!(rwc->wc->wc_flags & IB_WC_GRH))
+ has_grh = !!(rdma_ah_get_ah_flags(&attr) & IB_AH_GRH);
+ if (has_grh != !!(rwc->wc->wc_flags & IB_WC_GRH))
/* one has GID, other does not. Assume different */
return 0;
if (!send_resp && rcv_resp) {
/* is request/response. */
- if (!(attr.ah_flags & IB_AH_GRH)) {
+ if (!has_grh) {
if (ib_get_cached_lmc(device, port_num, &lmc))
return 0;
- return (!lmc || !((attr.src_path_bits ^
+ return (!lmc || !((rdma_ah_get_path_bits(&attr) ^
rwc->wc->dlid_path_bits) &
((1 << lmc) - 1)));
} else {
- if (ib_get_cached_gid(device, port_num,
- attr.grh.sgid_index, &sgid, NULL))
+ const struct ib_global_route *grh =
+ rdma_ah_read_grh(&attr);
+
+ if (rdma_query_gid(device, port_num,
+ grh->sgid_index, &sgid))
return 0;
return !memcmp(sgid.raw, rwc->recv_buf.grh->dgid.raw,
16);
}
}
- if (!(attr.ah_flags & IB_AH_GRH))
- return attr.dlid == rwc->wc->slid;
+ if (!has_grh)
+ return rdma_ah_get_dlid(&attr) == rwc->wc->slid;
else
- return !memcmp(attr.grh.dgid.raw, rwc->recv_buf.grh->sgid.raw,
+ return !memcmp(rdma_ah_read_grh(&attr)->dgid.raw,
+ rwc->recv_buf.grh->sgid.raw,
16);
}
@@ -1902,7 +1949,19 @@ ib_find_send_mad(const struct ib_mad_agent_private *mad_agent_priv,
*/
(is_direct(mad_hdr->mgmt_class) ||
rcv_has_same_gid(mad_agent_priv, wr, wc)))
- return (wr->status == IB_WC_SUCCESS) ? wr : NULL;
+ return (wr->state != IB_MAD_STATE_CANCELED) ? wr : NULL;
+ }
+
+ list_for_each_entry(wr, &mad_agent_priv->backlog_list, agent_list) {
+ if ((wr->tid == mad_hdr->tid) &&
+ rcv_has_same_class(wr, wc) &&
+ /*
+ * Don't check GID for direct routed MADs.
+ * These might have permissive LIDs.
+ */
+ (is_direct(mad_hdr->mgmt_class) ||
+ rcv_has_same_gid(mad_agent_priv, wr, wc)))
+ return (wr->state != IB_MAD_STATE_CANCELED) ? wr : NULL;
}
/*
@@ -1921,17 +1980,55 @@ ib_find_send_mad(const struct ib_mad_agent_private *mad_agent_priv,
(is_direct(mad_hdr->mgmt_class) ||
rcv_has_same_gid(mad_agent_priv, wr, wc)))
/* Verify request has not been canceled */
- return (wr->status == IB_WC_SUCCESS) ? wr : NULL;
+ return (wr->state != IB_MAD_STATE_CANCELED) ? wr : NULL;
}
return NULL;
}
+static void
+process_backlog_mads(struct ib_mad_agent_private *mad_agent_priv)
+{
+ struct ib_mad_send_wr_private *mad_send_wr;
+ struct ib_mad_send_wc mad_send_wc = {};
+ unsigned long flags;
+ int ret;
+
+ spin_lock_irqsave(&mad_agent_priv->lock, flags);
+ while (!list_empty(&mad_agent_priv->backlog_list) &&
+ (mad_agent_priv->sol_fc_send_count +
+ mad_agent_priv->sol_fc_wait_count <
+ mad_agent_priv->sol_fc_max)) {
+ mad_send_wr = list_entry(mad_agent_priv->backlog_list.next,
+ struct ib_mad_send_wr_private,
+ agent_list);
+ change_mad_state(mad_send_wr, IB_MAD_STATE_SEND_START);
+ spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
+ ret = ib_send_mad(mad_send_wr);
+ if (ret) {
+ spin_lock_irqsave(&mad_agent_priv->lock, flags);
+ deref_mad_agent(mad_agent_priv);
+ change_mad_state(mad_send_wr, IB_MAD_STATE_DONE);
+ spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
+ mad_send_wc.send_buf = &mad_send_wr->send_buf;
+ mad_send_wc.status = IB_WC_LOC_QP_OP_ERR;
+ mad_agent_priv->agent.send_handler(
+ &mad_agent_priv->agent, &mad_send_wc);
+ }
+
+ spin_lock_irqsave(&mad_agent_priv->lock, flags);
+ }
+
+ spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
+}
+
void ib_mark_mad_done(struct ib_mad_send_wr_private *mad_send_wr)
{
mad_send_wr->timeout = 0;
- if (mad_send_wr->refcount == 1)
- list_move_tail(&mad_send_wr->agent_list,
- &mad_send_wr->mad_agent_priv->done_list);
+ if (mad_send_wr->state == IB_MAD_STATE_WAIT_RESP ||
+ mad_send_wr->state == IB_MAD_STATE_QUEUED)
+ change_mad_state(mad_send_wr, IB_MAD_STATE_DONE);
+ else
+ change_mad_state(mad_send_wr, IB_MAD_STATE_EARLY_RESP);
}
static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
@@ -1940,8 +2037,18 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
struct ib_mad_send_wr_private *mad_send_wr;
struct ib_mad_send_wc mad_send_wc;
unsigned long flags;
+ bool is_mad_done;
+ int ret;
INIT_LIST_HEAD(&mad_recv_wc->rmpp_list);
+ ret = ib_mad_enforce_security(mad_agent_priv,
+ mad_recv_wc->wc->pkey_index);
+ if (ret) {
+ ib_free_recv_mad(mad_recv_wc);
+ deref_mad_agent(mad_agent_priv);
+ return;
+ }
+
list_add(&mad_recv_wc->recv_buf.list, &mad_recv_wc->rmpp_list);
if (ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent)) {
mad_recv_wc = ib_process_rmpp_recv_wc(mad_agent_priv,
@@ -1968,16 +2075,18 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
mad_agent_priv->agent.recv_handler(
&mad_agent_priv->agent, NULL,
mad_recv_wc);
- atomic_dec(&mad_agent_priv->refcount);
+ deref_mad_agent(mad_agent_priv);
} else {
/* not user rmpp, revert to normal behavior and
- * drop the mad */
+ * drop the mad
+ */
ib_free_recv_mad(mad_recv_wc);
deref_mad_agent(mad_agent_priv);
return;
}
} else {
ib_mark_mad_done(mad_send_wr);
+ is_mad_done = (mad_send_wr->state == IB_MAD_STATE_DONE);
spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
/* Defined behavior is to complete response before request */
@@ -1985,12 +2094,15 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
&mad_agent_priv->agent,
&mad_send_wr->send_buf,
mad_recv_wc);
- atomic_dec(&mad_agent_priv->refcount);
+ deref_mad_agent(mad_agent_priv);
- mad_send_wc.status = IB_WC_SUCCESS;
- mad_send_wc.vendor_err = 0;
- mad_send_wc.send_buf = &mad_send_wr->send_buf;
- ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc);
+ if (is_mad_done) {
+ mad_send_wc.status = IB_WC_SUCCESS;
+ mad_send_wc.vendor_err = 0;
+ mad_send_wc.send_buf = &mad_send_wr->send_buf;
+ ib_mad_complete_send_wr(mad_send_wr,
+ &mad_send_wc);
+ }
}
} else {
mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent, NULL,
@@ -2002,13 +2114,15 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
static enum smi_action handle_ib_smi(const struct ib_mad_port_private *port_priv,
const struct ib_mad_qp_info *qp_info,
const struct ib_wc *wc,
- int port_num,
+ u32 port_num,
struct ib_mad_private *recv,
struct ib_mad_private *response)
{
enum smi_forward_action retsmi;
struct ib_smp *smp = (struct ib_smp *)recv->mad;
+ trace_ib_mad_handle_ib_smi(smp);
+
if (smi_handle_dr_smp_recv(smp,
rdma_cap_ib_switch(port_priv->device),
port_num,
@@ -2087,13 +2201,15 @@ static enum smi_action
handle_opa_smi(struct ib_mad_port_private *port_priv,
struct ib_mad_qp_info *qp_info,
struct ib_wc *wc,
- int port_num,
+ u32 port_num,
struct ib_mad_private *recv,
struct ib_mad_private *response)
{
enum smi_forward_action retsmi;
struct opa_smp *smp = (struct opa_smp *)recv->mad;
+ trace_ib_mad_handle_opa_smi(smp);
+
if (opa_smi_handle_dr_smp_recv(smp,
rdma_cap_ib_switch(port_priv->device),
port_num,
@@ -2141,7 +2257,7 @@ static enum smi_action
handle_smi(struct ib_mad_port_private *port_priv,
struct ib_mad_qp_info *qp_info,
struct ib_wc *wc,
- int port_num,
+ u32 port_num,
struct ib_mad_private *recv,
struct ib_mad_private *response,
bool opa)
@@ -2165,7 +2281,7 @@ static void ib_mad_recv_done(struct ib_cq *cq, struct ib_wc *wc)
struct ib_mad_private_header *mad_priv_hdr;
struct ib_mad_private *recv, *response = NULL;
struct ib_mad_agent_private *mad_agent;
- int port_num;
+ u32 port_num;
int ret = IB_MAD_RESULT_SUCCESS;
size_t mad_size;
u16 resp_mad_pkey_index = 0;
@@ -2211,13 +2327,13 @@ static void ib_mad_recv_done(struct ib_cq *cq, struct ib_wc *wc)
recv->header.recv_wc.recv_buf.mad = (struct ib_mad *)recv->mad;
recv->header.recv_wc.recv_buf.grh = &recv->grh;
- if (atomic_read(&qp_info->snoop_count))
- snoop_recv(qp_info, &recv->header.recv_wc, IB_MAD_SNOOP_RECVS);
-
/* Validate MAD */
if (!validate_mad((const struct ib_mad_hdr *)recv->mad, qp_info, opa))
goto out;
+ trace_ib_mad_recv_done_handler(qp_info, wc,
+ (struct ib_mad_hdr *)recv->mad);
+
mad_size = recv->mad_size;
response = alloc_mad_private(mad_size, GFP_KERNEL);
if (!response)
@@ -2237,14 +2353,12 @@ static void ib_mad_recv_done(struct ib_cq *cq, struct ib_wc *wc)
}
/* Give driver "right of first refusal" on incoming MAD */
- if (port_priv->device->process_mad) {
- ret = port_priv->device->process_mad(port_priv->device, 0,
- port_priv->port_num,
- wc, &recv->grh,
- (const struct ib_mad_hdr *)recv->mad,
- recv->mad_size,
- (struct ib_mad_hdr *)response->mad,
- &mad_size, &resp_mad_pkey_index);
+ if (port_priv->device->ops.process_mad) {
+ ret = port_priv->device->ops.process_mad(
+ port_priv->device, 0, port_priv->port_num, wc,
+ &recv->grh, (const struct ib_mad *)recv->mad,
+ (struct ib_mad *)response->mad, &mad_size,
+ &resp_mad_pkey_index);
if (opa)
wc->pkey_index = resp_mad_pkey_index;
@@ -2266,6 +2380,7 @@ static void ib_mad_recv_done(struct ib_cq *cq, struct ib_wc *wc)
mad_agent = find_mad_agent(port_priv, (const struct ib_mad_hdr *)recv->mad);
if (mad_agent) {
+ trace_ib_mad_recv_done_agent(mad_agent);
ib_mad_complete_recv(mad_agent, &recv->header.recv_wc);
/*
* recv is freed up in error cases in ib_mad_complete_recv
@@ -2315,29 +2430,11 @@ static void adjust_timeout(struct ib_mad_agent_private *mad_agent_priv)
static void wait_for_response(struct ib_mad_send_wr_private *mad_send_wr)
{
struct ib_mad_agent_private *mad_agent_priv;
- struct ib_mad_send_wr_private *temp_mad_send_wr;
- struct list_head *list_item;
unsigned long delay;
mad_agent_priv = mad_send_wr->mad_agent_priv;
- list_del(&mad_send_wr->agent_list);
-
delay = mad_send_wr->timeout;
- mad_send_wr->timeout += jiffies;
-
- if (delay) {
- list_for_each_prev(list_item, &mad_agent_priv->wait_list) {
- temp_mad_send_wr = list_entry(list_item,
- struct ib_mad_send_wr_private,
- agent_list);
- if (time_after(mad_send_wr->timeout,
- temp_mad_send_wr->timeout))
- break;
- }
- }
- else
- list_item = &mad_agent_priv->wait_list;
- list_add(&mad_send_wr->agent_list, list_item);
+ change_mad_state(mad_send_wr, IB_MAD_STATE_WAIT_RESP);
/* Reschedule a work item if we have a shorter timeout */
if (mad_agent_priv->wait_list.next == &mad_send_wr->agent_list)
@@ -2346,7 +2443,7 @@ static void wait_for_response(struct ib_mad_send_wr_private *mad_send_wr)
}
void ib_reset_mad_timeout(struct ib_mad_send_wr_private *mad_send_wr,
- int timeout_ms)
+ unsigned long timeout_ms)
{
mad_send_wr->timeout = msecs_to_jiffies(timeout_ms);
wait_for_response(mad_send_wr);
@@ -2371,32 +2468,28 @@ void ib_mad_complete_send_wr(struct ib_mad_send_wr_private *mad_send_wr,
} else
ret = IB_RMPP_RESULT_UNHANDLED;
- if (mad_send_wc->status != IB_WC_SUCCESS &&
- mad_send_wr->status == IB_WC_SUCCESS) {
- mad_send_wr->status = mad_send_wc->status;
- mad_send_wr->refcount -= (mad_send_wr->timeout > 0);
- }
-
- if (--mad_send_wr->refcount > 0) {
- if (mad_send_wr->refcount == 1 && mad_send_wr->timeout &&
- mad_send_wr->status == IB_WC_SUCCESS) {
- wait_for_response(mad_send_wr);
- }
+ if (mad_send_wr->state == IB_MAD_STATE_CANCELED)
+ mad_send_wc->status = IB_WC_WR_FLUSH_ERR;
+ else if (mad_send_wr->state == IB_MAD_STATE_SEND_START &&
+ mad_send_wr->timeout) {
+ wait_for_response(mad_send_wr);
goto done;
}
/* Remove send from MAD agent and notify client of completion */
- list_del(&mad_send_wr->agent_list);
+ if (mad_send_wr->state != IB_MAD_STATE_DONE)
+ change_mad_state(mad_send_wr, IB_MAD_STATE_DONE);
adjust_timeout(mad_agent_priv);
spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
- if (mad_send_wr->status != IB_WC_SUCCESS )
- mad_send_wc->status = mad_send_wr->status;
- if (ret == IB_RMPP_RESULT_INTERNAL)
+ if (ret == IB_RMPP_RESULT_INTERNAL) {
ib_rmpp_send_handler(mad_send_wc);
- else
+ } else {
+ if (mad_send_wr->is_solicited_fc)
+ process_backlog_mads(mad_agent_priv);
mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
mad_send_wc);
+ }
/* Release reference on agent taken when sending */
deref_mad_agent(mad_agent_priv);
@@ -2413,7 +2506,6 @@ static void ib_mad_send_done(struct ib_cq *cq, struct ib_wc *wc)
struct ib_mad_send_wr_private *mad_send_wr, *queued_send_wr;
struct ib_mad_qp_info *qp_info;
struct ib_mad_queue *send_queue;
- struct ib_send_wr *bad_send_wr;
struct ib_mad_send_wc mad_send_wc;
unsigned long flags;
int ret;
@@ -2431,6 +2523,9 @@ static void ib_mad_send_done(struct ib_cq *cq, struct ib_wc *wc)
send_queue = mad_list->mad_queue;
qp_info = send_queue->qp_info;
+ trace_ib_mad_send_done_agent(mad_send_wr->mad_agent_priv);
+ trace_ib_mad_send_done_handler(mad_send_wr, wc);
+
retry:
ib_dma_unmap_single(mad_send_wr->send_buf.mad_agent->device,
mad_send_wr->header_mapping,
@@ -2456,14 +2551,12 @@ retry:
mad_send_wc.send_buf = &mad_send_wr->send_buf;
mad_send_wc.status = wc->status;
mad_send_wc.vendor_err = wc->vendor_err;
- if (atomic_read(&qp_info->snoop_count))
- snoop_send(qp_info, &mad_send_wr->send_buf, &mad_send_wc,
- IB_MAD_SNOOP_SEND_COMPLETIONS);
ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc);
if (queued_send_wr) {
+ trace_ib_mad_send_done_resend(queued_send_wr, qp_info);
ret = ib_post_send(qp_info->qp, &queued_send_wr->send_wr.wr,
- &bad_send_wr);
+ NULL);
if (ret) {
dev_err(&port_priv->device->dev,
"ib_post_send failed: %d\n", ret);
@@ -2508,11 +2601,10 @@ static bool ib_mad_send_error(struct ib_mad_port_private *port_priv,
if (wc->status == IB_WC_WR_FLUSH_ERR) {
if (mad_send_wr->retry) {
/* Repost send */
- struct ib_send_wr *bad_send_wr;
-
mad_send_wr->retry = 0;
+ trace_ib_mad_error_handler(mad_send_wr, qp_info);
ret = ib_post_send(qp_info->qp, &mad_send_wr->send_wr.wr,
- &bad_send_wr);
+ NULL);
if (!ret)
return false;
}
@@ -2539,40 +2631,53 @@ static bool ib_mad_send_error(struct ib_mad_port_private *port_priv,
return true;
}
+static void clear_mad_error_list(struct list_head *list,
+ enum ib_wc_status wc_status,
+ struct ib_mad_agent_private *mad_agent_priv)
+{
+ struct ib_mad_send_wr_private *mad_send_wr, *n;
+ struct ib_mad_send_wc mad_send_wc;
+
+ mad_send_wc.status = wc_status;
+ mad_send_wc.vendor_err = 0;
+
+ list_for_each_entry_safe(mad_send_wr, n, list, agent_list) {
+ mad_send_wc.send_buf = &mad_send_wr->send_buf;
+ mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
+ &mad_send_wc);
+ deref_mad_agent(mad_agent_priv);
+ }
+}
+
static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv)
{
unsigned long flags;
struct ib_mad_send_wr_private *mad_send_wr, *temp_mad_send_wr;
- struct ib_mad_send_wc mad_send_wc;
struct list_head cancel_list;
INIT_LIST_HEAD(&cancel_list);
spin_lock_irqsave(&mad_agent_priv->lock, flags);
list_for_each_entry_safe(mad_send_wr, temp_mad_send_wr,
- &mad_agent_priv->send_list, agent_list) {
- if (mad_send_wr->status == IB_WC_SUCCESS) {
- mad_send_wr->status = IB_WC_WR_FLUSH_ERR;
- mad_send_wr->refcount -= (mad_send_wr->timeout > 0);
- }
- }
-
- /* Empty wait list to prevent receives from finding a request */
- list_splice_init(&mad_agent_priv->wait_list, &cancel_list);
- spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
+ &mad_agent_priv->send_list, agent_list)
+ change_mad_state(mad_send_wr, IB_MAD_STATE_CANCELED);
- /* Report all cancelled requests */
- mad_send_wc.status = IB_WC_WR_FLUSH_ERR;
- mad_send_wc.vendor_err = 0;
+ /* Empty wait & backlog list to prevent receives from finding request */
+ list_for_each_entry_safe(mad_send_wr, temp_mad_send_wr,
+ &mad_agent_priv->wait_list, agent_list) {
+ change_mad_state(mad_send_wr, IB_MAD_STATE_DONE);
+ list_add_tail(&mad_send_wr->agent_list, &cancel_list);
+ }
list_for_each_entry_safe(mad_send_wr, temp_mad_send_wr,
- &cancel_list, agent_list) {
- mad_send_wc.send_buf = &mad_send_wr->send_buf;
- list_del(&mad_send_wr->agent_list);
- mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
- &mad_send_wc);
- atomic_dec(&mad_agent_priv->refcount);
+ &mad_agent_priv->backlog_list, agent_list) {
+ change_mad_state(mad_send_wr, IB_MAD_STATE_DONE);
+ list_add_tail(&mad_send_wr->agent_list, &cancel_list);
}
+
+ spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
+ /* Report all cancelled requests */
+ clear_mad_error_list(&cancel_list, IB_WC_WR_FLUSH_ERR, mad_agent_priv);
}
static struct ib_mad_send_wr_private*
@@ -2594,31 +2699,40 @@ find_send_wr(struct ib_mad_agent_private *mad_agent_priv,
&mad_send_wr->send_buf == send_buf)
return mad_send_wr;
}
+
+ list_for_each_entry(mad_send_wr, &mad_agent_priv->backlog_list,
+ agent_list) {
+ if (&mad_send_wr->send_buf == send_buf)
+ return mad_send_wr;
+ }
+
return NULL;
}
-int ib_modify_mad(struct ib_mad_agent *mad_agent,
- struct ib_mad_send_buf *send_buf, u32 timeout_ms)
+int ib_modify_mad(struct ib_mad_send_buf *send_buf, u32 timeout_ms)
{
struct ib_mad_agent_private *mad_agent_priv;
struct ib_mad_send_wr_private *mad_send_wr;
unsigned long flags;
int active;
- mad_agent_priv = container_of(mad_agent, struct ib_mad_agent_private,
- agent);
+ if (!send_buf)
+ return -EINVAL;
+
+ mad_agent_priv = container_of(send_buf->mad_agent,
+ struct ib_mad_agent_private, agent);
spin_lock_irqsave(&mad_agent_priv->lock, flags);
mad_send_wr = find_send_wr(mad_agent_priv, send_buf);
- if (!mad_send_wr || mad_send_wr->status != IB_WC_SUCCESS) {
+ if (!mad_send_wr || mad_send_wr->state == IB_MAD_STATE_CANCELED) {
spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
return -EINVAL;
}
- active = (!mad_send_wr->timeout || mad_send_wr->refcount > 1);
- if (!timeout_ms) {
- mad_send_wr->status = IB_WC_WR_FLUSH_ERR;
- mad_send_wr->refcount -= (mad_send_wr->timeout > 0);
- }
+ active = ((mad_send_wr->state == IB_MAD_STATE_SEND_START) ||
+ (mad_send_wr->state == IB_MAD_STATE_EARLY_RESP) ||
+ (mad_send_wr->state == IB_MAD_STATE_QUEUED && timeout_ms));
+ if (!timeout_ms)
+ change_mad_state(mad_send_wr, IB_MAD_STATE_CANCELED);
mad_send_wr->send_buf.timeout_ms = timeout_ms;
if (active)
@@ -2631,13 +2745,6 @@ int ib_modify_mad(struct ib_mad_agent *mad_agent,
}
EXPORT_SYMBOL(ib_modify_mad);
-void ib_cancel_mad(struct ib_mad_agent *mad_agent,
- struct ib_mad_send_buf *send_buf)
-{
- ib_modify_mad(mad_agent, send_buf, 0);
-}
-EXPORT_SYMBOL(ib_cancel_mad);
-
static void local_completions(struct work_struct *work)
{
struct ib_mad_agent_private *mad_agent_priv;
@@ -2700,16 +2807,12 @@ static void local_completions(struct work_struct *work)
local->mad_priv->header.recv_wc.recv_buf.grh = NULL;
local->mad_priv->header.recv_wc.recv_buf.mad =
(struct ib_mad *)local->mad_priv->mad;
- if (atomic_read(&recv_mad_agent->qp_info->snoop_count))
- snoop_recv(recv_mad_agent->qp_info,
- &local->mad_priv->header.recv_wc,
- IB_MAD_SNOOP_RECVS);
recv_mad_agent->agent.recv_handler(
&recv_mad_agent->agent,
&local->mad_send_wr->send_buf,
&local->mad_priv->header.recv_wc);
spin_lock_irqsave(&recv_mad_agent->lock, flags);
- atomic_dec(&recv_mad_agent->refcount);
+ deref_mad_agent(recv_mad_agent);
spin_unlock_irqrestore(&recv_mad_agent->lock, flags);
}
@@ -2718,15 +2821,11 @@ local_send_completion:
mad_send_wc.status = IB_WC_SUCCESS;
mad_send_wc.vendor_err = 0;
mad_send_wc.send_buf = &local->mad_send_wr->send_buf;
- if (atomic_read(&mad_agent_priv->qp_info->snoop_count))
- snoop_send(mad_agent_priv->qp_info,
- &local->mad_send_wr->send_buf,
- &mad_send_wc, IB_MAD_SNOOP_SEND_COMPLETIONS);
mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
&mad_send_wc);
spin_lock_irqsave(&mad_agent_priv->lock, flags);
- atomic_dec(&mad_agent_priv->refcount);
+ deref_mad_agent(mad_agent_priv);
if (free_mad)
kfree(local->mad_priv);
kfree(local);
@@ -2745,6 +2844,11 @@ static int retry_send(struct ib_mad_send_wr_private *mad_send_wr)
mad_send_wr->send_buf.retries++;
mad_send_wr->timeout = msecs_to_jiffies(mad_send_wr->send_buf.timeout_ms);
+ if (mad_send_wr->is_solicited_fc &&
+ !list_empty(&mad_send_wr->mad_agent_priv->backlog_list)) {
+ change_mad_state(mad_send_wr, IB_MAD_STATE_QUEUED);
+ return 0;
+ }
if (ib_mad_kernel_rmpp_agent(&mad_send_wr->mad_agent_priv->agent)) {
ret = ib_retry_rmpp(mad_send_wr);
@@ -2762,24 +2866,25 @@ static int retry_send(struct ib_mad_send_wr_private *mad_send_wr)
} else
ret = ib_send_mad(mad_send_wr);
- if (!ret) {
- mad_send_wr->refcount++;
- list_add_tail(&mad_send_wr->agent_list,
- &mad_send_wr->mad_agent_priv->send_list);
- }
+ if (!ret)
+ change_mad_state(mad_send_wr, IB_MAD_STATE_SEND_START);
+
return ret;
}
static void timeout_sends(struct work_struct *work)
{
- struct ib_mad_agent_private *mad_agent_priv;
struct ib_mad_send_wr_private *mad_send_wr;
- struct ib_mad_send_wc mad_send_wc;
+ struct ib_mad_agent_private *mad_agent_priv;
+ struct list_head timeout_list;
+ struct list_head cancel_list;
+ struct list_head *list_item;
unsigned long flags, delay;
mad_agent_priv = container_of(work, struct ib_mad_agent_private,
timed_work.work);
- mad_send_wc.vendor_err = 0;
+ INIT_LIST_HEAD(&timeout_list);
+ INIT_LIST_HEAD(&cancel_list);
spin_lock_irqsave(&mad_agent_priv->lock, flags);
while (!list_empty(&mad_agent_priv->wait_list)) {
@@ -2797,25 +2902,22 @@ static void timeout_sends(struct work_struct *work)
break;
}
- list_del(&mad_send_wr->agent_list);
- if (mad_send_wr->status == IB_WC_SUCCESS &&
- !retry_send(mad_send_wr))
- continue;
-
- spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
-
- if (mad_send_wr->status == IB_WC_SUCCESS)
- mad_send_wc.status = IB_WC_RESP_TIMEOUT_ERR;
+ if (mad_send_wr->state == IB_MAD_STATE_CANCELED)
+ list_item = &cancel_list;
+ else if (retry_send(mad_send_wr))
+ list_item = &timeout_list;
else
- mad_send_wc.status = mad_send_wr->status;
- mad_send_wc.send_buf = &mad_send_wr->send_buf;
- mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
- &mad_send_wc);
+ continue;
- atomic_dec(&mad_agent_priv->refcount);
- spin_lock_irqsave(&mad_agent_priv->lock, flags);
+ change_mad_state(mad_send_wr, IB_MAD_STATE_DONE);
+ list_add_tail(&mad_send_wr->agent_list, list_item);
}
+
spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
+ process_backlog_mads(mad_agent_priv);
+ clear_mad_error_list(&timeout_list, IB_WC_RESP_TIMEOUT_ERR,
+ mad_agent_priv);
+ clear_mad_error_list(&cancel_list, IB_WC_WR_FLUSH_ERR, mad_agent_priv);
}
/*
@@ -2825,11 +2927,11 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
struct ib_mad_private *mad)
{
unsigned long flags;
- int post, ret;
struct ib_mad_private *mad_priv;
struct ib_sge sg_list;
- struct ib_recv_wr recv_wr, *bad_recv_wr;
+ struct ib_recv_wr recv_wr;
struct ib_mad_queue *recv_queue = &qp_info->recv_queue;
+ int ret = 0;
/* Initialize common scatter list fields */
sg_list.lkey = qp_info->port_priv->pd->local_dma_lkey;
@@ -2839,7 +2941,7 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
recv_wr.sg_list = &sg_list;
recv_wr.num_sge = 1;
- do {
+ while (true) {
/* Allocate and map receive buffer */
if (mad) {
mad_priv = mad;
@@ -2847,10 +2949,8 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
} else {
mad_priv = alloc_mad_private(port_mad_size(qp_info->port_priv),
GFP_ATOMIC);
- if (!mad_priv) {
- ret = -ENOMEM;
- break;
- }
+ if (!mad_priv)
+ return -ENOMEM;
}
sg_list.length = mad_priv_dma_size(mad_priv);
sg_list.addr = ib_dma_map_single(qp_info->port_priv->device,
@@ -2860,35 +2960,40 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
if (unlikely(ib_dma_mapping_error(qp_info->port_priv->device,
sg_list.addr))) {
ret = -ENOMEM;
- break;
+ goto free_mad_priv;
}
mad_priv->header.mapping = sg_list.addr;
mad_priv->header.mad_list.mad_queue = recv_queue;
mad_priv->header.mad_list.cqe.done = ib_mad_recv_done;
recv_wr.wr_cqe = &mad_priv->header.mad_list.cqe;
-
- /* Post receive WR */
spin_lock_irqsave(&recv_queue->lock, flags);
- post = (++recv_queue->count < recv_queue->max_active);
- list_add_tail(&mad_priv->header.mad_list.list, &recv_queue->list);
+ if (recv_queue->count >= recv_queue->max_active) {
+ /* Fully populated the receive queue */
+ spin_unlock_irqrestore(&recv_queue->lock, flags);
+ break;
+ }
+ recv_queue->count++;
+ list_add_tail(&mad_priv->header.mad_list.list,
+ &recv_queue->list);
spin_unlock_irqrestore(&recv_queue->lock, flags);
- ret = ib_post_recv(qp_info->qp, &recv_wr, &bad_recv_wr);
+
+ ret = ib_post_recv(qp_info->qp, &recv_wr, NULL);
if (ret) {
spin_lock_irqsave(&recv_queue->lock, flags);
list_del(&mad_priv->header.mad_list.list);
recv_queue->count--;
spin_unlock_irqrestore(&recv_queue->lock, flags);
- ib_dma_unmap_single(qp_info->port_priv->device,
- mad_priv->header.mapping,
- mad_priv_dma_size(mad_priv),
- DMA_FROM_DEVICE);
- kfree(mad_priv);
dev_err(&qp_info->port_priv->device->dev,
"ib_post_recv failed: %d\n", ret);
break;
}
- } while (post);
+ }
+ ib_dma_unmap_single(qp_info->port_priv->device,
+ mad_priv->header.mapping,
+ mad_priv_dma_size(mad_priv), DMA_FROM_DEVICE);
+free_mad_priv:
+ kfree(mad_priv);
return ret;
}
@@ -3017,7 +3122,7 @@ static void qp_event_handler(struct ib_event *event, void *qp_context)
/* It's worse than that! He's dead, Jim! */
dev_err(&qp_info->port_priv->device->dev,
- "Fatal error (%d) on MAD QP (%d)\n",
+ "Fatal error (%d) on MAD QP (%u)\n",
event->event, qp_info->qp->qp_num);
}
@@ -3037,10 +3142,6 @@ static void init_mad_qp(struct ib_mad_port_private *port_priv,
init_mad_queue(qp_info, &qp_info->send_queue);
init_mad_queue(qp_info, &qp_info->recv_queue);
INIT_LIST_HEAD(&qp_info->overflow_list);
- spin_lock_init(&qp_info->snoop_lock);
- qp_info->snoop_table = NULL;
- qp_info->snoop_table_size = 0;
- atomic_set(&qp_info->snoop_count, 0);
}
static int create_mad_qp(struct ib_mad_qp_info *qp_info,
@@ -3084,7 +3185,6 @@ static void destroy_mad_qp(struct ib_mad_qp_info *qp_info)
return;
ib_destroy_qp(qp_info->qp);
- kfree(qp_info->snoop_table);
}
/*
@@ -3092,12 +3192,11 @@ static void destroy_mad_qp(struct ib_mad_qp_info *qp_info)
* Create the QP, PD, MR, and CQ if needed
*/
static int ib_mad_port_open(struct ib_device *device,
- int port_num)
+ u32 port_num)
{
int ret, cq_size;
struct ib_mad_port_private *port_priv;
unsigned long flags;
- char name[sizeof "ib_mad123"];
int has_smi;
if (WARN_ON(rdma_max_mad_size(device, port_num) < IB_MGMT_MAD_SIZE))
@@ -3115,7 +3214,6 @@ static int ib_mad_port_open(struct ib_device *device,
port_priv->device = device;
port_priv->port_num = port_num;
spin_lock_init(&port_priv->reg_lock);
- INIT_LIST_HEAD(&port_priv->agent_list);
init_mad_qp(port_priv, &port_priv->qp_info[0]);
init_mad_qp(port_priv, &port_priv->qp_info[1]);
@@ -3124,18 +3222,18 @@ static int ib_mad_port_open(struct ib_device *device,
if (has_smi)
cq_size *= 2;
- port_priv->cq = ib_alloc_cq(port_priv->device, port_priv, cq_size, 0,
- IB_POLL_WORKQUEUE);
- if (IS_ERR(port_priv->cq)) {
- dev_err(&device->dev, "Couldn't create ib_mad CQ\n");
- ret = PTR_ERR(port_priv->cq);
- goto error3;
- }
-
port_priv->pd = ib_alloc_pd(device, 0);
if (IS_ERR(port_priv->pd)) {
dev_err(&device->dev, "Couldn't create ib_mad PD\n");
ret = PTR_ERR(port_priv->pd);
+ goto error3;
+ }
+
+ port_priv->cq = ib_alloc_cq(port_priv->device, port_priv, cq_size, 0,
+ IB_POLL_UNBOUND_WORKQUEUE);
+ if (IS_ERR(port_priv->cq)) {
+ dev_err(&device->dev, "Couldn't create ib_mad CQ\n");
+ ret = PTR_ERR(port_priv->cq);
goto error4;
}
@@ -3144,12 +3242,15 @@ static int ib_mad_port_open(struct ib_device *device,
if (ret)
goto error6;
}
- ret = create_mad_qp(&port_priv->qp_info[1], IB_QPT_GSI);
- if (ret)
- goto error7;
- snprintf(name, sizeof name, "ib_mad%d", port_num);
- port_priv->wq = alloc_ordered_workqueue(name, WQ_MEM_RECLAIM);
+ if (rdma_cap_ib_cm(device, port_num)) {
+ ret = create_mad_qp(&port_priv->qp_info[1], IB_QPT_GSI);
+ if (ret)
+ goto error7;
+ }
+
+ port_priv->wq = alloc_ordered_workqueue("ib_mad%u", WQ_MEM_RECLAIM,
+ port_num);
if (!port_priv->wq) {
ret = -ENOMEM;
goto error8;
@@ -3178,11 +3279,11 @@ error8:
error7:
destroy_mad_qp(&port_priv->qp_info[0]);
error6:
- ib_dealloc_pd(port_priv->pd);
-error4:
ib_free_cq(port_priv->cq);
cleanup_recv_queue(&port_priv->qp_info[1]);
cleanup_recv_queue(&port_priv->qp_info[0]);
+error4:
+ ib_dealloc_pd(port_priv->pd);
error3:
kfree(port_priv);
@@ -3194,7 +3295,7 @@ error3:
* If there are no classes using the port, free the port
* resources (CQ, MR, PD, QP) and remove the port's info structure
*/
-static int ib_mad_port_close(struct ib_device *device, int port_num)
+static int ib_mad_port_close(struct ib_device *device, u32 port_num)
{
struct ib_mad_port_private *port_priv;
unsigned long flags;
@@ -3203,7 +3304,7 @@ static int ib_mad_port_close(struct ib_device *device, int port_num)
port_priv = __ib_get_mad_port(device, port_num);
if (port_priv == NULL) {
spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
- dev_err(&device->dev, "Port %d not found\n", port_num);
+ dev_err(&device->dev, "Port %u not found\n", port_num);
return -ENODEV;
}
list_del_init(&port_priv->port_list);
@@ -3212,8 +3313,8 @@ static int ib_mad_port_close(struct ib_device *device, int port_num)
destroy_workqueue(port_priv->wq);
destroy_mad_qp(&port_priv->qp_info[1]);
destroy_mad_qp(&port_priv->qp_info[0]);
- ib_dealloc_pd(port_priv->pd);
ib_free_cq(port_priv->cq);
+ ib_dealloc_pd(port_priv->pd);
cleanup_recv_queue(&port_priv->qp_info[1]);
cleanup_recv_queue(&port_priv->qp_info[0]);
/* XXX: Handle deallocation of MAD registration tables */
@@ -3223,9 +3324,11 @@ static int ib_mad_port_close(struct ib_device *device, int port_num)
return 0;
}
-static void ib_mad_init_device(struct ib_device *device)
+static int ib_mad_init_device(struct ib_device *device)
{
int start, i;
+ unsigned int count = 0;
+ int ret;
start = rdma_start_port(device);
@@ -3233,17 +3336,23 @@ static void ib_mad_init_device(struct ib_device *device)
if (!rdma_cap_ib_mad(device, i))
continue;
- if (ib_mad_port_open(device, i)) {
+ ret = ib_mad_port_open(device, i);
+ if (ret) {
dev_err(&device->dev, "Couldn't open port %d\n", i);
goto error;
}
- if (ib_agent_port_open(device, i)) {
+ ret = ib_agent_port_open(device, i);
+ if (ret) {
dev_err(&device->dev,
"Couldn't open port %d for agents\n", i);
goto error_agent;
}
+ count++;
}
- return;
+ if (!count)
+ return -EOPNOTSUPP;
+
+ return 0;
error_agent:
if (ib_mad_port_close(device, i))
@@ -3260,21 +3369,22 @@ error:
if (ib_mad_port_close(device, i))
dev_err(&device->dev, "Couldn't close port %d\n", i);
}
+ return ret;
}
static void ib_mad_remove_device(struct ib_device *device, void *client_data)
{
- int i;
+ unsigned int i;
- for (i = rdma_start_port(device); i <= rdma_end_port(device); i++) {
+ rdma_for_each_port (device, i) {
if (!rdma_cap_ib_mad(device, i))
continue;
if (ib_agent_port_close(device, i))
dev_err(&device->dev,
- "Couldn't close port %d for agents\n", i);
+ "Couldn't close port %u for agents\n", i);
if (ib_mad_port_close(device, i))
- dev_err(&device->dev, "Couldn't close port %d\n", i);
+ dev_err(&device->dev, "Couldn't close port %u\n", i);
}
}
diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h
index 28669f6419e1..f444357d33f4 100644
--- a/drivers/infiniband/core/mad_priv.h
+++ b/drivers/infiniband/core/mad_priv.h
@@ -73,39 +73,44 @@ struct ib_mad_private_header {
struct ib_mad_recv_wc recv_wc;
struct ib_wc wc;
u64 mapping;
-} __attribute__ ((packed));
+} __packed;
struct ib_mad_private {
struct ib_mad_private_header header;
size_t mad_size;
struct ib_grh grh;
- u8 mad[0];
-} __attribute__ ((packed));
+ u8 mad[];
+} __packed;
struct ib_rmpp_segment {
struct list_head list;
u32 num;
- u8 data[0];
+ u8 data[];
};
struct ib_mad_agent_private {
- struct list_head agent_list;
struct ib_mad_agent agent;
struct ib_mad_reg_req *reg_req;
struct ib_mad_qp_info *qp_info;
spinlock_t lock;
struct list_head send_list;
+ unsigned int sol_fc_send_count;
struct list_head wait_list;
- struct list_head done_list;
+ unsigned int sol_fc_wait_count;
struct delayed_work timed_work;
unsigned long timeout;
struct list_head local_list;
struct work_struct local_work;
struct list_head rmpp_list;
-
- atomic_t refcount;
- struct completion comp;
+ unsigned int sol_fc_max;
+ struct list_head backlog_list;
+
+ refcount_t refcount;
+ union {
+ struct completion comp;
+ struct rcu_head rcu;
+ };
};
struct ib_mad_snoop_private {
@@ -113,10 +118,35 @@ struct ib_mad_snoop_private {
struct ib_mad_qp_info *qp_info;
int snoop_index;
int mad_snoop_flags;
- atomic_t refcount;
struct completion comp;
};
+enum ib_mad_state {
+ /* MAD is in the making and is not yet in any list */
+ IB_MAD_STATE_INIT,
+ /* MAD is in backlog list */
+ IB_MAD_STATE_QUEUED,
+ /*
+ * MAD was sent to the QP and is waiting for completion
+ * notification in send list.
+ */
+ IB_MAD_STATE_SEND_START,
+ /*
+ * MAD send completed successfully, waiting for a response
+ * in wait list.
+ */
+ IB_MAD_STATE_WAIT_RESP,
+ /*
+ * Response came early, before send completion notification,
+ * in send list.
+ */
+ IB_MAD_STATE_EARLY_RESP,
+ /* MAD was canceled while in wait or send list */
+ IB_MAD_STATE_CANCELED,
+ /* MAD processing completed, MAD in no list */
+ IB_MAD_STATE_DONE
+};
+
struct ib_mad_send_wr_private {
struct ib_mad_list_head mad_list;
struct list_head agent_list;
@@ -131,8 +161,6 @@ struct ib_mad_send_wr_private {
int max_retries;
int retries_left;
int retry;
- int refcount;
- enum ib_wc_status status;
/* RMPP control */
struct list_head rmpp_list;
@@ -142,8 +170,48 @@ struct ib_mad_send_wr_private {
int seg_num;
int newwin;
int pad;
+
+ enum ib_mad_state state;
+
+ /* Solicited MAD flow control */
+ bool is_solicited_fc;
};
+static inline void expect_mad_state(struct ib_mad_send_wr_private *mad_send_wr,
+ enum ib_mad_state expected_state)
+{
+ if (IS_ENABLED(CONFIG_LOCKDEP))
+ WARN_ON(mad_send_wr->state != expected_state);
+}
+
+static inline void expect_mad_state2(struct ib_mad_send_wr_private *mad_send_wr,
+ enum ib_mad_state expected_state1,
+ enum ib_mad_state expected_state2)
+{
+ if (IS_ENABLED(CONFIG_LOCKDEP))
+ WARN_ON(mad_send_wr->state != expected_state1 &&
+ mad_send_wr->state != expected_state2);
+}
+
+static inline void expect_mad_state3(struct ib_mad_send_wr_private *mad_send_wr,
+ enum ib_mad_state expected_state1,
+ enum ib_mad_state expected_state2,
+ enum ib_mad_state expected_state3)
+{
+ if (IS_ENABLED(CONFIG_LOCKDEP))
+ WARN_ON(mad_send_wr->state != expected_state1 &&
+ mad_send_wr->state != expected_state2 &&
+ mad_send_wr->state != expected_state3);
+}
+
+static inline void
+not_expect_mad_state(struct ib_mad_send_wr_private *mad_send_wr,
+ enum ib_mad_state wrong_state)
+{
+ if (IS_ENABLED(CONFIG_LOCKDEP))
+ WARN_ON(mad_send_wr->state == wrong_state);
+}
+
struct ib_mad_local_private {
struct list_head completion_list;
struct ib_mad_private *mad_priv;
@@ -203,7 +271,6 @@ struct ib_mad_port_private {
spinlock_t reg_lock;
struct ib_mad_mgmt_version_table version[MAX_MGMT_VERSION];
- struct list_head agent_list;
struct workqueue_struct *wq;
struct ib_mad_qp_info qp_info[IB_MAD_QPS_CORE];
};
@@ -220,6 +287,9 @@ void ib_mad_complete_send_wr(struct ib_mad_send_wr_private *mad_send_wr,
void ib_mark_mad_done(struct ib_mad_send_wr_private *mad_send_wr);
void ib_reset_mad_timeout(struct ib_mad_send_wr_private *mad_send_wr,
- int timeout_ms);
+ unsigned long timeout_ms);
+
+void change_mad_state(struct ib_mad_send_wr_private *mad_send_wr,
+ enum ib_mad_state new_state);
#endif /* __IB_MAD_PRIV_H__ */
diff --git a/drivers/infiniband/core/mad_rmpp.c b/drivers/infiniband/core/mad_rmpp.c
index 382941b46e43..1c5e0eaf1c94 100644
--- a/drivers/infiniband/core/mad_rmpp.c
+++ b/drivers/infiniband/core/mad_rmpp.c
@@ -40,8 +40,7 @@
enum rmpp_state {
RMPP_STATE_ACTIVE,
RMPP_STATE_TIMEOUT,
- RMPP_STATE_COMPLETE,
- RMPP_STATE_CANCELING
+ RMPP_STATE_COMPLETE
};
struct mad_rmpp_recv {
@@ -52,7 +51,7 @@ struct mad_rmpp_recv {
struct completion comp;
enum rmpp_state state;
spinlock_t lock;
- atomic_t refcount;
+ refcount_t refcount;
struct ib_ah *ah;
struct ib_mad_recv_wc *rmpp_wc;
@@ -64,7 +63,7 @@ struct mad_rmpp_recv {
__be64 tid;
u32 src_qp;
- u16 slid;
+ u32 slid;
u8 mgmt_class;
u8 class_version;
u8 method;
@@ -73,7 +72,7 @@ struct mad_rmpp_recv {
static inline void deref_rmpp_recv(struct mad_rmpp_recv *rmpp_recv)
{
- if (atomic_dec_and_test(&rmpp_recv->refcount))
+ if (refcount_dec_and_test(&rmpp_recv->refcount))
complete(&rmpp_recv->comp);
}
@@ -81,7 +80,7 @@ static void destroy_rmpp_recv(struct mad_rmpp_recv *rmpp_recv)
{
deref_rmpp_recv(rmpp_recv);
wait_for_completion(&rmpp_recv->comp);
- ib_destroy_ah(rmpp_recv->ah);
+ rdma_destroy_ah(rmpp_recv->ah, RDMA_DESTROY_AH_SLEEPABLE);
kfree(rmpp_recv);
}
@@ -92,22 +91,18 @@ void ib_cancel_rmpp_recvs(struct ib_mad_agent_private *agent)
spin_lock_irqsave(&agent->lock, flags);
list_for_each_entry(rmpp_recv, &agent->rmpp_list, list) {
- if (rmpp_recv->state != RMPP_STATE_COMPLETE)
- ib_free_recv_mad(rmpp_recv->rmpp_wc);
- rmpp_recv->state = RMPP_STATE_CANCELING;
- }
- spin_unlock_irqrestore(&agent->lock, flags);
-
- list_for_each_entry(rmpp_recv, &agent->rmpp_list, list) {
cancel_delayed_work(&rmpp_recv->timeout_work);
cancel_delayed_work(&rmpp_recv->cleanup_work);
}
+ spin_unlock_irqrestore(&agent->lock, flags);
flush_workqueue(agent->qp_info->port_priv->wq);
list_for_each_entry_safe(rmpp_recv, temp_rmpp_recv,
&agent->rmpp_list, list) {
list_del(&rmpp_recv->list);
+ if (rmpp_recv->state != RMPP_STATE_COMPLETE)
+ ib_free_recv_mad(rmpp_recv->rmpp_wc);
destroy_rmpp_recv(rmpp_recv);
}
}
@@ -163,7 +158,7 @@ static struct ib_mad_send_buf *alloc_response_msg(struct ib_mad_agent *agent,
ah = ib_create_ah_from_wc(agent->qp->pd, recv_wc->wc,
recv_wc->recv_buf.grh, agent->port_num);
if (IS_ERR(ah))
- return (void *) ah;
+ return ERR_CAST(ah);
hdr_len = ib_get_mad_data_offset(recv_wc->recv_buf.mad->mad_hdr.mgmt_class);
msg = ib_create_send_mad(agent, recv_wc->wc->src_qp,
@@ -171,7 +166,7 @@ static struct ib_mad_send_buf *alloc_response_msg(struct ib_mad_agent *agent,
hdr_len, 0, GFP_KERNEL,
IB_MGMT_BASE_VERSION);
if (IS_ERR(msg))
- ib_destroy_ah(ah);
+ rdma_destroy_ah(ah, RDMA_DESTROY_AH_SLEEPABLE);
else {
msg->ah = ah;
msg->context[0] = ah;
@@ -201,7 +196,7 @@ static void ack_ds_ack(struct ib_mad_agent_private *agent,
ret = ib_post_send_mad(msg, NULL);
if (ret) {
- ib_destroy_ah(msg->ah);
+ rdma_destroy_ah(msg->ah, RDMA_DESTROY_AH_SLEEPABLE);
ib_free_send_mad(msg);
}
}
@@ -209,7 +204,8 @@ static void ack_ds_ack(struct ib_mad_agent_private *agent,
void ib_rmpp_send_handler(struct ib_mad_send_wc *mad_send_wc)
{
if (mad_send_wc->send_buf->context[0] == mad_send_wc->send_buf->ah)
- ib_destroy_ah(mad_send_wc->send_buf->ah);
+ rdma_destroy_ah(mad_send_wc->send_buf->ah,
+ RDMA_DESTROY_AH_SLEEPABLE);
ib_free_send_mad(mad_send_wc->send_buf);
}
@@ -237,7 +233,7 @@ static void nack_recv(struct ib_mad_agent_private *agent,
ret = ib_post_send_mad(msg, NULL);
if (ret) {
- ib_destroy_ah(msg->ah);
+ rdma_destroy_ah(msg->ah, RDMA_DESTROY_AH_SLEEPABLE);
ib_free_send_mad(msg);
}
}
@@ -271,10 +267,6 @@ static void recv_cleanup_handler(struct work_struct *work)
unsigned long flags;
spin_lock_irqsave(&rmpp_recv->agent->lock, flags);
- if (rmpp_recv->state == RMPP_STATE_CANCELING) {
- spin_unlock_irqrestore(&rmpp_recv->agent->lock, flags);
- return;
- }
list_del(&rmpp_recv->list);
spin_unlock_irqrestore(&rmpp_recv->agent->lock, flags);
destroy_rmpp_recv(rmpp_recv);
@@ -304,7 +296,7 @@ create_rmpp_recv(struct ib_mad_agent_private *agent,
INIT_DELAYED_WORK(&rmpp_recv->cleanup_work, recv_cleanup_handler);
spin_lock_init(&rmpp_recv->lock);
rmpp_recv->state = RMPP_STATE_ACTIVE;
- atomic_set(&rmpp_recv->refcount, 1);
+ refcount_set(&rmpp_recv->refcount, 1);
rmpp_recv->rmpp_wc = mad_recv_wc;
rmpp_recv->cur_seg_buf = &mad_recv_wc->recv_buf;
@@ -356,7 +348,7 @@ acquire_rmpp_recv(struct ib_mad_agent_private *agent,
spin_lock_irqsave(&agent->lock, flags);
rmpp_recv = find_rmpp_recv(agent, mad_recv_wc);
if (rmpp_recv)
- atomic_inc(&rmpp_recv->refcount);
+ refcount_inc(&rmpp_recv->refcount);
spin_unlock_irqrestore(&agent->lock, flags);
return rmpp_recv;
}
@@ -390,8 +382,8 @@ static inline int get_seg_num(struct ib_mad_recv_buf *seg)
return be32_to_cpu(rmpp_mad->rmpp_hdr.seg_num);
}
-static inline struct ib_mad_recv_buf * get_next_seg(struct list_head *rmpp_list,
- struct ib_mad_recv_buf *seg)
+static inline struct ib_mad_recv_buf *get_next_seg(struct list_head *rmpp_list,
+ struct ib_mad_recv_buf *seg)
{
if (seg->list.next == rmpp_list)
return NULL;
@@ -404,8 +396,8 @@ static inline int window_size(struct ib_mad_agent_private *agent)
return max(agent->qp_info->recv_queue.max_active >> 3, 1);
}
-static struct ib_mad_recv_buf * find_seg_location(struct list_head *rmpp_list,
- int seg_num)
+static struct ib_mad_recv_buf *find_seg_location(struct list_head *rmpp_list,
+ int seg_num)
{
struct ib_mad_recv_buf *seg_buf;
int cur_seg_num;
@@ -457,7 +449,7 @@ static inline int get_mad_len(struct mad_rmpp_recv *rmpp_recv)
return hdr_size + rmpp_recv->seg_num * data_size - pad;
}
-static struct ib_mad_recv_wc * complete_rmpp(struct mad_rmpp_recv *rmpp_recv)
+static struct ib_mad_recv_wc *complete_rmpp(struct mad_rmpp_recv *rmpp_recv)
{
struct ib_mad_recv_wc *rmpp_wc;
@@ -552,7 +544,7 @@ start_rmpp(struct ib_mad_agent_private *agent,
destroy_rmpp_recv(rmpp_recv);
return continue_rmpp(agent, mad_recv_wc);
}
- atomic_inc(&rmpp_recv->refcount);
+ refcount_inc(&rmpp_recv->refcount);
if (get_last_flag(&mad_recv_wc->recv_buf)) {
rmpp_recv->state = RMPP_STATE_COMPLETE;
@@ -616,16 +608,20 @@ static void abort_send(struct ib_mad_agent_private *agent,
goto out; /* Unmatched send */
if ((mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count) ||
- (!mad_send_wr->timeout) || (mad_send_wr->status != IB_WC_SUCCESS))
+ (!mad_send_wr->timeout) ||
+ (mad_send_wr->state == IB_MAD_STATE_CANCELED))
goto out; /* Send is already done */
ib_mark_mad_done(mad_send_wr);
+ if (mad_send_wr->state == IB_MAD_STATE_DONE) {
+ spin_unlock_irqrestore(&agent->lock, flags);
+ wc.status = IB_WC_REM_ABORT_ERR;
+ wc.vendor_err = rmpp_status;
+ wc.send_buf = &mad_send_wr->send_buf;
+ ib_mad_complete_send_wr(mad_send_wr, &wc);
+ return;
+ }
spin_unlock_irqrestore(&agent->lock, flags);
-
- wc.status = IB_WC_REM_ABORT_ERR;
- wc.vendor_err = rmpp_status;
- wc.send_buf = &mad_send_wr->send_buf;
- ib_mad_complete_send_wr(mad_send_wr, &wc);
return;
out:
spin_unlock_irqrestore(&agent->lock, flags);
@@ -692,7 +688,8 @@ static void process_rmpp_ack(struct ib_mad_agent_private *agent,
}
if ((mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count) ||
- (!mad_send_wr->timeout) || (mad_send_wr->status != IB_WC_SUCCESS))
+ (!mad_send_wr->timeout) ||
+ (mad_send_wr->state == IB_MAD_STATE_CANCELED))
goto out; /* Send is already done */
if (seg_num > mad_send_wr->send_buf.seg_count ||
@@ -717,21 +714,24 @@ static void process_rmpp_ack(struct ib_mad_agent_private *agent,
struct ib_mad_send_wc wc;
ib_mark_mad_done(mad_send_wr);
+ if (mad_send_wr->state == IB_MAD_STATE_DONE) {
+ spin_unlock_irqrestore(&agent->lock, flags);
+ wc.status = IB_WC_SUCCESS;
+ wc.vendor_err = 0;
+ wc.send_buf = &mad_send_wr->send_buf;
+ ib_mad_complete_send_wr(mad_send_wr, &wc);
+ return;
+ }
spin_unlock_irqrestore(&agent->lock, flags);
-
- wc.status = IB_WC_SUCCESS;
- wc.vendor_err = 0;
- wc.send_buf = &mad_send_wr->send_buf;
- ib_mad_complete_send_wr(mad_send_wr, &wc);
return;
}
- if (mad_send_wr->refcount == 1)
+ if (mad_send_wr->state == IB_MAD_STATE_WAIT_RESP)
ib_reset_mad_timeout(mad_send_wr,
mad_send_wr->send_buf.timeout_ms);
spin_unlock_irqrestore(&agent->lock, flags);
ack_ds_ack(agent, mad_recv_wc);
return;
- } else if (mad_send_wr->refcount == 1 &&
+ } else if (mad_send_wr->state == IB_MAD_STATE_WAIT_RESP &&
mad_send_wr->seg_num < mad_send_wr->newwin &&
mad_send_wr->seg_num < mad_send_wr->send_buf.seg_count) {
/* Send failure will just result in a timeout/retry */
@@ -739,7 +739,7 @@ static void process_rmpp_ack(struct ib_mad_agent_private *agent,
if (ret)
goto out;
- mad_send_wr->refcount++;
+ change_mad_state(mad_send_wr, IB_MAD_STATE_SEND_START);
list_move_tail(&mad_send_wr->agent_list,
&mad_send_wr->mad_agent_priv->send_list);
}
@@ -852,7 +852,7 @@ static int init_newwin(struct ib_mad_send_wr_private *mad_send_wr)
struct ib_mad_agent_private *agent = mad_send_wr->mad_agent_priv;
struct ib_mad_hdr *mad_hdr = mad_send_wr->send_buf.mad;
struct mad_rmpp_recv *rmpp_recv;
- struct ib_ah_attr ah_attr;
+ struct rdma_ah_attr ah_attr;
unsigned long flags;
int newwin = 1;
@@ -867,10 +867,10 @@ static int init_newwin(struct ib_mad_send_wr_private *mad_send_wr)
(rmpp_recv->method & IB_MGMT_METHOD_RESP))
continue;
- if (ib_query_ah(mad_send_wr->send_buf.ah, &ah_attr))
+ if (rdma_query_ah(mad_send_wr->send_buf.ah, &ah_attr))
continue;
- if (rmpp_recv->slid == ah_attr.dlid) {
+ if (rmpp_recv->slid == rdma_ah_get_dlid(&ah_attr)) {
newwin = rmpp_recv->repwin;
break;
}
@@ -898,7 +898,6 @@ int ib_send_rmpp_mad(struct ib_mad_send_wr_private *mad_send_wr)
mad_send_wr->newwin = init_newwin(mad_send_wr);
/* We need to wait for the final ACK even if there isn't a response */
- mad_send_wr->refcount += (mad_send_wr->timeout == 0);
ret = send_next_seg(mad_send_wr);
if (!ret)
return IB_RMPP_RESULT_CONSUMED;
@@ -920,7 +919,7 @@ int ib_process_rmpp_send_wc(struct ib_mad_send_wr_private *mad_send_wr,
return IB_RMPP_RESULT_INTERNAL; /* ACK, STOP, or ABORT */
if (mad_send_wc->status != IB_WC_SUCCESS ||
- mad_send_wr->status != IB_WC_SUCCESS)
+ mad_send_wr->state == IB_MAD_STATE_CANCELED)
return IB_RMPP_RESULT_PROCESSED; /* Canceled or send error */
if (!mad_send_wr->timeout)
diff --git a/drivers/infiniband/core/mr_pool.c b/drivers/infiniband/core/mr_pool.c
index 49d478b2ea94..c0e2df128b34 100644
--- a/drivers/infiniband/core/mr_pool.c
+++ b/drivers/infiniband/core/mr_pool.c
@@ -1,14 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2016 HGST, a Western Digital Company.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
*/
#include <rdma/ib_verbs.h>
#include <rdma/mr_pool.h>
@@ -42,14 +34,18 @@ void ib_mr_pool_put(struct ib_qp *qp, struct list_head *list, struct ib_mr *mr)
EXPORT_SYMBOL(ib_mr_pool_put);
int ib_mr_pool_init(struct ib_qp *qp, struct list_head *list, int nr,
- enum ib_mr_type type, u32 max_num_sg)
+ enum ib_mr_type type, u32 max_num_sg, u32 max_num_meta_sg)
{
struct ib_mr *mr;
unsigned long flags;
int ret, i;
for (i = 0; i < nr; i++) {
- mr = ib_alloc_mr(qp->pd, type, max_num_sg);
+ if (type == IB_MR_TYPE_INTEGRITY)
+ mr = ib_alloc_mr_integrity(qp->pd, max_num_sg,
+ max_num_meta_sg);
+ else
+ mr = ib_alloc_mr(qp->pd, type, max_num_sg);
if (IS_ERR(mr)) {
ret = PTR_ERR(mr);
goto out;
diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c
index 322cb67b07a9..a236532a9026 100644
--- a/drivers/infiniband/core/multicast.c
+++ b/drivers/infiniband/core/multicast.c
@@ -42,7 +42,7 @@
#include <rdma/ib_cache.h>
#include "sa.h"
-static void mcast_add_one(struct ib_device *device);
+static int mcast_add_one(struct ib_device *device);
static void mcast_remove_one(struct ib_device *device, void *client_data);
static struct ib_client mcast_client = {
@@ -61,9 +61,9 @@ struct mcast_port {
struct mcast_device *dev;
spinlock_t lock;
struct rb_root table;
- atomic_t refcount;
+ refcount_t refcount;
struct completion comp;
- u8 port_num;
+ u32 port_num;
};
struct mcast_device {
@@ -71,7 +71,7 @@ struct mcast_device {
struct ib_event_handler event_handler;
int start_port;
int end_port;
- struct mcast_port port[0];
+ struct mcast_port port[];
};
enum mcast_state {
@@ -117,7 +117,7 @@ struct mcast_member {
struct mcast_group *group;
struct list_head list;
enum mcast_state state;
- atomic_t refcount;
+ refcount_t refcount;
struct completion comp;
};
@@ -178,7 +178,7 @@ static struct mcast_group *mcast_insert(struct mcast_port *port,
static void deref_port(struct mcast_port *port)
{
- if (atomic_dec_and_test(&port->refcount))
+ if (refcount_dec_and_test(&port->refcount))
complete(&port->comp);
}
@@ -199,7 +199,7 @@ static void release_group(struct mcast_group *group)
static void deref_member(struct mcast_member *member)
{
- if (atomic_dec_and_test(&member->refcount))
+ if (refcount_dec_and_test(&member->refcount))
complete(&member->comp);
}
@@ -401,7 +401,7 @@ static void process_group_error(struct mcast_group *group)
while (!list_empty(&group->active_list)) {
member = list_entry(group->active_list.next,
struct mcast_member, list);
- atomic_inc(&member->refcount);
+ refcount_inc(&member->refcount);
list_del_init(&member->list);
adjust_membership(group, member->multicast.rec.join_state, -1);
member->state = MCAST_ERROR;
@@ -445,7 +445,7 @@ retest:
struct mcast_member, list);
multicast = &member->multicast;
join_state = multicast->rec.join_state;
- atomic_inc(&member->refcount);
+ refcount_inc(&member->refcount);
if (join_state == (group->rec.join_state & join_state)) {
status = cmp_rec(&group->rec, &multicast->rec,
@@ -497,7 +497,7 @@ static void process_join_error(struct mcast_group *group, int status)
member = list_entry(group->pending_list.next,
struct mcast_member, list);
if (group->last_join == member) {
- atomic_inc(&member->refcount);
+ refcount_inc(&member->refcount);
list_del_init(&member->list);
spin_unlock_irq(&group->lock);
ret = member->multicast.callback(status, &member->multicast);
@@ -589,7 +589,7 @@ static struct mcast_group *acquire_group(struct mcast_port *port,
kfree(group);
group = cur_group;
} else
- atomic_inc(&port->refcount);
+ refcount_inc(&port->refcount);
found:
atomic_inc(&group->refcount);
spin_unlock_irqrestore(&port->lock, flags);
@@ -605,7 +605,7 @@ found:
*/
struct ib_sa_multicast *
ib_sa_join_multicast(struct ib_sa_client *client,
- struct ib_device *device, u8 port_num,
+ struct ib_device *device, u32 port_num,
struct ib_sa_mcmember_rec *rec,
ib_sa_comp_mask comp_mask, gfp_t gfp_mask,
int (*callback)(int status,
@@ -632,7 +632,7 @@ ib_sa_join_multicast(struct ib_sa_client *client,
member->multicast.callback = callback;
member->multicast.context = context;
init_completion(&member->comp);
- atomic_set(&member->refcount, 1);
+ refcount_set(&member->refcount, 1);
member->state = MCAST_JOINING;
member->group = acquire_group(&dev->port[port_num - dev->start_port],
@@ -690,7 +690,7 @@ void ib_sa_free_multicast(struct ib_sa_multicast *multicast)
}
EXPORT_SYMBOL(ib_sa_free_multicast);
-int ib_sa_get_mcmember_rec(struct ib_device *device, u8 port_num,
+int ib_sa_get_mcmember_rec(struct ib_device *device, u32 port_num,
union ib_gid *mgid, struct ib_sa_mcmember_rec *rec)
{
struct mcast_device *dev;
@@ -716,46 +716,54 @@ int ib_sa_get_mcmember_rec(struct ib_device *device, u8 port_num,
}
EXPORT_SYMBOL(ib_sa_get_mcmember_rec);
-int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num,
+/**
+ * ib_init_ah_from_mcmember - Initialize AH attribute from multicast
+ * member record and gid of the device.
+ * @device: RDMA device
+ * @port_num: Port of the rdma device to consider
+ * @rec: Multicast member record to use
+ * @ndev: Optional netdevice, applicable only for RoCE
+ * @gid_type: GID type to consider
+ * @ah_attr: AH attribute to fillup on successful completion
+ *
+ * ib_init_ah_from_mcmember() initializes AH attribute based on multicast
+ * member record and other device properties. On success the caller is
+ * responsible to call rdma_destroy_ah_attr on the ah_attr. Returns 0 on
+ * success or appropriate error code.
+ *
+ */
+int ib_init_ah_from_mcmember(struct ib_device *device, u32 port_num,
struct ib_sa_mcmember_rec *rec,
struct net_device *ndev,
enum ib_gid_type gid_type,
- struct ib_ah_attr *ah_attr)
+ struct rdma_ah_attr *ah_attr)
{
- int ret;
- u16 gid_index;
- u8 p;
-
- if (rdma_protocol_roce(device, port_num)) {
- ret = ib_find_cached_gid_by_port(device, &rec->port_gid,
- gid_type, port_num,
- ndev,
- &gid_index);
- } else if (rdma_protocol_ib(device, port_num)) {
- ret = ib_find_cached_gid(device, &rec->port_gid,
- IB_GID_TYPE_IB, NULL, &p,
- &gid_index);
- } else {
- ret = -EINVAL;
- }
-
- if (ret)
- return ret;
-
- memset(ah_attr, 0, sizeof *ah_attr);
- ah_attr->dlid = be16_to_cpu(rec->mlid);
- ah_attr->sl = rec->sl;
- ah_attr->port_num = port_num;
- ah_attr->static_rate = rec->rate;
+ const struct ib_gid_attr *sgid_attr;
- ah_attr->ah_flags = IB_AH_GRH;
- ah_attr->grh.dgid = rec->mgid;
-
- ah_attr->grh.sgid_index = (u8) gid_index;
- ah_attr->grh.flow_label = be32_to_cpu(rec->flow_label);
- ah_attr->grh.hop_limit = rec->hop_limit;
- ah_attr->grh.traffic_class = rec->traffic_class;
+ /* GID table is not based on the netdevice for IB link layer,
+ * so ignore ndev during search.
+ */
+ if (rdma_protocol_ib(device, port_num))
+ ndev = NULL;
+ else if (!rdma_protocol_roce(device, port_num))
+ return -EINVAL;
+ sgid_attr = rdma_find_gid_by_port(device, &rec->port_gid,
+ gid_type, port_num, ndev);
+ if (IS_ERR(sgid_attr))
+ return PTR_ERR(sgid_attr);
+
+ memset(ah_attr, 0, sizeof(*ah_attr));
+ ah_attr->type = rdma_ah_find_type(device, port_num);
+
+ rdma_ah_set_dlid(ah_attr, be16_to_cpu(rec->mlid));
+ rdma_ah_set_sl(ah_attr, rec->sl);
+ rdma_ah_set_port_num(ah_attr, port_num);
+ rdma_ah_set_static_rate(ah_attr, rec->rate);
+ rdma_move_grh_sgid_attr(ah_attr, &rec->mgid,
+ be32_to_cpu(rec->flow_label),
+ rec->hop_limit, rec->traffic_class,
+ sgid_attr);
return 0;
}
EXPORT_SYMBOL(ib_init_ah_from_mcmember);
@@ -797,7 +805,6 @@ static void mcast_event_handler(struct ib_event_handler *handler,
switch (event->event) {
case IB_EVENT_PORT_ERR:
case IB_EVENT_LID_CHANGE:
- case IB_EVENT_SM_CHANGE:
case IB_EVENT_CLIENT_REREGISTER:
mcast_groups_event(&dev->port[index], MCAST_GROUP_ERROR);
break;
@@ -809,17 +816,17 @@ static void mcast_event_handler(struct ib_event_handler *handler,
}
}
-static void mcast_add_one(struct ib_device *device)
+static int mcast_add_one(struct ib_device *device)
{
struct mcast_device *dev;
struct mcast_port *port;
int i;
int count = 0;
- dev = kmalloc(sizeof *dev + device->phys_port_cnt * sizeof *port,
+ dev = kmalloc(struct_size(dev, port, device->phys_port_cnt),
GFP_KERNEL);
if (!dev)
- return;
+ return -ENOMEM;
dev->start_port = rdma_start_port(device);
dev->end_port = rdma_end_port(device);
@@ -833,13 +840,13 @@ static void mcast_add_one(struct ib_device *device)
spin_lock_init(&port->lock);
port->table = RB_ROOT;
init_completion(&port->comp);
- atomic_set(&port->refcount, 1);
+ refcount_set(&port->refcount, 1);
++count;
}
if (!count) {
kfree(dev);
- return;
+ return -EOPNOTSUPP;
}
dev->device = device;
@@ -847,6 +854,7 @@ static void mcast_add_one(struct ib_device *device)
INIT_IB_EVENT_HANDLER(&dev->event_handler, device, mcast_event_handler);
ib_register_event_handler(&dev->event_handler);
+ return 0;
}
static void mcast_remove_one(struct ib_device *device, void *client_data)
@@ -855,9 +863,6 @@ static void mcast_remove_one(struct ib_device *device, void *client_data)
struct mcast_port *port;
int i;
- if (!dev)
- return;
-
ib_unregister_event_handler(&dev->event_handler);
flush_workqueue(mcast_wq);
diff --git a/drivers/infiniband/core/netlink.c b/drivers/infiniband/core/netlink.c
index 10469b0088b5..def14c54b648 100644
--- a/drivers/infiniband/core/netlink.c
+++ b/drivers/infiniband/core/netlink.c
@@ -1,4 +1,5 @@
/*
+ * Copyright (c) 2017 Mellanox Technologies Inc. All rights reserved.
* Copyright (c) 2010 Voltaire Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -35,240 +36,297 @@
#include <linux/export.h>
#include <net/netlink.h>
#include <net/net_namespace.h>
+#include <net/netns/generic.h>
#include <net/sock.h>
#include <rdma/rdma_netlink.h>
+#include <linux/module.h>
+#include "core_priv.h"
-struct ibnl_client {
- struct list_head list;
- int index;
- int nops;
- const struct ibnl_client_cbs *cb_table;
-};
-
-static DEFINE_MUTEX(ibnl_mutex);
-static struct sock *nls;
-static LIST_HEAD(client_list);
+static struct {
+ const struct rdma_nl_cbs *cb_table;
+ /* Synchronizes between ongoing netlink commands and netlink client
+ * unregistration.
+ */
+ struct rw_semaphore sem;
+} rdma_nl_types[RDMA_NL_NUM_CLIENTS];
-int ibnl_chk_listeners(unsigned int group)
+bool rdma_nl_chk_listeners(unsigned int group)
{
- if (netlink_has_listeners(nls, group) == 0)
- return -1;
- return 0;
+ struct rdma_dev_net *rnet = rdma_net_to_dev_net(&init_net);
+
+ return netlink_has_listeners(rnet->nl_sock, group);
}
-EXPORT_SYMBOL(ibnl_chk_listeners);
+EXPORT_SYMBOL(rdma_nl_chk_listeners);
-int ibnl_add_client(int index, int nops,
- const struct ibnl_client_cbs cb_table[])
+static bool is_nl_msg_valid(unsigned int type, unsigned int op)
{
- struct ibnl_client *cur;
- struct ibnl_client *nl_client;
+ static const unsigned int max_num_ops[RDMA_NL_NUM_CLIENTS] = {
+ [RDMA_NL_IWCM] = RDMA_NL_IWPM_NUM_OPS,
+ [RDMA_NL_LS] = RDMA_NL_LS_NUM_OPS,
+ [RDMA_NL_NLDEV] = RDMA_NLDEV_NUM_OPS,
+ };
- nl_client = kmalloc(sizeof *nl_client, GFP_KERNEL);
- if (!nl_client)
- return -ENOMEM;
+ /*
+ * This BUILD_BUG_ON is intended to catch addition of new
+ * RDMA netlink protocol without updating the array above.
+ */
+ BUILD_BUG_ON(RDMA_NL_NUM_CLIENTS != 6);
- nl_client->index = index;
- nl_client->nops = nops;
- nl_client->cb_table = cb_table;
+ if (type >= RDMA_NL_NUM_CLIENTS)
+ return false;
- mutex_lock(&ibnl_mutex);
+ return op < max_num_ops[type];
+}
- list_for_each_entry(cur, &client_list, list) {
- if (cur->index == index) {
- pr_warn("Client for %d already exists\n", index);
- mutex_unlock(&ibnl_mutex);
- kfree(nl_client);
- return -EINVAL;
- }
- }
+static const struct rdma_nl_cbs *
+get_cb_table(const struct sk_buff *skb, unsigned int type, unsigned int op)
+{
+ const struct rdma_nl_cbs *cb_table;
+
+ /*
+ * Currently only NLDEV client is supporting netlink commands in
+ * non init_net net namespace.
+ */
+ if (sock_net(skb->sk) != &init_net && type != RDMA_NL_NLDEV)
+ return NULL;
- list_add_tail(&nl_client->list, &client_list);
+ cb_table = READ_ONCE(rdma_nl_types[type].cb_table);
+ if (!cb_table) {
+ /*
+ * Didn't get valid reference of the table, attempt module
+ * load once.
+ */
+ up_read(&rdma_nl_types[type].sem);
- mutex_unlock(&ibnl_mutex);
+ request_module("rdma-netlink-subsys-%u", type);
- return 0;
+ down_read(&rdma_nl_types[type].sem);
+ cb_table = READ_ONCE(rdma_nl_types[type].cb_table);
+ }
+ if (!cb_table || (!cb_table[op].dump && !cb_table[op].doit))
+ return NULL;
+ return cb_table;
}
-EXPORT_SYMBOL(ibnl_add_client);
-int ibnl_remove_client(int index)
+void rdma_nl_register(unsigned int index,
+ const struct rdma_nl_cbs cb_table[])
{
- struct ibnl_client *cur, *next;
-
- mutex_lock(&ibnl_mutex);
- list_for_each_entry_safe(cur, next, &client_list, list) {
- if (cur->index == index) {
- list_del(&(cur->list));
- mutex_unlock(&ibnl_mutex);
- kfree(cur);
- return 0;
- }
- }
- pr_warn("Can't remove callback for client idx %d. Not found\n", index);
- mutex_unlock(&ibnl_mutex);
+ if (WARN_ON(!is_nl_msg_valid(index, 0)) ||
+ WARN_ON(READ_ONCE(rdma_nl_types[index].cb_table)))
+ return;
+
+ /* Pairs with the READ_ONCE in is_nl_valid() */
+ smp_store_release(&rdma_nl_types[index].cb_table, cb_table);
+}
+EXPORT_SYMBOL(rdma_nl_register);
- return -EINVAL;
+void rdma_nl_unregister(unsigned int index)
+{
+ down_write(&rdma_nl_types[index].sem);
+ rdma_nl_types[index].cb_table = NULL;
+ up_write(&rdma_nl_types[index].sem);
}
-EXPORT_SYMBOL(ibnl_remove_client);
+EXPORT_SYMBOL(rdma_nl_unregister);
void *ibnl_put_msg(struct sk_buff *skb, struct nlmsghdr **nlh, int seq,
int len, int client, int op, int flags)
{
- unsigned char *prev_tail;
-
- prev_tail = skb_tail_pointer(skb);
- *nlh = nlmsg_put(skb, 0, seq, RDMA_NL_GET_TYPE(client, op),
- len, flags);
+ *nlh = nlmsg_put(skb, 0, seq, RDMA_NL_GET_TYPE(client, op), len, flags);
if (!*nlh)
- goto out_nlmsg_trim;
- (*nlh)->nlmsg_len = skb_tail_pointer(skb) - prev_tail;
+ return NULL;
return nlmsg_data(*nlh);
-
-out_nlmsg_trim:
- nlmsg_trim(skb, prev_tail);
- return NULL;
}
EXPORT_SYMBOL(ibnl_put_msg);
int ibnl_put_attr(struct sk_buff *skb, struct nlmsghdr *nlh,
int len, void *data, int type)
{
- unsigned char *prev_tail;
-
- prev_tail = skb_tail_pointer(skb);
- if (nla_put(skb, type, len, data))
- goto nla_put_failure;
- nlh->nlmsg_len += skb_tail_pointer(skb) - prev_tail;
+ if (nla_put(skb, type, len, data)) {
+ nlmsg_cancel(skb, nlh);
+ return -EMSGSIZE;
+ }
return 0;
-
-nla_put_failure:
- nlmsg_trim(skb, prev_tail - nlh->nlmsg_len);
- return -EMSGSIZE;
}
EXPORT_SYMBOL(ibnl_put_attr);
-static int ibnl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int rdma_nl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
- struct ibnl_client *client;
int type = nlh->nlmsg_type;
- int index = RDMA_NL_GET_CLIENT(type);
+ unsigned int index = RDMA_NL_GET_CLIENT(type);
unsigned int op = RDMA_NL_GET_OP(type);
+ const struct rdma_nl_cbs *cb_table;
+ int err = -EINVAL;
+
+ if (!is_nl_msg_valid(index, op))
+ return -EINVAL;
+
+ down_read(&rdma_nl_types[index].sem);
+ cb_table = get_cb_table(skb, index, op);
+ if (!cb_table)
+ goto done;
- list_for_each_entry(client, &client_list, list) {
- if (client->index == index) {
- if (op >= client->nops || !client->cb_table[op].dump)
- return -EINVAL;
-
- /*
- * For response or local service set_timeout request,
- * there is no need to use netlink_dump_start.
- */
- if (!(nlh->nlmsg_flags & NLM_F_REQUEST) ||
- (index == RDMA_NL_LS &&
- op == RDMA_NL_LS_OP_SET_TIMEOUT)) {
- struct netlink_callback cb = {
- .skb = skb,
- .nlh = nlh,
- .dump = client->cb_table[op].dump,
- .module = client->cb_table[op].module,
- };
-
- return cb.dump(skb, &cb);
- }
-
- {
- struct netlink_dump_control c = {
- .dump = client->cb_table[op].dump,
- .module = client->cb_table[op].module,
- };
- return netlink_dump_start(nls, skb, nlh, &c);
- }
- }
+ if ((cb_table[op].flags & RDMA_NL_ADMIN_PERM) &&
+ !netlink_capable(skb, CAP_NET_ADMIN)) {
+ err = -EPERM;
+ goto done;
+ }
+
+ /*
+ * LS responses overload the 0x100 (NLM_F_ROOT) flag. Don't
+ * mistakenly call the .dump() function.
+ */
+ if (index == RDMA_NL_LS) {
+ if (cb_table[op].doit)
+ err = cb_table[op].doit(skb, nlh, extack);
+ goto done;
+ }
+ /* FIXME: Convert IWCM to properly handle doit callbacks */
+ if ((nlh->nlmsg_flags & NLM_F_DUMP) || index == RDMA_NL_IWCM) {
+ struct netlink_dump_control c = {
+ .dump = cb_table[op].dump,
+ };
+ if (c.dump)
+ err = netlink_dump_start(skb->sk, skb, nlh, &c);
+ goto done;
}
- pr_info("Index %d wasn't found in client list\n", index);
- return -EINVAL;
+ if (cb_table[op].doit)
+ err = cb_table[op].doit(skb, nlh, extack);
+done:
+ up_read(&rdma_nl_types[index].sem);
+ return err;
}
-static void ibnl_rcv_reply_skb(struct sk_buff *skb)
+/*
+ * This function is similar to netlink_rcv_skb with one exception:
+ * It calls to the callback for the netlink messages without NLM_F_REQUEST
+ * flag. These messages are intended for RDMA_NL_LS consumer, so it is allowed
+ * for that consumer only.
+ */
+static int rdma_nl_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *,
+ struct nlmsghdr *,
+ struct netlink_ext_ack *))
{
+ struct netlink_ext_ack extack = {};
struct nlmsghdr *nlh;
- int msglen;
+ int err;
- /*
- * Process responses until there is no more message or the first
- * request. Generally speaking, it is not recommended to mix responses
- * with requests.
- */
while (skb->len >= nlmsg_total_size(0)) {
+ int msglen;
+
nlh = nlmsg_hdr(skb);
+ err = 0;
if (nlh->nlmsg_len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len)
- return;
-
- /* Handle response only */
- if (nlh->nlmsg_flags & NLM_F_REQUEST)
- return;
-
- ibnl_rcv_msg(skb, nlh);
+ return 0;
+ /*
+ * Generally speaking, the only requests are handled
+ * by the kernel, but RDMA_NL_LS is different, because it
+ * runs backward netlink scheme. Kernel initiates messages
+ * and waits for reply with data to keep pathrecord cache
+ * in sync.
+ */
+ if (!(nlh->nlmsg_flags & NLM_F_REQUEST) &&
+ (RDMA_NL_GET_CLIENT(nlh->nlmsg_type) != RDMA_NL_LS))
+ goto ack;
+
+ /* Skip control messages */
+ if (nlh->nlmsg_type < NLMSG_MIN_TYPE)
+ goto ack;
+
+ err = cb(skb, nlh, &extack);
+ if (err == -EINTR)
+ goto skip;
+
+ack:
+ if (nlh->nlmsg_flags & NLM_F_ACK || err)
+ netlink_ack(skb, nlh, err, &extack);
+
+skip:
msglen = NLMSG_ALIGN(nlh->nlmsg_len);
if (msglen > skb->len)
msglen = skb->len;
skb_pull(skb, msglen);
}
+
+ return 0;
}
-static void ibnl_rcv(struct sk_buff *skb)
+static void rdma_nl_rcv(struct sk_buff *skb)
{
- mutex_lock(&ibnl_mutex);
- ibnl_rcv_reply_skb(skb);
- netlink_rcv_skb(skb, &ibnl_rcv_msg);
- mutex_unlock(&ibnl_mutex);
+ rdma_nl_rcv_skb(skb, &rdma_nl_rcv_msg);
+}
+
+int rdma_nl_unicast(struct net *net, struct sk_buff *skb, u32 pid)
+{
+ struct rdma_dev_net *rnet = rdma_net_to_dev_net(net);
+ int err;
+
+ err = netlink_unicast(rnet->nl_sock, skb, pid, MSG_DONTWAIT);
+ return (err < 0) ? err : 0;
}
+EXPORT_SYMBOL(rdma_nl_unicast);
-int ibnl_unicast(struct sk_buff *skb, struct nlmsghdr *nlh,
- __u32 pid)
+int rdma_nl_unicast_wait(struct net *net, struct sk_buff *skb, __u32 pid)
{
+ struct rdma_dev_net *rnet = rdma_net_to_dev_net(net);
int err;
- err = netlink_unicast(nls, skb, pid, 0);
+ err = netlink_unicast(rnet->nl_sock, skb, pid, 0);
return (err < 0) ? err : 0;
}
-EXPORT_SYMBOL(ibnl_unicast);
+EXPORT_SYMBOL(rdma_nl_unicast_wait);
+
+int rdma_nl_multicast(struct net *net, struct sk_buff *skb,
+ unsigned int group, gfp_t flags)
+{
+ struct rdma_dev_net *rnet = rdma_net_to_dev_net(net);
+
+ return nlmsg_multicast(rnet->nl_sock, skb, 0, group, flags);
+}
+EXPORT_SYMBOL(rdma_nl_multicast);
+
+void rdma_nl_init(void)
+{
+ int idx;
+
+ for (idx = 0; idx < RDMA_NL_NUM_CLIENTS; idx++)
+ init_rwsem(&rdma_nl_types[idx].sem);
+}
-int ibnl_multicast(struct sk_buff *skb, struct nlmsghdr *nlh,
- unsigned int group, gfp_t flags)
+void rdma_nl_exit(void)
{
- return nlmsg_multicast(nls, skb, 0, group, flags);
+ int idx;
+
+ for (idx = 0; idx < RDMA_NL_NUM_CLIENTS; idx++)
+ WARN(rdma_nl_types[idx].cb_table,
+ "Netlink client %d wasn't released prior to unloading %s\n",
+ idx, KBUILD_MODNAME);
}
-EXPORT_SYMBOL(ibnl_multicast);
-int __init ibnl_init(void)
+int rdma_nl_net_init(struct rdma_dev_net *rnet)
{
+ struct net *net = read_pnet(&rnet->net);
struct netlink_kernel_cfg cfg = {
- .input = ibnl_rcv,
+ .input = rdma_nl_rcv,
+ .flags = NL_CFG_F_NONROOT_RECV,
};
+ struct sock *nls;
- nls = netlink_kernel_create(&init_net, NETLINK_RDMA, &cfg);
- if (!nls) {
- pr_warn("Failed to create netlink socket\n");
+ nls = netlink_kernel_create(net, NETLINK_RDMA, &cfg);
+ if (!nls)
return -ENOMEM;
- }
nls->sk_sndtimeo = 10 * HZ;
+ rnet->nl_sock = nls;
return 0;
}
-void ibnl_cleanup(void)
+void rdma_nl_net_exit(struct rdma_dev_net *rnet)
{
- struct ibnl_client *cur, *next;
-
- mutex_lock(&ibnl_mutex);
- list_for_each_entry_safe(cur, next, &client_list, list) {
- list_del(&(cur->list));
- kfree(cur);
- }
- mutex_unlock(&ibnl_mutex);
-
- netlink_kernel_release(nls);
+ netlink_kernel_release(rnet->nl_sock);
}
+
+MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_RDMA);
diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
new file mode 100644
index 000000000000..2220a2dfab24
--- /dev/null
+++ b/drivers/infiniband/core/nldev.c
@@ -0,0 +1,2920 @@
+/*
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/module.h>
+#include <linux/pid.h>
+#include <linux/pid_namespace.h>
+#include <linux/mutex.h>
+#include <net/netlink.h>
+#include <rdma/rdma_cm.h>
+#include <rdma/rdma_netlink.h>
+
+#include "core_priv.h"
+#include "cma_priv.h"
+#include "restrack.h"
+#include "uverbs.h"
+
+/*
+ * This determines whether a non-privileged user is allowed to specify a
+ * controlled QKEY or not, when true non-privileged user is allowed to specify
+ * a controlled QKEY.
+ */
+static bool privileged_qkey;
+
+typedef int (*res_fill_func_t)(struct sk_buff*, bool,
+ struct rdma_restrack_entry*, uint32_t);
+
+/*
+ * Sort array elements by the netlink attribute name
+ */
+static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
+ [RDMA_NLDEV_ATTR_CHARDEV] = { .type = NLA_U64 },
+ [RDMA_NLDEV_ATTR_CHARDEV_ABI] = { .type = NLA_U64 },
+ [RDMA_NLDEV_ATTR_CHARDEV_NAME] = { .type = NLA_NUL_STRING,
+ .len = RDMA_NLDEV_ATTR_EMPTY_STRING },
+ [RDMA_NLDEV_ATTR_CHARDEV_TYPE] = { .type = NLA_NUL_STRING,
+ .len = RDMA_NLDEV_ATTR_CHARDEV_TYPE_SIZE },
+ [RDMA_NLDEV_ATTR_DEV_DIM] = { .type = NLA_U8 },
+ [RDMA_NLDEV_ATTR_DEV_INDEX] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING,
+ .len = IB_DEVICE_NAME_MAX },
+ [RDMA_NLDEV_ATTR_DEV_NODE_TYPE] = { .type = NLA_U8 },
+ [RDMA_NLDEV_ATTR_DEV_PROTOCOL] = { .type = NLA_NUL_STRING,
+ .len = RDMA_NLDEV_ATTR_EMPTY_STRING },
+ [RDMA_NLDEV_ATTR_DRIVER] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_DRIVER_ENTRY] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_DRIVER_PRINT_TYPE] = { .type = NLA_U8 },
+ [RDMA_NLDEV_ATTR_DRIVER_STRING] = { .type = NLA_NUL_STRING,
+ .len = RDMA_NLDEV_ATTR_EMPTY_STRING },
+ [RDMA_NLDEV_ATTR_DRIVER_S32] = { .type = NLA_S32 },
+ [RDMA_NLDEV_ATTR_DRIVER_S64] = { .type = NLA_S64 },
+ [RDMA_NLDEV_ATTR_DRIVER_U32] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_DRIVER_U64] = { .type = NLA_U64 },
+ [RDMA_NLDEV_ATTR_FW_VERSION] = { .type = NLA_NUL_STRING,
+ .len = RDMA_NLDEV_ATTR_EMPTY_STRING },
+ [RDMA_NLDEV_ATTR_LID] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_LINK_TYPE] = { .type = NLA_NUL_STRING,
+ .len = IFNAMSIZ },
+ [RDMA_NLDEV_ATTR_LMC] = { .type = NLA_U8 },
+ [RDMA_NLDEV_ATTR_NDEV_INDEX] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_NDEV_NAME] = { .type = NLA_NUL_STRING,
+ .len = IFNAMSIZ },
+ [RDMA_NLDEV_ATTR_NODE_GUID] = { .type = NLA_U64 },
+ [RDMA_NLDEV_ATTR_PORT_INDEX] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_PORT_PHYS_STATE] = { .type = NLA_U8 },
+ [RDMA_NLDEV_ATTR_PORT_STATE] = { .type = NLA_U8 },
+ [RDMA_NLDEV_ATTR_RES_CM_ID] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_RES_CM_IDN] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_RES_CQ] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_RES_CQE] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_RES_CQN] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_RES_CQ_ENTRY] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_RES_CTX] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_RES_CTXN] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_RES_CTX_ENTRY] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_RES_DST_ADDR] = {
+ .len = sizeof(struct __kernel_sockaddr_storage) },
+ [RDMA_NLDEV_ATTR_RES_IOVA] = { .type = NLA_U64 },
+ [RDMA_NLDEV_ATTR_RES_KERN_NAME] = { .type = NLA_NUL_STRING,
+ .len = RDMA_NLDEV_ATTR_EMPTY_STRING },
+ [RDMA_NLDEV_ATTR_RES_LKEY] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_RES_LQPN] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_RES_MR] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_RES_MRLEN] = { .type = NLA_U64 },
+ [RDMA_NLDEV_ATTR_RES_MRN] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_RES_MR_ENTRY] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_RES_PATH_MIG_STATE] = { .type = NLA_U8 },
+ [RDMA_NLDEV_ATTR_RES_PD] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_RES_PDN] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_RES_PD_ENTRY] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_RES_PID] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_RES_POLL_CTX] = { .type = NLA_U8 },
+ [RDMA_NLDEV_ATTR_RES_PS] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_RES_QP] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_RES_QP_ENTRY] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_RES_RAW] = { .type = NLA_BINARY },
+ [RDMA_NLDEV_ATTR_RES_RKEY] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_RES_RQPN] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_RES_RQ_PSN] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_RES_SQ_PSN] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_RES_SRC_ADDR] = {
+ .len = sizeof(struct __kernel_sockaddr_storage) },
+ [RDMA_NLDEV_ATTR_RES_STATE] = { .type = NLA_U8 },
+ [RDMA_NLDEV_ATTR_RES_SUMMARY] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR]= { .type = NLA_U64 },
+ [RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME]= { .type = NLA_NUL_STRING,
+ .len = RDMA_NLDEV_ATTR_EMPTY_STRING },
+ [RDMA_NLDEV_ATTR_RES_TYPE] = { .type = NLA_U8 },
+ [RDMA_NLDEV_ATTR_RES_SUBTYPE] = { .type = NLA_NUL_STRING,
+ .len = RDMA_NLDEV_ATTR_EMPTY_STRING },
+ [RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY]= { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_RES_USECNT] = { .type = NLA_U64 },
+ [RDMA_NLDEV_ATTR_RES_SRQ] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_RES_SRQN] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_RES_SRQ_ENTRY] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_MIN_RANGE] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_MAX_RANGE] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_SM_LID] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_SUBNET_PREFIX] = { .type = NLA_U64 },
+ [RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_STAT_MODE] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_STAT_RES] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_STAT_COUNTER] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_STAT_COUNTER_ENTRY] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_STAT_COUNTER_ID] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_STAT_HWCOUNTERS] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_NAME] = { .type = NLA_NUL_STRING },
+ [RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_VALUE] = { .type = NLA_U64 },
+ [RDMA_NLDEV_ATTR_SYS_IMAGE_GUID] = { .type = NLA_U64 },
+ [RDMA_NLDEV_ATTR_UVERBS_DRIVER_ID] = { .type = NLA_U32 },
+ [RDMA_NLDEV_NET_NS_FD] = { .type = NLA_U32 },
+ [RDMA_NLDEV_SYS_ATTR_NETNS_MODE] = { .type = NLA_U8 },
+ [RDMA_NLDEV_SYS_ATTR_COPY_ON_FORK] = { .type = NLA_U8 },
+ [RDMA_NLDEV_ATTR_STAT_HWCOUNTER_INDEX] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_STAT_HWCOUNTER_DYNAMIC] = { .type = NLA_U8 },
+ [RDMA_NLDEV_SYS_ATTR_PRIVILEGED_QKEY_MODE] = { .type = NLA_U8 },
+ [RDMA_NLDEV_ATTR_DRIVER_DETAILS] = { .type = NLA_U8 },
+ [RDMA_NLDEV_ATTR_DEV_TYPE] = { .type = NLA_U8 },
+ [RDMA_NLDEV_ATTR_PARENT_NAME] = { .type = NLA_NUL_STRING },
+ [RDMA_NLDEV_ATTR_NAME_ASSIGN_TYPE] = { .type = NLA_U8 },
+ [RDMA_NLDEV_ATTR_EVENT_TYPE] = { .type = NLA_U8 },
+ [RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENABLED] = { .type = NLA_U8 },
+};
+
+static int put_driver_name_print_type(struct sk_buff *msg, const char *name,
+ enum rdma_nldev_print_type print_type)
+{
+ if (nla_put_string(msg, RDMA_NLDEV_ATTR_DRIVER_STRING, name))
+ return -EMSGSIZE;
+ if (print_type != RDMA_NLDEV_PRINT_TYPE_UNSPEC &&
+ nla_put_u8(msg, RDMA_NLDEV_ATTR_DRIVER_PRINT_TYPE, print_type))
+ return -EMSGSIZE;
+
+ return 0;
+}
+
+static int _rdma_nl_put_driver_u32(struct sk_buff *msg, const char *name,
+ enum rdma_nldev_print_type print_type,
+ u32 value)
+{
+ if (put_driver_name_print_type(msg, name, print_type))
+ return -EMSGSIZE;
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_DRIVER_U32, value))
+ return -EMSGSIZE;
+
+ return 0;
+}
+
+static int _rdma_nl_put_driver_u64(struct sk_buff *msg, const char *name,
+ enum rdma_nldev_print_type print_type,
+ u64 value)
+{
+ if (put_driver_name_print_type(msg, name, print_type))
+ return -EMSGSIZE;
+ if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_DRIVER_U64, value,
+ RDMA_NLDEV_ATTR_PAD))
+ return -EMSGSIZE;
+
+ return 0;
+}
+
+int rdma_nl_put_driver_string(struct sk_buff *msg, const char *name,
+ const char *str)
+{
+ if (put_driver_name_print_type(msg, name,
+ RDMA_NLDEV_PRINT_TYPE_UNSPEC))
+ return -EMSGSIZE;
+ if (nla_put_string(msg, RDMA_NLDEV_ATTR_DRIVER_STRING, str))
+ return -EMSGSIZE;
+
+ return 0;
+}
+EXPORT_SYMBOL(rdma_nl_put_driver_string);
+
+int rdma_nl_put_driver_u32(struct sk_buff *msg, const char *name, u32 value)
+{
+ return _rdma_nl_put_driver_u32(msg, name, RDMA_NLDEV_PRINT_TYPE_UNSPEC,
+ value);
+}
+EXPORT_SYMBOL(rdma_nl_put_driver_u32);
+
+int rdma_nl_put_driver_u32_hex(struct sk_buff *msg, const char *name,
+ u32 value)
+{
+ return _rdma_nl_put_driver_u32(msg, name, RDMA_NLDEV_PRINT_TYPE_HEX,
+ value);
+}
+EXPORT_SYMBOL(rdma_nl_put_driver_u32_hex);
+
+int rdma_nl_put_driver_u64(struct sk_buff *msg, const char *name, u64 value)
+{
+ return _rdma_nl_put_driver_u64(msg, name, RDMA_NLDEV_PRINT_TYPE_UNSPEC,
+ value);
+}
+EXPORT_SYMBOL(rdma_nl_put_driver_u64);
+
+int rdma_nl_put_driver_u64_hex(struct sk_buff *msg, const char *name, u64 value)
+{
+ return _rdma_nl_put_driver_u64(msg, name, RDMA_NLDEV_PRINT_TYPE_HEX,
+ value);
+}
+EXPORT_SYMBOL(rdma_nl_put_driver_u64_hex);
+
+bool rdma_nl_get_privileged_qkey(void)
+{
+ return privileged_qkey;
+}
+EXPORT_SYMBOL(rdma_nl_get_privileged_qkey);
+
+static int fill_nldev_handle(struct sk_buff *msg, struct ib_device *device)
+{
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_DEV_INDEX, device->index))
+ return -EMSGSIZE;
+ if (nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_NAME,
+ dev_name(&device->dev)))
+ return -EMSGSIZE;
+
+ return 0;
+}
+
+static int fill_dev_info(struct sk_buff *msg, struct ib_device *device)
+{
+ char fw[IB_FW_VERSION_NAME_MAX];
+ int ret = 0;
+ u32 port;
+
+ if (fill_nldev_handle(msg, device))
+ return -EMSGSIZE;
+
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, rdma_end_port(device)))
+ return -EMSGSIZE;
+
+ BUILD_BUG_ON(sizeof(device->attrs.device_cap_flags) != sizeof(u64));
+ if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS,
+ device->attrs.device_cap_flags,
+ RDMA_NLDEV_ATTR_PAD))
+ return -EMSGSIZE;
+
+ ib_get_device_fw_str(device, fw);
+ /* Device without FW has strlen(fw) = 0 */
+ if (strlen(fw) && nla_put_string(msg, RDMA_NLDEV_ATTR_FW_VERSION, fw))
+ return -EMSGSIZE;
+
+ if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_NODE_GUID,
+ be64_to_cpu(device->node_guid),
+ RDMA_NLDEV_ATTR_PAD))
+ return -EMSGSIZE;
+ if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SYS_IMAGE_GUID,
+ be64_to_cpu(device->attrs.sys_image_guid),
+ RDMA_NLDEV_ATTR_PAD))
+ return -EMSGSIZE;
+ if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_NODE_TYPE, device->node_type))
+ return -EMSGSIZE;
+ if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_DIM, device->use_cq_dim))
+ return -EMSGSIZE;
+
+ if (device->type &&
+ nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_TYPE, device->type))
+ return -EMSGSIZE;
+
+ if (device->parent &&
+ nla_put_string(msg, RDMA_NLDEV_ATTR_PARENT_NAME,
+ dev_name(&device->parent->dev)))
+ return -EMSGSIZE;
+
+ if (nla_put_u8(msg, RDMA_NLDEV_ATTR_NAME_ASSIGN_TYPE,
+ device->name_assign_type))
+ return -EMSGSIZE;
+
+ /*
+ * Link type is determined on first port and mlx4 device
+ * which can potentially have two different link type for the same
+ * IB device is considered as better to be avoided in the future,
+ */
+ port = rdma_start_port(device);
+ if (rdma_cap_opa_mad(device, port))
+ ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "opa");
+ else if (rdma_protocol_ib(device, port))
+ ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "ib");
+ else if (rdma_protocol_iwarp(device, port))
+ ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "iw");
+ else if (rdma_protocol_roce(device, port))
+ ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "roce");
+ else if (rdma_protocol_usnic(device, port))
+ ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL,
+ "usnic");
+ return ret;
+}
+
+static int fill_port_info(struct sk_buff *msg,
+ struct ib_device *device, u32 port,
+ const struct net *net)
+{
+ struct net_device *netdev = NULL;
+ struct ib_port_attr attr;
+ int ret;
+ u64 cap_flags = 0;
+
+ if (fill_nldev_handle(msg, device))
+ return -EMSGSIZE;
+
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port))
+ return -EMSGSIZE;
+
+ ret = ib_query_port(device, port, &attr);
+ if (ret)
+ return ret;
+
+ if (rdma_protocol_ib(device, port)) {
+ BUILD_BUG_ON((sizeof(attr.port_cap_flags) +
+ sizeof(attr.port_cap_flags2)) > sizeof(u64));
+ cap_flags = attr.port_cap_flags |
+ ((u64)attr.port_cap_flags2 << 32);
+ if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS,
+ cap_flags, RDMA_NLDEV_ATTR_PAD))
+ return -EMSGSIZE;
+ if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SUBNET_PREFIX,
+ attr.subnet_prefix, RDMA_NLDEV_ATTR_PAD))
+ return -EMSGSIZE;
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_LID, attr.lid))
+ return -EMSGSIZE;
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_SM_LID, attr.sm_lid))
+ return -EMSGSIZE;
+ if (nla_put_u8(msg, RDMA_NLDEV_ATTR_LMC, attr.lmc))
+ return -EMSGSIZE;
+ }
+ if (nla_put_u8(msg, RDMA_NLDEV_ATTR_PORT_STATE, attr.state))
+ return -EMSGSIZE;
+ if (nla_put_u8(msg, RDMA_NLDEV_ATTR_PORT_PHYS_STATE, attr.phys_state))
+ return -EMSGSIZE;
+
+ netdev = ib_device_get_netdev(device, port);
+ if (netdev && net_eq(dev_net(netdev), net)) {
+ ret = nla_put_u32(msg,
+ RDMA_NLDEV_ATTR_NDEV_INDEX, netdev->ifindex);
+ if (ret)
+ goto out;
+ ret = nla_put_string(msg,
+ RDMA_NLDEV_ATTR_NDEV_NAME, netdev->name);
+ }
+
+out:
+ dev_put(netdev);
+ return ret;
+}
+
+static int fill_res_info_entry(struct sk_buff *msg,
+ const char *name, u64 curr)
+{
+ struct nlattr *entry_attr;
+
+ entry_attr = nla_nest_start_noflag(msg,
+ RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY);
+ if (!entry_attr)
+ return -EMSGSIZE;
+
+ if (nla_put_string(msg, RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME, name))
+ goto err;
+ if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR, curr,
+ RDMA_NLDEV_ATTR_PAD))
+ goto err;
+
+ nla_nest_end(msg, entry_attr);
+ return 0;
+
+err:
+ nla_nest_cancel(msg, entry_attr);
+ return -EMSGSIZE;
+}
+
+static int fill_res_info(struct sk_buff *msg, struct ib_device *device,
+ bool show_details)
+{
+ static const char * const names[RDMA_RESTRACK_MAX] = {
+ [RDMA_RESTRACK_PD] = "pd",
+ [RDMA_RESTRACK_CQ] = "cq",
+ [RDMA_RESTRACK_QP] = "qp",
+ [RDMA_RESTRACK_CM_ID] = "cm_id",
+ [RDMA_RESTRACK_MR] = "mr",
+ [RDMA_RESTRACK_CTX] = "ctx",
+ [RDMA_RESTRACK_SRQ] = "srq",
+ };
+
+ struct nlattr *table_attr;
+ int ret, i, curr;
+
+ if (fill_nldev_handle(msg, device))
+ return -EMSGSIZE;
+
+ table_attr = nla_nest_start_noflag(msg, RDMA_NLDEV_ATTR_RES_SUMMARY);
+ if (!table_attr)
+ return -EMSGSIZE;
+
+ for (i = 0; i < RDMA_RESTRACK_MAX; i++) {
+ if (!names[i])
+ continue;
+ curr = rdma_restrack_count(device, i, show_details);
+ ret = fill_res_info_entry(msg, names[i], curr);
+ if (ret)
+ goto err;
+ }
+
+ nla_nest_end(msg, table_attr);
+ return 0;
+
+err:
+ nla_nest_cancel(msg, table_attr);
+ return ret;
+}
+
+static int fill_res_name_pid(struct sk_buff *msg,
+ struct rdma_restrack_entry *res)
+{
+ int err = 0;
+
+ /*
+ * For user resources, user is should read /proc/PID/comm to get the
+ * name of the task file.
+ */
+ if (rdma_is_kernel_res(res)) {
+ err = nla_put_string(msg, RDMA_NLDEV_ATTR_RES_KERN_NAME,
+ res->kern_name);
+ } else {
+ pid_t pid;
+
+ pid = task_pid_vnr(res->task);
+ /*
+ * Task is dead and in zombie state.
+ * There is no need to print PID anymore.
+ */
+ if (pid)
+ /*
+ * This part is racy, task can be killed and PID will
+ * be zero right here but it is ok, next query won't
+ * return PID. We don't promise real-time reflection
+ * of SW objects.
+ */
+ err = nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PID, pid);
+ }
+
+ return err ? -EMSGSIZE : 0;
+}
+
+static int fill_res_qp_entry_query(struct sk_buff *msg,
+ struct rdma_restrack_entry *res,
+ struct ib_device *dev,
+ struct ib_qp *qp)
+{
+ struct ib_qp_init_attr qp_init_attr;
+ struct ib_qp_attr qp_attr;
+ int ret;
+
+ ret = ib_query_qp(qp, &qp_attr, 0, &qp_init_attr);
+ if (ret)
+ return ret;
+
+ if (qp->qp_type == IB_QPT_RC || qp->qp_type == IB_QPT_UC) {
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RQPN,
+ qp_attr.dest_qp_num))
+ goto err;
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RQ_PSN,
+ qp_attr.rq_psn))
+ goto err;
+ }
+
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_SQ_PSN, qp_attr.sq_psn))
+ goto err;
+
+ if (qp->qp_type == IB_QPT_RC || qp->qp_type == IB_QPT_UC ||
+ qp->qp_type == IB_QPT_XRC_INI || qp->qp_type == IB_QPT_XRC_TGT) {
+ if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_PATH_MIG_STATE,
+ qp_attr.path_mig_state))
+ goto err;
+ }
+ if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, qp->qp_type))
+ goto err;
+ if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, qp_attr.qp_state))
+ goto err;
+
+ if (dev->ops.fill_res_qp_entry)
+ return dev->ops.fill_res_qp_entry(msg, qp);
+ return 0;
+
+err: return -EMSGSIZE;
+}
+
+static int fill_res_qp_entry(struct sk_buff *msg, bool has_cap_net_admin,
+ struct rdma_restrack_entry *res, uint32_t port)
+{
+ struct ib_qp *qp = container_of(res, struct ib_qp, res);
+ struct ib_device *dev = qp->device;
+ int ret;
+
+ if (port && port != qp->port)
+ return -EAGAIN;
+
+ /* In create_qp() port is not set yet */
+ if (qp->port && nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, qp->port))
+ return -EMSGSIZE;
+
+ ret = nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qp->qp_num);
+ if (ret)
+ return -EMSGSIZE;
+
+ if (!rdma_is_kernel_res(res) &&
+ nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, qp->pd->res.id))
+ return -EMSGSIZE;
+
+ ret = fill_res_name_pid(msg, res);
+ if (ret)
+ return -EMSGSIZE;
+
+ return fill_res_qp_entry_query(msg, res, dev, qp);
+}
+
+static int fill_res_qp_raw_entry(struct sk_buff *msg, bool has_cap_net_admin,
+ struct rdma_restrack_entry *res, uint32_t port)
+{
+ struct ib_qp *qp = container_of(res, struct ib_qp, res);
+ struct ib_device *dev = qp->device;
+
+ if (port && port != qp->port)
+ return -EAGAIN;
+ if (!dev->ops.fill_res_qp_entry_raw)
+ return -EINVAL;
+ return dev->ops.fill_res_qp_entry_raw(msg, qp);
+}
+
+static int fill_res_cm_id_entry(struct sk_buff *msg, bool has_cap_net_admin,
+ struct rdma_restrack_entry *res, uint32_t port)
+{
+ struct rdma_id_private *id_priv =
+ container_of(res, struct rdma_id_private, res);
+ struct ib_device *dev = id_priv->id.device;
+ struct rdma_cm_id *cm_id = &id_priv->id;
+
+ if (port && port != cm_id->port_num)
+ return -EAGAIN;
+
+ if (cm_id->port_num &&
+ nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, cm_id->port_num))
+ goto err;
+
+ if (id_priv->qp_num) {
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, id_priv->qp_num))
+ goto err;
+ if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, cm_id->qp_type))
+ goto err;
+ }
+
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PS, cm_id->ps))
+ goto err;
+
+ if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, id_priv->state))
+ goto err;
+
+ if (cm_id->route.addr.src_addr.ss_family &&
+ nla_put(msg, RDMA_NLDEV_ATTR_RES_SRC_ADDR,
+ sizeof(cm_id->route.addr.src_addr),
+ &cm_id->route.addr.src_addr))
+ goto err;
+ if (cm_id->route.addr.dst_addr.ss_family &&
+ nla_put(msg, RDMA_NLDEV_ATTR_RES_DST_ADDR,
+ sizeof(cm_id->route.addr.dst_addr),
+ &cm_id->route.addr.dst_addr))
+ goto err;
+
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CM_IDN, res->id))
+ goto err;
+
+ if (fill_res_name_pid(msg, res))
+ goto err;
+
+ if (dev->ops.fill_res_cm_id_entry)
+ return dev->ops.fill_res_cm_id_entry(msg, cm_id);
+ return 0;
+
+err: return -EMSGSIZE;
+}
+
+static int fill_res_cq_entry(struct sk_buff *msg, bool has_cap_net_admin,
+ struct rdma_restrack_entry *res, uint32_t port)
+{
+ struct ib_cq *cq = container_of(res, struct ib_cq, res);
+ struct ib_device *dev = cq->device;
+
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQE, cq->cqe))
+ return -EMSGSIZE;
+ if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_USECNT,
+ atomic_read(&cq->usecnt), RDMA_NLDEV_ATTR_PAD))
+ return -EMSGSIZE;
+
+ /* Poll context is only valid for kernel CQs */
+ if (rdma_is_kernel_res(res) &&
+ nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_POLL_CTX, cq->poll_ctx))
+ return -EMSGSIZE;
+
+ if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_DIM, (cq->dim != NULL)))
+ return -EMSGSIZE;
+
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQN, res->id))
+ return -EMSGSIZE;
+ if (!rdma_is_kernel_res(res) &&
+ nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CTXN,
+ cq->uobject->uevent.uobject.context->res.id))
+ return -EMSGSIZE;
+
+ if (fill_res_name_pid(msg, res))
+ return -EMSGSIZE;
+
+ return (dev->ops.fill_res_cq_entry) ?
+ dev->ops.fill_res_cq_entry(msg, cq) : 0;
+}
+
+static int fill_res_cq_raw_entry(struct sk_buff *msg, bool has_cap_net_admin,
+ struct rdma_restrack_entry *res, uint32_t port)
+{
+ struct ib_cq *cq = container_of(res, struct ib_cq, res);
+ struct ib_device *dev = cq->device;
+
+ if (!dev->ops.fill_res_cq_entry_raw)
+ return -EINVAL;
+ return dev->ops.fill_res_cq_entry_raw(msg, cq);
+}
+
+static int fill_res_mr_entry(struct sk_buff *msg, bool has_cap_net_admin,
+ struct rdma_restrack_entry *res, uint32_t port)
+{
+ struct ib_mr *mr = container_of(res, struct ib_mr, res);
+ struct ib_device *dev = mr->pd->device;
+
+ if (has_cap_net_admin) {
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RKEY, mr->rkey))
+ return -EMSGSIZE;
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LKEY, mr->lkey))
+ return -EMSGSIZE;
+ }
+
+ if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_MRLEN, mr->length,
+ RDMA_NLDEV_ATTR_PAD))
+ return -EMSGSIZE;
+
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_MRN, res->id))
+ return -EMSGSIZE;
+
+ if (!rdma_is_kernel_res(res) &&
+ nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, mr->pd->res.id))
+ return -EMSGSIZE;
+
+ if (fill_res_name_pid(msg, res))
+ return -EMSGSIZE;
+
+ return (dev->ops.fill_res_mr_entry) ?
+ dev->ops.fill_res_mr_entry(msg, mr) :
+ 0;
+}
+
+static int fill_res_mr_raw_entry(struct sk_buff *msg, bool has_cap_net_admin,
+ struct rdma_restrack_entry *res, uint32_t port)
+{
+ struct ib_mr *mr = container_of(res, struct ib_mr, res);
+ struct ib_device *dev = mr->pd->device;
+
+ if (!dev->ops.fill_res_mr_entry_raw)
+ return -EINVAL;
+ return dev->ops.fill_res_mr_entry_raw(msg, mr);
+}
+
+static int fill_res_pd_entry(struct sk_buff *msg, bool has_cap_net_admin,
+ struct rdma_restrack_entry *res, uint32_t port)
+{
+ struct ib_pd *pd = container_of(res, struct ib_pd, res);
+
+ if (has_cap_net_admin) {
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY,
+ pd->local_dma_lkey))
+ goto err;
+ if ((pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY) &&
+ nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY,
+ pd->unsafe_global_rkey))
+ goto err;
+ }
+ if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_USECNT,
+ atomic_read(&pd->usecnt), RDMA_NLDEV_ATTR_PAD))
+ goto err;
+
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, res->id))
+ goto err;
+
+ if (!rdma_is_kernel_res(res) &&
+ nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CTXN,
+ pd->uobject->context->res.id))
+ goto err;
+
+ return fill_res_name_pid(msg, res);
+
+err: return -EMSGSIZE;
+}
+
+static int fill_res_ctx_entry(struct sk_buff *msg, bool has_cap_net_admin,
+ struct rdma_restrack_entry *res, uint32_t port)
+{
+ struct ib_ucontext *ctx = container_of(res, struct ib_ucontext, res);
+
+ if (rdma_is_kernel_res(res))
+ return 0;
+
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CTXN, ctx->res.id))
+ return -EMSGSIZE;
+
+ return fill_res_name_pid(msg, res);
+}
+
+static int fill_res_range_qp_entry(struct sk_buff *msg, uint32_t min_range,
+ uint32_t max_range)
+{
+ struct nlattr *entry_attr;
+
+ if (!min_range)
+ return 0;
+
+ entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP_ENTRY);
+ if (!entry_attr)
+ return -EMSGSIZE;
+
+ if (min_range == max_range) {
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, min_range))
+ goto err;
+ } else {
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_MIN_RANGE, min_range))
+ goto err;
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_MAX_RANGE, max_range))
+ goto err;
+ }
+ nla_nest_end(msg, entry_attr);
+ return 0;
+
+err:
+ nla_nest_cancel(msg, entry_attr);
+ return -EMSGSIZE;
+}
+
+static int fill_res_srq_qps(struct sk_buff *msg, struct ib_srq *srq)
+{
+ uint32_t min_range = 0, prev = 0;
+ struct rdma_restrack_entry *res;
+ struct rdma_restrack_root *rt;
+ struct nlattr *table_attr;
+ struct ib_qp *qp = NULL;
+ unsigned long id = 0;
+
+ table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP);
+ if (!table_attr)
+ return -EMSGSIZE;
+
+ rt = &srq->device->res[RDMA_RESTRACK_QP];
+ xa_lock(&rt->xa);
+ xa_for_each(&rt->xa, id, res) {
+ if (!rdma_restrack_get(res))
+ continue;
+
+ qp = container_of(res, struct ib_qp, res);
+ if (!qp->srq || (qp->srq->res.id != srq->res.id)) {
+ rdma_restrack_put(res);
+ continue;
+ }
+
+ if (qp->qp_num < prev)
+ /* qp_num should be ascending */
+ goto err_loop;
+
+ if (min_range == 0) {
+ min_range = qp->qp_num;
+ } else if (qp->qp_num > (prev + 1)) {
+ if (fill_res_range_qp_entry(msg, min_range, prev))
+ goto err_loop;
+
+ min_range = qp->qp_num;
+ }
+ prev = qp->qp_num;
+ rdma_restrack_put(res);
+ }
+
+ xa_unlock(&rt->xa);
+
+ if (fill_res_range_qp_entry(msg, min_range, prev))
+ goto err;
+
+ nla_nest_end(msg, table_attr);
+ return 0;
+
+err_loop:
+ rdma_restrack_put(res);
+ xa_unlock(&rt->xa);
+err:
+ nla_nest_cancel(msg, table_attr);
+ return -EMSGSIZE;
+}
+
+static int fill_res_srq_entry(struct sk_buff *msg, bool has_cap_net_admin,
+ struct rdma_restrack_entry *res, uint32_t port)
+{
+ struct ib_srq *srq = container_of(res, struct ib_srq, res);
+ struct ib_device *dev = srq->device;
+
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_SRQN, srq->res.id))
+ goto err;
+
+ if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, srq->srq_type))
+ goto err;
+
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, srq->pd->res.id))
+ goto err;
+
+ if (ib_srq_has_cq(srq->srq_type)) {
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQN,
+ srq->ext.cq->res.id))
+ goto err;
+ }
+
+ if (fill_res_srq_qps(msg, srq))
+ goto err;
+
+ if (fill_res_name_pid(msg, res))
+ goto err;
+
+ if (dev->ops.fill_res_srq_entry)
+ return dev->ops.fill_res_srq_entry(msg, srq);
+
+ return 0;
+
+err:
+ return -EMSGSIZE;
+}
+
+static int fill_res_srq_raw_entry(struct sk_buff *msg, bool has_cap_net_admin,
+ struct rdma_restrack_entry *res, uint32_t port)
+{
+ struct ib_srq *srq = container_of(res, struct ib_srq, res);
+ struct ib_device *dev = srq->device;
+
+ if (!dev->ops.fill_res_srq_entry_raw)
+ return -EINVAL;
+ return dev->ops.fill_res_srq_entry_raw(msg, srq);
+}
+
+static int fill_stat_counter_mode(struct sk_buff *msg,
+ struct rdma_counter *counter)
+{
+ struct rdma_counter_mode *m = &counter->mode;
+
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_MODE, m->mode))
+ return -EMSGSIZE;
+
+ if (m->mode == RDMA_COUNTER_MODE_AUTO) {
+ if ((m->mask & RDMA_COUNTER_MASK_QP_TYPE) &&
+ nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, m->param.qp_type))
+ return -EMSGSIZE;
+
+ if ((m->mask & RDMA_COUNTER_MASK_PID) &&
+ fill_res_name_pid(msg, &counter->res))
+ return -EMSGSIZE;
+ }
+
+ return 0;
+}
+
+static int fill_stat_counter_qp_entry(struct sk_buff *msg, u32 qpn)
+{
+ struct nlattr *entry_attr;
+
+ entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP_ENTRY);
+ if (!entry_attr)
+ return -EMSGSIZE;
+
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qpn))
+ goto err;
+
+ nla_nest_end(msg, entry_attr);
+ return 0;
+
+err:
+ nla_nest_cancel(msg, entry_attr);
+ return -EMSGSIZE;
+}
+
+static int fill_stat_counter_qps(struct sk_buff *msg,
+ struct rdma_counter *counter)
+{
+ struct rdma_restrack_entry *res;
+ struct rdma_restrack_root *rt;
+ struct nlattr *table_attr;
+ struct ib_qp *qp = NULL;
+ unsigned long id = 0;
+ int ret = 0;
+
+ table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP);
+ if (!table_attr)
+ return -EMSGSIZE;
+
+ rt = &counter->device->res[RDMA_RESTRACK_QP];
+ xa_lock(&rt->xa);
+ xa_for_each(&rt->xa, id, res) {
+ qp = container_of(res, struct ib_qp, res);
+ if (!qp->counter || (qp->counter->id != counter->id))
+ continue;
+
+ ret = fill_stat_counter_qp_entry(msg, qp->qp_num);
+ if (ret)
+ goto err;
+ }
+
+ xa_unlock(&rt->xa);
+ nla_nest_end(msg, table_attr);
+ return 0;
+
+err:
+ xa_unlock(&rt->xa);
+ nla_nest_cancel(msg, table_attr);
+ return ret;
+}
+
+int rdma_nl_stat_hwcounter_entry(struct sk_buff *msg, const char *name,
+ u64 value)
+{
+ struct nlattr *entry_attr;
+
+ entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY);
+ if (!entry_attr)
+ return -EMSGSIZE;
+
+ if (nla_put_string(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_NAME,
+ name))
+ goto err;
+ if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_VALUE,
+ value, RDMA_NLDEV_ATTR_PAD))
+ goto err;
+
+ nla_nest_end(msg, entry_attr);
+ return 0;
+
+err:
+ nla_nest_cancel(msg, entry_attr);
+ return -EMSGSIZE;
+}
+EXPORT_SYMBOL(rdma_nl_stat_hwcounter_entry);
+
+static int fill_stat_mr_entry(struct sk_buff *msg, bool has_cap_net_admin,
+ struct rdma_restrack_entry *res, uint32_t port)
+{
+ struct ib_mr *mr = container_of(res, struct ib_mr, res);
+ struct ib_device *dev = mr->pd->device;
+
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_MRN, res->id))
+ goto err;
+
+ if (dev->ops.fill_stat_mr_entry)
+ return dev->ops.fill_stat_mr_entry(msg, mr);
+ return 0;
+
+err:
+ return -EMSGSIZE;
+}
+
+static int fill_stat_counter_hwcounters(struct sk_buff *msg,
+ struct rdma_counter *counter)
+{
+ struct rdma_hw_stats *st = counter->stats;
+ struct nlattr *table_attr;
+ int i;
+
+ table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTERS);
+ if (!table_attr)
+ return -EMSGSIZE;
+
+ mutex_lock(&st->lock);
+ for (i = 0; i < st->num_counters; i++) {
+ if (test_bit(i, st->is_disabled))
+ continue;
+ if (rdma_nl_stat_hwcounter_entry(msg, st->descs[i].name,
+ st->value[i]))
+ goto err;
+ }
+ mutex_unlock(&st->lock);
+
+ nla_nest_end(msg, table_attr);
+ return 0;
+
+err:
+ mutex_unlock(&st->lock);
+ nla_nest_cancel(msg, table_attr);
+ return -EMSGSIZE;
+}
+
+static int fill_res_counter_entry(struct sk_buff *msg, bool has_cap_net_admin,
+ struct rdma_restrack_entry *res,
+ uint32_t port)
+{
+ struct rdma_counter *counter =
+ container_of(res, struct rdma_counter, res);
+
+ if (port && port != counter->port)
+ return -EAGAIN;
+
+ /* Dump it even query failed */
+ rdma_counter_query_stats(counter);
+
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, counter->port) ||
+ nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, counter->id) ||
+ fill_stat_counter_mode(msg, counter) ||
+ fill_stat_counter_qps(msg, counter) ||
+ fill_stat_counter_hwcounters(msg, counter))
+ return -EMSGSIZE;
+
+ return 0;
+}
+
+static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
+ struct ib_device *device;
+ struct sk_buff *msg;
+ u32 index;
+ int err;
+
+ err = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
+ nldev_policy, NL_VALIDATE_LIBERAL, extack);
+ if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
+ return -EINVAL;
+
+ index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
+
+ device = ib_device_get_by_index(sock_net(skb->sk), index);
+ if (!device)
+ return -EINVAL;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg) {
+ err = -ENOMEM;
+ goto err;
+ }
+
+ nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
+ RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
+ 0, 0);
+ if (!nlh) {
+ err = -EMSGSIZE;
+ goto err_free;
+ }
+
+ err = fill_dev_info(msg, device);
+ if (err)
+ goto err_free;
+
+ nlmsg_end(msg, nlh);
+
+ ib_device_put(device);
+ return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
+
+err_free:
+ nlmsg_free(msg);
+err:
+ ib_device_put(device);
+ return err;
+}
+
+static int nldev_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
+ struct ib_device *device;
+ u32 index;
+ int err;
+
+ err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
+ nldev_policy, extack);
+ if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
+ return -EINVAL;
+
+ index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
+ device = ib_device_get_by_index(sock_net(skb->sk), index);
+ if (!device)
+ return -EINVAL;
+
+ if (tb[RDMA_NLDEV_ATTR_DEV_NAME]) {
+ char name[IB_DEVICE_NAME_MAX] = {};
+
+ nla_strscpy(name, tb[RDMA_NLDEV_ATTR_DEV_NAME],
+ IB_DEVICE_NAME_MAX);
+ if (strlen(name) == 0) {
+ err = -EINVAL;
+ goto done;
+ }
+ err = ib_device_rename(device, name);
+ goto done;
+ }
+
+ if (tb[RDMA_NLDEV_NET_NS_FD]) {
+ u32 ns_fd;
+
+ ns_fd = nla_get_u32(tb[RDMA_NLDEV_NET_NS_FD]);
+ err = ib_device_set_netns_put(skb, device, ns_fd);
+ goto put_done;
+ }
+
+ if (tb[RDMA_NLDEV_ATTR_DEV_DIM]) {
+ u8 use_dim;
+
+ use_dim = nla_get_u8(tb[RDMA_NLDEV_ATTR_DEV_DIM]);
+ err = ib_device_set_dim(device, use_dim);
+ goto done;
+ }
+
+done:
+ ib_device_put(device);
+put_done:
+ return err;
+}
+
+static int _nldev_get_dumpit(struct ib_device *device,
+ struct sk_buff *skb,
+ struct netlink_callback *cb,
+ unsigned int idx)
+{
+ int start = cb->args[0];
+ struct nlmsghdr *nlh;
+
+ if (idx < start)
+ return 0;
+
+ nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
+ RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
+ 0, NLM_F_MULTI);
+
+ if (!nlh || fill_dev_info(skb, device)) {
+ nlmsg_cancel(skb, nlh);
+ goto out;
+ }
+
+ nlmsg_end(skb, nlh);
+
+ idx++;
+
+out: cb->args[0] = idx;
+ return skb->len;
+}
+
+static int nldev_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ /*
+ * There is no need to take lock, because
+ * we are relying on ib_core's locking.
+ */
+ return ib_enum_all_devs(_nldev_get_dumpit, skb, cb);
+}
+
+static int nldev_port_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
+ struct ib_device *device;
+ struct sk_buff *msg;
+ u32 index;
+ u32 port;
+ int err;
+
+ err = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
+ nldev_policy, NL_VALIDATE_LIBERAL, extack);
+ if (err ||
+ !tb[RDMA_NLDEV_ATTR_DEV_INDEX] ||
+ !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
+ return -EINVAL;
+
+ index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
+ device = ib_device_get_by_index(sock_net(skb->sk), index);
+ if (!device)
+ return -EINVAL;
+
+ port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
+ if (!rdma_is_port_valid(device, port)) {
+ err = -EINVAL;
+ goto err;
+ }
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg) {
+ err = -ENOMEM;
+ goto err;
+ }
+
+ nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
+ RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
+ 0, 0);
+ if (!nlh) {
+ err = -EMSGSIZE;
+ goto err_free;
+ }
+
+ err = fill_port_info(msg, device, port, sock_net(skb->sk));
+ if (err)
+ goto err_free;
+
+ nlmsg_end(msg, nlh);
+ ib_device_put(device);
+
+ return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
+
+err_free:
+ nlmsg_free(msg);
+err:
+ ib_device_put(device);
+ return err;
+}
+
+static int nldev_port_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
+ struct ib_device *device;
+ int start = cb->args[0];
+ struct nlmsghdr *nlh;
+ u32 idx = 0;
+ u32 ifindex;
+ int err;
+ unsigned int p;
+
+ err = __nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
+ nldev_policy, NL_VALIDATE_LIBERAL, NULL);
+ if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
+ return -EINVAL;
+
+ ifindex = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
+ device = ib_device_get_by_index(sock_net(skb->sk), ifindex);
+ if (!device)
+ return -EINVAL;
+
+ rdma_for_each_port (device, p) {
+ /*
+ * The dumpit function returns all information from specific
+ * index. This specific index is taken from the netlink
+ * messages request sent by user and it is available
+ * in cb->args[0].
+ *
+ * Usually, the user doesn't fill this field and it causes
+ * to return everything.
+ *
+ */
+ if (idx < start) {
+ idx++;
+ continue;
+ }
+
+ nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
+ RDMA_NLDEV_CMD_PORT_GET),
+ 0, NLM_F_MULTI);
+
+ if (!nlh || fill_port_info(skb, device, p, sock_net(skb->sk))) {
+ nlmsg_cancel(skb, nlh);
+ goto out;
+ }
+ idx++;
+ nlmsg_end(skb, nlh);
+ }
+
+out:
+ ib_device_put(device);
+ cb->args[0] = idx;
+ return skb->len;
+}
+
+static int nldev_res_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
+ bool show_details = false;
+ struct ib_device *device;
+ struct sk_buff *msg;
+ u32 index;
+ int ret;
+
+ ret = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
+ nldev_policy, NL_VALIDATE_LIBERAL, extack);
+ if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
+ return -EINVAL;
+
+ index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
+ device = ib_device_get_by_index(sock_net(skb->sk), index);
+ if (!device)
+ return -EINVAL;
+
+ if (tb[RDMA_NLDEV_ATTR_DRIVER_DETAILS])
+ show_details = nla_get_u8(tb[RDMA_NLDEV_ATTR_DRIVER_DETAILS]);
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
+ RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_GET),
+ 0, 0);
+ if (!nlh) {
+ ret = -EMSGSIZE;
+ goto err_free;
+ }
+
+ ret = fill_res_info(msg, device, show_details);
+ if (ret)
+ goto err_free;
+
+ nlmsg_end(msg, nlh);
+ ib_device_put(device);
+ return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
+
+err_free:
+ nlmsg_free(msg);
+err:
+ ib_device_put(device);
+ return ret;
+}
+
+static int _nldev_res_get_dumpit(struct ib_device *device,
+ struct sk_buff *skb,
+ struct netlink_callback *cb,
+ unsigned int idx)
+{
+ int start = cb->args[0];
+ struct nlmsghdr *nlh;
+
+ if (idx < start)
+ return 0;
+
+ nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
+ RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_GET),
+ 0, NLM_F_MULTI);
+
+ if (!nlh || fill_res_info(skb, device, false)) {
+ nlmsg_cancel(skb, nlh);
+ goto out;
+ }
+ nlmsg_end(skb, nlh);
+
+ idx++;
+
+out:
+ cb->args[0] = idx;
+ return skb->len;
+}
+
+static int nldev_res_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ return ib_enum_all_devs(_nldev_res_get_dumpit, skb, cb);
+}
+
+struct nldev_fill_res_entry {
+ enum rdma_nldev_attr nldev_attr;
+ u8 flags;
+ u32 entry;
+ u32 id;
+};
+
+enum nldev_res_flags {
+ NLDEV_PER_DEV = 1 << 0,
+};
+
+static const struct nldev_fill_res_entry fill_entries[RDMA_RESTRACK_MAX] = {
+ [RDMA_RESTRACK_QP] = {
+ .nldev_attr = RDMA_NLDEV_ATTR_RES_QP,
+ .entry = RDMA_NLDEV_ATTR_RES_QP_ENTRY,
+ .id = RDMA_NLDEV_ATTR_RES_LQPN,
+ },
+ [RDMA_RESTRACK_CM_ID] = {
+ .nldev_attr = RDMA_NLDEV_ATTR_RES_CM_ID,
+ .entry = RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY,
+ .id = RDMA_NLDEV_ATTR_RES_CM_IDN,
+ },
+ [RDMA_RESTRACK_CQ] = {
+ .nldev_attr = RDMA_NLDEV_ATTR_RES_CQ,
+ .flags = NLDEV_PER_DEV,
+ .entry = RDMA_NLDEV_ATTR_RES_CQ_ENTRY,
+ .id = RDMA_NLDEV_ATTR_RES_CQN,
+ },
+ [RDMA_RESTRACK_MR] = {
+ .nldev_attr = RDMA_NLDEV_ATTR_RES_MR,
+ .flags = NLDEV_PER_DEV,
+ .entry = RDMA_NLDEV_ATTR_RES_MR_ENTRY,
+ .id = RDMA_NLDEV_ATTR_RES_MRN,
+ },
+ [RDMA_RESTRACK_PD] = {
+ .nldev_attr = RDMA_NLDEV_ATTR_RES_PD,
+ .flags = NLDEV_PER_DEV,
+ .entry = RDMA_NLDEV_ATTR_RES_PD_ENTRY,
+ .id = RDMA_NLDEV_ATTR_RES_PDN,
+ },
+ [RDMA_RESTRACK_COUNTER] = {
+ .nldev_attr = RDMA_NLDEV_ATTR_STAT_COUNTER,
+ .entry = RDMA_NLDEV_ATTR_STAT_COUNTER_ENTRY,
+ .id = RDMA_NLDEV_ATTR_STAT_COUNTER_ID,
+ },
+ [RDMA_RESTRACK_CTX] = {
+ .nldev_attr = RDMA_NLDEV_ATTR_RES_CTX,
+ .flags = NLDEV_PER_DEV,
+ .entry = RDMA_NLDEV_ATTR_RES_CTX_ENTRY,
+ .id = RDMA_NLDEV_ATTR_RES_CTXN,
+ },
+ [RDMA_RESTRACK_SRQ] = {
+ .nldev_attr = RDMA_NLDEV_ATTR_RES_SRQ,
+ .flags = NLDEV_PER_DEV,
+ .entry = RDMA_NLDEV_ATTR_RES_SRQ_ENTRY,
+ .id = RDMA_NLDEV_ATTR_RES_SRQN,
+ },
+
+};
+
+static noinline_for_stack int
+res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack,
+ enum rdma_restrack_type res_type,
+ res_fill_func_t fill_func)
+{
+ const struct nldev_fill_res_entry *fe = &fill_entries[res_type];
+ struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
+ struct rdma_restrack_entry *res;
+ struct ib_device *device;
+ u32 index, id, port = 0;
+ bool has_cap_net_admin;
+ struct sk_buff *msg;
+ int ret;
+
+ ret = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
+ nldev_policy, NL_VALIDATE_LIBERAL, extack);
+ if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !fe->id || !tb[fe->id])
+ return -EINVAL;
+
+ index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
+ device = ib_device_get_by_index(sock_net(skb->sk), index);
+ if (!device)
+ return -EINVAL;
+
+ if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
+ port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
+ if (!rdma_is_port_valid(device, port)) {
+ ret = -EINVAL;
+ goto err;
+ }
+ }
+
+ if ((port && fe->flags & NLDEV_PER_DEV) ||
+ (!port && ~fe->flags & NLDEV_PER_DEV)) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ id = nla_get_u32(tb[fe->id]);
+ res = rdma_restrack_get_byid(device, res_type, id);
+ if (IS_ERR(res)) {
+ ret = PTR_ERR(res);
+ goto err;
+ }
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg) {
+ ret = -ENOMEM;
+ goto err_get;
+ }
+
+ nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
+ RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
+ RDMA_NL_GET_OP(nlh->nlmsg_type)),
+ 0, 0);
+
+ if (!nlh || fill_nldev_handle(msg, device)) {
+ ret = -EMSGSIZE;
+ goto err_free;
+ }
+
+ has_cap_net_admin = netlink_capable(skb, CAP_NET_ADMIN);
+
+ ret = fill_func(msg, has_cap_net_admin, res, port);
+ if (ret)
+ goto err_free;
+
+ rdma_restrack_put(res);
+ nlmsg_end(msg, nlh);
+ ib_device_put(device);
+ return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
+
+err_free:
+ nlmsg_free(msg);
+err_get:
+ rdma_restrack_put(res);
+err:
+ ib_device_put(device);
+ return ret;
+}
+
+static int res_get_common_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb,
+ enum rdma_restrack_type res_type,
+ res_fill_func_t fill_func)
+{
+ const struct nldev_fill_res_entry *fe = &fill_entries[res_type];
+ struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
+ struct rdma_restrack_entry *res;
+ struct rdma_restrack_root *rt;
+ int err, ret = 0, idx = 0;
+ bool show_details = false;
+ struct nlattr *table_attr;
+ struct nlattr *entry_attr;
+ struct ib_device *device;
+ int start = cb->args[0];
+ bool has_cap_net_admin;
+ struct nlmsghdr *nlh;
+ unsigned long id;
+ u32 index, port = 0;
+ bool filled = false;
+
+ err = __nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
+ nldev_policy, NL_VALIDATE_LIBERAL, NULL);
+ /*
+ * Right now, we are expecting the device index to get res information,
+ * but it is possible to extend this code to return all devices in
+ * one shot by checking the existence of RDMA_NLDEV_ATTR_DEV_INDEX.
+ * if it doesn't exist, we will iterate over all devices.
+ *
+ * But it is not needed for now.
+ */
+ if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
+ return -EINVAL;
+
+ index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
+ device = ib_device_get_by_index(sock_net(skb->sk), index);
+ if (!device)
+ return -EINVAL;
+
+ if (tb[RDMA_NLDEV_ATTR_DRIVER_DETAILS])
+ show_details = nla_get_u8(tb[RDMA_NLDEV_ATTR_DRIVER_DETAILS]);
+
+ /*
+ * If no PORT_INDEX is supplied, we will return all QPs from that device
+ */
+ if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
+ port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
+ if (!rdma_is_port_valid(device, port)) {
+ ret = -EINVAL;
+ goto err_index;
+ }
+ }
+
+ nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
+ RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
+ RDMA_NL_GET_OP(cb->nlh->nlmsg_type)),
+ 0, NLM_F_MULTI);
+
+ if (!nlh || fill_nldev_handle(skb, device)) {
+ ret = -EMSGSIZE;
+ goto err;
+ }
+
+ table_attr = nla_nest_start_noflag(skb, fe->nldev_attr);
+ if (!table_attr) {
+ ret = -EMSGSIZE;
+ goto err;
+ }
+
+ has_cap_net_admin = netlink_capable(cb->skb, CAP_NET_ADMIN);
+
+ rt = &device->res[res_type];
+ xa_lock(&rt->xa);
+ /*
+ * FIXME: if the skip ahead is something common this loop should
+ * use xas_for_each & xas_pause to optimize, we can have a lot of
+ * objects.
+ */
+ xa_for_each(&rt->xa, id, res) {
+ if (xa_get_mark(&rt->xa, res->id, RESTRACK_DD) && !show_details)
+ goto next;
+
+ if (idx < start || !rdma_restrack_get(res))
+ goto next;
+
+ xa_unlock(&rt->xa);
+
+ filled = true;
+
+ entry_attr = nla_nest_start_noflag(skb, fe->entry);
+ if (!entry_attr) {
+ ret = -EMSGSIZE;
+ rdma_restrack_put(res);
+ goto msg_full;
+ }
+
+ ret = fill_func(skb, has_cap_net_admin, res, port);
+
+ rdma_restrack_put(res);
+
+ if (ret) {
+ nla_nest_cancel(skb, entry_attr);
+ if (ret == -EMSGSIZE)
+ goto msg_full;
+ if (ret == -EAGAIN)
+ goto again;
+ goto res_err;
+ }
+ nla_nest_end(skb, entry_attr);
+again: xa_lock(&rt->xa);
+next: idx++;
+ }
+ xa_unlock(&rt->xa);
+
+msg_full:
+ nla_nest_end(skb, table_attr);
+ nlmsg_end(skb, nlh);
+ cb->args[0] = idx;
+
+ /*
+ * No more entries to fill, cancel the message and
+ * return 0 to mark end of dumpit.
+ */
+ if (!filled)
+ goto err;
+
+ ib_device_put(device);
+ return skb->len;
+
+res_err:
+ nla_nest_cancel(skb, table_attr);
+
+err:
+ nlmsg_cancel(skb, nlh);
+
+err_index:
+ ib_device_put(device);
+ return ret;
+}
+
+#define RES_GET_FUNCS(name, type) \
+ static int nldev_res_get_##name##_dumpit(struct sk_buff *skb, \
+ struct netlink_callback *cb) \
+ { \
+ return res_get_common_dumpit(skb, cb, type, \
+ fill_res_##name##_entry); \
+ } \
+ static int nldev_res_get_##name##_doit(struct sk_buff *skb, \
+ struct nlmsghdr *nlh, \
+ struct netlink_ext_ack *extack) \
+ { \
+ return res_get_common_doit(skb, nlh, extack, type, \
+ fill_res_##name##_entry); \
+ }
+
+RES_GET_FUNCS(qp, RDMA_RESTRACK_QP);
+RES_GET_FUNCS(qp_raw, RDMA_RESTRACK_QP);
+RES_GET_FUNCS(cm_id, RDMA_RESTRACK_CM_ID);
+RES_GET_FUNCS(cq, RDMA_RESTRACK_CQ);
+RES_GET_FUNCS(cq_raw, RDMA_RESTRACK_CQ);
+RES_GET_FUNCS(pd, RDMA_RESTRACK_PD);
+RES_GET_FUNCS(mr, RDMA_RESTRACK_MR);
+RES_GET_FUNCS(mr_raw, RDMA_RESTRACK_MR);
+RES_GET_FUNCS(counter, RDMA_RESTRACK_COUNTER);
+RES_GET_FUNCS(ctx, RDMA_RESTRACK_CTX);
+RES_GET_FUNCS(srq, RDMA_RESTRACK_SRQ);
+RES_GET_FUNCS(srq_raw, RDMA_RESTRACK_SRQ);
+
+static LIST_HEAD(link_ops);
+static DECLARE_RWSEM(link_ops_rwsem);
+
+static const struct rdma_link_ops *link_ops_get(const char *type)
+{
+ const struct rdma_link_ops *ops;
+
+ list_for_each_entry(ops, &link_ops, list) {
+ if (!strcmp(ops->type, type))
+ goto out;
+ }
+ ops = NULL;
+out:
+ return ops;
+}
+
+void rdma_link_register(struct rdma_link_ops *ops)
+{
+ down_write(&link_ops_rwsem);
+ if (WARN_ON_ONCE(link_ops_get(ops->type)))
+ goto out;
+ list_add(&ops->list, &link_ops);
+out:
+ up_write(&link_ops_rwsem);
+}
+EXPORT_SYMBOL(rdma_link_register);
+
+void rdma_link_unregister(struct rdma_link_ops *ops)
+{
+ down_write(&link_ops_rwsem);
+ list_del(&ops->list);
+ up_write(&link_ops_rwsem);
+}
+EXPORT_SYMBOL(rdma_link_unregister);
+
+static int nldev_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
+ char ibdev_name[IB_DEVICE_NAME_MAX];
+ const struct rdma_link_ops *ops;
+ char ndev_name[IFNAMSIZ];
+ struct net_device *ndev;
+ char type[IFNAMSIZ];
+ int err;
+
+ err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
+ nldev_policy, extack);
+ if (err || !tb[RDMA_NLDEV_ATTR_DEV_NAME] ||
+ !tb[RDMA_NLDEV_ATTR_LINK_TYPE] || !tb[RDMA_NLDEV_ATTR_NDEV_NAME])
+ return -EINVAL;
+
+ nla_strscpy(ibdev_name, tb[RDMA_NLDEV_ATTR_DEV_NAME],
+ sizeof(ibdev_name));
+ if (strchr(ibdev_name, '%') || strlen(ibdev_name) == 0)
+ return -EINVAL;
+
+ nla_strscpy(type, tb[RDMA_NLDEV_ATTR_LINK_TYPE], sizeof(type));
+ nla_strscpy(ndev_name, tb[RDMA_NLDEV_ATTR_NDEV_NAME],
+ sizeof(ndev_name));
+
+ ndev = dev_get_by_name(sock_net(skb->sk), ndev_name);
+ if (!ndev)
+ return -ENODEV;
+
+ down_read(&link_ops_rwsem);
+ ops = link_ops_get(type);
+#ifdef CONFIG_MODULES
+ if (!ops) {
+ up_read(&link_ops_rwsem);
+ request_module("rdma-link-%s", type);
+ down_read(&link_ops_rwsem);
+ ops = link_ops_get(type);
+ }
+#endif
+ err = ops ? ops->newlink(ibdev_name, ndev) : -EINVAL;
+ up_read(&link_ops_rwsem);
+ dev_put(ndev);
+
+ return err;
+}
+
+static int nldev_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
+ struct ib_device *device;
+ u32 index;
+ int err;
+
+ err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
+ nldev_policy, extack);
+ if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
+ return -EINVAL;
+
+ index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
+ device = ib_device_get_by_index(sock_net(skb->sk), index);
+ if (!device)
+ return -EINVAL;
+
+ if (!(device->attrs.kernel_cap_flags & IBK_ALLOW_USER_UNREG)) {
+ ib_device_put(device);
+ return -EINVAL;
+ }
+
+ ib_unregister_device_and_put(device);
+ return 0;
+}
+
+static int nldev_get_chardev(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
+ char client_name[RDMA_NLDEV_ATTR_CHARDEV_TYPE_SIZE];
+ struct ib_client_nl_info data = {};
+ struct ib_device *ibdev = NULL;
+ struct sk_buff *msg;
+ u32 index;
+ int err;
+
+ err = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, nldev_policy,
+ NL_VALIDATE_LIBERAL, extack);
+ if (err || !tb[RDMA_NLDEV_ATTR_CHARDEV_TYPE])
+ return -EINVAL;
+
+ nla_strscpy(client_name, tb[RDMA_NLDEV_ATTR_CHARDEV_TYPE],
+ sizeof(client_name));
+
+ if (tb[RDMA_NLDEV_ATTR_DEV_INDEX]) {
+ index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
+ ibdev = ib_device_get_by_index(sock_net(skb->sk), index);
+ if (!ibdev)
+ return -EINVAL;
+
+ if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
+ data.port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
+ if (!rdma_is_port_valid(ibdev, data.port)) {
+ err = -EINVAL;
+ goto out_put;
+ }
+ } else {
+ data.port = -1;
+ }
+ } else if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
+ return -EINVAL;
+ }
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg) {
+ err = -ENOMEM;
+ goto out_put;
+ }
+ nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
+ RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
+ RDMA_NLDEV_CMD_GET_CHARDEV),
+ 0, 0);
+ if (!nlh) {
+ err = -EMSGSIZE;
+ goto out_nlmsg;
+ }
+
+ data.nl_msg = msg;
+ err = ib_get_client_nl_info(ibdev, client_name, &data);
+ if (err)
+ goto out_nlmsg;
+
+ err = nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CHARDEV,
+ huge_encode_dev(data.cdev->devt),
+ RDMA_NLDEV_ATTR_PAD);
+ if (err)
+ goto out_data;
+ err = nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CHARDEV_ABI, data.abi,
+ RDMA_NLDEV_ATTR_PAD);
+ if (err)
+ goto out_data;
+ if (nla_put_string(msg, RDMA_NLDEV_ATTR_CHARDEV_NAME,
+ dev_name(data.cdev))) {
+ err = -EMSGSIZE;
+ goto out_data;
+ }
+
+ nlmsg_end(msg, nlh);
+ put_device(data.cdev);
+ if (ibdev)
+ ib_device_put(ibdev);
+ return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
+
+out_data:
+ put_device(data.cdev);
+out_nlmsg:
+ nlmsg_free(msg);
+out_put:
+ if (ibdev)
+ ib_device_put(ibdev);
+ return err;
+}
+
+static int nldev_sys_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
+ struct sk_buff *msg;
+ int err;
+
+ err = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
+ nldev_policy, NL_VALIDATE_LIBERAL, extack);
+ if (err)
+ return err;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
+ RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
+ RDMA_NLDEV_CMD_SYS_GET),
+ 0, 0);
+ if (!nlh) {
+ nlmsg_free(msg);
+ return -EMSGSIZE;
+ }
+
+ err = nla_put_u8(msg, RDMA_NLDEV_SYS_ATTR_NETNS_MODE,
+ (u8)ib_devices_shared_netns);
+ if (err) {
+ nlmsg_free(msg);
+ return err;
+ }
+
+ err = nla_put_u8(msg, RDMA_NLDEV_SYS_ATTR_PRIVILEGED_QKEY_MODE,
+ (u8)privileged_qkey);
+ if (err) {
+ nlmsg_free(msg);
+ return err;
+ }
+
+ err = nla_put_u8(msg, RDMA_NLDEV_SYS_ATTR_MONITOR_MODE, 1);
+ if (err) {
+ nlmsg_free(msg);
+ return err;
+ }
+ /*
+ * Copy-on-fork is supported.
+ * See commits:
+ * 70e806e4e645 ("mm: Do early cow for pinned pages during fork() for ptes")
+ * 4eae4efa2c29 ("hugetlb: do early cow when page pinned on src mm")
+ * for more details. Don't backport this without them.
+ *
+ * Return value ignored on purpose, assume copy-on-fork is not
+ * supported in case of failure.
+ */
+ nla_put_u8(msg, RDMA_NLDEV_SYS_ATTR_COPY_ON_FORK, 1);
+
+ nlmsg_end(msg, nlh);
+ return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
+}
+
+static int nldev_set_sys_set_netns_doit(struct nlattr *tb[])
+{
+ u8 enable;
+ int err;
+
+ enable = nla_get_u8(tb[RDMA_NLDEV_SYS_ATTR_NETNS_MODE]);
+ /* Only 0 and 1 are supported */
+ if (enable > 1)
+ return -EINVAL;
+
+ err = rdma_compatdev_set(enable);
+ return err;
+}
+
+static int nldev_set_sys_set_pqkey_doit(struct nlattr *tb[])
+{
+ u8 enable;
+
+ enable = nla_get_u8(tb[RDMA_NLDEV_SYS_ATTR_PRIVILEGED_QKEY_MODE]);
+ /* Only 0 and 1 are supported */
+ if (enable > 1)
+ return -EINVAL;
+
+ privileged_qkey = enable;
+ return 0;
+}
+
+static int nldev_set_sys_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
+ int err;
+
+ err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
+ nldev_policy, extack);
+ if (err)
+ return -EINVAL;
+
+ if (tb[RDMA_NLDEV_SYS_ATTR_NETNS_MODE])
+ return nldev_set_sys_set_netns_doit(tb);
+
+ if (tb[RDMA_NLDEV_SYS_ATTR_PRIVILEGED_QKEY_MODE])
+ return nldev_set_sys_set_pqkey_doit(tb);
+
+ return -EINVAL;
+}
+
+
+static int nldev_stat_set_mode_doit(struct sk_buff *msg,
+ struct netlink_ext_ack *extack,
+ struct nlattr *tb[],
+ struct ib_device *device, u32 port)
+{
+ u32 mode, mask = 0, qpn, cntn = 0;
+ bool opcnt = false;
+ int ret;
+
+ /* Currently only counter for QP is supported */
+ if (!tb[RDMA_NLDEV_ATTR_STAT_RES] ||
+ nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES]) != RDMA_NLDEV_ATTR_RES_QP)
+ return -EINVAL;
+
+ if (tb[RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENABLED])
+ opcnt = !!nla_get_u8(
+ tb[RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENABLED]);
+
+ mode = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_MODE]);
+ if (mode == RDMA_COUNTER_MODE_AUTO) {
+ if (tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK])
+ mask = nla_get_u32(
+ tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK]);
+ return rdma_counter_set_auto_mode(device, port, mask, opcnt,
+ extack);
+ }
+
+ if (!tb[RDMA_NLDEV_ATTR_RES_LQPN])
+ return -EINVAL;
+
+ qpn = nla_get_u32(tb[RDMA_NLDEV_ATTR_RES_LQPN]);
+ if (tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]) {
+ cntn = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]);
+ ret = rdma_counter_bind_qpn(device, port, qpn, cntn);
+ if (ret)
+ return ret;
+ } else {
+ ret = rdma_counter_bind_qpn_alloc(device, port, qpn, &cntn);
+ if (ret)
+ return ret;
+ }
+
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, cntn) ||
+ nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qpn)) {
+ ret = -EMSGSIZE;
+ goto err_fill;
+ }
+
+ return 0;
+
+err_fill:
+ rdma_counter_unbind_qpn(device, port, qpn, cntn);
+ return ret;
+}
+
+static int nldev_stat_set_counter_dynamic_doit(struct nlattr *tb[],
+ struct ib_device *device,
+ u32 port)
+{
+ struct rdma_hw_stats *stats;
+ struct nlattr *entry_attr;
+ unsigned long *target;
+ int rem, i, ret = 0;
+ u32 index;
+
+ stats = ib_get_hw_stats_port(device, port);
+ if (!stats)
+ return -EINVAL;
+
+ target = kcalloc(BITS_TO_LONGS(stats->num_counters),
+ sizeof(*stats->is_disabled), GFP_KERNEL);
+ if (!target)
+ return -ENOMEM;
+
+ nla_for_each_nested(entry_attr, tb[RDMA_NLDEV_ATTR_STAT_HWCOUNTERS],
+ rem) {
+ index = nla_get_u32(entry_attr);
+ if ((index >= stats->num_counters) ||
+ !(stats->descs[index].flags & IB_STAT_FLAG_OPTIONAL)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ set_bit(index, target);
+ }
+
+ for (i = 0; i < stats->num_counters; i++) {
+ if (!(stats->descs[i].flags & IB_STAT_FLAG_OPTIONAL))
+ continue;
+
+ ret = rdma_counter_modify(device, port, i, test_bit(i, target));
+ if (ret)
+ goto out;
+ }
+
+out:
+ kfree(target);
+ return ret;
+}
+
+static int nldev_stat_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
+ struct ib_device *device;
+ struct sk_buff *msg;
+ u32 index, port;
+ int ret;
+
+ ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, nldev_policy,
+ extack);
+ if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] ||
+ !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
+ return -EINVAL;
+
+ index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
+ device = ib_device_get_by_index(sock_net(skb->sk), index);
+ if (!device)
+ return -EINVAL;
+
+ port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
+ if (!rdma_is_port_valid(device, port)) {
+ ret = -EINVAL;
+ goto err_put_device;
+ }
+
+ if (!tb[RDMA_NLDEV_ATTR_STAT_MODE] &&
+ !tb[RDMA_NLDEV_ATTR_STAT_HWCOUNTERS]) {
+ ret = -EINVAL;
+ goto err_put_device;
+ }
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg) {
+ ret = -ENOMEM;
+ goto err_put_device;
+ }
+ nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
+ RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
+ RDMA_NLDEV_CMD_STAT_SET),
+ 0, 0);
+ if (!nlh || fill_nldev_handle(msg, device) ||
+ nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port)) {
+ ret = -EMSGSIZE;
+ goto err_free_msg;
+ }
+
+ if (tb[RDMA_NLDEV_ATTR_STAT_MODE]) {
+ ret = nldev_stat_set_mode_doit(msg, extack, tb, device, port);
+ if (ret)
+ goto err_free_msg;
+ }
+
+ if (tb[RDMA_NLDEV_ATTR_STAT_HWCOUNTERS]) {
+ ret = nldev_stat_set_counter_dynamic_doit(tb, device, port);
+ if (ret)
+ goto err_free_msg;
+ }
+
+ nlmsg_end(msg, nlh);
+ ib_device_put(device);
+ return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
+
+err_free_msg:
+ nlmsg_free(msg);
+err_put_device:
+ ib_device_put(device);
+ return ret;
+}
+
+static int nldev_stat_del_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
+ struct ib_device *device;
+ struct sk_buff *msg;
+ u32 index, port, qpn, cntn;
+ int ret;
+
+ ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
+ nldev_policy, extack);
+ if (ret || !tb[RDMA_NLDEV_ATTR_STAT_RES] ||
+ !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_PORT_INDEX] ||
+ !tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID] ||
+ !tb[RDMA_NLDEV_ATTR_RES_LQPN])
+ return -EINVAL;
+
+ if (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES]) != RDMA_NLDEV_ATTR_RES_QP)
+ return -EINVAL;
+
+ index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
+ device = ib_device_get_by_index(sock_net(skb->sk), index);
+ if (!device)
+ return -EINVAL;
+
+ port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
+ if (!rdma_is_port_valid(device, port)) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
+ RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
+ RDMA_NLDEV_CMD_STAT_SET),
+ 0, 0);
+ if (!nlh) {
+ ret = -EMSGSIZE;
+ goto err_fill;
+ }
+
+ cntn = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]);
+ qpn = nla_get_u32(tb[RDMA_NLDEV_ATTR_RES_LQPN]);
+ if (fill_nldev_handle(msg, device) ||
+ nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port) ||
+ nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, cntn) ||
+ nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qpn)) {
+ ret = -EMSGSIZE;
+ goto err_fill;
+ }
+
+ ret = rdma_counter_unbind_qpn(device, port, qpn, cntn);
+ if (ret)
+ goto err_fill;
+
+ nlmsg_end(msg, nlh);
+ ib_device_put(device);
+ return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
+
+err_fill:
+ nlmsg_free(msg);
+err:
+ ib_device_put(device);
+ return ret;
+}
+
+static noinline_for_stack int
+stat_get_doit_default_counter(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack,
+ struct nlattr *tb[])
+{
+ struct rdma_hw_stats *stats;
+ struct nlattr *table_attr;
+ struct ib_device *device;
+ int ret, num_cnts, i;
+ struct sk_buff *msg;
+ u32 index, port;
+ u64 v;
+
+ if (!tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
+ return -EINVAL;
+
+ index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
+ device = ib_device_get_by_index(sock_net(skb->sk), index);
+ if (!device)
+ return -EINVAL;
+
+ if (!device->ops.alloc_hw_port_stats || !device->ops.get_hw_stats) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
+ stats = ib_get_hw_stats_port(device, port);
+ if (!stats) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
+ RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
+ RDMA_NLDEV_CMD_STAT_GET),
+ 0, 0);
+
+ if (!nlh || fill_nldev_handle(msg, device) ||
+ nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port)) {
+ ret = -EMSGSIZE;
+ goto err_msg;
+ }
+
+ mutex_lock(&stats->lock);
+
+ num_cnts = device->ops.get_hw_stats(device, stats, port, 0);
+ if (num_cnts < 0) {
+ ret = -EINVAL;
+ goto err_stats;
+ }
+
+ table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTERS);
+ if (!table_attr) {
+ ret = -EMSGSIZE;
+ goto err_stats;
+ }
+ for (i = 0; i < num_cnts; i++) {
+ if (test_bit(i, stats->is_disabled))
+ continue;
+
+ v = stats->value[i] +
+ rdma_counter_get_hwstat_value(device, port, i);
+ if (rdma_nl_stat_hwcounter_entry(msg,
+ stats->descs[i].name, v)) {
+ ret = -EMSGSIZE;
+ goto err_table;
+ }
+ }
+ nla_nest_end(msg, table_attr);
+
+ mutex_unlock(&stats->lock);
+ nlmsg_end(msg, nlh);
+ ib_device_put(device);
+ return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
+
+err_table:
+ nla_nest_cancel(msg, table_attr);
+err_stats:
+ mutex_unlock(&stats->lock);
+err_msg:
+ nlmsg_free(msg);
+err:
+ ib_device_put(device);
+ return ret;
+}
+
+static noinline_for_stack int
+stat_get_doit_qp(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack, struct nlattr *tb[])
+
+{
+ static enum rdma_nl_counter_mode mode;
+ static enum rdma_nl_counter_mask mask;
+ struct ib_device *device;
+ struct sk_buff *msg;
+ u32 index, port;
+ bool opcnt;
+ int ret;
+
+ if (tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID])
+ return nldev_res_get_counter_doit(skb, nlh, extack);
+
+ if (!tb[RDMA_NLDEV_ATTR_STAT_MODE] ||
+ !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
+ return -EINVAL;
+
+ index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
+ device = ib_device_get_by_index(sock_net(skb->sk), index);
+ if (!device)
+ return -EINVAL;
+
+ port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
+ if (!rdma_is_port_valid(device, port)) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
+ RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
+ RDMA_NLDEV_CMD_STAT_GET),
+ 0, 0);
+ if (!nlh) {
+ ret = -EMSGSIZE;
+ goto err_msg;
+ }
+
+ ret = rdma_counter_get_mode(device, port, &mode, &mask, &opcnt);
+ if (ret)
+ goto err_msg;
+
+ if (fill_nldev_handle(msg, device) ||
+ nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port) ||
+ nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_MODE, mode)) {
+ ret = -EMSGSIZE;
+ goto err_msg;
+ }
+
+ if ((mode == RDMA_COUNTER_MODE_AUTO) &&
+ nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK, mask)) {
+ ret = -EMSGSIZE;
+ goto err_msg;
+ }
+
+ if ((mode == RDMA_COUNTER_MODE_AUTO) &&
+ nla_put_u8(msg, RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENABLED, opcnt)) {
+ ret = -EMSGSIZE;
+ goto err_msg;
+ }
+
+ nlmsg_end(msg, nlh);
+ ib_device_put(device);
+ return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
+
+err_msg:
+ nlmsg_free(msg);
+err:
+ ib_device_put(device);
+ return ret;
+}
+
+static int nldev_stat_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
+ int ret;
+
+ ret = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
+ nldev_policy, NL_VALIDATE_LIBERAL, extack);
+ if (ret)
+ return -EINVAL;
+
+ if (!tb[RDMA_NLDEV_ATTR_STAT_RES])
+ return stat_get_doit_default_counter(skb, nlh, extack, tb);
+
+ switch (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES])) {
+ case RDMA_NLDEV_ATTR_RES_QP:
+ ret = stat_get_doit_qp(skb, nlh, extack, tb);
+ break;
+ case RDMA_NLDEV_ATTR_RES_MR:
+ ret = res_get_common_doit(skb, nlh, extack, RDMA_RESTRACK_MR,
+ fill_stat_mr_entry);
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ return ret;
+}
+
+static int nldev_stat_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
+ int ret;
+
+ ret = __nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
+ nldev_policy, NL_VALIDATE_LIBERAL, NULL);
+ if (ret || !tb[RDMA_NLDEV_ATTR_STAT_RES])
+ return -EINVAL;
+
+ switch (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES])) {
+ case RDMA_NLDEV_ATTR_RES_QP:
+ ret = nldev_res_get_counter_dumpit(skb, cb);
+ break;
+ case RDMA_NLDEV_ATTR_RES_MR:
+ ret = res_get_common_dumpit(skb, cb, RDMA_RESTRACK_MR,
+ fill_stat_mr_entry);
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ return ret;
+}
+
+static int nldev_stat_get_counter_status_doit(struct sk_buff *skb,
+ struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[RDMA_NLDEV_ATTR_MAX], *table, *entry;
+ struct rdma_hw_stats *stats;
+ struct ib_device *device;
+ struct sk_buff *msg;
+ u32 devid, port;
+ int ret, i;
+
+ ret = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
+ nldev_policy, NL_VALIDATE_LIBERAL, extack);
+ if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] ||
+ !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
+ return -EINVAL;
+
+ devid = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
+ device = ib_device_get_by_index(sock_net(skb->sk), devid);
+ if (!device)
+ return -EINVAL;
+
+ port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
+ if (!rdma_is_port_valid(device, port)) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ stats = ib_get_hw_stats_port(device, port);
+ if (!stats) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ nlh = nlmsg_put(
+ msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
+ RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_STAT_GET_STATUS),
+ 0, 0);
+
+ ret = -EMSGSIZE;
+ if (!nlh || fill_nldev_handle(msg, device) ||
+ nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port))
+ goto err_msg;
+
+ table = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTERS);
+ if (!table)
+ goto err_msg;
+
+ mutex_lock(&stats->lock);
+ for (i = 0; i < stats->num_counters; i++) {
+ entry = nla_nest_start(msg,
+ RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY);
+ if (!entry)
+ goto err_msg_table;
+
+ if (nla_put_string(msg,
+ RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_NAME,
+ stats->descs[i].name) ||
+ nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_INDEX, i))
+ goto err_msg_entry;
+
+ if ((stats->descs[i].flags & IB_STAT_FLAG_OPTIONAL) &&
+ (nla_put_u8(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_DYNAMIC,
+ !test_bit(i, stats->is_disabled))))
+ goto err_msg_entry;
+
+ nla_nest_end(msg, entry);
+ }
+ mutex_unlock(&stats->lock);
+
+ nla_nest_end(msg, table);
+ nlmsg_end(msg, nlh);
+ ib_device_put(device);
+ return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
+
+err_msg_entry:
+ nla_nest_cancel(msg, entry);
+err_msg_table:
+ mutex_unlock(&stats->lock);
+ nla_nest_cancel(msg, table);
+err_msg:
+ nlmsg_free(msg);
+err:
+ ib_device_put(device);
+ return ret;
+}
+
+static int nldev_newdev(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
+ enum rdma_nl_dev_type type;
+ struct ib_device *parent;
+ char name[IFNAMSIZ] = {};
+ u32 parentid;
+ int ret;
+
+ ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
+ nldev_policy, extack);
+ if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] ||
+ !tb[RDMA_NLDEV_ATTR_DEV_NAME] || !tb[RDMA_NLDEV_ATTR_DEV_TYPE])
+ return -EINVAL;
+
+ nla_strscpy(name, tb[RDMA_NLDEV_ATTR_DEV_NAME], sizeof(name));
+ type = nla_get_u8(tb[RDMA_NLDEV_ATTR_DEV_TYPE]);
+ parentid = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
+ parent = ib_device_get_by_index(sock_net(skb->sk), parentid);
+ if (!parent)
+ return -EINVAL;
+
+ ret = ib_add_sub_device(parent, type, name);
+ ib_device_put(parent);
+
+ return ret;
+}
+
+static int nldev_deldev(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
+ struct ib_device *device;
+ u32 devid;
+ int ret;
+
+ ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
+ nldev_policy, extack);
+ if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
+ return -EINVAL;
+
+ devid = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
+ device = ib_device_get_by_index(sock_net(skb->sk), devid);
+ if (!device)
+ return -EINVAL;
+
+ return ib_del_sub_device_and_put(device);
+}
+
+static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
+ [RDMA_NLDEV_CMD_GET] = {
+ .doit = nldev_get_doit,
+ .dump = nldev_get_dumpit,
+ },
+ [RDMA_NLDEV_CMD_GET_CHARDEV] = {
+ .doit = nldev_get_chardev,
+ },
+ [RDMA_NLDEV_CMD_SET] = {
+ .doit = nldev_set_doit,
+ .flags = RDMA_NL_ADMIN_PERM,
+ },
+ [RDMA_NLDEV_CMD_NEWLINK] = {
+ .doit = nldev_newlink,
+ .flags = RDMA_NL_ADMIN_PERM,
+ },
+ [RDMA_NLDEV_CMD_DELLINK] = {
+ .doit = nldev_dellink,
+ .flags = RDMA_NL_ADMIN_PERM,
+ },
+ [RDMA_NLDEV_CMD_PORT_GET] = {
+ .doit = nldev_port_get_doit,
+ .dump = nldev_port_get_dumpit,
+ },
+ [RDMA_NLDEV_CMD_RES_GET] = {
+ .doit = nldev_res_get_doit,
+ .dump = nldev_res_get_dumpit,
+ },
+ [RDMA_NLDEV_CMD_RES_QP_GET] = {
+ .doit = nldev_res_get_qp_doit,
+ .dump = nldev_res_get_qp_dumpit,
+ },
+ [RDMA_NLDEV_CMD_RES_CM_ID_GET] = {
+ .doit = nldev_res_get_cm_id_doit,
+ .dump = nldev_res_get_cm_id_dumpit,
+ },
+ [RDMA_NLDEV_CMD_RES_CQ_GET] = {
+ .doit = nldev_res_get_cq_doit,
+ .dump = nldev_res_get_cq_dumpit,
+ },
+ [RDMA_NLDEV_CMD_RES_MR_GET] = {
+ .doit = nldev_res_get_mr_doit,
+ .dump = nldev_res_get_mr_dumpit,
+ },
+ [RDMA_NLDEV_CMD_RES_PD_GET] = {
+ .doit = nldev_res_get_pd_doit,
+ .dump = nldev_res_get_pd_dumpit,
+ },
+ [RDMA_NLDEV_CMD_RES_CTX_GET] = {
+ .doit = nldev_res_get_ctx_doit,
+ .dump = nldev_res_get_ctx_dumpit,
+ },
+ [RDMA_NLDEV_CMD_RES_SRQ_GET] = {
+ .doit = nldev_res_get_srq_doit,
+ .dump = nldev_res_get_srq_dumpit,
+ },
+ [RDMA_NLDEV_CMD_SYS_GET] = {
+ .doit = nldev_sys_get_doit,
+ },
+ [RDMA_NLDEV_CMD_SYS_SET] = {
+ .doit = nldev_set_sys_set_doit,
+ .flags = RDMA_NL_ADMIN_PERM,
+ },
+ [RDMA_NLDEV_CMD_STAT_SET] = {
+ .doit = nldev_stat_set_doit,
+ .flags = RDMA_NL_ADMIN_PERM,
+ },
+ [RDMA_NLDEV_CMD_STAT_GET] = {
+ .doit = nldev_stat_get_doit,
+ .dump = nldev_stat_get_dumpit,
+ },
+ [RDMA_NLDEV_CMD_STAT_DEL] = {
+ .doit = nldev_stat_del_doit,
+ .flags = RDMA_NL_ADMIN_PERM,
+ },
+ [RDMA_NLDEV_CMD_RES_QP_GET_RAW] = {
+ .doit = nldev_res_get_qp_raw_doit,
+ .dump = nldev_res_get_qp_raw_dumpit,
+ .flags = RDMA_NL_ADMIN_PERM,
+ },
+ [RDMA_NLDEV_CMD_RES_CQ_GET_RAW] = {
+ .doit = nldev_res_get_cq_raw_doit,
+ .dump = nldev_res_get_cq_raw_dumpit,
+ .flags = RDMA_NL_ADMIN_PERM,
+ },
+ [RDMA_NLDEV_CMD_RES_MR_GET_RAW] = {
+ .doit = nldev_res_get_mr_raw_doit,
+ .dump = nldev_res_get_mr_raw_dumpit,
+ .flags = RDMA_NL_ADMIN_PERM,
+ },
+ [RDMA_NLDEV_CMD_RES_SRQ_GET_RAW] = {
+ .doit = nldev_res_get_srq_raw_doit,
+ .dump = nldev_res_get_srq_raw_dumpit,
+ .flags = RDMA_NL_ADMIN_PERM,
+ },
+ [RDMA_NLDEV_CMD_STAT_GET_STATUS] = {
+ .doit = nldev_stat_get_counter_status_doit,
+ },
+ [RDMA_NLDEV_CMD_NEWDEV] = {
+ .doit = nldev_newdev,
+ .flags = RDMA_NL_ADMIN_PERM,
+ },
+ [RDMA_NLDEV_CMD_DELDEV] = {
+ .doit = nldev_deldev,
+ .flags = RDMA_NL_ADMIN_PERM,
+ },
+};
+
+static int fill_mon_netdev_rename(struct sk_buff *msg,
+ struct ib_device *device, u32 port,
+ const struct net *net)
+{
+ struct net_device *netdev = ib_device_get_netdev(device, port);
+ int ret = 0;
+
+ if (!netdev || !net_eq(dev_net(netdev), net))
+ goto out;
+
+ ret = nla_put_u32(msg, RDMA_NLDEV_ATTR_NDEV_INDEX, netdev->ifindex);
+ if (ret)
+ goto out;
+ ret = nla_put_string(msg, RDMA_NLDEV_ATTR_NDEV_NAME, netdev->name);
+out:
+ dev_put(netdev);
+ return ret;
+}
+
+static int fill_mon_netdev_association(struct sk_buff *msg,
+ struct ib_device *device, u32 port,
+ const struct net *net)
+{
+ struct net_device *netdev = ib_device_get_netdev(device, port);
+ int ret = 0;
+
+ if (netdev && !net_eq(dev_net(netdev), net))
+ goto out;
+
+ ret = nla_put_u32(msg, RDMA_NLDEV_ATTR_DEV_INDEX, device->index);
+ if (ret)
+ goto out;
+
+ ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_NAME,
+ dev_name(&device->dev));
+ if (ret)
+ goto out;
+
+ ret = nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port);
+ if (ret)
+ goto out;
+
+ if (netdev) {
+ ret = nla_put_u32(msg,
+ RDMA_NLDEV_ATTR_NDEV_INDEX, netdev->ifindex);
+ if (ret)
+ goto out;
+
+ ret = nla_put_string(msg,
+ RDMA_NLDEV_ATTR_NDEV_NAME, netdev->name);
+ }
+
+out:
+ dev_put(netdev);
+ return ret;
+}
+
+static void rdma_nl_notify_err_msg(struct ib_device *device, u32 port_num,
+ enum rdma_nl_notify_event_type type)
+{
+ struct net_device *netdev;
+
+ switch (type) {
+ case RDMA_REGISTER_EVENT:
+ dev_warn_ratelimited(&device->dev,
+ "Failed to send RDMA monitor register device event\n");
+ break;
+ case RDMA_UNREGISTER_EVENT:
+ dev_warn_ratelimited(&device->dev,
+ "Failed to send RDMA monitor unregister device event\n");
+ break;
+ case RDMA_NETDEV_ATTACH_EVENT:
+ netdev = ib_device_get_netdev(device, port_num);
+ dev_warn_ratelimited(&device->dev,
+ "Failed to send RDMA monitor netdev attach event: port %d netdev %d\n",
+ port_num, netdev->ifindex);
+ dev_put(netdev);
+ break;
+ case RDMA_NETDEV_DETACH_EVENT:
+ dev_warn_ratelimited(&device->dev,
+ "Failed to send RDMA monitor netdev detach event: port %d\n",
+ port_num);
+ break;
+ case RDMA_RENAME_EVENT:
+ dev_warn_ratelimited(&device->dev,
+ "Failed to send RDMA monitor rename device event\n");
+ break;
+
+ case RDMA_NETDEV_RENAME_EVENT:
+ netdev = ib_device_get_netdev(device, port_num);
+ dev_warn_ratelimited(&device->dev,
+ "Failed to send RDMA monitor netdev rename event: port %d netdev %d\n",
+ port_num, netdev->ifindex);
+ dev_put(netdev);
+ break;
+ default:
+ break;
+ }
+}
+
+int rdma_nl_notify_event(struct ib_device *device, u32 port_num,
+ enum rdma_nl_notify_event_type type)
+{
+ struct sk_buff *skb;
+ int ret = -EMSGSIZE;
+ struct net *net;
+ void *nlh;
+
+ net = read_pnet(&device->coredev.rdma_net);
+ if (!net)
+ return -EINVAL;
+
+ skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!skb)
+ return -ENOMEM;
+ nlh = nlmsg_put(skb, 0, 0,
+ RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_MONITOR),
+ 0, 0);
+ if (!nlh)
+ goto err_free;
+
+ switch (type) {
+ case RDMA_REGISTER_EVENT:
+ case RDMA_UNREGISTER_EVENT:
+ case RDMA_RENAME_EVENT:
+ ret = fill_nldev_handle(skb, device);
+ if (ret)
+ goto err_free;
+ break;
+ case RDMA_NETDEV_ATTACH_EVENT:
+ case RDMA_NETDEV_DETACH_EVENT:
+ ret = fill_mon_netdev_association(skb, device, port_num, net);
+ if (ret)
+ goto err_free;
+ break;
+ case RDMA_NETDEV_RENAME_EVENT:
+ ret = fill_mon_netdev_rename(skb, device, port_num, net);
+ if (ret)
+ goto err_free;
+ break;
+ default:
+ break;
+ }
+
+ ret = nla_put_u8(skb, RDMA_NLDEV_ATTR_EVENT_TYPE, type);
+ if (ret)
+ goto err_free;
+
+ nlmsg_end(skb, nlh);
+ ret = rdma_nl_multicast(net, skb, RDMA_NL_GROUP_NOTIFY, GFP_KERNEL);
+ if (ret && ret != -ESRCH) {
+ skb = NULL; /* skb is freed in the netlink send-op handling */
+ goto err_free;
+ }
+ return 0;
+
+err_free:
+ rdma_nl_notify_err_msg(device, port_num, type);
+ nlmsg_free(skb);
+ return ret;
+}
+
+void __init nldev_init(void)
+{
+ rdma_nl_register(RDMA_NL_NLDEV, nldev_cb_table);
+}
+
+void nldev_exit(void)
+{
+ rdma_nl_unregister(RDMA_NL_NLDEV);
+}
+
+MODULE_ALIAS_RDMA_NETLINK(RDMA_NL_NLDEV, 5);
diff --git a/drivers/infiniband/core/opa_smi.h b/drivers/infiniband/core/opa_smi.h
index 3bfab3505a29..64e2822af70f 100644
--- a/drivers/infiniband/core/opa_smi.h
+++ b/drivers/infiniband/core/opa_smi.h
@@ -40,11 +40,11 @@
#include "smi.h"
enum smi_action opa_smi_handle_dr_smp_recv(struct opa_smp *smp, bool is_switch,
- int port_num, int phys_port_cnt);
+ u32 port_num, int phys_port_cnt);
int opa_smi_get_fwd_port(struct opa_smp *smp);
extern enum smi_forward_action opa_smi_check_forward_dr_smp(struct opa_smp *smp);
extern enum smi_action opa_smi_handle_dr_smp_send(struct opa_smp *smp,
- bool is_switch, int port_num);
+ bool is_switch, u32 port_num);
/*
* Return IB_SMI_HANDLE if the SMP should be handled by the local SMA/SM
@@ -55,7 +55,7 @@ static inline enum smi_action opa_smi_check_local_smp(struct opa_smp *smp,
{
/* C14-9:3 -- We're at the end of the DR segment of path */
/* C14-9:4 -- Hop Pointer = Hop Count + 1 -> give to SMA/SM */
- return (device->process_mad &&
+ return (device->ops.process_mad &&
!opa_get_smp_direction(smp) &&
(smp->hop_ptr == smp->hop_cnt + 1)) ?
IB_SMI_HANDLE : IB_SMI_DISCARD;
@@ -70,7 +70,7 @@ static inline enum smi_action opa_smi_check_local_returning_smp(struct opa_smp *
{
/* C14-13:3 -- We're at the end of the DR segment of path */
/* C14-13:4 -- Hop Pointer == 0 -> give to SM */
- return (device->process_mad &&
+ return (device->ops.process_mad &&
opa_get_smp_direction(smp) &&
!smp->hop_ptr) ? IB_SMI_HANDLE : IB_SMI_DISCARD;
}
diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c
new file mode 100644
index 000000000000..18918f463361
--- /dev/null
+++ b/drivers/infiniband/core/rdma_core.c
@@ -0,0 +1,1050 @@
+/*
+ * Copyright (c) 2016, Mellanox Technologies inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/file.h>
+#include <linux/anon_inodes.h>
+#include <linux/sched/mm.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/uverbs_types.h>
+#include <linux/rcupdate.h>
+#include <rdma/uverbs_ioctl.h>
+#include <rdma/rdma_user_ioctl.h>
+#include "uverbs.h"
+#include "core_priv.h"
+#include "rdma_core.h"
+
+static void uverbs_uobject_free(struct kref *ref)
+{
+ kfree_rcu(container_of(ref, struct ib_uobject, ref), rcu);
+}
+
+/*
+ * In order to indicate we no longer needs this uobject, uverbs_uobject_put
+ * is called. When the reference count is decreased, the uobject is freed.
+ * For example, this is used when attaching a completion channel to a CQ.
+ */
+void uverbs_uobject_put(struct ib_uobject *uobject)
+{
+ kref_put(&uobject->ref, uverbs_uobject_free);
+}
+EXPORT_SYMBOL(uverbs_uobject_put);
+
+int uverbs_try_lock_object(struct ib_uobject *uobj,
+ enum rdma_lookup_mode mode)
+{
+ /*
+ * When a shared access is required, we use a positive counter. Each
+ * shared access request checks that the value != -1 and increment it.
+ * Exclusive access is required for operations like write or destroy.
+ * In exclusive access mode, we check that the counter is zero (nobody
+ * claimed this object) and we set it to -1. Releasing a shared access
+ * lock is done simply by decreasing the counter. As for exclusive
+ * access locks, since only a single one of them is allowed
+ * concurrently, setting the counter to zero is enough for releasing
+ * this lock.
+ */
+ switch (mode) {
+ case UVERBS_LOOKUP_READ:
+ return atomic_fetch_add_unless(&uobj->usecnt, 1, -1) == -1 ?
+ -EBUSY : 0;
+ case UVERBS_LOOKUP_WRITE:
+ /* lock is exclusive */
+ return atomic_cmpxchg(&uobj->usecnt, 0, -1) == 0 ? 0 : -EBUSY;
+ case UVERBS_LOOKUP_DESTROY:
+ return 0;
+ }
+ return 0;
+}
+EXPORT_SYMBOL(uverbs_try_lock_object);
+
+static void assert_uverbs_usecnt(struct ib_uobject *uobj,
+ enum rdma_lookup_mode mode)
+{
+#ifdef CONFIG_LOCKDEP
+ switch (mode) {
+ case UVERBS_LOOKUP_READ:
+ WARN_ON(atomic_read(&uobj->usecnt) <= 0);
+ break;
+ case UVERBS_LOOKUP_WRITE:
+ WARN_ON(atomic_read(&uobj->usecnt) != -1);
+ break;
+ case UVERBS_LOOKUP_DESTROY:
+ break;
+ }
+#endif
+}
+
+/*
+ * This must be called with the hw_destroy_rwsem locked for read or write,
+ * also the uobject itself must be locked for write.
+ *
+ * Upon return the HW object is guaranteed to be destroyed.
+ *
+ * For RDMA_REMOVE_ABORT, the hw_destroy_rwsem is not required to be held,
+ * however the type's allocat_commit function cannot have been called and the
+ * uobject cannot be on the uobjects_lists
+ *
+ * For RDMA_REMOVE_DESTROY the caller should be holding a kref (eg via
+ * rdma_lookup_get_uobject) and the object is left in a state where the caller
+ * needs to call rdma_lookup_put_uobject.
+ *
+ * For all other destroy modes this function internally unlocks the uobject
+ * and consumes the kref on the uobj.
+ */
+static int uverbs_destroy_uobject(struct ib_uobject *uobj,
+ enum rdma_remove_reason reason,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uverbs_file *ufile = attrs->ufile;
+ unsigned long flags;
+ int ret;
+
+ lockdep_assert_held(&ufile->hw_destroy_rwsem);
+ assert_uverbs_usecnt(uobj, UVERBS_LOOKUP_WRITE);
+
+ if (reason == RDMA_REMOVE_ABORT) {
+ WARN_ON(!list_empty(&uobj->list));
+ WARN_ON(!uobj->context);
+ uobj->uapi_object->type_class->alloc_abort(uobj);
+ } else if (uobj->object) {
+ ret = uobj->uapi_object->type_class->destroy_hw(uobj, reason,
+ attrs);
+ if (ret)
+ /* Nothing to be done, wait till ucontext will clean it */
+ return ret;
+
+ uobj->object = NULL;
+ }
+
+ uobj->context = NULL;
+
+ /*
+ * For DESTROY the usecnt is not changed, the caller is expected to
+ * manage it via uobj_put_destroy(). Only DESTROY can remove the IDR
+ * handle.
+ */
+ if (reason != RDMA_REMOVE_DESTROY)
+ atomic_set(&uobj->usecnt, 0);
+ else
+ uobj->uapi_object->type_class->remove_handle(uobj);
+
+ if (!list_empty(&uobj->list)) {
+ spin_lock_irqsave(&ufile->uobjects_lock, flags);
+ list_del_init(&uobj->list);
+ spin_unlock_irqrestore(&ufile->uobjects_lock, flags);
+
+ /*
+ * Pairs with the get in rdma_alloc_commit_uobject(), could
+ * destroy uobj.
+ */
+ uverbs_uobject_put(uobj);
+ }
+
+ /*
+ * When aborting the stack kref remains owned by the core code, and is
+ * not transferred into the type. Pairs with the get in alloc_uobj
+ */
+ if (reason == RDMA_REMOVE_ABORT)
+ uverbs_uobject_put(uobj);
+
+ return 0;
+}
+
+/*
+ * This calls uverbs_destroy_uobject() using the RDMA_REMOVE_DESTROY
+ * sequence. It should only be used from command callbacks. On success the
+ * caller must pair this with uobj_put_destroy(). This
+ * version requires the caller to have already obtained an
+ * LOOKUP_DESTROY uobject kref.
+ */
+int uobj_destroy(struct ib_uobject *uobj, struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uverbs_file *ufile = attrs->ufile;
+ int ret;
+
+ down_read(&ufile->hw_destroy_rwsem);
+
+ /*
+ * Once the uobject is destroyed by RDMA_REMOVE_DESTROY then it is left
+ * write locked as the callers put it back with UVERBS_LOOKUP_DESTROY.
+ * This is because any other concurrent thread can still see the object
+ * in the xarray due to RCU. Leaving it locked ensures nothing else will
+ * touch it.
+ */
+ ret = uverbs_try_lock_object(uobj, UVERBS_LOOKUP_WRITE);
+ if (ret)
+ goto out_unlock;
+
+ ret = uverbs_destroy_uobject(uobj, RDMA_REMOVE_DESTROY, attrs);
+ if (ret) {
+ atomic_set(&uobj->usecnt, 0);
+ goto out_unlock;
+ }
+
+out_unlock:
+ up_read(&ufile->hw_destroy_rwsem);
+ return ret;
+}
+
+/*
+ * uobj_get_destroy destroys the HW object and returns a handle to the uobj
+ * with a NULL object pointer. The caller must pair this with
+ * uobj_put_destroy().
+ */
+struct ib_uobject *__uobj_get_destroy(const struct uverbs_api_object *obj,
+ u32 id, struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uobject *uobj;
+ int ret;
+
+ uobj = rdma_lookup_get_uobject(obj, attrs->ufile, id,
+ UVERBS_LOOKUP_DESTROY, attrs);
+ if (IS_ERR(uobj))
+ return uobj;
+
+ ret = uobj_destroy(uobj, attrs);
+ if (ret) {
+ rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_DESTROY);
+ return ERR_PTR(ret);
+ }
+
+ return uobj;
+}
+
+/*
+ * Does both uobj_get_destroy() and uobj_put_destroy(). Returns 0 on success
+ * (negative errno on failure). For use by callers that do not need the uobj.
+ */
+int __uobj_perform_destroy(const struct uverbs_api_object *obj, u32 id,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uobject *uobj;
+
+ uobj = __uobj_get_destroy(obj, id, attrs);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
+ uobj_put_destroy(uobj);
+ return 0;
+}
+
+/* alloc_uobj must be undone by uverbs_destroy_uobject() */
+static struct ib_uobject *alloc_uobj(struct uverbs_attr_bundle *attrs,
+ const struct uverbs_api_object *obj)
+{
+ struct ib_uverbs_file *ufile = attrs->ufile;
+ struct ib_uobject *uobj;
+
+ if (!attrs->context) {
+ struct ib_ucontext *ucontext =
+ ib_uverbs_get_ucontext_file(ufile);
+
+ if (IS_ERR(ucontext))
+ return ERR_CAST(ucontext);
+ attrs->context = ucontext;
+ }
+
+ uobj = kzalloc(obj->type_attrs->obj_size, GFP_KERNEL);
+ if (!uobj)
+ return ERR_PTR(-ENOMEM);
+ /*
+ * user_handle should be filled by the handler,
+ * The object is added to the list in the commit stage.
+ */
+ uobj->ufile = ufile;
+ uobj->context = attrs->context;
+ INIT_LIST_HEAD(&uobj->list);
+ uobj->uapi_object = obj;
+ /*
+ * Allocated objects start out as write locked to deny any other
+ * syscalls from accessing them until they are committed. See
+ * rdma_alloc_commit_uobject
+ */
+ atomic_set(&uobj->usecnt, -1);
+ kref_init(&uobj->ref);
+
+ return uobj;
+}
+
+static int idr_add_uobj(struct ib_uobject *uobj)
+{
+ /*
+ * We start with allocating an idr pointing to NULL. This represents an
+ * object which isn't initialized yet. We'll replace it later on with
+ * the real object once we commit.
+ */
+ return xa_alloc(&uobj->ufile->idr, &uobj->id, NULL, xa_limit_32b,
+ GFP_KERNEL);
+}
+
+/* Returns the ib_uobject or an error. The caller should check for IS_ERR. */
+static struct ib_uobject *
+lookup_get_idr_uobject(const struct uverbs_api_object *obj,
+ struct ib_uverbs_file *ufile, s64 id,
+ enum rdma_lookup_mode mode)
+{
+ struct ib_uobject *uobj;
+
+ if (id < 0 || id > ULONG_MAX)
+ return ERR_PTR(-EINVAL);
+
+ rcu_read_lock();
+ /*
+ * The idr_find is guaranteed to return a pointer to something that
+ * isn't freed yet, or NULL, as the free after idr_remove goes through
+ * kfree_rcu(). However the object may still have been released and
+ * kfree() could be called at any time.
+ */
+ uobj = xa_load(&ufile->idr, id);
+ if (!uobj || !kref_get_unless_zero(&uobj->ref))
+ uobj = ERR_PTR(-ENOENT);
+ rcu_read_unlock();
+ return uobj;
+}
+
+static struct ib_uobject *
+lookup_get_fd_uobject(const struct uverbs_api_object *obj,
+ struct ib_uverbs_file *ufile, s64 id,
+ enum rdma_lookup_mode mode)
+{
+ const struct uverbs_obj_fd_type *fd_type;
+ struct file *f;
+ struct ib_uobject *uobject;
+ int fdno = id;
+
+ if (fdno != id)
+ return ERR_PTR(-EINVAL);
+
+ if (mode != UVERBS_LOOKUP_READ)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ if (!obj->type_attrs)
+ return ERR_PTR(-EIO);
+ fd_type =
+ container_of(obj->type_attrs, struct uverbs_obj_fd_type, type);
+
+ f = fget(fdno);
+ if (!f)
+ return ERR_PTR(-EBADF);
+
+ uobject = f->private_data;
+ /*
+ * fget(id) ensures we are not currently running
+ * uverbs_uobject_fd_release(), and the caller is expected to ensure
+ * that release is never done while a call to lookup is possible.
+ */
+ if (f->f_op != fd_type->fops || uobject->ufile != ufile) {
+ fput(f);
+ return ERR_PTR(-EBADF);
+ }
+
+ uverbs_uobject_get(uobject);
+ return uobject;
+}
+
+struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_api_object *obj,
+ struct ib_uverbs_file *ufile, s64 id,
+ enum rdma_lookup_mode mode,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uobject *uobj;
+ int ret;
+
+ if (obj == ERR_PTR(-ENOMSG)) {
+ /* must be UVERBS_IDR_ANY_OBJECT, see uapi_get_object() */
+ uobj = lookup_get_idr_uobject(NULL, ufile, id, mode);
+ if (IS_ERR(uobj))
+ return uobj;
+ } else {
+ if (IS_ERR(obj))
+ return ERR_PTR(-EINVAL);
+
+ uobj = obj->type_class->lookup_get(obj, ufile, id, mode);
+ if (IS_ERR(uobj))
+ return uobj;
+
+ if (uobj->uapi_object != obj) {
+ ret = -EINVAL;
+ goto free;
+ }
+ }
+
+ /*
+ * If we have been disassociated block every command except for
+ * DESTROY based commands.
+ */
+ if (mode != UVERBS_LOOKUP_DESTROY &&
+ !srcu_dereference(ufile->device->ib_dev,
+ &ufile->device->disassociate_srcu)) {
+ ret = -EIO;
+ goto free;
+ }
+
+ ret = uverbs_try_lock_object(uobj, mode);
+ if (ret)
+ goto free;
+ if (attrs)
+ attrs->context = uobj->context;
+
+ return uobj;
+free:
+ uobj->uapi_object->type_class->lookup_put(uobj, mode);
+ uverbs_uobject_put(uobj);
+ return ERR_PTR(ret);
+}
+
+static struct ib_uobject *
+alloc_begin_idr_uobject(const struct uverbs_api_object *obj,
+ struct uverbs_attr_bundle *attrs)
+{
+ int ret;
+ struct ib_uobject *uobj;
+
+ uobj = alloc_uobj(attrs, obj);
+ if (IS_ERR(uobj))
+ return uobj;
+
+ ret = idr_add_uobj(uobj);
+ if (ret)
+ goto uobj_put;
+
+ ret = ib_rdmacg_try_charge(&uobj->cg_obj, uobj->context->device,
+ RDMACG_RESOURCE_HCA_OBJECT);
+ if (ret)
+ goto remove;
+
+ return uobj;
+
+remove:
+ xa_erase(&attrs->ufile->idr, uobj->id);
+uobj_put:
+ uverbs_uobject_put(uobj);
+ return ERR_PTR(ret);
+}
+
+static struct ib_uobject *
+alloc_begin_fd_uobject(const struct uverbs_api_object *obj,
+ struct uverbs_attr_bundle *attrs)
+{
+ const struct uverbs_obj_fd_type *fd_type;
+ int new_fd;
+ struct ib_uobject *uobj, *ret;
+ struct file *filp;
+
+ uobj = alloc_uobj(attrs, obj);
+ if (IS_ERR(uobj))
+ return uobj;
+
+ fd_type =
+ container_of(obj->type_attrs, struct uverbs_obj_fd_type, type);
+ if (WARN_ON(fd_type->fops->release != &uverbs_uobject_fd_release &&
+ fd_type->fops->release != &uverbs_async_event_release)) {
+ ret = ERR_PTR(-EINVAL);
+ goto err_fd;
+ }
+
+ new_fd = get_unused_fd_flags(O_CLOEXEC);
+ if (new_fd < 0) {
+ ret = ERR_PTR(new_fd);
+ goto err_fd;
+ }
+
+ /* Note that uverbs_uobject_fd_release() is called during abort */
+ filp = anon_inode_getfile(fd_type->name, fd_type->fops, NULL,
+ fd_type->flags);
+ if (IS_ERR(filp)) {
+ ret = ERR_CAST(filp);
+ goto err_getfile;
+ }
+ uobj->object = filp;
+
+ uobj->id = new_fd;
+ return uobj;
+
+err_getfile:
+ put_unused_fd(new_fd);
+err_fd:
+ uverbs_uobject_put(uobj);
+ return ret;
+}
+
+struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_api_object *obj,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uverbs_file *ufile = attrs->ufile;
+ struct ib_uobject *ret;
+
+ if (IS_ERR(obj))
+ return ERR_PTR(-EINVAL);
+
+ /*
+ * The hw_destroy_rwsem is held across the entire object creation and
+ * released during rdma_alloc_commit_uobject or
+ * rdma_alloc_abort_uobject
+ */
+ if (!down_read_trylock(&ufile->hw_destroy_rwsem))
+ return ERR_PTR(-EIO);
+
+ ret = obj->type_class->alloc_begin(obj, attrs);
+ if (IS_ERR(ret)) {
+ up_read(&ufile->hw_destroy_rwsem);
+ return ret;
+ }
+ return ret;
+}
+
+static void alloc_abort_idr_uobject(struct ib_uobject *uobj)
+{
+ ib_rdmacg_uncharge(&uobj->cg_obj, uobj->context->device,
+ RDMACG_RESOURCE_HCA_OBJECT);
+
+ xa_erase(&uobj->ufile->idr, uobj->id);
+}
+
+static int __must_check destroy_hw_idr_uobject(struct ib_uobject *uobj,
+ enum rdma_remove_reason why,
+ struct uverbs_attr_bundle *attrs)
+{
+ const struct uverbs_obj_idr_type *idr_type =
+ container_of(uobj->uapi_object->type_attrs,
+ struct uverbs_obj_idr_type, type);
+ int ret = idr_type->destroy_object(uobj, why, attrs);
+
+ if (ret)
+ return ret;
+
+ if (why == RDMA_REMOVE_ABORT)
+ return 0;
+
+ ib_rdmacg_uncharge(&uobj->cg_obj, uobj->context->device,
+ RDMACG_RESOURCE_HCA_OBJECT);
+
+ return 0;
+}
+
+static void remove_handle_idr_uobject(struct ib_uobject *uobj)
+{
+ xa_erase(&uobj->ufile->idr, uobj->id);
+ /* Matches the kref in alloc_commit_idr_uobject */
+ uverbs_uobject_put(uobj);
+}
+
+static void alloc_abort_fd_uobject(struct ib_uobject *uobj)
+{
+ struct file *filp = uobj->object;
+
+ fput(filp);
+ put_unused_fd(uobj->id);
+}
+
+static int __must_check destroy_hw_fd_uobject(struct ib_uobject *uobj,
+ enum rdma_remove_reason why,
+ struct uverbs_attr_bundle *attrs)
+{
+ const struct uverbs_obj_fd_type *fd_type = container_of(
+ uobj->uapi_object->type_attrs, struct uverbs_obj_fd_type, type);
+
+ fd_type->destroy_object(uobj, why);
+ return 0;
+}
+
+static void remove_handle_fd_uobject(struct ib_uobject *uobj)
+{
+}
+
+static void alloc_commit_idr_uobject(struct ib_uobject *uobj)
+{
+ struct ib_uverbs_file *ufile = uobj->ufile;
+ void *old;
+
+ /*
+ * We already allocated this IDR with a NULL object, so
+ * this shouldn't fail.
+ *
+ * NOTE: Storing the uobj transfers our kref on uobj to the XArray.
+ * It will be put by remove_commit_idr_uobject()
+ */
+ old = xa_store(&ufile->idr, uobj->id, uobj, GFP_KERNEL);
+ WARN_ON(old != NULL);
+}
+
+static void swap_idr_uobjects(struct ib_uobject *obj_old,
+ struct ib_uobject *obj_new)
+{
+ struct ib_uverbs_file *ufile = obj_old->ufile;
+ void *old;
+
+ /*
+ * New must be an object that been allocated but not yet committed, this
+ * moves the pre-committed state to obj_old, new still must be comitted.
+ */
+ old = xa_cmpxchg(&ufile->idr, obj_old->id, obj_old, XA_ZERO_ENTRY,
+ GFP_KERNEL);
+ if (WARN_ON(old != obj_old))
+ return;
+
+ swap(obj_old->id, obj_new->id);
+
+ old = xa_cmpxchg(&ufile->idr, obj_old->id, NULL, obj_old, GFP_KERNEL);
+ WARN_ON(old != NULL);
+}
+
+static void alloc_commit_fd_uobject(struct ib_uobject *uobj)
+{
+ int fd = uobj->id;
+ struct file *filp = uobj->object;
+
+ /* Matching put will be done in uverbs_uobject_fd_release() */
+ kref_get(&uobj->ufile->ref);
+
+ /* This shouldn't be used anymore. Use the file object instead */
+ uobj->id = 0;
+
+ /*
+ * NOTE: Once we install the file we loose ownership of our kref on
+ * uobj. It will be put by uverbs_uobject_fd_release()
+ */
+ filp->private_data = uobj;
+ fd_install(fd, filp);
+}
+
+/*
+ * In all cases rdma_alloc_commit_uobject() consumes the kref to uobj and the
+ * caller can no longer assume uobj is valid. If this function fails it
+ * destroys the uboject, including the attached HW object.
+ */
+void rdma_alloc_commit_uobject(struct ib_uobject *uobj,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uverbs_file *ufile = attrs->ufile;
+
+ /* kref is held so long as the uobj is on the uobj list. */
+ uverbs_uobject_get(uobj);
+ spin_lock_irq(&ufile->uobjects_lock);
+ list_add(&uobj->list, &ufile->uobjects);
+ spin_unlock_irq(&ufile->uobjects_lock);
+
+ /* matches atomic_set(-1) in alloc_uobj */
+ atomic_set(&uobj->usecnt, 0);
+
+ /* alloc_commit consumes the uobj kref */
+ uobj->uapi_object->type_class->alloc_commit(uobj);
+
+ /* Matches the down_read in rdma_alloc_begin_uobject */
+ up_read(&ufile->hw_destroy_rwsem);
+}
+
+/*
+ * new_uobj will be assigned to the handle currently used by to_uobj, and
+ * to_uobj will be destroyed.
+ *
+ * Upon return the caller must do:
+ * rdma_alloc_commit_uobject(new_uobj)
+ * uobj_put_destroy(to_uobj)
+ *
+ * to_uobj must have a write get but the put mode switches to destroy once
+ * this is called.
+ */
+void rdma_assign_uobject(struct ib_uobject *to_uobj, struct ib_uobject *new_uobj,
+ struct uverbs_attr_bundle *attrs)
+{
+ assert_uverbs_usecnt(new_uobj, UVERBS_LOOKUP_WRITE);
+
+ if (WARN_ON(to_uobj->uapi_object != new_uobj->uapi_object ||
+ !to_uobj->uapi_object->type_class->swap_uobjects))
+ return;
+
+ to_uobj->uapi_object->type_class->swap_uobjects(to_uobj, new_uobj);
+
+ /*
+ * If this fails then the uobject is still completely valid (though with
+ * a new ID) and we leak it until context close.
+ */
+ uverbs_destroy_uobject(to_uobj, RDMA_REMOVE_DESTROY, attrs);
+}
+
+/*
+ * This consumes the kref for uobj. It is up to the caller to unwind the HW
+ * object and anything else connected to uobj before calling this.
+ */
+void rdma_alloc_abort_uobject(struct ib_uobject *uobj,
+ struct uverbs_attr_bundle *attrs,
+ bool hw_obj_valid)
+{
+ struct ib_uverbs_file *ufile = uobj->ufile;
+ int ret;
+
+ if (hw_obj_valid) {
+ ret = uobj->uapi_object->type_class->destroy_hw(
+ uobj, RDMA_REMOVE_ABORT, attrs);
+ /*
+ * If the driver couldn't destroy the object then go ahead and
+ * commit it. Leaking objects that can't be destroyed is only
+ * done during FD close after the driver has a few more tries to
+ * destroy it.
+ */
+ if (WARN_ON(ret))
+ return rdma_alloc_commit_uobject(uobj, attrs);
+ }
+
+ uverbs_destroy_uobject(uobj, RDMA_REMOVE_ABORT, attrs);
+
+ /* Matches the down_read in rdma_alloc_begin_uobject */
+ up_read(&ufile->hw_destroy_rwsem);
+}
+
+static void lookup_put_idr_uobject(struct ib_uobject *uobj,
+ enum rdma_lookup_mode mode)
+{
+}
+
+static void lookup_put_fd_uobject(struct ib_uobject *uobj,
+ enum rdma_lookup_mode mode)
+{
+ struct file *filp = uobj->object;
+
+ WARN_ON(mode != UVERBS_LOOKUP_READ);
+ /*
+ * This indirectly calls uverbs_uobject_fd_release() and free the
+ * object
+ */
+ fput(filp);
+}
+
+void rdma_lookup_put_uobject(struct ib_uobject *uobj,
+ enum rdma_lookup_mode mode)
+{
+ assert_uverbs_usecnt(uobj, mode);
+ /*
+ * In order to unlock an object, either decrease its usecnt for
+ * read access or zero it in case of exclusive access. See
+ * uverbs_try_lock_object for locking schema information.
+ */
+ switch (mode) {
+ case UVERBS_LOOKUP_READ:
+ atomic_dec(&uobj->usecnt);
+ break;
+ case UVERBS_LOOKUP_WRITE:
+ atomic_set(&uobj->usecnt, 0);
+ break;
+ case UVERBS_LOOKUP_DESTROY:
+ break;
+ }
+
+ uobj->uapi_object->type_class->lookup_put(uobj, mode);
+ /* Pairs with the kref obtained by type->lookup_get */
+ uverbs_uobject_put(uobj);
+}
+
+void setup_ufile_idr_uobject(struct ib_uverbs_file *ufile)
+{
+ xa_init_flags(&ufile->idr, XA_FLAGS_ALLOC);
+}
+
+void release_ufile_idr_uobject(struct ib_uverbs_file *ufile)
+{
+ struct ib_uobject *entry;
+ unsigned long id;
+
+ /*
+ * At this point uverbs_cleanup_ufile() is guaranteed to have run, and
+ * there are no HW objects left, however the xarray is still populated
+ * with anything that has not been cleaned up by userspace. Since the
+ * kref on ufile is 0, nothing is allowed to call lookup_get.
+ *
+ * This is an optimized equivalent to remove_handle_idr_uobject
+ */
+ xa_for_each(&ufile->idr, id, entry) {
+ WARN_ON(entry->object);
+ uverbs_uobject_put(entry);
+ }
+
+ xa_destroy(&ufile->idr);
+}
+
+const struct uverbs_obj_type_class uverbs_idr_class = {
+ .alloc_begin = alloc_begin_idr_uobject,
+ .lookup_get = lookup_get_idr_uobject,
+ .alloc_commit = alloc_commit_idr_uobject,
+ .alloc_abort = alloc_abort_idr_uobject,
+ .lookup_put = lookup_put_idr_uobject,
+ .destroy_hw = destroy_hw_idr_uobject,
+ .remove_handle = remove_handle_idr_uobject,
+ .swap_uobjects = swap_idr_uobjects,
+};
+EXPORT_SYMBOL(uverbs_idr_class);
+
+/*
+ * Users of UVERBS_TYPE_ALLOC_FD should set this function as the struct
+ * file_operations release method.
+ */
+int uverbs_uobject_fd_release(struct inode *inode, struct file *filp)
+{
+ struct ib_uverbs_file *ufile;
+ struct ib_uobject *uobj;
+
+ /*
+ * This can only happen if the fput came from alloc_abort_fd_uobject()
+ */
+ if (!filp->private_data)
+ return 0;
+ uobj = filp->private_data;
+ ufile = uobj->ufile;
+
+ if (down_read_trylock(&ufile->hw_destroy_rwsem)) {
+ struct uverbs_attr_bundle attrs = {
+ .context = uobj->context,
+ .ufile = ufile,
+ };
+
+ /*
+ * lookup_get_fd_uobject holds the kref on the struct file any
+ * time a FD uobj is locked, which prevents this release
+ * method from being invoked. Meaning we can always get the
+ * write lock here, or we have a kernel bug.
+ */
+ WARN_ON(uverbs_try_lock_object(uobj, UVERBS_LOOKUP_WRITE));
+ uverbs_destroy_uobject(uobj, RDMA_REMOVE_CLOSE, &attrs);
+ up_read(&ufile->hw_destroy_rwsem);
+ }
+
+ /* Matches the get in alloc_commit_fd_uobject() */
+ kref_put(&ufile->ref, ib_uverbs_release_file);
+
+ /* Pairs with filp->private_data in alloc_begin_fd_uobject */
+ uverbs_uobject_put(uobj);
+ return 0;
+}
+EXPORT_SYMBOL(uverbs_uobject_fd_release);
+
+/*
+ * Drop the ucontext off the ufile and completely disconnect it from the
+ * ib_device
+ */
+static void ufile_destroy_ucontext(struct ib_uverbs_file *ufile,
+ enum rdma_remove_reason reason)
+{
+ struct ib_ucontext *ucontext = ufile->ucontext;
+ struct ib_device *ib_dev = ucontext->device;
+
+ /*
+ * If we are closing the FD then the user mmap VMAs must have
+ * already been destroyed as they hold on to the filep, otherwise
+ * they need to be zap'd.
+ */
+ if (reason == RDMA_REMOVE_DRIVER_REMOVE) {
+ uverbs_user_mmap_disassociate(ufile);
+ if (ib_dev->ops.disassociate_ucontext)
+ ib_dev->ops.disassociate_ucontext(ucontext);
+ }
+
+ ib_rdmacg_uncharge(&ucontext->cg_obj, ib_dev,
+ RDMACG_RESOURCE_HCA_HANDLE);
+
+ rdma_restrack_del(&ucontext->res);
+
+ ib_dev->ops.dealloc_ucontext(ucontext);
+ WARN_ON(!xa_empty(&ucontext->mmap_xa));
+ kfree(ucontext);
+
+ ufile->ucontext = NULL;
+}
+
+static int __uverbs_cleanup_ufile(struct ib_uverbs_file *ufile,
+ enum rdma_remove_reason reason)
+{
+ struct uverbs_attr_bundle attrs = { .ufile = ufile };
+ struct ib_ucontext *ucontext = ufile->ucontext;
+ struct ib_device *ib_dev = ucontext->device;
+ struct ib_uobject *obj, *next_obj;
+ int ret = -EINVAL;
+
+ if (ib_dev->ops.ufile_hw_cleanup)
+ ib_dev->ops.ufile_hw_cleanup(ufile);
+
+ /*
+ * This shouldn't run while executing other commands on this
+ * context. Thus, the only thing we should take care of is
+ * releasing a FD while traversing this list. The FD could be
+ * closed and released from the _release fop of this FD.
+ * In order to mitigate this, we add a lock.
+ * We take and release the lock per traversal in order to let
+ * other threads (which might still use the FDs) chance to run.
+ */
+ list_for_each_entry_safe(obj, next_obj, &ufile->uobjects, list) {
+ attrs.context = obj->context;
+ /*
+ * if we hit this WARN_ON, that means we are
+ * racing with a lookup_get.
+ */
+ WARN_ON(uverbs_try_lock_object(obj, UVERBS_LOOKUP_WRITE));
+ if (reason == RDMA_REMOVE_DRIVER_FAILURE)
+ obj->object = NULL;
+ if (!uverbs_destroy_uobject(obj, reason, &attrs))
+ ret = 0;
+ else
+ atomic_set(&obj->usecnt, 0);
+ }
+
+ if (reason == RDMA_REMOVE_DRIVER_FAILURE) {
+ WARN_ON(!list_empty(&ufile->uobjects));
+ return 0;
+ }
+ return ret;
+}
+
+/*
+ * Destroy the ucontext and every uobject associated with it.
+ *
+ * This is internally locked and can be called in parallel from multiple
+ * contexts.
+ */
+void uverbs_destroy_ufile_hw(struct ib_uverbs_file *ufile,
+ enum rdma_remove_reason reason)
+{
+ down_write(&ufile->hw_destroy_rwsem);
+
+ /*
+ * If a ucontext was never created then we can't have any uobjects to
+ * cleanup, nothing to do.
+ */
+ if (!ufile->ucontext)
+ goto done;
+
+ while (!list_empty(&ufile->uobjects) &&
+ !__uverbs_cleanup_ufile(ufile, reason)) {
+ }
+
+ if (WARN_ON(!list_empty(&ufile->uobjects)))
+ __uverbs_cleanup_ufile(ufile, RDMA_REMOVE_DRIVER_FAILURE);
+ ufile_destroy_ucontext(ufile, reason);
+
+done:
+ up_write(&ufile->hw_destroy_rwsem);
+}
+
+const struct uverbs_obj_type_class uverbs_fd_class = {
+ .alloc_begin = alloc_begin_fd_uobject,
+ .lookup_get = lookup_get_fd_uobject,
+ .alloc_commit = alloc_commit_fd_uobject,
+ .alloc_abort = alloc_abort_fd_uobject,
+ .lookup_put = lookup_put_fd_uobject,
+ .destroy_hw = destroy_hw_fd_uobject,
+ .remove_handle = remove_handle_fd_uobject,
+};
+EXPORT_SYMBOL(uverbs_fd_class);
+
+struct ib_uobject *
+uverbs_get_uobject_from_file(u16 object_id, enum uverbs_obj_access access,
+ s64 id, struct uverbs_attr_bundle *attrs)
+{
+ const struct uverbs_api_object *obj =
+ uapi_get_object(attrs->ufile->device->uapi, object_id);
+
+ switch (access) {
+ case UVERBS_ACCESS_READ:
+ return rdma_lookup_get_uobject(obj, attrs->ufile, id,
+ UVERBS_LOOKUP_READ, attrs);
+ case UVERBS_ACCESS_DESTROY:
+ /* Actual destruction is done inside uverbs_handle_method */
+ return rdma_lookup_get_uobject(obj, attrs->ufile, id,
+ UVERBS_LOOKUP_DESTROY, attrs);
+ case UVERBS_ACCESS_WRITE:
+ return rdma_lookup_get_uobject(obj, attrs->ufile, id,
+ UVERBS_LOOKUP_WRITE, attrs);
+ case UVERBS_ACCESS_NEW:
+ return rdma_alloc_begin_uobject(obj, attrs);
+ default:
+ WARN_ON(true);
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+}
+
+void uverbs_finalize_object(struct ib_uobject *uobj,
+ enum uverbs_obj_access access, bool hw_obj_valid,
+ bool commit, struct uverbs_attr_bundle *attrs)
+{
+ /*
+ * refcounts should be handled at the object level and not at the
+ * uobject level. Refcounts of the objects themselves are done in
+ * handlers.
+ */
+
+ switch (access) {
+ case UVERBS_ACCESS_READ:
+ rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_READ);
+ break;
+ case UVERBS_ACCESS_WRITE:
+ rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_WRITE);
+ break;
+ case UVERBS_ACCESS_DESTROY:
+ if (uobj)
+ rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_DESTROY);
+ break;
+ case UVERBS_ACCESS_NEW:
+ if (commit)
+ rdma_alloc_commit_uobject(uobj, attrs);
+ else
+ rdma_alloc_abort_uobject(uobj, attrs, hw_obj_valid);
+ break;
+ default:
+ WARN_ON(true);
+ }
+}
+
+/**
+ * rdma_uattrs_has_raw_cap() - Returns whether a rdma device linked to the
+ * uverbs attributes file has CAP_NET_RAW
+ * capability or not.
+ *
+ * @attrs: Pointer to uverbs attributes
+ *
+ * Returns true if a rdma device's owning user namespace has CAP_NET_RAW
+ * capability, otherwise false.
+ */
+bool rdma_uattrs_has_raw_cap(const struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uverbs_file *ufile = attrs->ufile;
+ struct ib_ucontext *ucontext;
+ bool has_cap = false;
+ int srcu_key;
+
+ srcu_key = srcu_read_lock(&ufile->device->disassociate_srcu);
+ ucontext = ib_uverbs_get_ucontext_file(ufile);
+ if (IS_ERR(ucontext))
+ goto out;
+ has_cap = rdma_dev_has_raw_cap(ucontext->device);
+
+out:
+ srcu_read_unlock(&ufile->device->disassociate_srcu, srcu_key);
+ return has_cap;
+}
+EXPORT_SYMBOL(rdma_uattrs_has_raw_cap);
diff --git a/drivers/infiniband/core/rdma_core.h b/drivers/infiniband/core/rdma_core.h
new file mode 100644
index 000000000000..a59b087611cb
--- /dev/null
+++ b/drivers/infiniband/core/rdma_core.h
@@ -0,0 +1,192 @@
+/*
+ * Copyright (c) 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved.
+ * Copyright (c) 2005-2017 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2005 Voltaire, Inc. All rights reserved.
+ * Copyright (c) 2005 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef RDMA_CORE_H
+#define RDMA_CORE_H
+
+#include <linux/idr.h>
+#include <rdma/uverbs_types.h>
+#include <rdma/uverbs_ioctl.h>
+#include <rdma/ib_verbs.h>
+#include <linux/mutex.h>
+
+struct ib_uverbs_device;
+
+void uverbs_destroy_ufile_hw(struct ib_uverbs_file *ufile,
+ enum rdma_remove_reason reason);
+
+int uobj_destroy(struct ib_uobject *uobj, struct uverbs_attr_bundle *attrs);
+
+/*
+ * Get an ib_uobject that corresponds to the given id from ufile, assuming
+ * the object is from the given type. Lock it to the required access when
+ * applicable.
+ * This function could create (access == NEW), destroy (access == DESTROY)
+ * or unlock (access == READ || access == WRITE) objects if required.
+ * The action will be finalized only when uverbs_finalize_object or
+ * uverbs_finalize_objects are called.
+ */
+struct ib_uobject *
+uverbs_get_uobject_from_file(u16 object_id, enum uverbs_obj_access access,
+ s64 id, struct uverbs_attr_bundle *attrs);
+
+void uverbs_finalize_object(struct ib_uobject *uobj,
+ enum uverbs_obj_access access, bool hw_obj_valid,
+ bool commit, struct uverbs_attr_bundle *attrs);
+
+int uverbs_output_written(const struct uverbs_attr_bundle *bundle, size_t idx);
+
+void setup_ufile_idr_uobject(struct ib_uverbs_file *ufile);
+void release_ufile_idr_uobject(struct ib_uverbs_file *ufile);
+
+struct ib_udata *uverbs_get_cleared_udata(struct uverbs_attr_bundle *attrs);
+
+/*
+ * This is the runtime description of the uverbs API, used by the syscall
+ * machinery to validate and dispatch calls.
+ */
+
+/*
+ * Depending on ID the slot pointer in the radix tree points at one of these
+ * structs.
+ */
+
+struct uverbs_api_ioctl_method {
+ int(__rcu *handler)(struct uverbs_attr_bundle *attrs);
+ DECLARE_BITMAP(attr_mandatory, UVERBS_API_ATTR_BKEY_LEN);
+ u16 bundle_size;
+ u8 use_stack:1;
+ u8 driver_method:1;
+ u8 disabled:1;
+ u8 has_udata:1;
+ u8 key_bitmap_len;
+ u8 destroy_bkey;
+};
+
+struct uverbs_api_write_method {
+ int (*handler)(struct uverbs_attr_bundle *attrs);
+ u8 disabled:1;
+ u8 is_ex:1;
+ u8 has_udata:1;
+ u8 has_resp:1;
+ u8 req_size;
+ u8 resp_size;
+};
+
+struct uverbs_api_attr {
+ struct uverbs_attr_spec spec;
+};
+
+struct uverbs_api {
+ /* radix tree contains struct uverbs_api_* pointers */
+ struct radix_tree_root radix;
+ enum rdma_driver_id driver_id;
+
+ unsigned int num_write;
+ unsigned int num_write_ex;
+ struct uverbs_api_write_method notsupp_method;
+ const struct uverbs_api_write_method **write_methods;
+ const struct uverbs_api_write_method **write_ex_methods;
+};
+
+/*
+ * Get an uverbs_api_object that corresponds to the given object_id.
+ * Note:
+ * -ENOMSG means that any object is allowed to match during lookup.
+ */
+static inline const struct uverbs_api_object *
+uapi_get_object(struct uverbs_api *uapi, u16 object_id)
+{
+ const struct uverbs_api_object *res;
+
+ if (object_id == UVERBS_IDR_ANY_OBJECT)
+ return ERR_PTR(-ENOMSG);
+
+ res = radix_tree_lookup(&uapi->radix, uapi_key_obj(object_id));
+ if (!res)
+ return ERR_PTR(-ENOENT);
+
+ return res;
+}
+
+char *uapi_key_format(char *S, unsigned int key);
+struct uverbs_api *uverbs_alloc_api(struct ib_device *ibdev);
+void uverbs_disassociate_api_pre(struct ib_uverbs_device *uverbs_dev);
+void uverbs_disassociate_api(struct uverbs_api *uapi);
+void uverbs_destroy_api(struct uverbs_api *uapi);
+void uapi_compute_bundle_size(struct uverbs_api_ioctl_method *method_elm,
+ unsigned int num_attrs);
+void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile);
+
+extern const struct uapi_definition uverbs_def_obj_async_fd[];
+extern const struct uapi_definition uverbs_def_obj_counters[];
+extern const struct uapi_definition uverbs_def_obj_cq[];
+extern const struct uapi_definition uverbs_def_obj_device[];
+extern const struct uapi_definition uverbs_def_obj_dm[];
+extern const struct uapi_definition uverbs_def_obj_dmah[];
+extern const struct uapi_definition uverbs_def_obj_flow_action[];
+extern const struct uapi_definition uverbs_def_obj_intf[];
+extern const struct uapi_definition uverbs_def_obj_mr[];
+extern const struct uapi_definition uverbs_def_obj_qp[];
+extern const struct uapi_definition uverbs_def_obj_srq[];
+extern const struct uapi_definition uverbs_def_obj_wq[];
+extern const struct uapi_definition uverbs_def_write_intf[];
+
+static inline const struct uverbs_api_write_method *
+uapi_get_method(const struct uverbs_api *uapi, u32 command)
+{
+ u32 cmd_idx = command & IB_USER_VERBS_CMD_COMMAND_MASK;
+
+ if (command & ~(u32)(IB_USER_VERBS_CMD_FLAG_EXTENDED |
+ IB_USER_VERBS_CMD_COMMAND_MASK))
+ return ERR_PTR(-EINVAL);
+
+ if (command & IB_USER_VERBS_CMD_FLAG_EXTENDED) {
+ if (cmd_idx >= uapi->num_write_ex)
+ return ERR_PTR(-EOPNOTSUPP);
+ return uapi->write_ex_methods[cmd_idx];
+ }
+
+ if (cmd_idx >= uapi->num_write)
+ return ERR_PTR(-EOPNOTSUPP);
+ return uapi->write_methods[cmd_idx];
+}
+
+void uverbs_fill_udata(struct uverbs_attr_bundle *bundle,
+ struct ib_udata *udata, unsigned int attr_in,
+ unsigned int attr_out);
+
+#endif /* RDMA_CORE_H */
diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c
new file mode 100644
index 000000000000..b097cfcade1c
--- /dev/null
+++ b/drivers/infiniband/core/restrack.c
@@ -0,0 +1,314 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved.
+ */
+
+#include <rdma/rdma_cm.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/restrack.h>
+#include <rdma/rdma_counter.h>
+#include <linux/mutex.h>
+#include <linux/sched/task.h>
+#include <linux/pid_namespace.h>
+
+#include "cma_priv.h"
+#include "restrack.h"
+
+/**
+ * rdma_restrack_init() - initialize and allocate resource tracking
+ * @dev: IB device
+ *
+ * Return: 0 on success
+ */
+int rdma_restrack_init(struct ib_device *dev)
+{
+ struct rdma_restrack_root *rt;
+ int i;
+
+ dev->res = kcalloc(RDMA_RESTRACK_MAX, sizeof(*rt), GFP_KERNEL);
+ if (!dev->res)
+ return -ENOMEM;
+
+ rt = dev->res;
+
+ for (i = 0; i < RDMA_RESTRACK_MAX; i++)
+ xa_init_flags(&rt[i].xa, XA_FLAGS_ALLOC);
+
+ return 0;
+}
+
+/**
+ * rdma_restrack_clean() - clean resource tracking
+ * @dev: IB device
+ */
+void rdma_restrack_clean(struct ib_device *dev)
+{
+ struct rdma_restrack_root *rt = dev->res;
+ int i;
+
+ for (i = 0 ; i < RDMA_RESTRACK_MAX; i++) {
+ struct xarray *xa = &dev->res[i].xa;
+
+ WARN_ON(!xa_empty(xa));
+ xa_destroy(xa);
+ }
+ kfree(rt);
+}
+
+/**
+ * rdma_restrack_count() - the current usage of specific object
+ * @dev: IB device
+ * @type: actual type of object to operate
+ * @show_details: count driver specific objects
+ */
+int rdma_restrack_count(struct ib_device *dev, enum rdma_restrack_type type,
+ bool show_details)
+{
+ struct rdma_restrack_root *rt = &dev->res[type];
+ struct rdma_restrack_entry *e;
+ XA_STATE(xas, &rt->xa, 0);
+ u32 cnt = 0;
+
+ xa_lock(&rt->xa);
+ xas_for_each(&xas, e, U32_MAX) {
+ if (xa_get_mark(&rt->xa, e->id, RESTRACK_DD) && !show_details)
+ continue;
+ cnt++;
+ }
+ xa_unlock(&rt->xa);
+ return cnt;
+}
+EXPORT_SYMBOL(rdma_restrack_count);
+
+static struct ib_device *res_to_dev(struct rdma_restrack_entry *res)
+{
+ switch (res->type) {
+ case RDMA_RESTRACK_PD:
+ return container_of(res, struct ib_pd, res)->device;
+ case RDMA_RESTRACK_CQ:
+ return container_of(res, struct ib_cq, res)->device;
+ case RDMA_RESTRACK_QP:
+ return container_of(res, struct ib_qp, res)->device;
+ case RDMA_RESTRACK_CM_ID:
+ return container_of(res, struct rdma_id_private,
+ res)->id.device;
+ case RDMA_RESTRACK_MR:
+ return container_of(res, struct ib_mr, res)->device;
+ case RDMA_RESTRACK_CTX:
+ return container_of(res, struct ib_ucontext, res)->device;
+ case RDMA_RESTRACK_COUNTER:
+ return container_of(res, struct rdma_counter, res)->device;
+ case RDMA_RESTRACK_SRQ:
+ return container_of(res, struct ib_srq, res)->device;
+ case RDMA_RESTRACK_DMAH:
+ return container_of(res, struct ib_dmah, res)->device;
+ default:
+ WARN_ONCE(true, "Wrong resource tracking type %u\n", res->type);
+ return NULL;
+ }
+}
+
+/**
+ * rdma_restrack_attach_task() - attach the task onto this resource,
+ * valid for user space restrack entries.
+ * @res: resource entry
+ * @task: the task to attach
+ */
+static void rdma_restrack_attach_task(struct rdma_restrack_entry *res,
+ struct task_struct *task)
+{
+ if (WARN_ON_ONCE(!task))
+ return;
+
+ if (res->task)
+ put_task_struct(res->task);
+ get_task_struct(task);
+ res->task = task;
+ res->user = true;
+}
+
+/**
+ * rdma_restrack_set_name() - set the task for this resource
+ * @res: resource entry
+ * @caller: kernel name, the current task will be used if the caller is NULL.
+ */
+void rdma_restrack_set_name(struct rdma_restrack_entry *res, const char *caller)
+{
+ if (caller) {
+ res->kern_name = caller;
+ return;
+ }
+
+ rdma_restrack_attach_task(res, current);
+}
+EXPORT_SYMBOL(rdma_restrack_set_name);
+
+/**
+ * rdma_restrack_parent_name() - set the restrack name properties based
+ * on parent restrack
+ * @dst: destination resource entry
+ * @parent: parent resource entry
+ */
+void rdma_restrack_parent_name(struct rdma_restrack_entry *dst,
+ const struct rdma_restrack_entry *parent)
+{
+ if (rdma_is_kernel_res(parent))
+ dst->kern_name = parent->kern_name;
+ else
+ rdma_restrack_attach_task(dst, parent->task);
+}
+EXPORT_SYMBOL(rdma_restrack_parent_name);
+
+/**
+ * rdma_restrack_new() - Initializes new restrack entry to allow _put() interface
+ * to release memory in fully automatic way.
+ * @res: Entry to initialize
+ * @type: REstrack type
+ */
+void rdma_restrack_new(struct rdma_restrack_entry *res,
+ enum rdma_restrack_type type)
+{
+ kref_init(&res->kref);
+ init_completion(&res->comp);
+ res->type = type;
+}
+EXPORT_SYMBOL(rdma_restrack_new);
+
+/**
+ * rdma_restrack_add() - add object to the resource tracking database
+ * @res: resource entry
+ */
+void rdma_restrack_add(struct rdma_restrack_entry *res)
+{
+ struct ib_device *dev = res_to_dev(res);
+ struct rdma_restrack_root *rt;
+ int ret = 0;
+
+ if (!dev)
+ return;
+
+ if (res->no_track)
+ goto out;
+
+ rt = &dev->res[res->type];
+
+ if (res->type == RDMA_RESTRACK_QP) {
+ /* Special case to ensure that LQPN points to right QP */
+ struct ib_qp *qp = container_of(res, struct ib_qp, res);
+
+ WARN_ONCE(qp->qp_num >> 24 || qp->port >> 8,
+ "QP number 0x%0X and port 0x%0X", qp->qp_num,
+ qp->port);
+ res->id = qp->qp_num;
+ if (qp->qp_type == IB_QPT_SMI || qp->qp_type == IB_QPT_GSI)
+ res->id |= qp->port << 24;
+ ret = xa_insert(&rt->xa, res->id, res, GFP_KERNEL);
+ if (ret)
+ res->id = 0;
+
+ if (qp->qp_type >= IB_QPT_DRIVER)
+ xa_set_mark(&rt->xa, res->id, RESTRACK_DD);
+ } else if (res->type == RDMA_RESTRACK_COUNTER) {
+ /* Special case to ensure that cntn points to right counter */
+ struct rdma_counter *counter;
+
+ counter = container_of(res, struct rdma_counter, res);
+ ret = xa_insert(&rt->xa, counter->id, res, GFP_KERNEL);
+ res->id = ret ? 0 : counter->id;
+ } else {
+ ret = xa_alloc_cyclic(&rt->xa, &res->id, res, xa_limit_32b,
+ &rt->next_id, GFP_KERNEL);
+ ret = (ret < 0) ? ret : 0;
+ }
+
+out:
+ if (!ret)
+ res->valid = true;
+}
+EXPORT_SYMBOL(rdma_restrack_add);
+
+int __must_check rdma_restrack_get(struct rdma_restrack_entry *res)
+{
+ return kref_get_unless_zero(&res->kref);
+}
+EXPORT_SYMBOL(rdma_restrack_get);
+
+/**
+ * rdma_restrack_get_byid() - translate from ID to restrack object
+ * @dev: IB device
+ * @type: resource track type
+ * @id: ID to take a look
+ *
+ * Return: Pointer to restrack entry or -ENOENT in case of error.
+ */
+struct rdma_restrack_entry *
+rdma_restrack_get_byid(struct ib_device *dev,
+ enum rdma_restrack_type type, u32 id)
+{
+ struct rdma_restrack_root *rt = &dev->res[type];
+ struct rdma_restrack_entry *res;
+
+ xa_lock(&rt->xa);
+ res = xa_load(&rt->xa, id);
+ if (!res || !rdma_restrack_get(res))
+ res = ERR_PTR(-ENOENT);
+ xa_unlock(&rt->xa);
+
+ return res;
+}
+EXPORT_SYMBOL(rdma_restrack_get_byid);
+
+static void restrack_release(struct kref *kref)
+{
+ struct rdma_restrack_entry *res;
+
+ res = container_of(kref, struct rdma_restrack_entry, kref);
+ if (res->task) {
+ put_task_struct(res->task);
+ res->task = NULL;
+ }
+ complete(&res->comp);
+}
+
+int rdma_restrack_put(struct rdma_restrack_entry *res)
+{
+ return kref_put(&res->kref, restrack_release);
+}
+EXPORT_SYMBOL(rdma_restrack_put);
+
+/**
+ * rdma_restrack_del() - delete object from the resource tracking database
+ * @res: resource entry
+ */
+void rdma_restrack_del(struct rdma_restrack_entry *res)
+{
+ struct rdma_restrack_entry *old;
+ struct rdma_restrack_root *rt;
+ struct ib_device *dev;
+
+ if (!res->valid) {
+ if (res->task) {
+ put_task_struct(res->task);
+ res->task = NULL;
+ }
+ return;
+ }
+
+ if (res->no_track)
+ goto out;
+
+ dev = res_to_dev(res);
+ if (WARN_ON(!dev))
+ return;
+
+ rt = &dev->res[res->type];
+
+ old = xa_erase(&rt->xa, res->id);
+ WARN_ON(old != res);
+
+out:
+ res->valid = false;
+ rdma_restrack_put(res);
+ wait_for_completion(&res->comp);
+}
+EXPORT_SYMBOL(rdma_restrack_del);
diff --git a/drivers/infiniband/core/restrack.h b/drivers/infiniband/core/restrack.h
new file mode 100644
index 000000000000..6a04fc41f738
--- /dev/null
+++ b/drivers/infiniband/core/restrack.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright (c) 2017-2019 Mellanox Technologies. All rights reserved.
+ */
+
+#ifndef _RDMA_CORE_RESTRACK_H_
+#define _RDMA_CORE_RESTRACK_H_
+
+#include <linux/mutex.h>
+
+/**
+ * struct rdma_restrack_root - main resource tracking management
+ * entity, per-device
+ */
+struct rdma_restrack_root {
+ /**
+ * @xa: Array of XArray structure to hold restrack entries.
+ */
+ struct xarray xa;
+ /**
+ * @next_id: Next ID to support cyclic allocation
+ */
+ u32 next_id;
+};
+
+int rdma_restrack_init(struct ib_device *dev);
+void rdma_restrack_clean(struct ib_device *dev);
+void rdma_restrack_add(struct rdma_restrack_entry *res);
+void rdma_restrack_del(struct rdma_restrack_entry *res);
+void rdma_restrack_new(struct rdma_restrack_entry *res,
+ enum rdma_restrack_type type);
+void rdma_restrack_set_name(struct rdma_restrack_entry *res,
+ const char *caller);
+void rdma_restrack_parent_name(struct rdma_restrack_entry *dst,
+ const struct rdma_restrack_entry *parent);
+#endif /* _RDMA_CORE_RESTRACK_H_ */
diff --git a/drivers/infiniband/core/roce_gid_mgmt.c b/drivers/infiniband/core/roce_gid_mgmt.c
index 0621f4455732..a9f2c6b1b29e 100644
--- a/drivers/infiniband/core/roce_gid_mgmt.c
+++ b/drivers/infiniband/core/roce_gid_mgmt.c
@@ -42,6 +42,8 @@
#include <rdma/ib_cache.h>
#include <rdma/ib_addr.h>
+static struct workqueue_struct *gid_cache_wq;
+
enum gid_op_type {
GID_DEL = 0,
GID_ADD
@@ -68,7 +70,7 @@ struct netdev_event_work {
};
static const struct {
- bool (*is_supported)(const struct ib_device *device, u8 port_num);
+ bool (*is_supported)(const struct ib_device *device, u32 port_num);
enum ib_gid_type gid_type;
} PORT_CAP_TO_GID_TYPE[] = {
{rdma_protocol_roce_eth_encap, IB_GID_TYPE_ROCE},
@@ -77,7 +79,7 @@ static const struct {
#define CAP_TO_GID_TABLE_SIZE ARRAY_SIZE(PORT_CAP_TO_GID_TYPE)
-unsigned long roce_gid_type_mask_support(struct ib_device *ib_dev, u8 port)
+unsigned long roce_gid_type_mask_support(struct ib_device *ib_dev, u32 port)
{
int i;
unsigned int ret_flags = 0;
@@ -94,7 +96,7 @@ unsigned long roce_gid_type_mask_support(struct ib_device *ib_dev, u8 port)
EXPORT_SYMBOL(roce_gid_type_mask_support);
static void update_gid(enum gid_op_type gid_op, struct ib_device *ib_dev,
- u8 port, union ib_gid *gid,
+ u32 port, union ib_gid *gid,
struct ib_gid_attr *gid_attr)
{
int i;
@@ -141,22 +143,22 @@ static enum bonding_slave_state is_eth_active_slave_of_bonding_rcu(struct net_de
#define REQUIRED_BOND_STATES (BONDING_SLAVE_STATE_ACTIVE | \
BONDING_SLAVE_STATE_NA)
-static int is_eth_port_of_netdev(struct ib_device *ib_dev, u8 port,
- struct net_device *rdma_ndev, void *cookie)
+static bool
+is_eth_port_of_netdev_filter(struct ib_device *ib_dev, u32 port,
+ struct net_device *rdma_ndev, void *cookie)
{
- struct net_device *event_ndev = (struct net_device *)cookie;
struct net_device *real_dev;
- int res;
+ bool res;
if (!rdma_ndev)
- return 0;
+ return false;
rcu_read_lock();
- real_dev = rdma_vlan_dev_real_dev(event_ndev);
+ real_dev = rdma_vlan_dev_real_dev(cookie);
if (!real_dev)
- real_dev = event_ndev;
+ real_dev = cookie;
- res = ((rdma_is_upper_dev_rcu(rdma_ndev, event_ndev) &&
+ res = ((rdma_is_upper_dev_rcu(rdma_ndev, cookie) &&
(is_eth_active_slave_of_bonding_rcu(rdma_ndev, real_dev) &
REQUIRED_BOND_STATES)) ||
real_dev == rdma_ndev);
@@ -165,14 +167,15 @@ static int is_eth_port_of_netdev(struct ib_device *ib_dev, u8 port,
return res;
}
-static int is_eth_port_inactive_slave(struct ib_device *ib_dev, u8 port,
- struct net_device *rdma_ndev, void *cookie)
+static bool
+is_eth_port_inactive_slave_filter(struct ib_device *ib_dev, u32 port,
+ struct net_device *rdma_ndev, void *cookie)
{
struct net_device *master_dev;
- int res;
+ bool res;
if (!rdma_ndev)
- return 0;
+ return false;
rcu_read_lock();
master_dev = netdev_master_upper_dev_get_rcu(rdma_ndev);
@@ -183,34 +186,102 @@ static int is_eth_port_inactive_slave(struct ib_device *ib_dev, u8 port,
return res;
}
-static int pass_all_filter(struct ib_device *ib_dev, u8 port,
- struct net_device *rdma_ndev, void *cookie)
+/**
+ * is_ndev_for_default_gid_filter - Check if a given netdevice
+ * can be considered for default GIDs or not.
+ * @ib_dev: IB device to check
+ * @port: Port to consider for adding default GID
+ * @rdma_ndev: rdma netdevice pointer
+ * @cookie: Netdevice to consider to form a default GID
+ *
+ * is_ndev_for_default_gid_filter() returns true if a given netdevice can be
+ * considered for deriving default RoCE GID, returns false otherwise.
+ */
+static bool
+is_ndev_for_default_gid_filter(struct ib_device *ib_dev, u32 port,
+ struct net_device *rdma_ndev, void *cookie)
+{
+ struct net_device *cookie_ndev = cookie;
+ bool res;
+
+ if (!rdma_ndev)
+ return false;
+
+ rcu_read_lock();
+
+ /*
+ * When rdma netdevice is used in bonding, bonding master netdevice
+ * should be considered for default GIDs. Therefore, ignore slave rdma
+ * netdevices when bonding is considered.
+ * Additionally when event(cookie) netdevice is bond master device,
+ * make sure that it the upper netdevice of rdma netdevice.
+ */
+ res = ((cookie_ndev == rdma_ndev && !netif_is_bond_slave(rdma_ndev)) ||
+ (netif_is_bond_master(cookie_ndev) &&
+ rdma_is_upper_dev_rcu(rdma_ndev, cookie_ndev)));
+
+ rcu_read_unlock();
+ return res;
+}
+
+static bool pass_all_filter(struct ib_device *ib_dev, u32 port,
+ struct net_device *rdma_ndev, void *cookie)
{
- return 1;
+ return true;
}
-static int upper_device_filter(struct ib_device *ib_dev, u8 port,
- struct net_device *rdma_ndev, void *cookie)
+static bool upper_device_filter(struct ib_device *ib_dev, u32 port,
+ struct net_device *rdma_ndev, void *cookie)
{
- struct net_device *event_ndev = (struct net_device *)cookie;
- int res;
+ bool res;
if (!rdma_ndev)
- return 0;
+ return false;
- if (rdma_ndev == event_ndev)
- return 1;
+ if (rdma_ndev == cookie)
+ return true;
rcu_read_lock();
- res = rdma_is_upper_dev_rcu(rdma_ndev, event_ndev);
+ res = rdma_is_upper_dev_rcu(rdma_ndev, cookie);
rcu_read_unlock();
return res;
}
+/**
+ * is_upper_ndev_bond_master_filter - Check if a given netdevice
+ * is bond master device of netdevice of the RDMA device of port.
+ * @ib_dev: IB device to check
+ * @port: Port to consider for adding default GID
+ * @rdma_ndev: Pointer to rdma netdevice
+ * @cookie: Netdevice to consider to form a default GID
+ *
+ * is_upper_ndev_bond_master_filter() returns true if a cookie_netdev
+ * is bond master device and rdma_ndev is its lower netdevice. It might
+ * not have been established as slave device yet.
+ */
+static bool
+is_upper_ndev_bond_master_filter(struct ib_device *ib_dev, u32 port,
+ struct net_device *rdma_ndev,
+ void *cookie)
+{
+ struct net_device *cookie_ndev = cookie;
+ bool match = false;
+
+ if (!rdma_ndev)
+ return false;
+
+ rcu_read_lock();
+ if (netif_is_bond_master(cookie_ndev) &&
+ rdma_is_upper_dev_rcu(rdma_ndev, cookie_ndev))
+ match = true;
+ rcu_read_unlock();
+ return match;
+}
+
static void update_gid_ip(enum gid_op_type gid_op,
struct ib_device *ib_dev,
- u8 port, struct net_device *ndev,
+ u32 port, struct net_device *ndev,
struct sockaddr *addr)
{
union ib_gid gid;
@@ -223,36 +294,13 @@ static void update_gid_ip(enum gid_op_type gid_op,
update_gid(gid_op, ib_dev, port, &gid, &gid_attr);
}
-static void enum_netdev_default_gids(struct ib_device *ib_dev,
- u8 port, struct net_device *event_ndev,
- struct net_device *rdma_ndev)
-{
- unsigned long gid_type_mask;
-
- rcu_read_lock();
- if (!rdma_ndev ||
- ((rdma_ndev != event_ndev &&
- !rdma_is_upper_dev_rcu(rdma_ndev, event_ndev)) ||
- is_eth_active_slave_of_bonding_rcu(rdma_ndev,
- netdev_master_upper_dev_get_rcu(rdma_ndev)) ==
- BONDING_SLAVE_STATE_INACTIVE)) {
- rcu_read_unlock();
- return;
- }
- rcu_read_unlock();
-
- gid_type_mask = roce_gid_type_mask_support(ib_dev, port);
-
- ib_cache_gid_set_default_gid(ib_dev, port, rdma_ndev, gid_type_mask,
- IB_CACHE_GID_DEFAULT_MODE_SET);
-}
-
static void bond_delete_netdev_default_gids(struct ib_device *ib_dev,
- u8 port,
- struct net_device *event_ndev,
- struct net_device *rdma_ndev)
+ u32 port,
+ struct net_device *rdma_ndev,
+ struct net_device *event_ndev)
{
struct net_device *real_dev = rdma_vlan_dev_real_dev(event_ndev);
+ unsigned long gid_type_mask;
if (!rdma_ndev)
return;
@@ -262,26 +310,28 @@ static void bond_delete_netdev_default_gids(struct ib_device *ib_dev,
rcu_read_lock();
- if (rdma_is_upper_dev_rcu(rdma_ndev, event_ndev) &&
- is_eth_active_slave_of_bonding_rcu(rdma_ndev, real_dev) ==
- BONDING_SLAVE_STATE_INACTIVE) {
- unsigned long gid_type_mask;
-
+ if (((rdma_ndev != event_ndev &&
+ !rdma_is_upper_dev_rcu(rdma_ndev, event_ndev)) ||
+ is_eth_active_slave_of_bonding_rcu(rdma_ndev, real_dev)
+ ==
+ BONDING_SLAVE_STATE_INACTIVE)) {
rcu_read_unlock();
+ return;
+ }
+
+ rcu_read_unlock();
- gid_type_mask = roce_gid_type_mask_support(ib_dev, port);
+ gid_type_mask = roce_gid_type_mask_support(ib_dev, port);
- ib_cache_gid_set_default_gid(ib_dev, port, rdma_ndev,
- gid_type_mask,
- IB_CACHE_GID_DEFAULT_MODE_DELETE);
- } else {
- rcu_read_unlock();
- }
+ ib_cache_gid_set_default_gid(ib_dev, port, rdma_ndev,
+ gid_type_mask,
+ IB_CACHE_GID_DEFAULT_MODE_DELETE);
}
static void enum_netdev_ipv4_ips(struct ib_device *ib_dev,
- u8 port, struct net_device *ndev)
+ u32 port, struct net_device *ndev)
{
+ const struct in_ifaddr *ifa;
struct in_device *in_dev;
struct sin_list {
struct list_head list;
@@ -301,7 +351,7 @@ static void enum_netdev_ipv4_ips(struct ib_device *ib_dev,
return;
}
- for_ifa(in_dev) {
+ in_dev_for_each_ifa_rcu(ifa, in_dev) {
struct sin_list *entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
if (!entry)
@@ -311,7 +361,7 @@ static void enum_netdev_ipv4_ips(struct ib_device *ib_dev,
entry->ip.sin_addr.s_addr = ifa->ifa_address;
list_add_tail(&entry->list, &sin_list);
}
- endfor_ifa(in_dev);
+
rcu_read_unlock();
list_for_each_entry_safe(sin_iter, sin_temp, &sin_list, list) {
@@ -323,7 +373,7 @@ static void enum_netdev_ipv4_ips(struct ib_device *ib_dev,
}
static void enum_netdev_ipv6_ips(struct ib_device *ib_dev,
- u8 port, struct net_device *ndev)
+ u32 port, struct net_device *ndev)
{
struct inet6_ifaddr *ifp;
struct inet6_dev *in6_dev;
@@ -368,7 +418,7 @@ static void enum_netdev_ipv6_ips(struct ib_device *ib_dev,
}
}
-static void _add_netdev_ips(struct ib_device *ib_dev, u8 port,
+static void _add_netdev_ips(struct ib_device *ib_dev, u32 port,
struct net_device *ndev)
{
enum_netdev_ipv4_ips(ib_dev, port, ndev);
@@ -376,25 +426,52 @@ static void _add_netdev_ips(struct ib_device *ib_dev, u8 port,
enum_netdev_ipv6_ips(ib_dev, port, ndev);
}
-static void add_netdev_ips(struct ib_device *ib_dev, u8 port,
+static void add_netdev_ips(struct ib_device *ib_dev, u32 port,
struct net_device *rdma_ndev, void *cookie)
{
- struct net_device *event_ndev = (struct net_device *)cookie;
-
- enum_netdev_default_gids(ib_dev, port, event_ndev, rdma_ndev);
- _add_netdev_ips(ib_dev, port, event_ndev);
+ _add_netdev_ips(ib_dev, port, cookie);
}
-static void del_netdev_ips(struct ib_device *ib_dev, u8 port,
+static void del_netdev_ips(struct ib_device *ib_dev, u32 port,
struct net_device *rdma_ndev, void *cookie)
{
- struct net_device *event_ndev = (struct net_device *)cookie;
+ ib_cache_gid_del_all_netdev_gids(ib_dev, port, cookie);
+}
+
+/**
+ * del_default_gids - Delete default GIDs of the event/cookie netdevice
+ * @ib_dev: RDMA device pointer
+ * @port: Port of the RDMA device whose GID table to consider
+ * @rdma_ndev: Unused rdma netdevice
+ * @cookie: Pointer to event netdevice
+ *
+ * del_default_gids() deletes the default GIDs of the event/cookie netdevice.
+ */
+static void del_default_gids(struct ib_device *ib_dev, u32 port,
+ struct net_device *rdma_ndev, void *cookie)
+{
+ struct net_device *cookie_ndev = cookie;
+ unsigned long gid_type_mask;
+
+ gid_type_mask = roce_gid_type_mask_support(ib_dev, port);
+
+ ib_cache_gid_set_default_gid(ib_dev, port, cookie_ndev, gid_type_mask,
+ IB_CACHE_GID_DEFAULT_MODE_DELETE);
+}
+
+static void add_default_gids(struct ib_device *ib_dev, u32 port,
+ struct net_device *rdma_ndev, void *cookie)
+{
+ struct net_device *event_ndev = cookie;
+ unsigned long gid_type_mask;
- ib_cache_gid_del_all_netdev_gids(ib_dev, port, event_ndev);
+ gid_type_mask = roce_gid_type_mask_support(ib_dev, port);
+ ib_cache_gid_set_default_gid(ib_dev, port, event_ndev, gid_type_mask,
+ IB_CACHE_GID_DEFAULT_MODE_SET);
}
static void enum_all_gids_of_dev_cb(struct ib_device *ib_dev,
- u8 port,
+ u32 port,
struct net_device *rdma_ndev,
void *cookie)
{
@@ -405,25 +482,62 @@ static void enum_all_gids_of_dev_cb(struct ib_device *ib_dev,
* our feet
*/
rtnl_lock();
+ down_read(&net_rwsem);
for_each_net(net)
- for_each_netdev(net, ndev)
- if (is_eth_port_of_netdev(ib_dev, port, rdma_ndev, ndev))
- add_netdev_ips(ib_dev, port, rdma_ndev, ndev);
+ for_each_netdev(net, ndev) {
+ /*
+ * Filter and add default GIDs of the primary netdevice
+ * when not in bonding mode, or add default GIDs
+ * of bond master device, when in bonding mode.
+ */
+ if (is_ndev_for_default_gid_filter(ib_dev, port,
+ rdma_ndev, ndev))
+ add_default_gids(ib_dev, port, rdma_ndev, ndev);
+
+ if (is_eth_port_of_netdev_filter(ib_dev, port,
+ rdma_ndev, ndev))
+ _add_netdev_ips(ib_dev, port, ndev);
+ }
+ up_read(&net_rwsem);
rtnl_unlock();
}
-/* This function will rescan all of the network devices in the system
- * and add their gids, as needed, to the relevant RoCE devices. */
-int roce_rescan_device(struct ib_device *ib_dev)
+/**
+ * rdma_roce_rescan_device - Rescan all of the network devices in the system
+ * and add their gids, as needed, to the relevant RoCE devices.
+ *
+ * @ib_dev: the rdma device
+ */
+void rdma_roce_rescan_device(struct ib_device *ib_dev)
{
ib_enum_roce_netdev(ib_dev, pass_all_filter, NULL,
enum_all_gids_of_dev_cb, NULL);
+}
+EXPORT_SYMBOL(rdma_roce_rescan_device);
- return 0;
+/**
+ * rdma_roce_rescan_port - Rescan all of the network devices in the system
+ * and add their gids if relevant to the port of the RoCE device.
+ *
+ * @ib_dev: IB device
+ * @port: Port number
+ */
+void rdma_roce_rescan_port(struct ib_device *ib_dev, u32 port)
+{
+ struct net_device *ndev = NULL;
+
+ if (rdma_protocol_roce(ib_dev, port)) {
+ ndev = ib_device_get_netdev(ib_dev, port);
+ if (!ndev)
+ return;
+ enum_all_gids_of_dev_cb(ib_dev, port, ndev, ndev);
+ dev_put(ndev);
+ }
}
+EXPORT_SYMBOL(rdma_roce_rescan_port);
static void callback_for_addr_gid_device_scan(struct ib_device *device,
- u8 port,
+ u32 port,
struct net_device *rdma_ndev,
void *cookie)
{
@@ -439,10 +553,11 @@ struct upper_list {
struct net_device *upper;
};
-static int netdev_upper_walk(struct net_device *upper, void *data)
+static int netdev_upper_walk(struct net_device *upper,
+ struct netdev_nested_priv *priv)
{
struct upper_list *entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
- struct list_head *upper_list = data;
+ struct list_head *upper_list = (struct list_head *)priv->data;
if (!entry)
return 0;
@@ -454,19 +569,21 @@ static int netdev_upper_walk(struct net_device *upper, void *data)
return 0;
}
-static void handle_netdev_upper(struct ib_device *ib_dev, u8 port,
+static void handle_netdev_upper(struct ib_device *ib_dev, u32 port,
void *cookie,
void (*handle_netdev)(struct ib_device *ib_dev,
- u8 port,
+ u32 port,
struct net_device *ndev))
{
- struct net_device *ndev = (struct net_device *)cookie;
+ struct net_device *ndev = cookie;
+ struct netdev_nested_priv priv;
struct upper_list *upper_iter;
struct upper_list *upper_temp;
LIST_HEAD(upper_list);
+ priv.data = &upper_list;
rcu_read_lock();
- netdev_walk_all_upper_dev_rcu(ndev, netdev_upper_walk, &upper_list);
+ netdev_walk_all_upper_dev_rcu(ndev, netdev_upper_walk, &priv);
rcu_read_unlock();
handle_netdev(ib_dev, port, ndev);
@@ -479,25 +596,26 @@ static void handle_netdev_upper(struct ib_device *ib_dev, u8 port,
}
}
-static void _roce_del_all_netdev_gids(struct ib_device *ib_dev, u8 port,
- struct net_device *event_ndev)
+void roce_del_all_netdev_gids(struct ib_device *ib_dev,
+ u32 port, struct net_device *ndev)
{
- ib_cache_gid_del_all_netdev_gids(ib_dev, port, event_ndev);
+ ib_cache_gid_del_all_netdev_gids(ib_dev, port, ndev);
}
+EXPORT_SYMBOL(roce_del_all_netdev_gids);
-static void del_netdev_upper_ips(struct ib_device *ib_dev, u8 port,
+static void del_netdev_upper_ips(struct ib_device *ib_dev, u32 port,
struct net_device *rdma_ndev, void *cookie)
{
- handle_netdev_upper(ib_dev, port, cookie, _roce_del_all_netdev_gids);
+ handle_netdev_upper(ib_dev, port, cookie, roce_del_all_netdev_gids);
}
-static void add_netdev_upper_ips(struct ib_device *ib_dev, u8 port,
+static void add_netdev_upper_ips(struct ib_device *ib_dev, u32 port,
struct net_device *rdma_ndev, void *cookie)
{
handle_netdev_upper(ib_dev, port, cookie, _add_netdev_ips);
}
-static void del_netdev_default_ips_join(struct ib_device *ib_dev, u8 port,
+static void del_netdev_default_ips_join(struct ib_device *ib_dev, u32 port,
struct net_device *rdma_ndev,
void *cookie)
{
@@ -505,25 +623,16 @@ static void del_netdev_default_ips_join(struct ib_device *ib_dev, u8 port,
rcu_read_lock();
master_ndev = netdev_master_upper_dev_get_rcu(rdma_ndev);
- if (master_ndev)
- dev_hold(master_ndev);
+ dev_hold(master_ndev);
rcu_read_unlock();
if (master_ndev) {
- bond_delete_netdev_default_gids(ib_dev, port, master_ndev,
- rdma_ndev);
+ bond_delete_netdev_default_gids(ib_dev, port, rdma_ndev,
+ master_ndev);
dev_put(master_ndev);
}
}
-static void del_netdev_default_ips(struct ib_device *ib_dev, u8 port,
- struct net_device *rdma_ndev, void *cookie)
-{
- struct net_device *event_ndev = (struct net_device *)cookie;
-
- bond_delete_netdev_default_gids(ib_dev, port, event_ndev, rdma_ndev);
-}
-
/* The following functions operate on all IB devices. netdevice_event and
* addr_event execute ib_enum_all_roce_netdevs through a work.
* ib_enum_all_roce_netdevs iterates through all IB devices.
@@ -568,46 +677,100 @@ static int netdevice_queue_work(struct netdev_event_work_cmd *cmds,
}
INIT_WORK(&ndev_work->work, netdevice_event_work_handler);
- queue_work(ib_wq, &ndev_work->work);
+ queue_work(gid_cache_wq, &ndev_work->work);
return NOTIFY_DONE;
}
static const struct netdev_event_work_cmd add_cmd = {
- .cb = add_netdev_ips, .filter = is_eth_port_of_netdev};
+ .cb = add_netdev_ips,
+ .filter = is_eth_port_of_netdev_filter
+};
+
static const struct netdev_event_work_cmd add_cmd_upper_ips = {
- .cb = add_netdev_upper_ips, .filter = is_eth_port_of_netdev};
+ .cb = add_netdev_upper_ips,
+ .filter = is_eth_port_of_netdev_filter
+};
-static void netdevice_event_changeupper(struct netdev_notifier_changeupper_info *changeupper_info,
- struct netdev_event_work_cmd *cmds)
+static void
+ndev_event_unlink(struct netdev_notifier_changeupper_info *changeupper_info,
+ struct netdev_event_work_cmd *cmds)
{
- static const struct netdev_event_work_cmd upper_ips_del_cmd = {
- .cb = del_netdev_upper_ips, .filter = upper_device_filter};
- static const struct netdev_event_work_cmd bonding_default_del_cmd = {
- .cb = del_netdev_default_ips, .filter = is_eth_port_inactive_slave};
-
- if (changeupper_info->linking == false) {
- cmds[0] = upper_ips_del_cmd;
- cmds[0].ndev = changeupper_info->upper_dev;
- cmds[1] = add_cmd;
- } else {
- cmds[0] = bonding_default_del_cmd;
- cmds[0].ndev = changeupper_info->upper_dev;
- cmds[1] = add_cmd_upper_ips;
- cmds[1].ndev = changeupper_info->upper_dev;
- cmds[1].filter_ndev = changeupper_info->upper_dev;
- }
+ static const struct netdev_event_work_cmd
+ upper_ips_del_cmd = {
+ .cb = del_netdev_upper_ips,
+ .filter = upper_device_filter
+ };
+
+ cmds[0] = upper_ips_del_cmd;
+ cmds[0].ndev = changeupper_info->upper_dev;
+ cmds[1] = add_cmd;
}
+static const struct netdev_event_work_cmd bonding_default_add_cmd = {
+ .cb = add_default_gids,
+ .filter = is_upper_ndev_bond_master_filter
+};
+
+static void
+ndev_event_link(struct net_device *event_ndev,
+ struct netdev_notifier_changeupper_info *changeupper_info,
+ struct netdev_event_work_cmd *cmds)
+{
+ static const struct netdev_event_work_cmd
+ bonding_default_del_cmd = {
+ .cb = del_default_gids,
+ .filter = is_upper_ndev_bond_master_filter
+ };
+ /*
+ * When a lower netdev is linked to its upper bonding
+ * netdev, delete lower slave netdev's default GIDs.
+ */
+ cmds[0] = bonding_default_del_cmd;
+ cmds[0].ndev = event_ndev;
+ cmds[0].filter_ndev = changeupper_info->upper_dev;
+
+ /* Now add bonding upper device default GIDs */
+ cmds[1] = bonding_default_add_cmd;
+ cmds[1].ndev = changeupper_info->upper_dev;
+ cmds[1].filter_ndev = changeupper_info->upper_dev;
+
+ /* Now add bonding upper device IP based GIDs */
+ cmds[2] = add_cmd_upper_ips;
+ cmds[2].ndev = changeupper_info->upper_dev;
+ cmds[2].filter_ndev = changeupper_info->upper_dev;
+}
+
+static void netdevice_event_changeupper(struct net_device *event_ndev,
+ struct netdev_notifier_changeupper_info *changeupper_info,
+ struct netdev_event_work_cmd *cmds)
+{
+ if (changeupper_info->linking)
+ ndev_event_link(event_ndev, changeupper_info, cmds);
+ else
+ ndev_event_unlink(changeupper_info, cmds);
+}
+
+static const struct netdev_event_work_cmd add_default_gid_cmd = {
+ .cb = add_default_gids,
+ .filter = is_ndev_for_default_gid_filter,
+};
+
static int netdevice_event(struct notifier_block *this, unsigned long event,
void *ptr)
{
static const struct netdev_event_work_cmd del_cmd = {
.cb = del_netdev_ips, .filter = pass_all_filter};
- static const struct netdev_event_work_cmd bonding_default_del_cmd_join = {
- .cb = del_netdev_default_ips_join, .filter = is_eth_port_inactive_slave};
- static const struct netdev_event_work_cmd default_del_cmd = {
- .cb = del_netdev_default_ips, .filter = pass_all_filter};
+ static const struct netdev_event_work_cmd
+ bonding_default_del_cmd_join = {
+ .cb = del_netdev_default_ips_join,
+ .filter = is_eth_port_inactive_slave_filter
+ };
+ static const struct netdev_event_work_cmd
+ netdev_del_cmd = {
+ .cb = del_netdev_ips,
+ .filter = is_eth_port_of_netdev_filter
+ };
static const struct netdev_event_work_cmd bonding_event_ips_del_cmd = {
.cb = del_netdev_upper_ips, .filter = upper_device_filter};
struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
@@ -620,7 +783,8 @@ static int netdevice_event(struct notifier_block *this, unsigned long event,
case NETDEV_REGISTER:
case NETDEV_UP:
cmds[0] = bonding_default_del_cmd_join;
- cmds[1] = add_cmd;
+ cmds[1] = add_default_gid_cmd;
+ cmds[2] = add_cmd;
break;
case NETDEV_UNREGISTER:
@@ -631,19 +795,24 @@ static int netdevice_event(struct notifier_block *this, unsigned long event,
break;
case NETDEV_CHANGEADDR:
- cmds[0] = default_del_cmd;
- cmds[1] = add_cmd;
+ cmds[0] = netdev_del_cmd;
+ if (ndev->reg_state == NETREG_REGISTERED) {
+ cmds[1] = add_default_gid_cmd;
+ cmds[2] = add_cmd;
+ }
break;
case NETDEV_CHANGEUPPER:
- netdevice_event_changeupper(
+ netdevice_event_changeupper(ndev,
container_of(ptr, struct netdev_notifier_changeupper_info, info),
cmds);
break;
case NETDEV_BONDING_FAILOVER:
cmds[0] = bonding_event_ips_del_cmd;
- cmds[1] = bonding_default_del_cmd_join;
+ /* Add default GIDs of the bond device */
+ cmds[1] = bonding_default_add_cmd;
+ /* Add IP based GIDs of the bond device */
cmds[2] = add_cmd_upper_ips;
break;
@@ -659,7 +828,8 @@ static void update_gid_event_work_handler(struct work_struct *_work)
struct update_gid_event_work *work =
container_of(_work, struct update_gid_event_work, work);
- ib_enum_all_roce_netdevs(is_eth_port_of_netdev, work->gid_attr.ndev,
+ ib_enum_all_roce_netdevs(is_eth_port_of_netdev_filter,
+ work->gid_attr.ndev,
callback_for_addr_gid_device_scan, work);
dev_put(work->gid_attr.ndev);
@@ -701,7 +871,7 @@ static int addr_event(struct notifier_block *this, unsigned long event,
dev_hold(ndev);
work->gid_attr.ndev = ndev;
- queue_work(ib_wq, &work->work);
+ queue_work(gid_cache_wq, &work->work);
return NOTIFY_DONE;
}
@@ -748,6 +918,10 @@ static struct notifier_block nb_inet6addr = {
int __init roce_gid_mgmt_init(void)
{
+ gid_cache_wq = alloc_ordered_workqueue("gid-cache-wq", 0);
+ if (!gid_cache_wq)
+ return -ENOMEM;
+
register_inetaddr_notifier(&nb_inetaddr);
if (IS_ENABLED(CONFIG_IPV6))
register_inet6addr_notifier(&nb_inet6addr);
@@ -772,4 +946,5 @@ void __exit roce_gid_mgmt_cleanup(void)
* ib-core is removed, all physical devices have been removed,
* so no issue with remaining hardware contexts.
*/
+ destroy_workqueue(gid_cache_wq);
}
diff --git a/drivers/infiniband/core/rw.c b/drivers/infiniband/core/rw.c
index dbfd854c32c9..6354ddf2a274 100644
--- a/drivers/infiniband/core/rw.c
+++ b/drivers/infiniband/core/rw.c
@@ -1,17 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2016 HGST, a Western Digital Company.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
*/
+#include <linux/memremap.h>
#include <linux/moduleparam.h>
#include <linux/slab.h>
+#include <linux/pci-p2pdma.h>
#include <rdma/mr_pool.h>
#include <rdma/rw.h>
@@ -27,14 +21,17 @@ module_param_named(force_mr, rdma_rw_force_mr, bool, 0);
MODULE_PARM_DESC(force_mr, "Force usage of MRs for RDMA READ/WRITE operations");
/*
- * Check if the device might use memory registration. This is currently only
- * true for iWarp devices. In the future we can hopefully fine tune this based
- * on HCA driver input.
+ * Report whether memory registration should be used. Memory registration must
+ * be used for iWarp devices because of iWARP-specific limitations. Memory
+ * registration is also enabled if registering memory might yield better
+ * performance than using multiple SGE entries, see rdma_rw_io_needs_mr()
*/
-static inline bool rdma_rw_can_use_mr(struct ib_device *dev, u8 port_num)
+static inline bool rdma_rw_can_use_mr(struct ib_device *dev, u32 port_num)
{
if (rdma_protocol_iwarp(dev, port_num))
return true;
+ if (dev->attrs.max_sgl_rd)
+ return true;
if (unlikely(rdma_rw_force_mr))
return true;
return false;
@@ -42,34 +39,61 @@ static inline bool rdma_rw_can_use_mr(struct ib_device *dev, u8 port_num)
/*
* Check if the device will use memory registration for this RW operation.
- * We currently always use memory registrations for iWarp RDMA READs, and
- * have a debug option to force usage of MRs.
- *
- * XXX: In the future we can hopefully fine tune this based on HCA driver
- * input.
+ * For RDMA READs we must use MRs on iWarp and can optionally use them as an
+ * optimization otherwise. Additionally we have a debug option to force usage
+ * of MRs to help testing this code path.
*/
-static inline bool rdma_rw_io_needs_mr(struct ib_device *dev, u8 port_num,
+static inline bool rdma_rw_io_needs_mr(struct ib_device *dev, u32 port_num,
enum dma_data_direction dir, int dma_nents)
{
- if (rdma_protocol_iwarp(dev, port_num) && dir == DMA_FROM_DEVICE)
- return true;
+ if (dir == DMA_FROM_DEVICE) {
+ if (rdma_protocol_iwarp(dev, port_num))
+ return true;
+ if (dev->attrs.max_sgl_rd && dma_nents > dev->attrs.max_sgl_rd)
+ return true;
+ }
if (unlikely(rdma_rw_force_mr))
return true;
return false;
}
-static inline u32 rdma_rw_fr_page_list_len(struct ib_device *dev)
+static inline u32 rdma_rw_fr_page_list_len(struct ib_device *dev,
+ bool pi_support)
{
+ u32 max_pages;
+
+ if (pi_support)
+ max_pages = dev->attrs.max_pi_fast_reg_page_list_len;
+ else
+ max_pages = dev->attrs.max_fast_reg_page_list_len;
+
/* arbitrary limit to avoid allocating gigantic resources */
- return min_t(u32, dev->attrs.max_fast_reg_page_list_len, 256);
+ return min_t(u32, max_pages, 256);
+}
+
+static inline int rdma_rw_inv_key(struct rdma_rw_reg_ctx *reg)
+{
+ int count = 0;
+
+ if (reg->mr->need_inval) {
+ reg->inv_wr.opcode = IB_WR_LOCAL_INV;
+ reg->inv_wr.ex.invalidate_rkey = reg->mr->lkey;
+ reg->inv_wr.next = &reg->reg_wr.wr;
+ count++;
+ } else {
+ reg->inv_wr.next = NULL;
+ }
+
+ return count;
}
/* Caller must have zero-initialized *reg. */
-static int rdma_rw_init_one_mr(struct ib_qp *qp, u8 port_num,
+static int rdma_rw_init_one_mr(struct ib_qp *qp, u32 port_num,
struct rdma_rw_reg_ctx *reg, struct scatterlist *sg,
u32 sg_cnt, u32 offset)
{
- u32 pages_per_mr = rdma_rw_fr_page_list_len(qp->pd->device);
+ u32 pages_per_mr = rdma_rw_fr_page_list_len(qp->pd->device,
+ qp->integrity_en);
u32 nents = min(sg_cnt, pages_per_mr);
int count = 0, ret;
@@ -77,17 +101,10 @@ static int rdma_rw_init_one_mr(struct ib_qp *qp, u8 port_num,
if (!reg->mr)
return -EAGAIN;
- if (reg->mr->need_inval) {
- reg->inv_wr.opcode = IB_WR_LOCAL_INV;
- reg->inv_wr.ex.invalidate_rkey = reg->mr->lkey;
- reg->inv_wr.next = &reg->reg_wr.wr;
- count++;
- } else {
- reg->inv_wr.next = NULL;
- }
+ count += rdma_rw_inv_key(reg);
ret = ib_map_mr_sg(reg->mr, sg, nents, &offset, PAGE_SIZE);
- if (ret < nents) {
+ if (ret < 0 || ret < nents) {
ib_mr_pool_put(qp, &qp->rdma_mrs, reg->mr);
return -EINVAL;
}
@@ -105,14 +122,15 @@ static int rdma_rw_init_one_mr(struct ib_qp *qp, u8 port_num,
}
static int rdma_rw_init_mr_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
- u8 port_num, struct scatterlist *sg, u32 sg_cnt, u32 offset,
+ u32 port_num, struct scatterlist *sg, u32 sg_cnt, u32 offset,
u64 remote_addr, u32 rkey, enum dma_data_direction dir)
{
struct rdma_rw_reg_ctx *prev = NULL;
- u32 pages_per_mr = rdma_rw_fr_page_list_len(qp->pd->device);
+ u32 pages_per_mr = rdma_rw_fr_page_list_len(qp->pd->device,
+ qp->integrity_en);
int i, j, ret = 0, count = 0;
- ctx->nr_ops = (sg_cnt + pages_per_mr - 1) / pages_per_mr;
+ ctx->nr_ops = DIV_ROUND_UP(sg_cnt, pages_per_mr);
ctx->reg = kcalloc(ctx->nr_ops, sizeof(*ctx->reg), GFP_KERNEL);
if (!ctx->reg) {
ret = -ENOMEM;
@@ -178,7 +196,6 @@ static int rdma_rw_init_map_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
struct scatterlist *sg, u32 sg_cnt, u32 offset,
u64 remote_addr, u32 rkey, enum dma_data_direction dir)
{
- struct ib_device *dev = qp->pd->device;
u32 max_sge = dir == DMA_TO_DEVICE ? qp->max_write_sge :
qp->max_read_sge;
struct ib_sge *sge;
@@ -208,8 +225,8 @@ static int rdma_rw_init_map_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
rdma_wr->wr.sg_list = sge;
for (j = 0; j < nr_sge; j++, sg = sg_next(sg)) {
- sge->addr = ib_sg_dma_address(dev, sg) + offset;
- sge->length = ib_sg_dma_len(dev, sg) - offset;
+ sge->addr = sg_dma_address(sg) + offset;
+ sge->length = sg_dma_len(sg) - offset;
sge->lkey = qp->pd->local_dma_lkey;
total_len += sge->length;
@@ -235,14 +252,13 @@ static int rdma_rw_init_single_wr(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
struct scatterlist *sg, u32 offset, u64 remote_addr, u32 rkey,
enum dma_data_direction dir)
{
- struct ib_device *dev = qp->pd->device;
struct ib_rdma_wr *rdma_wr = &ctx->single.wr;
ctx->nr_ops = 1;
ctx->single.sge.lkey = qp->pd->local_dma_lkey;
- ctx->single.sge.addr = ib_sg_dma_address(dev, sg) + offset;
- ctx->single.sge.length = ib_sg_dma_len(dev, sg) - offset;
+ ctx->single.sge.addr = sg_dma_address(sg) + offset;
+ ctx->single.sge.length = sg_dma_len(sg) - offset;
memset(rdma_wr, 0, sizeof(*rdma_wr));
if (dir == DMA_TO_DEVICE)
@@ -273,23 +289,27 @@ static int rdma_rw_init_single_wr(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
* Returns the number of WQEs that will be needed on the workqueue if
* successful, or a negative error code.
*/
-int rdma_rw_ctx_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num,
+int rdma_rw_ctx_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u32 port_num,
struct scatterlist *sg, u32 sg_cnt, u32 sg_offset,
u64 remote_addr, u32 rkey, enum dma_data_direction dir)
{
struct ib_device *dev = qp->pd->device;
+ struct sg_table sgt = {
+ .sgl = sg,
+ .orig_nents = sg_cnt,
+ };
int ret;
- ret = ib_dma_map_sg(dev, sg, sg_cnt, dir);
- if (!ret)
- return -ENOMEM;
- sg_cnt = ret;
+ ret = ib_dma_map_sgtable_attrs(dev, &sgt, dir, 0);
+ if (ret)
+ return ret;
+ sg_cnt = sgt.nents;
/*
* Skip to the S/G entry that sg_offset falls into:
*/
for (;;) {
- u32 len = ib_sg_dma_len(dev, sg);
+ u32 len = sg_dma_len(sg);
if (sg_offset < len)
break;
@@ -319,13 +339,13 @@ int rdma_rw_ctx_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num,
return ret;
out_unmap_sg:
- ib_dma_unmap_sg(dev, sg, sg_cnt, dir);
+ ib_dma_unmap_sgtable_attrs(dev, &sgt, dir, 0);
return ret;
}
EXPORT_SYMBOL(rdma_rw_ctx_init);
/**
- * rdma_rw_ctx_signature init - initialize a RW context with signature offload
+ * rdma_rw_ctx_signature_init - initialize a RW context with signature offload
* @ctx: context to initialize
* @qp: queue pair to operate on
* @port_num: port num to which the connection is bound
@@ -342,100 +362,85 @@ EXPORT_SYMBOL(rdma_rw_ctx_init);
* successful, or a negative error code.
*/
int rdma_rw_ctx_signature_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
- u8 port_num, struct scatterlist *sg, u32 sg_cnt,
+ u32 port_num, struct scatterlist *sg, u32 sg_cnt,
struct scatterlist *prot_sg, u32 prot_sg_cnt,
struct ib_sig_attrs *sig_attrs,
u64 remote_addr, u32 rkey, enum dma_data_direction dir)
{
struct ib_device *dev = qp->pd->device;
- u32 pages_per_mr = rdma_rw_fr_page_list_len(qp->pd->device);
+ u32 pages_per_mr = rdma_rw_fr_page_list_len(qp->pd->device,
+ qp->integrity_en);
+ struct sg_table sgt = {
+ .sgl = sg,
+ .orig_nents = sg_cnt,
+ };
+ struct sg_table prot_sgt = {
+ .sgl = prot_sg,
+ .orig_nents = prot_sg_cnt,
+ };
struct ib_rdma_wr *rdma_wr;
- struct ib_send_wr *prev_wr = NULL;
int count = 0, ret;
if (sg_cnt > pages_per_mr || prot_sg_cnt > pages_per_mr) {
- pr_err("SG count too large\n");
+ pr_err("SG count too large: sg_cnt=%u, prot_sg_cnt=%u, pages_per_mr=%u\n",
+ sg_cnt, prot_sg_cnt, pages_per_mr);
return -EINVAL;
}
- ret = ib_dma_map_sg(dev, sg, sg_cnt, dir);
- if (!ret)
- return -ENOMEM;
- sg_cnt = ret;
+ ret = ib_dma_map_sgtable_attrs(dev, &sgt, dir, 0);
+ if (ret)
+ return ret;
- ret = ib_dma_map_sg(dev, prot_sg, prot_sg_cnt, dir);
- if (!ret) {
- ret = -ENOMEM;
- goto out_unmap_sg;
+ if (prot_sg_cnt) {
+ ret = ib_dma_map_sgtable_attrs(dev, &prot_sgt, dir, 0);
+ if (ret)
+ goto out_unmap_sg;
}
- prot_sg_cnt = ret;
ctx->type = RDMA_RW_SIG_MR;
ctx->nr_ops = 1;
- ctx->sig = kcalloc(1, sizeof(*ctx->sig), GFP_KERNEL);
- if (!ctx->sig) {
+ ctx->reg = kzalloc(sizeof(*ctx->reg), GFP_KERNEL);
+ if (!ctx->reg) {
ret = -ENOMEM;
goto out_unmap_prot_sg;
}
- ret = rdma_rw_init_one_mr(qp, port_num, &ctx->sig->data, sg, sg_cnt, 0);
- if (ret < 0)
- goto out_free_ctx;
- count += ret;
- prev_wr = &ctx->sig->data.reg_wr.wr;
-
- if (prot_sg_cnt) {
- ret = rdma_rw_init_one_mr(qp, port_num, &ctx->sig->prot,
- prot_sg, prot_sg_cnt, 0);
- if (ret < 0)
- goto out_destroy_data_mr;
- count += ret;
-
- if (ctx->sig->prot.inv_wr.next)
- prev_wr->next = &ctx->sig->prot.inv_wr;
- else
- prev_wr->next = &ctx->sig->prot.reg_wr.wr;
- prev_wr = &ctx->sig->prot.reg_wr.wr;
- } else {
- ctx->sig->prot.mr = NULL;
- }
-
- ctx->sig->sig_mr = ib_mr_pool_get(qp, &qp->sig_mrs);
- if (!ctx->sig->sig_mr) {
+ ctx->reg->mr = ib_mr_pool_get(qp, &qp->sig_mrs);
+ if (!ctx->reg->mr) {
ret = -EAGAIN;
- goto out_destroy_prot_mr;
+ goto out_free_ctx;
}
- if (ctx->sig->sig_mr->need_inval) {
- memset(&ctx->sig->sig_inv_wr, 0, sizeof(ctx->sig->sig_inv_wr));
+ count += rdma_rw_inv_key(ctx->reg);
- ctx->sig->sig_inv_wr.opcode = IB_WR_LOCAL_INV;
- ctx->sig->sig_inv_wr.ex.invalidate_rkey = ctx->sig->sig_mr->rkey;
+ memcpy(ctx->reg->mr->sig_attrs, sig_attrs, sizeof(struct ib_sig_attrs));
- prev_wr->next = &ctx->sig->sig_inv_wr;
- prev_wr = &ctx->sig->sig_inv_wr;
+ ret = ib_map_mr_sg_pi(ctx->reg->mr, sg, sgt.nents, NULL, prot_sg,
+ prot_sgt.nents, NULL, SZ_4K);
+ if (unlikely(ret)) {
+ pr_err("failed to map PI sg (%u)\n",
+ sgt.nents + prot_sgt.nents);
+ goto out_destroy_sig_mr;
}
- ctx->sig->sig_wr.wr.opcode = IB_WR_REG_SIG_MR;
- ctx->sig->sig_wr.wr.wr_cqe = NULL;
- ctx->sig->sig_wr.wr.sg_list = &ctx->sig->data.sge;
- ctx->sig->sig_wr.wr.num_sge = 1;
- ctx->sig->sig_wr.access_flags = IB_ACCESS_LOCAL_WRITE;
- ctx->sig->sig_wr.sig_attrs = sig_attrs;
- ctx->sig->sig_wr.sig_mr = ctx->sig->sig_mr;
- if (prot_sg_cnt)
- ctx->sig->sig_wr.prot = &ctx->sig->prot.sge;
- prev_wr->next = &ctx->sig->sig_wr.wr;
- prev_wr = &ctx->sig->sig_wr.wr;
+ ctx->reg->reg_wr.wr.opcode = IB_WR_REG_MR_INTEGRITY;
+ ctx->reg->reg_wr.wr.wr_cqe = NULL;
+ ctx->reg->reg_wr.wr.num_sge = 0;
+ ctx->reg->reg_wr.wr.send_flags = 0;
+ ctx->reg->reg_wr.access = IB_ACCESS_LOCAL_WRITE;
+ if (rdma_protocol_iwarp(qp->device, port_num))
+ ctx->reg->reg_wr.access |= IB_ACCESS_REMOTE_WRITE;
+ ctx->reg->reg_wr.mr = ctx->reg->mr;
+ ctx->reg->reg_wr.key = ctx->reg->mr->lkey;
count++;
- ctx->sig->sig_sge.addr = 0;
- ctx->sig->sig_sge.length = ctx->sig->data.sge.length;
- if (sig_attrs->wire.sig_type != IB_SIG_TYPE_NONE)
- ctx->sig->sig_sge.length += ctx->sig->prot.sge.length;
+ ctx->reg->sge.addr = ctx->reg->mr->iova;
+ ctx->reg->sge.length = ctx->reg->mr->length;
+ if (sig_attrs->wire.sig_type == IB_SIG_TYPE_NONE)
+ ctx->reg->sge.length -= ctx->reg->mr->sig_attrs->meta_length;
- rdma_wr = &ctx->sig->data.wr;
- rdma_wr->wr.sg_list = &ctx->sig->sig_sge;
+ rdma_wr = &ctx->reg->wr;
+ rdma_wr->wr.sg_list = &ctx->reg->sge;
rdma_wr->wr.num_sge = 1;
rdma_wr->remote_addr = remote_addr;
rdma_wr->rkey = rkey;
@@ -443,23 +448,20 @@ int rdma_rw_ctx_signature_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
rdma_wr->wr.opcode = IB_WR_RDMA_WRITE;
else
rdma_wr->wr.opcode = IB_WR_RDMA_READ;
- prev_wr->next = &rdma_wr->wr;
- prev_wr = &rdma_wr->wr;
+ ctx->reg->reg_wr.wr.next = &rdma_wr->wr;
count++;
return count;
-out_destroy_prot_mr:
- if (prot_sg_cnt)
- ib_mr_pool_put(qp, &qp->rdma_mrs, ctx->sig->prot.mr);
-out_destroy_data_mr:
- ib_mr_pool_put(qp, &qp->rdma_mrs, ctx->sig->data.mr);
+out_destroy_sig_mr:
+ ib_mr_pool_put(qp, &qp->sig_mrs, ctx->reg->mr);
out_free_ctx:
- kfree(ctx->sig);
+ kfree(ctx->reg);
out_unmap_prot_sg:
- ib_dma_unmap_sg(dev, prot_sg, prot_sg_cnt, dir);
+ if (prot_sgt.nents)
+ ib_dma_unmap_sgtable_attrs(dev, &prot_sgt, dir, 0);
out_unmap_sg:
- ib_dma_unmap_sg(dev, sg, sg_cnt, dir);
+ ib_dma_unmap_sgtable_attrs(dev, &sgt, dir, 0);
return ret;
}
EXPORT_SYMBOL(rdma_rw_ctx_signature_init);
@@ -493,28 +495,13 @@ static void rdma_rw_update_lkey(struct rdma_rw_reg_ctx *reg, bool need_inval)
* completion notification.
*/
struct ib_send_wr *rdma_rw_ctx_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
- u8 port_num, struct ib_cqe *cqe, struct ib_send_wr *chain_wr)
+ u32 port_num, struct ib_cqe *cqe, struct ib_send_wr *chain_wr)
{
struct ib_send_wr *first_wr, *last_wr;
int i;
switch (ctx->type) {
case RDMA_RW_SIG_MR:
- rdma_rw_update_lkey(&ctx->sig->data, true);
- if (ctx->sig->prot.mr)
- rdma_rw_update_lkey(&ctx->sig->prot, true);
-
- ctx->sig->sig_mr->need_inval = true;
- ib_update_fast_reg_key(ctx->sig->sig_mr,
- ib_inc_rkey(ctx->sig->sig_mr->lkey));
- ctx->sig->sig_sge.lkey = ctx->sig->sig_mr->lkey;
-
- if (ctx->sig->data.inv_wr.next)
- first_wr = &ctx->sig->data.inv_wr;
- else
- first_wr = &ctx->sig->data.reg_wr.wr;
- last_wr = &ctx->sig->data.wr.wr;
- break;
case RDMA_RW_MR:
for (i = 0; i < ctx->nr_ops; i++) {
rdma_rw_update_lkey(&ctx->reg[i],
@@ -565,13 +552,13 @@ EXPORT_SYMBOL(rdma_rw_ctx_wrs);
* is not set @cqe must be set so that the caller gets a completion
* notification.
*/
-int rdma_rw_ctx_post(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num,
+int rdma_rw_ctx_post(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u32 port_num,
struct ib_cqe *cqe, struct ib_send_wr *chain_wr)
{
- struct ib_send_wr *first_wr, *bad_wr;
+ struct ib_send_wr *first_wr;
first_wr = rdma_rw_ctx_wrs(ctx, qp, port_num, cqe, chain_wr);
- return ib_post_send(qp, first_wr, &bad_wr);
+ return ib_post_send(qp, first_wr, NULL);
}
EXPORT_SYMBOL(rdma_rw_ctx_post);
@@ -584,8 +571,9 @@ EXPORT_SYMBOL(rdma_rw_ctx_post);
* @sg_cnt: number of entries in @sg
* @dir: %DMA_TO_DEVICE for RDMA WRITE, %DMA_FROM_DEVICE for RDMA READ
*/
-void rdma_rw_ctx_destroy(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num,
- struct scatterlist *sg, u32 sg_cnt, enum dma_data_direction dir)
+void rdma_rw_ctx_destroy(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
+ u32 port_num, struct scatterlist *sg, u32 sg_cnt,
+ enum dma_data_direction dir)
{
int i;
@@ -612,7 +600,7 @@ EXPORT_SYMBOL(rdma_rw_ctx_destroy);
/**
* rdma_rw_ctx_destroy_signature - release all resources allocated by
- * rdma_rw_ctx_init_signature
+ * rdma_rw_ctx_signature_init
* @ctx: context to release
* @qp: queue pair to operate on
* @port_num: port num to which the connection is bound
@@ -623,26 +611,46 @@ EXPORT_SYMBOL(rdma_rw_ctx_destroy);
* @dir: %DMA_TO_DEVICE for RDMA WRITE, %DMA_FROM_DEVICE for RDMA READ
*/
void rdma_rw_ctx_destroy_signature(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
- u8 port_num, struct scatterlist *sg, u32 sg_cnt,
+ u32 port_num, struct scatterlist *sg, u32 sg_cnt,
struct scatterlist *prot_sg, u32 prot_sg_cnt,
enum dma_data_direction dir)
{
if (WARN_ON_ONCE(ctx->type != RDMA_RW_SIG_MR))
return;
- ib_mr_pool_put(qp, &qp->rdma_mrs, ctx->sig->data.mr);
- ib_dma_unmap_sg(qp->pd->device, sg, sg_cnt, dir);
+ ib_mr_pool_put(qp, &qp->sig_mrs, ctx->reg->mr);
+ kfree(ctx->reg);
- if (ctx->sig->prot.mr) {
- ib_mr_pool_put(qp, &qp->rdma_mrs, ctx->sig->prot.mr);
+ if (prot_sg_cnt)
ib_dma_unmap_sg(qp->pd->device, prot_sg, prot_sg_cnt, dir);
- }
-
- ib_mr_pool_put(qp, &qp->sig_mrs, ctx->sig->sig_mr);
- kfree(ctx->sig);
+ ib_dma_unmap_sg(qp->pd->device, sg, sg_cnt, dir);
}
EXPORT_SYMBOL(rdma_rw_ctx_destroy_signature);
+/**
+ * rdma_rw_mr_factor - return number of MRs required for a payload
+ * @device: device handling the connection
+ * @port_num: port num to which the connection is bound
+ * @maxpages: maximum payload pages per rdma_rw_ctx
+ *
+ * Returns the number of MRs the device requires to move @maxpayload
+ * bytes. The returned value is used during transport creation to
+ * compute max_rdma_ctxts and the size of the transport's Send and
+ * Send Completion Queues.
+ */
+unsigned int rdma_rw_mr_factor(struct ib_device *device, u32 port_num,
+ unsigned int maxpages)
+{
+ unsigned int mr_pages;
+
+ if (rdma_rw_can_use_mr(device, port_num))
+ mr_pages = rdma_rw_fr_page_list_len(device, false);
+ else
+ mr_pages = device->attrs.max_sge_rd;
+ return DIV_ROUND_UP(maxpages, mr_pages);
+}
+EXPORT_SYMBOL(rdma_rw_mr_factor);
+
void rdma_rw_init_qp(struct ib_device *dev, struct ib_qp_init_attr *attr)
{
u32 factor;
@@ -658,13 +666,12 @@ void rdma_rw_init_qp(struct ib_device *dev, struct ib_qp_init_attr *attr)
factor = 1;
/*
- * If the devices needs MRs to perform RDMA READ or WRITE operations,
+ * If the device needs MRs to perform RDMA READ or WRITE operations,
* we'll need two additional MRs for the registrations and the
* invalidation.
*/
- if (attr->create_flags & IB_QP_CREATE_SIGNATURE_EN)
- factor += 6; /* (inv + reg) * (data + prot + sig) */
- else if (rdma_rw_can_use_mr(dev, attr->port_num))
+ if (attr->create_flags & IB_QP_CREATE_INTEGRITY_EN ||
+ rdma_rw_can_use_mr(dev, attr->port_num))
factor += 2; /* inv + reg */
attr->cap.max_send_wr += factor * attr->cap.max_rdma_ctxs;
@@ -680,22 +687,24 @@ void rdma_rw_init_qp(struct ib_device *dev, struct ib_qp_init_attr *attr)
int rdma_rw_init_mrs(struct ib_qp *qp, struct ib_qp_init_attr *attr)
{
struct ib_device *dev = qp->pd->device;
- u32 nr_mrs = 0, nr_sig_mrs = 0;
+ u32 nr_mrs = 0, nr_sig_mrs = 0, max_num_sg = 0;
int ret = 0;
- if (attr->create_flags & IB_QP_CREATE_SIGNATURE_EN) {
+ if (attr->create_flags & IB_QP_CREATE_INTEGRITY_EN) {
nr_sig_mrs = attr->cap.max_rdma_ctxs;
- nr_mrs = attr->cap.max_rdma_ctxs * 2;
+ nr_mrs = attr->cap.max_rdma_ctxs;
+ max_num_sg = rdma_rw_fr_page_list_len(dev, true);
} else if (rdma_rw_can_use_mr(dev, attr->port_num)) {
nr_mrs = attr->cap.max_rdma_ctxs;
+ max_num_sg = rdma_rw_fr_page_list_len(dev, false);
}
if (nr_mrs) {
ret = ib_mr_pool_init(qp, &qp->rdma_mrs, nr_mrs,
IB_MR_TYPE_MEM_REG,
- rdma_rw_fr_page_list_len(dev));
+ max_num_sg, 0);
if (ret) {
- pr_err("%s: failed to allocated %d MRs\n",
+ pr_err("%s: failed to allocated %u MRs\n",
__func__, nr_mrs);
return ret;
}
@@ -703,10 +712,10 @@ int rdma_rw_init_mrs(struct ib_qp *qp, struct ib_qp_init_attr *attr)
if (nr_sig_mrs) {
ret = ib_mr_pool_init(qp, &qp->sig_mrs, nr_sig_mrs,
- IB_MR_TYPE_SIGNATURE, 2);
+ IB_MR_TYPE_INTEGRITY, max_num_sg, max_num_sg);
if (ret) {
- pr_err("%s: failed to allocated %d SIG MRs\n",
- __func__, nr_mrs);
+ pr_err("%s: failed to allocated %u SIG MRs\n",
+ __func__, nr_sig_mrs);
goto out_free_rdma_mrs;
}
}
diff --git a/drivers/infiniband/core/sa.h b/drivers/infiniband/core/sa.h
index b1d4bbf4ce5c..143de37ae598 100644
--- a/drivers/infiniband/core/sa.h
+++ b/drivers/infiniband/core/sa.h
@@ -49,16 +49,14 @@ static inline void ib_sa_client_put(struct ib_sa_client *client)
}
int ib_sa_mcmember_rec_query(struct ib_sa_client *client,
- struct ib_device *device, u8 port_num,
- u8 method,
+ struct ib_device *device, u32 port_num, u8 method,
struct ib_sa_mcmember_rec *rec,
ib_sa_comp_mask comp_mask,
- int timeout_ms, gfp_t gfp_mask,
+ unsigned long timeout_ms, gfp_t gfp_mask,
void (*callback)(int status,
struct ib_sa_mcmember_rec *resp,
void *context),
- void *context,
- struct ib_sa_query **sa_query);
+ void *context, struct ib_sa_query **sa_query);
int mcast_init(void);
void mcast_cleanup(void);
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index 81b742ca1639..c23e9c847314 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -32,7 +32,6 @@
* SOFTWARE.
*/
-#include <linux/module.h>
#include <linux/init.h>
#include <linux/err.h>
#include <linux/random.h>
@@ -40,7 +39,7 @@
#include <linux/slab.h>
#include <linux/dma-mapping.h>
#include <linux/kref.h>
-#include <linux/idr.h>
+#include <linux/xarray.h>
#include <linux/workqueue.h>
#include <uapi/linux/if_ether.h>
#include <rdma/ib_pack.h>
@@ -50,12 +49,16 @@
#include <uapi/rdma/ib_user_sa.h>
#include <rdma/ib_marshall.h>
#include <rdma/ib_addr.h>
+#include <rdma/opa_addr.h>
+#include <rdma/rdma_cm.h>
#include "sa.h"
#include "core_priv.h"
#define IB_SA_LOCAL_SVC_TIMEOUT_MIN 100
#define IB_SA_LOCAL_SVC_TIMEOUT_DEFAULT 2000
#define IB_SA_LOCAL_SVC_TIMEOUT_MAX 200000
+#define IB_SA_CPI_MAX_RETRY_CNT 3
+#define IB_SA_CPI_RETRY_WAIT 1000 /*msecs */
static int sa_local_svc_timeout_ms = IB_SA_LOCAL_SVC_TIMEOUT_DEFAULT;
struct ib_sa_sm_ah {
@@ -65,9 +68,23 @@ struct ib_sa_sm_ah {
u8 src_path_mask;
};
+enum rdma_class_port_info_type {
+ RDMA_CLASS_PORT_INFO_IB,
+ RDMA_CLASS_PORT_INFO_OPA
+};
+
+struct rdma_class_port_info {
+ enum rdma_class_port_info_type type;
+ union {
+ struct ib_class_port_info ib;
+ struct opa_class_port_info opa;
+ };
+};
+
struct ib_sa_classport_cache {
bool valid;
- struct ib_class_port_info data;
+ int retry_cnt;
+ struct rdma_class_port_info data;
};
struct ib_sa_port {
@@ -75,19 +92,23 @@ struct ib_sa_port {
struct ib_sa_sm_ah *sm_ah;
struct work_struct update_task;
struct ib_sa_classport_cache classport_info;
+ struct delayed_work ib_cpi_work;
spinlock_t classport_lock; /* protects class port info set */
spinlock_t ah_lock;
- u8 port_num;
+ u32 port_num;
};
struct ib_sa_device {
int start_port, end_port;
struct ib_event_handler event_handler;
- struct ib_sa_port port[0];
+ struct ib_sa_port port[];
};
struct ib_sa_query {
- void (*callback)(struct ib_sa_query *, int, struct ib_sa_mad *);
+ void (*callback)(struct ib_sa_query *sa_query, int status,
+ struct ib_sa_mad *mad);
+ void (*rmpp_callback)(struct ib_sa_query *sa_query, int status,
+ struct ib_mad_recv_wc *mad);
void (*release)(struct ib_sa_query *);
struct ib_sa_client *client;
struct ib_sa_port *port;
@@ -103,17 +124,14 @@ struct ib_sa_query {
#define IB_SA_ENABLE_LOCAL_SERVICE 0x00000001
#define IB_SA_CANCEL 0x00000002
-
-struct ib_sa_service_query {
- void (*callback)(int, struct ib_sa_service_rec *, void *);
- void *context;
- struct ib_sa_query sa_query;
-};
+#define IB_SA_QUERY_OPA 0x00000004
struct ib_sa_path_query {
- void (*callback)(int, struct ib_sa_path_rec *, void *);
+ void (*callback)(int status, struct sa_path_rec *rec,
+ unsigned int num_paths, void *context);
void *context;
struct ib_sa_query sa_query;
+ struct sa_path_rec *conv_pr;
};
struct ib_sa_guidinfo_query {
@@ -123,7 +141,7 @@ struct ib_sa_guidinfo_query {
};
struct ib_sa_classport_info_query {
- void (*callback)(int, struct ib_class_port_info *, void *);
+ void (*callback)(void *);
void *context;
struct ib_sa_query sa_query;
};
@@ -134,6 +152,13 @@ struct ib_sa_mcmember_query {
struct ib_sa_query sa_query;
};
+struct ib_sa_service_query {
+ void (*callback)(int status, struct sa_service_rec *rec,
+ unsigned int num_services, void *context);
+ void *context;
+ struct ib_sa_query sa_query;
+};
+
static LIST_HEAD(ib_nl_request_list);
static DEFINE_SPINLOCK(ib_nl_request_lock);
static atomic_t ib_nl_sa_request_seq;
@@ -154,7 +179,7 @@ static const struct nla_policy ib_nl_policy[LS_NLA_TYPE_MAX] = {
};
-static void ib_sa_add_one(struct ib_device *device);
+static int ib_sa_add_one(struct ib_device *device);
static void ib_sa_remove_one(struct ib_device *device, void *client_data);
static struct ib_client sa_client = {
@@ -163,15 +188,14 @@ static struct ib_client sa_client = {
.remove = ib_sa_remove_one
};
-static DEFINE_SPINLOCK(idr_lock);
-static DEFINE_IDR(query_idr);
+static DEFINE_XARRAY_FLAGS(queries, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ);
static DEFINE_SPINLOCK(tid_lock);
static u32 tid;
#define PATH_REC_FIELD(field) \
- .struct_offset_bytes = offsetof(struct ib_sa_path_rec, field), \
- .struct_size_bytes = sizeof ((struct ib_sa_path_rec *) 0)->field, \
+ .struct_offset_bytes = offsetof(struct sa_path_rec, field), \
+ .struct_size_bytes = sizeof_field(struct sa_path_rec, field), \
.field_name = "sa_path_rec:" #field
static const struct ib_field path_rec_table[] = {
@@ -187,15 +211,15 @@ static const struct ib_field path_rec_table[] = {
.offset_words = 6,
.offset_bits = 0,
.size_bits = 128 },
- { PATH_REC_FIELD(dlid),
+ { PATH_REC_FIELD(ib.dlid),
.offset_words = 10,
.offset_bits = 0,
.size_bits = 16 },
- { PATH_REC_FIELD(slid),
+ { PATH_REC_FIELD(ib.slid),
.offset_words = 10,
.offset_bits = 16,
.size_bits = 16 },
- { PATH_REC_FIELD(raw_traffic),
+ { PATH_REC_FIELD(ib.raw_traffic),
.offset_words = 11,
.offset_bits = 0,
.size_bits = 1 },
@@ -269,9 +293,139 @@ static const struct ib_field path_rec_table[] = {
.size_bits = 48 },
};
+#define OPA_PATH_REC_FIELD(field) \
+ .struct_offset_bytes = \
+ offsetof(struct sa_path_rec, field), \
+ .struct_size_bytes = \
+ sizeof_field(struct sa_path_rec, field), \
+ .field_name = "sa_path_rec:" #field
+
+static const struct ib_field opa_path_rec_table[] = {
+ { OPA_PATH_REC_FIELD(service_id),
+ .offset_words = 0,
+ .offset_bits = 0,
+ .size_bits = 64 },
+ { OPA_PATH_REC_FIELD(dgid),
+ .offset_words = 2,
+ .offset_bits = 0,
+ .size_bits = 128 },
+ { OPA_PATH_REC_FIELD(sgid),
+ .offset_words = 6,
+ .offset_bits = 0,
+ .size_bits = 128 },
+ { OPA_PATH_REC_FIELD(opa.dlid),
+ .offset_words = 10,
+ .offset_bits = 0,
+ .size_bits = 32 },
+ { OPA_PATH_REC_FIELD(opa.slid),
+ .offset_words = 11,
+ .offset_bits = 0,
+ .size_bits = 32 },
+ { OPA_PATH_REC_FIELD(opa.raw_traffic),
+ .offset_words = 12,
+ .offset_bits = 0,
+ .size_bits = 1 },
+ { RESERVED,
+ .offset_words = 12,
+ .offset_bits = 1,
+ .size_bits = 3 },
+ { OPA_PATH_REC_FIELD(flow_label),
+ .offset_words = 12,
+ .offset_bits = 4,
+ .size_bits = 20 },
+ { OPA_PATH_REC_FIELD(hop_limit),
+ .offset_words = 12,
+ .offset_bits = 24,
+ .size_bits = 8 },
+ { OPA_PATH_REC_FIELD(traffic_class),
+ .offset_words = 13,
+ .offset_bits = 0,
+ .size_bits = 8 },
+ { OPA_PATH_REC_FIELD(reversible),
+ .offset_words = 13,
+ .offset_bits = 8,
+ .size_bits = 1 },
+ { OPA_PATH_REC_FIELD(numb_path),
+ .offset_words = 13,
+ .offset_bits = 9,
+ .size_bits = 7 },
+ { OPA_PATH_REC_FIELD(pkey),
+ .offset_words = 13,
+ .offset_bits = 16,
+ .size_bits = 16 },
+ { OPA_PATH_REC_FIELD(opa.l2_8B),
+ .offset_words = 14,
+ .offset_bits = 0,
+ .size_bits = 1 },
+ { OPA_PATH_REC_FIELD(opa.l2_10B),
+ .offset_words = 14,
+ .offset_bits = 1,
+ .size_bits = 1 },
+ { OPA_PATH_REC_FIELD(opa.l2_9B),
+ .offset_words = 14,
+ .offset_bits = 2,
+ .size_bits = 1 },
+ { OPA_PATH_REC_FIELD(opa.l2_16B),
+ .offset_words = 14,
+ .offset_bits = 3,
+ .size_bits = 1 },
+ { RESERVED,
+ .offset_words = 14,
+ .offset_bits = 4,
+ .size_bits = 2 },
+ { OPA_PATH_REC_FIELD(opa.qos_type),
+ .offset_words = 14,
+ .offset_bits = 6,
+ .size_bits = 2 },
+ { OPA_PATH_REC_FIELD(opa.qos_priority),
+ .offset_words = 14,
+ .offset_bits = 8,
+ .size_bits = 8 },
+ { RESERVED,
+ .offset_words = 14,
+ .offset_bits = 16,
+ .size_bits = 3 },
+ { OPA_PATH_REC_FIELD(sl),
+ .offset_words = 14,
+ .offset_bits = 19,
+ .size_bits = 5 },
+ { RESERVED,
+ .offset_words = 14,
+ .offset_bits = 24,
+ .size_bits = 8 },
+ { OPA_PATH_REC_FIELD(mtu_selector),
+ .offset_words = 15,
+ .offset_bits = 0,
+ .size_bits = 2 },
+ { OPA_PATH_REC_FIELD(mtu),
+ .offset_words = 15,
+ .offset_bits = 2,
+ .size_bits = 6 },
+ { OPA_PATH_REC_FIELD(rate_selector),
+ .offset_words = 15,
+ .offset_bits = 8,
+ .size_bits = 2 },
+ { OPA_PATH_REC_FIELD(rate),
+ .offset_words = 15,
+ .offset_bits = 10,
+ .size_bits = 6 },
+ { OPA_PATH_REC_FIELD(packet_life_time_selector),
+ .offset_words = 15,
+ .offset_bits = 16,
+ .size_bits = 2 },
+ { OPA_PATH_REC_FIELD(packet_life_time),
+ .offset_words = 15,
+ .offset_bits = 18,
+ .size_bits = 6 },
+ { OPA_PATH_REC_FIELD(preference),
+ .offset_words = 15,
+ .offset_bits = 24,
+ .size_bits = 8 },
+};
+
#define MCMEMBER_REC_FIELD(field) \
.struct_offset_bytes = offsetof(struct ib_sa_mcmember_rec, field), \
- .struct_size_bytes = sizeof ((struct ib_sa_mcmember_rec *) 0)->field, \
+ .struct_size_bytes = sizeof_field(struct ib_sa_mcmember_rec, field), \
.field_name = "sa_mcmember_rec:" #field
static const struct ib_field mcmember_rec_table[] = {
@@ -353,60 +507,12 @@ static const struct ib_field mcmember_rec_table[] = {
.size_bits = 23 },
};
-#define SERVICE_REC_FIELD(field) \
- .struct_offset_bytes = offsetof(struct ib_sa_service_rec, field), \
- .struct_size_bytes = sizeof ((struct ib_sa_service_rec *) 0)->field, \
- .field_name = "sa_service_rec:" #field
-
-static const struct ib_field service_rec_table[] = {
- { SERVICE_REC_FIELD(id),
- .offset_words = 0,
- .offset_bits = 0,
- .size_bits = 64 },
- { SERVICE_REC_FIELD(gid),
- .offset_words = 2,
- .offset_bits = 0,
- .size_bits = 128 },
- { SERVICE_REC_FIELD(pkey),
- .offset_words = 6,
- .offset_bits = 0,
- .size_bits = 16 },
- { SERVICE_REC_FIELD(lease),
- .offset_words = 7,
- .offset_bits = 0,
- .size_bits = 32 },
- { SERVICE_REC_FIELD(key),
- .offset_words = 8,
- .offset_bits = 0,
- .size_bits = 128 },
- { SERVICE_REC_FIELD(name),
- .offset_words = 12,
- .offset_bits = 0,
- .size_bits = 64*8 },
- { SERVICE_REC_FIELD(data8),
- .offset_words = 28,
- .offset_bits = 0,
- .size_bits = 16*8 },
- { SERVICE_REC_FIELD(data16),
- .offset_words = 32,
- .offset_bits = 0,
- .size_bits = 8*16 },
- { SERVICE_REC_FIELD(data32),
- .offset_words = 36,
- .offset_bits = 0,
- .size_bits = 4*32 },
- { SERVICE_REC_FIELD(data64),
- .offset_words = 40,
- .offset_bits = 0,
- .size_bits = 2*64 },
-};
-
#define CLASSPORTINFO_REC_FIELD(field) \
.struct_offset_bytes = offsetof(struct ib_class_port_info, field), \
- .struct_size_bytes = sizeof((struct ib_class_port_info *)0)->field, \
+ .struct_size_bytes = sizeof_field(struct ib_class_port_info, field), \
.field_name = "ib_class_port_info:" #field
-static const struct ib_field classport_info_rec_table[] = {
+static const struct ib_field ib_classport_info_rec_table[] = {
{ CLASSPORTINFO_REC_FIELD(base_version),
.offset_words = 0,
.offset_bits = 0,
@@ -477,9 +583,91 @@ static const struct ib_field classport_info_rec_table[] = {
.size_bits = 32 },
};
+#define OPA_CLASSPORTINFO_REC_FIELD(field) \
+ .struct_offset_bytes =\
+ offsetof(struct opa_class_port_info, field), \
+ .struct_size_bytes = \
+ sizeof_field(struct opa_class_port_info, field), \
+ .field_name = "opa_class_port_info:" #field
+
+static const struct ib_field opa_classport_info_rec_table[] = {
+ { OPA_CLASSPORTINFO_REC_FIELD(base_version),
+ .offset_words = 0,
+ .offset_bits = 0,
+ .size_bits = 8 },
+ { OPA_CLASSPORTINFO_REC_FIELD(class_version),
+ .offset_words = 0,
+ .offset_bits = 8,
+ .size_bits = 8 },
+ { OPA_CLASSPORTINFO_REC_FIELD(cap_mask),
+ .offset_words = 0,
+ .offset_bits = 16,
+ .size_bits = 16 },
+ { OPA_CLASSPORTINFO_REC_FIELD(cap_mask2_resp_time),
+ .offset_words = 1,
+ .offset_bits = 0,
+ .size_bits = 32 },
+ { OPA_CLASSPORTINFO_REC_FIELD(redirect_gid),
+ .offset_words = 2,
+ .offset_bits = 0,
+ .size_bits = 128 },
+ { OPA_CLASSPORTINFO_REC_FIELD(redirect_tc_fl),
+ .offset_words = 6,
+ .offset_bits = 0,
+ .size_bits = 32 },
+ { OPA_CLASSPORTINFO_REC_FIELD(redirect_lid),
+ .offset_words = 7,
+ .offset_bits = 0,
+ .size_bits = 32 },
+ { OPA_CLASSPORTINFO_REC_FIELD(redirect_sl_qp),
+ .offset_words = 8,
+ .offset_bits = 0,
+ .size_bits = 32 },
+ { OPA_CLASSPORTINFO_REC_FIELD(redirect_qkey),
+ .offset_words = 9,
+ .offset_bits = 0,
+ .size_bits = 32 },
+ { OPA_CLASSPORTINFO_REC_FIELD(trap_gid),
+ .offset_words = 10,
+ .offset_bits = 0,
+ .size_bits = 128 },
+ { OPA_CLASSPORTINFO_REC_FIELD(trap_tc_fl),
+ .offset_words = 14,
+ .offset_bits = 0,
+ .size_bits = 32 },
+ { OPA_CLASSPORTINFO_REC_FIELD(trap_lid),
+ .offset_words = 15,
+ .offset_bits = 0,
+ .size_bits = 32 },
+ { OPA_CLASSPORTINFO_REC_FIELD(trap_hl_qp),
+ .offset_words = 16,
+ .offset_bits = 0,
+ .size_bits = 32 },
+ { OPA_CLASSPORTINFO_REC_FIELD(trap_qkey),
+ .offset_words = 17,
+ .offset_bits = 0,
+ .size_bits = 32 },
+ { OPA_CLASSPORTINFO_REC_FIELD(trap_pkey),
+ .offset_words = 18,
+ .offset_bits = 0,
+ .size_bits = 16 },
+ { OPA_CLASSPORTINFO_REC_FIELD(redirect_pkey),
+ .offset_words = 18,
+ .offset_bits = 16,
+ .size_bits = 16 },
+ { OPA_CLASSPORTINFO_REC_FIELD(trap_sl_rsvd),
+ .offset_words = 19,
+ .offset_bits = 0,
+ .size_bits = 8 },
+ { RESERVED,
+ .offset_words = 19,
+ .offset_bits = 8,
+ .size_bits = 24 },
+};
+
#define GUIDINFO_REC_FIELD(field) \
.struct_offset_bytes = offsetof(struct ib_sa_guidinfo_rec, field), \
- .struct_size_bytes = sizeof((struct ib_sa_guidinfo_rec *) 0)->field, \
+ .struct_size_bytes = sizeof_field(struct ib_sa_guidinfo_rec, field), \
.field_name = "sa_guidinfo_rec:" #field
static const struct ib_field guidinfo_rec_table[] = {
@@ -505,6 +693,60 @@ static const struct ib_field guidinfo_rec_table[] = {
.size_bits = 512 },
};
+#define SERVICE_REC_FIELD(field) \
+ .struct_offset_bytes = offsetof(struct sa_service_rec, field), \
+ .struct_size_bytes = sizeof_field(struct sa_service_rec, field), \
+ .field_name = "sa_service_rec:" #field
+
+static const struct ib_field service_rec_table[] = {
+ { SERVICE_REC_FIELD(id),
+ .offset_words = 0,
+ .offset_bits = 0,
+ .size_bits = 64 },
+ { SERVICE_REC_FIELD(gid),
+ .offset_words = 2,
+ .offset_bits = 0,
+ .size_bits = 128 },
+ { SERVICE_REC_FIELD(pkey),
+ .offset_words = 6,
+ .offset_bits = 0,
+ .size_bits = 16 },
+ { RESERVED,
+ .offset_words = 6,
+ .offset_bits = 16,
+ .size_bits = 16 },
+ { SERVICE_REC_FIELD(lease),
+ .offset_words = 7,
+ .offset_bits = 0,
+ .size_bits = 32 },
+ { SERVICE_REC_FIELD(key),
+ .offset_words = 8,
+ .offset_bits = 0,
+ .size_bits = 128 },
+ { SERVICE_REC_FIELD(name),
+ .offset_words = 12,
+ .offset_bits = 0,
+ .size_bits = 512 },
+ { SERVICE_REC_FIELD(data_8),
+ .offset_words = 28,
+ .offset_bits = 0,
+ .size_bits = 128 },
+ { SERVICE_REC_FIELD(data_16),
+ .offset_words = 32,
+ .offset_bits = 0,
+ .size_bits = 128 },
+ { SERVICE_REC_FIELD(data_32),
+ .offset_words = 36,
+ .offset_bits = 0,
+ .size_bits = 128 },
+ { SERVICE_REC_FIELD(data_64),
+ .offset_words = 40,
+ .offset_bits = 0,
+ .size_bits = 128 },
+};
+
+#define RDMA_PRIMARY_PATH_MAX_REC_NUM 3
+
static inline void ib_sa_disable_local_svc(struct ib_sa_query *query)
{
query->flags &= ~IB_SA_ENABLE_LOCAL_SERVICE;
@@ -518,7 +760,7 @@ static inline int ib_sa_query_cancelled(struct ib_sa_query *query)
static void ib_nl_set_path_rec_attrs(struct sk_buff *skb,
struct ib_sa_query *query)
{
- struct ib_sa_path_rec *sa_rec = query->mad_buf->context[1];
+ struct sa_path_rec *sa_rec = query->mad_buf->context[1];
struct ib_sa_mad *mad = query->mad_buf->mad;
ib_sa_comp_mask comp_mask = mad->sa_hdr.comp_mask;
u16 val16;
@@ -528,15 +770,15 @@ static void ib_nl_set_path_rec_attrs(struct sk_buff *skb,
query->mad_buf->context[1] = NULL;
/* Construct the family header first */
- header = (struct rdma_ls_resolve_header *)
- skb_put(skb, NLMSG_ALIGN(sizeof(*header)));
- memcpy(header->device_name, query->port->agent->device->name,
- LS_DEVICE_NAME_MAX);
+ header = skb_put(skb, NLMSG_ALIGN(sizeof(*header)));
+ strscpy_pad(header->device_name,
+ dev_name(&query->port->agent->device->dev),
+ LS_DEVICE_NAME_MAX);
header->port_num = query->port->port_num;
if ((comp_mask & IB_SA_PATH_REC_REVERSIBLE) &&
sa_rec->reversible != 0)
- query->path_use = LS_RESOLVE_PATH_USE_GMP;
+ query->path_use = LS_RESOLVE_PATH_USE_ALL;
else
query->path_use = LS_RESOLVE_PATH_USE_UNIDIRECTIONAL;
header->path_use = query->path_use;
@@ -599,14 +841,20 @@ static int ib_nl_get_path_rec_attrs_len(ib_sa_comp_mask comp_mask)
return len;
}
-static int ib_nl_send_msg(struct ib_sa_query *query, gfp_t gfp_mask)
+static int ib_nl_make_request(struct ib_sa_query *query, gfp_t gfp_mask)
{
struct sk_buff *skb = NULL;
struct nlmsghdr *nlh;
void *data;
- int ret = 0;
struct ib_sa_mad *mad;
int len;
+ unsigned long flags;
+ unsigned long delay;
+ gfp_t gfp_flag;
+ int ret;
+
+ INIT_LIST_HEAD(&query->list);
+ query->seq = (u32)atomic_inc_return(&ib_nl_sa_request_seq);
mad = query->mad_buf->mad;
len = ib_nl_get_path_rec_attrs_len(mad->sa_hdr.comp_mask);
@@ -631,44 +879,25 @@ static int ib_nl_send_msg(struct ib_sa_query *query, gfp_t gfp_mask)
/* Repair the nlmsg header length */
nlmsg_end(skb, nlh);
- ret = ibnl_multicast(skb, nlh, RDMA_NL_GROUP_LS, gfp_mask);
- if (!ret)
- ret = len;
- else
- ret = 0;
-
- return ret;
-}
+ gfp_flag = ((gfp_mask & GFP_ATOMIC) == GFP_ATOMIC) ? GFP_ATOMIC :
+ GFP_NOWAIT;
-static int ib_nl_make_request(struct ib_sa_query *query, gfp_t gfp_mask)
-{
- unsigned long flags;
- unsigned long delay;
- int ret;
+ spin_lock_irqsave(&ib_nl_request_lock, flags);
+ ret = rdma_nl_multicast(&init_net, skb, RDMA_NL_GROUP_LS, gfp_flag);
- INIT_LIST_HEAD(&query->list);
- query->seq = (u32)atomic_inc_return(&ib_nl_sa_request_seq);
+ if (ret)
+ goto out;
- /* Put the request on the list first.*/
- spin_lock_irqsave(&ib_nl_request_lock, flags);
+ /* Put the request on the list.*/
delay = msecs_to_jiffies(sa_local_svc_timeout_ms);
query->timeout = delay + jiffies;
list_add_tail(&query->list, &ib_nl_request_list);
/* Start the timeout if this is the only request */
if (ib_nl_request_list.next == &query->list)
queue_delayed_work(ib_nl_wq, &ib_nl_timed_work, delay);
- spin_unlock_irqrestore(&ib_nl_request_lock, flags);
- ret = ib_nl_send_msg(query, gfp_mask);
- if (ret <= 0) {
- ret = -EIO;
- /* Remove the request */
- spin_lock_irqsave(&ib_nl_request_lock, flags);
- list_del(&query->list);
- spin_unlock_irqrestore(&ib_nl_request_lock, flags);
- } else {
- ret = 0;
- }
+out:
+ spin_unlock_irqrestore(&ib_nl_request_lock, flags);
return ret;
}
@@ -702,50 +931,77 @@ static void send_handler(struct ib_mad_agent *agent,
static void ib_nl_process_good_resolve_rsp(struct ib_sa_query *query,
const struct nlmsghdr *nlh)
{
+ struct sa_path_rec recs[RDMA_PRIMARY_PATH_MAX_REC_NUM];
+ struct ib_sa_path_query *path_query;
+ struct ib_path_rec_data *rec_data;
struct ib_mad_send_wc mad_send_wc;
- struct ib_sa_mad *mad = NULL;
const struct nlattr *head, *curr;
- struct ib_path_rec_data *rec;
- int len, rem;
+ struct ib_sa_mad *mad = NULL;
+ int len, rem, status = -EIO;
+ unsigned int num_prs = 0;
u32 mask = 0;
- int status = -EIO;
-
- if (query->callback) {
- head = (const struct nlattr *) nlmsg_data(nlh);
- len = nlmsg_len(nlh);
- switch (query->path_use) {
- case LS_RESOLVE_PATH_USE_UNIDIRECTIONAL:
- mask = IB_PATH_PRIMARY | IB_PATH_OUTBOUND;
- break;
- case LS_RESOLVE_PATH_USE_ALL:
- case LS_RESOLVE_PATH_USE_GMP:
- default:
- mask = IB_PATH_PRIMARY | IB_PATH_GMP |
- IB_PATH_BIDIRECTIONAL;
- break;
- }
- nla_for_each_attr(curr, head, len, rem) {
- if (curr->nla_type == LS_NLA_TYPE_PATH_RECORD) {
- rec = nla_data(curr);
- /*
- * Get the first one. In the future, we may
- * need to get up to 6 pathrecords.
- */
- if ((rec->flags & mask) == mask) {
- mad = query->mad_buf->mad;
- mad->mad_hdr.method |=
- IB_MGMT_METHOD_RESP;
- memcpy(mad->data, rec->path_rec,
- sizeof(rec->path_rec));
- status = 0;
- break;
- }
- }
+ if (!query->callback)
+ goto out;
+
+ path_query = container_of(query, struct ib_sa_path_query, sa_query);
+ mad = query->mad_buf->mad;
+
+ head = (const struct nlattr *) nlmsg_data(nlh);
+ len = nlmsg_len(nlh);
+ switch (query->path_use) {
+ case LS_RESOLVE_PATH_USE_UNIDIRECTIONAL:
+ mask = IB_PATH_PRIMARY | IB_PATH_OUTBOUND;
+ break;
+
+ case LS_RESOLVE_PATH_USE_ALL:
+ mask = IB_PATH_PRIMARY;
+ break;
+
+ case LS_RESOLVE_PATH_USE_GMP:
+ default:
+ mask = IB_PATH_PRIMARY | IB_PATH_GMP |
+ IB_PATH_BIDIRECTIONAL;
+ break;
+ }
+
+ nla_for_each_attr(curr, head, len, rem) {
+ if (curr->nla_type != LS_NLA_TYPE_PATH_RECORD)
+ continue;
+
+ rec_data = nla_data(curr);
+ if ((rec_data->flags & mask) != mask)
+ continue;
+
+ if ((query->flags & IB_SA_QUERY_OPA) ||
+ path_query->conv_pr) {
+ mad->mad_hdr.method |= IB_MGMT_METHOD_RESP;
+ memcpy(mad->data, rec_data->path_rec,
+ sizeof(rec_data->path_rec));
+ query->callback(query, 0, mad);
+ goto out;
}
- query->callback(query, status, mad);
+
+ status = 0;
+ ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table),
+ rec_data->path_rec, &recs[num_prs]);
+ recs[num_prs].flags = rec_data->flags;
+ recs[num_prs].rec_type = SA_PATH_REC_TYPE_IB;
+ sa_path_set_dmac_zero(&recs[num_prs]);
+
+ num_prs++;
+ if (num_prs >= RDMA_PRIMARY_PATH_MAX_REC_NUM)
+ break;
}
+ if (!status) {
+ mad->mad_hdr.method |= IB_MGMT_METHOD_RESP;
+ path_query->callback(status, recs, num_prs,
+ path_query->context);
+ } else
+ query->callback(query, status, mad);
+
+out:
mad_send_wc.send_buf = query->mad_buf;
mad_send_wc.status = IB_WC_SUCCESS;
send_handler(query->mad_buf->mad_agent, &mad_send_wc);
@@ -791,9 +1047,9 @@ static void ib_nl_request_timeout(struct work_struct *work)
}
int ib_nl_handle_set_timeout(struct sk_buff *skb,
- struct netlink_callback *cb)
+ struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
- const struct nlmsghdr *nlh = (struct nlmsghdr *)cb->nlh;
int timeout, delta, abs_delta;
const struct nlattr *attr;
unsigned long flags;
@@ -803,12 +1059,11 @@ int ib_nl_handle_set_timeout(struct sk_buff *skb,
int ret;
if (!(nlh->nlmsg_flags & NLM_F_REQUEST) ||
- !(NETLINK_CB(skb).sk) ||
- !netlink_capable(skb, CAP_NET_ADMIN))
+ !(NETLINK_CB(skb).sk))
return -EPERM;
- ret = nla_parse(tb, LS_NLA_TYPE_MAX - 1, nlmsg_data(nlh),
- nlmsg_len(nlh), ib_nl_policy);
+ ret = nla_parse_deprecated(tb, LS_NLA_TYPE_MAX - 1, nlmsg_data(nlh),
+ nlmsg_len(nlh), ib_nl_policy, NULL);
attr = (const struct nlattr *)tb[LS_NLA_TYPE_TIMEOUT];
if (ret || !attr)
goto settimeout_out;
@@ -819,6 +1074,8 @@ int ib_nl_handle_set_timeout(struct sk_buff *skb,
if (timeout > IB_SA_LOCAL_SVC_TIMEOUT_MAX)
timeout = IB_SA_LOCAL_SVC_TIMEOUT_MAX;
+ spin_lock_irqsave(&ib_nl_request_lock, flags);
+
delta = timeout - sa_local_svc_timeout_ms;
if (delta < 0)
abs_delta = -delta;
@@ -826,7 +1083,6 @@ int ib_nl_handle_set_timeout(struct sk_buff *skb,
abs_delta = delta;
if (delta != 0) {
- spin_lock_irqsave(&ib_nl_request_lock, flags);
sa_local_svc_timeout_ms = timeout;
list_for_each_entry(query, &ib_nl_request_list, list) {
if (delta < 0 && abs_delta > query->timeout)
@@ -844,11 +1100,12 @@ int ib_nl_handle_set_timeout(struct sk_buff *skb,
if (delay)
mod_delayed_work(ib_nl_wq, &ib_nl_timed_work,
(unsigned long)delay);
- spin_unlock_irqrestore(&ib_nl_request_lock, flags);
}
+ spin_unlock_irqrestore(&ib_nl_request_lock, flags);
+
settimeout_out:
- return skb->len;
+ return 0;
}
static inline int ib_nl_is_good_resolve_resp(const struct nlmsghdr *nlh)
@@ -859,8 +1116,8 @@ static inline int ib_nl_is_good_resolve_resp(const struct nlmsghdr *nlh)
if (nlh->nlmsg_flags & RDMA_NL_LS_F_ERR)
return 0;
- ret = nla_parse(tb, LS_NLA_TYPE_MAX - 1, nlmsg_data(nlh),
- nlmsg_len(nlh), ib_nl_policy);
+ ret = nla_parse_deprecated(tb, LS_NLA_TYPE_MAX - 1, nlmsg_data(nlh),
+ nlmsg_len(nlh), ib_nl_policy, NULL);
if (ret)
return 0;
@@ -868,36 +1125,35 @@ static inline int ib_nl_is_good_resolve_resp(const struct nlmsghdr *nlh)
}
int ib_nl_handle_resolve_resp(struct sk_buff *skb,
- struct netlink_callback *cb)
+ struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
{
- const struct nlmsghdr *nlh = (struct nlmsghdr *)cb->nlh;
unsigned long flags;
- struct ib_sa_query *query;
+ struct ib_sa_query *query = NULL, *iter;
struct ib_mad_send_buf *send_buf;
struct ib_mad_send_wc mad_send_wc;
- int found = 0;
int ret;
if ((nlh->nlmsg_flags & NLM_F_REQUEST) ||
- !(NETLINK_CB(skb).sk) ||
- !netlink_capable(skb, CAP_NET_ADMIN))
+ !(NETLINK_CB(skb).sk))
return -EPERM;
spin_lock_irqsave(&ib_nl_request_lock, flags);
- list_for_each_entry(query, &ib_nl_request_list, list) {
+ list_for_each_entry(iter, &ib_nl_request_list, list) {
/*
* If the query is cancelled, let the timeout routine
* take care of it.
*/
- if (nlh->nlmsg_seq == query->seq) {
- found = !ib_sa_query_cancelled(query);
- if (found)
- list_del(&query->list);
+ if (nlh->nlmsg_seq == iter->seq) {
+ if (!ib_sa_query_cancelled(iter)) {
+ list_del(&iter->list);
+ query = iter;
+ }
break;
}
}
- if (!found) {
+ if (!query) {
spin_unlock_irqrestore(&ib_nl_request_lock, flags);
goto resp_out;
}
@@ -920,103 +1176,17 @@ int ib_nl_handle_resolve_resp(struct sk_buff *skb,
}
resp_out:
- return skb->len;
+ return 0;
}
static void free_sm_ah(struct kref *kref)
{
struct ib_sa_sm_ah *sm_ah = container_of(kref, struct ib_sa_sm_ah, ref);
- ib_destroy_ah(sm_ah->ah);
+ rdma_destroy_ah(sm_ah->ah, 0);
kfree(sm_ah);
}
-static void update_sm_ah(struct work_struct *work)
-{
- struct ib_sa_port *port =
- container_of(work, struct ib_sa_port, update_task);
- struct ib_sa_sm_ah *new_ah;
- struct ib_port_attr port_attr;
- struct ib_ah_attr ah_attr;
-
- if (ib_query_port(port->agent->device, port->port_num, &port_attr)) {
- pr_warn("Couldn't query port\n");
- return;
- }
-
- new_ah = kmalloc(sizeof *new_ah, GFP_KERNEL);
- if (!new_ah) {
- return;
- }
-
- kref_init(&new_ah->ref);
- new_ah->src_path_mask = (1 << port_attr.lmc) - 1;
-
- new_ah->pkey_index = 0;
- if (ib_find_pkey(port->agent->device, port->port_num,
- IB_DEFAULT_PKEY_FULL, &new_ah->pkey_index))
- pr_err("Couldn't find index for default PKey\n");
-
- memset(&ah_attr, 0, sizeof ah_attr);
- ah_attr.dlid = port_attr.sm_lid;
- ah_attr.sl = port_attr.sm_sl;
- ah_attr.port_num = port->port_num;
- if (port_attr.grh_required) {
- ah_attr.ah_flags = IB_AH_GRH;
- ah_attr.grh.dgid.global.subnet_prefix = cpu_to_be64(port_attr.subnet_prefix);
- ah_attr.grh.dgid.global.interface_id = cpu_to_be64(IB_SA_WELL_KNOWN_GUID);
- }
-
- new_ah->ah = ib_create_ah(port->agent->qp->pd, &ah_attr);
- if (IS_ERR(new_ah->ah)) {
- pr_warn("Couldn't create new SM AH\n");
- kfree(new_ah);
- return;
- }
-
- spin_lock_irq(&port->ah_lock);
- if (port->sm_ah)
- kref_put(&port->sm_ah->ref, free_sm_ah);
- port->sm_ah = new_ah;
- spin_unlock_irq(&port->ah_lock);
-
-}
-
-static void ib_sa_event(struct ib_event_handler *handler, struct ib_event *event)
-{
- if (event->event == IB_EVENT_PORT_ERR ||
- event->event == IB_EVENT_PORT_ACTIVE ||
- event->event == IB_EVENT_LID_CHANGE ||
- event->event == IB_EVENT_PKEY_CHANGE ||
- event->event == IB_EVENT_SM_CHANGE ||
- event->event == IB_EVENT_CLIENT_REREGISTER) {
- unsigned long flags;
- struct ib_sa_device *sa_dev =
- container_of(handler, typeof(*sa_dev), event_handler);
- struct ib_sa_port *port =
- &sa_dev->port[event->element.port_num - sa_dev->start_port];
-
- if (!rdma_cap_ib_sa(handler->device, port->port_num))
- return;
-
- spin_lock_irqsave(&port->ah_lock, flags);
- if (port->sm_ah)
- kref_put(&port->sm_ah->ref, free_sm_ah);
- port->sm_ah = NULL;
- spin_unlock_irqrestore(&port->ah_lock, flags);
-
- if (event->event == IB_EVENT_SM_CHANGE ||
- event->event == IB_EVENT_CLIENT_REREGISTER ||
- event->event == IB_EVENT_LID_CHANGE) {
- spin_lock_irqsave(&port->classport_lock, flags);
- port->classport_info.valid = false;
- spin_unlock_irqrestore(&port->classport_lock, flags);
- }
- queue_work(ib_wq, &sa_dev->port[event->element.port_num -
- sa_dev->start_port].update_task);
- }
-}
-
void ib_sa_register_client(struct ib_sa_client *client)
{
atomic_set(&client->users, 1);
@@ -1043,17 +1213,15 @@ EXPORT_SYMBOL(ib_sa_unregister_client);
void ib_sa_cancel_query(int id, struct ib_sa_query *query)
{
unsigned long flags;
- struct ib_mad_agent *agent;
struct ib_mad_send_buf *mad_buf;
- spin_lock_irqsave(&idr_lock, flags);
- if (idr_find(&query_idr, id) != query) {
- spin_unlock_irqrestore(&idr_lock, flags);
+ xa_lock_irqsave(&queries, flags);
+ if (xa_load(&queries, id) != query) {
+ xa_unlock_irqrestore(&queries, flags);
return;
}
- agent = query->port->agent;
mad_buf = query->mad_buf;
- spin_unlock_irqrestore(&idr_lock, flags);
+ xa_unlock_irqrestore(&queries, flags);
/*
* If the query is still on the netlink request list, schedule
@@ -1061,11 +1229,11 @@ void ib_sa_cancel_query(int id, struct ib_sa_query *query)
* sent to the MAD layer and has to be cancelled from there.
*/
if (!ib_nl_cancel_request(query))
- ib_cancel_mad(agent, mad_buf);
+ ib_cancel_mad(mad_buf);
}
EXPORT_SYMBOL(ib_sa_cancel_query);
-static u8 get_src_path_mask(struct ib_device *device, u8 port_num)
+static u8 get_src_path_mask(struct ib_device *device, u32 port_num)
{
struct ib_sa_device *sa_dev;
struct ib_sa_port *port;
@@ -1084,110 +1252,84 @@ static u8 get_src_path_mask(struct ib_device *device, u8 port_num)
return src_path_mask;
}
-int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
- struct ib_sa_path_rec *rec, struct ib_ah_attr *ah_attr)
+static int init_ah_attr_grh_fields(struct ib_device *device, u32 port_num,
+ struct sa_path_rec *rec,
+ struct rdma_ah_attr *ah_attr,
+ const struct ib_gid_attr *gid_attr)
{
- int ret;
- u16 gid_index;
- int use_roce;
- struct net_device *ndev = NULL;
-
- memset(ah_attr, 0, sizeof *ah_attr);
- ah_attr->dlid = be16_to_cpu(rec->dlid);
- ah_attr->sl = rec->sl;
- ah_attr->src_path_bits = be16_to_cpu(rec->slid) &
- get_src_path_mask(device, port_num);
- ah_attr->port_num = port_num;
- ah_attr->static_rate = rec->rate;
-
- use_roce = rdma_cap_eth_ah(device, port_num);
-
- if (use_roce) {
- struct net_device *idev;
- struct net_device *resolved_dev;
- struct rdma_dev_addr dev_addr = {.bound_dev_if = rec->ifindex,
- .net = rec->net ? rec->net :
- &init_net};
- union {
- struct sockaddr _sockaddr;
- struct sockaddr_in _sockaddr_in;
- struct sockaddr_in6 _sockaddr_in6;
- } sgid_addr, dgid_addr;
-
- if (!device->get_netdev)
- return -EOPNOTSUPP;
-
- rdma_gid2ip(&sgid_addr._sockaddr, &rec->sgid);
- rdma_gid2ip(&dgid_addr._sockaddr, &rec->dgid);
-
- /* validate the route */
- ret = rdma_resolve_ip_route(&sgid_addr._sockaddr,
- &dgid_addr._sockaddr, &dev_addr);
- if (ret)
- return ret;
+ enum ib_gid_type type = sa_conv_pathrec_to_gid_type(rec);
- if ((dev_addr.network == RDMA_NETWORK_IPV4 ||
- dev_addr.network == RDMA_NETWORK_IPV6) &&
- rec->gid_type != IB_GID_TYPE_ROCE_UDP_ENCAP)
- return -EINVAL;
-
- idev = device->get_netdev(device, port_num);
- if (!idev)
- return -ENODEV;
-
- resolved_dev = dev_get_by_index(dev_addr.net,
- dev_addr.bound_dev_if);
- if (resolved_dev->flags & IFF_LOOPBACK) {
- dev_put(resolved_dev);
- resolved_dev = idev;
- dev_hold(resolved_dev);
- }
- ndev = ib_get_ndev_from_path(rec);
- rcu_read_lock();
- if ((ndev && ndev != resolved_dev) ||
- (resolved_dev != idev &&
- !rdma_is_upper_dev_rcu(idev, resolved_dev)))
- ret = -EHOSTUNREACH;
- rcu_read_unlock();
- dev_put(idev);
- dev_put(resolved_dev);
- if (ret) {
- if (ndev)
- dev_put(ndev);
- return ret;
- }
- }
+ if (!gid_attr) {
+ gid_attr = rdma_find_gid_by_port(device, &rec->sgid, type,
+ port_num, NULL);
+ if (IS_ERR(gid_attr))
+ return PTR_ERR(gid_attr);
+ } else
+ rdma_hold_gid_attr(gid_attr);
+
+ rdma_move_grh_sgid_attr(ah_attr, &rec->dgid,
+ be32_to_cpu(rec->flow_label),
+ rec->hop_limit, rec->traffic_class,
+ gid_attr);
+ return 0;
+}
- if (rec->hop_limit > 0 || use_roce) {
- ah_attr->ah_flags = IB_AH_GRH;
- ah_attr->grh.dgid = rec->dgid;
+/**
+ * ib_init_ah_attr_from_path - Initialize address handle attributes based on
+ * an SA path record.
+ * @device: Device associated ah attributes initialization.
+ * @port_num: Port on the specified device.
+ * @rec: path record entry to use for ah attributes initialization.
+ * @ah_attr: address handle attributes to initialization from path record.
+ * @gid_attr: SGID attribute to consider during initialization.
+ *
+ * When ib_init_ah_attr_from_path() returns success,
+ * (a) for IB link layer it optionally contains a reference to SGID attribute
+ * when GRH is present for IB link layer.
+ * (b) for RoCE link layer it contains a reference to SGID attribute.
+ * User must invoke rdma_destroy_ah_attr() to release reference to SGID
+ * attributes which are initialized using ib_init_ah_attr_from_path().
+ */
+int ib_init_ah_attr_from_path(struct ib_device *device, u32 port_num,
+ struct sa_path_rec *rec,
+ struct rdma_ah_attr *ah_attr,
+ const struct ib_gid_attr *gid_attr)
+{
+ int ret = 0;
- ret = ib_find_cached_gid_by_port(device, &rec->sgid,
- rec->gid_type, port_num, ndev,
- &gid_index);
- if (ret) {
- if (ndev)
- dev_put(ndev);
+ memset(ah_attr, 0, sizeof(*ah_attr));
+ ah_attr->type = rdma_ah_find_type(device, port_num);
+ rdma_ah_set_sl(ah_attr, rec->sl);
+ rdma_ah_set_port_num(ah_attr, port_num);
+ rdma_ah_set_static_rate(ah_attr, rec->rate);
+
+ if (sa_path_is_roce(rec)) {
+ ret = roce_resolve_route_from_path(rec, gid_attr);
+ if (ret)
return ret;
- }
- ah_attr->grh.sgid_index = gid_index;
- ah_attr->grh.flow_label = be32_to_cpu(rec->flow_label);
- ah_attr->grh.hop_limit = rec->hop_limit;
- ah_attr->grh.traffic_class = rec->traffic_class;
- if (ndev)
- dev_put(ndev);
+ memcpy(ah_attr->roce.dmac, sa_path_get_dmac(rec), ETH_ALEN);
+ } else {
+ rdma_ah_set_dlid(ah_attr, be32_to_cpu(sa_path_get_dlid(rec)));
+ if (sa_path_is_opa(rec) &&
+ rdma_ah_get_dlid(ah_attr) == be16_to_cpu(IB_LID_PERMISSIVE))
+ rdma_ah_set_make_grd(ah_attr, true);
+
+ rdma_ah_set_path_bits(ah_attr,
+ be32_to_cpu(sa_path_get_slid(rec)) &
+ get_src_path_mask(device, port_num));
}
- if (use_roce)
- memcpy(ah_attr->dmac, rec->dmac, ETH_ALEN);
-
- return 0;
+ if (rec->hop_limit > 0 || sa_path_is_roce(rec))
+ ret = init_ah_attr_grh_fields(device, port_num,
+ rec, ah_attr, gid_attr);
+ return ret;
}
-EXPORT_SYMBOL(ib_init_ah_from_path);
+EXPORT_SYMBOL(ib_init_ah_attr_from_path);
static int alloc_mad(struct ib_sa_query *query, gfp_t gfp_mask)
{
+ struct rdma_ah_attr ah_attr;
unsigned long flags;
spin_lock_irqsave(&query->port->ah_lock, flags);
@@ -1199,11 +1341,22 @@ static int alloc_mad(struct ib_sa_query *query, gfp_t gfp_mask)
query->sm_ah = query->port->sm_ah;
spin_unlock_irqrestore(&query->port->ah_lock, flags);
+ /*
+ * Always check if sm_ah has valid dlid assigned,
+ * before querying for class port info
+ */
+ if ((rdma_query_ah(query->sm_ah->ah, &ah_attr) < 0) ||
+ !rdma_is_valid_unicast_lid(&ah_attr)) {
+ kref_put(&query->sm_ah->ref, free_sm_ah);
+ return -EAGAIN;
+ }
query->mad_buf = ib_create_send_mad(query->port->agent, 1,
query->sm_ah->pkey_index,
0, IB_MGMT_SA_HDR, IB_MGMT_SA_DATA,
gfp_mask,
- IB_MGMT_BASE_VERSION);
+ ((query->flags & IB_SA_QUERY_OPA) ?
+ OPA_MGMT_BASE_VERSION :
+ IB_MGMT_BASE_VERSION));
if (IS_ERR(query->mad_buf)) {
kref_put(&query->sm_ah->ref, free_sm_ah);
return -ENOMEM;
@@ -1220,46 +1373,53 @@ static void free_mad(struct ib_sa_query *query)
kref_put(&query->sm_ah->ref, free_sm_ah);
}
-static void init_mad(struct ib_sa_mad *mad, struct ib_mad_agent *agent)
+static void init_mad(struct ib_sa_query *query, struct ib_mad_agent *agent)
{
+ struct ib_sa_mad *mad = query->mad_buf->mad;
unsigned long flags;
memset(mad, 0, sizeof *mad);
- mad->mad_hdr.base_version = IB_MGMT_BASE_VERSION;
+ if (query->flags & IB_SA_QUERY_OPA) {
+ mad->mad_hdr.base_version = OPA_MGMT_BASE_VERSION;
+ mad->mad_hdr.class_version = OPA_SA_CLASS_VERSION;
+ } else {
+ mad->mad_hdr.base_version = IB_MGMT_BASE_VERSION;
+ mad->mad_hdr.class_version = IB_SA_CLASS_VERSION;
+ }
mad->mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM;
- mad->mad_hdr.class_version = IB_SA_CLASS_VERSION;
-
spin_lock_irqsave(&tid_lock, flags);
mad->mad_hdr.tid =
cpu_to_be64(((u64) agent->hi_tid) << 32 | tid++);
spin_unlock_irqrestore(&tid_lock, flags);
}
-static int send_mad(struct ib_sa_query *query, int timeout_ms, gfp_t gfp_mask)
+static int send_mad(struct ib_sa_query *query, unsigned long timeout_ms,
+ gfp_t gfp_mask)
{
- bool preload = gfpflags_allow_blocking(gfp_mask);
unsigned long flags;
int ret, id;
+ const int nmbr_sa_query_retries = 10;
- if (preload)
- idr_preload(gfp_mask);
- spin_lock_irqsave(&idr_lock, flags);
-
- id = idr_alloc(&query_idr, query, 0, 0, GFP_NOWAIT);
-
- spin_unlock_irqrestore(&idr_lock, flags);
- if (preload)
- idr_preload_end();
- if (id < 0)
- return id;
-
- query->mad_buf->timeout_ms = timeout_ms;
+ xa_lock_irqsave(&queries, flags);
+ ret = __xa_alloc(&queries, &id, query, xa_limit_32b, gfp_mask);
+ xa_unlock_irqrestore(&queries, flags);
+ if (ret < 0)
+ return ret;
+
+ query->mad_buf->timeout_ms = timeout_ms / nmbr_sa_query_retries;
+ query->mad_buf->retries = nmbr_sa_query_retries;
+ if (!query->mad_buf->timeout_ms) {
+ /* Special case, very small timeout_ms */
+ query->mad_buf->timeout_ms = 1;
+ query->mad_buf->retries = timeout_ms;
+ }
query->mad_buf->context[0] = query;
query->id = id;
- if (query->flags & IB_SA_ENABLE_LOCAL_SERVICE) {
- if (!ibnl_chk_listeners(RDMA_NL_GROUP_LS)) {
+ if ((query->flags & IB_SA_ENABLE_LOCAL_SERVICE) &&
+ (!(query->flags & IB_SA_QUERY_OPA))) {
+ if (rdma_nl_chk_listeners(RDMA_NL_GROUP_LS)) {
if (!ib_nl_make_request(query, gfp_mask))
return id;
}
@@ -1268,9 +1428,9 @@ static int send_mad(struct ib_sa_query *query, int timeout_ms, gfp_t gfp_mask)
ret = ib_post_send_mad(query->mad_buf, NULL);
if (ret) {
- spin_lock_irqsave(&idr_lock, flags);
- idr_remove(&query_idr, id);
- spin_unlock_irqrestore(&idr_lock, flags);
+ xa_lock_irqsave(&queries, flags);
+ __xa_erase(&queries, id);
+ xa_unlock_irqrestore(&queries, flags);
}
/*
@@ -1281,42 +1441,198 @@ static int send_mad(struct ib_sa_query *query, int timeout_ms, gfp_t gfp_mask)
return ret ? ret : id;
}
-void ib_sa_unpack_path(void *attribute, struct ib_sa_path_rec *rec)
+void ib_sa_unpack_path(void *attribute, struct sa_path_rec *rec)
{
ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table), attribute, rec);
}
EXPORT_SYMBOL(ib_sa_unpack_path);
-void ib_sa_pack_path(struct ib_sa_path_rec *rec, void *attribute)
+void ib_sa_pack_path(struct sa_path_rec *rec, void *attribute)
{
ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table), rec, attribute);
}
EXPORT_SYMBOL(ib_sa_pack_path);
+void ib_sa_pack_service(struct sa_service_rec *rec, void *attribute)
+{
+ ib_pack(service_rec_table, ARRAY_SIZE(service_rec_table), rec,
+ attribute);
+}
+EXPORT_SYMBOL(ib_sa_pack_service);
+
+void ib_sa_unpack_service(void *attribute, struct sa_service_rec *rec)
+{
+ ib_unpack(service_rec_table, ARRAY_SIZE(service_rec_table), attribute,
+ rec);
+}
+EXPORT_SYMBOL(ib_sa_unpack_service);
+
+static bool ib_sa_opa_pathrecord_support(struct ib_sa_client *client,
+ struct ib_sa_device *sa_dev,
+ u32 port_num)
+{
+ struct ib_sa_port *port;
+ unsigned long flags;
+ bool ret = false;
+
+ port = &sa_dev->port[port_num - sa_dev->start_port];
+ spin_lock_irqsave(&port->classport_lock, flags);
+ if (!port->classport_info.valid)
+ goto ret;
+
+ if (port->classport_info.data.type == RDMA_CLASS_PORT_INFO_OPA)
+ ret = opa_get_cpi_capmask2(&port->classport_info.data.opa) &
+ OPA_CLASS_PORT_INFO_PR_SUPPORT;
+ret:
+ spin_unlock_irqrestore(&port->classport_lock, flags);
+ return ret;
+}
+
+enum opa_pr_supported {
+ PR_NOT_SUPPORTED,
+ PR_OPA_SUPPORTED,
+ PR_IB_SUPPORTED
+};
+
+/*
+ * opa_pr_query_possible - Check if current PR query can be an OPA query.
+ *
+ * Returns PR_NOT_SUPPORTED if a path record query is not
+ * possible, PR_OPA_SUPPORTED if an OPA path record query
+ * is possible and PR_IB_SUPPORTED if an IB path record
+ * query is possible.
+ */
+static int opa_pr_query_possible(struct ib_sa_client *client,
+ struct ib_sa_device *sa_dev,
+ struct ib_device *device, u32 port_num)
+{
+ struct ib_port_attr port_attr;
+
+ if (ib_query_port(device, port_num, &port_attr))
+ return PR_NOT_SUPPORTED;
+
+ if (ib_sa_opa_pathrecord_support(client, sa_dev, port_num))
+ return PR_OPA_SUPPORTED;
+
+ if (port_attr.lid >= be16_to_cpu(IB_MULTICAST_LID_BASE))
+ return PR_NOT_SUPPORTED;
+ else
+ return PR_IB_SUPPORTED;
+}
+
static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query,
- int status,
- struct ib_sa_mad *mad)
+ int status, struct ib_sa_mad *mad)
{
struct ib_sa_path_query *query =
container_of(sa_query, struct ib_sa_path_query, sa_query);
+ struct sa_path_rec rec = {};
- if (mad) {
- struct ib_sa_path_rec rec;
+ if (!mad) {
+ query->callback(status, NULL, 0, query->context);
+ return;
+ }
- ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table),
+ if (sa_query->flags & IB_SA_QUERY_OPA) {
+ ib_unpack(opa_path_rec_table, ARRAY_SIZE(opa_path_rec_table),
mad->data, &rec);
- rec.net = NULL;
- rec.ifindex = 0;
- rec.gid_type = IB_GID_TYPE_IB;
- eth_zero_addr(rec.dmac);
- query->callback(status, &rec, query->context);
- } else
- query->callback(status, NULL, query->context);
+ rec.rec_type = SA_PATH_REC_TYPE_OPA;
+ query->callback(status, &rec, 1, query->context);
+ return;
+ }
+
+ ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table),
+ mad->data, &rec);
+ rec.rec_type = SA_PATH_REC_TYPE_IB;
+ sa_path_set_dmac_zero(&rec);
+
+ if (query->conv_pr) {
+ struct sa_path_rec opa;
+
+ memset(&opa, 0, sizeof(struct sa_path_rec));
+ sa_convert_path_ib_to_opa(&opa, &rec);
+ query->callback(status, &opa, 1, query->context);
+ } else {
+ query->callback(status, &rec, 1, query->context);
+ }
+}
+
+#define IB_SA_DATA_OFFS 56
+#define IB_SERVICE_REC_SZ 176
+
+static void ib_unpack_service_rmpp(struct sa_service_rec *rec,
+ struct ib_mad_recv_wc *mad_wc,
+ int num_services)
+{
+ unsigned int cp_sz, data_i, data_size, rec_i = 0, buf_i = 0;
+ struct ib_mad_recv_buf *mad_buf;
+ u8 buf[IB_SERVICE_REC_SZ];
+ u8 *data;
+
+ data_size = sizeof(((struct ib_sa_mad *) mad_buf->mad)->data);
+
+ list_for_each_entry(mad_buf, &mad_wc->rmpp_list, list) {
+ data = ((struct ib_sa_mad *) mad_buf->mad)->data;
+ data_i = 0;
+ while (data_i < data_size && rec_i < num_services) {
+ cp_sz = min(IB_SERVICE_REC_SZ - buf_i,
+ data_size - data_i);
+ memcpy(buf + buf_i, data + data_i, cp_sz);
+ data_i += cp_sz;
+ buf_i += cp_sz;
+ if (buf_i == IB_SERVICE_REC_SZ) {
+ ib_sa_unpack_service(buf, rec + rec_i);
+ buf_i = 0;
+ rec_i++;
+ }
+ }
+ }
+}
+
+static void ib_sa_service_rec_callback(struct ib_sa_query *sa_query, int status,
+ struct ib_mad_recv_wc *mad_wc)
+{
+ struct ib_sa_service_query *query =
+ container_of(sa_query, struct ib_sa_service_query, sa_query);
+ struct sa_service_rec *rec;
+ int num_services;
+
+ if (!mad_wc || !mad_wc->recv_buf.mad) {
+ query->callback(status, NULL, 0, query->context);
+ return;
+ }
+
+ num_services = (mad_wc->mad_len - IB_SA_DATA_OFFS) / IB_SERVICE_REC_SZ;
+ if (!num_services) {
+ query->callback(-ENODATA, NULL, 0, query->context);
+ return;
+ }
+
+ rec = kmalloc_array(num_services, sizeof(*rec), GFP_KERNEL);
+ if (!rec) {
+ query->callback(-ENOMEM, NULL, 0, query->context);
+ return;
+ }
+
+ ib_unpack_service_rmpp(rec, mad_wc, num_services);
+ query->callback(status, rec, num_services, query->context);
+ kfree(rec);
}
static void ib_sa_path_rec_release(struct ib_sa_query *sa_query)
{
- kfree(container_of(sa_query, struct ib_sa_path_query, sa_query));
+ struct ib_sa_path_query *query =
+ container_of(sa_query, struct ib_sa_path_query, sa_query);
+
+ kfree(query->conv_pr);
+ kfree(query);
+}
+
+static void ib_sa_service_rec_release(struct ib_sa_query *sa_query)
+{
+ struct ib_sa_service_query *query =
+ container_of(sa_query, struct ib_sa_service_query, sa_query);
+
+ kfree(query);
}
/**
@@ -1345,13 +1661,13 @@ static void ib_sa_path_rec_release(struct ib_sa_query *sa_query)
* the query.
*/
int ib_sa_path_rec_get(struct ib_sa_client *client,
- struct ib_device *device, u8 port_num,
- struct ib_sa_path_rec *rec,
+ struct ib_device *device, u32 port_num,
+ struct sa_path_rec *rec,
ib_sa_comp_mask comp_mask,
- int timeout_ms, gfp_t gfp_mask,
+ unsigned long timeout_ms, gfp_t gfp_mask,
void (*callback)(int status,
- struct ib_sa_path_rec *resp,
- void *context),
+ struct sa_path_rec *resp,
+ unsigned int num_paths, void *context),
void *context,
struct ib_sa_query **sa_query)
{
@@ -1360,11 +1676,16 @@ int ib_sa_path_rec_get(struct ib_sa_client *client,
struct ib_sa_port *port;
struct ib_mad_agent *agent;
struct ib_sa_mad *mad;
+ enum opa_pr_supported status;
int ret;
if (!sa_dev)
return -ENODEV;
+ if ((rec->rec_type != SA_PATH_REC_TYPE_IB) &&
+ (rec->rec_type != SA_PATH_REC_TYPE_OPA))
+ return -EINVAL;
+
port = &sa_dev->port[port_num - sa_dev->start_port];
agent = port->agent;
@@ -1373,9 +1694,26 @@ int ib_sa_path_rec_get(struct ib_sa_client *client,
return -ENOMEM;
query->sa_query.port = port;
+ if (rec->rec_type == SA_PATH_REC_TYPE_OPA) {
+ status = opa_pr_query_possible(client, sa_dev, device, port_num);
+ if (status == PR_NOT_SUPPORTED) {
+ ret = -EINVAL;
+ goto err1;
+ } else if (status == PR_OPA_SUPPORTED) {
+ query->sa_query.flags |= IB_SA_QUERY_OPA;
+ } else {
+ query->conv_pr =
+ kmalloc(sizeof(*query->conv_pr), gfp_mask);
+ if (!query->conv_pr) {
+ ret = -ENOMEM;
+ goto err1;
+ }
+ }
+ }
+
ret = alloc_mad(&query->sa_query, gfp_mask);
if (ret)
- goto err1;
+ goto err2;
ib_sa_client_get(client);
query->sa_query.client = client;
@@ -1383,7 +1721,7 @@ int ib_sa_path_rec_get(struct ib_sa_client *client,
query->context = context;
mad = query->sa_query.mad_buf->mad;
- init_mad(mad, agent);
+ init_mad(&query->sa_query, agent);
query->sa_query.callback = callback ? ib_sa_path_rec_callback : NULL;
query->sa_query.release = ib_sa_path_rec_release;
@@ -1391,113 +1729,97 @@ int ib_sa_path_rec_get(struct ib_sa_client *client,
mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_PATH_REC);
mad->sa_hdr.comp_mask = comp_mask;
- ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table), rec, mad->data);
+ if (query->sa_query.flags & IB_SA_QUERY_OPA) {
+ ib_pack(opa_path_rec_table, ARRAY_SIZE(opa_path_rec_table),
+ rec, mad->data);
+ } else if (query->conv_pr) {
+ sa_convert_path_opa_to_ib(query->conv_pr, rec);
+ ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table),
+ query->conv_pr, mad->data);
+ } else {
+ ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table),
+ rec, mad->data);
+ }
*sa_query = &query->sa_query;
query->sa_query.flags |= IB_SA_ENABLE_LOCAL_SERVICE;
- query->sa_query.mad_buf->context[1] = rec;
+ query->sa_query.mad_buf->context[1] = (query->conv_pr) ?
+ query->conv_pr : rec;
ret = send_mad(&query->sa_query, timeout_ms, gfp_mask);
if (ret < 0)
- goto err2;
+ goto err3;
return ret;
-err2:
+err3:
*sa_query = NULL;
ib_sa_client_put(query->sa_query.client);
free_mad(&query->sa_query);
-
+err2:
+ kfree(query->conv_pr);
err1:
kfree(query);
return ret;
}
EXPORT_SYMBOL(ib_sa_path_rec_get);
-static void ib_sa_service_rec_callback(struct ib_sa_query *sa_query,
- int status,
- struct ib_sa_mad *mad)
-{
- struct ib_sa_service_query *query =
- container_of(sa_query, struct ib_sa_service_query, sa_query);
-
- if (mad) {
- struct ib_sa_service_rec rec;
-
- ib_unpack(service_rec_table, ARRAY_SIZE(service_rec_table),
- mad->data, &rec);
- query->callback(status, &rec, query->context);
- } else
- query->callback(status, NULL, query->context);
-}
-
-static void ib_sa_service_rec_release(struct ib_sa_query *sa_query)
-{
- kfree(container_of(sa_query, struct ib_sa_service_query, sa_query));
-}
-
/**
- * ib_sa_service_rec_query - Start Service Record operation
- * @client:SA client
- * @device:device to send request on
- * @port_num: port number to send request on
- * @method:SA method - should be get, set, or delete
- * @rec:Service Record to send in request
- * @comp_mask:component mask to send in request
- * @timeout_ms:time to wait for response
- * @gfp_mask:GFP mask to use for internal allocations
- * @callback:function called when request completes, times out or is
+ * ib_sa_service_rec_get - Start a Service get query
+ * @client: SA client
+ * @device: device to send query on
+ * @port_num: port number to send query on
+ * @rec: Service Record to send in query
+ * @comp_mask: component mask to send in query
+ * @timeout_ms: time to wait for response
+ * @gfp_mask: GFP mask to use for internal allocations
+ * @callback: function called when query completes, times out or is
* canceled
- * @context:opaque user context passed to callback
- * @sa_query:request context, used to cancel request
+ * @context: opaque user context passed to callback
+ * @sa_query: query context, used to cancel query
*
- * Send a Service Record set/get/delete to the SA to register,
- * unregister or query a service record.
- * The callback function will be called when the request completes (or
+ * Send a Service Record Get query to the SA to look up a path. The
+ * callback function will be called when the query completes (or
* fails); status is 0 for a successful response, -EINTR if the query
* is canceled, -ETIMEDOUT is the query timed out, or -EIO if an error
* occurred sending the query. The resp parameter of the callback is
* only valid if status is 0.
*
- * If the return value of ib_sa_service_rec_query() is negative, it is an
- * error code. Otherwise it is a request ID that can be used to cancel
+ * If the return value of ib_sa_service_rec_get() is negative, it is an
+ * error code. Otherwise it is a query ID that can be used to cancel
* the query.
*/
-int ib_sa_service_rec_query(struct ib_sa_client *client,
- struct ib_device *device, u8 port_num, u8 method,
- struct ib_sa_service_rec *rec,
- ib_sa_comp_mask comp_mask,
- int timeout_ms, gfp_t gfp_mask,
- void (*callback)(int status,
- struct ib_sa_service_rec *resp,
- void *context),
- void *context,
- struct ib_sa_query **sa_query)
+int ib_sa_service_rec_get(struct ib_sa_client *client,
+ struct ib_device *device, u32 port_num,
+ struct sa_service_rec *rec,
+ ib_sa_comp_mask comp_mask,
+ unsigned long timeout_ms, gfp_t gfp_mask,
+ void (*callback)(int status,
+ struct sa_service_rec *resp,
+ unsigned int num_services,
+ void *context),
+ void *context, struct ib_sa_query **sa_query)
{
- struct ib_sa_service_query *query;
struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
- struct ib_sa_port *port;
+ struct ib_sa_service_query *query;
struct ib_mad_agent *agent;
+ struct ib_sa_port *port;
struct ib_sa_mad *mad;
int ret;
if (!sa_dev)
return -ENODEV;
- port = &sa_dev->port[port_num - sa_dev->start_port];
+ port = &sa_dev->port[port_num - sa_dev->start_port];
agent = port->agent;
- if (method != IB_MGMT_METHOD_GET &&
- method != IB_MGMT_METHOD_SET &&
- method != IB_SA_METHOD_DELETE)
- return -EINVAL;
-
query = kzalloc(sizeof(*query), gfp_mask);
if (!query)
return -ENOMEM;
- query->sa_query.port = port;
+ query->sa_query.port = port;
+
ret = alloc_mad(&query->sa_query, gfp_mask);
if (ret)
goto err1;
@@ -1508,18 +1830,19 @@ int ib_sa_service_rec_query(struct ib_sa_client *client,
query->context = context;
mad = query->sa_query.mad_buf->mad;
- init_mad(mad, agent);
+ init_mad(&query->sa_query, agent);
- query->sa_query.callback = callback ? ib_sa_service_rec_callback : NULL;
- query->sa_query.release = ib_sa_service_rec_release;
- mad->mad_hdr.method = method;
- mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_SERVICE_REC);
- mad->sa_hdr.comp_mask = comp_mask;
+ query->sa_query.rmpp_callback = callback ? ib_sa_service_rec_callback :
+ NULL;
+ query->sa_query.release = ib_sa_service_rec_release;
+ mad->mad_hdr.method = IB_MGMT_METHOD_GET_TABLE;
+ mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_SERVICE_REC);
+ mad->sa_hdr.comp_mask = comp_mask;
- ib_pack(service_rec_table, ARRAY_SIZE(service_rec_table),
- rec, mad->data);
+ ib_sa_pack_service(rec, mad->data);
*sa_query = &query->sa_query;
+ query->sa_query.mad_buf->context[1] = rec;
ret = send_mad(&query->sa_query, timeout_ms, gfp_mask);
if (ret < 0)
@@ -1531,16 +1854,14 @@ err2:
*sa_query = NULL;
ib_sa_client_put(query->sa_query.client);
free_mad(&query->sa_query);
-
err1:
kfree(query);
return ret;
}
-EXPORT_SYMBOL(ib_sa_service_rec_query);
+EXPORT_SYMBOL(ib_sa_service_rec_get);
static void ib_sa_mcmember_rec_callback(struct ib_sa_query *sa_query,
- int status,
- struct ib_sa_mad *mad)
+ int status, struct ib_sa_mad *mad)
{
struct ib_sa_mcmember_query *query =
container_of(sa_query, struct ib_sa_mcmember_query, sa_query);
@@ -1561,11 +1882,11 @@ static void ib_sa_mcmember_rec_release(struct ib_sa_query *sa_query)
}
int ib_sa_mcmember_rec_query(struct ib_sa_client *client,
- struct ib_device *device, u8 port_num,
+ struct ib_device *device, u32 port_num,
u8 method,
struct ib_sa_mcmember_rec *rec,
ib_sa_comp_mask comp_mask,
- int timeout_ms, gfp_t gfp_mask,
+ unsigned long timeout_ms, gfp_t gfp_mask,
void (*callback)(int status,
struct ib_sa_mcmember_rec *resp,
void *context),
@@ -1600,7 +1921,7 @@ int ib_sa_mcmember_rec_query(struct ib_sa_client *client,
query->context = context;
mad = query->sa_query.mad_buf->mad;
- init_mad(mad, agent);
+ init_mad(&query->sa_query, agent);
query->sa_query.callback = callback ? ib_sa_mcmember_rec_callback : NULL;
query->sa_query.release = ib_sa_mcmember_rec_release;
@@ -1631,8 +1952,7 @@ err1:
/* Support GuidInfoRecord */
static void ib_sa_guidinfo_rec_callback(struct ib_sa_query *sa_query,
- int status,
- struct ib_sa_mad *mad)
+ int status, struct ib_sa_mad *mad)
{
struct ib_sa_guidinfo_query *query =
container_of(sa_query, struct ib_sa_guidinfo_query, sa_query);
@@ -1653,10 +1973,10 @@ static void ib_sa_guidinfo_rec_release(struct ib_sa_query *sa_query)
}
int ib_sa_guid_info_rec_query(struct ib_sa_client *client,
- struct ib_device *device, u8 port_num,
+ struct ib_device *device, u32 port_num,
struct ib_sa_guidinfo_rec *rec,
ib_sa_comp_mask comp_mask, u8 method,
- int timeout_ms, gfp_t gfp_mask,
+ unsigned long timeout_ms, gfp_t gfp_mask,
void (*callback)(int status,
struct ib_sa_guidinfo_rec *resp,
void *context),
@@ -1697,7 +2017,7 @@ int ib_sa_guid_info_rec_query(struct ib_sa_client *client,
query->context = context;
mad = query->sa_query.mad_buf->mad;
- init_mad(mad, agent);
+ init_mad(&query->sa_query, agent);
query->sa_query.callback = callback ? ib_sa_guidinfo_rec_callback : NULL;
query->sa_query.release = ib_sa_guidinfo_rec_release;
@@ -1728,99 +2048,109 @@ err1:
}
EXPORT_SYMBOL(ib_sa_guid_info_rec_query);
-/* Support get SA ClassPortInfo */
+struct ib_classport_info_context {
+ struct completion done;
+ struct ib_sa_query *sa_query;
+};
+
+static void ib_classportinfo_cb(void *context)
+{
+ struct ib_classport_info_context *cb_ctx = context;
+
+ complete(&cb_ctx->done);
+}
+
static void ib_sa_classport_info_rec_callback(struct ib_sa_query *sa_query,
- int status,
- struct ib_sa_mad *mad)
+ int status, struct ib_sa_mad *mad)
{
unsigned long flags;
struct ib_sa_classport_info_query *query =
container_of(sa_query, struct ib_sa_classport_info_query, sa_query);
+ struct ib_sa_classport_cache *info = &sa_query->port->classport_info;
if (mad) {
- struct ib_class_port_info rec;
+ if (sa_query->flags & IB_SA_QUERY_OPA) {
+ struct opa_class_port_info rec;
- ib_unpack(classport_info_rec_table,
- ARRAY_SIZE(classport_info_rec_table),
- mad->data, &rec);
+ ib_unpack(opa_classport_info_rec_table,
+ ARRAY_SIZE(opa_classport_info_rec_table),
+ mad->data, &rec);
- spin_lock_irqsave(&sa_query->port->classport_lock, flags);
- if (!status && !sa_query->port->classport_info.valid) {
- memcpy(&sa_query->port->classport_info.data, &rec,
- sizeof(sa_query->port->classport_info.data));
+ spin_lock_irqsave(&sa_query->port->classport_lock,
+ flags);
+ if (!status && !info->valid) {
+ memcpy(&info->data.opa, &rec,
+ sizeof(info->data.opa));
- sa_query->port->classport_info.valid = true;
- }
- spin_unlock_irqrestore(&sa_query->port->classport_lock, flags);
+ info->valid = true;
+ info->data.type = RDMA_CLASS_PORT_INFO_OPA;
+ }
+ spin_unlock_irqrestore(&sa_query->port->classport_lock,
+ flags);
- query->callback(status, &rec, query->context);
- } else {
- query->callback(status, NULL, query->context);
+ } else {
+ struct ib_class_port_info rec;
+
+ ib_unpack(ib_classport_info_rec_table,
+ ARRAY_SIZE(ib_classport_info_rec_table),
+ mad->data, &rec);
+
+ spin_lock_irqsave(&sa_query->port->classport_lock,
+ flags);
+ if (!status && !info->valid) {
+ memcpy(&info->data.ib, &rec,
+ sizeof(info->data.ib));
+
+ info->valid = true;
+ info->data.type = RDMA_CLASS_PORT_INFO_IB;
+ }
+ spin_unlock_irqrestore(&sa_query->port->classport_lock,
+ flags);
+ }
}
+ query->callback(query->context);
}
-static void ib_sa_portclass_info_rec_release(struct ib_sa_query *sa_query)
+static void ib_sa_classport_info_rec_release(struct ib_sa_query *sa_query)
{
kfree(container_of(sa_query, struct ib_sa_classport_info_query,
sa_query));
}
-int ib_sa_classport_info_rec_query(struct ib_sa_client *client,
- struct ib_device *device, u8 port_num,
- int timeout_ms, gfp_t gfp_mask,
- void (*callback)(int status,
- struct ib_class_port_info *resp,
- void *context),
- void *context,
- struct ib_sa_query **sa_query)
+static int ib_sa_classport_info_rec_query(struct ib_sa_port *port,
+ unsigned long timeout_ms,
+ void (*callback)(void *context),
+ void *context,
+ struct ib_sa_query **sa_query)
{
- struct ib_sa_classport_info_query *query;
- struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
- struct ib_sa_port *port;
struct ib_mad_agent *agent;
+ struct ib_sa_classport_info_query *query;
struct ib_sa_mad *mad;
- struct ib_class_port_info cached_class_port_info;
+ gfp_t gfp_mask = GFP_KERNEL;
int ret;
- unsigned long flags;
- if (!sa_dev)
- return -ENODEV;
-
- port = &sa_dev->port[port_num - sa_dev->start_port];
agent = port->agent;
- /* Use cached ClassPortInfo attribute if valid instead of sending mad */
- spin_lock_irqsave(&port->classport_lock, flags);
- if (port->classport_info.valid && callback) {
- memcpy(&cached_class_port_info, &port->classport_info.data,
- sizeof(cached_class_port_info));
- spin_unlock_irqrestore(&port->classport_lock, flags);
- callback(0, &cached_class_port_info, context);
- return 0;
- }
- spin_unlock_irqrestore(&port->classport_lock, flags);
-
query = kzalloc(sizeof(*query), gfp_mask);
if (!query)
return -ENOMEM;
query->sa_query.port = port;
+ query->sa_query.flags |= rdma_cap_opa_ah(port->agent->device,
+ port->port_num) ?
+ IB_SA_QUERY_OPA : 0;
ret = alloc_mad(&query->sa_query, gfp_mask);
if (ret)
- goto err1;
+ goto err_free;
- ib_sa_client_get(client);
- query->sa_query.client = client;
- query->callback = callback;
- query->context = context;
+ query->callback = callback;
+ query->context = context;
mad = query->sa_query.mad_buf->mad;
- init_mad(mad, agent);
+ init_mad(&query->sa_query, agent);
- query->sa_query.callback = callback ? ib_sa_classport_info_rec_callback : NULL;
-
- query->sa_query.release = ib_sa_portclass_info_rec_release;
- /* support GET only */
+ query->sa_query.callback = ib_sa_classport_info_rec_callback;
+ query->sa_query.release = ib_sa_classport_info_rec_release;
mad->mad_hdr.method = IB_MGMT_METHOD_GET;
mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_CLASS_PORTINFO);
mad->sa_hdr.comp_mask = 0;
@@ -1828,49 +2158,107 @@ int ib_sa_classport_info_rec_query(struct ib_sa_client *client,
ret = send_mad(&query->sa_query, timeout_ms, gfp_mask);
if (ret < 0)
- goto err2;
+ goto err_free_mad;
return ret;
-err2:
+err_free_mad:
*sa_query = NULL;
- ib_sa_client_put(query->sa_query.client);
free_mad(&query->sa_query);
-err1:
+err_free:
kfree(query);
return ret;
}
-EXPORT_SYMBOL(ib_sa_classport_info_rec_query);
+
+static void update_ib_cpi(struct work_struct *work)
+{
+ struct ib_sa_port *port =
+ container_of(work, struct ib_sa_port, ib_cpi_work.work);
+ struct ib_classport_info_context *cb_context;
+ unsigned long flags;
+ int ret;
+
+ /* If the classport info is valid, nothing
+ * to do here.
+ */
+ spin_lock_irqsave(&port->classport_lock, flags);
+ if (port->classport_info.valid) {
+ spin_unlock_irqrestore(&port->classport_lock, flags);
+ return;
+ }
+ spin_unlock_irqrestore(&port->classport_lock, flags);
+
+ cb_context = kmalloc(sizeof(*cb_context), GFP_KERNEL);
+ if (!cb_context)
+ goto err_nomem;
+
+ init_completion(&cb_context->done);
+
+ ret = ib_sa_classport_info_rec_query(port, 3000,
+ ib_classportinfo_cb, cb_context,
+ &cb_context->sa_query);
+ if (ret < 0)
+ goto free_cb_err;
+ wait_for_completion(&cb_context->done);
+free_cb_err:
+ kfree(cb_context);
+ spin_lock_irqsave(&port->classport_lock, flags);
+
+ /* If the classport info is still not valid, the query should have
+ * failed for some reason. Retry issuing the query
+ */
+ if (!port->classport_info.valid) {
+ port->classport_info.retry_cnt++;
+ if (port->classport_info.retry_cnt <=
+ IB_SA_CPI_MAX_RETRY_CNT) {
+ unsigned long delay =
+ msecs_to_jiffies(IB_SA_CPI_RETRY_WAIT);
+
+ queue_delayed_work(ib_wq, &port->ib_cpi_work, delay);
+ }
+ }
+ spin_unlock_irqrestore(&port->classport_lock, flags);
+
+err_nomem:
+ return;
+}
static void send_handler(struct ib_mad_agent *agent,
struct ib_mad_send_wc *mad_send_wc)
{
struct ib_sa_query *query = mad_send_wc->send_buf->context[0];
unsigned long flags;
+ int status = 0;
- if (query->callback)
+ if (query->callback || query->rmpp_callback) {
switch (mad_send_wc->status) {
case IB_WC_SUCCESS:
/* No callback -- already got recv */
break;
case IB_WC_RESP_TIMEOUT_ERR:
- query->callback(query, -ETIMEDOUT, NULL);
+ status = -ETIMEDOUT;
break;
case IB_WC_WR_FLUSH_ERR:
- query->callback(query, -EINTR, NULL);
+ status = -EINTR;
break;
default:
- query->callback(query, -EIO, NULL);
+ status = -EIO;
break;
}
- spin_lock_irqsave(&idr_lock, flags);
- idr_remove(&query_idr, query->id);
- spin_unlock_irqrestore(&idr_lock, flags);
+ if (status)
+ query->callback ? query->callback(query, status, NULL) :
+ query->rmpp_callback(query, status, NULL);
+ }
+
+ xa_lock_irqsave(&queries, flags);
+ __xa_erase(&queries, query->id);
+ xa_unlock_irqrestore(&queries, flags);
free_mad(query);
- ib_sa_client_put(query->client);
+ if (query->client)
+ ib_sa_client_put(query->client);
query->release(query);
}
@@ -1879,17 +2267,25 @@ static void recv_handler(struct ib_mad_agent *mad_agent,
struct ib_mad_recv_wc *mad_recv_wc)
{
struct ib_sa_query *query;
+ struct ib_mad *mad;
+
if (!send_buf)
return;
query = send_buf->context[0];
- if (query->callback) {
+ mad = mad_recv_wc->recv_buf.mad;
+
+ if (query->rmpp_callback) {
+ if (mad_recv_wc->wc->status == IB_WC_SUCCESS)
+ query->rmpp_callback(query, mad->mad_hdr.status ?
+ -EINVAL : 0, mad_recv_wc);
+ else
+ query->rmpp_callback(query, -EIO, NULL);
+ } else if (query->callback) {
if (mad_recv_wc->wc->status == IB_WC_SUCCESS)
- query->callback(query,
- mad_recv_wc->recv_buf.mad->mad_hdr.status ?
- -EINVAL : 0,
- (struct ib_sa_mad *) mad_recv_wc->recv_buf.mad);
+ query->callback(query, mad->mad_hdr.status ?
+ -EINVAL : 0, (struct ib_sa_mad *)mad);
else
query->callback(query, -EIO, NULL);
}
@@ -1897,20 +2293,133 @@ static void recv_handler(struct ib_mad_agent *mad_agent,
ib_free_recv_mad(mad_recv_wc);
}
-static void ib_sa_add_one(struct ib_device *device)
+static void update_sm_ah(struct work_struct *work)
+{
+ struct ib_sa_port *port =
+ container_of(work, struct ib_sa_port, update_task);
+ struct ib_sa_sm_ah *new_ah;
+ struct ib_port_attr port_attr;
+ struct rdma_ah_attr ah_attr;
+ bool grh_required;
+
+ if (ib_query_port(port->agent->device, port->port_num, &port_attr)) {
+ pr_warn("Couldn't query port\n");
+ return;
+ }
+
+ new_ah = kmalloc(sizeof(*new_ah), GFP_KERNEL);
+ if (!new_ah)
+ return;
+
+ kref_init(&new_ah->ref);
+ new_ah->src_path_mask = (1 << port_attr.lmc) - 1;
+
+ new_ah->pkey_index = 0;
+ if (ib_find_pkey(port->agent->device, port->port_num,
+ IB_DEFAULT_PKEY_FULL, &new_ah->pkey_index))
+ pr_err("Couldn't find index for default PKey\n");
+
+ memset(&ah_attr, 0, sizeof(ah_attr));
+ ah_attr.type = rdma_ah_find_type(port->agent->device,
+ port->port_num);
+ rdma_ah_set_dlid(&ah_attr, port_attr.sm_lid);
+ rdma_ah_set_sl(&ah_attr, port_attr.sm_sl);
+ rdma_ah_set_port_num(&ah_attr, port->port_num);
+
+ grh_required = rdma_is_grh_required(port->agent->device,
+ port->port_num);
+
+ /*
+ * The OPA sm_lid of 0xFFFF needs special handling so that it can be
+ * differentiated from a permissive LID of 0xFFFF. We set the
+ * grh_required flag here so the SA can program the DGID in the
+ * address handle appropriately
+ */
+ if (ah_attr.type == RDMA_AH_ATTR_TYPE_OPA &&
+ (grh_required ||
+ port_attr.sm_lid == be16_to_cpu(IB_LID_PERMISSIVE)))
+ rdma_ah_set_make_grd(&ah_attr, true);
+
+ if (ah_attr.type == RDMA_AH_ATTR_TYPE_IB && grh_required) {
+ rdma_ah_set_ah_flags(&ah_attr, IB_AH_GRH);
+ rdma_ah_set_subnet_prefix(&ah_attr,
+ cpu_to_be64(port_attr.subnet_prefix));
+ rdma_ah_set_interface_id(&ah_attr,
+ cpu_to_be64(IB_SA_WELL_KNOWN_GUID));
+ }
+
+ new_ah->ah = rdma_create_ah(port->agent->qp->pd, &ah_attr,
+ RDMA_CREATE_AH_SLEEPABLE);
+ if (IS_ERR(new_ah->ah)) {
+ pr_warn("Couldn't create new SM AH\n");
+ kfree(new_ah);
+ return;
+ }
+
+ spin_lock_irq(&port->ah_lock);
+ if (port->sm_ah)
+ kref_put(&port->sm_ah->ref, free_sm_ah);
+ port->sm_ah = new_ah;
+ spin_unlock_irq(&port->ah_lock);
+}
+
+static void ib_sa_event(struct ib_event_handler *handler,
+ struct ib_event *event)
+{
+ if (event->event == IB_EVENT_PORT_ERR ||
+ event->event == IB_EVENT_PORT_ACTIVE ||
+ event->event == IB_EVENT_LID_CHANGE ||
+ event->event == IB_EVENT_PKEY_CHANGE ||
+ event->event == IB_EVENT_SM_CHANGE ||
+ event->event == IB_EVENT_CLIENT_REREGISTER) {
+ unsigned long flags;
+ struct ib_sa_device *sa_dev =
+ container_of(handler, typeof(*sa_dev), event_handler);
+ u32 port_num = event->element.port_num - sa_dev->start_port;
+ struct ib_sa_port *port = &sa_dev->port[port_num];
+
+ if (!rdma_cap_ib_sa(handler->device, port->port_num))
+ return;
+
+ spin_lock_irqsave(&port->ah_lock, flags);
+ if (port->sm_ah)
+ kref_put(&port->sm_ah->ref, free_sm_ah);
+ port->sm_ah = NULL;
+ spin_unlock_irqrestore(&port->ah_lock, flags);
+
+ if (event->event == IB_EVENT_SM_CHANGE ||
+ event->event == IB_EVENT_CLIENT_REREGISTER ||
+ event->event == IB_EVENT_LID_CHANGE ||
+ event->event == IB_EVENT_PORT_ACTIVE) {
+ unsigned long delay =
+ msecs_to_jiffies(IB_SA_CPI_RETRY_WAIT);
+
+ spin_lock_irqsave(&port->classport_lock, flags);
+ port->classport_info.valid = false;
+ port->classport_info.retry_cnt = 0;
+ spin_unlock_irqrestore(&port->classport_lock, flags);
+ queue_delayed_work(ib_wq,
+ &port->ib_cpi_work, delay);
+ }
+ queue_work(ib_wq, &sa_dev->port[port_num].update_task);
+ }
+}
+
+static int ib_sa_add_one(struct ib_device *device)
{
struct ib_sa_device *sa_dev;
int s, e, i;
int count = 0;
+ int ret;
s = rdma_start_port(device);
e = rdma_end_port(device);
- sa_dev = kzalloc(sizeof *sa_dev +
- (e - s + 1) * sizeof (struct ib_sa_port),
+ sa_dev = kzalloc(struct_size(sa_dev, port,
+ size_add(size_sub(e, s), 1)),
GFP_KERNEL);
if (!sa_dev)
- return;
+ return -ENOMEM;
sa_dev->start_port = s;
sa_dev->end_port = e;
@@ -1928,18 +2437,25 @@ static void ib_sa_add_one(struct ib_device *device)
sa_dev->port[i].agent =
ib_register_mad_agent(device, i + s, IB_QPT_GSI,
- NULL, 0, send_handler,
- recv_handler, sa_dev, 0);
- if (IS_ERR(sa_dev->port[i].agent))
+ NULL, IB_MGMT_RMPP_VERSION,
+ send_handler, recv_handler,
+ sa_dev, 0);
+ if (IS_ERR(sa_dev->port[i].agent)) {
+ ret = PTR_ERR(sa_dev->port[i].agent);
goto err;
+ }
INIT_WORK(&sa_dev->port[i].update_task, update_sm_ah);
+ INIT_DELAYED_WORK(&sa_dev->port[i].ib_cpi_work,
+ update_ib_cpi);
count++;
}
- if (!count)
+ if (!count) {
+ ret = -EOPNOTSUPP;
goto free;
+ }
ib_set_client_data(device, &sa_client, sa_dev);
@@ -1951,15 +2467,14 @@ static void ib_sa_add_one(struct ib_device *device)
*/
INIT_IB_EVENT_HANDLER(&sa_dev->event_handler, device, ib_sa_event);
- if (ib_register_event_handler(&sa_dev->event_handler))
- goto err;
+ ib_register_event_handler(&sa_dev->event_handler);
for (i = 0; i <= e - s; ++i) {
if (rdma_cap_ib_sa(device, i + 1))
update_sm_ah(&sa_dev->port[i].update_task);
}
- return;
+ return 0;
err:
while (--i >= 0) {
@@ -1968,7 +2483,7 @@ err:
}
free:
kfree(sa_dev);
- return;
+ return ret;
}
static void ib_sa_remove_one(struct ib_device *device, void *client_data)
@@ -1976,15 +2491,12 @@ static void ib_sa_remove_one(struct ib_device *device, void *client_data)
struct ib_sa_device *sa_dev = client_data;
int i;
- if (!sa_dev)
- return;
-
ib_unregister_event_handler(&sa_dev->event_handler);
-
flush_workqueue(ib_wq);
for (i = 0; i <= sa_dev->end_port - sa_dev->start_port; ++i) {
if (rdma_cap_ib_sa(device, i + 1)) {
+ cancel_delayed_work_sync(&sa_dev->port[i].ib_cpi_work);
ib_unregister_mad_agent(sa_dev->port[i].agent);
if (sa_dev->port[i].sm_ah)
kref_put(&sa_dev->port[i].sm_ah->ref, free_sm_ah);
@@ -2036,9 +2548,8 @@ err1:
void ib_sa_cleanup(void)
{
cancel_delayed_work(&ib_nl_timed_work);
- flush_workqueue(ib_nl_wq);
destroy_workqueue(ib_nl_wq);
mcast_cleanup();
ib_unregister_client(&sa_client);
- idr_destroy(&query_idr);
+ WARN_ON(!xa_empty(&queries));
}
diff --git a/drivers/infiniband/core/security.c b/drivers/infiniband/core/security.c
new file mode 100644
index 000000000000..3512c2e54efc
--- /dev/null
+++ b/drivers/infiniband/core/security.c
@@ -0,0 +1,750 @@
+/*
+ * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/security.h>
+#include <linux/completion.h>
+#include <linux/list.h>
+
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_cache.h>
+#include "core_priv.h"
+#include "mad_priv.h"
+
+static LIST_HEAD(mad_agent_list);
+/* Lock to protect mad_agent_list */
+static DEFINE_SPINLOCK(mad_agent_list_lock);
+
+static struct pkey_index_qp_list *get_pkey_idx_qp_list(struct ib_port_pkey *pp)
+{
+ struct pkey_index_qp_list *pkey = NULL;
+ struct pkey_index_qp_list *tmp_pkey;
+ struct ib_device *dev = pp->sec->dev;
+
+ spin_lock(&dev->port_data[pp->port_num].pkey_list_lock);
+ list_for_each_entry (tmp_pkey, &dev->port_data[pp->port_num].pkey_list,
+ pkey_index_list) {
+ if (tmp_pkey->pkey_index == pp->pkey_index) {
+ pkey = tmp_pkey;
+ break;
+ }
+ }
+ spin_unlock(&dev->port_data[pp->port_num].pkey_list_lock);
+ return pkey;
+}
+
+static int get_pkey_and_subnet_prefix(struct ib_port_pkey *pp,
+ u16 *pkey,
+ u64 *subnet_prefix)
+{
+ struct ib_device *dev = pp->sec->dev;
+ int ret;
+
+ ret = ib_get_cached_pkey(dev, pp->port_num, pp->pkey_index, pkey);
+ if (ret)
+ return ret;
+
+ ib_get_cached_subnet_prefix(dev, pp->port_num, subnet_prefix);
+
+ return ret;
+}
+
+static int enforce_qp_pkey_security(u16 pkey,
+ u64 subnet_prefix,
+ struct ib_qp_security *qp_sec)
+{
+ struct ib_qp_security *shared_qp_sec;
+ int ret;
+
+ ret = security_ib_pkey_access(qp_sec->security, subnet_prefix, pkey);
+ if (ret)
+ return ret;
+
+ list_for_each_entry(shared_qp_sec,
+ &qp_sec->shared_qp_list,
+ shared_qp_list) {
+ ret = security_ib_pkey_access(shared_qp_sec->security,
+ subnet_prefix,
+ pkey);
+ if (ret)
+ return ret;
+ }
+ return 0;
+}
+
+/* The caller of this function must hold the QP security
+ * mutex of the QP of the security structure in *pps.
+ *
+ * It takes separate ports_pkeys and security structure
+ * because in some cases the pps will be for a new settings
+ * or the pps will be for the real QP and security structure
+ * will be for a shared QP.
+ */
+static int check_qp_port_pkey_settings(struct ib_ports_pkeys *pps,
+ struct ib_qp_security *sec)
+{
+ u64 subnet_prefix;
+ u16 pkey;
+ int ret = 0;
+
+ if (!pps)
+ return 0;
+
+ if (pps->main.state != IB_PORT_PKEY_NOT_VALID) {
+ ret = get_pkey_and_subnet_prefix(&pps->main,
+ &pkey,
+ &subnet_prefix);
+ if (ret)
+ return ret;
+
+ ret = enforce_qp_pkey_security(pkey,
+ subnet_prefix,
+ sec);
+ if (ret)
+ return ret;
+ }
+
+ if (pps->alt.state != IB_PORT_PKEY_NOT_VALID) {
+ ret = get_pkey_and_subnet_prefix(&pps->alt,
+ &pkey,
+ &subnet_prefix);
+ if (ret)
+ return ret;
+
+ ret = enforce_qp_pkey_security(pkey,
+ subnet_prefix,
+ sec);
+ }
+
+ return ret;
+}
+
+/* The caller of this function must hold the QP security
+ * mutex.
+ */
+static void qp_to_error(struct ib_qp_security *sec)
+{
+ struct ib_qp_security *shared_qp_sec;
+ struct ib_qp_attr attr = {
+ .qp_state = IB_QPS_ERR
+ };
+ struct ib_event event = {
+ .event = IB_EVENT_QP_FATAL
+ };
+
+ /* If the QP is in the process of being destroyed
+ * the qp pointer in the security structure is
+ * undefined. It cannot be modified now.
+ */
+ if (sec->destroying)
+ return;
+
+ ib_modify_qp(sec->qp,
+ &attr,
+ IB_QP_STATE);
+
+ if (sec->qp->event_handler && sec->qp->qp_context) {
+ event.element.qp = sec->qp;
+ sec->qp->event_handler(&event,
+ sec->qp->qp_context);
+ }
+
+ list_for_each_entry(shared_qp_sec,
+ &sec->shared_qp_list,
+ shared_qp_list) {
+ struct ib_qp *qp = shared_qp_sec->qp;
+
+ if (qp->event_handler && qp->qp_context) {
+ event.element.qp = qp;
+ event.device = qp->device;
+ qp->event_handler(&event,
+ qp->qp_context);
+ }
+ }
+}
+
+static inline void check_pkey_qps(struct pkey_index_qp_list *pkey,
+ struct ib_device *device,
+ u32 port_num,
+ u64 subnet_prefix)
+{
+ struct ib_port_pkey *pp, *tmp_pp;
+ bool comp;
+ LIST_HEAD(to_error_list);
+ u16 pkey_val;
+
+ if (!ib_get_cached_pkey(device,
+ port_num,
+ pkey->pkey_index,
+ &pkey_val)) {
+ spin_lock(&pkey->qp_list_lock);
+ list_for_each_entry(pp, &pkey->qp_list, qp_list) {
+ if (atomic_read(&pp->sec->error_list_count))
+ continue;
+
+ if (enforce_qp_pkey_security(pkey_val,
+ subnet_prefix,
+ pp->sec)) {
+ atomic_inc(&pp->sec->error_list_count);
+ list_add(&pp->to_error_list,
+ &to_error_list);
+ }
+ }
+ spin_unlock(&pkey->qp_list_lock);
+ }
+
+ list_for_each_entry_safe(pp,
+ tmp_pp,
+ &to_error_list,
+ to_error_list) {
+ mutex_lock(&pp->sec->mutex);
+ qp_to_error(pp->sec);
+ list_del(&pp->to_error_list);
+ atomic_dec(&pp->sec->error_list_count);
+ comp = pp->sec->destroying;
+ mutex_unlock(&pp->sec->mutex);
+
+ if (comp)
+ complete(&pp->sec->error_complete);
+ }
+}
+
+/* The caller of this function must hold the QP security
+ * mutex.
+ */
+static int port_pkey_list_insert(struct ib_port_pkey *pp)
+{
+ struct pkey_index_qp_list *tmp_pkey;
+ struct pkey_index_qp_list *pkey;
+ struct ib_device *dev;
+ u32 port_num = pp->port_num;
+ int ret = 0;
+
+ if (pp->state != IB_PORT_PKEY_VALID)
+ return 0;
+
+ dev = pp->sec->dev;
+
+ pkey = get_pkey_idx_qp_list(pp);
+
+ if (!pkey) {
+ bool found = false;
+
+ pkey = kzalloc(sizeof(*pkey), GFP_KERNEL);
+ if (!pkey)
+ return -ENOMEM;
+
+ spin_lock(&dev->port_data[port_num].pkey_list_lock);
+ /* Check for the PKey again. A racing process may
+ * have created it.
+ */
+ list_for_each_entry(tmp_pkey,
+ &dev->port_data[port_num].pkey_list,
+ pkey_index_list) {
+ if (tmp_pkey->pkey_index == pp->pkey_index) {
+ kfree(pkey);
+ pkey = tmp_pkey;
+ found = true;
+ break;
+ }
+ }
+
+ if (!found) {
+ pkey->pkey_index = pp->pkey_index;
+ spin_lock_init(&pkey->qp_list_lock);
+ INIT_LIST_HEAD(&pkey->qp_list);
+ list_add(&pkey->pkey_index_list,
+ &dev->port_data[port_num].pkey_list);
+ }
+ spin_unlock(&dev->port_data[port_num].pkey_list_lock);
+ }
+
+ spin_lock(&pkey->qp_list_lock);
+ list_add(&pp->qp_list, &pkey->qp_list);
+ spin_unlock(&pkey->qp_list_lock);
+
+ pp->state = IB_PORT_PKEY_LISTED;
+
+ return ret;
+}
+
+/* The caller of this function must hold the QP security
+ * mutex.
+ */
+static void port_pkey_list_remove(struct ib_port_pkey *pp)
+{
+ struct pkey_index_qp_list *pkey;
+
+ if (pp->state != IB_PORT_PKEY_LISTED)
+ return;
+
+ pkey = get_pkey_idx_qp_list(pp);
+
+ spin_lock(&pkey->qp_list_lock);
+ list_del(&pp->qp_list);
+ spin_unlock(&pkey->qp_list_lock);
+
+ /* The setting may still be valid, i.e. after
+ * a destroy has failed for example.
+ */
+ pp->state = IB_PORT_PKEY_VALID;
+}
+
+static void destroy_qp_security(struct ib_qp_security *sec)
+{
+ security_ib_free_security(sec->security);
+ kfree(sec->ports_pkeys);
+ kfree(sec);
+}
+
+/* The caller of this function must hold the QP security
+ * mutex.
+ */
+static struct ib_ports_pkeys *get_new_pps(const struct ib_qp *qp,
+ const struct ib_qp_attr *qp_attr,
+ int qp_attr_mask)
+{
+ struct ib_ports_pkeys *new_pps;
+ struct ib_ports_pkeys *qp_pps = qp->qp_sec->ports_pkeys;
+
+ new_pps = kzalloc(sizeof(*new_pps), GFP_KERNEL);
+ if (!new_pps)
+ return NULL;
+
+ if (qp_attr_mask & IB_QP_PORT)
+ new_pps->main.port_num = qp_attr->port_num;
+ else if (qp_pps)
+ new_pps->main.port_num = qp_pps->main.port_num;
+
+ if (qp_attr_mask & IB_QP_PKEY_INDEX)
+ new_pps->main.pkey_index = qp_attr->pkey_index;
+ else if (qp_pps)
+ new_pps->main.pkey_index = qp_pps->main.pkey_index;
+
+ if (((qp_attr_mask & IB_QP_PKEY_INDEX) &&
+ (qp_attr_mask & IB_QP_PORT)) ||
+ (qp_pps && qp_pps->main.state != IB_PORT_PKEY_NOT_VALID))
+ new_pps->main.state = IB_PORT_PKEY_VALID;
+
+ if (qp_attr_mask & IB_QP_ALT_PATH) {
+ new_pps->alt.port_num = qp_attr->alt_port_num;
+ new_pps->alt.pkey_index = qp_attr->alt_pkey_index;
+ new_pps->alt.state = IB_PORT_PKEY_VALID;
+ } else if (qp_pps) {
+ new_pps->alt.port_num = qp_pps->alt.port_num;
+ new_pps->alt.pkey_index = qp_pps->alt.pkey_index;
+ if (qp_pps->alt.state != IB_PORT_PKEY_NOT_VALID)
+ new_pps->alt.state = IB_PORT_PKEY_VALID;
+ }
+
+ new_pps->main.sec = qp->qp_sec;
+ new_pps->alt.sec = qp->qp_sec;
+ return new_pps;
+}
+
+int ib_open_shared_qp_security(struct ib_qp *qp, struct ib_device *dev)
+{
+ struct ib_qp *real_qp = qp->real_qp;
+ int ret;
+
+ ret = ib_create_qp_security(qp, dev);
+
+ if (ret)
+ return ret;
+
+ if (!qp->qp_sec)
+ return 0;
+
+ mutex_lock(&real_qp->qp_sec->mutex);
+ ret = check_qp_port_pkey_settings(real_qp->qp_sec->ports_pkeys,
+ qp->qp_sec);
+
+ if (ret)
+ goto ret;
+
+ if (qp != real_qp)
+ list_add(&qp->qp_sec->shared_qp_list,
+ &real_qp->qp_sec->shared_qp_list);
+ret:
+ mutex_unlock(&real_qp->qp_sec->mutex);
+ if (ret)
+ destroy_qp_security(qp->qp_sec);
+
+ return ret;
+}
+
+void ib_close_shared_qp_security(struct ib_qp_security *sec)
+{
+ struct ib_qp *real_qp = sec->qp->real_qp;
+
+ mutex_lock(&real_qp->qp_sec->mutex);
+ list_del(&sec->shared_qp_list);
+ mutex_unlock(&real_qp->qp_sec->mutex);
+
+ destroy_qp_security(sec);
+}
+
+int ib_create_qp_security(struct ib_qp *qp, struct ib_device *dev)
+{
+ unsigned int i;
+ bool is_ib = false;
+ int ret;
+
+ rdma_for_each_port (dev, i) {
+ is_ib = rdma_protocol_ib(dev, i);
+ if (is_ib)
+ break;
+ }
+
+ /* If this isn't an IB device don't create the security context */
+ if (!is_ib)
+ return 0;
+
+ qp->qp_sec = kzalloc(sizeof(*qp->qp_sec), GFP_KERNEL);
+ if (!qp->qp_sec)
+ return -ENOMEM;
+
+ qp->qp_sec->qp = qp;
+ qp->qp_sec->dev = dev;
+ mutex_init(&qp->qp_sec->mutex);
+ INIT_LIST_HEAD(&qp->qp_sec->shared_qp_list);
+ atomic_set(&qp->qp_sec->error_list_count, 0);
+ init_completion(&qp->qp_sec->error_complete);
+ ret = security_ib_alloc_security(&qp->qp_sec->security);
+ if (ret) {
+ kfree(qp->qp_sec);
+ qp->qp_sec = NULL;
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL(ib_create_qp_security);
+
+void ib_destroy_qp_security_begin(struct ib_qp_security *sec)
+{
+ /* Return if not IB */
+ if (!sec)
+ return;
+
+ mutex_lock(&sec->mutex);
+
+ /* Remove the QP from the lists so it won't get added to
+ * a to_error_list during the destroy process.
+ */
+ if (sec->ports_pkeys) {
+ port_pkey_list_remove(&sec->ports_pkeys->main);
+ port_pkey_list_remove(&sec->ports_pkeys->alt);
+ }
+
+ /* If the QP is already in one or more of those lists
+ * the destroying flag will ensure the to error flow
+ * doesn't operate on an undefined QP.
+ */
+ sec->destroying = true;
+
+ /* Record the error list count to know how many completions
+ * to wait for.
+ */
+ sec->error_comps_pending = atomic_read(&sec->error_list_count);
+
+ mutex_unlock(&sec->mutex);
+}
+
+void ib_destroy_qp_security_abort(struct ib_qp_security *sec)
+{
+ int ret;
+ int i;
+
+ /* Return if not IB */
+ if (!sec)
+ return;
+
+ /* If a concurrent cache update is in progress this
+ * QP security could be marked for an error state
+ * transition. Wait for this to complete.
+ */
+ for (i = 0; i < sec->error_comps_pending; i++)
+ wait_for_completion(&sec->error_complete);
+
+ mutex_lock(&sec->mutex);
+ sec->destroying = false;
+
+ /* Restore the position in the lists and verify
+ * access is still allowed in case a cache update
+ * occurred while attempting to destroy.
+ *
+ * Because these setting were listed already
+ * and removed during ib_destroy_qp_security_begin
+ * we know the pkey_index_qp_list for the PKey
+ * already exists so port_pkey_list_insert won't fail.
+ */
+ if (sec->ports_pkeys) {
+ port_pkey_list_insert(&sec->ports_pkeys->main);
+ port_pkey_list_insert(&sec->ports_pkeys->alt);
+ }
+
+ ret = check_qp_port_pkey_settings(sec->ports_pkeys, sec);
+ if (ret)
+ qp_to_error(sec);
+
+ mutex_unlock(&sec->mutex);
+}
+
+void ib_destroy_qp_security_end(struct ib_qp_security *sec)
+{
+ int i;
+
+ /* Return if not IB */
+ if (!sec)
+ return;
+
+ /* If a concurrent cache update is occurring we must
+ * wait until this QP security structure is processed
+ * in the QP to error flow before destroying it because
+ * the to_error_list is in use.
+ */
+ for (i = 0; i < sec->error_comps_pending; i++)
+ wait_for_completion(&sec->error_complete);
+
+ destroy_qp_security(sec);
+}
+
+void ib_security_cache_change(struct ib_device *device,
+ u32 port_num,
+ u64 subnet_prefix)
+{
+ struct pkey_index_qp_list *pkey;
+
+ list_for_each_entry (pkey, &device->port_data[port_num].pkey_list,
+ pkey_index_list) {
+ check_pkey_qps(pkey,
+ device,
+ port_num,
+ subnet_prefix);
+ }
+}
+
+void ib_security_release_port_pkey_list(struct ib_device *device)
+{
+ struct pkey_index_qp_list *pkey, *tmp_pkey;
+ unsigned int i;
+
+ rdma_for_each_port (device, i) {
+ list_for_each_entry_safe(pkey,
+ tmp_pkey,
+ &device->port_data[i].pkey_list,
+ pkey_index_list) {
+ list_del(&pkey->pkey_index_list);
+ kfree(pkey);
+ }
+ }
+}
+
+int ib_security_modify_qp(struct ib_qp *qp,
+ struct ib_qp_attr *qp_attr,
+ int qp_attr_mask,
+ struct ib_udata *udata)
+{
+ int ret = 0;
+ struct ib_ports_pkeys *tmp_pps;
+ struct ib_ports_pkeys *new_pps = NULL;
+ struct ib_qp *real_qp = qp->real_qp;
+ bool special_qp = (real_qp->qp_type == IB_QPT_SMI ||
+ real_qp->qp_type == IB_QPT_GSI ||
+ real_qp->qp_type >= IB_QPT_RESERVED1);
+ bool pps_change = ((qp_attr_mask & (IB_QP_PKEY_INDEX | IB_QP_PORT)) ||
+ (qp_attr_mask & IB_QP_ALT_PATH));
+
+ WARN_ONCE((qp_attr_mask & IB_QP_PORT &&
+ rdma_protocol_ib(real_qp->device, qp_attr->port_num) &&
+ !real_qp->qp_sec),
+ "%s: QP security is not initialized for IB QP: %u\n",
+ __func__, real_qp->qp_num);
+
+ /* The port/pkey settings are maintained only for the real QP. Open
+ * handles on the real QP will be in the shared_qp_list. When
+ * enforcing security on the real QP all the shared QPs will be
+ * checked as well.
+ */
+
+ if (pps_change && !special_qp && real_qp->qp_sec) {
+ mutex_lock(&real_qp->qp_sec->mutex);
+ new_pps = get_new_pps(real_qp,
+ qp_attr,
+ qp_attr_mask);
+ if (!new_pps) {
+ mutex_unlock(&real_qp->qp_sec->mutex);
+ return -ENOMEM;
+ }
+ /* Add this QP to the lists for the new port
+ * and pkey settings before checking for permission
+ * in case there is a concurrent cache update
+ * occurring. Walking the list for a cache change
+ * doesn't acquire the security mutex unless it's
+ * sending the QP to error.
+ */
+ ret = port_pkey_list_insert(&new_pps->main);
+
+ if (!ret)
+ ret = port_pkey_list_insert(&new_pps->alt);
+
+ if (!ret)
+ ret = check_qp_port_pkey_settings(new_pps,
+ real_qp->qp_sec);
+ }
+
+ if (!ret)
+ ret = real_qp->device->ops.modify_qp(real_qp,
+ qp_attr,
+ qp_attr_mask,
+ udata);
+
+ if (new_pps) {
+ /* Clean up the lists and free the appropriate
+ * ports_pkeys structure.
+ */
+ if (ret) {
+ tmp_pps = new_pps;
+ } else {
+ tmp_pps = real_qp->qp_sec->ports_pkeys;
+ real_qp->qp_sec->ports_pkeys = new_pps;
+ }
+
+ if (tmp_pps) {
+ port_pkey_list_remove(&tmp_pps->main);
+ port_pkey_list_remove(&tmp_pps->alt);
+ }
+ kfree(tmp_pps);
+ mutex_unlock(&real_qp->qp_sec->mutex);
+ }
+ return ret;
+}
+
+static int ib_security_pkey_access(struct ib_device *dev,
+ u32 port_num,
+ u16 pkey_index,
+ void *sec)
+{
+ u64 subnet_prefix;
+ u16 pkey;
+ int ret;
+
+ if (!rdma_protocol_ib(dev, port_num))
+ return 0;
+
+ ret = ib_get_cached_pkey(dev, port_num, pkey_index, &pkey);
+ if (ret)
+ return ret;
+
+ ib_get_cached_subnet_prefix(dev, port_num, &subnet_prefix);
+
+ return security_ib_pkey_access(sec, subnet_prefix, pkey);
+}
+
+void ib_mad_agent_security_change(void)
+{
+ struct ib_mad_agent *ag;
+
+ spin_lock(&mad_agent_list_lock);
+ list_for_each_entry(ag,
+ &mad_agent_list,
+ mad_agent_sec_list)
+ WRITE_ONCE(ag->smp_allowed,
+ !security_ib_endport_manage_subnet(ag->security,
+ dev_name(&ag->device->dev), ag->port_num));
+ spin_unlock(&mad_agent_list_lock);
+}
+
+int ib_mad_agent_security_setup(struct ib_mad_agent *agent,
+ enum ib_qp_type qp_type)
+{
+ int ret;
+
+ if (!rdma_protocol_ib(agent->device, agent->port_num))
+ return 0;
+
+ INIT_LIST_HEAD(&agent->mad_agent_sec_list);
+
+ ret = security_ib_alloc_security(&agent->security);
+ if (ret)
+ return ret;
+
+ if (qp_type != IB_QPT_SMI)
+ return 0;
+
+ spin_lock(&mad_agent_list_lock);
+ ret = security_ib_endport_manage_subnet(agent->security,
+ dev_name(&agent->device->dev),
+ agent->port_num);
+ if (ret)
+ goto free_security;
+
+ WRITE_ONCE(agent->smp_allowed, true);
+ list_add(&agent->mad_agent_sec_list, &mad_agent_list);
+ spin_unlock(&mad_agent_list_lock);
+ return 0;
+
+free_security:
+ spin_unlock(&mad_agent_list_lock);
+ security_ib_free_security(agent->security);
+ return ret;
+}
+
+void ib_mad_agent_security_cleanup(struct ib_mad_agent *agent)
+{
+ if (!rdma_protocol_ib(agent->device, agent->port_num))
+ return;
+
+ if (agent->qp->qp_type == IB_QPT_SMI) {
+ spin_lock(&mad_agent_list_lock);
+ list_del(&agent->mad_agent_sec_list);
+ spin_unlock(&mad_agent_list_lock);
+ }
+
+ security_ib_free_security(agent->security);
+}
+
+int ib_mad_enforce_security(struct ib_mad_agent_private *map, u16 pkey_index)
+{
+ if (!rdma_protocol_ib(map->agent.device, map->agent.port_num))
+ return 0;
+
+ if (map->agent.qp->qp_type == IB_QPT_SMI) {
+ if (!READ_ONCE(map->agent.smp_allowed))
+ return -EACCES;
+ return 0;
+ }
+
+ return ib_security_pkey_access(map->agent.device,
+ map->agent.port_num,
+ pkey_index,
+ map->agent.security);
+}
diff --git a/drivers/infiniband/core/smi.c b/drivers/infiniband/core/smi.c
index f19b23817c2b..45f09b75c893 100644
--- a/drivers/infiniband/core/smi.c
+++ b/drivers/infiniband/core/smi.c
@@ -41,7 +41,7 @@
#include "smi.h"
#include "opa_smi.h"
-static enum smi_action __smi_handle_dr_smp_send(bool is_switch, int port_num,
+static enum smi_action __smi_handle_dr_smp_send(bool is_switch, u32 port_num,
u8 *hop_ptr, u8 hop_cnt,
const u8 *initial_path,
const u8 *return_path,
@@ -127,7 +127,7 @@ static enum smi_action __smi_handle_dr_smp_send(bool is_switch, int port_num,
* Return IB_SMI_DISCARD if the SMP should be discarded
*/
enum smi_action smi_handle_dr_smp_send(struct ib_smp *smp,
- bool is_switch, int port_num)
+ bool is_switch, u32 port_num)
{
return __smi_handle_dr_smp_send(is_switch, port_num,
&smp->hop_ptr, smp->hop_cnt,
@@ -139,7 +139,7 @@ enum smi_action smi_handle_dr_smp_send(struct ib_smp *smp,
}
enum smi_action opa_smi_handle_dr_smp_send(struct opa_smp *smp,
- bool is_switch, int port_num)
+ bool is_switch, u32 port_num)
{
return __smi_handle_dr_smp_send(is_switch, port_num,
&smp->hop_ptr, smp->hop_cnt,
@@ -152,7 +152,7 @@ enum smi_action opa_smi_handle_dr_smp_send(struct opa_smp *smp,
OPA_LID_PERMISSIVE);
}
-static enum smi_action __smi_handle_dr_smp_recv(bool is_switch, int port_num,
+static enum smi_action __smi_handle_dr_smp_recv(bool is_switch, u32 port_num,
int phys_port_cnt,
u8 *hop_ptr, u8 hop_cnt,
const u8 *initial_path,
@@ -238,7 +238,7 @@ static enum smi_action __smi_handle_dr_smp_recv(bool is_switch, int port_num,
* Return IB_SMI_DISCARD if the SMP should be dropped
*/
enum smi_action smi_handle_dr_smp_recv(struct ib_smp *smp, bool is_switch,
- int port_num, int phys_port_cnt)
+ u32 port_num, int phys_port_cnt)
{
return __smi_handle_dr_smp_recv(is_switch, port_num, phys_port_cnt,
&smp->hop_ptr, smp->hop_cnt,
@@ -254,7 +254,7 @@ enum smi_action smi_handle_dr_smp_recv(struct ib_smp *smp, bool is_switch,
* Return IB_SMI_DISCARD if the SMP should be dropped
*/
enum smi_action opa_smi_handle_dr_smp_recv(struct opa_smp *smp, bool is_switch,
- int port_num, int phys_port_cnt)
+ u32 port_num, int phys_port_cnt)
{
return __smi_handle_dr_smp_recv(is_switch, port_num, phys_port_cnt,
&smp->hop_ptr, smp->hop_cnt,
diff --git a/drivers/infiniband/core/smi.h b/drivers/infiniband/core/smi.h
index 33c91c8a16e9..e350ed623c45 100644
--- a/drivers/infiniband/core/smi.h
+++ b/drivers/infiniband/core/smi.h
@@ -52,11 +52,11 @@ enum smi_forward_action {
};
enum smi_action smi_handle_dr_smp_recv(struct ib_smp *smp, bool is_switch,
- int port_num, int phys_port_cnt);
+ u32 port_num, int phys_port_cnt);
int smi_get_fwd_port(struct ib_smp *smp);
extern enum smi_forward_action smi_check_forward_dr_smp(struct ib_smp *smp);
extern enum smi_action smi_handle_dr_smp_send(struct ib_smp *smp,
- bool is_switch, int port_num);
+ bool is_switch, u32 port_num);
/*
* Return IB_SMI_HANDLE if the SMP should be handled by the local SMA/SM
@@ -67,7 +67,7 @@ static inline enum smi_action smi_check_local_smp(struct ib_smp *smp,
{
/* C14-9:3 -- We're at the end of the DR segment of path */
/* C14-9:4 -- Hop Pointer = Hop Count + 1 -> give to SMA/SM */
- return ((device->process_mad &&
+ return ((device->ops.process_mad &&
!ib_get_smp_direction(smp) &&
(smp->hop_ptr == smp->hop_cnt + 1)) ?
IB_SMI_HANDLE : IB_SMI_DISCARD);
@@ -82,7 +82,7 @@ static inline enum smi_action smi_check_local_returning_smp(struct ib_smp *smp,
{
/* C14-13:3 -- We're at the end of the DR segment of path */
/* C14-13:4 -- Hop Pointer == 0 -> give to SM */
- return ((device->process_mad &&
+ return ((device->ops.process_mad &&
ib_get_smp_direction(smp) &&
!smp->hop_ptr) ? IB_SMI_HANDLE : IB_SMI_DISCARD);
}
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index c1fb545e8d78..0ed862b38b44 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -42,193 +42,262 @@
#include <rdma/ib_mad.h>
#include <rdma/ib_pma.h>
+#include <rdma/ib_cache.h>
+#include <rdma/rdma_counter.h>
+#include <rdma/ib_sysfs.h>
-struct ib_port;
+struct port_table_attribute {
+ struct ib_port_attribute attr;
+ char name[8];
+ int index;
+ __be16 attr_id;
+};
struct gid_attr_group {
- struct ib_port *port;
- struct kobject kobj;
- struct attribute_group ndev;
- struct attribute_group type;
+ struct ib_port *port;
+ struct kobject kobj;
+ struct attribute_group groups[2];
+ const struct attribute_group *groups_list[3];
+ struct port_table_attribute attrs_list[];
};
+
struct ib_port {
- struct kobject kobj;
- struct ib_device *ibdev;
+ struct kobject kobj;
+ struct ib_device *ibdev;
struct gid_attr_group *gid_attr_group;
- struct attribute_group gid_group;
- struct attribute_group pkey_group;
- struct attribute_group *pma_table;
- struct attribute_group *hw_stats_ag;
- struct rdma_hw_stats *hw_stats;
- u8 port_num;
+ struct hw_stats_port_data *hw_stats_data;
+
+ struct attribute_group groups[3];
+ const struct attribute_group *groups_list[5];
+ u32 port_num;
+ struct port_table_attribute attrs_list[];
};
-struct port_attribute {
- struct attribute attr;
- ssize_t (*show)(struct ib_port *, struct port_attribute *, char *buf);
- ssize_t (*store)(struct ib_port *, struct port_attribute *,
+struct hw_stats_device_attribute {
+ struct device_attribute attr;
+ ssize_t (*show)(struct ib_device *ibdev, struct rdma_hw_stats *stats,
+ unsigned int index, unsigned int port_num, char *buf);
+ ssize_t (*store)(struct ib_device *ibdev, struct rdma_hw_stats *stats,
+ unsigned int index, unsigned int port_num,
const char *buf, size_t count);
};
-#define PORT_ATTR(_name, _mode, _show, _store) \
-struct port_attribute port_attr_##_name = __ATTR(_name, _mode, _show, _store)
-
-#define PORT_ATTR_RO(_name) \
-struct port_attribute port_attr_##_name = __ATTR_RO(_name)
+struct hw_stats_port_attribute {
+ struct ib_port_attribute attr;
+ ssize_t (*show)(struct ib_device *ibdev, struct rdma_hw_stats *stats,
+ unsigned int index, unsigned int port_num, char *buf);
+ ssize_t (*store)(struct ib_device *ibdev, struct rdma_hw_stats *stats,
+ unsigned int index, unsigned int port_num,
+ const char *buf, size_t count);
+};
-struct port_table_attribute {
- struct port_attribute attr;
- char name[8];
- int index;
- __be16 attr_id;
+struct hw_stats_device_data {
+ struct attribute_group group;
+ struct rdma_hw_stats *stats;
+ struct hw_stats_device_attribute attrs[];
};
-struct hw_stats_attribute {
- struct attribute attr;
- ssize_t (*show)(struct kobject *kobj,
- struct attribute *attr, char *buf);
- ssize_t (*store)(struct kobject *kobj,
- struct attribute *attr,
- const char *buf,
- size_t count);
- int index;
- u8 port_num;
+struct hw_stats_port_data {
+ struct rdma_hw_stats *stats;
+ struct hw_stats_port_attribute attrs[];
};
static ssize_t port_attr_show(struct kobject *kobj,
struct attribute *attr, char *buf)
{
- struct port_attribute *port_attr =
- container_of(attr, struct port_attribute, attr);
+ struct ib_port_attribute *port_attr =
+ container_of(attr, struct ib_port_attribute, attr);
struct ib_port *p = container_of(kobj, struct ib_port, kobj);
if (!port_attr->show)
return -EIO;
- return port_attr->show(p, port_attr, buf);
+ return port_attr->show(p->ibdev, p->port_num, port_attr, buf);
}
+static ssize_t port_attr_store(struct kobject *kobj,
+ struct attribute *attr,
+ const char *buf, size_t count)
+{
+ struct ib_port_attribute *port_attr =
+ container_of(attr, struct ib_port_attribute, attr);
+ struct ib_port *p = container_of(kobj, struct ib_port, kobj);
+
+ if (!port_attr->store)
+ return -EIO;
+ return port_attr->store(p->ibdev, p->port_num, port_attr, buf, count);
+}
+
+struct ib_device *ib_port_sysfs_get_ibdev_kobj(struct kobject *kobj,
+ u32 *port_num)
+{
+ struct ib_port *port = container_of(kobj, struct ib_port, kobj);
+
+ *port_num = port->port_num;
+ return port->ibdev;
+}
+EXPORT_SYMBOL(ib_port_sysfs_get_ibdev_kobj);
+
static const struct sysfs_ops port_sysfs_ops = {
- .show = port_attr_show
+ .show = port_attr_show,
+ .store = port_attr_store
};
+static ssize_t hw_stat_device_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct hw_stats_device_attribute *stat_attr =
+ container_of(attr, struct hw_stats_device_attribute, attr);
+ struct ib_device *ibdev = container_of(dev, struct ib_device, dev);
+
+ return stat_attr->show(ibdev, ibdev->hw_stats_data->stats,
+ stat_attr - ibdev->hw_stats_data->attrs, 0, buf);
+}
+
+static ssize_t hw_stat_device_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct hw_stats_device_attribute *stat_attr =
+ container_of(attr, struct hw_stats_device_attribute, attr);
+ struct ib_device *ibdev = container_of(dev, struct ib_device, dev);
+
+ return stat_attr->store(ibdev, ibdev->hw_stats_data->stats,
+ stat_attr - ibdev->hw_stats_data->attrs, 0, buf,
+ count);
+}
+
+static ssize_t hw_stat_port_show(struct ib_device *ibdev, u32 port_num,
+ struct ib_port_attribute *attr, char *buf)
+{
+ struct hw_stats_port_attribute *stat_attr =
+ container_of(attr, struct hw_stats_port_attribute, attr);
+ struct ib_port *port = ibdev->port_data[port_num].sysfs;
+
+ return stat_attr->show(ibdev, port->hw_stats_data->stats,
+ stat_attr - port->hw_stats_data->attrs,
+ port->port_num, buf);
+}
+
+static ssize_t hw_stat_port_store(struct ib_device *ibdev, u32 port_num,
+ struct ib_port_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct hw_stats_port_attribute *stat_attr =
+ container_of(attr, struct hw_stats_port_attribute, attr);
+ struct ib_port *port = ibdev->port_data[port_num].sysfs;
+
+ return stat_attr->store(ibdev, port->hw_stats_data->stats,
+ stat_attr - port->hw_stats_data->attrs,
+ port->port_num, buf, count);
+}
+
static ssize_t gid_attr_show(struct kobject *kobj,
struct attribute *attr, char *buf)
{
- struct port_attribute *port_attr =
- container_of(attr, struct port_attribute, attr);
+ struct ib_port_attribute *port_attr =
+ container_of(attr, struct ib_port_attribute, attr);
struct ib_port *p = container_of(kobj, struct gid_attr_group,
kobj)->port;
if (!port_attr->show)
return -EIO;
- return port_attr->show(p, port_attr, buf);
+ return port_attr->show(p->ibdev, p->port_num, port_attr, buf);
}
static const struct sysfs_ops gid_attr_sysfs_ops = {
.show = gid_attr_show
};
-static ssize_t state_show(struct ib_port *p, struct port_attribute *unused,
- char *buf)
+static ssize_t state_show(struct ib_device *ibdev, u32 port_num,
+ struct ib_port_attribute *unused, char *buf)
{
struct ib_port_attr attr;
ssize_t ret;
- static const char *state_name[] = {
- [IB_PORT_NOP] = "NOP",
- [IB_PORT_DOWN] = "DOWN",
- [IB_PORT_INIT] = "INIT",
- [IB_PORT_ARMED] = "ARMED",
- [IB_PORT_ACTIVE] = "ACTIVE",
- [IB_PORT_ACTIVE_DEFER] = "ACTIVE_DEFER"
- };
-
- ret = ib_query_port(p->ibdev, p->port_num, &attr);
+ ret = ib_query_port(ibdev, port_num, &attr);
if (ret)
return ret;
- return sprintf(buf, "%d: %s\n", attr.state,
- attr.state >= 0 && attr.state < ARRAY_SIZE(state_name) ?
- state_name[attr.state] : "UNKNOWN");
+ return sysfs_emit(buf, "%d: %s\n", attr.state,
+ ib_port_state_to_str(attr.state));
}
-static ssize_t lid_show(struct ib_port *p, struct port_attribute *unused,
- char *buf)
+static ssize_t lid_show(struct ib_device *ibdev, u32 port_num,
+ struct ib_port_attribute *unused, char *buf)
{
struct ib_port_attr attr;
ssize_t ret;
- ret = ib_query_port(p->ibdev, p->port_num, &attr);
+ ret = ib_query_port(ibdev, port_num, &attr);
if (ret)
return ret;
- return sprintf(buf, "0x%x\n", attr.lid);
+ return sysfs_emit(buf, "0x%x\n", attr.lid);
}
-static ssize_t lid_mask_count_show(struct ib_port *p,
- struct port_attribute *unused,
- char *buf)
+static ssize_t lid_mask_count_show(struct ib_device *ibdev, u32 port_num,
+ struct ib_port_attribute *unused, char *buf)
{
struct ib_port_attr attr;
ssize_t ret;
- ret = ib_query_port(p->ibdev, p->port_num, &attr);
+ ret = ib_query_port(ibdev, port_num, &attr);
if (ret)
return ret;
- return sprintf(buf, "%d\n", attr.lmc);
+ return sysfs_emit(buf, "%u\n", attr.lmc);
}
-static ssize_t sm_lid_show(struct ib_port *p, struct port_attribute *unused,
- char *buf)
+static ssize_t sm_lid_show(struct ib_device *ibdev, u32 port_num,
+ struct ib_port_attribute *unused, char *buf)
{
struct ib_port_attr attr;
ssize_t ret;
- ret = ib_query_port(p->ibdev, p->port_num, &attr);
+ ret = ib_query_port(ibdev, port_num, &attr);
if (ret)
return ret;
- return sprintf(buf, "0x%x\n", attr.sm_lid);
+ return sysfs_emit(buf, "0x%x\n", attr.sm_lid);
}
-static ssize_t sm_sl_show(struct ib_port *p, struct port_attribute *unused,
- char *buf)
+static ssize_t sm_sl_show(struct ib_device *ibdev, u32 port_num,
+ struct ib_port_attribute *unused, char *buf)
{
struct ib_port_attr attr;
ssize_t ret;
- ret = ib_query_port(p->ibdev, p->port_num, &attr);
+ ret = ib_query_port(ibdev, port_num, &attr);
if (ret)
return ret;
- return sprintf(buf, "%d\n", attr.sm_sl);
+ return sysfs_emit(buf, "%u\n", attr.sm_sl);
}
-static ssize_t cap_mask_show(struct ib_port *p, struct port_attribute *unused,
- char *buf)
+static ssize_t cap_mask_show(struct ib_device *ibdev, u32 port_num,
+ struct ib_port_attribute *unused, char *buf)
{
struct ib_port_attr attr;
ssize_t ret;
- ret = ib_query_port(p->ibdev, p->port_num, &attr);
+ ret = ib_query_port(ibdev, port_num, &attr);
if (ret)
return ret;
- return sprintf(buf, "0x%08x\n", attr.port_cap_flags);
+ return sysfs_emit(buf, "0x%08x\n", attr.port_cap_flags);
}
-static ssize_t rate_show(struct ib_port *p, struct port_attribute *unused,
- char *buf)
+static ssize_t rate_show(struct ib_device *ibdev, u32 port_num,
+ struct ib_port_attribute *unused, char *buf)
{
struct ib_port_attr attr;
char *speed = "";
int rate; /* in deci-Gb/sec */
ssize_t ret;
- ret = ib_query_port(p->ibdev, p->port_num, &attr);
+ ret = ib_query_port(ibdev, port_num, &attr);
if (ret)
return ret;
@@ -253,8 +322,21 @@ static ssize_t rate_show(struct ib_port *p, struct port_attribute *unused,
speed = " EDR";
rate = 250;
break;
+ case IB_SPEED_HDR:
+ speed = " HDR";
+ rate = 500;
+ break;
+ case IB_SPEED_NDR:
+ speed = " NDR";
+ rate = 1000;
+ break;
+ case IB_SPEED_XDR:
+ speed = " XDR";
+ rate = 2000;
+ break;
case IB_SPEED_SDR:
default: /* default to SDR for invalid rates */
+ speed = " SDR";
rate = 25;
break;
}
@@ -263,163 +345,198 @@ static ssize_t rate_show(struct ib_port *p, struct port_attribute *unused,
if (rate < 0)
return -EINVAL;
- return sprintf(buf, "%d%s Gb/sec (%dX%s)\n",
- rate / 10, rate % 10 ? ".5" : "",
- ib_width_enum_to_int(attr.active_width), speed);
+ return sysfs_emit(buf, "%d%s Gb/sec (%dX%s)\n", rate / 10,
+ rate % 10 ? ".5" : "",
+ ib_width_enum_to_int(attr.active_width), speed);
}
-static ssize_t phys_state_show(struct ib_port *p, struct port_attribute *unused,
- char *buf)
+static const char *phys_state_to_str(enum ib_port_phys_state phys_state)
+{
+ static const char *phys_state_str[] = {
+ "<unknown>",
+ "Sleep",
+ "Polling",
+ "Disabled",
+ "PortConfigurationTraining",
+ "LinkUp",
+ "LinkErrorRecovery",
+ "Phy Test",
+ };
+
+ if (phys_state < ARRAY_SIZE(phys_state_str))
+ return phys_state_str[phys_state];
+ return "<unknown>";
+}
+
+static ssize_t phys_state_show(struct ib_device *ibdev, u32 port_num,
+ struct ib_port_attribute *unused, char *buf)
{
struct ib_port_attr attr;
ssize_t ret;
- ret = ib_query_port(p->ibdev, p->port_num, &attr);
+ ret = ib_query_port(ibdev, port_num, &attr);
if (ret)
return ret;
- switch (attr.phys_state) {
- case 1: return sprintf(buf, "1: Sleep\n");
- case 2: return sprintf(buf, "2: Polling\n");
- case 3: return sprintf(buf, "3: Disabled\n");
- case 4: return sprintf(buf, "4: PortConfigurationTraining\n");
- case 5: return sprintf(buf, "5: LinkUp\n");
- case 6: return sprintf(buf, "6: LinkErrorRecovery\n");
- case 7: return sprintf(buf, "7: Phy Test\n");
- default: return sprintf(buf, "%d: <unknown>\n", attr.phys_state);
- }
+ return sysfs_emit(buf, "%u: %s\n", attr.phys_state,
+ phys_state_to_str(attr.phys_state));
}
-static ssize_t link_layer_show(struct ib_port *p, struct port_attribute *unused,
- char *buf)
+static ssize_t link_layer_show(struct ib_device *ibdev, u32 port_num,
+ struct ib_port_attribute *unused, char *buf)
{
- switch (rdma_port_get_link_layer(p->ibdev, p->port_num)) {
+ const char *output;
+
+ switch (rdma_port_get_link_layer(ibdev, port_num)) {
case IB_LINK_LAYER_INFINIBAND:
- return sprintf(buf, "%s\n", "InfiniBand");
+ output = "InfiniBand";
+ break;
case IB_LINK_LAYER_ETHERNET:
- return sprintf(buf, "%s\n", "Ethernet");
+ output = "Ethernet";
+ break;
default:
- return sprintf(buf, "%s\n", "Unknown");
+ output = "Unknown";
+ break;
}
+
+ return sysfs_emit(buf, "%s\n", output);
}
-static PORT_ATTR_RO(state);
-static PORT_ATTR_RO(lid);
-static PORT_ATTR_RO(lid_mask_count);
-static PORT_ATTR_RO(sm_lid);
-static PORT_ATTR_RO(sm_sl);
-static PORT_ATTR_RO(cap_mask);
-static PORT_ATTR_RO(rate);
-static PORT_ATTR_RO(phys_state);
-static PORT_ATTR_RO(link_layer);
+static IB_PORT_ATTR_RO(state);
+static IB_PORT_ATTR_RO(lid);
+static IB_PORT_ATTR_RO(lid_mask_count);
+static IB_PORT_ATTR_RO(sm_lid);
+static IB_PORT_ATTR_RO(sm_sl);
+static IB_PORT_ATTR_RO(cap_mask);
+static IB_PORT_ATTR_RO(rate);
+static IB_PORT_ATTR_RO(phys_state);
+static IB_PORT_ATTR_RO(link_layer);
static struct attribute *port_default_attrs[] = {
- &port_attr_state.attr,
- &port_attr_lid.attr,
- &port_attr_lid_mask_count.attr,
- &port_attr_sm_lid.attr,
- &port_attr_sm_sl.attr,
- &port_attr_cap_mask.attr,
- &port_attr_rate.attr,
- &port_attr_phys_state.attr,
- &port_attr_link_layer.attr,
+ &ib_port_attr_state.attr,
+ &ib_port_attr_lid.attr,
+ &ib_port_attr_lid_mask_count.attr,
+ &ib_port_attr_sm_lid.attr,
+ &ib_port_attr_sm_sl.attr,
+ &ib_port_attr_cap_mask.attr,
+ &ib_port_attr_rate.attr,
+ &ib_port_attr_phys_state.attr,
+ &ib_port_attr_link_layer.attr,
NULL
};
+ATTRIBUTE_GROUPS(port_default);
-static size_t print_ndev(struct ib_gid_attr *gid_attr, char *buf)
+static ssize_t print_ndev(const struct ib_gid_attr *gid_attr, char *buf)
{
- if (!gid_attr->ndev)
- return -EINVAL;
-
- return sprintf(buf, "%s\n", gid_attr->ndev->name);
+ struct net_device *ndev;
+ int ret = -EINVAL;
+
+ rcu_read_lock();
+ ndev = rcu_dereference(gid_attr->ndev);
+ if (ndev)
+ ret = sysfs_emit(buf, "%s\n", ndev->name);
+ rcu_read_unlock();
+ return ret;
}
-static size_t print_gid_type(struct ib_gid_attr *gid_attr, char *buf)
+static ssize_t print_gid_type(const struct ib_gid_attr *gid_attr, char *buf)
{
- return sprintf(buf, "%s\n", ib_cache_gid_type_str(gid_attr->gid_type));
+ return sysfs_emit(buf, "%s\n",
+ ib_cache_gid_type_str(gid_attr->gid_type));
}
-static ssize_t _show_port_gid_attr(struct ib_port *p,
- struct port_attribute *attr,
- char *buf,
- size_t (*print)(struct ib_gid_attr *gid_attr,
- char *buf))
+static ssize_t _show_port_gid_attr(
+ struct ib_device *ibdev, u32 port_num, struct ib_port_attribute *attr,
+ char *buf,
+ ssize_t (*print)(const struct ib_gid_attr *gid_attr, char *buf))
{
struct port_table_attribute *tab_attr =
container_of(attr, struct port_table_attribute, attr);
- union ib_gid gid;
- struct ib_gid_attr gid_attr = {};
+ const struct ib_gid_attr *gid_attr;
ssize_t ret;
- ret = ib_query_gid(p->ibdev, p->port_num, tab_attr->index, &gid,
- &gid_attr);
- if (ret)
- goto err;
-
- ret = print(&gid_attr, buf);
+ gid_attr = rdma_get_gid_attr(ibdev, port_num, tab_attr->index);
+ if (IS_ERR(gid_attr))
+ /* -EINVAL is returned for user space compatibility reasons. */
+ return -EINVAL;
-err:
- if (gid_attr.ndev)
- dev_put(gid_attr.ndev);
+ ret = print(gid_attr, buf);
+ rdma_put_gid_attr(gid_attr);
return ret;
}
-static ssize_t show_port_gid(struct ib_port *p, struct port_attribute *attr,
- char *buf)
+static ssize_t show_port_gid(struct ib_device *ibdev, u32 port_num,
+ struct ib_port_attribute *attr, char *buf)
{
struct port_table_attribute *tab_attr =
container_of(attr, struct port_table_attribute, attr);
- union ib_gid gid;
- ssize_t ret;
-
- ret = ib_query_gid(p->ibdev, p->port_num, tab_attr->index, &gid, NULL);
- if (ret)
- return ret;
+ const struct ib_gid_attr *gid_attr;
+ int len;
+
+ gid_attr = rdma_get_gid_attr(ibdev, port_num, tab_attr->index);
+ if (IS_ERR(gid_attr)) {
+ const union ib_gid zgid = {};
+
+ /* If reading GID fails, it is likely due to GID entry being
+ * empty (invalid) or reserved GID in the table. User space
+ * expects to read GID table entries as long as it given index
+ * is within GID table size. Administrative/debugging tool
+ * fails to query rest of the GID entries if it hits error
+ * while querying a GID of the given index. To avoid user
+ * space throwing such error on fail to read gid, return zero
+ * GID as before. This maintains backward compatibility.
+ */
+ return sysfs_emit(buf, "%pI6\n", zgid.raw);
+ }
- return sprintf(buf, "%pI6\n", gid.raw);
+ len = sysfs_emit(buf, "%pI6\n", gid_attr->gid.raw);
+ rdma_put_gid_attr(gid_attr);
+ return len;
}
-static ssize_t show_port_gid_attr_ndev(struct ib_port *p,
- struct port_attribute *attr, char *buf)
+static ssize_t show_port_gid_attr_ndev(struct ib_device *ibdev, u32 port_num,
+ struct ib_port_attribute *attr,
+ char *buf)
{
- return _show_port_gid_attr(p, attr, buf, print_ndev);
+ return _show_port_gid_attr(ibdev, port_num, attr, buf, print_ndev);
}
-static ssize_t show_port_gid_attr_gid_type(struct ib_port *p,
- struct port_attribute *attr,
+static ssize_t show_port_gid_attr_gid_type(struct ib_device *ibdev,
+ u32 port_num,
+ struct ib_port_attribute *attr,
char *buf)
{
- return _show_port_gid_attr(p, attr, buf, print_gid_type);
+ return _show_port_gid_attr(ibdev, port_num, attr, buf, print_gid_type);
}
-static ssize_t show_port_pkey(struct ib_port *p, struct port_attribute *attr,
- char *buf)
+static ssize_t show_port_pkey(struct ib_device *ibdev, u32 port_num,
+ struct ib_port_attribute *attr, char *buf)
{
struct port_table_attribute *tab_attr =
container_of(attr, struct port_table_attribute, attr);
u16 pkey;
- ssize_t ret;
+ int ret;
- ret = ib_query_pkey(p->ibdev, p->port_num, tab_attr->index, &pkey);
+ ret = ib_query_pkey(ibdev, port_num, tab_attr->index, &pkey);
if (ret)
return ret;
- return sprintf(buf, "0x%04x\n", pkey);
+ return sysfs_emit(buf, "0x%04x\n", pkey);
}
#define PORT_PMA_ATTR(_name, _counter, _width, _offset) \
struct port_table_attribute port_pma_attr_##_name = { \
.attr = __ATTR(_name, S_IRUGO, show_pma_counter, NULL), \
.index = (_offset) | ((_width) << 16) | ((_counter) << 24), \
- .attr_id = IB_PMA_PORT_COUNTERS , \
+ .attr_id = IB_PMA_PORT_COUNTERS, \
}
#define PORT_PMA_ATTR_EXT(_name, _width, _offset) \
struct port_table_attribute port_pma_attr_ext_##_name = { \
.attr = __ATTR(_name, S_IRUGO, show_pma_counter, NULL), \
.index = (_offset) | ((_width) << 16), \
- .attr_id = IB_PMA_PORT_COUNTERS_EXT , \
+ .attr_id = IB_PMA_PORT_COUNTERS_EXT, \
}
/*
@@ -435,11 +552,11 @@ static int get_perf_mad(struct ib_device *dev, int port_num, __be16 attr,
u16 out_mad_pkey_index = 0;
ssize_t ret;
- if (!dev->process_mad)
+ if (!dev->ops.process_mad)
return -ENOSYS;
- in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
- out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
+ in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL);
+ out_mad = kzalloc(sizeof(*out_mad), GFP_KERNEL);
if (!in_mad || !out_mad) {
ret = -ENOMEM;
goto out;
@@ -454,11 +571,9 @@ static int get_perf_mad(struct ib_device *dev, int port_num, __be16 attr,
if (attr != IB_PMA_CLASS_PORT_INFO)
in_mad->data[41] = port_num; /* PortSelect field */
- if ((dev->process_mad(dev, IB_MAD_IGNORE_MKEY,
- port_num, NULL, NULL,
- (const struct ib_mad_hdr *)in_mad, mad_size,
- (struct ib_mad_hdr *)out_mad, &mad_size,
- &out_mad_pkey_index) &
+ if ((dev->ops.process_mad(dev, IB_MAD_IGNORE_MKEY, port_num, NULL, NULL,
+ in_mad, out_mad, &mad_size,
+ &out_mad_pkey_index) &
(IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY)) !=
(IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY)) {
ret = -EINVAL;
@@ -472,47 +587,45 @@ out:
return ret;
}
-static ssize_t show_pma_counter(struct ib_port *p, struct port_attribute *attr,
- char *buf)
+static ssize_t show_pma_counter(struct ib_device *ibdev, u32 port_num,
+ struct ib_port_attribute *attr, char *buf)
{
struct port_table_attribute *tab_attr =
container_of(attr, struct port_table_attribute, attr);
int offset = tab_attr->index & 0xffff;
int width = (tab_attr->index >> 16) & 0xff;
- ssize_t ret;
+ int ret;
u8 data[8];
+ int len;
- ret = get_perf_mad(p->ibdev, p->port_num, tab_attr->attr_id, &data,
+ ret = get_perf_mad(ibdev, port_num, tab_attr->attr_id, &data,
40 + offset / 8, sizeof(data));
if (ret < 0)
- return sprintf(buf, "N/A (no PMA)\n");
+ return ret;
switch (width) {
case 4:
- ret = sprintf(buf, "%u\n", (*data >>
- (4 - (offset % 8))) & 0xf);
+ len = sysfs_emit(buf, "%d\n",
+ (*data >> (4 - (offset % 8))) & 0xf);
break;
case 8:
- ret = sprintf(buf, "%u\n", *data);
+ len = sysfs_emit(buf, "%u\n", *data);
break;
case 16:
- ret = sprintf(buf, "%u\n",
- be16_to_cpup((__be16 *)data));
+ len = sysfs_emit(buf, "%u\n", be16_to_cpup((__be16 *)data));
break;
case 32:
- ret = sprintf(buf, "%u\n",
- be32_to_cpup((__be32 *)data));
+ len = sysfs_emit(buf, "%u\n", be32_to_cpup((__be32 *)data));
break;
case 64:
- ret = sprintf(buf, "%llu\n",
- be64_to_cpup((__be64 *)data));
+ len = sysfs_emit(buf, "%llu\n", be64_to_cpup((__be64 *)data));
break;
-
default:
- ret = 0;
+ len = 0;
+ break;
}
- return ret;
+ return len;
}
static PORT_PMA_ATTR(symbol_error , 0, 16, 32);
@@ -612,72 +725,49 @@ static struct attribute *pma_attrs_noietf[] = {
NULL
};
-static struct attribute_group pma_group = {
+static const struct attribute_group pma_group = {
.name = "counters",
.attrs = pma_attrs
};
-static struct attribute_group pma_group_ext = {
+static const struct attribute_group pma_group_ext = {
.name = "counters",
.attrs = pma_attrs_ext
};
-static struct attribute_group pma_group_noietf = {
+static const struct attribute_group pma_group_noietf = {
.name = "counters",
.attrs = pma_attrs_noietf
};
static void ib_port_release(struct kobject *kobj)
{
- struct ib_port *p = container_of(kobj, struct ib_port, kobj);
- struct attribute *a;
+ struct ib_port *port = container_of(kobj, struct ib_port, kobj);
int i;
- if (p->gid_group.attrs) {
- for (i = 0; (a = p->gid_group.attrs[i]); ++i)
- kfree(a);
-
- kfree(p->gid_group.attrs);
- }
-
- if (p->pkey_group.attrs) {
- for (i = 0; (a = p->pkey_group.attrs[i]); ++i)
- kfree(a);
-
- kfree(p->pkey_group.attrs);
- }
-
- kfree(p);
+ for (i = 0; i != ARRAY_SIZE(port->groups); i++)
+ kfree(port->groups[i].attrs);
+ if (port->hw_stats_data)
+ rdma_free_hw_stats_struct(port->hw_stats_data->stats);
+ kfree(port->hw_stats_data);
+ kvfree(port);
}
static void ib_port_gid_attr_release(struct kobject *kobj)
{
- struct gid_attr_group *g = container_of(kobj, struct gid_attr_group,
- kobj);
- struct attribute *a;
+ struct gid_attr_group *gid_attr_group =
+ container_of(kobj, struct gid_attr_group, kobj);
int i;
- if (g->ndev.attrs) {
- for (i = 0; (a = g->ndev.attrs[i]); ++i)
- kfree(a);
-
- kfree(g->ndev.attrs);
- }
-
- if (g->type.attrs) {
- for (i = 0; (a = g->type.attrs[i]); ++i)
- kfree(a);
-
- kfree(g->type.attrs);
- }
-
- kfree(g);
+ for (i = 0; i != ARRAY_SIZE(gid_attr_group->groups); i++)
+ kfree(gid_attr_group->groups[i].attrs);
+ kfree(gid_attr_group);
}
static struct kobj_type port_type = {
.release = ib_port_release,
.sysfs_ops = &port_sysfs_ops,
- .default_attrs = port_default_attrs
+ .default_groups = port_default_groups,
};
static struct kobj_type gid_attr_type = {
@@ -685,55 +775,12 @@ static struct kobj_type gid_attr_type = {
.release = ib_port_gid_attr_release
};
-static struct attribute **
-alloc_group_attrs(ssize_t (*show)(struct ib_port *,
- struct port_attribute *, char *buf),
- int len)
-{
- struct attribute **tab_attr;
- struct port_table_attribute *element;
- int i;
-
- tab_attr = kcalloc(1 + len, sizeof(struct attribute *), GFP_KERNEL);
- if (!tab_attr)
- return NULL;
-
- for (i = 0; i < len; i++) {
- element = kzalloc(sizeof(struct port_table_attribute),
- GFP_KERNEL);
- if (!element)
- goto err;
-
- if (snprintf(element->name, sizeof(element->name),
- "%d", i) >= sizeof(element->name)) {
- kfree(element);
- goto err;
- }
-
- element->attr.attr.name = element->name;
- element->attr.attr.mode = S_IRUGO;
- element->attr.show = show;
- element->index = i;
- sysfs_attr_init(&element->attr.attr);
-
- tab_attr[i] = &element->attr.attr;
- }
-
- return tab_attr;
-
-err:
- while (--i >= 0)
- kfree(tab_attr[i]);
- kfree(tab_attr);
- return NULL;
-}
-
/*
* Figure out which counter table to use depending on
* the device capabilities.
*/
-static struct attribute_group *get_counter_table(struct ib_device *dev,
- int port_num)
+static const struct attribute_group *get_counter_table(struct ib_device *dev,
+ int port_num)
{
struct ib_class_port_info cpi;
@@ -753,13 +800,13 @@ static struct attribute_group *get_counter_table(struct ib_device *dev,
}
static int update_hw_stats(struct ib_device *dev, struct rdma_hw_stats *stats,
- u8 port_num, int index)
+ u32 port_num, int index)
{
int ret;
if (time_is_after_eq_jiffies(stats->timestamp + stats->lifespan))
return 0;
- ret = dev->get_hw_stats(dev, stats, port_num, index);
+ ret = dev->ops.get_hw_stats(dev, stats, port_num, index);
if (ret < 0)
return ret;
if (ret == stats->num_counters)
@@ -768,60 +815,50 @@ static int update_hw_stats(struct ib_device *dev, struct rdma_hw_stats *stats,
return 0;
}
-static ssize_t print_hw_stat(struct rdma_hw_stats *stats, int index, char *buf)
+static int print_hw_stat(struct ib_device *dev, int port_num,
+ struct rdma_hw_stats *stats, int index, char *buf)
{
- return sprintf(buf, "%llu\n", stats->value[index]);
+ u64 v = rdma_counter_get_hwstat_value(dev, port_num, index);
+
+ return sysfs_emit(buf, "%llu\n", stats->value[index] + v);
}
-static ssize_t show_hw_stats(struct kobject *kobj, struct attribute *attr,
- char *buf)
+static ssize_t show_hw_stats(struct ib_device *ibdev,
+ struct rdma_hw_stats *stats, unsigned int index,
+ unsigned int port_num, char *buf)
{
- struct ib_device *dev;
- struct ib_port *port;
- struct hw_stats_attribute *hsa;
- struct rdma_hw_stats *stats;
int ret;
- hsa = container_of(attr, struct hw_stats_attribute, attr);
- if (!hsa->port_num) {
- dev = container_of((struct device *)kobj,
- struct ib_device, dev);
- stats = dev->hw_stats;
- } else {
- port = container_of(kobj, struct ib_port, kobj);
- dev = port->ibdev;
- stats = port->hw_stats;
- }
- ret = update_hw_stats(dev, stats, hsa->port_num, hsa->index);
+ mutex_lock(&stats->lock);
+ ret = update_hw_stats(ibdev, stats, port_num, index);
if (ret)
- return ret;
- return print_hw_stat(stats, hsa->index, buf);
+ goto unlock;
+ ret = print_hw_stat(ibdev, port_num, stats, index, buf);
+unlock:
+ mutex_unlock(&stats->lock);
+
+ return ret;
}
-static ssize_t show_stats_lifespan(struct kobject *kobj,
- struct attribute *attr,
+static ssize_t show_stats_lifespan(struct ib_device *ibdev,
+ struct rdma_hw_stats *stats,
+ unsigned int index, unsigned int port_num,
char *buf)
{
- struct hw_stats_attribute *hsa;
int msecs;
- hsa = container_of(attr, struct hw_stats_attribute, attr);
- if (!hsa->port_num) {
- struct ib_device *dev = container_of((struct device *)kobj,
- struct ib_device, dev);
- msecs = jiffies_to_msecs(dev->hw_stats->lifespan);
- } else {
- struct ib_port *p = container_of(kobj, struct ib_port, kobj);
- msecs = jiffies_to_msecs(p->hw_stats->lifespan);
- }
- return sprintf(buf, "%d\n", msecs);
+ mutex_lock(&stats->lock);
+ msecs = jiffies_to_msecs(stats->lifespan);
+ mutex_unlock(&stats->lock);
+
+ return sysfs_emit(buf, "%d\n", msecs);
}
-static ssize_t set_stats_lifespan(struct kobject *kobj,
- struct attribute *attr,
- const char *buf, size_t count)
+static ssize_t set_stats_lifespan(struct ib_device *ibdev,
+ struct rdma_hw_stats *stats,
+ unsigned int index, unsigned int port_num,
+ const char *buf, size_t count)
{
- struct hw_stats_attribute *hsa;
int msecs;
int jiffies;
int ret;
@@ -832,366 +869,499 @@ static ssize_t set_stats_lifespan(struct kobject *kobj,
if (msecs < 0 || msecs > 10000)
return -EINVAL;
jiffies = msecs_to_jiffies(msecs);
- hsa = container_of(attr, struct hw_stats_attribute, attr);
- if (!hsa->port_num) {
- struct ib_device *dev = container_of((struct device *)kobj,
- struct ib_device, dev);
- dev->hw_stats->lifespan = jiffies;
- } else {
- struct ib_port *p = container_of(kobj, struct ib_port, kobj);
- p->hw_stats->lifespan = jiffies;
- }
+
+ mutex_lock(&stats->lock);
+ stats->lifespan = jiffies;
+ mutex_unlock(&stats->lock);
+
return count;
}
-static void free_hsag(struct kobject *kobj, struct attribute_group *attr_group)
+static struct hw_stats_device_data *
+alloc_hw_stats_device(struct ib_device *ibdev)
{
- struct attribute **attr;
-
- sysfs_remove_group(kobj, attr_group);
+ struct hw_stats_device_data *data;
+ struct rdma_hw_stats *stats;
- for (attr = attr_group->attrs; *attr; attr++)
- kfree(*attr);
- kfree(attr_group);
-}
+ if (!ibdev->ops.alloc_hw_device_stats)
+ return ERR_PTR(-EOPNOTSUPP);
+ stats = ibdev->ops.alloc_hw_device_stats(ibdev);
+ if (!stats)
+ return ERR_PTR(-ENOMEM);
+ if (!stats->descs || stats->num_counters <= 0)
+ goto err_free_stats;
-static struct attribute *alloc_hsa(int index, u8 port_num, const char *name)
-{
- struct hw_stats_attribute *hsa;
+ /*
+ * Two extra attribue elements here, one for the lifespan entry and
+ * one to NULL terminate the list for the sysfs core code
+ */
+ data = kzalloc(struct_size(data, attrs, size_add(stats->num_counters, 1)),
+ GFP_KERNEL);
+ if (!data)
+ goto err_free_stats;
+ data->group.attrs = kcalloc(stats->num_counters + 2,
+ sizeof(*data->group.attrs), GFP_KERNEL);
+ if (!data->group.attrs)
+ goto err_free_data;
- hsa = kmalloc(sizeof(*hsa), GFP_KERNEL);
- if (!hsa)
- return NULL;
+ data->group.name = "hw_counters";
+ data->stats = stats;
+ return data;
- hsa->attr.name = (char *)name;
- hsa->attr.mode = S_IRUGO;
- hsa->show = show_hw_stats;
- hsa->store = NULL;
- hsa->index = index;
- hsa->port_num = port_num;
+err_free_data:
+ kfree(data);
+err_free_stats:
+ rdma_free_hw_stats_struct(stats);
+ return ERR_PTR(-ENOMEM);
+}
- return &hsa->attr;
+void ib_device_release_hw_stats(struct hw_stats_device_data *data)
+{
+ kfree(data->group.attrs);
+ rdma_free_hw_stats_struct(data->stats);
+ kfree(data);
}
-static struct attribute *alloc_hsa_lifespan(char *name, u8 port_num)
+int ib_setup_device_attrs(struct ib_device *ibdev)
{
- struct hw_stats_attribute *hsa;
+ struct hw_stats_device_attribute *attr;
+ struct hw_stats_device_data *data;
+ bool opstat_skipped = false;
+ int i, ret, pos = 0;
+
+ data = alloc_hw_stats_device(ibdev);
+ if (IS_ERR(data)) {
+ if (PTR_ERR(data) == -EOPNOTSUPP)
+ return 0;
+ return PTR_ERR(data);
+ }
+ ibdev->hw_stats_data = data;
- hsa = kmalloc(sizeof(*hsa), GFP_KERNEL);
- if (!hsa)
- return NULL;
+ ret = ibdev->ops.get_hw_stats(ibdev, data->stats, 0,
+ data->stats->num_counters);
+ if (ret != data->stats->num_counters) {
+ if (WARN_ON(ret >= 0))
+ return -EINVAL;
+ return ret;
+ }
- hsa->attr.name = name;
- hsa->attr.mode = S_IWUSR | S_IRUGO;
- hsa->show = show_stats_lifespan;
- hsa->store = set_stats_lifespan;
- hsa->index = 0;
- hsa->port_num = port_num;
+ data->stats->timestamp = jiffies;
- return &hsa->attr;
+ for (i = 0; i < data->stats->num_counters; i++) {
+ if (data->stats->descs[i].flags & IB_STAT_FLAG_OPTIONAL) {
+ opstat_skipped = true;
+ continue;
+ }
+
+ WARN_ON(opstat_skipped);
+ attr = &data->attrs[pos];
+ sysfs_attr_init(&attr->attr.attr);
+ attr->attr.attr.name = data->stats->descs[i].name;
+ attr->attr.attr.mode = 0444;
+ attr->attr.show = hw_stat_device_show;
+ attr->show = show_hw_stats;
+ data->group.attrs[pos] = &attr->attr.attr;
+ pos++;
+ }
+
+ attr = &data->attrs[pos];
+ sysfs_attr_init(&attr->attr.attr);
+ attr->attr.attr.name = "lifespan";
+ attr->attr.attr.mode = 0644;
+ attr->attr.show = hw_stat_device_show;
+ attr->show = show_stats_lifespan;
+ attr->attr.store = hw_stat_device_store;
+ attr->store = set_stats_lifespan;
+ data->group.attrs[pos] = &attr->attr.attr;
+ for (i = 0; i != ARRAY_SIZE(ibdev->groups); i++)
+ if (!ibdev->groups[i]) {
+ ibdev->groups[i] = &data->group;
+ ibdev->hw_stats_attr_index = i;
+ return 0;
+ }
+ WARN(true, "struct ib_device->groups is too small");
+ return -EINVAL;
}
-static void setup_hw_stats(struct ib_device *device, struct ib_port *port,
- u8 port_num)
+static struct hw_stats_port_data *
+alloc_hw_stats_port(struct ib_port *port, struct attribute_group *group)
{
- struct attribute_group *hsag;
+ struct ib_device *ibdev = port->ibdev;
+ struct hw_stats_port_data *data;
struct rdma_hw_stats *stats;
- int i, ret;
-
- stats = device->alloc_hw_stats(device, port_num);
+ if (!ibdev->ops.alloc_hw_port_stats)
+ return ERR_PTR(-EOPNOTSUPP);
+ stats = ibdev->ops.alloc_hw_port_stats(port->ibdev, port->port_num);
if (!stats)
- return;
-
- if (!stats->names || stats->num_counters <= 0)
+ return ERR_PTR(-ENOMEM);
+ if (!stats->descs || stats->num_counters <= 0)
goto err_free_stats;
/*
* Two extra attribue elements here, one for the lifespan entry and
* one to NULL terminate the list for the sysfs core code
*/
- hsag = kzalloc(sizeof(*hsag) +
- sizeof(void *) * (stats->num_counters + 2),
+ data = kzalloc(struct_size(data, attrs, size_add(stats->num_counters, 1)),
GFP_KERNEL);
- if (!hsag)
+ if (!data)
goto err_free_stats;
+ group->attrs = kcalloc(stats->num_counters + 2,
+ sizeof(*group->attrs), GFP_KERNEL);
+ if (!group->attrs)
+ goto err_free_data;
- ret = device->get_hw_stats(device, stats, port_num,
- stats->num_counters);
- if (ret != stats->num_counters)
- goto err_free_hsag;
+ group->name = "hw_counters";
+ data->stats = stats;
+ return data;
- stats->timestamp = jiffies;
-
- hsag->name = "hw_counters";
- hsag->attrs = (void *)hsag + sizeof(*hsag);
+err_free_data:
+ kfree(data);
+err_free_stats:
+ rdma_free_hw_stats_struct(stats);
+ return ERR_PTR(-ENOMEM);
+}
- for (i = 0; i < stats->num_counters; i++) {
- hsag->attrs[i] = alloc_hsa(i, port_num, stats->names[i]);
- if (!hsag->attrs[i])
- goto err;
- sysfs_attr_init(hsag->attrs[i]);
+static int setup_hw_port_stats(struct ib_port *port,
+ struct attribute_group *group)
+{
+ struct hw_stats_port_attribute *attr;
+ struct hw_stats_port_data *data;
+ bool opstat_skipped = false;
+ int i, ret, pos = 0;
+
+ data = alloc_hw_stats_port(port, group);
+ if (IS_ERR(data))
+ return PTR_ERR(data);
+
+ ret = port->ibdev->ops.get_hw_stats(port->ibdev, data->stats,
+ port->port_num,
+ data->stats->num_counters);
+ if (ret != data->stats->num_counters) {
+ if (WARN_ON(ret >= 0))
+ return -EINVAL;
+ return ret;
}
- /* treat an error here as non-fatal */
- hsag->attrs[i] = alloc_hsa_lifespan("lifespan", port_num);
- if (hsag->attrs[i])
- sysfs_attr_init(hsag->attrs[i]);
+ data->stats->timestamp = jiffies;
- if (port) {
- struct kobject *kobj = &port->kobj;
- ret = sysfs_create_group(kobj, hsag);
- if (ret)
- goto err;
- port->hw_stats_ag = hsag;
- port->hw_stats = stats;
- } else {
- struct kobject *kobj = &device->dev.kobj;
- ret = sysfs_create_group(kobj, hsag);
- if (ret)
- goto err;
- device->hw_stats_ag = hsag;
- device->hw_stats = stats;
+ for (i = 0; i < data->stats->num_counters; i++) {
+ if (data->stats->descs[i].flags & IB_STAT_FLAG_OPTIONAL) {
+ opstat_skipped = true;
+ continue;
+ }
+
+ WARN_ON(opstat_skipped);
+ attr = &data->attrs[pos];
+ sysfs_attr_init(&attr->attr.attr);
+ attr->attr.attr.name = data->stats->descs[i].name;
+ attr->attr.attr.mode = 0444;
+ attr->attr.show = hw_stat_port_show;
+ attr->show = show_hw_stats;
+ group->attrs[pos] = &attr->attr.attr;
+ pos++;
}
- return;
+ attr = &data->attrs[pos];
+ sysfs_attr_init(&attr->attr.attr);
+ attr->attr.attr.name = "lifespan";
+ attr->attr.attr.mode = 0644;
+ attr->attr.show = hw_stat_port_show;
+ attr->show = show_stats_lifespan;
+ attr->attr.store = hw_stat_port_store;
+ attr->store = set_stats_lifespan;
+ group->attrs[pos] = &attr->attr.attr;
+
+ port->hw_stats_data = data;
+ return 0;
+}
-err:
- for (; i >= 0; i--)
- kfree(hsag->attrs[i]);
-err_free_hsag:
- kfree(hsag);
-err_free_stats:
- kfree(stats);
- return;
+struct rdma_hw_stats *ib_get_hw_stats_port(struct ib_device *ibdev,
+ u32 port_num)
+{
+ if (!ibdev->port_data || !rdma_is_port_valid(ibdev, port_num) ||
+ !ibdev->port_data[port_num].sysfs->hw_stats_data)
+ return NULL;
+ return ibdev->port_data[port_num].sysfs->hw_stats_data->stats;
}
-static int add_port(struct ib_device *device, int port_num,
- int (*port_callback)(struct ib_device *,
- u8, struct kobject *))
+static int
+alloc_port_table_group(const char *name, struct attribute_group *group,
+ struct port_table_attribute *attrs, size_t num,
+ ssize_t (*show)(struct ib_device *ibdev, u32 port_num,
+ struct ib_port_attribute *, char *buf))
{
- struct ib_port *p;
- struct ib_port_attr attr;
+ struct attribute **attr_list;
int i;
- int ret;
- ret = ib_query_port(device, port_num, &attr);
- if (ret)
- return ret;
-
- p = kzalloc(sizeof *p, GFP_KERNEL);
- if (!p)
+ attr_list = kcalloc(num + 1, sizeof(*attr_list), GFP_KERNEL);
+ if (!attr_list)
return -ENOMEM;
- p->ibdev = device;
- p->port_num = port_num;
+ for (i = 0; i < num; i++) {
+ struct port_table_attribute *element = &attrs[i];
- ret = kobject_init_and_add(&p->kobj, &port_type,
- device->ports_parent,
- "%d", port_num);
- if (ret) {
- kfree(p);
- return ret;
- }
+ if (snprintf(element->name, sizeof(element->name), "%d", i) >=
+ sizeof(element->name))
+ goto err;
- p->gid_attr_group = kzalloc(sizeof(*p->gid_attr_group), GFP_KERNEL);
- if (!p->gid_attr_group) {
- ret = -ENOMEM;
- goto err_put;
- }
+ sysfs_attr_init(&element->attr.attr);
+ element->attr.attr.name = element->name;
+ element->attr.attr.mode = 0444;
+ element->attr.show = show;
+ element->index = i;
- p->gid_attr_group->port = p;
- ret = kobject_init_and_add(&p->gid_attr_group->kobj, &gid_attr_type,
- &p->kobj, "gid_attrs");
- if (ret) {
- kfree(p->gid_attr_group);
- goto err_put;
+ attr_list[i] = &element->attr.attr;
}
+ group->name = name;
+ group->attrs = attr_list;
+ return 0;
+err:
+ kfree(attr_list);
+ return -EINVAL;
+}
- p->pma_table = get_counter_table(device, port_num);
- ret = sysfs_create_group(&p->kobj, p->pma_table);
- if (ret)
- goto err_put_gid_attrs;
+/*
+ * Create the sysfs:
+ * ibp0s9/ports/XX/gid_attrs/{ndevs,types}/YYY
+ * YYY is the gid table index in decimal
+ */
+static int setup_gid_attrs(struct ib_port *port,
+ const struct ib_port_attr *attr)
+{
+ struct gid_attr_group *gid_attr_group;
+ int ret;
- p->gid_group.name = "gids";
- p->gid_group.attrs = alloc_group_attrs(show_port_gid, attr.gid_tbl_len);
- if (!p->gid_group.attrs) {
- ret = -ENOMEM;
- goto err_remove_pma;
- }
+ gid_attr_group = kzalloc(struct_size(gid_attr_group, attrs_list,
+ size_mul(attr->gid_tbl_len, 2)),
+ GFP_KERNEL);
+ if (!gid_attr_group)
+ return -ENOMEM;
+ gid_attr_group->port = port;
+ kobject_init(&gid_attr_group->kobj, &gid_attr_type);
- ret = sysfs_create_group(&p->kobj, &p->gid_group);
+ ret = alloc_port_table_group("ndevs", &gid_attr_group->groups[0],
+ gid_attr_group->attrs_list,
+ attr->gid_tbl_len,
+ show_port_gid_attr_ndev);
if (ret)
- goto err_free_gid;
-
- p->gid_attr_group->ndev.name = "ndevs";
- p->gid_attr_group->ndev.attrs = alloc_group_attrs(show_port_gid_attr_ndev,
- attr.gid_tbl_len);
- if (!p->gid_attr_group->ndev.attrs) {
- ret = -ENOMEM;
- goto err_remove_gid;
- }
+ goto err_put;
+ gid_attr_group->groups_list[0] = &gid_attr_group->groups[0];
- ret = sysfs_create_group(&p->gid_attr_group->kobj,
- &p->gid_attr_group->ndev);
+ ret = alloc_port_table_group(
+ "types", &gid_attr_group->groups[1],
+ gid_attr_group->attrs_list + attr->gid_tbl_len,
+ attr->gid_tbl_len, show_port_gid_attr_gid_type);
if (ret)
- goto err_free_gid_ndev;
-
- p->gid_attr_group->type.name = "types";
- p->gid_attr_group->type.attrs = alloc_group_attrs(show_port_gid_attr_gid_type,
- attr.gid_tbl_len);
- if (!p->gid_attr_group->type.attrs) {
- ret = -ENOMEM;
- goto err_remove_gid_ndev;
- }
+ goto err_put;
+ gid_attr_group->groups_list[1] = &gid_attr_group->groups[1];
- ret = sysfs_create_group(&p->gid_attr_group->kobj,
- &p->gid_attr_group->type);
+ ret = kobject_add(&gid_attr_group->kobj, &port->kobj, "gid_attrs");
if (ret)
- goto err_free_gid_type;
-
- p->pkey_group.name = "pkeys";
- p->pkey_group.attrs = alloc_group_attrs(show_port_pkey,
- attr.pkey_tbl_len);
- if (!p->pkey_group.attrs) {
- ret = -ENOMEM;
- goto err_remove_gid_type;
- }
-
- ret = sysfs_create_group(&p->kobj, &p->pkey_group);
+ goto err_put;
+ ret = sysfs_create_groups(&gid_attr_group->kobj,
+ gid_attr_group->groups_list);
if (ret)
- goto err_free_pkey;
+ goto err_del;
+ port->gid_attr_group = gid_attr_group;
+ return 0;
- if (port_callback) {
- ret = port_callback(device, port_num, &p->kobj);
- if (ret)
- goto err_remove_pkey;
- }
+err_del:
+ kobject_del(&gid_attr_group->kobj);
+err_put:
+ kobject_put(&gid_attr_group->kobj);
+ return ret;
+}
- /*
- * If port == 0, it means we have only one port and the parent
- * device, not this port device, should be the holder of the
- * hw_counters
- */
- if (device->alloc_hw_stats && port_num)
- setup_hw_stats(device, p, port_num);
+static void destroy_gid_attrs(struct ib_port *port)
+{
+ struct gid_attr_group *gid_attr_group = port->gid_attr_group;
- list_add_tail(&p->kobj.entry, &device->port_list);
+ if (!gid_attr_group)
+ return;
+ sysfs_remove_groups(&gid_attr_group->kobj, gid_attr_group->groups_list);
+ kobject_del(&gid_attr_group->kobj);
+ kobject_put(&gid_attr_group->kobj);
+}
- kobject_uevent(&p->kobj, KOBJ_ADD);
- return 0;
+/*
+ * Create the sysfs:
+ * ibp0s9/ports/XX/{gids,pkeys,counters}/YYY
+ */
+static struct ib_port *setup_port(struct ib_core_device *coredev, int port_num,
+ const struct ib_port_attr *attr)
+{
+ struct ib_device *device = rdma_device_to_ibdev(&coredev->dev);
+ bool is_full_dev = &device->coredev == coredev;
+ const struct attribute_group **cur_group;
+ struct ib_port *p;
+ int ret;
-err_remove_pkey:
- sysfs_remove_group(&p->kobj, &p->pkey_group);
+ p = kvzalloc(struct_size(p, attrs_list,
+ size_add(attr->gid_tbl_len, attr->pkey_tbl_len)),
+ GFP_KERNEL);
+ if (!p)
+ return ERR_PTR(-ENOMEM);
+ p->ibdev = device;
+ p->port_num = port_num;
+ kobject_init(&p->kobj, &port_type);
-err_free_pkey:
- for (i = 0; i < attr.pkey_tbl_len; ++i)
- kfree(p->pkey_group.attrs[i]);
+ if (device->port_data && is_full_dev)
+ device->port_data[port_num].sysfs = p;
- kfree(p->pkey_group.attrs);
- p->pkey_group.attrs = NULL;
+ cur_group = p->groups_list;
+ ret = alloc_port_table_group("gids", &p->groups[0], p->attrs_list,
+ attr->gid_tbl_len, show_port_gid);
+ if (ret)
+ goto err_put;
+ *cur_group++ = &p->groups[0];
-err_remove_gid_type:
- sysfs_remove_group(&p->gid_attr_group->kobj,
- &p->gid_attr_group->type);
+ if (attr->pkey_tbl_len) {
+ ret = alloc_port_table_group("pkeys", &p->groups[1],
+ p->attrs_list + attr->gid_tbl_len,
+ attr->pkey_tbl_len, show_port_pkey);
+ if (ret)
+ goto err_put;
+ *cur_group++ = &p->groups[1];
+ }
-err_free_gid_type:
- for (i = 0; i < attr.gid_tbl_len; ++i)
- kfree(p->gid_attr_group->type.attrs[i]);
+ /*
+ * If port == 0, it means hw_counters are per device and not per
+ * port, so holder should be device. Therefore skip per port
+ * counter initialization.
+ */
+ if (port_num && is_full_dev) {
+ ret = setup_hw_port_stats(p, &p->groups[2]);
+ if (ret && ret != -EOPNOTSUPP)
+ goto err_put;
+ if (!ret)
+ *cur_group++ = &p->groups[2];
+ }
- kfree(p->gid_attr_group->type.attrs);
- p->gid_attr_group->type.attrs = NULL;
+ if (device->ops.process_mad && is_full_dev)
+ *cur_group++ = get_counter_table(device, port_num);
-err_remove_gid_ndev:
- sysfs_remove_group(&p->gid_attr_group->kobj,
- &p->gid_attr_group->ndev);
+ ret = kobject_add(&p->kobj, coredev->ports_kobj, "%d", port_num);
+ if (ret)
+ goto err_put;
+ ret = sysfs_create_groups(&p->kobj, p->groups_list);
+ if (ret)
+ goto err_del;
+ if (is_full_dev) {
+ ret = sysfs_create_groups(&p->kobj, device->ops.port_groups);
+ if (ret)
+ goto err_groups;
+ }
-err_free_gid_ndev:
- for (i = 0; i < attr.gid_tbl_len; ++i)
- kfree(p->gid_attr_group->ndev.attrs[i]);
+ list_add_tail(&p->kobj.entry, &coredev->port_list);
+ return p;
- kfree(p->gid_attr_group->ndev.attrs);
- p->gid_attr_group->ndev.attrs = NULL;
+err_groups:
+ sysfs_remove_groups(&p->kobj, p->groups_list);
+err_del:
+ kobject_del(&p->kobj);
+err_put:
+ if (device->port_data && is_full_dev)
+ device->port_data[port_num].sysfs = NULL;
+ kobject_put(&p->kobj);
+ return ERR_PTR(ret);
+}
-err_remove_gid:
- sysfs_remove_group(&p->kobj, &p->gid_group);
+static void destroy_port(struct ib_core_device *coredev, struct ib_port *port)
+{
+ bool is_full_dev = &port->ibdev->coredev == coredev;
-err_free_gid:
- for (i = 0; i < attr.gid_tbl_len; ++i)
- kfree(p->gid_group.attrs[i]);
+ list_del(&port->kobj.entry);
+ if (is_full_dev)
+ sysfs_remove_groups(&port->kobj, port->ibdev->ops.port_groups);
- kfree(p->gid_group.attrs);
- p->gid_group.attrs = NULL;
+ sysfs_remove_groups(&port->kobj, port->groups_list);
+ kobject_del(&port->kobj);
-err_remove_pma:
- sysfs_remove_group(&p->kobj, p->pma_table);
+ if (port->ibdev->port_data &&
+ port->ibdev->port_data[port->port_num].sysfs == port)
+ port->ibdev->port_data[port->port_num].sysfs = NULL;
-err_put_gid_attrs:
- kobject_put(&p->gid_attr_group->kobj);
+ kobject_put(&port->kobj);
+}
-err_put:
- kobject_put(&p->kobj);
- return ret;
+static const char *node_type_string(int node_type)
+{
+ switch (node_type) {
+ case RDMA_NODE_IB_CA:
+ return "CA";
+ case RDMA_NODE_IB_SWITCH:
+ return "switch";
+ case RDMA_NODE_IB_ROUTER:
+ return "router";
+ case RDMA_NODE_RNIC:
+ return "RNIC";
+ case RDMA_NODE_USNIC:
+ return "usNIC";
+ case RDMA_NODE_USNIC_UDP:
+ return "usNIC UDP";
+ case RDMA_NODE_UNSPECIFIED:
+ return "unspecified";
+ }
+ return "<unknown>";
}
-static ssize_t show_node_type(struct device *device,
+static ssize_t node_type_show(struct device *device,
struct device_attribute *attr, char *buf)
{
- struct ib_device *dev = container_of(device, struct ib_device, dev);
-
- switch (dev->node_type) {
- case RDMA_NODE_IB_CA: return sprintf(buf, "%d: CA\n", dev->node_type);
- case RDMA_NODE_RNIC: return sprintf(buf, "%d: RNIC\n", dev->node_type);
- case RDMA_NODE_USNIC: return sprintf(buf, "%d: usNIC\n", dev->node_type);
- case RDMA_NODE_USNIC_UDP: return sprintf(buf, "%d: usNIC UDP\n", dev->node_type);
- case RDMA_NODE_IB_SWITCH: return sprintf(buf, "%d: switch\n", dev->node_type);
- case RDMA_NODE_IB_ROUTER: return sprintf(buf, "%d: router\n", dev->node_type);
- default: return sprintf(buf, "%d: <unknown>\n", dev->node_type);
- }
+ struct ib_device *dev = rdma_device_to_ibdev(device);
+
+ return sysfs_emit(buf, "%u: %s\n", dev->node_type,
+ node_type_string(dev->node_type));
}
+static DEVICE_ATTR_RO(node_type);
-static ssize_t show_sys_image_guid(struct device *device,
+static ssize_t sys_image_guid_show(struct device *device,
struct device_attribute *dev_attr, char *buf)
{
- struct ib_device *dev = container_of(device, struct ib_device, dev);
-
- return sprintf(buf, "%04x:%04x:%04x:%04x\n",
- be16_to_cpu(((__be16 *) &dev->attrs.sys_image_guid)[0]),
- be16_to_cpu(((__be16 *) &dev->attrs.sys_image_guid)[1]),
- be16_to_cpu(((__be16 *) &dev->attrs.sys_image_guid)[2]),
- be16_to_cpu(((__be16 *) &dev->attrs.sys_image_guid)[3]));
+ struct ib_device *dev = rdma_device_to_ibdev(device);
+ __be16 *guid = (__be16 *)&dev->attrs.sys_image_guid;
+
+ return sysfs_emit(buf, "%04x:%04x:%04x:%04x\n",
+ be16_to_cpu(guid[0]),
+ be16_to_cpu(guid[1]),
+ be16_to_cpu(guid[2]),
+ be16_to_cpu(guid[3]));
}
+static DEVICE_ATTR_RO(sys_image_guid);
-static ssize_t show_node_guid(struct device *device,
+static ssize_t node_guid_show(struct device *device,
struct device_attribute *attr, char *buf)
{
- struct ib_device *dev = container_of(device, struct ib_device, dev);
-
- return sprintf(buf, "%04x:%04x:%04x:%04x\n",
- be16_to_cpu(((__be16 *) &dev->node_guid)[0]),
- be16_to_cpu(((__be16 *) &dev->node_guid)[1]),
- be16_to_cpu(((__be16 *) &dev->node_guid)[2]),
- be16_to_cpu(((__be16 *) &dev->node_guid)[3]));
+ struct ib_device *dev = rdma_device_to_ibdev(device);
+ __be16 *node_guid = (__be16 *)&dev->node_guid;
+
+ return sysfs_emit(buf, "%04x:%04x:%04x:%04x\n",
+ be16_to_cpu(node_guid[0]),
+ be16_to_cpu(node_guid[1]),
+ be16_to_cpu(node_guid[2]),
+ be16_to_cpu(node_guid[3]));
}
+static DEVICE_ATTR_RO(node_guid);
-static ssize_t show_node_desc(struct device *device,
+static ssize_t node_desc_show(struct device *device,
struct device_attribute *attr, char *buf)
{
- struct ib_device *dev = container_of(device, struct ib_device, dev);
+ struct ib_device *dev = rdma_device_to_ibdev(device);
- return sprintf(buf, "%.64s\n", dev->node_desc);
+ return sysfs_emit(buf, "%.64s\n", dev->node_desc);
}
-static ssize_t set_node_desc(struct device *device,
- struct device_attribute *attr,
- const char *buf, size_t count)
+static ssize_t node_desc_store(struct device *device,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
{
- struct ib_device *dev = container_of(device, struct ib_device, dev);
+ struct ib_device *dev = rdma_device_to_ibdev(device);
struct ib_device_modify desc = {};
int ret;
- if (!dev->modify_device)
- return -EIO;
+ if (!dev->ops.modify_device)
+ return -EOPNOTSUPP;
memcpy(desc.node_desc, buf, min_t(int, count, IB_DEVICE_NODE_DESC_MAX));
ret = ib_modify_device(dev, IB_DEVICE_MODIFY_NODE_DESC, &desc);
@@ -1200,129 +1370,104 @@ static ssize_t set_node_desc(struct device *device,
return count;
}
+static DEVICE_ATTR_RW(node_desc);
-static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr,
+static ssize_t fw_ver_show(struct device *device, struct device_attribute *attr,
char *buf)
{
- struct ib_device *dev = container_of(device, struct ib_device, dev);
+ struct ib_device *dev = rdma_device_to_ibdev(device);
+ char version[IB_FW_VERSION_NAME_MAX] = {};
- ib_get_device_fw_str(dev, buf, PAGE_SIZE);
- strlcat(buf, "\n", PAGE_SIZE);
- return strlen(buf);
+ ib_get_device_fw_str(dev, version);
+
+ return sysfs_emit(buf, "%s\n", version);
}
+static DEVICE_ATTR_RO(fw_ver);
+
+static struct attribute *ib_dev_attrs[] = {
+ &dev_attr_node_type.attr,
+ &dev_attr_node_guid.attr,
+ &dev_attr_sys_image_guid.attr,
+ &dev_attr_fw_ver.attr,
+ &dev_attr_node_desc.attr,
+ NULL,
+};
-static DEVICE_ATTR(node_type, S_IRUGO, show_node_type, NULL);
-static DEVICE_ATTR(sys_image_guid, S_IRUGO, show_sys_image_guid, NULL);
-static DEVICE_ATTR(node_guid, S_IRUGO, show_node_guid, NULL);
-static DEVICE_ATTR(node_desc, S_IRUGO | S_IWUSR, show_node_desc, set_node_desc);
-static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL);
-
-static struct device_attribute *ib_class_attributes[] = {
- &dev_attr_node_type,
- &dev_attr_sys_image_guid,
- &dev_attr_node_guid,
- &dev_attr_node_desc,
- &dev_attr_fw_ver,
+const struct attribute_group ib_dev_attr_group = {
+ .attrs = ib_dev_attrs,
};
-static void free_port_list_attributes(struct ib_device *device)
+void ib_free_port_attrs(struct ib_core_device *coredev)
{
struct kobject *p, *t;
- list_for_each_entry_safe(p, t, &device->port_list, entry) {
+ list_for_each_entry_safe(p, t, &coredev->port_list, entry) {
struct ib_port *port = container_of(p, struct ib_port, kobj);
- list_del(&p->entry);
- if (port->hw_stats) {
- kfree(port->hw_stats);
- free_hsag(&port->kobj, port->hw_stats_ag);
- }
- sysfs_remove_group(p, port->pma_table);
- sysfs_remove_group(p, &port->pkey_group);
- sysfs_remove_group(p, &port->gid_group);
- sysfs_remove_group(&port->gid_attr_group->kobj,
- &port->gid_attr_group->ndev);
- sysfs_remove_group(&port->gid_attr_group->kobj,
- &port->gid_attr_group->type);
- kobject_put(&port->gid_attr_group->kobj);
- kobject_put(p);
+
+ destroy_gid_attrs(port);
+ destroy_port(coredev, port);
}
- kobject_put(device->ports_parent);
+ kobject_put(coredev->ports_kobj);
}
-int ib_device_register_sysfs(struct ib_device *device,
- int (*port_callback)(struct ib_device *,
- u8, struct kobject *))
+int ib_setup_port_attrs(struct ib_core_device *coredev)
{
- struct device *class_dev = &device->dev;
+ struct ib_device *device = rdma_device_to_ibdev(&coredev->dev);
+ u32 port_num;
int ret;
- int i;
- device->dev.parent = device->dma_device;
- ret = dev_set_name(class_dev, "%s", device->name);
- if (ret)
- return ret;
+ coredev->ports_kobj = kobject_create_and_add("ports",
+ &coredev->dev.kobj);
+ if (!coredev->ports_kobj)
+ return -ENOMEM;
- ret = device_add(class_dev);
- if (ret)
- goto err;
+ rdma_for_each_port (device, port_num) {
+ struct ib_port_attr attr;
+ struct ib_port *port;
- for (i = 0; i < ARRAY_SIZE(ib_class_attributes); ++i) {
- ret = device_create_file(class_dev, ib_class_attributes[i]);
+ ret = ib_query_port(device, port_num, &attr);
if (ret)
- goto err_unregister;
- }
+ goto err_put;
- device->ports_parent = kobject_create_and_add("ports",
- &class_dev->kobj);
- if (!device->ports_parent) {
- ret = -ENOMEM;
- goto err_put;
- }
+ port = setup_port(coredev, port_num, &attr);
+ if (IS_ERR(port)) {
+ ret = PTR_ERR(port);
+ goto err_put;
+ }
- if (rdma_cap_ib_switch(device)) {
- ret = add_port(device, 0, port_callback);
+ ret = setup_gid_attrs(port, &attr);
if (ret)
goto err_put;
- } else {
- for (i = 1; i <= device->phys_port_cnt; ++i) {
- ret = add_port(device, i, port_callback);
- if (ret)
- goto err_put;
- }
}
-
- if (device->alloc_hw_stats)
- setup_hw_stats(device, NULL, 0);
-
return 0;
err_put:
- free_port_list_attributes(device);
-
-err_unregister:
- device_unregister(class_dev);
-
-err:
+ ib_free_port_attrs(coredev);
return ret;
}
-void ib_device_unregister_sysfs(struct ib_device *device)
+/**
+ * ib_port_register_client_groups - Add an ib_client's attributes to the port
+ *
+ * @ibdev: IB device to add counters
+ * @port_num: valid port number
+ * @groups: Group list of attributes
+ *
+ * Do not use. Only for legacy sysfs compatibility.
+ */
+int ib_port_register_client_groups(struct ib_device *ibdev, u32 port_num,
+ const struct attribute_group **groups)
{
- int i;
-
- /* Hold kobject until ib_dealloc_device() */
- kobject_get(&device->dev.kobj);
-
- free_port_list_attributes(device);
-
- if (device->hw_stats) {
- kfree(device->hw_stats);
- free_hsag(&device->dev.kobj, device->hw_stats_ag);
- }
-
- for (i = 0; i < ARRAY_SIZE(ib_class_attributes); ++i)
- device_remove_file(&device->dev, ib_class_attributes[i]);
+ return sysfs_create_groups(&ibdev->port_data[port_num].sysfs->kobj,
+ groups);
+}
+EXPORT_SYMBOL(ib_port_register_client_groups);
- device_unregister(&device->dev);
+void ib_port_unregister_client_groups(struct ib_device *ibdev, u32 port_num,
+ const struct attribute_group **groups)
+{
+ return sysfs_remove_groups(&ibdev->port_data[port_num].sysfs->kobj,
+ groups);
}
+EXPORT_SYMBOL(ib_port_unregister_client_groups);
diff --git a/drivers/infiniband/core/trace.c b/drivers/infiniband/core/trace.c
new file mode 100644
index 000000000000..31e7860d35bf
--- /dev/null
+++ b/drivers/infiniband/core/trace.c
@@ -0,0 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Trace points for core RDMA functions.
+ *
+ * Author: Chuck Lever <chuck.lever@oracle.com>
+ *
+ * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved.
+ */
+
+#define CREATE_TRACE_POINTS
+
+#include <trace/events/rdma_core.h>
diff --git a/drivers/infiniband/core/ucaps.c b/drivers/infiniband/core/ucaps.c
new file mode 100644
index 000000000000..de5cb8bf0a61
--- /dev/null
+++ b/drivers/infiniband/core/ucaps.c
@@ -0,0 +1,267 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved
+ */
+
+#include <linux/kref.h>
+#include <linux/cdev.h>
+#include <linux/mutex.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <rdma/ib_ucaps.h>
+
+#define RDMA_UCAP_FIRST RDMA_UCAP_MLX5_CTRL_LOCAL
+
+static DEFINE_MUTEX(ucaps_mutex);
+static struct ib_ucap *ucaps_list[RDMA_UCAP_MAX];
+static bool ucaps_class_is_registered;
+static dev_t ucaps_base_dev;
+
+struct ib_ucap {
+ struct cdev cdev;
+ struct device dev;
+ struct kref ref;
+};
+
+static const char *ucap_names[RDMA_UCAP_MAX] = {
+ [RDMA_UCAP_MLX5_CTRL_LOCAL] = "mlx5_perm_ctrl_local",
+ [RDMA_UCAP_MLX5_CTRL_OTHER_VHCA] = "mlx5_perm_ctrl_other_vhca"
+};
+
+static char *ucaps_devnode(const struct device *dev, umode_t *mode)
+{
+ if (mode)
+ *mode = 0600;
+
+ return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev));
+}
+
+static const struct class ucaps_class = {
+ .name = "infiniband_ucaps",
+ .devnode = ucaps_devnode,
+};
+
+static const struct file_operations ucaps_cdev_fops = {
+ .owner = THIS_MODULE,
+ .open = simple_open,
+};
+
+/**
+ * ib_cleanup_ucaps - cleanup all API resources and class.
+ *
+ * This is called once, when removing the ib_uverbs module.
+ */
+void ib_cleanup_ucaps(void)
+{
+ mutex_lock(&ucaps_mutex);
+ if (!ucaps_class_is_registered) {
+ mutex_unlock(&ucaps_mutex);
+ return;
+ }
+
+ for (int i = RDMA_UCAP_FIRST; i < RDMA_UCAP_MAX; i++)
+ WARN_ON(ucaps_list[i]);
+
+ class_unregister(&ucaps_class);
+ ucaps_class_is_registered = false;
+ unregister_chrdev_region(ucaps_base_dev, RDMA_UCAP_MAX);
+ mutex_unlock(&ucaps_mutex);
+}
+
+static int get_ucap_from_devt(dev_t devt, u64 *idx_mask)
+{
+ for (int type = RDMA_UCAP_FIRST; type < RDMA_UCAP_MAX; type++) {
+ if (ucaps_list[type] && ucaps_list[type]->dev.devt == devt) {
+ *idx_mask |= 1 << type;
+ return 0;
+ }
+ }
+
+ return -EINVAL;
+}
+
+static int get_devt_from_fd(unsigned int fd, dev_t *ret_dev)
+{
+ struct file *file;
+
+ file = fget(fd);
+ if (!file)
+ return -EBADF;
+
+ *ret_dev = file_inode(file)->i_rdev;
+ fput(file);
+ return 0;
+}
+
+/**
+ * ib_ucaps_init - Initialization required before ucap creation.
+ *
+ * Return: 0 on success, or a negative errno value on failure
+ */
+static int ib_ucaps_init(void)
+{
+ int ret = 0;
+
+ if (ucaps_class_is_registered)
+ return ret;
+
+ ret = class_register(&ucaps_class);
+ if (ret)
+ return ret;
+
+ ret = alloc_chrdev_region(&ucaps_base_dev, 0, RDMA_UCAP_MAX,
+ ucaps_class.name);
+ if (ret < 0) {
+ class_unregister(&ucaps_class);
+ return ret;
+ }
+
+ ucaps_class_is_registered = true;
+
+ return 0;
+}
+
+static void ucap_dev_release(struct device *device)
+{
+ struct ib_ucap *ucap = container_of(device, struct ib_ucap, dev);
+
+ kfree(ucap);
+}
+
+/**
+ * ib_create_ucap - Add a ucap character device
+ * @type: UCAP type
+ *
+ * Creates a ucap character device in the /dev/infiniband directory. By default,
+ * the device has root-only read-write access.
+ *
+ * A driver may call this multiple times with the same UCAP type. A reference
+ * count tracks creations and deletions.
+ *
+ * Return: 0 on success, or a negative errno value on failure
+ */
+int ib_create_ucap(enum rdma_user_cap type)
+{
+ struct ib_ucap *ucap;
+ int ret;
+
+ if (type >= RDMA_UCAP_MAX)
+ return -EINVAL;
+
+ mutex_lock(&ucaps_mutex);
+ ret = ib_ucaps_init();
+ if (ret)
+ goto unlock;
+
+ ucap = ucaps_list[type];
+ if (ucap) {
+ kref_get(&ucap->ref);
+ mutex_unlock(&ucaps_mutex);
+ return 0;
+ }
+
+ ucap = kzalloc(sizeof(*ucap), GFP_KERNEL);
+ if (!ucap) {
+ ret = -ENOMEM;
+ goto unlock;
+ }
+
+ device_initialize(&ucap->dev);
+ ucap->dev.class = &ucaps_class;
+ ucap->dev.devt = MKDEV(MAJOR(ucaps_base_dev), type);
+ ucap->dev.release = ucap_dev_release;
+ ret = dev_set_name(&ucap->dev, "%s", ucap_names[type]);
+ if (ret)
+ goto err_device;
+
+ cdev_init(&ucap->cdev, &ucaps_cdev_fops);
+ ucap->cdev.owner = THIS_MODULE;
+
+ ret = cdev_device_add(&ucap->cdev, &ucap->dev);
+ if (ret)
+ goto err_device;
+
+ kref_init(&ucap->ref);
+ ucaps_list[type] = ucap;
+ mutex_unlock(&ucaps_mutex);
+
+ return 0;
+
+err_device:
+ put_device(&ucap->dev);
+unlock:
+ mutex_unlock(&ucaps_mutex);
+ return ret;
+}
+EXPORT_SYMBOL(ib_create_ucap);
+
+static void ib_release_ucap(struct kref *ref)
+{
+ struct ib_ucap *ucap = container_of(ref, struct ib_ucap, ref);
+ enum rdma_user_cap type;
+
+ for (type = RDMA_UCAP_FIRST; type < RDMA_UCAP_MAX; type++) {
+ if (ucaps_list[type] == ucap)
+ break;
+ }
+ WARN_ON(type == RDMA_UCAP_MAX);
+
+ ucaps_list[type] = NULL;
+ cdev_device_del(&ucap->cdev, &ucap->dev);
+ put_device(&ucap->dev);
+}
+
+/**
+ * ib_remove_ucap - Remove a ucap character device
+ * @type: User cap type
+ *
+ * Removes the ucap character device according to type. The device is completely
+ * removed from the filesystem when its reference count reaches 0.
+ */
+void ib_remove_ucap(enum rdma_user_cap type)
+{
+ struct ib_ucap *ucap;
+
+ mutex_lock(&ucaps_mutex);
+ ucap = ucaps_list[type];
+ if (WARN_ON(!ucap))
+ goto end;
+
+ kref_put(&ucap->ref, ib_release_ucap);
+end:
+ mutex_unlock(&ucaps_mutex);
+}
+EXPORT_SYMBOL(ib_remove_ucap);
+
+/**
+ * ib_get_ucaps - Get bitmask of ucap types from file descriptors
+ * @fds: Array of file descriptors
+ * @fd_count: Number of file descriptors in the array
+ * @idx_mask: Bitmask to be updated based on the ucaps in the fd list
+ *
+ * Given an array of file descriptors, this function returns a bitmask of
+ * the ucaps where a bit is set if an FD for that ucap type was in the array.
+ *
+ * Return: 0 on success, or a negative errno value on failure
+ */
+int ib_get_ucaps(int *fds, int fd_count, uint64_t *idx_mask)
+{
+ int ret = 0;
+ dev_t dev;
+
+ *idx_mask = 0;
+ mutex_lock(&ucaps_mutex);
+ for (int i = 0; i < fd_count; i++) {
+ ret = get_devt_from_fd(fds[i], &dev);
+ if (ret)
+ goto end;
+
+ ret = get_ucap_from_devt(dev, idx_mask);
+ if (ret)
+ goto end;
+ }
+
+end:
+ mutex_unlock(&ucaps_mutex);
+ return ret;
+}
diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c
deleted file mode 100644
index e0a995b85a2d..000000000000
--- a/drivers/infiniband/core/ucm.c
+++ /dev/null
@@ -1,1375 +0,0 @@
-/*
- * Copyright (c) 2005 Topspin Communications. All rights reserved.
- * Copyright (c) 2005 Intel Corporation. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/completion.h>
-#include <linux/init.h>
-#include <linux/fs.h>
-#include <linux/module.h>
-#include <linux/device.h>
-#include <linux/err.h>
-#include <linux/poll.h>
-#include <linux/sched.h>
-#include <linux/file.h>
-#include <linux/mount.h>
-#include <linux/cdev.h>
-#include <linux/idr.h>
-#include <linux/mutex.h>
-#include <linux/slab.h>
-
-#include <linux/uaccess.h>
-
-#include <rdma/ib.h>
-#include <rdma/ib_cm.h>
-#include <rdma/ib_user_cm.h>
-#include <rdma/ib_marshall.h>
-
-MODULE_AUTHOR("Libor Michalek");
-MODULE_DESCRIPTION("InfiniBand userspace Connection Manager access");
-MODULE_LICENSE("Dual BSD/GPL");
-
-struct ib_ucm_device {
- int devnum;
- struct cdev cdev;
- struct device dev;
- struct ib_device *ib_dev;
-};
-
-struct ib_ucm_file {
- struct mutex file_mutex;
- struct file *filp;
- struct ib_ucm_device *device;
-
- struct list_head ctxs;
- struct list_head events;
- wait_queue_head_t poll_wait;
-};
-
-struct ib_ucm_context {
- int id;
- struct completion comp;
- atomic_t ref;
- int events_reported;
-
- struct ib_ucm_file *file;
- struct ib_cm_id *cm_id;
- __u64 uid;
-
- struct list_head events; /* list of pending events. */
- struct list_head file_list; /* member in file ctx list */
-};
-
-struct ib_ucm_event {
- struct ib_ucm_context *ctx;
- struct list_head file_list; /* member in file event list */
- struct list_head ctx_list; /* member in ctx event list */
-
- struct ib_cm_id *cm_id;
- struct ib_ucm_event_resp resp;
- void *data;
- void *info;
- int data_len;
- int info_len;
-};
-
-enum {
- IB_UCM_MAJOR = 231,
- IB_UCM_BASE_MINOR = 224,
- IB_UCM_MAX_DEVICES = 32
-};
-
-#define IB_UCM_BASE_DEV MKDEV(IB_UCM_MAJOR, IB_UCM_BASE_MINOR)
-
-static void ib_ucm_add_one(struct ib_device *device);
-static void ib_ucm_remove_one(struct ib_device *device, void *client_data);
-
-static struct ib_client ucm_client = {
- .name = "ucm",
- .add = ib_ucm_add_one,
- .remove = ib_ucm_remove_one
-};
-
-static DEFINE_MUTEX(ctx_id_mutex);
-static DEFINE_IDR(ctx_id_table);
-static DECLARE_BITMAP(dev_map, IB_UCM_MAX_DEVICES);
-
-static struct ib_ucm_context *ib_ucm_ctx_get(struct ib_ucm_file *file, int id)
-{
- struct ib_ucm_context *ctx;
-
- mutex_lock(&ctx_id_mutex);
- ctx = idr_find(&ctx_id_table, id);
- if (!ctx)
- ctx = ERR_PTR(-ENOENT);
- else if (ctx->file != file)
- ctx = ERR_PTR(-EINVAL);
- else
- atomic_inc(&ctx->ref);
- mutex_unlock(&ctx_id_mutex);
-
- return ctx;
-}
-
-static void ib_ucm_ctx_put(struct ib_ucm_context *ctx)
-{
- if (atomic_dec_and_test(&ctx->ref))
- complete(&ctx->comp);
-}
-
-static inline int ib_ucm_new_cm_id(int event)
-{
- return event == IB_CM_REQ_RECEIVED || event == IB_CM_SIDR_REQ_RECEIVED;
-}
-
-static void ib_ucm_cleanup_events(struct ib_ucm_context *ctx)
-{
- struct ib_ucm_event *uevent;
-
- mutex_lock(&ctx->file->file_mutex);
- list_del(&ctx->file_list);
- while (!list_empty(&ctx->events)) {
-
- uevent = list_entry(ctx->events.next,
- struct ib_ucm_event, ctx_list);
- list_del(&uevent->file_list);
- list_del(&uevent->ctx_list);
- mutex_unlock(&ctx->file->file_mutex);
-
- /* clear incoming connections. */
- if (ib_ucm_new_cm_id(uevent->resp.event))
- ib_destroy_cm_id(uevent->cm_id);
-
- kfree(uevent);
- mutex_lock(&ctx->file->file_mutex);
- }
- mutex_unlock(&ctx->file->file_mutex);
-}
-
-static struct ib_ucm_context *ib_ucm_ctx_alloc(struct ib_ucm_file *file)
-{
- struct ib_ucm_context *ctx;
-
- ctx = kzalloc(sizeof *ctx, GFP_KERNEL);
- if (!ctx)
- return NULL;
-
- atomic_set(&ctx->ref, 1);
- init_completion(&ctx->comp);
- ctx->file = file;
- INIT_LIST_HEAD(&ctx->events);
-
- mutex_lock(&ctx_id_mutex);
- ctx->id = idr_alloc(&ctx_id_table, ctx, 0, 0, GFP_KERNEL);
- mutex_unlock(&ctx_id_mutex);
- if (ctx->id < 0)
- goto error;
-
- list_add_tail(&ctx->file_list, &file->ctxs);
- return ctx;
-
-error:
- kfree(ctx);
- return NULL;
-}
-
-static void ib_ucm_event_req_get(struct ib_ucm_req_event_resp *ureq,
- struct ib_cm_req_event_param *kreq)
-{
- ureq->remote_ca_guid = kreq->remote_ca_guid;
- ureq->remote_qkey = kreq->remote_qkey;
- ureq->remote_qpn = kreq->remote_qpn;
- ureq->qp_type = kreq->qp_type;
- ureq->starting_psn = kreq->starting_psn;
- ureq->responder_resources = kreq->responder_resources;
- ureq->initiator_depth = kreq->initiator_depth;
- ureq->local_cm_response_timeout = kreq->local_cm_response_timeout;
- ureq->flow_control = kreq->flow_control;
- ureq->remote_cm_response_timeout = kreq->remote_cm_response_timeout;
- ureq->retry_count = kreq->retry_count;
- ureq->rnr_retry_count = kreq->rnr_retry_count;
- ureq->srq = kreq->srq;
- ureq->port = kreq->port;
-
- ib_copy_path_rec_to_user(&ureq->primary_path, kreq->primary_path);
- if (kreq->alternate_path)
- ib_copy_path_rec_to_user(&ureq->alternate_path,
- kreq->alternate_path);
-}
-
-static void ib_ucm_event_rep_get(struct ib_ucm_rep_event_resp *urep,
- struct ib_cm_rep_event_param *krep)
-{
- urep->remote_ca_guid = krep->remote_ca_guid;
- urep->remote_qkey = krep->remote_qkey;
- urep->remote_qpn = krep->remote_qpn;
- urep->starting_psn = krep->starting_psn;
- urep->responder_resources = krep->responder_resources;
- urep->initiator_depth = krep->initiator_depth;
- urep->target_ack_delay = krep->target_ack_delay;
- urep->failover_accepted = krep->failover_accepted;
- urep->flow_control = krep->flow_control;
- urep->rnr_retry_count = krep->rnr_retry_count;
- urep->srq = krep->srq;
-}
-
-static void ib_ucm_event_sidr_rep_get(struct ib_ucm_sidr_rep_event_resp *urep,
- struct ib_cm_sidr_rep_event_param *krep)
-{
- urep->status = krep->status;
- urep->qkey = krep->qkey;
- urep->qpn = krep->qpn;
-};
-
-static int ib_ucm_event_process(struct ib_cm_event *evt,
- struct ib_ucm_event *uvt)
-{
- void *info = NULL;
-
- switch (evt->event) {
- case IB_CM_REQ_RECEIVED:
- ib_ucm_event_req_get(&uvt->resp.u.req_resp,
- &evt->param.req_rcvd);
- uvt->data_len = IB_CM_REQ_PRIVATE_DATA_SIZE;
- uvt->resp.present = IB_UCM_PRES_PRIMARY;
- uvt->resp.present |= (evt->param.req_rcvd.alternate_path ?
- IB_UCM_PRES_ALTERNATE : 0);
- break;
- case IB_CM_REP_RECEIVED:
- ib_ucm_event_rep_get(&uvt->resp.u.rep_resp,
- &evt->param.rep_rcvd);
- uvt->data_len = IB_CM_REP_PRIVATE_DATA_SIZE;
- break;
- case IB_CM_RTU_RECEIVED:
- uvt->data_len = IB_CM_RTU_PRIVATE_DATA_SIZE;
- uvt->resp.u.send_status = evt->param.send_status;
- break;
- case IB_CM_DREQ_RECEIVED:
- uvt->data_len = IB_CM_DREQ_PRIVATE_DATA_SIZE;
- uvt->resp.u.send_status = evt->param.send_status;
- break;
- case IB_CM_DREP_RECEIVED:
- uvt->data_len = IB_CM_DREP_PRIVATE_DATA_SIZE;
- uvt->resp.u.send_status = evt->param.send_status;
- break;
- case IB_CM_MRA_RECEIVED:
- uvt->resp.u.mra_resp.timeout =
- evt->param.mra_rcvd.service_timeout;
- uvt->data_len = IB_CM_MRA_PRIVATE_DATA_SIZE;
- break;
- case IB_CM_REJ_RECEIVED:
- uvt->resp.u.rej_resp.reason = evt->param.rej_rcvd.reason;
- uvt->data_len = IB_CM_REJ_PRIVATE_DATA_SIZE;
- uvt->info_len = evt->param.rej_rcvd.ari_length;
- info = evt->param.rej_rcvd.ari;
- break;
- case IB_CM_LAP_RECEIVED:
- ib_copy_path_rec_to_user(&uvt->resp.u.lap_resp.path,
- evt->param.lap_rcvd.alternate_path);
- uvt->data_len = IB_CM_LAP_PRIVATE_DATA_SIZE;
- uvt->resp.present = IB_UCM_PRES_ALTERNATE;
- break;
- case IB_CM_APR_RECEIVED:
- uvt->resp.u.apr_resp.status = evt->param.apr_rcvd.ap_status;
- uvt->data_len = IB_CM_APR_PRIVATE_DATA_SIZE;
- uvt->info_len = evt->param.apr_rcvd.info_len;
- info = evt->param.apr_rcvd.apr_info;
- break;
- case IB_CM_SIDR_REQ_RECEIVED:
- uvt->resp.u.sidr_req_resp.pkey =
- evt->param.sidr_req_rcvd.pkey;
- uvt->resp.u.sidr_req_resp.port =
- evt->param.sidr_req_rcvd.port;
- uvt->data_len = IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE;
- break;
- case IB_CM_SIDR_REP_RECEIVED:
- ib_ucm_event_sidr_rep_get(&uvt->resp.u.sidr_rep_resp,
- &evt->param.sidr_rep_rcvd);
- uvt->data_len = IB_CM_SIDR_REP_PRIVATE_DATA_SIZE;
- uvt->info_len = evt->param.sidr_rep_rcvd.info_len;
- info = evt->param.sidr_rep_rcvd.info;
- break;
- default:
- uvt->resp.u.send_status = evt->param.send_status;
- break;
- }
-
- if (uvt->data_len) {
- uvt->data = kmemdup(evt->private_data, uvt->data_len, GFP_KERNEL);
- if (!uvt->data)
- goto err1;
-
- uvt->resp.present |= IB_UCM_PRES_DATA;
- }
-
- if (uvt->info_len) {
- uvt->info = kmemdup(info, uvt->info_len, GFP_KERNEL);
- if (!uvt->info)
- goto err2;
-
- uvt->resp.present |= IB_UCM_PRES_INFO;
- }
- return 0;
-
-err2:
- kfree(uvt->data);
-err1:
- return -ENOMEM;
-}
-
-static int ib_ucm_event_handler(struct ib_cm_id *cm_id,
- struct ib_cm_event *event)
-{
- struct ib_ucm_event *uevent;
- struct ib_ucm_context *ctx;
- int result = 0;
-
- ctx = cm_id->context;
-
- uevent = kzalloc(sizeof *uevent, GFP_KERNEL);
- if (!uevent)
- goto err1;
-
- uevent->ctx = ctx;
- uevent->cm_id = cm_id;
- uevent->resp.uid = ctx->uid;
- uevent->resp.id = ctx->id;
- uevent->resp.event = event->event;
-
- result = ib_ucm_event_process(event, uevent);
- if (result)
- goto err2;
-
- mutex_lock(&ctx->file->file_mutex);
- list_add_tail(&uevent->file_list, &ctx->file->events);
- list_add_tail(&uevent->ctx_list, &ctx->events);
- wake_up_interruptible(&ctx->file->poll_wait);
- mutex_unlock(&ctx->file->file_mutex);
- return 0;
-
-err2:
- kfree(uevent);
-err1:
- /* Destroy new cm_id's */
- return ib_ucm_new_cm_id(event->event);
-}
-
-static ssize_t ib_ucm_event(struct ib_ucm_file *file,
- const char __user *inbuf,
- int in_len, int out_len)
-{
- struct ib_ucm_context *ctx;
- struct ib_ucm_event_get cmd;
- struct ib_ucm_event *uevent;
- int result = 0;
-
- if (out_len < sizeof(struct ib_ucm_event_resp))
- return -ENOSPC;
-
- if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
- return -EFAULT;
-
- mutex_lock(&file->file_mutex);
- while (list_empty(&file->events)) {
- mutex_unlock(&file->file_mutex);
-
- if (file->filp->f_flags & O_NONBLOCK)
- return -EAGAIN;
-
- if (wait_event_interruptible(file->poll_wait,
- !list_empty(&file->events)))
- return -ERESTARTSYS;
-
- mutex_lock(&file->file_mutex);
- }
-
- uevent = list_entry(file->events.next, struct ib_ucm_event, file_list);
-
- if (ib_ucm_new_cm_id(uevent->resp.event)) {
- ctx = ib_ucm_ctx_alloc(file);
- if (!ctx) {
- result = -ENOMEM;
- goto done;
- }
-
- ctx->cm_id = uevent->cm_id;
- ctx->cm_id->context = ctx;
- uevent->resp.id = ctx->id;
- }
-
- if (copy_to_user((void __user *)(unsigned long)cmd.response,
- &uevent->resp, sizeof(uevent->resp))) {
- result = -EFAULT;
- goto done;
- }
-
- if (uevent->data) {
- if (cmd.data_len < uevent->data_len) {
- result = -ENOMEM;
- goto done;
- }
- if (copy_to_user((void __user *)(unsigned long)cmd.data,
- uevent->data, uevent->data_len)) {
- result = -EFAULT;
- goto done;
- }
- }
-
- if (uevent->info) {
- if (cmd.info_len < uevent->info_len) {
- result = -ENOMEM;
- goto done;
- }
- if (copy_to_user((void __user *)(unsigned long)cmd.info,
- uevent->info, uevent->info_len)) {
- result = -EFAULT;
- goto done;
- }
- }
-
- list_del(&uevent->file_list);
- list_del(&uevent->ctx_list);
- uevent->ctx->events_reported++;
-
- kfree(uevent->data);
- kfree(uevent->info);
- kfree(uevent);
-done:
- mutex_unlock(&file->file_mutex);
- return result;
-}
-
-static ssize_t ib_ucm_create_id(struct ib_ucm_file *file,
- const char __user *inbuf,
- int in_len, int out_len)
-{
- struct ib_ucm_create_id cmd;
- struct ib_ucm_create_id_resp resp;
- struct ib_ucm_context *ctx;
- int result;
-
- if (out_len < sizeof(resp))
- return -ENOSPC;
-
- if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
- return -EFAULT;
-
- mutex_lock(&file->file_mutex);
- ctx = ib_ucm_ctx_alloc(file);
- mutex_unlock(&file->file_mutex);
- if (!ctx)
- return -ENOMEM;
-
- ctx->uid = cmd.uid;
- ctx->cm_id = ib_create_cm_id(file->device->ib_dev,
- ib_ucm_event_handler, ctx);
- if (IS_ERR(ctx->cm_id)) {
- result = PTR_ERR(ctx->cm_id);
- goto err1;
- }
-
- resp.id = ctx->id;
- if (copy_to_user((void __user *)(unsigned long)cmd.response,
- &resp, sizeof(resp))) {
- result = -EFAULT;
- goto err2;
- }
- return 0;
-
-err2:
- ib_destroy_cm_id(ctx->cm_id);
-err1:
- mutex_lock(&ctx_id_mutex);
- idr_remove(&ctx_id_table, ctx->id);
- mutex_unlock(&ctx_id_mutex);
- kfree(ctx);
- return result;
-}
-
-static ssize_t ib_ucm_destroy_id(struct ib_ucm_file *file,
- const char __user *inbuf,
- int in_len, int out_len)
-{
- struct ib_ucm_destroy_id cmd;
- struct ib_ucm_destroy_id_resp resp;
- struct ib_ucm_context *ctx;
- int result = 0;
-
- if (out_len < sizeof(resp))
- return -ENOSPC;
-
- if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
- return -EFAULT;
-
- mutex_lock(&ctx_id_mutex);
- ctx = idr_find(&ctx_id_table, cmd.id);
- if (!ctx)
- ctx = ERR_PTR(-ENOENT);
- else if (ctx->file != file)
- ctx = ERR_PTR(-EINVAL);
- else
- idr_remove(&ctx_id_table, ctx->id);
- mutex_unlock(&ctx_id_mutex);
-
- if (IS_ERR(ctx))
- return PTR_ERR(ctx);
-
- ib_ucm_ctx_put(ctx);
- wait_for_completion(&ctx->comp);
-
- /* No new events will be generated after destroying the cm_id. */
- ib_destroy_cm_id(ctx->cm_id);
- /* Cleanup events not yet reported to the user. */
- ib_ucm_cleanup_events(ctx);
-
- resp.events_reported = ctx->events_reported;
- if (copy_to_user((void __user *)(unsigned long)cmd.response,
- &resp, sizeof(resp)))
- result = -EFAULT;
-
- kfree(ctx);
- return result;
-}
-
-static ssize_t ib_ucm_attr_id(struct ib_ucm_file *file,
- const char __user *inbuf,
- int in_len, int out_len)
-{
- struct ib_ucm_attr_id_resp resp;
- struct ib_ucm_attr_id cmd;
- struct ib_ucm_context *ctx;
- int result = 0;
-
- if (out_len < sizeof(resp))
- return -ENOSPC;
-
- if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
- return -EFAULT;
-
- ctx = ib_ucm_ctx_get(file, cmd.id);
- if (IS_ERR(ctx))
- return PTR_ERR(ctx);
-
- resp.service_id = ctx->cm_id->service_id;
- resp.service_mask = ctx->cm_id->service_mask;
- resp.local_id = ctx->cm_id->local_id;
- resp.remote_id = ctx->cm_id->remote_id;
-
- if (copy_to_user((void __user *)(unsigned long)cmd.response,
- &resp, sizeof(resp)))
- result = -EFAULT;
-
- ib_ucm_ctx_put(ctx);
- return result;
-}
-
-static ssize_t ib_ucm_init_qp_attr(struct ib_ucm_file *file,
- const char __user *inbuf,
- int in_len, int out_len)
-{
- struct ib_uverbs_qp_attr resp;
- struct ib_ucm_init_qp_attr cmd;
- struct ib_ucm_context *ctx;
- struct ib_qp_attr qp_attr;
- int result = 0;
-
- if (out_len < sizeof(resp))
- return -ENOSPC;
-
- if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
- return -EFAULT;
-
- ctx = ib_ucm_ctx_get(file, cmd.id);
- if (IS_ERR(ctx))
- return PTR_ERR(ctx);
-
- resp.qp_attr_mask = 0;
- memset(&qp_attr, 0, sizeof qp_attr);
- qp_attr.qp_state = cmd.qp_state;
- result = ib_cm_init_qp_attr(ctx->cm_id, &qp_attr, &resp.qp_attr_mask);
- if (result)
- goto out;
-
- ib_copy_qp_attr_to_user(&resp, &qp_attr);
-
- if (copy_to_user((void __user *)(unsigned long)cmd.response,
- &resp, sizeof(resp)))
- result = -EFAULT;
-
-out:
- ib_ucm_ctx_put(ctx);
- return result;
-}
-
-static int ucm_validate_listen(__be64 service_id, __be64 service_mask)
-{
- service_id &= service_mask;
-
- if (((service_id & IB_CMA_SERVICE_ID_MASK) == IB_CMA_SERVICE_ID) ||
- ((service_id & IB_SDP_SERVICE_ID_MASK) == IB_SDP_SERVICE_ID))
- return -EINVAL;
-
- return 0;
-}
-
-static ssize_t ib_ucm_listen(struct ib_ucm_file *file,
- const char __user *inbuf,
- int in_len, int out_len)
-{
- struct ib_ucm_listen cmd;
- struct ib_ucm_context *ctx;
- int result;
-
- if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
- return -EFAULT;
-
- ctx = ib_ucm_ctx_get(file, cmd.id);
- if (IS_ERR(ctx))
- return PTR_ERR(ctx);
-
- result = ucm_validate_listen(cmd.service_id, cmd.service_mask);
- if (result)
- goto out;
-
- result = ib_cm_listen(ctx->cm_id, cmd.service_id, cmd.service_mask);
-out:
- ib_ucm_ctx_put(ctx);
- return result;
-}
-
-static ssize_t ib_ucm_notify(struct ib_ucm_file *file,
- const char __user *inbuf,
- int in_len, int out_len)
-{
- struct ib_ucm_notify cmd;
- struct ib_ucm_context *ctx;
- int result;
-
- if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
- return -EFAULT;
-
- ctx = ib_ucm_ctx_get(file, cmd.id);
- if (IS_ERR(ctx))
- return PTR_ERR(ctx);
-
- result = ib_cm_notify(ctx->cm_id, (enum ib_event_type) cmd.event);
- ib_ucm_ctx_put(ctx);
- return result;
-}
-
-static int ib_ucm_alloc_data(const void **dest, u64 src, u32 len)
-{
- void *data;
-
- *dest = NULL;
-
- if (!len)
- return 0;
-
- data = memdup_user((void __user *)(unsigned long)src, len);
- if (IS_ERR(data))
- return PTR_ERR(data);
-
- *dest = data;
- return 0;
-}
-
-static int ib_ucm_path_get(struct ib_sa_path_rec **path, u64 src)
-{
- struct ib_user_path_rec upath;
- struct ib_sa_path_rec *sa_path;
-
- *path = NULL;
-
- if (!src)
- return 0;
-
- sa_path = kmalloc(sizeof(*sa_path), GFP_KERNEL);
- if (!sa_path)
- return -ENOMEM;
-
- if (copy_from_user(&upath, (void __user *)(unsigned long)src,
- sizeof(upath))) {
-
- kfree(sa_path);
- return -EFAULT;
- }
-
- ib_copy_path_rec_from_user(sa_path, &upath);
- *path = sa_path;
- return 0;
-}
-
-static ssize_t ib_ucm_send_req(struct ib_ucm_file *file,
- const char __user *inbuf,
- int in_len, int out_len)
-{
- struct ib_cm_req_param param;
- struct ib_ucm_context *ctx;
- struct ib_ucm_req cmd;
- int result;
-
- param.private_data = NULL;
- param.primary_path = NULL;
- param.alternate_path = NULL;
-
- if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
- return -EFAULT;
-
- result = ib_ucm_alloc_data(&param.private_data, cmd.data, cmd.len);
- if (result)
- goto done;
-
- result = ib_ucm_path_get(&param.primary_path, cmd.primary_path);
- if (result)
- goto done;
-
- result = ib_ucm_path_get(&param.alternate_path, cmd.alternate_path);
- if (result)
- goto done;
-
- param.private_data_len = cmd.len;
- param.service_id = cmd.sid;
- param.qp_num = cmd.qpn;
- param.qp_type = cmd.qp_type;
- param.starting_psn = cmd.psn;
- param.peer_to_peer = cmd.peer_to_peer;
- param.responder_resources = cmd.responder_resources;
- param.initiator_depth = cmd.initiator_depth;
- param.remote_cm_response_timeout = cmd.remote_cm_response_timeout;
- param.flow_control = cmd.flow_control;
- param.local_cm_response_timeout = cmd.local_cm_response_timeout;
- param.retry_count = cmd.retry_count;
- param.rnr_retry_count = cmd.rnr_retry_count;
- param.max_cm_retries = cmd.max_cm_retries;
- param.srq = cmd.srq;
-
- ctx = ib_ucm_ctx_get(file, cmd.id);
- if (!IS_ERR(ctx)) {
- result = ib_send_cm_req(ctx->cm_id, &param);
- ib_ucm_ctx_put(ctx);
- } else
- result = PTR_ERR(ctx);
-
-done:
- kfree(param.private_data);
- kfree(param.primary_path);
- kfree(param.alternate_path);
- return result;
-}
-
-static ssize_t ib_ucm_send_rep(struct ib_ucm_file *file,
- const char __user *inbuf,
- int in_len, int out_len)
-{
- struct ib_cm_rep_param param;
- struct ib_ucm_context *ctx;
- struct ib_ucm_rep cmd;
- int result;
-
- param.private_data = NULL;
-
- if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
- return -EFAULT;
-
- result = ib_ucm_alloc_data(&param.private_data, cmd.data, cmd.len);
- if (result)
- return result;
-
- param.qp_num = cmd.qpn;
- param.starting_psn = cmd.psn;
- param.private_data_len = cmd.len;
- param.responder_resources = cmd.responder_resources;
- param.initiator_depth = cmd.initiator_depth;
- param.failover_accepted = cmd.failover_accepted;
- param.flow_control = cmd.flow_control;
- param.rnr_retry_count = cmd.rnr_retry_count;
- param.srq = cmd.srq;
-
- ctx = ib_ucm_ctx_get(file, cmd.id);
- if (!IS_ERR(ctx)) {
- ctx->uid = cmd.uid;
- result = ib_send_cm_rep(ctx->cm_id, &param);
- ib_ucm_ctx_put(ctx);
- } else
- result = PTR_ERR(ctx);
-
- kfree(param.private_data);
- return result;
-}
-
-static ssize_t ib_ucm_send_private_data(struct ib_ucm_file *file,
- const char __user *inbuf, int in_len,
- int (*func)(struct ib_cm_id *cm_id,
- const void *private_data,
- u8 private_data_len))
-{
- struct ib_ucm_private_data cmd;
- struct ib_ucm_context *ctx;
- const void *private_data = NULL;
- int result;
-
- if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
- return -EFAULT;
-
- result = ib_ucm_alloc_data(&private_data, cmd.data, cmd.len);
- if (result)
- return result;
-
- ctx = ib_ucm_ctx_get(file, cmd.id);
- if (!IS_ERR(ctx)) {
- result = func(ctx->cm_id, private_data, cmd.len);
- ib_ucm_ctx_put(ctx);
- } else
- result = PTR_ERR(ctx);
-
- kfree(private_data);
- return result;
-}
-
-static ssize_t ib_ucm_send_rtu(struct ib_ucm_file *file,
- const char __user *inbuf,
- int in_len, int out_len)
-{
- return ib_ucm_send_private_data(file, inbuf, in_len, ib_send_cm_rtu);
-}
-
-static ssize_t ib_ucm_send_dreq(struct ib_ucm_file *file,
- const char __user *inbuf,
- int in_len, int out_len)
-{
- return ib_ucm_send_private_data(file, inbuf, in_len, ib_send_cm_dreq);
-}
-
-static ssize_t ib_ucm_send_drep(struct ib_ucm_file *file,
- const char __user *inbuf,
- int in_len, int out_len)
-{
- return ib_ucm_send_private_data(file, inbuf, in_len, ib_send_cm_drep);
-}
-
-static ssize_t ib_ucm_send_info(struct ib_ucm_file *file,
- const char __user *inbuf, int in_len,
- int (*func)(struct ib_cm_id *cm_id,
- int status,
- const void *info,
- u8 info_len,
- const void *data,
- u8 data_len))
-{
- struct ib_ucm_context *ctx;
- struct ib_ucm_info cmd;
- const void *data = NULL;
- const void *info = NULL;
- int result;
-
- if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
- return -EFAULT;
-
- result = ib_ucm_alloc_data(&data, cmd.data, cmd.data_len);
- if (result)
- goto done;
-
- result = ib_ucm_alloc_data(&info, cmd.info, cmd.info_len);
- if (result)
- goto done;
-
- ctx = ib_ucm_ctx_get(file, cmd.id);
- if (!IS_ERR(ctx)) {
- result = func(ctx->cm_id, cmd.status, info, cmd.info_len,
- data, cmd.data_len);
- ib_ucm_ctx_put(ctx);
- } else
- result = PTR_ERR(ctx);
-
-done:
- kfree(data);
- kfree(info);
- return result;
-}
-
-static ssize_t ib_ucm_send_rej(struct ib_ucm_file *file,
- const char __user *inbuf,
- int in_len, int out_len)
-{
- return ib_ucm_send_info(file, inbuf, in_len, (void *)ib_send_cm_rej);
-}
-
-static ssize_t ib_ucm_send_apr(struct ib_ucm_file *file,
- const char __user *inbuf,
- int in_len, int out_len)
-{
- return ib_ucm_send_info(file, inbuf, in_len, (void *)ib_send_cm_apr);
-}
-
-static ssize_t ib_ucm_send_mra(struct ib_ucm_file *file,
- const char __user *inbuf,
- int in_len, int out_len)
-{
- struct ib_ucm_context *ctx;
- struct ib_ucm_mra cmd;
- const void *data = NULL;
- int result;
-
- if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
- return -EFAULT;
-
- result = ib_ucm_alloc_data(&data, cmd.data, cmd.len);
- if (result)
- return result;
-
- ctx = ib_ucm_ctx_get(file, cmd.id);
- if (!IS_ERR(ctx)) {
- result = ib_send_cm_mra(ctx->cm_id, cmd.timeout, data, cmd.len);
- ib_ucm_ctx_put(ctx);
- } else
- result = PTR_ERR(ctx);
-
- kfree(data);
- return result;
-}
-
-static ssize_t ib_ucm_send_lap(struct ib_ucm_file *file,
- const char __user *inbuf,
- int in_len, int out_len)
-{
- struct ib_ucm_context *ctx;
- struct ib_sa_path_rec *path = NULL;
- struct ib_ucm_lap cmd;
- const void *data = NULL;
- int result;
-
- if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
- return -EFAULT;
-
- result = ib_ucm_alloc_data(&data, cmd.data, cmd.len);
- if (result)
- goto done;
-
- result = ib_ucm_path_get(&path, cmd.path);
- if (result)
- goto done;
-
- ctx = ib_ucm_ctx_get(file, cmd.id);
- if (!IS_ERR(ctx)) {
- result = ib_send_cm_lap(ctx->cm_id, path, data, cmd.len);
- ib_ucm_ctx_put(ctx);
- } else
- result = PTR_ERR(ctx);
-
-done:
- kfree(data);
- kfree(path);
- return result;
-}
-
-static ssize_t ib_ucm_send_sidr_req(struct ib_ucm_file *file,
- const char __user *inbuf,
- int in_len, int out_len)
-{
- struct ib_cm_sidr_req_param param;
- struct ib_ucm_context *ctx;
- struct ib_ucm_sidr_req cmd;
- int result;
-
- param.private_data = NULL;
- param.path = NULL;
-
- if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
- return -EFAULT;
-
- result = ib_ucm_alloc_data(&param.private_data, cmd.data, cmd.len);
- if (result)
- goto done;
-
- result = ib_ucm_path_get(&param.path, cmd.path);
- if (result)
- goto done;
-
- param.private_data_len = cmd.len;
- param.service_id = cmd.sid;
- param.timeout_ms = cmd.timeout;
- param.max_cm_retries = cmd.max_cm_retries;
-
- ctx = ib_ucm_ctx_get(file, cmd.id);
- if (!IS_ERR(ctx)) {
- result = ib_send_cm_sidr_req(ctx->cm_id, &param);
- ib_ucm_ctx_put(ctx);
- } else
- result = PTR_ERR(ctx);
-
-done:
- kfree(param.private_data);
- kfree(param.path);
- return result;
-}
-
-static ssize_t ib_ucm_send_sidr_rep(struct ib_ucm_file *file,
- const char __user *inbuf,
- int in_len, int out_len)
-{
- struct ib_cm_sidr_rep_param param;
- struct ib_ucm_sidr_rep cmd;
- struct ib_ucm_context *ctx;
- int result;
-
- param.info = NULL;
-
- if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
- return -EFAULT;
-
- result = ib_ucm_alloc_data(&param.private_data,
- cmd.data, cmd.data_len);
- if (result)
- goto done;
-
- result = ib_ucm_alloc_data(&param.info, cmd.info, cmd.info_len);
- if (result)
- goto done;
-
- param.qp_num = cmd.qpn;
- param.qkey = cmd.qkey;
- param.status = cmd.status;
- param.info_length = cmd.info_len;
- param.private_data_len = cmd.data_len;
-
- ctx = ib_ucm_ctx_get(file, cmd.id);
- if (!IS_ERR(ctx)) {
- result = ib_send_cm_sidr_rep(ctx->cm_id, &param);
- ib_ucm_ctx_put(ctx);
- } else
- result = PTR_ERR(ctx);
-
-done:
- kfree(param.private_data);
- kfree(param.info);
- return result;
-}
-
-static ssize_t (*ucm_cmd_table[])(struct ib_ucm_file *file,
- const char __user *inbuf,
- int in_len, int out_len) = {
- [IB_USER_CM_CMD_CREATE_ID] = ib_ucm_create_id,
- [IB_USER_CM_CMD_DESTROY_ID] = ib_ucm_destroy_id,
- [IB_USER_CM_CMD_ATTR_ID] = ib_ucm_attr_id,
- [IB_USER_CM_CMD_LISTEN] = ib_ucm_listen,
- [IB_USER_CM_CMD_NOTIFY] = ib_ucm_notify,
- [IB_USER_CM_CMD_SEND_REQ] = ib_ucm_send_req,
- [IB_USER_CM_CMD_SEND_REP] = ib_ucm_send_rep,
- [IB_USER_CM_CMD_SEND_RTU] = ib_ucm_send_rtu,
- [IB_USER_CM_CMD_SEND_DREQ] = ib_ucm_send_dreq,
- [IB_USER_CM_CMD_SEND_DREP] = ib_ucm_send_drep,
- [IB_USER_CM_CMD_SEND_REJ] = ib_ucm_send_rej,
- [IB_USER_CM_CMD_SEND_MRA] = ib_ucm_send_mra,
- [IB_USER_CM_CMD_SEND_LAP] = ib_ucm_send_lap,
- [IB_USER_CM_CMD_SEND_APR] = ib_ucm_send_apr,
- [IB_USER_CM_CMD_SEND_SIDR_REQ] = ib_ucm_send_sidr_req,
- [IB_USER_CM_CMD_SEND_SIDR_REP] = ib_ucm_send_sidr_rep,
- [IB_USER_CM_CMD_EVENT] = ib_ucm_event,
- [IB_USER_CM_CMD_INIT_QP_ATTR] = ib_ucm_init_qp_attr,
-};
-
-static ssize_t ib_ucm_write(struct file *filp, const char __user *buf,
- size_t len, loff_t *pos)
-{
- struct ib_ucm_file *file = filp->private_data;
- struct ib_ucm_cmd_hdr hdr;
- ssize_t result;
-
- if (!ib_safe_file_access(filp)) {
- pr_err_once("ucm_write: process %d (%s) changed security contexts after opening file descriptor, this is not allowed.\n",
- task_tgid_vnr(current), current->comm);
- return -EACCES;
- }
-
- if (len < sizeof(hdr))
- return -EINVAL;
-
- if (copy_from_user(&hdr, buf, sizeof(hdr)))
- return -EFAULT;
-
- if (hdr.cmd >= ARRAY_SIZE(ucm_cmd_table))
- return -EINVAL;
-
- if (hdr.in + sizeof(hdr) > len)
- return -EINVAL;
-
- result = ucm_cmd_table[hdr.cmd](file, buf + sizeof(hdr),
- hdr.in, hdr.out);
- if (!result)
- result = len;
-
- return result;
-}
-
-static unsigned int ib_ucm_poll(struct file *filp,
- struct poll_table_struct *wait)
-{
- struct ib_ucm_file *file = filp->private_data;
- unsigned int mask = 0;
-
- poll_wait(filp, &file->poll_wait, wait);
-
- if (!list_empty(&file->events))
- mask = POLLIN | POLLRDNORM;
-
- return mask;
-}
-
-/*
- * ib_ucm_open() does not need the BKL:
- *
- * - no global state is referred to;
- * - there is no ioctl method to race against;
- * - no further module initialization is required for open to work
- * after the device is registered.
- */
-static int ib_ucm_open(struct inode *inode, struct file *filp)
-{
- struct ib_ucm_file *file;
-
- file = kmalloc(sizeof(*file), GFP_KERNEL);
- if (!file)
- return -ENOMEM;
-
- INIT_LIST_HEAD(&file->events);
- INIT_LIST_HEAD(&file->ctxs);
- init_waitqueue_head(&file->poll_wait);
-
- mutex_init(&file->file_mutex);
-
- filp->private_data = file;
- file->filp = filp;
- file->device = container_of(inode->i_cdev, struct ib_ucm_device, cdev);
-
- return nonseekable_open(inode, filp);
-}
-
-static int ib_ucm_close(struct inode *inode, struct file *filp)
-{
- struct ib_ucm_file *file = filp->private_data;
- struct ib_ucm_context *ctx;
-
- mutex_lock(&file->file_mutex);
- while (!list_empty(&file->ctxs)) {
- ctx = list_entry(file->ctxs.next,
- struct ib_ucm_context, file_list);
- mutex_unlock(&file->file_mutex);
-
- mutex_lock(&ctx_id_mutex);
- idr_remove(&ctx_id_table, ctx->id);
- mutex_unlock(&ctx_id_mutex);
-
- ib_destroy_cm_id(ctx->cm_id);
- ib_ucm_cleanup_events(ctx);
- kfree(ctx);
-
- mutex_lock(&file->file_mutex);
- }
- mutex_unlock(&file->file_mutex);
- kfree(file);
- return 0;
-}
-
-static DECLARE_BITMAP(overflow_map, IB_UCM_MAX_DEVICES);
-static void ib_ucm_release_dev(struct device *dev)
-{
- struct ib_ucm_device *ucm_dev;
-
- ucm_dev = container_of(dev, struct ib_ucm_device, dev);
- cdev_del(&ucm_dev->cdev);
- if (ucm_dev->devnum < IB_UCM_MAX_DEVICES)
- clear_bit(ucm_dev->devnum, dev_map);
- else
- clear_bit(ucm_dev->devnum - IB_UCM_MAX_DEVICES, overflow_map);
- kfree(ucm_dev);
-}
-
-static const struct file_operations ucm_fops = {
- .owner = THIS_MODULE,
- .open = ib_ucm_open,
- .release = ib_ucm_close,
- .write = ib_ucm_write,
- .poll = ib_ucm_poll,
- .llseek = no_llseek,
-};
-
-static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr,
- char *buf)
-{
- struct ib_ucm_device *ucm_dev;
-
- ucm_dev = container_of(dev, struct ib_ucm_device, dev);
- return sprintf(buf, "%s\n", ucm_dev->ib_dev->name);
-}
-static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
-
-static dev_t overflow_maj;
-static int find_overflow_devnum(void)
-{
- int ret;
-
- if (!overflow_maj) {
- ret = alloc_chrdev_region(&overflow_maj, 0, IB_UCM_MAX_DEVICES,
- "infiniband_cm");
- if (ret) {
- pr_err("ucm: couldn't register dynamic device number\n");
- return ret;
- }
- }
-
- ret = find_first_zero_bit(overflow_map, IB_UCM_MAX_DEVICES);
- if (ret >= IB_UCM_MAX_DEVICES)
- return -1;
-
- return ret;
-}
-
-static void ib_ucm_add_one(struct ib_device *device)
-{
- int devnum;
- dev_t base;
- struct ib_ucm_device *ucm_dev;
-
- if (!device->alloc_ucontext || !rdma_cap_ib_cm(device, 1))
- return;
-
- ucm_dev = kzalloc(sizeof *ucm_dev, GFP_KERNEL);
- if (!ucm_dev)
- return;
-
- ucm_dev->ib_dev = device;
-
- devnum = find_first_zero_bit(dev_map, IB_UCM_MAX_DEVICES);
- if (devnum >= IB_UCM_MAX_DEVICES) {
- devnum = find_overflow_devnum();
- if (devnum < 0)
- goto err;
-
- ucm_dev->devnum = devnum + IB_UCM_MAX_DEVICES;
- base = devnum + overflow_maj;
- set_bit(devnum, overflow_map);
- } else {
- ucm_dev->devnum = devnum;
- base = devnum + IB_UCM_BASE_DEV;
- set_bit(devnum, dev_map);
- }
-
- cdev_init(&ucm_dev->cdev, &ucm_fops);
- ucm_dev->cdev.owner = THIS_MODULE;
- kobject_set_name(&ucm_dev->cdev.kobj, "ucm%d", ucm_dev->devnum);
- if (cdev_add(&ucm_dev->cdev, base, 1))
- goto err;
-
- ucm_dev->dev.class = &cm_class;
- ucm_dev->dev.parent = device->dma_device;
- ucm_dev->dev.devt = ucm_dev->cdev.dev;
- ucm_dev->dev.release = ib_ucm_release_dev;
- dev_set_name(&ucm_dev->dev, "ucm%d", ucm_dev->devnum);
- if (device_register(&ucm_dev->dev))
- goto err_cdev;
-
- if (device_create_file(&ucm_dev->dev, &dev_attr_ibdev))
- goto err_dev;
-
- ib_set_client_data(device, &ucm_client, ucm_dev);
- return;
-
-err_dev:
- device_unregister(&ucm_dev->dev);
-err_cdev:
- cdev_del(&ucm_dev->cdev);
- if (ucm_dev->devnum < IB_UCM_MAX_DEVICES)
- clear_bit(devnum, dev_map);
- else
- clear_bit(devnum, overflow_map);
-err:
- kfree(ucm_dev);
- return;
-}
-
-static void ib_ucm_remove_one(struct ib_device *device, void *client_data)
-{
- struct ib_ucm_device *ucm_dev = client_data;
-
- if (!ucm_dev)
- return;
-
- device_unregister(&ucm_dev->dev);
-}
-
-static CLASS_ATTR_STRING(abi_version, S_IRUGO,
- __stringify(IB_USER_CM_ABI_VERSION));
-
-static int __init ib_ucm_init(void)
-{
- int ret;
-
- ret = register_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES,
- "infiniband_cm");
- if (ret) {
- pr_err("ucm: couldn't register device number\n");
- goto error1;
- }
-
- ret = class_create_file(&cm_class, &class_attr_abi_version.attr);
- if (ret) {
- pr_err("ucm: couldn't create abi_version attribute\n");
- goto error2;
- }
-
- ret = ib_register_client(&ucm_client);
- if (ret) {
- pr_err("ucm: couldn't register client\n");
- goto error3;
- }
- return 0;
-
-error3:
- class_remove_file(&cm_class, &class_attr_abi_version.attr);
-error2:
- unregister_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES);
-error1:
- return ret;
-}
-
-static void __exit ib_ucm_cleanup(void)
-{
- ib_unregister_client(&ucm_client);
- class_remove_file(&cm_class, &class_attr_abi_version.attr);
- unregister_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES);
- if (overflow_maj)
- unregister_chrdev_region(overflow_maj, IB_UCM_MAX_DEVICES);
- idr_destroy(&ctx_id_table);
-}
-
-module_init(ib_ucm_init);
-module_exit(ib_ucm_cleanup);
diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index e12f8faf8c23..ec3be65a2b88 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -44,12 +44,17 @@
#include <linux/module.h>
#include <linux/nsproxy.h>
+#include <linux/nospec.h>
+
#include <rdma/rdma_user_cm.h>
#include <rdma/ib_marshall.h>
#include <rdma/rdma_cm.h>
#include <rdma/rdma_cm_ib.h>
#include <rdma/ib_addr.h>
#include <rdma/ib.h>
+#include <rdma/ib_cm.h>
+#include <rdma/rdma_netlink.h>
+#include "core_priv.h"
MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("RDMA Userspace Connection Manager Access");
@@ -64,9 +69,10 @@ static struct ctl_table ucma_ctl_table[] = {
.data = &max_backlog,
.maxlen = sizeof max_backlog,
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_INT_MAX,
},
- { }
};
struct ucma_file {
@@ -75,34 +81,28 @@ struct ucma_file {
struct list_head ctx_list;
struct list_head event_list;
wait_queue_head_t poll_wait;
- struct workqueue_struct *close_wq;
};
struct ucma_context {
- int id;
+ u32 id;
struct completion comp;
- atomic_t ref;
+ refcount_t ref;
int events_reported;
- int backlog;
+ atomic_t backlog;
struct ucma_file *file;
struct rdma_cm_id *cm_id;
+ struct mutex mutex;
u64 uid;
struct list_head list;
struct list_head mc_list;
- /* mark that device is in process of destroying the internal HW
- * resources, protected by the global mut
- */
- int closing;
- /* sync between removal event and id destroy, protected by file mut */
- int destroying;
struct work_struct close_work;
};
struct ucma_multicast {
struct ucma_context *ctx;
- int id;
+ u32 id;
int events_reported;
u64 uid;
@@ -113,23 +113,24 @@ struct ucma_multicast {
struct ucma_event {
struct ucma_context *ctx;
+ struct ucma_context *conn_req_ctx;
struct ucma_multicast *mc;
struct list_head list;
- struct rdma_cm_id *cm_id;
struct rdma_ucm_event_resp resp;
- struct work_struct close_work;
};
-static DEFINE_MUTEX(mut);
-static DEFINE_IDR(ctx_idr);
-static DEFINE_IDR(multicast_idr);
+static DEFINE_XARRAY_ALLOC(ctx_table);
+static DEFINE_XARRAY_ALLOC(multicast_table);
+
+static const struct file_operations ucma_fops;
+static int ucma_destroy_private_ctx(struct ucma_context *ctx);
static inline struct ucma_context *_ucma_find_context(int id,
struct ucma_file *file)
{
struct ucma_context *ctx;
- ctx = idr_find(&ctx_idr, id);
+ ctx = xa_load(&ctx_table, id);
if (!ctx)
ctx = ERR_PTR(-ENOENT);
else if (ctx->file != file)
@@ -141,30 +142,36 @@ static struct ucma_context *ucma_get_ctx(struct ucma_file *file, int id)
{
struct ucma_context *ctx;
- mutex_lock(&mut);
+ xa_lock(&ctx_table);
ctx = _ucma_find_context(id, file);
- if (!IS_ERR(ctx)) {
- if (ctx->closing)
- ctx = ERR_PTR(-EIO);
- else
- atomic_inc(&ctx->ref);
- }
- mutex_unlock(&mut);
+ if (!IS_ERR(ctx))
+ if (!refcount_inc_not_zero(&ctx->ref))
+ ctx = ERR_PTR(-ENXIO);
+ xa_unlock(&ctx_table);
return ctx;
}
static void ucma_put_ctx(struct ucma_context *ctx)
{
- if (atomic_dec_and_test(&ctx->ref))
+ if (refcount_dec_and_test(&ctx->ref))
complete(&ctx->comp);
}
-static void ucma_close_event_id(struct work_struct *work)
+/*
+ * Same as ucm_get_ctx but requires that ->cm_id->device is valid, eg that the
+ * CM_ID is bound.
+ */
+static struct ucma_context *ucma_get_ctx_dev(struct ucma_file *file, int id)
{
- struct ucma_event *uevent_close = container_of(work, struct ucma_event, close_work);
+ struct ucma_context *ctx = ucma_get_ctx(file, id);
- rdma_destroy_id(uevent_close->cm_id);
- kfree(uevent_close);
+ if (IS_ERR(ctx))
+ return ctx;
+ if (!ctx->cm_id->device) {
+ ucma_put_ctx(ctx);
+ return ERR_PTR(-EINVAL);
+ }
+ return ctx;
}
static void ucma_close_id(struct work_struct *work)
@@ -173,12 +180,15 @@ static void ucma_close_id(struct work_struct *work)
/* once all inflight tasks are finished, we close all underlying
* resources. The context is still alive till its explicit destryoing
- * by its creator.
+ * by its creator. This puts back the xarray's reference.
*/
ucma_put_ctx(ctx);
wait_for_completion(&ctx->comp);
/* No new events will be generated after destroying the id. */
rdma_destroy_id(ctx->cm_id);
+
+ /* Reading the cm_id without holding a positive ref is not allowed */
+ ctx->cm_id = NULL;
}
static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file)
@@ -190,46 +200,32 @@ static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file)
return NULL;
INIT_WORK(&ctx->close_work, ucma_close_id);
- atomic_set(&ctx->ref, 1);
init_completion(&ctx->comp);
INIT_LIST_HEAD(&ctx->mc_list);
+ /* So list_del() will work if we don't do ucma_finish_ctx() */
+ INIT_LIST_HEAD(&ctx->list);
ctx->file = file;
+ mutex_init(&ctx->mutex);
- mutex_lock(&mut);
- ctx->id = idr_alloc(&ctx_idr, ctx, 0, 0, GFP_KERNEL);
- mutex_unlock(&mut);
- if (ctx->id < 0)
- goto error;
-
- list_add_tail(&ctx->list, &file->ctx_list);
+ if (xa_alloc(&ctx_table, &ctx->id, NULL, xa_limit_32b, GFP_KERNEL)) {
+ kfree(ctx);
+ return NULL;
+ }
return ctx;
-
-error:
- kfree(ctx);
- return NULL;
}
-static struct ucma_multicast* ucma_alloc_multicast(struct ucma_context *ctx)
+static void ucma_set_ctx_cm_id(struct ucma_context *ctx,
+ struct rdma_cm_id *cm_id)
{
- struct ucma_multicast *mc;
-
- mc = kzalloc(sizeof(*mc), GFP_KERNEL);
- if (!mc)
- return NULL;
-
- mutex_lock(&mut);
- mc->id = idr_alloc(&multicast_idr, mc, 0, 0, GFP_KERNEL);
- mutex_unlock(&mut);
- if (mc->id < 0)
- goto error;
-
- mc->ctx = ctx;
- list_add_tail(&mc->list, &ctx->mc_list);
- return mc;
+ refcount_set(&ctx->ref, 1);
+ ctx->cm_id = cm_id;
+}
-error:
- kfree(mc);
- return NULL;
+static void ucma_finish_ctx(struct ucma_context *ctx)
+{
+ lockdep_assert_held(&ctx->file->mut);
+ list_add_tail(&ctx->list, &ctx->file->ctx_list);
+ xa_store(&ctx_table, ctx->id, ctx, GFP_KERNEL);
}
static void ucma_copy_conn_event(struct rdma_ucm_conn_param *dst,
@@ -239,7 +235,7 @@ static void ucma_copy_conn_event(struct rdma_ucm_conn_param *dst,
memcpy(dst->private_data, src->private_data,
src->private_data_len);
dst->private_data_len = src->private_data_len;
- dst->responder_resources =src->responder_resources;
+ dst->responder_resources = src->responder_resources;
dst->initiator_depth = src->initiator_depth;
dst->flow_control = src->flow_control;
dst->retry_count = src->retry_count;
@@ -248,22 +244,28 @@ static void ucma_copy_conn_event(struct rdma_ucm_conn_param *dst,
dst->qp_num = src->qp_num;
}
-static void ucma_copy_ud_event(struct rdma_ucm_ud_param *dst,
+static void ucma_copy_ud_event(struct ib_device *device,
+ struct rdma_ucm_ud_param *dst,
struct rdma_ud_param *src)
{
if (src->private_data_len)
memcpy(dst->private_data, src->private_data,
src->private_data_len);
dst->private_data_len = src->private_data_len;
- ib_copy_ah_attr_to_user(&dst->ah_attr, &src->ah_attr);
+ ib_copy_ah_attr_to_user(device, &dst->ah_attr, &src->ah_attr);
dst->qp_num = src->qp_num;
dst->qkey = src->qkey;
}
-static void ucma_set_event_context(struct ucma_context *ctx,
- struct rdma_cm_event *event,
- struct ucma_event *uevent)
+static struct ucma_event *ucma_create_uevent(struct ucma_context *ctx,
+ struct rdma_cm_event *event)
{
+ struct ucma_event *uevent;
+
+ uevent = kzalloc(sizeof(*uevent), GFP_KERNEL);
+ if (!uevent)
+ return NULL;
+
uevent->ctx = ctx;
switch (event->event) {
case RDMA_CM_EVENT_MULTICAST_JOIN:
@@ -278,44 +280,60 @@ static void ucma_set_event_context(struct ucma_context *ctx,
uevent->resp.id = ctx->id;
break;
}
+ uevent->resp.event = event->event;
+ uevent->resp.status = event->status;
+
+ if (event->event == RDMA_CM_EVENT_ADDRINFO_RESOLVED)
+ goto out;
+
+ if (ctx->cm_id->qp_type == IB_QPT_UD)
+ ucma_copy_ud_event(ctx->cm_id->device, &uevent->resp.param.ud,
+ &event->param.ud);
+ else
+ ucma_copy_conn_event(&uevent->resp.param.conn,
+ &event->param.conn);
+
+out:
+ uevent->resp.ece.vendor_id = event->ece.vendor_id;
+ uevent->resp.ece.attr_mod = event->ece.attr_mod;
+ return uevent;
}
-/* Called with file->mut locked for the relevant context. */
-static void ucma_removal_event_handler(struct rdma_cm_id *cm_id)
+static int ucma_connect_event_handler(struct rdma_cm_id *cm_id,
+ struct rdma_cm_event *event)
{
- struct ucma_context *ctx = cm_id->context;
- struct ucma_event *con_req_eve;
- int event_found = 0;
+ struct ucma_context *listen_ctx = cm_id->context;
+ struct ucma_context *ctx;
+ struct ucma_event *uevent;
- if (ctx->destroying)
- return;
+ if (!atomic_add_unless(&listen_ctx->backlog, -1, 0))
+ return -ENOMEM;
+ ctx = ucma_alloc_ctx(listen_ctx->file);
+ if (!ctx)
+ goto err_backlog;
+ ucma_set_ctx_cm_id(ctx, cm_id);
- /* only if context is pointing to cm_id that it owns it and can be
- * queued to be closed, otherwise that cm_id is an inflight one that
- * is part of that context event list pending to be detached and
- * reattached to its new context as part of ucma_get_event,
- * handled separately below.
- */
- if (ctx->cm_id == cm_id) {
- mutex_lock(&mut);
- ctx->closing = 1;
- mutex_unlock(&mut);
- queue_work(ctx->file->close_wq, &ctx->close_work);
- return;
- }
+ uevent = ucma_create_uevent(listen_ctx, event);
+ if (!uevent)
+ goto err_alloc;
+ uevent->conn_req_ctx = ctx;
+ uevent->resp.id = ctx->id;
- list_for_each_entry(con_req_eve, &ctx->file->event_list, list) {
- if (con_req_eve->cm_id == cm_id &&
- con_req_eve->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST) {
- list_del(&con_req_eve->list);
- INIT_WORK(&con_req_eve->close_work, ucma_close_event_id);
- queue_work(ctx->file->close_wq, &con_req_eve->close_work);
- event_found = 1;
- break;
- }
- }
- if (!event_found)
- pr_err("ucma_removal_event_handler: warning: connect request event wasn't found\n");
+ ctx->cm_id->context = ctx;
+
+ mutex_lock(&ctx->file->mut);
+ ucma_finish_ctx(ctx);
+ list_add_tail(&uevent->list, &ctx->file->event_list);
+ mutex_unlock(&ctx->file->mut);
+ wake_up_interruptible(&ctx->file->poll_wait);
+ return 0;
+
+err_alloc:
+ ucma_destroy_private_ctx(ctx);
+err_backlog:
+ atomic_inc(&listen_ctx->backlog);
+ /* Returning error causes the new ID to be destroyed */
+ return -ENOMEM;
}
static int ucma_event_handler(struct rdma_cm_id *cm_id,
@@ -323,64 +341,49 @@ static int ucma_event_handler(struct rdma_cm_id *cm_id,
{
struct ucma_event *uevent;
struct ucma_context *ctx = cm_id->context;
- int ret = 0;
- uevent = kzalloc(sizeof(*uevent), GFP_KERNEL);
- if (!uevent)
- return event->event == RDMA_CM_EVENT_CONNECT_REQUEST;
+ if (event->event == RDMA_CM_EVENT_CONNECT_REQUEST)
+ return ucma_connect_event_handler(cm_id, event);
- mutex_lock(&ctx->file->mut);
- uevent->cm_id = cm_id;
- ucma_set_event_context(ctx, event, uevent);
- uevent->resp.event = event->event;
- uevent->resp.status = event->status;
- if (cm_id->qp_type == IB_QPT_UD)
- ucma_copy_ud_event(&uevent->resp.param.ud, &event->param.ud);
- else
- ucma_copy_conn_event(&uevent->resp.param.conn,
- &event->param.conn);
-
- if (event->event == RDMA_CM_EVENT_CONNECT_REQUEST) {
- if (!ctx->backlog) {
- ret = -ENOMEM;
- kfree(uevent);
- goto out;
- }
- ctx->backlog--;
- } else if (!ctx->uid || ctx->cm_id != cm_id) {
- /*
- * We ignore events for new connections until userspace has set
- * their context. This can only happen if an error occurs on a
- * new connection before the user accepts it. This is okay,
- * since the accept will just fail later. However, we do need
- * to release the underlying HW resources in case of a device
- * removal event.
- */
- if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL)
- ucma_removal_event_handler(cm_id);
-
- kfree(uevent);
- goto out;
+ /*
+ * We ignore events for new connections until userspace has set their
+ * context. This can only happen if an error occurs on a new connection
+ * before the user accepts it. This is okay, since the accept will just
+ * fail later. However, we do need to release the underlying HW
+ * resources in case of a device removal event.
+ */
+ if (ctx->uid) {
+ uevent = ucma_create_uevent(ctx, event);
+ if (!uevent)
+ return 0;
+
+ mutex_lock(&ctx->file->mut);
+ list_add_tail(&uevent->list, &ctx->file->event_list);
+ mutex_unlock(&ctx->file->mut);
+ wake_up_interruptible(&ctx->file->poll_wait);
}
- list_add_tail(&uevent->list, &ctx->file->event_list);
- wake_up_interruptible(&ctx->file->poll_wait);
- if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL)
- ucma_removal_event_handler(cm_id);
-out:
- mutex_unlock(&ctx->file->mut);
- return ret;
+ if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL) {
+ xa_lock(&ctx_table);
+ if (xa_load(&ctx_table, ctx->id) == ctx)
+ queue_work(system_dfl_wq, &ctx->close_work);
+ xa_unlock(&ctx_table);
+ }
+ return 0;
}
static ssize_t ucma_get_event(struct ucma_file *file, const char __user *inbuf,
int in_len, int out_len)
{
- struct ucma_context *ctx;
struct rdma_ucm_get_event cmd;
struct ucma_event *uevent;
- int ret = 0;
- if (out_len < sizeof uevent->resp)
+ /*
+ * Old 32 bit user space does not send the 4 byte padding in the
+ * reserved field. We don't care, allow it to keep working.
+ */
+ if (out_len < sizeof(uevent->resp) - sizeof(uevent->resp.reserved) -
+ sizeof(uevent->resp.ece))
return -ENOSPC;
if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
@@ -400,34 +403,25 @@ static ssize_t ucma_get_event(struct ucma_file *file, const char __user *inbuf,
mutex_lock(&file->mut);
}
- uevent = list_entry(file->event_list.next, struct ucma_event, list);
-
- if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST) {
- ctx = ucma_alloc_ctx(file);
- if (!ctx) {
- ret = -ENOMEM;
- goto done;
- }
- uevent->ctx->backlog++;
- ctx->cm_id = uevent->cm_id;
- ctx->cm_id->context = ctx;
- uevent->resp.id = ctx->id;
- }
+ uevent = list_first_entry(&file->event_list, struct ucma_event, list);
- if (copy_to_user((void __user *)(unsigned long)cmd.response,
- &uevent->resp, sizeof uevent->resp)) {
- ret = -EFAULT;
- goto done;
+ if (copy_to_user(u64_to_user_ptr(cmd.response),
+ &uevent->resp,
+ min_t(size_t, out_len, sizeof(uevent->resp)))) {
+ mutex_unlock(&file->mut);
+ return -EFAULT;
}
list_del(&uevent->list);
uevent->ctx->events_reported++;
if (uevent->mc)
uevent->mc->events_reported++;
- kfree(uevent);
-done:
+ if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST)
+ atomic_inc(&uevent->ctx->backlog);
mutex_unlock(&file->mut);
- return ret;
+
+ kfree(uevent);
+ return 0;
}
static int ucma_get_qp_type(struct rdma_ucm_create_id *cmd, enum ib_qp_type *qp_type)
@@ -454,6 +448,7 @@ static ssize_t ucma_create_id(struct ucma_file *file, const char __user *inbuf,
struct rdma_ucm_create_id cmd;
struct rdma_ucm_create_id_resp resp;
struct ucma_context *ctx;
+ struct rdma_cm_id *cm_id;
enum ib_qp_type qp_type;
int ret;
@@ -467,35 +462,32 @@ static ssize_t ucma_create_id(struct ucma_file *file, const char __user *inbuf,
if (ret)
return ret;
- mutex_lock(&file->mut);
ctx = ucma_alloc_ctx(file);
- mutex_unlock(&file->mut);
if (!ctx)
return -ENOMEM;
ctx->uid = cmd.uid;
- ctx->cm_id = rdma_create_id(current->nsproxy->net_ns,
- ucma_event_handler, ctx, cmd.ps, qp_type);
- if (IS_ERR(ctx->cm_id)) {
- ret = PTR_ERR(ctx->cm_id);
+ cm_id = rdma_create_user_id(ucma_event_handler, ctx, cmd.ps, qp_type);
+ if (IS_ERR(cm_id)) {
+ ret = PTR_ERR(cm_id);
goto err1;
}
+ ucma_set_ctx_cm_id(ctx, cm_id);
resp.id = ctx->id;
- if (copy_to_user((void __user *)(unsigned long)cmd.response,
+ if (copy_to_user(u64_to_user_ptr(cmd.response),
&resp, sizeof(resp))) {
ret = -EFAULT;
- goto err2;
+ goto err1;
}
+
+ mutex_lock(&file->mut);
+ ucma_finish_ctx(ctx);
+ mutex_unlock(&file->mut);
return 0;
-err2:
- rdma_destroy_id(ctx->cm_id);
err1:
- mutex_lock(&mut);
- idr_remove(&ctx_idr, ctx->id);
- mutex_unlock(&mut);
- kfree(ctx);
+ ucma_destroy_private_ctx(ctx);
return ret;
}
@@ -503,19 +495,25 @@ static void ucma_cleanup_multicast(struct ucma_context *ctx)
{
struct ucma_multicast *mc, *tmp;
- mutex_lock(&mut);
+ xa_lock(&multicast_table);
list_for_each_entry_safe(mc, tmp, &ctx->mc_list, list) {
list_del(&mc->list);
- idr_remove(&multicast_idr, mc->id);
+ /*
+ * At this point mc->ctx->ref is 0 so the mc cannot leave the
+ * lock on the reader and this is enough serialization
+ */
+ __xa_erase(&multicast_table, mc->id);
kfree(mc);
}
- mutex_unlock(&mut);
+ xa_unlock(&multicast_table);
}
static void ucma_cleanup_mc_events(struct ucma_multicast *mc)
{
struct ucma_event *uevent, *tmp;
+ rdma_lock_handler(mc->ctx->cm_id);
+ mutex_lock(&mc->ctx->file->mut);
list_for_each_entry_safe(uevent, tmp, &mc->ctx->file->event_list, list) {
if (uevent->mc != mc)
continue;
@@ -523,45 +521,75 @@ static void ucma_cleanup_mc_events(struct ucma_multicast *mc)
list_del(&uevent->list);
kfree(uevent);
}
+ mutex_unlock(&mc->ctx->file->mut);
+ rdma_unlock_handler(mc->ctx->cm_id);
}
-/*
- * ucma_free_ctx is called after the underlying rdma CM-ID is destroyed. At
- * this point, no new events will be reported from the hardware. However, we
- * still need to cleanup the UCMA context for this ID. Specifically, there
- * might be events that have not yet been consumed by the user space software.
- * These might include pending connect requests which we have not completed
- * processing. We cannot call rdma_destroy_id while holding the lock of the
- * context (file->mut), as it might cause a deadlock. We therefore extract all
- * relevant events from the context pending events list while holding the
- * mutex. After that we release them as needed.
- */
-static int ucma_free_ctx(struct ucma_context *ctx)
+static int ucma_cleanup_ctx_events(struct ucma_context *ctx)
{
int events_reported;
struct ucma_event *uevent, *tmp;
LIST_HEAD(list);
-
- ucma_cleanup_multicast(ctx);
-
- /* Cleanup events not yet reported to the user. */
+ /* Cleanup events not yet reported to the user.*/
mutex_lock(&ctx->file->mut);
list_for_each_entry_safe(uevent, tmp, &ctx->file->event_list, list) {
- if (uevent->ctx == ctx)
+ if (uevent->ctx != ctx)
+ continue;
+
+ if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST &&
+ xa_cmpxchg(&ctx_table, uevent->conn_req_ctx->id,
+ uevent->conn_req_ctx, XA_ZERO_ENTRY,
+ GFP_KERNEL) == uevent->conn_req_ctx) {
list_move_tail(&uevent->list, &list);
+ continue;
+ }
+ list_del(&uevent->list);
+ kfree(uevent);
}
list_del(&ctx->list);
+ events_reported = ctx->events_reported;
mutex_unlock(&ctx->file->mut);
+ /*
+ * If this was a listening ID then any connections spawned from it that
+ * have not been delivered to userspace are cleaned up too. Must be done
+ * outside any locks.
+ */
list_for_each_entry_safe(uevent, tmp, &list, list) {
- list_del(&uevent->list);
- if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST)
- rdma_destroy_id(uevent->cm_id);
+ ucma_destroy_private_ctx(uevent->conn_req_ctx);
kfree(uevent);
}
+ return events_reported;
+}
- events_reported = ctx->events_reported;
+/*
+ * When this is called the xarray must have a XA_ZERO_ENTRY in the ctx->id (ie
+ * the ctx is not public to the user). This either because:
+ * - ucma_finish_ctx() hasn't been called
+ * - xa_cmpxchg() succeed to remove the entry (only one thread can succeed)
+ */
+static int ucma_destroy_private_ctx(struct ucma_context *ctx)
+{
+ int events_reported;
+
+ /*
+ * Destroy the underlying cm_id. New work queuing is prevented now by
+ * the removal from the xarray. Once the work is cancled ref will either
+ * be 0 because the work ran to completion and consumed the ref from the
+ * xarray, or it will be positive because we still have the ref from the
+ * xarray. This can also be 0 in cases where cm_id was never set
+ */
+ cancel_work_sync(&ctx->close_work);
+ if (refcount_read(&ctx->ref))
+ ucma_close_id(&ctx->close_work);
+
+ events_reported = ucma_cleanup_ctx_events(ctx);
+ ucma_cleanup_multicast(ctx);
+
+ WARN_ON(xa_cmpxchg(&ctx_table, ctx->id, XA_ZERO_ENTRY, NULL,
+ GFP_KERNEL) != NULL);
+ mutex_destroy(&ctx->mutex);
kfree(ctx);
return events_reported;
}
@@ -580,34 +608,20 @@ static ssize_t ucma_destroy_id(struct ucma_file *file, const char __user *inbuf,
if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
return -EFAULT;
- mutex_lock(&mut);
+ xa_lock(&ctx_table);
ctx = _ucma_find_context(cmd.id, file);
- if (!IS_ERR(ctx))
- idr_remove(&ctx_idr, ctx->id);
- mutex_unlock(&mut);
+ if (!IS_ERR(ctx)) {
+ if (__xa_cmpxchg(&ctx_table, ctx->id, ctx, XA_ZERO_ENTRY,
+ GFP_KERNEL) != ctx)
+ ctx = ERR_PTR(-ENOENT);
+ }
+ xa_unlock(&ctx_table);
if (IS_ERR(ctx))
return PTR_ERR(ctx);
- mutex_lock(&ctx->file->mut);
- ctx->destroying = 1;
- mutex_unlock(&ctx->file->mut);
-
- flush_workqueue(ctx->file->close_wq);
- /* At this point it's guaranteed that there is no inflight
- * closing task */
- mutex_lock(&mut);
- if (!ctx->closing) {
- mutex_unlock(&mut);
- ucma_put_ctx(ctx);
- wait_for_completion(&ctx->comp);
- rdma_destroy_id(ctx->cm_id);
- } else {
- mutex_unlock(&mut);
- }
-
- resp.events_reported = ucma_free_ctx(ctx);
- if (copy_to_user((void __user *)(unsigned long)cmd.response,
+ resp.events_reported = ucma_destroy_private_ctx(ctx);
+ if (copy_to_user(u64_to_user_ptr(cmd.response),
&resp, sizeof(resp)))
ret = -EFAULT;
@@ -624,11 +638,17 @@ static ssize_t ucma_bind_ip(struct ucma_file *file, const char __user *inbuf,
if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
return -EFAULT;
+ if (!rdma_addr_size_in6(&cmd.addr))
+ return -EINVAL;
+
ctx = ucma_get_ctx(file, cmd.id);
if (IS_ERR(ctx))
return PTR_ERR(ctx);
+ mutex_lock(&ctx->mutex);
ret = rdma_bind_addr(ctx->cm_id, (struct sockaddr *) &cmd.addr);
+ mutex_unlock(&ctx->mutex);
+
ucma_put_ctx(ctx);
return ret;
}
@@ -637,22 +657,23 @@ static ssize_t ucma_bind(struct ucma_file *file, const char __user *inbuf,
int in_len, int out_len)
{
struct rdma_ucm_bind cmd;
- struct sockaddr *addr;
struct ucma_context *ctx;
int ret;
if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
return -EFAULT;
- addr = (struct sockaddr *) &cmd.addr;
- if (cmd.reserved || !cmd.addr_size || (cmd.addr_size != rdma_addr_size(addr)))
+ if (cmd.reserved || !cmd.addr_size ||
+ cmd.addr_size != rdma_addr_size_kss(&cmd.addr))
return -EINVAL;
ctx = ucma_get_ctx(file, cmd.id);
if (IS_ERR(ctx))
return PTR_ERR(ctx);
- ret = rdma_bind_addr(ctx->cm_id, addr);
+ mutex_lock(&ctx->mutex);
+ ret = rdma_bind_addr(ctx->cm_id, (struct sockaddr *) &cmd.addr);
+ mutex_unlock(&ctx->mutex);
ucma_put_ctx(ctx);
return ret;
}
@@ -668,13 +689,18 @@ static ssize_t ucma_resolve_ip(struct ucma_file *file,
if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
return -EFAULT;
+ if ((cmd.src_addr.sin6_family && !rdma_addr_size_in6(&cmd.src_addr)) ||
+ !rdma_addr_size_in6(&cmd.dst_addr))
+ return -EINVAL;
+
ctx = ucma_get_ctx(file, cmd.id);
if (IS_ERR(ctx))
return PTR_ERR(ctx);
+ mutex_lock(&ctx->mutex);
ret = rdma_resolve_addr(ctx->cm_id, (struct sockaddr *) &cmd.src_addr,
- (struct sockaddr *) &cmd.dst_addr,
- cmd.timeout_ms);
+ (struct sockaddr *) &cmd.dst_addr, cmd.timeout_ms);
+ mutex_unlock(&ctx->mutex);
ucma_put_ctx(ctx);
return ret;
}
@@ -684,24 +710,47 @@ static ssize_t ucma_resolve_addr(struct ucma_file *file,
int in_len, int out_len)
{
struct rdma_ucm_resolve_addr cmd;
- struct sockaddr *src, *dst;
struct ucma_context *ctx;
int ret;
if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
return -EFAULT;
- src = (struct sockaddr *) &cmd.src_addr;
- dst = (struct sockaddr *) &cmd.dst_addr;
- if (cmd.reserved || (cmd.src_size && (cmd.src_size != rdma_addr_size(src))) ||
- !cmd.dst_size || (cmd.dst_size != rdma_addr_size(dst)))
+ if (cmd.reserved ||
+ (cmd.src_size && (cmd.src_size != rdma_addr_size_kss(&cmd.src_addr))) ||
+ !cmd.dst_size || (cmd.dst_size != rdma_addr_size_kss(&cmd.dst_addr)))
return -EINVAL;
ctx = ucma_get_ctx(file, cmd.id);
if (IS_ERR(ctx))
return PTR_ERR(ctx);
- ret = rdma_resolve_addr(ctx->cm_id, src, dst, cmd.timeout_ms);
+ mutex_lock(&ctx->mutex);
+ ret = rdma_resolve_addr(ctx->cm_id, (struct sockaddr *) &cmd.src_addr,
+ (struct sockaddr *) &cmd.dst_addr, cmd.timeout_ms);
+ mutex_unlock(&ctx->mutex);
+ ucma_put_ctx(ctx);
+ return ret;
+}
+
+static ssize_t ucma_resolve_ib_service(struct ucma_file *file,
+ const char __user *inbuf, int in_len,
+ int out_len)
+{
+ struct rdma_ucm_resolve_ib_service cmd;
+ struct ucma_context *ctx;
+ int ret;
+
+ if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
+ return -EFAULT;
+
+ ctx = ucma_get_ctx(file, cmd.id);
+ if (IS_ERR(ctx))
+ return PTR_ERR(ctx);
+
+ mutex_lock(&ctx->mutex);
+ ret = rdma_resolve_ib_service(ctx->cm_id, &cmd.ibs);
+ mutex_unlock(&ctx->mutex);
ucma_put_ctx(ctx);
return ret;
}
@@ -717,11 +766,13 @@ static ssize_t ucma_resolve_route(struct ucma_file *file,
if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
return -EFAULT;
- ctx = ucma_get_ctx(file, cmd.id);
+ ctx = ucma_get_ctx_dev(file, cmd.id);
if (IS_ERR(ctx))
return PTR_ERR(ctx);
+ mutex_lock(&ctx->mutex);
ret = rdma_resolve_route(ctx->cm_id, cmd.timeout_ms);
+ mutex_unlock(&ctx->mutex);
ucma_put_ctx(ctx);
return ret;
}
@@ -731,8 +782,8 @@ static void ucma_copy_ib_route(struct rdma_ucm_query_route_resp *resp,
{
struct rdma_dev_addr *dev_addr;
- resp->num_paths = route->num_paths;
- switch (route->num_paths) {
+ resp->num_paths = route->num_pri_alt_paths;
+ switch (route->num_pri_alt_paths) {
case 0:
dev_addr = &route->addr.dev_addr;
rdma_addr_get_dgid(dev_addr,
@@ -744,7 +795,7 @@ static void ucma_copy_ib_route(struct rdma_ucm_query_route_resp *resp,
case 2:
ib_copy_path_rec_to_user(&resp->ib_route[1],
&route->path_rec[1]);
- /* fall through */
+ fallthrough;
case 1:
ib_copy_path_rec_to_user(&resp->ib_route[0],
&route->path_rec[0]);
@@ -758,8 +809,8 @@ static void ucma_copy_iboe_route(struct rdma_ucm_query_route_resp *resp,
struct rdma_route *route)
{
- resp->num_paths = route->num_paths;
- switch (route->num_paths) {
+ resp->num_paths = route->num_pri_alt_paths;
+ switch (route->num_pri_alt_paths) {
case 0:
rdma_ip2gid((struct sockaddr *)&route->addr.dst_addr,
(union ib_gid *)&resp->ib_route[0].dgid);
@@ -770,7 +821,7 @@ static void ucma_copy_iboe_route(struct rdma_ucm_query_route_resp *resp,
case 2:
ib_copy_path_rec_to_user(&resp->ib_route[1],
&route->path_rec[1]);
- /* fall through */
+ fallthrough;
case 1:
ib_copy_path_rec_to_user(&resp->ib_route[0],
&route->path_rec[0]);
@@ -800,7 +851,7 @@ static ssize_t ucma_query_route(struct ucma_file *file,
struct sockaddr *addr;
int ret = 0;
- if (out_len < sizeof(resp))
+ if (out_len < offsetof(struct rdma_ucm_query_route_resp, ibdev_index))
return -ENOSPC;
if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
@@ -810,6 +861,7 @@ static ssize_t ucma_query_route(struct ucma_file *file,
if (IS_ERR(ctx))
return PTR_ERR(ctx);
+ mutex_lock(&ctx->mutex);
memset(&resp, 0, sizeof resp);
addr = (struct sockaddr *) &ctx->cm_id->route.addr.src_addr;
memcpy(&resp.src_addr, addr, addr->sa_family == AF_INET ?
@@ -823,6 +875,7 @@ static ssize_t ucma_query_route(struct ucma_file *file,
goto out;
resp.node_guid = (__force __u64) ctx->cm_id->device->node_guid;
+ resp.ibdev_index = ctx->cm_id->device->index;
resp.port_num = ctx->cm_id->port_num;
if (rdma_cap_ib_sa(ctx->cm_id->device, ctx->cm_id->port_num))
@@ -833,8 +886,9 @@ static ssize_t ucma_query_route(struct ucma_file *file,
ucma_copy_iw_route(&resp, &ctx->cm_id->route);
out:
- if (copy_to_user((void __user *)(unsigned long)cmd.response,
- &resp, sizeof(resp)))
+ mutex_unlock(&ctx->mutex);
+ if (copy_to_user(u64_to_user_ptr(cmd.response), &resp,
+ min_t(size_t, out_len, sizeof(resp))))
ret = -EFAULT;
ucma_put_ctx(ctx);
@@ -848,6 +902,7 @@ static void ucma_query_device_addr(struct rdma_cm_id *cm_id,
return;
resp->node_guid = (__force __u64) cm_id->device->node_guid;
+ resp->ibdev_index = cm_id->device->index;
resp->port_num = cm_id->port_num;
resp->pkey = (__force __u16) cpu_to_be16(
ib_addr_get_pkey(&cm_id->route.addr.dev_addr));
@@ -860,7 +915,7 @@ static ssize_t ucma_query_addr(struct ucma_context *ctx,
struct sockaddr *addr;
int ret = 0;
- if (out_len < sizeof(resp))
+ if (out_len < offsetof(struct rdma_ucm_query_addr_resp, ibdev_index))
return -ENOSPC;
memset(&resp, 0, sizeof resp);
@@ -875,7 +930,7 @@ static ssize_t ucma_query_addr(struct ucma_context *ctx,
ucma_query_device_addr(ctx->cm_id, &resp);
- if (copy_to_user(response, &resp, sizeof(resp)))
+ if (copy_to_user(response, &resp, min_t(size_t, out_len, sizeof(resp))))
ret = -EFAULT;
return ret;
@@ -894,19 +949,26 @@ static ssize_t ucma_query_path(struct ucma_context *ctx,
if (!resp)
return -ENOMEM;
- resp->num_paths = ctx->cm_id->route.num_paths;
+ resp->num_paths = ctx->cm_id->route.num_pri_alt_paths;
for (i = 0, out_len -= sizeof(*resp);
i < resp->num_paths && out_len > sizeof(struct ib_path_rec_data);
i++, out_len -= sizeof(struct ib_path_rec_data)) {
+ struct sa_path_rec *rec = &ctx->cm_id->route.path_rec[i];
resp->path_data[i].flags = IB_PATH_GMP | IB_PATH_PRIMARY |
IB_PATH_BIDIRECTIONAL;
- ib_sa_pack_path(&ctx->cm_id->route.path_rec[i],
- &resp->path_data[i].path_rec);
+ if (rec->rec_type == SA_PATH_REC_TYPE_OPA) {
+ struct sa_path_rec ib;
+
+ sa_convert_path_opa_to_ib(&ib, rec);
+ ib_sa_pack_path(&ib, &resp->path_data[i].path_rec);
+
+ } else {
+ ib_sa_pack_path(rec, &resp->path_data[i].path_rec);
+ }
}
- if (copy_to_user(response, resp,
- sizeof(*resp) + (i * sizeof(struct ib_path_rec_data))))
+ if (copy_to_user(response, resp, struct_size(resp, path_data, i)))
ret = -EFAULT;
kfree(resp);
@@ -920,7 +982,7 @@ static ssize_t ucma_query_gid(struct ucma_context *ctx,
struct sockaddr_ib *addr;
int ret = 0;
- if (out_len < sizeof(resp))
+ if (out_len < offsetof(struct rdma_ucm_query_addr_resp, ibdev_index))
return -ENOSPC;
memset(&resp, 0, sizeof resp);
@@ -934,8 +996,8 @@ static ssize_t ucma_query_gid(struct ucma_context *ctx,
} else {
addr->sib_family = AF_IB;
addr->sib_pkey = (__force __be16) resp.pkey;
- rdma_addr_get_sgid(&ctx->cm_id->route.addr.dev_addr,
- (union ib_gid *) &addr->sib_addr);
+ rdma_read_gids(ctx->cm_id, (union ib_gid *)&addr->sib_addr,
+ NULL);
addr->sib_sid = rdma_get_service_id(ctx->cm_id, (struct sockaddr *)
&ctx->cm_id->route.addr.src_addr);
}
@@ -947,15 +1009,52 @@ static ssize_t ucma_query_gid(struct ucma_context *ctx,
} else {
addr->sib_family = AF_IB;
addr->sib_pkey = (__force __be16) resp.pkey;
- rdma_addr_get_dgid(&ctx->cm_id->route.addr.dev_addr,
- (union ib_gid *) &addr->sib_addr);
+ rdma_read_gids(ctx->cm_id, NULL,
+ (union ib_gid *)&addr->sib_addr);
addr->sib_sid = rdma_get_service_id(ctx->cm_id, (struct sockaddr *)
&ctx->cm_id->route.addr.dst_addr);
}
- if (copy_to_user(response, &resp, sizeof(resp)))
+ if (copy_to_user(response, &resp, min_t(size_t, out_len, sizeof(resp))))
+ ret = -EFAULT;
+
+ return ret;
+}
+
+static ssize_t ucma_query_ib_service(struct ucma_context *ctx,
+ void __user *response, int out_len)
+{
+ struct rdma_ucm_query_ib_service_resp *resp;
+ int n, ret = 0;
+
+ if (out_len < sizeof(struct rdma_ucm_query_ib_service_resp))
+ return -ENOSPC;
+
+ if (!ctx->cm_id->route.service_recs)
+ return -ENODATA;
+
+ resp = kzalloc(out_len, GFP_KERNEL);
+ if (!resp)
+ return -ENOMEM;
+
+ resp->num_service_recs = ctx->cm_id->route.num_service_recs;
+
+ n = (out_len - sizeof(struct rdma_ucm_query_ib_service_resp)) /
+ sizeof(struct ib_user_service_rec);
+
+ if (!n)
+ goto out;
+
+ if (n > ctx->cm_id->route.num_service_recs)
+ n = ctx->cm_id->route.num_service_recs;
+
+ memcpy(resp->recs, ctx->cm_id->route.service_recs,
+ sizeof(*resp->recs) * n);
+ if (copy_to_user(response, resp, struct_size(resp, recs, n)))
ret = -EFAULT;
+out:
+ kfree(resp);
return ret;
}
@@ -971,11 +1070,12 @@ static ssize_t ucma_query(struct ucma_file *file,
if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
return -EFAULT;
- response = (void __user *)(unsigned long) cmd.response;
+ response = u64_to_user_ptr(cmd.response);
ctx = ucma_get_ctx(file, cmd.id);
if (IS_ERR(ctx))
return PTR_ERR(ctx);
+ mutex_lock(&ctx->mutex);
switch (cmd.option) {
case RDMA_USER_CM_QUERY_ADDR:
ret = ucma_query_addr(ctx, response, out_len);
@@ -986,10 +1086,14 @@ static ssize_t ucma_query(struct ucma_file *file,
case RDMA_USER_CM_QUERY_GID:
ret = ucma_query_gid(ctx, response, out_len);
break;
+ case RDMA_USER_CM_QUERY_IB_SERVICE:
+ ret = ucma_query_ib_service(ctx, response, out_len);
+ break;
default:
ret = -ENOSYS;
break;
}
+ mutex_unlock(&ctx->mutex);
ucma_put_ctx(ctx);
return ret;
@@ -1001,36 +1105,48 @@ static void ucma_copy_conn_param(struct rdma_cm_id *id,
{
dst->private_data = src->private_data;
dst->private_data_len = src->private_data_len;
- dst->responder_resources =src->responder_resources;
+ dst->responder_resources = src->responder_resources;
dst->initiator_depth = src->initiator_depth;
dst->flow_control = src->flow_control;
dst->retry_count = src->retry_count;
dst->rnr_retry_count = src->rnr_retry_count;
dst->srq = src->srq;
- dst->qp_num = src->qp_num;
+ dst->qp_num = src->qp_num & 0xFFFFFF;
dst->qkey = (id->route.addr.src_addr.ss_family == AF_IB) ? src->qkey : 0;
}
static ssize_t ucma_connect(struct ucma_file *file, const char __user *inbuf,
int in_len, int out_len)
{
- struct rdma_ucm_connect cmd;
struct rdma_conn_param conn_param;
+ struct rdma_ucm_ece ece = {};
+ struct rdma_ucm_connect cmd;
struct ucma_context *ctx;
+ size_t in_size;
int ret;
- if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
+ if (in_len < offsetofend(typeof(cmd), reserved))
+ return -EINVAL;
+ in_size = min_t(size_t, in_len, sizeof(cmd));
+ if (copy_from_user(&cmd, inbuf, in_size))
return -EFAULT;
if (!cmd.conn_param.valid)
return -EINVAL;
- ctx = ucma_get_ctx(file, cmd.id);
+ ctx = ucma_get_ctx_dev(file, cmd.id);
if (IS_ERR(ctx))
return PTR_ERR(ctx);
ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param);
- ret = rdma_connect(ctx->cm_id, &conn_param);
+ if (offsetofend(typeof(cmd), ece) <= in_size) {
+ ece.vendor_id = cmd.ece.vendor_id;
+ ece.attr_mod = cmd.ece.attr_mod;
+ }
+
+ mutex_lock(&ctx->mutex);
+ ret = rdma_connect_ece(ctx->cm_id, &conn_param, &ece);
+ mutex_unlock(&ctx->mutex);
ucma_put_ctx(ctx);
return ret;
}
@@ -1049,9 +1165,13 @@ static ssize_t ucma_listen(struct ucma_file *file, const char __user *inbuf,
if (IS_ERR(ctx))
return PTR_ERR(ctx);
- ctx->backlog = cmd.backlog > 0 && cmd.backlog < max_backlog ?
- cmd.backlog : max_backlog;
- ret = rdma_listen(ctx->cm_id, ctx->backlog);
+ if (cmd.backlog <= 0 || cmd.backlog > max_backlog)
+ cmd.backlog = max_backlog;
+ atomic_set(&ctx->backlog, cmd.backlog);
+
+ mutex_lock(&ctx->mutex);
+ ret = rdma_listen(ctx->cm_id, cmd.backlog);
+ mutex_unlock(&ctx->mutex);
ucma_put_ctx(ctx);
return ret;
}
@@ -1061,26 +1181,44 @@ static ssize_t ucma_accept(struct ucma_file *file, const char __user *inbuf,
{
struct rdma_ucm_accept cmd;
struct rdma_conn_param conn_param;
+ struct rdma_ucm_ece ece = {};
struct ucma_context *ctx;
+ size_t in_size;
int ret;
- if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
+ if (in_len < offsetofend(typeof(cmd), reserved))
+ return -EINVAL;
+ in_size = min_t(size_t, in_len, sizeof(cmd));
+ if (copy_from_user(&cmd, inbuf, in_size))
return -EFAULT;
- ctx = ucma_get_ctx(file, cmd.id);
+ ctx = ucma_get_ctx_dev(file, cmd.id);
if (IS_ERR(ctx))
return PTR_ERR(ctx);
+ if (offsetofend(typeof(cmd), ece) <= in_size) {
+ ece.vendor_id = cmd.ece.vendor_id;
+ ece.attr_mod = cmd.ece.attr_mod;
+ }
+
if (cmd.conn_param.valid) {
ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param);
- mutex_lock(&file->mut);
- ret = rdma_accept(ctx->cm_id, &conn_param);
- if (!ret)
+ mutex_lock(&ctx->mutex);
+ rdma_lock_handler(ctx->cm_id);
+ ret = rdma_accept_ece(ctx->cm_id, &conn_param, &ece);
+ if (!ret) {
+ /* The uid must be set atomically with the handler */
ctx->uid = cmd.uid;
- mutex_unlock(&file->mut);
- } else
- ret = rdma_accept(ctx->cm_id, NULL);
-
+ }
+ rdma_unlock_handler(ctx->cm_id);
+ mutex_unlock(&ctx->mutex);
+ } else {
+ mutex_lock(&ctx->mutex);
+ rdma_lock_handler(ctx->cm_id);
+ ret = rdma_accept_ece(ctx->cm_id, NULL, &ece);
+ rdma_unlock_handler(ctx->cm_id);
+ mutex_unlock(&ctx->mutex);
+ }
ucma_put_ctx(ctx);
return ret;
}
@@ -1095,11 +1233,25 @@ static ssize_t ucma_reject(struct ucma_file *file, const char __user *inbuf,
if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
return -EFAULT;
- ctx = ucma_get_ctx(file, cmd.id);
+ if (!cmd.reason)
+ cmd.reason = IB_CM_REJ_CONSUMER_DEFINED;
+
+ switch (cmd.reason) {
+ case IB_CM_REJ_CONSUMER_DEFINED:
+ case IB_CM_REJ_VENDOR_OPTION_NOT_SUPPORTED:
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ ctx = ucma_get_ctx_dev(file, cmd.id);
if (IS_ERR(ctx))
return PTR_ERR(ctx);
- ret = rdma_reject(ctx->cm_id, cmd.private_data, cmd.private_data_len);
+ mutex_lock(&ctx->mutex);
+ ret = rdma_reject(ctx->cm_id, cmd.private_data, cmd.private_data_len,
+ cmd.reason);
+ mutex_unlock(&ctx->mutex);
ucma_put_ctx(ctx);
return ret;
}
@@ -1114,11 +1266,13 @@ static ssize_t ucma_disconnect(struct ucma_file *file, const char __user *inbuf,
if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
return -EFAULT;
- ctx = ucma_get_ctx(file, cmd.id);
+ ctx = ucma_get_ctx_dev(file, cmd.id);
if (IS_ERR(ctx))
return PTR_ERR(ctx);
+ mutex_lock(&ctx->mutex);
ret = rdma_disconnect(ctx->cm_id);
+ mutex_unlock(&ctx->mutex);
ucma_put_ctx(ctx);
return ret;
}
@@ -1139,19 +1293,24 @@ static ssize_t ucma_init_qp_attr(struct ucma_file *file,
if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
return -EFAULT;
- ctx = ucma_get_ctx(file, cmd.id);
+ if (cmd.qp_state > IB_QPS_ERR)
+ return -EINVAL;
+
+ ctx = ucma_get_ctx_dev(file, cmd.id);
if (IS_ERR(ctx))
return PTR_ERR(ctx);
resp.qp_attr_mask = 0;
memset(&qp_attr, 0, sizeof qp_attr);
qp_attr.qp_state = cmd.qp_state;
+ mutex_lock(&ctx->mutex);
ret = rdma_init_qp_attr(ctx->cm_id, &qp_attr, &resp.qp_attr_mask);
+ mutex_unlock(&ctx->mutex);
if (ret)
goto out;
- ib_copy_qp_attr_to_user(&resp, &qp_attr);
- if (copy_to_user((void __user *)(unsigned long)cmd.response,
+ ib_copy_qp_attr_to_user(ctx->cm_id->device, &resp, &qp_attr);
+ if (copy_to_user(u64_to_user_ptr(cmd.response),
&resp, sizeof(resp)))
ret = -EFAULT;
@@ -1187,6 +1346,13 @@ static int ucma_set_option_id(struct ucma_context *ctx, int optname,
}
ret = rdma_set_afonly(ctx->cm_id, *((int *) optval) ? 1 : 0);
break;
+ case RDMA_OPTION_ID_ACK_TIMEOUT:
+ if (optlen != sizeof(u8)) {
+ ret = -EINVAL;
+ break;
+ }
+ ret = rdma_set_ack_timeout(ctx->cm_id, *((u8 *)optval));
+ break;
default:
ret = -ENOSYS;
}
@@ -1197,7 +1363,7 @@ static int ucma_set_option_id(struct ucma_context *ctx, int optname,
static int ucma_set_ib_path(struct ucma_context *ctx,
struct ib_path_rec_data *path_data, size_t optlen)
{
- struct ib_sa_path_rec sa_path;
+ struct sa_path_rec sa_path;
struct rdma_cm_event event;
int ret;
@@ -1213,10 +1379,26 @@ static int ucma_set_ib_path(struct ucma_context *ctx,
if (!optlen)
return -EINVAL;
+ if (!ctx->cm_id->device)
+ return -EINVAL;
+
memset(&sa_path, 0, sizeof(sa_path));
+ sa_path.rec_type = SA_PATH_REC_TYPE_IB;
ib_sa_unpack_path(path_data->path_rec, &sa_path);
- ret = rdma_set_ib_paths(ctx->cm_id, &sa_path, 1);
+
+ if (rdma_cap_opa_ah(ctx->cm_id->device, ctx->cm_id->port_num)) {
+ struct sa_path_rec opa;
+
+ sa_convert_path_ib_to_opa(&opa, &sa_path);
+ mutex_lock(&ctx->mutex);
+ ret = rdma_set_ib_path(ctx->cm_id, &opa);
+ mutex_unlock(&ctx->mutex);
+ } else {
+ mutex_lock(&ctx->mutex);
+ ret = rdma_set_ib_path(ctx->cm_id, &sa_path);
+ mutex_unlock(&ctx->mutex);
+ }
if (ret)
return ret;
@@ -1248,7 +1430,9 @@ static int ucma_set_option_level(struct ucma_context *ctx, int level,
switch (level) {
case RDMA_OPTION_ID:
+ mutex_lock(&ctx->mutex);
ret = ucma_set_option_id(ctx, optname, optval, optlen);
+ mutex_unlock(&ctx->mutex);
break;
case RDMA_OPTION_IB:
ret = ucma_set_option_ib(ctx, optname, optval, optlen);
@@ -1271,11 +1455,14 @@ static ssize_t ucma_set_option(struct ucma_file *file, const char __user *inbuf,
if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
return -EFAULT;
+ if (unlikely(cmd.optlen > KMALLOC_MAX_SIZE))
+ return -EINVAL;
+
ctx = ucma_get_ctx(file, cmd.id);
if (IS_ERR(ctx))
return PTR_ERR(ctx);
- optval = memdup_user((void __user *) (unsigned long) cmd.optval,
+ optval = memdup_user(u64_to_user_ptr(cmd.optval),
cmd.optlen);
if (IS_ERR(optval)) {
ret = PTR_ERR(optval);
@@ -1296,7 +1483,7 @@ static ssize_t ucma_notify(struct ucma_file *file, const char __user *inbuf,
{
struct rdma_ucm_notify cmd;
struct ucma_context *ctx;
- int ret;
+ int ret = -EINVAL;
if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
return -EFAULT;
@@ -1305,7 +1492,11 @@ static ssize_t ucma_notify(struct ucma_file *file, const char __user *inbuf,
if (IS_ERR(ctx))
return PTR_ERR(ctx);
- ret = rdma_notify(ctx->cm_id, (enum ib_event_type) cmd.event);
+ mutex_lock(&ctx->mutex);
+ if (ctx->cm_id->device)
+ ret = rdma_notify(ctx->cm_id, (enum ib_event_type)cmd.event);
+ mutex_unlock(&ctx->mutex);
+
ucma_put_ctx(ctx);
return ret;
}
@@ -1324,7 +1515,7 @@ static ssize_t ucma_process_join(struct ucma_file *file,
return -ENOSPC;
addr = (struct sockaddr *) &cmd->addr;
- if (!cmd->addr_size || (cmd->addr_size != rdma_addr_size(addr)))
+ if (cmd->addr_size != rdma_addr_size(addr))
return -EINVAL;
if (cmd->join_flags == RDMA_MC_JOIN_FLAG_FULLMEMBER)
@@ -1334,46 +1525,63 @@ static ssize_t ucma_process_join(struct ucma_file *file,
else
return -EINVAL;
- ctx = ucma_get_ctx(file, cmd->id);
+ ctx = ucma_get_ctx_dev(file, cmd->id);
if (IS_ERR(ctx))
return PTR_ERR(ctx);
- mutex_lock(&file->mut);
- mc = ucma_alloc_multicast(ctx);
+ mc = kzalloc(sizeof(*mc), GFP_KERNEL);
if (!mc) {
ret = -ENOMEM;
- goto err1;
+ goto err_put_ctx;
}
+
+ mc->ctx = ctx;
mc->join_state = join_state;
mc->uid = cmd->uid;
memcpy(&mc->addr, addr, cmd->addr_size);
+
+ xa_lock(&multicast_table);
+ if (__xa_alloc(&multicast_table, &mc->id, NULL, xa_limit_32b,
+ GFP_KERNEL)) {
+ ret = -ENOMEM;
+ goto err_free_mc;
+ }
+
+ list_add_tail(&mc->list, &ctx->mc_list);
+ xa_unlock(&multicast_table);
+
+ mutex_lock(&ctx->mutex);
ret = rdma_join_multicast(ctx->cm_id, (struct sockaddr *)&mc->addr,
join_state, mc);
+ mutex_unlock(&ctx->mutex);
if (ret)
- goto err2;
+ goto err_xa_erase;
resp.id = mc->id;
- if (copy_to_user((void __user *)(unsigned long) cmd->response,
+ if (copy_to_user(u64_to_user_ptr(cmd->response),
&resp, sizeof(resp))) {
ret = -EFAULT;
- goto err3;
+ goto err_leave_multicast;
}
- mutex_unlock(&file->mut);
+ xa_store(&multicast_table, mc->id, mc, 0);
+
ucma_put_ctx(ctx);
return 0;
-err3:
+err_leave_multicast:
+ mutex_lock(&ctx->mutex);
rdma_leave_multicast(ctx->cm_id, (struct sockaddr *) &mc->addr);
+ mutex_unlock(&ctx->mutex);
ucma_cleanup_mc_events(mc);
-err2:
- mutex_lock(&mut);
- idr_remove(&multicast_idr, mc->id);
- mutex_unlock(&mut);
+err_xa_erase:
+ xa_lock(&multicast_table);
list_del(&mc->list);
+ __xa_erase(&multicast_table, mc->id);
+err_free_mc:
+ xa_unlock(&multicast_table);
kfree(mc);
-err1:
- mutex_unlock(&file->mut);
+err_put_ctx:
ucma_put_ctx(ctx);
return ret;
}
@@ -1391,7 +1599,10 @@ static ssize_t ucma_join_ip_multicast(struct ucma_file *file,
join_cmd.response = cmd.response;
join_cmd.uid = cmd.uid;
join_cmd.id = cmd.id;
- join_cmd.addr_size = rdma_addr_size((struct sockaddr *) &cmd.addr);
+ join_cmd.addr_size = rdma_addr_size_in6(&cmd.addr);
+ if (!join_cmd.addr_size)
+ return -EINVAL;
+
join_cmd.join_flags = RDMA_MC_JOIN_FLAG_FULLMEMBER;
memcpy(&join_cmd.addr, &cmd.addr, join_cmd.addr_size);
@@ -1407,6 +1618,9 @@ static ssize_t ucma_join_multicast(struct ucma_file *file,
if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
return -EFAULT;
+ if (!rdma_addr_size_kss(&cmd.addr))
+ return -EINVAL;
+
return ucma_process_join(file, &cmd, out_len);
}
@@ -1425,80 +1639,51 @@ static ssize_t ucma_leave_multicast(struct ucma_file *file,
if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
return -EFAULT;
- mutex_lock(&mut);
- mc = idr_find(&multicast_idr, cmd.id);
+ xa_lock(&multicast_table);
+ mc = xa_load(&multicast_table, cmd.id);
if (!mc)
mc = ERR_PTR(-ENOENT);
- else if (mc->ctx->file != file)
+ else if (READ_ONCE(mc->ctx->file) != file)
mc = ERR_PTR(-EINVAL);
- else if (!atomic_inc_not_zero(&mc->ctx->ref))
+ else if (!refcount_inc_not_zero(&mc->ctx->ref))
mc = ERR_PTR(-ENXIO);
- else
- idr_remove(&multicast_idr, mc->id);
- mutex_unlock(&mut);
if (IS_ERR(mc)) {
+ xa_unlock(&multicast_table);
ret = PTR_ERR(mc);
goto out;
}
+ list_del(&mc->list);
+ __xa_erase(&multicast_table, mc->id);
+ xa_unlock(&multicast_table);
+
+ mutex_lock(&mc->ctx->mutex);
rdma_leave_multicast(mc->ctx->cm_id, (struct sockaddr *) &mc->addr);
- mutex_lock(&mc->ctx->file->mut);
+ mutex_unlock(&mc->ctx->mutex);
+
ucma_cleanup_mc_events(mc);
- list_del(&mc->list);
- mutex_unlock(&mc->ctx->file->mut);
ucma_put_ctx(mc->ctx);
resp.events_reported = mc->events_reported;
kfree(mc);
- if (copy_to_user((void __user *)(unsigned long)cmd.response,
+ if (copy_to_user(u64_to_user_ptr(cmd.response),
&resp, sizeof(resp)))
ret = -EFAULT;
out:
return ret;
}
-static void ucma_lock_files(struct ucma_file *file1, struct ucma_file *file2)
-{
- /* Acquire mutex's based on pointer comparison to prevent deadlock. */
- if (file1 < file2) {
- mutex_lock(&file1->mut);
- mutex_lock_nested(&file2->mut, SINGLE_DEPTH_NESTING);
- } else {
- mutex_lock(&file2->mut);
- mutex_lock_nested(&file1->mut, SINGLE_DEPTH_NESTING);
- }
-}
-
-static void ucma_unlock_files(struct ucma_file *file1, struct ucma_file *file2)
-{
- if (file1 < file2) {
- mutex_unlock(&file2->mut);
- mutex_unlock(&file1->mut);
- } else {
- mutex_unlock(&file1->mut);
- mutex_unlock(&file2->mut);
- }
-}
-
-static void ucma_move_events(struct ucma_context *ctx, struct ucma_file *file)
-{
- struct ucma_event *uevent, *tmp;
-
- list_for_each_entry_safe(uevent, tmp, &ctx->file->event_list, list)
- if (uevent->ctx == ctx)
- list_move_tail(&uevent->list, &file->event_list);
-}
-
static ssize_t ucma_migrate_id(struct ucma_file *new_file,
const char __user *inbuf,
int in_len, int out_len)
{
struct rdma_ucm_migrate_id cmd;
struct rdma_ucm_migrate_resp resp;
+ struct ucma_event *uevent, *tmp;
struct ucma_context *ctx;
- struct fd f;
+ LIST_HEAD(event_list);
struct ucma_file *cur_file;
int ret = 0;
@@ -1506,46 +1691,106 @@ static ssize_t ucma_migrate_id(struct ucma_file *new_file,
return -EFAULT;
/* Get current fd to protect against it being closed */
- f = fdget(cmd.fd);
- if (!f.file)
+ CLASS(fd, f)(cmd.fd);
+ if (fd_empty(f))
return -ENOENT;
+ if (fd_file(f)->f_op != &ucma_fops)
+ return -EINVAL;
+ cur_file = fd_file(f)->private_data;
/* Validate current fd and prevent destruction of id. */
- ctx = ucma_get_ctx(f.file->private_data, cmd.id);
- if (IS_ERR(ctx)) {
- ret = PTR_ERR(ctx);
- goto file_put;
- }
+ ctx = ucma_get_ctx(cur_file, cmd.id);
+ if (IS_ERR(ctx))
+ return PTR_ERR(ctx);
- cur_file = ctx->file;
- if (cur_file == new_file) {
- resp.events_reported = ctx->events_reported;
- goto response;
+ rdma_lock_handler(ctx->cm_id);
+ /*
+ * ctx->file can only be changed under the handler & xa_lock. xa_load()
+ * must be checked again to ensure the ctx hasn't begun destruction
+ * since the ucma_get_ctx().
+ */
+ xa_lock(&ctx_table);
+ if (_ucma_find_context(cmd.id, cur_file) != ctx) {
+ xa_unlock(&ctx_table);
+ ret = -ENOENT;
+ goto err_unlock;
}
+ ctx->file = new_file;
+ xa_unlock(&ctx_table);
+ mutex_lock(&cur_file->mut);
+ list_del(&ctx->list);
/*
- * Migrate events between fd's, maintaining order, and avoiding new
- * events being added before existing events.
+ * At this point lock_handler() prevents addition of new uevents for
+ * this ctx.
*/
- ucma_lock_files(cur_file, new_file);
- mutex_lock(&mut);
-
- list_move_tail(&ctx->list, &new_file->ctx_list);
- ucma_move_events(ctx, new_file);
- ctx->file = new_file;
+ list_for_each_entry_safe(uevent, tmp, &cur_file->event_list, list)
+ if (uevent->ctx == ctx)
+ list_move_tail(&uevent->list, &event_list);
resp.events_reported = ctx->events_reported;
+ mutex_unlock(&cur_file->mut);
- mutex_unlock(&mut);
- ucma_unlock_files(cur_file, new_file);
+ mutex_lock(&new_file->mut);
+ list_add_tail(&ctx->list, &new_file->ctx_list);
+ list_splice_tail(&event_list, &new_file->event_list);
+ mutex_unlock(&new_file->mut);
-response:
- if (copy_to_user((void __user *)(unsigned long)cmd.response,
+ if (copy_to_user(u64_to_user_ptr(cmd.response),
&resp, sizeof(resp)))
ret = -EFAULT;
+err_unlock:
+ rdma_unlock_handler(ctx->cm_id);
+ ucma_put_ctx(ctx);
+ return ret;
+}
+
+static ssize_t ucma_write_cm_event(struct ucma_file *file,
+ const char __user *inbuf, int in_len,
+ int out_len)
+{
+ struct rdma_ucm_write_cm_event cmd;
+ struct rdma_cm_event event = {};
+ struct ucma_event *uevent;
+ struct ucma_context *ctx;
+ int ret = 0;
+
+ if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
+ return -EFAULT;
+
+ if ((cmd.event != RDMA_CM_EVENT_USER) &&
+ (cmd.event != RDMA_CM_EVENT_INTERNAL))
+ return -EINVAL;
+
+ ctx = ucma_get_ctx(file, cmd.id);
+ if (IS_ERR(ctx))
+ return PTR_ERR(ctx);
+
+ event.event = cmd.event;
+ event.status = cmd.status;
+ event.param.arg = cmd.param.arg;
+
+ uevent = kzalloc(sizeof(*uevent), GFP_KERNEL);
+ if (!uevent) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ uevent->ctx = ctx;
+ uevent->resp.uid = ctx->uid;
+ uevent->resp.id = ctx->id;
+ uevent->resp.event = event.event;
+ uevent->resp.status = event.status;
+ memcpy(uevent->resp.param.arg32, &event.param.arg,
+ sizeof(event.param.arg));
+
+ mutex_lock(&ctx->file->mut);
+ list_add_tail(&uevent->list, &ctx->file->event_list);
+ mutex_unlock(&ctx->file->mut);
+ wake_up_interruptible(&ctx->file->poll_wait);
+
+out:
ucma_put_ctx(ctx);
-file_put:
- fdput(f);
return ret;
}
@@ -1574,7 +1819,9 @@ static ssize_t (*ucma_cmd_table[])(struct ucma_file *file,
[RDMA_USER_CM_CMD_QUERY] = ucma_query,
[RDMA_USER_CM_CMD_BIND] = ucma_bind,
[RDMA_USER_CM_CMD_RESOLVE_ADDR] = ucma_resolve_addr,
- [RDMA_USER_CM_CMD_JOIN_MCAST] = ucma_join_multicast
+ [RDMA_USER_CM_CMD_JOIN_MCAST] = ucma_join_multicast,
+ [RDMA_USER_CM_CMD_RESOLVE_IB_SERVICE] = ucma_resolve_ib_service,
+ [RDMA_USER_CM_CMD_WRITE_CM_EVENT] = ucma_write_cm_event,
};
static ssize_t ucma_write(struct file *filp, const char __user *buf,
@@ -1585,8 +1832,8 @@ static ssize_t ucma_write(struct file *filp, const char __user *buf,
ssize_t ret;
if (!ib_safe_file_access(filp)) {
- pr_err_once("ucma_write: process %d (%s) changed security contexts after opening file descriptor, this is not allowed.\n",
- task_tgid_vnr(current), current->comm);
+ pr_err_once("%s: process %d (%s) changed security contexts after opening file descriptor, this is not allowed.\n",
+ __func__, task_tgid_vnr(current), current->comm);
return -EACCES;
}
@@ -1598,6 +1845,7 @@ static ssize_t ucma_write(struct file *filp, const char __user *buf,
if (hdr.cmd >= ARRAY_SIZE(ucma_cmd_table))
return -EINVAL;
+ hdr.cmd = array_index_nospec(hdr.cmd, ARRAY_SIZE(ucma_cmd_table));
if (hdr.in + sizeof(hdr) > len)
return -EINVAL;
@@ -1612,15 +1860,15 @@ static ssize_t ucma_write(struct file *filp, const char __user *buf,
return ret;
}
-static unsigned int ucma_poll(struct file *filp, struct poll_table_struct *wait)
+static __poll_t ucma_poll(struct file *filp, struct poll_table_struct *wait)
{
struct ucma_file *file = filp->private_data;
- unsigned int mask = 0;
+ __poll_t mask = 0;
poll_wait(filp, &file->poll_wait, wait);
if (!list_empty(&file->event_list))
- mask = POLLIN | POLLRDNORM;
+ mask = EPOLLIN | EPOLLRDNORM;
return mask;
}
@@ -1641,13 +1889,6 @@ static int ucma_open(struct inode *inode, struct file *filp)
if (!file)
return -ENOMEM;
- file->close_wq = alloc_ordered_workqueue("ucma_close_id",
- WQ_MEM_RECLAIM);
- if (!file->close_wq) {
- kfree(file);
- return -ENOMEM;
- }
-
INIT_LIST_HEAD(&file->event_list);
INIT_LIST_HEAD(&file->ctx_list);
init_waitqueue_head(&file->poll_wait);
@@ -1656,44 +1897,29 @@ static int ucma_open(struct inode *inode, struct file *filp)
filp->private_data = file;
file->filp = filp;
- return nonseekable_open(inode, filp);
+ return stream_open(inode, filp);
}
static int ucma_close(struct inode *inode, struct file *filp)
{
struct ucma_file *file = filp->private_data;
- struct ucma_context *ctx, *tmp;
- mutex_lock(&file->mut);
- list_for_each_entry_safe(ctx, tmp, &file->ctx_list, list) {
- ctx->destroying = 1;
- mutex_unlock(&file->mut);
-
- mutex_lock(&mut);
- idr_remove(&ctx_idr, ctx->id);
- mutex_unlock(&mut);
-
- flush_workqueue(file->close_wq);
- /* At that step once ctx was marked as destroying and workqueue
- * was flushed we are safe from any inflights handlers that
- * might put other closing task.
- */
- mutex_lock(&mut);
- if (!ctx->closing) {
- mutex_unlock(&mut);
- /* rdma_destroy_id ensures that no event handlers are
- * inflight for that id before releasing it.
- */
- rdma_destroy_id(ctx->cm_id);
- } else {
- mutex_unlock(&mut);
- }
+ /*
+ * All paths that touch ctx_list or ctx_list starting from write() are
+ * prevented by this being a FD release function. The list_add_tail() in
+ * ucma_connect_event_handler() can run concurrently, however it only
+ * adds to the list *after* a listening ID. By only reading the first of
+ * the list, and relying on ucma_destroy_private_ctx() to block
+ * ucma_connect_event_handler(), no additional locking is needed.
+ */
+ while (!list_empty(&file->ctx_list)) {
+ struct ucma_context *ctx = list_first_entry(
+ &file->ctx_list, struct ucma_context, list);
- ucma_free_ctx(ctx);
- mutex_lock(&file->mut);
+ WARN_ON(xa_cmpxchg(&ctx_table, ctx->id, ctx, XA_ZERO_ENTRY,
+ GFP_KERNEL) != ctx);
+ ucma_destroy_private_ctx(ctx);
}
- mutex_unlock(&file->mut);
- destroy_workqueue(file->close_wq);
kfree(file);
return 0;
}
@@ -1704,7 +1930,6 @@ static const struct file_operations ucma_fops = {
.release = ucma_close,
.write = ucma_write,
.poll = ucma_poll,
- .llseek = no_llseek,
};
static struct miscdevice ucma_misc = {
@@ -1715,13 +1940,25 @@ static struct miscdevice ucma_misc = {
.fops = &ucma_fops,
};
-static ssize_t show_abi_version(struct device *dev,
- struct device_attribute *attr,
- char *buf)
+static int ucma_get_global_nl_info(struct ib_client_nl_info *res)
{
- return sprintf(buf, "%d\n", RDMA_USER_CM_ABI_VERSION);
+ res->abi = RDMA_USER_CM_ABI_VERSION;
+ res->cdev = ucma_misc.this_device;
+ return 0;
}
-static DEVICE_ATTR(abi_version, S_IRUGO, show_abi_version, NULL);
+
+static struct ib_client rdma_cma_client = {
+ .name = "rdma_cm",
+ .get_global_nl_info = ucma_get_global_nl_info,
+};
+MODULE_ALIAS_RDMA_CLIENT("rdma_cm");
+
+static ssize_t abi_version_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return sysfs_emit(buf, "%d\n", RDMA_USER_CM_ABI_VERSION);
+}
+static DEVICE_ATTR_RO(abi_version);
static int __init ucma_init(void)
{
@@ -1743,7 +1980,14 @@ static int __init ucma_init(void)
ret = -ENOMEM;
goto err2;
}
+
+ ret = ib_register_client(&rdma_cma_client);
+ if (ret)
+ goto err3;
+
return 0;
+err3:
+ unregister_net_sysctl_table(ucma_ctl_table_hdr);
err2:
device_remove_file(ucma_misc.this_device, &dev_attr_abi_version);
err1:
@@ -1753,11 +1997,10 @@ err1:
static void __exit ucma_cleanup(void)
{
+ ib_unregister_client(&rdma_cma_client);
unregister_net_sysctl_table(ucma_ctl_table_hdr);
device_remove_file(ucma_misc.this_device, &dev_attr_abi_version);
misc_deregister(&ucma_misc);
- idr_destroy(&ctx_idr);
- idr_destroy(&multicast_idr);
}
module_init(ucma_init);
diff --git a/drivers/infiniband/core/ud_header.c b/drivers/infiniband/core/ud_header.c
index 29a45d2f8898..8d3dfef9ebaa 100644
--- a/drivers/infiniband/core/ud_header.c
+++ b/drivers/infiniband/core/ud_header.c
@@ -41,7 +41,7 @@
#define STRUCT_FIELD(header, field) \
.struct_offset_bytes = offsetof(struct ib_unpacked_ ## header, field), \
- .struct_size_bytes = sizeof ((struct ib_unpacked_ ## header *) 0)->field, \
+ .struct_size_bytes = sizeof_field(struct ib_unpacked_ ## header, field), \
.field_name = #header ":" #field
static const struct ib_field lrh_table[] = {
@@ -462,86 +462,3 @@ int ib_ud_header_pack(struct ib_ud_header *header,
return len;
}
EXPORT_SYMBOL(ib_ud_header_pack);
-
-/**
- * ib_ud_header_unpack - Unpack UD header struct from wire format
- * @header:UD header struct
- * @buf:Buffer to pack into
- *
- * ib_ud_header_pack() unpacks the UD header structure @header from wire
- * format in the buffer @buf.
- */
-int ib_ud_header_unpack(void *buf,
- struct ib_ud_header *header)
-{
- ib_unpack(lrh_table, ARRAY_SIZE(lrh_table),
- buf, &header->lrh);
- buf += IB_LRH_BYTES;
-
- if (header->lrh.link_version != 0) {
- pr_warn("Invalid LRH.link_version %d\n",
- header->lrh.link_version);
- return -EINVAL;
- }
-
- switch (header->lrh.link_next_header) {
- case IB_LNH_IBA_LOCAL:
- header->grh_present = 0;
- break;
-
- case IB_LNH_IBA_GLOBAL:
- header->grh_present = 1;
- ib_unpack(grh_table, ARRAY_SIZE(grh_table),
- buf, &header->grh);
- buf += IB_GRH_BYTES;
-
- if (header->grh.ip_version != 6) {
- pr_warn("Invalid GRH.ip_version %d\n",
- header->grh.ip_version);
- return -EINVAL;
- }
- if (header->grh.next_header != 0x1b) {
- pr_warn("Invalid GRH.next_header 0x%02x\n",
- header->grh.next_header);
- return -EINVAL;
- }
- break;
-
- default:
- pr_warn("Invalid LRH.link_next_header %d\n",
- header->lrh.link_next_header);
- return -EINVAL;
- }
-
- ib_unpack(bth_table, ARRAY_SIZE(bth_table),
- buf, &header->bth);
- buf += IB_BTH_BYTES;
-
- switch (header->bth.opcode) {
- case IB_OPCODE_UD_SEND_ONLY:
- header->immediate_present = 0;
- break;
- case IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE:
- header->immediate_present = 1;
- break;
- default:
- pr_warn("Invalid BTH.opcode 0x%02x\n", header->bth.opcode);
- return -EINVAL;
- }
-
- if (header->bth.transport_header_version != 0) {
- pr_warn("Invalid BTH.transport_header_version %d\n",
- header->bth.transport_header_version);
- return -EINVAL;
- }
-
- ib_unpack(deth_table, ARRAY_SIZE(deth_table),
- buf, &header->deth);
- buf += IB_DETH_BYTES;
-
- if (header->immediate_present)
- memcpy(&header->immediate_data, buf, sizeof header->immediate_data);
-
- return 0;
-}
-EXPORT_SYMBOL(ib_ud_header_unpack);
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index 1e62a5f0cb28..8137031c2a65 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -2,6 +2,7 @@
* Copyright (c) 2005 Topspin Communications. All rights reserved.
* Copyright (c) 2005 Cisco Systems. All rights reserved.
* Copyright (c) 2005 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2020 Intel Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -34,73 +35,145 @@
#include <linux/mm.h>
#include <linux/dma-mapping.h>
-#include <linux/sched.h>
+#include <linux/sched/signal.h>
+#include <linux/sched/mm.h>
#include <linux/export.h>
-#include <linux/hugetlb.h>
#include <linux/slab.h>
+#include <linux/pagemap.h>
+#include <linux/count_zeros.h>
#include <rdma/ib_umem_odp.h>
#include "uverbs.h"
+#define RESCHED_LOOP_CNT_THRESHOLD 0x1000
static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int dirty)
{
+ bool make_dirty = umem->writable && dirty;
struct scatterlist *sg;
- struct page *page;
+ unsigned int i;
+
+ if (dirty)
+ ib_dma_unmap_sgtable_attrs(dev, &umem->sgt_append.sgt,
+ DMA_BIDIRECTIONAL, 0);
+
+ for_each_sgtable_sg(&umem->sgt_append.sgt, sg, i) {
+ unpin_user_page_range_dirty_lock(sg_page(sg),
+ DIV_ROUND_UP(sg->length, PAGE_SIZE), make_dirty);
+
+ if (i && !(i % RESCHED_LOOP_CNT_THRESHOLD))
+ cond_resched();
+ }
+
+ sg_free_append_table(&umem->sgt_append);
+}
+
+/**
+ * ib_umem_find_best_pgsz - Find best HW page size to use for this MR
+ *
+ * @umem: umem struct
+ * @pgsz_bitmap: bitmap of HW supported page sizes
+ * @virt: IOVA
+ *
+ * This helper is intended for HW that support multiple page
+ * sizes but can do only a single page size in an MR.
+ *
+ * Returns 0 if the umem requires page sizes not supported by
+ * the driver to be mapped. Drivers always supporting PAGE_SIZE
+ * or smaller will never see a 0 result.
+ */
+unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem,
+ unsigned long pgsz_bitmap,
+ unsigned long virt)
+{
+ unsigned long curr_len = 0;
+ dma_addr_t curr_base = ~0;
+ unsigned long va, pgoff;
+ struct scatterlist *sg;
+ dma_addr_t mask;
+ dma_addr_t end;
int i;
- if (umem->nmap > 0)
- ib_dma_unmap_sg(dev, umem->sg_head.sgl,
- umem->npages,
- DMA_BIDIRECTIONAL);
+ umem->iova = va = virt;
- for_each_sg(umem->sg_head.sgl, sg, umem->npages, i) {
+ if (umem->is_odp) {
+ unsigned int page_size = BIT(to_ib_umem_odp(umem)->page_shift);
- page = sg_page(sg);
- if (umem->writable && dirty)
- set_page_dirty_lock(page);
- put_page(page);
+ /* ODP must always be self consistent. */
+ if (!(pgsz_bitmap & page_size))
+ return 0;
+ return page_size;
}
- sg_free_table(&umem->sg_head);
- return;
+ /* The best result is the smallest page size that results in the minimum
+ * number of required pages. Compute the largest page size that could
+ * work based on VA address bits that don't change.
+ */
+ mask = pgsz_bitmap &
+ GENMASK(BITS_PER_LONG - 1,
+ bits_per((umem->length - 1 + virt) ^ virt));
+ /* offset into first SGL */
+ pgoff = umem->address & ~PAGE_MASK;
+
+ for_each_sgtable_dma_sg(&umem->sgt_append.sgt, sg, i) {
+ /* If the current entry is physically contiguous with the previous
+ * one, no need to take its start addresses into consideration.
+ */
+ if (check_add_overflow(curr_base, curr_len, &end) ||
+ end != sg_dma_address(sg)) {
+
+ curr_base = sg_dma_address(sg);
+ curr_len = 0;
+
+ /* Reduce max page size if VA/PA bits differ */
+ mask |= (curr_base + pgoff) ^ va;
+
+ /* The alignment of any VA matching a discontinuity point
+ * in the physical memory sets the maximum possible page
+ * size as this must be a starting point of a new page that
+ * needs to be aligned.
+ */
+ if (i != 0)
+ mask |= va;
+ }
+
+ curr_len += sg_dma_len(sg);
+ va += sg_dma_len(sg) - pgoff;
+
+ pgoff = 0;
+ }
+ /* The mask accumulates 1's in each position where the VA and physical
+ * address differ, thus the length of trailing 0 is the largest page
+ * size that can pass the VA through to the physical.
+ */
+ if (mask)
+ pgsz_bitmap &= GENMASK(count_trailing_zeros(mask), 0);
+ return pgsz_bitmap ? rounddown_pow_of_two(pgsz_bitmap) : 0;
}
+EXPORT_SYMBOL(ib_umem_find_best_pgsz);
/**
* ib_umem_get - Pin and DMA map userspace memory.
*
- * If access flags indicate ODP memory, avoid pinning. Instead, stores
- * the mm for future page fault handling in conjunction with MMU notifiers.
- *
- * @context: userspace context to pin memory for
+ * @device: IB device to connect UMEM
* @addr: userspace virtual address to start at
* @size: length of region to pin
* @access: IB_ACCESS_xxx flags for memory being pinned
- * @dmasync: flush in-flight DMA when the memory region is written
*/
-struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
- size_t size, int access, int dmasync)
+struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr,
+ size_t size, int access)
{
struct ib_umem *umem;
struct page **page_list;
- struct vm_area_struct **vma_list;
- unsigned long locked;
unsigned long lock_limit;
+ unsigned long new_pinned;
unsigned long cur_base;
+ unsigned long dma_attr = 0;
+ struct mm_struct *mm;
unsigned long npages;
- int ret;
- int i;
- unsigned long dma_attrs = 0;
- struct scatterlist *sg, *sg_list_start;
- int need_release = 0;
- unsigned int gup_flags = FOLL_WRITE;
-
- if (dmasync)
- dma_attrs |= DMA_ATTR_WRITE_BARRIER;
-
- if (!size)
- return ERR_PTR(-EINVAL);
+ int pinned, ret;
+ unsigned int gup_flags = FOLL_LONGTERM;
/*
* If the combination of the addr and size requested for this memory
@@ -113,226 +186,118 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
if (!can_do_mlock())
return ERR_PTR(-EPERM);
- umem = kzalloc(sizeof *umem, GFP_KERNEL);
+ if (access & IB_ACCESS_ON_DEMAND)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ umem = kzalloc(sizeof(*umem), GFP_KERNEL);
if (!umem)
return ERR_PTR(-ENOMEM);
-
- umem->context = context;
- umem->length = size;
- umem->address = addr;
- umem->page_size = PAGE_SIZE;
- umem->pid = get_task_pid(current, PIDTYPE_PID);
+ umem->ibdev = device;
+ umem->length = size;
+ umem->address = addr;
/*
- * We ask for writable memory if any of the following
- * access flags are set. "Local write" and "remote write"
- * obviously require write access. "Remote atomic" can do
- * things like fetch and add, which will modify memory, and
- * "MW bind" can change permissions by binding a window.
+ * Drivers should call ib_umem_find_best_pgsz() to set the iova
+ * correctly.
*/
- umem->writable = !!(access &
- (IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE |
- IB_ACCESS_REMOTE_ATOMIC | IB_ACCESS_MW_BIND));
-
- if (access & IB_ACCESS_ON_DEMAND) {
- ret = ib_umem_odp_get(context, umem);
- if (ret) {
- kfree(umem);
- return ERR_PTR(ret);
- }
- return umem;
- }
-
- umem->odp_data = NULL;
-
- /* We assume the memory is from hugetlb until proved otherwise */
- umem->hugetlb = 1;
+ umem->iova = addr;
+ umem->writable = ib_access_writable(access);
+ umem->owning_mm = mm = current->mm;
+ mmgrab(mm);
page_list = (struct page **) __get_free_page(GFP_KERNEL);
if (!page_list) {
- kfree(umem);
- return ERR_PTR(-ENOMEM);
+ ret = -ENOMEM;
+ goto umem_kfree;
}
- /*
- * if we can't alloc the vma_list, it's not so bad;
- * just assume the memory is not hugetlb memory
- */
- vma_list = (struct vm_area_struct **) __get_free_page(GFP_KERNEL);
- if (!vma_list)
- umem->hugetlb = 0;
-
npages = ib_umem_num_pages(umem);
+ if (npages == 0 || npages > UINT_MAX) {
+ ret = -EINVAL;
+ goto out;
+ }
- down_write(&current->mm->mmap_sem);
-
- locked = npages + current->mm->pinned_vm;
lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
- if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) {
+ new_pinned = atomic64_add_return(npages, &mm->pinned_vm);
+ if (new_pinned > lock_limit && !capable(CAP_IPC_LOCK)) {
+ atomic64_sub(npages, &mm->pinned_vm);
ret = -ENOMEM;
goto out;
}
cur_base = addr & PAGE_MASK;
- if (npages == 0 || npages > UINT_MAX) {
- ret = -EINVAL;
- goto out;
- }
-
- ret = sg_alloc_table(&umem->sg_head, npages, GFP_KERNEL);
- if (ret)
- goto out;
-
- if (!umem->writable)
- gup_flags |= FOLL_FORCE;
-
- need_release = 1;
- sg_list_start = umem->sg_head.sgl;
+ if (umem->writable)
+ gup_flags |= FOLL_WRITE;
while (npages) {
- ret = get_user_pages(cur_base,
- min_t(unsigned long, npages,
- PAGE_SIZE / sizeof (struct page *)),
- gup_flags, page_list, vma_list);
-
- if (ret < 0)
- goto out;
-
- umem->npages += ret;
- cur_base += ret * PAGE_SIZE;
- npages -= ret;
-
- for_each_sg(sg_list_start, sg, ret, i) {
- if (vma_list && !is_vm_hugetlb_page(vma_list[i]))
- umem->hugetlb = 0;
-
- sg_set_page(sg, page_list[i], PAGE_SIZE, 0);
+ cond_resched();
+ pinned = pin_user_pages_fast(cur_base,
+ min_t(unsigned long, npages,
+ PAGE_SIZE /
+ sizeof(struct page *)),
+ gup_flags, page_list);
+ if (pinned < 0) {
+ ret = pinned;
+ goto umem_release;
}
- /* preparing for next loop */
- sg_list_start = sg;
+ cur_base += pinned * PAGE_SIZE;
+ npages -= pinned;
+ ret = sg_alloc_append_table_from_pages(
+ &umem->sgt_append, page_list, pinned, 0,
+ pinned << PAGE_SHIFT, ib_dma_max_seg_size(device),
+ npages, GFP_KERNEL);
+ if (ret) {
+ unpin_user_pages_dirty_lock(page_list, pinned, 0);
+ goto umem_release;
+ }
}
- umem->nmap = ib_dma_map_sg_attrs(context->device,
- umem->sg_head.sgl,
- umem->npages,
- DMA_BIDIRECTIONAL,
- dma_attrs);
+ if (access & IB_ACCESS_RELAXED_ORDERING)
+ dma_attr |= DMA_ATTR_WEAK_ORDERING;
- if (umem->nmap <= 0) {
- ret = -ENOMEM;
- goto out;
- }
-
- ret = 0;
+ ret = ib_dma_map_sgtable_attrs(device, &umem->sgt_append.sgt,
+ DMA_BIDIRECTIONAL, dma_attr);
+ if (ret)
+ goto umem_release;
+ goto out;
+umem_release:
+ __ib_umem_release(device, umem, 0);
+ atomic64_sub(ib_umem_num_pages(umem), &mm->pinned_vm);
out:
- if (ret < 0) {
- if (need_release)
- __ib_umem_release(context->device, umem, 0);
- put_pid(umem->pid);
- kfree(umem);
- } else
- current->mm->pinned_vm = locked;
-
- up_write(&current->mm->mmap_sem);
- if (vma_list)
- free_page((unsigned long) vma_list);
free_page((unsigned long) page_list);
-
- return ret < 0 ? ERR_PTR(ret) : umem;
+umem_kfree:
+ if (ret) {
+ mmdrop(umem->owning_mm);
+ kfree(umem);
+ }
+ return ret ? ERR_PTR(ret) : umem;
}
EXPORT_SYMBOL(ib_umem_get);
-static void ib_umem_account(struct work_struct *work)
-{
- struct ib_umem *umem = container_of(work, struct ib_umem, work);
-
- down_write(&umem->mm->mmap_sem);
- umem->mm->pinned_vm -= umem->diff;
- up_write(&umem->mm->mmap_sem);
- mmput(umem->mm);
- kfree(umem);
-}
-
/**
* ib_umem_release - release memory pinned with ib_umem_get
* @umem: umem struct to release
*/
void ib_umem_release(struct ib_umem *umem)
{
- struct ib_ucontext *context = umem->context;
- struct mm_struct *mm;
- struct task_struct *task;
- unsigned long diff;
-
- if (umem->odp_data) {
- ib_umem_odp_release(umem);
+ if (!umem)
return;
- }
+ if (umem->is_dmabuf)
+ return ib_umem_dmabuf_release(to_ib_umem_dmabuf(umem));
+ if (umem->is_odp)
+ return ib_umem_odp_release(to_ib_umem_odp(umem));
- __ib_umem_release(umem->context->device, umem, 1);
-
- task = get_pid_task(umem->pid, PIDTYPE_PID);
- put_pid(umem->pid);
- if (!task)
- goto out;
- mm = get_task_mm(task);
- put_task_struct(task);
- if (!mm)
- goto out;
-
- diff = ib_umem_num_pages(umem);
-
- /*
- * We may be called with the mm's mmap_sem already held. This
- * can happen when a userspace munmap() is the call that drops
- * the last reference to our file and calls our release
- * method. If there are memory regions to destroy, we'll end
- * up here and not be able to take the mmap_sem. In that case
- * we defer the vm_locked accounting to the system workqueue.
- */
- if (context->closing) {
- if (!down_write_trylock(&mm->mmap_sem)) {
- INIT_WORK(&umem->work, ib_umem_account);
- umem->mm = mm;
- umem->diff = diff;
-
- queue_work(ib_wq, &umem->work);
- return;
- }
- } else
- down_write(&mm->mmap_sem);
+ __ib_umem_release(umem->ibdev, umem, 1);
- mm->pinned_vm -= diff;
- up_write(&mm->mmap_sem);
- mmput(mm);
-out:
+ atomic64_sub(ib_umem_num_pages(umem), &umem->owning_mm->pinned_vm);
+ mmdrop(umem->owning_mm);
kfree(umem);
}
EXPORT_SYMBOL(ib_umem_release);
-int ib_umem_page_count(struct ib_umem *umem)
-{
- int shift;
- int i;
- int n;
- struct scatterlist *sg;
-
- if (umem->odp_data)
- return ib_umem_num_pages(umem);
-
- shift = ilog2(umem->page_size);
-
- n = 0;
- for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i)
- n += sg_dma_len(sg) >> shift;
-
- return n;
-}
-EXPORT_SYMBOL(ib_umem_page_count);
-
/*
* Copy from the given ib_umem's pages to the given buffer.
*
@@ -350,12 +315,13 @@ int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset,
int ret;
if (offset > umem->length || length > umem->length - offset) {
- pr_err("ib_umem_copy_from not in range. offset: %zd umem length: %zd end: %zd\n",
- offset, umem->length, end);
+ pr_err("%s not in range. offset: %zd umem length: %zd end: %zd\n",
+ __func__, offset, umem->length, end);
return -EINVAL;
}
- ret = sg_pcopy_to_buffer(umem->sg_head.sgl, umem->nmap, dst, length,
+ ret = sg_pcopy_to_buffer(umem->sgt_append.sgt.sgl,
+ umem->sgt_append.sgt.orig_nents, dst, length,
offset + ib_umem_offset(umem));
if (ret < 0)
diff --git a/drivers/infiniband/core/umem_dmabuf.c b/drivers/infiniband/core/umem_dmabuf.c
new file mode 100644
index 000000000000..0ec2e4120cc9
--- /dev/null
+++ b/drivers/infiniband/core/umem_dmabuf.c
@@ -0,0 +1,275 @@
+// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
+/*
+ * Copyright (c) 2020 Intel Corporation. All rights reserved.
+ */
+
+#include <linux/dma-buf.h>
+#include <linux/dma-resv.h>
+#include <linux/dma-mapping.h>
+#include <linux/module.h>
+
+#include "uverbs.h"
+
+MODULE_IMPORT_NS("DMA_BUF");
+
+int ib_umem_dmabuf_map_pages(struct ib_umem_dmabuf *umem_dmabuf)
+{
+ struct sg_table *sgt;
+ struct scatterlist *sg;
+ unsigned long start, end, cur = 0;
+ unsigned int nmap = 0;
+ long ret;
+ int i;
+
+ dma_resv_assert_held(umem_dmabuf->attach->dmabuf->resv);
+
+ if (umem_dmabuf->revoked)
+ return -EINVAL;
+
+ if (umem_dmabuf->sgt)
+ goto wait_fence;
+
+ sgt = dma_buf_map_attachment(umem_dmabuf->attach,
+ DMA_BIDIRECTIONAL);
+ if (IS_ERR(sgt))
+ return PTR_ERR(sgt);
+
+ /* modify the sg list in-place to match umem address and length */
+
+ start = ALIGN_DOWN(umem_dmabuf->umem.address, PAGE_SIZE);
+ end = ALIGN(umem_dmabuf->umem.address + umem_dmabuf->umem.length,
+ PAGE_SIZE);
+ for_each_sgtable_dma_sg(sgt, sg, i) {
+ if (start < cur + sg_dma_len(sg) && cur < end)
+ nmap++;
+ if (cur <= start && start < cur + sg_dma_len(sg)) {
+ unsigned long offset = start - cur;
+
+ umem_dmabuf->first_sg = sg;
+ umem_dmabuf->first_sg_offset = offset;
+ sg_dma_address(sg) += offset;
+ sg_dma_len(sg) -= offset;
+ cur += offset;
+ }
+ if (cur < end && end <= cur + sg_dma_len(sg)) {
+ unsigned long trim = cur + sg_dma_len(sg) - end;
+
+ umem_dmabuf->last_sg = sg;
+ umem_dmabuf->last_sg_trim = trim;
+ sg_dma_len(sg) -= trim;
+ break;
+ }
+ cur += sg_dma_len(sg);
+ }
+
+ umem_dmabuf->umem.sgt_append.sgt.sgl = umem_dmabuf->first_sg;
+ umem_dmabuf->umem.sgt_append.sgt.nents = nmap;
+ umem_dmabuf->sgt = sgt;
+
+wait_fence:
+ /*
+ * Although the sg list is valid now, the content of the pages
+ * may be not up-to-date. Wait for the exporter to finish
+ * the migration.
+ */
+ ret = dma_resv_wait_timeout(umem_dmabuf->attach->dmabuf->resv,
+ DMA_RESV_USAGE_KERNEL,
+ false, MAX_SCHEDULE_TIMEOUT);
+ if (ret < 0)
+ return ret;
+ if (ret == 0)
+ return -ETIMEDOUT;
+ return 0;
+}
+EXPORT_SYMBOL(ib_umem_dmabuf_map_pages);
+
+void ib_umem_dmabuf_unmap_pages(struct ib_umem_dmabuf *umem_dmabuf)
+{
+ dma_resv_assert_held(umem_dmabuf->attach->dmabuf->resv);
+
+ if (!umem_dmabuf->sgt)
+ return;
+
+ /* retore the original sg list */
+ if (umem_dmabuf->first_sg) {
+ sg_dma_address(umem_dmabuf->first_sg) -=
+ umem_dmabuf->first_sg_offset;
+ sg_dma_len(umem_dmabuf->first_sg) +=
+ umem_dmabuf->first_sg_offset;
+ umem_dmabuf->first_sg = NULL;
+ umem_dmabuf->first_sg_offset = 0;
+ }
+ if (umem_dmabuf->last_sg) {
+ sg_dma_len(umem_dmabuf->last_sg) +=
+ umem_dmabuf->last_sg_trim;
+ umem_dmabuf->last_sg = NULL;
+ umem_dmabuf->last_sg_trim = 0;
+ }
+
+ dma_buf_unmap_attachment(umem_dmabuf->attach, umem_dmabuf->sgt,
+ DMA_BIDIRECTIONAL);
+
+ umem_dmabuf->sgt = NULL;
+}
+EXPORT_SYMBOL(ib_umem_dmabuf_unmap_pages);
+
+static struct ib_umem_dmabuf *
+ib_umem_dmabuf_get_with_dma_device(struct ib_device *device,
+ struct device *dma_device,
+ unsigned long offset, size_t size,
+ int fd, int access,
+ const struct dma_buf_attach_ops *ops)
+{
+ struct dma_buf *dmabuf;
+ struct ib_umem_dmabuf *umem_dmabuf;
+ struct ib_umem *umem;
+ unsigned long end;
+ struct ib_umem_dmabuf *ret = ERR_PTR(-EINVAL);
+
+ if (check_add_overflow(offset, (unsigned long)size, &end))
+ return ret;
+
+ if (unlikely(!ops || !ops->move_notify))
+ return ret;
+
+ dmabuf = dma_buf_get(fd);
+ if (IS_ERR(dmabuf))
+ return ERR_CAST(dmabuf);
+
+ if (dmabuf->size < end)
+ goto out_release_dmabuf;
+
+ umem_dmabuf = kzalloc(sizeof(*umem_dmabuf), GFP_KERNEL);
+ if (!umem_dmabuf) {
+ ret = ERR_PTR(-ENOMEM);
+ goto out_release_dmabuf;
+ }
+
+ umem = &umem_dmabuf->umem;
+ umem->ibdev = device;
+ umem->length = size;
+ umem->address = offset;
+ umem->writable = ib_access_writable(access);
+ umem->is_dmabuf = 1;
+
+ if (!ib_umem_num_pages(umem))
+ goto out_free_umem;
+
+ umem_dmabuf->attach = dma_buf_dynamic_attach(
+ dmabuf,
+ dma_device,
+ ops,
+ umem_dmabuf);
+ if (IS_ERR(umem_dmabuf->attach)) {
+ ret = ERR_CAST(umem_dmabuf->attach);
+ goto out_free_umem;
+ }
+ return umem_dmabuf;
+
+out_free_umem:
+ kfree(umem_dmabuf);
+
+out_release_dmabuf:
+ dma_buf_put(dmabuf);
+ return ret;
+}
+
+struct ib_umem_dmabuf *ib_umem_dmabuf_get(struct ib_device *device,
+ unsigned long offset, size_t size,
+ int fd, int access,
+ const struct dma_buf_attach_ops *ops)
+{
+ return ib_umem_dmabuf_get_with_dma_device(device, device->dma_device,
+ offset, size, fd, access, ops);
+}
+EXPORT_SYMBOL(ib_umem_dmabuf_get);
+
+static void
+ib_umem_dmabuf_unsupported_move_notify(struct dma_buf_attachment *attach)
+{
+ struct ib_umem_dmabuf *umem_dmabuf = attach->importer_priv;
+
+ ibdev_warn_ratelimited(umem_dmabuf->umem.ibdev,
+ "Invalidate callback should not be called when memory is pinned\n");
+}
+
+static struct dma_buf_attach_ops ib_umem_dmabuf_attach_pinned_ops = {
+ .allow_peer2peer = true,
+ .move_notify = ib_umem_dmabuf_unsupported_move_notify,
+};
+
+struct ib_umem_dmabuf *
+ib_umem_dmabuf_get_pinned_with_dma_device(struct ib_device *device,
+ struct device *dma_device,
+ unsigned long offset, size_t size,
+ int fd, int access)
+{
+ struct ib_umem_dmabuf *umem_dmabuf;
+ int err;
+
+ umem_dmabuf = ib_umem_dmabuf_get_with_dma_device(device, dma_device, offset,
+ size, fd, access,
+ &ib_umem_dmabuf_attach_pinned_ops);
+ if (IS_ERR(umem_dmabuf))
+ return umem_dmabuf;
+
+ dma_resv_lock(umem_dmabuf->attach->dmabuf->resv, NULL);
+ err = dma_buf_pin(umem_dmabuf->attach);
+ if (err)
+ goto err_release;
+ umem_dmabuf->pinned = 1;
+
+ err = ib_umem_dmabuf_map_pages(umem_dmabuf);
+ if (err)
+ goto err_unpin;
+ dma_resv_unlock(umem_dmabuf->attach->dmabuf->resv);
+
+ return umem_dmabuf;
+
+err_unpin:
+ dma_buf_unpin(umem_dmabuf->attach);
+err_release:
+ dma_resv_unlock(umem_dmabuf->attach->dmabuf->resv);
+ ib_umem_release(&umem_dmabuf->umem);
+ return ERR_PTR(err);
+}
+EXPORT_SYMBOL(ib_umem_dmabuf_get_pinned_with_dma_device);
+
+struct ib_umem_dmabuf *ib_umem_dmabuf_get_pinned(struct ib_device *device,
+ unsigned long offset,
+ size_t size, int fd,
+ int access)
+{
+ return ib_umem_dmabuf_get_pinned_with_dma_device(device, device->dma_device,
+ offset, size, fd, access);
+}
+EXPORT_SYMBOL(ib_umem_dmabuf_get_pinned);
+
+void ib_umem_dmabuf_revoke(struct ib_umem_dmabuf *umem_dmabuf)
+{
+ struct dma_buf *dmabuf = umem_dmabuf->attach->dmabuf;
+
+ dma_resv_lock(dmabuf->resv, NULL);
+ if (umem_dmabuf->revoked)
+ goto end;
+ ib_umem_dmabuf_unmap_pages(umem_dmabuf);
+ if (umem_dmabuf->pinned) {
+ dma_buf_unpin(umem_dmabuf->attach);
+ umem_dmabuf->pinned = 0;
+ }
+ umem_dmabuf->revoked = 1;
+end:
+ dma_resv_unlock(dmabuf->resv);
+}
+EXPORT_SYMBOL(ib_umem_dmabuf_revoke);
+
+void ib_umem_dmabuf_release(struct ib_umem_dmabuf *umem_dmabuf)
+{
+ struct dma_buf *dmabuf = umem_dmabuf->attach->dmabuf;
+
+ ib_umem_dmabuf_revoke(umem_dmabuf);
+
+ dma_buf_detach(dmabuf, umem_dmabuf->attach);
+ dma_buf_put(dmabuf);
+ kfree(umem_dmabuf);
+}
diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c
index 6b079a31dced..572a91a62a7b 100644
--- a/drivers/infiniband/core/umem_odp.c
+++ b/drivers/infiniband/core/umem_odp.c
@@ -32,324 +32,248 @@
#include <linux/types.h>
#include <linux/sched.h>
+#include <linux/sched/mm.h>
+#include <linux/sched/task.h>
#include <linux/pid.h>
#include <linux/slab.h>
#include <linux/export.h>
#include <linux/vmalloc.h>
+#include <linux/hugetlb.h>
+#include <linux/interval_tree.h>
+#include <linux/hmm.h>
+#include <linux/hmm-dma.h>
+#include <linux/pagemap.h>
-#include <rdma/ib_verbs.h>
-#include <rdma/ib_umem.h>
#include <rdma/ib_umem_odp.h>
-static void ib_umem_notifier_start_account(struct ib_umem *item)
-{
- mutex_lock(&item->odp_data->umem_mutex);
-
- /* Only update private counters for this umem if it has them.
- * Otherwise skip it. All page faults will be delayed for this umem. */
- if (item->odp_data->mn_counters_active) {
- int notifiers_count = item->odp_data->notifiers_count++;
-
- if (notifiers_count == 0)
- /* Initialize the completion object for waiting on
- * notifiers. Since notifier_count is zero, no one
- * should be waiting right now. */
- reinit_completion(&item->odp_data->notifier_completion);
- }
- mutex_unlock(&item->odp_data->umem_mutex);
-}
-
-static void ib_umem_notifier_end_account(struct ib_umem *item)
-{
- mutex_lock(&item->odp_data->umem_mutex);
-
- /* Only update private counters for this umem if it has them.
- * Otherwise skip it. All page faults will be delayed for this umem. */
- if (item->odp_data->mn_counters_active) {
- /*
- * This sequence increase will notify the QP page fault that
- * the page that is going to be mapped in the spte could have
- * been freed.
- */
- ++item->odp_data->notifiers_seq;
- if (--item->odp_data->notifiers_count == 0)
- complete_all(&item->odp_data->notifier_completion);
- }
- mutex_unlock(&item->odp_data->umem_mutex);
-}
+#include "uverbs.h"
-/* Account for a new mmu notifier in an ib_ucontext. */
-static void ib_ucontext_notifier_start_account(struct ib_ucontext *context)
+static void ib_init_umem_implicit_odp(struct ib_umem_odp *umem_odp)
{
- atomic_inc(&context->notifier_count);
+ umem_odp->is_implicit_odp = 1;
+ umem_odp->umem.is_odp = 1;
+ mutex_init(&umem_odp->umem_mutex);
}
-/* Account for a terminating mmu notifier in an ib_ucontext.
- *
- * Must be called with the ib_ucontext->umem_rwsem semaphore unlocked, since
- * the function takes the semaphore itself. */
-static void ib_ucontext_notifier_end_account(struct ib_ucontext *context)
+static int ib_init_umem_odp(struct ib_umem_odp *umem_odp,
+ const struct mmu_interval_notifier_ops *ops)
{
- int zero_notifiers = atomic_dec_and_test(&context->notifier_count);
-
- if (zero_notifiers &&
- !list_empty(&context->no_private_counters)) {
- /* No currently running mmu notifiers. Now is the chance to
- * add private accounting to all previously added umems. */
- struct ib_umem_odp *odp_data, *next;
-
- /* Prevent concurrent mmu notifiers from working on the
- * no_private_counters list. */
- down_write(&context->umem_rwsem);
-
- /* Read the notifier_count again, with the umem_rwsem
- * semaphore taken for write. */
- if (!atomic_read(&context->notifier_count)) {
- list_for_each_entry_safe(odp_data, next,
- &context->no_private_counters,
- no_private_counters) {
- mutex_lock(&odp_data->umem_mutex);
- odp_data->mn_counters_active = true;
- list_del(&odp_data->no_private_counters);
- complete_all(&odp_data->notifier_completion);
- mutex_unlock(&odp_data->umem_mutex);
- }
- }
+ struct ib_device *dev = umem_odp->umem.ibdev;
+ size_t page_size = 1UL << umem_odp->page_shift;
+ struct hmm_dma_map *map;
+ unsigned long start;
+ unsigned long end;
+ size_t nr_entries;
+ int ret = 0;
- up_write(&context->umem_rwsem);
- }
-}
+ umem_odp->umem.is_odp = 1;
+ mutex_init(&umem_odp->umem_mutex);
-static int ib_umem_notifier_release_trampoline(struct ib_umem *item, u64 start,
- u64 end, void *cookie) {
+ start = ALIGN_DOWN(umem_odp->umem.address, page_size);
+ if (check_add_overflow(umem_odp->umem.address,
+ (unsigned long)umem_odp->umem.length, &end))
+ return -EOVERFLOW;
+ end = ALIGN(end, page_size);
+ if (unlikely(end < page_size))
+ return -EOVERFLOW;
/*
- * Increase the number of notifiers running, to
- * prevent any further fault handling on this MR.
+ * The mmu notifier can be called within reclaim contexts and takes the
+ * umem_mutex. This is rare to trigger in testing, teach lockdep about
+ * it.
*/
- ib_umem_notifier_start_account(item);
- item->odp_data->dying = 1;
- /* Make sure that the fact the umem is dying is out before we release
- * all pending page faults. */
- smp_wmb();
- complete_all(&item->odp_data->notifier_completion);
- item->context->invalidate_range(item, ib_umem_start(item),
- ib_umem_end(item));
- return 0;
-}
-
-static void ib_umem_notifier_release(struct mmu_notifier *mn,
- struct mm_struct *mm)
-{
- struct ib_ucontext *context = container_of(mn, struct ib_ucontext, mn);
-
- if (!context->invalidate_range)
- return;
-
- ib_ucontext_notifier_start_account(context);
- down_read(&context->umem_rwsem);
- rbt_ib_umem_for_each_in_range(&context->umem_tree, 0,
- ULLONG_MAX,
- ib_umem_notifier_release_trampoline,
- NULL);
- up_read(&context->umem_rwsem);
-}
+ if (IS_ENABLED(CONFIG_LOCKDEP)) {
+ fs_reclaim_acquire(GFP_KERNEL);
+ mutex_lock(&umem_odp->umem_mutex);
+ mutex_unlock(&umem_odp->umem_mutex);
+ fs_reclaim_release(GFP_KERNEL);
+ }
-static int invalidate_page_trampoline(struct ib_umem *item, u64 start,
- u64 end, void *cookie)
-{
- ib_umem_notifier_start_account(item);
- item->context->invalidate_range(item, start, start + PAGE_SIZE);
- ib_umem_notifier_end_account(item);
- return 0;
-}
+ nr_entries = (end - start) >> PAGE_SHIFT;
+ if (!(nr_entries * PAGE_SIZE / page_size))
+ return -EINVAL;
-static void ib_umem_notifier_invalidate_page(struct mmu_notifier *mn,
- struct mm_struct *mm,
- unsigned long address)
-{
- struct ib_ucontext *context = container_of(mn, struct ib_ucontext, mn);
-
- if (!context->invalidate_range)
- return;
-
- ib_ucontext_notifier_start_account(context);
- down_read(&context->umem_rwsem);
- rbt_ib_umem_for_each_in_range(&context->umem_tree, address,
- address + PAGE_SIZE,
- invalidate_page_trampoline, NULL);
- up_read(&context->umem_rwsem);
- ib_ucontext_notifier_end_account(context);
-}
+ map = &umem_odp->map;
+ if (ib_uses_virt_dma(dev)) {
+ map->pfn_list = kvcalloc(nr_entries, sizeof(*map->pfn_list),
+ GFP_KERNEL | __GFP_NOWARN);
+ if (!map->pfn_list)
+ ret = -ENOMEM;
+ } else
+ ret = hmm_dma_map_alloc(dev->dma_device, map,
+ (end - start) >> PAGE_SHIFT,
+ 1 << umem_odp->page_shift);
+ if (ret)
+ return ret;
+
+ ret = mmu_interval_notifier_insert(&umem_odp->notifier,
+ umem_odp->umem.owning_mm, start,
+ end - start, ops);
+ if (ret)
+ goto out_free_map;
-static int invalidate_range_start_trampoline(struct ib_umem *item, u64 start,
- u64 end, void *cookie)
-{
- ib_umem_notifier_start_account(item);
- item->context->invalidate_range(item, start, end);
return 0;
-}
-
-static void ib_umem_notifier_invalidate_range_start(struct mmu_notifier *mn,
- struct mm_struct *mm,
- unsigned long start,
- unsigned long end)
-{
- struct ib_ucontext *context = container_of(mn, struct ib_ucontext, mn);
- if (!context->invalidate_range)
- return;
-
- ib_ucontext_notifier_start_account(context);
- down_read(&context->umem_rwsem);
- rbt_ib_umem_for_each_in_range(&context->umem_tree, start,
- end,
- invalidate_range_start_trampoline, NULL);
- up_read(&context->umem_rwsem);
-}
-
-static int invalidate_range_end_trampoline(struct ib_umem *item, u64 start,
- u64 end, void *cookie)
-{
- ib_umem_notifier_end_account(item);
- return 0;
+out_free_map:
+ if (ib_uses_virt_dma(dev))
+ kvfree(map->pfn_list);
+ else
+ hmm_dma_map_free(dev->dma_device, map);
+ return ret;
}
-static void ib_umem_notifier_invalidate_range_end(struct mmu_notifier *mn,
- struct mm_struct *mm,
- unsigned long start,
- unsigned long end)
+/**
+ * ib_umem_odp_alloc_implicit - Allocate a parent implicit ODP umem
+ *
+ * Implicit ODP umems do not have a VA range and do not have any page lists.
+ * They exist only to hold the per_mm reference to help the driver create
+ * children umems.
+ *
+ * @device: IB device to create UMEM
+ * @access: ib_reg_mr access flags
+ */
+struct ib_umem_odp *ib_umem_odp_alloc_implicit(struct ib_device *device,
+ int access)
{
- struct ib_ucontext *context = container_of(mn, struct ib_ucontext, mn);
-
- if (!context->invalidate_range)
- return;
-
- down_read(&context->umem_rwsem);
- rbt_ib_umem_for_each_in_range(&context->umem_tree, start,
- end,
- invalidate_range_end_trampoline, NULL);
- up_read(&context->umem_rwsem);
- ib_ucontext_notifier_end_account(context);
+ struct ib_umem *umem;
+ struct ib_umem_odp *umem_odp;
+
+ if (access & IB_ACCESS_HUGETLB)
+ return ERR_PTR(-EINVAL);
+
+ umem_odp = kzalloc(sizeof(*umem_odp), GFP_KERNEL);
+ if (!umem_odp)
+ return ERR_PTR(-ENOMEM);
+ umem = &umem_odp->umem;
+ umem->ibdev = device;
+ umem->writable = ib_access_writable(access);
+ umem->owning_mm = current->mm;
+ umem_odp->page_shift = PAGE_SHIFT;
+
+ umem_odp->tgid = get_task_pid(current->group_leader, PIDTYPE_PID);
+ ib_init_umem_implicit_odp(umem_odp);
+ return umem_odp;
}
+EXPORT_SYMBOL(ib_umem_odp_alloc_implicit);
-static const struct mmu_notifier_ops ib_umem_notifiers = {
- .release = ib_umem_notifier_release,
- .invalidate_page = ib_umem_notifier_invalidate_page,
- .invalidate_range_start = ib_umem_notifier_invalidate_range_start,
- .invalidate_range_end = ib_umem_notifier_invalidate_range_end,
-};
-
-int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem)
+/**
+ * ib_umem_odp_alloc_child - Allocate a child ODP umem under an implicit
+ * parent ODP umem
+ *
+ * @root: The parent umem enclosing the child. This must be allocated using
+ * ib_alloc_implicit_odp_umem()
+ * @addr: The starting userspace VA
+ * @size: The length of the userspace VA
+ * @ops: MMU interval ops, currently only @invalidate
+ */
+struct ib_umem_odp *
+ib_umem_odp_alloc_child(struct ib_umem_odp *root, unsigned long addr,
+ size_t size,
+ const struct mmu_interval_notifier_ops *ops)
{
- int ret_val;
- struct pid *our_pid;
- struct mm_struct *mm = get_task_mm(current);
-
- if (!mm)
- return -EINVAL;
-
- /* Prevent creating ODP MRs in child processes */
- rcu_read_lock();
- our_pid = get_task_pid(current->group_leader, PIDTYPE_PID);
- rcu_read_unlock();
- put_pid(our_pid);
- if (context->tgid != our_pid) {
- ret_val = -EINVAL;
- goto out_mm;
- }
-
- umem->hugetlb = 0;
- umem->odp_data = kzalloc(sizeof(*umem->odp_data), GFP_KERNEL);
- if (!umem->odp_data) {
- ret_val = -ENOMEM;
- goto out_mm;
- }
- umem->odp_data->umem = umem;
-
- mutex_init(&umem->odp_data->umem_mutex);
-
- init_completion(&umem->odp_data->notifier_completion);
-
- umem->odp_data->page_list = vzalloc(ib_umem_num_pages(umem) *
- sizeof(*umem->odp_data->page_list));
- if (!umem->odp_data->page_list) {
- ret_val = -ENOMEM;
- goto out_odp_data;
- }
-
- umem->odp_data->dma_list = vzalloc(ib_umem_num_pages(umem) *
- sizeof(*umem->odp_data->dma_list));
- if (!umem->odp_data->dma_list) {
- ret_val = -ENOMEM;
- goto out_page_list;
- }
-
/*
- * When using MMU notifiers, we will get a
- * notification before the "current" task (and MM) is
- * destroyed. We use the umem_rwsem semaphore to synchronize.
+ * Caller must ensure that root cannot be freed during the call to
+ * ib_alloc_odp_umem.
*/
- down_write(&context->umem_rwsem);
- context->odp_mrs_count++;
- if (likely(ib_umem_start(umem) != ib_umem_end(umem)))
- rbt_ib_umem_insert(&umem->odp_data->interval_tree,
- &context->umem_tree);
- if (likely(!atomic_read(&context->notifier_count)) ||
- context->odp_mrs_count == 1)
- umem->odp_data->mn_counters_active = true;
- else
- list_add(&umem->odp_data->no_private_counters,
- &context->no_private_counters);
- downgrade_write(&context->umem_rwsem);
-
- if (context->odp_mrs_count == 1) {
- /*
- * Note that at this point, no MMU notifier is running
- * for this context!
- */
- atomic_set(&context->notifier_count, 0);
- INIT_HLIST_NODE(&context->mn.hlist);
- context->mn.ops = &ib_umem_notifiers;
- /*
- * Lock-dep detects a false positive for mmap_sem vs.
- * umem_rwsem, due to not grasping downgrade_write correctly.
- */
- lockdep_off();
- ret_val = mmu_notifier_register(&context->mn, mm);
- lockdep_on();
- if (ret_val) {
- pr_err("Failed to register mmu_notifier %d\n", ret_val);
- ret_val = -EBUSY;
- goto out_mutex;
- }
- }
-
- up_read(&context->umem_rwsem);
+ struct ib_umem_odp *odp_data;
+ struct ib_umem *umem;
+ int ret;
+
+ if (WARN_ON(!root->is_implicit_odp))
+ return ERR_PTR(-EINVAL);
+
+ odp_data = kzalloc(sizeof(*odp_data), GFP_KERNEL);
+ if (!odp_data)
+ return ERR_PTR(-ENOMEM);
+ umem = &odp_data->umem;
+ umem->ibdev = root->umem.ibdev;
+ umem->length = size;
+ umem->address = addr;
+ umem->writable = root->umem.writable;
+ umem->owning_mm = root->umem.owning_mm;
+ odp_data->page_shift = PAGE_SHIFT;
+ odp_data->notifier.ops = ops;
/*
- * Note that doing an mmput can cause a notifier for the relevant mm.
- * If the notifier is called while we hold the umem_rwsem, this will
- * cause a deadlock. Therefore, we release the reference only after we
- * released the semaphore.
+ * A mmget must be held when registering a notifier, the owming_mm only
+ * has a mm_grab at this point.
*/
- mmput(mm);
- return 0;
+ if (!mmget_not_zero(umem->owning_mm)) {
+ ret = -EFAULT;
+ goto out_free;
+ }
-out_mutex:
- up_read(&context->umem_rwsem);
- vfree(umem->odp_data->dma_list);
-out_page_list:
- vfree(umem->odp_data->page_list);
-out_odp_data:
- kfree(umem->odp_data);
-out_mm:
- mmput(mm);
- return ret_val;
+ odp_data->tgid = get_pid(root->tgid);
+ ret = ib_init_umem_odp(odp_data, ops);
+ if (ret)
+ goto out_tgid;
+ mmput(umem->owning_mm);
+ return odp_data;
+
+out_tgid:
+ put_pid(odp_data->tgid);
+ mmput(umem->owning_mm);
+out_free:
+ kfree(odp_data);
+ return ERR_PTR(ret);
+}
+EXPORT_SYMBOL(ib_umem_odp_alloc_child);
+
+/**
+ * ib_umem_odp_get - Create a umem_odp for a userspace va
+ *
+ * @device: IB device struct to get UMEM
+ * @addr: userspace virtual address to start at
+ * @size: length of region to pin
+ * @access: IB_ACCESS_xxx flags for memory being pinned
+ * @ops: MMU interval ops, currently only @invalidate
+ *
+ * The driver should use when the access flags indicate ODP memory. It avoids
+ * pinning, instead, stores the mm for future page fault handling in
+ * conjunction with MMU notifiers.
+ */
+struct ib_umem_odp *ib_umem_odp_get(struct ib_device *device,
+ unsigned long addr, size_t size, int access,
+ const struct mmu_interval_notifier_ops *ops)
+{
+ struct ib_umem_odp *umem_odp;
+ int ret;
+
+ if (WARN_ON_ONCE(!(access & IB_ACCESS_ON_DEMAND)))
+ return ERR_PTR(-EINVAL);
+
+ umem_odp = kzalloc(sizeof(struct ib_umem_odp), GFP_KERNEL);
+ if (!umem_odp)
+ return ERR_PTR(-ENOMEM);
+
+ umem_odp->umem.ibdev = device;
+ umem_odp->umem.length = size;
+ umem_odp->umem.address = addr;
+ umem_odp->umem.writable = ib_access_writable(access);
+ umem_odp->umem.owning_mm = current->mm;
+ umem_odp->notifier.ops = ops;
+
+ umem_odp->page_shift = PAGE_SHIFT;
+#ifdef CONFIG_HUGETLB_PAGE
+ if (access & IB_ACCESS_HUGETLB)
+ umem_odp->page_shift = HPAGE_SHIFT;
+#endif
+
+ umem_odp->tgid = get_task_pid(current->group_leader, PIDTYPE_PID);
+ ret = ib_init_umem_odp(umem_odp, ops);
+ if (ret)
+ goto err_put_pid;
+ return umem_odp;
+
+err_put_pid:
+ put_pid(umem_odp->tgid);
+ kfree(umem_odp);
+ return ERR_PTR(ret);
}
+EXPORT_SYMBOL(ib_umem_odp_get);
-void ib_umem_odp_release(struct ib_umem *umem)
+static void ib_umem_odp_free(struct ib_umem_odp *umem_odp)
{
- struct ib_ucontext *context = umem->context;
+ struct ib_device *dev = umem_odp->umem.ibdev;
/*
* Ensure that no more pages are mapped in the umem.
@@ -357,156 +281,37 @@ void ib_umem_odp_release(struct ib_umem *umem)
* It is the driver's responsibility to ensure, before calling us,
* that the hardware will not attempt to access the MR any more.
*/
- ib_umem_odp_unmap_dma_pages(umem, ib_umem_start(umem),
- ib_umem_end(umem));
-
- down_write(&context->umem_rwsem);
- if (likely(ib_umem_start(umem) != ib_umem_end(umem)))
- rbt_ib_umem_remove(&umem->odp_data->interval_tree,
- &context->umem_tree);
- context->odp_mrs_count--;
- if (!umem->odp_data->mn_counters_active) {
- list_del(&umem->odp_data->no_private_counters);
- complete_all(&umem->odp_data->notifier_completion);
- }
-
- /*
- * Downgrade the lock to a read lock. This ensures that the notifiers
- * (who lock the mutex for reading) will be able to finish, and we
- * will be able to enventually obtain the mmu notifiers SRCU. Note
- * that since we are doing it atomically, no other user could register
- * and unregister while we do the check.
- */
- downgrade_write(&context->umem_rwsem);
- if (!context->odp_mrs_count) {
- struct task_struct *owning_process = NULL;
- struct mm_struct *owning_mm = NULL;
-
- owning_process = get_pid_task(context->tgid,
- PIDTYPE_PID);
- if (owning_process == NULL)
- /*
- * The process is already dead, notifier were removed
- * already.
- */
- goto out;
-
- owning_mm = get_task_mm(owning_process);
- if (owning_mm == NULL)
- /*
- * The process' mm is already dead, notifier were
- * removed already.
- */
- goto out_put_task;
- mmu_notifier_unregister(&context->mn, owning_mm);
-
- mmput(owning_mm);
-
-out_put_task:
- put_task_struct(owning_process);
- }
-out:
- up_read(&context->umem_rwsem);
-
- vfree(umem->odp_data->dma_list);
- vfree(umem->odp_data->page_list);
- kfree(umem->odp_data);
- kfree(umem);
+ mutex_lock(&umem_odp->umem_mutex);
+ ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem_odp),
+ ib_umem_end(umem_odp));
+ mutex_unlock(&umem_odp->umem_mutex);
+ mmu_interval_notifier_remove(&umem_odp->notifier);
+ if (ib_uses_virt_dma(dev))
+ kvfree(umem_odp->map.pfn_list);
+ else
+ hmm_dma_map_free(dev->dma_device, &umem_odp->map);
}
-/*
- * Map for DMA and insert a single page into the on-demand paging page tables.
- *
- * @umem: the umem to insert the page to.
- * @page_index: index in the umem to add the page to.
- * @page: the page struct to map and add.
- * @access_mask: access permissions needed for this page.
- * @current_seq: sequence number for synchronization with invalidations.
- * the sequence number is taken from
- * umem->odp_data->notifiers_seq.
- *
- * The function returns -EFAULT if the DMA mapping operation fails. It returns
- * -EAGAIN if a concurrent invalidation prevents us from updating the page.
- *
- * The page is released via put_page even if the operation failed. For
- * on-demand pinning, the page is released whenever it isn't stored in the
- * umem.
- */
-static int ib_umem_odp_map_dma_single_page(
- struct ib_umem *umem,
- int page_index,
- u64 base_virt_addr,
- struct page *page,
- u64 access_mask,
- unsigned long current_seq)
+void ib_umem_odp_release(struct ib_umem_odp *umem_odp)
{
- struct ib_device *dev = umem->context->device;
- dma_addr_t dma_addr;
- int stored_page = 0;
- int remove_existing_mapping = 0;
- int ret = 0;
+ if (!umem_odp->is_implicit_odp)
+ ib_umem_odp_free(umem_odp);
- /*
- * Note: we avoid writing if seq is different from the initial seq, to
- * handle case of a racing notifier. This check also allows us to bail
- * early if we have a notifier running in parallel with us.
- */
- if (ib_umem_mmu_notifier_retry(umem, current_seq)) {
- ret = -EAGAIN;
- goto out;
- }
- if (!(umem->odp_data->dma_list[page_index])) {
- dma_addr = ib_dma_map_page(dev,
- page,
- 0, PAGE_SIZE,
- DMA_BIDIRECTIONAL);
- if (ib_dma_mapping_error(dev, dma_addr)) {
- ret = -EFAULT;
- goto out;
- }
- umem->odp_data->dma_list[page_index] = dma_addr | access_mask;
- umem->odp_data->page_list[page_index] = page;
- stored_page = 1;
- } else if (umem->odp_data->page_list[page_index] == page) {
- umem->odp_data->dma_list[page_index] |= access_mask;
- } else {
- pr_err("error: got different pages in IB device and from get_user_pages. IB device page: %p, gup page: %p\n",
- umem->odp_data->page_list[page_index], page);
- /* Better remove the mapping now, to prevent any further
- * damage. */
- remove_existing_mapping = 1;
- }
-
-out:
- /* On Demand Paging - avoid pinning the page */
- if (umem->context->invalidate_range || !stored_page)
- put_page(page);
-
- if (remove_existing_mapping && umem->context->invalidate_range) {
- invalidate_page_trampoline(
- umem,
- base_virt_addr + (page_index * PAGE_SIZE),
- base_virt_addr + ((page_index+1)*PAGE_SIZE),
- NULL);
- ret = -EAGAIN;
- }
-
- return ret;
+ put_pid(umem_odp->tgid);
+ kfree(umem_odp);
}
+EXPORT_SYMBOL(ib_umem_odp_release);
/**
- * ib_umem_odp_map_dma_pages - Pin and DMA map userspace memory in an ODP MR.
+ * ib_umem_odp_map_dma_and_lock - DMA map userspace memory in an ODP MR and lock it.
*
- * Pins the range of pages passed in the argument, and maps them to
- * DMA addresses. The DMA addresses of the mapped pages is updated in
- * umem->odp_data->dma_list.
+ * Maps the range passed in the argument to DMA addresses.
+ * Upon success the ODP MR will be locked to let caller complete its device
+ * page table update.
*
* Returns the number of pages mapped in success, negative error code
* for failure.
- * An -EAGAIN error code is returned when a concurrent mmu notifier prevents
- * the function from completing its task.
- *
- * @umem: the umem to map and pin
+ * @umem_odp: the umem to map and pin
* @user_virt: the address from which we need to map.
* @bcnt: the minimal number of bytes to pin and map. The mapping might be
* bigger due to alignment, and may also be smaller in case of an error
@@ -514,159 +319,151 @@ out:
* the return value.
* @access_mask: bit mask of the requested access permissions for the given
* range.
- * @current_seq: the MMU notifiers sequance value for synchronization with
- * invalidations. the sequance number is read from
- * umem->odp_data->notifiers_seq before calling this function
+ * @fault: is faulting required for the given range
*/
-int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
- u64 access_mask, unsigned long current_seq)
+int ib_umem_odp_map_dma_and_lock(struct ib_umem_odp *umem_odp, u64 user_virt,
+ u64 bcnt, u64 access_mask, bool fault)
+ __acquires(&umem_odp->umem_mutex)
{
struct task_struct *owning_process = NULL;
- struct mm_struct *owning_mm = NULL;
- struct page **local_page_list = NULL;
- u64 off;
- int j, k, ret = 0, start_idx, npages = 0;
- u64 base_virt_addr;
- unsigned int flags = 0;
-
- if (access_mask == 0)
- return -EINVAL;
-
- if (user_virt < ib_umem_start(umem) ||
- user_virt + bcnt > ib_umem_end(umem))
+ struct mm_struct *owning_mm = umem_odp->umem.owning_mm;
+ int pfn_index, dma_index, ret = 0, start_idx;
+ unsigned int page_shift, hmm_order, pfn_start_idx;
+ unsigned long num_pfns, current_seq;
+ struct hmm_range range = {};
+ unsigned long timeout;
+
+ if (user_virt < ib_umem_start(umem_odp) ||
+ user_virt + bcnt > ib_umem_end(umem_odp))
return -EFAULT;
- local_page_list = (struct page **)__get_free_page(GFP_KERNEL);
- if (!local_page_list)
- return -ENOMEM;
-
- off = user_virt & (~PAGE_MASK);
- user_virt = user_virt & PAGE_MASK;
- base_virt_addr = user_virt;
- bcnt += off; /* Charge for the first page offset as well. */
+ page_shift = umem_odp->page_shift;
- owning_process = get_pid_task(umem->context->tgid, PIDTYPE_PID);
- if (owning_process == NULL) {
+ /*
+ * owning_process is allowed to be NULL, this means somehow the mm is
+ * existing beyond the lifetime of the originating process.. Presumably
+ * mmget_not_zero will fail in this case.
+ */
+ owning_process = get_pid_task(umem_odp->tgid, PIDTYPE_PID);
+ if (!owning_process || !mmget_not_zero(owning_mm)) {
ret = -EINVAL;
- goto out_no_task;
+ goto out_put_task;
}
- owning_mm = get_task_mm(owning_process);
- if (owning_mm == NULL) {
- ret = -EINVAL;
- goto out_put_task;
+ range.notifier = &umem_odp->notifier;
+ range.start = ALIGN_DOWN(user_virt, 1UL << page_shift);
+ range.end = ALIGN(user_virt + bcnt, 1UL << page_shift);
+ pfn_start_idx = (range.start - ib_umem_start(umem_odp)) >> PAGE_SHIFT;
+ num_pfns = (range.end - range.start) >> PAGE_SHIFT;
+ if (fault) {
+ range.default_flags = HMM_PFN_REQ_FAULT;
+
+ if (access_mask & HMM_PFN_WRITE)
+ range.default_flags |= HMM_PFN_REQ_WRITE;
+ }
+
+ range.hmm_pfns = &(umem_odp->map.pfn_list[pfn_start_idx]);
+ timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
+
+retry:
+ current_seq = range.notifier_seq =
+ mmu_interval_read_begin(&umem_odp->notifier);
+
+ mmap_read_lock(owning_mm);
+ ret = hmm_range_fault(&range);
+ mmap_read_unlock(owning_mm);
+ if (unlikely(ret)) {
+ if (ret == -EBUSY && !time_after(jiffies, timeout))
+ goto retry;
+ goto out_put_mm;
}
- if (access_mask & ODP_WRITE_ALLOWED_BIT)
- flags |= FOLL_WRITE;
+ start_idx = (range.start - ib_umem_start(umem_odp)) >> page_shift;
+ dma_index = start_idx;
- start_idx = (user_virt - ib_umem_start(umem)) >> PAGE_SHIFT;
- k = start_idx;
+ mutex_lock(&umem_odp->umem_mutex);
+ if (mmu_interval_read_retry(&umem_odp->notifier, current_seq)) {
+ mutex_unlock(&umem_odp->umem_mutex);
+ goto retry;
+ }
- while (bcnt > 0) {
- const size_t gup_num_pages =
- min_t(size_t, ALIGN(bcnt, PAGE_SIZE) / PAGE_SIZE,
- PAGE_SIZE / sizeof(struct page *));
+ for (pfn_index = 0; pfn_index < num_pfns;
+ pfn_index += 1 << (page_shift - PAGE_SHIFT), dma_index++) {
- down_read(&owning_mm->mmap_sem);
/*
- * Note: this might result in redundent page getting. We can
- * avoid this by checking dma_list to be 0 before calling
- * get_user_pages. However, this make the code much more
- * complex (and doesn't gain us much performance in most use
- * cases).
+ * Since we asked for hmm_range_fault() to populate
+ * pages it shouldn't return an error entry on success.
*/
- npages = get_user_pages_remote(owning_process, owning_mm,
- user_virt, gup_num_pages,
- flags, local_page_list, NULL, NULL);
- up_read(&owning_mm->mmap_sem);
+ WARN_ON(fault && range.hmm_pfns[pfn_index] & HMM_PFN_ERROR);
+ WARN_ON(fault && !(range.hmm_pfns[pfn_index] & HMM_PFN_VALID));
+ if (!(range.hmm_pfns[pfn_index] & HMM_PFN_VALID))
+ continue;
- if (npages < 0)
- break;
+ if (range.hmm_pfns[pfn_index] & HMM_PFN_DMA_MAPPED)
+ continue;
- bcnt -= min_t(size_t, npages << PAGE_SHIFT, bcnt);
- user_virt += npages << PAGE_SHIFT;
- mutex_lock(&umem->odp_data->umem_mutex);
- for (j = 0; j < npages; ++j) {
- ret = ib_umem_odp_map_dma_single_page(
- umem, k, base_virt_addr, local_page_list[j],
- access_mask, current_seq);
- if (ret < 0)
- break;
- k++;
- }
- mutex_unlock(&umem->odp_data->umem_mutex);
-
- if (ret < 0) {
- /* Release left over pages when handling errors. */
- for (++j; j < npages; ++j)
- put_page(local_page_list[j]);
+ hmm_order = hmm_pfn_to_map_order(range.hmm_pfns[pfn_index]);
+ /* If a hugepage was detected and ODP wasn't set for, the umem
+ * page_shift will be used, the opposite case is an error.
+ */
+ if (hmm_order + PAGE_SHIFT < page_shift) {
+ ret = -EINVAL;
+ ibdev_dbg(umem_odp->umem.ibdev,
+ "%s: un-expected hmm_order %u, page_shift %u\n",
+ __func__, hmm_order, page_shift);
break;
}
}
+ /* upon success lock should stay on hold for the callee */
+ if (!ret)
+ ret = dma_index - start_idx;
+ else
+ mutex_unlock(&umem_odp->umem_mutex);
- if (ret >= 0) {
- if (npages < 0 && k == start_idx)
- ret = npages;
- else
- ret = k - start_idx;
- }
-
- mmput(owning_mm);
+out_put_mm:
+ mmput_async(owning_mm);
out_put_task:
- put_task_struct(owning_process);
-out_no_task:
- free_page((unsigned long)local_page_list);
+ if (owning_process)
+ put_task_struct(owning_process);
return ret;
}
-EXPORT_SYMBOL(ib_umem_odp_map_dma_pages);
+EXPORT_SYMBOL(ib_umem_odp_map_dma_and_lock);
-void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 virt,
+void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 virt,
u64 bound)
{
- int idx;
+ struct ib_device *dev = umem_odp->umem.ibdev;
u64 addr;
- struct ib_device *dev = umem->context->device;
-
- virt = max_t(u64, virt, ib_umem_start(umem));
- bound = min_t(u64, bound, ib_umem_end(umem));
- /* Note that during the run of this function, the
- * notifiers_count of the MR is > 0, preventing any racing
- * faults from completion. We might be racing with other
- * invalidations, so we must make sure we free each page only
- * once. */
- mutex_lock(&umem->odp_data->umem_mutex);
- for (addr = virt; addr < bound; addr += (u64)umem->page_size) {
- idx = (addr - ib_umem_start(umem)) / PAGE_SIZE;
- if (umem->odp_data->page_list[idx]) {
- struct page *page = umem->odp_data->page_list[idx];
- dma_addr_t dma = umem->odp_data->dma_list[idx];
- dma_addr_t dma_addr = dma & ODP_DMA_ADDR_MASK;
-
- WARN_ON(!dma_addr);
-
- ib_dma_unmap_page(dev, dma_addr, PAGE_SIZE,
- DMA_BIDIRECTIONAL);
- if (dma & ODP_WRITE_ALLOWED_BIT) {
- struct page *head_page = compound_head(page);
- /*
- * set_page_dirty prefers being called with
- * the page lock. However, MMU notifiers are
- * called sometimes with and sometimes without
- * the lock. We rely on the umem_mutex instead
- * to prevent other mmu notifiers from
- * continuing and allowing the page mapping to
- * be removed.
- */
- set_page_dirty(head_page);
- }
- /* on demand pinning support */
- if (!umem->context->invalidate_range)
- put_page(page);
- umem->odp_data->page_list[idx] = NULL;
- umem->odp_data->dma_list[idx] = 0;
+
+ lockdep_assert_held(&umem_odp->umem_mutex);
+
+ virt = max_t(u64, virt, ib_umem_start(umem_odp));
+ bound = min_t(u64, bound, ib_umem_end(umem_odp));
+ for (addr = virt; addr < bound; addr += BIT(umem_odp->page_shift)) {
+ u64 offset = addr - ib_umem_start(umem_odp);
+ size_t idx = offset >> umem_odp->page_shift;
+ unsigned long pfn = umem_odp->map.pfn_list[idx];
+
+ if (!hmm_dma_unmap_pfn(dev->dma_device, &umem_odp->map, idx))
+ goto clear;
+
+ if (pfn & HMM_PFN_WRITE) {
+ struct page *page = hmm_pfn_to_page(pfn);
+ struct page *head_page = compound_head(page);
+ /*
+ * set_page_dirty prefers being called with
+ * the page lock. However, MMU notifiers are
+ * called sometimes with and sometimes without
+ * the lock. We rely on the umem_mutex instead
+ * to prevent other mmu notifiers from
+ * continuing and allowing the page mapping to
+ * be removed.
+ */
+ set_page_dirty(head_page);
}
+ umem_odp->npages--;
+clear:
+ umem_odp->map.pfn_list[idx] &= ~HMM_PFN_FLAGS;
}
- mutex_unlock(&umem->odp_data->umem_mutex);
}
EXPORT_SYMBOL(ib_umem_odp_unmap_dma_pages);
diff --git a/drivers/infiniband/core/umem_rbtree.c b/drivers/infiniband/core/umem_rbtree.c
deleted file mode 100644
index 727d788448f5..000000000000
--- a/drivers/infiniband/core/umem_rbtree.c
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
- * Copyright (c) 2014 Mellanox Technologies. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/interval_tree_generic.h>
-#include <linux/sched.h>
-#include <linux/gfp.h>
-#include <rdma/ib_umem_odp.h>
-
-/*
- * The ib_umem list keeps track of memory regions for which the HW
- * device request to receive notification when the related memory
- * mapping is changed.
- *
- * ib_umem_lock protects the list.
- */
-
-static inline u64 node_start(struct umem_odp_node *n)
-{
- struct ib_umem_odp *umem_odp =
- container_of(n, struct ib_umem_odp, interval_tree);
-
- return ib_umem_start(umem_odp->umem);
-}
-
-/* Note that the representation of the intervals in the interval tree
- * considers the ending point as contained in the interval, while the
- * function ib_umem_end returns the first address which is not contained
- * in the umem.
- */
-static inline u64 node_last(struct umem_odp_node *n)
-{
- struct ib_umem_odp *umem_odp =
- container_of(n, struct ib_umem_odp, interval_tree);
-
- return ib_umem_end(umem_odp->umem) - 1;
-}
-
-INTERVAL_TREE_DEFINE(struct umem_odp_node, rb, u64, __subtree_last,
- node_start, node_last, , rbt_ib_umem)
-
-/* @last is not a part of the interval. See comment for function
- * node_last.
- */
-int rbt_ib_umem_for_each_in_range(struct rb_root *root,
- u64 start, u64 last,
- umem_call_back cb,
- void *cookie)
-{
- int ret_val = 0;
- struct umem_odp_node *node;
- struct ib_umem_odp *umem;
-
- if (unlikely(start == last))
- return ret_val;
-
- for (node = rbt_ib_umem_iter_first(root, start, last - 1); node;
- node = rbt_ib_umem_iter_next(node, start, last - 1)) {
- umem = container_of(node, struct ib_umem_odp, interval_tree);
- ret_val = cb(umem->umem, start, last, cookie) || ret_val;
- }
-
- return ret_val;
-}
diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
index 249b403b43a4..fd67fc9fe85a 100644
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c
@@ -49,22 +49,31 @@
#include <linux/sched.h>
#include <linux/semaphore.h>
#include <linux/slab.h>
+#include <linux/nospec.h>
#include <linux/uaccess.h>
#include <rdma/ib_mad.h>
#include <rdma/ib_user_mad.h>
+#include <rdma/rdma_netlink.h>
+
+#include "core_priv.h"
MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("InfiniBand userspace MAD packet access");
MODULE_LICENSE("Dual BSD/GPL");
+#define MAX_UMAD_RECV_LIST_SIZE 200000
+
enum {
- IB_UMAD_MAX_PORTS = 64,
+ IB_UMAD_MAX_PORTS = RDMA_MAX_PORTS,
IB_UMAD_MAX_AGENTS = 32,
IB_UMAD_MAJOR = 231,
- IB_UMAD_MINOR_BASE = 0
+ IB_UMAD_MINOR_BASE = 0,
+ IB_UMAD_NUM_FIXED_MINOR = 64,
+ IB_UMAD_NUM_DYNAMIC_MINOR = IB_UMAD_MAX_PORTS - IB_UMAD_NUM_FIXED_MINOR,
+ IB_ISSM_MINOR_BASE = IB_UMAD_NUM_FIXED_MINOR,
};
/*
@@ -83,10 +92,9 @@ enum {
struct ib_umad_port {
struct cdev cdev;
- struct device *dev;
-
+ struct device dev;
struct cdev sm_cdev;
- struct device *sm_dev;
+ struct device sm_dev;
struct semaphore sm_sem;
struct mutex file_mutex;
@@ -95,18 +103,19 @@ struct ib_umad_port {
struct ib_device *ib_dev;
struct ib_umad_device *umad_dev;
int dev_num;
- u8 port_num;
+ u32 port_num;
};
struct ib_umad_device {
- struct kobject kobj;
- struct ib_umad_port port[0];
+ struct kref kref;
+ struct ib_umad_port ports[];
};
struct ib_umad_file {
struct mutex mutex;
struct ib_umad_port *port;
struct list_head recv_list;
+ atomic_t recv_list_size;
struct list_head send_list;
struct list_head port_list;
spinlock_t send_lock;
@@ -125,32 +134,47 @@ struct ib_umad_packet {
struct ib_user_mad mad;
};
-static struct class *umad_class;
+struct ib_rmpp_mad_hdr {
+ struct ib_mad_hdr mad_hdr;
+ struct ib_rmpp_hdr rmpp_hdr;
+} __packed;
-static const dev_t base_dev = MKDEV(IB_UMAD_MAJOR, IB_UMAD_MINOR_BASE);
+#define CREATE_TRACE_POINTS
+#include <trace/events/ib_umad.h>
-static DEFINE_SPINLOCK(port_lock);
-static DECLARE_BITMAP(dev_map, IB_UMAD_MAX_PORTS);
+static const dev_t base_umad_dev = MKDEV(IB_UMAD_MAJOR, IB_UMAD_MINOR_BASE);
+static const dev_t base_issm_dev = MKDEV(IB_UMAD_MAJOR, IB_UMAD_MINOR_BASE) +
+ IB_UMAD_NUM_FIXED_MINOR;
+static dev_t dynamic_umad_dev;
+static dev_t dynamic_issm_dev;
-static void ib_umad_add_one(struct ib_device *device);
+static DEFINE_IDA(umad_ida);
+
+static int ib_umad_add_one(struct ib_device *device);
static void ib_umad_remove_one(struct ib_device *device, void *client_data);
-static void ib_umad_release_dev(struct kobject *kobj)
+static void ib_umad_dev_free(struct kref *kref)
{
struct ib_umad_device *dev =
- container_of(kobj, struct ib_umad_device, kobj);
+ container_of(kref, struct ib_umad_device, kref);
kfree(dev);
}
-static struct kobj_type ib_umad_dev_ktype = {
- .release = ib_umad_release_dev,
-};
+static void ib_umad_dev_get(struct ib_umad_device *dev)
+{
+ kref_get(&dev->kref);
+}
+
+static void ib_umad_dev_put(struct ib_umad_device *dev)
+{
+ kref_put(&dev->kref, ib_umad_dev_free);
+}
static int hdr_size(struct ib_umad_file *file)
{
- return file->use_pkey_index ? sizeof (struct ib_user_mad_hdr) :
- sizeof (struct ib_user_mad_hdr_old);
+ return file->use_pkey_index ? sizeof(struct ib_user_mad_hdr) :
+ sizeof(struct ib_user_mad_hdr_old);
}
/* caller must hold file->mutex */
@@ -159,24 +183,28 @@ static struct ib_mad_agent *__get_agent(struct ib_umad_file *file, int id)
return file->agents_dead ? NULL : file->agent[id];
}
-static int queue_packet(struct ib_umad_file *file,
- struct ib_mad_agent *agent,
- struct ib_umad_packet *packet)
+static int queue_packet(struct ib_umad_file *file, struct ib_mad_agent *agent,
+ struct ib_umad_packet *packet, bool is_recv_mad)
{
int ret = 1;
mutex_lock(&file->mutex);
+ if (is_recv_mad &&
+ atomic_read(&file->recv_list_size) > MAX_UMAD_RECV_LIST_SIZE)
+ goto unlock;
+
for (packet->mad.hdr.id = 0;
packet->mad.hdr.id < IB_UMAD_MAX_AGENTS;
packet->mad.hdr.id++)
if (agent == __get_agent(file, packet->mad.hdr.id)) {
list_add_tail(&packet->list, &file->recv_list);
+ atomic_inc(&file->recv_list_size);
wake_up_interruptible(&file->recv_wait);
ret = 0;
break;
}
-
+unlock:
mutex_unlock(&file->mutex);
return ret;
@@ -197,13 +225,13 @@ static void send_handler(struct ib_mad_agent *agent,
struct ib_umad_packet *packet = send_wc->send_buf->context[0];
dequeue_send(file, packet);
- ib_destroy_ah(packet->msg->ah);
+ rdma_destroy_ah(packet->msg->ah, RDMA_DESTROY_AH_SLEEPABLE);
ib_free_send_mad(packet->msg);
if (send_wc->status == IB_WC_RESP_TIMEOUT_ERR) {
packet->length = IB_MGMT_MAD_HDR;
packet->mad.hdr.status = ETIMEDOUT;
- if (!queue_packet(file, agent, packet))
+ if (!queue_packet(file, agent, packet, false))
return;
}
kfree(packet);
@@ -229,26 +257,41 @@ static void recv_handler(struct ib_mad_agent *agent,
packet->mad.hdr.status = 0;
packet->mad.hdr.length = hdr_size(file) + mad_recv_wc->mad_len;
packet->mad.hdr.qpn = cpu_to_be32(mad_recv_wc->wc->src_qp);
- packet->mad.hdr.lid = cpu_to_be16(mad_recv_wc->wc->slid);
+ /*
+ * On OPA devices it is okay to lose the upper 16 bits of LID as this
+ * information is obtained elsewhere. Mask off the upper 16 bits.
+ */
+ if (rdma_cap_opa_mad(agent->device, agent->port_num))
+ packet->mad.hdr.lid = ib_lid_be16(0xFFFF &
+ mad_recv_wc->wc->slid);
+ else
+ packet->mad.hdr.lid = ib_lid_be16(mad_recv_wc->wc->slid);
packet->mad.hdr.sl = mad_recv_wc->wc->sl;
packet->mad.hdr.path_bits = mad_recv_wc->wc->dlid_path_bits;
packet->mad.hdr.pkey_index = mad_recv_wc->wc->pkey_index;
packet->mad.hdr.grh_present = !!(mad_recv_wc->wc->wc_flags & IB_WC_GRH);
if (packet->mad.hdr.grh_present) {
- struct ib_ah_attr ah_attr;
-
- ib_init_ah_from_wc(agent->device, agent->port_num,
- mad_recv_wc->wc, mad_recv_wc->recv_buf.grh,
- &ah_attr);
-
- packet->mad.hdr.gid_index = ah_attr.grh.sgid_index;
- packet->mad.hdr.hop_limit = ah_attr.grh.hop_limit;
- packet->mad.hdr.traffic_class = ah_attr.grh.traffic_class;
- memcpy(packet->mad.hdr.gid, &ah_attr.grh.dgid, 16);
- packet->mad.hdr.flow_label = cpu_to_be32(ah_attr.grh.flow_label);
+ struct rdma_ah_attr ah_attr;
+ const struct ib_global_route *grh;
+ int ret;
+
+ ret = ib_init_ah_attr_from_wc(agent->device, agent->port_num,
+ mad_recv_wc->wc,
+ mad_recv_wc->recv_buf.grh,
+ &ah_attr);
+ if (ret)
+ goto err2;
+
+ grh = rdma_ah_read_grh(&ah_attr);
+ packet->mad.hdr.gid_index = grh->sgid_index;
+ packet->mad.hdr.hop_limit = grh->hop_limit;
+ packet->mad.hdr.traffic_class = grh->traffic_class;
+ memcpy(packet->mad.hdr.gid, &grh->dgid, 16);
+ packet->mad.hdr.flow_label = cpu_to_be32(grh->flow_label);
+ rdma_destroy_ah_attr(&ah_attr);
}
- if (queue_packet(file, agent, packet))
+ if (queue_packet(file, agent, packet, true))
goto err2;
return;
@@ -308,6 +351,9 @@ static ssize_t copy_recv_mad(struct ib_umad_file *file, char __user *buf,
return -EFAULT;
}
}
+
+ trace_ib_umad_read_recv(file, &packet->mad.hdr, &recv_buf->mad->mad_hdr);
+
return hdr_size(file) + packet->length;
}
@@ -327,6 +373,9 @@ static ssize_t copy_send_mad(struct ib_umad_file *file, char __user *buf,
if (copy_to_user(buf, packet->mad.data, packet->length))
return -EFAULT;
+ trace_ib_umad_read_send(file, &packet->mad.hdr,
+ (struct ib_mad_hdr *)&packet->mad.data);
+
return size;
}
@@ -342,6 +391,11 @@ static ssize_t ib_umad_read(struct file *filp, char __user *buf,
mutex_lock(&file->mutex);
+ if (file->agents_dead) {
+ mutex_unlock(&file->mutex);
+ return -EIO;
+ }
+
while (list_empty(&file->recv_list)) {
mutex_unlock(&file->mutex);
@@ -355,8 +409,14 @@ static ssize_t ib_umad_read(struct file *filp, char __user *buf,
mutex_lock(&file->mutex);
}
+ if (file->agents_dead) {
+ mutex_unlock(&file->mutex);
+ return -EIO;
+ }
+
packet = list_entry(file->recv_list.next, struct ib_umad_packet, list);
list_del(&packet->list);
+ atomic_dec(&file->recv_list_size);
mutex_unlock(&file->mutex);
@@ -369,6 +429,7 @@ static ssize_t ib_umad_read(struct file *filp, char __user *buf,
/* Requeue packet */
mutex_lock(&file->mutex);
list_add(&packet->list, &file->recv_list);
+ atomic_inc(&file->recv_list_size);
mutex_unlock(&file->mutex);
} else {
if (packet->recv_wc)
@@ -447,11 +508,11 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
size_t count, loff_t *pos)
{
struct ib_umad_file *file = filp->private_data;
+ struct ib_rmpp_mad_hdr *rmpp_mad_hdr;
struct ib_umad_packet *packet;
struct ib_mad_agent *agent;
- struct ib_ah_attr ah_attr;
+ struct rdma_ah_attr ah_attr;
struct ib_ah *ah;
- struct ib_rmpp_mad *rmpp_mad;
__be64 *tid;
int ret, data_len, hdr_len, copy_offset, rmpp_active;
u8 base_version;
@@ -459,7 +520,7 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
if (count < hdr_size(file) + IB_MGMT_RMPP_HDR)
return -EINVAL;
- packet = kzalloc(sizeof *packet + IB_MGMT_RMPP_HDR, GFP_KERNEL);
+ packet = kzalloc(sizeof(*packet) + IB_MGMT_RMPP_HDR, GFP_KERNEL);
if (!packet)
return -ENOMEM;
@@ -482,39 +543,44 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
mutex_lock(&file->mutex);
+ trace_ib_umad_write(file, &packet->mad.hdr,
+ (struct ib_mad_hdr *)&packet->mad.data);
+
agent = __get_agent(file, packet->mad.hdr.id);
if (!agent) {
- ret = -EINVAL;
+ ret = -EIO;
goto err_up;
}
memset(&ah_attr, 0, sizeof ah_attr);
- ah_attr.dlid = be16_to_cpu(packet->mad.hdr.lid);
- ah_attr.sl = packet->mad.hdr.sl;
- ah_attr.src_path_bits = packet->mad.hdr.path_bits;
- ah_attr.port_num = file->port->port_num;
+ ah_attr.type = rdma_ah_find_type(agent->device,
+ file->port->port_num);
+ rdma_ah_set_dlid(&ah_attr, be16_to_cpu(packet->mad.hdr.lid));
+ rdma_ah_set_sl(&ah_attr, packet->mad.hdr.sl);
+ rdma_ah_set_path_bits(&ah_attr, packet->mad.hdr.path_bits);
+ rdma_ah_set_port_num(&ah_attr, file->port->port_num);
if (packet->mad.hdr.grh_present) {
- ah_attr.ah_flags = IB_AH_GRH;
- memcpy(ah_attr.grh.dgid.raw, packet->mad.hdr.gid, 16);
- ah_attr.grh.sgid_index = packet->mad.hdr.gid_index;
- ah_attr.grh.flow_label = be32_to_cpu(packet->mad.hdr.flow_label);
- ah_attr.grh.hop_limit = packet->mad.hdr.hop_limit;
- ah_attr.grh.traffic_class = packet->mad.hdr.traffic_class;
+ rdma_ah_set_grh(&ah_attr, NULL,
+ be32_to_cpu(packet->mad.hdr.flow_label),
+ packet->mad.hdr.gid_index,
+ packet->mad.hdr.hop_limit,
+ packet->mad.hdr.traffic_class);
+ rdma_ah_set_dgid_raw(&ah_attr, packet->mad.hdr.gid);
}
- ah = ib_create_ah(agent->qp->pd, &ah_attr);
+ ah = rdma_create_user_ah(agent->qp->pd, &ah_attr, NULL);
if (IS_ERR(ah)) {
ret = PTR_ERR(ah);
goto err_up;
}
- rmpp_mad = (struct ib_rmpp_mad *) packet->mad.data;
- hdr_len = ib_get_mad_data_offset(rmpp_mad->mad_hdr.mgmt_class);
+ rmpp_mad_hdr = (struct ib_rmpp_mad_hdr *)packet->mad.data;
+ hdr_len = ib_get_mad_data_offset(rmpp_mad_hdr->mad_hdr.mgmt_class);
- if (ib_is_mad_class_rmpp(rmpp_mad->mad_hdr.mgmt_class)
+ if (ib_is_mad_class_rmpp(rmpp_mad_hdr->mad_hdr.mgmt_class)
&& ib_mad_kernel_rmpp_agent(agent)) {
copy_offset = IB_MGMT_RMPP_HDR;
- rmpp_active = ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) &
+ rmpp_active = ib_get_rmpp_flags(&rmpp_mad_hdr->rmpp_hdr) &
IB_MGMT_RMPP_FLAG_ACTIVE;
} else {
copy_offset = IB_MGMT_MAD_HDR;
@@ -563,12 +629,12 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
tid = &((struct ib_mad_hdr *) packet->msg->mad)->tid;
*tid = cpu_to_be64(((u64) agent->hi_tid) << 32 |
(be64_to_cpup(tid) & 0xffffffff));
- rmpp_mad->mad_hdr.tid = *tid;
+ rmpp_mad_hdr->mad_hdr.tid = *tid;
}
if (!ib_mad_kernel_rmpp_agent(agent)
- && ib_is_mad_class_rmpp(rmpp_mad->mad_hdr.mgmt_class)
- && (ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & IB_MGMT_RMPP_FLAG_ACTIVE)) {
+ && ib_is_mad_class_rmpp(rmpp_mad_hdr->mad_hdr.mgmt_class)
+ && (ib_get_rmpp_flags(&rmpp_mad_hdr->rmpp_hdr) & IB_MGMT_RMPP_FLAG_ACTIVE)) {
spin_lock_irq(&file->send_lock);
list_add_tail(&packet->list, &file->send_list);
spin_unlock_irq(&file->send_lock);
@@ -596,7 +662,7 @@ err_send:
err_msg:
ib_free_send_mad(packet->msg);
err_ah:
- ib_destroy_ah(ah);
+ rdma_destroy_ah(ah, RDMA_DESTROY_AH_SLEEPABLE);
err_up:
mutex_unlock(&file->mutex);
err:
@@ -604,17 +670,21 @@ err:
return ret;
}
-static unsigned int ib_umad_poll(struct file *filp, struct poll_table_struct *wait)
+static __poll_t ib_umad_poll(struct file *filp, struct poll_table_struct *wait)
{
struct ib_umad_file *file = filp->private_data;
/* we will always be able to post a MAD send */
- unsigned int mask = POLLOUT | POLLWRNORM;
+ __poll_t mask = EPOLLOUT | EPOLLWRNORM;
+ mutex_lock(&file->mutex);
poll_wait(filp, &file->recv_wait, wait);
if (!list_empty(&file->recv_list))
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
+ if (file->agents_dead)
+ mask = EPOLLERR;
+ mutex_unlock(&file->mutex);
return mask;
}
@@ -632,8 +702,7 @@ static int ib_umad_reg_agent(struct ib_umad_file *file, void __user *arg,
mutex_lock(&file->mutex);
if (!file->port->ib_dev) {
- dev_notice(file->port->dev,
- "ib_umad_reg_agent: invalid device\n");
+ dev_notice(&file->port->dev, "%s: invalid device\n", __func__);
ret = -EPIPE;
goto out;
}
@@ -644,8 +713,8 @@ static int ib_umad_reg_agent(struct ib_umad_file *file, void __user *arg,
}
if (ureq.qpn != 0 && ureq.qpn != 1) {
- dev_notice(file->port->dev,
- "ib_umad_reg_agent: invalid QPN %d specified\n",
+ dev_notice(&file->port->dev,
+ "%s: invalid QPN %u specified\n", __func__,
ureq.qpn);
ret = -EINVAL;
goto out;
@@ -655,9 +724,9 @@ static int ib_umad_reg_agent(struct ib_umad_file *file, void __user *arg,
if (!__get_agent(file, agent_id))
goto found;
- dev_notice(file->port->dev,
- "ib_umad_reg_agent: Max Agents (%u) reached\n",
+ dev_notice(&file->port->dev, "%s: Max Agents (%u) reached\n", __func__,
IB_UMAD_MAX_AGENTS);
+
ret = -ENOMEM;
goto out;
@@ -700,11 +769,11 @@ found:
if (!file->already_used) {
file->already_used = 1;
if (!file->use_pkey_index) {
- dev_warn(file->port->dev,
+ dev_warn(&file->port->dev,
"process %s did not enable P_Key index support.\n",
current->comm);
- dev_warn(file->port->dev,
- " Documentation/infiniband/user_mad.txt has info on the new ABI.\n");
+ dev_warn(&file->port->dev,
+ " Documentation/infiniband/user_mad.rst has info on the new ABI.\n");
}
}
@@ -734,8 +803,7 @@ static int ib_umad_reg_agent2(struct ib_umad_file *file, void __user *arg)
mutex_lock(&file->mutex);
if (!file->port->ib_dev) {
- dev_notice(file->port->dev,
- "ib_umad_reg_agent2: invalid device\n");
+ dev_notice(&file->port->dev, "%s: invalid device\n", __func__);
ret = -EPIPE;
goto out;
}
@@ -746,17 +814,16 @@ static int ib_umad_reg_agent2(struct ib_umad_file *file, void __user *arg)
}
if (ureq.qpn != 0 && ureq.qpn != 1) {
- dev_notice(file->port->dev,
- "ib_umad_reg_agent2: invalid QPN %d specified\n",
- ureq.qpn);
+ dev_notice(&file->port->dev, "%s: invalid QPN %u specified\n",
+ __func__, ureq.qpn);
ret = -EINVAL;
goto out;
}
if (ureq.flags & ~IB_USER_MAD_REG_FLAGS_CAP) {
- dev_notice(file->port->dev,
- "ib_umad_reg_agent2 failed: invalid registration flags specified 0x%x; supported 0x%x\n",
- ureq.flags, IB_USER_MAD_REG_FLAGS_CAP);
+ dev_notice(&file->port->dev,
+ "%s failed: invalid registration flags specified 0x%x; supported 0x%x\n",
+ __func__, ureq.flags, IB_USER_MAD_REG_FLAGS_CAP);
ret = -EINVAL;
if (put_user((u32)IB_USER_MAD_REG_FLAGS_CAP,
@@ -771,8 +838,7 @@ static int ib_umad_reg_agent2(struct ib_umad_file *file, void __user *arg)
if (!__get_agent(file, agent_id))
goto found;
- dev_notice(file->port->dev,
- "ib_umad_reg_agent2: Max Agents (%u) reached\n",
+ dev_notice(&file->port->dev, "%s: Max Agents (%u) reached\n", __func__,
IB_UMAD_MAX_AGENTS);
ret = -ENOMEM;
goto out;
@@ -783,8 +849,8 @@ found:
req.mgmt_class = ureq.mgmt_class;
req.mgmt_class_version = ureq.mgmt_class_version;
if (ureq.oui & 0xff000000) {
- dev_notice(file->port->dev,
- "ib_umad_reg_agent2 failed: oui invalid 0x%08x\n",
+ dev_notice(&file->port->dev,
+ "%s failed: oui invalid 0x%08x\n", __func__,
ureq.oui);
ret = -EINVAL;
goto out;
@@ -843,11 +909,14 @@ static int ib_umad_unreg_agent(struct ib_umad_file *file, u32 __user *arg)
if (get_user(id, arg))
return -EFAULT;
+ if (id >= IB_UMAD_MAX_AGENTS)
+ return -EINVAL;
mutex_lock(&file->port->file_mutex);
mutex_lock(&file->mutex);
- if (id >= IB_UMAD_MAX_AGENTS || !__get_agent(file, id)) {
+ id = array_index_nospec(id, IB_UMAD_MAX_AGENTS);
+ if (!__get_agent(file, id)) {
ret = -EINVAL;
goto out;
}
@@ -929,19 +998,27 @@ static int ib_umad_open(struct inode *inode, struct file *filp)
{
struct ib_umad_port *port;
struct ib_umad_file *file;
- int ret = -ENXIO;
+ int ret = 0;
port = container_of(inode->i_cdev, struct ib_umad_port, cdev);
mutex_lock(&port->file_mutex);
- if (!port->ib_dev)
+ if (!port->ib_dev) {
+ ret = -ENXIO;
goto out;
+ }
- ret = -ENOMEM;
- file = kzalloc(sizeof *file, GFP_KERNEL);
- if (!file)
+ if (!rdma_dev_access_netns(port->ib_dev, current->nsproxy->net_ns)) {
+ ret = -EPERM;
goto out;
+ }
+
+ file = kzalloc(sizeof(*file), GFP_KERNEL);
+ if (!file) {
+ ret = -ENOMEM;
+ goto out;
+ }
mutex_init(&file->mutex);
spin_lock_init(&file->send_lock);
@@ -954,15 +1031,7 @@ static int ib_umad_open(struct inode *inode, struct file *filp)
list_add_tail(&file->port_list, &port->file_list);
- ret = nonseekable_open(inode, filp);
- if (ret) {
- list_del(&file->port_list);
- kfree(file);
- goto out;
- }
-
- kobject_get(&port->umad_dev->kobj);
-
+ stream_open(inode, filp);
out:
mutex_unlock(&port->file_mutex);
return ret;
@@ -971,7 +1040,6 @@ out:
static int ib_umad_close(struct inode *inode, struct file *filp)
{
struct ib_umad_file *file = filp->private_data;
- struct ib_umad_device *dev = file->port->umad_dev;
struct ib_umad_packet *packet, *tmp;
int already_dead;
int i;
@@ -998,10 +1066,8 @@ static int ib_umad_close(struct inode *inode, struct file *filp)
ib_unregister_mad_agent(file->agent[i]);
mutex_unlock(&file->port->file_mutex);
-
+ mutex_destroy(&file->mutex);
kfree(file);
- kobject_put(&dev->kobj);
-
return 0;
}
@@ -1016,7 +1082,6 @@ static const struct file_operations umad_fops = {
#endif
.open = ib_umad_open,
.release = ib_umad_close,
- .llseek = no_llseek,
};
static int ib_umad_sm_open(struct inode *inode, struct file *filp)
@@ -1041,24 +1106,20 @@ static int ib_umad_sm_open(struct inode *inode, struct file *filp)
}
}
+ if (!rdma_dev_access_netns(port->ib_dev, current->nsproxy->net_ns)) {
+ ret = -EPERM;
+ goto err_up_sem;
+ }
+
ret = ib_modify_port(port->ib_dev, port->port_num, 0, &props);
if (ret)
goto err_up_sem;
filp->private_data = port;
- ret = nonseekable_open(inode, filp);
- if (ret)
- goto err_clr_sm_cap;
-
- kobject_get(&port->umad_dev->kobj);
-
+ nonseekable_open(inode, filp);
return 0;
-err_clr_sm_cap:
- swap(props.set_port_cap_mask, props.clr_port_cap_mask);
- ib_modify_port(port->ib_dev, port->port_num, 0, &props);
-
err_up_sem:
up(&port->sm_sem);
@@ -1081,8 +1142,6 @@ static int ib_umad_sm_close(struct inode *inode, struct file *filp)
up(&port->sm_sem);
- kobject_put(&port->umad_dev->kobj);
-
return ret;
}
@@ -1090,16 +1149,63 @@ static const struct file_operations umad_sm_fops = {
.owner = THIS_MODULE,
.open = ib_umad_sm_open,
.release = ib_umad_sm_close,
- .llseek = no_llseek,
};
+static struct ib_umad_port *get_port(struct ib_device *ibdev,
+ struct ib_umad_device *umad_dev,
+ u32 port)
+{
+ if (!umad_dev)
+ return ERR_PTR(-EOPNOTSUPP);
+ if (!rdma_is_port_valid(ibdev, port))
+ return ERR_PTR(-EINVAL);
+ if (!rdma_cap_ib_mad(ibdev, port))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ return &umad_dev->ports[port - rdma_start_port(ibdev)];
+}
+
+static int ib_umad_get_nl_info(struct ib_device *ibdev, void *client_data,
+ struct ib_client_nl_info *res)
+{
+ struct ib_umad_port *port = get_port(ibdev, client_data, res->port);
+
+ if (IS_ERR(port))
+ return PTR_ERR(port);
+
+ res->abi = IB_USER_MAD_ABI_VERSION;
+ res->cdev = &port->dev;
+ return 0;
+}
+
static struct ib_client umad_client = {
.name = "umad",
.add = ib_umad_add_one,
- .remove = ib_umad_remove_one
+ .remove = ib_umad_remove_one,
+ .get_nl_info = ib_umad_get_nl_info,
+};
+MODULE_ALIAS_RDMA_CLIENT("umad");
+
+static int ib_issm_get_nl_info(struct ib_device *ibdev, void *client_data,
+ struct ib_client_nl_info *res)
+{
+ struct ib_umad_port *port = get_port(ibdev, client_data, res->port);
+
+ if (IS_ERR(port))
+ return PTR_ERR(port);
+
+ res->abi = IB_USER_MAD_ABI_VERSION;
+ res->cdev = &port->sm_dev;
+ return 0;
+}
+
+static struct ib_client issm_client = {
+ .name = "issm",
+ .get_nl_info = ib_issm_get_nl_info,
};
+MODULE_ALIAS_RDMA_CLIENT("issm");
-static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr,
+static ssize_t ibdev_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
struct ib_umad_port *port = dev_get_drvdata(dev);
@@ -1107,11 +1213,11 @@ static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr,
if (!port)
return -ENODEV;
- return sprintf(buf, "%s\n", port->ib_dev->name);
+ return sysfs_emit(buf, "%s\n", dev_name(&port->ib_dev->dev));
}
-static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
+static DEVICE_ATTR_RO(ibdev);
-static ssize_t show_port(struct device *dev, struct device_attribute *attr,
+static ssize_t port_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
struct ib_umad_port *port = dev_get_drvdata(dev);
@@ -1119,34 +1225,60 @@ static ssize_t show_port(struct device *dev, struct device_attribute *attr,
if (!port)
return -ENODEV;
- return sprintf(buf, "%d\n", port->port_num);
+ return sysfs_emit(buf, "%d\n", port->port_num);
}
-static DEVICE_ATTR(port, S_IRUGO, show_port, NULL);
+static DEVICE_ATTR_RO(port);
-static CLASS_ATTR_STRING(abi_version, S_IRUGO,
- __stringify(IB_USER_MAD_ABI_VERSION));
+static struct attribute *umad_class_dev_attrs[] = {
+ &dev_attr_ibdev.attr,
+ &dev_attr_port.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(umad_class_dev);
-static dev_t overflow_maj;
-static DECLARE_BITMAP(overflow_map, IB_UMAD_MAX_PORTS);
-static int find_overflow_devnum(struct ib_device *device)
+static char *umad_devnode(const struct device *dev, umode_t *mode)
{
- int ret;
+ return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev));
+}
- if (!overflow_maj) {
- ret = alloc_chrdev_region(&overflow_maj, 0, IB_UMAD_MAX_PORTS * 2,
- "infiniband_mad");
- if (ret) {
- dev_err(&device->dev,
- "couldn't register dynamic device number\n");
- return ret;
- }
- }
+static ssize_t abi_version_show(const struct class *class,
+ const struct class_attribute *attr, char *buf)
+{
+ return sysfs_emit(buf, "%d\n", IB_USER_MAD_ABI_VERSION);
+}
+static CLASS_ATTR_RO(abi_version);
- ret = find_first_zero_bit(overflow_map, IB_UMAD_MAX_PORTS);
- if (ret >= IB_UMAD_MAX_PORTS)
- return -1;
+static struct attribute *umad_class_attrs[] = {
+ &class_attr_abi_version.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(umad_class);
- return ret;
+static struct class umad_class = {
+ .name = "infiniband_mad",
+ .devnode = umad_devnode,
+ .class_groups = umad_class_groups,
+ .dev_groups = umad_class_dev_groups,
+};
+
+static void ib_umad_release_port(struct device *device)
+{
+ struct ib_umad_port *port = dev_get_drvdata(device);
+ struct ib_umad_device *umad_dev = port->umad_dev;
+
+ ib_umad_dev_put(umad_dev);
+}
+
+static void ib_umad_init_port_dev(struct device *dev,
+ struct ib_umad_port *port,
+ const struct ib_device *device)
+{
+ device_initialize(dev);
+ ib_umad_dev_get(port->umad_dev);
+ dev->class = &umad_class;
+ dev->parent = device->dev.parent;
+ dev_set_drvdata(dev, port);
+ dev->release = ib_umad_release_port;
}
static int ib_umad_init_port(struct ib_device *device, int port_num,
@@ -1154,112 +1286,85 @@ static int ib_umad_init_port(struct ib_device *device, int port_num,
struct ib_umad_port *port)
{
int devnum;
- dev_t base;
-
- spin_lock(&port_lock);
- devnum = find_first_zero_bit(dev_map, IB_UMAD_MAX_PORTS);
- if (devnum >= IB_UMAD_MAX_PORTS) {
- spin_unlock(&port_lock);
- devnum = find_overflow_devnum(device);
- if (devnum < 0)
- return -1;
-
- spin_lock(&port_lock);
- port->dev_num = devnum + IB_UMAD_MAX_PORTS;
- base = devnum + overflow_maj;
- set_bit(devnum, overflow_map);
+ dev_t base_umad;
+ dev_t base_issm;
+ int ret;
+
+ devnum = ida_alloc_max(&umad_ida, IB_UMAD_MAX_PORTS - 1, GFP_KERNEL);
+ if (devnum < 0)
+ return -1;
+ port->dev_num = devnum;
+ if (devnum >= IB_UMAD_NUM_FIXED_MINOR) {
+ base_umad = dynamic_umad_dev + devnum - IB_UMAD_NUM_FIXED_MINOR;
+ base_issm = dynamic_issm_dev + devnum - IB_UMAD_NUM_FIXED_MINOR;
} else {
- port->dev_num = devnum;
- base = devnum + base_dev;
- set_bit(devnum, dev_map);
+ base_umad = devnum + base_umad_dev;
+ base_issm = devnum + base_issm_dev;
}
- spin_unlock(&port_lock);
port->ib_dev = device;
+ port->umad_dev = umad_dev;
port->port_num = port_num;
sema_init(&port->sm_sem, 1);
mutex_init(&port->file_mutex);
INIT_LIST_HEAD(&port->file_list);
+ ib_umad_init_port_dev(&port->dev, port, device);
+ port->dev.devt = base_umad;
+ dev_set_name(&port->dev, "umad%d", port->dev_num);
cdev_init(&port->cdev, &umad_fops);
port->cdev.owner = THIS_MODULE;
- port->cdev.kobj.parent = &umad_dev->kobj;
- kobject_set_name(&port->cdev.kobj, "umad%d", port->dev_num);
- if (cdev_add(&port->cdev, base, 1))
- goto err_cdev;
- port->dev = device_create(umad_class, device->dma_device,
- port->cdev.dev, port,
- "umad%d", port->dev_num);
- if (IS_ERR(port->dev))
+ ret = cdev_device_add(&port->cdev, &port->dev);
+ if (ret)
goto err_cdev;
- if (device_create_file(port->dev, &dev_attr_ibdev))
- goto err_dev;
- if (device_create_file(port->dev, &dev_attr_port))
- goto err_dev;
-
- base += IB_UMAD_MAX_PORTS;
- cdev_init(&port->sm_cdev, &umad_sm_fops);
- port->sm_cdev.owner = THIS_MODULE;
- port->sm_cdev.kobj.parent = &umad_dev->kobj;
- kobject_set_name(&port->sm_cdev.kobj, "issm%d", port->dev_num);
- if (cdev_add(&port->sm_cdev, base, 1))
- goto err_sm_cdev;
-
- port->sm_dev = device_create(umad_class, device->dma_device,
- port->sm_cdev.dev, port,
- "issm%d", port->dev_num);
- if (IS_ERR(port->sm_dev))
- goto err_sm_cdev;
-
- if (device_create_file(port->sm_dev, &dev_attr_ibdev))
- goto err_sm_dev;
- if (device_create_file(port->sm_dev, &dev_attr_port))
- goto err_sm_dev;
-
- return 0;
+ if (rdma_cap_ib_smi(device, port_num)) {
+ ib_umad_init_port_dev(&port->sm_dev, port, device);
+ port->sm_dev.devt = base_issm;
+ dev_set_name(&port->sm_dev, "issm%d", port->dev_num);
+ cdev_init(&port->sm_cdev, &umad_sm_fops);
+ port->sm_cdev.owner = THIS_MODULE;
-err_sm_dev:
- device_destroy(umad_class, port->sm_cdev.dev);
+ ret = cdev_device_add(&port->sm_cdev, &port->sm_dev);
+ if (ret)
+ goto err_dev;
+ }
-err_sm_cdev:
- cdev_del(&port->sm_cdev);
+ return 0;
err_dev:
- device_destroy(umad_class, port->cdev.dev);
-
+ put_device(&port->sm_dev);
+ cdev_device_del(&port->cdev, &port->dev);
err_cdev:
- cdev_del(&port->cdev);
- if (port->dev_num < IB_UMAD_MAX_PORTS)
- clear_bit(devnum, dev_map);
- else
- clear_bit(devnum, overflow_map);
-
- return -1;
+ put_device(&port->dev);
+ ida_free(&umad_ida, devnum);
+ return ret;
}
static void ib_umad_kill_port(struct ib_umad_port *port)
{
struct ib_umad_file *file;
+ bool has_smi = false;
int id;
- dev_set_drvdata(port->dev, NULL);
- dev_set_drvdata(port->sm_dev, NULL);
-
- device_destroy(umad_class, port->cdev.dev);
- device_destroy(umad_class, port->sm_cdev.dev);
-
- cdev_del(&port->cdev);
- cdev_del(&port->sm_cdev);
+ if (rdma_cap_ib_smi(port->ib_dev, port->port_num)) {
+ cdev_device_del(&port->sm_cdev, &port->sm_dev);
+ has_smi = true;
+ }
+ cdev_device_del(&port->cdev, &port->dev);
mutex_lock(&port->file_mutex);
+ /* Mark ib_dev NULL and block ioctl or other file ops to progress
+ * further.
+ */
port->ib_dev = NULL;
list_for_each_entry(file, &port->file_list, port_list) {
mutex_lock(&file->mutex);
file->agents_dead = 1;
+ wake_up_interruptible(&file->recv_wait);
mutex_unlock(&file->mutex);
for (id = 0; id < IB_UMAD_MAX_AGENTS; ++id)
@@ -1269,120 +1374,128 @@ static void ib_umad_kill_port(struct ib_umad_port *port)
mutex_unlock(&port->file_mutex);
- if (port->dev_num < IB_UMAD_MAX_PORTS)
- clear_bit(port->dev_num, dev_map);
- else
- clear_bit(port->dev_num - IB_UMAD_MAX_PORTS, overflow_map);
+ ida_free(&umad_ida, port->dev_num);
+
+ /* balances device_initialize() */
+ if (has_smi)
+ put_device(&port->sm_dev);
+ put_device(&port->dev);
}
-static void ib_umad_add_one(struct ib_device *device)
+static int ib_umad_add_one(struct ib_device *device)
{
struct ib_umad_device *umad_dev;
int s, e, i;
int count = 0;
+ int ret;
s = rdma_start_port(device);
e = rdma_end_port(device);
- umad_dev = kzalloc(sizeof *umad_dev +
- (e - s + 1) * sizeof (struct ib_umad_port),
+ umad_dev = kzalloc(struct_size(umad_dev, ports,
+ size_add(size_sub(e, s), 1)),
GFP_KERNEL);
if (!umad_dev)
- return;
-
- kobject_init(&umad_dev->kobj, &ib_umad_dev_ktype);
+ return -ENOMEM;
+ kref_init(&umad_dev->kref);
for (i = s; i <= e; ++i) {
if (!rdma_cap_ib_mad(device, i))
continue;
- umad_dev->port[i - s].umad_dev = umad_dev;
-
- if (ib_umad_init_port(device, i, umad_dev,
- &umad_dev->port[i - s]))
+ ret = ib_umad_init_port(device, i, umad_dev,
+ &umad_dev->ports[i - s]);
+ if (ret)
goto err;
count++;
}
- if (!count)
+ if (!count) {
+ ret = -EOPNOTSUPP;
goto free;
+ }
ib_set_client_data(device, &umad_client, umad_dev);
- return;
+ return 0;
err:
while (--i >= s) {
if (!rdma_cap_ib_mad(device, i))
continue;
- ib_umad_kill_port(&umad_dev->port[i - s]);
+ ib_umad_kill_port(&umad_dev->ports[i - s]);
}
free:
- kobject_put(&umad_dev->kobj);
+ /* balances kref_init */
+ ib_umad_dev_put(umad_dev);
+ return ret;
}
static void ib_umad_remove_one(struct ib_device *device, void *client_data)
{
struct ib_umad_device *umad_dev = client_data;
- int i;
-
- if (!umad_dev)
- return;
+ unsigned int i;
- for (i = 0; i <= rdma_end_port(device) - rdma_start_port(device); ++i) {
- if (rdma_cap_ib_mad(device, i + rdma_start_port(device)))
- ib_umad_kill_port(&umad_dev->port[i]);
+ rdma_for_each_port (device, i) {
+ if (rdma_cap_ib_mad(device, i))
+ ib_umad_kill_port(
+ &umad_dev->ports[i - rdma_start_port(device)]);
}
-
- kobject_put(&umad_dev->kobj);
-}
-
-static char *umad_devnode(struct device *dev, umode_t *mode)
-{
- return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev));
+ /* balances kref_init() */
+ ib_umad_dev_put(umad_dev);
}
static int __init ib_umad_init(void)
{
int ret;
- ret = register_chrdev_region(base_dev, IB_UMAD_MAX_PORTS * 2,
- "infiniband_mad");
+ ret = register_chrdev_region(base_umad_dev,
+ IB_UMAD_NUM_FIXED_MINOR * 2,
+ umad_class.name);
if (ret) {
pr_err("couldn't register device number\n");
goto out;
}
- umad_class = class_create(THIS_MODULE, "infiniband_mad");
- if (IS_ERR(umad_class)) {
- ret = PTR_ERR(umad_class);
- pr_err("couldn't create class infiniband_mad\n");
- goto out_chrdev;
+ ret = alloc_chrdev_region(&dynamic_umad_dev, 0,
+ IB_UMAD_NUM_DYNAMIC_MINOR * 2,
+ umad_class.name);
+ if (ret) {
+ pr_err("couldn't register dynamic device number\n");
+ goto out_alloc;
}
+ dynamic_issm_dev = dynamic_umad_dev + IB_UMAD_NUM_DYNAMIC_MINOR;
- umad_class->devnode = umad_devnode;
-
- ret = class_create_file(umad_class, &class_attr_abi_version.attr);
+ ret = class_register(&umad_class);
if (ret) {
- pr_err("couldn't create abi_version attribute\n");
- goto out_class;
+ pr_err("couldn't create class infiniband_mad\n");
+ goto out_chrdev;
}
ret = ib_register_client(&umad_client);
- if (ret) {
- pr_err("couldn't register ib_umad client\n");
+ if (ret)
goto out_class;
- }
+
+ ret = ib_register_client(&issm_client);
+ if (ret)
+ goto out_client;
return 0;
+out_client:
+ ib_unregister_client(&umad_client);
out_class:
- class_destroy(umad_class);
+ class_unregister(&umad_class);
out_chrdev:
- unregister_chrdev_region(base_dev, IB_UMAD_MAX_PORTS * 2);
+ unregister_chrdev_region(dynamic_umad_dev,
+ IB_UMAD_NUM_DYNAMIC_MINOR * 2);
+
+out_alloc:
+ unregister_chrdev_region(base_umad_dev,
+ IB_UMAD_NUM_FIXED_MINOR * 2);
out:
return ret;
@@ -1390,11 +1503,13 @@ out:
static void __exit ib_umad_cleanup(void)
{
+ ib_unregister_client(&issm_client);
ib_unregister_client(&umad_client);
- class_destroy(umad_class);
- unregister_chrdev_region(base_dev, IB_UMAD_MAX_PORTS * 2);
- if (overflow_maj)
- unregister_chrdev_region(overflow_maj, IB_UMAD_MAX_PORTS * 2);
+ class_unregister(&umad_class);
+ unregister_chrdev_region(base_umad_dev,
+ IB_UMAD_NUM_FIXED_MINOR * 2);
+ unregister_chrdev_region(dynamic_umad_dev,
+ IB_UMAD_NUM_DYNAMIC_MINOR * 2);
}
module_init(ib_umad_init);
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index 455034ac994e..797e2fcc8072 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -46,22 +46,33 @@
#include <rdma/ib_verbs.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_user_verbs.h>
+#include <rdma/uverbs_std_types.h>
-#define INIT_UDATA(udata, ibuf, obuf, ilen, olen) \
- do { \
- (udata)->inbuf = (const void __user *) (ibuf); \
- (udata)->outbuf = (void __user *) (obuf); \
- (udata)->inlen = (ilen); \
- (udata)->outlen = (olen); \
- } while (0)
-
-#define INIT_UDATA_BUF_OR_NULL(udata, ibuf, obuf, ilen, olen) \
- do { \
- (udata)->inbuf = (ilen) ? (const void __user *) (ibuf) : NULL; \
- (udata)->outbuf = (olen) ? (void __user *) (obuf) : NULL; \
- (udata)->inlen = (ilen); \
- (udata)->outlen = (olen); \
- } while (0)
+#define UVERBS_MODULE_NAME ib_uverbs
+#include <rdma/uverbs_named_ioctl.h>
+
+static inline void
+ib_uverbs_init_udata(struct ib_udata *udata,
+ const void __user *ibuf,
+ void __user *obuf,
+ size_t ilen, size_t olen)
+{
+ udata->inbuf = ibuf;
+ udata->outbuf = obuf;
+ udata->inlen = ilen;
+ udata->outlen = olen;
+}
+
+static inline void
+ib_uverbs_init_udata_buf_or_null(struct ib_udata *udata,
+ const void __user *ibuf,
+ void __user *obuf,
+ size_t ilen, size_t olen)
+{
+ ib_uverbs_init_udata(udata,
+ ilen ? ibuf : NULL, olen ? obuf : NULL,
+ ilen, olen);
+}
/*
* Our lifetime rules for these structs are the following:
@@ -76,53 +87,50 @@
* an asynchronous event queue file is created and released when the
* event file is closed.
*
- * struct ib_uverbs_event_file: One reference is held by the VFS and
- * released when the file is closed. For asynchronous event files,
- * another reference is held by the corresponding main context file
- * and released when that file is closed. For completion event files,
- * a reference is taken when a CQ is created that uses the file, and
- * released when the CQ is destroyed.
+ * struct ib_uverbs_event_queue: Base structure for
+ * struct ib_uverbs_async_event_file and struct ib_uverbs_completion_event_file.
+ * One reference is held by the VFS and released when the file is closed.
+ * For asynchronous event files, another reference is held by the corresponding
+ * main context file and released when that file is closed. For completion
+ * event files, a reference is taken when a CQ is created that uses the file,
+ * and released when the CQ is destroyed.
*/
struct ib_uverbs_device {
- atomic_t refcount;
- int num_comp_vectors;
+ refcount_t refcount;
+ u32 num_comp_vectors;
struct completion comp;
- struct device *dev;
+ struct device dev;
+ /* First group for device attributes, NULL terminated array */
+ const struct attribute_group *groups[2];
struct ib_device __rcu *ib_dev;
int devnum;
struct cdev cdev;
struct rb_root xrcd_tree;
struct mutex xrcd_tree_mutex;
- struct kobject kobj;
struct srcu_struct disassociate_srcu;
struct mutex lists_mutex; /* protect lists */
struct list_head uverbs_file_list;
- struct list_head uverbs_events_file_list;
+ struct uverbs_api *uapi;
};
-struct ib_uverbs_event_file {
- struct kref ref;
- int is_async;
- struct ib_uverbs_file *uverbs_file;
+struct ib_uverbs_event_queue {
spinlock_t lock;
int is_closed;
wait_queue_head_t poll_wait;
struct fasync_struct *async_queue;
struct list_head event_list;
- struct list_head list;
};
-struct ib_uverbs_file {
- struct kref ref;
- struct mutex mutex;
- struct mutex cleanup_mutex; /* protect cleanup */
- struct ib_uverbs_device *device;
- struct ib_ucontext *ucontext;
+struct ib_uverbs_async_event_file {
+ struct ib_uobject uobj;
+ struct ib_uverbs_event_queue ev_queue;
struct ib_event_handler event_handler;
- struct ib_uverbs_event_file *async_file;
- struct list_head list;
- int is_closed;
+};
+
+struct ib_uverbs_completion_event_file {
+ struct ib_uobject uobj;
+ struct ib_uverbs_event_queue ev_queue;
};
struct ib_uverbs_event {
@@ -143,6 +151,8 @@ struct ib_uverbs_mcast_entry {
struct ib_uevent_object {
struct ib_uobject uobject;
+ struct ib_uverbs_async_event_file *event_file;
+ /* List member for ib_uverbs_async_event_file list */
struct list_head event_list;
u32 events_reported;
};
@@ -159,6 +169,8 @@ struct ib_usrq_object {
struct ib_uqp_object {
struct ib_uevent_object uevent;
+ /* lock for mcast list */
+ struct mutex mcast_lock;
struct list_head mcast_list;
struct ib_uxrcd_object *uxrcd;
};
@@ -168,51 +180,44 @@ struct ib_uwq_object {
};
struct ib_ucq_object {
- struct ib_uobject uobject;
- struct ib_uverbs_file *uverbs_file;
+ struct ib_uevent_object uevent;
struct list_head comp_list;
- struct list_head async_list;
u32 comp_events_reported;
- u32 async_events_reported;
};
-extern spinlock_t ib_uverbs_idr_lock;
-extern struct idr ib_uverbs_pd_idr;
-extern struct idr ib_uverbs_mr_idr;
-extern struct idr ib_uverbs_mw_idr;
-extern struct idr ib_uverbs_ah_idr;
-extern struct idr ib_uverbs_cq_idr;
-extern struct idr ib_uverbs_qp_idr;
-extern struct idr ib_uverbs_srq_idr;
-extern struct idr ib_uverbs_xrcd_idr;
-extern struct idr ib_uverbs_rule_idr;
-extern struct idr ib_uverbs_wq_idr;
-extern struct idr ib_uverbs_rwq_ind_tbl_idr;
-
-void idr_remove_uobj(struct idr *idp, struct ib_uobject *uobj);
-
-struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file,
- struct ib_device *ib_dev,
- int is_async);
-void ib_uverbs_free_async_event_file(struct ib_uverbs_file *uverbs_file);
-struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd);
-
-void ib_uverbs_release_ucq(struct ib_uverbs_file *file,
- struct ib_uverbs_event_file *ev_file,
+extern const struct file_operations uverbs_event_fops;
+extern const struct file_operations uverbs_async_event_fops;
+void ib_uverbs_init_event_queue(struct ib_uverbs_event_queue *ev_queue);
+void ib_uverbs_init_async_event_file(struct ib_uverbs_async_event_file *ev_file);
+void ib_uverbs_free_event_queue(struct ib_uverbs_event_queue *event_queue);
+void ib_uverbs_flow_resources_free(struct ib_uflow_resources *uflow_res);
+int uverbs_async_event_release(struct inode *inode, struct file *filp);
+
+int ib_alloc_ucontext(struct uverbs_attr_bundle *attrs);
+int ib_init_ucontext(struct uverbs_attr_bundle *attrs);
+
+void ib_uverbs_release_ucq(struct ib_uverbs_completion_event_file *ev_file,
struct ib_ucq_object *uobj);
-void ib_uverbs_release_uevent(struct ib_uverbs_file *file,
- struct ib_uevent_object *uobj);
+void ib_uverbs_release_uevent(struct ib_uevent_object *uobj);
+void ib_uverbs_release_file(struct kref *ref);
+void ib_uverbs_async_handler(struct ib_uverbs_async_event_file *async_file,
+ __u64 element, __u64 event,
+ struct list_head *obj_list, u32 *counter);
void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context);
void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr);
void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr);
void ib_uverbs_wq_event_handler(struct ib_event *event, void *context_ptr);
void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr);
-void ib_uverbs_event_handler(struct ib_event_handler *handler,
- struct ib_event *event);
-void ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev, struct ib_xrcd *xrcd);
+int ib_uverbs_dealloc_xrcd(struct ib_uobject *uobject, struct ib_xrcd *xrcd,
+ enum rdma_remove_reason why,
+ struct uverbs_attr_bundle *attrs);
int uverbs_dealloc_mw(struct ib_mw *mw);
+void ib_uverbs_detach_umcast(struct ib_qp *qp,
+ struct ib_uqp_object *uobj);
+
+long ib_uverbs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
struct ib_uverbs_flow_spec {
union {
@@ -226,69 +231,63 @@ struct ib_uverbs_flow_spec {
};
struct ib_uverbs_flow_spec_eth eth;
struct ib_uverbs_flow_spec_ipv4 ipv4;
+ struct ib_uverbs_flow_spec_esp esp;
struct ib_uverbs_flow_spec_tcp_udp tcp_udp;
struct ib_uverbs_flow_spec_ipv6 ipv6;
+ struct ib_uverbs_flow_spec_action_tag flow_tag;
+ struct ib_uverbs_flow_spec_action_drop drop;
+ struct ib_uverbs_flow_spec_action_handle action;
+ struct ib_uverbs_flow_spec_action_count flow_count;
};
};
-#define IB_UVERBS_DECLARE_CMD(name) \
- ssize_t ib_uverbs_##name(struct ib_uverbs_file *file, \
- struct ib_device *ib_dev, \
- const char __user *buf, int in_len, \
- int out_len)
-
-IB_UVERBS_DECLARE_CMD(get_context);
-IB_UVERBS_DECLARE_CMD(query_device);
-IB_UVERBS_DECLARE_CMD(query_port);
-IB_UVERBS_DECLARE_CMD(alloc_pd);
-IB_UVERBS_DECLARE_CMD(dealloc_pd);
-IB_UVERBS_DECLARE_CMD(reg_mr);
-IB_UVERBS_DECLARE_CMD(rereg_mr);
-IB_UVERBS_DECLARE_CMD(dereg_mr);
-IB_UVERBS_DECLARE_CMD(alloc_mw);
-IB_UVERBS_DECLARE_CMD(dealloc_mw);
-IB_UVERBS_DECLARE_CMD(create_comp_channel);
-IB_UVERBS_DECLARE_CMD(create_cq);
-IB_UVERBS_DECLARE_CMD(resize_cq);
-IB_UVERBS_DECLARE_CMD(poll_cq);
-IB_UVERBS_DECLARE_CMD(req_notify_cq);
-IB_UVERBS_DECLARE_CMD(destroy_cq);
-IB_UVERBS_DECLARE_CMD(create_qp);
-IB_UVERBS_DECLARE_CMD(open_qp);
-IB_UVERBS_DECLARE_CMD(query_qp);
-IB_UVERBS_DECLARE_CMD(modify_qp);
-IB_UVERBS_DECLARE_CMD(destroy_qp);
-IB_UVERBS_DECLARE_CMD(post_send);
-IB_UVERBS_DECLARE_CMD(post_recv);
-IB_UVERBS_DECLARE_CMD(post_srq_recv);
-IB_UVERBS_DECLARE_CMD(create_ah);
-IB_UVERBS_DECLARE_CMD(destroy_ah);
-IB_UVERBS_DECLARE_CMD(attach_mcast);
-IB_UVERBS_DECLARE_CMD(detach_mcast);
-IB_UVERBS_DECLARE_CMD(create_srq);
-IB_UVERBS_DECLARE_CMD(modify_srq);
-IB_UVERBS_DECLARE_CMD(query_srq);
-IB_UVERBS_DECLARE_CMD(destroy_srq);
-IB_UVERBS_DECLARE_CMD(create_xsrq);
-IB_UVERBS_DECLARE_CMD(open_xrcd);
-IB_UVERBS_DECLARE_CMD(close_xrcd);
-
-#define IB_UVERBS_DECLARE_EX_CMD(name) \
- int ib_uverbs_ex_##name(struct ib_uverbs_file *file, \
- struct ib_device *ib_dev, \
- struct ib_udata *ucore, \
- struct ib_udata *uhw)
-
-IB_UVERBS_DECLARE_EX_CMD(create_flow);
-IB_UVERBS_DECLARE_EX_CMD(destroy_flow);
-IB_UVERBS_DECLARE_EX_CMD(query_device);
-IB_UVERBS_DECLARE_EX_CMD(create_cq);
-IB_UVERBS_DECLARE_EX_CMD(create_qp);
-IB_UVERBS_DECLARE_EX_CMD(create_wq);
-IB_UVERBS_DECLARE_EX_CMD(modify_wq);
-IB_UVERBS_DECLARE_EX_CMD(destroy_wq);
-IB_UVERBS_DECLARE_EX_CMD(create_rwq_ind_table);
-IB_UVERBS_DECLARE_EX_CMD(destroy_rwq_ind_table);
-IB_UVERBS_DECLARE_EX_CMD(modify_qp);
+int ib_uverbs_kern_spec_to_ib_spec_filter(enum ib_flow_spec_type type,
+ const void *kern_spec_mask,
+ const void *kern_spec_val,
+ size_t kern_filter_sz,
+ union ib_flow_spec *ib_spec);
+
+/*
+ * ib_uverbs_query_port_resp.port_cap_flags started out as just a copy of the
+ * PortInfo CapabilityMask, but was extended with unique bits.
+ */
+static inline u32 make_port_cap_flags(const struct ib_port_attr *attr)
+{
+ u32 res;
+
+ /* All IBA CapabilityMask bits are passed through here, except bit 26,
+ * which is overridden with IP_BASED_GIDS. This is due to a historical
+ * mistake in the implementation of IP_BASED_GIDS. Otherwise all other
+ * bits match the IBA definition across all kernel versions.
+ */
+ res = attr->port_cap_flags & ~(u32)IB_UVERBS_PCF_IP_BASED_GIDS;
+
+ if (attr->ip_gids)
+ res |= IB_UVERBS_PCF_IP_BASED_GIDS;
+
+ return res;
+}
+
+static inline struct ib_uverbs_async_event_file *
+ib_uverbs_get_async_event(struct uverbs_attr_bundle *attrs,
+ u16 id)
+{
+ struct ib_uobject *async_ev_file_uobj;
+ struct ib_uverbs_async_event_file *async_ev_file;
+
+ async_ev_file_uobj = uverbs_attr_get_uobject(attrs, id);
+ if (IS_ERR(async_ev_file_uobj))
+ async_ev_file = READ_ONCE(attrs->ufile->default_async_file);
+ else
+ async_ev_file = container_of(async_ev_file_uobj,
+ struct ib_uverbs_async_event_file,
+ uobj);
+ if (async_ev_file)
+ uverbs_uobject_get(&async_ev_file->uobj);
+ return async_ev_file;
+}
+void copy_port_attr_to_resp(struct ib_port_attr *attr,
+ struct ib_uverbs_query_port_resp *resp,
+ struct ib_device *ib_dev, u8 port_num);
#endif /* UVERBS_H */
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 700782203483..ce16404cdfb8 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -40,383 +40,312 @@
#include <linux/uaccess.h>
+#include <rdma/uverbs_types.h>
+#include <rdma/uverbs_std_types.h>
+#include <rdma/ib_ucaps.h>
+#include "rdma_core.h"
+
#include "uverbs.h"
#include "core_priv.h"
-struct uverbs_lock_class {
- struct lock_class_key key;
- char name[16];
-};
-
-static struct uverbs_lock_class pd_lock_class = { .name = "PD-uobj" };
-static struct uverbs_lock_class mr_lock_class = { .name = "MR-uobj" };
-static struct uverbs_lock_class mw_lock_class = { .name = "MW-uobj" };
-static struct uverbs_lock_class cq_lock_class = { .name = "CQ-uobj" };
-static struct uverbs_lock_class qp_lock_class = { .name = "QP-uobj" };
-static struct uverbs_lock_class ah_lock_class = { .name = "AH-uobj" };
-static struct uverbs_lock_class srq_lock_class = { .name = "SRQ-uobj" };
-static struct uverbs_lock_class xrcd_lock_class = { .name = "XRCD-uobj" };
-static struct uverbs_lock_class rule_lock_class = { .name = "RULE-uobj" };
-static struct uverbs_lock_class wq_lock_class = { .name = "WQ-uobj" };
-static struct uverbs_lock_class rwq_ind_table_lock_class = { .name = "IND_TBL-uobj" };
-
/*
- * The ib_uobject locking scheme is as follows:
- *
- * - ib_uverbs_idr_lock protects the uverbs idrs themselves, so it
- * needs to be held during all idr write operations. When an object is
- * looked up, a reference must be taken on the object's kref before
- * dropping this lock. For read operations, the rcu_read_lock()
- * and rcu_write_lock() but similarly the kref reference is grabbed
- * before the rcu_read_unlock().
+ * Copy a response to userspace. If the provided 'resp' is larger than the
+ * user buffer it is silently truncated. If the user provided a larger buffer
+ * then the trailing portion is zero filled.
*
- * - Each object also has an rwsem. This rwsem must be held for
- * reading while an operation that uses the object is performed.
- * For example, while registering an MR, the associated PD's
- * uobject.mutex must be held for reading. The rwsem must be held
- * for writing while initializing or destroying an object.
- *
- * - In addition, each object has a "live" flag. If this flag is not
- * set, then lookups of the object will fail even if it is found in
- * the idr. This handles a reader that blocks and does not acquire
- * the rwsem until after the object is destroyed. The destroy
- * operation will set the live flag to 0 and then drop the rwsem;
- * this will allow the reader to acquire the rwsem, see that the
- * live flag is 0, and then drop the rwsem and its reference to
- * object. The underlying storage will not be freed until the last
- * reference to the object is dropped.
+ * These semantics are intended to support future extension of the output
+ * structures.
*/
-
-static void init_uobj(struct ib_uobject *uobj, u64 user_handle,
- struct ib_ucontext *context, struct uverbs_lock_class *c)
-{
- uobj->user_handle = user_handle;
- uobj->context = context;
- kref_init(&uobj->ref);
- init_rwsem(&uobj->mutex);
- lockdep_set_class_and_name(&uobj->mutex, &c->key, c->name);
- uobj->live = 0;
-}
-
-static void release_uobj(struct kref *kref)
-{
- kfree_rcu(container_of(kref, struct ib_uobject, ref), rcu);
-}
-
-static void put_uobj(struct ib_uobject *uobj)
-{
- kref_put(&uobj->ref, release_uobj);
-}
-
-static void put_uobj_read(struct ib_uobject *uobj)
-{
- up_read(&uobj->mutex);
- put_uobj(uobj);
-}
-
-static void put_uobj_write(struct ib_uobject *uobj)
-{
- up_write(&uobj->mutex);
- put_uobj(uobj);
-}
-
-static int idr_add_uobj(struct idr *idr, struct ib_uobject *uobj)
+static int uverbs_response(struct uverbs_attr_bundle *attrs, const void *resp,
+ size_t resp_len)
{
int ret;
- idr_preload(GFP_KERNEL);
- spin_lock(&ib_uverbs_idr_lock);
-
- ret = idr_alloc(idr, uobj, 0, 0, GFP_NOWAIT);
- if (ret >= 0)
- uobj->id = ret;
-
- spin_unlock(&ib_uverbs_idr_lock);
- idr_preload_end();
-
- return ret < 0 ? ret : 0;
-}
-
-void idr_remove_uobj(struct idr *idr, struct ib_uobject *uobj)
-{
- spin_lock(&ib_uverbs_idr_lock);
- idr_remove(idr, uobj->id);
- spin_unlock(&ib_uverbs_idr_lock);
-}
+ if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_CORE_OUT))
+ return uverbs_copy_to_struct_or_zero(
+ attrs, UVERBS_ATTR_CORE_OUT, resp, resp_len);
-static struct ib_uobject *__idr_get_uobj(struct idr *idr, int id,
- struct ib_ucontext *context)
-{
- struct ib_uobject *uobj;
+ if (copy_to_user(attrs->ucore.outbuf, resp,
+ min(attrs->ucore.outlen, resp_len)))
+ return -EFAULT;
- rcu_read_lock();
- uobj = idr_find(idr, id);
- if (uobj) {
- if (uobj->context == context)
- kref_get(&uobj->ref);
- else
- uobj = NULL;
+ if (resp_len < attrs->ucore.outlen) {
+ /*
+ * Zero fill any extra memory that user
+ * space might have provided.
+ */
+ ret = clear_user(attrs->ucore.outbuf + resp_len,
+ attrs->ucore.outlen - resp_len);
+ if (ret)
+ return -EFAULT;
}
- rcu_read_unlock();
- return uobj;
+ return 0;
}
-static struct ib_uobject *idr_read_uobj(struct idr *idr, int id,
- struct ib_ucontext *context, int nested)
+/*
+ * Copy a request from userspace. If the provided 'req' is larger than the
+ * user buffer then the user buffer is zero extended into the 'req'. If 'req'
+ * is smaller than the user buffer then the uncopied bytes in the user buffer
+ * must be zero.
+ */
+static int uverbs_request(struct uverbs_attr_bundle *attrs, void *req,
+ size_t req_len)
{
- struct ib_uobject *uobj;
-
- uobj = __idr_get_uobj(idr, id, context);
- if (!uobj)
- return NULL;
+ if (copy_from_user(req, attrs->ucore.inbuf,
+ min(attrs->ucore.inlen, req_len)))
+ return -EFAULT;
- if (nested)
- down_read_nested(&uobj->mutex, SINGLE_DEPTH_NESTING);
- else
- down_read(&uobj->mutex);
- if (!uobj->live) {
- put_uobj_read(uobj);
- return NULL;
+ if (attrs->ucore.inlen < req_len) {
+ memset(req + attrs->ucore.inlen, 0,
+ req_len - attrs->ucore.inlen);
+ } else if (attrs->ucore.inlen > req_len) {
+ if (!ib_is_buffer_cleared(attrs->ucore.inbuf + req_len,
+ attrs->ucore.inlen - req_len))
+ return -EOPNOTSUPP;
}
-
- return uobj;
+ return 0;
}
-static struct ib_uobject *idr_write_uobj(struct idr *idr, int id,
- struct ib_ucontext *context)
+/*
+ * Generate the value for the 'response_length' protocol used by write_ex.
+ * This is the number of bytes the kernel actually wrote. Userspace can use
+ * this to detect what structure members in the response the kernel
+ * understood.
+ */
+static u32 uverbs_response_length(struct uverbs_attr_bundle *attrs,
+ size_t resp_len)
{
- struct ib_uobject *uobj;
-
- uobj = __idr_get_uobj(idr, id, context);
- if (!uobj)
- return NULL;
-
- down_write(&uobj->mutex);
- if (!uobj->live) {
- put_uobj_write(uobj);
- return NULL;
- }
-
- return uobj;
+ return min_t(size_t, attrs->ucore.outlen, resp_len);
}
-static void *idr_read_obj(struct idr *idr, int id, struct ib_ucontext *context,
- int nested)
-{
- struct ib_uobject *uobj;
-
- uobj = idr_read_uobj(idr, id, context, nested);
- return uobj ? uobj->object : NULL;
-}
+/*
+ * The iterator version of the request interface is for handlers that need to
+ * step over a flex array at the end of a command header.
+ */
+struct uverbs_req_iter {
+ const void __user *cur;
+ const void __user *end;
+};
-static struct ib_pd *idr_read_pd(int pd_handle, struct ib_ucontext *context)
+static int uverbs_request_start(struct uverbs_attr_bundle *attrs,
+ struct uverbs_req_iter *iter,
+ void *req,
+ size_t req_len)
{
- return idr_read_obj(&ib_uverbs_pd_idr, pd_handle, context, 0);
-}
+ if (attrs->ucore.inlen < req_len)
+ return -ENOSPC;
-static void put_pd_read(struct ib_pd *pd)
-{
- put_uobj_read(pd->uobject);
-}
+ if (copy_from_user(req, attrs->ucore.inbuf, req_len))
+ return -EFAULT;
-static struct ib_cq *idr_read_cq(int cq_handle, struct ib_ucontext *context, int nested)
-{
- return idr_read_obj(&ib_uverbs_cq_idr, cq_handle, context, nested);
+ iter->cur = attrs->ucore.inbuf + req_len;
+ iter->end = attrs->ucore.inbuf + attrs->ucore.inlen;
+ return 0;
}
-static void put_cq_read(struct ib_cq *cq)
+static int uverbs_request_next(struct uverbs_req_iter *iter, void *val,
+ size_t len)
{
- put_uobj_read(cq->uobject);
-}
+ if (iter->cur + len > iter->end)
+ return -ENOSPC;
-static struct ib_ah *idr_read_ah(int ah_handle, struct ib_ucontext *context)
-{
- return idr_read_obj(&ib_uverbs_ah_idr, ah_handle, context, 0);
-}
+ if (copy_from_user(val, iter->cur, len))
+ return -EFAULT;
-static void put_ah_read(struct ib_ah *ah)
-{
- put_uobj_read(ah->uobject);
+ iter->cur += len;
+ return 0;
}
-static struct ib_qp *idr_read_qp(int qp_handle, struct ib_ucontext *context)
+static const void __user *uverbs_request_next_ptr(struct uverbs_req_iter *iter,
+ size_t len)
{
- return idr_read_obj(&ib_uverbs_qp_idr, qp_handle, context, 0);
-}
+ const void __user *res = iter->cur;
-static struct ib_wq *idr_read_wq(int wq_handle, struct ib_ucontext *context)
-{
- return idr_read_obj(&ib_uverbs_wq_idr, wq_handle, context, 0);
+ if (len > iter->end - iter->cur)
+ return (void __force __user *)ERR_PTR(-ENOSPC);
+ iter->cur += len;
+ return res;
}
-static void put_wq_read(struct ib_wq *wq)
+static int uverbs_request_finish(struct uverbs_req_iter *iter)
{
- put_uobj_read(wq->uobject);
+ if (!ib_is_buffer_cleared(iter->cur, iter->end - iter->cur))
+ return -EOPNOTSUPP;
+ return 0;
}
-static struct ib_rwq_ind_table *idr_read_rwq_indirection_table(int ind_table_handle,
- struct ib_ucontext *context)
+/*
+ * When calling a destroy function during an error unwind we need to pass in
+ * the udata that is sanitized of all user arguments. Ie from the driver
+ * perspective it looks like no udata was passed.
+ */
+struct ib_udata *uverbs_get_cleared_udata(struct uverbs_attr_bundle *attrs)
{
- return idr_read_obj(&ib_uverbs_rwq_ind_tbl_idr, ind_table_handle, context, 0);
+ attrs->driver_udata = (struct ib_udata){};
+ return &attrs->driver_udata;
}
-static void put_rwq_indirection_table_read(struct ib_rwq_ind_table *ind_table)
+static struct ib_uverbs_completion_event_file *
+_ib_uverbs_lookup_comp_file(s32 fd, struct uverbs_attr_bundle *attrs)
{
- put_uobj_read(ind_table->uobject);
-}
+ struct ib_uobject *uobj = ufd_get_read(UVERBS_OBJECT_COMP_CHANNEL,
+ fd, attrs);
-static struct ib_qp *idr_write_qp(int qp_handle, struct ib_ucontext *context)
-{
- struct ib_uobject *uobj;
+ if (IS_ERR(uobj))
+ return ERR_CAST(uobj);
- uobj = idr_write_uobj(&ib_uverbs_qp_idr, qp_handle, context);
- return uobj ? uobj->object : NULL;
-}
+ uverbs_uobject_get(uobj);
+ uobj_put_read(uobj);
-static void put_qp_read(struct ib_qp *qp)
-{
- put_uobj_read(qp->uobject);
+ return container_of(uobj, struct ib_uverbs_completion_event_file,
+ uobj);
}
+#define ib_uverbs_lookup_comp_file(_fd, _ufile) \
+ _ib_uverbs_lookup_comp_file((_fd)*typecheck(s32, _fd), _ufile)
-static void put_qp_write(struct ib_qp *qp)
+int ib_alloc_ucontext(struct uverbs_attr_bundle *attrs)
{
- put_uobj_write(qp->uobject);
-}
+ struct ib_uverbs_file *ufile = attrs->ufile;
+ struct ib_ucontext *ucontext;
+ struct ib_device *ib_dev;
-static struct ib_srq *idr_read_srq(int srq_handle, struct ib_ucontext *context)
-{
- return idr_read_obj(&ib_uverbs_srq_idr, srq_handle, context, 0);
-}
+ ib_dev = srcu_dereference(ufile->device->ib_dev,
+ &ufile->device->disassociate_srcu);
+ if (!ib_dev)
+ return -EIO;
-static void put_srq_read(struct ib_srq *srq)
-{
- put_uobj_read(srq->uobject);
-}
+ ucontext = rdma_zalloc_drv_obj(ib_dev, ib_ucontext);
+ if (!ucontext)
+ return -ENOMEM;
-static struct ib_xrcd *idr_read_xrcd(int xrcd_handle, struct ib_ucontext *context,
- struct ib_uobject **uobj)
-{
- *uobj = idr_read_uobj(&ib_uverbs_xrcd_idr, xrcd_handle, context, 0);
- return *uobj ? (*uobj)->object : NULL;
-}
+ ucontext->device = ib_dev;
+ ucontext->ufile = ufile;
+ xa_init_flags(&ucontext->mmap_xa, XA_FLAGS_ALLOC);
-static void put_xrcd_read(struct ib_uobject *uobj)
-{
- put_uobj_read(uobj);
+ rdma_restrack_new(&ucontext->res, RDMA_RESTRACK_CTX);
+ rdma_restrack_set_name(&ucontext->res, NULL);
+ attrs->context = ucontext;
+ return 0;
}
-ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- const char __user *buf,
- int in_len, int out_len)
+int ib_init_ucontext(struct uverbs_attr_bundle *attrs)
{
- struct ib_uverbs_get_context cmd;
- struct ib_uverbs_get_context_resp resp;
- struct ib_udata udata;
- struct ib_ucontext *ucontext;
- struct file *filp;
+ struct ib_ucontext *ucontext = attrs->context;
+ struct ib_uverbs_file *file = attrs->ufile;
+ int *fd_array;
+ int fd_count;
int ret;
- if (out_len < sizeof resp)
- return -ENOSPC;
-
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
-
- mutex_lock(&file->mutex);
-
+ if (!down_read_trylock(&file->hw_destroy_rwsem))
+ return -EIO;
+ mutex_lock(&file->ucontext_lock);
if (file->ucontext) {
ret = -EINVAL;
goto err;
}
- INIT_UDATA(&udata, buf + sizeof cmd,
- (unsigned long) cmd.response + sizeof resp,
- in_len - sizeof cmd, out_len - sizeof resp);
-
- ucontext = ib_dev->alloc_ucontext(ib_dev, &udata);
- if (IS_ERR(ucontext)) {
- ret = PTR_ERR(ucontext);
+ ret = ib_rdmacg_try_charge(&ucontext->cg_obj, ucontext->device,
+ RDMACG_RESOURCE_HCA_HANDLE);
+ if (ret)
goto err;
- }
- ucontext->device = ib_dev;
- INIT_LIST_HEAD(&ucontext->pd_list);
- INIT_LIST_HEAD(&ucontext->mr_list);
- INIT_LIST_HEAD(&ucontext->mw_list);
- INIT_LIST_HEAD(&ucontext->cq_list);
- INIT_LIST_HEAD(&ucontext->qp_list);
- INIT_LIST_HEAD(&ucontext->srq_list);
- INIT_LIST_HEAD(&ucontext->ah_list);
- INIT_LIST_HEAD(&ucontext->wq_list);
- INIT_LIST_HEAD(&ucontext->rwq_ind_tbl_list);
- INIT_LIST_HEAD(&ucontext->xrcd_list);
- INIT_LIST_HEAD(&ucontext->rule_list);
- rcu_read_lock();
- ucontext->tgid = get_task_pid(current->group_leader, PIDTYPE_PID);
- rcu_read_unlock();
- ucontext->closing = 0;
-
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- ucontext->umem_tree = RB_ROOT;
- init_rwsem(&ucontext->umem_rwsem);
- ucontext->odp_mrs_count = 0;
- INIT_LIST_HEAD(&ucontext->no_private_counters);
-
- if (!(ib_dev->attrs.device_cap_flags & IB_DEVICE_ON_DEMAND_PAGING))
- ucontext->invalidate_range = NULL;
-
-#endif
-
- resp.num_comp_vectors = file->device->num_comp_vectors;
-
- ret = get_unused_fd_flags(O_CLOEXEC);
- if (ret < 0)
- goto err_free;
- resp.async_fd = ret;
+ if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_GET_CONTEXT_FD_ARR)) {
+ fd_count = uverbs_attr_ptr_get_array_size(attrs,
+ UVERBS_ATTR_GET_CONTEXT_FD_ARR,
+ sizeof(int));
+ if (fd_count < 0) {
+ ret = fd_count;
+ goto err_uncharge;
+ }
- filp = ib_uverbs_alloc_event_file(file, ib_dev, 1);
- if (IS_ERR(filp)) {
- ret = PTR_ERR(filp);
- goto err_fd;
+ fd_array = uverbs_attr_get_alloced_ptr(attrs,
+ UVERBS_ATTR_GET_CONTEXT_FD_ARR);
+ ret = ib_get_ucaps(fd_array, fd_count, &ucontext->enabled_caps);
+ if (ret)
+ goto err_uncharge;
}
- if (copy_to_user((void __user *) (unsigned long) cmd.response,
- &resp, sizeof resp)) {
- ret = -EFAULT;
- goto err_file;
- }
+ ret = ucontext->device->ops.alloc_ucontext(ucontext,
+ &attrs->driver_udata);
+ if (ret)
+ goto err_uncharge;
- file->ucontext = ucontext;
+ rdma_restrack_add(&ucontext->res);
- fd_install(resp.async_fd, filp);
+ /*
+ * Make sure that ib_uverbs_get_ucontext() sees the pointer update
+ * only after all writes to setup the ucontext have completed
+ */
+ smp_store_release(&file->ucontext, ucontext);
- mutex_unlock(&file->mutex);
+ mutex_unlock(&file->ucontext_lock);
+ up_read(&file->hw_destroy_rwsem);
+ return 0;
- return in_len;
+err_uncharge:
+ ib_rdmacg_uncharge(&ucontext->cg_obj, ucontext->device,
+ RDMACG_RESOURCE_HCA_HANDLE);
+err:
+ mutex_unlock(&file->ucontext_lock);
+ up_read(&file->hw_destroy_rwsem);
+ return ret;
+}
-err_file:
- ib_uverbs_free_async_event_file(file);
- fput(filp);
+static int ib_uverbs_get_context(struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uverbs_get_context_resp resp;
+ struct ib_uverbs_get_context cmd;
+ struct ib_device *ib_dev;
+ struct ib_uobject *uobj;
+ int ret;
-err_fd:
- put_unused_fd(resp.async_fd);
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
-err_free:
- put_pid(ucontext->tgid);
- ib_dev->dealloc_ucontext(ucontext);
+ ret = ib_alloc_ucontext(attrs);
+ if (ret)
+ return ret;
-err:
- mutex_unlock(&file->mutex);
+ uobj = uobj_alloc(UVERBS_OBJECT_ASYNC_EVENT, attrs, &ib_dev);
+ if (IS_ERR(uobj)) {
+ ret = PTR_ERR(uobj);
+ goto err_ucontext;
+ }
+
+ resp = (struct ib_uverbs_get_context_resp){
+ .num_comp_vectors = attrs->ufile->device->num_comp_vectors,
+ .async_fd = uobj->id,
+ };
+ ret = uverbs_response(attrs, &resp, sizeof(resp));
+ if (ret)
+ goto err_uobj;
+
+ ret = ib_init_ucontext(attrs);
+ if (ret)
+ goto err_uobj;
+
+ ib_uverbs_init_async_event_file(
+ container_of(uobj, struct ib_uverbs_async_event_file, uobj));
+ rdma_alloc_commit_uobject(uobj, attrs);
+ return 0;
+
+err_uobj:
+ rdma_alloc_abort_uobject(uobj, attrs, false);
+err_ucontext:
+ rdma_restrack_put(&attrs->context->res);
+ kfree(attrs->context);
+ attrs->context = NULL;
return ret;
}
-static void copy_query_dev_fields(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
+static void copy_query_dev_fields(struct ib_ucontext *ucontext,
struct ib_uverbs_query_device_resp *resp,
struct ib_device_attr *attr)
{
+ struct ib_device *ib_dev = ucontext->device;
+
resp->fw_ver = attr->fw_ver;
resp->node_guid = ib_dev->node_guid;
resp->sys_image_guid = attr->sys_image_guid;
@@ -427,8 +356,8 @@ static void copy_query_dev_fields(struct ib_uverbs_file *file,
resp->hw_ver = attr->hw_ver;
resp->max_qp = attr->max_qp;
resp->max_qp_wr = attr->max_qp_wr;
- resp->device_cap_flags = lower_32_bits(attr->device_cap_flags);
- resp->max_sge = attr->max_sge;
+ resp->device_cap_flags = lower_32_bits(attr->device_cap_flags);
+ resp->max_sge = min(attr->max_send_sge, attr->max_recv_sge);
resp->max_sge_rd = attr->max_sge_rd;
resp->max_cq = attr->max_cq;
resp->max_cqe = attr->max_cqe;
@@ -449,210 +378,122 @@ static void copy_query_dev_fields(struct ib_uverbs_file *file,
resp->max_mcast_qp_attach = attr->max_mcast_qp_attach;
resp->max_total_mcast_qp_attach = attr->max_total_mcast_qp_attach;
resp->max_ah = attr->max_ah;
- resp->max_fmr = attr->max_fmr;
- resp->max_map_per_fmr = attr->max_map_per_fmr;
resp->max_srq = attr->max_srq;
resp->max_srq_wr = attr->max_srq_wr;
resp->max_srq_sge = attr->max_srq_sge;
resp->max_pkeys = attr->max_pkeys;
resp->local_ca_ack_delay = attr->local_ca_ack_delay;
- resp->phys_port_cnt = ib_dev->phys_port_cnt;
+ resp->phys_port_cnt = min_t(u32, ib_dev->phys_port_cnt, U8_MAX);
}
-ssize_t ib_uverbs_query_device(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- const char __user *buf,
- int in_len, int out_len)
+static int ib_uverbs_query_device(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_query_device cmd;
struct ib_uverbs_query_device_resp resp;
+ struct ib_ucontext *ucontext;
+ int ret;
- if (out_len < sizeof resp)
- return -ENOSPC;
+ ucontext = ib_uverbs_get_ucontext(attrs);
+ if (IS_ERR(ucontext))
+ return PTR_ERR(ucontext);
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
memset(&resp, 0, sizeof resp);
- copy_query_dev_fields(file, ib_dev, &resp, &ib_dev->attrs);
-
- if (copy_to_user((void __user *) (unsigned long) cmd.response,
- &resp, sizeof resp))
- return -EFAULT;
+ copy_query_dev_fields(ucontext, &resp, &ucontext->device->attrs);
- return in_len;
+ return uverbs_response(attrs, &resp, sizeof(resp));
}
-ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- const char __user *buf,
- int in_len, int out_len)
+static int ib_uverbs_query_port(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_query_port cmd;
struct ib_uverbs_query_port_resp resp;
struct ib_port_attr attr;
int ret;
+ struct ib_ucontext *ucontext;
+ struct ib_device *ib_dev;
- if (out_len < sizeof resp)
- return -ENOSPC;
+ ucontext = ib_uverbs_get_ucontext(attrs);
+ if (IS_ERR(ucontext))
+ return PTR_ERR(ucontext);
+ ib_dev = ucontext->device;
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
ret = ib_query_port(ib_dev, cmd.port_num, &attr);
if (ret)
return ret;
memset(&resp, 0, sizeof resp);
+ copy_port_attr_to_resp(&attr, &resp, ib_dev, cmd.port_num);
- resp.state = attr.state;
- resp.max_mtu = attr.max_mtu;
- resp.active_mtu = attr.active_mtu;
- resp.gid_tbl_len = attr.gid_tbl_len;
- resp.port_cap_flags = attr.port_cap_flags;
- resp.max_msg_sz = attr.max_msg_sz;
- resp.bad_pkey_cntr = attr.bad_pkey_cntr;
- resp.qkey_viol_cntr = attr.qkey_viol_cntr;
- resp.pkey_tbl_len = attr.pkey_tbl_len;
- resp.lid = attr.lid;
- resp.sm_lid = attr.sm_lid;
- resp.lmc = attr.lmc;
- resp.max_vl_num = attr.max_vl_num;
- resp.sm_sl = attr.sm_sl;
- resp.subnet_timeout = attr.subnet_timeout;
- resp.init_type_reply = attr.init_type_reply;
- resp.active_width = attr.active_width;
- resp.active_speed = attr.active_speed;
- resp.phys_state = attr.phys_state;
- resp.link_layer = rdma_port_get_link_layer(ib_dev,
- cmd.port_num);
-
- if (copy_to_user((void __user *) (unsigned long) cmd.response,
- &resp, sizeof resp))
- return -EFAULT;
-
- return in_len;
+ return uverbs_response(attrs, &resp, sizeof(resp));
}
-ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- const char __user *buf,
- int in_len, int out_len)
+static int ib_uverbs_alloc_pd(struct uverbs_attr_bundle *attrs)
{
+ struct ib_uverbs_alloc_pd_resp resp = {};
struct ib_uverbs_alloc_pd cmd;
- struct ib_uverbs_alloc_pd_resp resp;
- struct ib_udata udata;
struct ib_uobject *uobj;
struct ib_pd *pd;
int ret;
+ struct ib_device *ib_dev;
- if (out_len < sizeof resp)
- return -ENOSPC;
-
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
-
- INIT_UDATA(&udata, buf + sizeof cmd,
- (unsigned long) cmd.response + sizeof resp,
- in_len - sizeof cmd, out_len - sizeof resp);
-
- uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
- if (!uobj)
- return -ENOMEM;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- init_uobj(uobj, 0, file->ucontext, &pd_lock_class);
- down_write(&uobj->mutex);
+ uobj = uobj_alloc(UVERBS_OBJECT_PD, attrs, &ib_dev);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
- pd = ib_dev->alloc_pd(ib_dev, file->ucontext, &udata);
- if (IS_ERR(pd)) {
- ret = PTR_ERR(pd);
+ pd = rdma_zalloc_drv_obj(ib_dev, ib_pd);
+ if (!pd) {
+ ret = -ENOMEM;
goto err;
}
pd->device = ib_dev;
pd->uobject = uobj;
- pd->__internal_mr = NULL;
atomic_set(&pd->usecnt, 0);
- uobj->object = pd;
- ret = idr_add_uobj(&ib_uverbs_pd_idr, uobj);
- if (ret)
- goto err_idr;
-
- memset(&resp, 0, sizeof resp);
- resp.pd_handle = uobj->id;
-
- if (copy_to_user((void __user *) (unsigned long) cmd.response,
- &resp, sizeof resp)) {
- ret = -EFAULT;
- goto err_copy;
- }
-
- mutex_lock(&file->mutex);
- list_add_tail(&uobj->list, &file->ucontext->pd_list);
- mutex_unlock(&file->mutex);
-
- uobj->live = 1;
+ rdma_restrack_new(&pd->res, RDMA_RESTRACK_PD);
+ rdma_restrack_set_name(&pd->res, NULL);
- up_write(&uobj->mutex);
-
- return in_len;
+ ret = ib_dev->ops.alloc_pd(pd, &attrs->driver_udata);
+ if (ret)
+ goto err_alloc;
+ rdma_restrack_add(&pd->res);
-err_copy:
- idr_remove_uobj(&ib_uverbs_pd_idr, uobj);
+ uobj->object = pd;
+ uobj_finalize_uobj_create(uobj, attrs);
-err_idr:
- ib_dealloc_pd(pd);
+ resp.pd_handle = uobj->id;
+ return uverbs_response(attrs, &resp, sizeof(resp));
+err_alloc:
+ rdma_restrack_put(&pd->res);
+ kfree(pd);
err:
- put_uobj_write(uobj);
+ uobj_alloc_abort(uobj, attrs);
return ret;
}
-ssize_t ib_uverbs_dealloc_pd(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- const char __user *buf,
- int in_len, int out_len)
+static int ib_uverbs_dealloc_pd(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_dealloc_pd cmd;
- struct ib_uobject *uobj;
- struct ib_pd *pd;
- int ret;
-
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
-
- uobj = idr_write_uobj(&ib_uverbs_pd_idr, cmd.pd_handle, file->ucontext);
- if (!uobj)
- return -EINVAL;
- pd = uobj->object;
-
- if (atomic_read(&pd->usecnt)) {
- ret = -EBUSY;
- goto err_put;
- }
+ int ret;
- ret = pd->device->dealloc_pd(uobj->object);
- WARN_ONCE(ret, "Infiniband HW driver failed dealloc_pd");
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
if (ret)
- goto err_put;
-
- uobj->live = 0;
- put_uobj_write(uobj);
-
- idr_remove_uobj(&ib_uverbs_pd_idr, uobj);
-
- mutex_lock(&file->mutex);
- list_del(&uobj->list);
- mutex_unlock(&file->mutex);
-
- put_uobj(uobj);
-
- return in_len;
+ return ret;
-err_put:
- put_uobj_write(uobj);
- return ret;
+ return uobj_perform_destroy(UVERBS_OBJECT_PD, cmd.pd_handle, attrs);
}
struct xrcd_table_entry {
@@ -740,43 +581,35 @@ static void xrcd_table_delete(struct ib_uverbs_device *dev,
}
}
-ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_open_xrcd(struct uverbs_attr_bundle *attrs)
{
+ struct ib_uverbs_device *ibudev = attrs->ufile->device;
+ struct ib_uverbs_open_xrcd_resp resp = {};
struct ib_uverbs_open_xrcd cmd;
- struct ib_uverbs_open_xrcd_resp resp;
- struct ib_udata udata;
struct ib_uxrcd_object *obj;
struct ib_xrcd *xrcd = NULL;
- struct fd f = {NULL, 0};
struct inode *inode = NULL;
- int ret = 0;
int new_xrcd = 0;
+ struct ib_device *ib_dev;
+ struct fd f = EMPTY_FD;
+ int ret;
- if (out_len < sizeof resp)
- return -ENOSPC;
-
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
-
- INIT_UDATA(&udata, buf + sizeof cmd,
- (unsigned long) cmd.response + sizeof resp,
- in_len - sizeof cmd, out_len - sizeof resp);
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- mutex_lock(&file->device->xrcd_tree_mutex);
+ mutex_lock(&ibudev->xrcd_tree_mutex);
if (cmd.fd != -1) {
/* search for file descriptor */
f = fdget(cmd.fd);
- if (!f.file) {
+ if (fd_empty(f)) {
ret = -EBADF;
goto err_tree_mutex_unlock;
}
- inode = file_inode(f.file);
- xrcd = find_xrcd(file->device, inode);
+ inode = file_inode(fd_file(f));
+ xrcd = find_xrcd(ibudev, inode);
if (!xrcd && !(cmd.oflags & O_CREAT)) {
/* no file descriptor. Need CREATE flag */
ret = -EAGAIN;
@@ -789,226 +622,127 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
}
}
- obj = kmalloc(sizeof *obj, GFP_KERNEL);
- if (!obj) {
- ret = -ENOMEM;
+ obj = (struct ib_uxrcd_object *)uobj_alloc(UVERBS_OBJECT_XRCD, attrs,
+ &ib_dev);
+ if (IS_ERR(obj)) {
+ ret = PTR_ERR(obj);
goto err_tree_mutex_unlock;
}
- init_uobj(&obj->uobject, 0, file->ucontext, &xrcd_lock_class);
-
- down_write(&obj->uobject.mutex);
-
if (!xrcd) {
- xrcd = ib_dev->alloc_xrcd(ib_dev, file->ucontext, &udata);
+ xrcd = ib_alloc_xrcd_user(ib_dev, inode, &attrs->driver_udata);
if (IS_ERR(xrcd)) {
ret = PTR_ERR(xrcd);
goto err;
}
-
- xrcd->inode = inode;
- xrcd->device = ib_dev;
- atomic_set(&xrcd->usecnt, 0);
- mutex_init(&xrcd->tgt_qp_mutex);
- INIT_LIST_HEAD(&xrcd->tgt_qp_list);
new_xrcd = 1;
}
atomic_set(&obj->refcnt, 0);
obj->uobject.object = xrcd;
- ret = idr_add_uobj(&ib_uverbs_xrcd_idr, &obj->uobject);
- if (ret)
- goto err_idr;
-
- memset(&resp, 0, sizeof resp);
- resp.xrcd_handle = obj->uobject.id;
if (inode) {
if (new_xrcd) {
/* create new inode/xrcd table entry */
- ret = xrcd_table_insert(file->device, inode, xrcd);
+ ret = xrcd_table_insert(ibudev, inode, xrcd);
if (ret)
- goto err_insert_xrcd;
+ goto err_dealloc_xrcd;
}
atomic_inc(&xrcd->usecnt);
}
- if (copy_to_user((void __user *) (unsigned long) cmd.response,
- &resp, sizeof resp)) {
- ret = -EFAULT;
- goto err_copy;
- }
-
- if (f.file)
- fdput(f);
-
- mutex_lock(&file->mutex);
- list_add_tail(&obj->uobject.list, &file->ucontext->xrcd_list);
- mutex_unlock(&file->mutex);
-
- obj->uobject.live = 1;
- up_write(&obj->uobject.mutex);
+ fdput(f);
- mutex_unlock(&file->device->xrcd_tree_mutex);
- return in_len;
+ mutex_unlock(&ibudev->xrcd_tree_mutex);
+ uobj_finalize_uobj_create(&obj->uobject, attrs);
-err_copy:
- if (inode) {
- if (new_xrcd)
- xrcd_table_delete(file->device, inode);
- atomic_dec(&xrcd->usecnt);
- }
-
-err_insert_xrcd:
- idr_remove_uobj(&ib_uverbs_xrcd_idr, &obj->uobject);
+ resp.xrcd_handle = obj->uobject.id;
+ return uverbs_response(attrs, &resp, sizeof(resp));
-err_idr:
- ib_dealloc_xrcd(xrcd);
+err_dealloc_xrcd:
+ ib_dealloc_xrcd_user(xrcd, uverbs_get_cleared_udata(attrs));
err:
- put_uobj_write(&obj->uobject);
+ uobj_alloc_abort(&obj->uobject, attrs);
err_tree_mutex_unlock:
- if (f.file)
- fdput(f);
+ fdput(f);
- mutex_unlock(&file->device->xrcd_tree_mutex);
+ mutex_unlock(&ibudev->xrcd_tree_mutex);
return ret;
}
-ssize_t ib_uverbs_close_xrcd(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_close_xrcd(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_close_xrcd cmd;
- struct ib_uobject *uobj;
- struct ib_xrcd *xrcd = NULL;
- struct inode *inode = NULL;
- struct ib_uxrcd_object *obj;
- int live;
- int ret = 0;
-
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
-
- mutex_lock(&file->device->xrcd_tree_mutex);
- uobj = idr_write_uobj(&ib_uverbs_xrcd_idr, cmd.xrcd_handle, file->ucontext);
- if (!uobj) {
- ret = -EINVAL;
- goto out;
- }
-
- xrcd = uobj->object;
- inode = xrcd->inode;
- obj = container_of(uobj, struct ib_uxrcd_object, uobject);
- if (atomic_read(&obj->refcnt)) {
- put_uobj_write(uobj);
- ret = -EBUSY;
- goto out;
- }
-
- if (!inode || atomic_dec_and_test(&xrcd->usecnt)) {
- ret = ib_dealloc_xrcd(uobj->object);
- if (!ret)
- uobj->live = 0;
- }
-
- live = uobj->live;
- if (inode && ret)
- atomic_inc(&xrcd->usecnt);
-
- put_uobj_write(uobj);
+ int ret;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
if (ret)
- goto out;
-
- if (inode && !live)
- xrcd_table_delete(file->device, inode);
-
- idr_remove_uobj(&ib_uverbs_xrcd_idr, uobj);
- mutex_lock(&file->mutex);
- list_del(&uobj->list);
- mutex_unlock(&file->mutex);
-
- put_uobj(uobj);
- ret = in_len;
+ return ret;
-out:
- mutex_unlock(&file->device->xrcd_tree_mutex);
- return ret;
+ return uobj_perform_destroy(UVERBS_OBJECT_XRCD, cmd.xrcd_handle, attrs);
}
-void ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev,
- struct ib_xrcd *xrcd)
+int ib_uverbs_dealloc_xrcd(struct ib_uobject *uobject, struct ib_xrcd *xrcd,
+ enum rdma_remove_reason why,
+ struct uverbs_attr_bundle *attrs)
{
struct inode *inode;
+ int ret;
+ struct ib_uverbs_device *dev = attrs->ufile->device;
inode = xrcd->inode;
if (inode && !atomic_dec_and_test(&xrcd->usecnt))
- return;
+ return 0;
- ib_dealloc_xrcd(xrcd);
+ ret = ib_dealloc_xrcd_user(xrcd, &attrs->driver_udata);
+ if (ret) {
+ atomic_inc(&xrcd->usecnt);
+ return ret;
+ }
if (inode)
xrcd_table_delete(dev, inode);
+
+ return 0;
}
-ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_reg_mr(struct uverbs_attr_bundle *attrs)
{
+ struct ib_uverbs_reg_mr_resp resp = {};
struct ib_uverbs_reg_mr cmd;
- struct ib_uverbs_reg_mr_resp resp;
- struct ib_udata udata;
struct ib_uobject *uobj;
struct ib_pd *pd;
struct ib_mr *mr;
int ret;
+ struct ib_device *ib_dev;
- if (out_len < sizeof resp)
- return -ENOSPC;
-
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
-
- INIT_UDATA(&udata, buf + sizeof cmd,
- (unsigned long) cmd.response + sizeof resp,
- in_len - sizeof cmd, out_len - sizeof resp);
-
- if ((cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK))
- return -EINVAL;
-
- ret = ib_check_mr_access(cmd.access_flags);
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
if (ret)
return ret;
- uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
- if (!uobj)
- return -ENOMEM;
+ if ((cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK))
+ return -EINVAL;
- init_uobj(uobj, 0, file->ucontext, &mr_lock_class);
- down_write(&uobj->mutex);
+ uobj = uobj_alloc(UVERBS_OBJECT_MR, attrs, &ib_dev);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
- pd = idr_read_pd(cmd.pd_handle, file->ucontext);
- if (!pd) {
- ret = -EINVAL;
+ ret = ib_check_mr_access(ib_dev, cmd.access_flags);
+ if (ret)
goto err_free;
- }
- if (cmd.access_flags & IB_ACCESS_ON_DEMAND) {
- if (!(pd->device->attrs.device_cap_flags &
- IB_DEVICE_ON_DEMAND_PAGING)) {
- pr_debug("ODP support not available\n");
- ret = -EINVAL;
- goto err_put;
- }
+ pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, attrs);
+ if (IS_ERR(pd)) {
+ ret = PTR_ERR(pd);
+ goto err_free;
}
- mr = pd->device->reg_user_mr(pd, cmd.start, cmd.length, cmd.hca_va,
- cmd.access_flags, &udata);
+ mr = pd->device->ops.reg_user_mr(pd, cmd.start, cmd.length, cmd.hca_va,
+ cmd.access_flags, NULL,
+ &attrs->driver_udata);
if (IS_ERR(mr)) {
ret = PTR_ERR(mr);
goto err_put;
@@ -1016,485 +750,360 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
mr->device = pd->device;
mr->pd = pd;
+ mr->type = IB_MR_TYPE_USER;
+ mr->dm = NULL;
+ mr->sig_attrs = NULL;
mr->uobject = uobj;
atomic_inc(&pd->usecnt);
+ mr->iova = cmd.hca_va;
+ mr->length = cmd.length;
+
+ rdma_restrack_new(&mr->res, RDMA_RESTRACK_MR);
+ rdma_restrack_set_name(&mr->res, NULL);
+ rdma_restrack_add(&mr->res);
uobj->object = mr;
- ret = idr_add_uobj(&ib_uverbs_mr_idr, uobj);
- if (ret)
- goto err_unreg;
+ uobj_put_obj_read(pd);
+ uobj_finalize_uobj_create(uobj, attrs);
- memset(&resp, 0, sizeof resp);
- resp.lkey = mr->lkey;
- resp.rkey = mr->rkey;
+ resp.lkey = mr->lkey;
+ resp.rkey = mr->rkey;
resp.mr_handle = uobj->id;
-
- if (copy_to_user((void __user *) (unsigned long) cmd.response,
- &resp, sizeof resp)) {
- ret = -EFAULT;
- goto err_copy;
- }
-
- put_pd_read(pd);
-
- mutex_lock(&file->mutex);
- list_add_tail(&uobj->list, &file->ucontext->mr_list);
- mutex_unlock(&file->mutex);
-
- uobj->live = 1;
-
- up_write(&uobj->mutex);
-
- return in_len;
-
-err_copy:
- idr_remove_uobj(&ib_uverbs_mr_idr, uobj);
-
-err_unreg:
- ib_dereg_mr(mr);
+ return uverbs_response(attrs, &resp, sizeof(resp));
err_put:
- put_pd_read(pd);
-
+ uobj_put_obj_read(pd);
err_free:
- put_uobj_write(uobj);
+ uobj_alloc_abort(uobj, attrs);
return ret;
}
-ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_rereg_mr(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_rereg_mr cmd;
struct ib_uverbs_rereg_mr_resp resp;
- struct ib_udata udata;
- struct ib_pd *pd = NULL;
struct ib_mr *mr;
- struct ib_pd *old_pd;
int ret;
struct ib_uobject *uobj;
+ struct ib_uobject *new_uobj;
+ struct ib_device *ib_dev;
+ struct ib_pd *orig_pd;
+ struct ib_pd *new_pd;
+ struct ib_mr *new_mr;
- if (out_len < sizeof(resp))
- return -ENOSPC;
-
- if (copy_from_user(&cmd, buf, sizeof(cmd)))
- return -EFAULT;
-
- INIT_UDATA(&udata, buf + sizeof(cmd),
- (unsigned long) cmd.response + sizeof(resp),
- in_len - sizeof(cmd), out_len - sizeof(resp));
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- if (cmd.flags & ~IB_MR_REREG_SUPPORTED || !cmd.flags)
+ if (!cmd.flags)
return -EINVAL;
- if ((cmd.flags & IB_MR_REREG_TRANS) &&
- (!cmd.start || !cmd.hca_va || 0 >= cmd.length ||
- (cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK)))
- return -EINVAL;
-
- uobj = idr_write_uobj(&ib_uverbs_mr_idr, cmd.mr_handle,
- file->ucontext);
+ if (cmd.flags & ~IB_MR_REREG_SUPPORTED)
+ return -EOPNOTSUPP;
- if (!uobj)
+ if ((cmd.flags & IB_MR_REREG_TRANS) &&
+ (cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK))
return -EINVAL;
+ uobj = uobj_get_write(UVERBS_OBJECT_MR, cmd.mr_handle, attrs);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
+
mr = uobj->object;
+ if (mr->dm) {
+ ret = -EINVAL;
+ goto put_uobjs;
+ }
+
if (cmd.flags & IB_MR_REREG_ACCESS) {
- ret = ib_check_mr_access(cmd.access_flags);
+ ret = ib_check_mr_access(mr->device, cmd.access_flags);
if (ret)
goto put_uobjs;
}
+ orig_pd = mr->pd;
if (cmd.flags & IB_MR_REREG_PD) {
- pd = idr_read_pd(cmd.pd_handle, file->ucontext);
- if (!pd) {
- ret = -EINVAL;
+ new_pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle,
+ attrs);
+ if (IS_ERR(new_pd)) {
+ ret = PTR_ERR(new_pd);
goto put_uobjs;
}
+ } else {
+ new_pd = mr->pd;
}
- old_pd = mr->pd;
- ret = mr->device->rereg_user_mr(mr, cmd.flags, cmd.start,
- cmd.length, cmd.hca_va,
- cmd.access_flags, pd, &udata);
- if (!ret) {
+ /*
+ * The driver might create a new HW object as part of the rereg, we need
+ * to have a uobject ready to hold it.
+ */
+ new_uobj = uobj_alloc(UVERBS_OBJECT_MR, attrs, &ib_dev);
+ if (IS_ERR(new_uobj)) {
+ ret = PTR_ERR(new_uobj);
+ goto put_uobj_pd;
+ }
+
+ new_mr = ib_dev->ops.rereg_user_mr(mr, cmd.flags, cmd.start, cmd.length,
+ cmd.hca_va, cmd.access_flags, new_pd,
+ &attrs->driver_udata);
+ if (IS_ERR(new_mr)) {
+ ret = PTR_ERR(new_mr);
+ goto put_new_uobj;
+ }
+ if (new_mr) {
+ new_mr->device = new_pd->device;
+ new_mr->pd = new_pd;
+ new_mr->type = IB_MR_TYPE_USER;
+ new_mr->uobject = uobj;
+ atomic_inc(&new_pd->usecnt);
+ new_uobj->object = new_mr;
+
+ rdma_restrack_new(&new_mr->res, RDMA_RESTRACK_MR);
+ rdma_restrack_set_name(&new_mr->res, NULL);
+ rdma_restrack_add(&new_mr->res);
+
+ /*
+ * The new uobj for the new HW object is put into the same spot
+ * in the IDR and the old uobj & HW object is deleted.
+ */
+ rdma_assign_uobject(uobj, new_uobj, attrs);
+ rdma_alloc_commit_uobject(new_uobj, attrs);
+ uobj_put_destroy(uobj);
+ new_uobj = NULL;
+ uobj = NULL;
+ mr = new_mr;
+ } else {
if (cmd.flags & IB_MR_REREG_PD) {
- atomic_inc(&pd->usecnt);
- mr->pd = pd;
- atomic_dec(&old_pd->usecnt);
+ atomic_dec(&orig_pd->usecnt);
+ mr->pd = new_pd;
+ atomic_inc(&new_pd->usecnt);
+ }
+ if (cmd.flags & IB_MR_REREG_TRANS) {
+ mr->iova = cmd.hca_va;
+ mr->length = cmd.length;
}
- } else {
- goto put_uobj_pd;
}
memset(&resp, 0, sizeof(resp));
resp.lkey = mr->lkey;
resp.rkey = mr->rkey;
- if (copy_to_user((void __user *)(unsigned long)cmd.response,
- &resp, sizeof(resp)))
- ret = -EFAULT;
- else
- ret = in_len;
+ ret = uverbs_response(attrs, &resp, sizeof(resp));
+put_new_uobj:
+ if (new_uobj)
+ uobj_alloc_abort(new_uobj, attrs);
put_uobj_pd:
if (cmd.flags & IB_MR_REREG_PD)
- put_pd_read(pd);
+ uobj_put_obj_read(new_pd);
put_uobjs:
-
- put_uobj_write(mr->uobject);
+ if (uobj)
+ uobj_put_write(uobj);
return ret;
}
-ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_dereg_mr(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_dereg_mr cmd;
- struct ib_mr *mr;
- struct ib_uobject *uobj;
- int ret = -EINVAL;
-
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
-
- uobj = idr_write_uobj(&ib_uverbs_mr_idr, cmd.mr_handle, file->ucontext);
- if (!uobj)
- return -EINVAL;
-
- mr = uobj->object;
-
- ret = ib_dereg_mr(mr);
- if (!ret)
- uobj->live = 0;
-
- put_uobj_write(uobj);
+ int ret;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
if (ret)
return ret;
- idr_remove_uobj(&ib_uverbs_mr_idr, uobj);
-
- mutex_lock(&file->mutex);
- list_del(&uobj->list);
- mutex_unlock(&file->mutex);
-
- put_uobj(uobj);
-
- return in_len;
+ return uobj_perform_destroy(UVERBS_OBJECT_MR, cmd.mr_handle, attrs);
}
-ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_alloc_mw(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_alloc_mw cmd;
- struct ib_uverbs_alloc_mw_resp resp;
+ struct ib_uverbs_alloc_mw_resp resp = {};
struct ib_uobject *uobj;
struct ib_pd *pd;
struct ib_mw *mw;
- struct ib_udata udata;
int ret;
+ struct ib_device *ib_dev;
- if (out_len < sizeof(resp))
- return -ENOSPC;
-
- if (copy_from_user(&cmd, buf, sizeof(cmd)))
- return -EFAULT;
-
- uobj = kmalloc(sizeof(*uobj), GFP_KERNEL);
- if (!uobj)
- return -ENOMEM;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- init_uobj(uobj, 0, file->ucontext, &mw_lock_class);
- down_write(&uobj->mutex);
+ uobj = uobj_alloc(UVERBS_OBJECT_MW, attrs, &ib_dev);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
- pd = idr_read_pd(cmd.pd_handle, file->ucontext);
- if (!pd) {
- ret = -EINVAL;
+ pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, attrs);
+ if (IS_ERR(pd)) {
+ ret = PTR_ERR(pd);
goto err_free;
}
- INIT_UDATA(&udata, buf + sizeof(cmd),
- (unsigned long)cmd.response + sizeof(resp),
- in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
- out_len - sizeof(resp));
+ if (cmd.mw_type != IB_MW_TYPE_1 && cmd.mw_type != IB_MW_TYPE_2) {
+ ret = -EINVAL;
+ goto err_put;
+ }
- mw = pd->device->alloc_mw(pd, cmd.mw_type, &udata);
- if (IS_ERR(mw)) {
- ret = PTR_ERR(mw);
+ mw = rdma_zalloc_drv_obj(ib_dev, ib_mw);
+ if (!mw) {
+ ret = -ENOMEM;
goto err_put;
}
- mw->device = pd->device;
- mw->pd = pd;
+ mw->device = ib_dev;
+ mw->pd = pd;
mw->uobject = uobj;
- atomic_inc(&pd->usecnt);
+ mw->type = cmd.mw_type;
- uobj->object = mw;
- ret = idr_add_uobj(&ib_uverbs_mw_idr, uobj);
+ ret = pd->device->ops.alloc_mw(mw, &attrs->driver_udata);
if (ret)
- goto err_unalloc;
-
- memset(&resp, 0, sizeof(resp));
- resp.rkey = mw->rkey;
- resp.mw_handle = uobj->id;
+ goto err_alloc;
- if (copy_to_user((void __user *)(unsigned long)cmd.response,
- &resp, sizeof(resp))) {
- ret = -EFAULT;
- goto err_copy;
- }
-
- put_pd_read(pd);
-
- mutex_lock(&file->mutex);
- list_add_tail(&uobj->list, &file->ucontext->mw_list);
- mutex_unlock(&file->mutex);
-
- uobj->live = 1;
-
- up_write(&uobj->mutex);
-
- return in_len;
+ atomic_inc(&pd->usecnt);
-err_copy:
- idr_remove_uobj(&ib_uverbs_mw_idr, uobj);
+ uobj->object = mw;
+ uobj_put_obj_read(pd);
+ uobj_finalize_uobj_create(uobj, attrs);
-err_unalloc:
- uverbs_dealloc_mw(mw);
+ resp.rkey = mw->rkey;
+ resp.mw_handle = uobj->id;
+ return uverbs_response(attrs, &resp, sizeof(resp));
+err_alloc:
+ kfree(mw);
err_put:
- put_pd_read(pd);
-
+ uobj_put_obj_read(pd);
err_free:
- put_uobj_write(uobj);
+ uobj_alloc_abort(uobj, attrs);
return ret;
}
-ssize_t ib_uverbs_dealloc_mw(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_dealloc_mw(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_dealloc_mw cmd;
- struct ib_mw *mw;
- struct ib_uobject *uobj;
- int ret = -EINVAL;
-
- if (copy_from_user(&cmd, buf, sizeof(cmd)))
- return -EFAULT;
-
- uobj = idr_write_uobj(&ib_uverbs_mw_idr, cmd.mw_handle, file->ucontext);
- if (!uobj)
- return -EINVAL;
-
- mw = uobj->object;
-
- ret = uverbs_dealloc_mw(mw);
- if (!ret)
- uobj->live = 0;
-
- put_uobj_write(uobj);
+ int ret;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
if (ret)
return ret;
- idr_remove_uobj(&ib_uverbs_mw_idr, uobj);
-
- mutex_lock(&file->mutex);
- list_del(&uobj->list);
- mutex_unlock(&file->mutex);
-
- put_uobj(uobj);
-
- return in_len;
+ return uobj_perform_destroy(UVERBS_OBJECT_MW, cmd.mw_handle, attrs);
}
-ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_create_comp_channel(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_create_comp_channel cmd;
struct ib_uverbs_create_comp_channel_resp resp;
- struct file *filp;
+ struct ib_uobject *uobj;
+ struct ib_uverbs_completion_event_file *ev_file;
+ struct ib_device *ib_dev;
int ret;
- if (out_len < sizeof resp)
- return -ENOSPC;
-
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
-
- ret = get_unused_fd_flags(O_CLOEXEC);
- if (ret < 0)
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
return ret;
- resp.fd = ret;
- filp = ib_uverbs_alloc_event_file(file, ib_dev, 0);
- if (IS_ERR(filp)) {
- put_unused_fd(resp.fd);
- return PTR_ERR(filp);
- }
+ uobj = uobj_alloc(UVERBS_OBJECT_COMP_CHANNEL, attrs, &ib_dev);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
- if (copy_to_user((void __user *) (unsigned long) cmd.response,
- &resp, sizeof resp)) {
- put_unused_fd(resp.fd);
- fput(filp);
- return -EFAULT;
- }
+ ev_file = container_of(uobj, struct ib_uverbs_completion_event_file,
+ uobj);
+ ib_uverbs_init_event_queue(&ev_file->ev_queue);
+ uobj_finalize_uobj_create(uobj, attrs);
- fd_install(resp.fd, filp);
- return in_len;
+ resp.fd = uobj->id;
+ return uverbs_response(attrs, &resp, sizeof(resp));
}
-static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- struct ib_udata *ucore,
- struct ib_udata *uhw,
- struct ib_uverbs_ex_create_cq *cmd,
- size_t cmd_sz,
- int (*cb)(struct ib_uverbs_file *file,
- struct ib_ucq_object *obj,
- struct ib_uverbs_ex_create_cq_resp *resp,
- struct ib_udata *udata,
- void *context),
- void *context)
+static int create_cq(struct uverbs_attr_bundle *attrs,
+ struct ib_uverbs_ex_create_cq *cmd)
{
struct ib_ucq_object *obj;
- struct ib_uverbs_event_file *ev_file = NULL;
+ struct ib_uverbs_completion_event_file *ev_file = NULL;
struct ib_cq *cq;
int ret;
- struct ib_uverbs_ex_create_cq_resp resp;
+ struct ib_uverbs_ex_create_cq_resp resp = {};
struct ib_cq_init_attr attr = {};
+ struct ib_device *ib_dev;
- if (cmd->comp_vector >= file->device->num_comp_vectors)
- return ERR_PTR(-EINVAL);
-
- obj = kmalloc(sizeof *obj, GFP_KERNEL);
- if (!obj)
- return ERR_PTR(-ENOMEM);
+ if (cmd->comp_vector >= attrs->ufile->device->num_comp_vectors)
+ return -EINVAL;
- init_uobj(&obj->uobject, cmd->user_handle, file->ucontext, &cq_lock_class);
- down_write(&obj->uobject.mutex);
+ obj = (struct ib_ucq_object *)uobj_alloc(UVERBS_OBJECT_CQ, attrs,
+ &ib_dev);
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
if (cmd->comp_channel >= 0) {
- ev_file = ib_uverbs_lookup_comp_file(cmd->comp_channel);
- if (!ev_file) {
- ret = -EINVAL;
+ ev_file = ib_uverbs_lookup_comp_file(cmd->comp_channel, attrs);
+ if (IS_ERR(ev_file)) {
+ ret = PTR_ERR(ev_file);
goto err;
}
}
- obj->uverbs_file = file;
- obj->comp_events_reported = 0;
- obj->async_events_reported = 0;
+ obj->uevent.uobject.user_handle = cmd->user_handle;
INIT_LIST_HEAD(&obj->comp_list);
- INIT_LIST_HEAD(&obj->async_list);
+ INIT_LIST_HEAD(&obj->uevent.event_list);
attr.cqe = cmd->cqe;
attr.comp_vector = cmd->comp_vector;
+ attr.flags = cmd->flags;
- if (cmd_sz > offsetof(typeof(*cmd), flags) + sizeof(cmd->flags))
- attr.flags = cmd->flags;
-
- cq = ib_dev->create_cq(ib_dev, &attr,
- file->ucontext, uhw);
- if (IS_ERR(cq)) {
- ret = PTR_ERR(cq);
+ cq = rdma_zalloc_drv_obj(ib_dev, ib_cq);
+ if (!cq) {
+ ret = -ENOMEM;
goto err_file;
}
-
cq->device = ib_dev;
- cq->uobject = &obj->uobject;
+ cq->uobject = obj;
cq->comp_handler = ib_uverbs_comp_handler;
cq->event_handler = ib_uverbs_cq_event_handler;
- cq->cq_context = ev_file;
+ cq->cq_context = ev_file ? &ev_file->ev_queue : NULL;
atomic_set(&cq->usecnt, 0);
- obj->uobject.object = cq;
- ret = idr_add_uobj(&ib_uverbs_cq_idr, &obj->uobject);
- if (ret)
- goto err_free;
+ rdma_restrack_new(&cq->res, RDMA_RESTRACK_CQ);
+ rdma_restrack_set_name(&cq->res, NULL);
- memset(&resp, 0, sizeof resp);
- resp.base.cq_handle = obj->uobject.id;
- resp.base.cqe = cq->cqe;
-
- resp.response_length = offsetof(typeof(resp), response_length) +
- sizeof(resp.response_length);
-
- ret = cb(file, obj, &resp, ucore, context);
+ ret = ib_dev->ops.create_cq(cq, &attr, attrs);
if (ret)
- goto err_cb;
-
- mutex_lock(&file->mutex);
- list_add_tail(&obj->uobject.list, &file->ucontext->cq_list);
- mutex_unlock(&file->mutex);
-
- obj->uobject.live = 1;
-
- up_write(&obj->uobject.mutex);
+ goto err_free;
+ rdma_restrack_add(&cq->res);
- return obj;
+ obj->uevent.uobject.object = cq;
+ obj->uevent.event_file = READ_ONCE(attrs->ufile->default_async_file);
+ if (obj->uevent.event_file)
+ uverbs_uobject_get(&obj->uevent.event_file->uobj);
+ uobj_finalize_uobj_create(&obj->uevent.uobject, attrs);
-err_cb:
- idr_remove_uobj(&ib_uverbs_cq_idr, &obj->uobject);
+ resp.base.cq_handle = obj->uevent.uobject.id;
+ resp.base.cqe = cq->cqe;
+ resp.response_length = uverbs_response_length(attrs, sizeof(resp));
+ return uverbs_response(attrs, &resp, sizeof(resp));
err_free:
- ib_destroy_cq(cq);
-
+ rdma_restrack_put(&cq->res);
+ kfree(cq);
err_file:
if (ev_file)
- ib_uverbs_release_ucq(file, ev_file, obj);
-
+ ib_uverbs_release_ucq(ev_file, obj);
err:
- put_uobj_write(&obj->uobject);
-
- return ERR_PTR(ret);
-}
-
-static int ib_uverbs_create_cq_cb(struct ib_uverbs_file *file,
- struct ib_ucq_object *obj,
- struct ib_uverbs_ex_create_cq_resp *resp,
- struct ib_udata *ucore, void *context)
-{
- if (ib_copy_to_udata(ucore, &resp->base, sizeof(resp->base)))
- return -EFAULT;
-
- return 0;
+ uobj_alloc_abort(&obj->uevent.uobject, attrs);
+ return ret;
}
-ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_create_cq(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_create_cq cmd;
struct ib_uverbs_ex_create_cq cmd_ex;
- struct ib_uverbs_create_cq_resp resp;
- struct ib_udata ucore;
- struct ib_udata uhw;
- struct ib_ucq_object *obj;
-
- if (out_len < sizeof(resp))
- return -ENOSPC;
-
- if (copy_from_user(&cmd, buf, sizeof(cmd)))
- return -EFAULT;
-
- INIT_UDATA(&ucore, buf, (unsigned long)cmd.response, sizeof(cmd), sizeof(resp));
+ int ret;
- INIT_UDATA(&uhw, buf + sizeof(cmd),
- (unsigned long)cmd.response + sizeof(resp),
- in_len - sizeof(cmd), out_len - sizeof(resp));
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
memset(&cmd_ex, 0, sizeof(cmd_ex));
cmd_ex.user_handle = cmd.user_handle;
@@ -1502,44 +1111,17 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file,
cmd_ex.comp_vector = cmd.comp_vector;
cmd_ex.comp_channel = cmd.comp_channel;
- obj = create_cq(file, ib_dev, &ucore, &uhw, &cmd_ex,
- offsetof(typeof(cmd_ex), comp_channel) +
- sizeof(cmd.comp_channel), ib_uverbs_create_cq_cb,
- NULL);
-
- if (IS_ERR(obj))
- return PTR_ERR(obj);
-
- return in_len;
+ return create_cq(attrs, &cmd_ex);
}
-static int ib_uverbs_ex_create_cq_cb(struct ib_uverbs_file *file,
- struct ib_ucq_object *obj,
- struct ib_uverbs_ex_create_cq_resp *resp,
- struct ib_udata *ucore, void *context)
+static int ib_uverbs_ex_create_cq(struct uverbs_attr_bundle *attrs)
{
- if (ib_copy_to_udata(ucore, resp, resp->response_length))
- return -EFAULT;
-
- return 0;
-}
-
-int ib_uverbs_ex_create_cq(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- struct ib_udata *ucore,
- struct ib_udata *uhw)
-{
- struct ib_uverbs_ex_create_cq_resp resp;
struct ib_uverbs_ex_create_cq cmd;
- struct ib_ucq_object *obj;
- int err;
-
- if (ucore->inlen < sizeof(cmd))
- return -EINVAL;
+ int ret;
- err = ib_copy_from_udata(&cmd, ucore, sizeof(cmd));
- if (err)
- return err;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
if (cmd.comp_mask)
return -EINVAL;
@@ -1547,59 +1129,40 @@ int ib_uverbs_ex_create_cq(struct ib_uverbs_file *file,
if (cmd.reserved)
return -EINVAL;
- if (ucore->outlen < (offsetof(typeof(resp), response_length) +
- sizeof(resp.response_length)))
- return -ENOSPC;
-
- obj = create_cq(file, ib_dev, ucore, uhw, &cmd,
- min(ucore->inlen, sizeof(cmd)),
- ib_uverbs_ex_create_cq_cb, NULL);
-
- if (IS_ERR(obj))
- return PTR_ERR(obj);
-
- return 0;
+ return create_cq(attrs, &cmd);
}
-ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_resize_cq(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_resize_cq cmd;
- struct ib_uverbs_resize_cq_resp resp;
- struct ib_udata udata;
+ struct ib_uverbs_resize_cq_resp resp = {};
struct ib_cq *cq;
- int ret = -EINVAL;
-
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
+ int ret;
- INIT_UDATA(&udata, buf + sizeof cmd,
- (unsigned long) cmd.response + sizeof resp,
- in_len - sizeof cmd, out_len - sizeof resp);
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0);
- if (!cq)
- return -EINVAL;
+ cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, attrs);
+ if (IS_ERR(cq))
+ return PTR_ERR(cq);
- ret = cq->device->resize_cq(cq, cmd.cqe, &udata);
+ ret = cq->device->ops.resize_cq(cq, cmd.cqe, &attrs->driver_udata);
if (ret)
goto out;
resp.cqe = cq->cqe;
- if (copy_to_user((void __user *) (unsigned long) cmd.response,
- &resp, sizeof resp.cqe))
- ret = -EFAULT;
-
+ ret = uverbs_response(attrs, &resp, sizeof(resp));
out:
- put_cq_read(cq);
+ rdma_lookup_put_uobject(&cq->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
- return ret ? ret : in_len;
+ return ret;
}
-static int copy_wc_to_user(void __user *dest, struct ib_wc *wc)
+static int copy_wc_to_user(struct ib_device *ib_dev, void __user *dest,
+ struct ib_wc *wc)
{
struct ib_uverbs_wc tmp;
@@ -1608,12 +1171,15 @@ static int copy_wc_to_user(void __user *dest, struct ib_wc *wc)
tmp.opcode = wc->opcode;
tmp.vendor_err = wc->vendor_err;
tmp.byte_len = wc->byte_len;
- tmp.ex.imm_data = (__u32 __force) wc->ex.imm_data;
+ tmp.ex.imm_data = wc->ex.imm_data;
tmp.qp_num = wc->qp->qp_num;
tmp.src_qp = wc->src_qp;
tmp.wc_flags = wc->wc_flags;
tmp.pkey_index = wc->pkey_index;
- tmp.slid = wc->slid;
+ if (rdma_cap_opa_ah(ib_dev, wc->port_num))
+ tmp.slid = OPA_TO_IB_UCAST_LID(wc->slid);
+ else
+ tmp.slid = ib_lid_cpu16(wc->slid);
tmp.sl = wc->sl;
tmp.dlid_path_bits = wc->dlid_path_bits;
tmp.port_num = wc->port_num;
@@ -1625,10 +1191,7 @@ static int copy_wc_to_user(void __user *dest, struct ib_wc *wc)
return 0;
}
-ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_poll_cq(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_poll_cq cmd;
struct ib_uverbs_poll_cq_resp resp;
@@ -1638,15 +1201,16 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file,
struct ib_wc wc;
int ret;
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0);
- if (!cq)
- return -EINVAL;
+ cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, attrs);
+ if (IS_ERR(cq))
+ return PTR_ERR(cq);
/* we copy a struct ib_uverbs_poll_cq_resp to user space */
- header_ptr = (void __user *)(unsigned long) cmd.response;
+ header_ptr = attrs->ucore.outbuf;
data_ptr = header_ptr + sizeof resp;
memset(&resp, 0, sizeof resp);
@@ -1657,7 +1221,7 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file,
if (!ret)
break;
- ret = copy_wc_to_user(data_ptr, &wc);
+ ret = copy_wc_to_user(cq->device, data_ptr, &wc);
if (ret)
goto out_put;
@@ -1669,144 +1233,119 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file,
ret = -EFAULT;
goto out_put;
}
+ ret = 0;
- ret = in_len;
+ if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_CORE_OUT))
+ ret = uverbs_output_written(attrs, UVERBS_ATTR_CORE_OUT);
out_put:
- put_cq_read(cq);
+ rdma_lookup_put_uobject(&cq->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
return ret;
}
-ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_req_notify_cq(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_req_notify_cq cmd;
struct ib_cq *cq;
+ int ret;
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0);
- if (!cq)
- return -EINVAL;
+ cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, attrs);
+ if (IS_ERR(cq))
+ return PTR_ERR(cq);
ib_req_notify_cq(cq, cmd.solicited_only ?
IB_CQ_SOLICITED : IB_CQ_NEXT_COMP);
- put_cq_read(cq);
-
- return in_len;
+ rdma_lookup_put_uobject(&cq->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
+ return 0;
}
-ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_destroy_cq(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_destroy_cq cmd;
struct ib_uverbs_destroy_cq_resp resp;
struct ib_uobject *uobj;
- struct ib_cq *cq;
struct ib_ucq_object *obj;
- struct ib_uverbs_event_file *ev_file;
- int ret = -EINVAL;
-
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
-
- uobj = idr_write_uobj(&ib_uverbs_cq_idr, cmd.cq_handle, file->ucontext);
- if (!uobj)
- return -EINVAL;
- cq = uobj->object;
- ev_file = cq->cq_context;
- obj = container_of(cq->uobject, struct ib_ucq_object, uobject);
-
- ret = ib_destroy_cq(cq);
- if (!ret)
- uobj->live = 0;
-
- put_uobj_write(uobj);
+ int ret;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
if (ret)
return ret;
- idr_remove_uobj(&ib_uverbs_cq_idr, uobj);
-
- mutex_lock(&file->mutex);
- list_del(&uobj->list);
- mutex_unlock(&file->mutex);
+ uobj = uobj_get_destroy(UVERBS_OBJECT_CQ, cmd.cq_handle, attrs);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
- ib_uverbs_release_ucq(file, ev_file, obj);
-
- memset(&resp, 0, sizeof resp);
+ obj = container_of(uobj, struct ib_ucq_object, uevent.uobject);
+ memset(&resp, 0, sizeof(resp));
resp.comp_events_reported = obj->comp_events_reported;
- resp.async_events_reported = obj->async_events_reported;
-
- put_uobj(uobj);
+ resp.async_events_reported = obj->uevent.events_reported;
- if (copy_to_user((void __user *) (unsigned long) cmd.response,
- &resp, sizeof resp))
- return -EFAULT;
+ uobj_put_destroy(uobj);
- return in_len;
+ return uverbs_response(attrs, &resp, sizeof(resp));
}
-static int create_qp(struct ib_uverbs_file *file,
- struct ib_udata *ucore,
- struct ib_udata *uhw,
- struct ib_uverbs_ex_create_qp *cmd,
- size_t cmd_sz,
- int (*cb)(struct ib_uverbs_file *file,
- struct ib_uverbs_ex_create_qp_resp *resp,
- struct ib_udata *udata),
- void *context)
+static int create_qp(struct uverbs_attr_bundle *attrs,
+ struct ib_uverbs_ex_create_qp *cmd)
{
struct ib_uqp_object *obj;
struct ib_device *device;
struct ib_pd *pd = NULL;
struct ib_xrcd *xrcd = NULL;
- struct ib_uobject *uninitialized_var(xrcd_uobj);
+ struct ib_uobject *xrcd_uobj = ERR_PTR(-ENOENT);
struct ib_cq *scq = NULL, *rcq = NULL;
struct ib_srq *srq = NULL;
struct ib_qp *qp;
- char *buf;
struct ib_qp_init_attr attr = {};
- struct ib_uverbs_ex_create_qp_resp resp;
+ struct ib_uverbs_ex_create_qp_resp resp = {};
int ret;
struct ib_rwq_ind_table *ind_tbl = NULL;
bool has_sq = true;
+ struct ib_device *ib_dev;
+
+ switch (cmd->qp_type) {
+ case IB_QPT_RAW_PACKET:
+ if (!rdma_uattrs_has_raw_cap(attrs))
+ return -EPERM;
+ fallthrough;
+ case IB_QPT_RC:
+ case IB_QPT_UC:
+ case IB_QPT_UD:
+ case IB_QPT_XRC_INI:
+ case IB_QPT_XRC_TGT:
+ case IB_QPT_DRIVER:
+ break;
+ default:
+ return -EINVAL;
+ }
- if (cmd->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW))
- return -EPERM;
-
- obj = kzalloc(sizeof *obj, GFP_KERNEL);
- if (!obj)
- return -ENOMEM;
-
- init_uobj(&obj->uevent.uobject, cmd->user_handle, file->ucontext,
- &qp_lock_class);
- down_write(&obj->uevent.uobject.mutex);
- if (cmd_sz >= offsetof(typeof(*cmd), rwq_ind_tbl_handle) +
- sizeof(cmd->rwq_ind_tbl_handle) &&
- (cmd->comp_mask & IB_UVERBS_CREATE_QP_MASK_IND_TABLE)) {
- ind_tbl = idr_read_rwq_indirection_table(cmd->rwq_ind_tbl_handle,
- file->ucontext);
- if (!ind_tbl) {
- ret = -EINVAL;
+ obj = (struct ib_uqp_object *)uobj_alloc(UVERBS_OBJECT_QP, attrs,
+ &ib_dev);
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
+ obj->uxrcd = NULL;
+ obj->uevent.uobject.user_handle = cmd->user_handle;
+ mutex_init(&obj->mcast_lock);
+
+ if (cmd->comp_mask & IB_UVERBS_CREATE_QP_MASK_IND_TABLE) {
+ ind_tbl = uobj_get_obj_read(rwq_ind_table,
+ UVERBS_OBJECT_RWQ_IND_TBL,
+ cmd->rwq_ind_tbl_handle, attrs);
+ if (IS_ERR(ind_tbl)) {
+ ret = PTR_ERR(ind_tbl);
goto err_put;
}
attr.rwq_ind_tbl = ind_tbl;
}
- if ((cmd_sz >= offsetof(typeof(*cmd), reserved1) +
- sizeof(cmd->reserved1)) && cmd->reserved1) {
- ret = -EOPNOTSUPP;
- goto err_put;
- }
-
if (ind_tbl && (cmd->max_recv_wr || cmd->max_recv_sge || cmd->is_srq)) {
ret = -EINVAL;
goto err_put;
@@ -1816,8 +1355,15 @@ static int create_qp(struct ib_uverbs_file *file,
has_sq = false;
if (cmd->qp_type == IB_QPT_XRC_TGT) {
- xrcd = idr_read_xrcd(cmd->pd_handle, file->ucontext,
- &xrcd_uobj);
+ xrcd_uobj = uobj_get_read(UVERBS_OBJECT_XRCD, cmd->pd_handle,
+ attrs);
+
+ if (IS_ERR(xrcd_uobj)) {
+ ret = -EINVAL;
+ goto err_put;
+ }
+
+ xrcd = (struct ib_xrcd *)xrcd_uobj->object;
if (!xrcd) {
ret = -EINVAL;
goto err_put;
@@ -1829,33 +1375,44 @@ static int create_qp(struct ib_uverbs_file *file,
cmd->max_recv_sge = 0;
} else {
if (cmd->is_srq) {
- srq = idr_read_srq(cmd->srq_handle,
- file->ucontext);
- if (!srq || srq->srq_type != IB_SRQT_BASIC) {
- ret = -EINVAL;
+ srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ,
+ cmd->srq_handle, attrs);
+ if (IS_ERR(srq) ||
+ srq->srq_type == IB_SRQT_XRC) {
+ ret = IS_ERR(srq) ? PTR_ERR(srq) :
+ -EINVAL;
goto err_put;
}
}
if (!ind_tbl) {
if (cmd->recv_cq_handle != cmd->send_cq_handle) {
- rcq = idr_read_cq(cmd->recv_cq_handle,
- file->ucontext, 0);
- if (!rcq) {
- ret = -EINVAL;
+ rcq = uobj_get_obj_read(
+ cq, UVERBS_OBJECT_CQ,
+ cmd->recv_cq_handle, attrs);
+ if (IS_ERR(rcq)) {
+ ret = PTR_ERR(rcq);
goto err_put;
}
}
}
}
- if (has_sq)
- scq = idr_read_cq(cmd->send_cq_handle, file->ucontext, !!rcq);
- if (!ind_tbl)
+ if (has_sq) {
+ scq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ,
+ cmd->send_cq_handle, attrs);
+ if (IS_ERR(scq)) {
+ ret = PTR_ERR(scq);
+ goto err_put;
+ }
+ }
+
+ if (!ind_tbl && cmd->qp_type != IB_QPT_XRC_INI)
rcq = rcq ?: scq;
- pd = idr_read_pd(cmd->pd_handle, file->ucontext);
- if (!pd || (!scq && has_sq)) {
- ret = -EINVAL;
+ pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd->pd_handle,
+ attrs);
+ if (IS_ERR(pd)) {
+ ret = PTR_ERR(pd);
goto err_put;
}
@@ -1863,7 +1420,6 @@ static int create_qp(struct ib_uverbs_file *file,
}
attr.event_handler = ib_uverbs_qp_event_handler;
- attr.qp_context = file;
attr.send_cq = scq;
attr.recv_cq = rcq;
attr.srq = srq;
@@ -1871,7 +1427,6 @@ static int create_qp(struct ib_uverbs_file *file,
attr.sq_sig_type = cmd->sq_sig_all ? IB_SIGNAL_ALL_WR :
IB_SIGNAL_REQ_WR;
attr.qp_type = cmd->qp_type;
- attr.create_flags = 0;
attr.cap.max_send_wr = cmd->max_send_wr;
attr.cap.max_recv_wr = cmd->max_recv_wr;
@@ -1879,171 +1434,106 @@ static int create_qp(struct ib_uverbs_file *file,
attr.cap.max_recv_sge = cmd->max_recv_sge;
attr.cap.max_inline_data = cmd->max_inline_data;
- obj->uevent.events_reported = 0;
INIT_LIST_HEAD(&obj->uevent.event_list);
INIT_LIST_HEAD(&obj->mcast_list);
- if (cmd_sz >= offsetof(typeof(*cmd), create_flags) +
- sizeof(cmd->create_flags))
- attr.create_flags = cmd->create_flags;
-
+ attr.create_flags = cmd->create_flags;
if (attr.create_flags & ~(IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK |
IB_QP_CREATE_CROSS_CHANNEL |
IB_QP_CREATE_MANAGED_SEND |
IB_QP_CREATE_MANAGED_RECV |
- IB_QP_CREATE_SCATTER_FCS)) {
+ IB_QP_CREATE_SCATTER_FCS |
+ IB_QP_CREATE_CVLAN_STRIPPING |
+ IB_QP_CREATE_SOURCE_QPN |
+ IB_QP_CREATE_PCI_WRITE_END_PADDING)) {
ret = -EINVAL;
goto err_put;
}
- buf = (void *)cmd + sizeof(*cmd);
- if (cmd_sz > sizeof(*cmd))
- if (!(buf[0] == 0 && !memcmp(buf, buf + 1,
- cmd_sz - sizeof(*cmd) - 1))) {
- ret = -EINVAL;
+ if (attr.create_flags & IB_QP_CREATE_SOURCE_QPN) {
+ if (!rdma_uattrs_has_raw_cap(attrs)) {
+ ret = -EPERM;
goto err_put;
}
- if (cmd->qp_type == IB_QPT_XRC_TGT)
- qp = ib_create_qp(pd, &attr);
- else
- qp = device->create_qp(pd, &attr, uhw);
+ attr.source_qpn = cmd->source_qpn;
+ }
+ qp = ib_create_qp_user(device, pd, &attr, &attrs->driver_udata, obj,
+ KBUILD_MODNAME);
if (IS_ERR(qp)) {
ret = PTR_ERR(qp);
goto err_put;
}
-
- if (cmd->qp_type != IB_QPT_XRC_TGT) {
- qp->real_qp = qp;
- qp->device = device;
- qp->pd = pd;
- qp->send_cq = attr.send_cq;
- qp->recv_cq = attr.recv_cq;
- qp->srq = attr.srq;
- qp->rwq_ind_tbl = ind_tbl;
- qp->event_handler = attr.event_handler;
- qp->qp_context = attr.qp_context;
- qp->qp_type = attr.qp_type;
- atomic_set(&qp->usecnt, 0);
- atomic_inc(&pd->usecnt);
- if (attr.send_cq)
- atomic_inc(&attr.send_cq->usecnt);
- if (attr.recv_cq)
- atomic_inc(&attr.recv_cq->usecnt);
- if (attr.srq)
- atomic_inc(&attr.srq->usecnt);
- if (ind_tbl)
- atomic_inc(&ind_tbl->usecnt);
- }
- qp->uobject = &obj->uevent.uobject;
+ ib_qp_usecnt_inc(qp);
obj->uevent.uobject.object = qp;
- ret = idr_add_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject);
- if (ret)
- goto err_destroy;
-
- memset(&resp, 0, sizeof resp);
- resp.base.qpn = qp->qp_num;
- resp.base.qp_handle = obj->uevent.uobject.id;
- resp.base.max_recv_sge = attr.cap.max_recv_sge;
- resp.base.max_send_sge = attr.cap.max_send_sge;
- resp.base.max_recv_wr = attr.cap.max_recv_wr;
- resp.base.max_send_wr = attr.cap.max_send_wr;
- resp.base.max_inline_data = attr.cap.max_inline_data;
-
- resp.response_length = offsetof(typeof(resp), response_length) +
- sizeof(resp.response_length);
-
- ret = cb(file, &resp, ucore);
- if (ret)
- goto err_cb;
+ obj->uevent.event_file = READ_ONCE(attrs->ufile->default_async_file);
+ if (obj->uevent.event_file)
+ uverbs_uobject_get(&obj->uevent.event_file->uobj);
if (xrcd) {
obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object,
uobject);
atomic_inc(&obj->uxrcd->refcnt);
- put_xrcd_read(xrcd_uobj);
+ uobj_put_read(xrcd_uobj);
}
if (pd)
- put_pd_read(pd);
+ uobj_put_obj_read(pd);
if (scq)
- put_cq_read(scq);
+ rdma_lookup_put_uobject(&scq->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
if (rcq && rcq != scq)
- put_cq_read(rcq);
+ rdma_lookup_put_uobject(&rcq->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
if (srq)
- put_srq_read(srq);
+ rdma_lookup_put_uobject(&srq->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
if (ind_tbl)
- put_rwq_indirection_table_read(ind_tbl);
+ uobj_put_obj_read(ind_tbl);
+ uobj_finalize_uobj_create(&obj->uevent.uobject, attrs);
- mutex_lock(&file->mutex);
- list_add_tail(&obj->uevent.uobject.list, &file->ucontext->qp_list);
- mutex_unlock(&file->mutex);
-
- obj->uevent.uobject.live = 1;
-
- up_write(&obj->uevent.uobject.mutex);
-
- return 0;
-err_cb:
- idr_remove_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject);
-
-err_destroy:
- ib_destroy_qp(qp);
+ resp.base.qpn = qp->qp_num;
+ resp.base.qp_handle = obj->uevent.uobject.id;
+ resp.base.max_recv_sge = attr.cap.max_recv_sge;
+ resp.base.max_send_sge = attr.cap.max_send_sge;
+ resp.base.max_recv_wr = attr.cap.max_recv_wr;
+ resp.base.max_send_wr = attr.cap.max_send_wr;
+ resp.base.max_inline_data = attr.cap.max_inline_data;
+ resp.response_length = uverbs_response_length(attrs, sizeof(resp));
+ return uverbs_response(attrs, &resp, sizeof(resp));
err_put:
- if (xrcd)
- put_xrcd_read(xrcd_uobj);
- if (pd)
- put_pd_read(pd);
- if (scq)
- put_cq_read(scq);
- if (rcq && rcq != scq)
- put_cq_read(rcq);
- if (srq)
- put_srq_read(srq);
- if (ind_tbl)
- put_rwq_indirection_table_read(ind_tbl);
-
- put_uobj_write(&obj->uevent.uobject);
+ if (!IS_ERR(xrcd_uobj))
+ uobj_put_read(xrcd_uobj);
+ if (!IS_ERR_OR_NULL(pd))
+ uobj_put_obj_read(pd);
+ if (!IS_ERR_OR_NULL(scq))
+ rdma_lookup_put_uobject(&scq->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
+ if (!IS_ERR_OR_NULL(rcq) && rcq != scq)
+ rdma_lookup_put_uobject(&rcq->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
+ if (!IS_ERR_OR_NULL(srq))
+ rdma_lookup_put_uobject(&srq->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
+ if (!IS_ERR_OR_NULL(ind_tbl))
+ uobj_put_obj_read(ind_tbl);
+
+ uobj_alloc_abort(&obj->uevent.uobject, attrs);
return ret;
}
-static int ib_uverbs_create_qp_cb(struct ib_uverbs_file *file,
- struct ib_uverbs_ex_create_qp_resp *resp,
- struct ib_udata *ucore)
-{
- if (ib_copy_to_udata(ucore, &resp->base, sizeof(resp->base)))
- return -EFAULT;
-
- return 0;
-}
-
-ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_create_qp(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_create_qp cmd;
struct ib_uverbs_ex_create_qp cmd_ex;
- struct ib_udata ucore;
- struct ib_udata uhw;
- ssize_t resp_size = sizeof(struct ib_uverbs_create_qp_resp);
- int err;
-
- if (out_len < resp_size)
- return -ENOSPC;
-
- if (copy_from_user(&cmd, buf, sizeof(cmd)))
- return -EFAULT;
+ int ret;
- INIT_UDATA(&ucore, buf, (unsigned long)cmd.response, sizeof(cmd),
- resp_size);
- INIT_UDATA(&uhw, buf + sizeof(cmd),
- (unsigned long)cmd.response + resp_size,
- in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
- out_len - resp_size);
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
memset(&cmd_ex, 0, sizeof(cmd_ex));
cmd_ex.user_handle = cmd.user_handle;
@@ -2060,43 +1550,17 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
cmd_ex.qp_type = cmd.qp_type;
cmd_ex.is_srq = cmd.is_srq;
- err = create_qp(file, &ucore, &uhw, &cmd_ex,
- offsetof(typeof(cmd_ex), is_srq) +
- sizeof(cmd.is_srq), ib_uverbs_create_qp_cb,
- NULL);
-
- if (err)
- return err;
-
- return in_len;
+ return create_qp(attrs, &cmd_ex);
}
-static int ib_uverbs_ex_create_qp_cb(struct ib_uverbs_file *file,
- struct ib_uverbs_ex_create_qp_resp *resp,
- struct ib_udata *ucore)
+static int ib_uverbs_ex_create_qp(struct uverbs_attr_bundle *attrs)
{
- if (ib_copy_to_udata(ucore, resp, resp->response_length))
- return -EFAULT;
-
- return 0;
-}
-
-int ib_uverbs_ex_create_qp(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- struct ib_udata *ucore,
- struct ib_udata *uhw)
-{
- struct ib_uverbs_ex_create_qp_resp resp;
- struct ib_uverbs_ex_create_qp cmd = {0};
- int err;
-
- if (ucore->inlen < (offsetof(typeof(cmd), comp_mask) +
- sizeof(cmd.comp_mask)))
- return -EINVAL;
+ struct ib_uverbs_ex_create_qp cmd;
+ int ret;
- err = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen));
- if (err)
- return err;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
if (cmd.comp_mask & ~IB_UVERBS_CREATE_QP_SUP_COMP_MASK)
return -EINVAL;
@@ -2104,119 +1568,98 @@ int ib_uverbs_ex_create_qp(struct ib_uverbs_file *file,
if (cmd.reserved)
return -EINVAL;
- if (ucore->outlen < (offsetof(typeof(resp), response_length) +
- sizeof(resp.response_length)))
- return -ENOSPC;
-
- err = create_qp(file, ucore, uhw, &cmd,
- min(ucore->inlen, sizeof(cmd)),
- ib_uverbs_ex_create_qp_cb, NULL);
-
- if (err)
- return err;
-
- return 0;
+ return create_qp(attrs, &cmd);
}
-ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- const char __user *buf, int in_len, int out_len)
+static int ib_uverbs_open_qp(struct uverbs_attr_bundle *attrs)
{
+ struct ib_uverbs_create_qp_resp resp = {};
struct ib_uverbs_open_qp cmd;
- struct ib_uverbs_create_qp_resp resp;
- struct ib_udata udata;
struct ib_uqp_object *obj;
struct ib_xrcd *xrcd;
- struct ib_uobject *uninitialized_var(xrcd_uobj);
struct ib_qp *qp;
- struct ib_qp_open_attr attr;
+ struct ib_qp_open_attr attr = {};
int ret;
+ struct ib_uobject *xrcd_uobj;
+ struct ib_device *ib_dev;
- if (out_len < sizeof resp)
- return -ENOSPC;
-
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
-
- INIT_UDATA(&udata, buf + sizeof cmd,
- (unsigned long) cmd.response + sizeof resp,
- in_len - sizeof cmd, out_len - sizeof resp);
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- obj = kmalloc(sizeof *obj, GFP_KERNEL);
- if (!obj)
- return -ENOMEM;
+ obj = (struct ib_uqp_object *)uobj_alloc(UVERBS_OBJECT_QP, attrs,
+ &ib_dev);
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
- init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext, &qp_lock_class);
- down_write(&obj->uevent.uobject.mutex);
+ xrcd_uobj = uobj_get_read(UVERBS_OBJECT_XRCD, cmd.pd_handle, attrs);
+ if (IS_ERR(xrcd_uobj)) {
+ ret = -EINVAL;
+ goto err_put;
+ }
- xrcd = idr_read_xrcd(cmd.pd_handle, file->ucontext, &xrcd_uobj);
+ xrcd = (struct ib_xrcd *)xrcd_uobj->object;
if (!xrcd) {
ret = -EINVAL;
- goto err_put;
+ goto err_xrcd;
}
attr.event_handler = ib_uverbs_qp_event_handler;
- attr.qp_context = file;
attr.qp_num = cmd.qpn;
attr.qp_type = cmd.qp_type;
- obj->uevent.events_reported = 0;
INIT_LIST_HEAD(&obj->uevent.event_list);
INIT_LIST_HEAD(&obj->mcast_list);
qp = ib_open_qp(xrcd, &attr);
if (IS_ERR(qp)) {
ret = PTR_ERR(qp);
- goto err_put;
+ goto err_xrcd;
}
- qp->uobject = &obj->uevent.uobject;
-
obj->uevent.uobject.object = qp;
- ret = idr_add_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject);
- if (ret)
- goto err_destroy;
-
- memset(&resp, 0, sizeof resp);
- resp.qpn = qp->qp_num;
- resp.qp_handle = obj->uevent.uobject.id;
-
- if (copy_to_user((void __user *) (unsigned long) cmd.response,
- &resp, sizeof resp)) {
- ret = -EFAULT;
- goto err_remove;
- }
+ obj->uevent.uobject.user_handle = cmd.user_handle;
obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, uobject);
atomic_inc(&obj->uxrcd->refcnt);
- put_xrcd_read(xrcd_uobj);
-
- mutex_lock(&file->mutex);
- list_add_tail(&obj->uevent.uobject.list, &file->ucontext->qp_list);
- mutex_unlock(&file->mutex);
-
- obj->uevent.uobject.live = 1;
+ qp->uobject = obj;
+ uobj_put_read(xrcd_uobj);
+ uobj_finalize_uobj_create(&obj->uevent.uobject, attrs);
- up_write(&obj->uevent.uobject.mutex);
-
- return in_len;
-
-err_remove:
- idr_remove_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject);
-
-err_destroy:
- ib_destroy_qp(qp);
+ resp.qpn = qp->qp_num;
+ resp.qp_handle = obj->uevent.uobject.id;
+ return uverbs_response(attrs, &resp, sizeof(resp));
+err_xrcd:
+ uobj_put_read(xrcd_uobj);
err_put:
- put_xrcd_read(xrcd_uobj);
- put_uobj_write(&obj->uevent.uobject);
+ uobj_alloc_abort(&obj->uevent.uobject, attrs);
return ret;
}
-ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- const char __user *buf, int in_len,
- int out_len)
+static void copy_ah_attr_to_uverbs(struct ib_uverbs_qp_dest *uverb_attr,
+ struct rdma_ah_attr *rdma_attr)
+{
+ const struct ib_global_route *grh;
+
+ uverb_attr->dlid = rdma_ah_get_dlid(rdma_attr);
+ uverb_attr->sl = rdma_ah_get_sl(rdma_attr);
+ uverb_attr->src_path_bits = rdma_ah_get_path_bits(rdma_attr);
+ uverb_attr->static_rate = rdma_ah_get_static_rate(rdma_attr);
+ uverb_attr->is_global = !!(rdma_ah_get_ah_flags(rdma_attr) &
+ IB_AH_GRH);
+ if (uverb_attr->is_global) {
+ grh = rdma_ah_read_grh(rdma_attr);
+ memcpy(uverb_attr->dgid, grh->dgid.raw, 16);
+ uverb_attr->flow_label = grh->flow_label;
+ uverb_attr->sgid_index = grh->sgid_index;
+ uverb_attr->hop_limit = grh->hop_limit;
+ uverb_attr->traffic_class = grh->traffic_class;
+ }
+ uverb_attr->port_num = rdma_ah_get_port_num(rdma_attr);
+}
+
+static int ib_uverbs_query_qp(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_query_qp cmd;
struct ib_uverbs_query_qp_resp resp;
@@ -2225,8 +1668,9 @@ ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file,
struct ib_qp_init_attr *init_attr;
int ret;
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
attr = kmalloc(sizeof *attr, GFP_KERNEL);
init_attr = kmalloc(sizeof *init_attr, GFP_KERNEL);
@@ -2235,15 +1679,16 @@ ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file,
goto out;
}
- qp = idr_read_qp(cmd.qp_handle, file->ucontext);
- if (!qp) {
- ret = -EINVAL;
+ qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, attrs);
+ if (IS_ERR(qp)) {
+ ret = PTR_ERR(qp);
goto out;
}
ret = ib_query_qp(qp, attr, cmd.attr_mask, init_attr);
- put_qp_read(qp);
+ rdma_lookup_put_uobject(&qp->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
if (ret)
goto out;
@@ -2272,29 +1717,8 @@ ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file,
resp.alt_port_num = attr->alt_port_num;
resp.alt_timeout = attr->alt_timeout;
- memcpy(resp.dest.dgid, attr->ah_attr.grh.dgid.raw, 16);
- resp.dest.flow_label = attr->ah_attr.grh.flow_label;
- resp.dest.sgid_index = attr->ah_attr.grh.sgid_index;
- resp.dest.hop_limit = attr->ah_attr.grh.hop_limit;
- resp.dest.traffic_class = attr->ah_attr.grh.traffic_class;
- resp.dest.dlid = attr->ah_attr.dlid;
- resp.dest.sl = attr->ah_attr.sl;
- resp.dest.src_path_bits = attr->ah_attr.src_path_bits;
- resp.dest.static_rate = attr->ah_attr.static_rate;
- resp.dest.is_global = !!(attr->ah_attr.ah_flags & IB_AH_GRH);
- resp.dest.port_num = attr->ah_attr.port_num;
-
- memcpy(resp.alt_dest.dgid, attr->alt_ah_attr.grh.dgid.raw, 16);
- resp.alt_dest.flow_label = attr->alt_ah_attr.grh.flow_label;
- resp.alt_dest.sgid_index = attr->alt_ah_attr.grh.sgid_index;
- resp.alt_dest.hop_limit = attr->alt_ah_attr.grh.hop_limit;
- resp.alt_dest.traffic_class = attr->alt_ah_attr.grh.traffic_class;
- resp.alt_dest.dlid = attr->alt_ah_attr.dlid;
- resp.alt_dest.sl = attr->alt_ah_attr.sl;
- resp.alt_dest.src_path_bits = attr->alt_ah_attr.src_path_bits;
- resp.alt_dest.static_rate = attr->alt_ah_attr.static_rate;
- resp.alt_dest.is_global = !!(attr->alt_ah_attr.ah_flags & IB_AH_GRH);
- resp.alt_dest.port_num = attr->alt_ah_attr.port_num;
+ copy_ah_attr_to_uverbs(&resp.dest, &attr->ah_attr);
+ copy_ah_attr_to_uverbs(&resp.alt_dest, &attr->alt_ah_attr);
resp.max_send_wr = init_attr->cap.max_send_wr;
resp.max_recv_wr = init_attr->cap.max_recv_wr;
@@ -2303,15 +1727,13 @@ ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file,
resp.max_inline_data = init_attr->cap.max_inline_data;
resp.sq_sig_all = init_attr->sq_sig_type == IB_SIGNAL_ALL_WR;
- if (copy_to_user((void __user *) (unsigned long) cmd.response,
- &resp, sizeof resp))
- ret = -EFAULT;
+ ret = uverbs_response(attrs, &resp, sizeof(resp));
out:
kfree(attr);
kfree(init_attr);
- return ret ? ret : in_len;
+ return ret;
}
/* Remove ignored fields set in the attribute mask */
@@ -2328,263 +1750,320 @@ static int modify_qp_mask(enum ib_qp_type qp_type, int mask)
}
}
-static int modify_qp(struct ib_uverbs_file *file,
- struct ib_uverbs_ex_modify_qp *cmd, struct ib_udata *udata)
+static void copy_ah_attr_from_uverbs(struct ib_device *dev,
+ struct rdma_ah_attr *rdma_attr,
+ struct ib_uverbs_qp_dest *uverb_attr)
+{
+ rdma_attr->type = rdma_ah_find_type(dev, uverb_attr->port_num);
+ if (uverb_attr->is_global) {
+ rdma_ah_set_grh(rdma_attr, NULL,
+ uverb_attr->flow_label,
+ uverb_attr->sgid_index,
+ uverb_attr->hop_limit,
+ uverb_attr->traffic_class);
+ rdma_ah_set_dgid_raw(rdma_attr, uverb_attr->dgid);
+ } else {
+ rdma_ah_set_ah_flags(rdma_attr, 0);
+ }
+ rdma_ah_set_dlid(rdma_attr, uverb_attr->dlid);
+ rdma_ah_set_sl(rdma_attr, uverb_attr->sl);
+ rdma_ah_set_path_bits(rdma_attr, uverb_attr->src_path_bits);
+ rdma_ah_set_static_rate(rdma_attr, uverb_attr->static_rate);
+ rdma_ah_set_port_num(rdma_attr, uverb_attr->port_num);
+ rdma_ah_set_make_grd(rdma_attr, false);
+}
+
+static int modify_qp(struct uverbs_attr_bundle *attrs,
+ struct ib_uverbs_ex_modify_qp *cmd)
{
struct ib_qp_attr *attr;
struct ib_qp *qp;
int ret;
- attr = kmalloc(sizeof *attr, GFP_KERNEL);
+ attr = kzalloc(sizeof(*attr), GFP_KERNEL);
if (!attr)
return -ENOMEM;
- qp = idr_read_qp(cmd->base.qp_handle, file->ucontext);
- if (!qp) {
- ret = -EINVAL;
+ qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd->base.qp_handle,
+ attrs);
+ if (IS_ERR(qp)) {
+ ret = PTR_ERR(qp);
goto out;
}
- attr->qp_state = cmd->base.qp_state;
- attr->cur_qp_state = cmd->base.cur_qp_state;
- attr->path_mtu = cmd->base.path_mtu;
- attr->path_mig_state = cmd->base.path_mig_state;
- attr->qkey = cmd->base.qkey;
- attr->rq_psn = cmd->base.rq_psn;
- attr->sq_psn = cmd->base.sq_psn;
- attr->dest_qp_num = cmd->base.dest_qp_num;
- attr->qp_access_flags = cmd->base.qp_access_flags;
- attr->pkey_index = cmd->base.pkey_index;
- attr->alt_pkey_index = cmd->base.alt_pkey_index;
- attr->en_sqd_async_notify = cmd->base.en_sqd_async_notify;
- attr->max_rd_atomic = cmd->base.max_rd_atomic;
- attr->max_dest_rd_atomic = cmd->base.max_dest_rd_atomic;
- attr->min_rnr_timer = cmd->base.min_rnr_timer;
- attr->port_num = cmd->base.port_num;
- attr->timeout = cmd->base.timeout;
- attr->retry_cnt = cmd->base.retry_cnt;
- attr->rnr_retry = cmd->base.rnr_retry;
- attr->alt_port_num = cmd->base.alt_port_num;
- attr->alt_timeout = cmd->base.alt_timeout;
- attr->rate_limit = cmd->rate_limit;
-
- memcpy(attr->ah_attr.grh.dgid.raw, cmd->base.dest.dgid, 16);
- attr->ah_attr.grh.flow_label = cmd->base.dest.flow_label;
- attr->ah_attr.grh.sgid_index = cmd->base.dest.sgid_index;
- attr->ah_attr.grh.hop_limit = cmd->base.dest.hop_limit;
- attr->ah_attr.grh.traffic_class = cmd->base.dest.traffic_class;
- attr->ah_attr.dlid = cmd->base.dest.dlid;
- attr->ah_attr.sl = cmd->base.dest.sl;
- attr->ah_attr.src_path_bits = cmd->base.dest.src_path_bits;
- attr->ah_attr.static_rate = cmd->base.dest.static_rate;
- attr->ah_attr.ah_flags = cmd->base.dest.is_global ?
- IB_AH_GRH : 0;
- attr->ah_attr.port_num = cmd->base.dest.port_num;
-
- memcpy(attr->alt_ah_attr.grh.dgid.raw, cmd->base.alt_dest.dgid, 16);
- attr->alt_ah_attr.grh.flow_label = cmd->base.alt_dest.flow_label;
- attr->alt_ah_attr.grh.sgid_index = cmd->base.alt_dest.sgid_index;
- attr->alt_ah_attr.grh.hop_limit = cmd->base.alt_dest.hop_limit;
- attr->alt_ah_attr.grh.traffic_class = cmd->base.alt_dest.traffic_class;
- attr->alt_ah_attr.dlid = cmd->base.alt_dest.dlid;
- attr->alt_ah_attr.sl = cmd->base.alt_dest.sl;
- attr->alt_ah_attr.src_path_bits = cmd->base.alt_dest.src_path_bits;
- attr->alt_ah_attr.static_rate = cmd->base.alt_dest.static_rate;
- attr->alt_ah_attr.ah_flags = cmd->base.alt_dest.is_global ?
- IB_AH_GRH : 0;
- attr->alt_ah_attr.port_num = cmd->base.alt_dest.port_num;
-
- if (qp->real_qp == qp) {
- if (cmd->base.attr_mask & IB_QP_AV) {
- ret = ib_resolve_eth_dmac(qp->device, &attr->ah_attr);
- if (ret)
+ if ((cmd->base.attr_mask & IB_QP_PORT) &&
+ !rdma_is_port_valid(qp->device, cmd->base.port_num)) {
+ ret = -EINVAL;
+ goto release_qp;
+ }
+
+ if ((cmd->base.attr_mask & IB_QP_AV)) {
+ if (!rdma_is_port_valid(qp->device, cmd->base.dest.port_num)) {
+ ret = -EINVAL;
+ goto release_qp;
+ }
+
+ if (cmd->base.attr_mask & IB_QP_STATE &&
+ cmd->base.qp_state == IB_QPS_RTR) {
+ /* We are in INIT->RTR TRANSITION (if we are not,
+ * this transition will be rejected in subsequent checks).
+ * In the INIT->RTR transition, we cannot have IB_QP_PORT set,
+ * but the IB_QP_STATE flag is required.
+ *
+ * Since kernel 3.14 (commit dbf727de7440), the uverbs driver,
+ * when IB_QP_AV is set, has required inclusion of a valid
+ * port number in the primary AV. (AVs are created and handled
+ * differently for infiniband and ethernet (RoCE) ports).
+ *
+ * Check the port number included in the primary AV against
+ * the port number in the qp struct, which was set (and saved)
+ * in the RST->INIT transition.
+ */
+ if (cmd->base.dest.port_num != qp->real_qp->port) {
+ ret = -EINVAL;
goto release_qp;
+ }
+ } else {
+ /* We are in SQD->SQD. (If we are not, this transition will
+ * be rejected later in the verbs layer checks).
+ * Check for both IB_QP_PORT and IB_QP_AV, these can be set
+ * together in the SQD->SQD transition.
+ *
+ * If only IP_QP_AV was set, add in IB_QP_PORT as well (the
+ * verbs layer driver does not track primary port changes
+ * resulting from path migration. Thus, in SQD, if the primary
+ * AV is modified, the primary port should also be modified).
+ *
+ * Note that in this transition, the IB_QP_STATE flag
+ * is not allowed.
+ */
+ if (((cmd->base.attr_mask & (IB_QP_AV | IB_QP_PORT))
+ == (IB_QP_AV | IB_QP_PORT)) &&
+ cmd->base.port_num != cmd->base.dest.port_num) {
+ ret = -EINVAL;
+ goto release_qp;
+ }
+ if ((cmd->base.attr_mask & (IB_QP_AV | IB_QP_PORT))
+ == IB_QP_AV) {
+ cmd->base.attr_mask |= IB_QP_PORT;
+ cmd->base.port_num = cmd->base.dest.port_num;
+ }
}
- ret = qp->device->modify_qp(qp, attr,
- modify_qp_mask(qp->qp_type,
- cmd->base.attr_mask),
- udata);
- } else {
- ret = ib_modify_qp(qp, attr,
- modify_qp_mask(qp->qp_type,
- cmd->base.attr_mask));
}
-release_qp:
- put_qp_read(qp);
+ if ((cmd->base.attr_mask & IB_QP_ALT_PATH) &&
+ (!rdma_is_port_valid(qp->device, cmd->base.alt_port_num) ||
+ !rdma_is_port_valid(qp->device, cmd->base.alt_dest.port_num) ||
+ cmd->base.alt_port_num != cmd->base.alt_dest.port_num)) {
+ ret = -EINVAL;
+ goto release_qp;
+ }
+ if ((cmd->base.attr_mask & IB_QP_CUR_STATE &&
+ cmd->base.cur_qp_state > IB_QPS_ERR) ||
+ (cmd->base.attr_mask & IB_QP_STATE &&
+ cmd->base.qp_state > IB_QPS_ERR)) {
+ ret = -EINVAL;
+ goto release_qp;
+ }
+
+ if (cmd->base.attr_mask & IB_QP_STATE)
+ attr->qp_state = cmd->base.qp_state;
+ if (cmd->base.attr_mask & IB_QP_CUR_STATE)
+ attr->cur_qp_state = cmd->base.cur_qp_state;
+ if (cmd->base.attr_mask & IB_QP_PATH_MTU)
+ attr->path_mtu = cmd->base.path_mtu;
+ if (cmd->base.attr_mask & IB_QP_PATH_MIG_STATE)
+ attr->path_mig_state = cmd->base.path_mig_state;
+ if (cmd->base.attr_mask & IB_QP_QKEY) {
+ if (cmd->base.qkey & IB_QP_SET_QKEY &&
+ !(rdma_nl_get_privileged_qkey() ||
+ rdma_uattrs_has_raw_cap(attrs))) {
+ ret = -EPERM;
+ goto release_qp;
+ }
+ attr->qkey = cmd->base.qkey;
+ }
+ if (cmd->base.attr_mask & IB_QP_RQ_PSN)
+ attr->rq_psn = cmd->base.rq_psn;
+ if (cmd->base.attr_mask & IB_QP_SQ_PSN)
+ attr->sq_psn = cmd->base.sq_psn;
+ if (cmd->base.attr_mask & IB_QP_DEST_QPN)
+ attr->dest_qp_num = cmd->base.dest_qp_num;
+ if (cmd->base.attr_mask & IB_QP_ACCESS_FLAGS)
+ attr->qp_access_flags = cmd->base.qp_access_flags;
+ if (cmd->base.attr_mask & IB_QP_PKEY_INDEX)
+ attr->pkey_index = cmd->base.pkey_index;
+ if (cmd->base.attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY)
+ attr->en_sqd_async_notify = cmd->base.en_sqd_async_notify;
+ if (cmd->base.attr_mask & IB_QP_MAX_QP_RD_ATOMIC)
+ attr->max_rd_atomic = cmd->base.max_rd_atomic;
+ if (cmd->base.attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
+ attr->max_dest_rd_atomic = cmd->base.max_dest_rd_atomic;
+ if (cmd->base.attr_mask & IB_QP_MIN_RNR_TIMER)
+ attr->min_rnr_timer = cmd->base.min_rnr_timer;
+ if (cmd->base.attr_mask & IB_QP_PORT)
+ attr->port_num = cmd->base.port_num;
+ if (cmd->base.attr_mask & IB_QP_TIMEOUT)
+ attr->timeout = cmd->base.timeout;
+ if (cmd->base.attr_mask & IB_QP_RETRY_CNT)
+ attr->retry_cnt = cmd->base.retry_cnt;
+ if (cmd->base.attr_mask & IB_QP_RNR_RETRY)
+ attr->rnr_retry = cmd->base.rnr_retry;
+ if (cmd->base.attr_mask & IB_QP_ALT_PATH) {
+ attr->alt_port_num = cmd->base.alt_port_num;
+ attr->alt_timeout = cmd->base.alt_timeout;
+ attr->alt_pkey_index = cmd->base.alt_pkey_index;
+ }
+ if (cmd->base.attr_mask & IB_QP_RATE_LIMIT)
+ attr->rate_limit = cmd->rate_limit;
+
+ if (cmd->base.attr_mask & IB_QP_AV)
+ copy_ah_attr_from_uverbs(qp->device, &attr->ah_attr,
+ &cmd->base.dest);
+
+ if (cmd->base.attr_mask & IB_QP_ALT_PATH)
+ copy_ah_attr_from_uverbs(qp->device, &attr->alt_ah_attr,
+ &cmd->base.alt_dest);
+
+ ret = ib_modify_qp_with_udata(qp, attr,
+ modify_qp_mask(qp->qp_type,
+ cmd->base.attr_mask),
+ &attrs->driver_udata);
+
+release_qp:
+ rdma_lookup_put_uobject(&qp->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
out:
kfree(attr);
return ret;
}
-ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_modify_qp(struct uverbs_attr_bundle *attrs)
{
- struct ib_uverbs_ex_modify_qp cmd = {};
- struct ib_udata udata;
+ struct ib_uverbs_ex_modify_qp cmd;
int ret;
- if (copy_from_user(&cmd.base, buf, sizeof(cmd.base)))
- return -EFAULT;
-
- if (cmd.base.attr_mask &
- ~((IB_USER_LEGACY_LAST_QP_ATTR_MASK << 1) - 1))
- return -EOPNOTSUPP;
-
- INIT_UDATA(&udata, buf + sizeof(cmd.base), NULL,
- in_len - sizeof(cmd.base), out_len);
-
- ret = modify_qp(file, &cmd, &udata);
+ ret = uverbs_request(attrs, &cmd.base, sizeof(cmd.base));
if (ret)
return ret;
- return in_len;
+ if (cmd.base.attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
+ return -EOPNOTSUPP;
+
+ return modify_qp(attrs, &cmd);
}
-int ib_uverbs_ex_modify_qp(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- struct ib_udata *ucore,
- struct ib_udata *uhw)
+static int ib_uverbs_ex_modify_qp(struct uverbs_attr_bundle *attrs)
{
- struct ib_uverbs_ex_modify_qp cmd = {};
+ struct ib_uverbs_ex_modify_qp cmd;
+ struct ib_uverbs_ex_modify_qp_resp resp = {
+ .response_length = uverbs_response_length(attrs, sizeof(resp))
+ };
int ret;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
+
/*
* Last bit is reserved for extending the attr_mask by
* using another field.
*/
- BUILD_BUG_ON(IB_USER_LAST_QP_ATTR_MASK == (1 << 31));
-
- if (ucore->inlen < sizeof(cmd.base))
- return -EINVAL;
+ if (cmd.base.attr_mask & ~(IB_QP_ATTR_STANDARD_BITS | IB_QP_RATE_LIMIT))
+ return -EOPNOTSUPP;
- ret = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen));
+ ret = modify_qp(attrs, &cmd);
if (ret)
return ret;
- if (cmd.base.attr_mask &
- ~((IB_USER_LAST_QP_ATTR_MASK << 1) - 1))
- return -EOPNOTSUPP;
-
- if (ucore->inlen > sizeof(cmd)) {
- if (ib_is_udata_cleared(ucore, sizeof(cmd),
- ucore->inlen - sizeof(cmd)))
- return -EOPNOTSUPP;
- }
-
- ret = modify_qp(file, &cmd, uhw);
-
- return ret;
+ return uverbs_response(attrs, &resp, sizeof(resp));
}
-ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_destroy_qp(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_destroy_qp cmd;
struct ib_uverbs_destroy_qp_resp resp;
struct ib_uobject *uobj;
- struct ib_qp *qp;
struct ib_uqp_object *obj;
- int ret = -EINVAL;
-
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
-
- memset(&resp, 0, sizeof resp);
-
- uobj = idr_write_uobj(&ib_uverbs_qp_idr, cmd.qp_handle, file->ucontext);
- if (!uobj)
- return -EINVAL;
- qp = uobj->object;
- obj = container_of(uobj, struct ib_uqp_object, uevent.uobject);
-
- if (!list_empty(&obj->mcast_list)) {
- put_uobj_write(uobj);
- return -EBUSY;
- }
-
- ret = ib_destroy_qp(qp);
- if (!ret)
- uobj->live = 0;
-
- put_uobj_write(uobj);
+ int ret;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
if (ret)
return ret;
- if (obj->uxrcd)
- atomic_dec(&obj->uxrcd->refcnt);
-
- idr_remove_uobj(&ib_uverbs_qp_idr, uobj);
-
- mutex_lock(&file->mutex);
- list_del(&uobj->list);
- mutex_unlock(&file->mutex);
-
- ib_uverbs_release_uevent(file, &obj->uevent);
+ uobj = uobj_get_destroy(UVERBS_OBJECT_QP, cmd.qp_handle, attrs);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
+ obj = container_of(uobj, struct ib_uqp_object, uevent.uobject);
+ memset(&resp, 0, sizeof(resp));
resp.events_reported = obj->uevent.events_reported;
- put_uobj(uobj);
+ uobj_put_destroy(uobj);
- if (copy_to_user((void __user *) (unsigned long) cmd.response,
- &resp, sizeof resp))
- return -EFAULT;
-
- return in_len;
+ return uverbs_response(attrs, &resp, sizeof(resp));
}
static void *alloc_wr(size_t wr_size, __u32 num_sge)
{
- return kmalloc(ALIGN(wr_size, sizeof (struct ib_sge)) +
- num_sge * sizeof (struct ib_sge), GFP_KERNEL);
-};
+ if (num_sge >= (U32_MAX - ALIGN(wr_size, sizeof(struct ib_sge))) /
+ sizeof(struct ib_sge))
+ return NULL;
+
+ return kmalloc(ALIGN(wr_size, sizeof(struct ib_sge)) +
+ num_sge * sizeof(struct ib_sge),
+ GFP_KERNEL);
+}
-ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_post_send(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_post_send cmd;
struct ib_uverbs_post_send_resp resp;
struct ib_uverbs_send_wr *user_wr;
- struct ib_send_wr *wr = NULL, *last, *next, *bad_wr;
+ struct ib_send_wr *wr = NULL, *last, *next;
+ const struct ib_send_wr *bad_wr;
struct ib_qp *qp;
int i, sg_ind;
int is_ud;
- ssize_t ret = -EINVAL;
+ int ret, ret2;
size_t next_size;
+ const struct ib_sge __user *sgls;
+ const void __user *wqes;
+ struct uverbs_req_iter iter;
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
-
- if (in_len < sizeof cmd + cmd.wqe_size * cmd.wr_count +
- cmd.sge_count * sizeof (struct ib_uverbs_sge))
- return -EINVAL;
-
- if (cmd.wqe_size < sizeof (struct ib_uverbs_send_wr))
- return -EINVAL;
+ ret = uverbs_request_start(attrs, &iter, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
+ wqes = uverbs_request_next_ptr(&iter, size_mul(cmd.wqe_size,
+ cmd.wr_count));
+ if (IS_ERR(wqes))
+ return PTR_ERR(wqes);
+ sgls = uverbs_request_next_ptr(&iter,
+ size_mul(cmd.sge_count,
+ sizeof(struct ib_uverbs_sge)));
+ if (IS_ERR(sgls))
+ return PTR_ERR(sgls);
+ ret = uverbs_request_finish(&iter);
+ if (ret)
+ return ret;
user_wr = kmalloc(cmd.wqe_size, GFP_KERNEL);
if (!user_wr)
return -ENOMEM;
- qp = idr_read_qp(cmd.qp_handle, file->ucontext);
- if (!qp)
+ qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, attrs);
+ if (IS_ERR(qp)) {
+ ret = PTR_ERR(qp);
goto out;
+ }
is_ud = qp->qp_type == IB_QPT_UD;
sg_ind = 0;
last = NULL;
for (i = 0; i < cmd.wr_count; ++i) {
- if (copy_from_user(user_wr,
- buf + sizeof cmd + i * cmd.wqe_size,
+ if (copy_from_user(user_wr, wqes + i * cmd.wqe_size,
cmd.wqe_size)) {
ret = -EFAULT;
goto out_put;
@@ -2611,10 +2090,11 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
goto out_put;
}
- ud->ah = idr_read_ah(user_wr->wr.ud.ah, file->ucontext);
- if (!ud->ah) {
+ ud->ah = uobj_get_obj_read(ah, UVERBS_OBJECT_AH,
+ user_wr->wr.ud.ah, attrs);
+ if (IS_ERR(ud->ah)) {
+ ret = PTR_ERR(ud->ah);
kfree(ud);
- ret = -EINVAL;
goto out_put;
}
ud->remote_qpn = user_wr->wr.ud.remote_qpn;
@@ -2691,11 +2171,9 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
if (next->num_sge) {
next->sg_list = (void *) next +
ALIGN(next_size, sizeof(struct ib_sge));
- if (copy_from_user(next->sg_list,
- buf + sizeof cmd +
- cmd.wr_count * cmd.wqe_size +
- sg_ind * sizeof (struct ib_sge),
- next->num_sge * sizeof (struct ib_sge))) {
+ if (copy_from_user(next->sg_list, sgls + sg_ind,
+ next->num_sge *
+ sizeof(struct ib_sge))) {
ret = -EFAULT;
goto out_put;
}
@@ -2705,7 +2183,7 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
}
resp.bad_wr = 0;
- ret = qp->device->post_send(qp->real_qp, wr, &bad_wr);
+ ret = qp->device->ops.post_send(qp->real_qp, wr, &bad_wr);
if (ret)
for (next = wr; next; next = next->next) {
++resp.bad_wr;
@@ -2713,16 +2191,17 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
break;
}
- if (copy_to_user((void __user *) (unsigned long) cmd.response,
- &resp, sizeof resp))
- ret = -EFAULT;
+ ret2 = uverbs_response(attrs, &resp, sizeof(resp));
+ if (ret2)
+ ret = ret2;
out_put:
- put_qp_read(qp);
+ rdma_lookup_put_uobject(&qp->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
while (wr) {
if (is_ud && ud_wr(wr)->ah)
- put_ah_read(ud_wr(wr)->ah);
+ uobj_put_obj_read(ud_wr(wr)->ah);
next = wr->next;
kfree(wr);
wr = next;
@@ -2731,27 +2210,34 @@ out_put:
out:
kfree(user_wr);
- return ret ? ret : in_len;
+ return ret;
}
-static struct ib_recv_wr *ib_uverbs_unmarshall_recv(const char __user *buf,
- int in_len,
- u32 wr_count,
- u32 sge_count,
- u32 wqe_size)
+static struct ib_recv_wr *
+ib_uverbs_unmarshall_recv(struct uverbs_req_iter *iter, u32 wr_count,
+ u32 wqe_size, u32 sge_count)
{
struct ib_uverbs_recv_wr *user_wr;
struct ib_recv_wr *wr = NULL, *last, *next;
int sg_ind;
int i;
int ret;
+ const struct ib_sge __user *sgls;
+ const void __user *wqes;
- if (in_len < wqe_size * wr_count +
- sge_count * sizeof (struct ib_uverbs_sge))
+ if (wqe_size < sizeof(struct ib_uverbs_recv_wr))
return ERR_PTR(-EINVAL);
- if (wqe_size < sizeof (struct ib_uverbs_recv_wr))
- return ERR_PTR(-EINVAL);
+ wqes = uverbs_request_next_ptr(iter, size_mul(wqe_size, wr_count));
+ if (IS_ERR(wqes))
+ return ERR_CAST(wqes);
+ sgls = uverbs_request_next_ptr(iter, size_mul(sge_count,
+ sizeof(struct ib_uverbs_sge)));
+ if (IS_ERR(sgls))
+ return ERR_CAST(sgls);
+ ret = uverbs_request_finish(iter);
+ if (ret)
+ return ERR_PTR(ret);
user_wr = kmalloc(wqe_size, GFP_KERNEL);
if (!user_wr)
@@ -2760,7 +2246,7 @@ static struct ib_recv_wr *ib_uverbs_unmarshall_recv(const char __user *buf,
sg_ind = 0;
last = NULL;
for (i = 0; i < wr_count; ++i) {
- if (copy_from_user(user_wr, buf + i * wqe_size,
+ if (copy_from_user(user_wr, wqes + i * wqe_size,
wqe_size)) {
ret = -EFAULT;
goto err;
@@ -2771,8 +2257,15 @@ static struct ib_recv_wr *ib_uverbs_unmarshall_recv(const char __user *buf,
goto err;
}
- next = kmalloc(ALIGN(sizeof *next, sizeof (struct ib_sge)) +
- user_wr->num_sge * sizeof (struct ib_sge),
+ if (user_wr->num_sge >=
+ (U32_MAX - ALIGN(sizeof(*next), sizeof(struct ib_sge))) /
+ sizeof(struct ib_sge)) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ next = kmalloc(ALIGN(sizeof(*next), sizeof(struct ib_sge)) +
+ user_wr->num_sge * sizeof(struct ib_sge),
GFP_KERNEL);
if (!next) {
ret = -ENOMEM;
@@ -2790,12 +2283,11 @@ static struct ib_recv_wr *ib_uverbs_unmarshall_recv(const char __user *buf,
next->num_sge = user_wr->num_sge;
if (next->num_sge) {
- next->sg_list = (void *) next +
- ALIGN(sizeof *next, sizeof (struct ib_sge));
- if (copy_from_user(next->sg_list,
- buf + wr_count * wqe_size +
- sg_ind * sizeof (struct ib_sge),
- next->num_sge * sizeof (struct ib_sge))) {
+ next->sg_list = (void *)next +
+ ALIGN(sizeof(*next), sizeof(struct ib_sge));
+ if (copy_from_user(next->sg_list, sgls + sg_ind,
+ next->num_sge *
+ sizeof(struct ib_sge))) {
ret = -EFAULT;
goto err;
}
@@ -2819,46 +2311,47 @@ err:
return ERR_PTR(ret);
}
-ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_post_recv(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_post_recv cmd;
struct ib_uverbs_post_recv_resp resp;
- struct ib_recv_wr *wr, *next, *bad_wr;
+ struct ib_recv_wr *wr, *next;
+ const struct ib_recv_wr *bad_wr;
struct ib_qp *qp;
- ssize_t ret = -EINVAL;
+ int ret, ret2;
+ struct uverbs_req_iter iter;
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
+ ret = uverbs_request_start(attrs, &iter, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- wr = ib_uverbs_unmarshall_recv(buf + sizeof cmd,
- in_len - sizeof cmd, cmd.wr_count,
- cmd.sge_count, cmd.wqe_size);
+ wr = ib_uverbs_unmarshall_recv(&iter, cmd.wr_count, cmd.wqe_size,
+ cmd.sge_count);
if (IS_ERR(wr))
return PTR_ERR(wr);
- qp = idr_read_qp(cmd.qp_handle, file->ucontext);
- if (!qp)
+ qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, attrs);
+ if (IS_ERR(qp)) {
+ ret = PTR_ERR(qp);
goto out;
+ }
resp.bad_wr = 0;
- ret = qp->device->post_recv(qp->real_qp, wr, &bad_wr);
+ ret = qp->device->ops.post_recv(qp->real_qp, wr, &bad_wr);
- put_qp_read(qp);
-
- if (ret)
+ rdma_lookup_put_uobject(&qp->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
+ if (ret) {
for (next = wr; next; next = next->next) {
++resp.bad_wr;
if (next == bad_wr)
break;
}
+ }
- if (copy_to_user((void __user *) (unsigned long) cmd.response,
- &resp, sizeof resp))
- ret = -EFAULT;
-
+ ret2 = uverbs_response(attrs, &resp, sizeof(resp));
+ if (ret2)
+ ret = ret2;
out:
while (wr) {
next = wr->next;
@@ -2866,37 +2359,39 @@ out:
wr = next;
}
- return ret ? ret : in_len;
+ return ret;
}
-ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_post_srq_recv(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_post_srq_recv cmd;
struct ib_uverbs_post_srq_recv_resp resp;
- struct ib_recv_wr *wr, *next, *bad_wr;
+ struct ib_recv_wr *wr, *next;
+ const struct ib_recv_wr *bad_wr;
struct ib_srq *srq;
- ssize_t ret = -EINVAL;
+ int ret, ret2;
+ struct uverbs_req_iter iter;
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
+ ret = uverbs_request_start(attrs, &iter, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- wr = ib_uverbs_unmarshall_recv(buf + sizeof cmd,
- in_len - sizeof cmd, cmd.wr_count,
- cmd.sge_count, cmd.wqe_size);
+ wr = ib_uverbs_unmarshall_recv(&iter, cmd.wr_count, cmd.wqe_size,
+ cmd.sge_count);
if (IS_ERR(wr))
return PTR_ERR(wr);
- srq = idr_read_srq(cmd.srq_handle, file->ucontext);
- if (!srq)
+ srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, attrs);
+ if (IS_ERR(srq)) {
+ ret = PTR_ERR(srq);
goto out;
+ }
resp.bad_wr = 0;
- ret = srq->device->post_srq_recv(srq, wr, &bad_wr);
+ ret = srq->device->ops.post_srq_recv(srq, wr, &bad_wr);
- put_srq_read(srq);
+ rdma_lookup_put_uobject(&srq->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
if (ret)
for (next = wr; next; next = next->next) {
@@ -2905,9 +2400,9 @@ ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file,
break;
}
- if (copy_to_user((void __user *) (unsigned long) cmd.response,
- &resp, sizeof resp))
- ret = -EFAULT;
+ ret2 = uverbs_response(attrs, &resp, sizeof(resp));
+ if (ret2)
+ ret = ret2;
out:
while (wr) {
@@ -2916,151 +2411,92 @@ out:
wr = next;
}
- return ret ? ret : in_len;
+ return ret;
}
-ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_create_ah(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_create_ah cmd;
struct ib_uverbs_create_ah_resp resp;
struct ib_uobject *uobj;
struct ib_pd *pd;
struct ib_ah *ah;
- struct ib_ah_attr attr;
+ struct rdma_ah_attr attr = {};
int ret;
- struct ib_udata udata;
-
- if (out_len < sizeof resp)
- return -ENOSPC;
-
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
+ struct ib_device *ib_dev;
- INIT_UDATA(&udata, buf + sizeof(cmd),
- (unsigned long)cmd.response + sizeof(resp),
- in_len - sizeof(cmd), out_len - sizeof(resp));
-
- uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
- if (!uobj)
- return -ENOMEM;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- init_uobj(uobj, cmd.user_handle, file->ucontext, &ah_lock_class);
- down_write(&uobj->mutex);
+ uobj = uobj_alloc(UVERBS_OBJECT_AH, attrs, &ib_dev);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
- pd = idr_read_pd(cmd.pd_handle, file->ucontext);
- if (!pd) {
+ if (!rdma_is_port_valid(ib_dev, cmd.attr.port_num)) {
ret = -EINVAL;
goto err;
}
- attr.dlid = cmd.attr.dlid;
- attr.sl = cmd.attr.sl;
- attr.src_path_bits = cmd.attr.src_path_bits;
- attr.static_rate = cmd.attr.static_rate;
- attr.ah_flags = cmd.attr.is_global ? IB_AH_GRH : 0;
- attr.port_num = cmd.attr.port_num;
- attr.grh.flow_label = cmd.attr.grh.flow_label;
- attr.grh.sgid_index = cmd.attr.grh.sgid_index;
- attr.grh.hop_limit = cmd.attr.grh.hop_limit;
- attr.grh.traffic_class = cmd.attr.grh.traffic_class;
- memset(&attr.dmac, 0, sizeof(attr.dmac));
- memcpy(attr.grh.dgid.raw, cmd.attr.grh.dgid, 16);
-
- ah = pd->device->create_ah(pd, &attr, &udata);
+ pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, attrs);
+ if (IS_ERR(pd)) {
+ ret = PTR_ERR(pd);
+ goto err;
+ }
+
+ attr.type = rdma_ah_find_type(ib_dev, cmd.attr.port_num);
+ rdma_ah_set_make_grd(&attr, false);
+ rdma_ah_set_dlid(&attr, cmd.attr.dlid);
+ rdma_ah_set_sl(&attr, cmd.attr.sl);
+ rdma_ah_set_path_bits(&attr, cmd.attr.src_path_bits);
+ rdma_ah_set_static_rate(&attr, cmd.attr.static_rate);
+ rdma_ah_set_port_num(&attr, cmd.attr.port_num);
+
+ if (cmd.attr.is_global) {
+ rdma_ah_set_grh(&attr, NULL, cmd.attr.grh.flow_label,
+ cmd.attr.grh.sgid_index,
+ cmd.attr.grh.hop_limit,
+ cmd.attr.grh.traffic_class);
+ rdma_ah_set_dgid_raw(&attr, cmd.attr.grh.dgid);
+ } else {
+ rdma_ah_set_ah_flags(&attr, 0);
+ }
+ ah = rdma_create_user_ah(pd, &attr, &attrs->driver_udata);
if (IS_ERR(ah)) {
ret = PTR_ERR(ah);
goto err_put;
}
- ah->device = pd->device;
- ah->pd = pd;
- atomic_inc(&pd->usecnt);
ah->uobject = uobj;
+ uobj->user_handle = cmd.user_handle;
uobj->object = ah;
-
- ret = idr_add_uobj(&ib_uverbs_ah_idr, uobj);
- if (ret)
- goto err_destroy;
+ uobj_put_obj_read(pd);
+ uobj_finalize_uobj_create(uobj, attrs);
resp.ah_handle = uobj->id;
-
- if (copy_to_user((void __user *) (unsigned long) cmd.response,
- &resp, sizeof resp)) {
- ret = -EFAULT;
- goto err_copy;
- }
-
- put_pd_read(pd);
-
- mutex_lock(&file->mutex);
- list_add_tail(&uobj->list, &file->ucontext->ah_list);
- mutex_unlock(&file->mutex);
-
- uobj->live = 1;
-
- up_write(&uobj->mutex);
-
- return in_len;
-
-err_copy:
- idr_remove_uobj(&ib_uverbs_ah_idr, uobj);
-
-err_destroy:
- ib_destroy_ah(ah);
+ return uverbs_response(attrs, &resp, sizeof(resp));
err_put:
- put_pd_read(pd);
-
+ uobj_put_obj_read(pd);
err:
- put_uobj_write(uobj);
+ uobj_alloc_abort(uobj, attrs);
return ret;
}
-ssize_t ib_uverbs_destroy_ah(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- const char __user *buf, int in_len, int out_len)
+static int ib_uverbs_destroy_ah(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_destroy_ah cmd;
- struct ib_ah *ah;
- struct ib_uobject *uobj;
- int ret;
-
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
-
- uobj = idr_write_uobj(&ib_uverbs_ah_idr, cmd.ah_handle, file->ucontext);
- if (!uobj)
- return -EINVAL;
- ah = uobj->object;
-
- ret = ib_destroy_ah(ah);
- if (!ret)
- uobj->live = 0;
-
- put_uobj_write(uobj);
+ int ret;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
if (ret)
return ret;
- idr_remove_uobj(&ib_uverbs_ah_idr, uobj);
-
- mutex_lock(&file->mutex);
- list_del(&uobj->list);
- mutex_unlock(&file->mutex);
-
- put_uobj(uobj);
-
- return in_len;
+ return uobj_perform_destroy(UVERBS_OBJECT_AH, cmd.ah_handle, attrs);
}
-ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_attach_mcast(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_attach_mcast cmd;
struct ib_qp *qp;
@@ -3068,15 +2504,17 @@ ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file,
struct ib_uverbs_mcast_entry *mcast;
int ret;
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- qp = idr_write_qp(cmd.qp_handle, file->ucontext);
- if (!qp)
- return -EINVAL;
+ qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, attrs);
+ if (IS_ERR(qp))
+ return PTR_ERR(qp);
- obj = container_of(qp->uobject, struct ib_uqp_object, uevent.uobject);
+ obj = qp->uobject;
+ mutex_lock(&obj->mcast_lock);
list_for_each_entry(mcast, &obj->mcast_list, list)
if (cmd.mlid == mcast->lid &&
!memcmp(cmd.gid, mcast->gid.raw, sizeof mcast->gid.raw)) {
@@ -3100,56 +2538,196 @@ ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file,
kfree(mcast);
out_put:
- put_qp_write(qp);
+ mutex_unlock(&obj->mcast_lock);
+ rdma_lookup_put_uobject(&qp->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
- return ret ? ret : in_len;
+ return ret;
}
-ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_detach_mcast(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_detach_mcast cmd;
struct ib_uqp_object *obj;
struct ib_qp *qp;
struct ib_uverbs_mcast_entry *mcast;
- int ret = -EINVAL;
-
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
-
- qp = idr_write_qp(cmd.qp_handle, file->ucontext);
- if (!qp)
- return -EINVAL;
+ int ret;
+ bool found = false;
- ret = ib_detach_mcast(qp, (union ib_gid *) cmd.gid, cmd.mlid);
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
if (ret)
- goto out_put;
+ return ret;
+
+ qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, attrs);
+ if (IS_ERR(qp))
+ return PTR_ERR(qp);
- obj = container_of(qp->uobject, struct ib_uqp_object, uevent.uobject);
+ obj = qp->uobject;
+ mutex_lock(&obj->mcast_lock);
list_for_each_entry(mcast, &obj->mcast_list, list)
if (cmd.mlid == mcast->lid &&
!memcmp(cmd.gid, mcast->gid.raw, sizeof mcast->gid.raw)) {
list_del(&mcast->list);
kfree(mcast);
+ found = true;
break;
}
+ if (!found) {
+ ret = -EINVAL;
+ goto out_put;
+ }
+
+ ret = ib_detach_mcast(qp, (union ib_gid *)cmd.gid, cmd.mlid);
+
out_put:
- put_qp_write(qp);
+ mutex_unlock(&obj->mcast_lock);
+ rdma_lookup_put_uobject(&qp->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
+ return ret;
+}
+
+struct ib_uflow_resources *flow_resources_alloc(size_t num_specs)
+{
+ struct ib_uflow_resources *resources;
+
+ resources = kzalloc(sizeof(*resources), GFP_KERNEL);
+
+ if (!resources)
+ return NULL;
+
+ if (!num_specs)
+ goto out;
+
+ resources->counters =
+ kcalloc(num_specs, sizeof(*resources->counters), GFP_KERNEL);
+ resources->collection =
+ kcalloc(num_specs, sizeof(*resources->collection), GFP_KERNEL);
+
+ if (!resources->counters || !resources->collection)
+ goto err;
+
+out:
+ resources->max = num_specs;
+ return resources;
+
+err:
+ kfree(resources->counters);
+ kfree(resources);
+
+ return NULL;
+}
+EXPORT_SYMBOL(flow_resources_alloc);
+
+void ib_uverbs_flow_resources_free(struct ib_uflow_resources *uflow_res)
+{
+ unsigned int i;
+
+ if (!uflow_res)
+ return;
+
+ for (i = 0; i < uflow_res->collection_num; i++)
+ atomic_dec(&uflow_res->collection[i]->usecnt);
+
+ for (i = 0; i < uflow_res->counters_num; i++)
+ atomic_dec(&uflow_res->counters[i]->usecnt);
- return ret ? ret : in_len;
+ kfree(uflow_res->collection);
+ kfree(uflow_res->counters);
+ kfree(uflow_res);
}
+EXPORT_SYMBOL(ib_uverbs_flow_resources_free);
-static size_t kern_spec_filter_sz(struct ib_uverbs_flow_spec_hdr *spec)
+void flow_resources_add(struct ib_uflow_resources *uflow_res,
+ enum ib_flow_spec_type type,
+ void *ibobj)
{
- /* Returns user space filter size, includes padding */
- return (spec->size - sizeof(struct ib_uverbs_flow_spec_hdr)) / 2;
+ WARN_ON(uflow_res->num >= uflow_res->max);
+
+ switch (type) {
+ case IB_FLOW_SPEC_ACTION_HANDLE:
+ atomic_inc(&((struct ib_flow_action *)ibobj)->usecnt);
+ uflow_res->collection[uflow_res->collection_num++] =
+ (struct ib_flow_action *)ibobj;
+ break;
+ case IB_FLOW_SPEC_ACTION_COUNT:
+ atomic_inc(&((struct ib_counters *)ibobj)->usecnt);
+ uflow_res->counters[uflow_res->counters_num++] =
+ (struct ib_counters *)ibobj;
+ break;
+ default:
+ WARN_ON(1);
+ }
+
+ uflow_res->num++;
+}
+EXPORT_SYMBOL(flow_resources_add);
+
+static int kern_spec_to_ib_spec_action(struct uverbs_attr_bundle *attrs,
+ struct ib_uverbs_flow_spec *kern_spec,
+ union ib_flow_spec *ib_spec,
+ struct ib_uflow_resources *uflow_res)
+{
+ ib_spec->type = kern_spec->type;
+ switch (ib_spec->type) {
+ case IB_FLOW_SPEC_ACTION_TAG:
+ if (kern_spec->flow_tag.size !=
+ sizeof(struct ib_uverbs_flow_spec_action_tag))
+ return -EINVAL;
+
+ ib_spec->flow_tag.size = sizeof(struct ib_flow_spec_action_tag);
+ ib_spec->flow_tag.tag_id = kern_spec->flow_tag.tag_id;
+ break;
+ case IB_FLOW_SPEC_ACTION_DROP:
+ if (kern_spec->drop.size !=
+ sizeof(struct ib_uverbs_flow_spec_action_drop))
+ return -EINVAL;
+
+ ib_spec->drop.size = sizeof(struct ib_flow_spec_action_drop);
+ break;
+ case IB_FLOW_SPEC_ACTION_HANDLE:
+ if (kern_spec->action.size !=
+ sizeof(struct ib_uverbs_flow_spec_action_handle))
+ return -EOPNOTSUPP;
+ ib_spec->action.act = uobj_get_obj_read(flow_action,
+ UVERBS_OBJECT_FLOW_ACTION,
+ kern_spec->action.handle,
+ attrs);
+ if (IS_ERR(ib_spec->action.act))
+ return PTR_ERR(ib_spec->action.act);
+ ib_spec->action.size =
+ sizeof(struct ib_flow_spec_action_handle);
+ flow_resources_add(uflow_res,
+ IB_FLOW_SPEC_ACTION_HANDLE,
+ ib_spec->action.act);
+ uobj_put_obj_read(ib_spec->action.act);
+ break;
+ case IB_FLOW_SPEC_ACTION_COUNT:
+ if (kern_spec->flow_count.size !=
+ sizeof(struct ib_uverbs_flow_spec_action_count))
+ return -EINVAL;
+ ib_spec->flow_count.counters =
+ uobj_get_obj_read(counters,
+ UVERBS_OBJECT_COUNTERS,
+ kern_spec->flow_count.handle,
+ attrs);
+ if (IS_ERR(ib_spec->flow_count.counters))
+ return PTR_ERR(ib_spec->flow_count.counters);
+ ib_spec->flow_count.size =
+ sizeof(struct ib_flow_spec_action_count);
+ flow_resources_add(uflow_res,
+ IB_FLOW_SPEC_ACTION_COUNT,
+ ib_spec->flow_count.counters);
+ uobj_put_obj_read(ib_spec->flow_count.counters);
+ break;
+ default:
+ return -EINVAL;
+ }
+ return 0;
}
-static ssize_t spec_filter_size(void *kern_spec_filter, u16 kern_filter_size,
+static ssize_t spec_filter_size(const void *kern_spec_filter, u16 kern_filter_size,
u16 ib_real_filter_sz)
{
/*
@@ -3167,34 +2745,27 @@ static ssize_t spec_filter_size(void *kern_spec_filter, u16 kern_filter_size,
return kern_filter_size;
}
-static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec,
- union ib_flow_spec *ib_spec)
+int ib_uverbs_kern_spec_to_ib_spec_filter(enum ib_flow_spec_type type,
+ const void *kern_spec_mask,
+ const void *kern_spec_val,
+ size_t kern_filter_sz,
+ union ib_flow_spec *ib_spec)
{
ssize_t actual_filter_sz;
- ssize_t kern_filter_sz;
ssize_t ib_filter_sz;
- void *kern_spec_mask;
- void *kern_spec_val;
- if (kern_spec->reserved)
- return -EINVAL;
-
- ib_spec->type = kern_spec->type;
-
- kern_filter_sz = kern_spec_filter_sz(&kern_spec->hdr);
/* User flow spec size must be aligned to 4 bytes */
if (kern_filter_sz != ALIGN(kern_filter_sz, 4))
return -EINVAL;
- kern_spec_val = (void *)kern_spec +
- sizeof(struct ib_uverbs_flow_spec_hdr);
- kern_spec_mask = kern_spec_val + kern_filter_sz;
+ ib_spec->type = type;
+
if (ib_spec->type == (IB_FLOW_SPEC_INNER | IB_FLOW_SPEC_VXLAN_TUNNEL))
return -EINVAL;
switch (ib_spec->type & ~IB_FLOW_SPEC_INNER) {
case IB_FLOW_SPEC_ETH:
- ib_filter_sz = offsetof(struct ib_flow_eth_filter, real_sz);
+ ib_filter_sz = sizeof(struct ib_flow_eth_filter);
actual_filter_sz = spec_filter_size(kern_spec_mask,
kern_filter_sz,
ib_filter_sz);
@@ -3205,7 +2776,7 @@ static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec,
memcpy(&ib_spec->eth.mask, kern_spec_mask, actual_filter_sz);
break;
case IB_FLOW_SPEC_IPV4:
- ib_filter_sz = offsetof(struct ib_flow_ipv4_filter, real_sz);
+ ib_filter_sz = sizeof(struct ib_flow_ipv4_filter);
actual_filter_sz = spec_filter_size(kern_spec_mask,
kern_filter_sz,
ib_filter_sz);
@@ -3216,7 +2787,7 @@ static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec,
memcpy(&ib_spec->ipv4.mask, kern_spec_mask, actual_filter_sz);
break;
case IB_FLOW_SPEC_IPV6:
- ib_filter_sz = offsetof(struct ib_flow_ipv6_filter, real_sz);
+ ib_filter_sz = sizeof(struct ib_flow_ipv6_filter);
actual_filter_sz = spec_filter_size(kern_spec_mask,
kern_filter_sz,
ib_filter_sz);
@@ -3232,7 +2803,7 @@ static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec,
break;
case IB_FLOW_SPEC_TCP:
case IB_FLOW_SPEC_UDP:
- ib_filter_sz = offsetof(struct ib_flow_tcp_udp_filter, real_sz);
+ ib_filter_sz = sizeof(struct ib_flow_tcp_udp_filter);
actual_filter_sz = spec_filter_size(kern_spec_mask,
kern_filter_sz,
ib_filter_sz);
@@ -3243,7 +2814,7 @@ static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec,
memcpy(&ib_spec->tcp_udp.mask, kern_spec_mask, actual_filter_sz);
break;
case IB_FLOW_SPEC_VXLAN_TUNNEL:
- ib_filter_sz = offsetof(struct ib_flow_tunnel_filter, real_sz);
+ ib_filter_sz = sizeof(struct ib_flow_tunnel_filter);
actual_filter_sz = spec_filter_size(kern_spec_mask,
kern_filter_sz,
ib_filter_sz);
@@ -3257,18 +2828,87 @@ static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec,
(ntohl(ib_spec->tunnel.val.tunnel_id)) >= BIT(24))
return -EINVAL;
break;
+ case IB_FLOW_SPEC_ESP:
+ ib_filter_sz = sizeof(struct ib_flow_esp_filter);
+ actual_filter_sz = spec_filter_size(kern_spec_mask,
+ kern_filter_sz,
+ ib_filter_sz);
+ if (actual_filter_sz <= 0)
+ return -EINVAL;
+ ib_spec->esp.size = sizeof(struct ib_flow_spec_esp);
+ memcpy(&ib_spec->esp.val, kern_spec_val, actual_filter_sz);
+ memcpy(&ib_spec->esp.mask, kern_spec_mask, actual_filter_sz);
+ break;
+ case IB_FLOW_SPEC_GRE:
+ ib_filter_sz = sizeof(struct ib_flow_gre_filter);
+ actual_filter_sz = spec_filter_size(kern_spec_mask,
+ kern_filter_sz,
+ ib_filter_sz);
+ if (actual_filter_sz <= 0)
+ return -EINVAL;
+ ib_spec->gre.size = sizeof(struct ib_flow_spec_gre);
+ memcpy(&ib_spec->gre.val, kern_spec_val, actual_filter_sz);
+ memcpy(&ib_spec->gre.mask, kern_spec_mask, actual_filter_sz);
+ break;
+ case IB_FLOW_SPEC_MPLS:
+ ib_filter_sz = sizeof(struct ib_flow_mpls_filter);
+ actual_filter_sz = spec_filter_size(kern_spec_mask,
+ kern_filter_sz,
+ ib_filter_sz);
+ if (actual_filter_sz <= 0)
+ return -EINVAL;
+ ib_spec->mpls.size = sizeof(struct ib_flow_spec_mpls);
+ memcpy(&ib_spec->mpls.val, kern_spec_val, actual_filter_sz);
+ memcpy(&ib_spec->mpls.mask, kern_spec_mask, actual_filter_sz);
+ break;
default:
return -EINVAL;
}
return 0;
}
-int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- struct ib_udata *ucore,
- struct ib_udata *uhw)
+static int kern_spec_to_ib_spec_filter(struct ib_uverbs_flow_spec *kern_spec,
+ union ib_flow_spec *ib_spec)
+{
+ size_t kern_filter_sz;
+ void *kern_spec_mask;
+ void *kern_spec_val;
+
+ if (check_sub_overflow((size_t)kern_spec->hdr.size,
+ sizeof(struct ib_uverbs_flow_spec_hdr),
+ &kern_filter_sz))
+ return -EINVAL;
+
+ kern_filter_sz /= 2;
+
+ kern_spec_val = (void *)kern_spec +
+ sizeof(struct ib_uverbs_flow_spec_hdr);
+ kern_spec_mask = kern_spec_val + kern_filter_sz;
+
+ return ib_uverbs_kern_spec_to_ib_spec_filter(kern_spec->type,
+ kern_spec_mask,
+ kern_spec_val,
+ kern_filter_sz, ib_spec);
+}
+
+static int kern_spec_to_ib_spec(struct uverbs_attr_bundle *attrs,
+ struct ib_uverbs_flow_spec *kern_spec,
+ union ib_flow_spec *ib_spec,
+ struct ib_uflow_resources *uflow_res)
+{
+ if (kern_spec->reserved)
+ return -EINVAL;
+
+ if (kern_spec->type >= IB_FLOW_SPEC_ACTION_TAG)
+ return kern_spec_to_ib_spec_action(attrs, kern_spec, ib_spec,
+ uflow_res);
+ else
+ return kern_spec_to_ib_spec_filter(kern_spec, ib_spec);
+}
+
+static int ib_uverbs_ex_create_wq(struct uverbs_attr_bundle *attrs)
{
- struct ib_uverbs_ex_create_wq cmd = {};
+ struct ib_uverbs_ex_create_wq cmd;
struct ib_uverbs_ex_create_wq_resp resp = {};
struct ib_uwq_object *obj;
int err = 0;
@@ -3276,260 +2916,182 @@ int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file,
struct ib_pd *pd;
struct ib_wq *wq;
struct ib_wq_init_attr wq_init_attr = {};
- size_t required_cmd_sz;
- size_t required_resp_len;
-
- required_cmd_sz = offsetof(typeof(cmd), max_sge) + sizeof(cmd.max_sge);
- required_resp_len = offsetof(typeof(resp), wqn) + sizeof(resp.wqn);
+ struct ib_device *ib_dev;
- if (ucore->inlen < required_cmd_sz)
- return -EINVAL;
-
- if (ucore->outlen < required_resp_len)
- return -ENOSPC;
-
- if (ucore->inlen > sizeof(cmd) &&
- !ib_is_udata_cleared(ucore, sizeof(cmd),
- ucore->inlen - sizeof(cmd)))
- return -EOPNOTSUPP;
-
- err = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen));
+ err = uverbs_request(attrs, &cmd, sizeof(cmd));
if (err)
return err;
if (cmd.comp_mask)
return -EOPNOTSUPP;
- obj = kmalloc(sizeof(*obj), GFP_KERNEL);
- if (!obj)
- return -ENOMEM;
+ obj = (struct ib_uwq_object *)uobj_alloc(UVERBS_OBJECT_WQ, attrs,
+ &ib_dev);
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
- init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext,
- &wq_lock_class);
- down_write(&obj->uevent.uobject.mutex);
- pd = idr_read_pd(cmd.pd_handle, file->ucontext);
- if (!pd) {
- err = -EINVAL;
+ pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, attrs);
+ if (IS_ERR(pd)) {
+ err = PTR_ERR(pd);
goto err_uobj;
}
- cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0);
- if (!cq) {
- err = -EINVAL;
+ cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, attrs);
+ if (IS_ERR(cq)) {
+ err = PTR_ERR(cq);
goto err_put_pd;
}
wq_init_attr.cq = cq;
wq_init_attr.max_sge = cmd.max_sge;
wq_init_attr.max_wr = cmd.max_wr;
- wq_init_attr.wq_context = file;
wq_init_attr.wq_type = cmd.wq_type;
wq_init_attr.event_handler = ib_uverbs_wq_event_handler;
- obj->uevent.events_reported = 0;
+ wq_init_attr.create_flags = cmd.create_flags;
INIT_LIST_HEAD(&obj->uevent.event_list);
- wq = pd->device->create_wq(pd, &wq_init_attr, uhw);
+ obj->uevent.uobject.user_handle = cmd.user_handle;
+
+ wq = pd->device->ops.create_wq(pd, &wq_init_attr, &attrs->driver_udata);
if (IS_ERR(wq)) {
err = PTR_ERR(wq);
goto err_put_cq;
}
- wq->uobject = &obj->uevent.uobject;
+ wq->uobject = obj;
obj->uevent.uobject.object = wq;
wq->wq_type = wq_init_attr.wq_type;
wq->cq = cq;
wq->pd = pd;
wq->device = pd->device;
- wq->wq_context = wq_init_attr.wq_context;
atomic_set(&wq->usecnt, 0);
atomic_inc(&pd->usecnt);
atomic_inc(&cq->usecnt);
- wq->uobject = &obj->uevent.uobject;
- obj->uevent.uobject.object = wq;
- err = idr_add_uobj(&ib_uverbs_wq_idr, &obj->uevent.uobject);
- if (err)
- goto destroy_wq;
+ obj->uevent.event_file = READ_ONCE(attrs->ufile->default_async_file);
+ if (obj->uevent.event_file)
+ uverbs_uobject_get(&obj->uevent.event_file->uobj);
+
+ uobj_put_obj_read(pd);
+ rdma_lookup_put_uobject(&cq->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
+ uobj_finalize_uobj_create(&obj->uevent.uobject, attrs);
- memset(&resp, 0, sizeof(resp));
resp.wq_handle = obj->uevent.uobject.id;
resp.max_sge = wq_init_attr.max_sge;
resp.max_wr = wq_init_attr.max_wr;
resp.wqn = wq->wq_num;
- resp.response_length = required_resp_len;
- err = ib_copy_to_udata(ucore,
- &resp, resp.response_length);
- if (err)
- goto err_copy;
-
- put_pd_read(pd);
- put_cq_read(cq);
+ resp.response_length = uverbs_response_length(attrs, sizeof(resp));
+ return uverbs_response(attrs, &resp, sizeof(resp));
- mutex_lock(&file->mutex);
- list_add_tail(&obj->uevent.uobject.list, &file->ucontext->wq_list);
- mutex_unlock(&file->mutex);
-
- obj->uevent.uobject.live = 1;
- up_write(&obj->uevent.uobject.mutex);
- return 0;
-
-err_copy:
- idr_remove_uobj(&ib_uverbs_wq_idr, &obj->uevent.uobject);
-destroy_wq:
- ib_destroy_wq(wq);
err_put_cq:
- put_cq_read(cq);
+ rdma_lookup_put_uobject(&cq->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
err_put_pd:
- put_pd_read(pd);
+ uobj_put_obj_read(pd);
err_uobj:
- put_uobj_write(&obj->uevent.uobject);
+ uobj_alloc_abort(&obj->uevent.uobject, attrs);
return err;
}
-int ib_uverbs_ex_destroy_wq(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- struct ib_udata *ucore,
- struct ib_udata *uhw)
+static int ib_uverbs_ex_destroy_wq(struct uverbs_attr_bundle *attrs)
{
- struct ib_uverbs_ex_destroy_wq cmd = {};
+ struct ib_uverbs_ex_destroy_wq cmd;
struct ib_uverbs_ex_destroy_wq_resp resp = {};
- struct ib_wq *wq;
struct ib_uobject *uobj;
struct ib_uwq_object *obj;
- size_t required_cmd_sz;
- size_t required_resp_len;
int ret;
- required_cmd_sz = offsetof(typeof(cmd), wq_handle) + sizeof(cmd.wq_handle);
- required_resp_len = offsetof(typeof(resp), reserved) + sizeof(resp.reserved);
-
- if (ucore->inlen < required_cmd_sz)
- return -EINVAL;
-
- if (ucore->outlen < required_resp_len)
- return -ENOSPC;
-
- if (ucore->inlen > sizeof(cmd) &&
- !ib_is_udata_cleared(ucore, sizeof(cmd),
- ucore->inlen - sizeof(cmd)))
- return -EOPNOTSUPP;
-
- ret = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen));
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
if (ret)
return ret;
if (cmd.comp_mask)
return -EOPNOTSUPP;
- resp.response_length = required_resp_len;
- uobj = idr_write_uobj(&ib_uverbs_wq_idr, cmd.wq_handle,
- file->ucontext);
- if (!uobj)
- return -EINVAL;
+ resp.response_length = uverbs_response_length(attrs, sizeof(resp));
+ uobj = uobj_get_destroy(UVERBS_OBJECT_WQ, cmd.wq_handle, attrs);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
- wq = uobj->object;
obj = container_of(uobj, struct ib_uwq_object, uevent.uobject);
- ret = ib_destroy_wq(wq);
- if (!ret)
- uobj->live = 0;
-
- put_uobj_write(uobj);
- if (ret)
- return ret;
-
- idr_remove_uobj(&ib_uverbs_wq_idr, uobj);
-
- mutex_lock(&file->mutex);
- list_del(&uobj->list);
- mutex_unlock(&file->mutex);
-
- ib_uverbs_release_uevent(file, &obj->uevent);
resp.events_reported = obj->uevent.events_reported;
- put_uobj(uobj);
- ret = ib_copy_to_udata(ucore, &resp, resp.response_length);
- if (ret)
- return ret;
+ uobj_put_destroy(uobj);
- return 0;
+ return uverbs_response(attrs, &resp, sizeof(resp));
}
-int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- struct ib_udata *ucore,
- struct ib_udata *uhw)
+static int ib_uverbs_ex_modify_wq(struct uverbs_attr_bundle *attrs)
{
- struct ib_uverbs_ex_modify_wq cmd = {};
+ struct ib_uverbs_ex_modify_wq cmd;
struct ib_wq *wq;
struct ib_wq_attr wq_attr = {};
- size_t required_cmd_sz;
int ret;
- required_cmd_sz = offsetof(typeof(cmd), curr_wq_state) + sizeof(cmd.curr_wq_state);
- if (ucore->inlen < required_cmd_sz)
- return -EINVAL;
-
- if (ucore->inlen > sizeof(cmd) &&
- !ib_is_udata_cleared(ucore, sizeof(cmd),
- ucore->inlen - sizeof(cmd)))
- return -EOPNOTSUPP;
-
- ret = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen));
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
if (ret)
return ret;
if (!cmd.attr_mask)
return -EINVAL;
- if (cmd.attr_mask > (IB_WQ_STATE | IB_WQ_CUR_STATE))
+ if (cmd.attr_mask > (IB_WQ_STATE | IB_WQ_CUR_STATE | IB_WQ_FLAGS))
return -EINVAL;
- wq = idr_read_wq(cmd.wq_handle, file->ucontext);
- if (!wq)
- return -EINVAL;
+ wq = uobj_get_obj_read(wq, UVERBS_OBJECT_WQ, cmd.wq_handle, attrs);
+ if (IS_ERR(wq))
+ return PTR_ERR(wq);
+
+ if (cmd.attr_mask & IB_WQ_FLAGS) {
+ wq_attr.flags = cmd.flags;
+ wq_attr.flags_mask = cmd.flags_mask;
+ }
- wq_attr.curr_wq_state = cmd.curr_wq_state;
- wq_attr.wq_state = cmd.wq_state;
- ret = wq->device->modify_wq(wq, &wq_attr, cmd.attr_mask, uhw);
- put_wq_read(wq);
+ if (cmd.attr_mask & IB_WQ_CUR_STATE) {
+ if (cmd.curr_wq_state > IB_WQS_ERR)
+ return -EINVAL;
+
+ wq_attr.curr_wq_state = cmd.curr_wq_state;
+ } else {
+ wq_attr.curr_wq_state = wq->state;
+ }
+
+ if (cmd.attr_mask & IB_WQ_STATE) {
+ if (cmd.wq_state > IB_WQS_ERR)
+ return -EINVAL;
+
+ wq_attr.wq_state = cmd.wq_state;
+ } else {
+ wq_attr.wq_state = wq_attr.curr_wq_state;
+ }
+
+ ret = wq->device->ops.modify_wq(wq, &wq_attr, cmd.attr_mask,
+ &attrs->driver_udata);
+ rdma_lookup_put_uobject(&wq->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
return ret;
}
-int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- struct ib_udata *ucore,
- struct ib_udata *uhw)
+static int ib_uverbs_ex_create_rwq_ind_table(struct uverbs_attr_bundle *attrs)
{
- struct ib_uverbs_ex_create_rwq_ind_table cmd = {};
+ struct ib_uverbs_ex_create_rwq_ind_table cmd;
struct ib_uverbs_ex_create_rwq_ind_table_resp resp = {};
- struct ib_uobject *uobj;
- int err = 0;
+ struct ib_uobject *uobj;
+ int err;
struct ib_rwq_ind_table_init_attr init_attr = {};
struct ib_rwq_ind_table *rwq_ind_tbl;
- struct ib_wq **wqs = NULL;
+ struct ib_wq **wqs = NULL;
u32 *wqs_handles = NULL;
struct ib_wq *wq = NULL;
- int i, j, num_read_wqs;
+ int i, num_read_wqs;
u32 num_wq_handles;
- u32 expected_in_size;
- size_t required_cmd_sz_header;
- size_t required_resp_len;
-
- required_cmd_sz_header = offsetof(typeof(cmd), log_ind_tbl_size) + sizeof(cmd.log_ind_tbl_size);
- required_resp_len = offsetof(typeof(resp), ind_tbl_num) + sizeof(resp.ind_tbl_num);
-
- if (ucore->inlen < required_cmd_sz_header)
- return -EINVAL;
-
- if (ucore->outlen < required_resp_len)
- return -ENOSPC;
+ struct uverbs_req_iter iter;
+ struct ib_device *ib_dev;
- err = ib_copy_from_udata(&cmd, ucore, required_cmd_sz_header);
+ err = uverbs_request_start(attrs, &iter, &cmd, sizeof(cmd));
if (err)
return err;
- ucore->inbuf += required_cmd_sz_header;
- ucore->inlen -= required_cmd_sz_header;
-
if (cmd.comp_mask)
return -EOPNOTSUPP;
@@ -3537,26 +3099,17 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file,
return -EINVAL;
num_wq_handles = 1 << cmd.log_ind_tbl_size;
- expected_in_size = num_wq_handles * sizeof(__u32);
- if (num_wq_handles == 1)
- /* input size for wq handles is u64 aligned */
- expected_in_size += sizeof(__u32);
-
- if (ucore->inlen < expected_in_size)
- return -EINVAL;
-
- if (ucore->inlen > expected_in_size &&
- !ib_is_udata_cleared(ucore, expected_in_size,
- ucore->inlen - expected_in_size))
- return -EOPNOTSUPP;
-
wqs_handles = kcalloc(num_wq_handles, sizeof(*wqs_handles),
GFP_KERNEL);
if (!wqs_handles)
return -ENOMEM;
- err = ib_copy_from_udata(wqs_handles, ucore,
- num_wq_handles * sizeof(__u32));
+ err = uverbs_request_next(&iter, wqs_handles,
+ num_wq_handles * sizeof(__u32));
+ if (err)
+ goto err_free;
+
+ err = uverbs_request_finish(&iter);
if (err)
goto err_free;
@@ -3568,32 +3121,32 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file,
for (num_read_wqs = 0; num_read_wqs < num_wq_handles;
num_read_wqs++) {
- wq = idr_read_wq(wqs_handles[num_read_wqs], file->ucontext);
- if (!wq) {
- err = -EINVAL;
+ wq = uobj_get_obj_read(wq, UVERBS_OBJECT_WQ,
+ wqs_handles[num_read_wqs], attrs);
+ if (IS_ERR(wq)) {
+ err = PTR_ERR(wq);
goto put_wqs;
}
wqs[num_read_wqs] = wq;
+ atomic_inc(&wqs[num_read_wqs]->usecnt);
}
- uobj = kmalloc(sizeof(*uobj), GFP_KERNEL);
- if (!uobj) {
- err = -ENOMEM;
+ uobj = uobj_alloc(UVERBS_OBJECT_RWQ_IND_TBL, attrs, &ib_dev);
+ if (IS_ERR(uobj)) {
+ err = PTR_ERR(uobj);
goto put_wqs;
}
- init_uobj(uobj, 0, file->ucontext, &rwq_ind_table_lock_class);
- down_write(&uobj->mutex);
- init_attr.log_ind_tbl_size = cmd.log_ind_tbl_size;
- init_attr.ind_tbl = wqs;
- rwq_ind_tbl = ib_dev->create_rwq_ind_table(ib_dev, &init_attr, uhw);
-
- if (IS_ERR(rwq_ind_tbl)) {
- err = PTR_ERR(rwq_ind_tbl);
+ rwq_ind_tbl = rdma_zalloc_drv_obj(ib_dev, ib_rwq_ind_table);
+ if (!rwq_ind_tbl) {
+ err = -ENOMEM;
goto err_uobj;
}
+ init_attr.log_ind_tbl_size = cmd.log_ind_tbl_size;
+ init_attr.ind_tbl = wqs;
+
rwq_ind_tbl->ind_tbl = wqs;
rwq_ind_tbl->log_ind_tbl_size = init_attr.log_ind_tbl_size;
rwq_ind_tbl->uobject = uobj;
@@ -3601,141 +3154,79 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file,
rwq_ind_tbl->device = ib_dev;
atomic_set(&rwq_ind_tbl->usecnt, 0);
- for (i = 0; i < num_wq_handles; i++)
- atomic_inc(&wqs[i]->usecnt);
-
- err = idr_add_uobj(&ib_uverbs_rwq_ind_tbl_idr, uobj);
- if (err)
- goto destroy_ind_tbl;
-
- resp.ind_tbl_handle = uobj->id;
- resp.ind_tbl_num = rwq_ind_tbl->ind_tbl_num;
- resp.response_length = required_resp_len;
-
- err = ib_copy_to_udata(ucore,
- &resp, resp.response_length);
+ err = ib_dev->ops.create_rwq_ind_table(rwq_ind_tbl, &init_attr,
+ &attrs->driver_udata);
if (err)
- goto err_copy;
+ goto err_create;
+ for (i = 0; i < num_wq_handles; i++)
+ rdma_lookup_put_uobject(&wqs[i]->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
kfree(wqs_handles);
+ uobj_finalize_uobj_create(uobj, attrs);
- for (j = 0; j < num_read_wqs; j++)
- put_wq_read(wqs[j]);
-
- mutex_lock(&file->mutex);
- list_add_tail(&uobj->list, &file->ucontext->rwq_ind_tbl_list);
- mutex_unlock(&file->mutex);
-
- uobj->live = 1;
-
- up_write(&uobj->mutex);
- return 0;
+ resp.ind_tbl_handle = uobj->id;
+ resp.ind_tbl_num = rwq_ind_tbl->ind_tbl_num;
+ resp.response_length = uverbs_response_length(attrs, sizeof(resp));
+ return uverbs_response(attrs, &resp, sizeof(resp));
-err_copy:
- idr_remove_uobj(&ib_uverbs_rwq_ind_tbl_idr, uobj);
-destroy_ind_tbl:
- ib_destroy_rwq_ind_table(rwq_ind_tbl);
+err_create:
+ kfree(rwq_ind_tbl);
err_uobj:
- put_uobj_write(uobj);
+ uobj_alloc_abort(uobj, attrs);
put_wqs:
- for (j = 0; j < num_read_wqs; j++)
- put_wq_read(wqs[j]);
+ for (i = 0; i < num_read_wqs; i++) {
+ rdma_lookup_put_uobject(&wqs[i]->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
+ atomic_dec(&wqs[i]->usecnt);
+ }
err_free:
kfree(wqs_handles);
kfree(wqs);
return err;
}
-int ib_uverbs_ex_destroy_rwq_ind_table(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- struct ib_udata *ucore,
- struct ib_udata *uhw)
+static int ib_uverbs_ex_destroy_rwq_ind_table(struct uverbs_attr_bundle *attrs)
{
- struct ib_uverbs_ex_destroy_rwq_ind_table cmd = {};
- struct ib_rwq_ind_table *rwq_ind_tbl;
- struct ib_uobject *uobj;
- int ret;
- struct ib_wq **ind_tbl;
- size_t required_cmd_sz;
-
- required_cmd_sz = offsetof(typeof(cmd), ind_tbl_handle) + sizeof(cmd.ind_tbl_handle);
-
- if (ucore->inlen < required_cmd_sz)
- return -EINVAL;
-
- if (ucore->inlen > sizeof(cmd) &&
- !ib_is_udata_cleared(ucore, sizeof(cmd),
- ucore->inlen - sizeof(cmd)))
- return -EOPNOTSUPP;
+ struct ib_uverbs_ex_destroy_rwq_ind_table cmd;
+ int ret;
- ret = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen));
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
if (ret)
return ret;
if (cmd.comp_mask)
return -EOPNOTSUPP;
- uobj = idr_write_uobj(&ib_uverbs_rwq_ind_tbl_idr, cmd.ind_tbl_handle,
- file->ucontext);
- if (!uobj)
- return -EINVAL;
- rwq_ind_tbl = uobj->object;
- ind_tbl = rwq_ind_tbl->ind_tbl;
-
- ret = ib_destroy_rwq_ind_table(rwq_ind_tbl);
- if (!ret)
- uobj->live = 0;
-
- put_uobj_write(uobj);
-
- if (ret)
- return ret;
-
- idr_remove_uobj(&ib_uverbs_rwq_ind_tbl_idr, uobj);
-
- mutex_lock(&file->mutex);
- list_del(&uobj->list);
- mutex_unlock(&file->mutex);
-
- put_uobj(uobj);
- kfree(ind_tbl);
- return ret;
+ return uobj_perform_destroy(UVERBS_OBJECT_RWQ_IND_TBL,
+ cmd.ind_tbl_handle, attrs);
}
-int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- struct ib_udata *ucore,
- struct ib_udata *uhw)
+static int ib_uverbs_ex_create_flow(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_create_flow cmd;
- struct ib_uverbs_create_flow_resp resp;
+ struct ib_uverbs_create_flow_resp resp = {};
struct ib_uobject *uobj;
struct ib_flow *flow_id;
struct ib_uverbs_flow_attr *kern_flow_attr;
struct ib_flow_attr *flow_attr;
struct ib_qp *qp;
- int err = 0;
- void *kern_spec;
+ struct ib_uflow_resources *uflow_res;
+ struct ib_uverbs_flow_spec_hdr *kern_spec;
+ struct uverbs_req_iter iter;
+ int err;
void *ib_spec;
int i;
+ struct ib_device *ib_dev;
- if (ucore->inlen < sizeof(cmd))
- return -EINVAL;
-
- if (ucore->outlen < sizeof(resp))
- return -ENOSPC;
-
- err = ib_copy_from_udata(&cmd, ucore, sizeof(cmd));
+ err = uverbs_request_start(attrs, &iter, &cmd, sizeof(cmd));
if (err)
return err;
- ucore->inbuf += sizeof(cmd);
- ucore->inlen -= sizeof(cmd);
-
if (cmd.comp_mask)
return -EINVAL;
- if (!capable(CAP_NET_RAW))
+ if (!rdma_uattrs_has_raw_cap(attrs))
return -EPERM;
if (cmd.flow_attr.flags >= IB_FLOW_ATTR_FLAGS_RESERVED)
@@ -3749,8 +3240,7 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
if (cmd.flow_attr.num_of_specs > IB_FLOW_SPEC_SUPPORT_LAYERS)
return -EINVAL;
- if (cmd.flow_attr.size > ucore->inlen ||
- cmd.flow_attr.size >
+ if (cmd.flow_attr.size >
(cmd.flow_attr.num_of_specs * sizeof(struct ib_uverbs_flow_spec)))
return -EINVAL;
@@ -3764,35 +3254,52 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
if (!kern_flow_attr)
return -ENOMEM;
- memcpy(kern_flow_attr, &cmd.flow_attr, sizeof(*kern_flow_attr));
- err = ib_copy_from_udata(kern_flow_attr + 1, ucore,
- cmd.flow_attr.size);
+ *kern_flow_attr = cmd.flow_attr;
+ err = uverbs_request_next(&iter, &kern_flow_attr->flow_specs,
+ cmd.flow_attr.size);
if (err)
goto err_free_attr;
} else {
kern_flow_attr = &cmd.flow_attr;
}
- uobj = kmalloc(sizeof(*uobj), GFP_KERNEL);
- if (!uobj) {
- err = -ENOMEM;
+ err = uverbs_request_finish(&iter);
+ if (err)
+ goto err_free_attr;
+
+ uobj = uobj_alloc(UVERBS_OBJECT_FLOW, attrs, &ib_dev);
+ if (IS_ERR(uobj)) {
+ err = PTR_ERR(uobj);
goto err_free_attr;
}
- init_uobj(uobj, 0, file->ucontext, &rule_lock_class);
- down_write(&uobj->mutex);
- qp = idr_read_qp(cmd.qp_handle, file->ucontext);
- if (!qp) {
+ if (!rdma_is_port_valid(uobj->context->device, cmd.flow_attr.port)) {
err = -EINVAL;
goto err_uobj;
}
- flow_attr = kzalloc(sizeof(*flow_attr) + cmd.flow_attr.num_of_specs *
- sizeof(union ib_flow_spec), GFP_KERNEL);
+ qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, attrs);
+ if (IS_ERR(qp)) {
+ err = PTR_ERR(qp);
+ goto err_uobj;
+ }
+
+ if (qp->qp_type != IB_QPT_UD && qp->qp_type != IB_QPT_RAW_PACKET) {
+ err = -EINVAL;
+ goto err_put;
+ }
+
+ flow_attr = kzalloc(struct_size(flow_attr, flows,
+ cmd.flow_attr.num_of_specs), GFP_KERNEL);
if (!flow_attr) {
err = -ENOMEM;
goto err_put;
}
+ uflow_res = flow_resources_alloc(cmd.flow_attr.num_of_specs);
+ if (!uflow_res) {
+ err = -ENOMEM;
+ goto err_free_flow_attr;
+ }
flow_attr->type = kern_flow_attr->type;
flow_attr->priority = kern_flow_attr->priority;
@@ -3801,269 +3308,204 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
flow_attr->flags = kern_flow_attr->flags;
flow_attr->size = sizeof(*flow_attr);
- kern_spec = kern_flow_attr + 1;
+ kern_spec = kern_flow_attr->flow_specs;
ib_spec = flow_attr + 1;
for (i = 0; i < flow_attr->num_of_specs &&
- cmd.flow_attr.size > offsetof(struct ib_uverbs_flow_spec, reserved) &&
- cmd.flow_attr.size >=
- ((struct ib_uverbs_flow_spec *)kern_spec)->size; i++) {
- err = kern_spec_to_ib_spec(kern_spec, ib_spec);
+ cmd.flow_attr.size >= sizeof(*kern_spec) &&
+ cmd.flow_attr.size >= kern_spec->size;
+ i++) {
+ err = kern_spec_to_ib_spec(
+ attrs, (struct ib_uverbs_flow_spec *)kern_spec,
+ ib_spec, uflow_res);
if (err)
goto err_free;
+
flow_attr->size +=
((union ib_flow_spec *) ib_spec)->size;
- cmd.flow_attr.size -= ((struct ib_uverbs_flow_spec *)kern_spec)->size;
- kern_spec += ((struct ib_uverbs_flow_spec *) kern_spec)->size;
+ cmd.flow_attr.size -= kern_spec->size;
+ kern_spec = ((void *)kern_spec) + kern_spec->size;
ib_spec += ((union ib_flow_spec *) ib_spec)->size;
}
if (cmd.flow_attr.size || (i != flow_attr->num_of_specs)) {
- pr_warn("create flow failed, flow %d: %d bytes left from uverb cmd\n",
+ pr_warn("create flow failed, flow %d: %u bytes left from uverb cmd\n",
i, cmd.flow_attr.size);
err = -EINVAL;
goto err_free;
}
- flow_id = ib_create_flow(qp, flow_attr, IB_FLOW_DOMAIN_USER);
+
+ flow_id = qp->device->ops.create_flow(qp, flow_attr,
+ &attrs->driver_udata);
+
if (IS_ERR(flow_id)) {
err = PTR_ERR(flow_id);
goto err_free;
}
- flow_id->uobject = uobj;
- uobj->object = flow_id;
-
- err = idr_add_uobj(&ib_uverbs_rule_idr, uobj);
- if (err)
- goto destroy_flow;
-
- memset(&resp, 0, sizeof(resp));
- resp.flow_handle = uobj->id;
-
- err = ib_copy_to_udata(ucore,
- &resp, sizeof(resp));
- if (err)
- goto err_copy;
- put_qp_read(qp);
- mutex_lock(&file->mutex);
- list_add_tail(&uobj->list, &file->ucontext->rule_list);
- mutex_unlock(&file->mutex);
+ ib_set_flow(uobj, flow_id, qp, qp->device, uflow_res);
- uobj->live = 1;
-
- up_write(&uobj->mutex);
+ rdma_lookup_put_uobject(&qp->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
kfree(flow_attr);
+
if (cmd.flow_attr.num_of_specs)
kfree(kern_flow_attr);
- return 0;
-err_copy:
- idr_remove_uobj(&ib_uverbs_rule_idr, uobj);
-destroy_flow:
- ib_destroy_flow(flow_id);
+ uobj_finalize_uobj_create(uobj, attrs);
+
+ resp.flow_handle = uobj->id;
+ return uverbs_response(attrs, &resp, sizeof(resp));
+
err_free:
+ ib_uverbs_flow_resources_free(uflow_res);
+err_free_flow_attr:
kfree(flow_attr);
err_put:
- put_qp_read(qp);
+ rdma_lookup_put_uobject(&qp->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
err_uobj:
- put_uobj_write(uobj);
+ uobj_alloc_abort(uobj, attrs);
err_free_attr:
if (cmd.flow_attr.num_of_specs)
kfree(kern_flow_attr);
return err;
}
-int ib_uverbs_ex_destroy_flow(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- struct ib_udata *ucore,
- struct ib_udata *uhw)
+static int ib_uverbs_ex_destroy_flow(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_destroy_flow cmd;
- struct ib_flow *flow_id;
- struct ib_uobject *uobj;
int ret;
- if (ucore->inlen < sizeof(cmd))
- return -EINVAL;
-
- ret = ib_copy_from_udata(&cmd, ucore, sizeof(cmd));
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
if (ret)
return ret;
if (cmd.comp_mask)
return -EINVAL;
- uobj = idr_write_uobj(&ib_uverbs_rule_idr, cmd.flow_handle,
- file->ucontext);
- if (!uobj)
- return -EINVAL;
- flow_id = uobj->object;
-
- ret = ib_destroy_flow(flow_id);
- if (!ret)
- uobj->live = 0;
-
- put_uobj_write(uobj);
-
- idr_remove_uobj(&ib_uverbs_rule_idr, uobj);
-
- mutex_lock(&file->mutex);
- list_del(&uobj->list);
- mutex_unlock(&file->mutex);
-
- put_uobj(uobj);
-
- return ret;
+ return uobj_perform_destroy(UVERBS_OBJECT_FLOW, cmd.flow_handle, attrs);
}
-static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
+static int __uverbs_create_xsrq(struct uverbs_attr_bundle *attrs,
struct ib_uverbs_create_xsrq *cmd,
struct ib_udata *udata)
{
- struct ib_uverbs_create_srq_resp resp;
+ struct ib_uverbs_create_srq_resp resp = {};
struct ib_usrq_object *obj;
struct ib_pd *pd;
struct ib_srq *srq;
- struct ib_uobject *uninitialized_var(xrcd_uobj);
struct ib_srq_init_attr attr;
int ret;
+ struct ib_uobject *xrcd_uobj;
+ struct ib_device *ib_dev;
- obj = kmalloc(sizeof *obj, GFP_KERNEL);
- if (!obj)
- return -ENOMEM;
+ obj = (struct ib_usrq_object *)uobj_alloc(UVERBS_OBJECT_SRQ, attrs,
+ &ib_dev);
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
- init_uobj(&obj->uevent.uobject, cmd->user_handle, file->ucontext, &srq_lock_class);
- down_write(&obj->uevent.uobject.mutex);
+ if (cmd->srq_type == IB_SRQT_TM)
+ attr.ext.tag_matching.max_num_tags = cmd->max_num_tags;
if (cmd->srq_type == IB_SRQT_XRC) {
- attr.ext.xrc.xrcd = idr_read_xrcd(cmd->xrcd_handle, file->ucontext, &xrcd_uobj);
- if (!attr.ext.xrc.xrcd) {
+ xrcd_uobj = uobj_get_read(UVERBS_OBJECT_XRCD, cmd->xrcd_handle,
+ attrs);
+ if (IS_ERR(xrcd_uobj)) {
ret = -EINVAL;
goto err;
}
+ attr.ext.xrc.xrcd = (struct ib_xrcd *)xrcd_uobj->object;
+ if (!attr.ext.xrc.xrcd) {
+ ret = -EINVAL;
+ goto err_put_xrcd;
+ }
+
obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, uobject);
atomic_inc(&obj->uxrcd->refcnt);
+ }
- attr.ext.xrc.cq = idr_read_cq(cmd->cq_handle, file->ucontext, 0);
- if (!attr.ext.xrc.cq) {
- ret = -EINVAL;
+ if (ib_srq_has_cq(cmd->srq_type)) {
+ attr.ext.cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ,
+ cmd->cq_handle, attrs);
+ if (IS_ERR(attr.ext.cq)) {
+ ret = PTR_ERR(attr.ext.cq);
goto err_put_xrcd;
}
}
- pd = idr_read_pd(cmd->pd_handle, file->ucontext);
- if (!pd) {
- ret = -EINVAL;
+ pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd->pd_handle, attrs);
+ if (IS_ERR(pd)) {
+ ret = PTR_ERR(pd);
goto err_put_cq;
}
attr.event_handler = ib_uverbs_srq_event_handler;
- attr.srq_context = file;
attr.srq_type = cmd->srq_type;
attr.attr.max_wr = cmd->max_wr;
attr.attr.max_sge = cmd->max_sge;
attr.attr.srq_limit = cmd->srq_limit;
- obj->uevent.events_reported = 0;
INIT_LIST_HEAD(&obj->uevent.event_list);
+ obj->uevent.uobject.user_handle = cmd->user_handle;
- srq = pd->device->create_srq(pd, &attr, udata);
+ srq = ib_create_srq_user(pd, &attr, obj, udata);
if (IS_ERR(srq)) {
ret = PTR_ERR(srq);
- goto err_put;
- }
-
- srq->device = pd->device;
- srq->pd = pd;
- srq->srq_type = cmd->srq_type;
- srq->uobject = &obj->uevent.uobject;
- srq->event_handler = attr.event_handler;
- srq->srq_context = attr.srq_context;
-
- if (cmd->srq_type == IB_SRQT_XRC) {
- srq->ext.xrc.cq = attr.ext.xrc.cq;
- srq->ext.xrc.xrcd = attr.ext.xrc.xrcd;
- atomic_inc(&attr.ext.xrc.cq->usecnt);
- atomic_inc(&attr.ext.xrc.xrcd->usecnt);
+ goto err_put_pd;
}
- atomic_inc(&pd->usecnt);
- atomic_set(&srq->usecnt, 0);
-
obj->uevent.uobject.object = srq;
- ret = idr_add_uobj(&ib_uverbs_srq_idr, &obj->uevent.uobject);
- if (ret)
- goto err_destroy;
+ obj->uevent.uobject.user_handle = cmd->user_handle;
+ obj->uevent.event_file = READ_ONCE(attrs->ufile->default_async_file);
+ if (obj->uevent.event_file)
+ uverbs_uobject_get(&obj->uevent.event_file->uobj);
- memset(&resp, 0, sizeof resp);
- resp.srq_handle = obj->uevent.uobject.id;
- resp.max_wr = attr.attr.max_wr;
- resp.max_sge = attr.attr.max_sge;
if (cmd->srq_type == IB_SRQT_XRC)
resp.srqn = srq->ext.xrc.srq_num;
- if (copy_to_user((void __user *) (unsigned long) cmd->response,
- &resp, sizeof resp)) {
- ret = -EFAULT;
- goto err_copy;
- }
-
- if (cmd->srq_type == IB_SRQT_XRC) {
- put_uobj_read(xrcd_uobj);
- put_cq_read(attr.ext.xrc.cq);
- }
- put_pd_read(pd);
-
- mutex_lock(&file->mutex);
- list_add_tail(&obj->uevent.uobject.list, &file->ucontext->srq_list);
- mutex_unlock(&file->mutex);
-
- obj->uevent.uobject.live = 1;
-
- up_write(&obj->uevent.uobject.mutex);
-
- return 0;
+ if (cmd->srq_type == IB_SRQT_XRC)
+ uobj_put_read(xrcd_uobj);
-err_copy:
- idr_remove_uobj(&ib_uverbs_srq_idr, &obj->uevent.uobject);
+ if (ib_srq_has_cq(cmd->srq_type))
+ rdma_lookup_put_uobject(&attr.ext.cq->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
-err_destroy:
- ib_destroy_srq(srq);
+ uobj_put_obj_read(pd);
+ uobj_finalize_uobj_create(&obj->uevent.uobject, attrs);
-err_put:
- put_pd_read(pd);
+ resp.srq_handle = obj->uevent.uobject.id;
+ resp.max_wr = attr.attr.max_wr;
+ resp.max_sge = attr.attr.max_sge;
+ return uverbs_response(attrs, &resp, sizeof(resp));
+err_put_pd:
+ uobj_put_obj_read(pd);
err_put_cq:
- if (cmd->srq_type == IB_SRQT_XRC)
- put_cq_read(attr.ext.xrc.cq);
+ if (ib_srq_has_cq(cmd->srq_type))
+ rdma_lookup_put_uobject(&attr.ext.cq->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
err_put_xrcd:
if (cmd->srq_type == IB_SRQT_XRC) {
atomic_dec(&obj->uxrcd->refcnt);
- put_uobj_read(xrcd_uobj);
+ uobj_put_read(xrcd_uobj);
}
err:
- put_uobj_write(&obj->uevent.uobject);
+ uobj_alloc_abort(&obj->uevent.uobject, attrs);
return ret;
}
-ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_create_srq(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_create_srq cmd;
struct ib_uverbs_create_xsrq xcmd;
- struct ib_uverbs_create_srq_resp resp;
- struct ib_udata udata;
int ret;
- if (out_len < sizeof resp)
- return -ENOSPC;
-
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
+ memset(&xcmd, 0, sizeof(xcmd));
xcmd.response = cmd.response;
xcmd.user_handle = cmd.user_handle;
xcmd.srq_type = IB_SRQT_BASIC;
@@ -4072,80 +3514,49 @@ ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file,
xcmd.max_sge = cmd.max_sge;
xcmd.srq_limit = cmd.srq_limit;
- INIT_UDATA(&udata, buf + sizeof cmd,
- (unsigned long) cmd.response + sizeof resp,
- in_len - sizeof cmd - sizeof(struct ib_uverbs_cmd_hdr),
- out_len - sizeof resp);
-
- ret = __uverbs_create_xsrq(file, ib_dev, &xcmd, &udata);
- if (ret)
- return ret;
-
- return in_len;
+ return __uverbs_create_xsrq(attrs, &xcmd, &attrs->driver_udata);
}
-ssize_t ib_uverbs_create_xsrq(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- const char __user *buf, int in_len, int out_len)
+static int ib_uverbs_create_xsrq(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_create_xsrq cmd;
- struct ib_uverbs_create_srq_resp resp;
- struct ib_udata udata;
int ret;
- if (out_len < sizeof resp)
- return -ENOSPC;
-
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
-
- INIT_UDATA(&udata, buf + sizeof cmd,
- (unsigned long) cmd.response + sizeof resp,
- in_len - sizeof cmd - sizeof(struct ib_uverbs_cmd_hdr),
- out_len - sizeof resp);
-
- ret = __uverbs_create_xsrq(file, ib_dev, &cmd, &udata);
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
if (ret)
return ret;
- return in_len;
+ return __uverbs_create_xsrq(attrs, &cmd, &attrs->driver_udata);
}
-ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_modify_srq(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_modify_srq cmd;
- struct ib_udata udata;
struct ib_srq *srq;
struct ib_srq_attr attr;
int ret;
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- INIT_UDATA(&udata, buf + sizeof cmd, NULL, in_len - sizeof cmd,
- out_len);
-
- srq = idr_read_srq(cmd.srq_handle, file->ucontext);
- if (!srq)
- return -EINVAL;
+ srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, attrs);
+ if (IS_ERR(srq))
+ return PTR_ERR(srq);
attr.max_wr = cmd.max_wr;
attr.srq_limit = cmd.srq_limit;
- ret = srq->device->modify_srq(srq, &attr, cmd.attr_mask, &udata);
+ ret = srq->device->ops.modify_srq(srq, &attr, cmd.attr_mask,
+ &attrs->driver_udata);
- put_srq_read(srq);
+ rdma_lookup_put_uobject(&srq->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
- return ret ? ret : in_len;
+ return ret;
}
-ssize_t ib_uverbs_query_srq(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- const char __user *buf,
- int in_len, int out_len)
+static int ib_uverbs_query_srq(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_query_srq cmd;
struct ib_uverbs_query_srq_resp resp;
@@ -4153,19 +3564,18 @@ ssize_t ib_uverbs_query_srq(struct ib_uverbs_file *file,
struct ib_srq *srq;
int ret;
- if (out_len < sizeof resp)
- return -ENOSPC;
-
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- srq = idr_read_srq(cmd.srq_handle, file->ucontext);
- if (!srq)
- return -EINVAL;
+ srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, attrs);
+ if (IS_ERR(srq))
+ return PTR_ERR(srq);
ret = ib_query_srq(srq, &attr);
- put_srq_read(srq);
+ rdma_lookup_put_uobject(&srq->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
if (ret)
return ret;
@@ -4176,85 +3586,49 @@ ssize_t ib_uverbs_query_srq(struct ib_uverbs_file *file,
resp.max_sge = attr.max_sge;
resp.srq_limit = attr.srq_limit;
- if (copy_to_user((void __user *) (unsigned long) cmd.response,
- &resp, sizeof resp))
- return -EFAULT;
-
- return in_len;
+ return uverbs_response(attrs, &resp, sizeof(resp));
}
-ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_destroy_srq(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_destroy_srq cmd;
struct ib_uverbs_destroy_srq_resp resp;
struct ib_uobject *uobj;
- struct ib_srq *srq;
struct ib_uevent_object *obj;
- int ret = -EINVAL;
- struct ib_usrq_object *us;
- enum ib_srq_type srq_type;
-
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
-
- uobj = idr_write_uobj(&ib_uverbs_srq_idr, cmd.srq_handle, file->ucontext);
- if (!uobj)
- return -EINVAL;
- srq = uobj->object;
- obj = container_of(uobj, struct ib_uevent_object, uobject);
- srq_type = srq->srq_type;
-
- ret = ib_destroy_srq(srq);
- if (!ret)
- uobj->live = 0;
-
- put_uobj_write(uobj);
+ int ret;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
if (ret)
return ret;
- if (srq_type == IB_SRQT_XRC) {
- us = container_of(obj, struct ib_usrq_object, uevent);
- atomic_dec(&us->uxrcd->refcnt);
- }
-
- idr_remove_uobj(&ib_uverbs_srq_idr, uobj);
-
- mutex_lock(&file->mutex);
- list_del(&uobj->list);
- mutex_unlock(&file->mutex);
+ uobj = uobj_get_destroy(UVERBS_OBJECT_SRQ, cmd.srq_handle, attrs);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
- ib_uverbs_release_uevent(file, obj);
-
- memset(&resp, 0, sizeof resp);
+ obj = container_of(uobj, struct ib_uevent_object, uobject);
+ memset(&resp, 0, sizeof(resp));
resp.events_reported = obj->events_reported;
- put_uobj(uobj);
-
- if (copy_to_user((void __user *) (unsigned long) cmd.response,
- &resp, sizeof resp))
- ret = -EFAULT;
+ uobj_put_destroy(uobj);
- return ret ? ret : in_len;
+ return uverbs_response(attrs, &resp, sizeof(resp));
}
-int ib_uverbs_ex_query_device(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- struct ib_udata *ucore,
- struct ib_udata *uhw)
+static int ib_uverbs_ex_query_device(struct uverbs_attr_bundle *attrs)
{
- struct ib_uverbs_ex_query_device_resp resp = { {0} };
+ struct ib_uverbs_ex_query_device_resp resp = {};
struct ib_uverbs_ex_query_device cmd;
struct ib_device_attr attr = {0};
+ struct ib_ucontext *ucontext;
+ struct ib_device *ib_dev;
int err;
- if (ucore->inlen < sizeof(cmd))
- return -EINVAL;
+ ucontext = ib_uverbs_get_ucontext(attrs);
+ if (IS_ERR(ucontext))
+ return PTR_ERR(ucontext);
+ ib_dev = ucontext->device;
- err = ib_copy_from_udata(&cmd, ucore, sizeof(cmd));
+ err = uverbs_request(attrs, &cmd, sizeof(cmd));
if (err)
return err;
@@ -4264,21 +3638,12 @@ int ib_uverbs_ex_query_device(struct ib_uverbs_file *file,
if (cmd.reserved)
return -EINVAL;
- resp.response_length = offsetof(typeof(resp), odp_caps);
-
- if (ucore->outlen < resp.response_length)
- return -ENOSPC;
-
- err = ib_dev->query_device(ib_dev, &attr, uhw);
+ err = ib_dev->ops.query_device(ib_dev, &attr, &attrs->driver_udata);
if (err)
return err;
- copy_query_dev_fields(file, ib_dev, &resp.base, &attr);
-
- if (ucore->outlen < resp.response_length + sizeof(resp.odp_caps))
- goto end;
+ copy_query_dev_fields(ucontext, &resp.base, &attr);
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
resp.odp_caps.general_caps = attr.odp_caps.general_caps;
resp.odp_caps.per_transport_caps.rc_odp_caps =
attr.odp_caps.per_transport_caps.rc_odp_caps;
@@ -4286,44 +3651,431 @@ int ib_uverbs_ex_query_device(struct ib_uverbs_file *file,
attr.odp_caps.per_transport_caps.uc_odp_caps;
resp.odp_caps.per_transport_caps.ud_odp_caps =
attr.odp_caps.per_transport_caps.ud_odp_caps;
-#endif
- resp.response_length += sizeof(resp.odp_caps);
-
- if (ucore->outlen < resp.response_length + sizeof(resp.timestamp_mask))
- goto end;
+ resp.xrc_odp_caps = attr.odp_caps.per_transport_caps.xrc_odp_caps;
resp.timestamp_mask = attr.timestamp_mask;
- resp.response_length += sizeof(resp.timestamp_mask);
-
- if (ucore->outlen < resp.response_length + sizeof(resp.hca_core_clock))
- goto end;
-
resp.hca_core_clock = attr.hca_core_clock;
- resp.response_length += sizeof(resp.hca_core_clock);
-
- if (ucore->outlen < resp.response_length + sizeof(resp.device_cap_flags_ex))
- goto end;
-
resp.device_cap_flags_ex = attr.device_cap_flags;
- resp.response_length += sizeof(resp.device_cap_flags_ex);
-
- if (ucore->outlen < resp.response_length + sizeof(resp.rss_caps))
- goto end;
-
resp.rss_caps.supported_qpts = attr.rss_caps.supported_qpts;
resp.rss_caps.max_rwq_indirection_tables =
attr.rss_caps.max_rwq_indirection_tables;
resp.rss_caps.max_rwq_indirection_table_size =
attr.rss_caps.max_rwq_indirection_table_size;
+ resp.max_wq_type_rq = attr.max_wq_type_rq;
+ resp.raw_packet_caps = attr.raw_packet_caps;
+ resp.tm_caps.max_rndv_hdr_size = attr.tm_caps.max_rndv_hdr_size;
+ resp.tm_caps.max_num_tags = attr.tm_caps.max_num_tags;
+ resp.tm_caps.max_ops = attr.tm_caps.max_ops;
+ resp.tm_caps.max_sge = attr.tm_caps.max_sge;
+ resp.tm_caps.flags = attr.tm_caps.flags;
+ resp.cq_moderation_caps.max_cq_moderation_count =
+ attr.cq_caps.max_cq_moderation_count;
+ resp.cq_moderation_caps.max_cq_moderation_period =
+ attr.cq_caps.max_cq_moderation_period;
+ resp.max_dm_size = attr.max_dm_size;
+ resp.response_length = uverbs_response_length(attrs, sizeof(resp));
- resp.response_length += sizeof(resp.rss_caps);
+ return uverbs_response(attrs, &resp, sizeof(resp));
+}
- if (ucore->outlen < resp.response_length + sizeof(resp.max_wq_type_rq))
- goto end;
+static int ib_uverbs_ex_modify_cq(struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uverbs_ex_modify_cq cmd;
+ struct ib_cq *cq;
+ int ret;
- resp.max_wq_type_rq = attr.max_wq_type_rq;
- resp.response_length += sizeof(resp.max_wq_type_rq);
-end:
- err = ib_copy_to_udata(ucore, &resp, resp.response_length);
- return err;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
+
+ if (!cmd.attr_mask || cmd.reserved)
+ return -EINVAL;
+
+ if (cmd.attr_mask > IB_CQ_MODERATE)
+ return -EOPNOTSUPP;
+
+ cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, attrs);
+ if (IS_ERR(cq))
+ return PTR_ERR(cq);
+
+ ret = rdma_set_cq_moderation(cq, cmd.attr.cq_count, cmd.attr.cq_period);
+
+ rdma_lookup_put_uobject(&cq->uobject->uevent.uobject,
+ UVERBS_LOOKUP_READ);
+ return ret;
}
+
+/*
+ * Describe the input structs for write(). Some write methods have an input
+ * only struct, most have an input and output. If the struct has an output then
+ * the 'response' u64 must be the first field in the request structure.
+ *
+ * If udata is present then both the request and response structs have a
+ * trailing driver_data flex array. In this case the size of the base struct
+ * cannot be changed.
+ */
+#define UAPI_DEF_WRITE_IO(req, resp) \
+ .write.has_resp = 1 + \
+ BUILD_BUG_ON_ZERO(offsetof(req, response) != 0) + \
+ BUILD_BUG_ON_ZERO(sizeof_field(req, response) != \
+ sizeof(u64)), \
+ .write.req_size = sizeof(req), .write.resp_size = sizeof(resp)
+
+#define UAPI_DEF_WRITE_I(req) .write.req_size = sizeof(req)
+
+#define UAPI_DEF_WRITE_UDATA_IO(req, resp) \
+ UAPI_DEF_WRITE_IO(req, resp), \
+ .write.has_udata = \
+ 1 + \
+ BUILD_BUG_ON_ZERO(offsetof(req, driver_data) != \
+ sizeof(req)) + \
+ BUILD_BUG_ON_ZERO(offsetof(resp, driver_data) != \
+ sizeof(resp))
+
+#define UAPI_DEF_WRITE_UDATA_I(req) \
+ UAPI_DEF_WRITE_I(req), \
+ .write.has_udata = \
+ 1 + BUILD_BUG_ON_ZERO(offsetof(req, driver_data) != \
+ sizeof(req))
+
+/*
+ * The _EX versions are for use with WRITE_EX and allow the last struct member
+ * to be specified. Buffers that do not include that member will be rejected.
+ */
+#define UAPI_DEF_WRITE_IO_EX(req, req_last_member, resp, resp_last_member) \
+ .write.has_resp = 1, \
+ .write.req_size = offsetofend(req, req_last_member), \
+ .write.resp_size = offsetofend(resp, resp_last_member)
+
+#define UAPI_DEF_WRITE_I_EX(req, req_last_member) \
+ .write.req_size = offsetofend(req, req_last_member)
+
+const struct uapi_definition uverbs_def_write_intf[] = {
+ DECLARE_UVERBS_OBJECT(
+ UVERBS_OBJECT_AH,
+ DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_CREATE_AH,
+ ib_uverbs_create_ah,
+ UAPI_DEF_WRITE_UDATA_IO(
+ struct ib_uverbs_create_ah,
+ struct ib_uverbs_create_ah_resp)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_DESTROY_AH,
+ ib_uverbs_destroy_ah,
+ UAPI_DEF_WRITE_I(struct ib_uverbs_destroy_ah)),
+ UAPI_DEF_OBJ_NEEDS_FN(create_user_ah),
+ UAPI_DEF_OBJ_NEEDS_FN(destroy_ah)),
+
+ DECLARE_UVERBS_OBJECT(
+ UVERBS_OBJECT_COMP_CHANNEL,
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL,
+ ib_uverbs_create_comp_channel,
+ UAPI_DEF_WRITE_IO(
+ struct ib_uverbs_create_comp_channel,
+ struct ib_uverbs_create_comp_channel_resp))),
+
+ DECLARE_UVERBS_OBJECT(
+ UVERBS_OBJECT_CQ,
+ DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_CREATE_CQ,
+ ib_uverbs_create_cq,
+ UAPI_DEF_WRITE_UDATA_IO(
+ struct ib_uverbs_create_cq,
+ struct ib_uverbs_create_cq_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(create_cq)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_DESTROY_CQ,
+ ib_uverbs_destroy_cq,
+ UAPI_DEF_WRITE_IO(struct ib_uverbs_destroy_cq,
+ struct ib_uverbs_destroy_cq_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(destroy_cq)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_POLL_CQ,
+ ib_uverbs_poll_cq,
+ UAPI_DEF_WRITE_IO(struct ib_uverbs_poll_cq,
+ struct ib_uverbs_poll_cq_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(poll_cq)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_REQ_NOTIFY_CQ,
+ ib_uverbs_req_notify_cq,
+ UAPI_DEF_WRITE_I(struct ib_uverbs_req_notify_cq),
+ UAPI_DEF_METHOD_NEEDS_FN(req_notify_cq)),
+ DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_RESIZE_CQ,
+ ib_uverbs_resize_cq,
+ UAPI_DEF_WRITE_UDATA_IO(
+ struct ib_uverbs_resize_cq,
+ struct ib_uverbs_resize_cq_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(resize_cq)),
+ DECLARE_UVERBS_WRITE_EX(
+ IB_USER_VERBS_EX_CMD_CREATE_CQ,
+ ib_uverbs_ex_create_cq,
+ UAPI_DEF_WRITE_IO_EX(struct ib_uverbs_ex_create_cq,
+ reserved,
+ struct ib_uverbs_ex_create_cq_resp,
+ response_length),
+ UAPI_DEF_METHOD_NEEDS_FN(create_cq)),
+ DECLARE_UVERBS_WRITE_EX(
+ IB_USER_VERBS_EX_CMD_MODIFY_CQ,
+ ib_uverbs_ex_modify_cq,
+ UAPI_DEF_WRITE_I(struct ib_uverbs_ex_modify_cq),
+ UAPI_DEF_METHOD_NEEDS_FN(modify_cq))),
+
+ DECLARE_UVERBS_OBJECT(
+ UVERBS_OBJECT_DEVICE,
+ DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_GET_CONTEXT,
+ ib_uverbs_get_context,
+ UAPI_DEF_WRITE_UDATA_IO(
+ struct ib_uverbs_get_context,
+ struct ib_uverbs_get_context_resp)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_QUERY_DEVICE,
+ ib_uverbs_query_device,
+ UAPI_DEF_WRITE_IO(struct ib_uverbs_query_device,
+ struct ib_uverbs_query_device_resp)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_QUERY_PORT,
+ ib_uverbs_query_port,
+ UAPI_DEF_WRITE_IO(struct ib_uverbs_query_port,
+ struct ib_uverbs_query_port_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(query_port)),
+ DECLARE_UVERBS_WRITE_EX(
+ IB_USER_VERBS_EX_CMD_QUERY_DEVICE,
+ ib_uverbs_ex_query_device,
+ UAPI_DEF_WRITE_IO_EX(
+ struct ib_uverbs_ex_query_device,
+ reserved,
+ struct ib_uverbs_ex_query_device_resp,
+ response_length),
+ UAPI_DEF_METHOD_NEEDS_FN(query_device)),
+ UAPI_DEF_OBJ_NEEDS_FN(alloc_ucontext),
+ UAPI_DEF_OBJ_NEEDS_FN(dealloc_ucontext)),
+
+ DECLARE_UVERBS_OBJECT(
+ UVERBS_OBJECT_FLOW,
+ DECLARE_UVERBS_WRITE_EX(
+ IB_USER_VERBS_EX_CMD_CREATE_FLOW,
+ ib_uverbs_ex_create_flow,
+ UAPI_DEF_WRITE_IO_EX(struct ib_uverbs_create_flow,
+ flow_attr,
+ struct ib_uverbs_create_flow_resp,
+ flow_handle),
+ UAPI_DEF_METHOD_NEEDS_FN(create_flow)),
+ DECLARE_UVERBS_WRITE_EX(
+ IB_USER_VERBS_EX_CMD_DESTROY_FLOW,
+ ib_uverbs_ex_destroy_flow,
+ UAPI_DEF_WRITE_I(struct ib_uverbs_destroy_flow),
+ UAPI_DEF_METHOD_NEEDS_FN(destroy_flow))),
+
+ DECLARE_UVERBS_OBJECT(
+ UVERBS_OBJECT_MR,
+ DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_DEREG_MR,
+ ib_uverbs_dereg_mr,
+ UAPI_DEF_WRITE_I(struct ib_uverbs_dereg_mr),
+ UAPI_DEF_METHOD_NEEDS_FN(dereg_mr)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_REG_MR,
+ ib_uverbs_reg_mr,
+ UAPI_DEF_WRITE_UDATA_IO(struct ib_uverbs_reg_mr,
+ struct ib_uverbs_reg_mr_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(reg_user_mr)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_REREG_MR,
+ ib_uverbs_rereg_mr,
+ UAPI_DEF_WRITE_UDATA_IO(struct ib_uverbs_rereg_mr,
+ struct ib_uverbs_rereg_mr_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(rereg_user_mr))),
+
+ DECLARE_UVERBS_OBJECT(
+ UVERBS_OBJECT_MW,
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_ALLOC_MW,
+ ib_uverbs_alloc_mw,
+ UAPI_DEF_WRITE_UDATA_IO(struct ib_uverbs_alloc_mw,
+ struct ib_uverbs_alloc_mw_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(alloc_mw)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_DEALLOC_MW,
+ ib_uverbs_dealloc_mw,
+ UAPI_DEF_WRITE_I(struct ib_uverbs_dealloc_mw),
+ UAPI_DEF_METHOD_NEEDS_FN(dealloc_mw))),
+
+ DECLARE_UVERBS_OBJECT(
+ UVERBS_OBJECT_PD,
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_ALLOC_PD,
+ ib_uverbs_alloc_pd,
+ UAPI_DEF_WRITE_UDATA_IO(struct ib_uverbs_alloc_pd,
+ struct ib_uverbs_alloc_pd_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(alloc_pd)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_DEALLOC_PD,
+ ib_uverbs_dealloc_pd,
+ UAPI_DEF_WRITE_I(struct ib_uverbs_dealloc_pd),
+ UAPI_DEF_METHOD_NEEDS_FN(dealloc_pd))),
+
+ DECLARE_UVERBS_OBJECT(
+ UVERBS_OBJECT_QP,
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_ATTACH_MCAST,
+ ib_uverbs_attach_mcast,
+ UAPI_DEF_WRITE_I(struct ib_uverbs_attach_mcast),
+ UAPI_DEF_METHOD_NEEDS_FN(attach_mcast),
+ UAPI_DEF_METHOD_NEEDS_FN(detach_mcast)),
+ DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_CREATE_QP,
+ ib_uverbs_create_qp,
+ UAPI_DEF_WRITE_UDATA_IO(
+ struct ib_uverbs_create_qp,
+ struct ib_uverbs_create_qp_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(create_qp)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_DESTROY_QP,
+ ib_uverbs_destroy_qp,
+ UAPI_DEF_WRITE_IO(struct ib_uverbs_destroy_qp,
+ struct ib_uverbs_destroy_qp_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(destroy_qp)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_DETACH_MCAST,
+ ib_uverbs_detach_mcast,
+ UAPI_DEF_WRITE_I(struct ib_uverbs_detach_mcast),
+ UAPI_DEF_METHOD_NEEDS_FN(detach_mcast)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_MODIFY_QP,
+ ib_uverbs_modify_qp,
+ UAPI_DEF_WRITE_I(struct ib_uverbs_modify_qp),
+ UAPI_DEF_METHOD_NEEDS_FN(modify_qp)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_POST_RECV,
+ ib_uverbs_post_recv,
+ UAPI_DEF_WRITE_IO(struct ib_uverbs_post_recv,
+ struct ib_uverbs_post_recv_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(post_recv)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_POST_SEND,
+ ib_uverbs_post_send,
+ UAPI_DEF_WRITE_IO(struct ib_uverbs_post_send,
+ struct ib_uverbs_post_send_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(post_send)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_QUERY_QP,
+ ib_uverbs_query_qp,
+ UAPI_DEF_WRITE_IO(struct ib_uverbs_query_qp,
+ struct ib_uverbs_query_qp_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(query_qp)),
+ DECLARE_UVERBS_WRITE_EX(
+ IB_USER_VERBS_EX_CMD_CREATE_QP,
+ ib_uverbs_ex_create_qp,
+ UAPI_DEF_WRITE_IO_EX(struct ib_uverbs_ex_create_qp,
+ comp_mask,
+ struct ib_uverbs_ex_create_qp_resp,
+ response_length),
+ UAPI_DEF_METHOD_NEEDS_FN(create_qp)),
+ DECLARE_UVERBS_WRITE_EX(
+ IB_USER_VERBS_EX_CMD_MODIFY_QP,
+ ib_uverbs_ex_modify_qp,
+ UAPI_DEF_WRITE_IO_EX(struct ib_uverbs_ex_modify_qp,
+ base,
+ struct ib_uverbs_ex_modify_qp_resp,
+ response_length),
+ UAPI_DEF_METHOD_NEEDS_FN(modify_qp))),
+
+ DECLARE_UVERBS_OBJECT(
+ UVERBS_OBJECT_RWQ_IND_TBL,
+ DECLARE_UVERBS_WRITE_EX(
+ IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL,
+ ib_uverbs_ex_create_rwq_ind_table,
+ UAPI_DEF_WRITE_IO_EX(
+ struct ib_uverbs_ex_create_rwq_ind_table,
+ log_ind_tbl_size,
+ struct ib_uverbs_ex_create_rwq_ind_table_resp,
+ ind_tbl_num),
+ UAPI_DEF_METHOD_NEEDS_FN(create_rwq_ind_table)),
+ DECLARE_UVERBS_WRITE_EX(
+ IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL,
+ ib_uverbs_ex_destroy_rwq_ind_table,
+ UAPI_DEF_WRITE_I(
+ struct ib_uverbs_ex_destroy_rwq_ind_table),
+ UAPI_DEF_METHOD_NEEDS_FN(destroy_rwq_ind_table))),
+
+ DECLARE_UVERBS_OBJECT(
+ UVERBS_OBJECT_WQ,
+ DECLARE_UVERBS_WRITE_EX(
+ IB_USER_VERBS_EX_CMD_CREATE_WQ,
+ ib_uverbs_ex_create_wq,
+ UAPI_DEF_WRITE_IO_EX(struct ib_uverbs_ex_create_wq,
+ max_sge,
+ struct ib_uverbs_ex_create_wq_resp,
+ wqn),
+ UAPI_DEF_METHOD_NEEDS_FN(create_wq)),
+ DECLARE_UVERBS_WRITE_EX(
+ IB_USER_VERBS_EX_CMD_DESTROY_WQ,
+ ib_uverbs_ex_destroy_wq,
+ UAPI_DEF_WRITE_IO_EX(struct ib_uverbs_ex_destroy_wq,
+ wq_handle,
+ struct ib_uverbs_ex_destroy_wq_resp,
+ reserved),
+ UAPI_DEF_METHOD_NEEDS_FN(destroy_wq)),
+ DECLARE_UVERBS_WRITE_EX(
+ IB_USER_VERBS_EX_CMD_MODIFY_WQ,
+ ib_uverbs_ex_modify_wq,
+ UAPI_DEF_WRITE_I_EX(struct ib_uverbs_ex_modify_wq,
+ curr_wq_state),
+ UAPI_DEF_METHOD_NEEDS_FN(modify_wq))),
+
+ DECLARE_UVERBS_OBJECT(
+ UVERBS_OBJECT_SRQ,
+ DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_CREATE_SRQ,
+ ib_uverbs_create_srq,
+ UAPI_DEF_WRITE_UDATA_IO(
+ struct ib_uverbs_create_srq,
+ struct ib_uverbs_create_srq_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(create_srq)),
+ DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_CREATE_XSRQ,
+ ib_uverbs_create_xsrq,
+ UAPI_DEF_WRITE_UDATA_IO(
+ struct ib_uverbs_create_xsrq,
+ struct ib_uverbs_create_srq_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(create_srq)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_DESTROY_SRQ,
+ ib_uverbs_destroy_srq,
+ UAPI_DEF_WRITE_IO(struct ib_uverbs_destroy_srq,
+ struct ib_uverbs_destroy_srq_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(destroy_srq)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_MODIFY_SRQ,
+ ib_uverbs_modify_srq,
+ UAPI_DEF_WRITE_UDATA_I(struct ib_uverbs_modify_srq),
+ UAPI_DEF_METHOD_NEEDS_FN(modify_srq)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_POST_SRQ_RECV,
+ ib_uverbs_post_srq_recv,
+ UAPI_DEF_WRITE_IO(struct ib_uverbs_post_srq_recv,
+ struct ib_uverbs_post_srq_recv_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(post_srq_recv)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_QUERY_SRQ,
+ ib_uverbs_query_srq,
+ UAPI_DEF_WRITE_IO(struct ib_uverbs_query_srq,
+ struct ib_uverbs_query_srq_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(query_srq))),
+
+ DECLARE_UVERBS_OBJECT(
+ UVERBS_OBJECT_XRCD,
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_CLOSE_XRCD,
+ ib_uverbs_close_xrcd,
+ UAPI_DEF_WRITE_I(struct ib_uverbs_close_xrcd)),
+ DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_OPEN_QP,
+ ib_uverbs_open_qp,
+ UAPI_DEF_WRITE_UDATA_IO(
+ struct ib_uverbs_open_qp,
+ struct ib_uverbs_create_qp_resp)),
+ DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_OPEN_XRCD,
+ ib_uverbs_open_xrcd,
+ UAPI_DEF_WRITE_UDATA_IO(
+ struct ib_uverbs_open_xrcd,
+ struct ib_uverbs_open_xrcd_resp)),
+ UAPI_DEF_OBJ_NEEDS_FN(alloc_xrcd),
+ UAPI_DEF_OBJ_NEEDS_FN(dealloc_xrcd)),
+
+ {},
+};
diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c
new file mode 100644
index 000000000000..f80da6a67e24
--- /dev/null
+++ b/drivers/infiniband/core/uverbs_ioctl.c
@@ -0,0 +1,849 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <rdma/rdma_user_ioctl.h>
+#include <rdma/uverbs_ioctl.h>
+#include "rdma_core.h"
+#include "uverbs.h"
+
+struct bundle_alloc_head {
+ struct_group_tagged(bundle_alloc_head_hdr, hdr,
+ struct bundle_alloc_head *next;
+ );
+ u8 data[];
+};
+
+struct bundle_priv {
+ /* Must be first */
+ struct bundle_alloc_head_hdr alloc_head;
+ struct bundle_alloc_head *allocated_mem;
+ size_t internal_avail;
+ size_t internal_used;
+
+ struct radix_tree_root *radix;
+ const struct uverbs_api_ioctl_method *method_elm;
+ void __rcu **radix_slots;
+ unsigned long radix_slots_len;
+ u32 method_key;
+
+ struct ib_uverbs_attr __user *user_attrs;
+ struct ib_uverbs_attr *uattrs;
+
+ DECLARE_BITMAP(uobj_finalize, UVERBS_API_ATTR_BKEY_LEN);
+ DECLARE_BITMAP(spec_finalize, UVERBS_API_ATTR_BKEY_LEN);
+ DECLARE_BITMAP(uobj_hw_obj_valid, UVERBS_API_ATTR_BKEY_LEN);
+
+ /*
+ * Must be last. bundle ends in a flex array which overlaps
+ * internal_buffer.
+ */
+ struct uverbs_attr_bundle_hdr bundle;
+ u64 internal_buffer[32];
+};
+
+/*
+ * Each method has an absolute minimum amount of memory it needs to allocate,
+ * precompute that amount and determine if the onstack memory can be used or
+ * if allocation is need.
+ */
+void uapi_compute_bundle_size(struct uverbs_api_ioctl_method *method_elm,
+ unsigned int num_attrs)
+{
+ struct bundle_priv *pbundle;
+ struct uverbs_attr_bundle *bundle;
+ size_t bundle_size =
+ offsetof(struct bundle_priv, internal_buffer) +
+ sizeof(*bundle->attrs) * method_elm->key_bitmap_len +
+ sizeof(*pbundle->uattrs) * num_attrs;
+
+ method_elm->use_stack = bundle_size <= sizeof(*pbundle);
+ method_elm->bundle_size =
+ ALIGN(bundle_size + 256, sizeof(*pbundle->internal_buffer));
+
+ /* Do not want order-2 allocations for this. */
+ WARN_ON_ONCE(method_elm->bundle_size > PAGE_SIZE);
+}
+
+/**
+ * _uverbs_alloc() - Quickly allocate memory for use with a bundle
+ * @bundle: The bundle
+ * @size: Number of bytes to allocate
+ * @flags: Allocator flags
+ *
+ * The bundle allocator is intended for allocations that are connected with
+ * processing the system call related to the bundle. The allocated memory is
+ * always freed once the system call completes, and cannot be freed any other
+ * way.
+ *
+ * This tries to use a small pool of pre-allocated memory for performance.
+ */
+__malloc void *_uverbs_alloc(struct uverbs_attr_bundle *bundle, size_t size,
+ gfp_t flags)
+{
+ struct bundle_priv *pbundle =
+ container_of(&bundle->hdr, struct bundle_priv, bundle);
+ size_t new_used;
+ void *res;
+
+ if (check_add_overflow(size, pbundle->internal_used, &new_used))
+ return ERR_PTR(-EOVERFLOW);
+
+ if (new_used > pbundle->internal_avail) {
+ struct bundle_alloc_head *buf;
+
+ buf = kvmalloc(struct_size(buf, data, size), flags);
+ if (!buf)
+ return ERR_PTR(-ENOMEM);
+ buf->next = pbundle->allocated_mem;
+ pbundle->allocated_mem = buf;
+ return buf->data;
+ }
+
+ res = (void *)pbundle->internal_buffer + pbundle->internal_used;
+ pbundle->internal_used =
+ ALIGN(new_used, sizeof(*pbundle->internal_buffer));
+ if (want_init_on_alloc(flags))
+ memset(res, 0, size);
+ return res;
+}
+EXPORT_SYMBOL(_uverbs_alloc);
+
+static bool uverbs_is_attr_cleared(const struct ib_uverbs_attr *uattr,
+ u16 len)
+{
+ if (uattr->len > sizeof_field(struct ib_uverbs_attr, data))
+ return ib_is_buffer_cleared(u64_to_user_ptr(uattr->data) + len,
+ uattr->len - len);
+
+ return !memchr_inv((const void *)&uattr->data + len,
+ 0, uattr->len - len);
+}
+
+static int uverbs_set_output(const struct uverbs_attr_bundle *bundle,
+ const struct uverbs_attr *attr)
+{
+ struct bundle_priv *pbundle =
+ container_of(&bundle->hdr, struct bundle_priv, bundle);
+ u16 flags;
+
+ flags = pbundle->uattrs[attr->ptr_attr.uattr_idx].flags |
+ UVERBS_ATTR_F_VALID_OUTPUT;
+ if (put_user(flags,
+ &pbundle->user_attrs[attr->ptr_attr.uattr_idx].flags))
+ return -EFAULT;
+ return 0;
+}
+
+static int uverbs_process_idrs_array(struct bundle_priv *pbundle,
+ const struct uverbs_api_attr *attr_uapi,
+ struct uverbs_objs_arr_attr *attr,
+ struct ib_uverbs_attr *uattr,
+ u32 attr_bkey)
+{
+ struct uverbs_attr_bundle *bundle =
+ container_of(&pbundle->bundle, struct uverbs_attr_bundle, hdr);
+ const struct uverbs_attr_spec *spec = &attr_uapi->spec;
+ size_t array_len;
+ u32 *idr_vals;
+ int ret = 0;
+ size_t i;
+
+ if (uattr->attr_data.reserved)
+ return -EINVAL;
+
+ if (uattr->len % sizeof(u32))
+ return -EINVAL;
+
+ array_len = uattr->len / sizeof(u32);
+ if (array_len < spec->u2.objs_arr.min_len ||
+ array_len > spec->u2.objs_arr.max_len)
+ return -EINVAL;
+
+ attr->uobjects =
+ uverbs_alloc(bundle,
+ array_size(array_len, sizeof(*attr->uobjects)));
+ if (IS_ERR(attr->uobjects))
+ return PTR_ERR(attr->uobjects);
+
+ /*
+ * Since idr is 4B and *uobjects is >= 4B, we can use attr->uobjects
+ * to store idrs array and avoid additional memory allocation. The
+ * idrs array is offset to the end of the uobjects array so we will be
+ * able to read idr and replace with a pointer.
+ */
+ idr_vals = (u32 *)(attr->uobjects + array_len) - array_len;
+
+ if (uattr->len > sizeof(uattr->data)) {
+ ret = copy_from_user(idr_vals, u64_to_user_ptr(uattr->data),
+ uattr->len);
+ if (ret)
+ return -EFAULT;
+ } else {
+ memcpy(idr_vals, &uattr->data, uattr->len);
+ }
+
+ for (i = 0; i != array_len; i++) {
+ attr->uobjects[i] = uverbs_get_uobject_from_file(
+ spec->u2.objs_arr.obj_type, spec->u2.objs_arr.access,
+ idr_vals[i], bundle);
+ if (IS_ERR(attr->uobjects[i])) {
+ ret = PTR_ERR(attr->uobjects[i]);
+ break;
+ }
+ }
+
+ attr->len = i;
+ __set_bit(attr_bkey, pbundle->spec_finalize);
+ return ret;
+}
+
+static void uverbs_free_idrs_array(const struct uverbs_api_attr *attr_uapi,
+ struct uverbs_objs_arr_attr *attr,
+ bool commit,
+ struct uverbs_attr_bundle *attrs)
+{
+ const struct uverbs_attr_spec *spec = &attr_uapi->spec;
+ size_t i;
+
+ for (i = 0; i != attr->len; i++)
+ uverbs_finalize_object(attr->uobjects[i],
+ spec->u2.objs_arr.access, false, commit,
+ attrs);
+}
+
+static int uverbs_process_attr(struct bundle_priv *pbundle,
+ const struct uverbs_api_attr *attr_uapi,
+ struct ib_uverbs_attr *uattr, u32 attr_bkey)
+{
+ const struct uverbs_attr_spec *spec = &attr_uapi->spec;
+ struct uverbs_attr_bundle *bundle =
+ container_of(&pbundle->bundle, struct uverbs_attr_bundle, hdr);
+ struct uverbs_attr *e = &bundle->attrs[attr_bkey];
+ const struct uverbs_attr_spec *val_spec = spec;
+ struct uverbs_obj_attr *o_attr;
+
+ switch (spec->type) {
+ case UVERBS_ATTR_TYPE_ENUM_IN:
+ if (uattr->attr_data.enum_data.elem_id >= spec->u.enum_def.num_elems)
+ return -EOPNOTSUPP;
+
+ if (uattr->attr_data.enum_data.reserved)
+ return -EINVAL;
+
+ val_spec = &spec->u2.enum_def.ids[uattr->attr_data.enum_data.elem_id];
+
+ /* Currently we only support PTR_IN based enums */
+ if (val_spec->type != UVERBS_ATTR_TYPE_PTR_IN)
+ return -EOPNOTSUPP;
+
+ e->ptr_attr.enum_id = uattr->attr_data.enum_data.elem_id;
+ fallthrough;
+ case UVERBS_ATTR_TYPE_PTR_IN:
+ /* Ensure that any data provided by userspace beyond the known
+ * struct is zero. Userspace that knows how to use some future
+ * longer struct will fail here if used with an old kernel and
+ * non-zero content, making ABI compat/discovery simpler.
+ */
+ if (uattr->len > val_spec->u.ptr.len &&
+ val_spec->zero_trailing &&
+ !uverbs_is_attr_cleared(uattr, val_spec->u.ptr.len))
+ return -EOPNOTSUPP;
+
+ fallthrough;
+ case UVERBS_ATTR_TYPE_PTR_OUT:
+ if (uattr->len < val_spec->u.ptr.min_len ||
+ (!val_spec->zero_trailing &&
+ uattr->len > val_spec->u.ptr.len))
+ return -EINVAL;
+
+ if (spec->type != UVERBS_ATTR_TYPE_ENUM_IN &&
+ uattr->attr_data.reserved)
+ return -EINVAL;
+
+ e->ptr_attr.uattr_idx = uattr - pbundle->uattrs;
+ e->ptr_attr.len = uattr->len;
+
+ if (val_spec->alloc_and_copy && !uverbs_attr_ptr_is_inline(e)) {
+ void *p;
+
+ p = uverbs_alloc(bundle, uattr->len);
+ if (IS_ERR(p))
+ return PTR_ERR(p);
+
+ e->ptr_attr.ptr = p;
+
+ if (copy_from_user(p, u64_to_user_ptr(uattr->data),
+ uattr->len))
+ return -EFAULT;
+ } else {
+ e->ptr_attr.data = uattr->data;
+ }
+ break;
+
+ case UVERBS_ATTR_TYPE_IDR:
+ case UVERBS_ATTR_TYPE_FD:
+ if (uattr->attr_data.reserved)
+ return -EINVAL;
+
+ if (uattr->len != 0)
+ return -EINVAL;
+
+ o_attr = &e->obj_attr;
+ o_attr->attr_elm = attr_uapi;
+
+ /*
+ * The type of uattr->data is u64 for UVERBS_ATTR_TYPE_IDR and
+ * s64 for UVERBS_ATTR_TYPE_FD. We can cast the u64 to s64
+ * here without caring about truncation as we know that the
+ * IDR implementation today rejects negative IDs
+ */
+ o_attr->uobject = uverbs_get_uobject_from_file(
+ spec->u.obj.obj_type, spec->u.obj.access,
+ uattr->data_s64, bundle);
+ if (IS_ERR(o_attr->uobject))
+ return PTR_ERR(o_attr->uobject);
+ __set_bit(attr_bkey, pbundle->uobj_finalize);
+
+ if (spec->u.obj.access == UVERBS_ACCESS_NEW) {
+ unsigned int uattr_idx = uattr - pbundle->uattrs;
+ s64 id = o_attr->uobject->id;
+
+ /* Copy the allocated id to the user-space */
+ if (put_user(id, &pbundle->user_attrs[uattr_idx].data))
+ return -EFAULT;
+ }
+
+ break;
+
+ case UVERBS_ATTR_TYPE_RAW_FD:
+ if (uattr->attr_data.reserved || uattr->len != 0 ||
+ uattr->data_s64 < INT_MIN || uattr->data_s64 > INT_MAX)
+ return -EINVAL;
+ /* _uverbs_get_const_signed() is the accessor */
+ e->ptr_attr.data = uattr->data_s64;
+ break;
+
+ case UVERBS_ATTR_TYPE_IDRS_ARRAY:
+ return uverbs_process_idrs_array(pbundle, attr_uapi,
+ &e->objs_arr_attr, uattr,
+ attr_bkey);
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+/*
+ * We search the radix tree with the method prefix and now we want to fast
+ * search the suffix bits to get a particular attribute pointer. It is not
+ * totally clear to me if this breaks the radix tree encasulation or not, but
+ * it uses the iter data to determine if the method iter points at the same
+ * chunk that will store the attribute, if so it just derefs it directly. By
+ * construction in most kernel configs the method and attrs will all fit in a
+ * single radix chunk, so in most cases this will have no search. Other cases
+ * this falls back to a full search.
+ */
+static void __rcu **uapi_get_attr_for_method(struct bundle_priv *pbundle,
+ u32 attr_key)
+{
+ void __rcu **slot;
+
+ if (likely(attr_key < pbundle->radix_slots_len)) {
+ void *entry;
+
+ slot = pbundle->radix_slots + attr_key;
+ entry = rcu_dereference_raw(*slot);
+ if (likely(!radix_tree_is_internal_node(entry) && entry))
+ return slot;
+ }
+
+ return radix_tree_lookup_slot(pbundle->radix,
+ pbundle->method_key | attr_key);
+}
+
+static int uverbs_set_attr(struct bundle_priv *pbundle,
+ struct ib_uverbs_attr *uattr)
+{
+ u32 attr_key = uapi_key_attr(uattr->attr_id);
+ u32 attr_bkey = uapi_bkey_attr(attr_key);
+ const struct uverbs_api_attr *attr;
+ void __rcu **slot;
+ int ret;
+
+ slot = uapi_get_attr_for_method(pbundle, attr_key);
+ if (!slot) {
+ /*
+ * Kernel does not support the attribute but user-space says it
+ * is mandatory
+ */
+ if (uattr->flags & UVERBS_ATTR_F_MANDATORY)
+ return -EPROTONOSUPPORT;
+ return 0;
+ }
+ attr = rcu_dereference_protected(*slot, true);
+
+ /* Reject duplicate attributes from user-space */
+ if (test_bit(attr_bkey, pbundle->bundle.attr_present))
+ return -EINVAL;
+
+ ret = uverbs_process_attr(pbundle, attr, uattr, attr_bkey);
+ if (ret)
+ return ret;
+
+ __set_bit(attr_bkey, pbundle->bundle.attr_present);
+
+ return 0;
+}
+
+static int ib_uverbs_run_method(struct bundle_priv *pbundle,
+ unsigned int num_attrs)
+{
+ int (*handler)(struct uverbs_attr_bundle *attrs);
+ struct uverbs_attr_bundle *bundle =
+ container_of(&pbundle->bundle, struct uverbs_attr_bundle, hdr);
+ size_t uattrs_size = array_size(sizeof(*pbundle->uattrs), num_attrs);
+ unsigned int destroy_bkey = pbundle->method_elm->destroy_bkey;
+ unsigned int i;
+ int ret;
+
+ /* See uverbs_disassociate_api() */
+ handler = srcu_dereference(
+ pbundle->method_elm->handler,
+ &pbundle->bundle.ufile->device->disassociate_srcu);
+ if (!handler)
+ return -EIO;
+
+ pbundle->uattrs = uverbs_alloc(bundle, uattrs_size);
+ if (IS_ERR(pbundle->uattrs))
+ return PTR_ERR(pbundle->uattrs);
+ if (copy_from_user(pbundle->uattrs, pbundle->user_attrs, uattrs_size))
+ return -EFAULT;
+
+ for (i = 0; i != num_attrs; i++) {
+ ret = uverbs_set_attr(pbundle, &pbundle->uattrs[i]);
+ if (unlikely(ret))
+ return ret;
+ }
+
+ /* User space did not provide all the mandatory attributes */
+ if (unlikely(!bitmap_subset(pbundle->method_elm->attr_mandatory,
+ pbundle->bundle.attr_present,
+ pbundle->method_elm->key_bitmap_len)))
+ return -EINVAL;
+
+ if (pbundle->method_elm->has_udata)
+ uverbs_fill_udata(bundle, &pbundle->bundle.driver_udata,
+ UVERBS_ATTR_UHW_IN, UVERBS_ATTR_UHW_OUT);
+ else
+ pbundle->bundle.driver_udata = (struct ib_udata){};
+
+ if (destroy_bkey != UVERBS_API_ATTR_BKEY_LEN) {
+ struct uverbs_obj_attr *destroy_attr = &bundle->attrs[destroy_bkey].obj_attr;
+
+ ret = uobj_destroy(destroy_attr->uobject, bundle);
+ if (ret)
+ return ret;
+ __clear_bit(destroy_bkey, pbundle->uobj_finalize);
+
+ ret = handler(bundle);
+ uobj_put_destroy(destroy_attr->uobject);
+ } else {
+ ret = handler(bundle);
+ }
+
+ /*
+ * Until the drivers are revised to use the bundle directly we have to
+ * assume that the driver wrote to its UHW_OUT and flag userspace
+ * appropriately.
+ */
+ if (!ret && pbundle->method_elm->has_udata) {
+ const struct uverbs_attr *attr =
+ uverbs_attr_get(bundle, UVERBS_ATTR_UHW_OUT);
+
+ if (!IS_ERR(attr))
+ ret = uverbs_set_output(bundle, attr);
+ }
+
+ /*
+ * EPROTONOSUPPORT is ONLY to be returned if the ioctl framework can
+ * not invoke the method because the request is not supported. No
+ * other cases should return this code.
+ */
+ if (WARN_ON_ONCE(ret == -EPROTONOSUPPORT))
+ return -EINVAL;
+
+ return ret;
+}
+
+static void bundle_destroy(struct bundle_priv *pbundle, bool commit)
+{
+ unsigned int key_bitmap_len = pbundle->method_elm->key_bitmap_len;
+ struct uverbs_attr_bundle *bundle =
+ container_of(&pbundle->bundle, struct uverbs_attr_bundle, hdr);
+ struct bundle_alloc_head *memblock;
+ unsigned int i;
+
+ /* fast path for simple uobjects */
+ i = -1;
+ while ((i = find_next_bit(pbundle->uobj_finalize, key_bitmap_len,
+ i + 1)) < key_bitmap_len) {
+ struct uverbs_attr *attr = &bundle->attrs[i];
+
+ uverbs_finalize_object(
+ attr->obj_attr.uobject,
+ attr->obj_attr.attr_elm->spec.u.obj.access,
+ test_bit(i, pbundle->uobj_hw_obj_valid),
+ commit, bundle);
+ }
+
+ i = -1;
+ while ((i = find_next_bit(pbundle->spec_finalize, key_bitmap_len,
+ i + 1)) < key_bitmap_len) {
+ struct uverbs_attr *attr = &bundle->attrs[i];
+ const struct uverbs_api_attr *attr_uapi;
+ void __rcu **slot;
+
+ slot = uapi_get_attr_for_method(
+ pbundle,
+ pbundle->method_key | uapi_bkey_to_key_attr(i));
+ if (WARN_ON(!slot))
+ continue;
+
+ attr_uapi = rcu_dereference_protected(*slot, true);
+
+ if (attr_uapi->spec.type == UVERBS_ATTR_TYPE_IDRS_ARRAY) {
+ uverbs_free_idrs_array(attr_uapi, &attr->objs_arr_attr,
+ commit, bundle);
+ }
+ }
+
+ for (memblock = pbundle->allocated_mem; memblock;) {
+ struct bundle_alloc_head *tmp = memblock;
+
+ memblock = memblock->next;
+ kvfree(tmp);
+ }
+}
+
+static int ib_uverbs_cmd_verbs(struct ib_uverbs_file *ufile,
+ struct ib_uverbs_ioctl_hdr *hdr,
+ struct ib_uverbs_attr __user *user_attrs)
+{
+ const struct uverbs_api_ioctl_method *method_elm;
+ struct uverbs_api *uapi = ufile->device->uapi;
+ struct radix_tree_iter attrs_iter;
+ struct bundle_priv *pbundle;
+ struct bundle_priv onstack;
+ void __rcu **slot;
+ int ret;
+
+ if (unlikely(hdr->driver_id != uapi->driver_id))
+ return -EINVAL;
+
+ slot = radix_tree_iter_lookup(
+ &uapi->radix, &attrs_iter,
+ uapi_key_obj(hdr->object_id) |
+ uapi_key_ioctl_method(hdr->method_id));
+ if (unlikely(!slot))
+ return -EPROTONOSUPPORT;
+ method_elm = rcu_dereference_protected(*slot, true);
+
+ if (!method_elm->use_stack) {
+ pbundle = kmalloc(method_elm->bundle_size, GFP_KERNEL);
+ if (!pbundle)
+ return -ENOMEM;
+ pbundle->internal_avail =
+ method_elm->bundle_size -
+ offsetof(struct bundle_priv, internal_buffer);
+ pbundle->alloc_head.next = NULL;
+ pbundle->allocated_mem = container_of(&pbundle->alloc_head,
+ struct bundle_alloc_head, hdr);
+ } else {
+ pbundle = &onstack;
+ pbundle->internal_avail = sizeof(pbundle->internal_buffer);
+ pbundle->allocated_mem = NULL;
+ }
+
+ /* Space for the pbundle->bundle.attrs flex array */
+ pbundle->method_elm = method_elm;
+ pbundle->method_key = attrs_iter.index;
+ pbundle->bundle.ufile = ufile;
+ pbundle->bundle.context = NULL; /* only valid if bundle has uobject */
+ pbundle->radix = &uapi->radix;
+ pbundle->radix_slots = slot;
+ pbundle->radix_slots_len = radix_tree_chunk_size(&attrs_iter);
+ pbundle->user_attrs = user_attrs;
+
+ pbundle->internal_used = ALIGN(pbundle->method_elm->key_bitmap_len *
+ sizeof(*container_of(&pbundle->bundle,
+ struct uverbs_attr_bundle, hdr)->attrs),
+ sizeof(*pbundle->internal_buffer));
+ memset(pbundle->bundle.attr_present, 0,
+ sizeof(pbundle->bundle.attr_present));
+ memset(pbundle->uobj_finalize, 0, sizeof(pbundle->uobj_finalize));
+ memset(pbundle->spec_finalize, 0, sizeof(pbundle->spec_finalize));
+ memset(pbundle->uobj_hw_obj_valid, 0,
+ sizeof(pbundle->uobj_hw_obj_valid));
+
+ ret = ib_uverbs_run_method(pbundle, hdr->num_attrs);
+ bundle_destroy(pbundle, ret == 0);
+ return ret;
+}
+
+long ib_uverbs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+{
+ struct ib_uverbs_file *file = filp->private_data;
+ struct ib_uverbs_ioctl_hdr __user *user_hdr =
+ (struct ib_uverbs_ioctl_hdr __user *)arg;
+ struct ib_uverbs_ioctl_hdr hdr;
+ int srcu_key;
+ int err;
+
+ if (unlikely(cmd != RDMA_VERBS_IOCTL))
+ return -ENOIOCTLCMD;
+
+ err = copy_from_user(&hdr, user_hdr, sizeof(hdr));
+ if (err)
+ return -EFAULT;
+
+ if (hdr.length > PAGE_SIZE ||
+ hdr.length != struct_size(&hdr, attrs, hdr.num_attrs))
+ return -EINVAL;
+
+ if (hdr.reserved1 || hdr.reserved2)
+ return -EPROTONOSUPPORT;
+
+ srcu_key = srcu_read_lock(&file->device->disassociate_srcu);
+ err = ib_uverbs_cmd_verbs(file, &hdr, user_hdr->attrs);
+ srcu_read_unlock(&file->device->disassociate_srcu, srcu_key);
+ return err;
+}
+
+int uverbs_get_flags64(u64 *to, const struct uverbs_attr_bundle *attrs_bundle,
+ size_t idx, u64 allowed_bits)
+{
+ const struct uverbs_attr *attr;
+ u64 flags;
+
+ attr = uverbs_attr_get(attrs_bundle, idx);
+ /* Missing attribute means 0 flags */
+ if (IS_ERR(attr)) {
+ *to = 0;
+ return 0;
+ }
+
+ /*
+ * New userspace code should use 8 bytes to pass flags, but we
+ * transparently support old userspaces that were using 4 bytes as
+ * well.
+ */
+ if (attr->ptr_attr.len == 8)
+ flags = attr->ptr_attr.data;
+ else if (attr->ptr_attr.len == 4)
+ flags = *(u32 *)&attr->ptr_attr.data;
+ else
+ return -EINVAL;
+
+ if (flags & ~allowed_bits)
+ return -EINVAL;
+
+ *to = flags;
+ return 0;
+}
+EXPORT_SYMBOL(uverbs_get_flags64);
+
+int uverbs_get_flags32(u32 *to, const struct uverbs_attr_bundle *attrs_bundle,
+ size_t idx, u64 allowed_bits)
+{
+ u64 flags;
+ int ret;
+
+ ret = uverbs_get_flags64(&flags, attrs_bundle, idx, allowed_bits);
+ if (ret)
+ return ret;
+
+ if (flags > U32_MAX)
+ return -EINVAL;
+ *to = flags;
+
+ return 0;
+}
+EXPORT_SYMBOL(uverbs_get_flags32);
+
+/*
+ * Fill a ib_udata struct (core or uhw) using the given attribute IDs.
+ * This is primarily used to convert the UVERBS_ATTR_UHW() into the
+ * ib_udata format used by the drivers.
+ */
+void uverbs_fill_udata(struct uverbs_attr_bundle *bundle,
+ struct ib_udata *udata, unsigned int attr_in,
+ unsigned int attr_out)
+{
+ struct bundle_priv *pbundle =
+ container_of(&bundle->hdr, struct bundle_priv, bundle);
+ struct uverbs_attr_bundle *bundle_aux =
+ container_of(&pbundle->bundle, struct uverbs_attr_bundle, hdr);
+ const struct uverbs_attr *in =
+ uverbs_attr_get(bundle_aux, attr_in);
+ const struct uverbs_attr *out =
+ uverbs_attr_get(bundle_aux, attr_out);
+
+ if (!IS_ERR(in)) {
+ udata->inlen = in->ptr_attr.len;
+ if (uverbs_attr_ptr_is_inline(in))
+ udata->inbuf =
+ &pbundle->user_attrs[in->ptr_attr.uattr_idx]
+ .data;
+ else
+ udata->inbuf = u64_to_user_ptr(in->ptr_attr.data);
+ } else {
+ udata->inbuf = NULL;
+ udata->inlen = 0;
+ }
+
+ if (!IS_ERR(out)) {
+ udata->outbuf = u64_to_user_ptr(out->ptr_attr.data);
+ udata->outlen = out->ptr_attr.len;
+ } else {
+ udata->outbuf = NULL;
+ udata->outlen = 0;
+ }
+}
+
+int uverbs_copy_to(const struct uverbs_attr_bundle *bundle, size_t idx,
+ const void *from, size_t size)
+{
+ const struct uverbs_attr *attr = uverbs_attr_get(bundle, idx);
+ size_t min_size;
+
+ if (IS_ERR(attr))
+ return PTR_ERR(attr);
+
+ min_size = min_t(size_t, attr->ptr_attr.len, size);
+ if (copy_to_user(u64_to_user_ptr(attr->ptr_attr.data), from, min_size))
+ return -EFAULT;
+
+ return uverbs_set_output(bundle, attr);
+}
+EXPORT_SYMBOL(uverbs_copy_to);
+
+
+/*
+ * This is only used if the caller has directly used copy_to_use to write the
+ * data. It signals to user space that the buffer is filled in.
+ */
+int uverbs_output_written(const struct uverbs_attr_bundle *bundle, size_t idx)
+{
+ const struct uverbs_attr *attr = uverbs_attr_get(bundle, idx);
+
+ if (IS_ERR(attr))
+ return PTR_ERR(attr);
+
+ return uverbs_set_output(bundle, attr);
+}
+
+int _uverbs_get_const_signed(s64 *to,
+ const struct uverbs_attr_bundle *attrs_bundle,
+ size_t idx, s64 lower_bound, u64 upper_bound,
+ s64 *def_val)
+{
+ const struct uverbs_attr *attr;
+
+ attr = uverbs_attr_get(attrs_bundle, idx);
+ if (IS_ERR(attr)) {
+ if ((PTR_ERR(attr) != -ENOENT) || !def_val)
+ return PTR_ERR(attr);
+
+ *to = *def_val;
+ } else {
+ *to = attr->ptr_attr.data;
+ }
+
+ if (*to < lower_bound || (*to > 0 && (u64)*to > upper_bound))
+ return -EINVAL;
+
+ return 0;
+}
+EXPORT_SYMBOL(_uverbs_get_const_signed);
+
+int _uverbs_get_const_unsigned(u64 *to,
+ const struct uverbs_attr_bundle *attrs_bundle,
+ size_t idx, u64 upper_bound, u64 *def_val)
+{
+ const struct uverbs_attr *attr;
+
+ attr = uverbs_attr_get(attrs_bundle, idx);
+ if (IS_ERR(attr)) {
+ if ((PTR_ERR(attr) != -ENOENT) || !def_val)
+ return PTR_ERR(attr);
+
+ *to = *def_val;
+ } else {
+ *to = attr->ptr_attr.data;
+ }
+
+ if (*to > upper_bound)
+ return -EINVAL;
+
+ return 0;
+}
+EXPORT_SYMBOL(_uverbs_get_const_unsigned);
+
+int uverbs_copy_to_struct_or_zero(const struct uverbs_attr_bundle *bundle,
+ size_t idx, const void *from, size_t size)
+{
+ const struct uverbs_attr *attr = uverbs_attr_get(bundle, idx);
+
+ if (IS_ERR(attr))
+ return PTR_ERR(attr);
+
+ if (size < attr->ptr_attr.len) {
+ if (clear_user(u64_to_user_ptr(attr->ptr_attr.data) + size,
+ attr->ptr_attr.len - size))
+ return -EFAULT;
+ }
+ return uverbs_copy_to(bundle, idx, from, size);
+}
+EXPORT_SYMBOL(uverbs_copy_to_struct_or_zero);
+
+/* Once called an abort will call through to the type's destroy_hw() */
+void uverbs_finalize_uobj_create(const struct uverbs_attr_bundle *bundle,
+ u16 idx)
+{
+ struct bundle_priv *pbundle =
+ container_of(&bundle->hdr, struct bundle_priv, bundle);
+
+ __set_bit(uapi_bkey_attr(uapi_key_attr(idx)),
+ pbundle->uobj_hw_obj_valid);
+}
+EXPORT_SYMBOL(uverbs_finalize_uobj_create);
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index b3f95d453fba..973fe2c7ef53 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -45,12 +45,18 @@
#include <linux/cdev.h>
#include <linux/anon_inodes.h>
#include <linux/slab.h>
+#include <linux/sched/mm.h>
#include <linux/uaccess.h>
#include <rdma/ib.h>
+#include <rdma/uverbs_std_types.h>
+#include <rdma/rdma_netlink.h>
+#include <rdma/ib_ucaps.h>
#include "uverbs.h"
+#include "core_priv.h"
+#include "rdma_core.h"
MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("InfiniBand userspace verbs access");
@@ -59,162 +65,120 @@ MODULE_LICENSE("Dual BSD/GPL");
enum {
IB_UVERBS_MAJOR = 231,
IB_UVERBS_BASE_MINOR = 192,
- IB_UVERBS_MAX_DEVICES = 32
+ IB_UVERBS_MAX_DEVICES = RDMA_MAX_PORTS,
+ IB_UVERBS_NUM_FIXED_MINOR = 32,
+ IB_UVERBS_NUM_DYNAMIC_MINOR = IB_UVERBS_MAX_DEVICES - IB_UVERBS_NUM_FIXED_MINOR,
};
#define IB_UVERBS_BASE_DEV MKDEV(IB_UVERBS_MAJOR, IB_UVERBS_BASE_MINOR)
-static struct class *uverbs_class;
-
-DEFINE_SPINLOCK(ib_uverbs_idr_lock);
-DEFINE_IDR(ib_uverbs_pd_idr);
-DEFINE_IDR(ib_uverbs_mr_idr);
-DEFINE_IDR(ib_uverbs_mw_idr);
-DEFINE_IDR(ib_uverbs_ah_idr);
-DEFINE_IDR(ib_uverbs_cq_idr);
-DEFINE_IDR(ib_uverbs_qp_idr);
-DEFINE_IDR(ib_uverbs_srq_idr);
-DEFINE_IDR(ib_uverbs_xrcd_idr);
-DEFINE_IDR(ib_uverbs_rule_idr);
-DEFINE_IDR(ib_uverbs_wq_idr);
-DEFINE_IDR(ib_uverbs_rwq_ind_tbl_idr);
-
-static DEFINE_SPINLOCK(map_lock);
-static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES);
-
-static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- const char __user *buf, int in_len,
- int out_len) = {
- [IB_USER_VERBS_CMD_GET_CONTEXT] = ib_uverbs_get_context,
- [IB_USER_VERBS_CMD_QUERY_DEVICE] = ib_uverbs_query_device,
- [IB_USER_VERBS_CMD_QUERY_PORT] = ib_uverbs_query_port,
- [IB_USER_VERBS_CMD_ALLOC_PD] = ib_uverbs_alloc_pd,
- [IB_USER_VERBS_CMD_DEALLOC_PD] = ib_uverbs_dealloc_pd,
- [IB_USER_VERBS_CMD_REG_MR] = ib_uverbs_reg_mr,
- [IB_USER_VERBS_CMD_REREG_MR] = ib_uverbs_rereg_mr,
- [IB_USER_VERBS_CMD_DEREG_MR] = ib_uverbs_dereg_mr,
- [IB_USER_VERBS_CMD_ALLOC_MW] = ib_uverbs_alloc_mw,
- [IB_USER_VERBS_CMD_DEALLOC_MW] = ib_uverbs_dealloc_mw,
- [IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL] = ib_uverbs_create_comp_channel,
- [IB_USER_VERBS_CMD_CREATE_CQ] = ib_uverbs_create_cq,
- [IB_USER_VERBS_CMD_RESIZE_CQ] = ib_uverbs_resize_cq,
- [IB_USER_VERBS_CMD_POLL_CQ] = ib_uverbs_poll_cq,
- [IB_USER_VERBS_CMD_REQ_NOTIFY_CQ] = ib_uverbs_req_notify_cq,
- [IB_USER_VERBS_CMD_DESTROY_CQ] = ib_uverbs_destroy_cq,
- [IB_USER_VERBS_CMD_CREATE_QP] = ib_uverbs_create_qp,
- [IB_USER_VERBS_CMD_QUERY_QP] = ib_uverbs_query_qp,
- [IB_USER_VERBS_CMD_MODIFY_QP] = ib_uverbs_modify_qp,
- [IB_USER_VERBS_CMD_DESTROY_QP] = ib_uverbs_destroy_qp,
- [IB_USER_VERBS_CMD_POST_SEND] = ib_uverbs_post_send,
- [IB_USER_VERBS_CMD_POST_RECV] = ib_uverbs_post_recv,
- [IB_USER_VERBS_CMD_POST_SRQ_RECV] = ib_uverbs_post_srq_recv,
- [IB_USER_VERBS_CMD_CREATE_AH] = ib_uverbs_create_ah,
- [IB_USER_VERBS_CMD_DESTROY_AH] = ib_uverbs_destroy_ah,
- [IB_USER_VERBS_CMD_ATTACH_MCAST] = ib_uverbs_attach_mcast,
- [IB_USER_VERBS_CMD_DETACH_MCAST] = ib_uverbs_detach_mcast,
- [IB_USER_VERBS_CMD_CREATE_SRQ] = ib_uverbs_create_srq,
- [IB_USER_VERBS_CMD_MODIFY_SRQ] = ib_uverbs_modify_srq,
- [IB_USER_VERBS_CMD_QUERY_SRQ] = ib_uverbs_query_srq,
- [IB_USER_VERBS_CMD_DESTROY_SRQ] = ib_uverbs_destroy_srq,
- [IB_USER_VERBS_CMD_OPEN_XRCD] = ib_uverbs_open_xrcd,
- [IB_USER_VERBS_CMD_CLOSE_XRCD] = ib_uverbs_close_xrcd,
- [IB_USER_VERBS_CMD_CREATE_XSRQ] = ib_uverbs_create_xsrq,
- [IB_USER_VERBS_CMD_OPEN_QP] = ib_uverbs_open_qp,
-};
+static dev_t dynamic_uverbs_dev;
+
+static DEFINE_IDA(uverbs_ida);
+static int ib_uverbs_add_one(struct ib_device *device);
+static void ib_uverbs_remove_one(struct ib_device *device, void *client_data);
+static struct ib_client uverbs_client;
-static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- struct ib_udata *ucore,
- struct ib_udata *uhw) = {
- [IB_USER_VERBS_EX_CMD_CREATE_FLOW] = ib_uverbs_ex_create_flow,
- [IB_USER_VERBS_EX_CMD_DESTROY_FLOW] = ib_uverbs_ex_destroy_flow,
- [IB_USER_VERBS_EX_CMD_QUERY_DEVICE] = ib_uverbs_ex_query_device,
- [IB_USER_VERBS_EX_CMD_CREATE_CQ] = ib_uverbs_ex_create_cq,
- [IB_USER_VERBS_EX_CMD_CREATE_QP] = ib_uverbs_ex_create_qp,
- [IB_USER_VERBS_EX_CMD_CREATE_WQ] = ib_uverbs_ex_create_wq,
- [IB_USER_VERBS_EX_CMD_MODIFY_WQ] = ib_uverbs_ex_modify_wq,
- [IB_USER_VERBS_EX_CMD_DESTROY_WQ] = ib_uverbs_ex_destroy_wq,
- [IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL] = ib_uverbs_ex_create_rwq_ind_table,
- [IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL] = ib_uverbs_ex_destroy_rwq_ind_table,
- [IB_USER_VERBS_EX_CMD_MODIFY_QP] = ib_uverbs_ex_modify_qp,
+static char *uverbs_devnode(const struct device *dev, umode_t *mode)
+{
+ if (mode)
+ *mode = 0666;
+ return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev));
+}
+
+static const struct class uverbs_class = {
+ .name = "infiniband_verbs",
+ .devnode = uverbs_devnode,
};
-static void ib_uverbs_add_one(struct ib_device *device);
-static void ib_uverbs_remove_one(struct ib_device *device, void *client_data);
+/*
+ * Must be called with the ufile->device->disassociate_srcu held, and the lock
+ * must be held until use of the ucontext is finished.
+ */
+struct ib_ucontext *ib_uverbs_get_ucontext_file(struct ib_uverbs_file *ufile)
+{
+ /*
+ * We do not hold the hw_destroy_rwsem lock for this flow, instead
+ * srcu is used. It does not matter if someone races this with
+ * get_context, we get NULL or valid ucontext.
+ */
+ struct ib_ucontext *ucontext = smp_load_acquire(&ufile->ucontext);
+
+ if (!srcu_dereference(ufile->device->ib_dev,
+ &ufile->device->disassociate_srcu))
+ return ERR_PTR(-EIO);
+
+ if (!ucontext)
+ return ERR_PTR(-EINVAL);
+
+ return ucontext;
+}
+EXPORT_SYMBOL(ib_uverbs_get_ucontext_file);
int uverbs_dealloc_mw(struct ib_mw *mw)
{
struct ib_pd *pd = mw->pd;
int ret;
- ret = mw->device->dealloc_mw(mw);
- if (!ret)
- atomic_dec(&pd->usecnt);
+ ret = mw->device->ops.dealloc_mw(mw);
+ if (ret)
+ return ret;
+
+ atomic_dec(&pd->usecnt);
+ kfree(mw);
return ret;
}
-static void ib_uverbs_release_dev(struct kobject *kobj)
+static void ib_uverbs_release_dev(struct device *device)
{
struct ib_uverbs_device *dev =
- container_of(kobj, struct ib_uverbs_device, kobj);
+ container_of(device, struct ib_uverbs_device, dev);
+ uverbs_destroy_api(dev->uapi);
cleanup_srcu_struct(&dev->disassociate_srcu);
+ mutex_destroy(&dev->lists_mutex);
+ mutex_destroy(&dev->xrcd_tree_mutex);
kfree(dev);
}
-static struct kobj_type ib_uverbs_dev_ktype = {
- .release = ib_uverbs_release_dev,
-};
-
-static void ib_uverbs_release_event_file(struct kref *ref)
-{
- struct ib_uverbs_event_file *file =
- container_of(ref, struct ib_uverbs_event_file, ref);
-
- kfree(file);
-}
-
-void ib_uverbs_release_ucq(struct ib_uverbs_file *file,
- struct ib_uverbs_event_file *ev_file,
- struct ib_ucq_object *uobj)
+void ib_uverbs_release_ucq(struct ib_uverbs_completion_event_file *ev_file,
+ struct ib_ucq_object *uobj)
{
struct ib_uverbs_event *evt, *tmp;
if (ev_file) {
- spin_lock_irq(&ev_file->lock);
+ spin_lock_irq(&ev_file->ev_queue.lock);
list_for_each_entry_safe(evt, tmp, &uobj->comp_list, obj_list) {
list_del(&evt->list);
kfree(evt);
}
- spin_unlock_irq(&ev_file->lock);
+ spin_unlock_irq(&ev_file->ev_queue.lock);
- kref_put(&ev_file->ref, ib_uverbs_release_event_file);
+ uverbs_uobject_put(&ev_file->uobj);
}
- spin_lock_irq(&file->async_file->lock);
- list_for_each_entry_safe(evt, tmp, &uobj->async_list, obj_list) {
- list_del(&evt->list);
- kfree(evt);
- }
- spin_unlock_irq(&file->async_file->lock);
+ ib_uverbs_release_uevent(&uobj->uevent);
}
-void ib_uverbs_release_uevent(struct ib_uverbs_file *file,
- struct ib_uevent_object *uobj)
+void ib_uverbs_release_uevent(struct ib_uevent_object *uobj)
{
+ struct ib_uverbs_async_event_file *async_file = uobj->event_file;
struct ib_uverbs_event *evt, *tmp;
- spin_lock_irq(&file->async_file->lock);
+ if (!async_file)
+ return;
+
+ spin_lock_irq(&async_file->ev_queue.lock);
list_for_each_entry_safe(evt, tmp, &uobj->event_list, obj_list) {
list_del(&evt->list);
kfree(evt);
}
- spin_unlock_irq(&file->async_file->lock);
+ spin_unlock_irq(&async_file->ev_queue.lock);
+ uverbs_uobject_put(&async_file->uobj);
}
-static void ib_uverbs_detach_umcast(struct ib_qp *qp,
- struct ib_uqp_object *uobj)
+void ib_uverbs_detach_umcast(struct ib_qp *qp,
+ struct ib_uqp_object *uobj)
{
struct ib_uverbs_mcast_entry *mcast, *tmp;
@@ -225,205 +189,84 @@ static void ib_uverbs_detach_umcast(struct ib_qp *qp,
}
}
-static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
- struct ib_ucontext *context)
-{
- struct ib_uobject *uobj, *tmp;
-
- context->closing = 1;
-
- list_for_each_entry_safe(uobj, tmp, &context->ah_list, list) {
- struct ib_ah *ah = uobj->object;
-
- idr_remove_uobj(&ib_uverbs_ah_idr, uobj);
- ib_destroy_ah(ah);
- kfree(uobj);
- }
-
- /* Remove MWs before QPs, in order to support type 2A MWs. */
- list_for_each_entry_safe(uobj, tmp, &context->mw_list, list) {
- struct ib_mw *mw = uobj->object;
-
- idr_remove_uobj(&ib_uverbs_mw_idr, uobj);
- uverbs_dealloc_mw(mw);
- kfree(uobj);
- }
-
- list_for_each_entry_safe(uobj, tmp, &context->rule_list, list) {
- struct ib_flow *flow_id = uobj->object;
-
- idr_remove_uobj(&ib_uverbs_rule_idr, uobj);
- ib_destroy_flow(flow_id);
- kfree(uobj);
- }
-
- list_for_each_entry_safe(uobj, tmp, &context->qp_list, list) {
- struct ib_qp *qp = uobj->object;
- struct ib_uqp_object *uqp =
- container_of(uobj, struct ib_uqp_object, uevent.uobject);
-
- idr_remove_uobj(&ib_uverbs_qp_idr, uobj);
- if (qp == qp->real_qp)
- ib_uverbs_detach_umcast(qp, uqp);
- ib_destroy_qp(qp);
- ib_uverbs_release_uevent(file, &uqp->uevent);
- kfree(uqp);
- }
-
- list_for_each_entry_safe(uobj, tmp, &context->rwq_ind_tbl_list, list) {
- struct ib_rwq_ind_table *rwq_ind_tbl = uobj->object;
- struct ib_wq **ind_tbl = rwq_ind_tbl->ind_tbl;
-
- idr_remove_uobj(&ib_uverbs_rwq_ind_tbl_idr, uobj);
- ib_destroy_rwq_ind_table(rwq_ind_tbl);
- kfree(ind_tbl);
- kfree(uobj);
- }
-
- list_for_each_entry_safe(uobj, tmp, &context->wq_list, list) {
- struct ib_wq *wq = uobj->object;
- struct ib_uwq_object *uwq =
- container_of(uobj, struct ib_uwq_object, uevent.uobject);
-
- idr_remove_uobj(&ib_uverbs_wq_idr, uobj);
- ib_destroy_wq(wq);
- ib_uverbs_release_uevent(file, &uwq->uevent);
- kfree(uwq);
- }
-
- list_for_each_entry_safe(uobj, tmp, &context->srq_list, list) {
- struct ib_srq *srq = uobj->object;
- struct ib_uevent_object *uevent =
- container_of(uobj, struct ib_uevent_object, uobject);
-
- idr_remove_uobj(&ib_uverbs_srq_idr, uobj);
- ib_destroy_srq(srq);
- ib_uverbs_release_uevent(file, uevent);
- kfree(uevent);
- }
-
- list_for_each_entry_safe(uobj, tmp, &context->cq_list, list) {
- struct ib_cq *cq = uobj->object;
- struct ib_uverbs_event_file *ev_file = cq->cq_context;
- struct ib_ucq_object *ucq =
- container_of(uobj, struct ib_ucq_object, uobject);
-
- idr_remove_uobj(&ib_uverbs_cq_idr, uobj);
- ib_destroy_cq(cq);
- ib_uverbs_release_ucq(file, ev_file, ucq);
- kfree(ucq);
- }
-
- list_for_each_entry_safe(uobj, tmp, &context->mr_list, list) {
- struct ib_mr *mr = uobj->object;
-
- idr_remove_uobj(&ib_uverbs_mr_idr, uobj);
- ib_dereg_mr(mr);
- kfree(uobj);
- }
-
- mutex_lock(&file->device->xrcd_tree_mutex);
- list_for_each_entry_safe(uobj, tmp, &context->xrcd_list, list) {
- struct ib_xrcd *xrcd = uobj->object;
- struct ib_uxrcd_object *uxrcd =
- container_of(uobj, struct ib_uxrcd_object, uobject);
-
- idr_remove_uobj(&ib_uverbs_xrcd_idr, uobj);
- ib_uverbs_dealloc_xrcd(file->device, xrcd);
- kfree(uxrcd);
- }
- mutex_unlock(&file->device->xrcd_tree_mutex);
-
- list_for_each_entry_safe(uobj, tmp, &context->pd_list, list) {
- struct ib_pd *pd = uobj->object;
-
- idr_remove_uobj(&ib_uverbs_pd_idr, uobj);
- ib_dealloc_pd(pd);
- kfree(uobj);
- }
-
- put_pid(context->tgid);
-
- return context->device->dealloc_ucontext(context);
-}
-
static void ib_uverbs_comp_dev(struct ib_uverbs_device *dev)
{
complete(&dev->comp);
}
-static void ib_uverbs_release_file(struct kref *ref)
+void ib_uverbs_release_file(struct kref *ref)
{
struct ib_uverbs_file *file =
container_of(ref, struct ib_uverbs_file, ref);
struct ib_device *ib_dev;
int srcu_key;
+ release_ufile_idr_uobject(file);
+
srcu_key = srcu_read_lock(&file->device->disassociate_srcu);
ib_dev = srcu_dereference(file->device->ib_dev,
&file->device->disassociate_srcu);
- if (ib_dev && !ib_dev->disassociate_ucontext)
- module_put(ib_dev->owner);
+ if (ib_dev && !ib_dev->ops.disassociate_ucontext)
+ module_put(ib_dev->ops.owner);
srcu_read_unlock(&file->device->disassociate_srcu, srcu_key);
- if (atomic_dec_and_test(&file->device->refcount))
+ if (refcount_dec_and_test(&file->device->refcount))
ib_uverbs_comp_dev(file->device);
+ if (file->default_async_file)
+ uverbs_uobject_put(&file->default_async_file->uobj);
+ put_device(&file->device->dev);
+
+ if (file->disassociate_page)
+ __free_pages(file->disassociate_page, 0);
+ mutex_destroy(&file->disassociation_lock);
+ mutex_destroy(&file->umap_lock);
+ mutex_destroy(&file->ucontext_lock);
kfree(file);
}
-static ssize_t ib_uverbs_event_read(struct file *filp, char __user *buf,
- size_t count, loff_t *pos)
+static ssize_t ib_uverbs_event_read(struct ib_uverbs_event_queue *ev_queue,
+ struct file *filp, char __user *buf,
+ size_t count, loff_t *pos,
+ size_t eventsz)
{
- struct ib_uverbs_event_file *file = filp->private_data;
struct ib_uverbs_event *event;
- int eventsz;
int ret = 0;
- spin_lock_irq(&file->lock);
+ spin_lock_irq(&ev_queue->lock);
- while (list_empty(&file->event_list)) {
- spin_unlock_irq(&file->lock);
+ while (list_empty(&ev_queue->event_list)) {
+ if (ev_queue->is_closed) {
+ spin_unlock_irq(&ev_queue->lock);
+ return -EIO;
+ }
+ spin_unlock_irq(&ev_queue->lock);
if (filp->f_flags & O_NONBLOCK)
return -EAGAIN;
- if (wait_event_interruptible(file->poll_wait,
- (!list_empty(&file->event_list) ||
- /* The barriers built into wait_event_interruptible()
- * and wake_up() guarentee this will see the null set
- * without using RCU
- */
- !file->uverbs_file->device->ib_dev)))
+ if (wait_event_interruptible(ev_queue->poll_wait,
+ (!list_empty(&ev_queue->event_list) ||
+ ev_queue->is_closed)))
return -ERESTARTSYS;
- /* If device was disassociated and no event exists set an error */
- if (list_empty(&file->event_list) &&
- !file->uverbs_file->device->ib_dev)
- return -EIO;
-
- spin_lock_irq(&file->lock);
+ spin_lock_irq(&ev_queue->lock);
}
- event = list_entry(file->event_list.next, struct ib_uverbs_event, list);
-
- if (file->is_async)
- eventsz = sizeof (struct ib_uverbs_async_event_desc);
- else
- eventsz = sizeof (struct ib_uverbs_comp_event_desc);
+ event = list_entry(ev_queue->event_list.next, struct ib_uverbs_event, list);
if (eventsz > count) {
ret = -EINVAL;
event = NULL;
} else {
- list_del(file->event_list.next);
+ list_del(ev_queue->event_list.next);
if (event->counter) {
++(*event->counter);
list_del(&event->obj_list);
}
}
- spin_unlock_irq(&file->lock);
+ spin_unlock_irq(&ev_queue->lock);
if (event) {
if (copy_to_user(buf, event, eventsz))
@@ -437,313 +280,303 @@ static ssize_t ib_uverbs_event_read(struct file *filp, char __user *buf,
return ret;
}
-static unsigned int ib_uverbs_event_poll(struct file *filp,
+static ssize_t ib_uverbs_async_event_read(struct file *filp, char __user *buf,
+ size_t count, loff_t *pos)
+{
+ struct ib_uverbs_async_event_file *file = filp->private_data;
+
+ return ib_uverbs_event_read(&file->ev_queue, filp, buf, count, pos,
+ sizeof(struct ib_uverbs_async_event_desc));
+}
+
+static ssize_t ib_uverbs_comp_event_read(struct file *filp, char __user *buf,
+ size_t count, loff_t *pos)
+{
+ struct ib_uverbs_completion_event_file *comp_ev_file =
+ filp->private_data;
+
+ return ib_uverbs_event_read(&comp_ev_file->ev_queue, filp, buf, count,
+ pos,
+ sizeof(struct ib_uverbs_comp_event_desc));
+}
+
+static __poll_t ib_uverbs_event_poll(struct ib_uverbs_event_queue *ev_queue,
+ struct file *filp,
struct poll_table_struct *wait)
{
- unsigned int pollflags = 0;
- struct ib_uverbs_event_file *file = filp->private_data;
+ __poll_t pollflags = 0;
- poll_wait(filp, &file->poll_wait, wait);
+ poll_wait(filp, &ev_queue->poll_wait, wait);
- spin_lock_irq(&file->lock);
- if (!list_empty(&file->event_list))
- pollflags = POLLIN | POLLRDNORM;
- spin_unlock_irq(&file->lock);
+ spin_lock_irq(&ev_queue->lock);
+ if (!list_empty(&ev_queue->event_list))
+ pollflags = EPOLLIN | EPOLLRDNORM;
+ else if (ev_queue->is_closed)
+ pollflags = EPOLLERR;
+ spin_unlock_irq(&ev_queue->lock);
return pollflags;
}
-static int ib_uverbs_event_fasync(int fd, struct file *filp, int on)
+static __poll_t ib_uverbs_async_event_poll(struct file *filp,
+ struct poll_table_struct *wait)
{
- struct ib_uverbs_event_file *file = filp->private_data;
+ struct ib_uverbs_async_event_file *file = filp->private_data;
- return fasync_helper(fd, filp, on, &file->async_queue);
+ return ib_uverbs_event_poll(&file->ev_queue, filp, wait);
}
-static int ib_uverbs_event_close(struct inode *inode, struct file *filp)
+static __poll_t ib_uverbs_comp_event_poll(struct file *filp,
+ struct poll_table_struct *wait)
{
- struct ib_uverbs_event_file *file = filp->private_data;
- struct ib_uverbs_event *entry, *tmp;
- int closed_already = 0;
-
- mutex_lock(&file->uverbs_file->device->lists_mutex);
- spin_lock_irq(&file->lock);
- closed_already = file->is_closed;
- file->is_closed = 1;
- list_for_each_entry_safe(entry, tmp, &file->event_list, list) {
- if (entry->counter)
- list_del(&entry->obj_list);
- kfree(entry);
- }
- spin_unlock_irq(&file->lock);
- if (!closed_already) {
- list_del(&file->list);
- if (file->is_async)
- ib_unregister_event_handler(&file->uverbs_file->
- event_handler);
- }
- mutex_unlock(&file->uverbs_file->device->lists_mutex);
+ struct ib_uverbs_completion_event_file *comp_ev_file =
+ filp->private_data;
- kref_put(&file->uverbs_file->ref, ib_uverbs_release_file);
- kref_put(&file->ref, ib_uverbs_release_event_file);
+ return ib_uverbs_event_poll(&comp_ev_file->ev_queue, filp, wait);
+}
- return 0;
+static int ib_uverbs_async_event_fasync(int fd, struct file *filp, int on)
+{
+ struct ib_uverbs_async_event_file *file = filp->private_data;
+
+ return fasync_helper(fd, filp, on, &file->ev_queue.async_queue);
+}
+
+static int ib_uverbs_comp_event_fasync(int fd, struct file *filp, int on)
+{
+ struct ib_uverbs_completion_event_file *comp_ev_file =
+ filp->private_data;
+
+ return fasync_helper(fd, filp, on, &comp_ev_file->ev_queue.async_queue);
}
-static const struct file_operations uverbs_event_fops = {
+const struct file_operations uverbs_event_fops = {
.owner = THIS_MODULE,
- .read = ib_uverbs_event_read,
- .poll = ib_uverbs_event_poll,
- .release = ib_uverbs_event_close,
- .fasync = ib_uverbs_event_fasync,
- .llseek = no_llseek,
+ .read = ib_uverbs_comp_event_read,
+ .poll = ib_uverbs_comp_event_poll,
+ .release = uverbs_uobject_fd_release,
+ .fasync = ib_uverbs_comp_event_fasync,
+};
+
+const struct file_operations uverbs_async_event_fops = {
+ .owner = THIS_MODULE,
+ .read = ib_uverbs_async_event_read,
+ .poll = ib_uverbs_async_event_poll,
+ .release = uverbs_async_event_release,
+ .fasync = ib_uverbs_async_event_fasync,
};
void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context)
{
- struct ib_uverbs_event_file *file = cq_context;
+ struct ib_uverbs_event_queue *ev_queue = cq_context;
struct ib_ucq_object *uobj;
struct ib_uverbs_event *entry;
unsigned long flags;
- if (!file)
+ if (!ev_queue)
return;
- spin_lock_irqsave(&file->lock, flags);
- if (file->is_closed) {
- spin_unlock_irqrestore(&file->lock, flags);
+ spin_lock_irqsave(&ev_queue->lock, flags);
+ if (ev_queue->is_closed) {
+ spin_unlock_irqrestore(&ev_queue->lock, flags);
return;
}
- entry = kmalloc(sizeof *entry, GFP_ATOMIC);
+ entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
if (!entry) {
- spin_unlock_irqrestore(&file->lock, flags);
+ spin_unlock_irqrestore(&ev_queue->lock, flags);
return;
}
- uobj = container_of(cq->uobject, struct ib_ucq_object, uobject);
+ uobj = cq->uobject;
- entry->desc.comp.cq_handle = cq->uobject->user_handle;
+ entry->desc.comp.cq_handle = cq->uobject->uevent.uobject.user_handle;
entry->counter = &uobj->comp_events_reported;
- list_add_tail(&entry->list, &file->event_list);
+ list_add_tail(&entry->list, &ev_queue->event_list);
list_add_tail(&entry->obj_list, &uobj->comp_list);
- spin_unlock_irqrestore(&file->lock, flags);
+ spin_unlock_irqrestore(&ev_queue->lock, flags);
- wake_up_interruptible(&file->poll_wait);
- kill_fasync(&file->async_queue, SIGIO, POLL_IN);
+ wake_up_interruptible(&ev_queue->poll_wait);
+ kill_fasync(&ev_queue->async_queue, SIGIO, POLL_IN);
}
-static void ib_uverbs_async_handler(struct ib_uverbs_file *file,
- __u64 element, __u64 event,
- struct list_head *obj_list,
- u32 *counter)
+void ib_uverbs_async_handler(struct ib_uverbs_async_event_file *async_file,
+ __u64 element, __u64 event,
+ struct list_head *obj_list, u32 *counter)
{
struct ib_uverbs_event *entry;
unsigned long flags;
- spin_lock_irqsave(&file->async_file->lock, flags);
- if (file->async_file->is_closed) {
- spin_unlock_irqrestore(&file->async_file->lock, flags);
+ if (!async_file)
+ return;
+
+ spin_lock_irqsave(&async_file->ev_queue.lock, flags);
+ if (async_file->ev_queue.is_closed) {
+ spin_unlock_irqrestore(&async_file->ev_queue.lock, flags);
return;
}
- entry = kmalloc(sizeof *entry, GFP_ATOMIC);
+ entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
if (!entry) {
- spin_unlock_irqrestore(&file->async_file->lock, flags);
+ spin_unlock_irqrestore(&async_file->ev_queue.lock, flags);
return;
}
- entry->desc.async.element = element;
+ entry->desc.async.element = element;
entry->desc.async.event_type = event;
- entry->desc.async.reserved = 0;
- entry->counter = counter;
+ entry->desc.async.reserved = 0;
+ entry->counter = counter;
- list_add_tail(&entry->list, &file->async_file->event_list);
+ list_add_tail(&entry->list, &async_file->ev_queue.event_list);
if (obj_list)
list_add_tail(&entry->obj_list, obj_list);
- spin_unlock_irqrestore(&file->async_file->lock, flags);
+ spin_unlock_irqrestore(&async_file->ev_queue.lock, flags);
- wake_up_interruptible(&file->async_file->poll_wait);
- kill_fasync(&file->async_file->async_queue, SIGIO, POLL_IN);
+ wake_up_interruptible(&async_file->ev_queue.poll_wait);
+ kill_fasync(&async_file->ev_queue.async_queue, SIGIO, POLL_IN);
}
-void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr)
+static void uverbs_uobj_event(struct ib_uevent_object *eobj,
+ struct ib_event *event)
{
- struct ib_ucq_object *uobj = container_of(event->element.cq->uobject,
- struct ib_ucq_object, uobject);
+ ib_uverbs_async_handler(eobj->event_file,
+ eobj->uobject.user_handle, event->event,
+ &eobj->event_list, &eobj->events_reported);
+}
- ib_uverbs_async_handler(uobj->uverbs_file, uobj->uobject.user_handle,
- event->event, &uobj->async_list,
- &uobj->async_events_reported);
+void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr)
+{
+ uverbs_uobj_event(&event->element.cq->uobject->uevent, event);
}
void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr)
{
- struct ib_uevent_object *uobj;
-
/* for XRC target qp's, check that qp is live */
- if (!event->element.qp->uobject || !event->element.qp->uobject->live)
+ if (!event->element.qp->uobject)
return;
- uobj = container_of(event->element.qp->uobject,
- struct ib_uevent_object, uobject);
-
- ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle,
- event->event, &uobj->event_list,
- &uobj->events_reported);
+ uverbs_uobj_event(&event->element.qp->uobject->uevent, event);
}
void ib_uverbs_wq_event_handler(struct ib_event *event, void *context_ptr)
{
- struct ib_uevent_object *uobj = container_of(event->element.wq->uobject,
- struct ib_uevent_object, uobject);
-
- ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle,
- event->event, &uobj->event_list,
- &uobj->events_reported);
+ uverbs_uobj_event(&event->element.wq->uobject->uevent, event);
}
void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr)
{
- struct ib_uevent_object *uobj;
-
- uobj = container_of(event->element.srq->uobject,
- struct ib_uevent_object, uobject);
-
- ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle,
- event->event, &uobj->event_list,
- &uobj->events_reported);
+ uverbs_uobj_event(&event->element.srq->uobject->uevent, event);
}
-void ib_uverbs_event_handler(struct ib_event_handler *handler,
- struct ib_event *event)
+static void ib_uverbs_event_handler(struct ib_event_handler *handler,
+ struct ib_event *event)
{
- struct ib_uverbs_file *file =
- container_of(handler, struct ib_uverbs_file, event_handler);
-
- ib_uverbs_async_handler(file, event->element.port_num, event->event,
- NULL, NULL);
+ ib_uverbs_async_handler(
+ container_of(handler, struct ib_uverbs_async_event_file,
+ event_handler),
+ event->element.port_num, event->event, NULL, NULL);
}
-void ib_uverbs_free_async_event_file(struct ib_uverbs_file *file)
+void ib_uverbs_init_event_queue(struct ib_uverbs_event_queue *ev_queue)
{
- kref_put(&file->async_file->ref, ib_uverbs_release_event_file);
- file->async_file = NULL;
+ spin_lock_init(&ev_queue->lock);
+ INIT_LIST_HEAD(&ev_queue->event_list);
+ init_waitqueue_head(&ev_queue->poll_wait);
+ ev_queue->is_closed = 0;
+ ev_queue->async_queue = NULL;
}
-struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file,
- struct ib_device *ib_dev,
- int is_async)
+void ib_uverbs_init_async_event_file(
+ struct ib_uverbs_async_event_file *async_file)
{
- struct ib_uverbs_event_file *ev_file;
- struct file *filp;
- int ret;
+ struct ib_uverbs_file *uverbs_file = async_file->uobj.ufile;
+ struct ib_device *ib_dev = async_file->uobj.context->device;
- ev_file = kzalloc(sizeof(*ev_file), GFP_KERNEL);
- if (!ev_file)
- return ERR_PTR(-ENOMEM);
-
- kref_init(&ev_file->ref);
- spin_lock_init(&ev_file->lock);
- INIT_LIST_HEAD(&ev_file->event_list);
- init_waitqueue_head(&ev_file->poll_wait);
- ev_file->uverbs_file = uverbs_file;
- kref_get(&ev_file->uverbs_file->ref);
- ev_file->async_queue = NULL;
- ev_file->is_closed = 0;
-
- filp = anon_inode_getfile("[infinibandevent]", &uverbs_event_fops,
- ev_file, O_RDONLY);
- if (IS_ERR(filp))
- goto err_put_refs;
-
- mutex_lock(&uverbs_file->device->lists_mutex);
- list_add_tail(&ev_file->list,
- &uverbs_file->device->uverbs_events_file_list);
- mutex_unlock(&uverbs_file->device->lists_mutex);
-
- if (is_async) {
- WARN_ON(uverbs_file->async_file);
- uverbs_file->async_file = ev_file;
- kref_get(&uverbs_file->async_file->ref);
- INIT_IB_EVENT_HANDLER(&uverbs_file->event_handler,
- ib_dev,
- ib_uverbs_event_handler);
- ret = ib_register_event_handler(&uverbs_file->event_handler);
- if (ret)
- goto err_put_file;
+ ib_uverbs_init_event_queue(&async_file->ev_queue);
- /* At that point async file stuff was fully set */
- ev_file->is_async = 1;
+ /* The first async_event_file becomes the default one for the file. */
+ mutex_lock(&uverbs_file->ucontext_lock);
+ if (!uverbs_file->default_async_file) {
+ /* Pairs with the put in ib_uverbs_release_file */
+ uverbs_uobject_get(&async_file->uobj);
+ smp_store_release(&uverbs_file->default_async_file, async_file);
}
+ mutex_unlock(&uverbs_file->ucontext_lock);
- return filp;
-
-err_put_file:
- fput(filp);
- kref_put(&uverbs_file->async_file->ref, ib_uverbs_release_event_file);
- uverbs_file->async_file = NULL;
- return ERR_PTR(ret);
-
-err_put_refs:
- kref_put(&ev_file->uverbs_file->ref, ib_uverbs_release_file);
- kref_put(&ev_file->ref, ib_uverbs_release_event_file);
- return filp;
+ INIT_IB_EVENT_HANDLER(&async_file->event_handler, ib_dev,
+ ib_uverbs_event_handler);
+ ib_register_event_handler(&async_file->event_handler);
}
-/*
- * Look up a completion event file by FD. If lookup is successful,
- * takes a ref to the event file struct that it returns; if
- * unsuccessful, returns NULL.
- */
-struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd)
+static ssize_t verify_hdr(struct ib_uverbs_cmd_hdr *hdr,
+ struct ib_uverbs_ex_cmd_hdr *ex_hdr, size_t count,
+ const struct uverbs_api_write_method *method_elm)
{
- struct ib_uverbs_event_file *ev_file = NULL;
- struct fd f = fdget(fd);
-
- if (!f.file)
- return NULL;
+ if (method_elm->is_ex) {
+ count -= sizeof(*hdr) + sizeof(*ex_hdr);
- if (f.file->f_op != &uverbs_event_fops)
- goto out;
+ if ((hdr->in_words + ex_hdr->provider_in_words) * 8 != count)
+ return -EINVAL;
- ev_file = f.file->private_data;
- if (ev_file->is_async) {
- ev_file = NULL;
- goto out;
- }
+ if (hdr->in_words * 8 < method_elm->req_size)
+ return -ENOSPC;
- kref_get(&ev_file->ref);
+ if (ex_hdr->cmd_hdr_reserved)
+ return -EINVAL;
-out:
- fdput(f);
- return ev_file;
-}
+ if (ex_hdr->response) {
+ if (!hdr->out_words && !ex_hdr->provider_out_words)
+ return -EINVAL;
-static int verify_command_mask(struct ib_device *ib_dev, __u32 command)
-{
- u64 mask;
+ if (hdr->out_words * 8 < method_elm->resp_size)
+ return -ENOSPC;
- if (command <= IB_USER_VERBS_CMD_OPEN_QP)
- mask = ib_dev->uverbs_cmd_mask;
- else
- mask = ib_dev->uverbs_ex_cmd_mask;
+ if (!access_ok(u64_to_user_ptr(ex_hdr->response),
+ (hdr->out_words + ex_hdr->provider_out_words) * 8))
+ return -EFAULT;
+ } else {
+ if (hdr->out_words || ex_hdr->provider_out_words)
+ return -EINVAL;
+ }
- if (mask & ((u64)1 << command))
return 0;
+ }
+
+ /* not extended command */
+ if (hdr->in_words * 4 != count)
+ return -EINVAL;
- return -1;
+ if (count < method_elm->req_size + sizeof(*hdr)) {
+ /*
+ * rdma-core v18 and v19 have a bug where they send DESTROY_CQ
+ * with a 16 byte write instead of 24. Old kernels didn't
+ * check the size so they allowed this. Now that the size is
+ * checked provide a compatibility work around to not break
+ * those userspaces.
+ */
+ if (hdr->command == IB_USER_VERBS_CMD_DESTROY_CQ &&
+ count == 16) {
+ hdr->in_words = 6;
+ return 0;
+ }
+ return -ENOSPC;
+ }
+ if (hdr->out_words * 4 < method_elm->resp_size)
+ return -ENOSPC;
+
+ return 0;
}
static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
size_t count, loff_t *pos)
{
struct ib_uverbs_file *file = filp->private_data;
- struct ib_device *ib_dev;
+ const struct uverbs_api_write_method *method_elm;
+ struct uverbs_api *uapi = file->device->uapi;
+ struct ib_uverbs_ex_cmd_hdr ex_hdr;
struct ib_uverbs_cmd_hdr hdr;
- __u32 command;
- __u32 flags;
+ struct uverbs_attr_bundle bundle;
int srcu_key;
ssize_t ret;
@@ -753,166 +586,337 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
return -EACCES;
}
- if (count < sizeof hdr)
+ if (count < sizeof(hdr))
return -EINVAL;
- if (copy_from_user(&hdr, buf, sizeof hdr))
+ if (copy_from_user(&hdr, buf, sizeof(hdr)))
return -EFAULT;
- srcu_key = srcu_read_lock(&file->device->disassociate_srcu);
- ib_dev = srcu_dereference(file->device->ib_dev,
- &file->device->disassociate_srcu);
- if (!ib_dev) {
- ret = -EIO;
- goto out;
- }
-
- if (hdr.command & ~(__u32)(IB_USER_VERBS_CMD_FLAGS_MASK |
- IB_USER_VERBS_CMD_COMMAND_MASK)) {
- ret = -EINVAL;
- goto out;
- }
-
- command = hdr.command & IB_USER_VERBS_CMD_COMMAND_MASK;
- if (verify_command_mask(ib_dev, command)) {
- ret = -EOPNOTSUPP;
- goto out;
- }
+ method_elm = uapi_get_method(uapi, hdr.command);
+ if (IS_ERR(method_elm))
+ return PTR_ERR(method_elm);
- if (!file->ucontext &&
- command != IB_USER_VERBS_CMD_GET_CONTEXT) {
- ret = -EINVAL;
- goto out;
+ if (method_elm->is_ex) {
+ if (count < (sizeof(hdr) + sizeof(ex_hdr)))
+ return -EINVAL;
+ if (copy_from_user(&ex_hdr, buf + sizeof(hdr), sizeof(ex_hdr)))
+ return -EFAULT;
}
- flags = (hdr.command &
- IB_USER_VERBS_CMD_FLAGS_MASK) >> IB_USER_VERBS_CMD_FLAGS_SHIFT;
-
- if (!flags) {
- if (command >= ARRAY_SIZE(uverbs_cmd_table) ||
- !uverbs_cmd_table[command]) {
- ret = -EINVAL;
- goto out;
- }
-
- if (hdr.in_words * 4 != count) {
- ret = -EINVAL;
- goto out;
- }
-
- ret = uverbs_cmd_table[command](file, ib_dev,
- buf + sizeof(hdr),
- hdr.in_words * 4,
- hdr.out_words * 4);
-
- } else if (flags == IB_USER_VERBS_CMD_FLAG_EXTENDED) {
- struct ib_uverbs_ex_cmd_hdr ex_hdr;
- struct ib_udata ucore;
- struct ib_udata uhw;
- size_t written_count = count;
-
- if (command >= ARRAY_SIZE(uverbs_ex_cmd_table) ||
- !uverbs_ex_cmd_table[command]) {
- ret = -ENOSYS;
- goto out;
- }
-
- if (!file->ucontext) {
- ret = -EINVAL;
- goto out;
- }
+ ret = verify_hdr(&hdr, &ex_hdr, count, method_elm);
+ if (ret)
+ return ret;
- if (count < (sizeof(hdr) + sizeof(ex_hdr))) {
- ret = -EINVAL;
- goto out;
- }
-
- if (copy_from_user(&ex_hdr, buf + sizeof(hdr), sizeof(ex_hdr))) {
- ret = -EFAULT;
- goto out;
- }
-
- count -= sizeof(hdr) + sizeof(ex_hdr);
- buf += sizeof(hdr) + sizeof(ex_hdr);
-
- if ((hdr.in_words + ex_hdr.provider_in_words) * 8 != count) {
- ret = -EINVAL;
- goto out;
- }
+ srcu_key = srcu_read_lock(&file->device->disassociate_srcu);
- if (ex_hdr.cmd_hdr_reserved) {
- ret = -EINVAL;
- goto out;
+ buf += sizeof(hdr);
+
+ memset(bundle.attr_present, 0, sizeof(bundle.attr_present));
+ bundle.ufile = file;
+ bundle.context = NULL; /* only valid if bundle has uobject */
+ bundle.uobject = NULL;
+ if (!method_elm->is_ex) {
+ size_t in_len = hdr.in_words * 4 - sizeof(hdr);
+ size_t out_len = hdr.out_words * 4;
+ u64 response = 0;
+
+ if (method_elm->has_udata) {
+ bundle.driver_udata.inlen =
+ in_len - method_elm->req_size;
+ in_len = method_elm->req_size;
+ if (bundle.driver_udata.inlen)
+ bundle.driver_udata.inbuf = buf + in_len;
+ else
+ bundle.driver_udata.inbuf = NULL;
+ } else {
+ memset(&bundle.driver_udata, 0,
+ sizeof(bundle.driver_udata));
}
- if (ex_hdr.response) {
- if (!hdr.out_words && !ex_hdr.provider_out_words) {
- ret = -EINVAL;
- goto out;
- }
-
- if (!access_ok(VERIFY_WRITE,
- (void __user *) (unsigned long) ex_hdr.response,
- (hdr.out_words + ex_hdr.provider_out_words) * 8)) {
- ret = -EFAULT;
- goto out;
+ if (method_elm->has_resp) {
+ /*
+ * The macros check that if has_resp is set
+ * then the command request structure starts
+ * with a '__aligned u64 response' member.
+ */
+ ret = get_user(response, (const u64 __user *)buf);
+ if (ret)
+ goto out_unlock;
+
+ if (method_elm->has_udata) {
+ bundle.driver_udata.outlen =
+ out_len - method_elm->resp_size;
+ out_len = method_elm->resp_size;
+ if (bundle.driver_udata.outlen)
+ bundle.driver_udata.outbuf =
+ u64_to_user_ptr(response +
+ out_len);
+ else
+ bundle.driver_udata.outbuf = NULL;
}
} else {
- if (hdr.out_words || ex_hdr.provider_out_words) {
- ret = -EINVAL;
- goto out;
- }
+ bundle.driver_udata.outlen = 0;
+ bundle.driver_udata.outbuf = NULL;
}
- INIT_UDATA_BUF_OR_NULL(&ucore, buf, (unsigned long) ex_hdr.response,
- hdr.in_words * 8, hdr.out_words * 8);
-
- INIT_UDATA_BUF_OR_NULL(&uhw,
- buf + ucore.inlen,
- (unsigned long) ex_hdr.response + ucore.outlen,
- ex_hdr.provider_in_words * 8,
- ex_hdr.provider_out_words * 8);
-
- ret = uverbs_ex_cmd_table[command](file,
- ib_dev,
- &ucore,
- &uhw);
- if (!ret)
- ret = written_count;
+ ib_uverbs_init_udata_buf_or_null(
+ &bundle.ucore, buf, u64_to_user_ptr(response),
+ in_len, out_len);
} else {
- ret = -ENOSYS;
+ buf += sizeof(ex_hdr);
+
+ ib_uverbs_init_udata_buf_or_null(&bundle.ucore, buf,
+ u64_to_user_ptr(ex_hdr.response),
+ hdr.in_words * 8, hdr.out_words * 8);
+
+ ib_uverbs_init_udata_buf_or_null(
+ &bundle.driver_udata, buf + bundle.ucore.inlen,
+ u64_to_user_ptr(ex_hdr.response) + bundle.ucore.outlen,
+ ex_hdr.provider_in_words * 8,
+ ex_hdr.provider_out_words * 8);
+
}
-out:
+ ret = method_elm->handler(&bundle);
+ if (bundle.uobject)
+ uverbs_finalize_object(bundle.uobject, UVERBS_ACCESS_NEW, true,
+ !ret, &bundle);
+out_unlock:
srcu_read_unlock(&file->device->disassociate_srcu, srcu_key);
- return ret;
+ return (ret) ? : count;
}
+static const struct vm_operations_struct rdma_umap_ops;
+
static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma)
{
struct ib_uverbs_file *file = filp->private_data;
- struct ib_device *ib_dev;
+ struct ib_ucontext *ucontext;
int ret = 0;
int srcu_key;
srcu_key = srcu_read_lock(&file->device->disassociate_srcu);
- ib_dev = srcu_dereference(file->device->ib_dev,
- &file->device->disassociate_srcu);
- if (!ib_dev) {
- ret = -EIO;
+ ucontext = ib_uverbs_get_ucontext_file(file);
+ if (IS_ERR(ucontext)) {
+ ret = PTR_ERR(ucontext);
goto out;
}
- if (!file->ucontext)
- ret = -ENODEV;
- else
- ret = ib_dev->mmap(file->ucontext, vma);
+ mutex_lock(&file->disassociation_lock);
+
+ vma->vm_ops = &rdma_umap_ops;
+ ret = ucontext->device->ops.mmap(ucontext, vma);
+
+ mutex_unlock(&file->disassociation_lock);
out:
srcu_read_unlock(&file->device->disassociate_srcu, srcu_key);
return ret;
}
/*
+ * The VMA has been dup'd, initialize the vm_private_data with a new tracking
+ * struct
+ */
+static void rdma_umap_open(struct vm_area_struct *vma)
+{
+ struct ib_uverbs_file *ufile = vma->vm_file->private_data;
+ struct rdma_umap_priv *opriv = vma->vm_private_data;
+ struct rdma_umap_priv *priv;
+
+ if (!opriv)
+ return;
+
+ /* We are racing with disassociation */
+ if (!down_read_trylock(&ufile->hw_destroy_rwsem))
+ goto out_zap;
+ mutex_lock(&ufile->disassociation_lock);
+
+ /*
+ * Disassociation already completed, the VMA should already be zapped.
+ */
+ if (!ufile->ucontext)
+ goto out_unlock;
+
+ priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ goto out_unlock;
+ rdma_umap_priv_init(priv, vma, opriv->entry);
+
+ mutex_unlock(&ufile->disassociation_lock);
+ up_read(&ufile->hw_destroy_rwsem);
+ return;
+
+out_unlock:
+ mutex_unlock(&ufile->disassociation_lock);
+ up_read(&ufile->hw_destroy_rwsem);
+out_zap:
+ /*
+ * We can't allow the VMA to be created with the actual IO pages, that
+ * would break our API contract, and it can't be stopped at this
+ * point, so zap it.
+ */
+ vma->vm_private_data = NULL;
+ zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start);
+}
+
+static void rdma_umap_close(struct vm_area_struct *vma)
+{
+ struct ib_uverbs_file *ufile = vma->vm_file->private_data;
+ struct rdma_umap_priv *priv = vma->vm_private_data;
+
+ if (!priv)
+ return;
+
+ /*
+ * The vma holds a reference on the struct file that created it, which
+ * in turn means that the ib_uverbs_file is guaranteed to exist at
+ * this point.
+ */
+ mutex_lock(&ufile->umap_lock);
+ if (priv->entry)
+ rdma_user_mmap_entry_put(priv->entry);
+
+ list_del(&priv->list);
+ mutex_unlock(&ufile->umap_lock);
+ kfree(priv);
+}
+
+/*
+ * Once the zap_vma_ptes has been called touches to the VMA will come here and
+ * we return a dummy writable zero page for all the pfns.
+ */
+static vm_fault_t rdma_umap_fault(struct vm_fault *vmf)
+{
+ struct ib_uverbs_file *ufile = vmf->vma->vm_file->private_data;
+ struct rdma_umap_priv *priv = vmf->vma->vm_private_data;
+ vm_fault_t ret = 0;
+
+ if (!priv)
+ return VM_FAULT_SIGBUS;
+
+ /* Read only pages can just use the system zero page. */
+ if (!(vmf->vma->vm_flags & (VM_WRITE | VM_MAYWRITE))) {
+ vmf->page = ZERO_PAGE(vmf->address);
+ get_page(vmf->page);
+ return 0;
+ }
+
+ mutex_lock(&ufile->umap_lock);
+ if (!ufile->disassociate_page)
+ ufile->disassociate_page =
+ alloc_pages(vmf->gfp_mask | __GFP_ZERO, 0);
+
+ if (ufile->disassociate_page) {
+ /*
+ * This VMA is forced to always be shared so this doesn't have
+ * to worry about COW.
+ */
+ vmf->page = ufile->disassociate_page;
+ get_page(vmf->page);
+ } else {
+ ret = VM_FAULT_SIGBUS;
+ }
+ mutex_unlock(&ufile->umap_lock);
+
+ return ret;
+}
+
+static const struct vm_operations_struct rdma_umap_ops = {
+ .open = rdma_umap_open,
+ .close = rdma_umap_close,
+ .fault = rdma_umap_fault,
+};
+
+void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile)
+{
+ struct rdma_umap_priv *priv, *next_priv;
+
+ mutex_lock(&ufile->disassociation_lock);
+
+ while (1) {
+ struct mm_struct *mm = NULL;
+
+ /* Get an arbitrary mm pointer that hasn't been cleaned yet */
+ mutex_lock(&ufile->umap_lock);
+ while (!list_empty(&ufile->umaps)) {
+ int ret;
+
+ priv = list_first_entry(&ufile->umaps,
+ struct rdma_umap_priv, list);
+ mm = priv->vma->vm_mm;
+ ret = mmget_not_zero(mm);
+ if (!ret) {
+ list_del_init(&priv->list);
+ if (priv->entry) {
+ rdma_user_mmap_entry_put(priv->entry);
+ priv->entry = NULL;
+ }
+ mm = NULL;
+ continue;
+ }
+ break;
+ }
+ mutex_unlock(&ufile->umap_lock);
+ if (!mm) {
+ mutex_unlock(&ufile->disassociation_lock);
+ return;
+ }
+
+ /*
+ * The umap_lock is nested under mmap_lock since it used within
+ * the vma_ops callbacks, so we have to clean the list one mm
+ * at a time to get the lock ordering right. Typically there
+ * will only be one mm, so no big deal.
+ */
+ mmap_read_lock(mm);
+ mutex_lock(&ufile->umap_lock);
+ list_for_each_entry_safe (priv, next_priv, &ufile->umaps,
+ list) {
+ struct vm_area_struct *vma = priv->vma;
+
+ if (vma->vm_mm != mm)
+ continue;
+ list_del_init(&priv->list);
+
+ zap_vma_ptes(vma, vma->vm_start,
+ vma->vm_end - vma->vm_start);
+
+ if (priv->entry) {
+ rdma_user_mmap_entry_put(priv->entry);
+ priv->entry = NULL;
+ }
+ }
+ mutex_unlock(&ufile->umap_lock);
+ mmap_read_unlock(mm);
+ mmput(mm);
+ }
+
+ mutex_unlock(&ufile->disassociation_lock);
+}
+
+/**
+ * rdma_user_mmap_disassociate() - Revoke mmaps for a device
+ * @device: device to revoke
+ *
+ * This function should be called by drivers that need to disable mmaps for the
+ * device, for instance because it is going to be reset.
+ */
+void rdma_user_mmap_disassociate(struct ib_device *device)
+{
+ struct ib_uverbs_device *uverbs_dev =
+ ib_get_client_data(device, &uverbs_client);
+ struct ib_uverbs_file *ufile;
+
+ mutex_lock(&uverbs_dev->lists_mutex);
+ list_for_each_entry(ufile, &uverbs_dev->uverbs_file_list, list) {
+ if (ufile->ucontext)
+ uverbs_user_mmap_disassociate(ufile);
+ }
+ mutex_unlock(&uverbs_dev->lists_mutex);
+}
+EXPORT_SYMBOL(rdma_user_mmap_disassociate);
+
+/*
* ib_uverbs_open() does not need the BKL:
*
* - the ib_uverbs_device structures are properly reference counted and
@@ -932,9 +936,10 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp)
int srcu_key;
dev = container_of(inode->i_cdev, struct ib_uverbs_device, cdev);
- if (!atomic_inc_not_zero(&dev->refcount))
+ if (!refcount_inc_not_zero(&dev->refcount))
return -ENXIO;
+ get_device(&dev->dev);
srcu_key = srcu_read_lock(&dev->disassociate_srcu);
mutex_lock(&dev->lists_mutex);
ib_dev = srcu_dereference(dev->ib_dev,
@@ -944,13 +949,18 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp)
goto err;
}
+ if (!rdma_dev_access_netns(ib_dev, current->nsproxy->net_ns)) {
+ ret = -EPERM;
+ goto err;
+ }
+
/* In case IB device supports disassociate ucontext, there is no hard
* dependency between uverbs device and its low level device.
*/
- module_dependent = !(ib_dev->disassociate_ucontext);
+ module_dependent = !(ib_dev->ops.disassociate_ucontext);
if (module_dependent) {
- if (!try_module_get(ib_dev->owner)) {
+ if (!try_module_get(ib_dev->ops.owner)) {
ret = -ENODEV;
goto err;
}
@@ -966,56 +976,50 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp)
}
file->device = dev;
- file->ucontext = NULL;
- file->async_file = NULL;
kref_init(&file->ref);
- mutex_init(&file->mutex);
- mutex_init(&file->cleanup_mutex);
+ mutex_init(&file->ucontext_lock);
+
+ spin_lock_init(&file->uobjects_lock);
+ INIT_LIST_HEAD(&file->uobjects);
+ init_rwsem(&file->hw_destroy_rwsem);
+ mutex_init(&file->umap_lock);
+ INIT_LIST_HEAD(&file->umaps);
+
+ mutex_init(&file->disassociation_lock);
filp->private_data = file;
- kobject_get(&dev->kobj);
list_add_tail(&file->list, &dev->uverbs_file_list);
mutex_unlock(&dev->lists_mutex);
srcu_read_unlock(&dev->disassociate_srcu, srcu_key);
- return nonseekable_open(inode, filp);
+ setup_ufile_idr_uobject(file);
+
+ return stream_open(inode, filp);
err_module:
- module_put(ib_dev->owner);
+ module_put(ib_dev->ops.owner);
err:
mutex_unlock(&dev->lists_mutex);
srcu_read_unlock(&dev->disassociate_srcu, srcu_key);
- if (atomic_dec_and_test(&dev->refcount))
+ if (refcount_dec_and_test(&dev->refcount))
ib_uverbs_comp_dev(dev);
+ put_device(&dev->dev);
return ret;
}
static int ib_uverbs_close(struct inode *inode, struct file *filp)
{
struct ib_uverbs_file *file = filp->private_data;
- struct ib_uverbs_device *dev = file->device;
- mutex_lock(&file->cleanup_mutex);
- if (file->ucontext) {
- ib_uverbs_cleanup_ucontext(file, file->ucontext);
- file->ucontext = NULL;
- }
- mutex_unlock(&file->cleanup_mutex);
+ uverbs_destroy_ufile_hw(file, RDMA_REMOVE_CLOSE);
mutex_lock(&file->device->lists_mutex);
- if (!file->is_closed) {
- list_del(&file->list);
- file->is_closed = 1;
- }
+ list_del_init(&file->list);
mutex_unlock(&file->device->lists_mutex);
- if (file->async_file)
- kref_put(&file->async_file->ref, ib_uverbs_release_event_file);
-
kref_put(&file->ref, ib_uverbs_release_file);
- kobject_put(&dev->kobj);
return 0;
}
@@ -1025,7 +1029,8 @@ static const struct file_operations uverbs_fops = {
.write = ib_uverbs_write,
.open = ib_uverbs_open,
.release = ib_uverbs_close,
- .llseek = no_llseek,
+ .unlocked_ioctl = ib_uverbs_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
};
static const struct file_operations uverbs_mmap_fops = {
@@ -1034,247 +1039,216 @@ static const struct file_operations uverbs_mmap_fops = {
.mmap = ib_uverbs_mmap,
.open = ib_uverbs_open,
.release = ib_uverbs_close,
- .llseek = no_llseek,
+ .unlocked_ioctl = ib_uverbs_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
};
+static int ib_uverbs_get_nl_info(struct ib_device *ibdev, void *client_data,
+ struct ib_client_nl_info *res)
+{
+ struct ib_uverbs_device *uverbs_dev = client_data;
+ int ret;
+
+ if (res->port != -1)
+ return -EINVAL;
+
+ res->abi = ibdev->ops.uverbs_abi_ver;
+ res->cdev = &uverbs_dev->dev;
+
+ /*
+ * To support DRIVER_ID binding in userspace some of the driver need
+ * upgrading to expose their PCI dependent revision information
+ * through get_context instead of relying on modalias matching. When
+ * the drivers are fixed they can drop this flag.
+ */
+ if (!ibdev->ops.uverbs_no_driver_id_binding) {
+ ret = nla_put_u32(res->nl_msg, RDMA_NLDEV_ATTR_UVERBS_DRIVER_ID,
+ ibdev->ops.driver_id);
+ if (ret)
+ return ret;
+ }
+ return 0;
+}
+
static struct ib_client uverbs_client = {
.name = "uverbs",
+ .no_kverbs_req = true,
.add = ib_uverbs_add_one,
- .remove = ib_uverbs_remove_one
+ .remove = ib_uverbs_remove_one,
+ .get_nl_info = ib_uverbs_get_nl_info,
};
+MODULE_ALIAS_RDMA_CLIENT("uverbs");
-static ssize_t show_ibdev(struct device *device, struct device_attribute *attr,
+static ssize_t ibdev_show(struct device *device, struct device_attribute *attr,
char *buf)
{
+ struct ib_uverbs_device *dev =
+ container_of(device, struct ib_uverbs_device, dev);
int ret = -ENODEV;
int srcu_key;
- struct ib_uverbs_device *dev = dev_get_drvdata(device);
struct ib_device *ib_dev;
- if (!dev)
- return -ENODEV;
-
srcu_key = srcu_read_lock(&dev->disassociate_srcu);
ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu);
if (ib_dev)
- ret = sprintf(buf, "%s\n", ib_dev->name);
+ ret = sysfs_emit(buf, "%s\n", dev_name(&ib_dev->dev));
srcu_read_unlock(&dev->disassociate_srcu, srcu_key);
return ret;
}
-static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
+static DEVICE_ATTR_RO(ibdev);
-static ssize_t show_dev_abi_version(struct device *device,
- struct device_attribute *attr, char *buf)
+static ssize_t abi_version_show(struct device *device,
+ struct device_attribute *attr, char *buf)
{
- struct ib_uverbs_device *dev = dev_get_drvdata(device);
+ struct ib_uverbs_device *dev =
+ container_of(device, struct ib_uverbs_device, dev);
int ret = -ENODEV;
int srcu_key;
struct ib_device *ib_dev;
- if (!dev)
- return -ENODEV;
srcu_key = srcu_read_lock(&dev->disassociate_srcu);
ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu);
if (ib_dev)
- ret = sprintf(buf, "%d\n", ib_dev->uverbs_abi_ver);
+ ret = sysfs_emit(buf, "%u\n", ib_dev->ops.uverbs_abi_ver);
srcu_read_unlock(&dev->disassociate_srcu, srcu_key);
return ret;
}
-static DEVICE_ATTR(abi_version, S_IRUGO, show_dev_abi_version, NULL);
+static DEVICE_ATTR_RO(abi_version);
+
+static struct attribute *ib_dev_attrs[] = {
+ &dev_attr_abi_version.attr,
+ &dev_attr_ibdev.attr,
+ NULL,
+};
+
+static const struct attribute_group dev_attr_group = {
+ .attrs = ib_dev_attrs,
+};
static CLASS_ATTR_STRING(abi_version, S_IRUGO,
__stringify(IB_USER_VERBS_ABI_VERSION));
-static dev_t overflow_maj;
-static DECLARE_BITMAP(overflow_map, IB_UVERBS_MAX_DEVICES);
-
-/*
- * If we have more than IB_UVERBS_MAX_DEVICES, dynamically overflow by
- * requesting a new major number and doubling the number of max devices we
- * support. It's stupid, but simple.
- */
-static int find_overflow_devnum(void)
+static int ib_uverbs_create_uapi(struct ib_device *device,
+ struct ib_uverbs_device *uverbs_dev)
{
- int ret;
-
- if (!overflow_maj) {
- ret = alloc_chrdev_region(&overflow_maj, 0, IB_UVERBS_MAX_DEVICES,
- "infiniband_verbs");
- if (ret) {
- pr_err("user_verbs: couldn't register dynamic device number\n");
- return ret;
- }
- }
+ struct uverbs_api *uapi;
- ret = find_first_zero_bit(overflow_map, IB_UVERBS_MAX_DEVICES);
- if (ret >= IB_UVERBS_MAX_DEVICES)
- return -1;
+ uapi = uverbs_alloc_api(device);
+ if (IS_ERR(uapi))
+ return PTR_ERR(uapi);
- return ret;
+ uverbs_dev->uapi = uapi;
+ return 0;
}
-static void ib_uverbs_add_one(struct ib_device *device)
+static int ib_uverbs_add_one(struct ib_device *device)
{
int devnum;
dev_t base;
struct ib_uverbs_device *uverbs_dev;
int ret;
- if (!device->alloc_ucontext)
- return;
+ if (!device->ops.alloc_ucontext ||
+ device->type == RDMA_DEVICE_TYPE_SMI)
+ return -EOPNOTSUPP;
- uverbs_dev = kzalloc(sizeof *uverbs_dev, GFP_KERNEL);
+ uverbs_dev = kzalloc(sizeof(*uverbs_dev), GFP_KERNEL);
if (!uverbs_dev)
- return;
+ return -ENOMEM;
ret = init_srcu_struct(&uverbs_dev->disassociate_srcu);
if (ret) {
kfree(uverbs_dev);
- return;
+ return -ENOMEM;
}
- atomic_set(&uverbs_dev->refcount, 1);
+ device_initialize(&uverbs_dev->dev);
+ uverbs_dev->dev.class = &uverbs_class;
+ uverbs_dev->dev.parent = device->dev.parent;
+ uverbs_dev->dev.release = ib_uverbs_release_dev;
+ uverbs_dev->groups[0] = &dev_attr_group;
+ uverbs_dev->dev.groups = uverbs_dev->groups;
+ refcount_set(&uverbs_dev->refcount, 1);
init_completion(&uverbs_dev->comp);
uverbs_dev->xrcd_tree = RB_ROOT;
mutex_init(&uverbs_dev->xrcd_tree_mutex);
- kobject_init(&uverbs_dev->kobj, &ib_uverbs_dev_ktype);
mutex_init(&uverbs_dev->lists_mutex);
INIT_LIST_HEAD(&uverbs_dev->uverbs_file_list);
- INIT_LIST_HEAD(&uverbs_dev->uverbs_events_file_list);
-
- spin_lock(&map_lock);
- devnum = find_first_zero_bit(dev_map, IB_UVERBS_MAX_DEVICES);
- if (devnum >= IB_UVERBS_MAX_DEVICES) {
- spin_unlock(&map_lock);
- devnum = find_overflow_devnum();
- if (devnum < 0)
- goto err;
-
- spin_lock(&map_lock);
- uverbs_dev->devnum = devnum + IB_UVERBS_MAX_DEVICES;
- base = devnum + overflow_maj;
- set_bit(devnum, overflow_map);
- } else {
- uverbs_dev->devnum = devnum;
- base = devnum + IB_UVERBS_BASE_DEV;
- set_bit(devnum, dev_map);
- }
- spin_unlock(&map_lock);
-
rcu_assign_pointer(uverbs_dev->ib_dev, device);
uverbs_dev->num_comp_vectors = device->num_comp_vectors;
- cdev_init(&uverbs_dev->cdev, NULL);
- uverbs_dev->cdev.owner = THIS_MODULE;
- uverbs_dev->cdev.ops = device->mmap ? &uverbs_mmap_fops : &uverbs_fops;
- uverbs_dev->cdev.kobj.parent = &uverbs_dev->kobj;
- kobject_set_name(&uverbs_dev->cdev.kobj, "uverbs%d", uverbs_dev->devnum);
- if (cdev_add(&uverbs_dev->cdev, base, 1))
- goto err_cdev;
-
- uverbs_dev->dev = device_create(uverbs_class, device->dma_device,
- uverbs_dev->cdev.dev, uverbs_dev,
- "uverbs%d", uverbs_dev->devnum);
- if (IS_ERR(uverbs_dev->dev))
- goto err_cdev;
-
- if (device_create_file(uverbs_dev->dev, &dev_attr_ibdev))
- goto err_class;
- if (device_create_file(uverbs_dev->dev, &dev_attr_abi_version))
- goto err_class;
+ devnum = ida_alloc_max(&uverbs_ida, IB_UVERBS_MAX_DEVICES - 1,
+ GFP_KERNEL);
+ if (devnum < 0) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ uverbs_dev->devnum = devnum;
+ if (devnum >= IB_UVERBS_NUM_FIXED_MINOR)
+ base = dynamic_uverbs_dev + devnum - IB_UVERBS_NUM_FIXED_MINOR;
+ else
+ base = IB_UVERBS_BASE_DEV + devnum;
- ib_set_client_data(device, &uverbs_client, uverbs_dev);
+ ret = ib_uverbs_create_uapi(device, uverbs_dev);
+ if (ret)
+ goto err_uapi;
- return;
+ uverbs_dev->dev.devt = base;
+ dev_set_name(&uverbs_dev->dev, "uverbs%d", uverbs_dev->devnum);
-err_class:
- device_destroy(uverbs_class, uverbs_dev->cdev.dev);
+ cdev_init(&uverbs_dev->cdev,
+ device->ops.mmap ? &uverbs_mmap_fops : &uverbs_fops);
+ uverbs_dev->cdev.owner = THIS_MODULE;
-err_cdev:
- cdev_del(&uverbs_dev->cdev);
- if (uverbs_dev->devnum < IB_UVERBS_MAX_DEVICES)
- clear_bit(devnum, dev_map);
- else
- clear_bit(devnum, overflow_map);
+ ret = cdev_device_add(&uverbs_dev->cdev, &uverbs_dev->dev);
+ if (ret)
+ goto err_uapi;
+ ib_set_client_data(device, &uverbs_client, uverbs_dev);
+ return 0;
+
+err_uapi:
+ ida_free(&uverbs_ida, devnum);
err:
- if (atomic_dec_and_test(&uverbs_dev->refcount))
+ if (refcount_dec_and_test(&uverbs_dev->refcount))
ib_uverbs_comp_dev(uverbs_dev);
wait_for_completion(&uverbs_dev->comp);
- kobject_put(&uverbs_dev->kobj);
- return;
+ put_device(&uverbs_dev->dev);
+ return ret;
}
static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev,
struct ib_device *ib_dev)
{
struct ib_uverbs_file *file;
- struct ib_uverbs_event_file *event_file;
- struct ib_event event;
/* Pending running commands to terminate */
- synchronize_srcu(&uverbs_dev->disassociate_srcu);
- event.event = IB_EVENT_DEVICE_FATAL;
- event.element.port_num = 0;
- event.device = ib_dev;
+ uverbs_disassociate_api_pre(uverbs_dev);
mutex_lock(&uverbs_dev->lists_mutex);
while (!list_empty(&uverbs_dev->uverbs_file_list)) {
- struct ib_ucontext *ucontext;
file = list_first_entry(&uverbs_dev->uverbs_file_list,
struct ib_uverbs_file, list);
- file->is_closed = 1;
- list_del(&file->list);
+ list_del_init(&file->list);
kref_get(&file->ref);
- mutex_unlock(&uverbs_dev->lists_mutex);
-
- ib_uverbs_event_handler(&file->event_handler, &event);
- mutex_lock(&file->cleanup_mutex);
- ucontext = file->ucontext;
- file->ucontext = NULL;
- mutex_unlock(&file->cleanup_mutex);
-
- /* At this point ib_uverbs_close cannot be running
- * ib_uverbs_cleanup_ucontext
+ /* We must release the mutex before going ahead and calling
+ * uverbs_cleanup_ufile, as it might end up indirectly calling
+ * uverbs_close, for example due to freeing the resources (e.g
+ * mmput).
*/
- if (ucontext) {
- /* We must release the mutex before going ahead and
- * calling disassociate_ucontext. disassociate_ucontext
- * might end up indirectly calling uverbs_close,
- * for example due to freeing the resources
- * (e.g mmput).
- */
- ib_dev->disassociate_ucontext(ucontext);
- ib_uverbs_cleanup_ucontext(file, ucontext);
- }
+ mutex_unlock(&uverbs_dev->lists_mutex);
- mutex_lock(&uverbs_dev->lists_mutex);
+ uverbs_destroy_ufile_hw(file, RDMA_REMOVE_DRIVER_REMOVE);
kref_put(&file->ref, ib_uverbs_release_file);
- }
-
- while (!list_empty(&uverbs_dev->uverbs_events_file_list)) {
- event_file = list_first_entry(&uverbs_dev->
- uverbs_events_file_list,
- struct ib_uverbs_event_file,
- list);
- spin_lock_irq(&event_file->lock);
- event_file->is_closed = 1;
- spin_unlock_irq(&event_file->lock);
-
- list_del(&event_file->list);
- if (event_file->is_async) {
- ib_unregister_event_handler(&event_file->uverbs_file->
- event_handler);
- event_file->uverbs_file->event_handler.device = NULL;
- }
- wake_up_interruptible(&event_file->poll_wait);
- kill_fasync(&event_file->async_queue, SIGIO, POLL_IN);
+ mutex_lock(&uverbs_dev->lists_mutex);
}
mutex_unlock(&uverbs_dev->lists_mutex);
+
+ uverbs_disassociate_api(uverbs_dev->uapi);
}
static void ib_uverbs_remove_one(struct ib_device *device, void *client_data)
@@ -1282,19 +1256,10 @@ static void ib_uverbs_remove_one(struct ib_device *device, void *client_data)
struct ib_uverbs_device *uverbs_dev = client_data;
int wait_clients = 1;
- if (!uverbs_dev)
- return;
-
- dev_set_drvdata(uverbs_dev->dev, NULL);
- device_destroy(uverbs_class, uverbs_dev->cdev.dev);
- cdev_del(&uverbs_dev->cdev);
+ cdev_device_del(&uverbs_dev->cdev, &uverbs_dev->dev);
+ ida_free(&uverbs_ida, uverbs_dev->devnum);
- if (uverbs_dev->devnum < IB_UVERBS_MAX_DEVICES)
- clear_bit(uverbs_dev->devnum, dev_map);
- else
- clear_bit(uverbs_dev->devnum - IB_UVERBS_MAX_DEVICES, overflow_map);
-
- if (device->disassociate_ucontext) {
+ if (device->ops.disassociate_ucontext) {
/* We disassociate HW resources and immediately return.
* Userspace will see a EIO errno for all future access.
* Upon returning, ib_device may be freed internally and is not
@@ -1306,46 +1271,45 @@ static void ib_uverbs_remove_one(struct ib_device *device, void *client_data)
* cdev was deleted, however active clients can still issue
* commands and close their open files.
*/
- rcu_assign_pointer(uverbs_dev->ib_dev, NULL);
ib_uverbs_free_hw_resources(uverbs_dev, device);
wait_clients = 0;
}
- if (atomic_dec_and_test(&uverbs_dev->refcount))
+ if (refcount_dec_and_test(&uverbs_dev->refcount))
ib_uverbs_comp_dev(uverbs_dev);
if (wait_clients)
wait_for_completion(&uverbs_dev->comp);
- kobject_put(&uverbs_dev->kobj);
-}
-static char *uverbs_devnode(struct device *dev, umode_t *mode)
-{
- if (mode)
- *mode = 0666;
- return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev));
+ put_device(&uverbs_dev->dev);
}
static int __init ib_uverbs_init(void)
{
int ret;
- ret = register_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES,
+ ret = register_chrdev_region(IB_UVERBS_BASE_DEV,
+ IB_UVERBS_NUM_FIXED_MINOR,
"infiniband_verbs");
if (ret) {
pr_err("user_verbs: couldn't register device number\n");
goto out;
}
- uverbs_class = class_create(THIS_MODULE, "infiniband_verbs");
- if (IS_ERR(uverbs_class)) {
- ret = PTR_ERR(uverbs_class);
+ ret = alloc_chrdev_region(&dynamic_uverbs_dev, 0,
+ IB_UVERBS_NUM_DYNAMIC_MINOR,
+ "infiniband_verbs");
+ if (ret) {
+ pr_err("couldn't register dynamic device number\n");
+ goto out_alloc;
+ }
+
+ ret = class_register(&uverbs_class);
+ if (ret) {
pr_err("user_verbs: couldn't create class infiniband_verbs\n");
goto out_chrdev;
}
- uverbs_class->devnode = uverbs_devnode;
-
- ret = class_create_file(uverbs_class, &class_attr_abi_version.attr);
+ ret = class_create_file(&uverbs_class, &class_attr_abi_version.attr);
if (ret) {
pr_err("user_verbs: couldn't create abi_version attribute\n");
goto out_class;
@@ -1360,10 +1324,15 @@ static int __init ib_uverbs_init(void)
return 0;
out_class:
- class_destroy(uverbs_class);
+ class_unregister(&uverbs_class);
out_chrdev:
- unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES);
+ unregister_chrdev_region(dynamic_uverbs_dev,
+ IB_UVERBS_NUM_DYNAMIC_MINOR);
+
+out_alloc:
+ unregister_chrdev_region(IB_UVERBS_BASE_DEV,
+ IB_UVERBS_NUM_FIXED_MINOR);
out:
return ret;
@@ -1372,17 +1341,13 @@ out:
static void __exit ib_uverbs_cleanup(void)
{
ib_unregister_client(&uverbs_client);
- class_destroy(uverbs_class);
- unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES);
- if (overflow_maj)
- unregister_chrdev_region(overflow_maj, IB_UVERBS_MAX_DEVICES);
- idr_destroy(&ib_uverbs_pd_idr);
- idr_destroy(&ib_uverbs_mr_idr);
- idr_destroy(&ib_uverbs_mw_idr);
- idr_destroy(&ib_uverbs_ah_idr);
- idr_destroy(&ib_uverbs_cq_idr);
- idr_destroy(&ib_uverbs_qp_idr);
- idr_destroy(&ib_uverbs_srq_idr);
+ class_unregister(&uverbs_class);
+ unregister_chrdev_region(IB_UVERBS_BASE_DEV,
+ IB_UVERBS_NUM_FIXED_MINOR);
+ unregister_chrdev_region(dynamic_uverbs_dev,
+ IB_UVERBS_NUM_DYNAMIC_MINOR);
+ ib_cleanup_ucaps();
+ mmu_notifier_synchronize();
}
module_init(ib_uverbs_init);
diff --git a/drivers/infiniband/core/uverbs_marshall.c b/drivers/infiniband/core/uverbs_marshall.c
index af020f80d50f..e803f609ec87 100644
--- a/drivers/infiniband/core/uverbs_marshall.c
+++ b/drivers/infiniband/core/uverbs_marshall.c
@@ -33,26 +33,68 @@
#include <linux/export.h>
#include <rdma/ib_marshall.h>
-void ib_copy_ah_attr_to_user(struct ib_uverbs_ah_attr *dst,
- struct ib_ah_attr *src)
+#define OPA_DEFAULT_GID_PREFIX cpu_to_be64(0xfe80000000000000ULL)
+static int rdma_ah_conv_opa_to_ib(struct ib_device *dev,
+ struct rdma_ah_attr *ib,
+ struct rdma_ah_attr *opa)
{
- memcpy(dst->grh.dgid, src->grh.dgid.raw, sizeof src->grh.dgid);
- dst->grh.flow_label = src->grh.flow_label;
- dst->grh.sgid_index = src->grh.sgid_index;
- dst->grh.hop_limit = src->grh.hop_limit;
- dst->grh.traffic_class = src->grh.traffic_class;
- memset(&dst->grh.reserved, 0, sizeof(dst->grh.reserved));
- dst->dlid = src->dlid;
- dst->sl = src->sl;
- dst->src_path_bits = src->src_path_bits;
- dst->static_rate = src->static_rate;
- dst->is_global = src->ah_flags & IB_AH_GRH ? 1 : 0;
- dst->port_num = src->port_num;
+ struct ib_port_attr port_attr;
+ int ret = 0;
+
+ /* Do structure copy and the over-write fields */
+ *ib = *opa;
+
+ ib->type = RDMA_AH_ATTR_TYPE_IB;
+ rdma_ah_set_grh(ib, NULL, 0, 0, 1, 0);
+
+ if (ib_query_port(dev, opa->port_num, &port_attr)) {
+ /* Set to default subnet to indicate error */
+ rdma_ah_set_subnet_prefix(ib, OPA_DEFAULT_GID_PREFIX);
+ ret = -EINVAL;
+ } else {
+ rdma_ah_set_subnet_prefix(ib,
+ cpu_to_be64(port_attr.subnet_prefix));
+ }
+ rdma_ah_set_interface_id(ib, OPA_MAKE_ID(rdma_ah_get_dlid(opa)));
+ return ret;
+}
+
+void ib_copy_ah_attr_to_user(struct ib_device *device,
+ struct ib_uverbs_ah_attr *dst,
+ struct rdma_ah_attr *ah_attr)
+{
+ struct rdma_ah_attr *src = ah_attr;
+ struct rdma_ah_attr conv_ah;
+
+ memset(&dst->grh, 0, sizeof(dst->grh));
+
+ if ((ah_attr->type == RDMA_AH_ATTR_TYPE_OPA) &&
+ (rdma_ah_get_dlid(ah_attr) > be16_to_cpu(IB_LID_PERMISSIVE)) &&
+ (!rdma_ah_conv_opa_to_ib(device, &conv_ah, ah_attr)))
+ src = &conv_ah;
+
+ dst->dlid = rdma_ah_get_dlid(src);
+ dst->sl = rdma_ah_get_sl(src);
+ dst->src_path_bits = rdma_ah_get_path_bits(src);
+ dst->static_rate = rdma_ah_get_static_rate(src);
+ dst->is_global = rdma_ah_get_ah_flags(src) &
+ IB_AH_GRH ? 1 : 0;
+ if (dst->is_global) {
+ const struct ib_global_route *grh = rdma_ah_read_grh(src);
+
+ memcpy(dst->grh.dgid, grh->dgid.raw, sizeof(grh->dgid));
+ dst->grh.flow_label = grh->flow_label;
+ dst->grh.sgid_index = grh->sgid_index;
+ dst->grh.hop_limit = grh->hop_limit;
+ dst->grh.traffic_class = grh->traffic_class;
+ }
+ dst->port_num = rdma_ah_get_port_num(src);
dst->reserved = 0;
}
EXPORT_SYMBOL(ib_copy_ah_attr_to_user);
-void ib_copy_qp_attr_to_user(struct ib_uverbs_qp_attr *dst,
+void ib_copy_qp_attr_to_user(struct ib_device *device,
+ struct ib_uverbs_qp_attr *dst,
struct ib_qp_attr *src)
{
dst->qp_state = src->qp_state;
@@ -71,8 +113,8 @@ void ib_copy_qp_attr_to_user(struct ib_uverbs_qp_attr *dst,
dst->max_recv_sge = src->cap.max_recv_sge;
dst->max_inline_data = src->cap.max_inline_data;
- ib_copy_ah_attr_to_user(&dst->ah_attr, &src->ah_attr);
- ib_copy_ah_attr_to_user(&dst->alt_ah_attr, &src->alt_ah_attr);
+ ib_copy_ah_attr_to_user(device, &dst->ah_attr, &src->ah_attr);
+ ib_copy_ah_attr_to_user(device, &dst->alt_ah_attr, &src->alt_ah_attr);
dst->pkey_index = src->pkey_index;
dst->alt_pkey_index = src->alt_pkey_index;
@@ -91,15 +133,15 @@ void ib_copy_qp_attr_to_user(struct ib_uverbs_qp_attr *dst,
}
EXPORT_SYMBOL(ib_copy_qp_attr_to_user);
-void ib_copy_path_rec_to_user(struct ib_user_path_rec *dst,
- struct ib_sa_path_rec *src)
+static void __ib_copy_path_rec_to_user(struct ib_user_path_rec *dst,
+ struct sa_path_rec *src)
{
- memcpy(dst->dgid, src->dgid.raw, sizeof src->dgid);
- memcpy(dst->sgid, src->sgid.raw, sizeof src->sgid);
+ memcpy(dst->dgid, src->dgid.raw, sizeof(src->dgid));
+ memcpy(dst->sgid, src->sgid.raw, sizeof(src->sgid));
- dst->dlid = src->dlid;
- dst->slid = src->slid;
- dst->raw_traffic = src->raw_traffic;
+ dst->dlid = htons(ntohl(sa_path_get_dlid(src)));
+ dst->slid = htons(ntohl(sa_path_get_slid(src)));
+ dst->raw_traffic = sa_path_get_raw_traffic(src);
dst->flow_label = src->flow_label;
dst->hop_limit = src->hop_limit;
dst->traffic_class = src->traffic_class;
@@ -115,35 +157,17 @@ void ib_copy_path_rec_to_user(struct ib_user_path_rec *dst,
dst->preference = src->preference;
dst->packet_life_time_selector = src->packet_life_time_selector;
}
-EXPORT_SYMBOL(ib_copy_path_rec_to_user);
-void ib_copy_path_rec_from_user(struct ib_sa_path_rec *dst,
- struct ib_user_path_rec *src)
+void ib_copy_path_rec_to_user(struct ib_user_path_rec *dst,
+ struct sa_path_rec *src)
{
- memcpy(dst->dgid.raw, src->dgid, sizeof dst->dgid);
- memcpy(dst->sgid.raw, src->sgid, sizeof dst->sgid);
+ struct sa_path_rec rec;
- dst->dlid = src->dlid;
- dst->slid = src->slid;
- dst->raw_traffic = src->raw_traffic;
- dst->flow_label = src->flow_label;
- dst->hop_limit = src->hop_limit;
- dst->traffic_class = src->traffic_class;
- dst->reversible = src->reversible;
- dst->numb_path = src->numb_path;
- dst->pkey = src->pkey;
- dst->sl = src->sl;
- dst->mtu_selector = src->mtu_selector;
- dst->mtu = src->mtu;
- dst->rate_selector = src->rate_selector;
- dst->rate = src->rate;
- dst->packet_life_time = src->packet_life_time;
- dst->preference = src->preference;
- dst->packet_life_time_selector = src->packet_life_time_selector;
-
- memset(dst->dmac, 0, sizeof(dst->dmac));
- dst->net = NULL;
- dst->ifindex = 0;
- dst->gid_type = IB_GID_TYPE_IB;
+ if (src->rec_type == SA_PATH_REC_TYPE_OPA) {
+ sa_convert_path_opa_to_ib(&rec, src);
+ __ib_copy_path_rec_to_user(dst, &rec);
+ return;
+ }
+ __ib_copy_path_rec_to_user(dst, src);
}
-EXPORT_SYMBOL(ib_copy_path_rec_from_user);
+EXPORT_SYMBOL(ib_copy_path_rec_to_user);
diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c
new file mode 100644
index 000000000000..13776a66e2e4
--- /dev/null
+++ b/drivers/infiniband/core/uverbs_std_types.c
@@ -0,0 +1,269 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <rdma/uverbs_std_types.h>
+#include <rdma/ib_user_verbs.h>
+#include <rdma/ib_verbs.h>
+#include <linux/bug.h>
+#include <linux/file.h>
+#include <rdma/restrack.h>
+#include "rdma_core.h"
+#include "uverbs.h"
+
+static int uverbs_free_ah(struct ib_uobject *uobject,
+ enum rdma_remove_reason why,
+ struct uverbs_attr_bundle *attrs)
+{
+ return rdma_destroy_ah_user((struct ib_ah *)uobject->object,
+ RDMA_DESTROY_AH_SLEEPABLE,
+ &attrs->driver_udata);
+}
+
+static int uverbs_free_flow(struct ib_uobject *uobject,
+ enum rdma_remove_reason why,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_flow *flow = (struct ib_flow *)uobject->object;
+ struct ib_uflow_object *uflow =
+ container_of(uobject, struct ib_uflow_object, uobject);
+ struct ib_qp *qp = flow->qp;
+ int ret;
+
+ ret = flow->device->ops.destroy_flow(flow);
+ if (!ret) {
+ if (qp)
+ atomic_dec(&qp->usecnt);
+ ib_uverbs_flow_resources_free(uflow->resources);
+ }
+
+ return ret;
+}
+
+static int uverbs_free_mw(struct ib_uobject *uobject,
+ enum rdma_remove_reason why,
+ struct uverbs_attr_bundle *attrs)
+{
+ return uverbs_dealloc_mw((struct ib_mw *)uobject->object);
+}
+
+static int uverbs_free_rwq_ind_tbl(struct ib_uobject *uobject,
+ enum rdma_remove_reason why,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_rwq_ind_table *rwq_ind_tbl = uobject->object;
+ struct ib_wq **ind_tbl = rwq_ind_tbl->ind_tbl;
+ u32 table_size = (1 << rwq_ind_tbl->log_ind_tbl_size);
+ int ret, i;
+
+ if (atomic_read(&rwq_ind_tbl->usecnt))
+ return -EBUSY;
+
+ ret = rwq_ind_tbl->device->ops.destroy_rwq_ind_table(rwq_ind_tbl);
+ if (ret)
+ return ret;
+
+ for (i = 0; i < table_size; i++)
+ atomic_dec(&ind_tbl[i]->usecnt);
+
+ kfree(rwq_ind_tbl);
+ kfree(ind_tbl);
+ return 0;
+}
+
+static int uverbs_free_xrcd(struct ib_uobject *uobject,
+ enum rdma_remove_reason why,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_xrcd *xrcd = uobject->object;
+ struct ib_uxrcd_object *uxrcd =
+ container_of(uobject, struct ib_uxrcd_object, uobject);
+ int ret;
+
+ if (atomic_read(&uxrcd->refcnt))
+ return -EBUSY;
+
+ mutex_lock(&attrs->ufile->device->xrcd_tree_mutex);
+ ret = ib_uverbs_dealloc_xrcd(uobject, xrcd, why, attrs);
+ mutex_unlock(&attrs->ufile->device->xrcd_tree_mutex);
+
+ return ret;
+}
+
+static int uverbs_free_pd(struct ib_uobject *uobject,
+ enum rdma_remove_reason why,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_pd *pd = uobject->object;
+
+ if (atomic_read(&pd->usecnt))
+ return -EBUSY;
+
+ return ib_dealloc_pd_user(pd, &attrs->driver_udata);
+}
+
+void ib_uverbs_free_event_queue(struct ib_uverbs_event_queue *event_queue)
+{
+ struct ib_uverbs_event *entry, *tmp;
+
+ spin_lock_irq(&event_queue->lock);
+ /*
+ * The user must ensure that no new items are added to the event_list
+ * once is_closed is set.
+ */
+ event_queue->is_closed = 1;
+ spin_unlock_irq(&event_queue->lock);
+ wake_up_interruptible(&event_queue->poll_wait);
+ kill_fasync(&event_queue->async_queue, SIGIO, POLL_IN);
+
+ spin_lock_irq(&event_queue->lock);
+ list_for_each_entry_safe(entry, tmp, &event_queue->event_list, list) {
+ if (entry->counter)
+ list_del(&entry->obj_list);
+ list_del(&entry->list);
+ kfree(entry);
+ }
+ spin_unlock_irq(&event_queue->lock);
+}
+
+static void
+uverbs_completion_event_file_destroy_uobj(struct ib_uobject *uobj,
+ enum rdma_remove_reason why)
+{
+ struct ib_uverbs_completion_event_file *file =
+ container_of(uobj, struct ib_uverbs_completion_event_file,
+ uobj);
+
+ ib_uverbs_free_event_queue(&file->ev_queue);
+}
+
+int uverbs_destroy_def_handler(struct uverbs_attr_bundle *attrs)
+{
+ return 0;
+}
+EXPORT_SYMBOL(uverbs_destroy_def_handler);
+
+DECLARE_UVERBS_NAMED_OBJECT(
+ UVERBS_OBJECT_COMP_CHANNEL,
+ UVERBS_TYPE_ALLOC_FD(sizeof(struct ib_uverbs_completion_event_file),
+ uverbs_completion_event_file_destroy_uobj,
+ &uverbs_event_fops,
+ "[infinibandevent]",
+ O_RDONLY));
+
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(
+ UVERBS_METHOD_MW_DESTROY,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_MW_HANDLE,
+ UVERBS_OBJECT_MW,
+ UVERBS_ACCESS_DESTROY,
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_MW,
+ UVERBS_TYPE_ALLOC_IDR(uverbs_free_mw),
+ &UVERBS_METHOD(UVERBS_METHOD_MW_DESTROY));
+
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(
+ UVERBS_METHOD_AH_DESTROY,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_AH_HANDLE,
+ UVERBS_OBJECT_AH,
+ UVERBS_ACCESS_DESTROY,
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_AH,
+ UVERBS_TYPE_ALLOC_IDR(uverbs_free_ah),
+ &UVERBS_METHOD(UVERBS_METHOD_AH_DESTROY));
+
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(
+ UVERBS_METHOD_FLOW_DESTROY,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_FLOW_HANDLE,
+ UVERBS_OBJECT_FLOW,
+ UVERBS_ACCESS_DESTROY,
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_OBJECT(
+ UVERBS_OBJECT_FLOW,
+ UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uflow_object),
+ uverbs_free_flow),
+ &UVERBS_METHOD(UVERBS_METHOD_FLOW_DESTROY));
+
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(
+ UVERBS_METHOD_RWQ_IND_TBL_DESTROY,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_RWQ_IND_TBL_HANDLE,
+ UVERBS_OBJECT_RWQ_IND_TBL,
+ UVERBS_ACCESS_DESTROY,
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_RWQ_IND_TBL,
+ UVERBS_TYPE_ALLOC_IDR(uverbs_free_rwq_ind_tbl),
+ &UVERBS_METHOD(UVERBS_METHOD_RWQ_IND_TBL_DESTROY));
+
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(
+ UVERBS_METHOD_XRCD_DESTROY,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_XRCD_HANDLE,
+ UVERBS_OBJECT_XRCD,
+ UVERBS_ACCESS_DESTROY,
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_OBJECT(
+ UVERBS_OBJECT_XRCD,
+ UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uxrcd_object),
+ uverbs_free_xrcd),
+ &UVERBS_METHOD(UVERBS_METHOD_XRCD_DESTROY));
+
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(
+ UVERBS_METHOD_PD_DESTROY,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_PD_HANDLE,
+ UVERBS_OBJECT_PD,
+ UVERBS_ACCESS_DESTROY,
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_PD,
+ UVERBS_TYPE_ALLOC_IDR(uverbs_free_pd),
+ &UVERBS_METHOD(UVERBS_METHOD_PD_DESTROY));
+
+const struct uapi_definition uverbs_def_obj_intf[] = {
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_PD,
+ UAPI_DEF_OBJ_NEEDS_FN(dealloc_pd)),
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_COMP_CHANNEL,
+ UAPI_DEF_OBJ_NEEDS_FN(dealloc_pd)),
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_AH,
+ UAPI_DEF_OBJ_NEEDS_FN(destroy_ah)),
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_MW,
+ UAPI_DEF_OBJ_NEEDS_FN(dealloc_mw)),
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_FLOW,
+ UAPI_DEF_OBJ_NEEDS_FN(destroy_flow)),
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
+ UVERBS_OBJECT_RWQ_IND_TBL,
+ UAPI_DEF_OBJ_NEEDS_FN(destroy_rwq_ind_table)),
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_XRCD,
+ UAPI_DEF_OBJ_NEEDS_FN(dealloc_xrcd)),
+ {}
+};
diff --git a/drivers/infiniband/core/uverbs_std_types_async_fd.c b/drivers/infiniband/core/uverbs_std_types_async_fd.c
new file mode 100644
index 000000000000..cc24cfdf7aee
--- /dev/null
+++ b/drivers/infiniband/core/uverbs_std_types_async_fd.c
@@ -0,0 +1,79 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2019, Mellanox Technologies inc. All rights reserved.
+ */
+
+#include <rdma/uverbs_std_types.h>
+#include <rdma/uverbs_ioctl.h>
+#include "rdma_core.h"
+#include "uverbs.h"
+
+static int UVERBS_HANDLER(UVERBS_METHOD_ASYNC_EVENT_ALLOC)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uobject *uobj =
+ uverbs_attr_get_uobject(attrs, UVERBS_METHOD_ASYNC_EVENT_ALLOC);
+
+ ib_uverbs_init_async_event_file(
+ container_of(uobj, struct ib_uverbs_async_event_file, uobj));
+ return 0;
+}
+
+static void uverbs_async_event_destroy_uobj(struct ib_uobject *uobj,
+ enum rdma_remove_reason why)
+{
+ struct ib_uverbs_async_event_file *event_file =
+ container_of(uobj, struct ib_uverbs_async_event_file, uobj);
+
+ ib_unregister_event_handler(&event_file->event_handler);
+
+ if (why == RDMA_REMOVE_DRIVER_REMOVE)
+ ib_uverbs_async_handler(event_file, 0, IB_EVENT_DEVICE_FATAL,
+ NULL, NULL);
+}
+
+int uverbs_async_event_release(struct inode *inode, struct file *filp)
+{
+ struct ib_uverbs_async_event_file *event_file;
+ struct ib_uobject *uobj = filp->private_data;
+ int ret;
+
+ if (!uobj)
+ return uverbs_uobject_fd_release(inode, filp);
+
+ event_file =
+ container_of(uobj, struct ib_uverbs_async_event_file, uobj);
+
+ /*
+ * The async event FD has to deliver IB_EVENT_DEVICE_FATAL even after
+ * disassociation, so cleaning the event list must only happen after
+ * release. The user knows it has reached the end of the event stream
+ * when it sees IB_EVENT_DEVICE_FATAL.
+ */
+ uverbs_uobject_get(uobj);
+ ret = uverbs_uobject_fd_release(inode, filp);
+ ib_uverbs_free_event_queue(&event_file->ev_queue);
+ uverbs_uobject_put(uobj);
+ return ret;
+}
+
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_ASYNC_EVENT_ALLOC,
+ UVERBS_ATTR_FD(UVERBS_ATTR_ASYNC_EVENT_ALLOC_FD_HANDLE,
+ UVERBS_OBJECT_ASYNC_EVENT,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_OBJECT(
+ UVERBS_OBJECT_ASYNC_EVENT,
+ UVERBS_TYPE_ALLOC_FD(sizeof(struct ib_uverbs_async_event_file),
+ uverbs_async_event_destroy_uobj,
+ &uverbs_async_event_fops,
+ "[infinibandevent]",
+ O_RDONLY),
+ &UVERBS_METHOD(UVERBS_METHOD_ASYNC_EVENT_ALLOC));
+
+const struct uapi_definition uverbs_def_obj_async_fd[] = {
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_ASYNC_EVENT),
+ {}
+};
diff --git a/drivers/infiniband/core/uverbs_std_types_counters.c b/drivers/infiniband/core/uverbs_std_types_counters.c
new file mode 100644
index 000000000000..381aa5797641
--- /dev/null
+++ b/drivers/infiniband/core/uverbs_std_types_counters.c
@@ -0,0 +1,163 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "rdma_core.h"
+#include "uverbs.h"
+#include <rdma/uverbs_std_types.h>
+
+static int uverbs_free_counters(struct ib_uobject *uobject,
+ enum rdma_remove_reason why,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_counters *counters = uobject->object;
+ int ret;
+
+ if (atomic_read(&counters->usecnt))
+ return -EBUSY;
+
+ ret = counters->device->ops.destroy_counters(counters);
+ if (ret)
+ return ret;
+ kfree(counters);
+ return 0;
+}
+
+static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_CREATE)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uobject *uobj = uverbs_attr_get_uobject(
+ attrs, UVERBS_ATTR_CREATE_COUNTERS_HANDLE);
+ struct ib_device *ib_dev = attrs->context->device;
+ struct ib_counters *counters;
+ int ret;
+
+ /*
+ * This check should be removed once the infrastructure
+ * have the ability to remove methods from parse tree once
+ * such condition is met.
+ */
+ if (!ib_dev->ops.create_counters)
+ return -EOPNOTSUPP;
+
+ counters = rdma_zalloc_drv_obj(ib_dev, ib_counters);
+ if (!counters)
+ return -ENOMEM;
+
+ counters->device = ib_dev;
+ counters->uobject = uobj;
+ uobj->object = counters;
+ atomic_set(&counters->usecnt, 0);
+
+ ret = ib_dev->ops.create_counters(counters, attrs);
+ if (ret)
+ kfree(counters);
+
+ return ret;
+}
+
+static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_READ)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_counters_read_attr read_attr = {};
+ const struct uverbs_attr *uattr;
+ struct ib_counters *counters =
+ uverbs_attr_get_obj(attrs, UVERBS_ATTR_READ_COUNTERS_HANDLE);
+ int ret;
+
+ if (!counters->device->ops.read_counters)
+ return -EOPNOTSUPP;
+
+ if (!atomic_read(&counters->usecnt))
+ return -EINVAL;
+
+ ret = uverbs_get_flags32(&read_attr.flags, attrs,
+ UVERBS_ATTR_READ_COUNTERS_FLAGS,
+ IB_UVERBS_READ_COUNTERS_PREFER_CACHED);
+ if (ret)
+ return ret;
+
+ uattr = uverbs_attr_get(attrs, UVERBS_ATTR_READ_COUNTERS_BUFF);
+ if (IS_ERR(uattr))
+ return PTR_ERR(uattr);
+ read_attr.ncounters = uattr->ptr_attr.len / sizeof(u64);
+ read_attr.counters_buff = uverbs_zalloc(
+ attrs, array_size(read_attr.ncounters, sizeof(u64)));
+ if (IS_ERR(read_attr.counters_buff))
+ return PTR_ERR(read_attr.counters_buff);
+
+ ret = counters->device->ops.read_counters(counters, &read_attr, attrs);
+ if (ret)
+ return ret;
+
+ return uverbs_copy_to(attrs, UVERBS_ATTR_READ_COUNTERS_BUFF,
+ read_attr.counters_buff,
+ read_attr.ncounters * sizeof(u64));
+}
+
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_COUNTERS_CREATE,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_COUNTERS_HANDLE,
+ UVERBS_OBJECT_COUNTERS,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(
+ UVERBS_METHOD_COUNTERS_DESTROY,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_COUNTERS_HANDLE,
+ UVERBS_OBJECT_COUNTERS,
+ UVERBS_ACCESS_DESTROY,
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_COUNTERS_READ,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_READ_COUNTERS_HANDLE,
+ UVERBS_OBJECT_COUNTERS,
+ UVERBS_ACCESS_READ,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_READ_COUNTERS_BUFF,
+ UVERBS_ATTR_MIN_SIZE(0),
+ UA_MANDATORY),
+ UVERBS_ATTR_FLAGS_IN(UVERBS_ATTR_READ_COUNTERS_FLAGS,
+ enum ib_uverbs_read_counters_flags));
+
+DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_COUNTERS,
+ UVERBS_TYPE_ALLOC_IDR(uverbs_free_counters),
+ &UVERBS_METHOD(UVERBS_METHOD_COUNTERS_CREATE),
+ &UVERBS_METHOD(UVERBS_METHOD_COUNTERS_DESTROY),
+ &UVERBS_METHOD(UVERBS_METHOD_COUNTERS_READ));
+
+const struct uapi_definition uverbs_def_obj_counters[] = {
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_COUNTERS,
+ UAPI_DEF_OBJ_NEEDS_FN(destroy_counters)),
+ {}
+};
diff --git a/drivers/infiniband/core/uverbs_std_types_cq.c b/drivers/infiniband/core/uverbs_std_types_cq.c
new file mode 100644
index 000000000000..fab5d914029d
--- /dev/null
+++ b/drivers/infiniband/core/uverbs_std_types_cq.c
@@ -0,0 +1,298 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <rdma/uverbs_std_types.h>
+#include "rdma_core.h"
+#include "uverbs.h"
+#include "restrack.h"
+
+static int uverbs_free_cq(struct ib_uobject *uobject,
+ enum rdma_remove_reason why,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_cq *cq = uobject->object;
+ struct ib_uverbs_event_queue *ev_queue = cq->cq_context;
+ struct ib_ucq_object *ucq =
+ container_of(uobject, struct ib_ucq_object, uevent.uobject);
+ int ret;
+
+ ret = ib_destroy_cq_user(cq, &attrs->driver_udata);
+ if (ret)
+ return ret;
+
+ ib_uverbs_release_ucq(
+ ev_queue ? container_of(ev_queue,
+ struct ib_uverbs_completion_event_file,
+ ev_queue) :
+ NULL,
+ ucq);
+ return 0;
+}
+
+static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_ucq_object *obj = container_of(
+ uverbs_attr_get_uobject(attrs, UVERBS_ATTR_CREATE_CQ_HANDLE),
+ typeof(*obj), uevent.uobject);
+ struct ib_uverbs_completion_event_file *ev_file = NULL;
+ struct ib_device *ib_dev = attrs->context->device;
+ struct ib_umem_dmabuf *umem_dmabuf;
+ struct ib_cq_init_attr attr = {};
+ struct ib_uobject *ev_file_uobj;
+ struct ib_umem *umem = NULL;
+ u64 buffer_length;
+ u64 buffer_offset;
+ struct ib_cq *cq;
+ u64 user_handle;
+ u64 buffer_va;
+ int buffer_fd;
+ int ret;
+
+ if ((!ib_dev->ops.create_cq && !ib_dev->ops.create_cq_umem) || !ib_dev->ops.destroy_cq)
+ return -EOPNOTSUPP;
+
+ ret = uverbs_copy_from(&attr.comp_vector, attrs,
+ UVERBS_ATTR_CREATE_CQ_COMP_VECTOR);
+ if (!ret)
+ ret = uverbs_copy_from(&attr.cqe, attrs,
+ UVERBS_ATTR_CREATE_CQ_CQE);
+ if (!ret)
+ ret = uverbs_copy_from(&user_handle, attrs,
+ UVERBS_ATTR_CREATE_CQ_USER_HANDLE);
+ if (ret)
+ return ret;
+
+ ret = uverbs_get_flags32(&attr.flags, attrs,
+ UVERBS_ATTR_CREATE_CQ_FLAGS,
+ IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION |
+ IB_UVERBS_CQ_FLAGS_IGNORE_OVERRUN);
+ if (ret)
+ return ret;
+
+ ev_file_uobj = uverbs_attr_get_uobject(attrs, UVERBS_ATTR_CREATE_CQ_COMP_CHANNEL);
+ if (!IS_ERR(ev_file_uobj)) {
+ ev_file = container_of(ev_file_uobj,
+ struct ib_uverbs_completion_event_file,
+ uobj);
+ uverbs_uobject_get(ev_file_uobj);
+ }
+
+ obj->uevent.event_file = ib_uverbs_get_async_event(
+ attrs, UVERBS_ATTR_CREATE_CQ_EVENT_FD);
+
+ if (attr.comp_vector >= attrs->ufile->device->num_comp_vectors) {
+ ret = -EINVAL;
+ goto err_event_file;
+ }
+
+ INIT_LIST_HEAD(&obj->comp_list);
+ INIT_LIST_HEAD(&obj->uevent.event_list);
+
+ if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_CREATE_CQ_BUFFER_VA)) {
+
+ ret = uverbs_copy_from(&buffer_va, attrs, UVERBS_ATTR_CREATE_CQ_BUFFER_VA);
+ if (ret)
+ goto err_event_file;
+
+ ret = uverbs_copy_from(&buffer_length, attrs, UVERBS_ATTR_CREATE_CQ_BUFFER_LENGTH);
+ if (ret)
+ goto err_event_file;
+
+ if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_CREATE_CQ_BUFFER_FD) ||
+ uverbs_attr_is_valid(attrs, UVERBS_ATTR_CREATE_CQ_BUFFER_OFFSET) ||
+ !ib_dev->ops.create_cq_umem) {
+ ret = -EINVAL;
+ goto err_event_file;
+ }
+
+ umem = ib_umem_get(ib_dev, buffer_va, buffer_length, IB_ACCESS_LOCAL_WRITE);
+ if (IS_ERR(umem)) {
+ ret = PTR_ERR(umem);
+ goto err_event_file;
+ }
+ } else if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_CREATE_CQ_BUFFER_FD)) {
+
+ ret = uverbs_get_raw_fd(&buffer_fd, attrs, UVERBS_ATTR_CREATE_CQ_BUFFER_FD);
+ if (ret)
+ goto err_event_file;
+
+ ret = uverbs_copy_from(&buffer_offset, attrs, UVERBS_ATTR_CREATE_CQ_BUFFER_OFFSET);
+ if (ret)
+ goto err_event_file;
+
+ ret = uverbs_copy_from(&buffer_length, attrs, UVERBS_ATTR_CREATE_CQ_BUFFER_LENGTH);
+ if (ret)
+ goto err_event_file;
+
+ if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_CREATE_CQ_BUFFER_VA) ||
+ !ib_dev->ops.create_cq_umem) {
+ ret = -EINVAL;
+ goto err_event_file;
+ }
+
+ umem_dmabuf = ib_umem_dmabuf_get_pinned(ib_dev, buffer_offset, buffer_length,
+ buffer_fd, IB_ACCESS_LOCAL_WRITE);
+ if (IS_ERR(umem_dmabuf)) {
+ ret = PTR_ERR(umem_dmabuf);
+ goto err_event_file;
+ }
+ umem = &umem_dmabuf->umem;
+ } else if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_CREATE_CQ_BUFFER_OFFSET) ||
+ uverbs_attr_is_valid(attrs, UVERBS_ATTR_CREATE_CQ_BUFFER_LENGTH) ||
+ !ib_dev->ops.create_cq) {
+ ret = -EINVAL;
+ goto err_event_file;
+ }
+
+ cq = rdma_zalloc_drv_obj(ib_dev, ib_cq);
+ if (!cq) {
+ ret = -ENOMEM;
+ ib_umem_release(umem);
+ goto err_event_file;
+ }
+
+ cq->device = ib_dev;
+ cq->uobject = obj;
+ cq->comp_handler = ib_uverbs_comp_handler;
+ cq->event_handler = ib_uverbs_cq_event_handler;
+ cq->cq_context = ev_file ? &ev_file->ev_queue : NULL;
+ atomic_set(&cq->usecnt, 0);
+
+ rdma_restrack_new(&cq->res, RDMA_RESTRACK_CQ);
+ rdma_restrack_set_name(&cq->res, NULL);
+
+ ret = umem ? ib_dev->ops.create_cq_umem(cq, &attr, umem, attrs) :
+ ib_dev->ops.create_cq(cq, &attr, attrs);
+ if (ret)
+ goto err_free;
+
+ obj->uevent.uobject.object = cq;
+ obj->uevent.uobject.user_handle = user_handle;
+ rdma_restrack_add(&cq->res);
+ uverbs_finalize_uobj_create(attrs, UVERBS_ATTR_CREATE_CQ_HANDLE);
+
+ ret = uverbs_copy_to(attrs, UVERBS_ATTR_CREATE_CQ_RESP_CQE, &cq->cqe,
+ sizeof(cq->cqe));
+ return ret;
+
+err_free:
+ ib_umem_release(umem);
+ rdma_restrack_put(&cq->res);
+ kfree(cq);
+err_event_file:
+ if (obj->uevent.event_file)
+ uverbs_uobject_put(&obj->uevent.event_file->uobj);
+ if (ev_file)
+ uverbs_uobject_put(ev_file_uobj);
+ return ret;
+};
+
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_CQ_CREATE,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_CQ_HANDLE,
+ UVERBS_OBJECT_CQ,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_CQE,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_USER_HANDLE,
+ UVERBS_ATTR_TYPE(u64),
+ UA_MANDATORY),
+ UVERBS_ATTR_FD(UVERBS_ATTR_CREATE_CQ_COMP_CHANNEL,
+ UVERBS_OBJECT_COMP_CHANNEL,
+ UVERBS_ACCESS_READ,
+ UA_OPTIONAL),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_COMP_VECTOR,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_FLAGS_IN(UVERBS_ATTR_CREATE_CQ_FLAGS,
+ enum ib_uverbs_ex_create_cq_flags),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_CREATE_CQ_RESP_CQE,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_FD(UVERBS_ATTR_CREATE_CQ_EVENT_FD,
+ UVERBS_OBJECT_ASYNC_EVENT,
+ UVERBS_ACCESS_READ,
+ UA_OPTIONAL),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_BUFFER_VA,
+ UVERBS_ATTR_TYPE(u64),
+ UA_OPTIONAL),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_BUFFER_LENGTH,
+ UVERBS_ATTR_TYPE(u64),
+ UA_OPTIONAL),
+ UVERBS_ATTR_RAW_FD(UVERBS_ATTR_CREATE_CQ_BUFFER_FD,
+ UA_OPTIONAL),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_BUFFER_OFFSET,
+ UVERBS_ATTR_TYPE(u64),
+ UA_OPTIONAL),
+ UVERBS_ATTR_UHW());
+
+static int UVERBS_HANDLER(UVERBS_METHOD_CQ_DESTROY)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uobject *uobj =
+ uverbs_attr_get_uobject(attrs, UVERBS_ATTR_DESTROY_CQ_HANDLE);
+ struct ib_ucq_object *obj =
+ container_of(uobj, struct ib_ucq_object, uevent.uobject);
+ struct ib_uverbs_destroy_cq_resp resp = {
+ .comp_events_reported = obj->comp_events_reported,
+ .async_events_reported = obj->uevent.events_reported
+ };
+
+ return uverbs_copy_to(attrs, UVERBS_ATTR_DESTROY_CQ_RESP, &resp,
+ sizeof(resp));
+}
+
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_CQ_DESTROY,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_CQ_HANDLE,
+ UVERBS_OBJECT_CQ,
+ UVERBS_ACCESS_DESTROY,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_DESTROY_CQ_RESP,
+ UVERBS_ATTR_TYPE(struct ib_uverbs_destroy_cq_resp),
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_OBJECT(
+ UVERBS_OBJECT_CQ,
+ UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_ucq_object), uverbs_free_cq),
+ &UVERBS_METHOD(UVERBS_METHOD_CQ_CREATE),
+ &UVERBS_METHOD(UVERBS_METHOD_CQ_DESTROY)
+);
+
+const struct uapi_definition uverbs_def_obj_cq[] = {
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_CQ,
+ UAPI_DEF_OBJ_NEEDS_FN(destroy_cq)),
+ {}
+};
diff --git a/drivers/infiniband/core/uverbs_std_types_device.c b/drivers/infiniband/core/uverbs_std_types_device.c
new file mode 100644
index 000000000000..c0fd283d9d6c
--- /dev/null
+++ b/drivers/infiniband/core/uverbs_std_types_device.c
@@ -0,0 +1,508 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved.
+ */
+
+#include <linux/overflow.h>
+#include <rdma/uverbs_std_types.h>
+#include "rdma_core.h"
+#include "uverbs.h"
+#include <rdma/uverbs_ioctl.h>
+#include <rdma/opa_addr.h>
+#include <rdma/ib_cache.h>
+
+/*
+ * This ioctl method allows calling any defined write or write_ex
+ * handler. This essentially replaces the hdr/ex_hdr system with the ioctl
+ * marshalling, and brings the non-ex path into the same marshalling as the ex
+ * path.
+ */
+static int UVERBS_HANDLER(UVERBS_METHOD_INVOKE_WRITE)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct uverbs_api *uapi = attrs->ufile->device->uapi;
+ const struct uverbs_api_write_method *method_elm;
+ u32 cmd;
+ int rc;
+
+ rc = uverbs_get_const(&cmd, attrs, UVERBS_ATTR_WRITE_CMD);
+ if (rc)
+ return rc;
+
+ method_elm = uapi_get_method(uapi, cmd);
+ if (IS_ERR(method_elm))
+ return PTR_ERR(method_elm);
+
+ uverbs_fill_udata(attrs, &attrs->ucore, UVERBS_ATTR_CORE_IN,
+ UVERBS_ATTR_CORE_OUT);
+
+ if (attrs->ucore.inlen < method_elm->req_size ||
+ attrs->ucore.outlen < method_elm->resp_size)
+ return -ENOSPC;
+
+ attrs->uobject = NULL;
+ rc = method_elm->handler(attrs);
+ if (attrs->uobject)
+ uverbs_finalize_object(attrs->uobject, UVERBS_ACCESS_NEW, true,
+ !rc, attrs);
+ return rc;
+}
+
+DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_INVOKE_WRITE,
+ UVERBS_ATTR_CONST_IN(UVERBS_ATTR_WRITE_CMD,
+ enum ib_uverbs_write_cmds,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CORE_IN,
+ UVERBS_ATTR_MIN_SIZE(sizeof(u32)),
+ UA_OPTIONAL),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_CORE_OUT,
+ UVERBS_ATTR_MIN_SIZE(0),
+ UA_OPTIONAL),
+ UVERBS_ATTR_UHW());
+
+static uint32_t *
+gather_objects_handle(struct ib_uverbs_file *ufile,
+ const struct uverbs_api_object *uapi_object,
+ struct uverbs_attr_bundle *attrs,
+ ssize_t out_len,
+ u64 *total)
+{
+ u64 max_count = out_len / sizeof(u32);
+ struct ib_uobject *obj;
+ u64 count = 0;
+ u32 *handles;
+
+ /* Allocated memory that cannot page out where we gather
+ * all object ids under a spin_lock.
+ */
+ handles = uverbs_zalloc(attrs, out_len);
+ if (IS_ERR(handles))
+ return handles;
+
+ spin_lock_irq(&ufile->uobjects_lock);
+ list_for_each_entry(obj, &ufile->uobjects, list) {
+ u32 obj_id = obj->id;
+
+ if (obj->uapi_object != uapi_object)
+ continue;
+
+ if (count >= max_count)
+ break;
+
+ handles[count] = obj_id;
+ count++;
+ }
+ spin_unlock_irq(&ufile->uobjects_lock);
+
+ *total = count;
+ return handles;
+}
+
+static int UVERBS_HANDLER(UVERBS_METHOD_INFO_HANDLES)(
+ struct uverbs_attr_bundle *attrs)
+{
+ const struct uverbs_api_object *uapi_object;
+ ssize_t out_len;
+ u64 total = 0;
+ u16 object_id;
+ u32 *handles;
+ int ret;
+
+ out_len = uverbs_attr_get_len(attrs, UVERBS_ATTR_INFO_HANDLES_LIST);
+ if (out_len <= 0 || (out_len % sizeof(u32) != 0))
+ return -EINVAL;
+
+ ret = uverbs_get_const(&object_id, attrs, UVERBS_ATTR_INFO_OBJECT_ID);
+ if (ret)
+ return ret;
+
+ uapi_object = uapi_get_object(attrs->ufile->device->uapi, object_id);
+ if (IS_ERR(uapi_object))
+ return PTR_ERR(uapi_object);
+
+ handles = gather_objects_handle(attrs->ufile, uapi_object, attrs,
+ out_len, &total);
+ if (IS_ERR(handles))
+ return PTR_ERR(handles);
+
+ ret = uverbs_copy_to(attrs, UVERBS_ATTR_INFO_HANDLES_LIST, handles,
+ sizeof(u32) * total);
+ if (ret)
+ goto err;
+
+ ret = uverbs_copy_to(attrs, UVERBS_ATTR_INFO_TOTAL_HANDLES, &total,
+ sizeof(total));
+err:
+ return ret;
+}
+
+void copy_port_attr_to_resp(struct ib_port_attr *attr,
+ struct ib_uverbs_query_port_resp *resp,
+ struct ib_device *ib_dev, u8 port_num)
+{
+ resp->state = attr->state;
+ resp->max_mtu = attr->max_mtu;
+ resp->active_mtu = attr->active_mtu;
+ resp->gid_tbl_len = attr->gid_tbl_len;
+ resp->port_cap_flags = make_port_cap_flags(attr);
+ resp->max_msg_sz = attr->max_msg_sz;
+ resp->bad_pkey_cntr = attr->bad_pkey_cntr;
+ resp->qkey_viol_cntr = attr->qkey_viol_cntr;
+ resp->pkey_tbl_len = attr->pkey_tbl_len;
+
+ if (rdma_is_grh_required(ib_dev, port_num))
+ resp->flags |= IB_UVERBS_QPF_GRH_REQUIRED;
+
+ if (rdma_cap_opa_ah(ib_dev, port_num)) {
+ resp->lid = OPA_TO_IB_UCAST_LID(attr->lid);
+ resp->sm_lid = OPA_TO_IB_UCAST_LID(attr->sm_lid);
+ } else {
+ resp->lid = ib_lid_cpu16(attr->lid);
+ resp->sm_lid = ib_lid_cpu16(attr->sm_lid);
+ }
+
+ resp->lmc = attr->lmc;
+ resp->max_vl_num = attr->max_vl_num;
+ resp->sm_sl = attr->sm_sl;
+ resp->subnet_timeout = attr->subnet_timeout;
+ resp->init_type_reply = attr->init_type_reply;
+ resp->active_width = attr->active_width;
+ /* This ABI needs to be extended to provide any speed more than IB_SPEED_NDR */
+ resp->active_speed = min_t(u16, attr->active_speed, IB_SPEED_NDR);
+ resp->phys_state = attr->phys_state;
+ resp->link_layer = rdma_port_get_link_layer(ib_dev, port_num);
+}
+
+static int UVERBS_HANDLER(UVERBS_METHOD_QUERY_PORT)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_device *ib_dev;
+ struct ib_port_attr attr = {};
+ struct ib_uverbs_query_port_resp_ex resp = {};
+ struct ib_ucontext *ucontext;
+ int ret;
+ u8 port_num;
+
+ ucontext = ib_uverbs_get_ucontext(attrs);
+ if (IS_ERR(ucontext))
+ return PTR_ERR(ucontext);
+ ib_dev = ucontext->device;
+
+ /* FIXME: Extend the UAPI_DEF_OBJ_NEEDS_FN stuff.. */
+ if (!ib_dev->ops.query_port)
+ return -EOPNOTSUPP;
+
+ ret = uverbs_get_const(&port_num, attrs,
+ UVERBS_ATTR_QUERY_PORT_PORT_NUM);
+ if (ret)
+ return ret;
+
+ ret = ib_query_port(ib_dev, port_num, &attr);
+ if (ret)
+ return ret;
+
+ copy_port_attr_to_resp(&attr, &resp.legacy_resp, ib_dev, port_num);
+ resp.port_cap_flags2 = attr.port_cap_flags2;
+ resp.active_speed_ex = attr.active_speed;
+
+ return uverbs_copy_to_struct_or_zero(attrs, UVERBS_ATTR_QUERY_PORT_RESP,
+ &resp, sizeof(resp));
+}
+
+static int UVERBS_HANDLER(UVERBS_METHOD_GET_CONTEXT)(
+ struct uverbs_attr_bundle *attrs)
+{
+ u32 num_comp = attrs->ufile->device->num_comp_vectors;
+ u64 core_support = IB_UVERBS_CORE_SUPPORT_OPTIONAL_MR_ACCESS;
+ int ret;
+
+ ret = uverbs_copy_to(attrs, UVERBS_ATTR_GET_CONTEXT_NUM_COMP_VECTORS,
+ &num_comp, sizeof(num_comp));
+ if (IS_UVERBS_COPY_ERR(ret))
+ return ret;
+
+ ret = uverbs_copy_to(attrs, UVERBS_ATTR_GET_CONTEXT_CORE_SUPPORT,
+ &core_support, sizeof(core_support));
+ if (IS_UVERBS_COPY_ERR(ret))
+ return ret;
+
+ ret = ib_alloc_ucontext(attrs);
+ if (ret)
+ return ret;
+ ret = ib_init_ucontext(attrs);
+ if (ret) {
+ kfree(attrs->context);
+ attrs->context = NULL;
+ return ret;
+ }
+ return 0;
+}
+
+static int UVERBS_HANDLER(UVERBS_METHOD_QUERY_CONTEXT)(
+ struct uverbs_attr_bundle *attrs)
+{
+ u64 core_support = IB_UVERBS_CORE_SUPPORT_OPTIONAL_MR_ACCESS;
+ struct ib_ucontext *ucontext;
+ struct ib_device *ib_dev;
+ u32 num_comp;
+ int ret;
+
+ ucontext = ib_uverbs_get_ucontext(attrs);
+ if (IS_ERR(ucontext))
+ return PTR_ERR(ucontext);
+ ib_dev = ucontext->device;
+
+ if (!ib_dev->ops.query_ucontext)
+ return -EOPNOTSUPP;
+
+ num_comp = attrs->ufile->device->num_comp_vectors;
+ ret = uverbs_copy_to(attrs, UVERBS_ATTR_QUERY_CONTEXT_NUM_COMP_VECTORS,
+ &num_comp, sizeof(num_comp));
+ if (IS_UVERBS_COPY_ERR(ret))
+ return ret;
+
+ ret = uverbs_copy_to(attrs, UVERBS_ATTR_QUERY_CONTEXT_CORE_SUPPORT,
+ &core_support, sizeof(core_support));
+ if (IS_UVERBS_COPY_ERR(ret))
+ return ret;
+
+ return ucontext->device->ops.query_ucontext(ucontext, attrs);
+}
+
+static int copy_gid_entries_to_user(struct uverbs_attr_bundle *attrs,
+ struct ib_uverbs_gid_entry *entries,
+ size_t num_entries, size_t user_entry_size)
+{
+ const struct uverbs_attr *attr;
+ void __user *user_entries;
+ size_t copy_len;
+ int ret;
+ int i;
+
+ if (user_entry_size == sizeof(*entries)) {
+ ret = uverbs_copy_to(attrs,
+ UVERBS_ATTR_QUERY_GID_TABLE_RESP_ENTRIES,
+ entries, sizeof(*entries) * num_entries);
+ return ret;
+ }
+
+ copy_len = min_t(size_t, user_entry_size, sizeof(*entries));
+ attr = uverbs_attr_get(attrs, UVERBS_ATTR_QUERY_GID_TABLE_RESP_ENTRIES);
+ if (IS_ERR(attr))
+ return PTR_ERR(attr);
+
+ user_entries = u64_to_user_ptr(attr->ptr_attr.data);
+ for (i = 0; i < num_entries; i++) {
+ if (copy_to_user(user_entries, entries, copy_len))
+ return -EFAULT;
+
+ if (user_entry_size > sizeof(*entries)) {
+ if (clear_user(user_entries + sizeof(*entries),
+ user_entry_size - sizeof(*entries)))
+ return -EFAULT;
+ }
+
+ entries++;
+ user_entries += user_entry_size;
+ }
+
+ return uverbs_output_written(attrs,
+ UVERBS_ATTR_QUERY_GID_TABLE_RESP_ENTRIES);
+}
+
+static int UVERBS_HANDLER(UVERBS_METHOD_QUERY_GID_TABLE)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uverbs_gid_entry *entries;
+ struct ib_ucontext *ucontext;
+ struct ib_device *ib_dev;
+ size_t user_entry_size;
+ ssize_t num_entries;
+ int max_entries;
+ u32 flags;
+ int ret;
+
+ ret = uverbs_get_flags32(&flags, attrs,
+ UVERBS_ATTR_QUERY_GID_TABLE_FLAGS, 0);
+ if (ret)
+ return ret;
+
+ ret = uverbs_get_const(&user_entry_size, attrs,
+ UVERBS_ATTR_QUERY_GID_TABLE_ENTRY_SIZE);
+ if (ret)
+ return ret;
+
+ if (!user_entry_size)
+ return -EINVAL;
+
+ max_entries = uverbs_attr_ptr_get_array_size(
+ attrs, UVERBS_ATTR_QUERY_GID_TABLE_RESP_ENTRIES,
+ user_entry_size);
+ if (max_entries <= 0)
+ return max_entries ?: -EINVAL;
+
+ ucontext = ib_uverbs_get_ucontext(attrs);
+ if (IS_ERR(ucontext))
+ return PTR_ERR(ucontext);
+ ib_dev = ucontext->device;
+
+ entries = uverbs_kcalloc(attrs, max_entries, sizeof(*entries));
+ if (IS_ERR(entries))
+ return PTR_ERR(entries);
+
+ num_entries = rdma_query_gid_table(ib_dev, entries, max_entries);
+ if (num_entries < 0)
+ return -EINVAL;
+
+ ret = copy_gid_entries_to_user(attrs, entries, num_entries,
+ user_entry_size);
+ if (ret)
+ return ret;
+
+ ret = uverbs_copy_to(attrs,
+ UVERBS_ATTR_QUERY_GID_TABLE_RESP_NUM_ENTRIES,
+ &num_entries, sizeof(num_entries));
+ return ret;
+}
+
+static int UVERBS_HANDLER(UVERBS_METHOD_QUERY_GID_ENTRY)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uverbs_gid_entry entry = {};
+ const struct ib_gid_attr *gid_attr;
+ struct ib_ucontext *ucontext;
+ struct ib_device *ib_dev;
+ struct net_device *ndev;
+ u32 gid_index;
+ u32 port_num;
+ u32 flags;
+ int ret;
+
+ ret = uverbs_get_flags32(&flags, attrs,
+ UVERBS_ATTR_QUERY_GID_ENTRY_FLAGS, 0);
+ if (ret)
+ return ret;
+
+ ret = uverbs_get_const(&port_num, attrs,
+ UVERBS_ATTR_QUERY_GID_ENTRY_PORT);
+ if (ret)
+ return ret;
+
+ ret = uverbs_get_const(&gid_index, attrs,
+ UVERBS_ATTR_QUERY_GID_ENTRY_GID_INDEX);
+ if (ret)
+ return ret;
+
+ ucontext = ib_uverbs_get_ucontext(attrs);
+ if (IS_ERR(ucontext))
+ return PTR_ERR(ucontext);
+ ib_dev = ucontext->device;
+
+ if (!rdma_is_port_valid(ib_dev, port_num))
+ return -EINVAL;
+
+ gid_attr = rdma_get_gid_attr(ib_dev, port_num, gid_index);
+ if (IS_ERR(gid_attr))
+ return PTR_ERR(gid_attr);
+
+ memcpy(&entry.gid, &gid_attr->gid, sizeof(gid_attr->gid));
+ entry.gid_index = gid_attr->index;
+ entry.port_num = gid_attr->port_num;
+ entry.gid_type = gid_attr->gid_type;
+
+ rcu_read_lock();
+ ndev = rdma_read_gid_attr_ndev_rcu(gid_attr);
+ if (IS_ERR(ndev)) {
+ if (PTR_ERR(ndev) != -ENODEV) {
+ ret = PTR_ERR(ndev);
+ rcu_read_unlock();
+ goto out;
+ }
+ } else {
+ entry.netdev_ifindex = ndev->ifindex;
+ }
+ rcu_read_unlock();
+
+ ret = uverbs_copy_to_struct_or_zero(
+ attrs, UVERBS_ATTR_QUERY_GID_ENTRY_RESP_ENTRY, &entry,
+ sizeof(entry));
+out:
+ rdma_put_gid_attr(gid_attr);
+ return ret;
+}
+
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_GET_CONTEXT,
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_GET_CONTEXT_NUM_COMP_VECTORS,
+ UVERBS_ATTR_TYPE(u32), UA_OPTIONAL),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_GET_CONTEXT_CORE_SUPPORT,
+ UVERBS_ATTR_TYPE(u64), UA_OPTIONAL),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_GET_CONTEXT_FD_ARR,
+ UVERBS_ATTR_MIN_SIZE(sizeof(int)),
+ UA_OPTIONAL,
+ UA_ALLOC_AND_COPY),
+ UVERBS_ATTR_UHW());
+
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_QUERY_CONTEXT,
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_QUERY_CONTEXT_NUM_COMP_VECTORS,
+ UVERBS_ATTR_TYPE(u32), UA_OPTIONAL),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_QUERY_CONTEXT_CORE_SUPPORT,
+ UVERBS_ATTR_TYPE(u64), UA_OPTIONAL));
+
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_INFO_HANDLES,
+ /* Also includes any device specific object ids */
+ UVERBS_ATTR_CONST_IN(UVERBS_ATTR_INFO_OBJECT_ID,
+ enum uverbs_default_objects, UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_INFO_TOTAL_HANDLES,
+ UVERBS_ATTR_TYPE(u32), UA_OPTIONAL),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_INFO_HANDLES_LIST,
+ UVERBS_ATTR_MIN_SIZE(sizeof(u32)), UA_OPTIONAL));
+
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_QUERY_PORT,
+ UVERBS_ATTR_CONST_IN(UVERBS_ATTR_QUERY_PORT_PORT_NUM, u8, UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(
+ UVERBS_ATTR_QUERY_PORT_RESP,
+ UVERBS_ATTR_STRUCT(struct ib_uverbs_query_port_resp_ex,
+ active_speed_ex),
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_QUERY_GID_TABLE,
+ UVERBS_ATTR_CONST_IN(UVERBS_ATTR_QUERY_GID_TABLE_ENTRY_SIZE, u64,
+ UA_MANDATORY),
+ UVERBS_ATTR_FLAGS_IN(UVERBS_ATTR_QUERY_GID_TABLE_FLAGS, u32,
+ UA_OPTIONAL),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_QUERY_GID_TABLE_RESP_ENTRIES,
+ UVERBS_ATTR_MIN_SIZE(0), UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_QUERY_GID_TABLE_RESP_NUM_ENTRIES,
+ UVERBS_ATTR_TYPE(u64), UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_QUERY_GID_ENTRY,
+ UVERBS_ATTR_CONST_IN(UVERBS_ATTR_QUERY_GID_ENTRY_PORT, u32,
+ UA_MANDATORY),
+ UVERBS_ATTR_CONST_IN(UVERBS_ATTR_QUERY_GID_ENTRY_GID_INDEX, u32,
+ UA_MANDATORY),
+ UVERBS_ATTR_FLAGS_IN(UVERBS_ATTR_QUERY_GID_ENTRY_FLAGS, u32,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_QUERY_GID_ENTRY_RESP_ENTRY,
+ UVERBS_ATTR_STRUCT(struct ib_uverbs_gid_entry,
+ netdev_ifindex),
+ UA_MANDATORY));
+
+DECLARE_UVERBS_GLOBAL_METHODS(UVERBS_OBJECT_DEVICE,
+ &UVERBS_METHOD(UVERBS_METHOD_GET_CONTEXT),
+ &UVERBS_METHOD(UVERBS_METHOD_INVOKE_WRITE),
+ &UVERBS_METHOD(UVERBS_METHOD_INFO_HANDLES),
+ &UVERBS_METHOD(UVERBS_METHOD_QUERY_PORT),
+ &UVERBS_METHOD(UVERBS_METHOD_QUERY_CONTEXT),
+ &UVERBS_METHOD(UVERBS_METHOD_QUERY_GID_TABLE),
+ &UVERBS_METHOD(UVERBS_METHOD_QUERY_GID_ENTRY));
+
+const struct uapi_definition uverbs_def_obj_device[] = {
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_DEVICE),
+ {},
+};
diff --git a/drivers/infiniband/core/uverbs_std_types_dm.c b/drivers/infiniband/core/uverbs_std_types_dm.c
new file mode 100644
index 000000000000..98c522cf86d6
--- /dev/null
+++ b/drivers/infiniband/core/uverbs_std_types_dm.c
@@ -0,0 +1,116 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "rdma_core.h"
+#include "uverbs.h"
+#include <rdma/uverbs_std_types.h>
+
+static int uverbs_free_dm(struct ib_uobject *uobject,
+ enum rdma_remove_reason why,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_dm *dm = uobject->object;
+
+ if (atomic_read(&dm->usecnt))
+ return -EBUSY;
+
+ return dm->device->ops.dealloc_dm(dm, attrs);
+}
+
+static int UVERBS_HANDLER(UVERBS_METHOD_DM_ALLOC)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_dm_alloc_attr attr = {};
+ struct ib_uobject *uobj =
+ uverbs_attr_get(attrs, UVERBS_ATTR_ALLOC_DM_HANDLE)
+ ->obj_attr.uobject;
+ struct ib_device *ib_dev = attrs->context->device;
+ struct ib_dm *dm;
+ int ret;
+
+ if (!ib_dev->ops.alloc_dm)
+ return -EOPNOTSUPP;
+
+ ret = uverbs_copy_from(&attr.length, attrs,
+ UVERBS_ATTR_ALLOC_DM_LENGTH);
+ if (ret)
+ return ret;
+
+ ret = uverbs_copy_from(&attr.alignment, attrs,
+ UVERBS_ATTR_ALLOC_DM_ALIGNMENT);
+ if (ret)
+ return ret;
+
+ dm = ib_dev->ops.alloc_dm(ib_dev, attrs->context, &attr, attrs);
+ if (IS_ERR(dm))
+ return PTR_ERR(dm);
+
+ dm->device = ib_dev;
+ dm->length = attr.length;
+ dm->uobject = uobj;
+ atomic_set(&dm->usecnt, 0);
+
+ uobj->object = dm;
+
+ return 0;
+}
+
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_DM_ALLOC,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_ALLOC_DM_HANDLE,
+ UVERBS_OBJECT_DM,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_ALLOC_DM_LENGTH,
+ UVERBS_ATTR_TYPE(u64),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_ALLOC_DM_ALIGNMENT,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(
+ UVERBS_METHOD_DM_FREE,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_FREE_DM_HANDLE,
+ UVERBS_OBJECT_DM,
+ UVERBS_ACCESS_DESTROY,
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_DM,
+ UVERBS_TYPE_ALLOC_IDR(uverbs_free_dm),
+ &UVERBS_METHOD(UVERBS_METHOD_DM_ALLOC),
+ &UVERBS_METHOD(UVERBS_METHOD_DM_FREE));
+
+const struct uapi_definition uverbs_def_obj_dm[] = {
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_DM,
+ UAPI_DEF_OBJ_NEEDS_FN(dealloc_dm)),
+ {}
+};
diff --git a/drivers/infiniband/core/uverbs_std_types_dmah.c b/drivers/infiniband/core/uverbs_std_types_dmah.c
new file mode 100644
index 000000000000..453ce656c6f2
--- /dev/null
+++ b/drivers/infiniband/core/uverbs_std_types_dmah.c
@@ -0,0 +1,145 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved
+ */
+
+#include "rdma_core.h"
+#include "uverbs.h"
+#include <rdma/uverbs_std_types.h>
+#include "restrack.h"
+
+static int uverbs_free_dmah(struct ib_uobject *uobject,
+ enum rdma_remove_reason why,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_dmah *dmah = uobject->object;
+ int ret;
+
+ if (atomic_read(&dmah->usecnt))
+ return -EBUSY;
+
+ ret = dmah->device->ops.dealloc_dmah(dmah, attrs);
+ if (ret)
+ return ret;
+
+ rdma_restrack_del(&dmah->res);
+ kfree(dmah);
+ return 0;
+}
+
+static int UVERBS_HANDLER(UVERBS_METHOD_DMAH_ALLOC)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uobject *uobj =
+ uverbs_attr_get(attrs, UVERBS_ATTR_ALLOC_DMAH_HANDLE)
+ ->obj_attr.uobject;
+ struct ib_device *ib_dev = attrs->context->device;
+ struct ib_dmah *dmah;
+ int ret;
+
+ dmah = rdma_zalloc_drv_obj(ib_dev, ib_dmah);
+ if (!dmah)
+ return -ENOMEM;
+
+ if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_ALLOC_DMAH_CPU_ID)) {
+ ret = uverbs_copy_from(&dmah->cpu_id, attrs,
+ UVERBS_ATTR_ALLOC_DMAH_CPU_ID);
+ if (ret)
+ goto err;
+
+ if (!cpumask_test_cpu(dmah->cpu_id, current->cpus_ptr)) {
+ ret = -EPERM;
+ goto err;
+ }
+
+ dmah->valid_fields |= BIT(IB_DMAH_CPU_ID_EXISTS);
+ }
+
+ if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_ALLOC_DMAH_TPH_MEM_TYPE)) {
+ dmah->mem_type = uverbs_attr_get_enum_id(attrs,
+ UVERBS_ATTR_ALLOC_DMAH_TPH_MEM_TYPE);
+ dmah->valid_fields |= BIT(IB_DMAH_MEM_TYPE_EXISTS);
+ }
+
+ if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_ALLOC_DMAH_PH)) {
+ ret = uverbs_copy_from(&dmah->ph, attrs,
+ UVERBS_ATTR_ALLOC_DMAH_PH);
+ if (ret)
+ goto err;
+
+ /* Per PCIe spec 6.2-1.0, only the lowest two bits are applicable */
+ if (dmah->ph & 0xFC) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ dmah->valid_fields |= BIT(IB_DMAH_PH_EXISTS);
+ }
+
+ dmah->device = ib_dev;
+ dmah->uobject = uobj;
+ atomic_set(&dmah->usecnt, 0);
+
+ rdma_restrack_new(&dmah->res, RDMA_RESTRACK_DMAH);
+ rdma_restrack_set_name(&dmah->res, NULL);
+
+ ret = ib_dev->ops.alloc_dmah(dmah, attrs);
+ if (ret) {
+ rdma_restrack_put(&dmah->res);
+ goto err;
+ }
+
+ uobj->object = dmah;
+ rdma_restrack_add(&dmah->res);
+ uverbs_finalize_uobj_create(attrs, UVERBS_ATTR_ALLOC_DMAH_HANDLE);
+ return 0;
+err:
+ kfree(dmah);
+ return ret;
+}
+
+static const struct uverbs_attr_spec uverbs_dmah_mem_type[] = {
+ [TPH_MEM_TYPE_VM] = {
+ .type = UVERBS_ATTR_TYPE_PTR_IN,
+ UVERBS_ATTR_NO_DATA(),
+ },
+ [TPH_MEM_TYPE_PM] = {
+ .type = UVERBS_ATTR_TYPE_PTR_IN,
+ UVERBS_ATTR_NO_DATA(),
+ },
+};
+
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_DMAH_ALLOC,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_ALLOC_DMAH_HANDLE,
+ UVERBS_OBJECT_DMAH,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_ALLOC_DMAH_CPU_ID,
+ UVERBS_ATTR_TYPE(u32),
+ UA_OPTIONAL),
+ UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_ALLOC_DMAH_TPH_MEM_TYPE,
+ uverbs_dmah_mem_type,
+ UA_OPTIONAL),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_ALLOC_DMAH_PH,
+ UVERBS_ATTR_TYPE(u8),
+ UA_OPTIONAL));
+
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(
+ UVERBS_METHOD_DMAH_FREE,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_FREE_DMA_HANDLE,
+ UVERBS_OBJECT_DMAH,
+ UVERBS_ACCESS_DESTROY,
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_DMAH,
+ UVERBS_TYPE_ALLOC_IDR(uverbs_free_dmah),
+ &UVERBS_METHOD(UVERBS_METHOD_DMAH_ALLOC),
+ &UVERBS_METHOD(UVERBS_METHOD_DMAH_FREE));
+
+const struct uapi_definition uverbs_def_obj_dmah[] = {
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_DMAH,
+ UAPI_DEF_OBJ_NEEDS_FN(dealloc_dmah),
+ UAPI_DEF_OBJ_NEEDS_FN(alloc_dmah)),
+ {}
+};
diff --git a/drivers/infiniband/hw/qib/qib_pio_copy.c b/drivers/infiniband/core/uverbs_std_types_flow_action.c
index 10b8c444dd31..0ddcf6da66c4 100644
--- a/drivers/infiniband/hw/qib/qib_pio_copy.c
+++ b/drivers/infiniband/core/uverbs_std_types_flow_action.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2009 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -30,35 +30,37 @@
* SOFTWARE.
*/
-#include "qib.h"
+#include "rdma_core.h"
+#include "uverbs.h"
+#include <rdma/uverbs_std_types.h>
-/**
- * qib_pio_copy - copy data to MMIO space, in multiples of 32-bits
- * @to: destination, in MMIO space (must be 64-bit aligned)
- * @from: source (must be 64-bit aligned)
- * @count: number of 32-bit quantities to copy
- *
- * Copy data from kernel space to MMIO space, in multiples of 32 bits at a
- * time. Order of access is not guaranteed, nor is a memory barrier
- * performed afterwards.
- */
-void qib_pio_copy(void __iomem *to, const void *from, size_t count)
+static int uverbs_free_flow_action(struct ib_uobject *uobject,
+ enum rdma_remove_reason why,
+ struct uverbs_attr_bundle *attrs)
{
-#ifdef CONFIG_64BIT
- u64 __iomem *dst = to;
- const u64 *src = from;
- const u64 *end = src + (count >> 1);
+ struct ib_flow_action *action = uobject->object;
- while (src < end)
- __raw_writeq(*src++, dst++);
- if (count & 1)
- __raw_writel(*(const u32 *)src, dst);
-#else
- u32 __iomem *dst = to;
- const u32 *src = from;
- const u32 *end = src + count;
+ if (atomic_read(&action->usecnt))
+ return -EBUSY;
- while (src < end)
- __raw_writel(*src++, dst++);
-#endif
+ return action->device->ops.destroy_flow_action(action);
}
+
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(
+ UVERBS_METHOD_FLOW_ACTION_DESTROY,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_FLOW_ACTION_HANDLE,
+ UVERBS_OBJECT_FLOW_ACTION,
+ UVERBS_ACCESS_DESTROY,
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_OBJECT(
+ UVERBS_OBJECT_FLOW_ACTION,
+ UVERBS_TYPE_ALLOC_IDR(uverbs_free_flow_action),
+ &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_DESTROY));
+
+const struct uapi_definition uverbs_def_obj_flow_action[] = {
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
+ UVERBS_OBJECT_FLOW_ACTION,
+ UAPI_DEF_OBJ_NEEDS_FN(destroy_flow_action)),
+ {}
+};
diff --git a/drivers/infiniband/core/uverbs_std_types_mr.c b/drivers/infiniband/core/uverbs_std_types_mr.c
new file mode 100644
index 000000000000..570b9656801d
--- /dev/null
+++ b/drivers/infiniband/core/uverbs_std_types_mr.c
@@ -0,0 +1,553 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved.
+ * Copyright (c) 2020, Intel Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "rdma_core.h"
+#include "uverbs.h"
+#include <rdma/uverbs_std_types.h>
+#include "restrack.h"
+
+static int uverbs_free_mr(struct ib_uobject *uobject,
+ enum rdma_remove_reason why,
+ struct uverbs_attr_bundle *attrs)
+{
+ return ib_dereg_mr_user((struct ib_mr *)uobject->object,
+ &attrs->driver_udata);
+}
+
+static int UVERBS_HANDLER(UVERBS_METHOD_ADVISE_MR)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_pd *pd =
+ uverbs_attr_get_obj(attrs, UVERBS_ATTR_ADVISE_MR_PD_HANDLE);
+ enum ib_uverbs_advise_mr_advice advice;
+ struct ib_device *ib_dev = pd->device;
+ struct ib_sge *sg_list;
+ int num_sge;
+ u32 flags;
+ int ret;
+
+ /* FIXME: Extend the UAPI_DEF_OBJ_NEEDS_FN stuff.. */
+ if (!ib_dev->ops.advise_mr)
+ return -EOPNOTSUPP;
+
+ ret = uverbs_get_const(&advice, attrs, UVERBS_ATTR_ADVISE_MR_ADVICE);
+ if (ret)
+ return ret;
+
+ ret = uverbs_get_flags32(&flags, attrs, UVERBS_ATTR_ADVISE_MR_FLAGS,
+ IB_UVERBS_ADVISE_MR_FLAG_FLUSH);
+ if (ret)
+ return ret;
+
+ num_sge = uverbs_attr_ptr_get_array_size(
+ attrs, UVERBS_ATTR_ADVISE_MR_SGE_LIST, sizeof(struct ib_sge));
+ if (num_sge <= 0)
+ return num_sge;
+
+ sg_list = uverbs_attr_get_alloced_ptr(attrs,
+ UVERBS_ATTR_ADVISE_MR_SGE_LIST);
+ return ib_dev->ops.advise_mr(pd, advice, flags, sg_list, num_sge,
+ attrs);
+}
+
+static int UVERBS_HANDLER(UVERBS_METHOD_DM_MR_REG)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_dm_mr_attr attr = {};
+ struct ib_uobject *uobj =
+ uverbs_attr_get_uobject(attrs, UVERBS_ATTR_REG_DM_MR_HANDLE);
+ struct ib_dm *dm =
+ uverbs_attr_get_obj(attrs, UVERBS_ATTR_REG_DM_MR_DM_HANDLE);
+ struct ib_pd *pd =
+ uverbs_attr_get_obj(attrs, UVERBS_ATTR_REG_DM_MR_PD_HANDLE);
+ struct ib_device *ib_dev = pd->device;
+
+ struct ib_mr *mr;
+ int ret;
+
+ if (!ib_dev->ops.reg_dm_mr)
+ return -EOPNOTSUPP;
+
+ ret = uverbs_copy_from(&attr.offset, attrs, UVERBS_ATTR_REG_DM_MR_OFFSET);
+ if (ret)
+ return ret;
+
+ ret = uverbs_copy_from(&attr.length, attrs,
+ UVERBS_ATTR_REG_DM_MR_LENGTH);
+ if (ret)
+ return ret;
+
+ ret = uverbs_get_flags32(&attr.access_flags, attrs,
+ UVERBS_ATTR_REG_DM_MR_ACCESS_FLAGS,
+ IB_ACCESS_SUPPORTED);
+ if (ret)
+ return ret;
+
+ if (!(attr.access_flags & IB_ZERO_BASED))
+ return -EINVAL;
+
+ ret = ib_check_mr_access(ib_dev, attr.access_flags);
+ if (ret)
+ return ret;
+
+ if (attr.offset > dm->length || attr.length > dm->length ||
+ attr.length > dm->length - attr.offset)
+ return -EINVAL;
+
+ mr = pd->device->ops.reg_dm_mr(pd, dm, &attr, attrs);
+ if (IS_ERR(mr))
+ return PTR_ERR(mr);
+
+ mr->device = pd->device;
+ mr->pd = pd;
+ mr->type = IB_MR_TYPE_DM;
+ mr->dm = dm;
+ mr->uobject = uobj;
+ atomic_inc(&pd->usecnt);
+ atomic_inc(&dm->usecnt);
+
+ rdma_restrack_new(&mr->res, RDMA_RESTRACK_MR);
+ rdma_restrack_set_name(&mr->res, NULL);
+ rdma_restrack_add(&mr->res);
+ uobj->object = mr;
+
+ uverbs_finalize_uobj_create(attrs, UVERBS_ATTR_REG_DM_MR_HANDLE);
+
+ ret = uverbs_copy_to(attrs, UVERBS_ATTR_REG_DM_MR_RESP_LKEY, &mr->lkey,
+ sizeof(mr->lkey));
+ if (ret)
+ return ret;
+
+ ret = uverbs_copy_to(attrs, UVERBS_ATTR_REG_DM_MR_RESP_RKEY,
+ &mr->rkey, sizeof(mr->rkey));
+ return ret;
+}
+
+static int UVERBS_HANDLER(UVERBS_METHOD_QUERY_MR)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_mr *mr =
+ uverbs_attr_get_obj(attrs, UVERBS_ATTR_QUERY_MR_HANDLE);
+ int ret;
+
+ ret = uverbs_copy_to(attrs, UVERBS_ATTR_QUERY_MR_RESP_LKEY, &mr->lkey,
+ sizeof(mr->lkey));
+ if (ret)
+ return ret;
+
+ ret = uverbs_copy_to(attrs, UVERBS_ATTR_QUERY_MR_RESP_RKEY,
+ &mr->rkey, sizeof(mr->rkey));
+
+ if (ret)
+ return ret;
+
+ ret = uverbs_copy_to(attrs, UVERBS_ATTR_QUERY_MR_RESP_LENGTH,
+ &mr->length, sizeof(mr->length));
+
+ if (ret)
+ return ret;
+
+ ret = uverbs_copy_to(attrs, UVERBS_ATTR_QUERY_MR_RESP_IOVA,
+ &mr->iova, sizeof(mr->iova));
+
+ return IS_UVERBS_COPY_ERR(ret) ? ret : 0;
+}
+
+static int UVERBS_HANDLER(UVERBS_METHOD_REG_DMABUF_MR)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uobject *uobj =
+ uverbs_attr_get_uobject(attrs, UVERBS_ATTR_REG_DMABUF_MR_HANDLE);
+ struct ib_pd *pd =
+ uverbs_attr_get_obj(attrs, UVERBS_ATTR_REG_DMABUF_MR_PD_HANDLE);
+ struct ib_device *ib_dev = pd->device;
+
+ u64 offset, length, iova;
+ u32 fd, access_flags;
+ struct ib_mr *mr;
+ int ret;
+
+ if (!ib_dev->ops.reg_user_mr_dmabuf)
+ return -EOPNOTSUPP;
+
+ ret = uverbs_copy_from(&offset, attrs,
+ UVERBS_ATTR_REG_DMABUF_MR_OFFSET);
+ if (ret)
+ return ret;
+
+ ret = uverbs_copy_from(&length, attrs,
+ UVERBS_ATTR_REG_DMABUF_MR_LENGTH);
+ if (ret)
+ return ret;
+
+ ret = uverbs_copy_from(&iova, attrs,
+ UVERBS_ATTR_REG_DMABUF_MR_IOVA);
+ if (ret)
+ return ret;
+
+ if ((offset & ~PAGE_MASK) != (iova & ~PAGE_MASK))
+ return -EINVAL;
+
+ ret = uverbs_copy_from(&fd, attrs,
+ UVERBS_ATTR_REG_DMABUF_MR_FD);
+ if (ret)
+ return ret;
+
+ ret = uverbs_get_flags32(&access_flags, attrs,
+ UVERBS_ATTR_REG_DMABUF_MR_ACCESS_FLAGS,
+ IB_ACCESS_LOCAL_WRITE |
+ IB_ACCESS_REMOTE_READ |
+ IB_ACCESS_REMOTE_WRITE |
+ IB_ACCESS_REMOTE_ATOMIC |
+ IB_ACCESS_RELAXED_ORDERING);
+ if (ret)
+ return ret;
+
+ ret = ib_check_mr_access(ib_dev, access_flags);
+ if (ret)
+ return ret;
+
+ mr = pd->device->ops.reg_user_mr_dmabuf(pd, offset, length, iova, fd,
+ access_flags, NULL,
+ attrs);
+ if (IS_ERR(mr))
+ return PTR_ERR(mr);
+
+ mr->device = pd->device;
+ mr->pd = pd;
+ mr->type = IB_MR_TYPE_USER;
+ mr->uobject = uobj;
+ atomic_inc(&pd->usecnt);
+
+ rdma_restrack_new(&mr->res, RDMA_RESTRACK_MR);
+ rdma_restrack_set_name(&mr->res, NULL);
+ rdma_restrack_add(&mr->res);
+ uobj->object = mr;
+
+ uverbs_finalize_uobj_create(attrs, UVERBS_ATTR_REG_DMABUF_MR_HANDLE);
+
+ ret = uverbs_copy_to(attrs, UVERBS_ATTR_REG_DMABUF_MR_RESP_LKEY,
+ &mr->lkey, sizeof(mr->lkey));
+ if (ret)
+ return ret;
+
+ ret = uverbs_copy_to(attrs, UVERBS_ATTR_REG_DMABUF_MR_RESP_RKEY,
+ &mr->rkey, sizeof(mr->rkey));
+ return ret;
+}
+
+static int UVERBS_HANDLER(UVERBS_METHOD_REG_MR)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uobject *uobj =
+ uverbs_attr_get_uobject(attrs, UVERBS_ATTR_REG_MR_HANDLE);
+ struct ib_pd *pd =
+ uverbs_attr_get_obj(attrs, UVERBS_ATTR_REG_MR_PD_HANDLE);
+ u32 valid_access_flags = IB_ACCESS_SUPPORTED;
+ u64 length, iova, fd_offset = 0, addr = 0;
+ struct ib_device *ib_dev = pd->device;
+ struct ib_dmah *dmah = NULL;
+ bool has_fd_offset = false;
+ bool has_addr = false;
+ bool has_fd = false;
+ u32 access_flags;
+ struct ib_mr *mr;
+ int fd;
+ int ret;
+
+ ret = uverbs_copy_from(&iova, attrs, UVERBS_ATTR_REG_MR_IOVA);
+ if (ret)
+ return ret;
+
+ ret = uverbs_copy_from(&length, attrs, UVERBS_ATTR_REG_MR_LENGTH);
+ if (ret)
+ return ret;
+
+ if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_REG_MR_ADDR)) {
+ ret = uverbs_copy_from(&addr, attrs,
+ UVERBS_ATTR_REG_MR_ADDR);
+ if (ret)
+ return ret;
+ has_addr = true;
+ }
+
+ if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_REG_MR_FD_OFFSET)) {
+ ret = uverbs_copy_from(&fd_offset, attrs,
+ UVERBS_ATTR_REG_MR_FD_OFFSET);
+ if (ret)
+ return ret;
+ has_fd_offset = true;
+ }
+
+ if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_REG_MR_FD)) {
+ ret = uverbs_get_raw_fd(&fd, attrs,
+ UVERBS_ATTR_REG_MR_FD);
+ if (ret)
+ return ret;
+ has_fd = true;
+ }
+
+ if (has_fd) {
+ if (!ib_dev->ops.reg_user_mr_dmabuf)
+ return -EOPNOTSUPP;
+
+ /* FD requires offset and can't come with addr */
+ if (!has_fd_offset || has_addr)
+ return -EINVAL;
+
+ if ((fd_offset & ~PAGE_MASK) != (iova & ~PAGE_MASK))
+ return -EINVAL;
+
+ valid_access_flags = IB_ACCESS_LOCAL_WRITE |
+ IB_ACCESS_REMOTE_READ |
+ IB_ACCESS_REMOTE_WRITE |
+ IB_ACCESS_REMOTE_ATOMIC |
+ IB_ACCESS_RELAXED_ORDERING;
+ } else {
+ if (!has_addr || has_fd_offset)
+ return -EINVAL;
+
+ if ((addr & ~PAGE_MASK) != (iova & ~PAGE_MASK))
+ return -EINVAL;
+ }
+
+ if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_REG_MR_DMA_HANDLE)) {
+ dmah = uverbs_attr_get_obj(attrs,
+ UVERBS_ATTR_REG_MR_DMA_HANDLE);
+ if (IS_ERR(dmah))
+ return PTR_ERR(dmah);
+ }
+
+ ret = uverbs_get_flags32(&access_flags, attrs,
+ UVERBS_ATTR_REG_MR_ACCESS_FLAGS,
+ valid_access_flags);
+ if (ret)
+ return ret;
+
+ ret = ib_check_mr_access(ib_dev, access_flags);
+ if (ret)
+ return ret;
+
+ if (has_fd)
+ mr = pd->device->ops.reg_user_mr_dmabuf(pd, fd_offset, length,
+ iova, fd, access_flags,
+ dmah, attrs);
+ else
+ mr = pd->device->ops.reg_user_mr(pd, addr, length, iova,
+ access_flags, dmah, NULL);
+
+ if (IS_ERR(mr))
+ return PTR_ERR(mr);
+
+ mr->device = pd->device;
+ mr->pd = pd;
+ mr->type = IB_MR_TYPE_USER;
+ mr->uobject = uobj;
+ atomic_inc(&pd->usecnt);
+ if (dmah) {
+ mr->dmah = dmah;
+ atomic_inc(&dmah->usecnt);
+ }
+ rdma_restrack_new(&mr->res, RDMA_RESTRACK_MR);
+ rdma_restrack_set_name(&mr->res, NULL);
+ rdma_restrack_add(&mr->res);
+ uobj->object = mr;
+
+ uverbs_finalize_uobj_create(attrs, UVERBS_ATTR_REG_MR_HANDLE);
+
+ ret = uverbs_copy_to(attrs, UVERBS_ATTR_REG_MR_RESP_LKEY,
+ &mr->lkey, sizeof(mr->lkey));
+ if (ret)
+ return ret;
+
+ ret = uverbs_copy_to(attrs, UVERBS_ATTR_REG_MR_RESP_RKEY,
+ &mr->rkey, sizeof(mr->rkey));
+ return ret;
+}
+
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_ADVISE_MR,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_ADVISE_MR_PD_HANDLE,
+ UVERBS_OBJECT_PD,
+ UVERBS_ACCESS_READ,
+ UA_MANDATORY),
+ UVERBS_ATTR_CONST_IN(UVERBS_ATTR_ADVISE_MR_ADVICE,
+ enum ib_uverbs_advise_mr_advice,
+ UA_MANDATORY),
+ UVERBS_ATTR_FLAGS_IN(UVERBS_ATTR_ADVISE_MR_FLAGS,
+ enum ib_uverbs_advise_mr_flag,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_ADVISE_MR_SGE_LIST,
+ UVERBS_ATTR_MIN_SIZE(sizeof(struct ib_uverbs_sge)),
+ UA_MANDATORY,
+ UA_ALLOC_AND_COPY));
+
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_QUERY_MR,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_QUERY_MR_HANDLE,
+ UVERBS_OBJECT_MR,
+ UVERBS_ACCESS_READ,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_QUERY_MR_RESP_RKEY,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_QUERY_MR_RESP_LKEY,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_QUERY_MR_RESP_LENGTH,
+ UVERBS_ATTR_TYPE(u64),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_QUERY_MR_RESP_IOVA,
+ UVERBS_ATTR_TYPE(u64),
+ UA_OPTIONAL));
+
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_DM_MR_REG,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DM_MR_HANDLE,
+ UVERBS_OBJECT_MR,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DM_MR_OFFSET,
+ UVERBS_ATTR_TYPE(u64),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DM_MR_LENGTH,
+ UVERBS_ATTR_TYPE(u64),
+ UA_MANDATORY),
+ UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DM_MR_PD_HANDLE,
+ UVERBS_OBJECT_PD,
+ UVERBS_ACCESS_READ,
+ UA_MANDATORY),
+ UVERBS_ATTR_FLAGS_IN(UVERBS_ATTR_REG_DM_MR_ACCESS_FLAGS,
+ enum ib_access_flags),
+ UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DM_MR_DM_HANDLE,
+ UVERBS_OBJECT_DM,
+ UVERBS_ACCESS_READ,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_REG_DM_MR_RESP_LKEY,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_REG_DM_MR_RESP_RKEY,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_REG_DMABUF_MR,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DMABUF_MR_HANDLE,
+ UVERBS_OBJECT_MR,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY),
+ UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DMABUF_MR_PD_HANDLE,
+ UVERBS_OBJECT_PD,
+ UVERBS_ACCESS_READ,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DMABUF_MR_OFFSET,
+ UVERBS_ATTR_TYPE(u64),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DMABUF_MR_LENGTH,
+ UVERBS_ATTR_TYPE(u64),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DMABUF_MR_IOVA,
+ UVERBS_ATTR_TYPE(u64),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DMABUF_MR_FD,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_FLAGS_IN(UVERBS_ATTR_REG_DMABUF_MR_ACCESS_FLAGS,
+ enum ib_access_flags),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_REG_DMABUF_MR_RESP_LKEY,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_REG_DMABUF_MR_RESP_RKEY,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_REG_MR,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_REG_MR_HANDLE,
+ UVERBS_OBJECT_MR,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY),
+ UVERBS_ATTR_IDR(UVERBS_ATTR_REG_MR_PD_HANDLE,
+ UVERBS_OBJECT_PD,
+ UVERBS_ACCESS_READ,
+ UA_MANDATORY),
+ UVERBS_ATTR_IDR(UVERBS_ATTR_REG_MR_DMA_HANDLE,
+ UVERBS_OBJECT_DMAH,
+ UVERBS_ACCESS_READ,
+ UA_OPTIONAL),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_MR_IOVA,
+ UVERBS_ATTR_TYPE(u64),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_MR_LENGTH,
+ UVERBS_ATTR_TYPE(u64),
+ UA_MANDATORY),
+ UVERBS_ATTR_FLAGS_IN(UVERBS_ATTR_REG_MR_ACCESS_FLAGS,
+ enum ib_access_flags,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_MR_ADDR,
+ UVERBS_ATTR_TYPE(u64),
+ UA_OPTIONAL),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_MR_FD_OFFSET,
+ UVERBS_ATTR_TYPE(u64),
+ UA_OPTIONAL),
+ UVERBS_ATTR_RAW_FD(UVERBS_ATTR_REG_MR_FD,
+ UA_OPTIONAL),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_REG_MR_RESP_LKEY,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_REG_MR_RESP_RKEY,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(
+ UVERBS_METHOD_MR_DESTROY,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_MR_HANDLE,
+ UVERBS_OBJECT_MR,
+ UVERBS_ACCESS_DESTROY,
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_OBJECT(
+ UVERBS_OBJECT_MR,
+ UVERBS_TYPE_ALLOC_IDR(uverbs_free_mr),
+ &UVERBS_METHOD(UVERBS_METHOD_ADVISE_MR),
+ &UVERBS_METHOD(UVERBS_METHOD_DM_MR_REG),
+ &UVERBS_METHOD(UVERBS_METHOD_MR_DESTROY),
+ &UVERBS_METHOD(UVERBS_METHOD_QUERY_MR),
+ &UVERBS_METHOD(UVERBS_METHOD_REG_DMABUF_MR),
+ &UVERBS_METHOD(UVERBS_METHOD_REG_MR));
+
+const struct uapi_definition uverbs_def_obj_mr[] = {
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_MR,
+ UAPI_DEF_OBJ_NEEDS_FN(dereg_mr)),
+ {}
+};
diff --git a/drivers/infiniband/core/uverbs_std_types_qp.c b/drivers/infiniband/core/uverbs_std_types_qp.c
new file mode 100644
index 000000000000..be0730e8509e
--- /dev/null
+++ b/drivers/infiniband/core/uverbs_std_types_qp.c
@@ -0,0 +1,380 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2020, Mellanox Technologies inc. All rights reserved.
+ */
+
+#include <rdma/uverbs_std_types.h>
+#include "rdma_core.h"
+#include "uverbs.h"
+#include "core_priv.h"
+
+static int uverbs_free_qp(struct ib_uobject *uobject,
+ enum rdma_remove_reason why,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_qp *qp = uobject->object;
+ struct ib_uqp_object *uqp =
+ container_of(uobject, struct ib_uqp_object, uevent.uobject);
+ int ret;
+
+ /*
+ * If this is a user triggered destroy then do not allow destruction
+ * until the user cleans up all the mcast bindings. Unlike in other
+ * places we forcibly clean up the mcast attachments for !DESTROY
+ * because the mcast attaches are not ubojects and will not be
+ * destroyed by anything else during cleanup processing.
+ */
+ if (why == RDMA_REMOVE_DESTROY) {
+ if (!list_empty(&uqp->mcast_list))
+ return -EBUSY;
+ } else if (qp == qp->real_qp) {
+ ib_uverbs_detach_umcast(qp, uqp);
+ }
+
+ ret = ib_destroy_qp_user(qp, &attrs->driver_udata);
+ if (ret)
+ return ret;
+
+ if (uqp->uxrcd)
+ atomic_dec(&uqp->uxrcd->refcnt);
+
+ ib_uverbs_release_uevent(&uqp->uevent);
+ return 0;
+}
+
+static int check_creation_flags(enum ib_qp_type qp_type,
+ u32 create_flags)
+{
+ create_flags &= ~IB_UVERBS_QP_CREATE_SQ_SIG_ALL;
+
+ if (!create_flags || qp_type == IB_QPT_DRIVER)
+ return 0;
+
+ if (qp_type != IB_QPT_RAW_PACKET && qp_type != IB_QPT_UD)
+ return -EINVAL;
+
+ if ((create_flags & IB_UVERBS_QP_CREATE_SCATTER_FCS ||
+ create_flags & IB_UVERBS_QP_CREATE_CVLAN_STRIPPING) &&
+ qp_type != IB_QPT_RAW_PACKET)
+ return -EINVAL;
+
+ return 0;
+}
+
+static void set_caps(struct ib_qp_init_attr *attr,
+ struct ib_uverbs_qp_cap *cap, bool req)
+{
+ if (req) {
+ attr->cap.max_send_wr = cap->max_send_wr;
+ attr->cap.max_recv_wr = cap->max_recv_wr;
+ attr->cap.max_send_sge = cap->max_send_sge;
+ attr->cap.max_recv_sge = cap->max_recv_sge;
+ attr->cap.max_inline_data = cap->max_inline_data;
+ } else {
+ cap->max_send_wr = attr->cap.max_send_wr;
+ cap->max_recv_wr = attr->cap.max_recv_wr;
+ cap->max_send_sge = attr->cap.max_send_sge;
+ cap->max_recv_sge = attr->cap.max_recv_sge;
+ cap->max_inline_data = attr->cap.max_inline_data;
+ }
+}
+
+static int UVERBS_HANDLER(UVERBS_METHOD_QP_CREATE)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uqp_object *obj = container_of(
+ uverbs_attr_get_uobject(attrs, UVERBS_ATTR_CREATE_QP_HANDLE),
+ typeof(*obj), uevent.uobject);
+ struct ib_qp_init_attr attr = {};
+ struct ib_uverbs_qp_cap cap = {};
+ struct ib_rwq_ind_table *rwq_ind_tbl = NULL;
+ struct ib_qp *qp;
+ struct ib_pd *pd = NULL;
+ struct ib_srq *srq = NULL;
+ struct ib_cq *recv_cq = NULL;
+ struct ib_cq *send_cq = NULL;
+ struct ib_xrcd *xrcd = NULL;
+ struct ib_uobject *xrcd_uobj = NULL;
+ struct ib_device *device;
+ u64 user_handle;
+ int ret;
+
+ ret = uverbs_copy_from_or_zero(&cap, attrs,
+ UVERBS_ATTR_CREATE_QP_CAP);
+ if (!ret)
+ ret = uverbs_copy_from(&user_handle, attrs,
+ UVERBS_ATTR_CREATE_QP_USER_HANDLE);
+ if (!ret)
+ ret = uverbs_get_const(&attr.qp_type, attrs,
+ UVERBS_ATTR_CREATE_QP_TYPE);
+ if (ret)
+ return ret;
+
+ switch (attr.qp_type) {
+ case IB_QPT_XRC_TGT:
+ if (uverbs_attr_is_valid(attrs,
+ UVERBS_ATTR_CREATE_QP_RECV_CQ_HANDLE) ||
+ uverbs_attr_is_valid(attrs,
+ UVERBS_ATTR_CREATE_QP_SEND_CQ_HANDLE) ||
+ uverbs_attr_is_valid(attrs,
+ UVERBS_ATTR_CREATE_QP_PD_HANDLE) ||
+ uverbs_attr_is_valid(attrs,
+ UVERBS_ATTR_CREATE_QP_IND_TABLE_HANDLE))
+ return -EINVAL;
+
+ xrcd_uobj = uverbs_attr_get_uobject(attrs,
+ UVERBS_ATTR_CREATE_QP_XRCD_HANDLE);
+ if (IS_ERR(xrcd_uobj))
+ return PTR_ERR(xrcd_uobj);
+
+ xrcd = (struct ib_xrcd *)xrcd_uobj->object;
+ if (!xrcd)
+ return -EINVAL;
+ device = xrcd->device;
+ break;
+ case IB_UVERBS_QPT_RAW_PACKET:
+ if (!rdma_uattrs_has_raw_cap(attrs))
+ return -EPERM;
+ fallthrough;
+ case IB_UVERBS_QPT_RC:
+ case IB_UVERBS_QPT_UC:
+ case IB_UVERBS_QPT_UD:
+ case IB_UVERBS_QPT_XRC_INI:
+ case IB_UVERBS_QPT_DRIVER:
+ if (uverbs_attr_is_valid(attrs,
+ UVERBS_ATTR_CREATE_QP_XRCD_HANDLE) ||
+ (uverbs_attr_is_valid(attrs,
+ UVERBS_ATTR_CREATE_QP_SRQ_HANDLE) &&
+ attr.qp_type == IB_QPT_XRC_INI))
+ return -EINVAL;
+
+ pd = uverbs_attr_get_obj(attrs,
+ UVERBS_ATTR_CREATE_QP_PD_HANDLE);
+ if (IS_ERR(pd))
+ return PTR_ERR(pd);
+
+ rwq_ind_tbl = uverbs_attr_get_obj(attrs,
+ UVERBS_ATTR_CREATE_QP_IND_TABLE_HANDLE);
+ if (!IS_ERR(rwq_ind_tbl)) {
+ if (cap.max_recv_wr || cap.max_recv_sge ||
+ uverbs_attr_is_valid(attrs,
+ UVERBS_ATTR_CREATE_QP_RECV_CQ_HANDLE) ||
+ uverbs_attr_is_valid(attrs,
+ UVERBS_ATTR_CREATE_QP_SRQ_HANDLE))
+ return -EINVAL;
+
+ /* send_cq is optional */
+ if (cap.max_send_wr) {
+ send_cq = uverbs_attr_get_obj(attrs,
+ UVERBS_ATTR_CREATE_QP_SEND_CQ_HANDLE);
+ if (IS_ERR(send_cq))
+ return PTR_ERR(send_cq);
+ }
+ attr.rwq_ind_tbl = rwq_ind_tbl;
+ } else {
+ send_cq = uverbs_attr_get_obj(attrs,
+ UVERBS_ATTR_CREATE_QP_SEND_CQ_HANDLE);
+ if (IS_ERR(send_cq))
+ return PTR_ERR(send_cq);
+
+ if (attr.qp_type != IB_QPT_XRC_INI) {
+ recv_cq = uverbs_attr_get_obj(attrs,
+ UVERBS_ATTR_CREATE_QP_RECV_CQ_HANDLE);
+ if (IS_ERR(recv_cq))
+ return PTR_ERR(recv_cq);
+ }
+ }
+
+ device = pd->device;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ ret = uverbs_get_flags32(&attr.create_flags, attrs,
+ UVERBS_ATTR_CREATE_QP_FLAGS,
+ IB_UVERBS_QP_CREATE_BLOCK_MULTICAST_LOOPBACK |
+ IB_UVERBS_QP_CREATE_SCATTER_FCS |
+ IB_UVERBS_QP_CREATE_CVLAN_STRIPPING |
+ IB_UVERBS_QP_CREATE_PCI_WRITE_END_PADDING |
+ IB_UVERBS_QP_CREATE_SQ_SIG_ALL);
+ if (ret)
+ return ret;
+
+ ret = check_creation_flags(attr.qp_type, attr.create_flags);
+ if (ret)
+ return ret;
+
+ if (uverbs_attr_is_valid(attrs,
+ UVERBS_ATTR_CREATE_QP_SOURCE_QPN)) {
+ ret = uverbs_copy_from(&attr.source_qpn, attrs,
+ UVERBS_ATTR_CREATE_QP_SOURCE_QPN);
+ if (ret)
+ return ret;
+ attr.create_flags |= IB_QP_CREATE_SOURCE_QPN;
+ }
+
+ srq = uverbs_attr_get_obj(attrs,
+ UVERBS_ATTR_CREATE_QP_SRQ_HANDLE);
+ if (!IS_ERR(srq)) {
+ if ((srq->srq_type == IB_SRQT_XRC &&
+ attr.qp_type != IB_QPT_XRC_TGT) ||
+ (srq->srq_type != IB_SRQT_XRC &&
+ attr.qp_type == IB_QPT_XRC_TGT))
+ return -EINVAL;
+ attr.srq = srq;
+ }
+
+ obj->uevent.event_file = ib_uverbs_get_async_event(attrs,
+ UVERBS_ATTR_CREATE_QP_EVENT_FD);
+ INIT_LIST_HEAD(&obj->uevent.event_list);
+ INIT_LIST_HEAD(&obj->mcast_list);
+ obj->uevent.uobject.user_handle = user_handle;
+ attr.event_handler = ib_uverbs_qp_event_handler;
+ attr.send_cq = send_cq;
+ attr.recv_cq = recv_cq;
+ attr.xrcd = xrcd;
+ if (attr.create_flags & IB_UVERBS_QP_CREATE_SQ_SIG_ALL) {
+ /* This creation bit is uverbs one, need to mask before
+ * calling drivers. It was added to prevent an extra user attr
+ * only for that when using ioctl.
+ */
+ attr.create_flags &= ~IB_UVERBS_QP_CREATE_SQ_SIG_ALL;
+ attr.sq_sig_type = IB_SIGNAL_ALL_WR;
+ } else {
+ attr.sq_sig_type = IB_SIGNAL_REQ_WR;
+ }
+
+ set_caps(&attr, &cap, true);
+ mutex_init(&obj->mcast_lock);
+
+ qp = ib_create_qp_user(device, pd, &attr, &attrs->driver_udata, obj,
+ KBUILD_MODNAME);
+ if (IS_ERR(qp)) {
+ ret = PTR_ERR(qp);
+ goto err_put;
+ }
+ ib_qp_usecnt_inc(qp);
+
+ if (attr.qp_type == IB_QPT_XRC_TGT) {
+ obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object,
+ uobject);
+ atomic_inc(&obj->uxrcd->refcnt);
+ }
+
+ obj->uevent.uobject.object = qp;
+ uverbs_finalize_uobj_create(attrs, UVERBS_ATTR_CREATE_QP_HANDLE);
+
+ set_caps(&attr, &cap, false);
+ ret = uverbs_copy_to_struct_or_zero(attrs,
+ UVERBS_ATTR_CREATE_QP_RESP_CAP, &cap,
+ sizeof(cap));
+ if (ret)
+ return ret;
+
+ ret = uverbs_copy_to(attrs, UVERBS_ATTR_CREATE_QP_RESP_QP_NUM,
+ &qp->qp_num,
+ sizeof(qp->qp_num));
+
+ return ret;
+err_put:
+ if (obj->uevent.event_file)
+ uverbs_uobject_put(&obj->uevent.event_file->uobj);
+ return ret;
+};
+
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_QP_CREATE,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_QP_HANDLE,
+ UVERBS_OBJECT_QP,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY),
+ UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_QP_XRCD_HANDLE,
+ UVERBS_OBJECT_XRCD,
+ UVERBS_ACCESS_READ,
+ UA_OPTIONAL),
+ UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_QP_PD_HANDLE,
+ UVERBS_OBJECT_PD,
+ UVERBS_ACCESS_READ,
+ UA_OPTIONAL),
+ UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_QP_SRQ_HANDLE,
+ UVERBS_OBJECT_SRQ,
+ UVERBS_ACCESS_READ,
+ UA_OPTIONAL),
+ UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_QP_SEND_CQ_HANDLE,
+ UVERBS_OBJECT_CQ,
+ UVERBS_ACCESS_READ,
+ UA_OPTIONAL),
+ UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_QP_RECV_CQ_HANDLE,
+ UVERBS_OBJECT_CQ,
+ UVERBS_ACCESS_READ,
+ UA_OPTIONAL),
+ UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_QP_IND_TABLE_HANDLE,
+ UVERBS_OBJECT_RWQ_IND_TBL,
+ UVERBS_ACCESS_READ,
+ UA_OPTIONAL),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_QP_USER_HANDLE,
+ UVERBS_ATTR_TYPE(u64),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_QP_CAP,
+ UVERBS_ATTR_STRUCT(struct ib_uverbs_qp_cap,
+ max_inline_data),
+ UA_MANDATORY),
+ UVERBS_ATTR_CONST_IN(UVERBS_ATTR_CREATE_QP_TYPE,
+ enum ib_uverbs_qp_type,
+ UA_MANDATORY),
+ UVERBS_ATTR_FLAGS_IN(UVERBS_ATTR_CREATE_QP_FLAGS,
+ enum ib_uverbs_qp_create_flags,
+ UA_OPTIONAL),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_QP_SOURCE_QPN,
+ UVERBS_ATTR_TYPE(u32),
+ UA_OPTIONAL),
+ UVERBS_ATTR_FD(UVERBS_ATTR_CREATE_QP_EVENT_FD,
+ UVERBS_OBJECT_ASYNC_EVENT,
+ UVERBS_ACCESS_READ,
+ UA_OPTIONAL),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_CREATE_QP_RESP_CAP,
+ UVERBS_ATTR_STRUCT(struct ib_uverbs_qp_cap,
+ max_inline_data),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_CREATE_QP_RESP_QP_NUM,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_UHW());
+
+static int UVERBS_HANDLER(UVERBS_METHOD_QP_DESTROY)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uobject *uobj =
+ uverbs_attr_get_uobject(attrs, UVERBS_ATTR_DESTROY_QP_HANDLE);
+ struct ib_uqp_object *obj =
+ container_of(uobj, struct ib_uqp_object, uevent.uobject);
+ struct ib_uverbs_destroy_qp_resp resp = {
+ .events_reported = obj->uevent.events_reported
+ };
+
+ return uverbs_copy_to(attrs, UVERBS_ATTR_DESTROY_QP_RESP, &resp,
+ sizeof(resp));
+}
+
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_QP_DESTROY,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_QP_HANDLE,
+ UVERBS_OBJECT_QP,
+ UVERBS_ACCESS_DESTROY,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_DESTROY_QP_RESP,
+ UVERBS_ATTR_TYPE(struct ib_uverbs_destroy_qp_resp),
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_OBJECT(
+ UVERBS_OBJECT_QP,
+ UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uqp_object), uverbs_free_qp),
+ &UVERBS_METHOD(UVERBS_METHOD_QP_CREATE),
+ &UVERBS_METHOD(UVERBS_METHOD_QP_DESTROY));
+
+const struct uapi_definition uverbs_def_obj_qp[] = {
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_QP,
+ UAPI_DEF_OBJ_NEEDS_FN(destroy_qp)),
+ {}
+};
diff --git a/drivers/infiniband/core/uverbs_std_types_srq.c b/drivers/infiniband/core/uverbs_std_types_srq.c
new file mode 100644
index 000000000000..e5513f828bdc
--- /dev/null
+++ b/drivers/infiniband/core/uverbs_std_types_srq.c
@@ -0,0 +1,234 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2020, Mellanox Technologies inc. All rights reserved.
+ */
+
+#include <rdma/uverbs_std_types.h>
+#include "rdma_core.h"
+#include "uverbs.h"
+
+static int uverbs_free_srq(struct ib_uobject *uobject,
+ enum rdma_remove_reason why,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_srq *srq = uobject->object;
+ struct ib_uevent_object *uevent =
+ container_of(uobject, struct ib_uevent_object, uobject);
+ enum ib_srq_type srq_type = srq->srq_type;
+ int ret;
+
+ ret = ib_destroy_srq_user(srq, &attrs->driver_udata);
+ if (ret)
+ return ret;
+
+ if (srq_type == IB_SRQT_XRC) {
+ struct ib_usrq_object *us =
+ container_of(uobject, struct ib_usrq_object,
+ uevent.uobject);
+
+ atomic_dec(&us->uxrcd->refcnt);
+ }
+
+ ib_uverbs_release_uevent(uevent);
+ return 0;
+}
+
+static int UVERBS_HANDLER(UVERBS_METHOD_SRQ_CREATE)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_usrq_object *obj = container_of(
+ uverbs_attr_get_uobject(attrs, UVERBS_ATTR_CREATE_SRQ_HANDLE),
+ typeof(*obj), uevent.uobject);
+ struct ib_pd *pd =
+ uverbs_attr_get_obj(attrs, UVERBS_ATTR_CREATE_SRQ_PD_HANDLE);
+ struct ib_srq_init_attr attr = {};
+ struct ib_uobject *xrcd_uobj;
+ struct ib_srq *srq;
+ u64 user_handle;
+ int ret;
+
+ ret = uverbs_copy_from(&attr.attr.max_sge, attrs,
+ UVERBS_ATTR_CREATE_SRQ_MAX_SGE);
+ if (!ret)
+ ret = uverbs_copy_from(&attr.attr.max_wr, attrs,
+ UVERBS_ATTR_CREATE_SRQ_MAX_WR);
+ if (!ret)
+ ret = uverbs_copy_from(&attr.attr.srq_limit, attrs,
+ UVERBS_ATTR_CREATE_SRQ_LIMIT);
+ if (!ret)
+ ret = uverbs_copy_from(&user_handle, attrs,
+ UVERBS_ATTR_CREATE_SRQ_USER_HANDLE);
+ if (!ret)
+ ret = uverbs_get_const(&attr.srq_type, attrs,
+ UVERBS_ATTR_CREATE_SRQ_TYPE);
+ if (ret)
+ return ret;
+
+ if (ib_srq_has_cq(attr.srq_type)) {
+ attr.ext.cq = uverbs_attr_get_obj(attrs,
+ UVERBS_ATTR_CREATE_SRQ_CQ_HANDLE);
+ if (IS_ERR(attr.ext.cq))
+ return PTR_ERR(attr.ext.cq);
+ }
+
+ switch (attr.srq_type) {
+ case IB_UVERBS_SRQT_XRC:
+ xrcd_uobj = uverbs_attr_get_uobject(attrs,
+ UVERBS_ATTR_CREATE_SRQ_XRCD_HANDLE);
+ if (IS_ERR(xrcd_uobj))
+ return PTR_ERR(xrcd_uobj);
+
+ attr.ext.xrc.xrcd = (struct ib_xrcd *)xrcd_uobj->object;
+ if (!attr.ext.xrc.xrcd)
+ return -EINVAL;
+ obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object,
+ uobject);
+ atomic_inc(&obj->uxrcd->refcnt);
+ break;
+ case IB_UVERBS_SRQT_TM:
+ ret = uverbs_copy_from(&attr.ext.tag_matching.max_num_tags,
+ attrs,
+ UVERBS_ATTR_CREATE_SRQ_MAX_NUM_TAGS);
+ if (ret)
+ return ret;
+ break;
+ case IB_UVERBS_SRQT_BASIC:
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ obj->uevent.event_file = ib_uverbs_get_async_event(attrs,
+ UVERBS_ATTR_CREATE_SRQ_EVENT_FD);
+ INIT_LIST_HEAD(&obj->uevent.event_list);
+ attr.event_handler = ib_uverbs_srq_event_handler;
+ obj->uevent.uobject.user_handle = user_handle;
+
+ srq = ib_create_srq_user(pd, &attr, obj, &attrs->driver_udata);
+ if (IS_ERR(srq)) {
+ ret = PTR_ERR(srq);
+ goto err;
+ }
+
+ obj->uevent.uobject.object = srq;
+ uverbs_finalize_uobj_create(attrs, UVERBS_ATTR_CREATE_SRQ_HANDLE);
+
+ ret = uverbs_copy_to(attrs, UVERBS_ATTR_CREATE_SRQ_RESP_MAX_WR,
+ &attr.attr.max_wr,
+ sizeof(attr.attr.max_wr));
+ if (ret)
+ return ret;
+
+ ret = uverbs_copy_to(attrs, UVERBS_ATTR_CREATE_SRQ_RESP_MAX_SGE,
+ &attr.attr.max_sge,
+ sizeof(attr.attr.max_sge));
+ if (ret)
+ return ret;
+
+ if (attr.srq_type == IB_SRQT_XRC) {
+ ret = uverbs_copy_to(attrs,
+ UVERBS_ATTR_CREATE_SRQ_RESP_SRQ_NUM,
+ &srq->ext.xrc.srq_num,
+ sizeof(srq->ext.xrc.srq_num));
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+err:
+ if (obj->uevent.event_file)
+ uverbs_uobject_put(&obj->uevent.event_file->uobj);
+ if (attr.srq_type == IB_SRQT_XRC)
+ atomic_dec(&obj->uxrcd->refcnt);
+ return ret;
+};
+
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_SRQ_CREATE,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_SRQ_HANDLE,
+ UVERBS_OBJECT_SRQ,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY),
+ UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_SRQ_PD_HANDLE,
+ UVERBS_OBJECT_PD,
+ UVERBS_ACCESS_READ,
+ UA_MANDATORY),
+ UVERBS_ATTR_CONST_IN(UVERBS_ATTR_CREATE_SRQ_TYPE,
+ enum ib_uverbs_srq_type,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_SRQ_USER_HANDLE,
+ UVERBS_ATTR_TYPE(u64),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_SRQ_MAX_WR,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_SRQ_MAX_SGE,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_SRQ_LIMIT,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_SRQ_XRCD_HANDLE,
+ UVERBS_OBJECT_XRCD,
+ UVERBS_ACCESS_READ,
+ UA_OPTIONAL),
+ UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_SRQ_CQ_HANDLE,
+ UVERBS_OBJECT_CQ,
+ UVERBS_ACCESS_READ,
+ UA_OPTIONAL),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_SRQ_MAX_NUM_TAGS,
+ UVERBS_ATTR_TYPE(u32),
+ UA_OPTIONAL),
+ UVERBS_ATTR_FD(UVERBS_ATTR_CREATE_SRQ_EVENT_FD,
+ UVERBS_OBJECT_ASYNC_EVENT,
+ UVERBS_ACCESS_READ,
+ UA_OPTIONAL),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_CREATE_SRQ_RESP_MAX_WR,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_CREATE_SRQ_RESP_MAX_SGE,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_CREATE_SRQ_RESP_SRQ_NUM,
+ UVERBS_ATTR_TYPE(u32),
+ UA_OPTIONAL),
+ UVERBS_ATTR_UHW());
+
+static int UVERBS_HANDLER(UVERBS_METHOD_SRQ_DESTROY)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uobject *uobj =
+ uverbs_attr_get_uobject(attrs, UVERBS_ATTR_DESTROY_SRQ_HANDLE);
+ struct ib_usrq_object *obj =
+ container_of(uobj, struct ib_usrq_object, uevent.uobject);
+ struct ib_uverbs_destroy_srq_resp resp = {
+ .events_reported = obj->uevent.events_reported
+ };
+
+ return uverbs_copy_to(attrs, UVERBS_ATTR_DESTROY_SRQ_RESP, &resp,
+ sizeof(resp));
+}
+
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_SRQ_DESTROY,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_SRQ_HANDLE,
+ UVERBS_OBJECT_SRQ,
+ UVERBS_ACCESS_DESTROY,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_DESTROY_SRQ_RESP,
+ UVERBS_ATTR_TYPE(struct ib_uverbs_destroy_srq_resp),
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_OBJECT(
+ UVERBS_OBJECT_SRQ,
+ UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_usrq_object),
+ uverbs_free_srq),
+ &UVERBS_METHOD(UVERBS_METHOD_SRQ_CREATE),
+ &UVERBS_METHOD(UVERBS_METHOD_SRQ_DESTROY)
+);
+
+const struct uapi_definition uverbs_def_obj_srq[] = {
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_SRQ,
+ UAPI_DEF_OBJ_NEEDS_FN(destroy_srq)),
+ {}
+};
diff --git a/drivers/infiniband/core/uverbs_std_types_wq.c b/drivers/infiniband/core/uverbs_std_types_wq.c
new file mode 100644
index 000000000000..7ded8339346f
--- /dev/null
+++ b/drivers/infiniband/core/uverbs_std_types_wq.c
@@ -0,0 +1,194 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2020, Mellanox Technologies inc. All rights reserved.
+ */
+
+#include <rdma/uverbs_std_types.h>
+#include "rdma_core.h"
+#include "uverbs.h"
+
+static int uverbs_free_wq(struct ib_uobject *uobject,
+ enum rdma_remove_reason why,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_wq *wq = uobject->object;
+ struct ib_uwq_object *uwq =
+ container_of(uobject, struct ib_uwq_object, uevent.uobject);
+ int ret;
+
+ ret = ib_destroy_wq_user(wq, &attrs->driver_udata);
+ if (ret)
+ return ret;
+
+ ib_uverbs_release_uevent(&uwq->uevent);
+ return 0;
+}
+
+static int UVERBS_HANDLER(UVERBS_METHOD_WQ_CREATE)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uwq_object *obj = container_of(
+ uverbs_attr_get_uobject(attrs, UVERBS_ATTR_CREATE_WQ_HANDLE),
+ typeof(*obj), uevent.uobject);
+ struct ib_pd *pd =
+ uverbs_attr_get_obj(attrs, UVERBS_ATTR_CREATE_WQ_PD_HANDLE);
+ struct ib_cq *cq =
+ uverbs_attr_get_obj(attrs, UVERBS_ATTR_CREATE_WQ_CQ_HANDLE);
+ struct ib_wq_init_attr wq_init_attr = {};
+ struct ib_wq *wq;
+ u64 user_handle;
+ int ret;
+
+ ret = uverbs_get_flags32(&wq_init_attr.create_flags, attrs,
+ UVERBS_ATTR_CREATE_WQ_FLAGS,
+ IB_UVERBS_WQ_FLAGS_CVLAN_STRIPPING |
+ IB_UVERBS_WQ_FLAGS_SCATTER_FCS |
+ IB_UVERBS_WQ_FLAGS_DELAY_DROP |
+ IB_UVERBS_WQ_FLAGS_PCI_WRITE_END_PADDING);
+ if (!ret)
+ ret = uverbs_copy_from(&wq_init_attr.max_sge, attrs,
+ UVERBS_ATTR_CREATE_WQ_MAX_SGE);
+ if (!ret)
+ ret = uverbs_copy_from(&wq_init_attr.max_wr, attrs,
+ UVERBS_ATTR_CREATE_WQ_MAX_WR);
+ if (!ret)
+ ret = uverbs_copy_from(&user_handle, attrs,
+ UVERBS_ATTR_CREATE_WQ_USER_HANDLE);
+ if (!ret)
+ ret = uverbs_get_const(&wq_init_attr.wq_type, attrs,
+ UVERBS_ATTR_CREATE_WQ_TYPE);
+ if (ret)
+ return ret;
+
+ if (wq_init_attr.wq_type != IB_WQT_RQ)
+ return -EINVAL;
+
+ obj->uevent.event_file = ib_uverbs_get_async_event(attrs,
+ UVERBS_ATTR_CREATE_WQ_EVENT_FD);
+ obj->uevent.uobject.user_handle = user_handle;
+ INIT_LIST_HEAD(&obj->uevent.event_list);
+ wq_init_attr.event_handler = ib_uverbs_wq_event_handler;
+ wq_init_attr.wq_context = attrs->ufile;
+ wq_init_attr.cq = cq;
+
+ wq = pd->device->ops.create_wq(pd, &wq_init_attr, &attrs->driver_udata);
+ if (IS_ERR(wq)) {
+ ret = PTR_ERR(wq);
+ goto err;
+ }
+
+ obj->uevent.uobject.object = wq;
+ wq->wq_type = wq_init_attr.wq_type;
+ wq->cq = cq;
+ wq->pd = pd;
+ wq->device = pd->device;
+ wq->wq_context = wq_init_attr.wq_context;
+ atomic_set(&wq->usecnt, 0);
+ atomic_inc(&pd->usecnt);
+ atomic_inc(&cq->usecnt);
+ wq->uobject = obj;
+ uverbs_finalize_uobj_create(attrs, UVERBS_ATTR_CREATE_WQ_HANDLE);
+
+ ret = uverbs_copy_to(attrs, UVERBS_ATTR_CREATE_WQ_RESP_MAX_WR,
+ &wq_init_attr.max_wr,
+ sizeof(wq_init_attr.max_wr));
+ if (ret)
+ return ret;
+
+ ret = uverbs_copy_to(attrs, UVERBS_ATTR_CREATE_WQ_RESP_MAX_SGE,
+ &wq_init_attr.max_sge,
+ sizeof(wq_init_attr.max_sge));
+ if (ret)
+ return ret;
+
+ ret = uverbs_copy_to(attrs, UVERBS_ATTR_CREATE_WQ_RESP_WQ_NUM,
+ &wq->wq_num,
+ sizeof(wq->wq_num));
+ return ret;
+
+err:
+ if (obj->uevent.event_file)
+ uverbs_uobject_put(&obj->uevent.event_file->uobj);
+ return ret;
+};
+
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_WQ_CREATE,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_WQ_HANDLE,
+ UVERBS_OBJECT_WQ,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY),
+ UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_WQ_PD_HANDLE,
+ UVERBS_OBJECT_PD,
+ UVERBS_ACCESS_READ,
+ UA_MANDATORY),
+ UVERBS_ATTR_CONST_IN(UVERBS_ATTR_CREATE_WQ_TYPE,
+ enum ib_wq_type,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_WQ_USER_HANDLE,
+ UVERBS_ATTR_TYPE(u64),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_WQ_MAX_WR,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_WQ_MAX_SGE,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_FLAGS_IN(UVERBS_ATTR_CREATE_WQ_FLAGS,
+ enum ib_uverbs_wq_flags,
+ UA_MANDATORY),
+ UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_WQ_CQ_HANDLE,
+ UVERBS_OBJECT_CQ,
+ UVERBS_ACCESS_READ,
+ UA_OPTIONAL),
+ UVERBS_ATTR_FD(UVERBS_ATTR_CREATE_WQ_EVENT_FD,
+ UVERBS_OBJECT_ASYNC_EVENT,
+ UVERBS_ACCESS_READ,
+ UA_OPTIONAL),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_CREATE_WQ_RESP_MAX_WR,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_CREATE_WQ_RESP_MAX_SGE,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_CREATE_WQ_RESP_WQ_NUM,
+ UVERBS_ATTR_TYPE(u32),
+ UA_OPTIONAL),
+ UVERBS_ATTR_UHW());
+
+static int UVERBS_HANDLER(UVERBS_METHOD_WQ_DESTROY)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uobject *uobj =
+ uverbs_attr_get_uobject(attrs, UVERBS_ATTR_DESTROY_WQ_HANDLE);
+ struct ib_uwq_object *obj =
+ container_of(uobj, struct ib_uwq_object, uevent.uobject);
+
+ return uverbs_copy_to(attrs, UVERBS_ATTR_DESTROY_WQ_RESP,
+ &obj->uevent.events_reported,
+ sizeof(obj->uevent.events_reported));
+}
+
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_WQ_DESTROY,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_WQ_HANDLE,
+ UVERBS_OBJECT_WQ,
+ UVERBS_ACCESS_DESTROY,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_DESTROY_WQ_RESP,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY));
+
+
+DECLARE_UVERBS_NAMED_OBJECT(
+ UVERBS_OBJECT_WQ,
+ UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uwq_object), uverbs_free_wq),
+ &UVERBS_METHOD(UVERBS_METHOD_WQ_CREATE),
+ &UVERBS_METHOD(UVERBS_METHOD_WQ_DESTROY)
+);
+
+const struct uapi_definition uverbs_def_obj_wq[] = {
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_WQ,
+ UAPI_DEF_OBJ_NEEDS_FN(destroy_wq)),
+ {}
+};
diff --git a/drivers/infiniband/core/uverbs_uapi.c b/drivers/infiniband/core/uverbs_uapi.c
new file mode 100644
index 000000000000..e00ea63175bd
--- /dev/null
+++ b/drivers/infiniband/core/uverbs_uapi.c
@@ -0,0 +1,735 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2017, Mellanox Technologies inc. All rights reserved.
+ */
+#include <rdma/uverbs_ioctl.h>
+#include <rdma/rdma_user_ioctl.h>
+#include <linux/bitops.h>
+#include "rdma_core.h"
+#include "uverbs.h"
+
+static int ib_uverbs_notsupp(struct uverbs_attr_bundle *attrs)
+{
+ return -EOPNOTSUPP;
+}
+
+static void *uapi_add_elm(struct uverbs_api *uapi, u32 key, size_t alloc_size)
+{
+ void *elm;
+ int rc;
+
+ if (key == UVERBS_API_KEY_ERR)
+ return ERR_PTR(-EOVERFLOW);
+
+ elm = kzalloc(alloc_size, GFP_KERNEL);
+ if (!elm)
+ return ERR_PTR(-ENOMEM);
+ rc = radix_tree_insert(&uapi->radix, key, elm);
+ if (rc) {
+ kfree(elm);
+ return ERR_PTR(rc);
+ }
+
+ return elm;
+}
+
+static void *uapi_add_get_elm(struct uverbs_api *uapi, u32 key,
+ size_t alloc_size, bool *exists)
+{
+ void *elm;
+
+ elm = uapi_add_elm(uapi, key, alloc_size);
+ if (!IS_ERR(elm)) {
+ *exists = false;
+ return elm;
+ }
+
+ if (elm != ERR_PTR(-EEXIST))
+ return elm;
+
+ elm = radix_tree_lookup(&uapi->radix, key);
+ if (WARN_ON(!elm))
+ return ERR_PTR(-EINVAL);
+ *exists = true;
+ return elm;
+}
+
+static int uapi_create_write(struct uverbs_api *uapi,
+ struct ib_device *ibdev,
+ const struct uapi_definition *def,
+ u32 obj_key,
+ u32 *cur_method_key)
+{
+ struct uverbs_api_write_method *method_elm;
+ u32 method_key = obj_key;
+ bool exists;
+
+ if (def->write.is_ex)
+ method_key |= uapi_key_write_ex_method(def->write.command_num);
+ else
+ method_key |= uapi_key_write_method(def->write.command_num);
+
+ method_elm = uapi_add_get_elm(uapi, method_key, sizeof(*method_elm),
+ &exists);
+ if (IS_ERR(method_elm))
+ return PTR_ERR(method_elm);
+
+ if (WARN_ON(exists && (def->write.is_ex != method_elm->is_ex)))
+ return -EINVAL;
+
+ method_elm->is_ex = def->write.is_ex;
+ method_elm->handler = def->func_write;
+ if (!def->write.is_ex)
+ method_elm->disabled = !(ibdev->uverbs_cmd_mask &
+ BIT_ULL(def->write.command_num));
+
+ if (!def->write.is_ex && def->func_write) {
+ method_elm->has_udata = def->write.has_udata;
+ method_elm->has_resp = def->write.has_resp;
+ method_elm->req_size = def->write.req_size;
+ method_elm->resp_size = def->write.resp_size;
+ }
+
+ *cur_method_key = method_key;
+ return 0;
+}
+
+static int uapi_merge_method(struct uverbs_api *uapi,
+ struct uverbs_api_object *obj_elm, u32 obj_key,
+ const struct uverbs_method_def *method,
+ bool is_driver)
+{
+ u32 method_key = obj_key | uapi_key_ioctl_method(method->id);
+ struct uverbs_api_ioctl_method *method_elm;
+ unsigned int i;
+ bool exists;
+
+ if (!method->attrs)
+ return 0;
+
+ method_elm = uapi_add_get_elm(uapi, method_key, sizeof(*method_elm),
+ &exists);
+ if (IS_ERR(method_elm))
+ return PTR_ERR(method_elm);
+ if (exists) {
+ /*
+ * This occurs when a driver uses ADD_UVERBS_ATTRIBUTES_SIMPLE
+ */
+ if (WARN_ON(method->handler))
+ return -EINVAL;
+ } else {
+ WARN_ON(!method->handler);
+ rcu_assign_pointer(method_elm->handler, method->handler);
+ if (method->handler != uverbs_destroy_def_handler)
+ method_elm->driver_method = is_driver;
+ }
+
+ for (i = 0; i != method->num_attrs; i++) {
+ const struct uverbs_attr_def *attr = (*method->attrs)[i];
+ struct uverbs_api_attr *attr_slot;
+
+ if (!attr)
+ continue;
+
+ /*
+ * ENUM_IN contains the 'ids' pointer to the driver's .rodata,
+ * so if it is specified by a driver then it always makes this
+ * into a driver method.
+ */
+ if (attr->attr.type == UVERBS_ATTR_TYPE_ENUM_IN)
+ method_elm->driver_method |= is_driver;
+
+ /*
+ * Like other uobject based things we only support a single
+ * uobject being NEW'd or DESTROY'd
+ */
+ if (attr->attr.type == UVERBS_ATTR_TYPE_IDRS_ARRAY) {
+ u8 access = attr->attr.u2.objs_arr.access;
+
+ if (WARN_ON(access == UVERBS_ACCESS_NEW ||
+ access == UVERBS_ACCESS_DESTROY))
+ return -EINVAL;
+ }
+
+ attr_slot =
+ uapi_add_elm(uapi, method_key | uapi_key_attr(attr->id),
+ sizeof(*attr_slot));
+ /* Attributes are not allowed to be modified by drivers */
+ if (IS_ERR(attr_slot))
+ return PTR_ERR(attr_slot);
+
+ attr_slot->spec = attr->attr;
+ }
+
+ return 0;
+}
+
+static int uapi_merge_obj_tree(struct uverbs_api *uapi,
+ const struct uverbs_object_def *obj,
+ bool is_driver)
+{
+ struct uverbs_api_object *obj_elm;
+ unsigned int i;
+ u32 obj_key;
+ bool exists;
+ int rc;
+
+ obj_key = uapi_key_obj(obj->id);
+ obj_elm = uapi_add_get_elm(uapi, obj_key, sizeof(*obj_elm), &exists);
+ if (IS_ERR(obj_elm))
+ return PTR_ERR(obj_elm);
+
+ if (obj->type_attrs) {
+ if (WARN_ON(obj_elm->type_attrs))
+ return -EINVAL;
+
+ obj_elm->id = obj->id;
+ obj_elm->type_attrs = obj->type_attrs;
+ obj_elm->type_class = obj->type_attrs->type_class;
+ /*
+ * Today drivers are only permitted to use idr_class and
+ * fd_class types. We can revoke the IDR types during
+ * disassociation, and the FD types require the driver to use
+ * struct file_operations.owner to prevent the driver module
+ * code from unloading while the file is open. This provides
+ * enough safety that uverbs_uobject_fd_release() will
+ * continue to work. Drivers using FD are responsible to
+ * handle disassociation of the device on their own.
+ */
+ if (WARN_ON(is_driver &&
+ obj->type_attrs->type_class != &uverbs_idr_class &&
+ obj->type_attrs->type_class != &uverbs_fd_class))
+ return -EINVAL;
+ }
+
+ if (!obj->methods)
+ return 0;
+
+ for (i = 0; i != obj->num_methods; i++) {
+ const struct uverbs_method_def *method = (*obj->methods)[i];
+
+ if (!method)
+ continue;
+
+ rc = uapi_merge_method(uapi, obj_elm, obj_key, method,
+ is_driver);
+ if (rc)
+ return rc;
+ }
+
+ return 0;
+}
+
+static int uapi_disable_elm(struct uverbs_api *uapi,
+ const struct uapi_definition *def,
+ u32 obj_key,
+ u32 method_key)
+{
+ bool exists;
+
+ if (def->scope == UAPI_SCOPE_OBJECT) {
+ struct uverbs_api_object *obj_elm;
+
+ obj_elm = uapi_add_get_elm(
+ uapi, obj_key, sizeof(*obj_elm), &exists);
+ if (IS_ERR(obj_elm))
+ return PTR_ERR(obj_elm);
+ obj_elm->disabled = 1;
+ return 0;
+ }
+
+ if (def->scope == UAPI_SCOPE_METHOD &&
+ uapi_key_is_ioctl_method(method_key)) {
+ struct uverbs_api_ioctl_method *method_elm;
+
+ method_elm = uapi_add_get_elm(uapi, method_key,
+ sizeof(*method_elm), &exists);
+ if (IS_ERR(method_elm))
+ return PTR_ERR(method_elm);
+ method_elm->disabled = 1;
+ return 0;
+ }
+
+ if (def->scope == UAPI_SCOPE_METHOD &&
+ (uapi_key_is_write_method(method_key) ||
+ uapi_key_is_write_ex_method(method_key))) {
+ struct uverbs_api_write_method *write_elm;
+
+ write_elm = uapi_add_get_elm(uapi, method_key,
+ sizeof(*write_elm), &exists);
+ if (IS_ERR(write_elm))
+ return PTR_ERR(write_elm);
+ write_elm->disabled = 1;
+ return 0;
+ }
+
+ WARN_ON(true);
+ return -EINVAL;
+}
+
+static int uapi_merge_def(struct uverbs_api *uapi, struct ib_device *ibdev,
+ const struct uapi_definition *def_list,
+ bool is_driver)
+{
+ const struct uapi_definition *def = def_list;
+ u32 cur_obj_key = UVERBS_API_KEY_ERR;
+ u32 cur_method_key = UVERBS_API_KEY_ERR;
+ bool exists;
+ int rc;
+
+ if (!def_list)
+ return 0;
+
+ for (;; def++) {
+ switch ((enum uapi_definition_kind)def->kind) {
+ case UAPI_DEF_CHAIN:
+ rc = uapi_merge_def(uapi, ibdev, def->chain, is_driver);
+ if (rc)
+ return rc;
+ continue;
+
+ case UAPI_DEF_CHAIN_OBJ_TREE:
+ if (WARN_ON(def->object_start.object_id !=
+ def->chain_obj_tree->id))
+ return -EINVAL;
+
+ cur_obj_key = uapi_key_obj(def->object_start.object_id);
+ rc = uapi_merge_obj_tree(uapi, def->chain_obj_tree,
+ is_driver);
+ if (rc)
+ return rc;
+ continue;
+
+ case UAPI_DEF_END:
+ return 0;
+
+ case UAPI_DEF_IS_SUPPORTED_DEV_FN: {
+ void **ibdev_fn =
+ (void *)(&ibdev->ops) + def->needs_fn_offset;
+
+ if (*ibdev_fn)
+ continue;
+ rc = uapi_disable_elm(
+ uapi, def, cur_obj_key, cur_method_key);
+ if (rc)
+ return rc;
+ continue;
+ }
+
+ case UAPI_DEF_IS_SUPPORTED_FUNC:
+ if (def->func_is_supported(ibdev))
+ continue;
+ rc = uapi_disable_elm(
+ uapi, def, cur_obj_key, cur_method_key);
+ if (rc)
+ return rc;
+ continue;
+
+ case UAPI_DEF_OBJECT_START: {
+ struct uverbs_api_object *obj_elm;
+
+ cur_obj_key = uapi_key_obj(def->object_start.object_id);
+ obj_elm = uapi_add_get_elm(uapi, cur_obj_key,
+ sizeof(*obj_elm), &exists);
+ if (IS_ERR(obj_elm))
+ return PTR_ERR(obj_elm);
+ continue;
+ }
+
+ case UAPI_DEF_WRITE:
+ rc = uapi_create_write(
+ uapi, ibdev, def, cur_obj_key, &cur_method_key);
+ if (rc)
+ return rc;
+ continue;
+ }
+ WARN_ON(true);
+ return -EINVAL;
+ }
+}
+
+static int
+uapi_finalize_ioctl_method(struct uverbs_api *uapi,
+ struct uverbs_api_ioctl_method *method_elm,
+ u32 method_key)
+{
+ struct radix_tree_iter iter;
+ unsigned int num_attrs = 0;
+ unsigned int max_bkey = 0;
+ bool single_uobj = false;
+ void __rcu **slot;
+
+ method_elm->destroy_bkey = UVERBS_API_ATTR_BKEY_LEN;
+ radix_tree_for_each_slot (slot, &uapi->radix, &iter,
+ uapi_key_attrs_start(method_key)) {
+ struct uverbs_api_attr *elm =
+ rcu_dereference_protected(*slot, true);
+ u32 attr_key = iter.index & UVERBS_API_ATTR_KEY_MASK;
+ u32 attr_bkey = uapi_bkey_attr(attr_key);
+ u8 type = elm->spec.type;
+
+ if (uapi_key_attr_to_ioctl_method(iter.index) !=
+ uapi_key_attr_to_ioctl_method(method_key))
+ break;
+
+ if (elm->spec.mandatory)
+ __set_bit(attr_bkey, method_elm->attr_mandatory);
+
+ if (elm->spec.is_udata)
+ method_elm->has_udata = true;
+
+ if (type == UVERBS_ATTR_TYPE_IDR ||
+ type == UVERBS_ATTR_TYPE_FD) {
+ u8 access = elm->spec.u.obj.access;
+
+ /*
+ * Verbs specs may only have one NEW/DESTROY, we don't
+ * have the infrastructure to abort multiple NEW's or
+ * cope with multiple DESTROY failure.
+ */
+ if (access == UVERBS_ACCESS_NEW ||
+ access == UVERBS_ACCESS_DESTROY) {
+ if (WARN_ON(single_uobj))
+ return -EINVAL;
+
+ single_uobj = true;
+ if (WARN_ON(!elm->spec.mandatory))
+ return -EINVAL;
+ }
+
+ if (access == UVERBS_ACCESS_DESTROY)
+ method_elm->destroy_bkey = attr_bkey;
+ }
+
+ max_bkey = max(max_bkey, attr_bkey);
+ num_attrs++;
+ }
+
+ method_elm->key_bitmap_len = max_bkey + 1;
+ WARN_ON(method_elm->key_bitmap_len > UVERBS_API_ATTR_BKEY_LEN);
+
+ uapi_compute_bundle_size(method_elm, num_attrs);
+ return 0;
+}
+
+static int uapi_finalize(struct uverbs_api *uapi)
+{
+ const struct uverbs_api_write_method **data;
+ unsigned long max_write_ex = 0;
+ unsigned long max_write = 0;
+ struct radix_tree_iter iter;
+ void __rcu **slot;
+ int rc;
+ int i;
+
+ radix_tree_for_each_slot (slot, &uapi->radix, &iter, 0) {
+ struct uverbs_api_ioctl_method *method_elm =
+ rcu_dereference_protected(*slot, true);
+
+ if (uapi_key_is_ioctl_method(iter.index)) {
+ rc = uapi_finalize_ioctl_method(uapi, method_elm,
+ iter.index);
+ if (rc)
+ return rc;
+ }
+
+ if (uapi_key_is_write_method(iter.index))
+ max_write = max(max_write,
+ iter.index & UVERBS_API_ATTR_KEY_MASK);
+ if (uapi_key_is_write_ex_method(iter.index))
+ max_write_ex =
+ max(max_write_ex,
+ iter.index & UVERBS_API_ATTR_KEY_MASK);
+ }
+
+ uapi->notsupp_method.handler = ib_uverbs_notsupp;
+ uapi->num_write = max_write + 1;
+ uapi->num_write_ex = max_write_ex + 1;
+ data = kmalloc_array(uapi->num_write + uapi->num_write_ex,
+ sizeof(*uapi->write_methods), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ for (i = 0; i != uapi->num_write + uapi->num_write_ex; i++)
+ data[i] = &uapi->notsupp_method;
+ uapi->write_methods = data;
+ uapi->write_ex_methods = data + uapi->num_write;
+
+ radix_tree_for_each_slot (slot, &uapi->radix, &iter, 0) {
+ if (uapi_key_is_write_method(iter.index))
+ uapi->write_methods[iter.index &
+ UVERBS_API_ATTR_KEY_MASK] =
+ rcu_dereference_protected(*slot, true);
+ if (uapi_key_is_write_ex_method(iter.index))
+ uapi->write_ex_methods[iter.index &
+ UVERBS_API_ATTR_KEY_MASK] =
+ rcu_dereference_protected(*slot, true);
+ }
+
+ return 0;
+}
+
+static void uapi_remove_range(struct uverbs_api *uapi, u32 start, u32 last)
+{
+ struct radix_tree_iter iter;
+ void __rcu **slot;
+
+ radix_tree_for_each_slot (slot, &uapi->radix, &iter, start) {
+ if (iter.index > last)
+ return;
+ kfree(rcu_dereference_protected(*slot, true));
+ radix_tree_iter_delete(&uapi->radix, &iter, slot);
+ }
+}
+
+static void uapi_remove_object(struct uverbs_api *uapi, u32 obj_key)
+{
+ uapi_remove_range(uapi, obj_key,
+ obj_key | UVERBS_API_METHOD_KEY_MASK |
+ UVERBS_API_ATTR_KEY_MASK);
+}
+
+static void uapi_remove_method(struct uverbs_api *uapi, u32 method_key)
+{
+ uapi_remove_range(uapi, method_key,
+ method_key | UVERBS_API_ATTR_KEY_MASK);
+}
+
+
+static u32 uapi_get_obj_id(struct uverbs_attr_spec *spec)
+{
+ if (spec->type == UVERBS_ATTR_TYPE_IDR ||
+ spec->type == UVERBS_ATTR_TYPE_FD)
+ return spec->u.obj.obj_type;
+ if (spec->type == UVERBS_ATTR_TYPE_IDRS_ARRAY)
+ return spec->u2.objs_arr.obj_type;
+ return UVERBS_API_KEY_ERR;
+}
+
+static void uapi_key_okay(u32 key)
+{
+ unsigned int count = 0;
+
+ if (uapi_key_is_object(key))
+ count++;
+ if (uapi_key_is_ioctl_method(key))
+ count++;
+ if (uapi_key_is_write_method(key))
+ count++;
+ if (uapi_key_is_write_ex_method(key))
+ count++;
+ if (uapi_key_is_attr(key))
+ count++;
+ WARN(count != 1, "Bad count %u key=%x", count, key);
+}
+
+static void uapi_finalize_disable(struct uverbs_api *uapi)
+{
+ struct radix_tree_iter iter;
+ u32 starting_key = 0;
+ bool scan_again = false;
+ void __rcu **slot;
+
+again:
+ radix_tree_for_each_slot (slot, &uapi->radix, &iter, starting_key) {
+ uapi_key_okay(iter.index);
+
+ if (uapi_key_is_object(iter.index)) {
+ struct uverbs_api_object *obj_elm =
+ rcu_dereference_protected(*slot, true);
+
+ if (obj_elm->disabled) {
+ /* Have to check all the attrs again */
+ scan_again = true;
+ starting_key = iter.index;
+ uapi_remove_object(uapi, iter.index);
+ goto again;
+ }
+ continue;
+ }
+
+ if (uapi_key_is_ioctl_method(iter.index)) {
+ struct uverbs_api_ioctl_method *method_elm =
+ rcu_dereference_protected(*slot, true);
+
+ if (method_elm->disabled) {
+ starting_key = iter.index;
+ uapi_remove_method(uapi, iter.index);
+ goto again;
+ }
+ continue;
+ }
+
+ if (uapi_key_is_write_method(iter.index) ||
+ uapi_key_is_write_ex_method(iter.index)) {
+ struct uverbs_api_write_method *method_elm =
+ rcu_dereference_protected(*slot, true);
+
+ if (method_elm->disabled) {
+ kfree(method_elm);
+ radix_tree_iter_delete(&uapi->radix, &iter, slot);
+ }
+ continue;
+ }
+
+ if (uapi_key_is_attr(iter.index)) {
+ struct uverbs_api_attr *attr_elm =
+ rcu_dereference_protected(*slot, true);
+ const struct uverbs_api_object *tmp_obj;
+ u32 obj_key;
+
+ /*
+ * If the method has a mandatory object handle
+ * attribute which relies on an object which is not
+ * present then the entire method is uncallable.
+ */
+ if (!attr_elm->spec.mandatory)
+ continue;
+ obj_key = uapi_get_obj_id(&attr_elm->spec);
+ if (obj_key == UVERBS_API_KEY_ERR)
+ continue;
+ tmp_obj = uapi_get_object(uapi, obj_key);
+ if (IS_ERR(tmp_obj)) {
+ if (PTR_ERR(tmp_obj) == -ENOMSG)
+ continue;
+ } else {
+ if (!tmp_obj->disabled)
+ continue;
+ }
+
+ starting_key = iter.index;
+ uapi_remove_method(
+ uapi,
+ iter.index & (UVERBS_API_OBJ_KEY_MASK |
+ UVERBS_API_METHOD_KEY_MASK));
+ goto again;
+ }
+
+ WARN_ON(false);
+ }
+
+ if (!scan_again)
+ return;
+ scan_again = false;
+ starting_key = 0;
+ goto again;
+}
+
+void uverbs_destroy_api(struct uverbs_api *uapi)
+{
+ if (!uapi)
+ return;
+
+ uapi_remove_range(uapi, 0, U32_MAX);
+ kfree(uapi->write_methods);
+ kfree(uapi);
+}
+
+static const struct uapi_definition uverbs_core_api[] = {
+ UAPI_DEF_CHAIN(uverbs_def_obj_async_fd),
+ UAPI_DEF_CHAIN(uverbs_def_obj_counters),
+ UAPI_DEF_CHAIN(uverbs_def_obj_cq),
+ UAPI_DEF_CHAIN(uverbs_def_obj_device),
+ UAPI_DEF_CHAIN(uverbs_def_obj_dm),
+ UAPI_DEF_CHAIN(uverbs_def_obj_dmah),
+ UAPI_DEF_CHAIN(uverbs_def_obj_flow_action),
+ UAPI_DEF_CHAIN(uverbs_def_obj_intf),
+ UAPI_DEF_CHAIN(uverbs_def_obj_mr),
+ UAPI_DEF_CHAIN(uverbs_def_obj_qp),
+ UAPI_DEF_CHAIN(uverbs_def_obj_srq),
+ UAPI_DEF_CHAIN(uverbs_def_obj_wq),
+ UAPI_DEF_CHAIN(uverbs_def_write_intf),
+ {},
+};
+
+struct uverbs_api *uverbs_alloc_api(struct ib_device *ibdev)
+{
+ struct uverbs_api *uapi;
+ int rc;
+
+ uapi = kzalloc(sizeof(*uapi), GFP_KERNEL);
+ if (!uapi)
+ return ERR_PTR(-ENOMEM);
+
+ INIT_RADIX_TREE(&uapi->radix, GFP_KERNEL);
+ uapi->driver_id = ibdev->ops.driver_id;
+
+ rc = uapi_merge_def(uapi, ibdev, uverbs_core_api, false);
+ if (rc)
+ goto err;
+ rc = uapi_merge_def(uapi, ibdev, ibdev->driver_def, true);
+ if (rc)
+ goto err;
+
+ uapi_finalize_disable(uapi);
+ rc = uapi_finalize(uapi);
+ if (rc)
+ goto err;
+
+ return uapi;
+err:
+ if (rc != -ENOMEM)
+ dev_err(&ibdev->dev,
+ "Setup of uverbs_api failed, kernel parsing tree description is not valid (%d)??\n",
+ rc);
+
+ uverbs_destroy_api(uapi);
+ return ERR_PTR(rc);
+}
+
+/*
+ * The pre version is done before destroying the HW objects, it only blocks
+ * off method access. All methods that require the ib_dev or the module data
+ * must test one of these assignments prior to continuing.
+ */
+void uverbs_disassociate_api_pre(struct ib_uverbs_device *uverbs_dev)
+{
+ struct uverbs_api *uapi = uverbs_dev->uapi;
+ struct radix_tree_iter iter;
+ void __rcu **slot;
+
+ rcu_assign_pointer(uverbs_dev->ib_dev, NULL);
+
+ radix_tree_for_each_slot (slot, &uapi->radix, &iter, 0) {
+ if (uapi_key_is_ioctl_method(iter.index)) {
+ struct uverbs_api_ioctl_method *method_elm =
+ rcu_dereference_protected(*slot, true);
+
+ if (method_elm->driver_method)
+ rcu_assign_pointer(method_elm->handler, NULL);
+ }
+ }
+
+ synchronize_srcu(&uverbs_dev->disassociate_srcu);
+}
+
+/*
+ * Called when a driver disassociates from the ib_uverbs_device. The
+ * assumption is that the driver module will unload after. Replace everything
+ * related to the driver with NULL as a safety measure.
+ */
+void uverbs_disassociate_api(struct uverbs_api *uapi)
+{
+ struct radix_tree_iter iter;
+ void __rcu **slot;
+
+ radix_tree_for_each_slot (slot, &uapi->radix, &iter, 0) {
+ if (uapi_key_is_object(iter.index)) {
+ struct uverbs_api_object *object_elm =
+ rcu_dereference_protected(*slot, true);
+
+ /*
+ * Some type_attrs are in the driver module. We don't
+ * bother to keep track of which since there should be
+ * no use of this after disassociate.
+ */
+ object_elm->type_attrs = NULL;
+ } else if (uapi_key_is_attr(iter.index)) {
+ struct uverbs_api_attr *elm =
+ rcu_dereference_protected(*slot, true);
+
+ if (elm->spec.type == UVERBS_ATTR_TYPE_ENUM_IN)
+ elm->spec.u2.enum_def.ids = NULL;
+ }
+ }
+}
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 71580cc28c9e..11b1a194de44 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -44,13 +44,19 @@
#include <linux/in.h>
#include <linux/in6.h>
#include <net/addrconf.h>
+#include <linux/security.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_cache.h>
#include <rdma/ib_addr.h>
#include <rdma/rw.h>
+#include <rdma/lag.h>
#include "core_priv.h"
+#include <trace/events/rdma_core.h>
+
+static int ib_resolve_eth_dmac(struct ib_device *device,
+ struct rdma_ah_attr *ah_attr);
static const char * const ib_events[] = {
[IB_EVENT_CQ_ERR] = "CQ error",
@@ -90,10 +96,10 @@ static const char * const wc_statuses[] = {
[IB_WC_LOC_EEC_OP_ERR] = "local EE context operation error",
[IB_WC_LOC_PROT_ERR] = "local protection error",
[IB_WC_WR_FLUSH_ERR] = "WR flushed",
- [IB_WC_MW_BIND_ERR] = "memory management operation error",
+ [IB_WC_MW_BIND_ERR] = "memory bind operation error",
[IB_WC_BAD_RESP_ERR] = "bad response error",
[IB_WC_LOC_ACCESS_ERR] = "local access error",
- [IB_WC_REM_INV_REQ_ERR] = "invalid request error",
+ [IB_WC_REM_INV_REQ_ERR] = "remote invalid request error",
[IB_WC_REM_ACCESS_ERR] = "remote access error",
[IB_WC_REM_OP_ERR] = "remote operation error",
[IB_WC_RETRY_EXC_ERR] = "transport retry counter exceeded",
@@ -120,16 +126,30 @@ EXPORT_SYMBOL(ib_wc_status_msg);
__attribute_const__ int ib_rate_to_mult(enum ib_rate rate)
{
switch (rate) {
- case IB_RATE_2_5_GBPS: return 1;
- case IB_RATE_5_GBPS: return 2;
- case IB_RATE_10_GBPS: return 4;
- case IB_RATE_20_GBPS: return 8;
- case IB_RATE_30_GBPS: return 12;
- case IB_RATE_40_GBPS: return 16;
- case IB_RATE_60_GBPS: return 24;
- case IB_RATE_80_GBPS: return 32;
- case IB_RATE_120_GBPS: return 48;
- default: return -1;
+ case IB_RATE_2_5_GBPS: return 1;
+ case IB_RATE_5_GBPS: return 2;
+ case IB_RATE_10_GBPS: return 4;
+ case IB_RATE_20_GBPS: return 8;
+ case IB_RATE_30_GBPS: return 12;
+ case IB_RATE_40_GBPS: return 16;
+ case IB_RATE_60_GBPS: return 24;
+ case IB_RATE_80_GBPS: return 32;
+ case IB_RATE_120_GBPS: return 48;
+ case IB_RATE_14_GBPS: return 6;
+ case IB_RATE_56_GBPS: return 22;
+ case IB_RATE_112_GBPS: return 45;
+ case IB_RATE_168_GBPS: return 67;
+ case IB_RATE_25_GBPS: return 10;
+ case IB_RATE_100_GBPS: return 40;
+ case IB_RATE_200_GBPS: return 80;
+ case IB_RATE_300_GBPS: return 120;
+ case IB_RATE_28_GBPS: return 11;
+ case IB_RATE_50_GBPS: return 20;
+ case IB_RATE_400_GBPS: return 160;
+ case IB_RATE_600_GBPS: return 240;
+ case IB_RATE_800_GBPS: return 320;
+ case IB_RATE_1600_GBPS: return 640;
+ default: return -1;
}
}
EXPORT_SYMBOL(ib_rate_to_mult);
@@ -137,16 +157,30 @@ EXPORT_SYMBOL(ib_rate_to_mult);
__attribute_const__ enum ib_rate mult_to_ib_rate(int mult)
{
switch (mult) {
- case 1: return IB_RATE_2_5_GBPS;
- case 2: return IB_RATE_5_GBPS;
- case 4: return IB_RATE_10_GBPS;
- case 8: return IB_RATE_20_GBPS;
- case 12: return IB_RATE_30_GBPS;
- case 16: return IB_RATE_40_GBPS;
- case 24: return IB_RATE_60_GBPS;
- case 32: return IB_RATE_80_GBPS;
- case 48: return IB_RATE_120_GBPS;
- default: return IB_RATE_PORT_CURRENT;
+ case 1: return IB_RATE_2_5_GBPS;
+ case 2: return IB_RATE_5_GBPS;
+ case 4: return IB_RATE_10_GBPS;
+ case 8: return IB_RATE_20_GBPS;
+ case 12: return IB_RATE_30_GBPS;
+ case 16: return IB_RATE_40_GBPS;
+ case 24: return IB_RATE_60_GBPS;
+ case 32: return IB_RATE_80_GBPS;
+ case 48: return IB_RATE_120_GBPS;
+ case 6: return IB_RATE_14_GBPS;
+ case 22: return IB_RATE_56_GBPS;
+ case 45: return IB_RATE_112_GBPS;
+ case 67: return IB_RATE_168_GBPS;
+ case 10: return IB_RATE_25_GBPS;
+ case 40: return IB_RATE_100_GBPS;
+ case 80: return IB_RATE_200_GBPS;
+ case 120: return IB_RATE_300_GBPS;
+ case 11: return IB_RATE_28_GBPS;
+ case 20: return IB_RATE_50_GBPS;
+ case 160: return IB_RATE_400_GBPS;
+ case 240: return IB_RATE_600_GBPS;
+ case 320: return IB_RATE_800_GBPS;
+ case 640: return IB_RATE_1600_GBPS;
+ default: return IB_RATE_PORT_CURRENT;
}
}
EXPORT_SYMBOL(mult_to_ib_rate);
@@ -171,55 +205,56 @@ __attribute_const__ int ib_rate_to_mbps(enum ib_rate rate)
case IB_RATE_100_GBPS: return 103125;
case IB_RATE_200_GBPS: return 206250;
case IB_RATE_300_GBPS: return 309375;
+ case IB_RATE_28_GBPS: return 28125;
+ case IB_RATE_50_GBPS: return 53125;
+ case IB_RATE_400_GBPS: return 425000;
+ case IB_RATE_600_GBPS: return 637500;
+ case IB_RATE_800_GBPS: return 850000;
+ case IB_RATE_1600_GBPS: return 1700000;
default: return -1;
}
}
EXPORT_SYMBOL(ib_rate_to_mbps);
__attribute_const__ enum rdma_transport_type
-rdma_node_get_transport(enum rdma_node_type node_type)
-{
- switch (node_type) {
- case RDMA_NODE_IB_CA:
- case RDMA_NODE_IB_SWITCH:
- case RDMA_NODE_IB_ROUTER:
- return RDMA_TRANSPORT_IB;
- case RDMA_NODE_RNIC:
- return RDMA_TRANSPORT_IWARP;
- case RDMA_NODE_USNIC:
+rdma_node_get_transport(unsigned int node_type)
+{
+
+ if (node_type == RDMA_NODE_USNIC)
return RDMA_TRANSPORT_USNIC;
- case RDMA_NODE_USNIC_UDP:
+ if (node_type == RDMA_NODE_USNIC_UDP)
return RDMA_TRANSPORT_USNIC_UDP;
- default:
- BUG();
- return 0;
- }
+ if (node_type == RDMA_NODE_RNIC)
+ return RDMA_TRANSPORT_IWARP;
+ if (node_type == RDMA_NODE_UNSPECIFIED)
+ return RDMA_TRANSPORT_UNSPECIFIED;
+
+ return RDMA_TRANSPORT_IB;
}
EXPORT_SYMBOL(rdma_node_get_transport);
-enum rdma_link_layer rdma_port_get_link_layer(struct ib_device *device, u8 port_num)
+enum rdma_link_layer rdma_port_get_link_layer(struct ib_device *device,
+ u32 port_num)
{
- if (device->get_link_layer)
- return device->get_link_layer(device, port_num);
+ enum rdma_transport_type lt;
+ if (device->ops.get_link_layer)
+ return device->ops.get_link_layer(device, port_num);
- switch (rdma_node_get_transport(device->node_type)) {
- case RDMA_TRANSPORT_IB:
+ lt = rdma_node_get_transport(device->node_type);
+ if (lt == RDMA_TRANSPORT_IB)
return IB_LINK_LAYER_INFINIBAND;
- case RDMA_TRANSPORT_IWARP:
- case RDMA_TRANSPORT_USNIC:
- case RDMA_TRANSPORT_USNIC_UDP:
- return IB_LINK_LAYER_ETHERNET;
- default:
- return IB_LINK_LAYER_UNSPECIFIED;
- }
+
+ return IB_LINK_LAYER_ETHERNET;
}
EXPORT_SYMBOL(rdma_port_get_link_layer);
/* Protection domains */
/**
- * ib_alloc_pd - Allocates an unused protection domain.
+ * __ib_alloc_pd - Allocates an unused protection domain.
* @device: The device on which to allocate the protection domain.
+ * @flags: protection domain flags
+ * @caller: caller's build-time module name
*
* A protection domain object provides an association between QPs, shared
* receive queues, address handles, memory regions, and memory windows.
@@ -232,18 +267,27 @@ struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags,
{
struct ib_pd *pd;
int mr_access_flags = 0;
+ int ret;
- pd = device->alloc_pd(device, NULL, NULL);
- if (IS_ERR(pd))
- return pd;
+ pd = rdma_zalloc_drv_obj(device, ib_pd);
+ if (!pd)
+ return ERR_PTR(-ENOMEM);
pd->device = device;
- pd->uobject = NULL;
- pd->__internal_mr = NULL;
- atomic_set(&pd->usecnt, 0);
pd->flags = flags;
- if (device->attrs.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)
+ rdma_restrack_new(&pd->res, RDMA_RESTRACK_PD);
+ rdma_restrack_set_name(&pd->res, caller);
+
+ ret = device->ops.alloc_pd(pd, NULL);
+ if (ret) {
+ rdma_restrack_put(&pd->res);
+ kfree(pd);
+ return ERR_PTR(ret);
+ }
+ rdma_restrack_add(&pd->res);
+
+ if (device->attrs.kernel_cap_flags & IBK_LOCAL_DMA_LKEY)
pd->local_dma_lkey = device->local_dma_lkey;
else
mr_access_flags |= IB_ACCESS_LOCAL_WRITE;
@@ -256,7 +300,7 @@ struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags,
if (mr_access_flags) {
struct ib_mr *mr;
- mr = pd->device->get_dma_mr(pd, mr_access_flags);
+ mr = pd->device->ops.get_dma_mr(pd, mr_access_flags);
if (IS_ERR(mr)) {
ib_dealloc_pd(pd);
return ERR_CAST(mr);
@@ -264,12 +308,13 @@ struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags,
mr->device = pd->device;
mr->pd = pd;
+ mr->type = IB_MR_TYPE_DMA;
mr->uobject = NULL;
mr->need_inval = false;
pd->__internal_mr = mr;
- if (!(device->attrs.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY))
+ if (!(device->attrs.kernel_cap_flags & IBK_LOCAL_DMA_LKEY))
pd->local_dma_lkey = pd->__internal_mr->lkey;
if (flags & IB_PD_UNSAFE_GLOBAL_RKEY)
@@ -281,52 +326,305 @@ struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags,
EXPORT_SYMBOL(__ib_alloc_pd);
/**
- * ib_dealloc_pd - Deallocates a protection domain.
+ * ib_dealloc_pd_user - Deallocates a protection domain.
* @pd: The protection domain to deallocate.
+ * @udata: Valid user data or NULL for kernel object
*
* It is an error to call this function while any resources in the pd still
* exist. The caller is responsible to synchronously destroy them and
* guarantee no new allocations will happen.
*/
-void ib_dealloc_pd(struct ib_pd *pd)
+int ib_dealloc_pd_user(struct ib_pd *pd, struct ib_udata *udata)
{
int ret;
if (pd->__internal_mr) {
- ret = pd->device->dereg_mr(pd->__internal_mr);
+ ret = pd->device->ops.dereg_mr(pd->__internal_mr, NULL);
WARN_ON(ret);
pd->__internal_mr = NULL;
}
- /* uverbs manipulates usecnt with proper locking, while the kabi
- requires the caller to guarantee we can't race here. */
- WARN_ON(atomic_read(&pd->usecnt));
+ ret = pd->device->ops.dealloc_pd(pd, udata);
+ if (ret)
+ return ret;
- /* Making delalloc_pd a void return is a WIP, no driver should return
- an error here. */
- ret = pd->device->dealloc_pd(pd);
- WARN_ONCE(ret, "Infiniband HW driver failed dealloc_pd");
+ rdma_restrack_del(&pd->res);
+ kfree(pd);
+ return ret;
}
-EXPORT_SYMBOL(ib_dealloc_pd);
+EXPORT_SYMBOL(ib_dealloc_pd_user);
/* Address handles */
-struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
+/**
+ * rdma_copy_ah_attr - Copy rdma ah attribute from source to destination.
+ * @dest: Pointer to destination ah_attr. Contents of the destination
+ * pointer is assumed to be invalid and attribute are overwritten.
+ * @src: Pointer to source ah_attr.
+ */
+void rdma_copy_ah_attr(struct rdma_ah_attr *dest,
+ const struct rdma_ah_attr *src)
+{
+ *dest = *src;
+ if (dest->grh.sgid_attr)
+ rdma_hold_gid_attr(dest->grh.sgid_attr);
+}
+EXPORT_SYMBOL(rdma_copy_ah_attr);
+
+/**
+ * rdma_replace_ah_attr - Replace valid ah_attr with new one.
+ * @old: Pointer to existing ah_attr which needs to be replaced.
+ * old is assumed to be valid or zero'd
+ * @new: Pointer to the new ah_attr.
+ *
+ * rdma_replace_ah_attr() first releases any reference in the old ah_attr if
+ * old the ah_attr is valid; after that it copies the new attribute and holds
+ * the reference to the replaced ah_attr.
+ */
+void rdma_replace_ah_attr(struct rdma_ah_attr *old,
+ const struct rdma_ah_attr *new)
+{
+ rdma_destroy_ah_attr(old);
+ *old = *new;
+ if (old->grh.sgid_attr)
+ rdma_hold_gid_attr(old->grh.sgid_attr);
+}
+EXPORT_SYMBOL(rdma_replace_ah_attr);
+
+/**
+ * rdma_move_ah_attr - Move ah_attr pointed by source to destination.
+ * @dest: Pointer to destination ah_attr to copy to.
+ * dest is assumed to be valid or zero'd
+ * @src: Pointer to the new ah_attr.
+ *
+ * rdma_move_ah_attr() first releases any reference in the destination ah_attr
+ * if it is valid. This also transfers ownership of internal references from
+ * src to dest, making src invalid in the process. No new reference of the src
+ * ah_attr is taken.
+ */
+void rdma_move_ah_attr(struct rdma_ah_attr *dest, struct rdma_ah_attr *src)
+{
+ rdma_destroy_ah_attr(dest);
+ *dest = *src;
+ src->grh.sgid_attr = NULL;
+}
+EXPORT_SYMBOL(rdma_move_ah_attr);
+
+/*
+ * Validate that the rdma_ah_attr is valid for the device before passing it
+ * off to the driver.
+ */
+static int rdma_check_ah_attr(struct ib_device *device,
+ struct rdma_ah_attr *ah_attr)
+{
+ if (!rdma_is_port_valid(device, ah_attr->port_num))
+ return -EINVAL;
+
+ if ((rdma_is_grh_required(device, ah_attr->port_num) ||
+ ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) &&
+ !(ah_attr->ah_flags & IB_AH_GRH))
+ return -EINVAL;
+
+ if (ah_attr->grh.sgid_attr) {
+ /*
+ * Make sure the passed sgid_attr is consistent with the
+ * parameters
+ */
+ if (ah_attr->grh.sgid_attr->index != ah_attr->grh.sgid_index ||
+ ah_attr->grh.sgid_attr->port_num != ah_attr->port_num)
+ return -EINVAL;
+ }
+ return 0;
+}
+
+/*
+ * If the ah requires a GRH then ensure that sgid_attr pointer is filled in.
+ * On success the caller is responsible to call rdma_unfill_sgid_attr().
+ */
+static int rdma_fill_sgid_attr(struct ib_device *device,
+ struct rdma_ah_attr *ah_attr,
+ const struct ib_gid_attr **old_sgid_attr)
{
+ const struct ib_gid_attr *sgid_attr;
+ struct ib_global_route *grh;
+ int ret;
+
+ *old_sgid_attr = ah_attr->grh.sgid_attr;
+
+ ret = rdma_check_ah_attr(device, ah_attr);
+ if (ret)
+ return ret;
+
+ if (!(ah_attr->ah_flags & IB_AH_GRH))
+ return 0;
+
+ grh = rdma_ah_retrieve_grh(ah_attr);
+ if (grh->sgid_attr)
+ return 0;
+
+ sgid_attr =
+ rdma_get_gid_attr(device, ah_attr->port_num, grh->sgid_index);
+ if (IS_ERR(sgid_attr))
+ return PTR_ERR(sgid_attr);
+
+ /* Move ownerhip of the kref into the ah_attr */
+ grh->sgid_attr = sgid_attr;
+ return 0;
+}
+
+static void rdma_unfill_sgid_attr(struct rdma_ah_attr *ah_attr,
+ const struct ib_gid_attr *old_sgid_attr)
+{
+ /*
+ * Fill didn't change anything, the caller retains ownership of
+ * whatever it passed
+ */
+ if (ah_attr->grh.sgid_attr == old_sgid_attr)
+ return;
+
+ /*
+ * Otherwise, we need to undo what rdma_fill_sgid_attr so the caller
+ * doesn't see any change in the rdma_ah_attr. If we get here
+ * old_sgid_attr is NULL.
+ */
+ rdma_destroy_ah_attr(ah_attr);
+}
+
+static const struct ib_gid_attr *
+rdma_update_sgid_attr(struct rdma_ah_attr *ah_attr,
+ const struct ib_gid_attr *old_attr)
+{
+ if (old_attr)
+ rdma_put_gid_attr(old_attr);
+ if (ah_attr->ah_flags & IB_AH_GRH) {
+ rdma_hold_gid_attr(ah_attr->grh.sgid_attr);
+ return ah_attr->grh.sgid_attr;
+ }
+ return NULL;
+}
+
+static struct ib_ah *_rdma_create_ah(struct ib_pd *pd,
+ struct rdma_ah_attr *ah_attr,
+ u32 flags,
+ struct ib_udata *udata,
+ struct net_device *xmit_slave)
+{
+ struct rdma_ah_init_attr init_attr = {};
+ struct ib_device *device = pd->device;
struct ib_ah *ah;
+ int ret;
- ah = pd->device->create_ah(pd, ah_attr, NULL);
+ might_sleep_if(flags & RDMA_CREATE_AH_SLEEPABLE);
- if (!IS_ERR(ah)) {
- ah->device = pd->device;
- ah->pd = pd;
- ah->uobject = NULL;
- atomic_inc(&pd->usecnt);
+ if (!udata && !device->ops.create_ah)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ ah = rdma_zalloc_drv_obj_gfp(
+ device, ib_ah,
+ (flags & RDMA_CREATE_AH_SLEEPABLE) ? GFP_KERNEL : GFP_ATOMIC);
+ if (!ah)
+ return ERR_PTR(-ENOMEM);
+
+ ah->device = device;
+ ah->pd = pd;
+ ah->type = ah_attr->type;
+ ah->sgid_attr = rdma_update_sgid_attr(ah_attr, NULL);
+ init_attr.ah_attr = ah_attr;
+ init_attr.flags = flags;
+ init_attr.xmit_slave = xmit_slave;
+
+ if (udata)
+ ret = device->ops.create_user_ah(ah, &init_attr, udata);
+ else
+ ret = device->ops.create_ah(ah, &init_attr, NULL);
+ if (ret) {
+ if (ah->sgid_attr)
+ rdma_put_gid_attr(ah->sgid_attr);
+ kfree(ah);
+ return ERR_PTR(ret);
}
+ atomic_inc(&pd->usecnt);
+ return ah;
+}
+
+/**
+ * rdma_create_ah - Creates an address handle for the
+ * given address vector.
+ * @pd: The protection domain associated with the address handle.
+ * @ah_attr: The attributes of the address vector.
+ * @flags: Create address handle flags (see enum rdma_create_ah_flags).
+ *
+ * It returns 0 on success and returns appropriate error code on error.
+ * The address handle is used to reference a local or global destination
+ * in all UD QP post sends.
+ */
+struct ib_ah *rdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr,
+ u32 flags)
+{
+ const struct ib_gid_attr *old_sgid_attr;
+ struct net_device *slave;
+ struct ib_ah *ah;
+ int ret;
+
+ ret = rdma_fill_sgid_attr(pd->device, ah_attr, &old_sgid_attr);
+ if (ret)
+ return ERR_PTR(ret);
+ slave = rdma_lag_get_ah_roce_slave(pd->device, ah_attr,
+ (flags & RDMA_CREATE_AH_SLEEPABLE) ?
+ GFP_KERNEL : GFP_ATOMIC);
+ if (IS_ERR(slave)) {
+ rdma_unfill_sgid_attr(ah_attr, old_sgid_attr);
+ return ERR_CAST(slave);
+ }
+ ah = _rdma_create_ah(pd, ah_attr, flags, NULL, slave);
+ rdma_lag_put_ah_roce_slave(slave);
+ rdma_unfill_sgid_attr(ah_attr, old_sgid_attr);
+ return ah;
+}
+EXPORT_SYMBOL(rdma_create_ah);
+
+/**
+ * rdma_create_user_ah - Creates an address handle for the
+ * given address vector.
+ * It resolves destination mac address for ah attribute of RoCE type.
+ * @pd: The protection domain associated with the address handle.
+ * @ah_attr: The attributes of the address vector.
+ * @udata: pointer to user's input output buffer information need by
+ * provider driver.
+ *
+ * It returns 0 on success and returns appropriate error code on error.
+ * The address handle is used to reference a local or global destination
+ * in all UD QP post sends.
+ */
+struct ib_ah *rdma_create_user_ah(struct ib_pd *pd,
+ struct rdma_ah_attr *ah_attr,
+ struct ib_udata *udata)
+{
+ const struct ib_gid_attr *old_sgid_attr;
+ struct ib_ah *ah;
+ int err;
+
+ err = rdma_fill_sgid_attr(pd->device, ah_attr, &old_sgid_attr);
+ if (err)
+ return ERR_PTR(err);
+
+ if (ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) {
+ err = ib_resolve_eth_dmac(pd->device, ah_attr);
+ if (err) {
+ ah = ERR_PTR(err);
+ goto out;
+ }
+ }
+
+ ah = _rdma_create_ah(pd, ah_attr, RDMA_CREATE_AH_SLEEPABLE,
+ udata, NULL);
+
+out:
+ rdma_unfill_sgid_attr(ah_attr, old_sgid_attr);
return ah;
}
-EXPORT_SYMBOL(ib_create_ah);
+EXPORT_SYMBOL(rdma_create_user_ah);
int ib_get_rdma_header_version(const union rdma_network_hdr *hdr)
{
@@ -362,7 +660,7 @@ int ib_get_rdma_header_version(const union rdma_network_hdr *hdr)
EXPORT_SYMBOL(ib_get_rdma_header_version);
static enum rdma_network_type ib_get_net_type_by_grh(struct ib_device *device,
- u8 port_num,
+ u32 port_num,
const struct ib_grh *grh)
{
int grh_version;
@@ -390,30 +688,30 @@ static bool find_gid_index(const union ib_gid *gid,
const struct ib_gid_attr *gid_attr,
void *context)
{
- struct find_gid_index_context *ctx =
- (struct find_gid_index_context *)context;
+ struct find_gid_index_context *ctx = context;
+ u16 vlan_id = 0xffff;
+ int ret;
if (ctx->gid_type != gid_attr->gid_type)
return false;
- if ((!!(ctx->vlan_id != 0xffff) == !is_vlan_dev(gid_attr->ndev)) ||
- (is_vlan_dev(gid_attr->ndev) &&
- vlan_dev_vlan_id(gid_attr->ndev) != ctx->vlan_id))
+ ret = rdma_read_gid_l2_fields(gid_attr, &vlan_id, NULL);
+ if (ret)
return false;
- return true;
+ return ctx->vlan_id == vlan_id;
}
-static int get_sgid_index_from_eth(struct ib_device *device, u8 port_num,
- u16 vlan_id, const union ib_gid *sgid,
- enum ib_gid_type gid_type,
- u16 *gid_index)
+static const struct ib_gid_attr *
+get_sgid_attr_from_eth(struct ib_device *device, u32 port_num,
+ u16 vlan_id, const union ib_gid *sgid,
+ enum ib_gid_type gid_type)
{
struct find_gid_index_context context = {.vlan_id = vlan_id,
.gid_type = gid_type};
- return ib_find_gid_by_filter(device, sgid, port_num, find_gid_index,
- &context, gid_index);
+ return rdma_find_gid_by_filter(device, sgid, port_num, find_gid_index,
+ &context);
}
int ib_get_gids_from_rdma_hdr(const union rdma_network_hdr *hdr,
@@ -440,7 +738,7 @@ int ib_get_gids_from_rdma_hdr(const union rdma_network_hdr *hdr,
(struct in6_addr *)dgid);
return 0;
} else if (net_type == RDMA_NETWORK_IPV6 ||
- net_type == RDMA_NETWORK_IB) {
+ net_type == RDMA_NETWORK_IB || RDMA_NETWORK_ROCE_V1) {
*dgid = hdr->ibgrh.dgid;
*sgid = hdr->ibgrh.sgid;
return 0;
@@ -450,20 +748,65 @@ int ib_get_gids_from_rdma_hdr(const union rdma_network_hdr *hdr,
}
EXPORT_SYMBOL(ib_get_gids_from_rdma_hdr);
-int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
- const struct ib_wc *wc, const struct ib_grh *grh,
- struct ib_ah_attr *ah_attr)
+/* Resolve destination mac address and hop limit for unicast destination
+ * GID entry, considering the source GID entry as well.
+ * ah_attribute must have valid port_num, sgid_index.
+ */
+static int ib_resolve_unicast_gid_dmac(struct ib_device *device,
+ struct rdma_ah_attr *ah_attr)
+{
+ struct ib_global_route *grh = rdma_ah_retrieve_grh(ah_attr);
+ const struct ib_gid_attr *sgid_attr = grh->sgid_attr;
+ int hop_limit = 0xff;
+ int ret = 0;
+
+ /* If destination is link local and source GID is RoCEv1,
+ * IP stack is not used.
+ */
+ if (rdma_link_local_addr((struct in6_addr *)grh->dgid.raw) &&
+ sgid_attr->gid_type == IB_GID_TYPE_ROCE) {
+ rdma_get_ll_mac((struct in6_addr *)grh->dgid.raw,
+ ah_attr->roce.dmac);
+ return ret;
+ }
+
+ ret = rdma_addr_find_l2_eth_by_grh(&sgid_attr->gid, &grh->dgid,
+ ah_attr->roce.dmac,
+ sgid_attr, &hop_limit);
+
+ grh->hop_limit = hop_limit;
+ return ret;
+}
+
+/*
+ * This function initializes address handle attributes from the incoming packet.
+ * Incoming packet has dgid of the receiver node on which this code is
+ * getting executed and, sgid contains the GID of the sender.
+ *
+ * When resolving mac address of destination, the arrived dgid is used
+ * as sgid and, sgid is used as dgid because sgid contains destinations
+ * GID whom to respond to.
+ *
+ * On success the caller is responsible to call rdma_destroy_ah_attr on the
+ * attr.
+ */
+int ib_init_ah_attr_from_wc(struct ib_device *device, u32 port_num,
+ const struct ib_wc *wc, const struct ib_grh *grh,
+ struct rdma_ah_attr *ah_attr)
{
u32 flow_class;
- u16 gid_index;
int ret;
enum rdma_network_type net_type = RDMA_NETWORK_IB;
enum ib_gid_type gid_type = IB_GID_TYPE_IB;
+ const struct ib_gid_attr *sgid_attr;
int hoplimit = 0xff;
union ib_gid dgid;
union ib_gid sgid;
+ might_sleep();
+
memset(ah_attr, 0, sizeof *ah_attr);
+ ah_attr->type = rdma_ah_find_type(device, port_num);
if (rdma_cap_eth_ah(device, port_num)) {
if (wc->wc_flags & IB_WC_WITH_NETWORK_HDR_TYPE)
net_type = wc->network_hdr_type;
@@ -476,232 +819,311 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
if (ret)
return ret;
+ rdma_ah_set_sl(ah_attr, wc->sl);
+ rdma_ah_set_port_num(ah_attr, port_num);
+
if (rdma_protocol_roce(device, port_num)) {
- int if_index = 0;
u16 vlan_id = wc->wc_flags & IB_WC_WITH_VLAN ?
wc->vlan_id : 0xffff;
- struct net_device *idev;
- struct net_device *resolved_dev;
if (!(wc->wc_flags & IB_WC_GRH))
return -EPROTOTYPE;
- if (!device->get_netdev)
- return -EOPNOTSUPP;
+ sgid_attr = get_sgid_attr_from_eth(device, port_num,
+ vlan_id, &dgid,
+ gid_type);
+ if (IS_ERR(sgid_attr))
+ return PTR_ERR(sgid_attr);
- idev = device->get_netdev(device, port_num);
- if (!idev)
- return -ENODEV;
+ flow_class = be32_to_cpu(grh->version_tclass_flow);
+ rdma_move_grh_sgid_attr(ah_attr,
+ &sgid,
+ flow_class & 0xFFFFF,
+ hoplimit,
+ (flow_class >> 20) & 0xFF,
+ sgid_attr);
+
+ ret = ib_resolve_unicast_gid_dmac(device, ah_attr);
+ if (ret)
+ rdma_destroy_ah_attr(ah_attr);
- ret = rdma_addr_find_l2_eth_by_grh(&dgid, &sgid,
- ah_attr->dmac,
- wc->wc_flags & IB_WC_WITH_VLAN ?
- NULL : &vlan_id,
- &if_index, &hoplimit);
- if (ret) {
- dev_put(idev);
- return ret;
- }
+ return ret;
+ } else {
+ rdma_ah_set_dlid(ah_attr, wc->slid);
+ rdma_ah_set_path_bits(ah_attr, wc->dlid_path_bits);
- resolved_dev = dev_get_by_index(&init_net, if_index);
- if (resolved_dev->flags & IFF_LOOPBACK) {
- dev_put(resolved_dev);
- resolved_dev = idev;
- dev_hold(resolved_dev);
- }
- rcu_read_lock();
- if (resolved_dev != idev && !rdma_is_upper_dev_rcu(idev,
- resolved_dev))
- ret = -EHOSTUNREACH;
- rcu_read_unlock();
- dev_put(idev);
- dev_put(resolved_dev);
- if (ret)
- return ret;
+ if ((wc->wc_flags & IB_WC_GRH) == 0)
+ return 0;
- ret = get_sgid_index_from_eth(device, port_num, vlan_id,
- &dgid, gid_type, &gid_index);
- if (ret)
- return ret;
+ if (dgid.global.interface_id !=
+ cpu_to_be64(IB_SA_WELL_KNOWN_GUID)) {
+ sgid_attr = rdma_find_gid_by_port(
+ device, &dgid, IB_GID_TYPE_IB, port_num, NULL);
+ } else
+ sgid_attr = rdma_get_gid_attr(device, port_num, 0);
+
+ if (IS_ERR(sgid_attr))
+ return PTR_ERR(sgid_attr);
+ flow_class = be32_to_cpu(grh->version_tclass_flow);
+ rdma_move_grh_sgid_attr(ah_attr,
+ &sgid,
+ flow_class & 0xFFFFF,
+ hoplimit,
+ (flow_class >> 20) & 0xFF,
+ sgid_attr);
+
+ return 0;
}
+}
+EXPORT_SYMBOL(ib_init_ah_attr_from_wc);
- ah_attr->dlid = wc->slid;
- ah_attr->sl = wc->sl;
- ah_attr->src_path_bits = wc->dlid_path_bits;
- ah_attr->port_num = port_num;
-
- if (wc->wc_flags & IB_WC_GRH) {
- ah_attr->ah_flags = IB_AH_GRH;
- ah_attr->grh.dgid = sgid;
-
- if (!rdma_cap_eth_ah(device, port_num)) {
- if (dgid.global.interface_id != cpu_to_be64(IB_SA_WELL_KNOWN_GUID)) {
- ret = ib_find_cached_gid_by_port(device, &dgid,
- IB_GID_TYPE_IB,
- port_num, NULL,
- &gid_index);
- if (ret)
- return ret;
- } else {
- gid_index = 0;
- }
- }
+/**
+ * rdma_move_grh_sgid_attr - Sets the sgid attribute of GRH, taking ownership
+ * of the reference
+ *
+ * @attr: Pointer to AH attribute structure
+ * @dgid: Destination GID
+ * @flow_label: Flow label
+ * @hop_limit: Hop limit
+ * @traffic_class: traffic class
+ * @sgid_attr: Pointer to SGID attribute
+ *
+ * This takes ownership of the sgid_attr reference. The caller must ensure
+ * rdma_destroy_ah_attr() is called before destroying the rdma_ah_attr after
+ * calling this function.
+ */
+void rdma_move_grh_sgid_attr(struct rdma_ah_attr *attr, union ib_gid *dgid,
+ u32 flow_label, u8 hop_limit, u8 traffic_class,
+ const struct ib_gid_attr *sgid_attr)
+{
+ rdma_ah_set_grh(attr, dgid, flow_label, sgid_attr->index, hop_limit,
+ traffic_class);
+ attr->grh.sgid_attr = sgid_attr;
+}
+EXPORT_SYMBOL(rdma_move_grh_sgid_attr);
- ah_attr->grh.sgid_index = (u8) gid_index;
- flow_class = be32_to_cpu(grh->version_tclass_flow);
- ah_attr->grh.flow_label = flow_class & 0xFFFFF;
- ah_attr->grh.hop_limit = hoplimit;
- ah_attr->grh.traffic_class = (flow_class >> 20) & 0xFF;
+/**
+ * rdma_destroy_ah_attr - Release reference to SGID attribute of
+ * ah attribute.
+ * @ah_attr: Pointer to ah attribute
+ *
+ * Release reference to the SGID attribute of the ah attribute if it is
+ * non NULL. It is safe to call this multiple times, and safe to call it on
+ * a zero initialized ah_attr.
+ */
+void rdma_destroy_ah_attr(struct rdma_ah_attr *ah_attr)
+{
+ if (ah_attr->grh.sgid_attr) {
+ rdma_put_gid_attr(ah_attr->grh.sgid_attr);
+ ah_attr->grh.sgid_attr = NULL;
}
- return 0;
}
-EXPORT_SYMBOL(ib_init_ah_from_wc);
+EXPORT_SYMBOL(rdma_destroy_ah_attr);
struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, const struct ib_wc *wc,
- const struct ib_grh *grh, u8 port_num)
+ const struct ib_grh *grh, u32 port_num)
{
- struct ib_ah_attr ah_attr;
+ struct rdma_ah_attr ah_attr;
+ struct ib_ah *ah;
int ret;
- ret = ib_init_ah_from_wc(pd->device, port_num, wc, grh, &ah_attr);
+ ret = ib_init_ah_attr_from_wc(pd->device, port_num, wc, grh, &ah_attr);
if (ret)
return ERR_PTR(ret);
- return ib_create_ah(pd, &ah_attr);
+ ah = rdma_create_ah(pd, &ah_attr, RDMA_CREATE_AH_SLEEPABLE);
+
+ rdma_destroy_ah_attr(&ah_attr);
+ return ah;
}
EXPORT_SYMBOL(ib_create_ah_from_wc);
-int ib_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr)
+int rdma_modify_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr)
{
- return ah->device->modify_ah ?
- ah->device->modify_ah(ah, ah_attr) :
- -ENOSYS;
+ const struct ib_gid_attr *old_sgid_attr;
+ int ret;
+
+ if (ah->type != ah_attr->type)
+ return -EINVAL;
+
+ ret = rdma_fill_sgid_attr(ah->device, ah_attr, &old_sgid_attr);
+ if (ret)
+ return ret;
+
+ ret = ah->device->ops.modify_ah ?
+ ah->device->ops.modify_ah(ah, ah_attr) :
+ -EOPNOTSUPP;
+
+ ah->sgid_attr = rdma_update_sgid_attr(ah_attr, ah->sgid_attr);
+ rdma_unfill_sgid_attr(ah_attr, old_sgid_attr);
+ return ret;
}
-EXPORT_SYMBOL(ib_modify_ah);
+EXPORT_SYMBOL(rdma_modify_ah);
-int ib_query_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr)
+int rdma_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr)
{
- return ah->device->query_ah ?
- ah->device->query_ah(ah, ah_attr) :
- -ENOSYS;
+ ah_attr->grh.sgid_attr = NULL;
+
+ return ah->device->ops.query_ah ?
+ ah->device->ops.query_ah(ah, ah_attr) :
+ -EOPNOTSUPP;
}
-EXPORT_SYMBOL(ib_query_ah);
+EXPORT_SYMBOL(rdma_query_ah);
-int ib_destroy_ah(struct ib_ah *ah)
+int rdma_destroy_ah_user(struct ib_ah *ah, u32 flags, struct ib_udata *udata)
{
+ const struct ib_gid_attr *sgid_attr = ah->sgid_attr;
struct ib_pd *pd;
int ret;
+ might_sleep_if(flags & RDMA_DESTROY_AH_SLEEPABLE);
+
pd = ah->pd;
- ret = ah->device->destroy_ah(ah);
- if (!ret)
- atomic_dec(&pd->usecnt);
+ ret = ah->device->ops.destroy_ah(ah, flags);
+ if (ret)
+ return ret;
+
+ atomic_dec(&pd->usecnt);
+ if (sgid_attr)
+ rdma_put_gid_attr(sgid_attr);
+
+ kfree(ah);
return ret;
}
-EXPORT_SYMBOL(ib_destroy_ah);
+EXPORT_SYMBOL(rdma_destroy_ah_user);
/* Shared receive queues */
-struct ib_srq *ib_create_srq(struct ib_pd *pd,
- struct ib_srq_init_attr *srq_init_attr)
+/**
+ * ib_create_srq_user - Creates a SRQ associated with the specified protection
+ * domain.
+ * @pd: The protection domain associated with the SRQ.
+ * @srq_init_attr: A list of initial attributes required to create the
+ * SRQ. If SRQ creation succeeds, then the attributes are updated to
+ * the actual capabilities of the created SRQ.
+ * @uobject: uobject pointer if this is not a kernel SRQ
+ * @udata: udata pointer if this is not a kernel SRQ
+ *
+ * srq_attr->max_wr and srq_attr->max_sge are read the determine the
+ * requested size of the SRQ, and set to the actual values allocated
+ * on return. If ib_create_srq() succeeds, then max_wr and max_sge
+ * will always be at least as large as the requested values.
+ */
+struct ib_srq *ib_create_srq_user(struct ib_pd *pd,
+ struct ib_srq_init_attr *srq_init_attr,
+ struct ib_usrq_object *uobject,
+ struct ib_udata *udata)
{
struct ib_srq *srq;
+ int ret;
+
+ srq = rdma_zalloc_drv_obj(pd->device, ib_srq);
+ if (!srq)
+ return ERR_PTR(-ENOMEM);
+
+ srq->device = pd->device;
+ srq->pd = pd;
+ srq->event_handler = srq_init_attr->event_handler;
+ srq->srq_context = srq_init_attr->srq_context;
+ srq->srq_type = srq_init_attr->srq_type;
+ srq->uobject = uobject;
- if (!pd->device->create_srq)
- return ERR_PTR(-ENOSYS);
-
- srq = pd->device->create_srq(pd, srq_init_attr, NULL);
-
- if (!IS_ERR(srq)) {
- srq->device = pd->device;
- srq->pd = pd;
- srq->uobject = NULL;
- srq->event_handler = srq_init_attr->event_handler;
- srq->srq_context = srq_init_attr->srq_context;
- srq->srq_type = srq_init_attr->srq_type;
- if (srq->srq_type == IB_SRQT_XRC) {
- srq->ext.xrc.xrcd = srq_init_attr->ext.xrc.xrcd;
- srq->ext.xrc.cq = srq_init_attr->ext.xrc.cq;
+ if (ib_srq_has_cq(srq->srq_type)) {
+ srq->ext.cq = srq_init_attr->ext.cq;
+ atomic_inc(&srq->ext.cq->usecnt);
+ }
+ if (srq->srq_type == IB_SRQT_XRC) {
+ srq->ext.xrc.xrcd = srq_init_attr->ext.xrc.xrcd;
+ if (srq->ext.xrc.xrcd)
atomic_inc(&srq->ext.xrc.xrcd->usecnt);
- atomic_inc(&srq->ext.xrc.cq->usecnt);
- }
- atomic_inc(&pd->usecnt);
- atomic_set(&srq->usecnt, 0);
}
+ atomic_inc(&pd->usecnt);
+
+ rdma_restrack_new(&srq->res, RDMA_RESTRACK_SRQ);
+ rdma_restrack_parent_name(&srq->res, &pd->res);
+
+ ret = pd->device->ops.create_srq(srq, srq_init_attr, udata);
+ if (ret) {
+ rdma_restrack_put(&srq->res);
+ atomic_dec(&pd->usecnt);
+ if (srq->srq_type == IB_SRQT_XRC && srq->ext.xrc.xrcd)
+ atomic_dec(&srq->ext.xrc.xrcd->usecnt);
+ if (ib_srq_has_cq(srq->srq_type))
+ atomic_dec(&srq->ext.cq->usecnt);
+ kfree(srq);
+ return ERR_PTR(ret);
+ }
+
+ rdma_restrack_add(&srq->res);
return srq;
}
-EXPORT_SYMBOL(ib_create_srq);
+EXPORT_SYMBOL(ib_create_srq_user);
int ib_modify_srq(struct ib_srq *srq,
struct ib_srq_attr *srq_attr,
enum ib_srq_attr_mask srq_attr_mask)
{
- return srq->device->modify_srq ?
- srq->device->modify_srq(srq, srq_attr, srq_attr_mask, NULL) :
- -ENOSYS;
+ return srq->device->ops.modify_srq ?
+ srq->device->ops.modify_srq(srq, srq_attr, srq_attr_mask,
+ NULL) : -EOPNOTSUPP;
}
EXPORT_SYMBOL(ib_modify_srq);
int ib_query_srq(struct ib_srq *srq,
struct ib_srq_attr *srq_attr)
{
- return srq->device->query_srq ?
- srq->device->query_srq(srq, srq_attr) : -ENOSYS;
+ return srq->device->ops.query_srq ?
+ srq->device->ops.query_srq(srq, srq_attr) : -EOPNOTSUPP;
}
EXPORT_SYMBOL(ib_query_srq);
-int ib_destroy_srq(struct ib_srq *srq)
+int ib_destroy_srq_user(struct ib_srq *srq, struct ib_udata *udata)
{
- struct ib_pd *pd;
- enum ib_srq_type srq_type;
- struct ib_xrcd *uninitialized_var(xrcd);
- struct ib_cq *uninitialized_var(cq);
int ret;
if (atomic_read(&srq->usecnt))
return -EBUSY;
- pd = srq->pd;
- srq_type = srq->srq_type;
- if (srq_type == IB_SRQT_XRC) {
- xrcd = srq->ext.xrc.xrcd;
- cq = srq->ext.xrc.cq;
- }
+ ret = srq->device->ops.destroy_srq(srq, udata);
+ if (ret)
+ return ret;
- ret = srq->device->destroy_srq(srq);
- if (!ret) {
- atomic_dec(&pd->usecnt);
- if (srq_type == IB_SRQT_XRC) {
- atomic_dec(&xrcd->usecnt);
- atomic_dec(&cq->usecnt);
- }
- }
+ atomic_dec(&srq->pd->usecnt);
+ if (srq->srq_type == IB_SRQT_XRC && srq->ext.xrc.xrcd)
+ atomic_dec(&srq->ext.xrc.xrcd->usecnt);
+ if (ib_srq_has_cq(srq->srq_type))
+ atomic_dec(&srq->ext.cq->usecnt);
+ rdma_restrack_del(&srq->res);
+ kfree(srq);
return ret;
}
-EXPORT_SYMBOL(ib_destroy_srq);
+EXPORT_SYMBOL(ib_destroy_srq_user);
/* Queue pairs */
+static void __ib_qp_event_handler(struct ib_event *event, void *context)
+{
+ struct ib_qp *qp = event->element.qp;
+
+ if (event->event == IB_EVENT_QP_LAST_WQE_REACHED)
+ complete(&qp->srq_completion);
+ if (qp->registered_event_handler)
+ qp->registered_event_handler(event, qp->qp_context);
+}
+
static void __ib_shared_qp_event_handler(struct ib_event *event, void *context)
{
struct ib_qp *qp = context;
unsigned long flags;
- spin_lock_irqsave(&qp->device->event_handler_lock, flags);
+ spin_lock_irqsave(&qp->device->qp_open_list_lock, flags);
list_for_each_entry(event->element.qp, &qp->open_list, open_list)
if (event->element.qp->event_handler)
event->element.qp->event_handler(event, event->element.qp->qp_context);
- spin_unlock_irqrestore(&qp->device->event_handler_lock, flags);
-}
-
-static void __ib_insert_xrcd_qp(struct ib_xrcd *xrcd, struct ib_qp *qp)
-{
- mutex_lock(&xrcd->tgt_qp_mutex);
- list_add(&qp->xrcd_list, &xrcd->tgt_qp_list);
- mutex_unlock(&xrcd->tgt_qp_mutex);
+ spin_unlock_irqrestore(&qp->device->qp_open_list_lock, flags);
}
static struct ib_qp *__ib_open_qp(struct ib_qp *real_qp,
@@ -710,12 +1132,20 @@ static struct ib_qp *__ib_open_qp(struct ib_qp *real_qp,
{
struct ib_qp *qp;
unsigned long flags;
+ int err;
qp = kzalloc(sizeof *qp, GFP_KERNEL);
if (!qp)
return ERR_PTR(-ENOMEM);
qp->real_qp = real_qp;
+ err = ib_open_shared_qp_security(qp, real_qp->device);
+ if (err) {
+ kfree(qp);
+ return ERR_PTR(err);
+ }
+
+ qp->real_qp = real_qp;
atomic_inc(&real_qp->usecnt);
qp->device = real_qp->device;
qp->event_handler = event_handler;
@@ -723,9 +1153,9 @@ static struct ib_qp *__ib_open_qp(struct ib_qp *real_qp,
qp->qp_num = real_qp->qp_num;
qp->qp_type = real_qp->qp_type;
- spin_lock_irqsave(&real_qp->device->event_handler_lock, flags);
+ spin_lock_irqsave(&real_qp->device->qp_open_list_lock, flags);
list_add(&qp->open_list, &real_qp->open_list);
- spin_unlock_irqrestore(&real_qp->device->event_handler_lock, flags);
+ spin_unlock_irqrestore(&real_qp->device->qp_open_list_lock, flags);
return qp;
}
@@ -738,24 +1168,24 @@ struct ib_qp *ib_open_qp(struct ib_xrcd *xrcd,
if (qp_open_attr->qp_type != IB_QPT_XRC_TGT)
return ERR_PTR(-EINVAL);
- qp = ERR_PTR(-EINVAL);
- mutex_lock(&xrcd->tgt_qp_mutex);
- list_for_each_entry(real_qp, &xrcd->tgt_qp_list, xrcd_list) {
- if (real_qp->qp_num == qp_open_attr->qp_num) {
- qp = __ib_open_qp(real_qp, qp_open_attr->event_handler,
- qp_open_attr->qp_context);
- break;
- }
+ down_read(&xrcd->tgt_qps_rwsem);
+ real_qp = xa_load(&xrcd->tgt_qps, qp_open_attr->qp_num);
+ if (!real_qp) {
+ up_read(&xrcd->tgt_qps_rwsem);
+ return ERR_PTR(-EINVAL);
}
- mutex_unlock(&xrcd->tgt_qp_mutex);
+ qp = __ib_open_qp(real_qp, qp_open_attr->event_handler,
+ qp_open_attr->qp_context);
+ up_read(&xrcd->tgt_qps_rwsem);
return qp;
}
EXPORT_SYMBOL(ib_open_qp);
-static struct ib_qp *ib_create_xrc_qp(struct ib_qp *qp,
- struct ib_qp_init_attr *qp_init_attr)
+static struct ib_qp *create_xrc_qp_user(struct ib_qp *qp,
+ struct ib_qp_init_attr *qp_init_attr)
{
struct ib_qp *real_qp = qp;
+ int err;
qp->event_handler = __ib_shared_qp_event_handler;
qp->qp_context = qp;
@@ -768,85 +1198,179 @@ static struct ib_qp *ib_create_xrc_qp(struct ib_qp *qp,
qp = __ib_open_qp(real_qp, qp_init_attr->event_handler,
qp_init_attr->qp_context);
- if (!IS_ERR(qp))
- __ib_insert_xrcd_qp(qp_init_attr->xrcd, real_qp);
- else
- real_qp->device->destroy_qp(real_qp);
+ if (IS_ERR(qp))
+ return qp;
+
+ err = xa_err(xa_store(&qp_init_attr->xrcd->tgt_qps, real_qp->qp_num,
+ real_qp, GFP_KERNEL));
+ if (err) {
+ ib_close_qp(qp);
+ return ERR_PTR(err);
+ }
return qp;
}
-struct ib_qp *ib_create_qp(struct ib_pd *pd,
- struct ib_qp_init_attr *qp_init_attr)
+static struct ib_qp *create_qp(struct ib_device *dev, struct ib_pd *pd,
+ struct ib_qp_init_attr *attr,
+ struct ib_udata *udata,
+ struct ib_uqp_object *uobj, const char *caller)
{
- struct ib_device *device = pd ? pd->device : qp_init_attr->xrcd->device;
+ struct ib_udata dummy = {};
struct ib_qp *qp;
int ret;
- if (qp_init_attr->rwq_ind_tbl &&
- (qp_init_attr->recv_cq ||
- qp_init_attr->srq || qp_init_attr->cap.max_recv_wr ||
- qp_init_attr->cap.max_recv_sge))
- return ERR_PTR(-EINVAL);
+ if (!dev->ops.create_qp)
+ return ERR_PTR(-EOPNOTSUPP);
- /*
- * If the callers is using the RDMA API calculate the resources
- * needed for the RDMA READ/WRITE operations.
- *
- * Note that these callers need to pass in a port number.
- */
- if (qp_init_attr->cap.max_rdma_ctxs)
- rdma_rw_init_qp(device, qp_init_attr);
+ qp = rdma_zalloc_drv_obj_numa(dev, ib_qp);
+ if (!qp)
+ return ERR_PTR(-ENOMEM);
- qp = device->create_qp(pd, qp_init_attr, NULL);
- if (IS_ERR(qp))
- return qp;
+ qp->device = dev;
+ qp->pd = pd;
+ qp->uobject = uobj;
+ qp->real_qp = qp;
- qp->device = device;
- qp->real_qp = qp;
- qp->uobject = NULL;
- qp->qp_type = qp_init_attr->qp_type;
- qp->rwq_ind_tbl = qp_init_attr->rwq_ind_tbl;
+ qp->qp_type = attr->qp_type;
+ qp->rwq_ind_tbl = attr->rwq_ind_tbl;
+ qp->srq = attr->srq;
+ qp->event_handler = __ib_qp_event_handler;
+ qp->registered_event_handler = attr->event_handler;
+ qp->port = attr->port_num;
+ qp->qp_context = attr->qp_context;
- atomic_set(&qp->usecnt, 0);
- qp->mrs_used = 0;
spin_lock_init(&qp->mr_lock);
INIT_LIST_HEAD(&qp->rdma_mrs);
INIT_LIST_HEAD(&qp->sig_mrs);
+ init_completion(&qp->srq_completion);
- if (qp_init_attr->qp_type == IB_QPT_XRC_TGT)
- return ib_create_xrc_qp(qp, qp_init_attr);
+ qp->send_cq = attr->send_cq;
+ qp->recv_cq = attr->recv_cq;
- qp->event_handler = qp_init_attr->event_handler;
- qp->qp_context = qp_init_attr->qp_context;
- if (qp_init_attr->qp_type == IB_QPT_XRC_INI) {
- qp->recv_cq = NULL;
- qp->srq = NULL;
- } else {
- qp->recv_cq = qp_init_attr->recv_cq;
- if (qp_init_attr->recv_cq)
- atomic_inc(&qp_init_attr->recv_cq->usecnt);
- qp->srq = qp_init_attr->srq;
- if (qp->srq)
- atomic_inc(&qp_init_attr->srq->usecnt);
+ rdma_restrack_new(&qp->res, RDMA_RESTRACK_QP);
+ WARN_ONCE(!udata && !caller, "Missing kernel QP owner");
+ rdma_restrack_set_name(&qp->res, udata ? NULL : caller);
+ ret = dev->ops.create_qp(qp, attr, udata);
+ if (ret)
+ goto err_create;
+
+ /*
+ * TODO: The mlx4 internally overwrites send_cq and recv_cq.
+ * Unfortunately, it is not an easy task to fix that driver.
+ */
+ qp->send_cq = attr->send_cq;
+ qp->recv_cq = attr->recv_cq;
+
+ ret = ib_create_qp_security(qp, dev);
+ if (ret)
+ goto err_security;
+
+ rdma_restrack_add(&qp->res);
+ return qp;
+
+err_security:
+ qp->device->ops.destroy_qp(qp, udata ? &dummy : NULL);
+err_create:
+ rdma_restrack_put(&qp->res);
+ kfree(qp);
+ return ERR_PTR(ret);
+
+}
+
+/**
+ * ib_create_qp_user - Creates a QP associated with the specified protection
+ * domain.
+ * @dev: IB device
+ * @pd: The protection domain associated with the QP.
+ * @attr: A list of initial attributes required to create the
+ * QP. If QP creation succeeds, then the attributes are updated to
+ * the actual capabilities of the created QP.
+ * @udata: User data
+ * @uobj: uverbs obect
+ * @caller: caller's build-time module name
+ */
+struct ib_qp *ib_create_qp_user(struct ib_device *dev, struct ib_pd *pd,
+ struct ib_qp_init_attr *attr,
+ struct ib_udata *udata,
+ struct ib_uqp_object *uobj, const char *caller)
+{
+ struct ib_qp *qp, *xrc_qp;
+
+ if (attr->qp_type == IB_QPT_XRC_TGT)
+ qp = create_qp(dev, pd, attr, NULL, NULL, caller);
+ else
+ qp = create_qp(dev, pd, attr, udata, uobj, NULL);
+ if (attr->qp_type != IB_QPT_XRC_TGT || IS_ERR(qp))
+ return qp;
+
+ xrc_qp = create_xrc_qp_user(qp, attr);
+ if (IS_ERR(xrc_qp)) {
+ ib_destroy_qp(qp);
+ return xrc_qp;
}
- qp->pd = pd;
- qp->send_cq = qp_init_attr->send_cq;
- qp->xrcd = NULL;
+ xrc_qp->uobject = uobj;
+ return xrc_qp;
+}
+EXPORT_SYMBOL(ib_create_qp_user);
- atomic_inc(&pd->usecnt);
- if (qp_init_attr->send_cq)
- atomic_inc(&qp_init_attr->send_cq->usecnt);
- if (qp_init_attr->rwq_ind_tbl)
+void ib_qp_usecnt_inc(struct ib_qp *qp)
+{
+ if (qp->pd)
+ atomic_inc(&qp->pd->usecnt);
+ if (qp->send_cq)
+ atomic_inc(&qp->send_cq->usecnt);
+ if (qp->recv_cq)
+ atomic_inc(&qp->recv_cq->usecnt);
+ if (qp->srq)
+ atomic_inc(&qp->srq->usecnt);
+ if (qp->rwq_ind_tbl)
atomic_inc(&qp->rwq_ind_tbl->usecnt);
+}
+EXPORT_SYMBOL(ib_qp_usecnt_inc);
+
+void ib_qp_usecnt_dec(struct ib_qp *qp)
+{
+ if (qp->rwq_ind_tbl)
+ atomic_dec(&qp->rwq_ind_tbl->usecnt);
+ if (qp->srq)
+ atomic_dec(&qp->srq->usecnt);
+ if (qp->recv_cq)
+ atomic_dec(&qp->recv_cq->usecnt);
+ if (qp->send_cq)
+ atomic_dec(&qp->send_cq->usecnt);
+ if (qp->pd)
+ atomic_dec(&qp->pd->usecnt);
+}
+EXPORT_SYMBOL(ib_qp_usecnt_dec);
+
+struct ib_qp *ib_create_qp_kernel(struct ib_pd *pd,
+ struct ib_qp_init_attr *qp_init_attr,
+ const char *caller)
+{
+ struct ib_device *device = pd->device;
+ struct ib_qp *qp;
+ int ret;
+
+ /*
+ * If the callers is using the RDMA API calculate the resources
+ * needed for the RDMA READ/WRITE operations.
+ *
+ * Note that these callers need to pass in a port number.
+ */
+ if (qp_init_attr->cap.max_rdma_ctxs)
+ rdma_rw_init_qp(device, qp_init_attr);
+
+ qp = create_qp(device, pd, qp_init_attr, NULL, NULL, caller);
+ if (IS_ERR(qp))
+ return qp;
+
+ ib_qp_usecnt_inc(qp);
if (qp_init_attr->cap.max_rdma_ctxs) {
ret = rdma_rw_init_mrs(qp, qp_init_attr);
- if (ret) {
- pr_err("failed to init MR pool ret= %d\n", ret);
- ib_destroy_qp(qp);
- return ERR_PTR(ret);
- }
+ if (ret)
+ goto err;
}
/*
@@ -857,10 +1381,17 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd,
qp->max_write_sge = qp_init_attr->cap.max_send_sge;
qp->max_read_sge = min_t(u32, qp_init_attr->cap.max_send_sge,
device->attrs.max_sge_rd);
+ if (qp_init_attr->create_flags & IB_QP_CREATE_INTEGRITY_EN)
+ qp->integrity_en = true;
return qp;
+
+err:
+ ib_destroy_qp(qp);
+ return ERR_PTR(ret);
+
}
-EXPORT_SYMBOL(ib_create_qp);
+EXPORT_SYMBOL(ib_create_qp_kernel);
static const struct {
int valid;
@@ -1169,99 +1700,328 @@ static const struct {
}
};
-int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,
- enum ib_qp_type type, enum ib_qp_attr_mask mask,
- enum rdma_link_layer ll)
+bool ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,
+ enum ib_qp_type type, enum ib_qp_attr_mask mask)
{
enum ib_qp_attr_mask req_param, opt_param;
- if (cur_state < 0 || cur_state > IB_QPS_ERR ||
- next_state < 0 || next_state > IB_QPS_ERR)
- return 0;
-
if (mask & IB_QP_CUR_STATE &&
cur_state != IB_QPS_RTR && cur_state != IB_QPS_RTS &&
cur_state != IB_QPS_SQD && cur_state != IB_QPS_SQE)
- return 0;
+ return false;
if (!qp_state_table[cur_state][next_state].valid)
- return 0;
+ return false;
req_param = qp_state_table[cur_state][next_state].req_param[type];
opt_param = qp_state_table[cur_state][next_state].opt_param[type];
if ((mask & req_param) != req_param)
- return 0;
+ return false;
if (mask & ~(req_param | opt_param | IB_QP_STATE))
- return 0;
+ return false;
- return 1;
+ return true;
}
EXPORT_SYMBOL(ib_modify_qp_is_ok);
-int ib_resolve_eth_dmac(struct ib_device *device,
- struct ib_ah_attr *ah_attr)
+/**
+ * ib_resolve_eth_dmac - Resolve destination mac address
+ * @device: Device to consider
+ * @ah_attr: address handle attribute which describes the
+ * source and destination parameters
+ * ib_resolve_eth_dmac() resolves destination mac address and L3 hop limit It
+ * returns 0 on success or appropriate error code. It initializes the
+ * necessary ah_attr fields when call is successful.
+ */
+static int ib_resolve_eth_dmac(struct ib_device *device,
+ struct rdma_ah_attr *ah_attr)
{
- int ret = 0;
+ int ret = 0;
- if (ah_attr->port_num < rdma_start_port(device) ||
- ah_attr->port_num > rdma_end_port(device))
- return -EINVAL;
-
- if (!rdma_cap_eth_ah(device, ah_attr->port_num))
- return 0;
+ if (rdma_is_multicast_addr((struct in6_addr *)ah_attr->grh.dgid.raw)) {
+ if (ipv6_addr_v4mapped((struct in6_addr *)ah_attr->grh.dgid.raw)) {
+ __be32 addr = 0;
- if (rdma_link_local_addr((struct in6_addr *)ah_attr->grh.dgid.raw)) {
- rdma_get_ll_mac((struct in6_addr *)ah_attr->grh.dgid.raw,
- ah_attr->dmac);
+ memcpy(&addr, ah_attr->grh.dgid.raw + 12, 4);
+ ip_eth_mc_map(addr, (char *)ah_attr->roce.dmac);
+ } else {
+ ipv6_eth_mc_map((struct in6_addr *)ah_attr->grh.dgid.raw,
+ (char *)ah_attr->roce.dmac);
+ }
} else {
- union ib_gid sgid;
- struct ib_gid_attr sgid_attr;
- int ifindex;
- int hop_limit;
-
- ret = ib_query_gid(device,
- ah_attr->port_num,
- ah_attr->grh.sgid_index,
- &sgid, &sgid_attr);
-
- if (ret || !sgid_attr.ndev) {
- if (!ret)
- ret = -ENXIO;
+ ret = ib_resolve_unicast_gid_dmac(device, ah_attr);
+ }
+ return ret;
+}
+
+static bool is_qp_type_connected(const struct ib_qp *qp)
+{
+ return (qp->qp_type == IB_QPT_UC ||
+ qp->qp_type == IB_QPT_RC ||
+ qp->qp_type == IB_QPT_XRC_INI ||
+ qp->qp_type == IB_QPT_XRC_TGT);
+}
+
+/*
+ * IB core internal function to perform QP attributes modification.
+ */
+static int _ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_udata *udata)
+{
+ u32 port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
+ const struct ib_gid_attr *old_sgid_attr_av;
+ const struct ib_gid_attr *old_sgid_attr_alt_av;
+ int ret;
+
+ attr->xmit_slave = NULL;
+ if (attr_mask & IB_QP_AV) {
+ ret = rdma_fill_sgid_attr(qp->device, &attr->ah_attr,
+ &old_sgid_attr_av);
+ if (ret)
+ return ret;
+
+ if (attr->ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE &&
+ is_qp_type_connected(qp)) {
+ struct net_device *slave;
+
+ /*
+ * If the user provided the qp_attr then we have to
+ * resolve it. Kerne users have to provide already
+ * resolved rdma_ah_attr's.
+ */
+ if (udata) {
+ ret = ib_resolve_eth_dmac(qp->device,
+ &attr->ah_attr);
+ if (ret)
+ goto out_av;
+ }
+ slave = rdma_lag_get_ah_roce_slave(qp->device,
+ &attr->ah_attr,
+ GFP_KERNEL);
+ if (IS_ERR(slave)) {
+ ret = PTR_ERR(slave);
+ goto out_av;
+ }
+ attr->xmit_slave = slave;
+ }
+ }
+ if (attr_mask & IB_QP_ALT_PATH) {
+ /*
+ * FIXME: This does not track the migration state, so if the
+ * user loads a new alternate path after the HW has migrated
+ * from primary->alternate we will keep the wrong
+ * references. This is OK for IB because the reference
+ * counting does not serve any functional purpose.
+ */
+ ret = rdma_fill_sgid_attr(qp->device, &attr->alt_ah_attr,
+ &old_sgid_attr_alt_av);
+ if (ret)
+ goto out_av;
+
+ /*
+ * Today the core code can only handle alternate paths and APM
+ * for IB. Ban them in roce mode.
+ */
+ if (!(rdma_protocol_ib(qp->device,
+ attr->alt_ah_attr.port_num) &&
+ rdma_protocol_ib(qp->device, port))) {
+ ret = -EINVAL;
goto out;
}
+ }
- ifindex = sgid_attr.ndev->ifindex;
+ if (rdma_ib_or_roce(qp->device, port)) {
+ if (attr_mask & IB_QP_RQ_PSN && attr->rq_psn & ~0xffffff) {
+ dev_warn(&qp->device->dev,
+ "%s rq_psn overflow, masking to 24 bits\n",
+ __func__);
+ attr->rq_psn &= 0xffffff;
+ }
- ret = rdma_addr_find_l2_eth_by_grh(&sgid,
- &ah_attr->grh.dgid,
- ah_attr->dmac,
- NULL, &ifindex, &hop_limit);
+ if (attr_mask & IB_QP_SQ_PSN && attr->sq_psn & ~0xffffff) {
+ dev_warn(&qp->device->dev,
+ " %s sq_psn overflow, masking to 24 bits\n",
+ __func__);
+ attr->sq_psn &= 0xffffff;
+ }
+ }
- dev_put(sgid_attr.ndev);
+ /*
+ * Bind this qp to a counter automatically based on the rdma counter
+ * rules. This only set in RST2INIT with port specified
+ */
+ if (!qp->counter && (attr_mask & IB_QP_PORT) &&
+ ((attr_mask & IB_QP_STATE) && attr->qp_state == IB_QPS_INIT))
+ rdma_counter_bind_qp_auto(qp, attr->port_num);
+
+ ret = ib_security_modify_qp(qp, attr, attr_mask, udata);
+ if (ret)
+ goto out;
+
+ if (attr_mask & IB_QP_PORT)
+ qp->port = attr->port_num;
+ if (attr_mask & IB_QP_AV)
+ qp->av_sgid_attr =
+ rdma_update_sgid_attr(&attr->ah_attr, qp->av_sgid_attr);
+ if (attr_mask & IB_QP_ALT_PATH)
+ qp->alt_path_sgid_attr = rdma_update_sgid_attr(
+ &attr->alt_ah_attr, qp->alt_path_sgid_attr);
- ah_attr->grh.hop_limit = hop_limit;
- }
out:
+ if (attr_mask & IB_QP_ALT_PATH)
+ rdma_unfill_sgid_attr(&attr->alt_ah_attr, old_sgid_attr_alt_av);
+out_av:
+ if (attr_mask & IB_QP_AV) {
+ rdma_lag_put_ah_roce_slave(attr->xmit_slave);
+ rdma_unfill_sgid_attr(&attr->ah_attr, old_sgid_attr_av);
+ }
return ret;
}
-EXPORT_SYMBOL(ib_resolve_eth_dmac);
-int ib_modify_qp(struct ib_qp *qp,
- struct ib_qp_attr *qp_attr,
- int qp_attr_mask)
+/**
+ * ib_modify_qp_with_udata - Modifies the attributes for the specified QP.
+ * @ib_qp: The QP to modify.
+ * @attr: On input, specifies the QP attributes to modify. On output,
+ * the current values of selected QP attributes are returned.
+ * @attr_mask: A bit-mask used to specify which attributes of the QP
+ * are being modified.
+ * @udata: pointer to user's input output buffer information
+ * are being modified.
+ * It returns 0 on success and returns appropriate error code on error.
+ */
+int ib_modify_qp_with_udata(struct ib_qp *ib_qp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_udata *udata)
+{
+ return _ib_modify_qp(ib_qp->real_qp, attr, attr_mask, udata);
+}
+EXPORT_SYMBOL(ib_modify_qp_with_udata);
+
+static void ib_get_width_and_speed(u32 netdev_speed, u32 lanes,
+ u16 *speed, u8 *width)
+{
+ if (!lanes) {
+ if (netdev_speed <= SPEED_1000) {
+ *width = IB_WIDTH_1X;
+ *speed = IB_SPEED_SDR;
+ } else if (netdev_speed <= SPEED_10000) {
+ *width = IB_WIDTH_1X;
+ *speed = IB_SPEED_FDR10;
+ } else if (netdev_speed <= SPEED_20000) {
+ *width = IB_WIDTH_4X;
+ *speed = IB_SPEED_DDR;
+ } else if (netdev_speed <= SPEED_25000) {
+ *width = IB_WIDTH_1X;
+ *speed = IB_SPEED_EDR;
+ } else if (netdev_speed <= SPEED_40000) {
+ *width = IB_WIDTH_4X;
+ *speed = IB_SPEED_FDR10;
+ } else if (netdev_speed <= SPEED_50000) {
+ *width = IB_WIDTH_2X;
+ *speed = IB_SPEED_EDR;
+ } else if (netdev_speed <= SPEED_100000) {
+ *width = IB_WIDTH_4X;
+ *speed = IB_SPEED_EDR;
+ } else if (netdev_speed <= SPEED_200000) {
+ *width = IB_WIDTH_4X;
+ *speed = IB_SPEED_HDR;
+ } else {
+ *width = IB_WIDTH_4X;
+ *speed = IB_SPEED_NDR;
+ }
+
+ return;
+ }
+
+ switch (lanes) {
+ case 1:
+ *width = IB_WIDTH_1X;
+ break;
+ case 2:
+ *width = IB_WIDTH_2X;
+ break;
+ case 4:
+ *width = IB_WIDTH_4X;
+ break;
+ case 8:
+ *width = IB_WIDTH_8X;
+ break;
+ case 12:
+ *width = IB_WIDTH_12X;
+ break;
+ default:
+ *width = IB_WIDTH_1X;
+ }
+
+ switch (netdev_speed / lanes) {
+ case SPEED_2500:
+ *speed = IB_SPEED_SDR;
+ break;
+ case SPEED_5000:
+ *speed = IB_SPEED_DDR;
+ break;
+ case SPEED_10000:
+ *speed = IB_SPEED_FDR10;
+ break;
+ case SPEED_14000:
+ *speed = IB_SPEED_FDR;
+ break;
+ case SPEED_25000:
+ *speed = IB_SPEED_EDR;
+ break;
+ case SPEED_50000:
+ *speed = IB_SPEED_HDR;
+ break;
+ case SPEED_100000:
+ *speed = IB_SPEED_NDR;
+ break;
+ default:
+ *speed = IB_SPEED_SDR;
+ }
+}
+
+int ib_get_eth_speed(struct ib_device *dev, u32 port_num, u16 *speed, u8 *width)
{
+ int rc;
+ u32 netdev_speed;
+ struct net_device *netdev;
+ struct ethtool_link_ksettings lksettings = {};
- if (qp_attr_mask & IB_QP_AV) {
- int ret;
+ if (rdma_port_get_link_layer(dev, port_num) != IB_LINK_LAYER_ETHERNET)
+ return -EINVAL;
- ret = ib_resolve_eth_dmac(qp->device, &qp_attr->ah_attr);
- if (ret)
- return ret;
+ netdev = ib_device_get_netdev(dev, port_num);
+ if (!netdev)
+ return -ENODEV;
+
+ rtnl_lock();
+ rc = __ethtool_get_link_ksettings(netdev, &lksettings);
+ rtnl_unlock();
+
+ dev_put(netdev);
+
+ if (!rc && lksettings.base.speed != (u32)SPEED_UNKNOWN) {
+ netdev_speed = lksettings.base.speed;
+ } else {
+ netdev_speed = SPEED_1000;
+ if (rc)
+ pr_warn("%s speed is unknown, defaulting to %u\n",
+ netdev->name, netdev_speed);
}
- return qp->device->modify_qp(qp->real_qp, qp_attr, qp_attr_mask, NULL);
+ ib_get_width_and_speed(netdev_speed, lksettings.lanes,
+ speed, width);
+
+ return 0;
+}
+EXPORT_SYMBOL(ib_get_eth_speed);
+
+int ib_modify_qp(struct ib_qp *qp,
+ struct ib_qp_attr *qp_attr,
+ int qp_attr_mask)
+{
+ return _ib_modify_qp(qp->real_qp, qp_attr, qp_attr_mask, NULL);
}
EXPORT_SYMBOL(ib_modify_qp);
@@ -1270,9 +2030,12 @@ int ib_query_qp(struct ib_qp *qp,
int qp_attr_mask,
struct ib_qp_init_attr *qp_init_attr)
{
- return qp->device->query_qp ?
- qp->device->query_qp(qp->real_qp, qp_attr, qp_attr_mask, qp_init_attr) :
- -ENOSYS;
+ qp_attr->ah_attr.grh.sgid_attr = NULL;
+ qp_attr->alt_ah_attr.grh.sgid_attr = NULL;
+
+ return qp->device->ops.query_qp ?
+ qp->device->ops.query_qp(qp->real_qp, qp_attr, qp_attr_mask,
+ qp_init_attr) : -EOPNOTSUPP;
}
EXPORT_SYMBOL(ib_query_qp);
@@ -1285,11 +2048,13 @@ int ib_close_qp(struct ib_qp *qp)
if (real_qp == qp)
return -EINVAL;
- spin_lock_irqsave(&real_qp->device->event_handler_lock, flags);
+ spin_lock_irqsave(&real_qp->device->qp_open_list_lock, flags);
list_del(&qp->open_list);
- spin_unlock_irqrestore(&real_qp->device->event_handler_lock, flags);
+ spin_unlock_irqrestore(&real_qp->device->qp_open_list_lock, flags);
atomic_dec(&real_qp->usecnt);
+ if (qp->qp_sec)
+ ib_close_shared_qp_security(qp->qp_sec);
kfree(qp);
return 0;
@@ -1304,32 +2069,28 @@ static int __ib_destroy_shared_qp(struct ib_qp *qp)
real_qp = qp->real_qp;
xrcd = real_qp->xrcd;
-
- mutex_lock(&xrcd->tgt_qp_mutex);
+ down_write(&xrcd->tgt_qps_rwsem);
ib_close_qp(qp);
if (atomic_read(&real_qp->usecnt) == 0)
- list_del(&real_qp->xrcd_list);
+ xa_erase(&xrcd->tgt_qps, real_qp->qp_num);
else
real_qp = NULL;
- mutex_unlock(&xrcd->tgt_qp_mutex);
+ up_write(&xrcd->tgt_qps_rwsem);
if (real_qp) {
ret = ib_destroy_qp(real_qp);
if (!ret)
atomic_dec(&xrcd->usecnt);
- else
- __ib_insert_xrcd_qp(xrcd, real_qp);
}
return 0;
}
-int ib_destroy_qp(struct ib_qp *qp)
+int ib_destroy_qp_user(struct ib_qp *qp, struct ib_udata *udata)
{
- struct ib_pd *pd;
- struct ib_cq *scq, *rcq;
- struct ib_srq *srq;
- struct ib_rwq_ind_table *ind_tbl;
+ const struct ib_gid_attr *alt_path_sgid_attr = qp->alt_path_sgid_attr;
+ const struct ib_gid_attr *av_sgid_attr = qp->av_sgid_attr;
+ struct ib_qp_security *sec;
int ret;
WARN_ON_ONCE(qp->mrs_used > 0);
@@ -1340,95 +2101,189 @@ int ib_destroy_qp(struct ib_qp *qp)
if (qp->real_qp != qp)
return __ib_destroy_shared_qp(qp);
- pd = qp->pd;
- scq = qp->send_cq;
- rcq = qp->recv_cq;
- srq = qp->srq;
- ind_tbl = qp->rwq_ind_tbl;
+ sec = qp->qp_sec;
+ if (sec)
+ ib_destroy_qp_security_begin(sec);
if (!qp->uobject)
rdma_rw_cleanup_mrs(qp);
- ret = qp->device->destroy_qp(qp);
- if (!ret) {
- if (pd)
- atomic_dec(&pd->usecnt);
- if (scq)
- atomic_dec(&scq->usecnt);
- if (rcq)
- atomic_dec(&rcq->usecnt);
- if (srq)
- atomic_dec(&srq->usecnt);
- if (ind_tbl)
- atomic_dec(&ind_tbl->usecnt);
+ rdma_counter_unbind_qp(qp, qp->port, true);
+ ret = qp->device->ops.destroy_qp(qp, udata);
+ if (ret) {
+ if (sec)
+ ib_destroy_qp_security_abort(sec);
+ return ret;
}
+ if (alt_path_sgid_attr)
+ rdma_put_gid_attr(alt_path_sgid_attr);
+ if (av_sgid_attr)
+ rdma_put_gid_attr(av_sgid_attr);
+
+ ib_qp_usecnt_dec(qp);
+ if (sec)
+ ib_destroy_qp_security_end(sec);
+
+ rdma_restrack_del(&qp->res);
+ kfree(qp);
return ret;
}
-EXPORT_SYMBOL(ib_destroy_qp);
+EXPORT_SYMBOL(ib_destroy_qp_user);
/* Completion queues */
-struct ib_cq *ib_create_cq(struct ib_device *device,
- ib_comp_handler comp_handler,
- void (*event_handler)(struct ib_event *, void *),
- void *cq_context,
- const struct ib_cq_init_attr *cq_attr)
+struct ib_cq *__ib_create_cq(struct ib_device *device,
+ ib_comp_handler comp_handler,
+ void (*event_handler)(struct ib_event *, void *),
+ void *cq_context,
+ const struct ib_cq_init_attr *cq_attr,
+ const char *caller)
{
struct ib_cq *cq;
+ int ret;
+
+ cq = rdma_zalloc_drv_obj(device, ib_cq);
+ if (!cq)
+ return ERR_PTR(-ENOMEM);
- cq = device->create_cq(device, cq_attr, NULL, NULL);
+ cq->device = device;
+ cq->uobject = NULL;
+ cq->comp_handler = comp_handler;
+ cq->event_handler = event_handler;
+ cq->cq_context = cq_context;
+ atomic_set(&cq->usecnt, 0);
- if (!IS_ERR(cq)) {
- cq->device = device;
- cq->uobject = NULL;
- cq->comp_handler = comp_handler;
- cq->event_handler = event_handler;
- cq->cq_context = cq_context;
- atomic_set(&cq->usecnt, 0);
+ rdma_restrack_new(&cq->res, RDMA_RESTRACK_CQ);
+ rdma_restrack_set_name(&cq->res, caller);
+
+ ret = device->ops.create_cq(cq, cq_attr, NULL);
+ if (ret) {
+ rdma_restrack_put(&cq->res);
+ kfree(cq);
+ return ERR_PTR(ret);
}
+ rdma_restrack_add(&cq->res);
return cq;
}
-EXPORT_SYMBOL(ib_create_cq);
+EXPORT_SYMBOL(__ib_create_cq);
-int ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
+int rdma_set_cq_moderation(struct ib_cq *cq, u16 cq_count, u16 cq_period)
{
- return cq->device->modify_cq ?
- cq->device->modify_cq(cq, cq_count, cq_period) : -ENOSYS;
+ if (cq->shared)
+ return -EOPNOTSUPP;
+
+ return cq->device->ops.modify_cq ?
+ cq->device->ops.modify_cq(cq, cq_count,
+ cq_period) : -EOPNOTSUPP;
}
-EXPORT_SYMBOL(ib_modify_cq);
+EXPORT_SYMBOL(rdma_set_cq_moderation);
-int ib_destroy_cq(struct ib_cq *cq)
+int ib_destroy_cq_user(struct ib_cq *cq, struct ib_udata *udata)
{
+ int ret;
+
+ if (WARN_ON_ONCE(cq->shared))
+ return -EOPNOTSUPP;
+
if (atomic_read(&cq->usecnt))
return -EBUSY;
- return cq->device->destroy_cq(cq);
+ ret = cq->device->ops.destroy_cq(cq, udata);
+ if (ret)
+ return ret;
+
+ rdma_restrack_del(&cq->res);
+ kfree(cq);
+ return ret;
}
-EXPORT_SYMBOL(ib_destroy_cq);
+EXPORT_SYMBOL(ib_destroy_cq_user);
int ib_resize_cq(struct ib_cq *cq, int cqe)
{
- return cq->device->resize_cq ?
- cq->device->resize_cq(cq, cqe, NULL) : -ENOSYS;
+ if (cq->shared)
+ return -EOPNOTSUPP;
+
+ return cq->device->ops.resize_cq ?
+ cq->device->ops.resize_cq(cq, cqe, NULL) : -EOPNOTSUPP;
}
EXPORT_SYMBOL(ib_resize_cq);
/* Memory regions */
-int ib_dereg_mr(struct ib_mr *mr)
+struct ib_mr *ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+ u64 virt_addr, int access_flags)
+{
+ struct ib_mr *mr;
+
+ if (access_flags & IB_ACCESS_ON_DEMAND) {
+ if (!(pd->device->attrs.kernel_cap_flags &
+ IBK_ON_DEMAND_PAGING)) {
+ pr_debug("ODP support not available\n");
+ return ERR_PTR(-EINVAL);
+ }
+ }
+
+ mr = pd->device->ops.reg_user_mr(pd, start, length, virt_addr,
+ access_flags, NULL, NULL);
+
+ if (IS_ERR(mr))
+ return mr;
+
+ mr->device = pd->device;
+ mr->type = IB_MR_TYPE_USER;
+ mr->pd = pd;
+ mr->dm = NULL;
+ atomic_inc(&pd->usecnt);
+ mr->iova = virt_addr;
+ mr->length = length;
+
+ rdma_restrack_new(&mr->res, RDMA_RESTRACK_MR);
+ rdma_restrack_parent_name(&mr->res, &pd->res);
+ rdma_restrack_add(&mr->res);
+
+ return mr;
+}
+EXPORT_SYMBOL(ib_reg_user_mr);
+
+int ib_advise_mr(struct ib_pd *pd, enum ib_uverbs_advise_mr_advice advice,
+ u32 flags, struct ib_sge *sg_list, u32 num_sge)
+{
+ if (!pd->device->ops.advise_mr)
+ return -EOPNOTSUPP;
+
+ if (!num_sge)
+ return 0;
+
+ return pd->device->ops.advise_mr(pd, advice, flags, sg_list, num_sge,
+ NULL);
+}
+EXPORT_SYMBOL(ib_advise_mr);
+
+int ib_dereg_mr_user(struct ib_mr *mr, struct ib_udata *udata)
{
struct ib_pd *pd = mr->pd;
+ struct ib_dm *dm = mr->dm;
+ struct ib_dmah *dmah = mr->dmah;
+ struct ib_sig_attrs *sig_attrs = mr->sig_attrs;
int ret;
- ret = mr->device->dereg_mr(mr);
- if (!ret)
+ trace_mr_dereg(mr);
+ rdma_restrack_del(&mr->res);
+ ret = mr->device->ops.dereg_mr(mr, udata);
+ if (!ret) {
atomic_dec(&pd->usecnt);
+ if (dm)
+ atomic_dec(&dm->usecnt);
+ if (dmah)
+ atomic_dec(&dmah->usecnt);
+ kfree(sig_attrs);
+ }
return ret;
}
-EXPORT_SYMBOL(ib_dereg_mr);
+EXPORT_SYMBOL(ib_dereg_mr_user);
/**
* ib_alloc_mr() - Allocates a memory region
@@ -1442,88 +2297,157 @@ EXPORT_SYMBOL(ib_dereg_mr);
* max_num_sg * used_page_size.
*
*/
-struct ib_mr *ib_alloc_mr(struct ib_pd *pd,
- enum ib_mr_type mr_type,
+struct ib_mr *ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
u32 max_num_sg)
{
struct ib_mr *mr;
- if (!pd->device->alloc_mr)
- return ERR_PTR(-ENOSYS);
+ if (!pd->device->ops.alloc_mr) {
+ mr = ERR_PTR(-EOPNOTSUPP);
+ goto out;
+ }
- mr = pd->device->alloc_mr(pd, mr_type, max_num_sg);
- if (!IS_ERR(mr)) {
- mr->device = pd->device;
- mr->pd = pd;
- mr->uobject = NULL;
- atomic_inc(&pd->usecnt);
- mr->need_inval = false;
+ if (mr_type == IB_MR_TYPE_INTEGRITY) {
+ WARN_ON_ONCE(1);
+ mr = ERR_PTR(-EINVAL);
+ goto out;
}
+ mr = pd->device->ops.alloc_mr(pd, mr_type, max_num_sg);
+ if (IS_ERR(mr))
+ goto out;
+
+ mr->device = pd->device;
+ mr->pd = pd;
+ mr->dm = NULL;
+ mr->uobject = NULL;
+ atomic_inc(&pd->usecnt);
+ mr->need_inval = false;
+ mr->type = mr_type;
+ mr->sig_attrs = NULL;
+
+ rdma_restrack_new(&mr->res, RDMA_RESTRACK_MR);
+ rdma_restrack_parent_name(&mr->res, &pd->res);
+ rdma_restrack_add(&mr->res);
+out:
+ trace_mr_alloc(pd, mr_type, max_num_sg, mr);
return mr;
}
EXPORT_SYMBOL(ib_alloc_mr);
-/* "Fast" memory regions */
-
-struct ib_fmr *ib_alloc_fmr(struct ib_pd *pd,
- int mr_access_flags,
- struct ib_fmr_attr *fmr_attr)
+/**
+ * ib_alloc_mr_integrity() - Allocates an integrity memory region
+ * @pd: protection domain associated with the region
+ * @max_num_data_sg: maximum data sg entries available for registration
+ * @max_num_meta_sg: maximum metadata sg entries available for
+ * registration
+ *
+ * Notes:
+ * Memory registration page/sg lists must not exceed max_num_sg,
+ * also the integrity page/sg lists must not exceed max_num_meta_sg.
+ *
+ */
+struct ib_mr *ib_alloc_mr_integrity(struct ib_pd *pd,
+ u32 max_num_data_sg,
+ u32 max_num_meta_sg)
{
- struct ib_fmr *fmr;
+ struct ib_mr *mr;
+ struct ib_sig_attrs *sig_attrs;
- if (!pd->device->alloc_fmr)
- return ERR_PTR(-ENOSYS);
+ if (!pd->device->ops.alloc_mr_integrity ||
+ !pd->device->ops.map_mr_sg_pi) {
+ mr = ERR_PTR(-EOPNOTSUPP);
+ goto out;
+ }
- fmr = pd->device->alloc_fmr(pd, mr_access_flags, fmr_attr);
- if (!IS_ERR(fmr)) {
- fmr->device = pd->device;
- fmr->pd = pd;
- atomic_inc(&pd->usecnt);
+ if (!max_num_meta_sg) {
+ mr = ERR_PTR(-EINVAL);
+ goto out;
}
- return fmr;
-}
-EXPORT_SYMBOL(ib_alloc_fmr);
+ sig_attrs = kzalloc(sizeof(struct ib_sig_attrs), GFP_KERNEL);
+ if (!sig_attrs) {
+ mr = ERR_PTR(-ENOMEM);
+ goto out;
+ }
-int ib_unmap_fmr(struct list_head *fmr_list)
-{
- struct ib_fmr *fmr;
+ mr = pd->device->ops.alloc_mr_integrity(pd, max_num_data_sg,
+ max_num_meta_sg);
+ if (IS_ERR(mr)) {
+ kfree(sig_attrs);
+ goto out;
+ }
- if (list_empty(fmr_list))
- return 0;
+ mr->device = pd->device;
+ mr->pd = pd;
+ mr->dm = NULL;
+ mr->uobject = NULL;
+ atomic_inc(&pd->usecnt);
+ mr->need_inval = false;
+ mr->type = IB_MR_TYPE_INTEGRITY;
+ mr->sig_attrs = sig_attrs;
- fmr = list_entry(fmr_list->next, struct ib_fmr, list);
- return fmr->device->unmap_fmr(fmr_list);
+ rdma_restrack_new(&mr->res, RDMA_RESTRACK_MR);
+ rdma_restrack_parent_name(&mr->res, &pd->res);
+ rdma_restrack_add(&mr->res);
+out:
+ trace_mr_integ_alloc(pd, max_num_data_sg, max_num_meta_sg, mr);
+ return mr;
}
-EXPORT_SYMBOL(ib_unmap_fmr);
+EXPORT_SYMBOL(ib_alloc_mr_integrity);
-int ib_dealloc_fmr(struct ib_fmr *fmr)
+/* Multicast groups */
+
+static bool is_valid_mcast_lid(struct ib_qp *qp, u16 lid)
{
- struct ib_pd *pd;
- int ret;
+ struct ib_qp_init_attr init_attr = {};
+ struct ib_qp_attr attr = {};
+ int num_eth_ports = 0;
+ unsigned int port;
- pd = fmr->pd;
- ret = fmr->device->dealloc_fmr(fmr);
- if (!ret)
- atomic_dec(&pd->usecnt);
+ /* If QP state >= init, it is assigned to a port and we can check this
+ * port only.
+ */
+ if (!ib_query_qp(qp, &attr, IB_QP_STATE | IB_QP_PORT, &init_attr)) {
+ if (attr.qp_state >= IB_QPS_INIT) {
+ if (rdma_port_get_link_layer(qp->device, attr.port_num) !=
+ IB_LINK_LAYER_INFINIBAND)
+ return true;
+ goto lid_check;
+ }
+ }
- return ret;
-}
-EXPORT_SYMBOL(ib_dealloc_fmr);
+ /* Can't get a quick answer, iterate over all ports */
+ rdma_for_each_port(qp->device, port)
+ if (rdma_port_get_link_layer(qp->device, port) !=
+ IB_LINK_LAYER_INFINIBAND)
+ num_eth_ports++;
-/* Multicast groups */
+ /* If we have at lease one Ethernet port, RoCE annex declares that
+ * multicast LID should be ignored. We can't tell at this step if the
+ * QP belongs to an IB or Ethernet port.
+ */
+ if (num_eth_ports)
+ return true;
+
+ /* If all the ports are IB, we can check according to IB spec. */
+lid_check:
+ return !(lid < be16_to_cpu(IB_MULTICAST_LID_BASE) ||
+ lid == be16_to_cpu(IB_LID_PERMISSIVE));
+}
int ib_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid)
{
int ret;
- if (!qp->device->attach_mcast)
- return -ENOSYS;
- if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD)
+ if (!qp->device->ops.attach_mcast)
+ return -EOPNOTSUPP;
+
+ if (!rdma_is_multicast_addr((struct in6_addr *)gid->raw) ||
+ qp->qp_type != IB_QPT_UD || !is_valid_mcast_lid(qp, lid))
return -EINVAL;
- ret = qp->device->attach_mcast(qp, gid, lid);
+ ret = qp->device->ops.attach_mcast(qp, gid, lid);
if (!ret)
atomic_inc(&qp->usecnt);
return ret;
@@ -1534,66 +2458,85 @@ int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid)
{
int ret;
- if (!qp->device->detach_mcast)
- return -ENOSYS;
- if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD)
+ if (!qp->device->ops.detach_mcast)
+ return -EOPNOTSUPP;
+
+ if (!rdma_is_multicast_addr((struct in6_addr *)gid->raw) ||
+ qp->qp_type != IB_QPT_UD || !is_valid_mcast_lid(qp, lid))
return -EINVAL;
- ret = qp->device->detach_mcast(qp, gid, lid);
+ ret = qp->device->ops.detach_mcast(qp, gid, lid);
if (!ret)
atomic_dec(&qp->usecnt);
return ret;
}
EXPORT_SYMBOL(ib_detach_mcast);
-struct ib_xrcd *ib_alloc_xrcd(struct ib_device *device)
+/**
+ * ib_alloc_xrcd_user - Allocates an XRC domain.
+ * @device: The device on which to allocate the XRC domain.
+ * @inode: inode to connect XRCD
+ * @udata: Valid user data or NULL for kernel object
+ */
+struct ib_xrcd *ib_alloc_xrcd_user(struct ib_device *device,
+ struct inode *inode, struct ib_udata *udata)
{
struct ib_xrcd *xrcd;
+ int ret;
- if (!device->alloc_xrcd)
- return ERR_PTR(-ENOSYS);
+ if (!device->ops.alloc_xrcd)
+ return ERR_PTR(-EOPNOTSUPP);
- xrcd = device->alloc_xrcd(device, NULL, NULL);
- if (!IS_ERR(xrcd)) {
- xrcd->device = device;
- xrcd->inode = NULL;
- atomic_set(&xrcd->usecnt, 0);
- mutex_init(&xrcd->tgt_qp_mutex);
- INIT_LIST_HEAD(&xrcd->tgt_qp_list);
- }
+ xrcd = rdma_zalloc_drv_obj(device, ib_xrcd);
+ if (!xrcd)
+ return ERR_PTR(-ENOMEM);
+
+ xrcd->device = device;
+ xrcd->inode = inode;
+ atomic_set(&xrcd->usecnt, 0);
+ init_rwsem(&xrcd->tgt_qps_rwsem);
+ xa_init(&xrcd->tgt_qps);
+ ret = device->ops.alloc_xrcd(xrcd, udata);
+ if (ret)
+ goto err;
return xrcd;
+err:
+ kfree(xrcd);
+ return ERR_PTR(ret);
}
-EXPORT_SYMBOL(ib_alloc_xrcd);
+EXPORT_SYMBOL(ib_alloc_xrcd_user);
-int ib_dealloc_xrcd(struct ib_xrcd *xrcd)
+/**
+ * ib_dealloc_xrcd_user - Deallocates an XRC domain.
+ * @xrcd: The XRC domain to deallocate.
+ * @udata: Valid user data or NULL for kernel object
+ */
+int ib_dealloc_xrcd_user(struct ib_xrcd *xrcd, struct ib_udata *udata)
{
- struct ib_qp *qp;
int ret;
if (atomic_read(&xrcd->usecnt))
return -EBUSY;
- while (!list_empty(&xrcd->tgt_qp_list)) {
- qp = list_entry(xrcd->tgt_qp_list.next, struct ib_qp, xrcd_list);
- ret = ib_destroy_qp(qp);
- if (ret)
- return ret;
- }
-
- return xrcd->device->dealloc_xrcd(xrcd);
+ WARN_ON(!xa_empty(&xrcd->tgt_qps));
+ ret = xrcd->device->ops.dealloc_xrcd(xrcd, udata);
+ if (ret)
+ return ret;
+ kfree(xrcd);
+ return ret;
}
-EXPORT_SYMBOL(ib_dealloc_xrcd);
+EXPORT_SYMBOL(ib_dealloc_xrcd_user);
/**
* ib_create_wq - Creates a WQ associated with the specified protection
* domain.
* @pd: The protection domain associated with the WQ.
- * @wq_init_attr: A list of initial attributes required to create the
+ * @wq_attr: A list of initial attributes required to create the
* WQ. If WQ creation succeeds, then the attributes are updated to
* the actual capabilities of the created WQ.
*
- * wq_init_attr->max_wr and wq_init_attr->max_sge determine
+ * wq_attr->max_wr and wq_attr->max_sge determine
* the requested size of the WQ, and set to the actual values allocated
* on return.
* If ib_create_wq() succeeds, then max_wr and max_sge will always be
@@ -1604,10 +2547,10 @@ struct ib_wq *ib_create_wq(struct ib_pd *pd,
{
struct ib_wq *wq;
- if (!pd->device->create_wq)
- return ERR_PTR(-ENOSYS);
+ if (!pd->device->ops.create_wq)
+ return ERR_PTR(-EOPNOTSUPP);
- wq = pd->device->create_wq(pd, wq_attr, NULL);
+ wq = pd->device->ops.create_wq(pd, wq_attr, NULL);
if (!IS_ERR(wq)) {
wq->event_handler = wq_attr->event_handler;
wq->wq_context = wq_attr->wq_context;
@@ -1625,187 +2568,126 @@ struct ib_wq *ib_create_wq(struct ib_pd *pd,
EXPORT_SYMBOL(ib_create_wq);
/**
- * ib_destroy_wq - Destroys the specified WQ.
+ * ib_destroy_wq_user - Destroys the specified user WQ.
* @wq: The WQ to destroy.
+ * @udata: Valid user data
*/
-int ib_destroy_wq(struct ib_wq *wq)
+int ib_destroy_wq_user(struct ib_wq *wq, struct ib_udata *udata)
{
- int err;
struct ib_cq *cq = wq->cq;
struct ib_pd *pd = wq->pd;
+ int ret;
if (atomic_read(&wq->usecnt))
return -EBUSY;
- err = wq->device->destroy_wq(wq);
- if (!err) {
- atomic_dec(&pd->usecnt);
- atomic_dec(&cq->usecnt);
- }
- return err;
-}
-EXPORT_SYMBOL(ib_destroy_wq);
-
-/**
- * ib_modify_wq - Modifies the specified WQ.
- * @wq: The WQ to modify.
- * @wq_attr: On input, specifies the WQ attributes to modify.
- * @wq_attr_mask: A bit-mask used to specify which attributes of the WQ
- * are being modified.
- * On output, the current values of selected WQ attributes are returned.
- */
-int ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
- u32 wq_attr_mask)
-{
- int err;
-
- if (!wq->device->modify_wq)
- return -ENOSYS;
-
- err = wq->device->modify_wq(wq, wq_attr, wq_attr_mask, NULL);
- return err;
-}
-EXPORT_SYMBOL(ib_modify_wq);
-
-/*
- * ib_create_rwq_ind_table - Creates a RQ Indirection Table.
- * @device: The device on which to create the rwq indirection table.
- * @ib_rwq_ind_table_init_attr: A list of initial attributes required to
- * create the Indirection Table.
- *
- * Note: The life time of ib_rwq_ind_table_init_attr->ind_tbl is not less
- * than the created ib_rwq_ind_table object and the caller is responsible
- * for its memory allocation/free.
- */
-struct ib_rwq_ind_table *ib_create_rwq_ind_table(struct ib_device *device,
- struct ib_rwq_ind_table_init_attr *init_attr)
-{
- struct ib_rwq_ind_table *rwq_ind_table;
- int i;
- u32 table_size;
-
- if (!device->create_rwq_ind_table)
- return ERR_PTR(-ENOSYS);
-
- table_size = (1 << init_attr->log_ind_tbl_size);
- rwq_ind_table = device->create_rwq_ind_table(device,
- init_attr, NULL);
- if (IS_ERR(rwq_ind_table))
- return rwq_ind_table;
-
- rwq_ind_table->ind_tbl = init_attr->ind_tbl;
- rwq_ind_table->log_ind_tbl_size = init_attr->log_ind_tbl_size;
- rwq_ind_table->device = device;
- rwq_ind_table->uobject = NULL;
- atomic_set(&rwq_ind_table->usecnt, 0);
-
- for (i = 0; i < table_size; i++)
- atomic_inc(&rwq_ind_table->ind_tbl[i]->usecnt);
-
- return rwq_ind_table;
-}
-EXPORT_SYMBOL(ib_create_rwq_ind_table);
-
-/*
- * ib_destroy_rwq_ind_table - Destroys the specified Indirection Table.
- * @wq_ind_table: The Indirection Table to destroy.
-*/
-int ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *rwq_ind_table)
-{
- int err, i;
- u32 table_size = (1 << rwq_ind_table->log_ind_tbl_size);
- struct ib_wq **ind_tbl = rwq_ind_table->ind_tbl;
-
- if (atomic_read(&rwq_ind_table->usecnt))
- return -EBUSY;
-
- err = rwq_ind_table->device->destroy_rwq_ind_table(rwq_ind_table);
- if (!err) {
- for (i = 0; i < table_size; i++)
- atomic_dec(&ind_tbl[i]->usecnt);
- }
-
- return err;
-}
-EXPORT_SYMBOL(ib_destroy_rwq_ind_table);
-
-struct ib_flow *ib_create_flow(struct ib_qp *qp,
- struct ib_flow_attr *flow_attr,
- int domain)
-{
- struct ib_flow *flow_id;
- if (!qp->device->create_flow)
- return ERR_PTR(-ENOSYS);
-
- flow_id = qp->device->create_flow(qp, flow_attr, domain);
- if (!IS_ERR(flow_id)) {
- atomic_inc(&qp->usecnt);
- flow_id->qp = qp;
- }
- return flow_id;
-}
-EXPORT_SYMBOL(ib_create_flow);
-
-int ib_destroy_flow(struct ib_flow *flow_id)
-{
- int err;
- struct ib_qp *qp = flow_id->qp;
+ ret = wq->device->ops.destroy_wq(wq, udata);
+ if (ret)
+ return ret;
- err = qp->device->destroy_flow(flow_id);
- if (!err)
- atomic_dec(&qp->usecnt);
- return err;
+ atomic_dec(&pd->usecnt);
+ atomic_dec(&cq->usecnt);
+ return ret;
}
-EXPORT_SYMBOL(ib_destroy_flow);
+EXPORT_SYMBOL(ib_destroy_wq_user);
int ib_check_mr_status(struct ib_mr *mr, u32 check_mask,
struct ib_mr_status *mr_status)
{
- return mr->device->check_mr_status ?
- mr->device->check_mr_status(mr, check_mask, mr_status) : -ENOSYS;
+ if (!mr->device->ops.check_mr_status)
+ return -EOPNOTSUPP;
+
+ return mr->device->ops.check_mr_status(mr, check_mask, mr_status);
}
EXPORT_SYMBOL(ib_check_mr_status);
-int ib_set_vf_link_state(struct ib_device *device, int vf, u8 port,
+int ib_set_vf_link_state(struct ib_device *device, int vf, u32 port,
int state)
{
- if (!device->set_vf_link_state)
- return -ENOSYS;
+ if (!device->ops.set_vf_link_state)
+ return -EOPNOTSUPP;
- return device->set_vf_link_state(device, vf, port, state);
+ return device->ops.set_vf_link_state(device, vf, port, state);
}
EXPORT_SYMBOL(ib_set_vf_link_state);
-int ib_get_vf_config(struct ib_device *device, int vf, u8 port,
+int ib_get_vf_config(struct ib_device *device, int vf, u32 port,
struct ifla_vf_info *info)
{
- if (!device->get_vf_config)
- return -ENOSYS;
+ if (!device->ops.get_vf_config)
+ return -EOPNOTSUPP;
- return device->get_vf_config(device, vf, port, info);
+ return device->ops.get_vf_config(device, vf, port, info);
}
EXPORT_SYMBOL(ib_get_vf_config);
-int ib_get_vf_stats(struct ib_device *device, int vf, u8 port,
+int ib_get_vf_stats(struct ib_device *device, int vf, u32 port,
struct ifla_vf_stats *stats)
{
- if (!device->get_vf_stats)
- return -ENOSYS;
+ if (!device->ops.get_vf_stats)
+ return -EOPNOTSUPP;
- return device->get_vf_stats(device, vf, port, stats);
+ return device->ops.get_vf_stats(device, vf, port, stats);
}
EXPORT_SYMBOL(ib_get_vf_stats);
-int ib_set_vf_guid(struct ib_device *device, int vf, u8 port, u64 guid,
+int ib_set_vf_guid(struct ib_device *device, int vf, u32 port, u64 guid,
int type)
{
- if (!device->set_vf_guid)
- return -ENOSYS;
+ if (!device->ops.set_vf_guid)
+ return -EOPNOTSUPP;
- return device->set_vf_guid(device, vf, port, guid, type);
+ return device->ops.set_vf_guid(device, vf, port, guid, type);
}
EXPORT_SYMBOL(ib_set_vf_guid);
+int ib_get_vf_guid(struct ib_device *device, int vf, u32 port,
+ struct ifla_vf_guid *node_guid,
+ struct ifla_vf_guid *port_guid)
+{
+ if (!device->ops.get_vf_guid)
+ return -EOPNOTSUPP;
+
+ return device->ops.get_vf_guid(device, vf, port, node_guid, port_guid);
+}
+EXPORT_SYMBOL(ib_get_vf_guid);
+/**
+ * ib_map_mr_sg_pi() - Map the dma mapped SG lists for PI (protection
+ * information) and set an appropriate memory region for registration.
+ * @mr: memory region
+ * @data_sg: dma mapped scatterlist for data
+ * @data_sg_nents: number of entries in data_sg
+ * @data_sg_offset: offset in bytes into data_sg
+ * @meta_sg: dma mapped scatterlist for metadata
+ * @meta_sg_nents: number of entries in meta_sg
+ * @meta_sg_offset: offset in bytes into meta_sg
+ * @page_size: page vector desired page size
+ *
+ * Constraints:
+ * - The MR must be allocated with type IB_MR_TYPE_INTEGRITY.
+ *
+ * Return: 0 on success.
+ *
+ * After this completes successfully, the memory region
+ * is ready for registration.
+ */
+int ib_map_mr_sg_pi(struct ib_mr *mr, struct scatterlist *data_sg,
+ int data_sg_nents, unsigned int *data_sg_offset,
+ struct scatterlist *meta_sg, int meta_sg_nents,
+ unsigned int *meta_sg_offset, unsigned int page_size)
+{
+ if (unlikely(!mr->device->ops.map_mr_sg_pi ||
+ WARN_ON_ONCE(mr->type != IB_MR_TYPE_INTEGRITY)))
+ return -EOPNOTSUPP;
+
+ mr->page_size = page_size;
+
+ return mr->device->ops.map_mr_sg_pi(mr, data_sg, data_sg_nents,
+ data_sg_offset, meta_sg,
+ meta_sg_nents, meta_sg_offset);
+}
+EXPORT_SYMBOL(ib_map_mr_sg_pi);
+
/**
* ib_map_mr_sg() - Map the largest prefix of a dma mapped SG list
* and set it the memory region.
@@ -1816,6 +2698,7 @@ EXPORT_SYMBOL(ib_set_vf_guid);
* @page_size: page vector desired page size
*
* Constraints:
+ *
* - The first sg element is allowed to have an offset.
* - Each sg element must either be aligned to page_size or virtually
* contiguous to the previous element. In case an sg element has a
@@ -1834,12 +2717,12 @@ EXPORT_SYMBOL(ib_set_vf_guid);
int ib_map_mr_sg(struct ib_mr *mr, struct scatterlist *sg, int sg_nents,
unsigned int *sg_offset, unsigned int page_size)
{
- if (unlikely(!mr->device->map_mr_sg))
- return -ENOSYS;
+ if (unlikely(!mr->device->ops.map_mr_sg))
+ return -EOPNOTSUPP;
mr->page_size = page_size;
- return mr->device->map_mr_sg(mr, sg, sg_nents, sg_offset);
+ return mr->device->ops.map_mr_sg(mr, sg, sg_nents, sg_offset);
}
EXPORT_SYMBOL(ib_map_mr_sg);
@@ -1849,10 +2732,12 @@ EXPORT_SYMBOL(ib_map_mr_sg);
* @mr: memory region
* @sgl: dma mapped scatterlist
* @sg_nents: number of entries in sg
- * @sg_offset_p: IN: start offset in bytes into sg
- * OUT: offset in bytes for element n of the sg of the first
+ * @sg_offset_p: ==== =======================================================
+ * IN start offset in bytes into sg
+ * OUT offset in bytes for element n of the sg of the first
* byte that has not been processed where n is the return
* value of this function.
+ * ==== =======================================================
* @set_page: driver page assignment function pointer
*
* Core service helper for drivers to convert the largest
@@ -1949,34 +2834,38 @@ static void ib_drain_qp_done(struct ib_cq *cq, struct ib_wc *wc)
*/
static void __ib_drain_sq(struct ib_qp *qp)
{
+ struct ib_cq *cq = qp->send_cq;
struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
struct ib_drain_cqe sdrain;
- struct ib_send_wr swr = {}, *bad_swr;
+ struct ib_rdma_wr swr = {
+ .wr = {
+ .next = NULL,
+ { .wr_cqe = &sdrain.cqe, },
+ .opcode = IB_WR_RDMA_WRITE,
+ },
+ };
int ret;
- if (qp->send_cq->poll_ctx == IB_POLL_DIRECT) {
- WARN_ONCE(qp->send_cq->poll_ctx == IB_POLL_DIRECT,
- "IB_POLL_DIRECT poll_ctx not supported for drain\n");
- return;
- }
-
- swr.wr_cqe = &sdrain.cqe;
- sdrain.cqe.done = ib_drain_qp_done;
- init_completion(&sdrain.done);
-
ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
if (ret) {
WARN_ONCE(ret, "failed to drain send queue: %d\n", ret);
return;
}
- ret = ib_post_send(qp, &swr, &bad_swr);
+ sdrain.cqe.done = ib_drain_qp_done;
+ init_completion(&sdrain.done);
+
+ ret = ib_post_send(qp, &swr.wr, NULL);
if (ret) {
WARN_ONCE(ret, "failed to drain send queue: %d\n", ret);
return;
}
- wait_for_completion(&sdrain.done);
+ if (cq->poll_ctx == IB_POLL_DIRECT)
+ while (wait_for_completion_timeout(&sdrain.done, HZ / 10) <= 0)
+ ib_process_cq_direct(cq, -1);
+ else
+ wait_for_completion(&sdrain.done);
}
/*
@@ -1984,14 +2873,15 @@ static void __ib_drain_sq(struct ib_qp *qp)
*/
static void __ib_drain_rq(struct ib_qp *qp)
{
+ struct ib_cq *cq = qp->recv_cq;
struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
struct ib_drain_cqe rdrain;
- struct ib_recv_wr rwr = {}, *bad_rwr;
+ struct ib_recv_wr rwr = {};
int ret;
- if (qp->recv_cq->poll_ctx == IB_POLL_DIRECT) {
- WARN_ONCE(qp->recv_cq->poll_ctx == IB_POLL_DIRECT,
- "IB_POLL_DIRECT poll_ctx not supported for drain\n");
+ ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
+ if (ret) {
+ WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret);
return;
}
@@ -1999,19 +2889,83 @@ static void __ib_drain_rq(struct ib_qp *qp)
rdrain.cqe.done = ib_drain_qp_done;
init_completion(&rdrain.done);
- ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
+ ret = ib_post_recv(qp, &rwr, NULL);
if (ret) {
WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret);
return;
}
- ret = ib_post_recv(qp, &rwr, &bad_rwr);
+ if (cq->poll_ctx == IB_POLL_DIRECT)
+ while (wait_for_completion_timeout(&rdrain.done, HZ / 10) <= 0)
+ ib_process_cq_direct(cq, -1);
+ else
+ wait_for_completion(&rdrain.done);
+}
+
+/*
+ * __ib_drain_srq() - Block until Last WQE Reached event arrives, or timeout
+ * expires.
+ * @qp: queue pair associated with SRQ to drain
+ *
+ * Quoting 10.3.1 Queue Pair and EE Context States:
+ *
+ * Note, for QPs that are associated with an SRQ, the Consumer should take the
+ * QP through the Error State before invoking a Destroy QP or a Modify QP to the
+ * Reset State. The Consumer may invoke the Destroy QP without first performing
+ * a Modify QP to the Error State and waiting for the Affiliated Asynchronous
+ * Last WQE Reached Event. However, if the Consumer does not wait for the
+ * Affiliated Asynchronous Last WQE Reached Event, then WQE and Data Segment
+ * leakage may occur. Therefore, it is good programming practice to tear down a
+ * QP that is associated with an SRQ by using the following process:
+ *
+ * - Put the QP in the Error State
+ * - Wait for the Affiliated Asynchronous Last WQE Reached Event;
+ * - either:
+ * drain the CQ by invoking the Poll CQ verb and either wait for CQ
+ * to be empty or the number of Poll CQ operations has exceeded
+ * CQ capacity size;
+ * - or
+ * post another WR that completes on the same CQ and wait for this
+ * WR to return as a WC;
+ * - and then invoke a Destroy QP or Reset QP.
+ *
+ * We use the first option.
+ */
+static void __ib_drain_srq(struct ib_qp *qp)
+{
+ struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
+ struct ib_cq *cq;
+ int n, polled = 0;
+ int ret;
+
+ if (!qp->srq) {
+ WARN_ONCE(1, "QP 0x%p is not associated with SRQ\n", qp);
+ return;
+ }
+
+ ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
if (ret) {
- WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret);
+ WARN_ONCE(ret, "failed to drain shared recv queue: %d\n", ret);
+ return;
+ }
+
+ if (ib_srq_has_cq(qp->srq->srq_type)) {
+ cq = qp->srq->ext.cq;
+ } else if (qp->recv_cq) {
+ cq = qp->recv_cq;
+ } else {
+ WARN_ONCE(1, "QP 0x%p has no CQ associated with SRQ\n", qp);
return;
}
- wait_for_completion(&rdrain.done);
+ if (wait_for_completion_timeout(&qp->srq_completion, 60 * HZ) > 0) {
+ while (polled != cq->cqe) {
+ n = ib_process_cq_direct(cq, cq->cqe - polled);
+ if (!n)
+ return;
+ polled += n;
+ }
+ }
}
/**
@@ -2028,18 +2982,18 @@ static void __ib_drain_rq(struct ib_qp *qp)
* ensure there is room in the CQ and SQ for the drain work request and
* completion.
*
- * allocate the CQ using ib_alloc_cq() and the CQ poll context cannot be
- * IB_POLL_DIRECT.
+ * allocate the CQ using ib_alloc_cq().
*
* ensure that there are no other contexts that are posting WRs concurrently.
* Otherwise the drain is not guaranteed.
*/
void ib_drain_sq(struct ib_qp *qp)
{
- if (qp->device->drain_sq)
- qp->device->drain_sq(qp);
+ if (qp->device->ops.drain_sq)
+ qp->device->ops.drain_sq(qp);
else
__ib_drain_sq(qp);
+ trace_cq_drain_complete(qp->send_cq);
}
EXPORT_SYMBOL(ib_drain_sq);
@@ -2057,18 +3011,18 @@ EXPORT_SYMBOL(ib_drain_sq);
* ensure there is room in the CQ and RQ for the drain work request and
* completion.
*
- * allocate the CQ using ib_alloc_cq() and the CQ poll context cannot be
- * IB_POLL_DIRECT.
+ * allocate the CQ using ib_alloc_cq().
*
* ensure that there are no other contexts that are posting WRs concurrently.
* Otherwise the drain is not guaranteed.
*/
void ib_drain_rq(struct ib_qp *qp)
{
- if (qp->device->drain_rq)
- qp->device->drain_rq(qp);
+ if (qp->device->ops.drain_rq)
+ qp->device->ops.drain_rq(qp);
else
__ib_drain_rq(qp);
+ trace_cq_drain_complete(qp->recv_cq);
}
EXPORT_SYMBOL(ib_drain_rq);
@@ -2082,8 +3036,7 @@ EXPORT_SYMBOL(ib_drain_rq);
* ensure there is room in the CQ(s), SQ, and RQ for drain work requests
* and completions.
*
- * allocate the CQs using ib_alloc_cq() and the CQ poll context cannot be
- * IB_POLL_DIRECT.
+ * allocate the CQs using ib_alloc_cq().
*
* ensure that there are no other contexts that are posting WRs concurrently.
* Otherwise the drain is not guaranteed.
@@ -2093,5 +3046,142 @@ void ib_drain_qp(struct ib_qp *qp)
ib_drain_sq(qp);
if (!qp->srq)
ib_drain_rq(qp);
+ else
+ __ib_drain_srq(qp);
}
EXPORT_SYMBOL(ib_drain_qp);
+
+struct net_device *rdma_alloc_netdev(struct ib_device *device, u32 port_num,
+ enum rdma_netdev_t type, const char *name,
+ unsigned char name_assign_type,
+ void (*setup)(struct net_device *))
+{
+ struct rdma_netdev_alloc_params params;
+ struct net_device *netdev;
+ int rc;
+
+ if (!device->ops.rdma_netdev_get_params)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ rc = device->ops.rdma_netdev_get_params(device, port_num, type,
+ &params);
+ if (rc)
+ return ERR_PTR(rc);
+
+ netdev = alloc_netdev_mqs(params.sizeof_priv, name, name_assign_type,
+ setup, params.txqs, params.rxqs);
+ if (!netdev)
+ return ERR_PTR(-ENOMEM);
+
+ return netdev;
+}
+EXPORT_SYMBOL(rdma_alloc_netdev);
+
+int rdma_init_netdev(struct ib_device *device, u32 port_num,
+ enum rdma_netdev_t type, const char *name,
+ unsigned char name_assign_type,
+ void (*setup)(struct net_device *),
+ struct net_device *netdev)
+{
+ struct rdma_netdev_alloc_params params;
+ int rc;
+
+ if (!device->ops.rdma_netdev_get_params)
+ return -EOPNOTSUPP;
+
+ rc = device->ops.rdma_netdev_get_params(device, port_num, type,
+ &params);
+ if (rc)
+ return rc;
+
+ return params.initialize_rdma_netdev(device, port_num,
+ netdev, params.param);
+}
+EXPORT_SYMBOL(rdma_init_netdev);
+
+void __rdma_block_iter_start(struct ib_block_iter *biter,
+ struct scatterlist *sglist, unsigned int nents,
+ unsigned long pgsz)
+{
+ memset(biter, 0, sizeof(struct ib_block_iter));
+ biter->__sg = sglist;
+ biter->__sg_nents = nents;
+
+ /* Driver provides best block size to use */
+ biter->__pg_bit = __fls(pgsz);
+}
+EXPORT_SYMBOL(__rdma_block_iter_start);
+
+bool __rdma_block_iter_next(struct ib_block_iter *biter)
+{
+ unsigned int block_offset;
+ unsigned int delta;
+
+ if (!biter->__sg_nents || !biter->__sg)
+ return false;
+
+ biter->__dma_addr = sg_dma_address(biter->__sg) + biter->__sg_advance;
+ block_offset = biter->__dma_addr & (BIT_ULL(biter->__pg_bit) - 1);
+ delta = BIT_ULL(biter->__pg_bit) - block_offset;
+
+ while (biter->__sg_nents && biter->__sg &&
+ sg_dma_len(biter->__sg) - biter->__sg_advance <= delta) {
+ delta -= sg_dma_len(biter->__sg) - biter->__sg_advance;
+ biter->__sg_advance = 0;
+ biter->__sg = sg_next(biter->__sg);
+ biter->__sg_nents--;
+ }
+ biter->__sg_advance += delta;
+
+ return true;
+}
+EXPORT_SYMBOL(__rdma_block_iter_next);
+
+/**
+ * rdma_alloc_hw_stats_struct - Helper function to allocate dynamic struct
+ * for the drivers.
+ * @descs: array of static descriptors
+ * @num_counters: number of elements in array
+ * @lifespan: milliseconds between updates
+ */
+struct rdma_hw_stats *rdma_alloc_hw_stats_struct(
+ const struct rdma_stat_desc *descs, int num_counters,
+ unsigned long lifespan)
+{
+ struct rdma_hw_stats *stats;
+
+ stats = kzalloc(struct_size(stats, value, num_counters), GFP_KERNEL);
+ if (!stats)
+ return NULL;
+
+ stats->is_disabled = kcalloc(BITS_TO_LONGS(num_counters),
+ sizeof(*stats->is_disabled), GFP_KERNEL);
+ if (!stats->is_disabled)
+ goto err;
+
+ stats->descs = descs;
+ stats->num_counters = num_counters;
+ stats->lifespan = msecs_to_jiffies(lifespan);
+ mutex_init(&stats->lock);
+
+ return stats;
+
+err:
+ kfree(stats);
+ return NULL;
+}
+EXPORT_SYMBOL(rdma_alloc_hw_stats_struct);
+
+/**
+ * rdma_free_hw_stats_struct - Helper function to release rdma_hw_stats
+ * @stats: statistics to release
+ */
+void rdma_free_hw_stats_struct(struct rdma_hw_stats *stats)
+{
+ if (!stats)
+ return;
+
+ kfree(stats->is_disabled);
+ kfree(stats);
+}
+EXPORT_SYMBOL(rdma_free_hw_stats_struct);
diff --git a/drivers/infiniband/hw/Makefile b/drivers/infiniband/hw/Makefile
index ed553de2ca12..c42b22ac3303 100644
--- a/drivers/infiniband/hw/Makefile
+++ b/drivers/infiniband/hw/Makefile
@@ -1,14 +1,18 @@
+# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_INFINIBAND_MTHCA) += mthca/
-obj-$(CONFIG_INFINIBAND_QIB) += qib/
-obj-$(CONFIG_INFINIBAND_CXGB3) += cxgb3/
obj-$(CONFIG_INFINIBAND_CXGB4) += cxgb4/
-obj-$(CONFIG_INFINIBAND_I40IW) += i40iw/
+obj-$(CONFIG_INFINIBAND_EFA) += efa/
+obj-$(CONFIG_INFINIBAND_IRDMA) += irdma/
+obj-$(CONFIG_MANA_INFINIBAND) += mana/
obj-$(CONFIG_MLX4_INFINIBAND) += mlx4/
obj-$(CONFIG_MLX5_INFINIBAND) += mlx5/
-obj-$(CONFIG_INFINIBAND_NES) += nes/
obj-$(CONFIG_INFINIBAND_OCRDMA) += ocrdma/
obj-$(CONFIG_INFINIBAND_VMWARE_PVRDMA) += vmw_pvrdma/
obj-$(CONFIG_INFINIBAND_USNIC) += usnic/
obj-$(CONFIG_INFINIBAND_HFI1) += hfi1/
-obj-$(CONFIG_INFINIBAND_HNS) += hns/
+obj-$(CONFIG_INFINIBAND_HNS_HIP08) += hns/
obj-$(CONFIG_INFINIBAND_QEDR) += qedr/
+obj-$(CONFIG_INFINIBAND_BNXT_RE) += bnxt_re/
+obj-$(CONFIG_INFINIBAND_BNG_RE) += bng_re/
+obj-$(CONFIG_INFINIBAND_ERDMA) += erdma/
+obj-$(CONFIG_INFINIBAND_IONIC) += ionic/
diff --git a/drivers/infiniband/hw/bng_re/Kconfig b/drivers/infiniband/hw/bng_re/Kconfig
new file mode 100644
index 000000000000..85845f72c64d
--- /dev/null
+++ b/drivers/infiniband/hw/bng_re/Kconfig
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0-only
+config INFINIBAND_BNG_RE
+ tristate "Broadcom Next generation RoCE HCA support"
+ depends on 64BIT
+ depends on INET && DCB && BNGE
+ help
+ This driver supports Broadcom Next generation
+ 50/100/200/400/800 gigabit RoCE HCAs. The module
+ will be called bng_re. To compile this driver
+ as a module, choose M here.
diff --git a/drivers/infiniband/hw/bng_re/Makefile b/drivers/infiniband/hw/bng_re/Makefile
new file mode 100644
index 000000000000..c6aaaf853c77
--- /dev/null
+++ b/drivers/infiniband/hw/bng_re/Makefile
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: GPL-2.0
+ccflags-y := -I $(srctree)/drivers/net/ethernet/broadcom/bnge -I $(srctree)/drivers/infiniband/hw/bnxt_re
+
+obj-$(CONFIG_INFINIBAND_BNG_RE) += bng_re.o
+
+bng_re-y := bng_dev.o bng_fw.o \
+ bng_res.o bng_sp.o \
+ bng_debugfs.o
diff --git a/drivers/infiniband/hw/bng_re/bng_debugfs.c b/drivers/infiniband/hw/bng_re/bng_debugfs.c
new file mode 100644
index 000000000000..9ec5a8785250
--- /dev/null
+++ b/drivers/infiniband/hw/bng_re/bng_debugfs.c
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2025 Broadcom.
+#include <linux/debugfs.h>
+#include <linux/pci.h>
+
+#include <rdma/ib_verbs.h>
+
+#include "bng_res.h"
+#include "bng_fw.h"
+#include "bnge.h"
+#include "bnge_auxr.h"
+#include "bng_re.h"
+#include "bng_debugfs.h"
+
+static struct dentry *bng_re_debugfs_root;
+
+void bng_re_debugfs_add_pdev(struct bng_re_dev *rdev)
+{
+ struct pci_dev *pdev = rdev->aux_dev->pdev;
+
+ rdev->dbg_root =
+ debugfs_create_dir(dev_name(&pdev->dev), bng_re_debugfs_root);
+}
+
+void bng_re_debugfs_rem_pdev(struct bng_re_dev *rdev)
+{
+ debugfs_remove_recursive(rdev->dbg_root);
+ rdev->dbg_root = NULL;
+}
+
+void bng_re_register_debugfs(void)
+{
+ bng_re_debugfs_root = debugfs_create_dir("bng_re", NULL);
+}
+
+void bng_re_unregister_debugfs(void)
+{
+ debugfs_remove(bng_re_debugfs_root);
+}
diff --git a/drivers/infiniband/hw/bng_re/bng_debugfs.h b/drivers/infiniband/hw/bng_re/bng_debugfs.h
new file mode 100644
index 000000000000..baef71df4242
--- /dev/null
+++ b/drivers/infiniband/hw/bng_re/bng_debugfs.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+// Copyright (c) 2025 Broadcom.
+
+#ifndef __BNG_RE_DEBUGFS__
+#define __BNG_RE_DEBUGFS__
+
+void bng_re_debugfs_add_pdev(struct bng_re_dev *rdev);
+void bng_re_debugfs_rem_pdev(struct bng_re_dev *rdev);
+
+void bng_re_register_debugfs(void);
+void bng_re_unregister_debugfs(void);
+#endif
diff --git a/drivers/infiniband/hw/bng_re/bng_dev.c b/drivers/infiniband/hw/bng_re/bng_dev.c
new file mode 100644
index 000000000000..d8f8d7f7075f
--- /dev/null
+++ b/drivers/infiniband/hw/bng_re/bng_dev.c
@@ -0,0 +1,534 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2025 Broadcom.
+
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/auxiliary_bus.h>
+
+#include <rdma/ib_verbs.h>
+
+#include "bng_res.h"
+#include "bng_sp.h"
+#include "bng_fw.h"
+#include "bnge.h"
+#include "bnge_auxr.h"
+#include "bng_re.h"
+#include "bnge_hwrm.h"
+#include "bng_debugfs.h"
+
+MODULE_AUTHOR("Siva Reddy Kallam <siva.kallam@broadcom.com>");
+MODULE_DESCRIPTION(BNG_RE_DESC);
+MODULE_LICENSE("Dual BSD/GPL");
+
+static struct bng_re_dev *bng_re_dev_add(struct auxiliary_device *adev,
+ struct bnge_auxr_dev *aux_dev)
+{
+ struct bng_re_dev *rdev;
+
+ /* Allocate bng_re_dev instance */
+ rdev = ib_alloc_device(bng_re_dev, ibdev);
+ if (!rdev) {
+ pr_err("%s: bng_re_dev allocation failure!", KBUILD_MODNAME);
+ return NULL;
+ }
+
+ /* Assign auxiliary device specific data */
+ rdev->netdev = aux_dev->net;
+ rdev->aux_dev = aux_dev;
+ rdev->adev = adev;
+ rdev->fn_id = rdev->aux_dev->pdev->devfn;
+
+ return rdev;
+}
+
+
+static int bng_re_register_netdev(struct bng_re_dev *rdev)
+{
+ struct bnge_auxr_dev *aux_dev;
+
+ aux_dev = rdev->aux_dev;
+ return bnge_register_dev(aux_dev, rdev->adev);
+}
+
+static void bng_re_destroy_chip_ctx(struct bng_re_dev *rdev)
+{
+ struct bng_re_chip_ctx *chip_ctx;
+
+ if (!rdev->chip_ctx)
+ return;
+
+ kfree(rdev->dev_attr);
+ rdev->dev_attr = NULL;
+
+ chip_ctx = rdev->chip_ctx;
+ rdev->chip_ctx = NULL;
+ rdev->rcfw.res = NULL;
+ rdev->bng_res.cctx = NULL;
+ rdev->bng_res.pdev = NULL;
+ kfree(chip_ctx);
+}
+
+static int bng_re_setup_chip_ctx(struct bng_re_dev *rdev)
+{
+ struct bng_re_chip_ctx *chip_ctx;
+ struct bnge_auxr_dev *aux_dev;
+ int rc = -ENOMEM;
+
+ aux_dev = rdev->aux_dev;
+ rdev->bng_res.pdev = aux_dev->pdev;
+ rdev->rcfw.res = &rdev->bng_res;
+ chip_ctx = kzalloc(sizeof(*chip_ctx), GFP_KERNEL);
+ if (!chip_ctx)
+ return -ENOMEM;
+ chip_ctx->chip_num = aux_dev->chip_num;
+ chip_ctx->hw_stats_size = aux_dev->hw_ring_stats_size;
+
+ rdev->chip_ctx = chip_ctx;
+ rdev->bng_res.cctx = rdev->chip_ctx;
+ rdev->dev_attr = kzalloc(sizeof(*rdev->dev_attr), GFP_KERNEL);
+ if (!rdev->dev_attr)
+ goto free_chip_ctx;
+ rdev->bng_res.dattr = rdev->dev_attr;
+
+ return 0;
+free_chip_ctx:
+ kfree(rdev->chip_ctx);
+ rdev->chip_ctx = NULL;
+ return rc;
+}
+
+static void bng_re_init_hwrm_hdr(struct input *hdr, u16 opcd)
+{
+ hdr->req_type = cpu_to_le16(opcd);
+ hdr->cmpl_ring = cpu_to_le16(-1);
+ hdr->target_id = cpu_to_le16(-1);
+}
+
+static void bng_re_fill_fw_msg(struct bnge_fw_msg *fw_msg, void *msg,
+ int msg_len, void *resp, int resp_max_len,
+ int timeout)
+{
+ fw_msg->msg = msg;
+ fw_msg->msg_len = msg_len;
+ fw_msg->resp = resp;
+ fw_msg->resp_max_len = resp_max_len;
+ fw_msg->timeout = timeout;
+}
+
+static int bng_re_net_ring_free(struct bng_re_dev *rdev,
+ u16 fw_ring_id, int type)
+{
+ struct bnge_auxr_dev *aux_dev = rdev->aux_dev;
+ struct hwrm_ring_free_input req = {};
+ struct hwrm_ring_free_output resp;
+ struct bnge_fw_msg fw_msg = {};
+ int rc = -EINVAL;
+
+ if (!rdev)
+ return rc;
+
+ if (!aux_dev)
+ return rc;
+
+ bng_re_init_hwrm_hdr((void *)&req, HWRM_RING_FREE);
+ req.ring_type = type;
+ req.ring_id = cpu_to_le16(fw_ring_id);
+ bng_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
+ sizeof(resp), BNGE_DFLT_HWRM_CMD_TIMEOUT);
+ rc = bnge_send_msg(aux_dev, &fw_msg);
+ if (rc)
+ ibdev_err(&rdev->ibdev, "Failed to free HW ring:%d :%#x",
+ req.ring_id, rc);
+ return rc;
+}
+
+static int bng_re_net_ring_alloc(struct bng_re_dev *rdev,
+ struct bng_re_ring_attr *ring_attr,
+ u16 *fw_ring_id)
+{
+ struct bnge_auxr_dev *aux_dev = rdev->aux_dev;
+ struct hwrm_ring_alloc_input req = {};
+ struct hwrm_ring_alloc_output resp;
+ struct bnge_fw_msg fw_msg = {};
+ int rc = -EINVAL;
+
+ if (!aux_dev)
+ return rc;
+
+ bng_re_init_hwrm_hdr((void *)&req, HWRM_RING_ALLOC);
+ req.enables = 0;
+ req.page_tbl_addr = cpu_to_le64(ring_attr->dma_arr[0]);
+ if (ring_attr->pages > 1) {
+ /* Page size is in log2 units */
+ req.page_size = BNGE_PAGE_SHIFT;
+ req.page_tbl_depth = 1;
+ }
+ req.fbo = 0;
+ /* Association of ring index with doorbell index and MSIX number */
+ req.logical_id = cpu_to_le16(ring_attr->lrid);
+ req.length = cpu_to_le32(ring_attr->depth + 1);
+ req.ring_type = ring_attr->type;
+ req.int_mode = ring_attr->mode;
+ bng_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
+ sizeof(resp), BNGE_DFLT_HWRM_CMD_TIMEOUT);
+ rc = bnge_send_msg(aux_dev, &fw_msg);
+ if (!rc)
+ *fw_ring_id = le16_to_cpu(resp.ring_id);
+
+ return rc;
+}
+
+static int bng_re_stats_ctx_free(struct bng_re_dev *rdev)
+{
+ struct bnge_auxr_dev *aux_dev = rdev->aux_dev;
+ struct hwrm_stat_ctx_free_input req = {};
+ struct hwrm_stat_ctx_free_output resp = {};
+ struct bnge_fw_msg fw_msg = {};
+ int rc = -EINVAL;
+
+ if (!aux_dev)
+ return rc;
+
+ bng_re_init_hwrm_hdr((void *)&req, HWRM_STAT_CTX_FREE);
+ req.stat_ctx_id = cpu_to_le32(rdev->stats_ctx.fw_id);
+ bng_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
+ sizeof(resp), BNGE_DFLT_HWRM_CMD_TIMEOUT);
+ rc = bnge_send_msg(aux_dev, &fw_msg);
+ if (rc)
+ ibdev_err(&rdev->ibdev, "Failed to free HW stats context %#x",
+ rc);
+
+ return rc;
+}
+
+static int bng_re_stats_ctx_alloc(struct bng_re_dev *rdev)
+{
+ struct bnge_auxr_dev *aux_dev = rdev->aux_dev;
+ struct bng_re_stats *stats = &rdev->stats_ctx;
+ struct hwrm_stat_ctx_alloc_output resp = {};
+ struct hwrm_stat_ctx_alloc_input req = {};
+ struct bnge_fw_msg fw_msg = {};
+ int rc = -EINVAL;
+
+ stats->fw_id = BNGE_INVALID_STATS_CTX_ID;
+
+ if (!aux_dev)
+ return rc;
+
+ bng_re_init_hwrm_hdr((void *)&req, HWRM_STAT_CTX_ALLOC);
+ req.update_period_ms = cpu_to_le32(1000);
+ req.stats_dma_addr = cpu_to_le64(stats->dma_map);
+ req.stats_dma_length = cpu_to_le16(rdev->chip_ctx->hw_stats_size);
+ req.stat_ctx_flags = STAT_CTX_ALLOC_REQ_STAT_CTX_FLAGS_ROCE;
+ bng_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
+ sizeof(resp), BNGE_DFLT_HWRM_CMD_TIMEOUT);
+ rc = bnge_send_msg(aux_dev, &fw_msg);
+ if (!rc)
+ stats->fw_id = le32_to_cpu(resp.stat_ctx_id);
+ return rc;
+}
+
+static void bng_re_query_hwrm_version(struct bng_re_dev *rdev)
+{
+ struct bnge_auxr_dev *aux_dev = rdev->aux_dev;
+ struct hwrm_ver_get_output ver_get_resp = {};
+ struct hwrm_ver_get_input ver_get_req = {};
+ struct bng_re_chip_ctx *cctx;
+ struct bnge_fw_msg fw_msg = {};
+ int rc;
+
+ bng_re_init_hwrm_hdr((void *)&ver_get_req, HWRM_VER_GET);
+ ver_get_req.hwrm_intf_maj = HWRM_VERSION_MAJOR;
+ ver_get_req.hwrm_intf_min = HWRM_VERSION_MINOR;
+ ver_get_req.hwrm_intf_upd = HWRM_VERSION_UPDATE;
+ bng_re_fill_fw_msg(&fw_msg, (void *)&ver_get_req, sizeof(ver_get_req),
+ (void *)&ver_get_resp, sizeof(ver_get_resp),
+ BNGE_DFLT_HWRM_CMD_TIMEOUT);
+ rc = bnge_send_msg(aux_dev, &fw_msg);
+ if (rc) {
+ ibdev_err(&rdev->ibdev, "Failed to query HW version, rc = 0x%x",
+ rc);
+ return;
+ }
+
+ cctx = rdev->chip_ctx;
+ cctx->hwrm_intf_ver =
+ (u64)le16_to_cpu(ver_get_resp.hwrm_intf_major) << 48 |
+ (u64)le16_to_cpu(ver_get_resp.hwrm_intf_minor) << 32 |
+ (u64)le16_to_cpu(ver_get_resp.hwrm_intf_build) << 16 |
+ le16_to_cpu(ver_get_resp.hwrm_intf_patch);
+
+ cctx->hwrm_cmd_max_timeout = le16_to_cpu(ver_get_resp.max_req_timeout);
+
+ if (!cctx->hwrm_cmd_max_timeout)
+ cctx->hwrm_cmd_max_timeout = BNG_ROCE_FW_MAX_TIMEOUT;
+}
+
+static void bng_re_dev_uninit(struct bng_re_dev *rdev)
+{
+ int rc;
+ bng_re_debugfs_rem_pdev(rdev);
+
+ if (test_and_clear_bit(BNG_RE_FLAG_RCFW_CHANNEL_EN, &rdev->flags)) {
+ rc = bng_re_deinit_rcfw(&rdev->rcfw);
+ if (rc)
+ ibdev_warn(&rdev->ibdev,
+ "Failed to deinitialize RCFW: %#x", rc);
+ bng_re_stats_ctx_free(rdev);
+ bng_re_free_stats_ctx_mem(rdev->bng_res.pdev, &rdev->stats_ctx);
+ bng_re_disable_rcfw_channel(&rdev->rcfw);
+ bng_re_net_ring_free(rdev, rdev->rcfw.creq.ring_id,
+ RING_ALLOC_REQ_RING_TYPE_NQ);
+ bng_re_free_rcfw_channel(&rdev->rcfw);
+ }
+
+ kfree(rdev->nqr);
+ rdev->nqr = NULL;
+ bng_re_destroy_chip_ctx(rdev);
+ if (test_and_clear_bit(BNG_RE_FLAG_NETDEV_REGISTERED, &rdev->flags))
+ bnge_unregister_dev(rdev->aux_dev);
+}
+
+static int bng_re_dev_init(struct bng_re_dev *rdev)
+{
+ struct bng_re_ring_attr rattr = {};
+ struct bng_re_creq_ctx *creq;
+ u32 db_offt;
+ int vid;
+ u8 type;
+ int rc;
+
+ /* Registered a new RoCE device instance to netdev */
+ rc = bng_re_register_netdev(rdev);
+ if (rc) {
+ ibdev_err(&rdev->ibdev,
+ "Failed to register with netedev: %#x\n", rc);
+ return -EINVAL;
+ }
+
+ set_bit(BNG_RE_FLAG_NETDEV_REGISTERED, &rdev->flags);
+
+ if (rdev->aux_dev->auxr_info->msix_requested < BNG_RE_MIN_MSIX) {
+ ibdev_err(&rdev->ibdev,
+ "RoCE requires minimum 2 MSI-X vectors, but only %d reserved\n",
+ rdev->aux_dev->auxr_info->msix_requested);
+ bnge_unregister_dev(rdev->aux_dev);
+ clear_bit(BNG_RE_FLAG_NETDEV_REGISTERED, &rdev->flags);
+ return -EINVAL;
+ }
+ ibdev_dbg(&rdev->ibdev, "Got %d MSI-X vectors\n",
+ rdev->aux_dev->auxr_info->msix_requested);
+
+ rc = bng_re_setup_chip_ctx(rdev);
+ if (rc) {
+ bnge_unregister_dev(rdev->aux_dev);
+ clear_bit(BNG_RE_FLAG_NETDEV_REGISTERED, &rdev->flags);
+ ibdev_err(&rdev->ibdev, "Failed to get chip context\n");
+ return -EINVAL;
+ }
+
+ bng_re_query_hwrm_version(rdev);
+
+ rc = bng_re_alloc_fw_channel(&rdev->bng_res, &rdev->rcfw);
+ if (rc) {
+ ibdev_err(&rdev->ibdev,
+ "Failed to allocate RCFW Channel: %#x\n", rc);
+ goto fail;
+ }
+
+ /* Allocate nq record memory */
+ rdev->nqr = kzalloc(sizeof(*rdev->nqr), GFP_KERNEL);
+ if (!rdev->nqr) {
+ bng_re_destroy_chip_ctx(rdev);
+ bnge_unregister_dev(rdev->aux_dev);
+ clear_bit(BNG_RE_FLAG_NETDEV_REGISTERED, &rdev->flags);
+ return -ENOMEM;
+ }
+
+ rdev->nqr->num_msix = rdev->aux_dev->auxr_info->msix_requested;
+ memcpy(rdev->nqr->msix_entries, rdev->aux_dev->msix_info,
+ sizeof(struct bnge_msix_info) * rdev->nqr->num_msix);
+
+ type = RING_ALLOC_REQ_RING_TYPE_NQ;
+ creq = &rdev->rcfw.creq;
+ rattr.dma_arr = creq->hwq.pbl[BNG_PBL_LVL_0].pg_map_arr;
+ rattr.pages = creq->hwq.pbl[creq->hwq.level].pg_count;
+ rattr.type = type;
+ rattr.mode = RING_ALLOC_REQ_INT_MODE_MSIX;
+ rattr.depth = BNG_FW_CREQE_MAX_CNT - 1;
+ rattr.lrid = rdev->nqr->msix_entries[BNG_RE_CREQ_NQ_IDX].ring_idx;
+ rc = bng_re_net_ring_alloc(rdev, &rattr, &creq->ring_id);
+ if (rc) {
+ ibdev_err(&rdev->ibdev, "Failed to allocate CREQ: %#x\n", rc);
+ goto free_rcfw;
+ }
+ db_offt = rdev->nqr->msix_entries[BNG_RE_CREQ_NQ_IDX].db_offset;
+ vid = rdev->nqr->msix_entries[BNG_RE_CREQ_NQ_IDX].vector;
+
+ rc = bng_re_enable_fw_channel(&rdev->rcfw,
+ vid, db_offt);
+ if (rc) {
+ ibdev_err(&rdev->ibdev, "Failed to enable RCFW channel: %#x\n",
+ rc);
+ goto free_ring;
+ }
+
+ rc = bng_re_get_dev_attr(&rdev->rcfw);
+ if (rc)
+ goto disable_rcfw;
+
+ bng_re_debugfs_add_pdev(rdev);
+ rc = bng_re_alloc_stats_ctx_mem(rdev->bng_res.pdev, rdev->chip_ctx,
+ &rdev->stats_ctx);
+ if (rc) {
+ ibdev_err(&rdev->ibdev,
+ "Failed to allocate stats context: %#x\n", rc);
+ goto disable_rcfw;
+ }
+
+ rc = bng_re_stats_ctx_alloc(rdev);
+ if (rc) {
+ ibdev_err(&rdev->ibdev,
+ "Failed to allocate QPLIB context: %#x\n", rc);
+ goto free_stats_ctx;
+ }
+
+ rc = bng_re_init_rcfw(&rdev->rcfw, &rdev->stats_ctx);
+ if (rc) {
+ ibdev_err(&rdev->ibdev,
+ "Failed to initialize RCFW: %#x\n", rc);
+ goto free_sctx;
+ }
+ set_bit(BNG_RE_FLAG_RCFW_CHANNEL_EN, &rdev->flags);
+
+ return 0;
+free_sctx:
+ bng_re_stats_ctx_free(rdev);
+free_stats_ctx:
+ bng_re_free_stats_ctx_mem(rdev->bng_res.pdev, &rdev->stats_ctx);
+disable_rcfw:
+ bng_re_disable_rcfw_channel(&rdev->rcfw);
+free_ring:
+ bng_re_net_ring_free(rdev, rdev->rcfw.creq.ring_id, type);
+free_rcfw:
+ bng_re_free_rcfw_channel(&rdev->rcfw);
+fail:
+ bng_re_dev_uninit(rdev);
+ return rc;
+}
+
+static int bng_re_add_device(struct auxiliary_device *adev)
+{
+ struct bnge_auxr_priv *auxr_priv =
+ container_of(adev, struct bnge_auxr_priv, aux_dev);
+ struct bng_re_en_dev_info *dev_info;
+ struct bng_re_dev *rdev;
+ int rc;
+
+ dev_info = auxiliary_get_drvdata(adev);
+
+ rdev = bng_re_dev_add(adev, auxr_priv->auxr_dev);
+ if (!rdev) {
+ rc = -ENOMEM;
+ goto exit;
+ }
+
+ dev_info->rdev = rdev;
+
+ rc = bng_re_dev_init(rdev);
+ if (rc)
+ goto re_dev_dealloc;
+
+ return 0;
+
+re_dev_dealloc:
+ ib_dealloc_device(&rdev->ibdev);
+exit:
+ return rc;
+}
+
+
+static void bng_re_remove_device(struct bng_re_dev *rdev,
+ struct auxiliary_device *aux_dev)
+{
+ bng_re_dev_uninit(rdev);
+ ib_dealloc_device(&rdev->ibdev);
+}
+
+
+static int bng_re_probe(struct auxiliary_device *adev,
+ const struct auxiliary_device_id *id)
+{
+ struct bnge_auxr_priv *aux_priv =
+ container_of(adev, struct bnge_auxr_priv, aux_dev);
+ struct bng_re_en_dev_info *en_info;
+ int rc;
+
+ en_info = kzalloc(sizeof(*en_info), GFP_KERNEL);
+ if (!en_info)
+ return -ENOMEM;
+
+ en_info->auxr_dev = aux_priv->auxr_dev;
+
+ auxiliary_set_drvdata(adev, en_info);
+
+ rc = bng_re_add_device(adev);
+ if (rc)
+ kfree(en_info);
+
+ return rc;
+}
+
+static void bng_re_remove(struct auxiliary_device *adev)
+{
+ struct bng_re_en_dev_info *dev_info = auxiliary_get_drvdata(adev);
+ struct bng_re_dev *rdev;
+
+ rdev = dev_info->rdev;
+
+ if (rdev)
+ bng_re_remove_device(rdev, adev);
+ kfree(dev_info);
+}
+
+static const struct auxiliary_device_id bng_re_id_table[] = {
+ { .name = BNG_RE_ADEV_NAME ".rdma", },
+ {},
+};
+
+MODULE_DEVICE_TABLE(auxiliary, bng_re_id_table);
+
+static struct auxiliary_driver bng_re_driver = {
+ .name = "rdma",
+ .probe = bng_re_probe,
+ .remove = bng_re_remove,
+ .id_table = bng_re_id_table,
+};
+
+static int __init bng_re_mod_init(void)
+{
+ int rc;
+
+
+ bng_re_register_debugfs();
+
+ rc = auxiliary_driver_register(&bng_re_driver);
+ if (rc) {
+ pr_err("%s: Failed to register auxiliary driver\n",
+ KBUILD_MODNAME);
+ goto unreg_debugfs;
+ }
+ return 0;
+unreg_debugfs:
+ bng_re_unregister_debugfs();
+ return rc;
+}
+
+static void __exit bng_re_mod_exit(void)
+{
+ auxiliary_driver_unregister(&bng_re_driver);
+ bng_re_unregister_debugfs();
+}
+
+module_init(bng_re_mod_init);
+module_exit(bng_re_mod_exit);
diff --git a/drivers/infiniband/hw/bng_re/bng_fw.c b/drivers/infiniband/hw/bng_re/bng_fw.c
new file mode 100644
index 000000000000..7d9539113cf5
--- /dev/null
+++ b/drivers/infiniband/hw/bng_re/bng_fw.c
@@ -0,0 +1,767 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2025 Broadcom.
+#include <linux/pci.h>
+
+#include "roce_hsi.h"
+#include "bng_res.h"
+#include "bng_fw.h"
+#include "bng_sp.h"
+
+/**
+ * bng_re_map_rc - map return type based on opcode
+ * @opcode: roce slow path opcode
+ *
+ * case #1
+ * Firmware initiated error recovery is a safe state machine and
+ * driver can consider all the underlying rdma resources are free.
+ * In this state, it is safe to return success for opcodes related to
+ * destroying rdma resources (like destroy qp, destroy cq etc.).
+ *
+ * case #2
+ * If driver detect potential firmware stall, it is not safe state machine
+ * and the driver can not consider all the underlying rdma resources are
+ * freed.
+ * In this state, it is not safe to return success for opcodes related to
+ * destroying rdma resources (like destroy qp, destroy cq etc.).
+ *
+ * Scope of this helper function is only for case #1.
+ *
+ * Returns:
+ * 0 to communicate success to caller.
+ * Non zero error code to communicate failure to caller.
+ */
+static int bng_re_map_rc(u8 opcode)
+{
+ switch (opcode) {
+ case CMDQ_BASE_OPCODE_DESTROY_QP:
+ case CMDQ_BASE_OPCODE_DESTROY_SRQ:
+ case CMDQ_BASE_OPCODE_DESTROY_CQ:
+ case CMDQ_BASE_OPCODE_DEALLOCATE_KEY:
+ case CMDQ_BASE_OPCODE_DEREGISTER_MR:
+ case CMDQ_BASE_OPCODE_DELETE_GID:
+ case CMDQ_BASE_OPCODE_DESTROY_QP1:
+ case CMDQ_BASE_OPCODE_DESTROY_AH:
+ case CMDQ_BASE_OPCODE_DEINITIALIZE_FW:
+ case CMDQ_BASE_OPCODE_MODIFY_ROCE_CC:
+ case CMDQ_BASE_OPCODE_SET_LINK_AGGR_MODE:
+ return 0;
+ default:
+ return -ETIMEDOUT;
+ }
+}
+
+void bng_re_free_rcfw_channel(struct bng_re_rcfw *rcfw)
+{
+ kfree(rcfw->crsqe_tbl);
+ bng_re_free_hwq(rcfw->res, &rcfw->cmdq.hwq);
+ bng_re_free_hwq(rcfw->res, &rcfw->creq.hwq);
+ rcfw->pdev = NULL;
+}
+
+int bng_re_alloc_fw_channel(struct bng_re_res *res,
+ struct bng_re_rcfw *rcfw)
+{
+ struct bng_re_hwq_attr hwq_attr = {};
+ struct bng_re_sg_info sginfo = {};
+ struct bng_re_cmdq_ctx *cmdq;
+ struct bng_re_creq_ctx *creq;
+
+ rcfw->pdev = res->pdev;
+ cmdq = &rcfw->cmdq;
+ creq = &rcfw->creq;
+ rcfw->res = res;
+
+ sginfo.pgsize = PAGE_SIZE;
+ sginfo.pgshft = PAGE_SHIFT;
+
+ hwq_attr.sginfo = &sginfo;
+ hwq_attr.res = rcfw->res;
+ hwq_attr.depth = BNG_FW_CREQE_MAX_CNT;
+ hwq_attr.stride = BNG_FW_CREQE_UNITS;
+ hwq_attr.type = BNG_HWQ_TYPE_QUEUE;
+
+ if (bng_re_alloc_init_hwq(&creq->hwq, &hwq_attr)) {
+ dev_err(&rcfw->pdev->dev,
+ "HW channel CREQ allocation failed\n");
+ goto fail;
+ }
+
+ rcfw->cmdq_depth = BNG_FW_CMDQE_MAX_CNT;
+
+ sginfo.pgsize = bng_fw_cmdqe_page_size(rcfw->cmdq_depth);
+ hwq_attr.depth = rcfw->cmdq_depth & 0x7FFFFFFF;
+ hwq_attr.stride = BNG_FW_CMDQE_UNITS;
+ hwq_attr.type = BNG_HWQ_TYPE_CTX;
+ if (bng_re_alloc_init_hwq(&cmdq->hwq, &hwq_attr)) {
+ dev_err(&rcfw->pdev->dev,
+ "HW channel CMDQ allocation failed\n");
+ goto fail;
+ }
+
+ rcfw->crsqe_tbl = kcalloc(cmdq->hwq.max_elements,
+ sizeof(*rcfw->crsqe_tbl), GFP_KERNEL);
+ if (!rcfw->crsqe_tbl)
+ goto fail;
+
+ spin_lock_init(&rcfw->tbl_lock);
+
+ rcfw->max_timeout = res->cctx->hwrm_cmd_max_timeout;
+ return 0;
+
+fail:
+ bng_re_free_rcfw_channel(rcfw);
+ return -ENOMEM;
+}
+
+static int bng_re_process_qp_event(struct bng_re_rcfw *rcfw,
+ struct creq_qp_event *qp_event,
+ u32 *num_wait)
+{
+ struct bng_re_hwq *hwq = &rcfw->cmdq.hwq;
+ struct bng_re_crsqe *crsqe;
+ u32 req_size;
+ u16 cookie;
+ bool is_waiter_alive;
+ struct pci_dev *pdev;
+ u32 wait_cmds = 0;
+ int rc = 0;
+
+ pdev = rcfw->pdev;
+ switch (qp_event->event) {
+ case CREQ_QP_EVENT_EVENT_QP_ERROR_NOTIFICATION:
+ dev_err(&pdev->dev, "Received QP error notification\n");
+ break;
+ default:
+ /*
+ * Command Response
+ * cmdq->lock needs to be acquired to synchronie
+ * the command send and completion reaping. This function
+ * is always called with creq->lock held. Using
+ * the nested variant of spin_lock.
+ *
+ */
+
+ spin_lock_nested(&hwq->lock, SINGLE_DEPTH_NESTING);
+ cookie = le16_to_cpu(qp_event->cookie);
+ cookie &= BNG_FW_MAX_COOKIE_VALUE;
+ crsqe = &rcfw->crsqe_tbl[cookie];
+
+ if (WARN_ONCE(test_bit(FIRMWARE_STALL_DETECTED,
+ &rcfw->cmdq.flags),
+ "Unreponsive rcfw channel detected.!!")) {
+ dev_info(&pdev->dev,
+ "rcfw timedout: cookie = %#x, free_slots = %d",
+ cookie, crsqe->free_slots);
+ spin_unlock(&hwq->lock);
+ return rc;
+ }
+
+ if (crsqe->is_waiter_alive) {
+ if (crsqe->resp) {
+ memcpy(crsqe->resp, qp_event, sizeof(*qp_event));
+ /* Insert write memory barrier to ensure that
+ * response data is copied before clearing the
+ * flags
+ */
+ smp_wmb();
+ }
+ }
+
+ wait_cmds++;
+
+ req_size = crsqe->req_size;
+ is_waiter_alive = crsqe->is_waiter_alive;
+
+ crsqe->req_size = 0;
+ if (!is_waiter_alive)
+ crsqe->resp = NULL;
+
+ crsqe->is_in_used = false;
+
+ hwq->cons += req_size;
+
+ spin_unlock(&hwq->lock);
+ }
+ *num_wait += wait_cmds;
+ return rc;
+}
+
+/* function events */
+static int bng_re_process_func_event(struct bng_re_rcfw *rcfw,
+ struct creq_func_event *func_event)
+{
+ switch (func_event->event) {
+ case CREQ_FUNC_EVENT_EVENT_TX_WQE_ERROR:
+ case CREQ_FUNC_EVENT_EVENT_TX_DATA_ERROR:
+ case CREQ_FUNC_EVENT_EVENT_RX_WQE_ERROR:
+ case CREQ_FUNC_EVENT_EVENT_RX_DATA_ERROR:
+ case CREQ_FUNC_EVENT_EVENT_CQ_ERROR:
+ case CREQ_FUNC_EVENT_EVENT_TQM_ERROR:
+ case CREQ_FUNC_EVENT_EVENT_CFCQ_ERROR:
+ case CREQ_FUNC_EVENT_EVENT_CFCS_ERROR:
+ case CREQ_FUNC_EVENT_EVENT_CFCC_ERROR:
+ case CREQ_FUNC_EVENT_EVENT_CFCM_ERROR:
+ case CREQ_FUNC_EVENT_EVENT_TIM_ERROR:
+ case CREQ_FUNC_EVENT_EVENT_VF_COMM_REQUEST:
+ case CREQ_FUNC_EVENT_EVENT_RESOURCE_EXHAUSTED:
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/* CREQ Completion handlers */
+static void bng_re_service_creq(struct tasklet_struct *t)
+{
+ struct bng_re_rcfw *rcfw = from_tasklet(rcfw, t, creq.creq_tasklet);
+ struct bng_re_creq_ctx *creq = &rcfw->creq;
+ u32 type, budget = BNG_FW_CREQ_ENTRY_POLL_BUDGET;
+ struct bng_re_hwq *hwq = &creq->hwq;
+ struct creq_base *creqe;
+ u32 num_wakeup = 0;
+ u32 hw_polled = 0;
+
+ /* Service the CREQ until budget is over */
+ spin_lock_bh(&hwq->lock);
+ while (budget > 0) {
+ creqe = bng_re_get_qe(hwq, hwq->cons, NULL);
+ if (!BNG_FW_CREQ_CMP_VALID(creqe, creq->creq_db.dbinfo.flags))
+ break;
+ /* The valid test of the entry must be done first before
+ * reading any further.
+ */
+ dma_rmb();
+
+ type = creqe->type & CREQ_BASE_TYPE_MASK;
+ switch (type) {
+ case CREQ_BASE_TYPE_QP_EVENT:
+ bng_re_process_qp_event
+ (rcfw, (struct creq_qp_event *)creqe,
+ &num_wakeup);
+ creq->stats.creq_qp_event_processed++;
+ break;
+ case CREQ_BASE_TYPE_FUNC_EVENT:
+ if (!bng_re_process_func_event
+ (rcfw, (struct creq_func_event *)creqe))
+ creq->stats.creq_func_event_processed++;
+ else
+ dev_warn(&rcfw->pdev->dev,
+ "aeqe:%#x Not handled\n", type);
+ break;
+ default:
+ if (type != ASYNC_EVENT_CMPL_TYPE_HWRM_ASYNC_EVENT)
+ dev_warn(&rcfw->pdev->dev,
+ "creqe with event 0x%x not handled\n",
+ type);
+ break;
+ }
+ budget--;
+ hw_polled++;
+ bng_re_hwq_incr_cons(hwq->max_elements, &hwq->cons,
+ 1, &creq->creq_db.dbinfo.flags);
+ }
+
+ if (hw_polled)
+ bng_re_ring_nq_db(&creq->creq_db.dbinfo,
+ rcfw->res->cctx, true);
+ spin_unlock_bh(&hwq->lock);
+ if (num_wakeup)
+ wake_up_nr(&rcfw->cmdq.waitq, num_wakeup);
+}
+
+static int __send_message_basic_sanity(struct bng_re_rcfw *rcfw,
+ struct bng_re_cmdqmsg *msg,
+ u8 opcode)
+{
+ struct bng_re_cmdq_ctx *cmdq;
+
+ cmdq = &rcfw->cmdq;
+
+ if (test_bit(FIRMWARE_STALL_DETECTED, &cmdq->flags))
+ return -ETIMEDOUT;
+
+ if (test_bit(FIRMWARE_INITIALIZED_FLAG, &cmdq->flags) &&
+ opcode == CMDQ_BASE_OPCODE_INITIALIZE_FW) {
+ dev_err(&rcfw->pdev->dev, "RCFW already initialized!");
+ return -EINVAL;
+ }
+
+ if (!test_bit(FIRMWARE_INITIALIZED_FLAG, &cmdq->flags) &&
+ (opcode != CMDQ_BASE_OPCODE_QUERY_FUNC &&
+ opcode != CMDQ_BASE_OPCODE_INITIALIZE_FW &&
+ opcode != CMDQ_BASE_OPCODE_QUERY_VERSION)) {
+ dev_err(&rcfw->pdev->dev,
+ "RCFW not initialized, reject opcode 0x%x",
+ opcode);
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static int __send_message(struct bng_re_rcfw *rcfw,
+ struct bng_re_cmdqmsg *msg, u8 opcode)
+{
+ u32 bsize, free_slots, required_slots;
+ struct bng_re_cmdq_ctx *cmdq;
+ struct bng_re_crsqe *crsqe;
+ struct bng_fw_cmdqe *cmdqe;
+ struct bng_re_hwq *hwq;
+ u32 sw_prod, cmdq_prod;
+ struct pci_dev *pdev;
+ u16 cookie;
+ u8 *preq;
+
+ cmdq = &rcfw->cmdq;
+ hwq = &cmdq->hwq;
+ pdev = rcfw->pdev;
+
+ /* Cmdq are in 16-byte units, each request can consume 1 or more
+ * cmdqe
+ */
+ spin_lock_bh(&hwq->lock);
+ required_slots = bng_re_get_cmd_slots(msg->req);
+ free_slots = HWQ_FREE_SLOTS(hwq);
+ cookie = cmdq->seq_num & BNG_FW_MAX_COOKIE_VALUE;
+ crsqe = &rcfw->crsqe_tbl[cookie];
+
+ if (required_slots >= free_slots) {
+ dev_info_ratelimited(&pdev->dev,
+ "CMDQ is full req/free %d/%d!",
+ required_slots, free_slots);
+ spin_unlock_bh(&hwq->lock);
+ return -EAGAIN;
+ }
+ __set_cmdq_base_cookie(msg->req, msg->req_sz, cpu_to_le16(cookie));
+
+ bsize = bng_re_set_cmd_slots(msg->req);
+ crsqe->free_slots = free_slots;
+ crsqe->resp = (struct creq_qp_event *)msg->resp;
+ crsqe->is_waiter_alive = true;
+ crsqe->is_in_used = true;
+ crsqe->opcode = opcode;
+
+ crsqe->req_size = __get_cmdq_base_cmd_size(msg->req, msg->req_sz);
+ if (__get_cmdq_base_resp_size(msg->req, msg->req_sz) && msg->sb) {
+ struct bng_re_rcfw_sbuf *sbuf = msg->sb;
+
+ __set_cmdq_base_resp_addr(msg->req, msg->req_sz,
+ cpu_to_le64(sbuf->dma_addr));
+ __set_cmdq_base_resp_size(msg->req, msg->req_sz,
+ ALIGN(sbuf->size,
+ BNG_FW_CMDQE_UNITS) /
+ BNG_FW_CMDQE_UNITS);
+ }
+
+ preq = (u8 *)msg->req;
+ do {
+ /* Locate the next cmdq slot */
+ sw_prod = HWQ_CMP(hwq->prod, hwq);
+ cmdqe = bng_re_get_qe(hwq, sw_prod, NULL);
+ /* Copy a segment of the req cmd to the cmdq */
+ memset(cmdqe, 0, sizeof(*cmdqe));
+ memcpy(cmdqe, preq, min_t(u32, bsize, sizeof(*cmdqe)));
+ preq += min_t(u32, bsize, sizeof(*cmdqe));
+ bsize -= min_t(u32, bsize, sizeof(*cmdqe));
+ hwq->prod++;
+ } while (bsize > 0);
+ cmdq->seq_num++;
+
+ cmdq_prod = hwq->prod & 0xFFFF;
+ if (test_bit(FIRMWARE_FIRST_FLAG, &cmdq->flags)) {
+ /* The very first doorbell write
+ * is required to set this flag
+ * which prompts the FW to reset
+ * its internal pointers
+ */
+ cmdq_prod |= BIT(FIRMWARE_FIRST_FLAG);
+ clear_bit(FIRMWARE_FIRST_FLAG, &cmdq->flags);
+ }
+ /* ring CMDQ DB */
+ wmb();
+ writel(cmdq_prod, cmdq->cmdq_mbox.prod);
+ writel(BNG_FW_CMDQ_TRIG_VAL, cmdq->cmdq_mbox.db);
+ spin_unlock_bh(&hwq->lock);
+ /* Return the CREQ response pointer */
+ return 0;
+}
+
+/**
+ * __wait_for_resp - Don't hold the cpu context and wait for response
+ * @rcfw: rcfw channel instance of rdev
+ * @cookie: cookie to track the command
+ *
+ * Wait for command completion in sleepable context.
+ *
+ * Returns:
+ * 0 if command is completed by firmware.
+ * Non zero error code for rest of the case.
+ */
+static int __wait_for_resp(struct bng_re_rcfw *rcfw, u16 cookie)
+{
+ struct bng_re_cmdq_ctx *cmdq;
+ struct bng_re_crsqe *crsqe;
+
+ cmdq = &rcfw->cmdq;
+ crsqe = &rcfw->crsqe_tbl[cookie];
+
+ do {
+ wait_event_timeout(cmdq->waitq,
+ !crsqe->is_in_used,
+ secs_to_jiffies(rcfw->max_timeout));
+
+ if (!crsqe->is_in_used)
+ return 0;
+
+ bng_re_service_creq(&rcfw->creq.creq_tasklet);
+
+ if (!crsqe->is_in_used)
+ return 0;
+ } while (true);
+};
+
+/**
+ * bng_re_rcfw_send_message - interface to send
+ * and complete rcfw command.
+ * @rcfw: rcfw channel instance of rdev
+ * @msg: message to send
+ *
+ * This function does not account shadow queue depth. It will send
+ * all the command unconditionally as long as send queue is not full.
+ *
+ * Returns:
+ * 0 if command completed by firmware.
+ * Non zero if the command is not completed by firmware.
+ */
+int bng_re_rcfw_send_message(struct bng_re_rcfw *rcfw,
+ struct bng_re_cmdqmsg *msg)
+{
+ struct creq_qp_event *evnt = (struct creq_qp_event *)msg->resp;
+ struct bng_re_crsqe *crsqe;
+ u16 cookie;
+ int rc;
+ u8 opcode;
+
+ opcode = __get_cmdq_base_opcode(msg->req, msg->req_sz);
+
+ rc = __send_message_basic_sanity(rcfw, msg, opcode);
+ if (rc)
+ return rc == -ENXIO ? bng_re_map_rc(opcode) : rc;
+
+ rc = __send_message(rcfw, msg, opcode);
+ if (rc)
+ return rc;
+
+ cookie = le16_to_cpu(__get_cmdq_base_cookie(msg->req, msg->req_sz))
+ & BNG_FW_MAX_COOKIE_VALUE;
+
+ rc = __wait_for_resp(rcfw, cookie);
+
+ if (rc) {
+ spin_lock_bh(&rcfw->cmdq.hwq.lock);
+ crsqe = &rcfw->crsqe_tbl[cookie];
+ crsqe->is_waiter_alive = false;
+ if (rc == -ENODEV)
+ set_bit(FIRMWARE_STALL_DETECTED, &rcfw->cmdq.flags);
+ spin_unlock_bh(&rcfw->cmdq.hwq.lock);
+ return -ETIMEDOUT;
+ }
+
+ if (evnt->status) {
+ /* failed with status */
+ dev_err(&rcfw->pdev->dev, "cmdq[%#x]=%#x status %#x\n",
+ cookie, opcode, evnt->status);
+ rc = -EIO;
+ }
+
+ return rc;
+}
+
+static int bng_re_map_cmdq_mbox(struct bng_re_rcfw *rcfw)
+{
+ struct bng_re_cmdq_mbox *mbox;
+ resource_size_t bar_reg;
+ struct pci_dev *pdev;
+
+ pdev = rcfw->pdev;
+ mbox = &rcfw->cmdq.cmdq_mbox;
+
+ mbox->reg.bar_id = BNG_FW_COMM_PCI_BAR_REGION;
+ mbox->reg.len = BNG_FW_COMM_SIZE;
+ mbox->reg.bar_base = pci_resource_start(pdev, mbox->reg.bar_id);
+ if (!mbox->reg.bar_base) {
+ dev_err(&pdev->dev,
+ "CMDQ BAR region %d resc start is 0!\n",
+ mbox->reg.bar_id);
+ return -ENOMEM;
+ }
+
+ bar_reg = mbox->reg.bar_base + BNG_FW_COMM_BASE_OFFSET;
+ mbox->reg.len = BNG_FW_COMM_SIZE;
+ mbox->reg.bar_reg = ioremap(bar_reg, mbox->reg.len);
+ if (!mbox->reg.bar_reg) {
+ dev_err(&pdev->dev,
+ "CMDQ BAR region %d mapping failed\n",
+ mbox->reg.bar_id);
+ return -ENOMEM;
+ }
+
+ mbox->prod = (void __iomem *)(mbox->reg.bar_reg +
+ BNG_FW_PF_VF_COMM_PROD_OFFSET);
+ mbox->db = (void __iomem *)(mbox->reg.bar_reg + BNG_FW_COMM_TRIG_OFFSET);
+ return 0;
+}
+
+static irqreturn_t bng_re_creq_irq(int irq, void *dev_instance)
+{
+ struct bng_re_rcfw *rcfw = dev_instance;
+ struct bng_re_creq_ctx *creq;
+ struct bng_re_hwq *hwq;
+ u32 sw_cons;
+
+ creq = &rcfw->creq;
+ hwq = &creq->hwq;
+ /* Prefetch the CREQ element */
+ sw_cons = HWQ_CMP(hwq->cons, hwq);
+ bng_re_get_qe(hwq, sw_cons, NULL);
+
+ tasklet_schedule(&creq->creq_tasklet);
+ return IRQ_HANDLED;
+}
+
+int bng_re_rcfw_start_irq(struct bng_re_rcfw *rcfw, int msix_vector,
+ bool need_init)
+{
+ struct bng_re_creq_ctx *creq;
+ struct bng_re_res *res;
+ int rc;
+
+ creq = &rcfw->creq;
+ res = rcfw->res;
+
+ if (creq->irq_handler_avail)
+ return -EFAULT;
+
+ creq->msix_vec = msix_vector;
+ if (need_init)
+ tasklet_setup(&creq->creq_tasklet, bng_re_service_creq);
+ else
+ tasklet_enable(&creq->creq_tasklet);
+
+ creq->irq_name = kasprintf(GFP_KERNEL, "bng_re-creq@pci:%s",
+ pci_name(res->pdev));
+ if (!creq->irq_name)
+ return -ENOMEM;
+ rc = request_irq(creq->msix_vec, bng_re_creq_irq, 0,
+ creq->irq_name, rcfw);
+ if (rc) {
+ kfree(creq->irq_name);
+ creq->irq_name = NULL;
+ tasklet_disable(&creq->creq_tasklet);
+ return rc;
+ }
+ creq->irq_handler_avail = true;
+
+ bng_re_ring_nq_db(&creq->creq_db.dbinfo, res->cctx, true);
+ atomic_inc(&rcfw->rcfw_intr_enabled);
+
+ return 0;
+}
+
+static int bng_re_map_creq_db(struct bng_re_rcfw *rcfw, u32 reg_offt)
+{
+ struct bng_re_creq_db *creq_db;
+ resource_size_t bar_reg;
+ struct pci_dev *pdev;
+
+ pdev = rcfw->pdev;
+ creq_db = &rcfw->creq.creq_db;
+
+ creq_db->dbinfo.flags = 0;
+ creq_db->reg.bar_id = BNG_FW_COMM_CONS_PCI_BAR_REGION;
+ creq_db->reg.bar_base = pci_resource_start(pdev, creq_db->reg.bar_id);
+ if (!creq_db->reg.bar_id)
+ dev_err(&pdev->dev,
+ "CREQ BAR region %d resc start is 0!",
+ creq_db->reg.bar_id);
+
+ bar_reg = creq_db->reg.bar_base + reg_offt;
+
+ creq_db->reg.len = BNG_FW_CREQ_DB_LEN;
+ creq_db->reg.bar_reg = ioremap(bar_reg, creq_db->reg.len);
+ if (!creq_db->reg.bar_reg) {
+ dev_err(&pdev->dev,
+ "CREQ BAR region %d mapping failed",
+ creq_db->reg.bar_id);
+ return -ENOMEM;
+ }
+ creq_db->dbinfo.db = creq_db->reg.bar_reg;
+ creq_db->dbinfo.hwq = &rcfw->creq.hwq;
+ creq_db->dbinfo.xid = rcfw->creq.ring_id;
+ return 0;
+}
+
+void bng_re_rcfw_stop_irq(struct bng_re_rcfw *rcfw, bool kill)
+{
+ struct bng_re_creq_ctx *creq;
+
+ creq = &rcfw->creq;
+
+ if (!creq->irq_handler_avail)
+ return;
+
+ creq->irq_handler_avail = false;
+ /* Mask h/w interrupts */
+ bng_re_ring_nq_db(&creq->creq_db.dbinfo, rcfw->res->cctx, false);
+ /* Sync with last running IRQ-handler */
+ synchronize_irq(creq->msix_vec);
+ free_irq(creq->msix_vec, rcfw);
+ kfree(creq->irq_name);
+ creq->irq_name = NULL;
+ atomic_set(&rcfw->rcfw_intr_enabled, 0);
+ if (kill)
+ tasklet_kill(&creq->creq_tasklet);
+ tasklet_disable(&creq->creq_tasklet);
+}
+
+void bng_re_disable_rcfw_channel(struct bng_re_rcfw *rcfw)
+{
+ struct bng_re_creq_ctx *creq;
+ struct bng_re_cmdq_ctx *cmdq;
+
+ creq = &rcfw->creq;
+ cmdq = &rcfw->cmdq;
+ /* Make sure the HW channel is stopped! */
+ bng_re_rcfw_stop_irq(rcfw, true);
+
+ iounmap(cmdq->cmdq_mbox.reg.bar_reg);
+ iounmap(creq->creq_db.reg.bar_reg);
+
+ cmdq->cmdq_mbox.reg.bar_reg = NULL;
+ creq->creq_db.reg.bar_reg = NULL;
+ creq->msix_vec = 0;
+}
+
+static void bng_re_start_rcfw(struct bng_re_rcfw *rcfw)
+{
+ struct bng_re_cmdq_ctx *cmdq;
+ struct bng_re_creq_ctx *creq;
+ struct bng_re_cmdq_mbox *mbox;
+ struct cmdq_init init = {0};
+
+ cmdq = &rcfw->cmdq;
+ creq = &rcfw->creq;
+ mbox = &cmdq->cmdq_mbox;
+
+ init.cmdq_pbl = cpu_to_le64(cmdq->hwq.pbl[BNG_PBL_LVL_0].pg_map_arr[0]);
+ init.cmdq_size_cmdq_lvl =
+ cpu_to_le16(((rcfw->cmdq_depth <<
+ CMDQ_INIT_CMDQ_SIZE_SFT) &
+ CMDQ_INIT_CMDQ_SIZE_MASK) |
+ ((cmdq->hwq.level <<
+ CMDQ_INIT_CMDQ_LVL_SFT) &
+ CMDQ_INIT_CMDQ_LVL_MASK));
+ init.creq_ring_id = cpu_to_le16(creq->ring_id);
+ /* Write to the mailbox register */
+ __iowrite32_copy(mbox->reg.bar_reg, &init, sizeof(init) / 4);
+}
+
+int bng_re_enable_fw_channel(struct bng_re_rcfw *rcfw,
+ int msix_vector,
+ int cp_bar_reg_off)
+{
+ struct bng_re_cmdq_ctx *cmdq;
+ int rc;
+
+ cmdq = &rcfw->cmdq;
+
+ /* Assign defaults */
+ cmdq->seq_num = 0;
+ set_bit(FIRMWARE_FIRST_FLAG, &cmdq->flags);
+ init_waitqueue_head(&cmdq->waitq);
+
+ rc = bng_re_map_cmdq_mbox(rcfw);
+ if (rc)
+ return rc;
+
+ rc = bng_re_map_creq_db(rcfw, cp_bar_reg_off);
+ if (rc)
+ return rc;
+
+ rc = bng_re_rcfw_start_irq(rcfw, msix_vector, true);
+ if (rc) {
+ dev_err(&rcfw->pdev->dev,
+ "Failed to request IRQ for CREQ rc = 0x%x\n", rc);
+ bng_re_disable_rcfw_channel(rcfw);
+ return rc;
+ }
+
+ bng_re_start_rcfw(rcfw);
+ return 0;
+}
+
+int bng_re_deinit_rcfw(struct bng_re_rcfw *rcfw)
+{
+ struct creq_deinitialize_fw_resp resp = {};
+ struct cmdq_deinitialize_fw req = {};
+ struct bng_re_cmdqmsg msg = {};
+ int rc;
+
+ bng_re_rcfw_cmd_prep((struct cmdq_base *)&req,
+ CMDQ_BASE_OPCODE_DEINITIALIZE_FW,
+ sizeof(req));
+ bng_re_fill_cmdqmsg(&msg, &req, &resp, NULL,
+ sizeof(req), sizeof(resp), 0);
+ rc = bng_re_rcfw_send_message(rcfw, &msg);
+ if (rc)
+ return rc;
+
+ clear_bit(FIRMWARE_INITIALIZED_FLAG, &rcfw->cmdq.flags);
+ return 0;
+}
+static inline bool _is_hw_retx_supported(u16 dev_cap_flags)
+{
+ return dev_cap_flags &
+ (CREQ_QUERY_FUNC_RESP_SB_HW_REQUESTER_RETX_ENABLED |
+ CREQ_QUERY_FUNC_RESP_SB_HW_RESPONDER_RETX_ENABLED);
+}
+
+#define BNG_RE_HW_RETX(a) _is_hw_retx_supported((a))
+static inline bool _is_optimize_modify_qp_supported(u16 dev_cap_ext_flags2)
+{
+ return dev_cap_ext_flags2 &
+ CREQ_QUERY_FUNC_RESP_SB_OPTIMIZE_MODIFY_QP_SUPPORTED;
+}
+
+int bng_re_init_rcfw(struct bng_re_rcfw *rcfw,
+ struct bng_re_stats *stats_ctx)
+{
+ struct creq_initialize_fw_resp resp = {};
+ struct cmdq_initialize_fw req = {};
+ struct bng_re_cmdqmsg msg = {};
+ int rc;
+ u16 flags = 0;
+
+ bng_re_rcfw_cmd_prep((struct cmdq_base *)&req,
+ CMDQ_BASE_OPCODE_INITIALIZE_FW,
+ sizeof(req));
+ /* Supply (log-base-2-of-host-page-size - base-page-shift)
+ * to bono to adjust the doorbell page sizes.
+ */
+ req.log2_dbr_pg_size = cpu_to_le16(PAGE_SHIFT -
+ BNG_FW_DBR_BASE_PAGE_SHIFT);
+ if (BNG_RE_HW_RETX(rcfw->res->dattr->dev_cap_flags))
+ flags |= CMDQ_INITIALIZE_FW_FLAGS_HW_REQUESTER_RETX_SUPPORTED;
+ if (_is_optimize_modify_qp_supported(rcfw->res->dattr->dev_cap_flags2))
+ flags |= CMDQ_INITIALIZE_FW_FLAGS_OPTIMIZE_MODIFY_QP_SUPPORTED;
+ req.flags |= cpu_to_le16(flags);
+ req.stat_ctx_id = cpu_to_le32(stats_ctx->fw_id);
+ bng_re_fill_cmdqmsg(&msg, &req, &resp, NULL, sizeof(req), sizeof(resp), 0);
+ rc = bng_re_rcfw_send_message(rcfw, &msg);
+ if (rc)
+ return rc;
+ set_bit(FIRMWARE_INITIALIZED_FLAG, &rcfw->cmdq.flags);
+ return 0;
+}
diff --git a/drivers/infiniband/hw/bng_re/bng_fw.h b/drivers/infiniband/hw/bng_re/bng_fw.h
new file mode 100644
index 000000000000..c89c926ec2fc
--- /dev/null
+++ b/drivers/infiniband/hw/bng_re/bng_fw.h
@@ -0,0 +1,211 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+// Copyright (c) 2025 Broadcom.
+
+#ifndef __BNG_FW_H__
+#define __BNG_FW_H__
+
+#include "bng_tlv.h"
+
+/* FW DB related */
+#define BNG_FW_CMDQ_TRIG_VAL 1
+#define BNG_FW_COMM_PCI_BAR_REGION 0
+#define BNG_FW_COMM_CONS_PCI_BAR_REGION 2
+#define BNG_FW_DBR_BASE_PAGE_SHIFT 12
+#define BNG_FW_COMM_SIZE 0x104
+#define BNG_FW_COMM_BASE_OFFSET 0x600
+#define BNG_FW_COMM_TRIG_OFFSET 0x100
+#define BNG_FW_PF_VF_COMM_PROD_OFFSET 0xc
+#define BNG_FW_CREQ_DB_LEN 8
+
+/* CREQ */
+#define BNG_FW_CREQE_MAX_CNT (64 * 1024)
+#define BNG_FW_CREQE_UNITS 16
+#define BNG_FW_CREQ_ENTRY_POLL_BUDGET 0x100
+#define BNG_FW_CREQ_CMP_VALID(hdr, pass) \
+ (!!((hdr)->v & CREQ_BASE_V) == \
+ !((pass) & BNG_RE_FLAG_EPOCH_CONS_MASK))
+#define BNG_FW_CREQ_ENTRY_POLL_BUDGET 0x100
+
+/* CMDQ */
+struct bng_fw_cmdqe {
+ u8 data[16];
+};
+
+#define BNG_FW_CMDQE_MAX_CNT 8192
+#define BNG_FW_CMDQE_UNITS sizeof(struct bng_fw_cmdqe)
+#define BNG_FW_CMDQE_BYTES(depth) ((depth) * BNG_FW_CMDQE_UNITS)
+
+#define BNG_FW_MAX_COOKIE_VALUE (BNG_FW_CMDQE_MAX_CNT - 1)
+#define BNG_FW_CMD_IS_BLOCKING 0x8000
+
+/* Crsq buf is 1024-Byte */
+struct bng_re_crsbe {
+ u8 data[1024];
+};
+
+
+static inline u32 bng_fw_cmdqe_npages(u32 depth)
+{
+ u32 npages;
+
+ npages = BNG_FW_CMDQE_BYTES(depth) / PAGE_SIZE;
+ if (BNG_FW_CMDQE_BYTES(depth) % PAGE_SIZE)
+ npages++;
+ return npages;
+}
+
+static inline u32 bng_fw_cmdqe_page_size(u32 depth)
+{
+ return (bng_fw_cmdqe_npages(depth) * PAGE_SIZE);
+}
+struct bng_re_cmdq_mbox {
+ struct bng_re_reg_desc reg;
+ void __iomem *prod;
+ void __iomem *db;
+};
+
+/* HWQ */
+struct bng_re_cmdq_ctx {
+ struct bng_re_hwq hwq;
+ struct bng_re_cmdq_mbox cmdq_mbox;
+ unsigned long flags;
+#define FIRMWARE_INITIALIZED_FLAG (0)
+#define FIRMWARE_STALL_DETECTED (3)
+#define FIRMWARE_FIRST_FLAG (31)
+ wait_queue_head_t waitq;
+ u32 seq_num;
+};
+
+struct bng_re_creq_db {
+ struct bng_re_reg_desc reg;
+ struct bng_re_db_info dbinfo;
+};
+
+struct bng_re_creq_stat {
+ u64 creq_qp_event_processed;
+ u64 creq_func_event_processed;
+};
+
+struct bng_re_creq_ctx {
+ struct bng_re_hwq hwq;
+ struct bng_re_creq_db creq_db;
+ struct bng_re_creq_stat stats;
+ struct tasklet_struct creq_tasklet;
+ u16 ring_id;
+ int msix_vec;
+ bool irq_handler_avail;
+ char *irq_name;
+};
+
+struct bng_re_crsqe {
+ struct creq_qp_event *resp;
+ u32 req_size;
+ /* Free slots at the time of submission */
+ u32 free_slots;
+ u8 opcode;
+ bool is_waiter_alive;
+ bool is_in_used;
+};
+
+struct bng_re_rcfw_sbuf {
+ void *sb;
+ dma_addr_t dma_addr;
+ u32 size;
+};
+
+/* RoCE FW Communication Channels */
+struct bng_re_rcfw {
+ struct pci_dev *pdev;
+ struct bng_re_res *res;
+ struct bng_re_cmdq_ctx cmdq;
+ struct bng_re_creq_ctx creq;
+ struct bng_re_crsqe *crsqe_tbl;
+ /* To synchronize the qp-handle hash table */
+ spinlock_t tbl_lock;
+ u32 cmdq_depth;
+ /* cached from chip cctx for quick reference in slow path */
+ u16 max_timeout;
+ atomic_t rcfw_intr_enabled;
+};
+
+struct bng_re_cmdqmsg {
+ struct cmdq_base *req;
+ struct creq_base *resp;
+ void *sb;
+ u32 req_sz;
+ u32 res_sz;
+ u8 block;
+};
+
+static inline void bng_re_rcfw_cmd_prep(struct cmdq_base *req,
+ u8 opcode, u8 cmd_size)
+{
+ req->opcode = opcode;
+ req->cmd_size = cmd_size;
+}
+
+static inline void bng_re_fill_cmdqmsg(struct bng_re_cmdqmsg *msg,
+ void *req, void *resp, void *sb,
+ u32 req_sz, u32 res_sz, u8 block)
+{
+ msg->req = req;
+ msg->resp = resp;
+ msg->sb = sb;
+ msg->req_sz = req_sz;
+ msg->res_sz = res_sz;
+ msg->block = block;
+}
+
+/* Get the number of command units required for the req. The
+ * function returns correct value only if called before
+ * setting using bng_re_set_cmd_slots
+ */
+static inline u32 bng_re_get_cmd_slots(struct cmdq_base *req)
+{
+ u32 cmd_units = 0;
+
+ if (HAS_TLV_HEADER(req)) {
+ struct roce_tlv *tlv_req = (struct roce_tlv *)req;
+
+ cmd_units = tlv_req->total_size;
+ } else {
+ cmd_units = (req->cmd_size + BNG_FW_CMDQE_UNITS - 1) /
+ BNG_FW_CMDQE_UNITS;
+ }
+
+ return cmd_units;
+}
+
+static inline u32 bng_re_set_cmd_slots(struct cmdq_base *req)
+{
+ u32 cmd_byte = 0;
+
+ if (HAS_TLV_HEADER(req)) {
+ struct roce_tlv *tlv_req = (struct roce_tlv *)req;
+
+ cmd_byte = tlv_req->total_size * BNG_FW_CMDQE_UNITS;
+ } else {
+ cmd_byte = req->cmd_size;
+ req->cmd_size = (req->cmd_size + BNG_FW_CMDQE_UNITS - 1) /
+ BNG_FW_CMDQE_UNITS;
+ }
+
+ return cmd_byte;
+}
+
+void bng_re_free_rcfw_channel(struct bng_re_rcfw *rcfw);
+int bng_re_alloc_fw_channel(struct bng_re_res *res,
+ struct bng_re_rcfw *rcfw);
+int bng_re_enable_fw_channel(struct bng_re_rcfw *rcfw,
+ int msix_vector,
+ int cp_bar_reg_off);
+void bng_re_disable_rcfw_channel(struct bng_re_rcfw *rcfw);
+int bng_re_rcfw_start_irq(struct bng_re_rcfw *rcfw, int msix_vector,
+ bool need_init);
+void bng_re_rcfw_stop_irq(struct bng_re_rcfw *rcfw, bool kill);
+int bng_re_rcfw_send_message(struct bng_re_rcfw *rcfw,
+ struct bng_re_cmdqmsg *msg);
+int bng_re_init_rcfw(struct bng_re_rcfw *rcfw,
+ struct bng_re_stats *stats_ctx);
+int bng_re_deinit_rcfw(struct bng_re_rcfw *rcfw);
+#endif
diff --git a/drivers/infiniband/hw/bng_re/bng_re.h b/drivers/infiniband/hw/bng_re/bng_re.h
new file mode 100644
index 000000000000..dae4862621a7
--- /dev/null
+++ b/drivers/infiniband/hw/bng_re/bng_re.h
@@ -0,0 +1,85 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+// Copyright (c) 2025 Broadcom.
+
+#ifndef __BNG_RE_H__
+#define __BNG_RE_H__
+
+#include "bng_res.h"
+
+#define BNG_RE_ADEV_NAME "bng_en"
+
+#define BNG_RE_DESC "Broadcom 800G RoCE Driver"
+
+#define rdev_to_dev(rdev) ((rdev) ? (&(rdev)->ibdev.dev) : NULL)
+
+#define BNG_RE_MIN_MSIX 2
+#define BNG_RE_MAX_MSIX BNGE_MAX_ROCE_MSIX
+
+#define BNG_RE_CREQ_NQ_IDX 0
+
+#define BNGE_INVALID_STATS_CTX_ID -1
+/* NQ specific structures */
+struct bng_re_nq_db {
+ struct bng_re_reg_desc reg;
+ struct bng_re_db_info dbinfo;
+};
+
+struct bng_re_nq {
+ struct pci_dev *pdev;
+ struct bng_re_res *res;
+ char *name;
+ struct bng_re_hwq hwq;
+ struct bng_re_nq_db nq_db;
+ u16 ring_id;
+ int msix_vec;
+ cpumask_t mask;
+ struct tasklet_struct nq_tasklet;
+ bool requested;
+ int budget;
+ u32 load;
+
+ struct workqueue_struct *cqn_wq;
+};
+
+struct bng_re_nq_record {
+ struct bnge_msix_info msix_entries[BNG_RE_MAX_MSIX];
+ struct bng_re_nq nq[BNG_RE_MAX_MSIX];
+ int num_msix;
+ /* serialize NQ access */
+ struct mutex load_lock;
+};
+
+struct bng_re_en_dev_info {
+ struct bng_re_dev *rdev;
+ struct bnge_auxr_dev *auxr_dev;
+};
+
+struct bng_re_ring_attr {
+ dma_addr_t *dma_arr;
+ int pages;
+ int type;
+ u32 depth;
+ u32 lrid; /* Logical ring id */
+ u8 mode;
+};
+
+struct bng_re_dev {
+ struct ib_device ibdev;
+ unsigned long flags;
+#define BNG_RE_FLAG_NETDEV_REGISTERED 0
+#define BNG_RE_FLAG_RCFW_CHANNEL_EN 1
+ struct net_device *netdev;
+ struct auxiliary_device *adev;
+ struct bnge_auxr_dev *aux_dev;
+ struct bng_re_chip_ctx *chip_ctx;
+ int fn_id;
+ struct bng_re_res bng_res;
+ struct bng_re_rcfw rcfw;
+ struct bng_re_nq_record *nqr;
+ /* Device Resources */
+ struct bng_re_dev_attr *dev_attr;
+ struct dentry *dbg_root;
+ struct bng_re_stats stats_ctx;
+};
+
+#endif
diff --git a/drivers/infiniband/hw/bng_re/bng_res.c b/drivers/infiniband/hw/bng_re/bng_res.c
new file mode 100644
index 000000000000..c50823758b53
--- /dev/null
+++ b/drivers/infiniband/hw/bng_re/bng_res.c
@@ -0,0 +1,279 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2025 Broadcom.
+
+#include <linux/pci.h>
+#include <linux/vmalloc.h>
+#include <rdma/ib_umem.h>
+
+#include <linux/bnxt/hsi.h>
+#include "bng_res.h"
+#include "roce_hsi.h"
+
+/* Stats */
+void bng_re_free_stats_ctx_mem(struct pci_dev *pdev,
+ struct bng_re_stats *stats)
+{
+ if (stats->dma) {
+ dma_free_coherent(&pdev->dev, stats->size,
+ stats->dma, stats->dma_map);
+ }
+ memset(stats, 0, sizeof(*stats));
+ stats->fw_id = -1;
+}
+
+int bng_re_alloc_stats_ctx_mem(struct pci_dev *pdev,
+ struct bng_re_chip_ctx *cctx,
+ struct bng_re_stats *stats)
+{
+ memset(stats, 0, sizeof(*stats));
+ stats->fw_id = -1;
+ stats->size = cctx->hw_stats_size;
+ stats->dma = dma_alloc_coherent(&pdev->dev, stats->size,
+ &stats->dma_map, GFP_KERNEL);
+ if (!stats->dma)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static void bng_free_pbl(struct bng_re_res *res, struct bng_re_pbl *pbl)
+{
+ struct pci_dev *pdev = res->pdev;
+ int i;
+
+ for (i = 0; i < pbl->pg_count; i++) {
+ if (pbl->pg_arr[i])
+ dma_free_coherent(&pdev->dev, pbl->pg_size,
+ (void *)((unsigned long)
+ pbl->pg_arr[i] &
+ PAGE_MASK),
+ pbl->pg_map_arr[i]);
+ else
+ dev_warn(&pdev->dev,
+ "PBL free pg_arr[%d] empty?!\n", i);
+ pbl->pg_arr[i] = NULL;
+ }
+
+ vfree(pbl->pg_arr);
+ pbl->pg_arr = NULL;
+ vfree(pbl->pg_map_arr);
+ pbl->pg_map_arr = NULL;
+ pbl->pg_count = 0;
+ pbl->pg_size = 0;
+}
+
+static int bng_alloc_pbl(struct bng_re_res *res,
+ struct bng_re_pbl *pbl,
+ struct bng_re_sg_info *sginfo)
+{
+ struct pci_dev *pdev = res->pdev;
+ u32 pages;
+ int i;
+
+ if (sginfo->nopte)
+ return 0;
+ pages = sginfo->npages;
+
+ /* page ptr arrays */
+ pbl->pg_arr = vmalloc_array(pages, sizeof(void *));
+ if (!pbl->pg_arr)
+ return -ENOMEM;
+
+ pbl->pg_map_arr = vmalloc_array(pages, sizeof(dma_addr_t));
+ if (!pbl->pg_map_arr) {
+ vfree(pbl->pg_arr);
+ pbl->pg_arr = NULL;
+ return -ENOMEM;
+ }
+ pbl->pg_count = 0;
+ pbl->pg_size = sginfo->pgsize;
+
+ for (i = 0; i < pages; i++) {
+ pbl->pg_arr[i] = dma_alloc_coherent(&pdev->dev,
+ pbl->pg_size,
+ &pbl->pg_map_arr[i],
+ GFP_KERNEL);
+ if (!pbl->pg_arr[i])
+ goto fail;
+ pbl->pg_count++;
+ }
+
+ return 0;
+fail:
+ bng_free_pbl(res, pbl);
+ return -ENOMEM;
+}
+
+void bng_re_free_hwq(struct bng_re_res *res,
+ struct bng_re_hwq *hwq)
+{
+ int i;
+
+ if (!hwq->max_elements)
+ return;
+ if (hwq->level >= BNG_PBL_LVL_MAX)
+ return;
+
+ for (i = 0; i < hwq->level + 1; i++)
+ bng_free_pbl(res, &hwq->pbl[i]);
+
+ hwq->level = BNG_PBL_LVL_MAX;
+ hwq->max_elements = 0;
+ hwq->element_size = 0;
+ hwq->prod = 0;
+ hwq->cons = 0;
+}
+
+/* All HWQs are power of 2 in size */
+int bng_re_alloc_init_hwq(struct bng_re_hwq *hwq,
+ struct bng_re_hwq_attr *hwq_attr)
+{
+ u32 npages, pg_size;
+ struct bng_re_sg_info sginfo = {};
+ u32 depth, stride, npbl, npde;
+ dma_addr_t *src_phys_ptr, **dst_virt_ptr;
+ struct bng_re_res *res;
+ struct pci_dev *pdev;
+ int i, rc, lvl;
+
+ res = hwq_attr->res;
+ pdev = res->pdev;
+ pg_size = hwq_attr->sginfo->pgsize;
+ hwq->level = BNG_PBL_LVL_MAX;
+
+ depth = roundup_pow_of_two(hwq_attr->depth);
+ stride = roundup_pow_of_two(hwq_attr->stride);
+
+ npages = (depth * stride) / pg_size;
+ if ((depth * stride) % pg_size)
+ npages++;
+ if (!npages)
+ return -EINVAL;
+ hwq_attr->sginfo->npages = npages;
+
+ if (npages == MAX_PBL_LVL_0_PGS && !hwq_attr->sginfo->nopte) {
+ /* This request is Level 0, map PTE */
+ rc = bng_alloc_pbl(res, &hwq->pbl[BNG_PBL_LVL_0], hwq_attr->sginfo);
+ if (rc)
+ goto fail;
+ hwq->level = BNG_PBL_LVL_0;
+ goto done;
+ }
+
+ if (npages >= MAX_PBL_LVL_0_PGS) {
+ if (npages > MAX_PBL_LVL_1_PGS) {
+ u32 flag = PTU_PTE_VALID;
+ /* 2 levels of indirection */
+ npbl = npages >> MAX_PBL_LVL_1_PGS_SHIFT;
+ if (npages % BIT(MAX_PBL_LVL_1_PGS_SHIFT))
+ npbl++;
+ npde = npbl >> MAX_PDL_LVL_SHIFT;
+ if (npbl % BIT(MAX_PDL_LVL_SHIFT))
+ npde++;
+ /* Alloc PDE pages */
+ sginfo.pgsize = npde * pg_size;
+ sginfo.npages = 1;
+ rc = bng_alloc_pbl(res, &hwq->pbl[BNG_PBL_LVL_0], &sginfo);
+ if (rc)
+ goto fail;
+
+ /* Alloc PBL pages */
+ sginfo.npages = npbl;
+ sginfo.pgsize = PAGE_SIZE;
+ rc = bng_alloc_pbl(res, &hwq->pbl[BNG_PBL_LVL_1], &sginfo);
+ if (rc)
+ goto fail;
+ /* Fill PDL with PBL page pointers */
+ dst_virt_ptr =
+ (dma_addr_t **)hwq->pbl[BNG_PBL_LVL_0].pg_arr;
+ src_phys_ptr = hwq->pbl[BNG_PBL_LVL_1].pg_map_arr;
+ for (i = 0; i < hwq->pbl[BNG_PBL_LVL_1].pg_count; i++)
+ dst_virt_ptr[0][i] = src_phys_ptr[i] | flag;
+
+ /* Alloc or init PTEs */
+ rc = bng_alloc_pbl(res, &hwq->pbl[BNG_PBL_LVL_2],
+ hwq_attr->sginfo);
+ if (rc)
+ goto fail;
+ hwq->level = BNG_PBL_LVL_2;
+ if (hwq_attr->sginfo->nopte)
+ goto done;
+ /* Fill PBLs with PTE pointers */
+ dst_virt_ptr =
+ (dma_addr_t **)hwq->pbl[BNG_PBL_LVL_1].pg_arr;
+ src_phys_ptr = hwq->pbl[BNG_PBL_LVL_2].pg_map_arr;
+ for (i = 0; i < hwq->pbl[BNG_PBL_LVL_2].pg_count; i++) {
+ dst_virt_ptr[PTR_PG(i)][PTR_IDX(i)] =
+ src_phys_ptr[i] | PTU_PTE_VALID;
+ }
+ if (hwq_attr->type == BNG_HWQ_TYPE_QUEUE) {
+ /* Find the last pg of the size */
+ i = hwq->pbl[BNG_PBL_LVL_2].pg_count;
+ dst_virt_ptr[PTR_PG(i - 1)][PTR_IDX(i - 1)] |=
+ PTU_PTE_LAST;
+ if (i > 1)
+ dst_virt_ptr[PTR_PG(i - 2)]
+ [PTR_IDX(i - 2)] |=
+ PTU_PTE_NEXT_TO_LAST;
+ }
+ } else { /* pages < 512 npbl = 1, npde = 0 */
+ u32 flag = PTU_PTE_VALID;
+
+ /* 1 level of indirection */
+ npbl = npages >> MAX_PBL_LVL_1_PGS_SHIFT;
+ if (npages % BIT(MAX_PBL_LVL_1_PGS_SHIFT))
+ npbl++;
+ sginfo.npages = npbl;
+ sginfo.pgsize = PAGE_SIZE;
+ /* Alloc PBL page */
+ rc = bng_alloc_pbl(res, &hwq->pbl[BNG_PBL_LVL_0], &sginfo);
+ if (rc)
+ goto fail;
+ /* Alloc or init PTEs */
+ rc = bng_alloc_pbl(res, &hwq->pbl[BNG_PBL_LVL_1],
+ hwq_attr->sginfo);
+ if (rc)
+ goto fail;
+ hwq->level = BNG_PBL_LVL_1;
+ if (hwq_attr->sginfo->nopte)
+ goto done;
+ /* Fill PBL with PTE pointers */
+ dst_virt_ptr =
+ (dma_addr_t **)hwq->pbl[BNG_PBL_LVL_0].pg_arr;
+ src_phys_ptr = hwq->pbl[BNG_PBL_LVL_1].pg_map_arr;
+ for (i = 0; i < hwq->pbl[BNG_PBL_LVL_1].pg_count; i++)
+ dst_virt_ptr[PTR_PG(i)][PTR_IDX(i)] =
+ src_phys_ptr[i] | flag;
+ if (hwq_attr->type == BNG_HWQ_TYPE_QUEUE) {
+ /* Find the last pg of the size */
+ i = hwq->pbl[BNG_PBL_LVL_1].pg_count;
+ dst_virt_ptr[PTR_PG(i - 1)][PTR_IDX(i - 1)] |=
+ PTU_PTE_LAST;
+ if (i > 1)
+ dst_virt_ptr[PTR_PG(i - 2)]
+ [PTR_IDX(i - 2)] |=
+ PTU_PTE_NEXT_TO_LAST;
+ }
+ }
+ }
+done:
+ hwq->prod = 0;
+ hwq->cons = 0;
+ hwq->pdev = pdev;
+ hwq->depth = hwq_attr->depth;
+ hwq->max_elements = hwq->depth;
+ hwq->element_size = stride;
+ hwq->qe_ppg = pg_size / stride;
+ /* For direct access to the elements */
+ lvl = hwq->level;
+ if (hwq_attr->sginfo->nopte && hwq->level)
+ lvl = hwq->level - 1;
+ hwq->pbl_ptr = hwq->pbl[lvl].pg_arr;
+ hwq->pbl_dma_ptr = hwq->pbl[lvl].pg_map_arr;
+ spin_lock_init(&hwq->lock);
+
+ return 0;
+fail:
+ bng_re_free_hwq(res, hwq);
+ return -ENOMEM;
+}
diff --git a/drivers/infiniband/hw/bng_re/bng_res.h b/drivers/infiniband/hw/bng_re/bng_res.h
new file mode 100644
index 000000000000..9997f86d6a0e
--- /dev/null
+++ b/drivers/infiniband/hw/bng_re/bng_res.h
@@ -0,0 +1,215 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+// Copyright (c) 2025 Broadcom.
+
+#ifndef __BNG_RES_H__
+#define __BNG_RES_H__
+
+#include "roce_hsi.h"
+
+#define BNG_ROCE_FW_MAX_TIMEOUT 60
+
+#define PTR_CNT_PER_PG (PAGE_SIZE / sizeof(void *))
+#define PTR_MAX_IDX_PER_PG (PTR_CNT_PER_PG - 1)
+#define PTR_PG(x) (((x) & ~PTR_MAX_IDX_PER_PG) / PTR_CNT_PER_PG)
+#define PTR_IDX(x) ((x) & PTR_MAX_IDX_PER_PG)
+
+#define HWQ_CMP(idx, hwq) ((idx) & ((hwq)->max_elements - 1))
+#define HWQ_FREE_SLOTS(hwq) (hwq->max_elements - \
+ ((HWQ_CMP(hwq->prod, hwq)\
+ - HWQ_CMP(hwq->cons, hwq))\
+ & (hwq->max_elements - 1)))
+
+#define MAX_PBL_LVL_0_PGS 1
+#define MAX_PBL_LVL_1_PGS 512
+#define MAX_PBL_LVL_1_PGS_SHIFT 9
+#define MAX_PBL_LVL_1_PGS_FOR_LVL_2 256
+#define MAX_PBL_LVL_2_PGS (256 * 512)
+#define MAX_PDL_LVL_SHIFT 9
+
+#define BNG_RE_DBR_VALID (0x1UL << 26)
+#define BNG_RE_DBR_EPOCH_SHIFT 24
+#define BNG_RE_DBR_TOGGLE_SHIFT 25
+
+#define BNG_MAX_TQM_ALLOC_REQ 48
+
+struct bng_re_reg_desc {
+ u8 bar_id;
+ resource_size_t bar_base;
+ unsigned long offset;
+ void __iomem *bar_reg;
+ size_t len;
+};
+
+struct bng_re_db_info {
+ void __iomem *db;
+ void __iomem *priv_db;
+ struct bng_re_hwq *hwq;
+ u32 xid;
+ u32 max_slot;
+ u32 flags;
+ u8 toggle;
+};
+
+enum bng_re_db_info_flags_mask {
+ BNG_RE_FLAG_EPOCH_CONS_SHIFT = 0x0UL,
+ BNG_RE_FLAG_EPOCH_PROD_SHIFT = 0x1UL,
+ BNG_RE_FLAG_EPOCH_CONS_MASK = 0x1UL,
+ BNG_RE_FLAG_EPOCH_PROD_MASK = 0x2UL,
+};
+
+enum bng_re_db_epoch_flag_shift {
+ BNG_RE_DB_EPOCH_CONS_SHIFT = BNG_RE_DBR_EPOCH_SHIFT,
+ BNG_RE_DB_EPOCH_PROD_SHIFT = (BNG_RE_DBR_EPOCH_SHIFT - 1),
+};
+
+struct bng_re_chip_ctx {
+ u16 chip_num;
+ u16 hw_stats_size;
+ u64 hwrm_intf_ver;
+ u16 hwrm_cmd_max_timeout;
+};
+
+struct bng_re_pbl {
+ u32 pg_count;
+ u32 pg_size;
+ void **pg_arr;
+ dma_addr_t *pg_map_arr;
+};
+
+enum bng_re_pbl_lvl {
+ BNG_PBL_LVL_0,
+ BNG_PBL_LVL_1,
+ BNG_PBL_LVL_2,
+ BNG_PBL_LVL_MAX
+};
+
+enum bng_re_hwq_type {
+ BNG_HWQ_TYPE_CTX,
+ BNG_HWQ_TYPE_QUEUE
+};
+
+struct bng_re_sg_info {
+ u32 npages;
+ u32 pgshft;
+ u32 pgsize;
+ bool nopte;
+};
+
+struct bng_re_hwq_attr {
+ struct bng_re_res *res;
+ struct bng_re_sg_info *sginfo;
+ enum bng_re_hwq_type type;
+ u32 depth;
+ u32 stride;
+ u32 aux_stride;
+ u32 aux_depth;
+};
+
+struct bng_re_hwq {
+ struct pci_dev *pdev;
+ /* lock to protect hwq */
+ spinlock_t lock;
+ struct bng_re_pbl pbl[BNG_PBL_LVL_MAX + 1];
+ /* Valid values: 0, 1, 2 */
+ enum bng_re_pbl_lvl level;
+ /* PBL entries */
+ void **pbl_ptr;
+ /* PBL dma_addr */
+ dma_addr_t *pbl_dma_ptr;
+ u32 max_elements;
+ u32 depth;
+ u16 element_size;
+ u32 prod;
+ u32 cons;
+ /* queue entry per page */
+ u16 qe_ppg;
+};
+
+struct bng_re_stats {
+ dma_addr_t dma_map;
+ void *dma;
+ u32 size;
+ u32 fw_id;
+};
+
+struct bng_re_res {
+ struct pci_dev *pdev;
+ struct bng_re_chip_ctx *cctx;
+ struct bng_re_dev_attr *dattr;
+};
+
+static inline void *bng_re_get_qe(struct bng_re_hwq *hwq,
+ u32 indx, u64 *pg)
+{
+ u32 pg_num, pg_idx;
+
+ pg_num = (indx / hwq->qe_ppg);
+ pg_idx = (indx % hwq->qe_ppg);
+ if (pg)
+ *pg = (u64)&hwq->pbl_ptr[pg_num];
+ return (void *)(hwq->pbl_ptr[pg_num] + hwq->element_size * pg_idx);
+}
+
+#define BNG_RE_INIT_DBHDR(xid, type, indx, toggle) \
+ (((u64)(((xid) & DBC_DBC_XID_MASK) | DBC_DBC_PATH_ROCE | \
+ (type) | BNG_RE_DBR_VALID) << 32) | (indx) | \
+ (((u32)(toggle)) << (BNG_RE_DBR_TOGGLE_SHIFT)))
+
+static inline void bng_re_ring_db(struct bng_re_db_info *info,
+ u32 type)
+{
+ u64 key = 0;
+ u32 indx;
+ u8 toggle = 0;
+
+ if (type == DBC_DBC_TYPE_CQ_ARMALL ||
+ type == DBC_DBC_TYPE_CQ_ARMSE)
+ toggle = info->toggle;
+
+ indx = (info->hwq->cons & DBC_DBC_INDEX_MASK) |
+ ((info->flags & BNG_RE_FLAG_EPOCH_CONS_MASK) <<
+ BNG_RE_DB_EPOCH_CONS_SHIFT);
+
+ key = BNG_RE_INIT_DBHDR(info->xid, type, indx, toggle);
+ writeq(key, info->db);
+}
+
+static inline void bng_re_ring_nq_db(struct bng_re_db_info *info,
+ struct bng_re_chip_ctx *cctx,
+ bool arm)
+{
+ u32 type;
+
+ type = arm ? DBC_DBC_TYPE_NQ_ARM : DBC_DBC_TYPE_NQ;
+ bng_re_ring_db(info, type);
+}
+
+static inline void bng_re_hwq_incr_cons(u32 max_elements, u32 *cons, u32 cnt,
+ u32 *dbinfo_flags)
+{
+ /* move cons and update toggle/epoch if wrap around */
+ *cons += cnt;
+ if (*cons >= max_elements) {
+ *cons %= max_elements;
+ *dbinfo_flags ^= 1UL << BNG_RE_FLAG_EPOCH_CONS_SHIFT;
+ }
+}
+
+static inline bool _is_max_srq_ext_supported(u16 dev_cap_ext_flags_2)
+{
+ return !!(dev_cap_ext_flags_2 & CREQ_QUERY_FUNC_RESP_SB_MAX_SRQ_EXTENDED);
+}
+
+void bng_re_free_hwq(struct bng_re_res *res,
+ struct bng_re_hwq *hwq);
+
+int bng_re_alloc_init_hwq(struct bng_re_hwq *hwq,
+ struct bng_re_hwq_attr *hwq_attr);
+
+void bng_re_free_stats_ctx_mem(struct pci_dev *pdev,
+ struct bng_re_stats *stats);
+
+int bng_re_alloc_stats_ctx_mem(struct pci_dev *pdev,
+ struct bng_re_chip_ctx *cctx,
+ struct bng_re_stats *stats);
+#endif
diff --git a/drivers/infiniband/hw/bng_re/bng_sp.c b/drivers/infiniband/hw/bng_re/bng_sp.c
new file mode 100644
index 000000000000..83099e05328d
--- /dev/null
+++ b/drivers/infiniband/hw/bng_re/bng_sp.c
@@ -0,0 +1,131 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2025 Broadcom.
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+
+#include "bng_res.h"
+#include "bng_fw.h"
+#include "bng_sp.h"
+#include "bng_tlv.h"
+
+static bool bng_re_is_atomic_cap(struct bng_re_rcfw *rcfw)
+{
+ u16 pcie_ctl2 = 0;
+
+ pcie_capability_read_word(rcfw->pdev, PCI_EXP_DEVCTL2, &pcie_ctl2);
+ return (pcie_ctl2 & PCI_EXP_DEVCTL2_ATOMIC_REQ);
+}
+
+static void bng_re_query_version(struct bng_re_rcfw *rcfw,
+ char *fw_ver)
+{
+ struct creq_query_version_resp resp = {};
+ struct bng_re_cmdqmsg msg = {};
+ struct cmdq_query_version req = {};
+ int rc;
+
+ bng_re_rcfw_cmd_prep((struct cmdq_base *)&req,
+ CMDQ_BASE_OPCODE_QUERY_VERSION,
+ sizeof(req));
+
+ bng_re_fill_cmdqmsg(&msg, &req, &resp, NULL, sizeof(req), sizeof(resp), 0);
+ rc = bng_re_rcfw_send_message(rcfw, &msg);
+ if (rc)
+ return;
+ fw_ver[0] = resp.fw_maj;
+ fw_ver[1] = resp.fw_minor;
+ fw_ver[2] = resp.fw_bld;
+ fw_ver[3] = resp.fw_rsvd;
+}
+
+int bng_re_get_dev_attr(struct bng_re_rcfw *rcfw)
+{
+ struct bng_re_dev_attr *attr = rcfw->res->dattr;
+ struct creq_query_func_resp resp = {};
+ struct bng_re_cmdqmsg msg = {};
+ struct creq_query_func_resp_sb *sb;
+ struct bng_re_rcfw_sbuf sbuf;
+ struct cmdq_query_func req = {};
+ u8 *tqm_alloc;
+ int i, rc;
+ u32 temp;
+
+ bng_re_rcfw_cmd_prep((struct cmdq_base *)&req,
+ CMDQ_BASE_OPCODE_QUERY_FUNC,
+ sizeof(req));
+
+ sbuf.size = ALIGN(sizeof(*sb), BNG_FW_CMDQE_UNITS);
+ sbuf.sb = dma_alloc_coherent(&rcfw->pdev->dev, sbuf.size,
+ &sbuf.dma_addr, GFP_KERNEL);
+ if (!sbuf.sb)
+ return -ENOMEM;
+ sb = sbuf.sb;
+ req.resp_size = sbuf.size / BNG_FW_CMDQE_UNITS;
+ bng_re_fill_cmdqmsg(&msg, &req, &resp, &sbuf, sizeof(req),
+ sizeof(resp), 0);
+ rc = bng_re_rcfw_send_message(rcfw, &msg);
+ if (rc)
+ goto bail;
+ /* Extract the context from the side buffer */
+ attr->max_qp = le32_to_cpu(sb->max_qp);
+ /* max_qp value reported by FW doesn't include the QP1 */
+ attr->max_qp += 1;
+ attr->max_qp_rd_atom =
+ sb->max_qp_rd_atom > BNG_RE_MAX_OUT_RD_ATOM ?
+ BNG_RE_MAX_OUT_RD_ATOM : sb->max_qp_rd_atom;
+ attr->max_qp_init_rd_atom =
+ sb->max_qp_init_rd_atom > BNG_RE_MAX_OUT_RD_ATOM ?
+ BNG_RE_MAX_OUT_RD_ATOM : sb->max_qp_init_rd_atom;
+ attr->max_qp_wqes = le16_to_cpu(sb->max_qp_wr) - 1;
+
+ /* Adjust for max_qp_wqes for variable wqe */
+ attr->max_qp_wqes = min_t(u32, attr->max_qp_wqes, BNG_VAR_MAX_WQE - 1);
+
+ attr->max_qp_sges = min_t(u32, sb->max_sge_var_wqe, BNG_VAR_MAX_SGE);
+ attr->max_cq = le32_to_cpu(sb->max_cq);
+ attr->max_cq_wqes = le32_to_cpu(sb->max_cqe);
+ attr->max_cq_sges = attr->max_qp_sges;
+ attr->max_mr = le32_to_cpu(sb->max_mr);
+ attr->max_mw = le32_to_cpu(sb->max_mw);
+
+ attr->max_mr_size = le64_to_cpu(sb->max_mr_size);
+ attr->max_pd = 64 * 1024;
+ attr->max_raw_ethy_qp = le32_to_cpu(sb->max_raw_eth_qp);
+ attr->max_ah = le32_to_cpu(sb->max_ah);
+
+ attr->max_srq = le16_to_cpu(sb->max_srq);
+ attr->max_srq_wqes = le32_to_cpu(sb->max_srq_wr) - 1;
+ attr->max_srq_sges = sb->max_srq_sge;
+ attr->max_pkey = 1;
+ attr->max_inline_data = le32_to_cpu(sb->max_inline_data);
+ /*
+ * Read the max gid supported by HW.
+ * For each entry in HW GID in HW table, we consume 2
+ * GID entries in the kernel GID table. So max_gid reported
+ * to stack can be up to twice the value reported by the HW, up to 256 gids.
+ */
+ attr->max_sgid = le32_to_cpu(sb->max_gid);
+ attr->max_sgid = min_t(u32, BNG_RE_NUM_GIDS_SUPPORTED, 2 * attr->max_sgid);
+ attr->dev_cap_flags = le16_to_cpu(sb->dev_cap_flags);
+ attr->dev_cap_flags2 = le16_to_cpu(sb->dev_cap_ext_flags_2);
+
+ if (_is_max_srq_ext_supported(attr->dev_cap_flags2))
+ attr->max_srq += le16_to_cpu(sb->max_srq_ext);
+
+ bng_re_query_version(rcfw, attr->fw_ver);
+ for (i = 0; i < BNG_MAX_TQM_ALLOC_REQ / 4; i++) {
+ temp = le32_to_cpu(sb->tqm_alloc_reqs[i]);
+ tqm_alloc = (u8 *)&temp;
+ attr->tqm_alloc_reqs[i * 4] = *tqm_alloc;
+ attr->tqm_alloc_reqs[i * 4 + 1] = *(++tqm_alloc);
+ attr->tqm_alloc_reqs[i * 4 + 2] = *(++tqm_alloc);
+ attr->tqm_alloc_reqs[i * 4 + 3] = *(++tqm_alloc);
+ }
+
+ attr->max_dpi = le32_to_cpu(sb->max_dpi);
+ attr->is_atomic = bng_re_is_atomic_cap(rcfw);
+bail:
+ dma_free_coherent(&rcfw->pdev->dev, sbuf.size,
+ sbuf.sb, sbuf.dma_addr);
+ return rc;
+}
diff --git a/drivers/infiniband/hw/bng_re/bng_sp.h b/drivers/infiniband/hw/bng_re/bng_sp.h
new file mode 100644
index 000000000000..e15190515ed1
--- /dev/null
+++ b/drivers/infiniband/hw/bng_re/bng_sp.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+// Copyright (c) 2025 Broadcom.
+
+#ifndef __BNG_SP_H__
+#define __BNG_SP_H__
+
+#include "bng_fw.h"
+
+#define BNG_VAR_MAX_WQE 4352
+#define BNG_VAR_MAX_SGE 13
+
+struct bng_re_dev_attr {
+#define FW_VER_ARR_LEN 4
+ u8 fw_ver[FW_VER_ARR_LEN];
+#define BNG_RE_NUM_GIDS_SUPPORTED 256
+ u16 max_sgid;
+ u16 max_mrw;
+ u32 max_qp;
+#define BNG_RE_MAX_OUT_RD_ATOM 126
+ u32 max_qp_rd_atom;
+ u32 max_qp_init_rd_atom;
+ u32 max_qp_wqes;
+ u32 max_qp_sges;
+ u32 max_cq;
+ u32 max_cq_wqes;
+ u32 max_cq_sges;
+ u32 max_mr;
+ u64 max_mr_size;
+ u32 max_pd;
+ u32 max_mw;
+ u32 max_raw_ethy_qp;
+ u32 max_ah;
+ u32 max_srq;
+ u32 max_srq_wqes;
+ u32 max_srq_sges;
+ u32 max_pkey;
+ u32 max_inline_data;
+ u32 l2_db_size;
+ u8 tqm_alloc_reqs[BNG_MAX_TQM_ALLOC_REQ];
+ bool is_atomic;
+ u16 dev_cap_flags;
+ u16 dev_cap_flags2;
+ u32 max_dpi;
+};
+
+int bng_re_get_dev_attr(struct bng_re_rcfw *rcfw);
+#endif
diff --git a/drivers/infiniband/hw/bng_re/bng_tlv.h b/drivers/infiniband/hw/bng_re/bng_tlv.h
new file mode 100644
index 000000000000..278f4922962d
--- /dev/null
+++ b/drivers/infiniband/hw/bng_re/bng_tlv.h
@@ -0,0 +1,128 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
+
+#ifndef __BNG_TLV_H__
+#define __BNG_TLV_H__
+
+#include "roce_hsi.h"
+
+struct roce_tlv {
+ struct tlv tlv;
+ u8 total_size; // in units of 16 byte chunks
+ u8 unused[7]; // for 16 byte alignment
+};
+
+/*
+ * TLV size in units of 16 byte chunks
+ */
+#define TLV_SIZE ((sizeof(struct roce_tlv) + 15) / 16)
+/*
+ * TLV length in bytes
+ */
+#define TLV_BYTES (TLV_SIZE * 16)
+
+#define HAS_TLV_HEADER(msg) (le16_to_cpu(((struct tlv *)(msg))->cmd_discr) == CMD_DISCR_TLV_ENCAP)
+#define GET_TLV_DATA(tlv) ((void *)&((uint8_t *)(tlv))[TLV_BYTES])
+
+static inline u8 __get_cmdq_base_opcode(struct cmdq_base *req, u32 size)
+{
+ if (HAS_TLV_HEADER(req) && size > TLV_BYTES)
+ return ((struct cmdq_base *)GET_TLV_DATA(req))->opcode;
+ else
+ return req->opcode;
+}
+
+static inline void __set_cmdq_base_opcode(struct cmdq_base *req,
+ u32 size, u8 val)
+{
+ if (HAS_TLV_HEADER(req) && size > TLV_BYTES)
+ ((struct cmdq_base *)GET_TLV_DATA(req))->opcode = val;
+ else
+ req->opcode = val;
+}
+
+static inline __le16 __get_cmdq_base_cookie(struct cmdq_base *req, u32 size)
+{
+ if (HAS_TLV_HEADER(req) && size > TLV_BYTES)
+ return ((struct cmdq_base *)GET_TLV_DATA(req))->cookie;
+ else
+ return req->cookie;
+}
+
+static inline void __set_cmdq_base_cookie(struct cmdq_base *req,
+ u32 size, __le16 val)
+{
+ if (HAS_TLV_HEADER(req) && size > TLV_BYTES)
+ ((struct cmdq_base *)GET_TLV_DATA(req))->cookie = val;
+ else
+ req->cookie = val;
+}
+
+static inline __le64 __get_cmdq_base_resp_addr(struct cmdq_base *req, u32 size)
+{
+ if (HAS_TLV_HEADER(req) && size > TLV_BYTES)
+ return ((struct cmdq_base *)GET_TLV_DATA(req))->resp_addr;
+ else
+ return req->resp_addr;
+}
+
+static inline void __set_cmdq_base_resp_addr(struct cmdq_base *req,
+ u32 size, __le64 val)
+{
+ if (HAS_TLV_HEADER(req) && size > TLV_BYTES)
+ ((struct cmdq_base *)GET_TLV_DATA(req))->resp_addr = val;
+ else
+ req->resp_addr = val;
+}
+
+static inline u8 __get_cmdq_base_resp_size(struct cmdq_base *req, u32 size)
+{
+ if (HAS_TLV_HEADER(req) && size > TLV_BYTES)
+ return ((struct cmdq_base *)GET_TLV_DATA(req))->resp_size;
+ else
+ return req->resp_size;
+}
+
+static inline void __set_cmdq_base_resp_size(struct cmdq_base *req,
+ u32 size, u8 val)
+{
+ if (HAS_TLV_HEADER(req) && size > TLV_BYTES)
+ ((struct cmdq_base *)GET_TLV_DATA(req))->resp_size = val;
+ else
+ req->resp_size = val;
+}
+
+static inline u8 __get_cmdq_base_cmd_size(struct cmdq_base *req, u32 size)
+{
+ if (HAS_TLV_HEADER(req) && size > TLV_BYTES)
+ return ((struct roce_tlv *)(req))->total_size;
+ else
+ return req->cmd_size;
+}
+
+static inline void __set_cmdq_base_cmd_size(struct cmdq_base *req,
+ u32 size, u8 val)
+{
+ if (HAS_TLV_HEADER(req) && size > TLV_BYTES)
+ ((struct cmdq_base *)GET_TLV_DATA(req))->cmd_size = val;
+ else
+ req->cmd_size = val;
+}
+
+static inline __le16 __get_cmdq_base_flags(struct cmdq_base *req, u32 size)
+{
+ if (HAS_TLV_HEADER(req) && size > TLV_BYTES)
+ return ((struct cmdq_base *)GET_TLV_DATA(req))->flags;
+ else
+ return req->flags;
+}
+
+static inline void __set_cmdq_base_flags(struct cmdq_base *req,
+ u32 size, __le16 val)
+{
+ if (HAS_TLV_HEADER(req) && size > TLV_BYTES)
+ ((struct cmdq_base *)GET_TLV_DATA(req))->flags = val;
+ else
+ req->flags = val;
+}
+
+#endif /* __BNG_TLV_H__ */
diff --git a/drivers/infiniband/hw/bnxt_re/Kconfig b/drivers/infiniband/hw/bnxt_re/Kconfig
new file mode 100644
index 000000000000..6a17f5cdb020
--- /dev/null
+++ b/drivers/infiniband/hw/bnxt_re/Kconfig
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0-only
+config INFINIBAND_BNXT_RE
+ tristate "Broadcom Netxtreme HCA support"
+ depends on 64BIT
+ depends on INET && DCB && BNXT
+ help
+ This driver supports Broadcom NetXtreme-E 10/25/40/50 gigabit
+ RoCE HCAs. To compile this driver as a module, choose M here:
+ the module will be called bnxt_re.
diff --git a/drivers/infiniband/hw/bnxt_re/Makefile b/drivers/infiniband/hw/bnxt_re/Makefile
new file mode 100644
index 000000000000..f63417d2ccc6
--- /dev/null
+++ b/drivers/infiniband/hw/bnxt_re/Makefile
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: GPL-2.0
+
+ccflags-y := -I $(srctree)/drivers/net/ethernet/broadcom/bnxt
+obj-$(CONFIG_INFINIBAND_BNXT_RE) += bnxt_re.o
+bnxt_re-y := main.o ib_verbs.o \
+ qplib_res.o qplib_rcfw.o \
+ qplib_sp.o qplib_fp.o hw_counters.o \
+ debugfs.o
diff --git a/drivers/infiniband/hw/bnxt_re/bnxt_re.h b/drivers/infiniband/hw/bnxt_re/bnxt_re.h
new file mode 100644
index 000000000000..3a7ce4729fcf
--- /dev/null
+++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h
@@ -0,0 +1,292 @@
+/*
+ * Broadcom NetXtreme-E RoCE driver.
+ *
+ * Copyright (c) 2016 - 2017, Broadcom. All rights reserved. The term
+ * Broadcom refers to Broadcom Limited and/or its subsidiaries.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Description: Slow Path Operators (header)
+ *
+ */
+
+#ifndef __BNXT_RE_H__
+#define __BNXT_RE_H__
+#include <rdma/uverbs_ioctl.h>
+#include "hw_counters.h"
+#include <linux/hashtable.h>
+#define ROCE_DRV_MODULE_NAME "bnxt_re"
+
+#define BNXT_RE_DESC "Broadcom NetXtreme-C/E RoCE Driver"
+
+#define BNXT_RE_PAGE_SHIFT_1G (30)
+#define BNXT_RE_PAGE_SIZE_SUPPORTED 0x7FFFF000 /* 4kb - 1G */
+
+#define BNXT_RE_MAX_MR_SIZE_LOW BIT_ULL(BNXT_RE_PAGE_SHIFT_1G)
+#define BNXT_RE_MAX_MR_SIZE_HIGH BIT_ULL(39)
+#define BNXT_RE_MAX_MR_SIZE BNXT_RE_MAX_MR_SIZE_HIGH
+
+
+/* Number of MRs to reserve for PF, leaving remainder for VFs */
+#define BNXT_RE_RESVD_MR_FOR_PF (32 * 1024)
+#define BNXT_RE_MAX_GID_PER_VF 128
+
+/*
+ * Percentage of resources of each type reserved for PF.
+ * Remaining resources are divided equally among VFs.
+ * [0, 100]
+ */
+#define BNXT_RE_PCT_RSVD_FOR_PF 50
+
+#define BNXT_RE_UD_QP_HW_STALL 0x400000
+
+#define BNXT_RE_RQ_WQE_THRESHOLD 32
+
+/*
+ * Setting the default ack delay value to 16, which means
+ * the default timeout is approx. 260ms(4 usec * 2 ^(timeout))
+ */
+
+#define BNXT_RE_DEFAULT_ACK_DELAY 16
+
+struct bnxt_re_ring_attr {
+ dma_addr_t *dma_arr;
+ int pages;
+ int type;
+ u32 depth;
+ u32 lrid; /* Logical ring id */
+ u8 mode;
+};
+
+/*
+ * Data structure and defines to handle
+ * recovery
+ */
+#define BNXT_RE_PRE_RECOVERY_REMOVE 0x1
+#define BNXT_RE_COMPLETE_REMOVE 0x2
+#define BNXT_RE_POST_RECOVERY_INIT 0x4
+#define BNXT_RE_COMPLETE_INIT 0x8
+
+struct bnxt_re_sqp_entries {
+ struct bnxt_qplib_sge sge;
+ u64 wrid;
+ /* For storing the actual qp1 cqe */
+ struct bnxt_qplib_cqe cqe;
+ struct bnxt_re_qp *qp1_qp;
+};
+
+#define BNXT_RE_MAX_GSI_SQP_ENTRIES 1024
+struct bnxt_re_gsi_context {
+ struct bnxt_re_qp *gsi_qp;
+ struct bnxt_re_qp *gsi_sqp;
+ struct bnxt_re_ah *gsi_sah;
+ struct bnxt_re_sqp_entries *sqp_tbl;
+};
+
+struct bnxt_re_en_dev_info {
+ struct bnxt_en_dev *en_dev;
+ struct bnxt_re_dev *rdev;
+};
+
+#define BNXT_RE_AEQ_IDX 0
+#define BNXT_RE_NQ_IDX 1
+#define BNXT_RE_GEN_P5_MAX_VF 64
+
+struct bnxt_re_pacing {
+ u64 dbr_db_fifo_reg_off;
+ void *dbr_page;
+ u64 dbr_bar_addr;
+ u32 pacing_algo_th;
+ u32 do_pacing_save;
+ u32 dbq_pacing_time; /* ms */
+ u32 dbr_def_do_pacing;
+ bool dbr_pacing;
+ struct mutex dbq_lock; /* synchronize db pacing algo */
+};
+
+#define BNXT_RE_MAX_DBR_DO_PACING 0xFFFF
+#define BNXT_RE_DBR_PACING_TIME 5 /* ms */
+#define BNXT_RE_PACING_ALGO_THRESHOLD 250 /* Entries in DB FIFO */
+#define BNXT_RE_PACING_ALARM_TH_MULTIPLE 2 /* Multiple of pacing algo threshold */
+/* Default do_pacing value when there is no congestion */
+#define BNXT_RE_DBR_DO_PACING_NO_CONGESTION 0x7F /* 1 in 512 probability */
+
+#define BNXT_RE_MAX_FIFO_DEPTH_P5 0x2c00
+#define BNXT_RE_MAX_FIFO_DEPTH_P7 0x8000
+
+#define BNXT_RE_MAX_FIFO_DEPTH(ctx) \
+ (bnxt_qplib_is_chip_gen_p7((ctx)) ? \
+ BNXT_RE_MAX_FIFO_DEPTH_P7 :\
+ BNXT_RE_MAX_FIFO_DEPTH_P5)
+
+#define BNXT_RE_GRC_FIFO_REG_BASE 0x2000
+
+#define BNXT_RE_MIN_MSIX 2
+#define BNXT_RE_MAX_MSIX BNXT_MAX_ROCE_MSIX
+struct bnxt_re_nq_record {
+ struct bnxt_msix_entry msix_entries[BNXT_RE_MAX_MSIX];
+ struct bnxt_qplib_nq nq[BNXT_RE_MAX_MSIX];
+ int num_msix;
+ /* serialize NQ access */
+ struct mutex load_lock;
+};
+
+#define MAX_CQ_HASH_BITS (16)
+#define MAX_SRQ_HASH_BITS (16)
+
+static inline bool bnxt_re_chip_gen_p7(u16 chip_num)
+{
+ return (chip_num == CHIP_NUM_58818 ||
+ chip_num == CHIP_NUM_57608);
+}
+
+struct bnxt_re_dev {
+ struct ib_device ibdev;
+ struct list_head list;
+ unsigned long flags;
+#define BNXT_RE_FLAG_NETDEV_REGISTERED 0
+#define BNXT_RE_FLAG_STATS_CTX3_ALLOC 1
+#define BNXT_RE_FLAG_HAVE_L2_REF 3
+#define BNXT_RE_FLAG_RCFW_CHANNEL_EN 4
+#define BNXT_RE_FLAG_RESOURCES_ALLOCATED 7
+#define BNXT_RE_FLAG_RESOURCES_INITIALIZED 8
+#define BNXT_RE_FLAG_ERR_DEVICE_DETACHED 17
+#define BNXT_RE_FLAG_ISSUE_ROCE_STATS 29
+ struct net_device *netdev;
+ struct auxiliary_device *adev;
+ unsigned int version, major, minor;
+ struct bnxt_qplib_chip_ctx *chip_ctx;
+ struct bnxt_en_dev *en_dev;
+
+ int id;
+
+ /* RCFW Channel */
+ struct bnxt_qplib_rcfw rcfw;
+
+ /* NQ record */
+ struct bnxt_re_nq_record *nqr;
+
+ /* Device Resources */
+ struct bnxt_qplib_dev_attr *dev_attr;
+ struct bnxt_qplib_ctx qplib_ctx;
+ struct bnxt_qplib_res qplib_res;
+ struct bnxt_qplib_dpi dpi_privileged;
+ struct bnxt_qplib_cq_coal_param cq_coalescing;
+
+ struct mutex qp_lock; /* protect qp list */
+ struct list_head qp_list;
+
+ /* Max of 2 lossless traffic class supported per port */
+ u16 cosq[2];
+
+ /* QP for handling QP1 packets */
+ struct bnxt_re_gsi_context gsi_ctx;
+ struct bnxt_re_stats stats;
+ atomic_t nq_alloc_cnt;
+ u32 is_virtfn;
+ u32 num_vfs;
+ struct bnxt_re_pacing pacing;
+ struct work_struct dbq_fifo_check_work;
+ struct delayed_work dbq_pacing_work;
+ DECLARE_HASHTABLE(cq_hash, MAX_CQ_HASH_BITS);
+ DECLARE_HASHTABLE(srq_hash, MAX_SRQ_HASH_BITS);
+ struct dentry *dbg_root;
+ struct dentry *qp_debugfs;
+ unsigned long event_bitmap;
+ struct bnxt_qplib_cc_param cc_param;
+ struct workqueue_struct *dcb_wq;
+ struct dentry *cc_config;
+ struct bnxt_re_dbg_cc_config_params *cc_config_params;
+ struct dentry *cq_coal_cfg;
+ struct bnxt_re_dbg_cq_coal_params *cq_coal_cfg_params;
+#define BNXT_VPD_FLD_LEN 32
+ char board_partno[BNXT_VPD_FLD_LEN];
+ /* RoCE mirror */
+ u16 mirror_vnic_id;
+ union ib_gid ugid;
+ u32 ugid_index;
+ u8 sniffer_flow_created : 1;
+};
+
+#define to_bnxt_re_dev(ptr, member) \
+ container_of((ptr), struct bnxt_re_dev, member)
+
+#define BNXT_RE_ROCE_V1_PACKET 0
+#define BNXT_RE_ROCEV2_IPV4_PACKET 2
+#define BNXT_RE_ROCEV2_IPV6_PACKET 3
+
+#define BNXT_RE_CHECK_RC(x) ((x) && ((x) != -ETIMEDOUT))
+void bnxt_re_pacing_alert(struct bnxt_re_dev *rdev);
+
+int bnxt_re_assign_pma_port_counters(struct bnxt_re_dev *rdev, struct ib_mad *out_mad);
+int bnxt_re_assign_pma_port_ext_counters(struct bnxt_re_dev *rdev,
+ struct ib_mad *out_mad);
+
+void bnxt_re_hwrm_free_vnic(struct bnxt_re_dev *rdev);
+int bnxt_re_hwrm_alloc_vnic(struct bnxt_re_dev *rdev);
+int bnxt_re_hwrm_cfg_vnic(struct bnxt_re_dev *rdev, u32 qp_id);
+
+static inline struct device *rdev_to_dev(struct bnxt_re_dev *rdev)
+{
+ if (rdev)
+ return &rdev->ibdev.dev;
+ return NULL;
+}
+
+extern const struct uapi_definition bnxt_re_uapi_defs[];
+
+static inline void bnxt_re_set_pacing_dev_state(struct bnxt_re_dev *rdev)
+{
+ rdev->qplib_res.pacing_data->dev_err_state =
+ test_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags);
+}
+
+static inline int bnxt_re_read_context_allowed(struct bnxt_re_dev *rdev)
+{
+ if (!bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx) ||
+ rdev->rcfw.res->cctx->hwrm_intf_ver < HWRM_VERSION_READ_CTX)
+ return -EOPNOTSUPP;
+ return 0;
+}
+
+#define BNXT_RE_CONTEXT_TYPE_QPC_SIZE_P5 1088
+#define BNXT_RE_CONTEXT_TYPE_CQ_SIZE_P5 128
+#define BNXT_RE_CONTEXT_TYPE_MRW_SIZE_P5 128
+#define BNXT_RE_CONTEXT_TYPE_SRQ_SIZE_P5 192
+
+#define BNXT_RE_CONTEXT_TYPE_QPC_SIZE_P7 1088
+#define BNXT_RE_CONTEXT_TYPE_CQ_SIZE_P7 192
+#define BNXT_RE_CONTEXT_TYPE_MRW_SIZE_P7 192
+#define BNXT_RE_CONTEXT_TYPE_SRQ_SIZE_P7 192
+
+#define BNXT_RE_HWRM_CMD_TIMEOUT(rdev) \
+ ((rdev)->chip_ctx->hwrm_cmd_max_timeout * 1000)
+
+#endif
diff --git a/drivers/infiniband/hw/bnxt_re/debugfs.c b/drivers/infiniband/hw/bnxt_re/debugfs.c
new file mode 100644
index 000000000000..88817c86ae24
--- /dev/null
+++ b/drivers/infiniband/hw/bnxt_re/debugfs.c
@@ -0,0 +1,524 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
+/*
+ * Copyright (c) 2024, Broadcom. All rights reserved. The term
+ * Broadcom refers to Broadcom Limited and/or its subsidiaries.
+ *
+ * Description: Debugfs component of the bnxt_re driver
+ */
+
+#include <linux/debugfs.h>
+#include <linux/pci.h>
+#include <linux/seq_file.h>
+#include <rdma/ib_addr.h>
+
+#include "bnxt_ulp.h"
+#include "roce_hsi.h"
+#include "qplib_res.h"
+#include "qplib_sp.h"
+#include "qplib_fp.h"
+#include "qplib_rcfw.h"
+#include "bnxt_re.h"
+#include "ib_verbs.h"
+#include "debugfs.h"
+
+static struct dentry *bnxt_re_debugfs_root;
+
+static const char * const bnxt_re_cq_coal_str[] = {
+ "buf_maxtime",
+ "normal_maxbuf",
+ "during_maxbuf",
+ "en_ring_idle_mode",
+ "enable",
+};
+
+static const char * const bnxt_re_cc_gen0_name[] = {
+ "enable_cc",
+ "run_avg_weight_g",
+ "num_phase_per_state",
+ "init_cr",
+ "init_tr",
+ "tos_ecn",
+ "tos_dscp",
+ "alt_vlan_pcp",
+ "alt_vlan_dscp",
+ "rtt",
+ "cc_mode",
+ "tcp_cp",
+ "tx_queue",
+ "inactivity_cp",
+};
+
+static inline const char *bnxt_re_qp_state_str(u8 state)
+{
+ switch (state) {
+ case CMDQ_MODIFY_QP_NEW_STATE_RESET:
+ return "RST";
+ case CMDQ_MODIFY_QP_NEW_STATE_INIT:
+ return "INIT";
+ case CMDQ_MODIFY_QP_NEW_STATE_RTR:
+ return "RTR";
+ case CMDQ_MODIFY_QP_NEW_STATE_RTS:
+ return "RTS";
+ case CMDQ_MODIFY_QP_NEW_STATE_SQE:
+ return "SQER";
+ case CMDQ_MODIFY_QP_NEW_STATE_SQD:
+ return "SQD";
+ case CMDQ_MODIFY_QP_NEW_STATE_ERR:
+ return "ERR";
+ default:
+ return "Invalid QP state";
+ }
+}
+
+static inline const char *bnxt_re_qp_type_str(u8 type)
+{
+ switch (type) {
+ case CMDQ_CREATE_QP1_TYPE_GSI: return "QP1";
+ case CMDQ_CREATE_QP_TYPE_GSI: return "QP1";
+ case CMDQ_CREATE_QP_TYPE_RC: return "RC";
+ case CMDQ_CREATE_QP_TYPE_UD: return "UD";
+ case CMDQ_CREATE_QP_TYPE_RAW_ETHERTYPE: return "RAW_ETHERTYPE";
+ default: return "Invalid transport type";
+ }
+}
+
+static ssize_t qp_info_read(struct file *filep,
+ char __user *buffer,
+ size_t count, loff_t *ppos)
+{
+ struct bnxt_re_qp *qp = filep->private_data;
+ char *buf;
+ int len;
+
+ if (*ppos)
+ return 0;
+
+ buf = kasprintf(GFP_KERNEL,
+ "QPN\t\t: %d\n"
+ "transport\t: %s\n"
+ "state\t\t: %s\n"
+ "mtu\t\t: %d\n"
+ "timeout\t\t: %d\n"
+ "remote QPN\t: %d\n",
+ qp->qplib_qp.id,
+ bnxt_re_qp_type_str(qp->qplib_qp.type),
+ bnxt_re_qp_state_str(qp->qplib_qp.state),
+ qp->qplib_qp.mtu,
+ qp->qplib_qp.timeout,
+ qp->qplib_qp.dest_qpn);
+ if (!buf)
+ return -ENOMEM;
+ if (count < strlen(buf)) {
+ kfree(buf);
+ return -ENOSPC;
+ }
+ len = simple_read_from_buffer(buffer, count, ppos, buf, strlen(buf));
+ kfree(buf);
+ return len;
+}
+
+static const struct file_operations debugfs_qp_fops = {
+ .owner = THIS_MODULE,
+ .open = simple_open,
+ .read = qp_info_read,
+};
+
+void bnxt_re_debug_add_qpinfo(struct bnxt_re_dev *rdev, struct bnxt_re_qp *qp)
+{
+ char resn[32];
+
+ sprintf(resn, "0x%x", qp->qplib_qp.id);
+ qp->dentry = debugfs_create_file(resn, 0400, rdev->qp_debugfs, qp, &debugfs_qp_fops);
+}
+
+void bnxt_re_debug_rem_qpinfo(struct bnxt_re_dev *rdev, struct bnxt_re_qp *qp)
+{
+ debugfs_remove(qp->dentry);
+}
+
+static int map_cc_config_offset_gen0_ext0(u32 offset, struct bnxt_qplib_cc_param *ccparam, u32 *val)
+{
+ u64 map_offset;
+
+ map_offset = BIT(offset);
+
+ switch (map_offset) {
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ENABLE_CC:
+ *val = ccparam->enable;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_G:
+ *val = ccparam->g;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_NUMPHASEPERSTATE:
+ *val = ccparam->nph_per_state;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_INIT_CR:
+ *val = ccparam->init_cr;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_INIT_TR:
+ *val = ccparam->init_tr;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TOS_ECN:
+ *val = ccparam->tos_ecn;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TOS_DSCP:
+ *val = ccparam->tos_dscp;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ALT_VLAN_PCP:
+ *val = ccparam->alt_vlan_pcp;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ALT_TOS_DSCP:
+ *val = ccparam->alt_tos_dscp;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_RTT:
+ *val = ccparam->rtt;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_CC_MODE:
+ *val = ccparam->cc_mode;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TCP_CP:
+ *val = ccparam->tcp_cp;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_INACTIVITY_CP:
+ *val = ccparam->inact_th;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static ssize_t bnxt_re_cc_config_get(struct file *filp, char __user *buffer,
+ size_t usr_buf_len, loff_t *ppos)
+{
+ struct bnxt_re_cc_param *dbg_cc_param = filp->private_data;
+ struct bnxt_re_dev *rdev = dbg_cc_param->rdev;
+ struct bnxt_qplib_cc_param ccparam = {};
+ u32 offset = dbg_cc_param->offset;
+ char buf[16];
+ u32 val;
+ int rc;
+
+ rc = bnxt_qplib_query_cc_param(&rdev->qplib_res, &ccparam);
+ if (rc)
+ return rc;
+
+ rc = map_cc_config_offset_gen0_ext0(offset, &ccparam, &val);
+ if (rc)
+ return rc;
+
+ rc = snprintf(buf, sizeof(buf), "%d\n", val);
+ if (rc < 0)
+ return rc;
+
+ return simple_read_from_buffer(buffer, usr_buf_len, ppos, (u8 *)(buf), rc);
+}
+
+static int bnxt_re_fill_gen0_ext0(struct bnxt_qplib_cc_param *ccparam, u32 offset, u32 val)
+{
+ u32 modify_mask;
+
+ modify_mask = BIT(offset);
+
+ switch (modify_mask) {
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ENABLE_CC:
+ ccparam->enable = val;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_G:
+ ccparam->g = val;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_NUMPHASEPERSTATE:
+ ccparam->nph_per_state = val;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_INIT_CR:
+ ccparam->init_cr = val;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_INIT_TR:
+ ccparam->init_tr = val;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TOS_ECN:
+ ccparam->tos_ecn = val;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TOS_DSCP:
+ ccparam->tos_dscp = val;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ALT_VLAN_PCP:
+ ccparam->alt_vlan_pcp = val;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ALT_TOS_DSCP:
+ ccparam->alt_tos_dscp = val;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_RTT:
+ ccparam->rtt = val;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_CC_MODE:
+ ccparam->cc_mode = val;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TCP_CP:
+ ccparam->tcp_cp = val;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TX_QUEUE:
+ return -EOPNOTSUPP;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_INACTIVITY_CP:
+ ccparam->inact_th = val;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TIME_PER_PHASE:
+ ccparam->time_pph = val;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_PKTS_PER_PHASE:
+ ccparam->pkts_pph = val;
+ break;
+ }
+
+ ccparam->mask = modify_mask;
+ return 0;
+}
+
+static int bnxt_re_configure_cc(struct bnxt_re_dev *rdev, u32 gen_ext, u32 offset, u32 val)
+{
+ struct bnxt_qplib_cc_param ccparam = { };
+ int rc;
+
+ if (gen_ext != CC_CONFIG_GEN0_EXT0)
+ return -EOPNOTSUPP;
+
+ rc = bnxt_re_fill_gen0_ext0(&ccparam, offset, val);
+ if (rc)
+ return rc;
+
+ bnxt_qplib_modify_cc(&rdev->qplib_res, &ccparam);
+ return 0;
+}
+
+static ssize_t bnxt_re_cc_config_set(struct file *filp, const char __user *buffer,
+ size_t count, loff_t *ppos)
+{
+ struct bnxt_re_cc_param *dbg_cc_param = filp->private_data;
+ struct bnxt_re_dev *rdev = dbg_cc_param->rdev;
+ u32 offset = dbg_cc_param->offset;
+ u8 cc_gen = dbg_cc_param->cc_gen;
+ char buf[16];
+ u32 val;
+ int rc;
+
+ if (count >= sizeof(buf))
+ return -EINVAL;
+
+ if (copy_from_user(buf, buffer, count))
+ return -EFAULT;
+
+ buf[count] = '\0';
+ if (kstrtou32(buf, 0, &val))
+ return -EINVAL;
+
+ rc = bnxt_re_configure_cc(rdev, cc_gen, offset, val);
+ return rc ? rc : count;
+}
+
+static const struct file_operations bnxt_re_cc_config_ops = {
+ .owner = THIS_MODULE,
+ .open = simple_open,
+ .read = bnxt_re_cc_config_get,
+ .write = bnxt_re_cc_config_set,
+};
+
+static int info_show(struct seq_file *m, void *unused)
+{
+ struct bnxt_re_dev *rdev = m->private;
+ struct bnxt_re_res_cntrs *res_s = &rdev->stats.res;
+
+ seq_puts(m, "Info:\n");
+ seq_printf(m, "Device Name\t\t: %s\n", dev_name(&rdev->ibdev.dev));
+ seq_printf(m, "PD Watermark\t\t: %llu\n", res_s->pd_watermark);
+ seq_printf(m, "AH Watermark\t\t: %llu\n", res_s->ah_watermark);
+ seq_printf(m, "QP Watermark\t\t: %llu\n", res_s->qp_watermark);
+ seq_printf(m, "RC QP Watermark\t\t: %llu\n", res_s->rc_qp_watermark);
+ seq_printf(m, "UD QP Watermark\t\t: %llu\n", res_s->ud_qp_watermark);
+ seq_printf(m, "SRQ Watermark\t\t: %llu\n", res_s->srq_watermark);
+ seq_printf(m, "CQ Watermark\t\t: %llu\n", res_s->cq_watermark);
+ seq_printf(m, "MR Watermark\t\t: %llu\n", res_s->mr_watermark);
+ seq_printf(m, "MW Watermark\t\t: %llu\n", res_s->mw_watermark);
+ seq_printf(m, "CQ Resize Count\t\t: %d\n", atomic_read(&res_s->resize_count));
+ if (rdev->pacing.dbr_pacing) {
+ seq_printf(m, "DB Pacing Reschedule\t: %llu\n", rdev->stats.pacing.resched);
+ seq_printf(m, "DB Pacing Complete\t: %llu\n", rdev->stats.pacing.complete);
+ seq_printf(m, "DB Pacing Alerts\t: %llu\n", rdev->stats.pacing.alerts);
+ seq_printf(m, "DB FIFO Register\t: 0x%x\n",
+ readl(rdev->en_dev->bar0 + rdev->pacing.dbr_db_fifo_reg_off));
+ }
+
+ return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(info);
+
+static void bnxt_re_debugfs_add_info(struct bnxt_re_dev *rdev)
+{
+ debugfs_create_file("info", 0400, rdev->dbg_root, rdev, &info_fops);
+}
+
+static ssize_t cq_coal_cfg_write(struct file *file,
+ const char __user *buf,
+ size_t count, loff_t *pos)
+{
+ struct seq_file *s = file->private_data;
+ struct bnxt_re_cq_coal_param *param = s->private;
+ struct bnxt_re_dev *rdev = param->rdev;
+ int offset = param->offset;
+ char lbuf[16] = { };
+ u32 val;
+
+ if (count > sizeof(lbuf))
+ return -EINVAL;
+
+ if (copy_from_user(lbuf, buf, count))
+ return -EFAULT;
+
+ lbuf[sizeof(lbuf) - 1] = '\0';
+
+ if (kstrtou32(lbuf, 0, &val))
+ return -EINVAL;
+
+ switch (offset) {
+ case BNXT_RE_COAL_CQ_BUF_MAXTIME:
+ if (val < 1 || val > BNXT_QPLIB_CQ_COAL_MAX_BUF_MAXTIME)
+ return -EINVAL;
+ rdev->cq_coalescing.buf_maxtime = val;
+ break;
+ case BNXT_RE_COAL_CQ_NORMAL_MAXBUF:
+ if (val < 1 || val > BNXT_QPLIB_CQ_COAL_MAX_NORMAL_MAXBUF)
+ return -EINVAL;
+ rdev->cq_coalescing.normal_maxbuf = val;
+ break;
+ case BNXT_RE_COAL_CQ_DURING_MAXBUF:
+ if (val < 1 || val > BNXT_QPLIB_CQ_COAL_MAX_DURING_MAXBUF)
+ return -EINVAL;
+ rdev->cq_coalescing.during_maxbuf = val;
+ break;
+ case BNXT_RE_COAL_CQ_EN_RING_IDLE_MODE:
+ if (val > BNXT_QPLIB_CQ_COAL_MAX_EN_RING_IDLE_MODE)
+ return -EINVAL;
+ rdev->cq_coalescing.en_ring_idle_mode = val;
+ break;
+ case BNXT_RE_COAL_CQ_ENABLE:
+ if (val > 1)
+ return -EINVAL;
+ rdev->cq_coalescing.enable = val;
+ break;
+ default:
+ return -EINVAL;
+ }
+ return count;
+}
+
+static int cq_coal_cfg_show(struct seq_file *s, void *unused)
+{
+ struct bnxt_re_cq_coal_param *param = s->private;
+ struct bnxt_re_dev *rdev = param->rdev;
+ int offset = param->offset;
+ u32 val = 0;
+
+ switch (offset) {
+ case BNXT_RE_COAL_CQ_BUF_MAXTIME:
+ val = rdev->cq_coalescing.buf_maxtime;
+ break;
+ case BNXT_RE_COAL_CQ_NORMAL_MAXBUF:
+ val = rdev->cq_coalescing.normal_maxbuf;
+ break;
+ case BNXT_RE_COAL_CQ_DURING_MAXBUF:
+ val = rdev->cq_coalescing.during_maxbuf;
+ break;
+ case BNXT_RE_COAL_CQ_EN_RING_IDLE_MODE:
+ val = rdev->cq_coalescing.en_ring_idle_mode;
+ break;
+ case BNXT_RE_COAL_CQ_ENABLE:
+ val = rdev->cq_coalescing.enable;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ seq_printf(s, "%u\n", val);
+ return 0;
+}
+DEFINE_SHOW_STORE_ATTRIBUTE(cq_coal_cfg);
+
+static void bnxt_re_cleanup_cq_coal_debugfs(struct bnxt_re_dev *rdev)
+{
+ debugfs_remove_recursive(rdev->cq_coal_cfg);
+ kfree(rdev->cq_coal_cfg_params);
+}
+
+static void bnxt_re_init_cq_coal_debugfs(struct bnxt_re_dev *rdev)
+{
+ struct bnxt_re_dbg_cq_coal_params *dbg_cq_coal_params;
+ int i;
+
+ if (!_is_cq_coalescing_supported(rdev->dev_attr->dev_cap_flags2))
+ return;
+
+ dbg_cq_coal_params = kzalloc(sizeof(*dbg_cq_coal_params), GFP_KERNEL);
+ if (!dbg_cq_coal_params)
+ return;
+
+ rdev->cq_coal_cfg = debugfs_create_dir("cq_coal_cfg", rdev->dbg_root);
+ rdev->cq_coal_cfg_params = dbg_cq_coal_params;
+
+ for (i = 0; i < BNXT_RE_COAL_CQ_MAX; i++) {
+ dbg_cq_coal_params->params[i].offset = i;
+ dbg_cq_coal_params->params[i].rdev = rdev;
+ debugfs_create_file(bnxt_re_cq_coal_str[i],
+ 0600, rdev->cq_coal_cfg,
+ &dbg_cq_coal_params->params[i],
+ &cq_coal_cfg_fops);
+ }
+}
+
+void bnxt_re_debugfs_add_pdev(struct bnxt_re_dev *rdev)
+{
+ struct pci_dev *pdev = rdev->en_dev->pdev;
+ struct bnxt_re_dbg_cc_config_params *cc_params;
+ int i;
+
+ rdev->dbg_root = debugfs_create_dir(dev_name(&pdev->dev), bnxt_re_debugfs_root);
+
+ rdev->qp_debugfs = debugfs_create_dir("QPs", rdev->dbg_root);
+ rdev->cc_config = debugfs_create_dir("cc_config", rdev->dbg_root);
+
+ bnxt_re_debugfs_add_info(rdev);
+
+ rdev->cc_config_params = kzalloc(sizeof(*cc_params), GFP_KERNEL);
+
+ for (i = 0; i < BNXT_RE_CC_PARAM_GEN0; i++) {
+ struct bnxt_re_cc_param *tmp_params = &rdev->cc_config_params->gen0_parms[i];
+
+ tmp_params->rdev = rdev;
+ tmp_params->offset = i;
+ tmp_params->cc_gen = CC_CONFIG_GEN0_EXT0;
+ tmp_params->dentry = debugfs_create_file(bnxt_re_cc_gen0_name[i], 0400,
+ rdev->cc_config, tmp_params,
+ &bnxt_re_cc_config_ops);
+ }
+
+ bnxt_re_init_cq_coal_debugfs(rdev);
+}
+
+void bnxt_re_debugfs_rem_pdev(struct bnxt_re_dev *rdev)
+{
+ bnxt_re_cleanup_cq_coal_debugfs(rdev);
+ debugfs_remove_recursive(rdev->qp_debugfs);
+ debugfs_remove_recursive(rdev->cc_config);
+ kfree(rdev->cc_config_params);
+ debugfs_remove_recursive(rdev->dbg_root);
+ rdev->dbg_root = NULL;
+}
+
+void bnxt_re_register_debugfs(void)
+{
+ bnxt_re_debugfs_root = debugfs_create_dir("bnxt_re", NULL);
+}
+
+void bnxt_re_unregister_debugfs(void)
+{
+ debugfs_remove(bnxt_re_debugfs_root);
+}
diff --git a/drivers/infiniband/hw/bnxt_re/debugfs.h b/drivers/infiniband/hw/bnxt_re/debugfs.h
new file mode 100644
index 000000000000..98f4620ef245
--- /dev/null
+++ b/drivers/infiniband/hw/bnxt_re/debugfs.h
@@ -0,0 +1,55 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
+/*
+ * Copyright (c) 2024, Broadcom. All rights reserved. The term
+ * Broadcom refers to Broadcom Limited and/or its subsidiaries.
+ *
+ * Description: Debugfs header
+ */
+
+#ifndef __BNXT_RE_DEBUGFS__
+#define __BNXT_RE_DEBUGFS__
+
+void bnxt_re_debug_add_qpinfo(struct bnxt_re_dev *rdev, struct bnxt_re_qp *qp);
+void bnxt_re_debug_rem_qpinfo(struct bnxt_re_dev *rdev, struct bnxt_re_qp *qp);
+
+void bnxt_re_debugfs_add_pdev(struct bnxt_re_dev *rdev);
+void bnxt_re_debugfs_rem_pdev(struct bnxt_re_dev *rdev);
+
+void bnxt_re_register_debugfs(void);
+void bnxt_re_unregister_debugfs(void);
+
+#define CC_CONFIG_GEN_EXT(x, y) (((x) << 16) | (y))
+#define CC_CONFIG_GEN0_EXT0 CC_CONFIG_GEN_EXT(0, 0)
+
+#define BNXT_RE_CC_PARAM_GEN0 14
+
+struct bnxt_re_cc_param {
+ struct bnxt_re_dev *rdev;
+ struct dentry *dentry;
+ u32 offset;
+ u8 cc_gen;
+};
+
+struct bnxt_re_dbg_cc_config_params {
+ struct bnxt_re_cc_param gen0_parms[BNXT_RE_CC_PARAM_GEN0];
+};
+
+struct bnxt_re_cq_coal_param {
+ struct bnxt_re_dev *rdev;
+ u32 offset;
+};
+
+enum bnxt_re_cq_coal_types {
+ BNXT_RE_COAL_CQ_BUF_MAXTIME,
+ BNXT_RE_COAL_CQ_NORMAL_MAXBUF,
+ BNXT_RE_COAL_CQ_DURING_MAXBUF,
+ BNXT_RE_COAL_CQ_EN_RING_IDLE_MODE,
+ BNXT_RE_COAL_CQ_ENABLE,
+ BNXT_RE_COAL_CQ_MAX
+
+};
+
+struct bnxt_re_dbg_cq_coal_params {
+ struct bnxt_re_cq_coal_param params[BNXT_RE_COAL_CQ_MAX];
+};
+#endif
diff --git a/drivers/infiniband/hw/bnxt_re/hw_counters.c b/drivers/infiniband/hw/bnxt_re/hw_counters.c
new file mode 100644
index 000000000000..651cf9d0e0c7
--- /dev/null
+++ b/drivers/infiniband/hw/bnxt_re/hw_counters.c
@@ -0,0 +1,433 @@
+/*
+ * Broadcom NetXtreme-E RoCE driver.
+ *
+ * Copyright (c) 2016 - 2017, Broadcom. All rights reserved. The term
+ * Broadcom refers to Broadcom Limited and/or its subsidiaries.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Description: Statistics
+ *
+ */
+
+#include <linux/types.h>
+#include <linux/pci.h>
+#include <rdma/ib_mad.h>
+#include <rdma/ib_pma.h>
+
+#include "roce_hsi.h"
+#include "qplib_res.h"
+#include "qplib_sp.h"
+#include "qplib_fp.h"
+#include "qplib_rcfw.h"
+#include "bnxt_re.h"
+#include "hw_counters.h"
+
+static const struct rdma_stat_desc bnxt_re_stat_descs[] = {
+ [BNXT_RE_RX_PKTS].name = "rx_pkts",
+ [BNXT_RE_RX_BYTES].name = "rx_bytes",
+ [BNXT_RE_TX_PKTS].name = "tx_pkts",
+ [BNXT_RE_TX_BYTES].name = "tx_bytes",
+ [BNXT_RE_RECOVERABLE_ERRORS].name = "recoverable_errors",
+ [BNXT_RE_TX_ERRORS].name = "tx_roce_errors",
+ [BNXT_RE_TX_DISCARDS].name = "tx_roce_discards",
+ [BNXT_RE_RX_ERRORS].name = "rx_roce_errors",
+ [BNXT_RE_RX_DISCARDS].name = "rx_roce_discards",
+ [BNXT_RE_TO_RETRANSMITS].name = "local_ack_timeout_err",
+ [BNXT_RE_SEQ_ERR_NAKS_RCVD].name = "packet_seq_err",
+ [BNXT_RE_MAX_RETRY_EXCEEDED].name = "max_retry_exceeded",
+ [BNXT_RE_RNR_NAKS_RCVD].name = "rnr_nak_retry_err",
+ [BNXT_RE_MISSING_RESP].name = "implied_nak_seq_err",
+ [BNXT_RE_UNRECOVERABLE_ERR].name = "unrecoverable_err",
+ [BNXT_RE_BAD_RESP_ERR].name = "bad_resp_err",
+ [BNXT_RE_LOCAL_QP_OP_ERR].name = "local_qp_op_err",
+ [BNXT_RE_LOCAL_PROTECTION_ERR].name = "local_protection_err",
+ [BNXT_RE_MEM_MGMT_OP_ERR].name = "mem_mgmt_op_err",
+ [BNXT_RE_REMOTE_INVALID_REQ_ERR].name = "req_remote_invalid_request",
+ [BNXT_RE_REMOTE_ACCESS_ERR].name = "req_remote_access_errors",
+ [BNXT_RE_REMOTE_OP_ERR].name = "remote_op_err",
+ [BNXT_RE_DUP_REQ].name = "duplicate_request",
+ [BNXT_RE_RES_EXCEED_MAX].name = "res_exceed_max",
+ [BNXT_RE_RES_LENGTH_MISMATCH].name = "resp_local_length_error",
+ [BNXT_RE_RES_EXCEEDS_WQE].name = "res_exceeds_wqe",
+ [BNXT_RE_RES_OPCODE_ERR].name = "res_opcode_err",
+ [BNXT_RE_RES_RX_INVALID_RKEY].name = "res_rx_invalid_rkey",
+ [BNXT_RE_RES_RX_DOMAIN_ERR].name = "res_rx_domain_err",
+ [BNXT_RE_RES_RX_NO_PERM].name = "res_rx_no_perm",
+ [BNXT_RE_RES_RX_RANGE_ERR].name = "res_rx_range_err",
+ [BNXT_RE_RES_TX_INVALID_RKEY].name = "res_tx_invalid_rkey",
+ [BNXT_RE_RES_TX_DOMAIN_ERR].name = "res_tx_domain_err",
+ [BNXT_RE_RES_TX_NO_PERM].name = "res_tx_no_perm",
+ [BNXT_RE_RES_TX_RANGE_ERR].name = "res_tx_range_err",
+ [BNXT_RE_RES_IRRQ_OFLOW].name = "res_irrq_oflow",
+ [BNXT_RE_RES_UNSUP_OPCODE].name = "res_unsup_opcode",
+ [BNXT_RE_RES_UNALIGNED_ATOMIC].name = "res_unaligned_atomic",
+ [BNXT_RE_RES_REM_INV_ERR].name = "res_rem_inv_err",
+ [BNXT_RE_RES_MEM_ERROR].name = "res_mem_err",
+ [BNXT_RE_RES_SRQ_ERR].name = "res_srq_err",
+ [BNXT_RE_RES_CMP_ERR].name = "res_cmp_err",
+ [BNXT_RE_RES_INVALID_DUP_RKEY].name = "res_invalid_dup_rkey",
+ [BNXT_RE_RES_WQE_FORMAT_ERR].name = "res_wqe_format_err",
+ [BNXT_RE_RES_CQ_LOAD_ERR].name = "res_cq_load_err",
+ [BNXT_RE_RES_SRQ_LOAD_ERR].name = "res_srq_load_err",
+ [BNXT_RE_RES_TX_PCI_ERR].name = "res_tx_pci_err",
+ [BNXT_RE_RES_RX_PCI_ERR].name = "res_rx_pci_err",
+ [BNXT_RE_OUT_OF_SEQ_ERR].name = "out_of_sequence",
+ [BNXT_RE_TX_ATOMIC_REQ].name = "tx_atomic_req",
+ [BNXT_RE_TX_READ_REQ].name = "tx_read_req",
+ [BNXT_RE_TX_READ_RES].name = "tx_read_resp",
+ [BNXT_RE_TX_WRITE_REQ].name = "tx_write_req",
+ [BNXT_RE_TX_SEND_REQ].name = "tx_send_req",
+ [BNXT_RE_TX_ROCE_PKTS].name = "tx_roce_only_pkts",
+ [BNXT_RE_TX_ROCE_BYTES].name = "tx_roce_only_bytes",
+ [BNXT_RE_RX_ATOMIC_REQ].name = "rx_atomic_requests",
+ [BNXT_RE_RX_READ_REQ].name = "rx_read_requests",
+ [BNXT_RE_RX_READ_RESP].name = "rx_read_resp",
+ [BNXT_RE_RX_WRITE_REQ].name = "rx_write_requests",
+ [BNXT_RE_RX_SEND_REQ].name = "rx_send_req",
+ [BNXT_RE_RX_ROCE_PKTS].name = "rx_roce_only_pkts",
+ [BNXT_RE_RX_ROCE_BYTES].name = "rx_roce_only_bytes",
+ [BNXT_RE_RX_ROCE_GOOD_PKTS].name = "rx_roce_good_pkts",
+ [BNXT_RE_RX_ROCE_GOOD_BYTES].name = "rx_roce_good_bytes",
+ [BNXT_RE_OOB].name = "out_of_buffer",
+ [BNXT_RE_TX_CNP].name = "np_cnp_pkts",
+ [BNXT_RE_RX_CNP].name = "rp_cnp_handled",
+ [BNXT_RE_RX_ECN].name = "np_ecn_marked_roce_packets",
+ [BNXT_RE_REQ_CQE_ERROR].name = "req_cqe_error",
+ [BNXT_RE_RESP_CQE_ERROR].name = "resp_cqe_error",
+ [BNXT_RE_RESP_REMOTE_ACCESS_ERRS].name = "resp_remote_access_errors",
+};
+
+static void bnxt_re_copy_ext_stats(struct bnxt_re_dev *rdev,
+ struct rdma_hw_stats *stats,
+ struct bnxt_qplib_ext_stat *s)
+{
+ stats->value[BNXT_RE_TX_ATOMIC_REQ] = s->tx_atomic_req;
+ stats->value[BNXT_RE_TX_READ_REQ] = s->tx_read_req;
+ stats->value[BNXT_RE_TX_READ_RES] = s->tx_read_res;
+ stats->value[BNXT_RE_TX_WRITE_REQ] = s->tx_write_req;
+ stats->value[BNXT_RE_TX_SEND_REQ] = s->tx_send_req;
+ stats->value[BNXT_RE_TX_ROCE_PKTS] = s->tx_roce_pkts;
+ stats->value[BNXT_RE_TX_ROCE_BYTES] = s->tx_roce_bytes;
+ stats->value[BNXT_RE_RX_ATOMIC_REQ] = s->rx_atomic_req;
+ stats->value[BNXT_RE_RX_READ_REQ] = s->rx_read_req;
+ stats->value[BNXT_RE_RX_READ_RESP] = s->rx_read_res;
+ stats->value[BNXT_RE_RX_WRITE_REQ] = s->rx_write_req;
+ stats->value[BNXT_RE_RX_SEND_REQ] = s->rx_send_req;
+ stats->value[BNXT_RE_RX_ROCE_PKTS] = s->rx_roce_pkts;
+ stats->value[BNXT_RE_RX_ROCE_BYTES] = s->rx_roce_bytes;
+ stats->value[BNXT_RE_RX_ROCE_GOOD_PKTS] = s->rx_roce_good_pkts;
+ stats->value[BNXT_RE_RX_ROCE_GOOD_BYTES] = s->rx_roce_good_bytes;
+ stats->value[BNXT_RE_OOB] = s->rx_out_of_buffer;
+ stats->value[BNXT_RE_TX_CNP] = s->tx_cnp;
+ stats->value[BNXT_RE_RX_CNP] = s->rx_cnp;
+ stats->value[BNXT_RE_RX_ECN] = s->rx_ecn_marked;
+ stats->value[BNXT_RE_OUT_OF_SEQ_ERR] = s->rx_out_of_sequence;
+}
+
+static int bnxt_re_get_ext_stat(struct bnxt_re_dev *rdev,
+ struct rdma_hw_stats *stats)
+{
+ struct bnxt_qplib_ext_stat *estat = &rdev->stats.rstat.ext_stat;
+ u32 fid;
+ int rc;
+
+ fid = PCI_FUNC(rdev->en_dev->pdev->devfn);
+ rc = bnxt_qplib_qext_stat(&rdev->rcfw, fid, estat);
+ if (rc)
+ goto done;
+ bnxt_re_copy_ext_stats(rdev, stats, estat);
+
+done:
+ return rc;
+}
+
+static void bnxt_re_copy_err_stats(struct bnxt_re_dev *rdev,
+ struct rdma_hw_stats *stats,
+ struct bnxt_qplib_roce_stats *err_s)
+{
+ stats->value[BNXT_RE_TO_RETRANSMITS] =
+ err_s->to_retransmits;
+ stats->value[BNXT_RE_SEQ_ERR_NAKS_RCVD] =
+ err_s->seq_err_naks_rcvd;
+ stats->value[BNXT_RE_MAX_RETRY_EXCEEDED] =
+ err_s->max_retry_exceeded;
+ stats->value[BNXT_RE_RNR_NAKS_RCVD] =
+ err_s->rnr_naks_rcvd;
+ stats->value[BNXT_RE_MISSING_RESP] =
+ err_s->missing_resp;
+ stats->value[BNXT_RE_UNRECOVERABLE_ERR] =
+ err_s->unrecoverable_err;
+ stats->value[BNXT_RE_BAD_RESP_ERR] =
+ err_s->bad_resp_err;
+ stats->value[BNXT_RE_LOCAL_QP_OP_ERR] =
+ err_s->local_qp_op_err;
+ stats->value[BNXT_RE_LOCAL_PROTECTION_ERR] =
+ err_s->local_protection_err;
+ stats->value[BNXT_RE_MEM_MGMT_OP_ERR] =
+ err_s->mem_mgmt_op_err;
+ stats->value[BNXT_RE_REMOTE_INVALID_REQ_ERR] =
+ err_s->remote_invalid_req_err;
+ stats->value[BNXT_RE_REMOTE_ACCESS_ERR] =
+ err_s->remote_access_err;
+ stats->value[BNXT_RE_REMOTE_OP_ERR] =
+ err_s->remote_op_err;
+ stats->value[BNXT_RE_DUP_REQ] =
+ err_s->dup_req;
+ stats->value[BNXT_RE_RES_EXCEED_MAX] =
+ err_s->res_exceed_max;
+ stats->value[BNXT_RE_RES_LENGTH_MISMATCH] =
+ err_s->res_length_mismatch;
+ stats->value[BNXT_RE_RES_EXCEEDS_WQE] =
+ err_s->res_exceeds_wqe;
+ stats->value[BNXT_RE_RES_OPCODE_ERR] =
+ err_s->res_opcode_err;
+ stats->value[BNXT_RE_RES_RX_INVALID_RKEY] =
+ err_s->res_rx_invalid_rkey;
+ stats->value[BNXT_RE_RES_RX_DOMAIN_ERR] =
+ err_s->res_rx_domain_err;
+ stats->value[BNXT_RE_RES_RX_NO_PERM] =
+ err_s->res_rx_no_perm;
+ stats->value[BNXT_RE_RES_RX_RANGE_ERR] =
+ err_s->res_rx_range_err;
+ stats->value[BNXT_RE_RES_TX_INVALID_RKEY] =
+ err_s->res_tx_invalid_rkey;
+ stats->value[BNXT_RE_RES_TX_DOMAIN_ERR] =
+ err_s->res_tx_domain_err;
+ stats->value[BNXT_RE_RES_TX_NO_PERM] =
+ err_s->res_tx_no_perm;
+ stats->value[BNXT_RE_RES_TX_RANGE_ERR] =
+ err_s->res_tx_range_err;
+ stats->value[BNXT_RE_RES_IRRQ_OFLOW] =
+ err_s->res_irrq_oflow;
+ stats->value[BNXT_RE_RES_UNSUP_OPCODE] =
+ err_s->res_unsup_opcode;
+ stats->value[BNXT_RE_RES_UNALIGNED_ATOMIC] =
+ err_s->res_unaligned_atomic;
+ stats->value[BNXT_RE_RES_REM_INV_ERR] =
+ err_s->res_rem_inv_err;
+ stats->value[BNXT_RE_RES_MEM_ERROR] =
+ err_s->res_mem_error;
+ stats->value[BNXT_RE_RES_SRQ_ERR] =
+ err_s->res_srq_err;
+ stats->value[BNXT_RE_RES_CMP_ERR] =
+ err_s->res_cmp_err;
+ stats->value[BNXT_RE_RES_INVALID_DUP_RKEY] =
+ err_s->res_invalid_dup_rkey;
+ stats->value[BNXT_RE_RES_WQE_FORMAT_ERR] =
+ err_s->res_wqe_format_err;
+ stats->value[BNXT_RE_RES_CQ_LOAD_ERR] =
+ err_s->res_cq_load_err;
+ stats->value[BNXT_RE_RES_SRQ_LOAD_ERR] =
+ err_s->res_srq_load_err;
+ stats->value[BNXT_RE_RES_TX_PCI_ERR] =
+ err_s->res_tx_pci_err;
+ stats->value[BNXT_RE_RES_RX_PCI_ERR] =
+ err_s->res_rx_pci_err;
+ stats->value[BNXT_RE_OUT_OF_SEQ_ERR] =
+ err_s->res_oos_drop_count;
+ stats->value[BNXT_RE_REQ_CQE_ERROR] =
+ err_s->bad_resp_err +
+ err_s->local_qp_op_err +
+ err_s->local_protection_err +
+ err_s->mem_mgmt_op_err +
+ err_s->remote_invalid_req_err +
+ err_s->remote_access_err +
+ err_s->remote_op_err;
+ stats->value[BNXT_RE_RESP_CQE_ERROR] =
+ err_s->res_cmp_err +
+ err_s->res_cq_load_err;
+ stats->value[BNXT_RE_RESP_REMOTE_ACCESS_ERRS] =
+ err_s->res_rx_no_perm +
+ err_s->res_tx_no_perm;
+}
+
+int bnxt_re_assign_pma_port_ext_counters(struct bnxt_re_dev *rdev, struct ib_mad *out_mad)
+{
+ struct ib_pma_portcounters_ext *pma_cnt_ext;
+ struct bnxt_qplib_ext_stat *estat = &rdev->stats.rstat.ext_stat;
+ struct ctx_hw_stats *hw_stats = NULL;
+ int rc;
+
+ hw_stats = rdev->qplib_ctx.stats.dma;
+
+ pma_cnt_ext = (struct ib_pma_portcounters_ext *)(out_mad->data + 40);
+ if (_is_ext_stats_supported(rdev->dev_attr->dev_cap_flags)) {
+ u32 fid = PCI_FUNC(rdev->en_dev->pdev->devfn);
+
+ rc = bnxt_qplib_qext_stat(&rdev->rcfw, fid, estat);
+ if (rc)
+ return rc;
+ }
+
+ pma_cnt_ext = (struct ib_pma_portcounters_ext *)(out_mad->data + 40);
+ if ((bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx) && rdev->is_virtfn) ||
+ !bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx)) {
+ pma_cnt_ext->port_xmit_data =
+ cpu_to_be64(le64_to_cpu(hw_stats->tx_ucast_bytes) / 4);
+ pma_cnt_ext->port_rcv_data =
+ cpu_to_be64(le64_to_cpu(hw_stats->rx_ucast_bytes) / 4);
+ pma_cnt_ext->port_xmit_packets =
+ cpu_to_be64(le64_to_cpu(hw_stats->tx_ucast_pkts));
+ pma_cnt_ext->port_rcv_packets =
+ cpu_to_be64(le64_to_cpu(hw_stats->rx_ucast_pkts));
+ pma_cnt_ext->port_unicast_rcv_packets =
+ cpu_to_be64(le64_to_cpu(hw_stats->rx_ucast_pkts));
+ pma_cnt_ext->port_unicast_xmit_packets =
+ cpu_to_be64(le64_to_cpu(hw_stats->tx_ucast_pkts));
+
+ } else {
+ pma_cnt_ext->port_rcv_packets = cpu_to_be64(estat->rx_roce_good_pkts);
+ pma_cnt_ext->port_rcv_data = cpu_to_be64(estat->rx_roce_good_bytes / 4);
+ pma_cnt_ext->port_xmit_packets = cpu_to_be64(estat->tx_roce_pkts);
+ pma_cnt_ext->port_xmit_data = cpu_to_be64(estat->tx_roce_bytes / 4);
+ pma_cnt_ext->port_unicast_rcv_packets = cpu_to_be64(estat->rx_roce_good_pkts);
+ pma_cnt_ext->port_unicast_xmit_packets = cpu_to_be64(estat->tx_roce_pkts);
+ }
+ return 0;
+}
+
+int bnxt_re_assign_pma_port_counters(struct bnxt_re_dev *rdev, struct ib_mad *out_mad)
+{
+ struct bnxt_qplib_ext_stat *estat = &rdev->stats.rstat.ext_stat;
+ struct ib_pma_portcounters *pma_cnt;
+ struct ctx_hw_stats *hw_stats = NULL;
+ int rc;
+
+ hw_stats = rdev->qplib_ctx.stats.dma;
+
+ pma_cnt = (struct ib_pma_portcounters *)(out_mad->data + 40);
+ if (_is_ext_stats_supported(rdev->dev_attr->dev_cap_flags)) {
+ u32 fid = PCI_FUNC(rdev->en_dev->pdev->devfn);
+
+ rc = bnxt_qplib_qext_stat(&rdev->rcfw, fid, estat);
+ if (rc)
+ return rc;
+ }
+ if ((bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx) && rdev->is_virtfn) ||
+ !bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx)) {
+ pma_cnt->port_rcv_packets =
+ cpu_to_be32((u32)(le64_to_cpu(hw_stats->rx_ucast_pkts)) & 0xFFFFFFFF);
+ pma_cnt->port_rcv_data =
+ cpu_to_be32((u32)((le64_to_cpu(hw_stats->rx_ucast_bytes) &
+ 0xFFFFFFFF) / 4));
+ pma_cnt->port_xmit_packets =
+ cpu_to_be32((u32)(le64_to_cpu(hw_stats->tx_ucast_pkts)) & 0xFFFFFFFF);
+ pma_cnt->port_xmit_data =
+ cpu_to_be32((u32)((le64_to_cpu(hw_stats->tx_ucast_bytes)
+ & 0xFFFFFFFF) / 4));
+ } else {
+ pma_cnt->port_rcv_packets = cpu_to_be32(estat->rx_roce_good_pkts);
+ pma_cnt->port_rcv_data = cpu_to_be32((estat->rx_roce_good_bytes / 4));
+ pma_cnt->port_xmit_packets = cpu_to_be32(estat->tx_roce_pkts);
+ pma_cnt->port_xmit_data = cpu_to_be32((estat->tx_roce_bytes / 4));
+ }
+ pma_cnt->port_rcv_constraint_errors = (u8)(le64_to_cpu(hw_stats->rx_discard_pkts) & 0xFF);
+ pma_cnt->port_rcv_errors = cpu_to_be16((u16)(le64_to_cpu(hw_stats->rx_error_pkts)
+ & 0xFFFF));
+ pma_cnt->port_xmit_constraint_errors = (u8)(le64_to_cpu(hw_stats->tx_error_pkts) & 0xFF);
+ pma_cnt->port_xmit_discards = cpu_to_be16((u16)(le64_to_cpu(hw_stats->tx_discard_pkts)
+ & 0xFFFF));
+
+ return 0;
+}
+
+int bnxt_re_ib_get_hw_stats(struct ib_device *ibdev,
+ struct rdma_hw_stats *stats,
+ u32 port, int index)
+{
+ struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev);
+ struct bnxt_qplib_roce_stats *err_s = NULL;
+ struct ctx_hw_stats *hw_stats = NULL;
+ int rc = 0;
+
+ hw_stats = rdev->qplib_ctx.stats.dma;
+ if (!port || !stats)
+ return -EINVAL;
+
+ if (hw_stats) {
+ stats->value[BNXT_RE_RECOVERABLE_ERRORS] =
+ le64_to_cpu(hw_stats->tx_bcast_pkts);
+ stats->value[BNXT_RE_TX_DISCARDS] =
+ le64_to_cpu(hw_stats->tx_discard_pkts);
+ stats->value[BNXT_RE_TX_ERRORS] =
+ le64_to_cpu(hw_stats->tx_error_pkts);
+ stats->value[BNXT_RE_RX_ERRORS] =
+ le64_to_cpu(hw_stats->rx_error_pkts);
+ stats->value[BNXT_RE_RX_DISCARDS] =
+ le64_to_cpu(hw_stats->rx_discard_pkts);
+ stats->value[BNXT_RE_RX_PKTS] =
+ le64_to_cpu(hw_stats->rx_ucast_pkts);
+ stats->value[BNXT_RE_RX_BYTES] =
+ le64_to_cpu(hw_stats->rx_ucast_bytes);
+ stats->value[BNXT_RE_TX_PKTS] =
+ le64_to_cpu(hw_stats->tx_ucast_pkts);
+ stats->value[BNXT_RE_TX_BYTES] =
+ le64_to_cpu(hw_stats->tx_ucast_bytes);
+ }
+ err_s = &rdev->stats.rstat.errs;
+ if (test_bit(BNXT_RE_FLAG_ISSUE_ROCE_STATS, &rdev->flags)) {
+ rc = bnxt_qplib_get_roce_stats(&rdev->rcfw, err_s);
+ if (rc) {
+ clear_bit(BNXT_RE_FLAG_ISSUE_ROCE_STATS,
+ &rdev->flags);
+ goto done;
+ }
+ bnxt_re_copy_err_stats(rdev, stats, err_s);
+ if (bnxt_ext_stats_supported(rdev->chip_ctx, rdev->dev_attr->dev_cap_flags,
+ rdev->is_virtfn)) {
+ rc = bnxt_re_get_ext_stat(rdev, stats);
+ if (rc) {
+ clear_bit(BNXT_RE_FLAG_ISSUE_ROCE_STATS,
+ &rdev->flags);
+ goto done;
+ }
+ }
+ }
+
+done:
+ return bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx) ?
+ BNXT_RE_NUM_EXT_COUNTERS : BNXT_RE_NUM_STD_COUNTERS;
+}
+
+struct rdma_hw_stats *bnxt_re_ib_alloc_hw_port_stats(struct ib_device *ibdev,
+ u32 port_num)
+{
+ struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev);
+ int num_counters = 0;
+
+ if (bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx))
+ num_counters = BNXT_RE_NUM_EXT_COUNTERS;
+ else
+ num_counters = BNXT_RE_NUM_STD_COUNTERS;
+
+ return rdma_alloc_hw_stats_struct(bnxt_re_stat_descs, num_counters,
+ RDMA_HW_STATS_DEFAULT_LIFESPAN);
+}
diff --git a/drivers/infiniband/hw/bnxt_re/hw_counters.h b/drivers/infiniband/hw/bnxt_re/hw_counters.h
new file mode 100644
index 000000000000..09d371d442aa
--- /dev/null
+++ b/drivers/infiniband/hw/bnxt_re/hw_counters.h
@@ -0,0 +1,165 @@
+/*
+ * Broadcom NetXtreme-E RoCE driver.
+ *
+ * Copyright (c) 2016 - 2017, Broadcom. All rights reserved. The term
+ * Broadcom refers to Broadcom Limited and/or its subsidiaries.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Description: Statistics (header)
+ *
+ */
+
+#ifndef __BNXT_RE_HW_STATS_H__
+#define __BNXT_RE_HW_STATS_H__
+
+enum bnxt_re_hw_stats {
+ BNXT_RE_RX_PKTS,
+ BNXT_RE_RX_BYTES,
+ BNXT_RE_TX_PKTS,
+ BNXT_RE_TX_BYTES,
+ BNXT_RE_RECOVERABLE_ERRORS,
+ BNXT_RE_TX_ERRORS,
+ BNXT_RE_TX_DISCARDS,
+ BNXT_RE_RX_ERRORS,
+ BNXT_RE_RX_DISCARDS,
+ BNXT_RE_TO_RETRANSMITS,
+ BNXT_RE_SEQ_ERR_NAKS_RCVD,
+ BNXT_RE_MAX_RETRY_EXCEEDED,
+ BNXT_RE_RNR_NAKS_RCVD,
+ BNXT_RE_MISSING_RESP,
+ BNXT_RE_UNRECOVERABLE_ERR,
+ BNXT_RE_BAD_RESP_ERR,
+ BNXT_RE_LOCAL_QP_OP_ERR,
+ BNXT_RE_LOCAL_PROTECTION_ERR,
+ BNXT_RE_MEM_MGMT_OP_ERR,
+ BNXT_RE_REMOTE_INVALID_REQ_ERR,
+ BNXT_RE_REMOTE_ACCESS_ERR,
+ BNXT_RE_REMOTE_OP_ERR,
+ BNXT_RE_DUP_REQ,
+ BNXT_RE_RES_EXCEED_MAX,
+ BNXT_RE_RES_LENGTH_MISMATCH,
+ BNXT_RE_RES_EXCEEDS_WQE,
+ BNXT_RE_RES_OPCODE_ERR,
+ BNXT_RE_RES_RX_INVALID_RKEY,
+ BNXT_RE_RES_RX_DOMAIN_ERR,
+ BNXT_RE_RES_RX_NO_PERM,
+ BNXT_RE_RES_RX_RANGE_ERR,
+ BNXT_RE_RES_TX_INVALID_RKEY,
+ BNXT_RE_RES_TX_DOMAIN_ERR,
+ BNXT_RE_RES_TX_NO_PERM,
+ BNXT_RE_RES_TX_RANGE_ERR,
+ BNXT_RE_RES_IRRQ_OFLOW,
+ BNXT_RE_RES_UNSUP_OPCODE,
+ BNXT_RE_RES_UNALIGNED_ATOMIC,
+ BNXT_RE_RES_REM_INV_ERR,
+ BNXT_RE_RES_MEM_ERROR,
+ BNXT_RE_RES_SRQ_ERR,
+ BNXT_RE_RES_CMP_ERR,
+ BNXT_RE_RES_INVALID_DUP_RKEY,
+ BNXT_RE_RES_WQE_FORMAT_ERR,
+ BNXT_RE_RES_CQ_LOAD_ERR,
+ BNXT_RE_RES_SRQ_LOAD_ERR,
+ BNXT_RE_RES_TX_PCI_ERR,
+ BNXT_RE_RES_RX_PCI_ERR,
+ BNXT_RE_OUT_OF_SEQ_ERR,
+ BNXT_RE_TX_ATOMIC_REQ,
+ BNXT_RE_TX_READ_REQ,
+ BNXT_RE_TX_READ_RES,
+ BNXT_RE_TX_WRITE_REQ,
+ BNXT_RE_TX_SEND_REQ,
+ BNXT_RE_TX_ROCE_PKTS,
+ BNXT_RE_TX_ROCE_BYTES,
+ BNXT_RE_RX_ATOMIC_REQ,
+ BNXT_RE_RX_READ_REQ,
+ BNXT_RE_RX_READ_RESP,
+ BNXT_RE_RX_WRITE_REQ,
+ BNXT_RE_RX_SEND_REQ,
+ BNXT_RE_RX_ROCE_PKTS,
+ BNXT_RE_RX_ROCE_BYTES,
+ BNXT_RE_RX_ROCE_GOOD_PKTS,
+ BNXT_RE_RX_ROCE_GOOD_BYTES,
+ BNXT_RE_OOB,
+ BNXT_RE_TX_CNP,
+ BNXT_RE_RX_CNP,
+ BNXT_RE_RX_ECN,
+ BNXT_RE_REQ_CQE_ERROR,
+ BNXT_RE_RESP_CQE_ERROR,
+ BNXT_RE_RESP_REMOTE_ACCESS_ERRS,
+ BNXT_RE_NUM_EXT_COUNTERS
+};
+
+#define BNXT_RE_NUM_STD_COUNTERS (BNXT_RE_OUT_OF_SEQ_ERR + 1)
+
+struct bnxt_re_db_pacing_stats {
+ u64 resched;
+ u64 complete;
+ u64 alerts;
+};
+
+struct bnxt_re_res_cntrs {
+ atomic_t qp_count;
+ atomic_t rc_qp_count;
+ atomic_t ud_qp_count;
+ atomic_t cq_count;
+ atomic_t srq_count;
+ atomic_t mr_count;
+ atomic_t mw_count;
+ atomic_t ah_count;
+ atomic_t pd_count;
+ atomic_t resize_count;
+ u64 qp_watermark;
+ u64 rc_qp_watermark;
+ u64 ud_qp_watermark;
+ u64 cq_watermark;
+ u64 srq_watermark;
+ u64 mr_watermark;
+ u64 mw_watermark;
+ u64 ah_watermark;
+ u64 pd_watermark;
+};
+
+struct bnxt_re_rstat {
+ struct bnxt_qplib_roce_stats errs;
+ struct bnxt_qplib_ext_stat ext_stat;
+};
+
+struct bnxt_re_stats {
+ struct bnxt_re_rstat rstat;
+ struct bnxt_re_res_cntrs res;
+ struct bnxt_re_db_pacing_stats pacing;
+};
+
+struct rdma_hw_stats *bnxt_re_ib_alloc_hw_port_stats(struct ib_device *ibdev,
+ u32 port_num);
+int bnxt_re_ib_get_hw_stats(struct ib_device *ibdev,
+ struct rdma_hw_stats *stats,
+ u32 port, int index);
+#endif /* __BNXT_RE_HW_STATS_H__ */
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
new file mode 100644
index 000000000000..f19b55c13d58
--- /dev/null
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -0,0 +1,4938 @@
+/*
+ * Broadcom NetXtreme-E RoCE driver.
+ *
+ * Copyright (c) 2016 - 2017, Broadcom. All rights reserved. The term
+ * Broadcom refers to Broadcom Limited and/or its subsidiaries.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Description: IB Verbs interpreter
+ */
+
+#include <linux/interrupt.h>
+#include <linux/types.h>
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/if_ether.h>
+#include <net/addrconf.h>
+
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_user_verbs.h>
+#include <rdma/ib_umem.h>
+#include <rdma/ib_addr.h>
+#include <rdma/ib_mad.h>
+#include <rdma/ib_cache.h>
+#include <rdma/ib_pma.h>
+#include <rdma/uverbs_ioctl.h>
+#include <linux/hashtable.h>
+
+#include "roce_hsi.h"
+#include "qplib_res.h"
+#include "qplib_sp.h"
+#include "qplib_fp.h"
+#include "qplib_rcfw.h"
+
+#include "bnxt_re.h"
+#include "ib_verbs.h"
+#include "debugfs.h"
+
+#include <rdma/uverbs_types.h>
+#include <rdma/uverbs_std_types.h>
+
+#include <rdma/ib_user_ioctl_cmds.h>
+
+#define UVERBS_MODULE_NAME bnxt_re
+#include <rdma/uverbs_named_ioctl.h>
+
+#include <rdma/bnxt_re-abi.h>
+
+static int __from_ib_access_flags(int iflags)
+{
+ int qflags = 0;
+
+ if (iflags & IB_ACCESS_LOCAL_WRITE)
+ qflags |= BNXT_QPLIB_ACCESS_LOCAL_WRITE;
+ if (iflags & IB_ACCESS_REMOTE_READ)
+ qflags |= BNXT_QPLIB_ACCESS_REMOTE_READ;
+ if (iflags & IB_ACCESS_REMOTE_WRITE)
+ qflags |= BNXT_QPLIB_ACCESS_REMOTE_WRITE;
+ if (iflags & IB_ACCESS_REMOTE_ATOMIC)
+ qflags |= BNXT_QPLIB_ACCESS_REMOTE_ATOMIC;
+ if (iflags & IB_ACCESS_MW_BIND)
+ qflags |= BNXT_QPLIB_ACCESS_MW_BIND;
+ if (iflags & IB_ZERO_BASED)
+ qflags |= BNXT_QPLIB_ACCESS_ZERO_BASED;
+ if (iflags & IB_ACCESS_ON_DEMAND)
+ qflags |= BNXT_QPLIB_ACCESS_ON_DEMAND;
+ return qflags;
+};
+
+static int __to_ib_access_flags(int qflags)
+{
+ int iflags = 0;
+
+ if (qflags & BNXT_QPLIB_ACCESS_LOCAL_WRITE)
+ iflags |= IB_ACCESS_LOCAL_WRITE;
+ if (qflags & BNXT_QPLIB_ACCESS_REMOTE_WRITE)
+ iflags |= IB_ACCESS_REMOTE_WRITE;
+ if (qflags & BNXT_QPLIB_ACCESS_REMOTE_READ)
+ iflags |= IB_ACCESS_REMOTE_READ;
+ if (qflags & BNXT_QPLIB_ACCESS_REMOTE_ATOMIC)
+ iflags |= IB_ACCESS_REMOTE_ATOMIC;
+ if (qflags & BNXT_QPLIB_ACCESS_MW_BIND)
+ iflags |= IB_ACCESS_MW_BIND;
+ if (qflags & BNXT_QPLIB_ACCESS_ZERO_BASED)
+ iflags |= IB_ZERO_BASED;
+ if (qflags & BNXT_QPLIB_ACCESS_ON_DEMAND)
+ iflags |= IB_ACCESS_ON_DEMAND;
+ return iflags;
+}
+
+static u8 __qp_access_flags_from_ib(struct bnxt_qplib_chip_ctx *cctx, int iflags)
+{
+ u8 qflags = 0;
+
+ if (!bnxt_qplib_is_chip_gen_p5_p7(cctx))
+ /* For Wh+ */
+ return (u8)__from_ib_access_flags(iflags);
+
+ /* For P5, P7 and later chips */
+ if (iflags & IB_ACCESS_LOCAL_WRITE)
+ qflags |= CMDQ_MODIFY_QP_ACCESS_LOCAL_WRITE;
+ if (iflags & IB_ACCESS_REMOTE_WRITE)
+ qflags |= CMDQ_MODIFY_QP_ACCESS_REMOTE_WRITE;
+ if (iflags & IB_ACCESS_REMOTE_READ)
+ qflags |= CMDQ_MODIFY_QP_ACCESS_REMOTE_READ;
+ if (iflags & IB_ACCESS_REMOTE_ATOMIC)
+ qflags |= CMDQ_MODIFY_QP_ACCESS_REMOTE_ATOMIC;
+
+ return qflags;
+}
+
+static int __qp_access_flags_to_ib(struct bnxt_qplib_chip_ctx *cctx, u8 qflags)
+{
+ int iflags = 0;
+
+ if (!bnxt_qplib_is_chip_gen_p5_p7(cctx))
+ /* For Wh+ */
+ return __to_ib_access_flags(qflags);
+
+ /* For P5, P7 and later chips */
+ if (qflags & CMDQ_MODIFY_QP_ACCESS_LOCAL_WRITE)
+ iflags |= IB_ACCESS_LOCAL_WRITE;
+ if (qflags & CMDQ_MODIFY_QP_ACCESS_REMOTE_WRITE)
+ iflags |= IB_ACCESS_REMOTE_WRITE;
+ if (qflags & CMDQ_MODIFY_QP_ACCESS_REMOTE_READ)
+ iflags |= IB_ACCESS_REMOTE_READ;
+ if (qflags & CMDQ_MODIFY_QP_ACCESS_REMOTE_ATOMIC)
+ iflags |= IB_ACCESS_REMOTE_ATOMIC;
+
+ return iflags;
+}
+
+static void bnxt_re_check_and_set_relaxed_ordering(struct bnxt_re_dev *rdev,
+ struct bnxt_qplib_mrw *qplib_mr)
+{
+ if (_is_relaxed_ordering_supported(rdev->dev_attr->dev_cap_flags2) &&
+ pcie_relaxed_ordering_enabled(rdev->en_dev->pdev))
+ qplib_mr->flags |= CMDQ_REGISTER_MR_FLAGS_ENABLE_RO;
+}
+
+static int bnxt_re_build_sgl(struct ib_sge *ib_sg_list,
+ struct bnxt_qplib_sge *sg_list, int num)
+{
+ int i, total = 0;
+
+ for (i = 0; i < num; i++) {
+ sg_list[i].addr = ib_sg_list[i].addr;
+ sg_list[i].lkey = ib_sg_list[i].lkey;
+ sg_list[i].size = ib_sg_list[i].length;
+ total += sg_list[i].size;
+ }
+ return total;
+}
+
+/* Device */
+int bnxt_re_query_device(struct ib_device *ibdev,
+ struct ib_device_attr *ib_attr,
+ struct ib_udata *udata)
+{
+ struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev);
+ struct bnxt_qplib_dev_attr *dev_attr = rdev->dev_attr;
+
+ memset(ib_attr, 0, sizeof(*ib_attr));
+ memcpy(&ib_attr->fw_ver, dev_attr->fw_ver,
+ min(sizeof(dev_attr->fw_ver),
+ sizeof(ib_attr->fw_ver)));
+ addrconf_addr_eui48((u8 *)&ib_attr->sys_image_guid,
+ rdev->netdev->dev_addr);
+ ib_attr->max_mr_size = BNXT_RE_MAX_MR_SIZE;
+ ib_attr->page_size_cap = BNXT_RE_PAGE_SIZE_SUPPORTED;
+
+ ib_attr->vendor_id = rdev->en_dev->pdev->vendor;
+ ib_attr->vendor_part_id = rdev->en_dev->pdev->device;
+ ib_attr->hw_ver = rdev->en_dev->pdev->revision;
+ ib_attr->max_qp = dev_attr->max_qp;
+ ib_attr->max_qp_wr = dev_attr->max_qp_wqes;
+ ib_attr->device_cap_flags =
+ IB_DEVICE_CURR_QP_STATE_MOD
+ | IB_DEVICE_RC_RNR_NAK_GEN
+ | IB_DEVICE_SHUTDOWN_PORT
+ | IB_DEVICE_SYS_IMAGE_GUID
+ | IB_DEVICE_RESIZE_MAX_WR
+ | IB_DEVICE_PORT_ACTIVE_EVENT
+ | IB_DEVICE_N_NOTIFY_CQ
+ | IB_DEVICE_MEM_WINDOW
+ | IB_DEVICE_MEM_WINDOW_TYPE_2B
+ | IB_DEVICE_MEM_MGT_EXTENSIONS;
+ ib_attr->kernel_cap_flags = IBK_LOCAL_DMA_LKEY;
+ ib_attr->max_send_sge = dev_attr->max_qp_sges;
+ ib_attr->max_recv_sge = dev_attr->max_qp_sges;
+ ib_attr->max_sge_rd = dev_attr->max_qp_sges;
+ ib_attr->max_cq = dev_attr->max_cq;
+ ib_attr->max_cqe = dev_attr->max_cq_wqes;
+ ib_attr->max_mr = dev_attr->max_mr;
+ ib_attr->max_pd = dev_attr->max_pd;
+ ib_attr->max_qp_rd_atom = dev_attr->max_qp_rd_atom;
+ ib_attr->max_qp_init_rd_atom = dev_attr->max_qp_init_rd_atom;
+ ib_attr->atomic_cap = IB_ATOMIC_NONE;
+ ib_attr->masked_atomic_cap = IB_ATOMIC_NONE;
+ if (dev_attr->is_atomic) {
+ ib_attr->atomic_cap = IB_ATOMIC_GLOB;
+ ib_attr->masked_atomic_cap = IB_ATOMIC_GLOB;
+ }
+
+ ib_attr->max_ee_rd_atom = 0;
+ ib_attr->max_res_rd_atom = 0;
+ ib_attr->max_ee_init_rd_atom = 0;
+ ib_attr->max_ee = 0;
+ ib_attr->max_rdd = 0;
+ ib_attr->max_mw = dev_attr->max_mw;
+ ib_attr->max_raw_ipv6_qp = 0;
+ ib_attr->max_raw_ethy_qp = dev_attr->max_raw_ethy_qp;
+ ib_attr->max_mcast_grp = 0;
+ ib_attr->max_mcast_qp_attach = 0;
+ ib_attr->max_total_mcast_qp_attach = 0;
+ ib_attr->max_ah = dev_attr->max_ah;
+
+ ib_attr->max_srq = dev_attr->max_srq;
+ ib_attr->max_srq_wr = dev_attr->max_srq_wqes;
+ ib_attr->max_srq_sge = dev_attr->max_srq_sges;
+
+ ib_attr->max_fast_reg_page_list_len = MAX_PBL_LVL_1_PGS;
+
+ ib_attr->max_pkeys = 1;
+ ib_attr->local_ca_ack_delay = BNXT_RE_DEFAULT_ACK_DELAY;
+ return 0;
+}
+
+int bnxt_re_modify_device(struct ib_device *ibdev,
+ int device_modify_mask,
+ struct ib_device_modify *device_modify)
+{
+ ibdev_dbg(ibdev, "Modify device with mask 0x%x", device_modify_mask);
+
+ if (device_modify_mask & ~IB_DEVICE_MODIFY_NODE_DESC)
+ return -EOPNOTSUPP;
+
+ if (!(device_modify_mask & IB_DEVICE_MODIFY_NODE_DESC))
+ return 0;
+
+ memcpy(ibdev->node_desc, device_modify->node_desc, IB_DEVICE_NODE_DESC_MAX);
+ return 0;
+}
+
+/* Port */
+int bnxt_re_query_port(struct ib_device *ibdev, u32 port_num,
+ struct ib_port_attr *port_attr)
+{
+ struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev);
+ struct bnxt_qplib_dev_attr *dev_attr = rdev->dev_attr;
+ int rc;
+
+ memset(port_attr, 0, sizeof(*port_attr));
+
+ if (netif_running(rdev->netdev) && netif_carrier_ok(rdev->netdev)) {
+ port_attr->state = IB_PORT_ACTIVE;
+ port_attr->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
+ } else {
+ port_attr->state = IB_PORT_DOWN;
+ port_attr->phys_state = IB_PORT_PHYS_STATE_DISABLED;
+ }
+ port_attr->max_mtu = IB_MTU_4096;
+ port_attr->active_mtu = iboe_get_mtu(rdev->netdev->mtu);
+ /* One GID is reserved for RawEth QP. Report one less */
+ port_attr->gid_tbl_len = (rdev->rcfw.roce_mirror ? (dev_attr->max_sgid - 1) :
+ dev_attr->max_sgid);
+ port_attr->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_REINIT_SUP |
+ IB_PORT_DEVICE_MGMT_SUP |
+ IB_PORT_VENDOR_CLASS_SUP;
+ port_attr->ip_gids = true;
+
+ port_attr->max_msg_sz = (u32)BNXT_RE_MAX_MR_SIZE_LOW;
+ port_attr->bad_pkey_cntr = 0;
+ port_attr->qkey_viol_cntr = 0;
+ port_attr->pkey_tbl_len = dev_attr->max_pkey;
+ port_attr->lid = 0;
+ port_attr->sm_lid = 0;
+ port_attr->lmc = 0;
+ port_attr->max_vl_num = 4;
+ port_attr->sm_sl = 0;
+ port_attr->subnet_timeout = 0;
+ port_attr->init_type_reply = 0;
+ rc = ib_get_eth_speed(&rdev->ibdev, port_num, &port_attr->active_speed,
+ &port_attr->active_width);
+
+ return rc;
+}
+
+int bnxt_re_get_port_immutable(struct ib_device *ibdev, u32 port_num,
+ struct ib_port_immutable *immutable)
+{
+ struct ib_port_attr port_attr;
+
+ if (bnxt_re_query_port(ibdev, port_num, &port_attr))
+ return -EINVAL;
+
+ immutable->pkey_tbl_len = port_attr.pkey_tbl_len;
+ immutable->gid_tbl_len = port_attr.gid_tbl_len;
+ immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
+ immutable->core_cap_flags |= RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP;
+ immutable->max_mad_size = IB_MGMT_MAD_SIZE;
+ return 0;
+}
+
+void bnxt_re_query_fw_str(struct ib_device *ibdev, char *str)
+{
+ struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev);
+
+ snprintf(str, IB_FW_VERSION_NAME_MAX, "%d.%d.%d.%d",
+ rdev->dev_attr->fw_ver[0], rdev->dev_attr->fw_ver[1],
+ rdev->dev_attr->fw_ver[2], rdev->dev_attr->fw_ver[3]);
+}
+
+int bnxt_re_query_pkey(struct ib_device *ibdev, u32 port_num,
+ u16 index, u16 *pkey)
+{
+ if (index > 0)
+ return -EINVAL;
+
+ *pkey = IB_DEFAULT_PKEY_FULL;
+
+ return 0;
+}
+
+int bnxt_re_query_gid(struct ib_device *ibdev, u32 port_num,
+ int index, union ib_gid *gid)
+{
+ struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev);
+ int rc;
+
+ /* Ignore port_num */
+ memset(gid, 0, sizeof(*gid));
+ rc = bnxt_qplib_get_sgid(&rdev->qplib_res,
+ &rdev->qplib_res.sgid_tbl, index,
+ (struct bnxt_qplib_gid *)gid);
+ return rc;
+}
+
+int bnxt_re_del_gid(const struct ib_gid_attr *attr, void **context)
+{
+ int rc = 0;
+ struct bnxt_re_gid_ctx *ctx, **ctx_tbl;
+ struct bnxt_re_dev *rdev = to_bnxt_re_dev(attr->device, ibdev);
+ struct bnxt_qplib_sgid_tbl *sgid_tbl = &rdev->qplib_res.sgid_tbl;
+ struct bnxt_qplib_gid *gid_to_del;
+ u16 vlan_id = 0xFFFF;
+
+ /* Delete the entry from the hardware */
+ ctx = *context;
+ if (!ctx)
+ return -EINVAL;
+
+ if (sgid_tbl->active) {
+ if (ctx->idx >= sgid_tbl->max)
+ return -EINVAL;
+ gid_to_del = &sgid_tbl->tbl[ctx->idx].gid;
+ vlan_id = sgid_tbl->tbl[ctx->idx].vlan_id;
+ /* DEL_GID is called in WQ context(netdevice_event_work_handler)
+ * or via the ib_unregister_device path. In the former case QP1
+ * may not be destroyed yet, in which case just return as FW
+ * needs that entry to be present and will fail it's deletion.
+ * We could get invoked again after QP1 is destroyed OR get an
+ * ADD_GID call with a different GID value for the same index
+ * where we issue MODIFY_GID cmd to update the GID entry -- TBD
+ */
+ if (ctx->idx == 0 &&
+ rdma_link_local_addr((struct in6_addr *)gid_to_del) &&
+ ctx->refcnt == 1 && rdev->gsi_ctx.gsi_sqp) {
+ ibdev_dbg(&rdev->ibdev,
+ "Trying to delete GID0 while QP1 is alive\n");
+ return -EFAULT;
+ }
+ ctx->refcnt--;
+ if (!ctx->refcnt) {
+ rc = bnxt_qplib_del_sgid(sgid_tbl, gid_to_del,
+ vlan_id, true);
+ if (rc) {
+ ibdev_err(&rdev->ibdev,
+ "Failed to remove GID: %#x", rc);
+ } else {
+ ctx_tbl = sgid_tbl->ctx;
+ ctx_tbl[ctx->idx] = NULL;
+ kfree(ctx);
+ }
+ }
+ } else {
+ return -EINVAL;
+ }
+ return rc;
+}
+
+int bnxt_re_add_gid(const struct ib_gid_attr *attr, void **context)
+{
+ int rc;
+ u32 tbl_idx = 0;
+ u16 vlan_id = 0xFFFF;
+ struct bnxt_re_gid_ctx *ctx, **ctx_tbl;
+ struct bnxt_re_dev *rdev = to_bnxt_re_dev(attr->device, ibdev);
+ struct bnxt_qplib_sgid_tbl *sgid_tbl = &rdev->qplib_res.sgid_tbl;
+
+ rc = rdma_read_gid_l2_fields(attr, &vlan_id, NULL);
+ if (rc)
+ return rc;
+
+ rc = bnxt_qplib_add_sgid(sgid_tbl, (struct bnxt_qplib_gid *)&attr->gid,
+ rdev->qplib_res.netdev->dev_addr,
+ vlan_id, true, &tbl_idx, false, 0);
+ if (rc == -EALREADY) {
+ ctx_tbl = sgid_tbl->ctx;
+ ctx_tbl[tbl_idx]->refcnt++;
+ *context = ctx_tbl[tbl_idx];
+ return 0;
+ }
+
+ if (rc < 0) {
+ ibdev_err(&rdev->ibdev, "Failed to add GID: %#x", rc);
+ return rc;
+ }
+
+ ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
+ if (!ctx)
+ return -ENOMEM;
+ ctx_tbl = sgid_tbl->ctx;
+ ctx->idx = tbl_idx;
+ ctx->refcnt = 1;
+ ctx_tbl[tbl_idx] = ctx;
+ *context = ctx;
+
+ return rc;
+}
+
+enum rdma_link_layer bnxt_re_get_link_layer(struct ib_device *ibdev,
+ u32 port_num)
+{
+ return IB_LINK_LAYER_ETHERNET;
+}
+
+#define BNXT_RE_FENCE_PBL_SIZE DIV_ROUND_UP(BNXT_RE_FENCE_BYTES, PAGE_SIZE)
+
+static void bnxt_re_create_fence_wqe(struct bnxt_re_pd *pd)
+{
+ struct bnxt_re_fence_data *fence = &pd->fence;
+ struct ib_mr *ib_mr = &fence->mr->ib_mr;
+ struct bnxt_qplib_swqe *wqe = &fence->bind_wqe;
+ struct bnxt_re_dev *rdev = pd->rdev;
+
+ if (bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx))
+ return;
+
+ memset(wqe, 0, sizeof(*wqe));
+ wqe->type = BNXT_QPLIB_SWQE_TYPE_BIND_MW;
+ wqe->wr_id = BNXT_QPLIB_FENCE_WRID;
+ wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_SIGNAL_COMP;
+ wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_UC_FENCE;
+ wqe->bind.zero_based = false;
+ wqe->bind.parent_l_key = ib_mr->lkey;
+ wqe->bind.va = (u64)(unsigned long)fence->va;
+ wqe->bind.length = fence->size;
+ wqe->bind.access_cntl = __from_ib_access_flags(IB_ACCESS_REMOTE_READ);
+ wqe->bind.mw_type = SQ_BIND_MW_TYPE_TYPE1;
+
+ /* Save the initial rkey in fence structure for now;
+ * wqe->bind.r_key will be set at (re)bind time.
+ */
+ fence->bind_rkey = ib_inc_rkey(fence->mw->rkey);
+}
+
+static int bnxt_re_bind_fence_mw(struct bnxt_qplib_qp *qplib_qp)
+{
+ struct bnxt_re_qp *qp = container_of(qplib_qp, struct bnxt_re_qp,
+ qplib_qp);
+ struct ib_pd *ib_pd = qp->ib_qp.pd;
+ struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd);
+ struct bnxt_re_fence_data *fence = &pd->fence;
+ struct bnxt_qplib_swqe *fence_wqe = &fence->bind_wqe;
+ struct bnxt_qplib_swqe wqe;
+ int rc;
+
+ memcpy(&wqe, fence_wqe, sizeof(wqe));
+ wqe.bind.r_key = fence->bind_rkey;
+ fence->bind_rkey = ib_inc_rkey(fence->bind_rkey);
+
+ ibdev_dbg(&qp->rdev->ibdev,
+ "Posting bind fence-WQE: rkey: %#x QP: %d PD: %p\n",
+ wqe.bind.r_key, qp->qplib_qp.id, pd);
+ rc = bnxt_qplib_post_send(&qp->qplib_qp, &wqe);
+ if (rc) {
+ ibdev_err(&qp->rdev->ibdev, "Failed to bind fence-WQE\n");
+ return rc;
+ }
+ bnxt_qplib_post_send_db(&qp->qplib_qp);
+
+ return rc;
+}
+
+static void bnxt_re_destroy_fence_mr(struct bnxt_re_pd *pd)
+{
+ struct bnxt_re_fence_data *fence = &pd->fence;
+ struct bnxt_re_dev *rdev = pd->rdev;
+ struct device *dev = &rdev->en_dev->pdev->dev;
+ struct bnxt_re_mr *mr = fence->mr;
+
+ if (bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx))
+ return;
+
+ if (fence->mw) {
+ bnxt_re_dealloc_mw(fence->mw);
+ fence->mw = NULL;
+ }
+ if (mr) {
+ if (mr->ib_mr.rkey)
+ bnxt_qplib_dereg_mrw(&rdev->qplib_res, &mr->qplib_mr,
+ true);
+ if (mr->ib_mr.lkey)
+ bnxt_qplib_free_mrw(&rdev->qplib_res, &mr->qplib_mr);
+ kfree(mr);
+ fence->mr = NULL;
+ }
+ if (fence->dma_addr) {
+ dma_unmap_single(dev, fence->dma_addr, BNXT_RE_FENCE_BYTES,
+ DMA_BIDIRECTIONAL);
+ fence->dma_addr = 0;
+ }
+}
+
+static int bnxt_re_create_fence_mr(struct bnxt_re_pd *pd)
+{
+ int mr_access_flags = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_MW_BIND;
+ struct bnxt_re_fence_data *fence = &pd->fence;
+ struct bnxt_re_dev *rdev = pd->rdev;
+ struct device *dev = &rdev->en_dev->pdev->dev;
+ struct bnxt_re_mr *mr = NULL;
+ dma_addr_t dma_addr = 0;
+ struct ib_mw *mw;
+ int rc;
+
+ if (bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx))
+ return 0;
+
+ dma_addr = dma_map_single(dev, fence->va, BNXT_RE_FENCE_BYTES,
+ DMA_BIDIRECTIONAL);
+ rc = dma_mapping_error(dev, dma_addr);
+ if (rc) {
+ ibdev_err(&rdev->ibdev, "Failed to dma-map fence-MR-mem\n");
+ rc = -EIO;
+ fence->dma_addr = 0;
+ goto fail;
+ }
+ fence->dma_addr = dma_addr;
+
+ /* Allocate a MR */
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr) {
+ rc = -ENOMEM;
+ goto fail;
+ }
+ fence->mr = mr;
+ mr->rdev = rdev;
+ mr->qplib_mr.pd = &pd->qplib_pd;
+ mr->qplib_mr.type = CMDQ_ALLOCATE_MRW_MRW_FLAGS_PMR;
+ mr->qplib_mr.access_flags = __from_ib_access_flags(mr_access_flags);
+ if (!_is_alloc_mr_unified(rdev->dev_attr->dev_cap_flags)) {
+ rc = bnxt_qplib_alloc_mrw(&rdev->qplib_res, &mr->qplib_mr);
+ if (rc) {
+ ibdev_err(&rdev->ibdev, "Failed to alloc fence-HW-MR\n");
+ goto fail;
+ }
+
+ /* Register MR */
+ mr->ib_mr.lkey = mr->qplib_mr.lkey;
+ } else {
+ mr->qplib_mr.flags = CMDQ_REGISTER_MR_FLAGS_ALLOC_MR;
+ }
+ mr->qplib_mr.va = (u64)(unsigned long)fence->va;
+ mr->qplib_mr.total_size = BNXT_RE_FENCE_BYTES;
+ rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mr->qplib_mr, NULL,
+ BNXT_RE_FENCE_PBL_SIZE, PAGE_SIZE,
+ _is_alloc_mr_unified(rdev->dev_attr->dev_cap_flags));
+ if (rc) {
+ ibdev_err(&rdev->ibdev, "Failed to register fence-MR\n");
+ goto fail;
+ }
+ mr->ib_mr.rkey = mr->qplib_mr.rkey;
+
+ /* Create a fence MW only for kernel consumers */
+ mw = bnxt_re_alloc_mw(&pd->ib_pd, IB_MW_TYPE_1, NULL);
+ if (IS_ERR(mw)) {
+ ibdev_err(&rdev->ibdev,
+ "Failed to create fence-MW for PD: %p\n", pd);
+ rc = PTR_ERR(mw);
+ goto fail;
+ }
+ fence->mw = mw;
+
+ bnxt_re_create_fence_wqe(pd);
+ return 0;
+
+fail:
+ bnxt_re_destroy_fence_mr(pd);
+ return rc;
+}
+
+static struct bnxt_re_user_mmap_entry*
+bnxt_re_mmap_entry_insert(struct bnxt_re_ucontext *uctx, u64 mem_offset,
+ enum bnxt_re_mmap_flag mmap_flag, u64 *offset)
+{
+ struct bnxt_re_user_mmap_entry *entry;
+ int ret;
+
+ entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+ if (!entry)
+ return NULL;
+
+ entry->mem_offset = mem_offset;
+ entry->mmap_flag = mmap_flag;
+ entry->uctx = uctx;
+
+ switch (mmap_flag) {
+ case BNXT_RE_MMAP_SH_PAGE:
+ ret = rdma_user_mmap_entry_insert_exact(&uctx->ib_uctx,
+ &entry->rdma_entry, PAGE_SIZE, 0);
+ break;
+ case BNXT_RE_MMAP_UC_DB:
+ case BNXT_RE_MMAP_WC_DB:
+ case BNXT_RE_MMAP_DBR_BAR:
+ case BNXT_RE_MMAP_DBR_PAGE:
+ case BNXT_RE_MMAP_TOGGLE_PAGE:
+ ret = rdma_user_mmap_entry_insert(&uctx->ib_uctx,
+ &entry->rdma_entry, PAGE_SIZE);
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ if (ret) {
+ kfree(entry);
+ return NULL;
+ }
+ if (offset)
+ *offset = rdma_user_mmap_get_offset(&entry->rdma_entry);
+
+ return entry;
+}
+
+/* Protection Domains */
+int bnxt_re_dealloc_pd(struct ib_pd *ib_pd, struct ib_udata *udata)
+{
+ struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd);
+ struct bnxt_re_dev *rdev = pd->rdev;
+
+ if (udata) {
+ rdma_user_mmap_entry_remove(pd->pd_db_mmap);
+ pd->pd_db_mmap = NULL;
+ }
+
+ bnxt_re_destroy_fence_mr(pd);
+
+ if (pd->qplib_pd.id) {
+ if (!bnxt_qplib_dealloc_pd(&rdev->qplib_res,
+ &rdev->qplib_res.pd_tbl,
+ &pd->qplib_pd))
+ atomic_dec(&rdev->stats.res.pd_count);
+ }
+ return 0;
+}
+
+int bnxt_re_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
+{
+ struct ib_device *ibdev = ibpd->device;
+ struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev);
+ struct bnxt_re_ucontext *ucntx = rdma_udata_to_drv_context(
+ udata, struct bnxt_re_ucontext, ib_uctx);
+ struct bnxt_re_pd *pd = container_of(ibpd, struct bnxt_re_pd, ib_pd);
+ struct bnxt_re_user_mmap_entry *entry = NULL;
+ u32 active_pds;
+ int rc = 0;
+
+ pd->rdev = rdev;
+ if (bnxt_qplib_alloc_pd(&rdev->qplib_res, &pd->qplib_pd)) {
+ ibdev_err(&rdev->ibdev, "Failed to allocate HW PD");
+ rc = -ENOMEM;
+ goto fail;
+ }
+
+ if (udata) {
+ struct bnxt_re_pd_resp resp = {};
+
+ if (!ucntx->dpi.dbr) {
+ /* Allocate DPI in alloc_pd to avoid failing of
+ * ibv_devinfo and family of application when DPIs
+ * are depleted.
+ */
+ if (bnxt_qplib_alloc_dpi(&rdev->qplib_res,
+ &ucntx->dpi, ucntx, BNXT_QPLIB_DPI_TYPE_UC)) {
+ rc = -ENOMEM;
+ goto dbfail;
+ }
+ }
+
+ resp.pdid = pd->qplib_pd.id;
+ /* Still allow mapping this DBR to the new user PD. */
+ resp.dpi = ucntx->dpi.dpi;
+
+ entry = bnxt_re_mmap_entry_insert(ucntx, (u64)ucntx->dpi.umdbr,
+ BNXT_RE_MMAP_UC_DB, &resp.dbr);
+
+ if (!entry) {
+ rc = -ENOMEM;
+ goto dbfail;
+ }
+
+ pd->pd_db_mmap = &entry->rdma_entry;
+
+ rc = ib_copy_to_udata(udata, &resp, min(sizeof(resp), udata->outlen));
+ if (rc) {
+ rdma_user_mmap_entry_remove(pd->pd_db_mmap);
+ rc = -EFAULT;
+ goto dbfail;
+ }
+ }
+
+ if (!udata)
+ if (bnxt_re_create_fence_mr(pd))
+ ibdev_warn(&rdev->ibdev,
+ "Failed to create Fence-MR\n");
+ active_pds = atomic_inc_return(&rdev->stats.res.pd_count);
+ if (active_pds > rdev->stats.res.pd_watermark)
+ rdev->stats.res.pd_watermark = active_pds;
+
+ return 0;
+dbfail:
+ bnxt_qplib_dealloc_pd(&rdev->qplib_res, &rdev->qplib_res.pd_tbl,
+ &pd->qplib_pd);
+fail:
+ return rc;
+}
+
+/* Address Handles */
+int bnxt_re_destroy_ah(struct ib_ah *ib_ah, u32 flags)
+{
+ struct bnxt_re_ah *ah = container_of(ib_ah, struct bnxt_re_ah, ib_ah);
+ struct bnxt_re_dev *rdev = ah->rdev;
+ bool block = true;
+ int rc;
+
+ block = !(flags & RDMA_DESTROY_AH_SLEEPABLE);
+ rc = bnxt_qplib_destroy_ah(&rdev->qplib_res, &ah->qplib_ah, block);
+ if (BNXT_RE_CHECK_RC(rc)) {
+ if (rc == -ETIMEDOUT)
+ rc = 0;
+ else
+ goto fail;
+ }
+ atomic_dec(&rdev->stats.res.ah_count);
+fail:
+ return rc;
+}
+
+static u8 bnxt_re_stack_to_dev_nw_type(enum rdma_network_type ntype)
+{
+ u8 nw_type;
+
+ switch (ntype) {
+ case RDMA_NETWORK_IPV4:
+ nw_type = CMDQ_CREATE_AH_TYPE_V2IPV4;
+ break;
+ case RDMA_NETWORK_IPV6:
+ nw_type = CMDQ_CREATE_AH_TYPE_V2IPV6;
+ break;
+ default:
+ nw_type = CMDQ_CREATE_AH_TYPE_V1;
+ break;
+ }
+ return nw_type;
+}
+
+int bnxt_re_create_ah(struct ib_ah *ib_ah, struct rdma_ah_init_attr *init_attr,
+ struct ib_udata *udata)
+{
+ struct ib_pd *ib_pd = ib_ah->pd;
+ struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd);
+ struct rdma_ah_attr *ah_attr = init_attr->ah_attr;
+ const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr);
+ struct bnxt_re_dev *rdev = pd->rdev;
+ const struct ib_gid_attr *sgid_attr;
+ struct bnxt_re_gid_ctx *ctx;
+ struct bnxt_re_ah *ah = container_of(ib_ah, struct bnxt_re_ah, ib_ah);
+ u32 active_ahs;
+ u8 nw_type;
+ int rc;
+
+ if (!(rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH)) {
+ ibdev_err(&rdev->ibdev, "Failed to alloc AH: GRH not set");
+ return -EINVAL;
+ }
+
+ ah->rdev = rdev;
+ ah->qplib_ah.pd = &pd->qplib_pd;
+
+ /* Supply the configuration for the HW */
+ memcpy(ah->qplib_ah.dgid.data, grh->dgid.raw,
+ sizeof(union ib_gid));
+ sgid_attr = grh->sgid_attr;
+ /* Get the HW context of the GID. The reference
+ * of GID table entry is already taken by the caller.
+ */
+ ctx = rdma_read_gid_hw_context(sgid_attr);
+ ah->qplib_ah.sgid_index = ctx->idx;
+ ah->qplib_ah.host_sgid_index = grh->sgid_index;
+ ah->qplib_ah.traffic_class = grh->traffic_class;
+ ah->qplib_ah.flow_label = grh->flow_label;
+ ah->qplib_ah.hop_limit = grh->hop_limit;
+ ah->qplib_ah.sl = rdma_ah_get_sl(ah_attr);
+
+ /* Get network header type for this GID */
+ nw_type = rdma_gid_attr_network_type(sgid_attr);
+ ah->qplib_ah.nw_type = bnxt_re_stack_to_dev_nw_type(nw_type);
+
+ memcpy(ah->qplib_ah.dmac, ah_attr->roce.dmac, ETH_ALEN);
+ rc = bnxt_qplib_create_ah(&rdev->qplib_res, &ah->qplib_ah,
+ !(init_attr->flags &
+ RDMA_CREATE_AH_SLEEPABLE));
+ if (rc) {
+ ibdev_err(&rdev->ibdev, "Failed to allocate HW AH");
+ return rc;
+ }
+
+ /* Write AVID to shared page. */
+ if (udata) {
+ struct bnxt_re_ucontext *uctx = rdma_udata_to_drv_context(
+ udata, struct bnxt_re_ucontext, ib_uctx);
+ unsigned long flag;
+ u32 *wrptr;
+
+ spin_lock_irqsave(&uctx->sh_lock, flag);
+ wrptr = (u32 *)(uctx->shpg + BNXT_RE_AVID_OFFT);
+ *wrptr = ah->qplib_ah.id;
+ wmb(); /* make sure cache is updated. */
+ spin_unlock_irqrestore(&uctx->sh_lock, flag);
+ }
+ active_ahs = atomic_inc_return(&rdev->stats.res.ah_count);
+ if (active_ahs > rdev->stats.res.ah_watermark)
+ rdev->stats.res.ah_watermark = active_ahs;
+
+ return 0;
+}
+
+int bnxt_re_query_ah(struct ib_ah *ib_ah, struct rdma_ah_attr *ah_attr)
+{
+ struct bnxt_re_ah *ah = container_of(ib_ah, struct bnxt_re_ah, ib_ah);
+
+ ah_attr->type = ib_ah->type;
+ rdma_ah_set_sl(ah_attr, ah->qplib_ah.sl);
+ memcpy(ah_attr->roce.dmac, ah->qplib_ah.dmac, ETH_ALEN);
+ rdma_ah_set_grh(ah_attr, NULL, 0,
+ ah->qplib_ah.host_sgid_index,
+ 0, ah->qplib_ah.traffic_class);
+ rdma_ah_set_dgid_raw(ah_attr, ah->qplib_ah.dgid.data);
+ rdma_ah_set_port_num(ah_attr, 1);
+ rdma_ah_set_static_rate(ah_attr, 0);
+ return 0;
+}
+
+unsigned long bnxt_re_lock_cqs(struct bnxt_re_qp *qp)
+ __acquires(&qp->scq->cq_lock) __acquires(&qp->rcq->cq_lock)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&qp->scq->cq_lock, flags);
+ if (qp->rcq != qp->scq)
+ spin_lock(&qp->rcq->cq_lock);
+ else
+ __acquire(&qp->rcq->cq_lock);
+
+ return flags;
+}
+
+void bnxt_re_unlock_cqs(struct bnxt_re_qp *qp,
+ unsigned long flags)
+ __releases(&qp->scq->cq_lock) __releases(&qp->rcq->cq_lock)
+{
+ if (qp->rcq != qp->scq)
+ spin_unlock(&qp->rcq->cq_lock);
+ else
+ __release(&qp->rcq->cq_lock);
+ spin_unlock_irqrestore(&qp->scq->cq_lock, flags);
+}
+
+static void bnxt_re_destroy_gsi_sqp(struct bnxt_re_qp *qp)
+{
+ struct bnxt_re_qp *gsi_sqp;
+ struct bnxt_re_ah *gsi_sah;
+ struct bnxt_re_dev *rdev;
+ int rc;
+
+ rdev = qp->rdev;
+ gsi_sqp = rdev->gsi_ctx.gsi_sqp;
+ gsi_sah = rdev->gsi_ctx.gsi_sah;
+
+ ibdev_dbg(&rdev->ibdev, "Destroy the shadow AH\n");
+ bnxt_qplib_destroy_ah(&rdev->qplib_res,
+ &gsi_sah->qplib_ah,
+ true);
+ atomic_dec(&rdev->stats.res.ah_count);
+ bnxt_qplib_clean_qp(&qp->qplib_qp);
+
+ ibdev_dbg(&rdev->ibdev, "Destroy the shadow QP\n");
+ rc = bnxt_qplib_destroy_qp(&rdev->qplib_res, &gsi_sqp->qplib_qp);
+ if (rc)
+ ibdev_err(&rdev->ibdev, "Destroy Shadow QP failed");
+
+ bnxt_qplib_free_qp_res(&rdev->qplib_res, &gsi_sqp->qplib_qp);
+
+ /* remove from active qp list */
+ mutex_lock(&rdev->qp_lock);
+ list_del(&gsi_sqp->list);
+ mutex_unlock(&rdev->qp_lock);
+ atomic_dec(&rdev->stats.res.qp_count);
+
+ kfree(rdev->gsi_ctx.sqp_tbl);
+ kfree(gsi_sah);
+ kfree(gsi_sqp);
+ rdev->gsi_ctx.gsi_sqp = NULL;
+ rdev->gsi_ctx.gsi_sah = NULL;
+ rdev->gsi_ctx.sqp_tbl = NULL;
+}
+
+static void bnxt_re_del_unique_gid(struct bnxt_re_dev *rdev)
+{
+ int rc;
+
+ if (!rdev->rcfw.roce_mirror)
+ return;
+
+ rc = bnxt_qplib_del_sgid(&rdev->qplib_res.sgid_tbl,
+ (struct bnxt_qplib_gid *)&rdev->ugid,
+ 0xFFFF, true);
+ if (rc)
+ dev_err(rdev_to_dev(rdev), "Failed to delete unique GID, rc: %d\n", rc);
+}
+
+/* Queue Pairs */
+int bnxt_re_destroy_qp(struct ib_qp *ib_qp, struct ib_udata *udata)
+{
+ struct bnxt_re_qp *qp = container_of(ib_qp, struct bnxt_re_qp, ib_qp);
+ struct bnxt_qplib_qp *qplib_qp = &qp->qplib_qp;
+ struct bnxt_re_dev *rdev = qp->rdev;
+ struct bnxt_qplib_nq *scq_nq = NULL;
+ struct bnxt_qplib_nq *rcq_nq = NULL;
+ unsigned int flags;
+ int rc;
+
+ bnxt_re_debug_rem_qpinfo(rdev, qp);
+
+ bnxt_qplib_flush_cqn_wq(&qp->qplib_qp);
+
+ rc = bnxt_qplib_destroy_qp(&rdev->qplib_res, &qp->qplib_qp);
+ if (rc)
+ ibdev_err(&rdev->ibdev, "Failed to destroy HW QP");
+
+ if (rdma_is_kernel_res(&qp->ib_qp.res)) {
+ flags = bnxt_re_lock_cqs(qp);
+ bnxt_qplib_clean_qp(&qp->qplib_qp);
+ bnxt_re_unlock_cqs(qp, flags);
+ }
+
+ bnxt_qplib_free_qp_res(&rdev->qplib_res, &qp->qplib_qp);
+
+ if (ib_qp->qp_type == IB_QPT_GSI && rdev->gsi_ctx.gsi_sqp)
+ bnxt_re_destroy_gsi_sqp(qp);
+
+ mutex_lock(&rdev->qp_lock);
+ list_del(&qp->list);
+ mutex_unlock(&rdev->qp_lock);
+ atomic_dec(&rdev->stats.res.qp_count);
+ if (qp->qplib_qp.type == CMDQ_CREATE_QP_TYPE_RC)
+ atomic_dec(&rdev->stats.res.rc_qp_count);
+ else if (qp->qplib_qp.type == CMDQ_CREATE_QP_TYPE_UD)
+ atomic_dec(&rdev->stats.res.ud_qp_count);
+
+ if (qp->qplib_qp.type == CMDQ_CREATE_QP_TYPE_RAW_ETHERTYPE)
+ bnxt_re_del_unique_gid(rdev);
+
+ ib_umem_release(qp->rumem);
+ ib_umem_release(qp->sumem);
+
+ /* Flush all the entries of notification queue associated with
+ * given qp.
+ */
+ scq_nq = qplib_qp->scq->nq;
+ rcq_nq = qplib_qp->rcq->nq;
+ bnxt_re_synchronize_nq(scq_nq);
+ if (scq_nq != rcq_nq)
+ bnxt_re_synchronize_nq(rcq_nq);
+
+ return 0;
+}
+
+static u8 __from_ib_qp_type(enum ib_qp_type type)
+{
+ switch (type) {
+ case IB_QPT_GSI:
+ return CMDQ_CREATE_QP1_TYPE_GSI;
+ case IB_QPT_RC:
+ return CMDQ_CREATE_QP_TYPE_RC;
+ case IB_QPT_UD:
+ return CMDQ_CREATE_QP_TYPE_UD;
+ case IB_QPT_RAW_PACKET:
+ return CMDQ_CREATE_QP_TYPE_RAW_ETHERTYPE;
+ default:
+ return IB_QPT_MAX;
+ }
+}
+
+static u16 bnxt_re_setup_rwqe_size(struct bnxt_qplib_qp *qplqp,
+ int rsge, int max)
+{
+ if (qplqp->wqe_mode == BNXT_QPLIB_WQE_MODE_STATIC)
+ rsge = max;
+ return bnxt_re_get_rwqe_size(rsge);
+}
+
+static u16 bnxt_re_get_wqe_size(int ilsize, int nsge)
+{
+ u16 wqe_size, calc_ils;
+
+ wqe_size = bnxt_re_get_swqe_size(nsge);
+ if (ilsize) {
+ calc_ils = sizeof(struct sq_send_hdr) + ilsize;
+ wqe_size = max_t(u16, calc_ils, wqe_size);
+ wqe_size = ALIGN(wqe_size, sizeof(struct sq_send_hdr));
+ }
+ return wqe_size;
+}
+
+static int bnxt_re_setup_swqe_size(struct bnxt_re_qp *qp,
+ struct ib_qp_init_attr *init_attr)
+{
+ struct bnxt_qplib_dev_attr *dev_attr;
+ struct bnxt_qplib_qp *qplqp;
+ struct bnxt_re_dev *rdev;
+ struct bnxt_qplib_q *sq;
+ int align, ilsize;
+
+ rdev = qp->rdev;
+ qplqp = &qp->qplib_qp;
+ sq = &qplqp->sq;
+ dev_attr = rdev->dev_attr;
+
+ align = sizeof(struct sq_send_hdr);
+ ilsize = ALIGN(init_attr->cap.max_inline_data, align);
+
+ /* For gen p4 and gen p5 fixed wqe compatibility mode
+ * wqe size is fixed to 128 bytes - ie 6 SGEs
+ */
+ if (qplqp->wqe_mode == BNXT_QPLIB_WQE_MODE_STATIC) {
+ sq->wqe_size = bnxt_re_get_swqe_size(BNXT_STATIC_MAX_SGE);
+ sq->max_sge = BNXT_STATIC_MAX_SGE;
+ } else {
+ sq->wqe_size = bnxt_re_get_wqe_size(ilsize, sq->max_sge);
+ if (sq->wqe_size > bnxt_re_get_swqe_size(dev_attr->max_qp_sges))
+ return -EINVAL;
+ }
+
+ if (init_attr->cap.max_inline_data) {
+ qplqp->max_inline_data = sq->wqe_size -
+ sizeof(struct sq_send_hdr);
+ init_attr->cap.max_inline_data = qplqp->max_inline_data;
+ }
+
+ return 0;
+}
+
+static int bnxt_re_init_user_qp(struct bnxt_re_dev *rdev, struct bnxt_re_pd *pd,
+ struct bnxt_re_qp *qp, struct bnxt_re_ucontext *cntx,
+ struct bnxt_re_qp_req *ureq)
+{
+ struct bnxt_qplib_qp *qplib_qp;
+ int bytes = 0, psn_sz;
+ struct ib_umem *umem;
+ int psn_nume;
+
+ qplib_qp = &qp->qplib_qp;
+
+ bytes = (qplib_qp->sq.max_wqe * qplib_qp->sq.wqe_size);
+ /* Consider mapping PSN search memory only for RC QPs. */
+ if (qplib_qp->type == CMDQ_CREATE_QP_TYPE_RC) {
+ psn_sz = bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx) ?
+ sizeof(struct sq_psn_search_ext) :
+ sizeof(struct sq_psn_search);
+ if (cntx && bnxt_re_is_var_size_supported(rdev, cntx)) {
+ psn_nume = ureq->sq_slots;
+ } else {
+ psn_nume = (qplib_qp->wqe_mode == BNXT_QPLIB_WQE_MODE_STATIC) ?
+ qplib_qp->sq.max_wqe : ((qplib_qp->sq.max_wqe * qplib_qp->sq.wqe_size) /
+ sizeof(struct bnxt_qplib_sge));
+ }
+ if (_is_host_msn_table(rdev->qplib_res.dattr->dev_cap_flags2))
+ psn_nume = roundup_pow_of_two(psn_nume);
+ bytes += (psn_nume * psn_sz);
+ }
+
+ bytes = PAGE_ALIGN(bytes);
+ umem = ib_umem_get(&rdev->ibdev, ureq->qpsva, bytes,
+ IB_ACCESS_LOCAL_WRITE);
+ if (IS_ERR(umem))
+ return PTR_ERR(umem);
+
+ qp->sumem = umem;
+ qplib_qp->sq.sg_info.umem = umem;
+ qplib_qp->sq.sg_info.pgsize = PAGE_SIZE;
+ qplib_qp->sq.sg_info.pgshft = PAGE_SHIFT;
+ qplib_qp->qp_handle = ureq->qp_handle;
+
+ if (!qp->qplib_qp.srq) {
+ bytes = (qplib_qp->rq.max_wqe * qplib_qp->rq.wqe_size);
+ bytes = PAGE_ALIGN(bytes);
+ umem = ib_umem_get(&rdev->ibdev, ureq->qprva, bytes,
+ IB_ACCESS_LOCAL_WRITE);
+ if (IS_ERR(umem))
+ goto rqfail;
+ qp->rumem = umem;
+ qplib_qp->rq.sg_info.umem = umem;
+ qplib_qp->rq.sg_info.pgsize = PAGE_SIZE;
+ qplib_qp->rq.sg_info.pgshft = PAGE_SHIFT;
+ }
+
+ qplib_qp->dpi = &cntx->dpi;
+ return 0;
+rqfail:
+ ib_umem_release(qp->sumem);
+ qp->sumem = NULL;
+ memset(&qplib_qp->sq.sg_info, 0, sizeof(qplib_qp->sq.sg_info));
+
+ return PTR_ERR(umem);
+}
+
+static struct bnxt_re_ah *bnxt_re_create_shadow_qp_ah
+ (struct bnxt_re_pd *pd,
+ struct bnxt_qplib_res *qp1_res,
+ struct bnxt_qplib_qp *qp1_qp)
+{
+ struct bnxt_re_dev *rdev = pd->rdev;
+ struct bnxt_re_ah *ah;
+ union ib_gid sgid;
+ int rc;
+
+ ah = kzalloc(sizeof(*ah), GFP_KERNEL);
+ if (!ah)
+ return NULL;
+
+ ah->rdev = rdev;
+ ah->qplib_ah.pd = &pd->qplib_pd;
+
+ rc = bnxt_re_query_gid(&rdev->ibdev, 1, 0, &sgid);
+ if (rc)
+ goto fail;
+
+ /* supply the dgid data same as sgid */
+ memcpy(ah->qplib_ah.dgid.data, &sgid.raw,
+ sizeof(union ib_gid));
+ ah->qplib_ah.sgid_index = 0;
+
+ ah->qplib_ah.traffic_class = 0;
+ ah->qplib_ah.flow_label = 0;
+ ah->qplib_ah.hop_limit = 1;
+ ah->qplib_ah.sl = 0;
+ /* Have DMAC same as SMAC */
+ ether_addr_copy(ah->qplib_ah.dmac, rdev->netdev->dev_addr);
+
+ rc = bnxt_qplib_create_ah(&rdev->qplib_res, &ah->qplib_ah, false);
+ if (rc) {
+ ibdev_err(&rdev->ibdev,
+ "Failed to allocate HW AH for Shadow QP");
+ goto fail;
+ }
+ atomic_inc(&rdev->stats.res.ah_count);
+
+ return ah;
+
+fail:
+ kfree(ah);
+ return NULL;
+}
+
+static struct bnxt_re_qp *bnxt_re_create_shadow_qp
+ (struct bnxt_re_pd *pd,
+ struct bnxt_qplib_res *qp1_res,
+ struct bnxt_qplib_qp *qp1_qp)
+{
+ struct bnxt_re_dev *rdev = pd->rdev;
+ struct bnxt_re_qp *qp;
+ int rc;
+
+ qp = kzalloc(sizeof(*qp), GFP_KERNEL);
+ if (!qp)
+ return NULL;
+
+ qp->rdev = rdev;
+
+ /* Initialize the shadow QP structure from the QP1 values */
+ ether_addr_copy(qp->qplib_qp.smac, rdev->netdev->dev_addr);
+
+ qp->qplib_qp.pd = &pd->qplib_pd;
+ qp->qplib_qp.qp_handle = (u64)(unsigned long)(&qp->qplib_qp);
+ qp->qplib_qp.type = IB_QPT_UD;
+
+ qp->qplib_qp.max_inline_data = 0;
+ qp->qplib_qp.sig_type = true;
+
+ /* Shadow QP SQ depth should be same as QP1 RQ depth */
+ qp->qplib_qp.sq.wqe_size = bnxt_re_get_wqe_size(0, 6);
+ qp->qplib_qp.sq.max_wqe = qp1_qp->rq.max_wqe;
+ qp->qplib_qp.sq.max_sw_wqe = qp1_qp->rq.max_wqe;
+ qp->qplib_qp.sq.max_sge = 2;
+ /* Q full delta can be 1 since it is internal QP */
+ qp->qplib_qp.sq.q_full_delta = 1;
+ qp->qplib_qp.sq.sg_info.pgsize = PAGE_SIZE;
+ qp->qplib_qp.sq.sg_info.pgshft = PAGE_SHIFT;
+
+ qp->qplib_qp.scq = qp1_qp->scq;
+ qp->qplib_qp.rcq = qp1_qp->rcq;
+
+ qp->qplib_qp.rq.wqe_size = bnxt_re_get_rwqe_size(6);
+ qp->qplib_qp.rq.max_wqe = qp1_qp->rq.max_wqe;
+ qp->qplib_qp.rq.max_sw_wqe = qp1_qp->rq.max_wqe;
+ qp->qplib_qp.rq.max_sge = qp1_qp->rq.max_sge;
+ /* Q full delta can be 1 since it is internal QP */
+ qp->qplib_qp.rq.q_full_delta = 1;
+ qp->qplib_qp.rq.sg_info.pgsize = PAGE_SIZE;
+ qp->qplib_qp.rq.sg_info.pgshft = PAGE_SHIFT;
+
+ qp->qplib_qp.mtu = qp1_qp->mtu;
+
+ qp->qplib_qp.sq_hdr_buf_size = 0;
+ qp->qplib_qp.rq_hdr_buf_size = BNXT_QPLIB_MAX_GRH_HDR_SIZE_IPV6;
+ qp->qplib_qp.dpi = &rdev->dpi_privileged;
+
+ rc = bnxt_qplib_create_qp(qp1_res, &qp->qplib_qp);
+ if (rc)
+ goto fail;
+
+ spin_lock_init(&qp->sq_lock);
+ INIT_LIST_HEAD(&qp->list);
+ mutex_lock(&rdev->qp_lock);
+ list_add_tail(&qp->list, &rdev->qp_list);
+ atomic_inc(&rdev->stats.res.qp_count);
+ mutex_unlock(&rdev->qp_lock);
+ return qp;
+fail:
+ kfree(qp);
+ return NULL;
+}
+
+static int bnxt_re_init_rq_attr(struct bnxt_re_qp *qp,
+ struct ib_qp_init_attr *init_attr,
+ struct bnxt_re_ucontext *uctx)
+{
+ struct bnxt_qplib_dev_attr *dev_attr;
+ struct bnxt_qplib_qp *qplqp;
+ struct bnxt_re_dev *rdev;
+ struct bnxt_qplib_q *rq;
+ int entries;
+
+ rdev = qp->rdev;
+ qplqp = &qp->qplib_qp;
+ rq = &qplqp->rq;
+ dev_attr = rdev->dev_attr;
+
+ if (init_attr->srq) {
+ struct bnxt_re_srq *srq;
+
+ srq = container_of(init_attr->srq, struct bnxt_re_srq, ib_srq);
+ qplqp->srq = &srq->qplib_srq;
+ rq->max_wqe = 0;
+ } else {
+ rq->max_sge = init_attr->cap.max_recv_sge;
+ if (rq->max_sge > dev_attr->max_qp_sges)
+ rq->max_sge = dev_attr->max_qp_sges;
+ init_attr->cap.max_recv_sge = rq->max_sge;
+ rq->wqe_size = bnxt_re_setup_rwqe_size(qplqp, rq->max_sge,
+ dev_attr->max_qp_sges);
+ /* Allocate 1 more than what's provided so posting max doesn't
+ * mean empty.
+ */
+ entries = bnxt_re_init_depth(init_attr->cap.max_recv_wr + 1, uctx);
+ rq->max_wqe = min_t(u32, entries, dev_attr->max_qp_wqes + 1);
+ rq->max_sw_wqe = rq->max_wqe;
+ rq->q_full_delta = 0;
+ rq->sg_info.pgsize = PAGE_SIZE;
+ rq->sg_info.pgshft = PAGE_SHIFT;
+ }
+
+ return 0;
+}
+
+static void bnxt_re_adjust_gsi_rq_attr(struct bnxt_re_qp *qp)
+{
+ struct bnxt_qplib_dev_attr *dev_attr;
+ struct bnxt_qplib_qp *qplqp;
+ struct bnxt_re_dev *rdev;
+
+ rdev = qp->rdev;
+ qplqp = &qp->qplib_qp;
+ dev_attr = rdev->dev_attr;
+
+ if (!bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx)) {
+ qplqp->rq.max_sge = dev_attr->max_qp_sges;
+ if (qplqp->rq.max_sge > dev_attr->max_qp_sges)
+ qplqp->rq.max_sge = dev_attr->max_qp_sges;
+ qplqp->rq.max_sge = 6;
+ }
+}
+
+static int bnxt_re_init_sq_attr(struct bnxt_re_qp *qp,
+ struct ib_qp_init_attr *init_attr,
+ struct bnxt_re_ucontext *uctx,
+ struct bnxt_re_qp_req *ureq)
+{
+ struct bnxt_qplib_dev_attr *dev_attr;
+ struct bnxt_qplib_qp *qplqp;
+ struct bnxt_re_dev *rdev;
+ struct bnxt_qplib_q *sq;
+ int diff = 0;
+ int entries;
+ int rc;
+
+ rdev = qp->rdev;
+ qplqp = &qp->qplib_qp;
+ sq = &qplqp->sq;
+ dev_attr = rdev->dev_attr;
+
+ sq->max_sge = init_attr->cap.max_send_sge;
+ entries = init_attr->cap.max_send_wr;
+ if (uctx && qplqp->wqe_mode == BNXT_QPLIB_WQE_MODE_VARIABLE) {
+ sq->max_wqe = ureq->sq_slots;
+ sq->max_sw_wqe = ureq->sq_slots;
+ sq->wqe_size = sizeof(struct sq_sge);
+ } else {
+ if (sq->max_sge > dev_attr->max_qp_sges) {
+ sq->max_sge = dev_attr->max_qp_sges;
+ init_attr->cap.max_send_sge = sq->max_sge;
+ }
+
+ rc = bnxt_re_setup_swqe_size(qp, init_attr);
+ if (rc)
+ return rc;
+
+ /* Allocate 128 + 1 more than what's provided */
+ diff = (qplqp->wqe_mode == BNXT_QPLIB_WQE_MODE_VARIABLE) ?
+ 0 : BNXT_QPLIB_RESERVED_QP_WRS;
+ entries = bnxt_re_init_depth(entries + diff + 1, uctx);
+ sq->max_wqe = min_t(u32, entries, dev_attr->max_qp_wqes + diff + 1);
+ if (qplqp->wqe_mode == BNXT_QPLIB_WQE_MODE_VARIABLE)
+ sq->max_sw_wqe = bnxt_qplib_get_depth(sq, qplqp->wqe_mode, true);
+ else
+ sq->max_sw_wqe = sq->max_wqe;
+
+ }
+ sq->q_full_delta = diff + 1;
+ /*
+ * Reserving one slot for Phantom WQE. Application can
+ * post one extra entry in this case. But allowing this to avoid
+ * unexpected Queue full condition
+ */
+ qplqp->sq.q_full_delta -= 1;
+ qplqp->sq.sg_info.pgsize = PAGE_SIZE;
+ qplqp->sq.sg_info.pgshft = PAGE_SHIFT;
+
+ return 0;
+}
+
+static void bnxt_re_adjust_gsi_sq_attr(struct bnxt_re_qp *qp,
+ struct ib_qp_init_attr *init_attr,
+ struct bnxt_re_ucontext *uctx)
+{
+ struct bnxt_qplib_dev_attr *dev_attr;
+ struct bnxt_qplib_qp *qplqp;
+ struct bnxt_re_dev *rdev;
+ int entries;
+
+ rdev = qp->rdev;
+ qplqp = &qp->qplib_qp;
+ dev_attr = rdev->dev_attr;
+
+ if (!bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx)) {
+ entries = bnxt_re_init_depth(init_attr->cap.max_send_wr + 1, uctx);
+ qplqp->sq.max_wqe = min_t(u32, entries,
+ dev_attr->max_qp_wqes + 1);
+ qplqp->sq.q_full_delta = qplqp->sq.max_wqe -
+ init_attr->cap.max_send_wr;
+ qplqp->sq.max_sge++; /* Need one extra sge to put UD header */
+ if (qplqp->sq.max_sge > dev_attr->max_qp_sges)
+ qplqp->sq.max_sge = dev_attr->max_qp_sges;
+ }
+}
+
+static int bnxt_re_init_qp_type(struct bnxt_re_dev *rdev,
+ struct ib_qp_init_attr *init_attr)
+{
+ struct bnxt_qplib_chip_ctx *chip_ctx;
+ int qptype;
+
+ chip_ctx = rdev->chip_ctx;
+
+ qptype = __from_ib_qp_type(init_attr->qp_type);
+ if (qptype == IB_QPT_MAX) {
+ ibdev_err(&rdev->ibdev, "QP type 0x%x not supported", qptype);
+ qptype = -EOPNOTSUPP;
+ goto out;
+ }
+
+ if (bnxt_qplib_is_chip_gen_p5_p7(chip_ctx) &&
+ init_attr->qp_type == IB_QPT_GSI)
+ qptype = CMDQ_CREATE_QP_TYPE_GSI;
+out:
+ return qptype;
+}
+
+static int bnxt_re_init_qp_attr(struct bnxt_re_qp *qp, struct bnxt_re_pd *pd,
+ struct ib_qp_init_attr *init_attr,
+ struct bnxt_re_ucontext *uctx,
+ struct bnxt_re_qp_req *ureq)
+{
+ struct bnxt_qplib_dev_attr *dev_attr;
+ struct bnxt_qplib_qp *qplqp;
+ struct bnxt_re_dev *rdev;
+ struct bnxt_re_cq *cq;
+ int rc = 0, qptype;
+
+ rdev = qp->rdev;
+ qplqp = &qp->qplib_qp;
+ dev_attr = rdev->dev_attr;
+
+ /* Setup misc params */
+ ether_addr_copy(qplqp->smac, rdev->netdev->dev_addr);
+ qplqp->pd = &pd->qplib_pd;
+ qplqp->qp_handle = (u64)qplqp;
+ qplqp->max_inline_data = init_attr->cap.max_inline_data;
+ qplqp->sig_type = init_attr->sq_sig_type == IB_SIGNAL_ALL_WR;
+ qptype = bnxt_re_init_qp_type(rdev, init_attr);
+ if (qptype < 0) {
+ rc = qptype;
+ goto out;
+ }
+ qplqp->type = (u8)qptype;
+ qplqp->wqe_mode = bnxt_re_is_var_size_supported(rdev, uctx);
+ if (init_attr->qp_type == IB_QPT_RC) {
+ qplqp->max_rd_atomic = dev_attr->max_qp_rd_atom;
+ qplqp->max_dest_rd_atomic = dev_attr->max_qp_init_rd_atom;
+ }
+ qplqp->mtu = ib_mtu_enum_to_int(iboe_get_mtu(rdev->netdev->mtu));
+ qplqp->dpi = &rdev->dpi_privileged; /* Doorbell page */
+ if (init_attr->create_flags) {
+ ibdev_dbg(&rdev->ibdev,
+ "QP create flags 0x%x not supported",
+ init_attr->create_flags);
+ return -EOPNOTSUPP;
+ }
+
+ /* Setup CQs */
+ if (init_attr->send_cq) {
+ cq = container_of(init_attr->send_cq, struct bnxt_re_cq, ib_cq);
+ qplqp->scq = &cq->qplib_cq;
+ qp->scq = cq;
+ }
+
+ if (init_attr->recv_cq) {
+ cq = container_of(init_attr->recv_cq, struct bnxt_re_cq, ib_cq);
+ qplqp->rcq = &cq->qplib_cq;
+ qp->rcq = cq;
+ }
+
+ /* Setup RQ/SRQ */
+ rc = bnxt_re_init_rq_attr(qp, init_attr, uctx);
+ if (rc)
+ goto out;
+ if (init_attr->qp_type == IB_QPT_GSI)
+ bnxt_re_adjust_gsi_rq_attr(qp);
+
+ /* Setup SQ */
+ rc = bnxt_re_init_sq_attr(qp, init_attr, uctx, ureq);
+ if (rc)
+ goto out;
+ if (init_attr->qp_type == IB_QPT_GSI)
+ bnxt_re_adjust_gsi_sq_attr(qp, init_attr, uctx);
+
+ if (uctx) /* This will update DPI and qp_handle */
+ rc = bnxt_re_init_user_qp(rdev, pd, qp, uctx, ureq);
+out:
+ return rc;
+}
+
+static int bnxt_re_create_shadow_gsi(struct bnxt_re_qp *qp,
+ struct bnxt_re_pd *pd)
+{
+ struct bnxt_re_sqp_entries *sqp_tbl;
+ struct bnxt_re_dev *rdev;
+ struct bnxt_re_qp *sqp;
+ struct bnxt_re_ah *sah;
+ int rc = 0;
+
+ rdev = qp->rdev;
+ /* Create a shadow QP to handle the QP1 traffic */
+ sqp_tbl = kcalloc(BNXT_RE_MAX_GSI_SQP_ENTRIES, sizeof(*sqp_tbl),
+ GFP_KERNEL);
+ if (!sqp_tbl)
+ return -ENOMEM;
+ rdev->gsi_ctx.sqp_tbl = sqp_tbl;
+
+ sqp = bnxt_re_create_shadow_qp(pd, &rdev->qplib_res, &qp->qplib_qp);
+ if (!sqp) {
+ rc = -ENODEV;
+ ibdev_err(&rdev->ibdev, "Failed to create Shadow QP for QP1");
+ goto out;
+ }
+ rdev->gsi_ctx.gsi_sqp = sqp;
+
+ sqp->rcq = qp->rcq;
+ sqp->scq = qp->scq;
+ sah = bnxt_re_create_shadow_qp_ah(pd, &rdev->qplib_res,
+ &qp->qplib_qp);
+ if (!sah) {
+ bnxt_qplib_destroy_qp(&rdev->qplib_res,
+ &sqp->qplib_qp);
+ rc = -ENODEV;
+ ibdev_err(&rdev->ibdev,
+ "Failed to create AH entry for ShadowQP");
+ goto out;
+ }
+ rdev->gsi_ctx.gsi_sah = sah;
+
+ return 0;
+out:
+ kfree(sqp_tbl);
+ return rc;
+}
+
+static int bnxt_re_create_gsi_qp(struct bnxt_re_qp *qp, struct bnxt_re_pd *pd,
+ struct ib_qp_init_attr *init_attr)
+{
+ struct bnxt_re_dev *rdev;
+ struct bnxt_qplib_qp *qplqp;
+ int rc;
+
+ rdev = qp->rdev;
+ qplqp = &qp->qplib_qp;
+
+ qplqp->rq_hdr_buf_size = BNXT_QPLIB_MAX_QP1_RQ_HDR_SIZE_V2;
+ qplqp->sq_hdr_buf_size = BNXT_QPLIB_MAX_QP1_SQ_HDR_SIZE_V2;
+
+ rc = bnxt_qplib_create_qp1(&rdev->qplib_res, qplqp);
+ if (rc) {
+ ibdev_err(&rdev->ibdev, "create HW QP1 failed!");
+ goto out;
+ }
+
+ rc = bnxt_re_create_shadow_gsi(qp, pd);
+out:
+ return rc;
+}
+
+static bool bnxt_re_test_qp_limits(struct bnxt_re_dev *rdev,
+ struct ib_qp_init_attr *init_attr,
+ struct bnxt_qplib_dev_attr *dev_attr)
+{
+ bool rc = true;
+
+ if (init_attr->cap.max_send_wr > dev_attr->max_qp_wqes ||
+ init_attr->cap.max_recv_wr > dev_attr->max_qp_wqes ||
+ init_attr->cap.max_send_sge > dev_attr->max_qp_sges ||
+ init_attr->cap.max_recv_sge > dev_attr->max_qp_sges ||
+ init_attr->cap.max_inline_data > dev_attr->max_inline_data) {
+ ibdev_err(&rdev->ibdev,
+ "Create QP failed - max exceeded! 0x%x/0x%x 0x%x/0x%x 0x%x/0x%x 0x%x/0x%x 0x%x/0x%x",
+ init_attr->cap.max_send_wr, dev_attr->max_qp_wqes,
+ init_attr->cap.max_recv_wr, dev_attr->max_qp_wqes,
+ init_attr->cap.max_send_sge, dev_attr->max_qp_sges,
+ init_attr->cap.max_recv_sge, dev_attr->max_qp_sges,
+ init_attr->cap.max_inline_data,
+ dev_attr->max_inline_data);
+ rc = false;
+ }
+ return rc;
+}
+
+static int bnxt_re_add_unique_gid(struct bnxt_re_dev *rdev)
+{
+ struct bnxt_qplib_ctx *hctx = &rdev->qplib_ctx;
+ struct bnxt_qplib_res *res = &rdev->qplib_res;
+ int rc;
+
+ if (!rdev->rcfw.roce_mirror)
+ return 0;
+
+ rdev->ugid.global.subnet_prefix = cpu_to_be64(0xfe8000000000abcdLL);
+ addrconf_ifid_eui48(&rdev->ugid.raw[8], rdev->netdev);
+
+ rc = bnxt_qplib_add_sgid(&res->sgid_tbl,
+ (struct bnxt_qplib_gid *)&rdev->ugid,
+ rdev->qplib_res.netdev->dev_addr,
+ 0xFFFF, true, &rdev->ugid_index, true,
+ hctx->stats3.fw_id);
+ if (rc)
+ dev_err(rdev_to_dev(rdev), "Failed to add unique GID. rc = %d\n", rc);
+
+ return rc;
+}
+
+int bnxt_re_create_qp(struct ib_qp *ib_qp, struct ib_qp_init_attr *qp_init_attr,
+ struct ib_udata *udata)
+{
+ struct bnxt_qplib_dev_attr *dev_attr;
+ struct bnxt_re_ucontext *uctx;
+ struct bnxt_re_qp_req ureq;
+ struct bnxt_re_dev *rdev;
+ struct bnxt_re_pd *pd;
+ struct bnxt_re_qp *qp;
+ struct ib_pd *ib_pd;
+ u32 active_qps;
+ int rc;
+
+ ib_pd = ib_qp->pd;
+ pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd);
+ rdev = pd->rdev;
+ dev_attr = rdev->dev_attr;
+ qp = container_of(ib_qp, struct bnxt_re_qp, ib_qp);
+
+ uctx = rdma_udata_to_drv_context(udata, struct bnxt_re_ucontext, ib_uctx);
+ if (udata)
+ if (ib_copy_from_udata(&ureq, udata, min(udata->inlen, sizeof(ureq))))
+ return -EFAULT;
+
+ rc = bnxt_re_test_qp_limits(rdev, qp_init_attr, dev_attr);
+ if (!rc) {
+ rc = -EINVAL;
+ goto fail;
+ }
+
+ qp->rdev = rdev;
+ rc = bnxt_re_init_qp_attr(qp, pd, qp_init_attr, uctx, &ureq);
+ if (rc)
+ goto fail;
+
+ if (qp_init_attr->qp_type == IB_QPT_GSI &&
+ !(bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx))) {
+ rc = bnxt_re_create_gsi_qp(qp, pd, qp_init_attr);
+ if (rc == -ENODEV)
+ goto qp_destroy;
+ if (rc)
+ goto fail;
+ } else {
+ rc = bnxt_qplib_create_qp(&rdev->qplib_res, &qp->qplib_qp);
+ if (rc) {
+ ibdev_err(&rdev->ibdev, "Failed to create HW QP");
+ goto free_umem;
+ }
+ if (udata) {
+ struct bnxt_re_qp_resp resp;
+
+ resp.qpid = qp->qplib_qp.id;
+ resp.rsvd = 0;
+ rc = ib_copy_to_udata(udata, &resp, sizeof(resp));
+ if (rc) {
+ ibdev_err(&rdev->ibdev, "Failed to copy QP udata");
+ goto qp_destroy;
+ }
+ }
+ }
+
+ /* Support for RawEth QP is added to capture TCP pkt dump.
+ * So unique SGID is used to avoid incorrect statistics on per
+ * function stats_ctx
+ */
+ if (qp->qplib_qp.type == CMDQ_CREATE_QP_TYPE_RAW_ETHERTYPE) {
+ rc = bnxt_re_add_unique_gid(rdev);
+ if (rc)
+ goto qp_destroy;
+ qp->qplib_qp.ugid_index = rdev->ugid_index;
+ }
+
+ qp->ib_qp.qp_num = qp->qplib_qp.id;
+ if (qp_init_attr->qp_type == IB_QPT_GSI)
+ rdev->gsi_ctx.gsi_qp = qp;
+ spin_lock_init(&qp->sq_lock);
+ spin_lock_init(&qp->rq_lock);
+ INIT_LIST_HEAD(&qp->list);
+ mutex_lock(&rdev->qp_lock);
+ list_add_tail(&qp->list, &rdev->qp_list);
+ mutex_unlock(&rdev->qp_lock);
+ active_qps = atomic_inc_return(&rdev->stats.res.qp_count);
+ if (active_qps > rdev->stats.res.qp_watermark)
+ rdev->stats.res.qp_watermark = active_qps;
+ if (qp_init_attr->qp_type == IB_QPT_RC) {
+ active_qps = atomic_inc_return(&rdev->stats.res.rc_qp_count);
+ if (active_qps > rdev->stats.res.rc_qp_watermark)
+ rdev->stats.res.rc_qp_watermark = active_qps;
+ } else if (qp_init_attr->qp_type == IB_QPT_UD) {
+ active_qps = atomic_inc_return(&rdev->stats.res.ud_qp_count);
+ if (active_qps > rdev->stats.res.ud_qp_watermark)
+ rdev->stats.res.ud_qp_watermark = active_qps;
+ }
+ bnxt_re_debug_add_qpinfo(rdev, qp);
+
+ return 0;
+qp_destroy:
+ bnxt_qplib_destroy_qp(&rdev->qplib_res, &qp->qplib_qp);
+free_umem:
+ ib_umem_release(qp->rumem);
+ ib_umem_release(qp->sumem);
+fail:
+ return rc;
+}
+
+static u8 __from_ib_qp_state(enum ib_qp_state state)
+{
+ switch (state) {
+ case IB_QPS_RESET:
+ return CMDQ_MODIFY_QP_NEW_STATE_RESET;
+ case IB_QPS_INIT:
+ return CMDQ_MODIFY_QP_NEW_STATE_INIT;
+ case IB_QPS_RTR:
+ return CMDQ_MODIFY_QP_NEW_STATE_RTR;
+ case IB_QPS_RTS:
+ return CMDQ_MODIFY_QP_NEW_STATE_RTS;
+ case IB_QPS_SQD:
+ return CMDQ_MODIFY_QP_NEW_STATE_SQD;
+ case IB_QPS_SQE:
+ return CMDQ_MODIFY_QP_NEW_STATE_SQE;
+ case IB_QPS_ERR:
+ default:
+ return CMDQ_MODIFY_QP_NEW_STATE_ERR;
+ }
+}
+
+static enum ib_qp_state __to_ib_qp_state(u8 state)
+{
+ switch (state) {
+ case CMDQ_MODIFY_QP_NEW_STATE_RESET:
+ return IB_QPS_RESET;
+ case CMDQ_MODIFY_QP_NEW_STATE_INIT:
+ return IB_QPS_INIT;
+ case CMDQ_MODIFY_QP_NEW_STATE_RTR:
+ return IB_QPS_RTR;
+ case CMDQ_MODIFY_QP_NEW_STATE_RTS:
+ return IB_QPS_RTS;
+ case CMDQ_MODIFY_QP_NEW_STATE_SQD:
+ return IB_QPS_SQD;
+ case CMDQ_MODIFY_QP_NEW_STATE_SQE:
+ return IB_QPS_SQE;
+ case CMDQ_MODIFY_QP_NEW_STATE_ERR:
+ default:
+ return IB_QPS_ERR;
+ }
+}
+
+static u32 __from_ib_mtu(enum ib_mtu mtu)
+{
+ switch (mtu) {
+ case IB_MTU_256:
+ return CMDQ_MODIFY_QP_PATH_MTU_MTU_256;
+ case IB_MTU_512:
+ return CMDQ_MODIFY_QP_PATH_MTU_MTU_512;
+ case IB_MTU_1024:
+ return CMDQ_MODIFY_QP_PATH_MTU_MTU_1024;
+ case IB_MTU_2048:
+ return CMDQ_MODIFY_QP_PATH_MTU_MTU_2048;
+ case IB_MTU_4096:
+ return CMDQ_MODIFY_QP_PATH_MTU_MTU_4096;
+ default:
+ return CMDQ_MODIFY_QP_PATH_MTU_MTU_2048;
+ }
+}
+
+static enum ib_mtu __to_ib_mtu(u32 mtu)
+{
+ switch (mtu & CREQ_QUERY_QP_RESP_SB_PATH_MTU_MASK) {
+ case CMDQ_MODIFY_QP_PATH_MTU_MTU_256:
+ return IB_MTU_256;
+ case CMDQ_MODIFY_QP_PATH_MTU_MTU_512:
+ return IB_MTU_512;
+ case CMDQ_MODIFY_QP_PATH_MTU_MTU_1024:
+ return IB_MTU_1024;
+ case CMDQ_MODIFY_QP_PATH_MTU_MTU_2048:
+ return IB_MTU_2048;
+ case CMDQ_MODIFY_QP_PATH_MTU_MTU_4096:
+ return IB_MTU_4096;
+ default:
+ return IB_MTU_2048;
+ }
+}
+
+/* Shared Receive Queues */
+int bnxt_re_destroy_srq(struct ib_srq *ib_srq, struct ib_udata *udata)
+{
+ struct bnxt_re_srq *srq = container_of(ib_srq, struct bnxt_re_srq,
+ ib_srq);
+ struct bnxt_re_dev *rdev = srq->rdev;
+ struct bnxt_qplib_srq *qplib_srq = &srq->qplib_srq;
+
+ if (rdev->chip_ctx->modes.toggle_bits & BNXT_QPLIB_SRQ_TOGGLE_BIT) {
+ free_page((unsigned long)srq->uctx_srq_page);
+ hash_del(&srq->hash_entry);
+ }
+ bnxt_qplib_destroy_srq(&rdev->qplib_res, qplib_srq);
+ ib_umem_release(srq->umem);
+ atomic_dec(&rdev->stats.res.srq_count);
+ return 0;
+}
+
+static int bnxt_re_init_user_srq(struct bnxt_re_dev *rdev,
+ struct bnxt_re_pd *pd,
+ struct bnxt_re_srq *srq,
+ struct ib_udata *udata)
+{
+ struct bnxt_re_srq_req ureq;
+ struct bnxt_qplib_srq *qplib_srq = &srq->qplib_srq;
+ struct ib_umem *umem;
+ int bytes = 0;
+ struct bnxt_re_ucontext *cntx = rdma_udata_to_drv_context(
+ udata, struct bnxt_re_ucontext, ib_uctx);
+
+ if (ib_copy_from_udata(&ureq, udata, sizeof(ureq)))
+ return -EFAULT;
+
+ bytes = (qplib_srq->max_wqe * qplib_srq->wqe_size);
+ bytes = PAGE_ALIGN(bytes);
+ umem = ib_umem_get(&rdev->ibdev, ureq.srqva, bytes,
+ IB_ACCESS_LOCAL_WRITE);
+ if (IS_ERR(umem))
+ return PTR_ERR(umem);
+
+ srq->umem = umem;
+ qplib_srq->sg_info.umem = umem;
+ qplib_srq->sg_info.pgsize = PAGE_SIZE;
+ qplib_srq->sg_info.pgshft = PAGE_SHIFT;
+ qplib_srq->srq_handle = ureq.srq_handle;
+ qplib_srq->dpi = &cntx->dpi;
+
+ return 0;
+}
+
+int bnxt_re_create_srq(struct ib_srq *ib_srq,
+ struct ib_srq_init_attr *srq_init_attr,
+ struct ib_udata *udata)
+{
+ struct bnxt_qplib_dev_attr *dev_attr;
+ struct bnxt_re_ucontext *uctx;
+ struct bnxt_re_dev *rdev;
+ struct bnxt_re_srq *srq;
+ struct bnxt_re_pd *pd;
+ struct ib_pd *ib_pd;
+ u32 active_srqs;
+ int rc, entries;
+
+ ib_pd = ib_srq->pd;
+ pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd);
+ rdev = pd->rdev;
+ dev_attr = rdev->dev_attr;
+ srq = container_of(ib_srq, struct bnxt_re_srq, ib_srq);
+
+ if (srq_init_attr->attr.max_wr >= dev_attr->max_srq_wqes) {
+ ibdev_err(&rdev->ibdev, "Create CQ failed - max exceeded");
+ rc = -EINVAL;
+ goto exit;
+ }
+
+ if (srq_init_attr->srq_type != IB_SRQT_BASIC) {
+ rc = -EOPNOTSUPP;
+ goto exit;
+ }
+
+ uctx = rdma_udata_to_drv_context(udata, struct bnxt_re_ucontext, ib_uctx);
+ srq->rdev = rdev;
+ srq->qplib_srq.pd = &pd->qplib_pd;
+ srq->qplib_srq.dpi = &rdev->dpi_privileged;
+ /* Allocate 1 more than what's provided so posting max doesn't
+ * mean empty
+ */
+ entries = bnxt_re_init_depth(srq_init_attr->attr.max_wr + 1, uctx);
+ if (entries > dev_attr->max_srq_wqes + 1)
+ entries = dev_attr->max_srq_wqes + 1;
+ srq->qplib_srq.max_wqe = entries;
+
+ srq->qplib_srq.max_sge = srq_init_attr->attr.max_sge;
+ /* 128 byte wqe size for SRQ . So use max sges */
+ srq->qplib_srq.wqe_size = bnxt_re_get_rwqe_size(dev_attr->max_srq_sges);
+ srq->qplib_srq.threshold = srq_init_attr->attr.srq_limit;
+ srq->srq_limit = srq_init_attr->attr.srq_limit;
+ srq->qplib_srq.eventq_hw_ring_id = rdev->nqr->nq[0].ring_id;
+ srq->qplib_srq.sg_info.pgsize = PAGE_SIZE;
+ srq->qplib_srq.sg_info.pgshft = PAGE_SHIFT;
+
+ if (udata) {
+ rc = bnxt_re_init_user_srq(rdev, pd, srq, udata);
+ if (rc)
+ goto fail;
+ }
+
+ rc = bnxt_qplib_create_srq(&rdev->qplib_res, &srq->qplib_srq);
+ if (rc) {
+ ibdev_err(&rdev->ibdev, "Create HW SRQ failed!");
+ goto fail;
+ }
+
+ if (udata) {
+ struct bnxt_re_srq_resp resp = {};
+
+ resp.srqid = srq->qplib_srq.id;
+ if (rdev->chip_ctx->modes.toggle_bits & BNXT_QPLIB_SRQ_TOGGLE_BIT) {
+ hash_add(rdev->srq_hash, &srq->hash_entry, srq->qplib_srq.id);
+ srq->uctx_srq_page = (void *)get_zeroed_page(GFP_KERNEL);
+ if (!srq->uctx_srq_page) {
+ rc = -ENOMEM;
+ goto fail;
+ }
+ resp.comp_mask |= BNXT_RE_SRQ_TOGGLE_PAGE_SUPPORT;
+ }
+ rc = ib_copy_to_udata(udata, &resp, sizeof(resp));
+ if (rc) {
+ ibdev_err(&rdev->ibdev, "SRQ copy to udata failed!");
+ bnxt_qplib_destroy_srq(&rdev->qplib_res,
+ &srq->qplib_srq);
+ goto fail;
+ }
+ }
+ active_srqs = atomic_inc_return(&rdev->stats.res.srq_count);
+ if (active_srqs > rdev->stats.res.srq_watermark)
+ rdev->stats.res.srq_watermark = active_srqs;
+ spin_lock_init(&srq->lock);
+
+ return 0;
+
+fail:
+ ib_umem_release(srq->umem);
+exit:
+ return rc;
+}
+
+int bnxt_re_modify_srq(struct ib_srq *ib_srq, struct ib_srq_attr *srq_attr,
+ enum ib_srq_attr_mask srq_attr_mask,
+ struct ib_udata *udata)
+{
+ struct bnxt_re_srq *srq = container_of(ib_srq, struct bnxt_re_srq,
+ ib_srq);
+ struct bnxt_re_dev *rdev = srq->rdev;
+
+ switch (srq_attr_mask) {
+ case IB_SRQ_MAX_WR:
+ /* SRQ resize is not supported */
+ return -EINVAL;
+ case IB_SRQ_LIMIT:
+ /* Change the SRQ threshold */
+ if (srq_attr->srq_limit > srq->qplib_srq.max_wqe)
+ return -EINVAL;
+
+ srq->qplib_srq.threshold = srq_attr->srq_limit;
+ bnxt_qplib_srq_arm_db(&srq->qplib_srq.dbinfo, srq->qplib_srq.threshold);
+
+ /* On success, update the shadow */
+ srq->srq_limit = srq_attr->srq_limit;
+ /* No need to Build and send response back to udata */
+ return 0;
+ default:
+ ibdev_err(&rdev->ibdev,
+ "Unsupported srq_attr_mask 0x%x", srq_attr_mask);
+ return -EINVAL;
+ }
+}
+
+int bnxt_re_query_srq(struct ib_srq *ib_srq, struct ib_srq_attr *srq_attr)
+{
+ struct bnxt_re_srq *srq = container_of(ib_srq, struct bnxt_re_srq,
+ ib_srq);
+ struct bnxt_re_srq tsrq;
+ struct bnxt_re_dev *rdev = srq->rdev;
+ int rc;
+
+ /* Get live SRQ attr */
+ tsrq.qplib_srq.id = srq->qplib_srq.id;
+ rc = bnxt_qplib_query_srq(&rdev->qplib_res, &tsrq.qplib_srq);
+ if (rc) {
+ ibdev_err(&rdev->ibdev, "Query HW SRQ failed!");
+ return rc;
+ }
+ srq_attr->max_wr = srq->qplib_srq.max_wqe;
+ srq_attr->max_sge = srq->qplib_srq.max_sge;
+ srq_attr->srq_limit = tsrq.qplib_srq.threshold;
+
+ return 0;
+}
+
+int bnxt_re_post_srq_recv(struct ib_srq *ib_srq, const struct ib_recv_wr *wr,
+ const struct ib_recv_wr **bad_wr)
+{
+ struct bnxt_re_srq *srq = container_of(ib_srq, struct bnxt_re_srq,
+ ib_srq);
+ struct bnxt_qplib_swqe wqe;
+ unsigned long flags;
+ int rc = 0;
+
+ spin_lock_irqsave(&srq->lock, flags);
+ while (wr) {
+ /* Transcribe each ib_recv_wr to qplib_swqe */
+ wqe.num_sge = wr->num_sge;
+ bnxt_re_build_sgl(wr->sg_list, wqe.sg_list, wr->num_sge);
+ wqe.wr_id = wr->wr_id;
+ wqe.type = BNXT_QPLIB_SWQE_TYPE_RECV;
+
+ rc = bnxt_qplib_post_srq_recv(&srq->qplib_srq, &wqe);
+ if (rc) {
+ *bad_wr = wr;
+ break;
+ }
+ wr = wr->next;
+ }
+ spin_unlock_irqrestore(&srq->lock, flags);
+
+ return rc;
+}
+static int bnxt_re_modify_shadow_qp(struct bnxt_re_dev *rdev,
+ struct bnxt_re_qp *qp1_qp,
+ int qp_attr_mask)
+{
+ struct bnxt_re_qp *qp = rdev->gsi_ctx.gsi_sqp;
+ int rc;
+
+ if (qp_attr_mask & IB_QP_STATE) {
+ qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_STATE;
+ qp->qplib_qp.state = qp1_qp->qplib_qp.state;
+ }
+ if (qp_attr_mask & IB_QP_PKEY_INDEX) {
+ qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_PKEY;
+ qp->qplib_qp.pkey_index = qp1_qp->qplib_qp.pkey_index;
+ }
+
+ if (qp_attr_mask & IB_QP_QKEY) {
+ qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_QKEY;
+ /* Using a Random QKEY */
+ qp->qplib_qp.qkey = 0x81818181;
+ }
+ if (qp_attr_mask & IB_QP_SQ_PSN) {
+ qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_SQ_PSN;
+ qp->qplib_qp.sq.psn = qp1_qp->qplib_qp.sq.psn;
+ }
+
+ rc = bnxt_qplib_modify_qp(&rdev->qplib_res, &qp->qplib_qp);
+ if (rc)
+ ibdev_err(&rdev->ibdev, "Failed to modify Shadow QP for QP1");
+ return rc;
+}
+
+int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
+ int qp_attr_mask, struct ib_udata *udata)
+{
+ struct bnxt_re_qp *qp = container_of(ib_qp, struct bnxt_re_qp, ib_qp);
+ struct bnxt_re_dev *rdev = qp->rdev;
+ struct bnxt_qplib_dev_attr *dev_attr = rdev->dev_attr;
+ enum ib_qp_state curr_qp_state, new_qp_state;
+ int rc, entries;
+ unsigned int flags;
+ u8 nw_type;
+
+ if (qp_attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
+ return -EOPNOTSUPP;
+
+ qp->qplib_qp.modify_flags = 0;
+ if (qp_attr_mask & IB_QP_STATE) {
+ curr_qp_state = __to_ib_qp_state(qp->qplib_qp.cur_qp_state);
+ new_qp_state = qp_attr->qp_state;
+ if (!ib_modify_qp_is_ok(curr_qp_state, new_qp_state,
+ ib_qp->qp_type, qp_attr_mask)) {
+ ibdev_err(&rdev->ibdev,
+ "Invalid attribute mask: %#x specified ",
+ qp_attr_mask);
+ ibdev_err(&rdev->ibdev,
+ "for qpn: %#x type: %#x",
+ ib_qp->qp_num, ib_qp->qp_type);
+ ibdev_err(&rdev->ibdev,
+ "curr_qp_state=0x%x, new_qp_state=0x%x\n",
+ curr_qp_state, new_qp_state);
+ return -EINVAL;
+ }
+ qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_STATE;
+ qp->qplib_qp.state = __from_ib_qp_state(qp_attr->qp_state);
+
+ if (!qp->sumem &&
+ qp->qplib_qp.state == CMDQ_MODIFY_QP_NEW_STATE_ERR) {
+ ibdev_dbg(&rdev->ibdev,
+ "Move QP = %p to flush list\n", qp);
+ flags = bnxt_re_lock_cqs(qp);
+ bnxt_qplib_add_flush_qp(&qp->qplib_qp);
+ bnxt_re_unlock_cqs(qp, flags);
+ }
+ if (!qp->sumem &&
+ qp->qplib_qp.state == CMDQ_MODIFY_QP_NEW_STATE_RESET) {
+ ibdev_dbg(&rdev->ibdev,
+ "Move QP = %p out of flush list\n", qp);
+ flags = bnxt_re_lock_cqs(qp);
+ bnxt_qplib_clean_qp(&qp->qplib_qp);
+ bnxt_re_unlock_cqs(qp, flags);
+ }
+ }
+ if (qp_attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY) {
+ qp->qplib_qp.modify_flags |=
+ CMDQ_MODIFY_QP_MODIFY_MASK_EN_SQD_ASYNC_NOTIFY;
+ qp->qplib_qp.en_sqd_async_notify = true;
+ }
+ if (qp_attr_mask & IB_QP_ACCESS_FLAGS) {
+ qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_ACCESS;
+ qp->qplib_qp.access =
+ __qp_access_flags_from_ib(qp->qplib_qp.cctx,
+ qp_attr->qp_access_flags);
+ /* LOCAL_WRITE access must be set to allow RC receive */
+ qp->qplib_qp.access |= CMDQ_MODIFY_QP_ACCESS_LOCAL_WRITE;
+ }
+ if (qp_attr_mask & IB_QP_PKEY_INDEX) {
+ qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_PKEY;
+ qp->qplib_qp.pkey_index = qp_attr->pkey_index;
+ }
+ if (qp_attr_mask & IB_QP_QKEY) {
+ qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_QKEY;
+ qp->qplib_qp.qkey = qp_attr->qkey;
+ }
+ if (qp_attr_mask & IB_QP_AV) {
+ const struct ib_global_route *grh =
+ rdma_ah_read_grh(&qp_attr->ah_attr);
+ const struct ib_gid_attr *sgid_attr;
+ struct bnxt_re_gid_ctx *ctx;
+
+ qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_DGID |
+ CMDQ_MODIFY_QP_MODIFY_MASK_FLOW_LABEL |
+ CMDQ_MODIFY_QP_MODIFY_MASK_SGID_INDEX |
+ CMDQ_MODIFY_QP_MODIFY_MASK_HOP_LIMIT |
+ CMDQ_MODIFY_QP_MODIFY_MASK_TRAFFIC_CLASS |
+ CMDQ_MODIFY_QP_MODIFY_MASK_DEST_MAC |
+ CMDQ_MODIFY_QP_MODIFY_MASK_VLAN_ID;
+ memcpy(qp->qplib_qp.ah.dgid.data, grh->dgid.raw,
+ sizeof(qp->qplib_qp.ah.dgid.data));
+ qp->qplib_qp.ah.flow_label = grh->flow_label;
+ sgid_attr = grh->sgid_attr;
+ /* Get the HW context of the GID. The reference
+ * of GID table entry is already taken by the caller.
+ */
+ ctx = rdma_read_gid_hw_context(sgid_attr);
+ qp->qplib_qp.ah.sgid_index = ctx->idx;
+ qp->qplib_qp.ah.host_sgid_index = grh->sgid_index;
+ qp->qplib_qp.ah.hop_limit = grh->hop_limit;
+ qp->qplib_qp.ah.traffic_class = grh->traffic_class >> 2;
+ qp->qplib_qp.ah.sl = rdma_ah_get_sl(&qp_attr->ah_attr);
+ ether_addr_copy(qp->qplib_qp.ah.dmac,
+ qp_attr->ah_attr.roce.dmac);
+
+ rc = rdma_read_gid_l2_fields(sgid_attr, NULL,
+ &qp->qplib_qp.smac[0]);
+ if (rc)
+ return rc;
+
+ nw_type = rdma_gid_attr_network_type(sgid_attr);
+ switch (nw_type) {
+ case RDMA_NETWORK_IPV4:
+ qp->qplib_qp.nw_type =
+ CMDQ_MODIFY_QP_NETWORK_TYPE_ROCEV2_IPV4;
+ break;
+ case RDMA_NETWORK_IPV6:
+ qp->qplib_qp.nw_type =
+ CMDQ_MODIFY_QP_NETWORK_TYPE_ROCEV2_IPV6;
+ break;
+ default:
+ qp->qplib_qp.nw_type =
+ CMDQ_MODIFY_QP_NETWORK_TYPE_ROCEV1;
+ break;
+ }
+ }
+
+ if (qp_attr->qp_state == IB_QPS_RTR) {
+ enum ib_mtu qpmtu;
+
+ qpmtu = iboe_get_mtu(rdev->netdev->mtu);
+ if (qp_attr_mask & IB_QP_PATH_MTU) {
+ if (ib_mtu_enum_to_int(qp_attr->path_mtu) >
+ ib_mtu_enum_to_int(qpmtu))
+ return -EINVAL;
+ qpmtu = qp_attr->path_mtu;
+ }
+
+ qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_PATH_MTU;
+ qp->qplib_qp.path_mtu = __from_ib_mtu(qpmtu);
+ qp->qplib_qp.mtu = ib_mtu_enum_to_int(qpmtu);
+ }
+
+ if (qp_attr_mask & IB_QP_TIMEOUT) {
+ qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_TIMEOUT;
+ qp->qplib_qp.timeout = qp_attr->timeout;
+ }
+ if (qp_attr_mask & IB_QP_RETRY_CNT) {
+ qp->qplib_qp.modify_flags |=
+ CMDQ_MODIFY_QP_MODIFY_MASK_RETRY_CNT;
+ qp->qplib_qp.retry_cnt = qp_attr->retry_cnt;
+ }
+ if (qp_attr_mask & IB_QP_RNR_RETRY) {
+ qp->qplib_qp.modify_flags |=
+ CMDQ_MODIFY_QP_MODIFY_MASK_RNR_RETRY;
+ qp->qplib_qp.rnr_retry = qp_attr->rnr_retry;
+ }
+ if (qp_attr_mask & IB_QP_MIN_RNR_TIMER) {
+ qp->qplib_qp.modify_flags |=
+ CMDQ_MODIFY_QP_MODIFY_MASK_MIN_RNR_TIMER;
+ qp->qplib_qp.min_rnr_timer = qp_attr->min_rnr_timer;
+ }
+ if (qp_attr_mask & IB_QP_RQ_PSN) {
+ qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_RQ_PSN;
+ qp->qplib_qp.rq.psn = qp_attr->rq_psn;
+ }
+ if (qp_attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
+ qp->qplib_qp.modify_flags |=
+ CMDQ_MODIFY_QP_MODIFY_MASK_MAX_RD_ATOMIC;
+ /* Cap the max_rd_atomic to device max */
+ qp->qplib_qp.max_rd_atomic = min_t(u32, qp_attr->max_rd_atomic,
+ dev_attr->max_qp_rd_atom);
+ }
+ if (qp_attr_mask & IB_QP_SQ_PSN) {
+ qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_SQ_PSN;
+ qp->qplib_qp.sq.psn = qp_attr->sq_psn;
+ }
+ if (qp_attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
+ if (qp_attr->max_dest_rd_atomic >
+ dev_attr->max_qp_init_rd_atom) {
+ ibdev_err(&rdev->ibdev,
+ "max_dest_rd_atomic requested%d is > dev_max%d",
+ qp_attr->max_dest_rd_atomic,
+ dev_attr->max_qp_init_rd_atom);
+ return -EINVAL;
+ }
+
+ qp->qplib_qp.modify_flags |=
+ CMDQ_MODIFY_QP_MODIFY_MASK_MAX_DEST_RD_ATOMIC;
+ qp->qplib_qp.max_dest_rd_atomic = qp_attr->max_dest_rd_atomic;
+ }
+ if (qp_attr_mask & IB_QP_CAP) {
+ struct bnxt_re_ucontext *uctx =
+ rdma_udata_to_drv_context(udata, struct bnxt_re_ucontext, ib_uctx);
+
+ qp->qplib_qp.modify_flags |=
+ CMDQ_MODIFY_QP_MODIFY_MASK_SQ_SIZE |
+ CMDQ_MODIFY_QP_MODIFY_MASK_RQ_SIZE |
+ CMDQ_MODIFY_QP_MODIFY_MASK_SQ_SGE |
+ CMDQ_MODIFY_QP_MODIFY_MASK_RQ_SGE |
+ CMDQ_MODIFY_QP_MODIFY_MASK_MAX_INLINE_DATA;
+ if ((qp_attr->cap.max_send_wr >= dev_attr->max_qp_wqes) ||
+ (qp_attr->cap.max_recv_wr >= dev_attr->max_qp_wqes) ||
+ (qp_attr->cap.max_send_sge >= dev_attr->max_qp_sges) ||
+ (qp_attr->cap.max_recv_sge >= dev_attr->max_qp_sges) ||
+ (qp_attr->cap.max_inline_data >=
+ dev_attr->max_inline_data)) {
+ ibdev_err(&rdev->ibdev,
+ "Create QP failed - max exceeded");
+ return -EINVAL;
+ }
+ entries = bnxt_re_init_depth(qp_attr->cap.max_send_wr, uctx);
+ qp->qplib_qp.sq.max_wqe = min_t(u32, entries,
+ dev_attr->max_qp_wqes + 1);
+ qp->qplib_qp.sq.q_full_delta = qp->qplib_qp.sq.max_wqe -
+ qp_attr->cap.max_send_wr;
+ /*
+ * Reserving one slot for Phantom WQE. Some application can
+ * post one extra entry in this case. Allowing this to avoid
+ * unexpected Queue full condition
+ */
+ qp->qplib_qp.sq.q_full_delta -= 1;
+ qp->qplib_qp.sq.max_sge = qp_attr->cap.max_send_sge;
+ if (qp->qplib_qp.rq.max_wqe) {
+ entries = bnxt_re_init_depth(qp_attr->cap.max_recv_wr, uctx);
+ qp->qplib_qp.rq.max_wqe =
+ min_t(u32, entries, dev_attr->max_qp_wqes + 1);
+ qp->qplib_qp.rq.max_sw_wqe = qp->qplib_qp.rq.max_wqe;
+ qp->qplib_qp.rq.q_full_delta = qp->qplib_qp.rq.max_wqe -
+ qp_attr->cap.max_recv_wr;
+ qp->qplib_qp.rq.max_sge = qp_attr->cap.max_recv_sge;
+ } else {
+ /* SRQ was used prior, just ignore the RQ caps */
+ }
+ }
+ if (qp_attr_mask & IB_QP_DEST_QPN) {
+ qp->qplib_qp.modify_flags |=
+ CMDQ_MODIFY_QP_MODIFY_MASK_DEST_QP_ID;
+ qp->qplib_qp.dest_qpn = qp_attr->dest_qp_num;
+ }
+ rc = bnxt_qplib_modify_qp(&rdev->qplib_res, &qp->qplib_qp);
+ if (rc) {
+ ibdev_err(&rdev->ibdev, "Failed to modify HW QP");
+ return rc;
+ }
+ if (ib_qp->qp_type == IB_QPT_GSI && rdev->gsi_ctx.gsi_sqp)
+ rc = bnxt_re_modify_shadow_qp(rdev, qp, qp_attr_mask);
+ return rc;
+}
+
+int bnxt_re_query_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
+ int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr)
+{
+ struct bnxt_re_qp *qp = container_of(ib_qp, struct bnxt_re_qp, ib_qp);
+ struct bnxt_re_dev *rdev = qp->rdev;
+ struct bnxt_qplib_qp *qplib_qp;
+ int rc;
+
+ qplib_qp = kzalloc(sizeof(*qplib_qp), GFP_KERNEL);
+ if (!qplib_qp)
+ return -ENOMEM;
+
+ qplib_qp->id = qp->qplib_qp.id;
+ qplib_qp->ah.host_sgid_index = qp->qplib_qp.ah.host_sgid_index;
+
+ rc = bnxt_qplib_query_qp(&rdev->qplib_res, qplib_qp);
+ if (rc) {
+ ibdev_err(&rdev->ibdev, "Failed to query HW QP");
+ goto out;
+ }
+ qp_attr->qp_state = __to_ib_qp_state(qplib_qp->state);
+ qp_attr->cur_qp_state = __to_ib_qp_state(qplib_qp->cur_qp_state);
+ qp_attr->en_sqd_async_notify = qplib_qp->en_sqd_async_notify ? 1 : 0;
+ qp_attr->qp_access_flags = __qp_access_flags_to_ib(qp->qplib_qp.cctx,
+ qplib_qp->access);
+ qp_attr->pkey_index = qplib_qp->pkey_index;
+ qp_attr->qkey = qplib_qp->qkey;
+ qp_attr->ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE;
+ rdma_ah_set_grh(&qp_attr->ah_attr, NULL, qplib_qp->udp_sport,
+ qplib_qp->ah.host_sgid_index,
+ qplib_qp->ah.hop_limit,
+ qplib_qp->ah.traffic_class);
+ rdma_ah_set_dgid_raw(&qp_attr->ah_attr, qplib_qp->ah.dgid.data);
+ rdma_ah_set_sl(&qp_attr->ah_attr, qplib_qp->ah.sl);
+ ether_addr_copy(qp_attr->ah_attr.roce.dmac, qplib_qp->ah.dmac);
+ qp_attr->path_mtu = __to_ib_mtu(qplib_qp->path_mtu);
+ qp_attr->timeout = qplib_qp->timeout;
+ qp_attr->retry_cnt = qplib_qp->retry_cnt;
+ qp_attr->rnr_retry = qplib_qp->rnr_retry;
+ qp_attr->min_rnr_timer = qplib_qp->min_rnr_timer;
+ qp_attr->port_num = __to_ib_port_num(qplib_qp->port_id);
+ qp_attr->rq_psn = qplib_qp->rq.psn;
+ qp_attr->max_rd_atomic = qplib_qp->max_rd_atomic;
+ qp_attr->sq_psn = qplib_qp->sq.psn;
+ qp_attr->max_dest_rd_atomic = qplib_qp->max_dest_rd_atomic;
+ qp_init_attr->sq_sig_type = qplib_qp->sig_type ? IB_SIGNAL_ALL_WR :
+ IB_SIGNAL_REQ_WR;
+ qp_attr->dest_qp_num = qplib_qp->dest_qpn;
+
+ qp_attr->cap.max_send_wr = qp->qplib_qp.sq.max_wqe;
+ qp_attr->cap.max_send_sge = qp->qplib_qp.sq.max_sge;
+ qp_attr->cap.max_recv_wr = qp->qplib_qp.rq.max_wqe;
+ qp_attr->cap.max_recv_sge = qp->qplib_qp.rq.max_sge;
+ qp_attr->cap.max_inline_data = qp->qplib_qp.max_inline_data;
+ qp_init_attr->cap = qp_attr->cap;
+
+out:
+ kfree(qplib_qp);
+ return rc;
+}
+
+/* Routine for sending QP1 packets for RoCE V1 an V2
+ */
+static int bnxt_re_build_qp1_send_v2(struct bnxt_re_qp *qp,
+ const struct ib_send_wr *wr,
+ struct bnxt_qplib_swqe *wqe,
+ int payload_size)
+{
+ struct bnxt_re_ah *ah = container_of(ud_wr(wr)->ah, struct bnxt_re_ah,
+ ib_ah);
+ struct bnxt_qplib_ah *qplib_ah = &ah->qplib_ah;
+ const struct ib_gid_attr *sgid_attr = ah->ib_ah.sgid_attr;
+ struct bnxt_qplib_sge sge;
+ u8 nw_type;
+ u16 ether_type;
+ union ib_gid dgid;
+ bool is_eth = false;
+ bool is_vlan = false;
+ bool is_grh = false;
+ bool is_udp = false;
+ u8 ip_version = 0;
+ u16 vlan_id = 0xFFFF;
+ void *buf;
+ int i, rc;
+
+ memset(&qp->qp1_hdr, 0, sizeof(qp->qp1_hdr));
+
+ rc = rdma_read_gid_l2_fields(sgid_attr, &vlan_id, NULL);
+ if (rc)
+ return rc;
+
+ /* Get network header type for this GID */
+ nw_type = rdma_gid_attr_network_type(sgid_attr);
+ switch (nw_type) {
+ case RDMA_NETWORK_IPV4:
+ nw_type = BNXT_RE_ROCEV2_IPV4_PACKET;
+ break;
+ case RDMA_NETWORK_IPV6:
+ nw_type = BNXT_RE_ROCEV2_IPV6_PACKET;
+ break;
+ default:
+ nw_type = BNXT_RE_ROCE_V1_PACKET;
+ break;
+ }
+ memcpy(&dgid.raw, &qplib_ah->dgid, 16);
+ is_udp = sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP;
+ if (is_udp) {
+ if (ipv6_addr_v4mapped((struct in6_addr *)&sgid_attr->gid)) {
+ ip_version = 4;
+ ether_type = ETH_P_IP;
+ } else {
+ ip_version = 6;
+ ether_type = ETH_P_IPV6;
+ }
+ is_grh = false;
+ } else {
+ ether_type = ETH_P_IBOE;
+ is_grh = true;
+ }
+
+ is_eth = true;
+ is_vlan = vlan_id && (vlan_id < 0x1000);
+
+ ib_ud_header_init(payload_size, !is_eth, is_eth, is_vlan, is_grh,
+ ip_version, is_udp, 0, &qp->qp1_hdr);
+
+ /* ETH */
+ ether_addr_copy(qp->qp1_hdr.eth.dmac_h, ah->qplib_ah.dmac);
+ ether_addr_copy(qp->qp1_hdr.eth.smac_h, qp->qplib_qp.smac);
+
+ /* For vlan, check the sgid for vlan existence */
+
+ if (!is_vlan) {
+ qp->qp1_hdr.eth.type = cpu_to_be16(ether_type);
+ } else {
+ qp->qp1_hdr.vlan.type = cpu_to_be16(ether_type);
+ qp->qp1_hdr.vlan.tag = cpu_to_be16(vlan_id);
+ }
+
+ if (is_grh || (ip_version == 6)) {
+ memcpy(qp->qp1_hdr.grh.source_gid.raw, sgid_attr->gid.raw,
+ sizeof(sgid_attr->gid));
+ memcpy(qp->qp1_hdr.grh.destination_gid.raw, qplib_ah->dgid.data,
+ sizeof(sgid_attr->gid));
+ qp->qp1_hdr.grh.hop_limit = qplib_ah->hop_limit;
+ }
+
+ if (ip_version == 4) {
+ qp->qp1_hdr.ip4.tos = 0;
+ qp->qp1_hdr.ip4.id = 0;
+ qp->qp1_hdr.ip4.frag_off = htons(IP_DF);
+ qp->qp1_hdr.ip4.ttl = qplib_ah->hop_limit;
+
+ memcpy(&qp->qp1_hdr.ip4.saddr, sgid_attr->gid.raw + 12, 4);
+ memcpy(&qp->qp1_hdr.ip4.daddr, qplib_ah->dgid.data + 12, 4);
+ qp->qp1_hdr.ip4.check = ib_ud_ip4_csum(&qp->qp1_hdr);
+ }
+
+ if (is_udp) {
+ qp->qp1_hdr.udp.dport = htons(ROCE_V2_UDP_DPORT);
+ qp->qp1_hdr.udp.sport = htons(0x8CD1);
+ qp->qp1_hdr.udp.csum = 0;
+ }
+
+ /* BTH */
+ if (wr->opcode == IB_WR_SEND_WITH_IMM) {
+ qp->qp1_hdr.bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;
+ qp->qp1_hdr.immediate_present = 1;
+ } else {
+ qp->qp1_hdr.bth.opcode = IB_OPCODE_UD_SEND_ONLY;
+ }
+ if (wr->send_flags & IB_SEND_SOLICITED)
+ qp->qp1_hdr.bth.solicited_event = 1;
+ /* pad_count */
+ qp->qp1_hdr.bth.pad_count = (4 - payload_size) & 3;
+
+ /* P_key for QP1 is for all members */
+ qp->qp1_hdr.bth.pkey = cpu_to_be16(0xFFFF);
+ qp->qp1_hdr.bth.destination_qpn = IB_QP1;
+ qp->qp1_hdr.bth.ack_req = 0;
+ qp->send_psn++;
+ qp->send_psn &= BTH_PSN_MASK;
+ qp->qp1_hdr.bth.psn = cpu_to_be32(qp->send_psn);
+ /* DETH */
+ /* Use the priviledged Q_Key for QP1 */
+ qp->qp1_hdr.deth.qkey = cpu_to_be32(IB_QP1_QKEY);
+ qp->qp1_hdr.deth.source_qpn = IB_QP1;
+
+ /* Pack the QP1 to the transmit buffer */
+ buf = bnxt_qplib_get_qp1_sq_buf(&qp->qplib_qp, &sge);
+ if (buf) {
+ ib_ud_header_pack(&qp->qp1_hdr, buf);
+ for (i = wqe->num_sge; i; i--) {
+ wqe->sg_list[i].addr = wqe->sg_list[i - 1].addr;
+ wqe->sg_list[i].lkey = wqe->sg_list[i - 1].lkey;
+ wqe->sg_list[i].size = wqe->sg_list[i - 1].size;
+ }
+
+ /*
+ * Max Header buf size for IPV6 RoCE V2 is 86,
+ * which is same as the QP1 SQ header buffer.
+ * Header buf size for IPV4 RoCE V2 can be 66.
+ * ETH(14) + VLAN(4)+ IP(20) + UDP (8) + BTH(20).
+ * Subtract 20 bytes from QP1 SQ header buf size
+ */
+ if (is_udp && ip_version == 4)
+ sge.size -= 20;
+ /*
+ * Max Header buf size for RoCE V1 is 78.
+ * ETH(14) + VLAN(4) + GRH(40) + BTH(20).
+ * Subtract 8 bytes from QP1 SQ header buf size
+ */
+ if (!is_udp)
+ sge.size -= 8;
+
+ /* Subtract 4 bytes for non vlan packets */
+ if (!is_vlan)
+ sge.size -= 4;
+
+ wqe->sg_list[0].addr = sge.addr;
+ wqe->sg_list[0].lkey = sge.lkey;
+ wqe->sg_list[0].size = sge.size;
+ wqe->num_sge++;
+
+ } else {
+ ibdev_err(&qp->rdev->ibdev, "QP1 buffer is empty!");
+ rc = -ENOMEM;
+ }
+ return rc;
+}
+
+/* For the MAD layer, it only provides the recv SGE the size of
+ * ib_grh + MAD datagram. No Ethernet headers, Ethertype, BTH, DETH,
+ * nor RoCE iCRC. The Cu+ solution must provide buffer for the entire
+ * receive packet (334 bytes) with no VLAN and then copy the GRH
+ * and the MAD datagram out to the provided SGE.
+ */
+static int bnxt_re_build_qp1_shadow_qp_recv(struct bnxt_re_qp *qp,
+ const struct ib_recv_wr *wr,
+ struct bnxt_qplib_swqe *wqe,
+ int payload_size)
+{
+ struct bnxt_re_sqp_entries *sqp_entry;
+ struct bnxt_qplib_sge ref, sge;
+ struct bnxt_re_dev *rdev;
+ u32 rq_prod_index;
+
+ rdev = qp->rdev;
+
+ rq_prod_index = bnxt_qplib_get_rq_prod_index(&qp->qplib_qp);
+
+ if (!bnxt_qplib_get_qp1_rq_buf(&qp->qplib_qp, &sge))
+ return -ENOMEM;
+
+ /* Create 1 SGE to receive the entire
+ * ethernet packet
+ */
+ /* Save the reference from ULP */
+ ref.addr = wqe->sg_list[0].addr;
+ ref.lkey = wqe->sg_list[0].lkey;
+ ref.size = wqe->sg_list[0].size;
+
+ sqp_entry = &rdev->gsi_ctx.sqp_tbl[rq_prod_index];
+
+ /* SGE 1 */
+ wqe->sg_list[0].addr = sge.addr;
+ wqe->sg_list[0].lkey = sge.lkey;
+ wqe->sg_list[0].size = BNXT_QPLIB_MAX_QP1_RQ_HDR_SIZE_V2;
+ sge.size -= wqe->sg_list[0].size;
+
+ sqp_entry->sge.addr = ref.addr;
+ sqp_entry->sge.lkey = ref.lkey;
+ sqp_entry->sge.size = ref.size;
+ /* Store the wrid for reporting completion */
+ sqp_entry->wrid = wqe->wr_id;
+ /* change the wqe->wrid to table index */
+ wqe->wr_id = rq_prod_index;
+ return 0;
+}
+
+static int is_ud_qp(struct bnxt_re_qp *qp)
+{
+ return (qp->qplib_qp.type == CMDQ_CREATE_QP_TYPE_UD ||
+ qp->qplib_qp.type == CMDQ_CREATE_QP_TYPE_GSI);
+}
+
+static int bnxt_re_build_send_wqe(struct bnxt_re_qp *qp,
+ const struct ib_send_wr *wr,
+ struct bnxt_qplib_swqe *wqe)
+{
+ struct bnxt_re_ah *ah = NULL;
+
+ if (is_ud_qp(qp)) {
+ ah = container_of(ud_wr(wr)->ah, struct bnxt_re_ah, ib_ah);
+ wqe->send.q_key = ud_wr(wr)->remote_qkey;
+ wqe->send.dst_qp = ud_wr(wr)->remote_qpn;
+ wqe->send.avid = ah->qplib_ah.id;
+ }
+ switch (wr->opcode) {
+ case IB_WR_SEND:
+ wqe->type = BNXT_QPLIB_SWQE_TYPE_SEND;
+ break;
+ case IB_WR_SEND_WITH_IMM:
+ wqe->type = BNXT_QPLIB_SWQE_TYPE_SEND_WITH_IMM;
+ wqe->send.imm_data = be32_to_cpu(wr->ex.imm_data);
+ break;
+ case IB_WR_SEND_WITH_INV:
+ wqe->type = BNXT_QPLIB_SWQE_TYPE_SEND_WITH_INV;
+ wqe->send.inv_key = wr->ex.invalidate_rkey;
+ break;
+ default:
+ return -EINVAL;
+ }
+ if (wr->send_flags & IB_SEND_SIGNALED)
+ wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_SIGNAL_COMP;
+ if (wr->send_flags & IB_SEND_FENCE)
+ wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_UC_FENCE;
+ if (wr->send_flags & IB_SEND_SOLICITED)
+ wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_SOLICIT_EVENT;
+ if (wr->send_flags & IB_SEND_INLINE)
+ wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_INLINE;
+
+ return 0;
+}
+
+static int bnxt_re_build_rdma_wqe(const struct ib_send_wr *wr,
+ struct bnxt_qplib_swqe *wqe)
+{
+ switch (wr->opcode) {
+ case IB_WR_RDMA_WRITE:
+ wqe->type = BNXT_QPLIB_SWQE_TYPE_RDMA_WRITE;
+ break;
+ case IB_WR_RDMA_WRITE_WITH_IMM:
+ wqe->type = BNXT_QPLIB_SWQE_TYPE_RDMA_WRITE_WITH_IMM;
+ wqe->rdma.imm_data = be32_to_cpu(wr->ex.imm_data);
+ break;
+ case IB_WR_RDMA_READ:
+ wqe->type = BNXT_QPLIB_SWQE_TYPE_RDMA_READ;
+ wqe->rdma.inv_key = wr->ex.invalidate_rkey;
+ break;
+ default:
+ return -EINVAL;
+ }
+ wqe->rdma.remote_va = rdma_wr(wr)->remote_addr;
+ wqe->rdma.r_key = rdma_wr(wr)->rkey;
+ if (wr->send_flags & IB_SEND_SIGNALED)
+ wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_SIGNAL_COMP;
+ if (wr->send_flags & IB_SEND_FENCE)
+ wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_UC_FENCE;
+ if (wr->send_flags & IB_SEND_SOLICITED)
+ wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_SOLICIT_EVENT;
+ if (wr->send_flags & IB_SEND_INLINE)
+ wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_INLINE;
+
+ return 0;
+}
+
+static int bnxt_re_build_atomic_wqe(const struct ib_send_wr *wr,
+ struct bnxt_qplib_swqe *wqe)
+{
+ switch (wr->opcode) {
+ case IB_WR_ATOMIC_CMP_AND_SWP:
+ wqe->type = BNXT_QPLIB_SWQE_TYPE_ATOMIC_CMP_AND_SWP;
+ wqe->atomic.cmp_data = atomic_wr(wr)->compare_add;
+ wqe->atomic.swap_data = atomic_wr(wr)->swap;
+ break;
+ case IB_WR_ATOMIC_FETCH_AND_ADD:
+ wqe->type = BNXT_QPLIB_SWQE_TYPE_ATOMIC_FETCH_AND_ADD;
+ wqe->atomic.cmp_data = atomic_wr(wr)->compare_add;
+ break;
+ default:
+ return -EINVAL;
+ }
+ wqe->atomic.remote_va = atomic_wr(wr)->remote_addr;
+ wqe->atomic.r_key = atomic_wr(wr)->rkey;
+ if (wr->send_flags & IB_SEND_SIGNALED)
+ wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_SIGNAL_COMP;
+ if (wr->send_flags & IB_SEND_FENCE)
+ wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_UC_FENCE;
+ if (wr->send_flags & IB_SEND_SOLICITED)
+ wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_SOLICIT_EVENT;
+ return 0;
+}
+
+static int bnxt_re_build_inv_wqe(const struct ib_send_wr *wr,
+ struct bnxt_qplib_swqe *wqe)
+{
+ wqe->type = BNXT_QPLIB_SWQE_TYPE_LOCAL_INV;
+ wqe->local_inv.inv_l_key = wr->ex.invalidate_rkey;
+
+ if (wr->send_flags & IB_SEND_SIGNALED)
+ wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_SIGNAL_COMP;
+ if (wr->send_flags & IB_SEND_SOLICITED)
+ wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_SOLICIT_EVENT;
+
+ return 0;
+}
+
+static int bnxt_re_build_reg_wqe(const struct ib_reg_wr *wr,
+ struct bnxt_qplib_swqe *wqe)
+{
+ struct bnxt_re_mr *mr = container_of(wr->mr, struct bnxt_re_mr, ib_mr);
+ struct bnxt_qplib_frpl *qplib_frpl = &mr->qplib_frpl;
+ int access = wr->access;
+
+ wqe->frmr.pbl_ptr = (__le64 *)qplib_frpl->hwq.pbl_ptr[0];
+ wqe->frmr.pbl_dma_ptr = qplib_frpl->hwq.pbl_dma_ptr[0];
+ wqe->frmr.page_list = mr->pages;
+ wqe->frmr.page_list_len = mr->npages;
+ wqe->frmr.levels = qplib_frpl->hwq.level;
+ wqe->type = BNXT_QPLIB_SWQE_TYPE_REG_MR;
+
+ if (wr->wr.send_flags & IB_SEND_SIGNALED)
+ wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_SIGNAL_COMP;
+
+ if (access & IB_ACCESS_LOCAL_WRITE)
+ wqe->frmr.access_cntl |= SQ_FR_PMR_ACCESS_CNTL_LOCAL_WRITE;
+ if (access & IB_ACCESS_REMOTE_READ)
+ wqe->frmr.access_cntl |= SQ_FR_PMR_ACCESS_CNTL_REMOTE_READ;
+ if (access & IB_ACCESS_REMOTE_WRITE)
+ wqe->frmr.access_cntl |= SQ_FR_PMR_ACCESS_CNTL_REMOTE_WRITE;
+ if (access & IB_ACCESS_REMOTE_ATOMIC)
+ wqe->frmr.access_cntl |= SQ_FR_PMR_ACCESS_CNTL_REMOTE_ATOMIC;
+ if (access & IB_ACCESS_MW_BIND)
+ wqe->frmr.access_cntl |= SQ_FR_PMR_ACCESS_CNTL_WINDOW_BIND;
+
+ wqe->frmr.l_key = wr->key;
+ wqe->frmr.length = wr->mr->length;
+ wqe->frmr.pbl_pg_sz_log = ilog2(PAGE_SIZE >> PAGE_SHIFT_4K);
+ wqe->frmr.pg_sz_log = ilog2(wr->mr->page_size >> PAGE_SHIFT_4K);
+ wqe->frmr.va = wr->mr->iova;
+ return 0;
+}
+
+static int bnxt_re_copy_inline_data(struct bnxt_re_dev *rdev,
+ const struct ib_send_wr *wr,
+ struct bnxt_qplib_swqe *wqe)
+{
+ /* Copy the inline data to the data field */
+ u8 *in_data;
+ u32 i, sge_len;
+ void *sge_addr;
+
+ in_data = wqe->inline_data;
+ for (i = 0; i < wr->num_sge; i++) {
+ sge_addr = (void *)(unsigned long)
+ wr->sg_list[i].addr;
+ sge_len = wr->sg_list[i].length;
+
+ if ((sge_len + wqe->inline_len) >
+ BNXT_QPLIB_SWQE_MAX_INLINE_LENGTH) {
+ ibdev_err(&rdev->ibdev,
+ "Inline data size requested > supported value");
+ return -EINVAL;
+ }
+ sge_len = wr->sg_list[i].length;
+
+ memcpy(in_data, sge_addr, sge_len);
+ in_data += wr->sg_list[i].length;
+ wqe->inline_len += wr->sg_list[i].length;
+ }
+ return wqe->inline_len;
+}
+
+static int bnxt_re_copy_wr_payload(struct bnxt_re_dev *rdev,
+ const struct ib_send_wr *wr,
+ struct bnxt_qplib_swqe *wqe)
+{
+ int payload_sz = 0;
+
+ if (wr->send_flags & IB_SEND_INLINE)
+ payload_sz = bnxt_re_copy_inline_data(rdev, wr, wqe);
+ else
+ payload_sz = bnxt_re_build_sgl(wr->sg_list, wqe->sg_list,
+ wqe->num_sge);
+
+ return payload_sz;
+}
+
+static void bnxt_ud_qp_hw_stall_workaround(struct bnxt_re_qp *qp)
+{
+ if ((qp->ib_qp.qp_type == IB_QPT_UD ||
+ qp->ib_qp.qp_type == IB_QPT_GSI ||
+ qp->ib_qp.qp_type == IB_QPT_RAW_ETHERTYPE) &&
+ qp->qplib_qp.wqe_cnt == BNXT_RE_UD_QP_HW_STALL) {
+ int qp_attr_mask;
+ struct ib_qp_attr qp_attr;
+
+ qp_attr_mask = IB_QP_STATE;
+ qp_attr.qp_state = IB_QPS_RTS;
+ bnxt_re_modify_qp(&qp->ib_qp, &qp_attr, qp_attr_mask, NULL);
+ qp->qplib_qp.wqe_cnt = 0;
+ }
+}
+
+static int bnxt_re_post_send_shadow_qp(struct bnxt_re_dev *rdev,
+ struct bnxt_re_qp *qp,
+ const struct ib_send_wr *wr)
+{
+ int rc = 0, payload_sz = 0;
+ unsigned long flags;
+
+ spin_lock_irqsave(&qp->sq_lock, flags);
+ while (wr) {
+ struct bnxt_qplib_swqe wqe = {};
+
+ /* Common */
+ wqe.num_sge = wr->num_sge;
+ if (wr->num_sge > qp->qplib_qp.sq.max_sge) {
+ ibdev_err(&rdev->ibdev,
+ "Limit exceeded for Send SGEs");
+ rc = -EINVAL;
+ goto bad;
+ }
+
+ payload_sz = bnxt_re_copy_wr_payload(qp->rdev, wr, &wqe);
+ if (payload_sz < 0) {
+ rc = -EINVAL;
+ goto bad;
+ }
+ wqe.wr_id = wr->wr_id;
+
+ wqe.type = BNXT_QPLIB_SWQE_TYPE_SEND;
+
+ rc = bnxt_re_build_send_wqe(qp, wr, &wqe);
+ if (!rc)
+ rc = bnxt_qplib_post_send(&qp->qplib_qp, &wqe);
+bad:
+ if (rc) {
+ ibdev_err(&rdev->ibdev,
+ "Post send failed opcode = %#x rc = %d",
+ wr->opcode, rc);
+ break;
+ }
+ wr = wr->next;
+ }
+ bnxt_qplib_post_send_db(&qp->qplib_qp);
+ if (!bnxt_qplib_is_chip_gen_p5_p7(qp->rdev->chip_ctx))
+ bnxt_ud_qp_hw_stall_workaround(qp);
+ spin_unlock_irqrestore(&qp->sq_lock, flags);
+ return rc;
+}
+
+static void bnxt_re_legacy_set_uc_fence(struct bnxt_qplib_swqe *wqe)
+{
+ /* Need unconditional fence for non-wire memory opcode
+ * to work as expected.
+ */
+ if (wqe->type == BNXT_QPLIB_SWQE_TYPE_LOCAL_INV ||
+ wqe->type == BNXT_QPLIB_SWQE_TYPE_FAST_REG_MR ||
+ wqe->type == BNXT_QPLIB_SWQE_TYPE_REG_MR ||
+ wqe->type == BNXT_QPLIB_SWQE_TYPE_BIND_MW)
+ wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_UC_FENCE;
+}
+
+int bnxt_re_post_send(struct ib_qp *ib_qp, const struct ib_send_wr *wr,
+ const struct ib_send_wr **bad_wr)
+{
+ struct bnxt_re_qp *qp = container_of(ib_qp, struct bnxt_re_qp, ib_qp);
+ struct bnxt_qplib_swqe wqe;
+ int rc = 0, payload_sz = 0;
+ unsigned long flags;
+
+ spin_lock_irqsave(&qp->sq_lock, flags);
+ while (wr) {
+ /* House keeping */
+ memset(&wqe, 0, sizeof(wqe));
+
+ /* Common */
+ wqe.num_sge = wr->num_sge;
+ if (wr->num_sge > qp->qplib_qp.sq.max_sge) {
+ ibdev_err(&qp->rdev->ibdev,
+ "Limit exceeded for Send SGEs");
+ rc = -EINVAL;
+ goto bad;
+ }
+
+ payload_sz = bnxt_re_copy_wr_payload(qp->rdev, wr, &wqe);
+ if (payload_sz < 0) {
+ rc = -EINVAL;
+ goto bad;
+ }
+ wqe.wr_id = wr->wr_id;
+
+ switch (wr->opcode) {
+ case IB_WR_SEND:
+ case IB_WR_SEND_WITH_IMM:
+ if (qp->qplib_qp.type == CMDQ_CREATE_QP1_TYPE_GSI) {
+ rc = bnxt_re_build_qp1_send_v2(qp, wr, &wqe,
+ payload_sz);
+ if (rc)
+ goto bad;
+ wqe.rawqp1.lflags |=
+ SQ_SEND_RAWETH_QP1_LFLAGS_ROCE_CRC;
+ }
+ switch (wr->send_flags) {
+ case IB_SEND_IP_CSUM:
+ wqe.rawqp1.lflags |=
+ SQ_SEND_RAWETH_QP1_LFLAGS_IP_CHKSUM;
+ break;
+ default:
+ break;
+ }
+ fallthrough;
+ case IB_WR_SEND_WITH_INV:
+ rc = bnxt_re_build_send_wqe(qp, wr, &wqe);
+ break;
+ case IB_WR_RDMA_WRITE:
+ case IB_WR_RDMA_WRITE_WITH_IMM:
+ case IB_WR_RDMA_READ:
+ rc = bnxt_re_build_rdma_wqe(wr, &wqe);
+ break;
+ case IB_WR_ATOMIC_CMP_AND_SWP:
+ case IB_WR_ATOMIC_FETCH_AND_ADD:
+ rc = bnxt_re_build_atomic_wqe(wr, &wqe);
+ break;
+ case IB_WR_RDMA_READ_WITH_INV:
+ ibdev_err(&qp->rdev->ibdev,
+ "RDMA Read with Invalidate is not supported");
+ rc = -EINVAL;
+ goto bad;
+ case IB_WR_LOCAL_INV:
+ rc = bnxt_re_build_inv_wqe(wr, &wqe);
+ break;
+ case IB_WR_REG_MR:
+ rc = bnxt_re_build_reg_wqe(reg_wr(wr), &wqe);
+ break;
+ default:
+ /* Unsupported WRs */
+ ibdev_err(&qp->rdev->ibdev,
+ "WR (%#x) is not supported", wr->opcode);
+ rc = -EINVAL;
+ goto bad;
+ }
+ if (!rc) {
+ if (!bnxt_qplib_is_chip_gen_p5_p7(qp->rdev->chip_ctx))
+ bnxt_re_legacy_set_uc_fence(&wqe);
+ rc = bnxt_qplib_post_send(&qp->qplib_qp, &wqe);
+ }
+bad:
+ if (rc) {
+ ibdev_err(&qp->rdev->ibdev,
+ "post_send failed op:%#x qps = %#x rc = %d\n",
+ wr->opcode, qp->qplib_qp.state, rc);
+ *bad_wr = wr;
+ break;
+ }
+ wr = wr->next;
+ }
+ bnxt_qplib_post_send_db(&qp->qplib_qp);
+ if (!bnxt_qplib_is_chip_gen_p5_p7(qp->rdev->chip_ctx))
+ bnxt_ud_qp_hw_stall_workaround(qp);
+ spin_unlock_irqrestore(&qp->sq_lock, flags);
+
+ return rc;
+}
+
+static int bnxt_re_post_recv_shadow_qp(struct bnxt_re_dev *rdev,
+ struct bnxt_re_qp *qp,
+ const struct ib_recv_wr *wr)
+{
+ struct bnxt_qplib_swqe wqe;
+ int rc = 0;
+
+ while (wr) {
+ /* House keeping */
+ memset(&wqe, 0, sizeof(wqe));
+
+ /* Common */
+ wqe.num_sge = wr->num_sge;
+ if (wr->num_sge > qp->qplib_qp.rq.max_sge) {
+ ibdev_err(&rdev->ibdev,
+ "Limit exceeded for Receive SGEs");
+ rc = -EINVAL;
+ break;
+ }
+ bnxt_re_build_sgl(wr->sg_list, wqe.sg_list, wr->num_sge);
+ wqe.wr_id = wr->wr_id;
+ wqe.type = BNXT_QPLIB_SWQE_TYPE_RECV;
+
+ rc = bnxt_qplib_post_recv(&qp->qplib_qp, &wqe);
+ if (rc)
+ break;
+
+ wr = wr->next;
+ }
+ if (!rc)
+ bnxt_qplib_post_recv_db(&qp->qplib_qp);
+ return rc;
+}
+
+int bnxt_re_post_recv(struct ib_qp *ib_qp, const struct ib_recv_wr *wr,
+ const struct ib_recv_wr **bad_wr)
+{
+ struct bnxt_re_qp *qp = container_of(ib_qp, struct bnxt_re_qp, ib_qp);
+ struct bnxt_qplib_swqe wqe;
+ int rc = 0, payload_sz = 0;
+ unsigned long flags;
+ u32 count = 0;
+
+ spin_lock_irqsave(&qp->rq_lock, flags);
+ while (wr) {
+ /* House keeping */
+ memset(&wqe, 0, sizeof(wqe));
+
+ /* Common */
+ wqe.num_sge = wr->num_sge;
+ if (wr->num_sge > qp->qplib_qp.rq.max_sge) {
+ ibdev_err(&qp->rdev->ibdev,
+ "Limit exceeded for Receive SGEs");
+ rc = -EINVAL;
+ *bad_wr = wr;
+ break;
+ }
+
+ payload_sz = bnxt_re_build_sgl(wr->sg_list, wqe.sg_list,
+ wr->num_sge);
+ wqe.wr_id = wr->wr_id;
+ wqe.type = BNXT_QPLIB_SWQE_TYPE_RECV;
+
+ if (ib_qp->qp_type == IB_QPT_GSI &&
+ qp->qplib_qp.type != CMDQ_CREATE_QP_TYPE_GSI)
+ rc = bnxt_re_build_qp1_shadow_qp_recv(qp, wr, &wqe,
+ payload_sz);
+ if (!rc)
+ rc = bnxt_qplib_post_recv(&qp->qplib_qp, &wqe);
+ if (rc) {
+ *bad_wr = wr;
+ break;
+ }
+
+ /* Ring DB if the RQEs posted reaches a threshold value */
+ if (++count >= BNXT_RE_RQ_WQE_THRESHOLD) {
+ bnxt_qplib_post_recv_db(&qp->qplib_qp);
+ count = 0;
+ }
+
+ wr = wr->next;
+ }
+
+ if (count)
+ bnxt_qplib_post_recv_db(&qp->qplib_qp);
+
+ spin_unlock_irqrestore(&qp->rq_lock, flags);
+
+ return rc;
+}
+
+static struct bnxt_qplib_nq *bnxt_re_get_nq(struct bnxt_re_dev *rdev)
+{
+ int min, indx;
+
+ mutex_lock(&rdev->nqr->load_lock);
+ for (indx = 0, min = 0; indx < (rdev->nqr->num_msix - 1); indx++) {
+ if (rdev->nqr->nq[min].load > rdev->nqr->nq[indx].load)
+ min = indx;
+ }
+ rdev->nqr->nq[min].load++;
+ mutex_unlock(&rdev->nqr->load_lock);
+
+ return &rdev->nqr->nq[min];
+}
+
+static void bnxt_re_put_nq(struct bnxt_re_dev *rdev, struct bnxt_qplib_nq *nq)
+{
+ mutex_lock(&rdev->nqr->load_lock);
+ nq->load--;
+ mutex_unlock(&rdev->nqr->load_lock);
+}
+
+/* Completion Queues */
+int bnxt_re_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
+{
+ struct bnxt_qplib_chip_ctx *cctx;
+ struct bnxt_qplib_nq *nq;
+ struct bnxt_re_dev *rdev;
+ struct bnxt_re_cq *cq;
+
+ cq = container_of(ib_cq, struct bnxt_re_cq, ib_cq);
+ rdev = cq->rdev;
+ nq = cq->qplib_cq.nq;
+ cctx = rdev->chip_ctx;
+
+ if (cctx->modes.toggle_bits & BNXT_QPLIB_CQ_TOGGLE_BIT) {
+ free_page((unsigned long)cq->uctx_cq_page);
+ hash_del(&cq->hash_entry);
+ }
+ bnxt_qplib_destroy_cq(&rdev->qplib_res, &cq->qplib_cq);
+
+ bnxt_re_put_nq(rdev, nq);
+ ib_umem_release(cq->umem);
+
+ atomic_dec(&rdev->stats.res.cq_count);
+ kfree(cq->cql);
+ return 0;
+}
+
+int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct bnxt_re_cq *cq = container_of(ibcq, struct bnxt_re_cq, ib_cq);
+ struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibcq->device, ibdev);
+ struct ib_udata *udata = &attrs->driver_udata;
+ struct bnxt_re_ucontext *uctx =
+ rdma_udata_to_drv_context(udata, struct bnxt_re_ucontext, ib_uctx);
+ struct bnxt_qplib_dev_attr *dev_attr = rdev->dev_attr;
+ struct bnxt_qplib_chip_ctx *cctx;
+ int cqe = attr->cqe;
+ int rc, entries;
+ u32 active_cqs;
+
+ if (attr->flags)
+ return -EOPNOTSUPP;
+
+ /* Validate CQ fields */
+ if (cqe < 1 || cqe > dev_attr->max_cq_wqes) {
+ ibdev_err(&rdev->ibdev, "Failed to create CQ -max exceeded");
+ return -EINVAL;
+ }
+
+ cq->rdev = rdev;
+ cctx = rdev->chip_ctx;
+ cq->qplib_cq.cq_handle = (u64)(unsigned long)(&cq->qplib_cq);
+
+ entries = bnxt_re_init_depth(cqe + 1, uctx);
+ if (entries > dev_attr->max_cq_wqes + 1)
+ entries = dev_attr->max_cq_wqes + 1;
+
+ cq->qplib_cq.sg_info.pgsize = PAGE_SIZE;
+ cq->qplib_cq.sg_info.pgshft = PAGE_SHIFT;
+ if (udata) {
+ struct bnxt_re_cq_req req;
+ if (ib_copy_from_udata(&req, udata, sizeof(req))) {
+ rc = -EFAULT;
+ goto fail;
+ }
+
+ cq->umem = ib_umem_get(&rdev->ibdev, req.cq_va,
+ entries * sizeof(struct cq_base),
+ IB_ACCESS_LOCAL_WRITE);
+ if (IS_ERR(cq->umem)) {
+ rc = PTR_ERR(cq->umem);
+ goto fail;
+ }
+ cq->qplib_cq.sg_info.umem = cq->umem;
+ cq->qplib_cq.dpi = &uctx->dpi;
+ } else {
+ cq->max_cql = min_t(u32, entries, MAX_CQL_PER_POLL);
+ cq->cql = kcalloc(cq->max_cql, sizeof(struct bnxt_qplib_cqe),
+ GFP_KERNEL);
+ if (!cq->cql) {
+ rc = -ENOMEM;
+ goto fail;
+ }
+
+ cq->qplib_cq.dpi = &rdev->dpi_privileged;
+ }
+ cq->qplib_cq.max_wqe = entries;
+ cq->qplib_cq.coalescing = &rdev->cq_coalescing;
+ cq->qplib_cq.nq = bnxt_re_get_nq(rdev);
+ cq->qplib_cq.cnq_hw_ring_id = cq->qplib_cq.nq->ring_id;
+
+ rc = bnxt_qplib_create_cq(&rdev->qplib_res, &cq->qplib_cq);
+ if (rc) {
+ ibdev_err(&rdev->ibdev, "Failed to create HW CQ");
+ goto fail;
+ }
+
+ cq->ib_cq.cqe = entries;
+ cq->cq_period = cq->qplib_cq.period;
+
+ active_cqs = atomic_inc_return(&rdev->stats.res.cq_count);
+ if (active_cqs > rdev->stats.res.cq_watermark)
+ rdev->stats.res.cq_watermark = active_cqs;
+ spin_lock_init(&cq->cq_lock);
+
+ if (udata) {
+ struct bnxt_re_cq_resp resp = {};
+
+ if (cctx->modes.toggle_bits & BNXT_QPLIB_CQ_TOGGLE_BIT) {
+ hash_add(rdev->cq_hash, &cq->hash_entry, cq->qplib_cq.id);
+ /* Allocate a page */
+ cq->uctx_cq_page = (void *)get_zeroed_page(GFP_KERNEL);
+ if (!cq->uctx_cq_page) {
+ rc = -ENOMEM;
+ goto c2fail;
+ }
+ resp.comp_mask |= BNXT_RE_CQ_TOGGLE_PAGE_SUPPORT;
+ }
+ resp.cqid = cq->qplib_cq.id;
+ resp.tail = cq->qplib_cq.hwq.cons;
+ resp.phase = cq->qplib_cq.period;
+ resp.rsvd = 0;
+ rc = ib_copy_to_udata(udata, &resp, min(sizeof(resp), udata->outlen));
+ if (rc) {
+ ibdev_err(&rdev->ibdev, "Failed to copy CQ udata");
+ bnxt_qplib_destroy_cq(&rdev->qplib_res, &cq->qplib_cq);
+ goto free_mem;
+ }
+ }
+
+ return 0;
+
+free_mem:
+ free_page((unsigned long)cq->uctx_cq_page);
+c2fail:
+ ib_umem_release(cq->umem);
+fail:
+ kfree(cq->cql);
+ return rc;
+}
+
+static void bnxt_re_resize_cq_complete(struct bnxt_re_cq *cq)
+{
+ struct bnxt_re_dev *rdev = cq->rdev;
+
+ bnxt_qplib_resize_cq_complete(&rdev->qplib_res, &cq->qplib_cq);
+
+ cq->qplib_cq.max_wqe = cq->resize_cqe;
+ if (cq->resize_umem) {
+ ib_umem_release(cq->umem);
+ cq->umem = cq->resize_umem;
+ cq->resize_umem = NULL;
+ cq->resize_cqe = 0;
+ }
+}
+
+int bnxt_re_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
+{
+ struct bnxt_qplib_sg_info sg_info = {};
+ struct bnxt_qplib_dpi *orig_dpi = NULL;
+ struct bnxt_qplib_dev_attr *dev_attr;
+ struct bnxt_re_ucontext *uctx = NULL;
+ struct bnxt_re_resize_cq_req req;
+ struct bnxt_re_dev *rdev;
+ struct bnxt_re_cq *cq;
+ int rc, entries;
+
+ cq = container_of(ibcq, struct bnxt_re_cq, ib_cq);
+ rdev = cq->rdev;
+ dev_attr = rdev->dev_attr;
+ if (!ibcq->uobject) {
+ ibdev_err(&rdev->ibdev, "Kernel CQ Resize not supported");
+ return -EOPNOTSUPP;
+ }
+
+ if (cq->resize_umem) {
+ ibdev_err(&rdev->ibdev, "Resize CQ %#x failed - Busy",
+ cq->qplib_cq.id);
+ return -EBUSY;
+ }
+
+ /* Check the requested cq depth out of supported depth */
+ if (cqe < 1 || cqe > dev_attr->max_cq_wqes) {
+ ibdev_err(&rdev->ibdev, "Resize CQ %#x failed - out of range cqe %d",
+ cq->qplib_cq.id, cqe);
+ return -EINVAL;
+ }
+
+ uctx = rdma_udata_to_drv_context(udata, struct bnxt_re_ucontext, ib_uctx);
+ entries = bnxt_re_init_depth(cqe + 1, uctx);
+ if (entries > dev_attr->max_cq_wqes + 1)
+ entries = dev_attr->max_cq_wqes + 1;
+
+ /* uverbs consumer */
+ if (ib_copy_from_udata(&req, udata, sizeof(req))) {
+ rc = -EFAULT;
+ goto fail;
+ }
+
+ cq->resize_umem = ib_umem_get(&rdev->ibdev, req.cq_va,
+ entries * sizeof(struct cq_base),
+ IB_ACCESS_LOCAL_WRITE);
+ if (IS_ERR(cq->resize_umem)) {
+ rc = PTR_ERR(cq->resize_umem);
+ ibdev_err(&rdev->ibdev, "%s: ib_umem_get failed! rc = %pe\n",
+ __func__, cq->resize_umem);
+ cq->resize_umem = NULL;
+ goto fail;
+ }
+ cq->resize_cqe = entries;
+ memcpy(&sg_info, &cq->qplib_cq.sg_info, sizeof(sg_info));
+ orig_dpi = cq->qplib_cq.dpi;
+
+ cq->qplib_cq.sg_info.umem = cq->resize_umem;
+ cq->qplib_cq.sg_info.pgsize = PAGE_SIZE;
+ cq->qplib_cq.sg_info.pgshft = PAGE_SHIFT;
+ cq->qplib_cq.dpi = &uctx->dpi;
+
+ rc = bnxt_qplib_resize_cq(&rdev->qplib_res, &cq->qplib_cq, entries);
+ if (rc) {
+ ibdev_err(&rdev->ibdev, "Resize HW CQ %#x failed!",
+ cq->qplib_cq.id);
+ goto fail;
+ }
+
+ cq->ib_cq.cqe = cq->resize_cqe;
+ atomic_inc(&rdev->stats.res.resize_count);
+
+ return 0;
+
+fail:
+ if (cq->resize_umem) {
+ ib_umem_release(cq->resize_umem);
+ cq->resize_umem = NULL;
+ cq->resize_cqe = 0;
+ memcpy(&cq->qplib_cq.sg_info, &sg_info, sizeof(sg_info));
+ cq->qplib_cq.dpi = orig_dpi;
+ }
+ return rc;
+}
+
+static u8 __req_to_ib_wc_status(u8 qstatus)
+{
+ switch (qstatus) {
+ case CQ_REQ_STATUS_OK:
+ return IB_WC_SUCCESS;
+ case CQ_REQ_STATUS_BAD_RESPONSE_ERR:
+ return IB_WC_BAD_RESP_ERR;
+ case CQ_REQ_STATUS_LOCAL_LENGTH_ERR:
+ return IB_WC_LOC_LEN_ERR;
+ case CQ_REQ_STATUS_LOCAL_QP_OPERATION_ERR:
+ return IB_WC_LOC_QP_OP_ERR;
+ case CQ_REQ_STATUS_LOCAL_PROTECTION_ERR:
+ return IB_WC_LOC_PROT_ERR;
+ case CQ_REQ_STATUS_MEMORY_MGT_OPERATION_ERR:
+ return IB_WC_GENERAL_ERR;
+ case CQ_REQ_STATUS_REMOTE_INVALID_REQUEST_ERR:
+ return IB_WC_REM_INV_REQ_ERR;
+ case CQ_REQ_STATUS_REMOTE_ACCESS_ERR:
+ return IB_WC_REM_ACCESS_ERR;
+ case CQ_REQ_STATUS_REMOTE_OPERATION_ERR:
+ return IB_WC_REM_OP_ERR;
+ case CQ_REQ_STATUS_RNR_NAK_RETRY_CNT_ERR:
+ return IB_WC_RNR_RETRY_EXC_ERR;
+ case CQ_REQ_STATUS_TRANSPORT_RETRY_CNT_ERR:
+ return IB_WC_RETRY_EXC_ERR;
+ case CQ_REQ_STATUS_WORK_REQUEST_FLUSHED_ERR:
+ return IB_WC_WR_FLUSH_ERR;
+ default:
+ return IB_WC_GENERAL_ERR;
+ }
+ return 0;
+}
+
+static u8 __rawqp1_to_ib_wc_status(u8 qstatus)
+{
+ switch (qstatus) {
+ case CQ_RES_RAWETH_QP1_STATUS_OK:
+ return IB_WC_SUCCESS;
+ case CQ_RES_RAWETH_QP1_STATUS_LOCAL_ACCESS_ERROR:
+ return IB_WC_LOC_ACCESS_ERR;
+ case CQ_RES_RAWETH_QP1_STATUS_HW_LOCAL_LENGTH_ERR:
+ return IB_WC_LOC_LEN_ERR;
+ case CQ_RES_RAWETH_QP1_STATUS_LOCAL_PROTECTION_ERR:
+ return IB_WC_LOC_PROT_ERR;
+ case CQ_RES_RAWETH_QP1_STATUS_LOCAL_QP_OPERATION_ERR:
+ return IB_WC_LOC_QP_OP_ERR;
+ case CQ_RES_RAWETH_QP1_STATUS_MEMORY_MGT_OPERATION_ERR:
+ return IB_WC_GENERAL_ERR;
+ case CQ_RES_RAWETH_QP1_STATUS_WORK_REQUEST_FLUSHED_ERR:
+ return IB_WC_WR_FLUSH_ERR;
+ case CQ_RES_RAWETH_QP1_STATUS_HW_FLUSH_ERR:
+ return IB_WC_WR_FLUSH_ERR;
+ default:
+ return IB_WC_GENERAL_ERR;
+ }
+}
+
+static u8 __rc_to_ib_wc_status(u8 qstatus)
+{
+ switch (qstatus) {
+ case CQ_RES_RC_STATUS_OK:
+ return IB_WC_SUCCESS;
+ case CQ_RES_RC_STATUS_LOCAL_ACCESS_ERROR:
+ return IB_WC_LOC_ACCESS_ERR;
+ case CQ_RES_RC_STATUS_LOCAL_LENGTH_ERR:
+ return IB_WC_LOC_LEN_ERR;
+ case CQ_RES_RC_STATUS_LOCAL_PROTECTION_ERR:
+ return IB_WC_LOC_PROT_ERR;
+ case CQ_RES_RC_STATUS_LOCAL_QP_OPERATION_ERR:
+ return IB_WC_LOC_QP_OP_ERR;
+ case CQ_RES_RC_STATUS_MEMORY_MGT_OPERATION_ERR:
+ return IB_WC_GENERAL_ERR;
+ case CQ_RES_RC_STATUS_REMOTE_INVALID_REQUEST_ERR:
+ return IB_WC_REM_INV_REQ_ERR;
+ case CQ_RES_RC_STATUS_WORK_REQUEST_FLUSHED_ERR:
+ return IB_WC_WR_FLUSH_ERR;
+ case CQ_RES_RC_STATUS_HW_FLUSH_ERR:
+ return IB_WC_WR_FLUSH_ERR;
+ default:
+ return IB_WC_GENERAL_ERR;
+ }
+}
+
+static void bnxt_re_process_req_wc(struct ib_wc *wc, struct bnxt_qplib_cqe *cqe)
+{
+ switch (cqe->type) {
+ case BNXT_QPLIB_SWQE_TYPE_SEND:
+ wc->opcode = IB_WC_SEND;
+ break;
+ case BNXT_QPLIB_SWQE_TYPE_SEND_WITH_IMM:
+ wc->opcode = IB_WC_SEND;
+ wc->wc_flags |= IB_WC_WITH_IMM;
+ break;
+ case BNXT_QPLIB_SWQE_TYPE_SEND_WITH_INV:
+ wc->opcode = IB_WC_SEND;
+ wc->wc_flags |= IB_WC_WITH_INVALIDATE;
+ break;
+ case BNXT_QPLIB_SWQE_TYPE_RDMA_WRITE:
+ wc->opcode = IB_WC_RDMA_WRITE;
+ break;
+ case BNXT_QPLIB_SWQE_TYPE_RDMA_WRITE_WITH_IMM:
+ wc->opcode = IB_WC_RDMA_WRITE;
+ wc->wc_flags |= IB_WC_WITH_IMM;
+ break;
+ case BNXT_QPLIB_SWQE_TYPE_RDMA_READ:
+ wc->opcode = IB_WC_RDMA_READ;
+ break;
+ case BNXT_QPLIB_SWQE_TYPE_ATOMIC_CMP_AND_SWP:
+ wc->opcode = IB_WC_COMP_SWAP;
+ break;
+ case BNXT_QPLIB_SWQE_TYPE_ATOMIC_FETCH_AND_ADD:
+ wc->opcode = IB_WC_FETCH_ADD;
+ break;
+ case BNXT_QPLIB_SWQE_TYPE_LOCAL_INV:
+ wc->opcode = IB_WC_LOCAL_INV;
+ break;
+ case BNXT_QPLIB_SWQE_TYPE_REG_MR:
+ wc->opcode = IB_WC_REG_MR;
+ break;
+ default:
+ wc->opcode = IB_WC_SEND;
+ break;
+ }
+
+ wc->status = __req_to_ib_wc_status(cqe->status);
+}
+
+static int bnxt_re_check_packet_type(u16 raweth_qp1_flags,
+ u16 raweth_qp1_flags2)
+{
+ bool is_ipv6 = false, is_ipv4 = false;
+
+ /* raweth_qp1_flags Bit 9-6 indicates itype */
+ if ((raweth_qp1_flags & CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS_ITYPE_ROCE)
+ != CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS_ITYPE_ROCE)
+ return -1;
+
+ if (raweth_qp1_flags2 &
+ CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS2_IP_CS_CALC &&
+ raweth_qp1_flags2 &
+ CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS2_L4_CS_CALC) {
+ /* raweth_qp1_flags2 Bit 8 indicates ip_type. 0-v4 1 - v6 */
+ (raweth_qp1_flags2 &
+ CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS2_IP_TYPE) ?
+ (is_ipv6 = true) : (is_ipv4 = true);
+ return ((is_ipv6) ?
+ BNXT_RE_ROCEV2_IPV6_PACKET :
+ BNXT_RE_ROCEV2_IPV4_PACKET);
+ } else {
+ return BNXT_RE_ROCE_V1_PACKET;
+ }
+}
+
+static int bnxt_re_to_ib_nw_type(int nw_type)
+{
+ u8 nw_hdr_type = 0xFF;
+
+ switch (nw_type) {
+ case BNXT_RE_ROCE_V1_PACKET:
+ nw_hdr_type = RDMA_NETWORK_ROCE_V1;
+ break;
+ case BNXT_RE_ROCEV2_IPV4_PACKET:
+ nw_hdr_type = RDMA_NETWORK_IPV4;
+ break;
+ case BNXT_RE_ROCEV2_IPV6_PACKET:
+ nw_hdr_type = RDMA_NETWORK_IPV6;
+ break;
+ }
+ return nw_hdr_type;
+}
+
+static bool bnxt_re_is_loopback_packet(struct bnxt_re_dev *rdev,
+ void *rq_hdr_buf)
+{
+ u8 *tmp_buf = NULL;
+ struct ethhdr *eth_hdr;
+ u16 eth_type;
+ bool rc = false;
+
+ tmp_buf = (u8 *)rq_hdr_buf;
+ /*
+ * If dest mac is not same as I/F mac, this could be a
+ * loopback address or multicast address, check whether
+ * it is a loopback packet
+ */
+ if (!ether_addr_equal(tmp_buf, rdev->netdev->dev_addr)) {
+ tmp_buf += 4;
+ /* Check the ether type */
+ eth_hdr = (struct ethhdr *)tmp_buf;
+ eth_type = ntohs(eth_hdr->h_proto);
+ switch (eth_type) {
+ case ETH_P_IBOE:
+ rc = true;
+ break;
+ case ETH_P_IP:
+ case ETH_P_IPV6: {
+ u32 len;
+ struct udphdr *udp_hdr;
+
+ len = (eth_type == ETH_P_IP ? sizeof(struct iphdr) :
+ sizeof(struct ipv6hdr));
+ tmp_buf += sizeof(struct ethhdr) + len;
+ udp_hdr = (struct udphdr *)tmp_buf;
+ if (ntohs(udp_hdr->dest) ==
+ ROCE_V2_UDP_DPORT)
+ rc = true;
+ break;
+ }
+ default:
+ break;
+ }
+ }
+
+ return rc;
+}
+
+static int bnxt_re_process_raw_qp_pkt_rx(struct bnxt_re_qp *gsi_qp,
+ struct bnxt_qplib_cqe *cqe)
+{
+ struct bnxt_re_dev *rdev = gsi_qp->rdev;
+ struct bnxt_re_sqp_entries *sqp_entry = NULL;
+ struct bnxt_re_qp *gsi_sqp = rdev->gsi_ctx.gsi_sqp;
+ dma_addr_t shrq_hdr_buf_map;
+ struct ib_sge s_sge[2] = {};
+ struct ib_sge r_sge[2] = {};
+ struct bnxt_re_ah *gsi_sah;
+ struct ib_recv_wr rwr = {};
+ dma_addr_t rq_hdr_buf_map;
+ struct ib_ud_wr udwr = {};
+ struct ib_send_wr *swr;
+ u32 skip_bytes = 0;
+ int pkt_type = 0;
+ void *rq_hdr_buf;
+ u32 offset = 0;
+ u32 tbl_idx;
+ int rc;
+
+ swr = &udwr.wr;
+ tbl_idx = cqe->wr_id;
+
+ rq_hdr_buf = gsi_qp->qplib_qp.rq_hdr_buf +
+ (tbl_idx * gsi_qp->qplib_qp.rq_hdr_buf_size);
+ rq_hdr_buf_map = bnxt_qplib_get_qp_buf_from_index(&gsi_qp->qplib_qp,
+ tbl_idx);
+
+ /* Shadow QP header buffer */
+ shrq_hdr_buf_map = bnxt_qplib_get_qp_buf_from_index(&gsi_qp->qplib_qp,
+ tbl_idx);
+ sqp_entry = &rdev->gsi_ctx.sqp_tbl[tbl_idx];
+
+ /* Store this cqe */
+ memcpy(&sqp_entry->cqe, cqe, sizeof(struct bnxt_qplib_cqe));
+ sqp_entry->qp1_qp = gsi_qp;
+
+ /* Find packet type from the cqe */
+
+ pkt_type = bnxt_re_check_packet_type(cqe->raweth_qp1_flags,
+ cqe->raweth_qp1_flags2);
+ if (pkt_type < 0) {
+ ibdev_err(&rdev->ibdev, "Invalid packet\n");
+ return -EINVAL;
+ }
+
+ /* Adjust the offset for the user buffer and post in the rq */
+
+ if (pkt_type == BNXT_RE_ROCEV2_IPV4_PACKET)
+ offset = 20;
+
+ /*
+ * QP1 loopback packet has 4 bytes of internal header before
+ * ether header. Skip these four bytes.
+ */
+ if (bnxt_re_is_loopback_packet(rdev, rq_hdr_buf))
+ skip_bytes = 4;
+
+ /* First send SGE . Skip the ether header*/
+ s_sge[0].addr = rq_hdr_buf_map + BNXT_QPLIB_MAX_QP1_RQ_ETH_HDR_SIZE
+ + skip_bytes;
+ s_sge[0].lkey = 0xFFFFFFFF;
+ s_sge[0].length = offset ? BNXT_QPLIB_MAX_GRH_HDR_SIZE_IPV4 :
+ BNXT_QPLIB_MAX_GRH_HDR_SIZE_IPV6;
+
+ /* Second Send SGE */
+ s_sge[1].addr = s_sge[0].addr + s_sge[0].length +
+ BNXT_QPLIB_MAX_QP1_RQ_BDETH_HDR_SIZE;
+ if (pkt_type != BNXT_RE_ROCE_V1_PACKET)
+ s_sge[1].addr += 8;
+ s_sge[1].lkey = 0xFFFFFFFF;
+ s_sge[1].length = 256;
+
+ /* First recv SGE */
+
+ r_sge[0].addr = shrq_hdr_buf_map;
+ r_sge[0].lkey = 0xFFFFFFFF;
+ r_sge[0].length = 40;
+
+ r_sge[1].addr = sqp_entry->sge.addr + offset;
+ r_sge[1].lkey = sqp_entry->sge.lkey;
+ r_sge[1].length = BNXT_QPLIB_MAX_GRH_HDR_SIZE_IPV6 + 256 - offset;
+
+ /* Create receive work request */
+ rwr.num_sge = 2;
+ rwr.sg_list = r_sge;
+ rwr.wr_id = tbl_idx;
+ rwr.next = NULL;
+
+ rc = bnxt_re_post_recv_shadow_qp(rdev, gsi_sqp, &rwr);
+ if (rc) {
+ ibdev_err(&rdev->ibdev,
+ "Failed to post Rx buffers to shadow QP");
+ return -ENOMEM;
+ }
+
+ swr->num_sge = 2;
+ swr->sg_list = s_sge;
+ swr->wr_id = tbl_idx;
+ swr->opcode = IB_WR_SEND;
+ swr->next = NULL;
+ gsi_sah = rdev->gsi_ctx.gsi_sah;
+ udwr.ah = &gsi_sah->ib_ah;
+ udwr.remote_qpn = gsi_sqp->qplib_qp.id;
+ udwr.remote_qkey = gsi_sqp->qplib_qp.qkey;
+
+ /* post data received in the send queue */
+ return bnxt_re_post_send_shadow_qp(rdev, gsi_sqp, swr);
+}
+
+static void bnxt_re_process_res_rawqp1_wc(struct ib_wc *wc,
+ struct bnxt_qplib_cqe *cqe)
+{
+ wc->opcode = IB_WC_RECV;
+ wc->status = __rawqp1_to_ib_wc_status(cqe->status);
+ wc->wc_flags |= IB_WC_GRH;
+}
+
+static bool bnxt_re_check_if_vlan_valid(struct bnxt_re_dev *rdev,
+ u16 vlan_id)
+{
+ /*
+ * Check if the vlan is configured in the host. If not configured, it
+ * can be a transparent VLAN. So dont report the vlan id.
+ */
+ if (!__vlan_find_dev_deep_rcu(rdev->netdev,
+ htons(ETH_P_8021Q), vlan_id))
+ return false;
+ return true;
+}
+
+static bool bnxt_re_is_vlan_pkt(struct bnxt_qplib_cqe *orig_cqe,
+ u16 *vid, u8 *sl)
+{
+ bool ret = false;
+ u32 metadata;
+ u16 tpid;
+
+ metadata = orig_cqe->raweth_qp1_metadata;
+ if (orig_cqe->raweth_qp1_flags2 &
+ CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS2_META_FORMAT_VLAN) {
+ tpid = ((metadata &
+ CQ_RES_RAWETH_QP1_RAWETH_QP1_METADATA_TPID_MASK) >>
+ CQ_RES_RAWETH_QP1_RAWETH_QP1_METADATA_TPID_SFT);
+ if (tpid == ETH_P_8021Q) {
+ *vid = metadata &
+ CQ_RES_RAWETH_QP1_RAWETH_QP1_METADATA_VID_MASK;
+ *sl = (metadata &
+ CQ_RES_RAWETH_QP1_RAWETH_QP1_METADATA_PRI_MASK) >>
+ CQ_RES_RAWETH_QP1_RAWETH_QP1_METADATA_PRI_SFT;
+ ret = true;
+ }
+ }
+
+ return ret;
+}
+
+static void bnxt_re_process_res_rc_wc(struct ib_wc *wc,
+ struct bnxt_qplib_cqe *cqe)
+{
+ wc->opcode = IB_WC_RECV;
+ wc->status = __rc_to_ib_wc_status(cqe->status);
+
+ if (cqe->flags & CQ_RES_RC_FLAGS_IMM)
+ wc->wc_flags |= IB_WC_WITH_IMM;
+ if (cqe->flags & CQ_RES_RC_FLAGS_INV)
+ wc->wc_flags |= IB_WC_WITH_INVALIDATE;
+ if ((cqe->flags & (CQ_RES_RC_FLAGS_RDMA | CQ_RES_RC_FLAGS_IMM)) ==
+ (CQ_RES_RC_FLAGS_RDMA | CQ_RES_RC_FLAGS_IMM))
+ wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
+}
+
+static void bnxt_re_process_res_shadow_qp_wc(struct bnxt_re_qp *gsi_sqp,
+ struct ib_wc *wc,
+ struct bnxt_qplib_cqe *cqe)
+{
+ struct bnxt_re_dev *rdev = gsi_sqp->rdev;
+ struct bnxt_re_qp *gsi_qp = NULL;
+ struct bnxt_qplib_cqe *orig_cqe = NULL;
+ struct bnxt_re_sqp_entries *sqp_entry = NULL;
+ int nw_type;
+ u32 tbl_idx;
+ u16 vlan_id;
+ u8 sl;
+
+ tbl_idx = cqe->wr_id;
+
+ sqp_entry = &rdev->gsi_ctx.sqp_tbl[tbl_idx];
+ gsi_qp = sqp_entry->qp1_qp;
+ orig_cqe = &sqp_entry->cqe;
+
+ wc->wr_id = sqp_entry->wrid;
+ wc->byte_len = orig_cqe->length;
+ wc->qp = &gsi_qp->ib_qp;
+
+ wc->ex.imm_data = cpu_to_be32(orig_cqe->immdata);
+ wc->src_qp = orig_cqe->src_qp;
+ memcpy(wc->smac, orig_cqe->smac, ETH_ALEN);
+ if (bnxt_re_is_vlan_pkt(orig_cqe, &vlan_id, &sl)) {
+ if (bnxt_re_check_if_vlan_valid(rdev, vlan_id)) {
+ wc->vlan_id = vlan_id;
+ wc->sl = sl;
+ wc->wc_flags |= IB_WC_WITH_VLAN;
+ }
+ }
+ wc->port_num = 1;
+ wc->vendor_err = orig_cqe->status;
+
+ wc->opcode = IB_WC_RECV;
+ wc->status = __rawqp1_to_ib_wc_status(orig_cqe->status);
+ wc->wc_flags |= IB_WC_GRH;
+
+ nw_type = bnxt_re_check_packet_type(orig_cqe->raweth_qp1_flags,
+ orig_cqe->raweth_qp1_flags2);
+ if (nw_type >= 0) {
+ wc->network_hdr_type = bnxt_re_to_ib_nw_type(nw_type);
+ wc->wc_flags |= IB_WC_WITH_NETWORK_HDR_TYPE;
+ }
+}
+
+static void bnxt_re_process_res_ud_wc(struct bnxt_re_qp *qp,
+ struct ib_wc *wc,
+ struct bnxt_qplib_cqe *cqe)
+{
+ struct bnxt_re_dev *rdev;
+ u16 vlan_id = 0;
+ u8 nw_type;
+
+ rdev = qp->rdev;
+ wc->opcode = IB_WC_RECV;
+ wc->status = __rc_to_ib_wc_status(cqe->status);
+
+ if (cqe->flags & CQ_RES_UD_FLAGS_IMM)
+ wc->wc_flags |= IB_WC_WITH_IMM;
+ /* report only on GSI QP for Thor */
+ if (qp->qplib_qp.type == CMDQ_CREATE_QP_TYPE_GSI) {
+ wc->wc_flags |= IB_WC_GRH;
+ memcpy(wc->smac, cqe->smac, ETH_ALEN);
+ wc->wc_flags |= IB_WC_WITH_SMAC;
+ if (cqe->flags & CQ_RES_UD_FLAGS_META_FORMAT_VLAN) {
+ vlan_id = (cqe->cfa_meta & 0xFFF);
+ }
+ /* Mark only if vlan_id is non zero */
+ if (vlan_id && bnxt_re_check_if_vlan_valid(rdev, vlan_id)) {
+ wc->vlan_id = vlan_id;
+ wc->wc_flags |= IB_WC_WITH_VLAN;
+ }
+ nw_type = (cqe->flags & CQ_RES_UD_FLAGS_ROCE_IP_VER_MASK) >>
+ CQ_RES_UD_FLAGS_ROCE_IP_VER_SFT;
+ wc->network_hdr_type = bnxt_re_to_ib_nw_type(nw_type);
+ wc->wc_flags |= IB_WC_WITH_NETWORK_HDR_TYPE;
+ }
+
+}
+
+static int send_phantom_wqe(struct bnxt_re_qp *qp)
+{
+ struct bnxt_qplib_qp *lib_qp = &qp->qplib_qp;
+ unsigned long flags;
+ int rc;
+
+ spin_lock_irqsave(&qp->sq_lock, flags);
+
+ rc = bnxt_re_bind_fence_mw(lib_qp);
+ if (!rc) {
+ lib_qp->sq.phantom_wqe_cnt++;
+ ibdev_dbg(&qp->rdev->ibdev,
+ "qp %#x sq->prod %#x sw_prod %#x phantom_wqe_cnt %d\n",
+ lib_qp->id, lib_qp->sq.hwq.prod,
+ HWQ_CMP(lib_qp->sq.hwq.prod, &lib_qp->sq.hwq),
+ lib_qp->sq.phantom_wqe_cnt);
+ }
+
+ spin_unlock_irqrestore(&qp->sq_lock, flags);
+ return rc;
+}
+
+int bnxt_re_poll_cq(struct ib_cq *ib_cq, int num_entries, struct ib_wc *wc)
+{
+ struct bnxt_re_cq *cq = container_of(ib_cq, struct bnxt_re_cq, ib_cq);
+ struct bnxt_re_qp *qp, *sh_qp;
+ struct bnxt_qplib_cqe *cqe;
+ int i, ncqe, budget;
+ struct bnxt_qplib_q *sq;
+ struct bnxt_qplib_qp *lib_qp;
+ u32 tbl_idx;
+ struct bnxt_re_sqp_entries *sqp_entry = NULL;
+ unsigned long flags;
+
+ /* User CQ; the only processing we do is to
+ * complete any pending CQ resize operation.
+ */
+ if (cq->umem) {
+ if (cq->resize_umem)
+ bnxt_re_resize_cq_complete(cq);
+ return 0;
+ }
+
+ spin_lock_irqsave(&cq->cq_lock, flags);
+ budget = min_t(u32, num_entries, cq->max_cql);
+ num_entries = budget;
+ if (!cq->cql) {
+ ibdev_err(&cq->rdev->ibdev, "POLL CQ : no CQL to use");
+ goto exit;
+ }
+ cqe = &cq->cql[0];
+ while (budget) {
+ lib_qp = NULL;
+ ncqe = bnxt_qplib_poll_cq(&cq->qplib_cq, cqe, budget, &lib_qp);
+ if (lib_qp) {
+ sq = &lib_qp->sq;
+ if (sq->send_phantom) {
+ qp = container_of(lib_qp,
+ struct bnxt_re_qp, qplib_qp);
+ if (send_phantom_wqe(qp) == -ENOMEM)
+ ibdev_err(&cq->rdev->ibdev,
+ "Phantom failed! Scheduled to send again\n");
+ else
+ sq->send_phantom = false;
+ }
+ }
+ if (ncqe < budget)
+ ncqe += bnxt_qplib_process_flush_list(&cq->qplib_cq,
+ cqe + ncqe,
+ budget - ncqe);
+
+ if (!ncqe)
+ break;
+
+ for (i = 0; i < ncqe; i++, cqe++) {
+ /* Transcribe each qplib_wqe back to ib_wc */
+ memset(wc, 0, sizeof(*wc));
+
+ wc->wr_id = cqe->wr_id;
+ wc->byte_len = cqe->length;
+ qp = container_of
+ ((struct bnxt_qplib_qp *)
+ (unsigned long)(cqe->qp_handle),
+ struct bnxt_re_qp, qplib_qp);
+ wc->qp = &qp->ib_qp;
+ if (cqe->flags & CQ_RES_RC_FLAGS_IMM)
+ wc->ex.imm_data = cpu_to_be32(cqe->immdata);
+ else
+ wc->ex.invalidate_rkey = cqe->invrkey;
+ wc->src_qp = cqe->src_qp;
+ memcpy(wc->smac, cqe->smac, ETH_ALEN);
+ wc->port_num = 1;
+ wc->vendor_err = cqe->status;
+
+ switch (cqe->opcode) {
+ case CQ_BASE_CQE_TYPE_REQ:
+ sh_qp = qp->rdev->gsi_ctx.gsi_sqp;
+ if (sh_qp &&
+ qp->qplib_qp.id == sh_qp->qplib_qp.id) {
+ /* Handle this completion with
+ * the stored completion
+ */
+ memset(wc, 0, sizeof(*wc));
+ continue;
+ }
+ bnxt_re_process_req_wc(wc, cqe);
+ break;
+ case CQ_BASE_CQE_TYPE_RES_RAWETH_QP1:
+ if (!cqe->status) {
+ int rc = 0;
+
+ rc = bnxt_re_process_raw_qp_pkt_rx
+ (qp, cqe);
+ if (!rc) {
+ memset(wc, 0, sizeof(*wc));
+ continue;
+ }
+ cqe->status = -1;
+ }
+ /* Errors need not be looped back.
+ * But change the wr_id to the one
+ * stored in the table
+ */
+ tbl_idx = cqe->wr_id;
+ sqp_entry = &cq->rdev->gsi_ctx.sqp_tbl[tbl_idx];
+ wc->wr_id = sqp_entry->wrid;
+ bnxt_re_process_res_rawqp1_wc(wc, cqe);
+ break;
+ case CQ_BASE_CQE_TYPE_RES_RC:
+ bnxt_re_process_res_rc_wc(wc, cqe);
+ break;
+ case CQ_BASE_CQE_TYPE_RES_UD:
+ sh_qp = qp->rdev->gsi_ctx.gsi_sqp;
+ if (sh_qp &&
+ qp->qplib_qp.id == sh_qp->qplib_qp.id) {
+ /* Handle this completion with
+ * the stored completion
+ */
+ if (cqe->status) {
+ continue;
+ } else {
+ bnxt_re_process_res_shadow_qp_wc
+ (qp, wc, cqe);
+ break;
+ }
+ }
+ bnxt_re_process_res_ud_wc(qp, wc, cqe);
+ break;
+ default:
+ ibdev_err(&cq->rdev->ibdev,
+ "POLL CQ : type 0x%x not handled",
+ cqe->opcode);
+ continue;
+ }
+ wc++;
+ budget--;
+ }
+ }
+exit:
+ spin_unlock_irqrestore(&cq->cq_lock, flags);
+ return num_entries - budget;
+}
+
+int bnxt_re_req_notify_cq(struct ib_cq *ib_cq,
+ enum ib_cq_notify_flags ib_cqn_flags)
+{
+ struct bnxt_re_cq *cq = container_of(ib_cq, struct bnxt_re_cq, ib_cq);
+ int type = 0, rc = 0;
+ unsigned long flags;
+
+ spin_lock_irqsave(&cq->cq_lock, flags);
+ /* Trigger on the very next completion */
+ if (ib_cqn_flags & IB_CQ_NEXT_COMP)
+ type = DBC_DBC_TYPE_CQ_ARMALL;
+ /* Trigger on the next solicited completion */
+ else if (ib_cqn_flags & IB_CQ_SOLICITED)
+ type = DBC_DBC_TYPE_CQ_ARMSE;
+
+ /* Poll to see if there are missed events */
+ if ((ib_cqn_flags & IB_CQ_REPORT_MISSED_EVENTS) &&
+ !(bnxt_qplib_is_cq_empty(&cq->qplib_cq))) {
+ rc = 1;
+ goto exit;
+ }
+ bnxt_qplib_req_notify_cq(&cq->qplib_cq, type);
+
+exit:
+ spin_unlock_irqrestore(&cq->cq_lock, flags);
+ return rc;
+}
+
+/* Memory Regions */
+struct ib_mr *bnxt_re_get_dma_mr(struct ib_pd *ib_pd, int mr_access_flags)
+{
+ struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd);
+ struct bnxt_re_dev *rdev = pd->rdev;
+ struct bnxt_re_mr *mr;
+ u32 active_mrs;
+ int rc;
+
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+ mr->rdev = rdev;
+ mr->qplib_mr.pd = &pd->qplib_pd;
+ mr->qplib_mr.access_flags = __from_ib_access_flags(mr_access_flags);
+ mr->qplib_mr.type = CMDQ_ALLOCATE_MRW_MRW_FLAGS_PMR;
+
+ if (mr_access_flags & IB_ACCESS_RELAXED_ORDERING)
+ bnxt_re_check_and_set_relaxed_ordering(rdev, &mr->qplib_mr);
+
+ /* Allocate and register 0 as the address */
+ rc = bnxt_qplib_alloc_mrw(&rdev->qplib_res, &mr->qplib_mr);
+ if (rc)
+ goto fail;
+
+ mr->qplib_mr.hwq.level = PBL_LVL_MAX;
+ mr->qplib_mr.total_size = -1; /* Infinte length */
+ rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mr->qplib_mr, NULL, 0,
+ PAGE_SIZE, false);
+ if (rc)
+ goto fail_mr;
+
+ mr->ib_mr.lkey = mr->qplib_mr.lkey;
+ if (mr_access_flags & (IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ |
+ IB_ACCESS_REMOTE_ATOMIC))
+ mr->ib_mr.rkey = mr->ib_mr.lkey;
+ active_mrs = atomic_inc_return(&rdev->stats.res.mr_count);
+ if (active_mrs > rdev->stats.res.mr_watermark)
+ rdev->stats.res.mr_watermark = active_mrs;
+
+ return &mr->ib_mr;
+
+fail_mr:
+ bnxt_qplib_free_mrw(&rdev->qplib_res, &mr->qplib_mr);
+fail:
+ kfree(mr);
+ return ERR_PTR(rc);
+}
+
+int bnxt_re_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata)
+{
+ struct bnxt_re_mr *mr = container_of(ib_mr, struct bnxt_re_mr, ib_mr);
+ struct bnxt_re_dev *rdev = mr->rdev;
+ int rc;
+
+ rc = bnxt_qplib_free_mrw(&rdev->qplib_res, &mr->qplib_mr);
+ if (rc) {
+ ibdev_err(&rdev->ibdev, "Dereg MR failed: %#x\n", rc);
+ return rc;
+ }
+
+ if (mr->pages) {
+ rc = bnxt_qplib_free_fast_reg_page_list(&rdev->qplib_res,
+ &mr->qplib_frpl);
+ kfree(mr->pages);
+ mr->npages = 0;
+ mr->pages = NULL;
+ }
+ ib_umem_release(mr->ib_umem);
+
+ kfree(mr);
+ atomic_dec(&rdev->stats.res.mr_count);
+ return rc;
+}
+
+static int bnxt_re_set_page(struct ib_mr *ib_mr, u64 addr)
+{
+ struct bnxt_re_mr *mr = container_of(ib_mr, struct bnxt_re_mr, ib_mr);
+
+ if (unlikely(mr->npages == mr->qplib_frpl.max_pg_ptrs))
+ return -ENOMEM;
+
+ mr->pages[mr->npages++] = addr;
+ return 0;
+}
+
+int bnxt_re_map_mr_sg(struct ib_mr *ib_mr, struct scatterlist *sg, int sg_nents,
+ unsigned int *sg_offset)
+{
+ struct bnxt_re_mr *mr = container_of(ib_mr, struct bnxt_re_mr, ib_mr);
+
+ mr->npages = 0;
+ return ib_sg_to_pages(ib_mr, sg, sg_nents, sg_offset, bnxt_re_set_page);
+}
+
+struct ib_mr *bnxt_re_alloc_mr(struct ib_pd *ib_pd, enum ib_mr_type type,
+ u32 max_num_sg)
+{
+ struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd);
+ struct bnxt_re_dev *rdev = pd->rdev;
+ struct bnxt_re_mr *mr = NULL;
+ u32 active_mrs;
+ int rc;
+
+ if (type != IB_MR_TYPE_MEM_REG) {
+ ibdev_dbg(&rdev->ibdev, "MR type 0x%x not supported", type);
+ return ERR_PTR(-EINVAL);
+ }
+ if (max_num_sg > MAX_PBL_LVL_1_PGS)
+ return ERR_PTR(-EINVAL);
+
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+ mr->rdev = rdev;
+ mr->qplib_mr.pd = &pd->qplib_pd;
+ mr->qplib_mr.access_flags = BNXT_QPLIB_FR_PMR;
+ mr->qplib_mr.type = CMDQ_ALLOCATE_MRW_MRW_FLAGS_PMR;
+
+ rc = bnxt_qplib_alloc_mrw(&rdev->qplib_res, &mr->qplib_mr);
+ if (rc)
+ goto bail;
+
+ mr->ib_mr.lkey = mr->qplib_mr.lkey;
+ mr->ib_mr.rkey = mr->ib_mr.lkey;
+
+ mr->pages = kcalloc(max_num_sg, sizeof(u64), GFP_KERNEL);
+ if (!mr->pages) {
+ rc = -ENOMEM;
+ goto fail;
+ }
+ rc = bnxt_qplib_alloc_fast_reg_page_list(&rdev->qplib_res,
+ &mr->qplib_frpl, max_num_sg);
+ if (rc) {
+ ibdev_err(&rdev->ibdev,
+ "Failed to allocate HW FR page list");
+ goto fail_mr;
+ }
+
+ active_mrs = atomic_inc_return(&rdev->stats.res.mr_count);
+ if (active_mrs > rdev->stats.res.mr_watermark)
+ rdev->stats.res.mr_watermark = active_mrs;
+ return &mr->ib_mr;
+
+fail_mr:
+ kfree(mr->pages);
+fail:
+ bnxt_qplib_free_mrw(&rdev->qplib_res, &mr->qplib_mr);
+bail:
+ kfree(mr);
+ return ERR_PTR(rc);
+}
+
+struct ib_mw *bnxt_re_alloc_mw(struct ib_pd *ib_pd, enum ib_mw_type type,
+ struct ib_udata *udata)
+{
+ struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd);
+ struct bnxt_re_dev *rdev = pd->rdev;
+ struct bnxt_re_mw *mw;
+ u32 active_mws;
+ int rc;
+
+ mw = kzalloc(sizeof(*mw), GFP_KERNEL);
+ if (!mw)
+ return ERR_PTR(-ENOMEM);
+ mw->rdev = rdev;
+ mw->qplib_mw.pd = &pd->qplib_pd;
+
+ mw->qplib_mw.type = (type == IB_MW_TYPE_1 ?
+ CMDQ_ALLOCATE_MRW_MRW_FLAGS_MW_TYPE1 :
+ CMDQ_ALLOCATE_MRW_MRW_FLAGS_MW_TYPE2B);
+ rc = bnxt_qplib_alloc_mrw(&rdev->qplib_res, &mw->qplib_mw);
+ if (rc) {
+ ibdev_err(&rdev->ibdev, "Allocate MW failed!");
+ goto fail;
+ }
+ mw->ib_mw.rkey = mw->qplib_mw.rkey;
+
+ active_mws = atomic_inc_return(&rdev->stats.res.mw_count);
+ if (active_mws > rdev->stats.res.mw_watermark)
+ rdev->stats.res.mw_watermark = active_mws;
+ return &mw->ib_mw;
+
+fail:
+ kfree(mw);
+ return ERR_PTR(rc);
+}
+
+int bnxt_re_dealloc_mw(struct ib_mw *ib_mw)
+{
+ struct bnxt_re_mw *mw = container_of(ib_mw, struct bnxt_re_mw, ib_mw);
+ struct bnxt_re_dev *rdev = mw->rdev;
+ int rc;
+
+ rc = bnxt_qplib_free_mrw(&rdev->qplib_res, &mw->qplib_mw);
+ if (rc) {
+ ibdev_err(&rdev->ibdev, "Free MW failed: %#x\n", rc);
+ return rc;
+ }
+
+ kfree(mw);
+ atomic_dec(&rdev->stats.res.mw_count);
+ return rc;
+}
+
+static struct ib_mr *__bnxt_re_user_reg_mr(struct ib_pd *ib_pd, u64 length, u64 virt_addr,
+ int mr_access_flags, struct ib_umem *umem)
+{
+ struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd);
+ struct bnxt_re_dev *rdev = pd->rdev;
+ unsigned long page_size;
+ struct bnxt_re_mr *mr;
+ int umem_pgs, rc;
+ u32 active_mrs;
+
+ if (length > BNXT_RE_MAX_MR_SIZE) {
+ ibdev_err(&rdev->ibdev, "MR Size: %lld > Max supported:%lld\n",
+ length, BNXT_RE_MAX_MR_SIZE);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ page_size = ib_umem_find_best_pgsz(umem, BNXT_RE_PAGE_SIZE_SUPPORTED, virt_addr);
+ if (!page_size) {
+ ibdev_err(&rdev->ibdev, "umem page size unsupported!");
+ return ERR_PTR(-EINVAL);
+ }
+
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+ mr->rdev = rdev;
+ mr->qplib_mr.pd = &pd->qplib_pd;
+ mr->qplib_mr.access_flags = __from_ib_access_flags(mr_access_flags);
+ mr->qplib_mr.type = CMDQ_ALLOCATE_MRW_MRW_FLAGS_MR;
+
+ if (!_is_alloc_mr_unified(rdev->dev_attr->dev_cap_flags)) {
+ rc = bnxt_qplib_alloc_mrw(&rdev->qplib_res, &mr->qplib_mr);
+ if (rc) {
+ ibdev_err(&rdev->ibdev, "Failed to allocate MR rc = %d", rc);
+ rc = -EIO;
+ goto free_mr;
+ }
+ /* The fixed portion of the rkey is the same as the lkey */
+ mr->ib_mr.rkey = mr->qplib_mr.rkey;
+ } else {
+ mr->qplib_mr.flags = CMDQ_REGISTER_MR_FLAGS_ALLOC_MR;
+ }
+ mr->ib_umem = umem;
+ mr->qplib_mr.va = virt_addr;
+ mr->qplib_mr.total_size = length;
+
+ if (mr_access_flags & IB_ACCESS_RELAXED_ORDERING)
+ bnxt_re_check_and_set_relaxed_ordering(rdev, &mr->qplib_mr);
+
+ umem_pgs = ib_umem_num_dma_blocks(umem, page_size);
+ rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mr->qplib_mr, umem,
+ umem_pgs, page_size,
+ _is_alloc_mr_unified(rdev->dev_attr->dev_cap_flags));
+ if (rc) {
+ ibdev_err(&rdev->ibdev, "Failed to register user MR - rc = %d\n", rc);
+ rc = -EIO;
+ goto free_mrw;
+ }
+
+ mr->ib_mr.lkey = mr->qplib_mr.lkey;
+ mr->ib_mr.rkey = mr->qplib_mr.lkey;
+ active_mrs = atomic_inc_return(&rdev->stats.res.mr_count);
+ if (active_mrs > rdev->stats.res.mr_watermark)
+ rdev->stats.res.mr_watermark = active_mrs;
+
+ return &mr->ib_mr;
+
+free_mrw:
+ bnxt_qplib_free_mrw(&rdev->qplib_res, &mr->qplib_mr);
+free_mr:
+ kfree(mr);
+ return ERR_PTR(rc);
+}
+
+struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length,
+ u64 virt_addr, int mr_access_flags,
+ struct ib_dmah *dmah,
+ struct ib_udata *udata)
+{
+ struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd);
+ struct bnxt_re_dev *rdev = pd->rdev;
+ struct ib_umem *umem;
+ struct ib_mr *ib_mr;
+
+ if (dmah)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ umem = ib_umem_get(&rdev->ibdev, start, length, mr_access_flags);
+ if (IS_ERR(umem))
+ return ERR_CAST(umem);
+
+ ib_mr = __bnxt_re_user_reg_mr(ib_pd, length, virt_addr, mr_access_flags, umem);
+ if (IS_ERR(ib_mr))
+ ib_umem_release(umem);
+ return ib_mr;
+}
+
+struct ib_mr *bnxt_re_reg_user_mr_dmabuf(struct ib_pd *ib_pd, u64 start,
+ u64 length, u64 virt_addr, int fd,
+ int mr_access_flags,
+ struct ib_dmah *dmah,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd);
+ struct bnxt_re_dev *rdev = pd->rdev;
+ struct ib_umem_dmabuf *umem_dmabuf;
+ struct ib_umem *umem;
+ struct ib_mr *ib_mr;
+
+ if (dmah)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ umem_dmabuf = ib_umem_dmabuf_get_pinned(&rdev->ibdev, start, length,
+ fd, mr_access_flags);
+ if (IS_ERR(umem_dmabuf))
+ return ERR_CAST(umem_dmabuf);
+
+ umem = &umem_dmabuf->umem;
+
+ ib_mr = __bnxt_re_user_reg_mr(ib_pd, length, virt_addr, mr_access_flags, umem);
+ if (IS_ERR(ib_mr))
+ ib_umem_release(umem);
+ return ib_mr;
+}
+
+int bnxt_re_alloc_ucontext(struct ib_ucontext *ctx, struct ib_udata *udata)
+{
+ struct ib_device *ibdev = ctx->device;
+ struct bnxt_re_ucontext *uctx =
+ container_of(ctx, struct bnxt_re_ucontext, ib_uctx);
+ struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev);
+ struct bnxt_qplib_dev_attr *dev_attr = rdev->dev_attr;
+ struct bnxt_re_user_mmap_entry *entry;
+ struct bnxt_re_uctx_resp resp = {};
+ struct bnxt_re_uctx_req ureq = {};
+ u32 chip_met_rev_num = 0;
+ int rc;
+
+ ibdev_dbg(ibdev, "ABI version requested %u", ibdev->ops.uverbs_abi_ver);
+
+ if (ibdev->ops.uverbs_abi_ver != BNXT_RE_ABI_VERSION) {
+ ibdev_dbg(ibdev, " is different from the device %d ",
+ BNXT_RE_ABI_VERSION);
+ return -EPERM;
+ }
+
+ uctx->rdev = rdev;
+
+ uctx->shpg = (void *)__get_free_page(GFP_KERNEL);
+ if (!uctx->shpg) {
+ rc = -ENOMEM;
+ goto fail;
+ }
+ spin_lock_init(&uctx->sh_lock);
+
+ resp.comp_mask = BNXT_RE_UCNTX_CMASK_HAVE_CCTX;
+ chip_met_rev_num = rdev->chip_ctx->chip_num;
+ chip_met_rev_num |= ((u32)rdev->chip_ctx->chip_rev & 0xFF) <<
+ BNXT_RE_CHIP_ID0_CHIP_REV_SFT;
+ chip_met_rev_num |= ((u32)rdev->chip_ctx->chip_metal & 0xFF) <<
+ BNXT_RE_CHIP_ID0_CHIP_MET_SFT;
+ resp.chip_id0 = chip_met_rev_num;
+ /*Temp, Use xa_alloc instead */
+ resp.dev_id = rdev->en_dev->pdev->devfn;
+ resp.max_qp = rdev->qplib_ctx.qpc_count;
+ resp.pg_size = PAGE_SIZE;
+ resp.cqe_sz = sizeof(struct cq_base);
+ resp.max_cqd = dev_attr->max_cq_wqes;
+
+ if (rdev->chip_ctx->modes.db_push)
+ resp.comp_mask |= BNXT_RE_UCNTX_CMASK_WC_DPI_ENABLED;
+
+ entry = bnxt_re_mmap_entry_insert(uctx, 0, BNXT_RE_MMAP_SH_PAGE, NULL);
+ if (!entry) {
+ rc = -ENOMEM;
+ goto cfail;
+ }
+ uctx->shpage_mmap = &entry->rdma_entry;
+ if (rdev->pacing.dbr_pacing)
+ resp.comp_mask |= BNXT_RE_UCNTX_CMASK_DBR_PACING_ENABLED;
+
+ if (_is_host_msn_table(rdev->qplib_res.dattr->dev_cap_flags2))
+ resp.comp_mask |= BNXT_RE_UCNTX_CMASK_MSN_TABLE_ENABLED;
+
+ if (udata->inlen >= sizeof(ureq)) {
+ rc = ib_copy_from_udata(&ureq, udata, min(udata->inlen, sizeof(ureq)));
+ if (rc)
+ goto cfail;
+ if (ureq.comp_mask & BNXT_RE_COMP_MASK_REQ_UCNTX_POW2_SUPPORT) {
+ resp.comp_mask |= BNXT_RE_UCNTX_CMASK_POW2_DISABLED;
+ uctx->cmask |= BNXT_RE_UCNTX_CAP_POW2_DISABLED;
+ }
+ if (ureq.comp_mask & BNXT_RE_COMP_MASK_REQ_UCNTX_VAR_WQE_SUPPORT) {
+ resp.comp_mask |= BNXT_RE_UCNTX_CMASK_HAVE_MODE;
+ resp.mode = rdev->chip_ctx->modes.wqe_mode;
+ if (resp.mode == BNXT_QPLIB_WQE_MODE_VARIABLE)
+ uctx->cmask |= BNXT_RE_UCNTX_CAP_VAR_WQE_ENABLED;
+ }
+ }
+
+ rc = ib_copy_to_udata(udata, &resp, min(udata->outlen, sizeof(resp)));
+ if (rc) {
+ ibdev_err(ibdev, "Failed to copy user context");
+ rc = -EFAULT;
+ goto cfail;
+ }
+
+ return 0;
+cfail:
+ free_page((unsigned long)uctx->shpg);
+ uctx->shpg = NULL;
+fail:
+ return rc;
+}
+
+void bnxt_re_dealloc_ucontext(struct ib_ucontext *ib_uctx)
+{
+ struct bnxt_re_ucontext *uctx = container_of(ib_uctx,
+ struct bnxt_re_ucontext,
+ ib_uctx);
+
+ struct bnxt_re_dev *rdev = uctx->rdev;
+
+ rdma_user_mmap_entry_remove(uctx->shpage_mmap);
+ uctx->shpage_mmap = NULL;
+ if (uctx->shpg)
+ free_page((unsigned long)uctx->shpg);
+
+ if (uctx->dpi.dbr) {
+ /* Free DPI only if this is the first PD allocated by the
+ * application and mark the context dpi as NULL
+ */
+ bnxt_qplib_dealloc_dpi(&rdev->qplib_res, &uctx->dpi);
+ uctx->dpi.dbr = NULL;
+ }
+}
+
+static int bnxt_re_setup_vnic(struct bnxt_re_dev *rdev, struct bnxt_re_qp *qp)
+{
+ int rc;
+
+ rc = bnxt_re_hwrm_alloc_vnic(rdev);
+ if (rc)
+ return rc;
+
+ rc = bnxt_re_hwrm_cfg_vnic(rdev, qp->qplib_qp.id);
+ if (rc)
+ goto out_free_vnic;
+
+ return 0;
+out_free_vnic:
+ bnxt_re_hwrm_free_vnic(rdev);
+ return rc;
+}
+
+struct ib_flow *bnxt_re_create_flow(struct ib_qp *ib_qp,
+ struct ib_flow_attr *attr,
+ struct ib_udata *udata)
+{
+ struct bnxt_re_qp *qp = container_of(ib_qp, struct bnxt_re_qp, ib_qp);
+ struct bnxt_re_dev *rdev = qp->rdev;
+ struct bnxt_re_flow *flow;
+ int rc;
+
+ if (attr->type != IB_FLOW_ATTR_SNIFFER ||
+ !rdev->rcfw.roce_mirror)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ mutex_lock(&rdev->qp_lock);
+ if (rdev->sniffer_flow_created) {
+ ibdev_err(&rdev->ibdev, "RoCE Mirroring is already Configured\n");
+ mutex_unlock(&rdev->qp_lock);
+ return ERR_PTR(-EBUSY);
+ }
+
+ flow = kzalloc(sizeof(*flow), GFP_KERNEL);
+ if (!flow) {
+ mutex_unlock(&rdev->qp_lock);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ flow->rdev = rdev;
+
+ rc = bnxt_re_setup_vnic(rdev, qp);
+ if (rc)
+ goto out_free_flow;
+
+ rc = bnxt_qplib_create_flow(&rdev->qplib_res);
+ if (rc)
+ goto out_free_vnic;
+
+ rdev->sniffer_flow_created = 1;
+ mutex_unlock(&rdev->qp_lock);
+
+ return &flow->ib_flow;
+
+out_free_vnic:
+ bnxt_re_hwrm_free_vnic(rdev);
+out_free_flow:
+ mutex_unlock(&rdev->qp_lock);
+ kfree(flow);
+ return ERR_PTR(rc);
+}
+
+int bnxt_re_destroy_flow(struct ib_flow *flow_id)
+{
+ struct bnxt_re_flow *flow =
+ container_of(flow_id, struct bnxt_re_flow, ib_flow);
+ struct bnxt_re_dev *rdev = flow->rdev;
+ int rc;
+
+ mutex_lock(&rdev->qp_lock);
+ rc = bnxt_qplib_destroy_flow(&rdev->qplib_res);
+ if (rc)
+ ibdev_dbg(&rdev->ibdev, "failed to destroy_flow rc = %d\n", rc);
+ rdev->sniffer_flow_created = 0;
+
+ bnxt_re_hwrm_free_vnic(rdev);
+ mutex_unlock(&rdev->qp_lock);
+ kfree(flow);
+
+ return rc;
+}
+
+static struct bnxt_re_cq *bnxt_re_search_for_cq(struct bnxt_re_dev *rdev, u32 cq_id)
+{
+ struct bnxt_re_cq *cq = NULL, *tmp_cq;
+
+ hash_for_each_possible(rdev->cq_hash, tmp_cq, hash_entry, cq_id) {
+ if (tmp_cq->qplib_cq.id == cq_id) {
+ cq = tmp_cq;
+ break;
+ }
+ }
+ return cq;
+}
+
+static struct bnxt_re_srq *bnxt_re_search_for_srq(struct bnxt_re_dev *rdev, u32 srq_id)
+{
+ struct bnxt_re_srq *srq = NULL, *tmp_srq;
+
+ hash_for_each_possible(rdev->srq_hash, tmp_srq, hash_entry, srq_id) {
+ if (tmp_srq->qplib_srq.id == srq_id) {
+ srq = tmp_srq;
+ break;
+ }
+ }
+ return srq;
+}
+
+/* Helper function to mmap the virtual memory from user app */
+int bnxt_re_mmap(struct ib_ucontext *ib_uctx, struct vm_area_struct *vma)
+{
+ struct bnxt_re_ucontext *uctx = container_of(ib_uctx,
+ struct bnxt_re_ucontext,
+ ib_uctx);
+ struct bnxt_re_user_mmap_entry *bnxt_entry;
+ struct rdma_user_mmap_entry *rdma_entry;
+ int ret = 0;
+ u64 pfn;
+
+ rdma_entry = rdma_user_mmap_entry_get(&uctx->ib_uctx, vma);
+ if (!rdma_entry)
+ return -EINVAL;
+
+ bnxt_entry = container_of(rdma_entry, struct bnxt_re_user_mmap_entry,
+ rdma_entry);
+
+ switch (bnxt_entry->mmap_flag) {
+ case BNXT_RE_MMAP_WC_DB:
+ pfn = bnxt_entry->mem_offset >> PAGE_SHIFT;
+ ret = rdma_user_mmap_io(ib_uctx, vma, pfn, PAGE_SIZE,
+ pgprot_writecombine(vma->vm_page_prot),
+ rdma_entry);
+ break;
+ case BNXT_RE_MMAP_UC_DB:
+ pfn = bnxt_entry->mem_offset >> PAGE_SHIFT;
+ ret = rdma_user_mmap_io(ib_uctx, vma, pfn, PAGE_SIZE,
+ pgprot_noncached(vma->vm_page_prot),
+ rdma_entry);
+ break;
+ case BNXT_RE_MMAP_SH_PAGE:
+ ret = vm_insert_page(vma, vma->vm_start, virt_to_page(uctx->shpg));
+ break;
+ case BNXT_RE_MMAP_DBR_BAR:
+ pfn = bnxt_entry->mem_offset >> PAGE_SHIFT;
+ ret = rdma_user_mmap_io(ib_uctx, vma, pfn, PAGE_SIZE,
+ pgprot_noncached(vma->vm_page_prot),
+ rdma_entry);
+ break;
+ case BNXT_RE_MMAP_DBR_PAGE:
+ case BNXT_RE_MMAP_TOGGLE_PAGE:
+ /* Driver doesn't expect write access for user space */
+ if (vma->vm_flags & VM_WRITE)
+ ret = -EFAULT;
+ else
+ ret = vm_insert_page(vma, vma->vm_start,
+ virt_to_page((void *)bnxt_entry->mem_offset));
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ rdma_user_mmap_entry_put(rdma_entry);
+ return ret;
+}
+
+void bnxt_re_mmap_free(struct rdma_user_mmap_entry *rdma_entry)
+{
+ struct bnxt_re_user_mmap_entry *bnxt_entry;
+
+ bnxt_entry = container_of(rdma_entry, struct bnxt_re_user_mmap_entry,
+ rdma_entry);
+
+ kfree(bnxt_entry);
+}
+
+int bnxt_re_process_mad(struct ib_device *ibdev, int mad_flags,
+ u32 port_num, const struct ib_wc *in_wc,
+ const struct ib_grh *in_grh,
+ const struct ib_mad *in_mad, struct ib_mad *out_mad,
+ size_t *out_mad_size, u16 *out_mad_pkey_index)
+{
+ struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev);
+ struct ib_class_port_info cpi = {};
+ int ret = IB_MAD_RESULT_SUCCESS;
+ int rc = 0;
+
+ if (in_mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_PERF_MGMT)
+ return ret;
+
+ switch (in_mad->mad_hdr.attr_id) {
+ case IB_PMA_CLASS_PORT_INFO:
+ cpi.capability_mask = IB_PMA_CLASS_CAP_EXT_WIDTH;
+ memcpy((out_mad->data + 40), &cpi, sizeof(cpi));
+ break;
+ case IB_PMA_PORT_COUNTERS_EXT:
+ rc = bnxt_re_assign_pma_port_ext_counters(rdev, out_mad);
+ break;
+ case IB_PMA_PORT_COUNTERS:
+ rc = bnxt_re_assign_pma_port_counters(rdev, out_mad);
+ break;
+ default:
+ rc = -EINVAL;
+ break;
+ }
+ if (rc)
+ return IB_MAD_RESULT_FAILURE;
+ ret |= IB_MAD_RESULT_REPLY;
+ return ret;
+}
+
+static int UVERBS_HANDLER(BNXT_RE_METHOD_NOTIFY_DRV)(struct uverbs_attr_bundle *attrs)
+{
+ struct bnxt_re_ucontext *uctx;
+
+ uctx = container_of(ib_uverbs_get_ucontext(attrs), struct bnxt_re_ucontext, ib_uctx);
+ bnxt_re_pacing_alert(uctx->rdev);
+ return 0;
+}
+
+static int UVERBS_HANDLER(BNXT_RE_METHOD_ALLOC_PAGE)(struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs, BNXT_RE_ALLOC_PAGE_HANDLE);
+ enum bnxt_re_alloc_page_type alloc_type;
+ struct bnxt_re_user_mmap_entry *entry;
+ enum bnxt_re_mmap_flag mmap_flag;
+ struct bnxt_qplib_chip_ctx *cctx;
+ struct bnxt_re_ucontext *uctx;
+ struct bnxt_re_dev *rdev;
+ u64 mmap_offset;
+ u32 length;
+ u32 dpi;
+ u64 addr;
+ int err;
+
+ uctx = container_of(ib_uverbs_get_ucontext(attrs), struct bnxt_re_ucontext, ib_uctx);
+ if (IS_ERR(uctx))
+ return PTR_ERR(uctx);
+
+ err = uverbs_get_const(&alloc_type, attrs, BNXT_RE_ALLOC_PAGE_TYPE);
+ if (err)
+ return err;
+
+ rdev = uctx->rdev;
+ cctx = rdev->chip_ctx;
+
+ switch (alloc_type) {
+ case BNXT_RE_ALLOC_WC_PAGE:
+ if (cctx->modes.db_push) {
+ if (bnxt_qplib_alloc_dpi(&rdev->qplib_res, &uctx->wcdpi,
+ uctx, BNXT_QPLIB_DPI_TYPE_WC))
+ return -ENOMEM;
+ length = PAGE_SIZE;
+ dpi = uctx->wcdpi.dpi;
+ addr = (u64)uctx->wcdpi.umdbr;
+ mmap_flag = BNXT_RE_MMAP_WC_DB;
+ } else {
+ return -EINVAL;
+ }
+
+ break;
+ case BNXT_RE_ALLOC_DBR_BAR_PAGE:
+ length = PAGE_SIZE;
+ addr = (u64)rdev->pacing.dbr_bar_addr;
+ mmap_flag = BNXT_RE_MMAP_DBR_BAR;
+ break;
+
+ case BNXT_RE_ALLOC_DBR_PAGE:
+ length = PAGE_SIZE;
+ addr = (u64)rdev->pacing.dbr_page;
+ mmap_flag = BNXT_RE_MMAP_DBR_PAGE;
+ break;
+
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ entry = bnxt_re_mmap_entry_insert(uctx, addr, mmap_flag, &mmap_offset);
+ if (!entry)
+ return -ENOMEM;
+
+ uobj->object = entry;
+ uverbs_finalize_uobj_create(attrs, BNXT_RE_ALLOC_PAGE_HANDLE);
+ err = uverbs_copy_to(attrs, BNXT_RE_ALLOC_PAGE_MMAP_OFFSET,
+ &mmap_offset, sizeof(mmap_offset));
+ if (err)
+ return err;
+
+ err = uverbs_copy_to(attrs, BNXT_RE_ALLOC_PAGE_MMAP_LENGTH,
+ &length, sizeof(length));
+ if (err)
+ return err;
+
+ err = uverbs_copy_to(attrs, BNXT_RE_ALLOC_PAGE_DPI,
+ &dpi, sizeof(dpi));
+ if (err)
+ return err;
+
+ return 0;
+}
+
+static int alloc_page_obj_cleanup(struct ib_uobject *uobject,
+ enum rdma_remove_reason why,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct bnxt_re_user_mmap_entry *entry = uobject->object;
+ struct bnxt_re_ucontext *uctx = entry->uctx;
+
+ switch (entry->mmap_flag) {
+ case BNXT_RE_MMAP_WC_DB:
+ if (uctx && uctx->wcdpi.dbr) {
+ struct bnxt_re_dev *rdev = uctx->rdev;
+
+ bnxt_qplib_dealloc_dpi(&rdev->qplib_res, &uctx->wcdpi);
+ uctx->wcdpi.dbr = NULL;
+ }
+ break;
+ case BNXT_RE_MMAP_DBR_BAR:
+ case BNXT_RE_MMAP_DBR_PAGE:
+ break;
+ default:
+ goto exit;
+ }
+ rdma_user_mmap_entry_remove(&entry->rdma_entry);
+exit:
+ return 0;
+}
+
+DECLARE_UVERBS_NAMED_METHOD(BNXT_RE_METHOD_ALLOC_PAGE,
+ UVERBS_ATTR_IDR(BNXT_RE_ALLOC_PAGE_HANDLE,
+ BNXT_RE_OBJECT_ALLOC_PAGE,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY),
+ UVERBS_ATTR_CONST_IN(BNXT_RE_ALLOC_PAGE_TYPE,
+ enum bnxt_re_alloc_page_type,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(BNXT_RE_ALLOC_PAGE_MMAP_OFFSET,
+ UVERBS_ATTR_TYPE(u64),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(BNXT_RE_ALLOC_PAGE_MMAP_LENGTH,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(BNXT_RE_ALLOC_PAGE_DPI,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(BNXT_RE_METHOD_DESTROY_PAGE,
+ UVERBS_ATTR_IDR(BNXT_RE_DESTROY_PAGE_HANDLE,
+ BNXT_RE_OBJECT_ALLOC_PAGE,
+ UVERBS_ACCESS_DESTROY,
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_OBJECT(BNXT_RE_OBJECT_ALLOC_PAGE,
+ UVERBS_TYPE_ALLOC_IDR(alloc_page_obj_cleanup),
+ &UVERBS_METHOD(BNXT_RE_METHOD_ALLOC_PAGE),
+ &UVERBS_METHOD(BNXT_RE_METHOD_DESTROY_PAGE));
+
+DECLARE_UVERBS_NAMED_METHOD(BNXT_RE_METHOD_NOTIFY_DRV);
+
+DECLARE_UVERBS_GLOBAL_METHODS(BNXT_RE_OBJECT_NOTIFY_DRV,
+ &UVERBS_METHOD(BNXT_RE_METHOD_NOTIFY_DRV));
+
+/* Toggle MEM */
+static int UVERBS_HANDLER(BNXT_RE_METHOD_GET_TOGGLE_MEM)(struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs, BNXT_RE_TOGGLE_MEM_HANDLE);
+ enum bnxt_re_mmap_flag mmap_flag = BNXT_RE_MMAP_TOGGLE_PAGE;
+ enum bnxt_re_get_toggle_mem_type res_type;
+ struct bnxt_re_user_mmap_entry *entry;
+ struct bnxt_re_ucontext *uctx;
+ struct ib_ucontext *ib_uctx;
+ struct bnxt_re_dev *rdev;
+ struct bnxt_re_srq *srq;
+ u32 length = PAGE_SIZE;
+ struct bnxt_re_cq *cq;
+ u64 mem_offset;
+ u32 offset = 0;
+ u64 addr = 0;
+ u32 res_id;
+ int err;
+
+ ib_uctx = ib_uverbs_get_ucontext(attrs);
+ if (IS_ERR(ib_uctx))
+ return PTR_ERR(ib_uctx);
+
+ err = uverbs_get_const(&res_type, attrs, BNXT_RE_TOGGLE_MEM_TYPE);
+ if (err)
+ return err;
+
+ uctx = container_of(ib_uctx, struct bnxt_re_ucontext, ib_uctx);
+ rdev = uctx->rdev;
+ err = uverbs_copy_from(&res_id, attrs, BNXT_RE_TOGGLE_MEM_RES_ID);
+ if (err)
+ return err;
+
+ switch (res_type) {
+ case BNXT_RE_CQ_TOGGLE_MEM:
+ cq = bnxt_re_search_for_cq(rdev, res_id);
+ if (!cq)
+ return -EINVAL;
+
+ addr = (u64)cq->uctx_cq_page;
+ break;
+ case BNXT_RE_SRQ_TOGGLE_MEM:
+ srq = bnxt_re_search_for_srq(rdev, res_id);
+ if (!srq)
+ return -EINVAL;
+
+ addr = (u64)srq->uctx_srq_page;
+ break;
+
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ entry = bnxt_re_mmap_entry_insert(uctx, addr, mmap_flag, &mem_offset);
+ if (!entry)
+ return -ENOMEM;
+
+ uobj->object = entry;
+ uverbs_finalize_uobj_create(attrs, BNXT_RE_TOGGLE_MEM_HANDLE);
+ err = uverbs_copy_to(attrs, BNXT_RE_TOGGLE_MEM_MMAP_PAGE,
+ &mem_offset, sizeof(mem_offset));
+ if (err)
+ return err;
+
+ err = uverbs_copy_to(attrs, BNXT_RE_TOGGLE_MEM_MMAP_LENGTH,
+ &length, sizeof(length));
+ if (err)
+ return err;
+
+ err = uverbs_copy_to(attrs, BNXT_RE_TOGGLE_MEM_MMAP_OFFSET,
+ &offset, sizeof(offset));
+ if (err)
+ return err;
+
+ return 0;
+}
+
+static int get_toggle_mem_obj_cleanup(struct ib_uobject *uobject,
+ enum rdma_remove_reason why,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct bnxt_re_user_mmap_entry *entry = uobject->object;
+
+ rdma_user_mmap_entry_remove(&entry->rdma_entry);
+ return 0;
+}
+
+DECLARE_UVERBS_NAMED_METHOD(BNXT_RE_METHOD_GET_TOGGLE_MEM,
+ UVERBS_ATTR_IDR(BNXT_RE_TOGGLE_MEM_HANDLE,
+ BNXT_RE_OBJECT_GET_TOGGLE_MEM,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY),
+ UVERBS_ATTR_CONST_IN(BNXT_RE_TOGGLE_MEM_TYPE,
+ enum bnxt_re_get_toggle_mem_type,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(BNXT_RE_TOGGLE_MEM_RES_ID,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(BNXT_RE_TOGGLE_MEM_MMAP_PAGE,
+ UVERBS_ATTR_TYPE(u64),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(BNXT_RE_TOGGLE_MEM_MMAP_OFFSET,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(BNXT_RE_TOGGLE_MEM_MMAP_LENGTH,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(BNXT_RE_METHOD_RELEASE_TOGGLE_MEM,
+ UVERBS_ATTR_IDR(BNXT_RE_RELEASE_TOGGLE_MEM_HANDLE,
+ BNXT_RE_OBJECT_GET_TOGGLE_MEM,
+ UVERBS_ACCESS_DESTROY,
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_OBJECT(BNXT_RE_OBJECT_GET_TOGGLE_MEM,
+ UVERBS_TYPE_ALLOC_IDR(get_toggle_mem_obj_cleanup),
+ &UVERBS_METHOD(BNXT_RE_METHOD_GET_TOGGLE_MEM),
+ &UVERBS_METHOD(BNXT_RE_METHOD_RELEASE_TOGGLE_MEM));
+
+const struct uapi_definition bnxt_re_uapi_defs[] = {
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(BNXT_RE_OBJECT_ALLOC_PAGE),
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(BNXT_RE_OBJECT_NOTIFY_DRV),
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(BNXT_RE_OBJECT_GET_TOGGLE_MEM),
+ {}
+};
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
new file mode 100644
index 000000000000..76ba9ab04d5c
--- /dev/null
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
@@ -0,0 +1,296 @@
+/*
+ * Broadcom NetXtreme-E RoCE driver.
+ *
+ * Copyright (c) 2016 - 2017, Broadcom. All rights reserved. The term
+ * Broadcom refers to Broadcom Limited and/or its subsidiaries.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Description: IB Verbs interpreter (header)
+ */
+
+#ifndef __BNXT_RE_IB_VERBS_H__
+#define __BNXT_RE_IB_VERBS_H__
+
+struct bnxt_re_gid_ctx {
+ u32 idx;
+ u32 refcnt;
+};
+
+#define BNXT_RE_FENCE_BYTES 64
+struct bnxt_re_fence_data {
+ u32 size;
+ u8 va[BNXT_RE_FENCE_BYTES];
+ dma_addr_t dma_addr;
+ struct bnxt_re_mr *mr;
+ struct ib_mw *mw;
+ struct bnxt_qplib_swqe bind_wqe;
+ u32 bind_rkey;
+};
+
+struct bnxt_re_pd {
+ struct ib_pd ib_pd;
+ struct bnxt_re_dev *rdev;
+ struct bnxt_qplib_pd qplib_pd;
+ struct bnxt_re_fence_data fence;
+ struct rdma_user_mmap_entry *pd_db_mmap;
+ struct rdma_user_mmap_entry *pd_wcdb_mmap;
+};
+
+struct bnxt_re_ah {
+ struct ib_ah ib_ah;
+ struct bnxt_re_dev *rdev;
+ struct bnxt_qplib_ah qplib_ah;
+};
+
+struct bnxt_re_srq {
+ struct ib_srq ib_srq;
+ struct bnxt_re_dev *rdev;
+ u32 srq_limit;
+ struct bnxt_qplib_srq qplib_srq;
+ struct ib_umem *umem;
+ spinlock_t lock; /* protect srq */
+ void *uctx_srq_page;
+ struct hlist_node hash_entry;
+};
+
+struct bnxt_re_qp {
+ struct ib_qp ib_qp;
+ struct list_head list;
+ struct bnxt_re_dev *rdev;
+ spinlock_t sq_lock; /* protect sq */
+ spinlock_t rq_lock; /* protect rq */
+ struct bnxt_qplib_qp qplib_qp;
+ struct ib_umem *sumem;
+ struct ib_umem *rumem;
+ /* QP1 */
+ u32 send_psn;
+ struct ib_ud_header qp1_hdr;
+ struct bnxt_re_cq *scq;
+ struct bnxt_re_cq *rcq;
+ struct dentry *dentry;
+};
+
+struct bnxt_re_cq {
+ struct ib_cq ib_cq;
+ struct bnxt_re_dev *rdev;
+ spinlock_t cq_lock; /* protect cq */
+ u16 cq_count;
+ u16 cq_period;
+ struct bnxt_qplib_cq qplib_cq;
+ struct bnxt_qplib_cqe *cql;
+#define MAX_CQL_PER_POLL 1024
+ u32 max_cql;
+ struct ib_umem *umem;
+ struct ib_umem *resize_umem;
+ int resize_cqe;
+ void *uctx_cq_page;
+ struct hlist_node hash_entry;
+};
+
+struct bnxt_re_mr {
+ struct bnxt_re_dev *rdev;
+ struct ib_mr ib_mr;
+ struct ib_umem *ib_umem;
+ struct bnxt_qplib_mrw qplib_mr;
+ u32 npages;
+ u64 *pages;
+ struct bnxt_qplib_frpl qplib_frpl;
+};
+
+struct bnxt_re_frpl {
+ struct bnxt_re_dev *rdev;
+ struct bnxt_qplib_frpl qplib_frpl;
+ u64 *page_list;
+};
+
+struct bnxt_re_mw {
+ struct bnxt_re_dev *rdev;
+ struct ib_mw ib_mw;
+ struct bnxt_qplib_mrw qplib_mw;
+};
+
+struct bnxt_re_ucontext {
+ struct ib_ucontext ib_uctx;
+ struct bnxt_re_dev *rdev;
+ struct bnxt_qplib_dpi dpi;
+ struct bnxt_qplib_dpi wcdpi;
+ void *shpg;
+ spinlock_t sh_lock; /* protect shpg */
+ struct rdma_user_mmap_entry *shpage_mmap;
+ u64 cmask;
+};
+
+enum bnxt_re_mmap_flag {
+ BNXT_RE_MMAP_SH_PAGE,
+ BNXT_RE_MMAP_UC_DB,
+ BNXT_RE_MMAP_WC_DB,
+ BNXT_RE_MMAP_DBR_PAGE,
+ BNXT_RE_MMAP_DBR_BAR,
+ BNXT_RE_MMAP_TOGGLE_PAGE,
+};
+
+struct bnxt_re_user_mmap_entry {
+ struct rdma_user_mmap_entry rdma_entry;
+ struct bnxt_re_ucontext *uctx;
+ u64 mem_offset;
+ u8 mmap_flag;
+};
+
+struct bnxt_re_flow {
+ struct ib_flow ib_flow;
+ struct bnxt_re_dev *rdev;
+};
+
+static inline u16 bnxt_re_get_swqe_size(int nsge)
+{
+ return sizeof(struct sq_send_hdr) + nsge * sizeof(struct sq_sge);
+}
+
+static inline u16 bnxt_re_get_rwqe_size(int nsge)
+{
+ return sizeof(struct rq_wqe_hdr) + (nsge * sizeof(struct sq_sge));
+}
+
+enum {
+ BNXT_RE_UCNTX_CAP_POW2_DISABLED = 0x1ULL,
+ BNXT_RE_UCNTX_CAP_VAR_WQE_ENABLED = 0x2ULL,
+};
+
+static inline u32 bnxt_re_init_depth(u32 ent, struct bnxt_re_ucontext *uctx)
+{
+ return uctx ? (uctx->cmask & BNXT_RE_UCNTX_CAP_POW2_DISABLED) ?
+ ent : roundup_pow_of_two(ent) : ent;
+}
+
+static inline bool bnxt_re_is_var_size_supported(struct bnxt_re_dev *rdev,
+ struct bnxt_re_ucontext *uctx)
+{
+ if (uctx)
+ return uctx->cmask & BNXT_RE_UCNTX_CAP_VAR_WQE_ENABLED;
+ else
+ return rdev->chip_ctx->modes.wqe_mode;
+}
+
+int bnxt_re_query_device(struct ib_device *ibdev,
+ struct ib_device_attr *ib_attr,
+ struct ib_udata *udata);
+int bnxt_re_modify_device(struct ib_device *ibdev,
+ int device_modify_mask,
+ struct ib_device_modify *device_modify);
+int bnxt_re_query_port(struct ib_device *ibdev, u32 port_num,
+ struct ib_port_attr *port_attr);
+int bnxt_re_get_port_immutable(struct ib_device *ibdev, u32 port_num,
+ struct ib_port_immutable *immutable);
+void bnxt_re_query_fw_str(struct ib_device *ibdev, char *str);
+int bnxt_re_query_pkey(struct ib_device *ibdev, u32 port_num,
+ u16 index, u16 *pkey);
+int bnxt_re_del_gid(const struct ib_gid_attr *attr, void **context);
+int bnxt_re_add_gid(const struct ib_gid_attr *attr, void **context);
+int bnxt_re_query_gid(struct ib_device *ibdev, u32 port_num,
+ int index, union ib_gid *gid);
+enum rdma_link_layer bnxt_re_get_link_layer(struct ib_device *ibdev,
+ u32 port_num);
+int bnxt_re_alloc_pd(struct ib_pd *pd, struct ib_udata *udata);
+int bnxt_re_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata);
+int bnxt_re_create_ah(struct ib_ah *ah, struct rdma_ah_init_attr *init_attr,
+ struct ib_udata *udata);
+int bnxt_re_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr);
+int bnxt_re_destroy_ah(struct ib_ah *ah, u32 flags);
+int bnxt_re_create_srq(struct ib_srq *srq,
+ struct ib_srq_init_attr *srq_init_attr,
+ struct ib_udata *udata);
+int bnxt_re_modify_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr,
+ enum ib_srq_attr_mask srq_attr_mask,
+ struct ib_udata *udata);
+int bnxt_re_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr);
+int bnxt_re_destroy_srq(struct ib_srq *srq, struct ib_udata *udata);
+int bnxt_re_post_srq_recv(struct ib_srq *srq, const struct ib_recv_wr *recv_wr,
+ const struct ib_recv_wr **bad_recv_wr);
+int bnxt_re_create_qp(struct ib_qp *qp, struct ib_qp_init_attr *qp_init_attr,
+ struct ib_udata *udata);
+int bnxt_re_modify_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
+ int qp_attr_mask, struct ib_udata *udata);
+int bnxt_re_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
+ int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr);
+int bnxt_re_destroy_qp(struct ib_qp *qp, struct ib_udata *udata);
+int bnxt_re_post_send(struct ib_qp *qp, const struct ib_send_wr *send_wr,
+ const struct ib_send_wr **bad_send_wr);
+int bnxt_re_post_recv(struct ib_qp *qp, const struct ib_recv_wr *recv_wr,
+ const struct ib_recv_wr **bad_recv_wr);
+int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+ struct uverbs_attr_bundle *attrs);
+int bnxt_re_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata);
+int bnxt_re_destroy_cq(struct ib_cq *cq, struct ib_udata *udata);
+int bnxt_re_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc);
+int bnxt_re_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags);
+struct ib_mr *bnxt_re_get_dma_mr(struct ib_pd *pd, int mr_access_flags);
+
+int bnxt_re_map_mr_sg(struct ib_mr *ib_mr, struct scatterlist *sg, int sg_nents,
+ unsigned int *sg_offset);
+struct ib_mr *bnxt_re_alloc_mr(struct ib_pd *ib_pd, enum ib_mr_type mr_type,
+ u32 max_num_sg);
+int bnxt_re_dereg_mr(struct ib_mr *mr, struct ib_udata *udata);
+struct ib_mw *bnxt_re_alloc_mw(struct ib_pd *ib_pd, enum ib_mw_type type,
+ struct ib_udata *udata);
+int bnxt_re_dealloc_mw(struct ib_mw *mw);
+struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+ u64 virt_addr, int mr_access_flags,
+ struct ib_dmah *dmah,
+ struct ib_udata *udata);
+struct ib_mr *bnxt_re_reg_user_mr_dmabuf(struct ib_pd *ib_pd, u64 start,
+ u64 length, u64 virt_addr,
+ int fd, int mr_access_flags,
+ struct ib_dmah *dmah,
+ struct uverbs_attr_bundle *attrs);
+int bnxt_re_alloc_ucontext(struct ib_ucontext *ctx, struct ib_udata *udata);
+void bnxt_re_dealloc_ucontext(struct ib_ucontext *context);
+struct ib_flow *bnxt_re_create_flow(struct ib_qp *ib_qp,
+ struct ib_flow_attr *attr,
+ struct ib_udata *udata);
+int bnxt_re_destroy_flow(struct ib_flow *flow_id);
+
+int bnxt_re_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
+void bnxt_re_mmap_free(struct rdma_user_mmap_entry *rdma_entry);
+
+int bnxt_re_process_mad(struct ib_device *device, int process_mad_flags,
+ u32 port_num, const struct ib_wc *in_wc,
+ const struct ib_grh *in_grh,
+ const struct ib_mad *in_mad, struct ib_mad *out_mad,
+ size_t *out_mad_size, u16 *out_mad_pkey_index);
+
+static inline u32 __to_ib_port_num(u16 port_id)
+{
+ return (u32)port_id + 1;
+}
+
+unsigned long bnxt_re_lock_cqs(struct bnxt_re_qp *qp);
+void bnxt_re_unlock_cqs(struct bnxt_re_qp *qp, unsigned long flags);
+#endif /* __BNXT_RE_IB_VERBS_H__ */
diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c
new file mode 100644
index 000000000000..73003ad25ee8
--- /dev/null
+++ b/drivers/infiniband/hw/bnxt_re/main.c
@@ -0,0 +1,2614 @@
+/*
+ * Broadcom NetXtreme-E RoCE driver.
+ *
+ * Copyright (c) 2016 - 2017, Broadcom. All rights reserved. The term
+ * Broadcom refers to Broadcom Limited and/or its subsidiaries.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Description: Main component of the bnxt_re driver
+ */
+
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/ethtool.h>
+#include <linux/mutex.h>
+#include <linux/list.h>
+#include <linux/rculist.h>
+#include <linux/spinlock.h>
+#include <linux/pci.h>
+#include <net/dcbnl.h>
+#include <net/ipv6.h>
+#include <net/addrconf.h>
+#include <linux/if_ether.h>
+#include <linux/auxiliary_bus.h>
+
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_user_verbs.h>
+#include <rdma/ib_umem.h>
+#include <rdma/ib_addr.h>
+#include <linux/hashtable.h>
+
+#include "bnxt_ulp.h"
+#include "roce_hsi.h"
+#include "qplib_res.h"
+#include "qplib_sp.h"
+#include "qplib_fp.h"
+#include "qplib_rcfw.h"
+#include "bnxt_re.h"
+#include "ib_verbs.h"
+#include <rdma/bnxt_re-abi.h>
+#include "bnxt.h"
+#include "hw_counters.h"
+#include "debugfs.h"
+
+static char version[] =
+ BNXT_RE_DESC "\n";
+
+MODULE_AUTHOR("Eddie Wai <eddie.wai@broadcom.com>");
+MODULE_DESCRIPTION(BNXT_RE_DESC);
+MODULE_LICENSE("Dual BSD/GPL");
+
+/* globals */
+static DEFINE_MUTEX(bnxt_re_mutex);
+
+static int bnxt_re_hwrm_qcaps(struct bnxt_re_dev *rdev);
+static int bnxt_re_query_hwrm_intf_version(struct bnxt_re_dev *rdev);
+
+static int bnxt_re_hwrm_qcfg(struct bnxt_re_dev *rdev, u32 *db_len,
+ u32 *offset);
+static void bnxt_re_dispatch_event(struct ib_device *ibdev, struct ib_qp *qp,
+ u8 port_num, enum ib_event_type event);
+static void bnxt_re_set_db_offset(struct bnxt_re_dev *rdev)
+{
+ struct bnxt_qplib_chip_ctx *cctx;
+ struct bnxt_en_dev *en_dev;
+ struct bnxt_qplib_res *res;
+ u32 l2db_len = 0;
+ u32 offset = 0;
+ u32 barlen;
+ int rc;
+
+ res = &rdev->qplib_res;
+ en_dev = rdev->en_dev;
+ cctx = rdev->chip_ctx;
+
+ /* Issue qcfg */
+ rc = bnxt_re_hwrm_qcfg(rdev, &l2db_len, &offset);
+ if (rc)
+ dev_info(rdev_to_dev(rdev),
+ "Couldn't get DB bar size, Low latency framework is disabled\n");
+ /* set register offsets for both UC and WC */
+ if (bnxt_qplib_is_chip_gen_p7(cctx)) {
+ res->dpi_tbl.ucreg.offset = offset;
+ res->dpi_tbl.wcreg.offset = en_dev->l2_db_size;
+ } else {
+ res->dpi_tbl.ucreg.offset = res->is_vf ? BNXT_QPLIB_DBR_VF_DB_OFFSET :
+ BNXT_QPLIB_DBR_PF_DB_OFFSET;
+ res->dpi_tbl.wcreg.offset = res->dpi_tbl.ucreg.offset;
+ }
+
+ /* If WC mapping is disabled by L2 driver then en_dev->l2_db_size
+ * is equal to the DB-Bar actual size. This indicates that L2
+ * is mapping entire bar as UC-. RoCE driver can't enable WC mapping
+ * in such cases and DB-push will be disabled.
+ */
+ barlen = pci_resource_len(res->pdev, RCFW_DBR_PCI_BAR_REGION);
+ if (cctx->modes.db_push && l2db_len && en_dev->l2_db_size != barlen) {
+ res->dpi_tbl.wcreg.offset = en_dev->l2_db_size;
+ dev_info(rdev_to_dev(rdev), "Low latency framework is enabled\n");
+ }
+}
+
+static void bnxt_re_set_drv_mode(struct bnxt_re_dev *rdev)
+{
+ struct bnxt_qplib_chip_ctx *cctx;
+
+ cctx = rdev->chip_ctx;
+ cctx->modes.wqe_mode = bnxt_qplib_is_chip_gen_p7(rdev->chip_ctx) ?
+ BNXT_QPLIB_WQE_MODE_VARIABLE : BNXT_QPLIB_WQE_MODE_STATIC;
+ if (bnxt_re_hwrm_qcaps(rdev))
+ dev_err(rdev_to_dev(rdev),
+ "Failed to query hwrm qcaps\n");
+ if (bnxt_qplib_is_chip_gen_p7(rdev->chip_ctx)) {
+ cctx->modes.toggle_bits |= BNXT_QPLIB_CQ_TOGGLE_BIT;
+ cctx->modes.toggle_bits |= BNXT_QPLIB_SRQ_TOGGLE_BIT;
+ }
+}
+
+static void bnxt_re_destroy_chip_ctx(struct bnxt_re_dev *rdev)
+{
+ struct bnxt_qplib_chip_ctx *chip_ctx;
+
+ if (!rdev->chip_ctx)
+ return;
+
+ kfree(rdev->dev_attr);
+ rdev->dev_attr = NULL;
+
+ chip_ctx = rdev->chip_ctx;
+ rdev->chip_ctx = NULL;
+ rdev->rcfw.res = NULL;
+ rdev->qplib_res.cctx = NULL;
+ rdev->qplib_res.pdev = NULL;
+ rdev->qplib_res.netdev = NULL;
+ kfree(chip_ctx);
+}
+
+static int bnxt_re_setup_chip_ctx(struct bnxt_re_dev *rdev)
+{
+ struct bnxt_qplib_chip_ctx *chip_ctx;
+ struct bnxt_en_dev *en_dev;
+ int rc = -ENOMEM;
+
+ en_dev = rdev->en_dev;
+
+ rdev->qplib_res.pdev = en_dev->pdev;
+ chip_ctx = kzalloc(sizeof(*chip_ctx), GFP_KERNEL);
+ if (!chip_ctx)
+ return -ENOMEM;
+ chip_ctx->chip_num = en_dev->chip_num;
+ chip_ctx->hw_stats_size = en_dev->hw_ring_stats_size;
+
+ rdev->chip_ctx = chip_ctx;
+ /* rest members to follow eventually */
+
+ rdev->qplib_res.cctx = rdev->chip_ctx;
+ rdev->rcfw.res = &rdev->qplib_res;
+ rdev->dev_attr = kzalloc(sizeof(*rdev->dev_attr), GFP_KERNEL);
+ if (!rdev->dev_attr)
+ goto free_chip_ctx;
+ rdev->qplib_res.dattr = rdev->dev_attr;
+ rdev->qplib_res.is_vf = BNXT_EN_VF(en_dev);
+ rdev->qplib_res.en_dev = en_dev;
+
+ rc = bnxt_re_query_hwrm_intf_version(rdev);
+ if (rc)
+ goto free_dev_attr;
+
+ bnxt_re_set_drv_mode(rdev);
+
+ bnxt_re_set_db_offset(rdev);
+ rc = bnxt_qplib_map_db_bar(&rdev->qplib_res);
+ if (rc)
+ goto free_dev_attr;
+
+ if (bnxt_qplib_determine_atomics(en_dev->pdev))
+ ibdev_info(&rdev->ibdev,
+ "platform doesn't support global atomics.");
+ return 0;
+free_dev_attr:
+ kfree(rdev->dev_attr);
+ rdev->dev_attr = NULL;
+free_chip_ctx:
+ kfree(rdev->chip_ctx);
+ rdev->chip_ctx = NULL;
+ return rc;
+}
+
+/* SR-IOV helper functions */
+
+static void bnxt_re_get_sriov_func_type(struct bnxt_re_dev *rdev)
+{
+ if (BNXT_EN_VF(rdev->en_dev))
+ rdev->is_virtfn = 1;
+}
+
+/* Set the maximum number of each resource that the driver actually wants
+ * to allocate. This may be up to the maximum number the firmware has
+ * reserved for the function. The driver may choose to allocate fewer
+ * resources than the firmware maximum.
+ */
+static void bnxt_re_limit_pf_res(struct bnxt_re_dev *rdev)
+{
+ struct bnxt_qplib_dev_attr *attr;
+ struct bnxt_qplib_ctx *ctx;
+ int i;
+
+ attr = rdev->dev_attr;
+ ctx = &rdev->qplib_ctx;
+
+ ctx->qpc_count = min_t(u32, BNXT_RE_MAX_QPC_COUNT,
+ attr->max_qp);
+ ctx->mrw_count = BNXT_RE_MAX_MRW_COUNT_256K;
+ /* Use max_mr from fw since max_mrw does not get set */
+ ctx->mrw_count = min_t(u32, ctx->mrw_count, attr->max_mr);
+ ctx->srqc_count = min_t(u32, BNXT_RE_MAX_SRQC_COUNT,
+ attr->max_srq);
+ ctx->cq_count = min_t(u32, BNXT_RE_MAX_CQ_COUNT, attr->max_cq);
+ if (!bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx))
+ for (i = 0; i < MAX_TQM_ALLOC_REQ; i++)
+ rdev->qplib_ctx.tqm_ctx.qcount[i] =
+ rdev->dev_attr->tqm_alloc_reqs[i];
+}
+
+static void bnxt_re_limit_vf_res(struct bnxt_qplib_ctx *qplib_ctx, u32 num_vf)
+{
+ struct bnxt_qplib_vf_res *vf_res;
+ u32 mrws = 0;
+ u32 vf_pct;
+ u32 nvfs;
+
+ vf_res = &qplib_ctx->vf_res;
+ /*
+ * Reserve a set of resources for the PF. Divide the remaining
+ * resources among the VFs
+ */
+ vf_pct = 100 - BNXT_RE_PCT_RSVD_FOR_PF;
+ nvfs = num_vf;
+ num_vf = 100 * num_vf;
+ vf_res->max_qp_per_vf = (qplib_ctx->qpc_count * vf_pct) / num_vf;
+ vf_res->max_srq_per_vf = (qplib_ctx->srqc_count * vf_pct) / num_vf;
+ vf_res->max_cq_per_vf = (qplib_ctx->cq_count * vf_pct) / num_vf;
+ /*
+ * The driver allows many more MRs than other resources. If the
+ * firmware does also, then reserve a fixed amount for the PF and
+ * divide the rest among VFs. VFs may use many MRs for NFS
+ * mounts, ISER, NVME applications, etc. If the firmware severely
+ * restricts the number of MRs, then let PF have half and divide
+ * the rest among VFs, as for the other resource types.
+ */
+ if (qplib_ctx->mrw_count < BNXT_RE_MAX_MRW_COUNT_64K) {
+ mrws = qplib_ctx->mrw_count * vf_pct;
+ nvfs = num_vf;
+ } else {
+ mrws = qplib_ctx->mrw_count - BNXT_RE_RESVD_MR_FOR_PF;
+ }
+ vf_res->max_mrw_per_vf = (mrws / nvfs);
+ vf_res->max_gid_per_vf = BNXT_RE_MAX_GID_PER_VF;
+}
+
+static void bnxt_re_set_resource_limits(struct bnxt_re_dev *rdev)
+{
+ u32 num_vfs;
+
+ memset(&rdev->qplib_ctx.vf_res, 0, sizeof(struct bnxt_qplib_vf_res));
+ bnxt_re_limit_pf_res(rdev);
+
+ num_vfs = bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx) ?
+ BNXT_RE_GEN_P5_MAX_VF : rdev->num_vfs;
+ if (num_vfs)
+ bnxt_re_limit_vf_res(&rdev->qplib_ctx, num_vfs);
+}
+
+static void bnxt_re_vf_res_config(struct bnxt_re_dev *rdev)
+{
+ /*
+ * Use the total VF count since the actual VF count may not be
+ * available at this point.
+ */
+ rdev->num_vfs = pci_sriov_get_totalvfs(rdev->en_dev->pdev);
+ if (!rdev->num_vfs)
+ return;
+
+ bnxt_re_set_resource_limits(rdev);
+ bnxt_qplib_set_func_resources(&rdev->qplib_res, &rdev->rcfw,
+ &rdev->qplib_ctx);
+}
+
+struct bnxt_re_dcb_work {
+ struct work_struct work;
+ struct bnxt_re_dev *rdev;
+ struct hwrm_async_event_cmpl cmpl;
+};
+
+static bool bnxt_re_is_qp1_qp(struct bnxt_re_qp *qp)
+{
+ return qp->ib_qp.qp_type == IB_QPT_GSI;
+}
+
+static struct bnxt_re_qp *bnxt_re_get_qp1_qp(struct bnxt_re_dev *rdev)
+{
+ struct bnxt_re_qp *qp;
+
+ mutex_lock(&rdev->qp_lock);
+ list_for_each_entry(qp, &rdev->qp_list, list) {
+ if (bnxt_re_is_qp1_qp(qp)) {
+ mutex_unlock(&rdev->qp_lock);
+ return qp;
+ }
+ }
+ mutex_unlock(&rdev->qp_lock);
+ return NULL;
+}
+
+static int bnxt_re_update_qp1_tos_dscp(struct bnxt_re_dev *rdev)
+{
+ struct bnxt_re_qp *qp;
+
+ if (!bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx))
+ return 0;
+
+ qp = bnxt_re_get_qp1_qp(rdev);
+ if (!qp)
+ return 0;
+
+ qp->qplib_qp.modify_flags = CMDQ_MODIFY_QP_MODIFY_MASK_TOS_DSCP;
+ qp->qplib_qp.tos_dscp = rdev->cc_param.qp1_tos_dscp;
+
+ return bnxt_qplib_modify_qp(&rdev->qplib_res, &qp->qplib_qp);
+}
+
+static void bnxt_re_init_dcb_wq(struct bnxt_re_dev *rdev)
+{
+ rdev->dcb_wq = create_singlethread_workqueue("bnxt_re_dcb_wq");
+}
+
+static void bnxt_re_uninit_dcb_wq(struct bnxt_re_dev *rdev)
+{
+ if (!rdev->dcb_wq)
+ return;
+ destroy_workqueue(rdev->dcb_wq);
+}
+
+static void bnxt_re_dcb_wq_task(struct work_struct *work)
+{
+ struct bnxt_re_dcb_work *dcb_work =
+ container_of(work, struct bnxt_re_dcb_work, work);
+ struct bnxt_re_dev *rdev = dcb_work->rdev;
+ struct bnxt_qplib_cc_param *cc_param;
+ int rc;
+
+ if (!rdev)
+ goto free_dcb;
+
+ cc_param = &rdev->cc_param;
+ rc = bnxt_qplib_query_cc_param(&rdev->qplib_res, cc_param);
+ if (rc) {
+ ibdev_dbg(&rdev->ibdev, "Failed to query ccparam rc:%d", rc);
+ goto free_dcb;
+ }
+ if (cc_param->qp1_tos_dscp != cc_param->tos_dscp) {
+ cc_param->qp1_tos_dscp = cc_param->tos_dscp;
+ rc = bnxt_re_update_qp1_tos_dscp(rdev);
+ if (rc) {
+ ibdev_dbg(&rdev->ibdev, "%s: Failed to modify QP1 rc:%d",
+ __func__, rc);
+ goto free_dcb;
+ }
+ }
+
+free_dcb:
+ kfree(dcb_work);
+}
+
+static void bnxt_re_async_notifier(void *handle, struct hwrm_async_event_cmpl *cmpl)
+{
+ struct bnxt_re_en_dev_info *en_info = auxiliary_get_drvdata(handle);
+ struct bnxt_re_dcb_work *dcb_work;
+ struct bnxt_re_dev *rdev;
+ u32 data1, data2;
+ u16 event_id;
+
+ rdev = en_info->rdev;
+ if (!rdev)
+ return;
+
+ event_id = le16_to_cpu(cmpl->event_id);
+ data1 = le32_to_cpu(cmpl->event_data1);
+ data2 = le32_to_cpu(cmpl->event_data2);
+
+ ibdev_dbg(&rdev->ibdev, "Async event_id = %d data1 = %d data2 = %d",
+ event_id, data1, data2);
+
+ switch (event_id) {
+ case ASYNC_EVENT_CMPL_EVENT_ID_DCB_CONFIG_CHANGE:
+ dcb_work = kzalloc(sizeof(*dcb_work), GFP_ATOMIC);
+ if (!dcb_work)
+ break;
+
+ dcb_work->rdev = rdev;
+ memcpy(&dcb_work->cmpl, cmpl, sizeof(*cmpl));
+ INIT_WORK(&dcb_work->work, bnxt_re_dcb_wq_task);
+ queue_work(rdev->dcb_wq, &dcb_work->work);
+ break;
+ default:
+ break;
+ }
+}
+
+static void bnxt_re_stop_irq(void *handle, bool reset)
+{
+ struct bnxt_re_en_dev_info *en_info = auxiliary_get_drvdata(handle);
+ struct bnxt_qplib_rcfw *rcfw;
+ struct bnxt_re_dev *rdev;
+ struct bnxt_qplib_nq *nq;
+ int indx;
+
+ rdev = en_info->rdev;
+ if (!rdev)
+ return;
+ rcfw = &rdev->rcfw;
+
+ if (reset) {
+ set_bit(ERR_DEVICE_DETACHED, &rdev->rcfw.cmdq.flags);
+ set_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags);
+ wake_up_all(&rdev->rcfw.cmdq.waitq);
+ bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1,
+ IB_EVENT_DEVICE_FATAL);
+ }
+
+ for (indx = BNXT_RE_NQ_IDX; indx < rdev->nqr->num_msix; indx++) {
+ nq = &rdev->nqr->nq[indx - 1];
+ bnxt_qplib_nq_stop_irq(nq, false);
+ }
+
+ bnxt_qplib_rcfw_stop_irq(rcfw, false);
+}
+
+static void bnxt_re_start_irq(void *handle, struct bnxt_msix_entry *ent)
+{
+ struct bnxt_re_en_dev_info *en_info = auxiliary_get_drvdata(handle);
+ struct bnxt_msix_entry *msix_ent;
+ struct bnxt_qplib_rcfw *rcfw;
+ struct bnxt_re_dev *rdev;
+ struct bnxt_qplib_nq *nq;
+ int indx, rc;
+
+ rdev = en_info->rdev;
+ if (!rdev)
+ return;
+ msix_ent = rdev->nqr->msix_entries;
+ rcfw = &rdev->rcfw;
+ if (!ent) {
+ /* Not setting the f/w timeout bit in rcfw.
+ * During the driver unload the first command
+ * to f/w will timeout and that will set the
+ * timeout bit.
+ */
+ ibdev_err(&rdev->ibdev, "Failed to re-start IRQs\n");
+ return;
+ }
+
+ /* Vectors may change after restart, so update with new vectors
+ * in device sctructure.
+ */
+ for (indx = 0; indx < rdev->nqr->num_msix; indx++)
+ rdev->nqr->msix_entries[indx].vector = ent[indx].vector;
+
+ rc = bnxt_qplib_rcfw_start_irq(rcfw, msix_ent[BNXT_RE_AEQ_IDX].vector,
+ false);
+ if (rc) {
+ ibdev_warn(&rdev->ibdev, "Failed to reinit CREQ\n");
+ return;
+ }
+ for (indx = BNXT_RE_NQ_IDX ; indx < rdev->nqr->num_msix; indx++) {
+ nq = &rdev->nqr->nq[indx - 1];
+ rc = bnxt_qplib_nq_start_irq(nq, indx - 1,
+ msix_ent[indx].vector, false);
+ if (rc) {
+ ibdev_warn(&rdev->ibdev, "Failed to reinit NQ index %d\n",
+ indx - 1);
+ return;
+ }
+ }
+}
+
+static struct bnxt_ulp_ops bnxt_re_ulp_ops = {
+ .ulp_async_notifier = bnxt_re_async_notifier,
+ .ulp_irq_stop = bnxt_re_stop_irq,
+ .ulp_irq_restart = bnxt_re_start_irq
+};
+
+/* RoCE -> Net driver */
+
+static int bnxt_re_register_netdev(struct bnxt_re_dev *rdev)
+{
+ struct bnxt_en_dev *en_dev;
+
+ en_dev = rdev->en_dev;
+ return bnxt_register_dev(en_dev, &bnxt_re_ulp_ops, rdev->adev);
+}
+
+static void bnxt_re_init_hwrm_hdr(struct input *hdr, u16 opcd)
+{
+ hdr->req_type = cpu_to_le16(opcd);
+ hdr->cmpl_ring = cpu_to_le16(-1);
+ hdr->target_id = cpu_to_le16(-1);
+}
+
+static void bnxt_re_fill_fw_msg(struct bnxt_fw_msg *fw_msg, void *msg,
+ int msg_len, void *resp, int resp_max_len,
+ int timeout)
+{
+ fw_msg->msg = msg;
+ fw_msg->msg_len = msg_len;
+ fw_msg->resp = resp;
+ fw_msg->resp_max_len = resp_max_len;
+ fw_msg->timeout = timeout;
+}
+
+void bnxt_re_hwrm_free_vnic(struct bnxt_re_dev *rdev)
+{
+ struct bnxt_en_dev *en_dev = rdev->en_dev;
+ struct hwrm_vnic_free_input req = {};
+ struct bnxt_fw_msg fw_msg = {};
+ int rc;
+
+ bnxt_re_init_hwrm_hdr((void *)&req, HWRM_VNIC_FREE);
+
+ req.vnic_id = cpu_to_le32(rdev->mirror_vnic_id);
+ bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), NULL,
+ 0, BNXT_RE_HWRM_CMD_TIMEOUT(rdev));
+ rc = bnxt_send_msg(en_dev, &fw_msg);
+ if (rc)
+ ibdev_dbg(&rdev->ibdev,
+ "Failed to free vnic, rc = %d\n", rc);
+}
+
+int bnxt_re_hwrm_alloc_vnic(struct bnxt_re_dev *rdev)
+{
+ struct bnxt_en_dev *en_dev = rdev->en_dev;
+ struct hwrm_vnic_alloc_output resp = {};
+ struct hwrm_vnic_alloc_input req = {};
+ struct bnxt_fw_msg fw_msg = {};
+ int rc;
+
+ bnxt_re_init_hwrm_hdr((void *)&req, HWRM_VNIC_ALLOC);
+
+ req.vnic_id = cpu_to_le16(rdev->mirror_vnic_id);
+ req.flags = cpu_to_le32(VNIC_ALLOC_REQ_FLAGS_VNIC_ID_VALID);
+ bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
+ sizeof(resp), BNXT_RE_HWRM_CMD_TIMEOUT(rdev));
+ rc = bnxt_send_msg(en_dev, &fw_msg);
+ if (rc)
+ ibdev_dbg(&rdev->ibdev,
+ "Failed to alloc vnic, rc = %d\n", rc);
+
+ return rc;
+}
+
+int bnxt_re_hwrm_cfg_vnic(struct bnxt_re_dev *rdev, u32 qp_id)
+{
+ struct bnxt_en_dev *en_dev = rdev->en_dev;
+ struct hwrm_vnic_cfg_input req = {};
+ struct bnxt_fw_msg fw_msg = {};
+ int rc;
+
+ bnxt_re_init_hwrm_hdr((void *)&req, HWRM_VNIC_CFG);
+
+ req.flags = cpu_to_le32(VNIC_CFG_REQ_FLAGS_ROCE_ONLY_VNIC_MODE);
+ req.enables = cpu_to_le32(VNIC_CFG_REQ_ENABLES_RAW_QP_ID |
+ VNIC_CFG_REQ_ENABLES_MRU);
+ req.vnic_id = cpu_to_le16(rdev->mirror_vnic_id);
+ req.raw_qp_id = cpu_to_le32(qp_id);
+ req.mru = cpu_to_le16(rdev->netdev->mtu + VLAN_ETH_HLEN);
+
+ bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), NULL,
+ 0, BNXT_RE_HWRM_CMD_TIMEOUT(rdev));
+ rc = bnxt_send_msg(en_dev, &fw_msg);
+ if (rc)
+ ibdev_dbg(&rdev->ibdev,
+ "Failed to cfg vnic, rc = %d\n", rc);
+
+ return rc;
+}
+
+/* Query device config using common hwrm */
+static int bnxt_re_hwrm_qcfg(struct bnxt_re_dev *rdev, u32 *db_len,
+ u32 *offset)
+{
+ struct bnxt_en_dev *en_dev = rdev->en_dev;
+ struct hwrm_func_qcfg_output resp = {0};
+ struct hwrm_func_qcfg_input req = {0};
+ struct bnxt_fw_msg fw_msg = {};
+ int rc;
+
+ bnxt_re_init_hwrm_hdr((void *)&req, HWRM_FUNC_QCFG);
+ req.fid = cpu_to_le16(0xffff);
+ bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
+ sizeof(resp), BNXT_RE_HWRM_CMD_TIMEOUT(rdev));
+ rc = bnxt_send_msg(en_dev, &fw_msg);
+ if (!rc) {
+ *db_len = PAGE_ALIGN(le16_to_cpu(resp.l2_doorbell_bar_size_kb) * 1024);
+ *offset = PAGE_ALIGN(le16_to_cpu(resp.legacy_l2_db_size_kb) * 1024);
+ rdev->mirror_vnic_id = le16_to_cpu(resp.mirror_vnic_id);
+ }
+ return rc;
+}
+
+/* Query function capabilities using common hwrm */
+int bnxt_re_hwrm_qcaps(struct bnxt_re_dev *rdev)
+{
+ struct bnxt_en_dev *en_dev = rdev->en_dev;
+ struct hwrm_func_qcaps_output resp = {};
+ struct hwrm_func_qcaps_input req = {};
+ struct bnxt_qplib_chip_ctx *cctx;
+ struct bnxt_fw_msg fw_msg = {};
+ u32 flags_ext2;
+ int rc;
+
+ cctx = rdev->chip_ctx;
+ bnxt_re_init_hwrm_hdr((void *)&req, HWRM_FUNC_QCAPS);
+ req.fid = cpu_to_le16(0xffff);
+ bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
+ sizeof(resp), BNXT_RE_HWRM_CMD_TIMEOUT(rdev));
+
+ rc = bnxt_send_msg(en_dev, &fw_msg);
+ if (rc)
+ return rc;
+ cctx->modes.db_push = le32_to_cpu(resp.flags) & FUNC_QCAPS_RESP_FLAGS_WCB_PUSH_MODE;
+
+ flags_ext2 = le32_to_cpu(resp.flags_ext2);
+ cctx->modes.dbr_pacing = flags_ext2 & FUNC_QCAPS_RESP_FLAGS_EXT2_DBR_PACING_EXT_SUPPORTED ||
+ flags_ext2 & FUNC_QCAPS_RESP_FLAGS_EXT2_DBR_PACING_V0_SUPPORTED;
+ cctx->modes.roce_mirror = !!(le32_to_cpu(resp.flags_ext3) &
+ FUNC_QCAPS_RESP_FLAGS_EXT3_MIRROR_ON_ROCE_SUPPORTED);
+ return 0;
+}
+
+static int bnxt_re_hwrm_dbr_pacing_qcfg(struct bnxt_re_dev *rdev)
+{
+ struct bnxt_qplib_db_pacing_data *pacing_data = rdev->qplib_res.pacing_data;
+ struct hwrm_func_dbr_pacing_qcfg_output resp = {};
+ struct hwrm_func_dbr_pacing_qcfg_input req = {};
+ struct bnxt_en_dev *en_dev = rdev->en_dev;
+ struct bnxt_qplib_chip_ctx *cctx;
+ struct bnxt_fw_msg fw_msg = {};
+ int rc;
+
+ cctx = rdev->chip_ctx;
+ bnxt_re_init_hwrm_hdr((void *)&req, HWRM_FUNC_DBR_PACING_QCFG);
+ bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
+ sizeof(resp), BNXT_RE_HWRM_CMD_TIMEOUT(rdev));
+ rc = bnxt_send_msg(en_dev, &fw_msg);
+ if (rc)
+ return rc;
+
+ if ((le32_to_cpu(resp.dbr_stat_db_fifo_reg) &
+ FUNC_DBR_PACING_QCFG_RESP_DBR_STAT_DB_FIFO_REG_ADDR_SPACE_MASK) ==
+ FUNC_DBR_PACING_QCFG_RESP_DBR_STAT_DB_FIFO_REG_ADDR_SPACE_GRC)
+ cctx->dbr_stat_db_fifo =
+ le32_to_cpu(resp.dbr_stat_db_fifo_reg) &
+ ~FUNC_DBR_PACING_QCFG_RESP_DBR_STAT_DB_FIFO_REG_ADDR_SPACE_MASK;
+
+ pacing_data->fifo_max_depth = le32_to_cpu(resp.dbr_stat_db_max_fifo_depth);
+ if (!pacing_data->fifo_max_depth)
+ pacing_data->fifo_max_depth = BNXT_RE_MAX_FIFO_DEPTH(cctx);
+ pacing_data->fifo_room_mask = le32_to_cpu(resp.dbr_stat_db_fifo_reg_fifo_room_mask);
+ pacing_data->fifo_room_shift = resp.dbr_stat_db_fifo_reg_fifo_room_shift;
+
+ return 0;
+}
+
+/* Update the pacing tunable parameters to the default values */
+static void bnxt_re_set_default_pacing_data(struct bnxt_re_dev *rdev)
+{
+ struct bnxt_qplib_db_pacing_data *pacing_data = rdev->qplib_res.pacing_data;
+
+ pacing_data->do_pacing = rdev->pacing.dbr_def_do_pacing;
+ pacing_data->pacing_th = rdev->pacing.pacing_algo_th;
+ pacing_data->alarm_th =
+ pacing_data->pacing_th * BNXT_RE_PACING_ALARM_TH_MULTIPLE;
+}
+
+static u32 __get_fifo_occupancy(struct bnxt_re_dev *rdev)
+{
+ struct bnxt_qplib_db_pacing_data *pacing_data = rdev->qplib_res.pacing_data;
+ u32 read_val, fifo_occup;
+
+ read_val = readl(rdev->en_dev->bar0 + rdev->pacing.dbr_db_fifo_reg_off);
+ fifo_occup = pacing_data->fifo_max_depth -
+ ((read_val & pacing_data->fifo_room_mask) >>
+ pacing_data->fifo_room_shift);
+ return fifo_occup;
+}
+
+static bool is_dbr_fifo_full(struct bnxt_re_dev *rdev)
+{
+ u32 max_occup, fifo_occup;
+
+ fifo_occup = __get_fifo_occupancy(rdev);
+ max_occup = BNXT_RE_MAX_FIFO_DEPTH(rdev->chip_ctx) - 1;
+ if (fifo_occup == max_occup)
+ return true;
+
+ return false;
+}
+
+static void __wait_for_fifo_occupancy_below_th(struct bnxt_re_dev *rdev)
+{
+ struct bnxt_qplib_db_pacing_data *pacing_data = rdev->qplib_res.pacing_data;
+ u32 retry_fifo_check = 1000;
+ u32 fifo_occup;
+
+ /* loop shouldn't run infintely as the occupancy usually goes
+ * below pacing algo threshold as soon as pacing kicks in.
+ */
+ while (1) {
+ fifo_occup = __get_fifo_occupancy(rdev);
+ /* Fifo occupancy cannot be greater the MAX FIFO depth */
+ if (fifo_occup > pacing_data->fifo_max_depth)
+ break;
+
+ if (fifo_occup < pacing_data->pacing_th)
+ break;
+ if (!retry_fifo_check--) {
+ dev_info_once(rdev_to_dev(rdev),
+ "%s: fifo_occup = 0x%xfifo_max_depth = 0x%x pacing_th = 0x%x\n",
+ __func__, fifo_occup, pacing_data->fifo_max_depth,
+ pacing_data->pacing_th);
+ break;
+ }
+
+ }
+}
+
+static void bnxt_re_db_fifo_check(struct work_struct *work)
+{
+ struct bnxt_re_dev *rdev = container_of(work, struct bnxt_re_dev,
+ dbq_fifo_check_work);
+ struct bnxt_qplib_db_pacing_data *pacing_data;
+ u32 pacing_save;
+
+ if (!mutex_trylock(&rdev->pacing.dbq_lock))
+ return;
+ pacing_data = rdev->qplib_res.pacing_data;
+ pacing_save = rdev->pacing.do_pacing_save;
+ __wait_for_fifo_occupancy_below_th(rdev);
+ cancel_delayed_work_sync(&rdev->dbq_pacing_work);
+ if (pacing_save > rdev->pacing.dbr_def_do_pacing) {
+ /* Double the do_pacing value during the congestion */
+ pacing_save = pacing_save << 1;
+ } else {
+ /*
+ * when a new congestion is detected increase the do_pacing
+ * by 8 times. And also increase the pacing_th by 4 times. The
+ * reason to increase pacing_th is to give more space for the
+ * queue to oscillate down without getting empty, but also more
+ * room for the queue to increase without causing another alarm.
+ */
+ pacing_save = pacing_save << 3;
+ pacing_data->pacing_th = rdev->pacing.pacing_algo_th * 4;
+ }
+
+ if (pacing_save > BNXT_RE_MAX_DBR_DO_PACING)
+ pacing_save = BNXT_RE_MAX_DBR_DO_PACING;
+
+ pacing_data->do_pacing = pacing_save;
+ rdev->pacing.do_pacing_save = pacing_data->do_pacing;
+ pacing_data->alarm_th =
+ pacing_data->pacing_th * BNXT_RE_PACING_ALARM_TH_MULTIPLE;
+ schedule_delayed_work(&rdev->dbq_pacing_work,
+ msecs_to_jiffies(rdev->pacing.dbq_pacing_time));
+ rdev->stats.pacing.alerts++;
+ mutex_unlock(&rdev->pacing.dbq_lock);
+}
+
+static void bnxt_re_pacing_timer_exp(struct work_struct *work)
+{
+ struct bnxt_re_dev *rdev = container_of(work, struct bnxt_re_dev,
+ dbq_pacing_work.work);
+ struct bnxt_qplib_db_pacing_data *pacing_data;
+ u32 fifo_occup;
+
+ if (!mutex_trylock(&rdev->pacing.dbq_lock))
+ return;
+
+ pacing_data = rdev->qplib_res.pacing_data;
+ fifo_occup = __get_fifo_occupancy(rdev);
+
+ if (fifo_occup > pacing_data->pacing_th)
+ goto restart_timer;
+
+ /*
+ * Instead of immediately going back to the default do_pacing
+ * reduce it by 1/8 times and restart the timer.
+ */
+ pacing_data->do_pacing = pacing_data->do_pacing - (pacing_data->do_pacing >> 3);
+ pacing_data->do_pacing = max_t(u32, rdev->pacing.dbr_def_do_pacing, pacing_data->do_pacing);
+ if (pacing_data->do_pacing <= rdev->pacing.dbr_def_do_pacing) {
+ bnxt_re_set_default_pacing_data(rdev);
+ rdev->stats.pacing.complete++;
+ goto dbq_unlock;
+ }
+
+restart_timer:
+ schedule_delayed_work(&rdev->dbq_pacing_work,
+ msecs_to_jiffies(rdev->pacing.dbq_pacing_time));
+ rdev->stats.pacing.resched++;
+dbq_unlock:
+ rdev->pacing.do_pacing_save = pacing_data->do_pacing;
+ mutex_unlock(&rdev->pacing.dbq_lock);
+}
+
+void bnxt_re_pacing_alert(struct bnxt_re_dev *rdev)
+{
+ struct bnxt_qplib_db_pacing_data *pacing_data;
+
+ if (!rdev->pacing.dbr_pacing)
+ return;
+ mutex_lock(&rdev->pacing.dbq_lock);
+ pacing_data = rdev->qplib_res.pacing_data;
+
+ /*
+ * Increase the alarm_th to max so that other user lib instances do not
+ * keep alerting the driver.
+ */
+ pacing_data->alarm_th = pacing_data->fifo_max_depth;
+ pacing_data->do_pacing = BNXT_RE_MAX_DBR_DO_PACING;
+ cancel_work_sync(&rdev->dbq_fifo_check_work);
+ schedule_work(&rdev->dbq_fifo_check_work);
+ mutex_unlock(&rdev->pacing.dbq_lock);
+}
+
+static int bnxt_re_initialize_dbr_pacing(struct bnxt_re_dev *rdev)
+{
+ /* Allocate a page for app use */
+ rdev->pacing.dbr_page = (void *)__get_free_page(GFP_KERNEL);
+ if (!rdev->pacing.dbr_page)
+ return -ENOMEM;
+
+ memset((u8 *)rdev->pacing.dbr_page, 0, PAGE_SIZE);
+ rdev->qplib_res.pacing_data = (struct bnxt_qplib_db_pacing_data *)rdev->pacing.dbr_page;
+
+ if (bnxt_re_hwrm_dbr_pacing_qcfg(rdev)) {
+ free_page((u64)rdev->pacing.dbr_page);
+ rdev->pacing.dbr_page = NULL;
+ return -EIO;
+ }
+
+ /* MAP HW window 2 for reading db fifo depth */
+ writel(rdev->chip_ctx->dbr_stat_db_fifo & BNXT_GRC_BASE_MASK,
+ rdev->en_dev->bar0 + BNXT_GRCPF_REG_WINDOW_BASE_OUT + 4);
+ rdev->pacing.dbr_db_fifo_reg_off =
+ (rdev->chip_ctx->dbr_stat_db_fifo & BNXT_GRC_OFFSET_MASK) +
+ BNXT_RE_GRC_FIFO_REG_BASE;
+ rdev->pacing.dbr_bar_addr =
+ pci_resource_start(rdev->qplib_res.pdev, 0) + rdev->pacing.dbr_db_fifo_reg_off;
+
+ if (is_dbr_fifo_full(rdev)) {
+ free_page((u64)rdev->pacing.dbr_page);
+ rdev->pacing.dbr_page = NULL;
+ return -EIO;
+ }
+
+ rdev->pacing.pacing_algo_th = BNXT_RE_PACING_ALGO_THRESHOLD;
+ rdev->pacing.dbq_pacing_time = BNXT_RE_DBR_PACING_TIME;
+ rdev->pacing.dbr_def_do_pacing = BNXT_RE_DBR_DO_PACING_NO_CONGESTION;
+ rdev->pacing.do_pacing_save = rdev->pacing.dbr_def_do_pacing;
+ rdev->qplib_res.pacing_data->grc_reg_offset = rdev->pacing.dbr_db_fifo_reg_off;
+ bnxt_re_set_default_pacing_data(rdev);
+ /* Initialize worker for DBR Pacing */
+ INIT_WORK(&rdev->dbq_fifo_check_work, bnxt_re_db_fifo_check);
+ INIT_DELAYED_WORK(&rdev->dbq_pacing_work, bnxt_re_pacing_timer_exp);
+ return 0;
+}
+
+static void bnxt_re_deinitialize_dbr_pacing(struct bnxt_re_dev *rdev)
+{
+ cancel_work_sync(&rdev->dbq_fifo_check_work);
+ cancel_delayed_work_sync(&rdev->dbq_pacing_work);
+ if (rdev->pacing.dbr_page)
+ free_page((u64)rdev->pacing.dbr_page);
+
+ rdev->pacing.dbr_page = NULL;
+ rdev->pacing.dbr_pacing = false;
+}
+
+static int bnxt_re_net_ring_free(struct bnxt_re_dev *rdev,
+ u16 fw_ring_id, int type)
+{
+ struct bnxt_en_dev *en_dev = rdev->en_dev;
+ struct hwrm_ring_free_input req = {};
+ struct hwrm_ring_free_output resp;
+ struct bnxt_fw_msg fw_msg = {};
+ int rc = -EINVAL;
+
+ if (test_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags))
+ return 0;
+
+ bnxt_re_init_hwrm_hdr((void *)&req, HWRM_RING_FREE);
+ req.ring_type = type;
+ req.ring_id = cpu_to_le16(fw_ring_id);
+ bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
+ sizeof(resp), BNXT_RE_HWRM_CMD_TIMEOUT(rdev));
+ rc = bnxt_send_msg(en_dev, &fw_msg);
+ if (rc)
+ ibdev_err(&rdev->ibdev, "Failed to free HW ring:%d :%#x",
+ req.ring_id, rc);
+ return rc;
+}
+
+static int bnxt_re_net_ring_alloc(struct bnxt_re_dev *rdev,
+ struct bnxt_re_ring_attr *ring_attr,
+ u16 *fw_ring_id)
+{
+ struct bnxt_en_dev *en_dev = rdev->en_dev;
+ struct hwrm_ring_alloc_input req = {};
+ struct hwrm_ring_alloc_output resp;
+ struct bnxt_fw_msg fw_msg = {};
+ int rc = -EINVAL;
+
+ bnxt_re_init_hwrm_hdr((void *)&req, HWRM_RING_ALLOC);
+ req.enables = 0;
+ req.page_tbl_addr = cpu_to_le64(ring_attr->dma_arr[0]);
+ if (ring_attr->pages > 1) {
+ /* Page size is in log2 units */
+ req.page_size = BNXT_PAGE_SHIFT;
+ req.page_tbl_depth = 1;
+ }
+ req.fbo = 0;
+ /* Association of ring index with doorbell index and MSIX number */
+ req.logical_id = cpu_to_le16(ring_attr->lrid);
+ req.length = cpu_to_le32(ring_attr->depth + 1);
+ req.ring_type = ring_attr->type;
+ req.int_mode = ring_attr->mode;
+ bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
+ sizeof(resp), BNXT_RE_HWRM_CMD_TIMEOUT(rdev));
+ rc = bnxt_send_msg(en_dev, &fw_msg);
+ if (!rc)
+ *fw_ring_id = le16_to_cpu(resp.ring_id);
+
+ return rc;
+}
+
+static int bnxt_re_net_stats_ctx_free(struct bnxt_re_dev *rdev,
+ u32 fw_stats_ctx_id)
+{
+ struct bnxt_en_dev *en_dev = rdev->en_dev;
+ struct hwrm_stat_ctx_free_input req = {};
+ struct hwrm_stat_ctx_free_output resp = {};
+ struct bnxt_fw_msg fw_msg = {};
+ int rc = -EINVAL;
+
+ if (test_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags))
+ return 0;
+
+ bnxt_re_init_hwrm_hdr((void *)&req, HWRM_STAT_CTX_FREE);
+ req.stat_ctx_id = cpu_to_le32(fw_stats_ctx_id);
+ bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
+ sizeof(resp), BNXT_RE_HWRM_CMD_TIMEOUT(rdev));
+ rc = bnxt_send_msg(en_dev, &fw_msg);
+ if (rc)
+ ibdev_err(&rdev->ibdev, "Failed to free HW stats context %#x",
+ rc);
+
+ return rc;
+}
+
+static int bnxt_re_net_stats_ctx_alloc(struct bnxt_re_dev *rdev,
+ struct bnxt_qplib_stats *stats)
+{
+ struct bnxt_qplib_chip_ctx *chip_ctx = rdev->chip_ctx;
+ struct hwrm_stat_ctx_alloc_output resp = {};
+ struct hwrm_stat_ctx_alloc_input req = {};
+ struct bnxt_en_dev *en_dev = rdev->en_dev;
+ struct bnxt_fw_msg fw_msg = {};
+ int rc = -EINVAL;
+
+ stats->fw_id = INVALID_STATS_CTX_ID;
+
+ bnxt_re_init_hwrm_hdr((void *)&req, HWRM_STAT_CTX_ALLOC);
+ req.update_period_ms = cpu_to_le32(1000);
+ req.stats_dma_addr = cpu_to_le64(stats->dma_map);
+ req.stats_dma_length = cpu_to_le16(chip_ctx->hw_stats_size);
+ req.stat_ctx_flags = STAT_CTX_ALLOC_REQ_STAT_CTX_FLAGS_ROCE;
+ bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
+ sizeof(resp), BNXT_RE_HWRM_CMD_TIMEOUT(rdev));
+ rc = bnxt_send_msg(en_dev, &fw_msg);
+ if (!rc)
+ stats->fw_id = le32_to_cpu(resp.stat_ctx_id);
+
+ return rc;
+}
+
+static void bnxt_re_disassociate_ucontext(struct ib_ucontext *ibcontext)
+{
+}
+
+/* Device */
+static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr,
+ char *buf)
+{
+ struct bnxt_re_dev *rdev =
+ rdma_device_to_drv_device(device, struct bnxt_re_dev, ibdev);
+
+ return sysfs_emit(buf, "0x%x\n", rdev->en_dev->pdev->revision);
+}
+static DEVICE_ATTR_RO(hw_rev);
+
+static ssize_t hca_type_show(struct device *device,
+ struct device_attribute *attr, char *buf)
+{
+ struct bnxt_re_dev *rdev =
+ rdma_device_to_drv_device(device, struct bnxt_re_dev, ibdev);
+
+ return sysfs_emit(buf, "0x%x\n", rdev->en_dev->pdev->device);
+}
+static DEVICE_ATTR_RO(hca_type);
+
+static ssize_t board_id_show(struct device *device, struct device_attribute *attr,
+ char *buf)
+{
+ struct bnxt_re_dev *rdev = rdma_device_to_drv_device(device,
+ struct bnxt_re_dev, ibdev);
+ char buffer[BNXT_VPD_FLD_LEN] = {};
+
+ if (!rdev->is_virtfn)
+ memcpy(buffer, rdev->board_partno, BNXT_VPD_FLD_LEN - 1);
+ else
+ scnprintf(buffer, BNXT_VPD_FLD_LEN, "0x%x-VF",
+ rdev->en_dev->pdev->device);
+
+ return sysfs_emit(buf, "%s\n", buffer);
+}
+static DEVICE_ATTR_RO(board_id);
+
+static struct attribute *bnxt_re_attributes[] = {
+ &dev_attr_hw_rev.attr,
+ &dev_attr_hca_type.attr,
+ &dev_attr_board_id.attr,
+ NULL
+};
+
+static const struct attribute_group bnxt_re_dev_attr_group = {
+ .attrs = bnxt_re_attributes,
+};
+
+static int bnxt_re_fill_res_mr_entry(struct sk_buff *msg, struct ib_mr *ib_mr)
+{
+ struct bnxt_qplib_hwq *mr_hwq;
+ struct nlattr *table_attr;
+ struct bnxt_re_mr *mr;
+
+ table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER);
+ if (!table_attr)
+ return -EMSGSIZE;
+
+ mr = container_of(ib_mr, struct bnxt_re_mr, ib_mr);
+ mr_hwq = &mr->qplib_mr.hwq;
+
+ if (rdma_nl_put_driver_u32(msg, "page_size",
+ mr_hwq->qe_ppg * mr_hwq->element_size))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "max_elements", mr_hwq->max_elements))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "element_size", mr_hwq->element_size))
+ goto err;
+ if (rdma_nl_put_driver_u64_hex(msg, "hwq", (unsigned long)mr_hwq))
+ goto err;
+ if (rdma_nl_put_driver_u64_hex(msg, "va", mr->qplib_mr.va))
+ goto err;
+
+ nla_nest_end(msg, table_attr);
+ return 0;
+
+err:
+ nla_nest_cancel(msg, table_attr);
+ return -EMSGSIZE;
+}
+
+static int bnxt_re_fill_res_mr_entry_raw(struct sk_buff *msg, struct ib_mr *ib_mr)
+{
+ struct bnxt_re_dev *rdev;
+ struct bnxt_re_mr *mr;
+ int err, len;
+ void *data;
+
+ mr = container_of(ib_mr, struct bnxt_re_mr, ib_mr);
+ rdev = mr->rdev;
+
+ err = bnxt_re_read_context_allowed(rdev);
+ if (err)
+ return err;
+
+ len = bnxt_qplib_is_chip_gen_p7(rdev->chip_ctx) ? BNXT_RE_CONTEXT_TYPE_MRW_SIZE_P7 :
+ BNXT_RE_CONTEXT_TYPE_MRW_SIZE_P5;
+ data = kzalloc(len, GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ err = bnxt_qplib_read_context(&rdev->rcfw, CMDQ_READ_CONTEXT_TYPE_MRW,
+ mr->qplib_mr.lkey, len, data);
+ if (!err)
+ err = nla_put(msg, RDMA_NLDEV_ATTR_RES_RAW, len, data);
+
+ kfree(data);
+ return err;
+}
+
+static int bnxt_re_fill_res_cq_entry(struct sk_buff *msg, struct ib_cq *ib_cq)
+{
+ struct bnxt_qplib_hwq *cq_hwq;
+ struct nlattr *table_attr;
+ struct bnxt_re_cq *cq;
+
+ cq = container_of(ib_cq, struct bnxt_re_cq, ib_cq);
+ cq_hwq = &cq->qplib_cq.hwq;
+
+ table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER);
+ if (!table_attr)
+ return -EMSGSIZE;
+
+ if (rdma_nl_put_driver_u32(msg, "cq_depth", cq_hwq->depth))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "max_elements", cq_hwq->max_elements))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "element_size", cq_hwq->element_size))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "max_wqe", cq->qplib_cq.max_wqe))
+ goto err;
+
+ nla_nest_end(msg, table_attr);
+ return 0;
+
+err:
+ nla_nest_cancel(msg, table_attr);
+ return -EMSGSIZE;
+}
+
+static int bnxt_re_fill_res_cq_entry_raw(struct sk_buff *msg, struct ib_cq *ib_cq)
+{
+ struct bnxt_re_dev *rdev;
+ struct bnxt_re_cq *cq;
+ int err, len;
+ void *data;
+
+ cq = container_of(ib_cq, struct bnxt_re_cq, ib_cq);
+ rdev = cq->rdev;
+
+ err = bnxt_re_read_context_allowed(rdev);
+ if (err)
+ return err;
+
+ len = bnxt_qplib_is_chip_gen_p7(rdev->chip_ctx) ? BNXT_RE_CONTEXT_TYPE_CQ_SIZE_P7 :
+ BNXT_RE_CONTEXT_TYPE_CQ_SIZE_P5;
+ data = kzalloc(len, GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ err = bnxt_qplib_read_context(&rdev->rcfw,
+ CMDQ_READ_CONTEXT_TYPE_CQ,
+ cq->qplib_cq.id, len, data);
+ if (!err)
+ err = nla_put(msg, RDMA_NLDEV_ATTR_RES_RAW, len, data);
+
+ kfree(data);
+ return err;
+}
+
+static int bnxt_re_fill_res_qp_entry(struct sk_buff *msg, struct ib_qp *ib_qp)
+{
+ struct bnxt_qplib_qp *qplib_qp;
+ struct nlattr *table_attr;
+ struct bnxt_re_qp *qp;
+
+ table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER);
+ if (!table_attr)
+ return -EMSGSIZE;
+
+ qp = container_of(ib_qp, struct bnxt_re_qp, ib_qp);
+ qplib_qp = &qp->qplib_qp;
+
+ if (rdma_nl_put_driver_u32(msg, "sq_max_wqe", qplib_qp->sq.max_wqe))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "sq_max_sge", qplib_qp->sq.max_sge))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "sq_wqe_size", qplib_qp->sq.wqe_size))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "sq_swq_start", qplib_qp->sq.swq_start))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "sq_swq_last", qplib_qp->sq.swq_last))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "rq_max_wqe", qplib_qp->rq.max_wqe))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "rq_max_sge", qplib_qp->rq.max_sge))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "rq_wqe_size", qplib_qp->rq.wqe_size))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "rq_swq_start", qplib_qp->rq.swq_start))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "rq_swq_last", qplib_qp->rq.swq_last))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "timeout", qplib_qp->timeout))
+ goto err;
+
+ nla_nest_end(msg, table_attr);
+ return 0;
+
+err:
+ nla_nest_cancel(msg, table_attr);
+ return -EMSGSIZE;
+}
+
+static int bnxt_re_fill_res_qp_entry_raw(struct sk_buff *msg, struct ib_qp *ibqp)
+{
+ struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibqp->device, ibdev);
+ int err, len;
+ void *data;
+
+ err = bnxt_re_read_context_allowed(rdev);
+ if (err)
+ return err;
+
+ len = bnxt_qplib_is_chip_gen_p7(rdev->chip_ctx) ? BNXT_RE_CONTEXT_TYPE_QPC_SIZE_P7 :
+ BNXT_RE_CONTEXT_TYPE_QPC_SIZE_P5;
+ data = kzalloc(len, GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ err = bnxt_qplib_read_context(&rdev->rcfw, CMDQ_READ_CONTEXT_TYPE_QPC,
+ ibqp->qp_num, len, data);
+ if (!err)
+ err = nla_put(msg, RDMA_NLDEV_ATTR_RES_RAW, len, data);
+
+ kfree(data);
+ return err;
+}
+
+static int bnxt_re_fill_res_srq_entry(struct sk_buff *msg, struct ib_srq *ib_srq)
+{
+ struct nlattr *table_attr;
+ struct bnxt_re_srq *srq;
+
+ table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER);
+ if (!table_attr)
+ return -EMSGSIZE;
+
+ srq = container_of(ib_srq, struct bnxt_re_srq, ib_srq);
+
+ if (rdma_nl_put_driver_u32_hex(msg, "wqe_size", srq->qplib_srq.wqe_size))
+ goto err;
+ if (rdma_nl_put_driver_u32_hex(msg, "max_wqe", srq->qplib_srq.max_wqe))
+ goto err;
+ if (rdma_nl_put_driver_u32_hex(msg, "max_sge", srq->qplib_srq.max_sge))
+ goto err;
+ if (rdma_nl_put_driver_u32_hex(msg, "srq_limit", srq->qplib_srq.threshold))
+ goto err;
+
+ nla_nest_end(msg, table_attr);
+ return 0;
+
+err:
+ nla_nest_cancel(msg, table_attr);
+ return -EMSGSIZE;
+}
+
+static int bnxt_re_fill_res_srq_entry_raw(struct sk_buff *msg, struct ib_srq *ib_srq)
+{
+ struct bnxt_re_dev *rdev;
+ struct bnxt_re_srq *srq;
+ int err, len;
+ void *data;
+
+ srq = container_of(ib_srq, struct bnxt_re_srq, ib_srq);
+ rdev = srq->rdev;
+
+ err = bnxt_re_read_context_allowed(rdev);
+ if (err)
+ return err;
+
+ len = bnxt_qplib_is_chip_gen_p7(rdev->chip_ctx) ? BNXT_RE_CONTEXT_TYPE_SRQ_SIZE_P7 :
+ BNXT_RE_CONTEXT_TYPE_SRQ_SIZE_P5;
+
+ data = kzalloc(len, GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ err = bnxt_qplib_read_context(&rdev->rcfw, CMDQ_READ_CONTEXT_TYPE_SRQ,
+ srq->qplib_srq.id, len, data);
+ if (!err)
+ err = nla_put(msg, RDMA_NLDEV_ATTR_RES_RAW, len, data);
+
+ kfree(data);
+ return err;
+}
+
+static const struct ib_device_ops bnxt_re_dev_ops = {
+ .owner = THIS_MODULE,
+ .driver_id = RDMA_DRIVER_BNXT_RE,
+ .uverbs_abi_ver = BNXT_RE_ABI_VERSION,
+
+ .add_gid = bnxt_re_add_gid,
+ .alloc_hw_port_stats = bnxt_re_ib_alloc_hw_port_stats,
+ .alloc_mr = bnxt_re_alloc_mr,
+ .alloc_pd = bnxt_re_alloc_pd,
+ .alloc_ucontext = bnxt_re_alloc_ucontext,
+ .create_ah = bnxt_re_create_ah,
+ .create_cq = bnxt_re_create_cq,
+ .create_qp = bnxt_re_create_qp,
+ .create_srq = bnxt_re_create_srq,
+ .create_user_ah = bnxt_re_create_ah,
+ .dealloc_pd = bnxt_re_dealloc_pd,
+ .dealloc_ucontext = bnxt_re_dealloc_ucontext,
+ .del_gid = bnxt_re_del_gid,
+ .dereg_mr = bnxt_re_dereg_mr,
+ .destroy_ah = bnxt_re_destroy_ah,
+ .destroy_cq = bnxt_re_destroy_cq,
+ .destroy_qp = bnxt_re_destroy_qp,
+ .destroy_srq = bnxt_re_destroy_srq,
+ .device_group = &bnxt_re_dev_attr_group,
+ .disassociate_ucontext = bnxt_re_disassociate_ucontext,
+ .get_dev_fw_str = bnxt_re_query_fw_str,
+ .get_dma_mr = bnxt_re_get_dma_mr,
+ .get_hw_stats = bnxt_re_ib_get_hw_stats,
+ .get_link_layer = bnxt_re_get_link_layer,
+ .get_port_immutable = bnxt_re_get_port_immutable,
+ .map_mr_sg = bnxt_re_map_mr_sg,
+ .mmap = bnxt_re_mmap,
+ .mmap_free = bnxt_re_mmap_free,
+ .modify_qp = bnxt_re_modify_qp,
+ .modify_srq = bnxt_re_modify_srq,
+ .poll_cq = bnxt_re_poll_cq,
+ .post_recv = bnxt_re_post_recv,
+ .post_send = bnxt_re_post_send,
+ .post_srq_recv = bnxt_re_post_srq_recv,
+ .process_mad = bnxt_re_process_mad,
+ .query_ah = bnxt_re_query_ah,
+ .query_device = bnxt_re_query_device,
+ .modify_device = bnxt_re_modify_device,
+ .query_pkey = bnxt_re_query_pkey,
+ .query_port = bnxt_re_query_port,
+ .query_qp = bnxt_re_query_qp,
+ .query_srq = bnxt_re_query_srq,
+ .reg_user_mr = bnxt_re_reg_user_mr,
+ .reg_user_mr_dmabuf = bnxt_re_reg_user_mr_dmabuf,
+ .req_notify_cq = bnxt_re_req_notify_cq,
+ .resize_cq = bnxt_re_resize_cq,
+ .create_flow = bnxt_re_create_flow,
+ .destroy_flow = bnxt_re_destroy_flow,
+ INIT_RDMA_OBJ_SIZE(ib_ah, bnxt_re_ah, ib_ah),
+ INIT_RDMA_OBJ_SIZE(ib_cq, bnxt_re_cq, ib_cq),
+ INIT_RDMA_OBJ_SIZE(ib_pd, bnxt_re_pd, ib_pd),
+ INIT_RDMA_OBJ_SIZE(ib_qp, bnxt_re_qp, ib_qp),
+ INIT_RDMA_OBJ_SIZE(ib_srq, bnxt_re_srq, ib_srq),
+ INIT_RDMA_OBJ_SIZE(ib_ucontext, bnxt_re_ucontext, ib_uctx),
+};
+
+static const struct ib_device_ops restrack_ops = {
+ .fill_res_cq_entry = bnxt_re_fill_res_cq_entry,
+ .fill_res_cq_entry_raw = bnxt_re_fill_res_cq_entry_raw,
+ .fill_res_qp_entry = bnxt_re_fill_res_qp_entry,
+ .fill_res_qp_entry_raw = bnxt_re_fill_res_qp_entry_raw,
+ .fill_res_mr_entry = bnxt_re_fill_res_mr_entry,
+ .fill_res_mr_entry_raw = bnxt_re_fill_res_mr_entry_raw,
+ .fill_res_srq_entry = bnxt_re_fill_res_srq_entry,
+ .fill_res_srq_entry_raw = bnxt_re_fill_res_srq_entry_raw,
+};
+
+static int bnxt_re_register_ib(struct bnxt_re_dev *rdev)
+{
+ struct ib_device *ibdev = &rdev->ibdev;
+ int ret;
+
+ /* ib device init */
+ ibdev->node_type = RDMA_NODE_IB_CA;
+ strscpy(ibdev->node_desc, BNXT_RE_DESC " HCA");
+ ibdev->phys_port_cnt = 1;
+
+ addrconf_addr_eui48((u8 *)&ibdev->node_guid, rdev->netdev->dev_addr);
+
+ ibdev->num_comp_vectors = rdev->nqr->num_msix - 1;
+ ibdev->dev.parent = &rdev->en_dev->pdev->dev;
+ ibdev->local_dma_lkey = BNXT_QPLIB_RSVD_LKEY;
+
+ if (IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS))
+ ibdev->driver_def = bnxt_re_uapi_defs;
+
+ ib_set_device_ops(ibdev, &bnxt_re_dev_ops);
+ ib_set_device_ops(ibdev, &restrack_ops);
+ ret = ib_device_set_netdev(&rdev->ibdev, rdev->netdev, 1);
+ if (ret)
+ return ret;
+
+ dma_set_max_seg_size(&rdev->en_dev->pdev->dev, UINT_MAX);
+ ibdev->uverbs_cmd_mask |= BIT_ULL(IB_USER_VERBS_CMD_POLL_CQ);
+ return ib_register_device(ibdev, "bnxt_re%d", &rdev->en_dev->pdev->dev);
+}
+
+static struct bnxt_re_dev *bnxt_re_dev_add(struct auxiliary_device *adev,
+ struct bnxt_en_dev *en_dev)
+{
+ struct bnxt_re_dev *rdev;
+
+ /* Allocate bnxt_re_dev instance here */
+ rdev = ib_alloc_device(bnxt_re_dev, ibdev);
+ if (!rdev) {
+ ibdev_err(NULL, "%s: bnxt_re_dev allocation failure!",
+ ROCE_DRV_MODULE_NAME);
+ return NULL;
+ }
+ /* Default values */
+ rdev->netdev = en_dev->net;
+ rdev->en_dev = en_dev;
+ rdev->adev = adev;
+ rdev->id = rdev->en_dev->pdev->devfn;
+ INIT_LIST_HEAD(&rdev->qp_list);
+ mutex_init(&rdev->qp_lock);
+ mutex_init(&rdev->pacing.dbq_lock);
+ atomic_set(&rdev->stats.res.qp_count, 0);
+ atomic_set(&rdev->stats.res.cq_count, 0);
+ atomic_set(&rdev->stats.res.srq_count, 0);
+ atomic_set(&rdev->stats.res.mr_count, 0);
+ atomic_set(&rdev->stats.res.mw_count, 0);
+ atomic_set(&rdev->stats.res.ah_count, 0);
+ atomic_set(&rdev->stats.res.pd_count, 0);
+ rdev->cosq[0] = 0xFFFF;
+ rdev->cosq[1] = 0xFFFF;
+ rdev->cq_coalescing.enable = 1;
+ rdev->cq_coalescing.buf_maxtime = BNXT_QPLIB_CQ_COAL_DEF_BUF_MAXTIME;
+ if (bnxt_re_chip_gen_p7(en_dev->chip_num)) {
+ rdev->cq_coalescing.normal_maxbuf = BNXT_QPLIB_CQ_COAL_DEF_NORMAL_MAXBUF_P7;
+ rdev->cq_coalescing.during_maxbuf = BNXT_QPLIB_CQ_COAL_DEF_DURING_MAXBUF_P7;
+ } else {
+ rdev->cq_coalescing.normal_maxbuf = BNXT_QPLIB_CQ_COAL_DEF_NORMAL_MAXBUF_P5;
+ rdev->cq_coalescing.during_maxbuf = BNXT_QPLIB_CQ_COAL_DEF_DURING_MAXBUF_P5;
+ }
+ rdev->cq_coalescing.en_ring_idle_mode = BNXT_QPLIB_CQ_COAL_DEF_EN_RING_IDLE_MODE;
+
+ return rdev;
+}
+
+static int bnxt_re_handle_unaffi_async_event(struct creq_func_event
+ *unaffi_async)
+{
+ switch (unaffi_async->event) {
+ case CREQ_FUNC_EVENT_EVENT_TX_WQE_ERROR:
+ break;
+ case CREQ_FUNC_EVENT_EVENT_TX_DATA_ERROR:
+ break;
+ case CREQ_FUNC_EVENT_EVENT_RX_WQE_ERROR:
+ break;
+ case CREQ_FUNC_EVENT_EVENT_RX_DATA_ERROR:
+ break;
+ case CREQ_FUNC_EVENT_EVENT_CQ_ERROR:
+ break;
+ case CREQ_FUNC_EVENT_EVENT_TQM_ERROR:
+ break;
+ case CREQ_FUNC_EVENT_EVENT_CFCQ_ERROR:
+ break;
+ case CREQ_FUNC_EVENT_EVENT_CFCS_ERROR:
+ break;
+ case CREQ_FUNC_EVENT_EVENT_CFCC_ERROR:
+ break;
+ case CREQ_FUNC_EVENT_EVENT_CFCM_ERROR:
+ break;
+ case CREQ_FUNC_EVENT_EVENT_TIM_ERROR:
+ break;
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int bnxt_re_handle_qp_async_event(struct creq_qp_event *qp_event,
+ struct bnxt_re_qp *qp)
+{
+ struct creq_qp_error_notification *err_event;
+ struct bnxt_re_srq *srq = NULL;
+ struct ib_event event = {};
+ unsigned int flags;
+
+ if (qp->qplib_qp.srq)
+ srq = container_of(qp->qplib_qp.srq, struct bnxt_re_srq,
+ qplib_srq);
+
+ if (qp->qplib_qp.state == CMDQ_MODIFY_QP_NEW_STATE_ERR &&
+ rdma_is_kernel_res(&qp->ib_qp.res)) {
+ flags = bnxt_re_lock_cqs(qp);
+ bnxt_qplib_add_flush_qp(&qp->qplib_qp);
+ bnxt_re_unlock_cqs(qp, flags);
+ }
+
+ event.device = &qp->rdev->ibdev;
+ event.element.qp = &qp->ib_qp;
+ event.event = IB_EVENT_QP_FATAL;
+
+ err_event = (struct creq_qp_error_notification *)qp_event;
+
+ switch (err_event->req_err_state_reason) {
+ case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_OPCODE_ERROR:
+ case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_TIMEOUT_RETRY_LIMIT:
+ case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_RNR_TIMEOUT_RETRY_LIMIT:
+ case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_NAK_ARRIVAL_2:
+ case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_NAK_ARRIVAL_3:
+ case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_INVALID_READ_RESP:
+ case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_ILLEGAL_BIND:
+ case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_ILLEGAL_FAST_REG:
+ case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_ILLEGAL_INVALIDATE:
+ case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_RETRAN_LOCAL_ERROR:
+ case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_AV_DOMAIN_ERROR:
+ case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_PROD_WQE_MSMTCH_ERROR:
+ case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_PSN_RANGE_CHECK_ERROR:
+ event.event = IB_EVENT_QP_ACCESS_ERR;
+ break;
+ case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_NAK_ARRIVAL_1:
+ case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_NAK_ARRIVAL_4:
+ case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_READ_RESP_LENGTH:
+ case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_WQE_FORMAT_ERROR:
+ case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_ORRQ_FORMAT_ERROR:
+ case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_INVALID_AVID_ERROR:
+ case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_SERV_TYPE_ERROR:
+ case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_INVALID_OP_ERROR:
+ event.event = IB_EVENT_QP_REQ_ERR;
+ break;
+ case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_RX_MEMORY_ERROR:
+ case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_TX_MEMORY_ERROR:
+ case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_CMP_ERROR:
+ case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_CQ_LOAD_ERROR:
+ case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_TX_PCI_ERROR:
+ case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_RX_PCI_ERROR:
+ case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_RETX_SETUP_ERROR:
+ event.event = IB_EVENT_QP_FATAL;
+ break;
+
+ default:
+ break;
+ }
+
+ switch (err_event->res_err_state_reason) {
+ case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_EXCEED_MAX:
+ case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_PAYLOAD_LENGTH_MISMATCH:
+ case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_PSN_SEQ_ERROR_RETRY_LIMIT:
+ case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_RX_INVALID_R_KEY:
+ case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_RX_DOMAIN_ERROR:
+ case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_RX_NO_PERMISSION:
+ case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_RX_RANGE_ERROR:
+ case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_TX_INVALID_R_KEY:
+ case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_TX_DOMAIN_ERROR:
+ case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_TX_NO_PERMISSION:
+ case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_TX_RANGE_ERROR:
+ case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_UNALIGN_ATOMIC:
+ case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_PSN_NOT_FOUND:
+ case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_INVALID_DUP_RKEY:
+ case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_IRRQ_FORMAT_ERROR:
+ event.event = IB_EVENT_QP_ACCESS_ERR;
+ break;
+ case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_EXCEEDS_WQE:
+ case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_WQE_FORMAT_ERROR:
+ case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_UNSUPPORTED_OPCODE:
+ case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_REM_INVALIDATE:
+ case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_OPCODE_ERROR:
+ event.event = IB_EVENT_QP_REQ_ERR;
+ break;
+ case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_IRRQ_OFLOW:
+ case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_CMP_ERROR:
+ case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_CQ_LOAD_ERROR:
+ case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_TX_PCI_ERROR:
+ case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_RX_PCI_ERROR:
+ case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_MEMORY_ERROR:
+ event.event = IB_EVENT_QP_FATAL;
+ break;
+ case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_SRQ_LOAD_ERROR:
+ case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_SRQ_ERROR:
+ if (srq)
+ event.event = IB_EVENT_SRQ_ERR;
+ break;
+ default:
+ break;
+ }
+
+ if (err_event->res_err_state_reason || err_event->req_err_state_reason) {
+ ibdev_dbg(&qp->rdev->ibdev,
+ "%s %s qp_id: %d cons (%d %d) req (%d %d) res (%d %d)\n",
+ __func__, rdma_is_kernel_res(&qp->ib_qp.res) ? "kernel" : "user",
+ qp->qplib_qp.id,
+ err_event->sq_cons_idx,
+ err_event->rq_cons_idx,
+ err_event->req_slow_path_state,
+ err_event->req_err_state_reason,
+ err_event->res_slow_path_state,
+ err_event->res_err_state_reason);
+ } else {
+ if (srq)
+ event.event = IB_EVENT_QP_LAST_WQE_REACHED;
+ }
+
+ if (event.event == IB_EVENT_SRQ_ERR && srq->ib_srq.event_handler) {
+ (*srq->ib_srq.event_handler)(&event,
+ srq->ib_srq.srq_context);
+ } else if (event.device && qp->ib_qp.event_handler) {
+ qp->ib_qp.event_handler(&event, qp->ib_qp.qp_context);
+ }
+
+ return 0;
+}
+
+static int bnxt_re_handle_cq_async_error(void *event, struct bnxt_re_cq *cq)
+{
+ struct creq_cq_error_notification *cqerr;
+ struct ib_event ibevent = {};
+
+ cqerr = event;
+ switch (cqerr->cq_err_reason) {
+ case CREQ_CQ_ERROR_NOTIFICATION_CQ_ERR_REASON_REQ_CQ_INVALID_ERROR:
+ case CREQ_CQ_ERROR_NOTIFICATION_CQ_ERR_REASON_REQ_CQ_OVERFLOW_ERROR:
+ case CREQ_CQ_ERROR_NOTIFICATION_CQ_ERR_REASON_REQ_CQ_LOAD_ERROR:
+ case CREQ_CQ_ERROR_NOTIFICATION_CQ_ERR_REASON_RES_CQ_INVALID_ERROR:
+ case CREQ_CQ_ERROR_NOTIFICATION_CQ_ERR_REASON_RES_CQ_OVERFLOW_ERROR:
+ case CREQ_CQ_ERROR_NOTIFICATION_CQ_ERR_REASON_RES_CQ_LOAD_ERROR:
+ ibevent.event = IB_EVENT_CQ_ERR;
+ break;
+ default:
+ break;
+ }
+
+ if (ibevent.event == IB_EVENT_CQ_ERR && cq->ib_cq.event_handler) {
+ ibevent.element.cq = &cq->ib_cq;
+ ibevent.device = &cq->rdev->ibdev;
+
+ ibdev_dbg(&cq->rdev->ibdev,
+ "%s err reason %d\n", __func__, cqerr->cq_err_reason);
+ cq->ib_cq.event_handler(&ibevent, cq->ib_cq.cq_context);
+ }
+
+ return 0;
+}
+
+static int bnxt_re_handle_affi_async_event(struct creq_qp_event *affi_async,
+ void *obj)
+{
+ struct bnxt_qplib_qp *lib_qp;
+ struct bnxt_qplib_cq *lib_cq;
+ struct bnxt_re_qp *qp;
+ struct bnxt_re_cq *cq;
+ int rc = 0;
+ u8 event;
+
+ if (!obj)
+ return rc; /* QP was already dead, still return success */
+
+ event = affi_async->event;
+ switch (event) {
+ case CREQ_QP_EVENT_EVENT_QP_ERROR_NOTIFICATION:
+ lib_qp = obj;
+ qp = container_of(lib_qp, struct bnxt_re_qp, qplib_qp);
+ rc = bnxt_re_handle_qp_async_event(affi_async, qp);
+ break;
+ case CREQ_QP_EVENT_EVENT_CQ_ERROR_NOTIFICATION:
+ lib_cq = obj;
+ cq = container_of(lib_cq, struct bnxt_re_cq, qplib_cq);
+ rc = bnxt_re_handle_cq_async_error(affi_async, cq);
+ break;
+ default:
+ rc = -EINVAL;
+ }
+ return rc;
+}
+
+static int bnxt_re_aeq_handler(struct bnxt_qplib_rcfw *rcfw,
+ void *aeqe, void *obj)
+{
+ struct creq_qp_event *affi_async;
+ struct creq_func_event *unaffi_async;
+ u8 type;
+ int rc;
+
+ type = ((struct creq_base *)aeqe)->type;
+ if (type == CREQ_BASE_TYPE_FUNC_EVENT) {
+ unaffi_async = aeqe;
+ rc = bnxt_re_handle_unaffi_async_event(unaffi_async);
+ } else {
+ affi_async = aeqe;
+ rc = bnxt_re_handle_affi_async_event(affi_async, obj);
+ }
+
+ return rc;
+}
+
+static int bnxt_re_srqn_handler(struct bnxt_qplib_nq *nq,
+ struct bnxt_qplib_srq *handle, u8 event)
+{
+ struct bnxt_re_srq *srq = container_of(handle, struct bnxt_re_srq,
+ qplib_srq);
+ struct ib_event ib_event;
+
+ ib_event.device = &srq->rdev->ibdev;
+ ib_event.element.srq = &srq->ib_srq;
+
+ if (srq->ib_srq.event_handler) {
+ if (event == NQ_SRQ_EVENT_EVENT_SRQ_THRESHOLD_EVENT)
+ ib_event.event = IB_EVENT_SRQ_LIMIT_REACHED;
+ (*srq->ib_srq.event_handler)(&ib_event,
+ srq->ib_srq.srq_context);
+ }
+ return 0;
+}
+
+static int bnxt_re_cqn_handler(struct bnxt_qplib_nq *nq,
+ struct bnxt_qplib_cq *handle)
+{
+ struct bnxt_re_cq *cq = container_of(handle, struct bnxt_re_cq,
+ qplib_cq);
+
+ if (cq->ib_cq.comp_handler)
+ (*cq->ib_cq.comp_handler)(&cq->ib_cq, cq->ib_cq.cq_context);
+
+ return 0;
+}
+
+static void bnxt_re_cleanup_res(struct bnxt_re_dev *rdev)
+{
+ int i;
+
+ for (i = 1; i < rdev->nqr->num_msix; i++)
+ bnxt_qplib_disable_nq(&rdev->nqr->nq[i - 1]);
+
+ if (rdev->qplib_res.rcfw)
+ bnxt_qplib_cleanup_res(&rdev->qplib_res);
+}
+
+static int bnxt_re_init_res(struct bnxt_re_dev *rdev)
+{
+ int num_vec_enabled = 0;
+ int rc = 0, i;
+ u32 db_offt;
+
+ bnxt_qplib_init_res(&rdev->qplib_res);
+
+ mutex_init(&rdev->nqr->load_lock);
+
+ for (i = 1; i < rdev->nqr->num_msix ; i++) {
+ db_offt = rdev->nqr->msix_entries[i].db_offset;
+ rc = bnxt_qplib_enable_nq(rdev->en_dev->pdev, &rdev->nqr->nq[i - 1],
+ i - 1, rdev->nqr->msix_entries[i].vector,
+ db_offt, &bnxt_re_cqn_handler,
+ &bnxt_re_srqn_handler);
+ if (rc) {
+ ibdev_err(&rdev->ibdev,
+ "Failed to enable NQ with rc = 0x%x", rc);
+ goto fail;
+ }
+ num_vec_enabled++;
+ }
+ return 0;
+fail:
+ for (i = num_vec_enabled; i >= 0; i--)
+ bnxt_qplib_disable_nq(&rdev->nqr->nq[i]);
+ return rc;
+}
+
+static void bnxt_re_free_nq_res(struct bnxt_re_dev *rdev)
+{
+ struct bnxt_qplib_nq *nq;
+ u8 type;
+ int i;
+
+ for (i = 0; i < rdev->nqr->num_msix - 1; i++) {
+ type = bnxt_qplib_get_ring_type(rdev->chip_ctx);
+ nq = &rdev->nqr->nq[i];
+ bnxt_re_net_ring_free(rdev, nq->ring_id, type);
+ bnxt_qplib_free_nq(nq);
+ nq->res = NULL;
+ }
+}
+
+static void bnxt_re_free_res(struct bnxt_re_dev *rdev)
+{
+ bnxt_re_free_nq_res(rdev);
+
+ if (rdev->qplib_res.dpi_tbl.max) {
+ bnxt_qplib_dealloc_dpi(&rdev->qplib_res,
+ &rdev->dpi_privileged);
+ }
+ if (rdev->qplib_res.rcfw) {
+ bnxt_qplib_free_res(&rdev->qplib_res);
+ rdev->qplib_res.rcfw = NULL;
+ }
+}
+
+static int bnxt_re_alloc_res(struct bnxt_re_dev *rdev)
+{
+ struct bnxt_re_ring_attr rattr = {};
+ int num_vec_created = 0;
+ int rc, i;
+ u8 type;
+
+ /* Configure and allocate resources for qplib */
+ rdev->qplib_res.rcfw = &rdev->rcfw;
+ rc = bnxt_qplib_get_dev_attr(&rdev->rcfw);
+ if (rc)
+ goto fail;
+
+ rc = bnxt_qplib_alloc_res(&rdev->qplib_res, rdev->netdev);
+ if (rc)
+ goto fail;
+
+ rc = bnxt_qplib_alloc_dpi(&rdev->qplib_res,
+ &rdev->dpi_privileged,
+ rdev, BNXT_QPLIB_DPI_TYPE_KERNEL);
+ if (rc)
+ goto dealloc_res;
+
+ for (i = 0; i < rdev->nqr->num_msix - 1; i++) {
+ struct bnxt_qplib_nq *nq;
+
+ nq = &rdev->nqr->nq[i];
+ nq->hwq.max_elements = BNXT_QPLIB_NQE_MAX_CNT;
+ rc = bnxt_qplib_alloc_nq(&rdev->qplib_res, nq);
+ if (rc) {
+ ibdev_err(&rdev->ibdev, "Alloc Failed NQ%d rc:%#x",
+ i, rc);
+ goto free_nq;
+ }
+ type = bnxt_qplib_get_ring_type(rdev->chip_ctx);
+ rattr.dma_arr = nq->hwq.pbl[PBL_LVL_0].pg_map_arr;
+ rattr.pages = nq->hwq.pbl[rdev->nqr->nq[i].hwq.level].pg_count;
+ rattr.type = type;
+ rattr.mode = RING_ALLOC_REQ_INT_MODE_MSIX;
+ rattr.depth = BNXT_QPLIB_NQE_MAX_CNT - 1;
+ rattr.lrid = rdev->nqr->msix_entries[i + 1].ring_idx;
+ rc = bnxt_re_net_ring_alloc(rdev, &rattr, &nq->ring_id);
+ if (rc) {
+ ibdev_err(&rdev->ibdev,
+ "Failed to allocate NQ fw id with rc = 0x%x",
+ rc);
+ bnxt_qplib_free_nq(nq);
+ goto free_nq;
+ }
+ num_vec_created++;
+ }
+ return 0;
+free_nq:
+ for (i = num_vec_created - 1; i >= 0; i--) {
+ type = bnxt_qplib_get_ring_type(rdev->chip_ctx);
+ bnxt_re_net_ring_free(rdev, rdev->nqr->nq[i].ring_id, type);
+ bnxt_qplib_free_nq(&rdev->nqr->nq[i]);
+ }
+ bnxt_qplib_dealloc_dpi(&rdev->qplib_res,
+ &rdev->dpi_privileged);
+dealloc_res:
+ bnxt_qplib_free_res(&rdev->qplib_res);
+
+fail:
+ rdev->qplib_res.rcfw = NULL;
+ return rc;
+}
+
+static void bnxt_re_dispatch_event(struct ib_device *ibdev, struct ib_qp *qp,
+ u8 port_num, enum ib_event_type event)
+{
+ struct ib_event ib_event;
+
+ ib_event.device = ibdev;
+ if (qp) {
+ ib_event.element.qp = qp;
+ ib_event.event = event;
+ if (qp->event_handler)
+ qp->event_handler(&ib_event, qp->qp_context);
+
+ } else {
+ ib_event.element.port_num = port_num;
+ ib_event.event = event;
+ ib_dispatch_event(&ib_event);
+ }
+}
+
+static bool bnxt_re_is_qp1_or_shadow_qp(struct bnxt_re_dev *rdev,
+ struct bnxt_re_qp *qp)
+{
+ return (qp->ib_qp.qp_type == IB_QPT_GSI) ||
+ (qp == rdev->gsi_ctx.gsi_sqp);
+}
+
+static void bnxt_re_dev_stop(struct bnxt_re_dev *rdev)
+{
+ struct bnxt_re_qp *qp;
+
+ mutex_lock(&rdev->qp_lock);
+ list_for_each_entry(qp, &rdev->qp_list, list) {
+ /* Modify the state of all QPs except QP1/Shadow QP */
+ if (!bnxt_re_is_qp1_or_shadow_qp(rdev, qp)) {
+ if (qp->qplib_qp.state !=
+ CMDQ_MODIFY_QP_NEW_STATE_RESET &&
+ qp->qplib_qp.state !=
+ CMDQ_MODIFY_QP_NEW_STATE_ERR)
+ bnxt_re_dispatch_event(&rdev->ibdev, &qp->ib_qp,
+ 1, IB_EVENT_QP_FATAL);
+ }
+ }
+ mutex_unlock(&rdev->qp_lock);
+}
+
+static void bnxt_re_net_unregister_async_event(struct bnxt_re_dev *rdev)
+{
+ if (rdev->is_virtfn)
+ return;
+
+ memset(&rdev->event_bitmap, 0, sizeof(rdev->event_bitmap));
+ bnxt_register_async_events(rdev->en_dev, &rdev->event_bitmap,
+ ASYNC_EVENT_CMPL_EVENT_ID_DCB_CONFIG_CHANGE);
+}
+
+static void bnxt_re_net_register_async_event(struct bnxt_re_dev *rdev)
+{
+ if (rdev->is_virtfn)
+ return;
+
+ rdev->event_bitmap |= (1 << ASYNC_EVENT_CMPL_EVENT_ID_DCB_CONFIG_CHANGE);
+ bnxt_register_async_events(rdev->en_dev, &rdev->event_bitmap,
+ ASYNC_EVENT_CMPL_EVENT_ID_DCB_CONFIG_CHANGE);
+}
+
+static void bnxt_re_read_vpd_info(struct bnxt_re_dev *rdev)
+{
+ struct pci_dev *pdev = rdev->en_dev->pdev;
+ unsigned int vpd_size, kw_len;
+ int pos, size;
+ u8 *vpd_data;
+
+ vpd_data = pci_vpd_alloc(pdev, &vpd_size);
+ if (IS_ERR(vpd_data)) {
+ pci_warn(pdev, "Unable to read VPD, err=%pe\n", vpd_data);
+ return;
+ }
+
+ pos = pci_vpd_find_ro_info_keyword(vpd_data, vpd_size,
+ PCI_VPD_RO_KEYWORD_PARTNO, &kw_len);
+ if (pos < 0)
+ goto free;
+
+ size = min_t(int, kw_len, BNXT_VPD_FLD_LEN - 1);
+ memcpy(rdev->board_partno, &vpd_data[pos], size);
+free:
+ kfree(vpd_data);
+}
+
+static int bnxt_re_query_hwrm_intf_version(struct bnxt_re_dev *rdev)
+{
+ struct bnxt_en_dev *en_dev = rdev->en_dev;
+ struct hwrm_ver_get_output resp = {};
+ struct hwrm_ver_get_input req = {};
+ struct bnxt_qplib_chip_ctx *cctx;
+ struct bnxt_fw_msg fw_msg = {};
+ int rc;
+
+ bnxt_re_init_hwrm_hdr((void *)&req, HWRM_VER_GET);
+ req.hwrm_intf_maj = HWRM_VERSION_MAJOR;
+ req.hwrm_intf_min = HWRM_VERSION_MINOR;
+ req.hwrm_intf_upd = HWRM_VERSION_UPDATE;
+ bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
+ sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
+ rc = bnxt_send_msg(en_dev, &fw_msg);
+ if (rc) {
+ ibdev_err(&rdev->ibdev, "Failed to query HW version, rc = 0x%x",
+ rc);
+ return rc;
+ }
+
+ cctx = rdev->chip_ctx;
+ cctx->hwrm_intf_ver =
+ (u64)le16_to_cpu(resp.hwrm_intf_major) << 48 |
+ (u64)le16_to_cpu(resp.hwrm_intf_minor) << 32 |
+ (u64)le16_to_cpu(resp.hwrm_intf_build) << 16 |
+ le16_to_cpu(resp.hwrm_intf_patch);
+
+ cctx->hwrm_cmd_max_timeout = le16_to_cpu(resp.max_req_timeout);
+
+ if (!cctx->hwrm_cmd_max_timeout)
+ cctx->hwrm_cmd_max_timeout = RCFW_FW_STALL_MAX_TIMEOUT;
+
+ return 0;
+}
+
+static int bnxt_re_ib_init(struct bnxt_re_dev *rdev)
+{
+ int rc;
+ u32 event;
+
+ /* Register ib dev */
+ rc = bnxt_re_register_ib(rdev);
+ if (rc) {
+ pr_err("Failed to register with IB: %#x\n", rc);
+ return rc;
+ }
+ dev_info(rdev_to_dev(rdev), "Device registered with IB successfully");
+ set_bit(BNXT_RE_FLAG_ISSUE_ROCE_STATS, &rdev->flags);
+
+ event = netif_running(rdev->netdev) && netif_carrier_ok(rdev->netdev) ?
+ IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR;
+
+ bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1, event);
+
+ return rc;
+}
+
+static int bnxt_re_alloc_nqr_mem(struct bnxt_re_dev *rdev)
+{
+ rdev->nqr = kzalloc(sizeof(*rdev->nqr), GFP_KERNEL);
+ if (!rdev->nqr)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static void bnxt_re_free_nqr_mem(struct bnxt_re_dev *rdev)
+{
+ kfree(rdev->nqr);
+ rdev->nqr = NULL;
+}
+
+/* When DEL_GID fails, driver is not freeing GID ctx memory.
+ * To avoid the memory leak, free the memory during unload
+ */
+static void bnxt_re_free_gid_ctx(struct bnxt_re_dev *rdev)
+{
+ struct bnxt_qplib_sgid_tbl *sgid_tbl = &rdev->qplib_res.sgid_tbl;
+ struct bnxt_re_gid_ctx *ctx, **ctx_tbl;
+ int i;
+
+ if (!sgid_tbl->active)
+ return;
+
+ ctx_tbl = sgid_tbl->ctx;
+ for (i = 0; i < sgid_tbl->max; i++) {
+ if (sgid_tbl->hw_id[i] == 0xFFFF)
+ continue;
+
+ ctx = ctx_tbl[i];
+ kfree(ctx);
+ }
+}
+
+static int bnxt_re_get_stats_ctx(struct bnxt_re_dev *rdev)
+{
+ struct bnxt_qplib_ctx *hctx = &rdev->qplib_ctx;
+ struct bnxt_qplib_res *res = &rdev->qplib_res;
+ int rc;
+
+ rc = bnxt_qplib_alloc_stats_ctx(res->pdev, res->cctx, &hctx->stats);
+ if (rc)
+ return rc;
+
+ rc = bnxt_re_net_stats_ctx_alloc(rdev, &hctx->stats);
+ if (rc)
+ goto free_stat_mem;
+
+ return 0;
+free_stat_mem:
+ bnxt_qplib_free_stats_ctx(res->pdev, &hctx->stats);
+
+ return rc;
+}
+
+static int bnxt_re_get_stats3_ctx(struct bnxt_re_dev *rdev)
+{
+ struct bnxt_qplib_ctx *hctx = &rdev->qplib_ctx;
+ struct bnxt_qplib_res *res = &rdev->qplib_res;
+ int rc;
+
+ if (!rdev->rcfw.roce_mirror)
+ return 0;
+
+ rc = bnxt_qplib_alloc_stats_ctx(res->pdev, res->cctx, &hctx->stats3);
+ if (rc)
+ return rc;
+
+ rc = bnxt_re_net_stats_ctx_alloc(rdev, &hctx->stats3);
+ if (rc)
+ goto free_stat_mem;
+
+ return 0;
+free_stat_mem:
+ bnxt_qplib_free_stats_ctx(res->pdev, &hctx->stats3);
+
+ return rc;
+}
+
+static void bnxt_re_put_stats3_ctx(struct bnxt_re_dev *rdev)
+{
+ struct bnxt_qplib_ctx *hctx = &rdev->qplib_ctx;
+ struct bnxt_qplib_res *res = &rdev->qplib_res;
+
+ if (!rdev->rcfw.roce_mirror)
+ return;
+
+ bnxt_re_net_stats_ctx_free(rdev, hctx->stats3.fw_id);
+ bnxt_qplib_free_stats_ctx(res->pdev, &hctx->stats3);
+}
+
+static void bnxt_re_put_stats_ctx(struct bnxt_re_dev *rdev)
+{
+ struct bnxt_qplib_ctx *hctx = &rdev->qplib_ctx;
+ struct bnxt_qplib_res *res = &rdev->qplib_res;
+
+ bnxt_re_net_stats_ctx_free(rdev, hctx->stats.fw_id);
+ bnxt_qplib_free_stats_ctx(res->pdev, &hctx->stats);
+}
+
+static void bnxt_re_dev_uninit(struct bnxt_re_dev *rdev, u8 op_type)
+{
+ u8 type;
+ int rc;
+
+ bnxt_re_debugfs_rem_pdev(rdev);
+
+ bnxt_re_net_unregister_async_event(rdev);
+ bnxt_re_uninit_dcb_wq(rdev);
+
+ bnxt_re_put_stats3_ctx(rdev);
+
+ bnxt_re_free_gid_ctx(rdev);
+ if (test_and_clear_bit(BNXT_RE_FLAG_RESOURCES_INITIALIZED,
+ &rdev->flags))
+ bnxt_re_cleanup_res(rdev);
+ if (test_and_clear_bit(BNXT_RE_FLAG_RESOURCES_ALLOCATED, &rdev->flags))
+ bnxt_re_free_res(rdev);
+
+ if (test_and_clear_bit(BNXT_RE_FLAG_RCFW_CHANNEL_EN, &rdev->flags)) {
+ rc = bnxt_qplib_deinit_rcfw(&rdev->rcfw);
+ if (rc)
+ ibdev_warn(&rdev->ibdev,
+ "Failed to deinitialize RCFW: %#x", rc);
+ bnxt_re_put_stats_ctx(rdev);
+ bnxt_qplib_free_hwctx(&rdev->qplib_res, &rdev->qplib_ctx);
+ bnxt_qplib_disable_rcfw_channel(&rdev->rcfw);
+ type = bnxt_qplib_get_ring_type(rdev->chip_ctx);
+ bnxt_re_net_ring_free(rdev, rdev->rcfw.creq.ring_id, type);
+ bnxt_qplib_free_rcfw_channel(&rdev->rcfw);
+ }
+
+ rdev->nqr->num_msix = 0;
+
+ if (rdev->pacing.dbr_pacing)
+ bnxt_re_deinitialize_dbr_pacing(rdev);
+
+ bnxt_re_free_nqr_mem(rdev);
+ bnxt_re_destroy_chip_ctx(rdev);
+ if (op_type == BNXT_RE_COMPLETE_REMOVE) {
+ if (test_and_clear_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags))
+ bnxt_unregister_dev(rdev->en_dev);
+ }
+}
+
+static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 op_type)
+{
+ struct bnxt_re_ring_attr rattr = {};
+ struct bnxt_qplib_creq_ctx *creq;
+ u32 db_offt;
+ int vid;
+ u8 type;
+ int rc;
+
+ if (op_type == BNXT_RE_COMPLETE_INIT) {
+ /* Registered a new RoCE device instance to netdev */
+ rc = bnxt_re_register_netdev(rdev);
+ if (rc) {
+ ibdev_err(&rdev->ibdev,
+ "Failed to register with Ethernet driver, rc %d\n",
+ rc);
+ return rc;
+ }
+ }
+ set_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags);
+
+ if (rdev->en_dev->ulp_tbl->msix_requested < BNXT_RE_MIN_MSIX) {
+ ibdev_err(&rdev->ibdev,
+ "RoCE requires minimum 2 MSI-X vectors, but only %d reserved\n",
+ rdev->en_dev->ulp_tbl->msix_requested);
+ bnxt_unregister_dev(rdev->en_dev);
+ clear_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags);
+ return -EINVAL;
+ }
+ ibdev_dbg(&rdev->ibdev, "Got %d MSI-X vectors\n",
+ rdev->en_dev->ulp_tbl->msix_requested);
+
+ rc = bnxt_re_setup_chip_ctx(rdev);
+ if (rc) {
+ bnxt_unregister_dev(rdev->en_dev);
+ clear_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags);
+ ibdev_err(&rdev->ibdev, "Failed to get chip context\n");
+ return -EINVAL;
+ }
+
+ rc = bnxt_re_alloc_nqr_mem(rdev);
+ if (rc) {
+ bnxt_re_destroy_chip_ctx(rdev);
+ bnxt_unregister_dev(rdev->en_dev);
+ clear_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags);
+ return rc;
+ }
+ rdev->nqr->num_msix = rdev->en_dev->ulp_tbl->msix_requested;
+ memcpy(rdev->nqr->msix_entries, rdev->en_dev->msix_entries,
+ sizeof(struct bnxt_msix_entry) * rdev->nqr->num_msix);
+
+ /* Check whether VF or PF */
+ bnxt_re_get_sriov_func_type(rdev);
+
+ /* Establish RCFW Communication Channel to initialize the context
+ * memory for the function and all child VFs
+ */
+ rc = bnxt_qplib_alloc_rcfw_channel(&rdev->qplib_res, &rdev->rcfw,
+ &rdev->qplib_ctx);
+ if (rc) {
+ ibdev_err(&rdev->ibdev,
+ "Failed to allocate RCFW Channel: %#x\n", rc);
+ goto fail;
+ }
+
+ type = bnxt_qplib_get_ring_type(rdev->chip_ctx);
+ creq = &rdev->rcfw.creq;
+ rattr.dma_arr = creq->hwq.pbl[PBL_LVL_0].pg_map_arr;
+ rattr.pages = creq->hwq.pbl[creq->hwq.level].pg_count;
+ rattr.type = type;
+ rattr.mode = RING_ALLOC_REQ_INT_MODE_MSIX;
+ rattr.depth = BNXT_QPLIB_CREQE_MAX_CNT - 1;
+ rattr.lrid = rdev->nqr->msix_entries[BNXT_RE_AEQ_IDX].ring_idx;
+ rc = bnxt_re_net_ring_alloc(rdev, &rattr, &creq->ring_id);
+ if (rc) {
+ ibdev_err(&rdev->ibdev, "Failed to allocate CREQ: %#x\n", rc);
+ goto free_rcfw;
+ }
+ db_offt = rdev->nqr->msix_entries[BNXT_RE_AEQ_IDX].db_offset;
+ vid = rdev->nqr->msix_entries[BNXT_RE_AEQ_IDX].vector;
+ rc = bnxt_qplib_enable_rcfw_channel(&rdev->rcfw,
+ vid, db_offt,
+ &bnxt_re_aeq_handler);
+ if (rc) {
+ ibdev_err(&rdev->ibdev, "Failed to enable RCFW channel: %#x\n",
+ rc);
+ goto free_ring;
+ }
+
+ if (bnxt_qplib_dbr_pacing_en(rdev->chip_ctx)) {
+ rc = bnxt_re_initialize_dbr_pacing(rdev);
+ if (!rc) {
+ rdev->pacing.dbr_pacing = true;
+ } else {
+ ibdev_err(&rdev->ibdev,
+ "DBR pacing disabled with error : %d\n", rc);
+ rdev->pacing.dbr_pacing = false;
+ }
+ }
+ rc = bnxt_qplib_get_dev_attr(&rdev->rcfw);
+ if (rc)
+ goto disable_rcfw;
+
+ bnxt_qplib_query_version(&rdev->rcfw);
+ bnxt_re_set_resource_limits(rdev);
+
+ if (!rdev->is_virtfn &&
+ !bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx)) {
+ rc = bnxt_qplib_alloc_hwctx(&rdev->qplib_res, &rdev->qplib_ctx);
+ if (rc) {
+ ibdev_err(&rdev->ibdev,
+ "Failed to allocate hw context: %#x\n", rc);
+ goto disable_rcfw;
+ }
+ }
+
+ rc = bnxt_re_get_stats_ctx(rdev);
+ if (rc) {
+ ibdev_err(&rdev->ibdev,
+ "Failed to allocate stats context: %#x\n", rc);
+ goto free_ctx;
+ }
+
+ rc = bnxt_qplib_init_rcfw(&rdev->rcfw, &rdev->qplib_ctx,
+ rdev->is_virtfn);
+ if (rc) {
+ ibdev_err(&rdev->ibdev,
+ "Failed to initialize RCFW: %#x\n", rc);
+ goto free_sctx;
+ }
+ set_bit(BNXT_RE_FLAG_RCFW_CHANNEL_EN, &rdev->flags);
+
+ /* Resources based on the 'new' device caps */
+ rc = bnxt_re_alloc_res(rdev);
+ if (rc) {
+ ibdev_err(&rdev->ibdev,
+ "Failed to allocate resources: %#x\n", rc);
+ goto fail;
+ }
+ set_bit(BNXT_RE_FLAG_RESOURCES_ALLOCATED, &rdev->flags);
+ rc = bnxt_re_init_res(rdev);
+ if (rc) {
+ ibdev_err(&rdev->ibdev,
+ "Failed to initialize resources: %#x\n", rc);
+ goto fail;
+ }
+
+ set_bit(BNXT_RE_FLAG_RESOURCES_INITIALIZED, &rdev->flags);
+
+ if (!rdev->is_virtfn) {
+ /* Query f/w defaults of CC params */
+ rc = bnxt_qplib_query_cc_param(&rdev->qplib_res, &rdev->cc_param);
+ if (rc)
+ ibdev_warn(&rdev->ibdev, "Failed to query CC defaults\n");
+
+ if (!(rdev->qplib_res.en_dev->flags & BNXT_EN_FLAG_ROCE_VF_RES_MGMT))
+ bnxt_re_vf_res_config(rdev);
+ }
+ hash_init(rdev->cq_hash);
+ if (rdev->chip_ctx->modes.toggle_bits & BNXT_QPLIB_SRQ_TOGGLE_BIT)
+ hash_init(rdev->srq_hash);
+
+ bnxt_re_debugfs_add_pdev(rdev);
+
+ bnxt_re_init_dcb_wq(rdev);
+ bnxt_re_net_register_async_event(rdev);
+
+ if (!rdev->is_virtfn)
+ bnxt_re_read_vpd_info(rdev);
+
+ rc = bnxt_re_get_stats3_ctx(rdev);
+ if (rc)
+ goto fail;
+
+ return 0;
+free_sctx:
+ bnxt_re_net_stats_ctx_free(rdev, rdev->qplib_ctx.stats.fw_id);
+free_ctx:
+ bnxt_qplib_free_hwctx(&rdev->qplib_res, &rdev->qplib_ctx);
+disable_rcfw:
+ bnxt_qplib_disable_rcfw_channel(&rdev->rcfw);
+free_ring:
+ type = bnxt_qplib_get_ring_type(rdev->chip_ctx);
+ bnxt_re_net_ring_free(rdev, rdev->rcfw.creq.ring_id, type);
+free_rcfw:
+ bnxt_qplib_free_rcfw_channel(&rdev->rcfw);
+fail:
+ bnxt_re_dev_uninit(rdev, BNXT_RE_COMPLETE_REMOVE);
+
+ return rc;
+}
+
+static void bnxt_re_setup_cc(struct bnxt_re_dev *rdev, bool enable)
+{
+ struct bnxt_qplib_cc_param cc_param = {};
+
+ /* Do not enable congestion control on VFs */
+ if (rdev->is_virtfn)
+ return;
+
+ /* Currently enabling only for GenP5 adapters */
+ if (!bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx))
+ return;
+
+ if (enable) {
+ cc_param.enable = 1;
+ cc_param.tos_ecn = 1;
+ }
+
+ cc_param.mask = (CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ENABLE_CC |
+ CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TOS_ECN);
+
+ if (bnxt_qplib_modify_cc(&rdev->qplib_res, &cc_param))
+ ibdev_err(&rdev->ibdev, "Failed to setup CC enable = %d\n", enable);
+}
+
+static void bnxt_re_update_en_info_rdev(struct bnxt_re_dev *rdev,
+ struct bnxt_re_en_dev_info *en_info,
+ struct auxiliary_device *adev)
+{
+ /* Before updating the rdev pointer in bnxt_re_en_dev_info structure,
+ * take the rtnl lock to avoid accessing invalid rdev pointer from
+ * L2 ULP callbacks. This is applicable in all the places where rdev
+ * pointer is updated in bnxt_re_en_dev_info.
+ */
+ rtnl_lock();
+ en_info->rdev = rdev;
+ rtnl_unlock();
+}
+
+static int bnxt_re_add_device(struct auxiliary_device *adev, u8 op_type)
+{
+ struct bnxt_aux_priv *aux_priv =
+ container_of(adev, struct bnxt_aux_priv, aux_dev);
+ struct bnxt_re_en_dev_info *en_info;
+ struct bnxt_en_dev *en_dev;
+ struct bnxt_re_dev *rdev;
+ int rc;
+
+ en_info = auxiliary_get_drvdata(adev);
+ en_dev = en_info->en_dev;
+
+
+ rdev = bnxt_re_dev_add(adev, en_dev);
+ if (!rdev || !rdev_to_dev(rdev)) {
+ rc = -ENOMEM;
+ goto exit;
+ }
+
+ bnxt_re_update_en_info_rdev(rdev, en_info, adev);
+
+ rc = bnxt_re_dev_init(rdev, op_type);
+ if (rc)
+ goto re_dev_dealloc;
+
+ rc = bnxt_re_ib_init(rdev);
+ if (rc) {
+ pr_err("Failed to register with IB: %s",
+ aux_priv->aux_dev.name);
+ goto re_dev_uninit;
+ }
+
+ bnxt_re_setup_cc(rdev, true);
+
+ return 0;
+
+re_dev_uninit:
+ bnxt_re_update_en_info_rdev(NULL, en_info, adev);
+ bnxt_re_dev_uninit(rdev, BNXT_RE_COMPLETE_REMOVE);
+re_dev_dealloc:
+ ib_dealloc_device(&rdev->ibdev);
+exit:
+ return rc;
+}
+
+#define BNXT_ADEV_NAME "bnxt_en"
+
+static void bnxt_re_remove_device(struct bnxt_re_dev *rdev, u8 op_type,
+ struct auxiliary_device *aux_dev)
+{
+ bnxt_re_setup_cc(rdev, false);
+ ib_unregister_device(&rdev->ibdev);
+ bnxt_re_dev_uninit(rdev, op_type);
+ ib_dealloc_device(&rdev->ibdev);
+}
+
+static void bnxt_re_remove(struct auxiliary_device *adev)
+{
+ struct bnxt_re_en_dev_info *en_info = auxiliary_get_drvdata(adev);
+ struct bnxt_re_dev *rdev;
+
+ mutex_lock(&bnxt_re_mutex);
+ rdev = en_info->rdev;
+
+ if (rdev)
+ bnxt_re_remove_device(rdev, BNXT_RE_COMPLETE_REMOVE, adev);
+ kfree(en_info);
+ mutex_unlock(&bnxt_re_mutex);
+}
+
+static int bnxt_re_probe(struct auxiliary_device *adev,
+ const struct auxiliary_device_id *id)
+{
+ struct bnxt_aux_priv *aux_priv =
+ container_of(adev, struct bnxt_aux_priv, aux_dev);
+ struct bnxt_re_en_dev_info *en_info;
+ struct bnxt_en_dev *en_dev;
+ int rc;
+
+ en_dev = aux_priv->edev;
+
+ mutex_lock(&bnxt_re_mutex);
+ en_info = kzalloc(sizeof(*en_info), GFP_KERNEL);
+ if (!en_info) {
+ mutex_unlock(&bnxt_re_mutex);
+ return -ENOMEM;
+ }
+ en_info->en_dev = en_dev;
+
+ auxiliary_set_drvdata(adev, en_info);
+
+ rc = bnxt_re_add_device(adev, BNXT_RE_COMPLETE_INIT);
+ if (rc)
+ kfree(en_info);
+
+ mutex_unlock(&bnxt_re_mutex);
+
+ return rc;
+}
+
+static int bnxt_re_suspend(struct auxiliary_device *adev, pm_message_t state)
+{
+ struct bnxt_re_en_dev_info *en_info = auxiliary_get_drvdata(adev);
+ struct bnxt_en_dev *en_dev;
+ struct bnxt_re_dev *rdev;
+
+ rdev = en_info->rdev;
+ en_dev = en_info->en_dev;
+ mutex_lock(&bnxt_re_mutex);
+
+ ibdev_info(&rdev->ibdev, "Handle device suspend call");
+ /* Check the current device state from bnxt_en_dev and move the
+ * device to detached state if FW_FATAL_COND is set.
+ * This prevents more commands to HW during clean-up,
+ * in case the device is already in error.
+ */
+ if (test_bit(BNXT_STATE_FW_FATAL_COND, &rdev->en_dev->en_state)) {
+ set_bit(ERR_DEVICE_DETACHED, &rdev->rcfw.cmdq.flags);
+ set_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags);
+ wake_up_all(&rdev->rcfw.cmdq.waitq);
+ bnxt_re_dev_stop(rdev);
+ }
+
+ if (rdev->pacing.dbr_pacing)
+ bnxt_re_set_pacing_dev_state(rdev);
+
+ ibdev_info(&rdev->ibdev, "%s: L2 driver notified to stop en_state 0x%lx",
+ __func__, en_dev->en_state);
+ bnxt_re_remove_device(rdev, BNXT_RE_PRE_RECOVERY_REMOVE, adev);
+ bnxt_re_update_en_info_rdev(NULL, en_info, adev);
+ mutex_unlock(&bnxt_re_mutex);
+
+ return 0;
+}
+
+static int bnxt_re_resume(struct auxiliary_device *adev)
+{
+ struct bnxt_re_en_dev_info *en_info = auxiliary_get_drvdata(adev);
+ struct bnxt_re_dev *rdev;
+
+ mutex_lock(&bnxt_re_mutex);
+ bnxt_re_add_device(adev, BNXT_RE_POST_RECOVERY_INIT);
+ rdev = en_info->rdev;
+ ibdev_info(&rdev->ibdev, "Device resume completed");
+ mutex_unlock(&bnxt_re_mutex);
+
+ return 0;
+}
+
+static void bnxt_re_shutdown(struct auxiliary_device *adev)
+{
+ struct bnxt_re_en_dev_info *en_info = auxiliary_get_drvdata(adev);
+ struct bnxt_re_dev *rdev;
+
+ rdev = en_info->rdev;
+ ib_unregister_device(&rdev->ibdev);
+ bnxt_re_dev_uninit(rdev, BNXT_RE_COMPLETE_REMOVE);
+}
+
+static const struct auxiliary_device_id bnxt_re_id_table[] = {
+ { .name = BNXT_ADEV_NAME ".rdma", },
+ {},
+};
+
+MODULE_DEVICE_TABLE(auxiliary, bnxt_re_id_table);
+
+static struct auxiliary_driver bnxt_re_driver = {
+ .name = "rdma",
+ .probe = bnxt_re_probe,
+ .remove = bnxt_re_remove,
+ .shutdown = bnxt_re_shutdown,
+ .suspend = bnxt_re_suspend,
+ .resume = bnxt_re_resume,
+ .id_table = bnxt_re_id_table,
+};
+
+static int __init bnxt_re_mod_init(void)
+{
+ int rc;
+
+ pr_info("%s: %s", ROCE_DRV_MODULE_NAME, version);
+ bnxt_re_register_debugfs();
+
+ rc = auxiliary_driver_register(&bnxt_re_driver);
+ if (rc) {
+ pr_err("%s: Failed to register auxiliary driver\n",
+ ROCE_DRV_MODULE_NAME);
+ goto err_debug;
+ }
+ return 0;
+err_debug:
+ bnxt_re_unregister_debugfs();
+ return rc;
+}
+
+static void __exit bnxt_re_mod_exit(void)
+{
+ auxiliary_driver_unregister(&bnxt_re_driver);
+ bnxt_re_unregister_debugfs();
+}
+
+module_init(bnxt_re_mod_init);
+module_exit(bnxt_re_mod_exit);
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
new file mode 100644
index 000000000000..c88f049136fc
--- /dev/null
+++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
@@ -0,0 +1,3226 @@
+/*
+ * Broadcom NetXtreme-E RoCE driver.
+ *
+ * Copyright (c) 2016 - 2017, Broadcom. All rights reserved. The term
+ * Broadcom refers to Broadcom Limited and/or its subsidiaries.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Description: Fast Path Operators
+ */
+
+#define dev_fmt(fmt) "QPLIB: " fmt
+
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/pci.h>
+#include <linux/delay.h>
+#include <linux/prefetch.h>
+#include <linux/if_ether.h>
+#include <rdma/ib_mad.h>
+
+#include "roce_hsi.h"
+
+#include "qplib_res.h"
+#include "qplib_rcfw.h"
+#include "qplib_sp.h"
+#include "qplib_fp.h"
+#include <rdma/ib_addr.h>
+#include "bnxt_ulp.h"
+#include "bnxt_re.h"
+#include "ib_verbs.h"
+
+static void __clean_cq(struct bnxt_qplib_cq *cq, u64 qp);
+
+static void bnxt_qplib_cancel_phantom_processing(struct bnxt_qplib_qp *qp)
+{
+ qp->sq.condition = false;
+ qp->sq.send_phantom = false;
+ qp->sq.single = false;
+}
+
+/* Flush list */
+static void __bnxt_qplib_add_flush_qp(struct bnxt_qplib_qp *qp)
+{
+ struct bnxt_qplib_cq *scq, *rcq;
+
+ scq = qp->scq;
+ rcq = qp->rcq;
+
+ if (!qp->sq.flushed) {
+ dev_dbg(&scq->hwq.pdev->dev,
+ "FP: Adding to SQ Flush list = %p\n", qp);
+ bnxt_qplib_cancel_phantom_processing(qp);
+ list_add_tail(&qp->sq_flush, &scq->sqf_head);
+ qp->sq.flushed = true;
+ }
+ if (!qp->srq) {
+ if (!qp->rq.flushed) {
+ dev_dbg(&rcq->hwq.pdev->dev,
+ "FP: Adding to RQ Flush list = %p\n", qp);
+ list_add_tail(&qp->rq_flush, &rcq->rqf_head);
+ qp->rq.flushed = true;
+ }
+ }
+}
+
+static void bnxt_qplib_acquire_cq_flush_locks(struct bnxt_qplib_qp *qp,
+ unsigned long *flags)
+ __acquires(&qp->scq->flush_lock) __acquires(&qp->rcq->flush_lock)
+{
+ spin_lock_irqsave(&qp->scq->flush_lock, *flags);
+ if (qp->scq == qp->rcq)
+ __acquire(&qp->rcq->flush_lock);
+ else
+ spin_lock(&qp->rcq->flush_lock);
+}
+
+static void bnxt_qplib_release_cq_flush_locks(struct bnxt_qplib_qp *qp,
+ unsigned long *flags)
+ __releases(&qp->scq->flush_lock) __releases(&qp->rcq->flush_lock)
+{
+ if (qp->scq == qp->rcq)
+ __release(&qp->rcq->flush_lock);
+ else
+ spin_unlock(&qp->rcq->flush_lock);
+ spin_unlock_irqrestore(&qp->scq->flush_lock, *flags);
+}
+
+void bnxt_qplib_add_flush_qp(struct bnxt_qplib_qp *qp)
+{
+ unsigned long flags;
+
+ bnxt_qplib_acquire_cq_flush_locks(qp, &flags);
+ __bnxt_qplib_add_flush_qp(qp);
+ bnxt_qplib_release_cq_flush_locks(qp, &flags);
+}
+
+static void __bnxt_qplib_del_flush_qp(struct bnxt_qplib_qp *qp)
+{
+ if (qp->sq.flushed) {
+ qp->sq.flushed = false;
+ list_del(&qp->sq_flush);
+ }
+ if (!qp->srq) {
+ if (qp->rq.flushed) {
+ qp->rq.flushed = false;
+ list_del(&qp->rq_flush);
+ }
+ }
+}
+
+void bnxt_qplib_clean_qp(struct bnxt_qplib_qp *qp)
+{
+ unsigned long flags;
+
+ bnxt_qplib_acquire_cq_flush_locks(qp, &flags);
+ __clean_cq(qp->scq, (u64)(unsigned long)qp);
+ qp->sq.hwq.prod = 0;
+ qp->sq.hwq.cons = 0;
+ __clean_cq(qp->rcq, (u64)(unsigned long)qp);
+ qp->rq.hwq.prod = 0;
+ qp->rq.hwq.cons = 0;
+
+ __bnxt_qplib_del_flush_qp(qp);
+ bnxt_qplib_release_cq_flush_locks(qp, &flags);
+}
+
+static void bnxt_qpn_cqn_sched_task(struct work_struct *work)
+{
+ struct bnxt_qplib_nq_work *nq_work =
+ container_of(work, struct bnxt_qplib_nq_work, work);
+
+ struct bnxt_qplib_cq *cq = nq_work->cq;
+ struct bnxt_qplib_nq *nq = nq_work->nq;
+
+ if (cq && nq) {
+ spin_lock_bh(&cq->compl_lock);
+ if (atomic_read(&cq->arm_state) && nq->cqn_handler) {
+ dev_dbg(&nq->pdev->dev,
+ "%s:Trigger cq = %p event nq = %p\n",
+ __func__, cq, nq);
+ nq->cqn_handler(nq, cq);
+ }
+ spin_unlock_bh(&cq->compl_lock);
+ }
+ kfree(nq_work);
+}
+
+static void bnxt_qplib_free_qp_hdr_buf(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_qp *qp)
+{
+ struct bnxt_qplib_q *rq = &qp->rq;
+ struct bnxt_qplib_q *sq = &qp->sq;
+
+ if (qp->rq_hdr_buf)
+ dma_free_coherent(&res->pdev->dev,
+ rq->max_wqe * qp->rq_hdr_buf_size,
+ qp->rq_hdr_buf, qp->rq_hdr_buf_map);
+ if (qp->sq_hdr_buf)
+ dma_free_coherent(&res->pdev->dev,
+ sq->max_wqe * qp->sq_hdr_buf_size,
+ qp->sq_hdr_buf, qp->sq_hdr_buf_map);
+ qp->rq_hdr_buf = NULL;
+ qp->sq_hdr_buf = NULL;
+ qp->rq_hdr_buf_map = 0;
+ qp->sq_hdr_buf_map = 0;
+ qp->sq_hdr_buf_size = 0;
+ qp->rq_hdr_buf_size = 0;
+}
+
+static int bnxt_qplib_alloc_qp_hdr_buf(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_qp *qp)
+{
+ struct bnxt_qplib_q *rq = &qp->rq;
+ struct bnxt_qplib_q *sq = &qp->sq;
+ int rc = 0;
+
+ if (qp->sq_hdr_buf_size && sq->max_wqe) {
+ qp->sq_hdr_buf = dma_alloc_coherent(&res->pdev->dev,
+ sq->max_wqe * qp->sq_hdr_buf_size,
+ &qp->sq_hdr_buf_map, GFP_KERNEL);
+ if (!qp->sq_hdr_buf) {
+ rc = -ENOMEM;
+ dev_err(&res->pdev->dev,
+ "Failed to create sq_hdr_buf\n");
+ goto fail;
+ }
+ }
+
+ if (qp->rq_hdr_buf_size && rq->max_wqe) {
+ qp->rq_hdr_buf = dma_alloc_coherent(&res->pdev->dev,
+ rq->max_wqe *
+ qp->rq_hdr_buf_size,
+ &qp->rq_hdr_buf_map,
+ GFP_KERNEL);
+ if (!qp->rq_hdr_buf) {
+ rc = -ENOMEM;
+ dev_err(&res->pdev->dev,
+ "Failed to create rq_hdr_buf\n");
+ goto fail;
+ }
+ }
+ return 0;
+
+fail:
+ bnxt_qplib_free_qp_hdr_buf(res, qp);
+ return rc;
+}
+
+static void clean_nq(struct bnxt_qplib_nq *nq, struct bnxt_qplib_cq *cq)
+{
+ struct bnxt_qplib_hwq *hwq = &nq->hwq;
+ struct nq_base *nqe, **nq_ptr;
+ int budget = nq->budget;
+ uintptr_t q_handle;
+ u16 type;
+
+ spin_lock_bh(&hwq->lock);
+ /* Service the NQ until empty */
+ while (budget--) {
+ nq_ptr = (struct nq_base **)hwq->pbl_ptr;
+ nqe = &nq_ptr[NQE_PG(hwq->cons)][NQE_IDX(hwq->cons)];
+ if (!NQE_CMP_VALID(nqe, nq->nq_db.dbinfo.flags))
+ break;
+
+ /*
+ * The valid test of the entry must be done first before
+ * reading any further.
+ */
+ dma_rmb();
+
+ type = le16_to_cpu(nqe->info10_type) & NQ_BASE_TYPE_MASK;
+ switch (type) {
+ case NQ_BASE_TYPE_CQ_NOTIFICATION:
+ {
+ struct nq_cn *nqcne = (struct nq_cn *)nqe;
+
+ q_handle = le32_to_cpu(nqcne->cq_handle_low);
+ q_handle |= (u64)le32_to_cpu(nqcne->cq_handle_high)
+ << 32;
+ if ((unsigned long)cq == q_handle) {
+ nqcne->cq_handle_low = 0;
+ nqcne->cq_handle_high = 0;
+ cq->cnq_events++;
+ }
+ break;
+ }
+ default:
+ break;
+ }
+ bnxt_qplib_hwq_incr_cons(hwq->max_elements, &hwq->cons,
+ 1, &nq->nq_db.dbinfo.flags);
+ }
+ spin_unlock_bh(&hwq->lock);
+}
+
+/* Wait for receiving all NQEs for this CQ and clean the NQEs associated with
+ * this CQ.
+ */
+static void __wait_for_all_nqes(struct bnxt_qplib_cq *cq, u16 cnq_events)
+{
+ u32 retry_cnt = 100;
+
+ while (retry_cnt--) {
+ if (cnq_events == cq->cnq_events)
+ return;
+ usleep_range(50, 100);
+ clean_nq(cq->nq, cq);
+ }
+}
+
+static void bnxt_qplib_service_nq(struct tasklet_struct *t)
+{
+ struct bnxt_qplib_nq *nq = from_tasklet(nq, t, nq_tasklet);
+ struct bnxt_qplib_hwq *hwq = &nq->hwq;
+ struct bnxt_qplib_cq *cq;
+ int budget = nq->budget;
+ struct nq_base *nqe;
+ uintptr_t q_handle;
+ u32 hw_polled = 0;
+ u16 type;
+
+ spin_lock_bh(&hwq->lock);
+ /* Service the NQ until empty */
+ while (budget--) {
+ nqe = bnxt_qplib_get_qe(hwq, hwq->cons, NULL);
+ if (!NQE_CMP_VALID(nqe, nq->nq_db.dbinfo.flags))
+ break;
+
+ /*
+ * The valid test of the entry must be done first before
+ * reading any further.
+ */
+ dma_rmb();
+
+ type = le16_to_cpu(nqe->info10_type) & NQ_BASE_TYPE_MASK;
+ switch (type) {
+ case NQ_BASE_TYPE_CQ_NOTIFICATION:
+ {
+ struct nq_cn *nqcne = (struct nq_cn *)nqe;
+ struct bnxt_re_cq *cq_p;
+
+ q_handle = le32_to_cpu(nqcne->cq_handle_low);
+ q_handle |= (u64)le32_to_cpu(nqcne->cq_handle_high)
+ << 32;
+ cq = (struct bnxt_qplib_cq *)(unsigned long)q_handle;
+ if (!cq)
+ break;
+ cq->toggle = (le16_to_cpu(nqe->info10_type) &
+ NQ_CN_TOGGLE_MASK) >> NQ_CN_TOGGLE_SFT;
+ cq->dbinfo.toggle = cq->toggle;
+ cq_p = container_of(cq, struct bnxt_re_cq, qplib_cq);
+ if (cq_p->uctx_cq_page)
+ *((u32 *)cq_p->uctx_cq_page) = cq->toggle;
+
+ bnxt_qplib_armen_db(&cq->dbinfo,
+ DBC_DBC_TYPE_CQ_ARMENA);
+ spin_lock_bh(&cq->compl_lock);
+ atomic_set(&cq->arm_state, 0);
+ if (nq->cqn_handler(nq, (cq)))
+ dev_warn(&nq->pdev->dev,
+ "cqn - type 0x%x not handled\n", type);
+ cq->cnq_events++;
+ spin_unlock_bh(&cq->compl_lock);
+ break;
+ }
+ case NQ_BASE_TYPE_SRQ_EVENT:
+ {
+ struct bnxt_qplib_srq *srq;
+ struct bnxt_re_srq *srq_p;
+ struct nq_srq_event *nqsrqe =
+ (struct nq_srq_event *)nqe;
+
+ q_handle = le32_to_cpu(nqsrqe->srq_handle_low);
+ q_handle |= (u64)le32_to_cpu(nqsrqe->srq_handle_high)
+ << 32;
+ srq = (struct bnxt_qplib_srq *)q_handle;
+ srq->toggle = (le16_to_cpu(nqe->info10_type) & NQ_CN_TOGGLE_MASK)
+ >> NQ_CN_TOGGLE_SFT;
+ srq->dbinfo.toggle = srq->toggle;
+ srq_p = container_of(srq, struct bnxt_re_srq, qplib_srq);
+ if (srq_p->uctx_srq_page)
+ *((u32 *)srq_p->uctx_srq_page) = srq->toggle;
+ bnxt_qplib_armen_db(&srq->dbinfo,
+ DBC_DBC_TYPE_SRQ_ARMENA);
+ if (nq->srqn_handler(nq,
+ (struct bnxt_qplib_srq *)q_handle,
+ nqsrqe->event))
+ dev_warn(&nq->pdev->dev,
+ "SRQ event 0x%x not handled\n",
+ nqsrqe->event);
+ break;
+ }
+ case NQ_BASE_TYPE_DBQ_EVENT:
+ break;
+ default:
+ dev_warn(&nq->pdev->dev,
+ "nqe with type = 0x%x not handled\n", type);
+ break;
+ }
+ hw_polled++;
+ bnxt_qplib_hwq_incr_cons(hwq->max_elements, &hwq->cons,
+ 1, &nq->nq_db.dbinfo.flags);
+ }
+ if (hw_polled)
+ bnxt_qplib_ring_nq_db(&nq->nq_db.dbinfo, nq->res->cctx, true);
+ spin_unlock_bh(&hwq->lock);
+}
+
+/* bnxt_re_synchronize_nq - self polling notification queue.
+ * @nq - notification queue pointer
+ *
+ * This function will start polling entries of a given notification queue
+ * for all pending entries.
+ * This function is useful to synchronize notification entries while resources
+ * are going away.
+ */
+
+void bnxt_re_synchronize_nq(struct bnxt_qplib_nq *nq)
+{
+ int budget = nq->budget;
+
+ nq->budget = nq->hwq.max_elements;
+ bnxt_qplib_service_nq(&nq->nq_tasklet);
+ nq->budget = budget;
+}
+
+static irqreturn_t bnxt_qplib_nq_irq(int irq, void *dev_instance)
+{
+ struct bnxt_qplib_nq *nq = dev_instance;
+ struct bnxt_qplib_hwq *hwq = &nq->hwq;
+ u32 sw_cons;
+
+ /* Prefetch the NQ element */
+ sw_cons = HWQ_CMP(hwq->cons, hwq);
+ prefetch(bnxt_qplib_get_qe(hwq, sw_cons, NULL));
+
+ /* Fan out to CPU affinitized kthreads? */
+ tasklet_schedule(&nq->nq_tasklet);
+
+ return IRQ_HANDLED;
+}
+
+void bnxt_qplib_nq_stop_irq(struct bnxt_qplib_nq *nq, bool kill)
+{
+ if (!nq->requested)
+ return;
+
+ nq->requested = false;
+ /* Mask h/w interrupt */
+ bnxt_qplib_ring_nq_db(&nq->nq_db.dbinfo, nq->res->cctx, false);
+ /* Sync with last running IRQ handler */
+ synchronize_irq(nq->msix_vec);
+ irq_set_affinity_hint(nq->msix_vec, NULL);
+ free_irq(nq->msix_vec, nq);
+ kfree(nq->name);
+ nq->name = NULL;
+
+ if (kill)
+ tasklet_kill(&nq->nq_tasklet);
+ tasklet_disable(&nq->nq_tasklet);
+}
+
+void bnxt_qplib_disable_nq(struct bnxt_qplib_nq *nq)
+{
+ if (nq->cqn_wq) {
+ destroy_workqueue(nq->cqn_wq);
+ nq->cqn_wq = NULL;
+ }
+
+ /* Make sure the HW is stopped! */
+ bnxt_qplib_nq_stop_irq(nq, true);
+
+ if (nq->nq_db.reg.bar_reg) {
+ iounmap(nq->nq_db.reg.bar_reg);
+ nq->nq_db.reg.bar_reg = NULL;
+ }
+
+ nq->cqn_handler = NULL;
+ nq->srqn_handler = NULL;
+ nq->msix_vec = 0;
+}
+
+int bnxt_qplib_nq_start_irq(struct bnxt_qplib_nq *nq, int nq_indx,
+ int msix_vector, bool need_init)
+{
+ struct bnxt_qplib_res *res = nq->res;
+ int rc;
+
+ if (nq->requested)
+ return -EFAULT;
+
+ nq->msix_vec = msix_vector;
+ if (need_init)
+ tasklet_setup(&nq->nq_tasklet, bnxt_qplib_service_nq);
+ else
+ tasklet_enable(&nq->nq_tasklet);
+
+ nq->name = kasprintf(GFP_KERNEL, "bnxt_re-nq-%d@pci:%s",
+ nq_indx, pci_name(res->pdev));
+ if (!nq->name)
+ return -ENOMEM;
+ rc = request_irq(nq->msix_vec, bnxt_qplib_nq_irq, 0, nq->name, nq);
+ if (rc) {
+ kfree(nq->name);
+ nq->name = NULL;
+ tasklet_disable(&nq->nq_tasklet);
+ return rc;
+ }
+
+ cpumask_clear(&nq->mask);
+ cpumask_set_cpu(nq_indx, &nq->mask);
+ rc = irq_set_affinity_hint(nq->msix_vec, &nq->mask);
+ if (rc) {
+ dev_warn(&nq->pdev->dev,
+ "set affinity failed; vector: %d nq_idx: %d\n",
+ nq->msix_vec, nq_indx);
+ }
+ nq->requested = true;
+ bnxt_qplib_ring_nq_db(&nq->nq_db.dbinfo, res->cctx, true);
+
+ return rc;
+}
+
+static int bnxt_qplib_map_nq_db(struct bnxt_qplib_nq *nq, u32 reg_offt)
+{
+ resource_size_t reg_base;
+ struct bnxt_qplib_nq_db *nq_db;
+ struct pci_dev *pdev;
+
+ pdev = nq->pdev;
+ nq_db = &nq->nq_db;
+
+ nq_db->dbinfo.flags = 0;
+ nq_db->reg.bar_id = NQ_CONS_PCI_BAR_REGION;
+ nq_db->reg.bar_base = pci_resource_start(pdev, nq_db->reg.bar_id);
+ if (!nq_db->reg.bar_base) {
+ dev_err(&pdev->dev, "QPLIB: NQ BAR region %d resc start is 0!",
+ nq_db->reg.bar_id);
+ return -ENOMEM;
+ }
+
+ reg_base = nq_db->reg.bar_base + reg_offt;
+ /* Unconditionally map 8 bytes to support 57500 series */
+ nq_db->reg.len = 8;
+ nq_db->reg.bar_reg = ioremap(reg_base, nq_db->reg.len);
+ if (!nq_db->reg.bar_reg) {
+ dev_err(&pdev->dev, "QPLIB: NQ BAR region %d mapping failed",
+ nq_db->reg.bar_id);
+ return -ENOMEM;
+ }
+
+ nq_db->dbinfo.db = nq_db->reg.bar_reg;
+ nq_db->dbinfo.hwq = &nq->hwq;
+ nq_db->dbinfo.xid = nq->ring_id;
+
+ return 0;
+}
+
+int bnxt_qplib_enable_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq,
+ int nq_idx, int msix_vector, int bar_reg_offset,
+ cqn_handler_t cqn_handler,
+ srqn_handler_t srqn_handler)
+{
+ int rc;
+
+ nq->pdev = pdev;
+ nq->cqn_handler = cqn_handler;
+ nq->srqn_handler = srqn_handler;
+ nq->load = 0;
+
+ /* Have a task to schedule CQ notifiers in post send case */
+ nq->cqn_wq = create_singlethread_workqueue("bnxt_qplib_nq");
+ if (!nq->cqn_wq)
+ return -ENOMEM;
+
+ rc = bnxt_qplib_map_nq_db(nq, bar_reg_offset);
+ if (rc)
+ goto fail;
+
+ rc = bnxt_qplib_nq_start_irq(nq, nq_idx, msix_vector, true);
+ if (rc) {
+ dev_err(&nq->pdev->dev,
+ "Failed to request irq for nq-idx %d\n", nq_idx);
+ goto fail;
+ }
+
+ return 0;
+fail:
+ bnxt_qplib_disable_nq(nq);
+ return rc;
+}
+
+void bnxt_qplib_free_nq(struct bnxt_qplib_nq *nq)
+{
+ if (nq->hwq.max_elements) {
+ bnxt_qplib_free_hwq(nq->res, &nq->hwq);
+ nq->hwq.max_elements = 0;
+ }
+}
+
+int bnxt_qplib_alloc_nq(struct bnxt_qplib_res *res, struct bnxt_qplib_nq *nq)
+{
+ struct bnxt_qplib_hwq_attr hwq_attr = {};
+ struct bnxt_qplib_sg_info sginfo = {};
+
+ nq->pdev = res->pdev;
+ nq->res = res;
+ if (!nq->hwq.max_elements ||
+ nq->hwq.max_elements > BNXT_QPLIB_NQE_MAX_CNT)
+ nq->hwq.max_elements = BNXT_QPLIB_NQE_MAX_CNT;
+
+ sginfo.pgsize = PAGE_SIZE;
+ sginfo.pgshft = PAGE_SHIFT;
+ hwq_attr.res = res;
+ hwq_attr.sginfo = &sginfo;
+ hwq_attr.depth = nq->hwq.max_elements;
+ hwq_attr.stride = sizeof(struct nq_base);
+ hwq_attr.type = bnxt_qplib_get_hwq_type(nq->res);
+ if (bnxt_qplib_alloc_init_hwq(&nq->hwq, &hwq_attr)) {
+ dev_err(&nq->pdev->dev, "FP NQ allocation failed");
+ return -ENOMEM;
+ }
+ nq->budget = 8;
+ return 0;
+}
+
+/* SRQ */
+void bnxt_qplib_destroy_srq(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_srq *srq)
+{
+ struct bnxt_qplib_rcfw *rcfw = res->rcfw;
+ struct creq_destroy_srq_resp resp = {};
+ struct bnxt_qplib_cmdqmsg msg = {};
+ struct cmdq_destroy_srq req = {};
+ int rc;
+
+ bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req,
+ CMDQ_BASE_OPCODE_DESTROY_SRQ,
+ sizeof(req));
+
+ /* Configure the request */
+ req.srq_cid = cpu_to_le32(srq->id);
+
+ bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, NULL, sizeof(req), sizeof(resp), 0);
+ rc = bnxt_qplib_rcfw_send_message(rcfw, &msg);
+ kfree(srq->swq);
+ if (rc)
+ return;
+ bnxt_qplib_free_hwq(res, &srq->hwq);
+}
+
+int bnxt_qplib_create_srq(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_srq *srq)
+{
+ struct bnxt_qplib_rcfw *rcfw = res->rcfw;
+ struct bnxt_qplib_hwq_attr hwq_attr = {};
+ struct creq_create_srq_resp resp = {};
+ struct bnxt_qplib_cmdqmsg msg = {};
+ struct cmdq_create_srq req = {};
+ struct bnxt_qplib_pbl *pbl;
+ u16 pg_sz_lvl;
+ int rc, idx;
+
+ hwq_attr.res = res;
+ hwq_attr.sginfo = &srq->sg_info;
+ hwq_attr.depth = srq->max_wqe;
+ hwq_attr.stride = srq->wqe_size;
+ hwq_attr.type = HWQ_TYPE_QUEUE;
+ rc = bnxt_qplib_alloc_init_hwq(&srq->hwq, &hwq_attr);
+ if (rc)
+ return rc;
+ srq->dbinfo.flags = 0;
+ bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req,
+ CMDQ_BASE_OPCODE_CREATE_SRQ,
+ sizeof(req));
+
+ /* Configure the request */
+ req.dpi = cpu_to_le32(srq->dpi->dpi);
+ req.srq_handle = cpu_to_le64((uintptr_t)srq);
+
+ req.srq_size = cpu_to_le16((u16)srq->hwq.max_elements);
+ pbl = &srq->hwq.pbl[PBL_LVL_0];
+ pg_sz_lvl = ((u16)bnxt_qplib_base_pg_size(&srq->hwq) <<
+ CMDQ_CREATE_SRQ_PG_SIZE_SFT);
+ pg_sz_lvl |= (srq->hwq.level & CMDQ_CREATE_SRQ_LVL_MASK) <<
+ CMDQ_CREATE_SRQ_LVL_SFT;
+ req.pg_size_lvl = cpu_to_le16(pg_sz_lvl);
+ req.pbl = cpu_to_le64(pbl->pg_map_arr[0]);
+ req.pd_id = cpu_to_le32(srq->pd->id);
+ req.eventq_id = cpu_to_le16(srq->eventq_hw_ring_id);
+
+ bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, NULL, sizeof(req), sizeof(resp), 0);
+ rc = bnxt_qplib_rcfw_send_message(rcfw, &msg);
+ if (rc)
+ goto fail;
+
+ spin_lock_init(&srq->lock);
+ srq->start_idx = 0;
+ srq->last_idx = srq->hwq.max_elements - 1;
+ if (!srq->hwq.is_user) {
+ srq->swq = kcalloc(srq->hwq.max_elements, sizeof(*srq->swq),
+ GFP_KERNEL);
+ if (!srq->swq) {
+ rc = -ENOMEM;
+ goto fail;
+ }
+ for (idx = 0; idx < srq->hwq.max_elements; idx++)
+ srq->swq[idx].next_idx = idx + 1;
+ srq->swq[srq->last_idx].next_idx = -1;
+ }
+
+ srq->id = le32_to_cpu(resp.xid);
+ srq->dbinfo.hwq = &srq->hwq;
+ srq->dbinfo.xid = srq->id;
+ srq->dbinfo.db = srq->dpi->dbr;
+ srq->dbinfo.max_slot = 1;
+ srq->dbinfo.priv_db = res->dpi_tbl.priv_db;
+ bnxt_qplib_armen_db(&srq->dbinfo, DBC_DBC_TYPE_SRQ_ARMENA);
+
+ return 0;
+fail:
+ bnxt_qplib_free_hwq(res, &srq->hwq);
+ kfree(srq->swq);
+
+ return rc;
+}
+
+int bnxt_qplib_query_srq(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_srq *srq)
+{
+ struct bnxt_qplib_rcfw *rcfw = res->rcfw;
+ struct creq_query_srq_resp resp = {};
+ struct bnxt_qplib_cmdqmsg msg = {};
+ struct bnxt_qplib_rcfw_sbuf sbuf;
+ struct creq_query_srq_resp_sb *sb;
+ struct cmdq_query_srq req = {};
+ int rc;
+
+ bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req,
+ CMDQ_BASE_OPCODE_QUERY_SRQ,
+ sizeof(req));
+
+ /* Configure the request */
+ sbuf.size = ALIGN(sizeof(*sb), BNXT_QPLIB_CMDQE_UNITS);
+ sbuf.sb = dma_alloc_coherent(&rcfw->pdev->dev, sbuf.size,
+ &sbuf.dma_addr, GFP_KERNEL);
+ if (!sbuf.sb)
+ return -ENOMEM;
+ req.resp_size = sbuf.size / BNXT_QPLIB_CMDQE_UNITS;
+ req.srq_cid = cpu_to_le32(srq->id);
+ sb = sbuf.sb;
+ bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, &sbuf, sizeof(req),
+ sizeof(resp), 0);
+ rc = bnxt_qplib_rcfw_send_message(rcfw, &msg);
+ if (!rc)
+ srq->threshold = le16_to_cpu(sb->srq_limit);
+ dma_free_coherent(&rcfw->pdev->dev, sbuf.size,
+ sbuf.sb, sbuf.dma_addr);
+
+ return rc;
+}
+
+int bnxt_qplib_post_srq_recv(struct bnxt_qplib_srq *srq,
+ struct bnxt_qplib_swqe *wqe)
+{
+ struct bnxt_qplib_hwq *srq_hwq = &srq->hwq;
+ struct rq_wqe *srqe;
+ struct sq_sge *hw_sge;
+ int i, next;
+
+ spin_lock(&srq_hwq->lock);
+ if (srq->start_idx == srq->last_idx) {
+ dev_err(&srq_hwq->pdev->dev,
+ "FP: SRQ (0x%x) is full!\n", srq->id);
+ spin_unlock(&srq_hwq->lock);
+ return -EINVAL;
+ }
+ next = srq->start_idx;
+ srq->start_idx = srq->swq[next].next_idx;
+ spin_unlock(&srq_hwq->lock);
+
+ srqe = bnxt_qplib_get_qe(srq_hwq, srq_hwq->prod, NULL);
+ memset(srqe, 0, srq->wqe_size);
+ /* Calculate wqe_size16 and data_len */
+ for (i = 0, hw_sge = (struct sq_sge *)srqe->data;
+ i < wqe->num_sge; i++, hw_sge++) {
+ hw_sge->va_or_pa = cpu_to_le64(wqe->sg_list[i].addr);
+ hw_sge->l_key = cpu_to_le32(wqe->sg_list[i].lkey);
+ hw_sge->size = cpu_to_le32(wqe->sg_list[i].size);
+ }
+ srqe->wqe_type = wqe->type;
+ srqe->flags = wqe->flags;
+ srqe->wqe_size = wqe->num_sge +
+ ((offsetof(typeof(*srqe), data) + 15) >> 4);
+ srqe->wr_id[0] = cpu_to_le32((u32)next);
+ srq->swq[next].wr_id = wqe->wr_id;
+
+ bnxt_qplib_hwq_incr_prod(&srq->dbinfo, srq_hwq, srq->dbinfo.max_slot);
+
+ /* Ring DB */
+ bnxt_qplib_ring_prod_db(&srq->dbinfo, DBC_DBC_TYPE_SRQ);
+
+ return 0;
+}
+
+/* QP */
+
+static int bnxt_qplib_alloc_init_swq(struct bnxt_qplib_q *que)
+{
+ int indx;
+
+ que->swq = kcalloc(que->max_sw_wqe, sizeof(*que->swq), GFP_KERNEL);
+ if (!que->swq)
+ return -ENOMEM;
+
+ que->swq_start = 0;
+ que->swq_last = que->max_sw_wqe - 1;
+ for (indx = 0; indx < que->max_sw_wqe; indx++)
+ que->swq[indx].next_idx = indx + 1;
+ que->swq[que->swq_last].next_idx = 0; /* Make it circular */
+ que->swq_last = 0;
+
+ return 0;
+}
+
+int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
+{
+ struct bnxt_qplib_hwq_attr hwq_attr = {};
+ struct bnxt_qplib_rcfw *rcfw = res->rcfw;
+ struct creq_create_qp1_resp resp = {};
+ struct bnxt_qplib_cmdqmsg msg = {};
+ struct bnxt_qplib_q *sq = &qp->sq;
+ struct bnxt_qplib_q *rq = &qp->rq;
+ struct cmdq_create_qp1 req = {};
+ struct bnxt_qplib_pbl *pbl;
+ u32 qp_flags = 0;
+ u8 pg_sz_lvl;
+ u32 tbl_indx;
+ int rc;
+
+ sq->dbinfo.flags = 0;
+ bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req,
+ CMDQ_BASE_OPCODE_CREATE_QP1,
+ sizeof(req));
+ /* General */
+ req.type = qp->type;
+ req.dpi = cpu_to_le32(qp->dpi->dpi);
+ req.qp_handle = cpu_to_le64(qp->qp_handle);
+
+ /* SQ */
+ hwq_attr.res = res;
+ hwq_attr.sginfo = &sq->sg_info;
+ hwq_attr.stride = sizeof(struct sq_sge);
+ hwq_attr.depth = bnxt_qplib_get_depth(sq, qp->wqe_mode, false);
+ hwq_attr.type = HWQ_TYPE_QUEUE;
+ rc = bnxt_qplib_alloc_init_hwq(&sq->hwq, &hwq_attr);
+ if (rc)
+ return rc;
+
+ rc = bnxt_qplib_alloc_init_swq(sq);
+ if (rc)
+ goto fail_sq;
+
+ req.sq_size = cpu_to_le32(bnxt_qplib_set_sq_size(sq, qp->wqe_mode));
+ pbl = &sq->hwq.pbl[PBL_LVL_0];
+ req.sq_pbl = cpu_to_le64(pbl->pg_map_arr[0]);
+ pg_sz_lvl = (bnxt_qplib_base_pg_size(&sq->hwq) <<
+ CMDQ_CREATE_QP1_SQ_PG_SIZE_SFT);
+ pg_sz_lvl |= (sq->hwq.level & CMDQ_CREATE_QP1_SQ_LVL_MASK);
+ req.sq_pg_size_sq_lvl = pg_sz_lvl;
+ req.sq_fwo_sq_sge =
+ cpu_to_le16((sq->max_sge & CMDQ_CREATE_QP1_SQ_SGE_MASK) <<
+ CMDQ_CREATE_QP1_SQ_SGE_SFT);
+ req.scq_cid = cpu_to_le32(qp->scq->id);
+
+ /* RQ */
+ if (rq->max_wqe) {
+ rq->dbinfo.flags = 0;
+ hwq_attr.res = res;
+ hwq_attr.sginfo = &rq->sg_info;
+ hwq_attr.stride = sizeof(struct sq_sge);
+ hwq_attr.depth = bnxt_qplib_get_depth(rq, qp->wqe_mode, false);
+ hwq_attr.type = HWQ_TYPE_QUEUE;
+ rc = bnxt_qplib_alloc_init_hwq(&rq->hwq, &hwq_attr);
+ if (rc)
+ goto sq_swq;
+ rc = bnxt_qplib_alloc_init_swq(rq);
+ if (rc)
+ goto fail_rq;
+ req.rq_size = cpu_to_le32(rq->max_wqe);
+ pbl = &rq->hwq.pbl[PBL_LVL_0];
+ req.rq_pbl = cpu_to_le64(pbl->pg_map_arr[0]);
+ pg_sz_lvl = (bnxt_qplib_base_pg_size(&rq->hwq) <<
+ CMDQ_CREATE_QP1_RQ_PG_SIZE_SFT);
+ pg_sz_lvl |= (rq->hwq.level & CMDQ_CREATE_QP1_RQ_LVL_MASK);
+ req.rq_pg_size_rq_lvl = pg_sz_lvl;
+ req.rq_fwo_rq_sge =
+ cpu_to_le16((rq->max_sge &
+ CMDQ_CREATE_QP1_RQ_SGE_MASK) <<
+ CMDQ_CREATE_QP1_RQ_SGE_SFT);
+ }
+ req.rcq_cid = cpu_to_le32(qp->rcq->id);
+ /* Header buffer - allow hdr_buf pass in */
+ rc = bnxt_qplib_alloc_qp_hdr_buf(res, qp);
+ if (rc) {
+ rc = -ENOMEM;
+ goto rq_rwq;
+ }
+ qp_flags |= CMDQ_CREATE_QP1_QP_FLAGS_RESERVED_LKEY_ENABLE;
+ req.qp_flags = cpu_to_le32(qp_flags);
+ req.pd_id = cpu_to_le32(qp->pd->id);
+
+ bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, NULL, sizeof(req), sizeof(resp), 0);
+ rc = bnxt_qplib_rcfw_send_message(rcfw, &msg);
+ if (rc)
+ goto fail;
+
+ qp->id = le32_to_cpu(resp.xid);
+ qp->cur_qp_state = CMDQ_MODIFY_QP_NEW_STATE_RESET;
+ qp->cctx = res->cctx;
+ sq->dbinfo.hwq = &sq->hwq;
+ sq->dbinfo.xid = qp->id;
+ sq->dbinfo.db = qp->dpi->dbr;
+ sq->dbinfo.max_slot = bnxt_qplib_set_sq_max_slot(qp->wqe_mode);
+ if (rq->max_wqe) {
+ rq->dbinfo.hwq = &rq->hwq;
+ rq->dbinfo.xid = qp->id;
+ rq->dbinfo.db = qp->dpi->dbr;
+ rq->dbinfo.max_slot = bnxt_qplib_set_rq_max_slot(rq->wqe_size);
+ }
+ tbl_indx = map_qp_id_to_tbl_indx(qp->id, rcfw);
+ rcfw->qp_tbl[tbl_indx].qp_id = qp->id;
+ rcfw->qp_tbl[tbl_indx].qp_handle = (void *)qp;
+
+ return 0;
+
+fail:
+ bnxt_qplib_free_qp_hdr_buf(res, qp);
+rq_rwq:
+ kfree(rq->swq);
+fail_rq:
+ bnxt_qplib_free_hwq(res, &rq->hwq);
+sq_swq:
+ kfree(sq->swq);
+fail_sq:
+ bnxt_qplib_free_hwq(res, &sq->hwq);
+ return rc;
+}
+
+static void bnxt_qplib_init_psn_ptr(struct bnxt_qplib_qp *qp, int size)
+{
+ struct bnxt_qplib_hwq *hwq;
+ struct bnxt_qplib_q *sq;
+ u64 fpsne, psn_pg;
+ u16 indx_pad = 0;
+
+ sq = &qp->sq;
+ hwq = &sq->hwq;
+ /* First psn entry */
+ fpsne = (u64)bnxt_qplib_get_qe(hwq, hwq->depth, &psn_pg);
+ if (!IS_ALIGNED(fpsne, PAGE_SIZE))
+ indx_pad = (fpsne & ~PAGE_MASK) / size;
+ hwq->pad_pgofft = indx_pad;
+ hwq->pad_pg = (u64 *)psn_pg;
+ hwq->pad_stride = size;
+}
+
+int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
+{
+ struct bnxt_qplib_rcfw *rcfw = res->rcfw;
+ struct bnxt_qplib_hwq_attr hwq_attr = {};
+ struct bnxt_qplib_sg_info sginfo = {};
+ struct creq_create_qp_resp resp = {};
+ struct bnxt_qplib_cmdqmsg msg = {};
+ struct bnxt_qplib_q *sq = &qp->sq;
+ struct bnxt_qplib_q *rq = &qp->rq;
+ struct cmdq_create_qp req = {};
+ int rc, req_size, psn_sz = 0;
+ struct bnxt_qplib_hwq *xrrq;
+ struct bnxt_qplib_pbl *pbl;
+ u32 qp_flags = 0;
+ u8 pg_sz_lvl;
+ u32 tbl_indx;
+ u16 nsge;
+
+ qp->is_host_msn_tbl = _is_host_msn_table(res->dattr->dev_cap_flags2);
+ sq->dbinfo.flags = 0;
+ bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req,
+ CMDQ_BASE_OPCODE_CREATE_QP,
+ sizeof(req));
+
+ /* General */
+ req.type = qp->type;
+ req.dpi = cpu_to_le32(qp->dpi->dpi);
+ req.qp_handle = cpu_to_le64(qp->qp_handle);
+
+ /* SQ */
+ if (qp->type == CMDQ_CREATE_QP_TYPE_RC) {
+ psn_sz = bnxt_qplib_is_chip_gen_p5_p7(res->cctx) ?
+ sizeof(struct sq_psn_search_ext) :
+ sizeof(struct sq_psn_search);
+
+ if (qp->is_host_msn_tbl) {
+ psn_sz = sizeof(struct sq_msn_search);
+ qp->msn = 0;
+ }
+ }
+
+ hwq_attr.res = res;
+ hwq_attr.sginfo = &sq->sg_info;
+ hwq_attr.stride = sizeof(struct sq_sge);
+ hwq_attr.depth = bnxt_qplib_get_depth(sq, qp->wqe_mode, true);
+ hwq_attr.aux_stride = psn_sz;
+ hwq_attr.aux_depth = psn_sz ? bnxt_qplib_set_sq_size(sq, qp->wqe_mode)
+ : 0;
+ /* Update msn tbl size */
+ if (qp->is_host_msn_tbl && psn_sz) {
+ if (qp->wqe_mode == BNXT_QPLIB_WQE_MODE_STATIC)
+ hwq_attr.aux_depth =
+ roundup_pow_of_two(bnxt_qplib_set_sq_size(sq, qp->wqe_mode));
+ else
+ hwq_attr.aux_depth =
+ roundup_pow_of_two(bnxt_qplib_set_sq_size(sq, qp->wqe_mode)) / 2;
+ qp->msn_tbl_sz = hwq_attr.aux_depth;
+ qp->msn = 0;
+ }
+
+ hwq_attr.type = HWQ_TYPE_QUEUE;
+ rc = bnxt_qplib_alloc_init_hwq(&sq->hwq, &hwq_attr);
+ if (rc)
+ return rc;
+
+ if (!sq->hwq.is_user) {
+ rc = bnxt_qplib_alloc_init_swq(sq);
+ if (rc)
+ goto fail_sq;
+
+ if (psn_sz)
+ bnxt_qplib_init_psn_ptr(qp, psn_sz);
+ }
+ req.sq_size = cpu_to_le32(bnxt_qplib_set_sq_size(sq, qp->wqe_mode));
+ pbl = &sq->hwq.pbl[PBL_LVL_0];
+ req.sq_pbl = cpu_to_le64(pbl->pg_map_arr[0]);
+ pg_sz_lvl = (bnxt_qplib_base_pg_size(&sq->hwq) <<
+ CMDQ_CREATE_QP_SQ_PG_SIZE_SFT);
+ pg_sz_lvl |= (sq->hwq.level & CMDQ_CREATE_QP_SQ_LVL_MASK);
+ req.sq_pg_size_sq_lvl = pg_sz_lvl;
+ req.sq_fwo_sq_sge =
+ cpu_to_le16(((sq->max_sge & CMDQ_CREATE_QP_SQ_SGE_MASK) <<
+ CMDQ_CREATE_QP_SQ_SGE_SFT) | 0);
+ req.scq_cid = cpu_to_le32(qp->scq->id);
+
+ /* RQ */
+ if (!qp->srq) {
+ rq->dbinfo.flags = 0;
+ hwq_attr.res = res;
+ hwq_attr.sginfo = &rq->sg_info;
+ hwq_attr.stride = sizeof(struct sq_sge);
+ hwq_attr.depth = bnxt_qplib_get_depth(rq, qp->wqe_mode, false);
+ hwq_attr.aux_stride = 0;
+ hwq_attr.aux_depth = 0;
+ hwq_attr.type = HWQ_TYPE_QUEUE;
+ rc = bnxt_qplib_alloc_init_hwq(&rq->hwq, &hwq_attr);
+ if (rc)
+ goto sq_swq;
+ if (!rq->hwq.is_user) {
+ rc = bnxt_qplib_alloc_init_swq(rq);
+ if (rc)
+ goto fail_rq;
+ }
+
+ req.rq_size = cpu_to_le32(rq->max_wqe);
+ pbl = &rq->hwq.pbl[PBL_LVL_0];
+ req.rq_pbl = cpu_to_le64(pbl->pg_map_arr[0]);
+ pg_sz_lvl = (bnxt_qplib_base_pg_size(&rq->hwq) <<
+ CMDQ_CREATE_QP_RQ_PG_SIZE_SFT);
+ pg_sz_lvl |= (rq->hwq.level & CMDQ_CREATE_QP_RQ_LVL_MASK);
+ req.rq_pg_size_rq_lvl = pg_sz_lvl;
+ nsge = (qp->wqe_mode == BNXT_QPLIB_WQE_MODE_STATIC) ?
+ 6 : rq->max_sge;
+ req.rq_fwo_rq_sge =
+ cpu_to_le16(((nsge &
+ CMDQ_CREATE_QP_RQ_SGE_MASK) <<
+ CMDQ_CREATE_QP_RQ_SGE_SFT) | 0);
+ } else {
+ /* SRQ */
+ qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_SRQ_USED;
+ req.srq_cid = cpu_to_le32(qp->srq->id);
+ }
+ req.rcq_cid = cpu_to_le32(qp->rcq->id);
+
+ qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_RESERVED_LKEY_ENABLE;
+ qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_FR_PMR_ENABLED;
+ if (qp->sig_type)
+ qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_FORCE_COMPLETION;
+ if (qp->wqe_mode == BNXT_QPLIB_WQE_MODE_VARIABLE)
+ qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_VARIABLE_SIZED_WQE_ENABLED;
+ if (bnxt_ext_stats_supported(res->cctx, res->dattr->dev_cap_flags, res->is_vf))
+ qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_EXT_STATS_ENABLED;
+
+ req.qp_flags = cpu_to_le32(qp_flags);
+
+ /* ORRQ and IRRQ */
+ if (psn_sz) {
+ xrrq = &qp->orrq;
+ xrrq->max_elements =
+ ORD_LIMIT_TO_ORRQ_SLOTS(qp->max_rd_atomic);
+ req_size = xrrq->max_elements *
+ BNXT_QPLIB_MAX_ORRQE_ENTRY_SIZE + PAGE_SIZE - 1;
+ req_size &= ~(PAGE_SIZE - 1);
+ sginfo.pgsize = req_size;
+ sginfo.pgshft = PAGE_SHIFT;
+
+ hwq_attr.res = res;
+ hwq_attr.sginfo = &sginfo;
+ hwq_attr.depth = xrrq->max_elements;
+ hwq_attr.stride = BNXT_QPLIB_MAX_ORRQE_ENTRY_SIZE;
+ hwq_attr.aux_stride = 0;
+ hwq_attr.aux_depth = 0;
+ hwq_attr.type = HWQ_TYPE_CTX;
+ rc = bnxt_qplib_alloc_init_hwq(xrrq, &hwq_attr);
+ if (rc)
+ goto rq_swq;
+ pbl = &xrrq->pbl[PBL_LVL_0];
+ req.orrq_addr = cpu_to_le64(pbl->pg_map_arr[0]);
+
+ xrrq = &qp->irrq;
+ xrrq->max_elements = IRD_LIMIT_TO_IRRQ_SLOTS(
+ qp->max_dest_rd_atomic);
+ req_size = xrrq->max_elements *
+ BNXT_QPLIB_MAX_IRRQE_ENTRY_SIZE + PAGE_SIZE - 1;
+ req_size &= ~(PAGE_SIZE - 1);
+ sginfo.pgsize = req_size;
+ hwq_attr.depth = xrrq->max_elements;
+ hwq_attr.stride = BNXT_QPLIB_MAX_IRRQE_ENTRY_SIZE;
+ rc = bnxt_qplib_alloc_init_hwq(xrrq, &hwq_attr);
+ if (rc)
+ goto fail_orrq;
+
+ pbl = &xrrq->pbl[PBL_LVL_0];
+ req.irrq_addr = cpu_to_le64(pbl->pg_map_arr[0]);
+ }
+ req.pd_id = cpu_to_le32(qp->pd->id);
+
+ bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, NULL, sizeof(req),
+ sizeof(resp), 0);
+ rc = bnxt_qplib_rcfw_send_message(rcfw, &msg);
+ if (rc)
+ goto fail;
+
+ qp->id = le32_to_cpu(resp.xid);
+ qp->cur_qp_state = CMDQ_MODIFY_QP_NEW_STATE_RESET;
+ INIT_LIST_HEAD(&qp->sq_flush);
+ INIT_LIST_HEAD(&qp->rq_flush);
+ qp->cctx = res->cctx;
+ sq->dbinfo.hwq = &sq->hwq;
+ sq->dbinfo.xid = qp->id;
+ sq->dbinfo.db = qp->dpi->dbr;
+ sq->dbinfo.max_slot = bnxt_qplib_set_sq_max_slot(qp->wqe_mode);
+ if (rq->max_wqe) {
+ rq->dbinfo.hwq = &rq->hwq;
+ rq->dbinfo.xid = qp->id;
+ rq->dbinfo.db = qp->dpi->dbr;
+ rq->dbinfo.max_slot = bnxt_qplib_set_rq_max_slot(rq->wqe_size);
+ }
+ spin_lock_bh(&rcfw->tbl_lock);
+ tbl_indx = map_qp_id_to_tbl_indx(qp->id, rcfw);
+ rcfw->qp_tbl[tbl_indx].qp_id = qp->id;
+ rcfw->qp_tbl[tbl_indx].qp_handle = (void *)qp;
+ spin_unlock_bh(&rcfw->tbl_lock);
+
+ return 0;
+fail:
+ bnxt_qplib_free_hwq(res, &qp->irrq);
+fail_orrq:
+ bnxt_qplib_free_hwq(res, &qp->orrq);
+rq_swq:
+ kfree(rq->swq);
+fail_rq:
+ bnxt_qplib_free_hwq(res, &rq->hwq);
+sq_swq:
+ kfree(sq->swq);
+fail_sq:
+ bnxt_qplib_free_hwq(res, &sq->hwq);
+ return rc;
+}
+
+static void __modify_flags_from_init_state(struct bnxt_qplib_qp *qp)
+{
+ switch (qp->state) {
+ case CMDQ_MODIFY_QP_NEW_STATE_RTR:
+ /* INIT->RTR, configure the path_mtu to the default
+ * 2048 if not being requested
+ */
+ if (!(qp->modify_flags &
+ CMDQ_MODIFY_QP_MODIFY_MASK_PATH_MTU)) {
+ qp->modify_flags |=
+ CMDQ_MODIFY_QP_MODIFY_MASK_PATH_MTU;
+ qp->path_mtu =
+ CMDQ_MODIFY_QP_PATH_MTU_MTU_2048;
+ }
+ /* Bono FW require the max_dest_rd_atomic to be >= 1 */
+ if (qp->max_dest_rd_atomic < 1)
+ qp->max_dest_rd_atomic = 1;
+ qp->modify_flags &= ~CMDQ_MODIFY_QP_MODIFY_MASK_SRC_MAC;
+ /* Bono FW 20.6.5 requires SGID_INDEX configuration */
+ if (!(qp->modify_flags &
+ CMDQ_MODIFY_QP_MODIFY_MASK_SGID_INDEX)) {
+ qp->modify_flags |=
+ CMDQ_MODIFY_QP_MODIFY_MASK_SGID_INDEX;
+ qp->ah.sgid_index = 0;
+ }
+ break;
+ default:
+ break;
+ }
+}
+
+static void __modify_flags_from_rtr_state(struct bnxt_qplib_qp *qp)
+{
+ switch (qp->state) {
+ case CMDQ_MODIFY_QP_NEW_STATE_RTS:
+ /* Bono FW requires the max_rd_atomic to be >= 1 */
+ if (qp->max_rd_atomic < 1)
+ qp->max_rd_atomic = 1;
+ /* Bono FW does not allow PKEY_INDEX,
+ * DGID, FLOW_LABEL, SGID_INDEX, HOP_LIMIT,
+ * TRAFFIC_CLASS, DEST_MAC, PATH_MTU, RQ_PSN,
+ * MIN_RNR_TIMER, MAX_DEST_RD_ATOMIC, DEST_QP_ID
+ * modification
+ */
+ qp->modify_flags &=
+ ~(CMDQ_MODIFY_QP_MODIFY_MASK_PKEY |
+ CMDQ_MODIFY_QP_MODIFY_MASK_DGID |
+ CMDQ_MODIFY_QP_MODIFY_MASK_FLOW_LABEL |
+ CMDQ_MODIFY_QP_MODIFY_MASK_SGID_INDEX |
+ CMDQ_MODIFY_QP_MODIFY_MASK_HOP_LIMIT |
+ CMDQ_MODIFY_QP_MODIFY_MASK_TRAFFIC_CLASS |
+ CMDQ_MODIFY_QP_MODIFY_MASK_DEST_MAC |
+ CMDQ_MODIFY_QP_MODIFY_MASK_PATH_MTU |
+ CMDQ_MODIFY_QP_MODIFY_MASK_RQ_PSN |
+ CMDQ_MODIFY_QP_MODIFY_MASK_MIN_RNR_TIMER |
+ CMDQ_MODIFY_QP_MODIFY_MASK_MAX_DEST_RD_ATOMIC |
+ CMDQ_MODIFY_QP_MODIFY_MASK_DEST_QP_ID);
+ break;
+ default:
+ break;
+ }
+}
+
+static void __filter_modify_flags(struct bnxt_qplib_qp *qp)
+{
+ switch (qp->cur_qp_state) {
+ case CMDQ_MODIFY_QP_NEW_STATE_RESET:
+ break;
+ case CMDQ_MODIFY_QP_NEW_STATE_INIT:
+ __modify_flags_from_init_state(qp);
+ break;
+ case CMDQ_MODIFY_QP_NEW_STATE_RTR:
+ __modify_flags_from_rtr_state(qp);
+ break;
+ case CMDQ_MODIFY_QP_NEW_STATE_RTS:
+ break;
+ case CMDQ_MODIFY_QP_NEW_STATE_SQD:
+ break;
+ case CMDQ_MODIFY_QP_NEW_STATE_SQE:
+ break;
+ case CMDQ_MODIFY_QP_NEW_STATE_ERR:
+ break;
+ default:
+ break;
+ }
+}
+
+static void bnxt_set_mandatory_attributes(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_qp *qp,
+ struct cmdq_modify_qp *req)
+{
+ u32 mandatory_flags = 0;
+
+ if (qp->type == CMDQ_MODIFY_QP_QP_TYPE_RC)
+ mandatory_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_ACCESS;
+
+ if (qp->cur_qp_state == CMDQ_MODIFY_QP_NEW_STATE_INIT &&
+ qp->state == CMDQ_MODIFY_QP_NEW_STATE_RTR) {
+ if (qp->type == CMDQ_MODIFY_QP_QP_TYPE_RC && qp->srq)
+ req->flags = cpu_to_le16(CMDQ_MODIFY_QP_FLAGS_SRQ_USED);
+ mandatory_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_PKEY;
+ }
+
+ if (_is_min_rnr_in_rtr_rts_mandatory(res->dattr->dev_cap_flags2) &&
+ (qp->cur_qp_state == CMDQ_MODIFY_QP_NEW_STATE_RTR &&
+ qp->state == CMDQ_MODIFY_QP_NEW_STATE_RTS)) {
+ if (qp->type == CMDQ_MODIFY_QP_QP_TYPE_RC)
+ mandatory_flags |=
+ CMDQ_MODIFY_QP_MODIFY_MASK_MIN_RNR_TIMER;
+ }
+
+ if (qp->type == CMDQ_MODIFY_QP_QP_TYPE_UD ||
+ qp->type == CMDQ_MODIFY_QP_QP_TYPE_GSI)
+ mandatory_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_QKEY;
+
+ qp->modify_flags |= mandatory_flags;
+ req->qp_type = qp->type;
+}
+
+static bool is_optimized_state_transition(struct bnxt_qplib_qp *qp)
+{
+ if ((qp->cur_qp_state == CMDQ_MODIFY_QP_NEW_STATE_INIT &&
+ qp->state == CMDQ_MODIFY_QP_NEW_STATE_RTR) ||
+ (qp->cur_qp_state == CMDQ_MODIFY_QP_NEW_STATE_RTR &&
+ qp->state == CMDQ_MODIFY_QP_NEW_STATE_RTS))
+ return true;
+
+ return false;
+}
+
+int bnxt_qplib_modify_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
+{
+ struct bnxt_qplib_sgid_tbl *sgid_tbl = &res->sgid_tbl;
+ struct bnxt_qplib_rcfw *rcfw = res->rcfw;
+ struct creq_modify_qp_resp resp = {};
+ struct bnxt_qplib_cmdqmsg msg = {};
+ struct cmdq_modify_qp req = {};
+ u16 vlan_pcp_vlan_dei_vlan_id;
+ u32 temp32[4];
+ u32 bmask;
+ int rc;
+
+ bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req,
+ CMDQ_BASE_OPCODE_MODIFY_QP,
+ sizeof(req));
+
+ /* Filter out the qp_attr_mask based on the state->new transition */
+ __filter_modify_flags(qp);
+ if (qp->modify_flags & CMDQ_MODIFY_QP_MODIFY_MASK_STATE) {
+ /* Set mandatory attributes for INIT -> RTR and RTR -> RTS transition */
+ if (_is_optimize_modify_qp_supported(res->dattr->dev_cap_flags2) &&
+ is_optimized_state_transition(qp))
+ bnxt_set_mandatory_attributes(res, qp, &req);
+ }
+ bmask = qp->modify_flags;
+ req.modify_mask = cpu_to_le32(qp->modify_flags);
+ req.qp_cid = cpu_to_le32(qp->id);
+ if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_STATE) {
+ req.network_type_en_sqd_async_notify_new_state =
+ (qp->state & CMDQ_MODIFY_QP_NEW_STATE_MASK) |
+ (qp->en_sqd_async_notify ?
+ CMDQ_MODIFY_QP_EN_SQD_ASYNC_NOTIFY : 0);
+ }
+ req.network_type_en_sqd_async_notify_new_state |= qp->nw_type;
+
+ if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_ACCESS)
+ req.access = qp->access;
+
+ if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_PKEY)
+ req.pkey = cpu_to_le16(IB_DEFAULT_PKEY_FULL);
+
+ if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_QKEY)
+ req.qkey = cpu_to_le32(qp->qkey);
+
+ if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_DGID) {
+ memcpy(temp32, qp->ah.dgid.data, sizeof(struct bnxt_qplib_gid));
+ req.dgid[0] = cpu_to_le32(temp32[0]);
+ req.dgid[1] = cpu_to_le32(temp32[1]);
+ req.dgid[2] = cpu_to_le32(temp32[2]);
+ req.dgid[3] = cpu_to_le32(temp32[3]);
+ }
+ if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_FLOW_LABEL)
+ req.flow_label = cpu_to_le32(qp->ah.flow_label);
+
+ if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_SGID_INDEX) {
+ if (qp->type == CMDQ_CREATE_QP_TYPE_RAW_ETHERTYPE)
+ req.sgid_index =
+ cpu_to_le16(sgid_tbl->hw_id[qp->ugid_index]);
+ else
+ req.sgid_index =
+ cpu_to_le16(sgid_tbl->hw_id[qp->ah.sgid_index]);
+ }
+
+ if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_HOP_LIMIT)
+ req.hop_limit = qp->ah.hop_limit;
+
+ if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_TRAFFIC_CLASS)
+ req.traffic_class = qp->ah.traffic_class;
+
+ if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_DEST_MAC)
+ memcpy(req.dest_mac, qp->ah.dmac, 6);
+
+ if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_PATH_MTU)
+ req.path_mtu_pingpong_push_enable |= qp->path_mtu;
+
+ if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_TIMEOUT)
+ req.timeout = qp->timeout;
+
+ if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_RETRY_CNT)
+ req.retry_cnt = qp->retry_cnt;
+
+ if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_RNR_RETRY)
+ req.rnr_retry = qp->rnr_retry;
+
+ if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_MIN_RNR_TIMER)
+ req.min_rnr_timer = qp->min_rnr_timer;
+
+ if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_RQ_PSN)
+ req.rq_psn = cpu_to_le32(qp->rq.psn);
+
+ if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_SQ_PSN)
+ req.sq_psn = cpu_to_le32(qp->sq.psn);
+
+ if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_MAX_RD_ATOMIC)
+ req.max_rd_atomic =
+ ORD_LIMIT_TO_ORRQ_SLOTS(qp->max_rd_atomic);
+
+ if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_MAX_DEST_RD_ATOMIC)
+ req.max_dest_rd_atomic =
+ IRD_LIMIT_TO_IRRQ_SLOTS(qp->max_dest_rd_atomic);
+
+ req.sq_size = cpu_to_le32(qp->sq.hwq.max_elements);
+ req.rq_size = cpu_to_le32(qp->rq.hwq.max_elements);
+ req.sq_sge = cpu_to_le16(qp->sq.max_sge);
+ req.rq_sge = cpu_to_le16(qp->rq.max_sge);
+ req.max_inline_data = cpu_to_le32(qp->max_inline_data);
+ if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_DEST_QP_ID)
+ req.dest_qp_id = cpu_to_le32(qp->dest_qpn);
+
+ if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_VLAN_ID) {
+ vlan_pcp_vlan_dei_vlan_id =
+ ((res->sgid_tbl.tbl[qp->ah.sgid_index].vlan_id <<
+ CMDQ_MODIFY_QP_VLAN_ID_SFT) &
+ CMDQ_MODIFY_QP_VLAN_ID_MASK);
+ vlan_pcp_vlan_dei_vlan_id |=
+ ((qp->ah.sl << CMDQ_MODIFY_QP_VLAN_PCP_SFT) &
+ CMDQ_MODIFY_QP_VLAN_PCP_MASK);
+ req.vlan_pcp_vlan_dei_vlan_id = cpu_to_le16(vlan_pcp_vlan_dei_vlan_id);
+ }
+
+ bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, NULL, sizeof(req), sizeof(resp), 0);
+ rc = bnxt_qplib_rcfw_send_message(rcfw, &msg);
+ if (rc)
+ return rc;
+ qp->cur_qp_state = qp->state;
+ return 0;
+}
+
+int bnxt_qplib_query_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
+{
+ struct bnxt_qplib_rcfw *rcfw = res->rcfw;
+ struct creq_query_qp_resp resp = {};
+ struct bnxt_qplib_cmdqmsg msg = {};
+ struct bnxt_qplib_rcfw_sbuf sbuf;
+ struct creq_query_qp_resp_sb *sb;
+ struct cmdq_query_qp req = {};
+ u32 temp32[4];
+ int i, rc;
+
+ sbuf.size = ALIGN(sizeof(*sb), BNXT_QPLIB_CMDQE_UNITS);
+ sbuf.sb = dma_alloc_coherent(&rcfw->pdev->dev, sbuf.size,
+ &sbuf.dma_addr, GFP_KERNEL);
+ if (!sbuf.sb)
+ return -ENOMEM;
+ sb = sbuf.sb;
+
+ bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req,
+ CMDQ_BASE_OPCODE_QUERY_QP,
+ sizeof(req));
+
+ req.qp_cid = cpu_to_le32(qp->id);
+ req.resp_size = sbuf.size / BNXT_QPLIB_CMDQE_UNITS;
+ bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, &sbuf, sizeof(req),
+ sizeof(resp), 0);
+ rc = bnxt_qplib_rcfw_send_message(rcfw, &msg);
+ if (rc)
+ goto bail;
+ /* Extract the context from the side buffer */
+ qp->state = sb->en_sqd_async_notify_state &
+ CREQ_QUERY_QP_RESP_SB_STATE_MASK;
+ qp->en_sqd_async_notify = sb->en_sqd_async_notify_state &
+ CREQ_QUERY_QP_RESP_SB_EN_SQD_ASYNC_NOTIFY;
+ qp->access = sb->access;
+ qp->pkey_index = le16_to_cpu(sb->pkey);
+ qp->qkey = le32_to_cpu(sb->qkey);
+ qp->udp_sport = le16_to_cpu(sb->udp_src_port);
+
+ temp32[0] = le32_to_cpu(sb->dgid[0]);
+ temp32[1] = le32_to_cpu(sb->dgid[1]);
+ temp32[2] = le32_to_cpu(sb->dgid[2]);
+ temp32[3] = le32_to_cpu(sb->dgid[3]);
+ memcpy(qp->ah.dgid.data, temp32, sizeof(qp->ah.dgid.data));
+
+ qp->ah.flow_label = le32_to_cpu(sb->flow_label);
+
+ qp->ah.sgid_index = 0;
+ for (i = 0; i < res->sgid_tbl.max; i++) {
+ if (res->sgid_tbl.hw_id[i] == le16_to_cpu(sb->sgid_index)) {
+ qp->ah.sgid_index = i;
+ break;
+ }
+ }
+ if (i == res->sgid_tbl.max)
+ dev_warn(&res->pdev->dev, "SGID not found??\n");
+
+ qp->ah.hop_limit = sb->hop_limit;
+ qp->ah.traffic_class = sb->traffic_class;
+ memcpy(qp->ah.dmac, sb->dest_mac, 6);
+ qp->ah.vlan_id = (le16_to_cpu(sb->path_mtu_dest_vlan_id) &
+ CREQ_QUERY_QP_RESP_SB_VLAN_ID_MASK) >>
+ CREQ_QUERY_QP_RESP_SB_VLAN_ID_SFT;
+ qp->path_mtu = (le16_to_cpu(sb->path_mtu_dest_vlan_id) &
+ CREQ_QUERY_QP_RESP_SB_PATH_MTU_MASK) >>
+ CREQ_QUERY_QP_RESP_SB_PATH_MTU_SFT;
+ qp->timeout = sb->timeout;
+ qp->retry_cnt = sb->retry_cnt;
+ qp->rnr_retry = sb->rnr_retry;
+ qp->min_rnr_timer = sb->min_rnr_timer;
+ qp->rq.psn = le32_to_cpu(sb->rq_psn);
+ qp->max_rd_atomic = ORRQ_SLOTS_TO_ORD_LIMIT(sb->max_rd_atomic);
+ qp->sq.psn = le32_to_cpu(sb->sq_psn);
+ qp->max_dest_rd_atomic =
+ IRRQ_SLOTS_TO_IRD_LIMIT(sb->max_dest_rd_atomic);
+ qp->sq.max_wqe = qp->sq.hwq.max_elements;
+ qp->rq.max_wqe = qp->rq.hwq.max_elements;
+ qp->sq.max_sge = le16_to_cpu(sb->sq_sge);
+ qp->rq.max_sge = le16_to_cpu(sb->rq_sge);
+ qp->max_inline_data = le32_to_cpu(sb->max_inline_data);
+ qp->dest_qpn = le32_to_cpu(sb->dest_qp_id);
+ memcpy(qp->smac, sb->src_mac, 6);
+ qp->vlan_id = le16_to_cpu(sb->vlan_pcp_vlan_dei_vlan_id);
+ qp->port_id = le16_to_cpu(sb->port_id);
+bail:
+ dma_free_coherent(&rcfw->pdev->dev, sbuf.size,
+ sbuf.sb, sbuf.dma_addr);
+ return rc;
+}
+
+static void __clean_cq(struct bnxt_qplib_cq *cq, u64 qp)
+{
+ struct bnxt_qplib_hwq *cq_hwq = &cq->hwq;
+ u32 peek_flags, peek_cons;
+ struct cq_base *hw_cqe;
+ int i;
+
+ peek_flags = cq->dbinfo.flags;
+ peek_cons = cq_hwq->cons;
+ for (i = 0; i < cq_hwq->max_elements; i++) {
+ hw_cqe = bnxt_qplib_get_qe(cq_hwq, peek_cons, NULL);
+ if (!CQE_CMP_VALID(hw_cqe, peek_flags))
+ continue;
+ /*
+ * The valid test of the entry must be done first before
+ * reading any further.
+ */
+ dma_rmb();
+ switch (hw_cqe->cqe_type_toggle & CQ_BASE_CQE_TYPE_MASK) {
+ case CQ_BASE_CQE_TYPE_REQ:
+ case CQ_BASE_CQE_TYPE_TERMINAL:
+ {
+ struct cq_req *cqe = (struct cq_req *)hw_cqe;
+
+ if (qp == le64_to_cpu(cqe->qp_handle))
+ cqe->qp_handle = 0;
+ break;
+ }
+ case CQ_BASE_CQE_TYPE_RES_RC:
+ case CQ_BASE_CQE_TYPE_RES_UD:
+ case CQ_BASE_CQE_TYPE_RES_RAWETH_QP1:
+ {
+ struct cq_res_rc *cqe = (struct cq_res_rc *)hw_cqe;
+
+ if (qp == le64_to_cpu(cqe->qp_handle))
+ cqe->qp_handle = 0;
+ break;
+ }
+ default:
+ break;
+ }
+ bnxt_qplib_hwq_incr_cons(cq_hwq->max_elements, &peek_cons,
+ 1, &peek_flags);
+ }
+}
+
+int bnxt_qplib_destroy_qp(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_qp *qp)
+{
+ struct bnxt_qplib_rcfw *rcfw = res->rcfw;
+ struct creq_destroy_qp_resp resp = {};
+ struct bnxt_qplib_cmdqmsg msg = {};
+ struct cmdq_destroy_qp req = {};
+ u32 tbl_indx;
+ int rc;
+
+ spin_lock_bh(&rcfw->tbl_lock);
+ tbl_indx = map_qp_id_to_tbl_indx(qp->id, rcfw);
+ rcfw->qp_tbl[tbl_indx].qp_id = BNXT_QPLIB_QP_ID_INVALID;
+ rcfw->qp_tbl[tbl_indx].qp_handle = NULL;
+ spin_unlock_bh(&rcfw->tbl_lock);
+
+ bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req,
+ CMDQ_BASE_OPCODE_DESTROY_QP,
+ sizeof(req));
+
+ req.qp_cid = cpu_to_le32(qp->id);
+ bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, NULL, sizeof(req),
+ sizeof(resp), 0);
+ rc = bnxt_qplib_rcfw_send_message(rcfw, &msg);
+ if (rc) {
+ spin_lock_bh(&rcfw->tbl_lock);
+ rcfw->qp_tbl[tbl_indx].qp_id = qp->id;
+ rcfw->qp_tbl[tbl_indx].qp_handle = qp;
+ spin_unlock_bh(&rcfw->tbl_lock);
+ return rc;
+ }
+
+ return 0;
+}
+
+void bnxt_qplib_free_qp_res(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_qp *qp)
+{
+ bnxt_qplib_free_qp_hdr_buf(res, qp);
+ bnxt_qplib_free_hwq(res, &qp->sq.hwq);
+ kfree(qp->sq.swq);
+
+ bnxt_qplib_free_hwq(res, &qp->rq.hwq);
+ kfree(qp->rq.swq);
+
+ if (qp->irrq.max_elements)
+ bnxt_qplib_free_hwq(res, &qp->irrq);
+ if (qp->orrq.max_elements)
+ bnxt_qplib_free_hwq(res, &qp->orrq);
+
+}
+
+void *bnxt_qplib_get_qp1_sq_buf(struct bnxt_qplib_qp *qp,
+ struct bnxt_qplib_sge *sge)
+{
+ struct bnxt_qplib_q *sq = &qp->sq;
+ u32 sw_prod;
+
+ memset(sge, 0, sizeof(*sge));
+
+ if (qp->sq_hdr_buf) {
+ sw_prod = sq->swq_start;
+ sge->addr = (dma_addr_t)(qp->sq_hdr_buf_map +
+ sw_prod * qp->sq_hdr_buf_size);
+ sge->lkey = 0xFFFFFFFF;
+ sge->size = qp->sq_hdr_buf_size;
+ return qp->sq_hdr_buf + sw_prod * sge->size;
+ }
+ return NULL;
+}
+
+u32 bnxt_qplib_get_rq_prod_index(struct bnxt_qplib_qp *qp)
+{
+ struct bnxt_qplib_q *rq = &qp->rq;
+
+ return rq->swq_start;
+}
+
+dma_addr_t bnxt_qplib_get_qp_buf_from_index(struct bnxt_qplib_qp *qp, u32 index)
+{
+ return (qp->rq_hdr_buf_map + index * qp->rq_hdr_buf_size);
+}
+
+void *bnxt_qplib_get_qp1_rq_buf(struct bnxt_qplib_qp *qp,
+ struct bnxt_qplib_sge *sge)
+{
+ struct bnxt_qplib_q *rq = &qp->rq;
+ u32 sw_prod;
+
+ memset(sge, 0, sizeof(*sge));
+
+ if (qp->rq_hdr_buf) {
+ sw_prod = rq->swq_start;
+ sge->addr = (dma_addr_t)(qp->rq_hdr_buf_map +
+ sw_prod * qp->rq_hdr_buf_size);
+ sge->lkey = 0xFFFFFFFF;
+ sge->size = qp->rq_hdr_buf_size;
+ return qp->rq_hdr_buf + sw_prod * sge->size;
+ }
+ return NULL;
+}
+
+/* Fil the MSN table into the next psn row */
+static void bnxt_qplib_fill_msn_search(struct bnxt_qplib_qp *qp,
+ struct bnxt_qplib_swqe *wqe,
+ struct bnxt_qplib_swq *swq)
+{
+ struct sq_msn_search *msns;
+ u32 start_psn, next_psn;
+ u16 start_idx;
+
+ msns = (struct sq_msn_search *)swq->psn_search;
+ msns->start_idx_next_psn_start_psn = 0;
+
+ start_psn = swq->start_psn;
+ next_psn = swq->next_psn;
+ start_idx = swq->slot_idx;
+ msns->start_idx_next_psn_start_psn |=
+ bnxt_re_update_msn_tbl(start_idx, next_psn, start_psn);
+ qp->msn++;
+ qp->msn %= qp->msn_tbl_sz;
+}
+
+static void bnxt_qplib_fill_psn_search(struct bnxt_qplib_qp *qp,
+ struct bnxt_qplib_swqe *wqe,
+ struct bnxt_qplib_swq *swq)
+{
+ struct sq_psn_search_ext *psns_ext;
+ struct sq_psn_search *psns;
+ u32 flg_npsn;
+ u32 op_spsn;
+
+ if (!swq->psn_search)
+ return;
+ /* Handle MSN differently on cap flags */
+ if (qp->is_host_msn_tbl) {
+ bnxt_qplib_fill_msn_search(qp, wqe, swq);
+ return;
+ }
+ psns = (struct sq_psn_search *)swq->psn_search;
+ psns = swq->psn_search;
+ psns_ext = swq->psn_ext;
+
+ op_spsn = ((swq->start_psn << SQ_PSN_SEARCH_START_PSN_SFT) &
+ SQ_PSN_SEARCH_START_PSN_MASK);
+ op_spsn |= ((wqe->type << SQ_PSN_SEARCH_OPCODE_SFT) &
+ SQ_PSN_SEARCH_OPCODE_MASK);
+ flg_npsn = ((swq->next_psn << SQ_PSN_SEARCH_NEXT_PSN_SFT) &
+ SQ_PSN_SEARCH_NEXT_PSN_MASK);
+
+ if (bnxt_qplib_is_chip_gen_p5_p7(qp->cctx)) {
+ psns_ext->opcode_start_psn = cpu_to_le32(op_spsn);
+ psns_ext->flags_next_psn = cpu_to_le32(flg_npsn);
+ psns_ext->start_slot_idx = cpu_to_le16(swq->slot_idx);
+ } else {
+ psns->opcode_start_psn = cpu_to_le32(op_spsn);
+ psns->flags_next_psn = cpu_to_le32(flg_npsn);
+ }
+}
+
+static unsigned int bnxt_qplib_put_inline(struct bnxt_qplib_qp *qp,
+ struct bnxt_qplib_swqe *wqe,
+ u32 *idx)
+{
+ struct bnxt_qplib_hwq *hwq;
+ int len, t_len, offt;
+ bool pull_dst = true;
+ void *il_dst = NULL;
+ void *il_src = NULL;
+ int t_cplen, cplen;
+ int indx;
+
+ hwq = &qp->sq.hwq;
+ t_len = 0;
+ for (indx = 0; indx < wqe->num_sge; indx++) {
+ len = wqe->sg_list[indx].size;
+ il_src = (void *)wqe->sg_list[indx].addr;
+ t_len += len;
+ if (t_len > qp->max_inline_data)
+ return BNXT_RE_INVAL_MSG_SIZE;
+ while (len) {
+ if (pull_dst) {
+ pull_dst = false;
+ il_dst = bnxt_qplib_get_prod_qe(hwq, *idx);
+ (*idx)++;
+ t_cplen = 0;
+ offt = 0;
+ }
+ cplen = min_t(int, len, sizeof(struct sq_sge));
+ cplen = min_t(int, cplen,
+ (sizeof(struct sq_sge) - offt));
+ memcpy(il_dst, il_src, cplen);
+ t_cplen += cplen;
+ il_src += cplen;
+ il_dst += cplen;
+ offt += cplen;
+ len -= cplen;
+ if (t_cplen == sizeof(struct sq_sge))
+ pull_dst = true;
+ }
+ }
+
+ return t_len;
+}
+
+static unsigned int bnxt_qplib_put_sges(struct bnxt_qplib_hwq *hwq,
+ struct bnxt_qplib_sge *ssge,
+ u32 nsge, u32 *idx)
+{
+ struct sq_sge *dsge;
+ int indx, len = 0;
+
+ for (indx = 0; indx < nsge; indx++, (*idx)++) {
+ dsge = bnxt_qplib_get_prod_qe(hwq, *idx);
+ dsge->va_or_pa = cpu_to_le64(ssge[indx].addr);
+ dsge->l_key = cpu_to_le32(ssge[indx].lkey);
+ dsge->size = cpu_to_le32(ssge[indx].size);
+ len += ssge[indx].size;
+ }
+
+ return len;
+}
+
+static u16 bnxt_qplib_required_slots(struct bnxt_qplib_qp *qp,
+ struct bnxt_qplib_swqe *wqe,
+ u16 *wqe_sz, u16 *qdf, u8 mode)
+{
+ u32 ilsize, bytes;
+ u16 nsge;
+ u16 slot;
+
+ nsge = wqe->num_sge;
+ /* Adding sq_send_hdr is a misnomer, for rq also hdr size is same. */
+ bytes = sizeof(struct sq_send_hdr) + nsge * sizeof(struct sq_sge);
+ if (wqe->flags & BNXT_QPLIB_SWQE_FLAGS_INLINE) {
+ ilsize = bnxt_qplib_calc_ilsize(wqe, qp->max_inline_data);
+ bytes = ALIGN(ilsize, sizeof(struct sq_sge));
+ bytes += sizeof(struct sq_send_hdr);
+ }
+
+ *qdf = __xlate_qfd(qp->sq.q_full_delta, bytes);
+ slot = bytes >> 4;
+ *wqe_sz = slot;
+ if (mode == BNXT_QPLIB_WQE_MODE_STATIC)
+ slot = 8;
+ return slot;
+}
+
+static void bnxt_qplib_pull_psn_buff(struct bnxt_qplib_qp *qp, struct bnxt_qplib_q *sq,
+ struct bnxt_qplib_swq *swq, bool hw_retx)
+{
+ struct bnxt_qplib_hwq *hwq;
+ u32 pg_num, pg_indx;
+ void *buff;
+ u32 tail;
+
+ hwq = &sq->hwq;
+ if (!hwq->pad_pg)
+ return;
+ tail = swq->slot_idx / sq->dbinfo.max_slot;
+ if (hw_retx) {
+ /* For HW retx use qp msn index */
+ tail = qp->msn;
+ tail %= qp->msn_tbl_sz;
+ }
+ pg_num = (tail + hwq->pad_pgofft) / (PAGE_SIZE / hwq->pad_stride);
+ pg_indx = (tail + hwq->pad_pgofft) % (PAGE_SIZE / hwq->pad_stride);
+ buff = (void *)(hwq->pad_pg[pg_num] + pg_indx * hwq->pad_stride);
+ swq->psn_ext = buff;
+ swq->psn_search = buff;
+}
+
+void bnxt_qplib_post_send_db(struct bnxt_qplib_qp *qp)
+{
+ struct bnxt_qplib_q *sq = &qp->sq;
+
+ bnxt_qplib_ring_prod_db(&sq->dbinfo, DBC_DBC_TYPE_SQ);
+}
+
+int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp,
+ struct bnxt_qplib_swqe *wqe)
+{
+ struct bnxt_qplib_nq_work *nq_work = NULL;
+ int i, rc = 0, data_len = 0, pkt_num = 0;
+ struct bnxt_qplib_q *sq = &qp->sq;
+ struct bnxt_qplib_hwq *hwq;
+ struct bnxt_qplib_swq *swq;
+ bool sch_handler = false;
+ u32 wqe_idx, slots, idx;
+ u16 wqe_sz, qdf = 0;
+ bool msn_update;
+ void *base_hdr;
+ void *ext_hdr;
+ __le32 temp32;
+
+ hwq = &sq->hwq;
+ if (qp->state != CMDQ_MODIFY_QP_NEW_STATE_RTS &&
+ qp->state != CMDQ_MODIFY_QP_NEW_STATE_ERR) {
+ dev_err(&hwq->pdev->dev,
+ "QPLIB: FP: QP (0x%x) is in the 0x%x state",
+ qp->id, qp->state);
+ rc = -EINVAL;
+ goto done;
+ }
+
+ slots = bnxt_qplib_required_slots(qp, wqe, &wqe_sz, &qdf, qp->wqe_mode);
+ if (bnxt_qplib_queue_full(sq, slots + qdf)) {
+ dev_err(&hwq->pdev->dev,
+ "prod = %#x cons = %#x qdepth = %#x delta = %#x\n",
+ hwq->prod, hwq->cons, hwq->depth, sq->q_full_delta);
+ rc = -ENOMEM;
+ goto done;
+ }
+
+ swq = bnxt_qplib_get_swqe(sq, &wqe_idx);
+ bnxt_qplib_pull_psn_buff(qp, sq, swq, qp->is_host_msn_tbl);
+
+ idx = 0;
+ swq->slot_idx = hwq->prod;
+ swq->slots = slots;
+ swq->wr_id = wqe->wr_id;
+ swq->type = wqe->type;
+ swq->flags = wqe->flags;
+ swq->start_psn = sq->psn & BTH_PSN_MASK;
+ if (qp->sig_type)
+ swq->flags |= SQ_SEND_FLAGS_SIGNAL_COMP;
+
+ if (qp->state == CMDQ_MODIFY_QP_NEW_STATE_ERR) {
+ sch_handler = true;
+ dev_dbg(&hwq->pdev->dev,
+ "%s Error QP. Scheduling for poll_cq\n", __func__);
+ goto queue_err;
+ }
+
+ base_hdr = bnxt_qplib_get_prod_qe(hwq, idx++);
+ ext_hdr = bnxt_qplib_get_prod_qe(hwq, idx++);
+ memset(base_hdr, 0, sizeof(struct sq_sge));
+ memset(ext_hdr, 0, sizeof(struct sq_sge));
+
+ if (wqe->flags & BNXT_QPLIB_SWQE_FLAGS_INLINE)
+ /* Copy the inline data */
+ data_len = bnxt_qplib_put_inline(qp, wqe, &idx);
+ else
+ data_len = bnxt_qplib_put_sges(hwq, wqe->sg_list, wqe->num_sge,
+ &idx);
+ if (data_len > BNXT_RE_MAX_MSG_SIZE) {
+ rc = -EINVAL;
+ goto done;
+ }
+ /* Make sure we update MSN table only for wired wqes */
+ msn_update = true;
+ /* Specifics */
+ switch (wqe->type) {
+ case BNXT_QPLIB_SWQE_TYPE_SEND:
+ if (qp->type == CMDQ_CREATE_QP1_TYPE_GSI) {
+ struct sq_send_raweth_qp1_hdr *sqe = base_hdr;
+ struct sq_raw_ext_hdr *ext_sqe = ext_hdr;
+ /* Assemble info for Raw Ethertype QPs */
+
+ sqe->wqe_type = wqe->type;
+ sqe->flags = wqe->flags;
+ sqe->wqe_size = wqe_sz;
+ sqe->cfa_action = cpu_to_le16(wqe->rawqp1.cfa_action);
+ sqe->lflags = cpu_to_le16(wqe->rawqp1.lflags);
+ sqe->length = cpu_to_le32(data_len);
+ ext_sqe->cfa_meta = cpu_to_le32((wqe->rawqp1.cfa_meta &
+ SQ_SEND_RAWETH_QP1_CFA_META_VLAN_VID_MASK) <<
+ SQ_SEND_RAWETH_QP1_CFA_META_VLAN_VID_SFT);
+
+ break;
+ }
+ fallthrough;
+ case BNXT_QPLIB_SWQE_TYPE_SEND_WITH_IMM:
+ case BNXT_QPLIB_SWQE_TYPE_SEND_WITH_INV:
+ {
+ struct sq_ud_ext_hdr *ext_sqe = ext_hdr;
+ struct sq_send_hdr *sqe = base_hdr;
+
+ sqe->wqe_type = wqe->type;
+ sqe->flags = wqe->flags;
+ sqe->wqe_size = wqe_sz;
+ sqe->inv_key_or_imm_data = cpu_to_le32(wqe->send.inv_key);
+ if (qp->type == CMDQ_CREATE_QP_TYPE_UD ||
+ qp->type == CMDQ_CREATE_QP_TYPE_GSI) {
+ sqe->q_key = cpu_to_le32(wqe->send.q_key);
+ sqe->length = cpu_to_le32(data_len);
+ sq->psn = (sq->psn + 1) & BTH_PSN_MASK;
+ ext_sqe->dst_qp = cpu_to_le32(wqe->send.dst_qp &
+ SQ_SEND_DST_QP_MASK);
+ ext_sqe->avid = cpu_to_le32(wqe->send.avid &
+ SQ_SEND_AVID_MASK);
+ msn_update = false;
+ } else {
+ sqe->length = cpu_to_le32(data_len);
+ if (qp->mtu)
+ pkt_num = (data_len + qp->mtu - 1) / qp->mtu;
+ if (!pkt_num)
+ pkt_num = 1;
+ sq->psn = (sq->psn + pkt_num) & BTH_PSN_MASK;
+ }
+ break;
+ }
+ case BNXT_QPLIB_SWQE_TYPE_RDMA_WRITE:
+ case BNXT_QPLIB_SWQE_TYPE_RDMA_WRITE_WITH_IMM:
+ case BNXT_QPLIB_SWQE_TYPE_RDMA_READ:
+ {
+ struct sq_rdma_ext_hdr *ext_sqe = ext_hdr;
+ struct sq_rdma_hdr *sqe = base_hdr;
+
+ sqe->wqe_type = wqe->type;
+ sqe->flags = wqe->flags;
+ sqe->wqe_size = wqe_sz;
+ sqe->imm_data = cpu_to_le32(wqe->rdma.inv_key);
+ sqe->length = cpu_to_le32((u32)data_len);
+ ext_sqe->remote_va = cpu_to_le64(wqe->rdma.remote_va);
+ ext_sqe->remote_key = cpu_to_le32(wqe->rdma.r_key);
+ if (qp->mtu)
+ pkt_num = (data_len + qp->mtu - 1) / qp->mtu;
+ if (!pkt_num)
+ pkt_num = 1;
+ sq->psn = (sq->psn + pkt_num) & BTH_PSN_MASK;
+ break;
+ }
+ case BNXT_QPLIB_SWQE_TYPE_ATOMIC_CMP_AND_SWP:
+ case BNXT_QPLIB_SWQE_TYPE_ATOMIC_FETCH_AND_ADD:
+ {
+ struct sq_atomic_ext_hdr *ext_sqe = ext_hdr;
+ struct sq_atomic_hdr *sqe = base_hdr;
+
+ sqe->wqe_type = wqe->type;
+ sqe->flags = wqe->flags;
+ sqe->remote_key = cpu_to_le32(wqe->atomic.r_key);
+ sqe->remote_va = cpu_to_le64(wqe->atomic.remote_va);
+ ext_sqe->swap_data = cpu_to_le64(wqe->atomic.swap_data);
+ ext_sqe->cmp_data = cpu_to_le64(wqe->atomic.cmp_data);
+ if (qp->mtu)
+ pkt_num = (data_len + qp->mtu - 1) / qp->mtu;
+ if (!pkt_num)
+ pkt_num = 1;
+ sq->psn = (sq->psn + pkt_num) & BTH_PSN_MASK;
+ break;
+ }
+ case BNXT_QPLIB_SWQE_TYPE_LOCAL_INV:
+ {
+ struct sq_localinvalidate *sqe = base_hdr;
+
+ sqe->wqe_type = wqe->type;
+ sqe->flags = wqe->flags;
+ sqe->inv_l_key = cpu_to_le32(wqe->local_inv.inv_l_key);
+ msn_update = false;
+ break;
+ }
+ case BNXT_QPLIB_SWQE_TYPE_FAST_REG_MR:
+ {
+ struct sq_fr_pmr_ext_hdr *ext_sqe = ext_hdr;
+ struct sq_fr_pmr_hdr *sqe = base_hdr;
+
+ sqe->wqe_type = wqe->type;
+ sqe->flags = wqe->flags;
+ sqe->access_cntl = wqe->frmr.access_cntl |
+ SQ_FR_PMR_ACCESS_CNTL_LOCAL_WRITE;
+ sqe->zero_based_page_size_log =
+ (wqe->frmr.pg_sz_log & SQ_FR_PMR_PAGE_SIZE_LOG_MASK) <<
+ SQ_FR_PMR_PAGE_SIZE_LOG_SFT |
+ (wqe->frmr.zero_based ? SQ_FR_PMR_ZERO_BASED : 0);
+ sqe->l_key = cpu_to_le32(wqe->frmr.l_key);
+ temp32 = cpu_to_le32(wqe->frmr.length);
+ memcpy(sqe->length, &temp32, sizeof(wqe->frmr.length));
+ sqe->numlevels_pbl_page_size_log =
+ ((wqe->frmr.pbl_pg_sz_log <<
+ SQ_FR_PMR_PBL_PAGE_SIZE_LOG_SFT) &
+ SQ_FR_PMR_PBL_PAGE_SIZE_LOG_MASK) |
+ ((wqe->frmr.levels << SQ_FR_PMR_NUMLEVELS_SFT) &
+ SQ_FR_PMR_NUMLEVELS_MASK);
+
+ for (i = 0; i < wqe->frmr.page_list_len; i++)
+ wqe->frmr.pbl_ptr[i] = cpu_to_le64(
+ wqe->frmr.page_list[i] |
+ PTU_PTE_VALID);
+ ext_sqe->pblptr = cpu_to_le64(wqe->frmr.pbl_dma_ptr);
+ ext_sqe->va = cpu_to_le64(wqe->frmr.va);
+ msn_update = false;
+
+ break;
+ }
+ case BNXT_QPLIB_SWQE_TYPE_BIND_MW:
+ {
+ struct sq_bind_ext_hdr *ext_sqe = ext_hdr;
+ struct sq_bind_hdr *sqe = base_hdr;
+
+ sqe->wqe_type = wqe->type;
+ sqe->flags = wqe->flags;
+ sqe->access_cntl = wqe->bind.access_cntl;
+ sqe->mw_type_zero_based = wqe->bind.mw_type |
+ (wqe->bind.zero_based ? SQ_BIND_ZERO_BASED : 0);
+ sqe->parent_l_key = cpu_to_le32(wqe->bind.parent_l_key);
+ sqe->l_key = cpu_to_le32(wqe->bind.r_key);
+ ext_sqe->va = cpu_to_le64(wqe->bind.va);
+ ext_sqe->length_lo = cpu_to_le32(wqe->bind.length);
+ msn_update = false;
+ break;
+ }
+ default:
+ /* Bad wqe, return error */
+ rc = -EINVAL;
+ goto done;
+ }
+ if (!qp->is_host_msn_tbl || msn_update) {
+ swq->next_psn = sq->psn & BTH_PSN_MASK;
+ bnxt_qplib_fill_psn_search(qp, wqe, swq);
+ }
+queue_err:
+ bnxt_qplib_swq_mod_start(sq, wqe_idx);
+ bnxt_qplib_hwq_incr_prod(&sq->dbinfo, hwq, swq->slots);
+ qp->wqe_cnt++;
+done:
+ if (sch_handler) {
+ nq_work = kzalloc(sizeof(*nq_work), GFP_ATOMIC);
+ if (nq_work) {
+ nq_work->cq = qp->scq;
+ nq_work->nq = qp->scq->nq;
+ INIT_WORK(&nq_work->work, bnxt_qpn_cqn_sched_task);
+ queue_work(qp->scq->nq->cqn_wq, &nq_work->work);
+ } else {
+ dev_err(&hwq->pdev->dev,
+ "FP: Failed to allocate SQ nq_work!\n");
+ rc = -ENOMEM;
+ }
+ }
+ return rc;
+}
+
+void bnxt_qplib_post_recv_db(struct bnxt_qplib_qp *qp)
+{
+ struct bnxt_qplib_q *rq = &qp->rq;
+
+ bnxt_qplib_ring_prod_db(&rq->dbinfo, DBC_DBC_TYPE_RQ);
+}
+
+int bnxt_qplib_post_recv(struct bnxt_qplib_qp *qp,
+ struct bnxt_qplib_swqe *wqe)
+{
+ struct bnxt_qplib_nq_work *nq_work = NULL;
+ struct bnxt_qplib_q *rq = &qp->rq;
+ struct rq_wqe_hdr *base_hdr;
+ struct rq_ext_hdr *ext_hdr;
+ struct bnxt_qplib_hwq *hwq;
+ struct bnxt_qplib_swq *swq;
+ bool sch_handler = false;
+ u32 wqe_idx, idx;
+ u16 wqe_sz;
+ int rc = 0;
+
+ hwq = &rq->hwq;
+ if (qp->state == CMDQ_MODIFY_QP_NEW_STATE_RESET) {
+ dev_err(&hwq->pdev->dev,
+ "QPLIB: FP: QP (0x%x) is in the 0x%x state",
+ qp->id, qp->state);
+ rc = -EINVAL;
+ goto done;
+ }
+
+ if (bnxt_qplib_queue_full(rq, rq->dbinfo.max_slot)) {
+ dev_err(&hwq->pdev->dev,
+ "FP: QP (0x%x) RQ is full!\n", qp->id);
+ rc = -EINVAL;
+ goto done;
+ }
+
+ swq = bnxt_qplib_get_swqe(rq, &wqe_idx);
+ swq->wr_id = wqe->wr_id;
+ swq->slots = rq->dbinfo.max_slot;
+
+ if (qp->state == CMDQ_MODIFY_QP_NEW_STATE_ERR) {
+ sch_handler = true;
+ dev_dbg(&hwq->pdev->dev,
+ "%s: Error QP. Scheduling for poll_cq\n", __func__);
+ goto queue_err;
+ }
+
+ idx = 0;
+ base_hdr = bnxt_qplib_get_prod_qe(hwq, idx++);
+ ext_hdr = bnxt_qplib_get_prod_qe(hwq, idx++);
+ memset(base_hdr, 0, sizeof(struct sq_sge));
+ memset(ext_hdr, 0, sizeof(struct sq_sge));
+ wqe_sz = (sizeof(struct rq_wqe_hdr) +
+ wqe->num_sge * sizeof(struct sq_sge)) >> 4;
+ bnxt_qplib_put_sges(hwq, wqe->sg_list, wqe->num_sge, &idx);
+ if (!wqe->num_sge) {
+ struct sq_sge *sge;
+
+ sge = bnxt_qplib_get_prod_qe(hwq, idx++);
+ sge->size = 0;
+ wqe_sz++;
+ }
+ base_hdr->wqe_type = wqe->type;
+ base_hdr->flags = wqe->flags;
+ base_hdr->wqe_size = wqe_sz;
+ base_hdr->wr_id[0] = cpu_to_le32(wqe_idx);
+queue_err:
+ bnxt_qplib_swq_mod_start(rq, wqe_idx);
+ bnxt_qplib_hwq_incr_prod(&rq->dbinfo, hwq, swq->slots);
+done:
+ if (sch_handler) {
+ nq_work = kzalloc(sizeof(*nq_work), GFP_ATOMIC);
+ if (nq_work) {
+ nq_work->cq = qp->rcq;
+ nq_work->nq = qp->rcq->nq;
+ INIT_WORK(&nq_work->work, bnxt_qpn_cqn_sched_task);
+ queue_work(qp->rcq->nq->cqn_wq, &nq_work->work);
+ } else {
+ dev_err(&hwq->pdev->dev,
+ "FP: Failed to allocate RQ nq_work!\n");
+ rc = -ENOMEM;
+ }
+ }
+
+ return rc;
+}
+
+/* CQ */
+int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq)
+{
+ struct bnxt_qplib_rcfw *rcfw = res->rcfw;
+ struct bnxt_qplib_hwq_attr hwq_attr = {};
+ struct creq_create_cq_resp resp = {};
+ struct bnxt_qplib_cmdqmsg msg = {};
+ struct cmdq_create_cq req = {};
+ struct bnxt_qplib_pbl *pbl;
+ u32 coalescing = 0;
+ u32 pg_sz_lvl;
+ int rc;
+
+ if (!cq->dpi) {
+ dev_err(&rcfw->pdev->dev,
+ "FP: CREATE_CQ failed due to NULL DPI\n");
+ return -EINVAL;
+ }
+
+ cq->dbinfo.flags = 0;
+ hwq_attr.res = res;
+ hwq_attr.depth = cq->max_wqe;
+ hwq_attr.stride = sizeof(struct cq_base);
+ hwq_attr.type = HWQ_TYPE_QUEUE;
+ hwq_attr.sginfo = &cq->sg_info;
+ rc = bnxt_qplib_alloc_init_hwq(&cq->hwq, &hwq_attr);
+ if (rc)
+ return rc;
+
+ bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req,
+ CMDQ_BASE_OPCODE_CREATE_CQ,
+ sizeof(req));
+
+ req.dpi = cpu_to_le32(cq->dpi->dpi);
+ req.cq_handle = cpu_to_le64(cq->cq_handle);
+ req.cq_size = cpu_to_le32(cq->max_wqe);
+
+ if (_is_cq_coalescing_supported(res->dattr->dev_cap_flags2) &&
+ cq->coalescing->enable) {
+ req.flags |= cpu_to_le16(CMDQ_CREATE_CQ_FLAGS_COALESCING_VALID);
+ coalescing |= ((cq->coalescing->buf_maxtime <<
+ CMDQ_CREATE_CQ_BUF_MAXTIME_SFT) &
+ CMDQ_CREATE_CQ_BUF_MAXTIME_MASK);
+ coalescing |= ((cq->coalescing->normal_maxbuf <<
+ CMDQ_CREATE_CQ_NORMAL_MAXBUF_SFT) &
+ CMDQ_CREATE_CQ_NORMAL_MAXBUF_MASK);
+ coalescing |= ((cq->coalescing->during_maxbuf <<
+ CMDQ_CREATE_CQ_DURING_MAXBUF_SFT) &
+ CMDQ_CREATE_CQ_DURING_MAXBUF_MASK);
+ if (cq->coalescing->en_ring_idle_mode)
+ coalescing |= CMDQ_CREATE_CQ_ENABLE_RING_IDLE_MODE;
+ else
+ coalescing &= ~CMDQ_CREATE_CQ_ENABLE_RING_IDLE_MODE;
+ req.coalescing = cpu_to_le32(coalescing);
+ }
+
+ pbl = &cq->hwq.pbl[PBL_LVL_0];
+ pg_sz_lvl = (bnxt_qplib_base_pg_size(&cq->hwq) <<
+ CMDQ_CREATE_CQ_PG_SIZE_SFT);
+ pg_sz_lvl |= (cq->hwq.level & CMDQ_CREATE_CQ_LVL_MASK);
+ req.pg_size_lvl = cpu_to_le32(pg_sz_lvl);
+ req.pbl = cpu_to_le64(pbl->pg_map_arr[0]);
+ req.cq_fco_cnq_id = cpu_to_le32(
+ (cq->cnq_hw_ring_id & CMDQ_CREATE_CQ_CNQ_ID_MASK) <<
+ CMDQ_CREATE_CQ_CNQ_ID_SFT);
+ bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, NULL, sizeof(req),
+ sizeof(resp), 0);
+ rc = bnxt_qplib_rcfw_send_message(rcfw, &msg);
+ if (rc)
+ goto fail;
+
+ cq->id = le32_to_cpu(resp.xid);
+ cq->period = BNXT_QPLIB_QUEUE_START_PERIOD;
+ init_waitqueue_head(&cq->waitq);
+ INIT_LIST_HEAD(&cq->sqf_head);
+ INIT_LIST_HEAD(&cq->rqf_head);
+ spin_lock_init(&cq->compl_lock);
+ spin_lock_init(&cq->flush_lock);
+
+ cq->dbinfo.hwq = &cq->hwq;
+ cq->dbinfo.xid = cq->id;
+ cq->dbinfo.db = cq->dpi->dbr;
+ cq->dbinfo.priv_db = res->dpi_tbl.priv_db;
+ cq->dbinfo.flags = 0;
+ cq->dbinfo.toggle = 0;
+
+ bnxt_qplib_armen_db(&cq->dbinfo, DBC_DBC_TYPE_CQ_ARMENA);
+
+ return 0;
+
+fail:
+ bnxt_qplib_free_hwq(res, &cq->hwq);
+ return rc;
+}
+
+void bnxt_qplib_resize_cq_complete(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_cq *cq)
+{
+ bnxt_qplib_free_hwq(res, &cq->hwq);
+ memcpy(&cq->hwq, &cq->resize_hwq, sizeof(cq->hwq));
+ /* Reset only the cons bit in the flags */
+ cq->dbinfo.flags &= ~(1UL << BNXT_QPLIB_FLAG_EPOCH_CONS_SHIFT);
+}
+
+int bnxt_qplib_resize_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq,
+ int new_cqes)
+{
+ struct bnxt_qplib_hwq_attr hwq_attr = {};
+ struct bnxt_qplib_rcfw *rcfw = res->rcfw;
+ struct creq_resize_cq_resp resp = {};
+ struct bnxt_qplib_cmdqmsg msg = {};
+ struct cmdq_resize_cq req = {};
+ struct bnxt_qplib_pbl *pbl;
+ u32 pg_sz, lvl, new_sz;
+ int rc;
+
+ bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req,
+ CMDQ_BASE_OPCODE_RESIZE_CQ,
+ sizeof(req));
+ hwq_attr.sginfo = &cq->sg_info;
+ hwq_attr.res = res;
+ hwq_attr.depth = new_cqes;
+ hwq_attr.stride = sizeof(struct cq_base);
+ hwq_attr.type = HWQ_TYPE_QUEUE;
+ rc = bnxt_qplib_alloc_init_hwq(&cq->resize_hwq, &hwq_attr);
+ if (rc)
+ return rc;
+
+ req.cq_cid = cpu_to_le32(cq->id);
+ pbl = &cq->resize_hwq.pbl[PBL_LVL_0];
+ pg_sz = bnxt_qplib_base_pg_size(&cq->resize_hwq);
+ lvl = (cq->resize_hwq.level << CMDQ_RESIZE_CQ_LVL_SFT) &
+ CMDQ_RESIZE_CQ_LVL_MASK;
+ new_sz = (new_cqes << CMDQ_RESIZE_CQ_NEW_CQ_SIZE_SFT) &
+ CMDQ_RESIZE_CQ_NEW_CQ_SIZE_MASK;
+ req.new_cq_size_pg_size_lvl = cpu_to_le32(new_sz | pg_sz | lvl);
+ req.new_pbl = cpu_to_le64(pbl->pg_map_arr[0]);
+
+ bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, NULL, sizeof(req),
+ sizeof(resp), 0);
+ rc = bnxt_qplib_rcfw_send_message(rcfw, &msg);
+ return rc;
+}
+
+int bnxt_qplib_destroy_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq)
+{
+ struct bnxt_qplib_rcfw *rcfw = res->rcfw;
+ struct creq_destroy_cq_resp resp = {};
+ struct bnxt_qplib_cmdqmsg msg = {};
+ struct cmdq_destroy_cq req = {};
+ u16 total_cnq_events;
+ int rc;
+
+ bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req,
+ CMDQ_BASE_OPCODE_DESTROY_CQ,
+ sizeof(req));
+
+ req.cq_cid = cpu_to_le32(cq->id);
+ bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, NULL, sizeof(req),
+ sizeof(resp), 0);
+ rc = bnxt_qplib_rcfw_send_message(rcfw, &msg);
+ if (rc)
+ return rc;
+ total_cnq_events = le16_to_cpu(resp.total_cnq_events);
+ __wait_for_all_nqes(cq, total_cnq_events);
+ bnxt_qplib_free_hwq(res, &cq->hwq);
+ return 0;
+}
+
+static int __flush_sq(struct bnxt_qplib_q *sq, struct bnxt_qplib_qp *qp,
+ struct bnxt_qplib_cqe **pcqe, int *budget)
+{
+ struct bnxt_qplib_cqe *cqe;
+ u32 start, last;
+ int rc = 0;
+
+ /* Now complete all outstanding SQEs with FLUSHED_ERR */
+ start = sq->swq_start;
+ cqe = *pcqe;
+ while (*budget) {
+ last = sq->swq_last;
+ if (start == last)
+ break;
+ /* Skip the FENCE WQE completions */
+ if (sq->swq[last].wr_id == BNXT_QPLIB_FENCE_WRID) {
+ bnxt_qplib_cancel_phantom_processing(qp);
+ goto skip_compl;
+ }
+ memset(cqe, 0, sizeof(*cqe));
+ cqe->status = CQ_REQ_STATUS_WORK_REQUEST_FLUSHED_ERR;
+ cqe->opcode = CQ_BASE_CQE_TYPE_REQ;
+ cqe->qp_handle = (u64)(unsigned long)qp;
+ cqe->wr_id = sq->swq[last].wr_id;
+ cqe->src_qp = qp->id;
+ cqe->type = sq->swq[last].type;
+ cqe++;
+ (*budget)--;
+skip_compl:
+ bnxt_qplib_hwq_incr_cons(sq->hwq.max_elements, &sq->hwq.cons,
+ sq->swq[last].slots, &sq->dbinfo.flags);
+ sq->swq_last = sq->swq[last].next_idx;
+ }
+ *pcqe = cqe;
+ if (!(*budget) && sq->swq_last != start)
+ /* Out of budget */
+ rc = -EAGAIN;
+
+ return rc;
+}
+
+static int __flush_rq(struct bnxt_qplib_q *rq, struct bnxt_qplib_qp *qp,
+ struct bnxt_qplib_cqe **pcqe, int *budget)
+{
+ struct bnxt_qplib_cqe *cqe;
+ u32 start, last;
+ int opcode = 0;
+ int rc = 0;
+
+ switch (qp->type) {
+ case CMDQ_CREATE_QP1_TYPE_GSI:
+ opcode = CQ_BASE_CQE_TYPE_RES_RAWETH_QP1;
+ break;
+ case CMDQ_CREATE_QP_TYPE_RC:
+ opcode = CQ_BASE_CQE_TYPE_RES_RC;
+ break;
+ case CMDQ_CREATE_QP_TYPE_UD:
+ case CMDQ_CREATE_QP_TYPE_GSI:
+ opcode = CQ_BASE_CQE_TYPE_RES_UD;
+ break;
+ }
+
+ /* Flush the rest of the RQ */
+ start = rq->swq_start;
+ cqe = *pcqe;
+ while (*budget) {
+ last = rq->swq_last;
+ if (last == start)
+ break;
+ memset(cqe, 0, sizeof(*cqe));
+ cqe->status =
+ CQ_RES_RC_STATUS_WORK_REQUEST_FLUSHED_ERR;
+ cqe->opcode = opcode;
+ cqe->qp_handle = (unsigned long)qp;
+ cqe->wr_id = rq->swq[last].wr_id;
+ cqe++;
+ (*budget)--;
+ bnxt_qplib_hwq_incr_cons(rq->hwq.max_elements, &rq->hwq.cons,
+ rq->swq[last].slots, &rq->dbinfo.flags);
+ rq->swq_last = rq->swq[last].next_idx;
+ }
+ *pcqe = cqe;
+ if (!*budget && rq->swq_last != start)
+ /* Out of budget */
+ rc = -EAGAIN;
+
+ return rc;
+}
+
+void bnxt_qplib_mark_qp_error(void *qp_handle)
+{
+ struct bnxt_qplib_qp *qp = qp_handle;
+
+ if (!qp)
+ return;
+
+ /* Must block new posting of SQ and RQ */
+ qp->state = CMDQ_MODIFY_QP_NEW_STATE_ERR;
+ bnxt_qplib_cancel_phantom_processing(qp);
+}
+
+/* Note: SQE is valid from sw_sq_cons up to cqe_sq_cons (exclusive)
+ * CQE is track from sw_cq_cons to max_element but valid only if VALID=1
+ */
+static int do_wa9060(struct bnxt_qplib_qp *qp, struct bnxt_qplib_cq *cq,
+ u32 cq_cons, u32 swq_last, u32 cqe_sq_cons)
+{
+ u32 peek_sw_cq_cons, peek_sq_cons_idx, peek_flags;
+ struct bnxt_qplib_q *sq = &qp->sq;
+ struct cq_req *peek_req_hwcqe;
+ struct bnxt_qplib_qp *peek_qp;
+ struct bnxt_qplib_q *peek_sq;
+ struct bnxt_qplib_swq *swq;
+ struct cq_base *peek_hwcqe;
+ int i, rc = 0;
+
+ /* Normal mode */
+ /* Check for the psn_search marking before completing */
+ swq = &sq->swq[swq_last];
+ if (swq->psn_search &&
+ le32_to_cpu(swq->psn_search->flags_next_psn) & 0x80000000) {
+ /* Unmark */
+ swq->psn_search->flags_next_psn = cpu_to_le32
+ (le32_to_cpu(swq->psn_search->flags_next_psn)
+ & ~0x80000000);
+ dev_dbg(&cq->hwq.pdev->dev,
+ "FP: Process Req cq_cons=0x%x qp=0x%x sq cons sw=0x%x cqe=0x%x marked!\n",
+ cq_cons, qp->id, swq_last, cqe_sq_cons);
+ sq->condition = true;
+ sq->send_phantom = true;
+
+ /* TODO: Only ARM if the previous SQE is ARMALL */
+ bnxt_qplib_ring_db(&cq->dbinfo, DBC_DBC_TYPE_CQ_ARMALL);
+ rc = -EAGAIN;
+ goto out;
+ }
+ if (sq->condition) {
+ /* Peek at the completions */
+ peek_flags = cq->dbinfo.flags;
+ peek_sw_cq_cons = cq_cons;
+ i = cq->hwq.max_elements;
+ while (i--) {
+ peek_hwcqe = bnxt_qplib_get_qe(&cq->hwq,
+ peek_sw_cq_cons, NULL);
+ /* If the next hwcqe is VALID */
+ if (CQE_CMP_VALID(peek_hwcqe, peek_flags)) {
+ /*
+ * The valid test of the entry must be done first before
+ * reading any further.
+ */
+ dma_rmb();
+ /* If the next hwcqe is a REQ */
+ if ((peek_hwcqe->cqe_type_toggle &
+ CQ_BASE_CQE_TYPE_MASK) ==
+ CQ_BASE_CQE_TYPE_REQ) {
+ peek_req_hwcqe = (struct cq_req *)
+ peek_hwcqe;
+ peek_qp = (struct bnxt_qplib_qp *)
+ ((unsigned long)
+ le64_to_cpu
+ (peek_req_hwcqe->qp_handle));
+ peek_sq = &peek_qp->sq;
+ peek_sq_cons_idx =
+ ((le16_to_cpu(
+ peek_req_hwcqe->sq_cons_idx)
+ - 1) % sq->max_wqe);
+ /* If the hwcqe's sq's wr_id matches */
+ if (peek_sq == sq &&
+ sq->swq[peek_sq_cons_idx].wr_id ==
+ BNXT_QPLIB_FENCE_WRID) {
+ /*
+ * Unbreak only if the phantom
+ * comes back
+ */
+ dev_dbg(&cq->hwq.pdev->dev,
+ "FP: Got Phantom CQE\n");
+ sq->condition = false;
+ sq->single = true;
+ rc = 0;
+ goto out;
+ }
+ }
+ /* Valid but not the phantom, so keep looping */
+ } else {
+ /* Not valid yet, just exit and wait */
+ rc = -EINVAL;
+ goto out;
+ }
+ bnxt_qplib_hwq_incr_cons(cq->hwq.max_elements,
+ &peek_sw_cq_cons,
+ 1, &peek_flags);
+ }
+ dev_err(&cq->hwq.pdev->dev,
+ "Should not have come here! cq_cons=0x%x qp=0x%x sq cons sw=0x%x hw=0x%x\n",
+ cq_cons, qp->id, swq_last, cqe_sq_cons);
+ rc = -EINVAL;
+ }
+out:
+ return rc;
+}
+
+static int bnxt_qplib_get_cqe_sq_cons(struct bnxt_qplib_q *sq, u32 cqe_slot)
+{
+ struct bnxt_qplib_hwq *sq_hwq;
+ struct bnxt_qplib_swq *swq;
+ int cqe_sq_cons = -1;
+ u32 start, last;
+
+ sq_hwq = &sq->hwq;
+
+ start = sq->swq_start;
+ last = sq->swq_last;
+
+ while (last != start) {
+ swq = &sq->swq[last];
+ if (swq->slot_idx == cqe_slot) {
+ cqe_sq_cons = swq->next_idx;
+ dev_err(&sq_hwq->pdev->dev, "%s: Found cons wqe = %d slot = %d\n",
+ __func__, cqe_sq_cons, cqe_slot);
+ break;
+ }
+
+ last = swq->next_idx;
+ }
+ return cqe_sq_cons;
+}
+
+static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq,
+ struct cq_req *hwcqe,
+ struct bnxt_qplib_cqe **pcqe, int *budget,
+ u32 cq_cons, struct bnxt_qplib_qp **lib_qp)
+{
+ struct bnxt_qplib_swq *swq;
+ struct bnxt_qplib_cqe *cqe;
+ u32 cqe_sq_cons, slot_num;
+ struct bnxt_qplib_qp *qp;
+ struct bnxt_qplib_q *sq;
+ int cqe_cons;
+ int rc = 0;
+
+ qp = (struct bnxt_qplib_qp *)((unsigned long)
+ le64_to_cpu(hwcqe->qp_handle));
+ if (!qp) {
+ dev_err(&cq->hwq.pdev->dev,
+ "FP: Process Req qp is NULL\n");
+ return -EINVAL;
+ }
+ sq = &qp->sq;
+
+ cqe_sq_cons = le16_to_cpu(hwcqe->sq_cons_idx) % sq->max_sw_wqe;
+ if (qp->sq.flushed) {
+ dev_dbg(&cq->hwq.pdev->dev,
+ "%s: QP in Flush QP = %p\n", __func__, qp);
+ goto done;
+ }
+
+ if (__is_err_cqe_for_var_wqe(qp, hwcqe->status)) {
+ slot_num = le16_to_cpu(hwcqe->sq_cons_idx);
+ cqe_cons = bnxt_qplib_get_cqe_sq_cons(sq, slot_num);
+ if (cqe_cons < 0) {
+ dev_err(&cq->hwq.pdev->dev, "%s: Wrong SQ cons cqe_slot_indx = %d\n",
+ __func__, slot_num);
+ goto done;
+ }
+ cqe_sq_cons = cqe_cons;
+ dev_err(&cq->hwq.pdev->dev, "%s: cqe_sq_cons = %d swq_last = %d swq_start = %d\n",
+ __func__, cqe_sq_cons, sq->swq_last, sq->swq_start);
+ }
+
+ /* Require to walk the sq's swq to fabricate CQEs for all previously
+ * signaled SWQEs due to CQE aggregation from the current sq cons
+ * to the cqe_sq_cons
+ */
+ cqe = *pcqe;
+ while (*budget) {
+ if (sq->swq_last == cqe_sq_cons)
+ /* Done */
+ break;
+
+ swq = &sq->swq[sq->swq_last];
+ memset(cqe, 0, sizeof(*cqe));
+ cqe->opcode = CQ_BASE_CQE_TYPE_REQ;
+ cqe->qp_handle = (u64)(unsigned long)qp;
+ cqe->src_qp = qp->id;
+ cqe->wr_id = swq->wr_id;
+ if (cqe->wr_id == BNXT_QPLIB_FENCE_WRID)
+ goto skip;
+ cqe->type = swq->type;
+
+ /* For the last CQE, check for status. For errors, regardless
+ * of the request being signaled or not, it must complete with
+ * the hwcqe error status
+ */
+ if (swq->next_idx == cqe_sq_cons &&
+ hwcqe->status != CQ_REQ_STATUS_OK) {
+ cqe->status = hwcqe->status;
+ dev_err(&cq->hwq.pdev->dev,
+ "FP: CQ Processed Req wr_id[%d] = 0x%llx with status 0x%x\n",
+ sq->swq_last, cqe->wr_id, cqe->status);
+ cqe++;
+ (*budget)--;
+ bnxt_qplib_mark_qp_error(qp);
+ /* Add qp to flush list of the CQ */
+ bnxt_qplib_add_flush_qp(qp);
+ } else {
+ /* Before we complete, do WA 9060 */
+ if (!bnxt_qplib_is_chip_gen_p5_p7(qp->cctx)) {
+ if (do_wa9060(qp, cq, cq_cons, sq->swq_last,
+ cqe_sq_cons)) {
+ *lib_qp = qp;
+ goto out;
+ }
+ }
+ if (swq->flags & SQ_SEND_FLAGS_SIGNAL_COMP) {
+ cqe->status = CQ_REQ_STATUS_OK;
+ cqe++;
+ (*budget)--;
+ }
+ }
+skip:
+ bnxt_qplib_hwq_incr_cons(sq->hwq.max_elements, &sq->hwq.cons,
+ swq->slots, &sq->dbinfo.flags);
+ sq->swq_last = swq->next_idx;
+ if (sq->single)
+ break;
+ }
+out:
+ *pcqe = cqe;
+ if (sq->swq_last != cqe_sq_cons) {
+ /* Out of budget */
+ rc = -EAGAIN;
+ goto done;
+ }
+ /*
+ * Back to normal completion mode only after it has completed all of
+ * the WC for this CQE
+ */
+ sq->single = false;
+done:
+ return rc;
+}
+
+static void bnxt_qplib_release_srqe(struct bnxt_qplib_srq *srq, u32 tag)
+{
+ spin_lock(&srq->hwq.lock);
+ srq->swq[srq->last_idx].next_idx = (int)tag;
+ srq->last_idx = (int)tag;
+ srq->swq[srq->last_idx].next_idx = -1;
+ bnxt_qplib_hwq_incr_cons(srq->hwq.max_elements, &srq->hwq.cons,
+ srq->dbinfo.max_slot, &srq->dbinfo.flags);
+ spin_unlock(&srq->hwq.lock);
+}
+
+static int bnxt_qplib_cq_process_res_rc(struct bnxt_qplib_cq *cq,
+ struct cq_res_rc *hwcqe,
+ struct bnxt_qplib_cqe **pcqe,
+ int *budget)
+{
+ struct bnxt_qplib_srq *srq;
+ struct bnxt_qplib_cqe *cqe;
+ struct bnxt_qplib_qp *qp;
+ struct bnxt_qplib_q *rq;
+ u32 wr_id_idx;
+
+ qp = (struct bnxt_qplib_qp *)((unsigned long)
+ le64_to_cpu(hwcqe->qp_handle));
+ if (!qp) {
+ dev_err(&cq->hwq.pdev->dev, "process_cq RC qp is NULL\n");
+ return -EINVAL;
+ }
+ if (qp->rq.flushed) {
+ dev_dbg(&cq->hwq.pdev->dev,
+ "%s: QP in Flush QP = %p\n", __func__, qp);
+ return 0;
+ }
+
+ cqe = *pcqe;
+ cqe->opcode = hwcqe->cqe_type_toggle & CQ_BASE_CQE_TYPE_MASK;
+ cqe->length = le32_to_cpu(hwcqe->length);
+ cqe->invrkey = le32_to_cpu(hwcqe->imm_data_or_inv_r_key);
+ cqe->mr_handle = le64_to_cpu(hwcqe->mr_handle);
+ cqe->flags = le16_to_cpu(hwcqe->flags);
+ cqe->status = hwcqe->status;
+ cqe->qp_handle = (u64)(unsigned long)qp;
+
+ wr_id_idx = le32_to_cpu(hwcqe->srq_or_rq_wr_id) &
+ CQ_RES_RC_SRQ_OR_RQ_WR_ID_MASK;
+ if (cqe->flags & CQ_RES_RC_FLAGS_SRQ_SRQ) {
+ srq = qp->srq;
+ if (!srq)
+ return -EINVAL;
+ if (wr_id_idx >= srq->hwq.max_elements) {
+ dev_err(&cq->hwq.pdev->dev,
+ "FP: CQ Process RC wr_id idx 0x%x exceeded SRQ max 0x%x\n",
+ wr_id_idx, srq->hwq.max_elements);
+ return -EINVAL;
+ }
+ cqe->wr_id = srq->swq[wr_id_idx].wr_id;
+ bnxt_qplib_release_srqe(srq, wr_id_idx);
+ cqe++;
+ (*budget)--;
+ *pcqe = cqe;
+ } else {
+ struct bnxt_qplib_swq *swq;
+
+ rq = &qp->rq;
+ if (wr_id_idx > (rq->max_wqe - 1)) {
+ dev_err(&cq->hwq.pdev->dev,
+ "FP: CQ Process RC wr_id idx 0x%x exceeded RQ max 0x%x\n",
+ wr_id_idx, rq->max_wqe);
+ return -EINVAL;
+ }
+ if (wr_id_idx != rq->swq_last)
+ return -EINVAL;
+ swq = &rq->swq[rq->swq_last];
+ cqe->wr_id = swq->wr_id;
+ cqe++;
+ (*budget)--;
+ bnxt_qplib_hwq_incr_cons(rq->hwq.max_elements, &rq->hwq.cons,
+ swq->slots, &rq->dbinfo.flags);
+ rq->swq_last = swq->next_idx;
+ *pcqe = cqe;
+
+ if (hwcqe->status != CQ_RES_RC_STATUS_OK) {
+ qp->state = CMDQ_MODIFY_QP_NEW_STATE_ERR;
+ /* Add qp to flush list of the CQ */
+ bnxt_qplib_add_flush_qp(qp);
+ }
+ }
+
+ return 0;
+}
+
+static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq,
+ struct cq_res_ud *hwcqe,
+ struct bnxt_qplib_cqe **pcqe,
+ int *budget)
+{
+ struct bnxt_qplib_srq *srq;
+ struct bnxt_qplib_cqe *cqe;
+ struct bnxt_qplib_qp *qp;
+ struct bnxt_qplib_q *rq;
+ u32 wr_id_idx;
+
+ qp = (struct bnxt_qplib_qp *)((unsigned long)
+ le64_to_cpu(hwcqe->qp_handle));
+ if (!qp) {
+ dev_err(&cq->hwq.pdev->dev, "process_cq UD qp is NULL\n");
+ return -EINVAL;
+ }
+ if (qp->rq.flushed) {
+ dev_dbg(&cq->hwq.pdev->dev,
+ "%s: QP in Flush QP = %p\n", __func__, qp);
+ return 0;
+ }
+ cqe = *pcqe;
+ cqe->opcode = hwcqe->cqe_type_toggle & CQ_BASE_CQE_TYPE_MASK;
+ cqe->length = le16_to_cpu(hwcqe->length) & CQ_RES_UD_LENGTH_MASK;
+ cqe->cfa_meta = le16_to_cpu(hwcqe->cfa_metadata);
+ cqe->invrkey = le32_to_cpu(hwcqe->imm_data);
+ cqe->flags = le16_to_cpu(hwcqe->flags);
+ cqe->status = hwcqe->status;
+ cqe->qp_handle = (u64)(unsigned long)qp;
+ /*FIXME: Endianness fix needed for smace */
+ memcpy(cqe->smac, hwcqe->src_mac, ETH_ALEN);
+ wr_id_idx = le32_to_cpu(hwcqe->src_qp_high_srq_or_rq_wr_id)
+ & CQ_RES_UD_SRQ_OR_RQ_WR_ID_MASK;
+ cqe->src_qp = le16_to_cpu(hwcqe->src_qp_low) |
+ ((le32_to_cpu(
+ hwcqe->src_qp_high_srq_or_rq_wr_id) &
+ CQ_RES_UD_SRC_QP_HIGH_MASK) >> 8);
+
+ if (cqe->flags & CQ_RES_RC_FLAGS_SRQ_SRQ) {
+ srq = qp->srq;
+ if (!srq)
+ return -EINVAL;
+
+ if (wr_id_idx >= srq->hwq.max_elements) {
+ dev_err(&cq->hwq.pdev->dev,
+ "FP: CQ Process UD wr_id idx 0x%x exceeded SRQ max 0x%x\n",
+ wr_id_idx, srq->hwq.max_elements);
+ return -EINVAL;
+ }
+ cqe->wr_id = srq->swq[wr_id_idx].wr_id;
+ bnxt_qplib_release_srqe(srq, wr_id_idx);
+ cqe++;
+ (*budget)--;
+ *pcqe = cqe;
+ } else {
+ struct bnxt_qplib_swq *swq;
+
+ rq = &qp->rq;
+ if (wr_id_idx > (rq->max_wqe - 1)) {
+ dev_err(&cq->hwq.pdev->dev,
+ "FP: CQ Process UD wr_id idx 0x%x exceeded RQ max 0x%x\n",
+ wr_id_idx, rq->max_wqe);
+ return -EINVAL;
+ }
+
+ if (rq->swq_last != wr_id_idx)
+ return -EINVAL;
+ swq = &rq->swq[rq->swq_last];
+ cqe->wr_id = swq->wr_id;
+ cqe++;
+ (*budget)--;
+ bnxt_qplib_hwq_incr_cons(rq->hwq.max_elements, &rq->hwq.cons,
+ swq->slots, &rq->dbinfo.flags);
+ rq->swq_last = swq->next_idx;
+ *pcqe = cqe;
+
+ if (hwcqe->status != CQ_RES_RC_STATUS_OK) {
+ qp->state = CMDQ_MODIFY_QP_NEW_STATE_ERR;
+ /* Add qp to flush list of the CQ */
+ bnxt_qplib_add_flush_qp(qp);
+ }
+ }
+
+ return 0;
+}
+
+bool bnxt_qplib_is_cq_empty(struct bnxt_qplib_cq *cq)
+{
+ struct cq_base *hw_cqe;
+ bool rc = true;
+
+ hw_cqe = bnxt_qplib_get_qe(&cq->hwq, cq->hwq.cons, NULL);
+ /* Check for Valid bit. If the CQE is valid, return false */
+ rc = !CQE_CMP_VALID(hw_cqe, cq->dbinfo.flags);
+ return rc;
+}
+
+static int bnxt_qplib_cq_process_res_raweth_qp1(struct bnxt_qplib_cq *cq,
+ struct cq_res_raweth_qp1 *hwcqe,
+ struct bnxt_qplib_cqe **pcqe,
+ int *budget)
+{
+ struct bnxt_qplib_qp *qp;
+ struct bnxt_qplib_q *rq;
+ struct bnxt_qplib_srq *srq;
+ struct bnxt_qplib_cqe *cqe;
+ u32 wr_id_idx;
+
+ qp = (struct bnxt_qplib_qp *)((unsigned long)
+ le64_to_cpu(hwcqe->qp_handle));
+ if (!qp) {
+ dev_err(&cq->hwq.pdev->dev, "process_cq Raw/QP1 qp is NULL\n");
+ return -EINVAL;
+ }
+ if (qp->rq.flushed) {
+ dev_dbg(&cq->hwq.pdev->dev,
+ "%s: QP in Flush QP = %p\n", __func__, qp);
+ return 0;
+ }
+ cqe = *pcqe;
+ cqe->opcode = hwcqe->cqe_type_toggle & CQ_BASE_CQE_TYPE_MASK;
+ cqe->flags = le16_to_cpu(hwcqe->flags);
+ cqe->qp_handle = (u64)(unsigned long)qp;
+
+ wr_id_idx =
+ le32_to_cpu(hwcqe->raweth_qp1_payload_offset_srq_or_rq_wr_id)
+ & CQ_RES_RAWETH_QP1_SRQ_OR_RQ_WR_ID_MASK;
+ cqe->src_qp = qp->id;
+ if (qp->id == 1 && !cqe->length) {
+ /* Add workaround for the length misdetection */
+ cqe->length = 296;
+ } else {
+ cqe->length = le16_to_cpu(hwcqe->length);
+ }
+ cqe->pkey_index = qp->pkey_index;
+ memcpy(cqe->smac, qp->smac, 6);
+
+ cqe->raweth_qp1_flags = le16_to_cpu(hwcqe->raweth_qp1_flags);
+ cqe->raweth_qp1_flags2 = le32_to_cpu(hwcqe->raweth_qp1_flags2);
+ cqe->raweth_qp1_metadata = le32_to_cpu(hwcqe->raweth_qp1_metadata);
+
+ if (cqe->flags & CQ_RES_RAWETH_QP1_FLAGS_SRQ_SRQ) {
+ srq = qp->srq;
+ if (!srq) {
+ dev_err(&cq->hwq.pdev->dev,
+ "FP: SRQ used but not defined??\n");
+ return -EINVAL;
+ }
+ if (wr_id_idx >= srq->hwq.max_elements) {
+ dev_err(&cq->hwq.pdev->dev,
+ "FP: CQ Process Raw/QP1 wr_id idx 0x%x exceeded SRQ max 0x%x\n",
+ wr_id_idx, srq->hwq.max_elements);
+ return -EINVAL;
+ }
+ cqe->wr_id = srq->swq[wr_id_idx].wr_id;
+ bnxt_qplib_release_srqe(srq, wr_id_idx);
+ cqe++;
+ (*budget)--;
+ *pcqe = cqe;
+ } else {
+ struct bnxt_qplib_swq *swq;
+
+ rq = &qp->rq;
+ if (wr_id_idx > (rq->max_wqe - 1)) {
+ dev_err(&cq->hwq.pdev->dev,
+ "FP: CQ Process Raw/QP1 RQ wr_id idx 0x%x exceeded RQ max 0x%x\n",
+ wr_id_idx, rq->max_wqe);
+ return -EINVAL;
+ }
+ if (rq->swq_last != wr_id_idx)
+ return -EINVAL;
+ swq = &rq->swq[rq->swq_last];
+ cqe->wr_id = swq->wr_id;
+ cqe++;
+ (*budget)--;
+ bnxt_qplib_hwq_incr_cons(rq->hwq.max_elements, &rq->hwq.cons,
+ swq->slots, &rq->dbinfo.flags);
+ rq->swq_last = swq->next_idx;
+ *pcqe = cqe;
+
+ if (hwcqe->status != CQ_RES_RC_STATUS_OK) {
+ qp->state = CMDQ_MODIFY_QP_NEW_STATE_ERR;
+ /* Add qp to flush list of the CQ */
+ bnxt_qplib_add_flush_qp(qp);
+ }
+ }
+
+ return 0;
+}
+
+static int bnxt_qplib_cq_process_terminal(struct bnxt_qplib_cq *cq,
+ struct cq_terminal *hwcqe,
+ struct bnxt_qplib_cqe **pcqe,
+ int *budget)
+{
+ struct bnxt_qplib_qp *qp;
+ struct bnxt_qplib_q *sq, *rq;
+ struct bnxt_qplib_cqe *cqe;
+ u32 swq_last = 0, cqe_cons;
+ int rc = 0;
+
+ /* Check the Status */
+ if (hwcqe->status != CQ_TERMINAL_STATUS_OK)
+ dev_warn(&cq->hwq.pdev->dev,
+ "FP: CQ Process Terminal Error status = 0x%x\n",
+ hwcqe->status);
+
+ qp = (struct bnxt_qplib_qp *)((unsigned long)
+ le64_to_cpu(hwcqe->qp_handle));
+ if (!qp)
+ return -EINVAL;
+
+ /* Must block new posting of SQ and RQ */
+ qp->state = CMDQ_MODIFY_QP_NEW_STATE_ERR;
+
+ sq = &qp->sq;
+ rq = &qp->rq;
+
+ cqe_cons = le16_to_cpu(hwcqe->sq_cons_idx);
+ if (cqe_cons == 0xFFFF)
+ goto do_rq;
+ cqe_cons %= sq->max_sw_wqe;
+
+ if (qp->sq.flushed) {
+ dev_dbg(&cq->hwq.pdev->dev,
+ "%s: QP in Flush QP = %p\n", __func__, qp);
+ goto sq_done;
+ }
+
+ /* Terminal CQE can also include aggregated successful CQEs prior.
+ * So we must complete all CQEs from the current sq's cons to the
+ * cq_cons with status OK
+ */
+ cqe = *pcqe;
+ while (*budget) {
+ swq_last = sq->swq_last;
+ if (swq_last == cqe_cons)
+ break;
+ if (sq->swq[swq_last].flags & SQ_SEND_FLAGS_SIGNAL_COMP) {
+ memset(cqe, 0, sizeof(*cqe));
+ cqe->status = CQ_REQ_STATUS_OK;
+ cqe->opcode = CQ_BASE_CQE_TYPE_REQ;
+ cqe->qp_handle = (u64)(unsigned long)qp;
+ cqe->src_qp = qp->id;
+ cqe->wr_id = sq->swq[swq_last].wr_id;
+ cqe->type = sq->swq[swq_last].type;
+ cqe++;
+ (*budget)--;
+ }
+ bnxt_qplib_hwq_incr_cons(sq->hwq.max_elements, &sq->hwq.cons,
+ sq->swq[swq_last].slots, &sq->dbinfo.flags);
+ sq->swq_last = sq->swq[swq_last].next_idx;
+ }
+ *pcqe = cqe;
+ if (!(*budget) && swq_last != cqe_cons) {
+ /* Out of budget */
+ rc = -EAGAIN;
+ goto sq_done;
+ }
+sq_done:
+ if (rc)
+ return rc;
+do_rq:
+ cqe_cons = le16_to_cpu(hwcqe->rq_cons_idx);
+ if (cqe_cons == 0xFFFF) {
+ goto done;
+ } else if (cqe_cons > rq->max_wqe - 1) {
+ dev_err(&cq->hwq.pdev->dev,
+ "FP: CQ Processed terminal reported rq_cons_idx 0x%x exceeds max 0x%x\n",
+ cqe_cons, rq->max_wqe);
+ rc = -EINVAL;
+ goto done;
+ }
+
+ if (qp->rq.flushed) {
+ dev_dbg(&cq->hwq.pdev->dev,
+ "%s: QP in Flush QP = %p\n", __func__, qp);
+ rc = 0;
+ goto done;
+ }
+
+ /* Terminal CQE requires all posted RQEs to complete with FLUSHED_ERR
+ * from the current rq->cons to the rq->prod regardless what the
+ * rq->cons the terminal CQE indicates
+ */
+
+ /* Add qp to flush list of the CQ */
+ bnxt_qplib_add_flush_qp(qp);
+done:
+ return rc;
+}
+
+static int bnxt_qplib_cq_process_cutoff(struct bnxt_qplib_cq *cq,
+ struct cq_cutoff *hwcqe)
+{
+ /* Check the Status */
+ if (hwcqe->status != CQ_CUTOFF_STATUS_OK) {
+ dev_err(&cq->hwq.pdev->dev,
+ "FP: CQ Process Cutoff Error status = 0x%x\n",
+ hwcqe->status);
+ return -EINVAL;
+ }
+ clear_bit(CQ_FLAGS_RESIZE_IN_PROG, &cq->flags);
+ wake_up_interruptible(&cq->waitq);
+
+ return 0;
+}
+
+int bnxt_qplib_process_flush_list(struct bnxt_qplib_cq *cq,
+ struct bnxt_qplib_cqe *cqe,
+ int num_cqes)
+{
+ struct bnxt_qplib_qp *qp = NULL;
+ u32 budget = num_cqes;
+ unsigned long flags;
+
+ spin_lock_irqsave(&cq->flush_lock, flags);
+ list_for_each_entry(qp, &cq->sqf_head, sq_flush) {
+ dev_dbg(&cq->hwq.pdev->dev, "FP: Flushing SQ QP= %p\n", qp);
+ __flush_sq(&qp->sq, qp, &cqe, &budget);
+ }
+
+ list_for_each_entry(qp, &cq->rqf_head, rq_flush) {
+ dev_dbg(&cq->hwq.pdev->dev, "FP: Flushing RQ QP= %p\n", qp);
+ __flush_rq(&qp->rq, qp, &cqe, &budget);
+ }
+ spin_unlock_irqrestore(&cq->flush_lock, flags);
+
+ return num_cqes - budget;
+}
+
+int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe,
+ int num_cqes, struct bnxt_qplib_qp **lib_qp)
+{
+ struct cq_base *hw_cqe;
+ int budget, rc = 0;
+ u32 hw_polled = 0;
+ u8 type;
+
+ budget = num_cqes;
+
+ while (budget) {
+ hw_cqe = bnxt_qplib_get_qe(&cq->hwq, cq->hwq.cons, NULL);
+
+ /* Check for Valid bit */
+ if (!CQE_CMP_VALID(hw_cqe, cq->dbinfo.flags))
+ break;
+
+ /*
+ * The valid test of the entry must be done first before
+ * reading any further.
+ */
+ dma_rmb();
+ /* From the device's respective CQE format to qplib_wc*/
+ type = hw_cqe->cqe_type_toggle & CQ_BASE_CQE_TYPE_MASK;
+ switch (type) {
+ case CQ_BASE_CQE_TYPE_REQ:
+ rc = bnxt_qplib_cq_process_req(cq,
+ (struct cq_req *)hw_cqe,
+ &cqe, &budget,
+ cq->hwq.cons, lib_qp);
+ break;
+ case CQ_BASE_CQE_TYPE_RES_RC:
+ rc = bnxt_qplib_cq_process_res_rc(cq,
+ (struct cq_res_rc *)
+ hw_cqe, &cqe,
+ &budget);
+ break;
+ case CQ_BASE_CQE_TYPE_RES_UD:
+ rc = bnxt_qplib_cq_process_res_ud
+ (cq, (struct cq_res_ud *)hw_cqe, &cqe,
+ &budget);
+ break;
+ case CQ_BASE_CQE_TYPE_RES_RAWETH_QP1:
+ rc = bnxt_qplib_cq_process_res_raweth_qp1
+ (cq, (struct cq_res_raweth_qp1 *)
+ hw_cqe, &cqe, &budget);
+ break;
+ case CQ_BASE_CQE_TYPE_TERMINAL:
+ rc = bnxt_qplib_cq_process_terminal
+ (cq, (struct cq_terminal *)hw_cqe,
+ &cqe, &budget);
+ break;
+ case CQ_BASE_CQE_TYPE_CUT_OFF:
+ bnxt_qplib_cq_process_cutoff
+ (cq, (struct cq_cutoff *)hw_cqe);
+ /* Done processing this CQ */
+ goto exit;
+ default:
+ dev_err(&cq->hwq.pdev->dev,
+ "process_cq unknown type 0x%lx\n",
+ hw_cqe->cqe_type_toggle &
+ CQ_BASE_CQE_TYPE_MASK);
+ rc = -EINVAL;
+ break;
+ }
+ if (rc < 0) {
+ if (rc == -EAGAIN)
+ break;
+ /* Error while processing the CQE, just skip to the
+ * next one
+ */
+ if (type != CQ_BASE_CQE_TYPE_TERMINAL)
+ dev_err(&cq->hwq.pdev->dev,
+ "process_cqe error rc = 0x%x\n", rc);
+ }
+ hw_polled++;
+ bnxt_qplib_hwq_incr_cons(cq->hwq.max_elements, &cq->hwq.cons,
+ 1, &cq->dbinfo.flags);
+
+ }
+ if (hw_polled)
+ bnxt_qplib_ring_db(&cq->dbinfo, DBC_DBC_TYPE_CQ);
+exit:
+ return num_cqes - budget;
+}
+
+void bnxt_qplib_req_notify_cq(struct bnxt_qplib_cq *cq, u32 arm_type)
+{
+ cq->dbinfo.toggle = cq->toggle;
+ if (arm_type)
+ bnxt_qplib_ring_db(&cq->dbinfo, arm_type);
+ /* Using cq->arm_state variable to track whether to issue cq handler */
+ atomic_set(&cq->arm_state, 1);
+}
+
+void bnxt_qplib_flush_cqn_wq(struct bnxt_qplib_qp *qp)
+{
+ flush_workqueue(qp->scq->nq->cqn_wq);
+ if (qp->scq != qp->rcq)
+ flush_workqueue(qp->rcq->nq->cqn_wq);
+}
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.h b/drivers/infiniband/hw/bnxt_re/qplib_fp.h
new file mode 100644
index 000000000000..1b414a73b46d
--- /dev/null
+++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.h
@@ -0,0 +1,689 @@
+/*
+ * Broadcom NetXtreme-E RoCE driver.
+ *
+ * Copyright (c) 2016 - 2017, Broadcom. All rights reserved. The term
+ * Broadcom refers to Broadcom Limited and/or its subsidiaries.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Description: Fast Path Operators (header)
+ */
+
+#ifndef __BNXT_QPLIB_FP_H__
+#define __BNXT_QPLIB_FP_H__
+
+#include <rdma/bnxt_re-abi.h>
+
+/* Few helper structures temporarily defined here
+ * should get rid of these when roce_hsi.h is updated
+ * in original code base
+ */
+struct sq_ud_ext_hdr {
+ __le32 dst_qp;
+ __le32 avid;
+ __le64 rsvd;
+};
+
+struct sq_raw_ext_hdr {
+ __le32 cfa_meta;
+ __le32 rsvd0;
+ __le64 rsvd1;
+};
+
+struct sq_rdma_ext_hdr {
+ __le64 remote_va;
+ __le32 remote_key;
+ __le32 rsvd;
+};
+
+struct sq_atomic_ext_hdr {
+ __le64 swap_data;
+ __le64 cmp_data;
+};
+
+struct sq_fr_pmr_ext_hdr {
+ __le64 pblptr;
+ __le64 va;
+};
+
+struct sq_bind_ext_hdr {
+ __le64 va;
+ __le32 length_lo;
+ __le32 length_hi;
+};
+
+struct rq_ext_hdr {
+ __le64 rsvd1;
+ __le64 rsvd2;
+};
+
+/* Helper structures end */
+
+struct bnxt_qplib_srq {
+ struct bnxt_qplib_pd *pd;
+ struct bnxt_qplib_dpi *dpi;
+ struct bnxt_qplib_db_info dbinfo;
+ u64 srq_handle;
+ u32 id;
+ u16 wqe_size;
+ u32 max_wqe;
+ u32 max_sge;
+ u32 threshold;
+ bool arm_req;
+ struct bnxt_qplib_cq *cq;
+ struct bnxt_qplib_hwq hwq;
+ struct bnxt_qplib_swq *swq;
+ int start_idx;
+ int last_idx;
+ struct bnxt_qplib_sg_info sg_info;
+ u16 eventq_hw_ring_id;
+ spinlock_t lock; /* protect SRQE link list */
+ u8 toggle;
+};
+
+struct bnxt_qplib_sge {
+ u64 addr;
+ u32 lkey;
+ u32 size;
+};
+
+struct bnxt_qplib_swq {
+ u64 wr_id;
+ int next_idx;
+ u8 type;
+ u8 flags;
+ u32 start_psn;
+ u32 next_psn;
+ u32 slot_idx;
+ u8 slots;
+ struct sq_psn_search *psn_search;
+ struct sq_psn_search_ext *psn_ext;
+};
+
+struct bnxt_qplib_swqe {
+ /* General */
+#define BNXT_QPLIB_FENCE_WRID 0x46454E43 /* "FENC" */
+ u64 wr_id;
+ u8 reqs_type;
+ u8 type;
+#define BNXT_QPLIB_SWQE_TYPE_SEND 0
+#define BNXT_QPLIB_SWQE_TYPE_SEND_WITH_IMM 1
+#define BNXT_QPLIB_SWQE_TYPE_SEND_WITH_INV 2
+#define BNXT_QPLIB_SWQE_TYPE_RDMA_WRITE 4
+#define BNXT_QPLIB_SWQE_TYPE_RDMA_WRITE_WITH_IMM 5
+#define BNXT_QPLIB_SWQE_TYPE_RDMA_READ 6
+#define BNXT_QPLIB_SWQE_TYPE_ATOMIC_CMP_AND_SWP 8
+#define BNXT_QPLIB_SWQE_TYPE_ATOMIC_FETCH_AND_ADD 11
+#define BNXT_QPLIB_SWQE_TYPE_LOCAL_INV 12
+#define BNXT_QPLIB_SWQE_TYPE_FAST_REG_MR 13
+#define BNXT_QPLIB_SWQE_TYPE_REG_MR 13
+#define BNXT_QPLIB_SWQE_TYPE_BIND_MW 14
+#define BNXT_QPLIB_SWQE_TYPE_RECV 128
+#define BNXT_QPLIB_SWQE_TYPE_RECV_RDMA_IMM 129
+ u8 flags;
+#define BNXT_QPLIB_SWQE_FLAGS_SIGNAL_COMP BIT(0)
+#define BNXT_QPLIB_SWQE_FLAGS_RD_ATOMIC_FENCE BIT(1)
+#define BNXT_QPLIB_SWQE_FLAGS_UC_FENCE BIT(2)
+#define BNXT_QPLIB_SWQE_FLAGS_SOLICIT_EVENT BIT(3)
+#define BNXT_QPLIB_SWQE_FLAGS_INLINE BIT(4)
+ struct bnxt_qplib_sge sg_list[BNXT_VAR_MAX_SGE];
+ int num_sge;
+ /* Max inline data is 96 bytes */
+ u32 inline_len;
+#define BNXT_QPLIB_SWQE_MAX_INLINE_LENGTH 96
+ u8 inline_data[BNXT_QPLIB_SWQE_MAX_INLINE_LENGTH];
+
+ union {
+ /* Send, with imm, inval key */
+ struct {
+ union {
+ u32 imm_data;
+ u32 inv_key;
+ };
+ u32 q_key;
+ u32 dst_qp;
+ u32 avid;
+ } send;
+
+ /* Send Raw Ethernet and QP1 */
+ struct {
+ u16 lflags;
+ u16 cfa_action;
+ u32 cfa_meta;
+ } rawqp1;
+
+ /* RDMA write, with imm, read */
+ struct {
+ union {
+ u32 imm_data;
+ u32 inv_key;
+ };
+ u64 remote_va;
+ u32 r_key;
+ } rdma;
+
+ /* Atomic cmp/swap, fetch/add */
+ struct {
+ u64 remote_va;
+ u32 r_key;
+ u64 swap_data;
+ u64 cmp_data;
+ } atomic;
+
+ /* Local Invalidate */
+ struct {
+ u32 inv_l_key;
+ } local_inv;
+
+ /* FR-PMR */
+ struct {
+ u8 access_cntl;
+ u8 pg_sz_log;
+ bool zero_based;
+ u32 l_key;
+ u32 length;
+ u8 pbl_pg_sz_log;
+#define BNXT_QPLIB_SWQE_PAGE_SIZE_4K 0
+#define BNXT_QPLIB_SWQE_PAGE_SIZE_8K 1
+#define BNXT_QPLIB_SWQE_PAGE_SIZE_64K 4
+#define BNXT_QPLIB_SWQE_PAGE_SIZE_256K 6
+#define BNXT_QPLIB_SWQE_PAGE_SIZE_1M 8
+#define BNXT_QPLIB_SWQE_PAGE_SIZE_2M 9
+#define BNXT_QPLIB_SWQE_PAGE_SIZE_4M 10
+#define BNXT_QPLIB_SWQE_PAGE_SIZE_1G 18
+ u8 levels;
+#define PAGE_SHIFT_4K 12
+ __le64 *pbl_ptr;
+ dma_addr_t pbl_dma_ptr;
+ u64 *page_list;
+ u16 page_list_len;
+ u64 va;
+ } frmr;
+
+ /* Bind */
+ struct {
+ u8 access_cntl;
+#define BNXT_QPLIB_BIND_SWQE_ACCESS_LOCAL_WRITE BIT(0)
+#define BNXT_QPLIB_BIND_SWQE_ACCESS_REMOTE_READ BIT(1)
+#define BNXT_QPLIB_BIND_SWQE_ACCESS_REMOTE_WRITE BIT(2)
+#define BNXT_QPLIB_BIND_SWQE_ACCESS_REMOTE_ATOMIC BIT(3)
+#define BNXT_QPLIB_BIND_SWQE_ACCESS_WINDOW_BIND BIT(4)
+ bool zero_based;
+ u8 mw_type;
+ u32 parent_l_key;
+ u32 r_key;
+ u64 va;
+ u32 length;
+ } bind;
+ };
+};
+
+struct bnxt_qplib_q {
+ struct bnxt_qplib_hwq hwq;
+ struct bnxt_qplib_swq *swq;
+ struct bnxt_qplib_db_info dbinfo;
+ struct bnxt_qplib_sg_info sg_info;
+ u32 max_wqe;
+ u32 max_sw_wqe;
+ u16 wqe_size;
+ u16 q_full_delta;
+ u16 max_sge;
+ u32 psn;
+ bool condition;
+ bool single;
+ bool send_phantom;
+ u32 phantom_wqe_cnt;
+ u32 phantom_cqe_cnt;
+ u32 next_cq_cons;
+ bool flushed;
+ u32 swq_start;
+ u32 swq_last;
+};
+
+struct bnxt_qplib_qp {
+ struct bnxt_qplib_pd *pd;
+ struct bnxt_qplib_dpi *dpi;
+ struct bnxt_qplib_chip_ctx *cctx;
+ u64 qp_handle;
+#define BNXT_QPLIB_QP_ID_INVALID 0xFFFFFFFF
+ u32 id;
+ u8 type;
+ u8 sig_type;
+ u8 wqe_mode;
+ u8 state;
+ u8 cur_qp_state;
+ u64 modify_flags;
+ u32 max_inline_data;
+ u32 mtu;
+ u8 path_mtu;
+ bool en_sqd_async_notify;
+ u16 pkey_index;
+ u32 qkey;
+ u32 dest_qp_id;
+ u8 access;
+ u8 timeout;
+ u8 retry_cnt;
+ u8 rnr_retry;
+ u64 wqe_cnt;
+ u32 min_rnr_timer;
+ u32 max_rd_atomic;
+ u32 max_dest_rd_atomic;
+ u32 dest_qpn;
+ u8 smac[6];
+ u16 vlan_id;
+ u16 port_id;
+ u16 udp_sport;
+ u8 nw_type;
+ struct bnxt_qplib_ah ah;
+
+#define BTH_PSN_MASK ((1 << 24) - 1)
+ /* SQ */
+ struct bnxt_qplib_q sq;
+ /* RQ */
+ struct bnxt_qplib_q rq;
+ /* SRQ */
+ struct bnxt_qplib_srq *srq;
+ /* CQ */
+ struct bnxt_qplib_cq *scq;
+ struct bnxt_qplib_cq *rcq;
+ /* IRRQ and ORRQ */
+ struct bnxt_qplib_hwq irrq;
+ struct bnxt_qplib_hwq orrq;
+ /* Header buffer for QP1 */
+ int sq_hdr_buf_size;
+ int rq_hdr_buf_size;
+/*
+ * Buffer space for ETH(14), IP or GRH(40), UDP header(8)
+ * and ib_bth + ib_deth (20).
+ * Max required is 82 when RoCE V2 is enabled
+ */
+#define BNXT_QPLIB_MAX_QP1_SQ_HDR_SIZE_V2 86
+ /* Ethernet header = 14 */
+ /* ib_grh = 40 (provided by MAD) */
+ /* ib_bth + ib_deth = 20 */
+ /* MAD = 256 (provided by MAD) */
+ /* iCRC = 4 */
+#define BNXT_QPLIB_MAX_QP1_RQ_ETH_HDR_SIZE 14
+#define BNXT_QPLIB_MAX_QP1_RQ_HDR_SIZE_V2 512
+#define BNXT_QPLIB_MAX_GRH_HDR_SIZE_IPV4 20
+#define BNXT_QPLIB_MAX_GRH_HDR_SIZE_IPV6 40
+#define BNXT_QPLIB_MAX_QP1_RQ_BDETH_HDR_SIZE 20
+ void *sq_hdr_buf;
+ dma_addr_t sq_hdr_buf_map;
+ void *rq_hdr_buf;
+ dma_addr_t rq_hdr_buf_map;
+ struct list_head sq_flush;
+ struct list_head rq_flush;
+ u32 msn;
+ u32 msn_tbl_sz;
+ bool is_host_msn_tbl;
+ u8 tos_dscp;
+ u32 ugid_index;
+};
+
+#define BNXT_RE_MAX_MSG_SIZE 0x80000000
+#define BNXT_RE_INVAL_MSG_SIZE 0xFFFFFFFF
+
+#define BNXT_QPLIB_MAX_CQE_ENTRY_SIZE sizeof(struct cq_base)
+
+#define CQE_CNT_PER_PG (PAGE_SIZE / BNXT_QPLIB_MAX_CQE_ENTRY_SIZE)
+#define CQE_MAX_IDX_PER_PG (CQE_CNT_PER_PG - 1)
+#define CQE_PG(x) (((x) & ~CQE_MAX_IDX_PER_PG) / CQE_CNT_PER_PG)
+#define CQE_IDX(x) ((x) & CQE_MAX_IDX_PER_PG)
+
+#define ROCE_CQE_CMP_V 0
+#define CQE_CMP_VALID(hdr, pass) \
+ (!!((hdr)->cqe_type_toggle & CQ_BASE_TOGGLE) == \
+ !((pass) & BNXT_QPLIB_FLAG_EPOCH_CONS_MASK))
+
+static inline u32 __bnxt_qplib_get_avail(struct bnxt_qplib_hwq *hwq)
+{
+ int cons, prod, avail;
+
+ cons = hwq->cons;
+ prod = hwq->prod;
+ avail = cons - prod;
+ if (cons <= prod)
+ avail += hwq->depth;
+ return avail;
+}
+
+static inline bool bnxt_qplib_queue_full(struct bnxt_qplib_q *que,
+ u8 slots)
+{
+ struct bnxt_qplib_hwq *hwq;
+ int avail;
+
+ hwq = &que->hwq;
+ /* False full is possible, retrying post-send makes sense */
+ avail = hwq->cons - hwq->prod;
+ if (hwq->cons <= hwq->prod)
+ avail += hwq->depth;
+ return avail <= slots;
+}
+
+/* CQ coalescing parameters */
+struct bnxt_qplib_cq_coal_param {
+ u16 buf_maxtime;
+ u8 normal_maxbuf;
+ u8 during_maxbuf;
+ u8 en_ring_idle_mode;
+ u8 enable;
+};
+
+#define BNXT_QPLIB_CQ_COAL_DEF_BUF_MAXTIME 0x1
+#define BNXT_QPLIB_CQ_COAL_DEF_NORMAL_MAXBUF_P7 0x8
+#define BNXT_QPLIB_CQ_COAL_DEF_DURING_MAXBUF_P7 0x8
+#define BNXT_QPLIB_CQ_COAL_DEF_NORMAL_MAXBUF_P5 0x1
+#define BNXT_QPLIB_CQ_COAL_DEF_DURING_MAXBUF_P5 0x1
+#define BNXT_QPLIB_CQ_COAL_DEF_EN_RING_IDLE_MODE 0x1
+#define BNXT_QPLIB_CQ_COAL_MAX_BUF_MAXTIME 0x1bf
+#define BNXT_QPLIB_CQ_COAL_MAX_NORMAL_MAXBUF 0x1f
+#define BNXT_QPLIB_CQ_COAL_MAX_DURING_MAXBUF 0x1f
+#define BNXT_QPLIB_CQ_COAL_MAX_EN_RING_IDLE_MODE 0x1
+
+struct bnxt_qplib_cqe {
+ u8 status;
+ u8 type;
+ u8 opcode;
+ u32 length;
+ u16 cfa_meta;
+ u64 wr_id;
+ union {
+ u32 immdata;
+ u32 invrkey;
+ };
+ u64 qp_handle;
+ u64 mr_handle;
+ u16 flags;
+ u8 smac[6];
+ u32 src_qp;
+ u16 raweth_qp1_flags;
+ u16 raweth_qp1_errors;
+ u16 raweth_qp1_cfa_code;
+ u32 raweth_qp1_flags2;
+ u32 raweth_qp1_metadata;
+ u8 raweth_qp1_payload_offset;
+ u16 pkey_index;
+};
+
+#define BNXT_QPLIB_QUEUE_START_PERIOD 0x01
+struct bnxt_qplib_cq {
+ struct bnxt_qplib_dpi *dpi;
+ struct bnxt_qplib_db_info dbinfo;
+ u32 max_wqe;
+ u32 id;
+ u16 count;
+ u16 period;
+ struct bnxt_qplib_hwq hwq;
+ struct bnxt_qplib_hwq resize_hwq;
+ u32 cnq_hw_ring_id;
+ struct bnxt_qplib_nq *nq;
+ bool resize_in_progress;
+ struct bnxt_qplib_sg_info sg_info;
+ u64 cq_handle;
+ u8 toggle;
+
+#define CQ_RESIZE_WAIT_TIME_MS 500
+ unsigned long flags;
+#define CQ_FLAGS_RESIZE_IN_PROG 1
+ wait_queue_head_t waitq;
+ struct list_head sqf_head, rqf_head;
+ atomic_t arm_state;
+ spinlock_t compl_lock; /* synch CQ handlers */
+/* Locking Notes:
+ * QP can move to error state from modify_qp, async error event or error
+ * CQE as part of poll_cq. When QP is moved to error state, it gets added
+ * to two flush lists, one each for SQ and RQ.
+ * Each flush list is protected by qplib_cq->flush_lock. Both scq and rcq
+ * flush_locks should be acquired when QP is moved to error. The control path
+ * operations(modify_qp and async error events) are synchronized with poll_cq
+ * using upper level CQ locks (bnxt_re_cq->cq_lock) of both SCQ and RCQ.
+ * The qplib_cq->flush_lock is required to synchronize two instances of poll_cq
+ * of the same QP while manipulating the flush list.
+ */
+ spinlock_t flush_lock; /* QP flush management */
+ u16 cnq_events;
+ struct bnxt_qplib_cq_coal_param *coalescing;
+};
+
+#define BNXT_QPLIB_MAX_IRRQE_ENTRY_SIZE sizeof(struct xrrq_irrq)
+#define BNXT_QPLIB_MAX_ORRQE_ENTRY_SIZE sizeof(struct xrrq_orrq)
+#define IRD_LIMIT_TO_IRRQ_SLOTS(x) (2 * (x) + 2)
+#define IRRQ_SLOTS_TO_IRD_LIMIT(s) (((s) >> 1) - 1)
+#define ORD_LIMIT_TO_ORRQ_SLOTS(x) ((x) + 1)
+#define ORRQ_SLOTS_TO_ORD_LIMIT(s) ((s) - 1)
+
+#define BNXT_QPLIB_MAX_NQE_ENTRY_SIZE sizeof(struct nq_base)
+
+#define NQE_CNT_PER_PG (PAGE_SIZE / BNXT_QPLIB_MAX_NQE_ENTRY_SIZE)
+#define NQE_MAX_IDX_PER_PG (NQE_CNT_PER_PG - 1)
+#define NQE_PG(x) (((x) & ~NQE_MAX_IDX_PER_PG) / NQE_CNT_PER_PG)
+#define NQE_IDX(x) ((x) & NQE_MAX_IDX_PER_PG)
+
+#define NQE_CMP_VALID(hdr, pass) \
+ (!!(le32_to_cpu((hdr)->info63_v[0]) & NQ_BASE_V) == \
+ !((pass) & BNXT_QPLIB_FLAG_EPOCH_CONS_MASK))
+
+#define BNXT_QPLIB_NQE_MAX_CNT (128 * 1024)
+
+#define NQ_CONS_PCI_BAR_REGION 2
+#define NQ_DB_KEY_CP (0x2 << CMPL_DOORBELL_KEY_SFT)
+#define NQ_DB_IDX_VALID CMPL_DOORBELL_IDX_VALID
+#define NQ_DB_IRQ_DIS CMPL_DOORBELL_MASK
+#define NQ_DB_CP_FLAGS_REARM (NQ_DB_KEY_CP | \
+ NQ_DB_IDX_VALID)
+#define NQ_DB_CP_FLAGS (NQ_DB_KEY_CP | \
+ NQ_DB_IDX_VALID | \
+ NQ_DB_IRQ_DIS)
+
+struct bnxt_qplib_nq_db {
+ struct bnxt_qplib_reg_desc reg;
+ struct bnxt_qplib_db_info dbinfo;
+};
+
+typedef int (*cqn_handler_t)(struct bnxt_qplib_nq *nq,
+ struct bnxt_qplib_cq *cq);
+typedef int (*srqn_handler_t)(struct bnxt_qplib_nq *nq,
+ struct bnxt_qplib_srq *srq, u8 event);
+
+struct bnxt_qplib_nq {
+ struct pci_dev *pdev;
+ struct bnxt_qplib_res *res;
+ char *name;
+ struct bnxt_qplib_hwq hwq;
+ struct bnxt_qplib_nq_db nq_db;
+ u16 ring_id;
+ int msix_vec;
+ cpumask_t mask;
+ struct tasklet_struct nq_tasklet;
+ bool requested;
+ int budget;
+ u32 load;
+
+ cqn_handler_t cqn_handler;
+ srqn_handler_t srqn_handler;
+ struct workqueue_struct *cqn_wq;
+};
+
+struct bnxt_qplib_nq_work {
+ struct work_struct work;
+ struct bnxt_qplib_nq *nq;
+ struct bnxt_qplib_cq *cq;
+};
+
+void bnxt_qplib_nq_stop_irq(struct bnxt_qplib_nq *nq, bool kill);
+void bnxt_qplib_disable_nq(struct bnxt_qplib_nq *nq);
+int bnxt_qplib_nq_start_irq(struct bnxt_qplib_nq *nq, int nq_indx,
+ int msix_vector, bool need_init);
+int bnxt_qplib_enable_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq,
+ int nq_idx, int msix_vector, int bar_reg_offset,
+ cqn_handler_t cqn_handler,
+ srqn_handler_t srq_handler);
+int bnxt_qplib_create_srq(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_srq *srq);
+int bnxt_qplib_query_srq(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_srq *srq);
+void bnxt_qplib_destroy_srq(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_srq *srq);
+int bnxt_qplib_post_srq_recv(struct bnxt_qplib_srq *srq,
+ struct bnxt_qplib_swqe *wqe);
+int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp);
+int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp);
+int bnxt_qplib_modify_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp);
+int bnxt_qplib_query_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp);
+int bnxt_qplib_destroy_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp);
+void bnxt_qplib_clean_qp(struct bnxt_qplib_qp *qp);
+void bnxt_qplib_free_qp_res(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_qp *qp);
+void *bnxt_qplib_get_qp1_sq_buf(struct bnxt_qplib_qp *qp,
+ struct bnxt_qplib_sge *sge);
+void *bnxt_qplib_get_qp1_rq_buf(struct bnxt_qplib_qp *qp,
+ struct bnxt_qplib_sge *sge);
+u32 bnxt_qplib_get_rq_prod_index(struct bnxt_qplib_qp *qp);
+dma_addr_t bnxt_qplib_get_qp_buf_from_index(struct bnxt_qplib_qp *qp,
+ u32 index);
+void bnxt_qplib_post_send_db(struct bnxt_qplib_qp *qp);
+int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp,
+ struct bnxt_qplib_swqe *wqe);
+void bnxt_qplib_post_recv_db(struct bnxt_qplib_qp *qp);
+int bnxt_qplib_post_recv(struct bnxt_qplib_qp *qp,
+ struct bnxt_qplib_swqe *wqe);
+int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq);
+int bnxt_qplib_resize_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq,
+ int new_cqes);
+void bnxt_qplib_resize_cq_complete(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_cq *cq);
+int bnxt_qplib_destroy_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq);
+int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe,
+ int num, struct bnxt_qplib_qp **qp);
+bool bnxt_qplib_is_cq_empty(struct bnxt_qplib_cq *cq);
+void bnxt_qplib_req_notify_cq(struct bnxt_qplib_cq *cq, u32 arm_type);
+void bnxt_qplib_free_nq(struct bnxt_qplib_nq *nq);
+int bnxt_qplib_alloc_nq(struct bnxt_qplib_res *res, struct bnxt_qplib_nq *nq);
+void bnxt_qplib_add_flush_qp(struct bnxt_qplib_qp *qp);
+void bnxt_qplib_acquire_cq_locks(struct bnxt_qplib_qp *qp,
+ unsigned long *flags);
+void bnxt_qplib_release_cq_locks(struct bnxt_qplib_qp *qp,
+ unsigned long *flags);
+int bnxt_qplib_process_flush_list(struct bnxt_qplib_cq *cq,
+ struct bnxt_qplib_cqe *cqe,
+ int num_cqes);
+void bnxt_qplib_flush_cqn_wq(struct bnxt_qplib_qp *qp);
+void bnxt_re_synchronize_nq(struct bnxt_qplib_nq *nq);
+
+static inline void *bnxt_qplib_get_swqe(struct bnxt_qplib_q *que, u32 *swq_idx)
+{
+ u32 idx;
+
+ idx = que->swq_start;
+ if (swq_idx)
+ *swq_idx = idx;
+ return &que->swq[idx];
+}
+
+static inline void bnxt_qplib_swq_mod_start(struct bnxt_qplib_q *que, u32 idx)
+{
+ que->swq_start = que->swq[idx].next_idx;
+}
+
+static inline u32 bnxt_qplib_get_depth(struct bnxt_qplib_q *que, u8 wqe_mode, bool is_sq)
+{
+ u32 slots;
+
+ /* Queue depth is the number of slots. */
+ slots = (que->wqe_size * que->max_wqe) / sizeof(struct sq_sge);
+ /* For variable WQE mode, need to align the slots to 256 */
+ if (wqe_mode == BNXT_QPLIB_WQE_MODE_VARIABLE && is_sq)
+ slots = ALIGN(slots, BNXT_VAR_MAX_SLOT_ALIGN);
+ return slots;
+}
+
+static inline u32 bnxt_qplib_set_sq_size(struct bnxt_qplib_q *que, u8 wqe_mode)
+{
+ return (wqe_mode == BNXT_QPLIB_WQE_MODE_STATIC) ?
+ que->max_wqe : bnxt_qplib_get_depth(que, wqe_mode, true);
+}
+
+static inline u32 bnxt_qplib_set_sq_max_slot(u8 wqe_mode)
+{
+ return (wqe_mode == BNXT_QPLIB_WQE_MODE_STATIC) ?
+ sizeof(struct sq_send) / sizeof(struct sq_sge) : 1;
+}
+
+static inline u32 bnxt_qplib_set_rq_max_slot(u32 wqe_size)
+{
+ return (wqe_size / sizeof(struct sq_sge));
+}
+
+static inline u16 __xlate_qfd(u16 delta, u16 wqe_bytes)
+{
+ /* For Cu/Wh delta = 128, stride = 16, wqe_bytes = 128
+ * For Gen-p5 B/C mode delta = 0, stride = 16, wqe_bytes = 128.
+ * For Gen-p5 delta = 0, stride = 16, 32 <= wqe_bytes <= 512.
+ * when 8916 is disabled.
+ */
+ return (delta * wqe_bytes) / sizeof(struct sq_sge);
+}
+
+static inline u16 bnxt_qplib_calc_ilsize(struct bnxt_qplib_swqe *wqe, u16 max)
+{
+ u16 size = 0;
+ int indx;
+
+ for (indx = 0; indx < wqe->num_sge; indx++)
+ size += wqe->sg_list[indx].size;
+ if (size > max)
+ size = max;
+
+ return size;
+}
+
+/* MSN table update inlin */
+static inline __le64 bnxt_re_update_msn_tbl(u32 st_idx, u32 npsn, u32 start_psn)
+{
+ return cpu_to_le64((((u64)(st_idx) << SQ_MSN_SEARCH_START_IDX_SFT) &
+ SQ_MSN_SEARCH_START_IDX_MASK) |
+ (((u64)(npsn) << SQ_MSN_SEARCH_NEXT_PSN_SFT) &
+ SQ_MSN_SEARCH_NEXT_PSN_MASK) |
+ (((start_psn) << SQ_MSN_SEARCH_START_PSN_SFT) &
+ SQ_MSN_SEARCH_START_PSN_MASK));
+}
+
+static inline bool __is_var_wqe(struct bnxt_qplib_qp *qp)
+{
+ return (qp->wqe_mode == BNXT_QPLIB_WQE_MODE_VARIABLE);
+}
+
+static inline bool __is_err_cqe_for_var_wqe(struct bnxt_qplib_qp *qp, u8 status)
+{
+ return (status != CQ_REQ_STATUS_OK) && __is_var_wqe(qp);
+}
+#endif /* __BNXT_QPLIB_FP_H__ */
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
new file mode 100644
index 000000000000..295a9610f3e6
--- /dev/null
+++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
@@ -0,0 +1,1202 @@
+/*
+ * Broadcom NetXtreme-E RoCE driver.
+ *
+ * Copyright (c) 2016 - 2017, Broadcom. All rights reserved. The term
+ * Broadcom refers to Broadcom Limited and/or its subsidiaries.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Description: RDMA Controller HW interface
+ */
+
+#define dev_fmt(fmt) "QPLIB: " fmt
+
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+#include <linux/pci.h>
+#include <linux/prefetch.h>
+#include <linux/delay.h>
+
+#include "roce_hsi.h"
+#include "qplib_res.h"
+#include "qplib_rcfw.h"
+#include "qplib_sp.h"
+#include "qplib_fp.h"
+#include "qplib_tlv.h"
+
+static void bnxt_qplib_service_creq(struct tasklet_struct *t);
+
+/**
+ * bnxt_qplib_map_rc - map return type based on opcode
+ * @opcode: roce slow path opcode
+ *
+ * case #1
+ * Firmware initiated error recovery is a safe state machine and
+ * driver can consider all the underlying rdma resources are free.
+ * In this state, it is safe to return success for opcodes related to
+ * destroying rdma resources (like destroy qp, destroy cq etc.).
+ *
+ * case #2
+ * If driver detect potential firmware stall, it is not safe state machine
+ * and the driver can not consider all the underlying rdma resources are
+ * freed.
+ * In this state, it is not safe to return success for opcodes related to
+ * destroying rdma resources (like destroy qp, destroy cq etc.).
+ *
+ * Scope of this helper function is only for case #1.
+ *
+ * Returns:
+ * 0 to communicate success to caller.
+ * Non zero error code to communicate failure to caller.
+ */
+static int bnxt_qplib_map_rc(u8 opcode)
+{
+ switch (opcode) {
+ case CMDQ_BASE_OPCODE_DESTROY_QP:
+ case CMDQ_BASE_OPCODE_DESTROY_SRQ:
+ case CMDQ_BASE_OPCODE_DESTROY_CQ:
+ case CMDQ_BASE_OPCODE_DEALLOCATE_KEY:
+ case CMDQ_BASE_OPCODE_DEREGISTER_MR:
+ case CMDQ_BASE_OPCODE_DELETE_GID:
+ case CMDQ_BASE_OPCODE_DESTROY_QP1:
+ case CMDQ_BASE_OPCODE_DESTROY_AH:
+ case CMDQ_BASE_OPCODE_DEINITIALIZE_FW:
+ case CMDQ_BASE_OPCODE_MODIFY_ROCE_CC:
+ case CMDQ_BASE_OPCODE_SET_LINK_AGGR_MODE:
+ return 0;
+ default:
+ return -ETIMEDOUT;
+ }
+}
+
+/**
+ * bnxt_re_is_fw_stalled - Check firmware health
+ * @rcfw: rcfw channel instance of rdev
+ * @cookie: cookie to track the command
+ *
+ * If firmware has not responded any rcfw command within
+ * rcfw->max_timeout, consider firmware as stalled.
+ *
+ * Returns:
+ * 0 if firmware is responding
+ * -ENODEV if firmware is not responding
+ */
+static int bnxt_re_is_fw_stalled(struct bnxt_qplib_rcfw *rcfw,
+ u16 cookie)
+{
+ struct bnxt_qplib_cmdq_ctx *cmdq;
+ struct bnxt_qplib_crsqe *crsqe;
+
+ crsqe = &rcfw->crsqe_tbl[cookie];
+ cmdq = &rcfw->cmdq;
+
+ if (time_after(jiffies, cmdq->last_seen +
+ (rcfw->max_timeout * HZ))) {
+ dev_warn_ratelimited(&rcfw->pdev->dev,
+ "%s: FW STALL Detected. cmdq[%#x]=%#x waited (%d > %d) msec active %d ",
+ __func__, cookie, crsqe->opcode,
+ jiffies_to_msecs(jiffies - cmdq->last_seen),
+ rcfw->max_timeout * 1000,
+ crsqe->is_in_used);
+ return -ENODEV;
+ }
+
+ return 0;
+}
+
+/**
+ * __wait_for_resp - Don't hold the cpu context and wait for response
+ * @rcfw: rcfw channel instance of rdev
+ * @cookie: cookie to track the command
+ *
+ * Wait for command completion in sleepable context.
+ *
+ * Returns:
+ * 0 if command is completed by firmware.
+ * Non zero error code for rest of the case.
+ */
+static int __wait_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie)
+{
+ struct bnxt_qplib_cmdq_ctx *cmdq;
+ struct bnxt_qplib_crsqe *crsqe;
+ int ret;
+
+ cmdq = &rcfw->cmdq;
+ crsqe = &rcfw->crsqe_tbl[cookie];
+
+ do {
+ if (test_bit(ERR_DEVICE_DETACHED, &cmdq->flags))
+ return bnxt_qplib_map_rc(crsqe->opcode);
+ if (test_bit(FIRMWARE_STALL_DETECTED, &cmdq->flags))
+ return -ETIMEDOUT;
+
+ wait_event_timeout(cmdq->waitq,
+ !crsqe->is_in_used ||
+ test_bit(ERR_DEVICE_DETACHED, &cmdq->flags),
+ secs_to_jiffies(rcfw->max_timeout));
+
+ if (!crsqe->is_in_used)
+ return 0;
+
+ bnxt_qplib_service_creq(&rcfw->creq.creq_tasklet);
+
+ if (!crsqe->is_in_used)
+ return 0;
+
+ ret = bnxt_re_is_fw_stalled(rcfw, cookie);
+ if (ret)
+ return ret;
+
+ } while (true);
+};
+
+/**
+ * __block_for_resp - hold the cpu context and wait for response
+ * @rcfw: rcfw channel instance of rdev
+ * @cookie: cookie to track the command
+ *
+ * This function will hold the cpu (non-sleepable context) and
+ * wait for command completion. Maximum holding interval is 8 second.
+ *
+ * Returns:
+ * -ETIMEDOUT if command is not completed in specific time interval.
+ * 0 if command is completed by firmware.
+ */
+static int __block_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie)
+{
+ struct bnxt_qplib_cmdq_ctx *cmdq = &rcfw->cmdq;
+ struct bnxt_qplib_crsqe *crsqe;
+ unsigned long issue_time = 0;
+
+ issue_time = jiffies;
+ crsqe = &rcfw->crsqe_tbl[cookie];
+
+ do {
+ if (test_bit(ERR_DEVICE_DETACHED, &cmdq->flags))
+ return bnxt_qplib_map_rc(crsqe->opcode);
+ if (test_bit(FIRMWARE_STALL_DETECTED, &cmdq->flags))
+ return -ETIMEDOUT;
+
+ udelay(1);
+
+ bnxt_qplib_service_creq(&rcfw->creq.creq_tasklet);
+ if (!crsqe->is_in_used)
+ return 0;
+
+ } while (time_before(jiffies, issue_time + (8 * HZ)));
+
+ return -ETIMEDOUT;
+};
+
+/* __send_message_no_waiter - get cookie and post the message.
+ * @rcfw: rcfw channel instance of rdev
+ * @msg: qplib message internal
+ *
+ * This function will just post and don't bother about completion.
+ * Current design of this function is -
+ * user must hold the completion queue hwq->lock.
+ * user must have used existing completion and free the resources.
+ * this function will not check queue full condition.
+ * this function will explicitly set is_waiter_alive=false.
+ * current use case is - send destroy_ah if create_ah is return
+ * after waiter of create_ah is lost. It can be extended for other
+ * use case as well.
+ *
+ * Returns: Nothing
+ *
+ */
+static void __send_message_no_waiter(struct bnxt_qplib_rcfw *rcfw,
+ struct bnxt_qplib_cmdqmsg *msg)
+{
+ struct bnxt_qplib_cmdq_ctx *cmdq = &rcfw->cmdq;
+ struct bnxt_qplib_hwq *hwq = &cmdq->hwq;
+ struct bnxt_qplib_crsqe *crsqe;
+ struct bnxt_qplib_cmdqe *cmdqe;
+ u32 sw_prod, cmdq_prod;
+ u16 cookie;
+ u32 bsize;
+ u8 *preq;
+
+ cookie = cmdq->seq_num & RCFW_MAX_COOKIE_VALUE;
+ __set_cmdq_base_cookie(msg->req, msg->req_sz, cpu_to_le16(cookie));
+ crsqe = &rcfw->crsqe_tbl[cookie];
+
+ /* Set cmd_size in terms of 16B slots in req. */
+ bsize = bnxt_qplib_set_cmd_slots(msg->req);
+ /* GET_CMD_SIZE would return number of slots in either case of tlv
+ * and non-tlv commands after call to bnxt_qplib_set_cmd_slots()
+ */
+ crsqe->is_internal_cmd = true;
+ crsqe->is_waiter_alive = false;
+ crsqe->is_in_used = true;
+ crsqe->req_size = __get_cmdq_base_cmd_size(msg->req, msg->req_sz);
+
+ preq = (u8 *)msg->req;
+ do {
+ /* Locate the next cmdq slot */
+ sw_prod = HWQ_CMP(hwq->prod, hwq);
+ cmdqe = bnxt_qplib_get_qe(hwq, sw_prod, NULL);
+ /* Copy a segment of the req cmd to the cmdq */
+ memset(cmdqe, 0, sizeof(*cmdqe));
+ memcpy(cmdqe, preq, min_t(u32, bsize, sizeof(*cmdqe)));
+ preq += min_t(u32, bsize, sizeof(*cmdqe));
+ bsize -= min_t(u32, bsize, sizeof(*cmdqe));
+ hwq->prod++;
+ } while (bsize > 0);
+ cmdq->seq_num++;
+
+ cmdq_prod = hwq->prod;
+ atomic_inc(&rcfw->timeout_send);
+ /* ring CMDQ DB */
+ wmb();
+ writel(cmdq_prod, cmdq->cmdq_mbox.prod);
+ writel(RCFW_CMDQ_TRIG_VAL, cmdq->cmdq_mbox.db);
+}
+
+static int __send_message(struct bnxt_qplib_rcfw *rcfw,
+ struct bnxt_qplib_cmdqmsg *msg, u8 opcode)
+{
+ u32 bsize, free_slots, required_slots;
+ struct bnxt_qplib_cmdq_ctx *cmdq;
+ struct bnxt_qplib_crsqe *crsqe;
+ struct bnxt_qplib_cmdqe *cmdqe;
+ struct bnxt_qplib_hwq *hwq;
+ u32 sw_prod, cmdq_prod;
+ struct pci_dev *pdev;
+ u16 cookie;
+ u8 *preq;
+
+ cmdq = &rcfw->cmdq;
+ hwq = &cmdq->hwq;
+ pdev = rcfw->pdev;
+
+ /* Cmdq are in 16-byte units, each request can consume 1 or more
+ * cmdqe
+ */
+ spin_lock_bh(&hwq->lock);
+ required_slots = bnxt_qplib_get_cmd_slots(msg->req);
+ free_slots = HWQ_FREE_SLOTS(hwq);
+ cookie = cmdq->seq_num & RCFW_MAX_COOKIE_VALUE;
+ crsqe = &rcfw->crsqe_tbl[cookie];
+
+ if (required_slots >= free_slots) {
+ dev_info_ratelimited(&pdev->dev,
+ "CMDQ is full req/free %d/%d!",
+ required_slots, free_slots);
+ spin_unlock_bh(&hwq->lock);
+ return -EAGAIN;
+ }
+ if (msg->block)
+ cookie |= RCFW_CMD_IS_BLOCKING;
+ __set_cmdq_base_cookie(msg->req, msg->req_sz, cpu_to_le16(cookie));
+
+ bsize = bnxt_qplib_set_cmd_slots(msg->req);
+ crsqe->free_slots = free_slots;
+ crsqe->resp = (struct creq_qp_event *)msg->resp;
+ crsqe->resp->cookie = cpu_to_le16(cookie);
+ crsqe->is_internal_cmd = false;
+ crsqe->is_waiter_alive = true;
+ crsqe->is_in_used = true;
+ crsqe->opcode = opcode;
+
+ crsqe->req_size = __get_cmdq_base_cmd_size(msg->req, msg->req_sz);
+ if (__get_cmdq_base_resp_size(msg->req, msg->req_sz) && msg->sb) {
+ struct bnxt_qplib_rcfw_sbuf *sbuf = msg->sb;
+
+ __set_cmdq_base_resp_addr(msg->req, msg->req_sz,
+ cpu_to_le64(sbuf->dma_addr));
+ __set_cmdq_base_resp_size(msg->req, msg->req_sz,
+ ALIGN(sbuf->size,
+ BNXT_QPLIB_CMDQE_UNITS) /
+ BNXT_QPLIB_CMDQE_UNITS);
+ }
+
+ preq = (u8 *)msg->req;
+ do {
+ /* Locate the next cmdq slot */
+ sw_prod = HWQ_CMP(hwq->prod, hwq);
+ cmdqe = bnxt_qplib_get_qe(hwq, sw_prod, NULL);
+ /* Copy a segment of the req cmd to the cmdq */
+ memset(cmdqe, 0, sizeof(*cmdqe));
+ memcpy(cmdqe, preq, min_t(u32, bsize, sizeof(*cmdqe)));
+ preq += min_t(u32, bsize, sizeof(*cmdqe));
+ bsize -= min_t(u32, bsize, sizeof(*cmdqe));
+ hwq->prod++;
+ } while (bsize > 0);
+ cmdq->seq_num++;
+
+ cmdq_prod = hwq->prod & 0xFFFF;
+ if (test_bit(FIRMWARE_FIRST_FLAG, &cmdq->flags)) {
+ /* The very first doorbell write
+ * is required to set this flag
+ * which prompts the FW to reset
+ * its internal pointers
+ */
+ cmdq_prod |= BIT(FIRMWARE_FIRST_FLAG);
+ clear_bit(FIRMWARE_FIRST_FLAG, &cmdq->flags);
+ }
+ /* ring CMDQ DB */
+ wmb();
+ writel(cmdq_prod, cmdq->cmdq_mbox.prod);
+ writel(RCFW_CMDQ_TRIG_VAL, cmdq->cmdq_mbox.db);
+ print_hex_dump_bytes("req: ", DUMP_PREFIX_OFFSET, msg->req, msg->req_sz);
+ spin_unlock_bh(&hwq->lock);
+ /* Return the CREQ response pointer */
+ return 0;
+}
+
+/**
+ * __poll_for_resp - self poll completion for rcfw command
+ * @rcfw: rcfw channel instance of rdev
+ * @cookie: cookie to track the command
+ *
+ * It works same as __wait_for_resp except this function will
+ * do self polling in sort interval since interrupt is disabled.
+ * This function can not be called from non-sleepable context.
+ *
+ * Returns:
+ * -ETIMEDOUT if command is not completed in specific time interval.
+ * 0 if command is completed by firmware.
+ */
+static int __poll_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie)
+{
+ struct bnxt_qplib_cmdq_ctx *cmdq = &rcfw->cmdq;
+ struct bnxt_qplib_crsqe *crsqe;
+ unsigned long issue_time;
+ int ret;
+
+ issue_time = jiffies;
+ crsqe = &rcfw->crsqe_tbl[cookie];
+
+ do {
+ if (test_bit(ERR_DEVICE_DETACHED, &cmdq->flags))
+ return bnxt_qplib_map_rc(crsqe->opcode);
+ if (test_bit(FIRMWARE_STALL_DETECTED, &cmdq->flags))
+ return -ETIMEDOUT;
+
+ usleep_range(1000, 1001);
+
+ bnxt_qplib_service_creq(&rcfw->creq.creq_tasklet);
+ if (!crsqe->is_in_used)
+ return 0;
+ if (jiffies_to_msecs(jiffies - issue_time) >
+ (rcfw->max_timeout * 1000)) {
+ ret = bnxt_re_is_fw_stalled(rcfw, cookie);
+ if (ret)
+ return ret;
+ }
+ } while (true);
+};
+
+static int __send_message_basic_sanity(struct bnxt_qplib_rcfw *rcfw,
+ struct bnxt_qplib_cmdqmsg *msg,
+ u8 opcode)
+{
+ struct bnxt_qplib_cmdq_ctx *cmdq;
+
+ cmdq = &rcfw->cmdq;
+
+ /* Prevent posting if f/w is not in a state to process */
+ if (test_bit(ERR_DEVICE_DETACHED, &rcfw->cmdq.flags))
+ return -ENXIO;
+
+ if (test_bit(FIRMWARE_STALL_DETECTED, &cmdq->flags))
+ return -ETIMEDOUT;
+
+ if (test_bit(FIRMWARE_INITIALIZED_FLAG, &cmdq->flags) &&
+ opcode == CMDQ_BASE_OPCODE_INITIALIZE_FW) {
+ dev_err(&rcfw->pdev->dev, "QPLIB: RCFW already initialized!");
+ return -EINVAL;
+ }
+
+ if (!test_bit(FIRMWARE_INITIALIZED_FLAG, &cmdq->flags) &&
+ (opcode != CMDQ_BASE_OPCODE_QUERY_FUNC &&
+ opcode != CMDQ_BASE_OPCODE_INITIALIZE_FW &&
+ opcode != CMDQ_BASE_OPCODE_QUERY_VERSION)) {
+ dev_err(&rcfw->pdev->dev,
+ "QPLIB: RCFW not initialized, reject opcode 0x%x",
+ opcode);
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+/* This function will just post and do not bother about completion */
+static void __destroy_timedout_ah(struct bnxt_qplib_rcfw *rcfw,
+ struct creq_create_ah_resp *create_ah_resp)
+{
+ struct bnxt_qplib_cmdqmsg msg = {};
+ struct cmdq_destroy_ah req = {};
+
+ bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req,
+ CMDQ_BASE_OPCODE_DESTROY_AH,
+ sizeof(req));
+ req.ah_cid = create_ah_resp->xid;
+ msg.req = (struct cmdq_base *)&req;
+ msg.req_sz = sizeof(req);
+ __send_message_no_waiter(rcfw, &msg);
+ dev_info_ratelimited(&rcfw->pdev->dev,
+ "From %s: ah_cid = %d timeout_send %d\n",
+ __func__, req.ah_cid,
+ atomic_read(&rcfw->timeout_send));
+}
+
+/**
+ * __bnxt_qplib_rcfw_send_message - qplib interface to send
+ * and complete rcfw command.
+ * @rcfw: rcfw channel instance of rdev
+ * @msg: qplib message internal
+ *
+ * This function does not account shadow queue depth. It will send
+ * all the command unconditionally as long as send queue is not full.
+ *
+ * Returns:
+ * 0 if command completed by firmware.
+ * Non zero if the command is not completed by firmware.
+ */
+static int __bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw,
+ struct bnxt_qplib_cmdqmsg *msg)
+{
+ struct creq_qp_event *evnt = (struct creq_qp_event *)msg->resp;
+ struct bnxt_qplib_crsqe *crsqe;
+ u16 cookie;
+ int rc;
+ u8 opcode;
+
+ opcode = __get_cmdq_base_opcode(msg->req, msg->req_sz);
+
+ rc = __send_message_basic_sanity(rcfw, msg, opcode);
+ if (rc)
+ return rc == -ENXIO ? bnxt_qplib_map_rc(opcode) : rc;
+
+ rc = __send_message(rcfw, msg, opcode);
+ if (rc)
+ return rc;
+
+ cookie = le16_to_cpu(__get_cmdq_base_cookie(msg->req, msg->req_sz))
+ & RCFW_MAX_COOKIE_VALUE;
+
+ if (msg->block)
+ rc = __block_for_resp(rcfw, cookie);
+ else if (atomic_read(&rcfw->rcfw_intr_enabled))
+ rc = __wait_for_resp(rcfw, cookie);
+ else
+ rc = __poll_for_resp(rcfw, cookie);
+
+ if (rc) {
+ spin_lock_bh(&rcfw->cmdq.hwq.lock);
+ crsqe = &rcfw->crsqe_tbl[cookie];
+ crsqe->is_waiter_alive = false;
+ if (rc == -ENODEV)
+ set_bit(FIRMWARE_STALL_DETECTED, &rcfw->cmdq.flags);
+ spin_unlock_bh(&rcfw->cmdq.hwq.lock);
+ return -ETIMEDOUT;
+ }
+
+ if (evnt->status) {
+ /* failed with status */
+ dev_err(&rcfw->pdev->dev, "cmdq[%#x]=%#x status %#x\n",
+ cookie, opcode, evnt->status);
+ rc = -EIO;
+ }
+
+ return rc;
+}
+
+/**
+ * bnxt_qplib_rcfw_send_message - qplib interface to send
+ * and complete rcfw command.
+ * @rcfw: rcfw channel instance of rdev
+ * @msg: qplib message internal
+ *
+ * Driver interact with Firmware through rcfw channel/slow path in two ways.
+ * a. Blocking rcfw command send. In this path, driver cannot hold
+ * the context for longer period since it is holding cpu until
+ * command is not completed.
+ * b. Non-blocking rcfw command send. In this path, driver can hold the
+ * context for longer period. There may be many pending command waiting
+ * for completion because of non-blocking nature.
+ *
+ * Driver will use shadow queue depth. Current queue depth of 8K
+ * (due to size of rcfw message there can be actual ~4K rcfw outstanding)
+ * is not optimal for rcfw command processing in firmware.
+ *
+ * Restrict at max #RCFW_CMD_NON_BLOCKING_SHADOW_QD Non-Blocking rcfw commands.
+ * Allow all blocking commands until there is no queue full.
+ *
+ * Returns:
+ * 0 if command completed by firmware.
+ * Non zero if the command is not completed by firmware.
+ */
+int bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw,
+ struct bnxt_qplib_cmdqmsg *msg)
+{
+ int ret;
+
+ if (!msg->block) {
+ down(&rcfw->rcfw_inflight);
+ ret = __bnxt_qplib_rcfw_send_message(rcfw, msg);
+ up(&rcfw->rcfw_inflight);
+ } else {
+ ret = __bnxt_qplib_rcfw_send_message(rcfw, msg);
+ }
+
+ return ret;
+}
+
+/* Completions */
+static int bnxt_qplib_process_func_event(struct bnxt_qplib_rcfw *rcfw,
+ struct creq_func_event *func_event)
+{
+ int rc;
+
+ switch (func_event->event) {
+ case CREQ_FUNC_EVENT_EVENT_TX_WQE_ERROR:
+ break;
+ case CREQ_FUNC_EVENT_EVENT_TX_DATA_ERROR:
+ break;
+ case CREQ_FUNC_EVENT_EVENT_RX_WQE_ERROR:
+ break;
+ case CREQ_FUNC_EVENT_EVENT_RX_DATA_ERROR:
+ break;
+ case CREQ_FUNC_EVENT_EVENT_CQ_ERROR:
+ break;
+ case CREQ_FUNC_EVENT_EVENT_TQM_ERROR:
+ break;
+ case CREQ_FUNC_EVENT_EVENT_CFCQ_ERROR:
+ break;
+ case CREQ_FUNC_EVENT_EVENT_CFCS_ERROR:
+ /* SRQ ctx error, call srq_handler??
+ * But there's no SRQ handle!
+ */
+ break;
+ case CREQ_FUNC_EVENT_EVENT_CFCC_ERROR:
+ break;
+ case CREQ_FUNC_EVENT_EVENT_CFCM_ERROR:
+ break;
+ case CREQ_FUNC_EVENT_EVENT_TIM_ERROR:
+ break;
+ case CREQ_FUNC_EVENT_EVENT_VF_COMM_REQUEST:
+ break;
+ case CREQ_FUNC_EVENT_EVENT_RESOURCE_EXHAUSTED:
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ rc = rcfw->creq.aeq_handler(rcfw, (void *)func_event, NULL);
+ return rc;
+}
+
+static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw,
+ struct creq_qp_event *qp_event,
+ u32 *num_wait)
+{
+ struct creq_qp_error_notification *err_event;
+ struct bnxt_qplib_hwq *hwq = &rcfw->cmdq.hwq;
+ struct bnxt_qplib_crsqe *crsqe;
+ u32 qp_id, tbl_indx, req_size;
+ struct bnxt_qplib_qp *qp;
+ u16 cookie, blocked = 0;
+ bool is_waiter_alive;
+ struct pci_dev *pdev;
+ u32 wait_cmds = 0;
+ int rc = 0;
+
+ pdev = rcfw->pdev;
+ print_hex_dump_bytes("event: ", DUMP_PREFIX_OFFSET, qp_event, sizeof(*qp_event));
+ switch (qp_event->event) {
+ case CREQ_QP_EVENT_EVENT_QP_ERROR_NOTIFICATION:
+ err_event = (struct creq_qp_error_notification *)qp_event;
+ qp_id = le32_to_cpu(err_event->xid);
+ spin_lock(&rcfw->tbl_lock);
+ tbl_indx = map_qp_id_to_tbl_indx(qp_id, rcfw);
+ qp = rcfw->qp_tbl[tbl_indx].qp_handle;
+ if (!qp) {
+ spin_unlock(&rcfw->tbl_lock);
+ break;
+ }
+ bnxt_qplib_mark_qp_error(qp);
+ rc = rcfw->creq.aeq_handler(rcfw, qp_event, qp);
+ spin_unlock(&rcfw->tbl_lock);
+ dev_dbg(&pdev->dev, "Received QP error notification\n");
+ dev_dbg(&pdev->dev,
+ "qpid 0x%x, req_err=0x%x, resp_err=0x%x\n",
+ qp_id, err_event->req_err_state_reason,
+ err_event->res_err_state_reason);
+ break;
+ default:
+ /*
+ * Command Response
+ * cmdq->lock needs to be acquired to synchronie
+ * the command send and completion reaping. This function
+ * is always called with creq->lock held. Using
+ * the nested variant of spin_lock.
+ *
+ */
+
+ spin_lock_nested(&hwq->lock, SINGLE_DEPTH_NESTING);
+ cookie = le16_to_cpu(qp_event->cookie);
+ blocked = cookie & RCFW_CMD_IS_BLOCKING;
+ cookie &= RCFW_MAX_COOKIE_VALUE;
+ crsqe = &rcfw->crsqe_tbl[cookie];
+
+ if (WARN_ONCE(test_bit(FIRMWARE_STALL_DETECTED,
+ &rcfw->cmdq.flags),
+ "QPLIB: Unreponsive rcfw channel detected.!!")) {
+ dev_info(&pdev->dev,
+ "rcfw timedout: cookie = %#x, free_slots = %d",
+ cookie, crsqe->free_slots);
+ spin_unlock(&hwq->lock);
+ return rc;
+ }
+
+ if (crsqe->is_internal_cmd && !qp_event->status)
+ atomic_dec(&rcfw->timeout_send);
+
+ if (crsqe->is_waiter_alive) {
+ if (crsqe->resp) {
+ memcpy(crsqe->resp, qp_event, sizeof(*qp_event));
+ /* Insert write memory barrier to ensure that
+ * response data is copied before clearing the
+ * flags
+ */
+ smp_wmb();
+ }
+ if (!blocked)
+ wait_cmds++;
+ }
+
+ req_size = crsqe->req_size;
+ is_waiter_alive = crsqe->is_waiter_alive;
+
+ crsqe->req_size = 0;
+ if (!is_waiter_alive)
+ crsqe->resp = NULL;
+
+ crsqe->is_in_used = false;
+
+ hwq->cons += req_size;
+
+ /* This is a case to handle below scenario -
+ * Create AH is completed successfully by firmware,
+ * but completion took more time and driver already lost
+ * the context of create_ah from caller.
+ * We have already return failure for create_ah verbs,
+ * so let's destroy the same address vector since it is
+ * no more used in stack. We don't care about completion
+ * in __send_message_no_waiter.
+ * If destroy_ah is failued by firmware, there will be AH
+ * resource leak and relatively not critical + unlikely
+ * scenario. Current design is not to handle such case.
+ */
+ if (!is_waiter_alive && !qp_event->status &&
+ qp_event->event == CREQ_QP_EVENT_EVENT_CREATE_AH)
+ __destroy_timedout_ah(rcfw,
+ (struct creq_create_ah_resp *)
+ qp_event);
+ spin_unlock(&hwq->lock);
+ }
+ *num_wait += wait_cmds;
+ return rc;
+}
+
+/* SP - CREQ Completion handlers */
+static void bnxt_qplib_service_creq(struct tasklet_struct *t)
+{
+ struct bnxt_qplib_rcfw *rcfw = from_tasklet(rcfw, t, creq.creq_tasklet);
+ struct bnxt_qplib_creq_ctx *creq = &rcfw->creq;
+ u32 type, budget = CREQ_ENTRY_POLL_BUDGET;
+ struct bnxt_qplib_hwq *hwq = &creq->hwq;
+ struct creq_base *creqe;
+ u32 num_wakeup = 0;
+ u32 hw_polled = 0;
+
+ /* Service the CREQ until budget is over */
+ spin_lock_bh(&hwq->lock);
+ while (budget > 0) {
+ creqe = bnxt_qplib_get_qe(hwq, hwq->cons, NULL);
+ if (!CREQ_CMP_VALID(creqe, creq->creq_db.dbinfo.flags))
+ break;
+ /* The valid test of the entry must be done first before
+ * reading any further.
+ */
+ dma_rmb();
+ rcfw->cmdq.last_seen = jiffies;
+
+ type = creqe->type & CREQ_BASE_TYPE_MASK;
+ switch (type) {
+ case CREQ_BASE_TYPE_QP_EVENT:
+ bnxt_qplib_process_qp_event
+ (rcfw, (struct creq_qp_event *)creqe,
+ &num_wakeup);
+ creq->stats.creq_qp_event_processed++;
+ break;
+ case CREQ_BASE_TYPE_FUNC_EVENT:
+ if (!bnxt_qplib_process_func_event
+ (rcfw, (struct creq_func_event *)creqe))
+ creq->stats.creq_func_event_processed++;
+ else
+ dev_warn(&rcfw->pdev->dev,
+ "aeqe:%#x Not handled\n", type);
+ break;
+ default:
+ if (type != ASYNC_EVENT_CMPL_TYPE_HWRM_ASYNC_EVENT)
+ dev_warn(&rcfw->pdev->dev,
+ "creqe with event 0x%x not handled\n",
+ type);
+ break;
+ }
+ budget--;
+ hw_polled++;
+ bnxt_qplib_hwq_incr_cons(hwq->max_elements, &hwq->cons,
+ 1, &creq->creq_db.dbinfo.flags);
+ }
+
+ if (hw_polled)
+ bnxt_qplib_ring_nq_db(&creq->creq_db.dbinfo,
+ rcfw->res->cctx, true);
+ spin_unlock_bh(&hwq->lock);
+ if (num_wakeup)
+ wake_up_nr(&rcfw->cmdq.waitq, num_wakeup);
+}
+
+static irqreturn_t bnxt_qplib_creq_irq(int irq, void *dev_instance)
+{
+ struct bnxt_qplib_rcfw *rcfw = dev_instance;
+ struct bnxt_qplib_creq_ctx *creq;
+ struct bnxt_qplib_hwq *hwq;
+ u32 sw_cons;
+
+ creq = &rcfw->creq;
+ hwq = &creq->hwq;
+ /* Prefetch the CREQ element */
+ sw_cons = HWQ_CMP(hwq->cons, hwq);
+ prefetch(bnxt_qplib_get_qe(hwq, sw_cons, NULL));
+
+ tasklet_schedule(&creq->creq_tasklet);
+
+ return IRQ_HANDLED;
+}
+
+/* RCFW */
+int bnxt_qplib_deinit_rcfw(struct bnxt_qplib_rcfw *rcfw)
+{
+ struct creq_deinitialize_fw_resp resp = {};
+ struct cmdq_deinitialize_fw req = {};
+ struct bnxt_qplib_cmdqmsg msg = {};
+ int rc;
+
+ bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req,
+ CMDQ_BASE_OPCODE_DEINITIALIZE_FW,
+ sizeof(req));
+ bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, NULL,
+ sizeof(req), sizeof(resp), 0);
+ rc = bnxt_qplib_rcfw_send_message(rcfw, &msg);
+ if (rc)
+ return rc;
+
+ clear_bit(FIRMWARE_INITIALIZED_FLAG, &rcfw->cmdq.flags);
+ return 0;
+}
+
+int bnxt_qplib_init_rcfw(struct bnxt_qplib_rcfw *rcfw,
+ struct bnxt_qplib_ctx *ctx, int is_virtfn)
+{
+ struct creq_initialize_fw_resp resp = {};
+ struct cmdq_initialize_fw req = {};
+ struct bnxt_qplib_cmdqmsg msg = {};
+ u16 flags = 0;
+ u8 pgsz, lvl;
+ int rc;
+
+ bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req,
+ CMDQ_BASE_OPCODE_INITIALIZE_FW,
+ sizeof(req));
+ /* Supply (log-base-2-of-host-page-size - base-page-shift)
+ * to bono to adjust the doorbell page sizes.
+ */
+ req.log2_dbr_pg_size = cpu_to_le16(PAGE_SHIFT -
+ RCFW_DBR_BASE_PAGE_SHIFT);
+ /*
+ * Gen P5 devices doesn't require this allocation
+ * as the L2 driver does the same for RoCE also.
+ * Also, VFs need not setup the HW context area, PF
+ * shall setup this area for VF. Skipping the
+ * HW programming
+ */
+ if (is_virtfn || bnxt_qplib_is_chip_gen_p5_p7(rcfw->res->cctx))
+ goto skip_ctx_setup;
+
+ lvl = ctx->qpc_tbl.level;
+ pgsz = bnxt_qplib_base_pg_size(&ctx->qpc_tbl);
+ req.qpc_pg_size_qpc_lvl = (pgsz << CMDQ_INITIALIZE_FW_QPC_PG_SIZE_SFT) |
+ lvl;
+ lvl = ctx->mrw_tbl.level;
+ pgsz = bnxt_qplib_base_pg_size(&ctx->mrw_tbl);
+ req.mrw_pg_size_mrw_lvl = (pgsz << CMDQ_INITIALIZE_FW_QPC_PG_SIZE_SFT) |
+ lvl;
+ lvl = ctx->srqc_tbl.level;
+ pgsz = bnxt_qplib_base_pg_size(&ctx->srqc_tbl);
+ req.srq_pg_size_srq_lvl = (pgsz << CMDQ_INITIALIZE_FW_QPC_PG_SIZE_SFT) |
+ lvl;
+ lvl = ctx->cq_tbl.level;
+ pgsz = bnxt_qplib_base_pg_size(&ctx->cq_tbl);
+ req.cq_pg_size_cq_lvl = (pgsz << CMDQ_INITIALIZE_FW_QPC_PG_SIZE_SFT) |
+ lvl;
+ lvl = ctx->tim_tbl.level;
+ pgsz = bnxt_qplib_base_pg_size(&ctx->tim_tbl);
+ req.tim_pg_size_tim_lvl = (pgsz << CMDQ_INITIALIZE_FW_QPC_PG_SIZE_SFT) |
+ lvl;
+ lvl = ctx->tqm_ctx.pde.level;
+ pgsz = bnxt_qplib_base_pg_size(&ctx->tqm_ctx.pde);
+ req.tqm_pg_size_tqm_lvl = (pgsz << CMDQ_INITIALIZE_FW_QPC_PG_SIZE_SFT) |
+ lvl;
+ req.qpc_page_dir =
+ cpu_to_le64(ctx->qpc_tbl.pbl[PBL_LVL_0].pg_map_arr[0]);
+ req.mrw_page_dir =
+ cpu_to_le64(ctx->mrw_tbl.pbl[PBL_LVL_0].pg_map_arr[0]);
+ req.srq_page_dir =
+ cpu_to_le64(ctx->srqc_tbl.pbl[PBL_LVL_0].pg_map_arr[0]);
+ req.cq_page_dir =
+ cpu_to_le64(ctx->cq_tbl.pbl[PBL_LVL_0].pg_map_arr[0]);
+ req.tim_page_dir =
+ cpu_to_le64(ctx->tim_tbl.pbl[PBL_LVL_0].pg_map_arr[0]);
+ req.tqm_page_dir =
+ cpu_to_le64(ctx->tqm_ctx.pde.pbl[PBL_LVL_0].pg_map_arr[0]);
+
+ req.number_of_qp = cpu_to_le32(ctx->qpc_tbl.max_elements);
+ req.number_of_mrw = cpu_to_le32(ctx->mrw_tbl.max_elements);
+ req.number_of_srq = cpu_to_le32(ctx->srqc_tbl.max_elements);
+ req.number_of_cq = cpu_to_le32(ctx->cq_tbl.max_elements);
+
+skip_ctx_setup:
+ if (BNXT_RE_HW_RETX(rcfw->res->dattr->dev_cap_flags))
+ flags |= CMDQ_INITIALIZE_FW_FLAGS_HW_REQUESTER_RETX_SUPPORTED;
+ if (_is_optimize_modify_qp_supported(rcfw->res->dattr->dev_cap_flags2))
+ flags |= CMDQ_INITIALIZE_FW_FLAGS_OPTIMIZE_MODIFY_QP_SUPPORTED;
+ if (rcfw->res->en_dev->flags & BNXT_EN_FLAG_ROCE_VF_RES_MGMT)
+ flags |= CMDQ_INITIALIZE_FW_FLAGS_L2_VF_RESOURCE_MGMT;
+ if (bnxt_qplib_roce_mirror_supported(rcfw->res->cctx)) {
+ flags |= CMDQ_INITIALIZE_FW_FLAGS_MIRROR_ON_ROCE_SUPPORTED;
+ rcfw->roce_mirror = true;
+ }
+ req.flags |= cpu_to_le16(flags);
+ req.stat_ctx_id = cpu_to_le32(ctx->stats.fw_id);
+ bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, NULL, sizeof(req), sizeof(resp), 0);
+ rc = bnxt_qplib_rcfw_send_message(rcfw, &msg);
+ if (rc)
+ return rc;
+ set_bit(FIRMWARE_INITIALIZED_FLAG, &rcfw->cmdq.flags);
+ return 0;
+}
+
+void bnxt_qplib_free_rcfw_channel(struct bnxt_qplib_rcfw *rcfw)
+{
+ kfree(rcfw->crsqe_tbl);
+ bnxt_qplib_free_hwq(rcfw->res, &rcfw->cmdq.hwq);
+ bnxt_qplib_free_hwq(rcfw->res, &rcfw->creq.hwq);
+ rcfw->pdev = NULL;
+}
+
+int bnxt_qplib_alloc_rcfw_channel(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_rcfw *rcfw,
+ struct bnxt_qplib_ctx *ctx)
+{
+ struct bnxt_qplib_hwq_attr hwq_attr = {};
+ struct bnxt_qplib_sg_info sginfo = {};
+ struct bnxt_qplib_cmdq_ctx *cmdq;
+ struct bnxt_qplib_creq_ctx *creq;
+
+ rcfw->pdev = res->pdev;
+ cmdq = &rcfw->cmdq;
+ creq = &rcfw->creq;
+ rcfw->res = res;
+
+ sginfo.pgsize = PAGE_SIZE;
+ sginfo.pgshft = PAGE_SHIFT;
+
+ hwq_attr.sginfo = &sginfo;
+ hwq_attr.res = rcfw->res;
+ hwq_attr.depth = BNXT_QPLIB_CREQE_MAX_CNT;
+ hwq_attr.stride = BNXT_QPLIB_CREQE_UNITS;
+ hwq_attr.type = bnxt_qplib_get_hwq_type(res);
+
+ if (bnxt_qplib_alloc_init_hwq(&creq->hwq, &hwq_attr)) {
+ dev_err(&rcfw->pdev->dev,
+ "HW channel CREQ allocation failed\n");
+ goto fail;
+ }
+
+ rcfw->cmdq_depth = BNXT_QPLIB_CMDQE_MAX_CNT;
+
+ sginfo.pgsize = bnxt_qplib_cmdqe_page_size(rcfw->cmdq_depth);
+ hwq_attr.depth = rcfw->cmdq_depth & 0x7FFFFFFF;
+ hwq_attr.stride = BNXT_QPLIB_CMDQE_UNITS;
+ hwq_attr.type = HWQ_TYPE_CTX;
+ if (bnxt_qplib_alloc_init_hwq(&cmdq->hwq, &hwq_attr)) {
+ dev_err(&rcfw->pdev->dev,
+ "HW channel CMDQ allocation failed\n");
+ goto fail;
+ }
+
+ rcfw->crsqe_tbl = kcalloc(cmdq->hwq.max_elements,
+ sizeof(*rcfw->crsqe_tbl), GFP_KERNEL);
+ if (!rcfw->crsqe_tbl)
+ goto fail;
+
+ spin_lock_init(&rcfw->tbl_lock);
+
+ rcfw->max_timeout = res->cctx->hwrm_cmd_max_timeout;
+
+ return 0;
+
+fail:
+ bnxt_qplib_free_rcfw_channel(rcfw);
+ return -ENOMEM;
+}
+
+void bnxt_qplib_rcfw_stop_irq(struct bnxt_qplib_rcfw *rcfw, bool kill)
+{
+ struct bnxt_qplib_creq_ctx *creq;
+
+ creq = &rcfw->creq;
+
+ if (!creq->requested)
+ return;
+
+ creq->requested = false;
+ /* Mask h/w interrupts */
+ bnxt_qplib_ring_nq_db(&creq->creq_db.dbinfo, rcfw->res->cctx, false);
+ /* Sync with last running IRQ-handler */
+ synchronize_irq(creq->msix_vec);
+ free_irq(creq->msix_vec, rcfw);
+ kfree(creq->irq_name);
+ creq->irq_name = NULL;
+ atomic_set(&rcfw->rcfw_intr_enabled, 0);
+ if (kill)
+ tasklet_kill(&creq->creq_tasklet);
+ tasklet_disable(&creq->creq_tasklet);
+}
+
+void bnxt_qplib_disable_rcfw_channel(struct bnxt_qplib_rcfw *rcfw)
+{
+ struct bnxt_qplib_creq_ctx *creq;
+ struct bnxt_qplib_cmdq_ctx *cmdq;
+
+ creq = &rcfw->creq;
+ cmdq = &rcfw->cmdq;
+ /* Make sure the HW channel is stopped! */
+ bnxt_qplib_rcfw_stop_irq(rcfw, true);
+
+ iounmap(cmdq->cmdq_mbox.reg.bar_reg);
+ iounmap(creq->creq_db.reg.bar_reg);
+
+ cmdq->cmdq_mbox.reg.bar_reg = NULL;
+ creq->creq_db.reg.bar_reg = NULL;
+ creq->aeq_handler = NULL;
+ creq->msix_vec = 0;
+}
+
+int bnxt_qplib_rcfw_start_irq(struct bnxt_qplib_rcfw *rcfw, int msix_vector,
+ bool need_init)
+{
+ struct bnxt_qplib_creq_ctx *creq;
+ struct bnxt_qplib_res *res;
+ int rc;
+
+ creq = &rcfw->creq;
+ res = rcfw->res;
+
+ if (creq->requested)
+ return -EFAULT;
+
+ creq->msix_vec = msix_vector;
+ if (need_init)
+ tasklet_setup(&creq->creq_tasklet, bnxt_qplib_service_creq);
+ else
+ tasklet_enable(&creq->creq_tasklet);
+
+ creq->irq_name = kasprintf(GFP_KERNEL, "bnxt_re-creq@pci:%s",
+ pci_name(res->pdev));
+ if (!creq->irq_name)
+ return -ENOMEM;
+ rc = request_irq(creq->msix_vec, bnxt_qplib_creq_irq, 0,
+ creq->irq_name, rcfw);
+ if (rc) {
+ kfree(creq->irq_name);
+ creq->irq_name = NULL;
+ tasklet_disable(&creq->creq_tasklet);
+ return rc;
+ }
+ creq->requested = true;
+
+ bnxt_qplib_ring_nq_db(&creq->creq_db.dbinfo, res->cctx, true);
+ atomic_inc(&rcfw->rcfw_intr_enabled);
+
+ return 0;
+}
+
+static int bnxt_qplib_map_cmdq_mbox(struct bnxt_qplib_rcfw *rcfw)
+{
+ struct bnxt_qplib_cmdq_mbox *mbox;
+ resource_size_t bar_reg;
+ struct pci_dev *pdev;
+
+ pdev = rcfw->pdev;
+ mbox = &rcfw->cmdq.cmdq_mbox;
+
+ mbox->reg.bar_id = RCFW_COMM_PCI_BAR_REGION;
+ mbox->reg.len = RCFW_COMM_SIZE;
+ mbox->reg.bar_base = pci_resource_start(pdev, mbox->reg.bar_id);
+ if (!mbox->reg.bar_base) {
+ dev_err(&pdev->dev,
+ "QPLIB: CMDQ BAR region %d resc start is 0!\n",
+ mbox->reg.bar_id);
+ return -ENOMEM;
+ }
+
+ bar_reg = mbox->reg.bar_base + RCFW_COMM_BASE_OFFSET;
+ mbox->reg.len = RCFW_COMM_SIZE;
+ mbox->reg.bar_reg = ioremap(bar_reg, mbox->reg.len);
+ if (!mbox->reg.bar_reg) {
+ dev_err(&pdev->dev,
+ "QPLIB: CMDQ BAR region %d mapping failed\n",
+ mbox->reg.bar_id);
+ return -ENOMEM;
+ }
+
+ mbox->prod = (void __iomem *)(mbox->reg.bar_reg +
+ RCFW_PF_VF_COMM_PROD_OFFSET);
+ mbox->db = (void __iomem *)(mbox->reg.bar_reg + RCFW_COMM_TRIG_OFFSET);
+ return 0;
+}
+
+static int bnxt_qplib_map_creq_db(struct bnxt_qplib_rcfw *rcfw, u32 reg_offt)
+{
+ struct bnxt_qplib_creq_db *creq_db;
+ resource_size_t bar_reg;
+ struct pci_dev *pdev;
+
+ pdev = rcfw->pdev;
+ creq_db = &rcfw->creq.creq_db;
+
+ creq_db->dbinfo.flags = 0;
+ creq_db->reg.bar_id = RCFW_COMM_CONS_PCI_BAR_REGION;
+ creq_db->reg.bar_base = pci_resource_start(pdev, creq_db->reg.bar_id);
+ if (!creq_db->reg.bar_id)
+ dev_err(&pdev->dev,
+ "QPLIB: CREQ BAR region %d resc start is 0!",
+ creq_db->reg.bar_id);
+
+ bar_reg = creq_db->reg.bar_base + reg_offt;
+ /* Unconditionally map 8 bytes to support 57500 series */
+ creq_db->reg.len = 8;
+ creq_db->reg.bar_reg = ioremap(bar_reg, creq_db->reg.len);
+ if (!creq_db->reg.bar_reg) {
+ dev_err(&pdev->dev,
+ "QPLIB: CREQ BAR region %d mapping failed",
+ creq_db->reg.bar_id);
+ return -ENOMEM;
+ }
+ creq_db->dbinfo.db = creq_db->reg.bar_reg;
+ creq_db->dbinfo.hwq = &rcfw->creq.hwq;
+ creq_db->dbinfo.xid = rcfw->creq.ring_id;
+ return 0;
+}
+
+static void bnxt_qplib_start_rcfw(struct bnxt_qplib_rcfw *rcfw)
+{
+ struct bnxt_qplib_cmdq_ctx *cmdq;
+ struct bnxt_qplib_creq_ctx *creq;
+ struct bnxt_qplib_cmdq_mbox *mbox;
+ struct cmdq_init init = {0};
+
+ cmdq = &rcfw->cmdq;
+ creq = &rcfw->creq;
+ mbox = &cmdq->cmdq_mbox;
+
+ init.cmdq_pbl = cpu_to_le64(cmdq->hwq.pbl[PBL_LVL_0].pg_map_arr[0]);
+ init.cmdq_size_cmdq_lvl =
+ cpu_to_le16(((rcfw->cmdq_depth <<
+ CMDQ_INIT_CMDQ_SIZE_SFT) &
+ CMDQ_INIT_CMDQ_SIZE_MASK) |
+ ((cmdq->hwq.level <<
+ CMDQ_INIT_CMDQ_LVL_SFT) &
+ CMDQ_INIT_CMDQ_LVL_MASK));
+ init.creq_ring_id = cpu_to_le16(creq->ring_id);
+ /* Write to the Bono mailbox register */
+ __iowrite32_copy(mbox->reg.bar_reg, &init, sizeof(init) / 4);
+}
+
+int bnxt_qplib_enable_rcfw_channel(struct bnxt_qplib_rcfw *rcfw,
+ int msix_vector,
+ int cp_bar_reg_off,
+ aeq_handler_t aeq_handler)
+{
+ struct bnxt_qplib_cmdq_ctx *cmdq;
+ struct bnxt_qplib_creq_ctx *creq;
+ int rc;
+
+ cmdq = &rcfw->cmdq;
+ creq = &rcfw->creq;
+
+ /* Clear to defaults */
+
+ cmdq->seq_num = 0;
+ set_bit(FIRMWARE_FIRST_FLAG, &cmdq->flags);
+ init_waitqueue_head(&cmdq->waitq);
+
+ creq->stats.creq_qp_event_processed = 0;
+ creq->stats.creq_func_event_processed = 0;
+ creq->aeq_handler = aeq_handler;
+
+ rc = bnxt_qplib_map_cmdq_mbox(rcfw);
+ if (rc)
+ return rc;
+
+ rc = bnxt_qplib_map_creq_db(rcfw, cp_bar_reg_off);
+ if (rc)
+ return rc;
+
+ rc = bnxt_qplib_rcfw_start_irq(rcfw, msix_vector, true);
+ if (rc) {
+ dev_err(&rcfw->pdev->dev,
+ "Failed to request IRQ for CREQ rc = 0x%x\n", rc);
+ bnxt_qplib_disable_rcfw_channel(rcfw);
+ return rc;
+ }
+
+ sema_init(&rcfw->rcfw_inflight, RCFW_CMD_NON_BLOCKING_SHADOW_QD);
+ bnxt_qplib_start_rcfw(rcfw);
+
+ return 0;
+}
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
new file mode 100644
index 000000000000..988c89b4232e
--- /dev/null
+++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
@@ -0,0 +1,294 @@
+/*
+ * Broadcom NetXtreme-E RoCE driver.
+ *
+ * Copyright (c) 2016 - 2017, Broadcom. All rights reserved. The term
+ * Broadcom refers to Broadcom Limited and/or its subsidiaries.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Description: RDMA Controller HW interface (header)
+ */
+
+#ifndef __BNXT_QPLIB_RCFW_H__
+#define __BNXT_QPLIB_RCFW_H__
+
+#include "qplib_tlv.h"
+
+#define RCFW_CMDQ_TRIG_VAL 1
+#define RCFW_COMM_PCI_BAR_REGION 0
+#define RCFW_COMM_CONS_PCI_BAR_REGION 2
+#define RCFW_COMM_BASE_OFFSET 0x600
+#define RCFW_PF_VF_COMM_PROD_OFFSET 0xc
+#define RCFW_COMM_TRIG_OFFSET 0x100
+#define RCFW_COMM_SIZE 0x104
+
+#define RCFW_DBR_PCI_BAR_REGION 2
+#define RCFW_DBR_BASE_PAGE_SHIFT 12
+#define RCFW_FW_STALL_MAX_TIMEOUT 40
+
+/* Cmdq contains a fix number of a 16-Byte slots */
+struct bnxt_qplib_cmdqe {
+ u8 data[16];
+};
+
+#define BNXT_QPLIB_CMDQE_UNITS sizeof(struct bnxt_qplib_cmdqe)
+
+static inline void bnxt_qplib_rcfw_cmd_prep(struct cmdq_base *req,
+ u8 opcode, u8 cmd_size)
+{
+ req->opcode = opcode;
+ req->cmd_size = cmd_size;
+}
+
+/* Shadow queue depth for non blocking command */
+#define RCFW_CMD_NON_BLOCKING_SHADOW_QD 64
+#define RCFW_CMD_WAIT_TIME_MS 20000 /* 20 Seconds timeout */
+
+/* CMDQ elements */
+#define BNXT_QPLIB_CMDQE_MAX_CNT 8192
+#define BNXT_QPLIB_CMDQE_BYTES(depth) ((depth) * BNXT_QPLIB_CMDQE_UNITS)
+
+static inline u32 bnxt_qplib_cmdqe_npages(u32 depth)
+{
+ u32 npages;
+
+ npages = BNXT_QPLIB_CMDQE_BYTES(depth) / PAGE_SIZE;
+ if (BNXT_QPLIB_CMDQE_BYTES(depth) % PAGE_SIZE)
+ npages++;
+ return npages;
+}
+
+static inline u32 bnxt_qplib_cmdqe_page_size(u32 depth)
+{
+ return (bnxt_qplib_cmdqe_npages(depth) * PAGE_SIZE);
+}
+
+/* Get the number of command units required for the req. The
+ * function returns correct value only if called before
+ * setting using bnxt_qplib_set_cmd_slots
+ */
+static inline u32 bnxt_qplib_get_cmd_slots(struct cmdq_base *req)
+{
+ u32 cmd_units = 0;
+
+ if (HAS_TLV_HEADER(req)) {
+ struct roce_tlv *tlv_req = (struct roce_tlv *)req;
+
+ cmd_units = tlv_req->total_size;
+ } else {
+ cmd_units = (req->cmd_size + BNXT_QPLIB_CMDQE_UNITS - 1) /
+ BNXT_QPLIB_CMDQE_UNITS;
+ }
+
+ return cmd_units;
+}
+
+static inline u32 bnxt_qplib_set_cmd_slots(struct cmdq_base *req)
+{
+ u32 cmd_byte = 0;
+
+ if (HAS_TLV_HEADER(req)) {
+ struct roce_tlv *tlv_req = (struct roce_tlv *)req;
+
+ cmd_byte = tlv_req->total_size * BNXT_QPLIB_CMDQE_UNITS;
+ } else {
+ cmd_byte = req->cmd_size;
+ req->cmd_size = (req->cmd_size + BNXT_QPLIB_CMDQE_UNITS - 1) /
+ BNXT_QPLIB_CMDQE_UNITS;
+ }
+
+ return cmd_byte;
+}
+
+#define RCFW_MAX_COOKIE_VALUE (BNXT_QPLIB_CMDQE_MAX_CNT - 1)
+#define RCFW_CMD_IS_BLOCKING 0x8000
+
+#define HWRM_VERSION_DEV_ATTR_MAX_DPI 0x1000A0000000DULL
+/* HWRM version 1.10.3.18 */
+#define HWRM_VERSION_READ_CTX 0x1000A00030012
+
+/* Crsq buf is 1024-Byte */
+struct bnxt_qplib_crsbe {
+ u8 data[1024];
+};
+
+/* CREQ */
+/* Allocate 1 per QP for async error notification for now */
+#define BNXT_QPLIB_CREQE_MAX_CNT (64 * 1024)
+#define BNXT_QPLIB_CREQE_UNITS 16 /* 16-Bytes per prod unit */
+#define CREQ_CMP_VALID(hdr, pass) \
+ (!!((hdr)->v & CREQ_BASE_V) == \
+ !((pass) & BNXT_QPLIB_FLAG_EPOCH_CONS_MASK))
+#define CREQ_ENTRY_POLL_BUDGET 0x100
+
+/* HWQ */
+typedef int (*aeq_handler_t)(struct bnxt_qplib_rcfw *, void *, void *);
+
+struct bnxt_qplib_crsqe {
+ struct creq_qp_event *resp;
+ u32 req_size;
+ /* Free slots at the time of submission */
+ u32 free_slots;
+ u8 opcode;
+ bool is_waiter_alive;
+ bool is_internal_cmd;
+ bool is_in_used;
+};
+
+struct bnxt_qplib_rcfw_sbuf {
+ void *sb;
+ dma_addr_t dma_addr;
+ u32 size;
+};
+
+struct bnxt_qplib_qp_node {
+ u32 qp_id; /* QP id */
+ void *qp_handle; /* ptr to qplib_qp */
+};
+
+#define BNXT_QPLIB_OOS_COUNT_MASK 0xFFFFFFFF
+
+#define FIRMWARE_INITIALIZED_FLAG (0)
+#define FIRMWARE_FIRST_FLAG (31)
+#define FIRMWARE_STALL_DETECTED (3)
+#define ERR_DEVICE_DETACHED (4)
+
+struct bnxt_qplib_cmdq_mbox {
+ struct bnxt_qplib_reg_desc reg;
+ void __iomem *prod;
+ void __iomem *db;
+};
+
+struct bnxt_qplib_cmdq_ctx {
+ struct bnxt_qplib_hwq hwq;
+ struct bnxt_qplib_cmdq_mbox cmdq_mbox;
+ wait_queue_head_t waitq;
+ unsigned long flags;
+ unsigned long last_seen;
+ u32 seq_num;
+};
+
+struct bnxt_qplib_creq_db {
+ struct bnxt_qplib_reg_desc reg;
+ struct bnxt_qplib_db_info dbinfo;
+};
+
+struct bnxt_qplib_creq_stat {
+ u64 creq_qp_event_processed;
+ u64 creq_func_event_processed;
+};
+
+struct bnxt_qplib_creq_ctx {
+ struct bnxt_qplib_hwq hwq;
+ struct bnxt_qplib_creq_db creq_db;
+ struct bnxt_qplib_creq_stat stats;
+ struct tasklet_struct creq_tasklet;
+ aeq_handler_t aeq_handler;
+ u16 ring_id;
+ int msix_vec;
+ bool requested; /*irq handler installed */
+ char *irq_name;
+};
+
+/* RCFW Communication Channels */
+struct bnxt_qplib_rcfw {
+ struct pci_dev *pdev;
+ struct bnxt_qplib_res *res;
+ struct bnxt_qplib_cmdq_ctx cmdq;
+ struct bnxt_qplib_creq_ctx creq;
+ struct bnxt_qplib_crsqe *crsqe_tbl;
+ int qp_tbl_size;
+ struct bnxt_qplib_qp_node *qp_tbl;
+ /* To synchronize the qp-handle hash table */
+ spinlock_t tbl_lock;
+ u64 oos_prev;
+ u32 init_oos_stats;
+ u32 cmdq_depth;
+ atomic_t rcfw_intr_enabled;
+ struct semaphore rcfw_inflight;
+ atomic_t timeout_send;
+ /* cached from chip cctx for quick reference in slow path */
+ u16 max_timeout;
+ bool roce_mirror;
+};
+
+struct bnxt_qplib_cmdqmsg {
+ struct cmdq_base *req;
+ struct creq_base *resp;
+ void *sb;
+ u32 req_sz;
+ u32 res_sz;
+ u8 block;
+};
+
+static inline void bnxt_qplib_fill_cmdqmsg(struct bnxt_qplib_cmdqmsg *msg,
+ void *req, void *resp, void *sb,
+ u32 req_sz, u32 res_sz, u8 block)
+{
+ msg->req = req;
+ msg->resp = resp;
+ msg->sb = sb;
+ msg->req_sz = req_sz;
+ msg->res_sz = res_sz;
+ msg->block = block;
+}
+
+void bnxt_qplib_free_rcfw_channel(struct bnxt_qplib_rcfw *rcfw);
+int bnxt_qplib_alloc_rcfw_channel(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_rcfw *rcfw,
+ struct bnxt_qplib_ctx *ctx);
+void bnxt_qplib_rcfw_stop_irq(struct bnxt_qplib_rcfw *rcfw, bool kill);
+void bnxt_qplib_disable_rcfw_channel(struct bnxt_qplib_rcfw *rcfw);
+int bnxt_qplib_rcfw_start_irq(struct bnxt_qplib_rcfw *rcfw, int msix_vector,
+ bool need_init);
+int bnxt_qplib_enable_rcfw_channel(struct bnxt_qplib_rcfw *rcfw,
+ int msix_vector,
+ int cp_bar_reg_off,
+ aeq_handler_t aeq_handler);
+
+struct bnxt_qplib_rcfw_sbuf *bnxt_qplib_rcfw_alloc_sbuf(
+ struct bnxt_qplib_rcfw *rcfw,
+ u32 size);
+void bnxt_qplib_rcfw_free_sbuf(struct bnxt_qplib_rcfw *rcfw,
+ struct bnxt_qplib_rcfw_sbuf *sbuf);
+int bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw,
+ struct bnxt_qplib_cmdqmsg *msg);
+
+int bnxt_qplib_deinit_rcfw(struct bnxt_qplib_rcfw *rcfw);
+int bnxt_qplib_init_rcfw(struct bnxt_qplib_rcfw *rcfw,
+ struct bnxt_qplib_ctx *ctx, int is_virtfn);
+void bnxt_qplib_mark_qp_error(void *qp_handle);
+
+static inline u32 map_qp_id_to_tbl_indx(u32 qid, struct bnxt_qplib_rcfw *rcfw)
+{
+ /* Last index of the qp_tbl is for QP1 ie. qp_tbl_size - 1*/
+ return (qid == 1) ? rcfw->qp_tbl_size - 1 : (qid % (rcfw->qp_tbl_size - 2));
+}
+#endif /* __BNXT_QPLIB_RCFW_H__ */
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.c b/drivers/infiniband/hw/bnxt_re/qplib_res.c
new file mode 100644
index 000000000000..875d7b52c06a
--- /dev/null
+++ b/drivers/infiniband/hw/bnxt_re/qplib_res.c
@@ -0,0 +1,955 @@
+/*
+ * Broadcom NetXtreme-E RoCE driver.
+ *
+ * Copyright (c) 2016 - 2017, Broadcom. All rights reserved. The term
+ * Broadcom refers to Broadcom Limited and/or its subsidiaries.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Description: QPLib resource manager
+ */
+
+#define dev_fmt(fmt) "QPLIB: " fmt
+
+#include <linux/spinlock.h>
+#include <linux/pci.h>
+#include <linux/interrupt.h>
+#include <linux/inetdevice.h>
+#include <linux/dma-mapping.h>
+#include <linux/if_vlan.h>
+#include <linux/vmalloc.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_umem.h>
+
+#include "roce_hsi.h"
+#include "qplib_res.h"
+#include "qplib_sp.h"
+#include "qplib_rcfw.h"
+
+/* PBL */
+static void __free_pbl(struct bnxt_qplib_res *res, struct bnxt_qplib_pbl *pbl,
+ bool is_umem)
+{
+ struct pci_dev *pdev = res->pdev;
+ int i;
+
+ if (!is_umem) {
+ for (i = 0; i < pbl->pg_count; i++) {
+ if (pbl->pg_arr[i])
+ dma_free_coherent(&pdev->dev, pbl->pg_size,
+ (void *)((unsigned long)
+ pbl->pg_arr[i] &
+ PAGE_MASK),
+ pbl->pg_map_arr[i]);
+ else
+ dev_warn(&pdev->dev,
+ "PBL free pg_arr[%d] empty?!\n", i);
+ pbl->pg_arr[i] = NULL;
+ }
+ }
+ vfree(pbl->pg_arr);
+ pbl->pg_arr = NULL;
+ vfree(pbl->pg_map_arr);
+ pbl->pg_map_arr = NULL;
+ pbl->pg_count = 0;
+ pbl->pg_size = 0;
+}
+
+static void bnxt_qplib_fill_user_dma_pages(struct bnxt_qplib_pbl *pbl,
+ struct bnxt_qplib_sg_info *sginfo)
+{
+ struct ib_block_iter biter;
+ int i = 0;
+
+ rdma_umem_for_each_dma_block(sginfo->umem, &biter, sginfo->pgsize) {
+ pbl->pg_map_arr[i] = rdma_block_iter_dma_address(&biter);
+ pbl->pg_arr[i] = NULL;
+ pbl->pg_count++;
+ i++;
+ }
+}
+
+static int __alloc_pbl(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_pbl *pbl,
+ struct bnxt_qplib_sg_info *sginfo)
+{
+ struct pci_dev *pdev = res->pdev;
+ bool is_umem = false;
+ u32 pages;
+ int i;
+
+ if (sginfo->nopte)
+ return 0;
+ if (sginfo->umem)
+ pages = ib_umem_num_dma_blocks(sginfo->umem, sginfo->pgsize);
+ else
+ pages = sginfo->npages;
+ /* page ptr arrays */
+ pbl->pg_arr = vmalloc_array(pages, sizeof(void *));
+ if (!pbl->pg_arr)
+ return -ENOMEM;
+ memset(pbl->pg_arr, 0, pages * sizeof(void *));
+
+ pbl->pg_map_arr = vmalloc_array(pages, sizeof(dma_addr_t));
+ if (!pbl->pg_map_arr) {
+ vfree(pbl->pg_arr);
+ pbl->pg_arr = NULL;
+ return -ENOMEM;
+ }
+ memset(pbl->pg_map_arr, 0, pages * sizeof(dma_addr_t));
+ pbl->pg_count = 0;
+ pbl->pg_size = sginfo->pgsize;
+
+ if (!sginfo->umem) {
+ for (i = 0; i < pages; i++) {
+ pbl->pg_arr[i] = dma_alloc_coherent(&pdev->dev,
+ pbl->pg_size,
+ &pbl->pg_map_arr[i],
+ GFP_KERNEL);
+ if (!pbl->pg_arr[i])
+ goto fail;
+ pbl->pg_count++;
+ }
+ } else {
+ is_umem = true;
+ bnxt_qplib_fill_user_dma_pages(pbl, sginfo);
+ }
+
+ return 0;
+fail:
+ __free_pbl(res, pbl, is_umem);
+ return -ENOMEM;
+}
+
+/* HWQ */
+void bnxt_qplib_free_hwq(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_hwq *hwq)
+{
+ int i;
+
+ if (!hwq->max_elements)
+ return;
+ if (hwq->level >= PBL_LVL_MAX)
+ return;
+
+ for (i = 0; i < hwq->level + 1; i++) {
+ if (i == hwq->level)
+ __free_pbl(res, &hwq->pbl[i], hwq->is_user);
+ else
+ __free_pbl(res, &hwq->pbl[i], false);
+ }
+
+ hwq->level = PBL_LVL_MAX;
+ hwq->max_elements = 0;
+ hwq->element_size = 0;
+ hwq->prod = 0;
+ hwq->cons = 0;
+ hwq->cp_bit = 0;
+}
+
+/* All HWQs are power of 2 in size */
+
+int bnxt_qplib_alloc_init_hwq(struct bnxt_qplib_hwq *hwq,
+ struct bnxt_qplib_hwq_attr *hwq_attr)
+{
+ u32 npages, aux_slots, pg_size, aux_pages = 0, aux_size = 0;
+ struct bnxt_qplib_sg_info sginfo = {};
+ u32 depth, stride, npbl, npde;
+ dma_addr_t *src_phys_ptr, **dst_virt_ptr;
+ struct bnxt_qplib_res *res;
+ struct pci_dev *pdev;
+ int i, rc, lvl;
+
+ res = hwq_attr->res;
+ pdev = res->pdev;
+ pg_size = hwq_attr->sginfo->pgsize;
+ hwq->level = PBL_LVL_MAX;
+
+ depth = roundup_pow_of_two(hwq_attr->depth);
+ stride = roundup_pow_of_two(hwq_attr->stride);
+ if (hwq_attr->aux_depth) {
+ aux_slots = hwq_attr->aux_depth;
+ aux_size = roundup_pow_of_two(hwq_attr->aux_stride);
+ aux_pages = (aux_slots * aux_size) / pg_size;
+ if ((aux_slots * aux_size) % pg_size)
+ aux_pages++;
+ }
+
+ if (!hwq_attr->sginfo->umem) {
+ hwq->is_user = false;
+ npages = (depth * stride) / pg_size + aux_pages;
+ if ((depth * stride) % pg_size)
+ npages++;
+ if (!npages)
+ return -EINVAL;
+ hwq_attr->sginfo->npages = npages;
+ } else {
+ npages = ib_umem_num_dma_blocks(hwq_attr->sginfo->umem,
+ hwq_attr->sginfo->pgsize);
+ hwq->is_user = true;
+ }
+
+ if (npages == MAX_PBL_LVL_0_PGS && !hwq_attr->sginfo->nopte) {
+ /* This request is Level 0, map PTE */
+ rc = __alloc_pbl(res, &hwq->pbl[PBL_LVL_0], hwq_attr->sginfo);
+ if (rc)
+ goto fail;
+ hwq->level = PBL_LVL_0;
+ goto done;
+ }
+
+ if (npages >= MAX_PBL_LVL_0_PGS) {
+ if (npages > MAX_PBL_LVL_1_PGS) {
+ u32 flag = (hwq_attr->type == HWQ_TYPE_L2_CMPL) ?
+ 0 : PTU_PTE_VALID;
+ /* 2 levels of indirection */
+ npbl = npages >> MAX_PBL_LVL_1_PGS_SHIFT;
+ if (npages % BIT(MAX_PBL_LVL_1_PGS_SHIFT))
+ npbl++;
+ npde = npbl >> MAX_PDL_LVL_SHIFT;
+ if (npbl % BIT(MAX_PDL_LVL_SHIFT))
+ npde++;
+ /* Alloc PDE pages */
+ sginfo.pgsize = npde * pg_size;
+ sginfo.npages = 1;
+ rc = __alloc_pbl(res, &hwq->pbl[PBL_LVL_0], &sginfo);
+ if (rc)
+ goto fail;
+
+ /* Alloc PBL pages */
+ sginfo.npages = npbl;
+ sginfo.pgsize = PAGE_SIZE;
+ rc = __alloc_pbl(res, &hwq->pbl[PBL_LVL_1], &sginfo);
+ if (rc)
+ goto fail;
+ /* Fill PDL with PBL page pointers */
+ dst_virt_ptr =
+ (dma_addr_t **)hwq->pbl[PBL_LVL_0].pg_arr;
+ src_phys_ptr = hwq->pbl[PBL_LVL_1].pg_map_arr;
+ for (i = 0; i < hwq->pbl[PBL_LVL_1].pg_count; i++)
+ dst_virt_ptr[0][i] = src_phys_ptr[i] | flag;
+
+ /* Alloc or init PTEs */
+ rc = __alloc_pbl(res, &hwq->pbl[PBL_LVL_2],
+ hwq_attr->sginfo);
+ if (rc)
+ goto fail;
+ hwq->level = PBL_LVL_2;
+ if (hwq_attr->sginfo->nopte)
+ goto done;
+ /* Fill PBLs with PTE pointers */
+ dst_virt_ptr =
+ (dma_addr_t **)hwq->pbl[PBL_LVL_1].pg_arr;
+ src_phys_ptr = hwq->pbl[PBL_LVL_2].pg_map_arr;
+ for (i = 0; i < hwq->pbl[PBL_LVL_2].pg_count; i++) {
+ dst_virt_ptr[PTR_PG(i)][PTR_IDX(i)] =
+ src_phys_ptr[i] | PTU_PTE_VALID;
+ }
+ if (hwq_attr->type == HWQ_TYPE_QUEUE) {
+ /* Find the last pg of the size */
+ i = hwq->pbl[PBL_LVL_2].pg_count;
+ dst_virt_ptr[PTR_PG(i - 1)][PTR_IDX(i - 1)] |=
+ PTU_PTE_LAST;
+ if (i > 1)
+ dst_virt_ptr[PTR_PG(i - 2)]
+ [PTR_IDX(i - 2)] |=
+ PTU_PTE_NEXT_TO_LAST;
+ }
+ } else { /* pages < 512 npbl = 1, npde = 0 */
+ u32 flag = (hwq_attr->type == HWQ_TYPE_L2_CMPL) ?
+ 0 : PTU_PTE_VALID;
+
+ /* 1 level of indirection */
+ npbl = npages >> MAX_PBL_LVL_1_PGS_SHIFT;
+ if (npages % BIT(MAX_PBL_LVL_1_PGS_SHIFT))
+ npbl++;
+ sginfo.npages = npbl;
+ sginfo.pgsize = PAGE_SIZE;
+ /* Alloc PBL page */
+ rc = __alloc_pbl(res, &hwq->pbl[PBL_LVL_0], &sginfo);
+ if (rc)
+ goto fail;
+ /* Alloc or init PTEs */
+ rc = __alloc_pbl(res, &hwq->pbl[PBL_LVL_1],
+ hwq_attr->sginfo);
+ if (rc)
+ goto fail;
+ hwq->level = PBL_LVL_1;
+ if (hwq_attr->sginfo->nopte)
+ goto done;
+ /* Fill PBL with PTE pointers */
+ dst_virt_ptr =
+ (dma_addr_t **)hwq->pbl[PBL_LVL_0].pg_arr;
+ src_phys_ptr = hwq->pbl[PBL_LVL_1].pg_map_arr;
+ for (i = 0; i < hwq->pbl[PBL_LVL_1].pg_count; i++)
+ dst_virt_ptr[PTR_PG(i)][PTR_IDX(i)] =
+ src_phys_ptr[i] | flag;
+ if (hwq_attr->type == HWQ_TYPE_QUEUE) {
+ /* Find the last pg of the size */
+ i = hwq->pbl[PBL_LVL_1].pg_count;
+ dst_virt_ptr[PTR_PG(i - 1)][PTR_IDX(i - 1)] |=
+ PTU_PTE_LAST;
+ if (i > 1)
+ dst_virt_ptr[PTR_PG(i - 2)]
+ [PTR_IDX(i - 2)] |=
+ PTU_PTE_NEXT_TO_LAST;
+ }
+ }
+ }
+done:
+ hwq->prod = 0;
+ hwq->cons = 0;
+ hwq->pdev = pdev;
+ hwq->depth = hwq_attr->depth;
+ hwq->max_elements = hwq->depth;
+ hwq->element_size = stride;
+ hwq->qe_ppg = pg_size / stride;
+ /* For direct access to the elements */
+ lvl = hwq->level;
+ if (hwq_attr->sginfo->nopte && hwq->level)
+ lvl = hwq->level - 1;
+ hwq->pbl_ptr = hwq->pbl[lvl].pg_arr;
+ hwq->pbl_dma_ptr = hwq->pbl[lvl].pg_map_arr;
+ spin_lock_init(&hwq->lock);
+
+ return 0;
+fail:
+ bnxt_qplib_free_hwq(res, hwq);
+ return -ENOMEM;
+}
+
+/* Context Tables */
+void bnxt_qplib_free_hwctx(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_ctx *ctx)
+{
+ int i;
+
+ bnxt_qplib_free_hwq(res, &ctx->qpc_tbl);
+ bnxt_qplib_free_hwq(res, &ctx->mrw_tbl);
+ bnxt_qplib_free_hwq(res, &ctx->srqc_tbl);
+ bnxt_qplib_free_hwq(res, &ctx->cq_tbl);
+ bnxt_qplib_free_hwq(res, &ctx->tim_tbl);
+ for (i = 0; i < MAX_TQM_ALLOC_REQ; i++)
+ bnxt_qplib_free_hwq(res, &ctx->tqm_ctx.qtbl[i]);
+ /* restore original pde level before destroy */
+ ctx->tqm_ctx.pde.level = ctx->tqm_ctx.pde_level;
+ bnxt_qplib_free_hwq(res, &ctx->tqm_ctx.pde);
+}
+
+static int bnxt_qplib_alloc_tqm_rings(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_ctx *ctx)
+{
+ struct bnxt_qplib_hwq_attr hwq_attr = {};
+ struct bnxt_qplib_sg_info sginfo = {};
+ struct bnxt_qplib_tqm_ctx *tqmctx;
+ int rc;
+ int i;
+
+ tqmctx = &ctx->tqm_ctx;
+
+ sginfo.pgsize = PAGE_SIZE;
+ sginfo.pgshft = PAGE_SHIFT;
+ hwq_attr.sginfo = &sginfo;
+ hwq_attr.res = res;
+ hwq_attr.type = HWQ_TYPE_CTX;
+ hwq_attr.depth = 512;
+ hwq_attr.stride = sizeof(u64);
+ /* Alloc pdl buffer */
+ rc = bnxt_qplib_alloc_init_hwq(&tqmctx->pde, &hwq_attr);
+ if (rc)
+ goto out;
+ /* Save original pdl level */
+ tqmctx->pde_level = tqmctx->pde.level;
+
+ hwq_attr.stride = 1;
+ for (i = 0; i < MAX_TQM_ALLOC_REQ; i++) {
+ if (!tqmctx->qcount[i])
+ continue;
+ hwq_attr.depth = ctx->qpc_count * tqmctx->qcount[i];
+ rc = bnxt_qplib_alloc_init_hwq(&tqmctx->qtbl[i], &hwq_attr);
+ if (rc)
+ goto out;
+ }
+out:
+ return rc;
+}
+
+static void bnxt_qplib_map_tqm_pgtbl(struct bnxt_qplib_tqm_ctx *ctx)
+{
+ struct bnxt_qplib_hwq *tbl;
+ dma_addr_t *dma_ptr;
+ __le64 **pbl_ptr, *ptr;
+ int i, j, k;
+ int fnz_idx = -1;
+ int pg_count;
+
+ pbl_ptr = (__le64 **)ctx->pde.pbl_ptr;
+
+ for (i = 0, j = 0; i < MAX_TQM_ALLOC_REQ;
+ i++, j += MAX_TQM_ALLOC_BLK_SIZE) {
+ tbl = &ctx->qtbl[i];
+ if (!tbl->max_elements)
+ continue;
+ if (fnz_idx == -1)
+ fnz_idx = i; /* first non-zero index */
+ switch (tbl->level) {
+ case PBL_LVL_2:
+ pg_count = tbl->pbl[PBL_LVL_1].pg_count;
+ for (k = 0; k < pg_count; k++) {
+ ptr = &pbl_ptr[PTR_PG(j + k)][PTR_IDX(j + k)];
+ dma_ptr = &tbl->pbl[PBL_LVL_1].pg_map_arr[k];
+ *ptr = cpu_to_le64(*dma_ptr | PTU_PTE_VALID);
+ }
+ break;
+ case PBL_LVL_1:
+ case PBL_LVL_0:
+ default:
+ ptr = &pbl_ptr[PTR_PG(j)][PTR_IDX(j)];
+ *ptr = cpu_to_le64(tbl->pbl[PBL_LVL_0].pg_map_arr[0] |
+ PTU_PTE_VALID);
+ break;
+ }
+ }
+ if (fnz_idx == -1)
+ fnz_idx = 0;
+ /* update pde level as per page table programming */
+ ctx->pde.level = (ctx->qtbl[fnz_idx].level == PBL_LVL_2) ? PBL_LVL_2 :
+ ctx->qtbl[fnz_idx].level + 1;
+}
+
+static int bnxt_qplib_setup_tqm_rings(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_ctx *ctx)
+{
+ int rc;
+
+ rc = bnxt_qplib_alloc_tqm_rings(res, ctx);
+ if (rc)
+ goto fail;
+
+ bnxt_qplib_map_tqm_pgtbl(&ctx->tqm_ctx);
+fail:
+ return rc;
+}
+
+/*
+ * Routine: bnxt_qplib_alloc_hwctx
+ * Description:
+ * Context tables are memories which are used by the chip fw.
+ * The 6 tables defined are:
+ * QPC ctx - holds QP states
+ * MRW ctx - holds memory region and window
+ * SRQ ctx - holds shared RQ states
+ * CQ ctx - holds completion queue states
+ * TQM ctx - holds Tx Queue Manager context
+ * TIM ctx - holds timer context
+ * Depending on the size of the tbl requested, either a 1 Page Buffer List
+ * or a 1-to-2-stage indirection Page Directory List + 1 PBL is used
+ * instead.
+ * Table might be employed as follows:
+ * For 0 < ctx size <= 1 PAGE, 0 level of ind is used
+ * For 1 PAGE < ctx size <= 512 entries size, 1 level of ind is used
+ * For 512 < ctx size <= MAX, 2 levels of ind is used
+ * Returns:
+ * 0 if success, else -ERRORS
+ */
+int bnxt_qplib_alloc_hwctx(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_ctx *ctx)
+{
+ struct bnxt_qplib_hwq_attr hwq_attr = {};
+ struct bnxt_qplib_sg_info sginfo = {};
+ int rc;
+
+ /* QPC Tables */
+ sginfo.pgsize = PAGE_SIZE;
+ sginfo.pgshft = PAGE_SHIFT;
+ hwq_attr.sginfo = &sginfo;
+
+ hwq_attr.res = res;
+ hwq_attr.depth = ctx->qpc_count;
+ hwq_attr.stride = BNXT_QPLIB_MAX_QP_CTX_ENTRY_SIZE;
+ hwq_attr.type = HWQ_TYPE_CTX;
+ rc = bnxt_qplib_alloc_init_hwq(&ctx->qpc_tbl, &hwq_attr);
+ if (rc)
+ goto fail;
+
+ /* MRW Tables */
+ hwq_attr.depth = ctx->mrw_count;
+ hwq_attr.stride = BNXT_QPLIB_MAX_MRW_CTX_ENTRY_SIZE;
+ rc = bnxt_qplib_alloc_init_hwq(&ctx->mrw_tbl, &hwq_attr);
+ if (rc)
+ goto fail;
+
+ /* SRQ Tables */
+ hwq_attr.depth = ctx->srqc_count;
+ hwq_attr.stride = BNXT_QPLIB_MAX_SRQ_CTX_ENTRY_SIZE;
+ rc = bnxt_qplib_alloc_init_hwq(&ctx->srqc_tbl, &hwq_attr);
+ if (rc)
+ goto fail;
+
+ /* CQ Tables */
+ hwq_attr.depth = ctx->cq_count;
+ hwq_attr.stride = BNXT_QPLIB_MAX_CQ_CTX_ENTRY_SIZE;
+ rc = bnxt_qplib_alloc_init_hwq(&ctx->cq_tbl, &hwq_attr);
+ if (rc)
+ goto fail;
+
+ /* TQM Buffer */
+ rc = bnxt_qplib_setup_tqm_rings(res, ctx);
+ if (rc)
+ goto fail;
+ /* TIM Buffer */
+ ctx->tim_tbl.max_elements = ctx->qpc_count * 16;
+ hwq_attr.depth = ctx->qpc_count * 16;
+ hwq_attr.stride = 1;
+ rc = bnxt_qplib_alloc_init_hwq(&ctx->tim_tbl, &hwq_attr);
+ if (rc)
+ goto fail;
+
+ return 0;
+
+fail:
+ bnxt_qplib_free_hwctx(res, ctx);
+ return rc;
+}
+
+static void bnxt_qplib_free_sgid_tbl(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_sgid_tbl *sgid_tbl)
+{
+ kfree(sgid_tbl->tbl);
+ kfree(sgid_tbl->hw_id);
+ kfree(sgid_tbl->ctx);
+ kfree(sgid_tbl->vlan);
+ sgid_tbl->tbl = NULL;
+ sgid_tbl->hw_id = NULL;
+ sgid_tbl->ctx = NULL;
+ sgid_tbl->vlan = NULL;
+ sgid_tbl->max = 0;
+ sgid_tbl->active = 0;
+}
+
+static int bnxt_qplib_alloc_sgid_tbl(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_sgid_tbl *sgid_tbl,
+ u16 max)
+{
+ sgid_tbl->tbl = kcalloc(max, sizeof(*sgid_tbl->tbl), GFP_KERNEL);
+ if (!sgid_tbl->tbl)
+ return -ENOMEM;
+
+ sgid_tbl->hw_id = kcalloc(max, sizeof(u16), GFP_KERNEL);
+ if (!sgid_tbl->hw_id)
+ goto out_free1;
+
+ sgid_tbl->ctx = kcalloc(max, sizeof(void *), GFP_KERNEL);
+ if (!sgid_tbl->ctx)
+ goto out_free2;
+
+ sgid_tbl->vlan = kcalloc(max, sizeof(u8), GFP_KERNEL);
+ if (!sgid_tbl->vlan)
+ goto out_free3;
+
+ sgid_tbl->max = max;
+ return 0;
+out_free3:
+ kfree(sgid_tbl->ctx);
+ sgid_tbl->ctx = NULL;
+out_free2:
+ kfree(sgid_tbl->hw_id);
+ sgid_tbl->hw_id = NULL;
+out_free1:
+ kfree(sgid_tbl->tbl);
+ sgid_tbl->tbl = NULL;
+ return -ENOMEM;
+};
+
+static void bnxt_qplib_cleanup_sgid_tbl(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_sgid_tbl *sgid_tbl)
+{
+ int i;
+
+ for (i = 0; i < sgid_tbl->max; i++) {
+ if (memcmp(&sgid_tbl->tbl[i], &bnxt_qplib_gid_zero,
+ sizeof(bnxt_qplib_gid_zero)))
+ bnxt_qplib_del_sgid(sgid_tbl, &sgid_tbl->tbl[i].gid,
+ sgid_tbl->tbl[i].vlan_id, true);
+ }
+ memset(sgid_tbl->tbl, 0, sizeof(*sgid_tbl->tbl) * sgid_tbl->max);
+ memset(sgid_tbl->hw_id, -1, sizeof(u16) * sgid_tbl->max);
+ memset(sgid_tbl->vlan, 0, sizeof(u8) * sgid_tbl->max);
+ sgid_tbl->active = 0;
+}
+
+static void bnxt_qplib_init_sgid_tbl(struct bnxt_qplib_sgid_tbl *sgid_tbl,
+ struct net_device *netdev)
+{
+ u32 i;
+
+ for (i = 0; i < sgid_tbl->max; i++)
+ sgid_tbl->tbl[i].vlan_id = 0xffff;
+
+ memset(sgid_tbl->hw_id, -1, sizeof(u16) * sgid_tbl->max);
+}
+
+/* PDs */
+int bnxt_qplib_alloc_pd(struct bnxt_qplib_res *res, struct bnxt_qplib_pd *pd)
+{
+ struct bnxt_qplib_pd_tbl *pdt = &res->pd_tbl;
+ u32 bit_num;
+ int rc = 0;
+
+ mutex_lock(&res->pd_tbl_lock);
+ bit_num = find_first_bit(pdt->tbl, pdt->max);
+ if (bit_num == pdt->max) {
+ rc = -ENOMEM;
+ goto exit;
+ }
+
+ /* Found unused PD */
+ clear_bit(bit_num, pdt->tbl);
+ pd->id = bit_num;
+exit:
+ mutex_unlock(&res->pd_tbl_lock);
+ return rc;
+}
+
+int bnxt_qplib_dealloc_pd(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_pd_tbl *pdt,
+ struct bnxt_qplib_pd *pd)
+{
+ int rc = 0;
+
+ mutex_lock(&res->pd_tbl_lock);
+ if (test_and_set_bit(pd->id, pdt->tbl)) {
+ dev_warn(&res->pdev->dev, "Freeing an unused PD? pdn = %d\n",
+ pd->id);
+ rc = -EINVAL;
+ goto exit;
+ }
+ pd->id = 0;
+exit:
+ mutex_unlock(&res->pd_tbl_lock);
+ return rc;
+}
+
+static void bnxt_qplib_free_pd_tbl(struct bnxt_qplib_pd_tbl *pdt)
+{
+ kfree(pdt->tbl);
+ pdt->tbl = NULL;
+ pdt->max = 0;
+}
+
+static int bnxt_qplib_alloc_pd_tbl(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_pd_tbl *pdt,
+ u32 max)
+{
+ u32 bytes;
+
+ bytes = max >> 3;
+ if (!bytes)
+ bytes = 1;
+ pdt->tbl = kmalloc(bytes, GFP_KERNEL);
+ if (!pdt->tbl)
+ return -ENOMEM;
+
+ pdt->max = max;
+ memset((u8 *)pdt->tbl, 0xFF, bytes);
+ mutex_init(&res->pd_tbl_lock);
+
+ return 0;
+}
+
+/* DPIs */
+int bnxt_qplib_alloc_dpi(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_dpi *dpi,
+ void *app, u8 type)
+{
+ struct bnxt_qplib_dpi_tbl *dpit = &res->dpi_tbl;
+ struct bnxt_qplib_reg_desc *reg;
+ u32 bit_num;
+ u64 umaddr;
+
+ reg = &dpit->wcreg;
+ mutex_lock(&res->dpi_tbl_lock);
+
+ bit_num = find_first_bit(dpit->tbl, dpit->max);
+ if (bit_num == dpit->max) {
+ mutex_unlock(&res->dpi_tbl_lock);
+ return -ENOMEM;
+ }
+
+ /* Found unused DPI */
+ clear_bit(bit_num, dpit->tbl);
+ dpit->app_tbl[bit_num] = app;
+
+ dpi->bit = bit_num;
+ dpi->dpi = bit_num + (reg->offset - dpit->ucreg.offset) / PAGE_SIZE;
+
+ umaddr = reg->bar_base + reg->offset + bit_num * PAGE_SIZE;
+ dpi->umdbr = umaddr;
+
+ switch (type) {
+ case BNXT_QPLIB_DPI_TYPE_KERNEL:
+ /* privileged dbr was already mapped just initialize it. */
+ dpi->umdbr = dpit->ucreg.bar_base +
+ dpit->ucreg.offset + bit_num * PAGE_SIZE;
+ dpi->dbr = dpit->priv_db;
+ dpi->dpi = dpi->bit;
+ break;
+ case BNXT_QPLIB_DPI_TYPE_WC:
+ dpi->dbr = ioremap_wc(umaddr, PAGE_SIZE);
+ break;
+ default:
+ dpi->dbr = ioremap(umaddr, PAGE_SIZE);
+ break;
+ }
+
+ dpi->type = type;
+ mutex_unlock(&res->dpi_tbl_lock);
+ return 0;
+
+}
+
+int bnxt_qplib_dealloc_dpi(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_dpi *dpi)
+{
+ struct bnxt_qplib_dpi_tbl *dpit = &res->dpi_tbl;
+
+ mutex_lock(&res->dpi_tbl_lock);
+ if (dpi->dpi && dpi->type != BNXT_QPLIB_DPI_TYPE_KERNEL)
+ pci_iounmap(res->pdev, dpi->dbr);
+
+ if (test_and_set_bit(dpi->bit, dpit->tbl)) {
+ dev_warn(&res->pdev->dev,
+ "Freeing an unused DPI? dpi = %d, bit = %d\n",
+ dpi->dpi, dpi->bit);
+ mutex_unlock(&res->dpi_tbl_lock);
+ return -EINVAL;
+ }
+ if (dpit->app_tbl)
+ dpit->app_tbl[dpi->bit] = NULL;
+ memset(dpi, 0, sizeof(*dpi));
+ mutex_unlock(&res->dpi_tbl_lock);
+ return 0;
+}
+
+static void bnxt_qplib_free_dpi_tbl(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_dpi_tbl *dpit)
+{
+ kfree(dpit->tbl);
+ kfree(dpit->app_tbl);
+ dpit->tbl = NULL;
+ dpit->app_tbl = NULL;
+ dpit->max = 0;
+}
+
+static int bnxt_qplib_alloc_dpi_tbl(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_dev_attr *dev_attr)
+{
+ struct bnxt_qplib_dpi_tbl *dpit;
+ struct bnxt_qplib_reg_desc *reg;
+ unsigned long bar_len;
+ u32 dbr_offset;
+ u32 bytes;
+
+ dpit = &res->dpi_tbl;
+ reg = &dpit->wcreg;
+
+ if (!bnxt_qplib_is_chip_gen_p5_p7(res->cctx)) {
+ /* Offest should come from L2 driver */
+ dbr_offset = dev_attr->l2_db_size;
+ dpit->ucreg.offset = dbr_offset;
+ dpit->wcreg.offset = dbr_offset;
+ }
+
+ bar_len = pci_resource_len(res->pdev, reg->bar_id);
+ dpit->max = (bar_len - reg->offset) / PAGE_SIZE;
+ if (dev_attr->max_dpi)
+ dpit->max = min_t(u32, dpit->max, dev_attr->max_dpi);
+
+ dpit->app_tbl = kcalloc(dpit->max, sizeof(void *), GFP_KERNEL);
+ if (!dpit->app_tbl)
+ return -ENOMEM;
+
+ bytes = dpit->max >> 3;
+ if (!bytes)
+ bytes = 1;
+
+ dpit->tbl = kmalloc(bytes, GFP_KERNEL);
+ if (!dpit->tbl) {
+ kfree(dpit->app_tbl);
+ dpit->app_tbl = NULL;
+ return -ENOMEM;
+ }
+
+ memset((u8 *)dpit->tbl, 0xFF, bytes);
+ mutex_init(&res->dpi_tbl_lock);
+ dpit->priv_db = dpit->ucreg.bar_reg + dpit->ucreg.offset;
+
+ return 0;
+
+}
+
+/* Stats */
+void bnxt_qplib_free_stats_ctx(struct pci_dev *pdev,
+ struct bnxt_qplib_stats *stats)
+{
+ if (stats->dma) {
+ dma_free_coherent(&pdev->dev, stats->size,
+ stats->dma, stats->dma_map);
+ }
+ memset(stats, 0, sizeof(*stats));
+ stats->fw_id = -1;
+}
+
+int bnxt_qplib_alloc_stats_ctx(struct pci_dev *pdev,
+ struct bnxt_qplib_chip_ctx *cctx,
+ struct bnxt_qplib_stats *stats)
+{
+ memset(stats, 0, sizeof(*stats));
+ stats->fw_id = -1;
+ stats->size = cctx->hw_stats_size;
+ stats->dma = dma_alloc_coherent(&pdev->dev, stats->size,
+ &stats->dma_map, GFP_KERNEL);
+ if (!stats->dma) {
+ dev_err(&pdev->dev, "Stats DMA allocation failed\n");
+ return -ENOMEM;
+ }
+ return 0;
+}
+
+void bnxt_qplib_cleanup_res(struct bnxt_qplib_res *res)
+{
+ bnxt_qplib_cleanup_sgid_tbl(res, &res->sgid_tbl);
+}
+
+int bnxt_qplib_init_res(struct bnxt_qplib_res *res)
+{
+ bnxt_qplib_init_sgid_tbl(&res->sgid_tbl, res->netdev);
+
+ return 0;
+}
+
+void bnxt_qplib_free_res(struct bnxt_qplib_res *res)
+{
+ kfree(res->rcfw->qp_tbl);
+ bnxt_qplib_free_sgid_tbl(res, &res->sgid_tbl);
+ bnxt_qplib_free_pd_tbl(&res->pd_tbl);
+ bnxt_qplib_free_dpi_tbl(res, &res->dpi_tbl);
+}
+
+int bnxt_qplib_alloc_res(struct bnxt_qplib_res *res, struct net_device *netdev)
+{
+ struct bnxt_qplib_rcfw *rcfw = res->rcfw;
+ struct bnxt_qplib_dev_attr *dev_attr;
+ int rc;
+
+ res->netdev = netdev;
+ dev_attr = res->dattr;
+
+ /* Allocate one extra to hold the QP1 entries */
+ rcfw->qp_tbl_size = max_t(u32, BNXT_RE_MAX_QPC_COUNT + 1, dev_attr->max_qp);
+ rcfw->qp_tbl = kcalloc(rcfw->qp_tbl_size, sizeof(struct bnxt_qplib_qp_node),
+ GFP_KERNEL);
+ if (!rcfw->qp_tbl)
+ return -ENOMEM;
+
+ rc = bnxt_qplib_alloc_sgid_tbl(res, &res->sgid_tbl, dev_attr->max_sgid);
+ if (rc)
+ goto fail;
+
+ rc = bnxt_qplib_alloc_pd_tbl(res, &res->pd_tbl, dev_attr->max_pd);
+ if (rc)
+ goto fail;
+
+ rc = bnxt_qplib_alloc_dpi_tbl(res, dev_attr);
+ if (rc)
+ goto fail;
+
+ return 0;
+fail:
+ bnxt_qplib_free_res(res);
+ return rc;
+}
+
+void bnxt_qplib_unmap_db_bar(struct bnxt_qplib_res *res)
+{
+ struct bnxt_qplib_reg_desc *reg;
+
+ reg = &res->dpi_tbl.ucreg;
+ if (reg->bar_reg)
+ pci_iounmap(res->pdev, reg->bar_reg);
+ reg->bar_reg = NULL;
+ reg->bar_base = 0;
+ reg->len = 0;
+ reg->bar_id = 0;
+}
+
+int bnxt_qplib_map_db_bar(struct bnxt_qplib_res *res)
+{
+ struct bnxt_qplib_reg_desc *ucreg;
+ struct bnxt_qplib_reg_desc *wcreg;
+
+ wcreg = &res->dpi_tbl.wcreg;
+ wcreg->bar_id = RCFW_DBR_PCI_BAR_REGION;
+ wcreg->bar_base = pci_resource_start(res->pdev, wcreg->bar_id);
+
+ ucreg = &res->dpi_tbl.ucreg;
+ ucreg->bar_id = RCFW_DBR_PCI_BAR_REGION;
+ ucreg->bar_base = pci_resource_start(res->pdev, ucreg->bar_id);
+ ucreg->len = ucreg->offset + PAGE_SIZE;
+ if (!ucreg->len || ((ucreg->len & (PAGE_SIZE - 1)) != 0)) {
+ dev_err(&res->pdev->dev, "QPLIB: invalid dbr length %d",
+ (int)ucreg->len);
+ return -EINVAL;
+ }
+ ucreg->bar_reg = ioremap(ucreg->bar_base, ucreg->len);
+ if (!ucreg->bar_reg) {
+ dev_err(&res->pdev->dev, "privileged dpi map failed!");
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+int bnxt_qplib_determine_atomics(struct pci_dev *dev)
+{
+ int comp;
+ u16 ctl2;
+
+ comp = pci_enable_atomic_ops_to_root(dev,
+ PCI_EXP_DEVCAP2_ATOMIC_COMP32);
+ if (comp)
+ return -EOPNOTSUPP;
+ comp = pci_enable_atomic_ops_to_root(dev,
+ PCI_EXP_DEVCAP2_ATOMIC_COMP64);
+ if (comp)
+ return -EOPNOTSUPP;
+ pcie_capability_read_word(dev, PCI_EXP_DEVCTL2, &ctl2);
+ return !(ctl2 & PCI_EXP_DEVCTL2_ATOMIC_REQ);
+}
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h
new file mode 100644
index 000000000000..2ea3b7f232a3
--- /dev/null
+++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h
@@ -0,0 +1,626 @@
+/*
+ * Broadcom NetXtreme-E RoCE driver.
+ *
+ * Copyright (c) 2016 - 2017, Broadcom. All rights reserved. The term
+ * Broadcom refers to Broadcom Limited and/or its subsidiaries.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Description: QPLib resource manager (header)
+ */
+
+#ifndef __BNXT_QPLIB_RES_H__
+#define __BNXT_QPLIB_RES_H__
+
+#include "bnxt_ulp.h"
+
+extern const struct bnxt_qplib_gid bnxt_qplib_gid_zero;
+
+#define CHIP_NUM_57508 0x1750
+#define CHIP_NUM_57504 0x1751
+#define CHIP_NUM_57502 0x1752
+#define CHIP_NUM_58818 0xd818
+#define CHIP_NUM_57608 0x1760
+
+#define BNXT_RE_MAX_QPC_COUNT (64 * 1024)
+#define BNXT_RE_MAX_MRW_COUNT (64 * 1024)
+#define BNXT_RE_MAX_SRQC_COUNT (64 * 1024)
+#define BNXT_RE_MAX_CQ_COUNT (64 * 1024)
+#define BNXT_RE_MAX_MRW_COUNT_64K (64 * 1024)
+#define BNXT_RE_MAX_MRW_COUNT_256K (256 * 1024)
+
+#define BNXT_QPLIB_DBR_VALID (0x1UL << 26)
+#define BNXT_QPLIB_DBR_EPOCH_SHIFT 24
+#define BNXT_QPLIB_DBR_TOGGLE_SHIFT 25
+
+struct bnxt_qplib_drv_modes {
+ u8 wqe_mode;
+ bool db_push;
+ bool dbr_pacing;
+ u32 toggle_bits;
+ u8 roce_mirror;
+};
+
+enum bnxt_re_toggle_modes {
+ BNXT_QPLIB_CQ_TOGGLE_BIT = 0x1,
+ BNXT_QPLIB_SRQ_TOGGLE_BIT = 0x2,
+};
+
+struct bnxt_qplib_chip_ctx {
+ u16 chip_num;
+ u8 chip_rev;
+ u8 chip_metal;
+ u16 hw_stats_size;
+ u16 hwrm_cmd_max_timeout;
+ struct bnxt_qplib_drv_modes modes;
+ u64 hwrm_intf_ver;
+ u32 dbr_stat_db_fifo;
+};
+
+struct bnxt_qplib_db_pacing_data {
+ u32 do_pacing;
+ u32 pacing_th;
+ u32 alarm_th;
+ u32 fifo_max_depth;
+ u32 fifo_room_mask;
+ u32 fifo_room_shift;
+ u32 grc_reg_offset;
+ u32 dev_err_state;
+};
+
+#define BNXT_QPLIB_DBR_PF_DB_OFFSET 0x10000
+#define BNXT_QPLIB_DBR_VF_DB_OFFSET 0x4000
+
+#define PTR_CNT_PER_PG (PAGE_SIZE / sizeof(void *))
+#define PTR_MAX_IDX_PER_PG (PTR_CNT_PER_PG - 1)
+#define PTR_PG(x) (((x) & ~PTR_MAX_IDX_PER_PG) / PTR_CNT_PER_PG)
+#define PTR_IDX(x) ((x) & PTR_MAX_IDX_PER_PG)
+
+#define HWQ_CMP(idx, hwq) ((idx) & ((hwq)->max_elements - 1))
+
+#define HWQ_FREE_SLOTS(hwq) (hwq->max_elements - \
+ ((HWQ_CMP(hwq->prod, hwq)\
+ - HWQ_CMP(hwq->cons, hwq))\
+ & (hwq->max_elements - 1)))
+enum bnxt_qplib_hwq_type {
+ HWQ_TYPE_CTX,
+ HWQ_TYPE_QUEUE,
+ HWQ_TYPE_L2_CMPL,
+ HWQ_TYPE_MR
+};
+
+#define MAX_PBL_LVL_0_PGS 1
+#define MAX_PBL_LVL_1_PGS 512
+#define MAX_PBL_LVL_1_PGS_SHIFT 9
+#define MAX_PBL_LVL_1_PGS_FOR_LVL_2 256
+#define MAX_PBL_LVL_2_PGS (256 * 512)
+#define MAX_PDL_LVL_SHIFT 9
+
+enum bnxt_qplib_pbl_lvl {
+ PBL_LVL_0,
+ PBL_LVL_1,
+ PBL_LVL_2,
+ PBL_LVL_MAX
+};
+
+#define ROCE_PG_SIZE_4K (4 * 1024)
+#define ROCE_PG_SIZE_8K (8 * 1024)
+#define ROCE_PG_SIZE_64K (64 * 1024)
+#define ROCE_PG_SIZE_2M (2 * 1024 * 1024)
+#define ROCE_PG_SIZE_8M (8 * 1024 * 1024)
+#define ROCE_PG_SIZE_1G (1024 * 1024 * 1024)
+
+enum bnxt_qplib_hwrm_pg_size {
+ BNXT_QPLIB_HWRM_PG_SIZE_4K = 0,
+ BNXT_QPLIB_HWRM_PG_SIZE_8K = 1,
+ BNXT_QPLIB_HWRM_PG_SIZE_64K = 2,
+ BNXT_QPLIB_HWRM_PG_SIZE_2M = 3,
+ BNXT_QPLIB_HWRM_PG_SIZE_8M = 4,
+ BNXT_QPLIB_HWRM_PG_SIZE_1G = 5,
+};
+
+struct bnxt_qplib_reg_desc {
+ u8 bar_id;
+ resource_size_t bar_base;
+ unsigned long offset;
+ void __iomem *bar_reg;
+ size_t len;
+};
+
+struct bnxt_qplib_pbl {
+ u32 pg_count;
+ u32 pg_size;
+ void **pg_arr;
+ dma_addr_t *pg_map_arr;
+};
+
+struct bnxt_qplib_sg_info {
+ struct ib_umem *umem;
+ u32 npages;
+ u32 pgshft;
+ u32 pgsize;
+ bool nopte;
+};
+
+struct bnxt_qplib_hwq_attr {
+ struct bnxt_qplib_res *res;
+ struct bnxt_qplib_sg_info *sginfo;
+ enum bnxt_qplib_hwq_type type;
+ u32 depth;
+ u32 stride;
+ u32 aux_stride;
+ u32 aux_depth;
+};
+
+struct bnxt_qplib_hwq {
+ struct pci_dev *pdev;
+ /* lock to protect qplib_hwq */
+ spinlock_t lock;
+ struct bnxt_qplib_pbl pbl[PBL_LVL_MAX + 1];
+ enum bnxt_qplib_pbl_lvl level; /* 0, 1, or 2 */
+ /* ptr for easy access to the PBL entries */
+ void **pbl_ptr;
+ /* ptr for easy access to the dma_addr */
+ dma_addr_t *pbl_dma_ptr;
+ u32 max_elements;
+ u32 depth;
+ u16 element_size; /* Size of each entry */
+ u16 qe_ppg; /* queue entry per page */
+
+ u32 prod; /* raw */
+ u32 cons; /* raw */
+ u8 cp_bit;
+ u8 is_user;
+ u64 *pad_pg;
+ u32 pad_stride;
+ u32 pad_pgofft;
+};
+
+struct bnxt_qplib_db_info {
+ void __iomem *db;
+ void __iomem *priv_db;
+ struct bnxt_qplib_hwq *hwq;
+ u32 xid;
+ u32 max_slot;
+ u32 flags;
+ u8 toggle;
+};
+
+enum bnxt_qplib_db_info_flags_mask {
+ BNXT_QPLIB_FLAG_EPOCH_CONS_SHIFT = 0x0UL,
+ BNXT_QPLIB_FLAG_EPOCH_PROD_SHIFT = 0x1UL,
+ BNXT_QPLIB_FLAG_EPOCH_CONS_MASK = 0x1UL,
+ BNXT_QPLIB_FLAG_EPOCH_PROD_MASK = 0x2UL,
+};
+
+enum bnxt_qplib_db_epoch_flag_shift {
+ BNXT_QPLIB_DB_EPOCH_CONS_SHIFT = BNXT_QPLIB_DBR_EPOCH_SHIFT,
+ BNXT_QPLIB_DB_EPOCH_PROD_SHIFT = (BNXT_QPLIB_DBR_EPOCH_SHIFT - 1),
+};
+
+/* Tables */
+struct bnxt_qplib_pd_tbl {
+ unsigned long *tbl;
+ u32 max;
+};
+
+struct bnxt_qplib_sgid_tbl {
+ struct bnxt_qplib_gid_info *tbl;
+ u16 *hw_id;
+ u16 max;
+ u16 active;
+ void *ctx;
+ u8 *vlan;
+};
+
+enum {
+ BNXT_QPLIB_DPI_TYPE_KERNEL = 0,
+ BNXT_QPLIB_DPI_TYPE_UC = 1,
+ BNXT_QPLIB_DPI_TYPE_WC = 2
+};
+
+struct bnxt_qplib_dpi {
+ u32 dpi;
+ u32 bit;
+ void __iomem *dbr;
+ u64 umdbr;
+ u8 type;
+};
+
+struct bnxt_qplib_dpi_tbl {
+ void **app_tbl;
+ unsigned long *tbl;
+ u16 max;
+ struct bnxt_qplib_reg_desc ucreg; /* Hold entire DB bar. */
+ struct bnxt_qplib_reg_desc wcreg;
+ void __iomem *priv_db;
+};
+
+struct bnxt_qplib_stats {
+ dma_addr_t dma_map;
+ void *dma;
+ u32 size;
+ u32 fw_id;
+};
+
+struct bnxt_qplib_vf_res {
+ u32 max_qp_per_vf;
+ u32 max_mrw_per_vf;
+ u32 max_srq_per_vf;
+ u32 max_cq_per_vf;
+ u32 max_gid_per_vf;
+};
+
+#define BNXT_QPLIB_MAX_QP_CTX_ENTRY_SIZE 448
+#define BNXT_QPLIB_MAX_SRQ_CTX_ENTRY_SIZE 64
+#define BNXT_QPLIB_MAX_CQ_CTX_ENTRY_SIZE 64
+#define BNXT_QPLIB_MAX_MRW_CTX_ENTRY_SIZE 128
+
+#define MAX_TQM_ALLOC_REQ 48
+#define MAX_TQM_ALLOC_BLK_SIZE 8
+struct bnxt_qplib_tqm_ctx {
+ struct bnxt_qplib_hwq pde;
+ u8 pde_level; /* Original level */
+ struct bnxt_qplib_hwq qtbl[MAX_TQM_ALLOC_REQ];
+ u8 qcount[MAX_TQM_ALLOC_REQ];
+};
+
+struct bnxt_qplib_ctx {
+ u32 qpc_count;
+ struct bnxt_qplib_hwq qpc_tbl;
+ u32 mrw_count;
+ struct bnxt_qplib_hwq mrw_tbl;
+ u32 srqc_count;
+ struct bnxt_qplib_hwq srqc_tbl;
+ u32 cq_count;
+ struct bnxt_qplib_hwq cq_tbl;
+ struct bnxt_qplib_hwq tim_tbl;
+ struct bnxt_qplib_tqm_ctx tqm_ctx;
+ struct bnxt_qplib_stats stats;
+ struct bnxt_qplib_stats stats3;
+ struct bnxt_qplib_vf_res vf_res;
+};
+
+struct bnxt_qplib_res {
+ struct pci_dev *pdev;
+ struct bnxt_qplib_chip_ctx *cctx;
+ struct bnxt_qplib_dev_attr *dattr;
+ struct net_device *netdev;
+ struct bnxt_en_dev *en_dev;
+ struct bnxt_qplib_rcfw *rcfw;
+ struct bnxt_qplib_pd_tbl pd_tbl;
+ /* To protect the pd table bit map */
+ struct mutex pd_tbl_lock;
+ struct bnxt_qplib_sgid_tbl sgid_tbl;
+ struct bnxt_qplib_dpi_tbl dpi_tbl;
+ /* To protect the dpi table bit map */
+ struct mutex dpi_tbl_lock;
+ bool prio;
+ bool is_vf;
+ struct bnxt_qplib_db_pacing_data *pacing_data;
+};
+
+static inline bool bnxt_qplib_is_chip_gen_p7(struct bnxt_qplib_chip_ctx *cctx)
+{
+ return (cctx->chip_num == CHIP_NUM_58818 ||
+ cctx->chip_num == CHIP_NUM_57608);
+}
+
+static inline bool bnxt_qplib_is_chip_gen_p5(struct bnxt_qplib_chip_ctx *cctx)
+{
+ return (cctx->chip_num == CHIP_NUM_57508 ||
+ cctx->chip_num == CHIP_NUM_57504 ||
+ cctx->chip_num == CHIP_NUM_57502);
+}
+
+static inline bool bnxt_qplib_is_chip_gen_p5_p7(struct bnxt_qplib_chip_ctx *cctx)
+{
+ return bnxt_qplib_is_chip_gen_p5(cctx) || bnxt_qplib_is_chip_gen_p7(cctx);
+}
+
+static inline u8 bnxt_qplib_get_hwq_type(struct bnxt_qplib_res *res)
+{
+ return bnxt_qplib_is_chip_gen_p5_p7(res->cctx) ?
+ HWQ_TYPE_QUEUE : HWQ_TYPE_L2_CMPL;
+}
+
+static inline u8 bnxt_qplib_get_ring_type(struct bnxt_qplib_chip_ctx *cctx)
+{
+ return bnxt_qplib_is_chip_gen_p5_p7(cctx) ?
+ RING_ALLOC_REQ_RING_TYPE_NQ :
+ RING_ALLOC_REQ_RING_TYPE_ROCE_CMPL;
+}
+
+static inline u8 bnxt_qplib_base_pg_size(struct bnxt_qplib_hwq *hwq)
+{
+ u8 pg_size = BNXT_QPLIB_HWRM_PG_SIZE_4K;
+ struct bnxt_qplib_pbl *pbl;
+
+ pbl = &hwq->pbl[PBL_LVL_0];
+ switch (pbl->pg_size) {
+ case ROCE_PG_SIZE_4K:
+ pg_size = BNXT_QPLIB_HWRM_PG_SIZE_4K;
+ break;
+ case ROCE_PG_SIZE_8K:
+ pg_size = BNXT_QPLIB_HWRM_PG_SIZE_8K;
+ break;
+ case ROCE_PG_SIZE_64K:
+ pg_size = BNXT_QPLIB_HWRM_PG_SIZE_64K;
+ break;
+ case ROCE_PG_SIZE_2M:
+ pg_size = BNXT_QPLIB_HWRM_PG_SIZE_2M;
+ break;
+ case ROCE_PG_SIZE_8M:
+ pg_size = BNXT_QPLIB_HWRM_PG_SIZE_8M;
+ break;
+ case ROCE_PG_SIZE_1G:
+ pg_size = BNXT_QPLIB_HWRM_PG_SIZE_1G;
+ break;
+ default:
+ break;
+ }
+
+ return pg_size;
+}
+
+static inline void *bnxt_qplib_get_qe(struct bnxt_qplib_hwq *hwq,
+ u32 indx, u64 *pg)
+{
+ u32 pg_num, pg_idx;
+
+ pg_num = (indx / hwq->qe_ppg);
+ pg_idx = (indx % hwq->qe_ppg);
+ if (pg)
+ *pg = (u64)&hwq->pbl_ptr[pg_num];
+ return (void *)(hwq->pbl_ptr[pg_num] + hwq->element_size * pg_idx);
+}
+
+static inline void *bnxt_qplib_get_prod_qe(struct bnxt_qplib_hwq *hwq, u32 idx)
+{
+ idx += hwq->prod;
+ if (idx >= hwq->depth)
+ idx -= hwq->depth;
+ return bnxt_qplib_get_qe(hwq, idx, NULL);
+}
+
+#define to_bnxt_qplib(ptr, type, member) \
+ container_of(ptr, type, member)
+
+struct bnxt_qplib_pd;
+struct bnxt_qplib_dev_attr;
+
+void bnxt_qplib_free_hwq(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_hwq *hwq);
+int bnxt_qplib_alloc_init_hwq(struct bnxt_qplib_hwq *hwq,
+ struct bnxt_qplib_hwq_attr *hwq_attr);
+int bnxt_qplib_alloc_pd(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_pd *pd);
+int bnxt_qplib_dealloc_pd(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_pd_tbl *pd_tbl,
+ struct bnxt_qplib_pd *pd);
+int bnxt_qplib_alloc_dpi(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_dpi *dpi,
+ void *app, u8 type);
+int bnxt_qplib_dealloc_dpi(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_dpi *dpi);
+void bnxt_qplib_cleanup_res(struct bnxt_qplib_res *res);
+int bnxt_qplib_init_res(struct bnxt_qplib_res *res);
+void bnxt_qplib_free_res(struct bnxt_qplib_res *res);
+int bnxt_qplib_alloc_res(struct bnxt_qplib_res *res, struct net_device *netdev);
+void bnxt_qplib_free_hwctx(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_ctx *ctx);
+int bnxt_qplib_alloc_hwctx(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_ctx *ctx);
+int bnxt_qplib_map_db_bar(struct bnxt_qplib_res *res);
+void bnxt_qplib_unmap_db_bar(struct bnxt_qplib_res *res);
+
+int bnxt_qplib_determine_atomics(struct pci_dev *dev);
+int bnxt_qplib_alloc_stats_ctx(struct pci_dev *pdev,
+ struct bnxt_qplib_chip_ctx *cctx,
+ struct bnxt_qplib_stats *stats);
+void bnxt_qplib_free_stats_ctx(struct pci_dev *pdev,
+ struct bnxt_qplib_stats *stats);
+
+static inline void bnxt_qplib_hwq_incr_prod(struct bnxt_qplib_db_info *dbinfo,
+ struct bnxt_qplib_hwq *hwq, u32 cnt)
+{
+ /* move prod and update toggle/epoch if wrap around */
+ hwq->prod += cnt;
+ if (hwq->prod >= hwq->depth) {
+ hwq->prod %= hwq->depth;
+ dbinfo->flags ^= 1UL << BNXT_QPLIB_FLAG_EPOCH_PROD_SHIFT;
+ }
+}
+
+static inline void bnxt_qplib_hwq_incr_cons(u32 max_elements, u32 *cons, u32 cnt,
+ u32 *dbinfo_flags)
+{
+ /* move cons and update toggle/epoch if wrap around */
+ *cons += cnt;
+ if (*cons >= max_elements) {
+ *cons %= max_elements;
+ *dbinfo_flags ^= 1UL << BNXT_QPLIB_FLAG_EPOCH_CONS_SHIFT;
+ }
+}
+
+static inline void bnxt_qplib_ring_db32(struct bnxt_qplib_db_info *info,
+ bool arm)
+{
+ u32 key = 0;
+
+ key |= info->hwq->cons | (CMPL_DOORBELL_IDX_VALID |
+ (CMPL_DOORBELL_KEY_CMPL & CMPL_DOORBELL_KEY_MASK));
+ if (!arm)
+ key |= CMPL_DOORBELL_MASK;
+ writel(key, info->db);
+}
+
+#define BNXT_QPLIB_INIT_DBHDR(xid, type, indx, toggle) \
+ (((u64)(((xid) & DBC_DBC_XID_MASK) | DBC_DBC_PATH_ROCE | \
+ (type) | BNXT_QPLIB_DBR_VALID) << 32) | (indx) | \
+ (((u32)(toggle)) << (BNXT_QPLIB_DBR_TOGGLE_SHIFT)))
+
+static inline void bnxt_qplib_ring_db(struct bnxt_qplib_db_info *info,
+ u32 type)
+{
+ u64 key = 0;
+ u32 indx;
+ u8 toggle = 0;
+
+ if (type == DBC_DBC_TYPE_CQ_ARMALL ||
+ type == DBC_DBC_TYPE_CQ_ARMSE)
+ toggle = info->toggle;
+
+ indx = (info->hwq->cons & DBC_DBC_INDEX_MASK) |
+ ((info->flags & BNXT_QPLIB_FLAG_EPOCH_CONS_MASK) <<
+ BNXT_QPLIB_DB_EPOCH_CONS_SHIFT);
+
+ key = BNXT_QPLIB_INIT_DBHDR(info->xid, type, indx, toggle);
+ writeq(key, info->db);
+}
+
+static inline void bnxt_qplib_ring_prod_db(struct bnxt_qplib_db_info *info,
+ u32 type)
+{
+ u64 key = 0;
+ u32 indx;
+
+ indx = (((info->hwq->prod / info->max_slot) & DBC_DBC_INDEX_MASK) |
+ ((info->flags & BNXT_QPLIB_FLAG_EPOCH_PROD_MASK) <<
+ BNXT_QPLIB_DB_EPOCH_PROD_SHIFT));
+ key = BNXT_QPLIB_INIT_DBHDR(info->xid, type, indx, 0);
+ writeq(key, info->db);
+}
+
+static inline void bnxt_qplib_armen_db(struct bnxt_qplib_db_info *info,
+ u32 type)
+{
+ u64 key = 0;
+ u8 toggle = 0;
+
+ if (type == DBC_DBC_TYPE_CQ_ARMENA || type == DBC_DBC_TYPE_SRQ_ARMENA)
+ toggle = info->toggle;
+ /* Index always at 0 */
+ key = BNXT_QPLIB_INIT_DBHDR(info->xid, type, 0, toggle);
+ writeq(key, info->priv_db);
+}
+
+static inline void bnxt_qplib_srq_arm_db(struct bnxt_qplib_db_info *info,
+ u32 th)
+{
+ u64 key = 0;
+
+ key = BNXT_QPLIB_INIT_DBHDR(info->xid, DBC_DBC_TYPE_SRQ_ARM, th, info->toggle);
+ writeq(key, info->priv_db);
+}
+
+static inline void bnxt_qplib_ring_nq_db(struct bnxt_qplib_db_info *info,
+ struct bnxt_qplib_chip_ctx *cctx,
+ bool arm)
+{
+ u32 type;
+
+ type = arm ? DBC_DBC_TYPE_NQ_ARM : DBC_DBC_TYPE_NQ;
+ if (bnxt_qplib_is_chip_gen_p5_p7(cctx))
+ bnxt_qplib_ring_db(info, type);
+ else
+ bnxt_qplib_ring_db32(info, arm);
+}
+
+static inline bool _is_ext_stats_supported(u16 dev_cap_flags)
+{
+ return dev_cap_flags &
+ CREQ_QUERY_FUNC_RESP_SB_EXT_STATS;
+}
+
+static inline int bnxt_ext_stats_supported(struct bnxt_qplib_chip_ctx *ctx,
+ u16 flags, bool virtfn)
+{
+ /* ext stats supported if cap flag is set AND is a PF OR a Thor2 VF */
+ return (_is_ext_stats_supported(flags) &&
+ ((virtfn && bnxt_qplib_is_chip_gen_p7(ctx)) || (!virtfn)));
+}
+
+static inline bool _is_hw_retx_supported(u16 dev_cap_flags)
+{
+ return dev_cap_flags &
+ (CREQ_QUERY_FUNC_RESP_SB_HW_REQUESTER_RETX_ENABLED |
+ CREQ_QUERY_FUNC_RESP_SB_HW_RESPONDER_RETX_ENABLED);
+}
+
+#define BNXT_RE_HW_RETX(a) _is_hw_retx_supported((a))
+
+static inline bool _is_host_msn_table(u16 dev_cap_ext_flags2)
+{
+ return (dev_cap_ext_flags2 & CREQ_QUERY_FUNC_RESP_SB_REQ_RETRANSMISSION_SUPPORT_MASK) ==
+ CREQ_QUERY_FUNC_RESP_SB_REQ_RETRANSMISSION_SUPPORT_HOST_MSN_TABLE;
+}
+
+static inline u8 bnxt_qplib_dbr_pacing_en(struct bnxt_qplib_chip_ctx *cctx)
+{
+ return cctx->modes.dbr_pacing;
+}
+
+static inline u8 bnxt_qplib_roce_mirror_supported(struct bnxt_qplib_chip_ctx *cctx)
+{
+ return cctx->modes.roce_mirror;
+}
+
+static inline bool _is_alloc_mr_unified(u16 dev_cap_flags)
+{
+ return dev_cap_flags & CREQ_QUERY_FUNC_RESP_SB_MR_REGISTER_ALLOC;
+}
+
+static inline bool _is_relaxed_ordering_supported(u16 dev_cap_ext_flags2)
+{
+ return dev_cap_ext_flags2 & CREQ_QUERY_FUNC_RESP_SB_MEMORY_REGION_RO_SUPPORTED;
+}
+
+static inline bool _is_optimize_modify_qp_supported(u16 dev_cap_ext_flags2)
+{
+ return dev_cap_ext_flags2 & CREQ_QUERY_FUNC_RESP_SB_OPTIMIZE_MODIFY_QP_SUPPORTED;
+}
+
+static inline bool _is_min_rnr_in_rtr_rts_mandatory(u16 dev_cap_ext_flags2)
+{
+ return !!(dev_cap_ext_flags2 & CREQ_QUERY_FUNC_RESP_SB_MIN_RNR_RTR_RTS_OPT_SUPPORTED);
+}
+
+static inline bool _is_cq_coalescing_supported(u16 dev_cap_ext_flags2)
+{
+ return dev_cap_ext_flags2 & CREQ_QUERY_FUNC_RESP_SB_CQ_COALESCING_SUPPORTED;
+}
+
+static inline bool _is_max_srq_ext_supported(u16 dev_cap_ext_flags_2)
+{
+ return !!(dev_cap_ext_flags_2 & CREQ_QUERY_FUNC_RESP_SB_MAX_SRQ_EXTENDED);
+}
+
+#endif /* __BNXT_QPLIB_RES_H__ */
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c
new file mode 100644
index 000000000000..408a34df2667
--- /dev/null
+++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c
@@ -0,0 +1,1145 @@
+/*
+ * Broadcom NetXtreme-E RoCE driver.
+ *
+ * Copyright (c) 2016 - 2017, Broadcom. All rights reserved. The term
+ * Broadcom refers to Broadcom Limited and/or its subsidiaries.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Description: Slow Path Operators
+ */
+
+#define dev_fmt(fmt) "QPLIB: " fmt
+
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+#include <linux/sched.h>
+#include <linux/pci.h>
+
+#include "roce_hsi.h"
+
+#include "qplib_res.h"
+#include "qplib_rcfw.h"
+#include "qplib_sp.h"
+#include "qplib_tlv.h"
+
+const struct bnxt_qplib_gid bnxt_qplib_gid_zero = {{ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0 } };
+
+/* Device */
+
+static bool bnxt_qplib_is_atomic_cap(struct bnxt_qplib_rcfw *rcfw)
+{
+ u16 pcie_ctl2 = 0;
+
+ if (!bnxt_qplib_is_chip_gen_p5_p7(rcfw->res->cctx))
+ return false;
+
+ pcie_capability_read_word(rcfw->pdev, PCI_EXP_DEVCTL2, &pcie_ctl2);
+ return (pcie_ctl2 & PCI_EXP_DEVCTL2_ATOMIC_REQ);
+}
+
+void bnxt_qplib_query_version(struct bnxt_qplib_rcfw *rcfw)
+{
+ struct creq_query_version_resp resp = {};
+ struct bnxt_qplib_cmdqmsg msg = {};
+ struct cmdq_query_version req = {};
+ struct bnxt_qplib_dev_attr *attr;
+ int rc;
+
+ attr = rcfw->res->dattr;
+ bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req,
+ CMDQ_BASE_OPCODE_QUERY_VERSION,
+ sizeof(req));
+
+ bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, NULL, sizeof(req), sizeof(resp), 0);
+ rc = bnxt_qplib_rcfw_send_message(rcfw, &msg);
+ if (rc)
+ return;
+ attr->fw_ver[0] = resp.fw_maj;
+ attr->fw_ver[1] = resp.fw_minor;
+ attr->fw_ver[2] = resp.fw_bld;
+ attr->fw_ver[3] = resp.fw_rsvd;
+}
+
+int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw)
+{
+ struct bnxt_qplib_dev_attr *attr = rcfw->res->dattr;
+ struct creq_query_func_resp resp = {};
+ struct bnxt_qplib_cmdqmsg msg = {};
+ struct creq_query_func_resp_sb *sb;
+ struct bnxt_qplib_rcfw_sbuf sbuf;
+ struct bnxt_qplib_chip_ctx *cctx;
+ struct cmdq_query_func req = {};
+ u8 *tqm_alloc;
+ int i, rc;
+ u32 temp;
+
+ cctx = rcfw->res->cctx;
+ bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req,
+ CMDQ_BASE_OPCODE_QUERY_FUNC,
+ sizeof(req));
+
+ sbuf.size = ALIGN(sizeof(*sb), BNXT_QPLIB_CMDQE_UNITS);
+ sbuf.sb = dma_alloc_coherent(&rcfw->pdev->dev, sbuf.size,
+ &sbuf.dma_addr, GFP_KERNEL);
+ if (!sbuf.sb)
+ return -ENOMEM;
+ sb = sbuf.sb;
+ req.resp_size = sbuf.size / BNXT_QPLIB_CMDQE_UNITS;
+ bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, &sbuf, sizeof(req),
+ sizeof(resp), 0);
+ rc = bnxt_qplib_rcfw_send_message(rcfw, &msg);
+ if (rc)
+ goto bail;
+
+ /* Extract the context from the side buffer */
+ attr->max_qp = le32_to_cpu(sb->max_qp);
+ /* max_qp value reported by FW doesn't include the QP1 */
+ attr->max_qp += 1;
+ attr->max_qp_rd_atom =
+ sb->max_qp_rd_atom > BNXT_QPLIB_MAX_OUT_RD_ATOM ?
+ BNXT_QPLIB_MAX_OUT_RD_ATOM : sb->max_qp_rd_atom;
+ attr->max_qp_init_rd_atom =
+ sb->max_qp_init_rd_atom > BNXT_QPLIB_MAX_OUT_RD_ATOM ?
+ BNXT_QPLIB_MAX_OUT_RD_ATOM : sb->max_qp_init_rd_atom;
+ attr->max_qp_wqes = le16_to_cpu(sb->max_qp_wr) - 1;
+ if (!bnxt_qplib_is_chip_gen_p5_p7(rcfw->res->cctx)) {
+ /*
+ * 128 WQEs needs to be reserved for the HW (8916). Prevent
+ * reporting the max number on legacy devices
+ */
+ attr->max_qp_wqes -= BNXT_QPLIB_RESERVED_QP_WRS + 1;
+ }
+
+ /* Adjust for max_qp_wqes for variable wqe */
+ if (cctx->modes.wqe_mode == BNXT_QPLIB_WQE_MODE_VARIABLE)
+ attr->max_qp_wqes = BNXT_VAR_MAX_WQE - 1;
+
+ attr->max_qp_sges = cctx->modes.wqe_mode == BNXT_QPLIB_WQE_MODE_VARIABLE ?
+ min_t(u32, sb->max_sge_var_wqe, BNXT_VAR_MAX_SGE) : 6;
+ attr->max_cq = le32_to_cpu(sb->max_cq);
+ attr->max_cq_wqes = le32_to_cpu(sb->max_cqe);
+ if (!bnxt_qplib_is_chip_gen_p7(rcfw->res->cctx))
+ attr->max_cq_wqes = min_t(u32, BNXT_QPLIB_MAX_CQ_WQES, attr->max_cq_wqes);
+ attr->max_cq_sges = attr->max_qp_sges;
+ attr->max_mr = le32_to_cpu(sb->max_mr);
+ attr->max_mw = le32_to_cpu(sb->max_mw);
+
+ attr->max_mr_size = le64_to_cpu(sb->max_mr_size);
+ attr->max_pd = 64 * 1024;
+ attr->max_raw_ethy_qp = le32_to_cpu(sb->max_raw_eth_qp);
+ attr->max_ah = le32_to_cpu(sb->max_ah);
+
+ attr->max_srq = le16_to_cpu(sb->max_srq);
+ attr->max_srq_wqes = le32_to_cpu(sb->max_srq_wr) - 1;
+ attr->max_srq_sges = sb->max_srq_sge;
+ attr->max_pkey = 1;
+ attr->max_inline_data = attr->max_qp_sges * sizeof(struct sq_sge);
+ if (!bnxt_qplib_is_chip_gen_p7(rcfw->res->cctx))
+ attr->l2_db_size = (sb->l2_db_space_size + 1) *
+ (0x01 << RCFW_DBR_BASE_PAGE_SHIFT);
+ /*
+ * Read the max gid supported by HW.
+ * For each entry in HW GID in HW table, we consume 2
+ * GID entries in the kernel GID table. So max_gid reported
+ * to stack can be up to twice the value reported by the HW, up to 256 gids.
+ */
+ attr->max_sgid = le32_to_cpu(sb->max_gid);
+ attr->max_sgid = min_t(u32, BNXT_QPLIB_NUM_GIDS_SUPPORTED, 2 * attr->max_sgid);
+ attr->dev_cap_flags = le16_to_cpu(sb->dev_cap_flags);
+ attr->dev_cap_flags2 = le16_to_cpu(sb->dev_cap_ext_flags_2);
+
+ if (_is_max_srq_ext_supported(attr->dev_cap_flags2))
+ attr->max_srq += le16_to_cpu(sb->max_srq_ext);
+
+ for (i = 0; i < MAX_TQM_ALLOC_REQ / 4; i++) {
+ temp = le32_to_cpu(sb->tqm_alloc_reqs[i]);
+ tqm_alloc = (u8 *)&temp;
+ attr->tqm_alloc_reqs[i * 4] = *tqm_alloc;
+ attr->tqm_alloc_reqs[i * 4 + 1] = *(++tqm_alloc);
+ attr->tqm_alloc_reqs[i * 4 + 2] = *(++tqm_alloc);
+ attr->tqm_alloc_reqs[i * 4 + 3] = *(++tqm_alloc);
+ }
+
+ if (rcfw->res->cctx->hwrm_intf_ver >= HWRM_VERSION_DEV_ATTR_MAX_DPI)
+ attr->max_dpi = le32_to_cpu(sb->max_dpi);
+
+ attr->is_atomic = bnxt_qplib_is_atomic_cap(rcfw);
+bail:
+ dma_free_coherent(&rcfw->pdev->dev, sbuf.size,
+ sbuf.sb, sbuf.dma_addr);
+ return rc;
+}
+
+int bnxt_qplib_set_func_resources(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_rcfw *rcfw,
+ struct bnxt_qplib_ctx *ctx)
+{
+ struct creq_set_func_resources_resp resp = {};
+ struct cmdq_set_func_resources req = {};
+ struct bnxt_qplib_cmdqmsg msg = {};
+ int rc;
+
+ bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req,
+ CMDQ_BASE_OPCODE_SET_FUNC_RESOURCES,
+ sizeof(req));
+
+ req.number_of_qp = cpu_to_le32(ctx->qpc_count);
+ req.number_of_mrw = cpu_to_le32(ctx->mrw_count);
+ req.number_of_srq = cpu_to_le32(ctx->srqc_count);
+ req.number_of_cq = cpu_to_le32(ctx->cq_count);
+
+ req.max_qp_per_vf = cpu_to_le32(ctx->vf_res.max_qp_per_vf);
+ req.max_mrw_per_vf = cpu_to_le32(ctx->vf_res.max_mrw_per_vf);
+ req.max_srq_per_vf = cpu_to_le32(ctx->vf_res.max_srq_per_vf);
+ req.max_cq_per_vf = cpu_to_le32(ctx->vf_res.max_cq_per_vf);
+ req.max_gid_per_vf = cpu_to_le32(ctx->vf_res.max_gid_per_vf);
+
+ bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, NULL, sizeof(req),
+ sizeof(resp), 0);
+ rc = bnxt_qplib_rcfw_send_message(rcfw, &msg);
+ if (rc) {
+ dev_err(&res->pdev->dev, "Failed to set function resources\n");
+ }
+ return rc;
+}
+
+/* SGID */
+int bnxt_qplib_get_sgid(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_sgid_tbl *sgid_tbl, int index,
+ struct bnxt_qplib_gid *gid)
+{
+ if (index >= sgid_tbl->max) {
+ dev_err(&res->pdev->dev,
+ "Index %d exceeded SGID table max (%d)\n",
+ index, sgid_tbl->max);
+ return -EINVAL;
+ }
+ memcpy(gid, &sgid_tbl->tbl[index].gid, sizeof(*gid));
+ return 0;
+}
+
+int bnxt_qplib_del_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
+ struct bnxt_qplib_gid *gid, u16 vlan_id, bool update)
+{
+ struct bnxt_qplib_res *res = to_bnxt_qplib(sgid_tbl,
+ struct bnxt_qplib_res,
+ sgid_tbl);
+ struct bnxt_qplib_rcfw *rcfw = res->rcfw;
+ int index;
+
+ /* Do we need a sgid_lock here? */
+ if (!sgid_tbl->active) {
+ dev_err(&res->pdev->dev, "SGID table has no active entries\n");
+ return -ENOMEM;
+ }
+ for (index = 0; index < sgid_tbl->max; index++) {
+ if (!memcmp(&sgid_tbl->tbl[index].gid, gid, sizeof(*gid)) &&
+ vlan_id == sgid_tbl->tbl[index].vlan_id)
+ break;
+ }
+ if (index == sgid_tbl->max) {
+ dev_warn(&res->pdev->dev, "GID not found in the SGID table\n");
+ return 0;
+ }
+ /* Remove GID from the SGID table */
+ if (update) {
+ struct creq_delete_gid_resp resp = {};
+ struct bnxt_qplib_cmdqmsg msg = {};
+ struct cmdq_delete_gid req = {};
+ int rc;
+
+ bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req,
+ CMDQ_BASE_OPCODE_DELETE_GID,
+ sizeof(req));
+ if (sgid_tbl->hw_id[index] == 0xFFFF) {
+ dev_err(&res->pdev->dev,
+ "GID entry contains an invalid HW id\n");
+ return -EINVAL;
+ }
+ req.gid_index = cpu_to_le16(sgid_tbl->hw_id[index]);
+ bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, NULL, sizeof(req),
+ sizeof(resp), 0);
+ rc = bnxt_qplib_rcfw_send_message(rcfw, &msg);
+ if (rc)
+ return rc;
+ }
+ memcpy(&sgid_tbl->tbl[index].gid, &bnxt_qplib_gid_zero,
+ sizeof(bnxt_qplib_gid_zero));
+ sgid_tbl->tbl[index].vlan_id = 0xFFFF;
+ sgid_tbl->vlan[index] = 0;
+ sgid_tbl->active--;
+ dev_dbg(&res->pdev->dev,
+ "SGID deleted hw_id[0x%x] = 0x%x active = 0x%x\n",
+ index, sgid_tbl->hw_id[index], sgid_tbl->active);
+ sgid_tbl->hw_id[index] = (u16)-1;
+
+ /* unlock */
+ return 0;
+}
+
+int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
+ struct bnxt_qplib_gid *gid, const u8 *smac,
+ u16 vlan_id, bool update, u32 *index,
+ bool is_ugid, u32 stats_ctx_id)
+{
+ struct bnxt_qplib_res *res = to_bnxt_qplib(sgid_tbl,
+ struct bnxt_qplib_res,
+ sgid_tbl);
+ struct bnxt_qplib_rcfw *rcfw = res->rcfw;
+ int i, free_idx;
+
+ /* Do we need a sgid_lock here? */
+ if (sgid_tbl->active == sgid_tbl->max) {
+ dev_err(&res->pdev->dev, "SGID table is full\n");
+ return -ENOMEM;
+ }
+ free_idx = sgid_tbl->max;
+ for (i = 0; i < sgid_tbl->max; i++) {
+ if (!memcmp(&sgid_tbl->tbl[i], gid, sizeof(*gid)) &&
+ sgid_tbl->tbl[i].vlan_id == vlan_id) {
+ dev_dbg(&res->pdev->dev,
+ "SGID entry already exist in entry %d!\n", i);
+ *index = i;
+ return -EALREADY;
+ } else if (!memcmp(&sgid_tbl->tbl[i], &bnxt_qplib_gid_zero,
+ sizeof(bnxt_qplib_gid_zero)) &&
+ free_idx == sgid_tbl->max) {
+ free_idx = i;
+ }
+ }
+ if (free_idx == sgid_tbl->max) {
+ dev_err(&res->pdev->dev,
+ "SGID table is FULL but count is not MAX??\n");
+ return -ENOMEM;
+ }
+ if (update) {
+ struct creq_add_gid_resp resp = {};
+ struct bnxt_qplib_cmdqmsg msg = {};
+ struct cmdq_add_gid req = {};
+ int rc;
+
+ bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req,
+ CMDQ_BASE_OPCODE_ADD_GID,
+ sizeof(req));
+
+ req.gid[0] = cpu_to_be32(((u32 *)gid->data)[3]);
+ req.gid[1] = cpu_to_be32(((u32 *)gid->data)[2]);
+ req.gid[2] = cpu_to_be32(((u32 *)gid->data)[1]);
+ req.gid[3] = cpu_to_be32(((u32 *)gid->data)[0]);
+ /*
+ * driver should ensure that all RoCE traffic is always VLAN
+ * tagged if RoCE traffic is running on non-zero VLAN ID or
+ * RoCE traffic is running on non-zero Priority.
+ */
+ if ((vlan_id != 0xFFFF) || res->prio) {
+ if (vlan_id != 0xFFFF)
+ req.vlan = cpu_to_le16
+ (vlan_id & CMDQ_ADD_GID_VLAN_VLAN_ID_MASK);
+ req.vlan |= cpu_to_le16
+ (CMDQ_ADD_GID_VLAN_TPID_TPID_8100 |
+ CMDQ_ADD_GID_VLAN_VLAN_EN);
+ }
+
+ /* MAC in network format */
+ req.src_mac[0] = cpu_to_be16(((u16 *)smac)[0]);
+ req.src_mac[1] = cpu_to_be16(((u16 *)smac)[1]);
+ req.src_mac[2] = cpu_to_be16(((u16 *)smac)[2]);
+
+ req.stats_ctx = cpu_to_le16(CMDQ_ADD_GID_STATS_CTX_STATS_CTX_VALID |
+ (u16)stats_ctx_id);
+
+ bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, NULL, sizeof(req),
+ sizeof(resp), 0);
+ rc = bnxt_qplib_rcfw_send_message(rcfw, &msg);
+ if (rc)
+ return rc;
+ sgid_tbl->hw_id[free_idx] = le32_to_cpu(resp.xid);
+ }
+ /* Add GID to the sgid_tbl */
+ memcpy(&sgid_tbl->tbl[free_idx], gid, sizeof(*gid));
+ sgid_tbl->tbl[free_idx].vlan_id = vlan_id;
+ sgid_tbl->active++;
+ if (vlan_id != 0xFFFF)
+ sgid_tbl->vlan[free_idx] = 1;
+
+ dev_dbg(&res->pdev->dev,
+ "SGID added hw_id[0x%x] = 0x%x active = 0x%x\n",
+ free_idx, sgid_tbl->hw_id[free_idx], sgid_tbl->active);
+
+ *index = free_idx;
+ /* unlock */
+ return 0;
+}
+
+/* AH */
+int bnxt_qplib_create_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah,
+ bool block)
+{
+ struct bnxt_qplib_rcfw *rcfw = res->rcfw;
+ struct creq_create_ah_resp resp = {};
+ struct bnxt_qplib_cmdqmsg msg = {};
+ struct cmdq_create_ah req = {};
+ u32 temp32[4];
+ u16 temp16[3];
+ int rc;
+
+ bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req,
+ CMDQ_BASE_OPCODE_CREATE_AH,
+ sizeof(req));
+
+ memcpy(temp32, ah->dgid.data, sizeof(struct bnxt_qplib_gid));
+ req.dgid[0] = cpu_to_le32(temp32[0]);
+ req.dgid[1] = cpu_to_le32(temp32[1]);
+ req.dgid[2] = cpu_to_le32(temp32[2]);
+ req.dgid[3] = cpu_to_le32(temp32[3]);
+
+ req.type = ah->nw_type;
+ req.hop_limit = ah->hop_limit;
+ req.sgid_index = cpu_to_le16(res->sgid_tbl.hw_id[ah->sgid_index]);
+ req.dest_vlan_id_flow_label = cpu_to_le32((ah->flow_label &
+ CMDQ_CREATE_AH_FLOW_LABEL_MASK) |
+ CMDQ_CREATE_AH_DEST_VLAN_ID_MASK);
+ req.pd_id = cpu_to_le32(ah->pd->id);
+ req.traffic_class = ah->traffic_class;
+
+ /* MAC in network format */
+ memcpy(temp16, ah->dmac, 6);
+ req.dest_mac[0] = cpu_to_le16(temp16[0]);
+ req.dest_mac[1] = cpu_to_le16(temp16[1]);
+ req.dest_mac[2] = cpu_to_le16(temp16[2]);
+
+ bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, NULL, sizeof(req),
+ sizeof(resp), block);
+ rc = bnxt_qplib_rcfw_send_message(rcfw, &msg);
+ if (rc)
+ return rc;
+
+ ah->id = le32_to_cpu(resp.xid);
+ return 0;
+}
+
+int bnxt_qplib_destroy_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah,
+ bool block)
+{
+ struct bnxt_qplib_rcfw *rcfw = res->rcfw;
+ struct creq_destroy_ah_resp resp = {};
+ struct bnxt_qplib_cmdqmsg msg = {};
+ struct cmdq_destroy_ah req = {};
+ int rc;
+
+ /* Clean up the AH table in the device */
+ bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req,
+ CMDQ_BASE_OPCODE_DESTROY_AH,
+ sizeof(req));
+
+ req.ah_cid = cpu_to_le32(ah->id);
+
+ bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, NULL, sizeof(req),
+ sizeof(resp), block);
+ rc = bnxt_qplib_rcfw_send_message(rcfw, &msg);
+ return rc;
+}
+
+/* MRW */
+int bnxt_qplib_free_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mrw)
+{
+ struct creq_deallocate_key_resp resp = {};
+ struct bnxt_qplib_rcfw *rcfw = res->rcfw;
+ struct cmdq_deallocate_key req = {};
+ struct bnxt_qplib_cmdqmsg msg = {};
+ int rc;
+
+ if (mrw->lkey == 0xFFFFFFFF) {
+ dev_info(&res->pdev->dev, "SP: Free a reserved lkey MRW\n");
+ return 0;
+ }
+
+ bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req,
+ CMDQ_BASE_OPCODE_DEALLOCATE_KEY,
+ sizeof(req));
+
+ req.mrw_flags = mrw->type;
+
+ if ((mrw->type == CMDQ_ALLOCATE_MRW_MRW_FLAGS_MW_TYPE1) ||
+ (mrw->type == CMDQ_ALLOCATE_MRW_MRW_FLAGS_MW_TYPE2A) ||
+ (mrw->type == CMDQ_ALLOCATE_MRW_MRW_FLAGS_MW_TYPE2B))
+ req.key = cpu_to_le32(mrw->rkey);
+ else
+ req.key = cpu_to_le32(mrw->lkey);
+
+ bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, NULL, sizeof(req),
+ sizeof(resp), 0);
+ rc = bnxt_qplib_rcfw_send_message(rcfw, &msg);
+ if (rc)
+ return rc;
+
+ /* Free the qplib's MRW memory */
+ if (mrw->hwq.max_elements)
+ bnxt_qplib_free_hwq(res, &mrw->hwq);
+
+ return 0;
+}
+
+int bnxt_qplib_alloc_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mrw)
+{
+ struct bnxt_qplib_rcfw *rcfw = res->rcfw;
+ struct creq_allocate_mrw_resp resp = {};
+ struct bnxt_qplib_cmdqmsg msg = {};
+ struct cmdq_allocate_mrw req = {};
+ unsigned long tmp;
+ int rc;
+
+ bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req,
+ CMDQ_BASE_OPCODE_ALLOCATE_MRW,
+ sizeof(req));
+
+ req.pd_id = cpu_to_le32(mrw->pd->id);
+ req.mrw_flags = mrw->type;
+ if ((mrw->type == CMDQ_ALLOCATE_MRW_MRW_FLAGS_PMR &&
+ mrw->access_flags & BNXT_QPLIB_FR_PMR) ||
+ mrw->type == CMDQ_ALLOCATE_MRW_MRW_FLAGS_MW_TYPE2A ||
+ mrw->type == CMDQ_ALLOCATE_MRW_MRW_FLAGS_MW_TYPE2B)
+ req.access = CMDQ_ALLOCATE_MRW_ACCESS_CONSUMER_OWNED_KEY;
+ tmp = (unsigned long)mrw;
+ req.mrw_handle = cpu_to_le64(tmp);
+
+ bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, NULL, sizeof(req),
+ sizeof(resp), 0);
+ rc = bnxt_qplib_rcfw_send_message(rcfw, &msg);
+ if (rc)
+ return rc;
+
+ if ((mrw->type == CMDQ_ALLOCATE_MRW_MRW_FLAGS_MW_TYPE1) ||
+ (mrw->type == CMDQ_ALLOCATE_MRW_MRW_FLAGS_MW_TYPE2A) ||
+ (mrw->type == CMDQ_ALLOCATE_MRW_MRW_FLAGS_MW_TYPE2B))
+ mrw->rkey = le32_to_cpu(resp.xid);
+ else
+ mrw->lkey = le32_to_cpu(resp.xid);
+ return 0;
+}
+
+int bnxt_qplib_dereg_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mrw,
+ bool block)
+{
+ struct bnxt_qplib_rcfw *rcfw = res->rcfw;
+ struct creq_deregister_mr_resp resp = {};
+ struct bnxt_qplib_cmdqmsg msg = {};
+ struct cmdq_deregister_mr req = {};
+ int rc;
+
+ bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req,
+ CMDQ_BASE_OPCODE_DEREGISTER_MR,
+ sizeof(req));
+
+ req.lkey = cpu_to_le32(mrw->lkey);
+ bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, NULL, sizeof(req),
+ sizeof(resp), block);
+ rc = bnxt_qplib_rcfw_send_message(rcfw, &msg);
+ if (rc)
+ return rc;
+
+ /* Free the qplib's MR memory */
+ if (mrw->hwq.max_elements) {
+ mrw->va = 0;
+ mrw->total_size = 0;
+ bnxt_qplib_free_hwq(res, &mrw->hwq);
+ }
+
+ return 0;
+}
+
+int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr,
+ struct ib_umem *umem, int num_pbls, u32 buf_pg_size, bool unified_mr)
+{
+ struct bnxt_qplib_rcfw *rcfw = res->rcfw;
+ struct bnxt_qplib_hwq_attr hwq_attr = {};
+ struct bnxt_qplib_sg_info sginfo = {};
+ struct creq_register_mr_resp resp = {};
+ struct bnxt_qplib_cmdqmsg msg = {};
+ struct cmdq_register_mr req = {};
+ int pages, rc;
+ u32 pg_size;
+ u16 level;
+
+ if (num_pbls) {
+ pages = roundup_pow_of_two(num_pbls);
+ /* Allocate memory for the non-leaf pages to store buf ptrs.
+ * Non-leaf pages always uses system PAGE_SIZE
+ */
+ /* Free the hwq if it already exist, must be a rereg */
+ if (mr->hwq.max_elements)
+ bnxt_qplib_free_hwq(res, &mr->hwq);
+ hwq_attr.res = res;
+ hwq_attr.depth = pages;
+ hwq_attr.stride = sizeof(dma_addr_t);
+ hwq_attr.type = HWQ_TYPE_MR;
+ hwq_attr.sginfo = &sginfo;
+ hwq_attr.sginfo->umem = umem;
+ hwq_attr.sginfo->npages = pages;
+ hwq_attr.sginfo->pgsize = buf_pg_size;
+ hwq_attr.sginfo->pgshft = ilog2(buf_pg_size);
+ rc = bnxt_qplib_alloc_init_hwq(&mr->hwq, &hwq_attr);
+ if (rc) {
+ dev_err(&res->pdev->dev,
+ "SP: Reg MR memory allocation failed\n");
+ return -ENOMEM;
+ }
+ }
+
+ bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req,
+ CMDQ_BASE_OPCODE_REGISTER_MR,
+ sizeof(req));
+
+ /* Configure the request */
+ if (mr->hwq.level == PBL_LVL_MAX) {
+ /* No PBL provided, just use system PAGE_SIZE */
+ level = 0;
+ req.pbl = 0;
+ pg_size = PAGE_SIZE;
+ } else {
+ level = mr->hwq.level;
+ req.pbl = cpu_to_le64(mr->hwq.pbl[PBL_LVL_0].pg_map_arr[0]);
+ }
+ pg_size = buf_pg_size ? buf_pg_size : PAGE_SIZE;
+ req.log2_pg_size_lvl = (level << CMDQ_REGISTER_MR_LVL_SFT) |
+ ((ilog2(pg_size) <<
+ CMDQ_REGISTER_MR_LOG2_PG_SIZE_SFT) &
+ CMDQ_REGISTER_MR_LOG2_PG_SIZE_MASK);
+ req.log2_pbl_pg_size = cpu_to_le16(((ilog2(PAGE_SIZE) <<
+ CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_SFT) &
+ CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_MASK));
+ req.access = (mr->access_flags & BNXT_QPLIB_MR_ACCESS_MASK);
+ req.va = cpu_to_le64(mr->va);
+ req.key = cpu_to_le32(mr->lkey);
+ if (unified_mr)
+ req.key = cpu_to_le32(mr->pd->id);
+ req.flags = cpu_to_le16(mr->flags);
+ req.mr_size = cpu_to_le64(mr->total_size);
+
+ bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, NULL, sizeof(req),
+ sizeof(resp), 0);
+ rc = bnxt_qplib_rcfw_send_message(rcfw, &msg);
+ if (rc)
+ goto fail;
+
+ if (unified_mr) {
+ mr->lkey = le32_to_cpu(resp.xid);
+ mr->rkey = mr->lkey;
+ }
+
+ return 0;
+
+fail:
+ if (mr->hwq.max_elements)
+ bnxt_qplib_free_hwq(res, &mr->hwq);
+ return rc;
+}
+
+int bnxt_qplib_alloc_fast_reg_page_list(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_frpl *frpl,
+ int max_pg_ptrs)
+{
+ struct bnxt_qplib_hwq_attr hwq_attr = {};
+ struct bnxt_qplib_sg_info sginfo = {};
+ int pg_ptrs, pages, rc;
+
+ /* Re-calculate the max to fit the HWQ allocation model */
+ pg_ptrs = roundup_pow_of_two(max_pg_ptrs);
+ pages = pg_ptrs >> MAX_PBL_LVL_1_PGS_SHIFT;
+ if (!pages)
+ pages++;
+
+ if (pages > MAX_PBL_LVL_1_PGS)
+ return -ENOMEM;
+
+ sginfo.pgsize = PAGE_SIZE;
+ sginfo.nopte = true;
+
+ hwq_attr.res = res;
+ hwq_attr.depth = pg_ptrs;
+ hwq_attr.stride = PAGE_SIZE;
+ hwq_attr.sginfo = &sginfo;
+ hwq_attr.type = HWQ_TYPE_CTX;
+ rc = bnxt_qplib_alloc_init_hwq(&frpl->hwq, &hwq_attr);
+ if (!rc)
+ frpl->max_pg_ptrs = pg_ptrs;
+
+ return rc;
+}
+
+int bnxt_qplib_free_fast_reg_page_list(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_frpl *frpl)
+{
+ bnxt_qplib_free_hwq(res, &frpl->hwq);
+ return 0;
+}
+
+int bnxt_qplib_get_roce_stats(struct bnxt_qplib_rcfw *rcfw,
+ struct bnxt_qplib_roce_stats *stats)
+{
+ struct creq_query_roce_stats_resp resp = {};
+ struct creq_query_roce_stats_resp_sb *sb;
+ struct cmdq_query_roce_stats req = {};
+ struct bnxt_qplib_cmdqmsg msg = {};
+ struct bnxt_qplib_rcfw_sbuf sbuf;
+ int rc;
+
+ bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req,
+ CMDQ_BASE_OPCODE_QUERY_ROCE_STATS,
+ sizeof(req));
+
+ sbuf.size = ALIGN(sizeof(*sb), BNXT_QPLIB_CMDQE_UNITS);
+ sbuf.sb = dma_alloc_coherent(&rcfw->pdev->dev, sbuf.size,
+ &sbuf.dma_addr, GFP_KERNEL);
+ if (!sbuf.sb)
+ return -ENOMEM;
+ sb = sbuf.sb;
+
+ req.resp_size = sbuf.size / BNXT_QPLIB_CMDQE_UNITS;
+ bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, &sbuf, sizeof(req),
+ sizeof(resp), 0);
+ rc = bnxt_qplib_rcfw_send_message(rcfw, &msg);
+ if (rc)
+ goto bail;
+ /* Extract the context from the side buffer */
+ stats->to_retransmits = le64_to_cpu(sb->to_retransmits);
+ stats->seq_err_naks_rcvd = le64_to_cpu(sb->seq_err_naks_rcvd);
+ stats->max_retry_exceeded = le64_to_cpu(sb->max_retry_exceeded);
+ stats->rnr_naks_rcvd = le64_to_cpu(sb->rnr_naks_rcvd);
+ stats->missing_resp = le64_to_cpu(sb->missing_resp);
+ stats->unrecoverable_err = le64_to_cpu(sb->unrecoverable_err);
+ stats->bad_resp_err = le64_to_cpu(sb->bad_resp_err);
+ stats->local_qp_op_err = le64_to_cpu(sb->local_qp_op_err);
+ stats->local_protection_err = le64_to_cpu(sb->local_protection_err);
+ stats->mem_mgmt_op_err = le64_to_cpu(sb->mem_mgmt_op_err);
+ stats->remote_invalid_req_err = le64_to_cpu(sb->remote_invalid_req_err);
+ stats->remote_access_err = le64_to_cpu(sb->remote_access_err);
+ stats->remote_op_err = le64_to_cpu(sb->remote_op_err);
+ stats->dup_req = le64_to_cpu(sb->dup_req);
+ stats->res_exceed_max = le64_to_cpu(sb->res_exceed_max);
+ stats->res_length_mismatch = le64_to_cpu(sb->res_length_mismatch);
+ stats->res_exceeds_wqe = le64_to_cpu(sb->res_exceeds_wqe);
+ stats->res_opcode_err = le64_to_cpu(sb->res_opcode_err);
+ stats->res_rx_invalid_rkey = le64_to_cpu(sb->res_rx_invalid_rkey);
+ stats->res_rx_domain_err = le64_to_cpu(sb->res_rx_domain_err);
+ stats->res_rx_no_perm = le64_to_cpu(sb->res_rx_no_perm);
+ stats->res_rx_range_err = le64_to_cpu(sb->res_rx_range_err);
+ stats->res_tx_invalid_rkey = le64_to_cpu(sb->res_tx_invalid_rkey);
+ stats->res_tx_domain_err = le64_to_cpu(sb->res_tx_domain_err);
+ stats->res_tx_no_perm = le64_to_cpu(sb->res_tx_no_perm);
+ stats->res_tx_range_err = le64_to_cpu(sb->res_tx_range_err);
+ stats->res_irrq_oflow = le64_to_cpu(sb->res_irrq_oflow);
+ stats->res_unsup_opcode = le64_to_cpu(sb->res_unsup_opcode);
+ stats->res_unaligned_atomic = le64_to_cpu(sb->res_unaligned_atomic);
+ stats->res_rem_inv_err = le64_to_cpu(sb->res_rem_inv_err);
+ stats->res_mem_error = le64_to_cpu(sb->res_mem_error);
+ stats->res_srq_err = le64_to_cpu(sb->res_srq_err);
+ stats->res_cmp_err = le64_to_cpu(sb->res_cmp_err);
+ stats->res_invalid_dup_rkey = le64_to_cpu(sb->res_invalid_dup_rkey);
+ stats->res_wqe_format_err = le64_to_cpu(sb->res_wqe_format_err);
+ stats->res_cq_load_err = le64_to_cpu(sb->res_cq_load_err);
+ stats->res_srq_load_err = le64_to_cpu(sb->res_srq_load_err);
+ stats->res_tx_pci_err = le64_to_cpu(sb->res_tx_pci_err);
+ stats->res_rx_pci_err = le64_to_cpu(sb->res_rx_pci_err);
+ if (!rcfw->init_oos_stats) {
+ rcfw->oos_prev = le64_to_cpu(sb->res_oos_drop_count);
+ rcfw->init_oos_stats = 1;
+ } else {
+ stats->res_oos_drop_count +=
+ (le64_to_cpu(sb->res_oos_drop_count) -
+ rcfw->oos_prev) & BNXT_QPLIB_OOS_COUNT_MASK;
+ rcfw->oos_prev = le64_to_cpu(sb->res_oos_drop_count);
+ }
+
+bail:
+ dma_free_coherent(&rcfw->pdev->dev, sbuf.size,
+ sbuf.sb, sbuf.dma_addr);
+ return rc;
+}
+
+int bnxt_qplib_qext_stat(struct bnxt_qplib_rcfw *rcfw, u32 fid,
+ struct bnxt_qplib_ext_stat *estat)
+{
+ struct creq_query_roce_stats_ext_resp resp = {};
+ struct creq_query_roce_stats_ext_resp_sb *sb;
+ struct cmdq_query_roce_stats_ext req = {};
+ struct bnxt_qplib_cmdqmsg msg = {};
+ struct bnxt_qplib_rcfw_sbuf sbuf;
+ int rc;
+
+ sbuf.size = ALIGN(sizeof(*sb), BNXT_QPLIB_CMDQE_UNITS);
+ sbuf.sb = dma_alloc_coherent(&rcfw->pdev->dev, sbuf.size,
+ &sbuf.dma_addr, GFP_KERNEL);
+ if (!sbuf.sb)
+ return -ENOMEM;
+
+ sb = sbuf.sb;
+ bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req,
+ CMDQ_QUERY_ROCE_STATS_EXT_OPCODE_QUERY_ROCE_STATS,
+ sizeof(req));
+
+ req.resp_size = sbuf.size / BNXT_QPLIB_CMDQE_UNITS;
+ req.resp_addr = cpu_to_le64(sbuf.dma_addr);
+ if (bnxt_qplib_is_chip_gen_p7(rcfw->res->cctx) && rcfw->res->is_vf)
+ req.function_id =
+ cpu_to_le32(CMDQ_QUERY_ROCE_STATS_EXT_VF_VALID |
+ (fid << CMDQ_QUERY_ROCE_STATS_EXT_VF_NUM_SFT));
+ else
+ req.function_id = cpu_to_le32(fid);
+ req.flags = cpu_to_le16(CMDQ_QUERY_ROCE_STATS_EXT_FLAGS_FUNCTION_ID);
+
+ bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, &sbuf, sizeof(req),
+ sizeof(resp), 0);
+ rc = bnxt_qplib_rcfw_send_message(rcfw, &msg);
+ if (rc)
+ goto bail;
+
+ estat->tx_atomic_req = le64_to_cpu(sb->tx_atomic_req_pkts);
+ estat->tx_read_req = le64_to_cpu(sb->tx_read_req_pkts);
+ estat->tx_read_res = le64_to_cpu(sb->tx_read_res_pkts);
+ estat->tx_write_req = le64_to_cpu(sb->tx_write_req_pkts);
+ estat->tx_send_req = le64_to_cpu(sb->tx_send_req_pkts);
+ estat->tx_roce_pkts = le64_to_cpu(sb->tx_roce_pkts);
+ estat->tx_roce_bytes = le64_to_cpu(sb->tx_roce_bytes);
+ estat->rx_atomic_req = le64_to_cpu(sb->rx_atomic_req_pkts);
+ estat->rx_read_req = le64_to_cpu(sb->rx_read_req_pkts);
+ estat->rx_read_res = le64_to_cpu(sb->rx_read_res_pkts);
+ estat->rx_write_req = le64_to_cpu(sb->rx_write_req_pkts);
+ estat->rx_send_req = le64_to_cpu(sb->rx_send_req_pkts);
+ estat->rx_roce_pkts = le64_to_cpu(sb->rx_roce_pkts);
+ estat->rx_roce_bytes = le64_to_cpu(sb->rx_roce_bytes);
+ estat->rx_roce_good_pkts = le64_to_cpu(sb->rx_roce_good_pkts);
+ estat->rx_roce_good_bytes = le64_to_cpu(sb->rx_roce_good_bytes);
+ estat->rx_out_of_buffer = le64_to_cpu(sb->rx_out_of_buffer_pkts);
+ estat->rx_out_of_sequence = le64_to_cpu(sb->rx_out_of_sequence_pkts);
+ estat->tx_cnp = le64_to_cpu(sb->tx_cnp_pkts);
+ estat->rx_cnp = le64_to_cpu(sb->rx_cnp_pkts);
+ estat->rx_ecn_marked = le64_to_cpu(sb->rx_ecn_marked_pkts);
+
+bail:
+ dma_free_coherent(&rcfw->pdev->dev, sbuf.size,
+ sbuf.sb, sbuf.dma_addr);
+ return rc;
+}
+
+static void bnxt_qplib_fill_cc_gen1(struct cmdq_modify_roce_cc_gen1_tlv *ext_req,
+ struct bnxt_qplib_cc_param_ext *cc_ext)
+{
+ ext_req->modify_mask = cpu_to_le64(cc_ext->ext_mask);
+ cc_ext->ext_mask = 0;
+ ext_req->inactivity_th_hi = cpu_to_le16(cc_ext->inact_th_hi);
+ ext_req->min_time_between_cnps = cpu_to_le16(cc_ext->min_delta_cnp);
+ ext_req->init_cp = cpu_to_le16(cc_ext->init_cp);
+ ext_req->tr_update_mode = cc_ext->tr_update_mode;
+ ext_req->tr_update_cycles = cc_ext->tr_update_cyls;
+ ext_req->fr_num_rtts = cc_ext->fr_rtt;
+ ext_req->ai_rate_increase = cc_ext->ai_rate_incr;
+ ext_req->reduction_relax_rtts_th = cpu_to_le16(cc_ext->rr_rtt_th);
+ ext_req->additional_relax_cr_th = cpu_to_le16(cc_ext->ar_cr_th);
+ ext_req->cr_min_th = cpu_to_le16(cc_ext->cr_min_th);
+ ext_req->bw_avg_weight = cc_ext->bw_avg_weight;
+ ext_req->actual_cr_factor = cc_ext->cr_factor;
+ ext_req->max_cp_cr_th = cpu_to_le16(cc_ext->cr_th_max_cp);
+ ext_req->cp_bias_en = cc_ext->cp_bias_en;
+ ext_req->cp_bias = cc_ext->cp_bias;
+ ext_req->cnp_ecn = cc_ext->cnp_ecn;
+ ext_req->rtt_jitter_en = cc_ext->rtt_jitter_en;
+ ext_req->link_bytes_per_usec = cpu_to_le16(cc_ext->bytes_per_usec);
+ ext_req->reset_cc_cr_th = cpu_to_le16(cc_ext->cc_cr_reset_th);
+ ext_req->cr_width = cc_ext->cr_width;
+ ext_req->quota_period_min = cc_ext->min_quota;
+ ext_req->quota_period_max = cc_ext->max_quota;
+ ext_req->quota_period_abs_max = cc_ext->abs_max_quota;
+ ext_req->tr_lower_bound = cpu_to_le16(cc_ext->tr_lb);
+ ext_req->cr_prob_factor = cc_ext->cr_prob_fac;
+ ext_req->tr_prob_factor = cc_ext->tr_prob_fac;
+ ext_req->fairness_cr_th = cpu_to_le16(cc_ext->fair_cr_th);
+ ext_req->red_div = cc_ext->red_div;
+ ext_req->cnp_ratio_th = cc_ext->cnp_ratio_th;
+ ext_req->exp_ai_rtts = cpu_to_le16(cc_ext->ai_ext_rtt);
+ ext_req->exp_ai_cr_cp_ratio = cc_ext->exp_crcp_ratio;
+ ext_req->use_rate_table = cc_ext->low_rate_en;
+ ext_req->cp_exp_update_th = cpu_to_le16(cc_ext->cpcr_update_th);
+ ext_req->high_exp_ai_rtts_th1 = cpu_to_le16(cc_ext->ai_rtt_th1);
+ ext_req->high_exp_ai_rtts_th2 = cpu_to_le16(cc_ext->ai_rtt_th2);
+ ext_req->actual_cr_cong_free_rtts_th = cpu_to_le16(cc_ext->cf_rtt_th);
+ ext_req->severe_cong_cr_th1 = cpu_to_le16(cc_ext->sc_cr_th1);
+ ext_req->severe_cong_cr_th2 = cpu_to_le16(cc_ext->sc_cr_th2);
+ ext_req->link64B_per_rtt = cpu_to_le32(cc_ext->l64B_per_rtt);
+ ext_req->cc_ack_bytes = cc_ext->cc_ack_bytes;
+}
+
+int bnxt_qplib_modify_cc(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_cc_param *cc_param)
+{
+ struct bnxt_qplib_tlv_modify_cc_req tlv_req = {};
+ struct creq_modify_roce_cc_resp resp = {};
+ struct bnxt_qplib_cmdqmsg msg = {};
+ struct cmdq_modify_roce_cc *req;
+ int req_size;
+ void *cmd;
+ int rc;
+
+ /* Prepare the older base command */
+ req = &tlv_req.base_req;
+ cmd = req;
+ req_size = sizeof(*req);
+ bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)req, CMDQ_BASE_OPCODE_MODIFY_ROCE_CC,
+ sizeof(*req));
+ req->modify_mask = cpu_to_le32(cc_param->mask);
+ req->enable_cc = cc_param->enable;
+ req->g = cc_param->g;
+ req->num_phases_per_state = cc_param->nph_per_state;
+ req->time_per_phase = cc_param->time_pph;
+ req->pkts_per_phase = cc_param->pkts_pph;
+ req->init_cr = cpu_to_le16(cc_param->init_cr);
+ req->init_tr = cpu_to_le16(cc_param->init_tr);
+ req->tos_dscp_tos_ecn = (cc_param->tos_dscp << CMDQ_MODIFY_ROCE_CC_TOS_DSCP_SFT) |
+ (cc_param->tos_ecn & CMDQ_MODIFY_ROCE_CC_TOS_ECN_MASK);
+ req->alt_vlan_pcp = cc_param->alt_vlan_pcp;
+ req->alt_tos_dscp = cpu_to_le16(cc_param->alt_tos_dscp);
+ req->rtt = cpu_to_le16(cc_param->rtt);
+ req->tcp_cp = cpu_to_le16(cc_param->tcp_cp);
+ req->cc_mode = cc_param->cc_mode;
+ req->inactivity_th = cpu_to_le16(cc_param->inact_th);
+
+ /* For chip gen P5 onwards fill extended cmd and header */
+ if (bnxt_qplib_is_chip_gen_p5_p7(res->cctx)) {
+ struct roce_tlv *hdr;
+ u32 payload;
+ u32 chunks;
+
+ cmd = &tlv_req;
+ req_size = sizeof(tlv_req);
+ /* Prepare primary tlv header */
+ hdr = &tlv_req.tlv_hdr;
+ chunks = CHUNKS(sizeof(struct bnxt_qplib_tlv_modify_cc_req));
+ payload = sizeof(struct cmdq_modify_roce_cc);
+ __roce_1st_tlv_prep(hdr, chunks, payload, true);
+ /* Prepare secondary tlv header */
+ hdr = (struct roce_tlv *)&tlv_req.ext_req;
+ payload = sizeof(struct cmdq_modify_roce_cc_gen1_tlv) -
+ sizeof(struct roce_tlv);
+ __roce_ext_tlv_prep(hdr, TLV_TYPE_MODIFY_ROCE_CC_GEN1, payload, false, true);
+ bnxt_qplib_fill_cc_gen1(&tlv_req.ext_req, &cc_param->cc_ext);
+ }
+
+ bnxt_qplib_fill_cmdqmsg(&msg, cmd, &resp, NULL, req_size,
+ sizeof(resp), 0);
+ rc = bnxt_qplib_rcfw_send_message(res->rcfw, &msg);
+ return rc;
+}
+
+int bnxt_qplib_read_context(struct bnxt_qplib_rcfw *rcfw, u8 res_type,
+ u32 xid, u32 resp_size, void *resp_va)
+{
+ struct creq_read_context resp = {};
+ struct bnxt_qplib_cmdqmsg msg = {};
+ struct cmdq_read_context req = {};
+ struct bnxt_qplib_rcfw_sbuf sbuf;
+ int rc;
+
+ sbuf.size = resp_size;
+ sbuf.sb = dma_alloc_coherent(&rcfw->pdev->dev, sbuf.size,
+ &sbuf.dma_addr, GFP_KERNEL);
+ if (!sbuf.sb)
+ return -ENOMEM;
+
+ bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req,
+ CMDQ_BASE_OPCODE_READ_CONTEXT, sizeof(req));
+ req.resp_addr = cpu_to_le64(sbuf.dma_addr);
+ req.resp_size = resp_size / BNXT_QPLIB_CMDQE_UNITS;
+
+ req.xid = cpu_to_le32(xid);
+ req.type = res_type;
+
+ bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, &sbuf, sizeof(req),
+ sizeof(resp), 0);
+ rc = bnxt_qplib_rcfw_send_message(rcfw, &msg);
+ if (rc)
+ goto free_mem;
+
+ memcpy(resp_va, sbuf.sb, resp_size);
+free_mem:
+ dma_free_coherent(&rcfw->pdev->dev, sbuf.size, sbuf.sb, sbuf.dma_addr);
+ return rc;
+}
+
+static void bnxt_qplib_read_cc_gen1(struct bnxt_qplib_cc_param_ext *cc_ext,
+ struct creq_query_roce_cc_gen1_resp_sb_tlv *sb)
+{
+ cc_ext->inact_th_hi = le16_to_cpu(sb->inactivity_th_hi);
+ cc_ext->min_delta_cnp = le16_to_cpu(sb->min_time_between_cnps);
+ cc_ext->init_cp = le16_to_cpu(sb->init_cp);
+ cc_ext->tr_update_mode = sb->tr_update_mode;
+ cc_ext->tr_update_cyls = sb->tr_update_cycles;
+ cc_ext->fr_rtt = sb->fr_num_rtts;
+ cc_ext->ai_rate_incr = sb->ai_rate_increase;
+ cc_ext->rr_rtt_th = le16_to_cpu(sb->reduction_relax_rtts_th);
+ cc_ext->ar_cr_th = le16_to_cpu(sb->additional_relax_cr_th);
+ cc_ext->cr_min_th = le16_to_cpu(sb->cr_min_th);
+ cc_ext->bw_avg_weight = sb->bw_avg_weight;
+ cc_ext->cr_factor = sb->actual_cr_factor;
+ cc_ext->cr_th_max_cp = le16_to_cpu(sb->max_cp_cr_th);
+ cc_ext->cp_bias_en = sb->cp_bias_en;
+ cc_ext->cp_bias = sb->cp_bias;
+ cc_ext->cnp_ecn = sb->cnp_ecn;
+ cc_ext->rtt_jitter_en = sb->rtt_jitter_en;
+ cc_ext->bytes_per_usec = le16_to_cpu(sb->link_bytes_per_usec);
+ cc_ext->cc_cr_reset_th = le16_to_cpu(sb->reset_cc_cr_th);
+ cc_ext->cr_width = sb->cr_width;
+ cc_ext->min_quota = sb->quota_period_min;
+ cc_ext->max_quota = sb->quota_period_max;
+ cc_ext->abs_max_quota = sb->quota_period_abs_max;
+ cc_ext->tr_lb = le16_to_cpu(sb->tr_lower_bound);
+ cc_ext->cr_prob_fac = sb->cr_prob_factor;
+ cc_ext->tr_prob_fac = sb->tr_prob_factor;
+ cc_ext->fair_cr_th = le16_to_cpu(sb->fairness_cr_th);
+ cc_ext->red_div = sb->red_div;
+ cc_ext->cnp_ratio_th = sb->cnp_ratio_th;
+ cc_ext->ai_ext_rtt = le16_to_cpu(sb->exp_ai_rtts);
+ cc_ext->exp_crcp_ratio = sb->exp_ai_cr_cp_ratio;
+ cc_ext->low_rate_en = sb->use_rate_table;
+ cc_ext->cpcr_update_th = le16_to_cpu(sb->cp_exp_update_th);
+ cc_ext->ai_rtt_th1 = le16_to_cpu(sb->high_exp_ai_rtts_th1);
+ cc_ext->ai_rtt_th2 = le16_to_cpu(sb->high_exp_ai_rtts_th2);
+ cc_ext->cf_rtt_th = le16_to_cpu(sb->actual_cr_cong_free_rtts_th);
+ cc_ext->sc_cr_th1 = le16_to_cpu(sb->severe_cong_cr_th1);
+ cc_ext->sc_cr_th2 = le16_to_cpu(sb->severe_cong_cr_th2);
+ cc_ext->l64B_per_rtt = le32_to_cpu(sb->link64B_per_rtt);
+ cc_ext->cc_ack_bytes = sb->cc_ack_bytes;
+ cc_ext->reduce_cf_rtt_th = le16_to_cpu(sb->reduce_init_cong_free_rtts_th);
+}
+
+int bnxt_qplib_query_cc_param(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_cc_param *cc_param)
+{
+ struct bnxt_qplib_tlv_query_rcc_sb *ext_sb;
+ struct bnxt_qplib_rcfw *rcfw = res->rcfw;
+ struct creq_query_roce_cc_resp resp = {};
+ struct creq_query_roce_cc_resp_sb *sb;
+ struct bnxt_qplib_cmdqmsg msg = {};
+ struct cmdq_query_roce_cc req = {};
+ struct bnxt_qplib_rcfw_sbuf sbuf;
+ size_t resp_size;
+ int rc;
+
+ /* Query the parameters from chip */
+ bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req, CMDQ_BASE_OPCODE_QUERY_ROCE_CC,
+ sizeof(req));
+ if (bnxt_qplib_is_chip_gen_p5_p7(res->cctx))
+ resp_size = sizeof(*ext_sb);
+ else
+ resp_size = sizeof(*sb);
+
+ sbuf.size = ALIGN(resp_size, BNXT_QPLIB_CMDQE_UNITS);
+ sbuf.sb = dma_alloc_coherent(&rcfw->pdev->dev, sbuf.size,
+ &sbuf.dma_addr, GFP_KERNEL);
+ if (!sbuf.sb)
+ return -ENOMEM;
+
+ req.resp_size = sbuf.size / BNXT_QPLIB_CMDQE_UNITS;
+ bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, &sbuf, sizeof(req),
+ sizeof(resp), 0);
+ rc = bnxt_qplib_rcfw_send_message(res->rcfw, &msg);
+ if (rc)
+ goto out;
+
+ ext_sb = sbuf.sb;
+ sb = bnxt_qplib_is_chip_gen_p5_p7(res->cctx) ? &ext_sb->base_sb :
+ (struct creq_query_roce_cc_resp_sb *)ext_sb;
+
+ cc_param->enable = sb->enable_cc & CREQ_QUERY_ROCE_CC_RESP_SB_ENABLE_CC;
+ cc_param->tos_ecn = (sb->tos_dscp_tos_ecn &
+ CREQ_QUERY_ROCE_CC_RESP_SB_TOS_ECN_MASK) >>
+ CREQ_QUERY_ROCE_CC_RESP_SB_TOS_ECN_SFT;
+ cc_param->tos_dscp = (sb->tos_dscp_tos_ecn &
+ CREQ_QUERY_ROCE_CC_RESP_SB_TOS_DSCP_MASK) >>
+ CREQ_QUERY_ROCE_CC_RESP_SB_TOS_DSCP_SFT;
+ cc_param->alt_tos_dscp = sb->alt_tos_dscp;
+ cc_param->alt_vlan_pcp = sb->alt_vlan_pcp;
+
+ cc_param->g = sb->g;
+ cc_param->nph_per_state = sb->num_phases_per_state;
+ cc_param->init_cr = le16_to_cpu(sb->init_cr);
+ cc_param->init_tr = le16_to_cpu(sb->init_tr);
+ cc_param->cc_mode = sb->cc_mode;
+ cc_param->inact_th = le16_to_cpu(sb->inactivity_th);
+ cc_param->rtt = le16_to_cpu(sb->rtt);
+ cc_param->tcp_cp = le16_to_cpu(sb->tcp_cp);
+ cc_param->time_pph = sb->time_per_phase;
+ cc_param->pkts_pph = sb->pkts_per_phase;
+ if (bnxt_qplib_is_chip_gen_p5_p7(res->cctx)) {
+ bnxt_qplib_read_cc_gen1(&cc_param->cc_ext, &ext_sb->gen1_sb);
+ cc_param->inact_th |= (cc_param->cc_ext.inact_th_hi & 0x3F) << 16;
+ }
+out:
+ dma_free_coherent(&rcfw->pdev->dev, sbuf.size, sbuf.sb, sbuf.dma_addr);
+ return rc;
+}
+
+int bnxt_qplib_create_flow(struct bnxt_qplib_res *res)
+{
+ struct creq_roce_mirror_cfg_resp resp = {};
+ struct bnxt_qplib_rcfw *rcfw = res->rcfw;
+ struct cmdq_roce_mirror_cfg req = {};
+ struct bnxt_qplib_cmdqmsg msg = {};
+
+ bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req,
+ CMDQ_BASE_OPCODE_ROCE_MIRROR_CFG,
+ sizeof(req));
+
+ req.mirror_flags = (u8)CMDQ_ROCE_MIRROR_CFG_MIRROR_ENABLE;
+
+ bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, NULL, sizeof(req),
+ sizeof(resp), 0);
+ return bnxt_qplib_rcfw_send_message(rcfw, &msg);
+}
+
+int bnxt_qplib_destroy_flow(struct bnxt_qplib_res *res)
+{
+ struct creq_roce_mirror_cfg_resp resp = {};
+ struct bnxt_qplib_rcfw *rcfw = res->rcfw;
+ struct cmdq_roce_mirror_cfg req = {};
+ struct bnxt_qplib_cmdqmsg msg = {};
+
+ bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req,
+ CMDQ_BASE_OPCODE_ROCE_MIRROR_CFG,
+ sizeof(req));
+
+ req.mirror_flags &= ~((u8)CMDQ_ROCE_MIRROR_CFG_MIRROR_ENABLE);
+
+ bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, NULL, sizeof(req),
+ sizeof(resp), 0);
+
+ return bnxt_qplib_rcfw_send_message(rcfw, &msg);
+}
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.h b/drivers/infiniband/hw/bnxt_re/qplib_sp.h
new file mode 100644
index 000000000000..5a45c55c6464
--- /dev/null
+++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.h
@@ -0,0 +1,373 @@
+/*
+ * Broadcom NetXtreme-E RoCE driver.
+ *
+ * Copyright (c) 2016 - 2017, Broadcom. All rights reserved. The term
+ * Broadcom refers to Broadcom Limited and/or its subsidiaries.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Description: Slow Path Operators (header)
+ *
+ */
+
+#ifndef __BNXT_QPLIB_SP_H__
+#define __BNXT_QPLIB_SP_H__
+
+#include <rdma/bnxt_re-abi.h>
+#define BNXT_QPLIB_RESERVED_QP_WRS 128
+
+struct bnxt_qplib_dev_attr {
+#define FW_VER_ARR_LEN 4
+ u8 fw_ver[FW_VER_ARR_LEN];
+#define BNXT_QPLIB_NUM_GIDS_SUPPORTED 256
+ u16 max_sgid;
+ u16 max_mrw;
+ u32 max_qp;
+#define BNXT_QPLIB_MAX_OUT_RD_ATOM 126
+ u32 max_qp_rd_atom;
+ u32 max_qp_init_rd_atom;
+ u32 max_qp_wqes;
+ u32 max_qp_sges;
+ u32 max_cq;
+#define BNXT_QPLIB_MAX_CQ_WQES 0xfffff
+ u32 max_cq_wqes;
+ u32 max_cq_sges;
+ u32 max_mr;
+ u64 max_mr_size;
+ u32 max_pd;
+ u32 max_mw;
+ u32 max_raw_ethy_qp;
+ u32 max_ah;
+ u32 max_srq;
+ u32 max_srq_wqes;
+ u32 max_srq_sges;
+ u32 max_pkey;
+ u32 max_inline_data;
+ u32 l2_db_size;
+ u8 tqm_alloc_reqs[MAX_TQM_ALLOC_REQ];
+ bool is_atomic;
+ u16 dev_cap_flags;
+ u16 dev_cap_flags2;
+ u32 max_dpi;
+};
+
+struct bnxt_qplib_pd {
+ u32 id;
+};
+
+struct bnxt_qplib_gid {
+ u8 data[16];
+};
+
+struct bnxt_qplib_gid_info {
+ struct bnxt_qplib_gid gid;
+ u16 vlan_id;
+};
+
+struct bnxt_qplib_ah {
+ struct bnxt_qplib_gid dgid;
+ struct bnxt_qplib_pd *pd;
+ u32 id;
+ u8 sgid_index;
+ /* For Query AH if the hw table and SW table are differnt */
+ u8 host_sgid_index;
+ u8 traffic_class;
+ u32 flow_label;
+ u8 hop_limit;
+ u8 sl;
+ u8 dmac[6];
+ u16 vlan_id;
+ u8 nw_type;
+};
+
+struct bnxt_qplib_mrw {
+ struct bnxt_qplib_pd *pd;
+ int type;
+ u32 access_flags;
+#define BNXT_QPLIB_MR_ACCESS_MASK 0xFF
+#define BNXT_QPLIB_FR_PMR 0x80000000
+ u32 lkey;
+ u32 rkey;
+#define BNXT_QPLIB_RSVD_LKEY 0xFFFFFFFF
+ u64 va;
+ u64 total_size;
+ u32 npages;
+ u16 flags;
+ u64 mr_handle;
+ struct bnxt_qplib_hwq hwq;
+};
+
+struct bnxt_qplib_frpl {
+ int max_pg_ptrs;
+ struct bnxt_qplib_hwq hwq;
+};
+
+#define BNXT_QPLIB_ACCESS_LOCAL_WRITE BIT(0)
+#define BNXT_QPLIB_ACCESS_REMOTE_READ BIT(1)
+#define BNXT_QPLIB_ACCESS_REMOTE_WRITE BIT(2)
+#define BNXT_QPLIB_ACCESS_REMOTE_ATOMIC BIT(3)
+#define BNXT_QPLIB_ACCESS_MW_BIND BIT(4)
+#define BNXT_QPLIB_ACCESS_ZERO_BASED BIT(5)
+#define BNXT_QPLIB_ACCESS_ON_DEMAND BIT(6)
+
+struct bnxt_qplib_roce_stats {
+ u64 to_retransmits;
+ u64 seq_err_naks_rcvd;
+ /* seq_err_naks_rcvd is 64 b */
+ u64 max_retry_exceeded;
+ /* max_retry_exceeded is 64 b */
+ u64 rnr_naks_rcvd;
+ /* rnr_naks_rcvd is 64 b */
+ u64 missing_resp;
+ u64 unrecoverable_err;
+ /* unrecoverable_err is 64 b */
+ u64 bad_resp_err;
+ /* bad_resp_err is 64 b */
+ u64 local_qp_op_err;
+ /* local_qp_op_err is 64 b */
+ u64 local_protection_err;
+ /* local_protection_err is 64 b */
+ u64 mem_mgmt_op_err;
+ /* mem_mgmt_op_err is 64 b */
+ u64 remote_invalid_req_err;
+ /* remote_invalid_req_err is 64 b */
+ u64 remote_access_err;
+ /* remote_access_err is 64 b */
+ u64 remote_op_err;
+ /* remote_op_err is 64 b */
+ u64 dup_req;
+ /* dup_req is 64 b */
+ u64 res_exceed_max;
+ /* res_exceed_max is 64 b */
+ u64 res_length_mismatch;
+ /* res_length_mismatch is 64 b */
+ u64 res_exceeds_wqe;
+ /* res_exceeds_wqe is 64 b */
+ u64 res_opcode_err;
+ /* res_opcode_err is 64 b */
+ u64 res_rx_invalid_rkey;
+ /* res_rx_invalid_rkey is 64 b */
+ u64 res_rx_domain_err;
+ /* res_rx_domain_err is 64 b */
+ u64 res_rx_no_perm;
+ /* res_rx_no_perm is 64 b */
+ u64 res_rx_range_err;
+ /* res_rx_range_err is 64 b */
+ u64 res_tx_invalid_rkey;
+ /* res_tx_invalid_rkey is 64 b */
+ u64 res_tx_domain_err;
+ /* res_tx_domain_err is 64 b */
+ u64 res_tx_no_perm;
+ /* res_tx_no_perm is 64 b */
+ u64 res_tx_range_err;
+ /* res_tx_range_err is 64 b */
+ u64 res_irrq_oflow;
+ /* res_irrq_oflow is 64 b */
+ u64 res_unsup_opcode;
+ /* res_unsup_opcode is 64 b */
+ u64 res_unaligned_atomic;
+ /* res_unaligned_atomic is 64 b */
+ u64 res_rem_inv_err;
+ /* res_rem_inv_err is 64 b */
+ u64 res_mem_error;
+ /* res_mem_error is 64 b */
+ u64 res_srq_err;
+ /* res_srq_err is 64 b */
+ u64 res_cmp_err;
+ /* res_cmp_err is 64 b */
+ u64 res_invalid_dup_rkey;
+ /* res_invalid_dup_rkey is 64 b */
+ u64 res_wqe_format_err;
+ /* res_wqe_format_err is 64 b */
+ u64 res_cq_load_err;
+ /* res_cq_load_err is 64 b */
+ u64 res_srq_load_err;
+ /* res_srq_load_err is 64 b */
+ u64 res_tx_pci_err;
+ /* res_tx_pci_err is 64 b */
+ u64 res_rx_pci_err;
+ /* res_rx_pci_err is 64 b */
+ u64 res_oos_drop_count;
+ /* res_oos_drop_count */
+ u64 active_qp_count_p0;
+ /* port 0 active qps */
+ u64 active_qp_count_p1;
+ /* port 1 active qps */
+ u64 active_qp_count_p2;
+ /* port 2 active qps */
+ u64 active_qp_count_p3;
+ /* port 3 active qps */
+};
+
+struct bnxt_qplib_ext_stat {
+ u64 tx_atomic_req;
+ u64 tx_read_req;
+ u64 tx_read_res;
+ u64 tx_write_req;
+ u64 tx_send_req;
+ u64 tx_roce_pkts;
+ u64 tx_roce_bytes;
+ u64 rx_atomic_req;
+ u64 rx_read_req;
+ u64 rx_read_res;
+ u64 rx_write_req;
+ u64 rx_send_req;
+ u64 rx_roce_pkts;
+ u64 rx_roce_bytes;
+ u64 rx_roce_good_pkts;
+ u64 rx_roce_good_bytes;
+ u64 rx_out_of_buffer;
+ u64 rx_out_of_sequence;
+ u64 tx_cnp;
+ u64 rx_cnp;
+ u64 rx_ecn_marked;
+};
+
+struct bnxt_qplib_cc_param_ext {
+ u64 ext_mask;
+ u16 inact_th_hi;
+ u16 min_delta_cnp;
+ u16 init_cp;
+ u8 tr_update_mode;
+ u8 tr_update_cyls;
+ u8 fr_rtt;
+ u8 ai_rate_incr;
+ u16 rr_rtt_th;
+ u16 ar_cr_th;
+ u16 cr_min_th;
+ u8 bw_avg_weight;
+ u8 cr_factor;
+ u16 cr_th_max_cp;
+ u8 cp_bias_en;
+ u8 cp_bias;
+ u8 cnp_ecn;
+ u8 rtt_jitter_en;
+ u16 bytes_per_usec;
+ u16 cc_cr_reset_th;
+ u8 cr_width;
+ u8 min_quota;
+ u8 max_quota;
+ u8 abs_max_quota;
+ u16 tr_lb;
+ u8 cr_prob_fac;
+ u8 tr_prob_fac;
+ u16 fair_cr_th;
+ u8 red_div;
+ u8 cnp_ratio_th;
+ u16 ai_ext_rtt;
+ u8 exp_crcp_ratio;
+ u8 low_rate_en;
+ u16 cpcr_update_th;
+ u16 ai_rtt_th1;
+ u16 ai_rtt_th2;
+ u16 cf_rtt_th;
+ u16 sc_cr_th1; /* severe congestion cr threshold 1 */
+ u16 sc_cr_th2; /* severe congestion cr threshold 2 */
+ u32 l64B_per_rtt;
+ u8 cc_ack_bytes;
+ u16 reduce_cf_rtt_th;
+};
+
+struct bnxt_qplib_cc_param {
+ u8 alt_vlan_pcp;
+ u8 qp1_tos_dscp;
+ u16 alt_tos_dscp;
+ u8 cc_mode;
+ u8 enable;
+ u16 inact_th;
+ u16 init_cr;
+ u16 init_tr;
+ u16 rtt;
+ u8 g;
+ u8 nph_per_state;
+ u8 time_pph;
+ u8 pkts_pph;
+ u8 tos_ecn;
+ u8 tos_dscp;
+ u16 tcp_cp;
+ struct bnxt_qplib_cc_param_ext cc_ext;
+ u32 mask;
+};
+
+int bnxt_qplib_get_sgid(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_sgid_tbl *sgid_tbl, int index,
+ struct bnxt_qplib_gid *gid);
+int bnxt_qplib_del_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
+ struct bnxt_qplib_gid *gid, u16 vlan_id, bool update);
+int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
+ struct bnxt_qplib_gid *gid, const u8 *mac, u16 vlan_id,
+ bool update, u32 *index,
+ bool is_ugid, u32 stats_ctx_id);
+int bnxt_qplib_update_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
+ struct bnxt_qplib_gid *gid, u16 gid_idx,
+ const u8 *smac);
+int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw);
+int bnxt_qplib_set_func_resources(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_rcfw *rcfw,
+ struct bnxt_qplib_ctx *ctx);
+int bnxt_qplib_create_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah,
+ bool block);
+int bnxt_qplib_destroy_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah,
+ bool block);
+int bnxt_qplib_alloc_mrw(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_mrw *mrw);
+int bnxt_qplib_dereg_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mrw,
+ bool block);
+int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr,
+ struct ib_umem *umem, int num_pbls, u32 buf_pg_size, bool unified_mr);
+int bnxt_qplib_free_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr);
+int bnxt_qplib_alloc_fast_reg_mr(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_mrw *mr, int max);
+int bnxt_qplib_alloc_fast_reg_page_list(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_frpl *frpl, int max);
+int bnxt_qplib_free_fast_reg_page_list(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_frpl *frpl);
+int bnxt_qplib_get_roce_stats(struct bnxt_qplib_rcfw *rcfw,
+ struct bnxt_qplib_roce_stats *stats);
+int bnxt_qplib_qext_stat(struct bnxt_qplib_rcfw *rcfw, u32 fid,
+ struct bnxt_qplib_ext_stat *estat);
+int bnxt_qplib_modify_cc(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_cc_param *cc_param);
+int bnxt_qplib_read_context(struct bnxt_qplib_rcfw *rcfw, u8 type, u32 xid,
+ u32 resp_size, void *resp_va);
+int bnxt_qplib_query_cc_param(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_cc_param *cc_param);
+void bnxt_qplib_query_version(struct bnxt_qplib_rcfw *rcfw);
+int bnxt_qplib_create_flow(struct bnxt_qplib_res *res);
+int bnxt_qplib_destroy_flow(struct bnxt_qplib_res *res);
+
+#define BNXT_VAR_MAX_WQE 4352
+#define BNXT_VAR_MAX_SLOT_ALIGN 256
+#define BNXT_VAR_MAX_SGE 13
+#define BNXT_RE_MAX_RQ_WQES 65536
+
+#define BNXT_STATIC_MAX_SGE 6
+
+#endif /* __BNXT_QPLIB_SP_H__*/
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_tlv.h b/drivers/infiniband/hw/bnxt_re/qplib_tlv.h
new file mode 100644
index 000000000000..ae96a75d7f31
--- /dev/null
+++ b/drivers/infiniband/hw/bnxt_re/qplib_tlv.h
@@ -0,0 +1,162 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
+
+#ifndef __QPLIB_TLV_H__
+#define __QPLIB_TLV_H__
+
+struct roce_tlv {
+ struct tlv tlv;
+ u8 total_size; // in units of 16 byte chunks
+ u8 unused[7]; // for 16 byte alignment
+};
+
+#define CHUNK_SIZE 16
+#define CHUNKS(x) (((x) + CHUNK_SIZE - 1) / CHUNK_SIZE)
+
+static inline void __roce_1st_tlv_prep(struct roce_tlv *rtlv, u8 tot_chunks,
+ u16 content_bytes, u8 flags)
+{
+ rtlv->tlv.cmd_discr = cpu_to_le16(CMD_DISCR_TLV_ENCAP);
+ rtlv->tlv.tlv_type = cpu_to_le16(TLV_TYPE_ROCE_SP_COMMAND);
+ rtlv->tlv.length = cpu_to_le16(content_bytes);
+ rtlv->tlv.flags = TLV_FLAGS_REQUIRED;
+ rtlv->tlv.flags |= flags ? TLV_FLAGS_MORE : 0;
+ rtlv->total_size = (tot_chunks);
+}
+
+static inline void __roce_ext_tlv_prep(struct roce_tlv *rtlv, u16 tlv_type,
+ u16 content_bytes, u8 more, u8 flags)
+{
+ rtlv->tlv.cmd_discr = cpu_to_le16(CMD_DISCR_TLV_ENCAP);
+ rtlv->tlv.tlv_type = cpu_to_le16(tlv_type);
+ rtlv->tlv.length = cpu_to_le16(content_bytes);
+ rtlv->tlv.flags |= more ? TLV_FLAGS_MORE : 0;
+ rtlv->tlv.flags |= flags ? TLV_FLAGS_REQUIRED : 0;
+}
+
+/*
+ * TLV size in units of 16 byte chunks
+ */
+#define TLV_SIZE ((sizeof(struct roce_tlv) + 15) / 16)
+/*
+ * TLV length in bytes
+ */
+#define TLV_BYTES (TLV_SIZE * 16)
+
+#define HAS_TLV_HEADER(msg) (le16_to_cpu(((struct tlv *)(msg))->cmd_discr) == CMD_DISCR_TLV_ENCAP)
+#define GET_TLV_DATA(tlv) ((void *)&((uint8_t *)(tlv))[TLV_BYTES])
+
+static inline u8 __get_cmdq_base_opcode(struct cmdq_base *req, u32 size)
+{
+ if (HAS_TLV_HEADER(req) && size > TLV_BYTES)
+ return ((struct cmdq_base *)GET_TLV_DATA(req))->opcode;
+ else
+ return req->opcode;
+}
+
+static inline void __set_cmdq_base_opcode(struct cmdq_base *req,
+ u32 size, u8 val)
+{
+ if (HAS_TLV_HEADER(req) && size > TLV_BYTES)
+ ((struct cmdq_base *)GET_TLV_DATA(req))->opcode = val;
+ else
+ req->opcode = val;
+}
+
+static inline __le16 __get_cmdq_base_cookie(struct cmdq_base *req, u32 size)
+{
+ if (HAS_TLV_HEADER(req) && size > TLV_BYTES)
+ return ((struct cmdq_base *)GET_TLV_DATA(req))->cookie;
+ else
+ return req->cookie;
+}
+
+static inline void __set_cmdq_base_cookie(struct cmdq_base *req,
+ u32 size, __le16 val)
+{
+ if (HAS_TLV_HEADER(req) && size > TLV_BYTES)
+ ((struct cmdq_base *)GET_TLV_DATA(req))->cookie = val;
+ else
+ req->cookie = val;
+}
+
+static inline __le64 __get_cmdq_base_resp_addr(struct cmdq_base *req, u32 size)
+{
+ if (HAS_TLV_HEADER(req) && size > TLV_BYTES)
+ return ((struct cmdq_base *)GET_TLV_DATA(req))->resp_addr;
+ else
+ return req->resp_addr;
+}
+
+static inline void __set_cmdq_base_resp_addr(struct cmdq_base *req,
+ u32 size, __le64 val)
+{
+ if (HAS_TLV_HEADER(req) && size > TLV_BYTES)
+ ((struct cmdq_base *)GET_TLV_DATA(req))->resp_addr = val;
+ else
+ req->resp_addr = val;
+}
+
+static inline u8 __get_cmdq_base_resp_size(struct cmdq_base *req, u32 size)
+{
+ if (HAS_TLV_HEADER(req) && size > TLV_BYTES)
+ return ((struct cmdq_base *)GET_TLV_DATA(req))->resp_size;
+ else
+ return req->resp_size;
+}
+
+static inline void __set_cmdq_base_resp_size(struct cmdq_base *req,
+ u32 size, u8 val)
+{
+ if (HAS_TLV_HEADER(req) && size > TLV_BYTES)
+ ((struct cmdq_base *)GET_TLV_DATA(req))->resp_size = val;
+ else
+ req->resp_size = val;
+}
+
+static inline u8 __get_cmdq_base_cmd_size(struct cmdq_base *req, u32 size)
+{
+ if (HAS_TLV_HEADER(req) && size > TLV_BYTES)
+ return ((struct roce_tlv *)(req))->total_size;
+ else
+ return req->cmd_size;
+}
+
+static inline void __set_cmdq_base_cmd_size(struct cmdq_base *req,
+ u32 size, u8 val)
+{
+ if (HAS_TLV_HEADER(req) && size > TLV_BYTES)
+ ((struct cmdq_base *)GET_TLV_DATA(req))->cmd_size = val;
+ else
+ req->cmd_size = val;
+}
+
+static inline __le16 __get_cmdq_base_flags(struct cmdq_base *req, u32 size)
+{
+ if (HAS_TLV_HEADER(req) && size > TLV_BYTES)
+ return ((struct cmdq_base *)GET_TLV_DATA(req))->flags;
+ else
+ return req->flags;
+}
+
+static inline void __set_cmdq_base_flags(struct cmdq_base *req,
+ u32 size, __le16 val)
+{
+ if (HAS_TLV_HEADER(req) && size > TLV_BYTES)
+ ((struct cmdq_base *)GET_TLV_DATA(req))->flags = val;
+ else
+ req->flags = val;
+}
+
+struct bnxt_qplib_tlv_modify_cc_req {
+ struct roce_tlv tlv_hdr;
+ struct cmdq_modify_roce_cc base_req;
+ __le64 tlvpad;
+ struct cmdq_modify_roce_cc_gen1_tlv ext_req;
+};
+
+struct bnxt_qplib_tlv_query_rcc_sb {
+ struct roce_tlv tlv_hdr;
+ struct creq_query_roce_cc_resp_sb base_sb;
+ struct creq_query_roce_cc_gen1_resp_sb_tlv gen1_sb;
+};
+#endif /* __QPLIB_TLV_H__ */
diff --git a/drivers/infiniband/hw/bnxt_re/roce_hsi.h b/drivers/infiniband/hw/bnxt_re/roce_hsi.h
new file mode 100644
index 000000000000..99ecd72e72e2
--- /dev/null
+++ b/drivers/infiniband/hw/bnxt_re/roce_hsi.h
@@ -0,0 +1,4729 @@
+/*
+ * Broadcom NetXtreme-E RoCE driver.
+ *
+ * Copyright (c) 2016 - 2017, Broadcom. All rights reserved. The term
+ * Broadcom refers to Broadcom Limited and/or its subsidiaries.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Description: RoCE HSI File - Autogenerated
+ */
+
+#ifndef __BNXT_RE_HSI_H__
+#define __BNXT_RE_HSI_H__
+
+/* include linux/bnxt/hsi.h */
+#include <linux/bnxt/hsi.h>
+
+/* tx_doorbell (size:32b/4B) */
+struct tx_doorbell {
+ __le32 key_idx;
+ #define TX_DOORBELL_IDX_MASK 0xffffffUL
+ #define TX_DOORBELL_IDX_SFT 0
+ #define TX_DOORBELL_KEY_MASK 0xf0000000UL
+ #define TX_DOORBELL_KEY_SFT 28
+ #define TX_DOORBELL_KEY_TX (0x0UL << 28)
+ #define TX_DOORBELL_KEY_LAST TX_DOORBELL_KEY_TX
+};
+
+/* rx_doorbell (size:32b/4B) */
+struct rx_doorbell {
+ __le32 key_idx;
+ #define RX_DOORBELL_IDX_MASK 0xffffffUL
+ #define RX_DOORBELL_IDX_SFT 0
+ #define RX_DOORBELL_KEY_MASK 0xf0000000UL
+ #define RX_DOORBELL_KEY_SFT 28
+ #define RX_DOORBELL_KEY_RX (0x1UL << 28)
+ #define RX_DOORBELL_KEY_LAST RX_DOORBELL_KEY_RX
+};
+
+/* cmpl_doorbell (size:32b/4B) */
+struct cmpl_doorbell {
+ __le32 key_mask_valid_idx;
+ #define CMPL_DOORBELL_IDX_MASK 0xffffffUL
+ #define CMPL_DOORBELL_IDX_SFT 0
+ #define CMPL_DOORBELL_IDX_VALID 0x4000000UL
+ #define CMPL_DOORBELL_MASK 0x8000000UL
+ #define CMPL_DOORBELL_KEY_MASK 0xf0000000UL
+ #define CMPL_DOORBELL_KEY_SFT 28
+ #define CMPL_DOORBELL_KEY_CMPL (0x2UL << 28)
+ #define CMPL_DOORBELL_KEY_LAST CMPL_DOORBELL_KEY_CMPL
+};
+
+/* status_doorbell (size:32b/4B) */
+struct status_doorbell {
+ __le32 key_idx;
+ #define STATUS_DOORBELL_IDX_MASK 0xffffffUL
+ #define STATUS_DOORBELL_IDX_SFT 0
+ #define STATUS_DOORBELL_KEY_MASK 0xf0000000UL
+ #define STATUS_DOORBELL_KEY_SFT 28
+ #define STATUS_DOORBELL_KEY_STAT (0x3UL << 28)
+ #define STATUS_DOORBELL_KEY_LAST STATUS_DOORBELL_KEY_STAT
+};
+
+/* cmdq_init (size:128b/16B) */
+struct cmdq_init {
+ __le64 cmdq_pbl;
+ __le16 cmdq_size_cmdq_lvl;
+ #define CMDQ_INIT_CMDQ_LVL_MASK 0x3UL
+ #define CMDQ_INIT_CMDQ_LVL_SFT 0
+ #define CMDQ_INIT_CMDQ_SIZE_MASK 0xfffcUL
+ #define CMDQ_INIT_CMDQ_SIZE_SFT 2
+ __le16 creq_ring_id;
+ __le32 prod_idx;
+};
+
+/* cmdq_base (size:128b/16B) */
+struct cmdq_base {
+ u8 opcode;
+ #define CMDQ_BASE_OPCODE_CREATE_QP 0x1UL
+ #define CMDQ_BASE_OPCODE_DESTROY_QP 0x2UL
+ #define CMDQ_BASE_OPCODE_MODIFY_QP 0x3UL
+ #define CMDQ_BASE_OPCODE_QUERY_QP 0x4UL
+ #define CMDQ_BASE_OPCODE_CREATE_SRQ 0x5UL
+ #define CMDQ_BASE_OPCODE_DESTROY_SRQ 0x6UL
+ #define CMDQ_BASE_OPCODE_QUERY_SRQ 0x8UL
+ #define CMDQ_BASE_OPCODE_CREATE_CQ 0x9UL
+ #define CMDQ_BASE_OPCODE_DESTROY_CQ 0xaUL
+ #define CMDQ_BASE_OPCODE_RESIZE_CQ 0xcUL
+ #define CMDQ_BASE_OPCODE_ALLOCATE_MRW 0xdUL
+ #define CMDQ_BASE_OPCODE_DEALLOCATE_KEY 0xeUL
+ #define CMDQ_BASE_OPCODE_REGISTER_MR 0xfUL
+ #define CMDQ_BASE_OPCODE_DEREGISTER_MR 0x10UL
+ #define CMDQ_BASE_OPCODE_ADD_GID 0x11UL
+ #define CMDQ_BASE_OPCODE_DELETE_GID 0x12UL
+ #define CMDQ_BASE_OPCODE_MODIFY_GID 0x17UL
+ #define CMDQ_BASE_OPCODE_QUERY_GID 0x18UL
+ #define CMDQ_BASE_OPCODE_CREATE_QP1 0x13UL
+ #define CMDQ_BASE_OPCODE_DESTROY_QP1 0x14UL
+ #define CMDQ_BASE_OPCODE_CREATE_AH 0x15UL
+ #define CMDQ_BASE_OPCODE_DESTROY_AH 0x16UL
+ #define CMDQ_BASE_OPCODE_INITIALIZE_FW 0x80UL
+ #define CMDQ_BASE_OPCODE_DEINITIALIZE_FW 0x81UL
+ #define CMDQ_BASE_OPCODE_STOP_FUNC 0x82UL
+ #define CMDQ_BASE_OPCODE_QUERY_FUNC 0x83UL
+ #define CMDQ_BASE_OPCODE_SET_FUNC_RESOURCES 0x84UL
+ #define CMDQ_BASE_OPCODE_READ_CONTEXT 0x85UL
+ #define CMDQ_BASE_OPCODE_VF_BACKCHANNEL_REQUEST 0x86UL
+ #define CMDQ_BASE_OPCODE_READ_VF_MEMORY 0x87UL
+ #define CMDQ_BASE_OPCODE_COMPLETE_VF_REQUEST 0x88UL
+ #define CMDQ_BASE_OPCODE_EXTEND_CONTEXT_ARRRAY 0x89UL
+ #define CMDQ_BASE_OPCODE_MAP_TC_TO_COS 0x8aUL
+ #define CMDQ_BASE_OPCODE_QUERY_VERSION 0x8bUL
+ #define CMDQ_BASE_OPCODE_MODIFY_ROCE_CC 0x8cUL
+ #define CMDQ_BASE_OPCODE_QUERY_ROCE_CC 0x8dUL
+ #define CMDQ_BASE_OPCODE_QUERY_ROCE_STATS 0x8eUL
+ #define CMDQ_BASE_OPCODE_SET_LINK_AGGR_MODE 0x8fUL
+ #define CMDQ_BASE_OPCODE_MODIFY_CQ 0x90UL
+ #define CMDQ_BASE_OPCODE_QUERY_QP_EXTEND 0x91UL
+ #define CMDQ_BASE_OPCODE_QUERY_ROCE_STATS_EXT 0x92UL
+ #define CMDQ_BASE_OPCODE_ROCE_MIRROR_CFG 0x99UL
+ #define CMDQ_BASE_OPCODE_LAST CMDQ_BASE_OPCODE_ROCE_MIRROR_CFG
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+};
+
+/* creq_base (size:128b/16B) */
+struct creq_base {
+ u8 type;
+ #define CREQ_BASE_TYPE_MASK 0x3fUL
+ #define CREQ_BASE_TYPE_SFT 0
+ #define CREQ_BASE_TYPE_QP_EVENT 0x38UL
+ #define CREQ_BASE_TYPE_FUNC_EVENT 0x3aUL
+ #define CREQ_BASE_TYPE_LAST CREQ_BASE_TYPE_FUNC_EVENT
+ u8 reserved56[7];
+ u8 v;
+ #define CREQ_BASE_V 0x1UL
+ u8 event;
+ u8 reserved48[6];
+};
+
+/* cmdq_query_version (size:128b/16B) */
+struct cmdq_query_version {
+ u8 opcode;
+ #define CMDQ_QUERY_VERSION_OPCODE_QUERY_VERSION 0x8bUL
+ #define CMDQ_QUERY_VERSION_OPCODE_LAST CMDQ_QUERY_VERSION_OPCODE_QUERY_VERSION
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+};
+
+/* creq_query_version_resp (size:128b/16B) */
+struct creq_query_version_resp {
+ u8 type;
+ #define CREQ_QUERY_VERSION_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_QUERY_VERSION_RESP_TYPE_SFT 0
+ #define CREQ_QUERY_VERSION_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_QUERY_VERSION_RESP_TYPE_LAST CREQ_QUERY_VERSION_RESP_TYPE_QP_EVENT
+ u8 status;
+ __le16 cookie;
+ u8 fw_maj;
+ u8 fw_minor;
+ u8 fw_bld;
+ u8 fw_rsvd;
+ u8 v;
+ #define CREQ_QUERY_VERSION_RESP_V 0x1UL
+ u8 event;
+ #define CREQ_QUERY_VERSION_RESP_EVENT_QUERY_VERSION 0x8bUL
+ #define CREQ_QUERY_VERSION_RESP_EVENT_LAST \
+ CREQ_QUERY_VERSION_RESP_EVENT_QUERY_VERSION
+ __le16 reserved16;
+ u8 intf_maj;
+ u8 intf_minor;
+ u8 intf_bld;
+ u8 intf_rsvd;
+};
+
+/* cmdq_initialize_fw (size:896b/112B) */
+struct cmdq_initialize_fw {
+ u8 opcode;
+ #define CMDQ_INITIALIZE_FW_OPCODE_INITIALIZE_FW 0x80UL
+ #define CMDQ_INITIALIZE_FW_OPCODE_LAST CMDQ_INITIALIZE_FW_OPCODE_INITIALIZE_FW
+ u8 cmd_size;
+ __le16 flags;
+ #define CMDQ_INITIALIZE_FW_FLAGS_MRAV_RESERVATION_SPLIT 0x1UL
+ #define CMDQ_INITIALIZE_FW_FLAGS_HW_REQUESTER_RETX_SUPPORTED 0x2UL
+ #define CMDQ_INITIALIZE_FW_FLAGS_OPTIMIZE_MODIFY_QP_SUPPORTED 0x8UL
+ #define CMDQ_INITIALIZE_FW_FLAGS_L2_VF_RESOURCE_MGMT 0x10UL
+ #define CMDQ_INITIALIZE_FW_FLAGS_MIRROR_ON_ROCE_SUPPORTED 0x80UL
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ u8 qpc_pg_size_qpc_lvl;
+ #define CMDQ_INITIALIZE_FW_QPC_LVL_MASK 0xfUL
+ #define CMDQ_INITIALIZE_FW_QPC_LVL_SFT 0
+ #define CMDQ_INITIALIZE_FW_QPC_LVL_LVL_0 0x0UL
+ #define CMDQ_INITIALIZE_FW_QPC_LVL_LVL_1 0x1UL
+ #define CMDQ_INITIALIZE_FW_QPC_LVL_LVL_2 0x2UL
+ #define CMDQ_INITIALIZE_FW_QPC_LVL_LAST CMDQ_INITIALIZE_FW_QPC_LVL_LVL_2
+ #define CMDQ_INITIALIZE_FW_QPC_PG_SIZE_MASK 0xf0UL
+ #define CMDQ_INITIALIZE_FW_QPC_PG_SIZE_SFT 4
+ #define CMDQ_INITIALIZE_FW_QPC_PG_SIZE_PG_4K (0x0UL << 4)
+ #define CMDQ_INITIALIZE_FW_QPC_PG_SIZE_PG_8K (0x1UL << 4)
+ #define CMDQ_INITIALIZE_FW_QPC_PG_SIZE_PG_64K (0x2UL << 4)
+ #define CMDQ_INITIALIZE_FW_QPC_PG_SIZE_PG_2M (0x3UL << 4)
+ #define CMDQ_INITIALIZE_FW_QPC_PG_SIZE_PG_8M (0x4UL << 4)
+ #define CMDQ_INITIALIZE_FW_QPC_PG_SIZE_PG_1G (0x5UL << 4)
+ #define CMDQ_INITIALIZE_FW_QPC_PG_SIZE_LAST CMDQ_INITIALIZE_FW_QPC_PG_SIZE_PG_1G
+ u8 mrw_pg_size_mrw_lvl;
+ #define CMDQ_INITIALIZE_FW_MRW_LVL_MASK 0xfUL
+ #define CMDQ_INITIALIZE_FW_MRW_LVL_SFT 0
+ #define CMDQ_INITIALIZE_FW_MRW_LVL_LVL_0 0x0UL
+ #define CMDQ_INITIALIZE_FW_MRW_LVL_LVL_1 0x1UL
+ #define CMDQ_INITIALIZE_FW_MRW_LVL_LVL_2 0x2UL
+ #define CMDQ_INITIALIZE_FW_MRW_LVL_LAST CMDQ_INITIALIZE_FW_MRW_LVL_LVL_2
+ #define CMDQ_INITIALIZE_FW_MRW_PG_SIZE_MASK 0xf0UL
+ #define CMDQ_INITIALIZE_FW_MRW_PG_SIZE_SFT 4
+ #define CMDQ_INITIALIZE_FW_MRW_PG_SIZE_PG_4K (0x0UL << 4)
+ #define CMDQ_INITIALIZE_FW_MRW_PG_SIZE_PG_8K (0x1UL << 4)
+ #define CMDQ_INITIALIZE_FW_MRW_PG_SIZE_PG_64K (0x2UL << 4)
+ #define CMDQ_INITIALIZE_FW_MRW_PG_SIZE_PG_2M (0x3UL << 4)
+ #define CMDQ_INITIALIZE_FW_MRW_PG_SIZE_PG_8M (0x4UL << 4)
+ #define CMDQ_INITIALIZE_FW_MRW_PG_SIZE_PG_1G (0x5UL << 4)
+ #define CMDQ_INITIALIZE_FW_MRW_PG_SIZE_LAST CMDQ_INITIALIZE_FW_MRW_PG_SIZE_PG_1G
+ u8 srq_pg_size_srq_lvl;
+ #define CMDQ_INITIALIZE_FW_SRQ_LVL_MASK 0xfUL
+ #define CMDQ_INITIALIZE_FW_SRQ_LVL_SFT 0
+ #define CMDQ_INITIALIZE_FW_SRQ_LVL_LVL_0 0x0UL
+ #define CMDQ_INITIALIZE_FW_SRQ_LVL_LVL_1 0x1UL
+ #define CMDQ_INITIALIZE_FW_SRQ_LVL_LVL_2 0x2UL
+ #define CMDQ_INITIALIZE_FW_SRQ_LVL_LAST CMDQ_INITIALIZE_FW_SRQ_LVL_LVL_2
+ #define CMDQ_INITIALIZE_FW_SRQ_PG_SIZE_MASK 0xf0UL
+ #define CMDQ_INITIALIZE_FW_SRQ_PG_SIZE_SFT 4
+ #define CMDQ_INITIALIZE_FW_SRQ_PG_SIZE_PG_4K (0x0UL << 4)
+ #define CMDQ_INITIALIZE_FW_SRQ_PG_SIZE_PG_8K (0x1UL << 4)
+ #define CMDQ_INITIALIZE_FW_SRQ_PG_SIZE_PG_64K (0x2UL << 4)
+ #define CMDQ_INITIALIZE_FW_SRQ_PG_SIZE_PG_2M (0x3UL << 4)
+ #define CMDQ_INITIALIZE_FW_SRQ_PG_SIZE_PG_8M (0x4UL << 4)
+ #define CMDQ_INITIALIZE_FW_SRQ_PG_SIZE_PG_1G (0x5UL << 4)
+ #define CMDQ_INITIALIZE_FW_SRQ_PG_SIZE_LAST CMDQ_INITIALIZE_FW_SRQ_PG_SIZE_PG_1G
+ u8 cq_pg_size_cq_lvl;
+ #define CMDQ_INITIALIZE_FW_CQ_LVL_MASK 0xfUL
+ #define CMDQ_INITIALIZE_FW_CQ_LVL_SFT 0
+ #define CMDQ_INITIALIZE_FW_CQ_LVL_LVL_0 0x0UL
+ #define CMDQ_INITIALIZE_FW_CQ_LVL_LVL_1 0x1UL
+ #define CMDQ_INITIALIZE_FW_CQ_LVL_LVL_2 0x2UL
+ #define CMDQ_INITIALIZE_FW_CQ_LVL_LAST CMDQ_INITIALIZE_FW_CQ_LVL_LVL_2
+ #define CMDQ_INITIALIZE_FW_CQ_PG_SIZE_MASK 0xf0UL
+ #define CMDQ_INITIALIZE_FW_CQ_PG_SIZE_SFT 4
+ #define CMDQ_INITIALIZE_FW_CQ_PG_SIZE_PG_4K (0x0UL << 4)
+ #define CMDQ_INITIALIZE_FW_CQ_PG_SIZE_PG_8K (0x1UL << 4)
+ #define CMDQ_INITIALIZE_FW_CQ_PG_SIZE_PG_64K (0x2UL << 4)
+ #define CMDQ_INITIALIZE_FW_CQ_PG_SIZE_PG_2M (0x3UL << 4)
+ #define CMDQ_INITIALIZE_FW_CQ_PG_SIZE_PG_8M (0x4UL << 4)
+ #define CMDQ_INITIALIZE_FW_CQ_PG_SIZE_PG_1G (0x5UL << 4)
+ #define CMDQ_INITIALIZE_FW_CQ_PG_SIZE_LAST CMDQ_INITIALIZE_FW_CQ_PG_SIZE_PG_1G
+ u8 tqm_pg_size_tqm_lvl;
+ #define CMDQ_INITIALIZE_FW_TQM_LVL_MASK 0xfUL
+ #define CMDQ_INITIALIZE_FW_TQM_LVL_SFT 0
+ #define CMDQ_INITIALIZE_FW_TQM_LVL_LVL_0 0x0UL
+ #define CMDQ_INITIALIZE_FW_TQM_LVL_LVL_1 0x1UL
+ #define CMDQ_INITIALIZE_FW_TQM_LVL_LVL_2 0x2UL
+ #define CMDQ_INITIALIZE_FW_TQM_LVL_LAST CMDQ_INITIALIZE_FW_TQM_LVL_LVL_2
+ #define CMDQ_INITIALIZE_FW_TQM_PG_SIZE_MASK 0xf0UL
+ #define CMDQ_INITIALIZE_FW_TQM_PG_SIZE_SFT 4
+ #define CMDQ_INITIALIZE_FW_TQM_PG_SIZE_PG_4K (0x0UL << 4)
+ #define CMDQ_INITIALIZE_FW_TQM_PG_SIZE_PG_8K (0x1UL << 4)
+ #define CMDQ_INITIALIZE_FW_TQM_PG_SIZE_PG_64K (0x2UL << 4)
+ #define CMDQ_INITIALIZE_FW_TQM_PG_SIZE_PG_2M (0x3UL << 4)
+ #define CMDQ_INITIALIZE_FW_TQM_PG_SIZE_PG_8M (0x4UL << 4)
+ #define CMDQ_INITIALIZE_FW_TQM_PG_SIZE_PG_1G (0x5UL << 4)
+ #define CMDQ_INITIALIZE_FW_TQM_PG_SIZE_LAST CMDQ_INITIALIZE_FW_TQM_PG_SIZE_PG_1G
+ u8 tim_pg_size_tim_lvl;
+ #define CMDQ_INITIALIZE_FW_TIM_LVL_MASK 0xfUL
+ #define CMDQ_INITIALIZE_FW_TIM_LVL_SFT 0
+ #define CMDQ_INITIALIZE_FW_TIM_LVL_LVL_0 0x0UL
+ #define CMDQ_INITIALIZE_FW_TIM_LVL_LVL_1 0x1UL
+ #define CMDQ_INITIALIZE_FW_TIM_LVL_LVL_2 0x2UL
+ #define CMDQ_INITIALIZE_FW_TIM_LVL_LAST CMDQ_INITIALIZE_FW_TIM_LVL_LVL_2
+ #define CMDQ_INITIALIZE_FW_TIM_PG_SIZE_MASK 0xf0UL
+ #define CMDQ_INITIALIZE_FW_TIM_PG_SIZE_SFT 4
+ #define CMDQ_INITIALIZE_FW_TIM_PG_SIZE_PG_4K (0x0UL << 4)
+ #define CMDQ_INITIALIZE_FW_TIM_PG_SIZE_PG_8K (0x1UL << 4)
+ #define CMDQ_INITIALIZE_FW_TIM_PG_SIZE_PG_64K (0x2UL << 4)
+ #define CMDQ_INITIALIZE_FW_TIM_PG_SIZE_PG_2M (0x3UL << 4)
+ #define CMDQ_INITIALIZE_FW_TIM_PG_SIZE_PG_8M (0x4UL << 4)
+ #define CMDQ_INITIALIZE_FW_TIM_PG_SIZE_PG_1G (0x5UL << 4)
+ #define CMDQ_INITIALIZE_FW_TIM_PG_SIZE_LAST CMDQ_INITIALIZE_FW_TIM_PG_SIZE_PG_1G
+ __le16 log2_dbr_pg_size;
+ #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_MASK 0xfUL
+ #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_SFT 0
+ #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_4K 0x0UL
+ #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_8K 0x1UL
+ #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_16K 0x2UL
+ #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_32K 0x3UL
+ #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_64K 0x4UL
+ #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_128K 0x5UL
+ #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_256K 0x6UL
+ #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_512K 0x7UL
+ #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_1M 0x8UL
+ #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_2M 0x9UL
+ #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_4M 0xaUL
+ #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_8M 0xbUL
+ #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_16M 0xcUL
+ #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_32M 0xdUL
+ #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_64M 0xeUL
+ #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_128M 0xfUL
+ #define CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_LAST \
+ CMDQ_INITIALIZE_FW_LOG2_DBR_PG_SIZE_PG_128M
+ #define CMDQ_INITIALIZE_FW_RSVD_MASK 0xfff0UL
+ #define CMDQ_INITIALIZE_FW_RSVD_SFT 4
+ __le64 qpc_page_dir;
+ __le64 mrw_page_dir;
+ __le64 srq_page_dir;
+ __le64 cq_page_dir;
+ __le64 tqm_page_dir;
+ __le64 tim_page_dir;
+ __le32 number_of_qp;
+ __le32 number_of_mrw;
+ __le32 number_of_srq;
+ __le32 number_of_cq;
+ __le32 max_qp_per_vf;
+ __le32 max_mrw_per_vf;
+ __le32 max_srq_per_vf;
+ __le32 max_cq_per_vf;
+ __le32 max_gid_per_vf;
+ __le32 stat_ctx_id;
+};
+
+/* creq_initialize_fw_resp (size:128b/16B) */
+struct creq_initialize_fw_resp {
+ u8 type;
+ #define CREQ_INITIALIZE_FW_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_INITIALIZE_FW_RESP_TYPE_SFT 0
+ #define CREQ_INITIALIZE_FW_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_INITIALIZE_FW_RESP_TYPE_LAST CREQ_INITIALIZE_FW_RESP_TYPE_QP_EVENT
+ u8 status;
+ __le16 cookie;
+ __le32 reserved32;
+ u8 v;
+ #define CREQ_INITIALIZE_FW_RESP_V 0x1UL
+ u8 event;
+ #define CREQ_INITIALIZE_FW_RESP_EVENT_INITIALIZE_FW 0x80UL
+ #define CREQ_INITIALIZE_FW_RESP_EVENT_LAST \
+ CREQ_INITIALIZE_FW_RESP_EVENT_INITIALIZE_FW
+ u8 reserved48[6];
+};
+
+/* cmdq_deinitialize_fw (size:128b/16B) */
+struct cmdq_deinitialize_fw {
+ u8 opcode;
+ #define CMDQ_DEINITIALIZE_FW_OPCODE_DEINITIALIZE_FW 0x81UL
+ #define CMDQ_DEINITIALIZE_FW_OPCODE_LAST \
+ CMDQ_DEINITIALIZE_FW_OPCODE_DEINITIALIZE_FW
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+};
+
+/* creq_deinitialize_fw_resp (size:128b/16B) */
+struct creq_deinitialize_fw_resp {
+ u8 type;
+ #define CREQ_DEINITIALIZE_FW_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_DEINITIALIZE_FW_RESP_TYPE_SFT 0
+ #define CREQ_DEINITIALIZE_FW_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_DEINITIALIZE_FW_RESP_TYPE_LAST CREQ_DEINITIALIZE_FW_RESP_TYPE_QP_EVENT
+ u8 status;
+ __le16 cookie;
+ __le32 reserved32;
+ u8 v;
+ #define CREQ_DEINITIALIZE_FW_RESP_V 0x1UL
+ u8 event;
+ #define CREQ_DEINITIALIZE_FW_RESP_EVENT_DEINITIALIZE_FW 0x81UL
+ #define CREQ_DEINITIALIZE_FW_RESP_EVENT_LAST \
+ CREQ_DEINITIALIZE_FW_RESP_EVENT_DEINITIALIZE_FW
+ u8 reserved48[6];
+};
+
+/* cmdq_create_qp (size:832b/104B) */
+struct cmdq_create_qp {
+ u8 opcode;
+ #define CMDQ_CREATE_QP_OPCODE_CREATE_QP 0x1UL
+ #define CMDQ_CREATE_QP_OPCODE_LAST CMDQ_CREATE_QP_OPCODE_CREATE_QP
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ __le64 qp_handle;
+ __le32 qp_flags;
+ #define CMDQ_CREATE_QP_QP_FLAGS_SRQ_USED 0x1UL
+ #define CMDQ_CREATE_QP_QP_FLAGS_FORCE_COMPLETION 0x2UL
+ #define CMDQ_CREATE_QP_QP_FLAGS_RESERVED_LKEY_ENABLE 0x4UL
+ #define CMDQ_CREATE_QP_QP_FLAGS_FR_PMR_ENABLED 0x8UL
+ #define CMDQ_CREATE_QP_QP_FLAGS_VARIABLE_SIZED_WQE_ENABLED 0x10UL
+ #define CMDQ_CREATE_QP_QP_FLAGS_OPTIMIZED_TRANSMIT_ENABLED 0x20UL
+ #define CMDQ_CREATE_QP_QP_FLAGS_RESPONDER_UD_CQE_WITH_CFA 0x40UL
+ #define CMDQ_CREATE_QP_QP_FLAGS_EXT_STATS_ENABLED 0x80UL
+ #define CMDQ_CREATE_QP_QP_FLAGS_EXPRESS_MODE_ENABLED 0x100UL
+ #define CMDQ_CREATE_QP_QP_FLAGS_STEERING_TAG_VALID 0x200UL
+ #define CMDQ_CREATE_QP_QP_FLAGS_RDMA_READ_OR_ATOMICS_USED 0x400UL
+ #define CMDQ_CREATE_QP_QP_FLAGS_LAST \
+ CMDQ_CREATE_QP_QP_FLAGS_RDMA_READ_OR_ATOMICS_USED
+ u8 type;
+ #define CMDQ_CREATE_QP_TYPE_RC 0x2UL
+ #define CMDQ_CREATE_QP_TYPE_UD 0x4UL
+ #define CMDQ_CREATE_QP_TYPE_RAW_ETHERTYPE 0x6UL
+ #define CMDQ_CREATE_QP_TYPE_GSI 0x7UL
+ #define CMDQ_CREATE_QP_TYPE_LAST CMDQ_CREATE_QP_TYPE_GSI
+ u8 sq_pg_size_sq_lvl;
+ #define CMDQ_CREATE_QP_SQ_LVL_MASK 0xfUL
+ #define CMDQ_CREATE_QP_SQ_LVL_SFT 0
+ #define CMDQ_CREATE_QP_SQ_LVL_LVL_0 0x0UL
+ #define CMDQ_CREATE_QP_SQ_LVL_LVL_1 0x1UL
+ #define CMDQ_CREATE_QP_SQ_LVL_LVL_2 0x2UL
+ #define CMDQ_CREATE_QP_SQ_LVL_LAST CMDQ_CREATE_QP_SQ_LVL_LVL_2
+ #define CMDQ_CREATE_QP_SQ_PG_SIZE_MASK 0xf0UL
+ #define CMDQ_CREATE_QP_SQ_PG_SIZE_SFT 4
+ #define CMDQ_CREATE_QP_SQ_PG_SIZE_PG_4K (0x0UL << 4)
+ #define CMDQ_CREATE_QP_SQ_PG_SIZE_PG_8K (0x1UL << 4)
+ #define CMDQ_CREATE_QP_SQ_PG_SIZE_PG_64K (0x2UL << 4)
+ #define CMDQ_CREATE_QP_SQ_PG_SIZE_PG_2M (0x3UL << 4)
+ #define CMDQ_CREATE_QP_SQ_PG_SIZE_PG_8M (0x4UL << 4)
+ #define CMDQ_CREATE_QP_SQ_PG_SIZE_PG_1G (0x5UL << 4)
+ #define CMDQ_CREATE_QP_SQ_PG_SIZE_LAST CMDQ_CREATE_QP_SQ_PG_SIZE_PG_1G
+ u8 rq_pg_size_rq_lvl;
+ #define CMDQ_CREATE_QP_RQ_LVL_MASK 0xfUL
+ #define CMDQ_CREATE_QP_RQ_LVL_SFT 0
+ #define CMDQ_CREATE_QP_RQ_LVL_LVL_0 0x0UL
+ #define CMDQ_CREATE_QP_RQ_LVL_LVL_1 0x1UL
+ #define CMDQ_CREATE_QP_RQ_LVL_LVL_2 0x2UL
+ #define CMDQ_CREATE_QP_RQ_LVL_LAST CMDQ_CREATE_QP_RQ_LVL_LVL_2
+ #define CMDQ_CREATE_QP_RQ_PG_SIZE_MASK 0xf0UL
+ #define CMDQ_CREATE_QP_RQ_PG_SIZE_SFT 4
+ #define CMDQ_CREATE_QP_RQ_PG_SIZE_PG_4K (0x0UL << 4)
+ #define CMDQ_CREATE_QP_RQ_PG_SIZE_PG_8K (0x1UL << 4)
+ #define CMDQ_CREATE_QP_RQ_PG_SIZE_PG_64K (0x2UL << 4)
+ #define CMDQ_CREATE_QP_RQ_PG_SIZE_PG_2M (0x3UL << 4)
+ #define CMDQ_CREATE_QP_RQ_PG_SIZE_PG_8M (0x4UL << 4)
+ #define CMDQ_CREATE_QP_RQ_PG_SIZE_PG_1G (0x5UL << 4)
+ #define CMDQ_CREATE_QP_RQ_PG_SIZE_LAST CMDQ_CREATE_QP_RQ_PG_SIZE_PG_1G
+ u8 unused_0;
+ __le32 dpi;
+ __le32 sq_size;
+ __le32 rq_size;
+ __le16 sq_fwo_sq_sge;
+ #define CMDQ_CREATE_QP_SQ_SGE_MASK 0xfUL
+ #define CMDQ_CREATE_QP_SQ_SGE_SFT 0
+ #define CMDQ_CREATE_QP_SQ_FWO_MASK 0xfff0UL
+ #define CMDQ_CREATE_QP_SQ_FWO_SFT 4
+ __le16 rq_fwo_rq_sge;
+ #define CMDQ_CREATE_QP_RQ_SGE_MASK 0xfUL
+ #define CMDQ_CREATE_QP_RQ_SGE_SFT 0
+ #define CMDQ_CREATE_QP_RQ_FWO_MASK 0xfff0UL
+ #define CMDQ_CREATE_QP_RQ_FWO_SFT 4
+ __le32 scq_cid;
+ __le32 rcq_cid;
+ __le32 srq_cid;
+ __le32 pd_id;
+ __le64 sq_pbl;
+ __le64 rq_pbl;
+ __le64 irrq_addr;
+ __le64 orrq_addr;
+ __le32 request_xid;
+ __le16 steering_tag;
+ __le16 reserved16;
+};
+
+/* creq_create_qp_resp (size:128b/16B) */
+struct creq_create_qp_resp {
+ u8 type;
+ #define CREQ_CREATE_QP_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_CREATE_QP_RESP_TYPE_SFT 0
+ #define CREQ_CREATE_QP_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_CREATE_QP_RESP_TYPE_LAST CREQ_CREATE_QP_RESP_TYPE_QP_EVENT
+ u8 status;
+ __le16 cookie;
+ __le32 xid;
+ u8 v;
+ #define CREQ_CREATE_QP_RESP_V 0x1UL
+ u8 event;
+ #define CREQ_CREATE_QP_RESP_EVENT_CREATE_QP 0x1UL
+ #define CREQ_CREATE_QP_RESP_EVENT_LAST CREQ_CREATE_QP_RESP_EVENT_CREATE_QP
+ u8 optimized_transmit_enabled;
+ u8 reserved48[5];
+};
+
+/* cmdq_destroy_qp (size:192b/24B) */
+struct cmdq_destroy_qp {
+ u8 opcode;
+ #define CMDQ_DESTROY_QP_OPCODE_DESTROY_QP 0x2UL
+ #define CMDQ_DESTROY_QP_OPCODE_LAST CMDQ_DESTROY_QP_OPCODE_DESTROY_QP
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ __le32 qp_cid;
+ __le32 unused_0;
+};
+
+/* creq_destroy_qp_resp (size:128b/16B) */
+struct creq_destroy_qp_resp {
+ u8 type;
+ #define CREQ_DESTROY_QP_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_DESTROY_QP_RESP_TYPE_SFT 0
+ #define CREQ_DESTROY_QP_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_DESTROY_QP_RESP_TYPE_LAST CREQ_DESTROY_QP_RESP_TYPE_QP_EVENT
+ u8 status;
+ __le16 cookie;
+ __le32 xid;
+ u8 v;
+ #define CREQ_DESTROY_QP_RESP_V 0x1UL
+ u8 event;
+ #define CREQ_DESTROY_QP_RESP_EVENT_DESTROY_QP 0x2UL
+ #define CREQ_DESTROY_QP_RESP_EVENT_LAST CREQ_DESTROY_QP_RESP_EVENT_DESTROY_QP
+ u8 reserved48[6];
+};
+
+/* cmdq_modify_qp (size:1024b/128B) */
+struct cmdq_modify_qp {
+ u8 opcode;
+ #define CMDQ_MODIFY_QP_OPCODE_MODIFY_QP 0x3UL
+ #define CMDQ_MODIFY_QP_OPCODE_LAST CMDQ_MODIFY_QP_OPCODE_MODIFY_QP
+ u8 cmd_size;
+ __le16 flags;
+ #define CMDQ_MODIFY_QP_FLAGS_SRQ_USED 0x1UL
+ __le16 cookie;
+ u8 resp_size;
+ u8 qp_type;
+ #define CMDQ_MODIFY_QP_QP_TYPE_RC 0x2UL
+ #define CMDQ_MODIFY_QP_QP_TYPE_UD 0x4UL
+ #define CMDQ_MODIFY_QP_QP_TYPE_RAW_ETHERTYPE 0x6UL
+ #define CMDQ_MODIFY_QP_QP_TYPE_GSI 0x7UL
+ #define CMDQ_MODIFY_QP_QP_TYPE_LAST CMDQ_MODIFY_QP_QP_TYPE_GSI
+ __le64 resp_addr;
+ __le32 modify_mask;
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_STATE 0x1UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_EN_SQD_ASYNC_NOTIFY 0x2UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_ACCESS 0x4UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_PKEY 0x8UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_QKEY 0x10UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_DGID 0x20UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_FLOW_LABEL 0x40UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_SGID_INDEX 0x80UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_HOP_LIMIT 0x100UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_TRAFFIC_CLASS 0x200UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_DEST_MAC 0x400UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_PINGPONG_PUSH_MODE 0x800UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_PATH_MTU 0x1000UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_TIMEOUT 0x2000UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_RETRY_CNT 0x4000UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_RNR_RETRY 0x8000UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_RQ_PSN 0x10000UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_MAX_RD_ATOMIC 0x20000UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_MIN_RNR_TIMER 0x40000UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_SQ_PSN 0x80000UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_MAX_DEST_RD_ATOMIC 0x100000UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_SQ_SIZE 0x200000UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_RQ_SIZE 0x400000UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_SQ_SGE 0x800000UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_RQ_SGE 0x1000000UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_MAX_INLINE_DATA 0x2000000UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_DEST_QP_ID 0x4000000UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_SRC_MAC 0x8000000UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_VLAN_ID 0x10000000UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_ENABLE_CC 0x20000000UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_TOS_ECN 0x40000000UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_TOS_DSCP 0x80000000UL
+ __le32 qp_cid;
+ u8 network_type_en_sqd_async_notify_new_state;
+ #define CMDQ_MODIFY_QP_NEW_STATE_MASK 0xfUL
+ #define CMDQ_MODIFY_QP_NEW_STATE_SFT 0
+ #define CMDQ_MODIFY_QP_NEW_STATE_RESET 0x0UL
+ #define CMDQ_MODIFY_QP_NEW_STATE_INIT 0x1UL
+ #define CMDQ_MODIFY_QP_NEW_STATE_RTR 0x2UL
+ #define CMDQ_MODIFY_QP_NEW_STATE_RTS 0x3UL
+ #define CMDQ_MODIFY_QP_NEW_STATE_SQD 0x4UL
+ #define CMDQ_MODIFY_QP_NEW_STATE_SQE 0x5UL
+ #define CMDQ_MODIFY_QP_NEW_STATE_ERR 0x6UL
+ #define CMDQ_MODIFY_QP_NEW_STATE_LAST CMDQ_MODIFY_QP_NEW_STATE_ERR
+ #define CMDQ_MODIFY_QP_EN_SQD_ASYNC_NOTIFY 0x10UL
+ #define CMDQ_MODIFY_QP_UNUSED1 0x20UL
+ #define CMDQ_MODIFY_QP_NETWORK_TYPE_MASK 0xc0UL
+ #define CMDQ_MODIFY_QP_NETWORK_TYPE_SFT 6
+ #define CMDQ_MODIFY_QP_NETWORK_TYPE_ROCEV1 (0x0UL << 6)
+ #define CMDQ_MODIFY_QP_NETWORK_TYPE_ROCEV2_IPV4 (0x2UL << 6)
+ #define CMDQ_MODIFY_QP_NETWORK_TYPE_ROCEV2_IPV6 (0x3UL << 6)
+ #define CMDQ_MODIFY_QP_NETWORK_TYPE_LAST CMDQ_MODIFY_QP_NETWORK_TYPE_ROCEV2_IPV6
+ u8 access;
+ #define CMDQ_MODIFY_QP_ACCESS_REMOTE_ATOMIC_REMOTE_READ_REMOTE_WRITE_LOCAL_WRITE_MASK 0xffUL
+ #define CMDQ_MODIFY_QP_ACCESS_REMOTE_ATOMIC_REMOTE_READ_REMOTE_WRITE_LOCAL_WRITE_SFT 0
+ #define CMDQ_MODIFY_QP_ACCESS_LOCAL_WRITE 0x1UL
+ #define CMDQ_MODIFY_QP_ACCESS_REMOTE_WRITE 0x2UL
+ #define CMDQ_MODIFY_QP_ACCESS_REMOTE_READ 0x4UL
+ #define CMDQ_MODIFY_QP_ACCESS_REMOTE_ATOMIC 0x8UL
+ __le16 pkey;
+ __le32 qkey;
+ __le32 dgid[4];
+ __le32 flow_label;
+ __le16 sgid_index;
+ u8 hop_limit;
+ u8 traffic_class;
+ __le16 dest_mac[3];
+ u8 tos_dscp_tos_ecn;
+ #define CMDQ_MODIFY_QP_TOS_ECN_MASK 0x3UL
+ #define CMDQ_MODIFY_QP_TOS_ECN_SFT 0
+ #define CMDQ_MODIFY_QP_TOS_DSCP_MASK 0xfcUL
+ #define CMDQ_MODIFY_QP_TOS_DSCP_SFT 2
+ u8 path_mtu_pingpong_push_enable;
+ #define CMDQ_MODIFY_QP_PINGPONG_PUSH_ENABLE 0x1UL
+ #define CMDQ_MODIFY_QP_UNUSED3_MASK 0xeUL
+ #define CMDQ_MODIFY_QP_UNUSED3_SFT 1
+ #define CMDQ_MODIFY_QP_PATH_MTU_MASK 0xf0UL
+ #define CMDQ_MODIFY_QP_PATH_MTU_SFT 4
+ #define CMDQ_MODIFY_QP_PATH_MTU_MTU_256 (0x0UL << 4)
+ #define CMDQ_MODIFY_QP_PATH_MTU_MTU_512 (0x1UL << 4)
+ #define CMDQ_MODIFY_QP_PATH_MTU_MTU_1024 (0x2UL << 4)
+ #define CMDQ_MODIFY_QP_PATH_MTU_MTU_2048 (0x3UL << 4)
+ #define CMDQ_MODIFY_QP_PATH_MTU_MTU_4096 (0x4UL << 4)
+ #define CMDQ_MODIFY_QP_PATH_MTU_MTU_8192 (0x5UL << 4)
+ #define CMDQ_MODIFY_QP_PATH_MTU_LAST CMDQ_MODIFY_QP_PATH_MTU_MTU_8192
+ u8 timeout;
+ u8 retry_cnt;
+ u8 rnr_retry;
+ u8 min_rnr_timer;
+ __le32 rq_psn;
+ __le32 sq_psn;
+ u8 max_rd_atomic;
+ u8 max_dest_rd_atomic;
+ __le16 enable_cc;
+ #define CMDQ_MODIFY_QP_ENABLE_CC 0x1UL
+ #define CMDQ_MODIFY_QP_UNUSED15_MASK 0xfffeUL
+ #define CMDQ_MODIFY_QP_UNUSED15_SFT 1
+ __le32 sq_size;
+ __le32 rq_size;
+ __le16 sq_sge;
+ __le16 rq_sge;
+ __le32 max_inline_data;
+ __le32 dest_qp_id;
+ __le32 pingpong_push_dpi;
+ __le16 src_mac[3];
+ __le16 vlan_pcp_vlan_dei_vlan_id;
+ #define CMDQ_MODIFY_QP_VLAN_ID_MASK 0xfffUL
+ #define CMDQ_MODIFY_QP_VLAN_ID_SFT 0
+ #define CMDQ_MODIFY_QP_VLAN_DEI 0x1000UL
+ #define CMDQ_MODIFY_QP_VLAN_PCP_MASK 0xe000UL
+ #define CMDQ_MODIFY_QP_VLAN_PCP_SFT 13
+ __le64 irrq_addr;
+ __le64 orrq_addr;
+ __le32 ext_modify_mask;
+ #define CMDQ_MODIFY_QP_EXT_MODIFY_MASK_EXT_STATS_CTX 0x1UL
+ #define CMDQ_MODIFY_QP_EXT_MODIFY_MASK_SCHQ_ID_VALID 0x2UL
+ __le32 ext_stats_ctx_id;
+ __le16 schq_id;
+ __le16 unused_0;
+ __le32 reserved32;
+};
+
+/* creq_modify_qp_resp (size:128b/16B) */
+struct creq_modify_qp_resp {
+ u8 type;
+ #define CREQ_MODIFY_QP_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_MODIFY_QP_RESP_TYPE_SFT 0
+ #define CREQ_MODIFY_QP_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_MODIFY_QP_RESP_TYPE_LAST CREQ_MODIFY_QP_RESP_TYPE_QP_EVENT
+ u8 status;
+ __le16 cookie;
+ __le32 xid;
+ u8 v;
+ #define CREQ_MODIFY_QP_RESP_V 0x1UL
+ u8 event;
+ #define CREQ_MODIFY_QP_RESP_EVENT_MODIFY_QP 0x3UL
+ #define CREQ_MODIFY_QP_RESP_EVENT_LAST CREQ_MODIFY_QP_RESP_EVENT_MODIFY_QP
+ u8 pingpong_push_state_index_enabled;
+ #define CREQ_MODIFY_QP_RESP_PINGPONG_PUSH_ENABLED 0x1UL
+ #define CREQ_MODIFY_QP_RESP_PINGPONG_PUSH_INDEX_MASK 0xeUL
+ #define CREQ_MODIFY_QP_RESP_PINGPONG_PUSH_INDEX_SFT 1
+ #define CREQ_MODIFY_QP_RESP_PINGPONG_PUSH_STATE 0x10UL
+ u8 reserved8;
+ __le32 lag_src_mac;
+};
+
+/* cmdq_query_qp (size:192b/24B) */
+struct cmdq_query_qp {
+ u8 opcode;
+ #define CMDQ_QUERY_QP_OPCODE_QUERY_QP 0x4UL
+ #define CMDQ_QUERY_QP_OPCODE_LAST CMDQ_QUERY_QP_OPCODE_QUERY_QP
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ __le32 qp_cid;
+ __le32 unused_0;
+};
+
+/* creq_query_qp_resp (size:128b/16B) */
+struct creq_query_qp_resp {
+ u8 type;
+ #define CREQ_QUERY_QP_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_QUERY_QP_RESP_TYPE_SFT 0
+ #define CREQ_QUERY_QP_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_QUERY_QP_RESP_TYPE_LAST CREQ_QUERY_QP_RESP_TYPE_QP_EVENT
+ u8 status;
+ __le16 cookie;
+ __le32 size;
+ u8 v;
+ #define CREQ_QUERY_QP_RESP_V 0x1UL
+ u8 event;
+ #define CREQ_QUERY_QP_RESP_EVENT_QUERY_QP 0x4UL
+ #define CREQ_QUERY_QP_RESP_EVENT_LAST CREQ_QUERY_QP_RESP_EVENT_QUERY_QP
+ u8 reserved48[6];
+};
+
+/* creq_query_qp_resp_sb (size:832b/104B) */
+struct creq_query_qp_resp_sb {
+ u8 opcode;
+ #define CREQ_QUERY_QP_RESP_SB_OPCODE_QUERY_QP 0x4UL
+ #define CREQ_QUERY_QP_RESP_SB_OPCODE_LAST CREQ_QUERY_QP_RESP_SB_OPCODE_QUERY_QP
+ u8 status;
+ __le16 cookie;
+ __le16 flags;
+ u8 resp_size;
+ u8 reserved8;
+ __le32 xid;
+ u8 en_sqd_async_notify_state;
+ #define CREQ_QUERY_QP_RESP_SB_STATE_MASK 0xfUL
+ #define CREQ_QUERY_QP_RESP_SB_STATE_SFT 0
+ #define CREQ_QUERY_QP_RESP_SB_STATE_RESET 0x0UL
+ #define CREQ_QUERY_QP_RESP_SB_STATE_INIT 0x1UL
+ #define CREQ_QUERY_QP_RESP_SB_STATE_RTR 0x2UL
+ #define CREQ_QUERY_QP_RESP_SB_STATE_RTS 0x3UL
+ #define CREQ_QUERY_QP_RESP_SB_STATE_SQD 0x4UL
+ #define CREQ_QUERY_QP_RESP_SB_STATE_SQE 0x5UL
+ #define CREQ_QUERY_QP_RESP_SB_STATE_ERR 0x6UL
+ #define CREQ_QUERY_QP_RESP_SB_STATE_LAST CREQ_QUERY_QP_RESP_SB_STATE_ERR
+ #define CREQ_QUERY_QP_RESP_SB_EN_SQD_ASYNC_NOTIFY 0x10UL
+ #define CREQ_QUERY_QP_RESP_SB_UNUSED3_MASK 0xe0UL
+ #define CREQ_QUERY_QP_RESP_SB_UNUSED3_SFT 5
+ u8 access;
+ #define \
+ CREQ_QUERY_QP_RESP_SB_ACCESS_REMOTE_ATOMIC_REMOTE_READ_REMOTE_WRITE_LOCAL_WRITE_MASK\
+ 0xffUL
+ #define CREQ_QUERY_QP_RESP_SB_ACCESS_REMOTE_ATOMIC_REMOTE_READ_REMOTE_WRITE_LOCAL_WRITE_SFT\
+ 0
+ #define CREQ_QUERY_QP_RESP_SB_ACCESS_LOCAL_WRITE 0x1UL
+ #define CREQ_QUERY_QP_RESP_SB_ACCESS_REMOTE_WRITE 0x2UL
+ #define CREQ_QUERY_QP_RESP_SB_ACCESS_REMOTE_READ 0x4UL
+ #define CREQ_QUERY_QP_RESP_SB_ACCESS_REMOTE_ATOMIC 0x8UL
+ __le16 pkey;
+ __le32 qkey;
+ __le16 udp_src_port;
+ __le16 reserved16;
+ __le32 dgid[4];
+ __le32 flow_label;
+ __le16 sgid_index;
+ u8 hop_limit;
+ u8 traffic_class;
+ __le16 dest_mac[3];
+ __le16 path_mtu_dest_vlan_id;
+ #define CREQ_QUERY_QP_RESP_SB_DEST_VLAN_ID_MASK 0xfffUL
+ #define CREQ_QUERY_QP_RESP_SB_DEST_VLAN_ID_SFT 0
+ #define CREQ_QUERY_QP_RESP_SB_PATH_MTU_MASK 0xf000UL
+ #define CREQ_QUERY_QP_RESP_SB_PATH_MTU_SFT 12
+ #define CREQ_QUERY_QP_RESP_SB_PATH_MTU_MTU_256 (0x0UL << 12)
+ #define CREQ_QUERY_QP_RESP_SB_PATH_MTU_MTU_512 (0x1UL << 12)
+ #define CREQ_QUERY_QP_RESP_SB_PATH_MTU_MTU_1024 (0x2UL << 12)
+ #define CREQ_QUERY_QP_RESP_SB_PATH_MTU_MTU_2048 (0x3UL << 12)
+ #define CREQ_QUERY_QP_RESP_SB_PATH_MTU_MTU_4096 (0x4UL << 12)
+ #define CREQ_QUERY_QP_RESP_SB_PATH_MTU_MTU_8192 (0x5UL << 12)
+ #define CREQ_QUERY_QP_RESP_SB_PATH_MTU_LAST CREQ_QUERY_QP_RESP_SB_PATH_MTU_MTU_8192
+ u8 timeout;
+ u8 retry_cnt;
+ u8 rnr_retry;
+ u8 min_rnr_timer;
+ __le32 rq_psn;
+ __le32 sq_psn;
+ u8 max_rd_atomic;
+ u8 max_dest_rd_atomic;
+ u8 tos_dscp_tos_ecn;
+ #define CREQ_QUERY_QP_RESP_SB_TOS_ECN_MASK 0x3UL
+ #define CREQ_QUERY_QP_RESP_SB_TOS_ECN_SFT 0
+ #define CREQ_QUERY_QP_RESP_SB_TOS_DSCP_MASK 0xfcUL
+ #define CREQ_QUERY_QP_RESP_SB_TOS_DSCP_SFT 2
+ u8 enable_cc;
+ #define CREQ_QUERY_QP_RESP_SB_ENABLE_CC 0x1UL
+ __le32 sq_size;
+ __le32 rq_size;
+ __le16 sq_sge;
+ __le16 rq_sge;
+ __le32 max_inline_data;
+ __le32 dest_qp_id;
+ __le16 port_id;
+ u8 unused_0;
+ u8 stat_collection_id;
+ __le16 src_mac[3];
+ __le16 vlan_pcp_vlan_dei_vlan_id;
+ #define CREQ_QUERY_QP_RESP_SB_VLAN_ID_MASK 0xfffUL
+ #define CREQ_QUERY_QP_RESP_SB_VLAN_ID_SFT 0
+ #define CREQ_QUERY_QP_RESP_SB_VLAN_DEI 0x1000UL
+ #define CREQ_QUERY_QP_RESP_SB_VLAN_PCP_MASK 0xe000UL
+ #define CREQ_QUERY_QP_RESP_SB_VLAN_PCP_SFT 13
+};
+
+/* cmdq_query_qp_extend (size:192b/24B) */
+struct cmdq_query_qp_extend {
+ u8 opcode;
+ #define CMDQ_QUERY_QP_EXTEND_OPCODE_QUERY_QP_EXTEND 0x91UL
+ #define CMDQ_QUERY_QP_EXTEND_OPCODE_LAST CMDQ_QUERY_QP_EXTEND_OPCODE_QUERY_QP_EXTEND
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 num_qps;
+ __le64 resp_addr;
+ __le32 function_id;
+ #define CMDQ_QUERY_QP_EXTEND_PF_NUM_MASK 0xffUL
+ #define CMDQ_QUERY_QP_EXTEND_PF_NUM_SFT 0
+ #define CMDQ_QUERY_QP_EXTEND_VF_NUM_MASK 0xffff00UL
+ #define CMDQ_QUERY_QP_EXTEND_VF_NUM_SFT 8
+ #define CMDQ_QUERY_QP_EXTEND_VF_VALID 0x1000000UL
+ __le32 current_index;
+};
+
+/* creq_query_qp_extend_resp (size:128b/16B) */
+struct creq_query_qp_extend_resp {
+ u8 type;
+ #define CREQ_QUERY_QP_EXTEND_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_QUERY_QP_EXTEND_RESP_TYPE_SFT 0
+ #define CREQ_QUERY_QP_EXTEND_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_QUERY_QP_EXTEND_RESP_TYPE_LAST CREQ_QUERY_QP_EXTEND_RESP_TYPE_QP_EVENT
+ u8 status;
+ __le16 cookie;
+ __le32 size;
+ u8 v;
+ #define CREQ_QUERY_QP_EXTEND_RESP_V 0x1UL
+ u8 event;
+ #define CREQ_QUERY_QP_EXTEND_RESP_EVENT_QUERY_QP_EXTEND 0x91UL
+ #define CREQ_QUERY_QP_EXTEND_RESP_EVENT_LAST CREQ_QUERY_QP_EXTEND_RESP_EVENT_QUERY_QP_EXTEND
+ __le16 reserved16;
+ __le32 current_index;
+};
+
+/* creq_query_qp_extend_resp_sb (size:384b/48B) */
+struct creq_query_qp_extend_resp_sb {
+ u8 opcode;
+ #define CREQ_QUERY_QP_EXTEND_RESP_SB_OPCODE_QUERY_QP_EXTEND 0x91UL
+ #define CREQ_QUERY_QP_EXTEND_RESP_SB_OPCODE_LAST \
+ CREQ_QUERY_QP_EXTEND_RESP_SB_OPCODE_QUERY_QP_EXTEND
+ u8 status;
+ __le16 cookie;
+ __le16 flags;
+ u8 resp_size;
+ u8 reserved8;
+ __le32 xid;
+ u8 state;
+ #define CREQ_QUERY_QP_EXTEND_RESP_SB_STATE_MASK 0xfUL
+ #define CREQ_QUERY_QP_EXTEND_RESP_SB_STATE_SFT 0
+ #define CREQ_QUERY_QP_EXTEND_RESP_SB_STATE_RESET 0x0UL
+ #define CREQ_QUERY_QP_EXTEND_RESP_SB_STATE_INIT 0x1UL
+ #define CREQ_QUERY_QP_EXTEND_RESP_SB_STATE_RTR 0x2UL
+ #define CREQ_QUERY_QP_EXTEND_RESP_SB_STATE_RTS 0x3UL
+ #define CREQ_QUERY_QP_EXTEND_RESP_SB_STATE_SQD 0x4UL
+ #define CREQ_QUERY_QP_EXTEND_RESP_SB_STATE_SQE 0x5UL
+ #define CREQ_QUERY_QP_EXTEND_RESP_SB_STATE_ERR 0x6UL
+ #define CREQ_QUERY_QP_EXTEND_RESP_SB_STATE_LAST CREQ_QUERY_QP_EXTEND_RESP_SB_STATE_ERR
+ #define CREQ_QUERY_QP_EXTEND_RESP_SB_UNUSED4_MASK 0xf0UL
+ #define CREQ_QUERY_QP_EXTEND_RESP_SB_UNUSED4_SFT 4
+ u8 reserved_8;
+ __le16 port_id;
+ __le32 qkey;
+ __le16 sgid_index;
+ u8 network_type;
+ #define CREQ_QUERY_QP_EXTEND_RESP_SB_NETWORK_TYPE_ROCEV1 0x0UL
+ #define CREQ_QUERY_QP_EXTEND_RESP_SB_NETWORK_TYPE_ROCEV2_IPV4 0x2UL
+ #define CREQ_QUERY_QP_EXTEND_RESP_SB_NETWORK_TYPE_ROCEV2_IPV6 0x3UL
+ #define CREQ_QUERY_QP_EXTEND_RESP_SB_NETWORK_TYPE_LAST \
+ CREQ_QUERY_QP_EXTEND_RESP_SB_NETWORK_TYPE_ROCEV2_IPV6
+ u8 unused_0;
+ __le32 dgid[4];
+ __le32 dest_qp_id;
+ u8 stat_collection_id;
+ u8 reservred_8;
+ __le16 reserved_16;
+};
+
+/* creq_query_qp_extend_resp_sb_tlv (size:512b/64B) */
+struct creq_query_qp_extend_resp_sb_tlv {
+ __le16 cmd_discr;
+ u8 reserved_8b;
+ u8 tlv_flags;
+ #define CREQ_QUERY_QP_EXTEND_RESP_SB_TLV_TLV_FLAGS_MORE 0x1UL
+ #define CREQ_QUERY_QP_EXTEND_RESP_SB_TLV_TLV_FLAGS_MORE_LAST 0x0UL
+ #define CREQ_QUERY_QP_EXTEND_RESP_SB_TLV_TLV_FLAGS_MORE_NOT_LAST 0x1UL
+ #define CREQ_QUERY_QP_EXTEND_RESP_SB_TLV_TLV_FLAGS_REQUIRED 0x2UL
+ #define CREQ_QUERY_QP_EXTEND_RESP_SB_TLV_TLV_FLAGS_REQUIRED_NO (0x0UL << 1)
+ #define CREQ_QUERY_QP_EXTEND_RESP_SB_TLV_TLV_FLAGS_REQUIRED_YES (0x1UL << 1)
+ #define CREQ_QUERY_QP_EXTEND_RESP_SB_TLV_TLV_FLAGS_REQUIRED_LAST \
+ CREQ_QUERY_QP_EXTEND_RESP_SB_TLV_TLV_FLAGS_REQUIRED_YES
+ __le16 tlv_type;
+ __le16 length;
+ u8 total_size;
+ u8 reserved56[7];
+ u8 opcode;
+ #define CREQ_QUERY_QP_EXTEND_RESP_SB_TLV_OPCODE_QUERY_QP_EXTEND 0x91UL
+ #define CREQ_QUERY_QP_EXTEND_RESP_SB_TLV_OPCODE_LAST \
+ CREQ_QUERY_QP_EXTEND_RESP_SB_TLV_OPCODE_QUERY_QP_EXTEND
+ u8 status;
+ __le16 cookie;
+ __le16 flags;
+ u8 resp_size;
+ u8 reserved8;
+ __le32 xid;
+ u8 state;
+ #define CREQ_QUERY_QP_EXTEND_RESP_SB_TLV_STATE_MASK 0xfUL
+ #define CREQ_QUERY_QP_EXTEND_RESP_SB_TLV_STATE_SFT 0
+ #define CREQ_QUERY_QP_EXTEND_RESP_SB_TLV_STATE_RESET 0x0UL
+ #define CREQ_QUERY_QP_EXTEND_RESP_SB_TLV_STATE_INIT 0x1UL
+ #define CREQ_QUERY_QP_EXTEND_RESP_SB_TLV_STATE_RTR 0x2UL
+ #define CREQ_QUERY_QP_EXTEND_RESP_SB_TLV_STATE_RTS 0x3UL
+ #define CREQ_QUERY_QP_EXTEND_RESP_SB_TLV_STATE_SQD 0x4UL
+ #define CREQ_QUERY_QP_EXTEND_RESP_SB_TLV_STATE_SQE 0x5UL
+ #define CREQ_QUERY_QP_EXTEND_RESP_SB_TLV_STATE_ERR 0x6UL
+ #define CREQ_QUERY_QP_EXTEND_RESP_SB_TLV_STATE_LAST \
+ CREQ_QUERY_QP_EXTEND_RESP_SB_TLV_STATE_ERR
+ #define CREQ_QUERY_QP_EXTEND_RESP_SB_TLV_UNUSED4_MASK 0xf0UL
+ #define CREQ_QUERY_QP_EXTEND_RESP_SB_TLV_UNUSED4_SFT 4
+ u8 reserved_8;
+ __le16 port_id;
+ __le32 qkey;
+ __le16 sgid_index;
+ u8 network_type;
+ #define CREQ_QUERY_QP_EXTEND_RESP_SB_TLV_NETWORK_TYPE_ROCEV1 0x0UL
+ #define CREQ_QUERY_QP_EXTEND_RESP_SB_TLV_NETWORK_TYPE_ROCEV2_IPV4 0x2UL
+ #define CREQ_QUERY_QP_EXTEND_RESP_SB_TLV_NETWORK_TYPE_ROCEV2_IPV6 0x3UL
+ #define CREQ_QUERY_QP_EXTEND_RESP_SB_TLV_NETWORK_TYPE_LAST \
+ CREQ_QUERY_QP_EXTEND_RESP_SB_TLV_NETWORK_TYPE_ROCEV2_IPV6
+ u8 unused_0;
+ __le32 dgid[4];
+ __le32 dest_qp_id;
+ u8 stat_collection_id;
+ u8 reservred_8;
+ __le16 reserved_16;
+};
+
+/* cmdq_create_srq (size:448b/56B) */
+struct cmdq_create_srq {
+ u8 opcode;
+ #define CMDQ_CREATE_SRQ_OPCODE_CREATE_SRQ 0x5UL
+ #define CMDQ_CREATE_SRQ_OPCODE_LAST CMDQ_CREATE_SRQ_OPCODE_CREATE_SRQ
+ u8 cmd_size;
+ __le16 flags;
+ #define CMDQ_CREATE_SRQ_FLAGS_STEERING_TAG_VALID 0x1UL
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ __le64 srq_handle;
+ __le16 pg_size_lvl;
+ #define CMDQ_CREATE_SRQ_LVL_MASK 0x3UL
+ #define CMDQ_CREATE_SRQ_LVL_SFT 0
+ #define CMDQ_CREATE_SRQ_LVL_LVL_0 0x0UL
+ #define CMDQ_CREATE_SRQ_LVL_LVL_1 0x1UL
+ #define CMDQ_CREATE_SRQ_LVL_LVL_2 0x2UL
+ #define CMDQ_CREATE_SRQ_LVL_LAST CMDQ_CREATE_SRQ_LVL_LVL_2
+ #define CMDQ_CREATE_SRQ_PG_SIZE_MASK 0x1cUL
+ #define CMDQ_CREATE_SRQ_PG_SIZE_SFT 2
+ #define CMDQ_CREATE_SRQ_PG_SIZE_PG_4K (0x0UL << 2)
+ #define CMDQ_CREATE_SRQ_PG_SIZE_PG_8K (0x1UL << 2)
+ #define CMDQ_CREATE_SRQ_PG_SIZE_PG_64K (0x2UL << 2)
+ #define CMDQ_CREATE_SRQ_PG_SIZE_PG_2M (0x3UL << 2)
+ #define CMDQ_CREATE_SRQ_PG_SIZE_PG_8M (0x4UL << 2)
+ #define CMDQ_CREATE_SRQ_PG_SIZE_PG_1G (0x5UL << 2)
+ #define CMDQ_CREATE_SRQ_PG_SIZE_LAST CMDQ_CREATE_SRQ_PG_SIZE_PG_1G
+ #define CMDQ_CREATE_SRQ_UNUSED11_MASK 0xffe0UL
+ #define CMDQ_CREATE_SRQ_UNUSED11_SFT 5
+ __le16 eventq_id;
+ #define CMDQ_CREATE_SRQ_EVENTQ_ID_MASK 0xfffUL
+ #define CMDQ_CREATE_SRQ_EVENTQ_ID_SFT 0
+ #define CMDQ_CREATE_SRQ_UNUSED4_MASK 0xf000UL
+ #define CMDQ_CREATE_SRQ_UNUSED4_SFT 12
+ __le16 srq_size;
+ __le16 srq_fwo;
+ __le32 dpi;
+ __le32 pd_id;
+ __le64 pbl;
+ __le16 steering_tag;
+ u8 reserved48[6];
+};
+
+/* creq_create_srq_resp (size:128b/16B) */
+struct creq_create_srq_resp {
+ u8 type;
+ #define CREQ_CREATE_SRQ_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_CREATE_SRQ_RESP_TYPE_SFT 0
+ #define CREQ_CREATE_SRQ_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_CREATE_SRQ_RESP_TYPE_LAST CREQ_CREATE_SRQ_RESP_TYPE_QP_EVENT
+ u8 status;
+ __le16 cookie;
+ __le32 xid;
+ u8 v;
+ #define CREQ_CREATE_SRQ_RESP_V 0x1UL
+ u8 event;
+ #define CREQ_CREATE_SRQ_RESP_EVENT_CREATE_SRQ 0x5UL
+ #define CREQ_CREATE_SRQ_RESP_EVENT_LAST CREQ_CREATE_SRQ_RESP_EVENT_CREATE_SRQ
+ u8 reserved48[6];
+};
+
+/* cmdq_destroy_srq (size:192b/24B) */
+struct cmdq_destroy_srq {
+ u8 opcode;
+ #define CMDQ_DESTROY_SRQ_OPCODE_DESTROY_SRQ 0x6UL
+ #define CMDQ_DESTROY_SRQ_OPCODE_LAST CMDQ_DESTROY_SRQ_OPCODE_DESTROY_SRQ
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ __le32 srq_cid;
+ __le32 unused_0;
+};
+
+/* creq_destroy_srq_resp (size:128b/16B) */
+struct creq_destroy_srq_resp {
+ u8 type;
+ #define CREQ_DESTROY_SRQ_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_DESTROY_SRQ_RESP_TYPE_SFT 0
+ #define CREQ_DESTROY_SRQ_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_DESTROY_SRQ_RESP_TYPE_LAST CREQ_DESTROY_SRQ_RESP_TYPE_QP_EVENT
+ u8 status;
+ __le16 cookie;
+ __le32 xid;
+ u8 v;
+ #define CREQ_DESTROY_SRQ_RESP_V 0x1UL
+ u8 event;
+ #define CREQ_DESTROY_SRQ_RESP_EVENT_DESTROY_SRQ 0x6UL
+ #define CREQ_DESTROY_SRQ_RESP_EVENT_LAST CREQ_DESTROY_SRQ_RESP_EVENT_DESTROY_SRQ
+ __le16 enable_for_arm[3];
+ #define CREQ_DESTROY_SRQ_RESP_UNUSED0_MASK 0xffffUL
+ #define CREQ_DESTROY_SRQ_RESP_UNUSED0_SFT 0
+ #define CREQ_DESTROY_SRQ_RESP_ENABLE_FOR_ARM_MASK 0x30000UL
+ #define CREQ_DESTROY_SRQ_RESP_ENABLE_FOR_ARM_SFT 16
+};
+
+/* cmdq_query_srq (size:192b/24B) */
+struct cmdq_query_srq {
+ u8 opcode;
+ #define CMDQ_QUERY_SRQ_OPCODE_QUERY_SRQ 0x8UL
+ #define CMDQ_QUERY_SRQ_OPCODE_LAST CMDQ_QUERY_SRQ_OPCODE_QUERY_SRQ
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ __le32 srq_cid;
+ __le32 unused_0;
+};
+
+/* creq_query_srq_resp (size:128b/16B) */
+struct creq_query_srq_resp {
+ u8 type;
+ #define CREQ_QUERY_SRQ_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_QUERY_SRQ_RESP_TYPE_SFT 0
+ #define CREQ_QUERY_SRQ_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_QUERY_SRQ_RESP_TYPE_LAST CREQ_QUERY_SRQ_RESP_TYPE_QP_EVENT
+ u8 status;
+ __le16 cookie;
+ __le32 size;
+ u8 v;
+ #define CREQ_QUERY_SRQ_RESP_V 0x1UL
+ u8 event;
+ #define CREQ_QUERY_SRQ_RESP_EVENT_QUERY_SRQ 0x8UL
+ #define CREQ_QUERY_SRQ_RESP_EVENT_LAST CREQ_QUERY_SRQ_RESP_EVENT_QUERY_SRQ
+ u8 reserved48[6];
+};
+
+/* creq_query_srq_resp_sb (size:256b/32B) */
+struct creq_query_srq_resp_sb {
+ u8 opcode;
+ #define CREQ_QUERY_SRQ_RESP_SB_OPCODE_QUERY_SRQ 0x8UL
+ #define CREQ_QUERY_SRQ_RESP_SB_OPCODE_LAST CREQ_QUERY_SRQ_RESP_SB_OPCODE_QUERY_SRQ
+ u8 status;
+ __le16 cookie;
+ __le16 flags;
+ u8 resp_size;
+ u8 reserved8;
+ __le32 xid;
+ __le16 srq_limit;
+ __le16 reserved16;
+ __le32 data[4];
+};
+
+/* cmdq_create_cq (size:448b/56B) */
+struct cmdq_create_cq {
+ u8 opcode;
+ #define CMDQ_CREATE_CQ_OPCODE_CREATE_CQ 0x9UL
+ #define CMDQ_CREATE_CQ_OPCODE_LAST CMDQ_CREATE_CQ_OPCODE_CREATE_CQ
+ u8 cmd_size;
+ __le16 flags;
+ #define CMDQ_CREATE_CQ_FLAGS_DISABLE_CQ_OVERFLOW_DETECTION 0x1UL
+ #define CMDQ_CREATE_CQ_FLAGS_STEERING_TAG_VALID 0x2UL
+ #define CMDQ_CREATE_CQ_FLAGS_INFINITE_CQ_MODE 0x4UL
+ #define CMDQ_CREATE_CQ_FLAGS_COALESCING_VALID 0x8UL
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ __le64 cq_handle;
+ __le32 pg_size_lvl;
+ #define CMDQ_CREATE_CQ_LVL_MASK 0x3UL
+ #define CMDQ_CREATE_CQ_LVL_SFT 0
+ #define CMDQ_CREATE_CQ_LVL_LVL_0 0x0UL
+ #define CMDQ_CREATE_CQ_LVL_LVL_1 0x1UL
+ #define CMDQ_CREATE_CQ_LVL_LVL_2 0x2UL
+ #define CMDQ_CREATE_CQ_LVL_LAST CMDQ_CREATE_CQ_LVL_LVL_2
+ #define CMDQ_CREATE_CQ_PG_SIZE_MASK 0x1cUL
+ #define CMDQ_CREATE_CQ_PG_SIZE_SFT 2
+ #define CMDQ_CREATE_CQ_PG_SIZE_PG_4K (0x0UL << 2)
+ #define CMDQ_CREATE_CQ_PG_SIZE_PG_8K (0x1UL << 2)
+ #define CMDQ_CREATE_CQ_PG_SIZE_PG_64K (0x2UL << 2)
+ #define CMDQ_CREATE_CQ_PG_SIZE_PG_2M (0x3UL << 2)
+ #define CMDQ_CREATE_CQ_PG_SIZE_PG_8M (0x4UL << 2)
+ #define CMDQ_CREATE_CQ_PG_SIZE_PG_1G (0x5UL << 2)
+ #define CMDQ_CREATE_CQ_PG_SIZE_LAST CMDQ_CREATE_CQ_PG_SIZE_PG_1G
+ #define CMDQ_CREATE_CQ_UNUSED27_MASK 0xffffffe0UL
+ #define CMDQ_CREATE_CQ_UNUSED27_SFT 5
+ __le32 cq_fco_cnq_id;
+ #define CMDQ_CREATE_CQ_CNQ_ID_MASK 0xfffUL
+ #define CMDQ_CREATE_CQ_CNQ_ID_SFT 0
+ #define CMDQ_CREATE_CQ_CQ_FCO_MASK 0xfffff000UL
+ #define CMDQ_CREATE_CQ_CQ_FCO_SFT 12
+ __le32 dpi;
+ __le32 cq_size;
+ __le64 pbl;
+ __le16 steering_tag;
+ u8 reserved48[2];
+ __le32 coalescing;
+ #define CMDQ_CREATE_CQ_BUF_MAXTIME_MASK 0x1ffUL
+ #define CMDQ_CREATE_CQ_BUF_MAXTIME_SFT 0
+ #define CMDQ_CREATE_CQ_NORMAL_MAXBUF_MASK 0x3e00UL
+ #define CMDQ_CREATE_CQ_NORMAL_MAXBUF_SFT 9
+ #define CMDQ_CREATE_CQ_DURING_MAXBUF_MASK 0x7c000UL
+ #define CMDQ_CREATE_CQ_DURING_MAXBUF_SFT 14
+ #define CMDQ_CREATE_CQ_ENABLE_RING_IDLE_MODE 0x80000UL
+ #define CMDQ_CREATE_CQ_UNUSED12_MASK 0xfff00000UL
+ #define CMDQ_CREATE_CQ_UNUSED12_SFT 20
+ __le64 reserved64;
+};
+
+/* creq_create_cq_resp (size:128b/16B) */
+struct creq_create_cq_resp {
+ u8 type;
+ #define CREQ_CREATE_CQ_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_CREATE_CQ_RESP_TYPE_SFT 0
+ #define CREQ_CREATE_CQ_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_CREATE_CQ_RESP_TYPE_LAST CREQ_CREATE_CQ_RESP_TYPE_QP_EVENT
+ u8 status;
+ __le16 cookie;
+ __le32 xid;
+ u8 v;
+ #define CREQ_CREATE_CQ_RESP_V 0x1UL
+ u8 event;
+ #define CREQ_CREATE_CQ_RESP_EVENT_CREATE_CQ 0x9UL
+ #define CREQ_CREATE_CQ_RESP_EVENT_LAST CREQ_CREATE_CQ_RESP_EVENT_CREATE_CQ
+ u8 reserved48[6];
+};
+
+/* cmdq_destroy_cq (size:192b/24B) */
+struct cmdq_destroy_cq {
+ u8 opcode;
+ #define CMDQ_DESTROY_CQ_OPCODE_DESTROY_CQ 0xaUL
+ #define CMDQ_DESTROY_CQ_OPCODE_LAST CMDQ_DESTROY_CQ_OPCODE_DESTROY_CQ
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ __le32 cq_cid;
+ __le32 unused_0;
+};
+
+/* creq_destroy_cq_resp (size:128b/16B) */
+struct creq_destroy_cq_resp {
+ u8 type;
+ #define CREQ_DESTROY_CQ_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_DESTROY_CQ_RESP_TYPE_SFT 0
+ #define CREQ_DESTROY_CQ_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_DESTROY_CQ_RESP_TYPE_LAST CREQ_DESTROY_CQ_RESP_TYPE_QP_EVENT
+ u8 status;
+ __le16 cookie;
+ __le32 xid;
+ u8 v;
+ #define CREQ_DESTROY_CQ_RESP_V 0x1UL
+ u8 event;
+ #define CREQ_DESTROY_CQ_RESP_EVENT_DESTROY_CQ 0xaUL
+ #define CREQ_DESTROY_CQ_RESP_EVENT_LAST CREQ_DESTROY_CQ_RESP_EVENT_DESTROY_CQ
+ __le16 cq_arm_lvl;
+ #define CREQ_DESTROY_CQ_RESP_CQ_ARM_LVL_MASK 0x3UL
+ #define CREQ_DESTROY_CQ_RESP_CQ_ARM_LVL_SFT 0
+ __le16 total_cnq_events;
+ __le16 reserved16;
+};
+
+/* cmdq_resize_cq (size:320b/40B) */
+struct cmdq_resize_cq {
+ u8 opcode;
+ #define CMDQ_RESIZE_CQ_OPCODE_RESIZE_CQ 0xcUL
+ #define CMDQ_RESIZE_CQ_OPCODE_LAST CMDQ_RESIZE_CQ_OPCODE_RESIZE_CQ
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ __le32 cq_cid;
+ __le32 new_cq_size_pg_size_lvl;
+ #define CMDQ_RESIZE_CQ_LVL_MASK 0x3UL
+ #define CMDQ_RESIZE_CQ_LVL_SFT 0
+ #define CMDQ_RESIZE_CQ_LVL_LVL_0 0x0UL
+ #define CMDQ_RESIZE_CQ_LVL_LVL_1 0x1UL
+ #define CMDQ_RESIZE_CQ_LVL_LVL_2 0x2UL
+ #define CMDQ_RESIZE_CQ_LVL_LAST CMDQ_RESIZE_CQ_LVL_LVL_2
+ #define CMDQ_RESIZE_CQ_PG_SIZE_MASK 0x1cUL
+ #define CMDQ_RESIZE_CQ_PG_SIZE_SFT 2
+ #define CMDQ_RESIZE_CQ_PG_SIZE_PG_4K (0x0UL << 2)
+ #define CMDQ_RESIZE_CQ_PG_SIZE_PG_8K (0x1UL << 2)
+ #define CMDQ_RESIZE_CQ_PG_SIZE_PG_64K (0x2UL << 2)
+ #define CMDQ_RESIZE_CQ_PG_SIZE_PG_2M (0x3UL << 2)
+ #define CMDQ_RESIZE_CQ_PG_SIZE_PG_8M (0x4UL << 2)
+ #define CMDQ_RESIZE_CQ_PG_SIZE_PG_1G (0x5UL << 2)
+ #define CMDQ_RESIZE_CQ_PG_SIZE_LAST CMDQ_RESIZE_CQ_PG_SIZE_PG_1G
+ #define CMDQ_RESIZE_CQ_NEW_CQ_SIZE_MASK 0x1fffffe0UL
+ #define CMDQ_RESIZE_CQ_NEW_CQ_SIZE_SFT 5
+ __le64 new_pbl;
+ __le32 new_cq_fco;
+ __le32 unused_0;
+};
+
+/* creq_resize_cq_resp (size:128b/16B) */
+struct creq_resize_cq_resp {
+ u8 type;
+ #define CREQ_RESIZE_CQ_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_RESIZE_CQ_RESP_TYPE_SFT 0
+ #define CREQ_RESIZE_CQ_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_RESIZE_CQ_RESP_TYPE_LAST CREQ_RESIZE_CQ_RESP_TYPE_QP_EVENT
+ u8 status;
+ __le16 cookie;
+ __le32 xid;
+ u8 v;
+ #define CREQ_RESIZE_CQ_RESP_V 0x1UL
+ u8 event;
+ #define CREQ_RESIZE_CQ_RESP_EVENT_RESIZE_CQ 0xcUL
+ #define CREQ_RESIZE_CQ_RESP_EVENT_LAST CREQ_RESIZE_CQ_RESP_EVENT_RESIZE_CQ
+ u8 reserved48[6];
+};
+
+/* cmdq_allocate_mrw (size:256b/32B) */
+struct cmdq_allocate_mrw {
+ u8 opcode;
+ #define CMDQ_ALLOCATE_MRW_OPCODE_ALLOCATE_MRW 0xdUL
+ #define CMDQ_ALLOCATE_MRW_OPCODE_LAST CMDQ_ALLOCATE_MRW_OPCODE_ALLOCATE_MRW
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ __le64 mrw_handle;
+ u8 mrw_flags;
+ #define CMDQ_ALLOCATE_MRW_MRW_FLAGS_MASK 0xfUL
+ #define CMDQ_ALLOCATE_MRW_MRW_FLAGS_SFT 0
+ #define CMDQ_ALLOCATE_MRW_MRW_FLAGS_MR 0x0UL
+ #define CMDQ_ALLOCATE_MRW_MRW_FLAGS_PMR 0x1UL
+ #define CMDQ_ALLOCATE_MRW_MRW_FLAGS_MW_TYPE1 0x2UL
+ #define CMDQ_ALLOCATE_MRW_MRW_FLAGS_MW_TYPE2A 0x3UL
+ #define CMDQ_ALLOCATE_MRW_MRW_FLAGS_MW_TYPE2B 0x4UL
+ #define CMDQ_ALLOCATE_MRW_MRW_FLAGS_LAST CMDQ_ALLOCATE_MRW_MRW_FLAGS_MW_TYPE2B
+ #define CMDQ_ALLOCATE_MRW_STEERING_TAG_VALID 0x10UL
+ #define CMDQ_ALLOCATE_MRW_UNUSED4_MASK 0xe0UL
+ #define CMDQ_ALLOCATE_MRW_UNUSED4_SFT 5
+ u8 access;
+ #define CMDQ_ALLOCATE_MRW_ACCESS_CONSUMER_OWNED_KEY 0x20UL
+ __le16 steering_tag;
+ __le32 pd_id;
+};
+
+/* creq_allocate_mrw_resp (size:128b/16B) */
+struct creq_allocate_mrw_resp {
+ u8 type;
+ #define CREQ_ALLOCATE_MRW_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_ALLOCATE_MRW_RESP_TYPE_SFT 0
+ #define CREQ_ALLOCATE_MRW_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_ALLOCATE_MRW_RESP_TYPE_LAST CREQ_ALLOCATE_MRW_RESP_TYPE_QP_EVENT
+ u8 status;
+ __le16 cookie;
+ __le32 xid;
+ u8 v;
+ #define CREQ_ALLOCATE_MRW_RESP_V 0x1UL
+ u8 event;
+ #define CREQ_ALLOCATE_MRW_RESP_EVENT_ALLOCATE_MRW 0xdUL
+ #define CREQ_ALLOCATE_MRW_RESP_EVENT_LAST CREQ_ALLOCATE_MRW_RESP_EVENT_ALLOCATE_MRW
+ u8 reserved48[6];
+};
+
+/* cmdq_deallocate_key (size:192b/24B) */
+struct cmdq_deallocate_key {
+ u8 opcode;
+ #define CMDQ_DEALLOCATE_KEY_OPCODE_DEALLOCATE_KEY 0xeUL
+ #define CMDQ_DEALLOCATE_KEY_OPCODE_LAST CMDQ_DEALLOCATE_KEY_OPCODE_DEALLOCATE_KEY
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ u8 mrw_flags;
+ #define CMDQ_DEALLOCATE_KEY_MRW_FLAGS_MASK 0xfUL
+ #define CMDQ_DEALLOCATE_KEY_MRW_FLAGS_SFT 0
+ #define CMDQ_DEALLOCATE_KEY_MRW_FLAGS_MR 0x0UL
+ #define CMDQ_DEALLOCATE_KEY_MRW_FLAGS_PMR 0x1UL
+ #define CMDQ_DEALLOCATE_KEY_MRW_FLAGS_MW_TYPE1 0x2UL
+ #define CMDQ_DEALLOCATE_KEY_MRW_FLAGS_MW_TYPE2A 0x3UL
+ #define CMDQ_DEALLOCATE_KEY_MRW_FLAGS_MW_TYPE2B 0x4UL
+ #define CMDQ_DEALLOCATE_KEY_MRW_FLAGS_LAST CMDQ_DEALLOCATE_KEY_MRW_FLAGS_MW_TYPE2B
+ #define CMDQ_DEALLOCATE_KEY_UNUSED4_MASK 0xf0UL
+ #define CMDQ_DEALLOCATE_KEY_UNUSED4_SFT 4
+ u8 unused24[3];
+ __le32 key;
+};
+
+/* creq_deallocate_key_resp (size:128b/16B) */
+struct creq_deallocate_key_resp {
+ u8 type;
+ #define CREQ_DEALLOCATE_KEY_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_DEALLOCATE_KEY_RESP_TYPE_SFT 0
+ #define CREQ_DEALLOCATE_KEY_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_DEALLOCATE_KEY_RESP_TYPE_LAST CREQ_DEALLOCATE_KEY_RESP_TYPE_QP_EVENT
+ u8 status;
+ __le16 cookie;
+ __le32 xid;
+ u8 v;
+ #define CREQ_DEALLOCATE_KEY_RESP_V 0x1UL
+ u8 event;
+ #define CREQ_DEALLOCATE_KEY_RESP_EVENT_DEALLOCATE_KEY 0xeUL
+ #define CREQ_DEALLOCATE_KEY_RESP_EVENT_LAST CREQ_DEALLOCATE_KEY_RESP_EVENT_DEALLOCATE_KEY
+ __le16 reserved16;
+ __le32 bound_window_info;
+};
+
+/* cmdq_register_mr (size:448b/56B) */
+struct cmdq_register_mr {
+ u8 opcode;
+ #define CMDQ_REGISTER_MR_OPCODE_REGISTER_MR 0xfUL
+ #define CMDQ_REGISTER_MR_OPCODE_LAST CMDQ_REGISTER_MR_OPCODE_REGISTER_MR
+ u8 cmd_size;
+ __le16 flags;
+ #define CMDQ_REGISTER_MR_FLAGS_ALLOC_MR 0x1UL
+ #define CMDQ_REGISTER_MR_FLAGS_STEERING_TAG_VALID 0x2UL
+ #define CMDQ_REGISTER_MR_FLAGS_ENABLE_RO 0x4UL
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ u8 log2_pg_size_lvl;
+ #define CMDQ_REGISTER_MR_LVL_MASK 0x3UL
+ #define CMDQ_REGISTER_MR_LVL_SFT 0
+ #define CMDQ_REGISTER_MR_LVL_LVL_0 0x0UL
+ #define CMDQ_REGISTER_MR_LVL_LVL_1 0x1UL
+ #define CMDQ_REGISTER_MR_LVL_LVL_2 0x2UL
+ #define CMDQ_REGISTER_MR_LVL_LAST CMDQ_REGISTER_MR_LVL_LVL_2
+ #define CMDQ_REGISTER_MR_LOG2_PG_SIZE_MASK 0x7cUL
+ #define CMDQ_REGISTER_MR_LOG2_PG_SIZE_SFT 2
+ #define CMDQ_REGISTER_MR_LOG2_PG_SIZE_PG_4K (0xcUL << 2)
+ #define CMDQ_REGISTER_MR_LOG2_PG_SIZE_PG_8K (0xdUL << 2)
+ #define CMDQ_REGISTER_MR_LOG2_PG_SIZE_PG_64K (0x10UL << 2)
+ #define CMDQ_REGISTER_MR_LOG2_PG_SIZE_PG_256K (0x12UL << 2)
+ #define CMDQ_REGISTER_MR_LOG2_PG_SIZE_PG_1M (0x14UL << 2)
+ #define CMDQ_REGISTER_MR_LOG2_PG_SIZE_PG_2M (0x15UL << 2)
+ #define CMDQ_REGISTER_MR_LOG2_PG_SIZE_PG_4M (0x16UL << 2)
+ #define CMDQ_REGISTER_MR_LOG2_PG_SIZE_PG_1G (0x1eUL << 2)
+ #define CMDQ_REGISTER_MR_LOG2_PG_SIZE_LAST CMDQ_REGISTER_MR_LOG2_PG_SIZE_PG_1G
+ #define CMDQ_REGISTER_MR_UNUSED1 0x80UL
+ u8 access;
+ #define CMDQ_REGISTER_MR_ACCESS_LOCAL_WRITE 0x1UL
+ #define CMDQ_REGISTER_MR_ACCESS_REMOTE_READ 0x2UL
+ #define CMDQ_REGISTER_MR_ACCESS_REMOTE_WRITE 0x4UL
+ #define CMDQ_REGISTER_MR_ACCESS_REMOTE_ATOMIC 0x8UL
+ #define CMDQ_REGISTER_MR_ACCESS_MW_BIND 0x10UL
+ #define CMDQ_REGISTER_MR_ACCESS_ZERO_BASED 0x20UL
+ __le16 log2_pbl_pg_size;
+ #define CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_MASK 0x1fUL
+ #define CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_SFT 0
+ #define CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_4K 0xcUL
+ #define CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_8K 0xdUL
+ #define CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_64K 0x10UL
+ #define CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_256K 0x12UL
+ #define CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_1M 0x14UL
+ #define CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_2M 0x15UL
+ #define CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_4M 0x16UL
+ #define CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_1G 0x1eUL
+ #define CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_LAST CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_1G
+ #define CMDQ_REGISTER_MR_UNUSED11_MASK 0xffe0UL
+ #define CMDQ_REGISTER_MR_UNUSED11_SFT 5
+ __le32 key;
+ __le64 pbl;
+ __le64 va;
+ __le64 mr_size;
+ __le16 steering_tag;
+ u8 reserved48[6];
+};
+
+/* creq_register_mr_resp (size:128b/16B) */
+struct creq_register_mr_resp {
+ u8 type;
+ #define CREQ_REGISTER_MR_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_REGISTER_MR_RESP_TYPE_SFT 0
+ #define CREQ_REGISTER_MR_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_REGISTER_MR_RESP_TYPE_LAST CREQ_REGISTER_MR_RESP_TYPE_QP_EVENT
+ u8 status;
+ __le16 cookie;
+ __le32 xid;
+ u8 v;
+ #define CREQ_REGISTER_MR_RESP_V 0x1UL
+ u8 event;
+ #define CREQ_REGISTER_MR_RESP_EVENT_REGISTER_MR 0xfUL
+ #define CREQ_REGISTER_MR_RESP_EVENT_LAST CREQ_REGISTER_MR_RESP_EVENT_REGISTER_MR
+ u8 reserved48[6];
+};
+
+/* cmdq_deregister_mr (size:192b/24B) */
+struct cmdq_deregister_mr {
+ u8 opcode;
+ #define CMDQ_DEREGISTER_MR_OPCODE_DEREGISTER_MR 0x10UL
+ #define CMDQ_DEREGISTER_MR_OPCODE_LAST CMDQ_DEREGISTER_MR_OPCODE_DEREGISTER_MR
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ __le32 lkey;
+ __le32 unused_0;
+};
+
+/* creq_deregister_mr_resp (size:128b/16B) */
+struct creq_deregister_mr_resp {
+ u8 type;
+ #define CREQ_DEREGISTER_MR_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_DEREGISTER_MR_RESP_TYPE_SFT 0
+ #define CREQ_DEREGISTER_MR_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_DEREGISTER_MR_RESP_TYPE_LAST CREQ_DEREGISTER_MR_RESP_TYPE_QP_EVENT
+ u8 status;
+ __le16 cookie;
+ __le32 xid;
+ u8 v;
+ #define CREQ_DEREGISTER_MR_RESP_V 0x1UL
+ u8 event;
+ #define CREQ_DEREGISTER_MR_RESP_EVENT_DEREGISTER_MR 0x10UL
+ #define CREQ_DEREGISTER_MR_RESP_EVENT_LAST CREQ_DEREGISTER_MR_RESP_EVENT_DEREGISTER_MR
+ __le16 reserved16;
+ __le32 bound_windows;
+};
+
+/* cmdq_add_gid (size:384b/48B) */
+struct cmdq_add_gid {
+ u8 opcode;
+ #define CMDQ_ADD_GID_OPCODE_ADD_GID 0x11UL
+ #define CMDQ_ADD_GID_OPCODE_LAST CMDQ_ADD_GID_OPCODE_ADD_GID
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ __be32 gid[4];
+ __be16 src_mac[3];
+ __le16 vlan;
+ #define CMDQ_ADD_GID_VLAN_VLAN_EN_TPID_VLAN_ID_MASK 0xffffUL
+ #define CMDQ_ADD_GID_VLAN_VLAN_EN_TPID_VLAN_ID_SFT 0
+ #define CMDQ_ADD_GID_VLAN_VLAN_ID_MASK 0xfffUL
+ #define CMDQ_ADD_GID_VLAN_VLAN_ID_SFT 0
+ #define CMDQ_ADD_GID_VLAN_TPID_MASK 0x7000UL
+ #define CMDQ_ADD_GID_VLAN_TPID_SFT 12
+ #define CMDQ_ADD_GID_VLAN_TPID_TPID_88A8 (0x0UL << 12)
+ #define CMDQ_ADD_GID_VLAN_TPID_TPID_8100 (0x1UL << 12)
+ #define CMDQ_ADD_GID_VLAN_TPID_TPID_9100 (0x2UL << 12)
+ #define CMDQ_ADD_GID_VLAN_TPID_TPID_9200 (0x3UL << 12)
+ #define CMDQ_ADD_GID_VLAN_TPID_TPID_9300 (0x4UL << 12)
+ #define CMDQ_ADD_GID_VLAN_TPID_TPID_CFG1 (0x5UL << 12)
+ #define CMDQ_ADD_GID_VLAN_TPID_TPID_CFG2 (0x6UL << 12)
+ #define CMDQ_ADD_GID_VLAN_TPID_TPID_CFG3 (0x7UL << 12)
+ #define CMDQ_ADD_GID_VLAN_TPID_LAST CMDQ_ADD_GID_VLAN_TPID_TPID_CFG3
+ #define CMDQ_ADD_GID_VLAN_VLAN_EN 0x8000UL
+ __le16 ipid;
+ __le16 stats_ctx;
+ #define CMDQ_ADD_GID_STATS_CTX_STATS_CTX_VALID_STATS_CTX_ID_MASK 0xffffUL
+ #define CMDQ_ADD_GID_STATS_CTX_STATS_CTX_VALID_STATS_CTX_ID_SFT 0
+ #define CMDQ_ADD_GID_STATS_CTX_STATS_CTX_ID_MASK 0x7fffUL
+ #define CMDQ_ADD_GID_STATS_CTX_STATS_CTX_ID_SFT 0
+ #define CMDQ_ADD_GID_STATS_CTX_STATS_CTX_VALID 0x8000UL
+ __le32 unused_0;
+};
+
+/* creq_add_gid_resp (size:128b/16B) */
+struct creq_add_gid_resp {
+ u8 type;
+ #define CREQ_ADD_GID_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_ADD_GID_RESP_TYPE_SFT 0
+ #define CREQ_ADD_GID_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_ADD_GID_RESP_TYPE_LAST CREQ_ADD_GID_RESP_TYPE_QP_EVENT
+ u8 status;
+ __le16 cookie;
+ __le32 xid;
+ u8 v;
+ #define CREQ_ADD_GID_RESP_V 0x1UL
+ u8 event;
+ #define CREQ_ADD_GID_RESP_EVENT_ADD_GID 0x11UL
+ #define CREQ_ADD_GID_RESP_EVENT_LAST CREQ_ADD_GID_RESP_EVENT_ADD_GID
+ u8 reserved48[6];
+};
+
+/* cmdq_delete_gid (size:192b/24B) */
+struct cmdq_delete_gid {
+ u8 opcode;
+ #define CMDQ_DELETE_GID_OPCODE_DELETE_GID 0x12UL
+ #define CMDQ_DELETE_GID_OPCODE_LAST CMDQ_DELETE_GID_OPCODE_DELETE_GID
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ __le16 gid_index;
+ u8 unused_0[6];
+};
+
+/* creq_delete_gid_resp (size:128b/16B) */
+struct creq_delete_gid_resp {
+ u8 type;
+ #define CREQ_DELETE_GID_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_DELETE_GID_RESP_TYPE_SFT 0
+ #define CREQ_DELETE_GID_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_DELETE_GID_RESP_TYPE_LAST CREQ_DELETE_GID_RESP_TYPE_QP_EVENT
+ u8 status;
+ __le16 cookie;
+ __le32 xid;
+ u8 v;
+ #define CREQ_DELETE_GID_RESP_V 0x1UL
+ u8 event;
+ #define CREQ_DELETE_GID_RESP_EVENT_DELETE_GID 0x12UL
+ #define CREQ_DELETE_GID_RESP_EVENT_LAST CREQ_DELETE_GID_RESP_EVENT_DELETE_GID
+ u8 reserved48[6];
+};
+
+/* cmdq_modify_gid (size:384b/48B) */
+struct cmdq_modify_gid {
+ u8 opcode;
+ #define CMDQ_MODIFY_GID_OPCODE_MODIFY_GID 0x17UL
+ #define CMDQ_MODIFY_GID_OPCODE_LAST CMDQ_MODIFY_GID_OPCODE_MODIFY_GID
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ __be32 gid[4];
+ __be16 src_mac[3];
+ __le16 vlan;
+ #define CMDQ_MODIFY_GID_VLAN_VLAN_ID_MASK 0xfffUL
+ #define CMDQ_MODIFY_GID_VLAN_VLAN_ID_SFT 0
+ #define CMDQ_MODIFY_GID_VLAN_TPID_MASK 0x7000UL
+ #define CMDQ_MODIFY_GID_VLAN_TPID_SFT 12
+ #define CMDQ_MODIFY_GID_VLAN_TPID_TPID_88A8 (0x0UL << 12)
+ #define CMDQ_MODIFY_GID_VLAN_TPID_TPID_8100 (0x1UL << 12)
+ #define CMDQ_MODIFY_GID_VLAN_TPID_TPID_9100 (0x2UL << 12)
+ #define CMDQ_MODIFY_GID_VLAN_TPID_TPID_9200 (0x3UL << 12)
+ #define CMDQ_MODIFY_GID_VLAN_TPID_TPID_9300 (0x4UL << 12)
+ #define CMDQ_MODIFY_GID_VLAN_TPID_TPID_CFG1 (0x5UL << 12)
+ #define CMDQ_MODIFY_GID_VLAN_TPID_TPID_CFG2 (0x6UL << 12)
+ #define CMDQ_MODIFY_GID_VLAN_TPID_TPID_CFG3 (0x7UL << 12)
+ #define CMDQ_MODIFY_GID_VLAN_TPID_LAST CMDQ_MODIFY_GID_VLAN_TPID_TPID_CFG3
+ #define CMDQ_MODIFY_GID_VLAN_VLAN_EN 0x8000UL
+ __le16 ipid;
+ __le16 gid_index;
+ __le16 stats_ctx;
+ #define CMDQ_MODIFY_GID_STATS_CTX_STATS_CTX_ID_MASK 0x7fffUL
+ #define CMDQ_MODIFY_GID_STATS_CTX_STATS_CTX_ID_SFT 0
+ #define CMDQ_MODIFY_GID_STATS_CTX_STATS_CTX_VALID 0x8000UL
+ __le16 unused_0;
+};
+
+/* creq_modify_gid_resp (size:128b/16B) */
+struct creq_modify_gid_resp {
+ u8 type;
+ #define CREQ_MODIFY_GID_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_MODIFY_GID_RESP_TYPE_SFT 0
+ #define CREQ_MODIFY_GID_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_MODIFY_GID_RESP_TYPE_LAST CREQ_MODIFY_GID_RESP_TYPE_QP_EVENT
+ u8 status;
+ __le16 cookie;
+ __le32 xid;
+ u8 v;
+ #define CREQ_MODIFY_GID_RESP_V 0x1UL
+ u8 event;
+ #define CREQ_MODIFY_GID_RESP_EVENT_ADD_GID 0x11UL
+ #define CREQ_MODIFY_GID_RESP_EVENT_LAST CREQ_MODIFY_GID_RESP_EVENT_ADD_GID
+ u8 reserved48[6];
+};
+
+/* cmdq_query_gid (size:192b/24B) */
+struct cmdq_query_gid {
+ u8 opcode;
+ #define CMDQ_QUERY_GID_OPCODE_QUERY_GID 0x18UL
+ #define CMDQ_QUERY_GID_OPCODE_LAST CMDQ_QUERY_GID_OPCODE_QUERY_GID
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ __le16 gid_index;
+ u8 unused16[6];
+};
+
+/* creq_query_gid_resp (size:128b/16B) */
+struct creq_query_gid_resp {
+ u8 type;
+ #define CREQ_QUERY_GID_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_QUERY_GID_RESP_TYPE_SFT 0
+ #define CREQ_QUERY_GID_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_QUERY_GID_RESP_TYPE_LAST CREQ_QUERY_GID_RESP_TYPE_QP_EVENT
+ u8 status;
+ __le16 cookie;
+ __le32 size;
+ u8 v;
+ #define CREQ_QUERY_GID_RESP_V 0x1UL
+ u8 event;
+ #define CREQ_QUERY_GID_RESP_EVENT_QUERY_GID 0x18UL
+ #define CREQ_QUERY_GID_RESP_EVENT_LAST CREQ_QUERY_GID_RESP_EVENT_QUERY_GID
+ u8 reserved48[6];
+};
+
+/* creq_query_gid_resp_sb (size:320b/40B) */
+struct creq_query_gid_resp_sb {
+ u8 opcode;
+ #define CREQ_QUERY_GID_RESP_SB_OPCODE_QUERY_GID 0x18UL
+ #define CREQ_QUERY_GID_RESP_SB_OPCODE_LAST CREQ_QUERY_GID_RESP_SB_OPCODE_QUERY_GID
+ u8 status;
+ __le16 cookie;
+ __le16 flags;
+ u8 resp_size;
+ u8 reserved8;
+ __le32 gid[4];
+ __le16 src_mac[3];
+ __le16 vlan;
+ #define CREQ_QUERY_GID_RESP_SB_VLAN_VLAN_EN_TPID_VLAN_ID_MASK 0xffffUL
+ #define CREQ_QUERY_GID_RESP_SB_VLAN_VLAN_EN_TPID_VLAN_ID_SFT 0
+ #define CREQ_QUERY_GID_RESP_SB_VLAN_VLAN_ID_MASK 0xfffUL
+ #define CREQ_QUERY_GID_RESP_SB_VLAN_VLAN_ID_SFT 0
+ #define CREQ_QUERY_GID_RESP_SB_VLAN_TPID_MASK 0x7000UL
+ #define CREQ_QUERY_GID_RESP_SB_VLAN_TPID_SFT 12
+ #define CREQ_QUERY_GID_RESP_SB_VLAN_TPID_TPID_88A8 (0x0UL << 12)
+ #define CREQ_QUERY_GID_RESP_SB_VLAN_TPID_TPID_8100 (0x1UL << 12)
+ #define CREQ_QUERY_GID_RESP_SB_VLAN_TPID_TPID_9100 (0x2UL << 12)
+ #define CREQ_QUERY_GID_RESP_SB_VLAN_TPID_TPID_9200 (0x3UL << 12)
+ #define CREQ_QUERY_GID_RESP_SB_VLAN_TPID_TPID_9300 (0x4UL << 12)
+ #define CREQ_QUERY_GID_RESP_SB_VLAN_TPID_TPID_CFG1 (0x5UL << 12)
+ #define CREQ_QUERY_GID_RESP_SB_VLAN_TPID_TPID_CFG2 (0x6UL << 12)
+ #define CREQ_QUERY_GID_RESP_SB_VLAN_TPID_TPID_CFG3 (0x7UL << 12)
+ #define CREQ_QUERY_GID_RESP_SB_VLAN_TPID_LAST CREQ_QUERY_GID_RESP_SB_VLAN_TPID_TPID_CFG3
+ #define CREQ_QUERY_GID_RESP_SB_VLAN_VLAN_EN 0x8000UL
+ __le16 ipid;
+ __le16 gid_index;
+ __le32 unused_0;
+};
+
+/* cmdq_create_qp1 (size:640b/80B) */
+struct cmdq_create_qp1 {
+ u8 opcode;
+ #define CMDQ_CREATE_QP1_OPCODE_CREATE_QP1 0x13UL
+ #define CMDQ_CREATE_QP1_OPCODE_LAST CMDQ_CREATE_QP1_OPCODE_CREATE_QP1
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ __le64 qp_handle;
+ __le32 qp_flags;
+ #define CMDQ_CREATE_QP1_QP_FLAGS_SRQ_USED 0x1UL
+ #define CMDQ_CREATE_QP1_QP_FLAGS_FORCE_COMPLETION 0x2UL
+ #define CMDQ_CREATE_QP1_QP_FLAGS_RESERVED_LKEY_ENABLE 0x4UL
+ #define CMDQ_CREATE_QP1_QP_FLAGS_LAST CMDQ_CREATE_QP1_QP_FLAGS_RESERVED_LKEY_ENABLE
+ u8 type;
+ #define CMDQ_CREATE_QP1_TYPE_GSI 0x1UL
+ #define CMDQ_CREATE_QP1_TYPE_LAST CMDQ_CREATE_QP1_TYPE_GSI
+ u8 sq_pg_size_sq_lvl;
+ #define CMDQ_CREATE_QP1_SQ_LVL_MASK 0xfUL
+ #define CMDQ_CREATE_QP1_SQ_LVL_SFT 0
+ #define CMDQ_CREATE_QP1_SQ_LVL_LVL_0 0x0UL
+ #define CMDQ_CREATE_QP1_SQ_LVL_LVL_1 0x1UL
+ #define CMDQ_CREATE_QP1_SQ_LVL_LVL_2 0x2UL
+ #define CMDQ_CREATE_QP1_SQ_LVL_LAST CMDQ_CREATE_QP1_SQ_LVL_LVL_2
+ #define CMDQ_CREATE_QP1_SQ_PG_SIZE_MASK 0xf0UL
+ #define CMDQ_CREATE_QP1_SQ_PG_SIZE_SFT 4
+ #define CMDQ_CREATE_QP1_SQ_PG_SIZE_PG_4K (0x0UL << 4)
+ #define CMDQ_CREATE_QP1_SQ_PG_SIZE_PG_8K (0x1UL << 4)
+ #define CMDQ_CREATE_QP1_SQ_PG_SIZE_PG_64K (0x2UL << 4)
+ #define CMDQ_CREATE_QP1_SQ_PG_SIZE_PG_2M (0x3UL << 4)
+ #define CMDQ_CREATE_QP1_SQ_PG_SIZE_PG_8M (0x4UL << 4)
+ #define CMDQ_CREATE_QP1_SQ_PG_SIZE_PG_1G (0x5UL << 4)
+ #define CMDQ_CREATE_QP1_SQ_PG_SIZE_LAST CMDQ_CREATE_QP1_SQ_PG_SIZE_PG_1G
+ u8 rq_pg_size_rq_lvl;
+ #define CMDQ_CREATE_QP1_RQ_LVL_MASK 0xfUL
+ #define CMDQ_CREATE_QP1_RQ_LVL_SFT 0
+ #define CMDQ_CREATE_QP1_RQ_LVL_LVL_0 0x0UL
+ #define CMDQ_CREATE_QP1_RQ_LVL_LVL_1 0x1UL
+ #define CMDQ_CREATE_QP1_RQ_LVL_LVL_2 0x2UL
+ #define CMDQ_CREATE_QP1_RQ_LVL_LAST CMDQ_CREATE_QP1_RQ_LVL_LVL_2
+ #define CMDQ_CREATE_QP1_RQ_PG_SIZE_MASK 0xf0UL
+ #define CMDQ_CREATE_QP1_RQ_PG_SIZE_SFT 4
+ #define CMDQ_CREATE_QP1_RQ_PG_SIZE_PG_4K (0x0UL << 4)
+ #define CMDQ_CREATE_QP1_RQ_PG_SIZE_PG_8K (0x1UL << 4)
+ #define CMDQ_CREATE_QP1_RQ_PG_SIZE_PG_64K (0x2UL << 4)
+ #define CMDQ_CREATE_QP1_RQ_PG_SIZE_PG_2M (0x3UL << 4)
+ #define CMDQ_CREATE_QP1_RQ_PG_SIZE_PG_8M (0x4UL << 4)
+ #define CMDQ_CREATE_QP1_RQ_PG_SIZE_PG_1G (0x5UL << 4)
+ #define CMDQ_CREATE_QP1_RQ_PG_SIZE_LAST CMDQ_CREATE_QP1_RQ_PG_SIZE_PG_1G
+ u8 unused_0;
+ __le32 dpi;
+ __le32 sq_size;
+ __le32 rq_size;
+ __le16 sq_fwo_sq_sge;
+ #define CMDQ_CREATE_QP1_SQ_SGE_MASK 0xfUL
+ #define CMDQ_CREATE_QP1_SQ_SGE_SFT 0
+ #define CMDQ_CREATE_QP1_SQ_FWO_MASK 0xfff0UL
+ #define CMDQ_CREATE_QP1_SQ_FWO_SFT 4
+ __le16 rq_fwo_rq_sge;
+ #define CMDQ_CREATE_QP1_RQ_SGE_MASK 0xfUL
+ #define CMDQ_CREATE_QP1_RQ_SGE_SFT 0
+ #define CMDQ_CREATE_QP1_RQ_FWO_MASK 0xfff0UL
+ #define CMDQ_CREATE_QP1_RQ_FWO_SFT 4
+ __le32 scq_cid;
+ __le32 rcq_cid;
+ __le32 srq_cid;
+ __le32 pd_id;
+ __le64 sq_pbl;
+ __le64 rq_pbl;
+};
+
+/* creq_create_qp1_resp (size:128b/16B) */
+struct creq_create_qp1_resp {
+ u8 type;
+ #define CREQ_CREATE_QP1_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_CREATE_QP1_RESP_TYPE_SFT 0
+ #define CREQ_CREATE_QP1_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_CREATE_QP1_RESP_TYPE_LAST CREQ_CREATE_QP1_RESP_TYPE_QP_EVENT
+ u8 status;
+ __le16 cookie;
+ __le32 xid;
+ u8 v;
+ #define CREQ_CREATE_QP1_RESP_V 0x1UL
+ u8 event;
+ #define CREQ_CREATE_QP1_RESP_EVENT_CREATE_QP1 0x13UL
+ #define CREQ_CREATE_QP1_RESP_EVENT_LAST CREQ_CREATE_QP1_RESP_EVENT_CREATE_QP1
+ u8 reserved48[6];
+};
+
+/* cmdq_destroy_qp1 (size:192b/24B) */
+struct cmdq_destroy_qp1 {
+ u8 opcode;
+ #define CMDQ_DESTROY_QP1_OPCODE_DESTROY_QP1 0x14UL
+ #define CMDQ_DESTROY_QP1_OPCODE_LAST CMDQ_DESTROY_QP1_OPCODE_DESTROY_QP1
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ __le32 qp1_cid;
+ __le32 unused_0;
+};
+
+/* creq_destroy_qp1_resp (size:128b/16B) */
+struct creq_destroy_qp1_resp {
+ u8 type;
+ #define CREQ_DESTROY_QP1_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_DESTROY_QP1_RESP_TYPE_SFT 0
+ #define CREQ_DESTROY_QP1_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_DESTROY_QP1_RESP_TYPE_LAST CREQ_DESTROY_QP1_RESP_TYPE_QP_EVENT
+ u8 status;
+ __le16 cookie;
+ __le32 xid;
+ u8 v;
+ #define CREQ_DESTROY_QP1_RESP_V 0x1UL
+ u8 event;
+ #define CREQ_DESTROY_QP1_RESP_EVENT_DESTROY_QP1 0x14UL
+ #define CREQ_DESTROY_QP1_RESP_EVENT_LAST CREQ_DESTROY_QP1_RESP_EVENT_DESTROY_QP1
+ u8 reserved48[6];
+};
+
+/* cmdq_create_ah (size:512b/64B) */
+struct cmdq_create_ah {
+ u8 opcode;
+ #define CMDQ_CREATE_AH_OPCODE_CREATE_AH 0x15UL
+ #define CMDQ_CREATE_AH_OPCODE_LAST CMDQ_CREATE_AH_OPCODE_CREATE_AH
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ __le64 ah_handle;
+ __le32 dgid[4];
+ u8 type;
+ #define CMDQ_CREATE_AH_TYPE_V1 0x0UL
+ #define CMDQ_CREATE_AH_TYPE_V2IPV4 0x2UL
+ #define CMDQ_CREATE_AH_TYPE_V2IPV6 0x3UL
+ #define CMDQ_CREATE_AH_TYPE_LAST CMDQ_CREATE_AH_TYPE_V2IPV6
+ u8 hop_limit;
+ __le16 sgid_index;
+ __le32 dest_vlan_id_flow_label;
+ #define CMDQ_CREATE_AH_FLOW_LABEL_MASK 0xfffffUL
+ #define CMDQ_CREATE_AH_FLOW_LABEL_SFT 0
+ #define CMDQ_CREATE_AH_DEST_VLAN_ID_MASK 0xfff00000UL
+ #define CMDQ_CREATE_AH_DEST_VLAN_ID_SFT 20
+ __le32 pd_id;
+ __le32 unused_0;
+ __le16 dest_mac[3];
+ u8 traffic_class;
+ u8 enable_cc;
+ #define CMDQ_CREATE_AH_ENABLE_CC 0x1UL
+};
+
+/* creq_create_ah_resp (size:128b/16B) */
+struct creq_create_ah_resp {
+ u8 type;
+ #define CREQ_CREATE_AH_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_CREATE_AH_RESP_TYPE_SFT 0
+ #define CREQ_CREATE_AH_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_CREATE_AH_RESP_TYPE_LAST CREQ_CREATE_AH_RESP_TYPE_QP_EVENT
+ u8 status;
+ __le16 cookie;
+ __le32 xid;
+ u8 v;
+ #define CREQ_CREATE_AH_RESP_V 0x1UL
+ u8 event;
+ #define CREQ_CREATE_AH_RESP_EVENT_CREATE_AH 0x15UL
+ #define CREQ_CREATE_AH_RESP_EVENT_LAST CREQ_CREATE_AH_RESP_EVENT_CREATE_AH
+ u8 reserved48[6];
+};
+
+/* cmdq_destroy_ah (size:192b/24B) */
+struct cmdq_destroy_ah {
+ u8 opcode;
+ #define CMDQ_DESTROY_AH_OPCODE_DESTROY_AH 0x16UL
+ #define CMDQ_DESTROY_AH_OPCODE_LAST CMDQ_DESTROY_AH_OPCODE_DESTROY_AH
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ __le32 ah_cid;
+ __le32 unused_0;
+};
+
+/* creq_destroy_ah_resp (size:128b/16B) */
+struct creq_destroy_ah_resp {
+ u8 type;
+ #define CREQ_DESTROY_AH_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_DESTROY_AH_RESP_TYPE_SFT 0
+ #define CREQ_DESTROY_AH_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_DESTROY_AH_RESP_TYPE_LAST CREQ_DESTROY_AH_RESP_TYPE_QP_EVENT
+ u8 status;
+ __le16 cookie;
+ __le32 xid;
+ u8 v;
+ #define CREQ_DESTROY_AH_RESP_V 0x1UL
+ u8 event;
+ #define CREQ_DESTROY_AH_RESP_EVENT_DESTROY_AH 0x16UL
+ #define CREQ_DESTROY_AH_RESP_EVENT_LAST CREQ_DESTROY_AH_RESP_EVENT_DESTROY_AH
+ u8 reserved48[6];
+};
+
+/* cmdq_query_roce_stats (size:192b/24B) */
+struct cmdq_query_roce_stats {
+ u8 opcode;
+ #define CMDQ_QUERY_ROCE_STATS_OPCODE_QUERY_ROCE_STATS 0x8eUL
+ #define CMDQ_QUERY_ROCE_STATS_OPCODE_LAST CMDQ_QUERY_ROCE_STATS_OPCODE_QUERY_ROCE_STATS
+ u8 cmd_size;
+ __le16 flags;
+ #define CMDQ_QUERY_ROCE_STATS_FLAGS_COLLECTION_ID 0x1UL
+ #define CMDQ_QUERY_ROCE_STATS_FLAGS_FUNCTION_ID 0x2UL
+ __le16 cookie;
+ u8 resp_size;
+ u8 collection_id;
+ __le64 resp_addr;
+ __le32 function_id;
+ #define CMDQ_QUERY_ROCE_STATS_PF_NUM_MASK 0xffUL
+ #define CMDQ_QUERY_ROCE_STATS_PF_NUM_SFT 0
+ #define CMDQ_QUERY_ROCE_STATS_VF_NUM_MASK 0xffff00UL
+ #define CMDQ_QUERY_ROCE_STATS_VF_NUM_SFT 8
+ #define CMDQ_QUERY_ROCE_STATS_VF_VALID 0x1000000UL
+ __le32 reserved32;
+};
+
+/* creq_query_roce_stats_resp (size:128b/16B) */
+struct creq_query_roce_stats_resp {
+ u8 type;
+ #define CREQ_QUERY_ROCE_STATS_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_QUERY_ROCE_STATS_RESP_TYPE_SFT 0
+ #define CREQ_QUERY_ROCE_STATS_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_QUERY_ROCE_STATS_RESP_TYPE_LAST CREQ_QUERY_ROCE_STATS_RESP_TYPE_QP_EVENT
+ u8 status;
+ __le16 cookie;
+ __le32 size;
+ u8 v;
+ #define CREQ_QUERY_ROCE_STATS_RESP_V 0x1UL
+ u8 event;
+ #define CREQ_QUERY_ROCE_STATS_RESP_EVENT_QUERY_ROCE_STATS 0x8eUL
+ #define CREQ_QUERY_ROCE_STATS_RESP_EVENT_LAST \
+ CREQ_QUERY_ROCE_STATS_RESP_EVENT_QUERY_ROCE_STATS
+ u8 reserved48[6];
+};
+
+/* creq_query_roce_stats_resp_sb (size:2944b/368B) */
+struct creq_query_roce_stats_resp_sb {
+ u8 opcode;
+ #define CREQ_QUERY_ROCE_STATS_RESP_SB_OPCODE_QUERY_ROCE_STATS 0x8eUL
+ #define CREQ_QUERY_ROCE_STATS_RESP_SB_OPCODE_LAST \
+ CREQ_QUERY_ROCE_STATS_RESP_SB_OPCODE_QUERY_ROCE_STATS
+ u8 status;
+ __le16 cookie;
+ __le16 flags;
+ u8 resp_size;
+ u8 rsvd;
+ __le32 num_counters;
+ __le32 rsvd1;
+ __le64 to_retransmits;
+ __le64 seq_err_naks_rcvd;
+ __le64 max_retry_exceeded;
+ __le64 rnr_naks_rcvd;
+ __le64 missing_resp;
+ __le64 unrecoverable_err;
+ __le64 bad_resp_err;
+ __le64 local_qp_op_err;
+ __le64 local_protection_err;
+ __le64 mem_mgmt_op_err;
+ __le64 remote_invalid_req_err;
+ __le64 remote_access_err;
+ __le64 remote_op_err;
+ __le64 dup_req;
+ __le64 res_exceed_max;
+ __le64 res_length_mismatch;
+ __le64 res_exceeds_wqe;
+ __le64 res_opcode_err;
+ __le64 res_rx_invalid_rkey;
+ __le64 res_rx_domain_err;
+ __le64 res_rx_no_perm;
+ __le64 res_rx_range_err;
+ __le64 res_tx_invalid_rkey;
+ __le64 res_tx_domain_err;
+ __le64 res_tx_no_perm;
+ __le64 res_tx_range_err;
+ __le64 res_irrq_oflow;
+ __le64 res_unsup_opcode;
+ __le64 res_unaligned_atomic;
+ __le64 res_rem_inv_err;
+ __le64 res_mem_error;
+ __le64 res_srq_err;
+ __le64 res_cmp_err;
+ __le64 res_invalid_dup_rkey;
+ __le64 res_wqe_format_err;
+ __le64 res_cq_load_err;
+ __le64 res_srq_load_err;
+ __le64 res_tx_pci_err;
+ __le64 res_rx_pci_err;
+ __le64 res_oos_drop_count;
+ __le64 active_qp_count_p0;
+ __le64 active_qp_count_p1;
+ __le64 active_qp_count_p2;
+ __le64 active_qp_count_p3;
+};
+
+/* cmdq_query_roce_stats_ext (size:192b/24B) */
+struct cmdq_query_roce_stats_ext {
+ u8 opcode;
+ #define CMDQ_QUERY_ROCE_STATS_EXT_OPCODE_QUERY_ROCE_STATS 0x92UL
+ #define CMDQ_QUERY_ROCE_STATS_EXT_OPCODE_LAST \
+ CMDQ_QUERY_ROCE_STATS_EXT_OPCODE_QUERY_ROCE_STATS
+ u8 cmd_size;
+ __le16 flags;
+ #define CMDQ_QUERY_ROCE_STATS_EXT_FLAGS_COLLECTION_ID 0x1UL
+ #define CMDQ_QUERY_ROCE_STATS_EXT_FLAGS_FUNCTION_ID 0x2UL
+ __le16 cookie;
+ u8 resp_size;
+ u8 collection_id;
+ __le64 resp_addr;
+ __le32 function_id;
+ #define CMDQ_QUERY_ROCE_STATS_EXT_PF_NUM_MASK 0xffUL
+ #define CMDQ_QUERY_ROCE_STATS_EXT_PF_NUM_SFT 0
+ #define CMDQ_QUERY_ROCE_STATS_EXT_VF_NUM_MASK 0xffff00UL
+ #define CMDQ_QUERY_ROCE_STATS_EXT_VF_NUM_SFT 8
+ #define CMDQ_QUERY_ROCE_STATS_EXT_VF_VALID 0x1000000UL
+ __le32 reserved32;
+};
+
+/* creq_query_roce_stats_ext_resp (size:128b/16B) */
+struct creq_query_roce_stats_ext_resp {
+ u8 type;
+ #define CREQ_QUERY_ROCE_STATS_EXT_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_QUERY_ROCE_STATS_EXT_RESP_TYPE_SFT 0
+ #define CREQ_QUERY_ROCE_STATS_EXT_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_QUERY_ROCE_STATS_EXT_RESP_TYPE_LAST \
+ CREQ_QUERY_ROCE_STATS_EXT_RESP_TYPE_QP_EVENT
+ u8 status;
+ __le16 cookie;
+ __le32 size;
+ u8 v;
+ #define CREQ_QUERY_ROCE_STATS_EXT_RESP_V 0x1UL
+ u8 event;
+ #define CREQ_QUERY_ROCE_STATS_EXT_RESP_EVENT_QUERY_ROCE_STATS_EXT 0x92UL
+ #define CREQ_QUERY_ROCE_STATS_EXT_RESP_EVENT_LAST \
+ CREQ_QUERY_ROCE_STATS_EXT_RESP_EVENT_QUERY_ROCE_STATS_EXT
+ u8 reserved48[6];
+};
+
+/* creq_query_roce_stats_ext_resp_sb (size:1856b/232B) */
+struct creq_query_roce_stats_ext_resp_sb {
+ u8 opcode;
+ #define CREQ_QUERY_ROCE_STATS_EXT_RESP_SB_OPCODE_QUERY_ROCE_STATS_EXT 0x92UL
+ #define CREQ_QUERY_ROCE_STATS_EXT_RESP_SB_OPCODE_LAST \
+ CREQ_QUERY_ROCE_STATS_EXT_RESP_SB_OPCODE_QUERY_ROCE_STATS_EXT
+ u8 status;
+ __le16 cookie;
+ __le16 flags;
+ u8 resp_size;
+ u8 rsvd;
+ __le64 tx_atomic_req_pkts;
+ __le64 tx_read_req_pkts;
+ __le64 tx_read_res_pkts;
+ __le64 tx_write_req_pkts;
+ __le64 tx_send_req_pkts;
+ __le64 tx_roce_pkts;
+ __le64 tx_roce_bytes;
+ __le64 rx_atomic_req_pkts;
+ __le64 rx_read_req_pkts;
+ __le64 rx_read_res_pkts;
+ __le64 rx_write_req_pkts;
+ __le64 rx_send_req_pkts;
+ __le64 rx_roce_pkts;
+ __le64 rx_roce_bytes;
+ __le64 rx_roce_good_pkts;
+ __le64 rx_roce_good_bytes;
+ __le64 rx_out_of_buffer_pkts;
+ __le64 rx_out_of_sequence_pkts;
+ __le64 tx_cnp_pkts;
+ __le64 rx_cnp_pkts;
+ __le64 rx_ecn_marked_pkts;
+ __le64 tx_cnp_bytes;
+ __le64 rx_cnp_bytes;
+ __le64 seq_err_naks_rcvd;
+ __le64 rnr_naks_rcvd;
+ __le64 missing_resp;
+ __le64 to_retransmit;
+ __le64 dup_req;
+};
+
+/* cmdq_roce_mirror_cfg (size:192b/24B) */
+struct cmdq_roce_mirror_cfg {
+ u8 opcode;
+ #define CMDQ_ROCE_MIRROR_CFG_OPCODE_ROCE_MIRROR_CFG 0x99UL
+ #define CMDQ_ROCE_MIRROR_CFG_OPCODE_LAST \
+ CMDQ_ROCE_MIRROR_CFG_OPCODE_ROCE_MIRROR_CFG
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ u8 mirror_flags;
+ #define CMDQ_ROCE_MIRROR_CFG_MIRROR_ENABLE 0x1UL
+ u8 rsvd[7];
+};
+
+/* creq_roce_mirror_cfg_resp (size:128b/16B) */
+struct creq_roce_mirror_cfg_resp {
+ u8 type;
+ #define CREQ_ROCE_MIRROR_CFG_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_ROCE_MIRROR_CFG_RESP_TYPE_SFT 0
+ #define CREQ_ROCE_MIRROR_CFG_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_ROCE_MIRROR_CFG_RESP_TYPE_LAST \
+ CREQ_ROCE_MIRROR_CFG_RESP_TYPE_QP_EVENT
+ u8 status;
+ __le16 cookie;
+ __le32 reserved32;
+ u8 v;
+ #define CREQ_ROCE_MIRROR_CFG_RESP_V 0x1UL
+ u8 event;
+ #define CREQ_ROCE_MIRROR_CFG_RESP_EVENT_ROCE_MIRROR_CFG 0x99UL
+ #define CREQ_ROCE_MIRROR_CFG_RESP_EVENT_LAST \
+ CREQ_ROCE_MIRROR_CFG_RESP_EVENT_ROCE_MIRROR_CFG
+ u8 reserved48[6];
+};
+
+/* cmdq_query_func (size:128b/16B) */
+struct cmdq_query_func {
+ u8 opcode;
+ #define CMDQ_QUERY_FUNC_OPCODE_QUERY_FUNC 0x83UL
+ #define CMDQ_QUERY_FUNC_OPCODE_LAST CMDQ_QUERY_FUNC_OPCODE_QUERY_FUNC
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+};
+
+/* creq_query_func_resp (size:128b/16B) */
+struct creq_query_func_resp {
+ u8 type;
+ #define CREQ_QUERY_FUNC_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_QUERY_FUNC_RESP_TYPE_SFT 0
+ #define CREQ_QUERY_FUNC_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_QUERY_FUNC_RESP_TYPE_LAST CREQ_QUERY_FUNC_RESP_TYPE_QP_EVENT
+ u8 status;
+ __le16 cookie;
+ __le32 size;
+ u8 v;
+ #define CREQ_QUERY_FUNC_RESP_V 0x1UL
+ u8 event;
+ #define CREQ_QUERY_FUNC_RESP_EVENT_QUERY_FUNC 0x83UL
+ #define CREQ_QUERY_FUNC_RESP_EVENT_LAST CREQ_QUERY_FUNC_RESP_EVENT_QUERY_FUNC
+ u8 reserved48[6];
+};
+
+/* creq_query_func_resp_sb (size:1088b/136B) */
+struct creq_query_func_resp_sb {
+ u8 opcode;
+ #define CREQ_QUERY_FUNC_RESP_SB_OPCODE_QUERY_FUNC 0x83UL
+ #define CREQ_QUERY_FUNC_RESP_SB_OPCODE_LAST CREQ_QUERY_FUNC_RESP_SB_OPCODE_QUERY_FUNC
+ u8 status;
+ __le16 cookie;
+ __le16 flags;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 max_mr_size;
+ __le32 max_qp;
+ __le16 max_qp_wr;
+ __le16 dev_cap_flags;
+ #define CREQ_QUERY_FUNC_RESP_SB_RESIZE_QP 0x1UL
+ #define CREQ_QUERY_FUNC_RESP_SB_CC_GENERATION_MASK 0xeUL
+ #define CREQ_QUERY_FUNC_RESP_SB_CC_GENERATION_SFT 1
+ #define CREQ_QUERY_FUNC_RESP_SB_CC_GENERATION_CC_GEN0 (0x0UL << 1)
+ #define CREQ_QUERY_FUNC_RESP_SB_CC_GENERATION_CC_GEN1 (0x1UL << 1)
+ #define CREQ_QUERY_FUNC_RESP_SB_CC_GENERATION_CC_GEN1_EXT (0x2UL << 1)
+ #define CREQ_QUERY_FUNC_RESP_SB_CC_GENERATION_LAST \
+ CREQ_QUERY_FUNC_RESP_SB_CC_GENERATION_CC_GEN1_EXT
+ #define CREQ_QUERY_FUNC_RESP_SB_EXT_STATS 0x10UL
+ #define CREQ_QUERY_FUNC_RESP_SB_MR_REGISTER_ALLOC 0x20UL
+ #define CREQ_QUERY_FUNC_RESP_SB_OPTIMIZED_TRANSMIT_ENABLED 0x40UL
+ #define CREQ_QUERY_FUNC_RESP_SB_CQE_V2 0x80UL
+ #define CREQ_QUERY_FUNC_RESP_SB_PINGPONG_PUSH_MODE 0x100UL
+ #define CREQ_QUERY_FUNC_RESP_SB_HW_REQUESTER_RETX_ENABLED 0x200UL
+ #define CREQ_QUERY_FUNC_RESP_SB_HW_RESPONDER_RETX_ENABLED 0x400UL
+ __le32 max_cq;
+ __le32 max_cqe;
+ __le32 max_pd;
+ u8 max_sge;
+ u8 max_srq_sge;
+ u8 max_qp_rd_atom;
+ u8 max_qp_init_rd_atom;
+ __le32 max_mr;
+ __le32 max_mw;
+ __le32 max_raw_eth_qp;
+ __le32 max_ah;
+ __le32 max_fmr;
+ __le32 max_srq_wr;
+ __le32 max_pkeys;
+ __le32 max_inline_data;
+ u8 max_map_per_fmr;
+ u8 l2_db_space_size;
+ __le16 max_srq;
+ __le32 max_gid;
+ __le32 tqm_alloc_reqs[12];
+ __le32 max_dpi;
+ u8 max_sge_var_wqe;
+ u8 dev_cap_ext_flags;
+ #define CREQ_QUERY_FUNC_RESP_SB_ATOMIC_OPS_NOT_SUPPORTED 0x1UL
+ #define CREQ_QUERY_FUNC_RESP_SB_DRV_VERSION_RGTR_SUPPORTED 0x2UL
+ #define CREQ_QUERY_FUNC_RESP_SB_CREATE_QP_BATCH_SUPPORTED 0x4UL
+ #define CREQ_QUERY_FUNC_RESP_SB_DESTROY_QP_BATCH_SUPPORTED 0x8UL
+ #define CREQ_QUERY_FUNC_RESP_SB_ROCE_STATS_EXT_CTX_SUPPORTED 0x10UL
+ #define CREQ_QUERY_FUNC_RESP_SB_CREATE_SRQ_SGE_SUPPORTED 0x20UL
+ #define CREQ_QUERY_FUNC_RESP_SB_FIXED_SIZE_WQE_DISABLED 0x40UL
+ #define CREQ_QUERY_FUNC_RESP_SB_DCN_SUPPORTED 0x80UL
+ __le16 max_inline_data_var_wqe;
+ __le32 start_qid;
+ u8 max_msn_table_size;
+ u8 reserved8_1;
+ __le16 dev_cap_ext_flags_2;
+ #define CREQ_QUERY_FUNC_RESP_SB_OPTIMIZE_MODIFY_QP_SUPPORTED 0x1UL
+ #define CREQ_QUERY_FUNC_RESP_SB_CHANGE_UDP_SRC_PORT_WQE_SUPPORTED 0x2UL
+ #define CREQ_QUERY_FUNC_RESP_SB_CQ_COALESCING_SUPPORTED 0x4UL
+ #define CREQ_QUERY_FUNC_RESP_SB_MEMORY_REGION_RO_SUPPORTED 0x8UL
+ #define CREQ_QUERY_FUNC_RESP_SB_REQ_RETRANSMISSION_SUPPORT_MASK 0x30UL
+ #define CREQ_QUERY_FUNC_RESP_SB_REQ_RETRANSMISSION_SUPPORT_SFT 4
+ #define CREQ_QUERY_FUNC_RESP_SB_REQ_RETRANSMISSION_SUPPORT_HOST_PSN_TABLE (0x0UL << 4)
+ #define CREQ_QUERY_FUNC_RESP_SB_REQ_RETRANSMISSION_SUPPORT_HOST_MSN_TABLE (0x1UL << 4)
+ #define CREQ_QUERY_FUNC_RESP_SB_REQ_RETRANSMISSION_SUPPORT_IQM_MSN_TABLE (0x2UL << 4)
+ #define CREQ_QUERY_FUNC_RESP_SB_REQ_RETRANSMISSION_SUPPORT_LAST \
+ CREQ_QUERY_FUNC_RESP_SB_REQ_RETRANSMISSION_SUPPORT_IQM_MSN_TABLE
+ #define CREQ_QUERY_FUNC_RESP_SB_MAX_SRQ_EXTENDED 0x40UL
+ #define CREQ_QUERY_FUNC_RESP_SB_MIN_RNR_RTR_RTS_OPT_SUPPORTED 0x1000UL
+ __le16 max_xp_qp_size;
+ __le16 create_qp_batch_size;
+ __le16 destroy_qp_batch_size;
+ __le16 max_srq_ext;
+ __le64 reserved64;
+};
+
+/* cmdq_set_func_resources (size:448b/56B) */
+struct cmdq_set_func_resources {
+ u8 opcode;
+ #define CMDQ_SET_FUNC_RESOURCES_OPCODE_SET_FUNC_RESOURCES 0x84UL
+ #define CMDQ_SET_FUNC_RESOURCES_OPCODE_LAST\
+ CMDQ_SET_FUNC_RESOURCES_OPCODE_SET_FUNC_RESOURCES
+ u8 cmd_size;
+ __le16 flags;
+ #define CMDQ_SET_FUNC_RESOURCES_FLAGS_MRAV_RESERVATION_SPLIT 0x1UL
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ __le32 number_of_qp;
+ __le32 number_of_mrw;
+ __le32 number_of_srq;
+ __le32 number_of_cq;
+ __le32 max_qp_per_vf;
+ __le32 max_mrw_per_vf;
+ __le32 max_srq_per_vf;
+ __le32 max_cq_per_vf;
+ __le32 max_gid_per_vf;
+ __le32 stat_ctx_id;
+};
+
+/* creq_set_func_resources_resp (size:128b/16B) */
+struct creq_set_func_resources_resp {
+ u8 type;
+ #define CREQ_SET_FUNC_RESOURCES_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_SET_FUNC_RESOURCES_RESP_TYPE_SFT 0
+ #define CREQ_SET_FUNC_RESOURCES_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_SET_FUNC_RESOURCES_RESP_TYPE_LAST CREQ_SET_FUNC_RESOURCES_RESP_TYPE_QP_EVENT
+ u8 status;
+ __le16 cookie;
+ __le32 reserved32;
+ u8 v;
+ #define CREQ_SET_FUNC_RESOURCES_RESP_V 0x1UL
+ u8 event;
+ #define CREQ_SET_FUNC_RESOURCES_RESP_EVENT_SET_FUNC_RESOURCES 0x84UL
+ #define CREQ_SET_FUNC_RESOURCES_RESP_EVENT_LAST \
+ CREQ_SET_FUNC_RESOURCES_RESP_EVENT_SET_FUNC_RESOURCES
+ u8 reserved48[6];
+};
+
+/* cmdq_read_context (size:192b/24B) */
+struct cmdq_read_context {
+ u8 opcode;
+ #define CMDQ_READ_CONTEXT_OPCODE_READ_CONTEXT 0x85UL
+ #define CMDQ_READ_CONTEXT_OPCODE_LAST CMDQ_READ_CONTEXT_OPCODE_READ_CONTEXT
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ __le32 xid;
+ u8 type;
+ #define CMDQ_READ_CONTEXT_TYPE_QPC 0x0UL
+ #define CMDQ_READ_CONTEXT_TYPE_CQ 0x1UL
+ #define CMDQ_READ_CONTEXT_TYPE_MRW 0x2UL
+ #define CMDQ_READ_CONTEXT_TYPE_SRQ 0x3UL
+ #define CMDQ_READ_CONTEXT_TYPE_LAST CMDQ_READ_CONTEXT_TYPE_SRQ
+ u8 unused_0[3];
+};
+
+/* creq_read_context (size:128b/16B) */
+struct creq_read_context {
+ u8 type;
+ #define CREQ_READ_CONTEXT_TYPE_MASK 0x3fUL
+ #define CREQ_READ_CONTEXT_TYPE_SFT 0
+ #define CREQ_READ_CONTEXT_TYPE_QP_EVENT 0x38UL
+ #define CREQ_READ_CONTEXT_TYPE_LAST CREQ_READ_CONTEXT_TYPE_QP_EVENT
+ u8 status;
+ __le16 cookie;
+ __le32 reserved32;
+ u8 v;
+ #define CREQ_READ_CONTEXT_V 0x1UL
+ u8 event;
+ #define CREQ_READ_CONTEXT_EVENT_READ_CONTEXT 0x85UL
+ #define CREQ_READ_CONTEXT_EVENT_LAST CREQ_READ_CONTEXT_EVENT_READ_CONTEXT
+ __le16 reserved16;
+ __le32 reserved_32;
+};
+
+/* cmdq_map_tc_to_cos (size:192b/24B) */
+struct cmdq_map_tc_to_cos {
+ u8 opcode;
+ #define CMDQ_MAP_TC_TO_COS_OPCODE_MAP_TC_TO_COS 0x8aUL
+ #define CMDQ_MAP_TC_TO_COS_OPCODE_LAST CMDQ_MAP_TC_TO_COS_OPCODE_MAP_TC_TO_COS
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ __le16 cos0;
+ #define CMDQ_MAP_TC_TO_COS_COS0_NO_CHANGE 0xffffUL
+ #define CMDQ_MAP_TC_TO_COS_COS0_LAST CMDQ_MAP_TC_TO_COS_COS0_NO_CHANGE
+ __le16 cos1;
+ #define CMDQ_MAP_TC_TO_COS_COS1_DISABLE 0x8000UL
+ #define CMDQ_MAP_TC_TO_COS_COS1_NO_CHANGE 0xffffUL
+ #define CMDQ_MAP_TC_TO_COS_COS1_LAST CMDQ_MAP_TC_TO_COS_COS1_NO_CHANGE
+ __le32 unused_0;
+};
+
+/* creq_map_tc_to_cos_resp (size:128b/16B) */
+struct creq_map_tc_to_cos_resp {
+ u8 type;
+ #define CREQ_MAP_TC_TO_COS_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_MAP_TC_TO_COS_RESP_TYPE_SFT 0
+ #define CREQ_MAP_TC_TO_COS_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_MAP_TC_TO_COS_RESP_TYPE_LAST CREQ_MAP_TC_TO_COS_RESP_TYPE_QP_EVENT
+ u8 status;
+ __le16 cookie;
+ __le32 reserved32;
+ u8 v;
+ #define CREQ_MAP_TC_TO_COS_RESP_V 0x1UL
+ u8 event;
+ #define CREQ_MAP_TC_TO_COS_RESP_EVENT_MAP_TC_TO_COS 0x8aUL
+ #define CREQ_MAP_TC_TO_COS_RESP_EVENT_LAST CREQ_MAP_TC_TO_COS_RESP_EVENT_MAP_TC_TO_COS
+ u8 reserved48[6];
+};
+
+/* cmdq_query_roce_cc (size:128b/16B) */
+struct cmdq_query_roce_cc {
+ u8 opcode;
+ #define CMDQ_QUERY_ROCE_CC_OPCODE_QUERY_ROCE_CC 0x8dUL
+ #define CMDQ_QUERY_ROCE_CC_OPCODE_LAST CMDQ_QUERY_ROCE_CC_OPCODE_QUERY_ROCE_CC
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+};
+
+/* creq_query_roce_cc_resp (size:128b/16B) */
+struct creq_query_roce_cc_resp {
+ u8 type;
+ #define CREQ_QUERY_ROCE_CC_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_QUERY_ROCE_CC_RESP_TYPE_SFT 0
+ #define CREQ_QUERY_ROCE_CC_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_QUERY_ROCE_CC_RESP_TYPE_LAST CREQ_QUERY_ROCE_CC_RESP_TYPE_QP_EVENT
+ u8 status;
+ __le16 cookie;
+ __le32 size;
+ u8 v;
+ #define CREQ_QUERY_ROCE_CC_RESP_V 0x1UL
+ u8 event;
+ #define CREQ_QUERY_ROCE_CC_RESP_EVENT_QUERY_ROCE_CC 0x8dUL
+ #define CREQ_QUERY_ROCE_CC_RESP_EVENT_LAST CREQ_QUERY_ROCE_CC_RESP_EVENT_QUERY_ROCE_CC
+ u8 reserved48[6];
+};
+
+/* creq_query_roce_cc_resp_sb (size:256b/32B) */
+struct creq_query_roce_cc_resp_sb {
+ u8 opcode;
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_OPCODE_QUERY_ROCE_CC 0x8dUL
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_OPCODE_LAST \
+ CREQ_QUERY_ROCE_CC_RESP_SB_OPCODE_QUERY_ROCE_CC
+ u8 status;
+ __le16 cookie;
+ __le16 flags;
+ u8 resp_size;
+ u8 reserved8;
+ u8 enable_cc;
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_ENABLE_CC 0x1UL
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_UNUSED7_MASK 0xfeUL
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_UNUSED7_SFT 1
+ u8 tos_dscp_tos_ecn;
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_TOS_ECN_MASK 0x3UL
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_TOS_ECN_SFT 0
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_TOS_DSCP_MASK 0xfcUL
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_TOS_DSCP_SFT 2
+ u8 g;
+ u8 num_phases_per_state;
+ __le16 init_cr;
+ __le16 init_tr;
+ u8 alt_vlan_pcp;
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_ALT_VLAN_PCP_MASK 0x7UL
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_ALT_VLAN_PCP_SFT 0
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_RSVD1_MASK 0xf8UL
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_RSVD1_SFT 3
+ u8 alt_tos_dscp;
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_ALT_TOS_DSCP_MASK 0x3fUL
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_ALT_TOS_DSCP_SFT 0
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_RSVD4_MASK 0xc0UL
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_RSVD4_SFT 6
+ u8 cc_mode;
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_CC_MODE_DCTCP 0x0UL
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_CC_MODE_PROBABILISTIC 0x1UL
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_CC_MODE_LAST \
+ CREQ_QUERY_ROCE_CC_RESP_SB_CC_MODE_PROBABILISTIC
+ u8 tx_queue;
+ __le16 rtt;
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_RTT_MASK 0x3fffUL
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_RTT_SFT 0
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_RSVD5_MASK 0xc000UL
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_RSVD5_SFT 14
+ __le16 tcp_cp;
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_TCP_CP_MASK 0x3ffUL
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_TCP_CP_SFT 0
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_RSVD6_MASK 0xfc00UL
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_RSVD6_SFT 10
+ __le16 inactivity_th;
+ u8 pkts_per_phase;
+ u8 time_per_phase;
+ __le32 reserved32;
+};
+
+/* creq_query_roce_cc_resp_sb_tlv (size:384b/48B) */
+struct creq_query_roce_cc_resp_sb_tlv {
+ __le16 cmd_discr;
+ u8 reserved_8b;
+ u8 tlv_flags;
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_TLV_TLV_FLAGS_MORE 0x1UL
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_TLV_TLV_FLAGS_MORE_LAST 0x0UL
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_TLV_TLV_FLAGS_MORE_NOT_LAST 0x1UL
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_TLV_TLV_FLAGS_REQUIRED 0x2UL
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_TLV_TLV_FLAGS_REQUIRED_NO (0x0UL << 1)
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_TLV_TLV_FLAGS_REQUIRED_YES (0x1UL << 1)
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_TLV_TLV_FLAGS_REQUIRED_LAST \
+ CREQ_QUERY_ROCE_CC_RESP_SB_TLV_TLV_FLAGS_REQUIRED_YES
+ __le16 tlv_type;
+ __le16 length;
+ u8 total_size;
+ u8 reserved56[7];
+ u8 opcode;
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_TLV_OPCODE_QUERY_ROCE_CC 0x8dUL
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_TLV_OPCODE_LAST \
+ CREQ_QUERY_ROCE_CC_RESP_SB_TLV_OPCODE_QUERY_ROCE_CC
+ u8 status;
+ __le16 cookie;
+ __le16 flags;
+ u8 resp_size;
+ u8 reserved8;
+ u8 enable_cc;
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_TLV_ENABLE_CC 0x1UL
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_TLV_UNUSED7_MASK 0xfeUL
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_TLV_UNUSED7_SFT 1
+ u8 tos_dscp_tos_ecn;
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_TLV_TOS_ECN_MASK 0x3UL
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_TLV_TOS_ECN_SFT 0
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_TLV_TOS_DSCP_MASK 0xfcUL
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_TLV_TOS_DSCP_SFT 2
+ u8 g;
+ u8 num_phases_per_state;
+ __le16 init_cr;
+ __le16 init_tr;
+ u8 alt_vlan_pcp;
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_TLV_ALT_VLAN_PCP_MASK 0x7UL
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_TLV_ALT_VLAN_PCP_SFT 0
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_TLV_RSVD1_MASK 0xf8UL
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_TLV_RSVD1_SFT 3
+ u8 alt_tos_dscp;
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_TLV_ALT_TOS_DSCP_MASK 0x3fUL
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_TLV_ALT_TOS_DSCP_SFT 0
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_TLV_RSVD4_MASK 0xc0UL
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_TLV_RSVD4_SFT 6
+ u8 cc_mode;
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_TLV_CC_MODE_DCTCP 0x0UL
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_TLV_CC_MODE_PROBABILISTIC 0x1UL
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_TLV_CC_MODE_LAST\
+ CREQ_QUERY_ROCE_CC_RESP_SB_TLV_CC_MODE_PROBABILISTIC
+ u8 tx_queue;
+ __le16 rtt;
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_TLV_RTT_MASK 0x3fffUL
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_TLV_RTT_SFT 0
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_TLV_RSVD5_MASK 0xc000UL
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_TLV_RSVD5_SFT 14
+ __le16 tcp_cp;
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_TLV_TCP_CP_MASK 0x3ffUL
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_TLV_TCP_CP_SFT 0
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_TLV_RSVD6_MASK 0xfc00UL
+ #define CREQ_QUERY_ROCE_CC_RESP_SB_TLV_RSVD6_SFT 10
+ __le16 inactivity_th;
+ u8 pkts_per_phase;
+ u8 time_per_phase;
+ __le32 reserved32;
+};
+
+/* creq_query_roce_cc_gen1_resp_sb_tlv (size:704b/88B) */
+struct creq_query_roce_cc_gen1_resp_sb_tlv {
+ __le16 cmd_discr;
+ u8 reserved_8b;
+ u8 tlv_flags;
+ #define CREQ_QUERY_ROCE_CC_GEN1_RESP_SB_TLV_TLV_FLAGS_MORE 0x1UL
+ #define CREQ_QUERY_ROCE_CC_GEN1_RESP_SB_TLV_TLV_FLAGS_MORE_LAST 0x0UL
+ #define CREQ_QUERY_ROCE_CC_GEN1_RESP_SB_TLV_TLV_FLAGS_MORE_NOT_LAST 0x1UL
+ #define CREQ_QUERY_ROCE_CC_GEN1_RESP_SB_TLV_TLV_FLAGS_REQUIRED 0x2UL
+ #define CREQ_QUERY_ROCE_CC_GEN1_RESP_SB_TLV_TLV_FLAGS_REQUIRED_NO (0x0UL << 1)
+ #define CREQ_QUERY_ROCE_CC_GEN1_RESP_SB_TLV_TLV_FLAGS_REQUIRED_YES (0x1UL << 1)
+ #define CREQ_QUERY_ROCE_CC_GEN1_RESP_SB_TLV_TLV_FLAGS_REQUIRED_LAST \
+ CREQ_QUERY_ROCE_CC_GEN1_RESP_SB_TLV_TLV_FLAGS_REQUIRED_YES
+ __le16 tlv_type;
+ __le16 length;
+ __le64 reserved64;
+ __le16 inactivity_th_hi;
+ __le16 min_time_between_cnps;
+ __le16 init_cp;
+ u8 tr_update_mode;
+ u8 tr_update_cycles;
+ u8 fr_num_rtts;
+ u8 ai_rate_increase;
+ __le16 reduction_relax_rtts_th;
+ __le16 additional_relax_cr_th;
+ __le16 cr_min_th;
+ u8 bw_avg_weight;
+ u8 actual_cr_factor;
+ __le16 max_cp_cr_th;
+ u8 cp_bias_en;
+ u8 cp_bias;
+ u8 cnp_ecn;
+ #define CREQ_QUERY_ROCE_CC_GEN1_RESP_SB_TLV_CNP_ECN_NOT_ECT 0x0UL
+ #define CREQ_QUERY_ROCE_CC_GEN1_RESP_SB_TLV_CNP_ECN_ECT_1 0x1UL
+ #define CREQ_QUERY_ROCE_CC_GEN1_RESP_SB_TLV_CNP_ECN_ECT_0 0x2UL
+ #define CREQ_QUERY_ROCE_CC_GEN1_RESP_SB_TLV_CNP_ECN_LAST \
+ CREQ_QUERY_ROCE_CC_GEN1_RESP_SB_TLV_CNP_ECN_ECT_0
+ u8 rtt_jitter_en;
+ __le16 link_bytes_per_usec;
+ __le16 reset_cc_cr_th;
+ u8 cr_width;
+ u8 quota_period_min;
+ u8 quota_period_max;
+ u8 quota_period_abs_max;
+ __le16 tr_lower_bound;
+ u8 cr_prob_factor;
+ u8 tr_prob_factor;
+ __le16 fairness_cr_th;
+ u8 red_div;
+ u8 cnp_ratio_th;
+ __le16 exp_ai_rtts;
+ u8 exp_ai_cr_cp_ratio;
+ u8 use_rate_table;
+ __le16 cp_exp_update_th;
+ __le16 high_exp_ai_rtts_th1;
+ __le16 high_exp_ai_rtts_th2;
+ __le16 actual_cr_cong_free_rtts_th;
+ __le16 severe_cong_cr_th1;
+ __le16 severe_cong_cr_th2;
+ __le32 link64B_per_rtt;
+ u8 cc_ack_bytes;
+ u8 reduce_init_en;
+ __le16 reduce_init_cong_free_rtts_th;
+ u8 random_no_red_en;
+ u8 actual_cr_shift_correction_en;
+ u8 quota_period_adjust_en;
+ u8 reserved[5];
+};
+
+/* cmdq_modify_roce_cc (size:448b/56B) */
+struct cmdq_modify_roce_cc {
+ u8 opcode;
+ #define CMDQ_MODIFY_ROCE_CC_OPCODE_MODIFY_ROCE_CC 0x8cUL
+ #define CMDQ_MODIFY_ROCE_CC_OPCODE_LAST CMDQ_MODIFY_ROCE_CC_OPCODE_MODIFY_ROCE_CC
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ __le32 modify_mask;
+ #define CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ENABLE_CC 0x1UL
+ #define CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_G 0x2UL
+ #define CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_NUMPHASEPERSTATE 0x4UL
+ #define CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_INIT_CR 0x8UL
+ #define CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_INIT_TR 0x10UL
+ #define CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TOS_ECN 0x20UL
+ #define CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TOS_DSCP 0x40UL
+ #define CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ALT_VLAN_PCP 0x80UL
+ #define CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ALT_TOS_DSCP 0x100UL
+ #define CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_RTT 0x200UL
+ #define CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_CC_MODE 0x400UL
+ #define CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TCP_CP 0x800UL
+ #define CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TX_QUEUE 0x1000UL
+ #define CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_INACTIVITY_CP 0x2000UL
+ #define CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TIME_PER_PHASE 0x4000UL
+ #define CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_PKTS_PER_PHASE 0x8000UL
+ u8 enable_cc;
+ #define CMDQ_MODIFY_ROCE_CC_ENABLE_CC 0x1UL
+ #define CMDQ_MODIFY_ROCE_CC_RSVD1_MASK 0xfeUL
+ #define CMDQ_MODIFY_ROCE_CC_RSVD1_SFT 1
+ u8 g;
+ u8 num_phases_per_state;
+ u8 pkts_per_phase;
+ __le16 init_cr;
+ __le16 init_tr;
+ u8 tos_dscp_tos_ecn;
+ #define CMDQ_MODIFY_ROCE_CC_TOS_ECN_MASK 0x3UL
+ #define CMDQ_MODIFY_ROCE_CC_TOS_ECN_SFT 0
+ #define CMDQ_MODIFY_ROCE_CC_TOS_DSCP_MASK 0xfcUL
+ #define CMDQ_MODIFY_ROCE_CC_TOS_DSCP_SFT 2
+ u8 alt_vlan_pcp;
+ #define CMDQ_MODIFY_ROCE_CC_ALT_VLAN_PCP_MASK 0x7UL
+ #define CMDQ_MODIFY_ROCE_CC_ALT_VLAN_PCP_SFT 0
+ #define CMDQ_MODIFY_ROCE_CC_RSVD3_MASK 0xf8UL
+ #define CMDQ_MODIFY_ROCE_CC_RSVD3_SFT 3
+ __le16 alt_tos_dscp;
+ #define CMDQ_MODIFY_ROCE_CC_ALT_TOS_DSCP_MASK 0x3fUL
+ #define CMDQ_MODIFY_ROCE_CC_ALT_TOS_DSCP_SFT 0
+ #define CMDQ_MODIFY_ROCE_CC_RSVD4_MASK 0xffc0UL
+ #define CMDQ_MODIFY_ROCE_CC_RSVD4_SFT 6
+ __le16 rtt;
+ #define CMDQ_MODIFY_ROCE_CC_RTT_MASK 0x3fffUL
+ #define CMDQ_MODIFY_ROCE_CC_RTT_SFT 0
+ #define CMDQ_MODIFY_ROCE_CC_RSVD5_MASK 0xc000UL
+ #define CMDQ_MODIFY_ROCE_CC_RSVD5_SFT 14
+ __le16 tcp_cp;
+ #define CMDQ_MODIFY_ROCE_CC_TCP_CP_MASK 0x3ffUL
+ #define CMDQ_MODIFY_ROCE_CC_TCP_CP_SFT 0
+ #define CMDQ_MODIFY_ROCE_CC_RSVD6_MASK 0xfc00UL
+ #define CMDQ_MODIFY_ROCE_CC_RSVD6_SFT 10
+ u8 cc_mode;
+ #define CMDQ_MODIFY_ROCE_CC_CC_MODE_DCTCP_CC_MODE 0x0UL
+ #define CMDQ_MODIFY_ROCE_CC_CC_MODE_PROBABILISTIC_CC_MODE 0x1UL
+ #define CMDQ_MODIFY_ROCE_CC_CC_MODE_LAST CMDQ_MODIFY_ROCE_CC_CC_MODE_PROBABILISTIC_CC_MODE
+ u8 tx_queue;
+ __le16 inactivity_th;
+ u8 time_per_phase;
+ u8 reserved8_1;
+ __le16 reserved16;
+ __le32 reserved32;
+ __le64 reserved64;
+};
+
+/* cmdq_modify_roce_cc_tlv (size:640b/80B) */
+struct cmdq_modify_roce_cc_tlv {
+ __le16 cmd_discr;
+ u8 reserved_8b;
+ u8 tlv_flags;
+ #define CMDQ_MODIFY_ROCE_CC_TLV_TLV_FLAGS_MORE 0x1UL
+ #define CMDQ_MODIFY_ROCE_CC_TLV_TLV_FLAGS_MORE_LAST 0x0UL
+ #define CMDQ_MODIFY_ROCE_CC_TLV_TLV_FLAGS_MORE_NOT_LAST 0x1UL
+ #define CMDQ_MODIFY_ROCE_CC_TLV_TLV_FLAGS_REQUIRED 0x2UL
+ #define CMDQ_MODIFY_ROCE_CC_TLV_TLV_FLAGS_REQUIRED_NO (0x0UL << 1)
+ #define CMDQ_MODIFY_ROCE_CC_TLV_TLV_FLAGS_REQUIRED_YES (0x1UL << 1)
+ #define CMDQ_MODIFY_ROCE_CC_TLV_TLV_FLAGS_REQUIRED_LAST \
+ CMDQ_MODIFY_ROCE_CC_TLV_TLV_FLAGS_REQUIRED_YES
+ __le16 tlv_type;
+ __le16 length;
+ u8 total_size;
+ u8 reserved56[7];
+ u8 opcode;
+ #define CMDQ_MODIFY_ROCE_CC_TLV_OPCODE_MODIFY_ROCE_CC 0x8cUL
+ #define CMDQ_MODIFY_ROCE_CC_TLV_OPCODE_LAST CMDQ_MODIFY_ROCE_CC_TLV_OPCODE_MODIFY_ROCE_CC
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ __le32 modify_mask;
+ #define CMDQ_MODIFY_ROCE_CC_TLV_MODIFY_MASK_ENABLE_CC 0x1UL
+ #define CMDQ_MODIFY_ROCE_CC_TLV_MODIFY_MASK_G 0x2UL
+ #define CMDQ_MODIFY_ROCE_CC_TLV_MODIFY_MASK_NUMPHASEPERSTATE 0x4UL
+ #define CMDQ_MODIFY_ROCE_CC_TLV_MODIFY_MASK_INIT_CR 0x8UL
+ #define CMDQ_MODIFY_ROCE_CC_TLV_MODIFY_MASK_INIT_TR 0x10UL
+ #define CMDQ_MODIFY_ROCE_CC_TLV_MODIFY_MASK_TOS_ECN 0x20UL
+ #define CMDQ_MODIFY_ROCE_CC_TLV_MODIFY_MASK_TOS_DSCP 0x40UL
+ #define CMDQ_MODIFY_ROCE_CC_TLV_MODIFY_MASK_ALT_VLAN_PCP 0x80UL
+ #define CMDQ_MODIFY_ROCE_CC_TLV_MODIFY_MASK_ALT_TOS_DSCP 0x100UL
+ #define CMDQ_MODIFY_ROCE_CC_TLV_MODIFY_MASK_RTT 0x200UL
+ #define CMDQ_MODIFY_ROCE_CC_TLV_MODIFY_MASK_CC_MODE 0x400UL
+ #define CMDQ_MODIFY_ROCE_CC_TLV_MODIFY_MASK_TCP_CP 0x800UL
+ #define CMDQ_MODIFY_ROCE_CC_TLV_MODIFY_MASK_TX_QUEUE 0x1000UL
+ #define CMDQ_MODIFY_ROCE_CC_TLV_MODIFY_MASK_INACTIVITY_CP 0x2000UL
+ #define CMDQ_MODIFY_ROCE_CC_TLV_MODIFY_MASK_TIME_PER_PHASE 0x4000UL
+ #define CMDQ_MODIFY_ROCE_CC_TLV_MODIFY_MASK_PKTS_PER_PHASE 0x8000UL
+ u8 enable_cc;
+ #define CMDQ_MODIFY_ROCE_CC_TLV_ENABLE_CC 0x1UL
+ #define CMDQ_MODIFY_ROCE_CC_TLV_RSVD1_MASK 0xfeUL
+ #define CMDQ_MODIFY_ROCE_CC_TLV_RSVD1_SFT 1
+ u8 g;
+ u8 num_phases_per_state;
+ u8 pkts_per_phase;
+ __le16 init_cr;
+ __le16 init_tr;
+ u8 tos_dscp_tos_ecn;
+ #define CMDQ_MODIFY_ROCE_CC_TLV_TOS_ECN_MASK 0x3UL
+ #define CMDQ_MODIFY_ROCE_CC_TLV_TOS_ECN_SFT 0
+ #define CMDQ_MODIFY_ROCE_CC_TLV_TOS_DSCP_MASK 0xfcUL
+ #define CMDQ_MODIFY_ROCE_CC_TLV_TOS_DSCP_SFT 2
+ u8 alt_vlan_pcp;
+ #define CMDQ_MODIFY_ROCE_CC_TLV_ALT_VLAN_PCP_MASK 0x7UL
+ #define CMDQ_MODIFY_ROCE_CC_TLV_ALT_VLAN_PCP_SFT 0
+ #define CMDQ_MODIFY_ROCE_CC_TLV_RSVD3_MASK 0xf8UL
+ #define CMDQ_MODIFY_ROCE_CC_TLV_RSVD3_SFT 3
+ __le16 alt_tos_dscp;
+ #define CMDQ_MODIFY_ROCE_CC_TLV_ALT_TOS_DSCP_MASK 0x3fUL
+ #define CMDQ_MODIFY_ROCE_CC_TLV_ALT_TOS_DSCP_SFT 0
+ #define CMDQ_MODIFY_ROCE_CC_TLV_RSVD4_MASK 0xffc0UL
+ #define CMDQ_MODIFY_ROCE_CC_TLV_RSVD4_SFT 6
+ __le16 rtt;
+ #define CMDQ_MODIFY_ROCE_CC_TLV_RTT_MASK 0x3fffUL
+ #define CMDQ_MODIFY_ROCE_CC_TLV_RTT_SFT 0
+ #define CMDQ_MODIFY_ROCE_CC_TLV_RSVD5_MASK 0xc000UL
+ #define CMDQ_MODIFY_ROCE_CC_TLV_RSVD5_SFT 14
+ __le16 tcp_cp;
+ #define CMDQ_MODIFY_ROCE_CC_TLV_TCP_CP_MASK 0x3ffUL
+ #define CMDQ_MODIFY_ROCE_CC_TLV_TCP_CP_SFT 0
+ #define CMDQ_MODIFY_ROCE_CC_TLV_RSVD6_MASK 0xfc00UL
+ #define CMDQ_MODIFY_ROCE_CC_TLV_RSVD6_SFT 10
+ u8 cc_mode;
+ #define CMDQ_MODIFY_ROCE_CC_TLV_CC_MODE_DCTCP_CC_MODE 0x0UL
+ #define CMDQ_MODIFY_ROCE_CC_TLV_CC_MODE_PROBABILISTIC_CC_MODE 0x1UL
+ #define CMDQ_MODIFY_ROCE_CC_TLV_CC_MODE_LAST\
+ CMDQ_MODIFY_ROCE_CC_TLV_CC_MODE_PROBABILISTIC_CC_MODE
+ u8 tx_queue;
+ __le16 inactivity_th;
+ u8 time_per_phase;
+ u8 reserved8_1;
+ __le16 reserved16;
+ __le32 reserved32;
+ __le64 reserved64;
+ __le64 reservedtlvpad;
+};
+
+/* cmdq_modify_roce_cc_gen1_tlv (size:768b/96B) */
+struct cmdq_modify_roce_cc_gen1_tlv {
+ __le16 cmd_discr;
+ u8 reserved_8b;
+ u8 tlv_flags;
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_TLV_FLAGS_MORE 0x1UL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_TLV_FLAGS_MORE_LAST 0x0UL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_TLV_FLAGS_MORE_NOT_LAST 0x1UL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_TLV_FLAGS_REQUIRED 0x2UL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_TLV_FLAGS_REQUIRED_NO (0x0UL << 1)
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_TLV_FLAGS_REQUIRED_YES (0x1UL << 1)
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_TLV_FLAGS_REQUIRED_LAST\
+ CMDQ_MODIFY_ROCE_CC_GEN1_TLV_TLV_FLAGS_REQUIRED_YES
+ __le16 tlv_type;
+ __le16 length;
+ __le64 reserved64;
+ __le64 modify_mask;
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_MODIFY_MASK_MIN_TIME_BETWEEN_CNPS 0x1UL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_MODIFY_MASK_INIT_CP 0x2UL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_MODIFY_MASK_TR_UPDATE_MODE 0x4UL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_MODIFY_MASK_TR_UPDATE_CYCLES 0x8UL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_MODIFY_MASK_FR_NUM_RTTS 0x10UL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_MODIFY_MASK_AI_RATE_INCREASE 0x20UL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_MODIFY_MASK_REDUCTION_RELAX_RTTS_TH 0x40UL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_MODIFY_MASK_ADDITIONAL_RELAX_CR_TH 0x80UL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_MODIFY_MASK_CR_MIN_TH 0x100UL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_MODIFY_MASK_BW_AVG_WEIGHT 0x200UL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_MODIFY_MASK_ACTUAL_CR_FACTOR 0x400UL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_MODIFY_MASK_MAX_CP_CR_TH 0x800UL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_MODIFY_MASK_CP_BIAS_EN 0x1000UL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_MODIFY_MASK_CP_BIAS 0x2000UL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_MODIFY_MASK_CNP_ECN 0x4000UL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_MODIFY_MASK_RTT_JITTER_EN 0x8000UL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_MODIFY_MASK_LINK_BYTES_PER_USEC 0x10000UL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_MODIFY_MASK_RESET_CC_CR_TH 0x20000UL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_MODIFY_MASK_CR_WIDTH 0x40000UL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_MODIFY_MASK_QUOTA_PERIOD_MIN 0x80000UL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_MODIFY_MASK_QUOTA_PERIOD_MAX 0x100000UL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_MODIFY_MASK_QUOTA_PERIOD_ABS_MAX 0x200000UL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_MODIFY_MASK_TR_LOWER_BOUND 0x400000UL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_MODIFY_MASK_CR_PROB_FACTOR 0x800000UL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_MODIFY_MASK_TR_PROB_FACTOR 0x1000000UL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_MODIFY_MASK_FAIRNESS_CR_TH 0x2000000UL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_MODIFY_MASK_RED_DIV 0x4000000UL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_MODIFY_MASK_CNP_RATIO_TH 0x8000000UL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_MODIFY_MASK_EXP_AI_RTTS 0x10000000UL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_MODIFY_MASK_EXP_AI_CR_CP_RATIO 0x20000000UL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_MODIFY_MASK_CP_EXP_UPDATE_TH 0x40000000UL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_MODIFY_MASK_HIGH_EXP_AI_RTTS_TH1 0x80000000UL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_MODIFY_MASK_HIGH_EXP_AI_RTTS_TH2 0x100000000ULL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_MODIFY_MASK_USE_RATE_TABLE 0x200000000ULL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_MODIFY_MASK_LINK64B_PER_RTT 0x400000000ULL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_MODIFY_MASK_ACTUAL_CR_CONG_FREE_RTTS_TH 0x800000000ULL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_MODIFY_MASK_SEVERE_CONG_CR_TH1 0x1000000000ULL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_MODIFY_MASK_SEVERE_CONG_CR_TH2 0x2000000000ULL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_MODIFY_MASK_CC_ACK_BYTES 0x4000000000ULL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_MODIFY_MASK_REDUCE_INIT_EN 0x8000000000ULL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_MODIFY_MASK_REDUCE_INIT_CONG_FREE_RTTS_TH \
+ 0x10000000000ULL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_MODIFY_MASK_RANDOM_NO_RED_EN 0x20000000000ULL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_MODIFY_MASK_ACTUAL_CR_SHIFT_CORRECTION_EN \
+ 0x40000000000ULL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_MODIFY_MASK_QUOTA_PERIOD_ADJUST_EN 0x80000000000ULL
+ __le16 inactivity_th_hi;
+ __le16 min_time_between_cnps;
+ __le16 init_cp;
+ u8 tr_update_mode;
+ u8 tr_update_cycles;
+ u8 fr_num_rtts;
+ u8 ai_rate_increase;
+ __le16 reduction_relax_rtts_th;
+ __le16 additional_relax_cr_th;
+ __le16 cr_min_th;
+ u8 bw_avg_weight;
+ u8 actual_cr_factor;
+ __le16 max_cp_cr_th;
+ u8 cp_bias_en;
+ u8 cp_bias;
+ u8 cnp_ecn;
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_CNP_ECN_NOT_ECT 0x0UL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_CNP_ECN_ECT_1 0x1UL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_CNP_ECN_ECT_0 0x2UL
+ #define CMDQ_MODIFY_ROCE_CC_GEN1_TLV_CNP_ECN_LAST CMDQ_MODIFY_ROCE_CC_GEN1_TLV_CNP_ECN_ECT_0
+ u8 rtt_jitter_en;
+ __le16 link_bytes_per_usec;
+ __le16 reset_cc_cr_th;
+ u8 cr_width;
+ u8 quota_period_min;
+ u8 quota_period_max;
+ u8 quota_period_abs_max;
+ __le16 tr_lower_bound;
+ u8 cr_prob_factor;
+ u8 tr_prob_factor;
+ __le16 fairness_cr_th;
+ u8 red_div;
+ u8 cnp_ratio_th;
+ __le16 exp_ai_rtts;
+ u8 exp_ai_cr_cp_ratio;
+ u8 use_rate_table;
+ __le16 cp_exp_update_th;
+ __le16 high_exp_ai_rtts_th1;
+ __le16 high_exp_ai_rtts_th2;
+ __le16 actual_cr_cong_free_rtts_th;
+ __le16 severe_cong_cr_th1;
+ __le16 severe_cong_cr_th2;
+ __le32 link64B_per_rtt;
+ u8 cc_ack_bytes;
+ u8 reduce_init_en;
+ __le16 reduce_init_cong_free_rtts_th;
+ u8 random_no_red_en;
+ u8 actual_cr_shift_correction_en;
+ u8 quota_period_adjust_en;
+ u8 reserved[5];
+};
+
+/* creq_modify_roce_cc_resp (size:128b/16B) */
+struct creq_modify_roce_cc_resp {
+ u8 type;
+ #define CREQ_MODIFY_ROCE_CC_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_MODIFY_ROCE_CC_RESP_TYPE_SFT 0
+ #define CREQ_MODIFY_ROCE_CC_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_MODIFY_ROCE_CC_RESP_TYPE_LAST CREQ_MODIFY_ROCE_CC_RESP_TYPE_QP_EVENT
+ u8 status;
+ __le16 cookie;
+ __le32 reserved32;
+ u8 v;
+ #define CREQ_MODIFY_ROCE_CC_RESP_V 0x1UL
+ u8 event;
+ #define CREQ_MODIFY_ROCE_CC_RESP_EVENT_MODIFY_ROCE_CC 0x8cUL
+ #define CREQ_MODIFY_ROCE_CC_RESP_EVENT_LAST CREQ_MODIFY_ROCE_CC_RESP_EVENT_MODIFY_ROCE_CC
+ u8 reserved48[6];
+};
+
+/* cmdq_set_link_aggr_mode_cc (size:320b/40B) */
+struct cmdq_set_link_aggr_mode_cc {
+ u8 opcode;
+ #define CMDQ_SET_LINK_AGGR_MODE_OPCODE_SET_LINK_AGGR_MODE 0x8fUL
+ #define CMDQ_SET_LINK_AGGR_MODE_OPCODE_LAST \
+ CMDQ_SET_LINK_AGGR_MODE_OPCODE_SET_LINK_AGGR_MODE
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ __le32 modify_mask;
+ #define CMDQ_SET_LINK_AGGR_MODE_MODIFY_MASK_AGGR_EN 0x1UL
+ #define CMDQ_SET_LINK_AGGR_MODE_MODIFY_MASK_ACTIVE_PORT_MAP 0x2UL
+ #define CMDQ_SET_LINK_AGGR_MODE_MODIFY_MASK_MEMBER_PORT_MAP 0x4UL
+ #define CMDQ_SET_LINK_AGGR_MODE_MODIFY_MASK_AGGR_MODE 0x8UL
+ #define CMDQ_SET_LINK_AGGR_MODE_MODIFY_MASK_STAT_CTX_ID 0x10UL
+ u8 aggr_enable;
+ #define CMDQ_SET_LINK_AGGR_MODE_AGGR_ENABLE 0x1UL
+ #define CMDQ_SET_LINK_AGGR_MODE_RSVD1_MASK 0xfeUL
+ #define CMDQ_SET_LINK_AGGR_MODE_RSVD1_SFT 1
+ u8 active_port_map;
+ #define CMDQ_SET_LINK_AGGR_MODE_ACTIVE_PORT_MAP_MASK 0xfUL
+ #define CMDQ_SET_LINK_AGGR_MODE_ACTIVE_PORT_MAP_SFT 0
+ #define CMDQ_SET_LINK_AGGR_MODE_RSVD2_MASK 0xf0UL
+ #define CMDQ_SET_LINK_AGGR_MODE_RSVD2_SFT 4
+ u8 member_port_map;
+ u8 link_aggr_mode;
+ #define CMDQ_SET_LINK_AGGR_MODE_AGGR_MODE_ACTIVE_ACTIVE 0x1UL
+ #define CMDQ_SET_LINK_AGGR_MODE_AGGR_MODE_ACTIVE_BACKUP 0x2UL
+ #define CMDQ_SET_LINK_AGGR_MODE_AGGR_MODE_BALANCE_XOR 0x3UL
+ #define CMDQ_SET_LINK_AGGR_MODE_AGGR_MODE_802_3_AD 0x4UL
+ #define CMDQ_SET_LINK_AGGR_MODE_AGGR_MODE_LAST CMDQ_SET_LINK_AGGR_MODE_AGGR_MODE_802_3_AD
+ __le16 stat_ctx_id[4];
+ __le64 rsvd1;
+};
+
+/* creq_set_link_aggr_mode_resources_resp (size:128b/16B) */
+struct creq_set_link_aggr_mode_resources_resp {
+ u8 type;
+ #define CREQ_SET_LINK_AGGR_MODE_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_SET_LINK_AGGR_MODE_RESP_TYPE_SFT 0
+ #define CREQ_SET_LINK_AGGR_MODE_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_SET_LINK_AGGR_MODE_RESP_TYPE_LAST CREQ_SET_LINK_AGGR_MODE_RESP_TYPE_QP_EVENT
+ u8 status;
+ __le16 cookie;
+ __le32 reserved32;
+ u8 v;
+ #define CREQ_SET_LINK_AGGR_MODE_RESP_V 0x1UL
+ u8 event;
+ #define CREQ_SET_LINK_AGGR_MODE_RESP_EVENT_SET_LINK_AGGR_MODE 0x8fUL
+ #define CREQ_SET_LINK_AGGR_MODE_RESP_EVENT_LAST\
+ CREQ_SET_LINK_AGGR_MODE_RESP_EVENT_SET_LINK_AGGR_MODE
+ u8 reserved48[6];
+};
+
+/* creq_func_event (size:128b/16B) */
+struct creq_func_event {
+ u8 type;
+ #define CREQ_FUNC_EVENT_TYPE_MASK 0x3fUL
+ #define CREQ_FUNC_EVENT_TYPE_SFT 0
+ #define CREQ_FUNC_EVENT_TYPE_FUNC_EVENT 0x3aUL
+ #define CREQ_FUNC_EVENT_TYPE_LAST CREQ_FUNC_EVENT_TYPE_FUNC_EVENT
+ u8 reserved56[7];
+ u8 v;
+ #define CREQ_FUNC_EVENT_V 0x1UL
+ u8 event;
+ #define CREQ_FUNC_EVENT_EVENT_TX_WQE_ERROR 0x1UL
+ #define CREQ_FUNC_EVENT_EVENT_TX_DATA_ERROR 0x2UL
+ #define CREQ_FUNC_EVENT_EVENT_RX_WQE_ERROR 0x3UL
+ #define CREQ_FUNC_EVENT_EVENT_RX_DATA_ERROR 0x4UL
+ #define CREQ_FUNC_EVENT_EVENT_CQ_ERROR 0x5UL
+ #define CREQ_FUNC_EVENT_EVENT_TQM_ERROR 0x6UL
+ #define CREQ_FUNC_EVENT_EVENT_CFCQ_ERROR 0x7UL
+ #define CREQ_FUNC_EVENT_EVENT_CFCS_ERROR 0x8UL
+ #define CREQ_FUNC_EVENT_EVENT_CFCC_ERROR 0x9UL
+ #define CREQ_FUNC_EVENT_EVENT_CFCM_ERROR 0xaUL
+ #define CREQ_FUNC_EVENT_EVENT_TIM_ERROR 0xbUL
+ #define CREQ_FUNC_EVENT_EVENT_VF_COMM_REQUEST 0x80UL
+ #define CREQ_FUNC_EVENT_EVENT_RESOURCE_EXHAUSTED 0x81UL
+ #define CREQ_FUNC_EVENT_EVENT_LAST CREQ_FUNC_EVENT_EVENT_RESOURCE_EXHAUSTED
+ u8 reserved48[6];
+};
+
+/* creq_qp_event (size:128b/16B) */
+struct creq_qp_event {
+ u8 type;
+ #define CREQ_QP_EVENT_TYPE_MASK 0x3fUL
+ #define CREQ_QP_EVENT_TYPE_SFT 0
+ #define CREQ_QP_EVENT_TYPE_QP_EVENT 0x38UL
+ #define CREQ_QP_EVENT_TYPE_LAST CREQ_QP_EVENT_TYPE_QP_EVENT
+ u8 status;
+ #define CREQ_QP_EVENT_STATUS_SUCCESS 0x0UL
+ #define CREQ_QP_EVENT_STATUS_FAIL 0x1UL
+ #define CREQ_QP_EVENT_STATUS_RESOURCES 0x2UL
+ #define CREQ_QP_EVENT_STATUS_INVALID_CMD 0x3UL
+ #define CREQ_QP_EVENT_STATUS_NOT_IMPLEMENTED 0x4UL
+ #define CREQ_QP_EVENT_STATUS_INVALID_PARAMETER 0x5UL
+ #define CREQ_QP_EVENT_STATUS_HARDWARE_ERROR 0x6UL
+ #define CREQ_QP_EVENT_STATUS_INTERNAL_ERROR 0x7UL
+ #define CREQ_QP_EVENT_STATUS_LAST CREQ_QP_EVENT_STATUS_INTERNAL_ERROR
+ __le16 cookie;
+ __le32 reserved32;
+ u8 v;
+ #define CREQ_QP_EVENT_V 0x1UL
+ u8 event;
+ #define CREQ_QP_EVENT_EVENT_CREATE_QP 0x1UL
+ #define CREQ_QP_EVENT_EVENT_DESTROY_QP 0x2UL
+ #define CREQ_QP_EVENT_EVENT_MODIFY_QP 0x3UL
+ #define CREQ_QP_EVENT_EVENT_QUERY_QP 0x4UL
+ #define CREQ_QP_EVENT_EVENT_CREATE_SRQ 0x5UL
+ #define CREQ_QP_EVENT_EVENT_DESTROY_SRQ 0x6UL
+ #define CREQ_QP_EVENT_EVENT_QUERY_SRQ 0x8UL
+ #define CREQ_QP_EVENT_EVENT_CREATE_CQ 0x9UL
+ #define CREQ_QP_EVENT_EVENT_DESTROY_CQ 0xaUL
+ #define CREQ_QP_EVENT_EVENT_RESIZE_CQ 0xcUL
+ #define CREQ_QP_EVENT_EVENT_ALLOCATE_MRW 0xdUL
+ #define CREQ_QP_EVENT_EVENT_DEALLOCATE_KEY 0xeUL
+ #define CREQ_QP_EVENT_EVENT_REGISTER_MR 0xfUL
+ #define CREQ_QP_EVENT_EVENT_DEREGISTER_MR 0x10UL
+ #define CREQ_QP_EVENT_EVENT_ADD_GID 0x11UL
+ #define CREQ_QP_EVENT_EVENT_DELETE_GID 0x12UL
+ #define CREQ_QP_EVENT_EVENT_MODIFY_GID 0x17UL
+ #define CREQ_QP_EVENT_EVENT_QUERY_GID 0x18UL
+ #define CREQ_QP_EVENT_EVENT_CREATE_QP1 0x13UL
+ #define CREQ_QP_EVENT_EVENT_DESTROY_QP1 0x14UL
+ #define CREQ_QP_EVENT_EVENT_CREATE_AH 0x15UL
+ #define CREQ_QP_EVENT_EVENT_DESTROY_AH 0x16UL
+ #define CREQ_QP_EVENT_EVENT_INITIALIZE_FW 0x80UL
+ #define CREQ_QP_EVENT_EVENT_DEINITIALIZE_FW 0x81UL
+ #define CREQ_QP_EVENT_EVENT_STOP_FUNC 0x82UL
+ #define CREQ_QP_EVENT_EVENT_QUERY_FUNC 0x83UL
+ #define CREQ_QP_EVENT_EVENT_SET_FUNC_RESOURCES 0x84UL
+ #define CREQ_QP_EVENT_EVENT_READ_CONTEXT 0x85UL
+ #define CREQ_QP_EVENT_EVENT_MAP_TC_TO_COS 0x8aUL
+ #define CREQ_QP_EVENT_EVENT_QUERY_VERSION 0x8bUL
+ #define CREQ_QP_EVENT_EVENT_MODIFY_CC 0x8cUL
+ #define CREQ_QP_EVENT_EVENT_QUERY_CC 0x8dUL
+ #define CREQ_QP_EVENT_EVENT_QUERY_ROCE_STATS 0x8eUL
+ #define CREQ_QP_EVENT_EVENT_SET_LINK_AGGR_MODE 0x8fUL
+ #define CREQ_QP_EVENT_EVENT_QUERY_QP_EXTEND 0x91UL
+ #define CREQ_QP_EVENT_EVENT_QP_ERROR_NOTIFICATION 0xc0UL
+ #define CREQ_QP_EVENT_EVENT_CQ_ERROR_NOTIFICATION 0xc1UL
+ #define CREQ_QP_EVENT_EVENT_LAST CREQ_QP_EVENT_EVENT_CQ_ERROR_NOTIFICATION
+ u8 reserved48[6];
+};
+
+/* creq_qp_error_notification (size:128b/16B) */
+struct creq_qp_error_notification {
+ u8 type;
+ #define CREQ_QP_ERROR_NOTIFICATION_TYPE_MASK 0x3fUL
+ #define CREQ_QP_ERROR_NOTIFICATION_TYPE_SFT 0
+ #define CREQ_QP_ERROR_NOTIFICATION_TYPE_QP_EVENT 0x38UL
+ #define CREQ_QP_ERROR_NOTIFICATION_TYPE_LAST CREQ_QP_ERROR_NOTIFICATION_TYPE_QP_EVENT
+ u8 status;
+ u8 req_slow_path_state;
+ u8 req_err_state_reason;
+ #define CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_NO_ERROR 0X0UL
+ #define CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_OPCODE_ERROR 0X1UL
+ #define CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_TIMEOUT_RETRY_LIMIT 0X2UL
+ #define CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_RNR_TIMEOUT_RETRY_LIMIT 0X3UL
+ #define CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_NAK_ARRIVAL_1 0X4UL
+ #define CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_NAK_ARRIVAL_2 0X5UL
+ #define CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_NAK_ARRIVAL_3 0X6UL
+ #define CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_NAK_ARRIVAL_4 0X7UL
+ #define CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_RX_MEMORY_ERROR 0X8UL
+ #define CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_TX_MEMORY_ERROR 0X9UL
+ #define CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_READ_RESP_LENGTH 0XAUL
+ #define CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_INVALID_READ_RESP 0XBUL
+ #define CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_ILLEGAL_BIND 0XCUL
+ #define CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_ILLEGAL_FAST_REG 0XDUL
+ #define CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_ILLEGAL_INVALIDATE 0XEUL
+ #define CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_CMP_ERROR 0XFUL
+ #define CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_RETRAN_LOCAL_ERROR 0X10UL
+ #define CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_WQE_FORMAT_ERROR 0X11UL
+ #define CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_ORRQ_FORMAT_ERROR 0X12UL
+ #define CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_INVALID_AVID_ERROR 0X13UL
+ #define CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_AV_DOMAIN_ERROR 0X14UL
+ #define CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_CQ_LOAD_ERROR 0X15UL
+ #define CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_SERV_TYPE_ERROR 0X16UL
+ #define CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_INVALID_OP_ERROR 0X17UL
+ #define CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_TX_PCI_ERROR 0X18UL
+ #define CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_RX_PCI_ERROR 0X19UL
+ #define CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_PROD_WQE_MSMTCH_ERROR 0X1AUL
+ #define CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_PSN_RANGE_CHECK_ERROR 0X1BUL
+ #define CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_RETX_SETUP_ERROR 0X1CUL
+ __le32 xid;
+ u8 v;
+ #define CREQ_QP_ERROR_NOTIFICATION_V 0x1UL
+ u8 event;
+ #define CREQ_QP_ERROR_NOTIFICATION_EVENT_QP_ERROR_NOTIFICATION 0xc0UL
+ #define CREQ_QP_ERROR_NOTIFICATION_EVENT_LAST \
+ CREQ_QP_ERROR_NOTIFICATION_EVENT_QP_ERROR_NOTIFICATION
+ u8 res_slow_path_state;
+ u8 res_err_state_reason;
+ #define CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_NO_ERROR 0x0UL
+ #define CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_EXCEED_MAX 0x1UL
+ #define CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_PAYLOAD_LENGTH_MISMATCH 0x2UL
+ #define CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_EXCEEDS_WQE 0x3UL
+ #define CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_OPCODE_ERROR 0x4UL
+ #define CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_PSN_SEQ_ERROR_RETRY_LIMIT 0x5UL
+ #define CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_RX_INVALID_R_KEY 0x6UL
+ #define CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_RX_DOMAIN_ERROR 0x7UL
+ #define CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_RX_NO_PERMISSION 0x8UL
+ #define CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_RX_RANGE_ERROR 0x9UL
+ #define CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_TX_INVALID_R_KEY 0xaUL
+ #define CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_TX_DOMAIN_ERROR 0xbUL
+ #define CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_TX_NO_PERMISSION 0xcUL
+ #define CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_TX_RANGE_ERROR 0xdUL
+ #define CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_IRRQ_OFLOW 0xeUL
+ #define CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_UNSUPPORTED_OPCODE 0xfUL
+ #define CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_UNALIGN_ATOMIC 0x10UL
+ #define CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_REM_INVALIDATE 0x11UL
+ #define CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_MEMORY_ERROR 0x12UL
+ #define CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_SRQ_ERROR 0x13UL
+ #define CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_CMP_ERROR 0x14UL
+ #define CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_INVALID_DUP_RKEY 0x15UL
+ #define CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_WQE_FORMAT_ERROR 0x16UL
+ #define CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_IRRQ_FORMAT_ERROR 0x17UL
+ #define CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_CQ_LOAD_ERROR 0x18UL
+ #define CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_SRQ_LOAD_ERROR 0x19UL
+ #define CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_TX_PCI_ERROR 0x1bUL
+ #define CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_RX_PCI_ERROR 0x1cUL
+ #define CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_PSN_NOT_FOUND 0x1dUL
+ __le16 sq_cons_idx;
+ __le16 rq_cons_idx;
+};
+
+/* creq_cq_error_notification (size:128b/16B) */
+struct creq_cq_error_notification {
+ u8 type;
+ #define CREQ_CQ_ERROR_NOTIFICATION_TYPE_MASK 0x3fUL
+ #define CREQ_CQ_ERROR_NOTIFICATION_TYPE_SFT 0
+ #define CREQ_CQ_ERROR_NOTIFICATION_TYPE_CQ_EVENT 0x38UL
+ #define CREQ_CQ_ERROR_NOTIFICATION_TYPE_LAST CREQ_CQ_ERROR_NOTIFICATION_TYPE_CQ_EVENT
+ u8 status;
+ u8 cq_err_reason;
+ #define CREQ_CQ_ERROR_NOTIFICATION_CQ_ERR_REASON_REQ_CQ_INVALID_ERROR 0x1UL
+ #define CREQ_CQ_ERROR_NOTIFICATION_CQ_ERR_REASON_REQ_CQ_OVERFLOW_ERROR 0x2UL
+ #define CREQ_CQ_ERROR_NOTIFICATION_CQ_ERR_REASON_REQ_CQ_LOAD_ERROR 0x3UL
+ #define CREQ_CQ_ERROR_NOTIFICATION_CQ_ERR_REASON_RES_CQ_INVALID_ERROR 0x4UL
+ #define CREQ_CQ_ERROR_NOTIFICATION_CQ_ERR_REASON_RES_CQ_OVERFLOW_ERROR 0x5UL
+ #define CREQ_CQ_ERROR_NOTIFICATION_CQ_ERR_REASON_RES_CQ_LOAD_ERROR 0x6UL
+ #define CREQ_CQ_ERROR_NOTIFICATION_CQ_ERR_REASON_LAST \
+ CREQ_CQ_ERROR_NOTIFICATION_CQ_ERR_REASON_RES_CQ_LOAD_ERROR
+ u8 reserved8;
+ __le32 xid;
+ u8 v;
+ #define CREQ_CQ_ERROR_NOTIFICATION_V 0x1UL
+ u8 event;
+ #define CREQ_CQ_ERROR_NOTIFICATION_EVENT_CQ_ERROR_NOTIFICATION 0xc1UL
+ #define CREQ_CQ_ERROR_NOTIFICATION_EVENT_LAST \
+ CREQ_CQ_ERROR_NOTIFICATION_EVENT_CQ_ERROR_NOTIFICATION
+ u8 reserved48[6];
+};
+
+/* sq_base (size:64b/8B) */
+struct sq_base {
+ u8 wqe_type;
+ #define SQ_BASE_WQE_TYPE_SEND 0x0UL
+ #define SQ_BASE_WQE_TYPE_SEND_W_IMMEAD 0x1UL
+ #define SQ_BASE_WQE_TYPE_SEND_W_INVALID 0x2UL
+ #define SQ_BASE_WQE_TYPE_WRITE_WQE 0x4UL
+ #define SQ_BASE_WQE_TYPE_WRITE_W_IMMEAD 0x5UL
+ #define SQ_BASE_WQE_TYPE_READ_WQE 0x6UL
+ #define SQ_BASE_WQE_TYPE_ATOMIC_CS 0x8UL
+ #define SQ_BASE_WQE_TYPE_ATOMIC_FA 0xbUL
+ #define SQ_BASE_WQE_TYPE_LOCAL_INVALID 0xcUL
+ #define SQ_BASE_WQE_TYPE_FR_PMR 0xdUL
+ #define SQ_BASE_WQE_TYPE_BIND 0xeUL
+ #define SQ_BASE_WQE_TYPE_FR_PPMR 0xfUL
+ #define SQ_BASE_WQE_TYPE_LAST SQ_BASE_WQE_TYPE_FR_PPMR
+ u8 unused_0[7];
+};
+
+/* sq_sge (size:128b/16B) */
+struct sq_sge {
+ __le64 va_or_pa;
+ __le32 l_key;
+ __le32 size;
+};
+
+/* sq_psn_search (size:64b/8B) */
+struct sq_psn_search {
+ __le32 opcode_start_psn;
+ #define SQ_PSN_SEARCH_START_PSN_MASK 0xffffffUL
+ #define SQ_PSN_SEARCH_START_PSN_SFT 0
+ #define SQ_PSN_SEARCH_OPCODE_MASK 0xff000000UL
+ #define SQ_PSN_SEARCH_OPCODE_SFT 24
+ __le32 flags_next_psn;
+ #define SQ_PSN_SEARCH_NEXT_PSN_MASK 0xffffffUL
+ #define SQ_PSN_SEARCH_NEXT_PSN_SFT 0
+ #define SQ_PSN_SEARCH_FLAGS_MASK 0xff000000UL
+ #define SQ_PSN_SEARCH_FLAGS_SFT 24
+};
+
+/* sq_psn_search_ext (size:128b/16B) */
+struct sq_psn_search_ext {
+ __le32 opcode_start_psn;
+ #define SQ_PSN_SEARCH_EXT_START_PSN_MASK 0xffffffUL
+ #define SQ_PSN_SEARCH_EXT_START_PSN_SFT 0
+ #define SQ_PSN_SEARCH_EXT_OPCODE_MASK 0xff000000UL
+ #define SQ_PSN_SEARCH_EXT_OPCODE_SFT 24
+ __le32 flags_next_psn;
+ #define SQ_PSN_SEARCH_EXT_NEXT_PSN_MASK 0xffffffUL
+ #define SQ_PSN_SEARCH_EXT_NEXT_PSN_SFT 0
+ #define SQ_PSN_SEARCH_EXT_FLAGS_MASK 0xff000000UL
+ #define SQ_PSN_SEARCH_EXT_FLAGS_SFT 24
+ __le16 start_slot_idx;
+ __le16 reserved16;
+ __le32 reserved32;
+};
+
+/* sq_msn_search (size:64b/8B) */
+struct sq_msn_search {
+ __le64 start_idx_next_psn_start_psn;
+ #define SQ_MSN_SEARCH_START_PSN_MASK 0xffffffUL
+ #define SQ_MSN_SEARCH_START_PSN_SFT 0
+ #define SQ_MSN_SEARCH_NEXT_PSN_MASK 0xffffff000000ULL
+ #define SQ_MSN_SEARCH_NEXT_PSN_SFT 24
+ #define SQ_MSN_SEARCH_START_IDX_MASK 0xffff000000000000ULL
+ #define SQ_MSN_SEARCH_START_IDX_SFT 48
+};
+
+/* sq_send (size:1024b/128B) */
+struct sq_send {
+ u8 wqe_type;
+ #define SQ_SEND_WQE_TYPE_SEND 0x0UL
+ #define SQ_SEND_WQE_TYPE_SEND_W_IMMEAD 0x1UL
+ #define SQ_SEND_WQE_TYPE_SEND_W_INVALID 0x2UL
+ #define SQ_SEND_WQE_TYPE_LAST SQ_SEND_WQE_TYPE_SEND_W_INVALID
+ u8 flags;
+ #define SQ_SEND_FLAGS_INLINE_SE_UC_FENCE_RD_OR_ATOMIC_FENCE_SIGNAL_COMP_MASK 0xffUL
+ #define SQ_SEND_FLAGS_INLINE_SE_UC_FENCE_RD_OR_ATOMIC_FENCE_SIGNAL_COMP_SFT 0
+ #define SQ_SEND_FLAGS_SIGNAL_COMP 0x1UL
+ #define SQ_SEND_FLAGS_RD_OR_ATOMIC_FENCE 0x2UL
+ #define SQ_SEND_FLAGS_UC_FENCE 0x4UL
+ #define SQ_SEND_FLAGS_SE 0x8UL
+ #define SQ_SEND_FLAGS_INLINE 0x10UL
+ #define SQ_SEND_FLAGS_WQE_TS_EN 0x20UL
+ #define SQ_SEND_FLAGS_DEBUG_TRACE 0x40UL
+ u8 wqe_size;
+ u8 reserved8_1;
+ __le32 inv_key_or_imm_data;
+ __le32 length;
+ __le32 q_key;
+ __le32 dst_qp;
+ #define SQ_SEND_DST_QP_MASK 0xffffffUL
+ #define SQ_SEND_DST_QP_SFT 0
+ __le32 avid;
+ #define SQ_SEND_AVID_MASK 0xfffffUL
+ #define SQ_SEND_AVID_SFT 0
+ __le32 reserved32;
+ __le32 timestamp;
+ #define SQ_SEND_TIMESTAMP_MASK 0xffffffUL
+ #define SQ_SEND_TIMESTAMP_SFT 0
+ __le32 data[24];
+};
+
+/* sq_send_hdr (size:256b/32B) */
+struct sq_send_hdr {
+ u8 wqe_type;
+ #define SQ_SEND_HDR_WQE_TYPE_SEND 0x0UL
+ #define SQ_SEND_HDR_WQE_TYPE_SEND_W_IMMEAD 0x1UL
+ #define SQ_SEND_HDR_WQE_TYPE_SEND_W_INVALID 0x2UL
+ #define SQ_SEND_HDR_WQE_TYPE_LAST SQ_SEND_HDR_WQE_TYPE_SEND_W_INVALID
+ u8 flags;
+ #define SQ_SEND_HDR_FLAGS_INLINE_SE_UC_FENCE_RD_OR_ATOMIC_FENCE_SIGNAL_COMP_MASK 0xffUL
+ #define SQ_SEND_HDR_FLAGS_INLINE_SE_UC_FENCE_RD_OR_ATOMIC_FENCE_SIGNAL_COMP_SFT 0
+ #define SQ_SEND_HDR_FLAGS_SIGNAL_COMP 0x1UL
+ #define SQ_SEND_HDR_FLAGS_RD_OR_ATOMIC_FENCE 0x2UL
+ #define SQ_SEND_HDR_FLAGS_UC_FENCE 0x4UL
+ #define SQ_SEND_HDR_FLAGS_SE 0x8UL
+ #define SQ_SEND_HDR_FLAGS_INLINE 0x10UL
+ #define SQ_SEND_HDR_FLAGS_WQE_TS_EN 0x20UL
+ #define SQ_SEND_HDR_FLAGS_DEBUG_TRACE 0x40UL
+ u8 wqe_size;
+ u8 reserved8_1;
+ __le32 inv_key_or_imm_data;
+ __le32 length;
+ __le32 q_key;
+ __le32 dst_qp;
+ #define SQ_SEND_HDR_DST_QP_MASK 0xffffffUL
+ #define SQ_SEND_HDR_DST_QP_SFT 0
+ __le32 avid;
+ #define SQ_SEND_HDR_AVID_MASK 0xfffffUL
+ #define SQ_SEND_HDR_AVID_SFT 0
+ __le32 reserved32;
+ __le32 timestamp;
+ #define SQ_SEND_HDR_TIMESTAMP_MASK 0xffffffUL
+ #define SQ_SEND_HDR_TIMESTAMP_SFT 0
+};
+
+/* sq_send_raweth_qp1 (size:1024b/128B) */
+struct sq_send_raweth_qp1 {
+ u8 wqe_type;
+ #define SQ_SEND_RAWETH_QP1_WQE_TYPE_SEND 0x0UL
+ #define SQ_SEND_RAWETH_QP1_WQE_TYPE_LAST SQ_SEND_RAWETH_QP1_WQE_TYPE_SEND
+ u8 flags;
+ #define SQ_SEND_RAWETH_QP1_FLAGS_INLINE_SE_UC_FENCE_RD_OR_ATOMIC_FENCE_SIGNAL_COMP_MASK \
+ 0xffUL
+ #define SQ_SEND_RAWETH_QP1_FLAGS_INLINE_SE_UC_FENCE_RD_OR_ATOMIC_FENCE_SIGNAL_COMP_SFT \
+ 0
+ #define SQ_SEND_RAWETH_QP1_FLAGS_SIGNAL_COMP 0x1UL
+ #define SQ_SEND_RAWETH_QP1_FLAGS_RD_OR_ATOMIC_FENCE 0x2UL
+ #define SQ_SEND_RAWETH_QP1_FLAGS_UC_FENCE 0x4UL
+ #define SQ_SEND_RAWETH_QP1_FLAGS_SE 0x8UL
+ #define SQ_SEND_RAWETH_QP1_FLAGS_INLINE 0x10UL
+ #define SQ_SEND_RAWETH_QP1_FLAGS_WQE_TS_EN 0x20UL
+ #define SQ_SEND_RAWETH_QP1_FLAGS_DEBUG_TRACE 0x40UL
+ u8 wqe_size;
+ u8 reserved8;
+ __le16 lflags;
+ #define SQ_SEND_RAWETH_QP1_LFLAGS_TCP_UDP_CHKSUM 0x1UL
+ #define SQ_SEND_RAWETH_QP1_LFLAGS_IP_CHKSUM 0x2UL
+ #define SQ_SEND_RAWETH_QP1_LFLAGS_NOCRC 0x4UL
+ #define SQ_SEND_RAWETH_QP1_LFLAGS_STAMP 0x8UL
+ #define SQ_SEND_RAWETH_QP1_LFLAGS_T_IP_CHKSUM 0x10UL
+ #define SQ_SEND_RAWETH_QP1_LFLAGS_ROCE_CRC 0x100UL
+ #define SQ_SEND_RAWETH_QP1_LFLAGS_FCOE_CRC 0x200UL
+ __le16 cfa_action;
+ __le32 length;
+ __le32 reserved32_1;
+ __le32 cfa_meta;
+ #define SQ_SEND_RAWETH_QP1_CFA_META_VLAN_VID_MASK 0xfffUL
+ #define SQ_SEND_RAWETH_QP1_CFA_META_VLAN_VID_SFT 0
+ #define SQ_SEND_RAWETH_QP1_CFA_META_VLAN_DE 0x1000UL
+ #define SQ_SEND_RAWETH_QP1_CFA_META_VLAN_PRI_MASK 0xe000UL
+ #define SQ_SEND_RAWETH_QP1_CFA_META_VLAN_PRI_SFT 13
+ #define SQ_SEND_RAWETH_QP1_CFA_META_VLAN_TPID_MASK 0x70000UL
+ #define SQ_SEND_RAWETH_QP1_CFA_META_VLAN_TPID_SFT 16
+ #define SQ_SEND_RAWETH_QP1_CFA_META_VLAN_TPID_TPID88A8 (0x0UL << 16)
+ #define SQ_SEND_RAWETH_QP1_CFA_META_VLAN_TPID_TPID8100 (0x1UL << 16)
+ #define SQ_SEND_RAWETH_QP1_CFA_META_VLAN_TPID_TPID9100 (0x2UL << 16)
+ #define SQ_SEND_RAWETH_QP1_CFA_META_VLAN_TPID_TPID9200 (0x3UL << 16)
+ #define SQ_SEND_RAWETH_QP1_CFA_META_VLAN_TPID_TPID9300 (0x4UL << 16)
+ #define SQ_SEND_RAWETH_QP1_CFA_META_VLAN_TPID_TPIDCFG (0x5UL << 16)
+ #define SQ_SEND_RAWETH_QP1_CFA_META_VLAN_TPID_LAST\
+ SQ_SEND_RAWETH_QP1_CFA_META_VLAN_TPID_TPIDCFG
+ #define SQ_SEND_RAWETH_QP1_CFA_META_VLAN_RESERVED_MASK 0xff80000UL
+ #define SQ_SEND_RAWETH_QP1_CFA_META_VLAN_RESERVED_SFT 19
+ #define SQ_SEND_RAWETH_QP1_CFA_META_KEY_MASK 0xf0000000UL
+ #define SQ_SEND_RAWETH_QP1_CFA_META_KEY_SFT 28
+ #define SQ_SEND_RAWETH_QP1_CFA_META_KEY_NONE (0x0UL << 28)
+ #define SQ_SEND_RAWETH_QP1_CFA_META_KEY_VLAN_TAG (0x1UL << 28)
+ #define SQ_SEND_RAWETH_QP1_CFA_META_KEY_LAST SQ_SEND_RAWETH_QP1_CFA_META_KEY_VLAN_TAG
+ __le32 reserved32_2;
+ __le32 reserved32_3;
+ __le32 timestamp;
+ #define SQ_SEND_RAWETH_QP1_TIMESTAMP_MASK 0xffffffUL
+ #define SQ_SEND_RAWETH_QP1_TIMESTAMP_SFT 0
+ __le32 data[24];
+};
+
+/* sq_send_raweth_qp1_hdr (size:256b/32B) */
+struct sq_send_raweth_qp1_hdr {
+ u8 wqe_type;
+ #define SQ_SEND_RAWETH_QP1_HDR_WQE_TYPE_SEND 0x0UL
+ #define SQ_SEND_RAWETH_QP1_HDR_WQE_TYPE_LAST SQ_SEND_RAWETH_QP1_HDR_WQE_TYPE_SEND
+ u8 flags;
+ #define \
+ SQ_SEND_RAWETH_QP1_HDR_FLAGS_INLINE_SE_UC_FENCE_RD_OR_ATOMIC_FENCE_SIGNAL_COMP_MASK 0xffUL
+ #define SQ_SEND_RAWETH_QP1_HDR_FLAGS_INLINE_SE_UC_FENCE_RD_OR_ATOMIC_FENCE_SIGNAL_COMP_SFT\
+ 0
+ #define SQ_SEND_RAWETH_QP1_HDR_FLAGS_SIGNAL_COMP 0x1UL
+ #define SQ_SEND_RAWETH_QP1_HDR_FLAGS_RD_OR_ATOMIC_FENCE 0x2UL
+ #define SQ_SEND_RAWETH_QP1_HDR_FLAGS_UC_FENCE 0x4UL
+ #define SQ_SEND_RAWETH_QP1_HDR_FLAGS_SE 0x8UL
+ #define SQ_SEND_RAWETH_QP1_HDR_FLAGS_INLINE 0x10UL
+ #define SQ_SEND_RAWETH_QP1_HDR_FLAGS_WQE_TS_EN 0x20UL
+ #define SQ_SEND_RAWETH_QP1_HDR_FLAGS_DEBUG_TRACE 0x40UL
+ u8 wqe_size;
+ u8 reserved8;
+ __le16 lflags;
+ #define SQ_SEND_RAWETH_QP1_HDR_LFLAGS_TCP_UDP_CHKSUM 0x1UL
+ #define SQ_SEND_RAWETH_QP1_HDR_LFLAGS_IP_CHKSUM 0x2UL
+ #define SQ_SEND_RAWETH_QP1_HDR_LFLAGS_NOCRC 0x4UL
+ #define SQ_SEND_RAWETH_QP1_HDR_LFLAGS_STAMP 0x8UL
+ #define SQ_SEND_RAWETH_QP1_HDR_LFLAGS_T_IP_CHKSUM 0x10UL
+ #define SQ_SEND_RAWETH_QP1_HDR_LFLAGS_ROCE_CRC 0x100UL
+ #define SQ_SEND_RAWETH_QP1_HDR_LFLAGS_FCOE_CRC 0x200UL
+ __le16 cfa_action;
+ __le32 length;
+ __le32 reserved32_1;
+ __le32 cfa_meta;
+ #define SQ_SEND_RAWETH_QP1_HDR_CFA_META_VLAN_VID_MASK 0xfffUL
+ #define SQ_SEND_RAWETH_QP1_HDR_CFA_META_VLAN_VID_SFT 0
+ #define SQ_SEND_RAWETH_QP1_HDR_CFA_META_VLAN_DE 0x1000UL
+ #define SQ_SEND_RAWETH_QP1_HDR_CFA_META_VLAN_PRI_MASK 0xe000UL
+ #define SQ_SEND_RAWETH_QP1_HDR_CFA_META_VLAN_PRI_SFT 13
+ #define SQ_SEND_RAWETH_QP1_HDR_CFA_META_VLAN_TPID_MASK 0x70000UL
+ #define SQ_SEND_RAWETH_QP1_HDR_CFA_META_VLAN_TPID_SFT 16
+ #define SQ_SEND_RAWETH_QP1_HDR_CFA_META_VLAN_TPID_TPID88A8 (0x0UL << 16)
+ #define SQ_SEND_RAWETH_QP1_HDR_CFA_META_VLAN_TPID_TPID8100 (0x1UL << 16)
+ #define SQ_SEND_RAWETH_QP1_HDR_CFA_META_VLAN_TPID_TPID9100 (0x2UL << 16)
+ #define SQ_SEND_RAWETH_QP1_HDR_CFA_META_VLAN_TPID_TPID9200 (0x3UL << 16)
+ #define SQ_SEND_RAWETH_QP1_HDR_CFA_META_VLAN_TPID_TPID9300 (0x4UL << 16)
+ #define SQ_SEND_RAWETH_QP1_HDR_CFA_META_VLAN_TPID_TPIDCFG (0x5UL << 16)
+ #define SQ_SEND_RAWETH_QP1_HDR_CFA_META_VLAN_TPID_LAST\
+ SQ_SEND_RAWETH_QP1_HDR_CFA_META_VLAN_TPID_TPIDCFG
+ #define SQ_SEND_RAWETH_QP1_HDR_CFA_META_VLAN_RESERVED_MASK 0xff80000UL
+ #define SQ_SEND_RAWETH_QP1_HDR_CFA_META_VLAN_RESERVED_SFT 19
+ #define SQ_SEND_RAWETH_QP1_HDR_CFA_META_KEY_MASK 0xf0000000UL
+ #define SQ_SEND_RAWETH_QP1_HDR_CFA_META_KEY_SFT 28
+ #define SQ_SEND_RAWETH_QP1_HDR_CFA_META_KEY_NONE (0x0UL << 28)
+ #define SQ_SEND_RAWETH_QP1_HDR_CFA_META_KEY_VLAN_TAG (0x1UL << 28)
+ #define SQ_SEND_RAWETH_QP1_HDR_CFA_META_KEY_LAST\
+ SQ_SEND_RAWETH_QP1_HDR_CFA_META_KEY_VLAN_TAG
+ __le32 reserved32_2;
+ __le32 reserved32_3;
+ __le32 timestamp;
+ #define SQ_SEND_RAWETH_QP1_HDR_TIMESTAMP_MASK 0xffffffUL
+ #define SQ_SEND_RAWETH_QP1_HDR_TIMESTAMP_SFT 0
+};
+
+/* sq_rdma (size:1024b/128B) */
+struct sq_rdma {
+ u8 wqe_type;
+ #define SQ_RDMA_WQE_TYPE_WRITE_WQE 0x4UL
+ #define SQ_RDMA_WQE_TYPE_WRITE_W_IMMEAD 0x5UL
+ #define SQ_RDMA_WQE_TYPE_READ_WQE 0x6UL
+ #define SQ_RDMA_WQE_TYPE_LAST SQ_RDMA_WQE_TYPE_READ_WQE
+ u8 flags;
+ #define SQ_RDMA_FLAGS_INLINE_SE_UC_FENCE_RD_OR_ATOMIC_FENCE_SIGNAL_COMP_MASK 0xffUL
+ #define SQ_RDMA_FLAGS_INLINE_SE_UC_FENCE_RD_OR_ATOMIC_FENCE_SIGNAL_COMP_SFT 0
+ #define SQ_RDMA_FLAGS_SIGNAL_COMP 0x1UL
+ #define SQ_RDMA_FLAGS_RD_OR_ATOMIC_FENCE 0x2UL
+ #define SQ_RDMA_FLAGS_UC_FENCE 0x4UL
+ #define SQ_RDMA_FLAGS_SE 0x8UL
+ #define SQ_RDMA_FLAGS_INLINE 0x10UL
+ #define SQ_RDMA_FLAGS_WQE_TS_EN 0x20UL
+ #define SQ_RDMA_FLAGS_DEBUG_TRACE 0x40UL
+ u8 wqe_size;
+ u8 reserved8;
+ __le32 imm_data;
+ __le32 length;
+ __le32 reserved32_1;
+ __le64 remote_va;
+ __le32 remote_key;
+ __le32 timestamp;
+ #define SQ_RDMA_TIMESTAMP_MASK 0xffffffUL
+ #define SQ_RDMA_TIMESTAMP_SFT 0
+ __le32 data[24];
+};
+
+/* sq_rdma_hdr (size:256b/32B) */
+struct sq_rdma_hdr {
+ u8 wqe_type;
+ #define SQ_RDMA_HDR_WQE_TYPE_WRITE_WQE 0x4UL
+ #define SQ_RDMA_HDR_WQE_TYPE_WRITE_W_IMMEAD 0x5UL
+ #define SQ_RDMA_HDR_WQE_TYPE_READ_WQE 0x6UL
+ #define SQ_RDMA_HDR_WQE_TYPE_LAST SQ_RDMA_HDR_WQE_TYPE_READ_WQE
+ u8 flags;
+ #define SQ_RDMA_HDR_FLAGS_INLINE_SE_UC_FENCE_RD_OR_ATOMIC_FENCE_SIGNAL_COMP_MASK 0xffUL
+ #define SQ_RDMA_HDR_FLAGS_INLINE_SE_UC_FENCE_RD_OR_ATOMIC_FENCE_SIGNAL_COMP_SFT 0
+ #define SQ_RDMA_HDR_FLAGS_SIGNAL_COMP 0x1UL
+ #define SQ_RDMA_HDR_FLAGS_RD_OR_ATOMIC_FENCE 0x2UL
+ #define SQ_RDMA_HDR_FLAGS_UC_FENCE 0x4UL
+ #define SQ_RDMA_HDR_FLAGS_SE 0x8UL
+ #define SQ_RDMA_HDR_FLAGS_INLINE 0x10UL
+ #define SQ_RDMA_HDR_FLAGS_WQE_TS_EN 0x20UL
+ #define SQ_RDMA_HDR_FLAGS_DEBUG_TRACE 0x40UL
+ u8 wqe_size;
+ u8 reserved8;
+ __le32 imm_data;
+ __le32 length;
+ __le32 reserved32_1;
+ __le64 remote_va;
+ __le32 remote_key;
+ __le32 timestamp;
+ #define SQ_RDMA_HDR_TIMESTAMP_MASK 0xffffffUL
+ #define SQ_RDMA_HDR_TIMESTAMP_SFT 0
+};
+
+/* sq_atomic (size:1024b/128B) */
+struct sq_atomic {
+ u8 wqe_type;
+ #define SQ_ATOMIC_WQE_TYPE_ATOMIC_CS 0x8UL
+ #define SQ_ATOMIC_WQE_TYPE_ATOMIC_FA 0xbUL
+ #define SQ_ATOMIC_WQE_TYPE_LAST SQ_ATOMIC_WQE_TYPE_ATOMIC_FA
+ u8 flags;
+ #define SQ_ATOMIC_FLAGS_INLINE_SE_UC_FENCE_RD_OR_ATOMIC_FENCE_SIGNAL_COMP_MASK 0xffUL
+ #define SQ_ATOMIC_FLAGS_INLINE_SE_UC_FENCE_RD_OR_ATOMIC_FENCE_SIGNAL_COMP_SFT 0
+ #define SQ_ATOMIC_FLAGS_SIGNAL_COMP 0x1UL
+ #define SQ_ATOMIC_FLAGS_RD_OR_ATOMIC_FENCE 0x2UL
+ #define SQ_ATOMIC_FLAGS_UC_FENCE 0x4UL
+ #define SQ_ATOMIC_FLAGS_SE 0x8UL
+ #define SQ_ATOMIC_FLAGS_INLINE 0x10UL
+ #define SQ_ATOMIC_FLAGS_WQE_TS_EN 0x20UL
+ #define SQ_ATOMIC_FLAGS_DEBUG_TRACE 0x40UL
+ __le16 reserved16;
+ __le32 remote_key;
+ __le64 remote_va;
+ __le64 swap_data;
+ __le64 cmp_data;
+ __le32 data[24];
+};
+
+/* sq_atomic_hdr (size:256b/32B) */
+struct sq_atomic_hdr {
+ u8 wqe_type;
+ #define SQ_ATOMIC_HDR_WQE_TYPE_ATOMIC_CS 0x8UL
+ #define SQ_ATOMIC_HDR_WQE_TYPE_ATOMIC_FA 0xbUL
+ #define SQ_ATOMIC_HDR_WQE_TYPE_LAST SQ_ATOMIC_HDR_WQE_TYPE_ATOMIC_FA
+ u8 flags;
+ #define SQ_ATOMIC_HDR_FLAGS_INLINE_SE_UC_FENCE_RD_OR_ATOMIC_FENCE_SIGNAL_COMP_MASK 0xffUL
+ #define SQ_ATOMIC_HDR_FLAGS_INLINE_SE_UC_FENCE_RD_OR_ATOMIC_FENCE_SIGNAL_COMP_SFT 0
+ #define SQ_ATOMIC_HDR_FLAGS_SIGNAL_COMP 0x1UL
+ #define SQ_ATOMIC_HDR_FLAGS_RD_OR_ATOMIC_FENCE 0x2UL
+ #define SQ_ATOMIC_HDR_FLAGS_UC_FENCE 0x4UL
+ #define SQ_ATOMIC_HDR_FLAGS_SE 0x8UL
+ #define SQ_ATOMIC_HDR_FLAGS_INLINE 0x10UL
+ #define SQ_ATOMIC_HDR_FLAGS_WQE_TS_EN 0x20UL
+ #define SQ_ATOMIC_HDR_FLAGS_DEBUG_TRACE 0x40UL
+ __le16 reserved16;
+ __le32 remote_key;
+ __le64 remote_va;
+ __le64 swap_data;
+ __le64 cmp_data;
+};
+
+/* sq_localinvalidate (size:1024b/128B) */
+struct sq_localinvalidate {
+ u8 wqe_type;
+ #define SQ_LOCALINVALIDATE_WQE_TYPE_LOCAL_INVALID 0xcUL
+ #define SQ_LOCALINVALIDATE_WQE_TYPE_LAST SQ_LOCALINVALIDATE_WQE_TYPE_LOCAL_INVALID
+ u8 flags;
+ #define SQ_LOCALINVALIDATE_FLAGS_INLINE_SE_UC_FENCE_RD_OR_ATOMIC_FENCE_SIGNAL_COMP_MASK\
+ 0xffUL
+ #define SQ_LOCALINVALIDATE_FLAGS_INLINE_SE_UC_FENCE_RD_OR_ATOMIC_FENCE_SIGNAL_COMP_SFT\
+ 0
+ #define SQ_LOCALINVALIDATE_FLAGS_SIGNAL_COMP 0x1UL
+ #define SQ_LOCALINVALIDATE_FLAGS_RD_OR_ATOMIC_FENCE 0x2UL
+ #define SQ_LOCALINVALIDATE_FLAGS_UC_FENCE 0x4UL
+ #define SQ_LOCALINVALIDATE_FLAGS_SE 0x8UL
+ #define SQ_LOCALINVALIDATE_FLAGS_INLINE 0x10UL
+ #define SQ_LOCALINVALIDATE_FLAGS_WQE_TS_EN 0x20UL
+ #define SQ_LOCALINVALIDATE_FLAGS_DEBUG_TRACE 0x40UL
+ __le16 reserved16;
+ __le32 inv_l_key;
+ __le64 reserved64;
+ u8 reserved128[16];
+ __le32 data[24];
+};
+
+/* sq_localinvalidate_hdr (size:256b/32B) */
+struct sq_localinvalidate_hdr {
+ u8 wqe_type;
+ #define SQ_LOCALINVALIDATE_HDR_WQE_TYPE_LOCAL_INVALID 0xcUL
+ #define SQ_LOCALINVALIDATE_HDR_WQE_TYPE_LAST SQ_LOCALINVALIDATE_HDR_WQE_TYPE_LOCAL_INVALID
+ u8 flags;
+ #define \
+ SQ_LOCALINVALIDATE_HDR_FLAGS_INLINE_SE_UC_FENCE_RD_OR_ATOMIC_FENCE_SIGNAL_COMP_MASK 0xffUL
+ #define SQ_LOCALINVALIDATE_HDR_FLAGS_INLINE_SE_UC_FENCE_RD_OR_ATOMIC_FENCE_SIGNAL_COMP_SFT\
+ 0
+ #define SQ_LOCALINVALIDATE_HDR_FLAGS_SIGNAL_COMP 0x1UL
+ #define SQ_LOCALINVALIDATE_HDR_FLAGS_RD_OR_ATOMIC_FENCE 0x2UL
+ #define SQ_LOCALINVALIDATE_HDR_FLAGS_UC_FENCE 0x4UL
+ #define SQ_LOCALINVALIDATE_HDR_FLAGS_SE 0x8UL
+ #define SQ_LOCALINVALIDATE_HDR_FLAGS_INLINE 0x10UL
+ #define SQ_LOCALINVALIDATE_HDR_FLAGS_WQE_TS_EN 0x20UL
+ #define SQ_LOCALINVALIDATE_HDR_FLAGS_DEBUG_TRACE 0x40UL
+ __le16 reserved16;
+ __le32 inv_l_key;
+ __le64 reserved64;
+ u8 reserved128[16];
+};
+
+/* sq_fr_pmr (size:1024b/128B) */
+struct sq_fr_pmr {
+ u8 wqe_type;
+ #define SQ_FR_PMR_WQE_TYPE_FR_PMR 0xdUL
+ #define SQ_FR_PMR_WQE_TYPE_LAST SQ_FR_PMR_WQE_TYPE_FR_PMR
+ u8 flags;
+ #define SQ_FR_PMR_FLAGS_SIGNAL_COMP 0x1UL
+ #define SQ_FR_PMR_FLAGS_RD_OR_ATOMIC_FENCE 0x2UL
+ #define SQ_FR_PMR_FLAGS_UC_FENCE 0x4UL
+ #define SQ_FR_PMR_FLAGS_SE 0x8UL
+ #define SQ_FR_PMR_FLAGS_INLINE 0x10UL
+ #define SQ_FR_PMR_FLAGS_WQE_TS_EN 0x20UL
+ #define SQ_FR_PMR_FLAGS_DEBUG_TRACE 0x40UL
+ u8 access_cntl;
+ #define SQ_FR_PMR_ACCESS_CNTL_LOCAL_WRITE 0x1UL
+ #define SQ_FR_PMR_ACCESS_CNTL_REMOTE_READ 0x2UL
+ #define SQ_FR_PMR_ACCESS_CNTL_REMOTE_WRITE 0x4UL
+ #define SQ_FR_PMR_ACCESS_CNTL_REMOTE_ATOMIC 0x8UL
+ #define SQ_FR_PMR_ACCESS_CNTL_WINDOW_BIND 0x10UL
+ u8 zero_based_page_size_log;
+ #define SQ_FR_PMR_PAGE_SIZE_LOG_MASK 0x1fUL
+ #define SQ_FR_PMR_PAGE_SIZE_LOG_SFT 0
+ #define SQ_FR_PMR_PAGE_SIZE_LOG_PGSZ_4K 0x0UL
+ #define SQ_FR_PMR_PAGE_SIZE_LOG_PGSZ_8K 0x1UL
+ #define SQ_FR_PMR_PAGE_SIZE_LOG_PGSZ_16K 0x2UL
+ #define SQ_FR_PMR_PAGE_SIZE_LOG_PGSZ_32K 0x3UL
+ #define SQ_FR_PMR_PAGE_SIZE_LOG_PGSZ_64K 0x4UL
+ #define SQ_FR_PMR_PAGE_SIZE_LOG_PGSZ_128K 0x5UL
+ #define SQ_FR_PMR_PAGE_SIZE_LOG_PGSZ_256K 0x6UL
+ #define SQ_FR_PMR_PAGE_SIZE_LOG_PGSZ_512K 0x7UL
+ #define SQ_FR_PMR_PAGE_SIZE_LOG_PGSZ_1M 0x8UL
+ #define SQ_FR_PMR_PAGE_SIZE_LOG_PGSZ_2M 0x9UL
+ #define SQ_FR_PMR_PAGE_SIZE_LOG_PGSZ_4M 0xaUL
+ #define SQ_FR_PMR_PAGE_SIZE_LOG_PGSZ_8M 0xbUL
+ #define SQ_FR_PMR_PAGE_SIZE_LOG_PGSZ_16M 0xcUL
+ #define SQ_FR_PMR_PAGE_SIZE_LOG_PGSZ_32M 0xdUL
+ #define SQ_FR_PMR_PAGE_SIZE_LOG_PGSZ_64M 0xeUL
+ #define SQ_FR_PMR_PAGE_SIZE_LOG_PGSZ_128M 0xfUL
+ #define SQ_FR_PMR_PAGE_SIZE_LOG_PGSZ_256M 0x10UL
+ #define SQ_FR_PMR_PAGE_SIZE_LOG_PGSZ_512M 0x11UL
+ #define SQ_FR_PMR_PAGE_SIZE_LOG_PGSZ_1G 0x12UL
+ #define SQ_FR_PMR_PAGE_SIZE_LOG_PGSZ_2G 0x13UL
+ #define SQ_FR_PMR_PAGE_SIZE_LOG_PGSZ_4G 0x14UL
+ #define SQ_FR_PMR_PAGE_SIZE_LOG_PGSZ_8G 0x15UL
+ #define SQ_FR_PMR_PAGE_SIZE_LOG_PGSZ_16G 0x16UL
+ #define SQ_FR_PMR_PAGE_SIZE_LOG_PGSZ_32G 0x17UL
+ #define SQ_FR_PMR_PAGE_SIZE_LOG_PGSZ_64G 0x18UL
+ #define SQ_FR_PMR_PAGE_SIZE_LOG_PGSZ_128G 0x19UL
+ #define SQ_FR_PMR_PAGE_SIZE_LOG_PGSZ_256G 0x1aUL
+ #define SQ_FR_PMR_PAGE_SIZE_LOG_PGSZ_512G 0x1bUL
+ #define SQ_FR_PMR_PAGE_SIZE_LOG_PGSZ_1T 0x1cUL
+ #define SQ_FR_PMR_PAGE_SIZE_LOG_PGSZ_2T 0x1dUL
+ #define SQ_FR_PMR_PAGE_SIZE_LOG_PGSZ_4T 0x1eUL
+ #define SQ_FR_PMR_PAGE_SIZE_LOG_PGSZ_8T 0x1fUL
+ #define SQ_FR_PMR_PAGE_SIZE_LOG_LAST SQ_FR_PMR_PAGE_SIZE_LOG_PGSZ_8T
+ #define SQ_FR_PMR_ZERO_BASED 0x20UL
+ __le32 l_key;
+ u8 length[5];
+ u8 reserved8_1;
+ u8 reserved8_2;
+ u8 numlevels_pbl_page_size_log;
+ #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_MASK 0x1fUL
+ #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_SFT 0
+ #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_PGSZ_4K 0x0UL
+ #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_PGSZ_8K 0x1UL
+ #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_PGSZ_16K 0x2UL
+ #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_PGSZ_32K 0x3UL
+ #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_PGSZ_64K 0x4UL
+ #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_PGSZ_128K 0x5UL
+ #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_PGSZ_256K 0x6UL
+ #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_PGSZ_512K 0x7UL
+ #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_PGSZ_1M 0x8UL
+ #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_PGSZ_2M 0x9UL
+ #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_PGSZ_4M 0xaUL
+ #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_PGSZ_8M 0xbUL
+ #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_PGSZ_16M 0xcUL
+ #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_PGSZ_32M 0xdUL
+ #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_PGSZ_64M 0xeUL
+ #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_PGSZ_128M 0xfUL
+ #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_PGSZ_256M 0x10UL
+ #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_PGSZ_512M 0x11UL
+ #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_PGSZ_1G 0x12UL
+ #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_PGSZ_2G 0x13UL
+ #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_PGSZ_4G 0x14UL
+ #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_PGSZ_8G 0x15UL
+ #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_PGSZ_16G 0x16UL
+ #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_PGSZ_32G 0x17UL
+ #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_PGSZ_64G 0x18UL
+ #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_PGSZ_128G 0x19UL
+ #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_PGSZ_256G 0x1aUL
+ #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_PGSZ_512G 0x1bUL
+ #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_PGSZ_1T 0x1cUL
+ #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_PGSZ_2T 0x1dUL
+ #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_PGSZ_4T 0x1eUL
+ #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_PGSZ_8T 0x1fUL
+ #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_LAST SQ_FR_PMR_PBL_PAGE_SIZE_LOG_PGSZ_8T
+ #define SQ_FR_PMR_NUMLEVELS_MASK 0xc0UL
+ #define SQ_FR_PMR_NUMLEVELS_SFT 6
+ #define SQ_FR_PMR_NUMLEVELS_PHYSICAL (0x0UL << 6)
+ #define SQ_FR_PMR_NUMLEVELS_LAYER1 (0x1UL << 6)
+ #define SQ_FR_PMR_NUMLEVELS_LAYER2 (0x2UL << 6)
+ #define SQ_FR_PMR_NUMLEVELS_LAST SQ_FR_PMR_NUMLEVELS_LAYER2
+ __le64 pblptr;
+ __le64 va;
+ __le32 data[24];
+};
+
+/* sq_fr_pmr_hdr (size:256b/32B) */
+struct sq_fr_pmr_hdr {
+ u8 wqe_type;
+ #define SQ_FR_PMR_HDR_WQE_TYPE_FR_PMR 0xdUL
+ #define SQ_FR_PMR_HDR_WQE_TYPE_LAST SQ_FR_PMR_HDR_WQE_TYPE_FR_PMR
+ u8 flags;
+ #define SQ_FR_PMR_HDR_FLAGS_SIGNAL_COMP 0x1UL
+ #define SQ_FR_PMR_HDR_FLAGS_RD_OR_ATOMIC_FENCE 0x2UL
+ #define SQ_FR_PMR_HDR_FLAGS_UC_FENCE 0x4UL
+ #define SQ_FR_PMR_HDR_FLAGS_SE 0x8UL
+ #define SQ_FR_PMR_HDR_FLAGS_INLINE 0x10UL
+ #define SQ_FR_PMR_HDR_FLAGS_WQE_TS_EN 0x20UL
+ #define SQ_FR_PMR_HDR_FLAGS_DEBUG_TRACE 0x40UL
+ u8 access_cntl;
+ #define SQ_FR_PMR_HDR_ACCESS_CNTL_LOCAL_WRITE 0x1UL
+ #define SQ_FR_PMR_HDR_ACCESS_CNTL_REMOTE_READ 0x2UL
+ #define SQ_FR_PMR_HDR_ACCESS_CNTL_REMOTE_WRITE 0x4UL
+ #define SQ_FR_PMR_HDR_ACCESS_CNTL_REMOTE_ATOMIC 0x8UL
+ #define SQ_FR_PMR_HDR_ACCESS_CNTL_WINDOW_BIND 0x10UL
+ u8 zero_based_page_size_log;
+ #define SQ_FR_PMR_HDR_PAGE_SIZE_LOG_MASK 0x1fUL
+ #define SQ_FR_PMR_HDR_PAGE_SIZE_LOG_SFT 0
+ #define SQ_FR_PMR_HDR_PAGE_SIZE_LOG_PGSZ_4K 0x0UL
+ #define SQ_FR_PMR_HDR_PAGE_SIZE_LOG_PGSZ_8K 0x1UL
+ #define SQ_FR_PMR_HDR_PAGE_SIZE_LOG_PGSZ_16K 0x2UL
+ #define SQ_FR_PMR_HDR_PAGE_SIZE_LOG_PGSZ_32K 0x3UL
+ #define SQ_FR_PMR_HDR_PAGE_SIZE_LOG_PGSZ_64K 0x4UL
+ #define SQ_FR_PMR_HDR_PAGE_SIZE_LOG_PGSZ_128K 0x5UL
+ #define SQ_FR_PMR_HDR_PAGE_SIZE_LOG_PGSZ_256K 0x6UL
+ #define SQ_FR_PMR_HDR_PAGE_SIZE_LOG_PGSZ_512K 0x7UL
+ #define SQ_FR_PMR_HDR_PAGE_SIZE_LOG_PGSZ_1M 0x8UL
+ #define SQ_FR_PMR_HDR_PAGE_SIZE_LOG_PGSZ_2M 0x9UL
+ #define SQ_FR_PMR_HDR_PAGE_SIZE_LOG_PGSZ_4M 0xaUL
+ #define SQ_FR_PMR_HDR_PAGE_SIZE_LOG_PGSZ_8M 0xbUL
+ #define SQ_FR_PMR_HDR_PAGE_SIZE_LOG_PGSZ_16M 0xcUL
+ #define SQ_FR_PMR_HDR_PAGE_SIZE_LOG_PGSZ_32M 0xdUL
+ #define SQ_FR_PMR_HDR_PAGE_SIZE_LOG_PGSZ_64M 0xeUL
+ #define SQ_FR_PMR_HDR_PAGE_SIZE_LOG_PGSZ_128M 0xfUL
+ #define SQ_FR_PMR_HDR_PAGE_SIZE_LOG_PGSZ_256M 0x10UL
+ #define SQ_FR_PMR_HDR_PAGE_SIZE_LOG_PGSZ_512M 0x11UL
+ #define SQ_FR_PMR_HDR_PAGE_SIZE_LOG_PGSZ_1G 0x12UL
+ #define SQ_FR_PMR_HDR_PAGE_SIZE_LOG_PGSZ_2G 0x13UL
+ #define SQ_FR_PMR_HDR_PAGE_SIZE_LOG_PGSZ_4G 0x14UL
+ #define SQ_FR_PMR_HDR_PAGE_SIZE_LOG_PGSZ_8G 0x15UL
+ #define SQ_FR_PMR_HDR_PAGE_SIZE_LOG_PGSZ_16G 0x16UL
+ #define SQ_FR_PMR_HDR_PAGE_SIZE_LOG_PGSZ_32G 0x17UL
+ #define SQ_FR_PMR_HDR_PAGE_SIZE_LOG_PGSZ_64G 0x18UL
+ #define SQ_FR_PMR_HDR_PAGE_SIZE_LOG_PGSZ_128G 0x19UL
+ #define SQ_FR_PMR_HDR_PAGE_SIZE_LOG_PGSZ_256G 0x1aUL
+ #define SQ_FR_PMR_HDR_PAGE_SIZE_LOG_PGSZ_512G 0x1bUL
+ #define SQ_FR_PMR_HDR_PAGE_SIZE_LOG_PGSZ_1T 0x1cUL
+ #define SQ_FR_PMR_HDR_PAGE_SIZE_LOG_PGSZ_2T 0x1dUL
+ #define SQ_FR_PMR_HDR_PAGE_SIZE_LOG_PGSZ_4T 0x1eUL
+ #define SQ_FR_PMR_HDR_PAGE_SIZE_LOG_PGSZ_8T 0x1fUL
+ #define SQ_FR_PMR_HDR_PAGE_SIZE_LOG_LAST SQ_FR_PMR_HDR_PAGE_SIZE_LOG_PGSZ_8T
+ #define SQ_FR_PMR_HDR_ZERO_BASED 0x20UL
+ __le32 l_key;
+ u8 length[5];
+ u8 reserved8_1;
+ u8 reserved8_2;
+ u8 numlevels_pbl_page_size_log;
+ #define SQ_FR_PMR_HDR_PBL_PAGE_SIZE_LOG_MASK 0x1fUL
+ #define SQ_FR_PMR_HDR_PBL_PAGE_SIZE_LOG_SFT 0
+ #define SQ_FR_PMR_HDR_PBL_PAGE_SIZE_LOG_PGSZ_4K 0x0UL
+ #define SQ_FR_PMR_HDR_PBL_PAGE_SIZE_LOG_PGSZ_8K 0x1UL
+ #define SQ_FR_PMR_HDR_PBL_PAGE_SIZE_LOG_PGSZ_16K 0x2UL
+ #define SQ_FR_PMR_HDR_PBL_PAGE_SIZE_LOG_PGSZ_32K 0x3UL
+ #define SQ_FR_PMR_HDR_PBL_PAGE_SIZE_LOG_PGSZ_64K 0x4UL
+ #define SQ_FR_PMR_HDR_PBL_PAGE_SIZE_LOG_PGSZ_128K 0x5UL
+ #define SQ_FR_PMR_HDR_PBL_PAGE_SIZE_LOG_PGSZ_256K 0x6UL
+ #define SQ_FR_PMR_HDR_PBL_PAGE_SIZE_LOG_PGSZ_512K 0x7UL
+ #define SQ_FR_PMR_HDR_PBL_PAGE_SIZE_LOG_PGSZ_1M 0x8UL
+ #define SQ_FR_PMR_HDR_PBL_PAGE_SIZE_LOG_PGSZ_2M 0x9UL
+ #define SQ_FR_PMR_HDR_PBL_PAGE_SIZE_LOG_PGSZ_4M 0xaUL
+ #define SQ_FR_PMR_HDR_PBL_PAGE_SIZE_LOG_PGSZ_8M 0xbUL
+ #define SQ_FR_PMR_HDR_PBL_PAGE_SIZE_LOG_PGSZ_16M 0xcUL
+ #define SQ_FR_PMR_HDR_PBL_PAGE_SIZE_LOG_PGSZ_32M 0xdUL
+ #define SQ_FR_PMR_HDR_PBL_PAGE_SIZE_LOG_PGSZ_64M 0xeUL
+ #define SQ_FR_PMR_HDR_PBL_PAGE_SIZE_LOG_PGSZ_128M 0xfUL
+ #define SQ_FR_PMR_HDR_PBL_PAGE_SIZE_LOG_PGSZ_256M 0x10UL
+ #define SQ_FR_PMR_HDR_PBL_PAGE_SIZE_LOG_PGSZ_512M 0x11UL
+ #define SQ_FR_PMR_HDR_PBL_PAGE_SIZE_LOG_PGSZ_1G 0x12UL
+ #define SQ_FR_PMR_HDR_PBL_PAGE_SIZE_LOG_PGSZ_2G 0x13UL
+ #define SQ_FR_PMR_HDR_PBL_PAGE_SIZE_LOG_PGSZ_4G 0x14UL
+ #define SQ_FR_PMR_HDR_PBL_PAGE_SIZE_LOG_PGSZ_8G 0x15UL
+ #define SQ_FR_PMR_HDR_PBL_PAGE_SIZE_LOG_PGSZ_16G 0x16UL
+ #define SQ_FR_PMR_HDR_PBL_PAGE_SIZE_LOG_PGSZ_32G 0x17UL
+ #define SQ_FR_PMR_HDR_PBL_PAGE_SIZE_LOG_PGSZ_64G 0x18UL
+ #define SQ_FR_PMR_HDR_PBL_PAGE_SIZE_LOG_PGSZ_128G 0x19UL
+ #define SQ_FR_PMR_HDR_PBL_PAGE_SIZE_LOG_PGSZ_256G 0x1aUL
+ #define SQ_FR_PMR_HDR_PBL_PAGE_SIZE_LOG_PGSZ_512G 0x1bUL
+ #define SQ_FR_PMR_HDR_PBL_PAGE_SIZE_LOG_PGSZ_1T 0x1cUL
+ #define SQ_FR_PMR_HDR_PBL_PAGE_SIZE_LOG_PGSZ_2T 0x1dUL
+ #define SQ_FR_PMR_HDR_PBL_PAGE_SIZE_LOG_PGSZ_4T 0x1eUL
+ #define SQ_FR_PMR_HDR_PBL_PAGE_SIZE_LOG_PGSZ_8T 0x1fUL
+ #define SQ_FR_PMR_HDR_PBL_PAGE_SIZE_LOG_LAST SQ_FR_PMR_HDR_PBL_PAGE_SIZE_LOG_PGSZ_8T
+ #define SQ_FR_PMR_HDR_NUMLEVELS_MASK 0xc0UL
+ #define SQ_FR_PMR_HDR_NUMLEVELS_SFT 6
+ #define SQ_FR_PMR_HDR_NUMLEVELS_PHYSICAL (0x0UL << 6)
+ #define SQ_FR_PMR_HDR_NUMLEVELS_LAYER1 (0x1UL << 6)
+ #define SQ_FR_PMR_HDR_NUMLEVELS_LAYER2 (0x2UL << 6)
+ #define SQ_FR_PMR_HDR_NUMLEVELS_LAST SQ_FR_PMR_HDR_NUMLEVELS_LAYER2
+ __le64 pblptr;
+ __le64 va;
+};
+
+/* sq_bind (size:1024b/128B) */
+struct sq_bind {
+ u8 wqe_type;
+ #define SQ_BIND_WQE_TYPE_BIND 0xeUL
+ #define SQ_BIND_WQE_TYPE_LAST SQ_BIND_WQE_TYPE_BIND
+ u8 flags;
+ #define SQ_BIND_FLAGS_INLINE_SE_UC_FENCE_RD_OR_ATOMIC_FENCE_SIGNAL_COMP_MASK 0xffUL
+ #define SQ_BIND_FLAGS_INLINE_SE_UC_FENCE_RD_OR_ATOMIC_FENCE_SIGNAL_COMP_SFT 0
+ #define SQ_BIND_FLAGS_SIGNAL_COMP 0x1UL
+ #define SQ_BIND_FLAGS_RD_OR_ATOMIC_FENCE 0x2UL
+ #define SQ_BIND_FLAGS_UC_FENCE 0x4UL
+ #define SQ_BIND_FLAGS_SE 0x8UL
+ #define SQ_BIND_FLAGS_INLINE 0x10UL
+ #define SQ_BIND_FLAGS_WQE_TS_EN 0x20UL
+ #define SQ_BIND_FLAGS_DEBUG_TRACE 0x40UL
+ u8 access_cntl;
+ #define \
+ SQ_BIND_ACCESS_CNTL_WINDOW_BIND_REMOTE_ATOMIC_REMOTE_WRITE_REMOTE_READ_LOCAL_WRITE_MASK\
+ 0xffUL
+ #define \
+ SQ_BIND_ACCESS_CNTL_WINDOW_BIND_REMOTE_ATOMIC_REMOTE_WRITE_REMOTE_READ_LOCAL_WRITE_SFT 0
+ #define SQ_BIND_ACCESS_CNTL_LOCAL_WRITE 0x1UL
+ #define SQ_BIND_ACCESS_CNTL_REMOTE_READ 0x2UL
+ #define SQ_BIND_ACCESS_CNTL_REMOTE_WRITE 0x4UL
+ #define SQ_BIND_ACCESS_CNTL_REMOTE_ATOMIC 0x8UL
+ #define SQ_BIND_ACCESS_CNTL_WINDOW_BIND 0x10UL
+ u8 reserved8_1;
+ u8 mw_type_zero_based;
+ #define SQ_BIND_ZERO_BASED 0x1UL
+ #define SQ_BIND_MW_TYPE 0x2UL
+ #define SQ_BIND_MW_TYPE_TYPE1 (0x0UL << 1)
+ #define SQ_BIND_MW_TYPE_TYPE2 (0x1UL << 1)
+ #define SQ_BIND_MW_TYPE_LAST SQ_BIND_MW_TYPE_TYPE2
+ u8 reserved8_2;
+ __le16 reserved16;
+ __le32 parent_l_key;
+ __le32 l_key;
+ __le64 va;
+ u8 length[5];
+ u8 reserved24[3];
+ __le32 data[24];
+};
+
+/* sq_bind_hdr (size:256b/32B) */
+struct sq_bind_hdr {
+ u8 wqe_type;
+ #define SQ_BIND_HDR_WQE_TYPE_BIND 0xeUL
+ #define SQ_BIND_HDR_WQE_TYPE_LAST SQ_BIND_HDR_WQE_TYPE_BIND
+ u8 flags;
+ #define SQ_BIND_HDR_FLAGS_INLINE_SE_UC_FENCE_RD_OR_ATOMIC_FENCE_SIGNAL_COMP_MASK 0xffUL
+ #define SQ_BIND_HDR_FLAGS_INLINE_SE_UC_FENCE_RD_OR_ATOMIC_FENCE_SIGNAL_COMP_SFT 0
+ #define SQ_BIND_HDR_FLAGS_SIGNAL_COMP 0x1UL
+ #define SQ_BIND_HDR_FLAGS_RD_OR_ATOMIC_FENCE 0x2UL
+ #define SQ_BIND_HDR_FLAGS_UC_FENCE 0x4UL
+ #define SQ_BIND_HDR_FLAGS_SE 0x8UL
+ #define SQ_BIND_HDR_FLAGS_INLINE 0x10UL
+ #define SQ_BIND_HDR_FLAGS_WQE_TS_EN 0x20UL
+ #define SQ_BIND_HDR_FLAGS_DEBUG_TRACE 0x40UL
+ u8 access_cntl;
+ #define \
+ SQ_BIND_HDR_ACCESS_CNTL_WINDOW_BIND_REMOTE_ATOMIC_REMOTE_WRITE_REMOTE_READ_LOCAL_WRITE_MASK\
+ 0xffUL
+ #define \
+ SQ_BIND_HDR_ACCESS_CNTL_WINDOW_BIND_REMOTE_ATOMIC_REMOTE_WRITE_REMOTE_READ_LOCAL_WRITE_SFT \
+ 0
+ #define SQ_BIND_HDR_ACCESS_CNTL_LOCAL_WRITE 0x1UL
+ #define SQ_BIND_HDR_ACCESS_CNTL_REMOTE_READ 0x2UL
+ #define SQ_BIND_HDR_ACCESS_CNTL_REMOTE_WRITE 0x4UL
+ #define SQ_BIND_HDR_ACCESS_CNTL_REMOTE_ATOMIC 0x8UL
+ #define SQ_BIND_HDR_ACCESS_CNTL_WINDOW_BIND 0x10UL
+ u8 reserved8_1;
+ u8 mw_type_zero_based;
+ #define SQ_BIND_HDR_ZERO_BASED 0x1UL
+ #define SQ_BIND_HDR_MW_TYPE 0x2UL
+ #define SQ_BIND_HDR_MW_TYPE_TYPE1 (0x0UL << 1)
+ #define SQ_BIND_HDR_MW_TYPE_TYPE2 (0x1UL << 1)
+ #define SQ_BIND_HDR_MW_TYPE_LAST SQ_BIND_HDR_MW_TYPE_TYPE2
+ u8 reserved8_2;
+ __le16 reserved16;
+ __le32 parent_l_key;
+ __le32 l_key;
+ __le64 va;
+ u8 length[5];
+ u8 reserved24[3];
+};
+
+/* rq_wqe (size:1024b/128B) */
+struct rq_wqe {
+ u8 wqe_type;
+ #define RQ_WQE_WQE_TYPE_RCV 0x80UL
+ #define RQ_WQE_WQE_TYPE_LAST RQ_WQE_WQE_TYPE_RCV
+ u8 flags;
+ u8 wqe_size;
+ u8 reserved8;
+ __le32 reserved32;
+ __le32 wr_id[2];
+ #define RQ_WQE_WR_ID_MASK 0xfffffUL
+ #define RQ_WQE_WR_ID_SFT 0
+ u8 reserved128[16];
+ __le32 data[24];
+};
+
+/* rq_wqe_hdr (size:256b/32B) */
+struct rq_wqe_hdr {
+ u8 wqe_type;
+ #define RQ_WQE_HDR_WQE_TYPE_RCV 0x80UL
+ #define RQ_WQE_HDR_WQE_TYPE_LAST RQ_WQE_HDR_WQE_TYPE_RCV
+ u8 flags;
+ u8 wqe_size;
+ u8 reserved8;
+ __le32 reserved32;
+ __le32 wr_id[2];
+ #define RQ_WQE_HDR_WR_ID_MASK 0xfffffUL
+ #define RQ_WQE_HDR_WR_ID_SFT 0
+ u8 reserved128[16];
+};
+
+/* cq_base (size:256b/32B) */
+struct cq_base {
+ __le64 reserved64_1;
+ __le64 reserved64_2;
+ __le64 reserved64_3;
+ u8 cqe_type_toggle;
+ #define CQ_BASE_TOGGLE 0x1UL
+ #define CQ_BASE_CQE_TYPE_MASK 0x1eUL
+ #define CQ_BASE_CQE_TYPE_SFT 1
+ #define CQ_BASE_CQE_TYPE_REQ (0x0UL << 1)
+ #define CQ_BASE_CQE_TYPE_RES_RC (0x1UL << 1)
+ #define CQ_BASE_CQE_TYPE_RES_UD (0x2UL << 1)
+ #define CQ_BASE_CQE_TYPE_RES_RAWETH_QP1 (0x3UL << 1)
+ #define CQ_BASE_CQE_TYPE_RES_UD_CFA (0x4UL << 1)
+ #define CQ_BASE_CQE_TYPE_REQ_V3 (0x8UL << 1)
+ #define CQ_BASE_CQE_TYPE_RES_RC_V3 (0x9UL << 1)
+ #define CQ_BASE_CQE_TYPE_RES_UD_V3 (0xaUL << 1)
+ #define CQ_BASE_CQE_TYPE_RES_RAWETH_QP1_V3 (0xbUL << 1)
+ #define CQ_BASE_CQE_TYPE_RES_UD_CFA_V3 (0xcUL << 1)
+ #define CQ_BASE_CQE_TYPE_NO_OP (0xdUL << 1)
+ #define CQ_BASE_CQE_TYPE_TERMINAL (0xeUL << 1)
+ #define CQ_BASE_CQE_TYPE_CUT_OFF (0xfUL << 1)
+ #define CQ_BASE_CQE_TYPE_LAST CQ_BASE_CQE_TYPE_CUT_OFF
+ u8 status;
+ #define CQ_BASE_STATUS_OK 0x0UL
+ #define CQ_BASE_STATUS_BAD_RESPONSE_ERR 0x1UL
+ #define CQ_BASE_STATUS_LOCAL_LENGTH_ERR 0x2UL
+ #define CQ_BASE_STATUS_HW_LOCAL_LENGTH_ERR 0x3UL
+ #define CQ_BASE_STATUS_LOCAL_QP_OPERATION_ERR 0x4UL
+ #define CQ_BASE_STATUS_LOCAL_PROTECTION_ERR 0x5UL
+ #define CQ_BASE_STATUS_LOCAL_ACCESS_ERROR 0x6UL
+ #define CQ_BASE_STATUS_MEMORY_MGT_OPERATION_ERR 0x7UL
+ #define CQ_BASE_STATUS_REMOTE_INVALID_REQUEST_ERR 0x8UL
+ #define CQ_BASE_STATUS_REMOTE_ACCESS_ERR 0x9UL
+ #define CQ_BASE_STATUS_REMOTE_OPERATION_ERR 0xaUL
+ #define CQ_BASE_STATUS_RNR_NAK_RETRY_CNT_ERR 0xbUL
+ #define CQ_BASE_STATUS_TRANSPORT_RETRY_CNT_ERR 0xcUL
+ #define CQ_BASE_STATUS_WORK_REQUEST_FLUSHED_ERR 0xdUL
+ #define CQ_BASE_STATUS_HW_FLUSH_ERR 0xeUL
+ #define CQ_BASE_STATUS_OVERFLOW_ERR 0xfUL
+ #define CQ_BASE_STATUS_LAST CQ_BASE_STATUS_OVERFLOW_ERR
+ __le16 reserved16;
+ __le32 opaque;
+};
+
+/* cq_req (size:256b/32B) */
+struct cq_req {
+ __le64 qp_handle;
+ __le16 sq_cons_idx;
+ __le16 reserved16_1;
+ __le32 reserved32_2;
+ __le64 reserved64;
+ u8 cqe_type_toggle;
+ #define CQ_REQ_TOGGLE 0x1UL
+ #define CQ_REQ_CQE_TYPE_MASK 0x1eUL
+ #define CQ_REQ_CQE_TYPE_SFT 1
+ #define CQ_REQ_CQE_TYPE_REQ (0x0UL << 1)
+ #define CQ_REQ_CQE_TYPE_LAST CQ_REQ_CQE_TYPE_REQ
+ #define CQ_REQ_PUSH 0x20UL
+ u8 status;
+ #define CQ_REQ_STATUS_OK 0x0UL
+ #define CQ_REQ_STATUS_BAD_RESPONSE_ERR 0x1UL
+ #define CQ_REQ_STATUS_LOCAL_LENGTH_ERR 0x2UL
+ #define CQ_REQ_STATUS_LOCAL_QP_OPERATION_ERR 0x3UL
+ #define CQ_REQ_STATUS_LOCAL_PROTECTION_ERR 0x4UL
+ #define CQ_REQ_STATUS_MEMORY_MGT_OPERATION_ERR 0x5UL
+ #define CQ_REQ_STATUS_REMOTE_INVALID_REQUEST_ERR 0x6UL
+ #define CQ_REQ_STATUS_REMOTE_ACCESS_ERR 0x7UL
+ #define CQ_REQ_STATUS_REMOTE_OPERATION_ERR 0x8UL
+ #define CQ_REQ_STATUS_RNR_NAK_RETRY_CNT_ERR 0x9UL
+ #define CQ_REQ_STATUS_TRANSPORT_RETRY_CNT_ERR 0xaUL
+ #define CQ_REQ_STATUS_WORK_REQUEST_FLUSHED_ERR 0xbUL
+ #define CQ_REQ_STATUS_LAST CQ_REQ_STATUS_WORK_REQUEST_FLUSHED_ERR
+ __le16 reserved16_2;
+ __le32 reserved32_1;
+};
+
+/* cq_res_rc (size:256b/32B) */
+struct cq_res_rc {
+ __le32 length;
+ __le32 imm_data_or_inv_r_key;
+ __le64 qp_handle;
+ __le64 mr_handle;
+ u8 cqe_type_toggle;
+ #define CQ_RES_RC_TOGGLE 0x1UL
+ #define CQ_RES_RC_CQE_TYPE_MASK 0x1eUL
+ #define CQ_RES_RC_CQE_TYPE_SFT 1
+ #define CQ_RES_RC_CQE_TYPE_RES_RC (0x1UL << 1)
+ #define CQ_RES_RC_CQE_TYPE_LAST CQ_RES_RC_CQE_TYPE_RES_RC
+ u8 status;
+ #define CQ_RES_RC_STATUS_OK 0x0UL
+ #define CQ_RES_RC_STATUS_LOCAL_ACCESS_ERROR 0x1UL
+ #define CQ_RES_RC_STATUS_LOCAL_LENGTH_ERR 0x2UL
+ #define CQ_RES_RC_STATUS_LOCAL_PROTECTION_ERR 0x3UL
+ #define CQ_RES_RC_STATUS_LOCAL_QP_OPERATION_ERR 0x4UL
+ #define CQ_RES_RC_STATUS_MEMORY_MGT_OPERATION_ERR 0x5UL
+ #define CQ_RES_RC_STATUS_REMOTE_INVALID_REQUEST_ERR 0x6UL
+ #define CQ_RES_RC_STATUS_WORK_REQUEST_FLUSHED_ERR 0x7UL
+ #define CQ_RES_RC_STATUS_HW_FLUSH_ERR 0x8UL
+ #define CQ_RES_RC_STATUS_LAST CQ_RES_RC_STATUS_HW_FLUSH_ERR
+ __le16 flags;
+ #define CQ_RES_RC_FLAGS_SRQ 0x1UL
+ #define CQ_RES_RC_FLAGS_SRQ_RQ 0x0UL
+ #define CQ_RES_RC_FLAGS_SRQ_SRQ 0x1UL
+ #define CQ_RES_RC_FLAGS_SRQ_LAST CQ_RES_RC_FLAGS_SRQ_SRQ
+ #define CQ_RES_RC_FLAGS_IMM 0x2UL
+ #define CQ_RES_RC_FLAGS_INV 0x4UL
+ #define CQ_RES_RC_FLAGS_RDMA 0x8UL
+ #define CQ_RES_RC_FLAGS_RDMA_SEND (0x0UL << 3)
+ #define CQ_RES_RC_FLAGS_RDMA_RDMA_WRITE (0x1UL << 3)
+ #define CQ_RES_RC_FLAGS_RDMA_LAST CQ_RES_RC_FLAGS_RDMA_RDMA_WRITE
+ __le32 srq_or_rq_wr_id;
+ #define CQ_RES_RC_SRQ_OR_RQ_WR_ID_MASK 0xfffffUL
+ #define CQ_RES_RC_SRQ_OR_RQ_WR_ID_SFT 0
+};
+
+/* cq_res_ud (size:256b/32B) */
+struct cq_res_ud {
+ __le16 length;
+ #define CQ_RES_UD_LENGTH_MASK 0x3fffUL
+ #define CQ_RES_UD_LENGTH_SFT 0
+ __le16 cfa_metadata;
+ #define CQ_RES_UD_CFA_METADATA_VID_MASK 0xfffUL
+ #define CQ_RES_UD_CFA_METADATA_VID_SFT 0
+ #define CQ_RES_UD_CFA_METADATA_DE 0x1000UL
+ #define CQ_RES_UD_CFA_METADATA_PRI_MASK 0xe000UL
+ #define CQ_RES_UD_CFA_METADATA_PRI_SFT 13
+ __le32 imm_data;
+ __le64 qp_handle;
+ __le16 src_mac[3];
+ __le16 src_qp_low;
+ u8 cqe_type_toggle;
+ #define CQ_RES_UD_TOGGLE 0x1UL
+ #define CQ_RES_UD_CQE_TYPE_MASK 0x1eUL
+ #define CQ_RES_UD_CQE_TYPE_SFT 1
+ #define CQ_RES_UD_CQE_TYPE_RES_UD (0x2UL << 1)
+ #define CQ_RES_UD_CQE_TYPE_LAST CQ_RES_UD_CQE_TYPE_RES_UD
+ u8 status;
+ #define CQ_RES_UD_STATUS_OK 0x0UL
+ #define CQ_RES_UD_STATUS_LOCAL_ACCESS_ERROR 0x1UL
+ #define CQ_RES_UD_STATUS_HW_LOCAL_LENGTH_ERR 0x2UL
+ #define CQ_RES_UD_STATUS_LOCAL_PROTECTION_ERR 0x3UL
+ #define CQ_RES_UD_STATUS_LOCAL_QP_OPERATION_ERR 0x4UL
+ #define CQ_RES_UD_STATUS_MEMORY_MGT_OPERATION_ERR 0x5UL
+ #define CQ_RES_UD_STATUS_WORK_REQUEST_FLUSHED_ERR 0x7UL
+ #define CQ_RES_UD_STATUS_HW_FLUSH_ERR 0x8UL
+ #define CQ_RES_UD_STATUS_LAST CQ_RES_UD_STATUS_HW_FLUSH_ERR
+ __le16 flags;
+ #define CQ_RES_UD_FLAGS_SRQ 0x1UL
+ #define CQ_RES_UD_FLAGS_SRQ_RQ 0x0UL
+ #define CQ_RES_UD_FLAGS_SRQ_SRQ 0x1UL
+ #define CQ_RES_UD_FLAGS_SRQ_LAST CQ_RES_UD_FLAGS_SRQ_SRQ
+ #define CQ_RES_UD_FLAGS_IMM 0x2UL
+ #define CQ_RES_UD_FLAGS_UNUSED_MASK 0xcUL
+ #define CQ_RES_UD_FLAGS_UNUSED_SFT 2
+ #define CQ_RES_UD_FLAGS_ROCE_IP_VER_MASK 0x30UL
+ #define CQ_RES_UD_FLAGS_ROCE_IP_VER_SFT 4
+ #define CQ_RES_UD_FLAGS_ROCE_IP_VER_V1 (0x0UL << 4)
+ #define CQ_RES_UD_FLAGS_ROCE_IP_VER_V2IPV4 (0x2UL << 4)
+ #define CQ_RES_UD_FLAGS_ROCE_IP_VER_V2IPV6 (0x3UL << 4)
+ #define CQ_RES_UD_FLAGS_ROCE_IP_VER_LAST CQ_RES_UD_FLAGS_ROCE_IP_VER_V2IPV6
+ #define CQ_RES_UD_FLAGS_META_FORMAT_MASK 0x3c0UL
+ #define CQ_RES_UD_FLAGS_META_FORMAT_SFT 6
+ #define CQ_RES_UD_FLAGS_META_FORMAT_NONE (0x0UL << 6)
+ #define CQ_RES_UD_FLAGS_META_FORMAT_VLAN (0x1UL << 6)
+ #define CQ_RES_UD_FLAGS_META_FORMAT_TUNNEL_ID (0x2UL << 6)
+ #define CQ_RES_UD_FLAGS_META_FORMAT_CHDR_DATA (0x3UL << 6)
+ #define CQ_RES_UD_FLAGS_META_FORMAT_HDR_OFFSET (0x4UL << 6)
+ #define CQ_RES_UD_FLAGS_META_FORMAT_LAST CQ_RES_UD_FLAGS_META_FORMAT_HDR_OFFSET
+ #define CQ_RES_UD_FLAGS_EXT_META_FORMAT_MASK 0xc00UL
+ #define CQ_RES_UD_FLAGS_EXT_META_FORMAT_SFT 10
+ __le32 src_qp_high_srq_or_rq_wr_id;
+ #define CQ_RES_UD_SRQ_OR_RQ_WR_ID_MASK 0xfffffUL
+ #define CQ_RES_UD_SRQ_OR_RQ_WR_ID_SFT 0
+ #define CQ_RES_UD_SRC_QP_HIGH_MASK 0xff000000UL
+ #define CQ_RES_UD_SRC_QP_HIGH_SFT 24
+};
+
+/* cq_res_ud_v2 (size:256b/32B) */
+struct cq_res_ud_v2 {
+ __le16 length;
+ #define CQ_RES_UD_V2_LENGTH_MASK 0x3fffUL
+ #define CQ_RES_UD_V2_LENGTH_SFT 0
+ __le16 cfa_metadata0;
+ #define CQ_RES_UD_V2_CFA_METADATA0_VID_MASK 0xfffUL
+ #define CQ_RES_UD_V2_CFA_METADATA0_VID_SFT 0
+ #define CQ_RES_UD_V2_CFA_METADATA0_DE 0x1000UL
+ #define CQ_RES_UD_V2_CFA_METADATA0_PRI_MASK 0xe000UL
+ #define CQ_RES_UD_V2_CFA_METADATA0_PRI_SFT 13
+ __le32 imm_data;
+ __le64 qp_handle;
+ __le16 src_mac[3];
+ __le16 src_qp_low;
+ u8 cqe_type_toggle;
+ #define CQ_RES_UD_V2_TOGGLE 0x1UL
+ #define CQ_RES_UD_V2_CQE_TYPE_MASK 0x1eUL
+ #define CQ_RES_UD_V2_CQE_TYPE_SFT 1
+ #define CQ_RES_UD_V2_CQE_TYPE_RES_UD (0x2UL << 1)
+ #define CQ_RES_UD_V2_CQE_TYPE_LAST CQ_RES_UD_V2_CQE_TYPE_RES_UD
+ u8 status;
+ #define CQ_RES_UD_V2_STATUS_OK 0x0UL
+ #define CQ_RES_UD_V2_STATUS_LOCAL_ACCESS_ERROR 0x1UL
+ #define CQ_RES_UD_V2_STATUS_HW_LOCAL_LENGTH_ERR 0x2UL
+ #define CQ_RES_UD_V2_STATUS_LOCAL_PROTECTION_ERR 0x3UL
+ #define CQ_RES_UD_V2_STATUS_LOCAL_QP_OPERATION_ERR 0x4UL
+ #define CQ_RES_UD_V2_STATUS_MEMORY_MGT_OPERATION_ERR 0x5UL
+ #define CQ_RES_UD_V2_STATUS_WORK_REQUEST_FLUSHED_ERR 0x7UL
+ #define CQ_RES_UD_V2_STATUS_HW_FLUSH_ERR 0x8UL
+ #define CQ_RES_UD_V2_STATUS_LAST CQ_RES_UD_V2_STATUS_HW_FLUSH_ERR
+ __le16 flags;
+ #define CQ_RES_UD_V2_FLAGS_SRQ 0x1UL
+ #define CQ_RES_UD_V2_FLAGS_SRQ_RQ 0x0UL
+ #define CQ_RES_UD_V2_FLAGS_SRQ_SRQ 0x1UL
+ #define CQ_RES_UD_V2_FLAGS_SRQ_LAST CQ_RES_UD_V2_FLAGS_SRQ_SRQ
+ #define CQ_RES_UD_V2_FLAGS_IMM 0x2UL
+ #define CQ_RES_UD_V2_FLAGS_UNUSED_MASK 0xcUL
+ #define CQ_RES_UD_V2_FLAGS_UNUSED_SFT 2
+ #define CQ_RES_UD_V2_FLAGS_ROCE_IP_VER_MASK 0x30UL
+ #define CQ_RES_UD_V2_FLAGS_ROCE_IP_VER_SFT 4
+ #define CQ_RES_UD_V2_FLAGS_ROCE_IP_VER_V1 (0x0UL << 4)
+ #define CQ_RES_UD_V2_FLAGS_ROCE_IP_VER_V2IPV4 (0x2UL << 4)
+ #define CQ_RES_UD_V2_FLAGS_ROCE_IP_VER_V2IPV6 (0x3UL << 4)
+ #define CQ_RES_UD_V2_FLAGS_ROCE_IP_VER_LAST CQ_RES_UD_V2_FLAGS_ROCE_IP_VER_V2IPV6
+ #define CQ_RES_UD_V2_FLAGS_META_FORMAT_MASK 0x3c0UL
+ #define CQ_RES_UD_V2_FLAGS_META_FORMAT_SFT 6
+ #define CQ_RES_UD_V2_FLAGS_META_FORMAT_NONE (0x0UL << 6)
+ #define CQ_RES_UD_V2_FLAGS_META_FORMAT_ACT_REC_PTR (0x1UL << 6)
+ #define CQ_RES_UD_V2_FLAGS_META_FORMAT_TUNNEL_ID (0x2UL << 6)
+ #define CQ_RES_UD_V2_FLAGS_META_FORMAT_CHDR_DATA (0x3UL << 6)
+ #define CQ_RES_UD_V2_FLAGS_META_FORMAT_HDR_OFFSET (0x4UL << 6)
+ #define CQ_RES_UD_V2_FLAGS_META_FORMAT_LAST CQ_RES_UD_V2_FLAGS_META_FORMAT_HDR_OFFSET
+ __le32 src_qp_high_srq_or_rq_wr_id;
+ #define CQ_RES_UD_V2_SRQ_OR_RQ_WR_ID_MASK 0xfffffUL
+ #define CQ_RES_UD_V2_SRQ_OR_RQ_WR_ID_SFT 0
+ #define CQ_RES_UD_V2_CFA_METADATA1_MASK 0xf00000UL
+ #define CQ_RES_UD_V2_CFA_METADATA1_SFT 20
+ #define CQ_RES_UD_V2_CFA_METADATA1_TPID_SEL_MASK 0x700000UL
+ #define CQ_RES_UD_V2_CFA_METADATA1_TPID_SEL_SFT 20
+ #define CQ_RES_UD_V2_CFA_METADATA1_TPID_SEL_TPID88A8 (0x0UL << 20)
+ #define CQ_RES_UD_V2_CFA_METADATA1_TPID_SEL_TPID8100 (0x1UL << 20)
+ #define CQ_RES_UD_V2_CFA_METADATA1_TPID_SEL_TPID9100 (0x2UL << 20)
+ #define CQ_RES_UD_V2_CFA_METADATA1_TPID_SEL_TPID9200 (0x3UL << 20)
+ #define CQ_RES_UD_V2_CFA_METADATA1_TPID_SEL_TPID9300 (0x4UL << 20)
+ #define CQ_RES_UD_V2_CFA_METADATA1_TPID_SEL_TPIDCFG (0x5UL << 20)
+ #define CQ_RES_UD_V2_CFA_METADATA1_TPID_SEL_LAST CQ_RES_UD_V2_CFA_METADATA1_TPID_SEL_TPIDCFG
+ #define CQ_RES_UD_V2_CFA_METADATA1_VALID 0x800000UL
+ #define CQ_RES_UD_V2_SRC_QP_HIGH_MASK 0xff000000UL
+ #define CQ_RES_UD_V2_SRC_QP_HIGH_SFT 24
+};
+
+/* cq_res_ud_cfa (size:256b/32B) */
+struct cq_res_ud_cfa {
+ __le16 length;
+ #define CQ_RES_UD_CFA_LENGTH_MASK 0x3fffUL
+ #define CQ_RES_UD_CFA_LENGTH_SFT 0
+ __le16 cfa_code;
+ __le32 imm_data;
+ __le32 qid;
+ #define CQ_RES_UD_CFA_QID_MASK 0xfffffUL
+ #define CQ_RES_UD_CFA_QID_SFT 0
+ __le32 cfa_metadata;
+ #define CQ_RES_UD_CFA_CFA_METADATA_VID_MASK 0xfffUL
+ #define CQ_RES_UD_CFA_CFA_METADATA_VID_SFT 0
+ #define CQ_RES_UD_CFA_CFA_METADATA_DE 0x1000UL
+ #define CQ_RES_UD_CFA_CFA_METADATA_PRI_MASK 0xe000UL
+ #define CQ_RES_UD_CFA_CFA_METADATA_PRI_SFT 13
+ #define CQ_RES_UD_CFA_CFA_METADATA_TPID_MASK 0xffff0000UL
+ #define CQ_RES_UD_CFA_CFA_METADATA_TPID_SFT 16
+ __le16 src_mac[3];
+ __le16 src_qp_low;
+ u8 cqe_type_toggle;
+ #define CQ_RES_UD_CFA_TOGGLE 0x1UL
+ #define CQ_RES_UD_CFA_CQE_TYPE_MASK 0x1eUL
+ #define CQ_RES_UD_CFA_CQE_TYPE_SFT 1
+ #define CQ_RES_UD_CFA_CQE_TYPE_RES_UD_CFA (0x4UL << 1)
+ #define CQ_RES_UD_CFA_CQE_TYPE_LAST CQ_RES_UD_CFA_CQE_TYPE_RES_UD_CFA
+ u8 status;
+ #define CQ_RES_UD_CFA_STATUS_OK 0x0UL
+ #define CQ_RES_UD_CFA_STATUS_LOCAL_ACCESS_ERROR 0x1UL
+ #define CQ_RES_UD_CFA_STATUS_HW_LOCAL_LENGTH_ERR 0x2UL
+ #define CQ_RES_UD_CFA_STATUS_LOCAL_PROTECTION_ERR 0x3UL
+ #define CQ_RES_UD_CFA_STATUS_LOCAL_QP_OPERATION_ERR 0x4UL
+ #define CQ_RES_UD_CFA_STATUS_MEMORY_MGT_OPERATION_ERR 0x5UL
+ #define CQ_RES_UD_CFA_STATUS_WORK_REQUEST_FLUSHED_ERR 0x7UL
+ #define CQ_RES_UD_CFA_STATUS_HW_FLUSH_ERR 0x8UL
+ #define CQ_RES_UD_CFA_STATUS_LAST CQ_RES_UD_CFA_STATUS_HW_FLUSH_ERR
+ __le16 flags;
+ #define CQ_RES_UD_CFA_FLAGS_SRQ 0x1UL
+ #define CQ_RES_UD_CFA_FLAGS_SRQ_RQ 0x0UL
+ #define CQ_RES_UD_CFA_FLAGS_SRQ_SRQ 0x1UL
+ #define CQ_RES_UD_CFA_FLAGS_SRQ_LAST CQ_RES_UD_CFA_FLAGS_SRQ_SRQ
+ #define CQ_RES_UD_CFA_FLAGS_IMM 0x2UL
+ #define CQ_RES_UD_CFA_FLAGS_UNUSED_MASK 0xcUL
+ #define CQ_RES_UD_CFA_FLAGS_UNUSED_SFT 2
+ #define CQ_RES_UD_CFA_FLAGS_ROCE_IP_VER_MASK 0x30UL
+ #define CQ_RES_UD_CFA_FLAGS_ROCE_IP_VER_SFT 4
+ #define CQ_RES_UD_CFA_FLAGS_ROCE_IP_VER_V1 (0x0UL << 4)
+ #define CQ_RES_UD_CFA_FLAGS_ROCE_IP_VER_V2IPV4 (0x2UL << 4)
+ #define CQ_RES_UD_CFA_FLAGS_ROCE_IP_VER_V2IPV6 (0x3UL << 4)
+ #define CQ_RES_UD_CFA_FLAGS_ROCE_IP_VER_LAST CQ_RES_UD_CFA_FLAGS_ROCE_IP_VER_V2IPV6
+ #define CQ_RES_UD_CFA_FLAGS_META_FORMAT_MASK 0x3c0UL
+ #define CQ_RES_UD_CFA_FLAGS_META_FORMAT_SFT 6
+ #define CQ_RES_UD_CFA_FLAGS_META_FORMAT_NONE (0x0UL << 6)
+ #define CQ_RES_UD_CFA_FLAGS_META_FORMAT_VLAN (0x1UL << 6)
+ #define CQ_RES_UD_CFA_FLAGS_META_FORMAT_TUNNEL_ID (0x2UL << 6)
+ #define CQ_RES_UD_CFA_FLAGS_META_FORMAT_CHDR_DATA (0x3UL << 6)
+ #define CQ_RES_UD_CFA_FLAGS_META_FORMAT_HDR_OFFSET (0x4UL << 6)
+ #define CQ_RES_UD_CFA_FLAGS_META_FORMAT_LAST CQ_RES_UD_CFA_FLAGS_META_FORMAT_HDR_OFFSET
+ #define CQ_RES_UD_CFA_FLAGS_EXT_META_FORMAT_MASK 0xc00UL
+ #define CQ_RES_UD_CFA_FLAGS_EXT_META_FORMAT_SFT 10
+ __le32 src_qp_high_srq_or_rq_wr_id;
+ #define CQ_RES_UD_CFA_SRQ_OR_RQ_WR_ID_MASK 0xfffffUL
+ #define CQ_RES_UD_CFA_SRQ_OR_RQ_WR_ID_SFT 0
+ #define CQ_RES_UD_CFA_SRC_QP_HIGH_MASK 0xff000000UL
+ #define CQ_RES_UD_CFA_SRC_QP_HIGH_SFT 24
+};
+
+/* cq_res_ud_cfa_v2 (size:256b/32B) */
+struct cq_res_ud_cfa_v2 {
+ __le16 length;
+ #define CQ_RES_UD_CFA_V2_LENGTH_MASK 0x3fffUL
+ #define CQ_RES_UD_CFA_V2_LENGTH_SFT 0
+ __le16 cfa_metadata0;
+ #define CQ_RES_UD_CFA_V2_CFA_METADATA0_VID_MASK 0xfffUL
+ #define CQ_RES_UD_CFA_V2_CFA_METADATA0_VID_SFT 0
+ #define CQ_RES_UD_CFA_V2_CFA_METADATA0_DE 0x1000UL
+ #define CQ_RES_UD_CFA_V2_CFA_METADATA0_PRI_MASK 0xe000UL
+ #define CQ_RES_UD_CFA_V2_CFA_METADATA0_PRI_SFT 13
+ __le32 imm_data;
+ __le32 qid;
+ #define CQ_RES_UD_CFA_V2_QID_MASK 0xfffffUL
+ #define CQ_RES_UD_CFA_V2_QID_SFT 0
+ __le32 cfa_metadata2;
+ __le16 src_mac[3];
+ __le16 src_qp_low;
+ u8 cqe_type_toggle;
+ #define CQ_RES_UD_CFA_V2_TOGGLE 0x1UL
+ #define CQ_RES_UD_CFA_V2_CQE_TYPE_MASK 0x1eUL
+ #define CQ_RES_UD_CFA_V2_CQE_TYPE_SFT 1
+ #define CQ_RES_UD_CFA_V2_CQE_TYPE_RES_UD_CFA (0x4UL << 1)
+ #define CQ_RES_UD_CFA_V2_CQE_TYPE_LAST CQ_RES_UD_CFA_V2_CQE_TYPE_RES_UD_CFA
+ u8 status;
+ #define CQ_RES_UD_CFA_V2_STATUS_OK 0x0UL
+ #define CQ_RES_UD_CFA_V2_STATUS_LOCAL_ACCESS_ERROR 0x1UL
+ #define CQ_RES_UD_CFA_V2_STATUS_HW_LOCAL_LENGTH_ERR 0x2UL
+ #define CQ_RES_UD_CFA_V2_STATUS_LOCAL_PROTECTION_ERR 0x3UL
+ #define CQ_RES_UD_CFA_V2_STATUS_LOCAL_QP_OPERATION_ERR 0x4UL
+ #define CQ_RES_UD_CFA_V2_STATUS_MEMORY_MGT_OPERATION_ERR 0x5UL
+ #define CQ_RES_UD_CFA_V2_STATUS_WORK_REQUEST_FLUSHED_ERR 0x7UL
+ #define CQ_RES_UD_CFA_V2_STATUS_HW_FLUSH_ERR 0x8UL
+ #define CQ_RES_UD_CFA_V2_STATUS_LAST CQ_RES_UD_CFA_V2_STATUS_HW_FLUSH_ERR
+ __le16 flags;
+ #define CQ_RES_UD_CFA_V2_FLAGS_SRQ 0x1UL
+ #define CQ_RES_UD_CFA_V2_FLAGS_SRQ_RQ 0x0UL
+ #define CQ_RES_UD_CFA_V2_FLAGS_SRQ_SRQ 0x1UL
+ #define CQ_RES_UD_CFA_V2_FLAGS_SRQ_LAST CQ_RES_UD_CFA_V2_FLAGS_SRQ_SRQ
+ #define CQ_RES_UD_CFA_V2_FLAGS_IMM 0x2UL
+ #define CQ_RES_UD_CFA_V2_FLAGS_UNUSED_MASK 0xcUL
+ #define CQ_RES_UD_CFA_V2_FLAGS_UNUSED_SFT 2
+ #define CQ_RES_UD_CFA_V2_FLAGS_ROCE_IP_VER_MASK 0x30UL
+ #define CQ_RES_UD_CFA_V2_FLAGS_ROCE_IP_VER_SFT 4
+ #define CQ_RES_UD_CFA_V2_FLAGS_ROCE_IP_VER_V1 (0x0UL << 4)
+ #define CQ_RES_UD_CFA_V2_FLAGS_ROCE_IP_VER_V2IPV4 (0x2UL << 4)
+ #define CQ_RES_UD_CFA_V2_FLAGS_ROCE_IP_VER_V2IPV6 (0x3UL << 4)
+ #define CQ_RES_UD_CFA_V2_FLAGS_ROCE_IP_VER_LAST CQ_RES_UD_CFA_V2_FLAGS_ROCE_IP_VER_V2IPV6
+ #define CQ_RES_UD_CFA_V2_FLAGS_META_FORMAT_MASK 0x3c0UL
+ #define CQ_RES_UD_CFA_V2_FLAGS_META_FORMAT_SFT 6
+ #define CQ_RES_UD_CFA_V2_FLAGS_META_FORMAT_NONE (0x0UL << 6)
+ #define CQ_RES_UD_CFA_V2_FLAGS_META_FORMAT_ACT_REC_PTR (0x1UL << 6)
+ #define CQ_RES_UD_CFA_V2_FLAGS_META_FORMAT_TUNNEL_ID (0x2UL << 6)
+ #define CQ_RES_UD_CFA_V2_FLAGS_META_FORMAT_CHDR_DATA (0x3UL << 6)
+ #define CQ_RES_UD_CFA_V2_FLAGS_META_FORMAT_HDR_OFFSET (0x4UL << 6)
+ #define CQ_RES_UD_CFA_V2_FLAGS_META_FORMAT_LAST \
+ CQ_RES_UD_CFA_V2_FLAGS_META_FORMAT_HDR_OFFSET
+ __le32 src_qp_high_srq_or_rq_wr_id;
+ #define CQ_RES_UD_CFA_V2_SRQ_OR_RQ_WR_ID_MASK 0xfffffUL
+ #define CQ_RES_UD_CFA_V2_SRQ_OR_RQ_WR_ID_SFT 0
+ #define CQ_RES_UD_CFA_V2_CFA_METADATA1_MASK 0xf00000UL
+ #define CQ_RES_UD_CFA_V2_CFA_METADATA1_SFT 20
+ #define CQ_RES_UD_CFA_V2_CFA_METADATA1_TPID_SEL_MASK 0x700000UL
+ #define CQ_RES_UD_CFA_V2_CFA_METADATA1_TPID_SEL_SFT 20
+ #define CQ_RES_UD_CFA_V2_CFA_METADATA1_TPID_SEL_TPID88A8 (0x0UL << 20)
+ #define CQ_RES_UD_CFA_V2_CFA_METADATA1_TPID_SEL_TPID8100 (0x1UL << 20)
+ #define CQ_RES_UD_CFA_V2_CFA_METADATA1_TPID_SEL_TPID9100 (0x2UL << 20)
+ #define CQ_RES_UD_CFA_V2_CFA_METADATA1_TPID_SEL_TPID9200 (0x3UL << 20)
+ #define CQ_RES_UD_CFA_V2_CFA_METADATA1_TPID_SEL_TPID9300 (0x4UL << 20)
+ #define CQ_RES_UD_CFA_V2_CFA_METADATA1_TPID_SEL_TPIDCFG (0x5UL << 20)
+ #define CQ_RES_UD_CFA_V2_CFA_METADATA1_TPID_SEL_LAST \
+ CQ_RES_UD_CFA_V2_CFA_METADATA1_TPID_SEL_TPIDCFG
+ #define CQ_RES_UD_CFA_V2_CFA_METADATA1_VALID 0x800000UL
+ #define CQ_RES_UD_CFA_V2_SRC_QP_HIGH_MASK 0xff000000UL
+ #define CQ_RES_UD_CFA_V2_SRC_QP_HIGH_SFT 24
+};
+
+/* cq_res_raweth_qp1 (size:256b/32B) */
+struct cq_res_raweth_qp1 {
+ __le16 length;
+ #define CQ_RES_RAWETH_QP1_LENGTH_MASK 0x3fffUL
+ #define CQ_RES_RAWETH_QP1_LENGTH_SFT 0
+ __le16 raweth_qp1_flags;
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS_MASK 0x3ffUL
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS_SFT 0
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS_ERROR 0x1UL
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS_ITYPE_MASK 0x3c0UL
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS_ITYPE_SFT 6
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS_ITYPE_NOT_KNOWN (0x0UL << 6)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS_ITYPE_IP (0x1UL << 6)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS_ITYPE_TCP (0x2UL << 6)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS_ITYPE_UDP (0x3UL << 6)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS_ITYPE_FCOE (0x4UL << 6)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS_ITYPE_ROCE (0x5UL << 6)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS_ITYPE_ICMP (0x7UL << 6)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS_ITYPE_PTP_WO_TIMESTAMP (0x8UL << 6)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS_ITYPE_PTP_W_TIMESTAMP (0x9UL << 6)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS_ITYPE_LAST \
+ CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS_ITYPE_PTP_W_TIMESTAMP
+ __le16 raweth_qp1_errors;
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_IP_CS_ERROR 0x10UL
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_L4_CS_ERROR 0x20UL
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_T_IP_CS_ERROR 0x40UL
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_T_L4_CS_ERROR 0x80UL
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_CRC_ERROR 0x100UL
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_T_PKT_ERROR_MASK 0xe00UL
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_T_PKT_ERROR_SFT 9
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_T_PKT_ERROR_NO_ERROR (0x0UL << 9)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_T_PKT_ERROR_T_L3_BAD_VERSION (0x1UL << 9)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_T_PKT_ERROR_T_L3_BAD_HDR_LEN (0x2UL << 9)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_T_PKT_ERROR_TUNNEL_TOTAL_ERROR (0x3UL << 9)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_T_PKT_ERROR_T_IP_TOTAL_ERROR (0x4UL << 9)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_T_PKT_ERROR_T_UDP_TOTAL_ERROR (0x5UL << 9)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_T_PKT_ERROR_T_L3_BAD_TTL (0x6UL << 9)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_T_PKT_ERROR_LAST \
+ CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_T_PKT_ERROR_T_L3_BAD_TTL
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_PKT_ERROR_MASK 0xf000UL
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_PKT_ERROR_SFT 12
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_PKT_ERROR_NO_ERROR (0x0UL << 12)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_PKT_ERROR_L3_BAD_VERSION (0x1UL << 12)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_PKT_ERROR_L3_BAD_HDR_LEN (0x2UL << 12)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_PKT_ERROR_L3_BAD_TTL (0x3UL << 12)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_PKT_ERROR_IP_TOTAL_ERROR (0x4UL << 12)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_PKT_ERROR_UDP_TOTAL_ERROR (0x5UL << 12)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_PKT_ERROR_L4_BAD_HDR_LEN (0x6UL << 12)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_PKT_ERROR_L4_BAD_HDR_LEN_TOO_SMALL (0x7UL << 12)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_PKT_ERROR_L4_BAD_OPT_LEN (0x8UL << 12)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_PKT_ERROR_LAST \
+ CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_PKT_ERROR_L4_BAD_OPT_LEN
+ __le16 raweth_qp1_cfa_code;
+ __le64 qp_handle;
+ __le32 raweth_qp1_flags2;
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS2_IP_CS_CALC 0x1UL
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS2_L4_CS_CALC 0x2UL
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS2_T_IP_CS_CALC 0x4UL
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS2_T_L4_CS_CALC 0x8UL
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS2_META_FORMAT_MASK 0xf0UL
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS2_META_FORMAT_SFT 4
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS2_META_FORMAT_NONE (0x0UL << 4)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS2_META_FORMAT_VLAN (0x1UL << 4)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS2_META_FORMAT_TUNNEL_ID (0x2UL << 4)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS2_META_FORMAT_CHDR_DATA (0x3UL << 4)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS2_META_FORMAT_HDR_OFFSET (0x4UL << 4)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS2_META_FORMAT_LAST \
+ CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS2_META_FORMAT_HDR_OFFSET
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS2_IP_TYPE 0x100UL
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS2_COMPLETE_CHECKSUM_CALC 0x200UL
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS2_EXT_META_FORMAT_MASK 0xc00UL
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS2_EXT_META_FORMAT_SFT 10
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS2_COMPLETE_CHECKSUM_MASK 0xffff0000UL
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS2_COMPLETE_CHECKSUM_SFT 16
+ __le32 raweth_qp1_metadata;
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_METADATA_PRI_DE_VID_MASK 0xffffUL
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_METADATA_PRI_DE_VID_SFT 0
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_METADATA_VID_MASK 0xfffUL
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_METADATA_VID_SFT 0
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_METADATA_DE 0x1000UL
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_METADATA_PRI_MASK 0xe000UL
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_METADATA_PRI_SFT 13
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_METADATA_TPID_MASK 0xffff0000UL
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_METADATA_TPID_SFT 16
+ u8 cqe_type_toggle;
+ #define CQ_RES_RAWETH_QP1_TOGGLE 0x1UL
+ #define CQ_RES_RAWETH_QP1_CQE_TYPE_MASK 0x1eUL
+ #define CQ_RES_RAWETH_QP1_CQE_TYPE_SFT 1
+ #define CQ_RES_RAWETH_QP1_CQE_TYPE_RES_RAWETH_QP1 (0x3UL << 1)
+ #define CQ_RES_RAWETH_QP1_CQE_TYPE_LAST CQ_RES_RAWETH_QP1_CQE_TYPE_RES_RAWETH_QP1
+ u8 status;
+ #define CQ_RES_RAWETH_QP1_STATUS_OK 0x0UL
+ #define CQ_RES_RAWETH_QP1_STATUS_LOCAL_ACCESS_ERROR 0x1UL
+ #define CQ_RES_RAWETH_QP1_STATUS_HW_LOCAL_LENGTH_ERR 0x2UL
+ #define CQ_RES_RAWETH_QP1_STATUS_LOCAL_PROTECTION_ERR 0x3UL
+ #define CQ_RES_RAWETH_QP1_STATUS_LOCAL_QP_OPERATION_ERR 0x4UL
+ #define CQ_RES_RAWETH_QP1_STATUS_MEMORY_MGT_OPERATION_ERR 0x5UL
+ #define CQ_RES_RAWETH_QP1_STATUS_WORK_REQUEST_FLUSHED_ERR 0x7UL
+ #define CQ_RES_RAWETH_QP1_STATUS_HW_FLUSH_ERR 0x8UL
+ #define CQ_RES_RAWETH_QP1_STATUS_LAST CQ_RES_RAWETH_QP1_STATUS_HW_FLUSH_ERR
+ __le16 flags;
+ #define CQ_RES_RAWETH_QP1_FLAGS_SRQ 0x1UL
+ #define CQ_RES_RAWETH_QP1_FLAGS_SRQ_RQ 0x0UL
+ #define CQ_RES_RAWETH_QP1_FLAGS_SRQ_SRQ 0x1UL
+ #define CQ_RES_RAWETH_QP1_FLAGS_SRQ_LAST CQ_RES_RAWETH_QP1_FLAGS_SRQ_SRQ
+ __le32 raweth_qp1_payload_offset_srq_or_rq_wr_id;
+ #define CQ_RES_RAWETH_QP1_SRQ_OR_RQ_WR_ID_MASK 0xfffffUL
+ #define CQ_RES_RAWETH_QP1_SRQ_OR_RQ_WR_ID_SFT 0
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_PAYLOAD_OFFSET_MASK 0xff000000UL
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_PAYLOAD_OFFSET_SFT 24
+};
+
+/* cq_res_raweth_qp1_v2 (size:256b/32B) */
+struct cq_res_raweth_qp1_v2 {
+ __le16 length;
+ #define CQ_RES_RAWETH_QP1_V2_LENGTH_MASK 0x3fffUL
+ #define CQ_RES_RAWETH_QP1_V2_LENGTH_SFT 0
+ __le16 raweth_qp1_flags;
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_FLAGS_MASK 0x3ffUL
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_FLAGS_SFT 0
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_FLAGS_ERROR 0x1UL
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_FLAGS_ITYPE_MASK 0x3c0UL
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_FLAGS_ITYPE_SFT 6
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_FLAGS_ITYPE_NOT_KNOWN (0x0UL << 6)
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_FLAGS_ITYPE_IP (0x1UL << 6)
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_FLAGS_ITYPE_TCP (0x2UL << 6)
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_FLAGS_ITYPE_UDP (0x3UL << 6)
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_FLAGS_ITYPE_FCOE (0x4UL << 6)
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_FLAGS_ITYPE_ROCE (0x5UL << 6)
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_FLAGS_ITYPE_ICMP (0x7UL << 6)
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_FLAGS_ITYPE_PTP_WO_TIMESTAMP (0x8UL << 6)
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_FLAGS_ITYPE_PTP_W_TIMESTAMP (0x9UL << 6)
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_FLAGS_ITYPE_LAST \
+ CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_FLAGS_ITYPE_PTP_W_TIMESTAMP
+ __le16 raweth_qp1_errors;
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_ERRORS_IP_CS_ERROR 0x10UL
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_ERRORS_L4_CS_ERROR 0x20UL
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_ERRORS_T_IP_CS_ERROR 0x40UL
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_ERRORS_T_L4_CS_ERROR 0x80UL
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_ERRORS_CRC_ERROR 0x100UL
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_ERRORS_T_PKT_ERROR_MASK 0xe00UL
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_ERRORS_T_PKT_ERROR_SFT 9
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_ERRORS_T_PKT_ERROR_NO_ERROR (0x0UL << 9)
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_ERRORS_T_PKT_ERROR_T_L3_BAD_VERSION (0x1UL << 9)
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_ERRORS_T_PKT_ERROR_T_L3_BAD_HDR_LEN (0x2UL << 9)
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_ERRORS_T_PKT_ERROR_TUNNEL_TOTAL_ERROR (0x3UL << 9)
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_ERRORS_T_PKT_ERROR_T_IP_TOTAL_ERROR (0x4UL << 9)
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_ERRORS_T_PKT_ERROR_T_UDP_TOTAL_ERROR (0x5UL << 9)
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_ERRORS_T_PKT_ERROR_T_L3_BAD_TTL (0x6UL << 9)
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_ERRORS_T_PKT_ERROR_LAST \
+ CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_ERRORS_T_PKT_ERROR_T_L3_BAD_TTL
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_ERRORS_PKT_ERROR_MASK 0xf000UL
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_ERRORS_PKT_ERROR_SFT 12
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_ERRORS_PKT_ERROR_NO_ERROR (0x0UL << 12)
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_ERRORS_PKT_ERROR_L3_BAD_VERSION (0x1UL << 12)
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_ERRORS_PKT_ERROR_L3_BAD_HDR_LEN (0x2UL << 12)
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_ERRORS_PKT_ERROR_L3_BAD_TTL (0x3UL << 12)
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_ERRORS_PKT_ERROR_IP_TOTAL_ERROR (0x4UL << 12)
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_ERRORS_PKT_ERROR_UDP_TOTAL_ERROR (0x5UL << 12)
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_ERRORS_PKT_ERROR_L4_BAD_HDR_LEN (0x6UL << 12)
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_ERRORS_PKT_ERROR_L4_BAD_HDR_LEN_TOO_SMALL \
+ (0x7UL << 12)
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_ERRORS_PKT_ERROR_L4_BAD_OPT_LEN \
+ (0x8UL << 12)
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_ERRORS_PKT_ERROR_LAST \
+ CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_ERRORS_PKT_ERROR_L4_BAD_OPT_LEN
+ __le16 cfa_metadata0;
+ #define CQ_RES_RAWETH_QP1_V2_CFA_METADATA0_VID_MASK 0xfffUL
+ #define CQ_RES_RAWETH_QP1_V2_CFA_METADATA0_VID_SFT 0
+ #define CQ_RES_RAWETH_QP1_V2_CFA_METADATA0_DE 0x1000UL
+ #define CQ_RES_RAWETH_QP1_V2_CFA_METADATA0_PRI_MASK 0xe000UL
+ #define CQ_RES_RAWETH_QP1_V2_CFA_METADATA0_PRI_SFT 13
+ __le64 qp_handle;
+ __le32 raweth_qp1_flags2;
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_FLAGS2_CS_ALL_OK_MODE 0x8UL
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_FLAGS2_META_FORMAT_MASK 0xf0UL
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_FLAGS2_META_FORMAT_SFT 4
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_FLAGS2_META_FORMAT_NONE (0x0UL << 4)
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_FLAGS2_META_FORMAT_ACT_REC_PTR (0x1UL << 4)
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_FLAGS2_META_FORMAT_TUNNEL_ID (0x2UL << 4)
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_FLAGS2_META_FORMAT_CHDR_DATA (0x3UL << 4)
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_FLAGS2_META_FORMAT_HDR_OFFSET (0x4UL << 4)
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_FLAGS2_META_FORMAT_LAST \
+ CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_FLAGS2_META_FORMAT_HDR_OFFSET
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_FLAGS2_IP_TYPE 0x100UL
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_FLAGS2_COMPLETE_CHECKSUM_CALC 0x200UL
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_FLAGS2_CS_OK_MASK 0xfc00UL
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_FLAGS2_CS_OK_SFT 10
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_FLAGS2_COMPLETE_CHECKSUM_MASK 0xffff0000UL
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_FLAGS2_COMPLETE_CHECKSUM_SFT 16
+ __le32 cfa_metadata2;
+ u8 cqe_type_toggle;
+ #define CQ_RES_RAWETH_QP1_V2_TOGGLE 0x1UL
+ #define CQ_RES_RAWETH_QP1_V2_CQE_TYPE_MASK 0x1eUL
+ #define CQ_RES_RAWETH_QP1_V2_CQE_TYPE_SFT 1
+ #define CQ_RES_RAWETH_QP1_V2_CQE_TYPE_RES_RAWETH_QP1 (0x3UL << 1)
+ #define CQ_RES_RAWETH_QP1_V2_CQE_TYPE_LAST CQ_RES_RAWETH_QP1_V2_CQE_TYPE_RES_RAWETH_QP1
+ u8 status;
+ #define CQ_RES_RAWETH_QP1_V2_STATUS_OK 0x0UL
+ #define CQ_RES_RAWETH_QP1_V2_STATUS_LOCAL_ACCESS_ERROR 0x1UL
+ #define CQ_RES_RAWETH_QP1_V2_STATUS_HW_LOCAL_LENGTH_ERR 0x2UL
+ #define CQ_RES_RAWETH_QP1_V2_STATUS_LOCAL_PROTECTION_ERR 0x3UL
+ #define CQ_RES_RAWETH_QP1_V2_STATUS_LOCAL_QP_OPERATION_ERR 0x4UL
+ #define CQ_RES_RAWETH_QP1_V2_STATUS_MEMORY_MGT_OPERATION_ERR 0x5UL
+ #define CQ_RES_RAWETH_QP1_V2_STATUS_WORK_REQUEST_FLUSHED_ERR 0x7UL
+ #define CQ_RES_RAWETH_QP1_V2_STATUS_HW_FLUSH_ERR 0x8UL
+ #define CQ_RES_RAWETH_QP1_V2_STATUS_LAST CQ_RES_RAWETH_QP1_V2_STATUS_HW_FLUSH_ERR
+ __le16 flags;
+ #define CQ_RES_RAWETH_QP1_V2_FLAGS_SRQ 0x1UL
+ #define CQ_RES_RAWETH_QP1_V2_FLAGS_SRQ_RQ 0x0UL
+ #define CQ_RES_RAWETH_QP1_V2_FLAGS_SRQ_SRQ 0x1UL
+ #define CQ_RES_RAWETH_QP1_V2_FLAGS_SRQ_LAST CQ_RES_RAWETH_QP1_V2_FLAGS_SRQ_SRQ
+ __le32 raweth_qp1_payload_offset_srq_or_rq_wr_id;
+ #define CQ_RES_RAWETH_QP1_V2_SRQ_OR_RQ_WR_ID_MASK 0xfffffUL
+ #define CQ_RES_RAWETH_QP1_V2_SRQ_OR_RQ_WR_ID_SFT 0
+ #define CQ_RES_RAWETH_QP1_V2_CFA_METADATA1_MASK 0xf00000UL
+ #define CQ_RES_RAWETH_QP1_V2_CFA_METADATA1_SFT 20
+ #define CQ_RES_RAWETH_QP1_V2_CFA_METADATA1_TPID_SEL_MASK 0x700000UL
+ #define CQ_RES_RAWETH_QP1_V2_CFA_METADATA1_TPID_SEL_SFT 20
+ #define CQ_RES_RAWETH_QP1_V2_CFA_METADATA1_TPID_SEL_TPID88A8 (0x0UL << 20)
+ #define CQ_RES_RAWETH_QP1_V2_CFA_METADATA1_TPID_SEL_TPID8100 (0x1UL << 20)
+ #define CQ_RES_RAWETH_QP1_V2_CFA_METADATA1_TPID_SEL_TPID9100 (0x2UL << 20)
+ #define CQ_RES_RAWETH_QP1_V2_CFA_METADATA1_TPID_SEL_TPID9200 (0x3UL << 20)
+ #define CQ_RES_RAWETH_QP1_V2_CFA_METADATA1_TPID_SEL_TPID9300 (0x4UL << 20)
+ #define CQ_RES_RAWETH_QP1_V2_CFA_METADATA1_TPID_SEL_TPIDCFG (0x5UL << 20)
+ #define CQ_RES_RAWETH_QP1_V2_CFA_METADATA1_TPID_SEL_LAST \
+ CQ_RES_RAWETH_QP1_V2_CFA_METADATA1_TPID_SEL_TPIDCFG
+ #define CQ_RES_RAWETH_QP1_V2_CFA_METADATA1_VALID 0x800000UL
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_PAYLOAD_OFFSET_MASK 0xff000000UL
+ #define CQ_RES_RAWETH_QP1_V2_RAWETH_QP1_PAYLOAD_OFFSET_SFT 24
+};
+
+/* cq_terminal (size:256b/32B) */
+struct cq_terminal {
+ __le64 qp_handle;
+ __le16 sq_cons_idx;
+ __le16 rq_cons_idx;
+ __le32 reserved32_1;
+ __le64 reserved64_3;
+ u8 cqe_type_toggle;
+ #define CQ_TERMINAL_TOGGLE 0x1UL
+ #define CQ_TERMINAL_CQE_TYPE_MASK 0x1eUL
+ #define CQ_TERMINAL_CQE_TYPE_SFT 1
+ #define CQ_TERMINAL_CQE_TYPE_TERMINAL (0xeUL << 1)
+ #define CQ_TERMINAL_CQE_TYPE_LAST CQ_TERMINAL_CQE_TYPE_TERMINAL
+ u8 status;
+ #define CQ_TERMINAL_STATUS_OK 0x0UL
+ #define CQ_TERMINAL_STATUS_LAST CQ_TERMINAL_STATUS_OK
+ __le16 reserved16;
+ __le32 reserved32_2;
+};
+
+/* cq_cutoff (size:256b/32B) */
+struct cq_cutoff {
+ __le64 reserved64_1;
+ __le64 reserved64_2;
+ __le64 reserved64_3;
+ u8 cqe_type_toggle;
+ #define CQ_CUTOFF_TOGGLE 0x1UL
+ #define CQ_CUTOFF_CQE_TYPE_MASK 0x1eUL
+ #define CQ_CUTOFF_CQE_TYPE_SFT 1
+ #define CQ_CUTOFF_CQE_TYPE_CUT_OFF (0xfUL << 1)
+ #define CQ_CUTOFF_CQE_TYPE_LAST CQ_CUTOFF_CQE_TYPE_CUT_OFF
+ #define CQ_CUTOFF_RESIZE_TOGGLE_MASK 0x60UL
+ #define CQ_CUTOFF_RESIZE_TOGGLE_SFT 5
+ u8 status;
+ #define CQ_CUTOFF_STATUS_OK 0x0UL
+ #define CQ_CUTOFF_STATUS_LAST CQ_CUTOFF_STATUS_OK
+ __le16 reserved16;
+ __le32 reserved32;
+};
+
+/* nq_base (size:128b/16B) */
+struct nq_base {
+ __le16 info10_type;
+ #define NQ_BASE_TYPE_MASK 0x3fUL
+ #define NQ_BASE_TYPE_SFT 0
+ #define NQ_BASE_TYPE_CQ_NOTIFICATION 0x30UL
+ #define NQ_BASE_TYPE_SRQ_EVENT 0x32UL
+ #define NQ_BASE_TYPE_DBQ_EVENT 0x34UL
+ #define NQ_BASE_TYPE_QP_EVENT 0x38UL
+ #define NQ_BASE_TYPE_FUNC_EVENT 0x3aUL
+ #define NQ_BASE_TYPE_LAST NQ_BASE_TYPE_FUNC_EVENT
+ #define NQ_BASE_INFO10_MASK 0xffc0UL
+ #define NQ_BASE_INFO10_SFT 6
+ __le16 info16;
+ __le32 info32;
+ __le32 info63_v[2];
+ #define NQ_BASE_V 0x1UL
+ #define NQ_BASE_INFO63_MASK 0xfffffffeUL
+ #define NQ_BASE_INFO63_SFT 1
+};
+
+/* nq_cn (size:128b/16B) */
+struct nq_cn {
+ __le16 type;
+ #define NQ_CN_TYPE_MASK 0x3fUL
+ #define NQ_CN_TYPE_SFT 0
+ #define NQ_CN_TYPE_CQ_NOTIFICATION 0x30UL
+ #define NQ_CN_TYPE_LAST NQ_CN_TYPE_CQ_NOTIFICATION
+ #define NQ_CN_TOGGLE_MASK 0xc0UL
+ #define NQ_CN_TOGGLE_SFT 6
+ __le16 reserved16;
+ __le32 cq_handle_low;
+ __le32 v;
+ #define NQ_CN_V 0x1UL
+ __le32 cq_handle_high;
+};
+
+/* nq_srq_event (size:128b/16B) */
+struct nq_srq_event {
+ u8 type;
+ #define NQ_SRQ_EVENT_TYPE_MASK 0x3fUL
+ #define NQ_SRQ_EVENT_TYPE_SFT 0
+ #define NQ_SRQ_EVENT_TYPE_SRQ_EVENT 0x32UL
+ #define NQ_SRQ_EVENT_TYPE_LAST NQ_SRQ_EVENT_TYPE_SRQ_EVENT
+ #define NQ_SRQ_EVENT_TOGGLE_MASK 0xc0UL
+ #define NQ_SRQ_EVENT_TOGGLE_SFT 6
+ u8 event;
+ #define NQ_SRQ_EVENT_EVENT_SRQ_THRESHOLD_EVENT 0x1UL
+ #define NQ_SRQ_EVENT_EVENT_LAST NQ_SRQ_EVENT_EVENT_SRQ_THRESHOLD_EVENT
+ __le16 reserved16;
+ __le32 srq_handle_low;
+ __le32 v;
+ #define NQ_SRQ_EVENT_V 0x1UL
+ __le32 srq_handle_high;
+};
+
+/* nq_dbq_event (size:128b/16B) */
+struct nq_dbq_event {
+ u8 type;
+ #define NQ_DBQ_EVENT_TYPE_MASK 0x3fUL
+ #define NQ_DBQ_EVENT_TYPE_SFT 0
+ #define NQ_DBQ_EVENT_TYPE_DBQ_EVENT 0x34UL
+ #define NQ_DBQ_EVENT_TYPE_LAST NQ_DBQ_EVENT_TYPE_DBQ_EVENT
+ u8 event;
+ #define NQ_DBQ_EVENT_EVENT_DBQ_THRESHOLD_EVENT 0x1UL
+ #define NQ_DBQ_EVENT_EVENT_LAST NQ_DBQ_EVENT_EVENT_DBQ_THRESHOLD_EVENT
+ __le16 db_pfid;
+ #define NQ_DBQ_EVENT_DB_PFID_MASK 0xfUL
+ #define NQ_DBQ_EVENT_DB_PFID_SFT 0
+ __le32 db_dpi;
+ #define NQ_DBQ_EVENT_DB_DPI_MASK 0xfffffUL
+ #define NQ_DBQ_EVENT_DB_DPI_SFT 0
+ __le32 v;
+ #define NQ_DBQ_EVENT_V 0x1UL
+ __le32 db_type_db_xid;
+ #define NQ_DBQ_EVENT_DB_XID_MASK 0xfffffUL
+ #define NQ_DBQ_EVENT_DB_XID_SFT 0
+ #define NQ_DBQ_EVENT_DB_TYPE_MASK 0xf0000000UL
+ #define NQ_DBQ_EVENT_DB_TYPE_SFT 28
+};
+
+/* xrrq_irrq (size:256b/32B) */
+struct xrrq_irrq {
+ __le16 credits_type;
+ #define XRRQ_IRRQ_TYPE 0x1UL
+ #define XRRQ_IRRQ_TYPE_READ_REQ 0x0UL
+ #define XRRQ_IRRQ_TYPE_ATOMIC_REQ 0x1UL
+ #define XRRQ_IRRQ_TYPE_LAST XRRQ_IRRQ_TYPE_ATOMIC_REQ
+ #define XRRQ_IRRQ_CREDITS_MASK 0xf800UL
+ #define XRRQ_IRRQ_CREDITS_SFT 11
+ __le16 reserved16;
+ __le32 reserved32;
+ __le32 psn;
+ #define XRRQ_IRRQ_PSN_MASK 0xffffffUL
+ #define XRRQ_IRRQ_PSN_SFT 0
+ __le32 msn;
+ #define XRRQ_IRRQ_MSN_MASK 0xffffffUL
+ #define XRRQ_IRRQ_MSN_SFT 0
+ __le64 va_or_atomic_result;
+ __le32 rdma_r_key;
+ __le32 length;
+};
+
+/* xrrq_orrq (size:256b/32B) */
+struct xrrq_orrq {
+ __le16 num_sges_type;
+ #define XRRQ_ORRQ_TYPE 0x1UL
+ #define XRRQ_ORRQ_TYPE_READ_REQ 0x0UL
+ #define XRRQ_ORRQ_TYPE_ATOMIC_REQ 0x1UL
+ #define XRRQ_ORRQ_TYPE_LAST XRRQ_ORRQ_TYPE_ATOMIC_REQ
+ #define XRRQ_ORRQ_NUM_SGES_MASK 0xf800UL
+ #define XRRQ_ORRQ_NUM_SGES_SFT 11
+ __le16 reserved16;
+ __le32 length;
+ __le32 psn;
+ #define XRRQ_ORRQ_PSN_MASK 0xffffffUL
+ #define XRRQ_ORRQ_PSN_SFT 0
+ __le32 end_psn;
+ #define XRRQ_ORRQ_END_PSN_MASK 0xffffffUL
+ #define XRRQ_ORRQ_END_PSN_SFT 0
+ __le64 first_sge_phy_or_sing_sge_va;
+ __le32 single_sge_l_key;
+ __le32 single_sge_size;
+};
+
+/* ptu_pte (size:64b/8B) */
+struct ptu_pte {
+ __le32 page_next_to_last_last_valid[2];
+ #define PTU_PTE_VALID 0x1UL
+ #define PTU_PTE_LAST 0x2UL
+ #define PTU_PTE_NEXT_TO_LAST 0x4UL
+ #define PTU_PTE_UNUSED_MASK 0xff8UL
+ #define PTU_PTE_UNUSED_SFT 3
+ #define PTU_PTE_PAGE_MASK 0xfffff000UL
+ #define PTU_PTE_PAGE_SFT 12
+};
+
+/* ptu_pde (size:64b/8B) */
+struct ptu_pde {
+ __le32 page_valid[2];
+ #define PTU_PDE_VALID 0x1UL
+ #define PTU_PDE_UNUSED_MASK 0xffeUL
+ #define PTU_PDE_UNUSED_SFT 1
+ #define PTU_PDE_PAGE_MASK 0xfffff000UL
+ #define PTU_PDE_PAGE_SFT 12
+};
+
+#endif /* ___BNXT_RE_HSI_H__ */
diff --git a/drivers/infiniband/hw/cxgb3/Kconfig b/drivers/infiniband/hw/cxgb3/Kconfig
deleted file mode 100644
index 2b6352b85485..000000000000
--- a/drivers/infiniband/hw/cxgb3/Kconfig
+++ /dev/null
@@ -1,27 +0,0 @@
-config INFINIBAND_CXGB3
- tristate "Chelsio RDMA Driver"
- depends on CHELSIO_T3 && INET
- select GENERIC_ALLOCATOR
- ---help---
- This is an iWARP/RDMA driver for the Chelsio T3 1GbE and
- 10GbE adapters.
-
- For general information about Chelsio and our products, visit
- our website at <http://www.chelsio.com>.
-
- For customer support, please visit our customer support page at
- <http://www.chelsio.com/support.html>.
-
- Please send feedback to <linux-bugs@chelsio.com>.
-
- To compile this driver as a module, choose M here: the module
- will be called iw_cxgb3.
-
-config INFINIBAND_CXGB3_DEBUG
- bool "Verbose debugging output"
- depends on INFINIBAND_CXGB3
- default n
- ---help---
- This option causes the Chelsio RDMA driver to produce copious
- amounts of debug messages. Select this if you are developing
- the driver or trying to diagnose a problem.
diff --git a/drivers/infiniband/hw/cxgb3/Makefile b/drivers/infiniband/hw/cxgb3/Makefile
deleted file mode 100644
index 2761364185af..000000000000
--- a/drivers/infiniband/hw/cxgb3/Makefile
+++ /dev/null
@@ -1,8 +0,0 @@
-ccflags-y := -Idrivers/net/ethernet/chelsio/cxgb3
-
-obj-$(CONFIG_INFINIBAND_CXGB3) += iw_cxgb3.o
-
-iw_cxgb3-y := iwch_cm.o iwch_ev.o iwch_cq.o iwch_qp.o iwch_mem.o \
- iwch_provider.o iwch.o cxio_hal.o cxio_resource.o
-
-ccflags-$(CONFIG_INFINIBAND_CXGB3_DEBUG) += -DDEBUG
diff --git a/drivers/infiniband/hw/cxgb3/cxio_dbg.c b/drivers/infiniband/hw/cxgb3/cxio_dbg.c
deleted file mode 100644
index 445e89e5e7cf..000000000000
--- a/drivers/infiniband/hw/cxgb3/cxio_dbg.c
+++ /dev/null
@@ -1,203 +0,0 @@
-/*
- * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifdef DEBUG
-#include <linux/types.h>
-#include <linux/slab.h>
-#include "common.h"
-#include "cxgb3_ioctl.h"
-#include "cxio_hal.h"
-#include "cxio_wr.h"
-
-void cxio_dump_tpt(struct cxio_rdev *rdev, u32 stag)
-{
- struct ch_mem_range *m;
- u64 *data;
- int rc;
- int size = 32;
-
- m = kmalloc(sizeof(*m) + size, GFP_ATOMIC);
- if (!m)
- return;
-
- m->mem_id = MEM_PMRX;
- m->addr = (stag>>8) * 32 + rdev->rnic_info.tpt_base;
- m->len = size;
- PDBG("%s TPT addr 0x%x len %d\n", __func__, m->addr, m->len);
- rc = rdev->t3cdev_p->ctl(rdev->t3cdev_p, RDMA_GET_MEM, m);
- if (rc) {
- PDBG("%s toectl returned error %d\n", __func__, rc);
- kfree(m);
- return;
- }
-
- data = (u64 *)m->buf;
- while (size > 0) {
- PDBG("TPT %08x: %016llx\n", m->addr, (unsigned long long) *data);
- size -= 8;
- data++;
- m->addr += 8;
- }
- kfree(m);
-}
-
-void cxio_dump_pbl(struct cxio_rdev *rdev, u32 pbl_addr, uint len, u8 shift)
-{
- struct ch_mem_range *m;
- u64 *data;
- int rc;
- int size, npages;
-
- shift += 12;
- npages = (len + (1ULL << shift) - 1) >> shift;
- size = npages * sizeof(u64);
-
- m = kmalloc(sizeof(*m) + size, GFP_ATOMIC);
- if (!m)
- return;
-
- m->mem_id = MEM_PMRX;
- m->addr = pbl_addr;
- m->len = size;
- PDBG("%s PBL addr 0x%x len %d depth %d\n",
- __func__, m->addr, m->len, npages);
- rc = rdev->t3cdev_p->ctl(rdev->t3cdev_p, RDMA_GET_MEM, m);
- if (rc) {
- PDBG("%s toectl returned error %d\n", __func__, rc);
- kfree(m);
- return;
- }
-
- data = (u64 *)m->buf;
- while (size > 0) {
- PDBG("PBL %08x: %016llx\n", m->addr, (unsigned long long) *data);
- size -= 8;
- data++;
- m->addr += 8;
- }
- kfree(m);
-}
-
-void cxio_dump_wqe(union t3_wr *wqe)
-{
- __be64 *data = (__be64 *)wqe;
- uint size = (uint)(be64_to_cpu(*data) & 0xff);
-
- if (size == 0)
- size = 8;
- while (size > 0) {
- PDBG("WQE %p: %016llx\n", data,
- (unsigned long long) be64_to_cpu(*data));
- size--;
- data++;
- }
-}
-
-void cxio_dump_wce(struct t3_cqe *wce)
-{
- __be64 *data = (__be64 *)wce;
- int size = sizeof(*wce);
-
- while (size > 0) {
- PDBG("WCE %p: %016llx\n", data,
- (unsigned long long) be64_to_cpu(*data));
- size -= 8;
- data++;
- }
-}
-
-void cxio_dump_rqt(struct cxio_rdev *rdev, u32 hwtid, int nents)
-{
- struct ch_mem_range *m;
- int size = nents * 64;
- u64 *data;
- int rc;
-
- m = kmalloc(sizeof(*m) + size, GFP_ATOMIC);
- if (!m)
- return;
-
- m->mem_id = MEM_PMRX;
- m->addr = ((hwtid)<<10) + rdev->rnic_info.rqt_base;
- m->len = size;
- PDBG("%s RQT addr 0x%x len %d\n", __func__, m->addr, m->len);
- rc = rdev->t3cdev_p->ctl(rdev->t3cdev_p, RDMA_GET_MEM, m);
- if (rc) {
- PDBG("%s toectl returned error %d\n", __func__, rc);
- kfree(m);
- return;
- }
-
- data = (u64 *)m->buf;
- while (size > 0) {
- PDBG("RQT %08x: %016llx\n", m->addr, (unsigned long long) *data);
- size -= 8;
- data++;
- m->addr += 8;
- }
- kfree(m);
-}
-
-void cxio_dump_tcb(struct cxio_rdev *rdev, u32 hwtid)
-{
- struct ch_mem_range *m;
- int size = TCB_SIZE;
- u32 *data;
- int rc;
-
- m = kmalloc(sizeof(*m) + size, GFP_ATOMIC);
- if (!m)
- return;
-
- m->mem_id = MEM_CM;
- m->addr = hwtid * size;
- m->len = size;
- PDBG("%s TCB %d len %d\n", __func__, m->addr, m->len);
- rc = rdev->t3cdev_p->ctl(rdev->t3cdev_p, RDMA_GET_MEM, m);
- if (rc) {
- PDBG("%s toectl returned error %d\n", __func__, rc);
- kfree(m);
- return;
- }
-
- data = (u32 *)m->buf;
- while (size > 0) {
- printk("%2u: %08x %08x %08x %08x %08x %08x %08x %08x\n",
- m->addr,
- *(data+2), *(data+3), *(data),*(data+1),
- *(data+6), *(data+7), *(data+4), *(data+5));
- size -= 32;
- data += 8;
- m->addr += 32;
- }
- kfree(m);
-}
-#endif
diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.c b/drivers/infiniband/hw/cxgb3/cxio_hal.c
deleted file mode 100644
index ada2e5009c86..000000000000
--- a/drivers/infiniband/hw/cxgb3/cxio_hal.c
+++ /dev/null
@@ -1,1343 +0,0 @@
-/*
- * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include <asm/delay.h>
-
-#include <linux/mutex.h>
-#include <linux/netdevice.h>
-#include <linux/sched.h>
-#include <linux/spinlock.h>
-#include <linux/pci.h>
-#include <linux/dma-mapping.h>
-#include <linux/slab.h>
-#include <net/net_namespace.h>
-
-#include "cxio_resource.h"
-#include "cxio_hal.h"
-#include "cxgb3_offload.h"
-#include "sge_defs.h"
-
-static LIST_HEAD(rdev_list);
-static cxio_hal_ev_callback_func_t cxio_ev_cb = NULL;
-
-static struct cxio_rdev *cxio_hal_find_rdev_by_name(char *dev_name)
-{
- struct cxio_rdev *rdev;
-
- list_for_each_entry(rdev, &rdev_list, entry)
- if (!strcmp(rdev->dev_name, dev_name))
- return rdev;
- return NULL;
-}
-
-static struct cxio_rdev *cxio_hal_find_rdev_by_t3cdev(struct t3cdev *tdev)
-{
- struct cxio_rdev *rdev;
-
- list_for_each_entry(rdev, &rdev_list, entry)
- if (rdev->t3cdev_p == tdev)
- return rdev;
- return NULL;
-}
-
-int cxio_hal_cq_op(struct cxio_rdev *rdev_p, struct t3_cq *cq,
- enum t3_cq_opcode op, u32 credit)
-{
- int ret;
- struct t3_cqe *cqe;
- u32 rptr;
-
- struct rdma_cq_op setup;
- setup.id = cq->cqid;
- setup.credits = (op == CQ_CREDIT_UPDATE) ? credit : 0;
- setup.op = op;
- ret = rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_OP, &setup);
-
- if ((ret < 0) || (op == CQ_CREDIT_UPDATE))
- return ret;
-
- /*
- * If the rearm returned an index other than our current index,
- * then there might be CQE's in flight (being DMA'd). We must wait
- * here for them to complete or the consumer can miss a notification.
- */
- if (Q_PTR2IDX((cq->rptr), cq->size_log2) != ret) {
- int i=0;
-
- rptr = cq->rptr;
-
- /*
- * Keep the generation correct by bumping rptr until it
- * matches the index returned by the rearm - 1.
- */
- while (Q_PTR2IDX((rptr+1), cq->size_log2) != ret)
- rptr++;
-
- /*
- * Now rptr is the index for the (last) cqe that was
- * in-flight at the time the HW rearmed the CQ. We
- * spin until that CQE is valid.
- */
- cqe = cq->queue + Q_PTR2IDX(rptr, cq->size_log2);
- while (!CQ_VLD_ENTRY(rptr, cq->size_log2, cqe)) {
- udelay(1);
- if (i++ > 1000000) {
- printk(KERN_ERR "%s: stalled rnic\n",
- rdev_p->dev_name);
- return -EIO;
- }
- }
-
- return 1;
- }
-
- return 0;
-}
-
-static int cxio_hal_clear_cq_ctx(struct cxio_rdev *rdev_p, u32 cqid)
-{
- struct rdma_cq_setup setup;
- setup.id = cqid;
- setup.base_addr = 0; /* NULL address */
- setup.size = 0; /* disaable the CQ */
- setup.credits = 0;
- setup.credit_thres = 0;
- setup.ovfl_mode = 0;
- return (rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_SETUP, &setup));
-}
-
-static int cxio_hal_clear_qp_ctx(struct cxio_rdev *rdev_p, u32 qpid)
-{
- u64 sge_cmd;
- struct t3_modify_qp_wr *wqe;
- struct sk_buff *skb = alloc_skb(sizeof(*wqe), GFP_KERNEL);
- if (!skb) {
- PDBG("%s alloc_skb failed\n", __func__);
- return -ENOMEM;
- }
- wqe = (struct t3_modify_qp_wr *) skb_put(skb, sizeof(*wqe));
- memset(wqe, 0, sizeof(*wqe));
- build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_QP_MOD,
- T3_COMPLETION_FLAG | T3_NOTIFY_FLAG, 0, qpid, 7,
- T3_SOPEOP);
- wqe->flags = cpu_to_be32(MODQP_WRITE_EC);
- sge_cmd = qpid << 8 | 3;
- wqe->sge_cmd = cpu_to_be64(sge_cmd);
- skb->priority = CPL_PRIORITY_CONTROL;
- return iwch_cxgb3_ofld_send(rdev_p->t3cdev_p, skb);
-}
-
-int cxio_create_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq, int kernel)
-{
- struct rdma_cq_setup setup;
- int size = (1UL << (cq->size_log2)) * sizeof(struct t3_cqe);
-
- size += 1; /* one extra page for storing cq-in-err state */
- cq->cqid = cxio_hal_get_cqid(rdev_p->rscp);
- if (!cq->cqid)
- return -ENOMEM;
- if (kernel) {
- cq->sw_queue = kzalloc(size, GFP_KERNEL);
- if (!cq->sw_queue)
- return -ENOMEM;
- }
- cq->queue = dma_alloc_coherent(&(rdev_p->rnic_info.pdev->dev), size,
- &(cq->dma_addr), GFP_KERNEL);
- if (!cq->queue) {
- kfree(cq->sw_queue);
- return -ENOMEM;
- }
- dma_unmap_addr_set(cq, mapping, cq->dma_addr);
- memset(cq->queue, 0, size);
- setup.id = cq->cqid;
- setup.base_addr = (u64) (cq->dma_addr);
- setup.size = 1UL << cq->size_log2;
- setup.credits = 65535;
- setup.credit_thres = 1;
- if (rdev_p->t3cdev_p->type != T3A)
- setup.ovfl_mode = 0;
- else
- setup.ovfl_mode = 1;
- return (rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_SETUP, &setup));
-}
-
-#ifdef notyet
-int cxio_resize_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq)
-{
- struct rdma_cq_setup setup;
- setup.id = cq->cqid;
- setup.base_addr = (u64) (cq->dma_addr);
- setup.size = 1UL << cq->size_log2;
- setup.credits = setup.size;
- setup.credit_thres = setup.size; /* TBD: overflow recovery */
- setup.ovfl_mode = 1;
- return (rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_SETUP, &setup));
-}
-#endif
-
-static u32 get_qpid(struct cxio_rdev *rdev_p, struct cxio_ucontext *uctx)
-{
- struct cxio_qpid_list *entry;
- u32 qpid;
- int i;
-
- mutex_lock(&uctx->lock);
- if (!list_empty(&uctx->qpids)) {
- entry = list_entry(uctx->qpids.next, struct cxio_qpid_list,
- entry);
- list_del(&entry->entry);
- qpid = entry->qpid;
- kfree(entry);
- } else {
- qpid = cxio_hal_get_qpid(rdev_p->rscp);
- if (!qpid)
- goto out;
- for (i = qpid+1; i & rdev_p->qpmask; i++) {
- entry = kmalloc(sizeof *entry, GFP_KERNEL);
- if (!entry)
- break;
- entry->qpid = i;
- list_add_tail(&entry->entry, &uctx->qpids);
- }
- }
-out:
- mutex_unlock(&uctx->lock);
- PDBG("%s qpid 0x%x\n", __func__, qpid);
- return qpid;
-}
-
-static void put_qpid(struct cxio_rdev *rdev_p, u32 qpid,
- struct cxio_ucontext *uctx)
-{
- struct cxio_qpid_list *entry;
-
- entry = kmalloc(sizeof *entry, GFP_KERNEL);
- if (!entry)
- return;
- PDBG("%s qpid 0x%x\n", __func__, qpid);
- entry->qpid = qpid;
- mutex_lock(&uctx->lock);
- list_add_tail(&entry->entry, &uctx->qpids);
- mutex_unlock(&uctx->lock);
-}
-
-void cxio_release_ucontext(struct cxio_rdev *rdev_p, struct cxio_ucontext *uctx)
-{
- struct list_head *pos, *nxt;
- struct cxio_qpid_list *entry;
-
- mutex_lock(&uctx->lock);
- list_for_each_safe(pos, nxt, &uctx->qpids) {
- entry = list_entry(pos, struct cxio_qpid_list, entry);
- list_del_init(&entry->entry);
- if (!(entry->qpid & rdev_p->qpmask))
- cxio_hal_put_qpid(rdev_p->rscp, entry->qpid);
- kfree(entry);
- }
- mutex_unlock(&uctx->lock);
-}
-
-void cxio_init_ucontext(struct cxio_rdev *rdev_p, struct cxio_ucontext *uctx)
-{
- INIT_LIST_HEAD(&uctx->qpids);
- mutex_init(&uctx->lock);
-}
-
-int cxio_create_qp(struct cxio_rdev *rdev_p, u32 kernel_domain,
- struct t3_wq *wq, struct cxio_ucontext *uctx)
-{
- int depth = 1UL << wq->size_log2;
- int rqsize = 1UL << wq->rq_size_log2;
-
- wq->qpid = get_qpid(rdev_p, uctx);
- if (!wq->qpid)
- return -ENOMEM;
-
- wq->rq = kzalloc(depth * sizeof(struct t3_swrq), GFP_KERNEL);
- if (!wq->rq)
- goto err1;
-
- wq->rq_addr = cxio_hal_rqtpool_alloc(rdev_p, rqsize);
- if (!wq->rq_addr)
- goto err2;
-
- wq->sq = kzalloc(depth * sizeof(struct t3_swsq), GFP_KERNEL);
- if (!wq->sq)
- goto err3;
-
- wq->queue = dma_alloc_coherent(&(rdev_p->rnic_info.pdev->dev),
- depth * sizeof(union t3_wr),
- &(wq->dma_addr), GFP_KERNEL);
- if (!wq->queue)
- goto err4;
-
- memset(wq->queue, 0, depth * sizeof(union t3_wr));
- dma_unmap_addr_set(wq, mapping, wq->dma_addr);
- wq->doorbell = (void __iomem *)rdev_p->rnic_info.kdb_addr;
- if (!kernel_domain)
- wq->udb = (u64)rdev_p->rnic_info.udbell_physbase +
- (wq->qpid << rdev_p->qpshift);
- wq->rdev = rdev_p;
- PDBG("%s qpid 0x%x doorbell 0x%p udb 0x%llx\n", __func__,
- wq->qpid, wq->doorbell, (unsigned long long) wq->udb);
- return 0;
-err4:
- kfree(wq->sq);
-err3:
- cxio_hal_rqtpool_free(rdev_p, wq->rq_addr, rqsize);
-err2:
- kfree(wq->rq);
-err1:
- put_qpid(rdev_p, wq->qpid, uctx);
- return -ENOMEM;
-}
-
-int cxio_destroy_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq)
-{
- int err;
- err = cxio_hal_clear_cq_ctx(rdev_p, cq->cqid);
- kfree(cq->sw_queue);
- dma_free_coherent(&(rdev_p->rnic_info.pdev->dev),
- (1UL << (cq->size_log2))
- * sizeof(struct t3_cqe) + 1, cq->queue,
- dma_unmap_addr(cq, mapping));
- cxio_hal_put_cqid(rdev_p->rscp, cq->cqid);
- return err;
-}
-
-int cxio_destroy_qp(struct cxio_rdev *rdev_p, struct t3_wq *wq,
- struct cxio_ucontext *uctx)
-{
- dma_free_coherent(&(rdev_p->rnic_info.pdev->dev),
- (1UL << (wq->size_log2))
- * sizeof(union t3_wr), wq->queue,
- dma_unmap_addr(wq, mapping));
- kfree(wq->sq);
- cxio_hal_rqtpool_free(rdev_p, wq->rq_addr, (1UL << wq->rq_size_log2));
- kfree(wq->rq);
- put_qpid(rdev_p, wq->qpid, uctx);
- return 0;
-}
-
-static void insert_recv_cqe(struct t3_wq *wq, struct t3_cq *cq)
-{
- struct t3_cqe cqe;
-
- PDBG("%s wq %p cq %p sw_rptr 0x%x sw_wptr 0x%x\n", __func__,
- wq, cq, cq->sw_rptr, cq->sw_wptr);
- memset(&cqe, 0, sizeof(cqe));
- cqe.header = cpu_to_be32(V_CQE_STATUS(TPT_ERR_SWFLUSH) |
- V_CQE_OPCODE(T3_SEND) |
- V_CQE_TYPE(0) |
- V_CQE_SWCQE(1) |
- V_CQE_QPID(wq->qpid) |
- V_CQE_GENBIT(Q_GENBIT(cq->sw_wptr,
- cq->size_log2)));
- *(cq->sw_queue + Q_PTR2IDX(cq->sw_wptr, cq->size_log2)) = cqe;
- cq->sw_wptr++;
-}
-
-int cxio_flush_rq(struct t3_wq *wq, struct t3_cq *cq, int count)
-{
- u32 ptr;
- int flushed = 0;
-
- PDBG("%s wq %p cq %p\n", __func__, wq, cq);
-
- /* flush RQ */
- PDBG("%s rq_rptr %u rq_wptr %u skip count %u\n", __func__,
- wq->rq_rptr, wq->rq_wptr, count);
- ptr = wq->rq_rptr + count;
- while (ptr++ != wq->rq_wptr) {
- insert_recv_cqe(wq, cq);
- flushed++;
- }
- return flushed;
-}
-
-static void insert_sq_cqe(struct t3_wq *wq, struct t3_cq *cq,
- struct t3_swsq *sqp)
-{
- struct t3_cqe cqe;
-
- PDBG("%s wq %p cq %p sw_rptr 0x%x sw_wptr 0x%x\n", __func__,
- wq, cq, cq->sw_rptr, cq->sw_wptr);
- memset(&cqe, 0, sizeof(cqe));
- cqe.header = cpu_to_be32(V_CQE_STATUS(TPT_ERR_SWFLUSH) |
- V_CQE_OPCODE(sqp->opcode) |
- V_CQE_TYPE(1) |
- V_CQE_SWCQE(1) |
- V_CQE_QPID(wq->qpid) |
- V_CQE_GENBIT(Q_GENBIT(cq->sw_wptr,
- cq->size_log2)));
- cqe.u.scqe.wrid_hi = sqp->sq_wptr;
-
- *(cq->sw_queue + Q_PTR2IDX(cq->sw_wptr, cq->size_log2)) = cqe;
- cq->sw_wptr++;
-}
-
-int cxio_flush_sq(struct t3_wq *wq, struct t3_cq *cq, int count)
-{
- __u32 ptr;
- int flushed = 0;
- struct t3_swsq *sqp = wq->sq + Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2);
-
- ptr = wq->sq_rptr + count;
- sqp = wq->sq + Q_PTR2IDX(ptr, wq->sq_size_log2);
- while (ptr != wq->sq_wptr) {
- sqp->signaled = 0;
- insert_sq_cqe(wq, cq, sqp);
- ptr++;
- sqp = wq->sq + Q_PTR2IDX(ptr, wq->sq_size_log2);
- flushed++;
- }
- return flushed;
-}
-
-/*
- * Move all CQEs from the HWCQ into the SWCQ.
- */
-void cxio_flush_hw_cq(struct t3_cq *cq)
-{
- struct t3_cqe *cqe, *swcqe;
-
- PDBG("%s cq %p cqid 0x%x\n", __func__, cq, cq->cqid);
- cqe = cxio_next_hw_cqe(cq);
- while (cqe) {
- PDBG("%s flushing hwcq rptr 0x%x to swcq wptr 0x%x\n",
- __func__, cq->rptr, cq->sw_wptr);
- swcqe = cq->sw_queue + Q_PTR2IDX(cq->sw_wptr, cq->size_log2);
- *swcqe = *cqe;
- swcqe->header |= cpu_to_be32(V_CQE_SWCQE(1));
- cq->sw_wptr++;
- cq->rptr++;
- cqe = cxio_next_hw_cqe(cq);
- }
-}
-
-static int cqe_completes_wr(struct t3_cqe *cqe, struct t3_wq *wq)
-{
- if (CQE_OPCODE(*cqe) == T3_TERMINATE)
- return 0;
-
- if ((CQE_OPCODE(*cqe) == T3_RDMA_WRITE) && RQ_TYPE(*cqe))
- return 0;
-
- if ((CQE_OPCODE(*cqe) == T3_READ_RESP) && SQ_TYPE(*cqe))
- return 0;
-
- if (CQE_SEND_OPCODE(*cqe) && RQ_TYPE(*cqe) &&
- Q_EMPTY(wq->rq_rptr, wq->rq_wptr))
- return 0;
-
- return 1;
-}
-
-void cxio_count_scqes(struct t3_cq *cq, struct t3_wq *wq, int *count)
-{
- struct t3_cqe *cqe;
- u32 ptr;
-
- *count = 0;
- ptr = cq->sw_rptr;
- while (!Q_EMPTY(ptr, cq->sw_wptr)) {
- cqe = cq->sw_queue + (Q_PTR2IDX(ptr, cq->size_log2));
- if ((SQ_TYPE(*cqe) ||
- ((CQE_OPCODE(*cqe) == T3_READ_RESP) && wq->oldest_read)) &&
- (CQE_QPID(*cqe) == wq->qpid))
- (*count)++;
- ptr++;
- }
- PDBG("%s cq %p count %d\n", __func__, cq, *count);
-}
-
-void cxio_count_rcqes(struct t3_cq *cq, struct t3_wq *wq, int *count)
-{
- struct t3_cqe *cqe;
- u32 ptr;
-
- *count = 0;
- PDBG("%s count zero %d\n", __func__, *count);
- ptr = cq->sw_rptr;
- while (!Q_EMPTY(ptr, cq->sw_wptr)) {
- cqe = cq->sw_queue + (Q_PTR2IDX(ptr, cq->size_log2));
- if (RQ_TYPE(*cqe) && (CQE_OPCODE(*cqe) != T3_READ_RESP) &&
- (CQE_QPID(*cqe) == wq->qpid) && cqe_completes_wr(cqe, wq))
- (*count)++;
- ptr++;
- }
- PDBG("%s cq %p count %d\n", __func__, cq, *count);
-}
-
-static int cxio_hal_init_ctrl_cq(struct cxio_rdev *rdev_p)
-{
- struct rdma_cq_setup setup;
- setup.id = 0;
- setup.base_addr = 0; /* NULL address */
- setup.size = 1; /* enable the CQ */
- setup.credits = 0;
-
- /* force SGE to redirect to RspQ and interrupt */
- setup.credit_thres = 0;
- setup.ovfl_mode = 1;
- return (rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_SETUP, &setup));
-}
-
-static int cxio_hal_init_ctrl_qp(struct cxio_rdev *rdev_p)
-{
- int err;
- u64 sge_cmd, ctx0, ctx1;
- u64 base_addr;
- struct t3_modify_qp_wr *wqe;
- struct sk_buff *skb;
-
- skb = alloc_skb(sizeof(*wqe), GFP_KERNEL);
- if (!skb) {
- PDBG("%s alloc_skb failed\n", __func__);
- return -ENOMEM;
- }
- err = cxio_hal_init_ctrl_cq(rdev_p);
- if (err) {
- PDBG("%s err %d initializing ctrl_cq\n", __func__, err);
- goto err;
- }
- rdev_p->ctrl_qp.workq = dma_alloc_coherent(
- &(rdev_p->rnic_info.pdev->dev),
- (1 << T3_CTRL_QP_SIZE_LOG2) *
- sizeof(union t3_wr),
- &(rdev_p->ctrl_qp.dma_addr),
- GFP_KERNEL);
- if (!rdev_p->ctrl_qp.workq) {
- PDBG("%s dma_alloc_coherent failed\n", __func__);
- err = -ENOMEM;
- goto err;
- }
- dma_unmap_addr_set(&rdev_p->ctrl_qp, mapping,
- rdev_p->ctrl_qp.dma_addr);
- rdev_p->ctrl_qp.doorbell = (void __iomem *)rdev_p->rnic_info.kdb_addr;
- memset(rdev_p->ctrl_qp.workq, 0,
- (1 << T3_CTRL_QP_SIZE_LOG2) * sizeof(union t3_wr));
-
- mutex_init(&rdev_p->ctrl_qp.lock);
- init_waitqueue_head(&rdev_p->ctrl_qp.waitq);
-
- /* update HW Ctrl QP context */
- base_addr = rdev_p->ctrl_qp.dma_addr;
- base_addr >>= 12;
- ctx0 = (V_EC_SIZE((1 << T3_CTRL_QP_SIZE_LOG2)) |
- V_EC_BASE_LO((u32) base_addr & 0xffff));
- ctx0 <<= 32;
- ctx0 |= V_EC_CREDITS(FW_WR_NUM);
- base_addr >>= 16;
- ctx1 = (u32) base_addr;
- base_addr >>= 32;
- ctx1 |= ((u64) (V_EC_BASE_HI((u32) base_addr & 0xf) | V_EC_RESPQ(0) |
- V_EC_TYPE(0) | V_EC_GEN(1) |
- V_EC_UP_TOKEN(T3_CTL_QP_TID) | F_EC_VALID)) << 32;
- wqe = (struct t3_modify_qp_wr *) skb_put(skb, sizeof(*wqe));
- memset(wqe, 0, sizeof(*wqe));
- build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_QP_MOD, 0, 0,
- T3_CTL_QP_TID, 7, T3_SOPEOP);
- wqe->flags = cpu_to_be32(MODQP_WRITE_EC);
- sge_cmd = (3ULL << 56) | FW_RI_SGEEC_START << 8 | 3;
- wqe->sge_cmd = cpu_to_be64(sge_cmd);
- wqe->ctx1 = cpu_to_be64(ctx1);
- wqe->ctx0 = cpu_to_be64(ctx0);
- PDBG("CtrlQP dma_addr 0x%llx workq %p size %d\n",
- (unsigned long long) rdev_p->ctrl_qp.dma_addr,
- rdev_p->ctrl_qp.workq, 1 << T3_CTRL_QP_SIZE_LOG2);
- skb->priority = CPL_PRIORITY_CONTROL;
- return iwch_cxgb3_ofld_send(rdev_p->t3cdev_p, skb);
-err:
- kfree_skb(skb);
- return err;
-}
-
-static int cxio_hal_destroy_ctrl_qp(struct cxio_rdev *rdev_p)
-{
- dma_free_coherent(&(rdev_p->rnic_info.pdev->dev),
- (1UL << T3_CTRL_QP_SIZE_LOG2)
- * sizeof(union t3_wr), rdev_p->ctrl_qp.workq,
- dma_unmap_addr(&rdev_p->ctrl_qp, mapping));
- return cxio_hal_clear_qp_ctx(rdev_p, T3_CTRL_QP_ID);
-}
-
-/* write len bytes of data into addr (32B aligned address)
- * If data is NULL, clear len byte of memory to zero.
- * caller acquires the ctrl_qp lock before the call
- */
-static int cxio_hal_ctrl_qp_write_mem(struct cxio_rdev *rdev_p, u32 addr,
- u32 len, void *data)
-{
- u32 i, nr_wqe, copy_len;
- u8 *copy_data;
- u8 wr_len, utx_len; /* length in 8 byte flit */
- enum t3_wr_flags flag;
- __be64 *wqe;
- u64 utx_cmd;
- addr &= 0x7FFFFFF;
- nr_wqe = len % 96 ? len / 96 + 1 : len / 96; /* 96B max per WQE */
- PDBG("%s wptr 0x%x rptr 0x%x len %d, nr_wqe %d data %p addr 0x%0x\n",
- __func__, rdev_p->ctrl_qp.wptr, rdev_p->ctrl_qp.rptr, len,
- nr_wqe, data, addr);
- utx_len = 3; /* in 32B unit */
- for (i = 0; i < nr_wqe; i++) {
- if (Q_FULL(rdev_p->ctrl_qp.rptr, rdev_p->ctrl_qp.wptr,
- T3_CTRL_QP_SIZE_LOG2)) {
- PDBG("%s ctrl_qp full wtpr 0x%0x rptr 0x%0x, "
- "wait for more space i %d\n", __func__,
- rdev_p->ctrl_qp.wptr, rdev_p->ctrl_qp.rptr, i);
- if (wait_event_interruptible(rdev_p->ctrl_qp.waitq,
- !Q_FULL(rdev_p->ctrl_qp.rptr,
- rdev_p->ctrl_qp.wptr,
- T3_CTRL_QP_SIZE_LOG2))) {
- PDBG("%s ctrl_qp workq interrupted\n",
- __func__);
- return -ERESTARTSYS;
- }
- PDBG("%s ctrl_qp wakeup, continue posting work request "
- "i %d\n", __func__, i);
- }
- wqe = (__be64 *)(rdev_p->ctrl_qp.workq + (rdev_p->ctrl_qp.wptr %
- (1 << T3_CTRL_QP_SIZE_LOG2)));
- flag = 0;
- if (i == (nr_wqe - 1)) {
- /* last WQE */
- flag = T3_COMPLETION_FLAG;
- if (len % 32)
- utx_len = len / 32 + 1;
- else
- utx_len = len / 32;
- }
-
- /*
- * Force a CQE to return the credit to the workq in case
- * we posted more than half the max QP size of WRs
- */
- if ((i != 0) &&
- (i % (((1 << T3_CTRL_QP_SIZE_LOG2)) >> 1) == 0)) {
- flag = T3_COMPLETION_FLAG;
- PDBG("%s force completion at i %d\n", __func__, i);
- }
-
- /* build the utx mem command */
- wqe += (sizeof(struct t3_bypass_wr) >> 3);
- utx_cmd = (T3_UTX_MEM_WRITE << 28) | (addr + i * 3);
- utx_cmd <<= 32;
- utx_cmd |= (utx_len << 28) | ((utx_len << 2) + 1);
- *wqe = cpu_to_be64(utx_cmd);
- wqe++;
- copy_data = (u8 *) data + i * 96;
- copy_len = len > 96 ? 96 : len;
-
- /* clear memory content if data is NULL */
- if (data)
- memcpy(wqe, copy_data, copy_len);
- else
- memset(wqe, 0, copy_len);
- if (copy_len % 32)
- memset(((u8 *) wqe) + copy_len, 0,
- 32 - (copy_len % 32));
- wr_len = ((sizeof(struct t3_bypass_wr)) >> 3) + 1 +
- (utx_len << 2);
- wqe = (__be64 *)(rdev_p->ctrl_qp.workq + (rdev_p->ctrl_qp.wptr %
- (1 << T3_CTRL_QP_SIZE_LOG2)));
-
- /* wptr in the WRID[31:0] */
- ((union t3_wrid *)(wqe+1))->id0.low = rdev_p->ctrl_qp.wptr;
-
- /*
- * This must be the last write with a memory barrier
- * for the genbit
- */
- build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_BP, flag,
- Q_GENBIT(rdev_p->ctrl_qp.wptr,
- T3_CTRL_QP_SIZE_LOG2), T3_CTRL_QP_ID,
- wr_len, T3_SOPEOP);
- if (flag == T3_COMPLETION_FLAG)
- ring_doorbell(rdev_p->ctrl_qp.doorbell, T3_CTRL_QP_ID);
- len -= 96;
- rdev_p->ctrl_qp.wptr++;
- }
- return 0;
-}
-
-/* IN: stag key, pdid, perm, zbva, to, len, page_size, pbl_size and pbl_addr
- * OUT: stag index
- * TBD: shared memory region support
- */
-static int __cxio_tpt_op(struct cxio_rdev *rdev_p, u32 reset_tpt_entry,
- u32 *stag, u8 stag_state, u32 pdid,
- enum tpt_mem_type type, enum tpt_mem_perm perm,
- u32 zbva, u64 to, u32 len, u8 page_size,
- u32 pbl_size, u32 pbl_addr)
-{
- int err;
- struct tpt_entry tpt;
- u32 stag_idx;
- u32 wptr;
-
- if (cxio_fatal_error(rdev_p))
- return -EIO;
-
- stag_state = stag_state > 0;
- stag_idx = (*stag) >> 8;
-
- if ((!reset_tpt_entry) && !(*stag != T3_STAG_UNSET)) {
- stag_idx = cxio_hal_get_stag(rdev_p->rscp);
- if (!stag_idx)
- return -ENOMEM;
- *stag = (stag_idx << 8) | ((*stag) & 0xFF);
- }
- PDBG("%s stag_state 0x%0x type 0x%0x pdid 0x%0x, stag_idx 0x%x\n",
- __func__, stag_state, type, pdid, stag_idx);
-
- mutex_lock(&rdev_p->ctrl_qp.lock);
-
- /* write TPT entry */
- if (reset_tpt_entry)
- memset(&tpt, 0, sizeof(tpt));
- else {
- tpt.valid_stag_pdid = cpu_to_be32(F_TPT_VALID |
- V_TPT_STAG_KEY((*stag) & M_TPT_STAG_KEY) |
- V_TPT_STAG_STATE(stag_state) |
- V_TPT_STAG_TYPE(type) | V_TPT_PDID(pdid));
- BUG_ON(page_size >= 28);
- tpt.flags_pagesize_qpid = cpu_to_be32(V_TPT_PERM(perm) |
- ((perm & TPT_MW_BIND) ? F_TPT_MW_BIND_ENABLE : 0) |
- V_TPT_ADDR_TYPE((zbva ? TPT_ZBTO : TPT_VATO)) |
- V_TPT_PAGE_SIZE(page_size));
- tpt.rsvd_pbl_addr = cpu_to_be32(V_TPT_PBL_ADDR(PBL_OFF(rdev_p, pbl_addr)>>3));
- tpt.len = cpu_to_be32(len);
- tpt.va_hi = cpu_to_be32((u32) (to >> 32));
- tpt.va_low_or_fbo = cpu_to_be32((u32) (to & 0xFFFFFFFFULL));
- tpt.rsvd_bind_cnt_or_pstag = 0;
- tpt.rsvd_pbl_size = cpu_to_be32(V_TPT_PBL_SIZE(pbl_size >> 2));
- }
- err = cxio_hal_ctrl_qp_write_mem(rdev_p,
- stag_idx +
- (rdev_p->rnic_info.tpt_base >> 5),
- sizeof(tpt), &tpt);
-
- /* release the stag index to free pool */
- if (reset_tpt_entry)
- cxio_hal_put_stag(rdev_p->rscp, stag_idx);
-
- wptr = rdev_p->ctrl_qp.wptr;
- mutex_unlock(&rdev_p->ctrl_qp.lock);
- if (!err)
- if (wait_event_interruptible(rdev_p->ctrl_qp.waitq,
- SEQ32_GE(rdev_p->ctrl_qp.rptr,
- wptr)))
- return -ERESTARTSYS;
- return err;
-}
-
-int cxio_write_pbl(struct cxio_rdev *rdev_p, __be64 *pbl,
- u32 pbl_addr, u32 pbl_size)
-{
- u32 wptr;
- int err;
-
- PDBG("%s *pdb_addr 0x%x, pbl_base 0x%x, pbl_size %d\n",
- __func__, pbl_addr, rdev_p->rnic_info.pbl_base,
- pbl_size);
-
- mutex_lock(&rdev_p->ctrl_qp.lock);
- err = cxio_hal_ctrl_qp_write_mem(rdev_p, pbl_addr >> 5, pbl_size << 3,
- pbl);
- wptr = rdev_p->ctrl_qp.wptr;
- mutex_unlock(&rdev_p->ctrl_qp.lock);
- if (err)
- return err;
-
- if (wait_event_interruptible(rdev_p->ctrl_qp.waitq,
- SEQ32_GE(rdev_p->ctrl_qp.rptr,
- wptr)))
- return -ERESTARTSYS;
-
- return 0;
-}
-
-int cxio_register_phys_mem(struct cxio_rdev *rdev_p, u32 *stag, u32 pdid,
- enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len,
- u8 page_size, u32 pbl_size, u32 pbl_addr)
-{
- *stag = T3_STAG_UNSET;
- return __cxio_tpt_op(rdev_p, 0, stag, 1, pdid, TPT_NON_SHARED_MR, perm,
- zbva, to, len, page_size, pbl_size, pbl_addr);
-}
-
-int cxio_reregister_phys_mem(struct cxio_rdev *rdev_p, u32 *stag, u32 pdid,
- enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len,
- u8 page_size, u32 pbl_size, u32 pbl_addr)
-{
- return __cxio_tpt_op(rdev_p, 0, stag, 1, pdid, TPT_NON_SHARED_MR, perm,
- zbva, to, len, page_size, pbl_size, pbl_addr);
-}
-
-int cxio_dereg_mem(struct cxio_rdev *rdev_p, u32 stag, u32 pbl_size,
- u32 pbl_addr)
-{
- return __cxio_tpt_op(rdev_p, 1, &stag, 0, 0, 0, 0, 0, 0ULL, 0, 0,
- pbl_size, pbl_addr);
-}
-
-int cxio_allocate_window(struct cxio_rdev *rdev_p, u32 * stag, u32 pdid)
-{
- *stag = T3_STAG_UNSET;
- return __cxio_tpt_op(rdev_p, 0, stag, 0, pdid, TPT_MW, 0, 0, 0ULL, 0, 0,
- 0, 0);
-}
-
-int cxio_deallocate_window(struct cxio_rdev *rdev_p, u32 stag)
-{
- return __cxio_tpt_op(rdev_p, 1, &stag, 0, 0, 0, 0, 0, 0ULL, 0, 0,
- 0, 0);
-}
-
-int cxio_allocate_stag(struct cxio_rdev *rdev_p, u32 *stag, u32 pdid, u32 pbl_size, u32 pbl_addr)
-{
- *stag = T3_STAG_UNSET;
- return __cxio_tpt_op(rdev_p, 0, stag, 0, pdid, TPT_NON_SHARED_MR,
- 0, 0, 0ULL, 0, 0, pbl_size, pbl_addr);
-}
-
-int cxio_rdma_init(struct cxio_rdev *rdev_p, struct t3_rdma_init_attr *attr)
-{
- struct t3_rdma_init_wr *wqe;
- struct sk_buff *skb = alloc_skb(sizeof(*wqe), GFP_ATOMIC);
- if (!skb)
- return -ENOMEM;
- PDBG("%s rdev_p %p\n", __func__, rdev_p);
- wqe = (struct t3_rdma_init_wr *) __skb_put(skb, sizeof(*wqe));
- wqe->wrh.op_seop_flags = cpu_to_be32(V_FW_RIWR_OP(T3_WR_INIT));
- wqe->wrh.gen_tid_len = cpu_to_be32(V_FW_RIWR_TID(attr->tid) |
- V_FW_RIWR_LEN(sizeof(*wqe) >> 3));
- wqe->wrid.id1 = 0;
- wqe->qpid = cpu_to_be32(attr->qpid);
- wqe->pdid = cpu_to_be32(attr->pdid);
- wqe->scqid = cpu_to_be32(attr->scqid);
- wqe->rcqid = cpu_to_be32(attr->rcqid);
- wqe->rq_addr = cpu_to_be32(attr->rq_addr - rdev_p->rnic_info.rqt_base);
- wqe->rq_size = cpu_to_be32(attr->rq_size);
- wqe->mpaattrs = attr->mpaattrs;
- wqe->qpcaps = attr->qpcaps;
- wqe->ulpdu_size = cpu_to_be16(attr->tcp_emss);
- wqe->rqe_count = cpu_to_be16(attr->rqe_count);
- wqe->flags_rtr_type = cpu_to_be16(attr->flags |
- V_RTR_TYPE(attr->rtr_type) |
- V_CHAN(attr->chan));
- wqe->ord = cpu_to_be32(attr->ord);
- wqe->ird = cpu_to_be32(attr->ird);
- wqe->qp_dma_addr = cpu_to_be64(attr->qp_dma_addr);
- wqe->qp_dma_size = cpu_to_be32(attr->qp_dma_size);
- wqe->irs = cpu_to_be32(attr->irs);
- skb->priority = 0; /* 0=>ToeQ; 1=>CtrlQ */
- return iwch_cxgb3_ofld_send(rdev_p->t3cdev_p, skb);
-}
-
-void cxio_register_ev_cb(cxio_hal_ev_callback_func_t ev_cb)
-{
- cxio_ev_cb = ev_cb;
-}
-
-void cxio_unregister_ev_cb(cxio_hal_ev_callback_func_t ev_cb)
-{
- cxio_ev_cb = NULL;
-}
-
-static int cxio_hal_ev_handler(struct t3cdev *t3cdev_p, struct sk_buff *skb)
-{
- static int cnt;
- struct cxio_rdev *rdev_p = NULL;
- struct respQ_msg_t *rsp_msg = (struct respQ_msg_t *) skb->data;
- PDBG("%d: %s cq_id 0x%x cq_ptr 0x%x genbit %0x overflow %0x an %0x"
- " se %0x notify %0x cqbranch %0x creditth %0x\n",
- cnt, __func__, RSPQ_CQID(rsp_msg), RSPQ_CQPTR(rsp_msg),
- RSPQ_GENBIT(rsp_msg), RSPQ_OVERFLOW(rsp_msg), RSPQ_AN(rsp_msg),
- RSPQ_SE(rsp_msg), RSPQ_NOTIFY(rsp_msg), RSPQ_CQBRANCH(rsp_msg),
- RSPQ_CREDIT_THRESH(rsp_msg));
- PDBG("CQE: QPID 0x%0x genbit %0x type 0x%0x status 0x%0x opcode %d "
- "len 0x%0x wrid_hi_stag 0x%x wrid_low_msn 0x%x\n",
- CQE_QPID(rsp_msg->cqe), CQE_GENBIT(rsp_msg->cqe),
- CQE_TYPE(rsp_msg->cqe), CQE_STATUS(rsp_msg->cqe),
- CQE_OPCODE(rsp_msg->cqe), CQE_LEN(rsp_msg->cqe),
- CQE_WRID_HI(rsp_msg->cqe), CQE_WRID_LOW(rsp_msg->cqe));
- rdev_p = (struct cxio_rdev *)t3cdev_p->ulp;
- if (!rdev_p) {
- PDBG("%s called by t3cdev %p with null ulp\n", __func__,
- t3cdev_p);
- return 0;
- }
- if (CQE_QPID(rsp_msg->cqe) == T3_CTRL_QP_ID) {
- rdev_p->ctrl_qp.rptr = CQE_WRID_LOW(rsp_msg->cqe) + 1;
- wake_up_interruptible(&rdev_p->ctrl_qp.waitq);
- dev_kfree_skb_irq(skb);
- } else if (CQE_QPID(rsp_msg->cqe) == 0xfff8)
- dev_kfree_skb_irq(skb);
- else if (cxio_ev_cb)
- (*cxio_ev_cb) (rdev_p, skb);
- else
- dev_kfree_skb_irq(skb);
- cnt++;
- return 0;
-}
-
-/* Caller takes care of locking if needed */
-int cxio_rdev_open(struct cxio_rdev *rdev_p)
-{
- struct net_device *netdev_p = NULL;
- int err = 0;
- if (strlen(rdev_p->dev_name)) {
- if (cxio_hal_find_rdev_by_name(rdev_p->dev_name)) {
- return -EBUSY;
- }
- netdev_p = dev_get_by_name(&init_net, rdev_p->dev_name);
- if (!netdev_p) {
- return -EINVAL;
- }
- dev_put(netdev_p);
- } else if (rdev_p->t3cdev_p) {
- if (cxio_hal_find_rdev_by_t3cdev(rdev_p->t3cdev_p)) {
- return -EBUSY;
- }
- netdev_p = rdev_p->t3cdev_p->lldev;
- strncpy(rdev_p->dev_name, rdev_p->t3cdev_p->name,
- T3_MAX_DEV_NAME_LEN);
- } else {
- PDBG("%s t3cdev_p or dev_name must be set\n", __func__);
- return -EINVAL;
- }
-
- list_add_tail(&rdev_p->entry, &rdev_list);
-
- PDBG("%s opening rnic dev %s\n", __func__, rdev_p->dev_name);
- memset(&rdev_p->ctrl_qp, 0, sizeof(rdev_p->ctrl_qp));
- if (!rdev_p->t3cdev_p)
- rdev_p->t3cdev_p = dev2t3cdev(netdev_p);
- rdev_p->t3cdev_p->ulp = (void *) rdev_p;
-
- err = rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, GET_EMBEDDED_INFO,
- &(rdev_p->fw_info));
- if (err) {
- printk(KERN_ERR "%s t3cdev_p(%p)->ctl returned error %d.\n",
- __func__, rdev_p->t3cdev_p, err);
- goto err1;
- }
- if (G_FW_VERSION_MAJOR(rdev_p->fw_info.fw_vers) != CXIO_FW_MAJ) {
- printk(KERN_ERR MOD "fatal firmware version mismatch: "
- "need version %u but adapter has version %u\n",
- CXIO_FW_MAJ,
- G_FW_VERSION_MAJOR(rdev_p->fw_info.fw_vers));
- err = -EINVAL;
- goto err1;
- }
-
- err = rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_GET_PARAMS,
- &(rdev_p->rnic_info));
- if (err) {
- printk(KERN_ERR "%s t3cdev_p(%p)->ctl returned error %d.\n",
- __func__, rdev_p->t3cdev_p, err);
- goto err1;
- }
- err = rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, GET_PORTS,
- &(rdev_p->port_info));
- if (err) {
- printk(KERN_ERR "%s t3cdev_p(%p)->ctl returned error %d.\n",
- __func__, rdev_p->t3cdev_p, err);
- goto err1;
- }
-
- /*
- * qpshift is the number of bits to shift the qpid left in order
- * to get the correct address of the doorbell for that qp.
- */
- cxio_init_ucontext(rdev_p, &rdev_p->uctx);
- rdev_p->qpshift = PAGE_SHIFT -
- ilog2(65536 >>
- ilog2(rdev_p->rnic_info.udbell_len >>
- PAGE_SHIFT));
- rdev_p->qpnr = rdev_p->rnic_info.udbell_len >> PAGE_SHIFT;
- rdev_p->qpmask = (65536 >> ilog2(rdev_p->qpnr)) - 1;
- PDBG("%s rnic %s info: tpt_base 0x%0x tpt_top 0x%0x num stags %d "
- "pbl_base 0x%0x pbl_top 0x%0x rqt_base 0x%0x, rqt_top 0x%0x\n",
- __func__, rdev_p->dev_name, rdev_p->rnic_info.tpt_base,
- rdev_p->rnic_info.tpt_top, cxio_num_stags(rdev_p),
- rdev_p->rnic_info.pbl_base,
- rdev_p->rnic_info.pbl_top, rdev_p->rnic_info.rqt_base,
- rdev_p->rnic_info.rqt_top);
- PDBG("udbell_len 0x%0x udbell_physbase 0x%lx kdb_addr %p qpshift %lu "
- "qpnr %d qpmask 0x%x\n",
- rdev_p->rnic_info.udbell_len,
- rdev_p->rnic_info.udbell_physbase, rdev_p->rnic_info.kdb_addr,
- rdev_p->qpshift, rdev_p->qpnr, rdev_p->qpmask);
-
- err = cxio_hal_init_ctrl_qp(rdev_p);
- if (err) {
- printk(KERN_ERR "%s error %d initializing ctrl_qp.\n",
- __func__, err);
- goto err1;
- }
- err = cxio_hal_init_resource(rdev_p, cxio_num_stags(rdev_p), 0,
- 0, T3_MAX_NUM_QP, T3_MAX_NUM_CQ,
- T3_MAX_NUM_PD);
- if (err) {
- printk(KERN_ERR "%s error %d initializing hal resources.\n",
- __func__, err);
- goto err2;
- }
- err = cxio_hal_pblpool_create(rdev_p);
- if (err) {
- printk(KERN_ERR "%s error %d initializing pbl mem pool.\n",
- __func__, err);
- goto err3;
- }
- err = cxio_hal_rqtpool_create(rdev_p);
- if (err) {
- printk(KERN_ERR "%s error %d initializing rqt mem pool.\n",
- __func__, err);
- goto err4;
- }
- return 0;
-err4:
- cxio_hal_pblpool_destroy(rdev_p);
-err3:
- cxio_hal_destroy_resource(rdev_p->rscp);
-err2:
- cxio_hal_destroy_ctrl_qp(rdev_p);
-err1:
- rdev_p->t3cdev_p->ulp = NULL;
- list_del(&rdev_p->entry);
- return err;
-}
-
-void cxio_rdev_close(struct cxio_rdev *rdev_p)
-{
- if (rdev_p) {
- cxio_hal_pblpool_destroy(rdev_p);
- cxio_hal_rqtpool_destroy(rdev_p);
- list_del(&rdev_p->entry);
- cxio_hal_destroy_ctrl_qp(rdev_p);
- cxio_hal_destroy_resource(rdev_p->rscp);
- rdev_p->t3cdev_p->ulp = NULL;
- }
-}
-
-int __init cxio_hal_init(void)
-{
- if (cxio_hal_init_rhdl_resource(T3_MAX_NUM_RI))
- return -ENOMEM;
- t3_register_cpl_handler(CPL_ASYNC_NOTIF, cxio_hal_ev_handler);
- return 0;
-}
-
-void __exit cxio_hal_exit(void)
-{
- struct cxio_rdev *rdev, *tmp;
-
- t3_register_cpl_handler(CPL_ASYNC_NOTIF, NULL);
- list_for_each_entry_safe(rdev, tmp, &rdev_list, entry)
- cxio_rdev_close(rdev);
- cxio_hal_destroy_rhdl_resource();
-}
-
-static void flush_completed_wrs(struct t3_wq *wq, struct t3_cq *cq)
-{
- struct t3_swsq *sqp;
- __u32 ptr = wq->sq_rptr;
- int count = Q_COUNT(wq->sq_rptr, wq->sq_wptr);
-
- sqp = wq->sq + Q_PTR2IDX(ptr, wq->sq_size_log2);
- while (count--)
- if (!sqp->signaled) {
- ptr++;
- sqp = wq->sq + Q_PTR2IDX(ptr, wq->sq_size_log2);
- } else if (sqp->complete) {
-
- /*
- * Insert this completed cqe into the swcq.
- */
- PDBG("%s moving cqe into swcq sq idx %ld cq idx %ld\n",
- __func__, Q_PTR2IDX(ptr, wq->sq_size_log2),
- Q_PTR2IDX(cq->sw_wptr, cq->size_log2));
- sqp->cqe.header |= htonl(V_CQE_SWCQE(1));
- *(cq->sw_queue + Q_PTR2IDX(cq->sw_wptr, cq->size_log2))
- = sqp->cqe;
- cq->sw_wptr++;
- sqp->signaled = 0;
- break;
- } else
- break;
-}
-
-static void create_read_req_cqe(struct t3_wq *wq, struct t3_cqe *hw_cqe,
- struct t3_cqe *read_cqe)
-{
- read_cqe->u.scqe.wrid_hi = wq->oldest_read->sq_wptr;
- read_cqe->len = wq->oldest_read->read_len;
- read_cqe->header = htonl(V_CQE_QPID(CQE_QPID(*hw_cqe)) |
- V_CQE_SWCQE(SW_CQE(*hw_cqe)) |
- V_CQE_OPCODE(T3_READ_REQ) |
- V_CQE_TYPE(1));
-}
-
-/*
- * Return a ptr to the next read wr in the SWSQ or NULL.
- */
-static void advance_oldest_read(struct t3_wq *wq)
-{
-
- u32 rptr = wq->oldest_read - wq->sq + 1;
- u32 wptr = Q_PTR2IDX(wq->sq_wptr, wq->sq_size_log2);
-
- while (Q_PTR2IDX(rptr, wq->sq_size_log2) != wptr) {
- wq->oldest_read = wq->sq + Q_PTR2IDX(rptr, wq->sq_size_log2);
-
- if (wq->oldest_read->opcode == T3_READ_REQ)
- return;
- rptr++;
- }
- wq->oldest_read = NULL;
-}
-
-/*
- * cxio_poll_cq
- *
- * Caller must:
- * check the validity of the first CQE,
- * supply the wq assicated with the qpid.
- *
- * credit: cq credit to return to sge.
- * cqe_flushed: 1 iff the CQE is flushed.
- * cqe: copy of the polled CQE.
- *
- * return value:
- * 0 CQE returned,
- * -1 CQE skipped, try again.
- */
-int cxio_poll_cq(struct t3_wq *wq, struct t3_cq *cq, struct t3_cqe *cqe,
- u8 *cqe_flushed, u64 *cookie, u32 *credit)
-{
- int ret = 0;
- struct t3_cqe *hw_cqe, read_cqe;
-
- *cqe_flushed = 0;
- *credit = 0;
- hw_cqe = cxio_next_cqe(cq);
-
- PDBG("%s CQE OOO %d qpid 0x%0x genbit %d type %d status 0x%0x"
- " opcode 0x%0x len 0x%0x wrid_hi_stag 0x%x wrid_low_msn 0x%x\n",
- __func__, CQE_OOO(*hw_cqe), CQE_QPID(*hw_cqe),
- CQE_GENBIT(*hw_cqe), CQE_TYPE(*hw_cqe), CQE_STATUS(*hw_cqe),
- CQE_OPCODE(*hw_cqe), CQE_LEN(*hw_cqe), CQE_WRID_HI(*hw_cqe),
- CQE_WRID_LOW(*hw_cqe));
-
- /*
- * skip cqe's not affiliated with a QP.
- */
- if (wq == NULL) {
- ret = -1;
- goto skip_cqe;
- }
-
- /*
- * Gotta tweak READ completions:
- * 1) the cqe doesn't contain the sq_wptr from the wr.
- * 2) opcode not reflected from the wr.
- * 3) read_len not reflected from the wr.
- * 4) cq_type is RQ_TYPE not SQ_TYPE.
- */
- if (RQ_TYPE(*hw_cqe) && (CQE_OPCODE(*hw_cqe) == T3_READ_RESP)) {
-
- /*
- * If this is an unsolicited read response, then the read
- * was generated by the kernel driver as part of peer-2-peer
- * connection setup. So ignore the completion.
- */
- if (!wq->oldest_read) {
- if (CQE_STATUS(*hw_cqe))
- wq->error = 1;
- ret = -1;
- goto skip_cqe;
- }
-
- /*
- * Don't write to the HWCQ, so create a new read req CQE
- * in local memory.
- */
- create_read_req_cqe(wq, hw_cqe, &read_cqe);
- hw_cqe = &read_cqe;
- advance_oldest_read(wq);
- }
-
- /*
- * T3A: Discard TERMINATE CQEs.
- */
- if (CQE_OPCODE(*hw_cqe) == T3_TERMINATE) {
- ret = -1;
- wq->error = 1;
- goto skip_cqe;
- }
-
- if (CQE_STATUS(*hw_cqe) || wq->error) {
- *cqe_flushed = wq->error;
- wq->error = 1;
-
- /*
- * T3A inserts errors into the CQE. We cannot return
- * these as work completions.
- */
- /* incoming write failures */
- if ((CQE_OPCODE(*hw_cqe) == T3_RDMA_WRITE)
- && RQ_TYPE(*hw_cqe)) {
- ret = -1;
- goto skip_cqe;
- }
- /* incoming read request failures */
- if ((CQE_OPCODE(*hw_cqe) == T3_READ_RESP) && SQ_TYPE(*hw_cqe)) {
- ret = -1;
- goto skip_cqe;
- }
-
- /* incoming SEND with no receive posted failures */
- if (CQE_SEND_OPCODE(*hw_cqe) && RQ_TYPE(*hw_cqe) &&
- Q_EMPTY(wq->rq_rptr, wq->rq_wptr)) {
- ret = -1;
- goto skip_cqe;
- }
- BUG_ON((*cqe_flushed == 0) && !SW_CQE(*hw_cqe));
- goto proc_cqe;
- }
-
- /*
- * RECV completion.
- */
- if (RQ_TYPE(*hw_cqe)) {
-
- /*
- * HW only validates 4 bits of MSN. So we must validate that
- * the MSN in the SEND is the next expected MSN. If its not,
- * then we complete this with TPT_ERR_MSN and mark the wq in
- * error.
- */
-
- if (Q_EMPTY(wq->rq_rptr, wq->rq_wptr)) {
- wq->error = 1;
- ret = -1;
- goto skip_cqe;
- }
-
- if (unlikely((CQE_WRID_MSN(*hw_cqe) != (wq->rq_rptr + 1)))) {
- wq->error = 1;
- hw_cqe->header |= htonl(V_CQE_STATUS(TPT_ERR_MSN));
- goto proc_cqe;
- }
- goto proc_cqe;
- }
-
- /*
- * If we get here its a send completion.
- *
- * Handle out of order completion. These get stuffed
- * in the SW SQ. Then the SW SQ is walked to move any
- * now in-order completions into the SW CQ. This handles
- * 2 cases:
- * 1) reaping unsignaled WRs when the first subsequent
- * signaled WR is completed.
- * 2) out of order read completions.
- */
- if (!SW_CQE(*hw_cqe) && (CQE_WRID_SQ_WPTR(*hw_cqe) != wq->sq_rptr)) {
- struct t3_swsq *sqp;
-
- PDBG("%s out of order completion going in swsq at idx %ld\n",
- __func__,
- Q_PTR2IDX(CQE_WRID_SQ_WPTR(*hw_cqe), wq->sq_size_log2));
- sqp = wq->sq +
- Q_PTR2IDX(CQE_WRID_SQ_WPTR(*hw_cqe), wq->sq_size_log2);
- sqp->cqe = *hw_cqe;
- sqp->complete = 1;
- ret = -1;
- goto flush_wq;
- }
-
-proc_cqe:
- *cqe = *hw_cqe;
-
- /*
- * Reap the associated WR(s) that are freed up with this
- * completion.
- */
- if (SQ_TYPE(*hw_cqe)) {
- wq->sq_rptr = CQE_WRID_SQ_WPTR(*hw_cqe);
- PDBG("%s completing sq idx %ld\n", __func__,
- Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2));
- *cookie = wq->sq[Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2)].wr_id;
- wq->sq_rptr++;
- } else {
- PDBG("%s completing rq idx %ld\n", __func__,
- Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2));
- *cookie = wq->rq[Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2)].wr_id;
- if (wq->rq[Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2)].pbl_addr)
- cxio_hal_pblpool_free(wq->rdev,
- wq->rq[Q_PTR2IDX(wq->rq_rptr,
- wq->rq_size_log2)].pbl_addr, T3_STAG0_PBL_SIZE);
- BUG_ON(Q_EMPTY(wq->rq_rptr, wq->rq_wptr));
- wq->rq_rptr++;
- }
-
-flush_wq:
- /*
- * Flush any completed cqes that are now in-order.
- */
- flush_completed_wrs(wq, cq);
-
-skip_cqe:
- if (SW_CQE(*hw_cqe)) {
- PDBG("%s cq %p cqid 0x%x skip sw cqe sw_rptr 0x%x\n",
- __func__, cq, cq->cqid, cq->sw_rptr);
- ++cq->sw_rptr;
- } else {
- PDBG("%s cq %p cqid 0x%x skip hw cqe rptr 0x%x\n",
- __func__, cq, cq->cqid, cq->rptr);
- ++cq->rptr;
-
- /*
- * T3A: compute credits.
- */
- if (((cq->rptr - cq->wptr) > (1 << (cq->size_log2 - 1)))
- || ((cq->rptr - cq->wptr) >= 128)) {
- *credit = cq->rptr - cq->wptr;
- cq->wptr = cq->rptr;
- }
- }
- return ret;
-}
diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.h b/drivers/infiniband/hw/cxgb3/cxio_hal.h
deleted file mode 100644
index 78fbe9ffe7f0..000000000000
--- a/drivers/infiniband/hw/cxgb3/cxio_hal.h
+++ /dev/null
@@ -1,211 +0,0 @@
-/*
- * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef __CXIO_HAL_H__
-#define __CXIO_HAL_H__
-
-#include <linux/list.h>
-#include <linux/mutex.h>
-#include <linux/kfifo.h>
-
-#include "t3_cpl.h"
-#include "t3cdev.h"
-#include "cxgb3_ctl_defs.h"
-#include "cxio_wr.h"
-
-#define T3_CTRL_QP_ID FW_RI_SGEEC_START
-#define T3_CTL_QP_TID FW_RI_TID_START
-#define T3_CTRL_QP_SIZE_LOG2 8
-#define T3_CTRL_CQ_ID 0
-
-#define T3_MAX_NUM_RI (1<<15)
-#define T3_MAX_NUM_QP (1<<15)
-#define T3_MAX_NUM_CQ (1<<15)
-#define T3_MAX_NUM_PD (1<<15)
-#define T3_MAX_PBL_SIZE 256
-#define T3_MAX_RQ_SIZE 1024
-#define T3_MAX_QP_DEPTH (T3_MAX_RQ_SIZE-1)
-#define T3_MAX_CQ_DEPTH 65536
-#define T3_MAX_NUM_STAG (1<<15)
-#define T3_MAX_MR_SIZE 0x100000000ULL
-#define T3_PAGESIZE_MASK 0xffff000 /* 4KB-128MB */
-
-#define T3_STAG_UNSET 0xffffffff
-
-#define T3_MAX_DEV_NAME_LEN 32
-
-#define CXIO_FW_MAJ 7
-
-struct cxio_hal_ctrl_qp {
- u32 wptr;
- u32 rptr;
- struct mutex lock; /* for the wtpr, can sleep */
- wait_queue_head_t waitq;/* wait for RspQ/CQE msg */
- union t3_wr *workq; /* the work request queue */
- dma_addr_t dma_addr; /* pci bus address of the workq */
- DEFINE_DMA_UNMAP_ADDR(mapping);
- void __iomem *doorbell;
-};
-
-struct cxio_hal_resource {
- struct kfifo tpt_fifo;
- spinlock_t tpt_fifo_lock;
- struct kfifo qpid_fifo;
- spinlock_t qpid_fifo_lock;
- struct kfifo cqid_fifo;
- spinlock_t cqid_fifo_lock;
- struct kfifo pdid_fifo;
- spinlock_t pdid_fifo_lock;
-};
-
-struct cxio_qpid_list {
- struct list_head entry;
- u32 qpid;
-};
-
-struct cxio_ucontext {
- struct list_head qpids;
- struct mutex lock;
-};
-
-struct cxio_rdev {
- char dev_name[T3_MAX_DEV_NAME_LEN];
- struct t3cdev *t3cdev_p;
- struct rdma_info rnic_info;
- struct adap_ports port_info;
- struct cxio_hal_resource *rscp;
- struct cxio_hal_ctrl_qp ctrl_qp;
- void *ulp;
- unsigned long qpshift;
- u32 qpnr;
- u32 qpmask;
- struct cxio_ucontext uctx;
- struct gen_pool *pbl_pool;
- struct gen_pool *rqt_pool;
- struct list_head entry;
- struct ch_embedded_info fw_info;
- u32 flags;
-#define CXIO_ERROR_FATAL 1
-};
-
-static inline int cxio_fatal_error(struct cxio_rdev *rdev_p)
-{
- return rdev_p->flags & CXIO_ERROR_FATAL;
-}
-
-static inline int cxio_num_stags(struct cxio_rdev *rdev_p)
-{
- return min((int)T3_MAX_NUM_STAG, (int)((rdev_p->rnic_info.tpt_top - rdev_p->rnic_info.tpt_base) >> 5));
-}
-
-typedef void (*cxio_hal_ev_callback_func_t) (struct cxio_rdev * rdev_p,
- struct sk_buff * skb);
-
-#define RSPQ_CQID(rsp) (be32_to_cpu(rsp->cq_ptrid) & 0xffff)
-#define RSPQ_CQPTR(rsp) ((be32_to_cpu(rsp->cq_ptrid) >> 16) & 0xffff)
-#define RSPQ_GENBIT(rsp) ((be32_to_cpu(rsp->flags) >> 16) & 1)
-#define RSPQ_OVERFLOW(rsp) ((be32_to_cpu(rsp->flags) >> 17) & 1)
-#define RSPQ_AN(rsp) ((be32_to_cpu(rsp->flags) >> 18) & 1)
-#define RSPQ_SE(rsp) ((be32_to_cpu(rsp->flags) >> 19) & 1)
-#define RSPQ_NOTIFY(rsp) ((be32_to_cpu(rsp->flags) >> 20) & 1)
-#define RSPQ_CQBRANCH(rsp) ((be32_to_cpu(rsp->flags) >> 21) & 1)
-#define RSPQ_CREDIT_THRESH(rsp) ((be32_to_cpu(rsp->flags) >> 22) & 1)
-
-struct respQ_msg_t {
- __be32 flags; /* flit 0 */
- __be32 cq_ptrid;
- __be64 rsvd; /* flit 1 */
- struct t3_cqe cqe; /* flits 2-3 */
-};
-
-enum t3_cq_opcode {
- CQ_ARM_AN = 0x2,
- CQ_ARM_SE = 0x6,
- CQ_FORCE_AN = 0x3,
- CQ_CREDIT_UPDATE = 0x7
-};
-
-int cxio_rdev_open(struct cxio_rdev *rdev);
-void cxio_rdev_close(struct cxio_rdev *rdev);
-int cxio_hal_cq_op(struct cxio_rdev *rdev, struct t3_cq *cq,
- enum t3_cq_opcode op, u32 credit);
-int cxio_create_cq(struct cxio_rdev *rdev, struct t3_cq *cq, int kernel);
-int cxio_destroy_cq(struct cxio_rdev *rdev, struct t3_cq *cq);
-int cxio_resize_cq(struct cxio_rdev *rdev, struct t3_cq *cq);
-void cxio_release_ucontext(struct cxio_rdev *rdev, struct cxio_ucontext *uctx);
-void cxio_init_ucontext(struct cxio_rdev *rdev, struct cxio_ucontext *uctx);
-int cxio_create_qp(struct cxio_rdev *rdev, u32 kernel_domain, struct t3_wq *wq,
- struct cxio_ucontext *uctx);
-int cxio_destroy_qp(struct cxio_rdev *rdev, struct t3_wq *wq,
- struct cxio_ucontext *uctx);
-int cxio_peek_cq(struct t3_wq *wr, struct t3_cq *cq, int opcode);
-int cxio_write_pbl(struct cxio_rdev *rdev_p, __be64 *pbl,
- u32 pbl_addr, u32 pbl_size);
-int cxio_register_phys_mem(struct cxio_rdev *rdev, u32 * stag, u32 pdid,
- enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len,
- u8 page_size, u32 pbl_size, u32 pbl_addr);
-int cxio_reregister_phys_mem(struct cxio_rdev *rdev, u32 * stag, u32 pdid,
- enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len,
- u8 page_size, u32 pbl_size, u32 pbl_addr);
-int cxio_dereg_mem(struct cxio_rdev *rdev, u32 stag, u32 pbl_size,
- u32 pbl_addr);
-int cxio_allocate_window(struct cxio_rdev *rdev, u32 * stag, u32 pdid);
-int cxio_allocate_stag(struct cxio_rdev *rdev, u32 *stag, u32 pdid, u32 pbl_size, u32 pbl_addr);
-int cxio_deallocate_window(struct cxio_rdev *rdev, u32 stag);
-int cxio_rdma_init(struct cxio_rdev *rdev, struct t3_rdma_init_attr *attr);
-void cxio_register_ev_cb(cxio_hal_ev_callback_func_t ev_cb);
-void cxio_unregister_ev_cb(cxio_hal_ev_callback_func_t ev_cb);
-u32 cxio_hal_get_pdid(struct cxio_hal_resource *rscp);
-void cxio_hal_put_pdid(struct cxio_hal_resource *rscp, u32 pdid);
-int __init cxio_hal_init(void);
-void __exit cxio_hal_exit(void);
-int cxio_flush_rq(struct t3_wq *wq, struct t3_cq *cq, int count);
-int cxio_flush_sq(struct t3_wq *wq, struct t3_cq *cq, int count);
-void cxio_count_rcqes(struct t3_cq *cq, struct t3_wq *wq, int *count);
-void cxio_count_scqes(struct t3_cq *cq, struct t3_wq *wq, int *count);
-void cxio_flush_hw_cq(struct t3_cq *cq);
-int cxio_poll_cq(struct t3_wq *wq, struct t3_cq *cq, struct t3_cqe *cqe,
- u8 *cqe_flushed, u64 *cookie, u32 *credit);
-int iwch_cxgb3_ofld_send(struct t3cdev *tdev, struct sk_buff *skb);
-
-#define MOD "iw_cxgb3: "
-#define PDBG(fmt, args...) pr_debug(MOD fmt, ## args)
-
-#ifdef DEBUG
-void cxio_dump_tpt(struct cxio_rdev *rev, u32 stag);
-void cxio_dump_pbl(struct cxio_rdev *rev, u32 pbl_addr, uint len, u8 shift);
-void cxio_dump_wqe(union t3_wr *wqe);
-void cxio_dump_wce(struct t3_cqe *wce);
-void cxio_dump_rqt(struct cxio_rdev *rdev, u32 hwtid, int nents);
-void cxio_dump_tcb(struct cxio_rdev *rdev, u32 hwtid);
-#endif
-
-#endif
diff --git a/drivers/infiniband/hw/cxgb3/cxio_resource.c b/drivers/infiniband/hw/cxgb3/cxio_resource.c
deleted file mode 100644
index c40088ecf9f3..000000000000
--- a/drivers/infiniband/hw/cxgb3/cxio_resource.c
+++ /dev/null
@@ -1,343 +0,0 @@
-/*
- * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-/* Crude resource management */
-#include <linux/kernel.h>
-#include <linux/random.h>
-#include <linux/slab.h>
-#include <linux/kfifo.h>
-#include <linux/spinlock.h>
-#include <linux/errno.h>
-#include "cxio_resource.h"
-#include "cxio_hal.h"
-
-static struct kfifo rhdl_fifo;
-static spinlock_t rhdl_fifo_lock;
-
-#define RANDOM_SIZE 16
-
-static int __cxio_init_resource_fifo(struct kfifo *fifo,
- spinlock_t *fifo_lock,
- u32 nr, u32 skip_low,
- u32 skip_high,
- int random)
-{
- u32 i, j, entry = 0, idx;
- u32 random_bytes;
- u32 rarray[16];
- spin_lock_init(fifo_lock);
-
- if (kfifo_alloc(fifo, nr * sizeof(u32), GFP_KERNEL))
- return -ENOMEM;
-
- for (i = 0; i < skip_low + skip_high; i++)
- kfifo_in(fifo, (unsigned char *) &entry, sizeof(u32));
- if (random) {
- j = 0;
- random_bytes = prandom_u32();
- for (i = 0; i < RANDOM_SIZE; i++)
- rarray[i] = i + skip_low;
- for (i = skip_low + RANDOM_SIZE; i < nr - skip_high; i++) {
- if (j >= RANDOM_SIZE) {
- j = 0;
- random_bytes = prandom_u32();
- }
- idx = (random_bytes >> (j * 2)) & 0xF;
- kfifo_in(fifo,
- (unsigned char *) &rarray[idx],
- sizeof(u32));
- rarray[idx] = i;
- j++;
- }
- for (i = 0; i < RANDOM_SIZE; i++)
- kfifo_in(fifo,
- (unsigned char *) &rarray[i],
- sizeof(u32));
- } else
- for (i = skip_low; i < nr - skip_high; i++)
- kfifo_in(fifo, (unsigned char *) &i, sizeof(u32));
-
- for (i = 0; i < skip_low + skip_high; i++)
- if (kfifo_out_locked(fifo, (unsigned char *) &entry,
- sizeof(u32), fifo_lock) != sizeof(u32))
- break;
- return 0;
-}
-
-static int cxio_init_resource_fifo(struct kfifo *fifo, spinlock_t * fifo_lock,
- u32 nr, u32 skip_low, u32 skip_high)
-{
- return (__cxio_init_resource_fifo(fifo, fifo_lock, nr, skip_low,
- skip_high, 0));
-}
-
-static int cxio_init_resource_fifo_random(struct kfifo *fifo,
- spinlock_t * fifo_lock,
- u32 nr, u32 skip_low, u32 skip_high)
-{
-
- return (__cxio_init_resource_fifo(fifo, fifo_lock, nr, skip_low,
- skip_high, 1));
-}
-
-static int cxio_init_qpid_fifo(struct cxio_rdev *rdev_p)
-{
- u32 i;
-
- spin_lock_init(&rdev_p->rscp->qpid_fifo_lock);
-
- if (kfifo_alloc(&rdev_p->rscp->qpid_fifo, T3_MAX_NUM_QP * sizeof(u32),
- GFP_KERNEL))
- return -ENOMEM;
-
- for (i = 16; i < T3_MAX_NUM_QP; i++)
- if (!(i & rdev_p->qpmask))
- kfifo_in(&rdev_p->rscp->qpid_fifo,
- (unsigned char *) &i, sizeof(u32));
- return 0;
-}
-
-int cxio_hal_init_rhdl_resource(u32 nr_rhdl)
-{
- return cxio_init_resource_fifo(&rhdl_fifo, &rhdl_fifo_lock, nr_rhdl, 1,
- 0);
-}
-
-void cxio_hal_destroy_rhdl_resource(void)
-{
- kfifo_free(&rhdl_fifo);
-}
-
-/* nr_* must be power of 2 */
-int cxio_hal_init_resource(struct cxio_rdev *rdev_p,
- u32 nr_tpt, u32 nr_pbl,
- u32 nr_rqt, u32 nr_qpid, u32 nr_cqid, u32 nr_pdid)
-{
- int err = 0;
- struct cxio_hal_resource *rscp;
-
- rscp = kmalloc(sizeof(*rscp), GFP_KERNEL);
- if (!rscp)
- return -ENOMEM;
- rdev_p->rscp = rscp;
- err = cxio_init_resource_fifo_random(&rscp->tpt_fifo,
- &rscp->tpt_fifo_lock,
- nr_tpt, 1, 0);
- if (err)
- goto tpt_err;
- err = cxio_init_qpid_fifo(rdev_p);
- if (err)
- goto qpid_err;
- err = cxio_init_resource_fifo(&rscp->cqid_fifo, &rscp->cqid_fifo_lock,
- nr_cqid, 1, 0);
- if (err)
- goto cqid_err;
- err = cxio_init_resource_fifo(&rscp->pdid_fifo, &rscp->pdid_fifo_lock,
- nr_pdid, 1, 0);
- if (err)
- goto pdid_err;
- return 0;
-pdid_err:
- kfifo_free(&rscp->cqid_fifo);
-cqid_err:
- kfifo_free(&rscp->qpid_fifo);
-qpid_err:
- kfifo_free(&rscp->tpt_fifo);
-tpt_err:
- return -ENOMEM;
-}
-
-/*
- * returns 0 if no resource available
- */
-static u32 cxio_hal_get_resource(struct kfifo *fifo, spinlock_t * lock)
-{
- u32 entry;
- if (kfifo_out_locked(fifo, (unsigned char *) &entry, sizeof(u32), lock))
- return entry;
- else
- return 0; /* fifo emptry */
-}
-
-static void cxio_hal_put_resource(struct kfifo *fifo, spinlock_t * lock,
- u32 entry)
-{
- BUG_ON(
- kfifo_in_locked(fifo, (unsigned char *) &entry, sizeof(u32), lock)
- == 0);
-}
-
-u32 cxio_hal_get_stag(struct cxio_hal_resource *rscp)
-{
- return cxio_hal_get_resource(&rscp->tpt_fifo, &rscp->tpt_fifo_lock);
-}
-
-void cxio_hal_put_stag(struct cxio_hal_resource *rscp, u32 stag)
-{
- cxio_hal_put_resource(&rscp->tpt_fifo, &rscp->tpt_fifo_lock, stag);
-}
-
-u32 cxio_hal_get_qpid(struct cxio_hal_resource *rscp)
-{
- u32 qpid = cxio_hal_get_resource(&rscp->qpid_fifo,
- &rscp->qpid_fifo_lock);
- PDBG("%s qpid 0x%x\n", __func__, qpid);
- return qpid;
-}
-
-void cxio_hal_put_qpid(struct cxio_hal_resource *rscp, u32 qpid)
-{
- PDBG("%s qpid 0x%x\n", __func__, qpid);
- cxio_hal_put_resource(&rscp->qpid_fifo, &rscp->qpid_fifo_lock, qpid);
-}
-
-u32 cxio_hal_get_cqid(struct cxio_hal_resource *rscp)
-{
- return cxio_hal_get_resource(&rscp->cqid_fifo, &rscp->cqid_fifo_lock);
-}
-
-void cxio_hal_put_cqid(struct cxio_hal_resource *rscp, u32 cqid)
-{
- cxio_hal_put_resource(&rscp->cqid_fifo, &rscp->cqid_fifo_lock, cqid);
-}
-
-u32 cxio_hal_get_pdid(struct cxio_hal_resource *rscp)
-{
- return cxio_hal_get_resource(&rscp->pdid_fifo, &rscp->pdid_fifo_lock);
-}
-
-void cxio_hal_put_pdid(struct cxio_hal_resource *rscp, u32 pdid)
-{
- cxio_hal_put_resource(&rscp->pdid_fifo, &rscp->pdid_fifo_lock, pdid);
-}
-
-void cxio_hal_destroy_resource(struct cxio_hal_resource *rscp)
-{
- kfifo_free(&rscp->tpt_fifo);
- kfifo_free(&rscp->cqid_fifo);
- kfifo_free(&rscp->qpid_fifo);
- kfifo_free(&rscp->pdid_fifo);
- kfree(rscp);
-}
-
-/*
- * PBL Memory Manager. Uses Linux generic allocator.
- */
-
-#define MIN_PBL_SHIFT 8 /* 256B == min PBL size (32 entries) */
-
-u32 cxio_hal_pblpool_alloc(struct cxio_rdev *rdev_p, int size)
-{
- unsigned long addr = gen_pool_alloc(rdev_p->pbl_pool, size);
- PDBG("%s addr 0x%x size %d\n", __func__, (u32)addr, size);
- return (u32)addr;
-}
-
-void cxio_hal_pblpool_free(struct cxio_rdev *rdev_p, u32 addr, int size)
-{
- PDBG("%s addr 0x%x size %d\n", __func__, addr, size);
- gen_pool_free(rdev_p->pbl_pool, (unsigned long)addr, size);
-}
-
-int cxio_hal_pblpool_create(struct cxio_rdev *rdev_p)
-{
- unsigned pbl_start, pbl_chunk;
-
- rdev_p->pbl_pool = gen_pool_create(MIN_PBL_SHIFT, -1);
- if (!rdev_p->pbl_pool)
- return -ENOMEM;
-
- pbl_start = rdev_p->rnic_info.pbl_base;
- pbl_chunk = rdev_p->rnic_info.pbl_top - pbl_start + 1;
-
- while (pbl_start < rdev_p->rnic_info.pbl_top) {
- pbl_chunk = min(rdev_p->rnic_info.pbl_top - pbl_start + 1,
- pbl_chunk);
- if (gen_pool_add(rdev_p->pbl_pool, pbl_start, pbl_chunk, -1)) {
- PDBG("%s failed to add PBL chunk (%x/%x)\n",
- __func__, pbl_start, pbl_chunk);
- if (pbl_chunk <= 1024 << MIN_PBL_SHIFT) {
- printk(KERN_WARNING MOD "%s: Failed to add all PBL chunks (%x/%x)\n",
- __func__, pbl_start, rdev_p->rnic_info.pbl_top - pbl_start);
- return 0;
- }
- pbl_chunk >>= 1;
- } else {
- PDBG("%s added PBL chunk (%x/%x)\n",
- __func__, pbl_start, pbl_chunk);
- pbl_start += pbl_chunk;
- }
- }
-
- return 0;
-}
-
-void cxio_hal_pblpool_destroy(struct cxio_rdev *rdev_p)
-{
- gen_pool_destroy(rdev_p->pbl_pool);
-}
-
-/*
- * RQT Memory Manager. Uses Linux generic allocator.
- */
-
-#define MIN_RQT_SHIFT 10 /* 1KB == mini RQT size (16 entries) */
-#define RQT_CHUNK 2*1024*1024
-
-u32 cxio_hal_rqtpool_alloc(struct cxio_rdev *rdev_p, int size)
-{
- unsigned long addr = gen_pool_alloc(rdev_p->rqt_pool, size << 6);
- PDBG("%s addr 0x%x size %d\n", __func__, (u32)addr, size << 6);
- return (u32)addr;
-}
-
-void cxio_hal_rqtpool_free(struct cxio_rdev *rdev_p, u32 addr, int size)
-{
- PDBG("%s addr 0x%x size %d\n", __func__, addr, size << 6);
- gen_pool_free(rdev_p->rqt_pool, (unsigned long)addr, size << 6);
-}
-
-int cxio_hal_rqtpool_create(struct cxio_rdev *rdev_p)
-{
- unsigned long i;
- rdev_p->rqt_pool = gen_pool_create(MIN_RQT_SHIFT, -1);
- if (rdev_p->rqt_pool)
- for (i = rdev_p->rnic_info.rqt_base;
- i <= rdev_p->rnic_info.rqt_top - RQT_CHUNK + 1;
- i += RQT_CHUNK)
- gen_pool_add(rdev_p->rqt_pool, i, RQT_CHUNK, -1);
- return rdev_p->rqt_pool ? 0 : -ENOMEM;
-}
-
-void cxio_hal_rqtpool_destroy(struct cxio_rdev *rdev_p)
-{
- gen_pool_destroy(rdev_p->rqt_pool);
-}
diff --git a/drivers/infiniband/hw/cxgb3/cxio_resource.h b/drivers/infiniband/hw/cxgb3/cxio_resource.h
deleted file mode 100644
index a2703a3d882d..000000000000
--- a/drivers/infiniband/hw/cxgb3/cxio_resource.h
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef __CXIO_RESOURCE_H__
-#define __CXIO_RESOURCE_H__
-
-#include <linux/kernel.h>
-#include <linux/random.h>
-#include <linux/slab.h>
-#include <linux/kfifo.h>
-#include <linux/spinlock.h>
-#include <linux/errno.h>
-#include <linux/genalloc.h>
-#include "cxio_hal.h"
-
-extern int cxio_hal_init_rhdl_resource(u32 nr_rhdl);
-extern void cxio_hal_destroy_rhdl_resource(void);
-extern int cxio_hal_init_resource(struct cxio_rdev *rdev_p,
- u32 nr_tpt, u32 nr_pbl,
- u32 nr_rqt, u32 nr_qpid, u32 nr_cqid,
- u32 nr_pdid);
-extern u32 cxio_hal_get_stag(struct cxio_hal_resource *rscp);
-extern void cxio_hal_put_stag(struct cxio_hal_resource *rscp, u32 stag);
-extern u32 cxio_hal_get_qpid(struct cxio_hal_resource *rscp);
-extern void cxio_hal_put_qpid(struct cxio_hal_resource *rscp, u32 qpid);
-extern u32 cxio_hal_get_cqid(struct cxio_hal_resource *rscp);
-extern void cxio_hal_put_cqid(struct cxio_hal_resource *rscp, u32 cqid);
-extern void cxio_hal_destroy_resource(struct cxio_hal_resource *rscp);
-
-#define PBL_OFF(rdev_p, a) ( (a) - (rdev_p)->rnic_info.pbl_base )
-extern int cxio_hal_pblpool_create(struct cxio_rdev *rdev_p);
-extern void cxio_hal_pblpool_destroy(struct cxio_rdev *rdev_p);
-extern u32 cxio_hal_pblpool_alloc(struct cxio_rdev *rdev_p, int size);
-extern void cxio_hal_pblpool_free(struct cxio_rdev *rdev_p, u32 addr, int size);
-
-#define RQT_OFF(rdev_p, a) ( (a) - (rdev_p)->rnic_info.rqt_base )
-extern int cxio_hal_rqtpool_create(struct cxio_rdev *rdev_p);
-extern void cxio_hal_rqtpool_destroy(struct cxio_rdev *rdev_p);
-extern u32 cxio_hal_rqtpool_alloc(struct cxio_rdev *rdev_p, int size);
-extern void cxio_hal_rqtpool_free(struct cxio_rdev *rdev_p, u32 addr, int size);
-#endif
diff --git a/drivers/infiniband/hw/cxgb3/cxio_wr.h b/drivers/infiniband/hw/cxgb3/cxio_wr.h
deleted file mode 100644
index 83d2e19d31ae..000000000000
--- a/drivers/infiniband/hw/cxgb3/cxio_wr.h
+++ /dev/null
@@ -1,802 +0,0 @@
-/*
- * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef __CXIO_WR_H__
-#define __CXIO_WR_H__
-
-#include <asm/io.h>
-#include <linux/pci.h>
-#include <linux/timer.h>
-#include "firmware_exports.h"
-
-#define T3_MAX_SGE 4
-#define T3_MAX_INLINE 64
-#define T3_STAG0_PBL_SIZE (2 * T3_MAX_SGE << 3)
-#define T3_STAG0_MAX_PBE_LEN (128 * 1024 * 1024)
-#define T3_STAG0_PAGE_SHIFT 15
-
-#define Q_EMPTY(rptr,wptr) ((rptr)==(wptr))
-#define Q_FULL(rptr,wptr,size_log2) ( (((wptr)-(rptr))>>(size_log2)) && \
- ((rptr)!=(wptr)) )
-#define Q_GENBIT(ptr,size_log2) (!(((ptr)>>size_log2)&0x1))
-#define Q_FREECNT(rptr,wptr,size_log2) ((1UL<<size_log2)-((wptr)-(rptr)))
-#define Q_COUNT(rptr,wptr) ((wptr)-(rptr))
-#define Q_PTR2IDX(ptr,size_log2) (ptr & ((1UL<<size_log2)-1))
-
-static inline void ring_doorbell(void __iomem *doorbell, u32 qpid)
-{
- writel(((1<<31) | qpid), doorbell);
-}
-
-#define SEQ32_GE(x,y) (!( (((u32) (x)) - ((u32) (y))) & 0x80000000 ))
-
-enum t3_wr_flags {
- T3_COMPLETION_FLAG = 0x01,
- T3_NOTIFY_FLAG = 0x02,
- T3_SOLICITED_EVENT_FLAG = 0x04,
- T3_READ_FENCE_FLAG = 0x08,
- T3_LOCAL_FENCE_FLAG = 0x10
-} __attribute__ ((packed));
-
-enum t3_wr_opcode {
- T3_WR_BP = FW_WROPCODE_RI_BYPASS,
- T3_WR_SEND = FW_WROPCODE_RI_SEND,
- T3_WR_WRITE = FW_WROPCODE_RI_RDMA_WRITE,
- T3_WR_READ = FW_WROPCODE_RI_RDMA_READ,
- T3_WR_INV_STAG = FW_WROPCODE_RI_LOCAL_INV,
- T3_WR_BIND = FW_WROPCODE_RI_BIND_MW,
- T3_WR_RCV = FW_WROPCODE_RI_RECEIVE,
- T3_WR_INIT = FW_WROPCODE_RI_RDMA_INIT,
- T3_WR_QP_MOD = FW_WROPCODE_RI_MODIFY_QP,
- T3_WR_FASTREG = FW_WROPCODE_RI_FASTREGISTER_MR
-} __attribute__ ((packed));
-
-enum t3_rdma_opcode {
- T3_RDMA_WRITE, /* IETF RDMAP v1.0 ... */
- T3_READ_REQ,
- T3_READ_RESP,
- T3_SEND,
- T3_SEND_WITH_INV,
- T3_SEND_WITH_SE,
- T3_SEND_WITH_SE_INV,
- T3_TERMINATE,
- T3_RDMA_INIT, /* CHELSIO RI specific ... */
- T3_BIND_MW,
- T3_FAST_REGISTER,
- T3_LOCAL_INV,
- T3_QP_MOD,
- T3_BYPASS,
- T3_RDMA_READ_REQ_WITH_INV,
-} __attribute__ ((packed));
-
-static inline enum t3_rdma_opcode wr2opcode(enum t3_wr_opcode wrop)
-{
- switch (wrop) {
- case T3_WR_BP: return T3_BYPASS;
- case T3_WR_SEND: return T3_SEND;
- case T3_WR_WRITE: return T3_RDMA_WRITE;
- case T3_WR_READ: return T3_READ_REQ;
- case T3_WR_INV_STAG: return T3_LOCAL_INV;
- case T3_WR_BIND: return T3_BIND_MW;
- case T3_WR_INIT: return T3_RDMA_INIT;
- case T3_WR_QP_MOD: return T3_QP_MOD;
- case T3_WR_FASTREG: return T3_FAST_REGISTER;
- default: break;
- }
- return -1;
-}
-
-
-/* Work request id */
-union t3_wrid {
- struct {
- u32 hi;
- u32 low;
- } id0;
- u64 id1;
-};
-
-#define WRID(wrid) (wrid.id1)
-#define WRID_GEN(wrid) (wrid.id0.wr_gen)
-#define WRID_IDX(wrid) (wrid.id0.wr_idx)
-#define WRID_LO(wrid) (wrid.id0.wr_lo)
-
-struct fw_riwrh {
- __be32 op_seop_flags;
- __be32 gen_tid_len;
-};
-
-#define S_FW_RIWR_OP 24
-#define M_FW_RIWR_OP 0xff
-#define V_FW_RIWR_OP(x) ((x) << S_FW_RIWR_OP)
-#define G_FW_RIWR_OP(x) ((((x) >> S_FW_RIWR_OP)) & M_FW_RIWR_OP)
-
-#define S_FW_RIWR_SOPEOP 22
-#define M_FW_RIWR_SOPEOP 0x3
-#define V_FW_RIWR_SOPEOP(x) ((x) << S_FW_RIWR_SOPEOP)
-
-#define S_FW_RIWR_FLAGS 8
-#define M_FW_RIWR_FLAGS 0x3fffff
-#define V_FW_RIWR_FLAGS(x) ((x) << S_FW_RIWR_FLAGS)
-#define G_FW_RIWR_FLAGS(x) ((((x) >> S_FW_RIWR_FLAGS)) & M_FW_RIWR_FLAGS)
-
-#define S_FW_RIWR_TID 8
-#define V_FW_RIWR_TID(x) ((x) << S_FW_RIWR_TID)
-
-#define S_FW_RIWR_LEN 0
-#define V_FW_RIWR_LEN(x) ((x) << S_FW_RIWR_LEN)
-
-#define S_FW_RIWR_GEN 31
-#define V_FW_RIWR_GEN(x) ((x) << S_FW_RIWR_GEN)
-
-struct t3_sge {
- __be32 stag;
- __be32 len;
- __be64 to;
-};
-
-/* If num_sgle is zero, flit 5+ contains immediate data.*/
-struct t3_send_wr {
- struct fw_riwrh wrh; /* 0 */
- union t3_wrid wrid; /* 1 */
-
- u8 rdmaop; /* 2 */
- u8 reserved[3];
- __be32 rem_stag;
- __be32 plen; /* 3 */
- __be32 num_sgle;
- struct t3_sge sgl[T3_MAX_SGE]; /* 4+ */
-};
-
-#define T3_MAX_FASTREG_DEPTH 10
-#define T3_MAX_FASTREG_FRAG 10
-
-struct t3_fastreg_wr {
- struct fw_riwrh wrh; /* 0 */
- union t3_wrid wrid; /* 1 */
- __be32 stag; /* 2 */
- __be32 len;
- __be32 va_base_hi; /* 3 */
- __be32 va_base_lo_fbo;
- __be32 page_type_perms; /* 4 */
- __be32 reserved1;
- __be64 pbl_addrs[0]; /* 5+ */
-};
-
-/*
- * If a fastreg wr spans multiple wqes, then the 2nd fragment look like this.
- */
-struct t3_pbl_frag {
- struct fw_riwrh wrh; /* 0 */
- __be64 pbl_addrs[14]; /* 1..14 */
-};
-
-#define S_FR_PAGE_COUNT 24
-#define M_FR_PAGE_COUNT 0xff
-#define V_FR_PAGE_COUNT(x) ((x) << S_FR_PAGE_COUNT)
-#define G_FR_PAGE_COUNT(x) ((((x) >> S_FR_PAGE_COUNT)) & M_FR_PAGE_COUNT)
-
-#define S_FR_PAGE_SIZE 16
-#define M_FR_PAGE_SIZE 0x1f
-#define V_FR_PAGE_SIZE(x) ((x) << S_FR_PAGE_SIZE)
-#define G_FR_PAGE_SIZE(x) ((((x) >> S_FR_PAGE_SIZE)) & M_FR_PAGE_SIZE)
-
-#define S_FR_TYPE 8
-#define M_FR_TYPE 0x1
-#define V_FR_TYPE(x) ((x) << S_FR_TYPE)
-#define G_FR_TYPE(x) ((((x) >> S_FR_TYPE)) & M_FR_TYPE)
-
-#define S_FR_PERMS 0
-#define M_FR_PERMS 0xff
-#define V_FR_PERMS(x) ((x) << S_FR_PERMS)
-#define G_FR_PERMS(x) ((((x) >> S_FR_PERMS)) & M_FR_PERMS)
-
-struct t3_local_inv_wr {
- struct fw_riwrh wrh; /* 0 */
- union t3_wrid wrid; /* 1 */
- __be32 stag; /* 2 */
- __be32 reserved;
-};
-
-struct t3_rdma_write_wr {
- struct fw_riwrh wrh; /* 0 */
- union t3_wrid wrid; /* 1 */
- u8 rdmaop; /* 2 */
- u8 reserved[3];
- __be32 stag_sink;
- __be64 to_sink; /* 3 */
- __be32 plen; /* 4 */
- __be32 num_sgle;
- struct t3_sge sgl[T3_MAX_SGE]; /* 5+ */
-};
-
-struct t3_rdma_read_wr {
- struct fw_riwrh wrh; /* 0 */
- union t3_wrid wrid; /* 1 */
- u8 rdmaop; /* 2 */
- u8 local_inv;
- u8 reserved[2];
- __be32 rem_stag;
- __be64 rem_to; /* 3 */
- __be32 local_stag; /* 4 */
- __be32 local_len;
- __be64 local_to; /* 5 */
-};
-
-struct t3_bind_mw_wr {
- struct fw_riwrh wrh; /* 0 */
- union t3_wrid wrid; /* 1 */
- u16 reserved; /* 2 */
- u8 type;
- u8 perms;
- __be32 mr_stag;
- __be32 mw_stag; /* 3 */
- __be32 mw_len;
- __be64 mw_va; /* 4 */
- __be32 mr_pbl_addr; /* 5 */
- u8 reserved2[3];
- u8 mr_pagesz;
-};
-
-struct t3_receive_wr {
- struct fw_riwrh wrh; /* 0 */
- union t3_wrid wrid; /* 1 */
- u8 pagesz[T3_MAX_SGE];
- __be32 num_sgle; /* 2 */
- struct t3_sge sgl[T3_MAX_SGE]; /* 3+ */
- __be32 pbl_addr[T3_MAX_SGE];
-};
-
-struct t3_bypass_wr {
- struct fw_riwrh wrh;
- union t3_wrid wrid; /* 1 */
-};
-
-struct t3_modify_qp_wr {
- struct fw_riwrh wrh; /* 0 */
- union t3_wrid wrid; /* 1 */
- __be32 flags; /* 2 */
- __be32 quiesce; /* 2 */
- __be32 max_ird; /* 3 */
- __be32 max_ord; /* 3 */
- __be64 sge_cmd; /* 4 */
- __be64 ctx1; /* 5 */
- __be64 ctx0; /* 6 */
-};
-
-enum t3_modify_qp_flags {
- MODQP_QUIESCE = 0x01,
- MODQP_MAX_IRD = 0x02,
- MODQP_MAX_ORD = 0x04,
- MODQP_WRITE_EC = 0x08,
- MODQP_READ_EC = 0x10,
-};
-
-
-enum t3_mpa_attrs {
- uP_RI_MPA_RX_MARKER_ENABLE = 0x1,
- uP_RI_MPA_TX_MARKER_ENABLE = 0x2,
- uP_RI_MPA_CRC_ENABLE = 0x4,
- uP_RI_MPA_IETF_ENABLE = 0x8
-} __attribute__ ((packed));
-
-enum t3_qp_caps {
- uP_RI_QP_RDMA_READ_ENABLE = 0x01,
- uP_RI_QP_RDMA_WRITE_ENABLE = 0x02,
- uP_RI_QP_BIND_ENABLE = 0x04,
- uP_RI_QP_FAST_REGISTER_ENABLE = 0x08,
- uP_RI_QP_STAG0_ENABLE = 0x10
-} __attribute__ ((packed));
-
-enum rdma_init_rtr_types {
- RTR_READ = 1,
- RTR_WRITE = 2,
- RTR_SEND = 3,
-};
-
-#define S_RTR_TYPE 2
-#define M_RTR_TYPE 0x3
-#define V_RTR_TYPE(x) ((x) << S_RTR_TYPE)
-#define G_RTR_TYPE(x) ((((x) >> S_RTR_TYPE)) & M_RTR_TYPE)
-
-#define S_CHAN 4
-#define M_CHAN 0x3
-#define V_CHAN(x) ((x) << S_CHAN)
-#define G_CHAN(x) ((((x) >> S_CHAN)) & M_CHAN)
-
-struct t3_rdma_init_attr {
- u32 tid;
- u32 qpid;
- u32 pdid;
- u32 scqid;
- u32 rcqid;
- u32 rq_addr;
- u32 rq_size;
- enum t3_mpa_attrs mpaattrs;
- enum t3_qp_caps qpcaps;
- u16 tcp_emss;
- u32 ord;
- u32 ird;
- u64 qp_dma_addr;
- u32 qp_dma_size;
- enum rdma_init_rtr_types rtr_type;
- u16 flags;
- u16 rqe_count;
- u32 irs;
- u32 chan;
-};
-
-struct t3_rdma_init_wr {
- struct fw_riwrh wrh; /* 0 */
- union t3_wrid wrid; /* 1 */
- __be32 qpid; /* 2 */
- __be32 pdid;
- __be32 scqid; /* 3 */
- __be32 rcqid;
- __be32 rq_addr; /* 4 */
- __be32 rq_size;
- u8 mpaattrs; /* 5 */
- u8 qpcaps;
- __be16 ulpdu_size;
- __be16 flags_rtr_type;
- __be16 rqe_count;
- __be32 ord; /* 6 */
- __be32 ird;
- __be64 qp_dma_addr; /* 7 */
- __be32 qp_dma_size; /* 8 */
- __be32 irs;
-};
-
-struct t3_genbit {
- u64 flit[15];
- __be64 genbit;
-};
-
-struct t3_wq_in_err {
- u64 flit[13];
- u64 err;
-};
-
-enum rdma_init_wr_flags {
- MPA_INITIATOR = (1<<0),
- PRIV_QP = (1<<1),
-};
-
-union t3_wr {
- struct t3_send_wr send;
- struct t3_rdma_write_wr write;
- struct t3_rdma_read_wr read;
- struct t3_receive_wr recv;
- struct t3_fastreg_wr fastreg;
- struct t3_pbl_frag pbl_frag;
- struct t3_local_inv_wr local_inv;
- struct t3_bind_mw_wr bind;
- struct t3_bypass_wr bypass;
- struct t3_rdma_init_wr init;
- struct t3_modify_qp_wr qp_mod;
- struct t3_genbit genbit;
- struct t3_wq_in_err wq_in_err;
- __be64 flit[16];
-};
-
-#define T3_SQ_CQE_FLIT 13
-#define T3_SQ_COOKIE_FLIT 14
-
-#define T3_RQ_COOKIE_FLIT 13
-#define T3_RQ_CQE_FLIT 14
-
-static inline enum t3_wr_opcode fw_riwrh_opcode(struct fw_riwrh *wqe)
-{
- return G_FW_RIWR_OP(be32_to_cpu(wqe->op_seop_flags));
-}
-
-enum t3_wr_hdr_bits {
- T3_EOP = 1,
- T3_SOP = 2,
- T3_SOPEOP = T3_EOP|T3_SOP,
-};
-
-static inline void build_fw_riwrh(struct fw_riwrh *wqe, enum t3_wr_opcode op,
- enum t3_wr_flags flags, u8 genbit, u32 tid,
- u8 len, u8 sopeop)
-{
- wqe->op_seop_flags = cpu_to_be32(V_FW_RIWR_OP(op) |
- V_FW_RIWR_SOPEOP(sopeop) |
- V_FW_RIWR_FLAGS(flags));
- wmb();
- wqe->gen_tid_len = cpu_to_be32(V_FW_RIWR_GEN(genbit) |
- V_FW_RIWR_TID(tid) |
- V_FW_RIWR_LEN(len));
- /* 2nd gen bit... */
- ((union t3_wr *)wqe)->genbit.genbit = cpu_to_be64(genbit);
-}
-
-/*
- * T3 ULP2_TX commands
- */
-enum t3_utx_mem_op {
- T3_UTX_MEM_READ = 2,
- T3_UTX_MEM_WRITE = 3
-};
-
-/* T3 MC7 RDMA TPT entry format */
-
-enum tpt_mem_type {
- TPT_NON_SHARED_MR = 0x0,
- TPT_SHARED_MR = 0x1,
- TPT_MW = 0x2,
- TPT_MW_RELAXED_PROTECTION = 0x3
-};
-
-enum tpt_addr_type {
- TPT_ZBTO = 0,
- TPT_VATO = 1
-};
-
-enum tpt_mem_perm {
- TPT_MW_BIND = 0x10,
- TPT_LOCAL_READ = 0x8,
- TPT_LOCAL_WRITE = 0x4,
- TPT_REMOTE_READ = 0x2,
- TPT_REMOTE_WRITE = 0x1
-};
-
-struct tpt_entry {
- __be32 valid_stag_pdid;
- __be32 flags_pagesize_qpid;
-
- __be32 rsvd_pbl_addr;
- __be32 len;
- __be32 va_hi;
- __be32 va_low_or_fbo;
-
- __be32 rsvd_bind_cnt_or_pstag;
- __be32 rsvd_pbl_size;
-};
-
-#define S_TPT_VALID 31
-#define V_TPT_VALID(x) ((x) << S_TPT_VALID)
-#define F_TPT_VALID V_TPT_VALID(1U)
-
-#define S_TPT_STAG_KEY 23
-#define M_TPT_STAG_KEY 0xFF
-#define V_TPT_STAG_KEY(x) ((x) << S_TPT_STAG_KEY)
-#define G_TPT_STAG_KEY(x) (((x) >> S_TPT_STAG_KEY) & M_TPT_STAG_KEY)
-
-#define S_TPT_STAG_STATE 22
-#define V_TPT_STAG_STATE(x) ((x) << S_TPT_STAG_STATE)
-#define F_TPT_STAG_STATE V_TPT_STAG_STATE(1U)
-
-#define S_TPT_STAG_TYPE 20
-#define M_TPT_STAG_TYPE 0x3
-#define V_TPT_STAG_TYPE(x) ((x) << S_TPT_STAG_TYPE)
-#define G_TPT_STAG_TYPE(x) (((x) >> S_TPT_STAG_TYPE) & M_TPT_STAG_TYPE)
-
-#define S_TPT_PDID 0
-#define M_TPT_PDID 0xFFFFF
-#define V_TPT_PDID(x) ((x) << S_TPT_PDID)
-#define G_TPT_PDID(x) (((x) >> S_TPT_PDID) & M_TPT_PDID)
-
-#define S_TPT_PERM 28
-#define M_TPT_PERM 0xF
-#define V_TPT_PERM(x) ((x) << S_TPT_PERM)
-#define G_TPT_PERM(x) (((x) >> S_TPT_PERM) & M_TPT_PERM)
-
-#define S_TPT_REM_INV_DIS 27
-#define V_TPT_REM_INV_DIS(x) ((x) << S_TPT_REM_INV_DIS)
-#define F_TPT_REM_INV_DIS V_TPT_REM_INV_DIS(1U)
-
-#define S_TPT_ADDR_TYPE 26
-#define V_TPT_ADDR_TYPE(x) ((x) << S_TPT_ADDR_TYPE)
-#define F_TPT_ADDR_TYPE V_TPT_ADDR_TYPE(1U)
-
-#define S_TPT_MW_BIND_ENABLE 25
-#define V_TPT_MW_BIND_ENABLE(x) ((x) << S_TPT_MW_BIND_ENABLE)
-#define F_TPT_MW_BIND_ENABLE V_TPT_MW_BIND_ENABLE(1U)
-
-#define S_TPT_PAGE_SIZE 20
-#define M_TPT_PAGE_SIZE 0x1F
-#define V_TPT_PAGE_SIZE(x) ((x) << S_TPT_PAGE_SIZE)
-#define G_TPT_PAGE_SIZE(x) (((x) >> S_TPT_PAGE_SIZE) & M_TPT_PAGE_SIZE)
-
-#define S_TPT_PBL_ADDR 0
-#define M_TPT_PBL_ADDR 0x1FFFFFFF
-#define V_TPT_PBL_ADDR(x) ((x) << S_TPT_PBL_ADDR)
-#define G_TPT_PBL_ADDR(x) (((x) >> S_TPT_PBL_ADDR) & M_TPT_PBL_ADDR)
-
-#define S_TPT_QPID 0
-#define M_TPT_QPID 0xFFFFF
-#define V_TPT_QPID(x) ((x) << S_TPT_QPID)
-#define G_TPT_QPID(x) (((x) >> S_TPT_QPID) & M_TPT_QPID)
-
-#define S_TPT_PSTAG 0
-#define M_TPT_PSTAG 0xFFFFFF
-#define V_TPT_PSTAG(x) ((x) << S_TPT_PSTAG)
-#define G_TPT_PSTAG(x) (((x) >> S_TPT_PSTAG) & M_TPT_PSTAG)
-
-#define S_TPT_PBL_SIZE 0
-#define M_TPT_PBL_SIZE 0xFFFFF
-#define V_TPT_PBL_SIZE(x) ((x) << S_TPT_PBL_SIZE)
-#define G_TPT_PBL_SIZE(x) (((x) >> S_TPT_PBL_SIZE) & M_TPT_PBL_SIZE)
-
-/*
- * CQE defs
- */
-struct t3_cqe {
- __be32 header;
- __be32 len;
- union {
- struct {
- __be32 stag;
- __be32 msn;
- } rcqe;
- struct {
- u32 wrid_hi;
- u32 wrid_low;
- } scqe;
- } u;
-};
-
-#define S_CQE_OOO 31
-#define M_CQE_OOO 0x1
-#define G_CQE_OOO(x) ((((x) >> S_CQE_OOO)) & M_CQE_OOO)
-#define V_CEQ_OOO(x) ((x)<<S_CQE_OOO)
-
-#define S_CQE_QPID 12
-#define M_CQE_QPID 0x7FFFF
-#define G_CQE_QPID(x) ((((x) >> S_CQE_QPID)) & M_CQE_QPID)
-#define V_CQE_QPID(x) ((x)<<S_CQE_QPID)
-
-#define S_CQE_SWCQE 11
-#define M_CQE_SWCQE 0x1
-#define G_CQE_SWCQE(x) ((((x) >> S_CQE_SWCQE)) & M_CQE_SWCQE)
-#define V_CQE_SWCQE(x) ((x)<<S_CQE_SWCQE)
-
-#define S_CQE_GENBIT 10
-#define M_CQE_GENBIT 0x1
-#define G_CQE_GENBIT(x) (((x) >> S_CQE_GENBIT) & M_CQE_GENBIT)
-#define V_CQE_GENBIT(x) ((x)<<S_CQE_GENBIT)
-
-#define S_CQE_STATUS 5
-#define M_CQE_STATUS 0x1F
-#define G_CQE_STATUS(x) ((((x) >> S_CQE_STATUS)) & M_CQE_STATUS)
-#define V_CQE_STATUS(x) ((x)<<S_CQE_STATUS)
-
-#define S_CQE_TYPE 4
-#define M_CQE_TYPE 0x1
-#define G_CQE_TYPE(x) ((((x) >> S_CQE_TYPE)) & M_CQE_TYPE)
-#define V_CQE_TYPE(x) ((x)<<S_CQE_TYPE)
-
-#define S_CQE_OPCODE 0
-#define M_CQE_OPCODE 0xF
-#define G_CQE_OPCODE(x) ((((x) >> S_CQE_OPCODE)) & M_CQE_OPCODE)
-#define V_CQE_OPCODE(x) ((x)<<S_CQE_OPCODE)
-
-#define SW_CQE(x) (G_CQE_SWCQE(be32_to_cpu((x).header)))
-#define CQE_OOO(x) (G_CQE_OOO(be32_to_cpu((x).header)))
-#define CQE_QPID(x) (G_CQE_QPID(be32_to_cpu((x).header)))
-#define CQE_GENBIT(x) (G_CQE_GENBIT(be32_to_cpu((x).header)))
-#define CQE_TYPE(x) (G_CQE_TYPE(be32_to_cpu((x).header)))
-#define SQ_TYPE(x) (CQE_TYPE((x)))
-#define RQ_TYPE(x) (!CQE_TYPE((x)))
-#define CQE_STATUS(x) (G_CQE_STATUS(be32_to_cpu((x).header)))
-#define CQE_OPCODE(x) (G_CQE_OPCODE(be32_to_cpu((x).header)))
-
-#define CQE_SEND_OPCODE(x)( \
- (G_CQE_OPCODE(be32_to_cpu((x).header)) == T3_SEND) || \
- (G_CQE_OPCODE(be32_to_cpu((x).header)) == T3_SEND_WITH_SE) || \
- (G_CQE_OPCODE(be32_to_cpu((x).header)) == T3_SEND_WITH_INV) || \
- (G_CQE_OPCODE(be32_to_cpu((x).header)) == T3_SEND_WITH_SE_INV))
-
-#define CQE_LEN(x) (be32_to_cpu((x).len))
-
-/* used for RQ completion processing */
-#define CQE_WRID_STAG(x) (be32_to_cpu((x).u.rcqe.stag))
-#define CQE_WRID_MSN(x) (be32_to_cpu((x).u.rcqe.msn))
-
-/* used for SQ completion processing */
-#define CQE_WRID_SQ_WPTR(x) ((x).u.scqe.wrid_hi)
-#define CQE_WRID_WPTR(x) ((x).u.scqe.wrid_low)
-
-/* generic accessor macros */
-#define CQE_WRID_HI(x) ((x).u.scqe.wrid_hi)
-#define CQE_WRID_LOW(x) ((x).u.scqe.wrid_low)
-
-#define TPT_ERR_SUCCESS 0x0
-#define TPT_ERR_STAG 0x1 /* STAG invalid: either the */
- /* STAG is offlimt, being 0, */
- /* or STAG_key mismatch */
-#define TPT_ERR_PDID 0x2 /* PDID mismatch */
-#define TPT_ERR_QPID 0x3 /* QPID mismatch */
-#define TPT_ERR_ACCESS 0x4 /* Invalid access right */
-#define TPT_ERR_WRAP 0x5 /* Wrap error */
-#define TPT_ERR_BOUND 0x6 /* base and bounds voilation */
-#define TPT_ERR_INVALIDATE_SHARED_MR 0x7 /* attempt to invalidate a */
- /* shared memory region */
-#define TPT_ERR_INVALIDATE_MR_WITH_MW_BOUND 0x8 /* attempt to invalidate a */
- /* shared memory region */
-#define TPT_ERR_ECC 0x9 /* ECC error detected */
-#define TPT_ERR_ECC_PSTAG 0xA /* ECC error detected when */
- /* reading PSTAG for a MW */
- /* Invalidate */
-#define TPT_ERR_PBL_ADDR_BOUND 0xB /* pbl addr out of bounds: */
- /* software error */
-#define TPT_ERR_SWFLUSH 0xC /* SW FLUSHED */
-#define TPT_ERR_CRC 0x10 /* CRC error */
-#define TPT_ERR_MARKER 0x11 /* Marker error */
-#define TPT_ERR_PDU_LEN_ERR 0x12 /* invalid PDU length */
-#define TPT_ERR_OUT_OF_RQE 0x13 /* out of RQE */
-#define TPT_ERR_DDP_VERSION 0x14 /* wrong DDP version */
-#define TPT_ERR_RDMA_VERSION 0x15 /* wrong RDMA version */
-#define TPT_ERR_OPCODE 0x16 /* invalid rdma opcode */
-#define TPT_ERR_DDP_QUEUE_NUM 0x17 /* invalid ddp queue number */
-#define TPT_ERR_MSN 0x18 /* MSN error */
-#define TPT_ERR_TBIT 0x19 /* tag bit not set correctly */
-#define TPT_ERR_MO 0x1A /* MO not 0 for TERMINATE */
- /* or READ_REQ */
-#define TPT_ERR_MSN_GAP 0x1B
-#define TPT_ERR_MSN_RANGE 0x1C
-#define TPT_ERR_IRD_OVERFLOW 0x1D
-#define TPT_ERR_RQE_ADDR_BOUND 0x1E /* RQE addr out of bounds: */
- /* software error */
-#define TPT_ERR_INTERNAL_ERR 0x1F /* internal error (opcode */
- /* mismatch) */
-
-struct t3_swsq {
- __u64 wr_id;
- struct t3_cqe cqe;
- __u32 sq_wptr;
- __be32 read_len;
- int opcode;
- int complete;
- int signaled;
-};
-
-struct t3_swrq {
- __u64 wr_id;
- __u32 pbl_addr;
-};
-
-/*
- * A T3 WQ implements both the SQ and RQ.
- */
-struct t3_wq {
- union t3_wr *queue; /* DMA accessible memory */
- dma_addr_t dma_addr; /* DMA address for HW */
- DEFINE_DMA_UNMAP_ADDR(mapping); /* unmap kruft */
- u32 error; /* 1 once we go to ERROR */
- u32 qpid;
- u32 wptr; /* idx to next available WR slot */
- u32 size_log2; /* total wq size */
- struct t3_swsq *sq; /* SW SQ */
- struct t3_swsq *oldest_read; /* tracks oldest pending read */
- u32 sq_wptr; /* sq_wptr - sq_rptr == count of */
- u32 sq_rptr; /* pending wrs */
- u32 sq_size_log2; /* sq size */
- struct t3_swrq *rq; /* SW RQ (holds consumer wr_ids */
- u32 rq_wptr; /* rq_wptr - rq_rptr == count of */
- u32 rq_rptr; /* pending wrs */
- struct t3_swrq *rq_oldest_wr; /* oldest wr on the SW RQ */
- u32 rq_size_log2; /* rq size */
- u32 rq_addr; /* rq adapter address */
- void __iomem *doorbell; /* kernel db */
- u64 udb; /* user db if any */
- struct cxio_rdev *rdev;
-};
-
-struct t3_cq {
- u32 cqid;
- u32 rptr;
- u32 wptr;
- u32 size_log2;
- dma_addr_t dma_addr;
- DEFINE_DMA_UNMAP_ADDR(mapping);
- struct t3_cqe *queue;
- struct t3_cqe *sw_queue;
- u32 sw_rptr;
- u32 sw_wptr;
-};
-
-#define CQ_VLD_ENTRY(ptr,size_log2,cqe) (Q_GENBIT(ptr,size_log2) == \
- CQE_GENBIT(*cqe))
-
-struct t3_cq_status_page {
- u32 cq_err;
-};
-
-static inline int cxio_cq_in_error(struct t3_cq *cq)
-{
- return ((struct t3_cq_status_page *)
- &cq->queue[1 << cq->size_log2])->cq_err;
-}
-
-static inline void cxio_set_cq_in_error(struct t3_cq *cq)
-{
- ((struct t3_cq_status_page *)
- &cq->queue[1 << cq->size_log2])->cq_err = 1;
-}
-
-static inline void cxio_set_wq_in_error(struct t3_wq *wq)
-{
- wq->queue->wq_in_err.err |= 1;
-}
-
-static inline void cxio_disable_wq_db(struct t3_wq *wq)
-{
- wq->queue->wq_in_err.err |= 2;
-}
-
-static inline void cxio_enable_wq_db(struct t3_wq *wq)
-{
- wq->queue->wq_in_err.err &= ~2;
-}
-
-static inline int cxio_wq_db_enabled(struct t3_wq *wq)
-{
- return !(wq->queue->wq_in_err.err & 2);
-}
-
-static inline struct t3_cqe *cxio_next_hw_cqe(struct t3_cq *cq)
-{
- struct t3_cqe *cqe;
-
- cqe = cq->queue + (Q_PTR2IDX(cq->rptr, cq->size_log2));
- if (CQ_VLD_ENTRY(cq->rptr, cq->size_log2, cqe))
- return cqe;
- return NULL;
-}
-
-static inline struct t3_cqe *cxio_next_sw_cqe(struct t3_cq *cq)
-{
- struct t3_cqe *cqe;
-
- if (!Q_EMPTY(cq->sw_rptr, cq->sw_wptr)) {
- cqe = cq->sw_queue + (Q_PTR2IDX(cq->sw_rptr, cq->size_log2));
- return cqe;
- }
- return NULL;
-}
-
-static inline struct t3_cqe *cxio_next_cqe(struct t3_cq *cq)
-{
- struct t3_cqe *cqe;
-
- if (!Q_EMPTY(cq->sw_rptr, cq->sw_wptr)) {
- cqe = cq->sw_queue + (Q_PTR2IDX(cq->sw_rptr, cq->size_log2));
- return cqe;
- }
- cqe = cq->queue + (Q_PTR2IDX(cq->rptr, cq->size_log2));
- if (CQ_VLD_ENTRY(cq->rptr, cq->size_log2, cqe))
- return cqe;
- return NULL;
-}
-
-#endif
diff --git a/drivers/infiniband/hw/cxgb3/iwch.c b/drivers/infiniband/hw/cxgb3/iwch.c
deleted file mode 100644
index b3e11329801d..000000000000
--- a/drivers/infiniband/hw/cxgb3/iwch.c
+++ /dev/null
@@ -1,292 +0,0 @@
-/*
- * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-
-#include <rdma/ib_verbs.h>
-
-#include "cxgb3_offload.h"
-#include "iwch_provider.h"
-#include <rdma/cxgb3-abi.h>
-#include "iwch.h"
-#include "iwch_cm.h"
-
-#define DRV_VERSION "1.1"
-
-MODULE_AUTHOR("Boyd Faulkner, Steve Wise");
-MODULE_DESCRIPTION("Chelsio T3 RDMA Driver");
-MODULE_LICENSE("Dual BSD/GPL");
-MODULE_VERSION(DRV_VERSION);
-
-static void open_rnic_dev(struct t3cdev *);
-static void close_rnic_dev(struct t3cdev *);
-static void iwch_event_handler(struct t3cdev *, u32, u32);
-
-struct cxgb3_client t3c_client = {
- .name = "iw_cxgb3",
- .add = open_rnic_dev,
- .remove = close_rnic_dev,
- .handlers = t3c_handlers,
- .redirect = iwch_ep_redirect,
- .event_handler = iwch_event_handler
-};
-
-static LIST_HEAD(dev_list);
-static DEFINE_MUTEX(dev_mutex);
-
-static int disable_qp_db(int id, void *p, void *data)
-{
- struct iwch_qp *qhp = p;
-
- cxio_disable_wq_db(&qhp->wq);
- return 0;
-}
-
-static int enable_qp_db(int id, void *p, void *data)
-{
- struct iwch_qp *qhp = p;
-
- if (data)
- ring_doorbell(qhp->rhp->rdev.ctrl_qp.doorbell, qhp->wq.qpid);
- cxio_enable_wq_db(&qhp->wq);
- return 0;
-}
-
-static void disable_dbs(struct iwch_dev *rnicp)
-{
- spin_lock_irq(&rnicp->lock);
- idr_for_each(&rnicp->qpidr, disable_qp_db, NULL);
- spin_unlock_irq(&rnicp->lock);
-}
-
-static void enable_dbs(struct iwch_dev *rnicp, int ring_db)
-{
- spin_lock_irq(&rnicp->lock);
- idr_for_each(&rnicp->qpidr, enable_qp_db,
- (void *)(unsigned long)ring_db);
- spin_unlock_irq(&rnicp->lock);
-}
-
-static void iwch_db_drop_task(struct work_struct *work)
-{
- struct iwch_dev *rnicp = container_of(work, struct iwch_dev,
- db_drop_task.work);
- enable_dbs(rnicp, 1);
-}
-
-static void rnic_init(struct iwch_dev *rnicp)
-{
- PDBG("%s iwch_dev %p\n", __func__, rnicp);
- idr_init(&rnicp->cqidr);
- idr_init(&rnicp->qpidr);
- idr_init(&rnicp->mmidr);
- spin_lock_init(&rnicp->lock);
- INIT_DELAYED_WORK(&rnicp->db_drop_task, iwch_db_drop_task);
-
- rnicp->attr.max_qps = T3_MAX_NUM_QP - 32;
- rnicp->attr.max_wrs = T3_MAX_QP_DEPTH;
- rnicp->attr.max_sge_per_wr = T3_MAX_SGE;
- rnicp->attr.max_sge_per_rdma_write_wr = T3_MAX_SGE;
- rnicp->attr.max_cqs = T3_MAX_NUM_CQ - 1;
- rnicp->attr.max_cqes_per_cq = T3_MAX_CQ_DEPTH;
- rnicp->attr.max_mem_regs = cxio_num_stags(&rnicp->rdev);
- rnicp->attr.max_phys_buf_entries = T3_MAX_PBL_SIZE;
- rnicp->attr.max_pds = T3_MAX_NUM_PD - 1;
- rnicp->attr.mem_pgsizes_bitmask = T3_PAGESIZE_MASK;
- rnicp->attr.max_mr_size = T3_MAX_MR_SIZE;
- rnicp->attr.can_resize_wq = 0;
- rnicp->attr.max_rdma_reads_per_qp = 8;
- rnicp->attr.max_rdma_read_resources =
- rnicp->attr.max_rdma_reads_per_qp * rnicp->attr.max_qps;
- rnicp->attr.max_rdma_read_qp_depth = 8; /* IRD */
- rnicp->attr.max_rdma_read_depth =
- rnicp->attr.max_rdma_read_qp_depth * rnicp->attr.max_qps;
- rnicp->attr.rq_overflow_handled = 0;
- rnicp->attr.can_modify_ird = 0;
- rnicp->attr.can_modify_ord = 0;
- rnicp->attr.max_mem_windows = rnicp->attr.max_mem_regs - 1;
- rnicp->attr.stag0_value = 1;
- rnicp->attr.zbva_support = 1;
- rnicp->attr.local_invalidate_fence = 1;
- rnicp->attr.cq_overflow_detection = 1;
- return;
-}
-
-static void open_rnic_dev(struct t3cdev *tdev)
-{
- struct iwch_dev *rnicp;
-
- PDBG("%s t3cdev %p\n", __func__, tdev);
- printk_once(KERN_INFO MOD "Chelsio T3 RDMA Driver - version %s\n",
- DRV_VERSION);
- rnicp = (struct iwch_dev *)ib_alloc_device(sizeof(*rnicp));
- if (!rnicp) {
- printk(KERN_ERR MOD "Cannot allocate ib device\n");
- return;
- }
- rnicp->rdev.ulp = rnicp;
- rnicp->rdev.t3cdev_p = tdev;
-
- mutex_lock(&dev_mutex);
-
- if (cxio_rdev_open(&rnicp->rdev)) {
- mutex_unlock(&dev_mutex);
- printk(KERN_ERR MOD "Unable to open CXIO rdev\n");
- ib_dealloc_device(&rnicp->ibdev);
- return;
- }
-
- rnic_init(rnicp);
-
- list_add_tail(&rnicp->entry, &dev_list);
- mutex_unlock(&dev_mutex);
-
- if (iwch_register_device(rnicp)) {
- printk(KERN_ERR MOD "Unable to register device\n");
- close_rnic_dev(tdev);
- }
- printk(KERN_INFO MOD "Initialized device %s\n",
- pci_name(rnicp->rdev.rnic_info.pdev));
- return;
-}
-
-static void close_rnic_dev(struct t3cdev *tdev)
-{
- struct iwch_dev *dev, *tmp;
- PDBG("%s t3cdev %p\n", __func__, tdev);
- mutex_lock(&dev_mutex);
- list_for_each_entry_safe(dev, tmp, &dev_list, entry) {
- if (dev->rdev.t3cdev_p == tdev) {
- dev->rdev.flags = CXIO_ERROR_FATAL;
- synchronize_net();
- cancel_delayed_work_sync(&dev->db_drop_task);
- list_del(&dev->entry);
- iwch_unregister_device(dev);
- cxio_rdev_close(&dev->rdev);
- idr_destroy(&dev->cqidr);
- idr_destroy(&dev->qpidr);
- idr_destroy(&dev->mmidr);
- ib_dealloc_device(&dev->ibdev);
- break;
- }
- }
- mutex_unlock(&dev_mutex);
-}
-
-static void iwch_event_handler(struct t3cdev *tdev, u32 evt, u32 port_id)
-{
- struct cxio_rdev *rdev = tdev->ulp;
- struct iwch_dev *rnicp;
- struct ib_event event;
- u32 portnum = port_id + 1;
- int dispatch = 0;
-
- if (!rdev)
- return;
- rnicp = rdev_to_iwch_dev(rdev);
- switch (evt) {
- case OFFLOAD_STATUS_DOWN: {
- rdev->flags = CXIO_ERROR_FATAL;
- synchronize_net();
- event.event = IB_EVENT_DEVICE_FATAL;
- dispatch = 1;
- break;
- }
- case OFFLOAD_PORT_DOWN: {
- event.event = IB_EVENT_PORT_ERR;
- dispatch = 1;
- break;
- }
- case OFFLOAD_PORT_UP: {
- event.event = IB_EVENT_PORT_ACTIVE;
- dispatch = 1;
- break;
- }
- case OFFLOAD_DB_FULL: {
- disable_dbs(rnicp);
- break;
- }
- case OFFLOAD_DB_EMPTY: {
- enable_dbs(rnicp, 1);
- break;
- }
- case OFFLOAD_DB_DROP: {
- unsigned long delay = 1000;
- unsigned short r;
-
- disable_dbs(rnicp);
- get_random_bytes(&r, 2);
- delay += r & 1023;
-
- /*
- * delay is between 1000-2023 usecs.
- */
- schedule_delayed_work(&rnicp->db_drop_task,
- usecs_to_jiffies(delay));
- break;
- }
- }
-
- if (dispatch) {
- event.device = &rnicp->ibdev;
- event.element.port_num = portnum;
- ib_dispatch_event(&event);
- }
-
- return;
-}
-
-static int __init iwch_init_module(void)
-{
- int err;
-
- err = cxio_hal_init();
- if (err)
- return err;
- err = iwch_cm_init();
- if (err)
- return err;
- cxio_register_ev_cb(iwch_ev_dispatch);
- cxgb3_register_client(&t3c_client);
- return 0;
-}
-
-static void __exit iwch_exit_module(void)
-{
- cxgb3_unregister_client(&t3c_client);
- cxio_unregister_ev_cb(iwch_ev_dispatch);
- iwch_cm_term();
- cxio_hal_exit();
-}
-
-module_init(iwch_init_module);
-module_exit(iwch_exit_module);
diff --git a/drivers/infiniband/hw/cxgb3/iwch.h b/drivers/infiniband/hw/cxgb3/iwch.h
deleted file mode 100644
index 837862287a29..000000000000
--- a/drivers/infiniband/hw/cxgb3/iwch.h
+++ /dev/null
@@ -1,180 +0,0 @@
-/*
- * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef __IWCH_H__
-#define __IWCH_H__
-
-#include <linux/mutex.h>
-#include <linux/list.h>
-#include <linux/spinlock.h>
-#include <linux/idr.h>
-#include <linux/workqueue.h>
-
-#include <rdma/ib_verbs.h>
-
-#include "cxio_hal.h"
-#include "cxgb3_offload.h"
-
-struct iwch_pd;
-struct iwch_cq;
-struct iwch_qp;
-struct iwch_mr;
-
-struct iwch_rnic_attributes {
- u32 max_qps;
- u32 max_wrs; /* Max for any SQ/RQ */
- u32 max_sge_per_wr;
- u32 max_sge_per_rdma_write_wr; /* for RDMA Write WR */
- u32 max_cqs;
- u32 max_cqes_per_cq;
- u32 max_mem_regs;
- u32 max_phys_buf_entries; /* for phys buf list */
- u32 max_pds;
-
- /*
- * The memory page sizes supported by this RNIC.
- * Bit position i in bitmap indicates page of
- * size (4k)^i. Phys block list mode unsupported.
- */
- u32 mem_pgsizes_bitmask;
- u64 max_mr_size;
- u8 can_resize_wq;
-
- /*
- * The maximum number of RDMA Reads that can be outstanding
- * per QP with this RNIC as the target.
- */
- u32 max_rdma_reads_per_qp;
-
- /*
- * The maximum number of resources used for RDMA Reads
- * by this RNIC with this RNIC as the target.
- */
- u32 max_rdma_read_resources;
-
- /*
- * The max depth per QP for initiation of RDMA Read
- * by this RNIC.
- */
- u32 max_rdma_read_qp_depth;
-
- /*
- * The maximum depth for initiation of RDMA Read
- * operations by this RNIC on all QPs
- */
- u32 max_rdma_read_depth;
- u8 rq_overflow_handled;
- u32 can_modify_ird;
- u32 can_modify_ord;
- u32 max_mem_windows;
- u32 stag0_value;
- u8 zbva_support;
- u8 local_invalidate_fence;
- u32 cq_overflow_detection;
-};
-
-struct iwch_dev {
- struct ib_device ibdev;
- struct cxio_rdev rdev;
- u32 device_cap_flags;
- struct iwch_rnic_attributes attr;
- struct idr cqidr;
- struct idr qpidr;
- struct idr mmidr;
- spinlock_t lock;
- struct list_head entry;
- struct delayed_work db_drop_task;
-};
-
-static inline struct iwch_dev *to_iwch_dev(struct ib_device *ibdev)
-{
- return container_of(ibdev, struct iwch_dev, ibdev);
-}
-
-static inline struct iwch_dev *rdev_to_iwch_dev(struct cxio_rdev *rdev)
-{
- return container_of(rdev, struct iwch_dev, rdev);
-}
-
-static inline int t3b_device(const struct iwch_dev *rhp)
-{
- return rhp->rdev.t3cdev_p->type == T3B;
-}
-
-static inline int t3a_device(const struct iwch_dev *rhp)
-{
- return rhp->rdev.t3cdev_p->type == T3A;
-}
-
-static inline struct iwch_cq *get_chp(struct iwch_dev *rhp, u32 cqid)
-{
- return idr_find(&rhp->cqidr, cqid);
-}
-
-static inline struct iwch_qp *get_qhp(struct iwch_dev *rhp, u32 qpid)
-{
- return idr_find(&rhp->qpidr, qpid);
-}
-
-static inline struct iwch_mr *get_mhp(struct iwch_dev *rhp, u32 mmid)
-{
- return idr_find(&rhp->mmidr, mmid);
-}
-
-static inline int insert_handle(struct iwch_dev *rhp, struct idr *idr,
- void *handle, u32 id)
-{
- int ret;
-
- idr_preload(GFP_KERNEL);
- spin_lock_irq(&rhp->lock);
-
- ret = idr_alloc(idr, handle, id, id + 1, GFP_NOWAIT);
-
- spin_unlock_irq(&rhp->lock);
- idr_preload_end();
-
- BUG_ON(ret == -ENOSPC);
- return ret < 0 ? ret : 0;
-}
-
-static inline void remove_handle(struct iwch_dev *rhp, struct idr *idr, u32 id)
-{
- spin_lock_irq(&rhp->lock);
- idr_remove(idr, id);
- spin_unlock_irq(&rhp->lock);
-}
-
-extern struct cxgb3_client t3c_client;
-extern cxgb3_cpl_handler_func t3c_handlers[NUM_CPL_CMDS];
-extern void iwch_ev_dispatch(struct cxio_rdev *rdev_p, struct sk_buff *skb);
-
-#endif
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c
deleted file mode 100644
index 65ee64400deb..000000000000
--- a/drivers/infiniband/hw/cxgb3/iwch_cm.c
+++ /dev/null
@@ -1,2272 +0,0 @@
-/*
- * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include <linux/module.h>
-#include <linux/list.h>
-#include <linux/slab.h>
-#include <linux/workqueue.h>
-#include <linux/skbuff.h>
-#include <linux/timer.h>
-#include <linux/notifier.h>
-#include <linux/inetdevice.h>
-
-#include <net/neighbour.h>
-#include <net/netevent.h>
-#include <net/route.h>
-
-#include "tcb.h"
-#include "cxgb3_offload.h"
-#include "iwch.h"
-#include "iwch_provider.h"
-#include "iwch_cm.h"
-
-static char *states[] = {
- "idle",
- "listen",
- "connecting",
- "mpa_wait_req",
- "mpa_req_sent",
- "mpa_req_rcvd",
- "mpa_rep_sent",
- "fpdu_mode",
- "aborting",
- "closing",
- "moribund",
- "dead",
- NULL,
-};
-
-int peer2peer = 0;
-module_param(peer2peer, int, 0644);
-MODULE_PARM_DESC(peer2peer, "Support peer2peer ULPs (default=0)");
-
-static int ep_timeout_secs = 60;
-module_param(ep_timeout_secs, int, 0644);
-MODULE_PARM_DESC(ep_timeout_secs, "CM Endpoint operation timeout "
- "in seconds (default=60)");
-
-static int mpa_rev = 1;
-module_param(mpa_rev, int, 0644);
-MODULE_PARM_DESC(mpa_rev, "MPA Revision, 0 supports amso1100, "
- "1 is spec compliant. (default=1)");
-
-static int markers_enabled = 0;
-module_param(markers_enabled, int, 0644);
-MODULE_PARM_DESC(markers_enabled, "Enable MPA MARKERS (default(0)=disabled)");
-
-static int crc_enabled = 1;
-module_param(crc_enabled, int, 0644);
-MODULE_PARM_DESC(crc_enabled, "Enable MPA CRC (default(1)=enabled)");
-
-static int rcv_win = 256 * 1024;
-module_param(rcv_win, int, 0644);
-MODULE_PARM_DESC(rcv_win, "TCP receive window in bytes (default=256)");
-
-static int snd_win = 32 * 1024;
-module_param(snd_win, int, 0644);
-MODULE_PARM_DESC(snd_win, "TCP send window in bytes (default=32KB)");
-
-static unsigned int nocong = 0;
-module_param(nocong, uint, 0644);
-MODULE_PARM_DESC(nocong, "Turn off congestion control (default=0)");
-
-static unsigned int cong_flavor = 1;
-module_param(cong_flavor, uint, 0644);
-MODULE_PARM_DESC(cong_flavor, "TCP Congestion control flavor (default=1)");
-
-static struct workqueue_struct *workq;
-
-static struct sk_buff_head rxq;
-
-static struct sk_buff *get_skb(struct sk_buff *skb, int len, gfp_t gfp);
-static void ep_timeout(unsigned long arg);
-static void connect_reply_upcall(struct iwch_ep *ep, int status);
-
-static void start_ep_timer(struct iwch_ep *ep)
-{
- PDBG("%s ep %p\n", __func__, ep);
- if (timer_pending(&ep->timer)) {
- PDBG("%s stopped / restarted timer ep %p\n", __func__, ep);
- del_timer_sync(&ep->timer);
- } else
- get_ep(&ep->com);
- ep->timer.expires = jiffies + ep_timeout_secs * HZ;
- ep->timer.data = (unsigned long)ep;
- ep->timer.function = ep_timeout;
- add_timer(&ep->timer);
-}
-
-static void stop_ep_timer(struct iwch_ep *ep)
-{
- PDBG("%s ep %p\n", __func__, ep);
- if (!timer_pending(&ep->timer)) {
- WARN(1, "%s timer stopped when its not running! ep %p state %u\n",
- __func__, ep, ep->com.state);
- return;
- }
- del_timer_sync(&ep->timer);
- put_ep(&ep->com);
-}
-
-static int iwch_l2t_send(struct t3cdev *tdev, struct sk_buff *skb, struct l2t_entry *l2e)
-{
- int error = 0;
- struct cxio_rdev *rdev;
-
- rdev = (struct cxio_rdev *)tdev->ulp;
- if (cxio_fatal_error(rdev)) {
- kfree_skb(skb);
- return -EIO;
- }
- error = l2t_send(tdev, skb, l2e);
- if (error < 0)
- kfree_skb(skb);
- return error < 0 ? error : 0;
-}
-
-int iwch_cxgb3_ofld_send(struct t3cdev *tdev, struct sk_buff *skb)
-{
- int error = 0;
- struct cxio_rdev *rdev;
-
- rdev = (struct cxio_rdev *)tdev->ulp;
- if (cxio_fatal_error(rdev)) {
- kfree_skb(skb);
- return -EIO;
- }
- error = cxgb3_ofld_send(tdev, skb);
- if (error < 0)
- kfree_skb(skb);
- return error < 0 ? error : 0;
-}
-
-static void release_tid(struct t3cdev *tdev, u32 hwtid, struct sk_buff *skb)
-{
- struct cpl_tid_release *req;
-
- skb = get_skb(skb, sizeof *req, GFP_KERNEL);
- if (!skb)
- return;
- req = (struct cpl_tid_release *) skb_put(skb, sizeof(*req));
- req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
- OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_TID_RELEASE, hwtid));
- skb->priority = CPL_PRIORITY_SETUP;
- iwch_cxgb3_ofld_send(tdev, skb);
- return;
-}
-
-int iwch_quiesce_tid(struct iwch_ep *ep)
-{
- struct cpl_set_tcb_field *req;
- struct sk_buff *skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
-
- if (!skb)
- return -ENOMEM;
- req = (struct cpl_set_tcb_field *) skb_put(skb, sizeof(*req));
- req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
- req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid));
- OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, ep->hwtid));
- req->reply = 0;
- req->cpu_idx = 0;
- req->word = htons(W_TCB_RX_QUIESCE);
- req->mask = cpu_to_be64(1ULL << S_TCB_RX_QUIESCE);
- req->val = cpu_to_be64(1 << S_TCB_RX_QUIESCE);
-
- skb->priority = CPL_PRIORITY_DATA;
- return iwch_cxgb3_ofld_send(ep->com.tdev, skb);
-}
-
-int iwch_resume_tid(struct iwch_ep *ep)
-{
- struct cpl_set_tcb_field *req;
- struct sk_buff *skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
-
- if (!skb)
- return -ENOMEM;
- req = (struct cpl_set_tcb_field *) skb_put(skb, sizeof(*req));
- req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
- req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid));
- OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, ep->hwtid));
- req->reply = 0;
- req->cpu_idx = 0;
- req->word = htons(W_TCB_RX_QUIESCE);
- req->mask = cpu_to_be64(1ULL << S_TCB_RX_QUIESCE);
- req->val = 0;
-
- skb->priority = CPL_PRIORITY_DATA;
- return iwch_cxgb3_ofld_send(ep->com.tdev, skb);
-}
-
-static void set_emss(struct iwch_ep *ep, u16 opt)
-{
- PDBG("%s ep %p opt %u\n", __func__, ep, opt);
- ep->emss = T3C_DATA(ep->com.tdev)->mtus[G_TCPOPT_MSS(opt)] - 40;
- if (G_TCPOPT_TSTAMP(opt))
- ep->emss -= 12;
- if (ep->emss < 128)
- ep->emss = 128;
- PDBG("emss=%d\n", ep->emss);
-}
-
-static enum iwch_ep_state state_read(struct iwch_ep_common *epc)
-{
- unsigned long flags;
- enum iwch_ep_state state;
-
- spin_lock_irqsave(&epc->lock, flags);
- state = epc->state;
- spin_unlock_irqrestore(&epc->lock, flags);
- return state;
-}
-
-static void __state_set(struct iwch_ep_common *epc, enum iwch_ep_state new)
-{
- epc->state = new;
-}
-
-static void state_set(struct iwch_ep_common *epc, enum iwch_ep_state new)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&epc->lock, flags);
- PDBG("%s - %s -> %s\n", __func__, states[epc->state], states[new]);
- __state_set(epc, new);
- spin_unlock_irqrestore(&epc->lock, flags);
- return;
-}
-
-static void *alloc_ep(int size, gfp_t gfp)
-{
- struct iwch_ep_common *epc;
-
- epc = kzalloc(size, gfp);
- if (epc) {
- kref_init(&epc->kref);
- spin_lock_init(&epc->lock);
- init_waitqueue_head(&epc->waitq);
- }
- PDBG("%s alloc ep %p\n", __func__, epc);
- return epc;
-}
-
-void __free_ep(struct kref *kref)
-{
- struct iwch_ep *ep;
- ep = container_of(container_of(kref, struct iwch_ep_common, kref),
- struct iwch_ep, com);
- PDBG("%s ep %p state %s\n", __func__, ep, states[state_read(&ep->com)]);
- if (test_bit(RELEASE_RESOURCES, &ep->com.flags)) {
- cxgb3_remove_tid(ep->com.tdev, (void *)ep, ep->hwtid);
- dst_release(ep->dst);
- l2t_release(ep->com.tdev, ep->l2t);
- }
- kfree(ep);
-}
-
-static void release_ep_resources(struct iwch_ep *ep)
-{
- PDBG("%s ep %p tid %d\n", __func__, ep, ep->hwtid);
- set_bit(RELEASE_RESOURCES, &ep->com.flags);
- put_ep(&ep->com);
-}
-
-static int status2errno(int status)
-{
- switch (status) {
- case CPL_ERR_NONE:
- return 0;
- case CPL_ERR_CONN_RESET:
- return -ECONNRESET;
- case CPL_ERR_ARP_MISS:
- return -EHOSTUNREACH;
- case CPL_ERR_CONN_TIMEDOUT:
- return -ETIMEDOUT;
- case CPL_ERR_TCAM_FULL:
- return -ENOMEM;
- case CPL_ERR_CONN_EXIST:
- return -EADDRINUSE;
- default:
- return -EIO;
- }
-}
-
-/*
- * Try and reuse skbs already allocated...
- */
-static struct sk_buff *get_skb(struct sk_buff *skb, int len, gfp_t gfp)
-{
- if (skb && !skb_is_nonlinear(skb) && !skb_cloned(skb)) {
- skb_trim(skb, 0);
- skb_get(skb);
- } else {
- skb = alloc_skb(len, gfp);
- }
- return skb;
-}
-
-static struct rtable *find_route(struct t3cdev *dev, __be32 local_ip,
- __be32 peer_ip, __be16 local_port,
- __be16 peer_port, u8 tos)
-{
- struct rtable *rt;
- struct flowi4 fl4;
-
- rt = ip_route_output_ports(&init_net, &fl4, NULL, peer_ip, local_ip,
- peer_port, local_port, IPPROTO_TCP,
- tos, 0);
- if (IS_ERR(rt))
- return NULL;
- return rt;
-}
-
-static unsigned int find_best_mtu(const struct t3c_data *d, unsigned short mtu)
-{
- int i = 0;
-
- while (i < d->nmtus - 1 && d->mtus[i + 1] <= mtu)
- ++i;
- return i;
-}
-
-static void arp_failure_discard(struct t3cdev *dev, struct sk_buff *skb)
-{
- PDBG("%s t3cdev %p\n", __func__, dev);
- kfree_skb(skb);
-}
-
-/*
- * Handle an ARP failure for an active open.
- */
-static void act_open_req_arp_failure(struct t3cdev *dev, struct sk_buff *skb)
-{
- printk(KERN_ERR MOD "ARP failure during connect\n");
- kfree_skb(skb);
-}
-
-/*
- * Handle an ARP failure for a CPL_ABORT_REQ. Change it into a no RST variant
- * and send it along.
- */
-static void abort_arp_failure(struct t3cdev *dev, struct sk_buff *skb)
-{
- struct cpl_abort_req *req = cplhdr(skb);
-
- PDBG("%s t3cdev %p\n", __func__, dev);
- req->cmd = CPL_ABORT_NO_RST;
- iwch_cxgb3_ofld_send(dev, skb);
-}
-
-static int send_halfclose(struct iwch_ep *ep, gfp_t gfp)
-{
- struct cpl_close_con_req *req;
- struct sk_buff *skb;
-
- PDBG("%s ep %p\n", __func__, ep);
- skb = get_skb(NULL, sizeof(*req), gfp);
- if (!skb) {
- printk(KERN_ERR MOD "%s - failed to alloc skb\n", __func__);
- return -ENOMEM;
- }
- skb->priority = CPL_PRIORITY_DATA;
- set_arp_failure_handler(skb, arp_failure_discard);
- req = (struct cpl_close_con_req *) skb_put(skb, sizeof(*req));
- req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_CLOSE_CON));
- req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid));
- OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, ep->hwtid));
- return iwch_l2t_send(ep->com.tdev, skb, ep->l2t);
-}
-
-static int send_abort(struct iwch_ep *ep, struct sk_buff *skb, gfp_t gfp)
-{
- struct cpl_abort_req *req;
-
- PDBG("%s ep %p\n", __func__, ep);
- skb = get_skb(skb, sizeof(*req), gfp);
- if (!skb) {
- printk(KERN_ERR MOD "%s - failed to alloc skb.\n",
- __func__);
- return -ENOMEM;
- }
- skb->priority = CPL_PRIORITY_DATA;
- set_arp_failure_handler(skb, abort_arp_failure);
- req = (struct cpl_abort_req *) skb_put(skb, sizeof(*req));
- memset(req, 0, sizeof(*req));
- req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_REQ));
- req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid));
- OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ABORT_REQ, ep->hwtid));
- req->cmd = CPL_ABORT_SEND_RST;
- return iwch_l2t_send(ep->com.tdev, skb, ep->l2t);
-}
-
-static int send_connect(struct iwch_ep *ep)
-{
- struct cpl_act_open_req *req;
- struct sk_buff *skb;
- u32 opt0h, opt0l, opt2;
- unsigned int mtu_idx;
- int wscale;
-
- PDBG("%s ep %p\n", __func__, ep);
-
- skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
- if (!skb) {
- printk(KERN_ERR MOD "%s - failed to alloc skb.\n",
- __func__);
- return -ENOMEM;
- }
- mtu_idx = find_best_mtu(T3C_DATA(ep->com.tdev), dst_mtu(ep->dst));
- wscale = compute_wscale(rcv_win);
- opt0h = V_NAGLE(0) |
- V_NO_CONG(nocong) |
- V_KEEP_ALIVE(1) |
- F_TCAM_BYPASS |
- V_WND_SCALE(wscale) |
- V_MSS_IDX(mtu_idx) |
- V_L2T_IDX(ep->l2t->idx) | V_TX_CHANNEL(ep->l2t->smt_idx);
- opt0l = V_TOS((ep->tos >> 2) & M_TOS) | V_RCV_BUFSIZ(rcv_win>>10);
- opt2 = F_RX_COALESCE_VALID | V_RX_COALESCE(0) | V_FLAVORS_VALID(1) |
- V_CONG_CONTROL_FLAVOR(cong_flavor);
- skb->priority = CPL_PRIORITY_SETUP;
- set_arp_failure_handler(skb, act_open_req_arp_failure);
-
- req = (struct cpl_act_open_req *) skb_put(skb, sizeof(*req));
- req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
- OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ACT_OPEN_REQ, ep->atid));
- req->local_port = ep->com.local_addr.sin_port;
- req->peer_port = ep->com.remote_addr.sin_port;
- req->local_ip = ep->com.local_addr.sin_addr.s_addr;
- req->peer_ip = ep->com.remote_addr.sin_addr.s_addr;
- req->opt0h = htonl(opt0h);
- req->opt0l = htonl(opt0l);
- req->params = 0;
- req->opt2 = htonl(opt2);
- return iwch_l2t_send(ep->com.tdev, skb, ep->l2t);
-}
-
-static void send_mpa_req(struct iwch_ep *ep, struct sk_buff *skb)
-{
- int mpalen;
- struct tx_data_wr *req;
- struct mpa_message *mpa;
- int len;
-
- PDBG("%s ep %p pd_len %d\n", __func__, ep, ep->plen);
-
- BUG_ON(skb_cloned(skb));
-
- mpalen = sizeof(*mpa) + ep->plen;
- if (skb->data + mpalen + sizeof(*req) > skb_end_pointer(skb)) {
- kfree_skb(skb);
- skb=alloc_skb(mpalen + sizeof(*req), GFP_KERNEL);
- if (!skb) {
- connect_reply_upcall(ep, -ENOMEM);
- return;
- }
- }
- skb_trim(skb, 0);
- skb_reserve(skb, sizeof(*req));
- skb_put(skb, mpalen);
- skb->priority = CPL_PRIORITY_DATA;
- mpa = (struct mpa_message *) skb->data;
- memset(mpa, 0, sizeof(*mpa));
- memcpy(mpa->key, MPA_KEY_REQ, sizeof(mpa->key));
- mpa->flags = (crc_enabled ? MPA_CRC : 0) |
- (markers_enabled ? MPA_MARKERS : 0);
- mpa->private_data_size = htons(ep->plen);
- mpa->revision = mpa_rev;
-
- if (ep->plen)
- memcpy(mpa->private_data, ep->mpa_pkt + sizeof(*mpa), ep->plen);
-
- /*
- * Reference the mpa skb. This ensures the data area
- * will remain in memory until the hw acks the tx.
- * Function tx_ack() will deref it.
- */
- skb_get(skb);
- set_arp_failure_handler(skb, arp_failure_discard);
- skb_reset_transport_header(skb);
- len = skb->len;
- req = (struct tx_data_wr *) skb_push(skb, sizeof(*req));
- req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)|F_WR_COMPL);
- req->wr_lo = htonl(V_WR_TID(ep->hwtid));
- req->len = htonl(len);
- req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) |
- V_TX_SNDBUF(snd_win>>15));
- req->flags = htonl(F_TX_INIT);
- req->sndseq = htonl(ep->snd_seq);
- BUG_ON(ep->mpa_skb);
- ep->mpa_skb = skb;
- iwch_l2t_send(ep->com.tdev, skb, ep->l2t);
- start_ep_timer(ep);
- state_set(&ep->com, MPA_REQ_SENT);
- return;
-}
-
-static int send_mpa_reject(struct iwch_ep *ep, const void *pdata, u8 plen)
-{
- int mpalen;
- struct tx_data_wr *req;
- struct mpa_message *mpa;
- struct sk_buff *skb;
-
- PDBG("%s ep %p plen %d\n", __func__, ep, plen);
-
- mpalen = sizeof(*mpa) + plen;
-
- skb = get_skb(NULL, mpalen + sizeof(*req), GFP_KERNEL);
- if (!skb) {
- printk(KERN_ERR MOD "%s - cannot alloc skb!\n", __func__);
- return -ENOMEM;
- }
- skb_reserve(skb, sizeof(*req));
- mpa = (struct mpa_message *) skb_put(skb, mpalen);
- memset(mpa, 0, sizeof(*mpa));
- memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
- mpa->flags = MPA_REJECT;
- mpa->revision = mpa_rev;
- mpa->private_data_size = htons(plen);
- if (plen)
- memcpy(mpa->private_data, pdata, plen);
-
- /*
- * Reference the mpa skb again. This ensures the data area
- * will remain in memory until the hw acks the tx.
- * Function tx_ack() will deref it.
- */
- skb_get(skb);
- skb->priority = CPL_PRIORITY_DATA;
- set_arp_failure_handler(skb, arp_failure_discard);
- skb_reset_transport_header(skb);
- req = (struct tx_data_wr *) skb_push(skb, sizeof(*req));
- req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)|F_WR_COMPL);
- req->wr_lo = htonl(V_WR_TID(ep->hwtid));
- req->len = htonl(mpalen);
- req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) |
- V_TX_SNDBUF(snd_win>>15));
- req->flags = htonl(F_TX_INIT);
- req->sndseq = htonl(ep->snd_seq);
- BUG_ON(ep->mpa_skb);
- ep->mpa_skb = skb;
- return iwch_l2t_send(ep->com.tdev, skb, ep->l2t);
-}
-
-static int send_mpa_reply(struct iwch_ep *ep, const void *pdata, u8 plen)
-{
- int mpalen;
- struct tx_data_wr *req;
- struct mpa_message *mpa;
- int len;
- struct sk_buff *skb;
-
- PDBG("%s ep %p plen %d\n", __func__, ep, plen);
-
- mpalen = sizeof(*mpa) + plen;
-
- skb = get_skb(NULL, mpalen + sizeof(*req), GFP_KERNEL);
- if (!skb) {
- printk(KERN_ERR MOD "%s - cannot alloc skb!\n", __func__);
- return -ENOMEM;
- }
- skb->priority = CPL_PRIORITY_DATA;
- skb_reserve(skb, sizeof(*req));
- mpa = (struct mpa_message *) skb_put(skb, mpalen);
- memset(mpa, 0, sizeof(*mpa));
- memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
- mpa->flags = (ep->mpa_attr.crc_enabled ? MPA_CRC : 0) |
- (markers_enabled ? MPA_MARKERS : 0);
- mpa->revision = mpa_rev;
- mpa->private_data_size = htons(plen);
- if (plen)
- memcpy(mpa->private_data, pdata, plen);
-
- /*
- * Reference the mpa skb. This ensures the data area
- * will remain in memory until the hw acks the tx.
- * Function tx_ack() will deref it.
- */
- skb_get(skb);
- set_arp_failure_handler(skb, arp_failure_discard);
- skb_reset_transport_header(skb);
- len = skb->len;
- req = (struct tx_data_wr *) skb_push(skb, sizeof(*req));
- req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)|F_WR_COMPL);
- req->wr_lo = htonl(V_WR_TID(ep->hwtid));
- req->len = htonl(len);
- req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) |
- V_TX_SNDBUF(snd_win>>15));
- req->flags = htonl(F_TX_INIT);
- req->sndseq = htonl(ep->snd_seq);
- ep->mpa_skb = skb;
- state_set(&ep->com, MPA_REP_SENT);
- return iwch_l2t_send(ep->com.tdev, skb, ep->l2t);
-}
-
-static int act_establish(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
-{
- struct iwch_ep *ep = ctx;
- struct cpl_act_establish *req = cplhdr(skb);
- unsigned int tid = GET_TID(req);
-
- PDBG("%s ep %p tid %d\n", __func__, ep, tid);
-
- dst_confirm(ep->dst);
-
- /* setup the hwtid for this connection */
- ep->hwtid = tid;
- cxgb3_insert_tid(ep->com.tdev, &t3c_client, ep, tid);
-
- ep->snd_seq = ntohl(req->snd_isn);
- ep->rcv_seq = ntohl(req->rcv_isn);
-
- set_emss(ep, ntohs(req->tcp_opt));
-
- /* dealloc the atid */
- cxgb3_free_atid(ep->com.tdev, ep->atid);
-
- /* start MPA negotiation */
- send_mpa_req(ep, skb);
-
- return 0;
-}
-
-static void abort_connection(struct iwch_ep *ep, struct sk_buff *skb, gfp_t gfp)
-{
- PDBG("%s ep %p\n", __FILE__, ep);
- state_set(&ep->com, ABORTING);
- send_abort(ep, skb, gfp);
-}
-
-static void close_complete_upcall(struct iwch_ep *ep)
-{
- struct iw_cm_event event;
-
- PDBG("%s ep %p\n", __func__, ep);
- memset(&event, 0, sizeof(event));
- event.event = IW_CM_EVENT_CLOSE;
- if (ep->com.cm_id) {
- PDBG("close complete delivered ep %p cm_id %p tid %d\n",
- ep, ep->com.cm_id, ep->hwtid);
- ep->com.cm_id->event_handler(ep->com.cm_id, &event);
- ep->com.cm_id->rem_ref(ep->com.cm_id);
- ep->com.cm_id = NULL;
- ep->com.qp = NULL;
- }
-}
-
-static void peer_close_upcall(struct iwch_ep *ep)
-{
- struct iw_cm_event event;
-
- PDBG("%s ep %p\n", __func__, ep);
- memset(&event, 0, sizeof(event));
- event.event = IW_CM_EVENT_DISCONNECT;
- if (ep->com.cm_id) {
- PDBG("peer close delivered ep %p cm_id %p tid %d\n",
- ep, ep->com.cm_id, ep->hwtid);
- ep->com.cm_id->event_handler(ep->com.cm_id, &event);
- }
-}
-
-static void peer_abort_upcall(struct iwch_ep *ep)
-{
- struct iw_cm_event event;
-
- PDBG("%s ep %p\n", __func__, ep);
- memset(&event, 0, sizeof(event));
- event.event = IW_CM_EVENT_CLOSE;
- event.status = -ECONNRESET;
- if (ep->com.cm_id) {
- PDBG("abort delivered ep %p cm_id %p tid %d\n", ep,
- ep->com.cm_id, ep->hwtid);
- ep->com.cm_id->event_handler(ep->com.cm_id, &event);
- ep->com.cm_id->rem_ref(ep->com.cm_id);
- ep->com.cm_id = NULL;
- ep->com.qp = NULL;
- }
-}
-
-static void connect_reply_upcall(struct iwch_ep *ep, int status)
-{
- struct iw_cm_event event;
-
- PDBG("%s ep %p status %d\n", __func__, ep, status);
- memset(&event, 0, sizeof(event));
- event.event = IW_CM_EVENT_CONNECT_REPLY;
- event.status = status;
- memcpy(&event.local_addr, &ep->com.local_addr,
- sizeof(ep->com.local_addr));
- memcpy(&event.remote_addr, &ep->com.remote_addr,
- sizeof(ep->com.remote_addr));
-
- if ((status == 0) || (status == -ECONNREFUSED)) {
- event.private_data_len = ep->plen;
- event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
- }
- if (ep->com.cm_id) {
- PDBG("%s ep %p tid %d status %d\n", __func__, ep,
- ep->hwtid, status);
- ep->com.cm_id->event_handler(ep->com.cm_id, &event);
- }
- if (status < 0) {
- ep->com.cm_id->rem_ref(ep->com.cm_id);
- ep->com.cm_id = NULL;
- ep->com.qp = NULL;
- }
-}
-
-static void connect_request_upcall(struct iwch_ep *ep)
-{
- struct iw_cm_event event;
-
- PDBG("%s ep %p tid %d\n", __func__, ep, ep->hwtid);
- memset(&event, 0, sizeof(event));
- event.event = IW_CM_EVENT_CONNECT_REQUEST;
- memcpy(&event.local_addr, &ep->com.local_addr,
- sizeof(ep->com.local_addr));
- memcpy(&event.remote_addr, &ep->com.remote_addr,
- sizeof(ep->com.local_addr));
- event.private_data_len = ep->plen;
- event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
- event.provider_data = ep;
- /*
- * Until ird/ord negotiation via MPAv2 support is added, send max
- * supported values
- */
- event.ird = event.ord = 8;
- if (state_read(&ep->parent_ep->com) != DEAD) {
- get_ep(&ep->com);
- ep->parent_ep->com.cm_id->event_handler(
- ep->parent_ep->com.cm_id,
- &event);
- }
- put_ep(&ep->parent_ep->com);
- ep->parent_ep = NULL;
-}
-
-static void established_upcall(struct iwch_ep *ep)
-{
- struct iw_cm_event event;
-
- PDBG("%s ep %p\n", __func__, ep);
- memset(&event, 0, sizeof(event));
- event.event = IW_CM_EVENT_ESTABLISHED;
- /*
- * Until ird/ord negotiation via MPAv2 support is added, send max
- * supported values
- */
- event.ird = event.ord = 8;
- if (ep->com.cm_id) {
- PDBG("%s ep %p tid %d\n", __func__, ep, ep->hwtid);
- ep->com.cm_id->event_handler(ep->com.cm_id, &event);
- }
-}
-
-static int update_rx_credits(struct iwch_ep *ep, u32 credits)
-{
- struct cpl_rx_data_ack *req;
- struct sk_buff *skb;
-
- PDBG("%s ep %p credits %u\n", __func__, ep, credits);
- skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
- if (!skb) {
- printk(KERN_ERR MOD "update_rx_credits - cannot alloc skb!\n");
- return 0;
- }
-
- req = (struct cpl_rx_data_ack *) skb_put(skb, sizeof(*req));
- req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
- OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RX_DATA_ACK, ep->hwtid));
- req->credit_dack = htonl(V_RX_CREDITS(credits) | V_RX_FORCE_ACK(1));
- skb->priority = CPL_PRIORITY_ACK;
- iwch_cxgb3_ofld_send(ep->com.tdev, skb);
- return credits;
-}
-
-static void process_mpa_reply(struct iwch_ep *ep, struct sk_buff *skb)
-{
- struct mpa_message *mpa;
- u16 plen;
- struct iwch_qp_attributes attrs;
- enum iwch_qp_attr_mask mask;
- int err;
-
- PDBG("%s ep %p\n", __func__, ep);
-
- /*
- * Stop mpa timer. If it expired, then the state has
- * changed and we bail since ep_timeout already aborted
- * the connection.
- */
- stop_ep_timer(ep);
- if (state_read(&ep->com) != MPA_REQ_SENT)
- return;
-
- /*
- * If we get more than the supported amount of private data
- * then we must fail this connection.
- */
- if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt)) {
- err = -EINVAL;
- goto err;
- }
-
- /*
- * copy the new data into our accumulation buffer.
- */
- skb_copy_from_linear_data(skb, &(ep->mpa_pkt[ep->mpa_pkt_len]),
- skb->len);
- ep->mpa_pkt_len += skb->len;
-
- /*
- * if we don't even have the mpa message, then bail.
- */
- if (ep->mpa_pkt_len < sizeof(*mpa))
- return;
- mpa = (struct mpa_message *) ep->mpa_pkt;
-
- /* Validate MPA header. */
- if (mpa->revision != mpa_rev) {
- err = -EPROTO;
- goto err;
- }
- if (memcmp(mpa->key, MPA_KEY_REP, sizeof(mpa->key))) {
- err = -EPROTO;
- goto err;
- }
-
- plen = ntohs(mpa->private_data_size);
-
- /*
- * Fail if there's too much private data.
- */
- if (plen > MPA_MAX_PRIVATE_DATA) {
- err = -EPROTO;
- goto err;
- }
-
- /*
- * If plen does not account for pkt size
- */
- if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) {
- err = -EPROTO;
- goto err;
- }
-
- ep->plen = (u8) plen;
-
- /*
- * If we don't have all the pdata yet, then bail.
- * We'll continue process when more data arrives.
- */
- if (ep->mpa_pkt_len < (sizeof(*mpa) + plen))
- return;
-
- if (mpa->flags & MPA_REJECT) {
- err = -ECONNREFUSED;
- goto err;
- }
-
- /*
- * If we get here we have accumulated the entire mpa
- * start reply message including private data. And
- * the MPA header is valid.
- */
- state_set(&ep->com, FPDU_MODE);
- ep->mpa_attr.initiator = 1;
- ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
- ep->mpa_attr.recv_marker_enabled = markers_enabled;
- ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
- ep->mpa_attr.version = mpa_rev;
- PDBG("%s - crc_enabled=%d, recv_marker_enabled=%d, "
- "xmit_marker_enabled=%d, version=%d\n", __func__,
- ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
- ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version);
-
- attrs.mpa_attr = ep->mpa_attr;
- attrs.max_ird = ep->ird;
- attrs.max_ord = ep->ord;
- attrs.llp_stream_handle = ep;
- attrs.next_state = IWCH_QP_STATE_RTS;
-
- mask = IWCH_QP_ATTR_NEXT_STATE |
- IWCH_QP_ATTR_LLP_STREAM_HANDLE | IWCH_QP_ATTR_MPA_ATTR |
- IWCH_QP_ATTR_MAX_IRD | IWCH_QP_ATTR_MAX_ORD;
-
- /* bind QP and TID with INIT_WR */
- err = iwch_modify_qp(ep->com.qp->rhp,
- ep->com.qp, mask, &attrs, 1);
- if (err)
- goto err;
-
- if (peer2peer && iwch_rqes_posted(ep->com.qp) == 0) {
- iwch_post_zb_read(ep);
- }
-
- goto out;
-err:
- abort_connection(ep, skb, GFP_KERNEL);
-out:
- connect_reply_upcall(ep, err);
- return;
-}
-
-static void process_mpa_request(struct iwch_ep *ep, struct sk_buff *skb)
-{
- struct mpa_message *mpa;
- u16 plen;
-
- PDBG("%s ep %p\n", __func__, ep);
-
- /*
- * Stop mpa timer. If it expired, then the state has
- * changed and we bail since ep_timeout already aborted
- * the connection.
- */
- stop_ep_timer(ep);
- if (state_read(&ep->com) != MPA_REQ_WAIT)
- return;
-
- /*
- * If we get more than the supported amount of private data
- * then we must fail this connection.
- */
- if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt)) {
- abort_connection(ep, skb, GFP_KERNEL);
- return;
- }
-
- PDBG("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__);
-
- /*
- * Copy the new data into our accumulation buffer.
- */
- skb_copy_from_linear_data(skb, &(ep->mpa_pkt[ep->mpa_pkt_len]),
- skb->len);
- ep->mpa_pkt_len += skb->len;
-
- /*
- * If we don't even have the mpa message, then bail.
- * We'll continue process when more data arrives.
- */
- if (ep->mpa_pkt_len < sizeof(*mpa))
- return;
- PDBG("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__);
- mpa = (struct mpa_message *) ep->mpa_pkt;
-
- /*
- * Validate MPA Header.
- */
- if (mpa->revision != mpa_rev) {
- abort_connection(ep, skb, GFP_KERNEL);
- return;
- }
-
- if (memcmp(mpa->key, MPA_KEY_REQ, sizeof(mpa->key))) {
- abort_connection(ep, skb, GFP_KERNEL);
- return;
- }
-
- plen = ntohs(mpa->private_data_size);
-
- /*
- * Fail if there's too much private data.
- */
- if (plen > MPA_MAX_PRIVATE_DATA) {
- abort_connection(ep, skb, GFP_KERNEL);
- return;
- }
-
- /*
- * If plen does not account for pkt size
- */
- if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) {
- abort_connection(ep, skb, GFP_KERNEL);
- return;
- }
- ep->plen = (u8) plen;
-
- /*
- * If we don't have all the pdata yet, then bail.
- */
- if (ep->mpa_pkt_len < (sizeof(*mpa) + plen))
- return;
-
- /*
- * If we get here we have accumulated the entire mpa
- * start reply message including private data.
- */
- ep->mpa_attr.initiator = 0;
- ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
- ep->mpa_attr.recv_marker_enabled = markers_enabled;
- ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
- ep->mpa_attr.version = mpa_rev;
- PDBG("%s - crc_enabled=%d, recv_marker_enabled=%d, "
- "xmit_marker_enabled=%d, version=%d\n", __func__,
- ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
- ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version);
-
- state_set(&ep->com, MPA_REQ_RCVD);
-
- /* drive upcall */
- connect_request_upcall(ep);
- return;
-}
-
-static int rx_data(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
-{
- struct iwch_ep *ep = ctx;
- struct cpl_rx_data *hdr = cplhdr(skb);
- unsigned int dlen = ntohs(hdr->len);
-
- PDBG("%s ep %p dlen %u\n", __func__, ep, dlen);
-
- skb_pull(skb, sizeof(*hdr));
- skb_trim(skb, dlen);
-
- ep->rcv_seq += dlen;
- BUG_ON(ep->rcv_seq != (ntohl(hdr->seq) + dlen));
-
- switch (state_read(&ep->com)) {
- case MPA_REQ_SENT:
- process_mpa_reply(ep, skb);
- break;
- case MPA_REQ_WAIT:
- process_mpa_request(ep, skb);
- break;
- case MPA_REP_SENT:
- break;
- default:
- printk(KERN_ERR MOD "%s Unexpected streaming data."
- " ep %p state %d tid %d\n",
- __func__, ep, state_read(&ep->com), ep->hwtid);
-
- /*
- * The ep will timeout and inform the ULP of the failure.
- * See ep_timeout().
- */
- break;
- }
-
- /* update RX credits */
- update_rx_credits(ep, dlen);
-
- return CPL_RET_BUF_DONE;
-}
-
-/*
- * Upcall from the adapter indicating data has been transmitted.
- * For us its just the single MPA request or reply. We can now free
- * the skb holding the mpa message.
- */
-static int tx_ack(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
-{
- struct iwch_ep *ep = ctx;
- struct cpl_wr_ack *hdr = cplhdr(skb);
- unsigned int credits = ntohs(hdr->credits);
- unsigned long flags;
- int post_zb = 0;
-
- PDBG("%s ep %p credits %u\n", __func__, ep, credits);
-
- if (credits == 0) {
- PDBG("%s 0 credit ack ep %p state %u\n",
- __func__, ep, state_read(&ep->com));
- return CPL_RET_BUF_DONE;
- }
-
- spin_lock_irqsave(&ep->com.lock, flags);
- BUG_ON(credits != 1);
- dst_confirm(ep->dst);
- if (!ep->mpa_skb) {
- PDBG("%s rdma_init wr_ack ep %p state %u\n",
- __func__, ep, ep->com.state);
- if (ep->mpa_attr.initiator) {
- PDBG("%s initiator ep %p state %u\n",
- __func__, ep, ep->com.state);
- if (peer2peer && ep->com.state == FPDU_MODE)
- post_zb = 1;
- } else {
- PDBG("%s responder ep %p state %u\n",
- __func__, ep, ep->com.state);
- if (ep->com.state == MPA_REQ_RCVD) {
- ep->com.rpl_done = 1;
- wake_up(&ep->com.waitq);
- }
- }
- } else {
- PDBG("%s lsm ack ep %p state %u freeing skb\n",
- __func__, ep, ep->com.state);
- kfree_skb(ep->mpa_skb);
- ep->mpa_skb = NULL;
- }
- spin_unlock_irqrestore(&ep->com.lock, flags);
- if (post_zb)
- iwch_post_zb_read(ep);
- return CPL_RET_BUF_DONE;
-}
-
-static int abort_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
-{
- struct iwch_ep *ep = ctx;
- unsigned long flags;
- int release = 0;
-
- PDBG("%s ep %p\n", __func__, ep);
- BUG_ON(!ep);
-
- /*
- * We get 2 abort replies from the HW. The first one must
- * be ignored except for scribbling that we need one more.
- */
- if (!test_and_set_bit(ABORT_REQ_IN_PROGRESS, &ep->com.flags)) {
- return CPL_RET_BUF_DONE;
- }
-
- spin_lock_irqsave(&ep->com.lock, flags);
- switch (ep->com.state) {
- case ABORTING:
- close_complete_upcall(ep);
- __state_set(&ep->com, DEAD);
- release = 1;
- break;
- default:
- printk(KERN_ERR "%s ep %p state %d\n",
- __func__, ep, ep->com.state);
- break;
- }
- spin_unlock_irqrestore(&ep->com.lock, flags);
-
- if (release)
- release_ep_resources(ep);
- return CPL_RET_BUF_DONE;
-}
-
-/*
- * Return whether a failed active open has allocated a TID
- */
-static inline int act_open_has_tid(int status)
-{
- return status != CPL_ERR_TCAM_FULL && status != CPL_ERR_CONN_EXIST &&
- status != CPL_ERR_ARP_MISS;
-}
-
-static int act_open_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
-{
- struct iwch_ep *ep = ctx;
- struct cpl_act_open_rpl *rpl = cplhdr(skb);
-
- PDBG("%s ep %p status %u errno %d\n", __func__, ep, rpl->status,
- status2errno(rpl->status));
- connect_reply_upcall(ep, status2errno(rpl->status));
- state_set(&ep->com, DEAD);
- if (ep->com.tdev->type != T3A && act_open_has_tid(rpl->status))
- release_tid(ep->com.tdev, GET_TID(rpl), NULL);
- cxgb3_free_atid(ep->com.tdev, ep->atid);
- dst_release(ep->dst);
- l2t_release(ep->com.tdev, ep->l2t);
- put_ep(&ep->com);
- return CPL_RET_BUF_DONE;
-}
-
-static int listen_start(struct iwch_listen_ep *ep)
-{
- struct sk_buff *skb;
- struct cpl_pass_open_req *req;
-
- PDBG("%s ep %p\n", __func__, ep);
- skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
- if (!skb) {
- printk(KERN_ERR MOD "t3c_listen_start failed to alloc skb!\n");
- return -ENOMEM;
- }
-
- req = (struct cpl_pass_open_req *) skb_put(skb, sizeof(*req));
- req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
- OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_PASS_OPEN_REQ, ep->stid));
- req->local_port = ep->com.local_addr.sin_port;
- req->local_ip = ep->com.local_addr.sin_addr.s_addr;
- req->peer_port = 0;
- req->peer_ip = 0;
- req->peer_netmask = 0;
- req->opt0h = htonl(F_DELACK | F_TCAM_BYPASS);
- req->opt0l = htonl(V_RCV_BUFSIZ(rcv_win>>10));
- req->opt1 = htonl(V_CONN_POLICY(CPL_CONN_POLICY_ASK));
-
- skb->priority = 1;
- return iwch_cxgb3_ofld_send(ep->com.tdev, skb);
-}
-
-static int pass_open_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
-{
- struct iwch_listen_ep *ep = ctx;
- struct cpl_pass_open_rpl *rpl = cplhdr(skb);
-
- PDBG("%s ep %p status %d error %d\n", __func__, ep,
- rpl->status, status2errno(rpl->status));
- ep->com.rpl_err = status2errno(rpl->status);
- ep->com.rpl_done = 1;
- wake_up(&ep->com.waitq);
-
- return CPL_RET_BUF_DONE;
-}
-
-static int listen_stop(struct iwch_listen_ep *ep)
-{
- struct sk_buff *skb;
- struct cpl_close_listserv_req *req;
-
- PDBG("%s ep %p\n", __func__, ep);
- skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
- if (!skb) {
- printk(KERN_ERR MOD "%s - failed to alloc skb\n", __func__);
- return -ENOMEM;
- }
- req = (struct cpl_close_listserv_req *) skb_put(skb, sizeof(*req));
- req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
- req->cpu_idx = 0;
- OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_LISTSRV_REQ, ep->stid));
- skb->priority = 1;
- return iwch_cxgb3_ofld_send(ep->com.tdev, skb);
-}
-
-static int close_listsrv_rpl(struct t3cdev *tdev, struct sk_buff *skb,
- void *ctx)
-{
- struct iwch_listen_ep *ep = ctx;
- struct cpl_close_listserv_rpl *rpl = cplhdr(skb);
-
- PDBG("%s ep %p\n", __func__, ep);
- ep->com.rpl_err = status2errno(rpl->status);
- ep->com.rpl_done = 1;
- wake_up(&ep->com.waitq);
- return CPL_RET_BUF_DONE;
-}
-
-static void accept_cr(struct iwch_ep *ep, __be32 peer_ip, struct sk_buff *skb)
-{
- struct cpl_pass_accept_rpl *rpl;
- unsigned int mtu_idx;
- u32 opt0h, opt0l, opt2;
- int wscale;
-
- PDBG("%s ep %p\n", __func__, ep);
- BUG_ON(skb_cloned(skb));
- skb_trim(skb, sizeof(*rpl));
- skb_get(skb);
- mtu_idx = find_best_mtu(T3C_DATA(ep->com.tdev), dst_mtu(ep->dst));
- wscale = compute_wscale(rcv_win);
- opt0h = V_NAGLE(0) |
- V_NO_CONG(nocong) |
- V_KEEP_ALIVE(1) |
- F_TCAM_BYPASS |
- V_WND_SCALE(wscale) |
- V_MSS_IDX(mtu_idx) |
- V_L2T_IDX(ep->l2t->idx) | V_TX_CHANNEL(ep->l2t->smt_idx);
- opt0l = V_TOS((ep->tos >> 2) & M_TOS) | V_RCV_BUFSIZ(rcv_win>>10);
- opt2 = F_RX_COALESCE_VALID | V_RX_COALESCE(0) | V_FLAVORS_VALID(1) |
- V_CONG_CONTROL_FLAVOR(cong_flavor);
-
- rpl = cplhdr(skb);
- rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
- OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL, ep->hwtid));
- rpl->peer_ip = peer_ip;
- rpl->opt0h = htonl(opt0h);
- rpl->opt0l_status = htonl(opt0l | CPL_PASS_OPEN_ACCEPT);
- rpl->opt2 = htonl(opt2);
- rpl->rsvd = rpl->opt2; /* workaround for HW bug */
- skb->priority = CPL_PRIORITY_SETUP;
- iwch_l2t_send(ep->com.tdev, skb, ep->l2t);
-
- return;
-}
-
-static void reject_cr(struct t3cdev *tdev, u32 hwtid, __be32 peer_ip,
- struct sk_buff *skb)
-{
- PDBG("%s t3cdev %p tid %u peer_ip %x\n", __func__, tdev, hwtid,
- peer_ip);
- BUG_ON(skb_cloned(skb));
- skb_trim(skb, sizeof(struct cpl_tid_release));
- skb_get(skb);
-
- if (tdev->type != T3A)
- release_tid(tdev, hwtid, skb);
- else {
- struct cpl_pass_accept_rpl *rpl;
-
- rpl = cplhdr(skb);
- skb->priority = CPL_PRIORITY_SETUP;
- rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
- OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL,
- hwtid));
- rpl->peer_ip = peer_ip;
- rpl->opt0h = htonl(F_TCAM_BYPASS);
- rpl->opt0l_status = htonl(CPL_PASS_OPEN_REJECT);
- rpl->opt2 = 0;
- rpl->rsvd = rpl->opt2;
- iwch_cxgb3_ofld_send(tdev, skb);
- }
-}
-
-static int pass_accept_req(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
-{
- struct iwch_ep *child_ep, *parent_ep = ctx;
- struct cpl_pass_accept_req *req = cplhdr(skb);
- unsigned int hwtid = GET_TID(req);
- struct dst_entry *dst;
- struct l2t_entry *l2t;
- struct rtable *rt;
- struct iff_mac tim;
-
- PDBG("%s parent ep %p tid %u\n", __func__, parent_ep, hwtid);
-
- if (state_read(&parent_ep->com) != LISTEN) {
- printk(KERN_ERR "%s - listening ep not in LISTEN\n",
- __func__);
- goto reject;
- }
-
- /*
- * Find the netdev for this connection request.
- */
- tim.mac_addr = req->dst_mac;
- tim.vlan_tag = ntohs(req->vlan_tag);
- if (tdev->ctl(tdev, GET_IFF_FROM_MAC, &tim) < 0 || !tim.dev) {
- printk(KERN_ERR "%s bad dst mac %pM\n",
- __func__, req->dst_mac);
- goto reject;
- }
-
- /* Find output route */
- rt = find_route(tdev,
- req->local_ip,
- req->peer_ip,
- req->local_port,
- req->peer_port, G_PASS_OPEN_TOS(ntohl(req->tos_tid)));
- if (!rt) {
- printk(KERN_ERR MOD "%s - failed to find dst entry!\n",
- __func__);
- goto reject;
- }
- dst = &rt->dst;
- l2t = t3_l2t_get(tdev, dst, NULL, &req->peer_ip);
- if (!l2t) {
- printk(KERN_ERR MOD "%s - failed to allocate l2t entry!\n",
- __func__);
- dst_release(dst);
- goto reject;
- }
- child_ep = alloc_ep(sizeof(*child_ep), GFP_KERNEL);
- if (!child_ep) {
- printk(KERN_ERR MOD "%s - failed to allocate ep entry!\n",
- __func__);
- l2t_release(tdev, l2t);
- dst_release(dst);
- goto reject;
- }
- state_set(&child_ep->com, CONNECTING);
- child_ep->com.tdev = tdev;
- child_ep->com.cm_id = NULL;
- child_ep->com.local_addr.sin_family = AF_INET;
- child_ep->com.local_addr.sin_port = req->local_port;
- child_ep->com.local_addr.sin_addr.s_addr = req->local_ip;
- child_ep->com.remote_addr.sin_family = AF_INET;
- child_ep->com.remote_addr.sin_port = req->peer_port;
- child_ep->com.remote_addr.sin_addr.s_addr = req->peer_ip;
- get_ep(&parent_ep->com);
- child_ep->parent_ep = parent_ep;
- child_ep->tos = G_PASS_OPEN_TOS(ntohl(req->tos_tid));
- child_ep->l2t = l2t;
- child_ep->dst = dst;
- child_ep->hwtid = hwtid;
- init_timer(&child_ep->timer);
- cxgb3_insert_tid(tdev, &t3c_client, child_ep, hwtid);
- accept_cr(child_ep, req->peer_ip, skb);
- goto out;
-reject:
- reject_cr(tdev, hwtid, req->peer_ip, skb);
-out:
- return CPL_RET_BUF_DONE;
-}
-
-static int pass_establish(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
-{
- struct iwch_ep *ep = ctx;
- struct cpl_pass_establish *req = cplhdr(skb);
-
- PDBG("%s ep %p\n", __func__, ep);
- ep->snd_seq = ntohl(req->snd_isn);
- ep->rcv_seq = ntohl(req->rcv_isn);
-
- set_emss(ep, ntohs(req->tcp_opt));
-
- dst_confirm(ep->dst);
- state_set(&ep->com, MPA_REQ_WAIT);
- start_ep_timer(ep);
-
- return CPL_RET_BUF_DONE;
-}
-
-static int peer_close(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
-{
- struct iwch_ep *ep = ctx;
- struct iwch_qp_attributes attrs;
- unsigned long flags;
- int disconnect = 1;
- int release = 0;
-
- PDBG("%s ep %p\n", __func__, ep);
- dst_confirm(ep->dst);
-
- spin_lock_irqsave(&ep->com.lock, flags);
- switch (ep->com.state) {
- case MPA_REQ_WAIT:
- __state_set(&ep->com, CLOSING);
- break;
- case MPA_REQ_SENT:
- __state_set(&ep->com, CLOSING);
- connect_reply_upcall(ep, -ECONNRESET);
- break;
- case MPA_REQ_RCVD:
-
- /*
- * We're gonna mark this puppy DEAD, but keep
- * the reference on it until the ULP accepts or
- * rejects the CR. Also wake up anyone waiting
- * in rdma connection migration (see iwch_accept_cr()).
- */
- __state_set(&ep->com, CLOSING);
- ep->com.rpl_done = 1;
- ep->com.rpl_err = -ECONNRESET;
- PDBG("waking up ep %p\n", ep);
- wake_up(&ep->com.waitq);
- break;
- case MPA_REP_SENT:
- __state_set(&ep->com, CLOSING);
- ep->com.rpl_done = 1;
- ep->com.rpl_err = -ECONNRESET;
- PDBG("waking up ep %p\n", ep);
- wake_up(&ep->com.waitq);
- break;
- case FPDU_MODE:
- start_ep_timer(ep);
- __state_set(&ep->com, CLOSING);
- attrs.next_state = IWCH_QP_STATE_CLOSING;
- iwch_modify_qp(ep->com.qp->rhp, ep->com.qp,
- IWCH_QP_ATTR_NEXT_STATE, &attrs, 1);
- peer_close_upcall(ep);
- break;
- case ABORTING:
- disconnect = 0;
- break;
- case CLOSING:
- __state_set(&ep->com, MORIBUND);
- disconnect = 0;
- break;
- case MORIBUND:
- stop_ep_timer(ep);
- if (ep->com.cm_id && ep->com.qp) {
- attrs.next_state = IWCH_QP_STATE_IDLE;
- iwch_modify_qp(ep->com.qp->rhp, ep->com.qp,
- IWCH_QP_ATTR_NEXT_STATE, &attrs, 1);
- }
- close_complete_upcall(ep);
- __state_set(&ep->com, DEAD);
- release = 1;
- disconnect = 0;
- break;
- case DEAD:
- disconnect = 0;
- break;
- default:
- BUG_ON(1);
- }
- spin_unlock_irqrestore(&ep->com.lock, flags);
- if (disconnect)
- iwch_ep_disconnect(ep, 0, GFP_KERNEL);
- if (release)
- release_ep_resources(ep);
- return CPL_RET_BUF_DONE;
-}
-
-/*
- * Returns whether an ABORT_REQ_RSS message is a negative advice.
- */
-static int is_neg_adv_abort(unsigned int status)
-{
- return status == CPL_ERR_RTX_NEG_ADVICE ||
- status == CPL_ERR_PERSIST_NEG_ADVICE;
-}
-
-static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
-{
- struct cpl_abort_req_rss *req = cplhdr(skb);
- struct iwch_ep *ep = ctx;
- struct cpl_abort_rpl *rpl;
- struct sk_buff *rpl_skb;
- struct iwch_qp_attributes attrs;
- int ret;
- int release = 0;
- unsigned long flags;
-
- if (is_neg_adv_abort(req->status)) {
- PDBG("%s neg_adv_abort ep %p tid %d\n", __func__, ep,
- ep->hwtid);
- t3_l2t_send_event(ep->com.tdev, ep->l2t);
- return CPL_RET_BUF_DONE;
- }
-
- /*
- * We get 2 peer aborts from the HW. The first one must
- * be ignored except for scribbling that we need one more.
- */
- if (!test_and_set_bit(PEER_ABORT_IN_PROGRESS, &ep->com.flags)) {
- return CPL_RET_BUF_DONE;
- }
-
- spin_lock_irqsave(&ep->com.lock, flags);
- PDBG("%s ep %p state %u\n", __func__, ep, ep->com.state);
- switch (ep->com.state) {
- case CONNECTING:
- break;
- case MPA_REQ_WAIT:
- stop_ep_timer(ep);
- break;
- case MPA_REQ_SENT:
- stop_ep_timer(ep);
- connect_reply_upcall(ep, -ECONNRESET);
- break;
- case MPA_REP_SENT:
- ep->com.rpl_done = 1;
- ep->com.rpl_err = -ECONNRESET;
- PDBG("waking up ep %p\n", ep);
- wake_up(&ep->com.waitq);
- break;
- case MPA_REQ_RCVD:
-
- /*
- * We're gonna mark this puppy DEAD, but keep
- * the reference on it until the ULP accepts or
- * rejects the CR. Also wake up anyone waiting
- * in rdma connection migration (see iwch_accept_cr()).
- */
- ep->com.rpl_done = 1;
- ep->com.rpl_err = -ECONNRESET;
- PDBG("waking up ep %p\n", ep);
- wake_up(&ep->com.waitq);
- break;
- case MORIBUND:
- case CLOSING:
- stop_ep_timer(ep);
- /*FALLTHROUGH*/
- case FPDU_MODE:
- if (ep->com.cm_id && ep->com.qp) {
- attrs.next_state = IWCH_QP_STATE_ERROR;
- ret = iwch_modify_qp(ep->com.qp->rhp,
- ep->com.qp, IWCH_QP_ATTR_NEXT_STATE,
- &attrs, 1);
- if (ret)
- printk(KERN_ERR MOD
- "%s - qp <- error failed!\n",
- __func__);
- }
- peer_abort_upcall(ep);
- break;
- case ABORTING:
- break;
- case DEAD:
- PDBG("%s PEER_ABORT IN DEAD STATE!!!!\n", __func__);
- spin_unlock_irqrestore(&ep->com.lock, flags);
- return CPL_RET_BUF_DONE;
- default:
- BUG_ON(1);
- break;
- }
- dst_confirm(ep->dst);
- if (ep->com.state != ABORTING) {
- __state_set(&ep->com, DEAD);
- release = 1;
- }
- spin_unlock_irqrestore(&ep->com.lock, flags);
-
- rpl_skb = get_skb(skb, sizeof(*rpl), GFP_KERNEL);
- if (!rpl_skb) {
- printk(KERN_ERR MOD "%s - cannot allocate skb!\n",
- __func__);
- release = 1;
- goto out;
- }
- rpl_skb->priority = CPL_PRIORITY_DATA;
- rpl = (struct cpl_abort_rpl *) skb_put(rpl_skb, sizeof(*rpl));
- rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_RPL));
- rpl->wr.wr_lo = htonl(V_WR_TID(ep->hwtid));
- OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_ABORT_RPL, ep->hwtid));
- rpl->cmd = CPL_ABORT_NO_RST;
- iwch_cxgb3_ofld_send(ep->com.tdev, rpl_skb);
-out:
- if (release)
- release_ep_resources(ep);
- return CPL_RET_BUF_DONE;
-}
-
-static int close_con_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
-{
- struct iwch_ep *ep = ctx;
- struct iwch_qp_attributes attrs;
- unsigned long flags;
- int release = 0;
-
- PDBG("%s ep %p\n", __func__, ep);
- BUG_ON(!ep);
-
- /* The cm_id may be null if we failed to connect */
- spin_lock_irqsave(&ep->com.lock, flags);
- switch (ep->com.state) {
- case CLOSING:
- __state_set(&ep->com, MORIBUND);
- break;
- case MORIBUND:
- stop_ep_timer(ep);
- if ((ep->com.cm_id) && (ep->com.qp)) {
- attrs.next_state = IWCH_QP_STATE_IDLE;
- iwch_modify_qp(ep->com.qp->rhp,
- ep->com.qp,
- IWCH_QP_ATTR_NEXT_STATE,
- &attrs, 1);
- }
- close_complete_upcall(ep);
- __state_set(&ep->com, DEAD);
- release = 1;
- break;
- case ABORTING:
- case DEAD:
- break;
- default:
- BUG_ON(1);
- break;
- }
- spin_unlock_irqrestore(&ep->com.lock, flags);
- if (release)
- release_ep_resources(ep);
- return CPL_RET_BUF_DONE;
-}
-
-/*
- * T3A does 3 things when a TERM is received:
- * 1) send up a CPL_RDMA_TERMINATE message with the TERM packet
- * 2) generate an async event on the QP with the TERMINATE opcode
- * 3) post a TERMINATE opcode cqe into the associated CQ.
- *
- * For (1), we save the message in the qp for later consumer consumption.
- * For (2), we move the QP into TERMINATE, post a QP event and disconnect.
- * For (3), we toss the CQE in cxio_poll_cq().
- *
- * terminate() handles case (1)...
- */
-static int terminate(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
-{
- struct iwch_ep *ep = ctx;
-
- if (state_read(&ep->com) != FPDU_MODE)
- return CPL_RET_BUF_DONE;
-
- PDBG("%s ep %p\n", __func__, ep);
- skb_pull(skb, sizeof(struct cpl_rdma_terminate));
- PDBG("%s saving %d bytes of term msg\n", __func__, skb->len);
- skb_copy_from_linear_data(skb, ep->com.qp->attr.terminate_buffer,
- skb->len);
- ep->com.qp->attr.terminate_msg_len = skb->len;
- ep->com.qp->attr.is_terminate_local = 0;
- return CPL_RET_BUF_DONE;
-}
-
-static int ec_status(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
-{
- struct cpl_rdma_ec_status *rep = cplhdr(skb);
- struct iwch_ep *ep = ctx;
-
- PDBG("%s ep %p tid %u status %d\n", __func__, ep, ep->hwtid,
- rep->status);
- if (rep->status) {
- struct iwch_qp_attributes attrs;
-
- printk(KERN_ERR MOD "%s BAD CLOSE - Aborting tid %u\n",
- __func__, ep->hwtid);
- stop_ep_timer(ep);
- attrs.next_state = IWCH_QP_STATE_ERROR;
- iwch_modify_qp(ep->com.qp->rhp,
- ep->com.qp, IWCH_QP_ATTR_NEXT_STATE,
- &attrs, 1);
- abort_connection(ep, NULL, GFP_KERNEL);
- }
- return CPL_RET_BUF_DONE;
-}
-
-static void ep_timeout(unsigned long arg)
-{
- struct iwch_ep *ep = (struct iwch_ep *)arg;
- struct iwch_qp_attributes attrs;
- unsigned long flags;
- int abort = 1;
-
- spin_lock_irqsave(&ep->com.lock, flags);
- PDBG("%s ep %p tid %u state %d\n", __func__, ep, ep->hwtid,
- ep->com.state);
- switch (ep->com.state) {
- case MPA_REQ_SENT:
- __state_set(&ep->com, ABORTING);
- connect_reply_upcall(ep, -ETIMEDOUT);
- break;
- case MPA_REQ_WAIT:
- __state_set(&ep->com, ABORTING);
- break;
- case CLOSING:
- case MORIBUND:
- if (ep->com.cm_id && ep->com.qp) {
- attrs.next_state = IWCH_QP_STATE_ERROR;
- iwch_modify_qp(ep->com.qp->rhp,
- ep->com.qp, IWCH_QP_ATTR_NEXT_STATE,
- &attrs, 1);
- }
- __state_set(&ep->com, ABORTING);
- break;
- default:
- WARN(1, "%s unexpected state ep %p state %u\n",
- __func__, ep, ep->com.state);
- abort = 0;
- }
- spin_unlock_irqrestore(&ep->com.lock, flags);
- if (abort)
- abort_connection(ep, NULL, GFP_ATOMIC);
- put_ep(&ep->com);
-}
-
-int iwch_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
-{
- int err;
- struct iwch_ep *ep = to_ep(cm_id);
- PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
-
- if (state_read(&ep->com) == DEAD) {
- put_ep(&ep->com);
- return -ECONNRESET;
- }
- BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD);
- if (mpa_rev == 0)
- abort_connection(ep, NULL, GFP_KERNEL);
- else {
- err = send_mpa_reject(ep, pdata, pdata_len);
- err = iwch_ep_disconnect(ep, 0, GFP_KERNEL);
- }
- put_ep(&ep->com);
- return 0;
-}
-
-int iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
-{
- int err;
- struct iwch_qp_attributes attrs;
- enum iwch_qp_attr_mask mask;
- struct iwch_ep *ep = to_ep(cm_id);
- struct iwch_dev *h = to_iwch_dev(cm_id->device);
- struct iwch_qp *qp = get_qhp(h, conn_param->qpn);
-
- PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
- if (state_read(&ep->com) == DEAD) {
- err = -ECONNRESET;
- goto err;
- }
-
- BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD);
- BUG_ON(!qp);
-
- if ((conn_param->ord > qp->rhp->attr.max_rdma_read_qp_depth) ||
- (conn_param->ird > qp->rhp->attr.max_rdma_reads_per_qp)) {
- abort_connection(ep, NULL, GFP_KERNEL);
- err = -EINVAL;
- goto err;
- }
-
- cm_id->add_ref(cm_id);
- ep->com.cm_id = cm_id;
- ep->com.qp = qp;
-
- ep->ird = conn_param->ird;
- ep->ord = conn_param->ord;
-
- if (peer2peer && ep->ird == 0)
- ep->ird = 1;
-
- PDBG("%s %d ird %d ord %d\n", __func__, __LINE__, ep->ird, ep->ord);
-
- /* bind QP to EP and move to RTS */
- attrs.mpa_attr = ep->mpa_attr;
- attrs.max_ird = ep->ird;
- attrs.max_ord = ep->ord;
- attrs.llp_stream_handle = ep;
- attrs.next_state = IWCH_QP_STATE_RTS;
-
- /* bind QP and TID with INIT_WR */
- mask = IWCH_QP_ATTR_NEXT_STATE |
- IWCH_QP_ATTR_LLP_STREAM_HANDLE |
- IWCH_QP_ATTR_MPA_ATTR |
- IWCH_QP_ATTR_MAX_IRD |
- IWCH_QP_ATTR_MAX_ORD;
-
- err = iwch_modify_qp(ep->com.qp->rhp,
- ep->com.qp, mask, &attrs, 1);
- if (err)
- goto err1;
-
- /* if needed, wait for wr_ack */
- if (iwch_rqes_posted(qp)) {
- wait_event(ep->com.waitq, ep->com.rpl_done);
- err = ep->com.rpl_err;
- if (err)
- goto err1;
- }
-
- err = send_mpa_reply(ep, conn_param->private_data,
- conn_param->private_data_len);
- if (err)
- goto err1;
-
-
- state_set(&ep->com, FPDU_MODE);
- established_upcall(ep);
- put_ep(&ep->com);
- return 0;
-err1:
- ep->com.cm_id = NULL;
- ep->com.qp = NULL;
- cm_id->rem_ref(cm_id);
-err:
- put_ep(&ep->com);
- return err;
-}
-
-static int is_loopback_dst(struct iw_cm_id *cm_id)
-{
- struct net_device *dev;
- struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->m_remote_addr;
-
- dev = ip_dev_find(&init_net, raddr->sin_addr.s_addr);
- if (!dev)
- return 0;
- dev_put(dev);
- return 1;
-}
-
-int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
-{
- struct iwch_dev *h = to_iwch_dev(cm_id->device);
- struct iwch_ep *ep;
- struct rtable *rt;
- int err = 0;
- struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->m_local_addr;
- struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->m_remote_addr;
-
- if (cm_id->m_remote_addr.ss_family != PF_INET) {
- err = -ENOSYS;
- goto out;
- }
-
- if (is_loopback_dst(cm_id)) {
- err = -ENOSYS;
- goto out;
- }
-
- ep = alloc_ep(sizeof(*ep), GFP_KERNEL);
- if (!ep) {
- printk(KERN_ERR MOD "%s - cannot alloc ep.\n", __func__);
- err = -ENOMEM;
- goto out;
- }
- init_timer(&ep->timer);
- ep->plen = conn_param->private_data_len;
- if (ep->plen)
- memcpy(ep->mpa_pkt + sizeof(struct mpa_message),
- conn_param->private_data, ep->plen);
- ep->ird = conn_param->ird;
- ep->ord = conn_param->ord;
-
- if (peer2peer && ep->ord == 0)
- ep->ord = 1;
-
- ep->com.tdev = h->rdev.t3cdev_p;
-
- cm_id->add_ref(cm_id);
- ep->com.cm_id = cm_id;
- ep->com.qp = get_qhp(h, conn_param->qpn);
- BUG_ON(!ep->com.qp);
- PDBG("%s qpn 0x%x qp %p cm_id %p\n", __func__, conn_param->qpn,
- ep->com.qp, cm_id);
-
- /*
- * Allocate an active TID to initiate a TCP connection.
- */
- ep->atid = cxgb3_alloc_atid(h->rdev.t3cdev_p, &t3c_client, ep);
- if (ep->atid == -1) {
- printk(KERN_ERR MOD "%s - cannot alloc atid.\n", __func__);
- err = -ENOMEM;
- goto fail2;
- }
-
- /* find a route */
- rt = find_route(h->rdev.t3cdev_p, laddr->sin_addr.s_addr,
- raddr->sin_addr.s_addr, laddr->sin_port,
- raddr->sin_port, IPTOS_LOWDELAY);
- if (!rt) {
- printk(KERN_ERR MOD "%s - cannot find route.\n", __func__);
- err = -EHOSTUNREACH;
- goto fail3;
- }
- ep->dst = &rt->dst;
- ep->l2t = t3_l2t_get(ep->com.tdev, ep->dst, NULL,
- &raddr->sin_addr.s_addr);
- if (!ep->l2t) {
- printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __func__);
- err = -ENOMEM;
- goto fail4;
- }
-
- state_set(&ep->com, CONNECTING);
- ep->tos = IPTOS_LOWDELAY;
- memcpy(&ep->com.local_addr, &cm_id->m_local_addr,
- sizeof(ep->com.local_addr));
- memcpy(&ep->com.remote_addr, &cm_id->m_remote_addr,
- sizeof(ep->com.remote_addr));
-
- /* send connect request to rnic */
- err = send_connect(ep);
- if (!err)
- goto out;
-
- l2t_release(h->rdev.t3cdev_p, ep->l2t);
-fail4:
- dst_release(ep->dst);
-fail3:
- cxgb3_free_atid(ep->com.tdev, ep->atid);
-fail2:
- cm_id->rem_ref(cm_id);
- put_ep(&ep->com);
-out:
- return err;
-}
-
-int iwch_create_listen(struct iw_cm_id *cm_id, int backlog)
-{
- int err = 0;
- struct iwch_dev *h = to_iwch_dev(cm_id->device);
- struct iwch_listen_ep *ep;
-
-
- might_sleep();
-
- if (cm_id->m_local_addr.ss_family != PF_INET) {
- err = -ENOSYS;
- goto fail1;
- }
-
- ep = alloc_ep(sizeof(*ep), GFP_KERNEL);
- if (!ep) {
- printk(KERN_ERR MOD "%s - cannot alloc ep.\n", __func__);
- err = -ENOMEM;
- goto fail1;
- }
- PDBG("%s ep %p\n", __func__, ep);
- ep->com.tdev = h->rdev.t3cdev_p;
- cm_id->add_ref(cm_id);
- ep->com.cm_id = cm_id;
- ep->backlog = backlog;
- memcpy(&ep->com.local_addr, &cm_id->m_local_addr,
- sizeof(ep->com.local_addr));
-
- /*
- * Allocate a server TID.
- */
- ep->stid = cxgb3_alloc_stid(h->rdev.t3cdev_p, &t3c_client, ep);
- if (ep->stid == -1) {
- printk(KERN_ERR MOD "%s - cannot alloc atid.\n", __func__);
- err = -ENOMEM;
- goto fail2;
- }
-
- state_set(&ep->com, LISTEN);
- err = listen_start(ep);
- if (err)
- goto fail3;
-
- /* wait for pass_open_rpl */
- wait_event(ep->com.waitq, ep->com.rpl_done);
- err = ep->com.rpl_err;
- if (!err) {
- cm_id->provider_data = ep;
- goto out;
- }
-fail3:
- cxgb3_free_stid(ep->com.tdev, ep->stid);
-fail2:
- cm_id->rem_ref(cm_id);
- put_ep(&ep->com);
-fail1:
-out:
- return err;
-}
-
-int iwch_destroy_listen(struct iw_cm_id *cm_id)
-{
- int err;
- struct iwch_listen_ep *ep = to_listen_ep(cm_id);
-
- PDBG("%s ep %p\n", __func__, ep);
-
- might_sleep();
- state_set(&ep->com, DEAD);
- ep->com.rpl_done = 0;
- ep->com.rpl_err = 0;
- err = listen_stop(ep);
- if (err)
- goto done;
- wait_event(ep->com.waitq, ep->com.rpl_done);
- cxgb3_free_stid(ep->com.tdev, ep->stid);
-done:
- err = ep->com.rpl_err;
- cm_id->rem_ref(cm_id);
- put_ep(&ep->com);
- return err;
-}
-
-int iwch_ep_disconnect(struct iwch_ep *ep, int abrupt, gfp_t gfp)
-{
- int ret=0;
- unsigned long flags;
- int close = 0;
- int fatal = 0;
- struct t3cdev *tdev;
- struct cxio_rdev *rdev;
-
- spin_lock_irqsave(&ep->com.lock, flags);
-
- PDBG("%s ep %p state %s, abrupt %d\n", __func__, ep,
- states[ep->com.state], abrupt);
-
- tdev = (struct t3cdev *)ep->com.tdev;
- rdev = (struct cxio_rdev *)tdev->ulp;
- if (cxio_fatal_error(rdev)) {
- fatal = 1;
- close_complete_upcall(ep);
- ep->com.state = DEAD;
- }
- switch (ep->com.state) {
- case MPA_REQ_WAIT:
- case MPA_REQ_SENT:
- case MPA_REQ_RCVD:
- case MPA_REP_SENT:
- case FPDU_MODE:
- close = 1;
- if (abrupt)
- ep->com.state = ABORTING;
- else {
- ep->com.state = CLOSING;
- start_ep_timer(ep);
- }
- set_bit(CLOSE_SENT, &ep->com.flags);
- break;
- case CLOSING:
- if (!test_and_set_bit(CLOSE_SENT, &ep->com.flags)) {
- close = 1;
- if (abrupt) {
- stop_ep_timer(ep);
- ep->com.state = ABORTING;
- } else
- ep->com.state = MORIBUND;
- }
- break;
- case MORIBUND:
- case ABORTING:
- case DEAD:
- PDBG("%s ignoring disconnect ep %p state %u\n",
- __func__, ep, ep->com.state);
- break;
- default:
- BUG();
- break;
- }
-
- spin_unlock_irqrestore(&ep->com.lock, flags);
- if (close) {
- if (abrupt)
- ret = send_abort(ep, NULL, gfp);
- else
- ret = send_halfclose(ep, gfp);
- if (ret)
- fatal = 1;
- }
- if (fatal)
- release_ep_resources(ep);
- return ret;
-}
-
-int iwch_ep_redirect(void *ctx, struct dst_entry *old, struct dst_entry *new,
- struct l2t_entry *l2t)
-{
- struct iwch_ep *ep = ctx;
-
- if (ep->dst != old)
- return 0;
-
- PDBG("%s ep %p redirect to dst %p l2t %p\n", __func__, ep, new,
- l2t);
- dst_hold(new);
- l2t_release(ep->com.tdev, ep->l2t);
- ep->l2t = l2t;
- dst_release(old);
- ep->dst = new;
- return 1;
-}
-
-/*
- * All the CM events are handled on a work queue to have a safe context.
- * These are the real handlers that are called from the work queue.
- */
-static const cxgb3_cpl_handler_func work_handlers[NUM_CPL_CMDS] = {
- [CPL_ACT_ESTABLISH] = act_establish,
- [CPL_ACT_OPEN_RPL] = act_open_rpl,
- [CPL_RX_DATA] = rx_data,
- [CPL_TX_DMA_ACK] = tx_ack,
- [CPL_ABORT_RPL_RSS] = abort_rpl,
- [CPL_ABORT_RPL] = abort_rpl,
- [CPL_PASS_OPEN_RPL] = pass_open_rpl,
- [CPL_CLOSE_LISTSRV_RPL] = close_listsrv_rpl,
- [CPL_PASS_ACCEPT_REQ] = pass_accept_req,
- [CPL_PASS_ESTABLISH] = pass_establish,
- [CPL_PEER_CLOSE] = peer_close,
- [CPL_ABORT_REQ_RSS] = peer_abort,
- [CPL_CLOSE_CON_RPL] = close_con_rpl,
- [CPL_RDMA_TERMINATE] = terminate,
- [CPL_RDMA_EC_STATUS] = ec_status,
-};
-
-static void process_work(struct work_struct *work)
-{
- struct sk_buff *skb = NULL;
- void *ep;
- struct t3cdev *tdev;
- int ret;
-
- while ((skb = skb_dequeue(&rxq))) {
- ep = *((void **) (skb->cb));
- tdev = *((struct t3cdev **) (skb->cb + sizeof(void *)));
- ret = work_handlers[G_OPCODE(ntohl((__force __be32)skb->csum))](tdev, skb, ep);
- if (ret & CPL_RET_BUF_DONE)
- kfree_skb(skb);
-
- /*
- * ep was referenced in sched(), and is freed here.
- */
- put_ep((struct iwch_ep_common *)ep);
- }
-}
-
-static DECLARE_WORK(skb_work, process_work);
-
-static int sched(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
-{
- struct iwch_ep_common *epc = ctx;
-
- get_ep(epc);
-
- /*
- * Save ctx and tdev in the skb->cb area.
- */
- *((void **) skb->cb) = ctx;
- *((struct t3cdev **) (skb->cb + sizeof(void *))) = tdev;
-
- /*
- * Queue the skb and schedule the worker thread.
- */
- skb_queue_tail(&rxq, skb);
- queue_work(workq, &skb_work);
- return 0;
-}
-
-static int set_tcb_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
-{
- struct cpl_set_tcb_rpl *rpl = cplhdr(skb);
-
- if (rpl->status != CPL_ERR_NONE) {
- printk(KERN_ERR MOD "Unexpected SET_TCB_RPL status %u "
- "for tid %u\n", rpl->status, GET_TID(rpl));
- }
- return CPL_RET_BUF_DONE;
-}
-
-/*
- * All upcalls from the T3 Core go to sched() to schedule the
- * processing on a work queue.
- */
-cxgb3_cpl_handler_func t3c_handlers[NUM_CPL_CMDS] = {
- [CPL_ACT_ESTABLISH] = sched,
- [CPL_ACT_OPEN_RPL] = sched,
- [CPL_RX_DATA] = sched,
- [CPL_TX_DMA_ACK] = sched,
- [CPL_ABORT_RPL_RSS] = sched,
- [CPL_ABORT_RPL] = sched,
- [CPL_PASS_OPEN_RPL] = sched,
- [CPL_CLOSE_LISTSRV_RPL] = sched,
- [CPL_PASS_ACCEPT_REQ] = sched,
- [CPL_PASS_ESTABLISH] = sched,
- [CPL_PEER_CLOSE] = sched,
- [CPL_CLOSE_CON_RPL] = sched,
- [CPL_ABORT_REQ_RSS] = sched,
- [CPL_RDMA_TERMINATE] = sched,
- [CPL_RDMA_EC_STATUS] = sched,
- [CPL_SET_TCB_RPL] = set_tcb_rpl,
-};
-
-int __init iwch_cm_init(void)
-{
- skb_queue_head_init(&rxq);
-
- workq = alloc_ordered_workqueue("iw_cxgb3", WQ_MEM_RECLAIM);
- if (!workq)
- return -ENOMEM;
-
- return 0;
-}
-
-void __exit iwch_cm_term(void)
-{
- flush_workqueue(workq);
- destroy_workqueue(workq);
-}
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.h b/drivers/infiniband/hw/cxgb3/iwch_cm.h
deleted file mode 100644
index b9efadfffb4f..000000000000
--- a/drivers/infiniband/hw/cxgb3/iwch_cm.h
+++ /dev/null
@@ -1,233 +0,0 @@
-/*
- * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef _IWCH_CM_H_
-#define _IWCH_CM_H_
-
-#include <linux/inet.h>
-#include <linux/wait.h>
-#include <linux/spinlock.h>
-#include <linux/kref.h>
-
-#include <rdma/ib_verbs.h>
-#include <rdma/iw_cm.h>
-
-#include "cxgb3_offload.h"
-#include "iwch_provider.h"
-
-#define MPA_KEY_REQ "MPA ID Req Frame"
-#define MPA_KEY_REP "MPA ID Rep Frame"
-
-#define MPA_MAX_PRIVATE_DATA 256
-#define MPA_REV 0 /* XXX - amso1100 uses rev 0 ! */
-#define MPA_REJECT 0x20
-#define MPA_CRC 0x40
-#define MPA_MARKERS 0x80
-#define MPA_FLAGS_MASK 0xE0
-
-#define put_ep(ep) { \
- PDBG("put_ep (via %s:%u) ep %p refcnt %d\n", __func__, __LINE__, \
- ep, atomic_read(&((ep)->kref.refcount))); \
- WARN_ON(atomic_read(&((ep)->kref.refcount)) < 1); \
- kref_put(&((ep)->kref), __free_ep); \
-}
-
-#define get_ep(ep) { \
- PDBG("get_ep (via %s:%u) ep %p, refcnt %d\n", __func__, __LINE__, \
- ep, atomic_read(&((ep)->kref.refcount))); \
- kref_get(&((ep)->kref)); \
-}
-
-struct mpa_message {
- u8 key[16];
- u8 flags;
- u8 revision;
- __be16 private_data_size;
- u8 private_data[0];
-};
-
-struct terminate_message {
- u8 layer_etype;
- u8 ecode;
- __be16 hdrct_rsvd;
- u8 len_hdrs[0];
-};
-
-#define TERM_MAX_LENGTH (sizeof(struct terminate_message) + 2 + 18 + 28)
-
-enum iwch_layers_types {
- LAYER_RDMAP = 0x00,
- LAYER_DDP = 0x10,
- LAYER_MPA = 0x20,
- RDMAP_LOCAL_CATA = 0x00,
- RDMAP_REMOTE_PROT = 0x01,
- RDMAP_REMOTE_OP = 0x02,
- DDP_LOCAL_CATA = 0x00,
- DDP_TAGGED_ERR = 0x01,
- DDP_UNTAGGED_ERR = 0x02,
- DDP_LLP = 0x03
-};
-
-enum iwch_rdma_ecodes {
- RDMAP_INV_STAG = 0x00,
- RDMAP_BASE_BOUNDS = 0x01,
- RDMAP_ACC_VIOL = 0x02,
- RDMAP_STAG_NOT_ASSOC = 0x03,
- RDMAP_TO_WRAP = 0x04,
- RDMAP_INV_VERS = 0x05,
- RDMAP_INV_OPCODE = 0x06,
- RDMAP_STREAM_CATA = 0x07,
- RDMAP_GLOBAL_CATA = 0x08,
- RDMAP_CANT_INV_STAG = 0x09,
- RDMAP_UNSPECIFIED = 0xff
-};
-
-enum iwch_ddp_ecodes {
- DDPT_INV_STAG = 0x00,
- DDPT_BASE_BOUNDS = 0x01,
- DDPT_STAG_NOT_ASSOC = 0x02,
- DDPT_TO_WRAP = 0x03,
- DDPT_INV_VERS = 0x04,
- DDPU_INV_QN = 0x01,
- DDPU_INV_MSN_NOBUF = 0x02,
- DDPU_INV_MSN_RANGE = 0x03,
- DDPU_INV_MO = 0x04,
- DDPU_MSG_TOOBIG = 0x05,
- DDPU_INV_VERS = 0x06
-};
-
-enum iwch_mpa_ecodes {
- MPA_CRC_ERR = 0x02,
- MPA_MARKER_ERR = 0x03
-};
-
-enum iwch_ep_state {
- IDLE = 0,
- LISTEN,
- CONNECTING,
- MPA_REQ_WAIT,
- MPA_REQ_SENT,
- MPA_REQ_RCVD,
- MPA_REP_SENT,
- FPDU_MODE,
- ABORTING,
- CLOSING,
- MORIBUND,
- DEAD,
-};
-
-enum iwch_ep_flags {
- PEER_ABORT_IN_PROGRESS = 0,
- ABORT_REQ_IN_PROGRESS = 1,
- RELEASE_RESOURCES = 2,
- CLOSE_SENT = 3,
-};
-
-struct iwch_ep_common {
- struct iw_cm_id *cm_id;
- struct iwch_qp *qp;
- struct t3cdev *tdev;
- enum iwch_ep_state state;
- struct kref kref;
- spinlock_t lock;
- struct sockaddr_in local_addr;
- struct sockaddr_in remote_addr;
- wait_queue_head_t waitq;
- int rpl_done;
- int rpl_err;
- unsigned long flags;
-};
-
-struct iwch_listen_ep {
- struct iwch_ep_common com;
- unsigned int stid;
- int backlog;
-};
-
-struct iwch_ep {
- struct iwch_ep_common com;
- struct iwch_ep *parent_ep;
- struct timer_list timer;
- unsigned int atid;
- u32 hwtid;
- u32 snd_seq;
- u32 rcv_seq;
- struct l2t_entry *l2t;
- struct dst_entry *dst;
- struct sk_buff *mpa_skb;
- struct iwch_mpa_attributes mpa_attr;
- unsigned int mpa_pkt_len;
- u8 mpa_pkt[sizeof(struct mpa_message) + MPA_MAX_PRIVATE_DATA];
- u8 tos;
- u16 emss;
- u16 plen;
- u32 ird;
- u32 ord;
-};
-
-static inline struct iwch_ep *to_ep(struct iw_cm_id *cm_id)
-{
- return cm_id->provider_data;
-}
-
-static inline struct iwch_listen_ep *to_listen_ep(struct iw_cm_id *cm_id)
-{
- return cm_id->provider_data;
-}
-
-static inline int compute_wscale(int win)
-{
- int wscale = 0;
-
- while (wscale < 14 && (65535<<wscale) < win)
- wscale++;
- return wscale;
-}
-
-/* CM prototypes */
-
-int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param);
-int iwch_create_listen(struct iw_cm_id *cm_id, int backlog);
-int iwch_destroy_listen(struct iw_cm_id *cm_id);
-int iwch_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len);
-int iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param);
-int iwch_ep_disconnect(struct iwch_ep *ep, int abrupt, gfp_t gfp);
-int iwch_quiesce_tid(struct iwch_ep *ep);
-int iwch_resume_tid(struct iwch_ep *ep);
-void __free_ep(struct kref *kref);
-void iwch_rearp(struct iwch_ep *ep);
-int iwch_ep_redirect(void *ctx, struct dst_entry *old, struct dst_entry *new, struct l2t_entry *l2t);
-
-int __init iwch_cm_init(void);
-void __exit iwch_cm_term(void);
-extern int peer2peer;
-
-#endif /* _IWCH_CM_H_ */
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cq.c b/drivers/infiniband/hw/cxgb3/iwch_cq.c
deleted file mode 100644
index 97fbfd2c298e..000000000000
--- a/drivers/infiniband/hw/cxgb3/iwch_cq.c
+++ /dev/null
@@ -1,229 +0,0 @@
-/*
- * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "iwch_provider.h"
-#include "iwch.h"
-
-/*
- * Get one cq entry from cxio and map it to openib.
- *
- * Returns:
- * 0 EMPTY;
- * 1 cqe returned
- * -EAGAIN caller must try again
- * any other -errno fatal error
- */
-static int iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp,
- struct ib_wc *wc)
-{
- struct iwch_qp *qhp = NULL;
- struct t3_cqe cqe, *rd_cqe;
- struct t3_wq *wq;
- u32 credit = 0;
- u8 cqe_flushed;
- u64 cookie;
- int ret = 1;
-
- rd_cqe = cxio_next_cqe(&chp->cq);
-
- if (!rd_cqe)
- return 0;
-
- qhp = get_qhp(rhp, CQE_QPID(*rd_cqe));
- if (!qhp)
- wq = NULL;
- else {
- spin_lock(&qhp->lock);
- wq = &(qhp->wq);
- }
- ret = cxio_poll_cq(wq, &(chp->cq), &cqe, &cqe_flushed, &cookie,
- &credit);
- if (t3a_device(chp->rhp) && credit) {
- PDBG("%s updating %d cq credits on id %d\n", __func__,
- credit, chp->cq.cqid);
- cxio_hal_cq_op(&rhp->rdev, &chp->cq, CQ_CREDIT_UPDATE, credit);
- }
-
- if (ret) {
- ret = -EAGAIN;
- goto out;
- }
- ret = 1;
-
- wc->wr_id = cookie;
- wc->qp = &qhp->ibqp;
- wc->vendor_err = CQE_STATUS(cqe);
- wc->wc_flags = 0;
-
- PDBG("%s qpid 0x%x type %d opcode %d status 0x%x wrid hi 0x%x "
- "lo 0x%x cookie 0x%llx\n", __func__,
- CQE_QPID(cqe), CQE_TYPE(cqe),
- CQE_OPCODE(cqe), CQE_STATUS(cqe), CQE_WRID_HI(cqe),
- CQE_WRID_LOW(cqe), (unsigned long long) cookie);
-
- if (CQE_TYPE(cqe) == 0) {
- if (!CQE_STATUS(cqe))
- wc->byte_len = CQE_LEN(cqe);
- else
- wc->byte_len = 0;
- wc->opcode = IB_WC_RECV;
- if (CQE_OPCODE(cqe) == T3_SEND_WITH_INV ||
- CQE_OPCODE(cqe) == T3_SEND_WITH_SE_INV) {
- wc->ex.invalidate_rkey = CQE_WRID_STAG(cqe);
- wc->wc_flags |= IB_WC_WITH_INVALIDATE;
- }
- } else {
- switch (CQE_OPCODE(cqe)) {
- case T3_RDMA_WRITE:
- wc->opcode = IB_WC_RDMA_WRITE;
- break;
- case T3_READ_REQ:
- wc->opcode = IB_WC_RDMA_READ;
- wc->byte_len = CQE_LEN(cqe);
- break;
- case T3_SEND:
- case T3_SEND_WITH_SE:
- case T3_SEND_WITH_INV:
- case T3_SEND_WITH_SE_INV:
- wc->opcode = IB_WC_SEND;
- break;
- case T3_LOCAL_INV:
- wc->opcode = IB_WC_LOCAL_INV;
- break;
- case T3_FAST_REGISTER:
- wc->opcode = IB_WC_REG_MR;
- break;
- default:
- printk(KERN_ERR MOD "Unexpected opcode %d "
- "in the CQE received for QPID=0x%0x\n",
- CQE_OPCODE(cqe), CQE_QPID(cqe));
- ret = -EINVAL;
- goto out;
- }
- }
-
- if (cqe_flushed)
- wc->status = IB_WC_WR_FLUSH_ERR;
- else {
-
- switch (CQE_STATUS(cqe)) {
- case TPT_ERR_SUCCESS:
- wc->status = IB_WC_SUCCESS;
- break;
- case TPT_ERR_STAG:
- wc->status = IB_WC_LOC_ACCESS_ERR;
- break;
- case TPT_ERR_PDID:
- wc->status = IB_WC_LOC_PROT_ERR;
- break;
- case TPT_ERR_QPID:
- case TPT_ERR_ACCESS:
- wc->status = IB_WC_LOC_ACCESS_ERR;
- break;
- case TPT_ERR_WRAP:
- wc->status = IB_WC_GENERAL_ERR;
- break;
- case TPT_ERR_BOUND:
- wc->status = IB_WC_LOC_LEN_ERR;
- break;
- case TPT_ERR_INVALIDATE_SHARED_MR:
- case TPT_ERR_INVALIDATE_MR_WITH_MW_BOUND:
- wc->status = IB_WC_MW_BIND_ERR;
- break;
- case TPT_ERR_CRC:
- case TPT_ERR_MARKER:
- case TPT_ERR_PDU_LEN_ERR:
- case TPT_ERR_OUT_OF_RQE:
- case TPT_ERR_DDP_VERSION:
- case TPT_ERR_RDMA_VERSION:
- case TPT_ERR_DDP_QUEUE_NUM:
- case TPT_ERR_MSN:
- case TPT_ERR_TBIT:
- case TPT_ERR_MO:
- case TPT_ERR_MSN_RANGE:
- case TPT_ERR_IRD_OVERFLOW:
- case TPT_ERR_OPCODE:
- wc->status = IB_WC_FATAL_ERR;
- break;
- case TPT_ERR_SWFLUSH:
- wc->status = IB_WC_WR_FLUSH_ERR;
- break;
- default:
- printk(KERN_ERR MOD "Unexpected cqe_status 0x%x for "
- "QPID=0x%0x\n", CQE_STATUS(cqe), CQE_QPID(cqe));
- ret = -EINVAL;
- }
- }
-out:
- if (wq)
- spin_unlock(&qhp->lock);
- return ret;
-}
-
-int iwch_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
-{
- struct iwch_dev *rhp;
- struct iwch_cq *chp;
- unsigned long flags;
- int npolled;
- int err = 0;
-
- chp = to_iwch_cq(ibcq);
- rhp = chp->rhp;
-
- spin_lock_irqsave(&chp->lock, flags);
- for (npolled = 0; npolled < num_entries; ++npolled) {
-#ifdef DEBUG
- int i=0;
-#endif
-
- /*
- * Because T3 can post CQEs that are _not_ associated
- * with a WR, we might have to poll again after removing
- * one of these.
- */
- do {
- err = iwch_poll_cq_one(rhp, chp, wc + npolled);
-#ifdef DEBUG
- BUG_ON(++i > 1000);
-#endif
- } while (err == -EAGAIN);
- if (err <= 0)
- break;
- }
- spin_unlock_irqrestore(&chp->lock, flags);
-
- if (err < 0)
- return err;
- else {
- return npolled;
- }
-}
diff --git a/drivers/infiniband/hw/cxgb3/iwch_ev.c b/drivers/infiniband/hw/cxgb3/iwch_ev.c
deleted file mode 100644
index abcc9e76962b..000000000000
--- a/drivers/infiniband/hw/cxgb3/iwch_ev.c
+++ /dev/null
@@ -1,232 +0,0 @@
-/*
- * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include <linux/gfp.h>
-#include <linux/mman.h>
-#include <net/sock.h>
-#include "iwch_provider.h"
-#include "iwch.h"
-#include "iwch_cm.h"
-#include "cxio_hal.h"
-#include "cxio_wr.h"
-
-static void post_qp_event(struct iwch_dev *rnicp, struct iwch_cq *chp,
- struct respQ_msg_t *rsp_msg,
- enum ib_event_type ib_event,
- int send_term)
-{
- struct ib_event event;
- struct iwch_qp_attributes attrs;
- struct iwch_qp *qhp;
- unsigned long flag;
-
- spin_lock(&rnicp->lock);
- qhp = get_qhp(rnicp, CQE_QPID(rsp_msg->cqe));
-
- if (!qhp) {
- printk(KERN_ERR "%s unaffiliated error 0x%x qpid 0x%x\n",
- __func__, CQE_STATUS(rsp_msg->cqe),
- CQE_QPID(rsp_msg->cqe));
- spin_unlock(&rnicp->lock);
- return;
- }
-
- if ((qhp->attr.state == IWCH_QP_STATE_ERROR) ||
- (qhp->attr.state == IWCH_QP_STATE_TERMINATE)) {
- PDBG("%s AE received after RTS - "
- "qp state %d qpid 0x%x status 0x%x\n", __func__,
- qhp->attr.state, qhp->wq.qpid, CQE_STATUS(rsp_msg->cqe));
- spin_unlock(&rnicp->lock);
- return;
- }
-
- printk(KERN_ERR "%s - AE qpid 0x%x opcode %d status 0x%x "
- "type %d wrid.hi 0x%x wrid.lo 0x%x \n", __func__,
- CQE_QPID(rsp_msg->cqe), CQE_OPCODE(rsp_msg->cqe),
- CQE_STATUS(rsp_msg->cqe), CQE_TYPE(rsp_msg->cqe),
- CQE_WRID_HI(rsp_msg->cqe), CQE_WRID_LOW(rsp_msg->cqe));
-
- atomic_inc(&qhp->refcnt);
- spin_unlock(&rnicp->lock);
-
- if (qhp->attr.state == IWCH_QP_STATE_RTS) {
- attrs.next_state = IWCH_QP_STATE_TERMINATE;
- iwch_modify_qp(qhp->rhp, qhp, IWCH_QP_ATTR_NEXT_STATE,
- &attrs, 1);
- if (send_term)
- iwch_post_terminate(qhp, rsp_msg);
- }
-
- event.event = ib_event;
- event.device = chp->ibcq.device;
- if (ib_event == IB_EVENT_CQ_ERR)
- event.element.cq = &chp->ibcq;
- else
- event.element.qp = &qhp->ibqp;
-
- if (qhp->ibqp.event_handler)
- (*qhp->ibqp.event_handler)(&event, qhp->ibqp.qp_context);
-
- spin_lock_irqsave(&chp->comp_handler_lock, flag);
- (*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context);
- spin_unlock_irqrestore(&chp->comp_handler_lock, flag);
-
- if (atomic_dec_and_test(&qhp->refcnt))
- wake_up(&qhp->wait);
-}
-
-void iwch_ev_dispatch(struct cxio_rdev *rdev_p, struct sk_buff *skb)
-{
- struct iwch_dev *rnicp;
- struct respQ_msg_t *rsp_msg = (struct respQ_msg_t *) skb->data;
- struct iwch_cq *chp;
- struct iwch_qp *qhp;
- u32 cqid = RSPQ_CQID(rsp_msg);
- unsigned long flag;
-
- rnicp = (struct iwch_dev *) rdev_p->ulp;
- spin_lock(&rnicp->lock);
- chp = get_chp(rnicp, cqid);
- qhp = get_qhp(rnicp, CQE_QPID(rsp_msg->cqe));
- if (!chp || !qhp) {
- printk(KERN_ERR MOD "BAD AE cqid 0x%x qpid 0x%x opcode %d "
- "status 0x%x type %d wrid.hi 0x%x wrid.lo 0x%x \n",
- cqid, CQE_QPID(rsp_msg->cqe),
- CQE_OPCODE(rsp_msg->cqe), CQE_STATUS(rsp_msg->cqe),
- CQE_TYPE(rsp_msg->cqe), CQE_WRID_HI(rsp_msg->cqe),
- CQE_WRID_LOW(rsp_msg->cqe));
- spin_unlock(&rnicp->lock);
- goto out;
- }
- iwch_qp_add_ref(&qhp->ibqp);
- atomic_inc(&chp->refcnt);
- spin_unlock(&rnicp->lock);
-
- /*
- * 1) completion of our sending a TERMINATE.
- * 2) incoming TERMINATE message.
- */
- if ((CQE_OPCODE(rsp_msg->cqe) == T3_TERMINATE) &&
- (CQE_STATUS(rsp_msg->cqe) == 0)) {
- if (SQ_TYPE(rsp_msg->cqe)) {
- PDBG("%s QPID 0x%x ep %p disconnecting\n",
- __func__, qhp->wq.qpid, qhp->ep);
- iwch_ep_disconnect(qhp->ep, 0, GFP_ATOMIC);
- } else {
- PDBG("%s post REQ_ERR AE QPID 0x%x\n", __func__,
- qhp->wq.qpid);
- post_qp_event(rnicp, chp, rsp_msg,
- IB_EVENT_QP_REQ_ERR, 0);
- iwch_ep_disconnect(qhp->ep, 0, GFP_ATOMIC);
- }
- goto done;
- }
-
- /* Bad incoming Read request */
- if (SQ_TYPE(rsp_msg->cqe) &&
- (CQE_OPCODE(rsp_msg->cqe) == T3_READ_RESP)) {
- post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_QP_REQ_ERR, 1);
- goto done;
- }
-
- /* Bad incoming write */
- if (RQ_TYPE(rsp_msg->cqe) &&
- (CQE_OPCODE(rsp_msg->cqe) == T3_RDMA_WRITE)) {
- post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_QP_REQ_ERR, 1);
- goto done;
- }
-
- switch (CQE_STATUS(rsp_msg->cqe)) {
-
- /* Completion Events */
- case TPT_ERR_SUCCESS:
-
- /*
- * Confirm the destination entry if this is a RECV completion.
- */
- if (qhp->ep && SQ_TYPE(rsp_msg->cqe))
- dst_confirm(qhp->ep->dst);
- spin_lock_irqsave(&chp->comp_handler_lock, flag);
- (*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context);
- spin_unlock_irqrestore(&chp->comp_handler_lock, flag);
- break;
-
- case TPT_ERR_STAG:
- case TPT_ERR_PDID:
- case TPT_ERR_QPID:
- case TPT_ERR_ACCESS:
- case TPT_ERR_WRAP:
- case TPT_ERR_BOUND:
- case TPT_ERR_INVALIDATE_SHARED_MR:
- case TPT_ERR_INVALIDATE_MR_WITH_MW_BOUND:
- post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_QP_ACCESS_ERR, 1);
- break;
-
- /* Device Fatal Errors */
- case TPT_ERR_ECC:
- case TPT_ERR_ECC_PSTAG:
- case TPT_ERR_INTERNAL_ERR:
- post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_DEVICE_FATAL, 1);
- break;
-
- /* QP Fatal Errors */
- case TPT_ERR_OUT_OF_RQE:
- case TPT_ERR_PBL_ADDR_BOUND:
- case TPT_ERR_CRC:
- case TPT_ERR_MARKER:
- case TPT_ERR_PDU_LEN_ERR:
- case TPT_ERR_DDP_VERSION:
- case TPT_ERR_RDMA_VERSION:
- case TPT_ERR_OPCODE:
- case TPT_ERR_DDP_QUEUE_NUM:
- case TPT_ERR_MSN:
- case TPT_ERR_TBIT:
- case TPT_ERR_MO:
- case TPT_ERR_MSN_GAP:
- case TPT_ERR_MSN_RANGE:
- case TPT_ERR_RQE_ADDR_BOUND:
- case TPT_ERR_IRD_OVERFLOW:
- post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_QP_FATAL, 1);
- break;
-
- default:
- printk(KERN_ERR MOD "Unknown T3 status 0x%x QPID 0x%x\n",
- CQE_STATUS(rsp_msg->cqe), qhp->wq.qpid);
- post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_QP_FATAL, 1);
- break;
- }
-done:
- if (atomic_dec_and_test(&chp->refcnt))
- wake_up(&chp->wait);
- iwch_qp_rem_ref(&qhp->ibqp);
-out:
- dev_kfree_skb_irq(skb);
-}
diff --git a/drivers/infiniband/hw/cxgb3/iwch_mem.c b/drivers/infiniband/hw/cxgb3/iwch_mem.c
deleted file mode 100644
index 1d04c872c9d5..000000000000
--- a/drivers/infiniband/hw/cxgb3/iwch_mem.c
+++ /dev/null
@@ -1,101 +0,0 @@
-/*
- * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include <linux/slab.h>
-#include <asm/byteorder.h>
-
-#include <rdma/iw_cm.h>
-#include <rdma/ib_verbs.h>
-
-#include "cxio_hal.h"
-#include "cxio_resource.h"
-#include "iwch.h"
-#include "iwch_provider.h"
-
-static int iwch_finish_mem_reg(struct iwch_mr *mhp, u32 stag)
-{
- u32 mmid;
-
- mhp->attr.state = 1;
- mhp->attr.stag = stag;
- mmid = stag >> 8;
- mhp->ibmr.rkey = mhp->ibmr.lkey = stag;
- PDBG("%s mmid 0x%x mhp %p\n", __func__, mmid, mhp);
- return insert_handle(mhp->rhp, &mhp->rhp->mmidr, mhp, mmid);
-}
-
-int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php,
- struct iwch_mr *mhp, int shift)
-{
- u32 stag;
- int ret;
-
- if (cxio_register_phys_mem(&rhp->rdev,
- &stag, mhp->attr.pdid,
- mhp->attr.perms,
- mhp->attr.zbva,
- mhp->attr.va_fbo,
- mhp->attr.len,
- shift - 12,
- mhp->attr.pbl_size, mhp->attr.pbl_addr))
- return -ENOMEM;
-
- ret = iwch_finish_mem_reg(mhp, stag);
- if (ret)
- cxio_dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size,
- mhp->attr.pbl_addr);
- return ret;
-}
-
-int iwch_alloc_pbl(struct iwch_mr *mhp, int npages)
-{
- mhp->attr.pbl_addr = cxio_hal_pblpool_alloc(&mhp->rhp->rdev,
- npages << 3);
-
- if (!mhp->attr.pbl_addr)
- return -ENOMEM;
-
- mhp->attr.pbl_size = npages;
-
- return 0;
-}
-
-void iwch_free_pbl(struct iwch_mr *mhp)
-{
- cxio_hal_pblpool_free(&mhp->rhp->rdev, mhp->attr.pbl_addr,
- mhp->attr.pbl_size << 3);
-}
-
-int iwch_write_pbl(struct iwch_mr *mhp, __be64 *pages, int npages, int offset)
-{
- return cxio_write_pbl(&mhp->rhp->rdev, pages,
- mhp->attr.pbl_addr + (offset << 3), npages);
-}
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c
deleted file mode 100644
index 9d5fe1853da4..000000000000
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c
+++ /dev/null
@@ -1,1487 +0,0 @@
-/*
- * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/device.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/delay.h>
-#include <linux/errno.h>
-#include <linux/list.h>
-#include <linux/sched.h>
-#include <linux/spinlock.h>
-#include <linux/ethtool.h>
-#include <linux/rtnetlink.h>
-#include <linux/inetdevice.h>
-#include <linux/slab.h>
-
-#include <asm/io.h>
-#include <asm/irq.h>
-#include <asm/byteorder.h>
-
-#include <rdma/iw_cm.h>
-#include <rdma/ib_verbs.h>
-#include <rdma/ib_smi.h>
-#include <rdma/ib_umem.h>
-#include <rdma/ib_user_verbs.h>
-
-#include "cxio_hal.h"
-#include "iwch.h"
-#include "iwch_provider.h"
-#include "iwch_cm.h"
-#include <rdma/cxgb3-abi.h>
-#include "common.h"
-
-static struct ib_ah *iwch_ah_create(struct ib_pd *pd,
- struct ib_ah_attr *ah_attr,
- struct ib_udata *udata)
-{
- return ERR_PTR(-ENOSYS);
-}
-
-static int iwch_ah_destroy(struct ib_ah *ah)
-{
- return -ENOSYS;
-}
-
-static int iwch_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
-{
- return -ENOSYS;
-}
-
-static int iwch_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
-{
- return -ENOSYS;
-}
-
-static int iwch_process_mad(struct ib_device *ibdev,
- int mad_flags,
- u8 port_num,
- const struct ib_wc *in_wc,
- const struct ib_grh *in_grh,
- const struct ib_mad_hdr *in_mad,
- size_t in_mad_size,
- struct ib_mad_hdr *out_mad,
- size_t *out_mad_size,
- u16 *out_mad_pkey_index)
-{
- return -ENOSYS;
-}
-
-static int iwch_dealloc_ucontext(struct ib_ucontext *context)
-{
- struct iwch_dev *rhp = to_iwch_dev(context->device);
- struct iwch_ucontext *ucontext = to_iwch_ucontext(context);
- struct iwch_mm_entry *mm, *tmp;
-
- PDBG("%s context %p\n", __func__, context);
- list_for_each_entry_safe(mm, tmp, &ucontext->mmaps, entry)
- kfree(mm);
- cxio_release_ucontext(&rhp->rdev, &ucontext->uctx);
- kfree(ucontext);
- return 0;
-}
-
-static struct ib_ucontext *iwch_alloc_ucontext(struct ib_device *ibdev,
- struct ib_udata *udata)
-{
- struct iwch_ucontext *context;
- struct iwch_dev *rhp = to_iwch_dev(ibdev);
-
- PDBG("%s ibdev %p\n", __func__, ibdev);
- context = kzalloc(sizeof(*context), GFP_KERNEL);
- if (!context)
- return ERR_PTR(-ENOMEM);
- cxio_init_ucontext(&rhp->rdev, &context->uctx);
- INIT_LIST_HEAD(&context->mmaps);
- spin_lock_init(&context->mmap_lock);
- return &context->ibucontext;
-}
-
-static int iwch_destroy_cq(struct ib_cq *ib_cq)
-{
- struct iwch_cq *chp;
-
- PDBG("%s ib_cq %p\n", __func__, ib_cq);
- chp = to_iwch_cq(ib_cq);
-
- remove_handle(chp->rhp, &chp->rhp->cqidr, chp->cq.cqid);
- atomic_dec(&chp->refcnt);
- wait_event(chp->wait, !atomic_read(&chp->refcnt));
-
- cxio_destroy_cq(&chp->rhp->rdev, &chp->cq);
- kfree(chp);
- return 0;
-}
-
-static struct ib_cq *iwch_create_cq(struct ib_device *ibdev,
- const struct ib_cq_init_attr *attr,
- struct ib_ucontext *ib_context,
- struct ib_udata *udata)
-{
- int entries = attr->cqe;
- struct iwch_dev *rhp;
- struct iwch_cq *chp;
- struct iwch_create_cq_resp uresp;
- struct iwch_create_cq_req ureq;
- struct iwch_ucontext *ucontext = NULL;
- static int warned;
- size_t resplen;
-
- PDBG("%s ib_dev %p entries %d\n", __func__, ibdev, entries);
- if (attr->flags)
- return ERR_PTR(-EINVAL);
-
- rhp = to_iwch_dev(ibdev);
- chp = kzalloc(sizeof(*chp), GFP_KERNEL);
- if (!chp)
- return ERR_PTR(-ENOMEM);
-
- if (ib_context) {
- ucontext = to_iwch_ucontext(ib_context);
- if (!t3a_device(rhp)) {
- if (ib_copy_from_udata(&ureq, udata, sizeof (ureq))) {
- kfree(chp);
- return ERR_PTR(-EFAULT);
- }
- chp->user_rptr_addr = (u32 __user *)(unsigned long)ureq.user_rptr_addr;
- }
- }
-
- if (t3a_device(rhp)) {
-
- /*
- * T3A: Add some fluff to handle extra CQEs inserted
- * for various errors.
- * Additional CQE possibilities:
- * TERMINATE,
- * incoming RDMA WRITE Failures
- * incoming RDMA READ REQUEST FAILUREs
- * NOTE: We cannot ensure the CQ won't overflow.
- */
- entries += 16;
- }
- entries = roundup_pow_of_two(entries);
- chp->cq.size_log2 = ilog2(entries);
-
- if (cxio_create_cq(&rhp->rdev, &chp->cq, !ucontext)) {
- kfree(chp);
- return ERR_PTR(-ENOMEM);
- }
- chp->rhp = rhp;
- chp->ibcq.cqe = 1 << chp->cq.size_log2;
- spin_lock_init(&chp->lock);
- spin_lock_init(&chp->comp_handler_lock);
- atomic_set(&chp->refcnt, 1);
- init_waitqueue_head(&chp->wait);
- if (insert_handle(rhp, &rhp->cqidr, chp, chp->cq.cqid)) {
- cxio_destroy_cq(&chp->rhp->rdev, &chp->cq);
- kfree(chp);
- return ERR_PTR(-ENOMEM);
- }
-
- if (ucontext) {
- struct iwch_mm_entry *mm;
-
- mm = kmalloc(sizeof *mm, GFP_KERNEL);
- if (!mm) {
- iwch_destroy_cq(&chp->ibcq);
- return ERR_PTR(-ENOMEM);
- }
- uresp.cqid = chp->cq.cqid;
- uresp.size_log2 = chp->cq.size_log2;
- spin_lock(&ucontext->mmap_lock);
- uresp.key = ucontext->key;
- ucontext->key += PAGE_SIZE;
- spin_unlock(&ucontext->mmap_lock);
- mm->key = uresp.key;
- mm->addr = virt_to_phys(chp->cq.queue);
- if (udata->outlen < sizeof uresp) {
- if (!warned++)
- printk(KERN_WARNING MOD "Warning - "
- "downlevel libcxgb3 (non-fatal).\n");
- mm->len = PAGE_ALIGN((1UL << uresp.size_log2) *
- sizeof(struct t3_cqe));
- resplen = sizeof(struct iwch_create_cq_resp_v0);
- } else {
- mm->len = PAGE_ALIGN(((1UL << uresp.size_log2) + 1) *
- sizeof(struct t3_cqe));
- uresp.memsize = mm->len;
- uresp.reserved = 0;
- resplen = sizeof uresp;
- }
- if (ib_copy_to_udata(udata, &uresp, resplen)) {
- kfree(mm);
- iwch_destroy_cq(&chp->ibcq);
- return ERR_PTR(-EFAULT);
- }
- insert_mmap(ucontext, mm);
- }
- PDBG("created cqid 0x%0x chp %p size 0x%0x, dma_addr 0x%0llx\n",
- chp->cq.cqid, chp, (1 << chp->cq.size_log2),
- (unsigned long long) chp->cq.dma_addr);
- return &chp->ibcq;
-}
-
-static int iwch_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata)
-{
-#ifdef notyet
- struct iwch_cq *chp = to_iwch_cq(cq);
- struct t3_cq oldcq, newcq;
- int ret;
-
- PDBG("%s ib_cq %p cqe %d\n", __func__, cq, cqe);
-
- /* We don't downsize... */
- if (cqe <= cq->cqe)
- return 0;
-
- /* create new t3_cq with new size */
- cqe = roundup_pow_of_two(cqe+1);
- newcq.size_log2 = ilog2(cqe);
-
- /* Dont allow resize to less than the current wce count */
- if (cqe < Q_COUNT(chp->cq.rptr, chp->cq.wptr)) {
- return -ENOMEM;
- }
-
- /* Quiesce all QPs using this CQ */
- ret = iwch_quiesce_qps(chp);
- if (ret) {
- return ret;
- }
-
- ret = cxio_create_cq(&chp->rhp->rdev, &newcq);
- if (ret) {
- return ret;
- }
-
- /* copy CQEs */
- memcpy(newcq.queue, chp->cq.queue, (1 << chp->cq.size_log2) *
- sizeof(struct t3_cqe));
-
- /* old iwch_qp gets new t3_cq but keeps old cqid */
- oldcq = chp->cq;
- chp->cq = newcq;
- chp->cq.cqid = oldcq.cqid;
-
- /* resize new t3_cq to update the HW context */
- ret = cxio_resize_cq(&chp->rhp->rdev, &chp->cq);
- if (ret) {
- chp->cq = oldcq;
- return ret;
- }
- chp->ibcq.cqe = (1<<chp->cq.size_log2) - 1;
-
- /* destroy old t3_cq */
- oldcq.cqid = newcq.cqid;
- ret = cxio_destroy_cq(&chp->rhp->rdev, &oldcq);
- if (ret) {
- printk(KERN_ERR MOD "%s - cxio_destroy_cq failed %d\n",
- __func__, ret);
- }
-
- /* add user hooks here */
-
- /* resume qps */
- ret = iwch_resume_qps(chp);
- return ret;
-#else
- return -ENOSYS;
-#endif
-}
-
-static int iwch_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
-{
- struct iwch_dev *rhp;
- struct iwch_cq *chp;
- enum t3_cq_opcode cq_op;
- int err;
- unsigned long flag;
- u32 rptr;
-
- chp = to_iwch_cq(ibcq);
- rhp = chp->rhp;
- if ((flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED)
- cq_op = CQ_ARM_SE;
- else
- cq_op = CQ_ARM_AN;
- if (chp->user_rptr_addr) {
- if (get_user(rptr, chp->user_rptr_addr))
- return -EFAULT;
- spin_lock_irqsave(&chp->lock, flag);
- chp->cq.rptr = rptr;
- } else
- spin_lock_irqsave(&chp->lock, flag);
- PDBG("%s rptr 0x%x\n", __func__, chp->cq.rptr);
- err = cxio_hal_cq_op(&rhp->rdev, &chp->cq, cq_op, 0);
- spin_unlock_irqrestore(&chp->lock, flag);
- if (err < 0)
- printk(KERN_ERR MOD "Error %d rearming CQID 0x%x\n", err,
- chp->cq.cqid);
- if (err > 0 && !(flags & IB_CQ_REPORT_MISSED_EVENTS))
- err = 0;
- return err;
-}
-
-static int iwch_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
-{
- int len = vma->vm_end - vma->vm_start;
- u32 key = vma->vm_pgoff << PAGE_SHIFT;
- struct cxio_rdev *rdev_p;
- int ret = 0;
- struct iwch_mm_entry *mm;
- struct iwch_ucontext *ucontext;
- u64 addr;
-
- PDBG("%s pgoff 0x%lx key 0x%x len %d\n", __func__, vma->vm_pgoff,
- key, len);
-
- if (vma->vm_start & (PAGE_SIZE-1)) {
- return -EINVAL;
- }
-
- rdev_p = &(to_iwch_dev(context->device)->rdev);
- ucontext = to_iwch_ucontext(context);
-
- mm = remove_mmap(ucontext, key, len);
- if (!mm)
- return -EINVAL;
- addr = mm->addr;
- kfree(mm);
-
- if ((addr >= rdev_p->rnic_info.udbell_physbase) &&
- (addr < (rdev_p->rnic_info.udbell_physbase +
- rdev_p->rnic_info.udbell_len))) {
-
- /*
- * Map T3 DB register.
- */
- if (vma->vm_flags & VM_READ) {
- return -EPERM;
- }
-
- vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
- vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND;
- vma->vm_flags &= ~VM_MAYREAD;
- ret = io_remap_pfn_range(vma, vma->vm_start,
- addr >> PAGE_SHIFT,
- len, vma->vm_page_prot);
- } else {
-
- /*
- * Map WQ or CQ contig dma memory...
- */
- ret = remap_pfn_range(vma, vma->vm_start,
- addr >> PAGE_SHIFT,
- len, vma->vm_page_prot);
- }
-
- return ret;
-}
-
-static int iwch_deallocate_pd(struct ib_pd *pd)
-{
- struct iwch_dev *rhp;
- struct iwch_pd *php;
-
- php = to_iwch_pd(pd);
- rhp = php->rhp;
- PDBG("%s ibpd %p pdid 0x%x\n", __func__, pd, php->pdid);
- cxio_hal_put_pdid(rhp->rdev.rscp, php->pdid);
- kfree(php);
- return 0;
-}
-
-static struct ib_pd *iwch_allocate_pd(struct ib_device *ibdev,
- struct ib_ucontext *context,
- struct ib_udata *udata)
-{
- struct iwch_pd *php;
- u32 pdid;
- struct iwch_dev *rhp;
-
- PDBG("%s ibdev %p\n", __func__, ibdev);
- rhp = (struct iwch_dev *) ibdev;
- pdid = cxio_hal_get_pdid(rhp->rdev.rscp);
- if (!pdid)
- return ERR_PTR(-EINVAL);
- php = kzalloc(sizeof(*php), GFP_KERNEL);
- if (!php) {
- cxio_hal_put_pdid(rhp->rdev.rscp, pdid);
- return ERR_PTR(-ENOMEM);
- }
- php->pdid = pdid;
- php->rhp = rhp;
- if (context) {
- if (ib_copy_to_udata(udata, &php->pdid, sizeof (__u32))) {
- iwch_deallocate_pd(&php->ibpd);
- return ERR_PTR(-EFAULT);
- }
- }
- PDBG("%s pdid 0x%0x ptr 0x%p\n", __func__, pdid, php);
- return &php->ibpd;
-}
-
-static int iwch_dereg_mr(struct ib_mr *ib_mr)
-{
- struct iwch_dev *rhp;
- struct iwch_mr *mhp;
- u32 mmid;
-
- PDBG("%s ib_mr %p\n", __func__, ib_mr);
-
- mhp = to_iwch_mr(ib_mr);
- kfree(mhp->pages);
- rhp = mhp->rhp;
- mmid = mhp->attr.stag >> 8;
- cxio_dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size,
- mhp->attr.pbl_addr);
- iwch_free_pbl(mhp);
- remove_handle(rhp, &rhp->mmidr, mmid);
- if (mhp->kva)
- kfree((void *) (unsigned long) mhp->kva);
- if (mhp->umem)
- ib_umem_release(mhp->umem);
- PDBG("%s mmid 0x%x ptr %p\n", __func__, mmid, mhp);
- kfree(mhp);
- return 0;
-}
-
-static struct ib_mr *iwch_get_dma_mr(struct ib_pd *pd, int acc)
-{
- const u64 total_size = 0xffffffff;
- const u64 mask = (total_size + PAGE_SIZE - 1) & PAGE_MASK;
- struct iwch_pd *php = to_iwch_pd(pd);
- struct iwch_dev *rhp = php->rhp;
- struct iwch_mr *mhp;
- __be64 *page_list;
- int shift = 26, npages, ret, i;
-
- PDBG("%s ib_pd %p\n", __func__, pd);
-
- /*
- * T3 only supports 32 bits of size.
- */
- if (sizeof(phys_addr_t) > 4) {
- pr_warn_once(MOD "Cannot support dma_mrs on this platform.\n");
- return ERR_PTR(-ENOTSUPP);
- }
-
- mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
- if (!mhp)
- return ERR_PTR(-ENOMEM);
-
- mhp->rhp = rhp;
-
- npages = (total_size + (1ULL << shift) - 1) >> shift;
- if (!npages) {
- ret = -EINVAL;
- goto err;
- }
-
- page_list = kmalloc_array(npages, sizeof(u64), GFP_KERNEL);
- if (!page_list) {
- ret = -ENOMEM;
- goto err;
- }
-
- for (i = 0; i < npages; i++)
- page_list[i] = cpu_to_be64((u64)i << shift);
-
- PDBG("%s mask 0x%llx shift %d len %lld pbl_size %d\n",
- __func__, mask, shift, total_size, npages);
-
- ret = iwch_alloc_pbl(mhp, npages);
- if (ret) {
- kfree(page_list);
- goto err_pbl;
- }
-
- ret = iwch_write_pbl(mhp, page_list, npages, 0);
- kfree(page_list);
- if (ret)
- goto err_pbl;
-
- mhp->attr.pdid = php->pdid;
- mhp->attr.zbva = 0;
-
- mhp->attr.perms = iwch_ib_to_tpt_access(acc);
- mhp->attr.va_fbo = 0;
- mhp->attr.page_size = shift - 12;
-
- mhp->attr.len = (u32) total_size;
- mhp->attr.pbl_size = npages;
- ret = iwch_register_mem(rhp, php, mhp, shift);
- if (ret)
- goto err_pbl;
-
- return &mhp->ibmr;
-
-err_pbl:
- iwch_free_pbl(mhp);
-
-err:
- kfree(mhp);
- return ERR_PTR(ret);
-}
-
-static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
- u64 virt, int acc, struct ib_udata *udata)
-{
- __be64 *pages;
- int shift, n, len;
- int i, k, entry;
- int err = 0;
- struct iwch_dev *rhp;
- struct iwch_pd *php;
- struct iwch_mr *mhp;
- struct iwch_reg_user_mr_resp uresp;
- struct scatterlist *sg;
- PDBG("%s ib_pd %p\n", __func__, pd);
-
- php = to_iwch_pd(pd);
- rhp = php->rhp;
- mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
- if (!mhp)
- return ERR_PTR(-ENOMEM);
-
- mhp->rhp = rhp;
-
- mhp->umem = ib_umem_get(pd->uobject->context, start, length, acc, 0);
- if (IS_ERR(mhp->umem)) {
- err = PTR_ERR(mhp->umem);
- kfree(mhp);
- return ERR_PTR(err);
- }
-
- shift = ffs(mhp->umem->page_size) - 1;
-
- n = mhp->umem->nmap;
-
- err = iwch_alloc_pbl(mhp, n);
- if (err)
- goto err;
-
- pages = (__be64 *) __get_free_page(GFP_KERNEL);
- if (!pages) {
- err = -ENOMEM;
- goto err_pbl;
- }
-
- i = n = 0;
-
- for_each_sg(mhp->umem->sg_head.sgl, sg, mhp->umem->nmap, entry) {
- len = sg_dma_len(sg) >> shift;
- for (k = 0; k < len; ++k) {
- pages[i++] = cpu_to_be64(sg_dma_address(sg) +
- mhp->umem->page_size * k);
- if (i == PAGE_SIZE / sizeof *pages) {
- err = iwch_write_pbl(mhp, pages, i, n);
- if (err)
- goto pbl_done;
- n += i;
- i = 0;
- }
- }
- }
-
- if (i)
- err = iwch_write_pbl(mhp, pages, i, n);
-
-pbl_done:
- free_page((unsigned long) pages);
- if (err)
- goto err_pbl;
-
- mhp->attr.pdid = php->pdid;
- mhp->attr.zbva = 0;
- mhp->attr.perms = iwch_ib_to_tpt_access(acc);
- mhp->attr.va_fbo = virt;
- mhp->attr.page_size = shift - 12;
- mhp->attr.len = (u32) length;
-
- err = iwch_register_mem(rhp, php, mhp, shift);
- if (err)
- goto err_pbl;
-
- if (udata && !t3a_device(rhp)) {
- uresp.pbl_addr = (mhp->attr.pbl_addr -
- rhp->rdev.rnic_info.pbl_base) >> 3;
- PDBG("%s user resp pbl_addr 0x%x\n", __func__,
- uresp.pbl_addr);
-
- if (ib_copy_to_udata(udata, &uresp, sizeof (uresp))) {
- iwch_dereg_mr(&mhp->ibmr);
- err = -EFAULT;
- goto err;
- }
- }
-
- return &mhp->ibmr;
-
-err_pbl:
- iwch_free_pbl(mhp);
-
-err:
- ib_umem_release(mhp->umem);
- kfree(mhp);
- return ERR_PTR(err);
-}
-
-static struct ib_mw *iwch_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
- struct ib_udata *udata)
-{
- struct iwch_dev *rhp;
- struct iwch_pd *php;
- struct iwch_mw *mhp;
- u32 mmid;
- u32 stag = 0;
- int ret;
-
- if (type != IB_MW_TYPE_1)
- return ERR_PTR(-EINVAL);
-
- php = to_iwch_pd(pd);
- rhp = php->rhp;
- mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
- if (!mhp)
- return ERR_PTR(-ENOMEM);
- ret = cxio_allocate_window(&rhp->rdev, &stag, php->pdid);
- if (ret) {
- kfree(mhp);
- return ERR_PTR(ret);
- }
- mhp->rhp = rhp;
- mhp->attr.pdid = php->pdid;
- mhp->attr.type = TPT_MW;
- mhp->attr.stag = stag;
- mmid = (stag) >> 8;
- mhp->ibmw.rkey = stag;
- if (insert_handle(rhp, &rhp->mmidr, mhp, mmid)) {
- cxio_deallocate_window(&rhp->rdev, mhp->attr.stag);
- kfree(mhp);
- return ERR_PTR(-ENOMEM);
- }
- PDBG("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag);
- return &(mhp->ibmw);
-}
-
-static int iwch_dealloc_mw(struct ib_mw *mw)
-{
- struct iwch_dev *rhp;
- struct iwch_mw *mhp;
- u32 mmid;
-
- mhp = to_iwch_mw(mw);
- rhp = mhp->rhp;
- mmid = (mw->rkey) >> 8;
- cxio_deallocate_window(&rhp->rdev, mhp->attr.stag);
- remove_handle(rhp, &rhp->mmidr, mmid);
- PDBG("%s ib_mw %p mmid 0x%x ptr %p\n", __func__, mw, mmid, mhp);
- kfree(mhp);
- return 0;
-}
-
-static struct ib_mr *iwch_alloc_mr(struct ib_pd *pd,
- enum ib_mr_type mr_type,
- u32 max_num_sg)
-{
- struct iwch_dev *rhp;
- struct iwch_pd *php;
- struct iwch_mr *mhp;
- u32 mmid;
- u32 stag = 0;
- int ret = 0;
-
- if (mr_type != IB_MR_TYPE_MEM_REG ||
- max_num_sg > T3_MAX_FASTREG_DEPTH)
- return ERR_PTR(-EINVAL);
-
- php = to_iwch_pd(pd);
- rhp = php->rhp;
- mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
- if (!mhp)
- goto err;
-
- mhp->pages = kcalloc(max_num_sg, sizeof(u64), GFP_KERNEL);
- if (!mhp->pages) {
- ret = -ENOMEM;
- goto pl_err;
- }
-
- mhp->rhp = rhp;
- ret = iwch_alloc_pbl(mhp, max_num_sg);
- if (ret)
- goto err1;
- mhp->attr.pbl_size = max_num_sg;
- ret = cxio_allocate_stag(&rhp->rdev, &stag, php->pdid,
- mhp->attr.pbl_size, mhp->attr.pbl_addr);
- if (ret)
- goto err2;
- mhp->attr.pdid = php->pdid;
- mhp->attr.type = TPT_NON_SHARED_MR;
- mhp->attr.stag = stag;
- mhp->attr.state = 1;
- mmid = (stag) >> 8;
- mhp->ibmr.rkey = mhp->ibmr.lkey = stag;
- if (insert_handle(rhp, &rhp->mmidr, mhp, mmid))
- goto err3;
-
- PDBG("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag);
- return &(mhp->ibmr);
-err3:
- cxio_dereg_mem(&rhp->rdev, stag, mhp->attr.pbl_size,
- mhp->attr.pbl_addr);
-err2:
- iwch_free_pbl(mhp);
-err1:
- kfree(mhp->pages);
-pl_err:
- kfree(mhp);
-err:
- return ERR_PTR(ret);
-}
-
-static int iwch_set_page(struct ib_mr *ibmr, u64 addr)
-{
- struct iwch_mr *mhp = to_iwch_mr(ibmr);
-
- if (unlikely(mhp->npages == mhp->attr.pbl_size))
- return -ENOMEM;
-
- mhp->pages[mhp->npages++] = addr;
-
- return 0;
-}
-
-static int iwch_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
- int sg_nents, unsigned int *sg_offset)
-{
- struct iwch_mr *mhp = to_iwch_mr(ibmr);
-
- mhp->npages = 0;
-
- return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, iwch_set_page);
-}
-
-static int iwch_destroy_qp(struct ib_qp *ib_qp)
-{
- struct iwch_dev *rhp;
- struct iwch_qp *qhp;
- struct iwch_qp_attributes attrs;
- struct iwch_ucontext *ucontext;
-
- qhp = to_iwch_qp(ib_qp);
- rhp = qhp->rhp;
-
- attrs.next_state = IWCH_QP_STATE_ERROR;
- iwch_modify_qp(rhp, qhp, IWCH_QP_ATTR_NEXT_STATE, &attrs, 0);
- wait_event(qhp->wait, !qhp->ep);
-
- remove_handle(rhp, &rhp->qpidr, qhp->wq.qpid);
-
- atomic_dec(&qhp->refcnt);
- wait_event(qhp->wait, !atomic_read(&qhp->refcnt));
-
- ucontext = ib_qp->uobject ? to_iwch_ucontext(ib_qp->uobject->context)
- : NULL;
- cxio_destroy_qp(&rhp->rdev, &qhp->wq,
- ucontext ? &ucontext->uctx : &rhp->rdev.uctx);
-
- PDBG("%s ib_qp %p qpid 0x%0x qhp %p\n", __func__,
- ib_qp, qhp->wq.qpid, qhp);
- kfree(qhp);
- return 0;
-}
-
-static struct ib_qp *iwch_create_qp(struct ib_pd *pd,
- struct ib_qp_init_attr *attrs,
- struct ib_udata *udata)
-{
- struct iwch_dev *rhp;
- struct iwch_qp *qhp;
- struct iwch_pd *php;
- struct iwch_cq *schp;
- struct iwch_cq *rchp;
- struct iwch_create_qp_resp uresp;
- int wqsize, sqsize, rqsize;
- struct iwch_ucontext *ucontext;
-
- PDBG("%s ib_pd %p\n", __func__, pd);
- if (attrs->qp_type != IB_QPT_RC)
- return ERR_PTR(-EINVAL);
- php = to_iwch_pd(pd);
- rhp = php->rhp;
- schp = get_chp(rhp, ((struct iwch_cq *) attrs->send_cq)->cq.cqid);
- rchp = get_chp(rhp, ((struct iwch_cq *) attrs->recv_cq)->cq.cqid);
- if (!schp || !rchp)
- return ERR_PTR(-EINVAL);
-
- /* The RQT size must be # of entries + 1 rounded up to a power of two */
- rqsize = roundup_pow_of_two(attrs->cap.max_recv_wr);
- if (rqsize == attrs->cap.max_recv_wr)
- rqsize = roundup_pow_of_two(attrs->cap.max_recv_wr+1);
-
- /* T3 doesn't support RQT depth < 16 */
- if (rqsize < 16)
- rqsize = 16;
-
- if (rqsize > T3_MAX_RQ_SIZE)
- return ERR_PTR(-EINVAL);
-
- if (attrs->cap.max_inline_data > T3_MAX_INLINE)
- return ERR_PTR(-EINVAL);
-
- /*
- * NOTE: The SQ and total WQ sizes don't need to be
- * a power of two. However, all the code assumes
- * they are. EG: Q_FREECNT() and friends.
- */
- sqsize = roundup_pow_of_two(attrs->cap.max_send_wr);
- wqsize = roundup_pow_of_two(rqsize + sqsize);
-
- /*
- * Kernel users need more wq space for fastreg WRs which can take
- * 2 WR fragments.
- */
- ucontext = pd->uobject ? to_iwch_ucontext(pd->uobject->context) : NULL;
- if (!ucontext && wqsize < (rqsize + (2 * sqsize)))
- wqsize = roundup_pow_of_two(rqsize +
- roundup_pow_of_two(attrs->cap.max_send_wr * 2));
- PDBG("%s wqsize %d sqsize %d rqsize %d\n", __func__,
- wqsize, sqsize, rqsize);
- qhp = kzalloc(sizeof(*qhp), GFP_KERNEL);
- if (!qhp)
- return ERR_PTR(-ENOMEM);
- qhp->wq.size_log2 = ilog2(wqsize);
- qhp->wq.rq_size_log2 = ilog2(rqsize);
- qhp->wq.sq_size_log2 = ilog2(sqsize);
- if (cxio_create_qp(&rhp->rdev, !udata, &qhp->wq,
- ucontext ? &ucontext->uctx : &rhp->rdev.uctx)) {
- kfree(qhp);
- return ERR_PTR(-ENOMEM);
- }
-
- attrs->cap.max_recv_wr = rqsize - 1;
- attrs->cap.max_send_wr = sqsize;
- attrs->cap.max_inline_data = T3_MAX_INLINE;
-
- qhp->rhp = rhp;
- qhp->attr.pd = php->pdid;
- qhp->attr.scq = ((struct iwch_cq *) attrs->send_cq)->cq.cqid;
- qhp->attr.rcq = ((struct iwch_cq *) attrs->recv_cq)->cq.cqid;
- qhp->attr.sq_num_entries = attrs->cap.max_send_wr;
- qhp->attr.rq_num_entries = attrs->cap.max_recv_wr;
- qhp->attr.sq_max_sges = attrs->cap.max_send_sge;
- qhp->attr.sq_max_sges_rdma_write = attrs->cap.max_send_sge;
- qhp->attr.rq_max_sges = attrs->cap.max_recv_sge;
- qhp->attr.state = IWCH_QP_STATE_IDLE;
- qhp->attr.next_state = IWCH_QP_STATE_IDLE;
-
- /*
- * XXX - These don't get passed in from the openib user
- * at create time. The CM sets them via a QP modify.
- * Need to fix... I think the CM should
- */
- qhp->attr.enable_rdma_read = 1;
- qhp->attr.enable_rdma_write = 1;
- qhp->attr.enable_bind = 1;
- qhp->attr.max_ord = 1;
- qhp->attr.max_ird = 1;
-
- spin_lock_init(&qhp->lock);
- init_waitqueue_head(&qhp->wait);
- atomic_set(&qhp->refcnt, 1);
-
- if (insert_handle(rhp, &rhp->qpidr, qhp, qhp->wq.qpid)) {
- cxio_destroy_qp(&rhp->rdev, &qhp->wq,
- ucontext ? &ucontext->uctx : &rhp->rdev.uctx);
- kfree(qhp);
- return ERR_PTR(-ENOMEM);
- }
-
- if (udata) {
-
- struct iwch_mm_entry *mm1, *mm2;
-
- mm1 = kmalloc(sizeof *mm1, GFP_KERNEL);
- if (!mm1) {
- iwch_destroy_qp(&qhp->ibqp);
- return ERR_PTR(-ENOMEM);
- }
-
- mm2 = kmalloc(sizeof *mm2, GFP_KERNEL);
- if (!mm2) {
- kfree(mm1);
- iwch_destroy_qp(&qhp->ibqp);
- return ERR_PTR(-ENOMEM);
- }
-
- uresp.qpid = qhp->wq.qpid;
- uresp.size_log2 = qhp->wq.size_log2;
- uresp.sq_size_log2 = qhp->wq.sq_size_log2;
- uresp.rq_size_log2 = qhp->wq.rq_size_log2;
- spin_lock(&ucontext->mmap_lock);
- uresp.key = ucontext->key;
- ucontext->key += PAGE_SIZE;
- uresp.db_key = ucontext->key;
- ucontext->key += PAGE_SIZE;
- spin_unlock(&ucontext->mmap_lock);
- if (ib_copy_to_udata(udata, &uresp, sizeof (uresp))) {
- kfree(mm1);
- kfree(mm2);
- iwch_destroy_qp(&qhp->ibqp);
- return ERR_PTR(-EFAULT);
- }
- mm1->key = uresp.key;
- mm1->addr = virt_to_phys(qhp->wq.queue);
- mm1->len = PAGE_ALIGN(wqsize * sizeof (union t3_wr));
- insert_mmap(ucontext, mm1);
- mm2->key = uresp.db_key;
- mm2->addr = qhp->wq.udb & PAGE_MASK;
- mm2->len = PAGE_SIZE;
- insert_mmap(ucontext, mm2);
- }
- qhp->ibqp.qp_num = qhp->wq.qpid;
- init_timer(&(qhp->timer));
- PDBG("%s sq_num_entries %d, rq_num_entries %d "
- "qpid 0x%0x qhp %p dma_addr 0x%llx size %d rq_addr 0x%x\n",
- __func__, qhp->attr.sq_num_entries, qhp->attr.rq_num_entries,
- qhp->wq.qpid, qhp, (unsigned long long) qhp->wq.dma_addr,
- 1 << qhp->wq.size_log2, qhp->wq.rq_addr);
- return &qhp->ibqp;
-}
-
-static int iwch_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
- int attr_mask, struct ib_udata *udata)
-{
- struct iwch_dev *rhp;
- struct iwch_qp *qhp;
- enum iwch_qp_attr_mask mask = 0;
- struct iwch_qp_attributes attrs;
-
- PDBG("%s ib_qp %p\n", __func__, ibqp);
-
- /* iwarp does not support the RTR state */
- if ((attr_mask & IB_QP_STATE) && (attr->qp_state == IB_QPS_RTR))
- attr_mask &= ~IB_QP_STATE;
-
- /* Make sure we still have something left to do */
- if (!attr_mask)
- return 0;
-
- memset(&attrs, 0, sizeof attrs);
- qhp = to_iwch_qp(ibqp);
- rhp = qhp->rhp;
-
- attrs.next_state = iwch_convert_state(attr->qp_state);
- attrs.enable_rdma_read = (attr->qp_access_flags &
- IB_ACCESS_REMOTE_READ) ? 1 : 0;
- attrs.enable_rdma_write = (attr->qp_access_flags &
- IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
- attrs.enable_bind = (attr->qp_access_flags & IB_ACCESS_MW_BIND) ? 1 : 0;
-
-
- mask |= (attr_mask & IB_QP_STATE) ? IWCH_QP_ATTR_NEXT_STATE : 0;
- mask |= (attr_mask & IB_QP_ACCESS_FLAGS) ?
- (IWCH_QP_ATTR_ENABLE_RDMA_READ |
- IWCH_QP_ATTR_ENABLE_RDMA_WRITE |
- IWCH_QP_ATTR_ENABLE_RDMA_BIND) : 0;
-
- return iwch_modify_qp(rhp, qhp, mask, &attrs, 0);
-}
-
-void iwch_qp_add_ref(struct ib_qp *qp)
-{
- PDBG("%s ib_qp %p\n", __func__, qp);
- atomic_inc(&(to_iwch_qp(qp)->refcnt));
-}
-
-void iwch_qp_rem_ref(struct ib_qp *qp)
-{
- PDBG("%s ib_qp %p\n", __func__, qp);
- if (atomic_dec_and_test(&(to_iwch_qp(qp)->refcnt)))
- wake_up(&(to_iwch_qp(qp)->wait));
-}
-
-static struct ib_qp *iwch_get_qp(struct ib_device *dev, int qpn)
-{
- PDBG("%s ib_dev %p qpn 0x%x\n", __func__, dev, qpn);
- return (struct ib_qp *)get_qhp(to_iwch_dev(dev), qpn);
-}
-
-
-static int iwch_query_pkey(struct ib_device *ibdev,
- u8 port, u16 index, u16 * pkey)
-{
- PDBG("%s ibdev %p\n", __func__, ibdev);
- *pkey = 0;
- return 0;
-}
-
-static int iwch_query_gid(struct ib_device *ibdev, u8 port,
- int index, union ib_gid *gid)
-{
- struct iwch_dev *dev;
-
- PDBG("%s ibdev %p, port %d, index %d, gid %p\n",
- __func__, ibdev, port, index, gid);
- dev = to_iwch_dev(ibdev);
- BUG_ON(port == 0 || port > 2);
- memset(&(gid->raw[0]), 0, sizeof(gid->raw));
- memcpy(&(gid->raw[0]), dev->rdev.port_info.lldevs[port-1]->dev_addr, 6);
- return 0;
-}
-
-static u64 fw_vers_string_to_u64(struct iwch_dev *iwch_dev)
-{
- struct ethtool_drvinfo info;
- struct net_device *lldev = iwch_dev->rdev.t3cdev_p->lldev;
- char *cp, *next;
- unsigned fw_maj, fw_min, fw_mic;
-
- lldev->ethtool_ops->get_drvinfo(lldev, &info);
-
- next = info.fw_version + 1;
- cp = strsep(&next, ".");
- sscanf(cp, "%i", &fw_maj);
- cp = strsep(&next, ".");
- sscanf(cp, "%i", &fw_min);
- cp = strsep(&next, ".");
- sscanf(cp, "%i", &fw_mic);
-
- return (((u64)fw_maj & 0xffff) << 32) | ((fw_min & 0xffff) << 16) |
- (fw_mic & 0xffff);
-}
-
-static int iwch_query_device(struct ib_device *ibdev, struct ib_device_attr *props,
- struct ib_udata *uhw)
-{
-
- struct iwch_dev *dev;
-
- PDBG("%s ibdev %p\n", __func__, ibdev);
-
- if (uhw->inlen || uhw->outlen)
- return -EINVAL;
-
- dev = to_iwch_dev(ibdev);
- memset(props, 0, sizeof *props);
- memcpy(&props->sys_image_guid, dev->rdev.t3cdev_p->lldev->dev_addr, 6);
- props->hw_ver = dev->rdev.t3cdev_p->type;
- props->fw_ver = fw_vers_string_to_u64(dev);
- props->device_cap_flags = dev->device_cap_flags;
- props->page_size_cap = dev->attr.mem_pgsizes_bitmask;
- props->vendor_id = (u32)dev->rdev.rnic_info.pdev->vendor;
- props->vendor_part_id = (u32)dev->rdev.rnic_info.pdev->device;
- props->max_mr_size = dev->attr.max_mr_size;
- props->max_qp = dev->attr.max_qps;
- props->max_qp_wr = dev->attr.max_wrs;
- props->max_sge = dev->attr.max_sge_per_wr;
- props->max_sge_rd = 1;
- props->max_qp_rd_atom = dev->attr.max_rdma_reads_per_qp;
- props->max_qp_init_rd_atom = dev->attr.max_rdma_reads_per_qp;
- props->max_cq = dev->attr.max_cqs;
- props->max_cqe = dev->attr.max_cqes_per_cq;
- props->max_mr = dev->attr.max_mem_regs;
- props->max_pd = dev->attr.max_pds;
- props->local_ca_ack_delay = 0;
- props->max_fast_reg_page_list_len = T3_MAX_FASTREG_DEPTH;
-
- return 0;
-}
-
-static int iwch_query_port(struct ib_device *ibdev,
- u8 port, struct ib_port_attr *props)
-{
- struct iwch_dev *dev;
- struct net_device *netdev;
- struct in_device *inetdev;
-
- PDBG("%s ibdev %p\n", __func__, ibdev);
-
- dev = to_iwch_dev(ibdev);
- netdev = dev->rdev.port_info.lldevs[port-1];
-
- memset(props, 0, sizeof(struct ib_port_attr));
- props->max_mtu = IB_MTU_4096;
- if (netdev->mtu >= 4096)
- props->active_mtu = IB_MTU_4096;
- else if (netdev->mtu >= 2048)
- props->active_mtu = IB_MTU_2048;
- else if (netdev->mtu >= 1024)
- props->active_mtu = IB_MTU_1024;
- else if (netdev->mtu >= 512)
- props->active_mtu = IB_MTU_512;
- else
- props->active_mtu = IB_MTU_256;
-
- if (!netif_carrier_ok(netdev))
- props->state = IB_PORT_DOWN;
- else {
- inetdev = in_dev_get(netdev);
- if (inetdev) {
- if (inetdev->ifa_list)
- props->state = IB_PORT_ACTIVE;
- else
- props->state = IB_PORT_INIT;
- in_dev_put(inetdev);
- } else
- props->state = IB_PORT_INIT;
- }
-
- props->port_cap_flags =
- IB_PORT_CM_SUP |
- IB_PORT_SNMP_TUNNEL_SUP |
- IB_PORT_REINIT_SUP |
- IB_PORT_DEVICE_MGMT_SUP |
- IB_PORT_VENDOR_CLASS_SUP | IB_PORT_BOOT_MGMT_SUP;
- props->gid_tbl_len = 1;
- props->pkey_tbl_len = 1;
- props->active_width = 2;
- props->active_speed = IB_SPEED_DDR;
- props->max_msg_sz = -1;
-
- return 0;
-}
-
-static ssize_t show_rev(struct device *dev, struct device_attribute *attr,
- char *buf)
-{
- struct iwch_dev *iwch_dev = container_of(dev, struct iwch_dev,
- ibdev.dev);
- PDBG("%s dev 0x%p\n", __func__, dev);
- return sprintf(buf, "%d\n", iwch_dev->rdev.t3cdev_p->type);
-}
-
-static ssize_t show_hca(struct device *dev, struct device_attribute *attr,
- char *buf)
-{
- struct iwch_dev *iwch_dev = container_of(dev, struct iwch_dev,
- ibdev.dev);
- struct ethtool_drvinfo info;
- struct net_device *lldev = iwch_dev->rdev.t3cdev_p->lldev;
-
- PDBG("%s dev 0x%p\n", __func__, dev);
- lldev->ethtool_ops->get_drvinfo(lldev, &info);
- return sprintf(buf, "%s\n", info.driver);
-}
-
-static ssize_t show_board(struct device *dev, struct device_attribute *attr,
- char *buf)
-{
- struct iwch_dev *iwch_dev = container_of(dev, struct iwch_dev,
- ibdev.dev);
- PDBG("%s dev 0x%p\n", __func__, dev);
- return sprintf(buf, "%x.%x\n", iwch_dev->rdev.rnic_info.pdev->vendor,
- iwch_dev->rdev.rnic_info.pdev->device);
-}
-
-enum counters {
- IPINRECEIVES,
- IPINHDRERRORS,
- IPINADDRERRORS,
- IPINUNKNOWNPROTOS,
- IPINDISCARDS,
- IPINDELIVERS,
- IPOUTREQUESTS,
- IPOUTDISCARDS,
- IPOUTNOROUTES,
- IPREASMTIMEOUT,
- IPREASMREQDS,
- IPREASMOKS,
- IPREASMFAILS,
- TCPACTIVEOPENS,
- TCPPASSIVEOPENS,
- TCPATTEMPTFAILS,
- TCPESTABRESETS,
- TCPCURRESTAB,
- TCPINSEGS,
- TCPOUTSEGS,
- TCPRETRANSSEGS,
- TCPINERRS,
- TCPOUTRSTS,
- TCPRTOMIN,
- TCPRTOMAX,
- NR_COUNTERS
-};
-
-static const char * const names[] = {
- [IPINRECEIVES] = "ipInReceives",
- [IPINHDRERRORS] = "ipInHdrErrors",
- [IPINADDRERRORS] = "ipInAddrErrors",
- [IPINUNKNOWNPROTOS] = "ipInUnknownProtos",
- [IPINDISCARDS] = "ipInDiscards",
- [IPINDELIVERS] = "ipInDelivers",
- [IPOUTREQUESTS] = "ipOutRequests",
- [IPOUTDISCARDS] = "ipOutDiscards",
- [IPOUTNOROUTES] = "ipOutNoRoutes",
- [IPREASMTIMEOUT] = "ipReasmTimeout",
- [IPREASMREQDS] = "ipReasmReqds",
- [IPREASMOKS] = "ipReasmOKs",
- [IPREASMFAILS] = "ipReasmFails",
- [TCPACTIVEOPENS] = "tcpActiveOpens",
- [TCPPASSIVEOPENS] = "tcpPassiveOpens",
- [TCPATTEMPTFAILS] = "tcpAttemptFails",
- [TCPESTABRESETS] = "tcpEstabResets",
- [TCPCURRESTAB] = "tcpCurrEstab",
- [TCPINSEGS] = "tcpInSegs",
- [TCPOUTSEGS] = "tcpOutSegs",
- [TCPRETRANSSEGS] = "tcpRetransSegs",
- [TCPINERRS] = "tcpInErrs",
- [TCPOUTRSTS] = "tcpOutRsts",
- [TCPRTOMIN] = "tcpRtoMin",
- [TCPRTOMAX] = "tcpRtoMax",
-};
-
-static struct rdma_hw_stats *iwch_alloc_stats(struct ib_device *ibdev,
- u8 port_num)
-{
- BUILD_BUG_ON(ARRAY_SIZE(names) != NR_COUNTERS);
-
- /* Our driver only supports device level stats */
- if (port_num != 0)
- return NULL;
-
- return rdma_alloc_hw_stats_struct(names, NR_COUNTERS,
- RDMA_HW_STATS_DEFAULT_LIFESPAN);
-}
-
-static int iwch_get_mib(struct ib_device *ibdev, struct rdma_hw_stats *stats,
- u8 port, int index)
-{
- struct iwch_dev *dev;
- struct tp_mib_stats m;
- int ret;
-
- if (port != 0 || !stats)
- return -ENOSYS;
-
- PDBG("%s ibdev %p\n", __func__, ibdev);
- dev = to_iwch_dev(ibdev);
- ret = dev->rdev.t3cdev_p->ctl(dev->rdev.t3cdev_p, RDMA_GET_MIB, &m);
- if (ret)
- return -ENOSYS;
-
- stats->value[IPINRECEIVES] = ((u64)m.ipInReceive_hi << 32) + m.ipInReceive_lo;
- stats->value[IPINHDRERRORS] = ((u64)m.ipInHdrErrors_hi << 32) + m.ipInHdrErrors_lo;
- stats->value[IPINADDRERRORS] = ((u64)m.ipInAddrErrors_hi << 32) + m.ipInAddrErrors_lo;
- stats->value[IPINUNKNOWNPROTOS] = ((u64)m.ipInUnknownProtos_hi << 32) + m.ipInUnknownProtos_lo;
- stats->value[IPINDISCARDS] = ((u64)m.ipInDiscards_hi << 32) + m.ipInDiscards_lo;
- stats->value[IPINDELIVERS] = ((u64)m.ipInDelivers_hi << 32) + m.ipInDelivers_lo;
- stats->value[IPOUTREQUESTS] = ((u64)m.ipOutRequests_hi << 32) + m.ipOutRequests_lo;
- stats->value[IPOUTDISCARDS] = ((u64)m.ipOutDiscards_hi << 32) + m.ipOutDiscards_lo;
- stats->value[IPOUTNOROUTES] = ((u64)m.ipOutNoRoutes_hi << 32) + m.ipOutNoRoutes_lo;
- stats->value[IPREASMTIMEOUT] = m.ipReasmTimeout;
- stats->value[IPREASMREQDS] = m.ipReasmReqds;
- stats->value[IPREASMOKS] = m.ipReasmOKs;
- stats->value[IPREASMFAILS] = m.ipReasmFails;
- stats->value[TCPACTIVEOPENS] = m.tcpActiveOpens;
- stats->value[TCPPASSIVEOPENS] = m.tcpPassiveOpens;
- stats->value[TCPATTEMPTFAILS] = m.tcpAttemptFails;
- stats->value[TCPESTABRESETS] = m.tcpEstabResets;
- stats->value[TCPCURRESTAB] = m.tcpOutRsts;
- stats->value[TCPINSEGS] = m.tcpCurrEstab;
- stats->value[TCPOUTSEGS] = ((u64)m.tcpInSegs_hi << 32) + m.tcpInSegs_lo;
- stats->value[TCPRETRANSSEGS] = ((u64)m.tcpOutSegs_hi << 32) + m.tcpOutSegs_lo;
- stats->value[TCPINERRS] = ((u64)m.tcpRetransSeg_hi << 32) + m.tcpRetransSeg_lo,
- stats->value[TCPOUTRSTS] = ((u64)m.tcpInErrs_hi << 32) + m.tcpInErrs_lo;
- stats->value[TCPRTOMIN] = m.tcpRtoMin;
- stats->value[TCPRTOMAX] = m.tcpRtoMax;
-
- return stats->num_counters;
-}
-
-static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
-static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
-static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
-
-static struct device_attribute *iwch_class_attributes[] = {
- &dev_attr_hw_rev,
- &dev_attr_hca_type,
- &dev_attr_board_id,
-};
-
-static int iwch_port_immutable(struct ib_device *ibdev, u8 port_num,
- struct ib_port_immutable *immutable)
-{
- struct ib_port_attr attr;
- int err;
-
- err = iwch_query_port(ibdev, port_num, &attr);
- if (err)
- return err;
-
- immutable->pkey_tbl_len = attr.pkey_tbl_len;
- immutable->gid_tbl_len = attr.gid_tbl_len;
- immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
-
- return 0;
-}
-
-static void get_dev_fw_ver_str(struct ib_device *ibdev, char *str,
- size_t str_len)
-{
- struct iwch_dev *iwch_dev = to_iwch_dev(ibdev);
- struct ethtool_drvinfo info;
- struct net_device *lldev = iwch_dev->rdev.t3cdev_p->lldev;
-
- PDBG("%s dev 0x%p\n", __func__, iwch_dev);
- lldev->ethtool_ops->get_drvinfo(lldev, &info);
- snprintf(str, str_len, "%s", info.fw_version);
-}
-
-int iwch_register_device(struct iwch_dev *dev)
-{
- int ret;
- int i;
-
- PDBG("%s iwch_dev %p\n", __func__, dev);
- strlcpy(dev->ibdev.name, "cxgb3_%d", IB_DEVICE_NAME_MAX);
- memset(&dev->ibdev.node_guid, 0, sizeof(dev->ibdev.node_guid));
- memcpy(&dev->ibdev.node_guid, dev->rdev.t3cdev_p->lldev->dev_addr, 6);
- dev->ibdev.owner = THIS_MODULE;
- dev->device_cap_flags = IB_DEVICE_LOCAL_DMA_LKEY |
- IB_DEVICE_MEM_WINDOW |
- IB_DEVICE_MEM_MGT_EXTENSIONS;
-
- /* cxgb3 supports STag 0. */
- dev->ibdev.local_dma_lkey = 0;
-
- dev->ibdev.uverbs_cmd_mask =
- (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
- (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
- (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
- (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
- (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
- (1ull << IB_USER_VERBS_CMD_REG_MR) |
- (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
- (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
- (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
- (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) |
- (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
- (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
- (1ull << IB_USER_VERBS_CMD_POLL_CQ) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
- (1ull << IB_USER_VERBS_CMD_POST_SEND) |
- (1ull << IB_USER_VERBS_CMD_POST_RECV);
- dev->ibdev.node_type = RDMA_NODE_RNIC;
- BUILD_BUG_ON(sizeof(IWCH_NODE_DESC) > IB_DEVICE_NODE_DESC_MAX);
- memcpy(dev->ibdev.node_desc, IWCH_NODE_DESC, sizeof(IWCH_NODE_DESC));
- dev->ibdev.phys_port_cnt = dev->rdev.port_info.nports;
- dev->ibdev.num_comp_vectors = 1;
- dev->ibdev.dma_device = &(dev->rdev.rnic_info.pdev->dev);
- dev->ibdev.query_device = iwch_query_device;
- dev->ibdev.query_port = iwch_query_port;
- dev->ibdev.query_pkey = iwch_query_pkey;
- dev->ibdev.query_gid = iwch_query_gid;
- dev->ibdev.alloc_ucontext = iwch_alloc_ucontext;
- dev->ibdev.dealloc_ucontext = iwch_dealloc_ucontext;
- dev->ibdev.mmap = iwch_mmap;
- dev->ibdev.alloc_pd = iwch_allocate_pd;
- dev->ibdev.dealloc_pd = iwch_deallocate_pd;
- dev->ibdev.create_ah = iwch_ah_create;
- dev->ibdev.destroy_ah = iwch_ah_destroy;
- dev->ibdev.create_qp = iwch_create_qp;
- dev->ibdev.modify_qp = iwch_ib_modify_qp;
- dev->ibdev.destroy_qp = iwch_destroy_qp;
- dev->ibdev.create_cq = iwch_create_cq;
- dev->ibdev.destroy_cq = iwch_destroy_cq;
- dev->ibdev.resize_cq = iwch_resize_cq;
- dev->ibdev.poll_cq = iwch_poll_cq;
- dev->ibdev.get_dma_mr = iwch_get_dma_mr;
- dev->ibdev.reg_user_mr = iwch_reg_user_mr;
- dev->ibdev.dereg_mr = iwch_dereg_mr;
- dev->ibdev.alloc_mw = iwch_alloc_mw;
- dev->ibdev.dealloc_mw = iwch_dealloc_mw;
- dev->ibdev.alloc_mr = iwch_alloc_mr;
- dev->ibdev.map_mr_sg = iwch_map_mr_sg;
- dev->ibdev.attach_mcast = iwch_multicast_attach;
- dev->ibdev.detach_mcast = iwch_multicast_detach;
- dev->ibdev.process_mad = iwch_process_mad;
- dev->ibdev.req_notify_cq = iwch_arm_cq;
- dev->ibdev.post_send = iwch_post_send;
- dev->ibdev.post_recv = iwch_post_receive;
- dev->ibdev.alloc_hw_stats = iwch_alloc_stats;
- dev->ibdev.get_hw_stats = iwch_get_mib;
- dev->ibdev.uverbs_abi_ver = IWCH_UVERBS_ABI_VERSION;
- dev->ibdev.get_port_immutable = iwch_port_immutable;
- dev->ibdev.get_dev_fw_str = get_dev_fw_ver_str;
-
- dev->ibdev.iwcm = kmalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL);
- if (!dev->ibdev.iwcm)
- return -ENOMEM;
-
- dev->ibdev.iwcm->connect = iwch_connect;
- dev->ibdev.iwcm->accept = iwch_accept_cr;
- dev->ibdev.iwcm->reject = iwch_reject_cr;
- dev->ibdev.iwcm->create_listen = iwch_create_listen;
- dev->ibdev.iwcm->destroy_listen = iwch_destroy_listen;
- dev->ibdev.iwcm->add_ref = iwch_qp_add_ref;
- dev->ibdev.iwcm->rem_ref = iwch_qp_rem_ref;
- dev->ibdev.iwcm->get_qp = iwch_get_qp;
- memcpy(dev->ibdev.iwcm->ifname, dev->rdev.t3cdev_p->lldev->name,
- sizeof(dev->ibdev.iwcm->ifname));
-
- ret = ib_register_device(&dev->ibdev, NULL);
- if (ret)
- goto bail1;
-
- for (i = 0; i < ARRAY_SIZE(iwch_class_attributes); ++i) {
- ret = device_create_file(&dev->ibdev.dev,
- iwch_class_attributes[i]);
- if (ret) {
- goto bail2;
- }
- }
- return 0;
-bail2:
- ib_unregister_device(&dev->ibdev);
-bail1:
- kfree(dev->ibdev.iwcm);
- return ret;
-}
-
-void iwch_unregister_device(struct iwch_dev *dev)
-{
- int i;
-
- PDBG("%s iwch_dev %p\n", __func__, dev);
- for (i = 0; i < ARRAY_SIZE(iwch_class_attributes); ++i)
- device_remove_file(&dev->ibdev.dev,
- iwch_class_attributes[i]);
- ib_unregister_device(&dev->ibdev);
- kfree(dev->ibdev.iwcm);
- return;
-}
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.h b/drivers/infiniband/hw/cxgb3/iwch_provider.h
deleted file mode 100644
index 252c464a09f6..000000000000
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.h
+++ /dev/null
@@ -1,347 +0,0 @@
-/*
- * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef __IWCH_PROVIDER_H__
-#define __IWCH_PROVIDER_H__
-
-#include <linux/list.h>
-#include <linux/spinlock.h>
-#include <rdma/ib_verbs.h>
-#include <asm/types.h>
-#include "t3cdev.h"
-#include "iwch.h"
-#include "cxio_wr.h"
-#include "cxio_hal.h"
-
-struct iwch_pd {
- struct ib_pd ibpd;
- u32 pdid;
- struct iwch_dev *rhp;
-};
-
-static inline struct iwch_pd *to_iwch_pd(struct ib_pd *ibpd)
-{
- return container_of(ibpd, struct iwch_pd, ibpd);
-}
-
-struct tpt_attributes {
- u32 stag;
- u32 state:1;
- u32 type:2;
- u32 rsvd:1;
- enum tpt_mem_perm perms;
- u32 remote_invaliate_disable:1;
- u32 zbva:1;
- u32 mw_bind_enable:1;
- u32 page_size:5;
-
- u32 pdid;
- u32 qpid;
- u32 pbl_addr;
- u32 len;
- u64 va_fbo;
- u32 pbl_size;
-};
-
-struct iwch_mr {
- struct ib_mr ibmr;
- struct ib_umem *umem;
- struct iwch_dev *rhp;
- u64 kva;
- struct tpt_attributes attr;
- u64 *pages;
- u32 npages;
-};
-
-typedef struct iwch_mw iwch_mw_handle;
-
-static inline struct iwch_mr *to_iwch_mr(struct ib_mr *ibmr)
-{
- return container_of(ibmr, struct iwch_mr, ibmr);
-}
-
-struct iwch_mw {
- struct ib_mw ibmw;
- struct iwch_dev *rhp;
- u64 kva;
- struct tpt_attributes attr;
-};
-
-static inline struct iwch_mw *to_iwch_mw(struct ib_mw *ibmw)
-{
- return container_of(ibmw, struct iwch_mw, ibmw);
-}
-
-struct iwch_cq {
- struct ib_cq ibcq;
- struct iwch_dev *rhp;
- struct t3_cq cq;
- spinlock_t lock;
- spinlock_t comp_handler_lock;
- atomic_t refcnt;
- wait_queue_head_t wait;
- u32 __user *user_rptr_addr;
-};
-
-static inline struct iwch_cq *to_iwch_cq(struct ib_cq *ibcq)
-{
- return container_of(ibcq, struct iwch_cq, ibcq);
-}
-
-enum IWCH_QP_FLAGS {
- QP_QUIESCED = 0x01
-};
-
-struct iwch_mpa_attributes {
- u8 initiator;
- u8 recv_marker_enabled;
- u8 xmit_marker_enabled; /* iWARP: enable inbound Read Resp. */
- u8 crc_enabled;
- u8 version; /* 0 or 1 */
-};
-
-struct iwch_qp_attributes {
- u32 scq;
- u32 rcq;
- u32 sq_num_entries;
- u32 rq_num_entries;
- u32 sq_max_sges;
- u32 sq_max_sges_rdma_write;
- u32 rq_max_sges;
- u32 state;
- u8 enable_rdma_read;
- u8 enable_rdma_write; /* enable inbound Read Resp. */
- u8 enable_bind;
- u8 enable_mmid0_fastreg; /* Enable STAG0 + Fast-register */
- /*
- * Next QP state. If specify the current state, only the
- * QP attributes will be modified.
- */
- u32 max_ord;
- u32 max_ird;
- u32 pd; /* IN */
- u32 next_state;
- char terminate_buffer[52];
- u32 terminate_msg_len;
- u8 is_terminate_local;
- struct iwch_mpa_attributes mpa_attr; /* IN-OUT */
- struct iwch_ep *llp_stream_handle;
- char *stream_msg_buf; /* Last stream msg. before Idle -> RTS */
- u32 stream_msg_buf_len; /* Only on Idle -> RTS */
-};
-
-struct iwch_qp {
- struct ib_qp ibqp;
- struct iwch_dev *rhp;
- struct iwch_ep *ep;
- struct iwch_qp_attributes attr;
- struct t3_wq wq;
- spinlock_t lock;
- atomic_t refcnt;
- wait_queue_head_t wait;
- enum IWCH_QP_FLAGS flags;
- struct timer_list timer;
-};
-
-static inline int qp_quiesced(struct iwch_qp *qhp)
-{
- return qhp->flags & QP_QUIESCED;
-}
-
-static inline struct iwch_qp *to_iwch_qp(struct ib_qp *ibqp)
-{
- return container_of(ibqp, struct iwch_qp, ibqp);
-}
-
-void iwch_qp_add_ref(struct ib_qp *qp);
-void iwch_qp_rem_ref(struct ib_qp *qp);
-
-struct iwch_ucontext {
- struct ib_ucontext ibucontext;
- struct cxio_ucontext uctx;
- u32 key;
- spinlock_t mmap_lock;
- struct list_head mmaps;
-};
-
-static inline struct iwch_ucontext *to_iwch_ucontext(struct ib_ucontext *c)
-{
- return container_of(c, struct iwch_ucontext, ibucontext);
-}
-
-struct iwch_mm_entry {
- struct list_head entry;
- u64 addr;
- u32 key;
- unsigned len;
-};
-
-static inline struct iwch_mm_entry *remove_mmap(struct iwch_ucontext *ucontext,
- u32 key, unsigned len)
-{
- struct list_head *pos, *nxt;
- struct iwch_mm_entry *mm;
-
- spin_lock(&ucontext->mmap_lock);
- list_for_each_safe(pos, nxt, &ucontext->mmaps) {
-
- mm = list_entry(pos, struct iwch_mm_entry, entry);
- if (mm->key == key && mm->len == len) {
- list_del_init(&mm->entry);
- spin_unlock(&ucontext->mmap_lock);
- PDBG("%s key 0x%x addr 0x%llx len %d\n", __func__,
- key, (unsigned long long) mm->addr, mm->len);
- return mm;
- }
- }
- spin_unlock(&ucontext->mmap_lock);
- return NULL;
-}
-
-static inline void insert_mmap(struct iwch_ucontext *ucontext,
- struct iwch_mm_entry *mm)
-{
- spin_lock(&ucontext->mmap_lock);
- PDBG("%s key 0x%x addr 0x%llx len %d\n", __func__,
- mm->key, (unsigned long long) mm->addr, mm->len);
- list_add_tail(&mm->entry, &ucontext->mmaps);
- spin_unlock(&ucontext->mmap_lock);
-}
-
-enum iwch_qp_attr_mask {
- IWCH_QP_ATTR_NEXT_STATE = 1 << 0,
- IWCH_QP_ATTR_ENABLE_RDMA_READ = 1 << 7,
- IWCH_QP_ATTR_ENABLE_RDMA_WRITE = 1 << 8,
- IWCH_QP_ATTR_ENABLE_RDMA_BIND = 1 << 9,
- IWCH_QP_ATTR_MAX_ORD = 1 << 11,
- IWCH_QP_ATTR_MAX_IRD = 1 << 12,
- IWCH_QP_ATTR_LLP_STREAM_HANDLE = 1 << 22,
- IWCH_QP_ATTR_STREAM_MSG_BUFFER = 1 << 23,
- IWCH_QP_ATTR_MPA_ATTR = 1 << 24,
- IWCH_QP_ATTR_QP_CONTEXT_ACTIVATE = 1 << 25,
- IWCH_QP_ATTR_VALID_MODIFY = (IWCH_QP_ATTR_ENABLE_RDMA_READ |
- IWCH_QP_ATTR_ENABLE_RDMA_WRITE |
- IWCH_QP_ATTR_MAX_ORD |
- IWCH_QP_ATTR_MAX_IRD |
- IWCH_QP_ATTR_LLP_STREAM_HANDLE |
- IWCH_QP_ATTR_STREAM_MSG_BUFFER |
- IWCH_QP_ATTR_MPA_ATTR |
- IWCH_QP_ATTR_QP_CONTEXT_ACTIVATE)
-};
-
-int iwch_modify_qp(struct iwch_dev *rhp,
- struct iwch_qp *qhp,
- enum iwch_qp_attr_mask mask,
- struct iwch_qp_attributes *attrs,
- int internal);
-
-enum iwch_qp_state {
- IWCH_QP_STATE_IDLE,
- IWCH_QP_STATE_RTS,
- IWCH_QP_STATE_ERROR,
- IWCH_QP_STATE_TERMINATE,
- IWCH_QP_STATE_CLOSING,
- IWCH_QP_STATE_TOT
-};
-
-static inline int iwch_convert_state(enum ib_qp_state ib_state)
-{
- switch (ib_state) {
- case IB_QPS_RESET:
- case IB_QPS_INIT:
- return IWCH_QP_STATE_IDLE;
- case IB_QPS_RTS:
- return IWCH_QP_STATE_RTS;
- case IB_QPS_SQD:
- return IWCH_QP_STATE_CLOSING;
- case IB_QPS_SQE:
- return IWCH_QP_STATE_TERMINATE;
- case IB_QPS_ERR:
- return IWCH_QP_STATE_ERROR;
- default:
- return -1;
- }
-}
-
-static inline u32 iwch_ib_to_tpt_access(int acc)
-{
- return (acc & IB_ACCESS_REMOTE_WRITE ? TPT_REMOTE_WRITE : 0) |
- (acc & IB_ACCESS_REMOTE_READ ? TPT_REMOTE_READ : 0) |
- (acc & IB_ACCESS_LOCAL_WRITE ? TPT_LOCAL_WRITE : 0) |
- (acc & IB_ACCESS_MW_BIND ? TPT_MW_BIND : 0) |
- TPT_LOCAL_READ;
-}
-
-static inline u32 iwch_ib_to_tpt_bind_access(int acc)
-{
- return (acc & IB_ACCESS_REMOTE_WRITE ? TPT_REMOTE_WRITE : 0) |
- (acc & IB_ACCESS_REMOTE_READ ? TPT_REMOTE_READ : 0);
-}
-
-enum iwch_mmid_state {
- IWCH_STAG_STATE_VALID,
- IWCH_STAG_STATE_INVALID
-};
-
-enum iwch_qp_query_flags {
- IWCH_QP_QUERY_CONTEXT_NONE = 0x0, /* No ctx; Only attrs */
- IWCH_QP_QUERY_CONTEXT_GET = 0x1, /* Get ctx + attrs */
- IWCH_QP_QUERY_CONTEXT_SUSPEND = 0x2, /* Not Supported */
-
- /*
- * Quiesce QP context; Consumer
- * will NOT replay outstanding WR
- */
- IWCH_QP_QUERY_CONTEXT_QUIESCE = 0x4,
- IWCH_QP_QUERY_CONTEXT_REMOVE = 0x8,
- IWCH_QP_QUERY_TEST_USERWRITE = 0x32 /* Test special */
-};
-
-u16 iwch_rqes_posted(struct iwch_qp *qhp);
-int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
- struct ib_send_wr **bad_wr);
-int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
- struct ib_recv_wr **bad_wr);
-int iwch_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
-int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg);
-int iwch_post_zb_read(struct iwch_ep *ep);
-int iwch_register_device(struct iwch_dev *dev);
-void iwch_unregister_device(struct iwch_dev *dev);
-void stop_read_rep_timer(struct iwch_qp *qhp);
-int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php,
- struct iwch_mr *mhp, int shift);
-int iwch_alloc_pbl(struct iwch_mr *mhp, int npages);
-void iwch_free_pbl(struct iwch_mr *mhp);
-int iwch_write_pbl(struct iwch_mr *mhp, __be64 *pages, int npages, int offset);
-
-#define IWCH_NODE_DESC "cxgb3 Chelsio Communications"
-
-#endif
diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c
deleted file mode 100644
index d939980a708f..000000000000
--- a/drivers/infiniband/hw/cxgb3/iwch_qp.c
+++ /dev/null
@@ -1,1082 +0,0 @@
-/*
- * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include <linux/sched.h>
-#include <linux/gfp.h>
-#include "iwch_provider.h"
-#include "iwch.h"
-#include "iwch_cm.h"
-#include "cxio_hal.h"
-#include "cxio_resource.h"
-
-#define NO_SUPPORT -1
-
-static int build_rdma_send(union t3_wr *wqe, struct ib_send_wr *wr,
- u8 * flit_cnt)
-{
- int i;
- u32 plen;
-
- switch (wr->opcode) {
- case IB_WR_SEND:
- if (wr->send_flags & IB_SEND_SOLICITED)
- wqe->send.rdmaop = T3_SEND_WITH_SE;
- else
- wqe->send.rdmaop = T3_SEND;
- wqe->send.rem_stag = 0;
- break;
- case IB_WR_SEND_WITH_INV:
- if (wr->send_flags & IB_SEND_SOLICITED)
- wqe->send.rdmaop = T3_SEND_WITH_SE_INV;
- else
- wqe->send.rdmaop = T3_SEND_WITH_INV;
- wqe->send.rem_stag = cpu_to_be32(wr->ex.invalidate_rkey);
- break;
- default:
- return -EINVAL;
- }
- if (wr->num_sge > T3_MAX_SGE)
- return -EINVAL;
- wqe->send.reserved[0] = 0;
- wqe->send.reserved[1] = 0;
- wqe->send.reserved[2] = 0;
- plen = 0;
- for (i = 0; i < wr->num_sge; i++) {
- if ((plen + wr->sg_list[i].length) < plen)
- return -EMSGSIZE;
-
- plen += wr->sg_list[i].length;
- wqe->send.sgl[i].stag = cpu_to_be32(wr->sg_list[i].lkey);
- wqe->send.sgl[i].len = cpu_to_be32(wr->sg_list[i].length);
- wqe->send.sgl[i].to = cpu_to_be64(wr->sg_list[i].addr);
- }
- wqe->send.num_sgle = cpu_to_be32(wr->num_sge);
- *flit_cnt = 4 + ((wr->num_sge) << 1);
- wqe->send.plen = cpu_to_be32(plen);
- return 0;
-}
-
-static int build_rdma_write(union t3_wr *wqe, struct ib_send_wr *wr,
- u8 *flit_cnt)
-{
- int i;
- u32 plen;
- if (wr->num_sge > T3_MAX_SGE)
- return -EINVAL;
- wqe->write.rdmaop = T3_RDMA_WRITE;
- wqe->write.reserved[0] = 0;
- wqe->write.reserved[1] = 0;
- wqe->write.reserved[2] = 0;
- wqe->write.stag_sink = cpu_to_be32(rdma_wr(wr)->rkey);
- wqe->write.to_sink = cpu_to_be64(rdma_wr(wr)->remote_addr);
-
- if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) {
- plen = 4;
- wqe->write.sgl[0].stag = wr->ex.imm_data;
- wqe->write.sgl[0].len = cpu_to_be32(0);
- wqe->write.num_sgle = cpu_to_be32(0);
- *flit_cnt = 6;
- } else {
- plen = 0;
- for (i = 0; i < wr->num_sge; i++) {
- if ((plen + wr->sg_list[i].length) < plen) {
- return -EMSGSIZE;
- }
- plen += wr->sg_list[i].length;
- wqe->write.sgl[i].stag =
- cpu_to_be32(wr->sg_list[i].lkey);
- wqe->write.sgl[i].len =
- cpu_to_be32(wr->sg_list[i].length);
- wqe->write.sgl[i].to =
- cpu_to_be64(wr->sg_list[i].addr);
- }
- wqe->write.num_sgle = cpu_to_be32(wr->num_sge);
- *flit_cnt = 5 + ((wr->num_sge) << 1);
- }
- wqe->write.plen = cpu_to_be32(plen);
- return 0;
-}
-
-static int build_rdma_read(union t3_wr *wqe, struct ib_send_wr *wr,
- u8 *flit_cnt)
-{
- if (wr->num_sge > 1)
- return -EINVAL;
- wqe->read.rdmaop = T3_READ_REQ;
- if (wr->opcode == IB_WR_RDMA_READ_WITH_INV)
- wqe->read.local_inv = 1;
- else
- wqe->read.local_inv = 0;
- wqe->read.reserved[0] = 0;
- wqe->read.reserved[1] = 0;
- wqe->read.rem_stag = cpu_to_be32(rdma_wr(wr)->rkey);
- wqe->read.rem_to = cpu_to_be64(rdma_wr(wr)->remote_addr);
- wqe->read.local_stag = cpu_to_be32(wr->sg_list[0].lkey);
- wqe->read.local_len = cpu_to_be32(wr->sg_list[0].length);
- wqe->read.local_to = cpu_to_be64(wr->sg_list[0].addr);
- *flit_cnt = sizeof(struct t3_rdma_read_wr) >> 3;
- return 0;
-}
-
-static int build_memreg(union t3_wr *wqe, struct ib_reg_wr *wr,
- u8 *flit_cnt, int *wr_cnt, struct t3_wq *wq)
-{
- struct iwch_mr *mhp = to_iwch_mr(wr->mr);
- int i;
- __be64 *p;
-
- if (mhp->npages > T3_MAX_FASTREG_DEPTH)
- return -EINVAL;
- *wr_cnt = 1;
- wqe->fastreg.stag = cpu_to_be32(wr->key);
- wqe->fastreg.len = cpu_to_be32(mhp->ibmr.length);
- wqe->fastreg.va_base_hi = cpu_to_be32(mhp->ibmr.iova >> 32);
- wqe->fastreg.va_base_lo_fbo =
- cpu_to_be32(mhp->ibmr.iova & 0xffffffff);
- wqe->fastreg.page_type_perms = cpu_to_be32(
- V_FR_PAGE_COUNT(mhp->npages) |
- V_FR_PAGE_SIZE(ilog2(wr->mr->page_size) - 12) |
- V_FR_TYPE(TPT_VATO) |
- V_FR_PERMS(iwch_ib_to_tpt_access(wr->access)));
- p = &wqe->fastreg.pbl_addrs[0];
- for (i = 0; i < mhp->npages; i++, p++) {
-
- /* If we need a 2nd WR, then set it up */
- if (i == T3_MAX_FASTREG_FRAG) {
- *wr_cnt = 2;
- wqe = (union t3_wr *)(wq->queue +
- Q_PTR2IDX((wq->wptr+1), wq->size_log2));
- build_fw_riwrh((void *)wqe, T3_WR_FASTREG, 0,
- Q_GENBIT(wq->wptr + 1, wq->size_log2),
- 0, 1 + mhp->npages - T3_MAX_FASTREG_FRAG,
- T3_EOP);
-
- p = &wqe->pbl_frag.pbl_addrs[0];
- }
- *p = cpu_to_be64((u64)mhp->pages[i]);
- }
- *flit_cnt = 5 + mhp->npages;
- if (*flit_cnt > 15)
- *flit_cnt = 15;
- return 0;
-}
-
-static int build_inv_stag(union t3_wr *wqe, struct ib_send_wr *wr,
- u8 *flit_cnt)
-{
- wqe->local_inv.stag = cpu_to_be32(wr->ex.invalidate_rkey);
- wqe->local_inv.reserved = 0;
- *flit_cnt = sizeof(struct t3_local_inv_wr) >> 3;
- return 0;
-}
-
-static int iwch_sgl2pbl_map(struct iwch_dev *rhp, struct ib_sge *sg_list,
- u32 num_sgle, u32 * pbl_addr, u8 * page_size)
-{
- int i;
- struct iwch_mr *mhp;
- u64 offset;
- for (i = 0; i < num_sgle; i++) {
-
- mhp = get_mhp(rhp, (sg_list[i].lkey) >> 8);
- if (!mhp) {
- PDBG("%s %d\n", __func__, __LINE__);
- return -EIO;
- }
- if (!mhp->attr.state) {
- PDBG("%s %d\n", __func__, __LINE__);
- return -EIO;
- }
- if (mhp->attr.zbva) {
- PDBG("%s %d\n", __func__, __LINE__);
- return -EIO;
- }
-
- if (sg_list[i].addr < mhp->attr.va_fbo) {
- PDBG("%s %d\n", __func__, __LINE__);
- return -EINVAL;
- }
- if (sg_list[i].addr + ((u64) sg_list[i].length) <
- sg_list[i].addr) {
- PDBG("%s %d\n", __func__, __LINE__);
- return -EINVAL;
- }
- if (sg_list[i].addr + ((u64) sg_list[i].length) >
- mhp->attr.va_fbo + ((u64) mhp->attr.len)) {
- PDBG("%s %d\n", __func__, __LINE__);
- return -EINVAL;
- }
- offset = sg_list[i].addr - mhp->attr.va_fbo;
- offset += mhp->attr.va_fbo &
- ((1UL << (12 + mhp->attr.page_size)) - 1);
- pbl_addr[i] = ((mhp->attr.pbl_addr -
- rhp->rdev.rnic_info.pbl_base) >> 3) +
- (offset >> (12 + mhp->attr.page_size));
- page_size[i] = mhp->attr.page_size;
- }
- return 0;
-}
-
-static int build_rdma_recv(struct iwch_qp *qhp, union t3_wr *wqe,
- struct ib_recv_wr *wr)
-{
- int i, err = 0;
- u32 pbl_addr[T3_MAX_SGE];
- u8 page_size[T3_MAX_SGE];
-
- err = iwch_sgl2pbl_map(qhp->rhp, wr->sg_list, wr->num_sge, pbl_addr,
- page_size);
- if (err)
- return err;
- wqe->recv.pagesz[0] = page_size[0];
- wqe->recv.pagesz[1] = page_size[1];
- wqe->recv.pagesz[2] = page_size[2];
- wqe->recv.pagesz[3] = page_size[3];
- wqe->recv.num_sgle = cpu_to_be32(wr->num_sge);
- for (i = 0; i < wr->num_sge; i++) {
- wqe->recv.sgl[i].stag = cpu_to_be32(wr->sg_list[i].lkey);
- wqe->recv.sgl[i].len = cpu_to_be32(wr->sg_list[i].length);
-
- /* to in the WQE == the offset into the page */
- wqe->recv.sgl[i].to = cpu_to_be64(((u32)wr->sg_list[i].addr) &
- ((1UL << (12 + page_size[i])) - 1));
-
- /* pbl_addr is the adapters address in the PBL */
- wqe->recv.pbl_addr[i] = cpu_to_be32(pbl_addr[i]);
- }
- for (; i < T3_MAX_SGE; i++) {
- wqe->recv.sgl[i].stag = 0;
- wqe->recv.sgl[i].len = 0;
- wqe->recv.sgl[i].to = 0;
- wqe->recv.pbl_addr[i] = 0;
- }
- qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr,
- qhp->wq.rq_size_log2)].wr_id = wr->wr_id;
- qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr,
- qhp->wq.rq_size_log2)].pbl_addr = 0;
- return 0;
-}
-
-static int build_zero_stag_recv(struct iwch_qp *qhp, union t3_wr *wqe,
- struct ib_recv_wr *wr)
-{
- int i;
- u32 pbl_addr;
- u32 pbl_offset;
-
-
- /*
- * The T3 HW requires the PBL in the HW recv descriptor to reference
- * a PBL entry. So we allocate the max needed PBL memory here and pass
- * it to the uP in the recv WR. The uP will build the PBL and setup
- * the HW recv descriptor.
- */
- pbl_addr = cxio_hal_pblpool_alloc(&qhp->rhp->rdev, T3_STAG0_PBL_SIZE);
- if (!pbl_addr)
- return -ENOMEM;
-
- /*
- * Compute the 8B aligned offset.
- */
- pbl_offset = (pbl_addr - qhp->rhp->rdev.rnic_info.pbl_base) >> 3;
-
- wqe->recv.num_sgle = cpu_to_be32(wr->num_sge);
-
- for (i = 0; i < wr->num_sge; i++) {
-
- /*
- * Use a 128MB page size. This and an imposed 128MB
- * sge length limit allows us to require only a 2-entry HW
- * PBL for each SGE. This restriction is acceptable since
- * since it is not possible to allocate 128MB of contiguous
- * DMA coherent memory!
- */
- if (wr->sg_list[i].length > T3_STAG0_MAX_PBE_LEN)
- return -EINVAL;
- wqe->recv.pagesz[i] = T3_STAG0_PAGE_SHIFT;
-
- /*
- * T3 restricts a recv to all zero-stag or all non-zero-stag.
- */
- if (wr->sg_list[i].lkey != 0)
- return -EINVAL;
- wqe->recv.sgl[i].stag = 0;
- wqe->recv.sgl[i].len = cpu_to_be32(wr->sg_list[i].length);
- wqe->recv.sgl[i].to = cpu_to_be64(wr->sg_list[i].addr);
- wqe->recv.pbl_addr[i] = cpu_to_be32(pbl_offset);
- pbl_offset += 2;
- }
- for (; i < T3_MAX_SGE; i++) {
- wqe->recv.pagesz[i] = 0;
- wqe->recv.sgl[i].stag = 0;
- wqe->recv.sgl[i].len = 0;
- wqe->recv.sgl[i].to = 0;
- wqe->recv.pbl_addr[i] = 0;
- }
- qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr,
- qhp->wq.rq_size_log2)].wr_id = wr->wr_id;
- qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr,
- qhp->wq.rq_size_log2)].pbl_addr = pbl_addr;
- return 0;
-}
-
-int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
- struct ib_send_wr **bad_wr)
-{
- int err = 0;
- u8 uninitialized_var(t3_wr_flit_cnt);
- enum t3_wr_opcode t3_wr_opcode = 0;
- enum t3_wr_flags t3_wr_flags;
- struct iwch_qp *qhp;
- u32 idx;
- union t3_wr *wqe;
- u32 num_wrs;
- unsigned long flag;
- struct t3_swsq *sqp;
- int wr_cnt = 1;
-
- qhp = to_iwch_qp(ibqp);
- spin_lock_irqsave(&qhp->lock, flag);
- if (qhp->attr.state > IWCH_QP_STATE_RTS) {
- spin_unlock_irqrestore(&qhp->lock, flag);
- err = -EINVAL;
- goto out;
- }
- num_wrs = Q_FREECNT(qhp->wq.sq_rptr, qhp->wq.sq_wptr,
- qhp->wq.sq_size_log2);
- if (num_wrs == 0) {
- spin_unlock_irqrestore(&qhp->lock, flag);
- err = -ENOMEM;
- goto out;
- }
- while (wr) {
- if (num_wrs == 0) {
- err = -ENOMEM;
- break;
- }
- idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2);
- wqe = (union t3_wr *) (qhp->wq.queue + idx);
- t3_wr_flags = 0;
- if (wr->send_flags & IB_SEND_SOLICITED)
- t3_wr_flags |= T3_SOLICITED_EVENT_FLAG;
- if (wr->send_flags & IB_SEND_SIGNALED)
- t3_wr_flags |= T3_COMPLETION_FLAG;
- sqp = qhp->wq.sq +
- Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2);
- switch (wr->opcode) {
- case IB_WR_SEND:
- case IB_WR_SEND_WITH_INV:
- if (wr->send_flags & IB_SEND_FENCE)
- t3_wr_flags |= T3_READ_FENCE_FLAG;
- t3_wr_opcode = T3_WR_SEND;
- err = build_rdma_send(wqe, wr, &t3_wr_flit_cnt);
- break;
- case IB_WR_RDMA_WRITE:
- case IB_WR_RDMA_WRITE_WITH_IMM:
- t3_wr_opcode = T3_WR_WRITE;
- err = build_rdma_write(wqe, wr, &t3_wr_flit_cnt);
- break;
- case IB_WR_RDMA_READ:
- case IB_WR_RDMA_READ_WITH_INV:
- t3_wr_opcode = T3_WR_READ;
- t3_wr_flags = 0; /* T3 reads are always signaled */
- err = build_rdma_read(wqe, wr, &t3_wr_flit_cnt);
- if (err)
- break;
- sqp->read_len = wqe->read.local_len;
- if (!qhp->wq.oldest_read)
- qhp->wq.oldest_read = sqp;
- break;
- case IB_WR_REG_MR:
- t3_wr_opcode = T3_WR_FASTREG;
- err = build_memreg(wqe, reg_wr(wr), &t3_wr_flit_cnt,
- &wr_cnt, &qhp->wq);
- break;
- case IB_WR_LOCAL_INV:
- if (wr->send_flags & IB_SEND_FENCE)
- t3_wr_flags |= T3_LOCAL_FENCE_FLAG;
- t3_wr_opcode = T3_WR_INV_STAG;
- err = build_inv_stag(wqe, wr, &t3_wr_flit_cnt);
- break;
- default:
- PDBG("%s post of type=%d TBD!\n", __func__,
- wr->opcode);
- err = -EINVAL;
- }
- if (err)
- break;
- wqe->send.wrid.id0.hi = qhp->wq.sq_wptr;
- sqp->wr_id = wr->wr_id;
- sqp->opcode = wr2opcode(t3_wr_opcode);
- sqp->sq_wptr = qhp->wq.sq_wptr;
- sqp->complete = 0;
- sqp->signaled = (wr->send_flags & IB_SEND_SIGNALED);
-
- build_fw_riwrh((void *) wqe, t3_wr_opcode, t3_wr_flags,
- Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2),
- 0, t3_wr_flit_cnt,
- (wr_cnt == 1) ? T3_SOPEOP : T3_SOP);
- PDBG("%s cookie 0x%llx wq idx 0x%x swsq idx %ld opcode %d\n",
- __func__, (unsigned long long) wr->wr_id, idx,
- Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2),
- sqp->opcode);
- wr = wr->next;
- num_wrs--;
- qhp->wq.wptr += wr_cnt;
- ++(qhp->wq.sq_wptr);
- }
- spin_unlock_irqrestore(&qhp->lock, flag);
- if (cxio_wq_db_enabled(&qhp->wq))
- ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid);
-
-out:
- if (err)
- *bad_wr = wr;
- return err;
-}
-
-int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
- struct ib_recv_wr **bad_wr)
-{
- int err = 0;
- struct iwch_qp *qhp;
- u32 idx;
- union t3_wr *wqe;
- u32 num_wrs;
- unsigned long flag;
-
- qhp = to_iwch_qp(ibqp);
- spin_lock_irqsave(&qhp->lock, flag);
- if (qhp->attr.state > IWCH_QP_STATE_RTS) {
- spin_unlock_irqrestore(&qhp->lock, flag);
- err = -EINVAL;
- goto out;
- }
- num_wrs = Q_FREECNT(qhp->wq.rq_rptr, qhp->wq.rq_wptr,
- qhp->wq.rq_size_log2) - 1;
- if (!wr) {
- spin_unlock_irqrestore(&qhp->lock, flag);
- err = -ENOMEM;
- goto out;
- }
- while (wr) {
- if (wr->num_sge > T3_MAX_SGE) {
- err = -EINVAL;
- break;
- }
- idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2);
- wqe = (union t3_wr *) (qhp->wq.queue + idx);
- if (num_wrs)
- if (wr->sg_list[0].lkey)
- err = build_rdma_recv(qhp, wqe, wr);
- else
- err = build_zero_stag_recv(qhp, wqe, wr);
- else
- err = -ENOMEM;
-
- if (err)
- break;
-
- build_fw_riwrh((void *) wqe, T3_WR_RCV, T3_COMPLETION_FLAG,
- Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2),
- 0, sizeof(struct t3_receive_wr) >> 3, T3_SOPEOP);
- PDBG("%s cookie 0x%llx idx 0x%x rq_wptr 0x%x rw_rptr 0x%x "
- "wqe %p \n", __func__, (unsigned long long) wr->wr_id,
- idx, qhp->wq.rq_wptr, qhp->wq.rq_rptr, wqe);
- ++(qhp->wq.rq_wptr);
- ++(qhp->wq.wptr);
- wr = wr->next;
- num_wrs--;
- }
- spin_unlock_irqrestore(&qhp->lock, flag);
- if (cxio_wq_db_enabled(&qhp->wq))
- ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid);
-
-out:
- if (err)
- *bad_wr = wr;
- return err;
-}
-
-static inline void build_term_codes(struct respQ_msg_t *rsp_msg,
- u8 *layer_type, u8 *ecode)
-{
- int status = TPT_ERR_INTERNAL_ERR;
- int tagged = 0;
- int opcode = -1;
- int rqtype = 0;
- int send_inv = 0;
-
- if (rsp_msg) {
- status = CQE_STATUS(rsp_msg->cqe);
- opcode = CQE_OPCODE(rsp_msg->cqe);
- rqtype = RQ_TYPE(rsp_msg->cqe);
- send_inv = (opcode == T3_SEND_WITH_INV) ||
- (opcode == T3_SEND_WITH_SE_INV);
- tagged = (opcode == T3_RDMA_WRITE) ||
- (rqtype && (opcode == T3_READ_RESP));
- }
-
- switch (status) {
- case TPT_ERR_STAG:
- if (send_inv) {
- *layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP;
- *ecode = RDMAP_CANT_INV_STAG;
- } else {
- *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
- *ecode = RDMAP_INV_STAG;
- }
- break;
- case TPT_ERR_PDID:
- *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
- if ((opcode == T3_SEND_WITH_INV) ||
- (opcode == T3_SEND_WITH_SE_INV))
- *ecode = RDMAP_CANT_INV_STAG;
- else
- *ecode = RDMAP_STAG_NOT_ASSOC;
- break;
- case TPT_ERR_QPID:
- *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
- *ecode = RDMAP_STAG_NOT_ASSOC;
- break;
- case TPT_ERR_ACCESS:
- *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
- *ecode = RDMAP_ACC_VIOL;
- break;
- case TPT_ERR_WRAP:
- *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
- *ecode = RDMAP_TO_WRAP;
- break;
- case TPT_ERR_BOUND:
- if (tagged) {
- *layer_type = LAYER_DDP|DDP_TAGGED_ERR;
- *ecode = DDPT_BASE_BOUNDS;
- } else {
- *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
- *ecode = RDMAP_BASE_BOUNDS;
- }
- break;
- case TPT_ERR_INVALIDATE_SHARED_MR:
- case TPT_ERR_INVALIDATE_MR_WITH_MW_BOUND:
- *layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP;
- *ecode = RDMAP_CANT_INV_STAG;
- break;
- case TPT_ERR_ECC:
- case TPT_ERR_ECC_PSTAG:
- case TPT_ERR_INTERNAL_ERR:
- *layer_type = LAYER_RDMAP|RDMAP_LOCAL_CATA;
- *ecode = 0;
- break;
- case TPT_ERR_OUT_OF_RQE:
- *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
- *ecode = DDPU_INV_MSN_NOBUF;
- break;
- case TPT_ERR_PBL_ADDR_BOUND:
- *layer_type = LAYER_DDP|DDP_TAGGED_ERR;
- *ecode = DDPT_BASE_BOUNDS;
- break;
- case TPT_ERR_CRC:
- *layer_type = LAYER_MPA|DDP_LLP;
- *ecode = MPA_CRC_ERR;
- break;
- case TPT_ERR_MARKER:
- *layer_type = LAYER_MPA|DDP_LLP;
- *ecode = MPA_MARKER_ERR;
- break;
- case TPT_ERR_PDU_LEN_ERR:
- *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
- *ecode = DDPU_MSG_TOOBIG;
- break;
- case TPT_ERR_DDP_VERSION:
- if (tagged) {
- *layer_type = LAYER_DDP|DDP_TAGGED_ERR;
- *ecode = DDPT_INV_VERS;
- } else {
- *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
- *ecode = DDPU_INV_VERS;
- }
- break;
- case TPT_ERR_RDMA_VERSION:
- *layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP;
- *ecode = RDMAP_INV_VERS;
- break;
- case TPT_ERR_OPCODE:
- *layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP;
- *ecode = RDMAP_INV_OPCODE;
- break;
- case TPT_ERR_DDP_QUEUE_NUM:
- *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
- *ecode = DDPU_INV_QN;
- break;
- case TPT_ERR_MSN:
- case TPT_ERR_MSN_GAP:
- case TPT_ERR_MSN_RANGE:
- case TPT_ERR_IRD_OVERFLOW:
- *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
- *ecode = DDPU_INV_MSN_RANGE;
- break;
- case TPT_ERR_TBIT:
- *layer_type = LAYER_DDP|DDP_LOCAL_CATA;
- *ecode = 0;
- break;
- case TPT_ERR_MO:
- *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
- *ecode = DDPU_INV_MO;
- break;
- default:
- *layer_type = LAYER_RDMAP|DDP_LOCAL_CATA;
- *ecode = 0;
- break;
- }
-}
-
-int iwch_post_zb_read(struct iwch_ep *ep)
-{
- union t3_wr *wqe;
- struct sk_buff *skb;
- u8 flit_cnt = sizeof(struct t3_rdma_read_wr) >> 3;
-
- PDBG("%s enter\n", __func__);
- skb = alloc_skb(40, GFP_KERNEL);
- if (!skb) {
- printk(KERN_ERR "%s cannot send zb_read!!\n", __func__);
- return -ENOMEM;
- }
- wqe = (union t3_wr *)skb_put(skb, sizeof(struct t3_rdma_read_wr));
- memset(wqe, 0, sizeof(struct t3_rdma_read_wr));
- wqe->read.rdmaop = T3_READ_REQ;
- wqe->read.reserved[0] = 0;
- wqe->read.reserved[1] = 0;
- wqe->read.rem_stag = cpu_to_be32(1);
- wqe->read.rem_to = cpu_to_be64(1);
- wqe->read.local_stag = cpu_to_be32(1);
- wqe->read.local_len = cpu_to_be32(0);
- wqe->read.local_to = cpu_to_be64(1);
- wqe->send.wrh.op_seop_flags = cpu_to_be32(V_FW_RIWR_OP(T3_WR_READ));
- wqe->send.wrh.gen_tid_len = cpu_to_be32(V_FW_RIWR_TID(ep->hwtid)|
- V_FW_RIWR_LEN(flit_cnt));
- skb->priority = CPL_PRIORITY_DATA;
- return iwch_cxgb3_ofld_send(ep->com.qp->rhp->rdev.t3cdev_p, skb);
-}
-
-/*
- * This posts a TERMINATE with layer=RDMA, type=catastrophic.
- */
-int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg)
-{
- union t3_wr *wqe;
- struct terminate_message *term;
- struct sk_buff *skb;
-
- PDBG("%s %d\n", __func__, __LINE__);
- skb = alloc_skb(40, GFP_ATOMIC);
- if (!skb) {
- printk(KERN_ERR "%s cannot send TERMINATE!\n", __func__);
- return -ENOMEM;
- }
- wqe = (union t3_wr *)skb_put(skb, 40);
- memset(wqe, 0, 40);
- wqe->send.rdmaop = T3_TERMINATE;
-
- /* immediate data length */
- wqe->send.plen = htonl(4);
-
- /* immediate data starts here. */
- term = (struct terminate_message *)wqe->send.sgl;
- build_term_codes(rsp_msg, &term->layer_etype, &term->ecode);
- wqe->send.wrh.op_seop_flags = cpu_to_be32(V_FW_RIWR_OP(T3_WR_SEND) |
- V_FW_RIWR_FLAGS(T3_COMPLETION_FLAG | T3_NOTIFY_FLAG));
- wqe->send.wrh.gen_tid_len = cpu_to_be32(V_FW_RIWR_TID(qhp->ep->hwtid));
- skb->priority = CPL_PRIORITY_DATA;
- return iwch_cxgb3_ofld_send(qhp->rhp->rdev.t3cdev_p, skb);
-}
-
-/*
- * Assumes qhp lock is held.
- */
-static void __flush_qp(struct iwch_qp *qhp, struct iwch_cq *rchp,
- struct iwch_cq *schp)
-{
- int count;
- int flushed;
-
-
- PDBG("%s qhp %p rchp %p schp %p\n", __func__, qhp, rchp, schp);
- /* take a ref on the qhp since we must release the lock */
- atomic_inc(&qhp->refcnt);
- spin_unlock(&qhp->lock);
-
- /* locking hierarchy: cq lock first, then qp lock. */
- spin_lock(&rchp->lock);
- spin_lock(&qhp->lock);
- cxio_flush_hw_cq(&rchp->cq);
- cxio_count_rcqes(&rchp->cq, &qhp->wq, &count);
- flushed = cxio_flush_rq(&qhp->wq, &rchp->cq, count);
- spin_unlock(&qhp->lock);
- spin_unlock(&rchp->lock);
- if (flushed) {
- spin_lock(&rchp->comp_handler_lock);
- (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context);
- spin_unlock(&rchp->comp_handler_lock);
- }
-
- /* locking hierarchy: cq lock first, then qp lock. */
- spin_lock(&schp->lock);
- spin_lock(&qhp->lock);
- cxio_flush_hw_cq(&schp->cq);
- cxio_count_scqes(&schp->cq, &qhp->wq, &count);
- flushed = cxio_flush_sq(&qhp->wq, &schp->cq, count);
- spin_unlock(&qhp->lock);
- spin_unlock(&schp->lock);
- if (flushed) {
- spin_lock(&schp->comp_handler_lock);
- (*schp->ibcq.comp_handler)(&schp->ibcq, schp->ibcq.cq_context);
- spin_unlock(&schp->comp_handler_lock);
- }
-
- /* deref */
- if (atomic_dec_and_test(&qhp->refcnt))
- wake_up(&qhp->wait);
-
- spin_lock(&qhp->lock);
-}
-
-static void flush_qp(struct iwch_qp *qhp)
-{
- struct iwch_cq *rchp, *schp;
-
- rchp = get_chp(qhp->rhp, qhp->attr.rcq);
- schp = get_chp(qhp->rhp, qhp->attr.scq);
-
- if (qhp->ibqp.uobject) {
- cxio_set_wq_in_error(&qhp->wq);
- cxio_set_cq_in_error(&rchp->cq);
- spin_lock(&rchp->comp_handler_lock);
- (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context);
- spin_unlock(&rchp->comp_handler_lock);
- if (schp != rchp) {
- cxio_set_cq_in_error(&schp->cq);
- spin_lock(&schp->comp_handler_lock);
- (*schp->ibcq.comp_handler)(&schp->ibcq,
- schp->ibcq.cq_context);
- spin_unlock(&schp->comp_handler_lock);
- }
- return;
- }
- __flush_qp(qhp, rchp, schp);
-}
-
-
-/*
- * Return count of RECV WRs posted
- */
-u16 iwch_rqes_posted(struct iwch_qp *qhp)
-{
- union t3_wr *wqe = qhp->wq.queue;
- u16 count = 0;
-
- while (count < USHRT_MAX && fw_riwrh_opcode((struct fw_riwrh *)wqe) == T3_WR_RCV) {
- count++;
- wqe++;
- }
- PDBG("%s qhp %p count %u\n", __func__, qhp, count);
- return count;
-}
-
-static int rdma_init(struct iwch_dev *rhp, struct iwch_qp *qhp,
- enum iwch_qp_attr_mask mask,
- struct iwch_qp_attributes *attrs)
-{
- struct t3_rdma_init_attr init_attr;
- int ret;
-
- init_attr.tid = qhp->ep->hwtid;
- init_attr.qpid = qhp->wq.qpid;
- init_attr.pdid = qhp->attr.pd;
- init_attr.scqid = qhp->attr.scq;
- init_attr.rcqid = qhp->attr.rcq;
- init_attr.rq_addr = qhp->wq.rq_addr;
- init_attr.rq_size = 1 << qhp->wq.rq_size_log2;
- init_attr.mpaattrs = uP_RI_MPA_IETF_ENABLE |
- qhp->attr.mpa_attr.recv_marker_enabled |
- (qhp->attr.mpa_attr.xmit_marker_enabled << 1) |
- (qhp->attr.mpa_attr.crc_enabled << 2);
-
- init_attr.qpcaps = uP_RI_QP_RDMA_READ_ENABLE |
- uP_RI_QP_RDMA_WRITE_ENABLE |
- uP_RI_QP_BIND_ENABLE;
- if (!qhp->ibqp.uobject)
- init_attr.qpcaps |= uP_RI_QP_STAG0_ENABLE |
- uP_RI_QP_FAST_REGISTER_ENABLE;
-
- init_attr.tcp_emss = qhp->ep->emss;
- init_attr.ord = qhp->attr.max_ord;
- init_attr.ird = qhp->attr.max_ird;
- init_attr.qp_dma_addr = qhp->wq.dma_addr;
- init_attr.qp_dma_size = (1UL << qhp->wq.size_log2);
- init_attr.rqe_count = iwch_rqes_posted(qhp);
- init_attr.flags = qhp->attr.mpa_attr.initiator ? MPA_INITIATOR : 0;
- init_attr.chan = qhp->ep->l2t->smt_idx;
- if (peer2peer) {
- init_attr.rtr_type = RTR_READ;
- if (init_attr.ord == 0 && qhp->attr.mpa_attr.initiator)
- init_attr.ord = 1;
- if (init_attr.ird == 0 && !qhp->attr.mpa_attr.initiator)
- init_attr.ird = 1;
- } else
- init_attr.rtr_type = 0;
- init_attr.irs = qhp->ep->rcv_seq;
- PDBG("%s init_attr.rq_addr 0x%x init_attr.rq_size = %d "
- "flags 0x%x qpcaps 0x%x\n", __func__,
- init_attr.rq_addr, init_attr.rq_size,
- init_attr.flags, init_attr.qpcaps);
- ret = cxio_rdma_init(&rhp->rdev, &init_attr);
- PDBG("%s ret %d\n", __func__, ret);
- return ret;
-}
-
-int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp,
- enum iwch_qp_attr_mask mask,
- struct iwch_qp_attributes *attrs,
- int internal)
-{
- int ret = 0;
- struct iwch_qp_attributes newattr = qhp->attr;
- unsigned long flag;
- int disconnect = 0;
- int terminate = 0;
- int abort = 0;
- int free = 0;
- struct iwch_ep *ep = NULL;
-
- PDBG("%s qhp %p qpid 0x%x ep %p state %d -> %d\n", __func__,
- qhp, qhp->wq.qpid, qhp->ep, qhp->attr.state,
- (mask & IWCH_QP_ATTR_NEXT_STATE) ? attrs->next_state : -1);
-
- spin_lock_irqsave(&qhp->lock, flag);
-
- /* Process attr changes if in IDLE */
- if (mask & IWCH_QP_ATTR_VALID_MODIFY) {
- if (qhp->attr.state != IWCH_QP_STATE_IDLE) {
- ret = -EIO;
- goto out;
- }
- if (mask & IWCH_QP_ATTR_ENABLE_RDMA_READ)
- newattr.enable_rdma_read = attrs->enable_rdma_read;
- if (mask & IWCH_QP_ATTR_ENABLE_RDMA_WRITE)
- newattr.enable_rdma_write = attrs->enable_rdma_write;
- if (mask & IWCH_QP_ATTR_ENABLE_RDMA_BIND)
- newattr.enable_bind = attrs->enable_bind;
- if (mask & IWCH_QP_ATTR_MAX_ORD) {
- if (attrs->max_ord >
- rhp->attr.max_rdma_read_qp_depth) {
- ret = -EINVAL;
- goto out;
- }
- newattr.max_ord = attrs->max_ord;
- }
- if (mask & IWCH_QP_ATTR_MAX_IRD) {
- if (attrs->max_ird >
- rhp->attr.max_rdma_reads_per_qp) {
- ret = -EINVAL;
- goto out;
- }
- newattr.max_ird = attrs->max_ird;
- }
- qhp->attr = newattr;
- }
-
- if (!(mask & IWCH_QP_ATTR_NEXT_STATE))
- goto out;
- if (qhp->attr.state == attrs->next_state)
- goto out;
-
- switch (qhp->attr.state) {
- case IWCH_QP_STATE_IDLE:
- switch (attrs->next_state) {
- case IWCH_QP_STATE_RTS:
- if (!(mask & IWCH_QP_ATTR_LLP_STREAM_HANDLE)) {
- ret = -EINVAL;
- goto out;
- }
- if (!(mask & IWCH_QP_ATTR_MPA_ATTR)) {
- ret = -EINVAL;
- goto out;
- }
- qhp->attr.mpa_attr = attrs->mpa_attr;
- qhp->attr.llp_stream_handle = attrs->llp_stream_handle;
- qhp->ep = qhp->attr.llp_stream_handle;
- qhp->attr.state = IWCH_QP_STATE_RTS;
-
- /*
- * Ref the endpoint here and deref when we
- * disassociate the endpoint from the QP. This
- * happens in CLOSING->IDLE transition or *->ERROR
- * transition.
- */
- get_ep(&qhp->ep->com);
- spin_unlock_irqrestore(&qhp->lock, flag);
- ret = rdma_init(rhp, qhp, mask, attrs);
- spin_lock_irqsave(&qhp->lock, flag);
- if (ret)
- goto err;
- break;
- case IWCH_QP_STATE_ERROR:
- qhp->attr.state = IWCH_QP_STATE_ERROR;
- flush_qp(qhp);
- break;
- default:
- ret = -EINVAL;
- goto out;
- }
- break;
- case IWCH_QP_STATE_RTS:
- switch (attrs->next_state) {
- case IWCH_QP_STATE_CLOSING:
- BUG_ON(atomic_read(&qhp->ep->com.kref.refcount) < 2);
- qhp->attr.state = IWCH_QP_STATE_CLOSING;
- if (!internal) {
- abort=0;
- disconnect = 1;
- ep = qhp->ep;
- get_ep(&ep->com);
- }
- break;
- case IWCH_QP_STATE_TERMINATE:
- qhp->attr.state = IWCH_QP_STATE_TERMINATE;
- if (qhp->ibqp.uobject)
- cxio_set_wq_in_error(&qhp->wq);
- if (!internal)
- terminate = 1;
- break;
- case IWCH_QP_STATE_ERROR:
- qhp->attr.state = IWCH_QP_STATE_ERROR;
- if (!internal) {
- abort=1;
- disconnect = 1;
- ep = qhp->ep;
- get_ep(&ep->com);
- }
- goto err;
- break;
- default:
- ret = -EINVAL;
- goto out;
- }
- break;
- case IWCH_QP_STATE_CLOSING:
- if (!internal) {
- ret = -EINVAL;
- goto out;
- }
- switch (attrs->next_state) {
- case IWCH_QP_STATE_IDLE:
- flush_qp(qhp);
- qhp->attr.state = IWCH_QP_STATE_IDLE;
- qhp->attr.llp_stream_handle = NULL;
- put_ep(&qhp->ep->com);
- qhp->ep = NULL;
- wake_up(&qhp->wait);
- break;
- case IWCH_QP_STATE_ERROR:
- goto err;
- default:
- ret = -EINVAL;
- goto err;
- }
- break;
- case IWCH_QP_STATE_ERROR:
- if (attrs->next_state != IWCH_QP_STATE_IDLE) {
- ret = -EINVAL;
- goto out;
- }
-
- if (!Q_EMPTY(qhp->wq.sq_rptr, qhp->wq.sq_wptr) ||
- !Q_EMPTY(qhp->wq.rq_rptr, qhp->wq.rq_wptr)) {
- ret = -EINVAL;
- goto out;
- }
- qhp->attr.state = IWCH_QP_STATE_IDLE;
- break;
- case IWCH_QP_STATE_TERMINATE:
- if (!internal) {
- ret = -EINVAL;
- goto out;
- }
- goto err;
- break;
- default:
- printk(KERN_ERR "%s in a bad state %d\n",
- __func__, qhp->attr.state);
- ret = -EINVAL;
- goto err;
- break;
- }
- goto out;
-err:
- PDBG("%s disassociating ep %p qpid 0x%x\n", __func__, qhp->ep,
- qhp->wq.qpid);
-
- /* disassociate the LLP connection */
- qhp->attr.llp_stream_handle = NULL;
- ep = qhp->ep;
- qhp->ep = NULL;
- qhp->attr.state = IWCH_QP_STATE_ERROR;
- free=1;
- wake_up(&qhp->wait);
- BUG_ON(!ep);
- flush_qp(qhp);
-out:
- spin_unlock_irqrestore(&qhp->lock, flag);
-
- if (terminate)
- iwch_post_terminate(qhp, NULL);
-
- /*
- * If disconnect is 1, then we need to initiate a disconnect
- * on the EP. This can be a normal close (RTS->CLOSING) or
- * an abnormal close (RTS/CLOSING->ERROR).
- */
- if (disconnect) {
- iwch_ep_disconnect(ep, abort, GFP_KERNEL);
- put_ep(&ep->com);
- }
-
- /*
- * If free is 1, then we've disassociated the EP from the QP
- * and we need to dereference the EP.
- */
- if (free)
- put_ep(&ep->com);
-
- PDBG("%s exit state %d\n", __func__, qhp->attr.state);
- return ret;
-}
diff --git a/drivers/infiniband/hw/cxgb3/tcb.h b/drivers/infiniband/hw/cxgb3/tcb.h
deleted file mode 100644
index c702dc199e18..000000000000
--- a/drivers/infiniband/hw/cxgb3/tcb.h
+++ /dev/null
@@ -1,632 +0,0 @@
-/*
- * Copyright (c) 2007 Chelsio, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef _TCB_DEFS_H
-#define _TCB_DEFS_H
-
-#define W_TCB_T_STATE 0
-#define S_TCB_T_STATE 0
-#define M_TCB_T_STATE 0xfULL
-#define V_TCB_T_STATE(x) ((x) << S_TCB_T_STATE)
-
-#define W_TCB_TIMER 0
-#define S_TCB_TIMER 4
-#define M_TCB_TIMER 0x1ULL
-#define V_TCB_TIMER(x) ((x) << S_TCB_TIMER)
-
-#define W_TCB_DACK_TIMER 0
-#define S_TCB_DACK_TIMER 5
-#define M_TCB_DACK_TIMER 0x1ULL
-#define V_TCB_DACK_TIMER(x) ((x) << S_TCB_DACK_TIMER)
-
-#define W_TCB_DEL_FLAG 0
-#define S_TCB_DEL_FLAG 6
-#define M_TCB_DEL_FLAG 0x1ULL
-#define V_TCB_DEL_FLAG(x) ((x) << S_TCB_DEL_FLAG)
-
-#define W_TCB_L2T_IX 0
-#define S_TCB_L2T_IX 7
-#define M_TCB_L2T_IX 0x7ffULL
-#define V_TCB_L2T_IX(x) ((x) << S_TCB_L2T_IX)
-
-#define W_TCB_SMAC_SEL 0
-#define S_TCB_SMAC_SEL 18
-#define M_TCB_SMAC_SEL 0x3ULL
-#define V_TCB_SMAC_SEL(x) ((x) << S_TCB_SMAC_SEL)
-
-#define W_TCB_TOS 0
-#define S_TCB_TOS 20
-#define M_TCB_TOS 0x3fULL
-#define V_TCB_TOS(x) ((x) << S_TCB_TOS)
-
-#define W_TCB_MAX_RT 0
-#define S_TCB_MAX_RT 26
-#define M_TCB_MAX_RT 0xfULL
-#define V_TCB_MAX_RT(x) ((x) << S_TCB_MAX_RT)
-
-#define W_TCB_T_RXTSHIFT 0
-#define S_TCB_T_RXTSHIFT 30
-#define M_TCB_T_RXTSHIFT 0xfULL
-#define V_TCB_T_RXTSHIFT(x) ((x) << S_TCB_T_RXTSHIFT)
-
-#define W_TCB_T_DUPACKS 1
-#define S_TCB_T_DUPACKS 2
-#define M_TCB_T_DUPACKS 0xfULL
-#define V_TCB_T_DUPACKS(x) ((x) << S_TCB_T_DUPACKS)
-
-#define W_TCB_T_MAXSEG 1
-#define S_TCB_T_MAXSEG 6
-#define M_TCB_T_MAXSEG 0xfULL
-#define V_TCB_T_MAXSEG(x) ((x) << S_TCB_T_MAXSEG)
-
-#define W_TCB_T_FLAGS1 1
-#define S_TCB_T_FLAGS1 10
-#define M_TCB_T_FLAGS1 0xffffffffULL
-#define V_TCB_T_FLAGS1(x) ((x) << S_TCB_T_FLAGS1)
-
-#define W_TCB_T_MIGRATION 1
-#define S_TCB_T_MIGRATION 20
-#define M_TCB_T_MIGRATION 0x1ULL
-#define V_TCB_T_MIGRATION(x) ((x) << S_TCB_T_MIGRATION)
-
-#define W_TCB_T_FLAGS2 2
-#define S_TCB_T_FLAGS2 10
-#define M_TCB_T_FLAGS2 0x7fULL
-#define V_TCB_T_FLAGS2(x) ((x) << S_TCB_T_FLAGS2)
-
-#define W_TCB_SND_SCALE 2
-#define S_TCB_SND_SCALE 17
-#define M_TCB_SND_SCALE 0xfULL
-#define V_TCB_SND_SCALE(x) ((x) << S_TCB_SND_SCALE)
-
-#define W_TCB_RCV_SCALE 2
-#define S_TCB_RCV_SCALE 21
-#define M_TCB_RCV_SCALE 0xfULL
-#define V_TCB_RCV_SCALE(x) ((x) << S_TCB_RCV_SCALE)
-
-#define W_TCB_SND_UNA_RAW 2
-#define S_TCB_SND_UNA_RAW 25
-#define M_TCB_SND_UNA_RAW 0x7ffffffULL
-#define V_TCB_SND_UNA_RAW(x) ((x) << S_TCB_SND_UNA_RAW)
-
-#define W_TCB_SND_NXT_RAW 3
-#define S_TCB_SND_NXT_RAW 20
-#define M_TCB_SND_NXT_RAW 0x7ffffffULL
-#define V_TCB_SND_NXT_RAW(x) ((x) << S_TCB_SND_NXT_RAW)
-
-#define W_TCB_RCV_NXT 4
-#define S_TCB_RCV_NXT 15
-#define M_TCB_RCV_NXT 0xffffffffULL
-#define V_TCB_RCV_NXT(x) ((x) << S_TCB_RCV_NXT)
-
-#define W_TCB_RCV_ADV 5
-#define S_TCB_RCV_ADV 15
-#define M_TCB_RCV_ADV 0xffffULL
-#define V_TCB_RCV_ADV(x) ((x) << S_TCB_RCV_ADV)
-
-#define W_TCB_SND_MAX_RAW 5
-#define S_TCB_SND_MAX_RAW 31
-#define M_TCB_SND_MAX_RAW 0x7ffffffULL
-#define V_TCB_SND_MAX_RAW(x) ((x) << S_TCB_SND_MAX_RAW)
-
-#define W_TCB_SND_CWND 6
-#define S_TCB_SND_CWND 26
-#define M_TCB_SND_CWND 0x7ffffffULL
-#define V_TCB_SND_CWND(x) ((x) << S_TCB_SND_CWND)
-
-#define W_TCB_SND_SSTHRESH 7
-#define S_TCB_SND_SSTHRESH 21
-#define M_TCB_SND_SSTHRESH 0x7ffffffULL
-#define V_TCB_SND_SSTHRESH(x) ((x) << S_TCB_SND_SSTHRESH)
-
-#define W_TCB_T_RTT_TS_RECENT_AGE 8
-#define S_TCB_T_RTT_TS_RECENT_AGE 16
-#define M_TCB_T_RTT_TS_RECENT_AGE 0xffffffffULL
-#define V_TCB_T_RTT_TS_RECENT_AGE(x) ((x) << S_TCB_T_RTT_TS_RECENT_AGE)
-
-#define W_TCB_T_RTSEQ_RECENT 9
-#define S_TCB_T_RTSEQ_RECENT 16
-#define M_TCB_T_RTSEQ_RECENT 0xffffffffULL
-#define V_TCB_T_RTSEQ_RECENT(x) ((x) << S_TCB_T_RTSEQ_RECENT)
-
-#define W_TCB_T_SRTT 10
-#define S_TCB_T_SRTT 16
-#define M_TCB_T_SRTT 0xffffULL
-#define V_TCB_T_SRTT(x) ((x) << S_TCB_T_SRTT)
-
-#define W_TCB_T_RTTVAR 11
-#define S_TCB_T_RTTVAR 0
-#define M_TCB_T_RTTVAR 0xffffULL
-#define V_TCB_T_RTTVAR(x) ((x) << S_TCB_T_RTTVAR)
-
-#define W_TCB_TS_LAST_ACK_SENT_RAW 11
-#define S_TCB_TS_LAST_ACK_SENT_RAW 16
-#define M_TCB_TS_LAST_ACK_SENT_RAW 0x7ffffffULL
-#define V_TCB_TS_LAST_ACK_SENT_RAW(x) ((x) << S_TCB_TS_LAST_ACK_SENT_RAW)
-
-#define W_TCB_DIP 12
-#define S_TCB_DIP 11
-#define M_TCB_DIP 0xffffffffULL
-#define V_TCB_DIP(x) ((x) << S_TCB_DIP)
-
-#define W_TCB_SIP 13
-#define S_TCB_SIP 11
-#define M_TCB_SIP 0xffffffffULL
-#define V_TCB_SIP(x) ((x) << S_TCB_SIP)
-
-#define W_TCB_DP 14
-#define S_TCB_DP 11
-#define M_TCB_DP 0xffffULL
-#define V_TCB_DP(x) ((x) << S_TCB_DP)
-
-#define W_TCB_SP 14
-#define S_TCB_SP 27
-#define M_TCB_SP 0xffffULL
-#define V_TCB_SP(x) ((x) << S_TCB_SP)
-
-#define W_TCB_TIMESTAMP 15
-#define S_TCB_TIMESTAMP 11
-#define M_TCB_TIMESTAMP 0xffffffffULL
-#define V_TCB_TIMESTAMP(x) ((x) << S_TCB_TIMESTAMP)
-
-#define W_TCB_TIMESTAMP_OFFSET 16
-#define S_TCB_TIMESTAMP_OFFSET 11
-#define M_TCB_TIMESTAMP_OFFSET 0xfULL
-#define V_TCB_TIMESTAMP_OFFSET(x) ((x) << S_TCB_TIMESTAMP_OFFSET)
-
-#define W_TCB_TX_MAX 16
-#define S_TCB_TX_MAX 15
-#define M_TCB_TX_MAX 0xffffffffULL
-#define V_TCB_TX_MAX(x) ((x) << S_TCB_TX_MAX)
-
-#define W_TCB_TX_HDR_PTR_RAW 17
-#define S_TCB_TX_HDR_PTR_RAW 15
-#define M_TCB_TX_HDR_PTR_RAW 0x1ffffULL
-#define V_TCB_TX_HDR_PTR_RAW(x) ((x) << S_TCB_TX_HDR_PTR_RAW)
-
-#define W_TCB_TX_LAST_PTR_RAW 18
-#define S_TCB_TX_LAST_PTR_RAW 0
-#define M_TCB_TX_LAST_PTR_RAW 0x1ffffULL
-#define V_TCB_TX_LAST_PTR_RAW(x) ((x) << S_TCB_TX_LAST_PTR_RAW)
-
-#define W_TCB_TX_COMPACT 18
-#define S_TCB_TX_COMPACT 17
-#define M_TCB_TX_COMPACT 0x1ULL
-#define V_TCB_TX_COMPACT(x) ((x) << S_TCB_TX_COMPACT)
-
-#define W_TCB_RX_COMPACT 18
-#define S_TCB_RX_COMPACT 18
-#define M_TCB_RX_COMPACT 0x1ULL
-#define V_TCB_RX_COMPACT(x) ((x) << S_TCB_RX_COMPACT)
-
-#define W_TCB_RCV_WND 18
-#define S_TCB_RCV_WND 19
-#define M_TCB_RCV_WND 0x7ffffffULL
-#define V_TCB_RCV_WND(x) ((x) << S_TCB_RCV_WND)
-
-#define W_TCB_RX_HDR_OFFSET 19
-#define S_TCB_RX_HDR_OFFSET 14
-#define M_TCB_RX_HDR_OFFSET 0x7ffffffULL
-#define V_TCB_RX_HDR_OFFSET(x) ((x) << S_TCB_RX_HDR_OFFSET)
-
-#define W_TCB_RX_FRAG0_START_IDX_RAW 20
-#define S_TCB_RX_FRAG0_START_IDX_RAW 9
-#define M_TCB_RX_FRAG0_START_IDX_RAW 0x7ffffffULL
-#define V_TCB_RX_FRAG0_START_IDX_RAW(x) ((x) << S_TCB_RX_FRAG0_START_IDX_RAW)
-
-#define W_TCB_RX_FRAG1_START_IDX_OFFSET 21
-#define S_TCB_RX_FRAG1_START_IDX_OFFSET 4
-#define M_TCB_RX_FRAG1_START_IDX_OFFSET 0x7ffffffULL
-#define V_TCB_RX_FRAG1_START_IDX_OFFSET(x) ((x) << S_TCB_RX_FRAG1_START_IDX_OFFSET)
-
-#define W_TCB_RX_FRAG0_LEN 21
-#define S_TCB_RX_FRAG0_LEN 31
-#define M_TCB_RX_FRAG0_LEN 0x7ffffffULL
-#define V_TCB_RX_FRAG0_LEN(x) ((x) << S_TCB_RX_FRAG0_LEN)
-
-#define W_TCB_RX_FRAG1_LEN 22
-#define S_TCB_RX_FRAG1_LEN 26
-#define M_TCB_RX_FRAG1_LEN 0x7ffffffULL
-#define V_TCB_RX_FRAG1_LEN(x) ((x) << S_TCB_RX_FRAG1_LEN)
-
-#define W_TCB_NEWRENO_RECOVER 23
-#define S_TCB_NEWRENO_RECOVER 21
-#define M_TCB_NEWRENO_RECOVER 0x7ffffffULL
-#define V_TCB_NEWRENO_RECOVER(x) ((x) << S_TCB_NEWRENO_RECOVER)
-
-#define W_TCB_PDU_HAVE_LEN 24
-#define S_TCB_PDU_HAVE_LEN 16
-#define M_TCB_PDU_HAVE_LEN 0x1ULL
-#define V_TCB_PDU_HAVE_LEN(x) ((x) << S_TCB_PDU_HAVE_LEN)
-
-#define W_TCB_PDU_LEN 24
-#define S_TCB_PDU_LEN 17
-#define M_TCB_PDU_LEN 0xffffULL
-#define V_TCB_PDU_LEN(x) ((x) << S_TCB_PDU_LEN)
-
-#define W_TCB_RX_QUIESCE 25
-#define S_TCB_RX_QUIESCE 1
-#define M_TCB_RX_QUIESCE 0x1ULL
-#define V_TCB_RX_QUIESCE(x) ((x) << S_TCB_RX_QUIESCE)
-
-#define W_TCB_RX_PTR_RAW 25
-#define S_TCB_RX_PTR_RAW 2
-#define M_TCB_RX_PTR_RAW 0x1ffffULL
-#define V_TCB_RX_PTR_RAW(x) ((x) << S_TCB_RX_PTR_RAW)
-
-#define W_TCB_CPU_NO 25
-#define S_TCB_CPU_NO 19
-#define M_TCB_CPU_NO 0x7fULL
-#define V_TCB_CPU_NO(x) ((x) << S_TCB_CPU_NO)
-
-#define W_TCB_ULP_TYPE 25
-#define S_TCB_ULP_TYPE 26
-#define M_TCB_ULP_TYPE 0xfULL
-#define V_TCB_ULP_TYPE(x) ((x) << S_TCB_ULP_TYPE)
-
-#define W_TCB_RX_FRAG1_PTR_RAW 25
-#define S_TCB_RX_FRAG1_PTR_RAW 30
-#define M_TCB_RX_FRAG1_PTR_RAW 0x1ffffULL
-#define V_TCB_RX_FRAG1_PTR_RAW(x) ((x) << S_TCB_RX_FRAG1_PTR_RAW)
-
-#define W_TCB_RX_FRAG2_START_IDX_OFFSET_RAW 26
-#define S_TCB_RX_FRAG2_START_IDX_OFFSET_RAW 15
-#define M_TCB_RX_FRAG2_START_IDX_OFFSET_RAW 0x7ffffffULL
-#define V_TCB_RX_FRAG2_START_IDX_OFFSET_RAW(x) ((x) << S_TCB_RX_FRAG2_START_IDX_OFFSET_RAW)
-
-#define W_TCB_RX_FRAG2_PTR_RAW 27
-#define S_TCB_RX_FRAG2_PTR_RAW 10
-#define M_TCB_RX_FRAG2_PTR_RAW 0x1ffffULL
-#define V_TCB_RX_FRAG2_PTR_RAW(x) ((x) << S_TCB_RX_FRAG2_PTR_RAW)
-
-#define W_TCB_RX_FRAG2_LEN_RAW 27
-#define S_TCB_RX_FRAG2_LEN_RAW 27
-#define M_TCB_RX_FRAG2_LEN_RAW 0x7ffffffULL
-#define V_TCB_RX_FRAG2_LEN_RAW(x) ((x) << S_TCB_RX_FRAG2_LEN_RAW)
-
-#define W_TCB_RX_FRAG3_PTR_RAW 28
-#define S_TCB_RX_FRAG3_PTR_RAW 22
-#define M_TCB_RX_FRAG3_PTR_RAW 0x1ffffULL
-#define V_TCB_RX_FRAG3_PTR_RAW(x) ((x) << S_TCB_RX_FRAG3_PTR_RAW)
-
-#define W_TCB_RX_FRAG3_LEN_RAW 29
-#define S_TCB_RX_FRAG3_LEN_RAW 7
-#define M_TCB_RX_FRAG3_LEN_RAW 0x7ffffffULL
-#define V_TCB_RX_FRAG3_LEN_RAW(x) ((x) << S_TCB_RX_FRAG3_LEN_RAW)
-
-#define W_TCB_RX_FRAG3_START_IDX_OFFSET_RAW 30
-#define S_TCB_RX_FRAG3_START_IDX_OFFSET_RAW 2
-#define M_TCB_RX_FRAG3_START_IDX_OFFSET_RAW 0x7ffffffULL
-#define V_TCB_RX_FRAG3_START_IDX_OFFSET_RAW(x) ((x) << S_TCB_RX_FRAG3_START_IDX_OFFSET_RAW)
-
-#define W_TCB_PDU_HDR_LEN 30
-#define S_TCB_PDU_HDR_LEN 29
-#define M_TCB_PDU_HDR_LEN 0xffULL
-#define V_TCB_PDU_HDR_LEN(x) ((x) << S_TCB_PDU_HDR_LEN)
-
-#define W_TCB_SLUSH1 31
-#define S_TCB_SLUSH1 5
-#define M_TCB_SLUSH1 0x7ffffULL
-#define V_TCB_SLUSH1(x) ((x) << S_TCB_SLUSH1)
-
-#define W_TCB_ULP_RAW 31
-#define S_TCB_ULP_RAW 24
-#define M_TCB_ULP_RAW 0xffULL
-#define V_TCB_ULP_RAW(x) ((x) << S_TCB_ULP_RAW)
-
-#define W_TCB_DDP_RDMAP_VERSION 25
-#define S_TCB_DDP_RDMAP_VERSION 30
-#define M_TCB_DDP_RDMAP_VERSION 0x1ULL
-#define V_TCB_DDP_RDMAP_VERSION(x) ((x) << S_TCB_DDP_RDMAP_VERSION)
-
-#define W_TCB_MARKER_ENABLE_RX 25
-#define S_TCB_MARKER_ENABLE_RX 31
-#define M_TCB_MARKER_ENABLE_RX 0x1ULL
-#define V_TCB_MARKER_ENABLE_RX(x) ((x) << S_TCB_MARKER_ENABLE_RX)
-
-#define W_TCB_MARKER_ENABLE_TX 26
-#define S_TCB_MARKER_ENABLE_TX 0
-#define M_TCB_MARKER_ENABLE_TX 0x1ULL
-#define V_TCB_MARKER_ENABLE_TX(x) ((x) << S_TCB_MARKER_ENABLE_TX)
-
-#define W_TCB_CRC_ENABLE 26
-#define S_TCB_CRC_ENABLE 1
-#define M_TCB_CRC_ENABLE 0x1ULL
-#define V_TCB_CRC_ENABLE(x) ((x) << S_TCB_CRC_ENABLE)
-
-#define W_TCB_IRS_ULP 26
-#define S_TCB_IRS_ULP 2
-#define M_TCB_IRS_ULP 0x1ffULL
-#define V_TCB_IRS_ULP(x) ((x) << S_TCB_IRS_ULP)
-
-#define W_TCB_ISS_ULP 26
-#define S_TCB_ISS_ULP 11
-#define M_TCB_ISS_ULP 0x1ffULL
-#define V_TCB_ISS_ULP(x) ((x) << S_TCB_ISS_ULP)
-
-#define W_TCB_TX_PDU_LEN 26
-#define S_TCB_TX_PDU_LEN 20
-#define M_TCB_TX_PDU_LEN 0x3fffULL
-#define V_TCB_TX_PDU_LEN(x) ((x) << S_TCB_TX_PDU_LEN)
-
-#define W_TCB_TX_PDU_OUT 27
-#define S_TCB_TX_PDU_OUT 2
-#define M_TCB_TX_PDU_OUT 0x1ULL
-#define V_TCB_TX_PDU_OUT(x) ((x) << S_TCB_TX_PDU_OUT)
-
-#define W_TCB_CQ_IDX_SQ 27
-#define S_TCB_CQ_IDX_SQ 3
-#define M_TCB_CQ_IDX_SQ 0xffffULL
-#define V_TCB_CQ_IDX_SQ(x) ((x) << S_TCB_CQ_IDX_SQ)
-
-#define W_TCB_CQ_IDX_RQ 27
-#define S_TCB_CQ_IDX_RQ 19
-#define M_TCB_CQ_IDX_RQ 0xffffULL
-#define V_TCB_CQ_IDX_RQ(x) ((x) << S_TCB_CQ_IDX_RQ)
-
-#define W_TCB_QP_ID 28
-#define S_TCB_QP_ID 3
-#define M_TCB_QP_ID 0xffffULL
-#define V_TCB_QP_ID(x) ((x) << S_TCB_QP_ID)
-
-#define W_TCB_PD_ID 28
-#define S_TCB_PD_ID 19
-#define M_TCB_PD_ID 0xffffULL
-#define V_TCB_PD_ID(x) ((x) << S_TCB_PD_ID)
-
-#define W_TCB_STAG 29
-#define S_TCB_STAG 3
-#define M_TCB_STAG 0xffffffffULL
-#define V_TCB_STAG(x) ((x) << S_TCB_STAG)
-
-#define W_TCB_RQ_START 30
-#define S_TCB_RQ_START 3
-#define M_TCB_RQ_START 0x3ffffffULL
-#define V_TCB_RQ_START(x) ((x) << S_TCB_RQ_START)
-
-#define W_TCB_RQ_MSN 30
-#define S_TCB_RQ_MSN 29
-#define M_TCB_RQ_MSN 0x3ffULL
-#define V_TCB_RQ_MSN(x) ((x) << S_TCB_RQ_MSN)
-
-#define W_TCB_RQ_MAX_OFFSET 31
-#define S_TCB_RQ_MAX_OFFSET 7
-#define M_TCB_RQ_MAX_OFFSET 0xfULL
-#define V_TCB_RQ_MAX_OFFSET(x) ((x) << S_TCB_RQ_MAX_OFFSET)
-
-#define W_TCB_RQ_WRITE_PTR 31
-#define S_TCB_RQ_WRITE_PTR 11
-#define M_TCB_RQ_WRITE_PTR 0x3ffULL
-#define V_TCB_RQ_WRITE_PTR(x) ((x) << S_TCB_RQ_WRITE_PTR)
-
-#define W_TCB_INB_WRITE_PERM 31
-#define S_TCB_INB_WRITE_PERM 21
-#define M_TCB_INB_WRITE_PERM 0x1ULL
-#define V_TCB_INB_WRITE_PERM(x) ((x) << S_TCB_INB_WRITE_PERM)
-
-#define W_TCB_INB_READ_PERM 31
-#define S_TCB_INB_READ_PERM 22
-#define M_TCB_INB_READ_PERM 0x1ULL
-#define V_TCB_INB_READ_PERM(x) ((x) << S_TCB_INB_READ_PERM)
-
-#define W_TCB_ORD_L_BIT_VLD 31
-#define S_TCB_ORD_L_BIT_VLD 23
-#define M_TCB_ORD_L_BIT_VLD 0x1ULL
-#define V_TCB_ORD_L_BIT_VLD(x) ((x) << S_TCB_ORD_L_BIT_VLD)
-
-#define W_TCB_RDMAP_OPCODE 31
-#define S_TCB_RDMAP_OPCODE 24
-#define M_TCB_RDMAP_OPCODE 0xfULL
-#define V_TCB_RDMAP_OPCODE(x) ((x) << S_TCB_RDMAP_OPCODE)
-
-#define W_TCB_TX_FLUSH 31
-#define S_TCB_TX_FLUSH 28
-#define M_TCB_TX_FLUSH 0x1ULL
-#define V_TCB_TX_FLUSH(x) ((x) << S_TCB_TX_FLUSH)
-
-#define W_TCB_TX_OOS_RXMT 31
-#define S_TCB_TX_OOS_RXMT 29
-#define M_TCB_TX_OOS_RXMT 0x1ULL
-#define V_TCB_TX_OOS_RXMT(x) ((x) << S_TCB_TX_OOS_RXMT)
-
-#define W_TCB_TX_OOS_TXMT 31
-#define S_TCB_TX_OOS_TXMT 30
-#define M_TCB_TX_OOS_TXMT 0x1ULL
-#define V_TCB_TX_OOS_TXMT(x) ((x) << S_TCB_TX_OOS_TXMT)
-
-#define W_TCB_SLUSH_AUX2 31
-#define S_TCB_SLUSH_AUX2 31
-#define M_TCB_SLUSH_AUX2 0x1ULL
-#define V_TCB_SLUSH_AUX2(x) ((x) << S_TCB_SLUSH_AUX2)
-
-#define W_TCB_RX_FRAG1_PTR_RAW2 25
-#define S_TCB_RX_FRAG1_PTR_RAW2 30
-#define M_TCB_RX_FRAG1_PTR_RAW2 0x1ffffULL
-#define V_TCB_RX_FRAG1_PTR_RAW2(x) ((x) << S_TCB_RX_FRAG1_PTR_RAW2)
-
-#define W_TCB_RX_DDP_FLAGS 26
-#define S_TCB_RX_DDP_FLAGS 15
-#define M_TCB_RX_DDP_FLAGS 0x3ffULL
-#define V_TCB_RX_DDP_FLAGS(x) ((x) << S_TCB_RX_DDP_FLAGS)
-
-#define W_TCB_SLUSH_AUX3 26
-#define S_TCB_SLUSH_AUX3 31
-#define M_TCB_SLUSH_AUX3 0x1ffULL
-#define V_TCB_SLUSH_AUX3(x) ((x) << S_TCB_SLUSH_AUX3)
-
-#define W_TCB_RX_DDP_BUF0_OFFSET 27
-#define S_TCB_RX_DDP_BUF0_OFFSET 8
-#define M_TCB_RX_DDP_BUF0_OFFSET 0x3fffffULL
-#define V_TCB_RX_DDP_BUF0_OFFSET(x) ((x) << S_TCB_RX_DDP_BUF0_OFFSET)
-
-#define W_TCB_RX_DDP_BUF0_LEN 27
-#define S_TCB_RX_DDP_BUF0_LEN 30
-#define M_TCB_RX_DDP_BUF0_LEN 0x3fffffULL
-#define V_TCB_RX_DDP_BUF0_LEN(x) ((x) << S_TCB_RX_DDP_BUF0_LEN)
-
-#define W_TCB_RX_DDP_BUF1_OFFSET 28
-#define S_TCB_RX_DDP_BUF1_OFFSET 20
-#define M_TCB_RX_DDP_BUF1_OFFSET 0x3fffffULL
-#define V_TCB_RX_DDP_BUF1_OFFSET(x) ((x) << S_TCB_RX_DDP_BUF1_OFFSET)
-
-#define W_TCB_RX_DDP_BUF1_LEN 29
-#define S_TCB_RX_DDP_BUF1_LEN 10
-#define M_TCB_RX_DDP_BUF1_LEN 0x3fffffULL
-#define V_TCB_RX_DDP_BUF1_LEN(x) ((x) << S_TCB_RX_DDP_BUF1_LEN)
-
-#define W_TCB_RX_DDP_BUF0_TAG 30
-#define S_TCB_RX_DDP_BUF0_TAG 0
-#define M_TCB_RX_DDP_BUF0_TAG 0xffffffffULL
-#define V_TCB_RX_DDP_BUF0_TAG(x) ((x) << S_TCB_RX_DDP_BUF0_TAG)
-
-#define W_TCB_RX_DDP_BUF1_TAG 31
-#define S_TCB_RX_DDP_BUF1_TAG 0
-#define M_TCB_RX_DDP_BUF1_TAG 0xffffffffULL
-#define V_TCB_RX_DDP_BUF1_TAG(x) ((x) << S_TCB_RX_DDP_BUF1_TAG)
-
-#define S_TF_DACK 10
-#define V_TF_DACK(x) ((x) << S_TF_DACK)
-
-#define S_TF_NAGLE 11
-#define V_TF_NAGLE(x) ((x) << S_TF_NAGLE)
-
-#define S_TF_RECV_SCALE 12
-#define V_TF_RECV_SCALE(x) ((x) << S_TF_RECV_SCALE)
-
-#define S_TF_RECV_TSTMP 13
-#define V_TF_RECV_TSTMP(x) ((x) << S_TF_RECV_TSTMP)
-
-#define S_TF_RECV_SACK 14
-#define V_TF_RECV_SACK(x) ((x) << S_TF_RECV_SACK)
-
-#define S_TF_TURBO 15
-#define V_TF_TURBO(x) ((x) << S_TF_TURBO)
-
-#define S_TF_KEEPALIVE 16
-#define V_TF_KEEPALIVE(x) ((x) << S_TF_KEEPALIVE)
-
-#define S_TF_TCAM_BYPASS 17
-#define V_TF_TCAM_BYPASS(x) ((x) << S_TF_TCAM_BYPASS)
-
-#define S_TF_CORE_FIN 18
-#define V_TF_CORE_FIN(x) ((x) << S_TF_CORE_FIN)
-
-#define S_TF_CORE_MORE 19
-#define V_TF_CORE_MORE(x) ((x) << S_TF_CORE_MORE)
-
-#define S_TF_MIGRATING 20
-#define V_TF_MIGRATING(x) ((x) << S_TF_MIGRATING)
-
-#define S_TF_ACTIVE_OPEN 21
-#define V_TF_ACTIVE_OPEN(x) ((x) << S_TF_ACTIVE_OPEN)
-
-#define S_TF_ASK_MODE 22
-#define V_TF_ASK_MODE(x) ((x) << S_TF_ASK_MODE)
-
-#define S_TF_NON_OFFLOAD 23
-#define V_TF_NON_OFFLOAD(x) ((x) << S_TF_NON_OFFLOAD)
-
-#define S_TF_MOD_SCHD 24
-#define V_TF_MOD_SCHD(x) ((x) << S_TF_MOD_SCHD)
-
-#define S_TF_MOD_SCHD_REASON0 25
-#define V_TF_MOD_SCHD_REASON0(x) ((x) << S_TF_MOD_SCHD_REASON0)
-
-#define S_TF_MOD_SCHD_REASON1 26
-#define V_TF_MOD_SCHD_REASON1(x) ((x) << S_TF_MOD_SCHD_REASON1)
-
-#define S_TF_MOD_SCHD_RX 27
-#define V_TF_MOD_SCHD_RX(x) ((x) << S_TF_MOD_SCHD_RX)
-
-#define S_TF_CORE_PUSH 28
-#define V_TF_CORE_PUSH(x) ((x) << S_TF_CORE_PUSH)
-
-#define S_TF_RCV_COALESCE_ENABLE 29
-#define V_TF_RCV_COALESCE_ENABLE(x) ((x) << S_TF_RCV_COALESCE_ENABLE)
-
-#define S_TF_RCV_COALESCE_PUSH 30
-#define V_TF_RCV_COALESCE_PUSH(x) ((x) << S_TF_RCV_COALESCE_PUSH)
-
-#define S_TF_RCV_COALESCE_LAST_PSH 31
-#define V_TF_RCV_COALESCE_LAST_PSH(x) ((x) << S_TF_RCV_COALESCE_LAST_PSH)
-
-#define S_TF_RCV_COALESCE_HEARTBEAT 32
-#define V_TF_RCV_COALESCE_HEARTBEAT(x) ((x) << S_TF_RCV_COALESCE_HEARTBEAT)
-
-#define S_TF_HALF_CLOSE 33
-#define V_TF_HALF_CLOSE(x) ((x) << S_TF_HALF_CLOSE)
-
-#define S_TF_DACK_MSS 34
-#define V_TF_DACK_MSS(x) ((x) << S_TF_DACK_MSS)
-
-#define S_TF_CCTRL_SEL0 35
-#define V_TF_CCTRL_SEL0(x) ((x) << S_TF_CCTRL_SEL0)
-
-#define S_TF_CCTRL_SEL1 36
-#define V_TF_CCTRL_SEL1(x) ((x) << S_TF_CCTRL_SEL1)
-
-#define S_TF_TCP_NEWRENO_FAST_RECOVERY 37
-#define V_TF_TCP_NEWRENO_FAST_RECOVERY(x) ((x) << S_TF_TCP_NEWRENO_FAST_RECOVERY)
-
-#define S_TF_TX_PACE_AUTO 38
-#define V_TF_TX_PACE_AUTO(x) ((x) << S_TF_TX_PACE_AUTO)
-
-#define S_TF_PEER_FIN_HELD 39
-#define V_TF_PEER_FIN_HELD(x) ((x) << S_TF_PEER_FIN_HELD)
-
-#define S_TF_CORE_URG 40
-#define V_TF_CORE_URG(x) ((x) << S_TF_CORE_URG)
-
-#define S_TF_RDMA_ERROR 41
-#define V_TF_RDMA_ERROR(x) ((x) << S_TF_RDMA_ERROR)
-
-#define S_TF_SSWS_DISABLED 42
-#define V_TF_SSWS_DISABLED(x) ((x) << S_TF_SSWS_DISABLED)
-
-#define S_TF_DUPACK_COUNT_ODD 43
-#define V_TF_DUPACK_COUNT_ODD(x) ((x) << S_TF_DUPACK_COUNT_ODD)
-
-#define S_TF_TX_CHANNEL 44
-#define V_TF_TX_CHANNEL(x) ((x) << S_TF_TX_CHANNEL)
-
-#define S_TF_RX_CHANNEL 45
-#define V_TF_RX_CHANNEL(x) ((x) << S_TF_RX_CHANNEL)
-
-#define S_TF_TX_PACE_FIXED 46
-#define V_TF_TX_PACE_FIXED(x) ((x) << S_TF_TX_PACE_FIXED)
-
-#define S_TF_RDMA_FLM_ERROR 47
-#define V_TF_RDMA_FLM_ERROR(x) ((x) << S_TF_RDMA_FLM_ERROR)
-
-#define S_TF_RX_FLOW_CONTROL_DISABLE 48
-#define V_TF_RX_FLOW_CONTROL_DISABLE(x) ((x) << S_TF_RX_FLOW_CONTROL_DISABLE)
-
-#endif /* _TCB_DEFS_H */
diff --git a/drivers/infiniband/hw/cxgb4/Kconfig b/drivers/infiniband/hw/cxgb4/Kconfig
index afe8b28e0878..9e2b2c348afd 100644
--- a/drivers/infiniband/hw/cxgb4/Kconfig
+++ b/drivers/infiniband/hw/cxgb4/Kconfig
@@ -1,9 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0-only
config INFINIBAND_CXGB4
tristate "Chelsio T4/T5 RDMA Driver"
- depends on CHELSIO_T4 && INET && (IPV6 || IPV6=n)
+ depends on CHELSIO_T4 && INET
+ depends on INFINIBAND_ADDR_TRANS
select CHELSIO_LIB
select GENERIC_ALLOCATOR
- ---help---
+ help
This is an iWARP/RDMA driver for the Chelsio T4 and T5
1GbE, 10GbE adapters and T5 40GbE adapter.
diff --git a/drivers/infiniband/hw/cxgb4/Makefile b/drivers/infiniband/hw/cxgb4/Makefile
index fa40b685831b..291d259d2319 100644
--- a/drivers/infiniband/hw/cxgb4/Makefile
+++ b/drivers/infiniband/hw/cxgb4/Makefile
@@ -1,6 +1,8 @@
-ccflags-y := -Idrivers/net/ethernet/chelsio/cxgb4
-ccflags-y += -Idrivers/net/ethernet/chelsio/libcxgb
+# SPDX-License-Identifier: GPL-2.0-only
+ccflags-y := -I $(srctree)/drivers/net/ethernet/chelsio/cxgb4
+ccflags-y += -I $(srctree)/drivers/net/ethernet/chelsio/libcxgb
obj-$(CONFIG_INFINIBAND_CXGB4) += iw_cxgb4.o
-iw_cxgb4-y := device.o cm.o provider.o mem.o cq.o qp.o resource.o ev.o id_table.o
+iw_cxgb4-y := device.o cm.o provider.o mem.o cq.o qp.o resource.o ev.o id_table.o \
+ restrack.o
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index 9749c52f8729..b3b45c49077d 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -77,9 +77,9 @@ static int enable_ecn;
module_param(enable_ecn, int, 0644);
MODULE_PARM_DESC(enable_ecn, "Enable ECN (default=0/disabled)");
-static int dack_mode = 1;
+static int dack_mode;
module_param(dack_mode, int, 0644);
-MODULE_PARM_DESC(dack_mode, "Delayed ack mode (default=1)");
+MODULE_PARM_DESC(dack_mode, "Delayed ack mode (default=0)");
uint c4iw_max_read_depth = 32;
module_param(c4iw_max_read_depth, int, 0644);
@@ -99,10 +99,6 @@ module_param(enable_tcp_window_scaling, int, 0644);
MODULE_PARM_DESC(enable_tcp_window_scaling,
"Enable tcp window scaling (default=1)");
-int c4iw_debug;
-module_param(c4iw_debug, int, 0644);
-MODULE_PARM_DESC(c4iw_debug, "Enable debug logging (default=0)");
-
static int peer2peer = 1;
module_param(peer2peer, int, 0644);
MODULE_PARM_DESC(peer2peer, "Support peer2peer ULPs (default=1)");
@@ -144,12 +140,12 @@ static struct workqueue_struct *workq;
static struct sk_buff_head rxq;
static struct sk_buff *get_skb(struct sk_buff *skb, int len, gfp_t gfp);
-static void ep_timeout(unsigned long arg);
+static void ep_timeout(struct timer_list *t);
static void connect_reply_upcall(struct c4iw_ep *ep, int status);
static int sched(struct c4iw_dev *dev, struct sk_buff *skb);
static LIST_HEAD(timeout_list);
-static spinlock_t timeout_lock;
+static DEFINE_SPINLOCK(timeout_lock);
static void deref_cm_id(struct c4iw_ep_common *epc)
{
@@ -180,7 +176,7 @@ static void ref_qp(struct c4iw_ep *ep)
static void start_ep_timer(struct c4iw_ep *ep)
{
- PDBG("%s ep %p\n", __func__, ep);
+ pr_debug("ep %p\n", ep);
if (timer_pending(&ep->timer)) {
pr_err("%s timer already started! ep %p\n",
__func__, ep);
@@ -189,15 +185,13 @@ static void start_ep_timer(struct c4iw_ep *ep)
clear_bit(TIMEOUT, &ep->com.flags);
c4iw_get_ep(&ep->com);
ep->timer.expires = jiffies + ep_timeout_secs * HZ;
- ep->timer.data = (unsigned long)ep;
- ep->timer.function = ep_timeout;
add_timer(&ep->timer);
}
static int stop_ep_timer(struct c4iw_ep *ep)
{
- PDBG("%s ep %p stopping\n", __func__, ep);
- del_timer_sync(&ep->timer);
+ pr_debug("ep %p stopping\n", ep);
+ timer_delete_sync(&ep->timer);
if (!test_and_set_bit(TIMEOUT, &ep->com.flags)) {
c4iw_put_ep(&ep->com);
return 0;
@@ -212,7 +206,7 @@ static int c4iw_l2t_send(struct c4iw_rdev *rdev, struct sk_buff *skb,
if (c4iw_fatal_error(rdev)) {
kfree_skb(skb);
- PDBG("%s - device in error state - dropping\n", __func__);
+ pr_err("%s - device in error state - dropping\n", __func__);
return -EIO;
}
error = cxgb4_l2t_send(rdev->lldi.ports[0], skb, l2e);
@@ -229,7 +223,7 @@ int c4iw_ofld_send(struct c4iw_rdev *rdev, struct sk_buff *skb)
if (c4iw_fatal_error(rdev)) {
kfree_skb(skb);
- PDBG("%s - device in error state - dropping\n", __func__);
+ pr_err("%s - device in error state - dropping\n", __func__);
return -EIO;
}
error = cxgb4_ofld_send(rdev->lldi.ports[0], skb);
@@ -263,10 +257,10 @@ static void set_emss(struct c4iw_ep *ep, u16 opt)
if (ep->emss < 128)
ep->emss = 128;
if (ep->emss & 7)
- PDBG("Warning: misaligned mtu idx %u mss %u emss=%u\n",
- TCPOPT_MSS_G(opt), ep->mss, ep->emss);
- PDBG("%s mss_idx %u mss %u emss=%u\n", __func__, TCPOPT_MSS_G(opt),
- ep->mss, ep->emss);
+ pr_debug("Warning: misaligned mtu idx %u mss %u emss=%u\n",
+ TCPOPT_MSS_G(opt), ep->mss, ep->emss);
+ pr_debug("mss_idx %u mss %u emss=%u\n", TCPOPT_MSS_G(opt), ep->mss,
+ ep->emss);
}
static enum c4iw_ep_state state_read(struct c4iw_ep_common *epc)
@@ -287,7 +281,7 @@ static void __state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state new)
static void state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state new)
{
mutex_lock(&epc->mutex);
- PDBG("%s - %s -> %s\n", __func__, states[epc->state], states[new]);
+ pr_debug("%s -> %s\n", states[epc->state], states[new]);
__state_set(epc, new);
mutex_unlock(&epc->mutex);
return;
@@ -318,11 +312,18 @@ static void *alloc_ep(int size, gfp_t gfp)
epc = kzalloc(size, gfp);
if (epc) {
+ epc->wr_waitp = c4iw_alloc_wr_wait(gfp);
+ if (!epc->wr_waitp) {
+ kfree(epc);
+ epc = NULL;
+ goto out;
+ }
kref_init(&epc->kref);
mutex_init(&epc->mutex);
- c4iw_init_wr_wait(&epc->wr_wait);
+ c4iw_init_wr_wait(epc->wr_waitp);
}
- PDBG("%s alloc ep %p\n", __func__, epc);
+ pr_debug("alloc ep %p\n", epc);
+out:
return epc;
}
@@ -330,20 +331,23 @@ static void remove_ep_tid(struct c4iw_ep *ep)
{
unsigned long flags;
- spin_lock_irqsave(&ep->com.dev->lock, flags);
- _remove_handle(ep->com.dev, &ep->com.dev->hwtid_idr, ep->hwtid, 0);
- if (idr_is_empty(&ep->com.dev->hwtid_idr))
+ xa_lock_irqsave(&ep->com.dev->hwtids, flags);
+ __xa_erase(&ep->com.dev->hwtids, ep->hwtid);
+ if (xa_empty(&ep->com.dev->hwtids))
wake_up(&ep->com.dev->wait);
- spin_unlock_irqrestore(&ep->com.dev->lock, flags);
+ xa_unlock_irqrestore(&ep->com.dev->hwtids, flags);
}
-static void insert_ep_tid(struct c4iw_ep *ep)
+static int insert_ep_tid(struct c4iw_ep *ep)
{
unsigned long flags;
+ int err;
+
+ xa_lock_irqsave(&ep->com.dev->hwtids, flags);
+ err = __xa_insert(&ep->com.dev->hwtids, ep->hwtid, ep, GFP_KERNEL);
+ xa_unlock_irqrestore(&ep->com.dev->hwtids, flags);
- spin_lock_irqsave(&ep->com.dev->lock, flags);
- _insert_handle(ep->com.dev, &ep->com.dev->hwtid_idr, ep, ep->hwtid, 0);
- spin_unlock_irqrestore(&ep->com.dev->lock, flags);
+ return err;
}
/*
@@ -354,11 +358,11 @@ static struct c4iw_ep *get_ep_from_tid(struct c4iw_dev *dev, unsigned int tid)
struct c4iw_ep *ep;
unsigned long flags;
- spin_lock_irqsave(&dev->lock, flags);
- ep = idr_find(&dev->hwtid_idr, tid);
+ xa_lock_irqsave(&dev->hwtids, flags);
+ ep = xa_load(&dev->hwtids, tid);
if (ep)
c4iw_get_ep(&ep->com);
- spin_unlock_irqrestore(&dev->lock, flags);
+ xa_unlock_irqrestore(&dev->hwtids, flags);
return ep;
}
@@ -371,11 +375,11 @@ static struct c4iw_listen_ep *get_ep_from_stid(struct c4iw_dev *dev,
struct c4iw_listen_ep *ep;
unsigned long flags;
- spin_lock_irqsave(&dev->lock, flags);
- ep = idr_find(&dev->stid_idr, stid);
+ xa_lock_irqsave(&dev->stids, flags);
+ ep = xa_load(&dev->stids, stid);
if (ep)
c4iw_get_ep(&ep->com);
- spin_unlock_irqrestore(&dev->lock, flags);
+ xa_unlock_irqrestore(&dev->stids, flags);
return ep;
}
@@ -384,7 +388,7 @@ void _c4iw_free_ep(struct kref *kref)
struct c4iw_ep *ep;
ep = container_of(kref, struct c4iw_ep, com.kref);
- PDBG("%s ep %p state %s\n", __func__, ep, states[ep->com.state]);
+ pr_debug("ep %p state %s\n", ep, states[ep->com.state]);
if (test_bit(QP_REFERENCED, &ep->com.flags))
deref_qp(ep);
if (test_bit(RELEASE_RESOURCES, &ep->com.flags)) {
@@ -398,14 +402,15 @@ void _c4iw_free_ep(struct kref *kref)
(const u32 *)&sin6->sin6_addr.s6_addr,
1);
}
- cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, ep->hwtid);
+ cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, ep->hwtid,
+ ep->com.local_addr.ss_family);
dst_release(ep->dst);
cxgb4_l2t_release(ep->l2t);
- if (ep->mpa_skb)
- kfree_skb(ep->mpa_skb);
+ kfree_skb(ep->mpa_skb);
}
if (!skb_queue_empty(&ep->com.ep_skb_list))
skb_queue_purge(&ep->com.ep_skb_list);
+ c4iw_put_wr_wait(ep->com.wr_waitp);
kfree(ep);
}
@@ -455,6 +460,8 @@ static struct sk_buff *get_skb(struct sk_buff *skb, int len, gfp_t gfp)
skb_reset_transport_header(skb);
} else {
skb = alloc_skb(len, gfp);
+ if (!skb)
+ return NULL;
}
t4_set_arp_err_handler(skb, NULL, NULL);
return skb;
@@ -467,7 +474,7 @@ static struct net_device *get_real_dev(struct net_device *egress_dev)
static void arp_failure_discard(void *handle, struct sk_buff *skb)
{
- pr_err(MOD "ARP failure\n");
+ pr_err("ARP failure\n");
kfree_skb(skb);
}
@@ -528,7 +535,7 @@ static void pass_accept_rpl_arp_failure(void *handle, struct sk_buff *skb)
{
struct c4iw_ep *ep = handle;
- pr_err(MOD "ARP failure during accept - tid %u -dropping connection\n",
+ pr_err("ARP failure during accept - tid %u - dropping connection\n",
ep->hwtid);
__state_set(&ep->com, DEAD);
@@ -542,7 +549,7 @@ static void act_open_req_arp_failure(void *handle, struct sk_buff *skb)
{
struct c4iw_ep *ep = handle;
- printk(KERN_ERR MOD "ARP failure during connect\n");
+ pr_err("ARP failure during connect\n");
connect_reply_upcall(ep, -EHOSTUNREACH);
__state_set(&ep->com, DEAD);
if (ep->com.remote_addr.ss_family == AF_INET6) {
@@ -551,7 +558,7 @@ static void act_open_req_arp_failure(void *handle, struct sk_buff *skb)
cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0],
(const u32 *)&sin6->sin6_addr.s6_addr, 1);
}
- remove_handle(ep->com.dev, &ep->com.dev->atid_idr, ep->atid);
+ xa_erase_irq(&ep->com.dev->atids, ep->atid);
cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid);
queue_arp_failure_cpl(ep, skb, FAKE_CPL_PUT_EP_SAFE);
}
@@ -567,37 +574,44 @@ static void abort_arp_failure(void *handle, struct sk_buff *skb)
struct c4iw_rdev *rdev = &ep->com.dev->rdev;
struct cpl_abort_req *req = cplhdr(skb);
- PDBG("%s rdev %p\n", __func__, rdev);
+ pr_debug("rdev %p\n", rdev);
req->cmd = CPL_ABORT_NO_RST;
+ skb_get(skb);
ret = c4iw_ofld_send(rdev, skb);
if (ret) {
__state_set(&ep->com, DEAD);
queue_arp_failure_cpl(ep, skb, FAKE_CPL_PUT_EP_SAFE);
- }
+ } else
+ kfree_skb(skb);
}
static int send_flowc(struct c4iw_ep *ep)
{
struct fw_flowc_wr *flowc;
struct sk_buff *skb = skb_dequeue(&ep->com.ep_skb_list);
- int i;
u16 vlan = ep->l2t->vlan;
int nparams;
+ int flowclen, flowclen16;
if (WARN_ON(!skb))
return -ENOMEM;
if (vlan == CPL_L2T_VLAN_NONE)
- nparams = 8;
- else
nparams = 9;
+ else
+ nparams = 10;
+
+ flowclen = offsetof(struct fw_flowc_wr, mnemval[nparams]);
+ flowclen16 = DIV_ROUND_UP(flowclen, 16);
+ flowclen = flowclen16 * 16;
- flowc = (struct fw_flowc_wr *)__skb_put(skb, FLOWC_LEN);
+ flowc = __skb_put(skb, flowclen);
+ memset(flowc, 0, flowclen);
flowc->op_to_nparams = cpu_to_be32(FW_WR_OP_V(FW_FLOWC_WR) |
FW_FLOWC_WR_NPARAMS_V(nparams));
- flowc->flowid_len16 = cpu_to_be32(FW_WR_LEN16_V(DIV_ROUND_UP(FLOWC_LEN,
- 16)) | FW_WR_FLOWID_V(ep->hwtid));
+ flowc->flowid_len16 = cpu_to_be32(FW_WR_LEN16_V(flowclen16) |
+ FW_WR_FLOWID_V(ep->hwtid));
flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_PFNVFN;
flowc->mnemval[0].val = cpu_to_be32(FW_PFVF_CMD_PFN_V
@@ -616,21 +630,13 @@ static int send_flowc(struct c4iw_ep *ep)
flowc->mnemval[6].val = cpu_to_be32(ep->snd_win);
flowc->mnemval[7].mnemonic = FW_FLOWC_MNEM_MSS;
flowc->mnemval[7].val = cpu_to_be32(ep->emss);
- if (nparams == 9) {
+ flowc->mnemval[8].mnemonic = FW_FLOWC_MNEM_RCV_SCALE;
+ flowc->mnemval[8].val = cpu_to_be32(ep->snd_wscale);
+ if (nparams == 10) {
u16 pri;
-
pri = (vlan & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
- flowc->mnemval[8].mnemonic = FW_FLOWC_MNEM_SCHEDCLASS;
- flowc->mnemval[8].val = cpu_to_be32(pri);
- } else {
- /* Pad WR to 16 byte boundary */
- flowc->mnemval[8].mnemonic = 0;
- flowc->mnemval[8].val = 0;
- }
- for (i = 0; i < 9; i++) {
- flowc->mnemval[i].r4[0] = 0;
- flowc->mnemval[i].r4[1] = 0;
- flowc->mnemval[i].r4[2] = 0;
+ flowc->mnemval[9].mnemonic = FW_FLOWC_MNEM_SCHEDCLASS;
+ flowc->mnemval[9].val = cpu_to_be32(pri);
}
set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
@@ -642,7 +648,7 @@ static int send_halfclose(struct c4iw_ep *ep)
struct sk_buff *skb = skb_dequeue(&ep->com.ep_skb_list);
u32 wrlen = roundup(sizeof(struct cpl_close_con_req), 16);
- PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+ pr_debug("ep %p tid %u\n", ep, ep->hwtid);
if (WARN_ON(!skb))
return -ENOMEM;
@@ -652,12 +658,38 @@ static int send_halfclose(struct c4iw_ep *ep)
return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
}
-static int send_abort(struct c4iw_ep *ep)
+static void read_tcb(struct c4iw_ep *ep)
+{
+ struct sk_buff *skb;
+ struct cpl_get_tcb *req;
+ int wrlen = roundup(sizeof(*req), 16);
+
+ skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
+ if (WARN_ON(!skb))
+ return;
+
+ set_wr_txq(skb, CPL_PRIORITY_CONTROL, ep->ctrlq_idx);
+ req = (struct cpl_get_tcb *) skb_put(skb, wrlen);
+ memset(req, 0, wrlen);
+ INIT_TP_WR(req, ep->hwtid);
+ OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_GET_TCB, ep->hwtid));
+ req->reply_ctrl = htons(REPLY_CHAN_V(0) | QUEUENO_V(ep->rss_qid));
+
+ /*
+ * keep a ref on the ep so the tcb is not unlocked before this
+ * cpl completes. The ref is released in read_tcb_rpl().
+ */
+ c4iw_get_ep(&ep->com);
+ if (WARN_ON(c4iw_ofld_send(&ep->com.dev->rdev, skb)))
+ c4iw_put_ep(&ep->com);
+}
+
+static int send_abort_req(struct c4iw_ep *ep)
{
u32 wrlen = roundup(sizeof(struct cpl_abort_req), 16);
struct sk_buff *req_skb = skb_dequeue(&ep->com.ep_skb_list);
- PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+ pr_debug("ep %p tid %u\n", ep, ep->hwtid);
if (WARN_ON(!req_skb))
return -ENOMEM;
@@ -667,6 +699,17 @@ static int send_abort(struct c4iw_ep *ep)
return c4iw_l2t_send(&ep->com.dev->rdev, req_skb, ep->l2t);
}
+static int send_abort(struct c4iw_ep *ep)
+{
+ if (!ep->com.qp || !ep->com.qp->srq) {
+ send_abort_req(ep);
+ return 0;
+ }
+ set_bit(ABORT_REQ_IN_PROGRESS, &ep->com.flags);
+ read_tcb(ep);
+ return 0;
+}
+
static int send_connect(struct c4iw_ep *ep)
{
struct cpl_act_open_req *req = NULL;
@@ -691,7 +734,11 @@ static int send_connect(struct c4iw_ep *ep)
&ep->com.remote_addr;
int ret;
enum chip_type adapter_type = ep->com.dev->rdev.lldi.adapter_type;
- u32 isn = (prandom_u32() & ~7UL) - 1;
+ u32 isn = (get_random_u32() & ~7UL) - 1;
+ struct net_device *netdev;
+ u64 params;
+
+ netdev = ep->com.dev->rdev.lldi.ports[0];
switch (CHELSIO_CHIP_VERSION(adapter_type)) {
case CHELSIO_T4:
@@ -716,12 +763,11 @@ static int send_connect(struct c4iw_ep *ep)
roundup(sizev4, 16) :
roundup(sizev6, 16);
- PDBG("%s ep %p atid %u\n", __func__, ep, ep->atid);
+ pr_debug("ep %p atid %u\n", ep, ep->atid);
skb = get_skb(NULL, wrlen, GFP_KERNEL);
if (!skb) {
- printk(KERN_ERR MOD "%s - failed to alloc skb.\n",
- __func__);
+ pr_err("%s - failed to alloc skb\n", __func__);
return -ENOMEM;
}
set_wr_txq(skb, CPL_PRIORITY_SETUP, ep->ctrlq_idx);
@@ -768,6 +814,8 @@ static int send_connect(struct c4iw_ep *ep)
opt2 |= T5_ISS_F;
}
+ params = cxgb4_select_ntuple(netdev, ep->l2t);
+
if (ep->com.remote_addr.ss_family == AF_INET6)
cxgb4_clip_get(ep->com.dev->rdev.lldi.ports[0],
(const u32 *)&la6->sin6_addr.s6_addr, 1);
@@ -777,18 +825,16 @@ static int send_connect(struct c4iw_ep *ep)
if (ep->com.remote_addr.ss_family == AF_INET) {
switch (CHELSIO_CHIP_VERSION(adapter_type)) {
case CHELSIO_T4:
- req = (struct cpl_act_open_req *)skb_put(skb, wrlen);
+ req = skb_put(skb, wrlen);
INIT_TP_WR(req, 0);
break;
case CHELSIO_T5:
- t5req = (struct cpl_t5_act_open_req *)skb_put(skb,
- wrlen);
+ t5req = skb_put(skb, wrlen);
INIT_TP_WR(t5req, 0);
req = (struct cpl_act_open_req *)t5req;
break;
case CHELSIO_T6:
- t6req = (struct cpl_t6_act_open_req *)skb_put(skb,
- wrlen);
+ t6req = skb_put(skb, wrlen);
INIT_TP_WR(t6req, 0);
req = (struct cpl_act_open_req *)t6req;
t5req = (struct cpl_t5_act_open_req *)t6req;
@@ -809,34 +855,36 @@ static int send_connect(struct c4iw_ep *ep)
req->opt0 = cpu_to_be64(opt0);
if (is_t4(ep->com.dev->rdev.lldi.adapter_type)) {
- req->params = cpu_to_be32(cxgb4_select_ntuple(
- ep->com.dev->rdev.lldi.ports[0],
- ep->l2t));
+ req->params = cpu_to_be32(params);
req->opt2 = cpu_to_be32(opt2);
} else {
- t5req->params = cpu_to_be64(FILTER_TUPLE_V(
- cxgb4_select_ntuple(
- ep->com.dev->rdev.lldi.ports[0],
- ep->l2t)));
- t5req->rsvd = cpu_to_be32(isn);
- PDBG("%s snd_isn %u\n", __func__, t5req->rsvd);
- t5req->opt2 = cpu_to_be32(opt2);
+ if (is_t5(ep->com.dev->rdev.lldi.adapter_type)) {
+ t5req->params =
+ cpu_to_be64(FILTER_TUPLE_V(params));
+ t5req->rsvd = cpu_to_be32(isn);
+ pr_debug("snd_isn %u\n", t5req->rsvd);
+ t5req->opt2 = cpu_to_be32(opt2);
+ } else {
+ t6req->params =
+ cpu_to_be64(FILTER_TUPLE_V(params));
+ t6req->rsvd = cpu_to_be32(isn);
+ pr_debug("snd_isn %u\n", t6req->rsvd);
+ t6req->opt2 = cpu_to_be32(opt2);
+ }
}
} else {
switch (CHELSIO_CHIP_VERSION(adapter_type)) {
case CHELSIO_T4:
- req6 = (struct cpl_act_open_req6 *)skb_put(skb, wrlen);
+ req6 = skb_put(skb, wrlen);
INIT_TP_WR(req6, 0);
break;
case CHELSIO_T5:
- t5req6 = (struct cpl_t5_act_open_req6 *)skb_put(skb,
- wrlen);
+ t5req6 = skb_put(skb, wrlen);
INIT_TP_WR(t5req6, 0);
req6 = (struct cpl_act_open_req6 *)t5req6;
break;
case CHELSIO_T6:
- t6req6 = (struct cpl_t6_act_open_req6 *)skb_put(skb,
- wrlen);
+ t6req6 = skb_put(skb, wrlen);
INIT_TP_WR(t6req6, 0);
req6 = (struct cpl_act_open_req6 *)t6req6;
t5req6 = (struct cpl_t5_act_open_req6 *)t6req6;
@@ -859,18 +907,24 @@ static int send_connect(struct c4iw_ep *ep)
req6->opt0 = cpu_to_be64(opt0);
if (is_t4(ep->com.dev->rdev.lldi.adapter_type)) {
- req6->params = cpu_to_be32(cxgb4_select_ntuple(
- ep->com.dev->rdev.lldi.ports[0],
- ep->l2t));
+ req6->params = cpu_to_be32(cxgb4_select_ntuple(netdev,
+ ep->l2t));
req6->opt2 = cpu_to_be32(opt2);
} else {
- t5req6->params = cpu_to_be64(FILTER_TUPLE_V(
- cxgb4_select_ntuple(
- ep->com.dev->rdev.lldi.ports[0],
- ep->l2t)));
- t5req6->rsvd = cpu_to_be32(isn);
- PDBG("%s snd_isn %u\n", __func__, t5req6->rsvd);
- t5req6->opt2 = cpu_to_be32(opt2);
+ if (is_t5(ep->com.dev->rdev.lldi.adapter_type)) {
+ t5req6->params =
+ cpu_to_be64(FILTER_TUPLE_V(params));
+ t5req6->rsvd = cpu_to_be32(isn);
+ pr_debug("snd_isn %u\n", t5req6->rsvd);
+ t5req6->opt2 = cpu_to_be32(opt2);
+ } else {
+ t6req6->params =
+ cpu_to_be64(FILTER_TUPLE_V(params));
+ t6req6->rsvd = cpu_to_be32(isn);
+ pr_debug("snd_isn %u\n", t6req6->rsvd);
+ t6req6->opt2 = cpu_to_be32(opt2);
+ }
+
}
}
@@ -891,14 +945,13 @@ static int send_mpa_req(struct c4iw_ep *ep, struct sk_buff *skb,
struct mpa_message *mpa;
struct mpa_v2_conn_params mpa_v2_params;
- PDBG("%s ep %p tid %u pd_len %d\n", __func__, ep, ep->hwtid, ep->plen);
-
- BUG_ON(skb_cloned(skb));
+ pr_debug("ep %p tid %u pd_len %d\n",
+ ep, ep->hwtid, ep->plen);
mpalen = sizeof(*mpa) + ep->plen;
if (mpa_rev_to_use == 2)
mpalen += sizeof(struct mpa_v2_conn_params);
- wrlen = roundup(mpalen + sizeof *req, 16);
+ wrlen = roundup(mpalen + sizeof(*req), 16);
skb = get_skb(skb, wrlen, GFP_KERNEL);
if (!skb) {
connect_reply_upcall(ep, -ENOMEM);
@@ -906,8 +959,7 @@ static int send_mpa_req(struct c4iw_ep *ep, struct sk_buff *skb,
}
set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
- req = (struct fw_ofld_tx_data_wr *)skb_put(skb, wrlen);
- memset(req, 0, wrlen);
+ req = skb_put_zero(skb, wrlen);
req->op_to_immdlen = cpu_to_be32(
FW_WR_OP_V(FW_OFLD_TX_DATA_WR) |
FW_WR_COMPL_F |
@@ -943,10 +995,11 @@ static int send_mpa_req(struct c4iw_ep *ep, struct sk_buff *skb,
}
if (mpa_rev_to_use == 2) {
- mpa->private_data_size = htons(ntohs(mpa->private_data_size) +
- sizeof (struct mpa_v2_conn_params));
- PDBG("%s initiator ird %u ord %u\n", __func__, ep->ird,
- ep->ord);
+ mpa->private_data_size =
+ htons(ntohs(mpa->private_data_size) +
+ sizeof(struct mpa_v2_conn_params));
+ pr_debug("initiator ird %u ord %u\n", ep->ird,
+ ep->ord);
mpa_v2_params.ird = htons((u16)ep->ird);
mpa_v2_params.ord = htons((u16)ep->ord);
@@ -978,7 +1031,6 @@ static int send_mpa_req(struct c4iw_ep *ep, struct sk_buff *skb,
*/
skb_get(skb);
t4_set_arp_err_handler(skb, NULL, arp_failure_discard);
- BUG_ON(ep->mpa_skb);
ep->mpa_skb = skb;
ret = c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
if (ret)
@@ -998,22 +1050,22 @@ static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen)
struct sk_buff *skb;
struct mpa_v2_conn_params mpa_v2_params;
- PDBG("%s ep %p tid %u pd_len %d\n", __func__, ep, ep->hwtid, ep->plen);
+ pr_debug("ep %p tid %u pd_len %d\n",
+ ep, ep->hwtid, ep->plen);
mpalen = sizeof(*mpa) + plen;
if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn)
mpalen += sizeof(struct mpa_v2_conn_params);
- wrlen = roundup(mpalen + sizeof *req, 16);
+ wrlen = roundup(mpalen + sizeof(*req), 16);
skb = get_skb(NULL, wrlen, GFP_KERNEL);
if (!skb) {
- printk(KERN_ERR MOD "%s - cannot alloc skb!\n", __func__);
+ pr_err("%s - cannot alloc skb!\n", __func__);
return -ENOMEM;
}
set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
- req = (struct fw_ofld_tx_data_wr *)skb_put(skb, wrlen);
- memset(req, 0, wrlen);
+ req = skb_put_zero(skb, wrlen);
req->op_to_immdlen = cpu_to_be32(
FW_WR_OP_V(FW_OFLD_TX_DATA_WR) |
FW_WR_COMPL_F |
@@ -1035,8 +1087,9 @@ static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen)
if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
mpa->flags |= MPA_ENHANCED_RDMA_CONN;
- mpa->private_data_size = htons(ntohs(mpa->private_data_size) +
- sizeof (struct mpa_v2_conn_params));
+ mpa->private_data_size =
+ htons(ntohs(mpa->private_data_size) +
+ sizeof(struct mpa_v2_conn_params));
mpa_v2_params.ird = htons(((u16)ep->ird) |
(peer2peer ? MPA_V2_PEER2PEER_MODEL :
0));
@@ -1064,7 +1117,6 @@ static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen)
skb_get(skb);
set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
t4_set_arp_err_handler(skb, NULL, mpa_start_arp_failure);
- BUG_ON(ep->mpa_skb);
ep->mpa_skb = skb;
ep->snd_seq += mpalen;
return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
@@ -1078,22 +1130,22 @@ static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen)
struct sk_buff *skb;
struct mpa_v2_conn_params mpa_v2_params;
- PDBG("%s ep %p tid %u pd_len %d\n", __func__, ep, ep->hwtid, ep->plen);
+ pr_debug("ep %p tid %u pd_len %d\n",
+ ep, ep->hwtid, ep->plen);
mpalen = sizeof(*mpa) + plen;
if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn)
mpalen += sizeof(struct mpa_v2_conn_params);
- wrlen = roundup(mpalen + sizeof *req, 16);
+ wrlen = roundup(mpalen + sizeof(*req), 16);
skb = get_skb(NULL, wrlen, GFP_KERNEL);
if (!skb) {
- printk(KERN_ERR MOD "%s - cannot alloc skb!\n", __func__);
+ pr_err("%s - cannot alloc skb!\n", __func__);
return -ENOMEM;
}
set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
- req = (struct fw_ofld_tx_data_wr *) skb_put(skb, wrlen);
- memset(req, 0, wrlen);
+ req = skb_put_zero(skb, wrlen);
req->op_to_immdlen = cpu_to_be32(
FW_WR_OP_V(FW_OFLD_TX_DATA_WR) |
FW_WR_COMPL_F |
@@ -1119,8 +1171,9 @@ static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen)
if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
mpa->flags |= MPA_ENHANCED_RDMA_CONN;
- mpa->private_data_size = htons(ntohs(mpa->private_data_size) +
- sizeof (struct mpa_v2_conn_params));
+ mpa->private_data_size =
+ htons(ntohs(mpa->private_data_size) +
+ sizeof(struct mpa_v2_conn_params));
mpa_v2_params.ird = htons((u16)ep->ird);
mpa_v2_params.ord = htons((u16)ep->ord);
if (peer2peer && (ep->mpa_attr.p2p_type !=
@@ -1162,31 +1215,35 @@ static int act_establish(struct c4iw_dev *dev, struct sk_buff *skb)
{
struct c4iw_ep *ep;
struct cpl_act_establish *req = cplhdr(skb);
+ unsigned short tcp_opt = ntohs(req->tcp_opt);
unsigned int tid = GET_TID(req);
unsigned int atid = TID_TID_G(ntohl(req->tos_atid));
struct tid_info *t = dev->rdev.lldi.tids;
int ret;
ep = lookup_atid(t, atid);
+ if (!ep)
+ return -EINVAL;
- PDBG("%s ep %p tid %u snd_isn %u rcv_isn %u\n", __func__, ep, tid,
- be32_to_cpu(req->snd_isn), be32_to_cpu(req->rcv_isn));
+ pr_debug("ep %p tid %u snd_isn %u rcv_isn %u\n", ep, tid,
+ be32_to_cpu(req->snd_isn), be32_to_cpu(req->rcv_isn));
mutex_lock(&ep->com.mutex);
dst_confirm(ep->dst);
/* setup the hwtid for this connection */
ep->hwtid = tid;
- cxgb4_insert_tid(t, ep, tid);
+ cxgb4_insert_tid(t, ep, tid, ep->com.local_addr.ss_family);
insert_ep_tid(ep);
ep->snd_seq = be32_to_cpu(req->snd_isn);
ep->rcv_seq = be32_to_cpu(req->rcv_isn);
+ ep->snd_wscale = TCPOPT_SND_WSCALE_G(tcp_opt);
- set_emss(ep, ntohs(req->tcp_opt));
+ set_emss(ep, tcp_opt);
/* dealloc the atid */
- remove_handle(ep->com.dev, &ep->com.dev->atid_idr, atid);
+ xa_erase_irq(&ep->com.dev->atids, atid);
cxgb4_free_atid(t, atid);
set_bit(ACT_ESTAB, &ep->com.history);
@@ -1213,13 +1270,13 @@ static void close_complete_upcall(struct c4iw_ep *ep, int status)
{
struct iw_cm_event event;
- PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+ pr_debug("ep %p tid %u\n", ep, ep->hwtid);
memset(&event, 0, sizeof(event));
event.event = IW_CM_EVENT_CLOSE;
event.status = status;
if (ep->com.cm_id) {
- PDBG("close complete delivered ep %p cm_id %p tid %u\n",
- ep, ep->com.cm_id, ep->hwtid);
+ pr_debug("close complete delivered ep %p cm_id %p tid %u\n",
+ ep, ep->com.cm_id, ep->hwtid);
ep->com.cm_id->event_handler(ep->com.cm_id, &event);
deref_cm_id(&ep->com);
set_bit(CLOSE_UPCALL, &ep->com.history);
@@ -1230,12 +1287,12 @@ static void peer_close_upcall(struct c4iw_ep *ep)
{
struct iw_cm_event event;
- PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+ pr_debug("ep %p tid %u\n", ep, ep->hwtid);
memset(&event, 0, sizeof(event));
event.event = IW_CM_EVENT_DISCONNECT;
if (ep->com.cm_id) {
- PDBG("peer close delivered ep %p cm_id %p tid %u\n",
- ep, ep->com.cm_id, ep->hwtid);
+ pr_debug("peer close delivered ep %p cm_id %p tid %u\n",
+ ep, ep->com.cm_id, ep->hwtid);
ep->com.cm_id->event_handler(ep->com.cm_id, &event);
set_bit(DISCONN_UPCALL, &ep->com.history);
}
@@ -1245,13 +1302,13 @@ static void peer_abort_upcall(struct c4iw_ep *ep)
{
struct iw_cm_event event;
- PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+ pr_debug("ep %p tid %u\n", ep, ep->hwtid);
memset(&event, 0, sizeof(event));
event.event = IW_CM_EVENT_CLOSE;
event.status = -ECONNRESET;
if (ep->com.cm_id) {
- PDBG("abort delivered ep %p cm_id %p tid %u\n", ep,
- ep->com.cm_id, ep->hwtid);
+ pr_debug("abort delivered ep %p cm_id %p tid %u\n", ep,
+ ep->com.cm_id, ep->hwtid);
ep->com.cm_id->event_handler(ep->com.cm_id, &event);
deref_cm_id(&ep->com);
set_bit(ABORT_UPCALL, &ep->com.history);
@@ -1262,7 +1319,8 @@ static void connect_reply_upcall(struct c4iw_ep *ep, int status)
{
struct iw_cm_event event;
- PDBG("%s ep %p tid %u status %d\n", __func__, ep, ep->hwtid, status);
+ pr_debug("ep %p tid %u status %d\n",
+ ep, ep->hwtid, status);
memset(&event, 0, sizeof(event));
event.event = IW_CM_EVENT_CONNECT_REPLY;
event.status = status;
@@ -1291,8 +1349,8 @@ static void connect_reply_upcall(struct c4iw_ep *ep, int status)
}
}
- PDBG("%s ep %p tid %u status %d\n", __func__, ep,
- ep->hwtid, status);
+ pr_debug("ep %p tid %u status %d\n", ep,
+ ep->hwtid, status);
set_bit(CONN_RPL_UPCALL, &ep->com.history);
ep->com.cm_id->event_handler(ep->com.cm_id, &event);
@@ -1305,7 +1363,7 @@ static int connect_request_upcall(struct c4iw_ep *ep)
struct iw_cm_event event;
int ret;
- PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+ pr_debug("ep %p tid %u\n", ep, ep->hwtid);
memset(&event, 0, sizeof(event));
event.event = IW_CM_EVENT_CONNECT_REQUEST;
memcpy(&event.local_addr, &ep->com.local_addr,
@@ -1342,13 +1400,13 @@ static void established_upcall(struct c4iw_ep *ep)
{
struct iw_cm_event event;
- PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+ pr_debug("ep %p tid %u\n", ep, ep->hwtid);
memset(&event, 0, sizeof(event));
event.event = IW_CM_EVENT_ESTABLISHED;
event.ird = ep->ord;
event.ord = ep->ird;
if (ep->com.cm_id) {
- PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+ pr_debug("ep %p tid %u\n", ep, ep->hwtid);
ep->com.cm_id->event_handler(ep->com.cm_id, &event);
set_bit(ESTAB_UPCALL, &ep->com.history);
}
@@ -1360,10 +1418,11 @@ static int update_rx_credits(struct c4iw_ep *ep, u32 credits)
u32 wrlen = roundup(sizeof(struct cpl_rx_data_ack), 16);
u32 credit_dack;
- PDBG("%s ep %p tid %u credits %u\n", __func__, ep, ep->hwtid, credits);
+ pr_debug("ep %p tid %u credits %u\n",
+ ep, ep->hwtid, credits);
skb = get_skb(NULL, wrlen, GFP_KERNEL);
if (!skb) {
- printk(KERN_ERR MOD "update_rx_credits - cannot alloc skb!\n");
+ pr_err("update_rx_credits - cannot alloc skb!\n");
return 0;
}
@@ -1411,7 +1470,7 @@ static int process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
int err;
int disconnect = 0;
- PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+ pr_debug("ep %p tid %u\n", ep, ep->hwtid);
/*
* If we get more than the supported amount of private data
@@ -1438,8 +1497,8 @@ static int process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
/* Validate MPA header. */
if (mpa->revision > mpa_rev) {
- printk(KERN_ERR MOD "%s MPA version mismatch. Local = %d,"
- " Received = %d\n", __func__, mpa_rev, mpa->revision);
+ pr_err("%s MPA version mismatch. Local = %d, Received = %d\n",
+ __func__, mpa_rev, mpa->revision);
err = -EPROTO;
goto err_stop_timer;
}
@@ -1509,8 +1568,8 @@ static int process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
MPA_V2_IRD_ORD_MASK;
resp_ord = ntohs(mpa_v2_params->ord) &
MPA_V2_IRD_ORD_MASK;
- PDBG("%s responder ird %u ord %u ep ird %u ord %u\n",
- __func__, resp_ird, resp_ord, ep->ird, ep->ord);
+ pr_debug("responder ird %u ord %u ep ird %u ord %u\n",
+ resp_ird, resp_ord, ep->ird, ep->ord);
/*
* This is a double-check. Ideally, below checks are
@@ -1554,12 +1613,11 @@ static int process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
if (peer2peer)
ep->mpa_attr.p2p_type = p2p_type;
- PDBG("%s - crc_enabled=%d, recv_marker_enabled=%d, "
- "xmit_marker_enabled=%d, version=%d p2p_type=%d local-p2p_type = "
- "%d\n", __func__, ep->mpa_attr.crc_enabled,
- ep->mpa_attr.recv_marker_enabled,
- ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version,
- ep->mpa_attr.p2p_type, p2p_type);
+ pr_debug("crc_enabled=%d, recv_marker_enabled=%d, xmit_marker_enabled=%d, version=%d p2p_type=%d local-p2p_type = %d\n",
+ ep->mpa_attr.crc_enabled,
+ ep->mpa_attr.recv_marker_enabled,
+ ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version,
+ ep->mpa_attr.p2p_type, p2p_type);
/*
* If responder's RTR does not match with that of initiator, assign
@@ -1594,7 +1652,7 @@ static int process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
* supports, generate TERM message
*/
if (rtr_mismatch) {
- printk(KERN_ERR "%s: RTR mismatch, sending TERM\n", __func__);
+ pr_err("%s: RTR mismatch, sending TERM\n", __func__);
attrs.layer_etype = LAYER_MPA | DDP_LLP;
attrs.ecode = MPA_NOMATCH_RTR;
attrs.next_state = C4IW_QP_STATE_TERMINATE;
@@ -1613,8 +1671,7 @@ static int process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
* initiator ORD.
*/
if (insuff_ird) {
- printk(KERN_ERR "%s: Insufficient IRD, sending TERM\n",
- __func__);
+ pr_err("%s: Insufficient IRD, sending TERM\n", __func__);
attrs.layer_etype = LAYER_MPA | DDP_LLP;
attrs.ecode = MPA_INSUFF_IRD;
attrs.next_state = C4IW_QP_STATE_TERMINATE;
@@ -1653,7 +1710,7 @@ static int process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
struct mpa_v2_conn_params *mpa_v2_params;
u16 plen;
- PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+ pr_debug("ep %p tid %u\n", ep, ep->hwtid);
/*
* If we get more than the supported amount of private data
@@ -1662,7 +1719,7 @@ static int process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt))
goto err_stop_timer;
- PDBG("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__);
+ pr_debug("enter (%s line %u)\n", __FILE__, __LINE__);
/*
* Copy the new data into our accumulation buffer.
@@ -1678,15 +1735,15 @@ static int process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
if (ep->mpa_pkt_len < sizeof(*mpa))
return 0;
- PDBG("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__);
+ pr_debug("enter (%s line %u)\n", __FILE__, __LINE__);
mpa = (struct mpa_message *) ep->mpa_pkt;
/*
* Validate MPA Header.
*/
if (mpa->revision > mpa_rev) {
- printk(KERN_ERR MOD "%s MPA version mismatch. Local = %d,"
- " Received = %d\n", __func__, mpa_rev, mpa->revision);
+ pr_err("%s MPA version mismatch. Local = %d, Received = %d\n",
+ __func__, mpa_rev, mpa->revision);
goto err_stop_timer;
}
@@ -1741,8 +1798,8 @@ static int process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
MPA_V2_IRD_ORD_MASK;
ep->ord = min_t(u32, ep->ord,
cur_max_read_depth(ep->com.dev));
- PDBG("%s initiator ird %u ord %u\n", __func__, ep->ird,
- ep->ord);
+ pr_debug("initiator ird %u ord %u\n",
+ ep->ird, ep->ord);
if (ntohs(mpa_v2_params->ird) & MPA_V2_PEER2PEER_MODEL)
if (peer2peer) {
if (ntohs(mpa_v2_params->ord) &
@@ -1759,11 +1816,10 @@ static int process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
if (peer2peer)
ep->mpa_attr.p2p_type = p2p_type;
- PDBG("%s - crc_enabled=%d, recv_marker_enabled=%d, "
- "xmit_marker_enabled=%d, version=%d p2p_type=%d\n", __func__,
- ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
- ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version,
- ep->mpa_attr.p2p_type);
+ pr_debug("crc_enabled=%d, recv_marker_enabled=%d, xmit_marker_enabled=%d, version=%d p2p_type=%d\n",
+ ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
+ ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version,
+ ep->mpa_attr.p2p_type);
__state_set(&ep->com, MPA_REQ_RCVD);
@@ -1799,26 +1855,26 @@ static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb)
ep = get_ep_from_tid(dev, tid);
if (!ep)
return 0;
- PDBG("%s ep %p tid %u dlen %u\n", __func__, ep, ep->hwtid, dlen);
+ pr_debug("ep %p tid %u dlen %u\n", ep, ep->hwtid, dlen);
skb_pull(skb, sizeof(*hdr));
skb_trim(skb, dlen);
mutex_lock(&ep->com.mutex);
- /* update RX credits */
- update_rx_credits(ep, dlen);
-
switch (ep->com.state) {
case MPA_REQ_SENT:
+ update_rx_credits(ep, dlen);
ep->rcv_seq += dlen;
disconnect = process_mpa_reply(ep, skb);
break;
case MPA_REQ_WAIT:
+ update_rx_credits(ep, dlen);
ep->rcv_seq += dlen;
disconnect = process_mpa_request(ep, skb);
break;
case FPDU_MODE: {
struct c4iw_qp_attributes attrs;
- BUG_ON(!ep->com.qp);
+
+ update_rx_credits(ep, dlen);
if (status)
pr_err("%s Unexpected streaming data." \
" qpid %u ep %p state %d tid %u status %d\n",
@@ -1840,35 +1896,63 @@ static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb)
return 0;
}
+static void complete_cached_srq_buffers(struct c4iw_ep *ep, u32 srqidx)
+{
+ enum chip_type adapter_type;
+
+ adapter_type = ep->com.dev->rdev.lldi.adapter_type;
+
+ /*
+ * If this TCB had a srq buffer cached, then we must complete
+ * it. For user mode, that means saving the srqidx in the
+ * user/kernel status page for this qp. For kernel mode, just
+ * synthesize the CQE now.
+ */
+ if (CHELSIO_CHIP_VERSION(adapter_type) > CHELSIO_T5 && srqidx) {
+ if (ep->com.qp->ibqp.uobject)
+ t4_set_wq_in_error(&ep->com.qp->wq, srqidx);
+ else
+ c4iw_flush_srqidx(ep->com.qp, srqidx);
+ }
+}
+
static int abort_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
{
+ u32 srqidx;
struct c4iw_ep *ep;
- struct cpl_abort_rpl_rss *rpl = cplhdr(skb);
+ struct cpl_abort_rpl_rss6 *rpl = cplhdr(skb);
int release = 0;
unsigned int tid = GET_TID(rpl);
ep = get_ep_from_tid(dev, tid);
if (!ep) {
- printk(KERN_WARNING MOD "Abort rpl to freed endpoint\n");
+ pr_warn("Abort rpl to freed endpoint\n");
return 0;
}
- PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+
+ if (ep->com.qp && ep->com.qp->srq) {
+ srqidx = ABORT_RSS_SRQIDX_G(be32_to_cpu(rpl->srqidx_status));
+ complete_cached_srq_buffers(ep, srqidx ? srqidx : ep->srqe_idx);
+ }
+
+ pr_debug("ep %p tid %u\n", ep, ep->hwtid);
mutex_lock(&ep->com.mutex);
switch (ep->com.state) {
case ABORTING:
- c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET);
+ c4iw_wake_up_noref(ep->com.wr_waitp, -ECONNRESET);
__state_set(&ep->com, DEAD);
release = 1;
break;
default:
- printk(KERN_ERR "%s ep %p state %d\n",
- __func__, ep, ep->com.state);
+ pr_err("%s ep %p state %d\n", __func__, ep, ep->com.state);
break;
}
mutex_unlock(&ep->com.mutex);
- if (release)
+ if (release) {
+ close_complete_upcall(ep, -ECONNRESET);
release_ep_resources(ep);
+ }
c4iw_put_ep(&ep->com);
return 0;
}
@@ -1883,8 +1967,10 @@ static int send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid)
int win;
skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
- req = (struct fw_ofld_connection_wr *)__skb_put(skb, sizeof(*req));
- memset(req, 0, sizeof(*req));
+ if (!skb)
+ return -ENOMEM;
+
+ req = __skb_put_zero(skb, sizeof(*req));
req->op_compl = htonl(WR_OP_V(FW_OFLD_CONNECTION_WR));
req->len16_pkd = htonl(FW_WR_LEN16_V(DIV_ROUND_UP(sizeof(*req), 16)));
req->le.filter = cpu_to_be32(cxgb4_select_ntuple(
@@ -1978,7 +2064,8 @@ static void set_tcp_window(struct c4iw_ep *ep, struct port_info *pi)
{
ep->snd_win = snd_win;
ep->rcv_win = rcv_win;
- PDBG("%s snd_win %d rcv_win %d\n", __func__, ep->snd_win, ep->rcv_win);
+ pr_debug("snd_win %d rcv_win %d\n",
+ ep->snd_win, ep->rcv_win);
}
#define ACT_OPEN_RETRY_COUNT 2
@@ -1999,7 +2086,7 @@ static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip,
err = -ENOMEM;
if (n->dev->flags & IFF_LOOPBACK) {
if (iptype == 4)
- pdev = ip_dev_find(&init_net, *(__be32 *)peer_ip);
+ pdev = __ip_dev_find(&init_net, *(__be32 *)peer_ip, false);
else if (IS_ENABLED(CONFIG_IPV6))
for_each_netdev(&init_net, pdev) {
if (ipv6_chk_addr(&init_net,
@@ -2014,16 +2101,15 @@ static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip,
err = -ENODEV;
goto out;
}
+ if (is_vlan_dev(pdev))
+ pdev = vlan_dev_real_dev(pdev);
ep->l2t = cxgb4_l2t_get(cdev->rdev.lldi.l2t,
n, pdev, rt_tos2priority(tos));
- if (!ep->l2t) {
- dev_put(pdev);
+ if (!ep->l2t)
goto out;
- }
ep->mtu = pdev->mtu;
ep->tx_chan = cxgb4_port_chan(pdev);
- ep->smac_idx = cxgb4_tp_smt_idx(adapter_type,
- cxgb4_port_viid(pdev));
+ ep->smac_idx = ((struct port_info *)netdev_priv(pdev))->smt_idx;
step = cdev->rdev.lldi.ntxq /
cdev->rdev.lldi.nchan;
ep->txq_idx = cxgb4_port_idx(pdev) * step;
@@ -2033,17 +2119,15 @@ static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip,
ep->rss_qid = cdev->rdev.lldi.rxq_ids[
cxgb4_port_idx(pdev) * step];
set_tcp_window(ep, (struct port_info *)netdev_priv(pdev));
- dev_put(pdev);
} else {
pdev = get_real_dev(n->dev);
ep->l2t = cxgb4_l2t_get(cdev->rdev.lldi.l2t,
- n, pdev, 0);
+ n, pdev, rt_tos2priority(tos));
if (!ep->l2t)
goto out;
ep->mtu = dst_mtu(dst);
ep->tx_chan = cxgb4_port_chan(pdev);
- ep->smac_idx = cxgb4_tp_smt_idx(adapter_type,
- cxgb4_port_viid(pdev));
+ ep->smac_idx = ((struct port_info *)netdev_priv(pdev))->smt_idx;
step = cdev->rdev.lldi.ntxq /
cdev->rdev.lldi.nchan;
ep->txq_idx = cxgb4_port_idx(pdev) * step;
@@ -2083,9 +2167,8 @@ static int c4iw_reconnect(struct c4iw_ep *ep)
int iptype;
__u8 *ra;
- PDBG("%s qp %p cm_id %p\n", __func__, ep->com.qp, ep->com.cm_id);
- init_timer(&ep->timer);
- c4iw_init_wr_wait(&ep->com.wr_wait);
+ pr_debug("qp %p cm_id %p\n", ep->com.qp, ep->com.cm_id);
+ c4iw_init_wr_wait(ep->com.wr_waitp);
/* When MPA revision is different on nodes, the node with MPA_rev=2
* tries to reconnect with MPA_rev 1 for the same EP through
@@ -2093,7 +2176,7 @@ static int c4iw_reconnect(struct c4iw_ep *ep)
* further connection establishment. As we are using the same EP pointer
* for reconnect, few skbs are used during the previous c4iw_connect(),
* which leaves the EP with inadequate skbs for further
- * c4iw_reconnect(), Further causing an assert BUG_ON() due to empty
+ * c4iw_reconnect(), Further causing a crash due to an empty
* skb_list() during peer_abort(). Allocate skbs which is already used.
*/
size = (CN_MAX_CON_BUF - skb_queue_len(&ep->com.ep_skb_list));
@@ -2107,11 +2190,13 @@ static int c4iw_reconnect(struct c4iw_ep *ep)
*/
ep->atid = cxgb4_alloc_atid(ep->com.dev->rdev.lldi.tids, ep);
if (ep->atid == -1) {
- pr_err("%s - cannot alloc atid.\n", __func__);
+ pr_err("%s - cannot alloc atid\n", __func__);
err = -ENOMEM;
goto fail2;
}
- insert_handle(ep->com.dev, &ep->com.dev->atid_idr, ep, ep->atid);
+ err = xa_insert_irq(&ep->com.dev->atids, ep->atid, ep, GFP_KERNEL);
+ if (err)
+ goto fail2a;
/* find a route */
if (ep->com.cm_id->m_local_addr.ss_family == AF_INET) {
@@ -2128,13 +2213,14 @@ static int c4iw_reconnect(struct c4iw_ep *ep)
laddr6->sin6_addr.s6_addr,
raddr6->sin6_addr.s6_addr,
laddr6->sin6_port,
- raddr6->sin6_port, 0,
+ raddr6->sin6_port,
+ ep->com.cm_id->tos,
raddr6->sin6_scope_id);
iptype = 6;
ra = (__u8 *)&raddr6->sin6_addr;
}
if (!ep->dst) {
- pr_err("%s - cannot find route.\n", __func__);
+ pr_err("%s - cannot find route\n", __func__);
err = -EHOSTUNREACH;
goto fail3;
}
@@ -2142,13 +2228,13 @@ static int c4iw_reconnect(struct c4iw_ep *ep)
ep->com.dev->rdev.lldi.adapter_type,
ep->com.cm_id->tos);
if (err) {
- pr_err("%s - cannot alloc l2e.\n", __func__);
+ pr_err("%s - cannot alloc l2e\n", __func__);
goto fail4;
}
- PDBG("%s txq_idx %u tx_chan %u smac_idx %u rss_qid %u l2t_idx %u\n",
- __func__, ep->txq_idx, ep->tx_chan, ep->smac_idx, ep->rss_qid,
- ep->l2t->idx);
+ pr_debug("txq_idx %u tx_chan %u smac_idx %u rss_qid %u l2t_idx %u\n",
+ ep->txq_idx, ep->tx_chan, ep->smac_idx, ep->rss_qid,
+ ep->l2t->idx);
state_set(&ep->com, CONNECTING);
ep->tos = ep->com.cm_id->tos;
@@ -2162,7 +2248,8 @@ static int c4iw_reconnect(struct c4iw_ep *ep)
fail4:
dst_release(ep->dst);
fail3:
- remove_handle(ep->com.dev, &ep->com.dev->atid_idr, ep->atid);
+ xa_erase_irq(&ep->com.dev->atids, ep->atid);
+fail2a:
cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid);
fail2:
/*
@@ -2193,17 +2280,20 @@ static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
int ret = 0;
ep = lookup_atid(t, atid);
+ if (!ep)
+ return -EINVAL;
+
la = (struct sockaddr_in *)&ep->com.local_addr;
ra = (struct sockaddr_in *)&ep->com.remote_addr;
la6 = (struct sockaddr_in6 *)&ep->com.local_addr;
ra6 = (struct sockaddr_in6 *)&ep->com.remote_addr;
- PDBG("%s ep %p atid %u status %u errno %d\n", __func__, ep, atid,
- status, status2errno(status));
+ pr_debug("ep %p atid %u status %u errno %d\n", ep, atid,
+ status, status2errno(status));
if (cxgb_is_neg_adv(status)) {
- PDBG("%s Connection problems for atid %u status %u (%s)\n",
- __func__, atid, status, neg_adv_str(status));
+ pr_debug("Connection problems for atid %u status %u (%s)\n",
+ atid, status, neg_adv_str(status));
ep->stats.connect_neg_adv++;
mutex_lock(&dev->rdev.stats.lock);
dev->rdev.stats.neg_adv++;
@@ -2245,8 +2335,7 @@ static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
(const u32 *)
&sin6->sin6_addr.s6_addr, 1);
}
- remove_handle(ep->com.dev, &ep->com.dev->atid_idr,
- atid);
+ xa_erase_irq(&ep->com.dev->atids, atid);
cxgb4_free_atid(t, atid);
dst_release(ep->dst);
cxgb4_l2t_release(ep->l2t);
@@ -2280,9 +2369,10 @@ fail:
(const u32 *)&sin6->sin6_addr.s6_addr, 1);
}
if (status && act_open_has_tid(status))
- cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, GET_TID(rpl));
+ cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, GET_TID(rpl),
+ ep->com.local_addr.ss_family);
- remove_handle(ep->com.dev, &ep->com.dev->atid_idr, atid);
+ xa_erase_irq(&ep->com.dev->atids, atid);
cxgb4_free_atid(t, atid);
dst_release(ep->dst);
cxgb4_l2t_release(ep->l2t);
@@ -2298,12 +2388,12 @@ static int pass_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
struct c4iw_listen_ep *ep = get_ep_from_stid(dev, stid);
if (!ep) {
- PDBG("%s stid %d lookup failure!\n", __func__, stid);
+ pr_warn("%s stid %d lookup failure!\n", __func__, stid);
goto out;
}
- PDBG("%s ep %p status %d error %d\n", __func__, ep,
- rpl->status, status2errno(rpl->status));
- c4iw_wake_up(&ep->com.wr_wait, status2errno(rpl->status));
+ pr_debug("ep %p status %d error %d\n", ep,
+ rpl->status, status2errno(rpl->status));
+ c4iw_wake_up_noref(ep->com.wr_waitp, status2errno(rpl->status));
c4iw_put_ep(&ep->com);
out:
return 0;
@@ -2315,9 +2405,14 @@ static int close_listsrv_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
unsigned int stid = GET_TID(rpl);
struct c4iw_listen_ep *ep = get_ep_from_stid(dev, stid);
- PDBG("%s ep %p\n", __func__, ep);
- c4iw_wake_up(&ep->com.wr_wait, status2errno(rpl->status));
+ if (!ep) {
+ pr_warn("%s stid %d lookup failure!\n", __func__, stid);
+ goto out;
+ }
+ pr_debug("ep %p\n", ep);
+ c4iw_wake_up_noref(ep->com.wr_waitp, status2errno(rpl->status));
c4iw_put_ep(&ep->com);
+out:
return 0;
}
@@ -2333,22 +2428,7 @@ static int accept_cr(struct c4iw_ep *ep, struct sk_buff *skb,
int win;
enum chip_type adapter_type = ep->com.dev->rdev.lldi.adapter_type;
- PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
- BUG_ON(skb_cloned(skb));
-
- skb_get(skb);
- rpl = cplhdr(skb);
- if (!is_t4(adapter_type)) {
- skb_trim(skb, roundup(sizeof(*rpl5), 16));
- rpl5 = (void *)rpl;
- INIT_TP_WR(rpl5, ep->hwtid);
- } else {
- skb_trim(skb, sizeof(*rpl));
- INIT_TP_WR(rpl, ep->hwtid);
- }
- OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL,
- ep->hwtid));
-
+ pr_debug("ep %p tid %u\n", ep, ep->hwtid);
cxgb_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
enable_tcp_timestamps && req->tcpopt.tstamp,
(ep->com.remote_addr.ss_family == AF_INET) ? 0 : 1);
@@ -2394,17 +2474,25 @@ static int accept_cr(struct c4iw_ep *ep, struct sk_buff *skb,
if (tcph->ece && tcph->cwr)
opt2 |= CCTRL_ECN_V(1);
}
- if (CHELSIO_CHIP_VERSION(adapter_type) > CHELSIO_T4) {
- u32 isn = (prandom_u32() & ~7UL) - 1;
+
+ if (!is_t4(adapter_type)) {
+ u32 isn = (get_random_u32() & ~7UL) - 1;
+
+ skb = get_skb(skb, roundup(sizeof(*rpl5), 16), GFP_KERNEL);
+ rpl5 = __skb_put_zero(skb, roundup(sizeof(*rpl5), 16));
+ rpl = (void *)rpl5;
+ INIT_TP_WR_CPL(rpl5, CPL_PASS_ACCEPT_RPL, ep->hwtid);
opt2 |= T5_OPT_2_VALID_F;
opt2 |= CONG_CNTRL_V(CONG_ALG_TAHOE);
opt2 |= T5_ISS_F;
- rpl5 = (void *)rpl;
- memset(&rpl5->iss, 0, roundup(sizeof(*rpl5)-sizeof(*rpl), 16));
if (peer2peer)
isn += 4;
rpl5->iss = cpu_to_be32(isn);
- PDBG("%s iss %u\n", __func__, be32_to_cpu(rpl5->iss));
+ pr_debug("iss %u\n", be32_to_cpu(rpl5->iss));
+ } else {
+ skb = get_skb(skb, sizeof(*rpl), GFP_KERNEL);
+ rpl = __skb_put_zero(skb, sizeof(*rpl));
+ INIT_TP_WR_CPL(rpl, CPL_PASS_ACCEPT_RPL, ep->hwtid);
}
rpl->opt0 = cpu_to_be64(opt0);
@@ -2417,8 +2505,7 @@ static int accept_cr(struct c4iw_ep *ep, struct sk_buff *skb,
static void reject_cr(struct c4iw_dev *dev, u32 hwtid, struct sk_buff *skb)
{
- PDBG("%s c4iw_dev %p tid %u\n", __func__, dev, hwtid);
- BUG_ON(skb_cloned(skb));
+ pr_debug("c4iw_dev %p tid %u\n", dev, hwtid);
skb_trim(skb, sizeof(struct cpl_tid_release));
release_tid(&dev->rdev, hwtid, skb);
return;
@@ -2439,52 +2526,56 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
u16 peer_mss = ntohs(req->tcpopt.mss);
int iptype;
unsigned short hdrs;
- u8 tos = PASS_OPEN_TOS_G(ntohl(req->tos_stid));
+ u8 tos;
parent_ep = (struct c4iw_ep *)get_ep_from_stid(dev, stid);
if (!parent_ep) {
- PDBG("%s connect request on invalid stid %d\n", __func__, stid);
+ pr_err("%s connect request on invalid stid %d\n",
+ __func__, stid);
goto reject;
}
if (state_read(&parent_ep->com) != LISTEN) {
- PDBG("%s - listening ep not in LISTEN\n", __func__);
+ pr_err("%s - listening ep not in LISTEN\n", __func__);
goto reject;
}
+ if (parent_ep->com.cm_id->tos_set)
+ tos = parent_ep->com.cm_id->tos;
+ else
+ tos = PASS_OPEN_TOS_G(ntohl(req->tos_stid));
+
cxgb_get_4tuple(req, parent_ep->com.dev->rdev.lldi.adapter_type,
&iptype, local_ip, peer_ip, &local_port, &peer_port);
/* Find output route */
if (iptype == 4) {
- PDBG("%s parent ep %p hwtid %u laddr %pI4 raddr %pI4 lport %d rport %d peer_mss %d\n"
- , __func__, parent_ep, hwtid,
- local_ip, peer_ip, ntohs(local_port),
- ntohs(peer_port), peer_mss);
+ pr_debug("parent ep %p hwtid %u laddr %pI4 raddr %pI4 lport %d rport %d peer_mss %d\n"
+ , parent_ep, hwtid,
+ local_ip, peer_ip, ntohs(local_port),
+ ntohs(peer_port), peer_mss);
dst = cxgb_find_route(&dev->rdev.lldi, get_real_dev,
*(__be32 *)local_ip, *(__be32 *)peer_ip,
local_port, peer_port, tos);
} else {
- PDBG("%s parent ep %p hwtid %u laddr %pI6 raddr %pI6 lport %d rport %d peer_mss %d\n"
- , __func__, parent_ep, hwtid,
- local_ip, peer_ip, ntohs(local_port),
- ntohs(peer_port), peer_mss);
+ pr_debug("parent ep %p hwtid %u laddr %pI6 raddr %pI6 lport %d rport %d peer_mss %d\n"
+ , parent_ep, hwtid,
+ local_ip, peer_ip, ntohs(local_port),
+ ntohs(peer_port), peer_mss);
dst = cxgb_find_route6(&dev->rdev.lldi, get_real_dev,
local_ip, peer_ip, local_port, peer_port,
- PASS_OPEN_TOS_G(ntohl(req->tos_stid)),
+ tos,
((struct sockaddr_in6 *)
&parent_ep->com.local_addr)->sin6_scope_id);
}
if (!dst) {
- printk(KERN_ERR MOD "%s - failed to find dst entry!\n",
- __func__);
+ pr_err("%s - failed to find dst entry!\n", __func__);
goto reject;
}
child_ep = alloc_ep(sizeof(*child_ep), GFP_KERNEL);
if (!child_ep) {
- printk(KERN_ERR MOD "%s - failed to allocate ep entry!\n",
- __func__);
+ pr_err("%s - failed to allocate ep entry!\n", __func__);
dst_release(dst);
goto reject;
}
@@ -2492,14 +2583,14 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
err = import_ep(child_ep, iptype, peer_ip, dst, dev, false,
parent_ep->com.dev->rdev.lldi.adapter_type, tos);
if (err) {
- printk(KERN_ERR MOD "%s - failed to allocate l2t entry!\n",
- __func__);
+ pr_err("%s - failed to allocate l2t entry!\n", __func__);
dst_release(dst);
kfree(child_ep);
goto reject;
}
- hdrs = sizeof(struct iphdr) + sizeof(struct tcphdr) +
+ hdrs = ((iptype == 4) ? sizeof(struct iphdr) : sizeof(struct ipv6hdr)) +
+ sizeof(struct tcphdr) +
((enable_tcp_timestamps && req->tcpopt.tstamp) ? 12 : 0);
if (peer_mss && child_ep->mtu > (peer_mss + hdrs))
child_ep->mtu = peer_mss + hdrs;
@@ -2554,11 +2645,12 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
child_ep->dst = dst;
child_ep->hwtid = hwtid;
- PDBG("%s tx_chan %u smac_idx %u rss_qid %u\n", __func__,
- child_ep->tx_chan, child_ep->smac_idx, child_ep->rss_qid);
+ pr_debug("tx_chan %u smac_idx %u rss_qid %u\n",
+ child_ep->tx_chan, child_ep->smac_idx, child_ep->rss_qid);
- init_timer(&child_ep->timer);
- cxgb4_insert_tid(t, child_ep, hwtid);
+ timer_setup(&child_ep->timer, ep_timeout, 0);
+ cxgb4_insert_tid(t, child_ep, hwtid,
+ child_ep->com.local_addr.ss_family);
insert_ep_tid(child_ep);
if (accept_cr(child_ep, skb, req)) {
c4iw_put_ep(&parent_ep->com);
@@ -2576,9 +2668,9 @@ fail:
c4iw_put_ep(&child_ep->com);
reject:
reject_cr(dev, hwtid, skb);
+out:
if (parent_ep)
c4iw_put_ep(&parent_ep->com);
-out:
return 0;
}
@@ -2588,16 +2680,20 @@ static int pass_establish(struct c4iw_dev *dev, struct sk_buff *skb)
struct cpl_pass_establish *req = cplhdr(skb);
unsigned int tid = GET_TID(req);
int ret;
+ u16 tcp_opt = ntohs(req->tcp_opt);
ep = get_ep_from_tid(dev, tid);
- PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+ if (!ep)
+ return 0;
+
+ pr_debug("ep %p tid %u\n", ep, ep->hwtid);
ep->snd_seq = be32_to_cpu(req->snd_isn);
ep->rcv_seq = be32_to_cpu(req->rcv_isn);
+ ep->snd_wscale = TCPOPT_SND_WSCALE_G(tcp_opt);
- PDBG("%s ep %p hwtid %u tcp_opt 0x%02x\n", __func__, ep, tid,
- ntohs(req->tcp_opt));
+ pr_debug("ep %p hwtid %u tcp_opt 0x%02x\n", ep, tid, tcp_opt);
- set_emss(ep, ntohs(req->tcp_opt));
+ set_emss(ep, tcp_opt);
dst_confirm(ep->dst);
mutex_lock(&ep->com.mutex);
@@ -2627,7 +2723,7 @@ static int peer_close(struct c4iw_dev *dev, struct sk_buff *skb)
if (!ep)
return 0;
- PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+ pr_debug("ep %p tid %u\n", ep, ep->hwtid);
dst_confirm(ep->dst);
set_bit(PEER_CLOSE, &ep->com.history);
@@ -2649,13 +2745,13 @@ static int peer_close(struct c4iw_dev *dev, struct sk_buff *skb)
* in rdma connection migration (see c4iw_accept_cr()).
*/
__state_set(&ep->com, CLOSING);
- PDBG("waking up ep %p tid %u\n", ep, ep->hwtid);
- c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET);
+ pr_debug("waking up ep %p tid %u\n", ep, ep->hwtid);
+ c4iw_wake_up_noref(ep->com.wr_waitp, -ECONNRESET);
break;
case MPA_REP_SENT:
__state_set(&ep->com, CLOSING);
- PDBG("waking up ep %p tid %u\n", ep, ep->hwtid);
- c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET);
+ pr_debug("waking up ep %p tid %u\n", ep, ep->hwtid);
+ c4iw_wake_up_noref(ep->com.wr_waitp, -ECONNRESET);
break;
case FPDU_MODE:
start_ep_timer(ep);
@@ -2691,7 +2787,7 @@ static int peer_close(struct c4iw_dev *dev, struct sk_buff *skb)
disconnect = 0;
break;
default:
- BUG_ON(1);
+ WARN_ONCE(1, "Bad endpoint state %u\n", ep->com.state);
}
mutex_unlock(&ep->com.mutex);
if (disconnect)
@@ -2702,33 +2798,53 @@ static int peer_close(struct c4iw_dev *dev, struct sk_buff *skb)
return 0;
}
+static void finish_peer_abort(struct c4iw_dev *dev, struct c4iw_ep *ep)
+{
+ complete_cached_srq_buffers(ep, ep->srqe_idx);
+ if (ep->com.cm_id && ep->com.qp) {
+ struct c4iw_qp_attributes attrs;
+
+ attrs.next_state = C4IW_QP_STATE_ERROR;
+ c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
+ C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
+ }
+ peer_abort_upcall(ep);
+ release_ep_resources(ep);
+ c4iw_put_ep(&ep->com);
+}
+
static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
{
- struct cpl_abort_req_rss *req = cplhdr(skb);
+ struct cpl_abort_req_rss6 *req = cplhdr(skb);
struct c4iw_ep *ep;
struct sk_buff *rpl_skb;
struct c4iw_qp_attributes attrs;
int ret;
int release = 0;
unsigned int tid = GET_TID(req);
+ u8 status;
+ u32 srqidx;
+
u32 len = roundup(sizeof(struct cpl_abort_rpl), 16);
ep = get_ep_from_tid(dev, tid);
if (!ep)
return 0;
- if (cxgb_is_neg_adv(req->status)) {
- PDBG("%s Negative advice on abort- tid %u status %d (%s)\n",
- __func__, ep->hwtid, req->status,
- neg_adv_str(req->status));
+ status = ABORT_RSS_STATUS_G(be32_to_cpu(req->srqidx_status));
+
+ if (cxgb_is_neg_adv(status)) {
+ pr_debug("Negative advice on abort- tid %u status %d (%s)\n",
+ ep->hwtid, status, neg_adv_str(status));
ep->stats.abort_neg_adv++;
mutex_lock(&dev->rdev.stats.lock);
dev->rdev.stats.neg_adv++;
mutex_unlock(&dev->rdev.stats.lock);
goto deref_ep;
}
- PDBG("%s ep %p tid %u state %u\n", __func__, ep, ep->hwtid,
- ep->com.state);
+
+ pr_debug("ep %p tid %u state %u\n", ep, ep->hwtid,
+ ep->com.state);
set_bit(PEER_ABORT, &ep->com.history);
/*
@@ -2737,7 +2853,7 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
* MPA_REQ_SENT
*/
if (ep->com.state != MPA_REQ_SENT)
- c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET);
+ c4iw_wake_up_noref(ep->com.wr_waitp, -ECONNRESET);
mutex_lock(&ep->com.mutex);
switch (ep->com.state) {
@@ -2749,7 +2865,8 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
break;
case MPA_REQ_SENT:
(void)stop_ep_timer(ep);
- if (mpa_rev == 1 || (mpa_rev == 2 && ep->tried_with_mpa_v1))
+ if (status != CPL_ERR_CONN_RESET || mpa_rev == 1 ||
+ (mpa_rev == 2 && ep->tried_with_mpa_v1))
connect_reply_upcall(ep, -ECONNRESET);
else {
/*
@@ -2760,8 +2877,8 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
* do some housekeeping so as to re-initiate the
* connection
*/
- PDBG("%s: mpa_rev=%d. Retrying with mpav1\n", __func__,
- mpa_rev);
+ pr_info("%s: mpa_rev=%d. Retrying with mpav1\n",
+ __func__, mpa_rev);
ep->retry_with_mpa_v1 = 1;
}
break;
@@ -2772,28 +2889,42 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
case MORIBUND:
case CLOSING:
stop_ep_timer(ep);
- /*FALLTHROUGH*/
+ fallthrough;
case FPDU_MODE:
+ if (ep->com.qp && ep->com.qp->srq) {
+ srqidx = ABORT_RSS_SRQIDX_G(
+ be32_to_cpu(req->srqidx_status));
+ if (srqidx) {
+ complete_cached_srq_buffers(ep, srqidx);
+ } else {
+ /* Hold ep ref until finish_peer_abort() */
+ c4iw_get_ep(&ep->com);
+ __state_set(&ep->com, ABORTING);
+ set_bit(PEER_ABORT_IN_PROGRESS, &ep->com.flags);
+ read_tcb(ep);
+ break;
+
+ }
+ }
+
if (ep->com.cm_id && ep->com.qp) {
attrs.next_state = C4IW_QP_STATE_ERROR;
ret = c4iw_modify_qp(ep->com.qp->rhp,
ep->com.qp, C4IW_QP_ATTR_NEXT_STATE,
&attrs, 1);
if (ret)
- printk(KERN_ERR MOD
- "%s - qp <- error failed!\n",
- __func__);
+ pr_err("%s - qp <- error failed!\n", __func__);
}
peer_abort_upcall(ep);
break;
case ABORTING:
break;
case DEAD:
- PDBG("%s PEER_ABORT IN DEAD STATE!!!!\n", __func__);
+ pr_warn("%s PEER_ABORT IN DEAD STATE!!!!\n", __func__);
mutex_unlock(&ep->com.mutex);
goto deref_ep;
default:
- BUG_ON(1);
+ WARN_ONCE(1, "Bad endpoint state %u\n", ep->com.state);
break;
}
dst_confirm(ep->dst);
@@ -2827,8 +2958,9 @@ out:
(const u32 *)&sin6->sin6_addr.s6_addr,
1);
}
- remove_handle(ep->com.dev, &ep->com.dev->hwtid_idr, ep->hwtid);
- cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, ep->hwtid);
+ xa_erase_irq(&ep->com.dev->hwtids, ep->hwtid);
+ cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, ep->hwtid,
+ ep->com.local_addr.ss_family);
dst_release(ep->dst);
cxgb4_l2t_release(ep->l2t);
c4iw_reconnect(ep);
@@ -2853,8 +2985,7 @@ static int close_con_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
if (!ep)
return 0;
- PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
- BUG_ON(!ep);
+ pr_debug("ep %p tid %u\n", ep, ep->hwtid);
/* The cm_id may be null if we failed to connect */
mutex_lock(&ep->com.mutex);
@@ -2880,7 +3011,7 @@ static int close_con_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
case DEAD:
break;
default:
- BUG_ON(1);
+ WARN_ONCE(1, "Bad endpoint state %u\n", ep->com.state);
break;
}
mutex_unlock(&ep->com.mutex);
@@ -2898,17 +3029,23 @@ static int terminate(struct c4iw_dev *dev, struct sk_buff *skb)
struct c4iw_qp_attributes attrs;
ep = get_ep_from_tid(dev, tid);
- BUG_ON(!ep);
- if (ep && ep->com.qp) {
- printk(KERN_WARNING MOD "TERM received tid %u qpid %u\n", tid,
- ep->com.qp->wq.sq.qid);
- attrs.next_state = C4IW_QP_STATE_TERMINATE;
- c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
- C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
+ if (ep) {
+ if (ep->com.qp) {
+ pr_warn("TERM received tid %u qpid %u\n", tid,
+ ep->com.qp->wq.sq.qid);
+ attrs.next_state = C4IW_QP_STATE_TERMINATE;
+ c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
+ C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
+ }
+
+ /* As per draft-hilland-iwarp-verbs-v1.0, sec 6.2.3,
+ * when entering the TERM state the RNIC MUST initiate a CLOSE.
+ */
+ c4iw_ep_disconnect(ep, 1, GFP_KERNEL);
+ c4iw_put_ep(&ep->com);
} else
- printk(KERN_WARNING MOD "TERM received tid %u no ep/qp\n", tid);
- c4iw_put_ep(&ep->com);
+ pr_warn("TERM received tid %u no ep/qp\n", tid);
return 0;
}
@@ -2929,18 +3066,19 @@ static int fw4_ack(struct c4iw_dev *dev, struct sk_buff *skb)
ep = get_ep_from_tid(dev, tid);
if (!ep)
return 0;
- PDBG("%s ep %p tid %u credits %u\n", __func__, ep, ep->hwtid, credits);
+ pr_debug("ep %p tid %u credits %u\n",
+ ep, ep->hwtid, credits);
if (credits == 0) {
- PDBG("%s 0 credit ack ep %p tid %u state %u\n",
- __func__, ep, ep->hwtid, state_read(&ep->com));
+ pr_debug("0 credit ack ep %p tid %u state %u\n",
+ ep, ep->hwtid, state_read(&ep->com));
goto out;
}
dst_confirm(ep->dst);
if (ep->mpa_skb) {
- PDBG("%s last streaming msg ack ep %p tid %u state %u "
- "initiator %u freeing skb\n", __func__, ep, ep->hwtid,
- state_read(&ep->com), ep->mpa_attr.initiator ? 1 : 0);
+ pr_debug("last streaming msg ack ep %p tid %u state %u initiator %u freeing skb\n",
+ ep, ep->hwtid, state_read(&ep->com),
+ ep->mpa_attr.initiator ? 1 : 0);
mutex_lock(&ep->com.mutex);
kfree_skb(ep->mpa_skb);
ep->mpa_skb = NULL;
@@ -2958,7 +3096,7 @@ int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
int abort;
struct c4iw_ep *ep = to_ep(cm_id);
- PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+ pr_debug("ep %p tid %u\n", ep, ep->hwtid);
mutex_lock(&ep->com.mutex);
if (ep->com.state != MPA_REQ_RCVD) {
@@ -2989,7 +3127,7 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
struct c4iw_qp *qp = get_qhp(h, conn_param->qpn);
int abort = 0;
- PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
+ pr_debug("ep %p tid %u\n", ep, ep->hwtid);
mutex_lock(&ep->com.mutex);
if (ep->com.state != MPA_REQ_RCVD) {
@@ -2997,7 +3135,10 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
goto err_out;
}
- BUG_ON(!qp);
+ if (!qp) {
+ err = -EINVAL;
+ goto err_out;
+ }
set_bit(ULP_ACCEPT, &ep->com.history);
if ((conn_param->ord > cur_max_read_depth(ep->com.dev)) ||
@@ -3042,7 +3183,7 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
ep->ird = 1;
}
- PDBG("%s %d ird %d ord %d\n", __func__, __LINE__, ep->ird, ep->ord);
+ pr_debug("ird %d ord %d\n", ep->ird, ep->ord);
ep->com.cm_id = cm_id;
ref_cm_id(&ep->com);
@@ -3097,17 +3238,22 @@ static int pick_local_ipaddrs(struct c4iw_dev *dev, struct iw_cm_id *cm_id)
int found = 0;
struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->m_local_addr;
struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->m_remote_addr;
+ const struct in_ifaddr *ifa;
ind = in_dev_get(dev->rdev.lldi.ports[0]);
if (!ind)
return -EADDRNOTAVAIL;
- for_primary_ifa(ind) {
+ rcu_read_lock();
+ in_dev_for_each_ifa_rcu(ifa, ind) {
+ if (ifa->ifa_flags & IFA_F_SECONDARY)
+ continue;
laddr->sin_addr.s_addr = ifa->ifa_address;
raddr->sin_addr.s_addr = ifa->ifa_address;
found = 1;
break;
}
- endfor_ifa(ind);
+ rcu_read_unlock();
+
in_dev_put(ind);
return found ? 0 : -EADDRNOTAVAIL;
}
@@ -3140,7 +3286,7 @@ static int get_lladdr(struct net_device *dev, struct in6_addr *addr,
static int pick_local_ip6addrs(struct c4iw_dev *dev, struct iw_cm_id *cm_id)
{
- struct in6_addr uninitialized_var(addr);
+ struct in6_addr addr;
struct sockaddr_in6 *la6 = (struct sockaddr_in6 *)&cm_id->m_local_addr;
struct sockaddr_in6 *ra6 = (struct sockaddr_in6 *)&cm_id->m_remote_addr;
@@ -3171,7 +3317,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
}
ep = alloc_ep(sizeof(*ep), GFP_KERNEL);
if (!ep) {
- printk(KERN_ERR MOD "%s - cannot alloc ep.\n", __func__);
+ pr_err("%s - cannot alloc ep\n", __func__);
err = -ENOMEM;
goto out;
}
@@ -3182,7 +3328,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
goto fail1;
}
- init_timer(&ep->timer);
+ timer_setup(&ep->timer, ep_timeout, 0);
ep->plen = conn_param->private_data_len;
if (ep->plen)
memcpy(ep->mpa_pkt + sizeof(struct mpa_message),
@@ -3195,27 +3341,30 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
ep->com.cm_id = cm_id;
ref_cm_id(&ep->com);
+ cm_id->provider_data = ep;
ep->com.dev = dev;
ep->com.qp = get_qhp(dev, conn_param->qpn);
if (!ep->com.qp) {
- PDBG("%s qpn 0x%x not found!\n", __func__, conn_param->qpn);
+ pr_warn("%s qpn 0x%x not found!\n", __func__, conn_param->qpn);
err = -EINVAL;
goto fail2;
}
ref_qp(ep);
- PDBG("%s qpn 0x%x qp %p cm_id %p\n", __func__, conn_param->qpn,
- ep->com.qp, cm_id);
+ pr_debug("qpn 0x%x qp %p cm_id %p\n", conn_param->qpn,
+ ep->com.qp, cm_id);
/*
* Allocate an active TID to initiate a TCP connection.
*/
ep->atid = cxgb4_alloc_atid(dev->rdev.lldi.tids, ep);
if (ep->atid == -1) {
- printk(KERN_ERR MOD "%s - cannot alloc atid.\n", __func__);
+ pr_err("%s - cannot alloc atid\n", __func__);
err = -ENOMEM;
goto fail2;
}
- insert_handle(dev, &dev->atid_idr, ep, ep->atid);
+ err = xa_insert_irq(&dev->atids, ep->atid, ep, GFP_KERNEL);
+ if (err)
+ goto fail5;
memcpy(&ep->com.local_addr, &cm_id->m_local_addr,
sizeof(ep->com.local_addr));
@@ -3237,13 +3386,13 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
if (raddr->sin_addr.s_addr == htonl(INADDR_ANY)) {
err = pick_local_ipaddrs(dev, cm_id);
if (err)
- goto fail2;
+ goto fail3;
}
/* find a route */
- PDBG("%s saddr %pI4 sport 0x%x raddr %pI4 rport 0x%x\n",
- __func__, &laddr->sin_addr, ntohs(laddr->sin_port),
- ra, ntohs(raddr->sin_port));
+ pr_debug("saddr %pI4 sport 0x%x raddr %pI4 rport 0x%x\n",
+ &laddr->sin_addr, ntohs(laddr->sin_port),
+ ra, ntohs(raddr->sin_port));
ep->dst = cxgb_find_route(&dev->rdev.lldi, get_real_dev,
laddr->sin_addr.s_addr,
raddr->sin_addr.s_addr,
@@ -3259,23 +3408,23 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
if (ipv6_addr_type(&raddr6->sin6_addr) == IPV6_ADDR_ANY) {
err = pick_local_ip6addrs(dev, cm_id);
if (err)
- goto fail2;
+ goto fail3;
}
/* find a route */
- PDBG("%s saddr %pI6 sport 0x%x raddr %pI6 rport 0x%x\n",
- __func__, laddr6->sin6_addr.s6_addr,
- ntohs(laddr6->sin6_port),
- raddr6->sin6_addr.s6_addr, ntohs(raddr6->sin6_port));
+ pr_debug("saddr %pI6 sport 0x%x raddr %pI6 rport 0x%x\n",
+ laddr6->sin6_addr.s6_addr,
+ ntohs(laddr6->sin6_port),
+ raddr6->sin6_addr.s6_addr, ntohs(raddr6->sin6_port));
ep->dst = cxgb_find_route6(&dev->rdev.lldi, get_real_dev,
laddr6->sin6_addr.s6_addr,
raddr6->sin6_addr.s6_addr,
laddr6->sin6_port,
- raddr6->sin6_port, 0,
+ raddr6->sin6_port, cm_id->tos,
raddr6->sin6_scope_id);
}
if (!ep->dst) {
- printk(KERN_ERR MOD "%s - cannot find route.\n", __func__);
+ pr_err("%s - cannot find route\n", __func__);
err = -EHOSTUNREACH;
goto fail3;
}
@@ -3283,13 +3432,13 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
err = import_ep(ep, iptype, ra, ep->dst, ep->com.dev, true,
ep->com.dev->rdev.lldi.adapter_type, cm_id->tos);
if (err) {
- printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __func__);
+ pr_err("%s - cannot alloc l2e\n", __func__);
goto fail4;
}
- PDBG("%s txq_idx %u tx_chan %u smac_idx %u rss_qid %u l2t_idx %u\n",
- __func__, ep->txq_idx, ep->tx_chan, ep->smac_idx, ep->rss_qid,
- ep->l2t->idx);
+ pr_debug("txq_idx %u tx_chan %u smac_idx %u rss_qid %u l2t_idx %u\n",
+ ep->txq_idx, ep->tx_chan, ep->smac_idx, ep->rss_qid,
+ ep->l2t->idx);
state_set(&ep->com, CONNECTING);
ep->tos = cm_id->tos;
@@ -3303,7 +3452,8 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
fail4:
dst_release(ep->dst);
fail3:
- remove_handle(ep->com.dev, &ep->com.dev->atid_idr, ep->atid);
+ xa_erase_irq(&ep->com.dev->atids, ep->atid);
+fail5:
cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid);
fail2:
skb_queue_purge(&ep->com.ep_skb_list);
@@ -3326,14 +3476,14 @@ static int create_server6(struct c4iw_dev *dev, struct c4iw_listen_ep *ep)
if (err)
return err;
}
- c4iw_init_wr_wait(&ep->com.wr_wait);
+ c4iw_init_wr_wait(ep->com.wr_waitp);
err = cxgb4_create_server6(ep->com.dev->rdev.lldi.ports[0],
ep->stid, &sin6->sin6_addr,
sin6->sin6_port,
ep->com.dev->rdev.lldi.rxq_ids[0]);
if (!err)
err = c4iw_wait_for_reply(&ep->com.dev->rdev,
- &ep->com.wr_wait,
+ ep->com.wr_waitp,
0, 0, __func__);
else if (err > 0)
err = net_xmit_errno(err);
@@ -3369,13 +3519,13 @@ static int create_server4(struct c4iw_dev *dev, struct c4iw_listen_ep *ep)
}
} while (err == -EBUSY);
} else {
- c4iw_init_wr_wait(&ep->com.wr_wait);
+ c4iw_init_wr_wait(ep->com.wr_waitp);
err = cxgb4_create_server(ep->com.dev->rdev.lldi.ports[0],
ep->stid, sin->sin_addr.s_addr, sin->sin_port,
0, ep->com.dev->rdev.lldi.rxq_ids[0]);
if (!err)
err = c4iw_wait_for_reply(&ep->com.dev->rdev,
- &ep->com.wr_wait,
+ ep->com.wr_waitp,
0, 0, __func__);
else if (err > 0)
err = net_xmit_errno(err);
@@ -3397,12 +3547,12 @@ int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog)
ep = alloc_ep(sizeof(*ep), GFP_KERNEL);
if (!ep) {
- printk(KERN_ERR MOD "%s - cannot alloc ep.\n", __func__);
+ pr_err("%s - cannot alloc ep\n", __func__);
err = -ENOMEM;
goto fail1;
}
skb_queue_head_init(&ep->com.ep_skb_list);
- PDBG("%s ep %p\n", __func__, ep);
+ pr_debug("ep %p\n", ep);
ep->com.cm_id = cm_id;
ref_cm_id(&ep->com);
ep->com.dev = dev;
@@ -3422,14 +3572,13 @@ int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog)
cm_id->m_local_addr.ss_family, ep);
if (ep->stid == -1) {
- printk(KERN_ERR MOD "%s - cannot alloc stid.\n", __func__);
+ pr_err("%s - cannot alloc stid\n", __func__);
err = -ENOMEM;
goto fail2;
}
- insert_handle(dev, &dev->stid_idr, ep, ep->stid);
-
- memcpy(&ep->com.local_addr, &cm_id->m_local_addr,
- sizeof(ep->com.local_addr));
+ err = xa_insert_irq(&dev->stids, ep->stid, ep, GFP_KERNEL);
+ if (err)
+ goto fail3;
state_set(&ep->com, LISTEN);
if (ep->com.local_addr.ss_family == AF_INET)
@@ -3440,7 +3589,8 @@ int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog)
cm_id->provider_data = ep;
goto out;
}
-
+ xa_erase_irq(&ep->com.dev->stids, ep->stid);
+fail3:
cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid,
ep->com.local_addr.ss_family);
fail2:
@@ -3456,7 +3606,7 @@ int c4iw_destroy_listen(struct iw_cm_id *cm_id)
int err;
struct c4iw_listen_ep *ep = to_listen_ep(cm_id);
- PDBG("%s ep %p\n", __func__, ep);
+ pr_debug("ep %p\n", ep);
might_sleep();
state_set(&ep->com, DEAD);
@@ -3464,22 +3614,23 @@ int c4iw_destroy_listen(struct iw_cm_id *cm_id)
ep->com.local_addr.ss_family == AF_INET) {
err = cxgb4_remove_server_filter(
ep->com.dev->rdev.lldi.ports[0], ep->stid,
- ep->com.dev->rdev.lldi.rxq_ids[0], 0);
+ ep->com.dev->rdev.lldi.rxq_ids[0], false);
} else {
struct sockaddr_in6 *sin6;
- c4iw_init_wr_wait(&ep->com.wr_wait);
+ c4iw_init_wr_wait(ep->com.wr_waitp);
err = cxgb4_remove_server(
ep->com.dev->rdev.lldi.ports[0], ep->stid,
- ep->com.dev->rdev.lldi.rxq_ids[0], 0);
+ ep->com.dev->rdev.lldi.rxq_ids[0],
+ ep->com.local_addr.ss_family == AF_INET6);
if (err)
goto done;
- err = c4iw_wait_for_reply(&ep->com.dev->rdev, &ep->com.wr_wait,
+ err = c4iw_wait_for_reply(&ep->com.dev->rdev, ep->com.wr_waitp,
0, 0, __func__);
sin6 = (struct sockaddr_in6 *)&ep->com.local_addr;
cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0],
(const u32 *)&sin6->sin6_addr.s6_addr, 1);
}
- remove_handle(ep->com.dev, &ep->com.dev->stid_idr, ep->stid);
+ xa_erase_irq(&ep->com.dev->stids, ep->stid);
cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid,
ep->com.local_addr.ss_family);
done:
@@ -3497,8 +3648,8 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp)
mutex_lock(&ep->com.mutex);
- PDBG("%s ep %p state %s, abrupt %d\n", __func__, ep,
- states[ep->com.state], abrupt);
+ pr_debug("ep %p state %s, abrupt %d\n", ep,
+ states[ep->com.state], abrupt);
/*
* Ref the ep here in case we have fatal errors causing the
@@ -3551,18 +3702,17 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp)
case MORIBUND:
case ABORTING:
case DEAD:
- PDBG("%s ignoring disconnect ep %p state %u\n",
- __func__, ep, ep->com.state);
+ pr_debug("ignoring disconnect ep %p state %u\n",
+ ep, ep->com.state);
break;
default:
- BUG();
+ WARN_ONCE(1, "Bad endpoint state %u\n", ep->com.state);
break;
}
if (close) {
if (abrupt) {
set_bit(EP_DISC_ABORT, &ep->com.history);
- close_complete_upcall(ep, -ECONNRESET);
ret = send_abort(ep);
} else {
set_bit(EP_DISC_CLOSE, &ep->com.history);
@@ -3583,8 +3733,7 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp)
C4IW_QP_ATTR_NEXT_STATE,
&attrs, 1);
if (ret)
- pr_err(MOD
- "%s - qp <- error failed!\n",
+ pr_err("%s - qp <- error failed!\n",
__func__);
}
fatal = 1;
@@ -3615,6 +3764,7 @@ static void active_ofld_conn_reply(struct c4iw_dev *dev, struct sk_buff *skb,
send_fw_act_open_req(ep, atid);
return;
}
+ fallthrough;
case FW_EADDRINUSE:
set_bit(ACT_RETRY_INUSE, &ep->com.history);
if (ep->retry_count++ < ACT_OPEN_RETRY_COUNT) {
@@ -3640,7 +3790,7 @@ static void active_ofld_conn_reply(struct c4iw_dev *dev, struct sk_buff *skb,
cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0],
(const u32 *)&sin6->sin6_addr.s6_addr, 1);
}
- remove_handle(dev, &dev->atid_idr, atid);
+ xa_erase_irq(&dev->atids, atid);
cxgb4_free_atid(dev->rdev.lldi.tids, atid);
dst_release(ep->dst);
cxgb4_l2t_release(ep->l2t);
@@ -3655,9 +3805,8 @@ static void passive_ofld_conn_reply(struct c4iw_dev *dev, struct sk_buff *skb,
int ret;
rpl_skb = (struct sk_buff *)(unsigned long)req->cookie;
- BUG_ON(!rpl_skb);
if (req->retval) {
- PDBG("%s passive open failure %d\n", __func__, req->retval);
+ pr_err("%s passive open failure %d\n", __func__, req->retval);
mutex_lock(&dev->rdev.stats.lock);
dev->rdev.stats.pas_ofld_conn_fails++;
mutex_unlock(&dev->rdev.stats.lock);
@@ -3674,6 +3823,80 @@ static void passive_ofld_conn_reply(struct c4iw_dev *dev, struct sk_buff *skb,
return;
}
+static inline u64 t4_tcb_get_field64(__be64 *tcb, u16 word)
+{
+ u64 tlo = be64_to_cpu(tcb[((31 - word) / 2)]);
+ u64 thi = be64_to_cpu(tcb[((31 - word) / 2) - 1]);
+ u64 t;
+ u32 shift = 32;
+
+ t = (thi << shift) | (tlo >> shift);
+
+ return t;
+}
+
+static inline u32 t4_tcb_get_field32(__be64 *tcb, u16 word, u32 mask, u32 shift)
+{
+ u32 v;
+ u64 t = be64_to_cpu(tcb[(31 - word) / 2]);
+
+ if (word & 0x1)
+ shift += 32;
+ v = (t >> shift) & mask;
+ return v;
+}
+
+static int read_tcb_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
+{
+ struct cpl_get_tcb_rpl *rpl = cplhdr(skb);
+ __be64 *tcb = (__be64 *)(rpl + 1);
+ unsigned int tid = GET_TID(rpl);
+ struct c4iw_ep *ep;
+ u64 t_flags_64;
+ u32 rx_pdu_out;
+
+ ep = get_ep_from_tid(dev, tid);
+ if (!ep)
+ return 0;
+ /* Examine the TF_RX_PDU_OUT (bit 49 of the t_flags) in order to
+ * determine if there's a rx PDU feedback event pending.
+ *
+ * If that bit is set, it means we'll need to re-read the TCB's
+ * rq_start value. The final value is the one present in a TCB
+ * with the TF_RX_PDU_OUT bit cleared.
+ */
+
+ t_flags_64 = t4_tcb_get_field64(tcb, TCB_T_FLAGS_W);
+ rx_pdu_out = (t_flags_64 & TF_RX_PDU_OUT_V(1)) >> TF_RX_PDU_OUT_S;
+
+ c4iw_put_ep(&ep->com); /* from get_ep_from_tid() */
+ c4iw_put_ep(&ep->com); /* from read_tcb() */
+
+ /* If TF_RX_PDU_OUT bit is set, re-read the TCB */
+ if (rx_pdu_out) {
+ if (++ep->rx_pdu_out_cnt >= 2) {
+ WARN_ONCE(1, "tcb re-read() reached the guard limit, finishing the cleanup\n");
+ goto cleanup;
+ }
+ read_tcb(ep);
+ return 0;
+ }
+
+ ep->srqe_idx = t4_tcb_get_field32(tcb, TCB_RQ_START_W, TCB_RQ_START_M,
+ TCB_RQ_START_S);
+cleanup:
+ pr_debug("ep %p tid %u %016x\n", ep, ep->hwtid, ep->srqe_idx);
+
+ if (test_bit(PEER_ABORT_IN_PROGRESS, &ep->com.flags))
+ finish_peer_abort(dev, ep);
+ else if (test_bit(ABORT_REQ_IN_PROGRESS, &ep->com.flags))
+ send_abort_req(ep);
+ else
+ WARN_ONCE(1, "unexpected state!");
+
+ return 0;
+}
+
static int deferred_fw6_msg(struct c4iw_dev *dev, struct sk_buff *skb)
{
struct cpl_fw6_msg *rpl = cplhdr(skb);
@@ -3731,9 +3954,9 @@ static void build_cpl_pass_accept_req(struct sk_buff *skb, int stid , u8 tos)
*/
memset(&tmp_opt, 0, sizeof(tmp_opt));
tcp_clear_options(&tmp_opt);
- tcp_parse_options(skb, &tmp_opt, 0, NULL);
+ tcp_parse_options(&init_net, skb, &tmp_opt, 0, NULL);
- req = (struct cpl_pass_accept_req *)__skb_push(skb, sizeof(*req));
+ req = __skb_push(skb, sizeof(*req));
memset(req, 0, sizeof(*req));
req->l2info = cpu_to_be16(SYN_INTF_V(intf) |
SYN_MAC_IDX_V(RX_MACIDX_G(
@@ -3783,8 +4006,9 @@ static void send_fw_pass_open_req(struct c4iw_dev *dev, struct sk_buff *skb,
int ret;
req_skb = alloc_skb(sizeof(struct fw_ofld_connection_wr), GFP_KERNEL);
- req = (struct fw_ofld_connection_wr *)__skb_put(req_skb, sizeof(*req));
- memset(req, 0, sizeof(*req));
+ if (!req_skb)
+ return;
+ req = __skb_put_zero(req_skb, sizeof(*req));
req->op_compl = htonl(WR_OP_V(FW_OFLD_CONNECTION_WR) | FW_WR_COMPL_F);
req->len16_pkd = htonl(FW_WR_LEN16_V(DIV_ROUND_UP(sizeof(*req), 16)));
req->le.version_cpl = htonl(FW_OFLD_CONNECTION_WR_CPL_F);
@@ -3852,7 +4076,6 @@ static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb)
struct net_device *pdev;
u16 rss_qid, eth_hdr_len;
int step;
- u32 tx_chan;
struct neighbour *neigh;
/* Drop all non-SYN packets */
@@ -3873,7 +4096,8 @@ static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb)
lep = (struct c4iw_ep *)get_ep_from_stid(dev, stid);
if (!lep) {
- PDBG("%s connect request on invalid stid %d\n", __func__, stid);
+ pr_warn("%s connect request on invalid stid %d\n",
+ __func__, stid);
goto reject;
}
@@ -3899,7 +4123,7 @@ static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb)
} else {
vlan_eh = (struct vlan_ethhdr *)(req + 1);
iph = (struct iphdr *)(vlan_eh + 1);
- skb->vlan_tci = ntohs(cpl->vlan);
+ __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), ntohs(cpl->vlan));
}
if (iph->version != 0x4)
@@ -3910,39 +4134,39 @@ static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb)
skb_set_transport_header(skb, (void *)tcph - (void *)rss);
skb_get(skb);
- PDBG("%s lip 0x%x lport %u pip 0x%x pport %u tos %d\n", __func__,
- ntohl(iph->daddr), ntohs(tcph->dest), ntohl(iph->saddr),
- ntohs(tcph->source), iph->tos);
+ pr_debug("lip 0x%x lport %u pip 0x%x pport %u tos %d\n",
+ ntohl(iph->daddr), ntohs(tcph->dest), ntohl(iph->saddr),
+ ntohs(tcph->source), iph->tos);
dst = cxgb_find_route(&dev->rdev.lldi, get_real_dev,
iph->daddr, iph->saddr, tcph->dest,
tcph->source, iph->tos);
if (!dst) {
- pr_err("%s - failed to find dst entry!\n",
- __func__);
+ pr_err("%s - failed to find dst entry!\n", __func__);
goto reject;
}
neigh = dst_neigh_lookup_skb(dst, skb);
if (!neigh) {
- pr_err("%s - failed to allocate neigh!\n",
- __func__);
+ pr_err("%s - failed to allocate neigh!\n", __func__);
goto free_dst;
}
if (neigh->dev->flags & IFF_LOOPBACK) {
pdev = ip_dev_find(&init_net, iph->daddr);
+ if (!pdev) {
+ pr_err("%s - failed to find device!\n", __func__);
+ goto free_dst;
+ }
e = cxgb4_l2t_get(dev->rdev.lldi.l2t, neigh,
pdev, 0);
pi = (struct port_info *)netdev_priv(pdev);
- tx_chan = cxgb4_port_chan(pdev);
dev_put(pdev);
} else {
pdev = get_real_dev(neigh->dev);
e = cxgb4_l2t_get(dev->rdev.lldi.l2t, neigh,
pdev, 0);
pi = (struct port_info *)netdev_priv(pdev);
- tx_chan = cxgb4_port_chan(pdev);
}
neigh_release(neigh);
if (!e) {
@@ -3997,6 +4221,7 @@ static c4iw_handler_func work_handlers[NUM_CPL_CMDS + NUM_FAKE_CPLS] = {
[CPL_CLOSE_CON_RPL] = close_con_rpl,
[CPL_RDMA_TERMINATE] = terminate,
[CPL_FW4_ACK] = fw4_ack,
+ [CPL_GET_TCB_RPL] = read_tcb_rpl,
[CPL_FW6_MSG] = deferred_fw6_msg,
[CPL_RX_PKT] = rx_pkt,
[FAKE_CPL_PUT_EP_SAFE] = _put_ep_safe,
@@ -4009,8 +4234,7 @@ static void process_timeout(struct c4iw_ep *ep)
int abort = 1;
mutex_lock(&ep->com.mutex);
- PDBG("%s ep %p tid %u state %d\n", __func__, ep, ep->hwtid,
- ep->com.state);
+ pr_debug("ep %p tid %u state %d\n", ep, ep->hwtid, ep->com.state);
set_bit(TIMEDOUT, &ep->com.history);
switch (ep->com.state) {
case MPA_REQ_SENT:
@@ -4086,19 +4310,24 @@ static void process_work(struct work_struct *work)
dev = *((struct c4iw_dev **) (skb->cb + sizeof(void *)));
opcode = rpl->ot.opcode;
- BUG_ON(!work_handlers[opcode]);
- ret = work_handlers[opcode](dev, skb);
- if (!ret)
+ if (opcode >= ARRAY_SIZE(work_handlers) ||
+ !work_handlers[opcode]) {
+ pr_err("No handler for opcode 0x%x.\n", opcode);
kfree_skb(skb);
+ } else {
+ ret = work_handlers[opcode](dev, skb);
+ if (!ret)
+ kfree_skb(skb);
+ }
process_timedout_eps();
}
}
static DECLARE_WORK(skb_work, process_work);
-static void ep_timeout(unsigned long arg)
+static void ep_timeout(struct timer_list *t)
{
- struct c4iw_ep *ep = (struct c4iw_ep *)arg;
+ struct c4iw_ep *ep = timer_container_of(ep, t, timer);
int kickit = 0;
spin_lock(&timeout_lock);
@@ -4140,8 +4369,8 @@ static int set_tcb_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
struct cpl_set_tcb_rpl *rpl = cplhdr(skb);
if (rpl->status != CPL_ERR_NONE) {
- printk(KERN_ERR MOD "Unexpected SET_TCB_RPL status %u "
- "for tid %u\n", rpl->status, GET_TID(rpl));
+ pr_err("Unexpected SET_TCB_RPL status %u for tid %u\n",
+ rpl->status, GET_TID(rpl));
}
kfree_skb(skb);
return 0;
@@ -4153,15 +4382,15 @@ static int fw6_msg(struct c4iw_dev *dev, struct sk_buff *skb)
struct c4iw_wr_wait *wr_waitp;
int ret;
- PDBG("%s type %u\n", __func__, rpl->type);
+ pr_debug("type %u\n", rpl->type);
switch (rpl->type) {
case FW6_TYPE_WR_RPL:
ret = (int)((be64_to_cpu(rpl->data[0]) >> 8) & 0xff);
wr_waitp = (struct c4iw_wr_wait *)(__force unsigned long) rpl->data[1];
- PDBG("%s wr_waitp %p ret %u\n", __func__, wr_waitp, ret);
+ pr_debug("wr_waitp %p ret %u\n", wr_waitp, ret);
if (wr_waitp)
- c4iw_wake_up(wr_waitp, ret ? -ret : 0);
+ c4iw_wake_up_deref(wr_waitp, ret ? -ret : 0);
kfree_skb(skb);
break;
case FW6_TYPE_CQE:
@@ -4169,8 +4398,8 @@ static int fw6_msg(struct c4iw_dev *dev, struct sk_buff *skb)
sched(dev, skb);
break;
default:
- printk(KERN_ERR MOD "%s unexpected fw6 msg type %u\n", __func__,
- rpl->type);
+ pr_err("%s unexpected fw6 msg type %u\n",
+ __func__, rpl->type);
kfree_skb(skb);
break;
}
@@ -4186,21 +4415,19 @@ static int peer_abort_intr(struct c4iw_dev *dev, struct sk_buff *skb)
ep = get_ep_from_tid(dev, tid);
/* This EP will be dereferenced in peer_abort() */
if (!ep) {
- printk(KERN_WARNING MOD
- "Abort on non-existent endpoint, tid %d\n", tid);
+ pr_warn("Abort on non-existent endpoint, tid %d\n", tid);
kfree_skb(skb);
return 0;
}
if (cxgb_is_neg_adv(req->status)) {
- PDBG("%s Negative advice on abort- tid %u status %d (%s)\n",
- __func__, ep->hwtid, req->status,
- neg_adv_str(req->status));
+ pr_debug("Negative advice on abort- tid %u status %d (%s)\n",
+ ep->hwtid, req->status,
+ neg_adv_str(req->status));
goto out;
}
- PDBG("%s ep %p tid %u state %u\n", __func__, ep, ep->hwtid,
- ep->com.state);
+ pr_debug("ep %p tid %u state %u\n", ep, ep->hwtid, ep->com.state);
- c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET);
+ c4iw_wake_up_noref(ep->com.wr_waitp, -ECONNRESET);
out:
sched(dev, skb);
return 0;
@@ -4226,13 +4453,13 @@ c4iw_handler_func c4iw_handlers[NUM_CPL_CMDS] = {
[CPL_RDMA_TERMINATE] = sched,
[CPL_FW4_ACK] = sched,
[CPL_SET_TCB_RPL] = set_tcb_rpl,
+ [CPL_GET_TCB_RPL] = sched,
[CPL_FW6_MSG] = fw6_msg,
[CPL_RX_PKT] = sched
};
int __init c4iw_cm_init(void)
{
- spin_lock_init(&timeout_lock);
skb_queue_head_init(&rxq);
workq = alloc_ordered_workqueue("iw_cxgb4", WQ_MEM_RECLAIM);
@@ -4245,6 +4472,5 @@ int __init c4iw_cm_init(void)
void c4iw_cm_term(void)
{
WARN_ON(!list_empty(&timeout_list));
- flush_workqueue(workq);
destroy_workqueue(workq);
}
diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c
index 19c6477af19f..14ced7b667fa 100644
--- a/drivers/infiniband/hw/cxgb4/cq.c
+++ b/drivers/infiniband/hw/cxgb4/cq.c
@@ -30,57 +30,57 @@
* SOFTWARE.
*/
+#include <rdma/uverbs_ioctl.h>
+
#include "iw_cxgb4.h"
-static int destroy_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
- struct c4iw_dev_ucontext *uctx, struct sk_buff *skb)
+static void destroy_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
+ struct c4iw_dev_ucontext *uctx, struct sk_buff *skb,
+ struct c4iw_wr_wait *wr_waitp)
{
struct fw_ri_res_wr *res_wr;
struct fw_ri_res *res;
int wr_len;
- struct c4iw_wr_wait wr_wait;
- int ret;
- wr_len = sizeof *res_wr + sizeof *res;
+ wr_len = sizeof(*res_wr) + sizeof(*res);
set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0);
- res_wr = (struct fw_ri_res_wr *)__skb_put(skb, wr_len);
- memset(res_wr, 0, wr_len);
+ res_wr = __skb_put_zero(skb, wr_len);
res_wr->op_nres = cpu_to_be32(
FW_WR_OP_V(FW_RI_RES_WR) |
FW_RI_RES_WR_NRES_V(1) |
FW_WR_COMPL_F);
res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16));
- res_wr->cookie = (uintptr_t)&wr_wait;
+ res_wr->cookie = (uintptr_t)wr_waitp;
res = res_wr->res;
res->u.cq.restype = FW_RI_RES_TYPE_CQ;
res->u.cq.op = FW_RI_RES_OP_RESET;
res->u.cq.iqid = cpu_to_be32(cq->cqid);
- c4iw_init_wr_wait(&wr_wait);
- ret = c4iw_ofld_send(rdev, skb);
- if (!ret) {
- ret = c4iw_wait_for_reply(rdev, &wr_wait, 0, 0, __func__);
- }
+ c4iw_init_wr_wait(wr_waitp);
+ c4iw_ref_send_wait(rdev, skb, wr_waitp, 0, 0, __func__);
kfree(cq->sw_queue);
dma_free_coherent(&(rdev->lldi.pdev->dev),
cq->memsize, cq->queue,
dma_unmap_addr(cq, mapping));
c4iw_put_cqid(rdev, cq->cqid, uctx);
- return ret;
}
static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
- struct c4iw_dev_ucontext *uctx)
+ struct c4iw_dev_ucontext *uctx,
+ struct c4iw_wr_wait *wr_waitp)
{
struct fw_ri_res_wr *res_wr;
struct fw_ri_res *res;
int wr_len;
int user = (uctx != &rdev->uctx);
- struct c4iw_wr_wait wr_wait;
int ret;
struct sk_buff *skb;
+ struct c4iw_ucontext *ucontext = NULL;
+
+ if (user)
+ ucontext = container_of(uctx, struct c4iw_ucontext, uctx);
cq->cqid = c4iw_get_cqid(rdev, uctx);
if (!cq->cqid) {
@@ -102,10 +102,19 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
goto err3;
}
dma_unmap_addr_set(cq, mapping, cq->dma_addr);
- memset(cq->queue, 0, cq->memsize);
+
+ if (user && ucontext->is_32b_cqe) {
+ cq->qp_errp = &((struct t4_status_page *)
+ ((u8 *)cq->queue + (cq->size - 1) *
+ (sizeof(*cq->queue) / 2)))->qp_err;
+ } else {
+ cq->qp_errp = &((struct t4_status_page *)
+ ((u8 *)cq->queue + (cq->size - 1) *
+ sizeof(*cq->queue)))->qp_err;
+ }
/* build fw_ri_res_wr */
- wr_len = sizeof *res_wr + sizeof *res;
+ wr_len = sizeof(*res_wr) + sizeof(*res);
skb = alloc_skb(wr_len, GFP_KERNEL);
if (!skb) {
@@ -114,14 +123,13 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
}
set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0);
- res_wr = (struct fw_ri_res_wr *)__skb_put(skb, wr_len);
- memset(res_wr, 0, wr_len);
+ res_wr = __skb_put_zero(skb, wr_len);
res_wr->op_nres = cpu_to_be32(
FW_WR_OP_V(FW_RI_RES_WR) |
FW_RI_RES_WR_NRES_V(1) |
FW_WR_COMPL_F);
res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16));
- res_wr->cookie = (uintptr_t)&wr_wait;
+ res_wr->cookie = (uintptr_t)wr_waitp;
res = res_wr->res;
res->u.cq.restype = FW_RI_RES_TYPE_CQ;
res->u.cq.op = FW_RI_RES_OP_WRITE;
@@ -137,17 +145,14 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
FW_RI_RES_WR_IQPCIECH_V(2) |
FW_RI_RES_WR_IQINTCNTTHRESH_V(0) |
FW_RI_RES_WR_IQO_F |
- FW_RI_RES_WR_IQESIZE_V(1));
+ ((user && ucontext->is_32b_cqe) ?
+ FW_RI_RES_WR_IQESIZE_V(1) :
+ FW_RI_RES_WR_IQESIZE_V(2)));
res->u.cq.iqsize = cpu_to_be16(cq->size);
res->u.cq.iqaddr = cpu_to_be64(cq->dma_addr);
- c4iw_init_wr_wait(&wr_wait);
-
- ret = c4iw_ofld_send(rdev, skb);
- if (ret)
- goto err4;
- PDBG("%s wait_event wr_wait %p\n", __func__, &wr_wait);
- ret = c4iw_wait_for_reply(rdev, &wr_wait, 0, 0, __func__);
+ c4iw_init_wr_wait(wr_waitp);
+ ret = c4iw_ref_send_wait(rdev, skb, wr_waitp, 0, 0, __func__);
if (ret)
goto err4;
@@ -155,11 +160,11 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
cq->gts = rdev->lldi.gts_reg;
cq->rdev = rdev;
- cq->bar2_va = c4iw_bar2_addrs(rdev, cq->cqid, T4_BAR2_QTYPE_INGRESS,
+ cq->bar2_va = c4iw_bar2_addrs(rdev, cq->cqid, CXGB4_BAR2_QTYPE_INGRESS,
&cq->bar2_qid,
user ? &cq->bar2_pa : NULL);
if (user && !cq->bar2_pa) {
- pr_warn(MOD "%s: cqid %u not in BAR2 range.\n",
+ pr_warn("%s: cqid %u not in BAR2 range\n",
pci_name(rdev->lldi.pdev), cq->cqid);
ret = -EINVAL;
goto err4;
@@ -176,12 +181,12 @@ err1:
return ret;
}
-static void insert_recv_cqe(struct t4_wq *wq, struct t4_cq *cq)
+static void insert_recv_cqe(struct t4_wq *wq, struct t4_cq *cq, u32 srqidx)
{
struct t4_cqe cqe;
- PDBG("%s wq %p cq %p sw_cidx %u sw_pidx %u\n", __func__,
- wq, cq, cq->sw_cidx, cq->sw_pidx);
+ pr_debug("wq %p cq %p sw_cidx %u sw_pidx %u\n",
+ wq, cq, cq->sw_cidx, cq->sw_pidx);
memset(&cqe, 0, sizeof(cqe));
cqe.header = cpu_to_be32(CQE_STATUS_V(T4_ERR_SWFLUSH) |
CQE_OPCODE_V(FW_RI_SEND) |
@@ -189,6 +194,8 @@ static void insert_recv_cqe(struct t4_wq *wq, struct t4_cq *cq)
CQE_SWCQE_V(1) |
CQE_QPID_V(wq->sq.qid));
cqe.bits_type_ts = cpu_to_be64(CQE_GENBIT_V((u64)cq->gen));
+ if (srqidx)
+ cqe.u.srcqe.abs_rqe_idx = cpu_to_be32(srqidx);
cq->sw_queue[cq->sw_pidx] = cqe;
t4_swcq_produce(cq);
}
@@ -198,11 +205,10 @@ int c4iw_flush_rq(struct t4_wq *wq, struct t4_cq *cq, int count)
int flushed = 0;
int in_use = wq->rq.in_use - count;
- BUG_ON(in_use < 0);
- PDBG("%s wq %p cq %p rq.in_use %u skip count %u\n", __func__,
- wq, cq, wq->rq.in_use, count);
+ pr_debug("wq %p cq %p rq.in_use %u skip count %u\n",
+ wq, cq, wq->rq.in_use, count);
while (in_use--) {
- insert_recv_cqe(wq, cq);
+ insert_recv_cqe(wq, cq, 0);
flushed++;
}
return flushed;
@@ -213,8 +219,8 @@ static void insert_sq_cqe(struct t4_wq *wq, struct t4_cq *cq,
{
struct t4_cqe cqe;
- PDBG("%s wq %p cq %p sw_cidx %u sw_pidx %u\n", __func__,
- wq, cq, cq->sw_cidx, cq->sw_pidx);
+ pr_debug("wq %p cq %p sw_cidx %u sw_pidx %u\n",
+ wq, cq, cq->sw_cidx, cq->sw_pidx);
memset(&cqe, 0, sizeof(cqe));
cqe.header = cpu_to_be32(CQE_STATUS_V(T4_ERR_SWFLUSH) |
CQE_OPCODE_V(swcqe->opcode) |
@@ -241,14 +247,11 @@ int c4iw_flush_sq(struct c4iw_qp *qhp)
if (wq->sq.flush_cidx == -1)
wq->sq.flush_cidx = wq->sq.cidx;
idx = wq->sq.flush_cidx;
- BUG_ON(idx >= wq->sq.size);
while (idx != wq->sq.pidx) {
swsqe = &wq->sq.sw_sq[idx];
- BUG_ON(swsqe->flushed);
swsqe->flushed = 1;
insert_sq_cqe(wq, cq, swsqe);
if (wq->sq.oldest_read == swsqe) {
- BUG_ON(swsqe->opcode != FW_RI_READ_REQ);
advance_oldest_read(wq);
}
flushed++;
@@ -269,7 +272,6 @@ static void flush_completed_wrs(struct t4_wq *wq, struct t4_cq *cq)
if (wq->sq.flush_cidx == -1)
wq->sq.flush_cidx = wq->sq.cidx;
cidx = wq->sq.flush_cidx;
- BUG_ON(cidx > wq->sq.size);
while (cidx != wq->sq.pidx) {
swsqe = &wq->sq.sw_sq[cidx];
@@ -278,13 +280,11 @@ static void flush_completed_wrs(struct t4_wq *wq, struct t4_cq *cq)
cidx = 0;
} else if (swsqe->complete) {
- BUG_ON(swsqe->flushed);
-
/*
* Insert this completed cqe into the swcq.
*/
- PDBG("%s moving cqe into swcq sq idx %u cq idx %u\n",
- __func__, cidx, cq->sw_pidx);
+ pr_debug("moving cqe into swcq sq idx %u cq idx %u\n",
+ cidx, cq->sw_pidx);
swsqe->cqe.header |= htonl(CQE_SWCQE_V(1));
cq->sw_queue[cq->sw_pidx] = swsqe->cqe;
t4_swcq_produce(cq);
@@ -332,14 +332,14 @@ static void advance_oldest_read(struct t4_wq *wq)
* Deal with out-of-order and/or completions that complete
* prior unsignalled WRs.
*/
-void c4iw_flush_hw_cq(struct c4iw_cq *chp)
+void c4iw_flush_hw_cq(struct c4iw_cq *chp, struct c4iw_qp *flush_qhp)
{
struct t4_cqe *hw_cqe, *swcqe, read_cqe;
struct c4iw_qp *qhp;
struct t4_swsqe *swsqe;
int ret;
- PDBG("%s cqid 0x%x\n", __func__, chp->cq.cqid);
+ pr_debug("cqid 0x%x\n", chp->cq.cqid);
ret = t4_next_hw_cqe(&chp->cq, &hw_cqe);
/*
@@ -356,6 +356,13 @@ void c4iw_flush_hw_cq(struct c4iw_cq *chp)
if (qhp == NULL)
goto next_cqe;
+ if (flush_qhp != qhp) {
+ spin_lock(&qhp->lock);
+
+ if (qhp->wq.flushed == 1)
+ goto next_cqe;
+ }
+
if (CQE_OPCODE(hw_cqe) == FW_RI_TERMINATE)
goto next_cqe;
@@ -407,11 +414,18 @@ void c4iw_flush_hw_cq(struct c4iw_cq *chp)
next_cqe:
t4_hwcq_consume(&chp->cq);
ret = t4_next_hw_cqe(&chp->cq, &hw_cqe);
+ if (qhp && flush_qhp != qhp)
+ spin_unlock(&qhp->lock);
}
}
static int cqe_completes_wr(struct t4_cqe *cqe, struct t4_wq *wq)
{
+ if (DRAIN_CQE(cqe)) {
+ WARN_ONCE(1, "Unexpected DRAIN CQE qp id %u!\n", wq->sq.qid);
+ return 0;
+ }
+
if (CQE_OPCODE(cqe) == FW_RI_TERMINATE)
return 0;
@@ -432,7 +446,7 @@ void c4iw_count_rcqes(struct t4_cq *cq, struct t4_wq *wq, int *count)
u32 ptr;
*count = 0;
- PDBG("%s count zero %d\n", __func__, *count);
+ pr_debug("count zero %d\n", *count);
ptr = cq->sw_cidx;
while (ptr != cq->sw_pidx) {
cqe = &cq->sw_queue[ptr];
@@ -442,7 +456,73 @@ void c4iw_count_rcqes(struct t4_cq *cq, struct t4_wq *wq, int *count)
if (++ptr == cq->size)
ptr = 0;
}
- PDBG("%s cq %p count %d\n", __func__, cq, *count);
+ pr_debug("cq %p count %d\n", cq, *count);
+}
+
+static void post_pending_srq_wrs(struct t4_srq *srq)
+{
+ struct t4_srq_pending_wr *pwr;
+ u16 idx = 0;
+
+ while (srq->pending_in_use) {
+ pwr = &srq->pending_wrs[srq->pending_cidx];
+ srq->sw_rq[srq->pidx].wr_id = pwr->wr_id;
+ srq->sw_rq[srq->pidx].valid = 1;
+
+ pr_debug("%s posting pending cidx %u pidx %u wq_pidx %u in_use %u rq_size %u wr_id %llx\n",
+ __func__,
+ srq->cidx, srq->pidx, srq->wq_pidx,
+ srq->in_use, srq->size,
+ (unsigned long long)pwr->wr_id);
+
+ c4iw_copy_wr_to_srq(srq, &pwr->wqe, pwr->len16);
+ t4_srq_consume_pending_wr(srq);
+ t4_srq_produce(srq, pwr->len16);
+ idx += DIV_ROUND_UP(pwr->len16 * 16, T4_EQ_ENTRY_SIZE);
+ }
+
+ if (idx) {
+ t4_ring_srq_db(srq, idx, pwr->len16, &pwr->wqe);
+ srq->queue[srq->size].status.host_wq_pidx =
+ srq->wq_pidx;
+ }
+}
+
+static u64 reap_srq_cqe(struct t4_cqe *hw_cqe, struct t4_srq *srq)
+{
+ int rel_idx = CQE_ABS_RQE_IDX(hw_cqe) - srq->rqt_abs_idx;
+ u64 wr_id;
+
+ srq->sw_rq[rel_idx].valid = 0;
+ wr_id = srq->sw_rq[rel_idx].wr_id;
+
+ if (rel_idx == srq->cidx) {
+ pr_debug("%s in order cqe rel_idx %u cidx %u pidx %u wq_pidx %u in_use %u rq_size %u wr_id %llx\n",
+ __func__, rel_idx, srq->cidx, srq->pidx,
+ srq->wq_pidx, srq->in_use, srq->size,
+ (unsigned long long)srq->sw_rq[rel_idx].wr_id);
+ t4_srq_consume(srq);
+ while (srq->ooo_count && !srq->sw_rq[srq->cidx].valid) {
+ pr_debug("%s eat ooo cidx %u pidx %u wq_pidx %u in_use %u rq_size %u ooo_count %u wr_id %llx\n",
+ __func__, srq->cidx, srq->pidx,
+ srq->wq_pidx, srq->in_use,
+ srq->size, srq->ooo_count,
+ (unsigned long long)
+ srq->sw_rq[srq->cidx].wr_id);
+ t4_srq_consume_ooo(srq);
+ }
+ if (srq->ooo_count == 0 && srq->pending_in_use)
+ post_pending_srq_wrs(srq);
+ } else {
+ pr_debug("%s ooo cqe rel_idx %u cidx %u pidx %u wq_pidx %u in_use %u rq_size %u ooo_count %u wr_id %llx\n",
+ __func__, rel_idx, srq->cidx,
+ srq->pidx, srq->wq_pidx,
+ srq->in_use, srq->size,
+ srq->ooo_count,
+ (unsigned long long)srq->sw_rq[rel_idx].wr_id);
+ t4_srq_produce_ooo(srq);
+ }
+ return wr_id;
}
/*
@@ -462,7 +542,8 @@ void c4iw_count_rcqes(struct t4_cq *cq, struct t4_wq *wq, int *count)
* -EOVERFLOW CQ overflow detected.
*/
static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe,
- u8 *cqe_flushed, u64 *cookie, u32 *credit)
+ u8 *cqe_flushed, u64 *cookie, u32 *credit,
+ struct t4_srq *srq)
{
int ret = 0;
struct t4_cqe *hw_cqe, read_cqe;
@@ -473,12 +554,11 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe,
if (ret)
return ret;
- PDBG("%s CQE OVF %u qpid 0x%0x genbit %u type %u status 0x%0x"
- " opcode 0x%0x len 0x%0x wrid_hi_stag 0x%x wrid_low_msn 0x%x\n",
- __func__, CQE_OVFBIT(hw_cqe), CQE_QPID(hw_cqe),
- CQE_GENBIT(hw_cqe), CQE_TYPE(hw_cqe), CQE_STATUS(hw_cqe),
- CQE_OPCODE(hw_cqe), CQE_LEN(hw_cqe), CQE_WRID_HI(hw_cqe),
- CQE_WRID_LOW(hw_cqe));
+ pr_debug("CQE OVF %u qpid 0x%0x genbit %u type %u status 0x%0x opcode 0x%0x len 0x%0x wrid_hi_stag 0x%x wrid_low_msn 0x%x\n",
+ CQE_OVFBIT(hw_cqe), CQE_QPID(hw_cqe),
+ CQE_GENBIT(hw_cqe), CQE_TYPE(hw_cqe), CQE_STATUS(hw_cqe),
+ CQE_OPCODE(hw_cqe), CQE_LEN(hw_cqe), CQE_WRID_HI(hw_cqe),
+ CQE_WRID_LOW(hw_cqe));
/*
* skip cqe's not affiliated with a QP.
@@ -505,6 +585,15 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe,
}
/*
+ * Special cqe for drain WR completions...
+ */
+ if (DRAIN_CQE(hw_cqe)) {
+ *cookie = CQE_DRAIN_COOKIE(hw_cqe);
+ *cqe = *hw_cqe;
+ goto skip_cqe;
+ }
+
+ /*
* Gotta tweak READ completions:
* 1) the cqe doesn't contain the sq_wptr from the wr.
* 2) opcode not reflected from the wr.
@@ -519,7 +608,7 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe,
*/
if (CQE_TYPE(hw_cqe) == 1) {
if (CQE_STATUS(hw_cqe))
- t4_set_wq_in_error(wq);
+ t4_set_wq_in_error(wq, 0);
ret = -EAGAIN;
goto skip_cqe;
}
@@ -530,7 +619,7 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe,
*/
if (CQE_WRID_STAG(hw_cqe) == 1) {
if (CQE_STATUS(hw_cqe))
- t4_set_wq_in_error(wq);
+ t4_set_wq_in_error(wq, 0);
ret = -EAGAIN;
goto skip_cqe;
}
@@ -555,7 +644,7 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe,
if (CQE_STATUS(hw_cqe) || t4_wq_in_error(wq)) {
*cqe_flushed = (CQE_STATUS(hw_cqe) == T4_ERR_SWFLUSH);
- t4_set_wq_in_error(wq);
+ t4_set_wq_in_error(wq, 0);
}
/*
@@ -569,16 +658,10 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe,
* then we complete this with T4_ERR_MSN and mark the wq in
* error.
*/
-
- if (t4_rq_empty(wq)) {
- t4_set_wq_in_error(wq);
- ret = -EAGAIN;
- goto skip_cqe;
- }
- if (unlikely((CQE_WRID_MSN(hw_cqe) != (wq->rq.msn)))) {
- t4_set_wq_in_error(wq);
- hw_cqe->header |= htonl(CQE_STATUS_V(T4_ERR_MSN));
- goto proc_cqe;
+ if (unlikely(!CQE_STATUS(hw_cqe) &&
+ CQE_WRID_MSN(hw_cqe) != wq->rq.msn)) {
+ t4_set_wq_in_error(wq, 0);
+ hw_cqe->header |= cpu_to_be32(CQE_STATUS_V(T4_ERR_MSN));
}
goto proc_cqe;
}
@@ -597,8 +680,8 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe,
if (!SW_CQE(hw_cqe) && (CQE_WRID_SQ_IDX(hw_cqe) != wq->sq.cidx)) {
struct t4_swsqe *swsqe;
- PDBG("%s out of order completion going in sw_sq at idx %u\n",
- __func__, CQE_WRID_SQ_IDX(hw_cqe));
+ pr_debug("out of order completion going in sw_sq at idx %u\n",
+ CQE_WRID_SQ_IDX(hw_cqe));
swsqe = &wq->sq.sw_sq[CQE_WRID_SQ_IDX(hw_cqe)];
swsqe->cqe = *hw_cqe;
swsqe->complete = 1;
@@ -615,7 +698,6 @@ proc_cqe:
*/
if (SQ_TYPE(hw_cqe)) {
int idx = CQE_WRID_SQ_IDX(hw_cqe);
- BUG_ON(idx >= wq->sq.size);
/*
* Account for any unsignaled completions completed by
@@ -629,21 +711,24 @@ proc_cqe:
wq->sq.in_use -= wq->sq.size + idx - wq->sq.cidx;
else
wq->sq.in_use -= idx - wq->sq.cidx;
- BUG_ON(wq->sq.in_use <= 0 && wq->sq.in_use >= wq->sq.size);
wq->sq.cidx = (uint16_t)idx;
- PDBG("%s completing sq idx %u\n", __func__, wq->sq.cidx);
+ pr_debug("completing sq idx %u\n", wq->sq.cidx);
*cookie = wq->sq.sw_sq[wq->sq.cidx].wr_id;
if (c4iw_wr_log)
c4iw_log_wr_stats(wq, hw_cqe);
t4_sq_consume(wq);
} else {
- PDBG("%s completing rq idx %u\n", __func__, wq->rq.cidx);
- *cookie = wq->rq.sw_rq[wq->rq.cidx].wr_id;
- BUG_ON(t4_rq_empty(wq));
- if (c4iw_wr_log)
- c4iw_log_wr_stats(wq, hw_cqe);
- t4_rq_consume(wq);
+ if (!srq) {
+ pr_debug("completing rq idx %u\n", wq->rq.cidx);
+ *cookie = wq->rq.sw_rq[wq->rq.cidx].wr_id;
+ if (c4iw_wr_log)
+ c4iw_log_wr_stats(wq, hw_cqe);
+ t4_rq_consume(wq);
+ } else {
+ *cookie = reap_srq_cqe(hw_cqe, srq);
+ }
+ wq->rq.msn++;
goto skip_cqe;
}
@@ -655,49 +740,29 @@ flush_wq:
skip_cqe:
if (SW_CQE(hw_cqe)) {
- PDBG("%s cq %p cqid 0x%x skip sw cqe cidx %u\n",
- __func__, cq, cq->cqid, cq->sw_cidx);
+ pr_debug("cq %p cqid 0x%x skip sw cqe cidx %u\n",
+ cq, cq->cqid, cq->sw_cidx);
t4_swcq_consume(cq);
} else {
- PDBG("%s cq %p cqid 0x%x skip hw cqe cidx %u\n",
- __func__, cq, cq->cqid, cq->cidx);
+ pr_debug("cq %p cqid 0x%x skip hw cqe cidx %u\n",
+ cq, cq->cqid, cq->cidx);
t4_hwcq_consume(cq);
}
return ret;
}
-/*
- * Get one cq entry from c4iw and map it to openib.
- *
- * Returns:
- * 0 cqe returned
- * -ENODATA EMPTY;
- * -EAGAIN caller must try again
- * any other -errno fatal error
- */
-static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc)
+static int __c4iw_poll_cq_one(struct c4iw_cq *chp, struct c4iw_qp *qhp,
+ struct ib_wc *wc, struct c4iw_srq *srq)
{
- struct c4iw_qp *qhp = NULL;
- struct t4_cqe uninitialized_var(cqe), *rd_cqe;
- struct t4_wq *wq;
+ struct t4_cqe cqe;
+ struct t4_wq *wq = qhp ? &qhp->wq : NULL;
u32 credit = 0;
u8 cqe_flushed;
u64 cookie = 0;
int ret;
- ret = t4_next_cqe(&chp->cq, &rd_cqe);
-
- if (ret)
- return ret;
-
- qhp = get_qhp(chp->rhp, CQE_QPID(rd_cqe));
- if (!qhp)
- wq = NULL;
- else {
- spin_lock(&qhp->lock);
- wq = &(qhp->wq);
- }
- ret = poll_cq(wq, &(chp->cq), &cqe, &cqe_flushed, &cookie, &credit);
+ ret = poll_cq(wq, &(chp->cq), &cqe, &cqe_flushed, &cookie, &credit,
+ srq ? &srq->wq : NULL);
if (ret)
goto out;
@@ -706,25 +771,51 @@ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc)
wc->vendor_err = CQE_STATUS(&cqe);
wc->wc_flags = 0;
- PDBG("%s qpid 0x%x type %d opcode %d status 0x%x len %u wrid hi 0x%x "
- "lo 0x%x cookie 0x%llx\n", __func__, CQE_QPID(&cqe),
- CQE_TYPE(&cqe), CQE_OPCODE(&cqe), CQE_STATUS(&cqe), CQE_LEN(&cqe),
- CQE_WRID_HI(&cqe), CQE_WRID_LOW(&cqe), (unsigned long long)cookie);
+ /*
+ * Simulate a SRQ_LIMIT_REACHED HW notification if required.
+ */
+ if (srq && !(srq->flags & T4_SRQ_LIMIT_SUPPORT) && srq->armed &&
+ srq->wq.in_use < srq->srq_limit)
+ c4iw_dispatch_srq_limit_reached_event(srq);
+
+ pr_debug("qpid 0x%x type %d opcode %d status 0x%x len %u wrid hi 0x%x lo 0x%x cookie 0x%llx\n",
+ CQE_QPID(&cqe),
+ CQE_TYPE(&cqe), CQE_OPCODE(&cqe),
+ CQE_STATUS(&cqe), CQE_LEN(&cqe),
+ CQE_WRID_HI(&cqe), CQE_WRID_LOW(&cqe),
+ (unsigned long long)cookie);
if (CQE_TYPE(&cqe) == 0) {
if (!CQE_STATUS(&cqe))
wc->byte_len = CQE_LEN(&cqe);
else
wc->byte_len = 0;
- wc->opcode = IB_WC_RECV;
- if (CQE_OPCODE(&cqe) == FW_RI_SEND_WITH_INV ||
- CQE_OPCODE(&cqe) == FW_RI_SEND_WITH_SE_INV) {
+
+ switch (CQE_OPCODE(&cqe)) {
+ case FW_RI_SEND:
+ wc->opcode = IB_WC_RECV;
+ break;
+ case FW_RI_SEND_WITH_INV:
+ case FW_RI_SEND_WITH_SE_INV:
+ wc->opcode = IB_WC_RECV;
wc->ex.invalidate_rkey = CQE_WRID_STAG(&cqe);
wc->wc_flags |= IB_WC_WITH_INVALIDATE;
c4iw_invalidate_mr(qhp->rhp, wc->ex.invalidate_rkey);
+ break;
+ case FW_RI_WRITE_IMMEDIATE:
+ wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
+ wc->ex.imm_data = CQE_IMM_DATA(&cqe);
+ wc->wc_flags |= IB_WC_WITH_IMM;
+ break;
+ default:
+ pr_err("Unexpected opcode %d in the CQE received for QPID=0x%0x\n",
+ CQE_OPCODE(&cqe), CQE_QPID(&cqe));
+ ret = -EINVAL;
+ goto out;
}
} else {
switch (CQE_OPCODE(&cqe)) {
+ case FW_RI_WRITE_IMMEDIATE:
case FW_RI_RDMA_WRITE:
wc->opcode = IB_WC_RDMA_WRITE;
break;
@@ -754,8 +845,7 @@ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc)
CQE_WRID_FR_STAG(&cqe));
break;
default:
- printk(KERN_ERR MOD "Unexpected opcode %d "
- "in the CQE received for QPID=0x%0x\n",
+ pr_err("Unexpected opcode %d in the CQE received for QPID=0x%0x\n",
CQE_OPCODE(&cqe), CQE_QPID(&cqe));
ret = -EINVAL;
goto out;
@@ -810,21 +900,48 @@ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc)
wc->status = IB_WC_WR_FLUSH_ERR;
break;
default:
- printk(KERN_ERR MOD
- "Unexpected cqe_status 0x%x for QPID=0x%0x\n",
+ pr_err("Unexpected cqe_status 0x%x for QPID=0x%0x\n",
CQE_STATUS(&cqe), CQE_QPID(&cqe));
wc->status = IB_WC_FATAL_ERR;
}
}
out:
- if (wq) {
- if (unlikely(qhp->attr.state != C4IW_QP_STATE_RTS)) {
- if (t4_sq_empty(wq))
- complete(&qhp->sq_drained);
- if (t4_rq_empty(wq))
- complete(&qhp->rq_drained);
- }
+ return ret;
+}
+
+/*
+ * Get one cq entry from c4iw and map it to openib.
+ *
+ * Returns:
+ * 0 cqe returned
+ * -ENODATA EMPTY;
+ * -EAGAIN caller must try again
+ * any other -errno fatal error
+ */
+static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc)
+{
+ struct c4iw_srq *srq = NULL;
+ struct c4iw_qp *qhp = NULL;
+ struct t4_cqe *rd_cqe;
+ int ret;
+
+ ret = t4_next_cqe(&chp->cq, &rd_cqe);
+
+ if (ret)
+ return ret;
+
+ qhp = get_qhp(chp->rhp, CQE_QPID(rd_cqe));
+ if (qhp) {
+ spin_lock(&qhp->lock);
+ srq = qhp->srq;
+ if (srq)
+ spin_lock(&srq->lock);
+ ret = __c4iw_poll_cq_one(chp, qhp, wc, srq);
spin_unlock(&qhp->lock);
+ if (srq)
+ spin_unlock(&srq->lock);
+ } else {
+ ret = __c4iw_poll_cq_one(chp, NULL, wc, NULL);
}
return ret;
}
@@ -850,66 +967,79 @@ int c4iw_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
return !err || err == -ENODATA ? npolled : err;
}
-int c4iw_destroy_cq(struct ib_cq *ib_cq)
+void c4iw_cq_rem_ref(struct c4iw_cq *chp)
+{
+ if (refcount_dec_and_test(&chp->refcnt))
+ complete(&chp->cq_rel_comp);
+}
+
+int c4iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
{
struct c4iw_cq *chp;
struct c4iw_ucontext *ucontext;
- PDBG("%s ib_cq %p\n", __func__, ib_cq);
+ pr_debug("ib_cq %p\n", ib_cq);
chp = to_c4iw_cq(ib_cq);
- remove_handle(chp->rhp, &chp->rhp->cqidr, chp->cq.cqid);
- atomic_dec(&chp->refcnt);
- wait_event(chp->wait, !atomic_read(&chp->refcnt));
+ xa_erase_irq(&chp->rhp->cqs, chp->cq.cqid);
+ c4iw_cq_rem_ref(chp);
+ wait_for_completion(&chp->cq_rel_comp);
- ucontext = ib_cq->uobject ? to_c4iw_ucontext(ib_cq->uobject->context)
- : NULL;
+ ucontext = rdma_udata_to_drv_context(udata, struct c4iw_ucontext,
+ ibucontext);
destroy_cq(&chp->rhp->rdev, &chp->cq,
ucontext ? &ucontext->uctx : &chp->cq.rdev->uctx,
- chp->destroy_skb);
- chp->destroy_skb = NULL;
- kfree(chp);
+ chp->destroy_skb, chp->wr_waitp);
+ c4iw_put_wr_wait(chp->wr_waitp);
return 0;
}
-struct ib_cq *c4iw_create_cq(struct ib_device *ibdev,
- const struct ib_cq_init_attr *attr,
- struct ib_ucontext *ib_context,
- struct ib_udata *udata)
+int c4iw_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+ struct uverbs_attr_bundle *attrs)
{
+ struct ib_udata *udata = &attrs->driver_udata;
+ struct ib_device *ibdev = ibcq->device;
int entries = attr->cqe;
int vector = attr->comp_vector;
- struct c4iw_dev *rhp;
- struct c4iw_cq *chp;
+ struct c4iw_dev *rhp = to_c4iw_dev(ibcq->device);
+ struct c4iw_cq *chp = to_c4iw_cq(ibcq);
+ struct c4iw_create_cq ucmd;
struct c4iw_create_cq_resp uresp;
- struct c4iw_ucontext *ucontext = NULL;
int ret, wr_len;
size_t memsize, hwentries;
struct c4iw_mm_entry *mm, *mm2;
+ struct c4iw_ucontext *ucontext = rdma_udata_to_drv_context(
+ udata, struct c4iw_ucontext, ibucontext);
- PDBG("%s ib_dev %p entries %d\n", __func__, ibdev, entries);
+ pr_debug("ib_dev %p entries %d\n", ibdev, entries);
if (attr->flags)
- return ERR_PTR(-EINVAL);
+ return -EOPNOTSUPP;
- rhp = to_c4iw_dev(ibdev);
+ if (entries < 1 || entries > ibdev->attrs.max_cqe)
+ return -EINVAL;
if (vector >= rhp->rdev.lldi.nciq)
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
+
+ if (udata) {
+ if (udata->inlen < sizeof(ucmd))
+ ucontext->is_32b_cqe = 1;
+ }
- chp = kzalloc(sizeof(*chp), GFP_KERNEL);
- if (!chp)
- return ERR_PTR(-ENOMEM);
+ chp->wr_waitp = c4iw_alloc_wr_wait(GFP_KERNEL);
+ if (!chp->wr_waitp) {
+ ret = -ENOMEM;
+ goto err_free_chp;
+ }
+ c4iw_init_wr_wait(chp->wr_waitp);
wr_len = sizeof(struct fw_ri_res_wr) + sizeof(struct fw_ri_res);
chp->destroy_skb = alloc_skb(wr_len, GFP_KERNEL);
if (!chp->destroy_skb) {
ret = -ENOMEM;
- goto err1;
+ goto err_free_wr_wait;
}
- if (ib_context)
- ucontext = to_c4iw_ucontext(ib_context);
-
/* account for the status page. */
entries++;
@@ -933,41 +1063,46 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev,
if (hwentries < 64)
hwentries = 64;
- memsize = hwentries * sizeof *chp->cq.queue;
+ memsize = hwentries * ((ucontext && ucontext->is_32b_cqe) ?
+ (sizeof(*chp->cq.queue) / 2) : sizeof(*chp->cq.queue));
/*
* memsize must be a multiple of the page size if its a user cq.
*/
- if (ucontext)
+ if (udata)
memsize = roundup(memsize, PAGE_SIZE);
+
chp->cq.size = hwentries;
chp->cq.memsize = memsize;
chp->cq.vector = vector;
ret = create_cq(&rhp->rdev, &chp->cq,
- ucontext ? &ucontext->uctx : &rhp->rdev.uctx);
+ ucontext ? &ucontext->uctx : &rhp->rdev.uctx,
+ chp->wr_waitp);
if (ret)
- goto err2;
+ goto err_free_skb;
chp->rhp = rhp;
chp->cq.size--; /* status page */
chp->ibcq.cqe = entries - 2;
spin_lock_init(&chp->lock);
spin_lock_init(&chp->comp_handler_lock);
- atomic_set(&chp->refcnt, 1);
- init_waitqueue_head(&chp->wait);
- ret = insert_handle(rhp, &rhp->cqidr, chp, chp->cq.cqid);
+ refcount_set(&chp->refcnt, 1);
+ init_completion(&chp->cq_rel_comp);
+ ret = xa_insert_irq(&rhp->cqs, chp->cq.cqid, chp, GFP_KERNEL);
if (ret)
- goto err3;
+ goto err_destroy_cq;
if (ucontext) {
- mm = kmalloc(sizeof *mm, GFP_KERNEL);
+ ret = -ENOMEM;
+ mm = kmalloc(sizeof(*mm), GFP_KERNEL);
if (!mm)
- goto err4;
- mm2 = kmalloc(sizeof *mm2, GFP_KERNEL);
+ goto err_remove_handle;
+ mm2 = kmalloc(sizeof(*mm2), GFP_KERNEL);
if (!mm2)
- goto err5;
+ goto err_free_mm;
+ memset(&uresp, 0, sizeof(uresp));
uresp.qid_mask = rhp->rdev.cqmask;
uresp.cqid = chp->cq.cqid;
uresp.size = chp->cq.size;
@@ -977,46 +1112,56 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev,
ucontext->key += PAGE_SIZE;
uresp.gts_key = ucontext->key;
ucontext->key += PAGE_SIZE;
+ /* communicate to the userspace that
+ * kernel driver supports 64B CQE
+ */
+ uresp.flags |= C4IW_64B_CQE;
+
spin_unlock(&ucontext->mmap_lock);
ret = ib_copy_to_udata(udata, &uresp,
- sizeof(uresp) - sizeof(uresp.reserved));
+ ucontext->is_32b_cqe ?
+ sizeof(uresp) - sizeof(uresp.flags) :
+ sizeof(uresp));
if (ret)
- goto err6;
+ goto err_free_mm2;
mm->key = uresp.key;
- mm->addr = virt_to_phys(chp->cq.queue);
+ mm->addr = 0;
+ mm->vaddr = chp->cq.queue;
+ mm->dma_addr = chp->cq.dma_addr;
mm->len = chp->cq.memsize;
+ insert_flag_to_mmap(&rhp->rdev, mm, mm->addr);
insert_mmap(ucontext, mm);
mm2->key = uresp.gts_key;
mm2->addr = chp->cq.bar2_pa;
mm2->len = PAGE_SIZE;
+ mm2->vaddr = NULL;
+ mm2->dma_addr = 0;
+ insert_flag_to_mmap(&rhp->rdev, mm2, mm2->addr);
insert_mmap(ucontext, mm2);
}
- PDBG("%s cqid 0x%0x chp %p size %u memsize %zu, dma_addr 0x%0llx\n",
- __func__, chp->cq.cqid, chp, chp->cq.size,
- chp->cq.memsize, (unsigned long long) chp->cq.dma_addr);
- return &chp->ibcq;
-err6:
+
+ pr_debug("cqid 0x%0x chp %p size %u memsize %zu, dma_addr %pad\n",
+ chp->cq.cqid, chp, chp->cq.size, chp->cq.memsize,
+ &chp->cq.dma_addr);
+ return 0;
+err_free_mm2:
kfree(mm2);
-err5:
+err_free_mm:
kfree(mm);
-err4:
- remove_handle(rhp, &rhp->cqidr, chp->cq.cqid);
-err3:
+err_remove_handle:
+ xa_erase_irq(&rhp->cqs, chp->cq.cqid);
+err_destroy_cq:
destroy_cq(&chp->rhp->rdev, &chp->cq,
ucontext ? &ucontext->uctx : &rhp->rdev.uctx,
- chp->destroy_skb);
-err2:
+ chp->destroy_skb, chp->wr_waitp);
+err_free_skb:
kfree_skb(chp->destroy_skb);
-err1:
- kfree(chp);
- return ERR_PTR(ret);
-}
-
-int c4iw_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata)
-{
- return -ENOSYS;
+err_free_wr_wait:
+ c4iw_put_wr_wait(chp->wr_waitp);
+err_free_chp:
+ return ret;
}
int c4iw_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
@@ -1034,3 +1179,19 @@ int c4iw_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
spin_unlock_irqrestore(&chp->lock, flag);
return ret;
}
+
+void c4iw_flush_srqidx(struct c4iw_qp *qhp, u32 srqidx)
+{
+ struct c4iw_cq *rchp = to_c4iw_cq(qhp->ibqp.recv_cq);
+ unsigned long flag;
+
+ /* locking heirarchy: cq lock first, then qp lock. */
+ spin_lock_irqsave(&rchp->lock, flag);
+ spin_lock(&qhp->lock);
+
+ /* create a SRQ RECV CQE for srqidx */
+ insert_recv_cqe(&qhp->wq, &rchp->cq, srqidx);
+
+ spin_unlock(&qhp->lock);
+ spin_unlock_irqrestore(&rchp->lock, flag);
+}
diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c
index 516b0ae6dc3f..d892f55febe2 100644
--- a/drivers/infiniband/hw/cxgb4/device.c
+++ b/drivers/infiniband/hw/cxgb4/device.c
@@ -44,7 +44,6 @@
MODULE_AUTHOR("Steve Wise");
MODULE_DESCRIPTION("Chelsio T4/T5 RDMA Driver");
MODULE_LICENSE("Dual BSD/GPL");
-MODULE_VERSION(DRV_VERSION);
static int allow_db_fc_on_t5;
module_param(allow_db_fc_on_t5, int, 0644);
@@ -65,14 +64,9 @@ module_param(c4iw_wr_log_size_order, int, 0444);
MODULE_PARM_DESC(c4iw_wr_log_size_order,
"Number of entries (log2) in the work request timing log.");
-struct uld_ctx {
- struct list_head entry;
- struct cxgb4_lld_info lldi;
- struct c4iw_dev *dev;
-};
-
static LIST_HEAD(uld_ctx_list);
static DEFINE_MUTEX(dev_mutex);
+static struct workqueue_struct *reg_workq;
#define DB_FC_RESUME_SIZE 64
#define DB_FC_RESUME_DELAY 1
@@ -87,14 +81,6 @@ struct c4iw_debugfs_data {
int pos;
};
-static int count_idrs(int id, void *p, void *data)
-{
- int *countp = data;
-
- *countp = *countp + 1;
- return 0;
-}
-
static ssize_t debugfs_read(struct file *file, char __user *buf, size_t count,
loff_t *ppos)
{
@@ -114,19 +100,19 @@ void c4iw_log_wr_stats(struct t4_wq *wq, struct t4_cqe *cqe)
idx = (atomic_inc_return(&wq->rdev->wr_log_idx) - 1) &
(wq->rdev->wr_log_size - 1);
le.poll_sge_ts = cxgb4_read_sge_timestamp(wq->rdev->lldi.ports[0]);
- getnstimeofday(&le.poll_host_ts);
+ le.poll_host_time = ktime_get();
le.valid = 1;
le.cqe_sge_ts = CQE_TS(cqe);
if (SQ_TYPE(cqe)) {
le.qid = wq->sq.qid;
le.opcode = CQE_OPCODE(cqe);
- le.post_host_ts = wq->sq.sw_sq[wq->sq.cidx].host_ts;
+ le.post_host_time = wq->sq.sw_sq[wq->sq.cidx].host_time;
le.post_sge_ts = wq->sq.sw_sq[wq->sq.cidx].sge_ts;
le.wr_id = CQE_WRID_SQ_IDX(cqe);
} else {
le.qid = wq->rq.qid;
le.opcode = FW_RI_RECEIVE;
- le.post_host_ts = wq->rq.sw_rq[wq->rq.cidx].host_ts;
+ le.post_host_time = wq->rq.sw_rq[wq->rq.cidx].host_time;
le.post_sge_ts = wq->rq.sw_rq[wq->rq.cidx].sge_ts;
le.wr_id = CQE_WRID_MSN(cqe);
}
@@ -136,9 +122,9 @@ void c4iw_log_wr_stats(struct t4_wq *wq, struct t4_cqe *cqe)
static int wr_log_show(struct seq_file *seq, void *v)
{
struct c4iw_dev *dev = seq->private;
- struct timespec prev_ts = {0, 0};
+ ktime_t prev_time;
struct wr_log_entry *lep;
- int prev_ts_set = 0;
+ int prev_time_set = 0;
int idx, end;
#define ts2ns(ts) div64_u64((ts) * dev->rdev.lldi.cclk_ps, 1000)
@@ -151,33 +137,29 @@ static int wr_log_show(struct seq_file *seq, void *v)
lep = &dev->rdev.wr_log[idx];
while (idx != end) {
if (lep->valid) {
- if (!prev_ts_set) {
- prev_ts_set = 1;
- prev_ts = lep->poll_host_ts;
+ if (!prev_time_set) {
+ prev_time_set = 1;
+ prev_time = lep->poll_host_time;
}
- seq_printf(seq, "%04u: sec %lu nsec %lu qid %u opcode "
- "%u %s 0x%x host_wr_delta sec %lu nsec %lu "
+ seq_printf(seq, "%04u: nsec %llu qid %u opcode "
+ "%u %s 0x%x host_wr_delta nsec %llu "
"post_sge_ts 0x%llx cqe_sge_ts 0x%llx "
"poll_sge_ts 0x%llx post_poll_delta_ns %llu "
"cqe_poll_delta_ns %llu\n",
idx,
- timespec_sub(lep->poll_host_ts,
- prev_ts).tv_sec,
- timespec_sub(lep->poll_host_ts,
- prev_ts).tv_nsec,
+ ktime_to_ns(ktime_sub(lep->poll_host_time,
+ prev_time)),
lep->qid, lep->opcode,
lep->opcode == FW_RI_RECEIVE ?
"msn" : "wrid",
lep->wr_id,
- timespec_sub(lep->poll_host_ts,
- lep->post_host_ts).tv_sec,
- timespec_sub(lep->poll_host_ts,
- lep->post_host_ts).tv_nsec,
+ ktime_to_ns(ktime_sub(lep->poll_host_time,
+ lep->post_host_time)),
lep->post_sge_ts, lep->cqe_sge_ts,
lep->poll_sge_ts,
ts2ns(lep->poll_sge_ts - lep->post_sge_ts),
ts2ns(lep->poll_sge_ts - lep->cqe_sge_ts));
- prev_ts = lep->poll_host_ts;
+ prev_time = lep->poll_host_time;
}
idx++;
if (idx > (dev->rdev.wr_log_size - 1))
@@ -214,13 +196,57 @@ static const struct file_operations wr_log_debugfs_fops = {
.write = wr_log_clear,
};
-static int dump_qp(int id, void *p, void *data)
+static struct sockaddr_in zero_sin = {
+ .sin_family = AF_INET,
+};
+
+static struct sockaddr_in6 zero_sin6 = {
+ .sin6_family = AF_INET6,
+};
+
+static void set_ep_sin_addrs(struct c4iw_ep *ep,
+ struct sockaddr_in **lsin,
+ struct sockaddr_in **rsin,
+ struct sockaddr_in **m_lsin,
+ struct sockaddr_in **m_rsin)
+{
+ struct iw_cm_id *id = ep->com.cm_id;
+
+ *m_lsin = (struct sockaddr_in *)&ep->com.local_addr;
+ *m_rsin = (struct sockaddr_in *)&ep->com.remote_addr;
+ if (id) {
+ *lsin = (struct sockaddr_in *)&id->local_addr;
+ *rsin = (struct sockaddr_in *)&id->remote_addr;
+ } else {
+ *lsin = &zero_sin;
+ *rsin = &zero_sin;
+ }
+}
+
+static void set_ep_sin6_addrs(struct c4iw_ep *ep,
+ struct sockaddr_in6 **lsin6,
+ struct sockaddr_in6 **rsin6,
+ struct sockaddr_in6 **m_lsin6,
+ struct sockaddr_in6 **m_rsin6)
+{
+ struct iw_cm_id *id = ep->com.cm_id;
+
+ *m_lsin6 = (struct sockaddr_in6 *)&ep->com.local_addr;
+ *m_rsin6 = (struct sockaddr_in6 *)&ep->com.remote_addr;
+ if (id) {
+ *lsin6 = (struct sockaddr_in6 *)&id->local_addr;
+ *rsin6 = (struct sockaddr_in6 *)&id->remote_addr;
+ } else {
+ *lsin6 = &zero_sin6;
+ *rsin6 = &zero_sin6;
+ }
+}
+
+static int dump_qp(unsigned long id, struct c4iw_qp *qp,
+ struct c4iw_debugfs_data *qpd)
{
- struct c4iw_qp *qp = p;
- struct c4iw_debugfs_data *qpd = data;
int space;
int cc;
-
if (id != qp->wq.sq.qid)
return 0;
@@ -229,40 +255,36 @@ static int dump_qp(int id, void *p, void *data)
return 1;
if (qp->ep) {
- if (qp->ep->com.local_addr.ss_family == AF_INET) {
- struct sockaddr_in *lsin = (struct sockaddr_in *)
- &qp->ep->com.cm_id->local_addr;
- struct sockaddr_in *rsin = (struct sockaddr_in *)
- &qp->ep->com.cm_id->remote_addr;
- struct sockaddr_in *mapped_lsin = (struct sockaddr_in *)
- &qp->ep->com.cm_id->m_local_addr;
- struct sockaddr_in *mapped_rsin = (struct sockaddr_in *)
- &qp->ep->com.cm_id->m_remote_addr;
+ struct c4iw_ep *ep = qp->ep;
+ if (ep->com.local_addr.ss_family == AF_INET) {
+ struct sockaddr_in *lsin;
+ struct sockaddr_in *rsin;
+ struct sockaddr_in *m_lsin;
+ struct sockaddr_in *m_rsin;
+
+ set_ep_sin_addrs(ep, &lsin, &rsin, &m_lsin, &m_rsin);
cc = snprintf(qpd->buf + qpd->pos, space,
- "rc qp sq id %u rq id %u state %u "
+ "rc qp sq id %u %s id %u state %u "
"onchip %u ep tid %u state %u "
"%pI4:%u/%u->%pI4:%u/%u\n",
- qp->wq.sq.qid, qp->wq.rq.qid,
+ qp->wq.sq.qid, qp->srq ? "srq" : "rq",
+ qp->srq ? qp->srq->idx : qp->wq.rq.qid,
(int)qp->attr.state,
qp->wq.sq.flags & T4_SQ_ONCHIP,
- qp->ep->hwtid, (int)qp->ep->com.state,
+ ep->hwtid, (int)ep->com.state,
&lsin->sin_addr, ntohs(lsin->sin_port),
- ntohs(mapped_lsin->sin_port),
+ ntohs(m_lsin->sin_port),
&rsin->sin_addr, ntohs(rsin->sin_port),
- ntohs(mapped_rsin->sin_port));
+ ntohs(m_rsin->sin_port));
} else {
- struct sockaddr_in6 *lsin6 = (struct sockaddr_in6 *)
- &qp->ep->com.cm_id->local_addr;
- struct sockaddr_in6 *rsin6 = (struct sockaddr_in6 *)
- &qp->ep->com.cm_id->remote_addr;
- struct sockaddr_in6 *mapped_lsin6 =
- (struct sockaddr_in6 *)
- &qp->ep->com.cm_id->m_local_addr;
- struct sockaddr_in6 *mapped_rsin6 =
- (struct sockaddr_in6 *)
- &qp->ep->com.cm_id->m_remote_addr;
+ struct sockaddr_in6 *lsin6;
+ struct sockaddr_in6 *rsin6;
+ struct sockaddr_in6 *m_lsin6;
+ struct sockaddr_in6 *m_rsin6;
+ set_ep_sin6_addrs(ep, &lsin6, &rsin6, &m_lsin6,
+ &m_rsin6);
cc = snprintf(qpd->buf + qpd->pos, space,
"rc qp sq id %u rq id %u state %u "
"onchip %u ep tid %u state %u "
@@ -270,13 +292,13 @@ static int dump_qp(int id, void *p, void *data)
qp->wq.sq.qid, qp->wq.rq.qid,
(int)qp->attr.state,
qp->wq.sq.flags & T4_SQ_ONCHIP,
- qp->ep->hwtid, (int)qp->ep->com.state,
+ ep->hwtid, (int)ep->com.state,
&lsin6->sin6_addr,
ntohs(lsin6->sin6_port),
- ntohs(mapped_lsin6->sin6_port),
+ ntohs(m_lsin6->sin6_port),
&rsin6->sin6_addr,
ntohs(rsin6->sin6_port),
- ntohs(mapped_rsin6->sin6_port));
+ ntohs(m_rsin6->sin6_port));
}
} else
cc = snprintf(qpd->buf + qpd->pos, space,
@@ -293,7 +315,7 @@ static int qp_release(struct inode *inode, struct file *file)
{
struct c4iw_debugfs_data *qpd = file->private_data;
if (!qpd) {
- printk(KERN_INFO "%s null qpd?\n", __func__);
+ pr_info("%s null qpd?\n", __func__);
return 0;
}
vfree(qpd->buf);
@@ -303,19 +325,24 @@ static int qp_release(struct inode *inode, struct file *file)
static int qp_open(struct inode *inode, struct file *file)
{
+ struct c4iw_qp *qp;
struct c4iw_debugfs_data *qpd;
+ unsigned long index;
int count = 1;
- qpd = kmalloc(sizeof *qpd, GFP_KERNEL);
+ qpd = kmalloc(sizeof(*qpd), GFP_KERNEL);
if (!qpd)
return -ENOMEM;
qpd->devp = inode->i_private;
qpd->pos = 0;
- spin_lock_irq(&qpd->devp->lock);
- idr_for_each(&qpd->devp->qpidr, count_idrs, &count);
- spin_unlock_irq(&qpd->devp->lock);
+ /*
+ * No need to lock; we drop the lock to call vmalloc so it's racy
+ * anyway. Someone who cares should switch this over to seq_file
+ */
+ xa_for_each(&qpd->devp->qps, index, qp)
+ count++;
qpd->bufsize = count * 180;
qpd->buf = vmalloc(qpd->bufsize);
@@ -324,9 +351,10 @@ static int qp_open(struct inode *inode, struct file *file)
return -ENOMEM;
}
- spin_lock_irq(&qpd->devp->lock);
- idr_for_each(&qpd->devp->qpidr, dump_qp, qpd);
- spin_unlock_irq(&qpd->devp->lock);
+ xa_lock_irq(&qpd->devp->qps);
+ xa_for_each(&qpd->devp->qps, index, qp)
+ dump_qp(index, qp, qpd);
+ xa_unlock_irq(&qpd->devp->qps);
qpd->buf[qpd->pos++] = 0;
file->private_data = qpd;
@@ -341,9 +369,8 @@ static const struct file_operations qp_debugfs_fops = {
.llseek = default_llseek,
};
-static int dump_stag(int id, void *p, void *data)
+static int dump_stag(unsigned long id, struct c4iw_debugfs_data *stagd)
{
- struct c4iw_debugfs_data *stagd = data;
int space;
int cc;
struct fw_ri_tpte tpte;
@@ -381,7 +408,7 @@ static int stag_release(struct inode *inode, struct file *file)
{
struct c4iw_debugfs_data *stagd = file->private_data;
if (!stagd) {
- printk(KERN_INFO "%s null stagd?\n", __func__);
+ pr_info("%s null stagd?\n", __func__);
return 0;
}
vfree(stagd->buf);
@@ -392,10 +419,12 @@ static int stag_release(struct inode *inode, struct file *file)
static int stag_open(struct inode *inode, struct file *file)
{
struct c4iw_debugfs_data *stagd;
+ void *p;
+ unsigned long index;
int ret = 0;
int count = 1;
- stagd = kmalloc(sizeof *stagd, GFP_KERNEL);
+ stagd = kmalloc(sizeof(*stagd), GFP_KERNEL);
if (!stagd) {
ret = -ENOMEM;
goto out;
@@ -403,9 +432,8 @@ static int stag_open(struct inode *inode, struct file *file)
stagd->devp = inode->i_private;
stagd->pos = 0;
- spin_lock_irq(&stagd->devp->lock);
- idr_for_each(&stagd->devp->mmidr, count_idrs, &count);
- spin_unlock_irq(&stagd->devp->lock);
+ xa_for_each(&stagd->devp->mrs, index, p)
+ count++;
stagd->bufsize = count * 256;
stagd->buf = vmalloc(stagd->bufsize);
@@ -414,9 +442,10 @@ static int stag_open(struct inode *inode, struct file *file)
goto err1;
}
- spin_lock_irq(&stagd->devp->lock);
- idr_for_each(&stagd->devp->mmidr, dump_stag, stagd);
- spin_unlock_irq(&stagd->devp->lock);
+ xa_lock_irq(&stagd->devp->mrs);
+ xa_for_each(&stagd->devp->mrs, index, p)
+ dump_stag(index, stagd);
+ xa_unlock_irq(&stagd->devp->mrs);
stagd->buf[stagd->pos++] = 0;
file->private_data = stagd;
@@ -449,6 +478,9 @@ static int stats_show(struct seq_file *seq, void *v)
seq_printf(seq, " QID: %10llu %10llu %10llu %10llu\n",
dev->rdev.stats.qid.total, dev->rdev.stats.qid.cur,
dev->rdev.stats.qid.max, dev->rdev.stats.qid.fail);
+ seq_printf(seq, " SRQS: %10llu %10llu %10llu %10llu\n",
+ dev->rdev.stats.srqt.total, dev->rdev.stats.srqt.cur,
+ dev->rdev.stats.srqt.max, dev->rdev.stats.srqt.fail);
seq_printf(seq, " TPTMEM: %10llu %10llu %10llu %10llu\n",
dev->rdev.stats.stag.total, dev->rdev.stats.stag.cur,
dev->rdev.stats.stag.max, dev->rdev.stats.stag.fail);
@@ -499,6 +531,8 @@ static ssize_t stats_clear(struct file *file, const char __user *buf,
dev->rdev.stats.pbl.fail = 0;
dev->rdev.stats.rqt.max = 0;
dev->rdev.stats.rqt.fail = 0;
+ dev->rdev.stats.rqt.max = 0;
+ dev->rdev.stats.rqt.fail = 0;
dev->rdev.stats.ocqp.max = 0;
dev->rdev.stats.ocqp.fail = 0;
dev->rdev.stats.db_full = 0;
@@ -521,10 +555,8 @@ static const struct file_operations stats_debugfs_fops = {
.write = stats_clear,
};
-static int dump_ep(int id, void *p, void *data)
+static int dump_ep(struct c4iw_ep *ep, struct c4iw_debugfs_data *epd)
{
- struct c4iw_ep *ep = p;
- struct c4iw_debugfs_data *epd = data;
int space;
int cc;
@@ -533,15 +565,12 @@ static int dump_ep(int id, void *p, void *data)
return 1;
if (ep->com.local_addr.ss_family == AF_INET) {
- struct sockaddr_in *lsin = (struct sockaddr_in *)
- &ep->com.cm_id->local_addr;
- struct sockaddr_in *rsin = (struct sockaddr_in *)
- &ep->com.cm_id->remote_addr;
- struct sockaddr_in *mapped_lsin = (struct sockaddr_in *)
- &ep->com.cm_id->m_local_addr;
- struct sockaddr_in *mapped_rsin = (struct sockaddr_in *)
- &ep->com.cm_id->m_remote_addr;
+ struct sockaddr_in *lsin;
+ struct sockaddr_in *rsin;
+ struct sockaddr_in *m_lsin;
+ struct sockaddr_in *m_rsin;
+ set_ep_sin_addrs(ep, &lsin, &rsin, &m_lsin, &m_rsin);
cc = snprintf(epd->buf + epd->pos, space,
"ep %p cm_id %p qp %p state %d flags 0x%lx "
"history 0x%lx hwtid %d atid %d "
@@ -553,19 +582,16 @@ static int dump_ep(int id, void *p, void *data)
ep->stats.connect_neg_adv,
ep->stats.abort_neg_adv,
&lsin->sin_addr, ntohs(lsin->sin_port),
- ntohs(mapped_lsin->sin_port),
+ ntohs(m_lsin->sin_port),
&rsin->sin_addr, ntohs(rsin->sin_port),
- ntohs(mapped_rsin->sin_port));
+ ntohs(m_rsin->sin_port));
} else {
- struct sockaddr_in6 *lsin6 = (struct sockaddr_in6 *)
- &ep->com.cm_id->local_addr;
- struct sockaddr_in6 *rsin6 = (struct sockaddr_in6 *)
- &ep->com.cm_id->remote_addr;
- struct sockaddr_in6 *mapped_lsin6 = (struct sockaddr_in6 *)
- &ep->com.cm_id->m_local_addr;
- struct sockaddr_in6 *mapped_rsin6 = (struct sockaddr_in6 *)
- &ep->com.cm_id->m_remote_addr;
+ struct sockaddr_in6 *lsin6;
+ struct sockaddr_in6 *rsin6;
+ struct sockaddr_in6 *m_lsin6;
+ struct sockaddr_in6 *m_rsin6;
+ set_ep_sin6_addrs(ep, &lsin6, &rsin6, &m_lsin6, &m_rsin6);
cc = snprintf(epd->buf + epd->pos, space,
"ep %p cm_id %p qp %p state %d flags 0x%lx "
"history 0x%lx hwtid %d atid %d "
@@ -577,19 +603,18 @@ static int dump_ep(int id, void *p, void *data)
ep->stats.connect_neg_adv,
ep->stats.abort_neg_adv,
&lsin6->sin6_addr, ntohs(lsin6->sin6_port),
- ntohs(mapped_lsin6->sin6_port),
+ ntohs(m_lsin6->sin6_port),
&rsin6->sin6_addr, ntohs(rsin6->sin6_port),
- ntohs(mapped_rsin6->sin6_port));
+ ntohs(m_rsin6->sin6_port));
}
if (cc < space)
epd->pos += cc;
return 0;
}
-static int dump_listen_ep(int id, void *p, void *data)
+static
+int dump_listen_ep(struct c4iw_listen_ep *ep, struct c4iw_debugfs_data *epd)
{
- struct c4iw_listen_ep *ep = p;
- struct c4iw_debugfs_data *epd = data;
int space;
int cc;
@@ -600,7 +625,7 @@ static int dump_listen_ep(int id, void *p, void *data)
if (ep->com.local_addr.ss_family == AF_INET) {
struct sockaddr_in *lsin = (struct sockaddr_in *)
&ep->com.cm_id->local_addr;
- struct sockaddr_in *mapped_lsin = (struct sockaddr_in *)
+ struct sockaddr_in *m_lsin = (struct sockaddr_in *)
&ep->com.cm_id->m_local_addr;
cc = snprintf(epd->buf + epd->pos, space,
@@ -609,11 +634,11 @@ static int dump_listen_ep(int id, void *p, void *data)
ep, ep->com.cm_id, (int)ep->com.state,
ep->com.flags, ep->stid, ep->backlog,
&lsin->sin_addr, ntohs(lsin->sin_port),
- ntohs(mapped_lsin->sin_port));
+ ntohs(m_lsin->sin_port));
} else {
struct sockaddr_in6 *lsin6 = (struct sockaddr_in6 *)
&ep->com.cm_id->local_addr;
- struct sockaddr_in6 *mapped_lsin6 = (struct sockaddr_in6 *)
+ struct sockaddr_in6 *m_lsin6 = (struct sockaddr_in6 *)
&ep->com.cm_id->m_local_addr;
cc = snprintf(epd->buf + epd->pos, space,
@@ -622,7 +647,7 @@ static int dump_listen_ep(int id, void *p, void *data)
ep, ep->com.cm_id, (int)ep->com.state,
ep->com.flags, ep->stid, ep->backlog,
&lsin6->sin6_addr, ntohs(lsin6->sin6_port),
- ntohs(mapped_lsin6->sin6_port));
+ ntohs(m_lsin6->sin6_port));
}
if (cc < space)
epd->pos += cc;
@@ -643,6 +668,9 @@ static int ep_release(struct inode *inode, struct file *file)
static int ep_open(struct inode *inode, struct file *file)
{
+ struct c4iw_ep *ep;
+ struct c4iw_listen_ep *lep;
+ unsigned long index;
struct c4iw_debugfs_data *epd;
int ret = 0;
int count = 1;
@@ -655,11 +683,12 @@ static int ep_open(struct inode *inode, struct file *file)
epd->devp = inode->i_private;
epd->pos = 0;
- spin_lock_irq(&epd->devp->lock);
- idr_for_each(&epd->devp->hwtid_idr, count_idrs, &count);
- idr_for_each(&epd->devp->atid_idr, count_idrs, &count);
- idr_for_each(&epd->devp->stid_idr, count_idrs, &count);
- spin_unlock_irq(&epd->devp->lock);
+ xa_for_each(&epd->devp->hwtids, index, ep)
+ count++;
+ xa_for_each(&epd->devp->atids, index, ep)
+ count++;
+ xa_for_each(&epd->devp->stids, index, lep)
+ count++;
epd->bufsize = count * 240;
epd->buf = vmalloc(epd->bufsize);
@@ -668,11 +697,18 @@ static int ep_open(struct inode *inode, struct file *file)
goto err1;
}
- spin_lock_irq(&epd->devp->lock);
- idr_for_each(&epd->devp->hwtid_idr, dump_ep, epd);
- idr_for_each(&epd->devp->atid_idr, dump_ep, epd);
- idr_for_each(&epd->devp->stid_idr, dump_listen_ep, epd);
- spin_unlock_irq(&epd->devp->lock);
+ xa_lock_irq(&epd->devp->hwtids);
+ xa_for_each(&epd->devp->hwtids, index, ep)
+ dump_ep(ep, epd);
+ xa_unlock_irq(&epd->devp->hwtids);
+ xa_lock_irq(&epd->devp->atids);
+ xa_for_each(&epd->devp->atids, index, ep)
+ dump_ep(ep, epd);
+ xa_unlock_irq(&epd->devp->atids);
+ xa_lock_irq(&epd->devp->stids);
+ xa_for_each(&epd->devp->stids, index, lep)
+ dump_listen_ep(lep, epd);
+ xa_unlock_irq(&epd->devp->stids);
file->private_data = epd;
goto out;
@@ -689,11 +725,8 @@ static const struct file_operations ep_debugfs_fops = {
.read = debugfs_read,
};
-static int setup_debugfs(struct c4iw_dev *devp)
+static void setup_debugfs(struct c4iw_dev *devp)
{
- if (!devp->debugfs_root)
- return -1;
-
debugfs_create_file_size("qps", S_IWUSR, devp->debugfs_root,
(void *)devp, &qp_debugfs_fops, 4096);
@@ -709,7 +742,6 @@ static int setup_debugfs(struct c4iw_dev *devp)
if (c4iw_wr_log)
debugfs_create_file_size("wr_log", S_IWUSR, devp->debugfs_root,
(void *)devp, &wr_log_debugfs_fops, 4096);
- return 0;
}
void c4iw_release_dev_ucontext(struct c4iw_rdev *rdev,
@@ -732,7 +764,7 @@ void c4iw_release_dev_ucontext(struct c4iw_rdev *rdev,
kfree(entry);
}
- list_for_each_safe(pos, nxt, &uctx->qpids) {
+ list_for_each_safe(pos, nxt, &uctx->cqids) {
entry = list_entry(pos, struct c4iw_qid_list, entry);
list_del_init(&entry->entry);
kfree(entry);
@@ -752,6 +784,7 @@ void c4iw_init_dev_ucontext(struct c4iw_rdev *rdev,
static int c4iw_rdev_open(struct c4iw_rdev *rdev)
{
int err;
+ unsigned int factor;
c4iw_init_dev_ucontext(rdev, &rdev->uctx);
@@ -761,40 +794,47 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev)
* cqid and qpid range must match for now.
*/
if (rdev->lldi.udb_density != rdev->lldi.ucq_density) {
- pr_err(MOD "%s: unsupported udb/ucq densities %u/%u\n",
+ pr_err("%s: unsupported udb/ucq densities %u/%u\n",
pci_name(rdev->lldi.pdev), rdev->lldi.udb_density,
rdev->lldi.ucq_density);
return -EINVAL;
}
if (rdev->lldi.vr->qp.start != rdev->lldi.vr->cq.start ||
rdev->lldi.vr->qp.size != rdev->lldi.vr->cq.size) {
- pr_err(MOD "%s: unsupported qp and cq id ranges "
- "qp start %u size %u cq start %u size %u\n",
+ pr_err("%s: unsupported qp and cq id ranges qp start %u size %u cq start %u size %u\n",
pci_name(rdev->lldi.pdev), rdev->lldi.vr->qp.start,
rdev->lldi.vr->qp.size, rdev->lldi.vr->cq.size,
rdev->lldi.vr->cq.size);
return -EINVAL;
}
- rdev->qpmask = rdev->lldi.udb_density - 1;
- rdev->cqmask = rdev->lldi.ucq_density - 1;
- PDBG("%s dev %s stag start 0x%0x size 0x%0x num stags %d "
- "pbl start 0x%0x size 0x%0x rq start 0x%0x size 0x%0x "
- "qp qid start %u size %u cq qid start %u size %u\n",
- __func__, pci_name(rdev->lldi.pdev), rdev->lldi.vr->stag.start,
- rdev->lldi.vr->stag.size, c4iw_num_stags(rdev),
- rdev->lldi.vr->pbl.start,
- rdev->lldi.vr->pbl.size, rdev->lldi.vr->rq.start,
- rdev->lldi.vr->rq.size,
- rdev->lldi.vr->qp.start,
- rdev->lldi.vr->qp.size,
- rdev->lldi.vr->cq.start,
- rdev->lldi.vr->cq.size);
- PDBG("udb %pR db_reg %p gts_reg %p "
- "qpmask 0x%x cqmask 0x%x\n",
- &rdev->lldi.pdev->resource[2],
- rdev->lldi.db_reg, rdev->lldi.gts_reg,
- rdev->qpmask, rdev->cqmask);
+ /* This implementation requires a sge_host_page_size <= PAGE_SIZE. */
+ if (rdev->lldi.sge_host_page_size > PAGE_SIZE) {
+ pr_err("%s: unsupported sge host page size %u\n",
+ pci_name(rdev->lldi.pdev),
+ rdev->lldi.sge_host_page_size);
+ return -EINVAL;
+ }
+
+ factor = PAGE_SIZE / rdev->lldi.sge_host_page_size;
+ rdev->qpmask = (rdev->lldi.udb_density * factor) - 1;
+ rdev->cqmask = (rdev->lldi.ucq_density * factor) - 1;
+
+ pr_debug("dev %s stag start 0x%0x size 0x%0x num stags %d pbl start 0x%0x size 0x%0x rq start 0x%0x size 0x%0x qp qid start %u size %u cq qid start %u size %u srq size %u\n",
+ pci_name(rdev->lldi.pdev), rdev->lldi.vr->stag.start,
+ rdev->lldi.vr->stag.size, c4iw_num_stags(rdev),
+ rdev->lldi.vr->pbl.start,
+ rdev->lldi.vr->pbl.size, rdev->lldi.vr->rq.start,
+ rdev->lldi.vr->rq.size,
+ rdev->lldi.vr->qp.start,
+ rdev->lldi.vr->qp.size,
+ rdev->lldi.vr->cq.start,
+ rdev->lldi.vr->cq.size,
+ rdev->lldi.vr->srq.size);
+ pr_debug("udb %pR db_reg %p gts_reg %p qpmask 0x%x cqmask 0x%x\n",
+ &rdev->lldi.pdev->resource[2],
+ rdev->lldi.db_reg, rdev->lldi.gts_reg,
+ rdev->qpmask, rdev->cqmask);
if (c4iw_num_stags(rdev) == 0)
return -EINVAL;
@@ -803,27 +843,29 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev)
rdev->stats.stag.total = rdev->lldi.vr->stag.size;
rdev->stats.pbl.total = rdev->lldi.vr->pbl.size;
rdev->stats.rqt.total = rdev->lldi.vr->rq.size;
+ rdev->stats.srqt.total = rdev->lldi.vr->srq.size;
rdev->stats.ocqp.total = rdev->lldi.vr->ocq.size;
rdev->stats.qid.total = rdev->lldi.vr->qp.size;
- err = c4iw_init_resource(rdev, c4iw_num_stags(rdev), T4_MAX_NUM_PD);
+ err = c4iw_init_resource(rdev, c4iw_num_stags(rdev),
+ T4_MAX_NUM_PD, rdev->lldi.vr->srq.size);
if (err) {
- printk(KERN_ERR MOD "error %d initializing resources\n", err);
+ pr_err("error %d initializing resources\n", err);
return err;
}
err = c4iw_pblpool_create(rdev);
if (err) {
- printk(KERN_ERR MOD "error %d initializing pbl pool\n", err);
+ pr_err("error %d initializing pbl pool\n", err);
goto destroy_resource;
}
err = c4iw_rqtpool_create(rdev);
if (err) {
- printk(KERN_ERR MOD "error %d initializing rqt pool\n", err);
+ pr_err("error %d initializing rqt pool\n", err);
goto destroy_pblpool;
}
err = c4iw_ocqp_pool_create(rdev);
if (err) {
- printk(KERN_ERR MOD "error %d initializing ocqp pool\n", err);
+ pr_err("error %d initializing ocqp pool\n", err);
goto destroy_rqtpool;
}
rdev->status_page = (struct t4_dev_status_page *)
@@ -836,19 +878,35 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev)
rdev->status_page->qp_size = rdev->lldi.vr->qp.size;
rdev->status_page->cq_start = rdev->lldi.vr->cq.start;
rdev->status_page->cq_size = rdev->lldi.vr->cq.size;
+ rdev->status_page->write_cmpl_supported = rdev->lldi.write_cmpl_support;
if (c4iw_wr_log) {
- rdev->wr_log = kzalloc((1 << c4iw_wr_log_size_order) *
- sizeof(*rdev->wr_log), GFP_KERNEL);
+ rdev->wr_log = kcalloc(1 << c4iw_wr_log_size_order,
+ sizeof(*rdev->wr_log),
+ GFP_KERNEL);
if (rdev->wr_log) {
rdev->wr_log_size = 1 << c4iw_wr_log_size_order;
atomic_set(&rdev->wr_log_idx, 0);
}
}
+ rdev->free_workq = create_singlethread_workqueue("iw_cxgb4_free");
+ if (!rdev->free_workq) {
+ err = -ENOMEM;
+ goto err_free_status_page_and_wr_log;
+ }
+
rdev->status_page->db_off = 0;
+ init_completion(&rdev->rqt_compl);
+ init_completion(&rdev->pbl_compl);
+ kref_init(&rdev->rqt_kref);
+ kref_init(&rdev->pbl_kref);
+
return 0;
+err_free_status_page_and_wr_log:
+ kfree(rdev->wr_log);
+ free_page((unsigned long)rdev->status_page);
destroy_ocqp_pool:
c4iw_ocqp_pool_destroy(rdev);
destroy_rqtpool:
@@ -863,25 +921,26 @@ destroy_resource:
static void c4iw_rdev_close(struct c4iw_rdev *rdev)
{
kfree(rdev->wr_log);
+ c4iw_release_dev_ucontext(rdev, &rdev->uctx);
free_page((unsigned long)rdev->status_page);
c4iw_pblpool_destroy(rdev);
c4iw_rqtpool_destroy(rdev);
+ wait_for_completion(&rdev->pbl_compl);
+ wait_for_completion(&rdev->rqt_compl);
+ c4iw_ocqp_pool_destroy(rdev);
+ destroy_workqueue(rdev->free_workq);
c4iw_destroy_resource(&rdev->resource);
}
-static void c4iw_dealloc(struct uld_ctx *ctx)
+void c4iw_dealloc(struct uld_ctx *ctx)
{
c4iw_rdev_close(&ctx->dev->rdev);
- WARN_ON_ONCE(!idr_is_empty(&ctx->dev->cqidr));
- idr_destroy(&ctx->dev->cqidr);
- WARN_ON_ONCE(!idr_is_empty(&ctx->dev->qpidr));
- idr_destroy(&ctx->dev->qpidr);
- WARN_ON_ONCE(!idr_is_empty(&ctx->dev->mmidr));
- idr_destroy(&ctx->dev->mmidr);
- wait_event(ctx->dev->wait, idr_is_empty(&ctx->dev->hwtid_idr));
- idr_destroy(&ctx->dev->hwtid_idr);
- idr_destroy(&ctx->dev->stid_idr);
- idr_destroy(&ctx->dev->atid_idr);
+ WARN_ON(!xa_empty(&ctx->dev->cqs));
+ WARN_ON(!xa_empty(&ctx->dev->qps));
+ WARN_ON(!xa_empty(&ctx->dev->mrs));
+ wait_event(ctx->dev->wait, xa_empty(&ctx->dev->hwtids));
+ WARN_ON(!xa_empty(&ctx->dev->stids));
+ WARN_ON(!xa_empty(&ctx->dev->atids));
if (ctx->dev->rdev.bar2_kva)
iounmap(ctx->dev->rdev.bar2_kva);
if (ctx->dev->rdev.oc_mw_kva)
@@ -892,7 +951,8 @@ static void c4iw_dealloc(struct uld_ctx *ctx)
static void c4iw_remove(struct uld_ctx *ctx)
{
- PDBG("%s c4iw_dev %p\n", __func__, ctx->dev);
+ pr_debug("c4iw_dev %p\n", ctx->dev);
+ debugfs_remove_recursive(ctx->dev->debugfs_root);
c4iw_unregister_device(ctx->dev);
c4iw_dealloc(ctx);
}
@@ -910,28 +970,28 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop)
int ret;
if (!rdma_supported(infop)) {
- printk(KERN_INFO MOD "%s: RDMA not supported on this device.\n",
- pci_name(infop->pdev));
+ pr_info("%s: RDMA not supported on this device\n",
+ pci_name(infop->pdev));
return ERR_PTR(-ENOSYS);
}
if (!ocqp_supported(infop))
- pr_info("%s: On-Chip Queues not supported on this device.\n",
+ pr_info("%s: On-Chip Queues not supported on this device\n",
pci_name(infop->pdev));
- devp = (struct c4iw_dev *)ib_alloc_device(sizeof(*devp));
+ devp = ib_alloc_device(c4iw_dev, ibdev);
if (!devp) {
- printk(KERN_ERR MOD "Cannot allocate ib device\n");
+ pr_err("Cannot allocate ib device\n");
return ERR_PTR(-ENOMEM);
}
devp->rdev.lldi = *infop;
/* init various hw-queue params based on lld info */
- PDBG("%s: Ing. padding boundary is %d, egrsstatuspagesize = %d\n",
- __func__, devp->rdev.lldi.sge_ingpadboundary,
- devp->rdev.lldi.sge_egrstatuspagesize);
+ pr_debug("Ing. padding boundary is %d, egrsstatuspagesize = %d\n",
+ devp->rdev.lldi.sge_ingpadboundary,
+ devp->rdev.lldi.sge_egrstatuspagesize);
devp->rdev.hw_queue.t4_eq_status_entries =
- devp->rdev.lldi.sge_ingpadboundary > 64 ? 2 : 1;
+ devp->rdev.lldi.sge_egrstatuspagesize / 64;
devp->rdev.hw_queue.t4_max_eq_size = 65520;
devp->rdev.hw_queue.t4_max_iq_size = 65520;
devp->rdev.hw_queue.t4_max_rq_size = 8192 -
@@ -956,7 +1016,7 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop)
devp->rdev.bar2_kva = ioremap_wc(devp->rdev.bar2_pa,
pci_resource_len(devp->rdev.lldi.pdev, 2));
if (!devp->rdev.bar2_kva) {
- pr_err(MOD "Unable to ioremap BAR2\n");
+ pr_err("Unable to ioremap BAR2\n");
ib_dealloc_device(&devp->ibdev);
return ERR_PTR(-EINVAL);
}
@@ -968,31 +1028,29 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop)
devp->rdev.oc_mw_kva = ioremap_wc(devp->rdev.oc_mw_pa,
devp->rdev.lldi.vr->ocq.size);
if (!devp->rdev.oc_mw_kva) {
- pr_err(MOD "Unable to ioremap onchip mem\n");
+ pr_err("Unable to ioremap onchip mem\n");
ib_dealloc_device(&devp->ibdev);
return ERR_PTR(-EINVAL);
}
}
- PDBG(KERN_INFO MOD "ocq memory: "
- "hw_start 0x%x size %u mw_pa 0x%lx mw_kva %p\n",
- devp->rdev.lldi.vr->ocq.start, devp->rdev.lldi.vr->ocq.size,
- devp->rdev.oc_mw_pa, devp->rdev.oc_mw_kva);
+ pr_debug("ocq memory: hw_start 0x%x size %u mw_pa 0x%lx mw_kva %p\n",
+ devp->rdev.lldi.vr->ocq.start, devp->rdev.lldi.vr->ocq.size,
+ devp->rdev.oc_mw_pa, devp->rdev.oc_mw_kva);
ret = c4iw_rdev_open(&devp->rdev);
if (ret) {
- printk(KERN_ERR MOD "Unable to open CXIO rdev err %d\n", ret);
+ pr_err("Unable to open CXIO rdev err %d\n", ret);
ib_dealloc_device(&devp->ibdev);
return ERR_PTR(ret);
}
- idr_init(&devp->cqidr);
- idr_init(&devp->qpidr);
- idr_init(&devp->mmidr);
- idr_init(&devp->hwtid_idr);
- idr_init(&devp->stid_idr);
- idr_init(&devp->atid_idr);
- spin_lock_init(&devp->lock);
+ xa_init_flags(&devp->cqs, XA_FLAGS_LOCK_IRQ);
+ xa_init_flags(&devp->qps, XA_FLAGS_LOCK_IRQ);
+ xa_init_flags(&devp->mrs, XA_FLAGS_LOCK_IRQ);
+ xa_init_flags(&devp->hwtids, XA_FLAGS_LOCK_IRQ);
+ xa_init_flags(&devp->atids, XA_FLAGS_LOCK_IRQ);
+ xa_init_flags(&devp->stids, XA_FLAGS_LOCK_IRQ);
mutex_init(&devp->rdev.stats.lock);
mutex_init(&devp->db_mutex);
INIT_LIST_HEAD(&devp->db_fc_list);
@@ -1020,24 +1078,24 @@ static void *c4iw_uld_add(const struct cxgb4_lld_info *infop)
pr_info("Chelsio T4/T5 RDMA Driver - version %s\n",
DRV_VERSION);
- ctx = kzalloc(sizeof *ctx, GFP_KERNEL);
+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
if (!ctx) {
ctx = ERR_PTR(-ENOMEM);
goto out;
}
ctx->lldi = *infop;
- PDBG("%s found device %s nchan %u nrxq %u ntxq %u nports %u\n",
- __func__, pci_name(ctx->lldi.pdev),
- ctx->lldi.nchan, ctx->lldi.nrxq,
- ctx->lldi.ntxq, ctx->lldi.nports);
+ pr_debug("found device %s nchan %u nrxq %u ntxq %u nports %u\n",
+ pci_name(ctx->lldi.pdev),
+ ctx->lldi.nchan, ctx->lldi.nrxq,
+ ctx->lldi.ntxq, ctx->lldi.nports);
mutex_lock(&dev_mutex);
list_add_tail(&ctx->entry, &uld_ctx_list);
mutex_unlock(&dev_mutex);
for (i = 0; i < ctx->lldi.nrxq; i++)
- PDBG("rxqid[%u] %u\n", i, ctx->lldi.rxq_ids[i]);
+ pr_debug("rxqid[%u] %u\n", i, ctx->lldi.rxq_ids[i]);
out:
return ctx;
}
@@ -1055,13 +1113,15 @@ static inline struct sk_buff *copy_gl_to_skb_pkt(const struct pkt_gl *gl,
* The math here assumes sizeof cpl_pass_accept_req >= sizeof
* cpl_rx_pkt.
*/
- skb = alloc_skb(gl->tot_len + sizeof(struct cpl_pass_accept_req) +
- sizeof(struct rss_header) - pktshift, GFP_ATOMIC);
+ skb = alloc_skb(size_add(gl->tot_len,
+ sizeof(struct cpl_pass_accept_req) +
+ sizeof(struct rss_header)) - pktshift,
+ GFP_ATOMIC);
if (unlikely(!skb))
return NULL;
- __skb_put(skb, gl->tot_len + sizeof(struct cpl_pass_accept_req) +
- sizeof(struct rss_header) - pktshift);
+ __skb_put(skb, gl->tot_len + sizeof(struct cpl_pass_accept_req) +
+ sizeof(struct rss_header) - pktshift);
/*
* This skb will contain:
@@ -1094,8 +1154,7 @@ static inline int recv_rx_pkt(struct c4iw_dev *dev, const struct pkt_gl *gl,
goto out;
if (c4iw_handlers[opcode] == NULL) {
- pr_info("%s no handler opcode 0x%x...\n", __func__,
- opcode);
+ pr_info("%s no handler opcode 0x%x...\n", __func__, opcode);
kfree_skb(skb);
goto out;
}
@@ -1132,13 +1191,11 @@ static int c4iw_uld_rx_handler(void *handle, const __be64 *rsp,
if (recv_rx_pkt(dev, gl, rsp))
return 0;
- pr_info("%s: unexpected FL contents at %p, " \
- "RSS %#llx, FL %#llx, len %u\n",
- pci_name(ctx->lldi.pdev), gl->va,
- (unsigned long long)be64_to_cpu(*rsp),
- (unsigned long long)be64_to_cpu(
- *(__force __be64 *)gl->va),
- gl->tot_len);
+ pr_info("%s: unexpected FL contents at %p, RSS %#llx, FL %#llx, len %u\n",
+ pci_name(ctx->lldi.pdev), gl->va,
+ be64_to_cpu(*rsp),
+ be64_to_cpu(*(__force __be64 *)gl->va),
+ gl->tot_len);
return 0;
} else {
@@ -1151,8 +1208,7 @@ static int c4iw_uld_rx_handler(void *handle, const __be64 *rsp,
if (c4iw_handlers[opcode]) {
c4iw_handlers[opcode](dev, skb);
} else {
- pr_info("%s no handler opcode 0x%x...\n", __func__,
- opcode);
+ pr_info("%s no handler opcode 0x%x...\n", __func__, opcode);
kfree_skb(skb);
}
@@ -1165,45 +1221,35 @@ static int c4iw_uld_state_change(void *handle, enum cxgb4_state new_state)
{
struct uld_ctx *ctx = handle;
- PDBG("%s new_state %u\n", __func__, new_state);
+ pr_debug("new_state %u\n", new_state);
switch (new_state) {
case CXGB4_STATE_UP:
- printk(KERN_INFO MOD "%s: Up\n", pci_name(ctx->lldi.pdev));
+ pr_info("%s: Up\n", pci_name(ctx->lldi.pdev));
if (!ctx->dev) {
- int ret;
-
ctx->dev = c4iw_alloc(&ctx->lldi);
if (IS_ERR(ctx->dev)) {
- printk(KERN_ERR MOD
- "%s: initialization failed: %ld\n",
- pci_name(ctx->lldi.pdev),
- PTR_ERR(ctx->dev));
+ pr_err("%s: initialization failed: %pe\n",
+ pci_name(ctx->lldi.pdev), ctx->dev);
ctx->dev = NULL;
break;
}
- ret = c4iw_register_device(ctx->dev);
- if (ret) {
- printk(KERN_ERR MOD
- "%s: RDMA registration failed: %d\n",
- pci_name(ctx->lldi.pdev), ret);
- c4iw_dealloc(ctx);
- }
+
+ INIT_WORK(&ctx->reg_work, c4iw_register_device);
+ queue_work(reg_workq, &ctx->reg_work);
}
break;
case CXGB4_STATE_DOWN:
- printk(KERN_INFO MOD "%s: Down\n",
- pci_name(ctx->lldi.pdev));
+ pr_info("%s: Down\n", pci_name(ctx->lldi.pdev));
if (ctx->dev)
c4iw_remove(ctx);
break;
+ case CXGB4_STATE_FATAL_ERROR:
case CXGB4_STATE_START_RECOVERY:
- printk(KERN_INFO MOD "%s: Fatal Error\n",
- pci_name(ctx->lldi.pdev));
+ pr_info("%s: Fatal Error\n", pci_name(ctx->lldi.pdev));
if (ctx->dev) {
- struct ib_event event;
+ struct ib_event event = {};
ctx->dev->rdev.flags |= T4_FATAL_ERROR;
- memset(&event, 0, sizeof event);
event.event = IB_EVENT_DEVICE_FATAL;
event.device = &ctx->dev->ibdev;
ib_dispatch_event(&event);
@@ -1211,8 +1257,7 @@ static int c4iw_uld_state_change(void *handle, enum cxgb4_state new_state)
}
break;
case CXGB4_STATE_DETACH:
- printk(KERN_INFO MOD "%s: Detach\n",
- pci_name(ctx->lldi.pdev));
+ pr_info("%s: Detach\n", pci_name(ctx->lldi.pdev));
if (ctx->dev)
c4iw_remove(ctx);
break;
@@ -1220,34 +1265,21 @@ static int c4iw_uld_state_change(void *handle, enum cxgb4_state new_state)
return 0;
}
-static int disable_qp_db(int id, void *p, void *data)
-{
- struct c4iw_qp *qp = p;
-
- t4_disable_wq_db(&qp->wq);
- return 0;
-}
-
static void stop_queues(struct uld_ctx *ctx)
{
- unsigned long flags;
+ struct c4iw_qp *qp;
+ unsigned long index, flags;
- spin_lock_irqsave(&ctx->dev->lock, flags);
+ xa_lock_irqsave(&ctx->dev->qps, flags);
ctx->dev->rdev.stats.db_state_transitions++;
ctx->dev->db_state = STOPPED;
- if (ctx->dev->rdev.flags & T4_STATUS_PAGE_DISABLED)
- idr_for_each(&ctx->dev->qpidr, disable_qp_db, NULL);
- else
+ if (ctx->dev->rdev.flags & T4_STATUS_PAGE_DISABLED) {
+ xa_for_each(&ctx->dev->qps, index, qp)
+ t4_disable_wq_db(&qp->wq);
+ } else {
ctx->dev->rdev.status_page->db_off = 1;
- spin_unlock_irqrestore(&ctx->dev->lock, flags);
-}
-
-static int enable_qp_db(int id, void *p, void *data)
-{
- struct c4iw_qp *qp = p;
-
- t4_enable_wq_db(&qp->wq);
- return 0;
+ }
+ xa_unlock_irqrestore(&ctx->dev->qps, flags);
}
static void resume_rc_qp(struct c4iw_qp *qp)
@@ -1277,18 +1309,21 @@ static void resume_a_chunk(struct uld_ctx *ctx)
static void resume_queues(struct uld_ctx *ctx)
{
- spin_lock_irq(&ctx->dev->lock);
+ xa_lock_irq(&ctx->dev->qps);
if (ctx->dev->db_state != STOPPED)
goto out;
ctx->dev->db_state = FLOW_CONTROL;
while (1) {
if (list_empty(&ctx->dev->db_fc_list)) {
+ struct c4iw_qp *qp;
+ unsigned long index;
+
WARN_ON(ctx->dev->db_state != FLOW_CONTROL);
ctx->dev->db_state = NORMAL;
ctx->dev->rdev.stats.db_state_transitions++;
if (ctx->dev->rdev.flags & T4_STATUS_PAGE_DISABLED) {
- idr_for_each(&ctx->dev->qpidr, enable_qp_db,
- NULL);
+ xa_for_each(&ctx->dev->qps, index, qp)
+ t4_enable_wq_db(&qp->wq);
} else {
ctx->dev->rdev.status_page->db_off = 0;
}
@@ -1300,12 +1335,12 @@ static void resume_queues(struct uld_ctx *ctx)
resume_a_chunk(ctx);
}
if (!list_empty(&ctx->dev->db_fc_list)) {
- spin_unlock_irq(&ctx->dev->lock);
+ xa_unlock_irq(&ctx->dev->qps);
if (DB_FC_RESUME_DELAY) {
set_current_state(TASK_UNINTERRUPTIBLE);
schedule_timeout(DB_FC_RESUME_DELAY);
}
- spin_lock_irq(&ctx->dev->lock);
+ xa_lock_irq(&ctx->dev->qps);
if (ctx->dev->db_state != FLOW_CONTROL)
break;
}
@@ -1314,7 +1349,7 @@ static void resume_queues(struct uld_ctx *ctx)
out:
if (ctx->dev->db_state != NORMAL)
ctx->dev->rdev.stats.db_fc_interruptions++;
- spin_unlock_irq(&ctx->dev->lock);
+ xa_unlock_irq(&ctx->dev->qps);
}
struct qp_list {
@@ -1322,23 +1357,6 @@ struct qp_list {
struct c4iw_qp **qps;
};
-static int add_and_ref_qp(int id, void *p, void *data)
-{
- struct qp_list *qp_listp = data;
- struct c4iw_qp *qp = p;
-
- c4iw_qp_add_ref(&qp->ibqp);
- qp_listp->qps[qp_listp->idx++] = qp;
- return 0;
-}
-
-static int count_qps(int id, void *p, void *data)
-{
- unsigned *countp = data;
- (*countp)++;
- return 0;
-}
-
static void deref_qps(struct qp_list *qp_list)
{
int idx;
@@ -1355,19 +1373,17 @@ static void recover_lost_dbs(struct uld_ctx *ctx, struct qp_list *qp_list)
for (idx = 0; idx < qp_list->idx; idx++) {
struct c4iw_qp *qp = qp_list->qps[idx];
- spin_lock_irq(&qp->rhp->lock);
+ xa_lock_irq(&qp->rhp->qps);
spin_lock(&qp->lock);
ret = cxgb4_sync_txq_pidx(qp->rhp->rdev.lldi.ports[0],
qp->wq.sq.qid,
t4_sq_host_wq_pidx(&qp->wq),
t4_sq_wq_size(&qp->wq));
if (ret) {
- pr_err(MOD "%s: Fatal error - "
- "DB overflow recovery failed - "
- "error syncing SQ qid %u\n",
+ pr_err("%s: Fatal error - DB overflow recovery failed - error syncing SQ qid %u\n",
pci_name(ctx->lldi.pdev), qp->wq.sq.qid);
spin_unlock(&qp->lock);
- spin_unlock_irq(&qp->rhp->lock);
+ xa_unlock_irq(&qp->rhp->qps);
return;
}
qp->wq.sq.wq_pidx_inc = 0;
@@ -1378,17 +1394,15 @@ static void recover_lost_dbs(struct uld_ctx *ctx, struct qp_list *qp_list)
t4_rq_wq_size(&qp->wq));
if (ret) {
- pr_err(MOD "%s: Fatal error - "
- "DB overflow recovery failed - "
- "error syncing RQ qid %u\n",
+ pr_err("%s: Fatal error - DB overflow recovery failed - error syncing RQ qid %u\n",
pci_name(ctx->lldi.pdev), qp->wq.rq.qid);
spin_unlock(&qp->lock);
- spin_unlock_irq(&qp->rhp->lock);
+ xa_unlock_irq(&qp->rhp->qps);
return;
}
qp->wq.rq.wq_pidx_inc = 0;
spin_unlock(&qp->lock);
- spin_unlock_irq(&qp->rhp->lock);
+ xa_unlock_irq(&qp->rhp->qps);
/* Wait for the dbfifo to drain */
while (cxgb4_dbfifo_count(qp->rhp->rdev.lldi.ports[0], 1) > 0) {
@@ -1400,6 +1414,8 @@ static void recover_lost_dbs(struct uld_ctx *ctx, struct qp_list *qp_list)
static void recover_queues(struct uld_ctx *ctx)
{
+ struct c4iw_qp *qp;
+ unsigned long index;
int count = 0;
struct qp_list qp_list;
int ret;
@@ -1411,28 +1427,32 @@ static void recover_queues(struct uld_ctx *ctx)
/* flush the SGE contexts */
ret = cxgb4_flush_eq_cache(ctx->dev->rdev.lldi.ports[0]);
if (ret) {
- printk(KERN_ERR MOD "%s: Fatal error - DB overflow recovery failed\n",
+ pr_err("%s: Fatal error - DB overflow recovery failed\n",
pci_name(ctx->lldi.pdev));
return;
}
/* Count active queues so we can build a list of queues to recover */
- spin_lock_irq(&ctx->dev->lock);
+ xa_lock_irq(&ctx->dev->qps);
WARN_ON(ctx->dev->db_state != STOPPED);
ctx->dev->db_state = RECOVERY;
- idr_for_each(&ctx->dev->qpidr, count_qps, &count);
+ xa_for_each(&ctx->dev->qps, index, qp)
+ count++;
- qp_list.qps = kzalloc(count * sizeof *qp_list.qps, GFP_ATOMIC);
+ qp_list.qps = kcalloc(count, sizeof(*qp_list.qps), GFP_ATOMIC);
if (!qp_list.qps) {
- spin_unlock_irq(&ctx->dev->lock);
+ xa_unlock_irq(&ctx->dev->qps);
return;
}
qp_list.idx = 0;
/* add and ref each qp so it doesn't get freed */
- idr_for_each(&ctx->dev->qpidr, add_and_ref_qp, &qp_list);
+ xa_for_each(&ctx->dev->qps, index, qp) {
+ c4iw_qp_add_ref(&qp->ibqp);
+ qp_list.qps[qp_list.idx++] = qp;
+ }
- spin_unlock_irq(&ctx->dev->lock);
+ xa_unlock_irq(&ctx->dev->qps);
/* now traverse the list in a safe context to recover the db state*/
recover_lost_dbs(ctx, &qp_list);
@@ -1441,10 +1461,10 @@ static void recover_queues(struct uld_ctx *ctx)
deref_qps(&qp_list);
kfree(qp_list.qps);
- spin_lock_irq(&ctx->dev->lock);
+ xa_lock_irq(&ctx->dev->qps);
WARN_ON(ctx->dev->db_state != RECOVERY);
ctx->dev->db_state = STOPPED;
- spin_unlock_irq(&ctx->dev->lock);
+ xa_unlock_irq(&ctx->dev->qps);
}
static int c4iw_uld_control(void *handle, enum cxgb4_control control, ...)
@@ -1469,8 +1489,8 @@ static int c4iw_uld_control(void *handle, enum cxgb4_control control, ...)
mutex_unlock(&ctx->dev->rdev.stats.lock);
break;
default:
- printk(KERN_WARNING MOD "%s: unknown control cmd %u\n",
- pci_name(ctx->lldi.pdev), control);
+ pr_warn("%s: unknown control cmd %u\n",
+ pci_name(ctx->lldi.pdev), control);
break;
}
return 0;
@@ -1489,6 +1509,27 @@ static struct cxgb4_uld_info c4iw_uld_info = {
.control = c4iw_uld_control,
};
+void _c4iw_free_wr_wait(struct kref *kref)
+{
+ struct c4iw_wr_wait *wr_waitp;
+
+ wr_waitp = container_of(kref, struct c4iw_wr_wait, kref);
+ pr_debug("Free wr_wait %p\n", wr_waitp);
+ kfree(wr_waitp);
+}
+
+struct c4iw_wr_wait *c4iw_alloc_wr_wait(gfp_t gfp)
+{
+ struct c4iw_wr_wait *wr_waitp;
+
+ wr_waitp = kzalloc(sizeof(*wr_waitp), gfp);
+ if (wr_waitp) {
+ kref_init(&wr_waitp->kref);
+ pr_debug("wr_wait %p\n", wr_waitp);
+ }
+ return wr_waitp;
+}
+
static int __init c4iw_init_module(void)
{
int err;
@@ -1498,9 +1539,12 @@ static int __init c4iw_init_module(void)
return err;
c4iw_debugfs_root = debugfs_create_dir(DRV_NAME, NULL);
- if (!c4iw_debugfs_root)
- printk(KERN_WARNING MOD
- "could not create debugfs entry, continuing\n");
+
+ reg_workq = create_singlethread_workqueue("Register_iWARP_device");
+ if (!reg_workq) {
+ pr_err("Failed creating workqueue to register iwarp device\n");
+ return -ENOMEM;
+ }
cxgb4_register_uld(CXGB4_ULD_RDMA, &c4iw_uld_info);
@@ -1518,6 +1562,7 @@ static void __exit c4iw_exit_module(void)
kfree(ctx);
}
mutex_unlock(&dev_mutex);
+ destroy_workqueue(reg_workq);
cxgb4_unregister_uld(CXGB4_ULD_RDMA);
c4iw_cm_term();
debugfs_remove_recursive(c4iw_debugfs_root);
diff --git a/drivers/infiniband/hw/cxgb4/ev.c b/drivers/infiniband/hw/cxgb4/ev.c
index bdfac2ccb704..34211a533d5c 100644
--- a/drivers/infiniband/hw/cxgb4/ev.c
+++ b/drivers/infiniband/hw/cxgb4/ev.c
@@ -47,17 +47,16 @@ static void print_tpte(struct c4iw_dev *dev, u32 stag)
"%s cxgb4_read_tpte err %d\n", __func__, ret);
return;
}
- PDBG("stag idx 0x%x valid %d key 0x%x state %d pdid %d "
- "perm 0x%x ps %d len 0x%llx va 0x%llx\n",
- stag & 0xffffff00,
- FW_RI_TPTE_VALID_G(ntohl(tpte.valid_to_pdid)),
- FW_RI_TPTE_STAGKEY_G(ntohl(tpte.valid_to_pdid)),
- FW_RI_TPTE_STAGSTATE_G(ntohl(tpte.valid_to_pdid)),
- FW_RI_TPTE_PDID_G(ntohl(tpte.valid_to_pdid)),
- FW_RI_TPTE_PERM_G(ntohl(tpte.locread_to_qpid)),
- FW_RI_TPTE_PS_G(ntohl(tpte.locread_to_qpid)),
- ((u64)ntohl(tpte.len_hi) << 32) | ntohl(tpte.len_lo),
- ((u64)ntohl(tpte.va_hi) << 32) | ntohl(tpte.va_lo_fbo));
+ pr_debug("stag idx 0x%x valid %d key 0x%x state %d pdid %d perm 0x%x ps %d len 0x%llx va 0x%llx\n",
+ stag & 0xffffff00,
+ FW_RI_TPTE_VALID_G(ntohl(tpte.valid_to_pdid)),
+ FW_RI_TPTE_STAGKEY_G(ntohl(tpte.valid_to_pdid)),
+ FW_RI_TPTE_STAGSTATE_G(ntohl(tpte.valid_to_pdid)),
+ FW_RI_TPTE_PDID_G(ntohl(tpte.valid_to_pdid)),
+ FW_RI_TPTE_PERM_G(ntohl(tpte.locread_to_qpid)),
+ FW_RI_TPTE_PS_G(ntohl(tpte.locread_to_qpid)),
+ ((u64)ntohl(tpte.len_hi) << 32) | ntohl(tpte.len_lo),
+ ((u64)ntohl(tpte.va_hi) << 32) | ntohl(tpte.va_lo_fbo));
}
static void dump_err_cqe(struct c4iw_dev *dev, struct t4_cqe *err_cqe)
@@ -71,9 +70,10 @@ static void dump_err_cqe(struct c4iw_dev *dev, struct t4_cqe *err_cqe)
CQE_STATUS(err_cqe), CQE_TYPE(err_cqe), ntohl(err_cqe->len),
CQE_WRID_HI(err_cqe), CQE_WRID_LOW(err_cqe));
- PDBG("%016llx %016llx %016llx %016llx\n",
- be64_to_cpu(p[0]), be64_to_cpu(p[1]), be64_to_cpu(p[2]),
- be64_to_cpu(p[3]));
+ pr_debug("%016llx %016llx %016llx %016llx - %016llx %016llx %016llx %016llx\n",
+ be64_to_cpu(p[0]), be64_to_cpu(p[1]), be64_to_cpu(p[2]),
+ be64_to_cpu(p[3]), be64_to_cpu(p[4]), be64_to_cpu(p[5]),
+ be64_to_cpu(p[6]), be64_to_cpu(p[7]));
/*
* Ingress WRITE and READ_RESP errors provide
@@ -110,9 +110,11 @@ static void post_qp_event(struct c4iw_dev *dev, struct c4iw_cq *chp,
if (qhp->ibqp.event_handler)
(*qhp->ibqp.event_handler)(&event, qhp->ibqp.qp_context);
- spin_lock_irqsave(&chp->comp_handler_lock, flag);
- (*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context);
- spin_unlock_irqrestore(&chp->comp_handler_lock, flag);
+ if (t4_clear_cq_armed(&chp->cq)) {
+ spin_lock_irqsave(&chp->comp_handler_lock, flag);
+ (*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context);
+ spin_unlock_irqrestore(&chp->comp_handler_lock, flag);
+ }
}
void c4iw_ev_dispatch(struct c4iw_dev *dev, struct t4_cqe *err_cqe)
@@ -121,16 +123,15 @@ void c4iw_ev_dispatch(struct c4iw_dev *dev, struct t4_cqe *err_cqe)
struct c4iw_qp *qhp;
u32 cqid;
- spin_lock_irq(&dev->lock);
- qhp = get_qhp(dev, CQE_QPID(err_cqe));
+ xa_lock_irq(&dev->qps);
+ qhp = xa_load(&dev->qps, CQE_QPID(err_cqe));
if (!qhp) {
- printk(KERN_ERR MOD "BAD AE qpid 0x%x opcode %d "
- "status 0x%x type %d wrid.hi 0x%x wrid.lo 0x%x\n",
+ pr_err("BAD AE qpid 0x%x opcode %d status 0x%x type %d wrid.hi 0x%x wrid.lo 0x%x\n",
CQE_QPID(err_cqe),
CQE_OPCODE(err_cqe), CQE_STATUS(err_cqe),
CQE_TYPE(err_cqe), CQE_WRID_HI(err_cqe),
CQE_WRID_LOW(err_cqe));
- spin_unlock_irq(&dev->lock);
+ xa_unlock_irq(&dev->qps);
goto out;
}
@@ -140,19 +141,18 @@ void c4iw_ev_dispatch(struct c4iw_dev *dev, struct t4_cqe *err_cqe)
cqid = qhp->attr.rcq;
chp = get_chp(dev, cqid);
if (!chp) {
- printk(KERN_ERR MOD "BAD AE cqid 0x%x qpid 0x%x opcode %d "
- "status 0x%x type %d wrid.hi 0x%x wrid.lo 0x%x\n",
+ pr_err("BAD AE cqid 0x%x qpid 0x%x opcode %d status 0x%x type %d wrid.hi 0x%x wrid.lo 0x%x\n",
cqid, CQE_QPID(err_cqe),
CQE_OPCODE(err_cqe), CQE_STATUS(err_cqe),
CQE_TYPE(err_cqe), CQE_WRID_HI(err_cqe),
CQE_WRID_LOW(err_cqe));
- spin_unlock_irq(&dev->lock);
+ xa_unlock_irq(&dev->qps);
goto out;
}
c4iw_qp_add_ref(&qhp->ibqp);
- atomic_inc(&chp->refcnt);
- spin_unlock_irq(&dev->lock);
+ refcount_inc(&chp->refcnt);
+ xa_unlock_irq(&dev->qps);
/* Bad incoming write */
if (RQ_TYPE(err_cqe) &&
@@ -165,7 +165,7 @@ void c4iw_ev_dispatch(struct c4iw_dev *dev, struct t4_cqe *err_cqe)
/* Completion Events */
case T4_ERR_SUCCESS:
- printk(KERN_ERR MOD "AE with status 0!\n");
+ pr_err("AE with status 0!\n");
break;
case T4_ERR_STAG:
@@ -207,14 +207,13 @@ void c4iw_ev_dispatch(struct c4iw_dev *dev, struct t4_cqe *err_cqe)
break;
default:
- printk(KERN_ERR MOD "Unknown T4 status 0x%x QPID 0x%x\n",
+ pr_err("Unknown T4 status 0x%x QPID 0x%x\n",
CQE_STATUS(err_cqe), qhp->wq.sq.qid);
post_qp_event(dev, chp, qhp, err_cqe, IB_EVENT_QP_FATAL);
break;
}
done:
- if (atomic_dec_and_test(&chp->refcnt))
- wake_up(&chp->wait);
+ c4iw_cq_rem_ref(chp);
c4iw_qp_rem_ref(&qhp->ibqp);
out:
return;
@@ -225,20 +224,19 @@ int c4iw_ev_handler(struct c4iw_dev *dev, u32 qid)
struct c4iw_cq *chp;
unsigned long flag;
- spin_lock_irqsave(&dev->lock, flag);
- chp = get_chp(dev, qid);
+ xa_lock_irqsave(&dev->cqs, flag);
+ chp = xa_load(&dev->cqs, qid);
if (chp) {
- atomic_inc(&chp->refcnt);
- spin_unlock_irqrestore(&dev->lock, flag);
+ refcount_inc(&chp->refcnt);
+ xa_unlock_irqrestore(&dev->cqs, flag);
t4_clear_cq_armed(&chp->cq);
spin_lock_irqsave(&chp->comp_handler_lock, flag);
(*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context);
spin_unlock_irqrestore(&chp->comp_handler_lock, flag);
- if (atomic_dec_and_test(&chp->refcnt))
- wake_up(&chp->wait);
+ c4iw_cq_rem_ref(chp);
} else {
- PDBG("%s unknown cqid 0x%x\n", __func__, qid);
- spin_unlock_irqrestore(&dev->lock, flag);
+ pr_debug("unknown cqid 0x%x\n", qid);
+ xa_unlock_irqrestore(&dev->cqs, flag);
}
return 0;
}
diff --git a/drivers/infiniband/hw/cxgb4/id_table.c b/drivers/infiniband/hw/cxgb4/id_table.c
index 0161ae6ad629..e2188b335e76 100644
--- a/drivers/infiniband/hw/cxgb4/id_table.c
+++ b/drivers/infiniband/hw/cxgb4/id_table.c
@@ -54,12 +54,12 @@ u32 c4iw_id_alloc(struct c4iw_id_table *alloc)
if (obj < alloc->max) {
if (alloc->flags & C4IW_ID_TABLE_F_RANDOM)
- alloc->last += prandom_u32() % RANDOM_SKIP;
+ alloc->last += get_random_u32_below(RANDOM_SKIP);
else
alloc->last = obj + 1;
if (alloc->last >= alloc->max)
alloc->last = 0;
- set_bit(obj, alloc->table);
+ __set_bit(obj, alloc->table);
obj += alloc->start;
} else
obj = -1;
@@ -73,40 +73,34 @@ void c4iw_id_free(struct c4iw_id_table *alloc, u32 obj)
unsigned long flags;
obj -= alloc->start;
- BUG_ON((int)obj < 0);
spin_lock_irqsave(&alloc->lock, flags);
- clear_bit(obj, alloc->table);
+ __clear_bit(obj, alloc->table);
spin_unlock_irqrestore(&alloc->lock, flags);
}
int c4iw_id_table_alloc(struct c4iw_id_table *alloc, u32 start, u32 num,
u32 reserved, u32 flags)
{
- int i;
-
alloc->start = start;
alloc->flags = flags;
if (flags & C4IW_ID_TABLE_F_RANDOM)
- alloc->last = prandom_u32() % RANDOM_SKIP;
+ alloc->last = get_random_u32_below(RANDOM_SKIP);
else
alloc->last = 0;
- alloc->max = num;
+ alloc->max = num;
spin_lock_init(&alloc->lock);
- alloc->table = kmalloc(BITS_TO_LONGS(num) * sizeof(long),
- GFP_KERNEL);
+ alloc->table = bitmap_zalloc(num, GFP_KERNEL);
if (!alloc->table)
return -ENOMEM;
- bitmap_zero(alloc->table, num);
if (!(alloc->flags & C4IW_ID_TABLE_F_EMPTY))
- for (i = 0; i < reserved; ++i)
- set_bit(i, alloc->table);
+ bitmap_set(alloc->table, 0, reserved);
return 0;
}
void c4iw_id_table_free(struct c4iw_id_table *alloc)
{
- kfree(alloc->table);
+ bitmap_free(alloc->table);
}
diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
index 4788e1a46fde..e17c1252536b 100644
--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
@@ -34,10 +34,10 @@
#include <linux/mutex.h>
#include <linux/list.h>
#include <linux/spinlock.h>
-#include <linux/idr.h>
+#include <linux/xarray.h>
#include <linux/completion.h>
#include <linux/netdevice.h>
-#include <linux/sched.h>
+#include <linux/sched/mm.h>
#include <linux/pci.h>
#include <linux/dma-mapping.h>
#include <linux/inet.h>
@@ -45,6 +45,7 @@
#include <linux/kref.h>
#include <linux/timer.h>
#include <linux/io.h>
+#include <linux/workqueue.h>
#include <asm/byteorder.h>
@@ -54,6 +55,7 @@
#include <rdma/iw_cm.h>
#include <rdma/rdma_netlink.h>
#include <rdma/iw_portmap.h>
+#include <rdma/restrack.h>
#include "cxgb4.h"
#include "cxgb4_uld.h"
@@ -63,12 +65,11 @@
#define DRV_NAME "iw_cxgb4"
#define MOD DRV_NAME ":"
-extern int c4iw_debug;
-#define PDBG(fmt, args...) \
-do { \
- if (c4iw_debug) \
- printk(MOD fmt, ## args); \
-} while (0)
+#ifdef pr_fmt
+#undef pr_fmt
+#endif
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include "t4.h"
@@ -96,6 +97,7 @@ struct c4iw_resource {
struct c4iw_id_table tpt_table;
struct c4iw_id_table qid_table;
struct c4iw_id_table pdid_table;
+ struct c4iw_id_table srq_table;
};
struct c4iw_qid_list {
@@ -107,6 +109,7 @@ struct c4iw_dev_ucontext {
struct list_head qpids;
struct list_head cqids;
struct mutex lock;
+ struct kref kref;
};
enum c4iw_rdev_flags {
@@ -128,6 +131,8 @@ struct c4iw_stats {
struct c4iw_stat stag;
struct c4iw_stat pbl;
struct c4iw_stat rqt;
+ struct c4iw_stat srqt;
+ struct c4iw_stat srq;
struct c4iw_stat ocqp;
u64 db_full;
u64 db_empty;
@@ -152,8 +157,8 @@ struct c4iw_hw_queue {
};
struct wr_log_entry {
- struct timespec post_host_ts;
- struct timespec poll_host_ts;
+ ktime_t post_host_time;
+ ktime_t poll_host_time;
u64 post_sge_ts;
u64 cqe_sge_ts;
u64 poll_sge_ts;
@@ -183,6 +188,11 @@ struct c4iw_rdev {
atomic_t wr_log_idx;
struct wr_log_entry *wr_log;
int wr_log_size;
+ struct workqueue_struct *free_workq;
+ struct completion rqt_compl;
+ struct completion pbl_compl;
+ struct kref rqt_kref;
+ struct kref pbl_kref;
};
static inline int c4iw_fatal_error(struct c4iw_rdev *rdev)
@@ -200,18 +210,50 @@ static inline int c4iw_num_stags(struct c4iw_rdev *rdev)
struct c4iw_wr_wait {
struct completion completion;
int ret;
+ struct kref kref;
};
+void _c4iw_free_wr_wait(struct kref *kref);
+
+static inline void c4iw_put_wr_wait(struct c4iw_wr_wait *wr_waitp)
+{
+ pr_debug("wr_wait %p ref before put %u\n", wr_waitp,
+ kref_read(&wr_waitp->kref));
+ WARN_ON(kref_read(&wr_waitp->kref) == 0);
+ kref_put(&wr_waitp->kref, _c4iw_free_wr_wait);
+}
+
+static inline void c4iw_get_wr_wait(struct c4iw_wr_wait *wr_waitp)
+{
+ pr_debug("wr_wait %p ref before get %u\n", wr_waitp,
+ kref_read(&wr_waitp->kref));
+ WARN_ON(kref_read(&wr_waitp->kref) == 0);
+ kref_get(&wr_waitp->kref);
+}
+
static inline void c4iw_init_wr_wait(struct c4iw_wr_wait *wr_waitp)
{
wr_waitp->ret = 0;
init_completion(&wr_waitp->completion);
}
-static inline void c4iw_wake_up(struct c4iw_wr_wait *wr_waitp, int ret)
+static inline void _c4iw_wake_up(struct c4iw_wr_wait *wr_waitp, int ret,
+ bool deref)
{
wr_waitp->ret = ret;
complete(&wr_waitp->completion);
+ if (deref)
+ c4iw_put_wr_wait(wr_waitp);
+}
+
+static inline void c4iw_wake_up_noref(struct c4iw_wr_wait *wr_waitp, int ret)
+{
+ _c4iw_wake_up(wr_waitp, ret, false);
+}
+
+static inline void c4iw_wake_up_deref(struct c4iw_wr_wait *wr_waitp, int ret)
+{
+ _c4iw_wake_up(wr_waitp, ret, true);
}
static inline int c4iw_wait_for_reply(struct c4iw_rdev *rdev,
@@ -228,18 +270,40 @@ static inline int c4iw_wait_for_reply(struct c4iw_rdev *rdev,
ret = wait_for_completion_timeout(&wr_waitp->completion, C4IW_WR_TO);
if (!ret) {
- PDBG("%s - Device %s not responding (disabling device) - tid %u qpid %u\n",
- func, pci_name(rdev->lldi.pdev), hwtid, qpid);
+ pr_err("%s - Device %s not responding (disabling device) - tid %u qpid %u\n",
+ func, pci_name(rdev->lldi.pdev), hwtid, qpid);
rdev->flags |= T4_FATAL_ERROR;
wr_waitp->ret = -EIO;
+ goto out;
}
-out:
if (wr_waitp->ret)
- PDBG("%s: FW reply %d tid %u qpid %u\n",
- pci_name(rdev->lldi.pdev), wr_waitp->ret, hwtid, qpid);
+ pr_debug("%s: FW reply %d tid %u qpid %u\n",
+ pci_name(rdev->lldi.pdev), wr_waitp->ret, hwtid, qpid);
+out:
return wr_waitp->ret;
}
+int c4iw_ofld_send(struct c4iw_rdev *rdev, struct sk_buff *skb);
+
+static inline int c4iw_ref_send_wait(struct c4iw_rdev *rdev,
+ struct sk_buff *skb,
+ struct c4iw_wr_wait *wr_waitp,
+ u32 hwtid, u32 qpid,
+ const char *func)
+{
+ int ret;
+
+ pr_debug("%s wr_wait %p hwtid %u qpid %u\n", func, wr_waitp, hwtid,
+ qpid);
+ c4iw_get_wr_wait(wr_waitp);
+ ret = c4iw_ofld_send(rdev, skb);
+ if (ret) {
+ c4iw_put_wr_wait(wr_waitp);
+ return ret;
+ }
+ return c4iw_wait_for_reply(rdev, wr_waitp, hwtid, qpid, func);
+}
+
enum db_state {
NORMAL = 0,
FLOW_CONTROL = 1,
@@ -250,99 +314,40 @@ enum db_state {
struct c4iw_dev {
struct ib_device ibdev;
struct c4iw_rdev rdev;
- u32 device_cap_flags;
- struct idr cqidr;
- struct idr qpidr;
- struct idr mmidr;
- spinlock_t lock;
+ struct xarray cqs;
+ struct xarray qps;
+ struct xarray mrs;
struct mutex db_mutex;
struct dentry *debugfs_root;
enum db_state db_state;
- struct idr hwtid_idr;
- struct idr atid_idr;
- struct idr stid_idr;
+ struct xarray hwtids;
+ struct xarray atids;
+ struct xarray stids;
struct list_head db_fc_list;
u32 avail_ird;
wait_queue_head_t wait;
};
+struct uld_ctx {
+ struct list_head entry;
+ struct cxgb4_lld_info lldi;
+ struct c4iw_dev *dev;
+ struct work_struct reg_work;
+};
+
static inline struct c4iw_dev *to_c4iw_dev(struct ib_device *ibdev)
{
return container_of(ibdev, struct c4iw_dev, ibdev);
}
-static inline struct c4iw_dev *rdev_to_c4iw_dev(struct c4iw_rdev *rdev)
-{
- return container_of(rdev, struct c4iw_dev, rdev);
-}
-
static inline struct c4iw_cq *get_chp(struct c4iw_dev *rhp, u32 cqid)
{
- return idr_find(&rhp->cqidr, cqid);
+ return xa_load(&rhp->cqs, cqid);
}
static inline struct c4iw_qp *get_qhp(struct c4iw_dev *rhp, u32 qpid)
{
- return idr_find(&rhp->qpidr, qpid);
-}
-
-static inline struct c4iw_mr *get_mhp(struct c4iw_dev *rhp, u32 mmid)
-{
- return idr_find(&rhp->mmidr, mmid);
-}
-
-static inline int _insert_handle(struct c4iw_dev *rhp, struct idr *idr,
- void *handle, u32 id, int lock)
-{
- int ret;
-
- if (lock) {
- idr_preload(GFP_KERNEL);
- spin_lock_irq(&rhp->lock);
- }
-
- ret = idr_alloc(idr, handle, id, id + 1, GFP_ATOMIC);
-
- if (lock) {
- spin_unlock_irq(&rhp->lock);
- idr_preload_end();
- }
-
- BUG_ON(ret == -ENOSPC);
- return ret < 0 ? ret : 0;
-}
-
-static inline int insert_handle(struct c4iw_dev *rhp, struct idr *idr,
- void *handle, u32 id)
-{
- return _insert_handle(rhp, idr, handle, id, 1);
-}
-
-static inline int insert_handle_nolock(struct c4iw_dev *rhp, struct idr *idr,
- void *handle, u32 id)
-{
- return _insert_handle(rhp, idr, handle, id, 0);
-}
-
-static inline void _remove_handle(struct c4iw_dev *rhp, struct idr *idr,
- u32 id, int lock)
-{
- if (lock)
- spin_lock_irq(&rhp->lock);
- idr_remove(idr, id);
- if (lock)
- spin_unlock_irq(&rhp->lock);
-}
-
-static inline void remove_handle(struct c4iw_dev *rhp, struct idr *idr, u32 id)
-{
- _remove_handle(rhp, idr, id, 1);
-}
-
-static inline void remove_handle_nolock(struct c4iw_dev *rhp,
- struct idr *idr, u32 id)
-{
- _remove_handle(rhp, idr, id, 0);
+ return xa_load(&rhp->qps, qpid);
}
extern uint c4iw_max_read_depth;
@@ -392,6 +397,7 @@ struct c4iw_mr {
dma_addr_t mpl_addr;
u32 max_mpl_len;
u32 mpl_len;
+ struct c4iw_wr_wait *wr_waitp;
};
static inline struct c4iw_mr *to_c4iw_mr(struct ib_mr *ibmr)
@@ -405,6 +411,7 @@ struct c4iw_mw {
struct sk_buff *dereg_skb;
u64 kva;
struct tpt_attributes attr;
+ struct c4iw_wr_wait *wr_waitp;
};
static inline struct c4iw_mw *to_c4iw_mw(struct ib_mw *ibmw)
@@ -419,8 +426,9 @@ struct c4iw_cq {
struct t4_cq cq;
spinlock_t lock;
spinlock_t comp_handler_lock;
- atomic_t refcnt;
- wait_queue_head_t wait;
+ refcount_t refcnt;
+ struct completion cq_rel_comp;
+ struct c4iw_wr_wait *wr_waitp;
};
static inline struct c4iw_cq *to_c4iw_cq(struct ib_cq *ibcq)
@@ -476,12 +484,13 @@ struct c4iw_qp {
struct t4_wq wq;
spinlock_t lock;
struct mutex mutex;
- struct kref kref;
wait_queue_head_t wait;
- struct timer_list timer;
int sq_sig_all;
- struct completion rq_drained;
- struct completion sq_drained;
+ struct c4iw_srq *srq;
+ struct c4iw_ucontext *ucontext;
+ struct c4iw_wr_wait *wr_waitp;
+ struct completion qp_rel_comp;
+ refcount_t qp_refcnt;
};
static inline struct c4iw_qp *to_c4iw_qp(struct ib_qp *ibqp)
@@ -489,12 +498,33 @@ static inline struct c4iw_qp *to_c4iw_qp(struct ib_qp *ibqp)
return container_of(ibqp, struct c4iw_qp, ibqp);
}
+struct c4iw_srq {
+ struct ib_srq ibsrq;
+ struct list_head db_fc_entry;
+ struct c4iw_dev *rhp;
+ struct t4_srq wq;
+ struct sk_buff *destroy_skb;
+ u32 srq_limit;
+ u32 pdid;
+ int idx;
+ u32 flags;
+ spinlock_t lock; /* protects srq */
+ struct c4iw_wr_wait *wr_waitp;
+ bool armed;
+};
+
+static inline struct c4iw_srq *to_c4iw_srq(struct ib_srq *ibsrq)
+{
+ return container_of(ibsrq, struct c4iw_srq, ibsrq);
+}
+
struct c4iw_ucontext {
struct ib_ucontext ibucontext;
struct c4iw_dev_ucontext uctx;
u32 key;
spinlock_t mmap_lock;
struct list_head mmaps;
+ bool is_32b_cqe;
};
static inline struct c4iw_ucontext *to_c4iw_ucontext(struct ib_ucontext *c)
@@ -502,11 +532,21 @@ static inline struct c4iw_ucontext *to_c4iw_ucontext(struct ib_ucontext *c)
return container_of(c, struct c4iw_ucontext, ibucontext);
}
+enum {
+ CXGB4_MMAP_BAR,
+ CXGB4_MMAP_BAR_WC,
+ CXGB4_MMAP_CONTIG,
+ CXGB4_MMAP_NON_CONTIG,
+};
+
struct c4iw_mm_entry {
struct list_head entry;
u64 addr;
u32 key;
+ void *vaddr;
+ dma_addr_t dma_addr;
unsigned len;
+ u8 mmap_flag;
};
static inline struct c4iw_mm_entry *remove_mmap(struct c4iw_ucontext *ucontext,
@@ -522,8 +562,8 @@ static inline struct c4iw_mm_entry *remove_mmap(struct c4iw_ucontext *ucontext,
if (mm->key == key && mm->len == len) {
list_del_init(&mm->entry);
spin_unlock(&ucontext->mmap_lock);
- PDBG("%s key 0x%x addr 0x%llx len %d\n", __func__,
- key, (unsigned long long) mm->addr, mm->len);
+ pr_debug("key 0x%x addr 0x%llx len %d\n", key,
+ (unsigned long long)mm->addr, mm->len);
return mm;
}
}
@@ -531,12 +571,38 @@ static inline struct c4iw_mm_entry *remove_mmap(struct c4iw_ucontext *ucontext,
return NULL;
}
+static inline void insert_flag_to_mmap(struct c4iw_rdev *rdev,
+ struct c4iw_mm_entry *mm, u64 addr)
+{
+ if (addr >= pci_resource_start(rdev->lldi.pdev, 0) &&
+ (addr < (pci_resource_start(rdev->lldi.pdev, 0) +
+ pci_resource_len(rdev->lldi.pdev, 0))))
+ mm->mmap_flag = CXGB4_MMAP_BAR;
+ else if (addr >= pci_resource_start(rdev->lldi.pdev, 2) &&
+ (addr < (pci_resource_start(rdev->lldi.pdev, 2) +
+ pci_resource_len(rdev->lldi.pdev, 2)))) {
+ if (addr >= rdev->oc_mw_pa) {
+ mm->mmap_flag = CXGB4_MMAP_BAR_WC;
+ } else {
+ if (is_t4(rdev->lldi.adapter_type))
+ mm->mmap_flag = CXGB4_MMAP_BAR;
+ else
+ mm->mmap_flag = CXGB4_MMAP_BAR_WC;
+ }
+ } else {
+ if (addr)
+ mm->mmap_flag = CXGB4_MMAP_CONTIG;
+ else
+ mm->mmap_flag = CXGB4_MMAP_NON_CONTIG;
+ }
+}
+
static inline void insert_mmap(struct c4iw_ucontext *ucontext,
struct c4iw_mm_entry *mm)
{
spin_lock(&ucontext->mmap_lock);
- PDBG("%s key 0x%x addr 0x%llx len %d\n", __func__,
- mm->key, (unsigned long long) mm->addr, mm->len);
+ pr_debug("key 0x%x addr 0x%llx len %d\n",
+ mm->key, (unsigned long long)mm->addr, mm->len);
list_add_tail(&mm->entry, &ucontext->mmaps);
spin_unlock(&ucontext->mmap_lock);
}
@@ -623,12 +689,6 @@ static inline u32 c4iw_ib_to_tpt_access(int a)
FW_RI_MEM_ACCESS_LOCAL_READ;
}
-static inline u32 c4iw_ib_to_tpt_bind_access(int acc)
-{
- return (acc & IB_ACCESS_REMOTE_WRITE ? FW_RI_MEM_ACCESS_REM_WRITE : 0) |
- (acc & IB_ACCESS_REMOTE_READ ? FW_RI_MEM_ACCESS_REM_READ : 0);
-}
-
enum c4iw_mmid_state {
C4IW_STAG_STATE_VALID,
C4IW_STAG_STATE_INVALID
@@ -652,17 +712,17 @@ enum c4iw_mmid_state {
#define MPA_V2_RDMA_READ_RTR 0x4000
#define MPA_V2_IRD_ORD_MASK 0x3FFF
-#define c4iw_put_ep(ep) { \
- PDBG("put_ep (via %s:%u) ep %p refcnt %d\n", __func__, __LINE__, \
- ep, atomic_read(&((ep)->kref.refcount))); \
- WARN_ON(atomic_read(&((ep)->kref.refcount)) < 1); \
- kref_put(&((ep)->kref), _c4iw_free_ep); \
+#define c4iw_put_ep(ep) { \
+ pr_debug("put_ep ep %p refcnt %d\n", \
+ ep, kref_read(&((ep)->kref))); \
+ WARN_ON(kref_read(&((ep)->kref)) < 1); \
+ kref_put(&((ep)->kref), _c4iw_free_ep); \
}
-#define c4iw_get_ep(ep) { \
- PDBG("get_ep (via %s:%u) ep %p, refcnt %d\n", __func__, __LINE__, \
- ep, atomic_read(&((ep)->kref.refcount))); \
- kref_get(&((ep)->kref)); \
+#define c4iw_get_ep(ep) { \
+ pr_debug("get_ep ep %p, refcnt %d\n", \
+ ep, kref_read(&((ep)->kref))); \
+ kref_get(&((ep)->kref)); \
}
void _c4iw_free_ep(struct kref *kref);
@@ -671,7 +731,7 @@ struct mpa_message {
u8 flags;
u8 revision;
__be16 private_data_size;
- u8 private_data[0];
+ u8 private_data[];
};
struct mpa_v2_conn_params {
@@ -683,7 +743,7 @@ struct terminate_message {
u8 layer_etype;
u8 ecode;
__be16 hdrct_rsvd;
- u8 len_hdrs[0];
+ u8 len_hdrs[];
};
#define TERM_MAX_LENGTH (sizeof(struct terminate_message) + 2 + 18 + 28)
@@ -802,7 +862,10 @@ enum conn_pre_alloc_buffers {
CN_MAX_CON_BUF
};
-#define FLOWC_LEN 80
+enum {
+ FLOWC_LEN = offsetof(struct fw_flowc_wr, mnemval[FW_FLOWC_MNEM_MAX])
+};
+
union cpl_wr_size {
struct cpl_abort_req abrt_req;
struct cpl_abort_rpl abrt_rpl;
@@ -821,7 +884,7 @@ struct c4iw_ep_common {
struct mutex mutex;
struct sockaddr_storage local_addr;
struct sockaddr_storage remote_addr;
- struct c4iw_wr_wait wr_wait;
+ struct c4iw_wr_wait *wr_waitp;
unsigned long flags;
unsigned long history;
};
@@ -869,7 +932,11 @@ struct c4iw_ep {
unsigned int retry_count;
int snd_win;
int rcv_win;
+ u32 snd_wscale;
struct c4iw_ep_stats stats;
+ u32 srqe_idx;
+ u32 rx_pdu_out_cnt;
+ struct sk_buff *peer_abort_skb;
};
static inline struct c4iw_ep *to_ep(struct iw_cm_id *cm_id)
@@ -899,14 +966,12 @@ void c4iw_id_table_free(struct c4iw_id_table *alloc);
typedef int (*c4iw_handler_func)(struct c4iw_dev *dev, struct sk_buff *skb);
-int c4iw_ep_redirect(void *ctx, struct dst_entry *old, struct dst_entry *new,
- struct l2t_entry *l2t);
void c4iw_put_qpid(struct c4iw_rdev *rdev, u32 qpid,
struct c4iw_dev_ucontext *uctx);
u32 c4iw_get_resource(struct c4iw_id_table *id_table);
void c4iw_put_resource(struct c4iw_id_table *id_table, u32 entry);
-int c4iw_init_resource(struct c4iw_rdev *rdev, u32 nr_tpt, u32 nr_pdid);
-int c4iw_init_ctrl_qp(struct c4iw_rdev *rdev);
+int c4iw_init_resource(struct c4iw_rdev *rdev, u32 nr_tpt,
+ u32 nr_pdid, u32 nr_srqt);
int c4iw_pblpool_create(struct c4iw_rdev *rdev);
int c4iw_rqtpool_create(struct c4iw_rdev *rdev);
int c4iw_ocqp_pool_create(struct c4iw_rdev *rdev);
@@ -914,8 +979,7 @@ void c4iw_pblpool_destroy(struct c4iw_rdev *rdev);
void c4iw_rqtpool_destroy(struct c4iw_rdev *rdev);
void c4iw_ocqp_pool_destroy(struct c4iw_rdev *rdev);
void c4iw_destroy_resource(struct c4iw_resource *rscp);
-int c4iw_destroy_ctrl_qp(struct c4iw_rdev *rdev);
-int c4iw_register_device(struct c4iw_dev *dev);
+void c4iw_register_device(struct work_struct *work);
void c4iw_unregister_device(struct c4iw_dev *dev);
int __init c4iw_cm_init(void);
void c4iw_cm_term(void);
@@ -924,10 +988,10 @@ void c4iw_release_dev_ucontext(struct c4iw_rdev *rdev,
void c4iw_init_dev_ucontext(struct c4iw_rdev *rdev,
struct c4iw_dev_ucontext *uctx);
int c4iw_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
-int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
- struct ib_send_wr **bad_wr);
-int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
- struct ib_recv_wr **bad_wr);
+int c4iw_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+ const struct ib_send_wr **bad_wr);
+int c4iw_post_receive(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
+ const struct ib_recv_wr **bad_wr);
int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param);
int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog);
int c4iw_destroy_listen(struct iw_cm_id *cm_id);
@@ -935,30 +999,31 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param);
int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len);
void c4iw_qp_add_ref(struct ib_qp *qp);
void c4iw_qp_rem_ref(struct ib_qp *qp);
-struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd,
- enum ib_mr_type mr_type,
+struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
u32 max_num_sg);
int c4iw_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
unsigned int *sg_offset);
-int c4iw_dealloc_mw(struct ib_mw *mw);
-struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
- struct ib_udata *udata);
+void c4iw_dealloc(struct uld_ctx *ctx);
struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start,
u64 length, u64 virt, int acc,
+ struct ib_dmah *dmah,
struct ib_udata *udata);
struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc);
-int c4iw_dereg_mr(struct ib_mr *ib_mr);
-int c4iw_destroy_cq(struct ib_cq *ib_cq);
-struct ib_cq *c4iw_create_cq(struct ib_device *ibdev,
- const struct ib_cq_init_attr *attr,
- struct ib_ucontext *ib_context,
- struct ib_udata *udata);
-int c4iw_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata);
+int c4iw_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata);
+int c4iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata);
+void c4iw_cq_rem_ref(struct c4iw_cq *chp);
+int c4iw_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+ struct uverbs_attr_bundle *attrs);
int c4iw_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags);
-int c4iw_destroy_qp(struct ib_qp *ib_qp);
-struct ib_qp *c4iw_create_qp(struct ib_pd *pd,
- struct ib_qp_init_attr *attrs,
- struct ib_udata *udata);
+int c4iw_modify_srq(struct ib_srq *ib_srq, struct ib_srq_attr *attr,
+ enum ib_srq_attr_mask srq_attr_mask,
+ struct ib_udata *udata);
+int c4iw_destroy_srq(struct ib_srq *ib_srq, struct ib_udata *udata);
+int c4iw_create_srq(struct ib_srq *srq, struct ib_srq_init_attr *attrs,
+ struct ib_udata *udata);
+int c4iw_destroy_qp(struct ib_qp *ib_qp, struct ib_udata *udata);
+int c4iw_create_qp(struct ib_qp *qp, struct ib_qp_init_attr *attrs,
+ struct ib_udata *udata);
int c4iw_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
int attr_mask, struct ib_udata *udata);
int c4iw_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
@@ -970,15 +1035,12 @@ u32 c4iw_pblpool_alloc(struct c4iw_rdev *rdev, int size);
void c4iw_pblpool_free(struct c4iw_rdev *rdev, u32 addr, int size);
u32 c4iw_ocqp_pool_alloc(struct c4iw_rdev *rdev, int size);
void c4iw_ocqp_pool_free(struct c4iw_rdev *rdev, u32 addr, int size);
-int c4iw_ofld_send(struct c4iw_rdev *rdev, struct sk_buff *skb);
-void c4iw_flush_hw_cq(struct c4iw_cq *chp);
+void c4iw_flush_hw_cq(struct c4iw_cq *chp, struct c4iw_qp *flush_qhp);
void c4iw_count_rcqes(struct t4_cq *cq, struct t4_wq *wq, int *count);
int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp);
int c4iw_flush_rq(struct t4_wq *wq, struct t4_cq *cq, int count);
int c4iw_flush_sq(struct c4iw_qp *qhp);
int c4iw_ev_handler(struct c4iw_dev *rnicp, u32 qid);
-u16 c4iw_rqes_posted(struct c4iw_qp *qhp);
-int c4iw_post_terminate(struct c4iw_qp *qhp, struct t4_cqe *err_cqe);
u32 c4iw_get_cqid(struct c4iw_rdev *rdev, struct c4iw_dev_ucontext *uctx);
void c4iw_put_cqid(struct c4iw_rdev *rdev, u32 qid,
struct c4iw_dev_ucontext *uctx);
@@ -992,13 +1054,24 @@ extern c4iw_handler_func c4iw_handlers[NUM_CPL_CMDS];
void __iomem *c4iw_bar2_addrs(struct c4iw_rdev *rdev, unsigned int qid,
enum cxgb4_bar2_qtype qtype,
unsigned int *pbar2_qid, u64 *pbar2_pa);
+int c4iw_alloc_srq_idx(struct c4iw_rdev *rdev);
+void c4iw_free_srq_idx(struct c4iw_rdev *rdev, int idx);
extern void c4iw_log_wr_stats(struct t4_wq *wq, struct t4_cqe *cqe);
extern int c4iw_wr_log;
extern int db_fc_threshold;
extern int db_coalescing_threshold;
extern int use_dsgl;
-void c4iw_drain_rq(struct ib_qp *qp);
-void c4iw_drain_sq(struct ib_qp *qp);
void c4iw_invalidate_mr(struct c4iw_dev *rhp, u32 rkey);
+void c4iw_dispatch_srq_limit_reached_event(struct c4iw_srq *srq);
+void c4iw_copy_wr_to_srq(struct t4_srq *srq, union t4_recv_wr *wqe, u8 len16);
+void c4iw_flush_srqidx(struct c4iw_qp *qhp, u32 srqidx);
+int c4iw_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
+ const struct ib_recv_wr **bad_wr);
+struct c4iw_wr_wait *c4iw_alloc_wr_wait(gfp_t gfp);
+
+int c4iw_fill_res_mr_entry(struct sk_buff *msg, struct ib_mr *ibmr);
+int c4iw_fill_res_cq_entry(struct sk_buff *msg, struct ib_cq *ibcq);
+int c4iw_fill_res_qp_entry(struct sk_buff *msg, struct ib_qp *ibqp);
+int c4iw_fill_res_cm_id_entry(struct sk_buff *msg, struct rdma_cm_id *cm_id);
#endif
diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c
index 410408f886c1..adeed7447e7b 100644
--- a/drivers/infiniband/hw/cxgb4/mem.c
+++ b/drivers/infiniband/hw/cxgb4/mem.c
@@ -38,9 +38,9 @@
#include "iw_cxgb4.h"
-int use_dsgl = 0;
+int use_dsgl = 1;
module_param(use_dsgl, int, 0644);
-MODULE_PARM_DESC(use_dsgl, "Use DSGL for PBL/FastReg (default=0)");
+MODULE_PARM_DESC(use_dsgl, "Use DSGL for PBL/FastReg (default=1) (DEPRECATED)");
#define T4_ULPTX_MIN_IO 32
#define C4IW_MAX_INLINE_SIZE 96
@@ -60,18 +60,18 @@ static int mr_exceeds_hw_limits(struct c4iw_dev *dev, u64 length)
static int _c4iw_write_mem_dma_aligned(struct c4iw_rdev *rdev, u32 addr,
u32 len, dma_addr_t data,
- int wait, struct sk_buff *skb)
+ struct sk_buff *skb,
+ struct c4iw_wr_wait *wr_waitp)
{
struct ulp_mem_io *req;
struct ulptx_sgl *sgl;
u8 wr_len;
int ret = 0;
- struct c4iw_wr_wait wr_wait;
addr &= 0x7FFFFFF;
- if (wait)
- c4iw_init_wr_wait(&wr_wait);
+ if (wr_waitp)
+ c4iw_init_wr_wait(wr_waitp);
wr_len = roundup(sizeof(*req) + sizeof(*sgl), 16);
if (!skb) {
@@ -81,12 +81,11 @@ static int _c4iw_write_mem_dma_aligned(struct c4iw_rdev *rdev, u32 addr,
}
set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0);
- req = (struct ulp_mem_io *)__skb_put(skb, wr_len);
- memset(req, 0, wr_len);
+ req = __skb_put_zero(skb, wr_len);
INIT_ULPTX_WR(req, wr_len, 0, 0);
req->wr.wr_hi = cpu_to_be32(FW_WR_OP_V(FW_ULPTX_WR) |
- (wait ? FW_WR_COMPL_F : 0));
- req->wr.wr_lo = wait ? (__force __be64)(unsigned long) &wr_wait : 0L;
+ (wr_waitp ? FW_WR_COMPL_F : 0));
+ req->wr.wr_lo = wr_waitp ? (__force __be64)(unsigned long)wr_waitp : 0L;
req->wr.wr_mid = cpu_to_be32(FW_WR_LEN16_V(DIV_ROUND_UP(wr_len, 16)));
req->cmd = cpu_to_be32(ULPTX_CMD_V(ULP_TX_MEM_WRITE) |
T5_ULP_MEMIO_ORDER_V(1) |
@@ -101,22 +100,21 @@ static int _c4iw_write_mem_dma_aligned(struct c4iw_rdev *rdev, u32 addr,
sgl->len0 = cpu_to_be32(len);
sgl->addr0 = cpu_to_be64(data);
- ret = c4iw_ofld_send(rdev, skb);
- if (ret)
- return ret;
- if (wait)
- ret = c4iw_wait_for_reply(rdev, &wr_wait, 0, 0, __func__);
+ if (wr_waitp)
+ ret = c4iw_ref_send_wait(rdev, skb, wr_waitp, 0, 0, __func__);
+ else
+ ret = c4iw_ofld_send(rdev, skb);
return ret;
}
static int _c4iw_write_mem_inline(struct c4iw_rdev *rdev, u32 addr, u32 len,
- void *data, struct sk_buff *skb)
+ void *data, struct sk_buff *skb,
+ struct c4iw_wr_wait *wr_waitp)
{
struct ulp_mem_io *req;
struct ulptx_idata *sc;
u8 wr_len, *to_dp, *from_dp;
int copy_len, num_wqe, i, ret = 0;
- struct c4iw_wr_wait wr_wait;
__be32 cmd = cpu_to_be32(ULPTX_CMD_V(ULP_TX_MEM_WRITE));
if (is_t4(rdev->lldi.adapter_type))
@@ -125,15 +123,16 @@ static int _c4iw_write_mem_inline(struct c4iw_rdev *rdev, u32 addr, u32 len,
cmd |= cpu_to_be32(T5_ULP_MEMIO_IMM_F);
addr &= 0x7FFFFFF;
- PDBG("%s addr 0x%x len %u\n", __func__, addr, len);
+ pr_debug("addr 0x%x len %u\n", addr, len);
num_wqe = DIV_ROUND_UP(len, C4IW_MAX_INLINE_SIZE);
- c4iw_init_wr_wait(&wr_wait);
+ c4iw_init_wr_wait(wr_waitp);
for (i = 0; i < num_wqe; i++) {
copy_len = len > C4IW_MAX_INLINE_SIZE ? C4IW_MAX_INLINE_SIZE :
len;
- wr_len = roundup(sizeof *req + sizeof *sc +
- roundup(copy_len, T4_ULPTX_MIN_IO), 16);
+ wr_len = roundup(sizeof(*req) + sizeof(*sc) +
+ roundup(copy_len, T4_ULPTX_MIN_IO),
+ 16);
if (!skb) {
skb = alloc_skb(wr_len, GFP_KERNEL | __GFP_NOFAIL);
@@ -142,14 +141,13 @@ static int _c4iw_write_mem_inline(struct c4iw_rdev *rdev, u32 addr, u32 len,
}
set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0);
- req = (struct ulp_mem_io *)__skb_put(skb, wr_len);
- memset(req, 0, wr_len);
+ req = __skb_put_zero(skb, wr_len);
INIT_ULPTX_WR(req, wr_len, 0, 0);
if (i == (num_wqe-1)) {
req->wr.wr_hi = cpu_to_be32(FW_WR_OP_V(FW_ULPTX_WR) |
FW_WR_COMPL_F);
- req->wr.wr_lo = (__force __be64)(unsigned long)&wr_wait;
+ req->wr.wr_lo = (__force __be64)(unsigned long)wr_waitp;
} else
req->wr.wr_hi = cpu_to_be32(FW_WR_OP_V(FW_ULPTX_WR));
req->wr.wr_mid = cpu_to_be32(
@@ -175,19 +173,23 @@ static int _c4iw_write_mem_inline(struct c4iw_rdev *rdev, u32 addr, u32 len,
if (copy_len % T4_ULPTX_MIN_IO)
memset(to_dp + copy_len, 0, T4_ULPTX_MIN_IO -
(copy_len % T4_ULPTX_MIN_IO));
- ret = c4iw_ofld_send(rdev, skb);
- skb = NULL;
+ if (i == (num_wqe-1))
+ ret = c4iw_ref_send_wait(rdev, skb, wr_waitp, 0, 0,
+ __func__);
+ else
+ ret = c4iw_ofld_send(rdev, skb);
if (ret)
- return ret;
+ break;
+ skb = NULL;
len -= C4IW_MAX_INLINE_SIZE;
}
- ret = c4iw_wait_for_reply(rdev, &wr_wait, 0, 0, __func__);
return ret;
}
static int _c4iw_write_mem_dma(struct c4iw_rdev *rdev, u32 addr, u32 len,
- void *data, struct sk_buff *skb)
+ void *data, struct sk_buff *skb,
+ struct c4iw_wr_wait *wr_waitp)
{
u32 remain = len;
u32 dmalen;
@@ -210,7 +212,7 @@ static int _c4iw_write_mem_dma(struct c4iw_rdev *rdev, u32 addr, u32 len,
dmalen = T4_ULPTX_MAX_DMA;
remain -= dmalen;
ret = _c4iw_write_mem_dma_aligned(rdev, addr, dmalen, daddr,
- !remain, skb);
+ skb, remain ? NULL : wr_waitp);
if (ret)
goto out;
addr += dmalen >> 5;
@@ -218,7 +220,8 @@ static int _c4iw_write_mem_dma(struct c4iw_rdev *rdev, u32 addr, u32 len,
daddr += dmalen;
}
if (remain)
- ret = _c4iw_write_mem_inline(rdev, addr, remain, data, skb);
+ ret = _c4iw_write_mem_inline(rdev, addr, remain, data, skb,
+ wr_waitp);
out:
dma_unmap_single(&rdev->lldi.pdev->dev, save, len, DMA_TO_DEVICE);
return ret;
@@ -229,25 +232,33 @@ out:
* If data is NULL, clear len byte of memory to zero.
*/
static int write_adapter_mem(struct c4iw_rdev *rdev, u32 addr, u32 len,
- void *data, struct sk_buff *skb)
+ void *data, struct sk_buff *skb,
+ struct c4iw_wr_wait *wr_waitp)
{
- if (is_t5(rdev->lldi.adapter_type) && use_dsgl) {
- if (len > inline_threshold) {
- if (_c4iw_write_mem_dma(rdev, addr, len, data, skb)) {
- printk_ratelimited(KERN_WARNING
- "%s: dma map"
- " failure (non fatal)\n",
- pci_name(rdev->lldi.pdev));
- return _c4iw_write_mem_inline(rdev, addr, len,
- data, skb);
- } else {
- return 0;
- }
- } else
- return _c4iw_write_mem_inline(rdev, addr,
- len, data, skb);
- } else
- return _c4iw_write_mem_inline(rdev, addr, len, data, skb);
+ int ret;
+
+ if (!rdev->lldi.ulptx_memwrite_dsgl || !use_dsgl) {
+ ret = _c4iw_write_mem_inline(rdev, addr, len, data, skb,
+ wr_waitp);
+ goto out;
+ }
+
+ if (len <= inline_threshold) {
+ ret = _c4iw_write_mem_inline(rdev, addr, len, data, skb,
+ wr_waitp);
+ goto out;
+ }
+
+ ret = _c4iw_write_mem_dma(rdev, addr, len, data, skb, wr_waitp);
+ if (ret) {
+ pr_warn_ratelimited("%s: dma map failure (non fatal)\n",
+ pci_name(rdev->lldi.pdev));
+ ret = _c4iw_write_mem_inline(rdev, addr, len, data, skb,
+ wr_waitp);
+ }
+out:
+ return ret;
+
}
/*
@@ -261,16 +272,20 @@ static int write_tpt_entry(struct c4iw_rdev *rdev, u32 reset_tpt_entry,
enum fw_ri_stag_type type, enum fw_ri_mem_perms perm,
int bind_enabled, u32 zbva, u64 to,
u64 len, u8 page_size, u32 pbl_size, u32 pbl_addr,
- struct sk_buff *skb)
+ struct sk_buff *skb, struct c4iw_wr_wait *wr_waitp)
{
int err;
- struct fw_ri_tpte tpt;
+ struct fw_ri_tpte *tpt;
u32 stag_idx;
static atomic_t key;
if (c4iw_fatal_error(rdev))
return -EIO;
+ tpt = kmalloc(sizeof(*tpt), GFP_KERNEL);
+ if (!tpt)
+ return -ENOMEM;
+
stag_state = stag_state > 0;
stag_idx = (*stag) >> 8;
@@ -280,6 +295,7 @@ static int write_tpt_entry(struct c4iw_rdev *rdev, u32 reset_tpt_entry,
mutex_lock(&rdev->stats.lock);
rdev->stats.stag.fail++;
mutex_unlock(&rdev->stats.lock);
+ kfree(tpt);
return -ENOMEM;
}
mutex_lock(&rdev->stats.lock);
@@ -289,33 +305,33 @@ static int write_tpt_entry(struct c4iw_rdev *rdev, u32 reset_tpt_entry,
mutex_unlock(&rdev->stats.lock);
*stag = (stag_idx << 8) | (atomic_inc_return(&key) & 0xff);
}
- PDBG("%s stag_state 0x%0x type 0x%0x pdid 0x%0x, stag_idx 0x%x\n",
- __func__, stag_state, type, pdid, stag_idx);
+ pr_debug("stag_state 0x%0x type 0x%0x pdid 0x%0x, stag_idx 0x%x\n",
+ stag_state, type, pdid, stag_idx);
/* write TPT entry */
if (reset_tpt_entry)
- memset(&tpt, 0, sizeof(tpt));
+ memset(tpt, 0, sizeof(*tpt));
else {
- tpt.valid_to_pdid = cpu_to_be32(FW_RI_TPTE_VALID_F |
+ tpt->valid_to_pdid = cpu_to_be32(FW_RI_TPTE_VALID_F |
FW_RI_TPTE_STAGKEY_V((*stag & FW_RI_TPTE_STAGKEY_M)) |
FW_RI_TPTE_STAGSTATE_V(stag_state) |
FW_RI_TPTE_STAGTYPE_V(type) | FW_RI_TPTE_PDID_V(pdid));
- tpt.locread_to_qpid = cpu_to_be32(FW_RI_TPTE_PERM_V(perm) |
+ tpt->locread_to_qpid = cpu_to_be32(FW_RI_TPTE_PERM_V(perm) |
(bind_enabled ? FW_RI_TPTE_MWBINDEN_F : 0) |
FW_RI_TPTE_ADDRTYPE_V((zbva ? FW_RI_ZERO_BASED_TO :
FW_RI_VA_BASED_TO))|
FW_RI_TPTE_PS_V(page_size));
- tpt.nosnoop_pbladdr = !pbl_size ? 0 : cpu_to_be32(
+ tpt->nosnoop_pbladdr = !pbl_size ? 0 : cpu_to_be32(
FW_RI_TPTE_PBLADDR_V(PBL_OFF(rdev, pbl_addr)>>3));
- tpt.len_lo = cpu_to_be32((u32)(len & 0xffffffffUL));
- tpt.va_hi = cpu_to_be32((u32)(to >> 32));
- tpt.va_lo_fbo = cpu_to_be32((u32)(to & 0xffffffffUL));
- tpt.dca_mwbcnt_pstag = cpu_to_be32(0);
- tpt.len_hi = cpu_to_be32((u32)(len >> 32));
+ tpt->len_lo = cpu_to_be32((u32)(len & 0xffffffffUL));
+ tpt->va_hi = cpu_to_be32((u32)(to >> 32));
+ tpt->va_lo_fbo = cpu_to_be32((u32)(to & 0xffffffffUL));
+ tpt->dca_mwbcnt_pstag = cpu_to_be32(0);
+ tpt->len_hi = cpu_to_be32((u32)(len >> 32));
}
err = write_adapter_mem(rdev, stag_idx +
(rdev->lldi.vr->stag.start >> 5),
- sizeof(tpt), &tpt, skb);
+ sizeof(*tpt), tpt, skb, wr_waitp);
if (reset_tpt_entry) {
c4iw_put_resource(&rdev->resource.tpt_table, stag_idx);
@@ -323,49 +339,39 @@ static int write_tpt_entry(struct c4iw_rdev *rdev, u32 reset_tpt_entry,
rdev->stats.stag.cur -= 32;
mutex_unlock(&rdev->stats.lock);
}
+ kfree(tpt);
return err;
}
static int write_pbl(struct c4iw_rdev *rdev, __be64 *pbl,
- u32 pbl_addr, u32 pbl_size)
+ u32 pbl_addr, u32 pbl_size, struct c4iw_wr_wait *wr_waitp)
{
int err;
- PDBG("%s *pdb_addr 0x%x, pbl_base 0x%x, pbl_size %d\n",
- __func__, pbl_addr, rdev->lldi.vr->pbl.start,
- pbl_size);
+ pr_debug("*pbl_addr 0x%x, pbl_base 0x%x, pbl_size %d\n",
+ pbl_addr, rdev->lldi.vr->pbl.start,
+ pbl_size);
- err = write_adapter_mem(rdev, pbl_addr >> 5, pbl_size << 3, pbl, NULL);
+ err = write_adapter_mem(rdev, pbl_addr >> 5, pbl_size << 3, pbl, NULL,
+ wr_waitp);
return err;
}
static int dereg_mem(struct c4iw_rdev *rdev, u32 stag, u32 pbl_size,
- u32 pbl_addr, struct sk_buff *skb)
+ u32 pbl_addr, struct sk_buff *skb,
+ struct c4iw_wr_wait *wr_waitp)
{
return write_tpt_entry(rdev, 1, &stag, 0, 0, 0, 0, 0, 0, 0UL, 0, 0,
- pbl_size, pbl_addr, skb);
-}
-
-static int allocate_window(struct c4iw_rdev *rdev, u32 * stag, u32 pdid)
-{
- *stag = T4_STAG_UNSET;
- return write_tpt_entry(rdev, 0, stag, 0, pdid, FW_RI_STAG_MW, 0, 0, 0,
- 0UL, 0, 0, 0, 0, NULL);
-}
-
-static int deallocate_window(struct c4iw_rdev *rdev, u32 stag,
- struct sk_buff *skb)
-{
- return write_tpt_entry(rdev, 1, &stag, 0, 0, 0, 0, 0, 0, 0UL, 0, 0, 0,
- 0, skb);
+ pbl_size, pbl_addr, skb, wr_waitp);
}
static int allocate_stag(struct c4iw_rdev *rdev, u32 *stag, u32 pdid,
- u32 pbl_size, u32 pbl_addr)
+ u32 pbl_size, u32 pbl_addr,
+ struct c4iw_wr_wait *wr_waitp)
{
*stag = T4_STAG_UNSET;
return write_tpt_entry(rdev, 0, stag, 0, pdid, FW_RI_STAG_NSMR, 0, 0, 0,
- 0UL, 0, 0, pbl_size, pbl_addr, NULL);
+ 0UL, 0, 0, pbl_size, pbl_addr, NULL, wr_waitp);
}
static int finish_mem_reg(struct c4iw_mr *mhp, u32 stag)
@@ -376,8 +382,10 @@ static int finish_mem_reg(struct c4iw_mr *mhp, u32 stag)
mhp->attr.stag = stag;
mmid = stag >> 8;
mhp->ibmr.rkey = mhp->ibmr.lkey = stag;
- PDBG("%s mmid 0x%x mhp %p\n", __func__, mmid, mhp);
- return insert_handle(mhp->rhp, &mhp->rhp->mmidr, mhp, mmid);
+ mhp->ibmr.length = mhp->attr.len;
+ mhp->ibmr.page_size = 1U << (mhp->attr.page_size + 12);
+ pr_debug("mmid 0x%x mhp %p\n", mmid, mhp);
+ return xa_insert_irq(&mhp->rhp->mrs, mmid, mhp, GFP_KERNEL);
}
static int register_mem(struct c4iw_dev *rhp, struct c4iw_pd *php,
@@ -392,14 +400,15 @@ static int register_mem(struct c4iw_dev *rhp, struct c4iw_pd *php,
mhp->attr.mw_bind_enable, mhp->attr.zbva,
mhp->attr.va_fbo, mhp->attr.len ?
mhp->attr.len : -1, shift - 12,
- mhp->attr.pbl_size, mhp->attr.pbl_addr, NULL);
+ mhp->attr.pbl_size, mhp->attr.pbl_addr, NULL,
+ mhp->wr_waitp);
if (ret)
return ret;
ret = finish_mem_reg(mhp, stag);
if (ret) {
dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size,
- mhp->attr.pbl_addr, mhp->dereg_skb);
+ mhp->attr.pbl_addr, mhp->dereg_skb, mhp->wr_waitp);
mhp->dereg_skb = NULL;
}
return ret;
@@ -426,18 +435,24 @@ struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc)
int ret;
u32 stag = T4_STAG_UNSET;
- PDBG("%s ib_pd %p\n", __func__, pd);
+ pr_debug("ib_pd %p\n", pd);
php = to_c4iw_pd(pd);
rhp = php->rhp;
mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
if (!mhp)
return ERR_PTR(-ENOMEM);
+ mhp->wr_waitp = c4iw_alloc_wr_wait(GFP_KERNEL);
+ if (!mhp->wr_waitp) {
+ ret = -ENOMEM;
+ goto err_free_mhp;
+ }
+ c4iw_init_wr_wait(mhp->wr_waitp);
mhp->dereg_skb = alloc_skb(SGE_MAX_WR_LEN, GFP_KERNEL);
if (!mhp->dereg_skb) {
ret = -ENOMEM;
- goto err0;
+ goto err_free_wr_wait;
}
mhp->rhp = rhp;
@@ -453,37 +468,42 @@ struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc)
ret = write_tpt_entry(&rhp->rdev, 0, &stag, 1, php->pdid,
FW_RI_STAG_NSMR, mhp->attr.perms,
mhp->attr.mw_bind_enable, 0, 0, ~0ULL, 0, 0, 0,
- NULL);
+ NULL, mhp->wr_waitp);
if (ret)
- goto err1;
+ goto err_free_skb;
ret = finish_mem_reg(mhp, stag);
if (ret)
- goto err2;
+ goto err_dereg_mem;
return &mhp->ibmr;
-err2:
+err_dereg_mem:
dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size,
- mhp->attr.pbl_addr, mhp->dereg_skb);
-err1:
+ mhp->attr.pbl_addr, mhp->dereg_skb, mhp->wr_waitp);
+err_free_skb:
kfree_skb(mhp->dereg_skb);
-err0:
+err_free_wr_wait:
+ c4iw_put_wr_wait(mhp->wr_waitp);
+err_free_mhp:
kfree(mhp);
return ERR_PTR(ret);
}
struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
- u64 virt, int acc, struct ib_udata *udata)
+ u64 virt, int acc, struct ib_dmah *dmah,
+ struct ib_udata *udata)
{
__be64 *pages;
- int shift, n, len;
- int i, k, entry;
- int err = 0;
- struct scatterlist *sg;
+ int shift, n, i;
+ int err = -ENOMEM;
+ struct ib_block_iter biter;
struct c4iw_dev *rhp;
struct c4iw_pd *php;
struct c4iw_mr *mhp;
- PDBG("%s ib_pd %p\n", __func__, pd);
+ pr_debug("ib_pd %p\n", pd);
+
+ if (dmah)
+ return ERR_PTR(-EOPNOTSUPP);
if (length == ~0ULL)
return ERR_PTR(-EINVAL);
@@ -500,63 +520,57 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
if (!mhp)
return ERR_PTR(-ENOMEM);
+ mhp->wr_waitp = c4iw_alloc_wr_wait(GFP_KERNEL);
+ if (!mhp->wr_waitp)
+ goto err_free_mhp;
mhp->dereg_skb = alloc_skb(SGE_MAX_WR_LEN, GFP_KERNEL);
- if (!mhp->dereg_skb) {
- kfree(mhp);
- return ERR_PTR(-ENOMEM);
- }
+ if (!mhp->dereg_skb)
+ goto err_free_wr_wait;
mhp->rhp = rhp;
- mhp->umem = ib_umem_get(pd->uobject->context, start, length, acc, 0);
- if (IS_ERR(mhp->umem)) {
- err = PTR_ERR(mhp->umem);
- kfree_skb(mhp->dereg_skb);
- kfree(mhp);
- return ERR_PTR(err);
- }
+ mhp->umem = ib_umem_get(pd->device, start, length, acc);
+ if (IS_ERR(mhp->umem))
+ goto err_free_skb;
- shift = ffs(mhp->umem->page_size) - 1;
+ shift = PAGE_SHIFT;
- n = mhp->umem->nmap;
+ n = ib_umem_num_dma_blocks(mhp->umem, 1 << shift);
err = alloc_pbl(mhp, n);
if (err)
- goto err;
+ goto err_umem_release;
pages = (__be64 *) __get_free_page(GFP_KERNEL);
if (!pages) {
err = -ENOMEM;
- goto err_pbl;
+ goto err_pbl_free;
}
i = n = 0;
- for_each_sg(mhp->umem->sg_head.sgl, sg, mhp->umem->nmap, entry) {
- len = sg_dma_len(sg) >> shift;
- for (k = 0; k < len; ++k) {
- pages[i++] = cpu_to_be64(sg_dma_address(sg) +
- mhp->umem->page_size * k);
- if (i == PAGE_SIZE / sizeof *pages) {
- err = write_pbl(&mhp->rhp->rdev,
- pages,
- mhp->attr.pbl_addr + (n << 3), i);
- if (err)
- goto pbl_done;
- n += i;
- i = 0;
- }
+ rdma_umem_for_each_dma_block(mhp->umem, &biter, 1 << shift) {
+ pages[i++] = cpu_to_be64(rdma_block_iter_dma_address(&biter));
+ if (i == PAGE_SIZE / sizeof(*pages)) {
+ err = write_pbl(&mhp->rhp->rdev, pages,
+ mhp->attr.pbl_addr + (n << 3), i,
+ mhp->wr_waitp);
+ if (err)
+ goto pbl_done;
+ n += i;
+ i = 0;
}
}
if (i)
err = write_pbl(&mhp->rhp->rdev, pages,
- mhp->attr.pbl_addr + (n << 3), i);
+ mhp->attr.pbl_addr + (n << 3), i,
+ mhp->wr_waitp);
pbl_done:
free_page((unsigned long) pages);
if (err)
- goto err_pbl;
+ goto err_pbl_free;
mhp->attr.pdid = php->pdid;
mhp->attr.zbva = 0;
@@ -567,90 +581,25 @@ pbl_done:
err = register_mem(rhp, php, mhp, shift);
if (err)
- goto err_pbl;
+ goto err_pbl_free;
return &mhp->ibmr;
-err_pbl:
+err_pbl_free:
c4iw_pblpool_free(&mhp->rhp->rdev, mhp->attr.pbl_addr,
mhp->attr.pbl_size << 3);
-
-err:
+err_umem_release:
ib_umem_release(mhp->umem);
+err_free_skb:
kfree_skb(mhp->dereg_skb);
+err_free_wr_wait:
+ c4iw_put_wr_wait(mhp->wr_waitp);
+err_free_mhp:
kfree(mhp);
return ERR_PTR(err);
}
-struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
- struct ib_udata *udata)
-{
- struct c4iw_dev *rhp;
- struct c4iw_pd *php;
- struct c4iw_mw *mhp;
- u32 mmid;
- u32 stag = 0;
- int ret;
-
- if (type != IB_MW_TYPE_1)
- return ERR_PTR(-EINVAL);
-
- php = to_c4iw_pd(pd);
- rhp = php->rhp;
- mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
- if (!mhp)
- return ERR_PTR(-ENOMEM);
-
- mhp->dereg_skb = alloc_skb(SGE_MAX_WR_LEN, GFP_KERNEL);
- if (!mhp->dereg_skb) {
- ret = -ENOMEM;
- goto free_mhp;
- }
-
- ret = allocate_window(&rhp->rdev, &stag, php->pdid);
- if (ret)
- goto free_skb;
- mhp->rhp = rhp;
- mhp->attr.pdid = php->pdid;
- mhp->attr.type = FW_RI_STAG_MW;
- mhp->attr.stag = stag;
- mmid = (stag) >> 8;
- mhp->ibmw.rkey = stag;
- if (insert_handle(rhp, &rhp->mmidr, mhp, mmid)) {
- ret = -ENOMEM;
- goto dealloc_win;
- }
- PDBG("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag);
- return &(mhp->ibmw);
-
-dealloc_win:
- deallocate_window(&rhp->rdev, mhp->attr.stag, mhp->dereg_skb);
-free_skb:
- kfree_skb(mhp->dereg_skb);
-free_mhp:
- kfree(mhp);
- return ERR_PTR(ret);
-}
-
-int c4iw_dealloc_mw(struct ib_mw *mw)
-{
- struct c4iw_dev *rhp;
- struct c4iw_mw *mhp;
- u32 mmid;
-
- mhp = to_c4iw_mw(mw);
- rhp = mhp->rhp;
- mmid = (mw->rkey) >> 8;
- remove_handle(rhp, &rhp->mmidr, mmid);
- deallocate_window(&rhp->rdev, mhp->attr.stag, mhp->dereg_skb);
- kfree_skb(mhp->dereg_skb);
- kfree(mhp);
- PDBG("%s ib_mw %p mmid 0x%x ptr %p\n", __func__, mw, mmid, mhp);
- return 0;
-}
-
-struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd,
- enum ib_mr_type mr_type,
+struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
u32 max_num_sg)
{
struct c4iw_dev *rhp;
@@ -665,7 +614,7 @@ struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd,
rhp = php->rhp;
if (mr_type != IB_MR_TYPE_MEM_REG ||
- max_num_sg > t4_max_fr_depth(&rhp->rdev.lldi.ulptx_memwrite_dsgl &&
+ max_num_sg > t4_max_fr_depth(rhp->rdev.lldi.ulptx_memwrite_dsgl &&
use_dsgl))
return ERR_PTR(-EINVAL);
@@ -675,46 +624,56 @@ struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd,
goto err;
}
+ mhp->wr_waitp = c4iw_alloc_wr_wait(GFP_KERNEL);
+ if (!mhp->wr_waitp) {
+ ret = -ENOMEM;
+ goto err_free_mhp;
+ }
+ c4iw_init_wr_wait(mhp->wr_waitp);
+
mhp->mpl = dma_alloc_coherent(&rhp->rdev.lldi.pdev->dev,
length, &mhp->mpl_addr, GFP_KERNEL);
if (!mhp->mpl) {
ret = -ENOMEM;
- goto err_mpl;
+ goto err_free_wr_wait;
}
mhp->max_mpl_len = length;
mhp->rhp = rhp;
ret = alloc_pbl(mhp, max_num_sg);
if (ret)
- goto err1;
+ goto err_free_dma;
mhp->attr.pbl_size = max_num_sg;
ret = allocate_stag(&rhp->rdev, &stag, php->pdid,
- mhp->attr.pbl_size, mhp->attr.pbl_addr);
+ mhp->attr.pbl_size, mhp->attr.pbl_addr,
+ mhp->wr_waitp);
if (ret)
- goto err2;
+ goto err_free_pbl;
mhp->attr.pdid = php->pdid;
mhp->attr.type = FW_RI_STAG_NSMR;
mhp->attr.stag = stag;
mhp->attr.state = 0;
mmid = (stag) >> 8;
mhp->ibmr.rkey = mhp->ibmr.lkey = stag;
- if (insert_handle(rhp, &rhp->mmidr, mhp, mmid)) {
+ if (xa_insert_irq(&rhp->mrs, mmid, mhp, GFP_KERNEL)) {
ret = -ENOMEM;
- goto err3;
+ goto err_dereg;
}
- PDBG("%s mmid 0x%x mhp %p stag 0x%x\n", __func__, mmid, mhp, stag);
+ pr_debug("mmid 0x%x mhp %p stag 0x%x\n", mmid, mhp, stag);
return &(mhp->ibmr);
-err3:
+err_dereg:
dereg_mem(&rhp->rdev, stag, mhp->attr.pbl_size,
- mhp->attr.pbl_addr, mhp->dereg_skb);
-err2:
+ mhp->attr.pbl_addr, mhp->dereg_skb, mhp->wr_waitp);
+err_free_pbl:
c4iw_pblpool_free(&mhp->rhp->rdev, mhp->attr.pbl_addr,
mhp->attr.pbl_size << 3);
-err1:
+err_free_dma:
dma_free_coherent(&mhp->rhp->rdev.lldi.pdev->dev,
mhp->max_mpl_len, mhp->mpl, mhp->mpl_addr);
-err_mpl:
+err_free_wr_wait:
+ c4iw_put_wr_wait(mhp->wr_waitp);
+err_free_mhp:
kfree(mhp);
err:
return ERR_PTR(ret);
@@ -724,7 +683,7 @@ static int c4iw_set_page(struct ib_mr *ibmr, u64 addr)
{
struct c4iw_mr *mhp = to_c4iw_mr(ibmr);
- if (unlikely(mhp->mpl_len == mhp->max_mpl_len))
+ if (unlikely(mhp->mpl_len == mhp->attr.pbl_size))
return -ENOMEM;
mhp->mpl[mhp->mpl_len++] = addr;
@@ -742,31 +701,31 @@ int c4iw_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, c4iw_set_page);
}
-int c4iw_dereg_mr(struct ib_mr *ib_mr)
+int c4iw_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata)
{
struct c4iw_dev *rhp;
struct c4iw_mr *mhp;
u32 mmid;
- PDBG("%s ib_mr %p\n", __func__, ib_mr);
+ pr_debug("ib_mr %p\n", ib_mr);
mhp = to_c4iw_mr(ib_mr);
rhp = mhp->rhp;
mmid = mhp->attr.stag >> 8;
- remove_handle(rhp, &rhp->mmidr, mmid);
+ xa_erase_irq(&rhp->mrs, mmid);
if (mhp->mpl)
dma_free_coherent(&mhp->rhp->rdev.lldi.pdev->dev,
mhp->max_mpl_len, mhp->mpl, mhp->mpl_addr);
dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size,
- mhp->attr.pbl_addr, mhp->dereg_skb);
+ mhp->attr.pbl_addr, mhp->dereg_skb, mhp->wr_waitp);
if (mhp->attr.pbl_size)
c4iw_pblpool_free(&mhp->rhp->rdev, mhp->attr.pbl_addr,
mhp->attr.pbl_size << 3);
if (mhp->kva)
kfree((void *) (unsigned long) mhp->kva);
- if (mhp->umem)
- ib_umem_release(mhp->umem);
- PDBG("%s mmid 0x%x ptr %p\n", __func__, mmid, mhp);
+ ib_umem_release(mhp->umem);
+ pr_debug("mmid 0x%x ptr %p\n", mmid, mhp);
+ c4iw_put_wr_wait(mhp->wr_waitp);
kfree(mhp);
return 0;
}
@@ -776,9 +735,9 @@ void c4iw_invalidate_mr(struct c4iw_dev *rhp, u32 rkey)
struct c4iw_mr *mhp;
unsigned long flags;
- spin_lock_irqsave(&rhp->lock, flags);
- mhp = get_mhp(rhp, rkey >> 8);
+ xa_lock_irqsave(&rhp->mrs, flags);
+ mhp = xa_load(&rhp->mrs, rkey >> 8);
if (mhp)
mhp->attr.state = 0;
- spin_unlock_irqrestore(&rhp->lock, flags);
+ xa_unlock_irqrestore(&rhp->mrs, flags);
}
diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c
index 49b51b7e0fd7..e059f92d90fd 100644
--- a/drivers/infiniband/hw/cxgb4/provider.c
+++ b/drivers/infiniband/hw/cxgb4/provider.c
@@ -41,6 +41,7 @@
#include <linux/ethtool.h>
#include <linux/rtnetlink.h>
#include <linux/inetdevice.h>
+#include <net/addrconf.h>
#include <linux/io.h>
#include <asm/irq.h>
@@ -58,85 +59,43 @@ static int fastreg_support = 1;
module_param(fastreg_support, int, 0644);
MODULE_PARM_DESC(fastreg_support, "Advertise fastreg support (default=1)");
-static struct ib_ah *c4iw_ah_create(struct ib_pd *pd,
- struct ib_ah_attr *ah_attr,
- struct ib_udata *udata)
-
-{
- return ERR_PTR(-ENOSYS);
-}
-
-static int c4iw_ah_destroy(struct ib_ah *ah)
-{
- return -ENOSYS;
-}
-
-static int c4iw_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
-{
- return -ENOSYS;
-}
-
-static int c4iw_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
+static void c4iw_dealloc_ucontext(struct ib_ucontext *context)
{
- return -ENOSYS;
-}
-
-static int c4iw_process_mad(struct ib_device *ibdev, int mad_flags,
- u8 port_num, const struct ib_wc *in_wc,
- const struct ib_grh *in_grh,
- const struct ib_mad_hdr *in_mad,
- size_t in_mad_size,
- struct ib_mad_hdr *out_mad,
- size_t *out_mad_size,
- u16 *out_mad_pkey_index)
-{
- return -ENOSYS;
-}
-
-static int c4iw_dealloc_ucontext(struct ib_ucontext *context)
-{
- struct c4iw_dev *rhp = to_c4iw_dev(context->device);
struct c4iw_ucontext *ucontext = to_c4iw_ucontext(context);
+ struct c4iw_dev *rhp;
struct c4iw_mm_entry *mm, *tmp;
- PDBG("%s context %p\n", __func__, context);
+ pr_debug("context %p\n", context);
+ rhp = to_c4iw_dev(ucontext->ibucontext.device);
+
list_for_each_entry_safe(mm, tmp, &ucontext->mmaps, entry)
kfree(mm);
c4iw_release_dev_ucontext(&rhp->rdev, &ucontext->uctx);
- kfree(ucontext);
- return 0;
}
-static struct ib_ucontext *c4iw_alloc_ucontext(struct ib_device *ibdev,
- struct ib_udata *udata)
+static int c4iw_alloc_ucontext(struct ib_ucontext *ucontext,
+ struct ib_udata *udata)
{
- struct c4iw_ucontext *context;
+ struct ib_device *ibdev = ucontext->device;
+ struct c4iw_ucontext *context = to_c4iw_ucontext(ucontext);
struct c4iw_dev *rhp = to_c4iw_dev(ibdev);
- static int warned;
struct c4iw_alloc_ucontext_resp uresp;
int ret = 0;
struct c4iw_mm_entry *mm = NULL;
- PDBG("%s ibdev %p\n", __func__, ibdev);
- context = kzalloc(sizeof(*context), GFP_KERNEL);
- if (!context) {
- ret = -ENOMEM;
- goto err;
- }
-
+ pr_debug("ibdev %p\n", ibdev);
c4iw_init_dev_ucontext(&rhp->rdev, &context->uctx);
INIT_LIST_HEAD(&context->mmaps);
spin_lock_init(&context->mmap_lock);
if (udata->outlen < sizeof(uresp) - sizeof(uresp.reserved)) {
- if (!warned++)
- pr_err(MOD "Warning - downlevel libcxgb4 (non-fatal), device status page disabled.");
+ pr_err_once("Warning - downlevel libcxgb4 (non-fatal), device status page disabled\n");
rhp->rdev.flags |= T4_STATUS_PAGE_DISABLED;
} else {
mm = kmalloc(sizeof(*mm), GFP_KERNEL);
if (!mm) {
ret = -ENOMEM;
- goto err_free;
+ goto err;
}
uresp.status_page_size = PAGE_SIZE;
@@ -154,15 +113,16 @@ static struct ib_ucontext *c4iw_alloc_ucontext(struct ib_device *ibdev,
mm->key = uresp.status_page_key;
mm->addr = virt_to_phys(rhp->rdev.status_page);
mm->len = PAGE_SIZE;
+ mm->vaddr = NULL;
+ mm->dma_addr = 0;
+ insert_flag_to_mmap(&rhp->rdev, mm, mm->addr);
insert_mmap(context, mm);
}
- return &context->ibucontext;
+ return 0;
err_mm:
kfree(mm);
-err_free:
- kfree(context);
err:
- return ERR_PTR(ret);
+ return ret;
}
static int c4iw_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
@@ -174,9 +134,14 @@ static int c4iw_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
struct c4iw_mm_entry *mm;
struct c4iw_ucontext *ucontext;
u64 addr;
+ u8 mmap_flag;
+ size_t size;
+ void *vaddr;
+ unsigned long vm_pgoff;
+ dma_addr_t dma_addr;
- PDBG("%s pgoff 0x%lx key 0x%x len %d\n", __func__, vma->vm_pgoff,
- key, len);
+ pr_debug("pgoff 0x%lx key 0x%x len %d\n", vma->vm_pgoff,
+ key, len);
if (vma->vm_start & (PAGE_SIZE-1))
return -EINVAL;
@@ -188,92 +153,79 @@ static int c4iw_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
if (!mm)
return -EINVAL;
addr = mm->addr;
+ vaddr = mm->vaddr;
+ dma_addr = mm->dma_addr;
+ size = mm->len;
+ mmap_flag = mm->mmap_flag;
kfree(mm);
- if ((addr >= pci_resource_start(rdev->lldi.pdev, 0)) &&
- (addr < (pci_resource_start(rdev->lldi.pdev, 0) +
- pci_resource_len(rdev->lldi.pdev, 0)))) {
-
- /*
- * MA_SYNC register...
- */
- vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+ switch (mmap_flag) {
+ case CXGB4_MMAP_BAR:
+ ret = io_remap_pfn_range(vma, vma->vm_start, addr >> PAGE_SHIFT,
+ len,
+ pgprot_noncached(vma->vm_page_prot));
+ break;
+ case CXGB4_MMAP_BAR_WC:
ret = io_remap_pfn_range(vma, vma->vm_start,
addr >> PAGE_SHIFT,
- len, vma->vm_page_prot);
- } else if ((addr >= pci_resource_start(rdev->lldi.pdev, 2)) &&
- (addr < (pci_resource_start(rdev->lldi.pdev, 2) +
- pci_resource_len(rdev->lldi.pdev, 2)))) {
-
- /*
- * Map user DB or OCQP memory...
- */
- if (addr >= rdev->oc_mw_pa)
- vma->vm_page_prot = t4_pgprot_wc(vma->vm_page_prot);
- else {
- if (!is_t4(rdev->lldi.adapter_type))
- vma->vm_page_prot =
- t4_pgprot_wc(vma->vm_page_prot);
- else
- vma->vm_page_prot =
- pgprot_noncached(vma->vm_page_prot);
- }
+ len, t4_pgprot_wc(vma->vm_page_prot));
+ break;
+ case CXGB4_MMAP_CONTIG:
ret = io_remap_pfn_range(vma, vma->vm_start,
addr >> PAGE_SHIFT,
len, vma->vm_page_prot);
- } else {
-
- /*
- * Map WQ or CQ contig dma memory...
- */
- ret = remap_pfn_range(vma, vma->vm_start,
- addr >> PAGE_SHIFT,
- len, vma->vm_page_prot);
+ break;
+ case CXGB4_MMAP_NON_CONTIG:
+ vm_pgoff = vma->vm_pgoff;
+ vma->vm_pgoff = 0;
+ ret = dma_mmap_coherent(&rdev->lldi.pdev->dev, vma,
+ vaddr, dma_addr, size);
+ vma->vm_pgoff = vm_pgoff;
+ break;
+ default:
+ ret = -EINVAL;
+ break;
}
return ret;
}
-static int c4iw_deallocate_pd(struct ib_pd *pd)
+static int c4iw_deallocate_pd(struct ib_pd *pd, struct ib_udata *udata)
{
struct c4iw_dev *rhp;
struct c4iw_pd *php;
php = to_c4iw_pd(pd);
rhp = php->rhp;
- PDBG("%s ibpd %p pdid 0x%x\n", __func__, pd, php->pdid);
+ pr_debug("ibpd %p pdid 0x%x\n", pd, php->pdid);
c4iw_put_resource(&rhp->rdev.resource.pdid_table, php->pdid);
mutex_lock(&rhp->rdev.stats.lock);
rhp->rdev.stats.pd.cur--;
mutex_unlock(&rhp->rdev.stats.lock);
- kfree(php);
return 0;
}
-static struct ib_pd *c4iw_allocate_pd(struct ib_device *ibdev,
- struct ib_ucontext *context,
- struct ib_udata *udata)
+static int c4iw_allocate_pd(struct ib_pd *pd, struct ib_udata *udata)
{
- struct c4iw_pd *php;
+ struct c4iw_pd *php = to_c4iw_pd(pd);
+ struct ib_device *ibdev = pd->device;
u32 pdid;
struct c4iw_dev *rhp;
- PDBG("%s ibdev %p\n", __func__, ibdev);
+ pr_debug("ibdev %p\n", ibdev);
rhp = (struct c4iw_dev *) ibdev;
pdid = c4iw_get_resource(&rhp->rdev.resource.pdid_table);
if (!pdid)
- return ERR_PTR(-EINVAL);
- php = kzalloc(sizeof(*php), GFP_KERNEL);
- if (!php) {
- c4iw_put_resource(&rhp->rdev.resource.pdid_table, pdid);
- return ERR_PTR(-ENOMEM);
- }
+ return -EINVAL;
+
php->pdid = pdid;
php->rhp = rhp;
- if (context) {
- if (ib_copy_to_udata(udata, &php->pdid, sizeof(u32))) {
- c4iw_deallocate_pd(&php->ibpd);
- return ERR_PTR(-EFAULT);
+ if (udata) {
+ struct c4iw_alloc_pd_resp uresp = {.pdid = php->pdid};
+
+ if (ib_copy_to_udata(udata, &uresp, sizeof(uresp))) {
+ c4iw_deallocate_pd(&php->ibpd, udata);
+ return -EFAULT;
}
}
mutex_lock(&rhp->rdev.stats.lock);
@@ -281,27 +233,20 @@ static struct ib_pd *c4iw_allocate_pd(struct ib_device *ibdev,
if (rhp->rdev.stats.pd.cur > rhp->rdev.stats.pd.max)
rhp->rdev.stats.pd.max = rhp->rdev.stats.pd.cur;
mutex_unlock(&rhp->rdev.stats.lock);
- PDBG("%s pdid 0x%0x ptr 0x%p\n", __func__, pdid, php);
- return &php->ibpd;
-}
-
-static int c4iw_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
- u16 *pkey)
-{
- PDBG("%s ibdev %p\n", __func__, ibdev);
- *pkey = 0;
+ pr_debug("pdid 0x%0x ptr 0x%p\n", pdid, php);
return 0;
}
-static int c4iw_query_gid(struct ib_device *ibdev, u8 port, int index,
+static int c4iw_query_gid(struct ib_device *ibdev, u32 port, int index,
union ib_gid *gid)
{
struct c4iw_dev *dev;
- PDBG("%s ibdev %p, port %d, index %d, gid %p\n",
- __func__, ibdev, port, index, gid);
+ pr_debug("ibdev %p, port %u, index %d, gid %p\n",
+ ibdev, port, index, gid);
+ if (!port)
+ return -EINVAL;
dev = to_c4iw_dev(ibdev);
- BUG_ON(port == 0);
memset(&(gid->raw[0]), 0, sizeof(gid->raw));
memcpy(&(gid->raw[0]), dev->rdev.lldi.ports[port-1]->dev_addr, 6);
return 0;
@@ -313,24 +258,31 @@ static int c4iw_query_device(struct ib_device *ibdev, struct ib_device_attr *pro
struct c4iw_dev *dev;
- PDBG("%s ibdev %p\n", __func__, ibdev);
+ pr_debug("ibdev %p\n", ibdev);
if (uhw->inlen || uhw->outlen)
return -EINVAL;
dev = to_c4iw_dev(ibdev);
- memset(props, 0, sizeof *props);
- memcpy(&props->sys_image_guid, dev->rdev.lldi.ports[0]->dev_addr, 6);
+ addrconf_addr_eui48((u8 *)&props->sys_image_guid,
+ dev->rdev.lldi.ports[0]->dev_addr);
props->hw_ver = CHELSIO_CHIP_RELEASE(dev->rdev.lldi.adapter_type);
props->fw_ver = dev->rdev.lldi.fw_vers;
- props->device_cap_flags = dev->device_cap_flags;
+ props->device_cap_flags = IB_DEVICE_MEM_WINDOW;
+ props->kernel_cap_flags = IBK_LOCAL_DMA_LKEY;
+ if (fastreg_support)
+ props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
props->page_size_cap = T4_PAGESIZE_MASK;
props->vendor_id = (u32)dev->rdev.lldi.pdev->vendor;
props->vendor_part_id = (u32)dev->rdev.lldi.pdev->device;
props->max_mr_size = T4_MAX_MR_SIZE;
props->max_qp = dev->rdev.lldi.vr->qp.size / 2;
+ props->max_srq = dev->rdev.lldi.vr->srq.size;
props->max_qp_wr = dev->rdev.hw_queue.t4_max_qp_depth;
- props->max_sge = T4_MAX_RECV_SGE;
+ props->max_srq_wr = dev->rdev.hw_queue.t4_max_qp_depth;
+ props->max_send_sge = min(T4_MAX_SEND_SGE, T4_MAX_WRITE_SGE);
+ props->max_recv_sge = T4_MAX_RECV_SGE;
+ props->max_srq_sge = T4_MAX_RECV_SGE;
props->max_sge_rd = 1;
props->max_res_rd_atom = dev->rdev.lldi.max_ird_adapter;
props->max_qp_rd_atom = min(dev->rdev.lldi.max_ordird_qp,
@@ -347,44 +299,13 @@ static int c4iw_query_device(struct ib_device *ibdev, struct ib_device_attr *pro
return 0;
}
-static int c4iw_query_port(struct ib_device *ibdev, u8 port,
+static int c4iw_query_port(struct ib_device *ibdev, u32 port,
struct ib_port_attr *props)
{
- struct c4iw_dev *dev;
- struct net_device *netdev;
- struct in_device *inetdev;
-
- PDBG("%s ibdev %p\n", __func__, ibdev);
-
- dev = to_c4iw_dev(ibdev);
- netdev = dev->rdev.lldi.ports[port-1];
-
- memset(props, 0, sizeof(struct ib_port_attr));
- props->max_mtu = IB_MTU_4096;
- if (netdev->mtu >= 4096)
- props->active_mtu = IB_MTU_4096;
- else if (netdev->mtu >= 2048)
- props->active_mtu = IB_MTU_2048;
- else if (netdev->mtu >= 1024)
- props->active_mtu = IB_MTU_1024;
- else if (netdev->mtu >= 512)
- props->active_mtu = IB_MTU_512;
- else
- props->active_mtu = IB_MTU_256;
-
- if (!netif_carrier_ok(netdev))
- props->state = IB_PORT_DOWN;
- else {
- inetdev = in_dev_get(netdev);
- if (inetdev) {
- if (inetdev->ifa_list)
- props->state = IB_PORT_ACTIVE;
- else
- props->state = IB_PORT_INIT;
- in_dev_put(inetdev);
- } else
- props->state = IB_PORT_INIT;
- }
+ int ret = 0;
+ pr_debug("ibdev %p\n", ibdev);
+ ret = ib_get_eth_speed(ibdev, port, &props->active_speed,
+ &props->active_width);
props->port_cap_flags =
IB_PORT_CM_SUP |
@@ -393,46 +314,49 @@ static int c4iw_query_port(struct ib_device *ibdev, u8 port,
IB_PORT_DEVICE_MGMT_SUP |
IB_PORT_VENDOR_CLASS_SUP | IB_PORT_BOOT_MGMT_SUP;
props->gid_tbl_len = 1;
- props->pkey_tbl_len = 1;
- props->active_width = 2;
- props->active_speed = IB_SPEED_DDR;
props->max_msg_sz = -1;
- return 0;
+ return ret;
}
-static ssize_t show_rev(struct device *dev, struct device_attribute *attr,
- char *buf)
+static ssize_t hw_rev_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
- struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev,
- ibdev.dev);
- PDBG("%s dev 0x%p\n", __func__, dev);
- return sprintf(buf, "%d\n",
- CHELSIO_CHIP_RELEASE(c4iw_dev->rdev.lldi.adapter_type));
+ struct c4iw_dev *c4iw_dev =
+ rdma_device_to_drv_device(dev, struct c4iw_dev, ibdev);
+
+ pr_debug("dev 0x%p\n", dev);
+ return sysfs_emit(
+ buf, "%d\n",
+ CHELSIO_CHIP_RELEASE(c4iw_dev->rdev.lldi.adapter_type));
}
+static DEVICE_ATTR_RO(hw_rev);
-static ssize_t show_hca(struct device *dev, struct device_attribute *attr,
- char *buf)
+static ssize_t hca_type_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
- struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev,
- ibdev.dev);
+ struct c4iw_dev *c4iw_dev =
+ rdma_device_to_drv_device(dev, struct c4iw_dev, ibdev);
struct ethtool_drvinfo info;
struct net_device *lldev = c4iw_dev->rdev.lldi.ports[0];
- PDBG("%s dev 0x%p\n", __func__, dev);
+ pr_debug("dev 0x%p\n", dev);
lldev->ethtool_ops->get_drvinfo(lldev, &info);
- return sprintf(buf, "%s\n", info.driver);
+ return sysfs_emit(buf, "%s\n", info.driver);
}
+static DEVICE_ATTR_RO(hca_type);
-static ssize_t show_board(struct device *dev, struct device_attribute *attr,
- char *buf)
+static ssize_t board_id_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
{
- struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev,
- ibdev.dev);
- PDBG("%s dev 0x%p\n", __func__, dev);
- return sprintf(buf, "%x.%x\n", c4iw_dev->rdev.lldi.pdev->vendor,
- c4iw_dev->rdev.lldi.pdev->device);
+ struct c4iw_dev *c4iw_dev =
+ rdma_device_to_drv_device(dev, struct c4iw_dev, ibdev);
+
+ pr_debug("dev 0x%p\n", dev);
+ return sysfs_emit(buf, "%x.%x\n", c4iw_dev->rdev.lldi.pdev->vendor,
+ c4iw_dev->rdev.lldi.pdev->device);
}
+static DEVICE_ATTR_RO(board_id);
enum counters {
IP4INSEGS,
@@ -446,32 +370,29 @@ enum counters {
NR_COUNTERS
};
-static const char * const names[] = {
- [IP4INSEGS] = "ip4InSegs",
- [IP4OUTSEGS] = "ip4OutSegs",
- [IP4RETRANSSEGS] = "ip4RetransSegs",
- [IP4OUTRSTS] = "ip4OutRsts",
- [IP6INSEGS] = "ip6InSegs",
- [IP6OUTSEGS] = "ip6OutSegs",
- [IP6RETRANSSEGS] = "ip6RetransSegs",
- [IP6OUTRSTS] = "ip6OutRsts"
+static const struct rdma_stat_desc cxgb4_descs[] = {
+ [IP4INSEGS].name = "ip4InSegs",
+ [IP4OUTSEGS].name = "ip4OutSegs",
+ [IP4RETRANSSEGS].name = "ip4RetransSegs",
+ [IP4OUTRSTS].name = "ip4OutRsts",
+ [IP6INSEGS].name = "ip6InSegs",
+ [IP6OUTSEGS].name = "ip6OutSegs",
+ [IP6RETRANSSEGS].name = "ip6RetransSegs",
+ [IP6OUTRSTS].name = "ip6OutRsts"
};
-static struct rdma_hw_stats *c4iw_alloc_stats(struct ib_device *ibdev,
- u8 port_num)
+static struct rdma_hw_stats *c4iw_alloc_device_stats(struct ib_device *ibdev)
{
- BUILD_BUG_ON(ARRAY_SIZE(names) != NR_COUNTERS);
-
- if (port_num != 0)
- return NULL;
+ BUILD_BUG_ON(ARRAY_SIZE(cxgb4_descs) != NR_COUNTERS);
- return rdma_alloc_hw_stats_struct(names, NR_COUNTERS,
+ /* FIXME: these look like port stats */
+ return rdma_alloc_hw_stats_struct(cxgb4_descs, NR_COUNTERS,
RDMA_HW_STATS_DEFAULT_LIFESPAN);
}
static int c4iw_get_mib(struct ib_device *ibdev,
struct rdma_hw_stats *stats,
- u8 port, int index)
+ u32 port, int index)
{
struct tp_tcp_stats v4, v6;
struct c4iw_dev *c4iw_dev = to_c4iw_dev(ibdev);
@@ -489,169 +410,160 @@ static int c4iw_get_mib(struct ib_device *ibdev,
return stats->num_counters;
}
-static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
-static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
-static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
+static struct attribute *c4iw_class_attributes[] = {
+ &dev_attr_hw_rev.attr,
+ &dev_attr_hca_type.attr,
+ &dev_attr_board_id.attr,
+ NULL
+};
-static struct device_attribute *c4iw_class_attributes[] = {
- &dev_attr_hw_rev,
- &dev_attr_hca_type,
- &dev_attr_board_id,
+static const struct attribute_group c4iw_attr_group = {
+ .attrs = c4iw_class_attributes,
};
-static int c4iw_port_immutable(struct ib_device *ibdev, u8 port_num,
+static int c4iw_port_immutable(struct ib_device *ibdev, u32 port_num,
struct ib_port_immutable *immutable)
{
struct ib_port_attr attr;
int err;
- err = c4iw_query_port(ibdev, port_num, &attr);
+ immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
+
+ err = ib_query_port(ibdev, port_num, &attr);
if (err)
return err;
- immutable->pkey_tbl_len = attr.pkey_tbl_len;
immutable->gid_tbl_len = attr.gid_tbl_len;
- immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
return 0;
}
-static void get_dev_fw_str(struct ib_device *dev, char *str,
- size_t str_len)
+static void get_dev_fw_str(struct ib_device *dev, char *str)
{
struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev,
ibdev);
- PDBG("%s dev 0x%p\n", __func__, dev);
+ pr_debug("dev 0x%p\n", dev);
- snprintf(str, str_len, "%u.%u.%u.%u",
+ snprintf(str, IB_FW_VERSION_NAME_MAX, "%u.%u.%u.%u",
FW_HDR_FW_VER_MAJOR_G(c4iw_dev->rdev.lldi.fw_vers),
FW_HDR_FW_VER_MINOR_G(c4iw_dev->rdev.lldi.fw_vers),
FW_HDR_FW_VER_MICRO_G(c4iw_dev->rdev.lldi.fw_vers),
FW_HDR_FW_VER_BUILD_G(c4iw_dev->rdev.lldi.fw_vers));
}
-int c4iw_register_device(struct c4iw_dev *dev)
+static const struct ib_device_ops c4iw_dev_ops = {
+ .owner = THIS_MODULE,
+ .driver_id = RDMA_DRIVER_CXGB4,
+ .uverbs_abi_ver = C4IW_UVERBS_ABI_VERSION,
+
+ .alloc_hw_device_stats = c4iw_alloc_device_stats,
+ .alloc_mr = c4iw_alloc_mr,
+ .alloc_pd = c4iw_allocate_pd,
+ .alloc_ucontext = c4iw_alloc_ucontext,
+ .create_cq = c4iw_create_cq,
+ .create_qp = c4iw_create_qp,
+ .create_srq = c4iw_create_srq,
+ .dealloc_pd = c4iw_deallocate_pd,
+ .dealloc_ucontext = c4iw_dealloc_ucontext,
+ .dereg_mr = c4iw_dereg_mr,
+ .destroy_cq = c4iw_destroy_cq,
+ .destroy_qp = c4iw_destroy_qp,
+ .destroy_srq = c4iw_destroy_srq,
+ .device_group = &c4iw_attr_group,
+ .fill_res_cq_entry = c4iw_fill_res_cq_entry,
+ .fill_res_cm_id_entry = c4iw_fill_res_cm_id_entry,
+ .fill_res_mr_entry = c4iw_fill_res_mr_entry,
+ .fill_res_qp_entry = c4iw_fill_res_qp_entry,
+ .get_dev_fw_str = get_dev_fw_str,
+ .get_dma_mr = c4iw_get_dma_mr,
+ .get_hw_stats = c4iw_get_mib,
+ .get_port_immutable = c4iw_port_immutable,
+ .iw_accept = c4iw_accept_cr,
+ .iw_add_ref = c4iw_qp_add_ref,
+ .iw_connect = c4iw_connect,
+ .iw_create_listen = c4iw_create_listen,
+ .iw_destroy_listen = c4iw_destroy_listen,
+ .iw_get_qp = c4iw_get_qp,
+ .iw_reject = c4iw_reject_cr,
+ .iw_rem_ref = c4iw_qp_rem_ref,
+ .map_mr_sg = c4iw_map_mr_sg,
+ .mmap = c4iw_mmap,
+ .modify_qp = c4iw_ib_modify_qp,
+ .modify_srq = c4iw_modify_srq,
+ .poll_cq = c4iw_poll_cq,
+ .post_recv = c4iw_post_receive,
+ .post_send = c4iw_post_send,
+ .post_srq_recv = c4iw_post_srq_recv,
+ .query_device = c4iw_query_device,
+ .query_gid = c4iw_query_gid,
+ .query_port = c4iw_query_port,
+ .query_qp = c4iw_ib_query_qp,
+ .reg_user_mr = c4iw_reg_user_mr,
+ .req_notify_cq = c4iw_arm_cq,
+
+ INIT_RDMA_OBJ_SIZE(ib_cq, c4iw_cq, ibcq),
+ INIT_RDMA_OBJ_SIZE(ib_mw, c4iw_mw, ibmw),
+ INIT_RDMA_OBJ_SIZE(ib_pd, c4iw_pd, ibpd),
+ INIT_RDMA_OBJ_SIZE(ib_qp, c4iw_qp, ibqp),
+ INIT_RDMA_OBJ_SIZE(ib_srq, c4iw_srq, ibsrq),
+ INIT_RDMA_OBJ_SIZE(ib_ucontext, c4iw_ucontext, ibucontext),
+};
+
+static int set_netdevs(struct ib_device *ib_dev, struct c4iw_rdev *rdev)
{
int ret;
int i;
- PDBG("%s c4iw_dev %p\n", __func__, dev);
- BUG_ON(!dev->rdev.lldi.ports[0]);
- strlcpy(dev->ibdev.name, "cxgb4_%d", IB_DEVICE_NAME_MAX);
- memset(&dev->ibdev.node_guid, 0, sizeof(dev->ibdev.node_guid));
- memcpy(&dev->ibdev.node_guid, dev->rdev.lldi.ports[0]->dev_addr, 6);
- dev->ibdev.owner = THIS_MODULE;
- dev->device_cap_flags = IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_WINDOW;
- if (fastreg_support)
- dev->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
+ for (i = 0; i < rdev->lldi.nports; i++) {
+ ret = ib_device_set_netdev(ib_dev, rdev->lldi.ports[i],
+ i + 1);
+ if (ret)
+ return ret;
+ }
+ return 0;
+}
+
+void c4iw_register_device(struct work_struct *work)
+{
+ int ret;
+ struct uld_ctx *ctx = container_of(work, struct uld_ctx, reg_work);
+ struct c4iw_dev *dev = ctx->dev;
+
+ pr_debug("c4iw_dev %p\n", dev);
+ addrconf_addr_eui48((u8 *)&dev->ibdev.node_guid,
+ dev->rdev.lldi.ports[0]->dev_addr);
dev->ibdev.local_dma_lkey = 0;
- dev->ibdev.uverbs_cmd_mask =
- (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
- (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
- (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
- (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
- (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
- (1ull << IB_USER_VERBS_CMD_REG_MR) |
- (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
- (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
- (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
- (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) |
- (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
- (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
- (1ull << IB_USER_VERBS_CMD_QUERY_QP) |
- (1ull << IB_USER_VERBS_CMD_POLL_CQ) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
- (1ull << IB_USER_VERBS_CMD_POST_SEND) |
- (1ull << IB_USER_VERBS_CMD_POST_RECV);
dev->ibdev.node_type = RDMA_NODE_RNIC;
BUILD_BUG_ON(sizeof(C4IW_NODE_DESC) > IB_DEVICE_NODE_DESC_MAX);
memcpy(dev->ibdev.node_desc, C4IW_NODE_DESC, sizeof(C4IW_NODE_DESC));
dev->ibdev.phys_port_cnt = dev->rdev.lldi.nports;
dev->ibdev.num_comp_vectors = dev->rdev.lldi.nciq;
- dev->ibdev.dma_device = &(dev->rdev.lldi.pdev->dev);
- dev->ibdev.query_device = c4iw_query_device;
- dev->ibdev.query_port = c4iw_query_port;
- dev->ibdev.query_pkey = c4iw_query_pkey;
- dev->ibdev.query_gid = c4iw_query_gid;
- dev->ibdev.alloc_ucontext = c4iw_alloc_ucontext;
- dev->ibdev.dealloc_ucontext = c4iw_dealloc_ucontext;
- dev->ibdev.mmap = c4iw_mmap;
- dev->ibdev.alloc_pd = c4iw_allocate_pd;
- dev->ibdev.dealloc_pd = c4iw_deallocate_pd;
- dev->ibdev.create_ah = c4iw_ah_create;
- dev->ibdev.destroy_ah = c4iw_ah_destroy;
- dev->ibdev.create_qp = c4iw_create_qp;
- dev->ibdev.modify_qp = c4iw_ib_modify_qp;
- dev->ibdev.query_qp = c4iw_ib_query_qp;
- dev->ibdev.destroy_qp = c4iw_destroy_qp;
- dev->ibdev.create_cq = c4iw_create_cq;
- dev->ibdev.destroy_cq = c4iw_destroy_cq;
- dev->ibdev.resize_cq = c4iw_resize_cq;
- dev->ibdev.poll_cq = c4iw_poll_cq;
- dev->ibdev.get_dma_mr = c4iw_get_dma_mr;
- dev->ibdev.reg_user_mr = c4iw_reg_user_mr;
- dev->ibdev.dereg_mr = c4iw_dereg_mr;
- dev->ibdev.alloc_mw = c4iw_alloc_mw;
- dev->ibdev.dealloc_mw = c4iw_dealloc_mw;
- dev->ibdev.alloc_mr = c4iw_alloc_mr;
- dev->ibdev.map_mr_sg = c4iw_map_mr_sg;
- dev->ibdev.attach_mcast = c4iw_multicast_attach;
- dev->ibdev.detach_mcast = c4iw_multicast_detach;
- dev->ibdev.process_mad = c4iw_process_mad;
- dev->ibdev.req_notify_cq = c4iw_arm_cq;
- dev->ibdev.post_send = c4iw_post_send;
- dev->ibdev.post_recv = c4iw_post_receive;
- dev->ibdev.alloc_hw_stats = c4iw_alloc_stats;
- dev->ibdev.get_hw_stats = c4iw_get_mib;
- dev->ibdev.uverbs_abi_ver = C4IW_UVERBS_ABI_VERSION;
- dev->ibdev.get_port_immutable = c4iw_port_immutable;
- dev->ibdev.get_dev_fw_str = get_dev_fw_str;
- dev->ibdev.drain_sq = c4iw_drain_sq;
- dev->ibdev.drain_rq = c4iw_drain_rq;
-
- dev->ibdev.iwcm = kmalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL);
- if (!dev->ibdev.iwcm)
- return -ENOMEM;
-
- dev->ibdev.iwcm->connect = c4iw_connect;
- dev->ibdev.iwcm->accept = c4iw_accept_cr;
- dev->ibdev.iwcm->reject = c4iw_reject_cr;
- dev->ibdev.iwcm->create_listen = c4iw_create_listen;
- dev->ibdev.iwcm->destroy_listen = c4iw_destroy_listen;
- dev->ibdev.iwcm->add_ref = c4iw_qp_add_ref;
- dev->ibdev.iwcm->rem_ref = c4iw_qp_rem_ref;
- dev->ibdev.iwcm->get_qp = c4iw_get_qp;
- memcpy(dev->ibdev.iwcm->ifname, dev->rdev.lldi.ports[0]->name,
- sizeof(dev->ibdev.iwcm->ifname));
-
- ret = ib_register_device(&dev->ibdev, NULL);
+ dev->ibdev.dev.parent = &dev->rdev.lldi.pdev->dev;
+
+ memcpy(dev->ibdev.iw_ifname, dev->rdev.lldi.ports[0]->name,
+ sizeof(dev->ibdev.iw_ifname));
+
+ ib_set_device_ops(&dev->ibdev, &c4iw_dev_ops);
+ ret = set_netdevs(&dev->ibdev, &dev->rdev);
+ if (ret)
+ goto err_dealloc_ctx;
+ dma_set_max_seg_size(&dev->rdev.lldi.pdev->dev, UINT_MAX);
+ ret = ib_register_device(&dev->ibdev, "cxgb4_%d",
+ &dev->rdev.lldi.pdev->dev);
if (ret)
- goto bail1;
+ goto err_dealloc_ctx;
+ return;
- for (i = 0; i < ARRAY_SIZE(c4iw_class_attributes); ++i) {
- ret = device_create_file(&dev->ibdev.dev,
- c4iw_class_attributes[i]);
- if (ret)
- goto bail2;
- }
- return 0;
-bail2:
- ib_unregister_device(&dev->ibdev);
-bail1:
- kfree(dev->ibdev.iwcm);
- return ret;
+err_dealloc_ctx:
+ pr_err("%s - Failed registering iwarp device: %d\n",
+ pci_name(ctx->lldi.pdev), ret);
+ c4iw_dealloc(ctx);
+ return;
}
void c4iw_unregister_device(struct c4iw_dev *dev)
{
- int i;
-
- PDBG("%s c4iw_dev %p\n", __func__, dev);
- for (i = 0; i < ARRAY_SIZE(c4iw_class_attributes); ++i)
- device_remove_file(&dev->ibdev.dev,
- c4iw_class_attributes[i]);
+ pr_debug("c4iw_dev %p\n", dev);
ib_unregister_device(&dev->ibdev);
- kfree(dev->ibdev.iwcm);
return;
}
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index cda5542e13a2..955f061a55e9 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -31,6 +31,7 @@
*/
#include <linux/module.h>
+#include <rdma/uverbs_ioctl.h>
#include "iw_cxgb4.h"
@@ -56,18 +57,18 @@ MODULE_PARM_DESC(db_coalescing_threshold,
static int max_fr_immd = T4_MAX_FR_IMMD;
module_param(max_fr_immd, int, 0644);
-MODULE_PARM_DESC(max_fr_immd, "fastreg threshold for using DSGL instead of immedate");
+MODULE_PARM_DESC(max_fr_immd, "fastreg threshold for using DSGL instead of immediate");
static int alloc_ird(struct c4iw_dev *dev, u32 ird)
{
int ret = 0;
- spin_lock_irq(&dev->lock);
+ xa_lock_irq(&dev->qps);
if (ird <= dev->avail_ird)
dev->avail_ird -= ird;
else
ret = -ENOMEM;
- spin_unlock_irq(&dev->lock);
+ xa_unlock_irq(&dev->qps);
if (ret)
dev_warn(&dev->rdev.lldi.pdev->dev,
@@ -78,9 +79,9 @@ static int alloc_ird(struct c4iw_dev *dev, u32 ird)
static void free_ird(struct c4iw_dev *dev, int ird)
{
- spin_lock_irq(&dev->lock);
+ xa_lock_irq(&dev->qps);
dev->avail_ird += ird;
- spin_unlock_irq(&dev->lock);
+ xa_unlock_irq(&dev->qps);
}
static void set_state(struct c4iw_qp *qhp, enum c4iw_qp_state state)
@@ -99,7 +100,7 @@ static void dealloc_oc_sq(struct c4iw_rdev *rdev, struct t4_sq *sq)
static void dealloc_host_sq(struct c4iw_rdev *rdev, struct t4_sq *sq)
{
dma_free_coherent(&(rdev->lldi.pdev->dev), sq->memsize, sq->queue,
- pci_unmap_addr(sq, mapping));
+ dma_unmap_addr(sq, mapping));
}
static void dealloc_sq(struct c4iw_rdev *rdev, struct t4_sq *sq)
@@ -132,7 +133,7 @@ static int alloc_host_sq(struct c4iw_rdev *rdev, struct t4_sq *sq)
if (!sq->queue)
return -ENOMEM;
sq->phys_addr = virt_to_phys(sq->queue);
- pci_unmap_addr_set(sq, mapping, sq->dma_addr);
+ dma_unmap_addr_set(sq, mapping, sq->dma_addr);
return 0;
}
@@ -147,21 +148,24 @@ static int alloc_sq(struct c4iw_rdev *rdev, struct t4_sq *sq, int user)
}
static int destroy_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
- struct c4iw_dev_ucontext *uctx)
+ struct c4iw_dev_ucontext *uctx, int has_rq)
{
/*
* uP clears EQ contexts when the connection exits rdma mode,
* so no need to post a RESET WR for these EQs.
*/
- dma_free_coherent(&(rdev->lldi.pdev->dev),
- wq->rq.memsize, wq->rq.queue,
- dma_unmap_addr(&wq->rq, mapping));
dealloc_sq(rdev, &wq->sq);
- c4iw_rqtpool_free(rdev, wq->rq.rqt_hwaddr, wq->rq.rqt_size);
- kfree(wq->rq.sw_rq);
kfree(wq->sq.sw_sq);
- c4iw_put_qpid(rdev, wq->rq.qid, uctx);
c4iw_put_qpid(rdev, wq->sq.qid, uctx);
+
+ if (has_rq) {
+ dma_free_coherent(&rdev->lldi.pdev->dev,
+ wq->rq.memsize, wq->rq.queue,
+ dma_unmap_addr(&wq->rq, mapping));
+ c4iw_rqtpool_free(rdev, wq->rq.rqt_hwaddr, wq->rq.rqt_size);
+ kfree(wq->rq.sw_rq);
+ c4iw_put_qpid(rdev, wq->rq.qid, uctx);
+ }
return 0;
}
@@ -194,13 +198,14 @@ void __iomem *c4iw_bar2_addrs(struct c4iw_rdev *rdev, unsigned int qid,
static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
struct t4_cq *rcq, struct t4_cq *scq,
- struct c4iw_dev_ucontext *uctx)
+ struct c4iw_dev_ucontext *uctx,
+ struct c4iw_wr_wait *wr_waitp,
+ int need_rq)
{
int user = (uctx != &rdev->uctx);
struct fw_ri_res_wr *res_wr;
struct fw_ri_res *res;
int wr_len;
- struct c4iw_wr_wait wr_wait;
struct sk_buff *skb;
int ret = 0;
int eqsize;
@@ -209,36 +214,44 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
if (!wq->sq.qid)
return -ENOMEM;
- wq->rq.qid = c4iw_get_qpid(rdev, uctx);
- if (!wq->rq.qid) {
- ret = -ENOMEM;
- goto free_sq_qid;
+ if (need_rq) {
+ wq->rq.qid = c4iw_get_qpid(rdev, uctx);
+ if (!wq->rq.qid) {
+ ret = -ENOMEM;
+ goto free_sq_qid;
+ }
}
if (!user) {
- wq->sq.sw_sq = kzalloc(wq->sq.size * sizeof *wq->sq.sw_sq,
- GFP_KERNEL);
+ wq->sq.sw_sq = kcalloc(wq->sq.size, sizeof(*wq->sq.sw_sq),
+ GFP_KERNEL);
if (!wq->sq.sw_sq) {
ret = -ENOMEM;
- goto free_rq_qid;
+ goto free_rq_qid;//FIXME
}
- wq->rq.sw_rq = kzalloc(wq->rq.size * sizeof *wq->rq.sw_rq,
- GFP_KERNEL);
- if (!wq->rq.sw_rq) {
- ret = -ENOMEM;
- goto free_sw_sq;
+ if (need_rq) {
+ wq->rq.sw_rq = kcalloc(wq->rq.size,
+ sizeof(*wq->rq.sw_rq),
+ GFP_KERNEL);
+ if (!wq->rq.sw_rq) {
+ ret = -ENOMEM;
+ goto free_sw_sq;
+ }
}
}
- /*
- * RQT must be a power of 2 and at least 16 deep.
- */
- wq->rq.rqt_size = roundup_pow_of_two(max_t(u16, wq->rq.size, 16));
- wq->rq.rqt_hwaddr = c4iw_rqtpool_alloc(rdev, wq->rq.rqt_size);
- if (!wq->rq.rqt_hwaddr) {
- ret = -ENOMEM;
- goto free_sw_rq;
+ if (need_rq) {
+ /*
+ * RQT must be a power of 2 and at least 16 deep.
+ */
+ wq->rq.rqt_size =
+ roundup_pow_of_two(max_t(u16, wq->rq.size, 16));
+ wq->rq.rqt_hwaddr = c4iw_rqtpool_alloc(rdev, wq->rq.rqt_size);
+ if (!wq->rq.rqt_hwaddr) {
+ ret = -ENOMEM;
+ goto free_sw_rq;
+ }
}
ret = alloc_sq(rdev, &wq->sq, user);
@@ -247,36 +260,42 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
memset(wq->sq.queue, 0, wq->sq.memsize);
dma_unmap_addr_set(&wq->sq, mapping, wq->sq.dma_addr);
- wq->rq.queue = dma_alloc_coherent(&(rdev->lldi.pdev->dev),
- wq->rq.memsize, &(wq->rq.dma_addr),
- GFP_KERNEL);
- if (!wq->rq.queue) {
- ret = -ENOMEM;
- goto free_sq;
+ if (need_rq) {
+ wq->rq.queue = dma_alloc_coherent(&rdev->lldi.pdev->dev,
+ wq->rq.memsize,
+ &wq->rq.dma_addr,
+ GFP_KERNEL);
+ if (!wq->rq.queue) {
+ ret = -ENOMEM;
+ goto free_sq;
+ }
+ pr_debug("sq base va 0x%p pa 0x%llx rq base va 0x%p pa 0x%llx\n",
+ wq->sq.queue,
+ (unsigned long long)virt_to_phys(wq->sq.queue),
+ wq->rq.queue,
+ (unsigned long long)virt_to_phys(wq->rq.queue));
+ dma_unmap_addr_set(&wq->rq, mapping, wq->rq.dma_addr);
}
- PDBG("%s sq base va 0x%p pa 0x%llx rq base va 0x%p pa 0x%llx\n",
- __func__, wq->sq.queue,
- (unsigned long long)virt_to_phys(wq->sq.queue),
- wq->rq.queue,
- (unsigned long long)virt_to_phys(wq->rq.queue));
- memset(wq->rq.queue, 0, wq->rq.memsize);
- dma_unmap_addr_set(&wq->rq, mapping, wq->rq.dma_addr);
wq->db = rdev->lldi.db_reg;
- wq->sq.bar2_va = c4iw_bar2_addrs(rdev, wq->sq.qid, T4_BAR2_QTYPE_EGRESS,
+ wq->sq.bar2_va = c4iw_bar2_addrs(rdev, wq->sq.qid,
+ CXGB4_BAR2_QTYPE_EGRESS,
&wq->sq.bar2_qid,
user ? &wq->sq.bar2_pa : NULL);
- wq->rq.bar2_va = c4iw_bar2_addrs(rdev, wq->rq.qid, T4_BAR2_QTYPE_EGRESS,
- &wq->rq.bar2_qid,
- user ? &wq->rq.bar2_pa : NULL);
+ if (need_rq)
+ wq->rq.bar2_va = c4iw_bar2_addrs(rdev, wq->rq.qid,
+ CXGB4_BAR2_QTYPE_EGRESS,
+ &wq->rq.bar2_qid,
+ user ? &wq->rq.bar2_pa : NULL);
/*
* User mode must have bar2 access.
*/
- if (user && (!wq->sq.bar2_pa || !wq->rq.bar2_pa)) {
- pr_warn(MOD "%s: sqid %u or rqid %u not in BAR2 range.\n",
+ if (user && (!wq->sq.bar2_pa || (need_rq && !wq->rq.bar2_pa))) {
+ pr_warn("%s: sqid %u or rqid %u not in BAR2 range\n",
pci_name(rdev->lldi.pdev), wq->sq.qid, wq->rq.qid);
+ ret = -EINVAL;
goto free_dma;
}
@@ -284,8 +303,9 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
wq->rq.msn = 1;
/* build fw_ri_res_wr */
- wr_len = sizeof *res_wr + 2 * sizeof *res;
-
+ wr_len = sizeof(*res_wr) + 2 * sizeof(*res);
+ if (need_rq)
+ wr_len += sizeof(*res);
skb = alloc_skb(wr_len, GFP_KERNEL);
if (!skb) {
ret = -ENOMEM;
@@ -293,14 +313,13 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
}
set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0);
- res_wr = (struct fw_ri_res_wr *)__skb_put(skb, wr_len);
- memset(res_wr, 0, wr_len);
+ res_wr = __skb_put_zero(skb, wr_len);
res_wr->op_nres = cpu_to_be32(
FW_WR_OP_V(FW_RI_RES_WR) |
- FW_RI_RES_WR_NRES_V(2) |
+ FW_RI_RES_WR_NRES_V(need_rq ? 2 : 1) |
FW_WR_COMPL_F);
res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16));
- res_wr->cookie = (uintptr_t)&wr_wait;
+ res_wr->cookie = (uintptr_t)wr_waitp;
res = res_wr->res;
res->u.sqrq.restype = FW_RI_RES_TYPE_SQ;
res->u.sqrq.op = FW_RI_RES_OP_WRITE;
@@ -328,66 +347,72 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
FW_RI_RES_WR_EQSIZE_V(eqsize));
res->u.sqrq.eqid = cpu_to_be32(wq->sq.qid);
res->u.sqrq.eqaddr = cpu_to_be64(wq->sq.dma_addr);
- res++;
- res->u.sqrq.restype = FW_RI_RES_TYPE_RQ;
- res->u.sqrq.op = FW_RI_RES_OP_WRITE;
-
- /*
- * eqsize is the number of 64B entries plus the status page size.
- */
- eqsize = wq->rq.size * T4_RQ_NUM_SLOTS +
- rdev->hw_queue.t4_eq_status_entries;
- res->u.sqrq.fetchszm_to_iqid = cpu_to_be32(
- FW_RI_RES_WR_HOSTFCMODE_V(0) | /* no host cidx updates */
- FW_RI_RES_WR_CPRIO_V(0) | /* don't keep in chip cache */
- FW_RI_RES_WR_PCIECHN_V(0) | /* set by uP at ri_init time */
- FW_RI_RES_WR_IQID_V(rcq->cqid));
- res->u.sqrq.dcaen_to_eqsize = cpu_to_be32(
- FW_RI_RES_WR_DCAEN_V(0) |
- FW_RI_RES_WR_DCACPU_V(0) |
- FW_RI_RES_WR_FBMIN_V(2) |
- FW_RI_RES_WR_FBMAX_V(3) |
- FW_RI_RES_WR_CIDXFTHRESHO_V(0) |
- FW_RI_RES_WR_CIDXFTHRESH_V(0) |
- FW_RI_RES_WR_EQSIZE_V(eqsize));
- res->u.sqrq.eqid = cpu_to_be32(wq->rq.qid);
- res->u.sqrq.eqaddr = cpu_to_be64(wq->rq.dma_addr);
- c4iw_init_wr_wait(&wr_wait);
+ if (need_rq) {
+ res++;
+ res->u.sqrq.restype = FW_RI_RES_TYPE_RQ;
+ res->u.sqrq.op = FW_RI_RES_OP_WRITE;
+
+ /*
+ * eqsize is the number of 64B entries plus the status page size
+ */
+ eqsize = wq->rq.size * T4_RQ_NUM_SLOTS +
+ rdev->hw_queue.t4_eq_status_entries;
+ res->u.sqrq.fetchszm_to_iqid =
+ /* no host cidx updates */
+ cpu_to_be32(FW_RI_RES_WR_HOSTFCMODE_V(0) |
+ /* don't keep in chip cache */
+ FW_RI_RES_WR_CPRIO_V(0) |
+ /* set by uP at ri_init time */
+ FW_RI_RES_WR_PCIECHN_V(0) |
+ FW_RI_RES_WR_IQID_V(rcq->cqid));
+ res->u.sqrq.dcaen_to_eqsize =
+ cpu_to_be32(FW_RI_RES_WR_DCAEN_V(0) |
+ FW_RI_RES_WR_DCACPU_V(0) |
+ FW_RI_RES_WR_FBMIN_V(2) |
+ FW_RI_RES_WR_FBMAX_V(3) |
+ FW_RI_RES_WR_CIDXFTHRESHO_V(0) |
+ FW_RI_RES_WR_CIDXFTHRESH_V(0) |
+ FW_RI_RES_WR_EQSIZE_V(eqsize));
+ res->u.sqrq.eqid = cpu_to_be32(wq->rq.qid);
+ res->u.sqrq.eqaddr = cpu_to_be64(wq->rq.dma_addr);
+ }
- ret = c4iw_ofld_send(rdev, skb);
- if (ret)
- goto free_dma;
- ret = c4iw_wait_for_reply(rdev, &wr_wait, 0, wq->sq.qid, __func__);
+ c4iw_init_wr_wait(wr_waitp);
+ ret = c4iw_ref_send_wait(rdev, skb, wr_waitp, 0, wq->sq.qid, __func__);
if (ret)
goto free_dma;
- PDBG("%s sqid 0x%x rqid 0x%x kdb 0x%p sq_bar2_addr %p rq_bar2_addr %p\n",
- __func__, wq->sq.qid, wq->rq.qid, wq->db,
- wq->sq.bar2_va, wq->rq.bar2_va);
+ pr_debug("sqid 0x%x rqid 0x%x kdb 0x%p sq_bar2_addr %p rq_bar2_addr %p\n",
+ wq->sq.qid, wq->rq.qid, wq->db,
+ wq->sq.bar2_va, wq->rq.bar2_va);
return 0;
free_dma:
- dma_free_coherent(&(rdev->lldi.pdev->dev),
- wq->rq.memsize, wq->rq.queue,
- dma_unmap_addr(&wq->rq, mapping));
+ if (need_rq)
+ dma_free_coherent(&rdev->lldi.pdev->dev,
+ wq->rq.memsize, wq->rq.queue,
+ dma_unmap_addr(&wq->rq, mapping));
free_sq:
dealloc_sq(rdev, &wq->sq);
free_hwaddr:
- c4iw_rqtpool_free(rdev, wq->rq.rqt_hwaddr, wq->rq.rqt_size);
+ if (need_rq)
+ c4iw_rqtpool_free(rdev, wq->rq.rqt_hwaddr, wq->rq.rqt_size);
free_sw_rq:
- kfree(wq->rq.sw_rq);
+ if (need_rq)
+ kfree(wq->rq.sw_rq);
free_sw_sq:
kfree(wq->sq.sw_sq);
free_rq_qid:
- c4iw_put_qpid(rdev, wq->rq.qid, uctx);
+ if (need_rq)
+ c4iw_put_qpid(rdev, wq->rq.qid, uctx);
free_sq_qid:
c4iw_put_qpid(rdev, wq->sq.qid, uctx);
return ret;
}
static int build_immd(struct t4_sq *sq, struct fw_ri_immd *immdp,
- struct ib_send_wr *wr, int max, u32 *plenp)
+ const struct ib_send_wr *wr, int max, u32 *plenp)
{
u8 *dstp, *srcp;
u32 plen = 0;
@@ -414,7 +439,7 @@ static int build_immd(struct t4_sq *sq, struct fw_ri_immd *immdp,
rem -= len;
}
}
- len = roundup(plen + sizeof *immdp, 16) - (plen + sizeof *immdp);
+ len = roundup(plen + sizeof(*immdp), 16) - (plen + sizeof(*immdp));
if (len)
memset(dstp, 0, len);
immdp->op = FW_RI_DATA_IMMD;
@@ -432,7 +457,12 @@ static int build_isgl(__be64 *queue_start, __be64 *queue_end,
{
int i;
u32 plen = 0;
- __be64 *flitp = (__be64 *)isglp->sge;
+ __be64 *flitp;
+
+ if ((__be64 *)isglp == queue_end)
+ isglp = (struct fw_ri_isgl *)queue_start;
+
+ flitp = (__be64 *)isglp->sge;
for (i = 0; i < num_sge; i++) {
if ((plen + sg_list[i].length) < plen)
@@ -457,7 +487,7 @@ static int build_isgl(__be64 *queue_start, __be64 *queue_end,
}
static int build_rdma_send(struct t4_sq *sq, union t4_wr *wqe,
- struct ib_send_wr *wr, u8 *len16)
+ const struct ib_send_wr *wr, u8 *len16)
{
u32 plen;
int size;
@@ -498,7 +528,7 @@ static int build_rdma_send(struct t4_sq *sq, union t4_wr *wqe,
T4_MAX_SEND_INLINE, &plen);
if (ret)
return ret;
- size = sizeof wqe->send + sizeof(struct fw_ri_immd) +
+ size = sizeof(wqe->send) + sizeof(struct fw_ri_immd) +
plen;
} else {
ret = build_isgl((__be64 *)sq->queue,
@@ -507,7 +537,7 @@ static int build_rdma_send(struct t4_sq *sq, union t4_wr *wqe,
wr->sg_list, wr->num_sge, &plen);
if (ret)
return ret;
- size = sizeof wqe->send + sizeof(struct fw_ri_isgl) +
+ size = sizeof(wqe->send) + sizeof(struct fw_ri_isgl) +
wr->num_sge * sizeof(struct fw_ri_sge);
}
} else {
@@ -515,7 +545,7 @@ static int build_rdma_send(struct t4_sq *sq, union t4_wr *wqe,
wqe->send.u.immd_src[0].r1 = 0;
wqe->send.u.immd_src[0].r2 = 0;
wqe->send.u.immd_src[0].immdlen = 0;
- size = sizeof wqe->send + sizeof(struct fw_ri_immd);
+ size = sizeof(wqe->send) + sizeof(struct fw_ri_immd);
plen = 0;
}
*len16 = DIV_ROUND_UP(size, 16);
@@ -524,7 +554,7 @@ static int build_rdma_send(struct t4_sq *sq, union t4_wr *wqe,
}
static int build_rdma_write(struct t4_sq *sq, union t4_wr *wqe,
- struct ib_send_wr *wr, u8 *len16)
+ const struct ib_send_wr *wr, u8 *len16)
{
u32 plen;
int size;
@@ -532,7 +562,15 @@ static int build_rdma_write(struct t4_sq *sq, union t4_wr *wqe,
if (wr->num_sge > T4_MAX_SEND_SGE)
return -EINVAL;
- wqe->write.r2 = 0;
+
+ /*
+ * iWARP protocol supports 64 bit immediate data but rdma api
+ * limits it to 32bit.
+ */
+ if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM)
+ wqe->write.iw_imm_data.ib_imm_data.imm_data32 = wr->ex.imm_data;
+ else
+ wqe->write.iw_imm_data.ib_imm_data.imm_data32 = 0;
wqe->write.stag_sink = cpu_to_be32(rdma_wr(wr)->rkey);
wqe->write.to_sink = cpu_to_be64(rdma_wr(wr)->remote_addr);
if (wr->num_sge) {
@@ -541,7 +579,7 @@ static int build_rdma_write(struct t4_sq *sq, union t4_wr *wqe,
T4_MAX_WRITE_INLINE, &plen);
if (ret)
return ret;
- size = sizeof wqe->write + sizeof(struct fw_ri_immd) +
+ size = sizeof(wqe->write) + sizeof(struct fw_ri_immd) +
plen;
} else {
ret = build_isgl((__be64 *)sq->queue,
@@ -550,7 +588,7 @@ static int build_rdma_write(struct t4_sq *sq, union t4_wr *wqe,
wr->sg_list, wr->num_sge, &plen);
if (ret)
return ret;
- size = sizeof wqe->write + sizeof(struct fw_ri_isgl) +
+ size = sizeof(wqe->write) + sizeof(struct fw_ri_isgl) +
wr->num_sge * sizeof(struct fw_ri_sge);
}
} else {
@@ -558,7 +596,7 @@ static int build_rdma_write(struct t4_sq *sq, union t4_wr *wqe,
wqe->write.u.immd_src[0].r1 = 0;
wqe->write.u.immd_src[0].r2 = 0;
wqe->write.u.immd_src[0].immdlen = 0;
- size = sizeof wqe->write + sizeof(struct fw_ri_immd);
+ size = sizeof(wqe->write) + sizeof(struct fw_ri_immd);
plen = 0;
}
*len16 = DIV_ROUND_UP(size, 16);
@@ -566,11 +604,65 @@ static int build_rdma_write(struct t4_sq *sq, union t4_wr *wqe,
return 0;
}
-static int build_rdma_read(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16)
+static void build_immd_cmpl(struct t4_sq *sq, struct fw_ri_immd_cmpl *immdp,
+ struct ib_send_wr *wr)
+{
+ memcpy((u8 *)immdp->data, (u8 *)(uintptr_t)wr->sg_list->addr, 16);
+ memset(immdp->r1, 0, 6);
+ immdp->op = FW_RI_DATA_IMMD;
+ immdp->immdlen = 16;
+}
+
+static void build_rdma_write_cmpl(struct t4_sq *sq,
+ struct fw_ri_rdma_write_cmpl_wr *wcwr,
+ const struct ib_send_wr *wr, u8 *len16)
+{
+ u32 plen;
+ int size;
+
+ /*
+ * This code assumes the struct fields preceding the write isgl
+ * fit in one 64B WR slot. This is because the WQE is built
+ * directly in the dma queue, and wrapping is only handled
+ * by the code buildling sgls. IE the "fixed part" of the wr
+ * structs must all fit in 64B. The WQE build code should probably be
+ * redesigned to avoid this restriction, but for now just add
+ * the BUILD_BUG_ON() to catch if this WQE struct gets too big.
+ */
+ BUILD_BUG_ON(offsetof(struct fw_ri_rdma_write_cmpl_wr, u) > 64);
+
+ wcwr->stag_sink = cpu_to_be32(rdma_wr(wr)->rkey);
+ wcwr->to_sink = cpu_to_be64(rdma_wr(wr)->remote_addr);
+ if (wr->next->opcode == IB_WR_SEND)
+ wcwr->stag_inv = 0;
+ else
+ wcwr->stag_inv = cpu_to_be32(wr->next->ex.invalidate_rkey);
+ wcwr->r2 = 0;
+ wcwr->r3 = 0;
+
+ /* SEND_INV SGL */
+ if (wr->next->send_flags & IB_SEND_INLINE)
+ build_immd_cmpl(sq, &wcwr->u_cmpl.immd_src, wr->next);
+ else
+ build_isgl((__be64 *)sq->queue, (__be64 *)&sq->queue[sq->size],
+ &wcwr->u_cmpl.isgl_src, wr->next->sg_list, 1, NULL);
+
+ /* WRITE SGL */
+ build_isgl((__be64 *)sq->queue, (__be64 *)&sq->queue[sq->size],
+ wcwr->u.isgl_src, wr->sg_list, wr->num_sge, &plen);
+
+ size = sizeof(*wcwr) + sizeof(struct fw_ri_isgl) +
+ wr->num_sge * sizeof(struct fw_ri_sge);
+ wcwr->plen = cpu_to_be32(plen);
+ *len16 = DIV_ROUND_UP(size, 16);
+}
+
+static int build_rdma_read(union t4_wr *wqe, const struct ib_send_wr *wr,
+ u8 *len16)
{
if (wr->num_sge > 1)
return -EINVAL;
- if (wr->num_sge) {
+ if (wr->num_sge && wr->sg_list[0].length) {
wqe->read.stag_src = cpu_to_be32(rdma_wr(wr)->rkey);
wqe->read.to_src_hi = cpu_to_be32((u32)(rdma_wr(wr)->remote_addr
>> 32));
@@ -591,12 +683,81 @@ static int build_rdma_read(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16)
}
wqe->read.r2 = 0;
wqe->read.r5 = 0;
- *len16 = DIV_ROUND_UP(sizeof wqe->read, 16);
+ *len16 = DIV_ROUND_UP(sizeof(wqe->read), 16);
return 0;
}
+static void post_write_cmpl(struct c4iw_qp *qhp, const struct ib_send_wr *wr)
+{
+ bool send_signaled = (wr->next->send_flags & IB_SEND_SIGNALED) ||
+ qhp->sq_sig_all;
+ bool write_signaled = (wr->send_flags & IB_SEND_SIGNALED) ||
+ qhp->sq_sig_all;
+ struct t4_swsqe *swsqe;
+ union t4_wr *wqe;
+ u16 write_wrid;
+ u8 len16;
+ u16 idx;
+
+ /*
+ * The sw_sq entries still look like a WRITE and a SEND and consume
+ * 2 slots. The FW WR, however, will be a single uber-WR.
+ */
+ wqe = (union t4_wr *)((u8 *)qhp->wq.sq.queue +
+ qhp->wq.sq.wq_pidx * T4_EQ_ENTRY_SIZE);
+ build_rdma_write_cmpl(&qhp->wq.sq, &wqe->write_cmpl, wr, &len16);
+
+ /* WRITE swsqe */
+ swsqe = &qhp->wq.sq.sw_sq[qhp->wq.sq.pidx];
+ swsqe->opcode = FW_RI_RDMA_WRITE;
+ swsqe->idx = qhp->wq.sq.pidx;
+ swsqe->complete = 0;
+ swsqe->signaled = write_signaled;
+ swsqe->flushed = 0;
+ swsqe->wr_id = wr->wr_id;
+ if (c4iw_wr_log) {
+ swsqe->sge_ts =
+ cxgb4_read_sge_timestamp(qhp->rhp->rdev.lldi.ports[0]);
+ swsqe->host_time = ktime_get();
+ }
+
+ write_wrid = qhp->wq.sq.pidx;
+
+ /* just bump the sw_sq */
+ qhp->wq.sq.in_use++;
+ if (++qhp->wq.sq.pidx == qhp->wq.sq.size)
+ qhp->wq.sq.pidx = 0;
+
+ /* SEND_WITH_INV swsqe */
+ swsqe = &qhp->wq.sq.sw_sq[qhp->wq.sq.pidx];
+ if (wr->next->opcode == IB_WR_SEND)
+ swsqe->opcode = FW_RI_SEND;
+ else
+ swsqe->opcode = FW_RI_SEND_WITH_INV;
+ swsqe->idx = qhp->wq.sq.pidx;
+ swsqe->complete = 0;
+ swsqe->signaled = send_signaled;
+ swsqe->flushed = 0;
+ swsqe->wr_id = wr->next->wr_id;
+ if (c4iw_wr_log) {
+ swsqe->sge_ts =
+ cxgb4_read_sge_timestamp(qhp->rhp->rdev.lldi.ports[0]);
+ swsqe->host_time = ktime_get();
+ }
+
+ wqe->write_cmpl.flags_send = send_signaled ? FW_RI_COMPLETION_FLAG : 0;
+ wqe->write_cmpl.wrid_send = qhp->wq.sq.pidx;
+
+ init_wr_hdr(wqe, write_wrid, FW_RI_RDMA_WRITE_CMPL_WR,
+ write_signaled ? FW_RI_COMPLETION_FLAG : 0, len16);
+ t4_sq_produce(&qhp->wq, len16);
+ idx = DIV_ROUND_UP(len16 * 16, T4_EQ_ENTRY_SIZE);
+
+ t4_ring_sq_db(&qhp->wq, idx, wqe);
+}
+
static int build_rdma_recv(struct c4iw_qp *qhp, union t4_recv_wr *wqe,
- struct ib_recv_wr *wr, u8 *len16)
+ const struct ib_recv_wr *wr, u8 *len16)
{
int ret;
@@ -605,13 +766,27 @@ static int build_rdma_recv(struct c4iw_qp *qhp, union t4_recv_wr *wqe,
&wqe->recv.isgl, wr->sg_list, wr->num_sge, NULL);
if (ret)
return ret;
- *len16 = DIV_ROUND_UP(sizeof wqe->recv +
+ *len16 = DIV_ROUND_UP(
+ sizeof(wqe->recv) + wr->num_sge * sizeof(struct fw_ri_sge), 16);
+ return 0;
+}
+
+static int build_srq_recv(union t4_recv_wr *wqe, const struct ib_recv_wr *wr,
+ u8 *len16)
+{
+ int ret;
+
+ ret = build_isgl((__be64 *)wqe, (__be64 *)(wqe + 1),
+ &wqe->recv.isgl, wr->sg_list, wr->num_sge, NULL);
+ if (ret)
+ return ret;
+ *len16 = DIV_ROUND_UP(sizeof(wqe->recv) +
wr->num_sge * sizeof(struct fw_ri_sge), 16);
return 0;
}
static void build_tpte_memreg(struct fw_ri_fr_nsmr_tpte_wr *fr,
- struct ib_reg_wr *wr, struct c4iw_mr *mhp,
+ const struct ib_reg_wr *wr, struct c4iw_mr *mhp,
u8 *len16)
{
__be64 *p = (__be64 *)fr->pbl;
@@ -643,8 +818,8 @@ static void build_tpte_memreg(struct fw_ri_fr_nsmr_tpte_wr *fr,
}
static int build_memreg(struct t4_sq *sq, union t4_wr *wqe,
- struct ib_reg_wr *wr, struct c4iw_mr *mhp, u8 *len16,
- bool dsgl_supported)
+ const struct ib_reg_wr *wr, struct c4iw_mr *mhp,
+ u8 *len16, bool dsgl_supported)
{
struct fw_ri_immd *imdp;
__be64 *p;
@@ -694,7 +869,6 @@ static int build_memreg(struct t4_sq *sq, union t4_wr *wqe,
if (++p == (__be64 *)&sq->queue[sq->size])
p = (__be64 *)sq->queue;
}
- BUG_ON(rem < 0);
while (rem) {
*p = 0;
rem -= sizeof(*p);
@@ -707,33 +881,26 @@ static int build_memreg(struct t4_sq *sq, union t4_wr *wqe,
return 0;
}
-static int build_inv_stag(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16)
+static int build_inv_stag(union t4_wr *wqe, const struct ib_send_wr *wr,
+ u8 *len16)
{
wqe->inv.stag_inv = cpu_to_be32(wr->ex.invalidate_rkey);
wqe->inv.r2 = 0;
- *len16 = DIV_ROUND_UP(sizeof wqe->inv, 16);
+ *len16 = DIV_ROUND_UP(sizeof(wqe->inv), 16);
return 0;
}
-static void _free_qp(struct kref *kref)
-{
- struct c4iw_qp *qhp;
-
- qhp = container_of(kref, struct c4iw_qp, kref);
- PDBG("%s qhp %p\n", __func__, qhp);
- kfree(qhp);
-}
-
void c4iw_qp_add_ref(struct ib_qp *qp)
{
- PDBG("%s ib_qp %p\n", __func__, qp);
- kref_get(&to_c4iw_qp(qp)->kref);
+ pr_debug("ib_qp %p\n", qp);
+ refcount_inc(&to_c4iw_qp(qp)->qp_refcnt);
}
void c4iw_qp_rem_ref(struct ib_qp *qp)
{
- PDBG("%s ib_qp %p\n", __func__, qp);
- kref_put(&to_c4iw_qp(qp)->kref, _free_qp);
+ pr_debug("ib_qp %p\n", qp);
+ if (refcount_dec_and_test(&to_c4iw_qp(qp)->qp_refcnt))
+ complete(&to_c4iw_qp(qp)->qp_rel_comp);
}
static void add_to_fc_list(struct list_head *head, struct list_head *entry)
@@ -746,7 +913,7 @@ static int ring_kernel_sq_db(struct c4iw_qp *qhp, u16 inc)
{
unsigned long flags;
- spin_lock_irqsave(&qhp->rhp->lock, flags);
+ xa_lock_irqsave(&qhp->rhp->qps, flags);
spin_lock(&qhp->lock);
if (qhp->rhp->db_state == NORMAL)
t4_ring_sq_db(&qhp->wq, inc, NULL);
@@ -755,7 +922,7 @@ static int ring_kernel_sq_db(struct c4iw_qp *qhp, u16 inc)
qhp->wq.sq.wq_pidx_inc += inc;
}
spin_unlock(&qhp->lock);
- spin_unlock_irqrestore(&qhp->rhp->lock, flags);
+ xa_unlock_irqrestore(&qhp->rhp->qps, flags);
return 0;
}
@@ -763,7 +930,7 @@ static int ring_kernel_rq_db(struct c4iw_qp *qhp, u16 inc)
{
unsigned long flags;
- spin_lock_irqsave(&qhp->rhp->lock, flags);
+ xa_lock_irqsave(&qhp->rhp->qps, flags);
spin_lock(&qhp->lock);
if (qhp->rhp->db_state == NORMAL)
t4_ring_rq_db(&qhp->wq, inc, NULL);
@@ -772,18 +939,150 @@ static int ring_kernel_rq_db(struct c4iw_qp *qhp, u16 inc)
qhp->wq.rq.wq_pidx_inc += inc;
}
spin_unlock(&qhp->lock);
- spin_unlock_irqrestore(&qhp->rhp->lock, flags);
+ xa_unlock_irqrestore(&qhp->rhp->qps, flags);
return 0;
}
-int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
- struct ib_send_wr **bad_wr)
+static int ib_to_fw_opcode(int ib_opcode)
+{
+ int opcode;
+
+ switch (ib_opcode) {
+ case IB_WR_SEND_WITH_INV:
+ opcode = FW_RI_SEND_WITH_INV;
+ break;
+ case IB_WR_SEND:
+ opcode = FW_RI_SEND;
+ break;
+ case IB_WR_RDMA_WRITE:
+ opcode = FW_RI_RDMA_WRITE;
+ break;
+ case IB_WR_RDMA_WRITE_WITH_IMM:
+ opcode = FW_RI_WRITE_IMMEDIATE;
+ break;
+ case IB_WR_RDMA_READ:
+ case IB_WR_RDMA_READ_WITH_INV:
+ opcode = FW_RI_READ_REQ;
+ break;
+ case IB_WR_REG_MR:
+ opcode = FW_RI_FAST_REGISTER;
+ break;
+ case IB_WR_LOCAL_INV:
+ opcode = FW_RI_LOCAL_INV;
+ break;
+ default:
+ opcode = -EINVAL;
+ }
+ return opcode;
+}
+
+static int complete_sq_drain_wr(struct c4iw_qp *qhp,
+ const struct ib_send_wr *wr)
+{
+ struct t4_cqe cqe = {};
+ struct c4iw_cq *schp;
+ unsigned long flag;
+ struct t4_cq *cq;
+ int opcode;
+
+ schp = to_c4iw_cq(qhp->ibqp.send_cq);
+ cq = &schp->cq;
+
+ opcode = ib_to_fw_opcode(wr->opcode);
+ if (opcode < 0)
+ return opcode;
+
+ cqe.u.drain_cookie = wr->wr_id;
+ cqe.header = cpu_to_be32(CQE_STATUS_V(T4_ERR_SWFLUSH) |
+ CQE_OPCODE_V(opcode) |
+ CQE_TYPE_V(1) |
+ CQE_SWCQE_V(1) |
+ CQE_DRAIN_V(1) |
+ CQE_QPID_V(qhp->wq.sq.qid));
+
+ spin_lock_irqsave(&schp->lock, flag);
+ cqe.bits_type_ts = cpu_to_be64(CQE_GENBIT_V((u64)cq->gen));
+ cq->sw_queue[cq->sw_pidx] = cqe;
+ t4_swcq_produce(cq);
+ spin_unlock_irqrestore(&schp->lock, flag);
+
+ if (t4_clear_cq_armed(&schp->cq)) {
+ spin_lock_irqsave(&schp->comp_handler_lock, flag);
+ (*schp->ibcq.comp_handler)(&schp->ibcq,
+ schp->ibcq.cq_context);
+ spin_unlock_irqrestore(&schp->comp_handler_lock, flag);
+ }
+ return 0;
+}
+
+static int complete_sq_drain_wrs(struct c4iw_qp *qhp,
+ const struct ib_send_wr *wr,
+ const struct ib_send_wr **bad_wr)
+{
+ int ret = 0;
+
+ while (wr) {
+ ret = complete_sq_drain_wr(qhp, wr);
+ if (ret) {
+ *bad_wr = wr;
+ break;
+ }
+ wr = wr->next;
+ }
+ return ret;
+}
+
+static void complete_rq_drain_wr(struct c4iw_qp *qhp,
+ const struct ib_recv_wr *wr)
+{
+ struct t4_cqe cqe = {};
+ struct c4iw_cq *rchp;
+ unsigned long flag;
+ struct t4_cq *cq;
+
+ rchp = to_c4iw_cq(qhp->ibqp.recv_cq);
+ cq = &rchp->cq;
+
+ cqe.u.drain_cookie = wr->wr_id;
+ cqe.header = cpu_to_be32(CQE_STATUS_V(T4_ERR_SWFLUSH) |
+ CQE_OPCODE_V(FW_RI_SEND) |
+ CQE_TYPE_V(0) |
+ CQE_SWCQE_V(1) |
+ CQE_DRAIN_V(1) |
+ CQE_QPID_V(qhp->wq.sq.qid));
+
+ spin_lock_irqsave(&rchp->lock, flag);
+ cqe.bits_type_ts = cpu_to_be64(CQE_GENBIT_V((u64)cq->gen));
+ cq->sw_queue[cq->sw_pidx] = cqe;
+ t4_swcq_produce(cq);
+ spin_unlock_irqrestore(&rchp->lock, flag);
+
+ if (t4_clear_cq_armed(&rchp->cq)) {
+ spin_lock_irqsave(&rchp->comp_handler_lock, flag);
+ (*rchp->ibcq.comp_handler)(&rchp->ibcq,
+ rchp->ibcq.cq_context);
+ spin_unlock_irqrestore(&rchp->comp_handler_lock, flag);
+ }
+}
+
+static void complete_rq_drain_wrs(struct c4iw_qp *qhp,
+ const struct ib_recv_wr *wr)
+{
+ while (wr) {
+ complete_rq_drain_wr(qhp, wr);
+ wr = wr->next;
+ }
+}
+
+int c4iw_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+ const struct ib_send_wr **bad_wr)
{
int err = 0;
u8 len16 = 0;
enum fw_wr_opcodes fw_opcode = 0;
enum fw_ri_wr_flags fw_flags;
struct c4iw_qp *qhp;
+ struct c4iw_dev *rhp;
union t4_wr *wqe = NULL;
u32 num_wrs;
struct t4_swsqe *swsqe;
@@ -791,11 +1090,17 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
u16 idx = 0;
qhp = to_c4iw_qp(ibqp);
+ rhp = qhp->rhp;
spin_lock_irqsave(&qhp->lock, flag);
- if (t4_wq_in_error(&qhp->wq)) {
+
+ /*
+ * If the qp has been flushed, then just insert a special
+ * drain cqe.
+ */
+ if (qhp->wq.flushed) {
spin_unlock_irqrestore(&qhp->lock, flag);
- *bad_wr = wr;
- return -EINVAL;
+ err = complete_sq_drain_wrs(qhp, wr, bad_wr);
+ return err;
}
num_wrs = t4_sq_avail(&qhp->wq);
if (num_wrs == 0) {
@@ -803,6 +1108,31 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
*bad_wr = wr;
return -ENOMEM;
}
+
+ /*
+ * Fastpath for NVMe-oF target WRITE + SEND_WITH_INV wr chain which is
+ * the response for small NVMEe-oF READ requests. If the chain is
+ * exactly a WRITE->SEND_WITH_INV or a WRITE->SEND and the sgl depths
+ * and lengths meet the requirements of the fw_ri_write_cmpl_wr work
+ * request, then build and post the write_cmpl WR. If any of the tests
+ * below are not true, then we continue on with the tradtional WRITE
+ * and SEND WRs.
+ */
+ if (qhp->rhp->rdev.lldi.write_cmpl_support &&
+ CHELSIO_CHIP_VERSION(qhp->rhp->rdev.lldi.adapter_type) >=
+ CHELSIO_T5 &&
+ wr && wr->next && !wr->next->next &&
+ wr->opcode == IB_WR_RDMA_WRITE &&
+ wr->sg_list[0].length && wr->num_sge <= T4_WRITE_CMPL_MAX_SGL &&
+ (wr->next->opcode == IB_WR_SEND ||
+ wr->next->opcode == IB_WR_SEND_WITH_INV) &&
+ wr->next->sg_list[0].length == T4_WRITE_CMPL_MAX_CQE &&
+ wr->next->num_sge == 1 && num_wrs >= 2) {
+ post_write_cmpl(qhp, wr);
+ spin_unlock_irqrestore(&qhp->lock, flag);
+ return 0;
+ }
+
while (wr) {
if (num_wrs == 0) {
err = -ENOMEM;
@@ -830,6 +1160,13 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
swsqe->opcode = FW_RI_SEND_WITH_INV;
err = build_rdma_send(&qhp->wq.sq, wqe, wr, &len16);
break;
+ case IB_WR_RDMA_WRITE_WITH_IMM:
+ if (unlikely(!rhp->rdev.lldi.write_w_imm_support)) {
+ err = -EINVAL;
+ break;
+ }
+ fw_flags |= FW_RI_RDMA_WRITE_WITH_IMMEDIATE;
+ fallthrough;
case IB_WR_RDMA_WRITE:
fw_opcode = FW_RI_RDMA_WRITE_WR;
swsqe->opcode = FW_RI_RDMA_WRITE;
@@ -840,8 +1177,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
fw_opcode = FW_RI_RDMA_READ_WR;
swsqe->opcode = FW_RI_READ_REQ;
if (wr->opcode == IB_WR_RDMA_READ_WITH_INV) {
- c4iw_invalidate_mr(qhp->rhp,
- wr->sg_list[0].lkey);
+ c4iw_invalidate_mr(rhp, wr->sg_list[0].lkey);
fw_flags = FW_RI_RDMA_READ_INVALIDATE;
} else {
fw_flags = 0;
@@ -857,7 +1193,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
struct c4iw_mr *mhp = to_c4iw_mr(reg_wr(wr)->mr);
swsqe->opcode = FW_RI_FAST_REGISTER;
- if (qhp->rhp->rdev.lldi.fr_nsmr_tpte_wr_support &&
+ if (rhp->rdev.lldi.fr_nsmr_tpte_wr_support &&
!mhp->attr.state && mhp->mpl_len <= 2) {
fw_opcode = FW_RI_FR_NSMR_TPTE_WR;
build_tpte_memreg(&wqe->fr_tpte, reg_wr(wr),
@@ -866,7 +1202,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
fw_opcode = FW_RI_FR_NSMR_WR;
err = build_memreg(&qhp->wq.sq, wqe, reg_wr(wr),
mhp, &len16,
- qhp->rhp->rdev.lldi.ulptx_memwrite_dsgl);
+ rhp->rdev.lldi.ulptx_memwrite_dsgl);
if (err)
break;
}
@@ -879,11 +1215,11 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
fw_opcode = FW_RI_INV_LSTAG_WR;
swsqe->opcode = FW_RI_LOCAL_INV;
err = build_inv_stag(wqe, wr, &len16);
- c4iw_invalidate_mr(qhp->rhp, wr->ex.invalidate_rkey);
+ c4iw_invalidate_mr(rhp, wr->ex.invalidate_rkey);
break;
default:
- PDBG("%s post of type=%d TBD!\n", __func__,
- wr->opcode);
+ pr_warn("%s post of type=%d TBD!\n", __func__,
+ wr->opcode);
err = -EINVAL;
}
if (err) {
@@ -898,21 +1234,21 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
swsqe->wr_id = wr->wr_id;
if (c4iw_wr_log) {
swsqe->sge_ts = cxgb4_read_sge_timestamp(
- qhp->rhp->rdev.lldi.ports[0]);
- getnstimeofday(&swsqe->host_ts);
+ rhp->rdev.lldi.ports[0]);
+ swsqe->host_time = ktime_get();
}
init_wr_hdr(wqe, qhp->wq.sq.pidx, fw_opcode, fw_flags, len16);
- PDBG("%s cookie 0x%llx pidx 0x%x opcode 0x%x read_len %u\n",
- __func__, (unsigned long long)wr->wr_id, qhp->wq.sq.pidx,
- swsqe->opcode, swsqe->read_len);
+ pr_debug("cookie 0x%llx pidx 0x%x opcode 0x%x read_len %u\n",
+ (unsigned long long)wr->wr_id, qhp->wq.sq.pidx,
+ swsqe->opcode, swsqe->read_len);
wr = wr->next;
num_wrs--;
t4_sq_produce(&qhp->wq, len16);
idx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE);
}
- if (!qhp->rhp->rdev.status_page->db_off) {
+ if (!rhp->rdev.status_page->db_off) {
t4_ring_sq_db(&qhp->wq, idx, wqe);
spin_unlock_irqrestore(&qhp->lock, flag);
} else {
@@ -922,8 +1258,8 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
return err;
}
-int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
- struct ib_recv_wr **bad_wr)
+int c4iw_post_receive(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
+ const struct ib_recv_wr **bad_wr)
{
int err = 0;
struct c4iw_qp *qhp;
@@ -935,10 +1271,15 @@ int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
qhp = to_c4iw_qp(ibqp);
spin_lock_irqsave(&qhp->lock, flag);
- if (t4_wq_in_error(&qhp->wq)) {
+
+ /*
+ * If the qp has been flushed, then just insert a special
+ * drain cqe.
+ */
+ if (qhp->wq.flushed) {
spin_unlock_irqrestore(&qhp->lock, flag);
- *bad_wr = wr;
- return -EINVAL;
+ complete_rq_drain_wrs(qhp, wr);
+ return err;
}
num_wrs = t4_rq_avail(&qhp->wq);
if (num_wrs == 0) {
@@ -969,8 +1310,8 @@ int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
qhp->wq.rq.sw_rq[qhp->wq.rq.pidx].sge_ts =
cxgb4_read_sge_timestamp(
qhp->rhp->rdev.lldi.ports[0]);
- getnstimeofday(
- &qhp->wq.rq.sw_rq[qhp->wq.rq.pidx].host_ts);
+ qhp->wq.rq.sw_rq[qhp->wq.rq.pidx].host_time =
+ ktime_get();
}
wqe->recv.opcode = FW_RI_RECV_WR;
@@ -980,8 +1321,8 @@ int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
wqe->recv.r2[1] = 0;
wqe->recv.r2[2] = 0;
wqe->recv.len16 = len16;
- PDBG("%s cookie 0x%llx pidx %u\n", __func__,
- (unsigned long long) wr->wr_id, qhp->wq.rq.pidx);
+ pr_debug("cookie 0x%llx pidx %u\n",
+ (unsigned long long)wr->wr_id, qhp->wq.rq.pidx);
t4_rq_produce(&qhp->wq, len16);
idx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE);
wr = wr->next;
@@ -997,6 +1338,89 @@ int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
return err;
}
+static void defer_srq_wr(struct t4_srq *srq, union t4_recv_wr *wqe,
+ u64 wr_id, u8 len16)
+{
+ struct t4_srq_pending_wr *pwr = &srq->pending_wrs[srq->pending_pidx];
+
+ pr_debug("%s cidx %u pidx %u wq_pidx %u in_use %u ooo_count %u wr_id 0x%llx pending_cidx %u pending_pidx %u pending_in_use %u\n",
+ __func__, srq->cidx, srq->pidx, srq->wq_pidx,
+ srq->in_use, srq->ooo_count,
+ (unsigned long long)wr_id, srq->pending_cidx,
+ srq->pending_pidx, srq->pending_in_use);
+ pwr->wr_id = wr_id;
+ pwr->len16 = len16;
+ memcpy(&pwr->wqe, wqe, len16 * 16);
+ t4_srq_produce_pending_wr(srq);
+}
+
+int c4iw_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
+ const struct ib_recv_wr **bad_wr)
+{
+ union t4_recv_wr *wqe, lwqe;
+ struct c4iw_srq *srq;
+ unsigned long flag;
+ u8 len16 = 0;
+ u16 idx = 0;
+ int err = 0;
+ u32 num_wrs;
+
+ srq = to_c4iw_srq(ibsrq);
+ spin_lock_irqsave(&srq->lock, flag);
+ num_wrs = t4_srq_avail(&srq->wq);
+ if (num_wrs == 0) {
+ spin_unlock_irqrestore(&srq->lock, flag);
+ return -ENOMEM;
+ }
+ while (wr) {
+ if (wr->num_sge > T4_MAX_RECV_SGE) {
+ err = -EINVAL;
+ *bad_wr = wr;
+ break;
+ }
+ wqe = &lwqe;
+ if (num_wrs)
+ err = build_srq_recv(wqe, wr, &len16);
+ else
+ err = -ENOMEM;
+ if (err) {
+ *bad_wr = wr;
+ break;
+ }
+
+ wqe->recv.opcode = FW_RI_RECV_WR;
+ wqe->recv.r1 = 0;
+ wqe->recv.wrid = srq->wq.pidx;
+ wqe->recv.r2[0] = 0;
+ wqe->recv.r2[1] = 0;
+ wqe->recv.r2[2] = 0;
+ wqe->recv.len16 = len16;
+
+ if (srq->wq.ooo_count ||
+ srq->wq.pending_in_use ||
+ srq->wq.sw_rq[srq->wq.pidx].valid) {
+ defer_srq_wr(&srq->wq, wqe, wr->wr_id, len16);
+ } else {
+ srq->wq.sw_rq[srq->wq.pidx].wr_id = wr->wr_id;
+ srq->wq.sw_rq[srq->wq.pidx].valid = 1;
+ c4iw_copy_wr_to_srq(&srq->wq, wqe, len16);
+ pr_debug("%s cidx %u pidx %u wq_pidx %u in_use %u wr_id 0x%llx\n",
+ __func__, srq->wq.cidx,
+ srq->wq.pidx, srq->wq.wq_pidx,
+ srq->wq.in_use,
+ (unsigned long long)wr->wr_id);
+ t4_srq_produce(&srq->wq, len16);
+ idx += DIV_ROUND_UP(len16 * 16, T4_EQ_ENTRY_SIZE);
+ }
+ wr = wr->next;
+ num_wrs--;
+ }
+ if (idx)
+ t4_ring_srq_db(&srq->wq, idx, len16, wqe);
+ spin_unlock_irqrestore(&srq->lock, flag);
+ return err;
+}
+
static inline void build_term_codes(struct t4_cqe *err_cqe, u8 *layer_type,
u8 *ecode)
{
@@ -1140,8 +1564,8 @@ static void post_terminate(struct c4iw_qp *qhp, struct t4_cqe *err_cqe,
struct sk_buff *skb;
struct terminate_message *term;
- PDBG("%s qhp %p qid 0x%x tid %u\n", __func__, qhp, qhp->wq.sq.qid,
- qhp->ep->hwtid);
+ pr_debug("qhp %p qid 0x%x tid %u\n", qhp, qhp->wq.sq.qid,
+ qhp->ep->hwtid);
skb = skb_dequeue(&qhp->ep->com.ep_skb_list);
if (WARN_ON(!skb))
@@ -1149,15 +1573,14 @@ static void post_terminate(struct c4iw_qp *qhp, struct t4_cqe *err_cqe,
set_wr_txq(skb, CPL_PRIORITY_DATA, qhp->ep->txq_idx);
- wqe = (struct fw_ri_wr *)__skb_put(skb, sizeof(*wqe));
- memset(wqe, 0, sizeof *wqe);
+ wqe = __skb_put_zero(skb, sizeof(*wqe));
wqe->op_compl = cpu_to_be32(FW_WR_OP_V(FW_RI_INIT_WR));
wqe->flowid_len16 = cpu_to_be32(
FW_WR_FLOWID_V(qhp->ep->hwtid) |
FW_WR_LEN16_V(DIV_ROUND_UP(sizeof(*wqe), 16)));
wqe->u.terminate.type = FW_RI_TYPE_TERMINATE;
- wqe->u.terminate.immdlen = cpu_to_be32(sizeof *term);
+ wqe->u.terminate.immdlen = cpu_to_be32(sizeof(*term));
term = (struct terminate_message *)wqe->u.terminate.termmsg;
if (qhp->attr.layer_etype == (LAYER_MPA|DDP_LLP)) {
term->layer_etype = qhp->attr.layer_etype;
@@ -1174,53 +1597,66 @@ static void __flush_qp(struct c4iw_qp *qhp, struct c4iw_cq *rchp,
struct c4iw_cq *schp)
{
int count;
- int rq_flushed, sq_flushed;
+ int rq_flushed = 0, sq_flushed;
unsigned long flag;
+ struct ib_event ev;
- PDBG("%s qhp %p rchp %p schp %p\n", __func__, qhp, rchp, schp);
+ pr_debug("qhp %p rchp %p schp %p\n", qhp, rchp, schp);
- /* locking hierarchy: cq lock first, then qp lock. */
+ /* locking hierarchy: cqs lock first, then qp lock. */
spin_lock_irqsave(&rchp->lock, flag);
+ if (schp != rchp)
+ spin_lock(&schp->lock);
spin_lock(&qhp->lock);
+ if (qhp->srq && qhp->attr.state == C4IW_QP_STATE_ERROR &&
+ qhp->ibqp.event_handler) {
+ ev.device = qhp->ibqp.device;
+ ev.element.qp = &qhp->ibqp;
+ ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
+ qhp->ibqp.event_handler(&ev, qhp->ibqp.qp_context);
+ }
if (qhp->wq.flushed) {
spin_unlock(&qhp->lock);
+ if (schp != rchp)
+ spin_unlock(&schp->lock);
spin_unlock_irqrestore(&rchp->lock, flag);
return;
}
qhp->wq.flushed = 1;
+ t4_set_wq_in_error(&qhp->wq, 0);
- c4iw_flush_hw_cq(rchp);
- c4iw_count_rcqes(&rchp->cq, &qhp->wq, &count);
- rq_flushed = c4iw_flush_rq(&qhp->wq, &rchp->cq, count);
- spin_unlock(&qhp->lock);
- spin_unlock_irqrestore(&rchp->lock, flag);
+ c4iw_flush_hw_cq(rchp, qhp);
+ if (!qhp->srq) {
+ c4iw_count_rcqes(&rchp->cq, &qhp->wq, &count);
+ rq_flushed = c4iw_flush_rq(&qhp->wq, &rchp->cq, count);
+ }
- /* locking hierarchy: cq lock first, then qp lock. */
- spin_lock_irqsave(&schp->lock, flag);
- spin_lock(&qhp->lock);
if (schp != rchp)
- c4iw_flush_hw_cq(schp);
+ c4iw_flush_hw_cq(schp, qhp);
sq_flushed = c4iw_flush_sq(qhp);
+
spin_unlock(&qhp->lock);
- spin_unlock_irqrestore(&schp->lock, flag);
+ if (schp != rchp)
+ spin_unlock(&schp->lock);
+ spin_unlock_irqrestore(&rchp->lock, flag);
if (schp == rchp) {
- if (t4_clear_cq_armed(&rchp->cq) &&
- (rq_flushed || sq_flushed)) {
+ if ((rq_flushed || sq_flushed) &&
+ t4_clear_cq_armed(&rchp->cq)) {
spin_lock_irqsave(&rchp->comp_handler_lock, flag);
(*rchp->ibcq.comp_handler)(&rchp->ibcq,
rchp->ibcq.cq_context);
spin_unlock_irqrestore(&rchp->comp_handler_lock, flag);
}
} else {
- if (t4_clear_cq_armed(&rchp->cq) && rq_flushed) {
+ if (rq_flushed && t4_clear_cq_armed(&rchp->cq)) {
spin_lock_irqsave(&rchp->comp_handler_lock, flag);
(*rchp->ibcq.comp_handler)(&rchp->ibcq,
rchp->ibcq.cq_context);
spin_unlock_irqrestore(&rchp->comp_handler_lock, flag);
}
- if (t4_clear_cq_armed(&schp->cq) && sq_flushed) {
+ if (sq_flushed && t4_clear_cq_armed(&schp->cq)) {
spin_lock_irqsave(&schp->comp_handler_lock, flag);
(*schp->ibcq.comp_handler)(&schp->ibcq,
schp->ibcq.cq_context);
@@ -1237,8 +1673,14 @@ static void flush_qp(struct c4iw_qp *qhp)
rchp = to_c4iw_cq(qhp->ibqp.recv_cq);
schp = to_c4iw_cq(qhp->ibqp.send_cq);
- t4_set_wq_in_error(&qhp->wq);
if (qhp->ibqp.uobject) {
+
+ /* for user qps, qhp->wq.flushed is protected by qhp->mutex */
+ if (qhp->wq.flushed)
+ return;
+
+ qhp->wq.flushed = 1;
+ t4_set_wq_in_error(&qhp->wq, 0);
t4_set_cq_in_error(&rchp->cq);
spin_lock_irqsave(&rchp->comp_handler_lock, flag);
(*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context);
@@ -1262,8 +1704,7 @@ static int rdma_fini(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
int ret;
struct sk_buff *skb;
- PDBG("%s qhp %p qid 0x%x tid %u\n", __func__, qhp, qhp->wq.sq.qid,
- ep->hwtid);
+ pr_debug("qhp %p qid 0x%x tid %u\n", qhp, qhp->wq.sq.qid, ep->hwtid);
skb = skb_dequeue(&ep->com.ep_skb_list);
if (WARN_ON(!skb))
@@ -1271,41 +1712,36 @@ static int rdma_fini(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
- wqe = (struct fw_ri_wr *)__skb_put(skb, sizeof(*wqe));
- memset(wqe, 0, sizeof *wqe);
+ wqe = __skb_put_zero(skb, sizeof(*wqe));
wqe->op_compl = cpu_to_be32(
FW_WR_OP_V(FW_RI_INIT_WR) |
FW_WR_COMPL_F);
wqe->flowid_len16 = cpu_to_be32(
FW_WR_FLOWID_V(ep->hwtid) |
FW_WR_LEN16_V(DIV_ROUND_UP(sizeof(*wqe), 16)));
- wqe->cookie = (uintptr_t)&ep->com.wr_wait;
+ wqe->cookie = (uintptr_t)ep->com.wr_waitp;
wqe->u.fini.type = FW_RI_TYPE_FINI;
- ret = c4iw_ofld_send(&rhp->rdev, skb);
- if (ret)
- goto out;
- ret = c4iw_wait_for_reply(&rhp->rdev, &ep->com.wr_wait, qhp->ep->hwtid,
- qhp->wq.sq.qid, __func__);
-out:
- PDBG("%s ret %d\n", __func__, ret);
+ ret = c4iw_ref_send_wait(&rhp->rdev, skb, ep->com.wr_waitp,
+ qhp->ep->hwtid, qhp->wq.sq.qid, __func__);
+
+ pr_debug("ret %d\n", ret);
return ret;
}
static void build_rtr_msg(u8 p2p_type, struct fw_ri_init *init)
{
- PDBG("%s p2p_type = %d\n", __func__, p2p_type);
- memset(&init->u, 0, sizeof init->u);
+ pr_debug("p2p_type = %d\n", p2p_type);
+ memset(&init->u, 0, sizeof(init->u));
switch (p2p_type) {
case FW_RI_INIT_P2PTYPE_RDMA_WRITE:
init->u.write.opcode = FW_RI_RDMA_WRITE_WR;
init->u.write.stag_sink = cpu_to_be32(1);
init->u.write.to_sink = cpu_to_be64(1);
init->u.write.u.immd_src[0].op = FW_RI_DATA_IMMD;
- init->u.write.len16 = DIV_ROUND_UP(sizeof init->u.write +
- sizeof(struct fw_ri_immd),
- 16);
+ init->u.write.len16 = DIV_ROUND_UP(
+ sizeof(init->u.write) + sizeof(struct fw_ri_immd), 16);
break;
case FW_RI_INIT_P2PTYPE_READ_REQ:
init->u.write.opcode = FW_RI_RDMA_READ_WR;
@@ -1313,7 +1749,7 @@ static void build_rtr_msg(u8 p2p_type, struct fw_ri_init *init)
init->u.read.to_src_lo = cpu_to_be32(1);
init->u.read.stag_sink = cpu_to_be32(1);
init->u.read.to_sink_lo = cpu_to_be32(1);
- init->u.read.len16 = DIV_ROUND_UP(sizeof init->u.read, 16);
+ init->u.read.len16 = DIV_ROUND_UP(sizeof(init->u.read), 16);
break;
}
}
@@ -1324,10 +1760,10 @@ static int rdma_init(struct c4iw_dev *rhp, struct c4iw_qp *qhp)
int ret;
struct sk_buff *skb;
- PDBG("%s qhp %p qid 0x%x tid %u ird %u ord %u\n", __func__, qhp,
- qhp->wq.sq.qid, qhp->ep->hwtid, qhp->ep->ird, qhp->ep->ord);
+ pr_debug("qhp %p qid 0x%x tid %u ird %u ord %u\n", qhp,
+ qhp->wq.sq.qid, qhp->ep->hwtid, qhp->ep->ird, qhp->ep->ord);
- skb = alloc_skb(sizeof *wqe, GFP_KERNEL);
+ skb = alloc_skb(sizeof(*wqe), GFP_KERNEL);
if (!skb) {
ret = -ENOMEM;
goto out;
@@ -1340,8 +1776,7 @@ static int rdma_init(struct c4iw_dev *rhp, struct c4iw_qp *qhp)
}
set_wr_txq(skb, CPL_PRIORITY_DATA, qhp->ep->txq_idx);
- wqe = (struct fw_ri_wr *)__skb_put(skb, sizeof(*wqe));
- memset(wqe, 0, sizeof *wqe);
+ wqe = __skb_put_zero(skb, sizeof(*wqe));
wqe->op_compl = cpu_to_be32(
FW_WR_OP_V(FW_RI_INIT_WR) |
FW_WR_COMPL_F);
@@ -1349,7 +1784,7 @@ static int rdma_init(struct c4iw_dev *rhp, struct c4iw_qp *qhp)
FW_WR_FLOWID_V(qhp->ep->hwtid) |
FW_WR_LEN16_V(DIV_ROUND_UP(sizeof(*wqe), 16)));
- wqe->cookie = (uintptr_t)&qhp->ep->com.wr_wait;
+ wqe->cookie = (uintptr_t)qhp->ep->com.wr_waitp;
wqe->u.init.type = FW_RI_TYPE_INIT;
wqe->u.init.mpareqbit_p2ptype =
@@ -1373,31 +1808,32 @@ static int rdma_init(struct c4iw_dev *rhp, struct c4iw_qp *qhp)
wqe->u.init.pdid = cpu_to_be32(qhp->attr.pd);
wqe->u.init.qpid = cpu_to_be32(qhp->wq.sq.qid);
wqe->u.init.sq_eqid = cpu_to_be32(qhp->wq.sq.qid);
- wqe->u.init.rq_eqid = cpu_to_be32(qhp->wq.rq.qid);
+ if (qhp->srq) {
+ wqe->u.init.rq_eqid = cpu_to_be32(FW_RI_INIT_RQEQID_SRQ |
+ qhp->srq->idx);
+ } else {
+ wqe->u.init.rq_eqid = cpu_to_be32(qhp->wq.rq.qid);
+ wqe->u.init.hwrqsize = cpu_to_be32(qhp->wq.rq.rqt_size);
+ wqe->u.init.hwrqaddr = cpu_to_be32(qhp->wq.rq.rqt_hwaddr -
+ rhp->rdev.lldi.vr->rq.start);
+ }
wqe->u.init.scqid = cpu_to_be32(qhp->attr.scq);
wqe->u.init.rcqid = cpu_to_be32(qhp->attr.rcq);
wqe->u.init.ord_max = cpu_to_be32(qhp->attr.max_ord);
wqe->u.init.ird_max = cpu_to_be32(qhp->attr.max_ird);
wqe->u.init.iss = cpu_to_be32(qhp->ep->snd_seq);
wqe->u.init.irs = cpu_to_be32(qhp->ep->rcv_seq);
- wqe->u.init.hwrqsize = cpu_to_be32(qhp->wq.rq.rqt_size);
- wqe->u.init.hwrqaddr = cpu_to_be32(qhp->wq.rq.rqt_hwaddr -
- rhp->rdev.lldi.vr->rq.start);
if (qhp->attr.mpa_attr.initiator)
build_rtr_msg(qhp->attr.mpa_attr.p2p_type, &wqe->u.init);
- ret = c4iw_ofld_send(&rhp->rdev, skb);
- if (ret)
- goto err1;
-
- ret = c4iw_wait_for_reply(&rhp->rdev, &qhp->ep->com.wr_wait,
- qhp->ep->hwtid, qhp->wq.sq.qid, __func__);
+ ret = c4iw_ref_send_wait(&rhp->rdev, skb, qhp->ep->com.wr_waitp,
+ qhp->ep->hwtid, qhp->wq.sq.qid, __func__);
if (!ret)
goto out;
-err1:
+
free_ird(rhp, qhp->attr.max_ird);
out:
- PDBG("%s ret %d\n", __func__, ret);
+ pr_debug("ret %d\n", ret);
return ret;
}
@@ -1414,9 +1850,9 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
int free = 0;
struct c4iw_ep *ep = NULL;
- PDBG("%s qhp %p sqid 0x%x rqid 0x%x ep %p state %d -> %d\n", __func__,
- qhp, qhp->wq.sq.qid, qhp->wq.rq.qid, qhp->ep, qhp->attr.state,
- (mask & C4IW_QP_ATTR_NEXT_STATE) ? attrs->next_state : -1);
+ pr_debug("qhp %p sqid 0x%x rqid 0x%x ep %p state %d -> %d\n",
+ qhp, qhp->wq.sq.qid, qhp->wq.rq.qid, qhp->ep, qhp->attr.state,
+ (mask & C4IW_QP_ATTR_NEXT_STATE) ? attrs->next_state : -1);
mutex_lock(&qhp->mutex);
@@ -1503,8 +1939,7 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
case C4IW_QP_STATE_RTS:
switch (attrs->next_state) {
case C4IW_QP_STATE_CLOSING:
- BUG_ON(atomic_read(&qhp->ep->com.kref.refcount) < 2);
- t4_set_wq_in_error(&qhp->wq);
+ t4_set_wq_in_error(&qhp->wq, 0);
set_state(qhp, C4IW_QP_STATE_CLOSING);
ep = qhp->ep;
if (!internal) {
@@ -1517,13 +1952,13 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
goto err;
break;
case C4IW_QP_STATE_TERMINATE:
- t4_set_wq_in_error(&qhp->wq);
+ t4_set_wq_in_error(&qhp->wq, 0);
set_state(qhp, C4IW_QP_STATE_TERMINATE);
qhp->attr.layer_etype = attrs->layer_etype;
qhp->attr.ecode = attrs->ecode;
ep = qhp->ep;
if (!internal) {
- c4iw_get_ep(&qhp->ep->com);
+ c4iw_get_ep(&ep->com);
terminate = 1;
disconnect = 1;
} else {
@@ -1534,10 +1969,9 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
}
break;
case C4IW_QP_STATE_ERROR:
- t4_set_wq_in_error(&qhp->wq);
+ t4_set_wq_in_error(&qhp->wq, 0);
set_state(qhp, C4IW_QP_STATE_ERROR);
if (!internal) {
- abort = 1;
disconnect = 1;
ep = qhp->ep;
c4iw_get_ep(&qhp->ep->com);
@@ -1550,7 +1984,12 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
}
break;
case C4IW_QP_STATE_CLOSING:
- if (!internal) {
+
+ /*
+ * Allow kernel users to move to ERROR for qp draining.
+ */
+ if (!internal && (qhp->ibqp.uobject || attrs->next_state !=
+ C4IW_QP_STATE_ERROR)) {
ret = -EINVAL;
goto out;
}
@@ -1589,16 +2028,15 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
goto err;
break;
default:
- printk(KERN_ERR "%s in a bad state %d\n",
- __func__, qhp->attr.state);
+ pr_err("%s in a bad state %d\n", __func__, qhp->attr.state);
ret = -EINVAL;
goto err;
break;
}
goto out;
err:
- PDBG("%s disassociating ep %p qpid 0x%x\n", __func__, qhp->ep,
- qhp->wq.sq.qid);
+ pr_debug("disassociating ep %p qpid 0x%x\n", qhp->ep,
+ qhp->wq.sq.qid);
/* disassociate the LLP connection */
qhp->attr.llp_stream_handle = NULL;
@@ -1608,7 +2046,6 @@ err:
set_state(qhp, C4IW_QP_STATE_ERROR);
free = 1;
abort = 1;
- BUG_ON(!ep);
flush_qp(qhp);
wake_up(&qhp->wait);
out:
@@ -1634,19 +2071,20 @@ out:
*/
if (free)
c4iw_put_ep(&ep->com);
- PDBG("%s exit state %d\n", __func__, qhp->attr.state);
+ pr_debug("exit state %d\n", qhp->attr.state);
return ret;
}
-int c4iw_destroy_qp(struct ib_qp *ib_qp)
+int c4iw_destroy_qp(struct ib_qp *ib_qp, struct ib_udata *udata)
{
struct c4iw_dev *rhp;
struct c4iw_qp *qhp;
- struct c4iw_qp_attributes attrs;
struct c4iw_ucontext *ucontext;
+ struct c4iw_qp_attributes attrs;
qhp = to_c4iw_qp(ib_qp);
rhp = qhp->rhp;
+ ucontext = qhp->ucontext;
attrs.next_state = C4IW_QP_STATE_ERROR;
if (qhp->attr.state == C4IW_QP_STATE_TERMINATE)
@@ -1655,91 +2093,99 @@ int c4iw_destroy_qp(struct ib_qp *ib_qp)
c4iw_modify_qp(rhp, qhp, C4IW_QP_ATTR_NEXT_STATE, &attrs, 0);
wait_event(qhp->wait, !qhp->ep);
- remove_handle(rhp, &rhp->qpidr, qhp->wq.sq.qid);
-
- spin_lock_irq(&rhp->lock);
+ xa_lock_irq(&rhp->qps);
+ __xa_erase(&rhp->qps, qhp->wq.sq.qid);
if (!list_empty(&qhp->db_fc_entry))
list_del_init(&qhp->db_fc_entry);
- spin_unlock_irq(&rhp->lock);
+ xa_unlock_irq(&rhp->qps);
free_ird(rhp, qhp->attr.max_ird);
- ucontext = ib_qp->uobject ?
- to_c4iw_ucontext(ib_qp->uobject->context) : NULL;
- destroy_qp(&rhp->rdev, &qhp->wq,
- ucontext ? &ucontext->uctx : &rhp->rdev.uctx);
-
c4iw_qp_rem_ref(ib_qp);
- PDBG("%s ib_qp %p qpid 0x%0x\n", __func__, ib_qp, qhp->wq.sq.qid);
+ wait_for_completion(&qhp->qp_rel_comp);
+
+ pr_debug("ib_qp %p qpid 0x%0x\n", ib_qp, qhp->wq.sq.qid);
+ pr_debug("qhp %p ucontext %p\n", qhp, ucontext);
+
+ destroy_qp(&rhp->rdev, &qhp->wq,
+ ucontext ? &ucontext->uctx : &rhp->rdev.uctx, !qhp->srq);
+
+ c4iw_put_wr_wait(qhp->wr_waitp);
return 0;
}
-struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
- struct ib_udata *udata)
+int c4iw_create_qp(struct ib_qp *qp, struct ib_qp_init_attr *attrs,
+ struct ib_udata *udata)
{
+ struct ib_pd *pd = qp->pd;
struct c4iw_dev *rhp;
- struct c4iw_qp *qhp;
+ struct c4iw_qp *qhp = to_c4iw_qp(qp);
struct c4iw_pd *php;
struct c4iw_cq *schp;
struct c4iw_cq *rchp;
struct c4iw_create_qp_resp uresp;
- unsigned int sqsize, rqsize;
- struct c4iw_ucontext *ucontext;
+ unsigned int sqsize, rqsize = 0;
+ struct c4iw_ucontext *ucontext = rdma_udata_to_drv_context(
+ udata, struct c4iw_ucontext, ibucontext);
int ret;
struct c4iw_mm_entry *sq_key_mm, *rq_key_mm = NULL, *sq_db_key_mm;
struct c4iw_mm_entry *rq_db_key_mm = NULL, *ma_sync_key_mm = NULL;
- PDBG("%s ib_pd %p\n", __func__, pd);
-
- if (attrs->qp_type != IB_QPT_RC)
- return ERR_PTR(-EINVAL);
+ if (attrs->qp_type != IB_QPT_RC || attrs->create_flags)
+ return -EOPNOTSUPP;
php = to_c4iw_pd(pd);
rhp = php->rhp;
schp = get_chp(rhp, ((struct c4iw_cq *)attrs->send_cq)->cq.cqid);
rchp = get_chp(rhp, ((struct c4iw_cq *)attrs->recv_cq)->cq.cqid);
if (!schp || !rchp)
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
if (attrs->cap.max_inline_data > T4_MAX_SEND_INLINE)
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
- if (attrs->cap.max_recv_wr > rhp->rdev.hw_queue.t4_max_rq_size)
- return ERR_PTR(-E2BIG);
- rqsize = attrs->cap.max_recv_wr + 1;
- if (rqsize < 8)
- rqsize = 8;
+ if (!attrs->srq) {
+ if (attrs->cap.max_recv_wr > rhp->rdev.hw_queue.t4_max_rq_size)
+ return -E2BIG;
+ rqsize = attrs->cap.max_recv_wr + 1;
+ if (rqsize < 8)
+ rqsize = 8;
+ }
if (attrs->cap.max_send_wr > rhp->rdev.hw_queue.t4_max_sq_size)
- return ERR_PTR(-E2BIG);
+ return -E2BIG;
sqsize = attrs->cap.max_send_wr + 1;
if (sqsize < 8)
sqsize = 8;
- ucontext = pd->uobject ? to_c4iw_ucontext(pd->uobject->context) : NULL;
+ qhp->wr_waitp = c4iw_alloc_wr_wait(GFP_KERNEL);
+ if (!qhp->wr_waitp)
+ return -ENOMEM;
- qhp = kzalloc(sizeof(*qhp), GFP_KERNEL);
- if (!qhp)
- return ERR_PTR(-ENOMEM);
qhp->wq.sq.size = sqsize;
qhp->wq.sq.memsize =
(sqsize + rhp->rdev.hw_queue.t4_eq_status_entries) *
sizeof(*qhp->wq.sq.queue) + 16 * sizeof(__be64);
qhp->wq.sq.flush_cidx = -1;
- qhp->wq.rq.size = rqsize;
- qhp->wq.rq.memsize =
- (rqsize + rhp->rdev.hw_queue.t4_eq_status_entries) *
- sizeof(*qhp->wq.rq.queue);
+ if (!attrs->srq) {
+ qhp->wq.rq.size = rqsize;
+ qhp->wq.rq.memsize =
+ (rqsize + rhp->rdev.hw_queue.t4_eq_status_entries) *
+ sizeof(*qhp->wq.rq.queue);
+ }
if (ucontext) {
qhp->wq.sq.memsize = roundup(qhp->wq.sq.memsize, PAGE_SIZE);
- qhp->wq.rq.memsize = roundup(qhp->wq.rq.memsize, PAGE_SIZE);
+ if (!attrs->srq)
+ qhp->wq.rq.memsize =
+ roundup(qhp->wq.rq.memsize, PAGE_SIZE);
}
ret = create_qp(&rhp->rdev, &qhp->wq, &schp->cq, &rchp->cq,
- ucontext ? &ucontext->uctx : &rhp->rdev.uctx);
+ ucontext ? &ucontext->uctx : &rhp->rdev.uctx,
+ qhp->wr_waitp, !attrs->srq);
if (ret)
- goto err1;
+ goto err_free_wr_wait;
attrs->cap.max_recv_wr = rqsize - 1;
attrs->cap.max_send_wr = sqsize - 1;
@@ -1750,10 +2196,12 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
qhp->attr.scq = ((struct c4iw_cq *) attrs->send_cq)->cq.cqid;
qhp->attr.rcq = ((struct c4iw_cq *) attrs->recv_cq)->cq.cqid;
qhp->attr.sq_num_entries = attrs->cap.max_send_wr;
- qhp->attr.rq_num_entries = attrs->cap.max_recv_wr;
qhp->attr.sq_max_sges = attrs->cap.max_send_sge;
qhp->attr.sq_max_sges_rdma_write = attrs->cap.max_send_sge;
- qhp->attr.rq_max_sges = attrs->cap.max_recv_sge;
+ if (!attrs->srq) {
+ qhp->attr.rq_num_entries = attrs->cap.max_recv_wr;
+ qhp->attr.rq_max_sges = attrs->cap.max_recv_sge;
+ }
qhp->attr.state = C4IW_QP_STATE_IDLE;
qhp->attr.next_state = C4IW_QP_STATE_IDLE;
qhp->attr.enable_rdma_read = 1;
@@ -1763,125 +2211,173 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
qhp->attr.max_ird = 0;
qhp->sq_sig_all = attrs->sq_sig_type == IB_SIGNAL_ALL_WR;
spin_lock_init(&qhp->lock);
- init_completion(&qhp->sq_drained);
- init_completion(&qhp->rq_drained);
mutex_init(&qhp->mutex);
init_waitqueue_head(&qhp->wait);
- kref_init(&qhp->kref);
+ init_completion(&qhp->qp_rel_comp);
+ refcount_set(&qhp->qp_refcnt, 1);
- ret = insert_handle(rhp, &rhp->qpidr, qhp, qhp->wq.sq.qid);
+ ret = xa_insert_irq(&rhp->qps, qhp->wq.sq.qid, qhp, GFP_KERNEL);
if (ret)
- goto err2;
+ goto err_destroy_qp;
- if (udata) {
+ if (udata && ucontext) {
sq_key_mm = kmalloc(sizeof(*sq_key_mm), GFP_KERNEL);
if (!sq_key_mm) {
ret = -ENOMEM;
- goto err3;
+ goto err_remove_handle;
}
- rq_key_mm = kmalloc(sizeof(*rq_key_mm), GFP_KERNEL);
- if (!rq_key_mm) {
- ret = -ENOMEM;
- goto err4;
+ if (!attrs->srq) {
+ rq_key_mm = kmalloc(sizeof(*rq_key_mm), GFP_KERNEL);
+ if (!rq_key_mm) {
+ ret = -ENOMEM;
+ goto err_free_sq_key;
+ }
}
sq_db_key_mm = kmalloc(sizeof(*sq_db_key_mm), GFP_KERNEL);
if (!sq_db_key_mm) {
ret = -ENOMEM;
- goto err5;
+ goto err_free_rq_key;
}
- rq_db_key_mm = kmalloc(sizeof(*rq_db_key_mm), GFP_KERNEL);
- if (!rq_db_key_mm) {
- ret = -ENOMEM;
- goto err6;
+ if (!attrs->srq) {
+ rq_db_key_mm =
+ kmalloc(sizeof(*rq_db_key_mm), GFP_KERNEL);
+ if (!rq_db_key_mm) {
+ ret = -ENOMEM;
+ goto err_free_sq_db_key;
+ }
}
+ memset(&uresp, 0, sizeof(uresp));
if (t4_sq_onchip(&qhp->wq.sq)) {
ma_sync_key_mm = kmalloc(sizeof(*ma_sync_key_mm),
GFP_KERNEL);
if (!ma_sync_key_mm) {
ret = -ENOMEM;
- goto err7;
+ goto err_free_rq_db_key;
}
uresp.flags = C4IW_QPF_ONCHIP;
- } else
- uresp.flags = 0;
+ }
+ if (rhp->rdev.lldi.write_w_imm_support)
+ uresp.flags |= C4IW_QPF_WRITE_W_IMM;
uresp.qid_mask = rhp->rdev.qpmask;
uresp.sqid = qhp->wq.sq.qid;
uresp.sq_size = qhp->wq.sq.size;
uresp.sq_memsize = qhp->wq.sq.memsize;
- uresp.rqid = qhp->wq.rq.qid;
- uresp.rq_size = qhp->wq.rq.size;
- uresp.rq_memsize = qhp->wq.rq.memsize;
+ if (!attrs->srq) {
+ uresp.rqid = qhp->wq.rq.qid;
+ uresp.rq_size = qhp->wq.rq.size;
+ uresp.rq_memsize = qhp->wq.rq.memsize;
+ }
spin_lock(&ucontext->mmap_lock);
if (ma_sync_key_mm) {
uresp.ma_sync_key = ucontext->key;
ucontext->key += PAGE_SIZE;
- } else {
- uresp.ma_sync_key = 0;
}
uresp.sq_key = ucontext->key;
ucontext->key += PAGE_SIZE;
- uresp.rq_key = ucontext->key;
- ucontext->key += PAGE_SIZE;
+ if (!attrs->srq) {
+ uresp.rq_key = ucontext->key;
+ ucontext->key += PAGE_SIZE;
+ }
uresp.sq_db_gts_key = ucontext->key;
ucontext->key += PAGE_SIZE;
- uresp.rq_db_gts_key = ucontext->key;
- ucontext->key += PAGE_SIZE;
+ if (!attrs->srq) {
+ uresp.rq_db_gts_key = ucontext->key;
+ ucontext->key += PAGE_SIZE;
+ }
spin_unlock(&ucontext->mmap_lock);
- ret = ib_copy_to_udata(udata, &uresp, sizeof uresp);
+ ret = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
if (ret)
- goto err8;
+ goto err_free_ma_sync_key;
sq_key_mm->key = uresp.sq_key;
- sq_key_mm->addr = qhp->wq.sq.phys_addr;
+ sq_key_mm->addr = 0;
+ sq_key_mm->vaddr = qhp->wq.sq.queue;
+ sq_key_mm->dma_addr = qhp->wq.sq.dma_addr;
sq_key_mm->len = PAGE_ALIGN(qhp->wq.sq.memsize);
+ insert_flag_to_mmap(&rhp->rdev, sq_key_mm, sq_key_mm->addr);
insert_mmap(ucontext, sq_key_mm);
- rq_key_mm->key = uresp.rq_key;
- rq_key_mm->addr = virt_to_phys(qhp->wq.rq.queue);
- rq_key_mm->len = PAGE_ALIGN(qhp->wq.rq.memsize);
- insert_mmap(ucontext, rq_key_mm);
+ if (!attrs->srq) {
+ rq_key_mm->key = uresp.rq_key;
+ rq_key_mm->addr = 0;
+ rq_key_mm->vaddr = qhp->wq.rq.queue;
+ rq_key_mm->dma_addr = qhp->wq.rq.dma_addr;
+ rq_key_mm->len = PAGE_ALIGN(qhp->wq.rq.memsize);
+ insert_flag_to_mmap(&rhp->rdev, rq_key_mm,
+ rq_key_mm->addr);
+ insert_mmap(ucontext, rq_key_mm);
+ }
sq_db_key_mm->key = uresp.sq_db_gts_key;
sq_db_key_mm->addr = (u64)(unsigned long)qhp->wq.sq.bar2_pa;
+ sq_db_key_mm->vaddr = NULL;
+ sq_db_key_mm->dma_addr = 0;
sq_db_key_mm->len = PAGE_SIZE;
+ insert_flag_to_mmap(&rhp->rdev, sq_db_key_mm,
+ sq_db_key_mm->addr);
insert_mmap(ucontext, sq_db_key_mm);
- rq_db_key_mm->key = uresp.rq_db_gts_key;
- rq_db_key_mm->addr = (u64)(unsigned long)qhp->wq.rq.bar2_pa;
- rq_db_key_mm->len = PAGE_SIZE;
- insert_mmap(ucontext, rq_db_key_mm);
+ if (!attrs->srq) {
+ rq_db_key_mm->key = uresp.rq_db_gts_key;
+ rq_db_key_mm->addr =
+ (u64)(unsigned long)qhp->wq.rq.bar2_pa;
+ rq_db_key_mm->len = PAGE_SIZE;
+ rq_db_key_mm->vaddr = NULL;
+ rq_db_key_mm->dma_addr = 0;
+ insert_flag_to_mmap(&rhp->rdev, rq_db_key_mm,
+ rq_db_key_mm->addr);
+ insert_mmap(ucontext, rq_db_key_mm);
+ }
if (ma_sync_key_mm) {
ma_sync_key_mm->key = uresp.ma_sync_key;
ma_sync_key_mm->addr =
(pci_resource_start(rhp->rdev.lldi.pdev, 0) +
PCIE_MA_SYNC_A) & PAGE_MASK;
ma_sync_key_mm->len = PAGE_SIZE;
+ ma_sync_key_mm->vaddr = NULL;
+ ma_sync_key_mm->dma_addr = 0;
+ insert_flag_to_mmap(&rhp->rdev, ma_sync_key_mm,
+ ma_sync_key_mm->addr);
insert_mmap(ucontext, ma_sync_key_mm);
}
+
+ qhp->ucontext = ucontext;
+ }
+ if (!attrs->srq) {
+ qhp->wq.qp_errp =
+ &qhp->wq.rq.queue[qhp->wq.rq.size].status.qp_err;
+ } else {
+ qhp->wq.qp_errp =
+ &qhp->wq.sq.queue[qhp->wq.sq.size].status.qp_err;
+ qhp->wq.srqidxp =
+ &qhp->wq.sq.queue[qhp->wq.sq.size].status.srqidx;
}
+
qhp->ibqp.qp_num = qhp->wq.sq.qid;
- init_timer(&(qhp->timer));
+ if (attrs->srq)
+ qhp->srq = to_c4iw_srq(attrs->srq);
INIT_LIST_HEAD(&qhp->db_fc_entry);
- PDBG("%s sq id %u size %u memsize %zu num_entries %u "
- "rq id %u size %u memsize %zu num_entries %u\n", __func__,
- qhp->wq.sq.qid, qhp->wq.sq.size, qhp->wq.sq.memsize,
- attrs->cap.max_send_wr, qhp->wq.rq.qid, qhp->wq.rq.size,
- qhp->wq.rq.memsize, attrs->cap.max_recv_wr);
- return &qhp->ibqp;
-err8:
+ pr_debug("sq id %u size %u memsize %zu num_entries %u rq id %u size %u memsize %zu num_entries %u\n",
+ qhp->wq.sq.qid, qhp->wq.sq.size, qhp->wq.sq.memsize,
+ attrs->cap.max_send_wr, qhp->wq.rq.qid, qhp->wq.rq.size,
+ qhp->wq.rq.memsize, attrs->cap.max_recv_wr);
+ return 0;
+err_free_ma_sync_key:
kfree(ma_sync_key_mm);
-err7:
- kfree(rq_db_key_mm);
-err6:
+err_free_rq_db_key:
+ if (!attrs->srq)
+ kfree(rq_db_key_mm);
+err_free_sq_db_key:
kfree(sq_db_key_mm);
-err5:
- kfree(rq_key_mm);
-err4:
+err_free_rq_key:
+ if (!attrs->srq)
+ kfree(rq_key_mm);
+err_free_sq_key:
kfree(sq_key_mm);
-err3:
- remove_handle(rhp, &rhp->qpidr, qhp->wq.sq.qid);
-err2:
+err_remove_handle:
+ xa_erase_irq(&rhp->qps, qhp->wq.sq.qid);
+err_destroy_qp:
destroy_qp(&rhp->rdev, &qhp->wq,
- ucontext ? &ucontext->uctx : &rhp->rdev.uctx);
-err1:
- kfree(qhp);
- return ERR_PTR(ret);
+ ucontext ? &ucontext->uctx : &rhp->rdev.uctx, !attrs->srq);
+err_free_wr_wait:
+ c4iw_put_wr_wait(qhp->wr_waitp);
+ return ret;
}
int c4iw_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
@@ -1890,9 +2386,12 @@ int c4iw_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
struct c4iw_dev *rhp;
struct c4iw_qp *qhp;
enum c4iw_qp_attr_mask mask = 0;
- struct c4iw_qp_attributes attrs;
+ struct c4iw_qp_attributes attrs = {};
+
+ pr_debug("ib_qp %p\n", ibqp);
- PDBG("%s ib_qp %p\n", __func__, ibqp);
+ if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
+ return -EOPNOTSUPP;
/* iwarp does not support the RTR state */
if ((attr_mask & IB_QP_STATE) && (attr->qp_state == IB_QPS_RTR))
@@ -1902,7 +2401,6 @@ int c4iw_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
if (!attr_mask)
return 0;
- memset(&attrs, 0, sizeof attrs);
qhp = to_c4iw_qp(ibqp);
rhp = qhp->rhp;
@@ -1938,60 +2436,412 @@ int c4iw_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
struct ib_qp *c4iw_get_qp(struct ib_device *dev, int qpn)
{
- PDBG("%s ib_dev %p qpn 0x%x\n", __func__, dev, qpn);
+ pr_debug("ib_dev %p qpn 0x%x\n", dev, qpn);
return (struct ib_qp *)get_qhp(to_c4iw_dev(dev), qpn);
}
+void c4iw_dispatch_srq_limit_reached_event(struct c4iw_srq *srq)
+{
+ struct ib_event event = {};
+
+ event.device = &srq->rhp->ibdev;
+ event.element.srq = &srq->ibsrq;
+ event.event = IB_EVENT_SRQ_LIMIT_REACHED;
+ ib_dispatch_event(&event);
+}
+
+int c4iw_modify_srq(struct ib_srq *ib_srq, struct ib_srq_attr *attr,
+ enum ib_srq_attr_mask srq_attr_mask,
+ struct ib_udata *udata)
+{
+ struct c4iw_srq *srq = to_c4iw_srq(ib_srq);
+ int ret = 0;
+
+ /*
+ * XXX 0 mask == a SW interrupt for srq_limit reached...
+ */
+ if (udata && !srq_attr_mask) {
+ c4iw_dispatch_srq_limit_reached_event(srq);
+ goto out;
+ }
+
+ /* no support for this yet */
+ if (srq_attr_mask & IB_SRQ_MAX_WR) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (!udata && (srq_attr_mask & IB_SRQ_LIMIT)) {
+ srq->armed = true;
+ srq->srq_limit = attr->srq_limit;
+ }
+out:
+ return ret;
+}
+
int c4iw_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
int attr_mask, struct ib_qp_init_attr *init_attr)
{
struct c4iw_qp *qhp = to_c4iw_qp(ibqp);
- memset(attr, 0, sizeof *attr);
- memset(init_attr, 0, sizeof *init_attr);
+ memset(attr, 0, sizeof(*attr));
+ memset(init_attr, 0, sizeof(*init_attr));
attr->qp_state = to_ib_qp_state(qhp->attr.state);
+ attr->cur_qp_state = to_ib_qp_state(qhp->attr.state);
init_attr->cap.max_send_wr = qhp->attr.sq_num_entries;
init_attr->cap.max_recv_wr = qhp->attr.rq_num_entries;
init_attr->cap.max_send_sge = qhp->attr.sq_max_sges;
- init_attr->cap.max_recv_sge = qhp->attr.sq_max_sges;
+ init_attr->cap.max_recv_sge = qhp->attr.rq_max_sges;
init_attr->cap.max_inline_data = T4_MAX_SEND_INLINE;
- init_attr->sq_sig_type = qhp->sq_sig_all ? IB_SIGNAL_ALL_WR : 0;
+ init_attr->sq_sig_type = qhp->sq_sig_all ? IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR;
return 0;
}
-static void move_qp_to_err(struct c4iw_qp *qp)
+static void free_srq_queue(struct c4iw_srq *srq, struct c4iw_dev_ucontext *uctx,
+ struct c4iw_wr_wait *wr_waitp)
{
- struct c4iw_qp_attributes attrs = { .next_state = C4IW_QP_STATE_ERROR };
+ struct c4iw_rdev *rdev = &srq->rhp->rdev;
+ struct sk_buff *skb = srq->destroy_skb;
+ struct t4_srq *wq = &srq->wq;
+ struct fw_ri_res_wr *res_wr;
+ struct fw_ri_res *res;
+ int wr_len;
+
+ wr_len = sizeof(*res_wr) + sizeof(*res);
+ set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0);
- (void)c4iw_modify_qp(qp->rhp, qp, C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
+ res_wr = (struct fw_ri_res_wr *)__skb_put(skb, wr_len);
+ memset(res_wr, 0, wr_len);
+ res_wr->op_nres = cpu_to_be32(FW_WR_OP_V(FW_RI_RES_WR) |
+ FW_RI_RES_WR_NRES_V(1) |
+ FW_WR_COMPL_F);
+ res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16));
+ res_wr->cookie = (uintptr_t)wr_waitp;
+ res = res_wr->res;
+ res->u.srq.restype = FW_RI_RES_TYPE_SRQ;
+ res->u.srq.op = FW_RI_RES_OP_RESET;
+ res->u.srq.srqid = cpu_to_be32(srq->idx);
+ res->u.srq.eqid = cpu_to_be32(wq->qid);
+
+ c4iw_init_wr_wait(wr_waitp);
+ c4iw_ref_send_wait(rdev, skb, wr_waitp, 0, 0, __func__);
+
+ dma_free_coherent(&rdev->lldi.pdev->dev,
+ wq->memsize, wq->queue,
+ dma_unmap_addr(wq, mapping));
+ c4iw_rqtpool_free(rdev, wq->rqt_hwaddr, wq->rqt_size);
+ kfree(wq->sw_rq);
+ c4iw_put_qpid(rdev, wq->qid, uctx);
}
-void c4iw_drain_sq(struct ib_qp *ibqp)
+static int alloc_srq_queue(struct c4iw_srq *srq, struct c4iw_dev_ucontext *uctx,
+ struct c4iw_wr_wait *wr_waitp)
{
- struct c4iw_qp *qp = to_c4iw_qp(ibqp);
- unsigned long flag;
- bool need_to_wait;
+ struct c4iw_rdev *rdev = &srq->rhp->rdev;
+ int user = (uctx != &rdev->uctx);
+ struct t4_srq *wq = &srq->wq;
+ struct fw_ri_res_wr *res_wr;
+ struct fw_ri_res *res;
+ struct sk_buff *skb;
+ int wr_len;
+ int eqsize;
+ int ret = -ENOMEM;
+
+ wq->qid = c4iw_get_qpid(rdev, uctx);
+ if (!wq->qid)
+ goto err;
+
+ if (!user) {
+ wq->sw_rq = kcalloc(wq->size, sizeof(*wq->sw_rq),
+ GFP_KERNEL);
+ if (!wq->sw_rq)
+ goto err_put_qpid;
+ wq->pending_wrs = kcalloc(srq->wq.size,
+ sizeof(*srq->wq.pending_wrs),
+ GFP_KERNEL);
+ if (!wq->pending_wrs)
+ goto err_free_sw_rq;
+ }
+
+ wq->rqt_size = wq->size;
+ wq->rqt_hwaddr = c4iw_rqtpool_alloc(rdev, wq->rqt_size);
+ if (!wq->rqt_hwaddr)
+ goto err_free_pending_wrs;
+ wq->rqt_abs_idx = (wq->rqt_hwaddr - rdev->lldi.vr->rq.start) >>
+ T4_RQT_ENTRY_SHIFT;
+
+ wq->queue = dma_alloc_coherent(&rdev->lldi.pdev->dev, wq->memsize,
+ &wq->dma_addr, GFP_KERNEL);
+ if (!wq->queue)
+ goto err_free_rqtpool;
- move_qp_to_err(qp);
- spin_lock_irqsave(&qp->lock, flag);
- need_to_wait = !t4_sq_empty(&qp->wq);
- spin_unlock_irqrestore(&qp->lock, flag);
+ dma_unmap_addr_set(wq, mapping, wq->dma_addr);
- if (need_to_wait)
- wait_for_completion(&qp->sq_drained);
+ wq->bar2_va = c4iw_bar2_addrs(rdev, wq->qid, CXGB4_BAR2_QTYPE_EGRESS,
+ &wq->bar2_qid,
+ user ? &wq->bar2_pa : NULL);
+
+ /*
+ * User mode must have bar2 access.
+ */
+
+ if (user && !wq->bar2_va) {
+ pr_warn(MOD "%s: srqid %u not in BAR2 range.\n",
+ pci_name(rdev->lldi.pdev), wq->qid);
+ ret = -EINVAL;
+ goto err_free_queue;
+ }
+
+ /* build fw_ri_res_wr */
+ wr_len = sizeof(*res_wr) + sizeof(*res);
+
+ skb = alloc_skb(wr_len, GFP_KERNEL);
+ if (!skb)
+ goto err_free_queue;
+ set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0);
+
+ res_wr = (struct fw_ri_res_wr *)__skb_put(skb, wr_len);
+ memset(res_wr, 0, wr_len);
+ res_wr->op_nres = cpu_to_be32(FW_WR_OP_V(FW_RI_RES_WR) |
+ FW_RI_RES_WR_NRES_V(1) |
+ FW_WR_COMPL_F);
+ res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16));
+ res_wr->cookie = (uintptr_t)wr_waitp;
+ res = res_wr->res;
+ res->u.srq.restype = FW_RI_RES_TYPE_SRQ;
+ res->u.srq.op = FW_RI_RES_OP_WRITE;
+
+ /*
+ * eqsize is the number of 64B entries plus the status page size.
+ */
+ eqsize = wq->size * T4_RQ_NUM_SLOTS +
+ rdev->hw_queue.t4_eq_status_entries;
+ res->u.srq.eqid = cpu_to_be32(wq->qid);
+ res->u.srq.fetchszm_to_iqid =
+ /* no host cidx updates */
+ cpu_to_be32(FW_RI_RES_WR_HOSTFCMODE_V(0) |
+ FW_RI_RES_WR_CPRIO_V(0) | /* don't keep in chip cache */
+ FW_RI_RES_WR_PCIECHN_V(0) | /* set by uP at ri_init time */
+ FW_RI_RES_WR_FETCHRO_V(0)); /* relaxed_ordering */
+ res->u.srq.dcaen_to_eqsize =
+ cpu_to_be32(FW_RI_RES_WR_DCAEN_V(0) |
+ FW_RI_RES_WR_DCACPU_V(0) |
+ FW_RI_RES_WR_FBMIN_V(2) |
+ FW_RI_RES_WR_FBMAX_V(3) |
+ FW_RI_RES_WR_CIDXFTHRESHO_V(0) |
+ FW_RI_RES_WR_CIDXFTHRESH_V(0) |
+ FW_RI_RES_WR_EQSIZE_V(eqsize));
+ res->u.srq.eqaddr = cpu_to_be64(wq->dma_addr);
+ res->u.srq.srqid = cpu_to_be32(srq->idx);
+ res->u.srq.pdid = cpu_to_be32(srq->pdid);
+ res->u.srq.hwsrqsize = cpu_to_be32(wq->rqt_size);
+ res->u.srq.hwsrqaddr = cpu_to_be32(wq->rqt_hwaddr -
+ rdev->lldi.vr->rq.start);
+
+ c4iw_init_wr_wait(wr_waitp);
+
+ ret = c4iw_ref_send_wait(rdev, skb, wr_waitp, 0, wq->qid, __func__);
+ if (ret)
+ goto err_free_queue;
+
+ pr_debug("%s srq %u eqid %u pdid %u queue va %p pa 0x%llx\n"
+ " bar2_addr %p rqt addr 0x%x size %d\n",
+ __func__, srq->idx, wq->qid, srq->pdid, wq->queue,
+ (u64)virt_to_phys(wq->queue), wq->bar2_va,
+ wq->rqt_hwaddr, wq->rqt_size);
+
+ return 0;
+err_free_queue:
+ dma_free_coherent(&rdev->lldi.pdev->dev,
+ wq->memsize, wq->queue,
+ dma_unmap_addr(wq, mapping));
+err_free_rqtpool:
+ c4iw_rqtpool_free(rdev, wq->rqt_hwaddr, wq->rqt_size);
+err_free_pending_wrs:
+ if (!user)
+ kfree(wq->pending_wrs);
+err_free_sw_rq:
+ if (!user)
+ kfree(wq->sw_rq);
+err_put_qpid:
+ c4iw_put_qpid(rdev, wq->qid, uctx);
+err:
+ return ret;
}
-void c4iw_drain_rq(struct ib_qp *ibqp)
+void c4iw_copy_wr_to_srq(struct t4_srq *srq, union t4_recv_wr *wqe, u8 len16)
{
- struct c4iw_qp *qp = to_c4iw_qp(ibqp);
- unsigned long flag;
- bool need_to_wait;
+ u64 *src, *dst;
+
+ src = (u64 *)wqe;
+ dst = (u64 *)((u8 *)srq->queue + srq->wq_pidx * T4_EQ_ENTRY_SIZE);
+ while (len16) {
+ *dst++ = *src++;
+ if (dst >= (u64 *)&srq->queue[srq->size])
+ dst = (u64 *)srq->queue;
+ *dst++ = *src++;
+ if (dst >= (u64 *)&srq->queue[srq->size])
+ dst = (u64 *)srq->queue;
+ len16--;
+ }
+}
+
+int c4iw_create_srq(struct ib_srq *ib_srq, struct ib_srq_init_attr *attrs,
+ struct ib_udata *udata)
+{
+ struct ib_pd *pd = ib_srq->pd;
+ struct c4iw_dev *rhp;
+ struct c4iw_srq *srq = to_c4iw_srq(ib_srq);
+ struct c4iw_pd *php;
+ struct c4iw_create_srq_resp uresp;
+ struct c4iw_ucontext *ucontext;
+ struct c4iw_mm_entry *srq_key_mm, *srq_db_key_mm;
+ int rqsize;
+ int ret;
+ int wr_len;
+
+ if (attrs->srq_type != IB_SRQT_BASIC)
+ return -EOPNOTSUPP;
+
+ pr_debug("%s ib_pd %p\n", __func__, pd);
+
+ php = to_c4iw_pd(pd);
+ rhp = php->rhp;
+
+ if (!rhp->rdev.lldi.vr->srq.size)
+ return -EINVAL;
+ if (attrs->attr.max_wr > rhp->rdev.hw_queue.t4_max_rq_size)
+ return -E2BIG;
+ if (attrs->attr.max_sge > T4_MAX_RECV_SGE)
+ return -E2BIG;
+
+ /*
+ * SRQ RQT and RQ must be a power of 2 and at least 16 deep.
+ */
+ rqsize = attrs->attr.max_wr + 1;
+ rqsize = roundup_pow_of_two(max_t(u16, rqsize, 16));
+
+ ucontext = rdma_udata_to_drv_context(udata, struct c4iw_ucontext,
+ ibucontext);
+
+ srq->wr_waitp = c4iw_alloc_wr_wait(GFP_KERNEL);
+ if (!srq->wr_waitp)
+ return -ENOMEM;
+
+ srq->idx = c4iw_alloc_srq_idx(&rhp->rdev);
+ if (srq->idx < 0) {
+ ret = -ENOMEM;
+ goto err_free_wr_wait;
+ }
+
+ wr_len = sizeof(struct fw_ri_res_wr) + sizeof(struct fw_ri_res);
+ srq->destroy_skb = alloc_skb(wr_len, GFP_KERNEL);
+ if (!srq->destroy_skb) {
+ ret = -ENOMEM;
+ goto err_free_srq_idx;
+ }
- move_qp_to_err(qp);
- spin_lock_irqsave(&qp->lock, flag);
- need_to_wait = !t4_rq_empty(&qp->wq);
- spin_unlock_irqrestore(&qp->lock, flag);
+ srq->rhp = rhp;
+ srq->pdid = php->pdid;
- if (need_to_wait)
- wait_for_completion(&qp->rq_drained);
+ srq->wq.size = rqsize;
+ srq->wq.memsize =
+ (rqsize + rhp->rdev.hw_queue.t4_eq_status_entries) *
+ sizeof(*srq->wq.queue);
+ if (ucontext)
+ srq->wq.memsize = roundup(srq->wq.memsize, PAGE_SIZE);
+
+ ret = alloc_srq_queue(srq, ucontext ? &ucontext->uctx :
+ &rhp->rdev.uctx, srq->wr_waitp);
+ if (ret)
+ goto err_free_skb;
+ attrs->attr.max_wr = rqsize - 1;
+
+ if (CHELSIO_CHIP_VERSION(rhp->rdev.lldi.adapter_type) > CHELSIO_T6)
+ srq->flags = T4_SRQ_LIMIT_SUPPORT;
+
+ if (udata) {
+ srq_key_mm = kmalloc(sizeof(*srq_key_mm), GFP_KERNEL);
+ if (!srq_key_mm) {
+ ret = -ENOMEM;
+ goto err_free_queue;
+ }
+ srq_db_key_mm = kmalloc(sizeof(*srq_db_key_mm), GFP_KERNEL);
+ if (!srq_db_key_mm) {
+ ret = -ENOMEM;
+ goto err_free_srq_key_mm;
+ }
+ memset(&uresp, 0, sizeof(uresp));
+ uresp.flags = srq->flags;
+ uresp.qid_mask = rhp->rdev.qpmask;
+ uresp.srqid = srq->wq.qid;
+ uresp.srq_size = srq->wq.size;
+ uresp.srq_memsize = srq->wq.memsize;
+ uresp.rqt_abs_idx = srq->wq.rqt_abs_idx;
+ spin_lock(&ucontext->mmap_lock);
+ uresp.srq_key = ucontext->key;
+ ucontext->key += PAGE_SIZE;
+ uresp.srq_db_gts_key = ucontext->key;
+ ucontext->key += PAGE_SIZE;
+ spin_unlock(&ucontext->mmap_lock);
+ ret = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
+ if (ret)
+ goto err_free_srq_db_key_mm;
+ srq_key_mm->key = uresp.srq_key;
+ srq_key_mm->addr = 0;
+ srq_key_mm->len = PAGE_ALIGN(srq->wq.memsize);
+ srq_key_mm->vaddr = srq->wq.queue;
+ srq_key_mm->dma_addr = srq->wq.dma_addr;
+ insert_flag_to_mmap(&rhp->rdev, srq_key_mm, srq_key_mm->addr);
+ insert_mmap(ucontext, srq_key_mm);
+ srq_db_key_mm->key = uresp.srq_db_gts_key;
+ srq_db_key_mm->addr = (u64)(unsigned long)srq->wq.bar2_pa;
+ srq_db_key_mm->len = PAGE_SIZE;
+ srq_db_key_mm->vaddr = NULL;
+ srq_db_key_mm->dma_addr = 0;
+ insert_flag_to_mmap(&rhp->rdev, srq_db_key_mm,
+ srq_db_key_mm->addr);
+ insert_mmap(ucontext, srq_db_key_mm);
+ }
+
+ pr_debug("%s srq qid %u idx %u size %u memsize %lu num_entries %u\n",
+ __func__, srq->wq.qid, srq->idx, srq->wq.size,
+ (unsigned long)srq->wq.memsize, attrs->attr.max_wr);
+
+ spin_lock_init(&srq->lock);
+ return 0;
+
+err_free_srq_db_key_mm:
+ kfree(srq_db_key_mm);
+err_free_srq_key_mm:
+ kfree(srq_key_mm);
+err_free_queue:
+ free_srq_queue(srq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx,
+ srq->wr_waitp);
+err_free_skb:
+ kfree_skb(srq->destroy_skb);
+err_free_srq_idx:
+ c4iw_free_srq_idx(&rhp->rdev, srq->idx);
+err_free_wr_wait:
+ c4iw_put_wr_wait(srq->wr_waitp);
+ return ret;
+}
+
+int c4iw_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
+{
+ struct c4iw_dev *rhp;
+ struct c4iw_srq *srq;
+ struct c4iw_ucontext *ucontext;
+
+ srq = to_c4iw_srq(ibsrq);
+ rhp = srq->rhp;
+
+ pr_debug("%s id %d\n", __func__, srq->wq.qid);
+ ucontext = rdma_udata_to_drv_context(udata, struct c4iw_ucontext,
+ ibucontext);
+ free_srq_queue(srq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx,
+ srq->wr_waitp);
+ c4iw_free_srq_idx(&rhp->rdev, srq->idx);
+ c4iw_put_wr_wait(srq->wr_waitp);
+ return 0;
}
diff --git a/drivers/infiniband/hw/cxgb4/resource.c b/drivers/infiniband/hw/cxgb4/resource.c
index 67df71a7012e..e800e8e8bed5 100644
--- a/drivers/infiniband/hw/cxgb4/resource.c
+++ b/drivers/infiniband/hw/cxgb4/resource.c
@@ -53,7 +53,8 @@ static int c4iw_init_qid_table(struct c4iw_rdev *rdev)
}
/* nr_* must be power of 2 */
-int c4iw_init_resource(struct c4iw_rdev *rdev, u32 nr_tpt, u32 nr_pdid)
+int c4iw_init_resource(struct c4iw_rdev *rdev, u32 nr_tpt,
+ u32 nr_pdid, u32 nr_srqt)
{
int err = 0;
err = c4iw_id_table_alloc(&rdev->resource.tpt_table, 0, nr_tpt, 1,
@@ -67,7 +68,17 @@ int c4iw_init_resource(struct c4iw_rdev *rdev, u32 nr_tpt, u32 nr_pdid)
nr_pdid, 1, 0);
if (err)
goto pdid_err;
+ if (!nr_srqt)
+ err = c4iw_id_table_alloc(&rdev->resource.srq_table, 0,
+ 1, 1, 0);
+ else
+ err = c4iw_id_table_alloc(&rdev->resource.srq_table, 0,
+ nr_srqt, 0, 0);
+ if (err)
+ goto srq_err;
return 0;
+ srq_err:
+ c4iw_id_table_free(&rdev->resource.pdid_table);
pdid_err:
c4iw_id_table_free(&rdev->resource.qid_table);
qid_err:
@@ -90,7 +101,7 @@ u32 c4iw_get_resource(struct c4iw_id_table *id_table)
void c4iw_put_resource(struct c4iw_id_table *id_table, u32 entry)
{
- PDBG("%s entry 0x%x\n", __func__, entry);
+ pr_debug("entry 0x%x\n", entry);
c4iw_id_free(id_table, entry);
}
@@ -115,7 +126,7 @@ u32 c4iw_get_cqid(struct c4iw_rdev *rdev, struct c4iw_dev_ucontext *uctx)
rdev->stats.qid.cur += rdev->qpmask + 1;
mutex_unlock(&rdev->stats.lock);
for (i = qid+1; i & rdev->qpmask; i++) {
- entry = kmalloc(sizeof *entry, GFP_KERNEL);
+ entry = kmalloc(sizeof(*entry), GFP_KERNEL);
if (!entry)
goto out;
entry->qid = i;
@@ -126,13 +137,13 @@ u32 c4iw_get_cqid(struct c4iw_rdev *rdev, struct c4iw_dev_ucontext *uctx)
* now put the same ids on the qp list since they all
* map to the same db/gts page.
*/
- entry = kmalloc(sizeof *entry, GFP_KERNEL);
+ entry = kmalloc(sizeof(*entry), GFP_KERNEL);
if (!entry)
goto out;
entry->qid = qid;
list_add_tail(&entry->entry, &uctx->qpids);
for (i = qid+1; i & rdev->qpmask; i++) {
- entry = kmalloc(sizeof *entry, GFP_KERNEL);
+ entry = kmalloc(sizeof(*entry), GFP_KERNEL);
if (!entry)
goto out;
entry->qid = i;
@@ -141,7 +152,7 @@ u32 c4iw_get_cqid(struct c4iw_rdev *rdev, struct c4iw_dev_ucontext *uctx)
}
out:
mutex_unlock(&uctx->lock);
- PDBG("%s qid 0x%x\n", __func__, qid);
+ pr_debug("qid 0x%x\n", qid);
mutex_lock(&rdev->stats.lock);
if (rdev->stats.qid.cur > rdev->stats.qid.max)
rdev->stats.qid.max = rdev->stats.qid.cur;
@@ -154,10 +165,10 @@ void c4iw_put_cqid(struct c4iw_rdev *rdev, u32 qid,
{
struct c4iw_qid_list *entry;
- entry = kmalloc(sizeof *entry, GFP_KERNEL);
+ entry = kmalloc(sizeof(*entry), GFP_KERNEL);
if (!entry)
return;
- PDBG("%s qid 0x%x\n", __func__, qid);
+ pr_debug("qid 0x%x\n", qid);
entry->qid = qid;
mutex_lock(&uctx->lock);
list_add_tail(&entry->entry, &uctx->cqids);
@@ -189,7 +200,7 @@ u32 c4iw_get_qpid(struct c4iw_rdev *rdev, struct c4iw_dev_ucontext *uctx)
rdev->stats.qid.cur += rdev->qpmask + 1;
mutex_unlock(&rdev->stats.lock);
for (i = qid+1; i & rdev->qpmask; i++) {
- entry = kmalloc(sizeof *entry, GFP_KERNEL);
+ entry = kmalloc(sizeof(*entry), GFP_KERNEL);
if (!entry)
goto out;
entry->qid = i;
@@ -200,13 +211,13 @@ u32 c4iw_get_qpid(struct c4iw_rdev *rdev, struct c4iw_dev_ucontext *uctx)
* now put the same ids on the cq list since they all
* map to the same db/gts page.
*/
- entry = kmalloc(sizeof *entry, GFP_KERNEL);
+ entry = kmalloc(sizeof(*entry), GFP_KERNEL);
if (!entry)
goto out;
entry->qid = qid;
list_add_tail(&entry->entry, &uctx->cqids);
- for (i = qid; i & rdev->qpmask; i++) {
- entry = kmalloc(sizeof *entry, GFP_KERNEL);
+ for (i = qid + 1; i & rdev->qpmask; i++) {
+ entry = kmalloc(sizeof(*entry), GFP_KERNEL);
if (!entry)
goto out;
entry->qid = i;
@@ -215,7 +226,7 @@ u32 c4iw_get_qpid(struct c4iw_rdev *rdev, struct c4iw_dev_ucontext *uctx)
}
out:
mutex_unlock(&uctx->lock);
- PDBG("%s qid 0x%x\n", __func__, qid);
+ pr_debug("qid 0x%x\n", qid);
mutex_lock(&rdev->stats.lock);
if (rdev->stats.qid.cur > rdev->stats.qid.max)
rdev->stats.qid.max = rdev->stats.qid.cur;
@@ -228,10 +239,10 @@ void c4iw_put_qpid(struct c4iw_rdev *rdev, u32 qid,
{
struct c4iw_qid_list *entry;
- entry = kmalloc(sizeof *entry, GFP_KERNEL);
+ entry = kmalloc(sizeof(*entry), GFP_KERNEL);
if (!entry)
return;
- PDBG("%s qid 0x%x\n", __func__, qid);
+ pr_debug("qid 0x%x\n", qid);
entry->qid = qid;
mutex_lock(&uctx->lock);
list_add_tail(&entry->entry, &uctx->qpids);
@@ -254,25 +265,36 @@ void c4iw_destroy_resource(struct c4iw_resource *rscp)
u32 c4iw_pblpool_alloc(struct c4iw_rdev *rdev, int size)
{
unsigned long addr = gen_pool_alloc(rdev->pbl_pool, size);
- PDBG("%s addr 0x%x size %d\n", __func__, (u32)addr, size);
+ pr_debug("addr 0x%x size %d\n", (u32)addr, size);
mutex_lock(&rdev->stats.lock);
if (addr) {
rdev->stats.pbl.cur += roundup(size, 1 << MIN_PBL_SHIFT);
if (rdev->stats.pbl.cur > rdev->stats.pbl.max)
rdev->stats.pbl.max = rdev->stats.pbl.cur;
+ kref_get(&rdev->pbl_kref);
} else
rdev->stats.pbl.fail++;
mutex_unlock(&rdev->stats.lock);
return (u32)addr;
}
+static void destroy_pblpool(struct kref *kref)
+{
+ struct c4iw_rdev *rdev;
+
+ rdev = container_of(kref, struct c4iw_rdev, pbl_kref);
+ gen_pool_destroy(rdev->pbl_pool);
+ complete(&rdev->pbl_compl);
+}
+
void c4iw_pblpool_free(struct c4iw_rdev *rdev, u32 addr, int size)
{
- PDBG("%s addr 0x%x size %d\n", __func__, addr, size);
+ pr_debug("addr 0x%x size %d\n", addr, size);
mutex_lock(&rdev->stats.lock);
rdev->stats.pbl.cur -= roundup(size, 1 << MIN_PBL_SHIFT);
mutex_unlock(&rdev->stats.lock);
gen_pool_free(rdev->pbl_pool, (unsigned long)addr, size);
+ kref_put(&rdev->pbl_kref, destroy_pblpool);
}
int c4iw_pblpool_create(struct c4iw_rdev *rdev)
@@ -290,19 +312,17 @@ int c4iw_pblpool_create(struct c4iw_rdev *rdev)
while (pbl_start < pbl_top) {
pbl_chunk = min(pbl_top - pbl_start + 1, pbl_chunk);
if (gen_pool_add(rdev->pbl_pool, pbl_start, pbl_chunk, -1)) {
- PDBG("%s failed to add PBL chunk (%x/%x)\n",
- __func__, pbl_start, pbl_chunk);
+ pr_debug("failed to add PBL chunk (%x/%x)\n",
+ pbl_start, pbl_chunk);
if (pbl_chunk <= 1024 << MIN_PBL_SHIFT) {
- printk(KERN_WARNING MOD
- "Failed to add all PBL chunks (%x/%x)\n",
- pbl_start,
- pbl_top - pbl_start);
+ pr_warn("Failed to add all PBL chunks (%x/%x)\n",
+ pbl_start, pbl_top - pbl_start);
return 0;
}
pbl_chunk >>= 1;
} else {
- PDBG("%s added PBL chunk (%x/%x)\n",
- __func__, pbl_start, pbl_chunk);
+ pr_debug("added PBL chunk (%x/%x)\n",
+ pbl_start, pbl_chunk);
pbl_start += pbl_chunk;
}
}
@@ -312,7 +332,7 @@ int c4iw_pblpool_create(struct c4iw_rdev *rdev)
void c4iw_pblpool_destroy(struct c4iw_rdev *rdev)
{
- gen_pool_destroy(rdev->pbl_pool);
+ kref_put(&rdev->pbl_kref, destroy_pblpool);
}
/*
@@ -324,57 +344,75 @@ void c4iw_pblpool_destroy(struct c4iw_rdev *rdev)
u32 c4iw_rqtpool_alloc(struct c4iw_rdev *rdev, int size)
{
unsigned long addr = gen_pool_alloc(rdev->rqt_pool, size << 6);
- PDBG("%s addr 0x%x size %d\n", __func__, (u32)addr, size << 6);
+ pr_debug("addr 0x%x size %d\n", (u32)addr, size << 6);
if (!addr)
- pr_warn_ratelimited(MOD "%s: Out of RQT memory\n",
+ pr_warn_ratelimited("%s: Out of RQT memory\n",
pci_name(rdev->lldi.pdev));
mutex_lock(&rdev->stats.lock);
if (addr) {
rdev->stats.rqt.cur += roundup(size << 6, 1 << MIN_RQT_SHIFT);
if (rdev->stats.rqt.cur > rdev->stats.rqt.max)
rdev->stats.rqt.max = rdev->stats.rqt.cur;
+ kref_get(&rdev->rqt_kref);
} else
rdev->stats.rqt.fail++;
mutex_unlock(&rdev->stats.lock);
return (u32)addr;
}
+static void destroy_rqtpool(struct kref *kref)
+{
+ struct c4iw_rdev *rdev;
+
+ rdev = container_of(kref, struct c4iw_rdev, rqt_kref);
+ gen_pool_destroy(rdev->rqt_pool);
+ complete(&rdev->rqt_compl);
+}
+
void c4iw_rqtpool_free(struct c4iw_rdev *rdev, u32 addr, int size)
{
- PDBG("%s addr 0x%x size %d\n", __func__, addr, size << 6);
+ pr_debug("addr 0x%x size %d\n", addr, size << 6);
mutex_lock(&rdev->stats.lock);
rdev->stats.rqt.cur -= roundup(size << 6, 1 << MIN_RQT_SHIFT);
mutex_unlock(&rdev->stats.lock);
gen_pool_free(rdev->rqt_pool, (unsigned long)addr, size << 6);
+ kref_put(&rdev->rqt_kref, destroy_rqtpool);
}
int c4iw_rqtpool_create(struct c4iw_rdev *rdev)
{
unsigned rqt_start, rqt_chunk, rqt_top;
+ int skip = 0;
rdev->rqt_pool = gen_pool_create(MIN_RQT_SHIFT, -1);
if (!rdev->rqt_pool)
return -ENOMEM;
- rqt_start = rdev->lldi.vr->rq.start;
- rqt_chunk = rdev->lldi.vr->rq.size;
+ /*
+ * If SRQs are supported, then never use the first RQE from
+ * the RQT region. This is because HW uses RQT index 0 as NULL.
+ */
+ if (rdev->lldi.vr->srq.size)
+ skip = T4_RQT_ENTRY_SIZE;
+
+ rqt_start = rdev->lldi.vr->rq.start + skip;
+ rqt_chunk = rdev->lldi.vr->rq.size - skip;
rqt_top = rqt_start + rqt_chunk;
while (rqt_start < rqt_top) {
rqt_chunk = min(rqt_top - rqt_start + 1, rqt_chunk);
if (gen_pool_add(rdev->rqt_pool, rqt_start, rqt_chunk, -1)) {
- PDBG("%s failed to add RQT chunk (%x/%x)\n",
- __func__, rqt_start, rqt_chunk);
+ pr_debug("failed to add RQT chunk (%x/%x)\n",
+ rqt_start, rqt_chunk);
if (rqt_chunk <= 1024 << MIN_RQT_SHIFT) {
- printk(KERN_WARNING MOD
- "Failed to add all RQT chunks (%x/%x)\n",
- rqt_start, rqt_top - rqt_start);
+ pr_warn("Failed to add all RQT chunks (%x/%x)\n",
+ rqt_start, rqt_top - rqt_start);
return 0;
}
rqt_chunk >>= 1;
} else {
- PDBG("%s added RQT chunk (%x/%x)\n",
- __func__, rqt_start, rqt_chunk);
+ pr_debug("added RQT chunk (%x/%x)\n",
+ rqt_start, rqt_chunk);
rqt_start += rqt_chunk;
}
}
@@ -383,7 +421,33 @@ int c4iw_rqtpool_create(struct c4iw_rdev *rdev)
void c4iw_rqtpool_destroy(struct c4iw_rdev *rdev)
{
- gen_pool_destroy(rdev->rqt_pool);
+ kref_put(&rdev->rqt_kref, destroy_rqtpool);
+}
+
+int c4iw_alloc_srq_idx(struct c4iw_rdev *rdev)
+{
+ int idx;
+
+ idx = c4iw_id_alloc(&rdev->resource.srq_table);
+ mutex_lock(&rdev->stats.lock);
+ if (idx == -1) {
+ rdev->stats.srqt.fail++;
+ mutex_unlock(&rdev->stats.lock);
+ return -ENOMEM;
+ }
+ rdev->stats.srqt.cur++;
+ if (rdev->stats.srqt.cur > rdev->stats.srqt.max)
+ rdev->stats.srqt.max = rdev->stats.srqt.cur;
+ mutex_unlock(&rdev->stats.lock);
+ return idx;
+}
+
+void c4iw_free_srq_idx(struct c4iw_rdev *rdev, int idx)
+{
+ c4iw_id_free(&rdev->resource.srq_table, idx);
+ mutex_lock(&rdev->stats.lock);
+ rdev->stats.srqt.cur--;
+ mutex_unlock(&rdev->stats.lock);
}
/*
@@ -394,7 +458,7 @@ void c4iw_rqtpool_destroy(struct c4iw_rdev *rdev)
u32 c4iw_ocqp_pool_alloc(struct c4iw_rdev *rdev, int size)
{
unsigned long addr = gen_pool_alloc(rdev->ocqp_pool, size);
- PDBG("%s addr 0x%x size %d\n", __func__, (u32)addr, size);
+ pr_debug("addr 0x%x size %d\n", (u32)addr, size);
if (addr) {
mutex_lock(&rdev->stats.lock);
rdev->stats.ocqp.cur += roundup(size, 1 << MIN_OCQP_SHIFT);
@@ -407,7 +471,7 @@ u32 c4iw_ocqp_pool_alloc(struct c4iw_rdev *rdev, int size)
void c4iw_ocqp_pool_free(struct c4iw_rdev *rdev, u32 addr, int size)
{
- PDBG("%s addr 0x%x size %d\n", __func__, addr, size);
+ pr_debug("addr 0x%x size %d\n", addr, size);
mutex_lock(&rdev->stats.lock);
rdev->stats.ocqp.cur -= roundup(size, 1 << MIN_OCQP_SHIFT);
mutex_unlock(&rdev->stats.lock);
@@ -429,18 +493,17 @@ int c4iw_ocqp_pool_create(struct c4iw_rdev *rdev)
while (start < top) {
chunk = min(top - start + 1, chunk);
if (gen_pool_add(rdev->ocqp_pool, start, chunk, -1)) {
- PDBG("%s failed to add OCQP chunk (%x/%x)\n",
- __func__, start, chunk);
+ pr_debug("failed to add OCQP chunk (%x/%x)\n",
+ start, chunk);
if (chunk <= 1024 << MIN_OCQP_SHIFT) {
- printk(KERN_WARNING MOD
- "Failed to add all OCQP chunks (%x/%x)\n",
- start, top - start);
+ pr_warn("Failed to add all OCQP chunks (%x/%x)\n",
+ start, top - start);
return 0;
}
chunk >>= 1;
} else {
- PDBG("%s added OCQP chunk (%x/%x)\n",
- __func__, start, chunk);
+ pr_debug("added OCQP chunk (%x/%x)\n",
+ start, chunk);
start += chunk;
}
}
diff --git a/drivers/infiniband/hw/cxgb4/restrack.c b/drivers/infiniband/hw/cxgb4/restrack.c
new file mode 100644
index 000000000000..fd22c85d35f4
--- /dev/null
+++ b/drivers/infiniband/hw/cxgb4/restrack.c
@@ -0,0 +1,487 @@
+/*
+ * Copyright (c) 2018 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <rdma/rdma_cm.h>
+
+#include "iw_cxgb4.h"
+#include <rdma/restrack.h>
+#include <uapi/rdma/rdma_netlink.h>
+
+static int fill_sq(struct sk_buff *msg, struct t4_wq *wq)
+{
+ /* WQ+SQ */
+ if (rdma_nl_put_driver_u32(msg, "sqid", wq->sq.qid))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "flushed", wq->flushed))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "memsize", wq->sq.memsize))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "cidx", wq->sq.cidx))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "pidx", wq->sq.pidx))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "wq_pidx", wq->sq.wq_pidx))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "flush_cidx", wq->sq.flush_cidx))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "in_use", wq->sq.in_use))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "size", wq->sq.size))
+ goto err;
+ if (rdma_nl_put_driver_u32_hex(msg, "flags", wq->sq.flags))
+ goto err;
+ return 0;
+err:
+ return -EMSGSIZE;
+}
+
+static int fill_rq(struct sk_buff *msg, struct t4_wq *wq)
+{
+ /* RQ */
+ if (rdma_nl_put_driver_u32(msg, "rqid", wq->rq.qid))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "memsize", wq->rq.memsize))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "cidx", wq->rq.cidx))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "pidx", wq->rq.pidx))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "wq_pidx", wq->rq.wq_pidx))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "msn", wq->rq.msn))
+ goto err;
+ if (rdma_nl_put_driver_u32_hex(msg, "rqt_hwaddr", wq->rq.rqt_hwaddr))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "rqt_size", wq->rq.rqt_size))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "in_use", wq->rq.in_use))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "size", wq->rq.size))
+ goto err;
+ return 0;
+err:
+ return -EMSGSIZE;
+}
+
+static int fill_swsqe(struct sk_buff *msg, struct t4_sq *sq, u16 idx,
+ struct t4_swsqe *sqe)
+{
+ if (rdma_nl_put_driver_u32(msg, "idx", idx))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "opcode", sqe->opcode))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "complete", sqe->complete))
+ goto err;
+ if (sqe->complete &&
+ rdma_nl_put_driver_u32(msg, "cqe_status", CQE_STATUS(&sqe->cqe)))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "signaled", sqe->signaled))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "flushed", sqe->flushed))
+ goto err;
+ return 0;
+err:
+ return -EMSGSIZE;
+}
+
+/*
+ * Dump the first and last pending sqes.
+ */
+static int fill_swsqes(struct sk_buff *msg, struct t4_sq *sq,
+ u16 first_idx, struct t4_swsqe *first_sqe,
+ u16 last_idx, struct t4_swsqe *last_sqe)
+{
+ if (!first_sqe)
+ goto out;
+ if (fill_swsqe(msg, sq, first_idx, first_sqe))
+ goto err;
+ if (!last_sqe)
+ goto out;
+ if (fill_swsqe(msg, sq, last_idx, last_sqe))
+ goto err;
+out:
+ return 0;
+err:
+ return -EMSGSIZE;
+}
+
+int c4iw_fill_res_qp_entry(struct sk_buff *msg, struct ib_qp *ibqp)
+{
+ struct t4_swsqe *fsp = NULL, *lsp = NULL;
+ struct c4iw_qp *qhp = to_c4iw_qp(ibqp);
+ u16 first_sq_idx = 0, last_sq_idx = 0;
+ struct t4_swsqe first_sqe, last_sqe;
+ struct nlattr *table_attr;
+ struct t4_wq wq;
+
+ /* User qp state is not available, so don't dump user qps */
+ if (qhp->ucontext)
+ return 0;
+
+ table_attr = nla_nest_start_noflag(msg, RDMA_NLDEV_ATTR_DRIVER);
+ if (!table_attr)
+ goto err;
+
+ /* Get a consistent snapshot */
+ spin_lock_irq(&qhp->lock);
+ wq = qhp->wq;
+
+ /* If there are any pending sqes, copy the first and last */
+ if (wq.sq.cidx != wq.sq.pidx) {
+ first_sq_idx = wq.sq.cidx;
+ first_sqe = qhp->wq.sq.sw_sq[first_sq_idx];
+ fsp = &first_sqe;
+ last_sq_idx = wq.sq.pidx;
+ if (last_sq_idx-- == 0)
+ last_sq_idx = wq.sq.size - 1;
+ if (last_sq_idx != first_sq_idx) {
+ last_sqe = qhp->wq.sq.sw_sq[last_sq_idx];
+ lsp = &last_sqe;
+ }
+ }
+ spin_unlock_irq(&qhp->lock);
+
+ if (fill_sq(msg, &wq))
+ goto err_cancel_table;
+
+ if (fill_swsqes(msg, &wq.sq, first_sq_idx, fsp, last_sq_idx, lsp))
+ goto err_cancel_table;
+
+ if (fill_rq(msg, &wq))
+ goto err_cancel_table;
+
+ nla_nest_end(msg, table_attr);
+ return 0;
+
+err_cancel_table:
+ nla_nest_cancel(msg, table_attr);
+err:
+ return -EMSGSIZE;
+}
+
+union union_ep {
+ struct c4iw_listen_ep lep;
+ struct c4iw_ep ep;
+};
+
+int c4iw_fill_res_cm_id_entry(struct sk_buff *msg,
+ struct rdma_cm_id *cm_id)
+{
+ struct nlattr *table_attr;
+ struct c4iw_ep_common *epcp;
+ struct c4iw_listen_ep *listen_ep = NULL;
+ struct c4iw_ep *ep = NULL;
+ struct iw_cm_id *iw_cm_id;
+ union union_ep *uep;
+
+ iw_cm_id = rdma_iw_cm_id(cm_id);
+ if (!iw_cm_id)
+ return 0;
+ epcp = (struct c4iw_ep_common *)iw_cm_id->provider_data;
+ if (!epcp)
+ return 0;
+ uep = kzalloc(sizeof(*uep), GFP_KERNEL);
+ if (!uep)
+ return 0;
+
+ table_attr = nla_nest_start_noflag(msg, RDMA_NLDEV_ATTR_DRIVER);
+ if (!table_attr)
+ goto err_free_uep;
+
+ /* Get a consistent snapshot */
+ mutex_lock(&epcp->mutex);
+ if (epcp->state == LISTEN) {
+ uep->lep = *(struct c4iw_listen_ep *)epcp;
+ mutex_unlock(&epcp->mutex);
+ listen_ep = &uep->lep;
+ epcp = &listen_ep->com;
+ } else {
+ uep->ep = *(struct c4iw_ep *)epcp;
+ mutex_unlock(&epcp->mutex);
+ ep = &uep->ep;
+ epcp = &ep->com;
+ }
+
+ if (rdma_nl_put_driver_u32(msg, "state", epcp->state))
+ goto err_cancel_table;
+ if (rdma_nl_put_driver_u64_hex(msg, "flags", epcp->flags))
+ goto err_cancel_table;
+ if (rdma_nl_put_driver_u64_hex(msg, "history", epcp->history))
+ goto err_cancel_table;
+
+ if (listen_ep) {
+ if (rdma_nl_put_driver_u32(msg, "stid", listen_ep->stid))
+ goto err_cancel_table;
+ if (rdma_nl_put_driver_u32(msg, "backlog", listen_ep->backlog))
+ goto err_cancel_table;
+ } else {
+ if (rdma_nl_put_driver_u32(msg, "hwtid", ep->hwtid))
+ goto err_cancel_table;
+ if (rdma_nl_put_driver_u32(msg, "ord", ep->ord))
+ goto err_cancel_table;
+ if (rdma_nl_put_driver_u32(msg, "ird", ep->ird))
+ goto err_cancel_table;
+ if (rdma_nl_put_driver_u32(msg, "emss", ep->emss))
+ goto err_cancel_table;
+
+ if (!ep->parent_ep && rdma_nl_put_driver_u32(msg, "atid",
+ ep->atid))
+ goto err_cancel_table;
+ }
+ nla_nest_end(msg, table_attr);
+ kfree(uep);
+ return 0;
+
+err_cancel_table:
+ nla_nest_cancel(msg, table_attr);
+err_free_uep:
+ kfree(uep);
+ return -EMSGSIZE;
+}
+
+static int fill_cq(struct sk_buff *msg, struct t4_cq *cq)
+{
+ if (rdma_nl_put_driver_u32(msg, "cqid", cq->cqid))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "memsize", cq->memsize))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "size", cq->size))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "cidx", cq->cidx))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "cidx_inc", cq->cidx_inc))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "sw_cidx", cq->sw_cidx))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "sw_pidx", cq->sw_pidx))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "sw_in_use", cq->sw_in_use))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "vector", cq->vector))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "gen", cq->gen))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "error", cq->error))
+ goto err;
+ if (rdma_nl_put_driver_u64_hex(msg, "bits_type_ts",
+ be64_to_cpu(cq->bits_type_ts)))
+ goto err;
+ if (rdma_nl_put_driver_u64_hex(msg, "flags", cq->flags))
+ goto err;
+
+ return 0;
+
+err:
+ return -EMSGSIZE;
+}
+
+static int fill_cqe(struct sk_buff *msg, struct t4_cqe *cqe, u16 idx,
+ const char *qstr)
+{
+ if (rdma_nl_put_driver_u32(msg, qstr, idx))
+ goto err;
+ if (rdma_nl_put_driver_u32_hex(msg, "header",
+ be32_to_cpu(cqe->header)))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "len", be32_to_cpu(cqe->len)))
+ goto err;
+ if (rdma_nl_put_driver_u32_hex(msg, "wrid_hi",
+ be32_to_cpu(cqe->u.gen.wrid_hi)))
+ goto err;
+ if (rdma_nl_put_driver_u32_hex(msg, "wrid_low",
+ be32_to_cpu(cqe->u.gen.wrid_low)))
+ goto err;
+ if (rdma_nl_put_driver_u64_hex(msg, "bits_type_ts",
+ be64_to_cpu(cqe->bits_type_ts)))
+ goto err;
+
+ return 0;
+
+err:
+ return -EMSGSIZE;
+}
+
+static int fill_hwcqes(struct sk_buff *msg, struct t4_cq *cq,
+ struct t4_cqe *cqes)
+{
+ u16 idx;
+
+ idx = (cq->cidx > 0) ? cq->cidx - 1 : cq->size - 1;
+ if (fill_cqe(msg, cqes, idx, "hwcq_idx"))
+ goto err;
+ idx = cq->cidx;
+ if (fill_cqe(msg, cqes + 1, idx, "hwcq_idx"))
+ goto err;
+
+ return 0;
+err:
+ return -EMSGSIZE;
+}
+
+static int fill_swcqes(struct sk_buff *msg, struct t4_cq *cq,
+ struct t4_cqe *cqes)
+{
+ u16 idx;
+
+ if (!cq->sw_in_use)
+ return 0;
+
+ idx = cq->sw_cidx;
+ if (fill_cqe(msg, cqes, idx, "swcq_idx"))
+ goto err;
+ if (cq->sw_in_use == 1)
+ goto out;
+ idx = (cq->sw_pidx > 0) ? cq->sw_pidx - 1 : cq->size - 1;
+ if (fill_cqe(msg, cqes + 1, idx, "swcq_idx"))
+ goto err;
+out:
+ return 0;
+err:
+ return -EMSGSIZE;
+}
+
+int c4iw_fill_res_cq_entry(struct sk_buff *msg, struct ib_cq *ibcq)
+{
+ struct c4iw_cq *chp = to_c4iw_cq(ibcq);
+ struct nlattr *table_attr;
+ struct t4_cqe hwcqes[2];
+ struct t4_cqe swcqes[2];
+ struct t4_cq cq;
+ u16 idx;
+
+ /* User cq state is not available, so don't dump user cqs */
+ if (ibcq->uobject)
+ return 0;
+
+ table_attr = nla_nest_start_noflag(msg, RDMA_NLDEV_ATTR_DRIVER);
+ if (!table_attr)
+ goto err;
+
+ /* Get a consistent snapshot */
+ spin_lock_irq(&chp->lock);
+
+ /* t4_cq struct */
+ cq = chp->cq;
+
+ /* get 2 hw cqes: cidx-1, and cidx */
+ idx = (cq.cidx > 0) ? cq.cidx - 1 : cq.size - 1;
+ hwcqes[0] = chp->cq.queue[idx];
+
+ idx = cq.cidx;
+ hwcqes[1] = chp->cq.queue[idx];
+
+ /* get first and last sw cqes */
+ if (cq.sw_in_use) {
+ swcqes[0] = chp->cq.sw_queue[cq.sw_cidx];
+ if (cq.sw_in_use > 1) {
+ idx = (cq.sw_pidx > 0) ? cq.sw_pidx - 1 : cq.size - 1;
+ swcqes[1] = chp->cq.sw_queue[idx];
+ }
+ }
+
+ spin_unlock_irq(&chp->lock);
+
+ if (fill_cq(msg, &cq))
+ goto err_cancel_table;
+
+ if (fill_swcqes(msg, &cq, swcqes))
+ goto err_cancel_table;
+
+ if (fill_hwcqes(msg, &cq, hwcqes))
+ goto err_cancel_table;
+
+ nla_nest_end(msg, table_attr);
+ return 0;
+
+err_cancel_table:
+ nla_nest_cancel(msg, table_attr);
+err:
+ return -EMSGSIZE;
+}
+
+int c4iw_fill_res_mr_entry(struct sk_buff *msg, struct ib_mr *ibmr)
+{
+ struct c4iw_mr *mhp = to_c4iw_mr(ibmr);
+ struct c4iw_dev *dev = mhp->rhp;
+ u32 stag = mhp->attr.stag;
+ struct nlattr *table_attr;
+ struct fw_ri_tpte tpte;
+ int ret;
+
+ if (!stag)
+ return 0;
+
+ table_attr = nla_nest_start_noflag(msg, RDMA_NLDEV_ATTR_DRIVER);
+ if (!table_attr)
+ goto err;
+
+ ret = cxgb4_read_tpte(dev->rdev.lldi.ports[0], stag, (__be32 *)&tpte);
+ if (ret) {
+ dev_err(&dev->rdev.lldi.pdev->dev,
+ "%s cxgb4_read_tpte err %d\n", __func__, ret);
+ return 0;
+ }
+
+ if (rdma_nl_put_driver_u32_hex(msg, "idx", stag >> 8))
+ goto err_cancel_table;
+ if (rdma_nl_put_driver_u32(msg, "valid",
+ FW_RI_TPTE_VALID_G(ntohl(tpte.valid_to_pdid))))
+ goto err_cancel_table;
+ if (rdma_nl_put_driver_u32_hex(msg, "key", stag & 0xff))
+ goto err_cancel_table;
+ if (rdma_nl_put_driver_u32(msg, "state",
+ FW_RI_TPTE_STAGSTATE_G(ntohl(tpte.valid_to_pdid))))
+ goto err_cancel_table;
+ if (rdma_nl_put_driver_u32(msg, "pdid",
+ FW_RI_TPTE_PDID_G(ntohl(tpte.valid_to_pdid))))
+ goto err_cancel_table;
+ if (rdma_nl_put_driver_u32_hex(msg, "perm",
+ FW_RI_TPTE_PERM_G(ntohl(tpte.locread_to_qpid))))
+ goto err_cancel_table;
+ if (rdma_nl_put_driver_u32(msg, "ps",
+ FW_RI_TPTE_PS_G(ntohl(tpte.locread_to_qpid))))
+ goto err_cancel_table;
+ if (rdma_nl_put_driver_u64(msg, "len",
+ ((u64)ntohl(tpte.len_hi) << 32) | ntohl(tpte.len_lo)))
+ goto err_cancel_table;
+ if (rdma_nl_put_driver_u32_hex(msg, "pbl_addr",
+ FW_RI_TPTE_PBLADDR_G(ntohl(tpte.nosnoop_pbladdr))))
+ goto err_cancel_table;
+
+ nla_nest_end(msg, table_attr);
+ return 0;
+
+err_cancel_table:
+ nla_nest_cancel(msg, table_attr);
+err:
+ return -EMSGSIZE;
+}
diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h
index 862381aa83c8..c3b0e2896475 100644
--- a/drivers/infiniband/hw/cxgb4/t4.h
+++ b/drivers/infiniband/hw/cxgb4/t4.h
@@ -35,6 +35,7 @@
#include "t4_regs.h"
#include "t4_values.h"
#include "t4_msg.h"
+#include "t4_tcb.h"
#include "t4fw_ri_api.h"
#define T4_MAX_NUM_PD 65536
@@ -52,12 +53,16 @@ struct t4_status_page {
__be16 pidx;
u8 qp_err; /* flit 1 - sw owns */
u8 db_off;
- u8 pad;
+ u8 pad[2];
u16 host_wq_pidx;
u16 host_cidx;
u16 host_pidx;
+ u16 pad2;
+ u32 srqidx;
};
+#define T4_RQT_ENTRY_SHIFT 6
+#define T4_RQT_ENTRY_SIZE BIT(T4_RQT_ENTRY_SHIFT)
#define T4_EQ_ENTRY_SIZE 64
#define T4_SQ_NUM_SLOTS 5
@@ -87,6 +92,9 @@ static inline int t4_max_fr_depth(int use_dsgl)
#define T4_RQ_NUM_BYTES (T4_EQ_ENTRY_SIZE * T4_RQ_NUM_SLOTS)
#define T4_MAX_RECV_SGE 4
+#define T4_WRITE_CMPL_MAX_SGL 4
+#define T4_WRITE_CMPL_MAX_CQE 16
+
union t4_wr {
struct fw_ri_res_wr res;
struct fw_ri_wr ri;
@@ -97,6 +105,7 @@ union t4_wr {
struct fw_ri_fr_nsmr_wr fr;
struct fw_ri_fr_nsmr_tpte_wr fr_tpte;
struct fw_ri_inv_lstag_wr inv;
+ struct fw_ri_rdma_write_cmpl_wr write_cmpl;
struct t4_status_page status;
__be64 flits[T4_EQ_ENTRY_SIZE / sizeof(__be64) * T4_SQ_NUM_SLOTS];
};
@@ -171,7 +180,7 @@ struct t4_cqe {
__be32 msn;
} rcqe;
struct {
- u32 stag;
+ __be32 stag;
u16 nada2;
u16 cidx;
} scqe;
@@ -179,8 +188,32 @@ struct t4_cqe {
__be32 wrid_hi;
__be32 wrid_low;
} gen;
+ struct {
+ __be32 stag;
+ __be32 msn;
+ __be32 reserved;
+ __be32 abs_rqe_idx;
+ } srcqe;
+ struct {
+ __be32 mo;
+ __be32 msn;
+ /*
+ * Use union for immediate data to be consistent with
+ * stack's 32 bit data and iWARP spec's 64 bit data.
+ */
+ union {
+ struct {
+ __be32 imm_data32;
+ u32 reserved;
+ } ib_imm_data;
+ __be64 imm_data64;
+ } iw_imm_data;
+ } imm_data_rcqe;
+
+ u64 drain_cookie;
+ __be64 flits[3];
} u;
- __be64 reserved;
+ __be64 reserved[3];
__be64 bits_type_ts;
};
@@ -196,6 +229,11 @@ struct t4_cqe {
#define CQE_SWCQE_G(x) ((((x) >> CQE_SWCQE_S)) & CQE_SWCQE_M)
#define CQE_SWCQE_V(x) ((x)<<CQE_SWCQE_S)
+#define CQE_DRAIN_S 10
+#define CQE_DRAIN_M 0x1
+#define CQE_DRAIN_G(x) ((((x) >> CQE_DRAIN_S)) & CQE_DRAIN_M)
+#define CQE_DRAIN_V(x) ((x)<<CQE_DRAIN_S)
+
#define CQE_STATUS_S 5
#define CQE_STATUS_M 0x1F
#define CQE_STATUS_G(x) ((((x) >> CQE_STATUS_S)) & CQE_STATUS_M)
@@ -212,6 +250,7 @@ struct t4_cqe {
#define CQE_OPCODE_V(x) ((x)<<CQE_OPCODE_S)
#define SW_CQE(x) (CQE_SWCQE_G(be32_to_cpu((x)->header)))
+#define DRAIN_CQE(x) (CQE_DRAIN_G(be32_to_cpu((x)->header)))
#define CQE_QPID(x) (CQE_QPID_G(be32_to_cpu((x)->header)))
#define CQE_TYPE(x) (CQE_TYPE_G(be32_to_cpu((x)->header)))
#define SQ_TYPE(x) (CQE_TYPE((x)))
@@ -230,6 +269,9 @@ struct t4_cqe {
/* used for RQ completion processing */
#define CQE_WRID_STAG(x) (be32_to_cpu((x)->u.rcqe.stag))
#define CQE_WRID_MSN(x) (be32_to_cpu((x)->u.rcqe.msn))
+#define CQE_ABS_RQE_IDX(x) (be32_to_cpu((x)->u.srcqe.abs_rqe_idx))
+#define CQE_IMM_DATA(x)( \
+ (x)->u.imm_data_rcqe.iw_imm_data.ib_imm_data.imm_data32)
/* used for SQ completion processing */
#define CQE_WRID_SQ_IDX(x) ((x)->u.scqe.cidx)
@@ -238,6 +280,7 @@ struct t4_cqe {
/* generic accessor macros */
#define CQE_WRID_HI(x) (be32_to_cpu((x)->u.gen.wrid_hi))
#define CQE_WRID_LOW(x) (be32_to_cpu((x)->u.gen.wrid_low))
+#define CQE_DRAIN_COOKIE(x) ((x)->u.drain_cookie)
/* macros for flit 3 of the cqe */
#define CQE_GENBIT_S 63
@@ -269,7 +312,7 @@ struct t4_swsqe {
int signaled;
u16 idx;
int flushed;
- struct timespec host_ts;
+ ktime_t host_time;
u64 sge_ts;
};
@@ -310,8 +353,9 @@ struct t4_sq {
struct t4_swrqe {
u64 wr_id;
- struct timespec host_ts;
+ ktime_t host_time;
u64 sge_ts;
+ int valid;
};
struct t4_rq {
@@ -341,8 +385,98 @@ struct t4_wq {
void __iomem *db;
struct c4iw_rdev *rdev;
int flushed;
+ u8 *qp_errp;
+ u32 *srqidxp;
+};
+
+struct t4_srq_pending_wr {
+ u64 wr_id;
+ union t4_recv_wr wqe;
+ u8 len16;
};
+struct t4_srq {
+ union t4_recv_wr *queue;
+ dma_addr_t dma_addr;
+ DEFINE_DMA_UNMAP_ADDR(mapping);
+ struct t4_swrqe *sw_rq;
+ void __iomem *bar2_va;
+ u64 bar2_pa;
+ size_t memsize;
+ u32 bar2_qid;
+ u32 qid;
+ u32 msn;
+ u32 rqt_hwaddr;
+ u32 rqt_abs_idx;
+ u16 rqt_size;
+ u16 size;
+ u16 cidx;
+ u16 pidx;
+ u16 wq_pidx;
+ u16 wq_pidx_inc;
+ u16 in_use;
+ struct t4_srq_pending_wr *pending_wrs;
+ u16 pending_cidx;
+ u16 pending_pidx;
+ u16 pending_in_use;
+ u16 ooo_count;
+};
+
+static inline u32 t4_srq_avail(struct t4_srq *srq)
+{
+ return srq->size - 1 - srq->in_use;
+}
+
+static inline void t4_srq_produce(struct t4_srq *srq, u8 len16)
+{
+ srq->in_use++;
+ if (++srq->pidx == srq->size)
+ srq->pidx = 0;
+ srq->wq_pidx += DIV_ROUND_UP(len16 * 16, T4_EQ_ENTRY_SIZE);
+ if (srq->wq_pidx >= srq->size * T4_RQ_NUM_SLOTS)
+ srq->wq_pidx %= srq->size * T4_RQ_NUM_SLOTS;
+ srq->queue[srq->size].status.host_pidx = srq->pidx;
+}
+
+static inline void t4_srq_produce_pending_wr(struct t4_srq *srq)
+{
+ srq->pending_in_use++;
+ srq->in_use++;
+ if (++srq->pending_pidx == srq->size)
+ srq->pending_pidx = 0;
+}
+
+static inline void t4_srq_consume_pending_wr(struct t4_srq *srq)
+{
+ srq->pending_in_use--;
+ srq->in_use--;
+ if (++srq->pending_cidx == srq->size)
+ srq->pending_cidx = 0;
+}
+
+static inline void t4_srq_produce_ooo(struct t4_srq *srq)
+{
+ srq->in_use--;
+ srq->ooo_count++;
+}
+
+static inline void t4_srq_consume_ooo(struct t4_srq *srq)
+{
+ srq->cidx++;
+ if (srq->cidx == srq->size)
+ srq->cidx = 0;
+ srq->queue[srq->size].status.host_cidx = srq->cidx;
+ srq->ooo_count--;
+}
+
+static inline void t4_srq_consume(struct t4_srq *srq)
+{
+ srq->in_use--;
+ if (++srq->cidx == srq->size)
+ srq->cidx = 0;
+ srq->queue[srq->size].status.host_cidx = srq->cidx;
+}
+
static inline int t4_rqes_posted(struct t4_wq *wq)
{
return wq->rq.in_use;
@@ -353,11 +487,6 @@ static inline int t4_rq_empty(struct t4_wq *wq)
return wq->rq.in_use == 0;
}
-static inline int t4_rq_full(struct t4_wq *wq)
-{
- return wq->rq.in_use == (wq->rq.size - 1);
-}
-
static inline u32 t4_rq_avail(struct t4_wq *wq)
{
return wq->rq.size - 1 - wq->rq.in_use;
@@ -376,7 +505,6 @@ static inline void t4_rq_produce(struct t4_wq *wq, u8 len16)
static inline void t4_rq_consume(struct t4_wq *wq)
{
wq->rq.in_use--;
- wq->rq.msn++;
if (++wq->rq.cidx == wq->rq.size)
wq->rq.cidx = 0;
}
@@ -401,11 +529,6 @@ static inline int t4_sq_empty(struct t4_wq *wq)
return wq->sq.in_use == 0;
}
-static inline int t4_sq_full(struct t4_wq *wq)
-{
- return wq->sq.in_use == (wq->sq.size - 1);
-}
-
static inline u32 t4_sq_avail(struct t4_wq *wq)
{
return wq->sq.size - 1 - wq->sq.in_use;
@@ -423,7 +546,6 @@ static inline void t4_sq_produce(struct t4_wq *wq, u8 len16)
static inline void t4_sq_consume(struct t4_wq *wq)
{
- BUG_ON(wq->sq.in_use < 1);
if (wq->sq.cidx == wq->sq.flush_cidx)
wq->sq.flush_cidx = -1;
wq->sq.in_use--;
@@ -457,6 +579,25 @@ static inline void pio_copy(u64 __iomem *dst, u64 *src)
}
}
+static inline void t4_ring_srq_db(struct t4_srq *srq, u16 inc, u8 len16,
+ union t4_recv_wr *wqe)
+{
+ /* Flush host queue memory writes. */
+ wmb();
+ if (inc == 1 && srq->bar2_qid == 0 && wqe) {
+ pr_debug("%s : WC srq->pidx = %d; len16=%d\n",
+ __func__, srq->pidx, len16);
+ pio_copy(srq->bar2_va + SGE_UDB_WCDOORBELL, (u64 *)wqe);
+ } else {
+ pr_debug("%s: DB srq->pidx = %d; len16=%d\n",
+ __func__, srq->pidx, len16);
+ writel(PIDX_T5_V(inc) | QID_V(srq->bar2_qid),
+ srq->bar2_va + SGE_UDB_KDOORBELL);
+ }
+ /* Flush user doorbell area writes. */
+ wmb();
+}
+
static inline void t4_ring_sq_db(struct t4_wq *wq, u16 inc, union t4_wr *wqe)
{
@@ -464,14 +605,12 @@ static inline void t4_ring_sq_db(struct t4_wq *wq, u16 inc, union t4_wr *wqe)
wmb();
if (wq->sq.bar2_va) {
if (inc == 1 && wq->sq.bar2_qid == 0 && wqe) {
- PDBG("%s: WC wq->sq.pidx = %d\n",
- __func__, wq->sq.pidx);
+ pr_debug("WC wq->sq.pidx = %d\n", wq->sq.pidx);
pio_copy((u64 __iomem *)
(wq->sq.bar2_va + SGE_UDB_WCDOORBELL),
(u64 *)wqe);
} else {
- PDBG("%s: DB wq->sq.pidx = %d\n",
- __func__, wq->sq.pidx);
+ pr_debug("DB wq->sq.pidx = %d\n", wq->sq.pidx);
writel(PIDX_T5_V(inc) | QID_V(wq->sq.bar2_qid),
wq->sq.bar2_va + SGE_UDB_KDOORBELL);
}
@@ -491,14 +630,12 @@ static inline void t4_ring_rq_db(struct t4_wq *wq, u16 inc,
wmb();
if (wq->rq.bar2_va) {
if (inc == 1 && wq->rq.bar2_qid == 0 && wqe) {
- PDBG("%s: WC wq->rq.pidx = %d\n",
- __func__, wq->rq.pidx);
+ pr_debug("WC wq->rq.pidx = %d\n", wq->rq.pidx);
pio_copy((u64 __iomem *)
(wq->rq.bar2_va + SGE_UDB_WCDOORBELL),
(void *)wqe);
} else {
- PDBG("%s: DB wq->rq.pidx = %d\n",
- __func__, wq->rq.pidx);
+ pr_debug("DB wq->rq.pidx = %d\n", wq->rq.pidx);
writel(PIDX_T5_V(inc) | QID_V(wq->rq.bar2_qid),
wq->rq.bar2_va + SGE_UDB_KDOORBELL);
}
@@ -512,12 +649,14 @@ static inline void t4_ring_rq_db(struct t4_wq *wq, u16 inc,
static inline int t4_wq_in_error(struct t4_wq *wq)
{
- return wq->rq.queue[wq->rq.size].status.qp_err;
+ return *wq->qp_errp;
}
-static inline void t4_set_wq_in_error(struct t4_wq *wq)
+static inline void t4_set_wq_in_error(struct t4_wq *wq, u32 srqidx)
{
- wq->rq.queue[wq->rq.size].status.qp_err = 1;
+ if (srqidx)
+ *wq->srqidxp = srqidx;
+ *wq->qp_errp = 1;
}
static inline void t4_disable_wq_db(struct t4_wq *wq)
@@ -530,11 +669,6 @@ static inline void t4_enable_wq_db(struct t4_wq *wq)
wq->rq.queue[wq->rq.size].status.db_off = 0;
}
-static inline int t4_wq_db_enabled(struct t4_wq *wq)
-{
- return !wq->rq.queue[wq->rq.size].status.db_off;
-}
-
enum t4_cq_flags {
CQ_ARMED = 1,
};
@@ -562,6 +696,7 @@ struct t4_cq {
u16 cidx_inc;
u8 gen;
u8 error;
+ u8 *qp_errp;
unsigned long flags;
};
@@ -599,9 +734,11 @@ static inline void t4_swcq_produce(struct t4_cq *cq)
{
cq->sw_in_use++;
if (cq->sw_in_use == cq->size) {
- PDBG("%s cxgb4 sw cq overflow cqid %u\n", __func__, cq->cqid);
+ pr_warn("%s cxgb4 sw cq overflow cqid %u\n",
+ __func__, cq->cqid);
cq->error = 1;
- BUG_ON(1);
+ cq->sw_in_use--;
+ return;
}
if (++cq->sw_pidx == cq->size)
cq->sw_pidx = 0;
@@ -609,7 +746,6 @@ static inline void t4_swcq_produce(struct t4_cq *cq)
static inline void t4_swcq_consume(struct t4_cq *cq)
{
- BUG_ON(cq->sw_in_use < 1);
cq->sw_in_use--;
if (++cq->sw_cidx == cq->size)
cq->sw_cidx = 0;
@@ -654,8 +790,7 @@ static inline int t4_next_hw_cqe(struct t4_cq *cq, struct t4_cqe **cqe)
if (cq->queue[prev_cidx].bits_type_ts != cq->bits_type_ts) {
ret = -EOVERFLOW;
cq->error = 1;
- printk(KERN_ERR MOD "cq overflow cqid %u\n", cq->cqid);
- BUG_ON(1);
+ pr_err("cq overflow cqid %u\n", cq->cqid);
} else if (t4_valid_cqe(cq, &cq->queue[cq->cidx])) {
/* Ensure CQE is flushed to memory */
@@ -667,19 +802,6 @@ static inline int t4_next_hw_cqe(struct t4_cq *cq, struct t4_cqe **cqe)
return ret;
}
-static inline struct t4_cqe *t4_next_sw_cqe(struct t4_cq *cq)
-{
- if (cq->sw_in_use == cq->size) {
- PDBG("%s cxgb4 sw cq overflow cqid %u\n", __func__, cq->cqid);
- cq->error = 1;
- BUG_ON(1);
- return NULL;
- }
- if (cq->sw_in_use)
- return &cq->sw_queue[cq->sw_cidx];
- return NULL;
-}
-
static inline int t4_next_cqe(struct t4_cq *cq, struct t4_cqe **cqe)
{
int ret = 0;
@@ -693,20 +815,15 @@ static inline int t4_next_cqe(struct t4_cq *cq, struct t4_cqe **cqe)
return ret;
}
-static inline int t4_cq_in_error(struct t4_cq *cq)
-{
- return ((struct t4_status_page *)&cq->queue[cq->size])->qp_err;
-}
-
static inline void t4_set_cq_in_error(struct t4_cq *cq)
{
- ((struct t4_status_page *)&cq->queue[cq->size])->qp_err = 1;
+ *cq->qp_errp = 1;
}
#endif
struct t4_dev_status_page {
u8 db_off;
- u8 pad1;
+ u8 write_cmpl_supported;
u16 pad2;
u32 pad3;
u64 qp_start;
diff --git a/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h b/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h
index 010c709ba3bb..1f79537fc8d1 100644
--- a/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h
+++ b/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h
@@ -50,7 +50,8 @@ enum fw_ri_wr_opcode {
FW_RI_BYPASS = 0xd,
FW_RI_RECEIVE = 0xe,
- FW_RI_SGE_EC_CR_RETURN = 0xf
+ FW_RI_SGE_EC_CR_RETURN = 0xf,
+ FW_RI_WRITE_IMMEDIATE = FW_RI_RDMA_INIT
};
enum fw_ri_wr_flags {
@@ -59,7 +60,8 @@ enum fw_ri_wr_flags {
FW_RI_SOLICITED_EVENT_FLAG = 0x04,
FW_RI_READ_FENCE_FLAG = 0x08,
FW_RI_LOCAL_FENCE_FLAG = 0x10,
- FW_RI_RDMA_READ_INVALIDATE = 0x20
+ FW_RI_RDMA_READ_INVALIDATE = 0x20,
+ FW_RI_RDMA_WRITE_WITH_IMMEDIATE = 0x40
};
enum fw_ri_mpa_attrs {
@@ -120,9 +122,7 @@ struct fw_ri_dsgl {
__be16 nsge;
__be32 len0;
__be64 addr0;
-#ifndef C99_NOT_SUPPORTED
- struct fw_ri_dsge_pair sge[0];
-#endif
+ struct fw_ri_dsge_pair sge[];
};
struct fw_ri_sge {
@@ -136,9 +136,7 @@ struct fw_ri_isgl {
__u8 r1;
__be16 nsge;
__be32 r2;
-#ifndef C99_NOT_SUPPORTED
- struct fw_ri_sge sge[0];
-#endif
+ struct fw_ri_sge sge[];
};
struct fw_ri_immd {
@@ -146,9 +144,7 @@ struct fw_ri_immd {
__u8 r1;
__be16 r2;
__be32 immdlen;
-#ifndef C99_NOT_SUPPORTED
- __u8 data[0];
-#endif
+ __u8 data[];
};
struct fw_ri_tpte {
@@ -263,6 +259,7 @@ enum fw_ri_res_type {
FW_RI_RES_TYPE_SQ,
FW_RI_RES_TYPE_RQ,
FW_RI_RES_TYPE_CQ,
+ FW_RI_RES_TYPE_SRQ,
};
enum fw_ri_res_op {
@@ -296,6 +293,20 @@ struct fw_ri_res {
__be32 r6_lo;
__be64 r7;
} cq;
+ struct fw_ri_res_srq {
+ __u8 restype;
+ __u8 op;
+ __be16 r3;
+ __be32 eqid;
+ __be32 r4[2];
+ __be32 fetchszm_to_iqid;
+ __be32 dcaen_to_eqsize;
+ __be64 eqaddr;
+ __be32 srqid;
+ __be32 pdid;
+ __be32 hwsrqsize;
+ __be32 hwsrqaddr;
+ } srq;
} u;
};
@@ -303,9 +314,7 @@ struct fw_ri_res_wr {
__be32 op_nres;
__be32 len16_pkd;
__u64 cookie;
-#ifndef C99_NOT_SUPPORTED
- struct fw_ri_res res[0];
-#endif
+ struct fw_ri_res res[];
};
#define FW_RI_RES_WR_NRES_S 0
@@ -531,16 +540,24 @@ struct fw_ri_rdma_write_wr {
__u16 wrid;
__u8 r1[3];
__u8 len16;
- __be64 r2;
+ /*
+ * Use union for immediate data to be consistent with stack's 32 bit
+ * data and iWARP spec's 64 bit data.
+ */
+ union {
+ struct {
+ __be32 imm_data32;
+ u32 reserved;
+ } ib_imm_data;
+ __be64 imm_data64;
+ } iw_imm_data;
__be32 plen;
__be32 stag_sink;
__be64 to_sink;
-#ifndef C99_NOT_SUPPORTED
union {
- struct fw_ri_immd immd_src[0];
- struct fw_ri_isgl isgl_src[0];
+ DECLARE_FLEX_ARRAY(struct fw_ri_immd, immd_src);
+ DECLARE_FLEX_ARRAY(struct fw_ri_isgl, isgl_src);
} u;
-#endif
};
struct fw_ri_send_wr {
@@ -554,12 +571,10 @@ struct fw_ri_send_wr {
__be32 plen;
__be32 r3;
__be64 r4;
-#ifndef C99_NOT_SUPPORTED
union {
- struct fw_ri_immd immd_src[0];
- struct fw_ri_isgl isgl_src[0];
+ DECLARE_FLEX_ARRAY(struct fw_ri_immd, immd_src);
+ DECLARE_FLEX_ARRAY(struct fw_ri_isgl, isgl_src);
} u;
-#endif
};
#define FW_RI_SEND_WR_SENDOP_S 0
@@ -568,6 +583,35 @@ struct fw_ri_send_wr {
#define FW_RI_SEND_WR_SENDOP_G(x) \
(((x) >> FW_RI_SEND_WR_SENDOP_S) & FW_RI_SEND_WR_SENDOP_M)
+struct fw_ri_rdma_write_cmpl_wr {
+ __u8 opcode;
+ __u8 flags;
+ __u16 wrid;
+ __u8 r1[3];
+ __u8 len16;
+ __u8 r2;
+ __u8 flags_send;
+ __u16 wrid_send;
+ __be32 stag_inv;
+ __be32 plen;
+ __be32 stag_sink;
+ __be64 to_sink;
+ union fw_ri_cmpl {
+ struct fw_ri_immd_cmpl {
+ __u8 op;
+ __u8 r1[6];
+ __u8 immdlen;
+ __u8 data[16];
+ } immd_src;
+ struct fw_ri_isgl isgl_src;
+ } u_cmpl;
+ __be64 r3;
+ union fw_ri_write {
+ DECLARE_FLEX_ARRAY(struct fw_ri_immd, immd_src);
+ DECLARE_FLEX_ARRAY(struct fw_ri_isgl, isgl_src);
+ } u;
+};
+
struct fw_ri_rdma_read_wr {
__u8 opcode;
__u8 flags;
@@ -675,8 +719,8 @@ struct fw_ri_fr_nsmr_tpte_wr {
__u16 wrid;
__u8 r1[3];
__u8 len16;
- __u32 r2;
- __u32 stag;
+ __be32 r2;
+ __be32 stag;
struct fw_ri_tpte tpte;
__u64 pbl[2];
};
@@ -707,6 +751,10 @@ enum fw_ri_init_p2ptype {
FW_RI_INIT_P2PTYPE_DISABLED = 0xf,
};
+enum fw_ri_init_rqeqid_srq {
+ FW_RI_INIT_RQEQID_SRQ = 1 << 31,
+};
+
struct fw_ri_wr {
__be32 op_compl;
__be32 flowid_len16;
diff --git a/drivers/infiniband/hw/efa/Kconfig b/drivers/infiniband/hw/efa/Kconfig
new file mode 100644
index 000000000000..457e18ba1d57
--- /dev/null
+++ b/drivers/infiniband/hw/efa/Kconfig
@@ -0,0 +1,15 @@
+# SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
+# Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved.
+#
+# Amazon fabric device configuration
+#
+
+config INFINIBAND_EFA
+ tristate "Amazon Elastic Fabric Adapter (EFA) support"
+ depends on PCI_MSI && 64BIT && !CPU_BIG_ENDIAN
+ depends on INFINIBAND_USER_ACCESS
+ help
+ This driver supports Amazon Elastic Fabric Adapter (EFA).
+
+ To compile this driver as a module, choose M here.
+ The module will be called efa.
diff --git a/drivers/infiniband/hw/efa/Makefile b/drivers/infiniband/hw/efa/Makefile
new file mode 100644
index 000000000000..6e83083af0bc
--- /dev/null
+++ b/drivers/infiniband/hw/efa/Makefile
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
+# Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved.
+#
+# Makefile for Amazon Elastic Fabric Adapter (EFA) device driver.
+#
+
+obj-$(CONFIG_INFINIBAND_EFA) += efa.o
+
+efa-y := efa_com_cmd.o efa_com.o efa_main.o efa_verbs.o
diff --git a/drivers/infiniband/hw/efa/efa.h b/drivers/infiniband/hw/efa/efa.h
new file mode 100644
index 000000000000..96f9c3bc98b2
--- /dev/null
+++ b/drivers/infiniband/hw/efa/efa.h
@@ -0,0 +1,198 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
+/*
+ * Copyright 2018-2025 Amazon.com, Inc. or its affiliates. All rights reserved.
+ */
+
+#ifndef _EFA_H_
+#define _EFA_H_
+
+#include <linux/bitops.h>
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+
+#include <rdma/efa-abi.h>
+#include <rdma/ib_verbs.h>
+
+#include "efa_com_cmd.h"
+
+#define DRV_MODULE_NAME "efa"
+#define DEVICE_NAME "Elastic Fabric Adapter (EFA)"
+
+#define EFA_IRQNAME_SIZE 40
+
+#define EFA_MGMNT_MSIX_VEC_IDX 0
+#define EFA_COMP_EQS_VEC_BASE 1
+
+struct efa_irq {
+ irq_handler_t handler;
+ void *data;
+ u32 irqn;
+ u32 vector;
+ cpumask_t affinity_hint_mask;
+ char name[EFA_IRQNAME_SIZE];
+};
+
+/* Don't use anything other than atomic64 */
+struct efa_stats {
+ atomic64_t alloc_pd_err;
+ atomic64_t create_qp_err;
+ atomic64_t create_cq_err;
+ atomic64_t reg_mr_err;
+ atomic64_t alloc_ucontext_err;
+ atomic64_t create_ah_err;
+ atomic64_t mmap_err;
+ atomic64_t keep_alive_rcvd;
+};
+
+struct efa_dev {
+ struct ib_device ibdev;
+ struct efa_com_dev edev;
+ struct pci_dev *pdev;
+ struct efa_com_get_device_attr_result dev_attr;
+
+ u64 reg_bar_addr;
+ u64 reg_bar_len;
+ u64 mem_bar_addr;
+ u64 mem_bar_len;
+ u64 db_bar_addr;
+ u64 db_bar_len;
+
+ u32 num_irq_vectors;
+ u32 admin_msix_vector_idx;
+ struct efa_irq admin_irq;
+
+ struct efa_stats stats;
+
+ /* Array of completion EQs */
+ struct efa_eq *eqs;
+ u32 neqs;
+
+ /* Only stores CQs with interrupts enabled */
+ struct xarray cqs_xa;
+};
+
+struct efa_ucontext {
+ struct ib_ucontext ibucontext;
+ u16 uarn;
+};
+
+struct efa_pd {
+ struct ib_pd ibpd;
+ u16 pdn;
+};
+
+struct efa_mr_interconnect_info {
+ u16 recv_ic_id;
+ u16 rdma_read_ic_id;
+ u16 rdma_recv_ic_id;
+ u8 recv_ic_id_valid : 1;
+ u8 rdma_read_ic_id_valid : 1;
+ u8 rdma_recv_ic_id_valid : 1;
+};
+
+struct efa_mr {
+ struct ib_mr ibmr;
+ struct ib_umem *umem;
+ struct efa_mr_interconnect_info ic_info;
+};
+
+struct efa_cq {
+ struct ib_cq ibcq;
+ struct efa_ucontext *ucontext;
+ dma_addr_t dma_addr;
+ void *cpu_addr;
+ struct rdma_user_mmap_entry *mmap_entry;
+ struct rdma_user_mmap_entry *db_mmap_entry;
+ size_t size;
+ u16 cq_idx;
+ /* NULL when no interrupts requested */
+ struct efa_eq *eq;
+ struct ib_umem *umem;
+};
+
+struct efa_qp {
+ struct ib_qp ibqp;
+ dma_addr_t rq_dma_addr;
+ void *rq_cpu_addr;
+ size_t rq_size;
+ enum ib_qp_state state;
+
+ /* Used for saving mmap_xa entries */
+ struct rdma_user_mmap_entry *sq_db_mmap_entry;
+ struct rdma_user_mmap_entry *llq_desc_mmap_entry;
+ struct rdma_user_mmap_entry *rq_db_mmap_entry;
+ struct rdma_user_mmap_entry *rq_mmap_entry;
+
+ u32 qp_handle;
+ u32 max_send_wr;
+ u32 max_recv_wr;
+ u32 max_send_sge;
+ u32 max_recv_sge;
+ u32 max_inline_data;
+};
+
+struct efa_ah {
+ struct ib_ah ibah;
+ u16 ah;
+ /* dest_addr */
+ u8 id[EFA_GID_SIZE];
+};
+
+struct efa_eq {
+ struct efa_com_eq eeq;
+ struct efa_irq irq;
+};
+
+int efa_query_device(struct ib_device *ibdev,
+ struct ib_device_attr *props,
+ struct ib_udata *udata);
+int efa_query_port(struct ib_device *ibdev, u32 port,
+ struct ib_port_attr *props);
+int efa_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
+ int qp_attr_mask,
+ struct ib_qp_init_attr *qp_init_attr);
+int efa_query_gid(struct ib_device *ibdev, u32 port, int index,
+ union ib_gid *gid);
+int efa_query_pkey(struct ib_device *ibdev, u32 port, u16 index,
+ u16 *pkey);
+int efa_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata);
+int efa_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata);
+int efa_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata);
+int efa_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata);
+int efa_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata);
+int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+ struct uverbs_attr_bundle *attrs);
+int efa_create_cq_umem(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+ struct ib_umem *umem, struct uverbs_attr_bundle *attrs);
+struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length,
+ u64 virt_addr, int access_flags,
+ struct ib_dmah *dmah,
+ struct ib_udata *udata);
+struct ib_mr *efa_reg_user_mr_dmabuf(struct ib_pd *ibpd, u64 start,
+ u64 length, u64 virt_addr,
+ int fd, int access_flags,
+ struct ib_dmah *dmah,
+ struct uverbs_attr_bundle *attrs);
+int efa_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata);
+int efa_get_port_immutable(struct ib_device *ibdev, u32 port_num,
+ struct ib_port_immutable *immutable);
+int efa_alloc_ucontext(struct ib_ucontext *ibucontext, struct ib_udata *udata);
+void efa_dealloc_ucontext(struct ib_ucontext *ibucontext);
+int efa_mmap(struct ib_ucontext *ibucontext,
+ struct vm_area_struct *vma);
+void efa_mmap_free(struct rdma_user_mmap_entry *rdma_entry);
+int efa_create_ah(struct ib_ah *ibah,
+ struct rdma_ah_init_attr *init_attr,
+ struct ib_udata *udata);
+int efa_destroy_ah(struct ib_ah *ibah, u32 flags);
+int efa_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
+ int qp_attr_mask, struct ib_udata *udata);
+enum rdma_link_layer efa_port_link_layer(struct ib_device *ibdev,
+ u32 port_num);
+struct rdma_hw_stats *efa_alloc_hw_port_stats(struct ib_device *ibdev, u32 port_num);
+struct rdma_hw_stats *efa_alloc_hw_device_stats(struct ib_device *ibdev);
+int efa_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats,
+ u32 port_num, int index);
+
+#endif /* _EFA_H_ */
diff --git a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h
new file mode 100644
index 000000000000..57178dad5eb7
--- /dev/null
+++ b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h
@@ -0,0 +1,1140 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
+/*
+ * Copyright 2018-2025 Amazon.com, Inc. or its affiliates. All rights reserved.
+ */
+
+#ifndef _EFA_ADMIN_CMDS_H_
+#define _EFA_ADMIN_CMDS_H_
+
+#define EFA_ADMIN_API_VERSION_MAJOR 0
+#define EFA_ADMIN_API_VERSION_MINOR 1
+
+/* EFA admin queue opcodes */
+enum efa_admin_aq_opcode {
+ EFA_ADMIN_CREATE_QP = 1,
+ EFA_ADMIN_MODIFY_QP = 2,
+ EFA_ADMIN_QUERY_QP = 3,
+ EFA_ADMIN_DESTROY_QP = 4,
+ EFA_ADMIN_CREATE_AH = 5,
+ EFA_ADMIN_DESTROY_AH = 6,
+ EFA_ADMIN_REG_MR = 7,
+ EFA_ADMIN_DEREG_MR = 8,
+ EFA_ADMIN_CREATE_CQ = 9,
+ EFA_ADMIN_DESTROY_CQ = 10,
+ EFA_ADMIN_GET_FEATURE = 11,
+ EFA_ADMIN_SET_FEATURE = 12,
+ EFA_ADMIN_GET_STATS = 13,
+ EFA_ADMIN_ALLOC_PD = 14,
+ EFA_ADMIN_DEALLOC_PD = 15,
+ EFA_ADMIN_ALLOC_UAR = 16,
+ EFA_ADMIN_DEALLOC_UAR = 17,
+ EFA_ADMIN_CREATE_EQ = 18,
+ EFA_ADMIN_DESTROY_EQ = 19,
+ EFA_ADMIN_ALLOC_MR = 20,
+ EFA_ADMIN_MAX_OPCODE = 20,
+};
+
+enum efa_admin_aq_feature_id {
+ EFA_ADMIN_DEVICE_ATTR = 1,
+ EFA_ADMIN_AENQ_CONFIG = 2,
+ EFA_ADMIN_NETWORK_ATTR = 3,
+ EFA_ADMIN_QUEUE_ATTR = 4,
+ EFA_ADMIN_HW_HINTS = 5,
+ EFA_ADMIN_HOST_INFO = 6,
+ EFA_ADMIN_EVENT_QUEUE_ATTR = 7,
+};
+
+/* QP transport type */
+enum efa_admin_qp_type {
+ /* Unreliable Datagram */
+ EFA_ADMIN_QP_TYPE_UD = 1,
+ /* Scalable Reliable Datagram */
+ EFA_ADMIN_QP_TYPE_SRD = 2,
+};
+
+/* QP state */
+enum efa_admin_qp_state {
+ EFA_ADMIN_QP_STATE_RESET = 0,
+ EFA_ADMIN_QP_STATE_INIT = 1,
+ EFA_ADMIN_QP_STATE_RTR = 2,
+ EFA_ADMIN_QP_STATE_RTS = 3,
+ EFA_ADMIN_QP_STATE_SQD = 4,
+ EFA_ADMIN_QP_STATE_SQE = 5,
+ EFA_ADMIN_QP_STATE_ERR = 6,
+};
+
+enum efa_admin_get_stats_type {
+ EFA_ADMIN_GET_STATS_TYPE_BASIC = 0,
+ EFA_ADMIN_GET_STATS_TYPE_MESSAGES = 1,
+ EFA_ADMIN_GET_STATS_TYPE_RDMA_READ = 2,
+ EFA_ADMIN_GET_STATS_TYPE_RDMA_WRITE = 3,
+ EFA_ADMIN_GET_STATS_TYPE_NETWORK = 4,
+};
+
+enum efa_admin_get_stats_scope {
+ EFA_ADMIN_GET_STATS_SCOPE_ALL = 0,
+ EFA_ADMIN_GET_STATS_SCOPE_QUEUE = 1,
+};
+
+/*
+ * QP allocation sizes, converted by fabric QueuePair (QP) create command
+ * from QP capabilities.
+ */
+struct efa_admin_qp_alloc_size {
+ /* Send descriptor ring size in bytes */
+ u32 send_queue_ring_size;
+
+ /* Max number of WQEs that can be outstanding on send queue. */
+ u32 send_queue_depth;
+
+ /*
+ * Recv descriptor ring size in bytes, sufficient for user-provided
+ * number of WQEs
+ */
+ u32 recv_queue_ring_size;
+
+ /* Max number of WQEs that can be outstanding on recv queue */
+ u32 recv_queue_depth;
+};
+
+struct efa_admin_create_qp_cmd {
+ /* Common Admin Queue descriptor */
+ struct efa_admin_aq_common_desc aq_common_desc;
+
+ /* Protection Domain associated with this QP */
+ u16 pd;
+
+ /* QP type */
+ u8 qp_type;
+
+ /*
+ * 0 : sq_virt - If set, SQ ring base address is
+ * virtual (IOVA returned by MR registration)
+ * 1 : rq_virt - If set, RQ ring base address is
+ * virtual (IOVA returned by MR registration)
+ * 2 : unsolicited_write_recv - If set, work requests
+ * will not be consumed for incoming RDMA write with
+ * immediate
+ * 7:3 : reserved - MBZ
+ */
+ u8 flags;
+
+ /*
+ * Send queue (SQ) ring base physical address. This field is not
+ * used if this is a Low Latency Queue(LLQ).
+ */
+ u64 sq_base_addr;
+
+ /* Receive queue (RQ) ring base address. */
+ u64 rq_base_addr;
+
+ /* Index of CQ to be associated with Send Queue completions */
+ u32 send_cq_idx;
+
+ /* Index of CQ to be associated with Recv Queue completions */
+ u32 recv_cq_idx;
+
+ /*
+ * Memory registration key for the SQ ring, used only when not in
+ * LLQ mode and base address is virtual
+ */
+ u32 sq_l_key;
+
+ /*
+ * Memory registration key for the RQ ring, used only when base
+ * address is virtual
+ */
+ u32 rq_l_key;
+
+ /* Requested QP allocation sizes */
+ struct efa_admin_qp_alloc_size qp_alloc_size;
+
+ /* UAR number */
+ u16 uar;
+
+ /* Requested service level for the QP, 0 is the default SL */
+ u8 sl;
+
+ /* MBZ */
+ u8 reserved;
+
+ /* MBZ */
+ u32 reserved2;
+};
+
+struct efa_admin_create_qp_resp {
+ /* Common Admin Queue completion descriptor */
+ struct efa_admin_acq_common_desc acq_common_desc;
+
+ /*
+ * Opaque handle to be used for consequent admin operations on the
+ * QP
+ */
+ u32 qp_handle;
+
+ /*
+ * QP number in the given EFA virtual device. Least-significant bits (as
+ * needed according to max_qp) carry unique QP ID
+ */
+ u16 qp_num;
+
+ /* MBZ */
+ u16 reserved;
+
+ /* Index of sub-CQ for Send Queue completions */
+ u16 send_sub_cq_idx;
+
+ /* Index of sub-CQ for Receive Queue completions */
+ u16 recv_sub_cq_idx;
+
+ /* SQ doorbell address, as offset to PCIe DB BAR */
+ u32 sq_db_offset;
+
+ /* RQ doorbell address, as offset to PCIe DB BAR */
+ u32 rq_db_offset;
+
+ /*
+ * low latency send queue ring base address as an offset to PCIe
+ * MMIO LLQ_MEM BAR
+ */
+ u32 llq_descriptors_offset;
+};
+
+struct efa_admin_modify_qp_cmd {
+ /* Common Admin Queue descriptor */
+ struct efa_admin_aq_common_desc aq_common_desc;
+
+ /*
+ * Mask indicating which fields should be updated
+ * 0 : qp_state
+ * 1 : cur_qp_state
+ * 2 : qkey
+ * 3 : sq_psn
+ * 4 : sq_drained_async_notify
+ * 5 : rnr_retry
+ * 31:6 : reserved
+ */
+ u32 modify_mask;
+
+ /* QP handle returned by create_qp command */
+ u32 qp_handle;
+
+ /* QP state */
+ u32 qp_state;
+
+ /* Override current QP state (before applying the transition) */
+ u32 cur_qp_state;
+
+ /* QKey */
+ u32 qkey;
+
+ /* SQ PSN */
+ u32 sq_psn;
+
+ /* Enable async notification when SQ is drained */
+ u8 sq_drained_async_notify;
+
+ /* Number of RNR retries (valid only for SRD QPs) */
+ u8 rnr_retry;
+
+ /* MBZ */
+ u16 reserved2;
+};
+
+struct efa_admin_modify_qp_resp {
+ /* Common Admin Queue completion descriptor */
+ struct efa_admin_acq_common_desc acq_common_desc;
+};
+
+struct efa_admin_query_qp_cmd {
+ /* Common Admin Queue descriptor */
+ struct efa_admin_aq_common_desc aq_common_desc;
+
+ /* QP handle returned by create_qp command */
+ u32 qp_handle;
+};
+
+struct efa_admin_query_qp_resp {
+ /* Common Admin Queue completion descriptor */
+ struct efa_admin_acq_common_desc acq_common_desc;
+
+ /* QP state */
+ u32 qp_state;
+
+ /* QKey */
+ u32 qkey;
+
+ /* SQ PSN */
+ u32 sq_psn;
+
+ /* Indicates that draining is in progress */
+ u8 sq_draining;
+
+ /* Number of RNR retries (valid only for SRD QPs) */
+ u8 rnr_retry;
+
+ /* MBZ */
+ u16 reserved2;
+};
+
+struct efa_admin_destroy_qp_cmd {
+ /* Common Admin Queue descriptor */
+ struct efa_admin_aq_common_desc aq_common_desc;
+
+ /* QP handle returned by create_qp command */
+ u32 qp_handle;
+};
+
+struct efa_admin_destroy_qp_resp {
+ /* Common Admin Queue completion descriptor */
+ struct efa_admin_acq_common_desc acq_common_desc;
+};
+
+/*
+ * Create Address Handle command parameters. Must not be called more than
+ * once for the same destination
+ */
+struct efa_admin_create_ah_cmd {
+ /* Common Admin Queue descriptor */
+ struct efa_admin_aq_common_desc aq_common_desc;
+
+ /* Destination address in network byte order */
+ u8 dest_addr[16];
+
+ /* PD number */
+ u16 pd;
+
+ /* MBZ */
+ u16 reserved;
+};
+
+struct efa_admin_create_ah_resp {
+ /* Common Admin Queue completion descriptor */
+ struct efa_admin_acq_common_desc acq_common_desc;
+
+ /* Target interface address handle (opaque) */
+ u16 ah;
+
+ /* MBZ */
+ u16 reserved;
+};
+
+struct efa_admin_destroy_ah_cmd {
+ /* Common Admin Queue descriptor */
+ struct efa_admin_aq_common_desc aq_common_desc;
+
+ /* Target interface address handle (opaque) */
+ u16 ah;
+
+ /* PD number */
+ u16 pd;
+};
+
+struct efa_admin_destroy_ah_resp {
+ /* Common Admin Queue completion descriptor */
+ struct efa_admin_acq_common_desc acq_common_desc;
+};
+
+/*
+ * Registration of MemoryRegion, required for QP working with Virtual
+ * Addresses. In standard verbs semantics, region length is limited to 2GB
+ * space, but EFA offers larger MR support for large memory space, to ease
+ * on users working with very large datasets (i.e. full GPU memory mapping).
+ */
+struct efa_admin_reg_mr_cmd {
+ /* Common Admin Queue descriptor */
+ struct efa_admin_aq_common_desc aq_common_desc;
+
+ /* Protection Domain */
+ u16 pd;
+
+ /* MBZ */
+ u16 reserved16_w1;
+
+ /* Physical Buffer List, each element is page-aligned. */
+ union {
+ /*
+ * Inline array of guest-physical page addresses of user
+ * memory pages (optimization for short region
+ * registrations)
+ */
+ u64 inline_pbl_array[4];
+
+ /* points to PBL (direct or indirect, chained if needed) */
+ struct efa_admin_ctrl_buff_info pbl;
+ } pbl;
+
+ /* Memory region length, in bytes. */
+ u64 mr_length;
+
+ /*
+ * flags and page size
+ * 4:0 : phys_page_size_shift - page size is (1 <<
+ * phys_page_size_shift). Page size is used for
+ * building the Virtual to Physical address mapping
+ * 6:5 : reserved - MBZ
+ * 7 : mem_addr_phy_mode_en - Enable bit for physical
+ * memory registration (no translation), can be used
+ * only by privileged clients. If set, PBL must
+ * contain a single entry.
+ */
+ u8 flags;
+
+ /*
+ * permissions
+ * 0 : local_write_enable - Local write permissions:
+ * must be set for RQ buffers and buffers posted for
+ * RDMA Read requests
+ * 1 : remote_write_enable - Remote write
+ * permissions: must be set to enable RDMA write to
+ * the region
+ * 2 : remote_read_enable - Remote read permissions:
+ * must be set to enable RDMA read from the region
+ * 7:3 : reserved2 - MBZ
+ */
+ u8 permissions;
+
+ /* MBZ */
+ u16 reserved16_w5;
+
+ /* number of pages in PBL (redundant, could be calculated) */
+ u32 page_num;
+
+ /*
+ * IO Virtual Address associated with this MR. If
+ * mem_addr_phy_mode_en is set, contains the physical address of
+ * the region.
+ */
+ u64 iova;
+};
+
+struct efa_admin_reg_mr_resp {
+ /* Common Admin Queue completion descriptor */
+ struct efa_admin_acq_common_desc acq_common_desc;
+
+ /*
+ * L_Key, to be used in conjunction with local buffer references in
+ * SQ and RQ WQE, or with virtual RQ/CQ rings
+ */
+ u32 l_key;
+
+ /*
+ * R_Key, to be used in RDMA messages to refer to remotely accessed
+ * memory region
+ */
+ u32 r_key;
+
+ /*
+ * Mask indicating which fields have valid values
+ * 0 : recv_ic_id
+ * 1 : rdma_read_ic_id
+ * 2 : rdma_recv_ic_id
+ */
+ u8 validity;
+
+ /*
+ * Physical interconnect used by the device to reach the MR for receive
+ * operation
+ */
+ u8 recv_ic_id;
+
+ /*
+ * Physical interconnect used by the device to reach the MR for RDMA
+ * read operation
+ */
+ u8 rdma_read_ic_id;
+
+ /*
+ * Physical interconnect used by the device to reach the MR for RDMA
+ * write receive
+ */
+ u8 rdma_recv_ic_id;
+};
+
+struct efa_admin_dereg_mr_cmd {
+ /* Common Admin Queue descriptor */
+ struct efa_admin_aq_common_desc aq_common_desc;
+
+ /* L_Key, memory region's l_key */
+ u32 l_key;
+};
+
+struct efa_admin_dereg_mr_resp {
+ /* Common Admin Queue completion descriptor */
+ struct efa_admin_acq_common_desc acq_common_desc;
+};
+
+/*
+ * Allocation of MemoryRegion, required for QP working with Virtual
+ * Addresses in kernel verbs semantics, ready for fast registration use.
+ */
+struct efa_admin_alloc_mr_cmd {
+ /* Common Admin Queue descriptor */
+ struct efa_admin_aq_common_desc aq_common_desc;
+
+ /* Protection Domain */
+ u16 pd;
+
+ /* MBZ */
+ u16 reserved1;
+
+ /* Maximum number of pages this MR supports. */
+ u32 max_pages;
+};
+
+struct efa_admin_alloc_mr_resp {
+ /* Common Admin Queue completion descriptor */
+ struct efa_admin_acq_common_desc acq_common_desc;
+
+ /*
+ * L_Key, to be used in conjunction with local buffer references in
+ * SQ and RQ WQE, or with virtual RQ/CQ rings
+ */
+ u32 l_key;
+
+ /*
+ * R_Key, to be used in RDMA messages to refer to remotely accessed
+ * memory region
+ */
+ u32 r_key;
+};
+
+struct efa_admin_create_cq_cmd {
+ struct efa_admin_aq_common_desc aq_common_desc;
+
+ /*
+ * 4:0 : reserved5 - MBZ
+ * 5 : interrupt_mode_enabled - if set, cq operates
+ * in interrupt mode (i.e. CQ events and EQ elements
+ * are generated), otherwise - polling
+ * 6 : virt - If set, ring base address is virtual
+ * (IOVA returned by MR registration)
+ * 7 : reserved6 - MBZ
+ */
+ u8 cq_caps_1;
+
+ /*
+ * 4:0 : cq_entry_size_words - size of CQ entry in
+ * 32-bit words, valid values: 4, 8.
+ * 5 : set_src_addr - If set, source address will be
+ * filled on RX completions from unknown senders.
+ * Requires 8 words CQ entry size.
+ * 7:6 : reserved7 - MBZ
+ */
+ u8 cq_caps_2;
+
+ /* Sub completion queue depth in # of entries. must be power of 2 */
+ u16 sub_cq_depth;
+
+ /* EQ number assigned to this cq */
+ u16 eqn;
+
+ /* MBZ */
+ u16 reserved;
+
+ /*
+ * CQ ring base address, virtual or physical depending on 'virt'
+ * flag
+ */
+ struct efa_common_mem_addr cq_ba;
+
+ /*
+ * Memory registration key for the ring, used only when base
+ * address is virtual
+ */
+ u32 l_key;
+
+ /*
+ * number of sub cqs - must be equal to sub_cqs_per_cq of queue
+ * attributes.
+ */
+ u16 num_sub_cqs;
+
+ /* UAR number */
+ u16 uar;
+};
+
+struct efa_admin_create_cq_resp {
+ struct efa_admin_acq_common_desc acq_common_desc;
+
+ u16 cq_idx;
+
+ /* actual sub cq depth in number of entries */
+ u16 sub_cq_actual_depth;
+
+ /* CQ doorbell address, as offset to PCIe DB BAR */
+ u32 db_offset;
+
+ /*
+ * 0 : db_valid - If set, doorbell offset is valid.
+ * Always set when interrupts are requested.
+ */
+ u32 flags;
+};
+
+struct efa_admin_destroy_cq_cmd {
+ struct efa_admin_aq_common_desc aq_common_desc;
+
+ u16 cq_idx;
+
+ /* MBZ */
+ u16 reserved1;
+};
+
+struct efa_admin_destroy_cq_resp {
+ struct efa_admin_acq_common_desc acq_common_desc;
+};
+
+/*
+ * EFA AQ Get Statistics command. Extended statistics are placed in control
+ * buffer pointed by AQ entry
+ */
+struct efa_admin_aq_get_stats_cmd {
+ struct efa_admin_aq_common_desc aq_common_descriptor;
+
+ union {
+ /* command specific inline data */
+ u32 inline_data_w1[3];
+
+ struct efa_admin_ctrl_buff_info control_buffer;
+ } u;
+
+ /* stats type as defined in enum efa_admin_get_stats_type */
+ u8 type;
+
+ /* stats scope defined in enum efa_admin_get_stats_scope */
+ u8 scope;
+
+ u16 scope_modifier;
+};
+
+struct efa_admin_basic_stats {
+ u64 tx_bytes;
+
+ u64 tx_pkts;
+
+ u64 rx_bytes;
+
+ u64 rx_pkts;
+
+ u64 rx_drops;
+
+ u64 qkey_viol;
+};
+
+struct efa_admin_messages_stats {
+ u64 send_bytes;
+
+ u64 send_wrs;
+
+ u64 recv_bytes;
+
+ u64 recv_wrs;
+};
+
+struct efa_admin_rdma_read_stats {
+ u64 read_wrs;
+
+ u64 read_bytes;
+
+ u64 read_wr_err;
+
+ u64 read_resp_bytes;
+};
+
+struct efa_admin_rdma_write_stats {
+ u64 write_wrs;
+
+ u64 write_bytes;
+
+ u64 write_wr_err;
+
+ u64 write_recv_bytes;
+};
+
+struct efa_admin_network_stats {
+ u64 retrans_bytes;
+
+ u64 retrans_pkts;
+
+ u64 retrans_timeout_events;
+
+ u64 unresponsive_remote_events;
+
+ u64 impaired_remote_conn_events;
+};
+
+struct efa_admin_acq_get_stats_resp {
+ struct efa_admin_acq_common_desc acq_common_desc;
+
+ union {
+ struct efa_admin_basic_stats basic_stats;
+
+ struct efa_admin_messages_stats messages_stats;
+
+ struct efa_admin_rdma_read_stats rdma_read_stats;
+
+ struct efa_admin_rdma_write_stats rdma_write_stats;
+
+ struct efa_admin_network_stats network_stats;
+ } u;
+};
+
+struct efa_admin_get_set_feature_common_desc {
+ /* MBZ */
+ u8 reserved0;
+
+ /* as appears in efa_admin_aq_feature_id */
+ u8 feature_id;
+
+ /* MBZ */
+ u16 reserved16;
+};
+
+struct efa_admin_feature_device_attr_desc {
+ /* Bitmap of efa_admin_aq_feature_id */
+ u64 supported_features;
+
+ /* Bitmap of supported page sizes in MR registrations */
+ u64 page_size_cap;
+
+ u32 fw_version;
+
+ u32 admin_api_version;
+
+ u32 device_version;
+
+ /* Bar used for SQ and RQ doorbells */
+ u16 db_bar;
+
+ /* Indicates how many bits are used on physical address access */
+ u8 phys_addr_width;
+
+ /* Indicates how many bits are used on virtual address access */
+ u8 virt_addr_width;
+
+ /*
+ * 0 : rdma_read - If set, RDMA Read is supported on
+ * TX queues
+ * 1 : rnr_retry - If set, RNR retry is supported on
+ * modify QP command
+ * 2 : data_polling_128 - If set, 128 bytes data
+ * polling is supported
+ * 3 : rdma_write - If set, RDMA Write is supported
+ * on TX queues
+ * 4 : unsolicited_write_recv - If set, unsolicited
+ * write with imm. receive is supported
+ * 31:5 : reserved - MBZ
+ */
+ u32 device_caps;
+
+ /* Max RDMA transfer size in bytes */
+ u32 max_rdma_size;
+
+ /* Unique global ID for an EFA device */
+ u64 guid;
+
+ /* The device maximum link speed in Gbit/sec */
+ u16 max_link_speed_gbps;
+
+ /* MBZ */
+ u16 reserved0;
+
+ /* MBZ */
+ u32 reserved1;
+};
+
+struct efa_admin_feature_queue_attr_desc {
+ /* The maximum number of queue pairs supported */
+ u32 max_qp;
+
+ /* Maximum number of WQEs per Send Queue */
+ u32 max_sq_depth;
+
+ /* Maximum size of data that can be sent inline in a Send WQE */
+ u32 inline_buf_size;
+
+ /* Maximum number of buffer descriptors per Recv Queue */
+ u32 max_rq_depth;
+
+ /* The maximum number of completion queues supported per VF */
+ u32 max_cq;
+
+ /* Maximum number of CQEs per Completion Queue */
+ u32 max_cq_depth;
+
+ /* Number of sub-CQs to be created for each CQ */
+ u16 sub_cqs_per_cq;
+
+ /* Minimum number of WQEs per SQ */
+ u16 min_sq_depth;
+
+ /* Maximum number of SGEs (buffers) allowed for a single send WQE */
+ u16 max_wr_send_sges;
+
+ /* Maximum number of SGEs allowed for a single recv WQE */
+ u16 max_wr_recv_sges;
+
+ /* The maximum number of memory regions supported */
+ u32 max_mr;
+
+ /* The maximum number of pages can be registered */
+ u32 max_mr_pages;
+
+ /* The maximum number of protection domains supported */
+ u32 max_pd;
+
+ /* The maximum number of address handles supported */
+ u32 max_ah;
+
+ /* The maximum size of LLQ in bytes */
+ u32 max_llq_size;
+
+ /* Maximum number of SGEs for a single RDMA read/write WQE */
+ u16 max_wr_rdma_sges;
+
+ /*
+ * Maximum number of bytes that can be written to SQ between two
+ * consecutive doorbells (in units of 64B). Driver must ensure that only
+ * complete WQEs are written to queue before issuing a doorbell.
+ * Examples: max_tx_batch=16 and WQE size = 64B, means up to 16 WQEs can
+ * be written to SQ between two consecutive doorbells. max_tx_batch=11
+ * and WQE size = 128B, means up to 5 WQEs can be written to SQ between
+ * two consecutive doorbells. Zero means unlimited.
+ */
+ u16 max_tx_batch;
+};
+
+struct efa_admin_event_queue_attr_desc {
+ /* The maximum number of event queues supported */
+ u32 max_eq;
+
+ /* Maximum number of EQEs per Event Queue */
+ u32 max_eq_depth;
+
+ /* Supported events bitmask */
+ u32 event_bitmask;
+};
+
+struct efa_admin_feature_aenq_desc {
+ /* bitmask for AENQ groups the device can report */
+ u32 supported_groups;
+
+ /* bitmask for AENQ groups to report */
+ u32 enabled_groups;
+};
+
+struct efa_admin_feature_network_attr_desc {
+ /* Raw address data in network byte order */
+ u8 addr[16];
+
+ /* max packet payload size in bytes */
+ u32 mtu;
+};
+
+/*
+ * When hint value is 0, hints capabilities are not supported or driver
+ * should use its own predefined value
+ */
+struct efa_admin_hw_hints {
+ /* value in ms */
+ u16 mmio_read_timeout;
+
+ /* value in ms */
+ u16 driver_watchdog_timeout;
+
+ /* value in ms */
+ u16 admin_completion_timeout;
+
+ /* poll interval in ms */
+ u16 poll_interval;
+};
+
+struct efa_admin_get_feature_cmd {
+ struct efa_admin_aq_common_desc aq_common_descriptor;
+
+ struct efa_admin_ctrl_buff_info control_buffer;
+
+ struct efa_admin_get_set_feature_common_desc feature_common;
+
+ u32 raw[11];
+};
+
+struct efa_admin_get_feature_resp {
+ struct efa_admin_acq_common_desc acq_common_desc;
+
+ union {
+ u32 raw[14];
+
+ struct efa_admin_feature_device_attr_desc device_attr;
+
+ struct efa_admin_feature_aenq_desc aenq;
+
+ struct efa_admin_feature_network_attr_desc network_attr;
+
+ struct efa_admin_feature_queue_attr_desc queue_attr;
+
+ struct efa_admin_event_queue_attr_desc event_queue_attr;
+
+ struct efa_admin_hw_hints hw_hints;
+ } u;
+};
+
+struct efa_admin_set_feature_cmd {
+ struct efa_admin_aq_common_desc aq_common_descriptor;
+
+ struct efa_admin_ctrl_buff_info control_buffer;
+
+ struct efa_admin_get_set_feature_common_desc feature_common;
+
+ union {
+ u32 raw[11];
+
+ /* AENQ configuration */
+ struct efa_admin_feature_aenq_desc aenq;
+ } u;
+};
+
+struct efa_admin_set_feature_resp {
+ struct efa_admin_acq_common_desc acq_common_desc;
+
+ union {
+ u32 raw[14];
+ } u;
+};
+
+struct efa_admin_alloc_pd_cmd {
+ struct efa_admin_aq_common_desc aq_common_descriptor;
+};
+
+struct efa_admin_alloc_pd_resp {
+ struct efa_admin_acq_common_desc acq_common_desc;
+
+ /* PD number */
+ u16 pd;
+
+ /* MBZ */
+ u16 reserved;
+};
+
+struct efa_admin_dealloc_pd_cmd {
+ struct efa_admin_aq_common_desc aq_common_descriptor;
+
+ /* PD number */
+ u16 pd;
+
+ /* MBZ */
+ u16 reserved;
+};
+
+struct efa_admin_dealloc_pd_resp {
+ struct efa_admin_acq_common_desc acq_common_desc;
+};
+
+struct efa_admin_alloc_uar_cmd {
+ struct efa_admin_aq_common_desc aq_common_descriptor;
+};
+
+struct efa_admin_alloc_uar_resp {
+ struct efa_admin_acq_common_desc acq_common_desc;
+
+ /* UAR number */
+ u16 uar;
+
+ /* MBZ */
+ u16 reserved;
+};
+
+struct efa_admin_dealloc_uar_cmd {
+ struct efa_admin_aq_common_desc aq_common_descriptor;
+
+ /* UAR number */
+ u16 uar;
+
+ /* MBZ */
+ u16 reserved;
+};
+
+struct efa_admin_dealloc_uar_resp {
+ struct efa_admin_acq_common_desc acq_common_desc;
+};
+
+struct efa_admin_create_eq_cmd {
+ struct efa_admin_aq_common_desc aq_common_descriptor;
+
+ /* Size of the EQ in entries, must be power of 2 */
+ u16 depth;
+
+ /* MSI-X table entry index */
+ u8 msix_vec;
+
+ /*
+ * 4:0 : entry_size_words - size of EQ entry in
+ * 32-bit words
+ * 7:5 : reserved - MBZ
+ */
+ u8 caps;
+
+ /* EQ ring base address */
+ struct efa_common_mem_addr ba;
+
+ /*
+ * Enabled events on this EQ
+ * 0 : completion_events - Enable completion events
+ * 31:1 : reserved - MBZ
+ */
+ u32 event_bitmask;
+
+ /* MBZ */
+ u32 reserved;
+};
+
+struct efa_admin_create_eq_resp {
+ struct efa_admin_acq_common_desc acq_common_desc;
+
+ /* EQ number */
+ u16 eqn;
+
+ /* MBZ */
+ u16 reserved;
+};
+
+struct efa_admin_destroy_eq_cmd {
+ struct efa_admin_aq_common_desc aq_common_descriptor;
+
+ /* EQ number */
+ u16 eqn;
+
+ /* MBZ */
+ u16 reserved;
+};
+
+struct efa_admin_destroy_eq_resp {
+ struct efa_admin_acq_common_desc acq_common_desc;
+};
+
+/* asynchronous event notification groups */
+enum efa_admin_aenq_group {
+ EFA_ADMIN_FATAL_ERROR = 1,
+ EFA_ADMIN_WARNING = 2,
+ EFA_ADMIN_NOTIFICATION = 3,
+ EFA_ADMIN_KEEP_ALIVE = 4,
+ EFA_ADMIN_AENQ_GROUPS_NUM = 5,
+};
+
+struct efa_admin_mmio_req_read_less_resp {
+ u16 req_id;
+
+ u16 reg_off;
+
+ /* value is valid when poll is cleared */
+ u32 reg_val;
+};
+
+enum efa_admin_os_type {
+ EFA_ADMIN_OS_LINUX = 0,
+};
+
+struct efa_admin_host_info {
+ /* OS distribution string format */
+ u8 os_dist_str[128];
+
+ /* Defined in enum efa_admin_os_type */
+ u32 os_type;
+
+ /* Kernel version string format */
+ u8 kernel_ver_str[32];
+
+ /* Kernel version numeric format */
+ u32 kernel_ver;
+
+ /*
+ * 7:0 : driver_module_type
+ * 15:8 : driver_sub_minor
+ * 23:16 : driver_minor
+ * 31:24 : driver_major
+ */
+ u32 driver_ver;
+
+ /*
+ * Device's Bus, Device and Function
+ * 2:0 : function
+ * 7:3 : device
+ * 15:8 : bus
+ */
+ u16 bdf;
+
+ /*
+ * Spec version
+ * 7:0 : spec_minor
+ * 15:8 : spec_major
+ */
+ u16 spec_ver;
+
+ /*
+ * 0 : intree - Intree driver
+ * 1 : gdr - GPUDirect RDMA supported
+ * 31:2 : reserved2
+ */
+ u32 flags;
+};
+
+/* create_qp_cmd */
+#define EFA_ADMIN_CREATE_QP_CMD_SQ_VIRT_MASK BIT(0)
+#define EFA_ADMIN_CREATE_QP_CMD_RQ_VIRT_MASK BIT(1)
+#define EFA_ADMIN_CREATE_QP_CMD_UNSOLICITED_WRITE_RECV_MASK BIT(2)
+
+/* modify_qp_cmd */
+#define EFA_ADMIN_MODIFY_QP_CMD_QP_STATE_MASK BIT(0)
+#define EFA_ADMIN_MODIFY_QP_CMD_CUR_QP_STATE_MASK BIT(1)
+#define EFA_ADMIN_MODIFY_QP_CMD_QKEY_MASK BIT(2)
+#define EFA_ADMIN_MODIFY_QP_CMD_SQ_PSN_MASK BIT(3)
+#define EFA_ADMIN_MODIFY_QP_CMD_SQ_DRAINED_ASYNC_NOTIFY_MASK BIT(4)
+#define EFA_ADMIN_MODIFY_QP_CMD_RNR_RETRY_MASK BIT(5)
+
+/* reg_mr_cmd */
+#define EFA_ADMIN_REG_MR_CMD_PHYS_PAGE_SIZE_SHIFT_MASK GENMASK(4, 0)
+#define EFA_ADMIN_REG_MR_CMD_MEM_ADDR_PHY_MODE_EN_MASK BIT(7)
+#define EFA_ADMIN_REG_MR_CMD_LOCAL_WRITE_ENABLE_MASK BIT(0)
+#define EFA_ADMIN_REG_MR_CMD_REMOTE_WRITE_ENABLE_MASK BIT(1)
+#define EFA_ADMIN_REG_MR_CMD_REMOTE_READ_ENABLE_MASK BIT(2)
+
+/* reg_mr_resp */
+#define EFA_ADMIN_REG_MR_RESP_RECV_IC_ID_MASK BIT(0)
+#define EFA_ADMIN_REG_MR_RESP_RDMA_READ_IC_ID_MASK BIT(1)
+#define EFA_ADMIN_REG_MR_RESP_RDMA_RECV_IC_ID_MASK BIT(2)
+
+/* create_cq_cmd */
+#define EFA_ADMIN_CREATE_CQ_CMD_INTERRUPT_MODE_ENABLED_MASK BIT(5)
+#define EFA_ADMIN_CREATE_CQ_CMD_VIRT_MASK BIT(6)
+#define EFA_ADMIN_CREATE_CQ_CMD_CQ_ENTRY_SIZE_WORDS_MASK GENMASK(4, 0)
+#define EFA_ADMIN_CREATE_CQ_CMD_SET_SRC_ADDR_MASK BIT(5)
+
+/* create_cq_resp */
+#define EFA_ADMIN_CREATE_CQ_RESP_DB_VALID_MASK BIT(0)
+
+/* feature_device_attr_desc */
+#define EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_RDMA_READ_MASK BIT(0)
+#define EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_RNR_RETRY_MASK BIT(1)
+#define EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_DATA_POLLING_128_MASK BIT(2)
+#define EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_RDMA_WRITE_MASK BIT(3)
+#define EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_UNSOLICITED_WRITE_RECV_MASK BIT(4)
+
+/* create_eq_cmd */
+#define EFA_ADMIN_CREATE_EQ_CMD_ENTRY_SIZE_WORDS_MASK GENMASK(4, 0)
+#define EFA_ADMIN_CREATE_EQ_CMD_COMPLETION_EVENTS_MASK BIT(0)
+
+/* host_info */
+#define EFA_ADMIN_HOST_INFO_DRIVER_MODULE_TYPE_MASK GENMASK(7, 0)
+#define EFA_ADMIN_HOST_INFO_DRIVER_SUB_MINOR_MASK GENMASK(15, 8)
+#define EFA_ADMIN_HOST_INFO_DRIVER_MINOR_MASK GENMASK(23, 16)
+#define EFA_ADMIN_HOST_INFO_DRIVER_MAJOR_MASK GENMASK(31, 24)
+#define EFA_ADMIN_HOST_INFO_FUNCTION_MASK GENMASK(2, 0)
+#define EFA_ADMIN_HOST_INFO_DEVICE_MASK GENMASK(7, 3)
+#define EFA_ADMIN_HOST_INFO_BUS_MASK GENMASK(15, 8)
+#define EFA_ADMIN_HOST_INFO_SPEC_MINOR_MASK GENMASK(7, 0)
+#define EFA_ADMIN_HOST_INFO_SPEC_MAJOR_MASK GENMASK(15, 8)
+#define EFA_ADMIN_HOST_INFO_INTREE_MASK BIT(0)
+#define EFA_ADMIN_HOST_INFO_GDR_MASK BIT(1)
+
+#endif /* _EFA_ADMIN_CMDS_H_ */
diff --git a/drivers/infiniband/hw/efa/efa_admin_defs.h b/drivers/infiniband/hw/efa/efa_admin_defs.h
new file mode 100644
index 000000000000..35700c93e639
--- /dev/null
+++ b/drivers/infiniband/hw/efa/efa_admin_defs.h
@@ -0,0 +1,175 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
+/*
+ * Copyright 2018-2024 Amazon.com, Inc. or its affiliates. All rights reserved.
+ */
+
+#ifndef _EFA_ADMIN_H_
+#define _EFA_ADMIN_H_
+
+enum efa_admin_aq_completion_status {
+ EFA_ADMIN_SUCCESS = 0,
+ EFA_ADMIN_RESOURCE_ALLOCATION_FAILURE = 1,
+ EFA_ADMIN_BAD_OPCODE = 2,
+ EFA_ADMIN_UNSUPPORTED_OPCODE = 3,
+ EFA_ADMIN_MALFORMED_REQUEST = 4,
+ /* Additional status is provided in ACQ entry extended_status */
+ EFA_ADMIN_ILLEGAL_PARAMETER = 5,
+ EFA_ADMIN_UNKNOWN_ERROR = 6,
+ EFA_ADMIN_RESOURCE_BUSY = 7,
+};
+
+struct efa_admin_aq_common_desc {
+ /*
+ * 11:0 : command_id
+ * 15:12 : reserved12
+ */
+ u16 command_id;
+
+ /* as appears in efa_admin_aq_opcode */
+ u8 opcode;
+
+ /*
+ * 0 : phase
+ * 1 : ctrl_data - control buffer address valid
+ * 2 : ctrl_data_indirect - control buffer address
+ * points to list of pages with addresses of control
+ * buffers
+ * 7:3 : reserved3
+ */
+ u8 flags;
+};
+
+/*
+ * used in efa_admin_aq_entry. Can point directly to control data, or to a
+ * page list chunk. Used also at the end of indirect mode page list chunks,
+ * for chaining.
+ */
+struct efa_admin_ctrl_buff_info {
+ u32 length;
+
+ struct efa_common_mem_addr address;
+};
+
+struct efa_admin_aq_entry {
+ struct efa_admin_aq_common_desc aq_common_descriptor;
+
+ union {
+ u32 inline_data_w1[3];
+
+ struct efa_admin_ctrl_buff_info control_buffer;
+ } u;
+
+ u32 inline_data_w4[12];
+};
+
+struct efa_admin_acq_common_desc {
+ /*
+ * command identifier to associate it with the aq descriptor
+ * 11:0 : command_id
+ * 15:12 : reserved12
+ */
+ u16 command;
+
+ u8 status;
+
+ /*
+ * 0 : phase
+ * 7:1 : reserved1
+ */
+ u8 flags;
+
+ u16 extended_status;
+
+ /*
+ * indicates to the driver which AQ entry has been consumed by the
+ * device and could be reused
+ */
+ u16 sq_head_indx;
+};
+
+struct efa_admin_acq_entry {
+ struct efa_admin_acq_common_desc acq_common_descriptor;
+
+ u32 response_specific_data[14];
+};
+
+struct efa_admin_aenq_common_desc {
+ u16 group;
+
+ u16 syndrome;
+
+ /*
+ * 0 : phase
+ * 7:1 : reserved - MBZ
+ */
+ u8 flags;
+
+ u8 reserved1[3];
+
+ u32 timestamp_low;
+
+ u32 timestamp_high;
+};
+
+struct efa_admin_aenq_entry {
+ struct efa_admin_aenq_common_desc aenq_common_desc;
+
+ /* command specific inline data */
+ u32 inline_data_w4[12];
+};
+
+enum efa_admin_eqe_event_type {
+ EFA_ADMIN_EQE_EVENT_TYPE_COMPLETION = 0,
+};
+
+/* Completion event */
+struct efa_admin_comp_event {
+ /* CQ number */
+ u16 cqn;
+
+ /* MBZ */
+ u16 reserved;
+
+ /* MBZ */
+ u32 reserved2;
+};
+
+/* Event Queue Element */
+struct efa_admin_eqe {
+ /*
+ * 0 : phase
+ * 8:1 : event_type - Event type
+ * 31:9 : reserved - MBZ
+ */
+ u32 common;
+
+ /* MBZ */
+ u32 reserved;
+
+ union {
+ /* Event data */
+ u32 event_data[2];
+
+ /* Completion Event */
+ struct efa_admin_comp_event comp_event;
+ } u;
+};
+
+/* aq_common_desc */
+#define EFA_ADMIN_AQ_COMMON_DESC_COMMAND_ID_MASK GENMASK(11, 0)
+#define EFA_ADMIN_AQ_COMMON_DESC_PHASE_MASK BIT(0)
+#define EFA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_MASK BIT(1)
+#define EFA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_MASK BIT(2)
+
+/* acq_common_desc */
+#define EFA_ADMIN_ACQ_COMMON_DESC_COMMAND_ID_MASK GENMASK(11, 0)
+#define EFA_ADMIN_ACQ_COMMON_DESC_PHASE_MASK BIT(0)
+
+/* aenq_common_desc */
+#define EFA_ADMIN_AENQ_COMMON_DESC_PHASE_MASK BIT(0)
+
+/* eqe */
+#define EFA_ADMIN_EQE_PHASE_MASK BIT(0)
+#define EFA_ADMIN_EQE_EVENT_TYPE_MASK GENMASK(8, 1)
+
+#endif /* _EFA_ADMIN_H_ */
diff --git a/drivers/infiniband/hw/efa/efa_com.c b/drivers/infiniband/hw/efa/efa_com.c
new file mode 100644
index 000000000000..0e979ca10d24
--- /dev/null
+++ b/drivers/infiniband/hw/efa/efa_com.c
@@ -0,0 +1,1255 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
+/*
+ * Copyright 2018-2025 Amazon.com, Inc. or its affiliates. All rights reserved.
+ */
+
+#include "efa_com.h"
+#include "efa_regs_defs.h"
+
+#define ADMIN_CMD_TIMEOUT_US 30000000 /* usecs */
+
+#define EFA_REG_READ_TIMEOUT_US 50000 /* usecs */
+#define EFA_MMIO_READ_INVALID 0xffffffff
+
+#define EFA_POLL_INTERVAL_MS 100 /* msecs */
+
+#define EFA_ASYNC_QUEUE_DEPTH 16
+#define EFA_ADMIN_QUEUE_DEPTH 32
+
+#define EFA_CTRL_MAJOR 0
+#define EFA_CTRL_MINOR 0
+#define EFA_CTRL_SUB_MINOR 1
+
+enum efa_cmd_status {
+ EFA_CMD_SUBMITTED,
+ EFA_CMD_COMPLETED,
+};
+
+struct efa_comp_ctx {
+ struct completion wait_event;
+ struct efa_admin_acq_entry *user_cqe;
+ u32 comp_size;
+ enum efa_cmd_status status;
+ u16 cmd_id;
+ u8 cmd_opcode;
+ u8 occupied;
+};
+
+static const char *efa_com_cmd_str(u8 cmd)
+{
+#define EFA_CMD_STR_CASE(_cmd) case EFA_ADMIN_##_cmd: return #_cmd
+
+ switch (cmd) {
+ EFA_CMD_STR_CASE(CREATE_QP);
+ EFA_CMD_STR_CASE(MODIFY_QP);
+ EFA_CMD_STR_CASE(QUERY_QP);
+ EFA_CMD_STR_CASE(DESTROY_QP);
+ EFA_CMD_STR_CASE(CREATE_AH);
+ EFA_CMD_STR_CASE(DESTROY_AH);
+ EFA_CMD_STR_CASE(REG_MR);
+ EFA_CMD_STR_CASE(DEREG_MR);
+ EFA_CMD_STR_CASE(CREATE_CQ);
+ EFA_CMD_STR_CASE(DESTROY_CQ);
+ EFA_CMD_STR_CASE(GET_FEATURE);
+ EFA_CMD_STR_CASE(SET_FEATURE);
+ EFA_CMD_STR_CASE(GET_STATS);
+ EFA_CMD_STR_CASE(ALLOC_PD);
+ EFA_CMD_STR_CASE(DEALLOC_PD);
+ EFA_CMD_STR_CASE(ALLOC_UAR);
+ EFA_CMD_STR_CASE(DEALLOC_UAR);
+ EFA_CMD_STR_CASE(CREATE_EQ);
+ EFA_CMD_STR_CASE(DESTROY_EQ);
+ default: return "unknown command opcode";
+ }
+#undef EFA_CMD_STR_CASE
+}
+
+void efa_com_set_dma_addr(dma_addr_t addr, u32 *addr_high, u32 *addr_low)
+{
+ *addr_low = lower_32_bits(addr);
+ *addr_high = upper_32_bits(addr);
+}
+
+static u32 efa_com_reg_read32(struct efa_com_dev *edev, u16 offset)
+{
+ struct efa_com_mmio_read *mmio_read = &edev->mmio_read;
+ struct efa_admin_mmio_req_read_less_resp *read_resp;
+ unsigned long exp_time;
+ u32 mmio_read_reg = 0;
+ u32 err;
+
+ read_resp = mmio_read->read_resp;
+
+ spin_lock(&mmio_read->lock);
+ mmio_read->seq_num++;
+
+ /* trash DMA req_id to identify when hardware is done */
+ read_resp->req_id = mmio_read->seq_num + 0x9aL;
+ EFA_SET(&mmio_read_reg, EFA_REGS_MMIO_REG_READ_REG_OFF, offset);
+ EFA_SET(&mmio_read_reg, EFA_REGS_MMIO_REG_READ_REQ_ID,
+ mmio_read->seq_num);
+
+ writel(mmio_read_reg, edev->reg_bar + EFA_REGS_MMIO_REG_READ_OFF);
+
+ exp_time = jiffies + usecs_to_jiffies(mmio_read->mmio_read_timeout);
+ do {
+ if (READ_ONCE(read_resp->req_id) == mmio_read->seq_num)
+ break;
+ udelay(1);
+ } while (time_is_after_jiffies(exp_time));
+
+ if (read_resp->req_id != mmio_read->seq_num) {
+ ibdev_err_ratelimited(
+ edev->efa_dev,
+ "Reading register timed out. expected: req id[%u] offset[%#x] actual: req id[%u] offset[%#x]\n",
+ mmio_read->seq_num, offset, read_resp->req_id,
+ read_resp->reg_off);
+ err = EFA_MMIO_READ_INVALID;
+ goto out;
+ }
+
+ if (read_resp->reg_off != offset) {
+ ibdev_err_ratelimited(
+ edev->efa_dev,
+ "Reading register failed: wrong offset provided\n");
+ err = EFA_MMIO_READ_INVALID;
+ goto out;
+ }
+
+ err = read_resp->reg_val;
+out:
+ spin_unlock(&mmio_read->lock);
+ return err;
+}
+
+static int efa_com_admin_init_sq(struct efa_com_dev *edev)
+{
+ struct efa_com_admin_queue *aq = &edev->aq;
+ struct efa_com_admin_sq *sq = &aq->sq;
+ u16 size = aq->depth * sizeof(*sq->entries);
+ u32 aq_caps = 0;
+ u32 addr_high;
+ u32 addr_low;
+
+ sq->entries =
+ dma_alloc_coherent(aq->dmadev, size, &sq->dma_addr, GFP_KERNEL);
+ if (!sq->entries)
+ return -ENOMEM;
+
+ spin_lock_init(&sq->lock);
+
+ sq->cc = 0;
+ sq->pc = 0;
+ sq->phase = 1;
+
+ sq->db_addr = (u32 __iomem *)(edev->reg_bar + EFA_REGS_AQ_PROD_DB_OFF);
+
+ addr_high = upper_32_bits(sq->dma_addr);
+ addr_low = lower_32_bits(sq->dma_addr);
+
+ writel(addr_low, edev->reg_bar + EFA_REGS_AQ_BASE_LO_OFF);
+ writel(addr_high, edev->reg_bar + EFA_REGS_AQ_BASE_HI_OFF);
+
+ EFA_SET(&aq_caps, EFA_REGS_AQ_CAPS_AQ_DEPTH, aq->depth);
+ EFA_SET(&aq_caps, EFA_REGS_AQ_CAPS_AQ_ENTRY_SIZE,
+ sizeof(struct efa_admin_aq_entry));
+
+ writel(aq_caps, edev->reg_bar + EFA_REGS_AQ_CAPS_OFF);
+
+ return 0;
+}
+
+static int efa_com_admin_init_cq(struct efa_com_dev *edev)
+{
+ struct efa_com_admin_queue *aq = &edev->aq;
+ struct efa_com_admin_cq *cq = &aq->cq;
+ u16 size = aq->depth * sizeof(*cq->entries);
+ u32 acq_caps = 0;
+ u32 addr_high;
+ u32 addr_low;
+
+ cq->entries =
+ dma_alloc_coherent(aq->dmadev, size, &cq->dma_addr, GFP_KERNEL);
+ if (!cq->entries)
+ return -ENOMEM;
+
+ spin_lock_init(&cq->lock);
+
+ cq->cc = 0;
+ cq->phase = 1;
+
+ addr_high = upper_32_bits(cq->dma_addr);
+ addr_low = lower_32_bits(cq->dma_addr);
+
+ writel(addr_low, edev->reg_bar + EFA_REGS_ACQ_BASE_LO_OFF);
+ writel(addr_high, edev->reg_bar + EFA_REGS_ACQ_BASE_HI_OFF);
+
+ EFA_SET(&acq_caps, EFA_REGS_ACQ_CAPS_ACQ_DEPTH, aq->depth);
+ EFA_SET(&acq_caps, EFA_REGS_ACQ_CAPS_ACQ_ENTRY_SIZE,
+ sizeof(struct efa_admin_acq_entry));
+ EFA_SET(&acq_caps, EFA_REGS_ACQ_CAPS_ACQ_MSIX_VECTOR,
+ aq->msix_vector_idx);
+
+ writel(acq_caps, edev->reg_bar + EFA_REGS_ACQ_CAPS_OFF);
+
+ return 0;
+}
+
+static int efa_com_admin_init_aenq(struct efa_com_dev *edev,
+ struct efa_aenq_handlers *aenq_handlers)
+{
+ struct efa_com_aenq *aenq = &edev->aenq;
+ u32 addr_low, addr_high;
+ u32 aenq_caps = 0;
+ u16 size;
+
+ if (!aenq_handlers) {
+ ibdev_err(edev->efa_dev, "aenq handlers pointer is NULL\n");
+ return -EINVAL;
+ }
+
+ size = EFA_ASYNC_QUEUE_DEPTH * sizeof(*aenq->entries);
+ aenq->entries = dma_alloc_coherent(edev->dmadev, size, &aenq->dma_addr,
+ GFP_KERNEL);
+ if (!aenq->entries)
+ return -ENOMEM;
+
+ aenq->aenq_handlers = aenq_handlers;
+ aenq->depth = EFA_ASYNC_QUEUE_DEPTH;
+ aenq->cc = 0;
+ aenq->phase = 1;
+
+ addr_low = lower_32_bits(aenq->dma_addr);
+ addr_high = upper_32_bits(aenq->dma_addr);
+
+ writel(addr_low, edev->reg_bar + EFA_REGS_AENQ_BASE_LO_OFF);
+ writel(addr_high, edev->reg_bar + EFA_REGS_AENQ_BASE_HI_OFF);
+
+ EFA_SET(&aenq_caps, EFA_REGS_AENQ_CAPS_AENQ_DEPTH, aenq->depth);
+ EFA_SET(&aenq_caps, EFA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE,
+ sizeof(struct efa_admin_aenq_entry));
+ EFA_SET(&aenq_caps, EFA_REGS_AENQ_CAPS_AENQ_MSIX_VECTOR,
+ aenq->msix_vector_idx);
+ writel(aenq_caps, edev->reg_bar + EFA_REGS_AENQ_CAPS_OFF);
+
+ /*
+ * Init cons_db to mark that all entries in the queue
+ * are initially available
+ */
+ writel(edev->aenq.cc, edev->reg_bar + EFA_REGS_AENQ_CONS_DB_OFF);
+
+ return 0;
+}
+
+/* ID to be used with efa_com_get_comp_ctx */
+static u16 efa_com_alloc_ctx_id(struct efa_com_admin_queue *aq)
+{
+ u16 ctx_id;
+
+ spin_lock(&aq->comp_ctx_lock);
+ ctx_id = aq->comp_ctx_pool[aq->comp_ctx_pool_next];
+ aq->comp_ctx_pool_next++;
+ spin_unlock(&aq->comp_ctx_lock);
+
+ return ctx_id;
+}
+
+static void efa_com_dealloc_ctx_id(struct efa_com_admin_queue *aq,
+ u16 ctx_id)
+{
+ spin_lock(&aq->comp_ctx_lock);
+ aq->comp_ctx_pool_next--;
+ aq->comp_ctx_pool[aq->comp_ctx_pool_next] = ctx_id;
+ spin_unlock(&aq->comp_ctx_lock);
+}
+
+static inline void efa_com_put_comp_ctx(struct efa_com_admin_queue *aq,
+ struct efa_comp_ctx *comp_ctx)
+{
+ u16 cmd_id = EFA_GET(&comp_ctx->user_cqe->acq_common_descriptor.command,
+ EFA_ADMIN_ACQ_COMMON_DESC_COMMAND_ID);
+ u16 ctx_id = cmd_id & (aq->depth - 1);
+
+ ibdev_dbg(aq->efa_dev, "Put completion command_id %#x\n", cmd_id);
+ comp_ctx->occupied = 0;
+ efa_com_dealloc_ctx_id(aq, ctx_id);
+}
+
+static struct efa_comp_ctx *efa_com_get_comp_ctx(struct efa_com_admin_queue *aq,
+ u16 cmd_id, bool capture)
+{
+ u16 ctx_id = cmd_id & (aq->depth - 1);
+
+ if (aq->comp_ctx[ctx_id].occupied && capture) {
+ ibdev_err_ratelimited(
+ aq->efa_dev,
+ "Completion context for command_id %#x is occupied\n",
+ cmd_id);
+ return NULL;
+ }
+
+ if (capture) {
+ aq->comp_ctx[ctx_id].occupied = 1;
+ ibdev_dbg(aq->efa_dev,
+ "Take completion ctxt for command_id %#x\n", cmd_id);
+ }
+
+ return &aq->comp_ctx[ctx_id];
+}
+
+static struct efa_comp_ctx *__efa_com_submit_admin_cmd(struct efa_com_admin_queue *aq,
+ struct efa_admin_aq_entry *cmd,
+ size_t cmd_size_in_bytes,
+ struct efa_admin_acq_entry *comp,
+ size_t comp_size_in_bytes)
+{
+ struct efa_admin_aq_entry *aqe;
+ struct efa_comp_ctx *comp_ctx;
+ u16 queue_size_mask;
+ u16 cmd_id;
+ u16 ctx_id;
+ u16 pi;
+
+ queue_size_mask = aq->depth - 1;
+ pi = aq->sq.pc & queue_size_mask;
+
+ ctx_id = efa_com_alloc_ctx_id(aq);
+
+ /* cmd_id LSBs are the ctx_id and MSBs are entropy bits from pc */
+ cmd_id = ctx_id & queue_size_mask;
+ cmd_id |= aq->sq.pc & ~queue_size_mask;
+ cmd_id &= EFA_ADMIN_AQ_COMMON_DESC_COMMAND_ID_MASK;
+
+ cmd->aq_common_descriptor.command_id = cmd_id;
+ EFA_SET(&cmd->aq_common_descriptor.flags,
+ EFA_ADMIN_AQ_COMMON_DESC_PHASE, aq->sq.phase);
+
+ comp_ctx = efa_com_get_comp_ctx(aq, cmd_id, true);
+ if (!comp_ctx) {
+ efa_com_dealloc_ctx_id(aq, ctx_id);
+ return ERR_PTR(-EINVAL);
+ }
+
+ comp_ctx->status = EFA_CMD_SUBMITTED;
+ comp_ctx->comp_size = comp_size_in_bytes;
+ comp_ctx->user_cqe = comp;
+ comp_ctx->cmd_opcode = cmd->aq_common_descriptor.opcode;
+ comp_ctx->cmd_id = cmd_id;
+
+ reinit_completion(&comp_ctx->wait_event);
+
+ aqe = &aq->sq.entries[pi];
+ memset(aqe, 0, sizeof(*aqe));
+ memcpy(aqe, cmd, cmd_size_in_bytes);
+
+ aq->sq.pc++;
+ atomic64_inc(&aq->stats.submitted_cmd);
+
+ if ((aq->sq.pc & queue_size_mask) == 0)
+ aq->sq.phase = !aq->sq.phase;
+
+ /* barrier not needed in case of writel */
+ writel(aq->sq.pc, aq->sq.db_addr);
+
+ return comp_ctx;
+}
+
+static inline int efa_com_init_comp_ctxt(struct efa_com_admin_queue *aq)
+{
+ size_t pool_size = aq->depth * sizeof(*aq->comp_ctx_pool);
+ size_t size = aq->depth * sizeof(struct efa_comp_ctx);
+ struct efa_comp_ctx *comp_ctx;
+ u16 i;
+
+ aq->comp_ctx = devm_kzalloc(aq->dmadev, size, GFP_KERNEL);
+ aq->comp_ctx_pool = devm_kzalloc(aq->dmadev, pool_size, GFP_KERNEL);
+ if (!aq->comp_ctx || !aq->comp_ctx_pool) {
+ devm_kfree(aq->dmadev, aq->comp_ctx_pool);
+ devm_kfree(aq->dmadev, aq->comp_ctx);
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < aq->depth; i++) {
+ comp_ctx = efa_com_get_comp_ctx(aq, i, false);
+ if (comp_ctx)
+ init_completion(&comp_ctx->wait_event);
+
+ aq->comp_ctx_pool[i] = i;
+ }
+
+ spin_lock_init(&aq->comp_ctx_lock);
+
+ aq->comp_ctx_pool_next = 0;
+
+ return 0;
+}
+
+static struct efa_comp_ctx *efa_com_submit_admin_cmd(struct efa_com_admin_queue *aq,
+ struct efa_admin_aq_entry *cmd,
+ size_t cmd_size_in_bytes,
+ struct efa_admin_acq_entry *comp,
+ size_t comp_size_in_bytes)
+{
+ struct efa_comp_ctx *comp_ctx;
+
+ spin_lock(&aq->sq.lock);
+ if (!test_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state)) {
+ ibdev_err_ratelimited(aq->efa_dev, "Admin queue is closed\n");
+ spin_unlock(&aq->sq.lock);
+ return ERR_PTR(-ENODEV);
+ }
+
+ comp_ctx = __efa_com_submit_admin_cmd(aq, cmd, cmd_size_in_bytes, comp,
+ comp_size_in_bytes);
+ spin_unlock(&aq->sq.lock);
+ if (IS_ERR(comp_ctx))
+ clear_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state);
+
+ return comp_ctx;
+}
+
+static int efa_com_handle_single_admin_completion(struct efa_com_admin_queue *aq,
+ struct efa_admin_acq_entry *cqe)
+{
+ struct efa_comp_ctx *comp_ctx;
+ u16 cmd_id;
+
+ cmd_id = EFA_GET(&cqe->acq_common_descriptor.command,
+ EFA_ADMIN_ACQ_COMMON_DESC_COMMAND_ID);
+
+ comp_ctx = efa_com_get_comp_ctx(aq, cmd_id, false);
+ if (comp_ctx->status != EFA_CMD_SUBMITTED) {
+ ibdev_err(aq->efa_dev,
+ "Received completion with unexpected command id[%d], sq producer: %d, sq consumer: %d, cq consumer: %d\n",
+ cmd_id, aq->sq.pc, aq->sq.cc, aq->cq.cc);
+ return -EINVAL;
+ }
+
+ comp_ctx->status = EFA_CMD_COMPLETED;
+ memcpy(comp_ctx->user_cqe, cqe, comp_ctx->comp_size);
+
+ if (!test_bit(EFA_AQ_STATE_POLLING_BIT, &aq->state))
+ complete(&comp_ctx->wait_event);
+
+ return 0;
+}
+
+static void efa_com_handle_admin_completion(struct efa_com_admin_queue *aq)
+{
+ struct efa_admin_acq_entry *cqe;
+ u16 queue_size_mask;
+ u16 comp_cmds = 0;
+ u8 phase;
+ int err;
+ u16 ci;
+
+ queue_size_mask = aq->depth - 1;
+
+ ci = aq->cq.cc & queue_size_mask;
+ phase = aq->cq.phase;
+
+ cqe = &aq->cq.entries[ci];
+
+ /* Go over all the completions */
+ while ((READ_ONCE(cqe->acq_common_descriptor.flags) &
+ EFA_ADMIN_ACQ_COMMON_DESC_PHASE_MASK) == phase) {
+ /*
+ * Do not read the rest of the completion entry before the
+ * phase bit was validated
+ */
+ dma_rmb();
+ err = efa_com_handle_single_admin_completion(aq, cqe);
+ if (!err)
+ comp_cmds++;
+
+ aq->cq.cc++;
+ ci++;
+ if (ci == aq->depth) {
+ ci = 0;
+ phase = !phase;
+ }
+
+ cqe = &aq->cq.entries[ci];
+ }
+
+ aq->cq.phase = phase;
+ aq->sq.cc += comp_cmds;
+ atomic64_add(comp_cmds, &aq->stats.completed_cmd);
+}
+
+static int efa_com_comp_status_to_errno(u8 comp_status)
+{
+ switch (comp_status) {
+ case EFA_ADMIN_SUCCESS:
+ return 0;
+ case EFA_ADMIN_RESOURCE_ALLOCATION_FAILURE:
+ return -ENOMEM;
+ case EFA_ADMIN_UNSUPPORTED_OPCODE:
+ return -EOPNOTSUPP;
+ case EFA_ADMIN_BAD_OPCODE:
+ case EFA_ADMIN_MALFORMED_REQUEST:
+ case EFA_ADMIN_ILLEGAL_PARAMETER:
+ case EFA_ADMIN_UNKNOWN_ERROR:
+ return -EINVAL;
+ default:
+ return -EINVAL;
+ }
+}
+
+static int efa_com_wait_and_process_admin_cq_polling(struct efa_comp_ctx *comp_ctx,
+ struct efa_com_admin_queue *aq)
+{
+ unsigned long timeout;
+ unsigned long flags;
+ int err;
+
+ timeout = jiffies + usecs_to_jiffies(aq->completion_timeout);
+
+ while (1) {
+ spin_lock_irqsave(&aq->cq.lock, flags);
+ efa_com_handle_admin_completion(aq);
+ spin_unlock_irqrestore(&aq->cq.lock, flags);
+
+ if (comp_ctx->status != EFA_CMD_SUBMITTED)
+ break;
+
+ if (time_is_before_jiffies(timeout)) {
+ ibdev_err_ratelimited(
+ aq->efa_dev,
+ "Wait for completion (polling) timeout\n");
+ /* EFA didn't have any completion */
+ atomic64_inc(&aq->stats.no_completion);
+
+ clear_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state);
+ err = -ETIME;
+ goto out;
+ }
+
+ msleep(aq->poll_interval);
+ }
+
+ err = efa_com_comp_status_to_errno(comp_ctx->user_cqe->acq_common_descriptor.status);
+out:
+ efa_com_put_comp_ctx(aq, comp_ctx);
+ return err;
+}
+
+static int efa_com_wait_and_process_admin_cq_interrupts(struct efa_comp_ctx *comp_ctx,
+ struct efa_com_admin_queue *aq)
+{
+ unsigned long flags;
+ int err;
+
+ wait_for_completion_timeout(&comp_ctx->wait_event,
+ usecs_to_jiffies(aq->completion_timeout));
+
+ /*
+ * In case the command wasn't completed find out the root cause.
+ * There might be 2 kinds of errors
+ * 1) No completion (timeout reached)
+ * 2) There is completion but the device didn't get any msi-x interrupt.
+ */
+ if (comp_ctx->status == EFA_CMD_SUBMITTED) {
+ spin_lock_irqsave(&aq->cq.lock, flags);
+ efa_com_handle_admin_completion(aq);
+ spin_unlock_irqrestore(&aq->cq.lock, flags);
+
+ atomic64_inc(&aq->stats.no_completion);
+
+ if (comp_ctx->status == EFA_CMD_COMPLETED)
+ ibdev_err_ratelimited(
+ aq->efa_dev,
+ "The device sent a completion but the driver didn't receive any MSI-X interrupt for admin cmd %s(%d) status %d (id: %d, sq producer: %d, sq consumer: %d, cq consumer: %d)\n",
+ efa_com_cmd_str(comp_ctx->cmd_opcode),
+ comp_ctx->cmd_opcode, comp_ctx->status,
+ comp_ctx->cmd_id, aq->sq.pc, aq->sq.cc,
+ aq->cq.cc);
+ else
+ ibdev_err_ratelimited(
+ aq->efa_dev,
+ "The device didn't send any completion for admin cmd %s(%d) status %d (id: %d, sq producer: %d, sq consumer: %d, cq consumer: %d)\n",
+ efa_com_cmd_str(comp_ctx->cmd_opcode),
+ comp_ctx->cmd_opcode, comp_ctx->status,
+ comp_ctx->cmd_id, aq->sq.pc, aq->sq.cc,
+ aq->cq.cc);
+
+ clear_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state);
+ err = -ETIME;
+ goto out;
+ }
+
+ err = efa_com_comp_status_to_errno(comp_ctx->user_cqe->acq_common_descriptor.status);
+out:
+ efa_com_put_comp_ctx(aq, comp_ctx);
+ return err;
+}
+
+/*
+ * There are two types to wait for completion.
+ * Polling mode - wait until the completion is available.
+ * Async mode - wait on wait queue until the completion is ready
+ * (or the timeout expired).
+ * It is expected that the IRQ called efa_com_handle_admin_completion
+ * to mark the completions.
+ */
+static int efa_com_wait_and_process_admin_cq(struct efa_comp_ctx *comp_ctx,
+ struct efa_com_admin_queue *aq)
+{
+ if (test_bit(EFA_AQ_STATE_POLLING_BIT, &aq->state))
+ return efa_com_wait_and_process_admin_cq_polling(comp_ctx, aq);
+
+ return efa_com_wait_and_process_admin_cq_interrupts(comp_ctx, aq);
+}
+
+/**
+ * efa_com_cmd_exec - Execute admin command
+ * @aq: admin queue.
+ * @cmd: the admin command to execute.
+ * @cmd_size: the command size.
+ * @comp: command completion return entry.
+ * @comp_size: command completion size.
+ * Submit an admin command and then wait until the device will return a
+ * completion.
+ * The completion will be copied into comp.
+ *
+ * @return - 0 on success, negative value on failure.
+ */
+int efa_com_cmd_exec(struct efa_com_admin_queue *aq,
+ struct efa_admin_aq_entry *cmd,
+ size_t cmd_size,
+ struct efa_admin_acq_entry *comp,
+ size_t comp_size)
+{
+ struct efa_comp_ctx *comp_ctx;
+ int err;
+
+ might_sleep();
+
+ /* In case of queue FULL */
+ down(&aq->avail_cmds);
+
+ ibdev_dbg(aq->efa_dev, "%s (opcode %d)\n",
+ efa_com_cmd_str(cmd->aq_common_descriptor.opcode),
+ cmd->aq_common_descriptor.opcode);
+ comp_ctx = efa_com_submit_admin_cmd(aq, cmd, cmd_size, comp, comp_size);
+ if (IS_ERR(comp_ctx)) {
+ ibdev_err_ratelimited(
+ aq->efa_dev,
+ "Failed to submit command %s (opcode %u) err %pe\n",
+ efa_com_cmd_str(cmd->aq_common_descriptor.opcode),
+ cmd->aq_common_descriptor.opcode, comp_ctx);
+
+ up(&aq->avail_cmds);
+ atomic64_inc(&aq->stats.cmd_err);
+ return PTR_ERR(comp_ctx);
+ }
+
+ err = efa_com_wait_and_process_admin_cq(comp_ctx, aq);
+ if (err) {
+ ibdev_err_ratelimited(
+ aq->efa_dev,
+ "Failed to process command %s (opcode %u) comp_status %d err %d\n",
+ efa_com_cmd_str(cmd->aq_common_descriptor.opcode),
+ cmd->aq_common_descriptor.opcode,
+ comp_ctx->user_cqe->acq_common_descriptor.status, err);
+ atomic64_inc(&aq->stats.cmd_err);
+ }
+
+ up(&aq->avail_cmds);
+
+ return err;
+}
+
+/**
+ * efa_com_admin_destroy - Destroy the admin and the async events queues.
+ * @edev: EFA communication layer struct
+ */
+void efa_com_admin_destroy(struct efa_com_dev *edev)
+{
+ struct efa_com_admin_queue *aq = &edev->aq;
+ struct efa_com_aenq *aenq = &edev->aenq;
+ struct efa_com_admin_cq *cq = &aq->cq;
+ struct efa_com_admin_sq *sq = &aq->sq;
+ u16 size;
+
+ clear_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state);
+
+ devm_kfree(edev->dmadev, aq->comp_ctx_pool);
+ devm_kfree(edev->dmadev, aq->comp_ctx);
+
+ size = aq->depth * sizeof(*sq->entries);
+ dma_free_coherent(edev->dmadev, size, sq->entries, sq->dma_addr);
+
+ size = aq->depth * sizeof(*cq->entries);
+ dma_free_coherent(edev->dmadev, size, cq->entries, cq->dma_addr);
+
+ size = aenq->depth * sizeof(*aenq->entries);
+ dma_free_coherent(edev->dmadev, size, aenq->entries, aenq->dma_addr);
+}
+
+/**
+ * efa_com_set_admin_polling_mode - Set the admin completion queue polling mode
+ * @edev: EFA communication layer struct
+ * @polling: Enable/Disable polling mode
+ *
+ * Set the admin completion mode.
+ */
+void efa_com_set_admin_polling_mode(struct efa_com_dev *edev, bool polling)
+{
+ u32 mask_value = 0;
+
+ if (polling)
+ EFA_SET(&mask_value, EFA_REGS_INTR_MASK_EN, 1);
+
+ writel(mask_value, edev->reg_bar + EFA_REGS_INTR_MASK_OFF);
+ if (polling)
+ set_bit(EFA_AQ_STATE_POLLING_BIT, &edev->aq.state);
+ else
+ clear_bit(EFA_AQ_STATE_POLLING_BIT, &edev->aq.state);
+}
+
+static void efa_com_stats_init(struct efa_com_dev *edev)
+{
+ atomic64_t *s = (atomic64_t *)&edev->aq.stats;
+ int i;
+
+ for (i = 0; i < sizeof(edev->aq.stats) / sizeof(*s); i++, s++)
+ atomic64_set(s, 0);
+}
+
+/**
+ * efa_com_admin_init - Init the admin and the async queues
+ * @edev: EFA communication layer struct
+ * @aenq_handlers: Those handlers to be called upon event.
+ *
+ * Initialize the admin submission and completion queues.
+ * Initialize the asynchronous events notification queues.
+ *
+ * @return - 0 on success, negative value on failure.
+ */
+int efa_com_admin_init(struct efa_com_dev *edev,
+ struct efa_aenq_handlers *aenq_handlers)
+{
+ struct efa_com_admin_queue *aq = &edev->aq;
+ u32 timeout;
+ u32 dev_sts;
+ u32 cap;
+ int err;
+
+ dev_sts = efa_com_reg_read32(edev, EFA_REGS_DEV_STS_OFF);
+ if (!EFA_GET(&dev_sts, EFA_REGS_DEV_STS_READY)) {
+ ibdev_err(edev->efa_dev,
+ "Device isn't ready, abort com init %#x\n", dev_sts);
+ return -ENODEV;
+ }
+
+ aq->depth = EFA_ADMIN_QUEUE_DEPTH;
+
+ aq->dmadev = edev->dmadev;
+ aq->efa_dev = edev->efa_dev;
+ set_bit(EFA_AQ_STATE_POLLING_BIT, &aq->state);
+
+ sema_init(&aq->avail_cmds, aq->depth);
+
+ efa_com_stats_init(edev);
+
+ err = efa_com_init_comp_ctxt(aq);
+ if (err)
+ return err;
+
+ err = efa_com_admin_init_sq(edev);
+ if (err)
+ goto err_destroy_comp_ctxt;
+
+ err = efa_com_admin_init_cq(edev);
+ if (err)
+ goto err_destroy_sq;
+
+ efa_com_set_admin_polling_mode(edev, false);
+
+ err = efa_com_admin_init_aenq(edev, aenq_handlers);
+ if (err)
+ goto err_destroy_cq;
+
+ cap = efa_com_reg_read32(edev, EFA_REGS_CAPS_OFF);
+ timeout = EFA_GET(&cap, EFA_REGS_CAPS_ADMIN_CMD_TO);
+ if (timeout)
+ /* the resolution of timeout reg is 100ms */
+ aq->completion_timeout = timeout * 100000;
+ else
+ aq->completion_timeout = ADMIN_CMD_TIMEOUT_US;
+
+ aq->poll_interval = EFA_POLL_INTERVAL_MS;
+
+ set_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state);
+
+ return 0;
+
+err_destroy_cq:
+ dma_free_coherent(edev->dmadev, aq->depth * sizeof(*aq->cq.entries),
+ aq->cq.entries, aq->cq.dma_addr);
+err_destroy_sq:
+ dma_free_coherent(edev->dmadev, aq->depth * sizeof(*aq->sq.entries),
+ aq->sq.entries, aq->sq.dma_addr);
+err_destroy_comp_ctxt:
+ devm_kfree(edev->dmadev, aq->comp_ctx);
+
+ return err;
+}
+
+/**
+ * efa_com_admin_q_comp_intr_handler - admin queue interrupt handler
+ * @edev: EFA communication layer struct
+ *
+ * This method goes over the admin completion queue and wakes up
+ * all the pending threads that wait on the commands wait event.
+ *
+ * Note: Should be called after MSI-X interrupt.
+ */
+void efa_com_admin_q_comp_intr_handler(struct efa_com_dev *edev)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&edev->aq.cq.lock, flags);
+ efa_com_handle_admin_completion(&edev->aq);
+ spin_unlock_irqrestore(&edev->aq.cq.lock, flags);
+}
+
+/*
+ * efa_handle_specific_aenq_event:
+ * return the handler that is relevant to the specific event group
+ */
+static efa_aenq_handler efa_com_get_specific_aenq_cb(struct efa_com_dev *edev,
+ u16 group)
+{
+ struct efa_aenq_handlers *aenq_handlers = edev->aenq.aenq_handlers;
+
+ if (group < EFA_MAX_HANDLERS && aenq_handlers->handlers[group])
+ return aenq_handlers->handlers[group];
+
+ return aenq_handlers->unimplemented_handler;
+}
+
+/**
+ * efa_com_aenq_intr_handler - AENQ interrupt handler
+ * @edev: EFA communication layer struct
+ * @data: Data of interrupt handler.
+ *
+ * Go over the async event notification queue and call the proper aenq handler.
+ */
+void efa_com_aenq_intr_handler(struct efa_com_dev *edev, void *data)
+{
+ struct efa_admin_aenq_common_desc *aenq_common;
+ struct efa_com_aenq *aenq = &edev->aenq;
+ struct efa_admin_aenq_entry *aenq_e;
+ efa_aenq_handler handler_cb;
+ u32 processed = 0;
+ u8 phase;
+ u32 ci;
+
+ ci = aenq->cc & (aenq->depth - 1);
+ phase = aenq->phase;
+ aenq_e = &aenq->entries[ci]; /* Get first entry */
+ aenq_common = &aenq_e->aenq_common_desc;
+
+ /* Go over all the events */
+ while ((READ_ONCE(aenq_common->flags) &
+ EFA_ADMIN_AENQ_COMMON_DESC_PHASE_MASK) == phase) {
+ /*
+ * Do not read the rest of the completion entry before the
+ * phase bit was validated
+ */
+ dma_rmb();
+
+ /* Handle specific event*/
+ handler_cb = efa_com_get_specific_aenq_cb(edev,
+ aenq_common->group);
+ handler_cb(data, aenq_e); /* call the actual event handler*/
+
+ /* Get next event entry */
+ ci++;
+ processed++;
+
+ if (ci == aenq->depth) {
+ ci = 0;
+ phase = !phase;
+ }
+ aenq_e = &aenq->entries[ci];
+ aenq_common = &aenq_e->aenq_common_desc;
+ }
+
+ aenq->cc += processed;
+ aenq->phase = phase;
+
+ /* Don't update aenq doorbell if there weren't any processed events */
+ if (!processed)
+ return;
+
+ /* barrier not needed in case of writel */
+ writel(aenq->cc, edev->reg_bar + EFA_REGS_AENQ_CONS_DB_OFF);
+}
+
+static void efa_com_mmio_reg_read_resp_addr_init(struct efa_com_dev *edev)
+{
+ struct efa_com_mmio_read *mmio_read = &edev->mmio_read;
+ u32 addr_high;
+ u32 addr_low;
+
+ /* dma_addr_bits is unknown at this point */
+ addr_high = (mmio_read->read_resp_dma_addr >> 32) & GENMASK(31, 0);
+ addr_low = mmio_read->read_resp_dma_addr & GENMASK(31, 0);
+
+ writel(addr_high, edev->reg_bar + EFA_REGS_MMIO_RESP_HI_OFF);
+ writel(addr_low, edev->reg_bar + EFA_REGS_MMIO_RESP_LO_OFF);
+}
+
+int efa_com_mmio_reg_read_init(struct efa_com_dev *edev)
+{
+ struct efa_com_mmio_read *mmio_read = &edev->mmio_read;
+
+ spin_lock_init(&mmio_read->lock);
+ mmio_read->read_resp =
+ dma_alloc_coherent(edev->dmadev, sizeof(*mmio_read->read_resp),
+ &mmio_read->read_resp_dma_addr, GFP_KERNEL);
+ if (!mmio_read->read_resp)
+ return -ENOMEM;
+
+ efa_com_mmio_reg_read_resp_addr_init(edev);
+
+ mmio_read->read_resp->req_id = 0;
+ mmio_read->seq_num = 0;
+ mmio_read->mmio_read_timeout = EFA_REG_READ_TIMEOUT_US;
+
+ return 0;
+}
+
+void efa_com_mmio_reg_read_destroy(struct efa_com_dev *edev)
+{
+ struct efa_com_mmio_read *mmio_read = &edev->mmio_read;
+
+ dma_free_coherent(edev->dmadev, sizeof(*mmio_read->read_resp),
+ mmio_read->read_resp, mmio_read->read_resp_dma_addr);
+}
+
+int efa_com_validate_version(struct efa_com_dev *edev)
+{
+ u32 min_ctrl_ver = 0;
+ u32 ctrl_ver_masked;
+ u32 min_ver = 0;
+ u32 ctrl_ver;
+ u32 ver;
+
+ /*
+ * Make sure the EFA version and the controller version are at least
+ * as the driver expects
+ */
+ ver = efa_com_reg_read32(edev, EFA_REGS_VERSION_OFF);
+ ctrl_ver = efa_com_reg_read32(edev,
+ EFA_REGS_CONTROLLER_VERSION_OFF);
+
+ ibdev_dbg(edev->efa_dev, "efa device version: %d.%d\n",
+ EFA_GET(&ver, EFA_REGS_VERSION_MAJOR_VERSION),
+ EFA_GET(&ver, EFA_REGS_VERSION_MINOR_VERSION));
+
+ EFA_SET(&min_ver, EFA_REGS_VERSION_MAJOR_VERSION,
+ EFA_ADMIN_API_VERSION_MAJOR);
+ EFA_SET(&min_ver, EFA_REGS_VERSION_MINOR_VERSION,
+ EFA_ADMIN_API_VERSION_MINOR);
+ if (ver < min_ver) {
+ ibdev_err(edev->efa_dev,
+ "EFA version is lower than the minimal version the driver supports\n");
+ return -EOPNOTSUPP;
+ }
+
+ ibdev_dbg(
+ edev->efa_dev,
+ "efa controller version: %d.%d.%d implementation version %d\n",
+ EFA_GET(&ctrl_ver, EFA_REGS_CONTROLLER_VERSION_MAJOR_VERSION),
+ EFA_GET(&ctrl_ver, EFA_REGS_CONTROLLER_VERSION_MINOR_VERSION),
+ EFA_GET(&ctrl_ver,
+ EFA_REGS_CONTROLLER_VERSION_SUBMINOR_VERSION),
+ EFA_GET(&ctrl_ver, EFA_REGS_CONTROLLER_VERSION_IMPL_ID));
+
+ ctrl_ver_masked =
+ EFA_GET(&ctrl_ver, EFA_REGS_CONTROLLER_VERSION_MAJOR_VERSION) |
+ EFA_GET(&ctrl_ver, EFA_REGS_CONTROLLER_VERSION_MINOR_VERSION) |
+ EFA_GET(&ctrl_ver,
+ EFA_REGS_CONTROLLER_VERSION_SUBMINOR_VERSION);
+
+ EFA_SET(&min_ctrl_ver, EFA_REGS_CONTROLLER_VERSION_MAJOR_VERSION,
+ EFA_CTRL_MAJOR);
+ EFA_SET(&min_ctrl_ver, EFA_REGS_CONTROLLER_VERSION_MINOR_VERSION,
+ EFA_CTRL_MINOR);
+ EFA_SET(&min_ctrl_ver, EFA_REGS_CONTROLLER_VERSION_SUBMINOR_VERSION,
+ EFA_CTRL_SUB_MINOR);
+ /* Validate the ctrl version without the implementation ID */
+ if (ctrl_ver_masked < min_ctrl_ver) {
+ ibdev_err(edev->efa_dev,
+ "EFA ctrl version is lower than the minimal ctrl version the driver supports\n");
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+/**
+ * efa_com_get_dma_width - Retrieve physical dma address width the device
+ * supports.
+ * @edev: EFA communication layer struct
+ *
+ * Retrieve the maximum physical address bits the device can handle.
+ *
+ * @return: > 0 on Success and negative value otherwise.
+ */
+int efa_com_get_dma_width(struct efa_com_dev *edev)
+{
+ u32 caps = efa_com_reg_read32(edev, EFA_REGS_CAPS_OFF);
+ int width;
+
+ width = EFA_GET(&caps, EFA_REGS_CAPS_DMA_ADDR_WIDTH);
+
+ ibdev_dbg(edev->efa_dev, "DMA width: %d\n", width);
+
+ if (width < 32 || width > 64) {
+ ibdev_err(edev->efa_dev, "DMA width illegal value: %d\n", width);
+ return -EINVAL;
+ }
+
+ edev->dma_addr_bits = width;
+
+ return width;
+}
+
+static int wait_for_reset_state(struct efa_com_dev *edev, u32 timeout, int on)
+{
+ u32 val, i;
+
+ for (i = 0; i < timeout; i++) {
+ val = efa_com_reg_read32(edev, EFA_REGS_DEV_STS_OFF);
+
+ if (EFA_GET(&val, EFA_REGS_DEV_STS_RESET_IN_PROGRESS) == on)
+ return 0;
+
+ ibdev_dbg(edev->efa_dev, "Reset indication val %d\n", val);
+ msleep(EFA_POLL_INTERVAL_MS);
+ }
+
+ return -ETIME;
+}
+
+/**
+ * efa_com_dev_reset - Perform device FLR to the device.
+ * @edev: EFA communication layer struct
+ * @reset_reason: Specify what is the trigger for the reset in case of an error.
+ *
+ * @return - 0 on success, negative value on failure.
+ */
+int efa_com_dev_reset(struct efa_com_dev *edev,
+ enum efa_regs_reset_reason_types reset_reason)
+{
+ u32 stat, timeout, cap;
+ u32 reset_val = 0;
+ int err;
+
+ stat = efa_com_reg_read32(edev, EFA_REGS_DEV_STS_OFF);
+ cap = efa_com_reg_read32(edev, EFA_REGS_CAPS_OFF);
+
+ if (!EFA_GET(&stat, EFA_REGS_DEV_STS_READY)) {
+ ibdev_err(edev->efa_dev,
+ "Device isn't ready, can't reset device\n");
+ return -EINVAL;
+ }
+
+ timeout = EFA_GET(&cap, EFA_REGS_CAPS_RESET_TIMEOUT);
+ if (!timeout) {
+ ibdev_err(edev->efa_dev, "Invalid timeout value\n");
+ return -EINVAL;
+ }
+
+ /* start reset */
+ EFA_SET(&reset_val, EFA_REGS_DEV_CTL_DEV_RESET, 1);
+ EFA_SET(&reset_val, EFA_REGS_DEV_CTL_RESET_REASON, reset_reason);
+ writel(reset_val, edev->reg_bar + EFA_REGS_DEV_CTL_OFF);
+
+ /* reset clears the mmio readless address, restore it */
+ efa_com_mmio_reg_read_resp_addr_init(edev);
+
+ err = wait_for_reset_state(edev, timeout, 1);
+ if (err) {
+ ibdev_err(edev->efa_dev, "Reset indication didn't turn on\n");
+ return err;
+ }
+
+ /* reset done */
+ writel(0, edev->reg_bar + EFA_REGS_DEV_CTL_OFF);
+ err = wait_for_reset_state(edev, timeout, 0);
+ if (err) {
+ ibdev_err(edev->efa_dev, "Reset indication didn't turn off\n");
+ return err;
+ }
+
+ timeout = EFA_GET(&cap, EFA_REGS_CAPS_ADMIN_CMD_TO);
+ if (timeout)
+ /* the resolution of timeout reg is 100ms */
+ edev->aq.completion_timeout = timeout * 100000;
+ else
+ edev->aq.completion_timeout = ADMIN_CMD_TIMEOUT_US;
+
+ return 0;
+}
+
+static int efa_com_create_eq(struct efa_com_dev *edev,
+ struct efa_com_create_eq_params *params,
+ struct efa_com_create_eq_result *result)
+{
+ struct efa_com_admin_queue *aq = &edev->aq;
+ struct efa_admin_create_eq_resp resp = {};
+ struct efa_admin_create_eq_cmd cmd = {};
+ int err;
+
+ cmd.aq_common_descriptor.opcode = EFA_ADMIN_CREATE_EQ;
+ EFA_SET(&cmd.caps, EFA_ADMIN_CREATE_EQ_CMD_ENTRY_SIZE_WORDS,
+ params->entry_size_in_bytes / 4);
+ cmd.depth = params->depth;
+ cmd.event_bitmask = params->event_bitmask;
+ cmd.msix_vec = params->msix_vec;
+
+ efa_com_set_dma_addr(params->dma_addr, &cmd.ba.mem_addr_high,
+ &cmd.ba.mem_addr_low);
+
+ err = efa_com_cmd_exec(aq,
+ (struct efa_admin_aq_entry *)&cmd,
+ sizeof(cmd),
+ (struct efa_admin_acq_entry *)&resp,
+ sizeof(resp));
+ if (err) {
+ ibdev_err_ratelimited(edev->efa_dev,
+ "Failed to create eq[%d]\n", err);
+ return err;
+ }
+
+ result->eqn = resp.eqn;
+
+ return 0;
+}
+
+static void efa_com_destroy_eq(struct efa_com_dev *edev,
+ struct efa_com_destroy_eq_params *params)
+{
+ struct efa_com_admin_queue *aq = &edev->aq;
+ struct efa_admin_destroy_eq_resp resp = {};
+ struct efa_admin_destroy_eq_cmd cmd = {};
+ int err;
+
+ cmd.aq_common_descriptor.opcode = EFA_ADMIN_DESTROY_EQ;
+ cmd.eqn = params->eqn;
+
+ err = efa_com_cmd_exec(aq,
+ (struct efa_admin_aq_entry *)&cmd,
+ sizeof(cmd),
+ (struct efa_admin_acq_entry *)&resp,
+ sizeof(resp));
+ if (err)
+ ibdev_err_ratelimited(edev->efa_dev,
+ "Failed to destroy EQ-%u [%d]\n", cmd.eqn,
+ err);
+}
+
+static void efa_com_arm_eq(struct efa_com_dev *edev, struct efa_com_eq *eeq)
+{
+ u32 val = 0;
+
+ EFA_SET(&val, EFA_REGS_EQ_DB_EQN, eeq->eqn);
+ EFA_SET(&val, EFA_REGS_EQ_DB_ARM, 1);
+
+ writel(val, edev->reg_bar + EFA_REGS_EQ_DB_OFF);
+}
+
+void efa_com_eq_comp_intr_handler(struct efa_com_dev *edev,
+ struct efa_com_eq *eeq)
+{
+ struct efa_admin_eqe *eqe;
+ u32 processed = 0;
+ u8 phase;
+ u32 ci;
+
+ ci = eeq->cc & (eeq->depth - 1);
+ phase = eeq->phase;
+ eqe = &eeq->eqes[ci];
+
+ /* Go over all the events */
+ while ((READ_ONCE(eqe->common) & EFA_ADMIN_EQE_PHASE_MASK) == phase) {
+ /*
+ * Do not read the rest of the completion entry before the
+ * phase bit was validated
+ */
+ dma_rmb();
+
+ eeq->cb(eeq, eqe);
+
+ /* Get next event entry */
+ ci++;
+ processed++;
+
+ if (ci == eeq->depth) {
+ ci = 0;
+ phase = !phase;
+ }
+
+ eqe = &eeq->eqes[ci];
+ }
+
+ eeq->cc += processed;
+ eeq->phase = phase;
+ efa_com_arm_eq(eeq->edev, eeq);
+}
+
+void efa_com_eq_destroy(struct efa_com_dev *edev, struct efa_com_eq *eeq)
+{
+ struct efa_com_destroy_eq_params params = {
+ .eqn = eeq->eqn,
+ };
+
+ efa_com_destroy_eq(edev, &params);
+ dma_free_coherent(edev->dmadev, eeq->depth * sizeof(*eeq->eqes),
+ eeq->eqes, eeq->dma_addr);
+}
+
+int efa_com_eq_init(struct efa_com_dev *edev, struct efa_com_eq *eeq,
+ efa_eqe_handler cb, u16 depth, u8 msix_vec)
+{
+ struct efa_com_create_eq_params params = {};
+ struct efa_com_create_eq_result result = {};
+ int err;
+
+ params.depth = depth;
+ params.entry_size_in_bytes = sizeof(*eeq->eqes);
+ EFA_SET(&params.event_bitmask,
+ EFA_ADMIN_CREATE_EQ_CMD_COMPLETION_EVENTS, 1);
+ params.msix_vec = msix_vec;
+
+ eeq->eqes = dma_alloc_coherent(edev->dmadev,
+ params.depth * sizeof(*eeq->eqes),
+ &params.dma_addr, GFP_KERNEL);
+ if (!eeq->eqes)
+ return -ENOMEM;
+
+ err = efa_com_create_eq(edev, &params, &result);
+ if (err)
+ goto err_free_coherent;
+
+ eeq->eqn = result.eqn;
+ eeq->edev = edev;
+ eeq->dma_addr = params.dma_addr;
+ eeq->phase = 1;
+ eeq->depth = params.depth;
+ eeq->cb = cb;
+ efa_com_arm_eq(edev, eeq);
+
+ return 0;
+
+err_free_coherent:
+ dma_free_coherent(edev->dmadev, params.depth * sizeof(*eeq->eqes),
+ eeq->eqes, params.dma_addr);
+ return err;
+}
diff --git a/drivers/infiniband/hw/efa/efa_com.h b/drivers/infiniband/hw/efa/efa_com.h
new file mode 100644
index 000000000000..4d9ca97e4296
--- /dev/null
+++ b/drivers/infiniband/hw/efa/efa_com.h
@@ -0,0 +1,180 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
+/*
+ * Copyright 2018-2025 Amazon.com, Inc. or its affiliates. All rights reserved.
+ */
+
+#ifndef _EFA_COM_H_
+#define _EFA_COM_H_
+
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/semaphore.h>
+#include <linux/sched.h>
+
+#include <rdma/ib_verbs.h>
+
+#include "efa_common_defs.h"
+#include "efa_admin_defs.h"
+#include "efa_admin_cmds_defs.h"
+#include "efa_regs_defs.h"
+
+#define EFA_MAX_HANDLERS 256
+
+struct efa_com_admin_cq {
+ struct efa_admin_acq_entry *entries;
+ dma_addr_t dma_addr;
+ spinlock_t lock; /* Protects ACQ */
+
+ u16 cc; /* consumer counter */
+ u8 phase;
+};
+
+struct efa_com_admin_sq {
+ struct efa_admin_aq_entry *entries;
+ dma_addr_t dma_addr;
+ spinlock_t lock; /* Protects ASQ */
+
+ u32 __iomem *db_addr;
+
+ u16 cc; /* consumer counter */
+ u16 pc; /* producer counter */
+ u8 phase;
+
+};
+
+/* Don't use anything other than atomic64 */
+struct efa_com_stats_admin {
+ atomic64_t submitted_cmd;
+ atomic64_t completed_cmd;
+ atomic64_t cmd_err;
+ atomic64_t no_completion;
+};
+
+enum {
+ EFA_AQ_STATE_RUNNING_BIT = 0,
+ EFA_AQ_STATE_POLLING_BIT = 1,
+};
+
+struct efa_com_admin_queue {
+ void *dmadev;
+ void *efa_dev;
+ struct efa_comp_ctx *comp_ctx;
+ u32 completion_timeout; /* usecs */
+ u16 poll_interval; /* msecs */
+ u16 depth;
+ struct efa_com_admin_cq cq;
+ struct efa_com_admin_sq sq;
+ u32 msix_vector_idx;
+
+ unsigned long state;
+
+ /* Count the number of available admin commands */
+ struct semaphore avail_cmds;
+
+ struct efa_com_stats_admin stats;
+
+ spinlock_t comp_ctx_lock; /* Protects completion context pool */
+ u32 *comp_ctx_pool;
+ u16 comp_ctx_pool_next;
+};
+
+struct efa_aenq_handlers;
+struct efa_com_eq;
+typedef void (*efa_eqe_handler)(struct efa_com_eq *eeq,
+ struct efa_admin_eqe *eqe);
+
+struct efa_com_aenq {
+ struct efa_admin_aenq_entry *entries;
+ struct efa_aenq_handlers *aenq_handlers;
+ dma_addr_t dma_addr;
+ u32 cc; /* consumer counter */
+ u32 msix_vector_idx;
+ u16 depth;
+ u8 phase;
+};
+
+struct efa_com_mmio_read {
+ struct efa_admin_mmio_req_read_less_resp *read_resp;
+ dma_addr_t read_resp_dma_addr;
+ u16 seq_num;
+ u16 mmio_read_timeout; /* usecs */
+ /* serializes mmio reads */
+ spinlock_t lock;
+};
+
+struct efa_com_dev {
+ struct efa_com_admin_queue aq;
+ struct efa_com_aenq aenq;
+ u8 __iomem *reg_bar;
+ void *dmadev;
+ void *efa_dev;
+ u32 supported_features;
+ u32 dma_addr_bits;
+
+ struct efa_com_mmio_read mmio_read;
+};
+
+struct efa_com_eq {
+ struct efa_com_dev *edev;
+ struct efa_admin_eqe *eqes;
+ dma_addr_t dma_addr;
+ u32 cc; /* Consumer counter */
+ u16 eqn;
+ u16 depth;
+ u8 phase;
+ efa_eqe_handler cb;
+};
+
+struct efa_com_create_eq_params {
+ dma_addr_t dma_addr;
+ u32 event_bitmask;
+ u16 depth;
+ u8 entry_size_in_bytes;
+ u8 msix_vec;
+};
+
+struct efa_com_create_eq_result {
+ u16 eqn;
+};
+
+struct efa_com_destroy_eq_params {
+ u16 eqn;
+};
+
+typedef void (*efa_aenq_handler)(void *data,
+ struct efa_admin_aenq_entry *aenq_e);
+
+/* Holds aenq handlers. Indexed by AENQ event group */
+struct efa_aenq_handlers {
+ efa_aenq_handler handlers[EFA_MAX_HANDLERS];
+ efa_aenq_handler unimplemented_handler;
+};
+
+void efa_com_set_dma_addr(dma_addr_t addr, u32 *addr_high, u32 *addr_low);
+int efa_com_admin_init(struct efa_com_dev *edev,
+ struct efa_aenq_handlers *aenq_handlers);
+void efa_com_admin_destroy(struct efa_com_dev *edev);
+int efa_com_eq_init(struct efa_com_dev *edev, struct efa_com_eq *eeq,
+ efa_eqe_handler cb, u16 depth, u8 msix_vec);
+void efa_com_eq_destroy(struct efa_com_dev *edev, struct efa_com_eq *eeq);
+int efa_com_dev_reset(struct efa_com_dev *edev,
+ enum efa_regs_reset_reason_types reset_reason);
+void efa_com_set_admin_polling_mode(struct efa_com_dev *edev, bool polling);
+void efa_com_admin_q_comp_intr_handler(struct efa_com_dev *edev);
+int efa_com_mmio_reg_read_init(struct efa_com_dev *edev);
+void efa_com_mmio_reg_read_destroy(struct efa_com_dev *edev);
+
+int efa_com_validate_version(struct efa_com_dev *edev);
+int efa_com_get_dma_width(struct efa_com_dev *edev);
+
+int efa_com_cmd_exec(struct efa_com_admin_queue *aq,
+ struct efa_admin_aq_entry *cmd,
+ size_t cmd_size,
+ struct efa_admin_acq_entry *comp,
+ size_t comp_size);
+void efa_com_aenq_intr_handler(struct efa_com_dev *edev, void *data);
+void efa_com_eq_comp_intr_handler(struct efa_com_dev *edev,
+ struct efa_com_eq *eeq);
+
+#endif /* _EFA_COM_H_ */
diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.c b/drivers/infiniband/hw/efa/efa_com_cmd.c
new file mode 100644
index 000000000000..9ead02800ac7
--- /dev/null
+++ b/drivers/infiniband/hw/efa/efa_com_cmd.c
@@ -0,0 +1,838 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
+/*
+ * Copyright 2018-2025 Amazon.com, Inc. or its affiliates. All rights reserved.
+ */
+
+#include "efa_com.h"
+#include "efa_com_cmd.h"
+
+int efa_com_create_qp(struct efa_com_dev *edev,
+ struct efa_com_create_qp_params *params,
+ struct efa_com_create_qp_result *res)
+{
+ struct efa_admin_create_qp_cmd create_qp_cmd = {};
+ struct efa_admin_create_qp_resp cmd_completion;
+ struct efa_com_admin_queue *aq = &edev->aq;
+ int err;
+
+ create_qp_cmd.aq_common_desc.opcode = EFA_ADMIN_CREATE_QP;
+
+ create_qp_cmd.pd = params->pd;
+ create_qp_cmd.qp_type = params->qp_type;
+ create_qp_cmd.rq_base_addr = params->rq_base_addr;
+ create_qp_cmd.send_cq_idx = params->send_cq_idx;
+ create_qp_cmd.recv_cq_idx = params->recv_cq_idx;
+ create_qp_cmd.qp_alloc_size.send_queue_ring_size =
+ params->sq_ring_size_in_bytes;
+ create_qp_cmd.qp_alloc_size.send_queue_depth =
+ params->sq_depth;
+ create_qp_cmd.qp_alloc_size.recv_queue_ring_size =
+ params->rq_ring_size_in_bytes;
+ create_qp_cmd.qp_alloc_size.recv_queue_depth =
+ params->rq_depth;
+ create_qp_cmd.uar = params->uarn;
+ create_qp_cmd.sl = params->sl;
+
+ if (params->unsolicited_write_recv)
+ EFA_SET(&create_qp_cmd.flags, EFA_ADMIN_CREATE_QP_CMD_UNSOLICITED_WRITE_RECV, 1);
+
+ err = efa_com_cmd_exec(aq,
+ (struct efa_admin_aq_entry *)&create_qp_cmd,
+ sizeof(create_qp_cmd),
+ (struct efa_admin_acq_entry *)&cmd_completion,
+ sizeof(cmd_completion));
+ if (err) {
+ ibdev_err_ratelimited(edev->efa_dev,
+ "Failed to create qp [%d]\n", err);
+ return err;
+ }
+
+ res->qp_handle = cmd_completion.qp_handle;
+ res->qp_num = cmd_completion.qp_num;
+ res->sq_db_offset = cmd_completion.sq_db_offset;
+ res->rq_db_offset = cmd_completion.rq_db_offset;
+ res->llq_descriptors_offset = cmd_completion.llq_descriptors_offset;
+ res->send_sub_cq_idx = cmd_completion.send_sub_cq_idx;
+ res->recv_sub_cq_idx = cmd_completion.recv_sub_cq_idx;
+
+ return 0;
+}
+
+int efa_com_modify_qp(struct efa_com_dev *edev,
+ struct efa_com_modify_qp_params *params)
+{
+ struct efa_com_admin_queue *aq = &edev->aq;
+ struct efa_admin_modify_qp_cmd cmd = {};
+ struct efa_admin_modify_qp_resp resp;
+ int err;
+
+ cmd.aq_common_desc.opcode = EFA_ADMIN_MODIFY_QP;
+ cmd.modify_mask = params->modify_mask;
+ cmd.qp_handle = params->qp_handle;
+ cmd.qp_state = params->qp_state;
+ cmd.cur_qp_state = params->cur_qp_state;
+ cmd.qkey = params->qkey;
+ cmd.sq_psn = params->sq_psn;
+ cmd.sq_drained_async_notify = params->sq_drained_async_notify;
+ cmd.rnr_retry = params->rnr_retry;
+
+ err = efa_com_cmd_exec(aq,
+ (struct efa_admin_aq_entry *)&cmd,
+ sizeof(cmd),
+ (struct efa_admin_acq_entry *)&resp,
+ sizeof(resp));
+ if (err) {
+ ibdev_err_ratelimited(
+ edev->efa_dev,
+ "Failed to modify qp-%u modify_mask[%#x] [%d]\n",
+ cmd.qp_handle, cmd.modify_mask, err);
+ return err;
+ }
+
+ return 0;
+}
+
+int efa_com_query_qp(struct efa_com_dev *edev,
+ struct efa_com_query_qp_params *params,
+ struct efa_com_query_qp_result *result)
+{
+ struct efa_com_admin_queue *aq = &edev->aq;
+ struct efa_admin_query_qp_cmd cmd = {};
+ struct efa_admin_query_qp_resp resp;
+ int err;
+
+ cmd.aq_common_desc.opcode = EFA_ADMIN_QUERY_QP;
+ cmd.qp_handle = params->qp_handle;
+
+ err = efa_com_cmd_exec(aq,
+ (struct efa_admin_aq_entry *)&cmd,
+ sizeof(cmd),
+ (struct efa_admin_acq_entry *)&resp,
+ sizeof(resp));
+ if (err) {
+ ibdev_err_ratelimited(edev->efa_dev,
+ "Failed to query qp-%u [%d]\n",
+ cmd.qp_handle, err);
+ return err;
+ }
+
+ result->qp_state = resp.qp_state;
+ result->qkey = resp.qkey;
+ result->sq_draining = resp.sq_draining;
+ result->sq_psn = resp.sq_psn;
+ result->rnr_retry = resp.rnr_retry;
+
+ return 0;
+}
+
+int efa_com_destroy_qp(struct efa_com_dev *edev,
+ struct efa_com_destroy_qp_params *params)
+{
+ struct efa_admin_destroy_qp_resp cmd_completion;
+ struct efa_admin_destroy_qp_cmd qp_cmd = {};
+ struct efa_com_admin_queue *aq = &edev->aq;
+ int err;
+
+ qp_cmd.aq_common_desc.opcode = EFA_ADMIN_DESTROY_QP;
+ qp_cmd.qp_handle = params->qp_handle;
+
+ err = efa_com_cmd_exec(aq,
+ (struct efa_admin_aq_entry *)&qp_cmd,
+ sizeof(qp_cmd),
+ (struct efa_admin_acq_entry *)&cmd_completion,
+ sizeof(cmd_completion));
+ if (err) {
+ ibdev_err_ratelimited(edev->efa_dev,
+ "Failed to destroy qp-%u [%d]\n",
+ qp_cmd.qp_handle, err);
+ return err;
+ }
+
+ return 0;
+}
+
+int efa_com_create_cq(struct efa_com_dev *edev,
+ struct efa_com_create_cq_params *params,
+ struct efa_com_create_cq_result *result)
+{
+ struct efa_admin_create_cq_resp cmd_completion = {};
+ struct efa_admin_create_cq_cmd create_cmd = {};
+ struct efa_com_admin_queue *aq = &edev->aq;
+ int err;
+
+ create_cmd.aq_common_desc.opcode = EFA_ADMIN_CREATE_CQ;
+ EFA_SET(&create_cmd.cq_caps_2,
+ EFA_ADMIN_CREATE_CQ_CMD_CQ_ENTRY_SIZE_WORDS,
+ params->entry_size_in_bytes / 4);
+ create_cmd.sub_cq_depth = params->sub_cq_depth;
+ create_cmd.num_sub_cqs = params->num_sub_cqs;
+ create_cmd.uar = params->uarn;
+ if (params->interrupt_mode_enabled) {
+ EFA_SET(&create_cmd.cq_caps_1,
+ EFA_ADMIN_CREATE_CQ_CMD_INTERRUPT_MODE_ENABLED, 1);
+ create_cmd.eqn = params->eqn;
+ }
+ if (params->set_src_addr) {
+ EFA_SET(&create_cmd.cq_caps_2,
+ EFA_ADMIN_CREATE_CQ_CMD_SET_SRC_ADDR, 1);
+ }
+ efa_com_set_dma_addr(params->dma_addr,
+ &create_cmd.cq_ba.mem_addr_high,
+ &create_cmd.cq_ba.mem_addr_low);
+
+ err = efa_com_cmd_exec(aq,
+ (struct efa_admin_aq_entry *)&create_cmd,
+ sizeof(create_cmd),
+ (struct efa_admin_acq_entry *)&cmd_completion,
+ sizeof(cmd_completion));
+ if (err) {
+ ibdev_err_ratelimited(edev->efa_dev,
+ "Failed to create cq[%d]\n", err);
+ return err;
+ }
+
+ result->cq_idx = cmd_completion.cq_idx;
+ result->actual_depth = params->sub_cq_depth;
+ result->db_off = cmd_completion.db_offset;
+ result->db_valid = EFA_GET(&cmd_completion.flags,
+ EFA_ADMIN_CREATE_CQ_RESP_DB_VALID);
+
+ return 0;
+}
+
+int efa_com_destroy_cq(struct efa_com_dev *edev,
+ struct efa_com_destroy_cq_params *params)
+{
+ struct efa_admin_destroy_cq_cmd destroy_cmd = {};
+ struct efa_admin_destroy_cq_resp destroy_resp;
+ struct efa_com_admin_queue *aq = &edev->aq;
+ int err;
+
+ destroy_cmd.cq_idx = params->cq_idx;
+ destroy_cmd.aq_common_desc.opcode = EFA_ADMIN_DESTROY_CQ;
+
+ err = efa_com_cmd_exec(aq,
+ (struct efa_admin_aq_entry *)&destroy_cmd,
+ sizeof(destroy_cmd),
+ (struct efa_admin_acq_entry *)&destroy_resp,
+ sizeof(destroy_resp));
+
+ if (err) {
+ ibdev_err_ratelimited(edev->efa_dev,
+ "Failed to destroy CQ-%u [%d]\n",
+ params->cq_idx, err);
+ return err;
+ }
+
+ return 0;
+}
+
+int efa_com_register_mr(struct efa_com_dev *edev,
+ struct efa_com_reg_mr_params *params,
+ struct efa_com_reg_mr_result *result)
+{
+ struct efa_admin_reg_mr_resp cmd_completion;
+ struct efa_com_admin_queue *aq = &edev->aq;
+ struct efa_admin_reg_mr_cmd mr_cmd = {};
+ int err;
+
+ mr_cmd.aq_common_desc.opcode = EFA_ADMIN_REG_MR;
+ mr_cmd.pd = params->pd;
+ mr_cmd.mr_length = params->mr_length_in_bytes;
+ EFA_SET(&mr_cmd.flags, EFA_ADMIN_REG_MR_CMD_PHYS_PAGE_SIZE_SHIFT,
+ params->page_shift);
+ mr_cmd.iova = params->iova;
+ mr_cmd.permissions = params->permissions;
+
+ if (params->inline_pbl) {
+ memcpy(mr_cmd.pbl.inline_pbl_array,
+ params->pbl.inline_pbl_array,
+ sizeof(mr_cmd.pbl.inline_pbl_array));
+ } else {
+ mr_cmd.pbl.pbl.length = params->pbl.pbl.length;
+ mr_cmd.pbl.pbl.address.mem_addr_low =
+ params->pbl.pbl.address.mem_addr_low;
+ mr_cmd.pbl.pbl.address.mem_addr_high =
+ params->pbl.pbl.address.mem_addr_high;
+ EFA_SET(&mr_cmd.aq_common_desc.flags,
+ EFA_ADMIN_AQ_COMMON_DESC_CTRL_DATA, 1);
+ if (params->indirect)
+ EFA_SET(&mr_cmd.aq_common_desc.flags,
+ EFA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT, 1);
+ }
+
+ err = efa_com_cmd_exec(aq,
+ (struct efa_admin_aq_entry *)&mr_cmd,
+ sizeof(mr_cmd),
+ (struct efa_admin_acq_entry *)&cmd_completion,
+ sizeof(cmd_completion));
+ if (err) {
+ ibdev_err_ratelimited(edev->efa_dev,
+ "Failed to register mr [%d]\n", err);
+ return err;
+ }
+
+ result->l_key = cmd_completion.l_key;
+ result->r_key = cmd_completion.r_key;
+ result->ic_info.recv_ic_id = cmd_completion.recv_ic_id;
+ result->ic_info.rdma_read_ic_id = cmd_completion.rdma_read_ic_id;
+ result->ic_info.rdma_recv_ic_id = cmd_completion.rdma_recv_ic_id;
+ result->ic_info.recv_ic_id_valid = EFA_GET(&cmd_completion.validity,
+ EFA_ADMIN_REG_MR_RESP_RECV_IC_ID);
+ result->ic_info.rdma_read_ic_id_valid = EFA_GET(&cmd_completion.validity,
+ EFA_ADMIN_REG_MR_RESP_RDMA_READ_IC_ID);
+ result->ic_info.rdma_recv_ic_id_valid = EFA_GET(&cmd_completion.validity,
+ EFA_ADMIN_REG_MR_RESP_RDMA_RECV_IC_ID);
+
+ return 0;
+}
+
+int efa_com_dereg_mr(struct efa_com_dev *edev,
+ struct efa_com_dereg_mr_params *params)
+{
+ struct efa_admin_dereg_mr_resp cmd_completion;
+ struct efa_com_admin_queue *aq = &edev->aq;
+ struct efa_admin_dereg_mr_cmd mr_cmd = {};
+ int err;
+
+ mr_cmd.aq_common_desc.opcode = EFA_ADMIN_DEREG_MR;
+ mr_cmd.l_key = params->l_key;
+
+ err = efa_com_cmd_exec(aq,
+ (struct efa_admin_aq_entry *)&mr_cmd,
+ sizeof(mr_cmd),
+ (struct efa_admin_acq_entry *)&cmd_completion,
+ sizeof(cmd_completion));
+ if (err) {
+ ibdev_err_ratelimited(edev->efa_dev,
+ "Failed to de-register mr(lkey-%u) [%d]\n",
+ mr_cmd.l_key, err);
+ return err;
+ }
+
+ return 0;
+}
+
+int efa_com_create_ah(struct efa_com_dev *edev,
+ struct efa_com_create_ah_params *params,
+ struct efa_com_create_ah_result *result)
+{
+ struct efa_admin_create_ah_resp cmd_completion;
+ struct efa_com_admin_queue *aq = &edev->aq;
+ struct efa_admin_create_ah_cmd ah_cmd = {};
+ int err;
+
+ ah_cmd.aq_common_desc.opcode = EFA_ADMIN_CREATE_AH;
+
+ memcpy(ah_cmd.dest_addr, params->dest_addr, sizeof(ah_cmd.dest_addr));
+ ah_cmd.pd = params->pdn;
+
+ err = efa_com_cmd_exec(aq,
+ (struct efa_admin_aq_entry *)&ah_cmd,
+ sizeof(ah_cmd),
+ (struct efa_admin_acq_entry *)&cmd_completion,
+ sizeof(cmd_completion));
+ if (err) {
+ ibdev_err_ratelimited(edev->efa_dev,
+ "Failed to create ah for %pI6 [%d]\n",
+ ah_cmd.dest_addr, err);
+ return err;
+ }
+
+ result->ah = cmd_completion.ah;
+
+ return 0;
+}
+
+int efa_com_destroy_ah(struct efa_com_dev *edev,
+ struct efa_com_destroy_ah_params *params)
+{
+ struct efa_admin_destroy_ah_resp cmd_completion;
+ struct efa_admin_destroy_ah_cmd ah_cmd = {};
+ struct efa_com_admin_queue *aq = &edev->aq;
+ int err;
+
+ ah_cmd.aq_common_desc.opcode = EFA_ADMIN_DESTROY_AH;
+ ah_cmd.ah = params->ah;
+ ah_cmd.pd = params->pdn;
+
+ err = efa_com_cmd_exec(aq,
+ (struct efa_admin_aq_entry *)&ah_cmd,
+ sizeof(ah_cmd),
+ (struct efa_admin_acq_entry *)&cmd_completion,
+ sizeof(cmd_completion));
+ if (err) {
+ ibdev_err_ratelimited(edev->efa_dev,
+ "Failed to destroy ah-%d pd-%d [%d]\n",
+ ah_cmd.ah, ah_cmd.pd, err);
+ return err;
+ }
+
+ return 0;
+}
+
+bool
+efa_com_check_supported_feature_id(struct efa_com_dev *edev,
+ enum efa_admin_aq_feature_id feature_id)
+{
+ u32 feature_mask = 1 << feature_id;
+
+ /* Device attributes is always supported */
+ if (feature_id != EFA_ADMIN_DEVICE_ATTR &&
+ !(edev->supported_features & feature_mask))
+ return false;
+
+ return true;
+}
+
+static int efa_com_get_feature_ex(struct efa_com_dev *edev,
+ struct efa_admin_get_feature_resp *get_resp,
+ enum efa_admin_aq_feature_id feature_id,
+ dma_addr_t control_buf_dma_addr,
+ u32 control_buff_size)
+{
+ struct efa_admin_get_feature_cmd get_cmd = {};
+ struct efa_com_admin_queue *aq;
+ int err;
+
+ if (!efa_com_check_supported_feature_id(edev, feature_id)) {
+ ibdev_err_ratelimited(edev->efa_dev,
+ "Feature %d isn't supported\n",
+ feature_id);
+ return -EOPNOTSUPP;
+ }
+
+ aq = &edev->aq;
+
+ get_cmd.aq_common_descriptor.opcode = EFA_ADMIN_GET_FEATURE;
+
+ if (control_buff_size)
+ EFA_SET(&get_cmd.aq_common_descriptor.flags,
+ EFA_ADMIN_AQ_COMMON_DESC_CTRL_DATA, 1);
+
+ efa_com_set_dma_addr(control_buf_dma_addr,
+ &get_cmd.control_buffer.address.mem_addr_high,
+ &get_cmd.control_buffer.address.mem_addr_low);
+
+ get_cmd.control_buffer.length = control_buff_size;
+ get_cmd.feature_common.feature_id = feature_id;
+ err = efa_com_cmd_exec(aq,
+ (struct efa_admin_aq_entry *)
+ &get_cmd,
+ sizeof(get_cmd),
+ (struct efa_admin_acq_entry *)
+ get_resp,
+ sizeof(*get_resp));
+
+ if (err) {
+ ibdev_err_ratelimited(
+ edev->efa_dev,
+ "Failed to submit get_feature command %d [%d]\n",
+ feature_id, err);
+ return err;
+ }
+
+ return 0;
+}
+
+static int efa_com_get_feature(struct efa_com_dev *edev,
+ struct efa_admin_get_feature_resp *get_resp,
+ enum efa_admin_aq_feature_id feature_id)
+{
+ return efa_com_get_feature_ex(edev, get_resp, feature_id, 0, 0);
+}
+
+int efa_com_get_device_attr(struct efa_com_dev *edev,
+ struct efa_com_get_device_attr_result *result)
+{
+ struct efa_admin_get_feature_resp resp;
+ int err;
+
+ err = efa_com_get_feature(edev, &resp, EFA_ADMIN_DEVICE_ATTR);
+ if (err) {
+ ibdev_err_ratelimited(edev->efa_dev,
+ "Failed to get device attributes %d\n",
+ err);
+ return err;
+ }
+
+ result->page_size_cap = resp.u.device_attr.page_size_cap;
+ result->fw_version = resp.u.device_attr.fw_version;
+ result->admin_api_version = resp.u.device_attr.admin_api_version;
+ result->device_version = resp.u.device_attr.device_version;
+ result->supported_features = resp.u.device_attr.supported_features;
+ result->phys_addr_width = resp.u.device_attr.phys_addr_width;
+ result->virt_addr_width = resp.u.device_attr.virt_addr_width;
+ result->db_bar = resp.u.device_attr.db_bar;
+ result->max_rdma_size = resp.u.device_attr.max_rdma_size;
+ result->device_caps = resp.u.device_attr.device_caps;
+ result->guid = resp.u.device_attr.guid;
+ result->max_link_speed_gbps = resp.u.device_attr.max_link_speed_gbps;
+
+ if (result->admin_api_version < 1) {
+ ibdev_err_ratelimited(
+ edev->efa_dev,
+ "Failed to get device attr api version [%u < 1]\n",
+ result->admin_api_version);
+ return -EINVAL;
+ }
+
+ edev->supported_features = resp.u.device_attr.supported_features;
+ err = efa_com_get_feature(edev, &resp,
+ EFA_ADMIN_QUEUE_ATTR);
+ if (err) {
+ ibdev_err_ratelimited(edev->efa_dev,
+ "Failed to get queue attributes %d\n",
+ err);
+ return err;
+ }
+
+ result->max_qp = resp.u.queue_attr.max_qp;
+ result->max_sq_depth = resp.u.queue_attr.max_sq_depth;
+ result->max_rq_depth = resp.u.queue_attr.max_rq_depth;
+ result->max_cq = resp.u.queue_attr.max_cq;
+ result->max_cq_depth = resp.u.queue_attr.max_cq_depth;
+ result->inline_buf_size = resp.u.queue_attr.inline_buf_size;
+ result->max_sq_sge = resp.u.queue_attr.max_wr_send_sges;
+ result->max_rq_sge = resp.u.queue_attr.max_wr_recv_sges;
+ result->max_mr = resp.u.queue_attr.max_mr;
+ result->max_mr_pages = resp.u.queue_attr.max_mr_pages;
+ result->max_pd = resp.u.queue_attr.max_pd;
+ result->max_ah = resp.u.queue_attr.max_ah;
+ result->max_llq_size = resp.u.queue_attr.max_llq_size;
+ result->sub_cqs_per_cq = resp.u.queue_attr.sub_cqs_per_cq;
+ result->max_wr_rdma_sge = resp.u.queue_attr.max_wr_rdma_sges;
+ result->max_tx_batch = resp.u.queue_attr.max_tx_batch;
+ result->min_sq_depth = resp.u.queue_attr.min_sq_depth;
+
+ err = efa_com_get_feature(edev, &resp, EFA_ADMIN_NETWORK_ATTR);
+ if (err) {
+ ibdev_err_ratelimited(edev->efa_dev,
+ "Failed to get network attributes %d\n",
+ err);
+ return err;
+ }
+
+ memcpy(result->addr, resp.u.network_attr.addr,
+ sizeof(resp.u.network_attr.addr));
+ result->mtu = resp.u.network_attr.mtu;
+
+ if (efa_com_check_supported_feature_id(edev,
+ EFA_ADMIN_EVENT_QUEUE_ATTR)) {
+ err = efa_com_get_feature(edev, &resp,
+ EFA_ADMIN_EVENT_QUEUE_ATTR);
+ if (err) {
+ ibdev_err_ratelimited(
+ edev->efa_dev,
+ "Failed to get event queue attributes %d\n",
+ err);
+ return err;
+ }
+
+ result->max_eq = resp.u.event_queue_attr.max_eq;
+ result->max_eq_depth = resp.u.event_queue_attr.max_eq_depth;
+ result->event_bitmask = resp.u.event_queue_attr.event_bitmask;
+ }
+
+ return 0;
+}
+
+int efa_com_get_hw_hints(struct efa_com_dev *edev,
+ struct efa_com_get_hw_hints_result *result)
+{
+ struct efa_admin_get_feature_resp resp;
+ int err;
+
+ err = efa_com_get_feature(edev, &resp, EFA_ADMIN_HW_HINTS);
+ if (err) {
+ ibdev_err_ratelimited(edev->efa_dev,
+ "Failed to get hw hints %d\n", err);
+ return err;
+ }
+
+ result->admin_completion_timeout = resp.u.hw_hints.admin_completion_timeout;
+ result->driver_watchdog_timeout = resp.u.hw_hints.driver_watchdog_timeout;
+ result->mmio_read_timeout = resp.u.hw_hints.mmio_read_timeout;
+ result->poll_interval = resp.u.hw_hints.poll_interval;
+
+ return 0;
+}
+
+int efa_com_set_feature_ex(struct efa_com_dev *edev,
+ struct efa_admin_set_feature_resp *set_resp,
+ struct efa_admin_set_feature_cmd *set_cmd,
+ enum efa_admin_aq_feature_id feature_id,
+ dma_addr_t control_buf_dma_addr,
+ u32 control_buff_size)
+{
+ struct efa_com_admin_queue *aq;
+ int err;
+
+ if (!efa_com_check_supported_feature_id(edev, feature_id)) {
+ ibdev_err_ratelimited(edev->efa_dev,
+ "Feature %d isn't supported\n",
+ feature_id);
+ return -EOPNOTSUPP;
+ }
+
+ aq = &edev->aq;
+
+ set_cmd->aq_common_descriptor.opcode = EFA_ADMIN_SET_FEATURE;
+ if (control_buff_size) {
+ set_cmd->aq_common_descriptor.flags = 0;
+ EFA_SET(&set_cmd->aq_common_descriptor.flags,
+ EFA_ADMIN_AQ_COMMON_DESC_CTRL_DATA, 1);
+ efa_com_set_dma_addr(control_buf_dma_addr,
+ &set_cmd->control_buffer.address.mem_addr_high,
+ &set_cmd->control_buffer.address.mem_addr_low);
+ }
+
+ set_cmd->control_buffer.length = control_buff_size;
+ set_cmd->feature_common.feature_id = feature_id;
+ err = efa_com_cmd_exec(aq,
+ (struct efa_admin_aq_entry *)set_cmd,
+ sizeof(*set_cmd),
+ (struct efa_admin_acq_entry *)set_resp,
+ sizeof(*set_resp));
+
+ if (err) {
+ ibdev_err_ratelimited(
+ edev->efa_dev,
+ "Failed to submit set_feature command %d error: %d\n",
+ feature_id, err);
+ return err;
+ }
+
+ return 0;
+}
+
+static int efa_com_set_feature(struct efa_com_dev *edev,
+ struct efa_admin_set_feature_resp *set_resp,
+ struct efa_admin_set_feature_cmd *set_cmd,
+ enum efa_admin_aq_feature_id feature_id)
+{
+ return efa_com_set_feature_ex(edev, set_resp, set_cmd, feature_id,
+ 0, 0);
+}
+
+int efa_com_set_aenq_config(struct efa_com_dev *edev, u32 groups)
+{
+ struct efa_admin_get_feature_resp get_resp;
+ struct efa_admin_set_feature_resp set_resp;
+ struct efa_admin_set_feature_cmd cmd = {};
+ int err;
+
+ ibdev_dbg(edev->efa_dev, "Configuring aenq with groups[%#x]\n", groups);
+
+ err = efa_com_get_feature(edev, &get_resp, EFA_ADMIN_AENQ_CONFIG);
+ if (err) {
+ ibdev_err_ratelimited(edev->efa_dev,
+ "Failed to get aenq attributes: %d\n",
+ err);
+ return err;
+ }
+
+ ibdev_dbg(edev->efa_dev,
+ "Get aenq groups: supported[%#x] enabled[%#x]\n",
+ get_resp.u.aenq.supported_groups,
+ get_resp.u.aenq.enabled_groups);
+
+ if ((get_resp.u.aenq.supported_groups & groups) != groups) {
+ ibdev_err_ratelimited(
+ edev->efa_dev,
+ "Trying to set unsupported aenq groups[%#x] supported[%#x]\n",
+ groups, get_resp.u.aenq.supported_groups);
+ return -EOPNOTSUPP;
+ }
+
+ cmd.u.aenq.enabled_groups = groups;
+ err = efa_com_set_feature(edev, &set_resp, &cmd,
+ EFA_ADMIN_AENQ_CONFIG);
+ if (err) {
+ ibdev_err_ratelimited(edev->efa_dev,
+ "Failed to set aenq attributes: %d\n",
+ err);
+ return err;
+ }
+
+ return 0;
+}
+
+int efa_com_alloc_pd(struct efa_com_dev *edev,
+ struct efa_com_alloc_pd_result *result)
+{
+ struct efa_com_admin_queue *aq = &edev->aq;
+ struct efa_admin_alloc_pd_cmd cmd = {};
+ struct efa_admin_alloc_pd_resp resp;
+ int err;
+
+ cmd.aq_common_descriptor.opcode = EFA_ADMIN_ALLOC_PD;
+
+ err = efa_com_cmd_exec(aq,
+ (struct efa_admin_aq_entry *)&cmd,
+ sizeof(cmd),
+ (struct efa_admin_acq_entry *)&resp,
+ sizeof(resp));
+ if (err) {
+ ibdev_err_ratelimited(edev->efa_dev,
+ "Failed to allocate pd[%d]\n", err);
+ return err;
+ }
+
+ result->pdn = resp.pd;
+
+ return 0;
+}
+
+int efa_com_dealloc_pd(struct efa_com_dev *edev,
+ struct efa_com_dealloc_pd_params *params)
+{
+ struct efa_com_admin_queue *aq = &edev->aq;
+ struct efa_admin_dealloc_pd_cmd cmd = {};
+ struct efa_admin_dealloc_pd_resp resp;
+ int err;
+
+ cmd.aq_common_descriptor.opcode = EFA_ADMIN_DEALLOC_PD;
+ cmd.pd = params->pdn;
+
+ err = efa_com_cmd_exec(aq,
+ (struct efa_admin_aq_entry *)&cmd,
+ sizeof(cmd),
+ (struct efa_admin_acq_entry *)&resp,
+ sizeof(resp));
+ if (err) {
+ ibdev_err_ratelimited(edev->efa_dev,
+ "Failed to deallocate pd-%u [%d]\n",
+ cmd.pd, err);
+ return err;
+ }
+
+ return 0;
+}
+
+int efa_com_alloc_uar(struct efa_com_dev *edev,
+ struct efa_com_alloc_uar_result *result)
+{
+ struct efa_com_admin_queue *aq = &edev->aq;
+ struct efa_admin_alloc_uar_cmd cmd = {};
+ struct efa_admin_alloc_uar_resp resp;
+ int err;
+
+ cmd.aq_common_descriptor.opcode = EFA_ADMIN_ALLOC_UAR;
+
+ err = efa_com_cmd_exec(aq,
+ (struct efa_admin_aq_entry *)&cmd,
+ sizeof(cmd),
+ (struct efa_admin_acq_entry *)&resp,
+ sizeof(resp));
+ if (err) {
+ ibdev_err_ratelimited(edev->efa_dev,
+ "Failed to allocate uar[%d]\n", err);
+ return err;
+ }
+
+ result->uarn = resp.uar;
+
+ return 0;
+}
+
+int efa_com_dealloc_uar(struct efa_com_dev *edev,
+ struct efa_com_dealloc_uar_params *params)
+{
+ struct efa_com_admin_queue *aq = &edev->aq;
+ struct efa_admin_dealloc_uar_cmd cmd = {};
+ struct efa_admin_dealloc_uar_resp resp;
+ int err;
+
+ cmd.aq_common_descriptor.opcode = EFA_ADMIN_DEALLOC_UAR;
+ cmd.uar = params->uarn;
+
+ err = efa_com_cmd_exec(aq,
+ (struct efa_admin_aq_entry *)&cmd,
+ sizeof(cmd),
+ (struct efa_admin_acq_entry *)&resp,
+ sizeof(resp));
+ if (err) {
+ ibdev_err_ratelimited(edev->efa_dev,
+ "Failed to deallocate uar-%u [%d]\n",
+ cmd.uar, err);
+ return err;
+ }
+
+ return 0;
+}
+
+int efa_com_get_stats(struct efa_com_dev *edev,
+ struct efa_com_get_stats_params *params,
+ union efa_com_get_stats_result *result)
+{
+ struct efa_com_admin_queue *aq = &edev->aq;
+ struct efa_admin_aq_get_stats_cmd cmd = {};
+ struct efa_admin_acq_get_stats_resp resp;
+ struct efa_admin_rdma_write_stats *rws;
+ struct efa_admin_rdma_read_stats *rrs;
+ struct efa_admin_messages_stats *ms;
+ struct efa_admin_network_stats *ns;
+ struct efa_admin_basic_stats *bs;
+ int err;
+
+ cmd.aq_common_descriptor.opcode = EFA_ADMIN_GET_STATS;
+ cmd.type = params->type;
+ cmd.scope = params->scope;
+ cmd.scope_modifier = params->scope_modifier;
+
+ err = efa_com_cmd_exec(aq,
+ (struct efa_admin_aq_entry *)&cmd,
+ sizeof(cmd),
+ (struct efa_admin_acq_entry *)&resp,
+ sizeof(resp));
+ if (err) {
+ ibdev_err_ratelimited(
+ edev->efa_dev,
+ "Failed to get stats type-%u scope-%u.%u [%d]\n",
+ cmd.type, cmd.scope, cmd.scope_modifier, err);
+ return err;
+ }
+
+ switch (cmd.type) {
+ case EFA_ADMIN_GET_STATS_TYPE_BASIC:
+ bs = &resp.u.basic_stats;
+ result->basic_stats.tx_bytes = bs->tx_bytes;
+ result->basic_stats.tx_pkts = bs->tx_pkts;
+ result->basic_stats.rx_bytes = bs->rx_bytes;
+ result->basic_stats.rx_pkts = bs->rx_pkts;
+ result->basic_stats.rx_drops = bs->rx_drops;
+ break;
+ case EFA_ADMIN_GET_STATS_TYPE_MESSAGES:
+ ms = &resp.u.messages_stats;
+ result->messages_stats.send_bytes = ms->send_bytes;
+ result->messages_stats.send_wrs = ms->send_wrs;
+ result->messages_stats.recv_bytes = ms->recv_bytes;
+ result->messages_stats.recv_wrs = ms->recv_wrs;
+ break;
+ case EFA_ADMIN_GET_STATS_TYPE_RDMA_READ:
+ rrs = &resp.u.rdma_read_stats;
+ result->rdma_read_stats.read_wrs = rrs->read_wrs;
+ result->rdma_read_stats.read_bytes = rrs->read_bytes;
+ result->rdma_read_stats.read_wr_err = rrs->read_wr_err;
+ result->rdma_read_stats.read_resp_bytes = rrs->read_resp_bytes;
+ break;
+ case EFA_ADMIN_GET_STATS_TYPE_RDMA_WRITE:
+ rws = &resp.u.rdma_write_stats;
+ result->rdma_write_stats.write_wrs = rws->write_wrs;
+ result->rdma_write_stats.write_bytes = rws->write_bytes;
+ result->rdma_write_stats.write_wr_err = rws->write_wr_err;
+ result->rdma_write_stats.write_recv_bytes = rws->write_recv_bytes;
+ break;
+ case EFA_ADMIN_GET_STATS_TYPE_NETWORK:
+ ns = &resp.u.network_stats;
+ result->network_stats.retrans_bytes = ns->retrans_bytes;
+ result->network_stats.retrans_pkts = ns->retrans_pkts;
+ result->network_stats.retrans_timeout_events = ns->retrans_timeout_events;
+ result->network_stats.unresponsive_remote_events = ns->unresponsive_remote_events;
+ result->network_stats.impaired_remote_conn_events = ns->impaired_remote_conn_events;
+ break;
+ }
+
+ return 0;
+}
diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.h b/drivers/infiniband/hw/efa/efa_com_cmd.h
new file mode 100644
index 000000000000..3ac2686abba1
--- /dev/null
+++ b/drivers/infiniband/hw/efa/efa_com_cmd.h
@@ -0,0 +1,353 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
+/*
+ * Copyright 2018-2025 Amazon.com, Inc. or its affiliates. All rights reserved.
+ */
+
+#ifndef _EFA_COM_CMD_H_
+#define _EFA_COM_CMD_H_
+
+#include "efa_com.h"
+
+#define EFA_GID_SIZE 16
+
+struct efa_com_create_qp_params {
+ u64 rq_base_addr;
+ u32 send_cq_idx;
+ u32 recv_cq_idx;
+ /*
+ * Send descriptor ring size in bytes,
+ * sufficient for user-provided number of WQEs and SGL size
+ */
+ u32 sq_ring_size_in_bytes;
+ /* Max number of WQEs that will be posted on send queue */
+ u32 sq_depth;
+ /* Recv descriptor ring size in bytes */
+ u32 rq_ring_size_in_bytes;
+ u32 rq_depth;
+ u16 pd;
+ u16 uarn;
+ u8 qp_type;
+ u8 sl;
+ u8 unsolicited_write_recv : 1;
+};
+
+struct efa_com_create_qp_result {
+ u32 qp_handle;
+ u32 qp_num;
+ u32 sq_db_offset;
+ u32 rq_db_offset;
+ u32 llq_descriptors_offset;
+ u16 send_sub_cq_idx;
+ u16 recv_sub_cq_idx;
+};
+
+struct efa_com_modify_qp_params {
+ u32 modify_mask;
+ u32 qp_handle;
+ u32 qp_state;
+ u32 cur_qp_state;
+ u32 qkey;
+ u32 sq_psn;
+ u8 sq_drained_async_notify;
+ u8 rnr_retry;
+};
+
+struct efa_com_query_qp_params {
+ u32 qp_handle;
+};
+
+struct efa_com_query_qp_result {
+ u32 qp_state;
+ u32 qkey;
+ u32 sq_draining;
+ u32 sq_psn;
+ u8 rnr_retry;
+};
+
+struct efa_com_destroy_qp_params {
+ u32 qp_handle;
+};
+
+struct efa_com_create_cq_params {
+ /* cq physical base address in OS memory */
+ dma_addr_t dma_addr;
+ /* completion queue depth in # of entries */
+ u16 sub_cq_depth;
+ u16 num_sub_cqs;
+ u16 uarn;
+ u16 eqn;
+ u8 entry_size_in_bytes;
+ u8 interrupt_mode_enabled : 1;
+ u8 set_src_addr : 1;
+};
+
+struct efa_com_create_cq_result {
+ /* cq identifier */
+ u16 cq_idx;
+ /* actual cq depth in # of entries */
+ u16 actual_depth;
+ u32 db_off;
+ bool db_valid;
+};
+
+struct efa_com_destroy_cq_params {
+ u16 cq_idx;
+};
+
+struct efa_com_create_ah_params {
+ u16 pdn;
+ /* Destination address in network byte order */
+ u8 dest_addr[EFA_GID_SIZE];
+};
+
+struct efa_com_create_ah_result {
+ u16 ah;
+};
+
+struct efa_com_destroy_ah_params {
+ u16 ah;
+ u16 pdn;
+};
+
+struct efa_com_get_device_attr_result {
+ u8 addr[EFA_GID_SIZE];
+ u64 page_size_cap;
+ u64 max_mr_pages;
+ u64 guid;
+ u32 mtu;
+ u32 fw_version;
+ u32 admin_api_version;
+ u32 device_version;
+ u32 supported_features;
+ u32 phys_addr_width;
+ u32 virt_addr_width;
+ u32 max_qp;
+ u32 max_sq_depth; /* wqes */
+ u32 max_rq_depth; /* wqes */
+ u32 max_cq;
+ u32 max_cq_depth; /* cqes */
+ u32 inline_buf_size;
+ u32 max_mr;
+ u32 max_pd;
+ u32 max_ah;
+ u32 max_llq_size;
+ u32 max_rdma_size;
+ u32 device_caps;
+ u32 max_eq;
+ u32 max_eq_depth;
+ u32 event_bitmask; /* EQ events bitmask */
+ u16 sub_cqs_per_cq;
+ u16 max_sq_sge;
+ u16 max_rq_sge;
+ u16 max_wr_rdma_sge;
+ u16 max_tx_batch;
+ u16 min_sq_depth;
+ u16 max_link_speed_gbps;
+ u8 db_bar;
+};
+
+struct efa_com_get_hw_hints_result {
+ u16 mmio_read_timeout;
+ u16 driver_watchdog_timeout;
+ u16 admin_completion_timeout;
+ u16 poll_interval;
+ u32 reserved[4];
+};
+
+struct efa_com_mem_addr {
+ u32 mem_addr_low;
+ u32 mem_addr_high;
+};
+
+/* Used at indirect mode page list chunks for chaining */
+struct efa_com_ctrl_buff_info {
+ /* indicates length of the buffer pointed by control_buffer_address. */
+ u32 length;
+ /* points to control buffer (direct or indirect) */
+ struct efa_com_mem_addr address;
+};
+
+struct efa_com_reg_mr_params {
+ /* Memory region length, in bytes. */
+ u64 mr_length_in_bytes;
+ /* IO Virtual Address associated with this MR. */
+ u64 iova;
+ /* words 8:15: Physical Buffer List, each element is page-aligned. */
+ union {
+ /*
+ * Inline array of physical addresses of app pages
+ * (optimization for short region reservations)
+ */
+ u64 inline_pbl_array[4];
+ /*
+ * Describes the next physically contiguous chunk of indirect
+ * page list. A page list contains physical addresses of command
+ * data pages. Data pages are 4KB; page list chunks are
+ * variable-sized.
+ */
+ struct efa_com_ctrl_buff_info pbl;
+ } pbl;
+ /* number of pages in PBL (redundant, could be calculated) */
+ u32 page_num;
+ /* Protection Domain */
+ u16 pd;
+ /*
+ * phys_page_size_shift - page size is (1 << phys_page_size_shift)
+ * Page size is used for building the Virtual to Physical
+ * address mapping
+ */
+ u8 page_shift;
+ /* see permissions field of struct efa_admin_reg_mr_cmd */
+ u8 permissions;
+ u8 inline_pbl;
+ u8 indirect;
+};
+
+struct efa_com_mr_interconnect_info {
+ u16 recv_ic_id;
+ u16 rdma_read_ic_id;
+ u16 rdma_recv_ic_id;
+ u8 recv_ic_id_valid : 1;
+ u8 rdma_read_ic_id_valid : 1;
+ u8 rdma_recv_ic_id_valid : 1;
+};
+
+struct efa_com_reg_mr_result {
+ /*
+ * To be used in conjunction with local buffers references in SQ and
+ * RQ WQE
+ */
+ u32 l_key;
+ /*
+ * To be used in incoming RDMA semantics messages to refer to remotely
+ * accessed memory region
+ */
+ u32 r_key;
+ struct efa_com_mr_interconnect_info ic_info;
+};
+
+struct efa_com_dereg_mr_params {
+ u32 l_key;
+};
+
+struct efa_com_alloc_pd_result {
+ u16 pdn;
+};
+
+struct efa_com_dealloc_pd_params {
+ u16 pdn;
+};
+
+struct efa_com_alloc_uar_result {
+ u16 uarn;
+};
+
+struct efa_com_dealloc_uar_params {
+ u16 uarn;
+};
+
+struct efa_com_get_stats_params {
+ /* see enum efa_admin_get_stats_type */
+ u8 type;
+ /* see enum efa_admin_get_stats_scope */
+ u8 scope;
+ u16 scope_modifier;
+};
+
+struct efa_com_basic_stats {
+ u64 tx_bytes;
+ u64 tx_pkts;
+ u64 rx_bytes;
+ u64 rx_pkts;
+ u64 rx_drops;
+};
+
+struct efa_com_messages_stats {
+ u64 send_bytes;
+ u64 send_wrs;
+ u64 recv_bytes;
+ u64 recv_wrs;
+};
+
+struct efa_com_rdma_read_stats {
+ u64 read_wrs;
+ u64 read_bytes;
+ u64 read_wr_err;
+ u64 read_resp_bytes;
+};
+
+struct efa_com_rdma_write_stats {
+ u64 write_wrs;
+ u64 write_bytes;
+ u64 write_wr_err;
+ u64 write_recv_bytes;
+};
+
+struct efa_com_network_stats {
+ u64 retrans_bytes;
+ u64 retrans_pkts;
+ u64 retrans_timeout_events;
+ u64 unresponsive_remote_events;
+ u64 impaired_remote_conn_events;
+};
+
+union efa_com_get_stats_result {
+ struct efa_com_basic_stats basic_stats;
+ struct efa_com_messages_stats messages_stats;
+ struct efa_com_rdma_read_stats rdma_read_stats;
+ struct efa_com_rdma_write_stats rdma_write_stats;
+ struct efa_com_network_stats network_stats;
+};
+
+int efa_com_create_qp(struct efa_com_dev *edev,
+ struct efa_com_create_qp_params *params,
+ struct efa_com_create_qp_result *res);
+int efa_com_modify_qp(struct efa_com_dev *edev,
+ struct efa_com_modify_qp_params *params);
+int efa_com_query_qp(struct efa_com_dev *edev,
+ struct efa_com_query_qp_params *params,
+ struct efa_com_query_qp_result *result);
+int efa_com_destroy_qp(struct efa_com_dev *edev,
+ struct efa_com_destroy_qp_params *params);
+int efa_com_create_cq(struct efa_com_dev *edev,
+ struct efa_com_create_cq_params *params,
+ struct efa_com_create_cq_result *result);
+int efa_com_destroy_cq(struct efa_com_dev *edev,
+ struct efa_com_destroy_cq_params *params);
+int efa_com_register_mr(struct efa_com_dev *edev,
+ struct efa_com_reg_mr_params *params,
+ struct efa_com_reg_mr_result *result);
+int efa_com_dereg_mr(struct efa_com_dev *edev,
+ struct efa_com_dereg_mr_params *params);
+int efa_com_create_ah(struct efa_com_dev *edev,
+ struct efa_com_create_ah_params *params,
+ struct efa_com_create_ah_result *result);
+int efa_com_destroy_ah(struct efa_com_dev *edev,
+ struct efa_com_destroy_ah_params *params);
+int efa_com_get_device_attr(struct efa_com_dev *edev,
+ struct efa_com_get_device_attr_result *result);
+int efa_com_get_hw_hints(struct efa_com_dev *edev,
+ struct efa_com_get_hw_hints_result *result);
+bool
+efa_com_check_supported_feature_id(struct efa_com_dev *edev,
+ enum efa_admin_aq_feature_id feature_id);
+int efa_com_set_feature_ex(struct efa_com_dev *edev,
+ struct efa_admin_set_feature_resp *set_resp,
+ struct efa_admin_set_feature_cmd *set_cmd,
+ enum efa_admin_aq_feature_id feature_id,
+ dma_addr_t control_buf_dma_addr,
+ u32 control_buff_size);
+int efa_com_set_aenq_config(struct efa_com_dev *edev, u32 groups);
+int efa_com_alloc_pd(struct efa_com_dev *edev,
+ struct efa_com_alloc_pd_result *result);
+int efa_com_dealloc_pd(struct efa_com_dev *edev,
+ struct efa_com_dealloc_pd_params *params);
+int efa_com_alloc_uar(struct efa_com_dev *edev,
+ struct efa_com_alloc_uar_result *result);
+int efa_com_dealloc_uar(struct efa_com_dev *edev,
+ struct efa_com_dealloc_uar_params *params);
+int efa_com_get_stats(struct efa_com_dev *edev,
+ struct efa_com_get_stats_params *params,
+ union efa_com_get_stats_result *result);
+
+#endif /* _EFA_COM_CMD_H_ */
diff --git a/drivers/infiniband/hw/efa/efa_common_defs.h b/drivers/infiniband/hw/efa/efa_common_defs.h
new file mode 100644
index 000000000000..90af1c82c9c6
--- /dev/null
+++ b/drivers/infiniband/hw/efa/efa_common_defs.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
+/*
+ * Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All rights reserved.
+ */
+
+#ifndef _EFA_COMMON_H_
+#define _EFA_COMMON_H_
+
+#include <linux/bitfield.h>
+
+#define EFA_COMMON_SPEC_VERSION_MAJOR 2
+#define EFA_COMMON_SPEC_VERSION_MINOR 0
+
+#define EFA_GET(ptr, mask) FIELD_GET(mask##_MASK, *(ptr))
+
+#define EFA_SET(ptr, mask, value) \
+ ({ \
+ typeof(ptr) _ptr = ptr; \
+ *_ptr = (*_ptr & ~(mask##_MASK)) | \
+ FIELD_PREP(mask##_MASK, value); \
+ })
+
+struct efa_common_mem_addr {
+ u32 mem_addr_low;
+
+ u32 mem_addr_high;
+};
+
+#endif /* _EFA_COMMON_H_ */
diff --git a/drivers/infiniband/hw/efa/efa_io_defs.h b/drivers/infiniband/hw/efa/efa_io_defs.h
new file mode 100644
index 000000000000..a4c9fd33da38
--- /dev/null
+++ b/drivers/infiniband/hw/efa/efa_io_defs.h
@@ -0,0 +1,391 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
+/*
+ * Copyright 2018-2024 Amazon.com, Inc. or its affiliates. All rights reserved.
+ */
+
+#ifndef _EFA_IO_H_
+#define _EFA_IO_H_
+
+#define EFA_IO_TX_DESC_NUM_BUFS 2
+#define EFA_IO_TX_DESC_NUM_RDMA_BUFS 1
+#define EFA_IO_TX_DESC_INLINE_MAX_SIZE 32
+#define EFA_IO_TX_DESC_IMM_DATA_SIZE 4
+#define EFA_IO_TX_DESC_INLINE_PBL_SIZE 1
+
+enum efa_io_queue_type {
+ /* send queue (of a QP) */
+ EFA_IO_SEND_QUEUE = 1,
+ /* recv queue (of a QP) */
+ EFA_IO_RECV_QUEUE = 2,
+};
+
+enum efa_io_send_op_type {
+ /* send message */
+ EFA_IO_SEND = 0,
+ /* RDMA read */
+ EFA_IO_RDMA_READ = 1,
+ /* RDMA write */
+ EFA_IO_RDMA_WRITE = 2,
+ /* Fast MR registration */
+ EFA_IO_FAST_REG = 3,
+ /* Fast MR invalidation */
+ EFA_IO_FAST_INV = 4,
+};
+
+enum efa_io_comp_status {
+ /* Successful completion */
+ EFA_IO_COMP_STATUS_OK = 0,
+ /* Flushed during QP destroy */
+ EFA_IO_COMP_STATUS_FLUSHED = 1,
+ /* Internal QP error */
+ EFA_IO_COMP_STATUS_LOCAL_ERROR_QP_INTERNAL_ERROR = 2,
+ /* Unsupported operation */
+ EFA_IO_COMP_STATUS_LOCAL_ERROR_UNSUPPORTED_OP = 3,
+ /* Bad AH */
+ EFA_IO_COMP_STATUS_LOCAL_ERROR_INVALID_AH = 4,
+ /* LKEY not registered or does not match IOVA */
+ EFA_IO_COMP_STATUS_LOCAL_ERROR_INVALID_LKEY = 5,
+ /* Message too long */
+ EFA_IO_COMP_STATUS_LOCAL_ERROR_BAD_LENGTH = 6,
+ /* RKEY not registered or does not match remote IOVA */
+ EFA_IO_COMP_STATUS_REMOTE_ERROR_BAD_ADDRESS = 7,
+ /* Connection was reset by remote side */
+ EFA_IO_COMP_STATUS_REMOTE_ERROR_ABORT = 8,
+ /* Bad dest QP number (QP does not exist or is in error state) */
+ EFA_IO_COMP_STATUS_REMOTE_ERROR_BAD_DEST_QPN = 9,
+ /* Destination resource not ready (no WQEs posted on RQ) */
+ EFA_IO_COMP_STATUS_REMOTE_ERROR_RNR = 10,
+ /* Receiver SGL too short */
+ EFA_IO_COMP_STATUS_REMOTE_ERROR_BAD_LENGTH = 11,
+ /* Unexpected status returned by responder */
+ EFA_IO_COMP_STATUS_REMOTE_ERROR_BAD_STATUS = 12,
+ /* Unresponsive remote - was previously responsive */
+ EFA_IO_COMP_STATUS_LOCAL_ERROR_UNRESP_REMOTE = 13,
+ /* No valid AH at remote side (required for RDMA operations) */
+ EFA_IO_COMP_STATUS_REMOTE_ERROR_UNKNOWN_PEER = 14,
+ /* Unreachable remote - never received a response */
+ EFA_IO_COMP_STATUS_LOCAL_ERROR_UNREACH_REMOTE = 15,
+};
+
+enum efa_io_frwr_pbl_mode {
+ EFA_IO_FRWR_INLINE_PBL = 0,
+ EFA_IO_FRWR_DIRECT_PBL = 1,
+};
+
+struct efa_io_tx_meta_desc {
+ /* Verbs-generated Request ID */
+ u16 req_id;
+
+ /*
+ * control flags
+ * 3:0 : op_type - enum efa_io_send_op_type
+ * 4 : has_imm - immediate_data field carries valid
+ * data.
+ * 5 : inline_msg - inline mode - inline message data
+ * follows this descriptor (no buffer descriptors).
+ * Note that it is different from immediate data
+ * 6 : meta_extension - Extended metadata. MBZ
+ * 7 : meta_desc - Indicates metadata descriptor.
+ * Must be set.
+ */
+ u8 ctrl1;
+
+ /*
+ * control flags
+ * 0 : phase
+ * 1 : reserved25 - MBZ
+ * 2 : first - Indicates first descriptor in
+ * transaction. Must be set.
+ * 3 : last - Indicates last descriptor in
+ * transaction. Must be set.
+ * 4 : comp_req - Indicates whether completion should
+ * be posted, after packet is transmitted. Valid only
+ * for the first descriptor
+ * 7:5 : reserved29 - MBZ
+ */
+ u8 ctrl2;
+
+ u16 dest_qp_num;
+
+ /*
+ * If inline_msg bit is set, length of inline message in bytes,
+ * otherwise length of SGL (number of buffers).
+ */
+ u16 length;
+
+ /*
+ * immediate data: if has_imm is set, then this field is included within
+ * Tx message and reported in remote Rx completion.
+ */
+ u32 immediate_data;
+
+ u16 ah;
+
+ u16 reserved;
+
+ /* Queue key */
+ u32 qkey;
+
+ u8 reserved2[12];
+};
+
+/*
+ * Tx queue buffer descriptor, for any transport type. Preceded by metadata
+ * descriptor.
+ */
+struct efa_io_tx_buf_desc {
+ /* length in bytes */
+ u32 length;
+
+ /*
+ * 23:0 : lkey - local memory translation key
+ * 31:24 : reserved - MBZ
+ */
+ u32 lkey;
+
+ /* Buffer address bits[31:0] */
+ u32 buf_addr_lo;
+
+ /* Buffer address bits[63:32] */
+ u32 buf_addr_hi;
+};
+
+struct efa_io_remote_mem_addr {
+ /* length in bytes */
+ u32 length;
+
+ /* remote memory translation key */
+ u32 rkey;
+
+ /* Buffer address bits[31:0] */
+ u32 buf_addr_lo;
+
+ /* Buffer address bits[63:32] */
+ u32 buf_addr_hi;
+};
+
+struct efa_io_rdma_req {
+ /* Remote memory address */
+ struct efa_io_remote_mem_addr remote_mem;
+
+ /* Local memory address */
+ struct efa_io_tx_buf_desc local_mem[1];
+};
+
+struct efa_io_fast_mr_reg_req {
+ /* Updated local key of the MR after lkey/rkey increment */
+ u32 lkey;
+
+ /*
+ * permissions
+ * 0 : local_write_enable - Local write permissions:
+ * must be set for RQ buffers and buffers posted for
+ * RDMA Read requests
+ * 1 : remote_write_enable - Remote write
+ * permissions: must be set to enable RDMA write to
+ * the region
+ * 2 : remote_read_enable - Remote read permissions:
+ * must be set to enable RDMA read from the region
+ * 7:3 : reserved2 - MBZ
+ */
+ u8 permissions;
+
+ /*
+ * control flags
+ * 4:0 : phys_page_size_shift - page size is (1 <<
+ * phys_page_size_shift)
+ * 6:5 : pbl_mode - enum efa_io_frwr_pbl_mode
+ * 7 : reserved - MBZ
+ */
+ u8 flags;
+
+ /* MBZ */
+ u8 reserved[2];
+
+ /* IO Virtual Address associated with this MR */
+ u64 iova;
+
+ /* Memory region length, in bytes */
+ u64 mr_length;
+
+ /* Physical Buffer List, each element is page-aligned. */
+ union {
+ /*
+ * Inline array of physical page addresses (optimization
+ * for short region activation).
+ */
+ u64 inline_array[1];
+
+ /* points to PBL (Currently only direct) */
+ u64 dma_addr;
+ } pbl;
+};
+
+struct efa_io_fast_mr_inv_req {
+ /* Local key of the MR to invalidate */
+ u32 lkey;
+
+ /* MBZ */
+ u8 reserved[28];
+};
+
+/*
+ * Tx WQE, composed of tx meta descriptors followed by either tx buffer
+ * descriptors or inline data
+ */
+struct efa_io_tx_wqe {
+ /* TX meta */
+ struct efa_io_tx_meta_desc meta;
+
+ union {
+ /* Send buffer descriptors */
+ struct efa_io_tx_buf_desc sgl[2];
+
+ u8 inline_data[32];
+
+ /* RDMA local and remote memory addresses */
+ struct efa_io_rdma_req rdma_req;
+
+ /* Fast registration */
+ struct efa_io_fast_mr_reg_req reg_mr_req;
+
+ /* Fast invalidation */
+ struct efa_io_fast_mr_inv_req inv_mr_req;
+ } data;
+};
+
+/*
+ * Rx buffer descriptor; RX WQE is composed of one or more RX buffer
+ * descriptors.
+ */
+struct efa_io_rx_desc {
+ /* Buffer address bits[31:0] */
+ u32 buf_addr_lo;
+
+ /* Buffer Pointer[63:32] */
+ u32 buf_addr_hi;
+
+ /* Verbs-generated request id. */
+ u16 req_id;
+
+ /* Length in bytes. */
+ u16 length;
+
+ /*
+ * LKey and control flags
+ * 23:0 : lkey
+ * 29:24 : reserved - MBZ
+ * 30 : first - Indicates first descriptor in WQE
+ * 31 : last - Indicates last descriptor in WQE
+ */
+ u32 lkey_ctrl;
+};
+
+/* Common IO completion descriptor */
+struct efa_io_cdesc_common {
+ /*
+ * verbs-generated request ID, as provided in the completed tx or rx
+ * descriptor.
+ */
+ u16 req_id;
+
+ u8 status;
+
+ /*
+ * flags
+ * 0 : phase - Phase bit
+ * 2:1 : q_type - enum efa_io_queue_type: send/recv
+ * 3 : has_imm - indicates that immediate data is
+ * present - for RX completions only
+ * 6:4 : op_type - enum efa_io_send_op_type
+ * 7 : unsolicited - indicates that there is no
+ * matching request - for RDMA with imm. RX only
+ */
+ u8 flags;
+
+ /* local QP number */
+ u16 qp_num;
+};
+
+/* Tx completion descriptor */
+struct efa_io_tx_cdesc {
+ /* Common completion info */
+ struct efa_io_cdesc_common common;
+
+ /* MBZ */
+ u16 reserved16;
+};
+
+/* Rx Completion Descriptor */
+struct efa_io_rx_cdesc {
+ /* Common completion info */
+ struct efa_io_cdesc_common common;
+
+ /* Transferred length bits[15:0] */
+ u16 length;
+
+ /* Remote Address Handle FW index, 0xFFFF indicates invalid ah */
+ u16 ah;
+
+ u16 src_qp_num;
+
+ /* Immediate data */
+ u32 imm;
+};
+
+/* Rx Completion Descriptor RDMA write info */
+struct efa_io_rx_cdesc_rdma_write {
+ /* Transferred length bits[31:16] */
+ u16 length_hi;
+};
+
+/* Extended Rx Completion Descriptor */
+struct efa_io_rx_cdesc_ex {
+ /* Base RX completion info */
+ struct efa_io_rx_cdesc base;
+
+ union {
+ struct efa_io_rx_cdesc_rdma_write rdma_write;
+
+ /*
+ * Valid only in case of unknown AH (0xFFFF) and CQ
+ * set_src_addr is enabled.
+ */
+ u8 src_addr[16];
+ } u;
+};
+
+/* tx_meta_desc */
+#define EFA_IO_TX_META_DESC_OP_TYPE_MASK GENMASK(3, 0)
+#define EFA_IO_TX_META_DESC_HAS_IMM_MASK BIT(4)
+#define EFA_IO_TX_META_DESC_INLINE_MSG_MASK BIT(5)
+#define EFA_IO_TX_META_DESC_META_EXTENSION_MASK BIT(6)
+#define EFA_IO_TX_META_DESC_META_DESC_MASK BIT(7)
+#define EFA_IO_TX_META_DESC_PHASE_MASK BIT(0)
+#define EFA_IO_TX_META_DESC_FIRST_MASK BIT(2)
+#define EFA_IO_TX_META_DESC_LAST_MASK BIT(3)
+#define EFA_IO_TX_META_DESC_COMP_REQ_MASK BIT(4)
+
+/* tx_buf_desc */
+#define EFA_IO_TX_BUF_DESC_LKEY_MASK GENMASK(23, 0)
+
+/* fast_mr_reg_req */
+#define EFA_IO_FAST_MR_REG_REQ_LOCAL_WRITE_ENABLE_MASK BIT(0)
+#define EFA_IO_FAST_MR_REG_REQ_REMOTE_WRITE_ENABLE_MASK BIT(1)
+#define EFA_IO_FAST_MR_REG_REQ_REMOTE_READ_ENABLE_MASK BIT(2)
+#define EFA_IO_FAST_MR_REG_REQ_PHYS_PAGE_SIZE_SHIFT_MASK GENMASK(4, 0)
+#define EFA_IO_FAST_MR_REG_REQ_PBL_MODE_MASK GENMASK(6, 5)
+
+/* rx_desc */
+#define EFA_IO_RX_DESC_LKEY_MASK GENMASK(23, 0)
+#define EFA_IO_RX_DESC_FIRST_MASK BIT(30)
+#define EFA_IO_RX_DESC_LAST_MASK BIT(31)
+
+/* cdesc_common */
+#define EFA_IO_CDESC_COMMON_PHASE_MASK BIT(0)
+#define EFA_IO_CDESC_COMMON_Q_TYPE_MASK GENMASK(2, 1)
+#define EFA_IO_CDESC_COMMON_HAS_IMM_MASK BIT(3)
+#define EFA_IO_CDESC_COMMON_OP_TYPE_MASK GENMASK(6, 4)
+#define EFA_IO_CDESC_COMMON_UNSOLICITED_MASK BIT(7)
+
+#endif /* _EFA_IO_H_ */
diff --git a/drivers/infiniband/hw/efa/efa_main.c b/drivers/infiniband/hw/efa/efa_main.c
new file mode 100644
index 000000000000..6c415b9adb5f
--- /dev/null
+++ b/drivers/infiniband/hw/efa/efa_main.c
@@ -0,0 +1,706 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
+/*
+ * Copyright 2018-2025 Amazon.com, Inc. or its affiliates. All rights reserved.
+ */
+
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/utsname.h>
+#include <linux/version.h>
+
+#include <rdma/ib_user_verbs.h>
+#include <rdma/uverbs_ioctl.h>
+
+#include "efa.h"
+
+#define PCI_DEV_ID_EFA0_VF 0xefa0
+#define PCI_DEV_ID_EFA1_VF 0xefa1
+#define PCI_DEV_ID_EFA2_VF 0xefa2
+#define PCI_DEV_ID_EFA3_VF 0xefa3
+
+static const struct pci_device_id efa_pci_tbl[] = {
+ { PCI_VDEVICE(AMAZON, PCI_DEV_ID_EFA0_VF) },
+ { PCI_VDEVICE(AMAZON, PCI_DEV_ID_EFA1_VF) },
+ { PCI_VDEVICE(AMAZON, PCI_DEV_ID_EFA2_VF) },
+ { PCI_VDEVICE(AMAZON, PCI_DEV_ID_EFA3_VF) },
+ { }
+};
+
+MODULE_AUTHOR("Amazon.com, Inc. or its affiliates");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION(DEVICE_NAME);
+MODULE_DEVICE_TABLE(pci, efa_pci_tbl);
+
+#define EFA_REG_BAR 0
+#define EFA_MEM_BAR 2
+#define EFA_BASE_BAR_MASK (BIT(EFA_REG_BAR) | BIT(EFA_MEM_BAR))
+
+#define EFA_AENQ_ENABLED_GROUPS \
+ (BIT(EFA_ADMIN_FATAL_ERROR) | BIT(EFA_ADMIN_WARNING) | \
+ BIT(EFA_ADMIN_NOTIFICATION) | BIT(EFA_ADMIN_KEEP_ALIVE))
+
+extern const struct uapi_definition efa_uapi_defs[];
+
+/* This handler will called for unknown event group or unimplemented handlers */
+static void unimplemented_aenq_handler(void *data,
+ struct efa_admin_aenq_entry *aenq_e)
+{
+ struct efa_dev *dev = (struct efa_dev *)data;
+
+ ibdev_err(&dev->ibdev,
+ "Unknown event was received or event with unimplemented handler\n");
+}
+
+static void efa_keep_alive(void *data, struct efa_admin_aenq_entry *aenq_e)
+{
+ struct efa_dev *dev = (struct efa_dev *)data;
+
+ atomic64_inc(&dev->stats.keep_alive_rcvd);
+}
+
+static struct efa_aenq_handlers aenq_handlers = {
+ .handlers = {
+ [EFA_ADMIN_KEEP_ALIVE] = efa_keep_alive,
+ },
+ .unimplemented_handler = unimplemented_aenq_handler
+};
+
+static void efa_release_bars(struct efa_dev *dev, int bars_mask)
+{
+ struct pci_dev *pdev = dev->pdev;
+ int release_bars;
+
+ release_bars = pci_select_bars(pdev, IORESOURCE_MEM) & bars_mask;
+ pci_release_selected_regions(pdev, release_bars);
+}
+
+static void efa_process_comp_eqe(struct efa_dev *dev, struct efa_admin_eqe *eqe)
+{
+ u16 cqn = eqe->u.comp_event.cqn;
+ struct efa_cq *cq;
+
+ /* Safe to load as we're in irq and removal calls synchronize_irq() */
+ cq = xa_load(&dev->cqs_xa, cqn);
+ if (unlikely(!cq)) {
+ ibdev_err_ratelimited(&dev->ibdev,
+ "Completion event on non-existent CQ[%u]",
+ cqn);
+ return;
+ }
+
+ cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
+}
+
+static void efa_process_eqe(struct efa_com_eq *eeq, struct efa_admin_eqe *eqe)
+{
+ struct efa_dev *dev = container_of(eeq->edev, struct efa_dev, edev);
+
+ if (likely(EFA_GET(&eqe->common, EFA_ADMIN_EQE_EVENT_TYPE) ==
+ EFA_ADMIN_EQE_EVENT_TYPE_COMPLETION))
+ efa_process_comp_eqe(dev, eqe);
+ else
+ ibdev_err_ratelimited(&dev->ibdev,
+ "Unknown event type received %lu",
+ EFA_GET(&eqe->common,
+ EFA_ADMIN_EQE_EVENT_TYPE));
+}
+
+static irqreturn_t efa_intr_msix_comp(int irq, void *data)
+{
+ struct efa_eq *eq = data;
+ struct efa_com_dev *edev = eq->eeq.edev;
+
+ efa_com_eq_comp_intr_handler(edev, &eq->eeq);
+
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t efa_intr_msix_mgmnt(int irq, void *data)
+{
+ struct efa_dev *dev = data;
+
+ efa_com_admin_q_comp_intr_handler(&dev->edev);
+ efa_com_aenq_intr_handler(&dev->edev, data);
+
+ return IRQ_HANDLED;
+}
+
+static int efa_request_irq(struct efa_dev *dev, struct efa_irq *irq)
+{
+ int err;
+
+ err = request_irq(irq->irqn, irq->handler, 0, irq->name, irq->data);
+ if (err) {
+ dev_err(&dev->pdev->dev, "Failed to request irq %s (%d)\n",
+ irq->name, err);
+ return err;
+ }
+
+ irq_set_affinity_hint(irq->irqn, &irq->affinity_hint_mask);
+
+ return 0;
+}
+
+static void efa_setup_comp_irq(struct efa_dev *dev, struct efa_eq *eq, u32 vector)
+{
+ u32 cpu;
+
+ cpu = vector - EFA_COMP_EQS_VEC_BASE;
+ snprintf(eq->irq.name, EFA_IRQNAME_SIZE, "efa-comp%d@pci:%s", cpu,
+ pci_name(dev->pdev));
+ eq->irq.handler = efa_intr_msix_comp;
+ eq->irq.data = eq;
+ eq->irq.vector = vector;
+ eq->irq.irqn = pci_irq_vector(dev->pdev, vector);
+ cpumask_set_cpu(cpu, &eq->irq.affinity_hint_mask);
+}
+
+static void efa_free_irq(struct efa_dev *dev, struct efa_irq *irq)
+{
+ irq_set_affinity_hint(irq->irqn, NULL);
+ free_irq(irq->irqn, irq->data);
+}
+
+static void efa_setup_mgmnt_irq(struct efa_dev *dev)
+{
+ u32 cpu;
+
+ snprintf(dev->admin_irq.name, EFA_IRQNAME_SIZE,
+ "efa-mgmnt@pci:%s", pci_name(dev->pdev));
+ dev->admin_irq.handler = efa_intr_msix_mgmnt;
+ dev->admin_irq.data = dev;
+ dev->admin_irq.vector = dev->admin_msix_vector_idx;
+ dev->admin_irq.irqn = pci_irq_vector(dev->pdev,
+ dev->admin_msix_vector_idx);
+ cpu = cpumask_first(cpu_online_mask);
+ cpumask_set_cpu(cpu,
+ &dev->admin_irq.affinity_hint_mask);
+ dev_info(&dev->pdev->dev, "Setup irq:%d name:%s\n",
+ dev->admin_irq.irqn,
+ dev->admin_irq.name);
+}
+
+static int efa_set_mgmnt_irq(struct efa_dev *dev)
+{
+ efa_setup_mgmnt_irq(dev);
+
+ return efa_request_irq(dev, &dev->admin_irq);
+}
+
+static int efa_request_doorbell_bar(struct efa_dev *dev)
+{
+ u8 db_bar_idx = dev->dev_attr.db_bar;
+ struct pci_dev *pdev = dev->pdev;
+ int pci_mem_bars;
+ int db_bar;
+ int err;
+
+ db_bar = BIT(db_bar_idx);
+ if (!(db_bar & EFA_BASE_BAR_MASK)) {
+ pci_mem_bars = pci_select_bars(pdev, IORESOURCE_MEM);
+ if (db_bar & ~pci_mem_bars) {
+ dev_err(&pdev->dev,
+ "Doorbells BAR unavailable. Requested %#x, available %#x\n",
+ db_bar, pci_mem_bars);
+ return -ENODEV;
+ }
+
+ err = pci_request_selected_regions(pdev, db_bar, DRV_MODULE_NAME);
+ if (err) {
+ dev_err(&pdev->dev,
+ "pci_request_selected_regions for bar %d failed %d\n",
+ db_bar_idx, err);
+ return err;
+ }
+ }
+
+ dev->db_bar_addr = pci_resource_start(dev->pdev, db_bar_idx);
+ dev->db_bar_len = pci_resource_len(dev->pdev, db_bar_idx);
+
+ return 0;
+}
+
+static void efa_release_doorbell_bar(struct efa_dev *dev)
+{
+ if (!(BIT(dev->dev_attr.db_bar) & EFA_BASE_BAR_MASK))
+ efa_release_bars(dev, BIT(dev->dev_attr.db_bar));
+}
+
+static void efa_update_hw_hints(struct efa_dev *dev,
+ struct efa_com_get_hw_hints_result *hw_hints)
+{
+ struct efa_com_dev *edev = &dev->edev;
+
+ if (hw_hints->mmio_read_timeout)
+ edev->mmio_read.mmio_read_timeout =
+ hw_hints->mmio_read_timeout * 1000;
+
+ if (hw_hints->poll_interval)
+ edev->aq.poll_interval = hw_hints->poll_interval;
+
+ if (hw_hints->admin_completion_timeout)
+ edev->aq.completion_timeout =
+ hw_hints->admin_completion_timeout;
+}
+
+static void efa_stats_init(struct efa_dev *dev)
+{
+ atomic64_t *s = (atomic64_t *)&dev->stats;
+ int i;
+
+ for (i = 0; i < sizeof(dev->stats) / sizeof(*s); i++, s++)
+ atomic64_set(s, 0);
+}
+
+static void efa_set_host_info(struct efa_dev *dev)
+{
+ struct efa_admin_set_feature_resp resp = {};
+ struct efa_admin_set_feature_cmd cmd = {};
+ struct efa_admin_host_info *hinf;
+ u32 bufsz = sizeof(*hinf);
+ dma_addr_t hinf_dma;
+
+ if (!efa_com_check_supported_feature_id(&dev->edev,
+ EFA_ADMIN_HOST_INFO))
+ return;
+
+ /* Failures in host info set shall not disturb probe */
+ hinf = dma_alloc_coherent(&dev->pdev->dev, bufsz, &hinf_dma,
+ GFP_KERNEL);
+ if (!hinf)
+ return;
+
+ strscpy(hinf->os_dist_str, utsname()->release,
+ sizeof(hinf->os_dist_str));
+ hinf->os_type = EFA_ADMIN_OS_LINUX;
+ strscpy(hinf->kernel_ver_str, utsname()->version,
+ sizeof(hinf->kernel_ver_str));
+ hinf->kernel_ver = LINUX_VERSION_CODE;
+ EFA_SET(&hinf->driver_ver, EFA_ADMIN_HOST_INFO_DRIVER_MAJOR, 0);
+ EFA_SET(&hinf->driver_ver, EFA_ADMIN_HOST_INFO_DRIVER_MINOR, 0);
+ EFA_SET(&hinf->driver_ver, EFA_ADMIN_HOST_INFO_DRIVER_SUB_MINOR, 0);
+ EFA_SET(&hinf->driver_ver, EFA_ADMIN_HOST_INFO_DRIVER_MODULE_TYPE, 0);
+ EFA_SET(&hinf->bdf, EFA_ADMIN_HOST_INFO_BUS, dev->pdev->bus->number);
+ EFA_SET(&hinf->bdf, EFA_ADMIN_HOST_INFO_DEVICE,
+ PCI_SLOT(dev->pdev->devfn));
+ EFA_SET(&hinf->bdf, EFA_ADMIN_HOST_INFO_FUNCTION,
+ PCI_FUNC(dev->pdev->devfn));
+ EFA_SET(&hinf->spec_ver, EFA_ADMIN_HOST_INFO_SPEC_MAJOR,
+ EFA_COMMON_SPEC_VERSION_MAJOR);
+ EFA_SET(&hinf->spec_ver, EFA_ADMIN_HOST_INFO_SPEC_MINOR,
+ EFA_COMMON_SPEC_VERSION_MINOR);
+ EFA_SET(&hinf->flags, EFA_ADMIN_HOST_INFO_INTREE, 1);
+ EFA_SET(&hinf->flags, EFA_ADMIN_HOST_INFO_GDR, 0);
+
+ efa_com_set_feature_ex(&dev->edev, &resp, &cmd, EFA_ADMIN_HOST_INFO,
+ hinf_dma, bufsz);
+
+ dma_free_coherent(&dev->pdev->dev, bufsz, hinf, hinf_dma);
+}
+
+static void efa_destroy_eq(struct efa_dev *dev, struct efa_eq *eq)
+{
+ efa_com_eq_destroy(&dev->edev, &eq->eeq);
+ efa_free_irq(dev, &eq->irq);
+}
+
+static int efa_create_eq(struct efa_dev *dev, struct efa_eq *eq, u32 msix_vec)
+{
+ int err;
+
+ efa_setup_comp_irq(dev, eq, msix_vec);
+ err = efa_request_irq(dev, &eq->irq);
+ if (err)
+ return err;
+
+ err = efa_com_eq_init(&dev->edev, &eq->eeq, efa_process_eqe,
+ dev->dev_attr.max_eq_depth, msix_vec);
+ if (err)
+ goto err_free_comp_irq;
+
+ return 0;
+
+err_free_comp_irq:
+ efa_free_irq(dev, &eq->irq);
+ return err;
+}
+
+static int efa_create_eqs(struct efa_dev *dev)
+{
+ u32 neqs = dev->dev_attr.max_eq;
+ int err, i;
+
+ neqs = min_t(u32, neqs, dev->num_irq_vectors - EFA_COMP_EQS_VEC_BASE);
+ dev->neqs = neqs;
+ dev->eqs = kcalloc(neqs, sizeof(*dev->eqs), GFP_KERNEL);
+ if (!dev->eqs)
+ return -ENOMEM;
+
+ for (i = 0; i < neqs; i++) {
+ err = efa_create_eq(dev, &dev->eqs[i], i + EFA_COMP_EQS_VEC_BASE);
+ if (err)
+ goto err_destroy_eqs;
+ }
+
+ return 0;
+
+err_destroy_eqs:
+ for (i--; i >= 0; i--)
+ efa_destroy_eq(dev, &dev->eqs[i]);
+ kfree(dev->eqs);
+
+ return err;
+}
+
+static void efa_destroy_eqs(struct efa_dev *dev)
+{
+ int i;
+
+ for (i = 0; i < dev->neqs; i++)
+ efa_destroy_eq(dev, &dev->eqs[i]);
+
+ kfree(dev->eqs);
+}
+
+static const struct ib_device_ops efa_dev_ops = {
+ .owner = THIS_MODULE,
+ .driver_id = RDMA_DRIVER_EFA,
+ .uverbs_abi_ver = EFA_UVERBS_ABI_VERSION,
+
+ .alloc_hw_port_stats = efa_alloc_hw_port_stats,
+ .alloc_hw_device_stats = efa_alloc_hw_device_stats,
+ .alloc_pd = efa_alloc_pd,
+ .alloc_ucontext = efa_alloc_ucontext,
+ .create_cq = efa_create_cq,
+ .create_cq_umem = efa_create_cq_umem,
+ .create_qp = efa_create_qp,
+ .create_user_ah = efa_create_ah,
+ .dealloc_pd = efa_dealloc_pd,
+ .dealloc_ucontext = efa_dealloc_ucontext,
+ .dereg_mr = efa_dereg_mr,
+ .destroy_ah = efa_destroy_ah,
+ .destroy_cq = efa_destroy_cq,
+ .destroy_qp = efa_destroy_qp,
+ .get_hw_stats = efa_get_hw_stats,
+ .get_link_layer = efa_port_link_layer,
+ .get_port_immutable = efa_get_port_immutable,
+ .mmap = efa_mmap,
+ .mmap_free = efa_mmap_free,
+ .modify_qp = efa_modify_qp,
+ .query_device = efa_query_device,
+ .query_gid = efa_query_gid,
+ .query_pkey = efa_query_pkey,
+ .query_port = efa_query_port,
+ .query_qp = efa_query_qp,
+ .reg_user_mr = efa_reg_mr,
+ .reg_user_mr_dmabuf = efa_reg_user_mr_dmabuf,
+
+ INIT_RDMA_OBJ_SIZE(ib_ah, efa_ah, ibah),
+ INIT_RDMA_OBJ_SIZE(ib_cq, efa_cq, ibcq),
+ INIT_RDMA_OBJ_SIZE(ib_pd, efa_pd, ibpd),
+ INIT_RDMA_OBJ_SIZE(ib_qp, efa_qp, ibqp),
+ INIT_RDMA_OBJ_SIZE(ib_ucontext, efa_ucontext, ibucontext),
+};
+
+static int efa_ib_device_add(struct efa_dev *dev)
+{
+ struct efa_com_get_hw_hints_result hw_hints;
+ struct pci_dev *pdev = dev->pdev;
+ int err;
+
+ efa_stats_init(dev);
+
+ err = efa_com_get_device_attr(&dev->edev, &dev->dev_attr);
+ if (err)
+ return err;
+
+ dev_dbg(&dev->pdev->dev, "Doorbells bar (%d)\n", dev->dev_attr.db_bar);
+ err = efa_request_doorbell_bar(dev);
+ if (err)
+ return err;
+
+ err = efa_com_get_hw_hints(&dev->edev, &hw_hints);
+ if (err)
+ goto err_release_doorbell_bar;
+
+ efa_update_hw_hints(dev, &hw_hints);
+
+ /* Try to enable all the available aenq groups */
+ err = efa_com_set_aenq_config(&dev->edev, EFA_AENQ_ENABLED_GROUPS);
+ if (err)
+ goto err_release_doorbell_bar;
+
+ err = efa_create_eqs(dev);
+ if (err)
+ goto err_release_doorbell_bar;
+
+ efa_set_host_info(dev);
+
+ dev->ibdev.node_type = RDMA_NODE_UNSPECIFIED;
+ dev->ibdev.node_guid = dev->dev_attr.guid;
+ dev->ibdev.phys_port_cnt = 1;
+ dev->ibdev.num_comp_vectors = dev->neqs ?: 1;
+ dev->ibdev.dev.parent = &pdev->dev;
+
+ ib_set_device_ops(&dev->ibdev, &efa_dev_ops);
+
+ dev->ibdev.driver_def = efa_uapi_defs;
+
+ err = ib_register_device(&dev->ibdev, "efa_%d", &pdev->dev);
+ if (err)
+ goto err_destroy_eqs;
+
+ ibdev_info(&dev->ibdev, "IB device registered\n");
+
+ return 0;
+
+err_destroy_eqs:
+ efa_destroy_eqs(dev);
+err_release_doorbell_bar:
+ efa_release_doorbell_bar(dev);
+ return err;
+}
+
+static void efa_ib_device_remove(struct efa_dev *dev)
+{
+ ibdev_info(&dev->ibdev, "Unregister ib device\n");
+ ib_unregister_device(&dev->ibdev);
+ efa_destroy_eqs(dev);
+ efa_release_doorbell_bar(dev);
+}
+
+static void efa_disable_msix(struct efa_dev *dev)
+{
+ pci_free_irq_vectors(dev->pdev);
+}
+
+static int efa_enable_msix(struct efa_dev *dev)
+{
+ int max_vecs, num_vecs;
+
+ /*
+ * Reserve the max msix vectors we might need, one vector is reserved
+ * for admin.
+ */
+ max_vecs = min_t(int, pci_msix_vec_count(dev->pdev),
+ num_online_cpus() + 1);
+ dev_dbg(&dev->pdev->dev, "Trying to enable MSI-X, vectors %d\n",
+ max_vecs);
+
+ dev->admin_msix_vector_idx = EFA_MGMNT_MSIX_VEC_IDX;
+ num_vecs = pci_alloc_irq_vectors(dev->pdev, 1,
+ max_vecs, PCI_IRQ_MSIX);
+
+ if (num_vecs < 0) {
+ dev_err(&dev->pdev->dev, "Failed to enable MSI-X. error %d\n",
+ num_vecs);
+ return -ENOSPC;
+ }
+
+ dev_dbg(&dev->pdev->dev, "Allocated %d MSI-X vectors\n", num_vecs);
+
+ dev->num_irq_vectors = num_vecs;
+
+ return 0;
+}
+
+static int efa_device_init(struct efa_com_dev *edev, struct pci_dev *pdev)
+{
+ int dma_width;
+ int err;
+
+ err = efa_com_dev_reset(edev, EFA_REGS_RESET_NORMAL);
+ if (err)
+ return err;
+
+ err = efa_com_validate_version(edev);
+ if (err)
+ return err;
+
+ dma_width = efa_com_get_dma_width(edev);
+ if (dma_width < 0) {
+ err = dma_width;
+ return err;
+ }
+
+ err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(dma_width));
+ if (err) {
+ dev_err(&pdev->dev, "dma_set_mask_and_coherent failed %d\n", err);
+ return err;
+ }
+
+ dma_set_max_seg_size(&pdev->dev, UINT_MAX);
+ return 0;
+}
+
+static struct efa_dev *efa_probe_device(struct pci_dev *pdev)
+{
+ struct efa_com_dev *edev;
+ struct efa_dev *dev;
+ int pci_mem_bars;
+ int err;
+
+ err = pci_enable_device_mem(pdev);
+ if (err) {
+ dev_err(&pdev->dev, "pci_enable_device_mem() failed!\n");
+ return ERR_PTR(err);
+ }
+
+ pci_set_master(pdev);
+
+ dev = ib_alloc_device(efa_dev, ibdev);
+ if (!dev) {
+ dev_err(&pdev->dev, "Device alloc failed\n");
+ err = -ENOMEM;
+ goto err_disable_device;
+ }
+
+ pci_set_drvdata(pdev, dev);
+ edev = &dev->edev;
+ edev->efa_dev = dev;
+ edev->dmadev = &pdev->dev;
+ dev->pdev = pdev;
+ xa_init(&dev->cqs_xa);
+
+ pci_mem_bars = pci_select_bars(pdev, IORESOURCE_MEM);
+ if (EFA_BASE_BAR_MASK & ~pci_mem_bars) {
+ dev_err(&pdev->dev, "BARs unavailable. Requested %#x, available %#x\n",
+ (int)EFA_BASE_BAR_MASK, pci_mem_bars);
+ err = -ENODEV;
+ goto err_ibdev_destroy;
+ }
+ err = pci_request_selected_regions(pdev, EFA_BASE_BAR_MASK, DRV_MODULE_NAME);
+ if (err) {
+ dev_err(&pdev->dev, "pci_request_selected_regions failed %d\n",
+ err);
+ goto err_ibdev_destroy;
+ }
+
+ dev->reg_bar_addr = pci_resource_start(pdev, EFA_REG_BAR);
+ dev->reg_bar_len = pci_resource_len(pdev, EFA_REG_BAR);
+ dev->mem_bar_addr = pci_resource_start(pdev, EFA_MEM_BAR);
+ dev->mem_bar_len = pci_resource_len(pdev, EFA_MEM_BAR);
+
+ edev->reg_bar = devm_ioremap(&pdev->dev,
+ dev->reg_bar_addr,
+ dev->reg_bar_len);
+ if (!edev->reg_bar) {
+ dev_err(&pdev->dev, "Failed to remap register bar\n");
+ err = -EFAULT;
+ goto err_release_bars;
+ }
+
+ err = efa_com_mmio_reg_read_init(edev);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to init readless MMIO\n");
+ goto err_iounmap;
+ }
+
+ err = efa_device_init(edev, pdev);
+ if (err) {
+ dev_err(&pdev->dev, "EFA device init failed\n");
+ if (err == -ETIME)
+ err = -EPROBE_DEFER;
+ goto err_reg_read_destroy;
+ }
+
+ err = efa_enable_msix(dev);
+ if (err)
+ goto err_reg_read_destroy;
+
+ edev->aq.msix_vector_idx = dev->admin_msix_vector_idx;
+ edev->aenq.msix_vector_idx = dev->admin_msix_vector_idx;
+
+ err = efa_set_mgmnt_irq(dev);
+ if (err)
+ goto err_disable_msix;
+
+ err = efa_com_admin_init(edev, &aenq_handlers);
+ if (err)
+ goto err_free_mgmnt_irq;
+
+ return dev;
+
+err_free_mgmnt_irq:
+ efa_free_irq(dev, &dev->admin_irq);
+err_disable_msix:
+ efa_disable_msix(dev);
+err_reg_read_destroy:
+ efa_com_mmio_reg_read_destroy(edev);
+err_iounmap:
+ devm_iounmap(&pdev->dev, edev->reg_bar);
+err_release_bars:
+ efa_release_bars(dev, EFA_BASE_BAR_MASK);
+err_ibdev_destroy:
+ ib_dealloc_device(&dev->ibdev);
+err_disable_device:
+ pci_disable_device(pdev);
+ return ERR_PTR(err);
+}
+
+static void efa_remove_device(struct pci_dev *pdev,
+ enum efa_regs_reset_reason_types reset_reason)
+{
+ struct efa_dev *dev = pci_get_drvdata(pdev);
+ struct efa_com_dev *edev;
+
+ edev = &dev->edev;
+ efa_com_dev_reset(edev, reset_reason);
+ efa_com_admin_destroy(edev);
+ efa_free_irq(dev, &dev->admin_irq);
+ efa_disable_msix(dev);
+ efa_com_mmio_reg_read_destroy(edev);
+ devm_iounmap(&pdev->dev, edev->reg_bar);
+ efa_release_bars(dev, EFA_BASE_BAR_MASK);
+ xa_destroy(&dev->cqs_xa);
+ ib_dealloc_device(&dev->ibdev);
+ pci_disable_device(pdev);
+}
+
+static int efa_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+{
+ struct efa_dev *dev;
+ int err;
+
+ dev = efa_probe_device(pdev);
+ if (IS_ERR(dev))
+ return PTR_ERR(dev);
+
+ err = efa_ib_device_add(dev);
+ if (err)
+ goto err_remove_device;
+
+ return 0;
+
+err_remove_device:
+ efa_remove_device(pdev, EFA_REGS_RESET_INIT_ERR);
+ return err;
+}
+
+static void efa_remove(struct pci_dev *pdev)
+{
+ struct efa_dev *dev = pci_get_drvdata(pdev);
+
+ efa_ib_device_remove(dev);
+ efa_remove_device(pdev, EFA_REGS_RESET_NORMAL);
+}
+
+static void efa_shutdown(struct pci_dev *pdev)
+{
+ struct efa_dev *dev = pci_get_drvdata(pdev);
+
+ efa_destroy_eqs(dev);
+ efa_com_dev_reset(&dev->edev, EFA_REGS_RESET_SHUTDOWN);
+ efa_free_irq(dev, &dev->admin_irq);
+ efa_disable_msix(dev);
+}
+
+static struct pci_driver efa_pci_driver = {
+ .name = DRV_MODULE_NAME,
+ .id_table = efa_pci_tbl,
+ .probe = efa_probe,
+ .remove = efa_remove,
+ .shutdown = efa_shutdown,
+};
+
+module_pci_driver(efa_pci_driver);
diff --git a/drivers/infiniband/hw/efa/efa_regs_defs.h b/drivers/infiniband/hw/efa/efa_regs_defs.h
new file mode 100644
index 000000000000..714ae6258800
--- /dev/null
+++ b/drivers/infiniband/hw/efa/efa_regs_defs.h
@@ -0,0 +1,101 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
+/*
+ * Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved.
+ */
+
+#ifndef _EFA_REGS_H_
+#define _EFA_REGS_H_
+
+enum efa_regs_reset_reason_types {
+ EFA_REGS_RESET_NORMAL = 0,
+ /* Keep alive timeout */
+ EFA_REGS_RESET_KEEP_ALIVE_TO = 1,
+ EFA_REGS_RESET_ADMIN_TO = 2,
+ EFA_REGS_RESET_INIT_ERR = 3,
+ EFA_REGS_RESET_DRIVER_INVALID_STATE = 4,
+ EFA_REGS_RESET_OS_TRIGGER = 5,
+ EFA_REGS_RESET_SHUTDOWN = 6,
+ EFA_REGS_RESET_USER_TRIGGER = 7,
+ EFA_REGS_RESET_GENERIC = 8,
+};
+
+/* efa_registers offsets */
+
+/* 0 base */
+#define EFA_REGS_VERSION_OFF 0x0
+#define EFA_REGS_CONTROLLER_VERSION_OFF 0x4
+#define EFA_REGS_CAPS_OFF 0x8
+#define EFA_REGS_AQ_BASE_LO_OFF 0x10
+#define EFA_REGS_AQ_BASE_HI_OFF 0x14
+#define EFA_REGS_AQ_CAPS_OFF 0x18
+#define EFA_REGS_ACQ_BASE_LO_OFF 0x20
+#define EFA_REGS_ACQ_BASE_HI_OFF 0x24
+#define EFA_REGS_ACQ_CAPS_OFF 0x28
+#define EFA_REGS_AQ_PROD_DB_OFF 0x2c
+#define EFA_REGS_AENQ_CAPS_OFF 0x34
+#define EFA_REGS_AENQ_BASE_LO_OFF 0x38
+#define EFA_REGS_AENQ_BASE_HI_OFF 0x3c
+#define EFA_REGS_AENQ_CONS_DB_OFF 0x40
+#define EFA_REGS_INTR_MASK_OFF 0x4c
+#define EFA_REGS_DEV_CTL_OFF 0x54
+#define EFA_REGS_DEV_STS_OFF 0x58
+#define EFA_REGS_MMIO_REG_READ_OFF 0x5c
+#define EFA_REGS_MMIO_RESP_LO_OFF 0x60
+#define EFA_REGS_MMIO_RESP_HI_OFF 0x64
+#define EFA_REGS_EQ_DB_OFF 0x68
+
+/* version register */
+#define EFA_REGS_VERSION_MINOR_VERSION_MASK 0xff
+#define EFA_REGS_VERSION_MAJOR_VERSION_MASK 0xff00
+
+/* controller_version register */
+#define EFA_REGS_CONTROLLER_VERSION_SUBMINOR_VERSION_MASK 0xff
+#define EFA_REGS_CONTROLLER_VERSION_MINOR_VERSION_MASK 0xff00
+#define EFA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_MASK 0xff0000
+#define EFA_REGS_CONTROLLER_VERSION_IMPL_ID_MASK 0xff000000
+
+/* caps register */
+#define EFA_REGS_CAPS_CONTIGUOUS_QUEUE_REQUIRED_MASK 0x1
+#define EFA_REGS_CAPS_RESET_TIMEOUT_MASK 0x3e
+#define EFA_REGS_CAPS_DMA_ADDR_WIDTH_MASK 0xff00
+#define EFA_REGS_CAPS_ADMIN_CMD_TO_MASK 0xf0000
+
+/* aq_caps register */
+#define EFA_REGS_AQ_CAPS_AQ_DEPTH_MASK 0xffff
+#define EFA_REGS_AQ_CAPS_AQ_ENTRY_SIZE_MASK 0xffff0000
+
+/* acq_caps register */
+#define EFA_REGS_ACQ_CAPS_ACQ_DEPTH_MASK 0xffff
+#define EFA_REGS_ACQ_CAPS_ACQ_ENTRY_SIZE_MASK 0xff0000
+#define EFA_REGS_ACQ_CAPS_ACQ_MSIX_VECTOR_MASK 0xff000000
+
+/* aenq_caps register */
+#define EFA_REGS_AENQ_CAPS_AENQ_DEPTH_MASK 0xffff
+#define EFA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_MASK 0xff0000
+#define EFA_REGS_AENQ_CAPS_AENQ_MSIX_VECTOR_MASK 0xff000000
+
+/* intr_mask register */
+#define EFA_REGS_INTR_MASK_EN_MASK 0x1
+
+/* dev_ctl register */
+#define EFA_REGS_DEV_CTL_DEV_RESET_MASK 0x1
+#define EFA_REGS_DEV_CTL_AQ_RESTART_MASK 0x2
+#define EFA_REGS_DEV_CTL_RESET_REASON_MASK 0xf0000000
+
+/* dev_sts register */
+#define EFA_REGS_DEV_STS_READY_MASK 0x1
+#define EFA_REGS_DEV_STS_AQ_RESTART_IN_PROGRESS_MASK 0x2
+#define EFA_REGS_DEV_STS_AQ_RESTART_FINISHED_MASK 0x4
+#define EFA_REGS_DEV_STS_RESET_IN_PROGRESS_MASK 0x8
+#define EFA_REGS_DEV_STS_RESET_FINISHED_MASK 0x10
+#define EFA_REGS_DEV_STS_FATAL_ERROR_MASK 0x20
+
+/* mmio_reg_read register */
+#define EFA_REGS_MMIO_REG_READ_REQ_ID_MASK 0xffff
+#define EFA_REGS_MMIO_REG_READ_REG_OFF_MASK 0xffff0000
+
+/* eq_db register */
+#define EFA_REGS_EQ_DB_EQN_MASK 0xffff
+#define EFA_REGS_EQ_DB_ARM_MASK 0x80000000
+
+#endif /* _EFA_REGS_H_ */
diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c
new file mode 100644
index 000000000000..22d3e25c3b9d
--- /dev/null
+++ b/drivers/infiniband/hw/efa/efa_verbs.c
@@ -0,0 +1,2348 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright 2018-2024 Amazon.com, Inc. or its affiliates. All rights reserved.
+ */
+
+#include <linux/dma-buf.h>
+#include <linux/dma-resv.h>
+#include <linux/vmalloc.h>
+#include <linux/log2.h>
+
+#include <rdma/ib_addr.h>
+#include <rdma/ib_umem.h>
+#include <rdma/ib_user_verbs.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/uverbs_ioctl.h>
+#define UVERBS_MODULE_NAME efa_ib
+#include <rdma/uverbs_named_ioctl.h>
+#include <rdma/ib_user_ioctl_cmds.h>
+
+#include "efa.h"
+#include "efa_io_defs.h"
+
+enum {
+ EFA_MMAP_DMA_PAGE = 0,
+ EFA_MMAP_IO_WC,
+ EFA_MMAP_IO_NC,
+};
+
+struct efa_user_mmap_entry {
+ struct rdma_user_mmap_entry rdma_entry;
+ u64 address;
+ u8 mmap_flag;
+};
+
+#define EFA_DEFINE_DEVICE_STATS(op) \
+ op(EFA_SUBMITTED_CMDS, "submitted_cmds") \
+ op(EFA_COMPLETED_CMDS, "completed_cmds") \
+ op(EFA_CMDS_ERR, "cmds_err") \
+ op(EFA_NO_COMPLETION_CMDS, "no_completion_cmds") \
+ op(EFA_KEEP_ALIVE_RCVD, "keep_alive_rcvd") \
+ op(EFA_ALLOC_PD_ERR, "alloc_pd_err") \
+ op(EFA_CREATE_QP_ERR, "create_qp_err") \
+ op(EFA_CREATE_CQ_ERR, "create_cq_err") \
+ op(EFA_REG_MR_ERR, "reg_mr_err") \
+ op(EFA_ALLOC_UCONTEXT_ERR, "alloc_ucontext_err") \
+ op(EFA_CREATE_AH_ERR, "create_ah_err") \
+ op(EFA_MMAP_ERR, "mmap_err")
+
+#define EFA_DEFINE_PORT_STATS(op) \
+ op(EFA_TX_BYTES, "tx_bytes") \
+ op(EFA_TX_PKTS, "tx_pkts") \
+ op(EFA_RX_BYTES, "rx_bytes") \
+ op(EFA_RX_PKTS, "rx_pkts") \
+ op(EFA_RX_DROPS, "rx_drops") \
+ op(EFA_SEND_BYTES, "send_bytes") \
+ op(EFA_SEND_WRS, "send_wrs") \
+ op(EFA_RECV_BYTES, "recv_bytes") \
+ op(EFA_RECV_WRS, "recv_wrs") \
+ op(EFA_RDMA_READ_WRS, "rdma_read_wrs") \
+ op(EFA_RDMA_READ_BYTES, "rdma_read_bytes") \
+ op(EFA_RDMA_READ_WR_ERR, "rdma_read_wr_err") \
+ op(EFA_RDMA_READ_RESP_BYTES, "rdma_read_resp_bytes") \
+ op(EFA_RDMA_WRITE_WRS, "rdma_write_wrs") \
+ op(EFA_RDMA_WRITE_BYTES, "rdma_write_bytes") \
+ op(EFA_RDMA_WRITE_WR_ERR, "rdma_write_wr_err") \
+ op(EFA_RDMA_WRITE_RECV_BYTES, "rdma_write_recv_bytes") \
+ op(EFA_RETRANS_BYTES, "retrans_bytes") \
+ op(EFA_RETRANS_PKTS, "retrans_pkts") \
+ op(EFA_RETRANS_TIMEOUT_EVENS, "retrans_timeout_events") \
+ op(EFA_UNRESPONSIVE_REMOTE_EVENTS, "unresponsive_remote_events") \
+ op(EFA_IMPAIRED_REMOTE_CONN_EVENTS, "impaired_remote_conn_events") \
+
+#define EFA_STATS_ENUM(ename, name) ename,
+#define EFA_STATS_STR(ename, nam) \
+ [ename].name = nam,
+
+enum efa_hw_device_stats {
+ EFA_DEFINE_DEVICE_STATS(EFA_STATS_ENUM)
+};
+
+static const struct rdma_stat_desc efa_device_stats_descs[] = {
+ EFA_DEFINE_DEVICE_STATS(EFA_STATS_STR)
+};
+
+enum efa_hw_port_stats {
+ EFA_DEFINE_PORT_STATS(EFA_STATS_ENUM)
+};
+
+static const struct rdma_stat_desc efa_port_stats_descs[] = {
+ EFA_DEFINE_PORT_STATS(EFA_STATS_STR)
+};
+
+#define EFA_DEFAULT_LINK_SPEED_GBPS 100
+
+#define EFA_CHUNK_PAYLOAD_SHIFT 12
+#define EFA_CHUNK_PAYLOAD_SIZE BIT(EFA_CHUNK_PAYLOAD_SHIFT)
+#define EFA_CHUNK_PAYLOAD_PTR_SIZE 8
+
+#define EFA_CHUNK_SHIFT 12
+#define EFA_CHUNK_SIZE BIT(EFA_CHUNK_SHIFT)
+#define EFA_CHUNK_PTR_SIZE sizeof(struct efa_com_ctrl_buff_info)
+
+#define EFA_PTRS_PER_CHUNK \
+ ((EFA_CHUNK_SIZE - EFA_CHUNK_PTR_SIZE) / EFA_CHUNK_PAYLOAD_PTR_SIZE)
+
+#define EFA_CHUNK_USED_SIZE \
+ ((EFA_PTRS_PER_CHUNK * EFA_CHUNK_PAYLOAD_PTR_SIZE) + EFA_CHUNK_PTR_SIZE)
+
+struct pbl_chunk {
+ dma_addr_t dma_addr;
+ u64 *buf;
+ u32 length;
+};
+
+struct pbl_chunk_list {
+ struct pbl_chunk *chunks;
+ unsigned int size;
+};
+
+struct pbl_context {
+ union {
+ struct {
+ dma_addr_t dma_addr;
+ } continuous;
+ struct {
+ u32 pbl_buf_size_in_pages;
+ struct scatterlist *sgl;
+ int sg_dma_cnt;
+ struct pbl_chunk_list chunk_list;
+ } indirect;
+ } phys;
+ u64 *pbl_buf;
+ u32 pbl_buf_size_in_bytes;
+ u8 physically_continuous;
+};
+
+static inline struct efa_dev *to_edev(struct ib_device *ibdev)
+{
+ return container_of(ibdev, struct efa_dev, ibdev);
+}
+
+static inline struct efa_ucontext *to_eucontext(struct ib_ucontext *ibucontext)
+{
+ return container_of(ibucontext, struct efa_ucontext, ibucontext);
+}
+
+static inline struct efa_pd *to_epd(struct ib_pd *ibpd)
+{
+ return container_of(ibpd, struct efa_pd, ibpd);
+}
+
+static inline struct efa_mr *to_emr(struct ib_mr *ibmr)
+{
+ return container_of(ibmr, struct efa_mr, ibmr);
+}
+
+static inline struct efa_qp *to_eqp(struct ib_qp *ibqp)
+{
+ return container_of(ibqp, struct efa_qp, ibqp);
+}
+
+static inline struct efa_cq *to_ecq(struct ib_cq *ibcq)
+{
+ return container_of(ibcq, struct efa_cq, ibcq);
+}
+
+static inline struct efa_ah *to_eah(struct ib_ah *ibah)
+{
+ return container_of(ibah, struct efa_ah, ibah);
+}
+
+static inline struct efa_user_mmap_entry *
+to_emmap(struct rdma_user_mmap_entry *rdma_entry)
+{
+ return container_of(rdma_entry, struct efa_user_mmap_entry, rdma_entry);
+}
+
+#define EFA_DEV_CAP(dev, cap) \
+ ((dev)->dev_attr.device_caps & \
+ EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_##cap##_MASK)
+
+#define is_reserved_cleared(reserved) \
+ !memchr_inv(reserved, 0, sizeof(reserved))
+
+static void *efa_zalloc_mapped(struct efa_dev *dev, dma_addr_t *dma_addr,
+ size_t size, enum dma_data_direction dir)
+{
+ void *addr;
+
+ addr = alloc_pages_exact(size, GFP_KERNEL | __GFP_ZERO);
+ if (!addr)
+ return NULL;
+
+ *dma_addr = dma_map_single(&dev->pdev->dev, addr, size, dir);
+ if (dma_mapping_error(&dev->pdev->dev, *dma_addr)) {
+ ibdev_err(&dev->ibdev, "Failed to map DMA address\n");
+ free_pages_exact(addr, size);
+ return NULL;
+ }
+
+ return addr;
+}
+
+static void efa_free_mapped(struct efa_dev *dev, void *cpu_addr,
+ dma_addr_t dma_addr,
+ size_t size, enum dma_data_direction dir)
+{
+ dma_unmap_single(&dev->pdev->dev, dma_addr, size, dir);
+ free_pages_exact(cpu_addr, size);
+}
+
+int efa_query_device(struct ib_device *ibdev,
+ struct ib_device_attr *props,
+ struct ib_udata *udata)
+{
+ struct efa_com_get_device_attr_result *dev_attr;
+ struct efa_ibv_ex_query_device_resp resp = {};
+ struct efa_dev *dev = to_edev(ibdev);
+ int err;
+
+ if (udata && udata->inlen &&
+ !ib_is_udata_cleared(udata, 0, udata->inlen)) {
+ ibdev_dbg(ibdev,
+ "Incompatible ABI params, udata not cleared\n");
+ return -EINVAL;
+ }
+
+ dev_attr = &dev->dev_attr;
+
+ memset(props, 0, sizeof(*props));
+ props->max_mr_size = dev_attr->max_mr_pages * PAGE_SIZE;
+ props->page_size_cap = dev_attr->page_size_cap;
+ props->vendor_id = dev->pdev->vendor;
+ props->vendor_part_id = dev->pdev->device;
+ props->hw_ver = dev->pdev->subsystem_device;
+ props->max_qp = dev_attr->max_qp;
+ props->max_cq = dev_attr->max_cq;
+ props->max_pd = dev_attr->max_pd;
+ props->max_mr = dev_attr->max_mr;
+ props->max_ah = dev_attr->max_ah;
+ props->max_cqe = dev_attr->max_cq_depth;
+ props->max_qp_wr = min_t(u32, dev_attr->max_sq_depth,
+ dev_attr->max_rq_depth);
+ props->max_send_sge = dev_attr->max_sq_sge;
+ props->max_recv_sge = dev_attr->max_rq_sge;
+ props->max_sge_rd = dev_attr->max_wr_rdma_sge;
+ props->max_pkeys = 1;
+
+ if (udata && udata->outlen) {
+ resp.max_sq_sge = dev_attr->max_sq_sge;
+ resp.max_rq_sge = dev_attr->max_rq_sge;
+ resp.max_sq_wr = dev_attr->max_sq_depth;
+ resp.max_rq_wr = dev_attr->max_rq_depth;
+ resp.max_rdma_size = dev_attr->max_rdma_size;
+
+ resp.device_caps |= EFA_QUERY_DEVICE_CAPS_CQ_WITH_SGID;
+ resp.device_caps |= EFA_QUERY_DEVICE_CAPS_CQ_WITH_EXT_MEM;
+ if (EFA_DEV_CAP(dev, RDMA_READ))
+ resp.device_caps |= EFA_QUERY_DEVICE_CAPS_RDMA_READ;
+
+ if (EFA_DEV_CAP(dev, RNR_RETRY))
+ resp.device_caps |= EFA_QUERY_DEVICE_CAPS_RNR_RETRY;
+
+ if (EFA_DEV_CAP(dev, DATA_POLLING_128))
+ resp.device_caps |= EFA_QUERY_DEVICE_CAPS_DATA_POLLING_128;
+
+ if (EFA_DEV_CAP(dev, RDMA_WRITE))
+ resp.device_caps |= EFA_QUERY_DEVICE_CAPS_RDMA_WRITE;
+
+ if (EFA_DEV_CAP(dev, UNSOLICITED_WRITE_RECV))
+ resp.device_caps |= EFA_QUERY_DEVICE_CAPS_UNSOLICITED_WRITE_RECV;
+
+ if (dev->neqs)
+ resp.device_caps |= EFA_QUERY_DEVICE_CAPS_CQ_NOTIFICATIONS;
+
+ err = ib_copy_to_udata(udata, &resp,
+ min(sizeof(resp), udata->outlen));
+ if (err) {
+ ibdev_dbg(ibdev,
+ "Failed to copy udata for query_device\n");
+ return err;
+ }
+ }
+
+ return 0;
+}
+
+static void efa_link_gbps_to_speed_and_width(u16 gbps,
+ enum ib_port_speed *speed,
+ enum ib_port_width *width)
+{
+ if (gbps >= 400) {
+ *width = IB_WIDTH_8X;
+ *speed = IB_SPEED_HDR;
+ } else if (gbps >= 200) {
+ *width = IB_WIDTH_4X;
+ *speed = IB_SPEED_HDR;
+ } else if (gbps >= 120) {
+ *width = IB_WIDTH_12X;
+ *speed = IB_SPEED_FDR10;
+ } else if (gbps >= 100) {
+ *width = IB_WIDTH_4X;
+ *speed = IB_SPEED_EDR;
+ } else if (gbps >= 60) {
+ *width = IB_WIDTH_12X;
+ *speed = IB_SPEED_DDR;
+ } else if (gbps >= 50) {
+ *width = IB_WIDTH_1X;
+ *speed = IB_SPEED_HDR;
+ } else if (gbps >= 40) {
+ *width = IB_WIDTH_4X;
+ *speed = IB_SPEED_FDR10;
+ } else if (gbps >= 30) {
+ *width = IB_WIDTH_12X;
+ *speed = IB_SPEED_SDR;
+ } else {
+ *width = IB_WIDTH_1X;
+ *speed = IB_SPEED_EDR;
+ }
+}
+
+int efa_query_port(struct ib_device *ibdev, u32 port,
+ struct ib_port_attr *props)
+{
+ struct efa_dev *dev = to_edev(ibdev);
+ enum ib_port_speed link_speed;
+ enum ib_port_width link_width;
+ u16 link_gbps;
+
+ props->lmc = 1;
+
+ props->state = IB_PORT_ACTIVE;
+ props->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
+ props->gid_tbl_len = 1;
+ props->pkey_tbl_len = 1;
+ link_gbps = dev->dev_attr.max_link_speed_gbps ?: EFA_DEFAULT_LINK_SPEED_GBPS;
+ efa_link_gbps_to_speed_and_width(link_gbps, &link_speed, &link_width);
+ props->active_speed = link_speed;
+ props->active_width = link_width;
+ props->max_mtu = ib_mtu_int_to_enum(dev->dev_attr.mtu);
+ props->active_mtu = ib_mtu_int_to_enum(dev->dev_attr.mtu);
+ props->max_msg_sz = dev->dev_attr.mtu;
+ props->max_vl_num = 1;
+
+ return 0;
+}
+
+int efa_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
+ int qp_attr_mask,
+ struct ib_qp_init_attr *qp_init_attr)
+{
+ struct efa_dev *dev = to_edev(ibqp->device);
+ struct efa_com_query_qp_params params = {};
+ struct efa_com_query_qp_result result;
+ struct efa_qp *qp = to_eqp(ibqp);
+ int err;
+
+#define EFA_QUERY_QP_SUPP_MASK \
+ (IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT | \
+ IB_QP_QKEY | IB_QP_SQ_PSN | IB_QP_CAP | IB_QP_RNR_RETRY)
+
+ if (qp_attr_mask & ~EFA_QUERY_QP_SUPP_MASK) {
+ ibdev_dbg(&dev->ibdev,
+ "Unsupported qp_attr_mask[%#x] supported[%#x]\n",
+ qp_attr_mask, EFA_QUERY_QP_SUPP_MASK);
+ return -EOPNOTSUPP;
+ }
+
+ memset(qp_attr, 0, sizeof(*qp_attr));
+ memset(qp_init_attr, 0, sizeof(*qp_init_attr));
+
+ params.qp_handle = qp->qp_handle;
+ err = efa_com_query_qp(&dev->edev, &params, &result);
+ if (err)
+ return err;
+
+ qp_attr->qp_state = result.qp_state;
+ qp_attr->qkey = result.qkey;
+ qp_attr->sq_psn = result.sq_psn;
+ qp_attr->sq_draining = result.sq_draining;
+ qp_attr->port_num = 1;
+ qp_attr->rnr_retry = result.rnr_retry;
+
+ qp_attr->cap.max_send_wr = qp->max_send_wr;
+ qp_attr->cap.max_recv_wr = qp->max_recv_wr;
+ qp_attr->cap.max_send_sge = qp->max_send_sge;
+ qp_attr->cap.max_recv_sge = qp->max_recv_sge;
+ qp_attr->cap.max_inline_data = qp->max_inline_data;
+
+ qp_init_attr->qp_type = ibqp->qp_type;
+ qp_init_attr->recv_cq = ibqp->recv_cq;
+ qp_init_attr->send_cq = ibqp->send_cq;
+ qp_init_attr->qp_context = ibqp->qp_context;
+ qp_init_attr->cap = qp_attr->cap;
+
+ return 0;
+}
+
+int efa_query_gid(struct ib_device *ibdev, u32 port, int index,
+ union ib_gid *gid)
+{
+ struct efa_dev *dev = to_edev(ibdev);
+
+ memcpy(gid->raw, dev->dev_attr.addr, sizeof(dev->dev_attr.addr));
+
+ return 0;
+}
+
+int efa_query_pkey(struct ib_device *ibdev, u32 port, u16 index,
+ u16 *pkey)
+{
+ if (index > 0)
+ return -EINVAL;
+
+ *pkey = 0xffff;
+ return 0;
+}
+
+static int efa_pd_dealloc(struct efa_dev *dev, u16 pdn)
+{
+ struct efa_com_dealloc_pd_params params = {
+ .pdn = pdn,
+ };
+
+ return efa_com_dealloc_pd(&dev->edev, &params);
+}
+
+int efa_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
+{
+ struct efa_dev *dev = to_edev(ibpd->device);
+ struct efa_ibv_alloc_pd_resp resp = {};
+ struct efa_com_alloc_pd_result result;
+ struct efa_pd *pd = to_epd(ibpd);
+ int err;
+
+ if (udata->inlen &&
+ !ib_is_udata_cleared(udata, 0, udata->inlen)) {
+ ibdev_dbg(&dev->ibdev,
+ "Incompatible ABI params, udata not cleared\n");
+ err = -EINVAL;
+ goto err_out;
+ }
+
+ err = efa_com_alloc_pd(&dev->edev, &result);
+ if (err)
+ goto err_out;
+
+ pd->pdn = result.pdn;
+ resp.pdn = result.pdn;
+
+ if (udata->outlen) {
+ err = ib_copy_to_udata(udata, &resp,
+ min(sizeof(resp), udata->outlen));
+ if (err) {
+ ibdev_dbg(&dev->ibdev,
+ "Failed to copy udata for alloc_pd\n");
+ goto err_dealloc_pd;
+ }
+ }
+
+ ibdev_dbg(&dev->ibdev, "Allocated pd[%d]\n", pd->pdn);
+
+ return 0;
+
+err_dealloc_pd:
+ efa_pd_dealloc(dev, result.pdn);
+err_out:
+ atomic64_inc(&dev->stats.alloc_pd_err);
+ return err;
+}
+
+int efa_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
+{
+ struct efa_dev *dev = to_edev(ibpd->device);
+ struct efa_pd *pd = to_epd(ibpd);
+
+ ibdev_dbg(&dev->ibdev, "Dealloc pd[%d]\n", pd->pdn);
+ efa_pd_dealloc(dev, pd->pdn);
+ return 0;
+}
+
+static int efa_destroy_qp_handle(struct efa_dev *dev, u32 qp_handle)
+{
+ struct efa_com_destroy_qp_params params = { .qp_handle = qp_handle };
+
+ return efa_com_destroy_qp(&dev->edev, &params);
+}
+
+static void efa_qp_user_mmap_entries_remove(struct efa_qp *qp)
+{
+ rdma_user_mmap_entry_remove(qp->rq_mmap_entry);
+ rdma_user_mmap_entry_remove(qp->rq_db_mmap_entry);
+ rdma_user_mmap_entry_remove(qp->llq_desc_mmap_entry);
+ rdma_user_mmap_entry_remove(qp->sq_db_mmap_entry);
+}
+
+int efa_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
+{
+ struct efa_dev *dev = to_edev(ibqp->pd->device);
+ struct efa_qp *qp = to_eqp(ibqp);
+ int err;
+
+ ibdev_dbg(&dev->ibdev, "Destroy qp[%u]\n", ibqp->qp_num);
+
+ err = efa_destroy_qp_handle(dev, qp->qp_handle);
+ if (err)
+ return err;
+
+ efa_qp_user_mmap_entries_remove(qp);
+
+ if (qp->rq_cpu_addr) {
+ ibdev_dbg(&dev->ibdev,
+ "qp->cpu_addr[0x%p] freed: size[%lu], dma[%pad]\n",
+ qp->rq_cpu_addr, qp->rq_size,
+ &qp->rq_dma_addr);
+ efa_free_mapped(dev, qp->rq_cpu_addr, qp->rq_dma_addr,
+ qp->rq_size, DMA_TO_DEVICE);
+ }
+
+ return 0;
+}
+
+static struct rdma_user_mmap_entry*
+efa_user_mmap_entry_insert(struct ib_ucontext *ucontext,
+ u64 address, size_t length,
+ u8 mmap_flag, u64 *offset)
+{
+ struct efa_user_mmap_entry *entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+ int err;
+
+ if (!entry)
+ return NULL;
+
+ entry->address = address;
+ entry->mmap_flag = mmap_flag;
+
+ err = rdma_user_mmap_entry_insert(ucontext, &entry->rdma_entry,
+ length);
+ if (err) {
+ kfree(entry);
+ return NULL;
+ }
+ *offset = rdma_user_mmap_get_offset(&entry->rdma_entry);
+
+ return &entry->rdma_entry;
+}
+
+static int qp_mmap_entries_setup(struct efa_qp *qp,
+ struct efa_dev *dev,
+ struct efa_ucontext *ucontext,
+ struct efa_com_create_qp_params *params,
+ struct efa_ibv_create_qp_resp *resp)
+{
+ size_t length;
+ u64 address;
+
+ address = dev->db_bar_addr + resp->sq_db_offset;
+ qp->sq_db_mmap_entry =
+ efa_user_mmap_entry_insert(&ucontext->ibucontext,
+ address,
+ PAGE_SIZE, EFA_MMAP_IO_NC,
+ &resp->sq_db_mmap_key);
+ if (!qp->sq_db_mmap_entry)
+ return -ENOMEM;
+
+ resp->sq_db_offset &= ~PAGE_MASK;
+
+ address = dev->mem_bar_addr + resp->llq_desc_offset;
+ length = PAGE_ALIGN(params->sq_ring_size_in_bytes +
+ offset_in_page(resp->llq_desc_offset));
+
+ qp->llq_desc_mmap_entry =
+ efa_user_mmap_entry_insert(&ucontext->ibucontext,
+ address, length,
+ EFA_MMAP_IO_WC,
+ &resp->llq_desc_mmap_key);
+ if (!qp->llq_desc_mmap_entry)
+ goto err_remove_mmap;
+
+ resp->llq_desc_offset &= ~PAGE_MASK;
+
+ if (qp->rq_size) {
+ address = dev->db_bar_addr + resp->rq_db_offset;
+
+ qp->rq_db_mmap_entry =
+ efa_user_mmap_entry_insert(&ucontext->ibucontext,
+ address, PAGE_SIZE,
+ EFA_MMAP_IO_NC,
+ &resp->rq_db_mmap_key);
+ if (!qp->rq_db_mmap_entry)
+ goto err_remove_mmap;
+
+ resp->rq_db_offset &= ~PAGE_MASK;
+
+ address = virt_to_phys(qp->rq_cpu_addr);
+ qp->rq_mmap_entry =
+ efa_user_mmap_entry_insert(&ucontext->ibucontext,
+ address, qp->rq_size,
+ EFA_MMAP_DMA_PAGE,
+ &resp->rq_mmap_key);
+ if (!qp->rq_mmap_entry)
+ goto err_remove_mmap;
+
+ resp->rq_mmap_size = qp->rq_size;
+ }
+
+ return 0;
+
+err_remove_mmap:
+ efa_qp_user_mmap_entries_remove(qp);
+
+ return -ENOMEM;
+}
+
+static int efa_qp_validate_cap(struct efa_dev *dev,
+ struct ib_qp_init_attr *init_attr)
+{
+ if (init_attr->cap.max_send_wr > dev->dev_attr.max_sq_depth) {
+ ibdev_dbg(&dev->ibdev,
+ "qp: requested send wr[%u] exceeds the max[%u]\n",
+ init_attr->cap.max_send_wr,
+ dev->dev_attr.max_sq_depth);
+ return -EINVAL;
+ }
+ if (init_attr->cap.max_recv_wr > dev->dev_attr.max_rq_depth) {
+ ibdev_dbg(&dev->ibdev,
+ "qp: requested receive wr[%u] exceeds the max[%u]\n",
+ init_attr->cap.max_recv_wr,
+ dev->dev_attr.max_rq_depth);
+ return -EINVAL;
+ }
+ if (init_attr->cap.max_send_sge > dev->dev_attr.max_sq_sge) {
+ ibdev_dbg(&dev->ibdev,
+ "qp: requested sge send[%u] exceeds the max[%u]\n",
+ init_attr->cap.max_send_sge, dev->dev_attr.max_sq_sge);
+ return -EINVAL;
+ }
+ if (init_attr->cap.max_recv_sge > dev->dev_attr.max_rq_sge) {
+ ibdev_dbg(&dev->ibdev,
+ "qp: requested sge recv[%u] exceeds the max[%u]\n",
+ init_attr->cap.max_recv_sge, dev->dev_attr.max_rq_sge);
+ return -EINVAL;
+ }
+ if (init_attr->cap.max_inline_data > dev->dev_attr.inline_buf_size) {
+ ibdev_dbg(&dev->ibdev,
+ "qp: requested inline data[%u] exceeds the max[%u]\n",
+ init_attr->cap.max_inline_data,
+ dev->dev_attr.inline_buf_size);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int efa_qp_validate_attr(struct efa_dev *dev,
+ struct ib_qp_init_attr *init_attr)
+{
+ if (init_attr->qp_type != IB_QPT_DRIVER &&
+ init_attr->qp_type != IB_QPT_UD) {
+ ibdev_dbg(&dev->ibdev,
+ "Unsupported qp type %d\n", init_attr->qp_type);
+ return -EOPNOTSUPP;
+ }
+
+ if (init_attr->srq) {
+ ibdev_dbg(&dev->ibdev, "SRQ is not supported\n");
+ return -EOPNOTSUPP;
+ }
+
+ if (init_attr->create_flags) {
+ ibdev_dbg(&dev->ibdev, "Unsupported create flags\n");
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+int efa_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata)
+{
+ struct efa_com_create_qp_params create_qp_params = {};
+ struct efa_com_create_qp_result create_qp_resp;
+ struct efa_dev *dev = to_edev(ibqp->device);
+ struct efa_ibv_create_qp_resp resp = {};
+ struct efa_ibv_create_qp cmd = {};
+ struct efa_qp *qp = to_eqp(ibqp);
+ struct efa_ucontext *ucontext;
+ u16 supported_efa_flags = 0;
+ int err;
+
+ ucontext = rdma_udata_to_drv_context(udata, struct efa_ucontext,
+ ibucontext);
+
+ err = efa_qp_validate_cap(dev, init_attr);
+ if (err)
+ goto err_out;
+
+ err = efa_qp_validate_attr(dev, init_attr);
+ if (err)
+ goto err_out;
+
+ if (offsetofend(typeof(cmd), driver_qp_type) > udata->inlen) {
+ ibdev_dbg(&dev->ibdev,
+ "Incompatible ABI params, no input udata\n");
+ err = -EINVAL;
+ goto err_out;
+ }
+
+ if (udata->inlen > sizeof(cmd) &&
+ !ib_is_udata_cleared(udata, sizeof(cmd),
+ udata->inlen - sizeof(cmd))) {
+ ibdev_dbg(&dev->ibdev,
+ "Incompatible ABI params, unknown fields in udata\n");
+ err = -EINVAL;
+ goto err_out;
+ }
+
+ err = ib_copy_from_udata(&cmd, udata,
+ min(sizeof(cmd), udata->inlen));
+ if (err) {
+ ibdev_dbg(&dev->ibdev,
+ "Cannot copy udata for create_qp\n");
+ goto err_out;
+ }
+
+ if (cmd.comp_mask || !is_reserved_cleared(cmd.reserved_98)) {
+ ibdev_dbg(&dev->ibdev,
+ "Incompatible ABI params, unknown fields in udata\n");
+ err = -EINVAL;
+ goto err_out;
+ }
+
+ if (EFA_DEV_CAP(dev, UNSOLICITED_WRITE_RECV))
+ supported_efa_flags |= EFA_CREATE_QP_WITH_UNSOLICITED_WRITE_RECV;
+
+ if (cmd.flags & ~supported_efa_flags) {
+ ibdev_dbg(&dev->ibdev, "Unsupported EFA QP create flags[%#x], supported[%#x]\n",
+ cmd.flags, supported_efa_flags);
+ err = -EOPNOTSUPP;
+ goto err_out;
+ }
+
+ create_qp_params.uarn = ucontext->uarn;
+ create_qp_params.pd = to_epd(ibqp->pd)->pdn;
+
+ if (init_attr->qp_type == IB_QPT_UD) {
+ create_qp_params.qp_type = EFA_ADMIN_QP_TYPE_UD;
+ } else if (cmd.driver_qp_type == EFA_QP_DRIVER_TYPE_SRD) {
+ create_qp_params.qp_type = EFA_ADMIN_QP_TYPE_SRD;
+ } else {
+ ibdev_dbg(&dev->ibdev,
+ "Unsupported qp type %d driver qp type %d\n",
+ init_attr->qp_type, cmd.driver_qp_type);
+ err = -EOPNOTSUPP;
+ goto err_out;
+ }
+
+ ibdev_dbg(&dev->ibdev, "Create QP: qp type %d driver qp type %#x\n",
+ init_attr->qp_type, cmd.driver_qp_type);
+ create_qp_params.send_cq_idx = to_ecq(init_attr->send_cq)->cq_idx;
+ create_qp_params.recv_cq_idx = to_ecq(init_attr->recv_cq)->cq_idx;
+ create_qp_params.sq_depth = init_attr->cap.max_send_wr;
+ create_qp_params.sq_ring_size_in_bytes = cmd.sq_ring_size;
+
+ create_qp_params.rq_depth = init_attr->cap.max_recv_wr;
+ create_qp_params.rq_ring_size_in_bytes = cmd.rq_ring_size;
+ qp->rq_size = PAGE_ALIGN(create_qp_params.rq_ring_size_in_bytes);
+ if (qp->rq_size) {
+ qp->rq_cpu_addr = efa_zalloc_mapped(dev, &qp->rq_dma_addr,
+ qp->rq_size, DMA_TO_DEVICE);
+ if (!qp->rq_cpu_addr) {
+ err = -ENOMEM;
+ goto err_out;
+ }
+
+ ibdev_dbg(&dev->ibdev,
+ "qp->cpu_addr[0x%p] allocated: size[%lu], dma[%pad]\n",
+ qp->rq_cpu_addr, qp->rq_size, &qp->rq_dma_addr);
+ create_qp_params.rq_base_addr = qp->rq_dma_addr;
+ }
+
+ create_qp_params.sl = cmd.sl;
+
+ if (cmd.flags & EFA_CREATE_QP_WITH_UNSOLICITED_WRITE_RECV)
+ create_qp_params.unsolicited_write_recv = true;
+
+ err = efa_com_create_qp(&dev->edev, &create_qp_params,
+ &create_qp_resp);
+ if (err)
+ goto err_free_mapped;
+
+ resp.sq_db_offset = create_qp_resp.sq_db_offset;
+ resp.rq_db_offset = create_qp_resp.rq_db_offset;
+ resp.llq_desc_offset = create_qp_resp.llq_descriptors_offset;
+ resp.send_sub_cq_idx = create_qp_resp.send_sub_cq_idx;
+ resp.recv_sub_cq_idx = create_qp_resp.recv_sub_cq_idx;
+
+ err = qp_mmap_entries_setup(qp, dev, ucontext, &create_qp_params,
+ &resp);
+ if (err)
+ goto err_destroy_qp;
+
+ qp->qp_handle = create_qp_resp.qp_handle;
+ qp->ibqp.qp_num = create_qp_resp.qp_num;
+ qp->max_send_wr = init_attr->cap.max_send_wr;
+ qp->max_recv_wr = init_attr->cap.max_recv_wr;
+ qp->max_send_sge = init_attr->cap.max_send_sge;
+ qp->max_recv_sge = init_attr->cap.max_recv_sge;
+ qp->max_inline_data = init_attr->cap.max_inline_data;
+
+ if (udata->outlen) {
+ err = ib_copy_to_udata(udata, &resp,
+ min(sizeof(resp), udata->outlen));
+ if (err) {
+ ibdev_dbg(&dev->ibdev,
+ "Failed to copy udata for qp[%u]\n",
+ create_qp_resp.qp_num);
+ goto err_remove_mmap_entries;
+ }
+ }
+
+ ibdev_dbg(&dev->ibdev, "Created qp[%d]\n", qp->ibqp.qp_num);
+
+ return 0;
+
+err_remove_mmap_entries:
+ efa_qp_user_mmap_entries_remove(qp);
+err_destroy_qp:
+ efa_destroy_qp_handle(dev, create_qp_resp.qp_handle);
+err_free_mapped:
+ if (qp->rq_size)
+ efa_free_mapped(dev, qp->rq_cpu_addr, qp->rq_dma_addr,
+ qp->rq_size, DMA_TO_DEVICE);
+err_out:
+ atomic64_inc(&dev->stats.create_qp_err);
+ return err;
+}
+
+static const struct {
+ int valid;
+ enum ib_qp_attr_mask req_param;
+ enum ib_qp_attr_mask opt_param;
+} srd_qp_state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = {
+ [IB_QPS_RESET] = {
+ [IB_QPS_RESET] = { .valid = 1 },
+ [IB_QPS_INIT] = {
+ .valid = 1,
+ .req_param = IB_QP_PKEY_INDEX |
+ IB_QP_PORT |
+ IB_QP_QKEY,
+ },
+ },
+ [IB_QPS_INIT] = {
+ [IB_QPS_RESET] = { .valid = 1 },
+ [IB_QPS_ERR] = { .valid = 1 },
+ [IB_QPS_INIT] = {
+ .valid = 1,
+ .opt_param = IB_QP_PKEY_INDEX |
+ IB_QP_PORT |
+ IB_QP_QKEY,
+ },
+ [IB_QPS_RTR] = {
+ .valid = 1,
+ .opt_param = IB_QP_PKEY_INDEX |
+ IB_QP_QKEY,
+ },
+ },
+ [IB_QPS_RTR] = {
+ [IB_QPS_RESET] = { .valid = 1 },
+ [IB_QPS_ERR] = { .valid = 1 },
+ [IB_QPS_RTS] = {
+ .valid = 1,
+ .req_param = IB_QP_SQ_PSN,
+ .opt_param = IB_QP_CUR_STATE |
+ IB_QP_QKEY |
+ IB_QP_RNR_RETRY,
+
+ }
+ },
+ [IB_QPS_RTS] = {
+ [IB_QPS_RESET] = { .valid = 1 },
+ [IB_QPS_ERR] = { .valid = 1 },
+ [IB_QPS_RTS] = {
+ .valid = 1,
+ .opt_param = IB_QP_CUR_STATE |
+ IB_QP_QKEY,
+ },
+ [IB_QPS_SQD] = {
+ .valid = 1,
+ .opt_param = IB_QP_EN_SQD_ASYNC_NOTIFY,
+ },
+ },
+ [IB_QPS_SQD] = {
+ [IB_QPS_RESET] = { .valid = 1 },
+ [IB_QPS_ERR] = { .valid = 1 },
+ [IB_QPS_RTS] = {
+ .valid = 1,
+ .opt_param = IB_QP_CUR_STATE |
+ IB_QP_QKEY,
+ },
+ [IB_QPS_SQD] = {
+ .valid = 1,
+ .opt_param = IB_QP_PKEY_INDEX |
+ IB_QP_QKEY,
+ }
+ },
+ [IB_QPS_SQE] = {
+ [IB_QPS_RESET] = { .valid = 1 },
+ [IB_QPS_ERR] = { .valid = 1 },
+ [IB_QPS_RTS] = {
+ .valid = 1,
+ .opt_param = IB_QP_CUR_STATE |
+ IB_QP_QKEY,
+ }
+ },
+ [IB_QPS_ERR] = {
+ [IB_QPS_RESET] = { .valid = 1 },
+ [IB_QPS_ERR] = { .valid = 1 },
+ }
+};
+
+static bool efa_modify_srd_qp_is_ok(enum ib_qp_state cur_state,
+ enum ib_qp_state next_state,
+ enum ib_qp_attr_mask mask)
+{
+ enum ib_qp_attr_mask req_param, opt_param;
+
+ if (mask & IB_QP_CUR_STATE &&
+ cur_state != IB_QPS_RTR && cur_state != IB_QPS_RTS &&
+ cur_state != IB_QPS_SQD && cur_state != IB_QPS_SQE)
+ return false;
+
+ if (!srd_qp_state_table[cur_state][next_state].valid)
+ return false;
+
+ req_param = srd_qp_state_table[cur_state][next_state].req_param;
+ opt_param = srd_qp_state_table[cur_state][next_state].opt_param;
+
+ if ((mask & req_param) != req_param)
+ return false;
+
+ if (mask & ~(req_param | opt_param | IB_QP_STATE))
+ return false;
+
+ return true;
+}
+
+static int efa_modify_qp_validate(struct efa_dev *dev, struct efa_qp *qp,
+ struct ib_qp_attr *qp_attr, int qp_attr_mask,
+ enum ib_qp_state cur_state,
+ enum ib_qp_state new_state)
+{
+ int err;
+
+#define EFA_MODIFY_QP_SUPP_MASK \
+ (IB_QP_STATE | IB_QP_CUR_STATE | IB_QP_EN_SQD_ASYNC_NOTIFY | \
+ IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_QKEY | IB_QP_SQ_PSN | \
+ IB_QP_RNR_RETRY)
+
+ if (qp_attr_mask & ~EFA_MODIFY_QP_SUPP_MASK) {
+ ibdev_dbg(&dev->ibdev,
+ "Unsupported qp_attr_mask[%#x] supported[%#x]\n",
+ qp_attr_mask, EFA_MODIFY_QP_SUPP_MASK);
+ return -EOPNOTSUPP;
+ }
+
+ if (qp->ibqp.qp_type == IB_QPT_DRIVER)
+ err = !efa_modify_srd_qp_is_ok(cur_state, new_state,
+ qp_attr_mask);
+ else
+ err = !ib_modify_qp_is_ok(cur_state, new_state, IB_QPT_UD,
+ qp_attr_mask);
+
+ if (err) {
+ ibdev_dbg(&dev->ibdev, "Invalid modify QP parameters\n");
+ return -EINVAL;
+ }
+
+ if ((qp_attr_mask & IB_QP_PORT) && qp_attr->port_num != 1) {
+ ibdev_dbg(&dev->ibdev, "Can't change port num\n");
+ return -EOPNOTSUPP;
+ }
+
+ if ((qp_attr_mask & IB_QP_PKEY_INDEX) && qp_attr->pkey_index) {
+ ibdev_dbg(&dev->ibdev, "Can't change pkey index\n");
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+int efa_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
+ int qp_attr_mask, struct ib_udata *udata)
+{
+ struct efa_dev *dev = to_edev(ibqp->device);
+ struct efa_com_modify_qp_params params = {};
+ struct efa_qp *qp = to_eqp(ibqp);
+ enum ib_qp_state cur_state;
+ enum ib_qp_state new_state;
+ int err;
+
+ if (qp_attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
+ return -EOPNOTSUPP;
+
+ if (udata->inlen &&
+ !ib_is_udata_cleared(udata, 0, udata->inlen)) {
+ ibdev_dbg(&dev->ibdev,
+ "Incompatible ABI params, udata not cleared\n");
+ return -EINVAL;
+ }
+
+ cur_state = qp_attr_mask & IB_QP_CUR_STATE ? qp_attr->cur_qp_state :
+ qp->state;
+ new_state = qp_attr_mask & IB_QP_STATE ? qp_attr->qp_state : cur_state;
+
+ err = efa_modify_qp_validate(dev, qp, qp_attr, qp_attr_mask, cur_state,
+ new_state);
+ if (err)
+ return err;
+
+ params.qp_handle = qp->qp_handle;
+
+ if (qp_attr_mask & IB_QP_STATE) {
+ EFA_SET(&params.modify_mask, EFA_ADMIN_MODIFY_QP_CMD_QP_STATE,
+ 1);
+ EFA_SET(&params.modify_mask,
+ EFA_ADMIN_MODIFY_QP_CMD_CUR_QP_STATE, 1);
+ params.cur_qp_state = cur_state;
+ params.qp_state = new_state;
+ }
+
+ if (qp_attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY) {
+ EFA_SET(&params.modify_mask,
+ EFA_ADMIN_MODIFY_QP_CMD_SQ_DRAINED_ASYNC_NOTIFY, 1);
+ params.sq_drained_async_notify = qp_attr->en_sqd_async_notify;
+ }
+
+ if (qp_attr_mask & IB_QP_QKEY) {
+ EFA_SET(&params.modify_mask, EFA_ADMIN_MODIFY_QP_CMD_QKEY, 1);
+ params.qkey = qp_attr->qkey;
+ }
+
+ if (qp_attr_mask & IB_QP_SQ_PSN) {
+ EFA_SET(&params.modify_mask, EFA_ADMIN_MODIFY_QP_CMD_SQ_PSN, 1);
+ params.sq_psn = qp_attr->sq_psn;
+ }
+
+ if (qp_attr_mask & IB_QP_RNR_RETRY) {
+ EFA_SET(&params.modify_mask, EFA_ADMIN_MODIFY_QP_CMD_RNR_RETRY,
+ 1);
+ params.rnr_retry = qp_attr->rnr_retry;
+ }
+
+ err = efa_com_modify_qp(&dev->edev, &params);
+ if (err)
+ return err;
+
+ qp->state = new_state;
+
+ return 0;
+}
+
+static int efa_destroy_cq_idx(struct efa_dev *dev, int cq_idx)
+{
+ struct efa_com_destroy_cq_params params = { .cq_idx = cq_idx };
+
+ return efa_com_destroy_cq(&dev->edev, &params);
+}
+
+static void efa_cq_user_mmap_entries_remove(struct efa_cq *cq)
+{
+ rdma_user_mmap_entry_remove(cq->db_mmap_entry);
+ rdma_user_mmap_entry_remove(cq->mmap_entry);
+}
+
+int efa_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
+{
+ struct efa_dev *dev = to_edev(ibcq->device);
+ struct efa_cq *cq = to_ecq(ibcq);
+
+ ibdev_dbg(&dev->ibdev,
+ "Destroy cq[%d] virt[0x%p] freed: size[%lu], dma[%pad]\n",
+ cq->cq_idx, cq->cpu_addr, cq->size, &cq->dma_addr);
+
+ efa_destroy_cq_idx(dev, cq->cq_idx);
+ efa_cq_user_mmap_entries_remove(cq);
+ if (cq->eq) {
+ xa_erase(&dev->cqs_xa, cq->cq_idx);
+ synchronize_irq(cq->eq->irq.irqn);
+ }
+
+ if (cq->umem)
+ ib_umem_release(cq->umem);
+ else
+ efa_free_mapped(dev, cq->cpu_addr, cq->dma_addr, cq->size, DMA_FROM_DEVICE);
+ return 0;
+}
+
+static struct efa_eq *efa_vec2eq(struct efa_dev *dev, int vec)
+{
+ return &dev->eqs[vec];
+}
+
+static int cq_mmap_entries_setup(struct efa_dev *dev, struct efa_cq *cq,
+ struct efa_ibv_create_cq_resp *resp,
+ bool db_valid)
+{
+ resp->q_mmap_size = cq->size;
+ cq->mmap_entry = efa_user_mmap_entry_insert(&cq->ucontext->ibucontext,
+ virt_to_phys(cq->cpu_addr),
+ cq->size, EFA_MMAP_DMA_PAGE,
+ &resp->q_mmap_key);
+ if (!cq->mmap_entry)
+ return -ENOMEM;
+
+ if (db_valid) {
+ cq->db_mmap_entry =
+ efa_user_mmap_entry_insert(&cq->ucontext->ibucontext,
+ dev->db_bar_addr + resp->db_off,
+ PAGE_SIZE, EFA_MMAP_IO_NC,
+ &resp->db_mmap_key);
+ if (!cq->db_mmap_entry) {
+ rdma_user_mmap_entry_remove(cq->mmap_entry);
+ return -ENOMEM;
+ }
+
+ resp->db_off &= ~PAGE_MASK;
+ resp->comp_mask |= EFA_CREATE_CQ_RESP_DB_OFF;
+ }
+
+ return 0;
+}
+
+int efa_create_cq_umem(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+ struct ib_umem *umem, struct uverbs_attr_bundle *attrs)
+{
+ struct ib_udata *udata = &attrs->driver_udata;
+ struct efa_ucontext *ucontext = rdma_udata_to_drv_context(
+ udata, struct efa_ucontext, ibucontext);
+ struct efa_com_create_cq_params params = {};
+ struct efa_ibv_create_cq_resp resp = {};
+ struct efa_com_create_cq_result result;
+ struct ib_device *ibdev = ibcq->device;
+ struct efa_dev *dev = to_edev(ibdev);
+ struct efa_ibv_create_cq cmd = {};
+ struct efa_cq *cq = to_ecq(ibcq);
+ int entries = attr->cqe;
+ bool set_src_addr;
+ int err;
+
+ ibdev_dbg(ibdev, "create_cq entries %d\n", entries);
+
+ if (attr->flags)
+ return -EOPNOTSUPP;
+
+ if (entries < 1 || entries > dev->dev_attr.max_cq_depth) {
+ ibdev_dbg(ibdev,
+ "cq: requested entries[%u] non-positive or greater than max[%u]\n",
+ entries, dev->dev_attr.max_cq_depth);
+ err = -EINVAL;
+ goto err_out;
+ }
+
+ if (offsetofend(typeof(cmd), num_sub_cqs) > udata->inlen) {
+ ibdev_dbg(ibdev,
+ "Incompatible ABI params, no input udata\n");
+ err = -EINVAL;
+ goto err_out;
+ }
+
+ if (udata->inlen > sizeof(cmd) &&
+ !ib_is_udata_cleared(udata, sizeof(cmd),
+ udata->inlen - sizeof(cmd))) {
+ ibdev_dbg(ibdev,
+ "Incompatible ABI params, unknown fields in udata\n");
+ err = -EINVAL;
+ goto err_out;
+ }
+
+ err = ib_copy_from_udata(&cmd, udata,
+ min(sizeof(cmd), udata->inlen));
+ if (err) {
+ ibdev_dbg(ibdev, "Cannot copy udata for create_cq\n");
+ goto err_out;
+ }
+
+ if (cmd.comp_mask || !is_reserved_cleared(cmd.reserved_58)) {
+ ibdev_dbg(ibdev,
+ "Incompatible ABI params, unknown fields in udata\n");
+ err = -EINVAL;
+ goto err_out;
+ }
+
+ set_src_addr = !!(cmd.flags & EFA_CREATE_CQ_WITH_SGID);
+ if ((cmd.cq_entry_size != sizeof(struct efa_io_rx_cdesc_ex)) &&
+ (set_src_addr ||
+ cmd.cq_entry_size != sizeof(struct efa_io_rx_cdesc))) {
+ ibdev_dbg(ibdev,
+ "Invalid entry size [%u]\n", cmd.cq_entry_size);
+ err = -EINVAL;
+ goto err_out;
+ }
+
+ if (cmd.num_sub_cqs != dev->dev_attr.sub_cqs_per_cq) {
+ ibdev_dbg(ibdev,
+ "Invalid number of sub cqs[%u] expected[%u]\n",
+ cmd.num_sub_cqs, dev->dev_attr.sub_cqs_per_cq);
+ err = -EINVAL;
+ goto err_out;
+ }
+
+ cq->ucontext = ucontext;
+ cq->size = PAGE_ALIGN(cmd.cq_entry_size * entries * cmd.num_sub_cqs);
+
+ if (umem) {
+ if (umem->length < cq->size) {
+ ibdev_dbg(&dev->ibdev, "External memory too small\n");
+ err = -EINVAL;
+ goto err_out;
+ }
+
+ if (!ib_umem_is_contiguous(umem)) {
+ ibdev_dbg(&dev->ibdev, "Non contiguous CQ unsupported\n");
+ err = -EINVAL;
+ goto err_out;
+ }
+
+ cq->cpu_addr = NULL;
+ cq->dma_addr = ib_umem_start_dma_addr(umem);
+ cq->umem = umem;
+ } else {
+ cq->cpu_addr = efa_zalloc_mapped(dev, &cq->dma_addr, cq->size,
+ DMA_FROM_DEVICE);
+ if (!cq->cpu_addr) {
+ err = -ENOMEM;
+ goto err_out;
+ }
+ }
+
+ params.uarn = cq->ucontext->uarn;
+ params.sub_cq_depth = entries;
+ params.dma_addr = cq->dma_addr;
+ params.entry_size_in_bytes = cmd.cq_entry_size;
+ params.num_sub_cqs = cmd.num_sub_cqs;
+ params.set_src_addr = set_src_addr;
+ if (cmd.flags & EFA_CREATE_CQ_WITH_COMPLETION_CHANNEL) {
+ cq->eq = efa_vec2eq(dev, attr->comp_vector);
+ params.eqn = cq->eq->eeq.eqn;
+ params.interrupt_mode_enabled = true;
+ }
+
+ err = efa_com_create_cq(&dev->edev, &params, &result);
+ if (err)
+ goto err_free_mapped;
+
+ resp.db_off = result.db_off;
+ resp.cq_idx = result.cq_idx;
+ cq->cq_idx = result.cq_idx;
+ cq->ibcq.cqe = result.actual_depth;
+ WARN_ON_ONCE(entries != result.actual_depth);
+
+ if (!umem)
+ err = cq_mmap_entries_setup(dev, cq, &resp, result.db_valid);
+
+ if (err) {
+ ibdev_dbg(ibdev, "Could not setup cq[%u] mmap entries\n",
+ cq->cq_idx);
+ goto err_destroy_cq;
+ }
+
+ if (cq->eq) {
+ err = xa_err(xa_store(&dev->cqs_xa, cq->cq_idx, cq, GFP_KERNEL));
+ if (err) {
+ ibdev_dbg(ibdev, "Failed to store cq[%u] in xarray\n",
+ cq->cq_idx);
+ goto err_remove_mmap;
+ }
+ }
+
+ if (udata->outlen) {
+ err = ib_copy_to_udata(udata, &resp,
+ min(sizeof(resp), udata->outlen));
+ if (err) {
+ ibdev_dbg(ibdev,
+ "Failed to copy udata for create_cq\n");
+ goto err_xa_erase;
+ }
+ }
+
+ ibdev_dbg(ibdev, "Created cq[%d], cq depth[%u]. dma[%pad] virt[0x%p]\n",
+ cq->cq_idx, result.actual_depth, &cq->dma_addr, cq->cpu_addr);
+
+ return 0;
+
+err_xa_erase:
+ if (cq->eq)
+ xa_erase(&dev->cqs_xa, cq->cq_idx);
+err_remove_mmap:
+ efa_cq_user_mmap_entries_remove(cq);
+err_destroy_cq:
+ efa_destroy_cq_idx(dev, cq->cq_idx);
+err_free_mapped:
+ if (!umem)
+ efa_free_mapped(dev, cq->cpu_addr, cq->dma_addr, cq->size,
+ DMA_FROM_DEVICE);
+err_out:
+ atomic64_inc(&dev->stats.create_cq_err);
+ return err;
+}
+
+int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+ struct uverbs_attr_bundle *attrs)
+{
+ return efa_create_cq_umem(ibcq, attr, NULL, attrs);
+}
+
+static int umem_to_page_list(struct efa_dev *dev,
+ struct ib_umem *umem,
+ u64 *page_list,
+ u32 hp_cnt,
+ u8 hp_shift)
+{
+ u32 pages_in_hp = BIT(hp_shift - PAGE_SHIFT);
+ struct ib_block_iter biter;
+ unsigned int hp_idx = 0;
+
+ ibdev_dbg(&dev->ibdev, "hp_cnt[%u], pages_in_hp[%u]\n",
+ hp_cnt, pages_in_hp);
+
+ rdma_umem_for_each_dma_block(umem, &biter, BIT(hp_shift))
+ page_list[hp_idx++] = rdma_block_iter_dma_address(&biter);
+
+ return 0;
+}
+
+static struct scatterlist *efa_vmalloc_buf_to_sg(u64 *buf, int page_cnt)
+{
+ struct scatterlist *sglist;
+ struct page *pg;
+ int i;
+
+ sglist = kmalloc_array(page_cnt, sizeof(*sglist), GFP_KERNEL);
+ if (!sglist)
+ return NULL;
+ sg_init_table(sglist, page_cnt);
+ for (i = 0; i < page_cnt; i++) {
+ pg = vmalloc_to_page(buf);
+ if (!pg)
+ goto err;
+ sg_set_page(&sglist[i], pg, PAGE_SIZE, 0);
+ buf += PAGE_SIZE / sizeof(*buf);
+ }
+ return sglist;
+
+err:
+ kfree(sglist);
+ return NULL;
+}
+
+/*
+ * create a chunk list of physical pages dma addresses from the supplied
+ * scatter gather list
+ */
+static int pbl_chunk_list_create(struct efa_dev *dev, struct pbl_context *pbl)
+{
+ struct pbl_chunk_list *chunk_list = &pbl->phys.indirect.chunk_list;
+ int page_cnt = pbl->phys.indirect.pbl_buf_size_in_pages;
+ struct scatterlist *pages_sgl = pbl->phys.indirect.sgl;
+ unsigned int chunk_list_size, chunk_idx, payload_idx;
+ int sg_dma_cnt = pbl->phys.indirect.sg_dma_cnt;
+ struct efa_com_ctrl_buff_info *ctrl_buf;
+ u64 *cur_chunk_buf, *prev_chunk_buf;
+ struct ib_block_iter biter;
+ dma_addr_t dma_addr;
+ int i;
+
+ /* allocate a chunk list that consists of 4KB chunks */
+ chunk_list_size = DIV_ROUND_UP(page_cnt, EFA_PTRS_PER_CHUNK);
+
+ chunk_list->size = chunk_list_size;
+ chunk_list->chunks = kcalloc(chunk_list_size,
+ sizeof(*chunk_list->chunks),
+ GFP_KERNEL);
+ if (!chunk_list->chunks)
+ return -ENOMEM;
+
+ ibdev_dbg(&dev->ibdev,
+ "chunk_list_size[%u] - pages[%u]\n", chunk_list_size,
+ page_cnt);
+
+ /* allocate chunk buffers: */
+ for (i = 0; i < chunk_list_size; i++) {
+ chunk_list->chunks[i].buf = kzalloc(EFA_CHUNK_SIZE, GFP_KERNEL);
+ if (!chunk_list->chunks[i].buf)
+ goto chunk_list_dealloc;
+
+ chunk_list->chunks[i].length = EFA_CHUNK_USED_SIZE;
+ }
+ chunk_list->chunks[chunk_list_size - 1].length =
+ ((page_cnt % EFA_PTRS_PER_CHUNK) * EFA_CHUNK_PAYLOAD_PTR_SIZE) +
+ EFA_CHUNK_PTR_SIZE;
+
+ /* fill the dma addresses of sg list pages to chunks: */
+ chunk_idx = 0;
+ payload_idx = 0;
+ cur_chunk_buf = chunk_list->chunks[0].buf;
+ rdma_for_each_block(pages_sgl, &biter, sg_dma_cnt,
+ EFA_CHUNK_PAYLOAD_SIZE) {
+ cur_chunk_buf[payload_idx++] =
+ rdma_block_iter_dma_address(&biter);
+
+ if (payload_idx == EFA_PTRS_PER_CHUNK) {
+ chunk_idx++;
+ cur_chunk_buf = chunk_list->chunks[chunk_idx].buf;
+ payload_idx = 0;
+ }
+ }
+
+ /* map chunks to dma and fill chunks next ptrs */
+ for (i = chunk_list_size - 1; i >= 0; i--) {
+ dma_addr = dma_map_single(&dev->pdev->dev,
+ chunk_list->chunks[i].buf,
+ chunk_list->chunks[i].length,
+ DMA_TO_DEVICE);
+ if (dma_mapping_error(&dev->pdev->dev, dma_addr)) {
+ ibdev_err(&dev->ibdev,
+ "chunk[%u] dma_map_failed\n", i);
+ goto chunk_list_unmap;
+ }
+
+ chunk_list->chunks[i].dma_addr = dma_addr;
+ ibdev_dbg(&dev->ibdev,
+ "chunk[%u] mapped at [%pad]\n", i, &dma_addr);
+
+ if (!i)
+ break;
+
+ prev_chunk_buf = chunk_list->chunks[i - 1].buf;
+
+ ctrl_buf = (struct efa_com_ctrl_buff_info *)
+ &prev_chunk_buf[EFA_PTRS_PER_CHUNK];
+ ctrl_buf->length = chunk_list->chunks[i].length;
+
+ efa_com_set_dma_addr(dma_addr,
+ &ctrl_buf->address.mem_addr_high,
+ &ctrl_buf->address.mem_addr_low);
+ }
+
+ return 0;
+
+chunk_list_unmap:
+ for (; i < chunk_list_size; i++) {
+ dma_unmap_single(&dev->pdev->dev, chunk_list->chunks[i].dma_addr,
+ chunk_list->chunks[i].length, DMA_TO_DEVICE);
+ }
+chunk_list_dealloc:
+ for (i = 0; i < chunk_list_size; i++)
+ kfree(chunk_list->chunks[i].buf);
+
+ kfree(chunk_list->chunks);
+ return -ENOMEM;
+}
+
+static void pbl_chunk_list_destroy(struct efa_dev *dev, struct pbl_context *pbl)
+{
+ struct pbl_chunk_list *chunk_list = &pbl->phys.indirect.chunk_list;
+ int i;
+
+ for (i = 0; i < chunk_list->size; i++) {
+ dma_unmap_single(&dev->pdev->dev, chunk_list->chunks[i].dma_addr,
+ chunk_list->chunks[i].length, DMA_TO_DEVICE);
+ kfree(chunk_list->chunks[i].buf);
+ }
+
+ kfree(chunk_list->chunks);
+}
+
+/* initialize pbl continuous mode: map pbl buffer to a dma address. */
+static int pbl_continuous_initialize(struct efa_dev *dev,
+ struct pbl_context *pbl)
+{
+ dma_addr_t dma_addr;
+
+ dma_addr = dma_map_single(&dev->pdev->dev, pbl->pbl_buf,
+ pbl->pbl_buf_size_in_bytes, DMA_TO_DEVICE);
+ if (dma_mapping_error(&dev->pdev->dev, dma_addr)) {
+ ibdev_err(&dev->ibdev, "Unable to map pbl to DMA address\n");
+ return -ENOMEM;
+ }
+
+ pbl->phys.continuous.dma_addr = dma_addr;
+ ibdev_dbg(&dev->ibdev,
+ "pbl continuous - dma_addr = %pad, size[%u]\n",
+ &dma_addr, pbl->pbl_buf_size_in_bytes);
+
+ return 0;
+}
+
+/*
+ * initialize pbl indirect mode:
+ * create a chunk list out of the dma addresses of the physical pages of
+ * pbl buffer.
+ */
+static int pbl_indirect_initialize(struct efa_dev *dev, struct pbl_context *pbl)
+{
+ u32 size_in_pages = DIV_ROUND_UP(pbl->pbl_buf_size_in_bytes, EFA_CHUNK_PAYLOAD_SIZE);
+ struct scatterlist *sgl;
+ int sg_dma_cnt, err;
+
+ BUILD_BUG_ON(EFA_CHUNK_PAYLOAD_SIZE > PAGE_SIZE);
+ sgl = efa_vmalloc_buf_to_sg(pbl->pbl_buf, size_in_pages);
+ if (!sgl)
+ return -ENOMEM;
+
+ sg_dma_cnt = dma_map_sg(&dev->pdev->dev, sgl, size_in_pages, DMA_TO_DEVICE);
+ if (!sg_dma_cnt) {
+ err = -EINVAL;
+ goto err_map;
+ }
+
+ pbl->phys.indirect.pbl_buf_size_in_pages = size_in_pages;
+ pbl->phys.indirect.sgl = sgl;
+ pbl->phys.indirect.sg_dma_cnt = sg_dma_cnt;
+ err = pbl_chunk_list_create(dev, pbl);
+ if (err) {
+ ibdev_dbg(&dev->ibdev,
+ "chunk_list creation failed[%d]\n", err);
+ goto err_chunk;
+ }
+
+ ibdev_dbg(&dev->ibdev,
+ "pbl indirect - size[%u], chunks[%u]\n",
+ pbl->pbl_buf_size_in_bytes,
+ pbl->phys.indirect.chunk_list.size);
+
+ return 0;
+
+err_chunk:
+ dma_unmap_sg(&dev->pdev->dev, sgl, size_in_pages, DMA_TO_DEVICE);
+err_map:
+ kfree(sgl);
+ return err;
+}
+
+static void pbl_indirect_terminate(struct efa_dev *dev, struct pbl_context *pbl)
+{
+ pbl_chunk_list_destroy(dev, pbl);
+ dma_unmap_sg(&dev->pdev->dev, pbl->phys.indirect.sgl,
+ pbl->phys.indirect.pbl_buf_size_in_pages, DMA_TO_DEVICE);
+ kfree(pbl->phys.indirect.sgl);
+}
+
+/* create a page buffer list from a mapped user memory region */
+static int pbl_create(struct efa_dev *dev,
+ struct pbl_context *pbl,
+ struct ib_umem *umem,
+ int hp_cnt,
+ u8 hp_shift)
+{
+ int err;
+
+ pbl->pbl_buf_size_in_bytes = hp_cnt * EFA_CHUNK_PAYLOAD_PTR_SIZE;
+ pbl->pbl_buf = kvzalloc(pbl->pbl_buf_size_in_bytes, GFP_KERNEL);
+ if (!pbl->pbl_buf)
+ return -ENOMEM;
+
+ if (is_vmalloc_addr(pbl->pbl_buf)) {
+ pbl->physically_continuous = 0;
+ err = umem_to_page_list(dev, umem, pbl->pbl_buf, hp_cnt,
+ hp_shift);
+ if (err)
+ goto err_free;
+
+ err = pbl_indirect_initialize(dev, pbl);
+ if (err)
+ goto err_free;
+ } else {
+ pbl->physically_continuous = 1;
+ err = umem_to_page_list(dev, umem, pbl->pbl_buf, hp_cnt,
+ hp_shift);
+ if (err)
+ goto err_free;
+
+ err = pbl_continuous_initialize(dev, pbl);
+ if (err)
+ goto err_free;
+ }
+
+ ibdev_dbg(&dev->ibdev,
+ "user_pbl_created: user_pages[%u], continuous[%u]\n",
+ hp_cnt, pbl->physically_continuous);
+
+ return 0;
+
+err_free:
+ kvfree(pbl->pbl_buf);
+ return err;
+}
+
+static void pbl_destroy(struct efa_dev *dev, struct pbl_context *pbl)
+{
+ if (pbl->physically_continuous)
+ dma_unmap_single(&dev->pdev->dev, pbl->phys.continuous.dma_addr,
+ pbl->pbl_buf_size_in_bytes, DMA_TO_DEVICE);
+ else
+ pbl_indirect_terminate(dev, pbl);
+
+ kvfree(pbl->pbl_buf);
+}
+
+static int efa_create_inline_pbl(struct efa_dev *dev, struct efa_mr *mr,
+ struct efa_com_reg_mr_params *params)
+{
+ int err;
+
+ params->inline_pbl = 1;
+ err = umem_to_page_list(dev, mr->umem, params->pbl.inline_pbl_array,
+ params->page_num, params->page_shift);
+ if (err)
+ return err;
+
+ ibdev_dbg(&dev->ibdev,
+ "inline_pbl_array - pages[%u]\n", params->page_num);
+
+ return 0;
+}
+
+static int efa_create_pbl(struct efa_dev *dev,
+ struct pbl_context *pbl,
+ struct efa_mr *mr,
+ struct efa_com_reg_mr_params *params)
+{
+ int err;
+
+ err = pbl_create(dev, pbl, mr->umem, params->page_num,
+ params->page_shift);
+ if (err) {
+ ibdev_dbg(&dev->ibdev, "Failed to create pbl[%d]\n", err);
+ return err;
+ }
+
+ params->inline_pbl = 0;
+ params->indirect = !pbl->physically_continuous;
+ if (pbl->physically_continuous) {
+ params->pbl.pbl.length = pbl->pbl_buf_size_in_bytes;
+
+ efa_com_set_dma_addr(pbl->phys.continuous.dma_addr,
+ &params->pbl.pbl.address.mem_addr_high,
+ &params->pbl.pbl.address.mem_addr_low);
+ } else {
+ params->pbl.pbl.length =
+ pbl->phys.indirect.chunk_list.chunks[0].length;
+
+ efa_com_set_dma_addr(pbl->phys.indirect.chunk_list.chunks[0].dma_addr,
+ &params->pbl.pbl.address.mem_addr_high,
+ &params->pbl.pbl.address.mem_addr_low);
+ }
+
+ return 0;
+}
+
+static struct efa_mr *efa_alloc_mr(struct ib_pd *ibpd, int access_flags,
+ struct ib_udata *udata)
+{
+ struct efa_dev *dev = to_edev(ibpd->device);
+ int supp_access_flags;
+ struct efa_mr *mr;
+
+ if (udata && udata->inlen &&
+ !ib_is_udata_cleared(udata, 0, sizeof(udata->inlen))) {
+ ibdev_dbg(&dev->ibdev,
+ "Incompatible ABI params, udata not cleared\n");
+ return ERR_PTR(-EINVAL);
+ }
+
+ supp_access_flags =
+ IB_ACCESS_LOCAL_WRITE |
+ (EFA_DEV_CAP(dev, RDMA_READ) ? IB_ACCESS_REMOTE_READ : 0) |
+ (EFA_DEV_CAP(dev, RDMA_WRITE) ? IB_ACCESS_REMOTE_WRITE : 0);
+
+ access_flags &= ~IB_ACCESS_OPTIONAL;
+ if (access_flags & ~supp_access_flags) {
+ ibdev_dbg(&dev->ibdev,
+ "Unsupported access flags[%#x], supported[%#x]\n",
+ access_flags, supp_access_flags);
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+ return mr;
+}
+
+static int efa_register_mr(struct ib_pd *ibpd, struct efa_mr *mr, u64 start,
+ u64 length, u64 virt_addr, int access_flags)
+{
+ struct efa_dev *dev = to_edev(ibpd->device);
+ struct efa_com_reg_mr_params params = {};
+ struct efa_com_reg_mr_result result = {};
+ struct pbl_context pbl;
+ unsigned int pg_sz;
+ int inline_size;
+ int err;
+
+ params.pd = to_epd(ibpd)->pdn;
+ params.iova = virt_addr;
+ params.mr_length_in_bytes = length;
+ params.permissions = access_flags;
+
+ pg_sz = ib_umem_find_best_pgsz(mr->umem,
+ dev->dev_attr.page_size_cap,
+ virt_addr);
+ if (!pg_sz) {
+ ibdev_dbg(&dev->ibdev, "Failed to find a suitable page size in page_size_cap %#llx\n",
+ dev->dev_attr.page_size_cap);
+ return -EOPNOTSUPP;
+ }
+
+ params.page_shift = order_base_2(pg_sz);
+ params.page_num = ib_umem_num_dma_blocks(mr->umem, pg_sz);
+
+ ibdev_dbg(&dev->ibdev,
+ "start %#llx length %#llx params.page_shift %u params.page_num %u\n",
+ start, length, params.page_shift, params.page_num);
+
+ inline_size = ARRAY_SIZE(params.pbl.inline_pbl_array);
+ if (params.page_num <= inline_size) {
+ err = efa_create_inline_pbl(dev, mr, &params);
+ if (err)
+ return err;
+
+ err = efa_com_register_mr(&dev->edev, &params, &result);
+ if (err)
+ return err;
+ } else {
+ err = efa_create_pbl(dev, &pbl, mr, &params);
+ if (err)
+ return err;
+
+ err = efa_com_register_mr(&dev->edev, &params, &result);
+ pbl_destroy(dev, &pbl);
+
+ if (err)
+ return err;
+ }
+
+ mr->ibmr.lkey = result.l_key;
+ mr->ibmr.rkey = result.r_key;
+ mr->ibmr.length = length;
+ mr->ic_info.recv_ic_id = result.ic_info.recv_ic_id;
+ mr->ic_info.rdma_read_ic_id = result.ic_info.rdma_read_ic_id;
+ mr->ic_info.rdma_recv_ic_id = result.ic_info.rdma_recv_ic_id;
+ mr->ic_info.recv_ic_id_valid = result.ic_info.recv_ic_id_valid;
+ mr->ic_info.rdma_read_ic_id_valid = result.ic_info.rdma_read_ic_id_valid;
+ mr->ic_info.rdma_recv_ic_id_valid = result.ic_info.rdma_recv_ic_id_valid;
+ ibdev_dbg(&dev->ibdev, "Registered mr[%d]\n", mr->ibmr.lkey);
+
+ return 0;
+}
+
+struct ib_mr *efa_reg_user_mr_dmabuf(struct ib_pd *ibpd, u64 start,
+ u64 length, u64 virt_addr,
+ int fd, int access_flags,
+ struct ib_dmah *dmah,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct efa_dev *dev = to_edev(ibpd->device);
+ struct ib_umem_dmabuf *umem_dmabuf;
+ struct efa_mr *mr;
+ int err;
+
+ if (dmah) {
+ err = -EOPNOTSUPP;
+ goto err_out;
+ }
+
+ mr = efa_alloc_mr(ibpd, access_flags, &attrs->driver_udata);
+ if (IS_ERR(mr)) {
+ err = PTR_ERR(mr);
+ goto err_out;
+ }
+
+ umem_dmabuf = ib_umem_dmabuf_get_pinned(ibpd->device, start, length, fd,
+ access_flags);
+ if (IS_ERR(umem_dmabuf)) {
+ err = PTR_ERR(umem_dmabuf);
+ ibdev_dbg(&dev->ibdev, "Failed to get dmabuf umem[%pe]\n",
+ umem_dmabuf);
+ goto err_free;
+ }
+
+ mr->umem = &umem_dmabuf->umem;
+ err = efa_register_mr(ibpd, mr, start, length, virt_addr, access_flags);
+ if (err)
+ goto err_release;
+
+ return &mr->ibmr;
+
+err_release:
+ ib_umem_release(mr->umem);
+err_free:
+ kfree(mr);
+err_out:
+ atomic64_inc(&dev->stats.reg_mr_err);
+ return ERR_PTR(err);
+}
+
+struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length,
+ u64 virt_addr, int access_flags,
+ struct ib_dmah *dmah,
+ struct ib_udata *udata)
+{
+ struct efa_dev *dev = to_edev(ibpd->device);
+ struct efa_mr *mr;
+ int err;
+
+ if (dmah) {
+ err = -EOPNOTSUPP;
+ goto err_out;
+ }
+
+ mr = efa_alloc_mr(ibpd, access_flags, udata);
+ if (IS_ERR(mr)) {
+ err = PTR_ERR(mr);
+ goto err_out;
+ }
+
+ mr->umem = ib_umem_get(ibpd->device, start, length, access_flags);
+ if (IS_ERR(mr->umem)) {
+ err = PTR_ERR(mr->umem);
+ ibdev_dbg(&dev->ibdev,
+ "Failed to pin and map user space memory[%pe]\n",
+ mr->umem);
+ goto err_free;
+ }
+
+ err = efa_register_mr(ibpd, mr, start, length, virt_addr, access_flags);
+ if (err)
+ goto err_release;
+
+ return &mr->ibmr;
+
+err_release:
+ ib_umem_release(mr->umem);
+err_free:
+ kfree(mr);
+err_out:
+ atomic64_inc(&dev->stats.reg_mr_err);
+ return ERR_PTR(err);
+}
+
+static int UVERBS_HANDLER(EFA_IB_METHOD_MR_QUERY)(struct uverbs_attr_bundle *attrs)
+{
+ struct ib_mr *ibmr = uverbs_attr_get_obj(attrs, EFA_IB_ATTR_QUERY_MR_HANDLE);
+ struct efa_mr *mr = to_emr(ibmr);
+ u16 ic_id_validity = 0;
+ int ret;
+
+ ret = uverbs_copy_to(attrs, EFA_IB_ATTR_QUERY_MR_RESP_RECV_IC_ID,
+ &mr->ic_info.recv_ic_id, sizeof(mr->ic_info.recv_ic_id));
+ if (ret)
+ return ret;
+
+ ret = uverbs_copy_to(attrs, EFA_IB_ATTR_QUERY_MR_RESP_RDMA_READ_IC_ID,
+ &mr->ic_info.rdma_read_ic_id, sizeof(mr->ic_info.rdma_read_ic_id));
+ if (ret)
+ return ret;
+
+ ret = uverbs_copy_to(attrs, EFA_IB_ATTR_QUERY_MR_RESP_RDMA_RECV_IC_ID,
+ &mr->ic_info.rdma_recv_ic_id, sizeof(mr->ic_info.rdma_recv_ic_id));
+ if (ret)
+ return ret;
+
+ if (mr->ic_info.recv_ic_id_valid)
+ ic_id_validity |= EFA_QUERY_MR_VALIDITY_RECV_IC_ID;
+ if (mr->ic_info.rdma_read_ic_id_valid)
+ ic_id_validity |= EFA_QUERY_MR_VALIDITY_RDMA_READ_IC_ID;
+ if (mr->ic_info.rdma_recv_ic_id_valid)
+ ic_id_validity |= EFA_QUERY_MR_VALIDITY_RDMA_RECV_IC_ID;
+
+ return uverbs_copy_to(attrs, EFA_IB_ATTR_QUERY_MR_RESP_IC_ID_VALIDITY,
+ &ic_id_validity, sizeof(ic_id_validity));
+}
+
+int efa_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
+{
+ struct efa_dev *dev = to_edev(ibmr->device);
+ struct efa_com_dereg_mr_params params;
+ struct efa_mr *mr = to_emr(ibmr);
+ int err;
+
+ ibdev_dbg(&dev->ibdev, "Deregister mr[%d]\n", ibmr->lkey);
+
+ params.l_key = mr->ibmr.lkey;
+ err = efa_com_dereg_mr(&dev->edev, &params);
+ if (err)
+ return err;
+
+ ib_umem_release(mr->umem);
+ kfree(mr);
+
+ return 0;
+}
+
+int efa_get_port_immutable(struct ib_device *ibdev, u32 port_num,
+ struct ib_port_immutable *immutable)
+{
+ struct ib_port_attr attr;
+ int err;
+
+ err = ib_query_port(ibdev, port_num, &attr);
+ if (err) {
+ ibdev_dbg(ibdev, "Couldn't query port err[%d]\n", err);
+ return err;
+ }
+
+ immutable->pkey_tbl_len = attr.pkey_tbl_len;
+ immutable->gid_tbl_len = attr.gid_tbl_len;
+
+ return 0;
+}
+
+static int efa_dealloc_uar(struct efa_dev *dev, u16 uarn)
+{
+ struct efa_com_dealloc_uar_params params = {
+ .uarn = uarn,
+ };
+
+ return efa_com_dealloc_uar(&dev->edev, &params);
+}
+
+#define EFA_CHECK_USER_COMP(_dev, _comp_mask, _attr, _mask, _attr_str) \
+ (_attr_str = (!(_dev)->dev_attr._attr || ((_comp_mask) & (_mask))) ? \
+ NULL : #_attr)
+
+static int efa_user_comp_handshake(const struct ib_ucontext *ibucontext,
+ const struct efa_ibv_alloc_ucontext_cmd *cmd)
+{
+ struct efa_dev *dev = to_edev(ibucontext->device);
+ char *attr_str;
+
+ if (EFA_CHECK_USER_COMP(dev, cmd->comp_mask, max_tx_batch,
+ EFA_ALLOC_UCONTEXT_CMD_COMP_TX_BATCH, attr_str))
+ goto err;
+
+ if (EFA_CHECK_USER_COMP(dev, cmd->comp_mask, min_sq_depth,
+ EFA_ALLOC_UCONTEXT_CMD_COMP_MIN_SQ_WR,
+ attr_str))
+ goto err;
+
+ return 0;
+
+err:
+ ibdev_dbg(&dev->ibdev, "Userspace handshake failed for %s attribute\n",
+ attr_str);
+ return -EOPNOTSUPP;
+}
+
+int efa_alloc_ucontext(struct ib_ucontext *ibucontext, struct ib_udata *udata)
+{
+ struct efa_ucontext *ucontext = to_eucontext(ibucontext);
+ struct efa_dev *dev = to_edev(ibucontext->device);
+ struct efa_ibv_alloc_ucontext_resp resp = {};
+ struct efa_ibv_alloc_ucontext_cmd cmd = {};
+ struct efa_com_alloc_uar_result result;
+ int err;
+
+ /*
+ * it's fine if the driver does not know all request fields,
+ * we will ack input fields in our response.
+ */
+
+ err = ib_copy_from_udata(&cmd, udata,
+ min(sizeof(cmd), udata->inlen));
+ if (err) {
+ ibdev_dbg(&dev->ibdev,
+ "Cannot copy udata for alloc_ucontext\n");
+ goto err_out;
+ }
+
+ err = efa_user_comp_handshake(ibucontext, &cmd);
+ if (err)
+ goto err_out;
+
+ err = efa_com_alloc_uar(&dev->edev, &result);
+ if (err)
+ goto err_out;
+
+ ucontext->uarn = result.uarn;
+
+ resp.cmds_supp_udata_mask |= EFA_USER_CMDS_SUPP_UDATA_QUERY_DEVICE;
+ resp.cmds_supp_udata_mask |= EFA_USER_CMDS_SUPP_UDATA_CREATE_AH;
+ resp.sub_cqs_per_cq = dev->dev_attr.sub_cqs_per_cq;
+ resp.inline_buf_size = dev->dev_attr.inline_buf_size;
+ resp.max_llq_size = dev->dev_attr.max_llq_size;
+ resp.max_tx_batch = dev->dev_attr.max_tx_batch;
+ resp.min_sq_wr = dev->dev_attr.min_sq_depth;
+
+ err = ib_copy_to_udata(udata, &resp,
+ min(sizeof(resp), udata->outlen));
+ if (err)
+ goto err_dealloc_uar;
+
+ return 0;
+
+err_dealloc_uar:
+ efa_dealloc_uar(dev, result.uarn);
+err_out:
+ atomic64_inc(&dev->stats.alloc_ucontext_err);
+ return err;
+}
+
+void efa_dealloc_ucontext(struct ib_ucontext *ibucontext)
+{
+ struct efa_ucontext *ucontext = to_eucontext(ibucontext);
+ struct efa_dev *dev = to_edev(ibucontext->device);
+
+ efa_dealloc_uar(dev, ucontext->uarn);
+}
+
+void efa_mmap_free(struct rdma_user_mmap_entry *rdma_entry)
+{
+ struct efa_user_mmap_entry *entry = to_emmap(rdma_entry);
+
+ kfree(entry);
+}
+
+static int __efa_mmap(struct efa_dev *dev, struct efa_ucontext *ucontext,
+ struct vm_area_struct *vma)
+{
+ struct rdma_user_mmap_entry *rdma_entry;
+ struct efa_user_mmap_entry *entry;
+ unsigned long va;
+ int err = 0;
+ u64 pfn;
+
+ rdma_entry = rdma_user_mmap_entry_get(&ucontext->ibucontext, vma);
+ if (!rdma_entry) {
+ ibdev_dbg(&dev->ibdev,
+ "pgoff[%#lx] does not have valid entry\n",
+ vma->vm_pgoff);
+ atomic64_inc(&dev->stats.mmap_err);
+ return -EINVAL;
+ }
+ entry = to_emmap(rdma_entry);
+
+ ibdev_dbg(&dev->ibdev,
+ "Mapping address[%#llx], length[%#zx], mmap_flag[%d]\n",
+ entry->address, rdma_entry->npages * PAGE_SIZE,
+ entry->mmap_flag);
+
+ pfn = entry->address >> PAGE_SHIFT;
+ switch (entry->mmap_flag) {
+ case EFA_MMAP_IO_NC:
+ err = rdma_user_mmap_io(&ucontext->ibucontext, vma, pfn,
+ entry->rdma_entry.npages * PAGE_SIZE,
+ pgprot_noncached(vma->vm_page_prot),
+ rdma_entry);
+ break;
+ case EFA_MMAP_IO_WC:
+ err = rdma_user_mmap_io(&ucontext->ibucontext, vma, pfn,
+ entry->rdma_entry.npages * PAGE_SIZE,
+ pgprot_writecombine(vma->vm_page_prot),
+ rdma_entry);
+ break;
+ case EFA_MMAP_DMA_PAGE:
+ for (va = vma->vm_start; va < vma->vm_end;
+ va += PAGE_SIZE, pfn++) {
+ err = vm_insert_page(vma, va, pfn_to_page(pfn));
+ if (err)
+ break;
+ }
+ break;
+ default:
+ err = -EINVAL;
+ }
+
+ if (err) {
+ ibdev_dbg(
+ &dev->ibdev,
+ "Couldn't mmap address[%#llx] length[%#zx] mmap_flag[%d] err[%d]\n",
+ entry->address, rdma_entry->npages * PAGE_SIZE,
+ entry->mmap_flag, err);
+ atomic64_inc(&dev->stats.mmap_err);
+ }
+
+ rdma_user_mmap_entry_put(rdma_entry);
+ return err;
+}
+
+int efa_mmap(struct ib_ucontext *ibucontext,
+ struct vm_area_struct *vma)
+{
+ struct efa_ucontext *ucontext = to_eucontext(ibucontext);
+ struct efa_dev *dev = to_edev(ibucontext->device);
+ size_t length = vma->vm_end - vma->vm_start;
+
+ ibdev_dbg(&dev->ibdev,
+ "start %#lx, end %#lx, length = %#zx, pgoff = %#lx\n",
+ vma->vm_start, vma->vm_end, length, vma->vm_pgoff);
+
+ return __efa_mmap(dev, ucontext, vma);
+}
+
+static int efa_ah_destroy(struct efa_dev *dev, struct efa_ah *ah)
+{
+ struct efa_com_destroy_ah_params params = {
+ .ah = ah->ah,
+ .pdn = to_epd(ah->ibah.pd)->pdn,
+ };
+
+ return efa_com_destroy_ah(&dev->edev, &params);
+}
+
+int efa_create_ah(struct ib_ah *ibah,
+ struct rdma_ah_init_attr *init_attr,
+ struct ib_udata *udata)
+{
+ struct rdma_ah_attr *ah_attr = init_attr->ah_attr;
+ struct efa_dev *dev = to_edev(ibah->device);
+ struct efa_com_create_ah_params params = {};
+ struct efa_ibv_create_ah_resp resp = {};
+ struct efa_com_create_ah_result result;
+ struct efa_ah *ah = to_eah(ibah);
+ int err;
+
+ if (!(init_attr->flags & RDMA_CREATE_AH_SLEEPABLE)) {
+ ibdev_dbg(&dev->ibdev,
+ "Create address handle is not supported in atomic context\n");
+ err = -EOPNOTSUPP;
+ goto err_out;
+ }
+
+ if (udata->inlen &&
+ !ib_is_udata_cleared(udata, 0, udata->inlen)) {
+ ibdev_dbg(&dev->ibdev, "Incompatible ABI params\n");
+ err = -EINVAL;
+ goto err_out;
+ }
+
+ memcpy(params.dest_addr, ah_attr->grh.dgid.raw,
+ sizeof(params.dest_addr));
+ params.pdn = to_epd(ibah->pd)->pdn;
+ err = efa_com_create_ah(&dev->edev, &params, &result);
+ if (err)
+ goto err_out;
+
+ memcpy(ah->id, ah_attr->grh.dgid.raw, sizeof(ah->id));
+ ah->ah = result.ah;
+
+ resp.efa_address_handle = result.ah;
+
+ if (udata->outlen) {
+ err = ib_copy_to_udata(udata, &resp,
+ min(sizeof(resp), udata->outlen));
+ if (err) {
+ ibdev_dbg(&dev->ibdev,
+ "Failed to copy udata for create_ah response\n");
+ goto err_destroy_ah;
+ }
+ }
+ ibdev_dbg(&dev->ibdev, "Created ah[%d]\n", ah->ah);
+
+ return 0;
+
+err_destroy_ah:
+ efa_ah_destroy(dev, ah);
+err_out:
+ atomic64_inc(&dev->stats.create_ah_err);
+ return err;
+}
+
+int efa_destroy_ah(struct ib_ah *ibah, u32 flags)
+{
+ struct efa_dev *dev = to_edev(ibah->pd->device);
+ struct efa_ah *ah = to_eah(ibah);
+
+ ibdev_dbg(&dev->ibdev, "Destroy ah[%d]\n", ah->ah);
+
+ if (!(flags & RDMA_DESTROY_AH_SLEEPABLE)) {
+ ibdev_dbg(&dev->ibdev,
+ "Destroy address handle is not supported in atomic context\n");
+ return -EOPNOTSUPP;
+ }
+
+ efa_ah_destroy(dev, ah);
+ return 0;
+}
+
+struct rdma_hw_stats *efa_alloc_hw_port_stats(struct ib_device *ibdev,
+ u32 port_num)
+{
+ return rdma_alloc_hw_stats_struct(efa_port_stats_descs,
+ ARRAY_SIZE(efa_port_stats_descs),
+ RDMA_HW_STATS_DEFAULT_LIFESPAN);
+}
+
+struct rdma_hw_stats *efa_alloc_hw_device_stats(struct ib_device *ibdev)
+{
+ return rdma_alloc_hw_stats_struct(efa_device_stats_descs,
+ ARRAY_SIZE(efa_device_stats_descs),
+ RDMA_HW_STATS_DEFAULT_LIFESPAN);
+}
+
+static int efa_fill_device_stats(struct efa_dev *dev,
+ struct rdma_hw_stats *stats)
+{
+ struct efa_com_stats_admin *as = &dev->edev.aq.stats;
+ struct efa_stats *s = &dev->stats;
+
+ stats->value[EFA_SUBMITTED_CMDS] = atomic64_read(&as->submitted_cmd);
+ stats->value[EFA_COMPLETED_CMDS] = atomic64_read(&as->completed_cmd);
+ stats->value[EFA_CMDS_ERR] = atomic64_read(&as->cmd_err);
+ stats->value[EFA_NO_COMPLETION_CMDS] = atomic64_read(&as->no_completion);
+
+ stats->value[EFA_KEEP_ALIVE_RCVD] = atomic64_read(&s->keep_alive_rcvd);
+ stats->value[EFA_ALLOC_PD_ERR] = atomic64_read(&s->alloc_pd_err);
+ stats->value[EFA_CREATE_QP_ERR] = atomic64_read(&s->create_qp_err);
+ stats->value[EFA_CREATE_CQ_ERR] = atomic64_read(&s->create_cq_err);
+ stats->value[EFA_REG_MR_ERR] = atomic64_read(&s->reg_mr_err);
+ stats->value[EFA_ALLOC_UCONTEXT_ERR] =
+ atomic64_read(&s->alloc_ucontext_err);
+ stats->value[EFA_CREATE_AH_ERR] = atomic64_read(&s->create_ah_err);
+ stats->value[EFA_MMAP_ERR] = atomic64_read(&s->mmap_err);
+
+ return ARRAY_SIZE(efa_device_stats_descs);
+}
+
+static int efa_fill_port_stats(struct efa_dev *dev, struct rdma_hw_stats *stats,
+ u32 port_num)
+{
+ struct efa_com_get_stats_params params = {};
+ union efa_com_get_stats_result result;
+ struct efa_com_rdma_write_stats *rws;
+ struct efa_com_rdma_read_stats *rrs;
+ struct efa_com_messages_stats *ms;
+ struct efa_com_network_stats *ns;
+ struct efa_com_basic_stats *bs;
+ int err;
+
+ params.scope = EFA_ADMIN_GET_STATS_SCOPE_ALL;
+ params.type = EFA_ADMIN_GET_STATS_TYPE_BASIC;
+
+ err = efa_com_get_stats(&dev->edev, &params, &result);
+ if (err)
+ return err;
+
+ bs = &result.basic_stats;
+ stats->value[EFA_TX_BYTES] = bs->tx_bytes;
+ stats->value[EFA_TX_PKTS] = bs->tx_pkts;
+ stats->value[EFA_RX_BYTES] = bs->rx_bytes;
+ stats->value[EFA_RX_PKTS] = bs->rx_pkts;
+ stats->value[EFA_RX_DROPS] = bs->rx_drops;
+
+ params.type = EFA_ADMIN_GET_STATS_TYPE_MESSAGES;
+ err = efa_com_get_stats(&dev->edev, &params, &result);
+ if (err)
+ return err;
+
+ ms = &result.messages_stats;
+ stats->value[EFA_SEND_BYTES] = ms->send_bytes;
+ stats->value[EFA_SEND_WRS] = ms->send_wrs;
+ stats->value[EFA_RECV_BYTES] = ms->recv_bytes;
+ stats->value[EFA_RECV_WRS] = ms->recv_wrs;
+
+ params.type = EFA_ADMIN_GET_STATS_TYPE_RDMA_READ;
+ err = efa_com_get_stats(&dev->edev, &params, &result);
+ if (err)
+ return err;
+
+ rrs = &result.rdma_read_stats;
+ stats->value[EFA_RDMA_READ_WRS] = rrs->read_wrs;
+ stats->value[EFA_RDMA_READ_BYTES] = rrs->read_bytes;
+ stats->value[EFA_RDMA_READ_WR_ERR] = rrs->read_wr_err;
+ stats->value[EFA_RDMA_READ_RESP_BYTES] = rrs->read_resp_bytes;
+
+ if (EFA_DEV_CAP(dev, RDMA_WRITE)) {
+ params.type = EFA_ADMIN_GET_STATS_TYPE_RDMA_WRITE;
+ err = efa_com_get_stats(&dev->edev, &params, &result);
+ if (err)
+ return err;
+
+ rws = &result.rdma_write_stats;
+ stats->value[EFA_RDMA_WRITE_WRS] = rws->write_wrs;
+ stats->value[EFA_RDMA_WRITE_BYTES] = rws->write_bytes;
+ stats->value[EFA_RDMA_WRITE_WR_ERR] = rws->write_wr_err;
+ stats->value[EFA_RDMA_WRITE_RECV_BYTES] = rws->write_recv_bytes;
+ }
+
+ params.type = EFA_ADMIN_GET_STATS_TYPE_NETWORK;
+ err = efa_com_get_stats(&dev->edev, &params, &result);
+ if (err)
+ return err;
+
+ ns = &result.network_stats;
+ stats->value[EFA_RETRANS_BYTES] = ns->retrans_bytes;
+ stats->value[EFA_RETRANS_PKTS] = ns->retrans_pkts;
+ stats->value[EFA_RETRANS_TIMEOUT_EVENS] = ns->retrans_timeout_events;
+ stats->value[EFA_UNRESPONSIVE_REMOTE_EVENTS] = ns->unresponsive_remote_events;
+ stats->value[EFA_IMPAIRED_REMOTE_CONN_EVENTS] = ns->impaired_remote_conn_events;
+
+ return ARRAY_SIZE(efa_port_stats_descs);
+}
+
+int efa_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats,
+ u32 port_num, int index)
+{
+ if (port_num)
+ return efa_fill_port_stats(to_edev(ibdev), stats, port_num);
+ else
+ return efa_fill_device_stats(to_edev(ibdev), stats);
+}
+
+enum rdma_link_layer efa_port_link_layer(struct ib_device *ibdev,
+ u32 port_num)
+{
+ return IB_LINK_LAYER_UNSPECIFIED;
+}
+
+DECLARE_UVERBS_NAMED_METHOD(EFA_IB_METHOD_MR_QUERY,
+ UVERBS_ATTR_IDR(EFA_IB_ATTR_QUERY_MR_HANDLE,
+ UVERBS_OBJECT_MR,
+ UVERBS_ACCESS_READ,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(EFA_IB_ATTR_QUERY_MR_RESP_IC_ID_VALIDITY,
+ UVERBS_ATTR_TYPE(u16),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(EFA_IB_ATTR_QUERY_MR_RESP_RECV_IC_ID,
+ UVERBS_ATTR_TYPE(u16),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(EFA_IB_ATTR_QUERY_MR_RESP_RDMA_READ_IC_ID,
+ UVERBS_ATTR_TYPE(u16),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(EFA_IB_ATTR_QUERY_MR_RESP_RDMA_RECV_IC_ID,
+ UVERBS_ATTR_TYPE(u16),
+ UA_MANDATORY));
+
+ADD_UVERBS_METHODS(efa_mr,
+ UVERBS_OBJECT_MR,
+ &UVERBS_METHOD(EFA_IB_METHOD_MR_QUERY));
+
+const struct uapi_definition efa_uapi_defs[] = {
+ UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_MR,
+ &efa_mr),
+ {},
+};
diff --git a/drivers/infiniband/hw/erdma/Kconfig b/drivers/infiniband/hw/erdma/Kconfig
new file mode 100644
index 000000000000..267fc1f3c42a
--- /dev/null
+++ b/drivers/infiniband/hw/erdma/Kconfig
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: GPL-2.0-only
+config INFINIBAND_ERDMA
+ tristate "Alibaba Elastic RDMA Adapter (ERDMA) support"
+ depends on PCI_MSI && 64BIT
+ depends on INFINIBAND_ADDR_TRANS
+ depends on INFINIBAND_USER_ACCESS
+ help
+ This is a RDMA driver for Alibaba Elastic RDMA Adapter(ERDMA),
+ which supports RDMA features in Alibaba cloud environment.
+
+ To compile this driver as module, choose M here. The module will be
+ called erdma.
diff --git a/drivers/infiniband/hw/erdma/Makefile b/drivers/infiniband/hw/erdma/Makefile
new file mode 100644
index 000000000000..51d2ef91905a
--- /dev/null
+++ b/drivers/infiniband/hw/erdma/Makefile
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_INFINIBAND_ERDMA) := erdma.o
+
+erdma-y := erdma_cm.o erdma_main.o erdma_cmdq.o erdma_cq.o erdma_verbs.o erdma_qp.o erdma_eq.o
diff --git a/drivers/infiniband/hw/erdma/erdma.h b/drivers/infiniband/hw/erdma/erdma.h
new file mode 100644
index 000000000000..2a023b99f992
--- /dev/null
+++ b/drivers/infiniband/hw/erdma/erdma.h
@@ -0,0 +1,283 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
+
+/* Authors: Cheng Xu <chengyou@linux.alibaba.com> */
+/* Kai Shen <kaishen@linux.alibaba.com> */
+/* Copyright (c) 2020-2022, Alibaba Group. */
+
+#ifndef __ERDMA_H__
+#define __ERDMA_H__
+
+#include <linux/bitfield.h>
+#include <linux/netdevice.h>
+#include <linux/pci.h>
+#include <linux/xarray.h>
+#include <rdma/ib_verbs.h>
+
+#include "erdma_hw.h"
+
+#define DRV_MODULE_NAME "erdma"
+#define ERDMA_NODE_DESC "Elastic RDMA Adapter stack"
+
+struct erdma_eq {
+ void *qbuf;
+ dma_addr_t qbuf_dma_addr;
+
+ spinlock_t lock;
+
+ u32 depth;
+
+ u16 ci;
+ u16 rsvd;
+
+ atomic64_t event_num;
+ atomic64_t notify_num;
+
+ void __iomem *db;
+ u64 *dbrec;
+ dma_addr_t dbrec_dma;
+};
+
+struct erdma_cmdq_sq {
+ void *qbuf;
+ dma_addr_t qbuf_dma_addr;
+
+ spinlock_t lock;
+
+ u32 depth;
+ u16 ci;
+ u16 pi;
+
+ u16 wqebb_cnt;
+
+ u64 *dbrec;
+ dma_addr_t dbrec_dma;
+};
+
+struct erdma_cmdq_cq {
+ void *qbuf;
+ dma_addr_t qbuf_dma_addr;
+
+ spinlock_t lock;
+
+ u32 depth;
+ u32 ci;
+ u32 cmdsn;
+
+ u64 *dbrec;
+ dma_addr_t dbrec_dma;
+
+ atomic64_t armed_num;
+};
+
+enum {
+ ERDMA_CMD_STATUS_INIT,
+ ERDMA_CMD_STATUS_ISSUED,
+ ERDMA_CMD_STATUS_FINISHED,
+ ERDMA_CMD_STATUS_TIMEOUT
+};
+
+struct erdma_comp_wait {
+ struct completion wait_event;
+ u32 cmd_status;
+ u32 ctx_id;
+ u16 sq_pi;
+ u8 comp_status;
+ u8 rsvd;
+ u32 comp_data[4];
+};
+
+enum {
+ ERDMA_CMDQ_STATE_OK_BIT = 0,
+ ERDMA_CMDQ_STATE_TIMEOUT_BIT = 1,
+ ERDMA_CMDQ_STATE_CTX_ERR_BIT = 2,
+};
+
+#define ERDMA_CMDQ_TIMEOUT_MS 15000
+#define ERDMA_REG_ACCESS_WAIT_MS 20
+#define ERDMA_WAIT_DEV_DONE_CNT 500
+
+struct erdma_cmdq {
+ unsigned long *comp_wait_bitmap;
+ struct erdma_comp_wait *wait_pool;
+ spinlock_t lock;
+
+ struct erdma_cmdq_sq sq;
+ struct erdma_cmdq_cq cq;
+ struct erdma_eq eq;
+
+ unsigned long state;
+
+ struct semaphore credits;
+ u16 max_outstandings;
+};
+
+#define COMPROMISE_CC ERDMA_CC_CUBIC
+enum erdma_cc_alg {
+ ERDMA_CC_NEWRENO = 0,
+ ERDMA_CC_CUBIC,
+ ERDMA_CC_HPCC_RTT,
+ ERDMA_CC_HPCC_ECN,
+ ERDMA_CC_HPCC_INT,
+ ERDMA_CC_METHODS_NUM
+};
+
+struct erdma_devattr {
+ u32 fw_version;
+
+ unsigned char peer_addr[ETH_ALEN];
+ unsigned long cap_flags;
+
+ int numa_node;
+ enum erdma_cc_alg cc;
+ u32 irq_num;
+
+ u32 max_qp;
+ u32 max_send_wr;
+ u32 max_recv_wr;
+ u32 max_ord;
+ u32 max_ird;
+
+ u32 max_send_sge;
+ u32 max_recv_sge;
+ u32 max_sge_rd;
+ u32 max_cq;
+ u32 max_cqe;
+ u64 max_mr_size;
+ u32 max_mr;
+ u32 max_pd;
+ u32 max_mw;
+ u32 max_gid;
+ u32 max_ah;
+ u32 local_dma_key;
+};
+
+#define ERDMA_IRQNAME_SIZE 50
+
+struct erdma_irq {
+ char name[ERDMA_IRQNAME_SIZE];
+ u32 msix_vector;
+ cpumask_t affinity_hint_mask;
+};
+
+struct erdma_eq_cb {
+ bool ready;
+ void *dev; /* All EQs use this fields to get erdma_dev struct */
+ struct erdma_irq irq;
+ struct erdma_eq eq;
+ struct tasklet_struct tasklet;
+};
+
+struct erdma_resource_cb {
+ unsigned long *bitmap;
+ spinlock_t lock;
+ u32 next_alloc_idx;
+ u32 max_cap;
+};
+
+enum {
+ ERDMA_RES_TYPE_PD = 0,
+ ERDMA_RES_TYPE_STAG_IDX = 1,
+ ERDMA_RES_TYPE_AH = 2,
+ ERDMA_RES_CNT = 3,
+};
+
+struct erdma_dev {
+ struct ib_device ibdev;
+ struct net_device *netdev;
+ struct pci_dev *pdev;
+ struct notifier_block netdev_nb;
+ struct workqueue_struct *reflush_wq;
+
+ resource_size_t func_bar_addr;
+ resource_size_t func_bar_len;
+ u8 __iomem *func_bar;
+
+ struct erdma_devattr attrs;
+ u32 mtu;
+
+ /* cmdq and aeq use the same msix vector */
+ struct erdma_irq comm_irq;
+ struct erdma_cmdq cmdq;
+ struct erdma_eq aeq;
+ struct erdma_eq_cb ceqs[ERDMA_NUM_MSIX_VEC - 1];
+
+ spinlock_t lock;
+ struct erdma_resource_cb res_cb[ERDMA_RES_CNT];
+ struct xarray qp_xa;
+ struct xarray cq_xa;
+
+ u32 next_alloc_qpn;
+ u32 next_alloc_cqn;
+
+ atomic_t num_ctx;
+ struct list_head cep_list;
+
+ struct dma_pool *db_pool;
+ struct dma_pool *resp_pool;
+ enum erdma_proto_type proto;
+};
+
+static inline void *get_queue_entry(void *qbuf, u32 idx, u32 depth, u32 shift)
+{
+ idx &= (depth - 1);
+
+ return qbuf + (idx << shift);
+}
+
+static inline struct erdma_dev *to_edev(struct ib_device *ibdev)
+{
+ return container_of(ibdev, struct erdma_dev, ibdev);
+}
+
+static inline u32 erdma_reg_read32(struct erdma_dev *dev, u32 reg)
+{
+ return readl(dev->func_bar + reg);
+}
+
+static inline u64 erdma_reg_read64(struct erdma_dev *dev, u32 reg)
+{
+ return readq(dev->func_bar + reg);
+}
+
+static inline void erdma_reg_write32(struct erdma_dev *dev, u32 reg, u32 value)
+{
+ writel(value, dev->func_bar + reg);
+}
+
+static inline void erdma_reg_write64(struct erdma_dev *dev, u32 reg, u64 value)
+{
+ writeq(value, dev->func_bar + reg);
+}
+
+static inline u32 erdma_reg_read32_filed(struct erdma_dev *dev, u32 reg,
+ u32 filed_mask)
+{
+ u32 val = erdma_reg_read32(dev, reg);
+
+ return FIELD_GET(filed_mask, val);
+}
+
+#define ERDMA_GET(val, name) FIELD_GET(ERDMA_CMD_##name##_MASK, val)
+
+int erdma_cmdq_init(struct erdma_dev *dev);
+void erdma_finish_cmdq_init(struct erdma_dev *dev);
+void erdma_cmdq_destroy(struct erdma_dev *dev);
+
+void erdma_cmdq_build_reqhdr(u64 *hdr, u32 mod, u32 op);
+int erdma_post_cmd_wait(struct erdma_cmdq *cmdq, void *req, u32 req_size,
+ u64 *resp0, u64 *resp1, bool sleepable);
+void erdma_cmdq_completion_handler(struct erdma_cmdq *cmdq);
+
+int erdma_ceqs_init(struct erdma_dev *dev);
+void erdma_ceqs_uninit(struct erdma_dev *dev);
+void notify_eq(struct erdma_eq *eq);
+void *get_next_valid_eqe(struct erdma_eq *eq);
+
+int erdma_aeq_init(struct erdma_dev *dev);
+int erdma_eq_common_init(struct erdma_dev *dev, struct erdma_eq *eq, u32 depth);
+void erdma_eq_destroy(struct erdma_dev *dev, struct erdma_eq *eq);
+
+void erdma_aeq_event_handler(struct erdma_dev *dev);
+void erdma_ceq_completion_handler(struct erdma_eq_cb *ceq_cb);
+
+#endif
diff --git a/drivers/infiniband/hw/erdma/erdma_cm.c b/drivers/infiniband/hw/erdma/erdma_cm.c
new file mode 100644
index 000000000000..ed21ba0037a4
--- /dev/null
+++ b/drivers/infiniband/hw/erdma/erdma_cm.c
@@ -0,0 +1,1431 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+
+/* Authors: Cheng Xu <chengyou@linux.alibaba.com> */
+/* Kai Shen <kaishen@linux.alibaba.com> */
+/* Copyright (c) 2020-2022, Alibaba Group. */
+
+/* Authors: Bernard Metzler <bmt@zurich.ibm.com> */
+/* Fredy Neeser */
+/* Greg Joyce <greg@opengridcomputing.com> */
+/* Copyright (c) 2008-2019, IBM Corporation */
+/* Copyright (c) 2017, Open Grid Computing, Inc. */
+
+#include <linux/workqueue.h>
+#include <trace/events/sock.h>
+
+#include "erdma.h"
+#include "erdma_cm.h"
+#include "erdma_verbs.h"
+
+static struct workqueue_struct *erdma_cm_wq;
+
+static void erdma_cm_llp_state_change(struct sock *sk);
+static void erdma_cm_llp_data_ready(struct sock *sk);
+static void erdma_cm_llp_error_report(struct sock *sk);
+
+static void erdma_sk_assign_cm_upcalls(struct sock *sk)
+{
+ write_lock_bh(&sk->sk_callback_lock);
+ sk->sk_state_change = erdma_cm_llp_state_change;
+ sk->sk_data_ready = erdma_cm_llp_data_ready;
+ sk->sk_error_report = erdma_cm_llp_error_report;
+ write_unlock_bh(&sk->sk_callback_lock);
+}
+
+static void erdma_sk_save_upcalls(struct sock *sk)
+{
+ struct erdma_cep *cep = sk_to_cep(sk);
+
+ write_lock_bh(&sk->sk_callback_lock);
+ cep->sk_state_change = sk->sk_state_change;
+ cep->sk_data_ready = sk->sk_data_ready;
+ cep->sk_error_report = sk->sk_error_report;
+ write_unlock_bh(&sk->sk_callback_lock);
+}
+
+static void erdma_sk_restore_upcalls(struct sock *sk, struct erdma_cep *cep)
+{
+ sk->sk_state_change = cep->sk_state_change;
+ sk->sk_data_ready = cep->sk_data_ready;
+ sk->sk_error_report = cep->sk_error_report;
+ sk->sk_user_data = NULL;
+}
+
+static void erdma_socket_disassoc(struct socket *s)
+{
+ struct sock *sk = s->sk;
+ struct erdma_cep *cep;
+
+ if (sk) {
+ write_lock_bh(&sk->sk_callback_lock);
+ cep = sk_to_cep(sk);
+ if (cep) {
+ erdma_sk_restore_upcalls(sk, cep);
+ erdma_cep_put(cep);
+ } else {
+ WARN_ON_ONCE(1);
+ }
+ write_unlock_bh(&sk->sk_callback_lock);
+ } else {
+ WARN_ON_ONCE(1);
+ }
+}
+
+static void erdma_cep_socket_assoc(struct erdma_cep *cep, struct socket *s)
+{
+ cep->sock = s;
+ erdma_cep_get(cep);
+ s->sk->sk_user_data = cep;
+
+ erdma_sk_save_upcalls(s->sk);
+ erdma_sk_assign_cm_upcalls(s->sk);
+}
+
+static void erdma_disassoc_listen_cep(struct erdma_cep *cep)
+{
+ if (cep->listen_cep) {
+ erdma_cep_put(cep->listen_cep);
+ cep->listen_cep = NULL;
+ }
+}
+
+static struct erdma_cep *erdma_cep_alloc(struct erdma_dev *dev)
+{
+ struct erdma_cep *cep = kzalloc(sizeof(*cep), GFP_KERNEL);
+ unsigned long flags;
+
+ if (!cep)
+ return NULL;
+
+ INIT_LIST_HEAD(&cep->listenq);
+ INIT_LIST_HEAD(&cep->devq);
+ INIT_LIST_HEAD(&cep->work_freelist);
+
+ kref_init(&cep->ref);
+ cep->state = ERDMA_EPSTATE_IDLE;
+ init_waitqueue_head(&cep->waitq);
+ spin_lock_init(&cep->lock);
+ cep->dev = dev;
+
+ spin_lock_irqsave(&dev->lock, flags);
+ list_add_tail(&cep->devq, &dev->cep_list);
+ spin_unlock_irqrestore(&dev->lock, flags);
+
+ return cep;
+}
+
+static void erdma_cm_free_work(struct erdma_cep *cep)
+{
+ struct list_head *w, *tmp;
+ struct erdma_cm_work *work;
+
+ list_for_each_safe(w, tmp, &cep->work_freelist) {
+ work = list_entry(w, struct erdma_cm_work, list);
+ list_del(&work->list);
+ kfree(work);
+ }
+}
+
+static void erdma_cancel_mpatimer(struct erdma_cep *cep)
+{
+ spin_lock_bh(&cep->lock);
+ if (cep->mpa_timer) {
+ if (cancel_delayed_work(&cep->mpa_timer->work)) {
+ erdma_cep_put(cep);
+ kfree(cep->mpa_timer);
+ }
+ cep->mpa_timer = NULL;
+ }
+ spin_unlock_bh(&cep->lock);
+}
+
+static void erdma_put_work(struct erdma_cm_work *work)
+{
+ INIT_LIST_HEAD(&work->list);
+ spin_lock_bh(&work->cep->lock);
+ list_add(&work->list, &work->cep->work_freelist);
+ spin_unlock_bh(&work->cep->lock);
+}
+
+static void erdma_cep_set_inuse(struct erdma_cep *cep)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&cep->lock, flags);
+ while (cep->in_use) {
+ spin_unlock_irqrestore(&cep->lock, flags);
+ wait_event_interruptible(cep->waitq, !cep->in_use);
+ if (signal_pending(current))
+ flush_signals(current);
+
+ spin_lock_irqsave(&cep->lock, flags);
+ }
+
+ cep->in_use = 1;
+ spin_unlock_irqrestore(&cep->lock, flags);
+}
+
+static void erdma_cep_set_free(struct erdma_cep *cep)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&cep->lock, flags);
+ cep->in_use = 0;
+ spin_unlock_irqrestore(&cep->lock, flags);
+
+ wake_up(&cep->waitq);
+}
+
+static void __erdma_cep_dealloc(struct kref *ref)
+{
+ struct erdma_cep *cep = container_of(ref, struct erdma_cep, ref);
+ struct erdma_dev *dev = cep->dev;
+ unsigned long flags;
+
+ WARN_ON(cep->listen_cep);
+
+ kfree(cep->private_data);
+ kfree(cep->mpa.pdata);
+ spin_lock_bh(&cep->lock);
+ if (!list_empty(&cep->work_freelist))
+ erdma_cm_free_work(cep);
+ spin_unlock_bh(&cep->lock);
+
+ spin_lock_irqsave(&dev->lock, flags);
+ list_del(&cep->devq);
+ spin_unlock_irqrestore(&dev->lock, flags);
+ kfree(cep);
+}
+
+static struct erdma_cm_work *erdma_get_work(struct erdma_cep *cep)
+{
+ struct erdma_cm_work *work = NULL;
+
+ spin_lock_bh(&cep->lock);
+ if (!list_empty(&cep->work_freelist)) {
+ work = list_entry(cep->work_freelist.next, struct erdma_cm_work,
+ list);
+ list_del_init(&work->list);
+ }
+
+ spin_unlock_bh(&cep->lock);
+ return work;
+}
+
+static int erdma_cm_alloc_work(struct erdma_cep *cep, int num)
+{
+ struct erdma_cm_work *work;
+
+ while (num--) {
+ work = kmalloc(sizeof(*work), GFP_KERNEL);
+ if (!work) {
+ if (!(list_empty(&cep->work_freelist)))
+ erdma_cm_free_work(cep);
+ return -ENOMEM;
+ }
+ work->cep = cep;
+ INIT_LIST_HEAD(&work->list);
+ list_add(&work->list, &cep->work_freelist);
+ }
+
+ return 0;
+}
+
+static int erdma_cm_upcall(struct erdma_cep *cep, enum iw_cm_event_type reason,
+ int status)
+{
+ struct iw_cm_event event;
+ struct iw_cm_id *cm_id;
+
+ memset(&event, 0, sizeof(event));
+ event.status = status;
+ event.event = reason;
+
+ if (reason == IW_CM_EVENT_CONNECT_REQUEST) {
+ event.provider_data = cep;
+ cm_id = cep->listen_cep->cm_id;
+
+ event.ird = cep->dev->attrs.max_ird;
+ event.ord = cep->dev->attrs.max_ord;
+ } else {
+ cm_id = cep->cm_id;
+ }
+
+ if (reason == IW_CM_EVENT_CONNECT_REQUEST ||
+ reason == IW_CM_EVENT_CONNECT_REPLY) {
+ u16 pd_len = be16_to_cpu(cep->mpa.hdr.params.pd_len);
+
+ if (pd_len && cep->mpa.pdata) {
+ event.private_data_len = pd_len;
+ event.private_data = cep->mpa.pdata;
+ }
+
+ getname_local(cep->sock, &event.local_addr);
+ getname_peer(cep->sock, &event.remote_addr);
+ }
+
+ return cm_id->event_handler(cm_id, &event);
+}
+
+void erdma_qp_cm_drop(struct erdma_qp *qp)
+{
+ struct erdma_cep *cep = qp->cep;
+
+ if (!qp->cep)
+ return;
+
+ erdma_cep_set_inuse(cep);
+
+ /* already closed. */
+ if (cep->state == ERDMA_EPSTATE_CLOSED)
+ goto out;
+
+ if (cep->cm_id) {
+ switch (cep->state) {
+ case ERDMA_EPSTATE_AWAIT_MPAREP:
+ erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY,
+ -EINVAL);
+ break;
+ case ERDMA_EPSTATE_RDMA_MODE:
+ erdma_cm_upcall(cep, IW_CM_EVENT_CLOSE, 0);
+ break;
+ case ERDMA_EPSTATE_IDLE:
+ case ERDMA_EPSTATE_LISTENING:
+ case ERDMA_EPSTATE_CONNECTING:
+ case ERDMA_EPSTATE_AWAIT_MPAREQ:
+ case ERDMA_EPSTATE_RECVD_MPAREQ:
+ case ERDMA_EPSTATE_CLOSED:
+ default:
+ break;
+ }
+ cep->cm_id->rem_ref(cep->cm_id);
+ cep->cm_id = NULL;
+ erdma_cep_put(cep);
+ }
+ cep->state = ERDMA_EPSTATE_CLOSED;
+
+ if (cep->sock) {
+ erdma_socket_disassoc(cep->sock);
+ sock_release(cep->sock);
+ cep->sock = NULL;
+ }
+
+ if (cep->qp) {
+ cep->qp = NULL;
+ erdma_qp_put(qp);
+ }
+out:
+ erdma_cep_set_free(cep);
+}
+
+void erdma_cep_put(struct erdma_cep *cep)
+{
+ WARN_ON(kref_read(&cep->ref) < 1);
+ kref_put(&cep->ref, __erdma_cep_dealloc);
+}
+
+void erdma_cep_get(struct erdma_cep *cep)
+{
+ kref_get(&cep->ref);
+}
+
+static int erdma_send_mpareqrep(struct erdma_cep *cep, const void *pdata,
+ u8 pd_len)
+{
+ struct socket *s = cep->sock;
+ struct mpa_rr *rr = &cep->mpa.hdr;
+ struct kvec iov[3];
+ struct msghdr msg;
+ int iovec_num = 0;
+ int ret;
+ int mpa_len;
+
+ memset(&msg, 0, sizeof(msg));
+
+ rr->params.pd_len = cpu_to_be16(pd_len);
+
+ iov[iovec_num].iov_base = rr;
+ iov[iovec_num].iov_len = sizeof(*rr);
+ iovec_num++;
+ mpa_len = sizeof(*rr);
+
+ iov[iovec_num].iov_base = &cep->mpa.ext_data;
+ iov[iovec_num].iov_len = sizeof(cep->mpa.ext_data);
+ iovec_num++;
+ mpa_len += sizeof(cep->mpa.ext_data);
+
+ if (pd_len) {
+ iov[iovec_num].iov_base = (char *)pdata;
+ iov[iovec_num].iov_len = pd_len;
+ mpa_len += pd_len;
+ iovec_num++;
+ }
+
+ ret = kernel_sendmsg(s, &msg, iov, iovec_num, mpa_len);
+
+ return ret < 0 ? ret : 0;
+}
+
+static inline int ksock_recv(struct socket *sock, char *buf, size_t size,
+ int flags)
+{
+ struct kvec iov = { buf, size };
+ struct msghdr msg = { .msg_name = NULL, .msg_flags = flags };
+
+ return kernel_recvmsg(sock, &msg, &iov, 1, size, flags);
+}
+
+static int __recv_mpa_hdr(struct erdma_cep *cep, int hdr_rcvd, char *hdr,
+ int hdr_size, int *rcvd_out)
+{
+ struct socket *s = cep->sock;
+ int rcvd;
+
+ *rcvd_out = 0;
+ if (hdr_rcvd < hdr_size) {
+ rcvd = ksock_recv(s, hdr + hdr_rcvd, hdr_size - hdr_rcvd,
+ MSG_DONTWAIT);
+ if (rcvd == -EAGAIN)
+ return -EAGAIN;
+
+ if (rcvd <= 0)
+ return -ECONNABORTED;
+
+ hdr_rcvd += rcvd;
+ *rcvd_out = rcvd;
+
+ if (hdr_rcvd < hdr_size)
+ return -EAGAIN;
+ }
+
+ return 0;
+}
+
+static void __mpa_rr_set_revision(__be16 *bits, u8 rev)
+{
+ *bits = (*bits & ~MPA_RR_MASK_REVISION) |
+ (cpu_to_be16(rev) & MPA_RR_MASK_REVISION);
+}
+
+static u8 __mpa_rr_revision(__be16 mpa_rr_bits)
+{
+ __be16 rev = mpa_rr_bits & MPA_RR_MASK_REVISION;
+
+ return (u8)be16_to_cpu(rev);
+}
+
+static void __mpa_ext_set_cc(__be32 *bits, u32 cc)
+{
+ *bits = (*bits & ~MPA_EXT_FLAG_CC) |
+ (cpu_to_be32(cc) & MPA_EXT_FLAG_CC);
+}
+
+static u8 __mpa_ext_cc(__be32 mpa_ext_bits)
+{
+ __be32 cc = mpa_ext_bits & MPA_EXT_FLAG_CC;
+
+ return (u8)be32_to_cpu(cc);
+}
+
+/*
+ * Receive MPA Request/Reply header.
+ *
+ * Returns 0 if complete MPA Request/Reply haeder including
+ * eventual private data was received. Returns -EAGAIN if
+ * header was partially received or negative error code otherwise.
+ *
+ * Context: May be called in process context only
+ */
+static int erdma_recv_mpa_rr(struct erdma_cep *cep)
+{
+ struct mpa_rr *hdr = &cep->mpa.hdr;
+ struct socket *s = cep->sock;
+ u16 pd_len;
+ int rcvd, to_rcv, ret, pd_rcvd;
+
+ if (cep->mpa.bytes_rcvd < sizeof(struct mpa_rr)) {
+ ret = __recv_mpa_hdr(cep, cep->mpa.bytes_rcvd,
+ (char *)&cep->mpa.hdr,
+ sizeof(struct mpa_rr), &rcvd);
+ cep->mpa.bytes_rcvd += rcvd;
+ if (ret)
+ return ret;
+ }
+
+ if (be16_to_cpu(hdr->params.pd_len) > MPA_MAX_PRIVDATA ||
+ __mpa_rr_revision(hdr->params.bits) != MPA_REVISION_EXT_1)
+ return -EPROTO;
+
+ if (cep->mpa.bytes_rcvd - sizeof(struct mpa_rr) <
+ sizeof(struct erdma_mpa_ext)) {
+ ret = __recv_mpa_hdr(
+ cep, cep->mpa.bytes_rcvd - sizeof(struct mpa_rr),
+ (char *)&cep->mpa.ext_data,
+ sizeof(struct erdma_mpa_ext), &rcvd);
+ cep->mpa.bytes_rcvd += rcvd;
+ if (ret)
+ return ret;
+ }
+
+ pd_len = be16_to_cpu(hdr->params.pd_len);
+ pd_rcvd = cep->mpa.bytes_rcvd - sizeof(struct mpa_rr) -
+ sizeof(struct erdma_mpa_ext);
+ to_rcv = pd_len - pd_rcvd;
+
+ if (!to_rcv) {
+ /*
+ * We have received the whole MPA Request/Reply message.
+ * Check against peer protocol violation.
+ */
+ u32 word;
+
+ ret = __recv_mpa_hdr(cep, 0, (char *)&word, sizeof(word),
+ &rcvd);
+ if (ret == -EAGAIN && rcvd == 0)
+ return 0;
+
+ if (ret)
+ return ret;
+
+ return -EPROTO;
+ }
+
+ /*
+ * At this point, MPA header has been fully received, and pd_len != 0.
+ * So, begin to receive private data.
+ */
+ if (!cep->mpa.pdata) {
+ cep->mpa.pdata = kmalloc(pd_len + 4, GFP_KERNEL);
+ if (!cep->mpa.pdata)
+ return -ENOMEM;
+ }
+
+ rcvd = ksock_recv(s, cep->mpa.pdata + pd_rcvd, to_rcv + 4,
+ MSG_DONTWAIT);
+ if (rcvd < 0)
+ return rcvd;
+
+ if (rcvd > to_rcv)
+ return -EPROTO;
+
+ cep->mpa.bytes_rcvd += rcvd;
+
+ if (to_rcv == rcvd)
+ return 0;
+
+ return -EAGAIN;
+}
+
+/*
+ * erdma_proc_mpareq()
+ *
+ * Read MPA Request from socket and signal new connection to IWCM
+ * if success. Caller must hold lock on corresponding listening CEP.
+ */
+static int erdma_proc_mpareq(struct erdma_cep *cep)
+{
+ struct mpa_rr *req;
+ int ret;
+
+ ret = erdma_recv_mpa_rr(cep);
+ if (ret)
+ return ret;
+
+ req = &cep->mpa.hdr;
+
+ if (memcmp(req->key, MPA_KEY_REQ, MPA_KEY_SIZE))
+ return -EPROTO;
+
+ memcpy(req->key, MPA_KEY_REP, MPA_KEY_SIZE);
+
+ /* Currently does not support marker and crc. */
+ if (req->params.bits & MPA_RR_FLAG_MARKERS ||
+ req->params.bits & MPA_RR_FLAG_CRC)
+ goto reject_conn;
+
+ cep->state = ERDMA_EPSTATE_RECVD_MPAREQ;
+
+ /* Keep reference until IWCM accepts/rejects */
+ erdma_cep_get(cep);
+ ret = erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REQUEST, 0);
+ if (ret)
+ erdma_cep_put(cep);
+
+ return ret;
+
+reject_conn:
+ req->params.bits &= ~MPA_RR_FLAG_MARKERS;
+ req->params.bits |= MPA_RR_FLAG_REJECT;
+ req->params.bits &= ~MPA_RR_FLAG_CRC;
+
+ kfree(cep->mpa.pdata);
+ cep->mpa.pdata = NULL;
+ erdma_send_mpareqrep(cep, NULL, 0);
+
+ return -EOPNOTSUPP;
+}
+
+static int erdma_proc_mpareply(struct erdma_cep *cep)
+{
+ enum erdma_qpa_mask_iwarp to_modify_attrs = 0;
+ struct erdma_mod_qp_params_iwarp params;
+ struct erdma_qp *qp = cep->qp;
+ struct mpa_rr *rep;
+ int ret;
+
+ ret = erdma_recv_mpa_rr(cep);
+ if (ret)
+ goto out_err;
+
+ erdma_cancel_mpatimer(cep);
+
+ rep = &cep->mpa.hdr;
+
+ if (memcmp(rep->key, MPA_KEY_REP, MPA_KEY_SIZE)) {
+ ret = -EPROTO;
+ goto out_err;
+ }
+
+ if (rep->params.bits & MPA_RR_FLAG_REJECT) {
+ erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY, -ECONNRESET);
+ return -ECONNRESET;
+ }
+
+ /* Currently does not support marker and crc. */
+ if ((rep->params.bits & MPA_RR_FLAG_MARKERS) ||
+ (rep->params.bits & MPA_RR_FLAG_CRC)) {
+ erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY, -ECONNREFUSED);
+ return -EINVAL;
+ }
+
+ memset(&params, 0, sizeof(params));
+ params.state = ERDMA_QPS_IWARP_RTS;
+ params.irq_size = cep->ird;
+ params.orq_size = cep->ord;
+
+ down_write(&qp->state_lock);
+ if (qp->attrs.iwarp.state > ERDMA_QPS_IWARP_RTR) {
+ ret = -EINVAL;
+ up_write(&qp->state_lock);
+ goto out_err;
+ }
+
+ to_modify_attrs = ERDMA_QPA_IWARP_STATE | ERDMA_QPA_IWARP_LLP_HANDLE |
+ ERDMA_QPA_IWARP_MPA | ERDMA_QPA_IWARP_IRD |
+ ERDMA_QPA_IWARP_ORD;
+
+ params.qp_type = ERDMA_QP_ACTIVE;
+ if (__mpa_ext_cc(cep->mpa.ext_data.bits) != qp->attrs.cc) {
+ to_modify_attrs |= ERDMA_QPA_IWARP_CC;
+ params.cc = COMPROMISE_CC;
+ }
+
+ ret = erdma_modify_qp_state_iwarp(qp, &params, to_modify_attrs);
+
+ up_write(&qp->state_lock);
+
+ if (!ret) {
+ ret = erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY, 0);
+ if (!ret)
+ cep->state = ERDMA_EPSTATE_RDMA_MODE;
+
+ return 0;
+ }
+
+out_err:
+ if (ret != -EAGAIN)
+ erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY, -EINVAL);
+
+ return ret;
+}
+
+static void erdma_accept_newconn(struct erdma_cep *cep)
+{
+ struct socket *s = cep->sock;
+ struct socket *new_s = NULL;
+ struct erdma_cep *new_cep = NULL;
+ int ret = 0;
+
+ if (cep->state != ERDMA_EPSTATE_LISTENING)
+ goto error;
+
+ new_cep = erdma_cep_alloc(cep->dev);
+ if (!new_cep)
+ goto error;
+
+ /*
+ * 4: Allocate a sufficient number of work elements
+ * to allow concurrent handling of local + peer close
+ * events, MPA header processing + MPA timeout.
+ */
+ if (erdma_cm_alloc_work(new_cep, 4) != 0)
+ goto error;
+
+ /*
+ * Copy saved socket callbacks from listening CEP
+ * and assign new socket with new CEP
+ */
+ new_cep->sk_state_change = cep->sk_state_change;
+ new_cep->sk_data_ready = cep->sk_data_ready;
+ new_cep->sk_error_report = cep->sk_error_report;
+
+ ret = kernel_accept(s, &new_s, O_NONBLOCK);
+ if (ret != 0)
+ goto error;
+
+ new_cep->sock = new_s;
+ erdma_cep_get(new_cep);
+ new_s->sk->sk_user_data = new_cep;
+
+ tcp_sock_set_nodelay(new_s->sk);
+ new_cep->state = ERDMA_EPSTATE_AWAIT_MPAREQ;
+
+ ret = erdma_cm_queue_work(new_cep, ERDMA_CM_WORK_MPATIMEOUT);
+ if (ret)
+ goto error;
+
+ new_cep->listen_cep = cep;
+ erdma_cep_get(cep);
+
+ if (atomic_read(&new_s->sk->sk_rmem_alloc)) {
+ /* MPA REQ already queued */
+ erdma_cep_set_inuse(new_cep);
+ ret = erdma_proc_mpareq(new_cep);
+ if (ret != -EAGAIN) {
+ erdma_cep_put(cep);
+ new_cep->listen_cep = NULL;
+ if (ret) {
+ erdma_cep_set_free(new_cep);
+ goto error;
+ }
+ }
+ erdma_cep_set_free(new_cep);
+ }
+ return;
+
+error:
+ if (new_cep) {
+ new_cep->state = ERDMA_EPSTATE_CLOSED;
+ erdma_cancel_mpatimer(new_cep);
+
+ erdma_cep_put(new_cep);
+ }
+
+ if (new_s) {
+ erdma_socket_disassoc(new_s);
+ sock_release(new_s);
+ }
+}
+
+static int erdma_newconn_connected(struct erdma_cep *cep)
+{
+ int ret = 0;
+
+ cep->mpa.hdr.params.bits = 0;
+ __mpa_rr_set_revision(&cep->mpa.hdr.params.bits, MPA_REVISION_EXT_1);
+
+ memcpy(cep->mpa.hdr.key, MPA_KEY_REQ, MPA_KEY_SIZE);
+ cep->mpa.ext_data.cookie = cpu_to_be32(cep->qp->attrs.iwarp.cookie);
+ __mpa_ext_set_cc(&cep->mpa.ext_data.bits, cep->qp->attrs.cc);
+
+ ret = erdma_send_mpareqrep(cep, cep->private_data, cep->pd_len);
+ cep->state = ERDMA_EPSTATE_AWAIT_MPAREP;
+ cep->mpa.hdr.params.pd_len = 0;
+
+ if (ret >= 0)
+ ret = erdma_cm_queue_work(cep, ERDMA_CM_WORK_MPATIMEOUT);
+
+ return ret;
+}
+
+static void erdma_cm_work_handler(struct work_struct *w)
+{
+ struct erdma_cm_work *work;
+ struct erdma_cep *cep;
+ int release_cep = 0, ret = 0;
+
+ work = container_of(w, struct erdma_cm_work, work.work);
+ cep = work->cep;
+
+ erdma_cep_set_inuse(cep);
+
+ switch (work->type) {
+ case ERDMA_CM_WORK_CONNECTED:
+ erdma_cancel_mpatimer(cep);
+ if (cep->state == ERDMA_EPSTATE_CONNECTING) {
+ ret = erdma_newconn_connected(cep);
+ if (ret) {
+ erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY,
+ -EIO);
+ release_cep = 1;
+ }
+ }
+ break;
+ case ERDMA_CM_WORK_CONNECTTIMEOUT:
+ if (cep->state == ERDMA_EPSTATE_CONNECTING) {
+ cep->mpa_timer = NULL;
+ erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY,
+ -ETIMEDOUT);
+ release_cep = 1;
+ }
+ break;
+ case ERDMA_CM_WORK_ACCEPT:
+ erdma_accept_newconn(cep);
+ break;
+ case ERDMA_CM_WORK_READ_MPAHDR:
+ if (cep->state == ERDMA_EPSTATE_AWAIT_MPAREQ) {
+ if (cep->listen_cep) {
+ erdma_cep_set_inuse(cep->listen_cep);
+
+ if (cep->listen_cep->state ==
+ ERDMA_EPSTATE_LISTENING)
+ ret = erdma_proc_mpareq(cep);
+ else
+ ret = -EFAULT;
+
+ erdma_cep_set_free(cep->listen_cep);
+
+ if (ret != -EAGAIN) {
+ erdma_cep_put(cep->listen_cep);
+ cep->listen_cep = NULL;
+ if (ret)
+ erdma_cep_put(cep);
+ }
+ }
+ } else if (cep->state == ERDMA_EPSTATE_AWAIT_MPAREP) {
+ ret = erdma_proc_mpareply(cep);
+ }
+
+ if (ret && ret != -EAGAIN)
+ release_cep = 1;
+ break;
+ case ERDMA_CM_WORK_CLOSE_LLP:
+ if (cep->cm_id)
+ erdma_cm_upcall(cep, IW_CM_EVENT_CLOSE, 0);
+ release_cep = 1;
+ break;
+ case ERDMA_CM_WORK_PEER_CLOSE:
+ if (cep->cm_id) {
+ if (cep->state == ERDMA_EPSTATE_CONNECTING ||
+ cep->state == ERDMA_EPSTATE_AWAIT_MPAREP) {
+ /*
+ * MPA reply not received, but connection drop
+ */
+ erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY,
+ -ECONNRESET);
+ } else if (cep->state == ERDMA_EPSTATE_RDMA_MODE) {
+ /*
+ * NOTE: IW_CM_EVENT_DISCONNECT is given just
+ * to transition IWCM into CLOSING.
+ */
+ erdma_cm_upcall(cep, IW_CM_EVENT_DISCONNECT, 0);
+ erdma_cm_upcall(cep, IW_CM_EVENT_CLOSE, 0);
+ }
+ } else if (cep->state == ERDMA_EPSTATE_AWAIT_MPAREQ) {
+ /* Socket close before MPA request received. */
+ erdma_disassoc_listen_cep(cep);
+ erdma_cep_put(cep);
+ }
+ release_cep = 1;
+ break;
+ case ERDMA_CM_WORK_MPATIMEOUT:
+ cep->mpa_timer = NULL;
+ if (cep->state == ERDMA_EPSTATE_AWAIT_MPAREP) {
+ /*
+ * MPA request timed out:
+ * Hide any partially received private data and signal
+ * timeout
+ */
+ cep->mpa.hdr.params.pd_len = 0;
+
+ if (cep->cm_id)
+ erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY,
+ -ETIMEDOUT);
+ release_cep = 1;
+ } else if (cep->state == ERDMA_EPSTATE_AWAIT_MPAREQ) {
+ /* No MPA req received after peer TCP stream setup. */
+ erdma_disassoc_listen_cep(cep);
+
+ erdma_cep_put(cep);
+ release_cep = 1;
+ }
+ break;
+ default:
+ WARN(1, "Undefined CM work type: %d\n", work->type);
+ }
+
+ if (release_cep) {
+ erdma_cancel_mpatimer(cep);
+ cep->state = ERDMA_EPSTATE_CLOSED;
+ if (cep->qp) {
+ struct erdma_qp *qp = cep->qp;
+ /*
+ * Serialize a potential race with application
+ * closing the QP and calling erdma_qp_cm_drop()
+ */
+ erdma_qp_get(qp);
+ erdma_cep_set_free(cep);
+
+ erdma_qp_llp_close(qp);
+ erdma_qp_put(qp);
+
+ erdma_cep_set_inuse(cep);
+ cep->qp = NULL;
+ erdma_qp_put(qp);
+ }
+
+ if (cep->sock) {
+ erdma_socket_disassoc(cep->sock);
+ sock_release(cep->sock);
+ cep->sock = NULL;
+ }
+
+ if (cep->cm_id) {
+ cep->cm_id->rem_ref(cep->cm_id);
+ cep->cm_id = NULL;
+ if (cep->state != ERDMA_EPSTATE_LISTENING)
+ erdma_cep_put(cep);
+ }
+ }
+ erdma_cep_set_free(cep);
+ erdma_put_work(work);
+ erdma_cep_put(cep);
+}
+
+int erdma_cm_queue_work(struct erdma_cep *cep, enum erdma_work_type type)
+{
+ struct erdma_cm_work *work = erdma_get_work(cep);
+ unsigned long delay = 0;
+
+ if (!work)
+ return -ENOMEM;
+
+ work->type = type;
+ work->cep = cep;
+
+ erdma_cep_get(cep);
+
+ INIT_DELAYED_WORK(&work->work, erdma_cm_work_handler);
+
+ if (type == ERDMA_CM_WORK_MPATIMEOUT) {
+ cep->mpa_timer = work;
+
+ if (cep->state == ERDMA_EPSTATE_AWAIT_MPAREP)
+ delay = MPAREP_TIMEOUT;
+ else
+ delay = MPAREQ_TIMEOUT;
+ } else if (type == ERDMA_CM_WORK_CONNECTTIMEOUT) {
+ cep->mpa_timer = work;
+
+ delay = CONNECT_TIMEOUT;
+ }
+
+ queue_delayed_work(erdma_cm_wq, &work->work, delay);
+
+ return 0;
+}
+
+static void erdma_cm_llp_data_ready(struct sock *sk)
+{
+ struct erdma_cep *cep;
+
+ trace_sk_data_ready(sk);
+
+ read_lock(&sk->sk_callback_lock);
+
+ cep = sk_to_cep(sk);
+ if (!cep)
+ goto out;
+
+ if (cep->state == ERDMA_EPSTATE_AWAIT_MPAREQ ||
+ cep->state == ERDMA_EPSTATE_AWAIT_MPAREP)
+ erdma_cm_queue_work(cep, ERDMA_CM_WORK_READ_MPAHDR);
+
+out:
+ read_unlock(&sk->sk_callback_lock);
+}
+
+static void erdma_cm_llp_error_report(struct sock *sk)
+{
+ struct erdma_cep *cep = sk_to_cep(sk);
+
+ if (cep)
+ cep->sk_error_report(sk);
+}
+
+static void erdma_cm_llp_state_change(struct sock *sk)
+{
+ struct erdma_cep *cep;
+ void (*orig_state_change)(struct sock *sk);
+
+ read_lock(&sk->sk_callback_lock);
+
+ cep = sk_to_cep(sk);
+ if (!cep) {
+ read_unlock(&sk->sk_callback_lock);
+ return;
+ }
+ orig_state_change = cep->sk_state_change;
+
+ switch (sk->sk_state) {
+ case TCP_ESTABLISHED:
+ if (cep->state == ERDMA_EPSTATE_CONNECTING)
+ erdma_cm_queue_work(cep, ERDMA_CM_WORK_CONNECTED);
+ else
+ erdma_cm_queue_work(cep, ERDMA_CM_WORK_ACCEPT);
+ break;
+ case TCP_CLOSE:
+ case TCP_CLOSE_WAIT:
+ if (cep->state != ERDMA_EPSTATE_LISTENING)
+ erdma_cm_queue_work(cep, ERDMA_CM_WORK_PEER_CLOSE);
+ break;
+ default:
+ break;
+ }
+ read_unlock(&sk->sk_callback_lock);
+ orig_state_change(sk);
+}
+
+static int kernel_bindconnect(struct socket *s, struct sockaddr *laddr,
+ int laddrlen, struct sockaddr *raddr,
+ int raddrlen, int flags)
+{
+ int ret;
+
+ sock_set_reuseaddr(s->sk);
+ ret = s->ops->bind(s, (struct sockaddr_unsized *)laddr, laddrlen);
+ if (ret)
+ return ret;
+ ret = s->ops->connect(s, (struct sockaddr_unsized *)raddr, raddrlen, flags);
+ return ret < 0 ? ret : 0;
+}
+
+int erdma_connect(struct iw_cm_id *id, struct iw_cm_conn_param *params)
+{
+ struct erdma_dev *dev = to_edev(id->device);
+ struct erdma_qp *qp;
+ struct erdma_cep *cep = NULL;
+ struct socket *s = NULL;
+ struct sockaddr *laddr = (struct sockaddr *)&id->m_local_addr;
+ struct sockaddr *raddr = (struct sockaddr *)&id->m_remote_addr;
+ u16 pd_len = params->private_data_len;
+ int ret;
+
+ if (pd_len > MPA_MAX_PRIVDATA)
+ return -EINVAL;
+
+ if (params->ird > dev->attrs.max_ird ||
+ params->ord > dev->attrs.max_ord)
+ return -EINVAL;
+
+ if (laddr->sa_family != AF_INET || raddr->sa_family != AF_INET)
+ return -EAFNOSUPPORT;
+
+ qp = find_qp_by_qpn(dev, params->qpn);
+ if (!qp)
+ return -ENOENT;
+ erdma_qp_get(qp);
+
+ ret = sock_create(AF_INET, SOCK_STREAM, IPPROTO_TCP, &s);
+ if (ret < 0)
+ goto error_put_qp;
+
+ cep = erdma_cep_alloc(dev);
+ if (!cep) {
+ ret = -ENOMEM;
+ goto error_release_sock;
+ }
+
+ erdma_cep_set_inuse(cep);
+
+ /* Associate QP with CEP */
+ erdma_cep_get(cep);
+ qp->cep = cep;
+ cep->qp = qp;
+
+ /* Associate cm_id with CEP */
+ id->add_ref(id);
+ cep->cm_id = id;
+
+ /*
+ * 6: Allocate a sufficient number of work elements
+ * to allow concurrent handling of local + peer close
+ * events, MPA header processing + MPA timeout, connected event
+ * and connect timeout.
+ */
+ ret = erdma_cm_alloc_work(cep, 6);
+ if (ret != 0) {
+ ret = -ENOMEM;
+ goto error_release_cep;
+ }
+
+ cep->ird = params->ird;
+ cep->ord = params->ord;
+ cep->state = ERDMA_EPSTATE_CONNECTING;
+
+ erdma_cep_socket_assoc(cep, s);
+
+ if (pd_len) {
+ cep->pd_len = pd_len;
+ cep->private_data = kmalloc(pd_len, GFP_KERNEL);
+ if (!cep->private_data) {
+ ret = -ENOMEM;
+ goto error_disassoc;
+ }
+
+ memcpy(cep->private_data, params->private_data,
+ params->private_data_len);
+ }
+
+ ret = kernel_bindconnect(s, laddr, sizeof(*laddr), raddr,
+ sizeof(*raddr), O_NONBLOCK);
+ if (ret != -EINPROGRESS && ret != 0) {
+ goto error_disassoc;
+ } else if (ret == 0) {
+ ret = erdma_cm_queue_work(cep, ERDMA_CM_WORK_CONNECTED);
+ if (ret)
+ goto error_disassoc;
+ } else {
+ ret = erdma_cm_queue_work(cep, ERDMA_CM_WORK_CONNECTTIMEOUT);
+ if (ret)
+ goto error_disassoc;
+ }
+
+ erdma_cep_set_free(cep);
+ return 0;
+
+error_disassoc:
+ kfree(cep->private_data);
+ cep->private_data = NULL;
+ cep->pd_len = 0;
+
+ erdma_socket_disassoc(s);
+
+error_release_cep:
+ /* disassoc with cm_id */
+ cep->cm_id = NULL;
+ id->rem_ref(id);
+
+ /* disassoc with qp */
+ qp->cep = NULL;
+ erdma_cep_put(cep);
+ cep->qp = NULL;
+
+ cep->state = ERDMA_EPSTATE_CLOSED;
+
+ erdma_cep_set_free(cep);
+
+ /* release the cep. */
+ erdma_cep_put(cep);
+
+error_release_sock:
+ if (s)
+ sock_release(s);
+error_put_qp:
+ erdma_qp_put(qp);
+
+ return ret;
+}
+
+int erdma_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params)
+{
+ struct erdma_cep *cep = (struct erdma_cep *)id->provider_data;
+ struct erdma_mod_qp_params_iwarp mod_qp_params;
+ enum erdma_qpa_mask_iwarp to_modify_attrs = 0;
+ struct erdma_dev *dev = to_edev(id->device);
+ struct erdma_qp *qp;
+ int ret;
+
+ erdma_cep_set_inuse(cep);
+ erdma_cep_put(cep);
+
+ /* Free lingering inbound private data */
+ if (cep->mpa.hdr.params.pd_len) {
+ cep->mpa.hdr.params.pd_len = 0;
+ kfree(cep->mpa.pdata);
+ cep->mpa.pdata = NULL;
+ }
+ erdma_cancel_mpatimer(cep);
+
+ if (cep->state != ERDMA_EPSTATE_RECVD_MPAREQ) {
+ erdma_cep_set_free(cep);
+ erdma_cep_put(cep);
+
+ return -ECONNRESET;
+ }
+
+ qp = find_qp_by_qpn(dev, params->qpn);
+ if (!qp)
+ return -ENOENT;
+ erdma_qp_get(qp);
+
+ down_write(&qp->state_lock);
+ if (qp->attrs.iwarp.state > ERDMA_QPS_IWARP_RTR) {
+ ret = -EINVAL;
+ up_write(&qp->state_lock);
+ goto error;
+ }
+
+ if (params->ord > dev->attrs.max_ord ||
+ params->ird > dev->attrs.max_ord) {
+ ret = -EINVAL;
+ up_write(&qp->state_lock);
+ goto error;
+ }
+
+ if (params->private_data_len > MPA_MAX_PRIVDATA) {
+ ret = -EINVAL;
+ up_write(&qp->state_lock);
+ goto error;
+ }
+
+ cep->ird = params->ird;
+ cep->ord = params->ord;
+
+ cep->cm_id = id;
+ id->add_ref(id);
+
+ memset(&mod_qp_params, 0, sizeof(mod_qp_params));
+
+ mod_qp_params.irq_size = params->ird;
+ mod_qp_params.orq_size = params->ord;
+ mod_qp_params.state = ERDMA_QPS_IWARP_RTS;
+
+ /* Associate QP with CEP */
+ erdma_cep_get(cep);
+ qp->cep = cep;
+ cep->qp = qp;
+
+ cep->state = ERDMA_EPSTATE_RDMA_MODE;
+
+ mod_qp_params.qp_type = ERDMA_QP_PASSIVE;
+ mod_qp_params.pd_len = params->private_data_len;
+
+ to_modify_attrs = ERDMA_QPA_IWARP_STATE | ERDMA_QPA_IWARP_ORD |
+ ERDMA_QPA_IWARP_LLP_HANDLE | ERDMA_QPA_IWARP_IRD |
+ ERDMA_QPA_IWARP_MPA;
+
+ if (qp->attrs.cc != __mpa_ext_cc(cep->mpa.ext_data.bits)) {
+ to_modify_attrs |= ERDMA_QPA_IWARP_CC;
+ mod_qp_params.cc = COMPROMISE_CC;
+ }
+
+ /* move to rts */
+ ret = erdma_modify_qp_state_iwarp(qp, &mod_qp_params, to_modify_attrs);
+
+ up_write(&qp->state_lock);
+
+ if (ret)
+ goto error;
+
+ cep->mpa.ext_data.bits = 0;
+ __mpa_ext_set_cc(&cep->mpa.ext_data.bits, qp->attrs.cc);
+ cep->mpa.ext_data.cookie = cpu_to_be32(cep->qp->attrs.iwarp.cookie);
+
+ ret = erdma_send_mpareqrep(cep, params->private_data,
+ params->private_data_len);
+ if (!ret) {
+ ret = erdma_cm_upcall(cep, IW_CM_EVENT_ESTABLISHED, 0);
+ if (ret)
+ goto error;
+
+ erdma_cep_set_free(cep);
+
+ return 0;
+ }
+
+error:
+ erdma_socket_disassoc(cep->sock);
+ sock_release(cep->sock);
+ cep->sock = NULL;
+
+ cep->state = ERDMA_EPSTATE_CLOSED;
+
+ if (cep->cm_id) {
+ cep->cm_id->rem_ref(id);
+ cep->cm_id = NULL;
+ }
+
+ if (qp->cep) {
+ erdma_cep_put(cep);
+ qp->cep = NULL;
+ }
+
+ cep->qp = NULL;
+ erdma_qp_put(qp);
+
+ erdma_cep_set_free(cep);
+ erdma_cep_put(cep);
+
+ return ret;
+}
+
+int erdma_reject(struct iw_cm_id *id, const void *pdata, u8 plen)
+{
+ struct erdma_cep *cep = (struct erdma_cep *)id->provider_data;
+
+ erdma_cep_set_inuse(cep);
+ erdma_cep_put(cep);
+
+ erdma_cancel_mpatimer(cep);
+
+ if (cep->state != ERDMA_EPSTATE_RECVD_MPAREQ) {
+ erdma_cep_set_free(cep);
+ erdma_cep_put(cep);
+
+ return -ECONNRESET;
+ }
+
+ if (__mpa_rr_revision(cep->mpa.hdr.params.bits) == MPA_REVISION_EXT_1) {
+ cep->mpa.hdr.params.bits |= MPA_RR_FLAG_REJECT; /* reject */
+ erdma_send_mpareqrep(cep, pdata, plen);
+ }
+
+ erdma_socket_disassoc(cep->sock);
+ sock_release(cep->sock);
+ cep->sock = NULL;
+
+ cep->state = ERDMA_EPSTATE_CLOSED;
+
+ erdma_cep_set_free(cep);
+ erdma_cep_put(cep);
+
+ return 0;
+}
+
+int erdma_create_listen(struct iw_cm_id *id, int backlog)
+{
+ struct socket *s;
+ struct erdma_cep *cep = NULL;
+ int ret = 0;
+ struct erdma_dev *dev = to_edev(id->device);
+ int addr_family = id->local_addr.ss_family;
+ struct sockaddr_in *laddr = &to_sockaddr_in(id->local_addr);
+
+ if (addr_family != AF_INET)
+ return -EAFNOSUPPORT;
+
+ ret = sock_create(addr_family, SOCK_STREAM, IPPROTO_TCP, &s);
+ if (ret < 0)
+ return ret;
+
+ sock_set_reuseaddr(s->sk);
+
+ /* For wildcard addr, limit binding to current device only */
+ if (ipv4_is_zeronet(laddr->sin_addr.s_addr))
+ s->sk->sk_bound_dev_if = dev->netdev->ifindex;
+
+ ret = s->ops->bind(s, (struct sockaddr_unsized *)laddr,
+ sizeof(struct sockaddr_in));
+ if (ret)
+ goto error;
+
+ cep = erdma_cep_alloc(dev);
+ if (!cep) {
+ ret = -ENOMEM;
+ goto error;
+ }
+ erdma_cep_socket_assoc(cep, s);
+
+ ret = erdma_cm_alloc_work(cep, backlog);
+ if (ret)
+ goto error;
+
+ ret = s->ops->listen(s, backlog);
+ if (ret)
+ goto error;
+
+ cep->cm_id = id;
+ id->add_ref(id);
+
+ if (!id->provider_data) {
+ id->provider_data =
+ kmalloc(sizeof(struct list_head), GFP_KERNEL);
+ if (!id->provider_data) {
+ ret = -ENOMEM;
+ goto error;
+ }
+ INIT_LIST_HEAD((struct list_head *)id->provider_data);
+ }
+
+ list_add_tail(&cep->listenq, (struct list_head *)id->provider_data);
+ cep->state = ERDMA_EPSTATE_LISTENING;
+
+ return 0;
+
+error:
+ if (cep) {
+ erdma_cep_set_inuse(cep);
+
+ if (cep->cm_id) {
+ cep->cm_id->rem_ref(cep->cm_id);
+ cep->cm_id = NULL;
+ }
+ cep->sock = NULL;
+ erdma_socket_disassoc(s);
+ cep->state = ERDMA_EPSTATE_CLOSED;
+
+ erdma_cep_set_free(cep);
+ erdma_cep_put(cep);
+ }
+ sock_release(s);
+
+ return ret;
+}
+
+static void erdma_drop_listeners(struct iw_cm_id *id)
+{
+ struct list_head *p, *tmp;
+ /*
+ * In case of a wildcard rdma_listen on a multi-homed device,
+ * a listener's IWCM id is associated with more than one listening CEP.
+ */
+ list_for_each_safe(p, tmp, (struct list_head *)id->provider_data) {
+ struct erdma_cep *cep =
+ list_entry(p, struct erdma_cep, listenq);
+
+ list_del(p);
+
+ erdma_cep_set_inuse(cep);
+
+ if (cep->cm_id) {
+ cep->cm_id->rem_ref(cep->cm_id);
+ cep->cm_id = NULL;
+ }
+ if (cep->sock) {
+ erdma_socket_disassoc(cep->sock);
+ sock_release(cep->sock);
+ cep->sock = NULL;
+ }
+ cep->state = ERDMA_EPSTATE_CLOSED;
+ erdma_cep_set_free(cep);
+ erdma_cep_put(cep);
+ }
+}
+
+int erdma_destroy_listen(struct iw_cm_id *id)
+{
+ if (!id->provider_data)
+ return 0;
+
+ erdma_drop_listeners(id);
+ kfree(id->provider_data);
+ id->provider_data = NULL;
+
+ return 0;
+}
+
+int erdma_cm_init(void)
+{
+ erdma_cm_wq = create_singlethread_workqueue("erdma_cm_wq");
+ if (!erdma_cm_wq)
+ return -ENOMEM;
+
+ return 0;
+}
+
+void erdma_cm_exit(void)
+{
+ if (erdma_cm_wq)
+ destroy_workqueue(erdma_cm_wq);
+}
diff --git a/drivers/infiniband/hw/erdma/erdma_cm.h b/drivers/infiniband/hw/erdma/erdma_cm.h
new file mode 100644
index 000000000000..a26d80770188
--- /dev/null
+++ b/drivers/infiniband/hw/erdma/erdma_cm.h
@@ -0,0 +1,167 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
+
+/* Authors: Cheng Xu <chengyou@linux.alibaba.com> */
+/* Kai Shen <kaishen@linux.alibaba.com> */
+/* Copyright (c) 2020-2022, Alibaba Group. */
+
+/* Authors: Bernard Metzler <bmt@zurich.ibm.com> */
+/* Greg Joyce <greg@opengridcomputing.com> */
+/* Copyright (c) 2008-2019, IBM Corporation */
+/* Copyright (c) 2017, Open Grid Computing, Inc. */
+
+#ifndef __ERDMA_CM_H__
+#define __ERDMA_CM_H__
+
+#include <linux/tcp.h>
+#include <net/sock.h>
+#include <rdma/iw_cm.h>
+
+/* iWarp MPA protocol defs */
+#define MPA_REVISION_EXT_1 129
+#define MPA_MAX_PRIVDATA RDMA_MAX_PRIVATE_DATA
+#define MPA_KEY_REQ "MPA ID Req Frame"
+#define MPA_KEY_REP "MPA ID Rep Frame"
+#define MPA_KEY_SIZE 16
+#define MPA_DEFAULT_HDR_LEN 28
+
+struct mpa_rr_params {
+ __be16 bits;
+ __be16 pd_len;
+};
+
+/*
+ * MPA request/response Hdr bits & fields
+ */
+enum {
+ MPA_RR_FLAG_MARKERS = cpu_to_be16(0x8000),
+ MPA_RR_FLAG_CRC = cpu_to_be16(0x4000),
+ MPA_RR_FLAG_REJECT = cpu_to_be16(0x2000),
+ MPA_RR_RESERVED = cpu_to_be16(0x1f00),
+ MPA_RR_MASK_REVISION = cpu_to_be16(0x00ff)
+};
+
+/*
+ * MPA request/reply header
+ */
+struct mpa_rr {
+ u8 key[16];
+ struct mpa_rr_params params;
+};
+
+struct erdma_mpa_ext {
+ __be32 cookie;
+ __be32 bits;
+};
+
+enum {
+ MPA_EXT_FLAG_CC = cpu_to_be32(0x0000000f),
+};
+
+struct erdma_mpa_info {
+ struct mpa_rr hdr; /* peer mpa hdr in host byte order */
+ struct erdma_mpa_ext ext_data;
+ char *pdata;
+ int bytes_rcvd;
+};
+
+struct erdma_sk_upcalls {
+ void (*sk_state_change)(struct sock *sk);
+ void (*sk_data_ready)(struct sock *sk, int bytes);
+ void (*sk_error_report)(struct sock *sk);
+};
+
+struct erdma_dev;
+
+enum erdma_cep_state {
+ ERDMA_EPSTATE_IDLE = 1,
+ ERDMA_EPSTATE_LISTENING,
+ ERDMA_EPSTATE_CONNECTING,
+ ERDMA_EPSTATE_AWAIT_MPAREQ,
+ ERDMA_EPSTATE_RECVD_MPAREQ,
+ ERDMA_EPSTATE_AWAIT_MPAREP,
+ ERDMA_EPSTATE_RDMA_MODE,
+ ERDMA_EPSTATE_CLOSED
+};
+
+struct erdma_cep {
+ struct iw_cm_id *cm_id;
+ struct erdma_dev *dev;
+ struct list_head devq;
+ spinlock_t lock;
+ struct kref ref;
+ int in_use;
+ wait_queue_head_t waitq;
+ enum erdma_cep_state state;
+
+ struct list_head listenq;
+ struct erdma_cep *listen_cep;
+
+ struct erdma_qp *qp;
+ struct socket *sock;
+
+ struct erdma_cm_work *mpa_timer;
+ struct list_head work_freelist;
+
+ struct erdma_mpa_info mpa;
+ int ord;
+ int ird;
+
+ int pd_len;
+ /* hold user's private data. */
+ void *private_data;
+
+ /* Saved upcalls of socket llp.sock */
+ void (*sk_state_change)(struct sock *sk);
+ void (*sk_data_ready)(struct sock *sk);
+ void (*sk_error_report)(struct sock *sk);
+};
+
+#define MPAREQ_TIMEOUT (HZ * 20)
+#define MPAREP_TIMEOUT (HZ * 10)
+#define CONNECT_TIMEOUT (HZ * 10)
+
+enum erdma_work_type {
+ ERDMA_CM_WORK_ACCEPT = 1,
+ ERDMA_CM_WORK_READ_MPAHDR,
+ ERDMA_CM_WORK_CLOSE_LLP, /* close socket */
+ ERDMA_CM_WORK_PEER_CLOSE, /* socket indicated peer close */
+ ERDMA_CM_WORK_MPATIMEOUT,
+ ERDMA_CM_WORK_CONNECTED,
+ ERDMA_CM_WORK_CONNECTTIMEOUT
+};
+
+struct erdma_cm_work {
+ struct delayed_work work;
+ struct list_head list;
+ enum erdma_work_type type;
+ struct erdma_cep *cep;
+};
+
+#define to_sockaddr_in(a) (*(struct sockaddr_in *)(&(a)))
+
+static inline int getname_peer(struct socket *s, struct sockaddr_storage *a)
+{
+ return s->ops->getname(s, (struct sockaddr *)a, 1);
+}
+
+static inline int getname_local(struct socket *s, struct sockaddr_storage *a)
+{
+ return s->ops->getname(s, (struct sockaddr *)a, 0);
+}
+
+int erdma_connect(struct iw_cm_id *id, struct iw_cm_conn_param *param);
+int erdma_accept(struct iw_cm_id *id, struct iw_cm_conn_param *param);
+int erdma_reject(struct iw_cm_id *id, const void *pdata, u8 plen);
+int erdma_create_listen(struct iw_cm_id *id, int backlog);
+int erdma_destroy_listen(struct iw_cm_id *id);
+
+void erdma_cep_get(struct erdma_cep *ceq);
+void erdma_cep_put(struct erdma_cep *ceq);
+int erdma_cm_queue_work(struct erdma_cep *ceq, enum erdma_work_type type);
+
+int erdma_cm_init(void);
+void erdma_cm_exit(void);
+
+#define sk_to_cep(sk) ((struct erdma_cep *)((sk)->sk_user_data))
+
+#endif
diff --git a/drivers/infiniband/hw/erdma/erdma_cmdq.c b/drivers/infiniband/hw/erdma/erdma_cmdq.c
new file mode 100644
index 000000000000..b867aefe83b2
--- /dev/null
+++ b/drivers/infiniband/hw/erdma/erdma_cmdq.c
@@ -0,0 +1,452 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+
+/* Authors: Cheng Xu <chengyou@linux.alibaba.com> */
+/* Kai Shen <kaishen@linux.alibaba.com> */
+/* Copyright (c) 2020-2022, Alibaba Group. */
+
+#include "erdma.h"
+
+static void arm_cmdq_cq(struct erdma_cmdq *cmdq)
+{
+ struct erdma_dev *dev = container_of(cmdq, struct erdma_dev, cmdq);
+ u64 db_data = FIELD_PREP(ERDMA_CQDB_CI_MASK, cmdq->cq.ci) |
+ FIELD_PREP(ERDMA_CQDB_ARM_MASK, 1) |
+ FIELD_PREP(ERDMA_CQDB_CMDSN_MASK, cmdq->cq.cmdsn) |
+ FIELD_PREP(ERDMA_CQDB_IDX_MASK, cmdq->cq.cmdsn);
+
+ *cmdq->cq.dbrec = db_data;
+ writeq(db_data, dev->func_bar + ERDMA_CMDQ_CQDB_REG);
+
+ atomic64_inc(&cmdq->cq.armed_num);
+}
+
+static void kick_cmdq_db(struct erdma_cmdq *cmdq)
+{
+ struct erdma_dev *dev = container_of(cmdq, struct erdma_dev, cmdq);
+ u64 db_data = FIELD_PREP(ERDMA_CMD_HDR_WQEBB_INDEX_MASK, cmdq->sq.pi);
+
+ *cmdq->sq.dbrec = db_data;
+ writeq(db_data, dev->func_bar + ERDMA_CMDQ_SQDB_REG);
+}
+
+static struct erdma_comp_wait *get_comp_wait(struct erdma_cmdq *cmdq)
+{
+ int comp_idx;
+
+ spin_lock(&cmdq->lock);
+ comp_idx = find_first_zero_bit(cmdq->comp_wait_bitmap,
+ cmdq->max_outstandings);
+ if (comp_idx == cmdq->max_outstandings) {
+ spin_unlock(&cmdq->lock);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ __set_bit(comp_idx, cmdq->comp_wait_bitmap);
+ spin_unlock(&cmdq->lock);
+
+ return &cmdq->wait_pool[comp_idx];
+}
+
+static void put_comp_wait(struct erdma_cmdq *cmdq,
+ struct erdma_comp_wait *comp_wait)
+{
+ int used;
+
+ cmdq->wait_pool[comp_wait->ctx_id].cmd_status = ERDMA_CMD_STATUS_INIT;
+ spin_lock(&cmdq->lock);
+ used = __test_and_clear_bit(comp_wait->ctx_id, cmdq->comp_wait_bitmap);
+ spin_unlock(&cmdq->lock);
+
+ WARN_ON(!used);
+}
+
+static int erdma_cmdq_wait_res_init(struct erdma_dev *dev,
+ struct erdma_cmdq *cmdq)
+{
+ int i;
+
+ cmdq->wait_pool =
+ devm_kcalloc(&dev->pdev->dev, cmdq->max_outstandings,
+ sizeof(struct erdma_comp_wait), GFP_KERNEL);
+ if (!cmdq->wait_pool)
+ return -ENOMEM;
+
+ spin_lock_init(&cmdq->lock);
+ cmdq->comp_wait_bitmap = devm_bitmap_zalloc(
+ &dev->pdev->dev, cmdq->max_outstandings, GFP_KERNEL);
+ if (!cmdq->comp_wait_bitmap)
+ return -ENOMEM;
+
+ for (i = 0; i < cmdq->max_outstandings; i++) {
+ init_completion(&cmdq->wait_pool[i].wait_event);
+ cmdq->wait_pool[i].ctx_id = i;
+ }
+
+ return 0;
+}
+
+static int erdma_cmdq_sq_init(struct erdma_dev *dev)
+{
+ struct erdma_cmdq *cmdq = &dev->cmdq;
+ struct erdma_cmdq_sq *sq = &cmdq->sq;
+
+ sq->wqebb_cnt = SQEBB_COUNT(ERDMA_CMDQ_SQE_SIZE);
+ sq->depth = cmdq->max_outstandings * sq->wqebb_cnt;
+
+ sq->qbuf = dma_alloc_coherent(&dev->pdev->dev, sq->depth << SQEBB_SHIFT,
+ &sq->qbuf_dma_addr, GFP_KERNEL);
+ if (!sq->qbuf)
+ return -ENOMEM;
+
+ sq->dbrec = dma_pool_zalloc(dev->db_pool, GFP_KERNEL, &sq->dbrec_dma);
+ if (!sq->dbrec)
+ goto err_out;
+
+ spin_lock_init(&sq->lock);
+
+ erdma_reg_write32(dev, ERDMA_REGS_CMDQ_SQ_ADDR_H_REG,
+ upper_32_bits(sq->qbuf_dma_addr));
+ erdma_reg_write32(dev, ERDMA_REGS_CMDQ_SQ_ADDR_L_REG,
+ lower_32_bits(sq->qbuf_dma_addr));
+ erdma_reg_write32(dev, ERDMA_REGS_CMDQ_DEPTH_REG, sq->depth);
+ erdma_reg_write64(dev, ERDMA_CMDQ_SQ_DB_HOST_ADDR_REG, sq->dbrec_dma);
+
+ return 0;
+
+err_out:
+ dma_free_coherent(&dev->pdev->dev, sq->depth << SQEBB_SHIFT,
+ sq->qbuf, sq->qbuf_dma_addr);
+
+ return -ENOMEM;
+}
+
+static int erdma_cmdq_cq_init(struct erdma_dev *dev)
+{
+ struct erdma_cmdq *cmdq = &dev->cmdq;
+ struct erdma_cmdq_cq *cq = &cmdq->cq;
+
+ cq->depth = cmdq->sq.depth;
+ cq->qbuf = dma_alloc_coherent(&dev->pdev->dev, cq->depth << CQE_SHIFT,
+ &cq->qbuf_dma_addr, GFP_KERNEL);
+ if (!cq->qbuf)
+ return -ENOMEM;
+
+ spin_lock_init(&cq->lock);
+
+ cq->dbrec = dma_pool_zalloc(dev->db_pool, GFP_KERNEL, &cq->dbrec_dma);
+ if (!cq->dbrec)
+ goto err_out;
+
+ atomic64_set(&cq->armed_num, 0);
+
+ erdma_reg_write32(dev, ERDMA_REGS_CMDQ_CQ_ADDR_H_REG,
+ upper_32_bits(cq->qbuf_dma_addr));
+ erdma_reg_write32(dev, ERDMA_REGS_CMDQ_CQ_ADDR_L_REG,
+ lower_32_bits(cq->qbuf_dma_addr));
+ erdma_reg_write64(dev, ERDMA_CMDQ_CQ_DB_HOST_ADDR_REG, cq->dbrec_dma);
+
+ return 0;
+
+err_out:
+ dma_free_coherent(&dev->pdev->dev, cq->depth << CQE_SHIFT, cq->qbuf,
+ cq->qbuf_dma_addr);
+
+ return -ENOMEM;
+}
+
+static int erdma_cmdq_eq_init(struct erdma_dev *dev)
+{
+ struct erdma_cmdq *cmdq = &dev->cmdq;
+ struct erdma_eq *eq = &cmdq->eq;
+ int ret;
+
+ ret = erdma_eq_common_init(dev, eq, cmdq->max_outstandings);
+ if (ret)
+ return ret;
+
+ eq->db = dev->func_bar + ERDMA_REGS_CEQ_DB_BASE_REG;
+
+ erdma_reg_write32(dev, ERDMA_REGS_CMDQ_EQ_ADDR_H_REG,
+ upper_32_bits(eq->qbuf_dma_addr));
+ erdma_reg_write32(dev, ERDMA_REGS_CMDQ_EQ_ADDR_L_REG,
+ lower_32_bits(eq->qbuf_dma_addr));
+ erdma_reg_write32(dev, ERDMA_REGS_CMDQ_EQ_DEPTH_REG, eq->depth);
+ erdma_reg_write64(dev, ERDMA_CMDQ_EQ_DB_HOST_ADDR_REG, eq->dbrec_dma);
+
+ return 0;
+}
+
+int erdma_cmdq_init(struct erdma_dev *dev)
+{
+ struct erdma_cmdq *cmdq = &dev->cmdq;
+ int err;
+
+ cmdq->max_outstandings = ERDMA_CMDQ_MAX_OUTSTANDING;
+
+ sema_init(&cmdq->credits, cmdq->max_outstandings);
+
+ err = erdma_cmdq_wait_res_init(dev, cmdq);
+ if (err)
+ return err;
+
+ err = erdma_cmdq_sq_init(dev);
+ if (err)
+ return err;
+
+ err = erdma_cmdq_cq_init(dev);
+ if (err)
+ goto err_destroy_sq;
+
+ err = erdma_cmdq_eq_init(dev);
+ if (err)
+ goto err_destroy_cq;
+
+ set_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state);
+
+ return 0;
+
+err_destroy_cq:
+ dma_free_coherent(&dev->pdev->dev, cmdq->cq.depth << CQE_SHIFT,
+ cmdq->cq.qbuf, cmdq->cq.qbuf_dma_addr);
+
+ dma_pool_free(dev->db_pool, cmdq->cq.dbrec, cmdq->cq.dbrec_dma);
+
+err_destroy_sq:
+ dma_free_coherent(&dev->pdev->dev, cmdq->sq.depth << SQEBB_SHIFT,
+ cmdq->sq.qbuf, cmdq->sq.qbuf_dma_addr);
+
+ dma_pool_free(dev->db_pool, cmdq->sq.dbrec, cmdq->sq.dbrec_dma);
+
+ return err;
+}
+
+void erdma_finish_cmdq_init(struct erdma_dev *dev)
+{
+ arm_cmdq_cq(&dev->cmdq);
+}
+
+void erdma_cmdq_destroy(struct erdma_dev *dev)
+{
+ struct erdma_cmdq *cmdq = &dev->cmdq;
+
+ clear_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state);
+
+ erdma_eq_destroy(dev, &cmdq->eq);
+
+ dma_free_coherent(&dev->pdev->dev, cmdq->sq.depth << SQEBB_SHIFT,
+ cmdq->sq.qbuf, cmdq->sq.qbuf_dma_addr);
+
+ dma_pool_free(dev->db_pool, cmdq->sq.dbrec, cmdq->sq.dbrec_dma);
+
+ dma_free_coherent(&dev->pdev->dev, cmdq->cq.depth << CQE_SHIFT,
+ cmdq->cq.qbuf, cmdq->cq.qbuf_dma_addr);
+
+ dma_pool_free(dev->db_pool, cmdq->cq.dbrec, cmdq->cq.dbrec_dma);
+}
+
+static void *get_next_valid_cmdq_cqe(struct erdma_cmdq *cmdq)
+{
+ __be32 *cqe = get_queue_entry(cmdq->cq.qbuf, cmdq->cq.ci,
+ cmdq->cq.depth, CQE_SHIFT);
+ u32 owner = FIELD_GET(ERDMA_CQE_HDR_OWNER_MASK,
+ be32_to_cpu(READ_ONCE(*cqe)));
+
+ return owner ^ !!(cmdq->cq.ci & cmdq->cq.depth) ? cqe : NULL;
+}
+
+static void push_cmdq_sqe(struct erdma_cmdq *cmdq, u64 *req, size_t req_len,
+ struct erdma_comp_wait *comp_wait)
+{
+ __le64 *wqe;
+ u64 hdr = *req;
+
+ comp_wait->cmd_status = ERDMA_CMD_STATUS_ISSUED;
+ reinit_completion(&comp_wait->wait_event);
+ comp_wait->sq_pi = cmdq->sq.pi;
+
+ wqe = get_queue_entry(cmdq->sq.qbuf, cmdq->sq.pi, cmdq->sq.depth,
+ SQEBB_SHIFT);
+ memcpy(wqe, req, req_len);
+
+ cmdq->sq.pi += cmdq->sq.wqebb_cnt;
+ hdr |= FIELD_PREP(ERDMA_CMD_HDR_WQEBB_INDEX_MASK, cmdq->sq.pi) |
+ FIELD_PREP(ERDMA_CMD_HDR_CONTEXT_COOKIE_MASK,
+ comp_wait->ctx_id) |
+ FIELD_PREP(ERDMA_CMD_HDR_WQEBB_CNT_MASK, cmdq->sq.wqebb_cnt - 1);
+ *wqe = cpu_to_le64(hdr);
+
+ kick_cmdq_db(cmdq);
+}
+
+static int erdma_poll_single_cmd_completion(struct erdma_cmdq *cmdq)
+{
+ struct erdma_comp_wait *comp_wait;
+ u32 hdr0, sqe_idx;
+ __be32 *cqe;
+ u16 ctx_id;
+ u64 *sqe;
+
+ cqe = get_next_valid_cmdq_cqe(cmdq);
+ if (!cqe)
+ return -EAGAIN;
+
+ cmdq->cq.ci++;
+
+ dma_rmb();
+ hdr0 = be32_to_cpu(*cqe);
+ sqe_idx = be32_to_cpu(*(cqe + 1));
+
+ sqe = get_queue_entry(cmdq->sq.qbuf, sqe_idx, cmdq->sq.depth,
+ SQEBB_SHIFT);
+ ctx_id = FIELD_GET(ERDMA_CMD_HDR_CONTEXT_COOKIE_MASK, *sqe);
+ comp_wait = &cmdq->wait_pool[ctx_id];
+ if (comp_wait->cmd_status != ERDMA_CMD_STATUS_ISSUED)
+ return -EIO;
+
+ comp_wait->cmd_status = ERDMA_CMD_STATUS_FINISHED;
+ comp_wait->comp_status = FIELD_GET(ERDMA_CQE_HDR_SYNDROME_MASK, hdr0);
+ cmdq->sq.ci += cmdq->sq.wqebb_cnt;
+ /* Copy 16B comp data after cqe hdr to outer */
+ be32_to_cpu_array(comp_wait->comp_data, cqe + 2, 4);
+
+ complete(&comp_wait->wait_event);
+
+ return 0;
+}
+
+static void erdma_polling_cmd_completions(struct erdma_cmdq *cmdq)
+{
+ unsigned long flags;
+ u16 comp_num;
+
+ spin_lock_irqsave(&cmdq->cq.lock, flags);
+
+ /* We must have less than # of max_outstandings
+ * completions at one time.
+ */
+ for (comp_num = 0; comp_num < cmdq->max_outstandings; comp_num++)
+ if (erdma_poll_single_cmd_completion(cmdq))
+ break;
+
+ spin_unlock_irqrestore(&cmdq->cq.lock, flags);
+}
+
+void erdma_cmdq_completion_handler(struct erdma_cmdq *cmdq)
+{
+ int got_event = 0;
+
+ if (!test_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state))
+ return;
+
+ while (get_next_valid_eqe(&cmdq->eq)) {
+ cmdq->eq.ci++;
+ got_event++;
+ }
+
+ if (got_event) {
+ cmdq->cq.cmdsn++;
+ erdma_polling_cmd_completions(cmdq);
+ arm_cmdq_cq(cmdq);
+ }
+
+ notify_eq(&cmdq->eq);
+}
+
+static int erdma_poll_cmd_completion(struct erdma_comp_wait *comp_ctx,
+ struct erdma_cmdq *cmdq, u32 timeout)
+{
+ unsigned long comp_timeout = jiffies + msecs_to_jiffies(timeout);
+
+ while (1) {
+ erdma_polling_cmd_completions(cmdq);
+ if (comp_ctx->cmd_status != ERDMA_CMD_STATUS_ISSUED)
+ break;
+
+ if (time_is_before_jiffies(comp_timeout))
+ return -ETIME;
+
+ udelay(20);
+ }
+
+ return 0;
+}
+
+static int erdma_wait_cmd_completion(struct erdma_comp_wait *comp_ctx,
+ struct erdma_cmdq *cmdq, u32 timeout)
+{
+ unsigned long flags = 0;
+
+ wait_for_completion_timeout(&comp_ctx->wait_event,
+ msecs_to_jiffies(timeout));
+
+ if (unlikely(comp_ctx->cmd_status != ERDMA_CMD_STATUS_FINISHED)) {
+ spin_lock_irqsave(&cmdq->cq.lock, flags);
+ comp_ctx->cmd_status = ERDMA_CMD_STATUS_TIMEOUT;
+ spin_unlock_irqrestore(&cmdq->cq.lock, flags);
+ return -ETIME;
+ }
+
+ return 0;
+}
+
+void erdma_cmdq_build_reqhdr(u64 *hdr, u32 mod, u32 op)
+{
+ *hdr = FIELD_PREP(ERDMA_CMD_HDR_SUB_MOD_MASK, mod) |
+ FIELD_PREP(ERDMA_CMD_HDR_OPCODE_MASK, op);
+}
+
+int erdma_post_cmd_wait(struct erdma_cmdq *cmdq, void *req, u32 req_size,
+ u64 *resp0, u64 *resp1, bool sleepable)
+{
+ struct erdma_comp_wait *comp_wait;
+ int ret;
+
+ if (!test_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state))
+ return -ENODEV;
+
+ if (!sleepable) {
+ while (down_trylock(&cmdq->credits))
+ ;
+ } else {
+ down(&cmdq->credits);
+ }
+
+ comp_wait = get_comp_wait(cmdq);
+ if (IS_ERR(comp_wait)) {
+ clear_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state);
+ set_bit(ERDMA_CMDQ_STATE_CTX_ERR_BIT, &cmdq->state);
+ up(&cmdq->credits);
+ return PTR_ERR(comp_wait);
+ }
+
+ spin_lock(&cmdq->sq.lock);
+ push_cmdq_sqe(cmdq, req, req_size, comp_wait);
+ spin_unlock(&cmdq->sq.lock);
+
+ if (sleepable)
+ ret = erdma_wait_cmd_completion(comp_wait, cmdq,
+ ERDMA_CMDQ_TIMEOUT_MS);
+ else
+ ret = erdma_poll_cmd_completion(comp_wait, cmdq,
+ ERDMA_CMDQ_TIMEOUT_MS);
+
+ if (ret) {
+ set_bit(ERDMA_CMDQ_STATE_TIMEOUT_BIT, &cmdq->state);
+ clear_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state);
+ goto out;
+ }
+
+ if (comp_wait->comp_status)
+ ret = -EIO;
+
+ if (resp0 && resp1) {
+ *resp0 = *((u64 *)&comp_wait->comp_data[0]);
+ *resp1 = *((u64 *)&comp_wait->comp_data[2]);
+ }
+ put_comp_wait(cmdq, comp_wait);
+
+out:
+ up(&cmdq->credits);
+
+ return ret;
+}
diff --git a/drivers/infiniband/hw/erdma/erdma_cq.c b/drivers/infiniband/hw/erdma/erdma_cq.c
new file mode 100644
index 000000000000..1f456327e63c
--- /dev/null
+++ b/drivers/infiniband/hw/erdma/erdma_cq.c
@@ -0,0 +1,268 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+
+/* Authors: Cheng Xu <chengyou@linux.alibaba.com> */
+/* Kai Shen <kaishen@linux.alibaba.com> */
+/* Copyright (c) 2020-2022, Alibaba Group. */
+
+#include "erdma_verbs.h"
+
+static void *get_next_valid_cqe(struct erdma_cq *cq)
+{
+ __be32 *cqe = get_queue_entry(cq->kern_cq.qbuf, cq->kern_cq.ci,
+ cq->depth, CQE_SHIFT);
+ u32 owner = FIELD_GET(ERDMA_CQE_HDR_OWNER_MASK,
+ be32_to_cpu(READ_ONCE(*cqe)));
+
+ return owner ^ !!(cq->kern_cq.ci & cq->depth) ? cqe : NULL;
+}
+
+static void notify_cq(struct erdma_cq *cq, u8 solcitied)
+{
+ u64 db_data =
+ FIELD_PREP(ERDMA_CQDB_IDX_MASK, (cq->kern_cq.notify_cnt)) |
+ FIELD_PREP(ERDMA_CQDB_CQN_MASK, cq->cqn) |
+ FIELD_PREP(ERDMA_CQDB_ARM_MASK, 1) |
+ FIELD_PREP(ERDMA_CQDB_SOL_MASK, solcitied) |
+ FIELD_PREP(ERDMA_CQDB_CMDSN_MASK, cq->kern_cq.cmdsn) |
+ FIELD_PREP(ERDMA_CQDB_CI_MASK, cq->kern_cq.ci);
+
+ *cq->kern_cq.dbrec = db_data;
+ writeq(db_data, cq->kern_cq.db);
+}
+
+int erdma_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
+{
+ struct erdma_cq *cq = to_ecq(ibcq);
+ unsigned long irq_flags;
+ int ret = 0;
+
+ spin_lock_irqsave(&cq->kern_cq.lock, irq_flags);
+
+ notify_cq(cq, (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED);
+
+ if ((flags & IB_CQ_REPORT_MISSED_EVENTS) && get_next_valid_cqe(cq))
+ ret = 1;
+
+ cq->kern_cq.notify_cnt++;
+
+ spin_unlock_irqrestore(&cq->kern_cq.lock, irq_flags);
+
+ return ret;
+}
+
+static const enum ib_wc_opcode wc_mapping_table[ERDMA_NUM_OPCODES] = {
+ [ERDMA_OP_WRITE] = IB_WC_RDMA_WRITE,
+ [ERDMA_OP_READ] = IB_WC_RDMA_READ,
+ [ERDMA_OP_SEND] = IB_WC_SEND,
+ [ERDMA_OP_SEND_WITH_IMM] = IB_WC_SEND,
+ [ERDMA_OP_RECEIVE] = IB_WC_RECV,
+ [ERDMA_OP_RECV_IMM] = IB_WC_RECV_RDMA_WITH_IMM,
+ [ERDMA_OP_RECV_INV] = IB_WC_RECV,
+ [ERDMA_OP_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE,
+ [ERDMA_OP_RSP_SEND_IMM] = IB_WC_RECV,
+ [ERDMA_OP_SEND_WITH_INV] = IB_WC_SEND,
+ [ERDMA_OP_REG_MR] = IB_WC_REG_MR,
+ [ERDMA_OP_LOCAL_INV] = IB_WC_LOCAL_INV,
+ [ERDMA_OP_READ_WITH_INV] = IB_WC_RDMA_READ,
+ [ERDMA_OP_ATOMIC_CAS] = IB_WC_COMP_SWAP,
+ [ERDMA_OP_ATOMIC_FAA] = IB_WC_FETCH_ADD,
+};
+
+static const struct {
+ enum erdma_wc_status erdma;
+ enum ib_wc_status base;
+ enum erdma_vendor_err vendor;
+} map_cqe_status[ERDMA_NUM_WC_STATUS] = {
+ { ERDMA_WC_SUCCESS, IB_WC_SUCCESS, ERDMA_WC_VENDOR_NO_ERR },
+ { ERDMA_WC_GENERAL_ERR, IB_WC_GENERAL_ERR, ERDMA_WC_VENDOR_NO_ERR },
+ { ERDMA_WC_RECV_WQE_FORMAT_ERR, IB_WC_GENERAL_ERR,
+ ERDMA_WC_VENDOR_INVALID_RQE },
+ { ERDMA_WC_RECV_STAG_INVALID_ERR, IB_WC_REM_ACCESS_ERR,
+ ERDMA_WC_VENDOR_RQE_INVALID_STAG },
+ { ERDMA_WC_RECV_ADDR_VIOLATION_ERR, IB_WC_REM_ACCESS_ERR,
+ ERDMA_WC_VENDOR_RQE_ADDR_VIOLATION },
+ { ERDMA_WC_RECV_RIGHT_VIOLATION_ERR, IB_WC_REM_ACCESS_ERR,
+ ERDMA_WC_VENDOR_RQE_ACCESS_RIGHT_ERR },
+ { ERDMA_WC_RECV_PDID_ERR, IB_WC_REM_ACCESS_ERR,
+ ERDMA_WC_VENDOR_RQE_INVALID_PD },
+ { ERDMA_WC_RECV_WARRPING_ERR, IB_WC_REM_ACCESS_ERR,
+ ERDMA_WC_VENDOR_RQE_WRAP_ERR },
+ { ERDMA_WC_SEND_WQE_FORMAT_ERR, IB_WC_LOC_QP_OP_ERR,
+ ERDMA_WC_VENDOR_INVALID_SQE },
+ { ERDMA_WC_SEND_WQE_ORD_EXCEED, IB_WC_GENERAL_ERR,
+ ERDMA_WC_VENDOR_ZERO_ORD },
+ { ERDMA_WC_SEND_STAG_INVALID_ERR, IB_WC_LOC_ACCESS_ERR,
+ ERDMA_WC_VENDOR_SQE_INVALID_STAG },
+ { ERDMA_WC_SEND_ADDR_VIOLATION_ERR, IB_WC_LOC_ACCESS_ERR,
+ ERDMA_WC_VENDOR_SQE_ADDR_VIOLATION },
+ { ERDMA_WC_SEND_RIGHT_VIOLATION_ERR, IB_WC_LOC_ACCESS_ERR,
+ ERDMA_WC_VENDOR_SQE_ACCESS_ERR },
+ { ERDMA_WC_SEND_PDID_ERR, IB_WC_LOC_ACCESS_ERR,
+ ERDMA_WC_VENDOR_SQE_INVALID_PD },
+ { ERDMA_WC_SEND_WARRPING_ERR, IB_WC_LOC_ACCESS_ERR,
+ ERDMA_WC_VENDOR_SQE_WARP_ERR },
+ { ERDMA_WC_FLUSH_ERR, IB_WC_WR_FLUSH_ERR, ERDMA_WC_VENDOR_NO_ERR },
+ { ERDMA_WC_RETRY_EXC_ERR, IB_WC_RETRY_EXC_ERR, ERDMA_WC_VENDOR_NO_ERR },
+};
+
+static void erdma_process_ud_cqe(struct erdma_cqe *cqe, struct ib_wc *wc)
+{
+ u32 ud_info;
+
+ wc->wc_flags |= (IB_WC_GRH | IB_WC_WITH_NETWORK_HDR_TYPE);
+ ud_info = be32_to_cpu(cqe->ud.info);
+ wc->network_hdr_type = FIELD_GET(ERDMA_CQE_NTYPE_MASK, ud_info);
+ if (wc->network_hdr_type == ERDMA_NETWORK_TYPE_IPV4)
+ wc->network_hdr_type = RDMA_NETWORK_IPV4;
+ else
+ wc->network_hdr_type = RDMA_NETWORK_IPV6;
+ wc->src_qp = FIELD_GET(ERDMA_CQE_SQPN_MASK, ud_info);
+ wc->sl = FIELD_GET(ERDMA_CQE_SL_MASK, ud_info);
+ wc->pkey_index = 0;
+}
+
+#define ERDMA_POLLCQ_NO_QP 1
+
+static int erdma_poll_one_cqe(struct erdma_cq *cq, struct ib_wc *wc)
+{
+ struct erdma_dev *dev = to_edev(cq->ibcq.device);
+ u8 opcode, syndrome, qtype;
+ struct erdma_kqp *kern_qp;
+ struct erdma_cqe *cqe;
+ struct erdma_qp *qp;
+ u16 wqe_idx, depth;
+ u32 qpn, cqe_hdr;
+ u64 *id_table;
+ u64 *wqe_hdr;
+
+ cqe = get_next_valid_cqe(cq);
+ if (!cqe)
+ return -EAGAIN;
+
+ cq->kern_cq.ci++;
+
+ /* cqbuf should be ready when we poll */
+ dma_rmb();
+
+ qpn = be32_to_cpu(cqe->qpn);
+ wqe_idx = be32_to_cpu(cqe->qe_idx);
+ cqe_hdr = be32_to_cpu(cqe->hdr);
+
+ qp = find_qp_by_qpn(dev, qpn);
+ if (!qp)
+ return ERDMA_POLLCQ_NO_QP;
+
+ kern_qp = &qp->kern_qp;
+
+ qtype = FIELD_GET(ERDMA_CQE_HDR_QTYPE_MASK, cqe_hdr);
+ syndrome = FIELD_GET(ERDMA_CQE_HDR_SYNDROME_MASK, cqe_hdr);
+ opcode = FIELD_GET(ERDMA_CQE_HDR_OPCODE_MASK, cqe_hdr);
+
+ if (qtype == ERDMA_CQE_QTYPE_SQ) {
+ id_table = kern_qp->swr_tbl;
+ depth = qp->attrs.sq_size;
+ wqe_hdr = get_queue_entry(qp->kern_qp.sq_buf, wqe_idx,
+ qp->attrs.sq_size, SQEBB_SHIFT);
+ kern_qp->sq_ci =
+ FIELD_GET(ERDMA_SQE_HDR_WQEBB_CNT_MASK, *wqe_hdr) +
+ wqe_idx + 1;
+ } else {
+ id_table = kern_qp->rwr_tbl;
+ depth = qp->attrs.rq_size;
+ }
+ wc->wr_id = id_table[wqe_idx & (depth - 1)];
+ wc->byte_len = be32_to_cpu(cqe->size);
+
+ wc->wc_flags = 0;
+
+ wc->opcode = wc_mapping_table[opcode];
+ if (opcode == ERDMA_OP_RECV_IMM || opcode == ERDMA_OP_RSP_SEND_IMM) {
+ wc->ex.imm_data = cpu_to_be32(le32_to_cpu(cqe->imm_data));
+ wc->wc_flags |= IB_WC_WITH_IMM;
+ } else if (opcode == ERDMA_OP_RECV_INV) {
+ wc->ex.invalidate_rkey = be32_to_cpu(cqe->inv_rkey);
+ wc->wc_flags |= IB_WC_WITH_INVALIDATE;
+ }
+
+ if (erdma_device_rocev2(dev) &&
+ (qp->ibqp.qp_type == IB_QPT_UD || qp->ibqp.qp_type == IB_QPT_GSI))
+ erdma_process_ud_cqe(cqe, wc);
+
+ if (syndrome >= ERDMA_NUM_WC_STATUS)
+ syndrome = ERDMA_WC_GENERAL_ERR;
+
+ wc->status = map_cqe_status[syndrome].base;
+ wc->vendor_err = map_cqe_status[syndrome].vendor;
+ wc->qp = &qp->ibqp;
+
+ return 0;
+}
+
+int erdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
+{
+ struct erdma_cq *cq = to_ecq(ibcq);
+ unsigned long flags;
+ int npolled, ret;
+
+ spin_lock_irqsave(&cq->kern_cq.lock, flags);
+
+ for (npolled = 0; npolled < num_entries;) {
+ ret = erdma_poll_one_cqe(cq, wc + npolled);
+
+ if (ret == -EAGAIN) /* no received new CQEs. */
+ break;
+ else if (ret) /* ignore invalid CQEs. */
+ continue;
+
+ npolled++;
+ }
+
+ spin_unlock_irqrestore(&cq->kern_cq.lock, flags);
+
+ return npolled;
+}
+
+void erdma_remove_cqes_of_qp(struct ib_cq *ibcq, u32 qpn)
+{
+ struct erdma_cq *cq = to_ecq(ibcq);
+ struct erdma_cqe *cqe, *dst_cqe;
+ u32 prev_cq_ci, cur_cq_ci;
+ u32 ncqe = 0, nqp_cqe = 0;
+ unsigned long flags;
+ u8 owner;
+
+ spin_lock_irqsave(&cq->kern_cq.lock, flags);
+
+ prev_cq_ci = cq->kern_cq.ci;
+
+ while (ncqe < cq->depth && (cqe = get_next_valid_cqe(cq)) != NULL) {
+ ++cq->kern_cq.ci;
+ ++ncqe;
+ }
+
+ while (ncqe > 0) {
+ cur_cq_ci = prev_cq_ci + ncqe - 1;
+ cqe = get_queue_entry(cq->kern_cq.qbuf, cur_cq_ci, cq->depth,
+ CQE_SHIFT);
+
+ if (be32_to_cpu(cqe->qpn) == qpn) {
+ ++nqp_cqe;
+ } else if (nqp_cqe) {
+ dst_cqe = get_queue_entry(cq->kern_cq.qbuf,
+ cur_cq_ci + nqp_cqe,
+ cq->depth, CQE_SHIFT);
+ owner = FIELD_GET(ERDMA_CQE_HDR_OWNER_MASK,
+ be32_to_cpu(dst_cqe->hdr));
+ cqe->hdr = cpu_to_be32(
+ (be32_to_cpu(cqe->hdr) &
+ ~ERDMA_CQE_HDR_OWNER_MASK) |
+ FIELD_PREP(ERDMA_CQE_HDR_OWNER_MASK, owner));
+ memcpy(dst_cqe, cqe, sizeof(*cqe));
+ }
+
+ --ncqe;
+ }
+
+ cq->kern_cq.ci = prev_cq_ci + nqp_cqe;
+ spin_unlock_irqrestore(&cq->kern_cq.lock, flags);
+}
diff --git a/drivers/infiniband/hw/erdma/erdma_eq.c b/drivers/infiniband/hw/erdma/erdma_eq.c
new file mode 100644
index 000000000000..6486234a2360
--- /dev/null
+++ b/drivers/infiniband/hw/erdma/erdma_eq.c
@@ -0,0 +1,326 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+
+/* Authors: Cheng Xu <chengyou@linux.alibaba.com> */
+/* Kai Shen <kaishen@linux.alibaba.com> */
+/* Copyright (c) 2020-2022, Alibaba Group. */
+
+#include "erdma_verbs.h"
+
+#define MAX_POLL_CHUNK_SIZE 16
+
+void notify_eq(struct erdma_eq *eq)
+{
+ u64 db_data = FIELD_PREP(ERDMA_EQDB_CI_MASK, eq->ci) |
+ FIELD_PREP(ERDMA_EQDB_ARM_MASK, 1);
+
+ *eq->dbrec = db_data;
+ writeq(db_data, eq->db);
+
+ atomic64_inc(&eq->notify_num);
+}
+
+void *get_next_valid_eqe(struct erdma_eq *eq)
+{
+ u64 *eqe = get_queue_entry(eq->qbuf, eq->ci, eq->depth, EQE_SHIFT);
+ u32 owner = FIELD_GET(ERDMA_CEQE_HDR_O_MASK, READ_ONCE(*eqe));
+
+ return owner ^ !!(eq->ci & eq->depth) ? eqe : NULL;
+}
+
+void erdma_aeq_event_handler(struct erdma_dev *dev)
+{
+ struct erdma_aeqe *aeqe;
+ u32 cqn, qpn;
+ struct erdma_qp *qp;
+ struct erdma_cq *cq;
+ struct ib_event event;
+ u32 poll_cnt = 0;
+
+ memset(&event, 0, sizeof(event));
+
+ while (poll_cnt < MAX_POLL_CHUNK_SIZE) {
+ aeqe = get_next_valid_eqe(&dev->aeq);
+ if (!aeqe)
+ break;
+
+ dma_rmb();
+
+ dev->aeq.ci++;
+ atomic64_inc(&dev->aeq.event_num);
+ poll_cnt++;
+
+ if (FIELD_GET(ERDMA_AEQE_HDR_TYPE_MASK,
+ le32_to_cpu(aeqe->hdr)) == ERDMA_AE_TYPE_CQ_ERR) {
+ cqn = le32_to_cpu(aeqe->event_data0);
+ cq = find_cq_by_cqn(dev, cqn);
+ if (!cq)
+ continue;
+
+ event.device = cq->ibcq.device;
+ event.element.cq = &cq->ibcq;
+ event.event = IB_EVENT_CQ_ERR;
+ if (cq->ibcq.event_handler)
+ cq->ibcq.event_handler(&event,
+ cq->ibcq.cq_context);
+ } else {
+ qpn = le32_to_cpu(aeqe->event_data0);
+ qp = find_qp_by_qpn(dev, qpn);
+ if (!qp)
+ continue;
+
+ event.device = qp->ibqp.device;
+ event.element.qp = &qp->ibqp;
+ event.event = IB_EVENT_QP_FATAL;
+ if (qp->ibqp.event_handler)
+ qp->ibqp.event_handler(&event,
+ qp->ibqp.qp_context);
+ }
+ }
+
+ notify_eq(&dev->aeq);
+}
+
+int erdma_eq_common_init(struct erdma_dev *dev, struct erdma_eq *eq, u32 depth)
+{
+ u32 buf_size = depth << EQE_SHIFT;
+
+ eq->qbuf = dma_alloc_coherent(&dev->pdev->dev, buf_size,
+ &eq->qbuf_dma_addr, GFP_KERNEL);
+ if (!eq->qbuf)
+ return -ENOMEM;
+
+ eq->dbrec = dma_pool_zalloc(dev->db_pool, GFP_KERNEL, &eq->dbrec_dma);
+ if (!eq->dbrec)
+ goto err_free_qbuf;
+
+ spin_lock_init(&eq->lock);
+ atomic64_set(&eq->event_num, 0);
+ atomic64_set(&eq->notify_num, 0);
+ eq->ci = 0;
+ eq->depth = depth;
+
+ return 0;
+
+err_free_qbuf:
+ dma_free_coherent(&dev->pdev->dev, buf_size, eq->qbuf,
+ eq->qbuf_dma_addr);
+
+ return -ENOMEM;
+}
+
+void erdma_eq_destroy(struct erdma_dev *dev, struct erdma_eq *eq)
+{
+ dma_pool_free(dev->db_pool, eq->dbrec, eq->dbrec_dma);
+ dma_free_coherent(&dev->pdev->dev, eq->depth << EQE_SHIFT, eq->qbuf,
+ eq->qbuf_dma_addr);
+}
+
+int erdma_aeq_init(struct erdma_dev *dev)
+{
+ struct erdma_eq *eq = &dev->aeq;
+ int ret;
+
+ ret = erdma_eq_common_init(dev, &dev->aeq, ERDMA_DEFAULT_EQ_DEPTH);
+ if (ret)
+ return ret;
+
+ eq->db = dev->func_bar + ERDMA_REGS_AEQ_DB_REG;
+
+ erdma_reg_write32(dev, ERDMA_REGS_AEQ_ADDR_H_REG,
+ upper_32_bits(eq->qbuf_dma_addr));
+ erdma_reg_write32(dev, ERDMA_REGS_AEQ_ADDR_L_REG,
+ lower_32_bits(eq->qbuf_dma_addr));
+ erdma_reg_write32(dev, ERDMA_REGS_AEQ_DEPTH_REG, eq->depth);
+ erdma_reg_write64(dev, ERDMA_AEQ_DB_HOST_ADDR_REG, eq->dbrec_dma);
+
+ return 0;
+}
+
+void erdma_ceq_completion_handler(struct erdma_eq_cb *ceq_cb)
+{
+ struct erdma_dev *dev = ceq_cb->dev;
+ struct erdma_cq *cq;
+ u32 poll_cnt = 0;
+ u64 *ceqe;
+ int cqn;
+
+ if (!ceq_cb->ready)
+ return;
+
+ while (poll_cnt < MAX_POLL_CHUNK_SIZE) {
+ ceqe = get_next_valid_eqe(&ceq_cb->eq);
+ if (!ceqe)
+ break;
+
+ dma_rmb();
+ ceq_cb->eq.ci++;
+ poll_cnt++;
+ cqn = FIELD_GET(ERDMA_CEQE_HDR_CQN_MASK, READ_ONCE(*ceqe));
+
+ cq = find_cq_by_cqn(dev, cqn);
+ if (!cq)
+ continue;
+
+ if (rdma_is_kernel_res(&cq->ibcq.res))
+ cq->kern_cq.cmdsn++;
+
+ if (cq->ibcq.comp_handler)
+ cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
+ }
+
+ notify_eq(&ceq_cb->eq);
+}
+
+static irqreturn_t erdma_intr_ceq_handler(int irq, void *data)
+{
+ struct erdma_eq_cb *ceq_cb = data;
+
+ tasklet_schedule(&ceq_cb->tasklet);
+
+ return IRQ_HANDLED;
+}
+
+static void erdma_intr_ceq_task(unsigned long data)
+{
+ erdma_ceq_completion_handler((struct erdma_eq_cb *)data);
+}
+
+static int erdma_set_ceq_irq(struct erdma_dev *dev, u16 ceqn)
+{
+ struct erdma_eq_cb *eqc = &dev->ceqs[ceqn];
+ int err;
+
+ snprintf(eqc->irq.name, ERDMA_IRQNAME_SIZE, "erdma-ceq%u@pci:%s", ceqn,
+ pci_name(dev->pdev));
+ eqc->irq.msix_vector = pci_irq_vector(dev->pdev, ceqn + 1);
+
+ tasklet_init(&dev->ceqs[ceqn].tasklet, erdma_intr_ceq_task,
+ (unsigned long)&dev->ceqs[ceqn]);
+
+ cpumask_set_cpu(cpumask_local_spread(ceqn + 1, dev->attrs.numa_node),
+ &eqc->irq.affinity_hint_mask);
+
+ err = request_irq(eqc->irq.msix_vector, erdma_intr_ceq_handler, 0,
+ eqc->irq.name, eqc);
+ if (err) {
+ dev_err(&dev->pdev->dev, "failed to request_irq(%d)\n", err);
+ return err;
+ }
+
+ irq_set_affinity_hint(eqc->irq.msix_vector,
+ &eqc->irq.affinity_hint_mask);
+
+ return 0;
+}
+
+static void erdma_free_ceq_irq(struct erdma_dev *dev, u16 ceqn)
+{
+ struct erdma_eq_cb *eqc = &dev->ceqs[ceqn];
+
+ irq_set_affinity_hint(eqc->irq.msix_vector, NULL);
+ free_irq(eqc->irq.msix_vector, eqc);
+}
+
+static int create_eq_cmd(struct erdma_dev *dev, u32 eqn, struct erdma_eq *eq)
+{
+ struct erdma_cmdq_create_eq_req req;
+
+ erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_COMMON,
+ CMDQ_OPCODE_CREATE_EQ);
+ req.eqn = eqn;
+ req.depth = ilog2(eq->depth);
+ req.qbuf_addr = eq->qbuf_dma_addr;
+ req.qtype = ERDMA_EQ_TYPE_CEQ;
+ /* Vector index is the same as EQN. */
+ req.vector_idx = eqn;
+ req.db_dma_addr_l = lower_32_bits(eq->dbrec_dma);
+ req.db_dma_addr_h = upper_32_bits(eq->dbrec_dma);
+
+ return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL,
+ false);
+}
+
+static int erdma_ceq_init_one(struct erdma_dev *dev, u16 ceqn)
+{
+ struct erdma_eq *eq = &dev->ceqs[ceqn].eq;
+ int ret;
+
+ ret = erdma_eq_common_init(dev, eq, ERDMA_DEFAULT_EQ_DEPTH);
+ if (ret)
+ return ret;
+
+ eq->db = dev->func_bar + ERDMA_REGS_CEQ_DB_BASE_REG +
+ (ceqn + 1) * ERDMA_DB_SIZE;
+ dev->ceqs[ceqn].dev = dev;
+ dev->ceqs[ceqn].ready = true;
+
+ /* CEQ indexed from 1, 0 rsvd for CMDQ-EQ. */
+ ret = create_eq_cmd(dev, ceqn + 1, eq);
+ if (ret) {
+ erdma_eq_destroy(dev, eq);
+ dev->ceqs[ceqn].ready = false;
+ }
+
+ return ret;
+}
+
+static void erdma_ceq_uninit_one(struct erdma_dev *dev, u16 ceqn)
+{
+ struct erdma_eq *eq = &dev->ceqs[ceqn].eq;
+ struct erdma_cmdq_destroy_eq_req req;
+ int err;
+
+ dev->ceqs[ceqn].ready = 0;
+
+ erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_COMMON,
+ CMDQ_OPCODE_DESTROY_EQ);
+ /* CEQ indexed from 1, 0 rsvd for CMDQ-EQ. */
+ req.eqn = ceqn + 1;
+ req.qtype = ERDMA_EQ_TYPE_CEQ;
+ req.vector_idx = ceqn + 1;
+
+ err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL,
+ false);
+ if (err)
+ return;
+
+ erdma_eq_destroy(dev, eq);
+}
+
+int erdma_ceqs_init(struct erdma_dev *dev)
+{
+ u32 i, j;
+ int err;
+
+ for (i = 0; i < dev->attrs.irq_num - 1; i++) {
+ err = erdma_ceq_init_one(dev, i);
+ if (err)
+ goto out_err;
+
+ err = erdma_set_ceq_irq(dev, i);
+ if (err) {
+ erdma_ceq_uninit_one(dev, i);
+ goto out_err;
+ }
+ }
+
+ return 0;
+
+out_err:
+ for (j = 0; j < i; j++) {
+ erdma_free_ceq_irq(dev, j);
+ erdma_ceq_uninit_one(dev, j);
+ }
+
+ return err;
+}
+
+void erdma_ceqs_uninit(struct erdma_dev *dev)
+{
+ u32 i;
+
+ for (i = 0; i < dev->attrs.irq_num - 1; i++) {
+ erdma_free_ceq_irq(dev, i);
+ erdma_ceq_uninit_one(dev, i);
+ }
+}
diff --git a/drivers/infiniband/hw/erdma/erdma_hw.h b/drivers/infiniband/hw/erdma/erdma_hw.h
new file mode 100644
index 000000000000..ea4db53901a4
--- /dev/null
+++ b/drivers/infiniband/hw/erdma/erdma_hw.h
@@ -0,0 +1,753 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
+
+/* Authors: Cheng Xu <chengyou@linux.alibaba.com> */
+/* Kai Shen <kaishen@linux.alibaba.com> */
+/* Copyright (c) 2020-2022, Alibaba Group. */
+
+#ifndef __ERDMA_HW_H__
+#define __ERDMA_HW_H__
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/if_ether.h>
+
+/* PCIe device related definition. */
+#define ERDMA_PCI_WIDTH 64
+#define ERDMA_FUNC_BAR 0
+#define ERDMA_MISX_BAR 2
+
+#define ERDMA_BAR_MASK (BIT(ERDMA_FUNC_BAR) | BIT(ERDMA_MISX_BAR))
+
+/* MSI-X related. */
+#define ERDMA_NUM_MSIX_VEC 32U
+#define ERDMA_MSIX_VECTOR_CMDQ 0
+
+/* RoCEv2 related */
+#define ERDMA_ROCEV2_GID_SIZE 16
+#define ERDMA_MAX_PKEYS 1
+#define ERDMA_DEFAULT_PKEY 0xFFFF
+
+/* erdma device protocol type */
+enum erdma_proto_type {
+ ERDMA_PROTO_IWARP = 0,
+ ERDMA_PROTO_ROCEV2 = 1,
+ ERDMA_PROTO_COUNT = 2,
+};
+
+/* PCIe Bar0 Registers. */
+#define ERDMA_REGS_VERSION_REG 0x0
+#define ERDMA_REGS_DEV_PROTO_REG 0xC
+#define ERDMA_REGS_DEV_CTRL_REG 0x10
+#define ERDMA_REGS_DEV_ST_REG 0x14
+#define ERDMA_REGS_NETDEV_MAC_L_REG 0x18
+#define ERDMA_REGS_NETDEV_MAC_H_REG 0x1C
+#define ERDMA_REGS_CMDQ_SQ_ADDR_L_REG 0x20
+#define ERDMA_REGS_CMDQ_SQ_ADDR_H_REG 0x24
+#define ERDMA_REGS_CMDQ_CQ_ADDR_L_REG 0x28
+#define ERDMA_REGS_CMDQ_CQ_ADDR_H_REG 0x2C
+#define ERDMA_REGS_CMDQ_DEPTH_REG 0x30
+#define ERDMA_REGS_CMDQ_EQ_DEPTH_REG 0x34
+#define ERDMA_REGS_CMDQ_EQ_ADDR_L_REG 0x38
+#define ERDMA_REGS_CMDQ_EQ_ADDR_H_REG 0x3C
+#define ERDMA_REGS_AEQ_ADDR_L_REG 0x40
+#define ERDMA_REGS_AEQ_ADDR_H_REG 0x44
+#define ERDMA_REGS_AEQ_DEPTH_REG 0x48
+#define ERDMA_REGS_GRP_NUM_REG 0x4c
+#define ERDMA_REGS_AEQ_DB_REG 0x50
+#define ERDMA_CMDQ_SQ_DB_HOST_ADDR_REG 0x60
+#define ERDMA_CMDQ_CQ_DB_HOST_ADDR_REG 0x68
+#define ERDMA_CMDQ_EQ_DB_HOST_ADDR_REG 0x70
+#define ERDMA_AEQ_DB_HOST_ADDR_REG 0x78
+#define ERDMA_REGS_STATS_TSO_IN_PKTS_REG 0x80
+#define ERDMA_REGS_STATS_TSO_OUT_PKTS_REG 0x88
+#define ERDMA_REGS_STATS_TSO_OUT_BYTES_REG 0x90
+#define ERDMA_REGS_STATS_TX_DROP_PKTS_REG 0x98
+#define ERDMA_REGS_STATS_TX_BPS_METER_DROP_PKTS_REG 0xa0
+#define ERDMA_REGS_STATS_TX_PPS_METER_DROP_PKTS_REG 0xa8
+#define ERDMA_REGS_STATS_RX_PKTS_REG 0xc0
+#define ERDMA_REGS_STATS_RX_BYTES_REG 0xc8
+#define ERDMA_REGS_STATS_RX_DROP_PKTS_REG 0xd0
+#define ERDMA_REGS_STATS_RX_BPS_METER_DROP_PKTS_REG 0xd8
+#define ERDMA_REGS_STATS_RX_PPS_METER_DROP_PKTS_REG 0xe0
+#define ERDMA_REGS_CEQ_DB_BASE_REG 0x100
+#define ERDMA_CMDQ_SQDB_REG 0x200
+#define ERDMA_CMDQ_CQDB_REG 0x300
+
+/* DEV_CTRL_REG details. */
+#define ERDMA_REG_DEV_CTRL_RESET_MASK 0x00000001
+#define ERDMA_REG_DEV_CTRL_INIT_MASK 0x00000002
+
+/* DEV_ST_REG details. */
+#define ERDMA_REG_DEV_ST_RESET_DONE_MASK 0x00000001U
+#define ERDMA_REG_DEV_ST_INIT_DONE_MASK 0x00000002U
+
+/* eRDMA PCIe DBs definition. */
+#define ERDMA_BAR_DB_SPACE_BASE 4096
+
+#define ERDMA_BAR_SQDB_SPACE_OFFSET ERDMA_BAR_DB_SPACE_BASE
+#define ERDMA_BAR_SQDB_SPACE_SIZE (384 * 1024)
+
+#define ERDMA_BAR_RQDB_SPACE_OFFSET \
+ (ERDMA_BAR_SQDB_SPACE_OFFSET + ERDMA_BAR_SQDB_SPACE_SIZE)
+#define ERDMA_BAR_RQDB_SPACE_SIZE (96 * 1024)
+
+#define ERDMA_BAR_CQDB_SPACE_OFFSET \
+ (ERDMA_BAR_RQDB_SPACE_OFFSET + ERDMA_BAR_RQDB_SPACE_SIZE)
+
+#define ERDMA_SDB_SHARED_PAGE_INDEX 95
+
+/* Doorbell related. */
+#define ERDMA_DB_SIZE 8
+
+#define ERDMA_CQDB_IDX_MASK GENMASK_ULL(63, 56)
+#define ERDMA_CQDB_CQN_MASK GENMASK_ULL(55, 32)
+#define ERDMA_CQDB_ARM_MASK BIT_ULL(31)
+#define ERDMA_CQDB_SOL_MASK BIT_ULL(30)
+#define ERDMA_CQDB_CMDSN_MASK GENMASK_ULL(29, 28)
+#define ERDMA_CQDB_CI_MASK GENMASK_ULL(23, 0)
+
+#define ERDMA_EQDB_ARM_MASK BIT(31)
+#define ERDMA_EQDB_CI_MASK GENMASK_ULL(23, 0)
+
+#define ERDMA_PAGE_SIZE_SUPPORT 0x7FFFF000
+
+/* Hardware page size definition */
+#define ERDMA_HW_PAGE_SHIFT 12
+#define ERDMA_HW_PAGE_SIZE 4096
+
+/* WQE related. */
+#define EQE_SIZE 16
+#define EQE_SHIFT 4
+#define RQE_SIZE 32
+#define RQE_SHIFT 5
+#define CQE_SIZE 32
+#define CQE_SHIFT 5
+#define SQEBB_SIZE 32
+#define SQEBB_SHIFT 5
+#define SQEBB_MASK (~(SQEBB_SIZE - 1))
+#define SQEBB_ALIGN(size) ((size + SQEBB_SIZE - 1) & SQEBB_MASK)
+#define SQEBB_COUNT(size) (SQEBB_ALIGN(size) >> SQEBB_SHIFT)
+
+#define ERDMA_MAX_SQE_SIZE 128
+#define ERDMA_MAX_WQEBB_PER_SQE 4
+
+/* CMDQ related. */
+#define ERDMA_CMDQ_MAX_OUTSTANDING 128
+#define ERDMA_CMDQ_SQE_SIZE 128
+
+/* cmdq sub module definition. */
+enum CMDQ_WQE_SUB_MOD {
+ CMDQ_SUBMOD_RDMA = 0,
+ CMDQ_SUBMOD_COMMON = 1
+};
+
+enum CMDQ_RDMA_OPCODE {
+ CMDQ_OPCODE_QUERY_DEVICE = 0,
+ CMDQ_OPCODE_CREATE_QP = 1,
+ CMDQ_OPCODE_DESTROY_QP = 2,
+ CMDQ_OPCODE_MODIFY_QP = 3,
+ CMDQ_OPCODE_CREATE_CQ = 4,
+ CMDQ_OPCODE_DESTROY_CQ = 5,
+ CMDQ_OPCODE_REFLUSH = 6,
+ CMDQ_OPCODE_REG_MR = 8,
+ CMDQ_OPCODE_DEREG_MR = 9,
+ CMDQ_OPCODE_SET_GID = 14,
+ CMDQ_OPCODE_CREATE_AH = 15,
+ CMDQ_OPCODE_DESTROY_AH = 16,
+ CMDQ_OPCODE_QUERY_QP = 17,
+};
+
+enum CMDQ_COMMON_OPCODE {
+ CMDQ_OPCODE_CREATE_EQ = 0,
+ CMDQ_OPCODE_DESTROY_EQ = 1,
+ CMDQ_OPCODE_QUERY_FW_INFO = 2,
+ CMDQ_OPCODE_CONF_MTU = 3,
+ CMDQ_OPCODE_GET_STATS = 4,
+ CMDQ_OPCODE_CONF_DEVICE = 5,
+ CMDQ_OPCODE_ALLOC_DB = 8,
+ CMDQ_OPCODE_FREE_DB = 9,
+};
+
+/* cmdq-SQE HDR */
+#define ERDMA_CMD_HDR_WQEBB_CNT_MASK GENMASK_ULL(54, 52)
+#define ERDMA_CMD_HDR_CONTEXT_COOKIE_MASK GENMASK_ULL(47, 32)
+#define ERDMA_CMD_HDR_SUB_MOD_MASK GENMASK_ULL(25, 24)
+#define ERDMA_CMD_HDR_OPCODE_MASK GENMASK_ULL(23, 16)
+#define ERDMA_CMD_HDR_WQEBB_INDEX_MASK GENMASK_ULL(15, 0)
+
+struct erdma_cmdq_destroy_cq_req {
+ u64 hdr;
+ u32 cqn;
+};
+
+#define ERDMA_EQ_TYPE_AEQ 0
+#define ERDMA_EQ_TYPE_CEQ 1
+
+struct erdma_cmdq_create_eq_req {
+ u64 hdr;
+ u64 qbuf_addr;
+ u8 vector_idx;
+ u8 eqn;
+ u8 depth;
+ u8 qtype;
+ u32 db_dma_addr_l;
+ u32 db_dma_addr_h;
+};
+
+struct erdma_cmdq_destroy_eq_req {
+ u64 hdr;
+ u64 rsvd0;
+ u8 vector_idx;
+ u8 eqn;
+ u8 rsvd1;
+ u8 qtype;
+};
+
+/* config device cfg */
+#define ERDMA_CMD_CONFIG_DEVICE_PS_EN_MASK BIT(31)
+#define ERDMA_CMD_CONFIG_DEVICE_PGSHIFT_MASK GENMASK(4, 0)
+
+struct erdma_cmdq_config_device_req {
+ u64 hdr;
+ u32 cfg;
+ u32 rsvd[5];
+};
+
+struct erdma_cmdq_config_mtu_req {
+ u64 hdr;
+ u32 mtu;
+};
+
+/* ext db requests(alloc and free) cfg */
+#define ERDMA_CMD_EXT_DB_CQ_EN_MASK BIT(2)
+#define ERDMA_CMD_EXT_DB_RQ_EN_MASK BIT(1)
+#define ERDMA_CMD_EXT_DB_SQ_EN_MASK BIT(0)
+
+struct erdma_cmdq_ext_db_req {
+ u64 hdr;
+ u32 cfg;
+ u16 rdb_off;
+ u16 sdb_off;
+ u16 rsvd0;
+ u16 cdb_off;
+ u32 rsvd1[3];
+};
+
+/* alloc db response qword 0 definition */
+#define ERDMA_CMD_ALLOC_DB_RESP_RDB_MASK GENMASK_ULL(63, 48)
+#define ERDMA_CMD_ALLOC_DB_RESP_CDB_MASK GENMASK_ULL(47, 32)
+#define ERDMA_CMD_ALLOC_DB_RESP_SDB_MASK GENMASK_ULL(15, 0)
+
+/* create_cq cfg0 */
+#define ERDMA_CMD_CREATE_CQ_DEPTH_MASK GENMASK(31, 24)
+#define ERDMA_CMD_CREATE_CQ_PAGESIZE_MASK GENMASK(23, 20)
+#define ERDMA_CMD_CREATE_CQ_CQN_MASK GENMASK(19, 0)
+
+/* create_cq cfg1 */
+#define ERDMA_CMD_CREATE_CQ_MTT_CNT_MASK GENMASK(31, 16)
+#define ERDMA_CMD_CREATE_CQ_MTT_LEVEL_MASK BIT(15)
+#define ERDMA_CMD_CREATE_CQ_MTT_DB_CFG_MASK BIT(11)
+#define ERDMA_CMD_CREATE_CQ_EQN_MASK GENMASK(9, 0)
+
+/* create_cq cfg2 */
+#define ERDMA_CMD_CREATE_CQ_DB_CFG_MASK GENMASK(15, 0)
+
+struct erdma_cmdq_create_cq_req {
+ u64 hdr;
+ u32 cfg0;
+ u32 qbuf_addr_l;
+ u32 qbuf_addr_h;
+ u32 cfg1;
+ u64 cq_dbrec_dma;
+ u32 first_page_offset;
+ u32 cfg2;
+};
+
+/* regmr/deregmr cfg0 */
+#define ERDMA_CMD_MR_VALID_MASK BIT(31)
+#define ERDMA_CMD_MR_VERSION_MASK GENMASK(30, 28)
+#define ERDMA_CMD_MR_KEY_MASK GENMASK(27, 20)
+#define ERDMA_CMD_MR_MPT_IDX_MASK GENMASK(19, 0)
+
+/* regmr cfg1 */
+#define ERDMA_CMD_REGMR_PD_MASK GENMASK(31, 12)
+#define ERDMA_CMD_REGMR_TYPE_MASK GENMASK(7, 6)
+#define ERDMA_CMD_REGMR_RIGHT_MASK GENMASK(5, 1)
+
+/* regmr cfg2 */
+#define ERDMA_CMD_REGMR_PAGESIZE_MASK GENMASK(31, 27)
+#define ERDMA_CMD_REGMR_MTT_PAGESIZE_MASK GENMASK(26, 24)
+#define ERDMA_CMD_REGMR_MTT_LEVEL_MASK GENMASK(21, 20)
+#define ERDMA_CMD_REGMR_MTT_CNT_MASK GENMASK(19, 0)
+
+struct erdma_cmdq_reg_mr_req {
+ u64 hdr;
+ u32 cfg0;
+ u32 cfg1;
+ u64 start_va;
+ u32 size;
+ u32 cfg2;
+ union {
+ u64 phy_addr[4];
+ struct {
+ u64 rsvd;
+ u32 size_h;
+ u32 mtt_cnt_h;
+ };
+ };
+};
+
+struct erdma_cmdq_dereg_mr_req {
+ u64 hdr;
+ u32 cfg;
+};
+
+/* create_av cfg0 */
+#define ERDMA_CMD_CREATE_AV_FL_MASK GENMASK(19, 0)
+#define ERDMA_CMD_CREATE_AV_NTYPE_MASK BIT(20)
+
+struct erdma_av_cfg {
+ u32 cfg0;
+ u8 traffic_class;
+ u8 hop_limit;
+ u8 sl;
+ u8 rsvd;
+ u16 udp_sport;
+ u16 sgid_index;
+ u8 dmac[ETH_ALEN];
+ u8 padding[2];
+ u8 dgid[ERDMA_ROCEV2_GID_SIZE];
+};
+
+struct erdma_cmdq_create_ah_req {
+ u64 hdr;
+ u32 pdn;
+ u32 ahn;
+ struct erdma_av_cfg av_cfg;
+};
+
+struct erdma_cmdq_destroy_ah_req {
+ u64 hdr;
+ u32 pdn;
+ u32 ahn;
+};
+
+/* modify qp cfg */
+#define ERDMA_CMD_MODIFY_QP_STATE_MASK GENMASK(31, 24)
+#define ERDMA_CMD_MODIFY_QP_CC_MASK GENMASK(23, 20)
+#define ERDMA_CMD_MODIFY_QP_QPN_MASK GENMASK(19, 0)
+
+struct erdma_cmdq_modify_qp_req {
+ u64 hdr;
+ u32 cfg;
+ u32 cookie;
+ __be32 dip;
+ __be32 sip;
+ __be16 sport;
+ __be16 dport;
+ u32 send_nxt;
+ u32 recv_nxt;
+};
+
+/* modify qp cfg1 for roce device */
+#define ERDMA_CMD_MODIFY_QP_DQPN_MASK GENMASK(19, 0)
+
+struct erdma_cmdq_mod_qp_req_rocev2 {
+ u64 hdr;
+ u32 cfg0;
+ u32 cfg1;
+ u32 attr_mask;
+ u32 qkey;
+ u32 rq_psn;
+ u32 sq_psn;
+ struct erdma_av_cfg av_cfg;
+};
+
+/* query qp response mask */
+#define ERDMA_CMD_QUERY_QP_RESP_SQ_PSN_MASK GENMASK_ULL(23, 0)
+#define ERDMA_CMD_QUERY_QP_RESP_RQ_PSN_MASK GENMASK_ULL(47, 24)
+#define ERDMA_CMD_QUERY_QP_RESP_QP_STATE_MASK GENMASK_ULL(55, 48)
+#define ERDMA_CMD_QUERY_QP_RESP_SQ_DRAINING_MASK GENMASK_ULL(56, 56)
+
+struct erdma_cmdq_query_qp_req_rocev2 {
+ u64 hdr;
+ u32 qpn;
+};
+
+enum erdma_qp_type {
+ ERDMA_QPT_RC = 0,
+ ERDMA_QPT_UD = 1,
+};
+
+/* create qp cfg0 */
+#define ERDMA_CMD_CREATE_QP_SQ_DEPTH_MASK GENMASK(31, 20)
+#define ERDMA_CMD_CREATE_QP_QPN_MASK GENMASK(19, 0)
+
+/* create qp cfg1 */
+#define ERDMA_CMD_CREATE_QP_RQ_DEPTH_MASK GENMASK(31, 20)
+#define ERDMA_CMD_CREATE_QP_PD_MASK GENMASK(19, 0)
+
+/* create qp cfg2 */
+#define ERDMA_CMD_CREATE_QP_TYPE_MASK GENMASK(3, 0)
+
+/* create qp cqn_mtt_cfg */
+#define ERDMA_CMD_CREATE_QP_PAGE_SIZE_MASK GENMASK(31, 28)
+#define ERDMA_CMD_CREATE_QP_DB_CFG_MASK BIT(25)
+#define ERDMA_CMD_CREATE_QP_CQN_MASK GENMASK(23, 0)
+
+/* create qp mtt_cfg */
+#define ERDMA_CMD_CREATE_QP_PAGE_OFFSET_MASK GENMASK(31, 12)
+#define ERDMA_CMD_CREATE_QP_MTT_CNT_MASK GENMASK(11, 1)
+#define ERDMA_CMD_CREATE_QP_MTT_LEVEL_MASK BIT(0)
+
+/* create qp db cfg */
+#define ERDMA_CMD_CREATE_QP_SQDB_CFG_MASK GENMASK(31, 16)
+#define ERDMA_CMD_CREATE_QP_RQDB_CFG_MASK GENMASK(15, 0)
+
+#define ERDMA_CMDQ_CREATE_QP_RESP_COOKIE_MASK GENMASK_ULL(31, 0)
+
+struct erdma_cmdq_create_qp_req {
+ u64 hdr;
+ u32 cfg0;
+ u32 cfg1;
+ u32 sq_cqn_mtt_cfg;
+ u32 rq_cqn_mtt_cfg;
+ u64 sq_buf_addr;
+ u64 rq_buf_addr;
+ u32 sq_mtt_cfg;
+ u32 rq_mtt_cfg;
+ u64 sq_dbrec_dma;
+ u64 rq_dbrec_dma;
+
+ u64 sq_mtt_entry[3];
+ u64 rq_mtt_entry[3];
+
+ u32 db_cfg;
+ u32 cfg2;
+};
+
+struct erdma_cmdq_destroy_qp_req {
+ u64 hdr;
+ u32 qpn;
+};
+
+struct erdma_cmdq_reflush_req {
+ u64 hdr;
+ u32 qpn;
+ u32 sq_pi;
+ u32 rq_pi;
+};
+
+#define ERDMA_HW_RESP_SIZE 256
+
+struct erdma_cmdq_query_req {
+ u64 hdr;
+ u32 rsvd;
+ u32 index;
+
+ u64 target_addr;
+ u32 target_length;
+};
+
+#define ERDMA_HW_RESP_MAGIC 0x5566
+
+struct erdma_cmdq_query_resp_hdr {
+ u16 magic;
+ u8 ver;
+ u8 length;
+
+ u32 index;
+ u32 rsvd[2];
+};
+
+struct erdma_cmdq_query_stats_resp {
+ struct erdma_cmdq_query_resp_hdr hdr;
+
+ u64 tx_req_cnt;
+ u64 tx_packets_cnt;
+ u64 tx_bytes_cnt;
+ u64 tx_drop_packets_cnt;
+ u64 tx_bps_meter_drop_packets_cnt;
+ u64 tx_pps_meter_drop_packets_cnt;
+ u64 rx_packets_cnt;
+ u64 rx_bytes_cnt;
+ u64 rx_drop_packets_cnt;
+ u64 rx_bps_meter_drop_packets_cnt;
+ u64 rx_pps_meter_drop_packets_cnt;
+};
+
+enum erdma_network_type {
+ ERDMA_NETWORK_TYPE_IPV4 = 0,
+ ERDMA_NETWORK_TYPE_IPV6 = 1,
+};
+
+enum erdma_set_gid_op {
+ ERDMA_SET_GID_OP_ADD = 0,
+ ERDMA_SET_GID_OP_DEL = 1,
+};
+
+/* set gid cfg */
+#define ERDMA_CMD_SET_GID_SGID_IDX_MASK GENMASK(15, 0)
+#define ERDMA_CMD_SET_GID_NTYPE_MASK BIT(16)
+#define ERDMA_CMD_SET_GID_OP_MASK BIT(31)
+
+struct erdma_cmdq_set_gid_req {
+ u64 hdr;
+ u32 cfg;
+ u8 gid[ERDMA_ROCEV2_GID_SIZE];
+};
+
+/* cap qword 0 definition */
+#define ERDMA_CMD_DEV_CAP_MAX_GID_MASK GENMASK_ULL(51, 48)
+#define ERDMA_CMD_DEV_CAP_MAX_CQE_MASK GENMASK_ULL(47, 40)
+#define ERDMA_CMD_DEV_CAP_FLAGS_MASK GENMASK_ULL(31, 24)
+#define ERDMA_CMD_DEV_CAP_MAX_RECV_WR_MASK GENMASK_ULL(23, 16)
+#define ERDMA_CMD_DEV_CAP_MAX_AH_MASK GENMASK_ULL(15, 8)
+#define ERDMA_CMD_DEV_CAP_MAX_MR_SIZE_MASK GENMASK_ULL(7, 0)
+
+/* cap qword 1 definition */
+#define ERDMA_CMD_DEV_CAP_DMA_LOCAL_KEY_MASK GENMASK_ULL(63, 32)
+#define ERDMA_CMD_DEV_CAP_DEFAULT_CC_MASK GENMASK_ULL(31, 28)
+#define ERDMA_CMD_DEV_CAP_QBLOCK_MASK GENMASK_ULL(27, 16)
+#define ERDMA_CMD_DEV_CAP_MAX_MW_MASK GENMASK_ULL(7, 0)
+
+#define ERDMA_NQP_PER_QBLOCK 1024
+
+enum {
+ ERDMA_DEV_CAP_FLAGS_ATOMIC = 1 << 7,
+ ERDMA_DEV_CAP_FLAGS_MTT_VA = 1 << 5,
+ ERDMA_DEV_CAP_FLAGS_EXTEND_DB = 1 << 3,
+};
+
+#define ERDMA_CMD_INFO0_FW_VER_MASK GENMASK_ULL(31, 0)
+
+/* CQE hdr */
+#define ERDMA_CQE_HDR_OWNER_MASK BIT(31)
+#define ERDMA_CQE_HDR_OPCODE_MASK GENMASK(23, 16)
+#define ERDMA_CQE_HDR_QTYPE_MASK GENMASK(15, 8)
+#define ERDMA_CQE_HDR_SYNDROME_MASK GENMASK(7, 0)
+
+#define ERDMA_CQE_QTYPE_SQ 0
+#define ERDMA_CQE_QTYPE_RQ 1
+#define ERDMA_CQE_QTYPE_CMDQ 2
+
+#define ERDMA_CQE_NTYPE_MASK BIT(31)
+#define ERDMA_CQE_SL_MASK GENMASK(27, 20)
+#define ERDMA_CQE_SQPN_MASK GENMASK(19, 0)
+
+struct erdma_cqe {
+ __be32 hdr;
+ __be32 qe_idx;
+ __be32 qpn;
+ union {
+ __le32 imm_data;
+ __be32 inv_rkey;
+ };
+ __be32 size;
+ union {
+ struct {
+ __be32 rsvd[3];
+ } rc;
+
+ struct {
+ __be32 rsvd[2];
+ __be32 info;
+ } ud;
+ };
+};
+
+struct erdma_sge {
+ __aligned_le64 addr;
+ __le32 length;
+ __le32 key;
+};
+
+/* Receive Queue Element */
+struct erdma_rqe {
+ __le16 qe_idx;
+ __le16 rsvd0;
+ __le32 qpn;
+ __le32 rsvd1;
+ __le32 rsvd2;
+ __le64 to;
+ __le32 length;
+ __le32 stag;
+};
+
+/* SQE */
+#define ERDMA_SQE_HDR_SGL_LEN_MASK GENMASK_ULL(63, 56)
+#define ERDMA_SQE_HDR_WQEBB_CNT_MASK GENMASK_ULL(54, 52)
+#define ERDMA_SQE_HDR_QPN_MASK GENMASK_ULL(51, 32)
+#define ERDMA_SQE_HDR_OPCODE_MASK GENMASK_ULL(31, 27)
+#define ERDMA_SQE_HDR_DWQE_MASK BIT_ULL(26)
+#define ERDMA_SQE_HDR_INLINE_MASK BIT_ULL(25)
+#define ERDMA_SQE_HDR_FENCE_MASK BIT_ULL(24)
+#define ERDMA_SQE_HDR_SE_MASK BIT_ULL(23)
+#define ERDMA_SQE_HDR_CE_MASK BIT_ULL(22)
+#define ERDMA_SQE_HDR_WQEBB_INDEX_MASK GENMASK_ULL(15, 0)
+
+/* REG MR attrs */
+#define ERDMA_SQE_MR_ACCESS_MASK GENMASK(5, 1)
+#define ERDMA_SQE_MR_MTT_TYPE_MASK GENMASK(7, 6)
+#define ERDMA_SQE_MR_MTT_CNT_MASK GENMASK(31, 12)
+
+struct erdma_write_sqe {
+ __le64 hdr;
+ __be32 imm_data;
+ __le32 length;
+
+ __le32 sink_stag;
+ __le32 sink_to_l;
+ __le32 sink_to_h;
+
+ __le32 rsvd;
+
+ struct erdma_sge sgl[];
+};
+
+struct erdma_send_sqe_rc {
+ __le64 hdr;
+ union {
+ __be32 imm_data;
+ __le32 invalid_stag;
+ };
+
+ __le32 length;
+ struct erdma_sge sgl[];
+};
+
+struct erdma_send_sqe_ud {
+ __le64 hdr;
+ __be32 imm_data;
+ __le32 length;
+ __le32 qkey;
+ __le32 dst_qpn;
+ __le32 ahn;
+ __le32 rsvd;
+ struct erdma_sge sgl[];
+};
+
+struct erdma_readreq_sqe {
+ __le64 hdr;
+ __le32 invalid_stag;
+ __le32 length;
+ __le32 sink_stag;
+ __le32 sink_to_l;
+ __le32 sink_to_h;
+ __le32 rsvd;
+};
+
+struct erdma_atomic_sqe {
+ __le64 hdr;
+ __le64 rsvd;
+ __le64 fetchadd_swap_data;
+ __le64 cmp_data;
+
+ struct erdma_sge remote;
+ struct erdma_sge sgl;
+};
+
+struct erdma_reg_mr_sqe {
+ __le64 hdr;
+ __le64 addr;
+ __le32 length;
+ __le32 stag;
+ __le32 attrs;
+ __le32 rsvd;
+};
+
+/* EQ related. */
+#define ERDMA_DEFAULT_EQ_DEPTH 4096
+
+/* ceqe */
+#define ERDMA_CEQE_HDR_DB_MASK BIT_ULL(63)
+#define ERDMA_CEQE_HDR_PI_MASK GENMASK_ULL(55, 32)
+#define ERDMA_CEQE_HDR_O_MASK BIT_ULL(31)
+#define ERDMA_CEQE_HDR_CQN_MASK GENMASK_ULL(19, 0)
+
+/* aeqe */
+#define ERDMA_AEQE_HDR_O_MASK BIT(31)
+#define ERDMA_AEQE_HDR_TYPE_MASK GENMASK(23, 16)
+#define ERDMA_AEQE_HDR_SUBTYPE_MASK GENMASK(7, 0)
+
+#define ERDMA_AE_TYPE_QP_FATAL_EVENT 0
+#define ERDMA_AE_TYPE_QP_ERQ_ERR_EVENT 1
+#define ERDMA_AE_TYPE_ACC_ERR_EVENT 2
+#define ERDMA_AE_TYPE_CQ_ERR 3
+#define ERDMA_AE_TYPE_OTHER_ERROR 4
+
+struct erdma_aeqe {
+ __le32 hdr;
+ __le32 event_data0;
+ __le32 event_data1;
+ __le32 rsvd;
+};
+
+enum erdma_opcode {
+ ERDMA_OP_WRITE = 0,
+ ERDMA_OP_READ = 1,
+ ERDMA_OP_SEND = 2,
+ ERDMA_OP_SEND_WITH_IMM = 3,
+
+ ERDMA_OP_RECEIVE = 4,
+ ERDMA_OP_RECV_IMM = 5,
+ ERDMA_OP_RECV_INV = 6,
+
+ ERDMA_OP_RSVD0 = 7,
+ ERDMA_OP_RSVD1 = 8,
+ ERDMA_OP_WRITE_WITH_IMM = 9,
+
+ ERDMA_OP_RSVD2 = 10,
+ ERDMA_OP_RSVD3 = 11,
+
+ ERDMA_OP_RSP_SEND_IMM = 12,
+ ERDMA_OP_SEND_WITH_INV = 13,
+
+ ERDMA_OP_REG_MR = 14,
+ ERDMA_OP_LOCAL_INV = 15,
+ ERDMA_OP_READ_WITH_INV = 16,
+ ERDMA_OP_ATOMIC_CAS = 17,
+ ERDMA_OP_ATOMIC_FAA = 18,
+ ERDMA_NUM_OPCODES = 19,
+ ERDMA_OP_INVALID = ERDMA_NUM_OPCODES + 1
+};
+
+enum erdma_wc_status {
+ ERDMA_WC_SUCCESS = 0,
+ ERDMA_WC_GENERAL_ERR = 1,
+ ERDMA_WC_RECV_WQE_FORMAT_ERR = 2,
+ ERDMA_WC_RECV_STAG_INVALID_ERR = 3,
+ ERDMA_WC_RECV_ADDR_VIOLATION_ERR = 4,
+ ERDMA_WC_RECV_RIGHT_VIOLATION_ERR = 5,
+ ERDMA_WC_RECV_PDID_ERR = 6,
+ ERDMA_WC_RECV_WARRPING_ERR = 7,
+ ERDMA_WC_SEND_WQE_FORMAT_ERR = 8,
+ ERDMA_WC_SEND_WQE_ORD_EXCEED = 9,
+ ERDMA_WC_SEND_STAG_INVALID_ERR = 10,
+ ERDMA_WC_SEND_ADDR_VIOLATION_ERR = 11,
+ ERDMA_WC_SEND_RIGHT_VIOLATION_ERR = 12,
+ ERDMA_WC_SEND_PDID_ERR = 13,
+ ERDMA_WC_SEND_WARRPING_ERR = 14,
+ ERDMA_WC_FLUSH_ERR = 15,
+ ERDMA_WC_RETRY_EXC_ERR = 16,
+ ERDMA_NUM_WC_STATUS
+};
+
+enum erdma_vendor_err {
+ ERDMA_WC_VENDOR_NO_ERR = 0,
+ ERDMA_WC_VENDOR_INVALID_RQE = 1,
+ ERDMA_WC_VENDOR_RQE_INVALID_STAG = 2,
+ ERDMA_WC_VENDOR_RQE_ADDR_VIOLATION = 3,
+ ERDMA_WC_VENDOR_RQE_ACCESS_RIGHT_ERR = 4,
+ ERDMA_WC_VENDOR_RQE_INVALID_PD = 5,
+ ERDMA_WC_VENDOR_RQE_WRAP_ERR = 6,
+ ERDMA_WC_VENDOR_INVALID_SQE = 0x20,
+ ERDMA_WC_VENDOR_ZERO_ORD = 0x21,
+ ERDMA_WC_VENDOR_SQE_INVALID_STAG = 0x30,
+ ERDMA_WC_VENDOR_SQE_ADDR_VIOLATION = 0x31,
+ ERDMA_WC_VENDOR_SQE_ACCESS_ERR = 0x32,
+ ERDMA_WC_VENDOR_SQE_INVALID_PD = 0x33,
+ ERDMA_WC_VENDOR_SQE_WARP_ERR = 0x34
+};
+
+#endif
diff --git a/drivers/infiniband/hw/erdma/erdma_main.c b/drivers/infiniband/hw/erdma/erdma_main.c
new file mode 100644
index 000000000000..f35b30235018
--- /dev/null
+++ b/drivers/infiniband/hw/erdma/erdma_main.c
@@ -0,0 +1,684 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+
+/* Authors: Cheng Xu <chengyou@linux.alibaba.com> */
+/* Kai Shen <kaishen@linux.alibaba.com> */
+/* Copyright (c) 2020-2022, Alibaba Group. */
+
+#include <linux/module.h>
+#include <net/addrconf.h>
+#include <rdma/erdma-abi.h>
+
+#include "erdma.h"
+#include "erdma_cm.h"
+#include "erdma_verbs.h"
+
+MODULE_AUTHOR("Cheng Xu <chengyou@linux.alibaba.com>");
+MODULE_DESCRIPTION("Alibaba elasticRDMA adapter driver");
+MODULE_LICENSE("Dual BSD/GPL");
+
+static int erdma_netdev_event(struct notifier_block *nb, unsigned long event,
+ void *arg)
+{
+ struct net_device *netdev = netdev_notifier_info_to_dev(arg);
+ struct erdma_dev *dev = container_of(nb, struct erdma_dev, netdev_nb);
+
+ if (dev->netdev == NULL || dev->netdev != netdev)
+ goto done;
+
+ switch (event) {
+ case NETDEV_CHANGEMTU:
+ if (dev->mtu != netdev->mtu) {
+ erdma_set_mtu(dev, netdev->mtu);
+ dev->mtu = netdev->mtu;
+ }
+ break;
+ case NETDEV_REGISTER:
+ case NETDEV_UNREGISTER:
+ case NETDEV_CHANGEADDR:
+ case NETDEV_GOING_DOWN:
+ case NETDEV_CHANGE:
+ default:
+ break;
+ }
+
+done:
+ return NOTIFY_OK;
+}
+
+static int erdma_enum_and_get_netdev(struct erdma_dev *dev)
+{
+ struct net_device *netdev;
+ int ret = -EPROBE_DEFER;
+
+ /* Already binded to a net_device, so we skip. */
+ if (dev->netdev)
+ return 0;
+
+ rtnl_lock();
+ for_each_netdev(&init_net, netdev) {
+ /*
+ * In erdma, the paired netdev and ibdev should have the same
+ * MAC address. erdma can get the value from its PCIe bar
+ * registers. Since erdma can not get the paired netdev
+ * reference directly, we do a traverse here to get the paired
+ * netdev.
+ */
+ if (ether_addr_equal_unaligned(netdev->perm_addr,
+ dev->attrs.peer_addr)) {
+ ret = ib_device_set_netdev(&dev->ibdev, netdev, 1);
+ if (ret) {
+ rtnl_unlock();
+ ibdev_warn(&dev->ibdev,
+ "failed (%d) to link netdev", ret);
+ return ret;
+ }
+
+ dev->netdev = netdev;
+ break;
+ }
+ }
+
+ rtnl_unlock();
+
+ return ret;
+}
+
+static int erdma_device_register(struct erdma_dev *dev)
+{
+ struct ib_device *ibdev = &dev->ibdev;
+ int ret;
+
+ ret = erdma_enum_and_get_netdev(dev);
+ if (ret)
+ return ret;
+
+ dev->mtu = dev->netdev->mtu;
+ addrconf_addr_eui48((u8 *)&ibdev->node_guid, dev->netdev->dev_addr);
+
+ ret = ib_register_device(ibdev, "erdma_%d", &dev->pdev->dev);
+ if (ret) {
+ dev_err(&dev->pdev->dev,
+ "ib_register_device failed: ret = %d\n", ret);
+ return ret;
+ }
+
+ dev->netdev_nb.notifier_call = erdma_netdev_event;
+ ret = register_netdevice_notifier(&dev->netdev_nb);
+ if (ret) {
+ ibdev_err(&dev->ibdev, "failed to register notifier.\n");
+ ib_unregister_device(ibdev);
+ }
+
+ return ret;
+}
+
+static irqreturn_t erdma_comm_irq_handler(int irq, void *data)
+{
+ struct erdma_dev *dev = data;
+
+ erdma_cmdq_completion_handler(&dev->cmdq);
+ erdma_aeq_event_handler(dev);
+
+ return IRQ_HANDLED;
+}
+
+static int erdma_request_vectors(struct erdma_dev *dev)
+{
+ int expect_irq_num = min(num_possible_cpus() + 1, ERDMA_NUM_MSIX_VEC);
+ int ret;
+
+ ret = pci_alloc_irq_vectors(dev->pdev, 1, expect_irq_num, PCI_IRQ_MSIX);
+ if (ret < 0) {
+ dev_err(&dev->pdev->dev, "request irq vectors failed(%d)\n",
+ ret);
+ return ret;
+ }
+ dev->attrs.irq_num = ret;
+
+ return 0;
+}
+
+static int erdma_comm_irq_init(struct erdma_dev *dev)
+{
+ snprintf(dev->comm_irq.name, ERDMA_IRQNAME_SIZE, "erdma-common@pci:%s",
+ pci_name(dev->pdev));
+ dev->comm_irq.msix_vector =
+ pci_irq_vector(dev->pdev, ERDMA_MSIX_VECTOR_CMDQ);
+
+ cpumask_set_cpu(cpumask_first(cpumask_of_pcibus(dev->pdev->bus)),
+ &dev->comm_irq.affinity_hint_mask);
+ irq_set_affinity_hint(dev->comm_irq.msix_vector,
+ &dev->comm_irq.affinity_hint_mask);
+
+ return request_irq(dev->comm_irq.msix_vector, erdma_comm_irq_handler, 0,
+ dev->comm_irq.name, dev);
+}
+
+static void erdma_comm_irq_uninit(struct erdma_dev *dev)
+{
+ irq_set_affinity_hint(dev->comm_irq.msix_vector, NULL);
+ free_irq(dev->comm_irq.msix_vector, dev);
+}
+
+static int erdma_device_init(struct erdma_dev *dev, struct pci_dev *pdev)
+{
+ int ret;
+
+ dev->proto = erdma_reg_read32(dev, ERDMA_REGS_DEV_PROTO_REG);
+
+ dev->resp_pool = dma_pool_create("erdma_resp_pool", &pdev->dev,
+ ERDMA_HW_RESP_SIZE, ERDMA_HW_RESP_SIZE,
+ 0);
+ if (!dev->resp_pool)
+ return -ENOMEM;
+
+ dev->db_pool = dma_pool_create("erdma_db_pool", &pdev->dev,
+ ERDMA_DB_SIZE, ERDMA_DB_SIZE, 0);
+ if (!dev->db_pool) {
+ ret = -ENOMEM;
+ goto destroy_resp_pool;
+ }
+
+ ret = dma_set_mask_and_coherent(&pdev->dev,
+ DMA_BIT_MASK(ERDMA_PCI_WIDTH));
+ if (ret)
+ goto destroy_db_pool;
+
+ dma_set_max_seg_size(&pdev->dev, UINT_MAX);
+
+ return 0;
+
+destroy_db_pool:
+ dma_pool_destroy(dev->db_pool);
+
+destroy_resp_pool:
+ dma_pool_destroy(dev->resp_pool);
+
+ return ret;
+}
+
+static void erdma_device_uninit(struct erdma_dev *dev)
+{
+ dma_pool_destroy(dev->db_pool);
+ dma_pool_destroy(dev->resp_pool);
+}
+
+static void erdma_hw_reset(struct erdma_dev *dev)
+{
+ u32 ctrl = FIELD_PREP(ERDMA_REG_DEV_CTRL_RESET_MASK, 1);
+
+ erdma_reg_write32(dev, ERDMA_REGS_DEV_CTRL_REG, ctrl);
+}
+
+static int erdma_wait_hw_init_done(struct erdma_dev *dev)
+{
+ int i;
+
+ erdma_reg_write32(dev, ERDMA_REGS_DEV_CTRL_REG,
+ FIELD_PREP(ERDMA_REG_DEV_CTRL_INIT_MASK, 1));
+
+ for (i = 0; i < ERDMA_WAIT_DEV_DONE_CNT; i++) {
+ if (erdma_reg_read32_filed(dev, ERDMA_REGS_DEV_ST_REG,
+ ERDMA_REG_DEV_ST_INIT_DONE_MASK))
+ break;
+
+ msleep(ERDMA_REG_ACCESS_WAIT_MS);
+ }
+
+ if (i == ERDMA_WAIT_DEV_DONE_CNT) {
+ dev_err(&dev->pdev->dev, "wait init done failed.\n");
+ return -ETIMEDOUT;
+ }
+
+ return 0;
+}
+
+static const struct pci_device_id erdma_pci_tbl[] = {
+ { PCI_DEVICE(PCI_VENDOR_ID_ALIBABA, 0x107f) },
+ {}
+};
+
+static int erdma_probe_dev(struct pci_dev *pdev)
+{
+ struct erdma_dev *dev;
+ int bars, err;
+ u32 version;
+
+ err = pci_enable_device(pdev);
+ if (err) {
+ dev_err(&pdev->dev, "pci_enable_device failed(%d)\n", err);
+ return err;
+ }
+
+ pci_set_master(pdev);
+
+ dev = ib_alloc_device(erdma_dev, ibdev);
+ if (!dev) {
+ dev_err(&pdev->dev, "ib_alloc_device failed\n");
+ err = -ENOMEM;
+ goto err_disable_device;
+ }
+
+ pci_set_drvdata(pdev, dev);
+ dev->pdev = pdev;
+ dev->attrs.numa_node = dev_to_node(&pdev->dev);
+
+ bars = pci_select_bars(pdev, IORESOURCE_MEM);
+ err = pci_request_selected_regions(pdev, bars, DRV_MODULE_NAME);
+ if (bars != ERDMA_BAR_MASK || err) {
+ err = err ? err : -EINVAL;
+ goto err_ib_device_release;
+ }
+
+ dev->func_bar_addr = pci_resource_start(pdev, ERDMA_FUNC_BAR);
+ dev->func_bar_len = pci_resource_len(pdev, ERDMA_FUNC_BAR);
+
+ dev->func_bar =
+ devm_ioremap(&pdev->dev, dev->func_bar_addr, dev->func_bar_len);
+ if (!dev->func_bar) {
+ dev_err(&pdev->dev, "devm_ioremap failed.\n");
+ err = -EFAULT;
+ goto err_release_bars;
+ }
+
+ version = erdma_reg_read32(dev, ERDMA_REGS_VERSION_REG);
+ if (version == 0) {
+ /* we knows that it is a non-functional function. */
+ err = -ENODEV;
+ goto err_iounmap_func_bar;
+ }
+
+ err = erdma_device_init(dev, pdev);
+ if (err)
+ goto err_iounmap_func_bar;
+
+ err = erdma_request_vectors(dev);
+ if (err)
+ goto err_uninit_device;
+
+ err = erdma_comm_irq_init(dev);
+ if (err)
+ goto err_free_vectors;
+
+ err = erdma_aeq_init(dev);
+ if (err)
+ goto err_uninit_comm_irq;
+
+ err = erdma_cmdq_init(dev);
+ if (err)
+ goto err_uninit_aeq;
+
+ err = erdma_wait_hw_init_done(dev);
+ if (err)
+ goto err_uninit_cmdq;
+
+ err = erdma_ceqs_init(dev);
+ if (err)
+ goto err_reset_hw;
+
+ erdma_finish_cmdq_init(dev);
+
+ return 0;
+
+err_reset_hw:
+ erdma_hw_reset(dev);
+
+err_uninit_cmdq:
+ erdma_cmdq_destroy(dev);
+
+err_uninit_aeq:
+ erdma_eq_destroy(dev, &dev->aeq);
+
+err_uninit_comm_irq:
+ erdma_comm_irq_uninit(dev);
+
+err_free_vectors:
+ pci_free_irq_vectors(dev->pdev);
+
+err_uninit_device:
+ erdma_device_uninit(dev);
+
+err_iounmap_func_bar:
+ devm_iounmap(&pdev->dev, dev->func_bar);
+
+err_release_bars:
+ pci_release_selected_regions(pdev, bars);
+
+err_ib_device_release:
+ ib_dealloc_device(&dev->ibdev);
+
+err_disable_device:
+ pci_disable_device(pdev);
+
+ return err;
+}
+
+static void erdma_remove_dev(struct pci_dev *pdev)
+{
+ struct erdma_dev *dev = pci_get_drvdata(pdev);
+
+ erdma_ceqs_uninit(dev);
+ erdma_hw_reset(dev);
+ erdma_cmdq_destroy(dev);
+ erdma_eq_destroy(dev, &dev->aeq);
+ erdma_comm_irq_uninit(dev);
+ pci_free_irq_vectors(dev->pdev);
+ erdma_device_uninit(dev);
+
+ devm_iounmap(&pdev->dev, dev->func_bar);
+ pci_release_selected_regions(pdev, ERDMA_BAR_MASK);
+
+ ib_dealloc_device(&dev->ibdev);
+
+ pci_disable_device(pdev);
+}
+
+#define ERDMA_GET_CAP(name, cap) FIELD_GET(ERDMA_CMD_DEV_CAP_##name##_MASK, cap)
+
+static int erdma_dev_attrs_init(struct erdma_dev *dev)
+{
+ int err;
+ u64 req_hdr, cap0, cap1;
+
+ erdma_cmdq_build_reqhdr(&req_hdr, CMDQ_SUBMOD_RDMA,
+ CMDQ_OPCODE_QUERY_DEVICE);
+
+ err = erdma_post_cmd_wait(&dev->cmdq, &req_hdr, sizeof(req_hdr), &cap0,
+ &cap1, true);
+ if (err)
+ return err;
+
+ dev->attrs.max_cqe = 1 << ERDMA_GET_CAP(MAX_CQE, cap0);
+ dev->attrs.max_mr_size = 1ULL << ERDMA_GET_CAP(MAX_MR_SIZE, cap0);
+ dev->attrs.max_mw = 1 << ERDMA_GET_CAP(MAX_MW, cap1);
+ dev->attrs.max_recv_wr = 1 << ERDMA_GET_CAP(MAX_RECV_WR, cap0);
+ dev->attrs.max_gid = 1 << ERDMA_GET_CAP(MAX_GID, cap0);
+ dev->attrs.max_ah = 1 << ERDMA_GET_CAP(MAX_AH, cap0);
+ dev->attrs.local_dma_key = ERDMA_GET_CAP(DMA_LOCAL_KEY, cap1);
+ dev->attrs.cc = ERDMA_GET_CAP(DEFAULT_CC, cap1);
+ dev->attrs.max_qp = ERDMA_NQP_PER_QBLOCK * ERDMA_GET_CAP(QBLOCK, cap1);
+ dev->attrs.max_mr = dev->attrs.max_qp << 1;
+ dev->attrs.max_cq = dev->attrs.max_qp << 1;
+ dev->attrs.cap_flags = ERDMA_GET_CAP(FLAGS, cap0);
+
+ dev->attrs.max_send_wr = ERDMA_MAX_SEND_WR;
+ dev->attrs.max_ord = ERDMA_MAX_ORD;
+ dev->attrs.max_ird = ERDMA_MAX_IRD;
+ dev->attrs.max_send_sge = ERDMA_MAX_SEND_SGE;
+ dev->attrs.max_recv_sge = ERDMA_MAX_RECV_SGE;
+ dev->attrs.max_sge_rd = ERDMA_MAX_SGE_RD;
+ dev->attrs.max_pd = ERDMA_MAX_PD;
+
+ dev->res_cb[ERDMA_RES_TYPE_PD].max_cap = ERDMA_MAX_PD;
+ dev->res_cb[ERDMA_RES_TYPE_STAG_IDX].max_cap = dev->attrs.max_mr;
+ dev->res_cb[ERDMA_RES_TYPE_AH].max_cap = dev->attrs.max_ah;
+
+ erdma_cmdq_build_reqhdr(&req_hdr, CMDQ_SUBMOD_COMMON,
+ CMDQ_OPCODE_QUERY_FW_INFO);
+
+ err = erdma_post_cmd_wait(&dev->cmdq, &req_hdr, sizeof(req_hdr), &cap0,
+ &cap1, true);
+ if (!err)
+ dev->attrs.fw_version =
+ FIELD_GET(ERDMA_CMD_INFO0_FW_VER_MASK, cap0);
+
+ return err;
+}
+
+static int erdma_device_config(struct erdma_dev *dev)
+{
+ struct erdma_cmdq_config_device_req req = {};
+
+ if (!(dev->attrs.cap_flags & ERDMA_DEV_CAP_FLAGS_EXTEND_DB))
+ return 0;
+
+ erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_COMMON,
+ CMDQ_OPCODE_CONF_DEVICE);
+
+ req.cfg = FIELD_PREP(ERDMA_CMD_CONFIG_DEVICE_PGSHIFT_MASK, PAGE_SHIFT) |
+ FIELD_PREP(ERDMA_CMD_CONFIG_DEVICE_PS_EN_MASK, 1);
+
+ return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL,
+ true);
+}
+
+static int erdma_res_cb_init(struct erdma_dev *dev)
+{
+ int i, j;
+
+ for (i = 0; i < ERDMA_RES_CNT; i++) {
+ dev->res_cb[i].next_alloc_idx = 1;
+ spin_lock_init(&dev->res_cb[i].lock);
+ dev->res_cb[i].bitmap =
+ bitmap_zalloc(dev->res_cb[i].max_cap, GFP_KERNEL);
+ if (!dev->res_cb[i].bitmap)
+ goto err;
+ }
+
+ return 0;
+
+err:
+ for (j = 0; j < i; j++)
+ bitmap_free(dev->res_cb[j].bitmap);
+
+ return -ENOMEM;
+}
+
+static void erdma_res_cb_free(struct erdma_dev *dev)
+{
+ int i;
+
+ for (i = 0; i < ERDMA_RES_CNT; i++)
+ bitmap_free(dev->res_cb[i].bitmap);
+}
+
+static const struct ib_device_ops erdma_device_ops_rocev2 = {
+ .get_link_layer = erdma_get_link_layer,
+ .add_gid = erdma_add_gid,
+ .del_gid = erdma_del_gid,
+ .query_pkey = erdma_query_pkey,
+ .create_ah = erdma_create_ah,
+ .destroy_ah = erdma_destroy_ah,
+ .query_ah = erdma_query_ah,
+
+ INIT_RDMA_OBJ_SIZE(ib_ah, erdma_ah, ibah),
+};
+
+static const struct ib_device_ops erdma_device_ops_iwarp = {
+ .iw_accept = erdma_accept,
+ .iw_add_ref = erdma_qp_get_ref,
+ .iw_connect = erdma_connect,
+ .iw_create_listen = erdma_create_listen,
+ .iw_destroy_listen = erdma_destroy_listen,
+ .iw_get_qp = erdma_get_ibqp,
+ .iw_reject = erdma_reject,
+ .iw_rem_ref = erdma_qp_put_ref,
+};
+
+static const struct ib_device_ops erdma_device_ops = {
+ .owner = THIS_MODULE,
+ .driver_id = RDMA_DRIVER_ERDMA,
+ .uverbs_abi_ver = ERDMA_ABI_VERSION,
+
+ .alloc_hw_port_stats = erdma_alloc_hw_port_stats,
+ .alloc_mr = erdma_ib_alloc_mr,
+ .alloc_pd = erdma_alloc_pd,
+ .alloc_ucontext = erdma_alloc_ucontext,
+ .create_cq = erdma_create_cq,
+ .create_qp = erdma_create_qp,
+ .dealloc_pd = erdma_dealloc_pd,
+ .dealloc_ucontext = erdma_dealloc_ucontext,
+ .dereg_mr = erdma_dereg_mr,
+ .destroy_cq = erdma_destroy_cq,
+ .destroy_qp = erdma_destroy_qp,
+ .disassociate_ucontext = erdma_disassociate_ucontext,
+ .get_dma_mr = erdma_get_dma_mr,
+ .get_hw_stats = erdma_get_hw_stats,
+ .get_port_immutable = erdma_get_port_immutable,
+ .map_mr_sg = erdma_map_mr_sg,
+ .mmap = erdma_mmap,
+ .mmap_free = erdma_mmap_free,
+ .post_recv = erdma_post_recv,
+ .post_send = erdma_post_send,
+ .poll_cq = erdma_poll_cq,
+ .query_device = erdma_query_device,
+ .query_gid = erdma_query_gid,
+ .query_port = erdma_query_port,
+ .query_qp = erdma_query_qp,
+ .req_notify_cq = erdma_req_notify_cq,
+ .reg_user_mr = erdma_reg_user_mr,
+ .modify_qp = erdma_modify_qp,
+
+ INIT_RDMA_OBJ_SIZE(ib_cq, erdma_cq, ibcq),
+ INIT_RDMA_OBJ_SIZE(ib_pd, erdma_pd, ibpd),
+ INIT_RDMA_OBJ_SIZE(ib_ucontext, erdma_ucontext, ibucontext),
+ INIT_RDMA_OBJ_SIZE(ib_qp, erdma_qp, ibqp),
+};
+
+static int erdma_ib_device_add(struct pci_dev *pdev)
+{
+ struct erdma_dev *dev = pci_get_drvdata(pdev);
+ struct ib_device *ibdev = &dev->ibdev;
+ u64 mac;
+ int ret;
+
+ ret = erdma_dev_attrs_init(dev);
+ if (ret)
+ return ret;
+
+ ret = erdma_device_config(dev);
+ if (ret)
+ return ret;
+
+ if (erdma_device_iwarp(dev)) {
+ ibdev->node_type = RDMA_NODE_RNIC;
+ ib_set_device_ops(ibdev, &erdma_device_ops_iwarp);
+ } else {
+ ibdev->node_type = RDMA_NODE_IB_CA;
+ ib_set_device_ops(ibdev, &erdma_device_ops_rocev2);
+ }
+
+ memcpy(ibdev->node_desc, ERDMA_NODE_DESC, sizeof(ERDMA_NODE_DESC));
+
+ /*
+ * Current model (one-to-one device association):
+ * One ERDMA device per net_device or, equivalently,
+ * per physical port.
+ */
+ ibdev->phys_port_cnt = 1;
+ ibdev->num_comp_vectors = dev->attrs.irq_num - 1;
+
+ ib_set_device_ops(ibdev, &erdma_device_ops);
+
+ INIT_LIST_HEAD(&dev->cep_list);
+
+ spin_lock_init(&dev->lock);
+ xa_init_flags(&dev->qp_xa, XA_FLAGS_ALLOC1);
+ xa_init_flags(&dev->cq_xa, XA_FLAGS_ALLOC1);
+ dev->next_alloc_cqn = 1;
+ dev->next_alloc_qpn = 1;
+
+ ret = erdma_res_cb_init(dev);
+ if (ret)
+ return ret;
+
+ atomic_set(&dev->num_ctx, 0);
+
+ mac = erdma_reg_read32(dev, ERDMA_REGS_NETDEV_MAC_L_REG);
+ mac |= (u64)erdma_reg_read32(dev, ERDMA_REGS_NETDEV_MAC_H_REG) << 32;
+
+ u64_to_ether_addr(mac, dev->attrs.peer_addr);
+
+ dev->reflush_wq = alloc_workqueue("erdma-reflush-wq", WQ_UNBOUND,
+ WQ_UNBOUND_MAX_ACTIVE);
+ if (!dev->reflush_wq) {
+ ret = -ENOMEM;
+ goto err_alloc_workqueue;
+ }
+
+ ret = erdma_device_register(dev);
+ if (ret)
+ goto err_register;
+
+ return 0;
+
+err_register:
+ destroy_workqueue(dev->reflush_wq);
+err_alloc_workqueue:
+ xa_destroy(&dev->qp_xa);
+ xa_destroy(&dev->cq_xa);
+
+ erdma_res_cb_free(dev);
+
+ return ret;
+}
+
+static void erdma_ib_device_remove(struct pci_dev *pdev)
+{
+ struct erdma_dev *dev = pci_get_drvdata(pdev);
+
+ unregister_netdevice_notifier(&dev->netdev_nb);
+ ib_unregister_device(&dev->ibdev);
+
+ destroy_workqueue(dev->reflush_wq);
+ erdma_res_cb_free(dev);
+ xa_destroy(&dev->qp_xa);
+ xa_destroy(&dev->cq_xa);
+}
+
+static int erdma_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+{
+ int ret;
+
+ ret = erdma_probe_dev(pdev);
+ if (ret)
+ return ret;
+
+ ret = erdma_ib_device_add(pdev);
+ if (ret) {
+ erdma_remove_dev(pdev);
+ return ret;
+ }
+
+ return 0;
+}
+
+static void erdma_remove(struct pci_dev *pdev)
+{
+ erdma_ib_device_remove(pdev);
+ erdma_remove_dev(pdev);
+}
+
+static struct pci_driver erdma_pci_driver = {
+ .name = DRV_MODULE_NAME,
+ .id_table = erdma_pci_tbl,
+ .probe = erdma_probe,
+ .remove = erdma_remove
+};
+
+MODULE_DEVICE_TABLE(pci, erdma_pci_tbl);
+
+static __init int erdma_init_module(void)
+{
+ int ret;
+
+ ret = erdma_cm_init();
+ if (ret)
+ return ret;
+
+ ret = pci_register_driver(&erdma_pci_driver);
+ if (ret)
+ erdma_cm_exit();
+
+ return ret;
+}
+
+static void __exit erdma_exit_module(void)
+{
+ pci_unregister_driver(&erdma_pci_driver);
+
+ erdma_cm_exit();
+}
+
+module_init(erdma_init_module);
+module_exit(erdma_exit_module);
diff --git a/drivers/infiniband/hw/erdma/erdma_qp.c b/drivers/infiniband/hw/erdma/erdma_qp.c
new file mode 100644
index 000000000000..25f6c49aec77
--- /dev/null
+++ b/drivers/infiniband/hw/erdma/erdma_qp.c
@@ -0,0 +1,757 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+
+/* Authors: Cheng Xu <chengyou@linux.alibaba.com> */
+/* Kai Shen <kaishen@linux.alibaba.com> */
+/* Copyright (c) 2020-2021, Alibaba Group */
+/* Authors: Bernard Metzler <bmt@zurich.ibm.com> */
+/* Copyright (c) 2008-2019, IBM Corporation */
+
+#include "erdma_cm.h"
+#include "erdma_verbs.h"
+
+void erdma_qp_llp_close(struct erdma_qp *qp)
+{
+ struct erdma_mod_qp_params_iwarp params;
+
+ down_write(&qp->state_lock);
+
+ switch (qp->attrs.iwarp.state) {
+ case ERDMA_QPS_IWARP_RTS:
+ case ERDMA_QPS_IWARP_RTR:
+ case ERDMA_QPS_IWARP_IDLE:
+ case ERDMA_QPS_IWARP_TERMINATE:
+ params.state = ERDMA_QPS_IWARP_CLOSING;
+ erdma_modify_qp_state_iwarp(qp, &params, ERDMA_QPA_IWARP_STATE);
+ break;
+ case ERDMA_QPS_IWARP_CLOSING:
+ qp->attrs.iwarp.state = ERDMA_QPS_IWARP_IDLE;
+ break;
+ default:
+ break;
+ }
+
+ if (qp->cep) {
+ erdma_cep_put(qp->cep);
+ qp->cep = NULL;
+ }
+
+ up_write(&qp->state_lock);
+}
+
+struct ib_qp *erdma_get_ibqp(struct ib_device *ibdev, int id)
+{
+ struct erdma_qp *qp = find_qp_by_qpn(to_edev(ibdev), id);
+
+ if (qp)
+ return &qp->ibqp;
+
+ return NULL;
+}
+
+static int
+erdma_modify_qp_state_to_rts(struct erdma_qp *qp,
+ struct erdma_mod_qp_params_iwarp *params,
+ enum erdma_qpa_mask_iwarp mask)
+{
+ int ret;
+ struct erdma_dev *dev = qp->dev;
+ struct erdma_cmdq_modify_qp_req req;
+ struct tcp_sock *tp;
+ struct erdma_cep *cep = qp->cep;
+ struct sockaddr_storage local_addr, remote_addr;
+
+ if (!(mask & ERDMA_QPA_IWARP_LLP_HANDLE))
+ return -EINVAL;
+
+ if (!(mask & ERDMA_QPA_IWARP_MPA))
+ return -EINVAL;
+
+ if (!(mask & ERDMA_QPA_IWARP_CC))
+ params->cc = qp->attrs.cc;
+
+ ret = getname_local(cep->sock, &local_addr);
+ if (ret < 0)
+ return ret;
+
+ ret = getname_peer(cep->sock, &remote_addr);
+ if (ret < 0)
+ return ret;
+
+ tp = tcp_sk(qp->cep->sock->sk);
+
+ erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
+ CMDQ_OPCODE_MODIFY_QP);
+
+ req.cfg = FIELD_PREP(ERDMA_CMD_MODIFY_QP_STATE_MASK, params->state) |
+ FIELD_PREP(ERDMA_CMD_MODIFY_QP_CC_MASK, params->cc) |
+ FIELD_PREP(ERDMA_CMD_MODIFY_QP_QPN_MASK, QP_ID(qp));
+
+ req.cookie = be32_to_cpu(cep->mpa.ext_data.cookie);
+ req.dip = to_sockaddr_in(remote_addr).sin_addr.s_addr;
+ req.sip = to_sockaddr_in(local_addr).sin_addr.s_addr;
+ req.dport = to_sockaddr_in(remote_addr).sin_port;
+ req.sport = to_sockaddr_in(local_addr).sin_port;
+
+ req.send_nxt = tp->snd_nxt;
+ /* rsvd tcp seq for mpa-rsp in server. */
+ if (params->qp_type == ERDMA_QP_PASSIVE)
+ req.send_nxt += MPA_DEFAULT_HDR_LEN + params->pd_len;
+ req.recv_nxt = tp->rcv_nxt;
+
+ ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL,
+ true);
+ if (ret)
+ return ret;
+
+ if (mask & ERDMA_QPA_IWARP_IRD)
+ qp->attrs.irq_size = params->irq_size;
+
+ if (mask & ERDMA_QPA_IWARP_ORD)
+ qp->attrs.orq_size = params->orq_size;
+
+ if (mask & ERDMA_QPA_IWARP_CC)
+ qp->attrs.cc = params->cc;
+
+ qp->attrs.iwarp.state = ERDMA_QPS_IWARP_RTS;
+
+ return 0;
+}
+
+static int
+erdma_modify_qp_state_to_stop(struct erdma_qp *qp,
+ struct erdma_mod_qp_params_iwarp *params,
+ enum erdma_qpa_mask_iwarp mask)
+{
+ struct erdma_dev *dev = qp->dev;
+ struct erdma_cmdq_modify_qp_req req;
+ int ret;
+
+ erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
+ CMDQ_OPCODE_MODIFY_QP);
+
+ req.cfg = FIELD_PREP(ERDMA_CMD_MODIFY_QP_STATE_MASK, params->state) |
+ FIELD_PREP(ERDMA_CMD_MODIFY_QP_QPN_MASK, QP_ID(qp));
+
+ ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL,
+ true);
+ if (ret)
+ return ret;
+
+ qp->attrs.iwarp.state = params->state;
+
+ return 0;
+}
+
+int erdma_modify_qp_state_iwarp(struct erdma_qp *qp,
+ struct erdma_mod_qp_params_iwarp *params,
+ int mask)
+{
+ bool need_reflush = false;
+ int drop_conn, ret = 0;
+
+ if (!mask)
+ return 0;
+
+ if (!(mask & ERDMA_QPA_IWARP_STATE))
+ return 0;
+
+ switch (qp->attrs.iwarp.state) {
+ case ERDMA_QPS_IWARP_IDLE:
+ case ERDMA_QPS_IWARP_RTR:
+ if (params->state == ERDMA_QPS_IWARP_RTS) {
+ ret = erdma_modify_qp_state_to_rts(qp, params, mask);
+ } else if (params->state == ERDMA_QPS_IWARP_ERROR) {
+ qp->attrs.iwarp.state = ERDMA_QPS_IWARP_ERROR;
+ need_reflush = true;
+ if (qp->cep) {
+ erdma_cep_put(qp->cep);
+ qp->cep = NULL;
+ }
+ ret = erdma_modify_qp_state_to_stop(qp, params, mask);
+ }
+ break;
+ case ERDMA_QPS_IWARP_RTS:
+ drop_conn = 0;
+
+ if (params->state == ERDMA_QPS_IWARP_CLOSING ||
+ params->state == ERDMA_QPS_IWARP_TERMINATE ||
+ params->state == ERDMA_QPS_IWARP_ERROR) {
+ ret = erdma_modify_qp_state_to_stop(qp, params, mask);
+ drop_conn = 1;
+ need_reflush = true;
+ }
+
+ if (drop_conn)
+ erdma_qp_cm_drop(qp);
+
+ break;
+ case ERDMA_QPS_IWARP_TERMINATE:
+ if (params->state == ERDMA_QPS_IWARP_ERROR)
+ qp->attrs.iwarp.state = ERDMA_QPS_IWARP_ERROR;
+ break;
+ case ERDMA_QPS_IWARP_CLOSING:
+ if (params->state == ERDMA_QPS_IWARP_IDLE) {
+ qp->attrs.iwarp.state = ERDMA_QPS_IWARP_IDLE;
+ } else if (params->state == ERDMA_QPS_IWARP_ERROR) {
+ ret = erdma_modify_qp_state_to_stop(qp, params, mask);
+ qp->attrs.iwarp.state = ERDMA_QPS_IWARP_ERROR;
+ } else if (params->state != ERDMA_QPS_IWARP_CLOSING) {
+ return -ECONNABORTED;
+ }
+ break;
+ default:
+ break;
+ }
+
+ if (need_reflush && !ret && rdma_is_kernel_res(&qp->ibqp.res)) {
+ qp->flags |= ERDMA_QP_IN_FLUSHING;
+ mod_delayed_work(qp->dev->reflush_wq, &qp->reflush_dwork,
+ usecs_to_jiffies(100));
+ }
+
+ return ret;
+}
+
+static int modify_qp_cmd_rocev2(struct erdma_qp *qp,
+ struct erdma_mod_qp_params_rocev2 *params,
+ enum erdma_qpa_mask_rocev2 attr_mask)
+{
+ struct erdma_cmdq_mod_qp_req_rocev2 req;
+
+ memset(&req, 0, sizeof(req));
+
+ erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
+ CMDQ_OPCODE_MODIFY_QP);
+
+ req.cfg0 = FIELD_PREP(ERDMA_CMD_MODIFY_QP_QPN_MASK, QP_ID(qp));
+
+ if (attr_mask & ERDMA_QPA_ROCEV2_STATE)
+ req.cfg0 |= FIELD_PREP(ERDMA_CMD_MODIFY_QP_STATE_MASK,
+ params->state);
+
+ if (attr_mask & ERDMA_QPA_ROCEV2_DST_QPN)
+ req.cfg1 = FIELD_PREP(ERDMA_CMD_MODIFY_QP_DQPN_MASK,
+ params->dst_qpn);
+
+ if (attr_mask & ERDMA_QPA_ROCEV2_QKEY)
+ req.qkey = params->qkey;
+
+ if (attr_mask & ERDMA_QPA_ROCEV2_AV)
+ erdma_set_av_cfg(&req.av_cfg, &params->av);
+
+ if (attr_mask & ERDMA_QPA_ROCEV2_SQ_PSN)
+ req.sq_psn = params->sq_psn;
+
+ if (attr_mask & ERDMA_QPA_ROCEV2_RQ_PSN)
+ req.rq_psn = params->rq_psn;
+
+ req.attr_mask = attr_mask;
+
+ return erdma_post_cmd_wait(&qp->dev->cmdq, &req, sizeof(req), NULL,
+ NULL, true);
+}
+
+static void erdma_reset_qp(struct erdma_qp *qp)
+{
+ qp->kern_qp.sq_pi = 0;
+ qp->kern_qp.sq_ci = 0;
+ qp->kern_qp.rq_pi = 0;
+ qp->kern_qp.rq_ci = 0;
+ memset(qp->kern_qp.swr_tbl, 0, qp->attrs.sq_size * sizeof(u64));
+ memset(qp->kern_qp.rwr_tbl, 0, qp->attrs.rq_size * sizeof(u64));
+ memset(qp->kern_qp.sq_buf, 0, qp->attrs.sq_size << SQEBB_SHIFT);
+ memset(qp->kern_qp.rq_buf, 0, qp->attrs.rq_size << RQE_SHIFT);
+ erdma_remove_cqes_of_qp(&qp->scq->ibcq, QP_ID(qp));
+ if (qp->rcq != qp->scq)
+ erdma_remove_cqes_of_qp(&qp->rcq->ibcq, QP_ID(qp));
+}
+
+int erdma_modify_qp_state_rocev2(struct erdma_qp *qp,
+ struct erdma_mod_qp_params_rocev2 *params,
+ int attr_mask)
+{
+ struct erdma_dev *dev = to_edev(qp->ibqp.device);
+ int ret;
+
+ ret = modify_qp_cmd_rocev2(qp, params, attr_mask);
+ if (ret)
+ return ret;
+
+ if (attr_mask & ERDMA_QPA_ROCEV2_STATE)
+ qp->attrs.rocev2.state = params->state;
+
+ if (attr_mask & ERDMA_QPA_ROCEV2_QKEY)
+ qp->attrs.rocev2.qkey = params->qkey;
+
+ if (attr_mask & ERDMA_QPA_ROCEV2_DST_QPN)
+ qp->attrs.rocev2.dst_qpn = params->dst_qpn;
+
+ if (attr_mask & ERDMA_QPA_ROCEV2_AV)
+ memcpy(&qp->attrs.rocev2.av, &params->av,
+ sizeof(struct erdma_av));
+
+ if (rdma_is_kernel_res(&qp->ibqp.res) &&
+ params->state == ERDMA_QPS_ROCEV2_RESET)
+ erdma_reset_qp(qp);
+
+ if (rdma_is_kernel_res(&qp->ibqp.res) &&
+ params->state == ERDMA_QPS_ROCEV2_ERROR) {
+ qp->flags |= ERDMA_QP_IN_FLUSHING;
+ mod_delayed_work(dev->reflush_wq, &qp->reflush_dwork,
+ usecs_to_jiffies(100));
+ }
+
+ return 0;
+}
+
+static void erdma_qp_safe_free(struct kref *ref)
+{
+ struct erdma_qp *qp = container_of(ref, struct erdma_qp, ref);
+
+ complete(&qp->safe_free);
+}
+
+void erdma_qp_put(struct erdma_qp *qp)
+{
+ WARN_ON(kref_read(&qp->ref) < 1);
+ kref_put(&qp->ref, erdma_qp_safe_free);
+}
+
+void erdma_qp_get(struct erdma_qp *qp)
+{
+ kref_get(&qp->ref);
+}
+
+static int fill_inline_data(struct erdma_qp *qp,
+ const struct ib_send_wr *send_wr, u16 wqe_idx,
+ u32 sgl_offset, __le32 *length_field)
+{
+ u32 remain_size, copy_size, data_off, bytes = 0;
+ char *data;
+ int i = 0;
+
+ wqe_idx += (sgl_offset >> SQEBB_SHIFT);
+ sgl_offset &= (SQEBB_SIZE - 1);
+ data = get_queue_entry(qp->kern_qp.sq_buf, wqe_idx, qp->attrs.sq_size,
+ SQEBB_SHIFT);
+
+ while (i < send_wr->num_sge) {
+ bytes += send_wr->sg_list[i].length;
+ if (bytes > (int)ERDMA_MAX_INLINE)
+ return -EINVAL;
+
+ remain_size = send_wr->sg_list[i].length;
+ data_off = 0;
+
+ while (1) {
+ copy_size = min(remain_size, SQEBB_SIZE - sgl_offset);
+
+ memcpy(data + sgl_offset,
+ (void *)(uintptr_t)send_wr->sg_list[i].addr +
+ data_off,
+ copy_size);
+ remain_size -= copy_size;
+ data_off += copy_size;
+ sgl_offset += copy_size;
+ wqe_idx += (sgl_offset >> SQEBB_SHIFT);
+ sgl_offset &= (SQEBB_SIZE - 1);
+
+ data = get_queue_entry(qp->kern_qp.sq_buf, wqe_idx,
+ qp->attrs.sq_size, SQEBB_SHIFT);
+ if (!remain_size)
+ break;
+ }
+
+ i++;
+ }
+ *length_field = cpu_to_le32(bytes);
+
+ return bytes;
+}
+
+static int fill_sgl(struct erdma_qp *qp, const struct ib_send_wr *send_wr,
+ u16 wqe_idx, u32 sgl_offset, __le32 *length_field)
+{
+ int i = 0;
+ u32 bytes = 0;
+ char *sgl;
+
+ if (send_wr->num_sge > qp->dev->attrs.max_send_sge)
+ return -EINVAL;
+
+ if (sgl_offset & 0xF)
+ return -EINVAL;
+
+ while (i < send_wr->num_sge) {
+ wqe_idx += (sgl_offset >> SQEBB_SHIFT);
+ sgl_offset &= (SQEBB_SIZE - 1);
+ sgl = get_queue_entry(qp->kern_qp.sq_buf, wqe_idx,
+ qp->attrs.sq_size, SQEBB_SHIFT);
+
+ bytes += send_wr->sg_list[i].length;
+ memcpy(sgl + sgl_offset, &send_wr->sg_list[i],
+ sizeof(struct ib_sge));
+
+ sgl_offset += sizeof(struct ib_sge);
+ i++;
+ }
+
+ *length_field = cpu_to_le32(bytes);
+ return 0;
+}
+
+static void init_send_sqe_rc(struct erdma_qp *qp, struct erdma_send_sqe_rc *sqe,
+ const struct ib_send_wr *wr, u32 *hw_op)
+{
+ u32 op = ERDMA_OP_SEND;
+
+ if (wr->opcode == IB_WR_SEND_WITH_IMM) {
+ op = ERDMA_OP_SEND_WITH_IMM;
+ sqe->imm_data = wr->ex.imm_data;
+ } else if (wr->opcode == IB_WR_SEND_WITH_INV) {
+ op = ERDMA_OP_SEND_WITH_INV;
+ sqe->invalid_stag = cpu_to_le32(wr->ex.invalidate_rkey);
+ }
+
+ *hw_op = op;
+}
+
+static void init_send_sqe_ud(struct erdma_qp *qp, struct erdma_send_sqe_ud *sqe,
+ const struct ib_send_wr *wr, u32 *hw_op)
+{
+ const struct ib_ud_wr *uwr = ud_wr(wr);
+ struct erdma_ah *ah = to_eah(uwr->ah);
+ u32 op = ERDMA_OP_SEND;
+
+ if (wr->opcode == IB_WR_SEND_WITH_IMM) {
+ op = ERDMA_OP_SEND_WITH_IMM;
+ sqe->imm_data = wr->ex.imm_data;
+ }
+
+ *hw_op = op;
+
+ sqe->ahn = cpu_to_le32(ah->ahn);
+ sqe->dst_qpn = cpu_to_le32(uwr->remote_qpn);
+ /* Not allowed to send control qkey */
+ if (uwr->remote_qkey & 0x80000000)
+ sqe->qkey = cpu_to_le32(qp->attrs.rocev2.qkey);
+ else
+ sqe->qkey = cpu_to_le32(uwr->remote_qkey);
+}
+
+static int erdma_push_one_sqe(struct erdma_qp *qp, u16 *pi,
+ const struct ib_send_wr *send_wr)
+{
+ u32 wqe_size, wqebb_cnt, hw_op, flags, sgl_offset;
+ u32 idx = *pi & (qp->attrs.sq_size - 1);
+ enum ib_wr_opcode op = send_wr->opcode;
+ struct erdma_send_sqe_rc *rc_send_sqe;
+ struct erdma_send_sqe_ud *ud_send_sqe;
+ struct erdma_atomic_sqe *atomic_sqe;
+ struct erdma_readreq_sqe *read_sqe;
+ struct erdma_reg_mr_sqe *regmr_sge;
+ struct erdma_write_sqe *write_sqe;
+ struct ib_rdma_wr *rdma_wr;
+ struct erdma_sge *sge;
+ __le32 *length_field;
+ struct erdma_mr *mr;
+ u64 wqe_hdr, *entry;
+ u32 attrs;
+ int ret;
+
+ if (qp->ibqp.qp_type != IB_QPT_RC && send_wr->opcode != IB_WR_SEND &&
+ send_wr->opcode != IB_WR_SEND_WITH_IMM)
+ return -EINVAL;
+
+ entry = get_queue_entry(qp->kern_qp.sq_buf, idx, qp->attrs.sq_size,
+ SQEBB_SHIFT);
+
+ /* Clear the SQE header section. */
+ *entry = 0;
+
+ qp->kern_qp.swr_tbl[idx] = send_wr->wr_id;
+ flags = send_wr->send_flags;
+ wqe_hdr = FIELD_PREP(
+ ERDMA_SQE_HDR_CE_MASK,
+ ((flags & IB_SEND_SIGNALED) || qp->kern_qp.sig_all) ? 1 : 0);
+ wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_SE_MASK,
+ flags & IB_SEND_SOLICITED ? 1 : 0);
+ wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_FENCE_MASK,
+ flags & IB_SEND_FENCE ? 1 : 0);
+ wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_INLINE_MASK,
+ flags & IB_SEND_INLINE ? 1 : 0);
+ wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_QPN_MASK, QP_ID(qp));
+
+ switch (op) {
+ case IB_WR_RDMA_WRITE:
+ case IB_WR_RDMA_WRITE_WITH_IMM:
+ hw_op = ERDMA_OP_WRITE;
+ if (op == IB_WR_RDMA_WRITE_WITH_IMM)
+ hw_op = ERDMA_OP_WRITE_WITH_IMM;
+ wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK, hw_op);
+ rdma_wr = container_of(send_wr, struct ib_rdma_wr, wr);
+ write_sqe = (struct erdma_write_sqe *)entry;
+
+ write_sqe->imm_data = send_wr->ex.imm_data;
+ write_sqe->sink_stag = cpu_to_le32(rdma_wr->rkey);
+ write_sqe->sink_to_h =
+ cpu_to_le32(upper_32_bits(rdma_wr->remote_addr));
+ write_sqe->sink_to_l =
+ cpu_to_le32(lower_32_bits(rdma_wr->remote_addr));
+
+ length_field = &write_sqe->length;
+ wqe_size = sizeof(struct erdma_write_sqe);
+ sgl_offset = wqe_size;
+ break;
+ case IB_WR_RDMA_READ:
+ case IB_WR_RDMA_READ_WITH_INV:
+ read_sqe = (struct erdma_readreq_sqe *)entry;
+ if (unlikely(send_wr->num_sge != 1))
+ return -EINVAL;
+ hw_op = ERDMA_OP_READ;
+ if (op == IB_WR_RDMA_READ_WITH_INV) {
+ hw_op = ERDMA_OP_READ_WITH_INV;
+ read_sqe->invalid_stag =
+ cpu_to_le32(send_wr->ex.invalidate_rkey);
+ }
+
+ wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK, hw_op);
+ rdma_wr = container_of(send_wr, struct ib_rdma_wr, wr);
+ read_sqe->length = cpu_to_le32(send_wr->sg_list[0].length);
+ read_sqe->sink_stag = cpu_to_le32(send_wr->sg_list[0].lkey);
+ read_sqe->sink_to_l =
+ cpu_to_le32(lower_32_bits(send_wr->sg_list[0].addr));
+ read_sqe->sink_to_h =
+ cpu_to_le32(upper_32_bits(send_wr->sg_list[0].addr));
+
+ sge = get_queue_entry(qp->kern_qp.sq_buf, idx + 1,
+ qp->attrs.sq_size, SQEBB_SHIFT);
+ sge->addr = cpu_to_le64(rdma_wr->remote_addr);
+ sge->key = cpu_to_le32(rdma_wr->rkey);
+ sge->length = cpu_to_le32(send_wr->sg_list[0].length);
+ wqe_size = sizeof(struct erdma_readreq_sqe) +
+ send_wr->num_sge * sizeof(struct ib_sge);
+
+ goto out;
+ case IB_WR_SEND:
+ case IB_WR_SEND_WITH_IMM:
+ case IB_WR_SEND_WITH_INV:
+ if (qp->ibqp.qp_type == IB_QPT_RC) {
+ rc_send_sqe = (struct erdma_send_sqe_rc *)entry;
+ init_send_sqe_rc(qp, rc_send_sqe, send_wr, &hw_op);
+ length_field = &rc_send_sqe->length;
+ wqe_size = sizeof(struct erdma_send_sqe_rc);
+ } else {
+ ud_send_sqe = (struct erdma_send_sqe_ud *)entry;
+ init_send_sqe_ud(qp, ud_send_sqe, send_wr, &hw_op);
+ length_field = &ud_send_sqe->length;
+ wqe_size = sizeof(struct erdma_send_sqe_ud);
+ }
+
+ sgl_offset = wqe_size;
+ wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK, hw_op);
+ break;
+ case IB_WR_REG_MR:
+ wqe_hdr |=
+ FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK, ERDMA_OP_REG_MR);
+ regmr_sge = (struct erdma_reg_mr_sqe *)entry;
+ mr = to_emr(reg_wr(send_wr)->mr);
+
+ mr->access = ERDMA_MR_ACC_LR |
+ to_erdma_access_flags(reg_wr(send_wr)->access);
+ regmr_sge->addr = cpu_to_le64(mr->ibmr.iova);
+ regmr_sge->length = cpu_to_le32(mr->ibmr.length);
+ regmr_sge->stag = cpu_to_le32(reg_wr(send_wr)->key);
+ attrs = FIELD_PREP(ERDMA_SQE_MR_ACCESS_MASK, mr->access) |
+ FIELD_PREP(ERDMA_SQE_MR_MTT_CNT_MASK,
+ mr->mem.mtt_nents);
+
+ if (mr->mem.mtt_nents <= ERDMA_MAX_INLINE_MTT_ENTRIES) {
+ attrs |= FIELD_PREP(ERDMA_SQE_MR_MTT_TYPE_MASK, 0);
+ /* Copy SGLs to SQE content to accelerate */
+ memcpy(get_queue_entry(qp->kern_qp.sq_buf, idx + 1,
+ qp->attrs.sq_size, SQEBB_SHIFT),
+ mr->mem.mtt->buf, MTT_SIZE(mr->mem.mtt_nents));
+ wqe_size = sizeof(struct erdma_reg_mr_sqe) +
+ MTT_SIZE(mr->mem.mtt_nents);
+ } else {
+ attrs |= FIELD_PREP(ERDMA_SQE_MR_MTT_TYPE_MASK, 1);
+ wqe_size = sizeof(struct erdma_reg_mr_sqe);
+ }
+
+ regmr_sge->attrs = cpu_to_le32(attrs);
+ goto out;
+ case IB_WR_LOCAL_INV:
+ wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK,
+ ERDMA_OP_LOCAL_INV);
+ regmr_sge = (struct erdma_reg_mr_sqe *)entry;
+ regmr_sge->stag = cpu_to_le32(send_wr->ex.invalidate_rkey);
+ wqe_size = sizeof(struct erdma_reg_mr_sqe);
+ goto out;
+ case IB_WR_ATOMIC_CMP_AND_SWP:
+ case IB_WR_ATOMIC_FETCH_AND_ADD:
+ atomic_sqe = (struct erdma_atomic_sqe *)entry;
+ if (op == IB_WR_ATOMIC_CMP_AND_SWP) {
+ wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK,
+ ERDMA_OP_ATOMIC_CAS);
+ atomic_sqe->fetchadd_swap_data =
+ cpu_to_le64(atomic_wr(send_wr)->swap);
+ atomic_sqe->cmp_data =
+ cpu_to_le64(atomic_wr(send_wr)->compare_add);
+ } else {
+ wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK,
+ ERDMA_OP_ATOMIC_FAA);
+ atomic_sqe->fetchadd_swap_data =
+ cpu_to_le64(atomic_wr(send_wr)->compare_add);
+ }
+
+ sge = get_queue_entry(qp->kern_qp.sq_buf, idx + 1,
+ qp->attrs.sq_size, SQEBB_SHIFT);
+ sge->addr = cpu_to_le64(atomic_wr(send_wr)->remote_addr);
+ sge->key = cpu_to_le32(atomic_wr(send_wr)->rkey);
+ sge++;
+
+ sge->addr = cpu_to_le64(send_wr->sg_list[0].addr);
+ sge->key = cpu_to_le32(send_wr->sg_list[0].lkey);
+ sge->length = cpu_to_le32(send_wr->sg_list[0].length);
+
+ wqe_size = sizeof(*atomic_sqe);
+ goto out;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ if (flags & IB_SEND_INLINE) {
+ ret = fill_inline_data(qp, send_wr, idx, sgl_offset,
+ length_field);
+ if (ret < 0)
+ return -EINVAL;
+ wqe_size += ret;
+ wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_SGL_LEN_MASK, ret);
+ } else {
+ ret = fill_sgl(qp, send_wr, idx, sgl_offset, length_field);
+ if (ret)
+ return -EINVAL;
+ wqe_size += send_wr->num_sge * sizeof(struct ib_sge);
+ wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_SGL_LEN_MASK,
+ send_wr->num_sge);
+ }
+
+out:
+ wqebb_cnt = SQEBB_COUNT(wqe_size);
+ wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_WQEBB_CNT_MASK, wqebb_cnt - 1);
+ *pi += wqebb_cnt;
+ wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_WQEBB_INDEX_MASK, *pi);
+
+ *entry = wqe_hdr;
+
+ return 0;
+}
+
+static void kick_sq_db(struct erdma_qp *qp, u16 pi)
+{
+ u64 db_data = FIELD_PREP(ERDMA_SQE_HDR_QPN_MASK, QP_ID(qp)) |
+ FIELD_PREP(ERDMA_SQE_HDR_WQEBB_INDEX_MASK, pi);
+
+ *(u64 *)qp->kern_qp.sq_dbrec = db_data;
+ writeq(db_data, qp->kern_qp.hw_sq_db);
+}
+
+int erdma_post_send(struct ib_qp *ibqp, const struct ib_send_wr *send_wr,
+ const struct ib_send_wr **bad_send_wr)
+{
+ struct erdma_qp *qp = to_eqp(ibqp);
+ int ret = 0;
+ const struct ib_send_wr *wr = send_wr;
+ unsigned long flags;
+ u16 sq_pi;
+
+ if (!send_wr)
+ return -EINVAL;
+
+ spin_lock_irqsave(&qp->lock, flags);
+ sq_pi = qp->kern_qp.sq_pi;
+
+ while (wr) {
+ if ((u16)(sq_pi - qp->kern_qp.sq_ci) >= qp->attrs.sq_size) {
+ ret = -ENOMEM;
+ *bad_send_wr = send_wr;
+ break;
+ }
+
+ ret = erdma_push_one_sqe(qp, &sq_pi, wr);
+ if (ret) {
+ *bad_send_wr = wr;
+ break;
+ }
+ qp->kern_qp.sq_pi = sq_pi;
+ kick_sq_db(qp, sq_pi);
+
+ wr = wr->next;
+ }
+ spin_unlock_irqrestore(&qp->lock, flags);
+
+ if (unlikely(qp->flags & ERDMA_QP_IN_FLUSHING))
+ mod_delayed_work(qp->dev->reflush_wq, &qp->reflush_dwork,
+ usecs_to_jiffies(100));
+
+ return ret;
+}
+
+static int erdma_post_recv_one(struct erdma_qp *qp,
+ const struct ib_recv_wr *recv_wr)
+{
+ struct erdma_rqe *rqe =
+ get_queue_entry(qp->kern_qp.rq_buf, qp->kern_qp.rq_pi,
+ qp->attrs.rq_size, RQE_SHIFT);
+
+ rqe->qe_idx = cpu_to_le16(qp->kern_qp.rq_pi + 1);
+ rqe->qpn = cpu_to_le32(QP_ID(qp));
+
+ if (recv_wr->num_sge == 0) {
+ rqe->length = 0;
+ } else if (recv_wr->num_sge == 1) {
+ rqe->stag = cpu_to_le32(recv_wr->sg_list[0].lkey);
+ rqe->to = cpu_to_le64(recv_wr->sg_list[0].addr);
+ rqe->length = cpu_to_le32(recv_wr->sg_list[0].length);
+ } else {
+ return -EINVAL;
+ }
+
+ *(u64 *)qp->kern_qp.rq_dbrec = *(u64 *)rqe;
+ writeq(*(u64 *)rqe, qp->kern_qp.hw_rq_db);
+
+ qp->kern_qp.rwr_tbl[qp->kern_qp.rq_pi & (qp->attrs.rq_size - 1)] =
+ recv_wr->wr_id;
+ qp->kern_qp.rq_pi++;
+
+ return 0;
+}
+
+int erdma_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *recv_wr,
+ const struct ib_recv_wr **bad_recv_wr)
+{
+ const struct ib_recv_wr *wr = recv_wr;
+ struct erdma_qp *qp = to_eqp(ibqp);
+ unsigned long flags;
+ int ret;
+
+ spin_lock_irqsave(&qp->lock, flags);
+
+ while (wr) {
+ ret = erdma_post_recv_one(qp, wr);
+ if (ret) {
+ *bad_recv_wr = wr;
+ break;
+ }
+ wr = wr->next;
+ }
+
+ spin_unlock_irqrestore(&qp->lock, flags);
+
+ if (unlikely(qp->flags & ERDMA_QP_IN_FLUSHING))
+ mod_delayed_work(qp->dev->reflush_wq, &qp->reflush_dwork,
+ usecs_to_jiffies(100));
+
+ return ret;
+}
diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.c b/drivers/infiniband/hw/erdma/erdma_verbs.c
new file mode 100644
index 000000000000..109a3f3de911
--- /dev/null
+++ b/drivers/infiniband/hw/erdma/erdma_verbs.c
@@ -0,0 +1,2300 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/* Authors: Cheng Xu <chengyou@linux.alibaba.com> */
+/* Kai Shen <kaishen@linux.alibaba.com> */
+/* Copyright (c) 2020-2022, Alibaba Group. */
+
+/* Authors: Bernard Metzler <bmt@zurich.ibm.com> */
+/* Copyright (c) 2008-2019, IBM Corporation */
+
+/* Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. */
+
+#include <linux/vmalloc.h>
+#include <net/addrconf.h>
+#include <rdma/erdma-abi.h>
+#include <rdma/ib_umem.h>
+#include <rdma/uverbs_ioctl.h>
+
+#include "erdma.h"
+#include "erdma_cm.h"
+#include "erdma_verbs.h"
+
+static void assemble_qbuf_mtt_for_cmd(struct erdma_mem *mem, u32 *cfg,
+ u64 *addr0, u64 *addr1)
+{
+ struct erdma_mtt *mtt = mem->mtt;
+
+ if (mem->mtt_nents > ERDMA_MAX_INLINE_MTT_ENTRIES) {
+ *addr0 = mtt->buf_dma;
+ *cfg |= FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_LEVEL_MASK,
+ ERDMA_MR_MTT_1LEVEL);
+ } else {
+ *addr0 = mtt->buf[0];
+ memcpy(addr1, mtt->buf + 1, MTT_SIZE(mem->mtt_nents - 1));
+ *cfg |= FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_LEVEL_MASK,
+ ERDMA_MR_MTT_0LEVEL);
+ }
+}
+
+static int create_qp_cmd(struct erdma_ucontext *uctx, struct erdma_qp *qp)
+{
+ struct erdma_dev *dev = to_edev(qp->ibqp.device);
+ struct erdma_pd *pd = to_epd(qp->ibqp.pd);
+ struct erdma_cmdq_create_qp_req req;
+ struct erdma_uqp *user_qp;
+ u64 resp0, resp1;
+ int err;
+
+ erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
+ CMDQ_OPCODE_CREATE_QP);
+
+ req.cfg0 = FIELD_PREP(ERDMA_CMD_CREATE_QP_SQ_DEPTH_MASK,
+ ilog2(qp->attrs.sq_size)) |
+ FIELD_PREP(ERDMA_CMD_CREATE_QP_QPN_MASK, QP_ID(qp));
+ req.cfg1 = FIELD_PREP(ERDMA_CMD_CREATE_QP_RQ_DEPTH_MASK,
+ ilog2(qp->attrs.rq_size)) |
+ FIELD_PREP(ERDMA_CMD_CREATE_QP_PD_MASK, pd->pdn);
+
+ if (qp->ibqp.qp_type == IB_QPT_RC)
+ req.cfg2 = FIELD_PREP(ERDMA_CMD_CREATE_QP_TYPE_MASK,
+ ERDMA_QPT_RC);
+ else
+ req.cfg2 = FIELD_PREP(ERDMA_CMD_CREATE_QP_TYPE_MASK,
+ ERDMA_QPT_UD);
+
+ if (rdma_is_kernel_res(&qp->ibqp.res)) {
+ u32 pgsz_range = ilog2(SZ_1M) - ERDMA_HW_PAGE_SHIFT;
+
+ req.sq_cqn_mtt_cfg =
+ FIELD_PREP(ERDMA_CMD_CREATE_QP_PAGE_SIZE_MASK,
+ pgsz_range) |
+ FIELD_PREP(ERDMA_CMD_CREATE_QP_CQN_MASK, qp->scq->cqn);
+ req.rq_cqn_mtt_cfg =
+ FIELD_PREP(ERDMA_CMD_CREATE_QP_PAGE_SIZE_MASK,
+ pgsz_range) |
+ FIELD_PREP(ERDMA_CMD_CREATE_QP_CQN_MASK, qp->rcq->cqn);
+
+ req.sq_mtt_cfg =
+ FIELD_PREP(ERDMA_CMD_CREATE_QP_PAGE_OFFSET_MASK, 0) |
+ FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_CNT_MASK, 1) |
+ FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_LEVEL_MASK,
+ ERDMA_MR_MTT_0LEVEL);
+ req.rq_mtt_cfg = req.sq_mtt_cfg;
+
+ req.rq_buf_addr = qp->kern_qp.rq_buf_dma_addr;
+ req.sq_buf_addr = qp->kern_qp.sq_buf_dma_addr;
+ req.sq_dbrec_dma = qp->kern_qp.sq_dbrec_dma;
+ req.rq_dbrec_dma = qp->kern_qp.rq_dbrec_dma;
+ } else {
+ user_qp = &qp->user_qp;
+ req.sq_cqn_mtt_cfg = FIELD_PREP(
+ ERDMA_CMD_CREATE_QP_PAGE_SIZE_MASK,
+ ilog2(user_qp->sq_mem.page_size) - ERDMA_HW_PAGE_SHIFT);
+ req.sq_cqn_mtt_cfg |=
+ FIELD_PREP(ERDMA_CMD_CREATE_QP_CQN_MASK, qp->scq->cqn);
+
+ req.rq_cqn_mtt_cfg = FIELD_PREP(
+ ERDMA_CMD_CREATE_QP_PAGE_SIZE_MASK,
+ ilog2(user_qp->rq_mem.page_size) - ERDMA_HW_PAGE_SHIFT);
+ req.rq_cqn_mtt_cfg |=
+ FIELD_PREP(ERDMA_CMD_CREATE_QP_CQN_MASK, qp->rcq->cqn);
+
+ req.sq_mtt_cfg = user_qp->sq_mem.page_offset;
+ req.sq_mtt_cfg |= FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_CNT_MASK,
+ user_qp->sq_mem.mtt_nents);
+
+ req.rq_mtt_cfg = user_qp->rq_mem.page_offset;
+ req.rq_mtt_cfg |= FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_CNT_MASK,
+ user_qp->rq_mem.mtt_nents);
+
+ assemble_qbuf_mtt_for_cmd(&user_qp->sq_mem, &req.sq_mtt_cfg,
+ &req.sq_buf_addr, req.sq_mtt_entry);
+ assemble_qbuf_mtt_for_cmd(&user_qp->rq_mem, &req.rq_mtt_cfg,
+ &req.rq_buf_addr, req.rq_mtt_entry);
+
+ req.sq_dbrec_dma = user_qp->sq_dbrec_dma;
+ req.rq_dbrec_dma = user_qp->rq_dbrec_dma;
+
+ if (uctx->ext_db.enable) {
+ req.sq_cqn_mtt_cfg |=
+ FIELD_PREP(ERDMA_CMD_CREATE_QP_DB_CFG_MASK, 1);
+ req.db_cfg =
+ FIELD_PREP(ERDMA_CMD_CREATE_QP_SQDB_CFG_MASK,
+ uctx->ext_db.sdb_off) |
+ FIELD_PREP(ERDMA_CMD_CREATE_QP_RQDB_CFG_MASK,
+ uctx->ext_db.rdb_off);
+ }
+ }
+
+ err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), &resp0, &resp1,
+ true);
+ if (!err && erdma_device_iwarp(dev))
+ qp->attrs.iwarp.cookie =
+ FIELD_GET(ERDMA_CMDQ_CREATE_QP_RESP_COOKIE_MASK, resp0);
+
+ return err;
+}
+
+static int regmr_cmd(struct erdma_dev *dev, struct erdma_mr *mr)
+{
+ struct erdma_pd *pd = to_epd(mr->ibmr.pd);
+ u32 mtt_level = ERDMA_MR_MTT_0LEVEL;
+ struct erdma_cmdq_reg_mr_req req;
+
+ erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA, CMDQ_OPCODE_REG_MR);
+
+ if (mr->type == ERDMA_MR_TYPE_FRMR ||
+ mr->mem.page_cnt > ERDMA_MAX_INLINE_MTT_ENTRIES) {
+ if (mr->mem.mtt->continuous) {
+ req.phy_addr[0] = mr->mem.mtt->buf_dma;
+ mtt_level = ERDMA_MR_MTT_1LEVEL;
+ } else {
+ req.phy_addr[0] = mr->mem.mtt->dma_addrs[0];
+ mtt_level = mr->mem.mtt->level;
+ }
+ } else if (mr->type != ERDMA_MR_TYPE_DMA) {
+ memcpy(req.phy_addr, mr->mem.mtt->buf,
+ MTT_SIZE(mr->mem.page_cnt));
+ }
+
+ req.cfg0 = FIELD_PREP(ERDMA_CMD_MR_VALID_MASK, mr->valid) |
+ FIELD_PREP(ERDMA_CMD_MR_KEY_MASK, mr->ibmr.lkey & 0xFF) |
+ FIELD_PREP(ERDMA_CMD_MR_MPT_IDX_MASK, mr->ibmr.lkey >> 8);
+ req.cfg1 = FIELD_PREP(ERDMA_CMD_REGMR_PD_MASK, pd->pdn) |
+ FIELD_PREP(ERDMA_CMD_REGMR_TYPE_MASK, mr->type) |
+ FIELD_PREP(ERDMA_CMD_REGMR_RIGHT_MASK, mr->access);
+ req.cfg2 = FIELD_PREP(ERDMA_CMD_REGMR_PAGESIZE_MASK,
+ ilog2(mr->mem.page_size)) |
+ FIELD_PREP(ERDMA_CMD_REGMR_MTT_LEVEL_MASK, mtt_level) |
+ FIELD_PREP(ERDMA_CMD_REGMR_MTT_CNT_MASK, mr->mem.page_cnt);
+
+ if (mr->type == ERDMA_MR_TYPE_DMA)
+ goto post_cmd;
+
+ if (mr->type == ERDMA_MR_TYPE_NORMAL) {
+ req.start_va = mr->mem.va;
+ req.size = mr->mem.len;
+ }
+
+ if (!mr->mem.mtt->continuous && mr->mem.mtt->level > 1) {
+ req.cfg0 |= FIELD_PREP(ERDMA_CMD_MR_VERSION_MASK, 1);
+ req.cfg2 |= FIELD_PREP(ERDMA_CMD_REGMR_MTT_PAGESIZE_MASK,
+ PAGE_SHIFT - ERDMA_HW_PAGE_SHIFT);
+ req.size_h = upper_32_bits(mr->mem.len);
+ req.mtt_cnt_h = mr->mem.page_cnt >> 20;
+ }
+
+post_cmd:
+ return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL,
+ true);
+}
+
+static int create_cq_cmd(struct erdma_ucontext *uctx, struct erdma_cq *cq)
+{
+ struct erdma_dev *dev = to_edev(cq->ibcq.device);
+ struct erdma_cmdq_create_cq_req req;
+ struct erdma_mem *mem;
+ u32 page_size;
+
+ erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
+ CMDQ_OPCODE_CREATE_CQ);
+
+ req.cfg0 = FIELD_PREP(ERDMA_CMD_CREATE_CQ_CQN_MASK, cq->cqn) |
+ FIELD_PREP(ERDMA_CMD_CREATE_CQ_DEPTH_MASK, ilog2(cq->depth));
+ req.cfg1 = FIELD_PREP(ERDMA_CMD_CREATE_CQ_EQN_MASK, cq->assoc_eqn);
+
+ if (rdma_is_kernel_res(&cq->ibcq.res)) {
+ page_size = SZ_32M;
+ req.cfg0 |= FIELD_PREP(ERDMA_CMD_CREATE_CQ_PAGESIZE_MASK,
+ ilog2(page_size) - ERDMA_HW_PAGE_SHIFT);
+ req.qbuf_addr_l = lower_32_bits(cq->kern_cq.qbuf_dma_addr);
+ req.qbuf_addr_h = upper_32_bits(cq->kern_cq.qbuf_dma_addr);
+
+ req.cfg1 |= FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_CNT_MASK, 1) |
+ FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_LEVEL_MASK,
+ ERDMA_MR_MTT_0LEVEL);
+
+ req.first_page_offset = 0;
+ req.cq_dbrec_dma = cq->kern_cq.dbrec_dma;
+ } else {
+ mem = &cq->user_cq.qbuf_mem;
+ req.cfg0 |=
+ FIELD_PREP(ERDMA_CMD_CREATE_CQ_PAGESIZE_MASK,
+ ilog2(mem->page_size) - ERDMA_HW_PAGE_SHIFT);
+ if (mem->mtt_nents == 1) {
+ req.qbuf_addr_l = lower_32_bits(mem->mtt->buf[0]);
+ req.qbuf_addr_h = upper_32_bits(mem->mtt->buf[0]);
+ req.cfg1 |=
+ FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_LEVEL_MASK,
+ ERDMA_MR_MTT_0LEVEL);
+ } else {
+ req.qbuf_addr_l = lower_32_bits(mem->mtt->buf_dma);
+ req.qbuf_addr_h = upper_32_bits(mem->mtt->buf_dma);
+ req.cfg1 |=
+ FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_LEVEL_MASK,
+ ERDMA_MR_MTT_1LEVEL);
+ }
+ req.cfg1 |= FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_CNT_MASK,
+ mem->mtt_nents);
+
+ req.first_page_offset = mem->page_offset;
+ req.cq_dbrec_dma = cq->user_cq.dbrec_dma;
+
+ if (uctx->ext_db.enable) {
+ req.cfg1 |= FIELD_PREP(
+ ERDMA_CMD_CREATE_CQ_MTT_DB_CFG_MASK, 1);
+ req.cfg2 = FIELD_PREP(ERDMA_CMD_CREATE_CQ_DB_CFG_MASK,
+ uctx->ext_db.cdb_off);
+ }
+ }
+
+ return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL,
+ true);
+}
+
+static int erdma_alloc_idx(struct erdma_resource_cb *res_cb)
+{
+ int idx;
+ unsigned long flags;
+
+ spin_lock_irqsave(&res_cb->lock, flags);
+ idx = find_next_zero_bit(res_cb->bitmap, res_cb->max_cap,
+ res_cb->next_alloc_idx);
+ if (idx == res_cb->max_cap) {
+ idx = find_first_zero_bit(res_cb->bitmap, res_cb->max_cap);
+ if (idx == res_cb->max_cap) {
+ res_cb->next_alloc_idx = 1;
+ spin_unlock_irqrestore(&res_cb->lock, flags);
+ return -ENOSPC;
+ }
+ }
+
+ set_bit(idx, res_cb->bitmap);
+ res_cb->next_alloc_idx = idx + 1;
+ spin_unlock_irqrestore(&res_cb->lock, flags);
+
+ return idx;
+}
+
+static inline void erdma_free_idx(struct erdma_resource_cb *res_cb, u32 idx)
+{
+ unsigned long flags;
+ u32 used;
+
+ spin_lock_irqsave(&res_cb->lock, flags);
+ used = __test_and_clear_bit(idx, res_cb->bitmap);
+ spin_unlock_irqrestore(&res_cb->lock, flags);
+ WARN_ON(!used);
+}
+
+static struct rdma_user_mmap_entry *
+erdma_user_mmap_entry_insert(struct erdma_ucontext *uctx, void *address,
+ u32 size, u8 mmap_flag, u64 *mmap_offset)
+{
+ struct erdma_user_mmap_entry *entry =
+ kzalloc(sizeof(*entry), GFP_KERNEL);
+ int ret;
+
+ if (!entry)
+ return NULL;
+
+ entry->address = (u64)address;
+ entry->mmap_flag = mmap_flag;
+
+ size = PAGE_ALIGN(size);
+
+ ret = rdma_user_mmap_entry_insert(&uctx->ibucontext, &entry->rdma_entry,
+ size);
+ if (ret) {
+ kfree(entry);
+ return NULL;
+ }
+
+ *mmap_offset = rdma_user_mmap_get_offset(&entry->rdma_entry);
+
+ return &entry->rdma_entry;
+}
+
+int erdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr,
+ struct ib_udata *unused)
+{
+ struct erdma_dev *dev = to_edev(ibdev);
+
+ memset(attr, 0, sizeof(*attr));
+
+ attr->max_mr_size = dev->attrs.max_mr_size;
+ attr->vendor_id = PCI_VENDOR_ID_ALIBABA;
+ attr->vendor_part_id = dev->pdev->device;
+ attr->hw_ver = dev->pdev->revision;
+ attr->max_qp = dev->attrs.max_qp - 1;
+ attr->max_qp_wr = min(dev->attrs.max_send_wr, dev->attrs.max_recv_wr);
+ attr->max_qp_rd_atom = dev->attrs.max_ord;
+ attr->max_qp_init_rd_atom = dev->attrs.max_ird;
+ attr->max_res_rd_atom = dev->attrs.max_qp * dev->attrs.max_ird;
+ attr->device_cap_flags = IB_DEVICE_MEM_MGT_EXTENSIONS;
+ attr->kernel_cap_flags = IBK_LOCAL_DMA_LKEY;
+ ibdev->local_dma_lkey = dev->attrs.local_dma_key;
+ attr->max_send_sge = dev->attrs.max_send_sge;
+ attr->max_recv_sge = dev->attrs.max_recv_sge;
+ attr->max_sge_rd = dev->attrs.max_sge_rd;
+ attr->max_cq = dev->attrs.max_cq - 1;
+ attr->max_cqe = dev->attrs.max_cqe;
+ attr->max_mr = dev->attrs.max_mr;
+ attr->max_pd = dev->attrs.max_pd;
+ attr->max_mw = dev->attrs.max_mw;
+ attr->max_fast_reg_page_list_len = ERDMA_MAX_FRMR_PA;
+ attr->page_size_cap = ERDMA_PAGE_SIZE_SUPPORT;
+
+ if (erdma_device_rocev2(dev)) {
+ attr->max_pkeys = ERDMA_MAX_PKEYS;
+ attr->max_ah = dev->attrs.max_ah;
+ }
+
+ if (dev->attrs.cap_flags & ERDMA_DEV_CAP_FLAGS_ATOMIC)
+ attr->atomic_cap = IB_ATOMIC_GLOB;
+
+ attr->fw_ver = dev->attrs.fw_version;
+
+ if (dev->netdev)
+ addrconf_addr_eui48((u8 *)&attr->sys_image_guid,
+ dev->netdev->dev_addr);
+
+ return 0;
+}
+
+int erdma_query_gid(struct ib_device *ibdev, u32 port, int idx,
+ union ib_gid *gid)
+{
+ struct erdma_dev *dev = to_edev(ibdev);
+
+ memset(gid, 0, sizeof(*gid));
+ ether_addr_copy(gid->raw, dev->attrs.peer_addr);
+
+ return 0;
+}
+
+int erdma_query_port(struct ib_device *ibdev, u32 port,
+ struct ib_port_attr *attr)
+{
+ struct erdma_dev *dev = to_edev(ibdev);
+ struct net_device *ndev = dev->netdev;
+
+ memset(attr, 0, sizeof(*attr));
+
+ if (erdma_device_iwarp(dev)) {
+ attr->gid_tbl_len = 1;
+ } else {
+ attr->gid_tbl_len = dev->attrs.max_gid;
+ attr->ip_gids = true;
+ attr->pkey_tbl_len = ERDMA_MAX_PKEYS;
+ }
+
+ attr->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_DEVICE_MGMT_SUP;
+ attr->max_msg_sz = -1;
+
+ if (!ndev)
+ goto out;
+
+ ib_get_eth_speed(ibdev, port, &attr->active_speed, &attr->active_width);
+ attr->max_mtu = ib_mtu_int_to_enum(ndev->mtu);
+ attr->active_mtu = ib_mtu_int_to_enum(ndev->mtu);
+ attr->state = ib_get_curr_port_state(ndev);
+
+out:
+ if (attr->state == IB_PORT_ACTIVE)
+ attr->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
+ else
+ attr->phys_state = IB_PORT_PHYS_STATE_DISABLED;
+
+ return 0;
+}
+
+int erdma_get_port_immutable(struct ib_device *ibdev, u32 port,
+ struct ib_port_immutable *port_immutable)
+{
+ struct erdma_dev *dev = to_edev(ibdev);
+
+ if (erdma_device_iwarp(dev)) {
+ port_immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
+ port_immutable->gid_tbl_len = 1;
+ } else {
+ port_immutable->core_cap_flags =
+ RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
+ port_immutable->max_mad_size = IB_MGMT_MAD_SIZE;
+ port_immutable->gid_tbl_len = dev->attrs.max_gid;
+ port_immutable->pkey_tbl_len = ERDMA_MAX_PKEYS;
+ }
+
+ return 0;
+}
+
+int erdma_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
+{
+ struct erdma_pd *pd = to_epd(ibpd);
+ struct erdma_dev *dev = to_edev(ibpd->device);
+ int pdn;
+
+ pdn = erdma_alloc_idx(&dev->res_cb[ERDMA_RES_TYPE_PD]);
+ if (pdn < 0)
+ return pdn;
+
+ pd->pdn = pdn;
+
+ return 0;
+}
+
+int erdma_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
+{
+ struct erdma_pd *pd = to_epd(ibpd);
+ struct erdma_dev *dev = to_edev(ibpd->device);
+
+ erdma_free_idx(&dev->res_cb[ERDMA_RES_TYPE_PD], pd->pdn);
+
+ return 0;
+}
+
+static void erdma_flush_worker(struct work_struct *work)
+{
+ struct delayed_work *dwork = to_delayed_work(work);
+ struct erdma_qp *qp =
+ container_of(dwork, struct erdma_qp, reflush_dwork);
+ struct erdma_cmdq_reflush_req req;
+
+ erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
+ CMDQ_OPCODE_REFLUSH);
+ req.qpn = QP_ID(qp);
+ req.sq_pi = qp->kern_qp.sq_pi;
+ req.rq_pi = qp->kern_qp.rq_pi;
+ erdma_post_cmd_wait(&qp->dev->cmdq, &req, sizeof(req), NULL, NULL,
+ true);
+}
+
+static int erdma_qp_validate_cap(struct erdma_dev *dev,
+ struct ib_qp_init_attr *attrs)
+{
+ if ((attrs->cap.max_send_wr > dev->attrs.max_send_wr) ||
+ (attrs->cap.max_recv_wr > dev->attrs.max_recv_wr) ||
+ (attrs->cap.max_send_sge > dev->attrs.max_send_sge) ||
+ (attrs->cap.max_recv_sge > dev->attrs.max_recv_sge) ||
+ (attrs->cap.max_inline_data > ERDMA_MAX_INLINE) ||
+ !attrs->cap.max_send_wr || !attrs->cap.max_recv_wr) {
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int erdma_qp_validate_attr(struct erdma_dev *dev,
+ struct ib_qp_init_attr *attrs)
+{
+ if (erdma_device_iwarp(dev) && attrs->qp_type != IB_QPT_RC)
+ return -EOPNOTSUPP;
+
+ if (erdma_device_rocev2(dev) && attrs->qp_type != IB_QPT_RC &&
+ attrs->qp_type != IB_QPT_UD && attrs->qp_type != IB_QPT_GSI)
+ return -EOPNOTSUPP;
+
+ if (attrs->srq)
+ return -EOPNOTSUPP;
+
+ if (!attrs->send_cq || !attrs->recv_cq)
+ return -EOPNOTSUPP;
+
+ return 0;
+}
+
+static void free_kernel_qp(struct erdma_qp *qp)
+{
+ struct erdma_dev *dev = qp->dev;
+
+ vfree(qp->kern_qp.swr_tbl);
+ vfree(qp->kern_qp.rwr_tbl);
+
+ if (qp->kern_qp.sq_buf)
+ dma_free_coherent(&dev->pdev->dev,
+ qp->attrs.sq_size << SQEBB_SHIFT,
+ qp->kern_qp.sq_buf,
+ qp->kern_qp.sq_buf_dma_addr);
+
+ if (qp->kern_qp.sq_dbrec)
+ dma_pool_free(dev->db_pool, qp->kern_qp.sq_dbrec,
+ qp->kern_qp.sq_dbrec_dma);
+
+ if (qp->kern_qp.rq_buf)
+ dma_free_coherent(&dev->pdev->dev,
+ qp->attrs.rq_size << RQE_SHIFT,
+ qp->kern_qp.rq_buf,
+ qp->kern_qp.rq_buf_dma_addr);
+
+ if (qp->kern_qp.rq_dbrec)
+ dma_pool_free(dev->db_pool, qp->kern_qp.rq_dbrec,
+ qp->kern_qp.rq_dbrec_dma);
+}
+
+static int init_kernel_qp(struct erdma_dev *dev, struct erdma_qp *qp,
+ struct ib_qp_init_attr *attrs)
+{
+ struct erdma_kqp *kqp = &qp->kern_qp;
+ int size;
+
+ if (attrs->sq_sig_type == IB_SIGNAL_ALL_WR)
+ kqp->sig_all = 1;
+
+ kqp->sq_pi = 0;
+ kqp->sq_ci = 0;
+ kqp->rq_pi = 0;
+ kqp->rq_ci = 0;
+ kqp->hw_sq_db =
+ dev->func_bar + (ERDMA_SDB_SHARED_PAGE_INDEX << PAGE_SHIFT);
+ kqp->hw_rq_db = dev->func_bar + ERDMA_BAR_RQDB_SPACE_OFFSET;
+
+ kqp->swr_tbl = vmalloc_array(qp->attrs.sq_size, sizeof(u64));
+ kqp->rwr_tbl = vmalloc_array(qp->attrs.rq_size, sizeof(u64));
+ if (!kqp->swr_tbl || !kqp->rwr_tbl)
+ goto err_out;
+
+ size = qp->attrs.sq_size << SQEBB_SHIFT;
+ kqp->sq_buf = dma_alloc_coherent(&dev->pdev->dev, size,
+ &kqp->sq_buf_dma_addr, GFP_KERNEL);
+ if (!kqp->sq_buf)
+ goto err_out;
+
+ kqp->sq_dbrec =
+ dma_pool_zalloc(dev->db_pool, GFP_KERNEL, &kqp->sq_dbrec_dma);
+ if (!kqp->sq_dbrec)
+ goto err_out;
+
+ size = qp->attrs.rq_size << RQE_SHIFT;
+ kqp->rq_buf = dma_alloc_coherent(&dev->pdev->dev, size,
+ &kqp->rq_buf_dma_addr, GFP_KERNEL);
+ if (!kqp->rq_buf)
+ goto err_out;
+
+ kqp->rq_dbrec =
+ dma_pool_zalloc(dev->db_pool, GFP_KERNEL, &kqp->rq_dbrec_dma);
+ if (!kqp->rq_dbrec)
+ goto err_out;
+
+ return 0;
+
+err_out:
+ free_kernel_qp(qp);
+ return -ENOMEM;
+}
+
+static void erdma_fill_bottom_mtt(struct erdma_dev *dev, struct erdma_mem *mem)
+{
+ struct erdma_mtt *mtt = mem->mtt;
+ struct ib_block_iter biter;
+ u32 idx = 0;
+
+ while (mtt->low_level)
+ mtt = mtt->low_level;
+
+ rdma_umem_for_each_dma_block(mem->umem, &biter, mem->page_size)
+ mtt->buf[idx++] = rdma_block_iter_dma_address(&biter);
+}
+
+static struct erdma_mtt *erdma_create_cont_mtt(struct erdma_dev *dev,
+ size_t size)
+{
+ struct erdma_mtt *mtt;
+
+ mtt = kzalloc(sizeof(*mtt), GFP_KERNEL);
+ if (!mtt)
+ return ERR_PTR(-ENOMEM);
+
+ mtt->size = size;
+ mtt->buf = kzalloc(mtt->size, GFP_KERNEL);
+ if (!mtt->buf)
+ goto err_free_mtt;
+
+ mtt->continuous = true;
+ mtt->buf_dma = dma_map_single(&dev->pdev->dev, mtt->buf, mtt->size,
+ DMA_TO_DEVICE);
+ if (dma_mapping_error(&dev->pdev->dev, mtt->buf_dma))
+ goto err_free_mtt_buf;
+
+ return mtt;
+
+err_free_mtt_buf:
+ kfree(mtt->buf);
+
+err_free_mtt:
+ kfree(mtt);
+
+ return ERR_PTR(-ENOMEM);
+}
+
+static void erdma_unmap_page_list(struct erdma_dev *dev, dma_addr_t *pg_dma,
+ u32 npages)
+{
+ u32 i;
+
+ for (i = 0; i < npages; i++)
+ dma_unmap_page(&dev->pdev->dev, pg_dma[i], PAGE_SIZE,
+ DMA_TO_DEVICE);
+}
+
+static void erdma_destroy_mtt_buf_dma_addrs(struct erdma_dev *dev,
+ struct erdma_mtt *mtt)
+{
+ erdma_unmap_page_list(dev, mtt->dma_addrs, mtt->npages);
+ vfree(mtt->dma_addrs);
+}
+
+static void erdma_destroy_scatter_mtt(struct erdma_dev *dev,
+ struct erdma_mtt *mtt)
+{
+ erdma_destroy_mtt_buf_dma_addrs(dev, mtt);
+ vfree(mtt->buf);
+ kfree(mtt);
+}
+
+static void erdma_init_middle_mtt(struct erdma_mtt *mtt,
+ struct erdma_mtt *low_mtt)
+{
+ dma_addr_t *pg_addr = mtt->buf;
+ u32 i;
+
+ for (i = 0; i < low_mtt->npages; i++)
+ pg_addr[i] = low_mtt->dma_addrs[i];
+}
+
+static u32 vmalloc_to_dma_addrs(struct erdma_dev *dev, dma_addr_t **dma_addrs,
+ void *buf, u64 len)
+{
+ dma_addr_t *pg_dma;
+ struct page *pg;
+ u32 npages, i;
+ void *addr;
+
+ npages = (PAGE_ALIGN((u64)buf + len) - PAGE_ALIGN_DOWN((u64)buf)) >>
+ PAGE_SHIFT;
+ pg_dma = vcalloc(npages, sizeof(*pg_dma));
+ if (!pg_dma)
+ return 0;
+
+ addr = buf;
+ for (i = 0; i < npages; i++) {
+ pg = vmalloc_to_page(addr);
+ if (!pg)
+ goto err;
+
+ pg_dma[i] = dma_map_page(&dev->pdev->dev, pg, 0, PAGE_SIZE,
+ DMA_TO_DEVICE);
+ if (dma_mapping_error(&dev->pdev->dev, pg_dma[i]))
+ goto err;
+
+ addr += PAGE_SIZE;
+ }
+
+ *dma_addrs = pg_dma;
+
+ return npages;
+err:
+ erdma_unmap_page_list(dev, pg_dma, i);
+ vfree(pg_dma);
+
+ return 0;
+}
+
+static int erdma_create_mtt_buf_dma_addrs(struct erdma_dev *dev,
+ struct erdma_mtt *mtt)
+{
+ dma_addr_t *addrs;
+ u32 npages;
+
+ /* Failed if buf is not page aligned */
+ if ((uintptr_t)mtt->buf & ~PAGE_MASK)
+ return -EINVAL;
+
+ npages = vmalloc_to_dma_addrs(dev, &addrs, mtt->buf, mtt->size);
+ if (!npages)
+ return -ENOMEM;
+
+ mtt->dma_addrs = addrs;
+ mtt->npages = npages;
+
+ return 0;
+}
+
+static struct erdma_mtt *erdma_create_scatter_mtt(struct erdma_dev *dev,
+ size_t size)
+{
+ struct erdma_mtt *mtt;
+ int ret = -ENOMEM;
+
+ mtt = kzalloc(sizeof(*mtt), GFP_KERNEL);
+ if (!mtt)
+ return ERR_PTR(-ENOMEM);
+
+ mtt->size = ALIGN(size, PAGE_SIZE);
+ mtt->buf = vzalloc(mtt->size);
+ mtt->continuous = false;
+ if (!mtt->buf)
+ goto err_free_mtt;
+
+ ret = erdma_create_mtt_buf_dma_addrs(dev, mtt);
+ if (ret)
+ goto err_free_mtt_buf;
+
+ ibdev_dbg(&dev->ibdev, "create scatter mtt, size:%lu, npages:%u\n",
+ mtt->size, mtt->npages);
+
+ return mtt;
+
+err_free_mtt_buf:
+ vfree(mtt->buf);
+
+err_free_mtt:
+ kfree(mtt);
+
+ return ERR_PTR(ret);
+}
+
+static struct erdma_mtt *erdma_create_mtt(struct erdma_dev *dev, size_t size,
+ bool force_continuous)
+{
+ struct erdma_mtt *mtt, *tmp_mtt;
+ int ret, level = 0;
+
+ ibdev_dbg(&dev->ibdev, "create_mtt, size:%lu, force cont:%d\n", size,
+ force_continuous);
+
+ if (!(dev->attrs.cap_flags & ERDMA_DEV_CAP_FLAGS_MTT_VA))
+ force_continuous = true;
+
+ if (force_continuous)
+ return erdma_create_cont_mtt(dev, size);
+
+ mtt = erdma_create_scatter_mtt(dev, size);
+ if (IS_ERR(mtt))
+ return mtt;
+ level = 1;
+
+ /* convergence the mtt table. */
+ while (mtt->npages != 1 && level <= 3) {
+ tmp_mtt = erdma_create_scatter_mtt(dev, MTT_SIZE(mtt->npages));
+ if (IS_ERR(tmp_mtt)) {
+ ret = PTR_ERR(tmp_mtt);
+ goto err_free_mtt;
+ }
+ erdma_init_middle_mtt(tmp_mtt, mtt);
+ tmp_mtt->low_level = mtt;
+ mtt = tmp_mtt;
+ level++;
+ }
+
+ if (level > 3) {
+ ret = -ENOMEM;
+ goto err_free_mtt;
+ }
+
+ mtt->level = level;
+ ibdev_dbg(&dev->ibdev, "top mtt: level:%d, dma_addr 0x%llx\n",
+ mtt->level, mtt->dma_addrs[0]);
+
+ return mtt;
+err_free_mtt:
+ while (mtt) {
+ tmp_mtt = mtt->low_level;
+ erdma_destroy_scatter_mtt(dev, mtt);
+ mtt = tmp_mtt;
+ }
+
+ return ERR_PTR(ret);
+}
+
+static void erdma_destroy_mtt(struct erdma_dev *dev, struct erdma_mtt *mtt)
+{
+ struct erdma_mtt *tmp_mtt;
+
+ if (mtt->continuous) {
+ dma_unmap_single(&dev->pdev->dev, mtt->buf_dma, mtt->size,
+ DMA_TO_DEVICE);
+ kfree(mtt->buf);
+ kfree(mtt);
+ } else {
+ while (mtt) {
+ tmp_mtt = mtt->low_level;
+ erdma_destroy_scatter_mtt(dev, mtt);
+ mtt = tmp_mtt;
+ }
+ }
+}
+
+static int get_mtt_entries(struct erdma_dev *dev, struct erdma_mem *mem,
+ u64 start, u64 len, int access, u64 virt,
+ unsigned long req_page_size, bool force_continuous)
+{
+ int ret = 0;
+
+ mem->umem = ib_umem_get(&dev->ibdev, start, len, access);
+ if (IS_ERR(mem->umem)) {
+ ret = PTR_ERR(mem->umem);
+ mem->umem = NULL;
+ return ret;
+ }
+
+ mem->va = virt;
+ mem->len = len;
+ mem->page_size = ib_umem_find_best_pgsz(mem->umem, req_page_size, virt);
+ mem->page_offset = start & (mem->page_size - 1);
+ mem->mtt_nents = ib_umem_num_dma_blocks(mem->umem, mem->page_size);
+ mem->page_cnt = mem->mtt_nents;
+ mem->mtt = erdma_create_mtt(dev, MTT_SIZE(mem->page_cnt),
+ force_continuous);
+ if (IS_ERR(mem->mtt)) {
+ ret = PTR_ERR(mem->mtt);
+ goto error_ret;
+ }
+
+ erdma_fill_bottom_mtt(dev, mem);
+
+ return 0;
+
+error_ret:
+ if (mem->umem) {
+ ib_umem_release(mem->umem);
+ mem->umem = NULL;
+ }
+
+ return ret;
+}
+
+static void put_mtt_entries(struct erdma_dev *dev, struct erdma_mem *mem)
+{
+ if (mem->mtt)
+ erdma_destroy_mtt(dev, mem->mtt);
+
+ if (mem->umem) {
+ ib_umem_release(mem->umem);
+ mem->umem = NULL;
+ }
+}
+
+static int erdma_map_user_dbrecords(struct erdma_ucontext *ctx,
+ u64 dbrecords_va,
+ struct erdma_user_dbrecords_page **dbr_page,
+ dma_addr_t *dma_addr)
+{
+ struct erdma_user_dbrecords_page *page = NULL;
+ int rv = 0;
+
+ mutex_lock(&ctx->dbrecords_page_mutex);
+
+ list_for_each_entry(page, &ctx->dbrecords_page_list, list)
+ if (page->va == (dbrecords_va & PAGE_MASK))
+ goto found;
+
+ page = kmalloc(sizeof(*page), GFP_KERNEL);
+ if (!page) {
+ rv = -ENOMEM;
+ goto out;
+ }
+
+ page->va = (dbrecords_va & PAGE_MASK);
+ page->refcnt = 0;
+
+ page->umem = ib_umem_get(ctx->ibucontext.device,
+ dbrecords_va & PAGE_MASK, PAGE_SIZE, 0);
+ if (IS_ERR(page->umem)) {
+ rv = PTR_ERR(page->umem);
+ kfree(page);
+ goto out;
+ }
+
+ list_add(&page->list, &ctx->dbrecords_page_list);
+
+found:
+ *dma_addr = sg_dma_address(page->umem->sgt_append.sgt.sgl) +
+ (dbrecords_va & ~PAGE_MASK);
+ *dbr_page = page;
+ page->refcnt++;
+
+out:
+ mutex_unlock(&ctx->dbrecords_page_mutex);
+ return rv;
+}
+
+static void
+erdma_unmap_user_dbrecords(struct erdma_ucontext *ctx,
+ struct erdma_user_dbrecords_page **dbr_page)
+{
+ if (!ctx || !(*dbr_page))
+ return;
+
+ mutex_lock(&ctx->dbrecords_page_mutex);
+ if (--(*dbr_page)->refcnt == 0) {
+ list_del(&(*dbr_page)->list);
+ ib_umem_release((*dbr_page)->umem);
+ kfree(*dbr_page);
+ }
+
+ *dbr_page = NULL;
+ mutex_unlock(&ctx->dbrecords_page_mutex);
+}
+
+static int init_user_qp(struct erdma_qp *qp, struct erdma_ucontext *uctx,
+ u64 va, u32 len, u64 dbrec_va)
+{
+ dma_addr_t dbrec_dma;
+ u32 rq_offset;
+ int ret;
+
+ if (len < (ALIGN(qp->attrs.sq_size * SQEBB_SIZE, ERDMA_HW_PAGE_SIZE) +
+ qp->attrs.rq_size * RQE_SIZE))
+ return -EINVAL;
+
+ ret = get_mtt_entries(qp->dev, &qp->user_qp.sq_mem, va,
+ qp->attrs.sq_size << SQEBB_SHIFT, 0, va,
+ (SZ_1M - SZ_4K), true);
+ if (ret)
+ return ret;
+
+ rq_offset = ALIGN(qp->attrs.sq_size << SQEBB_SHIFT, ERDMA_HW_PAGE_SIZE);
+ qp->user_qp.rq_offset = rq_offset;
+
+ ret = get_mtt_entries(qp->dev, &qp->user_qp.rq_mem, va + rq_offset,
+ qp->attrs.rq_size << RQE_SHIFT, 0, va + rq_offset,
+ (SZ_1M - SZ_4K), true);
+ if (ret)
+ goto put_sq_mtt;
+
+ ret = erdma_map_user_dbrecords(uctx, dbrec_va,
+ &qp->user_qp.user_dbr_page,
+ &dbrec_dma);
+ if (ret)
+ goto put_rq_mtt;
+
+ qp->user_qp.sq_dbrec_dma = dbrec_dma;
+ qp->user_qp.rq_dbrec_dma = dbrec_dma + ERDMA_DB_SIZE;
+
+ return 0;
+
+put_rq_mtt:
+ put_mtt_entries(qp->dev, &qp->user_qp.rq_mem);
+
+put_sq_mtt:
+ put_mtt_entries(qp->dev, &qp->user_qp.sq_mem);
+
+ return ret;
+}
+
+static void free_user_qp(struct erdma_qp *qp, struct erdma_ucontext *uctx)
+{
+ put_mtt_entries(qp->dev, &qp->user_qp.sq_mem);
+ put_mtt_entries(qp->dev, &qp->user_qp.rq_mem);
+ erdma_unmap_user_dbrecords(uctx, &qp->user_qp.user_dbr_page);
+}
+
+int erdma_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs,
+ struct ib_udata *udata)
+{
+ struct erdma_qp *qp = to_eqp(ibqp);
+ struct erdma_dev *dev = to_edev(ibqp->device);
+ struct erdma_ucontext *uctx = rdma_udata_to_drv_context(
+ udata, struct erdma_ucontext, ibucontext);
+ struct erdma_ureq_create_qp ureq;
+ struct erdma_uresp_create_qp uresp;
+ void *old_entry;
+ int ret = 0;
+
+ ret = erdma_qp_validate_cap(dev, attrs);
+ if (ret)
+ goto err_out;
+
+ ret = erdma_qp_validate_attr(dev, attrs);
+ if (ret)
+ goto err_out;
+
+ qp->scq = to_ecq(attrs->send_cq);
+ qp->rcq = to_ecq(attrs->recv_cq);
+ qp->dev = dev;
+ qp->attrs.cc = dev->attrs.cc;
+
+ init_rwsem(&qp->state_lock);
+ kref_init(&qp->ref);
+ init_completion(&qp->safe_free);
+
+ if (qp->ibqp.qp_type == IB_QPT_GSI) {
+ old_entry = xa_store(&dev->qp_xa, 1, qp, GFP_KERNEL);
+ if (xa_is_err(old_entry))
+ ret = xa_err(old_entry);
+ else
+ qp->ibqp.qp_num = 1;
+ } else {
+ ret = xa_alloc_cyclic(&dev->qp_xa, &qp->ibqp.qp_num, qp,
+ XA_LIMIT(1, dev->attrs.max_qp - 1),
+ &dev->next_alloc_qpn, GFP_KERNEL);
+ }
+
+ if (ret < 0) {
+ ret = -ENOMEM;
+ goto err_out;
+ }
+
+ qp->attrs.sq_size = roundup_pow_of_two(attrs->cap.max_send_wr *
+ ERDMA_MAX_WQEBB_PER_SQE);
+ qp->attrs.rq_size = roundup_pow_of_two(attrs->cap.max_recv_wr);
+
+ if (uctx) {
+ ret = ib_copy_from_udata(&ureq, udata,
+ min(sizeof(ureq), udata->inlen));
+ if (ret)
+ goto err_out_xa;
+
+ ret = init_user_qp(qp, uctx, ureq.qbuf_va, ureq.qbuf_len,
+ ureq.db_record_va);
+ if (ret)
+ goto err_out_xa;
+
+ memset(&uresp, 0, sizeof(uresp));
+
+ uresp.num_sqe = qp->attrs.sq_size;
+ uresp.num_rqe = qp->attrs.rq_size;
+ uresp.qp_id = QP_ID(qp);
+ uresp.rq_offset = qp->user_qp.rq_offset;
+
+ ret = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
+ if (ret)
+ goto err_out_cmd;
+ } else {
+ ret = init_kernel_qp(dev, qp, attrs);
+ if (ret)
+ goto err_out_xa;
+ }
+
+ qp->attrs.max_send_sge = attrs->cap.max_send_sge;
+ qp->attrs.max_recv_sge = attrs->cap.max_recv_sge;
+
+ if (erdma_device_iwarp(qp->dev))
+ qp->attrs.iwarp.state = ERDMA_QPS_IWARP_IDLE;
+ else
+ qp->attrs.rocev2.state = ERDMA_QPS_ROCEV2_RESET;
+
+ INIT_DELAYED_WORK(&qp->reflush_dwork, erdma_flush_worker);
+
+ ret = create_qp_cmd(uctx, qp);
+ if (ret)
+ goto err_out_cmd;
+
+ spin_lock_init(&qp->lock);
+
+ return 0;
+
+err_out_cmd:
+ if (uctx)
+ free_user_qp(qp, uctx);
+ else
+ free_kernel_qp(qp);
+err_out_xa:
+ xa_erase(&dev->qp_xa, QP_ID(qp));
+err_out:
+ return ret;
+}
+
+static int erdma_create_stag(struct erdma_dev *dev, u32 *stag)
+{
+ int stag_idx;
+
+ stag_idx = erdma_alloc_idx(&dev->res_cb[ERDMA_RES_TYPE_STAG_IDX]);
+ if (stag_idx < 0)
+ return stag_idx;
+
+ /* For now, we always let key field be zero. */
+ *stag = (stag_idx << 8);
+
+ return 0;
+}
+
+struct ib_mr *erdma_get_dma_mr(struct ib_pd *ibpd, int acc)
+{
+ struct erdma_dev *dev = to_edev(ibpd->device);
+ struct erdma_mr *mr;
+ u32 stag;
+ int ret;
+
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+ ret = erdma_create_stag(dev, &stag);
+ if (ret)
+ goto out_free;
+
+ mr->type = ERDMA_MR_TYPE_DMA;
+
+ mr->ibmr.lkey = stag;
+ mr->ibmr.rkey = stag;
+ mr->ibmr.pd = ibpd;
+ mr->access = ERDMA_MR_ACC_LR | to_erdma_access_flags(acc);
+ ret = regmr_cmd(dev, mr);
+ if (ret)
+ goto out_remove_stag;
+
+ return &mr->ibmr;
+
+out_remove_stag:
+ erdma_free_idx(&dev->res_cb[ERDMA_RES_TYPE_STAG_IDX],
+ mr->ibmr.lkey >> 8);
+
+out_free:
+ kfree(mr);
+
+ return ERR_PTR(ret);
+}
+
+struct ib_mr *erdma_ib_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type,
+ u32 max_num_sg)
+{
+ struct erdma_mr *mr;
+ struct erdma_dev *dev = to_edev(ibpd->device);
+ int ret;
+ u32 stag;
+
+ if (mr_type != IB_MR_TYPE_MEM_REG)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ if (max_num_sg > ERDMA_MR_MAX_MTT_CNT)
+ return ERR_PTR(-EINVAL);
+
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+ ret = erdma_create_stag(dev, &stag);
+ if (ret)
+ goto out_free;
+
+ mr->type = ERDMA_MR_TYPE_FRMR;
+
+ mr->ibmr.lkey = stag;
+ mr->ibmr.rkey = stag;
+ mr->ibmr.pd = ibpd;
+ /* update it in FRMR. */
+ mr->access = ERDMA_MR_ACC_LR | ERDMA_MR_ACC_LW | ERDMA_MR_ACC_RR |
+ ERDMA_MR_ACC_RW;
+
+ mr->mem.page_size = PAGE_SIZE; /* update it later. */
+ mr->mem.page_cnt = max_num_sg;
+ mr->mem.mtt = erdma_create_mtt(dev, MTT_SIZE(max_num_sg), true);
+ if (IS_ERR(mr->mem.mtt)) {
+ ret = PTR_ERR(mr->mem.mtt);
+ goto out_remove_stag;
+ }
+
+ ret = regmr_cmd(dev, mr);
+ if (ret)
+ goto out_destroy_mtt;
+
+ return &mr->ibmr;
+
+out_destroy_mtt:
+ erdma_destroy_mtt(dev, mr->mem.mtt);
+
+out_remove_stag:
+ erdma_free_idx(&dev->res_cb[ERDMA_RES_TYPE_STAG_IDX],
+ mr->ibmr.lkey >> 8);
+
+out_free:
+ kfree(mr);
+
+ return ERR_PTR(ret);
+}
+
+static int erdma_set_page(struct ib_mr *ibmr, u64 addr)
+{
+ struct erdma_mr *mr = to_emr(ibmr);
+
+ if (mr->mem.mtt_nents >= mr->mem.page_cnt)
+ return -1;
+
+ mr->mem.mtt->buf[mr->mem.mtt_nents] = addr;
+ mr->mem.mtt_nents++;
+
+ return 0;
+}
+
+int erdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
+ unsigned int *sg_offset)
+{
+ struct erdma_mr *mr = to_emr(ibmr);
+ int num;
+
+ mr->mem.mtt_nents = 0;
+
+ num = ib_sg_to_pages(&mr->ibmr, sg, sg_nents, sg_offset,
+ erdma_set_page);
+
+ return num;
+}
+
+struct ib_mr *erdma_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
+ u64 virt, int access, struct ib_dmah *dmah,
+ struct ib_udata *udata)
+{
+ struct erdma_mr *mr = NULL;
+ struct erdma_dev *dev = to_edev(ibpd->device);
+ u32 stag;
+ int ret;
+
+ if (dmah)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ if (!len || len > dev->attrs.max_mr_size)
+ return ERR_PTR(-EINVAL);
+
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+ ret = get_mtt_entries(dev, &mr->mem, start, len, access, virt,
+ SZ_2G - SZ_4K, false);
+ if (ret)
+ goto err_out_free;
+
+ ret = erdma_create_stag(dev, &stag);
+ if (ret)
+ goto err_out_put_mtt;
+
+ mr->ibmr.lkey = mr->ibmr.rkey = stag;
+ mr->ibmr.pd = ibpd;
+ mr->mem.va = virt;
+ mr->mem.len = len;
+ mr->access = ERDMA_MR_ACC_LR | to_erdma_access_flags(access);
+ mr->valid = 1;
+ mr->type = ERDMA_MR_TYPE_NORMAL;
+
+ ret = regmr_cmd(dev, mr);
+ if (ret)
+ goto err_out_mr;
+
+ return &mr->ibmr;
+
+err_out_mr:
+ erdma_free_idx(&dev->res_cb[ERDMA_RES_TYPE_STAG_IDX],
+ mr->ibmr.lkey >> 8);
+
+err_out_put_mtt:
+ put_mtt_entries(dev, &mr->mem);
+
+err_out_free:
+ kfree(mr);
+
+ return ERR_PTR(ret);
+}
+
+int erdma_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
+{
+ struct erdma_mr *mr;
+ struct erdma_dev *dev = to_edev(ibmr->device);
+ struct erdma_cmdq_dereg_mr_req req;
+ int ret;
+
+ mr = to_emr(ibmr);
+
+ erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
+ CMDQ_OPCODE_DEREG_MR);
+
+ req.cfg = FIELD_PREP(ERDMA_CMD_MR_MPT_IDX_MASK, ibmr->lkey >> 8) |
+ FIELD_PREP(ERDMA_CMD_MR_KEY_MASK, ibmr->lkey & 0xFF);
+
+ ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL,
+ true);
+ if (ret)
+ return ret;
+
+ erdma_free_idx(&dev->res_cb[ERDMA_RES_TYPE_STAG_IDX], ibmr->lkey >> 8);
+
+ put_mtt_entries(dev, &mr->mem);
+
+ kfree(mr);
+ return 0;
+}
+
+int erdma_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
+{
+ struct erdma_cq *cq = to_ecq(ibcq);
+ struct erdma_dev *dev = to_edev(ibcq->device);
+ struct erdma_ucontext *ctx = rdma_udata_to_drv_context(
+ udata, struct erdma_ucontext, ibucontext);
+ int err;
+ struct erdma_cmdq_destroy_cq_req req;
+
+ erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
+ CMDQ_OPCODE_DESTROY_CQ);
+ req.cqn = cq->cqn;
+
+ err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL,
+ true);
+ if (err)
+ return err;
+
+ if (rdma_is_kernel_res(&cq->ibcq.res)) {
+ dma_free_coherent(&dev->pdev->dev, cq->depth << CQE_SHIFT,
+ cq->kern_cq.qbuf, cq->kern_cq.qbuf_dma_addr);
+ dma_pool_free(dev->db_pool, cq->kern_cq.dbrec,
+ cq->kern_cq.dbrec_dma);
+ } else {
+ erdma_unmap_user_dbrecords(ctx, &cq->user_cq.user_dbr_page);
+ put_mtt_entries(dev, &cq->user_cq.qbuf_mem);
+ }
+
+ xa_erase(&dev->cq_xa, cq->cqn);
+
+ return 0;
+}
+
+int erdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
+{
+ struct erdma_qp *qp = to_eqp(ibqp);
+ struct erdma_dev *dev = to_edev(ibqp->device);
+ struct erdma_ucontext *ctx = rdma_udata_to_drv_context(
+ udata, struct erdma_ucontext, ibucontext);
+ struct erdma_cmdq_destroy_qp_req req;
+ union erdma_mod_qp_params params;
+ int err;
+
+ down_write(&qp->state_lock);
+ if (erdma_device_iwarp(dev)) {
+ params.iwarp.state = ERDMA_QPS_IWARP_ERROR;
+ erdma_modify_qp_state_iwarp(qp, &params.iwarp,
+ ERDMA_QPA_IWARP_STATE);
+ } else {
+ params.rocev2.state = ERDMA_QPS_ROCEV2_ERROR;
+ erdma_modify_qp_state_rocev2(qp, &params.rocev2,
+ ERDMA_QPA_ROCEV2_STATE);
+ }
+ up_write(&qp->state_lock);
+
+ cancel_delayed_work_sync(&qp->reflush_dwork);
+
+ erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
+ CMDQ_OPCODE_DESTROY_QP);
+ req.qpn = QP_ID(qp);
+
+ err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL,
+ true);
+ if (err)
+ return err;
+
+ erdma_qp_put(qp);
+ wait_for_completion(&qp->safe_free);
+
+ if (rdma_is_kernel_res(&qp->ibqp.res)) {
+ free_kernel_qp(qp);
+ } else {
+ put_mtt_entries(dev, &qp->user_qp.sq_mem);
+ put_mtt_entries(dev, &qp->user_qp.rq_mem);
+ erdma_unmap_user_dbrecords(ctx, &qp->user_qp.user_dbr_page);
+ }
+
+ if (qp->cep)
+ erdma_cep_put(qp->cep);
+ xa_erase(&dev->qp_xa, QP_ID(qp));
+
+ return 0;
+}
+
+void erdma_qp_get_ref(struct ib_qp *ibqp)
+{
+ erdma_qp_get(to_eqp(ibqp));
+}
+
+void erdma_qp_put_ref(struct ib_qp *ibqp)
+{
+ erdma_qp_put(to_eqp(ibqp));
+}
+
+int erdma_mmap(struct ib_ucontext *ctx, struct vm_area_struct *vma)
+{
+ struct rdma_user_mmap_entry *rdma_entry;
+ struct erdma_user_mmap_entry *entry;
+ pgprot_t prot;
+ int err;
+
+ rdma_entry = rdma_user_mmap_entry_get(ctx, vma);
+ if (!rdma_entry)
+ return -EINVAL;
+
+ entry = to_emmap(rdma_entry);
+
+ switch (entry->mmap_flag) {
+ case ERDMA_MMAP_IO_NC:
+ /* map doorbell. */
+ prot = pgprot_device(vma->vm_page_prot);
+ break;
+ default:
+ err = -EINVAL;
+ goto put_entry;
+ }
+
+ err = rdma_user_mmap_io(ctx, vma, PFN_DOWN(entry->address), PAGE_SIZE,
+ prot, rdma_entry);
+
+put_entry:
+ rdma_user_mmap_entry_put(rdma_entry);
+ return err;
+}
+
+void erdma_mmap_free(struct rdma_user_mmap_entry *rdma_entry)
+{
+ struct erdma_user_mmap_entry *entry = to_emmap(rdma_entry);
+
+ kfree(entry);
+}
+
+static int alloc_db_resources(struct erdma_dev *dev, struct erdma_ucontext *ctx,
+ bool ext_db_en)
+{
+ struct erdma_cmdq_ext_db_req req = {};
+ u64 val0, val1;
+ int ret;
+
+ /*
+ * CAP_SYS_RAWIO is required if hardware does not support extend
+ * doorbell mechanism.
+ */
+ if (!ext_db_en && !capable(CAP_SYS_RAWIO))
+ return -EPERM;
+
+ if (!ext_db_en) {
+ ctx->sdb = dev->func_bar_addr + ERDMA_BAR_SQDB_SPACE_OFFSET;
+ ctx->rdb = dev->func_bar_addr + ERDMA_BAR_RQDB_SPACE_OFFSET;
+ ctx->cdb = dev->func_bar_addr + ERDMA_BAR_CQDB_SPACE_OFFSET;
+ return 0;
+ }
+
+ erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_COMMON,
+ CMDQ_OPCODE_ALLOC_DB);
+
+ req.cfg = FIELD_PREP(ERDMA_CMD_EXT_DB_CQ_EN_MASK, 1) |
+ FIELD_PREP(ERDMA_CMD_EXT_DB_RQ_EN_MASK, 1) |
+ FIELD_PREP(ERDMA_CMD_EXT_DB_SQ_EN_MASK, 1);
+
+ ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), &val0, &val1,
+ true);
+ if (ret)
+ return ret;
+
+ ctx->ext_db.enable = true;
+ ctx->ext_db.sdb_off = ERDMA_GET(val0, ALLOC_DB_RESP_SDB);
+ ctx->ext_db.rdb_off = ERDMA_GET(val0, ALLOC_DB_RESP_RDB);
+ ctx->ext_db.cdb_off = ERDMA_GET(val0, ALLOC_DB_RESP_CDB);
+
+ ctx->sdb = dev->func_bar_addr + (ctx->ext_db.sdb_off << PAGE_SHIFT);
+ ctx->cdb = dev->func_bar_addr + (ctx->ext_db.rdb_off << PAGE_SHIFT);
+ ctx->rdb = dev->func_bar_addr + (ctx->ext_db.cdb_off << PAGE_SHIFT);
+
+ return 0;
+}
+
+static void free_db_resources(struct erdma_dev *dev, struct erdma_ucontext *ctx)
+{
+ struct erdma_cmdq_ext_db_req req = {};
+ int ret;
+
+ if (!ctx->ext_db.enable)
+ return;
+
+ erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_COMMON,
+ CMDQ_OPCODE_FREE_DB);
+
+ req.cfg = FIELD_PREP(ERDMA_CMD_EXT_DB_CQ_EN_MASK, 1) |
+ FIELD_PREP(ERDMA_CMD_EXT_DB_RQ_EN_MASK, 1) |
+ FIELD_PREP(ERDMA_CMD_EXT_DB_SQ_EN_MASK, 1);
+
+ req.sdb_off = ctx->ext_db.sdb_off;
+ req.rdb_off = ctx->ext_db.rdb_off;
+ req.cdb_off = ctx->ext_db.cdb_off;
+
+ ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL,
+ true);
+ if (ret)
+ ibdev_err_ratelimited(&dev->ibdev,
+ "free db resources failed %d", ret);
+}
+
+static void erdma_uctx_user_mmap_entries_remove(struct erdma_ucontext *uctx)
+{
+ rdma_user_mmap_entry_remove(uctx->sq_db_mmap_entry);
+ rdma_user_mmap_entry_remove(uctx->rq_db_mmap_entry);
+ rdma_user_mmap_entry_remove(uctx->cq_db_mmap_entry);
+}
+
+int erdma_alloc_ucontext(struct ib_ucontext *ibctx, struct ib_udata *udata)
+{
+ struct erdma_ucontext *ctx = to_ectx(ibctx);
+ struct erdma_dev *dev = to_edev(ibctx->device);
+ int ret;
+ struct erdma_uresp_alloc_ctx uresp = {};
+
+ if (atomic_inc_return(&dev->num_ctx) > ERDMA_MAX_CONTEXT) {
+ ret = -ENOMEM;
+ goto err_out;
+ }
+
+ if (udata->outlen < sizeof(uresp)) {
+ ret = -EINVAL;
+ goto err_out;
+ }
+
+ INIT_LIST_HEAD(&ctx->dbrecords_page_list);
+ mutex_init(&ctx->dbrecords_page_mutex);
+
+ ret = alloc_db_resources(dev, ctx,
+ !!(dev->attrs.cap_flags &
+ ERDMA_DEV_CAP_FLAGS_EXTEND_DB));
+ if (ret)
+ goto err_out;
+
+ ctx->sq_db_mmap_entry = erdma_user_mmap_entry_insert(
+ ctx, (void *)ctx->sdb, PAGE_SIZE, ERDMA_MMAP_IO_NC, &uresp.sdb);
+ if (!ctx->sq_db_mmap_entry) {
+ ret = -ENOMEM;
+ goto err_free_ext_db;
+ }
+
+ ctx->rq_db_mmap_entry = erdma_user_mmap_entry_insert(
+ ctx, (void *)ctx->rdb, PAGE_SIZE, ERDMA_MMAP_IO_NC, &uresp.rdb);
+ if (!ctx->rq_db_mmap_entry) {
+ ret = -EINVAL;
+ goto err_put_mmap_entries;
+ }
+
+ ctx->cq_db_mmap_entry = erdma_user_mmap_entry_insert(
+ ctx, (void *)ctx->cdb, PAGE_SIZE, ERDMA_MMAP_IO_NC, &uresp.cdb);
+ if (!ctx->cq_db_mmap_entry) {
+ ret = -EINVAL;
+ goto err_put_mmap_entries;
+ }
+
+ uresp.dev_id = dev->pdev->device;
+
+ ret = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
+ if (ret)
+ goto err_put_mmap_entries;
+
+ return 0;
+
+err_put_mmap_entries:
+ erdma_uctx_user_mmap_entries_remove(ctx);
+
+err_free_ext_db:
+ free_db_resources(dev, ctx);
+
+err_out:
+ atomic_dec(&dev->num_ctx);
+ return ret;
+}
+
+void erdma_dealloc_ucontext(struct ib_ucontext *ibctx)
+{
+ struct erdma_dev *dev = to_edev(ibctx->device);
+ struct erdma_ucontext *ctx = to_ectx(ibctx);
+
+ erdma_uctx_user_mmap_entries_remove(ctx);
+ free_db_resources(dev, ctx);
+ atomic_dec(&dev->num_ctx);
+}
+
+static void erdma_attr_to_av(const struct rdma_ah_attr *ah_attr,
+ struct erdma_av *av, u16 sport)
+{
+ const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr);
+
+ av->port = rdma_ah_get_port_num(ah_attr);
+ av->sgid_index = grh->sgid_index;
+ av->hop_limit = grh->hop_limit;
+ av->traffic_class = grh->traffic_class;
+ av->sl = rdma_ah_get_sl(ah_attr);
+
+ av->flow_label = grh->flow_label;
+ av->udp_sport = sport;
+
+ ether_addr_copy(av->dmac, ah_attr->roce.dmac);
+ memcpy(av->dgid, grh->dgid.raw, ERDMA_ROCEV2_GID_SIZE);
+
+ if (ipv6_addr_v4mapped((struct in6_addr *)&grh->dgid))
+ av->ntype = ERDMA_NETWORK_TYPE_IPV4;
+ else
+ av->ntype = ERDMA_NETWORK_TYPE_IPV6;
+}
+
+static void erdma_av_to_attr(struct erdma_av *av, struct rdma_ah_attr *ah_attr)
+{
+ ah_attr->type = RDMA_AH_ATTR_TYPE_ROCE;
+
+ rdma_ah_set_sl(ah_attr, av->sl);
+ rdma_ah_set_port_num(ah_attr, av->port);
+ rdma_ah_set_ah_flags(ah_attr, IB_AH_GRH);
+
+ rdma_ah_set_grh(ah_attr, NULL, av->flow_label, av->sgid_index,
+ av->hop_limit, av->traffic_class);
+ rdma_ah_set_dgid_raw(ah_attr, av->dgid);
+}
+
+static int ib_qps_to_erdma_qps[ERDMA_PROTO_COUNT][IB_QPS_ERR + 1] = {
+ [ERDMA_PROTO_IWARP] = {
+ [IB_QPS_RESET] = ERDMA_QPS_IWARP_IDLE,
+ [IB_QPS_INIT] = ERDMA_QPS_IWARP_IDLE,
+ [IB_QPS_RTR] = ERDMA_QPS_IWARP_RTR,
+ [IB_QPS_RTS] = ERDMA_QPS_IWARP_RTS,
+ [IB_QPS_SQD] = ERDMA_QPS_IWARP_CLOSING,
+ [IB_QPS_SQE] = ERDMA_QPS_IWARP_TERMINATE,
+ [IB_QPS_ERR] = ERDMA_QPS_IWARP_ERROR,
+ },
+ [ERDMA_PROTO_ROCEV2] = {
+ [IB_QPS_RESET] = ERDMA_QPS_ROCEV2_RESET,
+ [IB_QPS_INIT] = ERDMA_QPS_ROCEV2_INIT,
+ [IB_QPS_RTR] = ERDMA_QPS_ROCEV2_RTR,
+ [IB_QPS_RTS] = ERDMA_QPS_ROCEV2_RTS,
+ [IB_QPS_SQD] = ERDMA_QPS_ROCEV2_SQD,
+ [IB_QPS_SQE] = ERDMA_QPS_ROCEV2_SQE,
+ [IB_QPS_ERR] = ERDMA_QPS_ROCEV2_ERROR,
+ },
+};
+
+static int erdma_qps_to_ib_qps[ERDMA_PROTO_COUNT][ERDMA_QPS_ROCEV2_COUNT] = {
+ [ERDMA_PROTO_IWARP] = {
+ [ERDMA_QPS_IWARP_IDLE] = IB_QPS_INIT,
+ [ERDMA_QPS_IWARP_RTR] = IB_QPS_RTR,
+ [ERDMA_QPS_IWARP_RTS] = IB_QPS_RTS,
+ [ERDMA_QPS_IWARP_CLOSING] = IB_QPS_ERR,
+ [ERDMA_QPS_IWARP_TERMINATE] = IB_QPS_ERR,
+ [ERDMA_QPS_IWARP_ERROR] = IB_QPS_ERR,
+ },
+ [ERDMA_PROTO_ROCEV2] = {
+ [ERDMA_QPS_ROCEV2_RESET] = IB_QPS_RESET,
+ [ERDMA_QPS_ROCEV2_INIT] = IB_QPS_INIT,
+ [ERDMA_QPS_ROCEV2_RTR] = IB_QPS_RTR,
+ [ERDMA_QPS_ROCEV2_RTS] = IB_QPS_RTS,
+ [ERDMA_QPS_ROCEV2_SQD] = IB_QPS_SQD,
+ [ERDMA_QPS_ROCEV2_SQE] = IB_QPS_SQE,
+ [ERDMA_QPS_ROCEV2_ERROR] = IB_QPS_ERR,
+ },
+};
+
+static inline enum erdma_qps_iwarp ib_to_iwarp_qps(enum ib_qp_state state)
+{
+ return ib_qps_to_erdma_qps[ERDMA_PROTO_IWARP][state];
+}
+
+static inline enum erdma_qps_rocev2 ib_to_rocev2_qps(enum ib_qp_state state)
+{
+ return ib_qps_to_erdma_qps[ERDMA_PROTO_ROCEV2][state];
+}
+
+static inline enum ib_qp_state iwarp_to_ib_qps(enum erdma_qps_iwarp state)
+{
+ return erdma_qps_to_ib_qps[ERDMA_PROTO_IWARP][state];
+}
+
+static inline enum ib_qp_state rocev2_to_ib_qps(enum erdma_qps_rocev2 state)
+{
+ return erdma_qps_to_ib_qps[ERDMA_PROTO_ROCEV2][state];
+}
+
+static int erdma_check_qp_attrs(struct erdma_qp *qp, struct ib_qp_attr *attr,
+ int attr_mask)
+{
+ enum ib_qp_state cur_state, nxt_state;
+ struct erdma_dev *dev = qp->dev;
+ int ret = -EINVAL;
+
+ if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) {
+ ret = -EOPNOTSUPP;
+ goto out;
+ }
+
+ if ((attr_mask & IB_QP_PORT) &&
+ !rdma_is_port_valid(&dev->ibdev, attr->port_num))
+ goto out;
+
+ if (erdma_device_rocev2(dev)) {
+ cur_state = (attr_mask & IB_QP_CUR_STATE) ?
+ attr->cur_qp_state :
+ rocev2_to_ib_qps(qp->attrs.rocev2.state);
+
+ nxt_state = (attr_mask & IB_QP_STATE) ? attr->qp_state :
+ cur_state;
+
+ if (!ib_modify_qp_is_ok(cur_state, nxt_state, qp->ibqp.qp_type,
+ attr_mask))
+ goto out;
+
+ if ((attr_mask & IB_QP_AV) &&
+ erdma_check_gid_attr(
+ rdma_ah_read_grh(&attr->ah_attr)->sgid_attr))
+ goto out;
+
+ if ((attr_mask & IB_QP_PKEY_INDEX) &&
+ attr->pkey_index >= ERDMA_MAX_PKEYS)
+ goto out;
+ }
+
+ return 0;
+
+out:
+ return ret;
+}
+
+static void erdma_init_mod_qp_params_rocev2(
+ struct erdma_qp *qp, struct erdma_mod_qp_params_rocev2 *params,
+ int *erdma_attr_mask, struct ib_qp_attr *attr, int ib_attr_mask)
+{
+ enum erdma_qpa_mask_rocev2 to_modify_attrs = 0;
+ enum erdma_qps_rocev2 cur_state, nxt_state;
+ u16 udp_sport;
+
+ if (ib_attr_mask & IB_QP_CUR_STATE)
+ cur_state = ib_to_rocev2_qps(attr->cur_qp_state);
+ else
+ cur_state = qp->attrs.rocev2.state;
+
+ if (ib_attr_mask & IB_QP_STATE)
+ nxt_state = ib_to_rocev2_qps(attr->qp_state);
+ else
+ nxt_state = cur_state;
+
+ to_modify_attrs |= ERDMA_QPA_ROCEV2_STATE;
+ params->state = nxt_state;
+
+ if (ib_attr_mask & IB_QP_QKEY) {
+ to_modify_attrs |= ERDMA_QPA_ROCEV2_QKEY;
+ params->qkey = attr->qkey;
+ }
+
+ if (ib_attr_mask & IB_QP_SQ_PSN) {
+ to_modify_attrs |= ERDMA_QPA_ROCEV2_SQ_PSN;
+ params->sq_psn = attr->sq_psn;
+ }
+
+ if (ib_attr_mask & IB_QP_RQ_PSN) {
+ to_modify_attrs |= ERDMA_QPA_ROCEV2_RQ_PSN;
+ params->rq_psn = attr->rq_psn;
+ }
+
+ if (ib_attr_mask & IB_QP_DEST_QPN) {
+ to_modify_attrs |= ERDMA_QPA_ROCEV2_DST_QPN;
+ params->dst_qpn = attr->dest_qp_num;
+ }
+
+ if (ib_attr_mask & IB_QP_AV) {
+ to_modify_attrs |= ERDMA_QPA_ROCEV2_AV;
+ udp_sport = rdma_get_udp_sport(attr->ah_attr.grh.flow_label,
+ QP_ID(qp), params->dst_qpn);
+ erdma_attr_to_av(&attr->ah_attr, &params->av, udp_sport);
+ }
+
+ *erdma_attr_mask = to_modify_attrs;
+}
+
+int erdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
+ struct ib_udata *udata)
+{
+ struct erdma_qp *qp = to_eqp(ibqp);
+ union erdma_mod_qp_params params;
+ int ret = 0, erdma_attr_mask = 0;
+
+ down_write(&qp->state_lock);
+
+ ret = erdma_check_qp_attrs(qp, attr, attr_mask);
+ if (ret)
+ goto out;
+
+ if (erdma_device_iwarp(qp->dev)) {
+ if (attr_mask & IB_QP_STATE) {
+ erdma_attr_mask |= ERDMA_QPA_IWARP_STATE;
+ params.iwarp.state = ib_to_iwarp_qps(attr->qp_state);
+ }
+
+ ret = erdma_modify_qp_state_iwarp(qp, &params.iwarp,
+ erdma_attr_mask);
+ } else {
+ erdma_init_mod_qp_params_rocev2(
+ qp, &params.rocev2, &erdma_attr_mask, attr, attr_mask);
+
+ ret = erdma_modify_qp_state_rocev2(qp, &params.rocev2,
+ erdma_attr_mask);
+ }
+
+out:
+ up_write(&qp->state_lock);
+ return ret;
+}
+
+static enum ib_qp_state query_qp_state(struct erdma_qp *qp)
+{
+ if (erdma_device_iwarp(qp->dev))
+ return iwarp_to_ib_qps(qp->attrs.iwarp.state);
+ else
+ return rocev2_to_ib_qps(qp->attrs.rocev2.state);
+}
+
+int erdma_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
+ int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr)
+{
+ struct erdma_cmdq_query_qp_req_rocev2 req;
+ struct erdma_dev *dev;
+ struct erdma_qp *qp;
+ u64 resp0, resp1;
+ int ret;
+
+ if (ibqp && qp_attr && qp_init_attr) {
+ qp = to_eqp(ibqp);
+ dev = to_edev(ibqp->device);
+ } else {
+ return -EINVAL;
+ }
+
+ qp_attr->cap.max_inline_data = ERDMA_MAX_INLINE;
+ qp_init_attr->cap.max_inline_data = ERDMA_MAX_INLINE;
+
+ qp_attr->cap.max_send_wr = qp->attrs.sq_size;
+ qp_attr->cap.max_recv_wr = qp->attrs.rq_size;
+ qp_attr->cap.max_send_sge = qp->attrs.max_send_sge;
+ qp_attr->cap.max_recv_sge = qp->attrs.max_recv_sge;
+
+ qp_attr->path_mtu = ib_mtu_int_to_enum(dev->netdev->mtu);
+ qp_attr->max_rd_atomic = qp->attrs.irq_size;
+ qp_attr->max_dest_rd_atomic = qp->attrs.orq_size;
+
+ qp_attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE |
+ IB_ACCESS_REMOTE_WRITE |
+ IB_ACCESS_REMOTE_READ;
+
+ qp_init_attr->cap = qp_attr->cap;
+
+ if (erdma_device_rocev2(dev)) {
+ /* Query hardware to get some attributes */
+ erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
+ CMDQ_OPCODE_QUERY_QP);
+ req.qpn = QP_ID(qp);
+
+ ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), &resp0,
+ &resp1, true);
+ if (ret)
+ return ret;
+
+ qp_attr->sq_psn =
+ FIELD_GET(ERDMA_CMD_QUERY_QP_RESP_SQ_PSN_MASK, resp0);
+ qp_attr->rq_psn =
+ FIELD_GET(ERDMA_CMD_QUERY_QP_RESP_RQ_PSN_MASK, resp0);
+ qp_attr->qp_state = rocev2_to_ib_qps(FIELD_GET(
+ ERDMA_CMD_QUERY_QP_RESP_QP_STATE_MASK, resp0));
+ qp_attr->cur_qp_state = qp_attr->qp_state;
+ qp_attr->sq_draining = FIELD_GET(
+ ERDMA_CMD_QUERY_QP_RESP_SQ_DRAINING_MASK, resp0);
+
+ qp_attr->pkey_index = 0;
+ qp_attr->dest_qp_num = qp->attrs.rocev2.dst_qpn;
+
+ if (qp->ibqp.qp_type == IB_QPT_RC)
+ erdma_av_to_attr(&qp->attrs.rocev2.av,
+ &qp_attr->ah_attr);
+ } else {
+ qp_attr->qp_state = query_qp_state(qp);
+ qp_attr->cur_qp_state = qp_attr->qp_state;
+ }
+
+ return 0;
+}
+
+static int erdma_init_user_cq(struct erdma_ucontext *ctx, struct erdma_cq *cq,
+ struct erdma_ureq_create_cq *ureq)
+{
+ int ret;
+ struct erdma_dev *dev = to_edev(cq->ibcq.device);
+
+ ret = get_mtt_entries(dev, &cq->user_cq.qbuf_mem, ureq->qbuf_va,
+ ureq->qbuf_len, 0, ureq->qbuf_va, SZ_64M - SZ_4K,
+ true);
+ if (ret)
+ return ret;
+
+ ret = erdma_map_user_dbrecords(ctx, ureq->db_record_va,
+ &cq->user_cq.user_dbr_page,
+ &cq->user_cq.dbrec_dma);
+ if (ret)
+ put_mtt_entries(dev, &cq->user_cq.qbuf_mem);
+
+ return ret;
+}
+
+static int erdma_init_kernel_cq(struct erdma_cq *cq)
+{
+ struct erdma_dev *dev = to_edev(cq->ibcq.device);
+
+ cq->kern_cq.qbuf =
+ dma_alloc_coherent(&dev->pdev->dev, cq->depth << CQE_SHIFT,
+ &cq->kern_cq.qbuf_dma_addr, GFP_KERNEL);
+ if (!cq->kern_cq.qbuf)
+ return -ENOMEM;
+
+ cq->kern_cq.dbrec = dma_pool_zalloc(dev->db_pool, GFP_KERNEL,
+ &cq->kern_cq.dbrec_dma);
+ if (!cq->kern_cq.dbrec)
+ goto err_out;
+
+ spin_lock_init(&cq->kern_cq.lock);
+ /* use default cqdb addr */
+ cq->kern_cq.db = dev->func_bar + ERDMA_BAR_CQDB_SPACE_OFFSET;
+
+ return 0;
+
+err_out:
+ dma_free_coherent(&dev->pdev->dev, cq->depth << CQE_SHIFT,
+ cq->kern_cq.qbuf, cq->kern_cq.qbuf_dma_addr);
+
+ return -ENOMEM;
+}
+
+int erdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_udata *udata = &attrs->driver_udata;
+ struct erdma_cq *cq = to_ecq(ibcq);
+ struct erdma_dev *dev = to_edev(ibcq->device);
+ unsigned int depth = attr->cqe;
+ int ret;
+ struct erdma_ucontext *ctx = rdma_udata_to_drv_context(
+ udata, struct erdma_ucontext, ibucontext);
+
+ if (depth > dev->attrs.max_cqe)
+ return -EINVAL;
+
+ depth = roundup_pow_of_two(depth);
+ cq->ibcq.cqe = depth;
+ cq->depth = depth;
+ cq->assoc_eqn = attr->comp_vector + 1;
+
+ ret = xa_alloc_cyclic(&dev->cq_xa, &cq->cqn, cq,
+ XA_LIMIT(1, dev->attrs.max_cq - 1),
+ &dev->next_alloc_cqn, GFP_KERNEL);
+ if (ret < 0)
+ return ret;
+
+ if (!rdma_is_kernel_res(&ibcq->res)) {
+ struct erdma_ureq_create_cq ureq;
+ struct erdma_uresp_create_cq uresp;
+
+ ret = ib_copy_from_udata(&ureq, udata,
+ min(udata->inlen, sizeof(ureq)));
+ if (ret)
+ goto err_out_xa;
+
+ ret = erdma_init_user_cq(ctx, cq, &ureq);
+ if (ret)
+ goto err_out_xa;
+
+ uresp.cq_id = cq->cqn;
+ uresp.num_cqe = depth;
+
+ ret = ib_copy_to_udata(udata, &uresp,
+ min(sizeof(uresp), udata->outlen));
+ if (ret)
+ goto err_free_res;
+ } else {
+ ret = erdma_init_kernel_cq(cq);
+ if (ret)
+ goto err_out_xa;
+ }
+
+ ret = create_cq_cmd(ctx, cq);
+ if (ret)
+ goto err_free_res;
+
+ return 0;
+
+err_free_res:
+ if (!rdma_is_kernel_res(&ibcq->res)) {
+ erdma_unmap_user_dbrecords(ctx, &cq->user_cq.user_dbr_page);
+ put_mtt_entries(dev, &cq->user_cq.qbuf_mem);
+ } else {
+ dma_free_coherent(&dev->pdev->dev, depth << CQE_SHIFT,
+ cq->kern_cq.qbuf, cq->kern_cq.qbuf_dma_addr);
+ dma_pool_free(dev->db_pool, cq->kern_cq.dbrec,
+ cq->kern_cq.dbrec_dma);
+ }
+
+err_out_xa:
+ xa_erase(&dev->cq_xa, cq->cqn);
+
+ return ret;
+}
+
+void erdma_disassociate_ucontext(struct ib_ucontext *ibcontext)
+{
+}
+
+void erdma_set_mtu(struct erdma_dev *dev, u32 mtu)
+{
+ struct erdma_cmdq_config_mtu_req req;
+
+ erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_COMMON,
+ CMDQ_OPCODE_CONF_MTU);
+ req.mtu = mtu;
+
+ erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL, true);
+}
+
+void erdma_port_event(struct erdma_dev *dev, enum ib_event_type reason)
+{
+ struct ib_event event;
+
+ event.device = &dev->ibdev;
+ event.element.port_num = 1;
+ event.event = reason;
+
+ ib_dispatch_event(&event);
+}
+
+enum counters {
+ ERDMA_STATS_TX_REQS_CNT,
+ ERDMA_STATS_TX_PACKETS_CNT,
+ ERDMA_STATS_TX_BYTES_CNT,
+ ERDMA_STATS_TX_DISABLE_DROP_CNT,
+ ERDMA_STATS_TX_BPS_METER_DROP_CNT,
+ ERDMA_STATS_TX_PPS_METER_DROP_CNT,
+
+ ERDMA_STATS_RX_PACKETS_CNT,
+ ERDMA_STATS_RX_BYTES_CNT,
+ ERDMA_STATS_RX_DISABLE_DROP_CNT,
+ ERDMA_STATS_RX_BPS_METER_DROP_CNT,
+ ERDMA_STATS_RX_PPS_METER_DROP_CNT,
+
+ ERDMA_STATS_MAX
+};
+
+static const struct rdma_stat_desc erdma_descs[] = {
+ [ERDMA_STATS_TX_REQS_CNT].name = "tx_reqs_cnt",
+ [ERDMA_STATS_TX_PACKETS_CNT].name = "tx_packets_cnt",
+ [ERDMA_STATS_TX_BYTES_CNT].name = "tx_bytes_cnt",
+ [ERDMA_STATS_TX_DISABLE_DROP_CNT].name = "tx_disable_drop_cnt",
+ [ERDMA_STATS_TX_BPS_METER_DROP_CNT].name = "tx_bps_limit_drop_cnt",
+ [ERDMA_STATS_TX_PPS_METER_DROP_CNT].name = "tx_pps_limit_drop_cnt",
+ [ERDMA_STATS_RX_PACKETS_CNT].name = "rx_packets_cnt",
+ [ERDMA_STATS_RX_BYTES_CNT].name = "rx_bytes_cnt",
+ [ERDMA_STATS_RX_DISABLE_DROP_CNT].name = "rx_disable_drop_cnt",
+ [ERDMA_STATS_RX_BPS_METER_DROP_CNT].name = "rx_bps_limit_drop_cnt",
+ [ERDMA_STATS_RX_PPS_METER_DROP_CNT].name = "rx_pps_limit_drop_cnt",
+};
+
+struct rdma_hw_stats *erdma_alloc_hw_port_stats(struct ib_device *device,
+ u32 port_num)
+{
+ return rdma_alloc_hw_stats_struct(erdma_descs, ERDMA_STATS_MAX,
+ RDMA_HW_STATS_DEFAULT_LIFESPAN);
+}
+
+static int erdma_query_hw_stats(struct erdma_dev *dev,
+ struct rdma_hw_stats *stats)
+{
+ struct erdma_cmdq_query_stats_resp *resp;
+ struct erdma_cmdq_query_req req;
+ dma_addr_t dma_addr;
+ int err;
+
+ erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_COMMON,
+ CMDQ_OPCODE_GET_STATS);
+
+ resp = dma_pool_zalloc(dev->resp_pool, GFP_KERNEL, &dma_addr);
+ if (!resp)
+ return -ENOMEM;
+
+ req.target_addr = dma_addr;
+ req.target_length = ERDMA_HW_RESP_SIZE;
+
+ err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL,
+ true);
+ if (err)
+ goto out;
+
+ if (resp->hdr.magic != ERDMA_HW_RESP_MAGIC) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ memcpy(&stats->value[0], &resp->tx_req_cnt,
+ sizeof(u64) * stats->num_counters);
+
+out:
+ dma_pool_free(dev->resp_pool, resp, dma_addr);
+
+ return err;
+}
+
+int erdma_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats,
+ u32 port, int index)
+{
+ struct erdma_dev *dev = to_edev(ibdev);
+ int ret;
+
+ if (port == 0)
+ return 0;
+
+ ret = erdma_query_hw_stats(dev, stats);
+ if (ret)
+ return ret;
+
+ return stats->num_counters;
+}
+
+enum rdma_link_layer erdma_get_link_layer(struct ib_device *ibdev, u32 port_num)
+{
+ return IB_LINK_LAYER_ETHERNET;
+}
+
+static int erdma_set_gid(struct erdma_dev *dev, u8 op, u32 idx,
+ const union ib_gid *gid)
+{
+ struct erdma_cmdq_set_gid_req req;
+ u8 ntype;
+
+ req.cfg = FIELD_PREP(ERDMA_CMD_SET_GID_SGID_IDX_MASK, idx) |
+ FIELD_PREP(ERDMA_CMD_SET_GID_OP_MASK, op);
+
+ if (op == ERDMA_SET_GID_OP_ADD) {
+ if (ipv6_addr_v4mapped((struct in6_addr *)gid))
+ ntype = ERDMA_NETWORK_TYPE_IPV4;
+ else
+ ntype = ERDMA_NETWORK_TYPE_IPV6;
+
+ req.cfg |= FIELD_PREP(ERDMA_CMD_SET_GID_NTYPE_MASK, ntype);
+
+ memcpy(&req.gid, gid, ERDMA_ROCEV2_GID_SIZE);
+ }
+
+ erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
+ CMDQ_OPCODE_SET_GID);
+ return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL,
+ true);
+}
+
+int erdma_add_gid(const struct ib_gid_attr *attr, void **context)
+{
+ struct erdma_dev *dev = to_edev(attr->device);
+ int ret;
+
+ ret = erdma_check_gid_attr(attr);
+ if (ret)
+ return ret;
+
+ return erdma_set_gid(dev, ERDMA_SET_GID_OP_ADD, attr->index,
+ &attr->gid);
+}
+
+int erdma_del_gid(const struct ib_gid_attr *attr, void **context)
+{
+ return erdma_set_gid(to_edev(attr->device), ERDMA_SET_GID_OP_DEL,
+ attr->index, NULL);
+}
+
+int erdma_query_pkey(struct ib_device *ibdev, u32 port, u16 index, u16 *pkey)
+{
+ if (index >= ERDMA_MAX_PKEYS)
+ return -EINVAL;
+
+ *pkey = ERDMA_DEFAULT_PKEY;
+ return 0;
+}
+
+void erdma_set_av_cfg(struct erdma_av_cfg *av_cfg, struct erdma_av *av)
+{
+ av_cfg->cfg0 = FIELD_PREP(ERDMA_CMD_CREATE_AV_FL_MASK, av->flow_label) |
+ FIELD_PREP(ERDMA_CMD_CREATE_AV_NTYPE_MASK, av->ntype);
+
+ av_cfg->traffic_class = av->traffic_class;
+ av_cfg->hop_limit = av->hop_limit;
+ av_cfg->sl = av->sl;
+
+ av_cfg->udp_sport = av->udp_sport;
+ av_cfg->sgid_index = av->sgid_index;
+
+ ether_addr_copy(av_cfg->dmac, av->dmac);
+ memcpy(av_cfg->dgid, av->dgid, ERDMA_ROCEV2_GID_SIZE);
+}
+
+int erdma_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
+ struct ib_udata *udata)
+{
+ const struct ib_global_route *grh =
+ rdma_ah_read_grh(init_attr->ah_attr);
+ struct erdma_dev *dev = to_edev(ibah->device);
+ struct erdma_pd *pd = to_epd(ibah->pd);
+ struct erdma_ah *ah = to_eah(ibah);
+ struct erdma_cmdq_create_ah_req req;
+ u32 udp_sport;
+ int ret;
+
+ ret = erdma_check_gid_attr(grh->sgid_attr);
+ if (ret)
+ return ret;
+
+ ret = erdma_alloc_idx(&dev->res_cb[ERDMA_RES_TYPE_AH]);
+ if (ret < 0)
+ return ret;
+
+ ah->ahn = ret;
+
+ if (grh->flow_label)
+ udp_sport = rdma_flow_label_to_udp_sport(grh->flow_label);
+ else
+ udp_sport =
+ IB_ROCE_UDP_ENCAP_VALID_PORT_MIN + (ah->ahn & 0x3FFF);
+
+ erdma_attr_to_av(init_attr->ah_attr, &ah->av, udp_sport);
+
+ erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
+ CMDQ_OPCODE_CREATE_AH);
+
+ req.pdn = pd->pdn;
+ req.ahn = ah->ahn;
+ erdma_set_av_cfg(&req.av_cfg, &ah->av);
+
+ ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL,
+ init_attr->flags & RDMA_CREATE_AH_SLEEPABLE);
+ if (ret) {
+ erdma_free_idx(&dev->res_cb[ERDMA_RES_TYPE_AH], ah->ahn);
+ return ret;
+ }
+
+ return 0;
+}
+
+int erdma_destroy_ah(struct ib_ah *ibah, u32 flags)
+{
+ struct erdma_dev *dev = to_edev(ibah->device);
+ struct erdma_pd *pd = to_epd(ibah->pd);
+ struct erdma_ah *ah = to_eah(ibah);
+ struct erdma_cmdq_destroy_ah_req req;
+ int ret;
+
+ erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
+ CMDQ_OPCODE_DESTROY_AH);
+
+ req.pdn = pd->pdn;
+ req.ahn = ah->ahn;
+
+ ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL,
+ flags & RDMA_DESTROY_AH_SLEEPABLE);
+ if (ret)
+ return ret;
+
+ erdma_free_idx(&dev->res_cb[ERDMA_RES_TYPE_AH], ah->ahn);
+
+ return 0;
+}
+
+int erdma_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr)
+{
+ struct erdma_ah *ah = to_eah(ibah);
+
+ memset(ah_attr, 0, sizeof(*ah_attr));
+ erdma_av_to_attr(&ah->av, ah_attr);
+
+ return 0;
+}
diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.h b/drivers/infiniband/hw/erdma/erdma_verbs.h
new file mode 100644
index 000000000000..7d8d3fe501d5
--- /dev/null
+++ b/drivers/infiniband/hw/erdma/erdma_verbs.h
@@ -0,0 +1,491 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
+
+/* Authors: Cheng Xu <chengyou@linux.alibaba.com> */
+/* Kai Shen <kaishen@linux.alibaba.com> */
+/* Copyright (c) 2020-2022, Alibaba Group. */
+
+#ifndef __ERDMA_VERBS_H__
+#define __ERDMA_VERBS_H__
+
+#include "erdma.h"
+
+/* RDMA Capability. */
+#define ERDMA_MAX_PD (128 * 1024)
+#define ERDMA_MAX_SEND_WR 8192
+#define ERDMA_MAX_ORD 128
+#define ERDMA_MAX_IRD 128
+#define ERDMA_MAX_SGE_RD 1
+#define ERDMA_MAX_CONTEXT (128 * 1024)
+#define ERDMA_MAX_SEND_SGE 6
+#define ERDMA_MAX_RECV_SGE 1
+#define ERDMA_MAX_INLINE (sizeof(struct erdma_sge) * (ERDMA_MAX_SEND_SGE))
+#define ERDMA_MAX_FRMR_PA 512
+
+enum {
+ ERDMA_MMAP_IO_NC = 0, /* no cache */
+};
+
+struct erdma_user_mmap_entry {
+ struct rdma_user_mmap_entry rdma_entry;
+ u64 address;
+ u8 mmap_flag;
+};
+
+struct erdma_ext_db_info {
+ bool enable;
+ u16 sdb_off;
+ u16 rdb_off;
+ u16 cdb_off;
+};
+
+struct erdma_ucontext {
+ struct ib_ucontext ibucontext;
+
+ struct erdma_ext_db_info ext_db;
+
+ u64 sdb;
+ u64 rdb;
+ u64 cdb;
+
+ struct rdma_user_mmap_entry *sq_db_mmap_entry;
+ struct rdma_user_mmap_entry *rq_db_mmap_entry;
+ struct rdma_user_mmap_entry *cq_db_mmap_entry;
+
+ /* doorbell records */
+ struct list_head dbrecords_page_list;
+ struct mutex dbrecords_page_mutex;
+};
+
+struct erdma_pd {
+ struct ib_pd ibpd;
+ u32 pdn;
+};
+
+/*
+ * MemoryRegion definition.
+ */
+#define ERDMA_MAX_INLINE_MTT_ENTRIES 4
+#define MTT_SIZE(mtt_cnt) ((mtt_cnt) << 3) /* per mtt entry takes 8 Bytes. */
+#define ERDMA_MR_MAX_MTT_CNT 524288
+#define ERDMA_MTT_ENTRY_SIZE 8
+
+#define ERDMA_MR_TYPE_NORMAL 0
+#define ERDMA_MR_TYPE_FRMR 1
+#define ERDMA_MR_TYPE_DMA 2
+
+#define ERDMA_MR_MTT_0LEVEL 0
+#define ERDMA_MR_MTT_1LEVEL 1
+
+#define ERDMA_MR_ACC_RA BIT(0)
+#define ERDMA_MR_ACC_LR BIT(1)
+#define ERDMA_MR_ACC_LW BIT(2)
+#define ERDMA_MR_ACC_RR BIT(3)
+#define ERDMA_MR_ACC_RW BIT(4)
+
+static inline u8 to_erdma_access_flags(int access)
+{
+ return (access & IB_ACCESS_REMOTE_READ ? ERDMA_MR_ACC_RR : 0) |
+ (access & IB_ACCESS_LOCAL_WRITE ? ERDMA_MR_ACC_LW : 0) |
+ (access & IB_ACCESS_REMOTE_WRITE ? ERDMA_MR_ACC_RW : 0) |
+ (access & IB_ACCESS_REMOTE_ATOMIC ? ERDMA_MR_ACC_RA : 0);
+}
+
+/* Hierarchical storage structure for MTT entries */
+struct erdma_mtt {
+ u64 *buf;
+ size_t size;
+
+ bool continuous;
+ union {
+ dma_addr_t buf_dma;
+ struct {
+ dma_addr_t *dma_addrs;
+ u32 npages;
+ u32 level;
+ };
+ };
+
+ struct erdma_mtt *low_level;
+};
+
+struct erdma_mem {
+ struct ib_umem *umem;
+ struct erdma_mtt *mtt;
+
+ u32 page_size;
+ u32 page_offset;
+ u32 page_cnt;
+ u32 mtt_nents;
+
+ u64 va;
+ u64 len;
+};
+
+struct erdma_mr {
+ struct ib_mr ibmr;
+ struct erdma_mem mem;
+ u8 type;
+ u8 access;
+ u8 valid;
+};
+
+struct erdma_user_dbrecords_page {
+ struct list_head list;
+ struct ib_umem *umem;
+ u64 va;
+ int refcnt;
+};
+
+struct erdma_av {
+ u8 port;
+ u8 hop_limit;
+ u8 traffic_class;
+ u8 sl;
+ u8 sgid_index;
+ u16 udp_sport;
+ u32 flow_label;
+ u8 dmac[ETH_ALEN];
+ u8 dgid[ERDMA_ROCEV2_GID_SIZE];
+ enum erdma_network_type ntype;
+};
+
+struct erdma_ah {
+ struct ib_ah ibah;
+ struct erdma_av av;
+ u32 ahn;
+};
+
+struct erdma_uqp {
+ struct erdma_mem sq_mem;
+ struct erdma_mem rq_mem;
+
+ dma_addr_t sq_dbrec_dma;
+ dma_addr_t rq_dbrec_dma;
+
+ struct erdma_user_dbrecords_page *user_dbr_page;
+
+ u32 rq_offset;
+};
+
+struct erdma_kqp {
+ u16 sq_pi;
+ u16 sq_ci;
+
+ u16 rq_pi;
+ u16 rq_ci;
+
+ u64 *swr_tbl;
+ u64 *rwr_tbl;
+
+ void __iomem *hw_sq_db;
+ void __iomem *hw_rq_db;
+
+ void *sq_buf;
+ dma_addr_t sq_buf_dma_addr;
+
+ void *rq_buf;
+ dma_addr_t rq_buf_dma_addr;
+
+ void *sq_dbrec;
+ void *rq_dbrec;
+
+ dma_addr_t sq_dbrec_dma;
+ dma_addr_t rq_dbrec_dma;
+
+ u8 sig_all;
+};
+
+enum erdma_qps_iwarp {
+ ERDMA_QPS_IWARP_IDLE = 0,
+ ERDMA_QPS_IWARP_RTR = 1,
+ ERDMA_QPS_IWARP_RTS = 2,
+ ERDMA_QPS_IWARP_CLOSING = 3,
+ ERDMA_QPS_IWARP_TERMINATE = 4,
+ ERDMA_QPS_IWARP_ERROR = 5,
+ ERDMA_QPS_IWARP_UNDEF = 6,
+ ERDMA_QPS_IWARP_COUNT = 7,
+};
+
+enum erdma_qpa_mask_iwarp {
+ ERDMA_QPA_IWARP_STATE = (1 << 0),
+ ERDMA_QPA_IWARP_LLP_HANDLE = (1 << 2),
+ ERDMA_QPA_IWARP_ORD = (1 << 3),
+ ERDMA_QPA_IWARP_IRD = (1 << 4),
+ ERDMA_QPA_IWARP_SQ_SIZE = (1 << 5),
+ ERDMA_QPA_IWARP_RQ_SIZE = (1 << 6),
+ ERDMA_QPA_IWARP_MPA = (1 << 7),
+ ERDMA_QPA_IWARP_CC = (1 << 8),
+};
+
+enum erdma_qps_rocev2 {
+ ERDMA_QPS_ROCEV2_RESET = 0,
+ ERDMA_QPS_ROCEV2_INIT = 1,
+ ERDMA_QPS_ROCEV2_RTR = 2,
+ ERDMA_QPS_ROCEV2_RTS = 3,
+ ERDMA_QPS_ROCEV2_SQD = 4,
+ ERDMA_QPS_ROCEV2_SQE = 5,
+ ERDMA_QPS_ROCEV2_ERROR = 6,
+ ERDMA_QPS_ROCEV2_COUNT = 7,
+};
+
+enum erdma_qpa_mask_rocev2 {
+ ERDMA_QPA_ROCEV2_STATE = (1 << 0),
+ ERDMA_QPA_ROCEV2_QKEY = (1 << 1),
+ ERDMA_QPA_ROCEV2_AV = (1 << 2),
+ ERDMA_QPA_ROCEV2_SQ_PSN = (1 << 3),
+ ERDMA_QPA_ROCEV2_RQ_PSN = (1 << 4),
+ ERDMA_QPA_ROCEV2_DST_QPN = (1 << 5),
+};
+
+enum erdma_qp_flags {
+ ERDMA_QP_IN_FLUSHING = (1 << 0),
+};
+
+#define ERDMA_QP_ACTIVE 0
+#define ERDMA_QP_PASSIVE 1
+
+struct erdma_mod_qp_params_iwarp {
+ enum erdma_qps_iwarp state;
+ enum erdma_cc_alg cc;
+ u8 qp_type;
+ u8 pd_len;
+ u32 irq_size;
+ u32 orq_size;
+};
+
+struct erdma_qp_attrs_iwarp {
+ enum erdma_qps_iwarp state;
+ u32 cookie;
+};
+
+struct erdma_mod_qp_params_rocev2 {
+ enum erdma_qps_rocev2 state;
+ u32 qkey;
+ u32 sq_psn;
+ u32 rq_psn;
+ u32 dst_qpn;
+ struct erdma_av av;
+};
+
+union erdma_mod_qp_params {
+ struct erdma_mod_qp_params_iwarp iwarp;
+ struct erdma_mod_qp_params_rocev2 rocev2;
+};
+
+struct erdma_qp_attrs_rocev2 {
+ enum erdma_qps_rocev2 state;
+ u32 qkey;
+ u32 dst_qpn;
+ struct erdma_av av;
+};
+
+struct erdma_qp_attrs {
+ enum erdma_cc_alg cc; /* Congestion control algorithm */
+ u32 sq_size;
+ u32 rq_size;
+ u32 orq_size;
+ u32 irq_size;
+ u32 max_send_sge;
+ u32 max_recv_sge;
+ union {
+ struct erdma_qp_attrs_iwarp iwarp;
+ struct erdma_qp_attrs_rocev2 rocev2;
+ };
+};
+
+struct erdma_qp {
+ struct ib_qp ibqp;
+ struct kref ref;
+ struct completion safe_free;
+ struct erdma_dev *dev;
+ struct erdma_cep *cep;
+ struct rw_semaphore state_lock;
+
+ unsigned long flags;
+ struct delayed_work reflush_dwork;
+
+ union {
+ struct erdma_kqp kern_qp;
+ struct erdma_uqp user_qp;
+ };
+
+ struct erdma_cq *scq;
+ struct erdma_cq *rcq;
+
+ struct erdma_qp_attrs attrs;
+ spinlock_t lock;
+};
+
+struct erdma_kcq_info {
+ void *qbuf;
+ dma_addr_t qbuf_dma_addr;
+ u32 ci;
+ u32 cmdsn;
+ u32 notify_cnt;
+
+ spinlock_t lock;
+ u8 __iomem *db;
+ u64 *dbrec;
+ dma_addr_t dbrec_dma;
+};
+
+struct erdma_ucq_info {
+ struct erdma_mem qbuf_mem;
+ struct erdma_user_dbrecords_page *user_dbr_page;
+ dma_addr_t dbrec_dma;
+};
+
+struct erdma_cq {
+ struct ib_cq ibcq;
+ u32 cqn;
+
+ u32 depth;
+ u32 assoc_eqn;
+
+ union {
+ struct erdma_kcq_info kern_cq;
+ struct erdma_ucq_info user_cq;
+ };
+};
+
+#define QP_ID(qp) ((qp)->ibqp.qp_num)
+
+static inline struct erdma_qp *find_qp_by_qpn(struct erdma_dev *dev, int id)
+{
+ return (struct erdma_qp *)xa_load(&dev->qp_xa, id);
+}
+
+static inline struct erdma_cq *find_cq_by_cqn(struct erdma_dev *dev, int id)
+{
+ return (struct erdma_cq *)xa_load(&dev->cq_xa, id);
+}
+
+void erdma_qp_get(struct erdma_qp *qp);
+void erdma_qp_put(struct erdma_qp *qp);
+int erdma_modify_qp_state_iwarp(struct erdma_qp *qp,
+ struct erdma_mod_qp_params_iwarp *params,
+ int mask);
+int erdma_modify_qp_state_rocev2(struct erdma_qp *qp,
+ struct erdma_mod_qp_params_rocev2 *params,
+ int attr_mask);
+void erdma_qp_llp_close(struct erdma_qp *qp);
+void erdma_qp_cm_drop(struct erdma_qp *qp);
+
+static inline bool erdma_device_iwarp(struct erdma_dev *dev)
+{
+ return dev->proto == ERDMA_PROTO_IWARP;
+}
+
+static inline bool erdma_device_rocev2(struct erdma_dev *dev)
+{
+ return dev->proto == ERDMA_PROTO_ROCEV2;
+}
+
+static inline struct erdma_ucontext *to_ectx(struct ib_ucontext *ibctx)
+{
+ return container_of(ibctx, struct erdma_ucontext, ibucontext);
+}
+
+static inline struct erdma_pd *to_epd(struct ib_pd *pd)
+{
+ return container_of(pd, struct erdma_pd, ibpd);
+}
+
+static inline struct erdma_mr *to_emr(struct ib_mr *ibmr)
+{
+ return container_of(ibmr, struct erdma_mr, ibmr);
+}
+
+static inline struct erdma_qp *to_eqp(struct ib_qp *qp)
+{
+ return container_of(qp, struct erdma_qp, ibqp);
+}
+
+static inline struct erdma_cq *to_ecq(struct ib_cq *ibcq)
+{
+ return container_of(ibcq, struct erdma_cq, ibcq);
+}
+
+static inline struct erdma_ah *to_eah(struct ib_ah *ibah)
+{
+ return container_of(ibah, struct erdma_ah, ibah);
+}
+
+static inline int erdma_check_gid_attr(const struct ib_gid_attr *attr)
+{
+ u8 ntype = rdma_gid_attr_network_type(attr);
+
+ if (ntype != RDMA_NETWORK_IPV4 && ntype != RDMA_NETWORK_IPV6)
+ return -EINVAL;
+
+ return 0;
+}
+
+static inline struct erdma_user_mmap_entry *
+to_emmap(struct rdma_user_mmap_entry *ibmmap)
+{
+ return container_of(ibmmap, struct erdma_user_mmap_entry, rdma_entry);
+}
+
+int erdma_alloc_ucontext(struct ib_ucontext *ibctx, struct ib_udata *data);
+void erdma_dealloc_ucontext(struct ib_ucontext *ibctx);
+int erdma_query_device(struct ib_device *dev, struct ib_device_attr *attr,
+ struct ib_udata *data);
+int erdma_get_port_immutable(struct ib_device *dev, u32 port,
+ struct ib_port_immutable *ib_port_immutable);
+int erdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+ struct uverbs_attr_bundle *attrs);
+int erdma_query_port(struct ib_device *dev, u32 port,
+ struct ib_port_attr *attr);
+int erdma_query_gid(struct ib_device *dev, u32 port, int idx,
+ union ib_gid *gid);
+int erdma_alloc_pd(struct ib_pd *ibpd, struct ib_udata *data);
+int erdma_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata);
+int erdma_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attr,
+ struct ib_udata *data);
+int erdma_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int mask,
+ struct ib_qp_init_attr *init_attr);
+int erdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int mask,
+ struct ib_udata *data);
+int erdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata);
+int erdma_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata);
+void erdma_disassociate_ucontext(struct ib_ucontext *ibcontext);
+int erdma_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags);
+struct ib_mr *erdma_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
+ u64 virt, int access, struct ib_dmah *dmah,
+ struct ib_udata *udata);
+struct ib_mr *erdma_get_dma_mr(struct ib_pd *ibpd, int rights);
+int erdma_dereg_mr(struct ib_mr *ibmr, struct ib_udata *data);
+int erdma_mmap(struct ib_ucontext *ctx, struct vm_area_struct *vma);
+void erdma_mmap_free(struct rdma_user_mmap_entry *rdma_entry);
+void erdma_qp_get_ref(struct ib_qp *ibqp);
+void erdma_qp_put_ref(struct ib_qp *ibqp);
+struct ib_qp *erdma_get_ibqp(struct ib_device *dev, int id);
+int erdma_post_send(struct ib_qp *ibqp, const struct ib_send_wr *send_wr,
+ const struct ib_send_wr **bad_send_wr);
+int erdma_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *recv_wr,
+ const struct ib_recv_wr **bad_recv_wr);
+int erdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
+void erdma_remove_cqes_of_qp(struct ib_cq *ibcq, u32 qpn);
+struct ib_mr *erdma_ib_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type,
+ u32 max_num_sg);
+int erdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
+ unsigned int *sg_offset);
+void erdma_port_event(struct erdma_dev *dev, enum ib_event_type reason);
+void erdma_set_mtu(struct erdma_dev *dev, u32 mtu);
+struct rdma_hw_stats *erdma_alloc_hw_port_stats(struct ib_device *device,
+ u32 port_num);
+int erdma_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats,
+ u32 port, int index);
+enum rdma_link_layer erdma_get_link_layer(struct ib_device *ibdev,
+ u32 port_num);
+int erdma_add_gid(const struct ib_gid_attr *attr, void **context);
+int erdma_del_gid(const struct ib_gid_attr *attr, void **context);
+int erdma_query_pkey(struct ib_device *ibdev, u32 port, u16 index, u16 *pkey);
+void erdma_set_av_cfg(struct erdma_av_cfg *av_cfg, struct erdma_av *av);
+int erdma_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
+ struct ib_udata *udata);
+int erdma_destroy_ah(struct ib_ah *ibah, u32 flags);
+int erdma_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr);
+
+#endif
diff --git a/drivers/infiniband/hw/hfi1/Kconfig b/drivers/infiniband/hw/hfi1/Kconfig
index f6ea0881765a..14b92e12bf29 100644
--- a/drivers/infiniband/hw/hfi1/Kconfig
+++ b/drivers/infiniband/hw/hfi1/Kconfig
@@ -1,29 +1,23 @@
+# SPDX-License-Identifier: GPL-2.0-only
config INFINIBAND_HFI1
- tristate "Intel OPA Gen1 support"
- depends on X86_64 && INFINIBAND_RDMAVT && I2C
+ tristate "Cornelis OPX Gen1 support"
+ depends on X86_64 && INFINIBAND_RDMAVT && I2C && !UML
select MMU_NOTIFIER
select CRC32
select I2C_ALGOBIT
- ---help---
- This is a low-level driver for Intel OPA Gen1 adapter.
+ help
+ This is a low-level driver for Cornelis OPX Gen1 adapter.
config HFI1_DEBUG_SDMA_ORDER
bool "HFI1 SDMA Order debug"
depends on INFINIBAND_HFI1
default n
- ---help---
+ help
This is a debug flag to test for out of order
sdma completions for unit testing
-config HFI1_VERBS_31BIT_PSN
- bool "HFI1 enable 31 bit PSN"
- depends on INFINIBAND_HFI1
- default y
- ---help---
- Setting this enables 31 BIT PSN
- For verbs RC/UC
config SDMA_VERBOSITY
bool "Config SDMA Verbosity"
depends on INFINIBAND_HFI1
default n
- ---help---
+ help
This is a configuration flag to enable verbose
SDMA debug
diff --git a/drivers/infiniband/hw/hfi1/Makefile b/drivers/infiniband/hw/hfi1/Makefile
index 0cf97a09b64b..5d977f363684 100644
--- a/drivers/infiniband/hw/hfi1/Makefile
+++ b/drivers/infiniband/hw/hfi1/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
#
# HFI driver
#
@@ -7,13 +8,59 @@
#
obj-$(CONFIG_INFINIBAND_HFI1) += hfi1.o
-hfi1-y := affinity.o chip.o device.o driver.o efivar.o \
- eprom.o file_ops.o firmware.o \
- init.o intr.o mad.o mmu_rb.o pcie.o pio.o pio_copy.o platform.o \
- qp.o qsfp.o rc.o ruc.o sdma.o sysfs.o trace.o \
- uc.o ud.o user_exp_rcv.o user_pages.o user_sdma.o verbs.o \
- verbs_txreq.o
-hfi1-$(CONFIG_DEBUG_FS) += debugfs.o
+hfi1-y := \
+ affinity.o \
+ aspm.o \
+ chip.o \
+ device.o \
+ driver.o \
+ efivar.o \
+ eprom.o \
+ exp_rcv.o \
+ file_ops.o \
+ firmware.o \
+ init.o \
+ intr.o \
+ iowait.o \
+ ipoib_main.o \
+ ipoib_rx.o \
+ ipoib_tx.o \
+ mad.o \
+ mmu_rb.o \
+ msix.o \
+ netdev_rx.o \
+ opfn.o \
+ pcie.o \
+ pin_system.o \
+ pio.o \
+ pio_copy.o \
+ platform.o \
+ qp.o \
+ qsfp.o \
+ rc.o \
+ ruc.o \
+ sdma.o \
+ sysfs.o \
+ tid_rdma.o \
+ trace.o \
+ uc.o \
+ ud.o \
+ user_exp_rcv.o \
+ user_pages.o \
+ user_sdma.o \
+ verbs.o \
+ verbs_txreq.o \
+ vnic_main.o \
+ vnic_sdma.o
+
+ifdef CONFIG_DEBUG_FS
+hfi1-y += debugfs.o
+ifdef CONFIG_FAULT_INJECTION
+ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
+hfi1-y += fault.o
+endif
+endif
+endif
CFLAGS_trace.o = -I$(src)
ifdef MVERSION
diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c
index 7a3d906b3671..ee7fedc67b86 100644
--- a/drivers/infiniband/hw/hfi1/affinity.c
+++ b/drivers/infiniband/hw/hfi1/affinity.c
@@ -1,53 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
+ * Copyright(c) 2015 - 2020 Intel Corporation.
*/
+
#include <linux/topology.h>
#include <linux/cpumask.h>
-#include <linux/module.h>
#include <linux/interrupt.h>
+#include <linux/numa.h>
#include "hfi.h"
#include "affinity.h"
@@ -63,6 +22,7 @@ struct hfi1_affinity_node_list node_affinity = {
static const char * const irq_type_names[] = {
"SDMA",
"RCVCTXT",
+ "NETDEVCTXT",
"GENERAL",
"OTHER",
};
@@ -77,12 +37,62 @@ static inline void init_cpu_mask_set(struct cpu_mask_set *set)
set->gen = 0;
}
+/* Increment generation of CPU set if needed */
+static void _cpu_mask_set_gen_inc(struct cpu_mask_set *set)
+{
+ if (cpumask_equal(&set->mask, &set->used)) {
+ /*
+ * We've used up all the CPUs, bump up the generation
+ * and reset the 'used' map
+ */
+ set->gen++;
+ cpumask_clear(&set->used);
+ }
+}
+
+static void _cpu_mask_set_gen_dec(struct cpu_mask_set *set)
+{
+ if (cpumask_empty(&set->used) && set->gen) {
+ set->gen--;
+ cpumask_copy(&set->used, &set->mask);
+ }
+}
+
+/* Get the first CPU from the list of unused CPUs in a CPU set data structure */
+static int cpu_mask_set_get_first(struct cpu_mask_set *set, cpumask_var_t diff)
+{
+ int cpu;
+
+ if (!diff || !set)
+ return -EINVAL;
+
+ _cpu_mask_set_gen_inc(set);
+
+ /* Find out CPUs left in CPU mask */
+ cpumask_andnot(diff, &set->mask, &set->used);
+
+ cpu = cpumask_first(diff);
+ if (cpu >= nr_cpu_ids) /* empty */
+ cpu = -EINVAL;
+ else
+ cpumask_set_cpu(cpu, &set->used);
+
+ return cpu;
+}
+
+static void cpu_mask_set_put(struct cpu_mask_set *set, int cpu)
+{
+ if (!set)
+ return;
+
+ cpumask_clear_cpu(cpu, &set->used);
+ _cpu_mask_set_gen_dec(set);
+}
+
/* Initialize non-HT cpu cores mask */
void init_real_cpu_mask(void)
{
- int possible, curr_cpu, i, ht;
-
- cpumask_clear(&node_affinity.real_cpu_mask);
+ int possible, curr_cpu, ht;
/* Start with cpu online mask as the real cpu mask */
cpumask_copy(&node_affinity.real_cpu_mask, cpu_online_mask);
@@ -98,17 +108,10 @@ void init_real_cpu_mask(void)
* "real" cores. Assumes that HT cores are not enumerated in
* succession (except in the single core case).
*/
- curr_cpu = cpumask_first(&node_affinity.real_cpu_mask);
- for (i = 0; i < possible / ht; i++)
- curr_cpu = cpumask_next(curr_cpu, &node_affinity.real_cpu_mask);
- /*
- * Step 2. Remove the remaining HT siblings. Use cpumask_next() to
- * skip any gaps.
- */
- for (; i < possible; i++) {
- cpumask_clear_cpu(curr_cpu, &node_affinity.real_cpu_mask);
- curr_cpu = cpumask_next(curr_cpu, &node_affinity.real_cpu_mask);
- }
+ curr_cpu = cpumask_nth(possible / ht, &node_affinity.real_cpu_mask) + 1;
+
+ /* Step 2. Remove the remaining HT siblings. */
+ cpumask_clear_cpus(&node_affinity.real_cpu_mask, curr_cpu, nr_cpu_ids - curr_cpu);
}
int node_affinity_init(void)
@@ -146,7 +149,7 @@ int node_affinity_init(void)
while ((dev = pci_get_device(ids->vendor, ids->device, dev))) {
node = pcibus_to_node(dev->bus);
if (node < 0)
- node = numa_node_id();
+ goto out;
hfi1_per_node_cntr[node]++;
}
@@ -154,9 +157,29 @@ int node_affinity_init(void)
}
return 0;
+
+out:
+ /*
+ * Invalid PCI NUMA node information found, note it, and populate
+ * our database 1:1.
+ */
+ pr_err("HFI: Invalid PCI NUMA node. Performance may be affected\n");
+ pr_err("HFI: System BIOS may need to be upgraded\n");
+ for (node = 0; node < node_affinity.num_possible_nodes; node++)
+ hfi1_per_node_cntr[node] = 1;
+
+ pci_dev_put(dev);
+
+ return 0;
}
-void node_affinity_destroy(void)
+static void node_affinity_destroy(struct hfi1_affinity_node *entry)
+{
+ free_percpu(entry->comp_vect_affinity);
+ kfree(entry);
+}
+
+void node_affinity_destroy_all(void)
{
struct list_head *pos, *q;
struct hfi1_affinity_node *entry;
@@ -166,7 +189,7 @@ void node_affinity_destroy(void)
entry = list_entry(pos, struct hfi1_affinity_node,
list);
list_del(pos);
- kfree(entry);
+ node_affinity_destroy(entry);
}
mutex_unlock(&node_affinity.lock);
kfree(hfi1_per_node_cntr);
@@ -180,6 +203,7 @@ static struct hfi1_affinity_node *node_affinity_allocate(int node)
if (!entry)
return NULL;
entry->node = node;
+ entry->comp_vect_affinity = alloc_percpu(u16);
INIT_LIST_HEAD(&entry->list);
return entry;
@@ -197,11 +221,9 @@ static void node_affinity_add_tail(struct hfi1_affinity_node *entry)
/* It must be called with node_affinity.lock held */
static struct hfi1_affinity_node *node_affinity_lookup(int node)
{
- struct list_head *pos;
struct hfi1_affinity_node *entry;
- list_for_each(pos, &node_affinity.list) {
- entry = list_entry(pos, struct hfi1_affinity_node, list);
+ list_for_each_entry(entry, &node_affinity.list, list) {
if (entry->node == node)
return entry;
}
@@ -209,6 +231,344 @@ static struct hfi1_affinity_node *node_affinity_lookup(int node)
return NULL;
}
+static int per_cpu_affinity_get(cpumask_var_t possible_cpumask,
+ u16 __percpu *comp_vect_affinity)
+{
+ int curr_cpu;
+ u16 cntr;
+ u16 prev_cntr;
+ int ret_cpu;
+
+ if (!possible_cpumask) {
+ ret_cpu = -EINVAL;
+ goto fail;
+ }
+
+ if (!comp_vect_affinity) {
+ ret_cpu = -EINVAL;
+ goto fail;
+ }
+
+ ret_cpu = cpumask_first(possible_cpumask);
+ if (ret_cpu >= nr_cpu_ids) {
+ ret_cpu = -EINVAL;
+ goto fail;
+ }
+
+ prev_cntr = *per_cpu_ptr(comp_vect_affinity, ret_cpu);
+ for_each_cpu(curr_cpu, possible_cpumask) {
+ cntr = *per_cpu_ptr(comp_vect_affinity, curr_cpu);
+
+ if (cntr < prev_cntr) {
+ ret_cpu = curr_cpu;
+ prev_cntr = cntr;
+ }
+ }
+
+ *per_cpu_ptr(comp_vect_affinity, ret_cpu) += 1;
+
+fail:
+ return ret_cpu;
+}
+
+static int per_cpu_affinity_put_max(cpumask_var_t possible_cpumask,
+ u16 __percpu *comp_vect_affinity)
+{
+ int curr_cpu;
+ int max_cpu;
+ u16 cntr;
+ u16 prev_cntr;
+
+ if (!possible_cpumask)
+ return -EINVAL;
+
+ if (!comp_vect_affinity)
+ return -EINVAL;
+
+ max_cpu = cpumask_first(possible_cpumask);
+ if (max_cpu >= nr_cpu_ids)
+ return -EINVAL;
+
+ prev_cntr = *per_cpu_ptr(comp_vect_affinity, max_cpu);
+ for_each_cpu(curr_cpu, possible_cpumask) {
+ cntr = *per_cpu_ptr(comp_vect_affinity, curr_cpu);
+
+ if (cntr > prev_cntr) {
+ max_cpu = curr_cpu;
+ prev_cntr = cntr;
+ }
+ }
+
+ *per_cpu_ptr(comp_vect_affinity, max_cpu) -= 1;
+
+ return max_cpu;
+}
+
+/*
+ * Non-interrupt CPUs are used first, then interrupt CPUs.
+ * Two already allocated cpu masks must be passed.
+ */
+static int _dev_comp_vect_cpu_get(struct hfi1_devdata *dd,
+ struct hfi1_affinity_node *entry,
+ cpumask_var_t non_intr_cpus,
+ cpumask_var_t available_cpus)
+ __must_hold(&node_affinity.lock)
+{
+ int cpu;
+ struct cpu_mask_set *set = dd->comp_vect;
+
+ lockdep_assert_held(&node_affinity.lock);
+ if (!non_intr_cpus) {
+ cpu = -1;
+ goto fail;
+ }
+
+ if (!available_cpus) {
+ cpu = -1;
+ goto fail;
+ }
+
+ /* Available CPUs for pinning completion vectors */
+ _cpu_mask_set_gen_inc(set);
+ cpumask_andnot(available_cpus, &set->mask, &set->used);
+
+ /* Available CPUs without SDMA engine interrupts */
+ cpumask_andnot(non_intr_cpus, available_cpus,
+ &entry->def_intr.used);
+
+ /* If there are non-interrupt CPUs available, use them first */
+ cpu = cpumask_first(non_intr_cpus);
+
+ /* Otherwise, use interrupt CPUs */
+ if (cpu >= nr_cpu_ids)
+ cpu = cpumask_first(available_cpus);
+
+ if (cpu >= nr_cpu_ids) { /* empty */
+ cpu = -1;
+ goto fail;
+ }
+ cpumask_set_cpu(cpu, &set->used);
+
+fail:
+ return cpu;
+}
+
+static void _dev_comp_vect_cpu_put(struct hfi1_devdata *dd, int cpu)
+{
+ struct cpu_mask_set *set = dd->comp_vect;
+
+ if (cpu < 0)
+ return;
+
+ cpu_mask_set_put(set, cpu);
+}
+
+/* _dev_comp_vect_mappings_destroy() is reentrant */
+static void _dev_comp_vect_mappings_destroy(struct hfi1_devdata *dd)
+{
+ int i, cpu;
+
+ if (!dd->comp_vect_mappings)
+ return;
+
+ for (i = 0; i < dd->comp_vect_possible_cpus; i++) {
+ cpu = dd->comp_vect_mappings[i];
+ _dev_comp_vect_cpu_put(dd, cpu);
+ dd->comp_vect_mappings[i] = -1;
+ hfi1_cdbg(AFFINITY,
+ "[%s] Release CPU %d from completion vector %d",
+ rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), cpu, i);
+ }
+
+ kfree(dd->comp_vect_mappings);
+ dd->comp_vect_mappings = NULL;
+}
+
+/*
+ * This function creates the table for looking up CPUs for completion vectors.
+ * num_comp_vectors needs to have been initilized before calling this function.
+ */
+static int _dev_comp_vect_mappings_create(struct hfi1_devdata *dd,
+ struct hfi1_affinity_node *entry)
+ __must_hold(&node_affinity.lock)
+{
+ int i, cpu, ret;
+ cpumask_var_t non_intr_cpus;
+ cpumask_var_t available_cpus;
+
+ lockdep_assert_held(&node_affinity.lock);
+
+ if (!zalloc_cpumask_var(&non_intr_cpus, GFP_KERNEL))
+ return -ENOMEM;
+
+ if (!zalloc_cpumask_var(&available_cpus, GFP_KERNEL)) {
+ free_cpumask_var(non_intr_cpus);
+ return -ENOMEM;
+ }
+
+ dd->comp_vect_mappings = kcalloc(dd->comp_vect_possible_cpus,
+ sizeof(*dd->comp_vect_mappings),
+ GFP_KERNEL);
+ if (!dd->comp_vect_mappings) {
+ ret = -ENOMEM;
+ goto fail;
+ }
+ for (i = 0; i < dd->comp_vect_possible_cpus; i++)
+ dd->comp_vect_mappings[i] = -1;
+
+ for (i = 0; i < dd->comp_vect_possible_cpus; i++) {
+ cpu = _dev_comp_vect_cpu_get(dd, entry, non_intr_cpus,
+ available_cpus);
+ if (cpu < 0) {
+ ret = -EINVAL;
+ goto fail;
+ }
+
+ dd->comp_vect_mappings[i] = cpu;
+ hfi1_cdbg(AFFINITY,
+ "[%s] Completion Vector %d -> CPU %d",
+ rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), i, cpu);
+ }
+
+ free_cpumask_var(available_cpus);
+ free_cpumask_var(non_intr_cpus);
+ return 0;
+
+fail:
+ free_cpumask_var(available_cpus);
+ free_cpumask_var(non_intr_cpus);
+ _dev_comp_vect_mappings_destroy(dd);
+
+ return ret;
+}
+
+int hfi1_comp_vectors_set_up(struct hfi1_devdata *dd)
+{
+ int ret;
+ struct hfi1_affinity_node *entry;
+
+ mutex_lock(&node_affinity.lock);
+ entry = node_affinity_lookup(dd->node);
+ if (!entry) {
+ ret = -EINVAL;
+ goto unlock;
+ }
+ ret = _dev_comp_vect_mappings_create(dd, entry);
+unlock:
+ mutex_unlock(&node_affinity.lock);
+
+ return ret;
+}
+
+void hfi1_comp_vectors_clean_up(struct hfi1_devdata *dd)
+{
+ _dev_comp_vect_mappings_destroy(dd);
+}
+
+int hfi1_comp_vect_mappings_lookup(struct rvt_dev_info *rdi, int comp_vect)
+{
+ struct hfi1_ibdev *verbs_dev = dev_from_rdi(rdi);
+ struct hfi1_devdata *dd = dd_from_dev(verbs_dev);
+
+ if (!dd->comp_vect_mappings)
+ return -EINVAL;
+ if (comp_vect >= dd->comp_vect_possible_cpus)
+ return -EINVAL;
+
+ return dd->comp_vect_mappings[comp_vect];
+}
+
+/*
+ * It assumes dd->comp_vect_possible_cpus is available.
+ */
+static int _dev_comp_vect_cpu_mask_init(struct hfi1_devdata *dd,
+ struct hfi1_affinity_node *entry,
+ bool first_dev_init)
+ __must_hold(&node_affinity.lock)
+{
+ int i, j, curr_cpu;
+ int possible_cpus_comp_vect = 0;
+ struct cpumask *dev_comp_vect_mask = &dd->comp_vect->mask;
+
+ lockdep_assert_held(&node_affinity.lock);
+ /*
+ * If there's only one CPU available for completion vectors, then
+ * there will only be one completion vector available. Othewise,
+ * the number of completion vector available will be the number of
+ * available CPUs divide it by the number of devices in the
+ * local NUMA node.
+ */
+ if (cpumask_weight(&entry->comp_vect_mask) == 1) {
+ possible_cpus_comp_vect = 1;
+ dd_dev_warn(dd,
+ "Number of kernel receive queues is too large for completion vector affinity to be effective\n");
+ } else {
+ possible_cpus_comp_vect +=
+ cpumask_weight(&entry->comp_vect_mask) /
+ hfi1_per_node_cntr[dd->node];
+
+ /*
+ * If the completion vector CPUs available doesn't divide
+ * evenly among devices, then the first device device to be
+ * initialized gets an extra CPU.
+ */
+ if (first_dev_init &&
+ cpumask_weight(&entry->comp_vect_mask) %
+ hfi1_per_node_cntr[dd->node] != 0)
+ possible_cpus_comp_vect++;
+ }
+
+ dd->comp_vect_possible_cpus = possible_cpus_comp_vect;
+
+ /* Reserving CPUs for device completion vector */
+ for (i = 0; i < dd->comp_vect_possible_cpus; i++) {
+ curr_cpu = per_cpu_affinity_get(&entry->comp_vect_mask,
+ entry->comp_vect_affinity);
+ if (curr_cpu < 0)
+ goto fail;
+
+ cpumask_set_cpu(curr_cpu, dev_comp_vect_mask);
+ }
+
+ hfi1_cdbg(AFFINITY,
+ "[%s] Completion vector affinity CPU set(s) %*pbl",
+ rvt_get_ibdev_name(&(dd)->verbs_dev.rdi),
+ cpumask_pr_args(dev_comp_vect_mask));
+
+ return 0;
+
+fail:
+ for (j = 0; j < i; j++)
+ per_cpu_affinity_put_max(&entry->comp_vect_mask,
+ entry->comp_vect_affinity);
+
+ return curr_cpu;
+}
+
+/*
+ * It assumes dd->comp_vect_possible_cpus is available.
+ */
+static void _dev_comp_vect_cpu_mask_clean_up(struct hfi1_devdata *dd,
+ struct hfi1_affinity_node *entry)
+ __must_hold(&node_affinity.lock)
+{
+ int i, cpu;
+
+ lockdep_assert_held(&node_affinity.lock);
+ if (!dd->comp_vect_possible_cpus)
+ return;
+
+ for (i = 0; i < dd->comp_vect_possible_cpus; i++) {
+ cpu = per_cpu_affinity_put_max(&dd->comp_vect->mask,
+ entry->comp_vect_affinity);
+ /* Clearing CPU in device completion vector cpu mask */
+ if (cpu >= 0)
+ cpumask_clear_cpu(cpu, &dd->comp_vect->mask);
+ }
+
+ dd->comp_vect_possible_cpus = 0;
+}
+
/*
* Interrupt affinity.
*
@@ -222,14 +582,10 @@ static struct hfi1_affinity_node *node_affinity_lookup(int node)
*/
int hfi1_dev_affinity_init(struct hfi1_devdata *dd)
{
- int node = pcibus_to_node(dd->pcidev->bus);
struct hfi1_affinity_node *entry;
const struct cpumask *local_mask;
- int curr_cpu, possible, i;
-
- if (node < 0)
- node = numa_node_id();
- dd->node = node;
+ int curr_cpu, possible, i, ret;
+ bool new_entry = false;
local_mask = cpumask_of_node(dd->node);
if (cpumask_first(local_mask) >= nr_cpu_ids)
@@ -243,15 +599,18 @@ int hfi1_dev_affinity_init(struct hfi1_devdata *dd)
* create an entry in the global affinity structure and initialize it.
*/
if (!entry) {
- entry = node_affinity_allocate(node);
+ entry = node_affinity_allocate(dd->node);
if (!entry) {
dd_dev_err(dd,
"Unable to allocate global affinity node\n");
- mutex_unlock(&node_affinity.lock);
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto fail;
}
+ new_entry = true;
+
init_cpu_mask_set(&entry->def_intr);
init_cpu_mask_set(&entry->rcv_intr);
+ cpumask_clear(&entry->comp_vect_mask);
cpumask_clear(&entry->general_intr_mask);
/* Use the "real" cpu mask of this node as the default */
cpumask_and(&entry->def_intr.mask, &node_affinity.real_cpu_mask,
@@ -299,15 +658,69 @@ int hfi1_dev_affinity_init(struct hfi1_devdata *dd)
* engines, use the same CPU cores as general/control
* context.
*/
- if (cpumask_weight(&entry->def_intr.mask) == 0)
+ if (cpumask_empty(&entry->def_intr.mask))
cpumask_copy(&entry->def_intr.mask,
&entry->general_intr_mask);
}
- node_affinity_add_tail(entry);
+ /* Determine completion vector CPUs for the entire node */
+ cpumask_and(&entry->comp_vect_mask,
+ &node_affinity.real_cpu_mask, local_mask);
+ cpumask_andnot(&entry->comp_vect_mask,
+ &entry->comp_vect_mask,
+ &entry->rcv_intr.mask);
+ cpumask_andnot(&entry->comp_vect_mask,
+ &entry->comp_vect_mask,
+ &entry->general_intr_mask);
+
+ /*
+ * If there ends up being 0 CPU cores leftover for completion
+ * vectors, use the same CPU core as the general/control
+ * context.
+ */
+ if (cpumask_empty(&entry->comp_vect_mask))
+ cpumask_copy(&entry->comp_vect_mask,
+ &entry->general_intr_mask);
}
+
+ ret = _dev_comp_vect_cpu_mask_init(dd, entry, new_entry);
+ if (ret < 0)
+ goto fail;
+
+ if (new_entry)
+ node_affinity_add_tail(entry);
+
+ dd->affinity_entry = entry;
mutex_unlock(&node_affinity.lock);
+
return 0;
+
+fail:
+ if (new_entry)
+ node_affinity_destroy(entry);
+ mutex_unlock(&node_affinity.lock);
+ return ret;
+}
+
+void hfi1_dev_affinity_clean_up(struct hfi1_devdata *dd)
+{
+ struct hfi1_affinity_node *entry;
+
+ mutex_lock(&node_affinity.lock);
+ if (!dd->affinity_entry)
+ goto unlock;
+ entry = node_affinity_lookup(dd->node);
+ if (!entry)
+ goto unlock;
+
+ /*
+ * Free device completion vector CPUs to be used by future
+ * completion vectors
+ */
+ _dev_comp_vect_cpu_mask_clean_up(dd, entry);
+unlock:
+ dd->affinity_entry = NULL;
+ mutex_unlock(&node_affinity.lock);
}
/*
@@ -335,10 +748,10 @@ static void hfi1_update_sdma_affinity(struct hfi1_msix_entry *msix, int cpu)
sde->cpu = cpu;
cpumask_clear(&msix->mask);
cpumask_set_cpu(cpu, &msix->mask);
- dd_dev_dbg(dd, "IRQ vector: %u, type %s engine %u -> cpu: %d\n",
- msix->msix.vector, irq_type_names[msix->type],
+ dd_dev_dbg(dd, "IRQ: %u, type %s engine %u -> cpu: %d\n",
+ msix->irq, irq_type_names[msix->type],
sde->this_idx, cpu);
- irq_set_affinity_hint(msix->msix.vector, &msix->mask);
+ irq_set_affinity_hint(msix->irq, &msix->mask);
/*
* Set the new cpu in the hfi1_affinity_node and clean
@@ -347,10 +760,10 @@ static void hfi1_update_sdma_affinity(struct hfi1_msix_entry *msix, int cpu)
set = &entry->def_intr;
cpumask_set_cpu(cpu, &set->mask);
cpumask_set_cpu(cpu, &set->used);
- for (i = 0; i < dd->num_msix_entries; i++) {
+ for (i = 0; i < dd->msix_info.max_requested; i++) {
struct hfi1_msix_entry *other_msix;
- other_msix = &dd->msix_entries[i];
+ other_msix = &dd->msix_info.msix_entries[i];
if (other_msix->type != IRQ_SDMA || other_msix == msix)
continue;
@@ -387,7 +800,7 @@ static void hfi1_setup_sdma_notifier(struct hfi1_msix_entry *msix)
{
struct irq_affinity_notify *notify = &msix->notify;
- notify->irq = msix->msix.vector;
+ notify->irq = msix->irq;
notify->notify = hfi1_irq_notifier_notify;
notify->release = hfi1_irq_notifier_release;
@@ -412,7 +825,6 @@ static void hfi1_cleanup_sdma_notifier(struct hfi1_msix_entry *msix)
static int get_irq_affinity(struct hfi1_devdata *dd,
struct hfi1_msix_entry *msix)
{
- int ret;
cpumask_var_t diff;
struct hfi1_affinity_node *entry;
struct cpu_mask_set *set = NULL;
@@ -424,10 +836,6 @@ static int get_irq_affinity(struct hfi1_devdata *dd,
extra[0] = '\0';
cpumask_clear(&msix->mask);
- ret = zalloc_cpumask_var(&diff, GFP_KERNEL);
- if (!ret)
- return -ENOMEM;
-
entry = node_affinity_lookup(dd->node);
switch (msix->type) {
@@ -447,6 +855,11 @@ static int get_irq_affinity(struct hfi1_devdata *dd,
set = &entry->rcv_intr;
scnprintf(extra, 64, "ctxt %u", rcd->ctxt);
break;
+ case IRQ_NETDEVCTXT:
+ rcd = (struct hfi1_ctxtdata *)msix->arg;
+ set = &entry->def_intr;
+ scnprintf(extra, 64, "ctxt %u", rcd->ctxt);
+ break;
default:
dd_dev_err(dd, "Invalid IRQ type %d\n", msix->type);
return -EINVAL;
@@ -458,31 +871,30 @@ static int get_irq_affinity(struct hfi1_devdata *dd,
* finds its CPU here.
*/
if (cpu == -1 && set) {
- if (cpumask_equal(&set->mask, &set->used)) {
- /*
- * We've used up all the CPUs, bump up the generation
- * and reset the 'used' map
- */
- set->gen++;
- cpumask_clear(&set->used);
+ if (!zalloc_cpumask_var(&diff, GFP_KERNEL))
+ return -ENOMEM;
+
+ cpu = cpu_mask_set_get_first(set, diff);
+ if (cpu < 0) {
+ free_cpumask_var(diff);
+ dd_dev_err(dd, "Failure to obtain CPU for IRQ\n");
+ return cpu;
}
- cpumask_andnot(diff, &set->mask, &set->used);
- cpu = cpumask_first(diff);
- cpumask_set_cpu(cpu, &set->used);
+
+ free_cpumask_var(diff);
}
cpumask_set_cpu(cpu, &msix->mask);
- dd_dev_info(dd, "IRQ vector: %u, type %s %s -> cpu: %d\n",
- msix->msix.vector, irq_type_names[msix->type],
+ dd_dev_info(dd, "IRQ: %u, type %s %s -> cpu: %d\n",
+ msix->irq, irq_type_names[msix->type],
extra, cpu);
- irq_set_affinity_hint(msix->msix.vector, &msix->mask);
+ irq_set_affinity_hint(msix->irq, &msix->mask);
if (msix->type == IRQ_SDMA) {
sde->cpu = cpu;
hfi1_setup_sdma_notifier(msix);
}
- free_cpumask_var(diff);
return 0;
}
@@ -500,7 +912,6 @@ void hfi1_put_irq_affinity(struct hfi1_devdata *dd,
struct hfi1_msix_entry *msix)
{
struct cpu_mask_set *set = NULL;
- struct hfi1_ctxtdata *rcd;
struct hfi1_affinity_node *entry;
mutex_lock(&node_affinity.lock);
@@ -514,12 +925,17 @@ void hfi1_put_irq_affinity(struct hfi1_devdata *dd,
case IRQ_GENERAL:
/* Don't do accounting for general contexts */
break;
- case IRQ_RCVCTXT:
- rcd = (struct hfi1_ctxtdata *)msix->arg;
+ case IRQ_RCVCTXT: {
+ struct hfi1_ctxtdata *rcd = msix->arg;
+
/* Don't do accounting for control contexts */
if (rcd->ctxt != HFI1_CTRL_CTXT)
set = &entry->rcv_intr;
break;
+ }
+ case IRQ_NETDEVCTXT:
+ set = &entry->def_intr;
+ break;
default:
mutex_unlock(&node_affinity.lock);
return;
@@ -527,13 +943,10 @@ void hfi1_put_irq_affinity(struct hfi1_devdata *dd,
if (set) {
cpumask_andnot(&set->used, &set->used, &msix->mask);
- if (cpumask_empty(&set->used) && set->gen) {
- set->gen--;
- cpumask_copy(&set->used, &set->mask);
- }
+ _cpu_mask_set_gen_dec(set);
}
- irq_set_affinity_hint(msix->msix.vector, NULL);
+ irq_set_affinity_hint(msix->irq, NULL);
cpumask_clear(&msix->mask);
mutex_unlock(&node_affinity.lock);
}
@@ -542,32 +955,23 @@ void hfi1_put_irq_affinity(struct hfi1_devdata *dd,
static void find_hw_thread_mask(uint hw_thread_no, cpumask_var_t hw_thread_mask,
struct hfi1_affinity_node_list *affinity)
{
- int possible, curr_cpu, i;
- uint num_cores_per_socket = node_affinity.num_online_cpus /
- affinity->num_core_siblings /
- node_affinity.num_online_nodes;
+ int curr_cpu;
+ uint num_cores;
cpumask_copy(hw_thread_mask, &affinity->proc.mask);
- if (affinity->num_core_siblings > 0) {
- /* Removing other siblings not needed for now */
- possible = cpumask_weight(hw_thread_mask);
- curr_cpu = cpumask_first(hw_thread_mask);
- for (i = 0;
- i < num_cores_per_socket * node_affinity.num_online_nodes;
- i++)
- curr_cpu = cpumask_next(curr_cpu, hw_thread_mask);
-
- for (; i < possible; i++) {
- cpumask_clear_cpu(curr_cpu, hw_thread_mask);
- curr_cpu = cpumask_next(curr_cpu, hw_thread_mask);
- }
- /* Identifying correct HW threads within physical cores */
- cpumask_shift_left(hw_thread_mask, hw_thread_mask,
- num_cores_per_socket *
- node_affinity.num_online_nodes *
- hw_thread_no);
- }
+ if (affinity->num_core_siblings == 0)
+ return;
+
+ num_cores = rounddown(node_affinity.num_online_cpus / affinity->num_core_siblings,
+ node_affinity.num_online_nodes);
+
+ /* Removing other siblings not needed for now */
+ curr_cpu = cpumask_nth(num_cores * node_affinity.num_online_nodes, hw_thread_mask) + 1;
+ cpumask_clear_cpus(hw_thread_mask, curr_cpu, nr_cpu_ids - curr_cpu);
+
+ /* Identifying correct HW threads within physical cores */
+ cpumask_shift_left(hw_thread_mask, hw_thread_mask, num_cores * hw_thread_no);
}
int hfi1_get_proc_affinity(int node)
@@ -576,7 +980,7 @@ int hfi1_get_proc_affinity(int node)
struct hfi1_affinity_node *entry;
cpumask_var_t diff, hw_thread_mask, available_mask, intrs_mask;
const struct cpumask *node_mask,
- *proc_mask = tsk_cpus_allowed(current);
+ *proc_mask = current->cpus_ptr;
struct hfi1_affinity_node_list *affinity = &node_affinity;
struct cpu_mask_set *set = &affinity->proc;
@@ -584,7 +988,7 @@ int hfi1_get_proc_affinity(int node)
* check whether process/context affinity has already
* been set
*/
- if (cpumask_weight(proc_mask) == 1) {
+ if (current->nr_cpus_allowed == 1) {
hfi1_cdbg(PROC, "PID %u %s affinity set to CPU %*pbl",
current->pid, current->comm,
cpumask_pr_args(proc_mask));
@@ -595,7 +999,7 @@ int hfi1_get_proc_affinity(int node)
cpu = cpumask_first(proc_mask);
cpumask_set_cpu(cpu, &set->used);
goto done;
- } else if (cpumask_weight(proc_mask) < cpumask_weight(&set->mask)) {
+ } else if (current->nr_cpus_allowed < cpumask_weight(&set->mask)) {
hfi1_cdbg(PROC, "PID %u %s affinity set to CPU set(s) %*pbl",
current->pid, current->comm,
cpumask_pr_args(proc_mask));
@@ -641,10 +1045,7 @@ int hfi1_get_proc_affinity(int node)
* If we've used all available HW threads, clear the mask and start
* overloading.
*/
- if (cpumask_equal(&set->mask, &set->used)) {
- set->gen++;
- cpumask_clear(&set->used);
- }
+ _cpu_mask_set_gen_inc(set);
/*
* If NUMA node has CPUs used by interrupt handlers, include them in the
@@ -669,22 +1070,19 @@ int hfi1_get_proc_affinity(int node)
* If HT cores are enabled, identify which HW threads within the
* physical cores should be used.
*/
- if (affinity->num_core_siblings > 0) {
- for (i = 0; i < affinity->num_core_siblings; i++) {
- find_hw_thread_mask(i, hw_thread_mask, affinity);
+ for (i = 0; i < affinity->num_core_siblings; i++) {
+ find_hw_thread_mask(i, hw_thread_mask, affinity);
- /*
- * If there's at least one available core for this HW
- * thread number, stop looking for a core.
- *
- * diff will always be not empty at least once in this
- * loop as the used mask gets reset when
- * (set->mask == set->used) before this loop.
- */
- cpumask_andnot(diff, hw_thread_mask, &set->used);
- if (!cpumask_empty(diff))
- break;
- }
+ /*
+ * If there's at least one available core for this HW
+ * thread number, stop looking for a core.
+ *
+ * diff will always be not empty at least once in this
+ * loop as the used mask gets reset when
+ * (set->mask == set->used) before this loop.
+ */
+ if (cpumask_andnot(diff, hw_thread_mask, &set->used))
+ break;
}
hfi1_cdbg(PROC, "Same available HW thread on all physical CPUs: %*pbl",
cpumask_pr_args(hw_thread_mask));
@@ -715,8 +1113,7 @@ int hfi1_get_proc_affinity(int node)
* used for process assignments using the same method as
* the preferred NUMA node.
*/
- cpumask_andnot(diff, available_mask, intrs_mask);
- if (!cpumask_empty(diff))
+ if (cpumask_andnot(diff, available_mask, intrs_mask))
cpumask_copy(available_mask, diff);
/* If we don't have CPUs on the preferred node, use other NUMA nodes */
@@ -732,8 +1129,7 @@ int hfi1_get_proc_affinity(int node)
* At first, we don't want to place processes on the same
* CPUs as interrupt handlers.
*/
- cpumask_andnot(diff, available_mask, intrs_mask);
- if (!cpumask_empty(diff))
+ if (cpumask_andnot(diff, available_mask, intrs_mask))
cpumask_copy(available_mask, diff);
}
hfi1_cdbg(PROC, "Possible CPUs for process: %*pbl",
@@ -768,11 +1164,7 @@ void hfi1_put_proc_affinity(int cpu)
return;
mutex_lock(&affinity->lock);
- cpumask_clear_cpu(cpu, &set->used);
+ cpu_mask_set_put(set, cpu);
hfi1_cdbg(PROC, "Returning CPU %d for future process assignment", cpu);
- if (cpumask_empty(&set->used) && set->gen) {
- set->gen--;
- cpumask_copy(&set->used, &set->mask);
- }
mutex_unlock(&affinity->lock);
}
diff --git a/drivers/infiniband/hw/hfi1/affinity.h b/drivers/infiniband/hw/hfi1/affinity.h
index e78c7aa094e0..ffdd0d571c7a 100644
--- a/drivers/infiniband/hw/hfi1/affinity.h
+++ b/drivers/infiniband/hw/hfi1/affinity.h
@@ -1,49 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
+ * Copyright(c) 2015 - 2020 Intel Corporation.
*/
+
#ifndef _HFI1_AFFINITY_H
#define _HFI1_AFFINITY_H
@@ -52,6 +11,7 @@
enum irq_type {
IRQ_SDMA,
IRQ_RCVCTXT,
+ IRQ_NETDEVCTXT,
IRQ_GENERAL,
IRQ_OTHER
};
@@ -75,30 +35,34 @@ struct hfi1_msix_entry;
/* Initialize non-HT cpu cores mask */
void init_real_cpu_mask(void);
/* Initialize driver affinity data */
-int hfi1_dev_affinity_init(struct hfi1_devdata *);
+int hfi1_dev_affinity_init(struct hfi1_devdata *dd);
/*
* Set IRQ affinity to a CPU. The function will determine the
* CPU and set the affinity to it.
*/
-int hfi1_get_irq_affinity(struct hfi1_devdata *, struct hfi1_msix_entry *);
+int hfi1_get_irq_affinity(struct hfi1_devdata *dd,
+ struct hfi1_msix_entry *msix);
/*
* Remove the IRQ's CPU affinity. This function also updates
* any internal CPU tracking data
*/
-void hfi1_put_irq_affinity(struct hfi1_devdata *, struct hfi1_msix_entry *);
+void hfi1_put_irq_affinity(struct hfi1_devdata *dd,
+ struct hfi1_msix_entry *msix);
/*
* Determine a CPU affinity for a user process, if the process does not
* have an affinity set yet.
*/
-int hfi1_get_proc_affinity(int);
+int hfi1_get_proc_affinity(int node);
/* Release a CPU used by a user process. */
-void hfi1_put_proc_affinity(int);
+void hfi1_put_proc_affinity(int cpu);
struct hfi1_affinity_node {
int node;
+ u16 __percpu *comp_vect_affinity;
struct cpu_mask_set def_intr;
struct cpu_mask_set rcv_intr;
struct cpumask general_intr_mask;
+ struct cpumask comp_vect_mask;
struct list_head list;
};
@@ -114,7 +78,11 @@ struct hfi1_affinity_node_list {
};
int node_affinity_init(void);
-void node_affinity_destroy(void);
+void node_affinity_destroy_all(void);
extern struct hfi1_affinity_node_list node_affinity;
+void hfi1_dev_affinity_clean_up(struct hfi1_devdata *dd);
+int hfi1_comp_vect_mappings_lookup(struct rvt_dev_info *rdi, int comp_vect);
+int hfi1_comp_vectors_set_up(struct hfi1_devdata *dd);
+void hfi1_comp_vectors_clean_up(struct hfi1_devdata *dd);
#endif /* _HFI1_AFFINITY_H */
diff --git a/drivers/infiniband/hw/hfi1/aspm.c b/drivers/infiniband/hw/hfi1/aspm.c
new file mode 100644
index 000000000000..79990d09522b
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/aspm.c
@@ -0,0 +1,270 @@
+// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
+/*
+ * Copyright(c) 2019 Intel Corporation.
+ *
+ */
+
+#include "aspm.h"
+
+/* Time after which the timer interrupt will re-enable ASPM */
+#define ASPM_TIMER_MS 1000
+/* Time for which interrupts are ignored after a timer has been scheduled */
+#define ASPM_RESCHED_TIMER_MS (ASPM_TIMER_MS / 2)
+/* Two interrupts within this time trigger ASPM disable */
+#define ASPM_TRIGGER_MS 1
+#define ASPM_TRIGGER_NS (ASPM_TRIGGER_MS * 1000 * 1000ull)
+#define ASPM_L1_SUPPORTED(reg) \
+ ((((reg) & PCI_EXP_LNKCAP_ASPMS) >> 10) & 0x2)
+
+uint aspm_mode = ASPM_MODE_DISABLED;
+module_param_named(aspm, aspm_mode, uint, 0444);
+MODULE_PARM_DESC(aspm, "PCIe ASPM: 0: disable, 1: enable, 2: dynamic");
+
+static bool aspm_hw_l1_supported(struct hfi1_devdata *dd)
+{
+ struct pci_dev *parent = dd->pcidev->bus->self;
+ u32 up, dn;
+
+ /*
+ * If the driver does not have access to the upstream component,
+ * it cannot support ASPM L1 at all.
+ */
+ if (!parent)
+ return false;
+
+ pcie_capability_read_dword(dd->pcidev, PCI_EXP_LNKCAP, &dn);
+ dn = ASPM_L1_SUPPORTED(dn);
+
+ pcie_capability_read_dword(parent, PCI_EXP_LNKCAP, &up);
+ up = ASPM_L1_SUPPORTED(up);
+
+ /* ASPM works on A-step but is reported as not supported */
+ return (!!dn || is_ax(dd)) && !!up;
+}
+
+/* Set L1 entrance latency for slower entry to L1 */
+static void aspm_hw_set_l1_ent_latency(struct hfi1_devdata *dd)
+{
+ u32 l1_ent_lat = 0x4u;
+ u32 reg32;
+
+ pci_read_config_dword(dd->pcidev, PCIE_CFG_REG_PL3, &reg32);
+ reg32 &= ~PCIE_CFG_REG_PL3_L1_ENT_LATENCY_SMASK;
+ reg32 |= l1_ent_lat << PCIE_CFG_REG_PL3_L1_ENT_LATENCY_SHIFT;
+ pci_write_config_dword(dd->pcidev, PCIE_CFG_REG_PL3, reg32);
+}
+
+static void aspm_hw_enable_l1(struct hfi1_devdata *dd)
+{
+ struct pci_dev *parent = dd->pcidev->bus->self;
+
+ /*
+ * If the driver does not have access to the upstream component,
+ * it cannot support ASPM L1 at all.
+ */
+ if (!parent)
+ return;
+
+ /* Enable ASPM L1 first in upstream component and then downstream */
+ pcie_capability_clear_and_set_word(parent, PCI_EXP_LNKCTL,
+ PCI_EXP_LNKCTL_ASPMC,
+ PCI_EXP_LNKCTL_ASPM_L1);
+ pcie_capability_clear_and_set_word(dd->pcidev, PCI_EXP_LNKCTL,
+ PCI_EXP_LNKCTL_ASPMC,
+ PCI_EXP_LNKCTL_ASPM_L1);
+}
+
+void aspm_hw_disable_l1(struct hfi1_devdata *dd)
+{
+ struct pci_dev *parent = dd->pcidev->bus->self;
+
+ /* Disable ASPM L1 first in downstream component and then upstream */
+ pcie_capability_clear_and_set_word(dd->pcidev, PCI_EXP_LNKCTL,
+ PCI_EXP_LNKCTL_ASPMC, 0x0);
+ if (parent)
+ pcie_capability_clear_and_set_word(parent, PCI_EXP_LNKCTL,
+ PCI_EXP_LNKCTL_ASPMC, 0x0);
+}
+
+static void aspm_enable(struct hfi1_devdata *dd)
+{
+ if (dd->aspm_enabled || aspm_mode == ASPM_MODE_DISABLED ||
+ !dd->aspm_supported)
+ return;
+
+ aspm_hw_enable_l1(dd);
+ dd->aspm_enabled = true;
+}
+
+static void aspm_disable(struct hfi1_devdata *dd)
+{
+ if (!dd->aspm_enabled || aspm_mode == ASPM_MODE_ENABLED)
+ return;
+
+ aspm_hw_disable_l1(dd);
+ dd->aspm_enabled = false;
+}
+
+static void aspm_disable_inc(struct hfi1_devdata *dd)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&dd->aspm_lock, flags);
+ aspm_disable(dd);
+ atomic_inc(&dd->aspm_disabled_cnt);
+ spin_unlock_irqrestore(&dd->aspm_lock, flags);
+}
+
+static void aspm_enable_dec(struct hfi1_devdata *dd)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&dd->aspm_lock, flags);
+ if (atomic_dec_and_test(&dd->aspm_disabled_cnt))
+ aspm_enable(dd);
+ spin_unlock_irqrestore(&dd->aspm_lock, flags);
+}
+
+/* ASPM processing for each receive context interrupt */
+void __aspm_ctx_disable(struct hfi1_ctxtdata *rcd)
+{
+ bool restart_timer;
+ bool close_interrupts;
+ unsigned long flags;
+ ktime_t now, prev;
+
+ spin_lock_irqsave(&rcd->aspm_lock, flags);
+ /* PSM contexts are open */
+ if (!rcd->aspm_intr_enable)
+ goto unlock;
+
+ prev = rcd->aspm_ts_last_intr;
+ now = ktime_get();
+ rcd->aspm_ts_last_intr = now;
+
+ /* An interrupt pair close together in time */
+ close_interrupts = ktime_to_ns(ktime_sub(now, prev)) < ASPM_TRIGGER_NS;
+
+ /* Don't push out our timer till this much time has elapsed */
+ restart_timer = ktime_to_ns(ktime_sub(now, rcd->aspm_ts_timer_sched)) >
+ ASPM_RESCHED_TIMER_MS * NSEC_PER_MSEC;
+ restart_timer = restart_timer && close_interrupts;
+
+ /* Disable ASPM and schedule timer */
+ if (rcd->aspm_enabled && close_interrupts) {
+ aspm_disable_inc(rcd->dd);
+ rcd->aspm_enabled = false;
+ restart_timer = true;
+ }
+
+ if (restart_timer) {
+ mod_timer(&rcd->aspm_timer,
+ jiffies + msecs_to_jiffies(ASPM_TIMER_MS));
+ rcd->aspm_ts_timer_sched = now;
+ }
+unlock:
+ spin_unlock_irqrestore(&rcd->aspm_lock, flags);
+}
+
+/* Timer function for re-enabling ASPM in the absence of interrupt activity */
+static void aspm_ctx_timer_function(struct timer_list *t)
+{
+ struct hfi1_ctxtdata *rcd = timer_container_of(rcd, t, aspm_timer);
+ unsigned long flags;
+
+ spin_lock_irqsave(&rcd->aspm_lock, flags);
+ aspm_enable_dec(rcd->dd);
+ rcd->aspm_enabled = true;
+ spin_unlock_irqrestore(&rcd->aspm_lock, flags);
+}
+
+/*
+ * Disable interrupt processing for verbs contexts when PSM or VNIC contexts
+ * are open.
+ */
+void aspm_disable_all(struct hfi1_devdata *dd)
+{
+ struct hfi1_ctxtdata *rcd;
+ unsigned long flags;
+ u16 i;
+
+ for (i = 0; i < dd->first_dyn_alloc_ctxt; i++) {
+ rcd = hfi1_rcd_get_by_index(dd, i);
+ if (rcd) {
+ timer_delete_sync(&rcd->aspm_timer);
+ spin_lock_irqsave(&rcd->aspm_lock, flags);
+ rcd->aspm_intr_enable = false;
+ spin_unlock_irqrestore(&rcd->aspm_lock, flags);
+ hfi1_rcd_put(rcd);
+ }
+ }
+
+ aspm_disable(dd);
+ atomic_set(&dd->aspm_disabled_cnt, 0);
+}
+
+/* Re-enable interrupt processing for verbs contexts */
+void aspm_enable_all(struct hfi1_devdata *dd)
+{
+ struct hfi1_ctxtdata *rcd;
+ unsigned long flags;
+ u16 i;
+
+ aspm_enable(dd);
+
+ if (aspm_mode != ASPM_MODE_DYNAMIC)
+ return;
+
+ for (i = 0; i < dd->first_dyn_alloc_ctxt; i++) {
+ rcd = hfi1_rcd_get_by_index(dd, i);
+ if (rcd) {
+ spin_lock_irqsave(&rcd->aspm_lock, flags);
+ rcd->aspm_intr_enable = true;
+ rcd->aspm_enabled = true;
+ spin_unlock_irqrestore(&rcd->aspm_lock, flags);
+ hfi1_rcd_put(rcd);
+ }
+ }
+}
+
+static void aspm_ctx_init(struct hfi1_ctxtdata *rcd)
+{
+ spin_lock_init(&rcd->aspm_lock);
+ timer_setup(&rcd->aspm_timer, aspm_ctx_timer_function, 0);
+ rcd->aspm_intr_supported = rcd->dd->aspm_supported &&
+ aspm_mode == ASPM_MODE_DYNAMIC &&
+ rcd->ctxt < rcd->dd->first_dyn_alloc_ctxt;
+}
+
+void aspm_init(struct hfi1_devdata *dd)
+{
+ struct hfi1_ctxtdata *rcd;
+ u16 i;
+
+ spin_lock_init(&dd->aspm_lock);
+ dd->aspm_supported = aspm_hw_l1_supported(dd);
+
+ for (i = 0; i < dd->first_dyn_alloc_ctxt; i++) {
+ rcd = hfi1_rcd_get_by_index(dd, i);
+ if (rcd)
+ aspm_ctx_init(rcd);
+ hfi1_rcd_put(rcd);
+ }
+
+ /* Start with ASPM disabled */
+ aspm_hw_set_l1_ent_latency(dd);
+ dd->aspm_enabled = false;
+ aspm_hw_disable_l1(dd);
+
+ /* Now turn on ASPM if configured */
+ aspm_enable_all(dd);
+}
+
+void aspm_exit(struct hfi1_devdata *dd)
+{
+ aspm_disable_all(dd);
+
+ /* Turn on ASPM on exit to conserve power */
+ aspm_enable(dd);
+}
+
diff --git a/drivers/infiniband/hw/hfi1/aspm.h b/drivers/infiniband/hw/hfi1/aspm.h
index 0d58fe3b49b5..c8d92dc13daa 100644
--- a/drivers/infiniband/hw/hfi1/aspm.h
+++ b/drivers/infiniband/hw/hfi1/aspm.h
@@ -1,49 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
+ * Copyright(c) 2015-2017 Intel Corporation.
*/
+
#ifndef _ASPM_H
#define _ASPM_H
@@ -57,253 +16,20 @@ enum aspm_mode {
ASPM_MODE_DYNAMIC = 2, /* ASPM enabled/disabled dynamically */
};
-/* Time after which the timer interrupt will re-enable ASPM */
-#define ASPM_TIMER_MS 1000
-/* Time for which interrupts are ignored after a timer has been scheduled */
-#define ASPM_RESCHED_TIMER_MS (ASPM_TIMER_MS / 2)
-/* Two interrupts within this time trigger ASPM disable */
-#define ASPM_TRIGGER_MS 1
-#define ASPM_TRIGGER_NS (ASPM_TRIGGER_MS * 1000 * 1000ull)
-#define ASPM_L1_SUPPORTED(reg) \
- (((reg & PCI_EXP_LNKCAP_ASPMS) >> 10) & 0x2)
-
-static inline bool aspm_hw_l1_supported(struct hfi1_devdata *dd)
-{
- struct pci_dev *parent = dd->pcidev->bus->self;
- u32 up, dn;
-
- /*
- * If the driver does not have access to the upstream component,
- * it cannot support ASPM L1 at all.
- */
- if (!parent)
- return false;
-
- pcie_capability_read_dword(dd->pcidev, PCI_EXP_LNKCAP, &dn);
- dn = ASPM_L1_SUPPORTED(dn);
-
- pcie_capability_read_dword(parent, PCI_EXP_LNKCAP, &up);
- up = ASPM_L1_SUPPORTED(up);
-
- /* ASPM works on A-step but is reported as not supported */
- return (!!dn || is_ax(dd)) && !!up;
-}
-
-/* Set L1 entrance latency for slower entry to L1 */
-static inline void aspm_hw_set_l1_ent_latency(struct hfi1_devdata *dd)
-{
- u32 l1_ent_lat = 0x4u;
- u32 reg32;
-
- pci_read_config_dword(dd->pcidev, PCIE_CFG_REG_PL3, &reg32);
- reg32 &= ~PCIE_CFG_REG_PL3_L1_ENT_LATENCY_SMASK;
- reg32 |= l1_ent_lat << PCIE_CFG_REG_PL3_L1_ENT_LATENCY_SHIFT;
- pci_write_config_dword(dd->pcidev, PCIE_CFG_REG_PL3, reg32);
-}
-
-static inline void aspm_hw_enable_l1(struct hfi1_devdata *dd)
-{
- struct pci_dev *parent = dd->pcidev->bus->self;
-
- /*
- * If the driver does not have access to the upstream component,
- * it cannot support ASPM L1 at all.
- */
- if (!parent)
- return;
-
- /* Enable ASPM L1 first in upstream component and then downstream */
- pcie_capability_clear_and_set_word(parent, PCI_EXP_LNKCTL,
- PCI_EXP_LNKCTL_ASPMC,
- PCI_EXP_LNKCTL_ASPM_L1);
- pcie_capability_clear_and_set_word(dd->pcidev, PCI_EXP_LNKCTL,
- PCI_EXP_LNKCTL_ASPMC,
- PCI_EXP_LNKCTL_ASPM_L1);
-}
-
-static inline void aspm_hw_disable_l1(struct hfi1_devdata *dd)
-{
- struct pci_dev *parent = dd->pcidev->bus->self;
-
- /* Disable ASPM L1 first in downstream component and then upstream */
- pcie_capability_clear_and_set_word(dd->pcidev, PCI_EXP_LNKCTL,
- PCI_EXP_LNKCTL_ASPMC, 0x0);
- if (parent)
- pcie_capability_clear_and_set_word(parent, PCI_EXP_LNKCTL,
- PCI_EXP_LNKCTL_ASPMC, 0x0);
-}
-
-static inline void aspm_enable(struct hfi1_devdata *dd)
-{
- if (dd->aspm_enabled || aspm_mode == ASPM_MODE_DISABLED ||
- !dd->aspm_supported)
- return;
-
- aspm_hw_enable_l1(dd);
- dd->aspm_enabled = true;
-}
-
-static inline void aspm_disable(struct hfi1_devdata *dd)
-{
- if (!dd->aspm_enabled || aspm_mode == ASPM_MODE_ENABLED)
- return;
-
- aspm_hw_disable_l1(dd);
- dd->aspm_enabled = false;
-}
-
-static inline void aspm_disable_inc(struct hfi1_devdata *dd)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&dd->aspm_lock, flags);
- aspm_disable(dd);
- atomic_inc(&dd->aspm_disabled_cnt);
- spin_unlock_irqrestore(&dd->aspm_lock, flags);
-}
-
-static inline void aspm_enable_dec(struct hfi1_devdata *dd)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&dd->aspm_lock, flags);
- if (atomic_dec_and_test(&dd->aspm_disabled_cnt))
- aspm_enable(dd);
- spin_unlock_irqrestore(&dd->aspm_lock, flags);
-}
+void aspm_init(struct hfi1_devdata *dd);
+void aspm_exit(struct hfi1_devdata *dd);
+void aspm_hw_disable_l1(struct hfi1_devdata *dd);
+void __aspm_ctx_disable(struct hfi1_ctxtdata *rcd);
+void aspm_disable_all(struct hfi1_devdata *dd);
+void aspm_enable_all(struct hfi1_devdata *dd);
-/* ASPM processing for each receive context interrupt */
static inline void aspm_ctx_disable(struct hfi1_ctxtdata *rcd)
{
- bool restart_timer;
- bool close_interrupts;
- unsigned long flags;
- ktime_t now, prev;
-
/* Quickest exit for minimum impact */
- if (!rcd->aspm_intr_supported)
- return;
-
- spin_lock_irqsave(&rcd->aspm_lock, flags);
- /* PSM contexts are open */
- if (!rcd->aspm_intr_enable)
- goto unlock;
-
- prev = rcd->aspm_ts_last_intr;
- now = ktime_get();
- rcd->aspm_ts_last_intr = now;
-
- /* An interrupt pair close together in time */
- close_interrupts = ktime_to_ns(ktime_sub(now, prev)) < ASPM_TRIGGER_NS;
-
- /* Don't push out our timer till this much time has elapsed */
- restart_timer = ktime_to_ns(ktime_sub(now, rcd->aspm_ts_timer_sched)) >
- ASPM_RESCHED_TIMER_MS * NSEC_PER_MSEC;
- restart_timer = restart_timer && close_interrupts;
-
- /* Disable ASPM and schedule timer */
- if (rcd->aspm_enabled && close_interrupts) {
- aspm_disable_inc(rcd->dd);
- rcd->aspm_enabled = false;
- restart_timer = true;
- }
-
- if (restart_timer) {
- mod_timer(&rcd->aspm_timer,
- jiffies + msecs_to_jiffies(ASPM_TIMER_MS));
- rcd->aspm_ts_timer_sched = now;
- }
-unlock:
- spin_unlock_irqrestore(&rcd->aspm_lock, flags);
-}
-
-/* Timer function for re-enabling ASPM in the absence of interrupt activity */
-static inline void aspm_ctx_timer_function(unsigned long data)
-{
- struct hfi1_ctxtdata *rcd = (struct hfi1_ctxtdata *)data;
- unsigned long flags;
-
- spin_lock_irqsave(&rcd->aspm_lock, flags);
- aspm_enable_dec(rcd->dd);
- rcd->aspm_enabled = true;
- spin_unlock_irqrestore(&rcd->aspm_lock, flags);
-}
-
-/* Disable interrupt processing for verbs contexts when PSM contexts are open */
-static inline void aspm_disable_all(struct hfi1_devdata *dd)
-{
- struct hfi1_ctxtdata *rcd;
- unsigned long flags;
- unsigned i;
-
- for (i = 0; i < dd->first_user_ctxt; i++) {
- rcd = dd->rcd[i];
- del_timer_sync(&rcd->aspm_timer);
- spin_lock_irqsave(&rcd->aspm_lock, flags);
- rcd->aspm_intr_enable = false;
- spin_unlock_irqrestore(&rcd->aspm_lock, flags);
- }
-
- aspm_disable(dd);
- atomic_set(&dd->aspm_disabled_cnt, 0);
-}
-
-/* Re-enable interrupt processing for verbs contexts */
-static inline void aspm_enable_all(struct hfi1_devdata *dd)
-{
- struct hfi1_ctxtdata *rcd;
- unsigned long flags;
- unsigned i;
-
- aspm_enable(dd);
-
- if (aspm_mode != ASPM_MODE_DYNAMIC)
+ if (likely(!rcd->aspm_intr_supported))
return;
- for (i = 0; i < dd->first_user_ctxt; i++) {
- rcd = dd->rcd[i];
- spin_lock_irqsave(&rcd->aspm_lock, flags);
- rcd->aspm_intr_enable = true;
- rcd->aspm_enabled = true;
- spin_unlock_irqrestore(&rcd->aspm_lock, flags);
- }
-}
-
-static inline void aspm_ctx_init(struct hfi1_ctxtdata *rcd)
-{
- spin_lock_init(&rcd->aspm_lock);
- setup_timer(&rcd->aspm_timer, aspm_ctx_timer_function,
- (unsigned long)rcd);
- rcd->aspm_intr_supported = rcd->dd->aspm_supported &&
- aspm_mode == ASPM_MODE_DYNAMIC &&
- rcd->ctxt < rcd->dd->first_user_ctxt;
-}
-
-static inline void aspm_init(struct hfi1_devdata *dd)
-{
- unsigned i;
-
- spin_lock_init(&dd->aspm_lock);
- dd->aspm_supported = aspm_hw_l1_supported(dd);
-
- for (i = 0; i < dd->first_user_ctxt; i++)
- aspm_ctx_init(dd->rcd[i]);
-
- /* Start with ASPM disabled */
- aspm_hw_set_l1_ent_latency(dd);
- dd->aspm_enabled = false;
- aspm_hw_disable_l1(dd);
-
- /* Now turn on ASPM if configured */
- aspm_enable_all(dd);
-}
-
-static inline void aspm_exit(struct hfi1_devdata *dd)
-{
- aspm_disable_all(dd);
-
- /* Turn on ASPM on exit to conserve power */
- aspm_enable(dd);
+ __aspm_ctx_disable(rcd);
}
#endif /* _ASPM_H */
diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index ef72bc2a9e1d..0781ab756d44 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -1,48 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
+ * Copyright(c) 2015 - 2020 Intel Corporation.
+ * Copyright(c) 2021 Cornelis Networks.
*/
/*
@@ -64,12 +23,9 @@
#include "platform.h"
#include "aspm.h"
#include "affinity.h"
-
-#define NUM_IB_PORTS 1
-
-uint kdeth_qp;
-module_param_named(kdeth_qp, kdeth_qp, uint, S_IRUGO);
-MODULE_PARM_DESC(kdeth_qp, "Set the KDETH queue pair prefix");
+#include "debugfs.h"
+#include "fault.h"
+#include "netdev.h"
uint num_vls = HFI1_MAX_VLS_SUPPORTED;
module_param(num_vls, uint, S_IRUGO);
@@ -125,9 +81,18 @@ struct flag_table {
#define DEFAULT_KRCVQS 2
#define MIN_KERNEL_KCTXTS 2
#define FIRST_KERNEL_KCTXT 1
-/* sizes for both the QP and RSM map tables */
-#define NUM_MAP_ENTRIES 256
-#define NUM_MAP_REGS 32
+
+/*
+ * RSM instance allocation
+ * 0 - User Fecn Handling
+ * 1 - Vnic
+ * 2 - AIP
+ * 3 - Verbs
+ */
+#define RSM_INS_FECN 0
+#define RSM_INS_VNIC 1
+#define RSM_INS_AIP 2
+#define RSM_INS_VERBS 3
/* Bit offset into the GUID which carries HFI id information */
#define GUID_HFI_INDEX_SHIFT 39
@@ -138,8 +103,7 @@ struct flag_table {
#define is_emulator_p(dd) ((((dd)->irev) & 0xf) == 3)
#define is_emulator_s(dd) ((((dd)->irev) & 0xf) == 4)
-/* RSM fields */
-
+/* RSM fields for Verbs */
/* packet type */
#define IB_PACKET_TYPE 2ull
#define QW_SHIFT 6ull
@@ -169,6 +133,47 @@ struct flag_table {
/* QPN[m+n:1] QW 1, OFFSET 1 */
#define QPN_SELECT_OFFSET ((1ull << QW_SHIFT) | (1ull))
+/* RSM fields for AIP */
+/* LRH.BTH above is reused for this rule */
+
+/* BTH.DESTQP: QW 1, OFFSET 16 for match */
+#define BTH_DESTQP_QW 1ull
+#define BTH_DESTQP_BIT_OFFSET 16ull
+#define BTH_DESTQP_OFFSET(off) ((BTH_DESTQP_QW << QW_SHIFT) | (off))
+#define BTH_DESTQP_MATCH_OFFSET BTH_DESTQP_OFFSET(BTH_DESTQP_BIT_OFFSET)
+#define BTH_DESTQP_MASK 0xFFull
+#define BTH_DESTQP_VALUE 0x81ull
+
+/* DETH.SQPN: QW 1 Offset 56 for select */
+/* We use 8 most significant Soure QPN bits as entropy fpr AIP */
+#define DETH_AIP_SQPN_QW 3ull
+#define DETH_AIP_SQPN_BIT_OFFSET 56ull
+#define DETH_AIP_SQPN_OFFSET(off) ((DETH_AIP_SQPN_QW << QW_SHIFT) | (off))
+#define DETH_AIP_SQPN_SELECT_OFFSET \
+ DETH_AIP_SQPN_OFFSET(DETH_AIP_SQPN_BIT_OFFSET)
+
+/* RSM fields for Vnic */
+/* L2_TYPE: QW 0, OFFSET 61 - for match */
+#define L2_TYPE_QW 0ull
+#define L2_TYPE_BIT_OFFSET 61ull
+#define L2_TYPE_OFFSET(off) ((L2_TYPE_QW << QW_SHIFT) | (off))
+#define L2_TYPE_MATCH_OFFSET L2_TYPE_OFFSET(L2_TYPE_BIT_OFFSET)
+#define L2_TYPE_MASK 3ull
+#define L2_16B_VALUE 2ull
+
+/* L4_TYPE QW 1, OFFSET 0 - for match */
+#define L4_TYPE_QW 1ull
+#define L4_TYPE_BIT_OFFSET 0ull
+#define L4_TYPE_OFFSET(off) ((L4_TYPE_QW << QW_SHIFT) | (off))
+#define L4_TYPE_MATCH_OFFSET L4_TYPE_OFFSET(L4_TYPE_BIT_OFFSET)
+#define L4_16B_TYPE_MASK 0xFFull
+#define L4_16B_ETH_VALUE 0x78ull
+
+/* 16B VESWID - for select */
+#define L4_16B_HDR_VESWID_OFFSET ((2 << QW_SHIFT) | (16ull))
+/* 16B ENTROPY - for select */
+#define L2_16B_ENTROPY_OFFSET ((1 << QW_SHIFT) | (32ull))
+
/* defines to build power on SC2VL table */
#define SC2VL_VAL( \
num, \
@@ -246,7 +251,7 @@ struct flag_table {
/*
* CCE Error flags.
*/
-static struct flag_table cce_err_status_flags[] = {
+static const struct flag_table cce_err_status_flags[] = {
/* 0*/ FLAG_ENTRY0("CceCsrParityErr",
CCE_ERR_STATUS_CCE_CSR_PARITY_ERR_SMASK),
/* 1*/ FLAG_ENTRY0("CceCsrReadBadAddrErr",
@@ -336,7 +341,7 @@ static struct flag_table cce_err_status_flags[] = {
* Misc Error flags
*/
#define MES(text) MISC_ERR_STATUS_MISC_##text##_ERR_SMASK
-static struct flag_table misc_err_status_flags[] = {
+static const struct flag_table misc_err_status_flags[] = {
/* 0*/ FLAG_ENTRY0("CSR_PARITY", MES(CSR_PARITY)),
/* 1*/ FLAG_ENTRY0("CSR_READ_BAD_ADDR", MES(CSR_READ_BAD_ADDR)),
/* 2*/ FLAG_ENTRY0("CSR_WRITE_BAD_ADDR", MES(CSR_WRITE_BAD_ADDR)),
@@ -355,7 +360,7 @@ static struct flag_table misc_err_status_flags[] = {
/*
* TXE PIO Error flags and consequences
*/
-static struct flag_table pio_err_status_flags[] = {
+static const struct flag_table pio_err_status_flags[] = {
/* 0*/ FLAG_ENTRY("PioWriteBadCtxt",
SEC_WRITE_DROPPED,
SEND_PIO_ERR_STATUS_PIO_WRITE_BAD_CTXT_ERR_SMASK),
@@ -497,7 +502,7 @@ static struct flag_table pio_err_status_flags[] = {
/*
* TXE SDMA Error flags
*/
-static struct flag_table sdma_err_status_flags[] = {
+static const struct flag_table sdma_err_status_flags[] = {
/* 0*/ FLAG_ENTRY0("SDmaRpyTagErr",
SEND_DMA_ERR_STATUS_SDMA_RPY_TAG_ERR_SMASK),
/* 1*/ FLAG_ENTRY0("SDmaCsrParityErr",
@@ -525,7 +530,7 @@ static struct flag_table sdma_err_status_flags[] = {
* TXE Egress Error flags
*/
#define SEES(text) SEND_EGRESS_ERR_STATUS_##text##_ERR_SMASK
-static struct flag_table egress_err_status_flags[] = {
+static const struct flag_table egress_err_status_flags[] = {
/* 0*/ FLAG_ENTRY0("TxPktIntegrityMemCorErr", SEES(TX_PKT_INTEGRITY_MEM_COR)),
/* 1*/ FLAG_ENTRY0("TxPktIntegrityMemUncErr", SEES(TX_PKT_INTEGRITY_MEM_UNC)),
/* 2 reserved */
@@ -626,7 +631,7 @@ static struct flag_table egress_err_status_flags[] = {
* TXE Egress Error Info flags
*/
#define SEEI(text) SEND_EGRESS_ERR_INFO_##text##_ERR_SMASK
-static struct flag_table egress_err_info_flags[] = {
+static const struct flag_table egress_err_info_flags[] = {
/* 0*/ FLAG_ENTRY0("Reserved", 0ull),
/* 1*/ FLAG_ENTRY0("VLErr", SEEI(VL)),
/* 2*/ FLAG_ENTRY0("JobKeyErr", SEEI(JOB_KEY)),
@@ -675,7 +680,7 @@ static struct flag_table egress_err_info_flags[] = {
* TXE Send error flags
*/
#define SES(name) SEND_ERR_STATUS_SEND_##name##_ERR_SMASK
-static struct flag_table send_err_status_flags[] = {
+static const struct flag_table send_err_status_flags[] = {
/* 0*/ FLAG_ENTRY0("SendCsrParityErr", SES(CSR_PARITY)),
/* 1*/ FLAG_ENTRY0("SendCsrReadBadAddrErr", SES(CSR_READ_BAD_ADDR)),
/* 2*/ FLAG_ENTRY0("SendCsrWriteBadAddrErr", SES(CSR_WRITE_BAD_ADDR))
@@ -684,7 +689,7 @@ static struct flag_table send_err_status_flags[] = {
/*
* TXE Send Context Error flags and consequences
*/
-static struct flag_table sc_err_status_flags[] = {
+static const struct flag_table sc_err_status_flags[] = {
/* 0*/ FLAG_ENTRY("InconsistentSop",
SEC_PACKET_DROPPED | SEC_SC_HALTED,
SEND_CTXT_ERR_STATUS_PIO_INCONSISTENT_SOP_ERR_SMASK),
@@ -707,7 +712,7 @@ static struct flag_table sc_err_status_flags[] = {
* RXE Receive Error flags
*/
#define RXES(name) RCV_ERR_STATUS_RX_##name##_ERR_SMASK
-static struct flag_table rxe_err_status_flags[] = {
+static const struct flag_table rxe_err_status_flags[] = {
/* 0*/ FLAG_ENTRY0("RxDmaCsrCorErr", RXES(DMA_CSR_COR)),
/* 1*/ FLAG_ENTRY0("RxDcIntfParityErr", RXES(DC_INTF_PARITY)),
/* 2*/ FLAG_ENTRY0("RxRcvHdrUncErr", RXES(RCV_HDR_UNC)),
@@ -842,7 +847,7 @@ static struct flag_table rxe_err_status_flags[] = {
* DCC Error Flags
*/
#define DCCE(name) DCC_ERR_FLG_##name##_SMASK
-static struct flag_table dcc_err_flags[] = {
+static const struct flag_table dcc_err_flags[] = {
FLAG_ENTRY0("bad_l2_err", DCCE(BAD_L2_ERR)),
FLAG_ENTRY0("bad_sc_err", DCCE(BAD_SC_ERR)),
FLAG_ENTRY0("bad_mid_tail_err", DCCE(BAD_MID_TAIL_ERR)),
@@ -895,7 +900,7 @@ static struct flag_table dcc_err_flags[] = {
* LCB error flags
*/
#define LCBE(name) DC_LCB_ERR_FLG_##name##_SMASK
-static struct flag_table lcb_err_flags[] = {
+static const struct flag_table lcb_err_flags[] = {
/* 0*/ FLAG_ENTRY0("CSR_PARITY_ERR", LCBE(CSR_PARITY_ERR)),
/* 1*/ FLAG_ENTRY0("INVALID_CSR_ADDR", LCBE(INVALID_CSR_ADDR)),
/* 2*/ FLAG_ENTRY0("RST_FOR_FAILED_DESKEW", LCBE(RST_FOR_FAILED_DESKEW)),
@@ -938,7 +943,7 @@ static struct flag_table lcb_err_flags[] = {
* DC8051 Error Flags
*/
#define D8E(name) DC_DC8051_ERR_FLG_##name##_SMASK
-static struct flag_table dc8051_err_flags[] = {
+static const struct flag_table dc8051_err_flags[] = {
FLAG_ENTRY0("SET_BY_8051", D8E(SET_BY_8051)),
FLAG_ENTRY0("LOST_8051_HEART_BEAT", D8E(LOST_8051_HEART_BEAT)),
FLAG_ENTRY0("CRAM_MBE", D8E(CRAM_MBE)),
@@ -957,7 +962,7 @@ static struct flag_table dc8051_err_flags[] = {
*
* Flags in DC8051_DBG_ERR_INFO_SET_BY_8051.ERROR field.
*/
-static struct flag_table dc8051_info_err_flags[] = {
+static const struct flag_table dc8051_info_err_flags[] = {
FLAG_ENTRY0("Spico ROM check failed", SPICO_ROM_FAILED),
FLAG_ENTRY0("Unknown frame received", UNKNOWN_FRAME),
FLAG_ENTRY0("Target BER not met", TARGET_BER_NOT_MET),
@@ -981,16 +986,17 @@ static struct flag_table dc8051_info_err_flags[] = {
*
* Flags in DC8051_DBG_ERR_INFO_SET_BY_8051.HOST_MSG field.
*/
-static struct flag_table dc8051_info_host_msg_flags[] = {
+static const struct flag_table dc8051_info_host_msg_flags[] = {
FLAG_ENTRY0("Host request done", 0x0001),
- FLAG_ENTRY0("BC SMA message", 0x0002),
- FLAG_ENTRY0("BC PWR_MGM message", 0x0004),
+ FLAG_ENTRY0("BC PWR_MGM message", 0x0002),
+ FLAG_ENTRY0("BC SMA message", 0x0004),
FLAG_ENTRY0("BC Unknown message (BCC)", 0x0008),
FLAG_ENTRY0("BC Unknown message (LCB)", 0x0010),
FLAG_ENTRY0("External device config request", 0x0020),
FLAG_ENTRY0("VerifyCap all frames received", 0x0040),
FLAG_ENTRY0("LinkUp achieved", 0x0080),
FLAG_ENTRY0("Link going down", 0x0100),
+ FLAG_ENTRY0("Link width downgraded", 0x0200),
};
static u32 encoded_size(u32 size);
@@ -1002,11 +1008,10 @@ static void read_vc_remote_fabric(struct hfi1_devdata *dd, u8 *vau, u8 *z,
u8 *vcu, u16 *vl15buf, u8 *crc_sizes);
static void read_vc_remote_link_width(struct hfi1_devdata *dd,
u8 *remote_tx_rate, u16 *link_widths);
-static void read_vc_local_link_width(struct hfi1_devdata *dd, u8 *misc_bits,
- u8 *flag_bits, u16 *link_widths);
+static void read_vc_local_link_mode(struct hfi1_devdata *dd, u8 *misc_bits,
+ u8 *flag_bits, u16 *link_widths);
static void read_remote_device_id(struct hfi1_devdata *dd, u16 *device_id,
u8 *device_rev);
-static void read_mgmt_allowed(struct hfi1_devdata *dd, u8 *mgmt_allowed);
static void read_local_lni(struct hfi1_devdata *dd, u8 *enable_lane_rx);
static int read_tx_settings(struct hfi1_devdata *dd, u8 *enable_lane_tx,
u8 *tx_polarity_inversion,
@@ -1026,7 +1031,7 @@ static void handle_pio_err(struct hfi1_devdata *dd, u32 unused, u64 reg);
static void handle_sdma_err(struct hfi1_devdata *dd, u32 unused, u64 reg);
static void handle_egress_err(struct hfi1_devdata *dd, u32 unused, u64 reg);
static void handle_txe_err(struct hfi1_devdata *dd, u32 unused, u64 reg);
-static void set_partition_keys(struct hfi1_pportdata *);
+static void set_partition_keys(struct hfi1_pportdata *ppd);
static const char *link_state_name(u32 state);
static const char *link_state_reason_name(struct hfi1_pportdata *ppd,
u32 state);
@@ -1035,16 +1040,28 @@ static int do_8051_command(struct hfi1_devdata *dd, u32 type, u64 in_data,
static int read_idle_sma(struct hfi1_devdata *dd, u64 *data);
static int thermal_init(struct hfi1_devdata *dd);
+static void update_statusp(struct hfi1_pportdata *ppd, u32 state);
+static int wait_phys_link_offline_substates(struct hfi1_pportdata *ppd,
+ int msecs);
static int wait_logical_linkstate(struct hfi1_pportdata *ppd, u32 state,
int msecs);
+static void log_state_transition(struct hfi1_pportdata *ppd, u32 state);
+static void log_physical_state(struct hfi1_pportdata *ppd, u32 state);
+static int wait_physical_linkstate(struct hfi1_pportdata *ppd, u32 state,
+ int msecs);
+static int wait_phys_link_out_of_offline(struct hfi1_pportdata *ppd,
+ int msecs);
static void read_planned_down_reason_code(struct hfi1_devdata *dd, u8 *pdrrc);
static void read_link_down_reason(struct hfi1_devdata *dd, u8 *ldr);
-static void handle_temp_err(struct hfi1_devdata *);
-static void dc_shutdown(struct hfi1_devdata *);
-static void dc_start(struct hfi1_devdata *);
-static int qos_rmt_entries(struct hfi1_devdata *dd, unsigned int *mp,
+static void handle_temp_err(struct hfi1_devdata *dd);
+static void dc_shutdown(struct hfi1_devdata *dd);
+static void dc_start(struct hfi1_devdata *dd);
+static int qos_rmt_entries(unsigned int n_krcv_queues, unsigned int *mp,
unsigned int *np);
static void clear_full_mgmt_pkey(struct hfi1_pportdata *ppd);
+static int wait_link_transfer_active(struct hfi1_devdata *dd, int wait_ms);
+static void clear_rsm_rule(struct hfi1_devdata *dd, u8 rule_index);
+static void update_xmit_counters(struct hfi1_pportdata *ppd, u16 link_width);
/*
* Error interrupt table entry. This is used as input to the interrupt
@@ -1060,9 +1077,9 @@ struct err_reg_info {
const char *desc;
};
-#define NUM_MISC_ERRS (IS_GENERAL_ERR_END - IS_GENERAL_ERR_START)
-#define NUM_DC_ERRS (IS_DC_END - IS_DC_START)
-#define NUM_VARIOUS (IS_VARIOUS_END - IS_VARIOUS_START)
+#define NUM_MISC_ERRS (IS_GENERAL_ERR_END + 1 - IS_GENERAL_ERR_START)
+#define NUM_DC_ERRS (IS_DC_END + 1 - IS_DC_START)
+#define NUM_VARIOUS (IS_VARIOUS_END + 1 - IS_VARIOUS_START)
/*
* Helpers for building HFI and DC error interrupt table entries. Different
@@ -1263,25 +1280,71 @@ CNTR_ELEM(#name, \
CNTR_SYNTH, \
access_ibp_##cntr)
+/**
+ * hfi1_addr_from_offset - return addr for readq/writeq
+ * @dd: the dd device
+ * @offset: the offset of the CSR within bar0
+ *
+ * This routine selects the appropriate base address
+ * based on the indicated offset.
+ */
+static inline void __iomem *hfi1_addr_from_offset(
+ const struct hfi1_devdata *dd,
+ u32 offset)
+{
+ if (offset >= dd->base2_start)
+ return dd->kregbase2 + (offset - dd->base2_start);
+ return dd->kregbase1 + offset;
+}
+
+/**
+ * read_csr - read CSR at the indicated offset
+ * @dd: the dd device
+ * @offset: the offset of the CSR within bar0
+ *
+ * Return: the value read or all FF's if there
+ * is no mapping
+ */
u64 read_csr(const struct hfi1_devdata *dd, u32 offset)
{
- if (dd->flags & HFI1_PRESENT) {
- return readq((void __iomem *)dd->kregbase + offset);
- }
+ if (dd->flags & HFI1_PRESENT)
+ return readq(hfi1_addr_from_offset(dd, offset));
return -1;
}
+/**
+ * write_csr - write CSR at the indicated offset
+ * @dd: the dd device
+ * @offset: the offset of the CSR within bar0
+ * @value: value to write
+ */
void write_csr(const struct hfi1_devdata *dd, u32 offset, u64 value)
{
- if (dd->flags & HFI1_PRESENT)
- writeq(value, (void __iomem *)dd->kregbase + offset);
+ if (dd->flags & HFI1_PRESENT) {
+ void __iomem *base = hfi1_addr_from_offset(dd, offset);
+
+ /* avoid write to RcvArray */
+ if (WARN_ON(offset >= RCV_ARRAY && offset < dd->base2_start))
+ return;
+ writeq(value, base);
+ }
}
+/**
+ * get_csr_addr - return te iomem address for offset
+ * @dd: the dd device
+ * @offset: the offset of the CSR within bar0
+ *
+ * Return: The iomem address to use in subsequent
+ * writeq/readq operations.
+ */
void __iomem *get_csr_addr(
- struct hfi1_devdata *dd,
+ const struct hfi1_devdata *dd,
u32 offset)
{
- return (void __iomem *)dd->kregbase + offset;
+ if (dd->flags & HFI1_PRESENT)
+ return hfi1_addr_from_offset(dd, offset);
+ return NULL;
}
static inline u64 read_write_csr(const struct hfi1_devdata *dd, u32 csr,
@@ -1398,7 +1461,8 @@ static u64 dc_access_lcb_cntr(const struct cntr_entry *entry, void *context,
ret = write_lcb_csr(dd, csr, data);
if (ret) {
- dd_dev_err(dd, "Could not acquire LCB for counter 0x%x", csr);
+ if (!(dd->flags & HFI1_SHUTDOWN))
+ dd_dev_err(dd, "Could not acquire LCB for counter 0x%x", csr);
return 0;
}
@@ -1599,6 +1663,14 @@ static u64 access_sw_pio_drain(const struct cntr_entry *entry,
return dd->verbs_dev.n_piodrain;
}
+static u64 access_sw_ctx0_seq_drop(const struct cntr_entry *entry,
+ void *context, int vl, int mode, u64 data)
+{
+ struct hfi1_devdata *dd = context;
+
+ return dd->ctx0_seq_drop;
+}
+
static u64 access_sw_vtx_wait(const struct cntr_entry *entry,
void *context, int vl, int mode, u64 data)
{
@@ -4015,9 +4087,14 @@ def_access_ibp_counter(rc_dupreq);
def_access_ibp_counter(rdma_seq);
def_access_ibp_counter(unaligned);
def_access_ibp_counter(seq_naks);
+def_access_ibp_counter(rc_crwaits);
static struct cntr_entry dev_cntrs[DEV_CNTR_LAST] = {
[C_RCV_OVF] = RXE32_DEV_CNTR_ELEM(RcvOverflow, RCV_BUF_OVFL_CNT, CNTR_SYNTH),
+[C_RX_LEN_ERR] = RXE32_DEV_CNTR_ELEM(RxLenErr, RCV_LENGTH_ERR_CNT, CNTR_SYNTH),
+[C_RX_SHORT_ERR] = RXE32_DEV_CNTR_ELEM(RxShrErr, RCV_SHORT_ERR_CNT, CNTR_SYNTH),
+[C_RX_ICRC_ERR] = RXE32_DEV_CNTR_ELEM(RxICrcErr, RCV_ICRC_ERR_CNT, CNTR_SYNTH),
+[C_RX_EBP] = RXE32_DEV_CNTR_ELEM(RxEbpCnt, RCV_EBP_CNT, CNTR_SYNTH),
[C_RX_TID_FULL] = RXE32_DEV_CNTR_ELEM(RxTIDFullEr, RCV_TID_FULL_ERR_CNT,
CNTR_NORMAL),
[C_RX_TID_INVALID] = RXE32_DEV_CNTR_ELEM(RxTIDInvalid, RCV_TID_VALID_ERR_CNT,
@@ -4159,6 +4236,8 @@ static struct cntr_entry dev_cntrs[DEV_CNTR_LAST] = {
access_sw_cpu_intr),
[C_SW_CPU_RCV_LIM] = CNTR_ELEM("RcvLimit", 0, 0, CNTR_NORMAL,
access_sw_cpu_rcv_limit),
+[C_SW_CTX0_SEQ_DROP] = CNTR_ELEM("SeqDrop0", 0, 0, CNTR_NORMAL,
+ access_sw_ctx0_seq_drop),
[C_SW_VTX_WAIT] = CNTR_ELEM("vTxWait", 0, 0, CNTR_NORMAL,
access_sw_vtx_wait),
[C_SW_PIO_WAIT] = CNTR_ELEM("PioWait", 0, 0, CNTR_NORMAL,
@@ -4167,6 +4246,8 @@ static struct cntr_entry dev_cntrs[DEV_CNTR_LAST] = {
access_sw_pio_drain),
[C_SW_KMEM_WAIT] = CNTR_ELEM("KmemWait", 0, 0, CNTR_NORMAL,
access_sw_kmem_wait),
+[C_SW_TID_WAIT] = CNTR_ELEM("TidWait", 0, 0, CNTR_NORMAL,
+ hfi1_access_sw_tid_wait),
[C_SW_SEND_SCHED] = CNTR_ELEM("SendSched", 0, 0, CNTR_NORMAL,
access_sw_send_schedule),
[C_SDMA_DESC_FETCHED_CNT] = CNTR_ELEM("SDEDscFdCn",
@@ -5028,6 +5109,7 @@ static struct cntr_entry port_cntrs[PORT_CNTR_LAST] = {
[C_SW_IBP_RDMA_SEQ] = SW_IBP_CNTR(RdmaSeq, rdma_seq),
[C_SW_IBP_UNALIGNED] = SW_IBP_CNTR(Unaligned, unaligned),
[C_SW_IBP_SEQ_NAK] = SW_IBP_CNTR(SeqNak, seq_naks),
+[C_SW_IBP_RC_CRWAITS] = SW_IBP_CNTR(RcCrWait, rc_crwaits),
[C_SW_CPU_RC_ACKS] = CNTR_ELEM("RcAcks", 0, 0, CNTR_NORMAL,
access_sw_cpu_rc_acks),
[C_SW_CPU_RC_QACKS] = CNTR_ELEM("RcQacks", 0, 0, CNTR_NORMAL,
@@ -5136,6 +5218,17 @@ int is_bx(struct hfi1_devdata *dd)
return (chip_rev_minor & 0xF0) == 0x10;
}
+/* return true is kernel urg disabled for rcd */
+bool is_urg_masked(struct hfi1_ctxtdata *rcd)
+{
+ u64 mask;
+ u32 is = IS_RCVURGENT_START + rcd->ctxt;
+ u8 bit = is % 64;
+
+ mask = read_csr(rcd->dd, CCE_INT_MASK + (8 * (is / 64)));
+ return !(mask & BIT_ULL(bit));
+}
+
/*
* Append string s to buffer buf. Arguments curp and len are the current
* position and remaining length, respectively.
@@ -5182,7 +5275,7 @@ done:
* the buffer. End in '*' if the buffer is too short.
*/
static char *flag_string(char *buf, int buf_len, u64 flags,
- struct flag_table *table, int table_size)
+ const struct flag_table *table, int table_size)
{
char extra[32];
char *p = buf;
@@ -5241,7 +5334,7 @@ static const char * const cce_misc_names[] = {
static char *is_misc_err_name(char *buf, size_t bsize, unsigned int source)
{
if (source < ARRAY_SIZE(cce_misc_names))
- strncpy(buf, cce_misc_names[source], bsize);
+ strscpy_pad(buf, cce_misc_names[source], bsize);
else
snprintf(buf, bsize, "Reserved%u",
source + IS_GENERAL_ERR_START);
@@ -5281,7 +5374,7 @@ static const char * const various_names[] = {
static char *is_various_name(char *buf, size_t bsize, unsigned int source)
{
if (source < ARRAY_SIZE(various_names))
- strncpy(buf, various_names[source], bsize);
+ strscpy_pad(buf, various_names[source], bsize);
else
snprintf(buf, bsize, "Reserved%u", source + IS_VARIOUS_START);
return buf;
@@ -5453,9 +5546,9 @@ static void handle_cce_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
* associated with them.
*/
#define RCVERR_CHECK_TIME 10
-static void update_rcverr_timer(unsigned long opaque)
+static void update_rcverr_timer(struct timer_list *t)
{
- struct hfi1_devdata *dd = (struct hfi1_devdata *)opaque;
+ struct hfi1_devdata *dd = timer_container_of(dd, t, rcverr_timer);
struct hfi1_pportdata *ppd = dd->pport;
u32 cur_ovfl_cnt = read_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL);
@@ -5465,7 +5558,7 @@ static void update_rcverr_timer(unsigned long opaque)
set_link_down_reason(
ppd, OPA_LINKDOWN_REASON_EXCESSIVE_BUFFER_OVERRUN, 0,
OPA_LINKDOWN_REASON_EXCESSIVE_BUFFER_OVERRUN);
- queue_work(ppd->hfi1_wq, &ppd->link_bounce_work);
+ queue_work(ppd->link_wq, &ppd->link_bounce_work);
}
dd->rcv_ovfl_cnt = (u32)cur_ovfl_cnt;
@@ -5474,7 +5567,7 @@ static void update_rcverr_timer(unsigned long opaque)
static int init_rcverr(struct hfi1_devdata *dd)
{
- setup_timer(&dd->rcverr_timer, update_rcverr_timer, (unsigned long)dd);
+ timer_setup(&dd->rcverr_timer, update_rcverr_timer, 0);
/* Assume the hardware counter has been reset */
dd->rcv_ovfl_cnt = 0;
return mod_timer(&dd->rcverr_timer, jiffies + HZ * RCVERR_CHECK_TIME);
@@ -5482,9 +5575,8 @@ static int init_rcverr(struct hfi1_devdata *dd)
static void free_rcverr(struct hfi1_devdata *dd)
{
- if (dd->rcverr_timer.data)
- del_timer_sync(&dd->rcverr_timer);
- dd->rcverr_timer.data = 0;
+ if (dd->rcverr_timer.function)
+ timer_delete_sync(&dd->rcverr_timer);
}
static void handle_rxe_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
@@ -5860,6 +5952,7 @@ static void is_sendctxt_err_int(struct hfi1_devdata *dd,
u64 status;
u32 sw_index;
int i = 0;
+ unsigned long irq_flags;
sw_index = dd->hw_to_sw[hw_context];
if (sw_index >= dd->num_send_contexts) {
@@ -5869,10 +5962,12 @@ static void is_sendctxt_err_int(struct hfi1_devdata *dd,
return;
}
sci = &dd->send_contexts[sw_index];
+ spin_lock_irqsave(&dd->sc_lock, irq_flags);
sc = sci->sc;
if (!sc) {
dd_dev_err(dd, "%s: context %u(%u): no sc?\n", __func__,
sw_index, hw_context);
+ spin_unlock_irqrestore(&dd->sc_lock, irq_flags);
return;
}
@@ -5894,6 +5989,7 @@ static void is_sendctxt_err_int(struct hfi1_devdata *dd,
*/
if (sc->type != SC_USER)
queue_work(dd->pport->hfi1_wq, &sc->halt_work);
+ spin_unlock_irqrestore(&dd->sc_lock, irq_flags);
/*
* Update the counters for the corresponding status bits.
@@ -6020,7 +6116,7 @@ static void handle_qsfp_int(struct hfi1_devdata *dd, u32 src_ctx, u64 reg)
* will not happen. We have to do it here
* before turning the DC off.
*/
- queue_work(ppd->hfi1_wq, &ppd->link_down_work);
+ queue_work(ppd->link_wq, &ppd->link_down_work);
}
} else {
dd_dev_info(dd, "%s: QSFP module inserted\n",
@@ -6055,7 +6151,7 @@ static void handle_qsfp_int(struct hfi1_devdata *dd, u32 src_ctx, u64 reg)
/* Schedule the QSFP work only if there is a cable attached. */
if (qsfp_mod_present(ppd))
- queue_work(ppd->hfi1_wq, &ppd->qsfp_info.qsfp_work);
+ queue_work(ppd->link_wq, &ppd->qsfp_info.qsfp_work);
}
static int request_host_lcb_access(struct hfi1_devdata *dd)
@@ -6065,7 +6161,7 @@ static int request_host_lcb_access(struct hfi1_devdata *dd)
ret = do_8051_command(dd, HCMD_MISC,
(u64)HCMD_MISC_REQUEST_LCB_ACCESS <<
LOAD_DATA_FIELD_ID_SHIFT, NULL);
- if (ret != HCMD_SUCCESS) {
+ if (ret != HCMD_SUCCESS && !(dd->flags & HFI1_SHUTDOWN)) {
dd_dev_err(dd, "%s: command failed with error %d\n",
__func__, ret);
}
@@ -6146,7 +6242,8 @@ int acquire_lcb_access(struct hfi1_devdata *dd, int sleep_ok)
if (dd->lcb_access_count == 0) {
ret = request_host_lcb_access(dd);
if (ret) {
- dd_dev_err(dd,
+ if (!(dd->flags & HFI1_SHUTDOWN))
+ dd_dev_err(dd,
"%s: unable to acquire LCB access, err %d\n",
__func__, ret);
goto done;
@@ -6267,6 +6364,18 @@ static void handle_8051_request(struct hfi1_pportdata *ppd)
type);
hreq_response(dd, HREQ_NOT_SUPPORTED, 0);
break;
+ case HREQ_LCB_RESET:
+ /* Put the LCB, RX FPE and TX FPE into reset */
+ write_csr(dd, DCC_CFG_RESET, LCB_RX_FPE_TX_FPE_INTO_RESET);
+ /* Make sure the write completed */
+ (void)read_csr(dd, DCC_CFG_RESET);
+ /* Hold the reset long enough to take effect */
+ udelay(1);
+ /* Take the LCB, RX FPE and TX FPE out of reset */
+ write_csr(dd, DCC_CFG_RESET, LCB_RX_FPE_TX_FPE_OUT_OF_RESET);
+ hreq_response(dd, HREQ_SUCCESS, 0);
+
+ break;
case HREQ_CONFIG_DONE:
hreq_response(dd, HREQ_SUCCESS, 0);
break;
@@ -6281,25 +6390,38 @@ static void handle_8051_request(struct hfi1_pportdata *ppd)
}
}
-static void write_global_credit(struct hfi1_devdata *dd,
- u8 vau, u16 total, u16 shared)
+/*
+ * Set up allocation unit vaulue.
+ */
+void set_up_vau(struct hfi1_devdata *dd, u8 vau)
{
- write_csr(dd, SEND_CM_GLOBAL_CREDIT,
- ((u64)total <<
- SEND_CM_GLOBAL_CREDIT_TOTAL_CREDIT_LIMIT_SHIFT) |
- ((u64)shared <<
- SEND_CM_GLOBAL_CREDIT_SHARED_LIMIT_SHIFT) |
- ((u64)vau << SEND_CM_GLOBAL_CREDIT_AU_SHIFT));
+ u64 reg = read_csr(dd, SEND_CM_GLOBAL_CREDIT);
+
+ /* do not modify other values in the register */
+ reg &= ~SEND_CM_GLOBAL_CREDIT_AU_SMASK;
+ reg |= (u64)vau << SEND_CM_GLOBAL_CREDIT_AU_SHIFT;
+ write_csr(dd, SEND_CM_GLOBAL_CREDIT, reg);
}
/*
* Set up initial VL15 credits of the remote. Assumes the rest of
- * the CM credit registers are zero from a previous global or credit reset .
+ * the CM credit registers are zero from a previous global or credit reset.
+ * Shared limit for VL15 will always be 0.
*/
-void set_up_vl15(struct hfi1_devdata *dd, u8 vau, u16 vl15buf)
+void set_up_vl15(struct hfi1_devdata *dd, u16 vl15buf)
{
- /* leave shared count at zero for both global and VL15 */
- write_global_credit(dd, vau, vl15buf, 0);
+ u64 reg = read_csr(dd, SEND_CM_GLOBAL_CREDIT);
+
+ /* set initial values for total and shared credit limit */
+ reg &= ~(SEND_CM_GLOBAL_CREDIT_TOTAL_CREDIT_LIMIT_SMASK |
+ SEND_CM_GLOBAL_CREDIT_SHARED_LIMIT_SMASK);
+
+ /*
+ * Set total limit to be equal to VL15 credits.
+ * Leave shared limit at 0.
+ */
+ reg |= (u64)vl15buf << SEND_CM_GLOBAL_CREDIT_TOTAL_CREDIT_LIMIT_SHIFT;
+ write_csr(dd, SEND_CM_GLOBAL_CREDIT, reg);
write_csr(dd, SEND_CM_CREDIT_VL15, (u64)vl15buf
<< SEND_CM_CREDIT_VL15_DEDICATED_LIMIT_VL_SHIFT);
@@ -6317,9 +6439,11 @@ void reset_link_credits(struct hfi1_devdata *dd)
for (i = 0; i < TXE_NUM_DATA_VL; i++)
write_csr(dd, SEND_CM_CREDIT_VL + (8 * i), 0);
write_csr(dd, SEND_CM_CREDIT_VL15, 0);
- write_global_credit(dd, 0, 0, 0);
+ write_csr(dd, SEND_CM_GLOBAL_CREDIT, 0);
/* reset the CM block */
pio_send_control(dd, PSC_CM_RESET);
+ /* reset cached value */
+ dd->vl15buf_cached = 0;
}
/* convert a vCU to a CU */
@@ -6362,8 +6486,7 @@ static void lcb_shutdown(struct hfi1_devdata *dd, int abort)
dd->lcb_err_en = read_csr(dd, DC_LCB_ERR_EN);
reg = read_csr(dd, DCC_CFG_RESET);
write_csr(dd, DCC_CFG_RESET, reg |
- (1ull << DCC_CFG_RESET_RESET_LCB_SHIFT) |
- (1ull << DCC_CFG_RESET_RESET_RX_FPE_SHIFT));
+ DCC_CFG_RESET_RESET_LCB | DCC_CFG_RESET_RESET_RX_FPE);
(void)read_csr(dd, DCC_CFG_RESET); /* make sure the write completed */
if (!abort) {
udelay(1); /* must hold for the longer of 16cclks or 20ns */
@@ -6379,18 +6502,17 @@ static void lcb_shutdown(struct hfi1_devdata *dd, int abort)
*
* The expectation is that the caller of this routine would have taken
* care of properly transitioning the link into the correct state.
+ * NOTE: the caller needs to acquire the dd->dc8051_lock lock
+ * before calling this function.
*/
-static void dc_shutdown(struct hfi1_devdata *dd)
+static void _dc_shutdown(struct hfi1_devdata *dd)
{
- unsigned long flags;
+ lockdep_assert_held(&dd->dc8051_lock);
- spin_lock_irqsave(&dd->dc8051_lock, flags);
- if (dd->dc_shutdown) {
- spin_unlock_irqrestore(&dd->dc8051_lock, flags);
+ if (dd->dc_shutdown)
return;
- }
+
dd->dc_shutdown = 1;
- spin_unlock_irqrestore(&dd->dc8051_lock, flags);
/* Shutdown the LCB */
lcb_shutdown(dd, 1);
/*
@@ -6401,35 +6523,45 @@ static void dc_shutdown(struct hfi1_devdata *dd)
write_csr(dd, DC_DC8051_CFG_RST, 0x1);
}
+static void dc_shutdown(struct hfi1_devdata *dd)
+{
+ mutex_lock(&dd->dc8051_lock);
+ _dc_shutdown(dd);
+ mutex_unlock(&dd->dc8051_lock);
+}
+
/*
* Calling this after the DC has been brought out of reset should not
* do any damage.
+ * NOTE: the caller needs to acquire the dd->dc8051_lock lock
+ * before calling this function.
*/
-static void dc_start(struct hfi1_devdata *dd)
+static void _dc_start(struct hfi1_devdata *dd)
{
- unsigned long flags;
- int ret;
+ lockdep_assert_held(&dd->dc8051_lock);
- spin_lock_irqsave(&dd->dc8051_lock, flags);
if (!dd->dc_shutdown)
- goto done;
- spin_unlock_irqrestore(&dd->dc8051_lock, flags);
+ return;
+
/* Take the 8051 out of reset */
write_csr(dd, DC_DC8051_CFG_RST, 0ull);
/* Wait until 8051 is ready */
- ret = wait_fm_ready(dd, TIMEOUT_8051_START);
- if (ret) {
+ if (wait_fm_ready(dd, TIMEOUT_8051_START))
dd_dev_err(dd, "%s: timeout starting 8051 firmware\n",
__func__);
- }
+
/* Take away reset for LCB and RX FPE (set in lcb_shutdown). */
- write_csr(dd, DCC_CFG_RESET, 0x10);
+ write_csr(dd, DCC_CFG_RESET, LCB_RX_FPE_TX_FPE_OUT_OF_RESET);
/* lcb_shutdown() with abort=1 does not restore these */
write_csr(dd, DC_LCB_ERR_EN, dd->lcb_err_en);
- spin_lock_irqsave(&dd->dc8051_lock, flags);
dd->dc_shutdown = 0;
-done:
- spin_unlock_irqrestore(&dd->dc8051_lock, flags);
+}
+
+static void dc_start(struct hfi1_devdata *dd)
+{
+ mutex_lock(&dd->dc8051_lock);
+ _dc_start(dd);
+ mutex_unlock(&dd->dc8051_lock);
}
/*
@@ -6609,6 +6741,7 @@ void start_freeze_handling(struct hfi1_pportdata *ppd, int flags)
struct hfi1_devdata *dd = ppd->dd;
struct send_context *sc;
int i;
+ int sc_flags;
if (flags & FREEZE_SELF)
write_csr(dd, CCE_CTRL, CCE_CTRL_SPC_FREEZE_SMASK);
@@ -6619,11 +6752,13 @@ void start_freeze_handling(struct hfi1_pportdata *ppd, int flags)
/* notify all SDMA engines that they are going into a freeze */
sdma_freeze_notify(dd, !!(flags & FREEZE_LINK_DOWN));
+ sc_flags = SCF_FROZEN | SCF_HALTED | (flags & FREEZE_LINK_DOWN ?
+ SCF_LINK_DOWN : 0);
/* do halt pre-handling on all enabled send contexts */
for (i = 0; i < dd->num_send_contexts; i++) {
sc = dd->send_contexts[i].sc;
if (sc && (sc->flags & SCF_ENABLED))
- sc_stop(sc, SCF_FROZEN | SCF_HALTED);
+ sc_stop(sc, sc_flags);
}
/* Send context are frozen. Notify user space */
@@ -6680,13 +6815,17 @@ static void wait_for_freeze_status(struct hfi1_devdata *dd, int freeze)
static void rxe_freeze(struct hfi1_devdata *dd)
{
int i;
+ struct hfi1_ctxtdata *rcd;
/* disable port */
clear_rcvctrl(dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK);
/* disable all receive contexts */
- for (i = 0; i < dd->num_rcv_contexts; i++)
- hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_DIS, i);
+ for (i = 0; i < dd->num_rcv_contexts; i++) {
+ rcd = hfi1_rcd_get_by_index(dd, i);
+ hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_DIS, rcd);
+ hfi1_rcd_put(rcd);
+ }
}
/*
@@ -6698,15 +6837,25 @@ static void rxe_freeze(struct hfi1_devdata *dd)
static void rxe_kernel_unfreeze(struct hfi1_devdata *dd)
{
u32 rcvmask;
- int i;
+ u16 i;
+ struct hfi1_ctxtdata *rcd;
/* enable all kernel contexts */
- for (i = 0; i < dd->n_krcv_queues; i++) {
+ for (i = 0; i < dd->num_rcv_contexts; i++) {
+ rcd = hfi1_rcd_get_by_index(dd, i);
+
+ /* Ensure all non-user contexts(including vnic) are enabled */
+ if (!rcd ||
+ (i >= dd->first_dyn_alloc_ctxt && !rcd->is_vnic)) {
+ hfi1_rcd_put(rcd);
+ continue;
+ }
rcvmask = HFI1_RCVCTRL_CTXT_ENB;
/* HFI1_RCVCTRL_TAILUPD_[ENB|DIS] needs to be set explicitly */
- rcvmask |= HFI1_CAP_KGET_MASK(dd->rcd[i]->flags, DMA_RTAIL) ?
+ rcvmask |= hfi1_rcvhdrtail_kvaddr(rcd) ?
HFI1_RCVCTRL_TAILUPD_ENB : HFI1_RCVCTRL_TAILUPD_DIS;
- hfi1_rcvctrl(dd, rcvmask, i);
+ hfi1_rcvctrl(dd, rcvmask, rcd);
+ hfi1_rcd_put(rcd);
}
/* enable port */
@@ -6784,6 +6933,32 @@ void handle_freeze(struct work_struct *work)
/* no longer frozen */
}
+/**
+ * update_xmit_counters - update PortXmitWait/PortVlXmitWait
+ * counters.
+ * @ppd: info of physical Hfi port
+ * @link_width: new link width after link up or downgrade
+ *
+ * Update the PortXmitWait and PortVlXmitWait counters after
+ * a link up or downgrade event to reflect a link width change.
+ */
+static void update_xmit_counters(struct hfi1_pportdata *ppd, u16 link_width)
+{
+ int i;
+ u16 tx_width;
+ u16 link_speed;
+
+ tx_width = tx_link_width(link_width);
+ link_speed = get_link_speed(ppd->link_speed_active);
+
+ /*
+ * There are C_VL_COUNT number of PortVLXmitWait counters.
+ * Adding 1 to C_VL_COUNT to include the PortXmitWait counter.
+ */
+ for (i = 0; i < C_VL_COUNT + 1; i++)
+ get_xmit_wait_counters(ppd, tx_width, link_speed, i);
+}
+
/*
* Handle a link up interrupt from the 8051.
*
@@ -6793,24 +6968,35 @@ void handle_link_up(struct work_struct *work)
{
struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
link_up_work);
+ struct hfi1_devdata *dd = ppd->dd;
+
set_link_state(ppd, HLS_UP_INIT);
/* cache the read of DC_LCB_STS_ROUND_TRIP_LTP_CNT */
- read_ltp_rtt(ppd->dd);
+ read_ltp_rtt(dd);
/*
* OPA specifies that certain counters are cleared on a transition
* to link up, so do that.
*/
- clear_linkup_counters(ppd->dd);
+ clear_linkup_counters(dd);
/*
* And (re)set link up default values.
*/
set_linkup_defaults(ppd);
+ /*
+ * Set VL15 credits. Use cached value from verify cap interrupt.
+ * In case of quick linkup or simulator, vl15 value will be set by
+ * handle_linkup_change. VerifyCap interrupt handler will not be
+ * called in those scenarios.
+ */
+ if (!(quick_linkup || dd->icode == ICODE_FUNCTIONAL_SIMULATOR))
+ set_up_vl15(dd, dd->vl15buf_cached);
+
/* enforce link speed enabled */
if ((ppd->link_speed_active & ppd->link_speed_enabled) == 0) {
/* oops - current speed is not enabled, bounce */
- dd_dev_err(ppd->dd,
+ dd_dev_err(dd,
"Link speed active 0x%x is outside enabled 0x%x, downing link\n",
ppd->link_speed_active, ppd->link_speed_enabled);
set_link_down_reason(ppd, OPA_LINKDOWN_REASON_SPEED_POLICY, 0,
@@ -6834,7 +7020,7 @@ static void reset_neighbor_info(struct hfi1_pportdata *ppd)
static const char * const link_down_reason_strs[] = {
[OPA_LINKDOWN_REASON_NONE] = "None",
- [OPA_LINKDOWN_REASON_RCV_ERROR_0] = "Recive error 0",
+ [OPA_LINKDOWN_REASON_RCV_ERROR_0] = "Receive error 0",
[OPA_LINKDOWN_REASON_BAD_PKT_LEN] = "Bad packet length",
[OPA_LINKDOWN_REASON_PKT_TOO_LONG] = "Packet too long",
[OPA_LINKDOWN_REASON_PKT_TOO_SHORT] = "Packet too short",
@@ -6924,6 +7110,7 @@ void handle_link_down(struct work_struct *work)
/* Go offline first, then deal with reading/writing through 8051 */
was_up = !!(ppd->host_link_state & HLS_UP);
set_link_state(ppd, HLS_DN_OFFLINE);
+ xchg(&ppd->is_link_down_queued, 0);
if (was_up) {
lcl_reason = 0;
@@ -7065,27 +7252,6 @@ static int lcb_to_port_ltp(int lcb_crc)
return port_ltp;
}
-/*
- * Our neighbor has indicated that we are allowed to act as a fabric
- * manager, so place the full management partition key in the second
- * (0-based) pkey array position (see OPAv1, section 20.2.2.6.8). Note
- * that we should already have the limited management partition key in
- * array element 1, and also that the port is not yet up when
- * add_full_mgmt_pkey() is invoked.
- */
-static void add_full_mgmt_pkey(struct hfi1_pportdata *ppd)
-{
- struct hfi1_devdata *dd = ppd->dd;
-
- /* Sanity check - ppd->pkeys[2] should be 0, or already initalized */
- if (!((ppd->pkeys[2] == 0) || (ppd->pkeys[2] == FULL_MGMT_P_KEY)))
- dd_dev_warn(dd, "%s pkey[2] already set to 0x%x, resetting it to 0x%x\n",
- __func__, ppd->pkeys[2], FULL_MGMT_P_KEY);
- ppd->pkeys[2] = FULL_MGMT_P_KEY;
- (void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_PKEYS, 0);
- hfi1_event_pkey_change(ppd->dd, ppd->port);
-}
-
static void clear_full_mgmt_pkey(struct hfi1_pportdata *ppd)
{
if (ppd->pkeys[2] != 0) {
@@ -7112,11 +7278,11 @@ static u16 link_width_to_bits(struct hfi1_devdata *dd, u16 width)
case 1: return OPA_LINK_WIDTH_1X;
case 2: return OPA_LINK_WIDTH_2X;
case 3: return OPA_LINK_WIDTH_3X;
+ case 4: return OPA_LINK_WIDTH_4X;
default:
dd_dev_info(dd, "%s: invalid width %d, using 4\n",
__func__, width);
- /* fall through */
- case 4: return OPA_LINK_WIDTH_4X;
+ return OPA_LINK_WIDTH_4X;
}
}
@@ -7165,18 +7331,19 @@ static void get_link_widths(struct hfi1_devdata *dd, u16 *tx_width,
* set the max_rate field in handle_verify_cap until v0.19.
*/
if ((dd->icode == ICODE_RTL_SILICON) &&
- (dd->dc8051_ver < dc8051_ver(0, 19))) {
+ (dd->dc8051_ver < dc8051_ver(0, 19, 0))) {
/* max_rate: 0 = 12.5G, 1 = 25G */
switch (max_rate) {
case 0:
dd->pport[0].link_speed_active = OPA_LINK_SPEED_12_5G;
break;
+ case 1:
+ dd->pport[0].link_speed_active = OPA_LINK_SPEED_25G;
+ break;
default:
dd_dev_err(dd,
"%s: unexpected max rate %d, using 25Gb\n",
__func__, (int)max_rate);
- /* fall through */
- case 1:
dd->pport[0].link_speed_active = OPA_LINK_SPEED_25G;
break;
}
@@ -7209,7 +7376,7 @@ static void get_linkup_widths(struct hfi1_devdata *dd, u16 *tx_width,
u8 misc_bits, local_flags;
u16 active_tx, active_rx;
- read_vc_local_link_width(dd, &misc_bits, &local_flags, &widths);
+ read_vc_local_link_mode(dd, &misc_bits, &local_flags, &widths);
tx = widths >> 12;
rx = (widths >> 8) & 0xf;
@@ -7258,7 +7425,7 @@ void handle_verify_cap(struct work_struct *work)
struct hfi1_devdata *dd = ppd->dd;
u64 reg;
u8 power_management;
- u8 continious;
+ u8 continuous;
u8 vcu;
u8 vau;
u8 z;
@@ -7277,30 +7444,17 @@ void handle_verify_cap(struct work_struct *work)
lcb_shutdown(dd, 0);
adjust_lcb_for_fpga_serdes(dd);
- /*
- * These are now valid:
- * remote VerifyCap fields in the general LNI config
- * CSR DC8051_STS_REMOTE_GUID
- * CSR DC8051_STS_REMOTE_NODE_TYPE
- * CSR DC8051_STS_REMOTE_FM_SECURITY
- * CSR DC8051_STS_REMOTE_PORT_NO
- */
-
- read_vc_remote_phy(dd, &power_management, &continious);
+ read_vc_remote_phy(dd, &power_management, &continuous);
read_vc_remote_fabric(dd, &vau, &z, &vcu, &vl15buf,
&partner_supported_crc);
read_vc_remote_link_width(dd, &remote_tx_rate, &link_widths);
read_remote_device_id(dd, &device_id, &device_rev);
- /*
- * And the 'MgmtAllowed' information, which is exchanged during
- * LNI, is also be available at this point.
- */
- read_mgmt_allowed(dd, &ppd->mgmt_allowed);
+
/* print the active widths */
get_link_widths(dd, &active_tx, &active_rx);
dd_dev_info(dd,
"Peer PHY: power management 0x%x, continuous updates 0x%x\n",
- (int)power_management, (int)continious);
+ (int)power_management, (int)continuous);
dd_dev_info(dd,
"Peer Fabric: vAU %d, Z %d, vCU %d, vl15 credits 0x%x, CRC sizes 0x%x\n",
(int)vau, (int)z, (int)vcu, (int)vl15buf,
@@ -7320,7 +7474,14 @@ void handle_verify_cap(struct work_struct *work)
*/
if (vau == 0)
vau = 1;
- set_up_vl15(dd, vau, vl15buf);
+ set_up_vau(dd, vau);
+
+ /*
+ * Set VL15 credits to 0 in global credit register. Cache remote VL15
+ * credits value and wait for link-up interrupt ot set it.
+ */
+ set_up_vl15(dd, 0);
+ dd->vl15buf_cached = vl15buf;
/* set up the LCB CRC mode */
crc_mask = ppd->port_crc_mode_enabled & partner_supported_crc;
@@ -7350,7 +7511,7 @@ void handle_verify_cap(struct work_struct *work)
}
ppd->link_speed_active = 0; /* invalid value */
- if (dd->dc8051_ver < dc8051_ver(0, 20)) {
+ if (dd->dc8051_ver < dc8051_ver(0, 20, 0)) {
/* remote_tx_rate: 0 = 12.5G, 1 = 25G */
switch (remote_tx_rate) {
case 0:
@@ -7416,39 +7577,33 @@ void handle_verify_cap(struct work_struct *work)
write_csr(dd, DC_LCB_ERR_EN, 0); /* mask LCB errors */
set_8051_lcb_access(dd);
- ppd->neighbor_guid =
- read_csr(dd, DC_DC8051_STS_REMOTE_GUID);
- ppd->neighbor_port_number = read_csr(dd, DC_DC8051_STS_REMOTE_PORT_NO) &
- DC_DC8051_STS_REMOTE_PORT_NO_VAL_SMASK;
- ppd->neighbor_type =
- read_csr(dd, DC_DC8051_STS_REMOTE_NODE_TYPE) &
- DC_DC8051_STS_REMOTE_NODE_TYPE_VAL_MASK;
- ppd->neighbor_fm_security =
- read_csr(dd, DC_DC8051_STS_REMOTE_FM_SECURITY) &
- DC_DC8051_STS_LOCAL_FM_SECURITY_DISABLED_MASK;
- dd_dev_info(dd,
- "Neighbor Guid: %llx Neighbor type %d MgmtAllowed %d FM security bypass %d\n",
- ppd->neighbor_guid, ppd->neighbor_type,
- ppd->mgmt_allowed, ppd->neighbor_fm_security);
- if (ppd->mgmt_allowed)
- add_full_mgmt_pkey(ppd);
-
/* tell the 8051 to go to LinkUp */
set_link_state(ppd, HLS_GOING_UP);
}
-/*
- * Apply the link width downgrade enabled policy against the current active
- * link widths.
+/**
+ * apply_link_downgrade_policy - Apply the link width downgrade enabled
+ * policy against the current active link widths.
+ * @ppd: info of physical Hfi port
+ * @refresh_widths: True indicates link downgrade event
+ * @return: True indicates a successful link downgrade. False indicates
+ * link downgrade event failed and the link will bounce back to
+ * default link width.
*
- * Called when the enabled policy changes or the active link widths change.
+ * Called when the enabled policy changes or the active link widths
+ * change.
+ * Refresh_widths indicates that a link downgrade occurred. The
+ * link_downgraded variable is set by refresh_widths and
+ * determines the success/failure of the policy application.
*/
-void apply_link_downgrade_policy(struct hfi1_pportdata *ppd, int refresh_widths)
+bool apply_link_downgrade_policy(struct hfi1_pportdata *ppd,
+ bool refresh_widths)
{
int do_bounce = 0;
int tries;
u16 lwde;
u16 tx, rx;
+ bool link_downgraded = refresh_widths;
/* use the hls lock to avoid a race with actual link up */
tries = 0;
@@ -7482,6 +7637,7 @@ retry:
ppd->link_width_downgrade_rx_active == 0) {
/* the 8051 reported a dead link as a downgrade */
dd_dev_err(ppd->dd, "Link downgrade is really a link down, ignoring\n");
+ link_downgraded = false;
} else if (lwde == 0) {
/* downgrade is disabled */
@@ -7498,6 +7654,7 @@ retry:
ppd->link_width_downgrade_tx_active,
ppd->link_width_downgrade_rx_active);
do_bounce = 1;
+ link_downgraded = false;
}
} else if ((lwde & ppd->link_width_downgrade_tx_active) == 0 ||
(lwde & ppd->link_width_downgrade_rx_active) == 0) {
@@ -7509,6 +7666,7 @@ retry:
lwde, ppd->link_width_downgrade_tx_active,
ppd->link_width_downgrade_rx_active);
do_bounce = 1;
+ link_downgraded = false;
}
done:
@@ -7520,6 +7678,8 @@ done:
set_link_state(ppd, HLS_DN_OFFLINE);
start_link(ppd);
}
+
+ return link_downgraded;
}
/*
@@ -7533,7 +7693,8 @@ void handle_link_downgrade(struct work_struct *work)
link_downgrade_work);
dd_dev_info(ppd->dd, "8051: Link width downgrade\n");
- apply_link_downgrade_policy(ppd, 1);
+ if (apply_link_downgrade_policy(ppd, true))
+ update_xmit_counters(ppd, ppd->link_width_downgrade_tx_active);
}
static char *dcc_err_string(char *buf, int buf_len, u64 flags)
@@ -7633,12 +7794,12 @@ static void handle_8051_interrupt(struct hfi1_devdata *dd, u32 unused, u64 reg)
host_msg &= ~(u64)HOST_REQ_DONE;
}
if (host_msg & BC_SMA_MSG) {
- queue_work(ppd->hfi1_wq, &ppd->sma_message_work);
+ queue_work(ppd->link_wq, &ppd->sma_message_work);
host_msg &= ~(u64)BC_SMA_MSG;
}
if (host_msg & LINKUP_ACHIEVED) {
dd_dev_info(dd, "8051: Link up\n");
- queue_work(ppd->hfi1_wq, &ppd->link_up_work);
+ queue_work(ppd->link_wq, &ppd->link_up_work);
host_msg &= ~(u64)LINKUP_ACHIEVED;
}
if (host_msg & EXT_DEVICE_CFG_REQ) {
@@ -7646,7 +7807,7 @@ static void handle_8051_interrupt(struct hfi1_devdata *dd, u32 unused, u64 reg)
host_msg &= ~(u64)EXT_DEVICE_CFG_REQ;
}
if (host_msg & VERIFY_CAP_FRAME) {
- queue_work(ppd->hfi1_wq, &ppd->link_vc_work);
+ queue_work(ppd->link_wq, &ppd->link_vc_work);
host_msg &= ~(u64)VERIFY_CAP_FRAME;
}
if (host_msg & LINK_GOING_DOWN) {
@@ -7661,7 +7822,7 @@ static void handle_8051_interrupt(struct hfi1_devdata *dd, u32 unused, u64 reg)
host_msg &= ~(u64)LINK_GOING_DOWN;
}
if (host_msg & LINK_WIDTH_DOWNGRADED) {
- queue_work(ppd->hfi1_wq, &ppd->link_downgrade_work);
+ queue_work(ppd->link_wq, &ppd->link_downgrade_work);
host_msg &= ~(u64)LINK_WIDTH_DOWNGRADED;
}
if (host_msg) {
@@ -7696,15 +7857,22 @@ static void handle_8051_interrupt(struct hfi1_devdata *dd, u32 unused, u64 reg)
if (queue_link_down) {
/*
* if the link is already going down or disabled, do not
- * queue another
+ * queue another. If there's a link down entry already
+ * queued, don't queue another one.
*/
if ((ppd->host_link_state &
(HLS_GOING_OFFLINE | HLS_LINK_COOLDOWN)) ||
ppd->link_enabled == 0) {
- dd_dev_info(dd, "%s: not queuing link down\n",
- __func__);
+ dd_dev_info(dd, "%s: not queuing link down. host_link_state %x, link_enabled %x\n",
+ __func__, ppd->host_link_state,
+ ppd->link_enabled);
} else {
- queue_work(ppd->hfi1_wq, &ppd->link_down_work);
+ if (xchg(&ppd->is_link_down_queued, 1) == 1)
+ dd_dev_info(dd,
+ "%s: link down request already queued\n",
+ __func__);
+ else
+ queue_work(ppd->link_wq, &ppd->link_down_work);
}
}
}
@@ -7827,7 +7995,8 @@ static void handle_dcc_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
}
/* just report this */
- dd_dev_info(dd, "DCC Error: fmconfig error: %s\n", extra);
+ dd_dev_info_ratelimited(dd, "DCC Error: fmconfig error: %s\n",
+ extra);
reg &= ~DCC_ERR_FLG_FMCONFIG_ERR_SMASK;
}
@@ -7878,36 +8047,40 @@ static void handle_dcc_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
}
/* just report this */
- dd_dev_info(dd, "DCC Error: PortRcv error: %s\n", extra);
- dd_dev_info(dd, " hdr0 0x%llx, hdr1 0x%llx\n",
- hdr0, hdr1);
+ dd_dev_info_ratelimited(dd, "DCC Error: PortRcv error: %s\n"
+ " hdr0 0x%llx, hdr1 0x%llx\n",
+ extra, hdr0, hdr1);
reg &= ~DCC_ERR_FLG_RCVPORT_ERR_SMASK;
}
if (reg & DCC_ERR_FLG_EN_CSR_ACCESS_BLOCKED_UC_SMASK) {
/* informative only */
- dd_dev_info(dd, "8051 access to LCB blocked\n");
+ dd_dev_info_ratelimited(dd, "8051 access to LCB blocked\n");
reg &= ~DCC_ERR_FLG_EN_CSR_ACCESS_BLOCKED_UC_SMASK;
}
if (reg & DCC_ERR_FLG_EN_CSR_ACCESS_BLOCKED_HOST_SMASK) {
/* informative only */
- dd_dev_info(dd, "host access to LCB blocked\n");
+ dd_dev_info_ratelimited(dd, "host access to LCB blocked\n");
reg &= ~DCC_ERR_FLG_EN_CSR_ACCESS_BLOCKED_HOST_SMASK;
}
+ if (unlikely(hfi1_dbg_fault_suppress_err(&dd->verbs_dev)))
+ reg &= ~DCC_ERR_FLG_LATE_EBP_ERR_SMASK;
+
/* report any remaining errors */
if (reg)
- dd_dev_info(dd, "DCC Error: %s\n",
- dcc_err_string(buf, sizeof(buf), reg));
+ dd_dev_info_ratelimited(dd, "DCC Error: %s\n",
+ dcc_err_string(buf, sizeof(buf), reg));
if (lcl_reason == 0)
lcl_reason = OPA_LINKDOWN_REASON_UNKNOWN;
if (do_bounce) {
- dd_dev_info(dd, "%s: PortErrorAction bounce\n", __func__);
+ dd_dev_info_ratelimited(dd, "%s: PortErrorAction bounce\n",
+ __func__);
set_link_down_reason(ppd, lcl_reason, 0, lcl_reason);
- queue_work(ppd->hfi1_wq, &ppd->link_bounce_work);
+ queue_work(ppd->link_wq, &ppd->link_bounce_work);
}
}
@@ -7982,8 +8155,15 @@ static void is_sdma_eng_int(struct hfi1_devdata *dd, unsigned int source)
}
}
-/*
+/**
+ * is_rcv_avail_int() - User receive context available IRQ handler
+ * @dd: valid dd
+ * @source: logical IRQ source (offset from IS_RCVAVAIL_START)
+ *
* RX block receive available interrupt. Source is < 160.
+ *
+ * This is the general interrupt handler for user (PSM) receive contexts,
+ * and can only be used for non-threaded IRQs.
*/
static void is_rcv_avail_int(struct hfi1_devdata *dd, unsigned int source)
{
@@ -7991,12 +8171,10 @@ static void is_rcv_avail_int(struct hfi1_devdata *dd, unsigned int source)
char *err_detail;
if (likely(source < dd->num_rcv_contexts)) {
- rcd = dd->rcd[source];
+ rcd = hfi1_rcd_get_by_index(dd, source);
if (rcd) {
- if (source < dd->first_user_ctxt)
- rcd->do_interrupt(rcd, 0);
- else
- handle_user_interrupt(rcd);
+ handle_user_interrupt(rcd);
+ hfi1_rcd_put(rcd);
return; /* OK */
}
/* received an interrupt, but no rcd */
@@ -8009,8 +8187,14 @@ static void is_rcv_avail_int(struct hfi1_devdata *dd, unsigned int source)
err_detail, source);
}
-/*
+/**
+ * is_rcv_urgent_int() - User receive context urgent IRQ handler
+ * @dd: valid dd
+ * @source: logical IRQ source (offset from IS_RCVURGENT_START)
+ *
* RX block receive urgent interrupt. Source is < 160.
+ *
+ * NOTE: kernel receive contexts specifically do NOT enable this IRQ.
*/
static void is_rcv_urgent_int(struct hfi1_devdata *dd, unsigned int source)
{
@@ -8018,11 +8202,10 @@ static void is_rcv_urgent_int(struct hfi1_devdata *dd, unsigned int source)
char *err_detail;
if (likely(source < dd->num_rcv_contexts)) {
- rcd = dd->rcd[source];
+ rcd = hfi1_rcd_get_by_index(dd, source);
if (rcd) {
- /* only pay attention to user urgent interrupts */
- if (source >= dd->first_user_ctxt)
- handle_user_interrupt(rcd);
+ handle_user_interrupt(rcd);
+ hfi1_rcd_put(rcd);
return; /* OK */
}
/* received an interrupt, but no rcd */
@@ -8057,7 +8240,7 @@ static const struct is_table is_table[] = {
is_sdma_eng_err_name, is_sdma_eng_err_int },
{ IS_SENDCTXT_ERR_START, IS_SENDCTXT_ERR_END,
is_sendctxt_err_name, is_sendctxt_err_int },
-{ IS_SDMA_START, IS_SDMA_END,
+{ IS_SDMA_START, IS_SDMA_IDLE_END,
is_sdma_eng_name, is_sdma_eng_int },
{ IS_VARIOUS_START, IS_VARIOUS_END,
is_various_name, is_various_int },
@@ -8083,7 +8266,7 @@ static void is_interrupt(struct hfi1_devdata *dd, unsigned int source)
/* avoids a double compare by walking the table in-order */
for (entry = &is_table[0]; entry->is_name; entry++) {
- if (source < entry->end) {
+ if (source <= entry->end) {
trace_hfi1_interrupt(dd, entry, source);
entry->is_int(dd, source - entry->start);
return;
@@ -8093,16 +8276,22 @@ static void is_interrupt(struct hfi1_devdata *dd, unsigned int source)
dd_dev_err(dd, "invalid interrupt source %u\n", source);
}
-/*
- * General interrupt handler. This is able to correctly handle
- * all interrupts in case INTx is used.
+/**
+ * general_interrupt - General interrupt handler
+ * @irq: MSIx IRQ vector
+ * @data: hfi1 devdata
+ *
+ * This is able to correctly handle all non-threaded interrupts. Receive
+ * context DATA IRQs are threaded and are not supported by this handler.
+ *
*/
-static irqreturn_t general_interrupt(int irq, void *data)
+irqreturn_t general_interrupt(int irq, void *data)
{
struct hfi1_devdata *dd = data;
u64 regs[CCE_NUM_INT_CSRS];
u32 bit;
int i;
+ irqreturn_t handled = IRQ_NONE;
this_cpu_inc(*dd->int_counter);
@@ -8123,12 +8312,13 @@ static irqreturn_t general_interrupt(int irq, void *data)
for_each_set_bit(bit, (unsigned long *)&regs[0],
CCE_NUM_INT_CSRS * 64) {
is_interrupt(dd, bit);
+ handled = IRQ_HANDLED;
}
- return IRQ_HANDLED;
+ return handled;
}
-static irqreturn_t sdma_interrupt(int irq, void *data)
+irqreturn_t sdma_interrupt(int irq, void *data)
{
struct sdma_engine *sde = data;
struct hfi1_devdata *dd = sde->dd;
@@ -8154,10 +8344,10 @@ static irqreturn_t sdma_interrupt(int irq, void *data)
/* handle the interrupt(s) */
sdma_engine_interrupt(sde, status);
- } else
- dd_dev_err(dd, "SDMA engine %u interrupt, but no status bits set\n",
- sde->this_idx);
-
+ } else {
+ dd_dev_info_ratelimited(dd, "SDMA engine %u interrupt, but no status bits set\n",
+ sde->this_idx);
+ }
return IRQ_HANDLED;
}
@@ -8171,7 +8361,6 @@ static inline void clear_recv_intr(struct hfi1_ctxtdata *rcd)
struct hfi1_devdata *dd = rcd->dd;
u32 addr = CCE_INT_CLEAR + (8 * rcd->ireg);
- mmiowb(); /* make sure everything before is written */
write_csr(dd, addr, rcd->imask);
/* force the above write on the chip and get a value back */
(void)read_csr(dd, addr);
@@ -8196,20 +8385,107 @@ void force_recv_intr(struct hfi1_ctxtdata *rcd)
static inline int check_packet_present(struct hfi1_ctxtdata *rcd)
{
u32 tail;
- int present;
- if (!HFI1_CAP_IS_KSET(DMA_RTAIL))
- present = (rcd->seq_cnt ==
- rhf_rcv_seq(rhf_to_cpu(get_rhf_addr(rcd))));
- else /* is RDMA rtail */
- present = (rcd->head != get_rcvhdrtail(rcd));
-
- if (present)
+ if (hfi1_packet_present(rcd))
return 1;
/* fall back to a CSR read, correct indpendent of DMA_RTAIL */
tail = (u32)read_uctxt_csr(rcd->dd, rcd->ctxt, RCV_HDR_TAIL);
- return rcd->head != tail;
+ return hfi1_rcd_head(rcd) != tail;
+}
+
+/*
+ * Common code for receive contexts interrupt handlers.
+ * Update traces, increment kernel IRQ counter and
+ * setup ASPM when needed.
+ */
+static void receive_interrupt_common(struct hfi1_ctxtdata *rcd)
+{
+ struct hfi1_devdata *dd = rcd->dd;
+
+ trace_hfi1_receive_interrupt(dd, rcd);
+ this_cpu_inc(*dd->int_counter);
+ aspm_ctx_disable(rcd);
+}
+
+/*
+ * __hfi1_rcd_eoi_intr() - Make HW issue receive interrupt
+ * when there are packets present in the queue. When calling
+ * with interrupts enabled please use hfi1_rcd_eoi_intr.
+ *
+ * @rcd: valid receive context
+ */
+static void __hfi1_rcd_eoi_intr(struct hfi1_ctxtdata *rcd)
+{
+ if (!rcd->rcvhdrq)
+ return;
+ clear_recv_intr(rcd);
+ if (check_packet_present(rcd))
+ force_recv_intr(rcd);
+}
+
+/**
+ * hfi1_rcd_eoi_intr() - End of Interrupt processing action
+ *
+ * @rcd: Ptr to hfi1_ctxtdata of receive context
+ *
+ * Hold IRQs so we can safely clear the interrupt and
+ * recheck for a packet that may have arrived after the previous
+ * check and the interrupt clear. If a packet arrived, force another
+ * interrupt. This routine can be called at the end of receive packet
+ * processing in interrupt service routines, interrupt service thread
+ * and softirqs
+ */
+static void hfi1_rcd_eoi_intr(struct hfi1_ctxtdata *rcd)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ __hfi1_rcd_eoi_intr(rcd);
+ local_irq_restore(flags);
+}
+
+/**
+ * hfi1_netdev_rx_napi - napi poll function to move eoi inline
+ * @napi: pointer to napi object
+ * @budget: netdev budget
+ */
+int hfi1_netdev_rx_napi(struct napi_struct *napi, int budget)
+{
+ struct hfi1_netdev_rxq *rxq = container_of(napi,
+ struct hfi1_netdev_rxq, napi);
+ struct hfi1_ctxtdata *rcd = rxq->rcd;
+ int work_done = 0;
+
+ work_done = rcd->do_interrupt(rcd, budget);
+
+ if (work_done < budget) {
+ napi_complete_done(napi, work_done);
+ hfi1_rcd_eoi_intr(rcd);
+ }
+
+ return work_done;
+}
+
+/* Receive packet napi handler for netdevs VNIC and AIP */
+irqreturn_t receive_context_interrupt_napi(int irq, void *data)
+{
+ struct hfi1_ctxtdata *rcd = data;
+
+ receive_interrupt_common(rcd);
+
+ if (likely(rcd->napi)) {
+ if (likely(napi_schedule_prep(rcd->napi)))
+ __napi_schedule_irqoff(rcd->napi);
+ else
+ __hfi1_rcd_eoi_intr(rcd);
+ } else {
+ WARN_ONCE(1, "Napi IRQ handler without napi set up ctxt=%d\n",
+ rcd->ctxt);
+ __hfi1_rcd_eoi_intr(rcd);
+ }
+
+ return IRQ_HANDLED;
}
/*
@@ -8220,16 +8496,12 @@ static inline int check_packet_present(struct hfi1_ctxtdata *rcd)
* invoked) is finished. The intent is to avoid extra interrupts while we
* are processing packets anyway.
*/
-static irqreturn_t receive_context_interrupt(int irq, void *data)
+irqreturn_t receive_context_interrupt(int irq, void *data)
{
struct hfi1_ctxtdata *rcd = data;
- struct hfi1_devdata *dd = rcd->dd;
int disposition;
- int present;
- trace_hfi1_receive_interrupt(dd, rcd->ctxt);
- this_cpu_inc(*dd->int_counter);
- aspm_ctx_disable(rcd);
+ receive_interrupt_common(rcd);
/* receive interrupt remains blocked while processing packets */
disposition = rcd->do_interrupt(rcd, 0);
@@ -8242,17 +8514,7 @@ static irqreturn_t receive_context_interrupt(int irq, void *data)
if (disposition == RCV_PKT_LIMIT)
return IRQ_WAKE_THREAD;
- /*
- * The packet processor detected no more packets. Clear the receive
- * interrupt and recheck for a packet packet that may have arrived
- * after the previous check and interrupt clear. If a packet arrived,
- * force another interrupt.
- */
- clear_recv_intr(rcd);
- present = check_packet_present(rcd);
- if (present)
- force_recv_intr(rcd);
-
+ __hfi1_rcd_eoi_intr(rcd);
return IRQ_HANDLED;
}
@@ -8260,27 +8522,14 @@ static irqreturn_t receive_context_interrupt(int irq, void *data)
* Receive packet thread handler. This expects to be invoked with the
* receive interrupt still blocked.
*/
-static irqreturn_t receive_context_thread(int irq, void *data)
+irqreturn_t receive_context_thread(int irq, void *data)
{
struct hfi1_ctxtdata *rcd = data;
- int present;
/* receive interrupt is still blocked from the IRQ handler */
(void)rcd->do_interrupt(rcd, 1);
- /*
- * The packet processor will only return if it detected no more
- * packets. Hold IRQs here so we can safely clear the interrupt and
- * recheck for a packet that may have arrived after the previous
- * check and the interrupt clear. If a packet arrived, force another
- * interrupt.
- */
- local_irq_disable();
- clear_recv_intr(rcd);
- present = check_packet_present(rcd);
- if (present)
- force_recv_intr(rcd);
- local_irq_enable();
+ hfi1_rcd_eoi_intr(rcd);
return IRQ_HANDLED;
}
@@ -8342,6 +8591,52 @@ static int read_lcb_via_8051(struct hfi1_devdata *dd, u32 addr, u64 *data)
}
/*
+ * Provide a cache for some of the LCB registers in case the LCB is
+ * unavailable.
+ * (The LCB is unavailable in certain link states, for example.)
+ */
+struct lcb_datum {
+ u32 off;
+ u64 val;
+};
+
+static struct lcb_datum lcb_cache[] = {
+ { DC_LCB_ERR_INFO_RX_REPLAY_CNT, 0},
+ { DC_LCB_ERR_INFO_SEQ_CRC_CNT, 0 },
+ { DC_LCB_ERR_INFO_REINIT_FROM_PEER_CNT, 0 },
+};
+
+static void update_lcb_cache(struct hfi1_devdata *dd)
+{
+ int i;
+ int ret;
+ u64 val;
+
+ for (i = 0; i < ARRAY_SIZE(lcb_cache); i++) {
+ ret = read_lcb_csr(dd, lcb_cache[i].off, &val);
+
+ /* Update if we get good data */
+ if (likely(ret != -EBUSY))
+ lcb_cache[i].val = val;
+ }
+}
+
+static int read_lcb_cache(u32 off, u64 *val)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(lcb_cache); i++) {
+ if (lcb_cache[i].off == off) {
+ *val = lcb_cache[i].val;
+ return 0;
+ }
+ }
+
+ pr_warn("%s bad offset 0x%x\n", __func__, off);
+ return -1;
+}
+
+/*
* Read an LCB CSR. Access may not be in host control, so check.
* Return 0 on success, -EBUSY on failure.
*/
@@ -8352,9 +8647,13 @@ int read_lcb_csr(struct hfi1_devdata *dd, u32 addr, u64 *data)
/* if up, go through the 8051 for the value */
if (ppd->host_link_state & HLS_UP)
return read_lcb_via_8051(dd, addr, data);
- /* if going up or down, no access */
- if (ppd->host_link_state & (HLS_GOING_UP | HLS_GOING_OFFLINE))
- return -EBUSY;
+ /* if going up or down, check the cache, otherwise, no access */
+ if (ppd->host_link_state & (HLS_GOING_UP | HLS_GOING_OFFLINE)) {
+ if (read_lcb_cache(addr, data))
+ return -EBUSY;
+ return 0;
+ }
+
/* otherwise, host has access */
*data = read_csr(dd, addr);
return 0;
@@ -8369,7 +8668,7 @@ static int write_lcb_via_8051(struct hfi1_devdata *dd, u32 addr, u64 data)
int ret;
if (dd->icode == ICODE_FUNCTIONAL_SIMULATOR ||
- (dd->dc8051_ver < dc8051_ver(0, 20))) {
+ (dd->dc8051_ver < dc8051_ver(0, 20, 0))) {
if (acquire_lcb_access(dd, 0) == 0) {
write_csr(dd, addr, data);
release_lcb_access(dd, 0);
@@ -8410,24 +8709,16 @@ int write_lcb_csr(struct hfi1_devdata *dd, u32 addr, u64 data)
* < 0 = Linux error, not able to get access
* > 0 = 8051 command RETURN_CODE
*/
-static int do_8051_command(
- struct hfi1_devdata *dd,
- u32 type,
- u64 in_data,
- u64 *out_data)
+static int do_8051_command(struct hfi1_devdata *dd, u32 type, u64 in_data,
+ u64 *out_data)
{
u64 reg, completed;
int return_code;
- unsigned long flags;
unsigned long timeout;
hfi1_cdbg(DC8051, "type %d, data 0x%012llx", type, in_data);
- /*
- * Alternative to holding the lock for a long time:
- * - keep busy wait - have other users bounce off
- */
- spin_lock_irqsave(&dd->dc8051_lock, flags);
+ mutex_lock(&dd->dc8051_lock);
/* We can't send any commands to the 8051 if it's in reset */
if (dd->dc_shutdown) {
@@ -8453,10 +8744,8 @@ static int do_8051_command(
return_code = -ENXIO;
goto fail;
}
- spin_unlock_irqrestore(&dd->dc8051_lock, flags);
- dc_shutdown(dd);
- dc_start(dd);
- spin_lock_irqsave(&dd->dc8051_lock, flags);
+ _dc_shutdown(dd);
+ _dc_start(dd);
}
/*
@@ -8466,7 +8755,7 @@ static int do_8051_command(
/*
* When writing a LCB CSR, out_data contains the full value to
- * to be written, while in_data contains the relative LCB
+ * be written, while in_data contains the relative LCB
* address in 7:0. Do the work here, rather than the caller,
* of distrubting the write data to where it needs to go:
*
@@ -8537,8 +8826,7 @@ static int do_8051_command(
write_csr(dd, DC_DC8051_CFG_HOST_CMD_0, 0);
fail:
- spin_unlock_irqrestore(&dd->dc8051_lock, flags);
-
+ mutex_unlock(&dd->dc8051_lock);
return return_code;
}
@@ -8628,29 +8916,29 @@ static int write_vc_local_fabric(struct hfi1_devdata *dd, u8 vau, u8 z, u8 vcu,
GENERAL_CONFIG, frame);
}
-static void read_vc_local_link_width(struct hfi1_devdata *dd, u8 *misc_bits,
- u8 *flag_bits, u16 *link_widths)
+static void read_vc_local_link_mode(struct hfi1_devdata *dd, u8 *misc_bits,
+ u8 *flag_bits, u16 *link_widths)
{
u32 frame;
- read_8051_config(dd, VERIFY_CAP_LOCAL_LINK_WIDTH, GENERAL_CONFIG,
+ read_8051_config(dd, VERIFY_CAP_LOCAL_LINK_MODE, GENERAL_CONFIG,
&frame);
*misc_bits = (frame >> MISC_CONFIG_BITS_SHIFT) & MISC_CONFIG_BITS_MASK;
*flag_bits = (frame >> LOCAL_FLAG_BITS_SHIFT) & LOCAL_FLAG_BITS_MASK;
*link_widths = (frame >> LINK_WIDTH_SHIFT) & LINK_WIDTH_MASK;
}
-static int write_vc_local_link_width(struct hfi1_devdata *dd,
- u8 misc_bits,
- u8 flag_bits,
- u16 link_widths)
+static int write_vc_local_link_mode(struct hfi1_devdata *dd,
+ u8 misc_bits,
+ u8 flag_bits,
+ u16 link_widths)
{
u32 frame;
frame = (u32)misc_bits << MISC_CONFIG_BITS_SHIFT
| (u32)flag_bits << LOCAL_FLAG_BITS_SHIFT
| (u32)link_widths << LINK_WIDTH_SHIFT;
- return load_8051_config(dd, VERIFY_CAP_LOCAL_LINK_WIDTH, GENERAL_CONFIG,
+ return load_8051_config(dd, VERIFY_CAP_LOCAL_LINK_MODE, GENERAL_CONFIG,
frame);
}
@@ -8675,13 +8963,34 @@ static void read_remote_device_id(struct hfi1_devdata *dd, u16 *device_id,
& REMOTE_DEVICE_REV_MASK;
}
-void read_misc_status(struct hfi1_devdata *dd, u8 *ver_a, u8 *ver_b)
+int write_host_interface_version(struct hfi1_devdata *dd, u8 version)
+{
+ u32 frame;
+ u32 mask;
+
+ mask = (HOST_INTERFACE_VERSION_MASK << HOST_INTERFACE_VERSION_SHIFT);
+ read_8051_config(dd, RESERVED_REGISTERS, GENERAL_CONFIG, &frame);
+ /* Clear, then set field */
+ frame &= ~mask;
+ frame |= ((u32)version << HOST_INTERFACE_VERSION_SHIFT);
+ return load_8051_config(dd, RESERVED_REGISTERS, GENERAL_CONFIG,
+ frame);
+}
+
+void read_misc_status(struct hfi1_devdata *dd, u8 *ver_major, u8 *ver_minor,
+ u8 *ver_patch)
{
u32 frame;
read_8051_config(dd, MISC_STATUS, GENERAL_CONFIG, &frame);
- *ver_a = (frame >> STS_FM_VERSION_A_SHIFT) & STS_FM_VERSION_A_MASK;
- *ver_b = (frame >> STS_FM_VERSION_B_SHIFT) & STS_FM_VERSION_B_MASK;
+ *ver_major = (frame >> STS_FM_VERSION_MAJOR_SHIFT) &
+ STS_FM_VERSION_MAJOR_MASK;
+ *ver_minor = (frame >> STS_FM_VERSION_MINOR_SHIFT) &
+ STS_FM_VERSION_MINOR_MASK;
+
+ read_8051_config(dd, VERSION_PATCH, GENERAL_CONFIG, &frame);
+ *ver_patch = (frame >> STS_FM_VERSION_PATCH_SHIFT) &
+ STS_FM_VERSION_PATCH_MASK;
}
static void read_vc_remote_phy(struct hfi1_devdata *dd, u8 *power_management,
@@ -8730,14 +9039,6 @@ static void read_local_lni(struct hfi1_devdata *dd, u8 *enable_lane_rx)
*enable_lane_rx = (frame >> ENABLE_LANE_RX_SHIFT) & ENABLE_LANE_RX_MASK;
}
-static void read_mgmt_allowed(struct hfi1_devdata *dd, u8 *mgmt_allowed)
-{
- u32 frame;
-
- read_8051_config(dd, REMOTE_LNI_INFO, GENERAL_CONFIG, &frame);
- *mgmt_allowed = (frame >> MGMT_ALLOWED_SHIFT) & MGMT_ALLOWED_MASK;
-}
-
static void read_last_local_state(struct hfi1_devdata *dd, u32 *lls)
{
read_8051_config(dd, LAST_LOCAL_STATE_COMPLETE, GENERAL_CONFIG, lls);
@@ -8889,8 +9190,6 @@ int send_idle_sma(struct hfi1_devdata *dd, u64 message)
*/
static int do_quick_linkup(struct hfi1_devdata *dd)
{
- u64 reg;
- unsigned long timeout;
int ret;
lcb_shutdown(dd, 0);
@@ -8913,19 +9212,9 @@ static int do_quick_linkup(struct hfi1_devdata *dd)
write_csr(dd, DC_LCB_CFG_RUN,
1ull << DC_LCB_CFG_RUN_EN_SHIFT);
- /* watch LCB_STS_LINK_TRANSFER_ACTIVE */
- timeout = jiffies + msecs_to_jiffies(10);
- while (1) {
- reg = read_csr(dd, DC_LCB_STS_LINK_TRANSFER_ACTIVE);
- if (reg)
- break;
- if (time_after(jiffies, timeout)) {
- dd_dev_err(dd,
- "timeout waiting for LINK_TRANSFER_ACTIVE\n");
- return -ETIMEDOUT;
- }
- udelay(2);
- }
+ ret = wait_link_transfer_active(dd, 10);
+ if (ret)
+ return ret;
write_csr(dd, DC_LCB_CFG_ALLOW_LINK_UP,
1ull << DC_LCB_CFG_ALLOW_LINK_UP_VAL_SHIFT);
@@ -8971,25 +9260,6 @@ static int do_quick_linkup(struct hfi1_devdata *dd)
}
/*
- * Set the SerDes to internal loopback mode.
- * Returns 0 on success, -errno on error.
- */
-static int set_serdes_loopback_mode(struct hfi1_devdata *dd)
-{
- int ret;
-
- ret = set_physical_link_state(dd, PLS_INTERNAL_SERDES_LOOPBACK);
- if (ret == HCMD_SUCCESS)
- return 0;
- dd_dev_err(dd,
- "Set physical link state to SerDes Loopback failed with return %d\n",
- ret);
- if (ret >= 0)
- ret = -EINVAL;
- return ret;
-}
-
-/*
* Do all special steps to set up loopback.
*/
static int init_loopback(struct hfi1_devdata *dd)
@@ -9014,13 +9284,11 @@ static int init_loopback(struct hfi1_devdata *dd)
return 0;
}
- /* handle serdes loopback */
- if (loopback == LOOPBACK_SERDES) {
- /* internal serdes loopack needs quick linkup on RTL */
- if (dd->icode == ICODE_RTL_SILICON)
- quick_linkup = 1;
- return set_serdes_loopback_mode(dd);
- }
+ /*
+ * SerDes loopback init sequence is handled in set_local_link_attributes
+ */
+ if (loopback == LOOPBACK_SERDES)
+ return 0;
/* LCB loopback - handled at poll time */
if (loopback == LOOPBACK_LCB) {
@@ -9079,7 +9347,7 @@ static int set_local_link_attributes(struct hfi1_pportdata *ppd)
u8 tx_polarity_inversion;
u8 rx_polarity_inversion;
int ret;
-
+ u32 misc_bits = 0;
/* reset our fabric serdes to clear any lingering problems */
fabric_serdes_reset(dd);
@@ -9089,7 +9357,7 @@ static int set_local_link_attributes(struct hfi1_pportdata *ppd)
if (ret)
goto set_local_link_attributes_fail;
- if (dd->dc8051_ver < dc8051_ver(0, 20)) {
+ if (dd->dc8051_ver < dc8051_ver(0, 20, 0)) {
/* set the tx rate to the fastest enabled */
if (ppd->link_speed_enabled & OPA_LINK_SPEED_25G)
ppd->local_tx_rate = 1;
@@ -9110,6 +9378,14 @@ static int set_local_link_attributes(struct hfi1_pportdata *ppd)
if (ret != HCMD_SUCCESS)
goto set_local_link_attributes_fail;
+ ret = write_host_interface_version(dd, HOST_INTERFACE_VERSION);
+ if (ret != HCMD_SUCCESS) {
+ dd_dev_err(dd,
+ "Failed to set host interface version, return 0x%x\n",
+ ret);
+ goto set_local_link_attributes_fail;
+ }
+
/*
* DC supports continuous updates.
*/
@@ -9125,8 +9401,23 @@ static int set_local_link_attributes(struct hfi1_pportdata *ppd)
if (ret != HCMD_SUCCESS)
goto set_local_link_attributes_fail;
- ret = write_vc_local_link_width(dd, 0, 0,
- opa_to_vc_link_widths(
+ /*
+ * SerDes loopback init sequence requires
+ * setting bit 0 of MISC_CONFIG_BITS
+ */
+ if (loopback == LOOPBACK_SERDES)
+ misc_bits |= 1 << LOOPBACK_SERDES_CONFIG_BIT_MASK_SHIFT;
+
+ /*
+ * An external device configuration request is used to reset the LCB
+ * to retry to obtain operational lanes when the first attempt is
+ * unsuccesful.
+ */
+ if (dd->dc8051_ver >= dc8051_ver(1, 25, 0))
+ misc_bits |= 1 << EXT_CFG_LCB_RESET_SUPPORTED_SHIFT;
+
+ ret = write_vc_local_link_mode(dd, misc_bits, 0,
+ opa_to_vc_link_widths(
ppd->link_width_enabled));
if (ret != HCMD_SUCCESS)
goto set_local_link_attributes_fail;
@@ -9156,12 +9447,6 @@ int start_link(struct hfi1_pportdata *ppd)
*/
tune_serdes(ppd);
- if (!ppd->link_enabled) {
- dd_dev_info(ppd->dd,
- "%s: stopping link start because link is disabled\n",
- __func__);
- return 0;
- }
if (!ppd->driver_link_ready) {
dd_dev_info(ppd->dd,
"%s: stopping link start because driver is not ready\n",
@@ -9233,7 +9518,7 @@ static void set_qsfp_int_n(struct hfi1_pportdata *ppd, u8 enable)
write_csr(dd, dd->hfi1_id ? ASIC_QSFP2_MASK : ASIC_QSFP1_MASK, mask);
}
-void reset_qsfp(struct hfi1_pportdata *ppd)
+int reset_qsfp(struct hfi1_pportdata *ppd)
{
struct hfi1_devdata *dd = ppd->dd;
u64 mask, qsfp_mask;
@@ -9263,6 +9548,13 @@ void reset_qsfp(struct hfi1_pportdata *ppd)
* for alarms and warnings
*/
set_qsfp_int_n(ppd, 1);
+
+ /*
+ * After the reset, AOC transmitters are enabled by default. They need
+ * to be turned off to complete the QSFP setup before they can be
+ * enabled again.
+ */
+ return set_qsfp_tx(ppd, 0);
}
static int handle_qsfp_error_conditions(struct hfi1_pportdata *ppd,
@@ -9272,13 +9564,13 @@ static int handle_qsfp_error_conditions(struct hfi1_pportdata *ppd,
if ((qsfp_interrupt_status[0] & QSFP_HIGH_TEMP_ALARM) ||
(qsfp_interrupt_status[0] & QSFP_HIGH_TEMP_WARNING))
- dd_dev_info(dd, "%s: QSFP cable on fire\n",
- __func__);
+ dd_dev_err(dd, "%s: QSFP cable temperature too high\n",
+ __func__);
if ((qsfp_interrupt_status[0] & QSFP_LOW_TEMP_ALARM) ||
(qsfp_interrupt_status[0] & QSFP_LOW_TEMP_WARNING))
- dd_dev_info(dd, "%s: QSFP cable temperature too low\n",
- __func__);
+ dd_dev_err(dd, "%s: QSFP cable temperature too low\n",
+ __func__);
/*
* The remaining alarms/warnings don't matter if the link is down.
@@ -9288,75 +9580,75 @@ static int handle_qsfp_error_conditions(struct hfi1_pportdata *ppd,
if ((qsfp_interrupt_status[1] & QSFP_HIGH_VCC_ALARM) ||
(qsfp_interrupt_status[1] & QSFP_HIGH_VCC_WARNING))
- dd_dev_info(dd, "%s: QSFP supply voltage too high\n",
- __func__);
+ dd_dev_err(dd, "%s: QSFP supply voltage too high\n",
+ __func__);
if ((qsfp_interrupt_status[1] & QSFP_LOW_VCC_ALARM) ||
(qsfp_interrupt_status[1] & QSFP_LOW_VCC_WARNING))
- dd_dev_info(dd, "%s: QSFP supply voltage too low\n",
- __func__);
+ dd_dev_err(dd, "%s: QSFP supply voltage too low\n",
+ __func__);
/* Byte 2 is vendor specific */
if ((qsfp_interrupt_status[3] & QSFP_HIGH_POWER_ALARM) ||
(qsfp_interrupt_status[3] & QSFP_HIGH_POWER_WARNING))
- dd_dev_info(dd, "%s: Cable RX channel 1/2 power too high\n",
- __func__);
+ dd_dev_err(dd, "%s: Cable RX channel 1/2 power too high\n",
+ __func__);
if ((qsfp_interrupt_status[3] & QSFP_LOW_POWER_ALARM) ||
(qsfp_interrupt_status[3] & QSFP_LOW_POWER_WARNING))
- dd_dev_info(dd, "%s: Cable RX channel 1/2 power too low\n",
- __func__);
+ dd_dev_err(dd, "%s: Cable RX channel 1/2 power too low\n",
+ __func__);
if ((qsfp_interrupt_status[4] & QSFP_HIGH_POWER_ALARM) ||
(qsfp_interrupt_status[4] & QSFP_HIGH_POWER_WARNING))
- dd_dev_info(dd, "%s: Cable RX channel 3/4 power too high\n",
- __func__);
+ dd_dev_err(dd, "%s: Cable RX channel 3/4 power too high\n",
+ __func__);
if ((qsfp_interrupt_status[4] & QSFP_LOW_POWER_ALARM) ||
(qsfp_interrupt_status[4] & QSFP_LOW_POWER_WARNING))
- dd_dev_info(dd, "%s: Cable RX channel 3/4 power too low\n",
- __func__);
+ dd_dev_err(dd, "%s: Cable RX channel 3/4 power too low\n",
+ __func__);
if ((qsfp_interrupt_status[5] & QSFP_HIGH_BIAS_ALARM) ||
(qsfp_interrupt_status[5] & QSFP_HIGH_BIAS_WARNING))
- dd_dev_info(dd, "%s: Cable TX channel 1/2 bias too high\n",
- __func__);
+ dd_dev_err(dd, "%s: Cable TX channel 1/2 bias too high\n",
+ __func__);
if ((qsfp_interrupt_status[5] & QSFP_LOW_BIAS_ALARM) ||
(qsfp_interrupt_status[5] & QSFP_LOW_BIAS_WARNING))
- dd_dev_info(dd, "%s: Cable TX channel 1/2 bias too low\n",
- __func__);
+ dd_dev_err(dd, "%s: Cable TX channel 1/2 bias too low\n",
+ __func__);
if ((qsfp_interrupt_status[6] & QSFP_HIGH_BIAS_ALARM) ||
(qsfp_interrupt_status[6] & QSFP_HIGH_BIAS_WARNING))
- dd_dev_info(dd, "%s: Cable TX channel 3/4 bias too high\n",
- __func__);
+ dd_dev_err(dd, "%s: Cable TX channel 3/4 bias too high\n",
+ __func__);
if ((qsfp_interrupt_status[6] & QSFP_LOW_BIAS_ALARM) ||
(qsfp_interrupt_status[6] & QSFP_LOW_BIAS_WARNING))
- dd_dev_info(dd, "%s: Cable TX channel 3/4 bias too low\n",
- __func__);
+ dd_dev_err(dd, "%s: Cable TX channel 3/4 bias too low\n",
+ __func__);
if ((qsfp_interrupt_status[7] & QSFP_HIGH_POWER_ALARM) ||
(qsfp_interrupt_status[7] & QSFP_HIGH_POWER_WARNING))
- dd_dev_info(dd, "%s: Cable TX channel 1/2 power too high\n",
- __func__);
+ dd_dev_err(dd, "%s: Cable TX channel 1/2 power too high\n",
+ __func__);
if ((qsfp_interrupt_status[7] & QSFP_LOW_POWER_ALARM) ||
(qsfp_interrupt_status[7] & QSFP_LOW_POWER_WARNING))
- dd_dev_info(dd, "%s: Cable TX channel 1/2 power too low\n",
- __func__);
+ dd_dev_err(dd, "%s: Cable TX channel 1/2 power too low\n",
+ __func__);
if ((qsfp_interrupt_status[8] & QSFP_HIGH_POWER_ALARM) ||
(qsfp_interrupt_status[8] & QSFP_HIGH_POWER_WARNING))
- dd_dev_info(dd, "%s: Cable TX channel 3/4 power too high\n",
- __func__);
+ dd_dev_err(dd, "%s: Cable TX channel 3/4 power too high\n",
+ __func__);
if ((qsfp_interrupt_status[8] & QSFP_LOW_POWER_ALARM) ||
(qsfp_interrupt_status[8] & QSFP_LOW_POWER_WARNING))
- dd_dev_info(dd, "%s: Cable TX channel 3/4 power too low\n",
- __func__);
+ dd_dev_err(dd, "%s: Cable TX channel 3/4 power too low\n",
+ __func__);
/* Bytes 9-10 and 11-12 are reserved */
/* Bytes 13-15 are vendor specific */
@@ -9379,6 +9671,13 @@ void qsfp_event(struct work_struct *work)
if (!qsfp_mod_present(ppd))
return;
+ if (ppd->host_link_state == HLS_DN_DISABLE) {
+ dd_dev_info(ppd->dd,
+ "%s: stopping link start because link is disabled\n",
+ __func__);
+ return;
+ }
+
/*
* Turn DC back on after cable has been re-inserted. Up until
* now, the DC has been in reset to save power.
@@ -9420,30 +9719,10 @@ void qsfp_event(struct work_struct *work)
}
}
-static void init_qsfp_int(struct hfi1_devdata *dd)
+void init_qsfp_int(struct hfi1_devdata *dd)
{
struct hfi1_pportdata *ppd = dd->pport;
- u64 qsfp_mask, cce_int_mask;
- const int qsfp1_int_smask = QSFP1_INT % 64;
- const int qsfp2_int_smask = QSFP2_INT % 64;
-
- /*
- * disable QSFP1 interrupts for HFI1, QSFP2 interrupts for HFI0
- * Qsfp1Int and Qsfp2Int are adjacent bits in the same CSR,
- * therefore just one of QSFP1_INT/QSFP2_INT can be used to find
- * the index of the appropriate CSR in the CCEIntMask CSR array
- */
- cce_int_mask = read_csr(dd, CCE_INT_MASK +
- (8 * (QSFP1_INT / 64)));
- if (dd->hfi1_id) {
- cce_int_mask &= ~((u64)1 << qsfp1_int_smask);
- write_csr(dd, CCE_INT_MASK + (8 * (QSFP1_INT / 64)),
- cce_int_mask);
- } else {
- cce_int_mask &= ~((u64)1 << qsfp2_int_smask);
- write_csr(dd, CCE_INT_MASK + (8 * (QSFP2_INT / 64)),
- cce_int_mask);
- }
+ u64 qsfp_mask;
qsfp_mask = (u64)(QSFP_HFI0_INT_N | QSFP_HFI0_MODPRST_N);
/* Clear current status to avoid spurious interrupts */
@@ -9460,6 +9739,12 @@ static void init_qsfp_int(struct hfi1_devdata *dd)
write_csr(dd,
dd->hfi1_id ? ASIC_QSFP2_INVERT : ASIC_QSFP1_INVERT,
qsfp_mask);
+
+ /* Enable the appropriate QSFP IRQ source */
+ if (!dd->hfi1_id)
+ set_intr_bits(dd, QSFP1_INT, QSFP1_INT, true);
+ else
+ set_intr_bits(dd, QSFP2_INT, QSFP2_INT, true);
}
/*
@@ -9492,8 +9777,11 @@ static int test_qsfp_read(struct hfi1_pportdata *ppd)
int ret;
u8 status;
- /* report success if not a QSFP */
- if (ppd->port_type != PORT_TYPE_QSFP)
+ /*
+ * Report success if not a QSFP or, if it is a QSFP, but the cable is
+ * not present
+ */
+ if (ppd->port_type != PORT_TYPE_QSFP || !qsfp_mod_present(ppd))
return 0;
/* read byte 2, the status byte */
@@ -9531,7 +9819,7 @@ static void try_start_link(struct hfi1_pportdata *ppd)
"QSFP not responding, waiting and retrying %d\n",
(int)ppd->qsfp_retry_count);
ppd->qsfp_retry_count++;
- queue_delayed_work(ppd->hfi1_wq, &ppd->start_link_work,
+ queue_delayed_work(ppd->link_wq, &ppd->start_link_work,
msecs_to_jiffies(QSFP_RETRY_WAIT));
return;
}
@@ -9608,13 +9896,14 @@ void hfi1_quiet_serdes(struct hfi1_pportdata *ppd)
cancel_delayed_work_sync(&ppd->start_link_work);
ppd->offline_disabled_reason =
- HFI1_ODR_MASK(OPA_LINKDOWN_REASON_SMA_DISABLED);
- set_link_down_reason(ppd, OPA_LINKDOWN_REASON_SMA_DISABLED, 0,
- OPA_LINKDOWN_REASON_SMA_DISABLED);
+ HFI1_ODR_MASK(OPA_LINKDOWN_REASON_REBOOT);
+ set_link_down_reason(ppd, OPA_LINKDOWN_REASON_REBOOT, 0,
+ OPA_LINKDOWN_REASON_REBOOT);
set_link_state(ppd, HLS_DN_OFFLINE);
/* disable the port */
clear_rcvctrl(dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK);
+ cancel_work_sync(&ppd->freeze_work);
}
static inline int init_cpu_counters(struct hfi1_devdata *dd)
@@ -9638,17 +9927,6 @@ static inline int init_cpu_counters(struct hfi1_devdata *dd)
return 0;
}
-static const char * const pt_names[] = {
- "expected",
- "eager",
- "invalid"
-};
-
-static const char *pt_name(u32 type)
-{
- return type >= ARRAY_SIZE(pt_names) ? "unknown" : pt_names[type];
-}
-
/*
* index is the index into the receive array
*/
@@ -9656,35 +9934,34 @@ void hfi1_put_tid(struct hfi1_devdata *dd, u32 index,
u32 type, unsigned long pa, u16 order)
{
u64 reg;
- void __iomem *base = (dd->rcvarray_wc ? dd->rcvarray_wc :
- (dd->kregbase + RCV_ARRAY));
if (!(dd->flags & HFI1_PRESENT))
goto done;
- if (type == PT_INVALID) {
+ if (type == PT_INVALID || type == PT_INVALID_FLUSH) {
pa = 0;
+ order = 0;
} else if (type > PT_INVALID) {
dd_dev_err(dd,
"unexpected receive array type %u for index %u, not handled\n",
type, index);
goto done;
}
-
- hfi1_cdbg(TID, "type %s, index 0x%x, pa 0x%lx, bsize 0x%lx",
- pt_name(type), index, pa, (unsigned long)order);
+ trace_hfi1_put_tid(dd, index, type, pa, order);
#define RT_ADDR_SHIFT 12 /* 4KB kernel address boundary */
reg = RCV_ARRAY_RT_WRITE_ENABLE_SMASK
| (u64)order << RCV_ARRAY_RT_BUF_SIZE_SHIFT
| ((pa >> RT_ADDR_SHIFT) & RCV_ARRAY_RT_ADDR_MASK)
<< RCV_ARRAY_RT_ADDR_SHIFT;
- writeq(reg, base + (index * 8));
+ trace_hfi1_write_rcvarray(dd->rcvarray_wc + (index * 8), reg);
+ writeq(reg, dd->rcvarray_wc + (index * 8));
- if (type == PT_EAGER)
+ if (type == PT_EAGER || type == PT_INVALID_FLUSH || (index & 3) == 3)
/*
- * Eager entries are written one-by-one so we have to push them
- * after we write the entry.
+ * Eager entries are written and flushed
+ *
+ * Expected entries are flushed every 4 writes
*/
flush_wc();
done:
@@ -9706,15 +9983,6 @@ void hfi1_clear_tids(struct hfi1_ctxtdata *rcd)
hfi1_put_tid(dd, i, PT_INVALID, 0, 0);
}
-struct ib_header *hfi1_get_msgheader(
- struct hfi1_devdata *dd, __le32 *rhf_addr)
-{
- u32 offset = rhf_hdrq_offset(rhf_to_cpu(rhf_addr));
-
- return (struct ib_header *)
- (rhf_addr - dd->rhf_offset + offset);
-}
-
static const char * const ib_cfg_name_strings[] = {
"HFI1_IB_CFG_LIDLMC",
"HFI1_IB_CFG_LWID_DG_ENB",
@@ -9772,7 +10040,7 @@ int hfi1_get_ib_cfg(struct hfi1_pportdata *ppd, int which)
goto unimplemented;
case HFI1_IB_CFG_OP_VLS:
- val = ppd->vls_operational;
+ val = ppd->actual_vls_operational;
break;
case HFI1_IB_CFG_VL_HIGH_CAP: /* VL arb high priority table size */
val = VL_ARB_HIGH_PRIO_TABLE_SIZE;
@@ -9787,7 +10055,7 @@ int hfi1_get_ib_cfg(struct hfi1_pportdata *ppd, int which)
val = ppd->phy_error_threshold;
break;
case HFI1_IB_CFG_LINKDEFAULT: /* IB link default (sleep/poll) */
- val = dd->link_default;
+ val = HLS_DEFAULT;
break;
case HFI1_IB_CFG_HRTBT: /* Heartbeat off/enable/auto */
@@ -9832,12 +10100,12 @@ u32 lrh_max_header_bytes(struct hfi1_devdata *dd)
* the first kernel context would have been allocated by now so
* we are guaranteed a valid value.
*/
- return (dd->rcd[0]->rcvhdrqentsize - 2/*PBC/RHF*/ + 1/*ICRC*/) << 2;
+ return (get_hdrqentsize(dd->rcd[0]) - 2/*PBC/RHF*/ + 1/*ICRC*/) << 2;
}
/*
* Set Send Length
- * @ppd - per port data
+ * @ppd: per port data
*
* Set the MTU by limiting how many DWs may be sent. The SendLenCheck*
* registers compare against LRH.PktLen, so use the max bytes included
@@ -9877,7 +10145,7 @@ static void set_send_length(struct hfi1_pportdata *ppd)
thres = min(sc_percent_to_threshold(dd->vld[i].sc, 50),
sc_mtu_to_threshold(dd->vld[i].sc,
dd->vld[i].mtu,
- dd->rcd[0]->rcvhdrqentsize));
+ get_hdrqentsize(dd->rcd[0])));
for (j = 0; j < INIT_SC_PER_VL; j++)
sc_set_cr_threshold(
pio_select_send_context_vl(dd, j, i),
@@ -9906,10 +10174,16 @@ static void set_lidlmc(struct hfi1_pportdata *ppd)
struct hfi1_devdata *dd = ppd->dd;
u32 mask = ~((1U << ppd->lmc) - 1);
u64 c1 = read_csr(ppd->dd, DCC_CFG_PORT_CONFIG1);
+ u32 lid;
+ /*
+ * Program 0 in CSR if port lid is extended. This prevents
+ * 9B packets being sent out for large lids.
+ */
+ lid = (ppd->lid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) ? 0 : ppd->lid;
c1 &= ~(DCC_CFG_PORT_CONFIG1_TARGET_DLID_SMASK
| DCC_CFG_PORT_CONFIG1_DLID_MASK_SMASK);
- c1 |= ((ppd->lid & DCC_CFG_PORT_CONFIG1_TARGET_DLID_MASK)
+ c1 |= ((lid & DCC_CFG_PORT_CONFIG1_TARGET_DLID_MASK)
<< DCC_CFG_PORT_CONFIG1_TARGET_DLID_SHIFT) |
((mask & DCC_CFG_PORT_CONFIG1_DLID_MASK_MASK)
<< DCC_CFG_PORT_CONFIG1_DLID_MASK_SHIFT);
@@ -9920,39 +10194,17 @@ static void set_lidlmc(struct hfi1_pportdata *ppd)
*/
sreg = ((mask & SEND_CTXT_CHECK_SLID_MASK_MASK) <<
SEND_CTXT_CHECK_SLID_MASK_SHIFT) |
- (((ppd->lid & mask) & SEND_CTXT_CHECK_SLID_VALUE_MASK) <<
+ (((lid & mask) & SEND_CTXT_CHECK_SLID_VALUE_MASK) <<
SEND_CTXT_CHECK_SLID_VALUE_SHIFT);
- for (i = 0; i < dd->chip_send_contexts; i++) {
+ for (i = 0; i < chip_send_contexts(dd); i++) {
hfi1_cdbg(LINKVERB, "SendContext[%d].SLID_CHECK = 0x%x",
i, (u32)sreg);
write_kctxt_csr(dd, i, SEND_CTXT_CHECK_SLID, sreg);
}
/* Now we have to do the same thing for the sdma engines */
- sdma_update_lmc(dd, mask, ppd->lid);
-}
-
-static int wait_phy_linkstate(struct hfi1_devdata *dd, u32 state, u32 msecs)
-{
- unsigned long timeout;
- u32 curr_state;
-
- timeout = jiffies + msecs_to_jiffies(msecs);
- while (1) {
- curr_state = read_physical_state(dd);
- if (curr_state == state)
- break;
- if (time_after(jiffies, timeout)) {
- dd_dev_err(dd,
- "timeout waiting for phy link state 0x%x, current state is 0x%x\n",
- state, curr_state);
- return -ETIMEDOUT;
- }
- usleep_range(1950, 2050); /* sleep 2ms-ish */
- }
-
- return 0;
+ sdma_update_lmc(dd, mask, lid);
}
static const char *state_completed_string(u32 completed)
@@ -10006,6 +10258,10 @@ static const char * const state_complete_reasons[] = {
[0x33] =
"Link partner completed the VerifyCap state, but the passing lanes do not meet the local link width policy",
[0x34] = tx_out_of_policy,
+ [0x35] = "Negotiated link width is mutually exclusive",
+ [0x36] =
+ "Timed out before receiving verifycap frames in VerifyCap.Exchange",
+ [0x37] = "Unable to resolve secure data exchange",
};
static const char *state_complete_reason_code_string(struct hfi1_pportdata *ppd,
@@ -10080,6 +10336,63 @@ static void check_lni_states(struct hfi1_pportdata *ppd)
decode_state_complete(ppd, last_remote_state, "received");
}
+/* wait for wait_ms for LINK_TRANSFER_ACTIVE to go to 1 */
+static int wait_link_transfer_active(struct hfi1_devdata *dd, int wait_ms)
+{
+ u64 reg;
+ unsigned long timeout;
+
+ /* watch LCB_STS_LINK_TRANSFER_ACTIVE */
+ timeout = jiffies + msecs_to_jiffies(wait_ms);
+ while (1) {
+ reg = read_csr(dd, DC_LCB_STS_LINK_TRANSFER_ACTIVE);
+ if (reg)
+ break;
+ if (time_after(jiffies, timeout)) {
+ dd_dev_err(dd,
+ "timeout waiting for LINK_TRANSFER_ACTIVE\n");
+ return -ETIMEDOUT;
+ }
+ udelay(2);
+ }
+ return 0;
+}
+
+/* called when the logical link state is not down as it should be */
+static void force_logical_link_state_down(struct hfi1_pportdata *ppd)
+{
+ struct hfi1_devdata *dd = ppd->dd;
+
+ /*
+ * Bring link up in LCB loopback
+ */
+ write_csr(dd, DC_LCB_CFG_TX_FIFOS_RESET, 1);
+ write_csr(dd, DC_LCB_CFG_IGNORE_LOST_RCLK,
+ DC_LCB_CFG_IGNORE_LOST_RCLK_EN_SMASK);
+
+ write_csr(dd, DC_LCB_CFG_LANE_WIDTH, 0);
+ write_csr(dd, DC_LCB_CFG_REINIT_AS_SLAVE, 0);
+ write_csr(dd, DC_LCB_CFG_CNT_FOR_SKIP_STALL, 0x110);
+ write_csr(dd, DC_LCB_CFG_LOOPBACK, 0x2);
+
+ write_csr(dd, DC_LCB_CFG_TX_FIFOS_RESET, 0);
+ (void)read_csr(dd, DC_LCB_CFG_TX_FIFOS_RESET);
+ udelay(3);
+ write_csr(dd, DC_LCB_CFG_ALLOW_LINK_UP, 1);
+ write_csr(dd, DC_LCB_CFG_RUN, 1ull << DC_LCB_CFG_RUN_EN_SHIFT);
+
+ wait_link_transfer_active(dd, 100);
+
+ /*
+ * Bring the link down again.
+ */
+ write_csr(dd, DC_LCB_CFG_TX_FIFOS_RESET, 1);
+ write_csr(dd, DC_LCB_CFG_ALLOW_LINK_UP, 0);
+ write_csr(dd, DC_LCB_CFG_IGNORE_LOST_RCLK, 0);
+
+ dd_dev_info(ppd->dd, "logical state forced to LINK_DOWN\n");
+}
+
/*
* Helper for set_link_state(). Do not call except from that routine.
* Expects ppd->hls_mutex to be held.
@@ -10091,59 +10404,34 @@ static void check_lni_states(struct hfi1_pportdata *ppd)
static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason)
{
struct hfi1_devdata *dd = ppd->dd;
- u32 pstate, previous_state;
+ u32 previous_state;
+ int offline_state_ret;
int ret;
- int do_transition;
- int do_wait;
+
+ update_lcb_cache(dd);
previous_state = ppd->host_link_state;
ppd->host_link_state = HLS_GOING_OFFLINE;
- pstate = read_physical_state(dd);
- if (pstate == PLS_OFFLINE) {
- do_transition = 0; /* in right state */
- do_wait = 0; /* ...no need to wait */
- } else if ((pstate & 0xff) == PLS_OFFLINE) {
- do_transition = 0; /* in an offline transient state */
- do_wait = 1; /* ...wait for it to settle */
- } else {
- do_transition = 1; /* need to move to offline */
- do_wait = 1; /* ...will need to wait */
- }
-
- if (do_transition) {
- ret = set_physical_link_state(dd,
- (rem_reason << 8) | PLS_OFFLINE);
- if (ret != HCMD_SUCCESS) {
- dd_dev_err(dd,
- "Failed to transition to Offline link state, return %d\n",
- ret);
- return -EINVAL;
- }
- if (ppd->offline_disabled_reason ==
- HFI1_ODR_MASK(OPA_LINKDOWN_REASON_NONE))
- ppd->offline_disabled_reason =
- HFI1_ODR_MASK(OPA_LINKDOWN_REASON_TRANSIENT);
- }
+ /* start offline transition */
+ ret = set_physical_link_state(dd, (rem_reason << 8) | PLS_OFFLINE);
- if (do_wait) {
- /* it can take a while for the link to go down */
- ret = wait_phy_linkstate(dd, PLS_OFFLINE, 10000);
- if (ret < 0)
- return ret;
+ if (ret != HCMD_SUCCESS) {
+ dd_dev_err(dd,
+ "Failed to transition to Offline link state, return %d\n",
+ ret);
+ return -EINVAL;
}
+ if (ppd->offline_disabled_reason ==
+ HFI1_ODR_MASK(OPA_LINKDOWN_REASON_NONE))
+ ppd->offline_disabled_reason =
+ HFI1_ODR_MASK(OPA_LINKDOWN_REASON_TRANSIENT);
- /* make sure the logical state is also down */
- wait_logical_linkstate(ppd, IB_PORT_DOWN, 1000);
-
- /*
- * Now in charge of LCB - must be after the physical state is
- * offline.quiet and before host_link_state is changed.
- */
- set_host_lcb_access(dd);
- write_csr(dd, DC_LCB_ERR_EN, ~0ull); /* watch LCB errors */
- ppd->host_link_state = HLS_LINK_COOLDOWN; /* LCB access allowed */
+ offline_state_ret = wait_phys_link_offline_substates(ppd, 10000);
+ if (offline_state_ret < 0)
+ return offline_state_ret;
+ /* Disabling AOC transmitters */
if (ppd->port_type == PORT_TYPE_QSFP &&
ppd->qsfp_info.limiting_active &&
qsfp_mod_present(ppd)) {
@@ -10161,6 +10449,31 @@ static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason)
}
/*
+ * Wait for the offline.Quiet transition if it hasn't happened yet. It
+ * can take a while for the link to go down.
+ */
+ if (offline_state_ret != PLS_OFFLINE_QUIET) {
+ ret = wait_physical_linkstate(ppd, PLS_OFFLINE, 30000);
+ if (ret < 0)
+ return ret;
+ }
+
+ /*
+ * Now in charge of LCB - must be after the physical state is
+ * offline.quiet and before host_link_state is changed.
+ */
+ set_host_lcb_access(dd);
+ write_csr(dd, DC_LCB_ERR_EN, ~0ull); /* watch LCB errors */
+
+ /* make sure the logical state is also down */
+ ret = wait_logical_linkstate(ppd, IB_PORT_DOWN, 1000);
+ if (ret)
+ force_logical_link_state_down(ppd);
+
+ ppd->host_link_state = HLS_LINK_COOLDOWN; /* LCB access allowed */
+ update_statusp(ppd, IB_PORT_DOWN);
+
+ /*
* The LNI has a mandatory wait time after the physical state
* moves to Offline.Quiet. The wait time may be different
* depending on how the link went down. The 8051 firmware
@@ -10192,6 +10505,9 @@ static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason)
& (HLS_DN_POLL | HLS_VERIFY_CAP | HLS_GOING_UP)) {
/* went down while attempting link up */
check_lni_states(ppd);
+
+ /* The QSFP doesn't need to be reset on LNI failure */
+ ppd->qsfp_info.reset_needed = 0;
}
/* the active link width (downgrade) is 0 on link down */
@@ -10248,11 +10564,11 @@ static const char *link_state_reason_name(struct hfi1_pportdata *ppd, u32 state)
}
/*
- * driver_physical_state - convert the driver's notion of a port's
+ * driver_pstate - convert the driver's notion of a port's
* state (an HLS_*) into a physical state (a {IB,OPA}_PORTPHYSSTATE_*).
* Return -1 (converted to a u32) to indicate error.
*/
-u32 driver_physical_state(struct hfi1_pportdata *ppd)
+u32 driver_pstate(struct hfi1_pportdata *ppd)
{
switch (ppd->host_link_state) {
case HLS_UP_INIT:
@@ -10266,9 +10582,9 @@ u32 driver_physical_state(struct hfi1_pportdata *ppd)
case HLS_DN_OFFLINE:
return OPA_PORTPHYSSTATE_OFFLINE;
case HLS_VERIFY_CAP:
- return IB_PORTPHYSSTATE_POLLING;
+ return IB_PORTPHYSSTATE_TRAINING;
case HLS_GOING_UP:
- return IB_PORTPHYSSTATE_POLLING;
+ return IB_PORTPHYSSTATE_TRAINING;
case HLS_GOING_OFFLINE:
return OPA_PORTPHYSSTATE_OFFLINE;
case HLS_LINK_COOLDOWN:
@@ -10282,11 +10598,11 @@ u32 driver_physical_state(struct hfi1_pportdata *ppd)
}
/*
- * driver_logical_state - convert the driver's notion of a port's
+ * driver_lstate - convert the driver's notion of a port's
* state (an HLS_*) into a logical state (a IB_PORT_*). Return -1
* (converted to a u32) to indicate error.
*/
-u32 driver_logical_state(struct hfi1_pportdata *ppd)
+u32 driver_lstate(struct hfi1_pportdata *ppd)
{
if (ppd->host_link_state && (ppd->host_link_state & HLS_DOWN))
return IB_PORT_DOWN;
@@ -10316,6 +10632,31 @@ void set_link_down_reason(struct hfi1_pportdata *ppd, u8 lcl_reason,
}
}
+/**
+ * data_vls_operational() - Verify if data VL BCT credits and MTU
+ * are both set.
+ * @ppd: pointer to hfi1_pportdata structure
+ *
+ * Return: true - Ok, false -otherwise.
+ */
+static inline bool data_vls_operational(struct hfi1_pportdata *ppd)
+{
+ int i;
+ u64 reg;
+
+ if (!ppd->actual_vls_operational)
+ return false;
+
+ for (i = 0; i < ppd->vls_supported; i++) {
+ reg = read_csr(ppd->dd, SEND_CM_CREDIT_VL + (8 * i));
+ if ((reg && !ppd->dd->vld[i].mtu) ||
+ (!reg && ppd->dd->vld[i].mtu))
+ return false;
+ }
+
+ return true;
+}
+
/*
* Change the physical and/or logical link state.
*
@@ -10335,7 +10676,7 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
orig_new_state = state;
if (state == HLS_DN_DOWNDEF)
- state = dd->link_default;
+ state = HLS_DEFAULT;
/* interpret poll -> poll as a link bounce */
poll_bounce = ppd->host_link_state == HLS_DN_POLL &&
@@ -10378,40 +10719,70 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
goto unexpected;
}
- ppd->host_link_state = HLS_UP_INIT;
+ /*
+ * Wait for Link_Up physical state.
+ * Physical and Logical states should already be
+ * be transitioned to LinkUp and LinkInit respectively.
+ */
+ ret = wait_physical_linkstate(ppd, PLS_LINKUP, 1000);
+ if (ret) {
+ dd_dev_err(dd,
+ "%s: physical state did not change to LINK-UP\n",
+ __func__);
+ break;
+ }
+
ret = wait_logical_linkstate(ppd, IB_PORT_INIT, 1000);
if (ret) {
- /* logical state didn't change, stay at going_up */
- ppd->host_link_state = HLS_GOING_UP;
dd_dev_err(dd,
"%s: logical state did not change to INIT\n",
__func__);
- } else {
- /* clear old transient LINKINIT_REASON code */
- if (ppd->linkinit_reason >= OPA_LINKINIT_REASON_CLEAR)
- ppd->linkinit_reason =
- OPA_LINKINIT_REASON_LINKUP;
+ break;
+ }
- /* enable the port */
- add_rcvctrl(dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK);
+ /* clear old transient LINKINIT_REASON code */
+ if (ppd->linkinit_reason >= OPA_LINKINIT_REASON_CLEAR)
+ ppd->linkinit_reason =
+ OPA_LINKINIT_REASON_LINKUP;
- handle_linkup_change(dd, 1);
- }
+ /* enable the port */
+ add_rcvctrl(dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK);
+
+ handle_linkup_change(dd, 1);
+ pio_kernel_linkup(dd);
+
+ /*
+ * After link up, a new link width will have been set.
+ * Update the xmit counters with regards to the new
+ * link width.
+ */
+ update_xmit_counters(ppd, ppd->link_width_active);
+
+ ppd->host_link_state = HLS_UP_INIT;
+ update_statusp(ppd, IB_PORT_INIT);
break;
case HLS_UP_ARMED:
if (ppd->host_link_state != HLS_UP_INIT)
goto unexpected;
- ppd->host_link_state = HLS_UP_ARMED;
+ if (!data_vls_operational(ppd)) {
+ dd_dev_err(dd,
+ "%s: Invalid data VL credits or mtu\n",
+ __func__);
+ ret = -EINVAL;
+ break;
+ }
+
set_logical_state(dd, LSTATE_ARMED);
ret = wait_logical_linkstate(ppd, IB_PORT_ARMED, 1000);
if (ret) {
- /* logical state didn't change, stay at init */
- ppd->host_link_state = HLS_UP_INIT;
dd_dev_err(dd,
"%s: logical state did not change to ARMED\n",
__func__);
+ break;
}
+ ppd->host_link_state = HLS_UP_ARMED;
+ update_statusp(ppd, IB_PORT_ARMED);
/*
* The simulator does not currently implement SMA messages,
* so neighbor_normal is not set. Set it here when we first
@@ -10424,18 +10795,17 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
if (ppd->host_link_state != HLS_UP_ARMED)
goto unexpected;
- ppd->host_link_state = HLS_UP_ACTIVE;
set_logical_state(dd, LSTATE_ACTIVE);
ret = wait_logical_linkstate(ppd, IB_PORT_ACTIVE, 1000);
if (ret) {
- /* logical state didn't change, stay at armed */
- ppd->host_link_state = HLS_UP_ARMED;
dd_dev_err(dd,
"%s: logical state did not change to ACTIVE\n",
__func__);
} else {
/* tell all engines to go running */
sdma_all_running(dd);
+ ppd->host_link_state = HLS_UP_ACTIVE;
+ update_statusp(ppd, IB_PORT_ACTIVE);
/* Signal the IB layer that the port has went active */
event.device = &dd->verbs_dev.rdi.ibdev;
@@ -10471,13 +10841,15 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
break;
ppd->port_error_action = 0;
- ppd->host_link_state = HLS_DN_POLL;
if (quick_linkup) {
/* quick linkup does not go into polling */
ret = do_quick_linkup(dd);
} else {
ret1 = set_physical_link_state(dd, PLS_POLLING);
+ if (!ret1)
+ ret1 = wait_phys_link_out_of_offline(ppd,
+ 3000);
if (ret1 != HCMD_SUCCESS) {
dd_dev_err(dd,
"Failed to transition to Polling link state, return 0x%x\n",
@@ -10485,6 +10857,14 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
ret = -EINVAL;
}
}
+
+ /*
+ * Change the host link state after requesting DC8051 to
+ * change its physical state so that we can ignore any
+ * interrupt with stale LNI(XX) error, which will not be
+ * cleared until DC8051 transitions to Polling state.
+ */
+ ppd->host_link_state = HLS_DN_POLL;
ppd->offline_disabled_reason =
HFI1_ODR_MASK(OPA_LINKDOWN_REASON_NONE);
/*
@@ -10493,6 +10873,8 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
*/
if (ret)
goto_offline(ppd, 0);
+ else
+ log_physical_state(ppd, PLS_POLLING);
break;
case HLS_DN_DISABLE:
/* link is disabled */
@@ -10508,16 +10890,25 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
ppd->remote_link_down_reason = 0;
}
- ret1 = set_physical_link_state(dd, PLS_DISABLED);
- if (ret1 != HCMD_SUCCESS) {
- dd_dev_err(dd,
- "Failed to transition to Disabled link state, return 0x%x\n",
- ret1);
- ret = -EINVAL;
- break;
+ if (!dd->dc_shutdown) {
+ ret1 = set_physical_link_state(dd, PLS_DISABLED);
+ if (ret1 != HCMD_SUCCESS) {
+ dd_dev_err(dd,
+ "Failed to transition to Disabled link state, return 0x%x\n",
+ ret1);
+ ret = -EINVAL;
+ break;
+ }
+ ret = wait_physical_linkstate(ppd, PLS_DISABLED, 10000);
+ if (ret) {
+ dd_dev_err(dd,
+ "%s: physical state did not change to DISABLED\n",
+ __func__);
+ break;
+ }
+ dc_shutdown(dd);
}
ppd->host_link_state = HLS_DN_DISABLE;
- dc_shutdown(dd);
break;
case HLS_DN_OFFLINE:
if (ppd->host_link_state == HLS_DN_DISABLE)
@@ -10532,6 +10923,7 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
if (ppd->host_link_state != HLS_DN_POLL)
goto unexpected;
ppd->host_link_state = HLS_VERIFY_CAP;
+ log_physical_state(ppd, PLS_CONFIGPHY_VERIFYCAP);
break;
case HLS_GOING_UP:
if (ppd->host_link_state != HLS_VERIFY_CAP)
@@ -11467,12 +11859,10 @@ void update_usrhead(struct hfi1_ctxtdata *rcd, u32 hd, u32 updegr, u32 egrhd,
<< RCV_EGR_INDEX_HEAD_HEAD_SHIFT;
write_uctxt_csr(dd, ctxt, RCV_EGR_INDEX_HEAD, reg);
}
- mmiowb();
reg = ((u64)rcv_intr_count << RCV_HDR_HEAD_COUNTER_SHIFT) |
(((u64)hd & RCV_HDR_HEAD_HEAD_MASK)
<< RCV_HDR_HEAD_HEAD_SHIFT);
write_uctxt_csr(dd, ctxt, RCV_HDR_HEAD, reg);
- mmiowb();
}
u32 hdrqempty(struct hfi1_ctxtdata *rcd)
@@ -11482,7 +11872,7 @@ u32 hdrqempty(struct hfi1_ctxtdata *rcd)
head = (read_uctxt_csr(rcd->dd, rcd->ctxt, RCV_HDR_HEAD)
& RCV_HDR_HEAD_HEAD_SMASK) >> RCV_HDR_HEAD_HEAD_SHIFT;
- if (rcd->rcvhdrtail_kvaddr)
+ if (hfi1_rcvhdrtail_kvaddr(rcd))
tail = get_rcvhdrtail(rcd);
else
tail = read_uctxt_csr(rcd->dd, rcd->ctxt, RCV_HDR_TAIL);
@@ -11526,16 +11916,96 @@ static u32 encoded_size(u32 size)
return 0x1; /* if invalid, go with the minimum size */
}
-void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt)
+/**
+ * encode_rcv_header_entry_size - return chip specific encoding for size
+ * @size: size in dwords
+ *
+ * Convert a receive header entry size that to the encoding used in the CSR.
+ *
+ * Return a zero if the given size is invalid, otherwise the encoding.
+ */
+u8 encode_rcv_header_entry_size(u8 size)
+{
+ /* there are only 3 valid receive header entry sizes */
+ if (size == 2)
+ return 1;
+ if (size == 16)
+ return 2;
+ if (size == 32)
+ return 4;
+ return 0; /* invalid */
+}
+
+/**
+ * hfi1_validate_rcvhdrcnt - validate hdrcnt
+ * @dd: the device data
+ * @thecnt: the header count
+ */
+int hfi1_validate_rcvhdrcnt(struct hfi1_devdata *dd, uint thecnt)
+{
+ if (thecnt <= HFI1_MIN_HDRQ_EGRBUF_CNT) {
+ dd_dev_err(dd, "Receive header queue count too small\n");
+ return -EINVAL;
+ }
+
+ if (thecnt > HFI1_MAX_HDRQ_EGRBUF_CNT) {
+ dd_dev_err(dd,
+ "Receive header queue count cannot be greater than %u\n",
+ HFI1_MAX_HDRQ_EGRBUF_CNT);
+ return -EINVAL;
+ }
+
+ if (thecnt % HDRQ_INCREMENT) {
+ dd_dev_err(dd, "Receive header queue count %d must be divisible by %lu\n",
+ thecnt, HDRQ_INCREMENT);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/**
+ * set_hdrq_regs - set header queue registers for context
+ * @dd: the device data
+ * @ctxt: the context
+ * @entsize: the dword entry size
+ * @hdrcnt: the number of header entries
+ */
+void set_hdrq_regs(struct hfi1_devdata *dd, u8 ctxt, u8 entsize, u16 hdrcnt)
+{
+ u64 reg;
+
+ reg = (((u64)hdrcnt >> HDRQ_SIZE_SHIFT) & RCV_HDR_CNT_CNT_MASK) <<
+ RCV_HDR_CNT_CNT_SHIFT;
+ write_kctxt_csr(dd, ctxt, RCV_HDR_CNT, reg);
+ reg = ((u64)encode_rcv_header_entry_size(entsize) &
+ RCV_HDR_ENT_SIZE_ENT_SIZE_MASK) <<
+ RCV_HDR_ENT_SIZE_ENT_SIZE_SHIFT;
+ write_kctxt_csr(dd, ctxt, RCV_HDR_ENT_SIZE, reg);
+ reg = ((u64)DEFAULT_RCVHDRSIZE & RCV_HDR_SIZE_HDR_SIZE_MASK) <<
+ RCV_HDR_SIZE_HDR_SIZE_SHIFT;
+ write_kctxt_csr(dd, ctxt, RCV_HDR_SIZE, reg);
+
+ /*
+ * Program dummy tail address for every receive context
+ * before enabling any receive context
+ */
+ write_kctxt_csr(dd, ctxt, RCV_HDR_TAIL_ADDR,
+ dd->rcvhdrtail_dummy_dma);
+}
+
+void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op,
+ struct hfi1_ctxtdata *rcd)
{
- struct hfi1_ctxtdata *rcd;
u64 rcvctrl, reg;
int did_enable = 0;
+ u16 ctxt;
- rcd = dd->rcd[ctxt];
if (!rcd)
return;
+ ctxt = rcd->ctxt;
+
hfi1_cdbg(RCVCTRL, "ctxt %d op 0x%x", ctxt, op);
rcvctrl = read_kctxt_csr(dd, ctxt, RCV_CTXT_CTRL);
@@ -11545,13 +12015,13 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt)
/* reset the tail and hdr addresses, and sequence count */
write_kctxt_csr(dd, ctxt, RCV_HDR_ADDR,
rcd->rcvhdrq_dma);
- if (HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL))
+ if (hfi1_rcvhdrtail_kvaddr(rcd))
write_kctxt_csr(dd, ctxt, RCV_HDR_TAIL_ADDR,
rcd->rcvhdrqtailaddr_dma);
- rcd->seq_cnt = 1;
+ hfi1_set_seq_cnt(rcd, 1);
/* reset the cached receive header queue head value */
- rcd->head = 0;
+ hfi1_set_rcd_head(rcd, 0);
/*
* Zero the receive header queue so we don't get false
@@ -11559,7 +12029,7 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt)
* sequence numbers could land exactly on the same spot.
* E.g. a rcd restart before the receive header wrapped.
*/
- memset(rcd->rcvhdrq, 0, rcd->rcvhdrq_size);
+ memset(rcd->rcvhdrq, 0, rcvhdrq_size(rcd));
/* starting timeout */
rcd->rcvavail_timeout = dd->rcv_intr_timeout_csr;
@@ -11621,11 +12091,17 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt)
rcvctrl &= ~RCV_CTXT_CTRL_ENABLE_SMASK;
}
- if (op & HFI1_RCVCTRL_INTRAVAIL_ENB)
+ if (op & HFI1_RCVCTRL_INTRAVAIL_ENB) {
+ set_intr_bits(dd, IS_RCVAVAIL_START + rcd->ctxt,
+ IS_RCVAVAIL_START + rcd->ctxt, true);
rcvctrl |= RCV_CTXT_CTRL_INTR_AVAIL_SMASK;
- if (op & HFI1_RCVCTRL_INTRAVAIL_DIS)
+ }
+ if (op & HFI1_RCVCTRL_INTRAVAIL_DIS) {
+ set_intr_bits(dd, IS_RCVAVAIL_START + rcd->ctxt,
+ IS_RCVAVAIL_START + rcd->ctxt, false);
rcvctrl &= ~RCV_CTXT_CTRL_INTR_AVAIL_SMASK;
- if (op & HFI1_RCVCTRL_TAILUPD_ENB && rcd->rcvhdrqtailaddr_dma)
+ }
+ if ((op & HFI1_RCVCTRL_TAILUPD_ENB) && hfi1_rcvhdrtail_kvaddr(rcd))
rcvctrl |= RCV_CTXT_CTRL_TAIL_UPD_SMASK;
if (op & HFI1_RCVCTRL_TAILUPD_DIS) {
/* See comment on RcvCtxtCtrl.TailUpd above */
@@ -11654,9 +12130,15 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt)
rcvctrl |= RCV_CTXT_CTRL_DONT_DROP_EGR_FULL_SMASK;
if (op & HFI1_RCVCTRL_NO_EGR_DROP_DIS)
rcvctrl &= ~RCV_CTXT_CTRL_DONT_DROP_EGR_FULL_SMASK;
- rcd->rcvctrl = rcvctrl;
- hfi1_cdbg(RCVCTRL, "ctxt %d rcvctrl 0x%llx\n", ctxt, rcvctrl);
- write_kctxt_csr(dd, ctxt, RCV_CTXT_CTRL, rcd->rcvctrl);
+ if (op & HFI1_RCVCTRL_URGENT_ENB)
+ set_intr_bits(dd, IS_RCVURGENT_START + rcd->ctxt,
+ IS_RCVURGENT_START + rcd->ctxt, true);
+ if (op & HFI1_RCVCTRL_URGENT_DIS)
+ set_intr_bits(dd, IS_RCVURGENT_START + rcd->ctxt,
+ IS_RCVURGENT_START + rcd->ctxt, false);
+
+ hfi1_cdbg(RCVCTRL, "ctxt %d rcvctrl 0x%llx", ctxt, rcvctrl);
+ write_kctxt_csr(dd, ctxt, RCV_CTXT_CTRL, rcvctrl);
/* work around sticky RcvCtxtStatus.BlockedRHQFull */
if (did_enable &&
@@ -11725,10 +12207,10 @@ u32 hfi1_read_cntrs(struct hfi1_devdata *dd, char **namep, u64 **cntrp)
hfi1_cdbg(CNTR, "reading %s", entry->name);
if (entry->flags & CNTR_DISABLED) {
/* Nothing */
- hfi1_cdbg(CNTR, "\tDisabled\n");
+ hfi1_cdbg(CNTR, "\tDisabled");
} else {
if (entry->flags & CNTR_VL) {
- hfi1_cdbg(CNTR, "\tPer VL\n");
+ hfi1_cdbg(CNTR, "\tPer VL");
for (j = 0; j < C_VL_COUNT; j++) {
val = entry->rw_cntr(entry,
dd, j,
@@ -11736,21 +12218,21 @@ u32 hfi1_read_cntrs(struct hfi1_devdata *dd, char **namep, u64 **cntrp)
0);
hfi1_cdbg(
CNTR,
- "\t\tRead 0x%llx for %d\n",
+ "\t\tRead 0x%llx for %d",
val, j);
dd->cntrs[entry->offset + j] =
val;
}
} else if (entry->flags & CNTR_SDMA) {
hfi1_cdbg(CNTR,
- "\t Per SDMA Engine\n");
- for (j = 0; j < dd->chip_sdma_engines;
+ "\t Per SDMA Engine");
+ for (j = 0; j < chip_sdma_engines(dd);
j++) {
val =
entry->rw_cntr(entry, dd, j,
CNTR_MODE_R, 0);
hfi1_cdbg(CNTR,
- "\t\tRead 0x%llx for %d\n",
+ "\t\tRead 0x%llx for %d",
val, j);
dd->cntrs[entry->offset + j] =
val;
@@ -11791,7 +12273,7 @@ u32 hfi1_read_portcntrs(struct hfi1_pportdata *ppd, char **namep, u64 **cntrp)
hfi1_cdbg(CNTR, "reading %s", entry->name);
if (entry->flags & CNTR_DISABLED) {
/* Nothing */
- hfi1_cdbg(CNTR, "\tDisabled\n");
+ hfi1_cdbg(CNTR, "\tDisabled");
continue;
}
@@ -11825,9 +12307,9 @@ static void free_cntrs(struct hfi1_devdata *dd)
struct hfi1_pportdata *ppd;
int i;
- if (dd->synth_stats_timer.data)
- del_timer_sync(&dd->synth_stats_timer);
- dd->synth_stats_timer.data = 0;
+ if (dd->synth_stats_timer.function)
+ timer_delete_sync(&dd->synth_stats_timer);
+ cancel_work_sync(&dd->update_cntr_work);
ppd = (struct hfi1_pportdata *)(dd + 1);
for (i = 0; i < dd->num_pports; i++, ppd++) {
kfree(ppd->cntrs);
@@ -11849,6 +12331,10 @@ static void free_cntrs(struct hfi1_devdata *dd)
dd->scntrs = NULL;
kfree(dd->cntrnames);
dd->cntrnames = NULL;
+ if (dd->update_cntr_wq) {
+ destroy_workqueue(dd->update_cntr_wq);
+ dd->update_cntr_wq = NULL;
+ }
}
static u64 read_dev_port_cntr(struct hfi1_devdata *dd, struct cntr_entry *entry,
@@ -12004,7 +12490,7 @@ u64 write_port_cntr(struct hfi1_pportdata *ppd, int index, int vl, u64 data)
return write_dev_port_cntr(ppd->dd, entry, sval, ppd, vl, data);
}
-static void update_synth_timer(unsigned long opaque)
+static void do_update_synth_timer(struct work_struct *work)
{
u64 cur_tx;
u64 cur_rx;
@@ -12013,8 +12499,8 @@ static void update_synth_timer(unsigned long opaque)
int i, j, vl;
struct hfi1_pportdata *ppd;
struct cntr_entry *entry;
-
- struct hfi1_devdata *dd = (struct hfi1_devdata *)opaque;
+ struct hfi1_devdata *dd = container_of(work, struct hfi1_devdata,
+ update_cntr_work);
/*
* Rather than keep beating on the CSRs pick a minimal set that we can
@@ -12030,7 +12516,7 @@ static void update_synth_timer(unsigned long opaque)
hfi1_cdbg(
CNTR,
- "[%d] curr tx=0x%llx rx=0x%llx :: last tx=0x%llx rx=0x%llx\n",
+ "[%d] curr tx=0x%llx rx=0x%llx :: last tx=0x%llx rx=0x%llx",
dd->unit, cur_tx, cur_rx, dd->last_tx, dd->last_rx);
if ((cur_tx < dd->last_tx) || (cur_rx < dd->last_rx)) {
@@ -12044,7 +12530,7 @@ static void update_synth_timer(unsigned long opaque)
} else {
total_flits = (cur_tx - dd->last_tx) + (cur_rx - dd->last_rx);
hfi1_cdbg(CNTR,
- "[%d] total flits 0x%llx limit 0x%llx\n", dd->unit,
+ "[%d] total flits 0x%llx limit 0x%llx", dd->unit,
total_flits, (u64)CNTR_32BIT_MAX);
if (total_flits >= CNTR_32BIT_MAX) {
hfi1_cdbg(CNTR, "[%d] 32bit limit hit, updating",
@@ -12097,7 +12583,13 @@ static void update_synth_timer(unsigned long opaque)
} else {
hfi1_cdbg(CNTR, "[%d] No update necessary", dd->unit);
}
+}
+
+static void update_synth_timer(struct timer_list *t)
+{
+ struct hfi1_devdata *dd = timer_container_of(dd, t, synth_stats_timer);
+ queue_work(dd->update_cntr_wq, &dd->update_cntr_work);
mod_timer(&dd->synth_stats_timer, jiffies + HZ * SYNTH_CNT_TIME);
}
@@ -12111,10 +12603,10 @@ static int init_cntrs(struct hfi1_devdata *dd)
struct hfi1_pportdata *ppd;
const char *bit_type_32 = ",32";
const int bit_type_32_sz = strlen(bit_type_32);
+ u32 sdma_engines = chip_sdma_engines(dd);
/* set up the stats timer; the add_timer is done at the end */
- setup_timer(&dd->synth_stats_timer, update_synth_timer,
- (unsigned long)dd);
+ timer_setup(&dd->synth_stats_timer, update_synth_timer, 0);
/***********************/
/* per device counters */
@@ -12144,7 +12636,7 @@ static int init_cntrs(struct hfi1_devdata *dd)
}
} else if (dev_cntrs[i].flags & CNTR_SDMA) {
dev_cntrs[i].offset = dd->ndevcntrs;
- for (j = 0; j < dd->chip_sdma_engines; j++) {
+ for (j = 0; j < sdma_engines; j++) {
snprintf(name, C_MAX_NAME, "%s%d",
dev_cntrs[i].name, j);
sz += strlen(name);
@@ -12166,7 +12658,8 @@ static int init_cntrs(struct hfi1_devdata *dd)
}
/* allocate space for the counter values */
- dd->cntrs = kcalloc(dd->ndevcntrs, sizeof(u64), GFP_KERNEL);
+ dd->cntrs = kcalloc(dd->ndevcntrs + num_driver_cntrs, sizeof(u64),
+ GFP_KERNEL);
if (!dd->cntrs)
goto bail;
@@ -12201,7 +12694,7 @@ static int init_cntrs(struct hfi1_devdata *dd)
*p++ = '\n';
}
} else if (dev_cntrs[i].flags & CNTR_SDMA) {
- for (j = 0; j < dd->chip_sdma_engines; j++) {
+ for (j = 0; j < sdma_engines; j++) {
snprintf(name, C_MAX_NAME, "%s%d",
dev_cntrs[i].name, j);
memcpy(p, name, strlen(name));
@@ -12333,6 +12826,13 @@ static int init_cntrs(struct hfi1_devdata *dd)
if (init_cpu_counters(dd))
goto bail;
+ dd->update_cntr_wq = alloc_ordered_workqueue("hfi1_update_cntr_%d",
+ WQ_MEM_RECLAIM, dd->unit);
+ if (!dd->update_cntr_wq)
+ goto bail;
+
+ INIT_WORK(&dd->update_cntr_work, do_update_synth_timer);
+
mod_timer(&dd->synth_stats_timer, jiffies + HZ * SYNTH_CNT_TIME);
return 0;
bail:
@@ -12343,11 +12843,6 @@ bail:
static u32 chip_to_opa_lstate(struct hfi1_devdata *dd, u32 chip_lstate)
{
switch (chip_lstate) {
- default:
- dd_dev_err(dd,
- "Unknown logical state 0x%x, reporting IB_PORT_DOWN\n",
- chip_lstate);
- /* fall through */
case LSTATE_DOWN:
return IB_PORT_DOWN;
case LSTATE_INIT:
@@ -12356,6 +12851,11 @@ static u32 chip_to_opa_lstate(struct hfi1_devdata *dd, u32 chip_lstate)
return IB_PORT_ARMED;
case LSTATE_ACTIVE:
return IB_PORT_ACTIVE;
+ default:
+ dd_dev_err(dd,
+ "Unknown logical state 0x%x, reporting IB_PORT_DOWN\n",
+ chip_lstate);
+ return IB_PORT_DOWN;
}
}
@@ -12363,10 +12863,6 @@ u32 chip_to_opa_pstate(struct hfi1_devdata *dd, u32 chip_pstate)
{
/* look at the HFI meta-states only */
switch (chip_pstate & 0xf0) {
- default:
- dd_dev_err(dd, "Unexpected chip physical state of 0x%x\n",
- chip_pstate);
- /* fall through */
case PLS_DISABLED:
return IB_PORTPHYSSTATE_DISABLED;
case PLS_OFFLINE:
@@ -12379,25 +12875,13 @@ u32 chip_to_opa_pstate(struct hfi1_devdata *dd, u32 chip_pstate)
return IB_PORTPHYSSTATE_LINKUP;
case PLS_PHYTEST:
return IB_PORTPHYSSTATE_PHY_TEST;
+ default:
+ dd_dev_err(dd, "Unexpected chip physical state of 0x%x\n",
+ chip_pstate);
+ return IB_PORTPHYSSTATE_DISABLED;
}
}
-/* return the OPA port logical state name */
-const char *opa_lstate_name(u32 lstate)
-{
- static const char * const port_logical_names[] = {
- "PORT_NOP",
- "PORT_DOWN",
- "PORT_INIT",
- "PORT_ARMED",
- "PORT_ACTIVE",
- "PORT_ACTIVE_DEFER",
- };
- if (lstate < ARRAY_SIZE(port_logical_names))
- return port_logical_names[lstate];
- return "unknown";
-}
-
/* return the OPA port physical state name */
const char *opa_pstate_name(u32 pstate)
{
@@ -12420,20 +12904,19 @@ const char *opa_pstate_name(u32 pstate)
return "unknown";
}
-/*
- * Read the hardware link state and set the driver's cached value of it.
- * Return the (new) current value.
+/**
+ * update_statusp - Update userspace status flag
+ * @ppd: Port data structure
+ * @state: port state information
+ *
+ * Actual port status is determined by the host_link_state value
+ * in the ppd.
+ *
+ * host_link_state MUST be updated before updating the user space
+ * statusp.
*/
-u32 get_logical_state(struct hfi1_pportdata *ppd)
+static void update_statusp(struct hfi1_pportdata *ppd, u32 state)
{
- u32 new_state;
-
- new_state = chip_to_opa_lstate(ppd->dd, read_logical_state(ppd->dd));
- if (new_state != ppd->lstate) {
- dd_dev_info(ppd->dd, "logical state changed to %s (0x%x)\n",
- opa_lstate_name(new_state), new_state);
- ppd->lstate = new_state;
- }
/*
* Set port status flags in the page mapped into userspace
* memory. Do it here to ensure a reliable state - this is
@@ -12443,7 +12926,7 @@ u32 get_logical_state(struct hfi1_pportdata *ppd)
* function.
*/
if (ppd->statusp) {
- switch (ppd->lstate) {
+ switch (state) {
case IB_PORT_DOWN:
case IB_PORT_INIT:
*ppd->statusp &= ~(HFI1_STATUS_IB_CONF |
@@ -12457,7 +12940,6 @@ u32 get_logical_state(struct hfi1_pportdata *ppd)
break;
}
}
- return ppd->lstate;
}
/**
@@ -12474,35 +12956,149 @@ static int wait_logical_linkstate(struct hfi1_pportdata *ppd, u32 state,
int msecs)
{
unsigned long timeout;
+ u32 new_state;
timeout = jiffies + msecs_to_jiffies(msecs);
while (1) {
- if (get_logical_state(ppd) == state)
- return 0;
- if (time_after(jiffies, timeout))
+ new_state = chip_to_opa_lstate(ppd->dd,
+ read_logical_state(ppd->dd));
+ if (new_state == state)
break;
+ if (time_after(jiffies, timeout)) {
+ dd_dev_err(ppd->dd,
+ "timeout waiting for link state 0x%x\n",
+ state);
+ return -ETIMEDOUT;
+ }
msleep(20);
}
- dd_dev_err(ppd->dd, "timeout waiting for link state 0x%x\n", state);
- return -ETIMEDOUT;
+ return 0;
}
-u8 hfi1_ibphys_portstate(struct hfi1_pportdata *ppd)
+static void log_state_transition(struct hfi1_pportdata *ppd, u32 state)
{
- u32 pstate;
- u32 ib_pstate;
+ u32 ib_pstate = chip_to_opa_pstate(ppd->dd, state);
- pstate = read_physical_state(ppd->dd);
- ib_pstate = chip_to_opa_pstate(ppd->dd, pstate);
- if (ppd->last_pstate != ib_pstate) {
- dd_dev_info(ppd->dd,
- "%s: physical state changed to %s (0x%x), phy 0x%x\n",
- __func__, opa_pstate_name(ib_pstate), ib_pstate,
- pstate);
- ppd->last_pstate = ib_pstate;
+ dd_dev_info(ppd->dd,
+ "physical state changed to %s (0x%x), phy 0x%x\n",
+ opa_pstate_name(ib_pstate), ib_pstate, state);
+}
+
+/*
+ * Read the physical hardware link state and check if it matches host
+ * drivers anticipated state.
+ */
+static void log_physical_state(struct hfi1_pportdata *ppd, u32 state)
+{
+ u32 read_state = read_physical_state(ppd->dd);
+
+ if (read_state == state) {
+ log_state_transition(ppd, state);
+ } else {
+ dd_dev_err(ppd->dd,
+ "anticipated phy link state 0x%x, read 0x%x\n",
+ state, read_state);
+ }
+}
+
+/*
+ * wait_physical_linkstate - wait for an physical link state change to occur
+ * @ppd: port device
+ * @state: the state to wait for
+ * @msecs: the number of milliseconds to wait
+ *
+ * Wait up to msecs milliseconds for physical link state change to occur.
+ * Returns 0 if state reached, otherwise -ETIMEDOUT.
+ */
+static int wait_physical_linkstate(struct hfi1_pportdata *ppd, u32 state,
+ int msecs)
+{
+ u32 read_state;
+ unsigned long timeout;
+
+ timeout = jiffies + msecs_to_jiffies(msecs);
+ while (1) {
+ read_state = read_physical_state(ppd->dd);
+ if (read_state == state)
+ break;
+ if (time_after(jiffies, timeout)) {
+ dd_dev_err(ppd->dd,
+ "timeout waiting for phy link state 0x%x\n",
+ state);
+ return -ETIMEDOUT;
+ }
+ usleep_range(1950, 2050); /* sleep 2ms-ish */
}
- return ib_pstate;
+
+ log_state_transition(ppd, state);
+ return 0;
+}
+
+/*
+ * wait_phys_link_offline_quiet_substates - wait for any offline substate
+ * @ppd: port device
+ * @msecs: the number of milliseconds to wait
+ *
+ * Wait up to msecs milliseconds for any offline physical link
+ * state change to occur.
+ * Returns 0 if at least one state is reached, otherwise -ETIMEDOUT.
+ */
+static int wait_phys_link_offline_substates(struct hfi1_pportdata *ppd,
+ int msecs)
+{
+ u32 read_state;
+ unsigned long timeout;
+
+ timeout = jiffies + msecs_to_jiffies(msecs);
+ while (1) {
+ read_state = read_physical_state(ppd->dd);
+ if ((read_state & 0xF0) == PLS_OFFLINE)
+ break;
+ if (time_after(jiffies, timeout)) {
+ dd_dev_err(ppd->dd,
+ "timeout waiting for phy link offline.quiet substates. Read state 0x%x, %dms\n",
+ read_state, msecs);
+ return -ETIMEDOUT;
+ }
+ usleep_range(1950, 2050); /* sleep 2ms-ish */
+ }
+
+ log_state_transition(ppd, read_state);
+ return read_state;
+}
+
+/*
+ * wait_phys_link_out_of_offline - wait for any out of offline state
+ * @ppd: port device
+ * @msecs: the number of milliseconds to wait
+ *
+ * Wait up to msecs milliseconds for any out of offline physical link
+ * state change to occur.
+ * Returns 0 if at least one state is reached, otherwise -ETIMEDOUT.
+ */
+static int wait_phys_link_out_of_offline(struct hfi1_pportdata *ppd,
+ int msecs)
+{
+ u32 read_state;
+ unsigned long timeout;
+
+ timeout = jiffies + msecs_to_jiffies(msecs);
+ while (1) {
+ read_state = read_physical_state(ppd->dd);
+ if ((read_state & 0xF0) != PLS_OFFLINE)
+ break;
+ if (time_after(jiffies, timeout)) {
+ dd_dev_err(ppd->dd,
+ "timeout waiting for phy link out of offline. Read state 0x%x, %dms\n",
+ read_state, msecs);
+ return -ETIMEDOUT;
+ }
+ usleep_range(1950, 2050); /* sleep 2ms-ish */
+ }
+
+ log_state_transition(ppd, read_state);
+ return read_state;
}
#define CLEAR_STATIC_RATE_CONTROL_SMASK(r) \
@@ -12511,7 +13107,7 @@ u8 hfi1_ibphys_portstate(struct hfi1_pportdata *ppd)
#define SET_STATIC_RATE_CONTROL_SMASK(r) \
(r |= SEND_CTXT_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK)
-int hfi1_init_ctxt(struct send_context *sc)
+void hfi1_init_ctxt(struct send_context *sc)
{
if (sc) {
struct hfi1_devdata *dd = sc->dd;
@@ -12528,7 +13124,6 @@ int hfi1_init_ctxt(struct send_context *sc)
write_kctxt_csr(dd, sc->hw_context,
SEND_CTXT_CHECK_ENABLE, reg);
}
- return 0;
}
int hfi1_tempsense_rd(struct hfi1_devdata *dd, struct hfi1_temp *temp)
@@ -12559,26 +13154,64 @@ int hfi1_tempsense_rd(struct hfi1_devdata *dd, struct hfi1_temp *temp)
/* ========================================================================= */
-/*
- * Enable/disable chip from delivering interrupts.
+/**
+ * read_mod_write() - Calculate the IRQ register index and set/clear the bits
+ * @dd: valid devdata
+ * @src: IRQ source to determine register index from
+ * @bits: the bits to set or clear
+ * @set: true == set the bits, false == clear the bits
+ *
*/
-void set_intr_state(struct hfi1_devdata *dd, u32 enable)
+static void read_mod_write(struct hfi1_devdata *dd, u16 src, u64 bits,
+ bool set)
{
- int i;
+ u64 reg;
+ u16 idx = src / BITS_PER_REGISTER;
+ unsigned long flags;
- /*
- * In HFI, the mask needs to be 1 to allow interrupts.
- */
- if (enable) {
- /* enable all interrupts */
- for (i = 0; i < CCE_NUM_INT_CSRS; i++)
- write_csr(dd, CCE_INT_MASK + (8 * i), ~(u64)0);
+ spin_lock_irqsave(&dd->irq_src_lock, flags);
+ reg = read_csr(dd, CCE_INT_MASK + (8 * idx));
+ if (set)
+ reg |= bits;
+ else
+ reg &= ~bits;
+ write_csr(dd, CCE_INT_MASK + (8 * idx), reg);
+ spin_unlock_irqrestore(&dd->irq_src_lock, flags);
+}
- init_qsfp_int(dd);
- } else {
- for (i = 0; i < CCE_NUM_INT_CSRS; i++)
- write_csr(dd, CCE_INT_MASK + (8 * i), 0ull);
+/**
+ * set_intr_bits() - Enable/disable a range (one or more) IRQ sources
+ * @dd: valid devdata
+ * @first: first IRQ source to set/clear
+ * @last: last IRQ source (inclusive) to set/clear
+ * @set: true == set the bits, false == clear the bits
+ *
+ * If first == last, set the exact source.
+ */
+int set_intr_bits(struct hfi1_devdata *dd, u16 first, u16 last, bool set)
+{
+ u64 bits = 0;
+ u64 bit;
+ u16 src;
+
+ if (first > NUM_INTERRUPT_SOURCES || last > NUM_INTERRUPT_SOURCES)
+ return -EINVAL;
+
+ if (last < first)
+ return -ERANGE;
+
+ for (src = first; src <= last; src++) {
+ bit = src % BITS_PER_REGISTER;
+ /* wrapped to next register? */
+ if (!bit && bits) {
+ read_mod_write(dd, src - 1, bits, set);
+ bits = 0;
+ }
+ bits |= BIT_ULL(bit);
}
+ read_mod_write(dd, last, bits, set);
+
+ return 0;
}
/*
@@ -12598,9 +13231,9 @@ static void clear_all_interrupts(struct hfi1_devdata *dd)
write_csr(dd, SEND_PIO_ERR_CLEAR, ~(u64)0);
write_csr(dd, SEND_DMA_ERR_CLEAR, ~(u64)0);
write_csr(dd, SEND_EGRESS_ERR_CLEAR, ~(u64)0);
- for (i = 0; i < dd->chip_send_contexts; i++)
+ for (i = 0; i < chip_send_contexts(dd); i++)
write_kctxt_csr(dd, i, SEND_CTXT_ERR_CLEAR, ~(u64)0);
- for (i = 0; i < dd->chip_sdma_engines; i++)
+ for (i = 0; i < chip_sdma_engines(dd); i++)
write_kctxt_csr(dd, i, SEND_DMA_ENG_ERR_CLEAR, ~(u64)0);
write_csr(dd, DCC_ERR_FLG_CLR, ~(u64)0);
@@ -12608,55 +13241,11 @@ static void clear_all_interrupts(struct hfi1_devdata *dd)
write_csr(dd, DC_DC8051_ERR_CLR, ~(u64)0);
}
-/* Move to pcie.c? */
-static void disable_intx(struct pci_dev *pdev)
-{
- pci_intx(pdev, 0);
-}
-
-static void clean_up_interrupts(struct hfi1_devdata *dd)
-{
- int i;
-
- /* remove irqs - must happen before disabling/turning off */
- if (dd->num_msix_entries) {
- /* MSI-X */
- struct hfi1_msix_entry *me = dd->msix_entries;
-
- for (i = 0; i < dd->num_msix_entries; i++, me++) {
- if (!me->arg) /* => no irq, no affinity */
- continue;
- hfi1_put_irq_affinity(dd, &dd->msix_entries[i]);
- free_irq(me->msix.vector, me->arg);
- }
- } else {
- /* INTx */
- if (dd->requested_intx_irq) {
- free_irq(dd->pcidev->irq, dd);
- dd->requested_intx_irq = 0;
- }
- }
-
- /* turn off interrupts */
- if (dd->num_msix_entries) {
- /* MSI-X */
- pci_disable_msix(dd->pcidev);
- } else {
- /* INTx */
- disable_intx(dd->pcidev);
- }
-
- /* clean structures */
- kfree(dd->msix_entries);
- dd->msix_entries = NULL;
- dd->num_msix_entries = 0;
-}
-
/*
* Remap the interrupt source from the general handler to the given MSI-X
* interrupt.
*/
-static void remap_intr(struct hfi1_devdata *dd, int isrc, int msix_intr)
+void remap_intr(struct hfi1_devdata *dd, int isrc, int msix_intr)
{
u64 reg;
int m, n;
@@ -12664,7 +13253,12 @@ static void remap_intr(struct hfi1_devdata *dd, int isrc, int msix_intr)
/* clear from the handled mask of the general interrupt */
m = isrc / 64;
n = isrc % 64;
- dd->gi_mask[m] &= ~((u64)1 << n);
+ if (likely(m < CCE_NUM_INT_CSRS)) {
+ dd->gi_mask[m] &= ~((u64)1 << n);
+ } else {
+ dd_dev_err(dd, "remap interrupt err\n");
+ return;
+ }
/* direct the chip source to the given MSI-X interrupt */
m = isrc / 8;
@@ -12675,8 +13269,7 @@ static void remap_intr(struct hfi1_devdata *dd, int isrc, int msix_intr)
write_csr(dd, CCE_INT_MAP + (8 * m), reg);
}
-static void remap_sdma_interrupts(struct hfi1_devdata *dd,
- int engine, int msix_intr)
+void remap_sdma_interrupts(struct hfi1_devdata *dd, int engine, int msix_intr)
{
/*
* SDMA engine interrupt sources grouped by type, rather than
@@ -12685,144 +13278,16 @@ static void remap_sdma_interrupts(struct hfi1_devdata *dd,
* SDMAProgress
* SDMAIdle
*/
- remap_intr(dd, IS_SDMA_START + 0 * TXE_NUM_SDMA_ENGINES + engine,
- msix_intr);
- remap_intr(dd, IS_SDMA_START + 1 * TXE_NUM_SDMA_ENGINES + engine,
- msix_intr);
- remap_intr(dd, IS_SDMA_START + 2 * TXE_NUM_SDMA_ENGINES + engine,
- msix_intr);
-}
-
-static int request_intx_irq(struct hfi1_devdata *dd)
-{
- int ret;
-
- snprintf(dd->intx_name, sizeof(dd->intx_name), DRIVER_NAME "_%d",
- dd->unit);
- ret = request_irq(dd->pcidev->irq, general_interrupt,
- IRQF_SHARED, dd->intx_name, dd);
- if (ret)
- dd_dev_err(dd, "unable to request INTx interrupt, err %d\n",
- ret);
- else
- dd->requested_intx_irq = 1;
- return ret;
-}
-
-static int request_msix_irqs(struct hfi1_devdata *dd)
-{
- int first_general, last_general;
- int first_sdma, last_sdma;
- int first_rx, last_rx;
- int i, ret = 0;
-
- /* calculate the ranges we are going to use */
- first_general = 0;
- last_general = first_general + 1;
- first_sdma = last_general;
- last_sdma = first_sdma + dd->num_sdma;
- first_rx = last_sdma;
- last_rx = first_rx + dd->n_krcv_queues;
-
- /*
- * Sanity check - the code expects all SDMA chip source
- * interrupts to be in the same CSR, starting at bit 0. Verify
- * that this is true by checking the bit location of the start.
- */
- BUILD_BUG_ON(IS_SDMA_START % 64);
-
- for (i = 0; i < dd->num_msix_entries; i++) {
- struct hfi1_msix_entry *me = &dd->msix_entries[i];
- const char *err_info;
- irq_handler_t handler;
- irq_handler_t thread = NULL;
- void *arg;
- int idx;
- struct hfi1_ctxtdata *rcd = NULL;
- struct sdma_engine *sde = NULL;
-
- /* obtain the arguments to request_irq */
- if (first_general <= i && i < last_general) {
- idx = i - first_general;
- handler = general_interrupt;
- arg = dd;
- snprintf(me->name, sizeof(me->name),
- DRIVER_NAME "_%d", dd->unit);
- err_info = "general";
- me->type = IRQ_GENERAL;
- } else if (first_sdma <= i && i < last_sdma) {
- idx = i - first_sdma;
- sde = &dd->per_sdma[idx];
- handler = sdma_interrupt;
- arg = sde;
- snprintf(me->name, sizeof(me->name),
- DRIVER_NAME "_%d sdma%d", dd->unit, idx);
- err_info = "sdma";
- remap_sdma_interrupts(dd, idx, i);
- me->type = IRQ_SDMA;
- } else if (first_rx <= i && i < last_rx) {
- idx = i - first_rx;
- rcd = dd->rcd[idx];
- /* no interrupt if no rcd */
- if (!rcd)
- continue;
- /*
- * Set the interrupt register and mask for this
- * context's interrupt.
- */
- rcd->ireg = (IS_RCVAVAIL_START + idx) / 64;
- rcd->imask = ((u64)1) <<
- ((IS_RCVAVAIL_START + idx) % 64);
- handler = receive_context_interrupt;
- thread = receive_context_thread;
- arg = rcd;
- snprintf(me->name, sizeof(me->name),
- DRIVER_NAME "_%d kctxt%d", dd->unit, idx);
- err_info = "receive context";
- remap_intr(dd, IS_RCVAVAIL_START + idx, i);
- me->type = IRQ_RCVCTXT;
- } else {
- /* not in our expected range - complain, then
- * ignore it
- */
- dd_dev_err(dd,
- "Unexpected extra MSI-X interrupt %d\n", i);
- continue;
- }
- /* no argument, no interrupt */
- if (!arg)
- continue;
- /* make sure the name is terminated */
- me->name[sizeof(me->name) - 1] = 0;
-
- ret = request_threaded_irq(me->msix.vector, handler, thread, 0,
- me->name, arg);
- if (ret) {
- dd_dev_err(dd,
- "unable to allocate %s interrupt, vector %d, index %d, err %d\n",
- err_info, me->msix.vector, idx, ret);
- return ret;
- }
- /*
- * assign arg after request_irq call, so it will be
- * cleaned up
- */
- me->arg = arg;
-
- ret = hfi1_get_irq_affinity(dd, me);
- if (ret)
- dd_dev_err(dd,
- "unable to pin IRQ %d\n", ret);
- }
-
- return ret;
+ remap_intr(dd, IS_SDMA_START + engine, msix_intr);
+ remap_intr(dd, IS_SDMA_PROGRESS_START + engine, msix_intr);
+ remap_intr(dd, IS_SDMA_IDLE_START + engine, msix_intr);
}
/*
* Set the general handler to accept all interrupts, remap all
* chip interrupts back to MSI-X 0.
*/
-static void reset_interrupts(struct hfi1_devdata *dd)
+void reset_interrupts(struct hfi1_devdata *dd)
{
int i;
@@ -12835,77 +13300,33 @@ static void reset_interrupts(struct hfi1_devdata *dd)
write_csr(dd, CCE_INT_MAP + (8 * i), 0);
}
+/**
+ * set_up_interrupts() - Initialize the IRQ resources and state
+ * @dd: valid devdata
+ *
+ */
static int set_up_interrupts(struct hfi1_devdata *dd)
{
- struct hfi1_msix_entry *entries;
- u32 total, request;
- int i, ret;
- int single_interrupt = 0; /* we expect to have all the interrupts */
-
- /*
- * Interrupt count:
- * 1 general, "slow path" interrupt (includes the SDMA engines
- * slow source, SDMACleanupDone)
- * N interrupts - one per used SDMA engine
- * M interrupt - one per kernel receive context
- */
- total = 1 + dd->num_sdma + dd->n_krcv_queues;
-
- entries = kcalloc(total, sizeof(*entries), GFP_KERNEL);
- if (!entries) {
- ret = -ENOMEM;
- goto fail;
- }
- /* 1-1 MSI-X entry assignment */
- for (i = 0; i < total; i++)
- entries[i].msix.entry = i;
-
- /* ask for MSI-X interrupts */
- request = total;
- request_msix(dd, &request, entries);
-
- if (request == 0) {
- /* using INTx */
- /* dd->num_msix_entries already zero */
- kfree(entries);
- single_interrupt = 1;
- dd_dev_err(dd, "MSI-X failed, using INTx interrupts\n");
- } else {
- /* using MSI-X */
- dd->num_msix_entries = request;
- dd->msix_entries = entries;
-
- if (request != total) {
- /* using MSI-X, with reduced interrupts */
- dd_dev_err(
- dd,
- "cannot handle reduced interrupt case, want %u, got %u\n",
- total, request);
- ret = -EINVAL;
- goto fail;
- }
- dd_dev_info(dd, "%u MSI-X interrupts allocated\n", total);
- }
+ int ret;
/* mask all interrupts */
- set_intr_state(dd, 0);
+ set_intr_bits(dd, IS_FIRST_SOURCE, IS_LAST_SOURCE, false);
+
/* clear all pending interrupts */
clear_all_interrupts(dd);
/* reset general handler mask, chip MSI-X mappings */
reset_interrupts(dd);
- if (single_interrupt)
- ret = request_intx_irq(dd);
- else
- ret = request_msix_irqs(dd);
+ /* ask for MSI-X interrupts */
+ ret = msix_initialize(dd);
if (ret)
- goto fail;
+ return ret;
- return 0;
+ ret = msix_request_irqs(dd);
+ if (ret)
+ msix_clean_up_interrupts(dd);
-fail:
- clean_up_interrupts(dd);
return ret;
}
@@ -12914,18 +13335,22 @@ fail:
*
* num_rcv_contexts - number of contexts being used
* n_krcv_queues - number of kernel contexts
- * first_user_ctxt - first non-kernel context in array of contexts
+ * first_dyn_alloc_ctxt - first dynamically allocated context
+ * in array of contexts
* freectxts - number of free user contexts
* num_send_contexts - number of PIO send contexts being used
+ * num_netdev_contexts - number of contexts reserved for netdev
*/
static int set_up_context_variables(struct hfi1_devdata *dd)
{
unsigned long num_kernel_contexts;
- int total_contexts;
+ u16 num_netdev_contexts;
int ret;
unsigned ngroups;
- int qos_rmt_count;
- int user_rmt_reduced;
+ int rmt_count;
+ u32 n_usr_ctxts;
+ u32 send_contexts = chip_send_contexts(dd);
+ u32 rcv_contexts = chip_rcv_contexts(dd);
/*
* Kernel receive contexts:
@@ -12947,62 +13372,85 @@ static int set_up_context_variables(struct hfi1_devdata *dd)
* Every kernel receive context needs an ACK send context.
* one send context is allocated for each VL{0-7} and VL15
*/
- if (num_kernel_contexts > (dd->chip_send_contexts - num_vls - 1)) {
+ if (num_kernel_contexts > (send_contexts - num_vls - 1)) {
dd_dev_err(dd,
"Reducing # kernel rcv contexts to: %d, from %lu\n",
- (int)(dd->chip_send_contexts - num_vls - 1),
+ send_contexts - num_vls - 1,
num_kernel_contexts);
- num_kernel_contexts = dd->chip_send_contexts - num_vls - 1;
+ num_kernel_contexts = send_contexts - num_vls - 1;
}
+
/*
* User contexts:
* - default to 1 user context per real (non-HT) CPU core if
* num_user_contexts is negative
*/
if (num_user_contexts < 0)
- num_user_contexts =
- cpumask_weight(&node_affinity.real_cpu_mask);
-
- total_contexts = num_kernel_contexts + num_user_contexts;
-
+ n_usr_ctxts = cpumask_weight(&node_affinity.real_cpu_mask);
+ else
+ n_usr_ctxts = num_user_contexts;
/*
* Adjust the counts given a global max.
*/
- if (total_contexts > dd->chip_rcv_contexts) {
+ if (num_kernel_contexts + n_usr_ctxts > rcv_contexts) {
dd_dev_err(dd,
- "Reducing # user receive contexts to: %d, from %d\n",
- (int)(dd->chip_rcv_contexts - num_kernel_contexts),
- (int)num_user_contexts);
- num_user_contexts = dd->chip_rcv_contexts - num_kernel_contexts;
+ "Reducing # user receive contexts to: %u, from %u\n",
+ (u32)(rcv_contexts - num_kernel_contexts),
+ n_usr_ctxts);
/* recalculate */
- total_contexts = num_kernel_contexts + num_user_contexts;
+ n_usr_ctxts = rcv_contexts - num_kernel_contexts;
}
- /* each user context requires an entry in the RMT */
- qos_rmt_count = qos_rmt_entries(dd, NULL, NULL);
- if (qos_rmt_count + num_user_contexts > NUM_MAP_ENTRIES) {
- user_rmt_reduced = NUM_MAP_ENTRIES - qos_rmt_count;
- dd_dev_err(dd,
- "RMT size is reducing the number of user receive contexts from %d to %d\n",
- (int)num_user_contexts,
- user_rmt_reduced);
- /* recalculate */
- num_user_contexts = user_rmt_reduced;
- total_contexts = num_kernel_contexts + num_user_contexts;
+ num_netdev_contexts =
+ hfi1_num_netdev_contexts(dd, rcv_contexts -
+ (num_kernel_contexts + n_usr_ctxts),
+ &node_affinity.real_cpu_mask);
+ /*
+ * RMT entries are allocated as follows:
+ * 1. QOS (0 to 128 entries)
+ * 2. FECN (num_kernel_context - 1 [a] + num_user_contexts +
+ * num_netdev_contexts [b])
+ * 3. netdev (NUM_NETDEV_MAP_ENTRIES)
+ *
+ * Notes:
+ * [a] Kernel contexts (except control) are included in FECN if kernel
+ * TID_RDMA is active.
+ * [b] Netdev and user contexts are randomly allocated from the same
+ * context pool, so FECN must cover all contexts in the pool.
+ */
+ rmt_count = qos_rmt_entries(num_kernel_contexts - 1, NULL, NULL)
+ + (HFI1_CAP_IS_KSET(TID_RDMA) ? num_kernel_contexts - 1
+ : 0)
+ + n_usr_ctxts
+ + num_netdev_contexts
+ + NUM_NETDEV_MAP_ENTRIES;
+ if (rmt_count > NUM_MAP_ENTRIES) {
+ int over = rmt_count - NUM_MAP_ENTRIES;
+ /* try to squish user contexts, minimum of 1 */
+ if (over >= n_usr_ctxts) {
+ dd_dev_err(dd, "RMT overflow: reduce the requested number of contexts\n");
+ return -EINVAL;
+ }
+ dd_dev_err(dd, "RMT overflow: reducing # user contexts from %u to %u\n",
+ n_usr_ctxts, n_usr_ctxts - over);
+ n_usr_ctxts -= over;
}
- /* the first N are kernel contexts, the rest are user contexts */
- dd->num_rcv_contexts = total_contexts;
+ /* the first N are kernel contexts, the rest are user/netdev contexts */
+ dd->num_rcv_contexts =
+ num_kernel_contexts + n_usr_ctxts + num_netdev_contexts;
dd->n_krcv_queues = num_kernel_contexts;
- dd->first_user_ctxt = num_kernel_contexts;
- dd->num_user_contexts = num_user_contexts;
- dd->freectxts = num_user_contexts;
+ dd->first_dyn_alloc_ctxt = num_kernel_contexts;
+ dd->num_netdev_contexts = num_netdev_contexts;
+ dd->num_user_contexts = n_usr_ctxts;
+ dd->freectxts = n_usr_ctxts;
dd_dev_info(dd,
- "rcv contexts: chip %d, used %d (kernel %d, user %d)\n",
- (int)dd->chip_rcv_contexts,
+ "rcv contexts: chip %d, used %d (kernel %d, netdev %u, user %u)\n",
+ rcv_contexts,
(int)dd->num_rcv_contexts,
(int)dd->n_krcv_queues,
- (int)dd->num_rcv_contexts - dd->n_krcv_queues);
+ dd->num_netdev_contexts,
+ dd->num_user_contexts);
/*
* Receive array allocation:
@@ -13016,7 +13464,7 @@ static int set_up_context_variables(struct hfi1_devdata *dd)
* contexts.
*/
dd->rcv_entries.group_size = RCV_INCREMENT;
- ngroups = dd->chip_rcv_array_count / dd->rcv_entries.group_size;
+ ngroups = chip_rcv_array_count(dd) / dd->rcv_entries.group_size;
dd->rcv_entries.ngroups = ngroups / dd->num_rcv_contexts;
dd->rcv_entries.nctxt_extra = ngroups -
(dd->num_rcv_contexts * dd->rcv_entries.ngroups);
@@ -13041,7 +13489,7 @@ static int set_up_context_variables(struct hfi1_devdata *dd)
dd_dev_info(
dd,
"send contexts: chip %d, used %d (kernel %d, ack %d, user %d, vl15 %d)\n",
- dd->chip_send_contexts,
+ send_contexts,
dd->num_send_contexts,
dd->sc_sizes[SC_KERNEL].count,
dd->sc_sizes[SC_ACK].count,
@@ -13099,7 +13547,7 @@ static void write_uninitialized_csrs_and_memories(struct hfi1_devdata *dd)
write_csr(dd, CCE_INT_MAP + (8 * i), 0);
/* SendCtxtCreditReturnAddr */
- for (i = 0; i < dd->chip_send_contexts; i++)
+ for (i = 0; i < chip_send_contexts(dd); i++)
write_kctxt_csr(dd, i, SEND_CTXT_CREDIT_RETURN_ADDR, 0);
/* PIO Send buffers */
@@ -13112,7 +13560,7 @@ static void write_uninitialized_csrs_and_memories(struct hfi1_devdata *dd)
/* RcvHdrAddr */
/* RcvHdrTailAddr */
/* RcvTidFlowTable */
- for (i = 0; i < dd->chip_rcv_contexts; i++) {
+ for (i = 0; i < chip_rcv_contexts(dd); i++) {
write_kctxt_csr(dd, i, RCV_HDR_ADDR, 0);
write_kctxt_csr(dd, i, RCV_HDR_TAIL_ADDR, 0);
for (j = 0; j < RXE_NUM_TID_FLOWS; j++)
@@ -13120,9 +13568,8 @@ static void write_uninitialized_csrs_and_memories(struct hfi1_devdata *dd)
}
/* RcvArray */
- for (i = 0; i < dd->chip_rcv_array_count; i++)
- write_csr(dd, RCV_ARRAY + (8 * i),
- RCV_ARRAY_RT_WRITE_ENABLE_SMASK);
+ for (i = 0; i < chip_rcv_array_count(dd); i++)
+ hfi1_put_tid(dd, i, PT_INVALID_FLUSH, 0, 0);
/* RcvQPMapTable */
for (i = 0; i < 32; i++)
@@ -13279,7 +13726,7 @@ static void reset_txe_csrs(struct hfi1_devdata *dd)
write_csr(dd, SEND_LOW_PRIORITY_LIST + (8 * i), 0);
for (i = 0; i < VL_ARB_HIGH_PRIO_TABLE_SIZE; i++)
write_csr(dd, SEND_HIGH_PRIORITY_LIST + (8 * i), 0);
- for (i = 0; i < dd->chip_send_contexts / NUM_CONTEXTS_PER_SET; i++)
+ for (i = 0; i < chip_send_contexts(dd) / NUM_CONTEXTS_PER_SET; i++)
write_csr(dd, SEND_CONTEXT_SET_CTRL + (8 * i), 0);
for (i = 0; i < TXE_NUM_32_BIT_COUNTER; i++)
write_csr(dd, SEND_COUNTER_ARRAY32 + (8 * i), 0);
@@ -13307,7 +13754,7 @@ static void reset_txe_csrs(struct hfi1_devdata *dd)
/*
* TXE Per-Context CSRs
*/
- for (i = 0; i < dd->chip_send_contexts; i++) {
+ for (i = 0; i < chip_send_contexts(dd); i++) {
write_kctxt_csr(dd, i, SEND_CTXT_CTRL, 0);
write_kctxt_csr(dd, i, SEND_CTXT_CREDIT_CTRL, 0);
write_kctxt_csr(dd, i, SEND_CTXT_CREDIT_RETURN_ADDR, 0);
@@ -13325,7 +13772,7 @@ static void reset_txe_csrs(struct hfi1_devdata *dd)
/*
* TXE Per-SDMA CSRs
*/
- for (i = 0; i < dd->chip_sdma_engines; i++) {
+ for (i = 0; i < chip_sdma_engines(dd); i++) {
write_kctxt_csr(dd, i, SEND_DMA_CTRL, 0);
/* SEND_DMA_STATUS read-only */
write_kctxt_csr(dd, i, SEND_DMA_BASE_ADDR, 0);
@@ -13450,18 +13897,15 @@ static void reset_rxe_csrs(struct hfi1_devdata *dd)
write_csr(dd, RCV_COUNTER_ARRAY32 + (8 * i), 0);
for (i = 0; i < RXE_NUM_64_BIT_COUNTERS; i++)
write_csr(dd, RCV_COUNTER_ARRAY64 + (8 * i), 0);
- for (i = 0; i < RXE_NUM_RSM_INSTANCES; i++) {
- write_csr(dd, RCV_RSM_CFG + (8 * i), 0);
- write_csr(dd, RCV_RSM_SELECT + (8 * i), 0);
- write_csr(dd, RCV_RSM_MATCH + (8 * i), 0);
- }
+ for (i = 0; i < RXE_NUM_RSM_INSTANCES; i++)
+ clear_rsm_rule(dd, i);
for (i = 0; i < 32; i++)
write_csr(dd, RCV_RSM_MAP_TABLE + (8 * i), 0);
/*
* RXE Kernel and User Per-Context CSRs
*/
- for (i = 0; i < dd->chip_rcv_contexts; i++) {
+ for (i = 0; i < chip_rcv_contexts(dd); i++) {
/* kernel */
write_kctxt_csr(dd, i, RCV_CTXT_CTRL, 0);
/* RCV_CTXT_STATUS read-only */
@@ -13559,9 +14003,10 @@ static void init_sc2vl_tables(struct hfi1_devdata *dd)
* a reset following the (possible) FLR in this routine.
*
*/
-static void init_chip(struct hfi1_devdata *dd)
+static int init_chip(struct hfi1_devdata *dd)
{
int i;
+ int ret = 0;
/*
* Put the HFI CSRs in a known state.
@@ -13576,13 +14021,13 @@ static void init_chip(struct hfi1_devdata *dd)
/* disable send contexts and SDMA engines */
write_csr(dd, SEND_CTRL, 0);
- for (i = 0; i < dd->chip_send_contexts; i++)
+ for (i = 0; i < chip_send_contexts(dd); i++)
write_kctxt_csr(dd, i, SEND_CTXT_CTRL, 0);
- for (i = 0; i < dd->chip_sdma_engines; i++)
+ for (i = 0; i < chip_sdma_engines(dd); i++)
write_kctxt_csr(dd, i, SEND_DMA_CTRL, 0);
/* disable port (turn off RXE inbound traffic) and contexts */
write_csr(dd, RCV_CTRL, 0);
- for (i = 0; i < dd->chip_rcv_contexts; i++)
+ for (i = 0; i < chip_rcv_contexts(dd); i++)
write_csr(dd, RCV_CTXT_CTRL, 0);
/* mask all interrupt sources */
for (i = 0; i < CCE_NUM_INT_CSRS; i++)
@@ -13606,15 +14051,25 @@ static void init_chip(struct hfi1_devdata *dd)
dd_dev_info(dd, "Resetting CSRs with FLR\n");
/* do the FLR, the DC reset will remain */
- hfi1_pcie_flr(dd);
+ pcie_flr(dd->pcidev);
/* restore command and BARs */
- restore_pci_variables(dd);
+ ret = restore_pci_variables(dd);
+ if (ret) {
+ dd_dev_err(dd, "%s: Could not restore PCI variables\n",
+ __func__);
+ return ret;
+ }
if (is_ax(dd)) {
dd_dev_info(dd, "Resetting CSRs with FLR\n");
- hfi1_pcie_flr(dd);
- restore_pci_variables(dd);
+ pcie_flr(dd->pcidev);
+ ret = restore_pci_variables(dd);
+ if (ret) {
+ dd_dev_err(dd, "%s: Could not restore PCI variables\n",
+ __func__);
+ return ret;
+ }
}
} else {
dd_dev_info(dd, "Resetting CSRs with writes\n");
@@ -13642,6 +14097,7 @@ static void init_chip(struct hfi1_devdata *dd)
write_csr(dd, ASIC_QSFP1_OUT, 0x1f);
write_csr(dd, ASIC_QSFP2_OUT, 0x1f);
init_chip_resources(dd);
+ return ret;
}
static void init_early_variables(struct hfi1_devdata *dd)
@@ -13672,29 +14128,33 @@ static void init_early_variables(struct hfi1_devdata *dd)
static void init_kdeth_qp(struct hfi1_devdata *dd)
{
- /* user changed the KDETH_QP */
- if (kdeth_qp != 0 && kdeth_qp >= 0xff) {
- /* out of range or illegal value */
- dd_dev_err(dd, "Invalid KDETH queue pair prefix, ignoring");
- kdeth_qp = 0;
- }
- if (kdeth_qp == 0) /* not set, or failed range check */
- kdeth_qp = DEFAULT_KDETH_QP;
-
write_csr(dd, SEND_BTH_QP,
- (kdeth_qp & SEND_BTH_QP_KDETH_QP_MASK) <<
+ (RVT_KDETH_QP_PREFIX & SEND_BTH_QP_KDETH_QP_MASK) <<
SEND_BTH_QP_KDETH_QP_SHIFT);
write_csr(dd, RCV_BTH_QP,
- (kdeth_qp & RCV_BTH_QP_KDETH_QP_MASK) <<
+ (RVT_KDETH_QP_PREFIX & RCV_BTH_QP_KDETH_QP_MASK) <<
RCV_BTH_QP_KDETH_QP_SHIFT);
}
/**
- * init_qpmap_table
- * @dd - device data
- * @first_ctxt - first context
- * @last_ctxt - first context
+ * hfi1_get_qp_map - get qp map
+ * @dd: device data
+ * @idx: index to read
+ */
+u8 hfi1_get_qp_map(struct hfi1_devdata *dd, u8 idx)
+{
+ u64 reg = read_csr(dd, RCV_QP_MAP_TABLE + (idx / 8) * 8);
+
+ reg >>= (idx % 8) * 8;
+ return reg;
+}
+
+/**
+ * init_qpmap_table - init qp map
+ * @dd: device data
+ * @first_ctxt: first context
+ * @last_ctxt: first context
*
* This return sets the qpn mapping table that
* is indexed by qpn[8:1].
@@ -13789,6 +14249,12 @@ static void complete_rsm_map_table(struct hfi1_devdata *dd,
}
}
+/* Is a receive side mapping rule */
+static bool has_rsm_rule(struct hfi1_devdata *dd, u8 rule_index)
+{
+ return read_csr(dd, RCV_RSM_CFG + (8 * rule_index)) != 0;
+}
+
/*
* Add a receive side mapping rule.
*/
@@ -13813,16 +14279,26 @@ static void add_rsm_rule(struct hfi1_devdata *dd, u8 rule_index,
(u64)rrd->value2 << RCV_RSM_MATCH_VALUE2_SHIFT);
}
+/*
+ * Clear a receive side mapping rule.
+ */
+static void clear_rsm_rule(struct hfi1_devdata *dd, u8 rule_index)
+{
+ write_csr(dd, RCV_RSM_CFG + (8 * rule_index), 0);
+ write_csr(dd, RCV_RSM_SELECT + (8 * rule_index), 0);
+ write_csr(dd, RCV_RSM_MATCH + (8 * rule_index), 0);
+}
+
/* return the number of RSM map table entries that will be used for QOS */
-static int qos_rmt_entries(struct hfi1_devdata *dd, unsigned int *mp,
+static int qos_rmt_entries(unsigned int n_krcv_queues, unsigned int *mp,
unsigned int *np)
{
int i;
unsigned int m, n;
- u8 max_by_vl = 0;
+ uint max_by_vl = 0;
/* is QOS active at all? */
- if (dd->n_krcv_queues <= MIN_KERNEL_KCTXTS ||
+ if (n_krcv_queues < MIN_KERNEL_KCTXTS ||
num_vls == 1 ||
krcvqsset <= 1)
goto no_qos;
@@ -13859,8 +14335,8 @@ no_qos:
/**
* init_qos - init RX qos
- * @dd - device data
- * @rmt - RSM map table
+ * @dd: device data
+ * @rmt: RSM map table
*
* This routine initializes Rule 0 and the RSM map table to implement
* quality of service (qos).
@@ -13880,7 +14356,7 @@ static void init_qos(struct hfi1_devdata *dd, struct rsm_map_table *rmt)
if (!rmt)
goto bail;
- rmt_entries = qos_rmt_entries(dd, &m, &n);
+ rmt_entries = qos_rmt_entries(dd->n_krcv_queues - 1, &m, &n);
if (rmt_entries == 0)
goto bail;
qpns_per_vl = 1 << m;
@@ -13890,7 +14366,7 @@ static void init_qos(struct hfi1_devdata *dd, struct rsm_map_table *rmt)
if (rmt->used + rmt_entries >= NUM_MAP_ENTRIES)
goto bail;
- /* add qos entries to the the RSM map table */
+ /* add qos entries to the RSM map table */
for (i = 0, ctxt = FIRST_KERNEL_KCTXT; i < num_vls; i++) {
unsigned tctxt;
@@ -13928,7 +14404,7 @@ static void init_qos(struct hfi1_devdata *dd, struct rsm_map_table *rmt)
rrd.value2 = LRH_SC_VALUE;
/* add rule 0 */
- add_rsm_rule(dd, 0, &rrd);
+ add_rsm_rule(dd, RSM_INS_VERBS, &rrd);
/* mark RSM map entries as used */
rmt->used += rmt_entries;
@@ -13941,35 +14417,43 @@ bail:
init_qpmap_table(dd, FIRST_KERNEL_KCTXT, dd->n_krcv_queues - 1);
}
-static void init_user_fecn_handling(struct hfi1_devdata *dd,
- struct rsm_map_table *rmt)
+static void init_fecn_handling(struct hfi1_devdata *dd,
+ struct rsm_map_table *rmt)
{
struct rsm_rule_data rrd;
u64 reg;
- int i, idx, regoff, regidx;
+ int i, idx, regoff, regidx, start;
u8 offset;
+ u32 total_cnt;
+
+ if (HFI1_CAP_IS_KSET(TID_RDMA))
+ /* Exclude context 0 */
+ start = 1;
+ else
+ start = dd->first_dyn_alloc_ctxt;
+
+ total_cnt = dd->num_rcv_contexts - start;
/* there needs to be enough room in the map table */
- if (rmt->used + dd->num_user_contexts >= NUM_MAP_ENTRIES) {
- dd_dev_err(dd, "User FECN handling disabled - too many user contexts allocated\n");
+ if (rmt->used + total_cnt >= NUM_MAP_ENTRIES) {
+ dd_dev_err(dd, "FECN handling disabled - too many contexts allocated\n");
return;
}
/*
* RSM will extract the destination context as an index into the
* map table. The destination contexts are a sequential block
- * in the range first_user_ctxt...num_rcv_contexts-1 (inclusive).
+ * in the range start...num_rcv_contexts-1 (inclusive).
* Map entries are accessed as offset + extracted value. Adjust
* the added offset so this sequence can be placed anywhere in
* the table - as long as the entries themselves do not wrap.
* There are only enough bits in offset for the table size, so
* start with that to allow for a "negative" offset.
*/
- offset = (u8)(NUM_MAP_ENTRIES + (int)rmt->used -
- (int)dd->first_user_ctxt);
+ offset = (u8)(NUM_MAP_ENTRIES + rmt->used - start);
- for (i = dd->first_user_ctxt, idx = rmt->used;
- i < dd->num_rcv_contexts; i++, idx++) {
+ for (i = start, idx = rmt->used; i < dd->num_rcv_contexts;
+ i++, idx++) {
/* replace with identity mapping */
regoff = (idx % 8) * 8;
regidx = idx / 8;
@@ -14002,23 +14486,163 @@ static void init_user_fecn_handling(struct hfi1_devdata *dd,
rrd.value2 = 1;
/* add rule 1 */
- add_rsm_rule(dd, 1, &rrd);
+ add_rsm_rule(dd, RSM_INS_FECN, &rrd);
+
+ rmt->used += total_cnt;
+}
+
+static inline bool hfi1_is_rmt_full(int start, int spare)
+{
+ return (start + spare) > NUM_MAP_ENTRIES;
+}
+
+static bool hfi1_netdev_update_rmt(struct hfi1_devdata *dd)
+{
+ u8 i, j;
+ u8 ctx_id = 0;
+ u64 reg;
+ u32 regoff;
+ int rmt_start = hfi1_netdev_get_free_rmt_idx(dd);
+ int ctxt_count = hfi1_netdev_ctxt_count(dd);
+
+ /* We already have contexts mapped in RMT */
+ if (has_rsm_rule(dd, RSM_INS_VNIC) || has_rsm_rule(dd, RSM_INS_AIP)) {
+ dd_dev_info(dd, "Contexts are already mapped in RMT\n");
+ return true;
+ }
+
+ if (hfi1_is_rmt_full(rmt_start, NUM_NETDEV_MAP_ENTRIES)) {
+ dd_dev_err(dd, "Not enough RMT entries used = %d\n",
+ rmt_start);
+ return false;
+ }
+
+ dev_dbg(&(dd)->pcidev->dev, "RMT start = %d, end %d\n",
+ rmt_start,
+ rmt_start + NUM_NETDEV_MAP_ENTRIES);
+
+ /* Update RSM mapping table, 32 regs, 256 entries - 1 ctx per byte */
+ regoff = RCV_RSM_MAP_TABLE + (rmt_start / 8) * 8;
+ reg = read_csr(dd, regoff);
+ for (i = 0; i < NUM_NETDEV_MAP_ENTRIES; i++) {
+ /* Update map register with netdev context */
+ j = (rmt_start + i) % 8;
+ reg &= ~(0xffllu << (j * 8));
+ reg |= (u64)hfi1_netdev_get_ctxt(dd, ctx_id++)->ctxt << (j * 8);
+ /* Wrap up netdev ctx index */
+ ctx_id %= ctxt_count;
+ /* Write back map register */
+ if (j == 7 || ((i + 1) == NUM_NETDEV_MAP_ENTRIES)) {
+ dev_dbg(&(dd)->pcidev->dev,
+ "RMT[%d] =0x%llx\n",
+ regoff - RCV_RSM_MAP_TABLE, reg);
+
+ write_csr(dd, regoff, reg);
+ regoff += 8;
+ if (i < (NUM_NETDEV_MAP_ENTRIES - 1))
+ reg = read_csr(dd, regoff);
+ }
+ }
+
+ return true;
+}
+
+static void hfi1_enable_rsm_rule(struct hfi1_devdata *dd,
+ int rule, struct rsm_rule_data *rrd)
+{
+ if (!hfi1_netdev_update_rmt(dd)) {
+ dd_dev_err(dd, "Failed to update RMT for RSM%d rule\n", rule);
+ return;
+ }
+
+ add_rsm_rule(dd, rule, rrd);
+ add_rcvctrl(dd, RCV_CTRL_RCV_RSM_ENABLE_SMASK);
+}
+
+void hfi1_init_aip_rsm(struct hfi1_devdata *dd)
+{
+ /*
+ * go through with the initialisation only if this rule actually doesn't
+ * exist yet
+ */
+ if (atomic_fetch_inc(&dd->ipoib_rsm_usr_num) == 0) {
+ int rmt_start = hfi1_netdev_get_free_rmt_idx(dd);
+ struct rsm_rule_data rrd = {
+ .offset = rmt_start,
+ .pkt_type = IB_PACKET_TYPE,
+ .field1_off = LRH_BTH_MATCH_OFFSET,
+ .mask1 = LRH_BTH_MASK,
+ .value1 = LRH_BTH_VALUE,
+ .field2_off = BTH_DESTQP_MATCH_OFFSET,
+ .mask2 = BTH_DESTQP_MASK,
+ .value2 = BTH_DESTQP_VALUE,
+ .index1_off = DETH_AIP_SQPN_SELECT_OFFSET +
+ ilog2(NUM_NETDEV_MAP_ENTRIES),
+ .index1_width = ilog2(NUM_NETDEV_MAP_ENTRIES),
+ .index2_off = DETH_AIP_SQPN_SELECT_OFFSET,
+ .index2_width = ilog2(NUM_NETDEV_MAP_ENTRIES)
+ };
+
+ hfi1_enable_rsm_rule(dd, RSM_INS_AIP, &rrd);
+ }
+}
+
+/* Initialize RSM for VNIC */
+void hfi1_init_vnic_rsm(struct hfi1_devdata *dd)
+{
+ int rmt_start = hfi1_netdev_get_free_rmt_idx(dd);
+ struct rsm_rule_data rrd = {
+ /* Add rule for vnic */
+ .offset = rmt_start,
+ .pkt_type = 4,
+ /* Match 16B packets */
+ .field1_off = L2_TYPE_MATCH_OFFSET,
+ .mask1 = L2_TYPE_MASK,
+ .value1 = L2_16B_VALUE,
+ /* Match ETH L4 packets */
+ .field2_off = L4_TYPE_MATCH_OFFSET,
+ .mask2 = L4_16B_TYPE_MASK,
+ .value2 = L4_16B_ETH_VALUE,
+ /* Calc context from veswid and entropy */
+ .index1_off = L4_16B_HDR_VESWID_OFFSET,
+ .index1_width = ilog2(NUM_NETDEV_MAP_ENTRIES),
+ .index2_off = L2_16B_ENTROPY_OFFSET,
+ .index2_width = ilog2(NUM_NETDEV_MAP_ENTRIES)
+ };
+
+ hfi1_enable_rsm_rule(dd, RSM_INS_VNIC, &rrd);
+}
- rmt->used += dd->num_user_contexts;
+void hfi1_deinit_vnic_rsm(struct hfi1_devdata *dd)
+{
+ clear_rsm_rule(dd, RSM_INS_VNIC);
+}
+
+void hfi1_deinit_aip_rsm(struct hfi1_devdata *dd)
+{
+ /* only actually clear the rule if it's the last user asking to do so */
+ if (atomic_fetch_add_unless(&dd->ipoib_rsm_usr_num, -1, 0) == 1)
+ clear_rsm_rule(dd, RSM_INS_AIP);
}
-static void init_rxe(struct hfi1_devdata *dd)
+static int init_rxe(struct hfi1_devdata *dd)
{
struct rsm_map_table *rmt;
+ u64 val;
/* enable all receive errors */
write_csr(dd, RCV_ERR_MASK, ~0ull);
rmt = alloc_rsm_map_table(dd);
+ if (!rmt)
+ return -ENOMEM;
+
/* set up QOS, including the QPN map table */
init_qos(dd, rmt);
- init_user_fecn_handling(dd, rmt);
+ init_fecn_handling(dd, rmt);
complete_rsm_map_table(dd, rmt);
+ /* record number of used rsm map entries for netdev */
+ hfi1_netdev_set_free_rmt_idx(dd, rmt->used);
kfree(rmt);
/*
@@ -14032,6 +14656,14 @@ static void init_rxe(struct hfi1_devdata *dd)
* (64 bytes). Max_Payload_Size is possibly modified upward in
* tune_pcie_caps() which is called after this routine.
*/
+
+ /* Have 16 bytes (4DW) of bypass header available in header queue */
+ val = read_csr(dd, RCV_BYPASS);
+ val &= ~RCV_BYPASS_HDR_SIZE_SMASK;
+ val |= ((4ull & RCV_BYPASS_HDR_SIZE_MASK) <<
+ RCV_BYPASS_HDR_SIZE_SHIFT);
+ write_csr(dd, RCV_BYPASS, val);
+ return 0;
}
static void init_other(struct hfi1_devdata *dd)
@@ -14097,9 +14729,9 @@ static void init_txe(struct hfi1_devdata *dd)
write_csr(dd, SEND_EGRESS_ERR_MASK, ~0ull);
/* enable all per-context and per-SDMA engine errors */
- for (i = 0; i < dd->chip_send_contexts; i++)
+ for (i = 0; i < chip_send_contexts(dd); i++)
write_kctxt_csr(dd, i, SEND_CTXT_ERR_MASK, ~0ull);
- for (i = 0; i < dd->chip_sdma_engines; i++)
+ for (i = 0; i < chip_sdma_engines(dd); i++)
write_kctxt_csr(dd, i, SEND_DMA_ENG_ERR_MASK, ~0ull);
/* set the local CU to AU mapping */
@@ -14113,129 +14745,107 @@ static void init_txe(struct hfi1_devdata *dd)
write_csr(dd, SEND_CM_TIMER_CTRL, HFI1_CREDIT_RETURN_RATE);
}
-int hfi1_set_ctxt_jkey(struct hfi1_devdata *dd, unsigned ctxt, u16 jkey)
+int hfi1_set_ctxt_jkey(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd,
+ u16 jkey)
{
- struct hfi1_ctxtdata *rcd = dd->rcd[ctxt];
- unsigned sctxt;
- int ret = 0;
+ u8 hw_ctxt;
u64 reg;
- if (!rcd || !rcd->sc) {
- ret = -EINVAL;
- goto done;
- }
- sctxt = rcd->sc->hw_context;
+ if (!rcd || !rcd->sc)
+ return -EINVAL;
+
+ hw_ctxt = rcd->sc->hw_context;
reg = SEND_CTXT_CHECK_JOB_KEY_MASK_SMASK | /* mask is always 1's */
((jkey & SEND_CTXT_CHECK_JOB_KEY_VALUE_MASK) <<
SEND_CTXT_CHECK_JOB_KEY_VALUE_SHIFT);
/* JOB_KEY_ALLOW_PERMISSIVE is not allowed by default */
if (HFI1_CAP_KGET_MASK(rcd->flags, ALLOW_PERM_JKEY))
reg |= SEND_CTXT_CHECK_JOB_KEY_ALLOW_PERMISSIVE_SMASK;
- write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_JOB_KEY, reg);
+ write_kctxt_csr(dd, hw_ctxt, SEND_CTXT_CHECK_JOB_KEY, reg);
/*
* Enable send-side J_KEY integrity check, unless this is A0 h/w
*/
if (!is_ax(dd)) {
- reg = read_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE);
+ reg = read_kctxt_csr(dd, hw_ctxt, SEND_CTXT_CHECK_ENABLE);
reg |= SEND_CTXT_CHECK_ENABLE_CHECK_JOB_KEY_SMASK;
- write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE, reg);
+ write_kctxt_csr(dd, hw_ctxt, SEND_CTXT_CHECK_ENABLE, reg);
}
/* Enable J_KEY check on receive context. */
reg = RCV_KEY_CTRL_JOB_KEY_ENABLE_SMASK |
((jkey & RCV_KEY_CTRL_JOB_KEY_VALUE_MASK) <<
RCV_KEY_CTRL_JOB_KEY_VALUE_SHIFT);
- write_kctxt_csr(dd, ctxt, RCV_KEY_CTRL, reg);
-done:
- return ret;
+ write_kctxt_csr(dd, rcd->ctxt, RCV_KEY_CTRL, reg);
+
+ return 0;
}
-int hfi1_clear_ctxt_jkey(struct hfi1_devdata *dd, unsigned ctxt)
+int hfi1_clear_ctxt_jkey(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
{
- struct hfi1_ctxtdata *rcd = dd->rcd[ctxt];
- unsigned sctxt;
- int ret = 0;
+ u8 hw_ctxt;
u64 reg;
- if (!rcd || !rcd->sc) {
- ret = -EINVAL;
- goto done;
- }
- sctxt = rcd->sc->hw_context;
- write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_JOB_KEY, 0);
+ if (!rcd || !rcd->sc)
+ return -EINVAL;
+
+ hw_ctxt = rcd->sc->hw_context;
+ write_kctxt_csr(dd, hw_ctxt, SEND_CTXT_CHECK_JOB_KEY, 0);
/*
* Disable send-side J_KEY integrity check, unless this is A0 h/w.
* This check would not have been enabled for A0 h/w, see
* set_ctxt_jkey().
*/
if (!is_ax(dd)) {
- reg = read_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE);
+ reg = read_kctxt_csr(dd, hw_ctxt, SEND_CTXT_CHECK_ENABLE);
reg &= ~SEND_CTXT_CHECK_ENABLE_CHECK_JOB_KEY_SMASK;
- write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE, reg);
+ write_kctxt_csr(dd, hw_ctxt, SEND_CTXT_CHECK_ENABLE, reg);
}
/* Turn off the J_KEY on the receive side */
- write_kctxt_csr(dd, ctxt, RCV_KEY_CTRL, 0);
-done:
- return ret;
+ write_kctxt_csr(dd, rcd->ctxt, RCV_KEY_CTRL, 0);
+
+ return 0;
}
-int hfi1_set_ctxt_pkey(struct hfi1_devdata *dd, unsigned ctxt, u16 pkey)
+int hfi1_set_ctxt_pkey(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd,
+ u16 pkey)
{
- struct hfi1_ctxtdata *rcd;
- unsigned sctxt;
- int ret = 0;
+ u8 hw_ctxt;
u64 reg;
- if (ctxt < dd->num_rcv_contexts) {
- rcd = dd->rcd[ctxt];
- } else {
- ret = -EINVAL;
- goto done;
- }
- if (!rcd || !rcd->sc) {
- ret = -EINVAL;
- goto done;
- }
- sctxt = rcd->sc->hw_context;
+ if (!rcd || !rcd->sc)
+ return -EINVAL;
+
+ hw_ctxt = rcd->sc->hw_context;
reg = ((u64)pkey & SEND_CTXT_CHECK_PARTITION_KEY_VALUE_MASK) <<
SEND_CTXT_CHECK_PARTITION_KEY_VALUE_SHIFT;
- write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_PARTITION_KEY, reg);
- reg = read_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE);
+ write_kctxt_csr(dd, hw_ctxt, SEND_CTXT_CHECK_PARTITION_KEY, reg);
+ reg = read_kctxt_csr(dd, hw_ctxt, SEND_CTXT_CHECK_ENABLE);
reg |= SEND_CTXT_CHECK_ENABLE_CHECK_PARTITION_KEY_SMASK;
reg &= ~SEND_CTXT_CHECK_ENABLE_DISALLOW_KDETH_PACKETS_SMASK;
- write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE, reg);
-done:
- return ret;
+ write_kctxt_csr(dd, hw_ctxt, SEND_CTXT_CHECK_ENABLE, reg);
+
+ return 0;
}
-int hfi1_clear_ctxt_pkey(struct hfi1_devdata *dd, unsigned ctxt)
+int hfi1_clear_ctxt_pkey(struct hfi1_devdata *dd, struct hfi1_ctxtdata *ctxt)
{
- struct hfi1_ctxtdata *rcd;
- unsigned sctxt;
- int ret = 0;
+ u8 hw_ctxt;
u64 reg;
- if (ctxt < dd->num_rcv_contexts) {
- rcd = dd->rcd[ctxt];
- } else {
- ret = -EINVAL;
- goto done;
- }
- if (!rcd || !rcd->sc) {
- ret = -EINVAL;
- goto done;
- }
- sctxt = rcd->sc->hw_context;
- reg = read_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE);
+ if (!ctxt || !ctxt->sc)
+ return -EINVAL;
+
+ hw_ctxt = ctxt->sc->hw_context;
+ reg = read_kctxt_csr(dd, hw_ctxt, SEND_CTXT_CHECK_ENABLE);
reg &= ~SEND_CTXT_CHECK_ENABLE_CHECK_PARTITION_KEY_SMASK;
- write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE, reg);
- write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_PARTITION_KEY, 0);
-done:
- return ret;
+ write_kctxt_csr(dd, hw_ctxt, SEND_CTXT_CHECK_ENABLE, reg);
+ write_kctxt_csr(dd, hw_ctxt, SEND_CTXT_CHECK_PARTITION_KEY, 0);
+
+ return 0;
}
/*
- * Start doing the clean up the the chip. Our clean up happens in multiple
+ * Start doing the clean up the chip. Our clean up happens in multiple
* stages and this is just the first.
*/
void hfi1_start_cleanup(struct hfi1_devdata *dd)
@@ -14243,7 +14853,6 @@ void hfi1_start_cleanup(struct hfi1_devdata *dd)
aspm_exit(dd);
free_cntrs(dd);
free_rcverr(dd);
- clean_up_interrupts(dd);
finish_chip_resources(dd);
}
@@ -14257,8 +14866,8 @@ void hfi1_start_cleanup(struct hfi1_devdata *dd)
*/
static int init_asic_data(struct hfi1_devdata *dd)
{
- unsigned long flags;
- struct hfi1_devdata *tmp, *peer = NULL;
+ unsigned long index;
+ struct hfi1_devdata *peer;
struct hfi1_asic_data *asic_data;
int ret = 0;
@@ -14267,14 +14876,12 @@ static int init_asic_data(struct hfi1_devdata *dd)
if (!asic_data)
return -ENOMEM;
- spin_lock_irqsave(&hfi1_devs_lock, flags);
+ xa_lock_irq(&hfi1_dev_table);
/* Find our peer device */
- list_for_each_entry(tmp, &hfi1_dev_list, list) {
- if ((HFI_BASE_GUID(dd) == HFI_BASE_GUID(tmp)) &&
- dd->unit != tmp->unit) {
- peer = tmp;
+ xa_for_each(&hfi1_dev_table, index, peer) {
+ if ((HFI_BASE_GUID(dd) == HFI_BASE_GUID(peer)) &&
+ dd->unit != peer->unit)
break;
- }
}
if (peer) {
@@ -14286,7 +14893,7 @@ static int init_asic_data(struct hfi1_devdata *dd)
mutex_init(&dd->asic_data->asic_resource_mutex);
}
dd->asic_data->dds[dd->hfi1_id] = dd; /* self back-pointer */
- spin_unlock_irqrestore(&hfi1_devs_lock, flags);
+ xa_unlock_irq(&hfi1_dev_table);
/* first one through - set up i2c devices */
if (!peer)
@@ -14305,7 +14912,7 @@ static int obtain_boardname(struct hfi1_devdata *dd)
{
/* generic board description */
const char generic[] =
- "Intel Omni-Path Host Fabric Interface Adapter 100 Series";
+ "Cornelis Omni-Path Host Fabric Interface Adapter 100 Series";
unsigned long size;
int ret;
@@ -14366,20 +14973,15 @@ err_exit:
}
/**
- * Allocate and initialize the device structure for the hfi.
- * @dev: the pci_dev for hfi1_ib device
- * @ent: pci_device_id struct for this dev
- *
- * Also allocates, initializes, and returns the devdata struct for this
- * device instance
+ * hfi1_init_dd() - Initialize most of the dd structure.
+ * @dd: the dd device
*
* This is global, and is called directly at init to set up the
* chip-specific function pointers for later use.
*/
-struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
- const struct pci_device_id *ent)
+int hfi1_init_dd(struct hfi1_devdata *dd)
{
- struct hfi1_devdata *dd;
+ struct pci_dev *pdev = dd->pcidev;
struct hfi1_pportdata *ppd;
u64 reg;
int i, ret;
@@ -14390,11 +14992,8 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
"Functional simulator"
};
struct pci_dev *parent = pdev->bus->self;
+ u32 sdma_engines = chip_sdma_engines(dd);
- dd = hfi1_alloc_devdata(pdev, NUM_IB_PORTS *
- sizeof(struct hfi1_pportdata));
- if (IS_ERR(dd))
- goto bail;
ppd = dd->pport;
for (i = 0; i < dd->num_pports; i++, ppd++) {
int vl;
@@ -14415,14 +15014,12 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
if (num_vls < HFI1_MIN_VLS_SUPPORTED ||
num_vls > HFI1_MAX_VLS_SUPPORTED) {
- hfi1_early_err(&pdev->dev,
- "Invalid num_vls %u, using %u VLs\n",
- num_vls, HFI1_MAX_VLS_SUPPORTED);
+ dd_dev_err(dd, "Invalid num_vls %u, using %u VLs\n",
+ num_vls, HFI1_MAX_VLS_SUPPORTED);
num_vls = HFI1_MAX_VLS_SUPPORTED;
}
ppd->vls_supported = num_vls;
ppd->vls_operational = ppd->vls_supported;
- ppd->actual_vls_operational = ppd->vls_supported;
/* Set the default MTU. */
for (vl = 0; vl < num_vls; vl++)
dd->vld[vl].mtu = hfi1_max_mtu;
@@ -14431,7 +15028,6 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
* Set the initial values to reasonable default, will be set
* for real when link is up.
*/
- ppd->lstate = IB_PORT_DOWN;
ppd->overrun_threshold = 0x4;
ppd->phy_error_threshold = 0xf;
ppd->port_crc_mode_enabled = link_crc_mask;
@@ -14442,11 +15038,8 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
/* start in offline */
ppd->host_link_state = HLS_DN_OFFLINE;
init_vl_arb_caches(ppd);
- ppd->last_pstate = 0xff; /* invalid value */
}
- dd->link_default = HLS_DN_POLL;
-
/*
* Do remaining PCIe setup and save PCIe values in dd.
* Any error printing is already done by the init code.
@@ -14456,13 +15049,11 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
if (ret < 0)
goto bail_free;
- /* verify that reads actually work, save revision for reset check */
- dd->revision = read_csr(dd, CCE_REVISION);
- if (dd->revision == ~(u64)0) {
- dd_dev_err(dd, "cannot read chip CSRs\n");
- ret = -EINVAL;
+ /* Save PCI space registers to rewrite after device reset */
+ ret = save_pci_variables(dd);
+ if (ret < 0)
goto bail_cleanup;
- }
+
dd->majrev = (dd->revision >> CCE_REVISION_CHIP_REV_MAJOR_SHIFT)
& CCE_REVISION_CHIP_REV_MAJOR_MASK;
dd->minrev = (dd->revision >> CCE_REVISION_CHIP_REV_MINOR_SHIFT)
@@ -14500,11 +15091,6 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
/* give a reasonable active value, will be set on link up */
dd->pport->link_speed_active = OPA_LINK_SPEED_25G;
- dd->chip_rcv_contexts = read_csr(dd, RCV_CONTEXTS);
- dd->chip_send_contexts = read_csr(dd, SEND_CONTEXTS);
- dd->chip_sdma_engines = read_csr(dd, SEND_DMA_ENGINES);
- dd->chip_pio_mem_size = read_csr(dd, SEND_PIO_MEM_SIZE);
- dd->chip_sdma_mem_size = read_csr(dd, SEND_DMA_MEM_SIZE);
/* fix up link widths for emulation _p */
ppd = dd->pport;
if (dd->icode == ICODE_FPGA_EMULATION && is_emulator_p(dd)) {
@@ -14515,11 +15101,11 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
OPA_LINK_WIDTH_1X;
}
/* insure num_vls isn't larger than number of sdma engines */
- if (HFI1_CAP_IS_KSET(SDMA) && num_vls > dd->chip_sdma_engines) {
+ if (HFI1_CAP_IS_KSET(SDMA) && num_vls > sdma_engines) {
dd_dev_err(dd, "num_vls %u too large, using %u VLs\n",
- num_vls, dd->chip_sdma_engines);
- num_vls = dd->chip_sdma_engines;
- ppd->vls_supported = dd->chip_sdma_engines;
+ num_vls, sdma_engines);
+ num_vls = sdma_engines;
+ ppd->vls_supported = sdma_engines;
ppd->vls_operational = ppd->vls_supported;
}
@@ -14548,7 +15134,9 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
goto bail_cleanup;
/* obtain chip sizes, reset chip CSRs */
- init_chip(dd);
+ ret = init_chip(dd);
+ if (ret)
+ goto bail_cleanup;
/* read in the PCIe link speed information */
ret = pcie_speeds(dd);
@@ -14584,6 +15172,12 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
if (ret)
goto bail_cleanup;
+ /*
+ * This should probably occur in hfi1_pcie_init(), but historically
+ * occurs after the do_pcie_gen3_transition() code.
+ */
+ tune_pcie_caps(dd);
+
/* start setting dd values and adjusting CSRs */
init_early_variables(dd);
@@ -14601,12 +15195,20 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
(dd->revision >> CCE_REVISION_SW_SHIFT)
& CCE_REVISION_SW_MASK);
+ /* alloc VNIC/AIP rx data */
+ ret = hfi1_alloc_rx(dd);
+ if (ret)
+ goto bail_cleanup;
+
ret = set_up_context_variables(dd);
if (ret)
goto bail_cleanup;
/* set initial RXE CSRs */
- init_rxe(dd);
+ ret = init_rxe(dd);
+ if (ret)
+ goto bail_cleanup;
+
/* set initial TXE CSRs */
init_txe(dd);
/* set initial non-RXE, non-TXE CSRs */
@@ -14623,16 +15225,15 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
if (ret)
goto bail_cleanup;
- ret = hfi1_create_ctxts(dd);
+ ret = hfi1_create_kctxts(dd);
if (ret)
goto bail_cleanup;
- dd->rcvhdrsize = DEFAULT_RCVHDRSIZE;
/*
- * rcd[0] is guaranteed to be valid by this point. Also, all
- * context are using the same value, as per the module parameter.
+ * Initialize aspm, to be done after gen3 transition and setting up
+ * contexts and before enabling interrupts
*/
- dd->rhf_offset = dd->rcd[0]->rcvhdrqentsize - sizeof(u64) / sizeof(u32);
+ aspm_init(dd);
ret = init_pervl_scs(dd);
if (ret)
@@ -14645,11 +15246,15 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
goto bail_cleanup;
}
- /* use contexts created by hfi1_create_ctxts */
+ /* use contexts created by hfi1_create_kctxts */
ret = set_up_interrupts(dd);
if (ret)
goto bail_cleanup;
+ ret = hfi1_comp_vectors_set_up(dd);
+ if (ret)
+ goto bail_clear_intr;
+
/* set up LCB access - must be after set_up_interrupts() */
init_lcb_access(dd);
@@ -14683,7 +15288,7 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
init_completion(&dd->user_comp);
/* The user refcount starts with one to inidicate an active device */
- atomic_set(&dd->user_refcount, 1);
+ refcount_set(&dd->user_refcount, 1);
goto bail;
@@ -14692,14 +15297,15 @@ bail_free_rcverr:
bail_free_cntrs:
free_cntrs(dd);
bail_clear_intr:
- clean_up_interrupts(dd);
+ hfi1_comp_vectors_clean_up(dd);
+ msix_clean_up_interrupts(dd);
bail_cleanup:
+ hfi1_free_rx(dd);
hfi1_pcie_ddcleanup(dd);
bail_free:
hfi1_free_devdata(dd);
- dd = ERR_PTR(ret);
bail:
- return dd;
+ return ret;
}
static u16 delay_cycles(struct hfi1_pportdata *ppd, u32 desired_egress_rate,
@@ -14723,10 +15329,11 @@ static u16 delay_cycles(struct hfi1_pportdata *ppd, u32 desired_egress_rate,
/**
* create_pbc - build a pbc for transmission
+ * @ppd: info of physical Hfi port
* @flags: special case flags or-ed in built pbc
- * @srate: static rate
+ * @srate_mbs: static rate
* @vl: vl
- * @dwlen: dword length (header words + data words + pbc words)
+ * @dw_len: dword length (header words + data words + pbc words)
*
* Create a PBC with the given flags, rate, VL, and length.
*
diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h
index 043fd21dc5f3..6992f6d40255 100644
--- a/drivers/infiniband/hw/hfi1/chip.h
+++ b/drivers/infiniband/hw/hfi1/chip.h
@@ -1,60 +1,16 @@
-#ifndef _CHIP_H
-#define _CHIP_H
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
+ * Copyright(c) 2015 - 2020 Intel Corporation.
*/
+#ifndef _CHIP_H
+#define _CHIP_H
/*
* This file contains all of the defines that is specific to the HFI chip
*/
/* sizes */
-#define CCE_NUM_MSIX_VECTORS 256
-#define CCE_NUM_INT_CSRS 12
-#define CCE_NUM_INT_MAP_CSRS 96
+#define BITS_PER_REGISTER (BITS_PER_BYTE * sizeof(u64))
#define NUM_INTERRUPT_SOURCES 768
#define RXE_NUM_CONTEXTS 160
#define RXE_PER_CONTEXT_SIZE 0x1000 /* 4k */
@@ -161,34 +117,49 @@
(CR_CREDIT_RETURN_DUE_TO_FORCE_MASK << \
CR_CREDIT_RETURN_DUE_TO_FORCE_SHIFT)
-/* interrupt source numbers */
-#define IS_GENERAL_ERR_START 0
-#define IS_SDMAENG_ERR_START 16
-#define IS_SENDCTXT_ERR_START 32
-#define IS_SDMA_START 192 /* includes SDmaProgress,SDmaIdle */
+/* Specific IRQ sources */
+#define CCE_ERR_INT 0
+#define RXE_ERR_INT 1
+#define MISC_ERR_INT 2
+#define PIO_ERR_INT 4
+#define SDMA_ERR_INT 5
+#define EGRESS_ERR_INT 6
+#define TXE_ERR_INT 7
+#define PBC_INT 240
+#define GPIO_ASSERT_INT 241
+#define QSFP1_INT 242
+#define QSFP2_INT 243
+#define TCRIT_INT 244
+
+/* interrupt source ranges */
+#define IS_FIRST_SOURCE CCE_ERR_INT
+#define IS_GENERAL_ERR_START 0
+#define IS_SDMAENG_ERR_START 16
+#define IS_SENDCTXT_ERR_START 32
+#define IS_SDMA_START 192
+#define IS_SDMA_PROGRESS_START 208
+#define IS_SDMA_IDLE_START 224
#define IS_VARIOUS_START 240
#define IS_DC_START 248
#define IS_RCVAVAIL_START 256
#define IS_RCVURGENT_START 416
#define IS_SENDCREDIT_START 576
#define IS_RESERVED_START 736
-#define IS_MAX_SOURCES 768
+#define IS_LAST_SOURCE 767
/* derived interrupt source values */
-#define IS_GENERAL_ERR_END IS_SDMAENG_ERR_START
-#define IS_SDMAENG_ERR_END IS_SENDCTXT_ERR_START
-#define IS_SENDCTXT_ERR_END IS_SDMA_START
-#define IS_SDMA_END IS_VARIOUS_START
-#define IS_VARIOUS_END IS_DC_START
-#define IS_DC_END IS_RCVAVAIL_START
-#define IS_RCVAVAIL_END IS_RCVURGENT_START
-#define IS_RCVURGENT_END IS_SENDCREDIT_START
-#define IS_SENDCREDIT_END IS_RESERVED_START
-#define IS_RESERVED_END IS_MAX_SOURCES
-
-/* absolute interrupt numbers for QSFP1Int and QSFP2Int */
-#define QSFP1_INT 242
-#define QSFP2_INT 243
+#define IS_GENERAL_ERR_END 7
+#define IS_SDMAENG_ERR_END 31
+#define IS_SENDCTXT_ERR_END 191
+#define IS_SDMA_END 207
+#define IS_SDMA_PROGRESS_END 223
+#define IS_SDMA_IDLE_END 239
+#define IS_VARIOUS_END 244
+#define IS_DC_END 255
+#define IS_RCVAVAIL_END 415
+#define IS_RCVURGENT_END 575
+#define IS_SENDCREDIT_END 735
+#define IS_RESERVED_END IS_LAST_SOURCE
/* DCC_CFG_PORT_CONFIG logical link states */
#define LSTATE_DOWN 0x1
@@ -196,6 +167,15 @@
#define LSTATE_ARMED 0x3
#define LSTATE_ACTIVE 0x4
+/* DCC_CFG_RESET reset states */
+#define LCB_RX_FPE_TX_FPE_INTO_RESET (DCC_CFG_RESET_RESET_LCB | \
+ DCC_CFG_RESET_RESET_TX_FPE | \
+ DCC_CFG_RESET_RESET_RX_FPE | \
+ DCC_CFG_RESET_ENABLE_CCLK_BCC)
+ /* 0x17 */
+
+#define LCB_RX_FPE_TX_FPE_OUT_OF_RESET DCC_CFG_RESET_ENABLE_CCLK_BCC /* 0x10 */
+
/* DC8051_STS_CUR_STATE port values (physical link states) */
#define PLS_DISABLED 0x30
#define PLS_OFFLINE 0x90
@@ -204,6 +184,7 @@
#define PLS_OFFLINE_READY_TO_QUIET_LT 0x92
#define PLS_OFFLINE_REPORT_FAILURE 0x93
#define PLS_OFFLINE_READY_TO_QUIET_BCC 0x94
+#define PLS_OFFLINE_QUIET_DURATION 0x95
#define PLS_POLLING 0x20
#define PLS_POLLING_QUIET 0x20
#define PLS_POLLING_ACTIVE 0x21
@@ -282,6 +263,7 @@
#define HREQ_SET_TX_EQ_ABS 0x04
#define HREQ_SET_TX_EQ_REL 0x05
#define HREQ_ENABLE 0x06
+#define HREQ_LCB_RESET 0x07
#define HREQ_CONFIG_DONE 0xfe
#define HREQ_INTERFACE_TEST 0xff
@@ -334,6 +316,8 @@
#define MAX_EAGER_BUFFER (256 * 1024)
#define MAX_EAGER_BUFFER_TOTAL (64 * (1 << 20)) /* max per ctxt 64MB */
#define MAX_EXPECTED_BUFFER (2048 * 1024)
+#define HFI1_MIN_HDRQ_EGRBUF_CNT 32
+#define HFI1_MAX_HDRQ_EGRBUF_CNT 16352
/*
* Receive expected base and count and eager base and count increment -
@@ -382,8 +366,9 @@
#define TX_SETTINGS 0x06
#define VERIFY_CAP_LOCAL_PHY 0x07
#define VERIFY_CAP_LOCAL_FABRIC 0x08
-#define VERIFY_CAP_LOCAL_LINK_WIDTH 0x09
+#define VERIFY_CAP_LOCAL_LINK_MODE 0x09
#define LOCAL_DEVICE_ID 0x0a
+#define RESERVED_REGISTERS 0x0b
#define LOCAL_LNI_INFO 0x0c
#define REMOTE_LNI_INFO 0x0d
#define MISC_STATUS 0x0e
@@ -394,7 +379,8 @@
#define LAST_REMOTE_STATE_COMPLETE 0x13
#define LINK_QUALITY_INFO 0x14
#define REMOTE_DEVICE_ID 0x15
-#define LINK_DOWN_REASON 0x16
+#define LINK_DOWN_REASON 0x16 /* first byte of offset 0x16 */
+#define VERSION_PATCH 0x16 /* last byte of offset 0x16 */
/* 8051 lane specific register field IDs */
#define TX_EQ_SETTINGS 0x00
@@ -505,6 +491,10 @@
#define DOWN_REMOTE_REASON_SHIFT 16
#define DOWN_REMOTE_REASON_MASK 0xff
+#define HOST_INTERFACE_VERSION 1
+#define HOST_INTERFACE_VERSION_SHIFT 16
+#define HOST_INTERFACE_VERSION_MASK 0xff
+
/* verify capability PHY power management bits */
#define PWRM_BER_CONTROL 0x1
#define PWRM_BANDWIDTH_CONTROL 0x2
@@ -524,10 +514,12 @@ enum {
#define SUPPORTED_CRCS (CAP_CRC_14B | CAP_CRC_48B)
/* misc status version fields */
-#define STS_FM_VERSION_A_SHIFT 16
-#define STS_FM_VERSION_A_MASK 0xff
-#define STS_FM_VERSION_B_SHIFT 24
-#define STS_FM_VERSION_B_MASK 0xff
+#define STS_FM_VERSION_MINOR_SHIFT 16
+#define STS_FM_VERSION_MINOR_MASK 0xff
+#define STS_FM_VERSION_MAJOR_SHIFT 24
+#define STS_FM_VERSION_MAJOR_MASK 0xff
+#define STS_FM_VERSION_PATCH_SHIFT 24
+#define STS_FM_VERSION_PATCH_MASK 0xff
/* LCB_CFG_CRC_MODE TX_VAL and RX_VAL CRC mode values */
#define LCB_CRC_16B 0x0 /* 16b CRC */
@@ -552,7 +544,7 @@ enum {
/* timeouts */
#define LINK_RESTART_DELAY 1000 /* link restart delay, in ms */
#define TIMEOUT_8051_START 5000 /* 8051 start timeout, in ms */
-#define DC8051_COMMAND_TIMEOUT 20000 /* DC8051 command timeout, in ms */
+#define DC8051_COMMAND_TIMEOUT 1000 /* DC8051 command timeout, in ms */
#define FREEZE_STATUS_TIMEOUT 20 /* wait for freeze indicators, in ms */
#define VL_STATUS_CLEAR_TIMEOUT 5000 /* per-VL status clear, in ms */
#define CCE_STATUS_TIMEOUT 10 /* time to clear CCE Status, in ms */
@@ -575,6 +567,10 @@ enum {
#define LOOPBACK_LCB 2
#define LOOPBACK_CABLE 3 /* external cable */
+/* set up bits in MISC_CONFIG_BITS */
+#define LOOPBACK_SERDES_CONFIG_BIT_MASK_SHIFT 0
+#define EXT_CFG_LCB_RESET_SUPPORTED_SHIFT 3
+
/* read and write hardware registers */
u64 read_csr(const struct hfi1_devdata *dd, u32 offset);
void write_csr(const struct hfi1_devdata *dd, u32 offset, u64 value);
@@ -602,11 +598,11 @@ int read_lcb_csr(struct hfi1_devdata *dd, u32 offset, u64 *data);
int write_lcb_csr(struct hfi1_devdata *dd, u32 offset, u64 data);
void __iomem *get_csr_addr(
- struct hfi1_devdata *dd,
+ const struct hfi1_devdata *dd,
u32 offset);
static inline void __iomem *get_kctxt_csr_addr(
- struct hfi1_devdata *dd,
+ const struct hfi1_devdata *dd,
int ctxt,
u32 offset0)
{
@@ -633,14 +629,48 @@ static inline void write_uctxt_csr(struct hfi1_devdata *dd, int ctxt,
write_csr(dd, offset0 + (0x1000 * ctxt), value);
}
-u64 create_pbc(struct hfi1_pportdata *ppd, u64, int, u32, u32);
+static inline u32 chip_rcv_contexts(struct hfi1_devdata *dd)
+{
+ return read_csr(dd, RCV_CONTEXTS);
+}
+
+static inline u32 chip_send_contexts(struct hfi1_devdata *dd)
+{
+ return read_csr(dd, SEND_CONTEXTS);
+}
+
+static inline u32 chip_sdma_engines(struct hfi1_devdata *dd)
+{
+ return read_csr(dd, SEND_DMA_ENGINES);
+}
+
+static inline u32 chip_pio_mem_size(struct hfi1_devdata *dd)
+{
+ return read_csr(dd, SEND_PIO_MEM_SIZE);
+}
+
+static inline u32 chip_sdma_mem_size(struct hfi1_devdata *dd)
+{
+ return read_csr(dd, SEND_DMA_MEM_SIZE);
+}
+
+static inline u32 chip_rcv_array_count(struct hfi1_devdata *dd)
+{
+ return read_csr(dd, RCV_ARRAY_CNT);
+}
+
+u8 encode_rcv_header_entry_size(u8 size);
+int hfi1_validate_rcvhdrcnt(struct hfi1_devdata *dd, uint thecnt);
+void set_hdrq_regs(struct hfi1_devdata *dd, u8 ctxt, u8 entsize, u16 hdrcnt);
+
+u64 create_pbc(struct hfi1_pportdata *ppd, u64 flags, int srate_mbs, u32 vl,
+ u32 dw_len);
/* firmware.c */
#define SBUS_MASTER_BROADCAST 0xfd
#define NUM_PCIE_SERDES 16 /* number of PCIe serdes on the SBus */
extern const u8 pcie_serdes_broadcast[];
extern const u8 pcie_pcs_addrs[2][NUM_PCIE_SERDES];
-extern uint platform_config_load;
/* SBus commands */
#define RESET_SBUS_RECEIVER 0x20
@@ -698,7 +728,9 @@ void fabric_serdes_reset(struct hfi1_devdata *dd);
int read_8051_data(struct hfi1_devdata *dd, u32 addr, u32 len, u64 *result);
/* chip.c */
-void read_misc_status(struct hfi1_devdata *dd, u8 *ver_a, u8 *ver_b);
+void read_misc_status(struct hfi1_devdata *dd, u8 *ver_major, u8 *ver_minor,
+ u8 *ver_patch);
+int write_host_interface_version(struct hfi1_devdata *dd, u8 version);
void read_guid(struct hfi1_devdata *dd);
int wait_fm_ready(struct hfi1_devdata *dd, u32 mstimeout);
void set_link_down_reason(struct hfi1_pportdata *ppd, u8 lcl_reason,
@@ -713,7 +745,7 @@ void handle_link_downgrade(struct work_struct *work);
void handle_link_bounce(struct work_struct *work);
void handle_start_link(struct work_struct *work);
void handle_sma_message(struct work_struct *work);
-void reset_qsfp(struct hfi1_pportdata *ppd);
+int reset_qsfp(struct hfi1_pportdata *ppd);
void qsfp_event(struct work_struct *work);
void start_freeze_handling(struct hfi1_pportdata *ppd, int flags);
int send_idle_sma(struct hfi1_devdata *dd, u64 message);
@@ -722,9 +754,10 @@ int read_8051_config(struct hfi1_devdata *, u8, u8, u32 *);
int start_link(struct hfi1_pportdata *ppd);
int bringup_serdes(struct hfi1_pportdata *ppd);
void set_intr_state(struct hfi1_devdata *dd, u32 enable);
-void apply_link_downgrade_policy(struct hfi1_pportdata *ppd,
- int refresh_widths);
-void update_usrhead(struct hfi1_ctxtdata *, u32, u32, u32, u32, u32);
+bool apply_link_downgrade_policy(struct hfi1_pportdata *ppd,
+ bool refresh_widths);
+void update_usrhead(struct hfi1_ctxtdata *rcd, u32 hd, u32 updegr, u32 egrhd,
+ u32 intr_adjust, u32 npkts);
int stop_drain_data_vls(struct hfi1_devdata *dd);
int open_fill_data_vls(struct hfi1_devdata *dd);
u32 ns_to_cclock(struct hfi1_devdata *dd, u32 ns);
@@ -735,23 +768,17 @@ void clear_linkup_counters(struct hfi1_devdata *dd);
u32 hdrqempty(struct hfi1_ctxtdata *rcd);
int is_ax(struct hfi1_devdata *dd);
int is_bx(struct hfi1_devdata *dd);
+bool is_urg_masked(struct hfi1_ctxtdata *rcd);
u32 read_physical_state(struct hfi1_devdata *dd);
u32 chip_to_opa_pstate(struct hfi1_devdata *dd, u32 chip_pstate);
-u32 get_logical_state(struct hfi1_pportdata *ppd);
-const char *opa_lstate_name(u32 lstate);
const char *opa_pstate_name(u32 pstate);
-u32 driver_physical_state(struct hfi1_pportdata *ppd);
-u32 driver_logical_state(struct hfi1_pportdata *ppd);
+u32 driver_pstate(struct hfi1_pportdata *ppd);
+u32 driver_lstate(struct hfi1_pportdata *ppd);
int acquire_lcb_access(struct hfi1_devdata *dd, int sleep_ok);
int release_lcb_access(struct hfi1_devdata *dd, int sleep_ok);
#define LCB_START DC_LCB_CSRS
#define LCB_END DC_8051_CSRS /* next block is 8051 */
-static inline int is_lcb_offset(u32 offset)
-{
- return (offset >= LCB_START && offset < LCB_END);
-}
-
extern uint num_vls;
extern uint disable_integrity;
@@ -789,6 +816,10 @@ static inline int idx_from_vl(int vl)
/* Per device counter indexes */
enum {
C_RCV_OVF = 0,
+ C_RX_LEN_ERR,
+ C_RX_SHORT_ERR,
+ C_RX_ICRC_ERR,
+ C_RX_EBP,
C_RX_TID_FULL,
C_RX_TID_INVALID,
C_RX_TID_FLGMS,
@@ -854,10 +885,12 @@ enum {
C_DC_PG_STS_TX_MBE_CNT,
C_SW_CPU_INTR,
C_SW_CPU_RCV_LIM,
+ C_SW_CTX0_SEQ_DROP,
C_SW_VTX_WAIT,
C_SW_PIO_WAIT,
C_SW_PIO_DRAIN,
C_SW_KMEM_WAIT,
+ C_SW_TID_WAIT,
C_SW_SEND_SCHED,
C_SDMA_DESC_FETCHED_CNT,
C_SDMA_INT_CNT,
@@ -1172,6 +1205,7 @@ enum {
C_SW_IBP_RDMA_SEQ,
C_SW_IBP_UNALIGNED,
C_SW_IBP_SEQ_NAK,
+ C_SW_IBP_RC_CRWAITS,
C_SW_CPU_RC_ACKS,
C_SW_CPU_RC_QACKS,
C_SW_CPU_RC_DELAYED_COMP,
@@ -1341,23 +1375,40 @@ enum {
u64 get_all_cpu_total(u64 __percpu *cntr);
void hfi1_start_cleanup(struct hfi1_devdata *dd);
void hfi1_clear_tids(struct hfi1_ctxtdata *rcd);
-struct ib_header *hfi1_get_msgheader(
- struct hfi1_devdata *dd, __le32 *rhf_addr);
-int hfi1_init_ctxt(struct send_context *sc);
+void hfi1_init_ctxt(struct send_context *sc);
void hfi1_put_tid(struct hfi1_devdata *dd, u32 index,
u32 type, unsigned long pa, u16 order);
void hfi1_quiet_serdes(struct hfi1_pportdata *ppd);
-void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt);
+void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op,
+ struct hfi1_ctxtdata *rcd);
u32 hfi1_read_cntrs(struct hfi1_devdata *dd, char **namep, u64 **cntrp);
u32 hfi1_read_portcntrs(struct hfi1_pportdata *ppd, char **namep, u64 **cntrp);
-u8 hfi1_ibphys_portstate(struct hfi1_pportdata *ppd);
int hfi1_get_ib_cfg(struct hfi1_pportdata *ppd, int which);
int hfi1_set_ib_cfg(struct hfi1_pportdata *ppd, int which, u32 val);
-int hfi1_set_ctxt_jkey(struct hfi1_devdata *dd, unsigned ctxt, u16 jkey);
-int hfi1_clear_ctxt_jkey(struct hfi1_devdata *dd, unsigned ctxt);
-int hfi1_set_ctxt_pkey(struct hfi1_devdata *dd, unsigned ctxt, u16 pkey);
-int hfi1_clear_ctxt_pkey(struct hfi1_devdata *dd, unsigned ctxt);
+int hfi1_set_ctxt_jkey(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd,
+ u16 jkey);
+int hfi1_clear_ctxt_jkey(struct hfi1_devdata *dd, struct hfi1_ctxtdata *ctxt);
+int hfi1_set_ctxt_pkey(struct hfi1_devdata *dd, struct hfi1_ctxtdata *ctxt,
+ u16 pkey);
+int hfi1_clear_ctxt_pkey(struct hfi1_devdata *dd, struct hfi1_ctxtdata *ctxt);
void hfi1_read_link_quality(struct hfi1_devdata *dd, u8 *link_quality);
+void hfi1_init_vnic_rsm(struct hfi1_devdata *dd);
+void hfi1_deinit_vnic_rsm(struct hfi1_devdata *dd);
+
+irqreturn_t general_interrupt(int irq, void *data);
+irqreturn_t sdma_interrupt(int irq, void *data);
+irqreturn_t receive_context_interrupt(int irq, void *data);
+irqreturn_t receive_context_thread(int irq, void *data);
+irqreturn_t receive_context_interrupt_napi(int irq, void *data);
+
+int set_intr_bits(struct hfi1_devdata *dd, u16 first, u16 last, bool set);
+void init_qsfp_int(struct hfi1_devdata *dd);
+void remap_intr(struct hfi1_devdata *dd, int isrc, int msix_intr);
+void remap_sdma_interrupts(struct hfi1_devdata *dd, int engine, int msix_intr);
+void reset_interrupts(struct hfi1_devdata *dd);
+u8 hfi1_get_qp_map(struct hfi1_devdata *dd, u8 idx);
+void hfi1_init_aip_rsm(struct hfi1_devdata *dd);
+void hfi1_deinit_aip_rsm(struct hfi1_devdata *dd);
/*
* Interrupt source table.
diff --git a/drivers/infiniband/hw/hfi1/chip_registers.h b/drivers/infiniband/hw/hfi1/chip_registers.h
index 5bfa839d1c48..d79e25d20fb8 100644
--- a/drivers/infiniband/hw/hfi1/chip_registers.h
+++ b/drivers/infiniband/hw/hfi1/chip_registers.h
@@ -1,53 +1,11 @@
-#ifndef DEF_CHIP_REG
-#define DEF_CHIP_REG
-
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
* Copyright(c) 2015, 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
*/
+#ifndef DEF_CHIP_REG
+#define DEF_CHIP_REG
+
#define CORE 0x000000000000
#define CCE (CORE + 0x000000000000)
#define ASIC (CORE + 0x000000400000)
@@ -97,8 +55,11 @@
#define DCC_CFG_PORT_CONFIG_MTU_CAP_SHIFT 32
#define DCC_CFG_PORT_CONFIG_MTU_CAP_SMASK 0x700000000ull
#define DCC_CFG_RESET (DCC_CSRS + 0x000000000000)
-#define DCC_CFG_RESET_RESET_LCB_SHIFT 0
-#define DCC_CFG_RESET_RESET_RX_FPE_SHIFT 2
+#define DCC_CFG_RESET_RESET_LCB BIT_ULL(0)
+#define DCC_CFG_RESET_RESET_TX_FPE BIT_ULL(1)
+#define DCC_CFG_RESET_RESET_RX_FPE BIT_ULL(2)
+#define DCC_CFG_RESET_RESET_8051 BIT_ULL(3)
+#define DCC_CFG_RESET_ENABLE_CCLK_BCC BIT_ULL(4)
#define DCC_CFG_SC_VL_TABLE_15_0 (DCC_CSRS + 0x000000000028)
#define DCC_CFG_SC_VL_TABLE_15_0_ENTRY0_SHIFT 0
#define DCC_CFG_SC_VL_TABLE_15_0_ENTRY10_SHIFT 40
@@ -377,6 +338,10 @@
#define DC_LCB_PRF_TX_FLIT_CNT (DC_LCB_CSRS + 0x000000000418)
#define DC_LCB_STS_LINK_TRANSFER_ACTIVE (DC_LCB_CSRS + 0x000000000468)
#define DC_LCB_STS_ROUND_TRIP_LTP_CNT (DC_LCB_CSRS + 0x0000000004B0)
+#define RCV_LENGTH_ERR_CNT 0
+#define RCV_SHORT_ERR_CNT 2
+#define RCV_ICRC_ERR_CNT 6
+#define RCV_EBP_CNT 9
#define RCV_BUF_OVFL_CNT 10
#define RCV_CONTEXT_EGR_STALL 22
#define RCV_DATA_PKT_CNT 0
@@ -635,6 +600,12 @@
#define RCV_BTH_QP_KDETH_QP_MASK 0xFFull
#define RCV_BTH_QP_KDETH_QP_SHIFT 16
#define RCV_BYPASS (RXE + 0x000000000038)
+#define RCV_BYPASS_HDR_SIZE_SHIFT 16
+#define RCV_BYPASS_HDR_SIZE_MASK 0x1Full
+#define RCV_BYPASS_HDR_SIZE_SMASK 0x1F0000ull
+#define RCV_BYPASS_BYPASS_CONTEXT_SHIFT 0
+#define RCV_BYPASS_BYPASS_CONTEXT_MASK 0xFFull
+#define RCV_BYPASS_BYPASS_CONTEXT_SMASK 0xFFull
#define RCV_CONTEXTS (RXE + 0x000000000010)
#define RCV_COUNTER_ARRAY32 (RXE + 0x000000000400)
#define RCV_COUNTER_ARRAY64 (RXE + 0x000000000500)
@@ -839,7 +810,9 @@
#define SEND_CM_CTRL_FORCE_CREDIT_MODE_SMASK 0x8ull
#define SEND_CM_CTRL_RESETCSR 0x0000000000000020ull
#define SEND_CM_GLOBAL_CREDIT (TXE + 0x000000000508)
+#define SEND_CM_GLOBAL_CREDIT_AU_MASK 0x7ull
#define SEND_CM_GLOBAL_CREDIT_AU_SHIFT 16
+#define SEND_CM_GLOBAL_CREDIT_AU_SMASK 0x70000ull
#define SEND_CM_GLOBAL_CREDIT_RESETCSR 0x0000094000030000ull
#define SEND_CM_GLOBAL_CREDIT_SHARED_LIMIT_MASK 0xFFFFull
#define SEND_CM_GLOBAL_CREDIT_SHARED_LIMIT_SHIFT 0
@@ -867,6 +840,10 @@
#define SEND_CTRL (TXE + 0x000000000000)
#define SEND_CTRL_CM_RESET_SMASK 0x4ull
#define SEND_CTRL_SEND_ENABLE_SMASK 0x1ull
+#define SEND_CTRL_UNSUPPORTED_VL_SHIFT 3
+#define SEND_CTRL_UNSUPPORTED_VL_MASK 0xFFull
+#define SEND_CTRL_UNSUPPORTED_VL_SMASK (SEND_CTRL_UNSUPPORTED_VL_MASK \
+ << SEND_CTRL_UNSUPPORTED_VL_SHIFT)
#define SEND_CTRL_VL_ARBITER_ENABLE_SMASK 0x2ull
#define SEND_CTXT_CHECK_ENABLE (TXE + 0x000000100080)
#define SEND_CTXT_CHECK_ENABLE_CHECK_BYPASS_VL_MAPPING_SMASK 0x80ull
@@ -920,6 +897,10 @@
#define SEND_CTXT_CREDIT_CTRL_THRESHOLD_MASK 0x7FFull
#define SEND_CTXT_CREDIT_CTRL_THRESHOLD_SHIFT 0
#define SEND_CTXT_CREDIT_CTRL_THRESHOLD_SMASK 0x7FFull
+#define SEND_CTXT_CREDIT_STATUS (TXE + 0x000000100018)
+#define SEND_CTXT_CREDIT_STATUS_CURRENT_FREE_COUNTER_MASK 0x7FFull
+#define SEND_CTXT_CREDIT_STATUS_CURRENT_FREE_COUNTER_SHIFT 32
+#define SEND_CTXT_CREDIT_STATUS_LAST_RETURNED_COUNTER_SMASK 0x7FFull
#define SEND_CTXT_CREDIT_FORCE (TXE + 0x000000100028)
#define SEND_CTXT_CREDIT_FORCE_FORCE_RETURN_SMASK 0x1ull
#define SEND_CTXT_CREDIT_RETURN_ADDR (TXE + 0x000000100020)
diff --git a/drivers/infiniband/hw/hfi1/common.h b/drivers/infiniband/hw/hfi1/common.h
index da7be21bedb4..8abc902b96f3 100644
--- a/drivers/infiniband/hw/hfi1/common.h
+++ b/drivers/infiniband/hw/hfi1/common.h
@@ -1,48 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
+ * Copyright(c) 2015 - 2020 Intel Corporation.
*/
#ifndef _COMMON_H
@@ -72,13 +30,6 @@
* compilation unit
*/
-/*
- * If a packet's QP[23:16] bits match this value, then it is
- * a PSM packet and the hardware will expect a KDETH header
- * following the BTH.
- */
-#define DEFAULT_KDETH_QP 0x80
-
/* driver/hw feature set bitmask */
#define HFI1_CAP_USER_SHIFT 24
#define HFI1_CAP_MASK ((1UL << HFI1_CAP_USER_SHIFT) - 1)
@@ -136,18 +87,22 @@
HFI1_CAP_ALLOW_PERM_JKEY | \
HFI1_CAP_STATIC_RATE_CTRL | \
HFI1_CAP_PRINT_UNIMPL | \
- HFI1_CAP_TID_UNMAP)
+ HFI1_CAP_TID_UNMAP | \
+ HFI1_CAP_OPFN)
/*
* A set of capability bits that are "global" and are not allowed to be
* set in the user bitmask.
*/
#define HFI1_CAP_RESERVED_MASK ((HFI1_CAP_SDMA | \
- HFI1_CAP_USE_SDMA_HEAD | \
- HFI1_CAP_EXTENDED_PSN | \
- HFI1_CAP_PRINT_UNIMPL | \
- HFI1_CAP_NO_INTEGRITY | \
- HFI1_CAP_PKEY_CHECK) << \
- HFI1_CAP_USER_SHIFT)
+ HFI1_CAP_USE_SDMA_HEAD | \
+ HFI1_CAP_EXTENDED_PSN | \
+ HFI1_CAP_PRINT_UNIMPL | \
+ HFI1_CAP_NO_INTEGRITY | \
+ HFI1_CAP_PKEY_CHECK | \
+ HFI1_CAP_TID_RDMA | \
+ HFI1_CAP_OPFN | \
+ HFI1_CAP_AIP) << \
+ HFI1_CAP_USER_SHIFT)
/*
* Set of capabilities that need to be enabled for kernel context in
* order to be allowed for user contexts, as well.
@@ -163,6 +118,7 @@
HFI1_CAP_PKEY_CHECK | \
HFI1_CAP_MULTI_PKT_EGR | \
HFI1_CAP_EXTENDED_PSN | \
+ HFI1_CAP_AIP | \
((HFI1_CAP_HDRSUPP | \
HFI1_CAP_MULTI_PKT_EGR | \
HFI1_CAP_STATIC_RATE_CTRL | \
@@ -181,61 +137,6 @@
#define HFI1_USER_SWVERSION ((HFI1_USER_SWMAJOR << HFI1_SWMAJOR_SHIFT) | \
HFI1_USER_SWMINOR)
-#ifndef HFI1_KERN_TYPE
-#define HFI1_KERN_TYPE 0
-#endif
-
-/*
- * Similarly, this is the kernel version going back to the user. It's
- * slightly different, in that we want to tell if the driver was built as
- * part of a Intel release, or from the driver from openfabrics.org,
- * kernel.org, or a standard distribution, for support reasons.
- * The high bit is 0 for non-Intel and 1 for Intel-built/supplied.
- *
- * It's returned by the driver to the user code during initialization in the
- * spi_sw_version field of hfi1_base_info, so the user code can in turn
- * check for compatibility with the kernel.
-*/
-#define HFI1_KERN_SWVERSION ((HFI1_KERN_TYPE << 31) | HFI1_USER_SWVERSION)
-
-/*
- * Define the driver version number. This is something that refers only
- * to the driver itself, not the software interfaces it supports.
- */
-#ifndef HFI1_DRIVER_VERSION_BASE
-#define HFI1_DRIVER_VERSION_BASE "0.9-294"
-#endif
-
-/* create the final driver version string */
-#ifdef HFI1_IDSTR
-#define HFI1_DRIVER_VERSION HFI1_DRIVER_VERSION_BASE " " HFI1_IDSTR
-#else
-#define HFI1_DRIVER_VERSION HFI1_DRIVER_VERSION_BASE
-#endif
-
-/*
- * Diagnostics can send a packet by writing the following
- * struct to the diag packet special file.
- *
- * This allows a custom PBC qword, so that special modes and deliberate
- * changes to CRCs can be used.
- */
-#define _DIAG_PKT_VERS 1
-struct diag_pkt {
- __u16 version; /* structure version */
- __u16 unit; /* which device */
- __u16 sw_index; /* send sw index to use */
- __u16 len; /* data length, in bytes */
- __u16 port; /* port number */
- __u16 unused;
- __u32 flags; /* call flags */
- __u64 data; /* user data pointer */
- __u64 pbc; /* PBC for the packet */
-};
-
-/* diag_pkt flags */
-#define F_DIAGPKT_WAIT 0x1 /* wait until packet is sent */
-
/*
* The next set of defines are for packet headers, and chip register
* and memory bits that are visible to and/or used by user-mode software.
@@ -283,7 +184,7 @@ struct diag_pkt {
#define RHF_TID_ERR (0x1ull << 59)
#define RHF_LEN_ERR (0x1ull << 60)
#define RHF_ECC_ERR (0x1ull << 61)
-#define RHF_VCRC_ERR (0x1ull << 62)
+#define RHF_RESERVED (0x1ull << 62)
#define RHF_ICRC_ERR (0x1ull << 63)
#define RHF_ERROR_SMASK 0xffe0000000000000ull /* bits 63:53 */
@@ -320,30 +221,30 @@ struct diag_pkt {
/* RHF receive type error - bypass packet errors */
#define RHF_RTE_BYPASS_NO_ERR 0x0
+/* MAX RcvSEQ */
+#define RHF_MAX_SEQ 13
+
/* IB - LRH header constants */
#define HFI1_LRH_GRH 0x0003 /* 1. word of IB LRH - next header: GRH */
#define HFI1_LRH_BTH 0x0002 /* 1. word of IB LRH - next header: BTH */
/* misc. */
+#define SC15_PACKET 0xF
#define SIZE_OF_CRC 1
+#define SIZE_OF_LT 1
+#define MAX_16B_PADDING 12 /* CRC = 4, LT = 1, Pad = 0 to 7 bytes */
#define LIM_MGMT_P_KEY 0x7FFF
#define FULL_MGMT_P_KEY 0xFFFF
#define DEFAULT_P_KEY LIM_MGMT_P_KEY
-#define HFI1_AETH_CREDIT_SHIFT 24
-#define HFI1_AETH_CREDIT_MASK 0x1F
-#define HFI1_AETH_CREDIT_INVAL 0x1F
-#define HFI1_MSN_MASK 0xFFFFFF
-#define HFI1_FECN_SHIFT 31
-#define HFI1_FECN_MASK 1
-#define HFI1_FECN_SMASK BIT(HFI1_FECN_SHIFT)
-#define HFI1_BECN_SHIFT 30
-#define HFI1_BECN_MASK 1
-#define HFI1_BECN_SMASK BIT(HFI1_BECN_SHIFT)
#define HFI1_PSM_IOC_BASE_SEQ 0x0
+/* Number of BTH.PSN bits used for sequence number in expected rcvs */
+#define HFI1_KDETH_BTH_SEQ_SHIFT 11
+#define HFI1_KDETH_BTH_SEQ_MASK (BIT(HFI1_KDETH_BTH_SEQ_SHIFT) - 1)
+
static inline __u64 rhf_to_cpu(const __le32 *rbuf)
{
return __le64_to_cpu(*((__le64 *)rbuf));
diff --git a/drivers/infiniband/hw/hfi1/debugfs.c b/drivers/infiniband/hw/hfi1/debugfs.c
index 8725f4c086cf..ac37ab7f8995 100644
--- a/drivers/infiniband/hw/hfi1/debugfs.c
+++ b/drivers/infiniband/hw/hfi1/debugfs.c
@@ -1,143 +1,30 @@
-#ifdef CONFIG_DEBUG_FS
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
+ * Copyright(c) 2015-2018 Intel Corporation.
*/
+
#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include <linux/kernel.h>
#include <linux/export.h>
-#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/ratelimit.h>
+#include <linux/fault-inject.h>
#include "hfi.h"
+#include "trace.h"
#include "debugfs.h"
#include "device.h"
#include "qp.h"
#include "sdma.h"
+#include "fault.h"
static struct dentry *hfi1_dbg_root;
-/* wrappers to enforce srcu in seq file */
-static ssize_t hfi1_seq_read(
- struct file *file,
- char __user *buf,
- size_t size,
- loff_t *ppos)
-{
- struct dentry *d = file->f_path.dentry;
- int srcu_idx;
- ssize_t r;
-
- r = debugfs_use_file_start(d, &srcu_idx);
- if (likely(!r))
- r = seq_read(file, buf, size, ppos);
- debugfs_use_file_finish(srcu_idx);
- return r;
-}
-
-static loff_t hfi1_seq_lseek(
- struct file *file,
- loff_t offset,
- int whence)
-{
- struct dentry *d = file->f_path.dentry;
- int srcu_idx;
- loff_t r;
-
- r = debugfs_use_file_start(d, &srcu_idx);
- if (likely(!r))
- r = seq_lseek(file, offset, whence);
- debugfs_use_file_finish(srcu_idx);
- return r;
-}
-
#define private2dd(file) (file_inode(file)->i_private)
#define private2ppd(file) (file_inode(file)->i_private)
-#define DEBUGFS_SEQ_FILE_OPS(name) \
-static const struct seq_operations _##name##_seq_ops = { \
- .start = _##name##_seq_start, \
- .next = _##name##_seq_next, \
- .stop = _##name##_seq_stop, \
- .show = _##name##_seq_show \
-}
-
-#define DEBUGFS_SEQ_FILE_OPEN(name) \
-static int _##name##_open(struct inode *inode, struct file *s) \
-{ \
- struct seq_file *seq; \
- int ret; \
- ret = seq_open(s, &_##name##_seq_ops); \
- if (ret) \
- return ret; \
- seq = s->private_data; \
- seq->private = inode->i_private; \
- return 0; \
-}
-
-#define DEBUGFS_FILE_OPS(name) \
-static const struct file_operations _##name##_file_ops = { \
- .owner = THIS_MODULE, \
- .open = _##name##_open, \
- .read = hfi1_seq_read, \
- .llseek = hfi1_seq_lseek, \
- .release = seq_release \
-}
-
-#define DEBUGFS_FILE_CREATE(name, parent, data, ops, mode) \
-do { \
- struct dentry *ent; \
- ent = debugfs_create_file(name, mode, parent, \
- data, ops); \
- if (!ent) \
- pr_warn("create of %s failed\n", name); \
-} while (0)
-
-#define DEBUGFS_SEQ_FILE_CREATE(name, parent, data) \
- DEBUGFS_FILE_CREATE(#name, parent, data, &_##name##_file_ops, S_IRUGO)
-
static void *_opcode_stats_seq_start(struct seq_file *s, loff_t *pos)
{
struct hfi1_opcode_stats_perctx *opstats;
@@ -161,6 +48,17 @@ static void _opcode_stats_seq_stop(struct seq_file *s, void *v)
{
}
+static int opcode_stats_show(struct seq_file *s, u8 i, u64 packets, u64 bytes)
+{
+ if (!packets && !bytes)
+ return SEQ_SKIP;
+ seq_printf(s, "%02x %llu/%llu\n", i,
+ (unsigned long long)packets,
+ (unsigned long long)bytes);
+
+ return 0;
+}
+
static int _opcode_stats_seq_show(struct seq_file *s, void *v)
{
loff_t *spos = v;
@@ -168,26 +66,59 @@ static int _opcode_stats_seq_show(struct seq_file *s, void *v)
u64 n_packets = 0, n_bytes = 0;
struct hfi1_ibdev *ibd = (struct hfi1_ibdev *)s->private;
struct hfi1_devdata *dd = dd_from_dev(ibd);
+ struct hfi1_ctxtdata *rcd;
- for (j = 0; j < dd->first_user_ctxt; j++) {
- if (!dd->rcd[j])
- continue;
- n_packets += dd->rcd[j]->opstats->stats[i].n_packets;
- n_bytes += dd->rcd[j]->opstats->stats[i].n_bytes;
+ for (j = 0; j < dd->first_dyn_alloc_ctxt; j++) {
+ rcd = hfi1_rcd_get_by_index(dd, j);
+ if (rcd) {
+ n_packets += rcd->opstats->stats[i].n_packets;
+ n_bytes += rcd->opstats->stats[i].n_bytes;
+ }
+ hfi1_rcd_put(rcd);
}
- if (!n_packets && !n_bytes)
- return SEQ_SKIP;
- seq_printf(s, "%02llx %llu/%llu\n", i,
- (unsigned long long)n_packets,
- (unsigned long long)n_bytes);
-
- return 0;
+ return opcode_stats_show(s, i, n_packets, n_bytes);
}
DEBUGFS_SEQ_FILE_OPS(opcode_stats);
DEBUGFS_SEQ_FILE_OPEN(opcode_stats)
DEBUGFS_FILE_OPS(opcode_stats);
+static void *_tx_opcode_stats_seq_start(struct seq_file *s, loff_t *pos)
+{
+ return _opcode_stats_seq_start(s, pos);
+}
+
+static void *_tx_opcode_stats_seq_next(struct seq_file *s, void *v, loff_t *pos)
+{
+ return _opcode_stats_seq_next(s, v, pos);
+}
+
+static void _tx_opcode_stats_seq_stop(struct seq_file *s, void *v)
+{
+}
+
+static int _tx_opcode_stats_seq_show(struct seq_file *s, void *v)
+{
+ loff_t *spos = v;
+ loff_t i = *spos;
+ int j;
+ u64 n_packets = 0, n_bytes = 0;
+ struct hfi1_ibdev *ibd = (struct hfi1_ibdev *)s->private;
+ struct hfi1_devdata *dd = dd_from_dev(ibd);
+
+ for_each_possible_cpu(j) {
+ struct hfi1_opcode_stats_perctx *s =
+ per_cpu_ptr(dd->tx_opstats, j);
+ n_packets += s->stats[i].n_packets;
+ n_bytes += s->stats[i].n_bytes;
+ }
+ return opcode_stats_show(s, i, n_packets, n_bytes);
+}
+
+DEBUGFS_SEQ_FILE_OPS(tx_opcode_stats);
+DEBUGFS_SEQ_FILE_OPEN(tx_opcode_stats)
+DEBUGFS_FILE_OPS(tx_opcode_stats);
+
static void *_ctx_stats_seq_start(struct seq_file *s, loff_t *pos)
{
struct hfi1_ibdev *ibd = (struct hfi1_ibdev *)s->private;
@@ -195,7 +126,7 @@ static void *_ctx_stats_seq_start(struct seq_file *s, loff_t *pos)
if (!*pos)
return SEQ_START_TOKEN;
- if (*pos >= dd->first_user_ctxt)
+ if (*pos >= dd->first_dyn_alloc_ctxt)
return NULL;
return pos;
}
@@ -209,7 +140,7 @@ static void *_ctx_stats_seq_next(struct seq_file *s, void *v, loff_t *pos)
return pos;
++*pos;
- if (*pos >= dd->first_user_ctxt)
+ if (*pos >= dd->first_dyn_alloc_ctxt)
return NULL;
return pos;
}
@@ -226,6 +157,7 @@ static int _ctx_stats_seq_show(struct seq_file *s, void *v)
u64 n_packets = 0;
struct hfi1_ibdev *ibd = (struct hfi1_ibdev *)s->private;
struct hfi1_devdata *dd = dd_from_dev(ibd);
+ struct hfi1_ctxtdata *rcd;
if (v == SEQ_START_TOKEN) {
seq_puts(s, "Ctx:npkts\n");
@@ -235,11 +167,14 @@ static int _ctx_stats_seq_show(struct seq_file *s, void *v)
spos = v;
i = *spos;
- if (!dd->rcd[i])
+ rcd = hfi1_rcd_get_by_index_safe(dd, i);
+ if (!rcd)
return SEQ_SKIP;
- for (j = 0; j < ARRAY_SIZE(dd->rcd[i]->opstats->stats); j++)
- n_packets += dd->rcd[i]->opstats->stats[j].n_packets;
+ for (j = 0; j < ARRAY_SIZE(rcd->opstats->stats); j++)
+ n_packets += rcd->opstats->stats[j].n_packets;
+
+ hfi1_rcd_put(rcd);
if (!n_packets)
return SEQ_SKIP;
@@ -255,10 +190,10 @@ DEBUGFS_FILE_OPS(ctx_stats);
static void *_qp_stats_seq_start(struct seq_file *s, loff_t *pos)
__acquires(RCU)
{
- struct qp_iter *iter;
+ struct rvt_qp_iter *iter;
loff_t n = *pos;
- iter = qp_iter_init(s->private);
+ iter = rvt_qp_iter_init(s->private, 0, NULL);
/* stop calls rcu_read_unlock */
rcu_read_lock();
@@ -267,7 +202,7 @@ static void *_qp_stats_seq_start(struct seq_file *s, loff_t *pos)
return NULL;
do {
- if (qp_iter_next(iter)) {
+ if (rvt_qp_iter_next(iter)) {
kfree(iter);
return NULL;
}
@@ -280,11 +215,11 @@ static void *_qp_stats_seq_next(struct seq_file *s, void *iter_ptr,
loff_t *pos)
__must_hold(RCU)
{
- struct qp_iter *iter = iter_ptr;
+ struct rvt_qp_iter *iter = iter_ptr;
(*pos)++;
- if (qp_iter_next(iter)) {
+ if (rvt_qp_iter_next(iter)) {
kfree(iter);
return NULL;
}
@@ -300,7 +235,7 @@ static void _qp_stats_seq_stop(struct seq_file *s, void *iter_ptr)
static int _qp_stats_seq_show(struct seq_file *s, void *iter_ptr)
{
- struct qp_iter *iter = iter_ptr;
+ struct rvt_qp_iter *iter = iter_ptr;
if (!iter)
return 0;
@@ -356,6 +291,100 @@ DEBUGFS_SEQ_FILE_OPS(sdes);
DEBUGFS_SEQ_FILE_OPEN(sdes)
DEBUGFS_FILE_OPS(sdes);
+static void *_rcds_seq_start(struct seq_file *s, loff_t *pos)
+{
+ struct hfi1_ibdev *ibd;
+ struct hfi1_devdata *dd;
+
+ ibd = (struct hfi1_ibdev *)s->private;
+ dd = dd_from_dev(ibd);
+ if (!dd->rcd || *pos >= dd->n_krcv_queues)
+ return NULL;
+ return pos;
+}
+
+static void *_rcds_seq_next(struct seq_file *s, void *v, loff_t *pos)
+{
+ struct hfi1_ibdev *ibd = (struct hfi1_ibdev *)s->private;
+ struct hfi1_devdata *dd = dd_from_dev(ibd);
+
+ ++*pos;
+ if (!dd->rcd || *pos >= dd->num_rcv_contexts)
+ return NULL;
+ return pos;
+}
+
+static void _rcds_seq_stop(struct seq_file *s, void *v)
+{
+}
+
+static int _rcds_seq_show(struct seq_file *s, void *v)
+{
+ struct hfi1_ibdev *ibd = (struct hfi1_ibdev *)s->private;
+ struct hfi1_devdata *dd = dd_from_dev(ibd);
+ struct hfi1_ctxtdata *rcd;
+ loff_t *spos = v;
+ loff_t i = *spos;
+
+ rcd = hfi1_rcd_get_by_index_safe(dd, i);
+ if (rcd)
+ seqfile_dump_rcd(s, rcd);
+ hfi1_rcd_put(rcd);
+ return 0;
+}
+
+DEBUGFS_SEQ_FILE_OPS(rcds);
+DEBUGFS_SEQ_FILE_OPEN(rcds)
+DEBUGFS_FILE_OPS(rcds);
+
+static void *_pios_seq_start(struct seq_file *s, loff_t *pos)
+{
+ struct hfi1_ibdev *ibd;
+ struct hfi1_devdata *dd;
+
+ ibd = (struct hfi1_ibdev *)s->private;
+ dd = dd_from_dev(ibd);
+ if (!dd->send_contexts || *pos >= dd->num_send_contexts)
+ return NULL;
+ return pos;
+}
+
+static void *_pios_seq_next(struct seq_file *s, void *v, loff_t *pos)
+{
+ struct hfi1_ibdev *ibd = (struct hfi1_ibdev *)s->private;
+ struct hfi1_devdata *dd = dd_from_dev(ibd);
+
+ ++*pos;
+ if (!dd->send_contexts || *pos >= dd->num_send_contexts)
+ return NULL;
+ return pos;
+}
+
+static void _pios_seq_stop(struct seq_file *s, void *v)
+{
+}
+
+static int _pios_seq_show(struct seq_file *s, void *v)
+{
+ struct hfi1_ibdev *ibd = (struct hfi1_ibdev *)s->private;
+ struct hfi1_devdata *dd = dd_from_dev(ibd);
+ struct send_context_info *sci;
+ loff_t *spos = v;
+ loff_t i = *spos;
+ unsigned long flags;
+
+ spin_lock_irqsave(&dd->sc_lock, flags);
+ sci = &dd->send_contexts[i];
+ if (sci && sci->type != SC_USER && sci->allocated && sci->sc)
+ seqfile_dump_sci(s, i, sci);
+ spin_unlock_irqrestore(&dd->sc_lock, flags);
+ return 0;
+}
+
+DEBUGFS_SEQ_FILE_OPS(pios);
+DEBUGFS_SEQ_FILE_OPEN(pios)
+DEBUGFS_FILE_OPS(pios);
+
/* read the per-device counters */
static ssize_t dev_counters_read(struct file *file, char __user *buf,
size_t count, loff_t *ppos)
@@ -503,18 +532,11 @@ static ssize_t asic_flags_write(struct file *file, const char __user *buf,
ppd = private2ppd(file);
dd = ppd->dd;
- buff = kmalloc(count + 1, GFP_KERNEL);
- if (!buff)
- return -ENOMEM;
-
- ret = copy_from_user(buff, buf, count);
- if (ret > 0) {
- ret = -EFAULT;
- goto do_free;
- }
-
/* zero terminate and read the expected integer */
- buff[count] = 0;
+ buff = memdup_user_nul(buf, count);
+ if (IS_ERR(buff))
+ return PTR_ERR(buff);
+
ret = kstrtoull(buff, 0, &value);
if (ret)
goto do_free;
@@ -692,15 +714,9 @@ static ssize_t __i2c_debugfs_write(struct file *file, const char __user *buf,
if (i2c_addr == 0)
return -EINVAL;
- buff = kmalloc(count, GFP_KERNEL);
- if (!buff)
- return -ENOMEM;
-
- ret = copy_from_user(buff, buf, count);
- if (ret > 0) {
- ret = -EFAULT;
- goto _free;
- }
+ buff = memdup_user(buf, count);
+ if (IS_ERR(buff))
+ return PTR_ERR(buff);
total_written = i2c_write(ppd, target, i2c_addr, offset, buff, count);
if (total_written < 0) {
@@ -805,15 +821,10 @@ static ssize_t __qsfp_debugfs_write(struct file *file, const char __user *buf,
ppd = private2ppd(file);
- buff = kmalloc(count, GFP_KERNEL);
- if (!buff)
- return -ENOMEM;
+ buff = memdup_user(buf, count);
+ if (IS_ERR(buff))
+ return PTR_ERR(buff);
- ret = copy_from_user(buff, buf, count);
- if (ret > 0) {
- ret = -EFAULT;
- goto _free;
- }
total_written = qsfp_write(ppd, target, *ppos, buff, count);
if (total_written < 0) {
ret = total_written;
@@ -904,18 +915,10 @@ static ssize_t qsfp2_debugfs_read(struct file *file, char __user *buf,
static int __i2c_debugfs_open(struct inode *in, struct file *fp, u32 target)
{
struct hfi1_pportdata *ppd;
- int ret;
-
- if (!try_module_get(THIS_MODULE))
- return -ENODEV;
ppd = private2ppd(fp);
- ret = acquire_chip_resource(ppd->dd, i2c_target(target), 0);
- if (ret) /* failed - release the module */
- module_put(THIS_MODULE);
-
- return ret;
+ return acquire_chip_resource(ppd->dd, i2c_target(target), 0);
}
static int i2c1_debugfs_open(struct inode *in, struct file *fp)
@@ -935,7 +938,6 @@ static int __i2c_debugfs_release(struct inode *in, struct file *fp, u32 target)
ppd = private2ppd(fp);
release_chip_resource(ppd->dd, i2c_target(target));
- module_put(THIS_MODULE);
return 0;
}
@@ -953,18 +955,10 @@ static int i2c2_debugfs_release(struct inode *in, struct file *fp)
static int __qsfp_debugfs_open(struct inode *in, struct file *fp, u32 target)
{
struct hfi1_pportdata *ppd;
- int ret;
-
- if (!try_module_get(THIS_MODULE))
- return -ENODEV;
ppd = private2ppd(fp);
- ret = acquire_chip_resource(ppd->dd, i2c_target(target), 0);
- if (ret) /* failed - release the module */
- module_put(THIS_MODULE);
-
- return ret;
+ return acquire_chip_resource(ppd->dd, i2c_target(target), 0);
}
static int qsfp1_debugfs_open(struct inode *in, struct file *fp)
@@ -984,7 +978,6 @@ static int __qsfp_debugfs_release(struct inode *in, struct file *fp, u32 target)
ppd = private2ppd(fp);
release_chip_resource(ppd->dd, i2c_target(target));
- module_put(THIS_MODULE);
return 0;
}
@@ -999,10 +992,82 @@ static int qsfp2_debugfs_release(struct inode *in, struct file *fp)
return __qsfp_debugfs_release(in, fp, 1);
}
+#define EXPROM_WRITE_ENABLE BIT_ULL(14)
+
+static bool exprom_wp_disabled;
+
+static int exprom_wp_set(struct hfi1_devdata *dd, bool disable)
+{
+ u64 gpio_val = 0;
+
+ if (disable) {
+ gpio_val = EXPROM_WRITE_ENABLE;
+ exprom_wp_disabled = true;
+ dd_dev_info(dd, "Disable Expansion ROM Write Protection\n");
+ } else {
+ exprom_wp_disabled = false;
+ dd_dev_info(dd, "Enable Expansion ROM Write Protection\n");
+ }
+
+ write_csr(dd, ASIC_GPIO_OUT, gpio_val);
+ write_csr(dd, ASIC_GPIO_OE, gpio_val);
+
+ return 0;
+}
+
+static ssize_t exprom_wp_debugfs_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ return 0;
+}
+
+static ssize_t exprom_wp_debugfs_write(struct file *file,
+ const char __user *buf, size_t count,
+ loff_t *ppos)
+{
+ struct hfi1_pportdata *ppd = private2ppd(file);
+ char cdata;
+
+ if (count != 1)
+ return -EINVAL;
+ if (get_user(cdata, buf))
+ return -EFAULT;
+ if (cdata == '0')
+ exprom_wp_set(ppd->dd, false);
+ else if (cdata == '1')
+ exprom_wp_set(ppd->dd, true);
+ else
+ return -EINVAL;
+
+ return 1;
+}
+
+static unsigned long exprom_in_use;
+
+static int exprom_wp_debugfs_open(struct inode *in, struct file *fp)
+{
+ if (test_and_set_bit(0, &exprom_in_use))
+ return -EBUSY;
+
+ return 0;
+}
+
+static int exprom_wp_debugfs_release(struct inode *in, struct file *fp)
+{
+ struct hfi1_pportdata *ppd = private2ppd(fp);
+
+ if (exprom_wp_disabled)
+ exprom_wp_set(ppd->dd, false);
+ clear_bit(0, &exprom_in_use);
+
+ return 0;
+}
+
#define DEBUGFS_OPS(nm, readroutine, writeroutine) \
{ \
.name = nm, \
.ops = { \
+ .owner = THIS_MODULE, \
.read = readroutine, \
.write = writeroutine, \
.llseek = generic_file_llseek, \
@@ -1013,6 +1078,7 @@ static int qsfp2_debugfs_release(struct inode *in, struct file *fp)
{ \
.name = nm, \
.ops = { \
+ .owner = THIS_MODULE, \
.read = readf, \
.write = writef, \
.llseek = generic_file_llseek, \
@@ -1038,6 +1104,9 @@ static const struct counter_info port_cntr_ops[] = {
qsfp1_debugfs_open, qsfp1_debugfs_release),
DEBUGFS_XOPS("qsfp2", qsfp2_debugfs_read, qsfp2_debugfs_write,
qsfp2_debugfs_open, qsfp2_debugfs_release),
+ DEBUGFS_XOPS("exprom_wp", exprom_wp_debugfs_read,
+ exprom_wp_debugfs_write, exprom_wp_debugfs_open,
+ exprom_wp_debugfs_release),
DEBUGFS_OPS("asic_flags", asic_flags_read, asic_flags_write),
DEBUGFS_OPS("dc8051_memory", dc8051_memory_read, NULL),
DEBUGFS_OPS("lcb", debugfs_lcb_read, debugfs_lcb_write),
@@ -1086,6 +1155,7 @@ void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd)
char link[10];
struct hfi1_devdata *dd = dd_from_dev(ibd);
struct hfi1_pportdata *ppd;
+ struct dentry *root;
int unit = dd->unit;
int i, j;
@@ -1093,28 +1163,29 @@ void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd)
return;
snprintf(name, sizeof(name), "%s_%d", class_name(), unit);
snprintf(link, sizeof(link), "%d", unit);
- ibd->hfi1_ibdev_dbg = debugfs_create_dir(name, hfi1_dbg_root);
- if (!ibd->hfi1_ibdev_dbg) {
- pr_warn("create of %s failed\n", name);
- return;
- }
+ root = debugfs_create_dir(name, hfi1_dbg_root);
+ ibd->hfi1_ibdev_dbg = root;
+
ibd->hfi1_ibdev_link =
debugfs_create_symlink(link, hfi1_dbg_root, name);
- if (!ibd->hfi1_ibdev_link) {
- pr_warn("create of %s symlink failed\n", name);
- return;
- }
- DEBUGFS_SEQ_FILE_CREATE(opcode_stats, ibd->hfi1_ibdev_dbg, ibd);
- DEBUGFS_SEQ_FILE_CREATE(ctx_stats, ibd->hfi1_ibdev_dbg, ibd);
- DEBUGFS_SEQ_FILE_CREATE(qp_stats, ibd->hfi1_ibdev_dbg, ibd);
- DEBUGFS_SEQ_FILE_CREATE(sdes, ibd->hfi1_ibdev_dbg, ibd);
- DEBUGFS_SEQ_FILE_CREATE(sdma_cpu_list, ibd->hfi1_ibdev_dbg, ibd);
+
+ debugfs_create_file("opcode_stats", 0444, root, ibd,
+ &_opcode_stats_file_ops);
+ debugfs_create_file("tx_opcode_stats", 0444, root, ibd,
+ &_tx_opcode_stats_file_ops);
+ debugfs_create_file("ctx_stats", 0444, root, ibd, &_ctx_stats_file_ops);
+ debugfs_create_file("qp_stats", 0444, root, ibd, &_qp_stats_file_ops);
+ debugfs_create_file("sdes", 0444, root, ibd, &_sdes_file_ops);
+ debugfs_create_file("rcds", 0444, root, ibd, &_rcds_file_ops);
+ debugfs_create_file("pios", 0444, root, ibd, &_pios_file_ops);
+ debugfs_create_file("sdma_cpu_list", 0444, root, ibd,
+ &_sdma_cpu_list_file_ops);
+
/* dev counter files */
for (i = 0; i < ARRAY_SIZE(cntr_ops); i++)
- DEBUGFS_FILE_CREATE(cntr_ops[i].name,
- ibd->hfi1_ibdev_dbg,
- dd,
- &cntr_ops[i].ops, S_IRUGO);
+ debugfs_create_file(cntr_ops[i].name, 0444, root, dd,
+ &cntr_ops[i].ops);
+
/* per port files */
for (ppd = dd->pport, j = 0; j < dd->num_pports; j++, ppd++)
for (i = 0; i < ARRAY_SIZE(port_cntr_ops); i++) {
@@ -1122,19 +1193,21 @@ void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd)
sizeof(name),
port_cntr_ops[i].name,
j + 1);
- DEBUGFS_FILE_CREATE(name,
- ibd->hfi1_ibdev_dbg,
- ppd,
- &port_cntr_ops[i].ops,
+ debugfs_create_file(name,
!port_cntr_ops[i].ops.write ?
- S_IRUGO : S_IRUGO | S_IWUSR);
+ S_IRUGO :
+ S_IRUGO | S_IWUSR,
+ root, ppd, &port_cntr_ops[i].ops);
}
+
+ hfi1_fault_init_debugfs(ibd);
}
void hfi1_dbg_ibdev_exit(struct hfi1_ibdev *ibd)
{
if (!hfi1_dbg_root)
goto out;
+ hfi1_fault_exit_debugfs(ibd);
debugfs_remove(ibd->hfi1_ibdev_link);
debugfs_remove_recursive(ibd->hfi1_ibdev_dbg);
out:
@@ -1215,35 +1288,30 @@ static void _driver_stats_seq_stop(struct seq_file *s, void *v)
{
}
-static u64 hfi1_sps_ints(void)
+static void hfi1_sps_show_ints(struct seq_file *s)
{
- unsigned long flags;
+ unsigned long index, flags;
struct hfi1_devdata *dd;
u64 sps_ints = 0;
- spin_lock_irqsave(&hfi1_devs_lock, flags);
- list_for_each_entry(dd, &hfi1_dev_list, list) {
+ xa_lock_irqsave(&hfi1_dev_table, flags);
+ xa_for_each(&hfi1_dev_table, index, dd) {
sps_ints += get_all_cpu_total(dd->int_counter);
}
- spin_unlock_irqrestore(&hfi1_devs_lock, flags);
- return sps_ints;
+ xa_unlock_irqrestore(&hfi1_dev_table, flags);
+ seq_write(s, &sps_ints, sizeof(u64));
}
static int _driver_stats_seq_show(struct seq_file *s, void *v)
{
loff_t *spos = v;
- char *buffer;
u64 *stats = (u64 *)&hfi1_stats;
- size_t sz = seq_get_buf(s, &buffer);
- if (sz < sizeof(u64))
- return SEQ_SKIP;
/* special case for interrupts */
if (*spos == 0)
- *(u64 *)buffer = hfi1_sps_ints();
+ hfi1_sps_show_ints(s);
else
- *(u64 *)buffer = stats[*spos];
- seq_commit(s, sizeof(u64));
+ seq_write(s, stats + *spos, sizeof(u64));
return 0;
}
@@ -1254,10 +1322,10 @@ DEBUGFS_FILE_OPS(driver_stats);
void hfi1_dbg_init(void)
{
hfi1_dbg_root = debugfs_create_dir(DRIVER_NAME, NULL);
- if (!hfi1_dbg_root)
- pr_warn("init of debugfs failed\n");
- DEBUGFS_SEQ_FILE_CREATE(driver_stats_names, hfi1_dbg_root, NULL);
- DEBUGFS_SEQ_FILE_CREATE(driver_stats, hfi1_dbg_root, NULL);
+ debugfs_create_file("driver_stats_names", 0444, hfi1_dbg_root, NULL,
+ &_driver_stats_names_file_ops);
+ debugfs_create_file("driver_stats", 0444, hfi1_dbg_root, NULL,
+ &_driver_stats_file_ops);
}
void hfi1_dbg_exit(void)
@@ -1265,5 +1333,3 @@ void hfi1_dbg_exit(void)
debugfs_remove_recursive(hfi1_dbg_root);
hfi1_dbg_root = NULL;
}
-
-#endif
diff --git a/drivers/infiniband/hw/hfi1/debugfs.h b/drivers/infiniband/hw/hfi1/debugfs.h
index b6fb6814f1b8..65b48839abc6 100644
--- a/drivers/infiniband/hw/hfi1/debugfs.h
+++ b/drivers/infiniband/hw/hfi1/debugfs.h
@@ -1,75 +1,65 @@
-#ifndef _HFI1_DEBUGFS_H
-#define _HFI1_DEBUGFS_H
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
+ * Copyright(c) 2015, 2016, 2018 Intel Corporation.
*/
+#ifndef _HFI1_DEBUGFS_H
+#define _HFI1_DEBUGFS_H
+
struct hfi1_ibdev;
+
+#define DEBUGFS_SEQ_FILE_OPS(name) \
+static const struct seq_operations _##name##_seq_ops = { \
+ .start = _##name##_seq_start, \
+ .next = _##name##_seq_next, \
+ .stop = _##name##_seq_stop, \
+ .show = _##name##_seq_show \
+}
+
+#define DEBUGFS_SEQ_FILE_OPEN(name) \
+static int _##name##_open(struct inode *inode, struct file *s) \
+{ \
+ struct seq_file *seq; \
+ int ret; \
+ ret = seq_open(s, &_##name##_seq_ops); \
+ if (ret) \
+ return ret; \
+ seq = s->private_data; \
+ seq->private = inode->i_private; \
+ return 0; \
+}
+
+#define DEBUGFS_FILE_OPS(name) \
+static const struct file_operations _##name##_file_ops = { \
+ .owner = THIS_MODULE, \
+ .open = _##name##_open, \
+ .read = seq_read, \
+ .llseek = seq_lseek, \
+ .release = seq_release \
+}
+
#ifdef CONFIG_DEBUG_FS
void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd);
void hfi1_dbg_ibdev_exit(struct hfi1_ibdev *ibd);
void hfi1_dbg_init(void);
void hfi1_dbg_exit(void);
+
#else
static inline void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd)
{
}
-void hfi1_dbg_ibdev_exit(struct hfi1_ibdev *ibd)
+static inline void hfi1_dbg_ibdev_exit(struct hfi1_ibdev *ibd)
{
}
-void hfi1_dbg_init(void)
+static inline void hfi1_dbg_init(void)
{
}
-void hfi1_dbg_exit(void)
+static inline void hfi1_dbg_exit(void)
{
}
-
#endif
#endif /* _HFI1_DEBUGFS_H */
diff --git a/drivers/infiniband/hw/hfi1/device.c b/drivers/infiniband/hw/hfi1/device.c
index bf64b5a7bfd7..a98a4175e53b 100644
--- a/drivers/infiniband/hw/hfi1/device.c
+++ b/drivers/infiniband/hw/hfi1/device.c
@@ -1,60 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
* Copyright(c) 2015, 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
*/
#include <linux/cdev.h>
-#include <linux/module.h>
#include <linux/device.h>
#include <linux/fs.h>
#include "hfi.h"
#include "device.h"
-static struct class *class;
-static struct class *user_class;
+static char *hfi1_devnode(const struct device *dev, umode_t *mode)
+{
+ if (mode)
+ *mode = 0600;
+ return kasprintf(GFP_KERNEL, "%s", dev_name(dev));
+}
+
+static const struct class class = {
+ .name = "hfi1",
+ .devnode = hfi1_devnode,
+};
+
+static char *hfi1_user_devnode(const struct device *dev, umode_t *mode)
+{
+ if (mode)
+ *mode = 0666;
+ return kasprintf(GFP_KERNEL, "%s", dev_name(dev));
+}
+
+static const struct class user_class = {
+ .name = "hfi1_user",
+ .devnode = hfi1_user_devnode,
+};
static dev_t hfi1_dev;
int hfi1_cdev_init(int minor, const char *name,
@@ -69,7 +47,7 @@ int hfi1_cdev_init(int minor, const char *name,
cdev_init(cdev, fops);
cdev->owner = THIS_MODULE;
- cdev->kobj.parent = parent;
+ cdev_set_parent(cdev, parent);
kobject_set_name(&cdev->kobj, name);
ret = cdev_add(cdev, dev, 1);
@@ -80,15 +58,15 @@ int hfi1_cdev_init(int minor, const char *name,
}
if (user_accessible)
- device = device_create(user_class, NULL, dev, NULL, "%s", name);
+ device = device_create(&user_class, NULL, dev, NULL, "%s", name);
else
- device = device_create(class, NULL, dev, NULL, "%s", name);
+ device = device_create(&class, NULL, dev, NULL, "%s", name);
if (IS_ERR(device)) {
ret = PTR_ERR(device);
+ pr_err("Could not create device for minor %d, %s (err %pe)\n",
+ minor, name, device);
device = NULL;
- pr_err("Could not create device for minor %d, %s (err %d)\n",
- minor, name, -ret);
cdev_del(cdev);
}
done:
@@ -115,26 +93,6 @@ const char *class_name(void)
return hfi1_class_name;
}
-static char *hfi1_devnode(struct device *dev, umode_t *mode)
-{
- if (mode)
- *mode = 0600;
- return kasprintf(GFP_KERNEL, "%s", dev_name(dev));
-}
-
-static const char *hfi1_class_name_user = "hfi1_user";
-static const char *class_name_user(void)
-{
- return hfi1_class_name_user;
-}
-
-static char *hfi1_user_devnode(struct device *dev, umode_t *mode)
-{
- if (mode)
- *mode = 0666;
- return kasprintf(GFP_KERNEL, "%s", dev_name(dev));
-}
-
int __init dev_init(void)
{
int ret;
@@ -145,27 +103,21 @@ int __init dev_init(void)
goto done;
}
- class = class_create(THIS_MODULE, class_name());
- if (IS_ERR(class)) {
- ret = PTR_ERR(class);
+ ret = class_register(&class);
+ if (ret) {
pr_err("Could not create device class (err %d)\n", -ret);
unregister_chrdev_region(hfi1_dev, HFI1_NMINORS);
goto done;
}
- class->devnode = hfi1_devnode;
- user_class = class_create(THIS_MODULE, class_name_user());
- if (IS_ERR(user_class)) {
- ret = PTR_ERR(user_class);
+ ret = class_register(&user_class);
+ if (ret) {
pr_err("Could not create device class for user accessible files (err %d)\n",
-ret);
- class_destroy(class);
- class = NULL;
- user_class = NULL;
+ class_unregister(&class);
unregister_chrdev_region(hfi1_dev, HFI1_NMINORS);
goto done;
}
- user_class->devnode = hfi1_user_devnode;
done:
return ret;
@@ -173,11 +125,8 @@ done:
void dev_cleanup(void)
{
- class_destroy(class);
- class = NULL;
-
- class_destroy(user_class);
- user_class = NULL;
+ class_unregister(&class);
+ class_unregister(&user_class);
unregister_chrdev_region(hfi1_dev, HFI1_NMINORS);
}
diff --git a/drivers/infiniband/hw/hfi1/device.h b/drivers/infiniband/hw/hfi1/device.h
index c3ec19cb0ac9..a91bea426ba5 100644
--- a/drivers/infiniband/hw/hfi1/device.h
+++ b/drivers/infiniband/hw/hfi1/device.h
@@ -1,52 +1,11 @@
-#ifndef _HFI1_DEVICE_H
-#define _HFI1_DEVICE_H
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
* Copyright(c) 2015, 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
*/
+#ifndef _HFI1_DEVICE_H
+#define _HFI1_DEVICE_H
+
int hfi1_cdev_init(int minor, const char *name,
const struct file_operations *fops,
struct cdev *cdev, struct device **devp,
diff --git a/drivers/infiniband/hw/hfi1/dma.c b/drivers/infiniband/hw/hfi1/dma.c
deleted file mode 100644
index 7e8dab892848..000000000000
--- a/drivers/infiniband/hw/hfi1/dma.c
+++ /dev/null
@@ -1,183 +0,0 @@
-/*
- * Copyright(c) 2015, 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- */
-#include <linux/types.h>
-#include <linux/scatterlist.h>
-
-#include "verbs.h"
-
-#define BAD_DMA_ADDRESS ((u64)0)
-
-/*
- * The following functions implement driver specific replacements
- * for the ib_dma_*() functions.
- *
- * These functions return kernel virtual addresses instead of
- * device bus addresses since the driver uses the CPU to copy
- * data instead of using hardware DMA.
- */
-
-static int hfi1_mapping_error(struct ib_device *dev, u64 dma_addr)
-{
- return dma_addr == BAD_DMA_ADDRESS;
-}
-
-static u64 hfi1_dma_map_single(struct ib_device *dev, void *cpu_addr,
- size_t size, enum dma_data_direction direction)
-{
- if (WARN_ON(!valid_dma_direction(direction)))
- return BAD_DMA_ADDRESS;
-
- return (u64)cpu_addr;
-}
-
-static void hfi1_dma_unmap_single(struct ib_device *dev, u64 addr, size_t size,
- enum dma_data_direction direction)
-{
- /* This is a stub, nothing to be done here */
-}
-
-static u64 hfi1_dma_map_page(struct ib_device *dev, struct page *page,
- unsigned long offset, size_t size,
- enum dma_data_direction direction)
-{
- u64 addr;
-
- if (WARN_ON(!valid_dma_direction(direction)))
- return BAD_DMA_ADDRESS;
-
- if (offset + size > PAGE_SIZE)
- return BAD_DMA_ADDRESS;
-
- addr = (u64)page_address(page);
- if (addr)
- addr += offset;
-
- return addr;
-}
-
-static void hfi1_dma_unmap_page(struct ib_device *dev, u64 addr, size_t size,
- enum dma_data_direction direction)
-{
- /* This is a stub, nothing to be done here */
-}
-
-static int hfi1_map_sg(struct ib_device *dev, struct scatterlist *sgl,
- int nents, enum dma_data_direction direction)
-{
- struct scatterlist *sg;
- u64 addr;
- int i;
- int ret = nents;
-
- if (WARN_ON(!valid_dma_direction(direction)))
- return BAD_DMA_ADDRESS;
-
- for_each_sg(sgl, sg, nents, i) {
- addr = (u64)page_address(sg_page(sg));
- if (!addr) {
- ret = 0;
- break;
- }
- sg->dma_address = addr + sg->offset;
-#ifdef CONFIG_NEED_SG_DMA_LENGTH
- sg->dma_length = sg->length;
-#endif
- }
- return ret;
-}
-
-static void hfi1_unmap_sg(struct ib_device *dev,
- struct scatterlist *sg, int nents,
- enum dma_data_direction direction)
-{
- /* This is a stub, nothing to be done here */
-}
-
-static void hfi1_sync_single_for_cpu(struct ib_device *dev, u64 addr,
- size_t size, enum dma_data_direction dir)
-{
-}
-
-static void hfi1_sync_single_for_device(struct ib_device *dev, u64 addr,
- size_t size,
- enum dma_data_direction dir)
-{
-}
-
-static void *hfi1_dma_alloc_coherent(struct ib_device *dev, size_t size,
- u64 *dma_handle, gfp_t flag)
-{
- struct page *p;
- void *addr = NULL;
-
- p = alloc_pages(flag, get_order(size));
- if (p)
- addr = page_address(p);
- if (dma_handle)
- *dma_handle = (u64)addr;
- return addr;
-}
-
-static void hfi1_dma_free_coherent(struct ib_device *dev, size_t size,
- void *cpu_addr, u64 dma_handle)
-{
- free_pages((unsigned long)cpu_addr, get_order(size));
-}
-
-struct ib_dma_mapping_ops hfi1_dma_mapping_ops = {
- .mapping_error = hfi1_mapping_error,
- .map_single = hfi1_dma_map_single,
- .unmap_single = hfi1_dma_unmap_single,
- .map_page = hfi1_dma_map_page,
- .unmap_page = hfi1_dma_unmap_page,
- .map_sg = hfi1_map_sg,
- .unmap_sg = hfi1_unmap_sg,
- .sync_single_for_cpu = hfi1_sync_single_for_cpu,
- .sync_single_for_device = hfi1_sync_single_for_device,
- .alloc_coherent = hfi1_dma_alloc_coherent,
- .free_coherent = hfi1_dma_free_coherent
-};
diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c
index 4fbaee68012b..06487e20f723 100644
--- a/drivers/infiniband/hw/hfi1/driver.c
+++ b/drivers/infiniband/hw/hfi1/driver.c
@@ -1,48 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
+ * Copyright(c) 2015-2020 Intel Corporation.
+ * Copyright(c) 2021 Cornelis Networks.
*/
#include <linux/spinlock.h>
@@ -54,23 +13,22 @@
#include <linux/module.h>
#include <linux/prefetch.h>
#include <rdma/ib_verbs.h>
+#include <linux/etherdevice.h>
#include "hfi.h"
#include "trace.h"
#include "qp.h"
#include "sdma.h"
+#include "debugfs.h"
+#include "vnic.h"
+#include "fault.h"
+
+#include "ipoib.h"
+#include "netdev.h"
#undef pr_fmt
#define pr_fmt(fmt) DRIVER_NAME ": " fmt
-/*
- * The size has to be longer than this string, so we can append
- * board/chip information to it in the initialization code.
- */
-const char ib_hfi1_version[] = HFI1_DRIVER_VERSION "\n";
-
-DEFINE_SPINLOCK(hfi1_devs_lock);
-LIST_HEAD(hfi1_dev_list);
DEFINE_MUTEX(hfi1_mutex); /* general driver use */
unsigned int hfi1_max_mtu = HFI1_DEFAULT_MAX_MTU;
@@ -83,8 +41,8 @@ module_param_named(cu, hfi1_cu, uint, S_IRUGO);
MODULE_PARM_DESC(cu, "Credit return units");
unsigned long hfi1_cap_mask = HFI1_CAP_MASK_DEFAULT;
-static int hfi1_caps_set(const char *, const struct kernel_param *);
-static int hfi1_caps_get(char *, const struct kernel_param *);
+static int hfi1_caps_set(const char *val, const struct kernel_param *kp);
+static int hfi1_caps_get(char *buffer, const struct kernel_param *kp);
static const struct kernel_param_ops cap_ops = {
.set = hfi1_caps_set,
.get = hfi1_caps_get
@@ -93,13 +51,17 @@ module_param_cb(cap_mask, &cap_ops, &hfi1_cap_mask, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(cap_mask, "Bit mask of enabled/disabled HW features");
MODULE_LICENSE("Dual BSD/GPL");
-MODULE_DESCRIPTION("Intel Omni-Path Architecture driver");
-MODULE_VERSION(HFI1_DRIVER_VERSION);
+MODULE_DESCRIPTION("Cornelis Omni-Path Express driver");
/*
* MAX_PKT_RCV is the max # if packets processed per receive interrupt.
*/
#define MAX_PKT_RECV 64
+/*
+ * MAX_PKT_THREAD_RCV is the max # of packets processed before
+ * the qp_wait_list queue is flushed.
+ */
+#define MAX_PKT_RECV_THREAD (MAX_PKT_RECV * 4)
#define EGR_HEAD_UPDATE_THRESHOLD 16
struct hfi1_ib_stats hfi1_stats;
@@ -150,23 +112,7 @@ static int hfi1_caps_get(char *buffer, const struct kernel_param *kp)
cap_mask &= ~HFI1_CAP_LOCKED_SMASK;
cap_mask |= ((cap_mask & HFI1_CAP_K2U) << HFI1_CAP_USER_SHIFT);
- return scnprintf(buffer, PAGE_SIZE, "0x%lx", cap_mask);
-}
-
-const char *get_unit_name(int unit)
-{
- static char iname[16];
-
- snprintf(iname, sizeof(iname), DRIVER_NAME "_%u", unit);
- return iname;
-}
-
-const char *get_card_name(struct rvt_dev_info *rdi)
-{
- struct hfi1_ibdev *ibdev = container_of(rdi, struct hfi1_ibdev, rdi);
- struct hfi1_devdata *dd = container_of(ibdev,
- struct hfi1_devdata, verbs_dev);
- return get_unit_name(dd->unit);
+ return sysfs_emit(buffer, "0x%lx\n", cap_mask);
}
struct pci_dev *get_pci_dev(struct rvt_dev_info *rdi)
@@ -184,12 +130,12 @@ int hfi1_count_active_units(void)
{
struct hfi1_devdata *dd;
struct hfi1_pportdata *ppd;
- unsigned long flags;
+ unsigned long index, flags;
int pidx, nunits_active = 0;
- spin_lock_irqsave(&hfi1_devs_lock, flags);
- list_for_each_entry(dd, &hfi1_dev_list, list) {
- if (!(dd->flags & HFI1_PRESENT) || !dd->kregbase)
+ xa_lock_irqsave(&hfi1_dev_table, flags);
+ xa_for_each(&hfi1_dev_table, index, dd) {
+ if (!(dd->flags & HFI1_PRESENT) || !dd->kregbase1)
continue;
for (pidx = 0; pidx < dd->num_pports; ++pidx) {
ppd = dd->pport + pidx;
@@ -199,47 +145,11 @@ int hfi1_count_active_units(void)
}
}
}
- spin_unlock_irqrestore(&hfi1_devs_lock, flags);
+ xa_unlock_irqrestore(&hfi1_dev_table, flags);
return nunits_active;
}
/*
- * Return count of all units, optionally return in arguments
- * the number of usable (present) units, and the number of
- * ports that are up.
- */
-int hfi1_count_units(int *npresentp, int *nupp)
-{
- int nunits = 0, npresent = 0, nup = 0;
- struct hfi1_devdata *dd;
- unsigned long flags;
- int pidx;
- struct hfi1_pportdata *ppd;
-
- spin_lock_irqsave(&hfi1_devs_lock, flags);
-
- list_for_each_entry(dd, &hfi1_dev_list, list) {
- nunits++;
- if ((dd->flags & HFI1_PRESENT) && dd->kregbase)
- npresent++;
- for (pidx = 0; pidx < dd->num_pports; ++pidx) {
- ppd = dd->pport + pidx;
- if (ppd->lid && ppd->linkup)
- nup++;
- }
- }
-
- spin_unlock_irqrestore(&hfi1_devs_lock, flags);
-
- if (npresentp)
- *npresentp = npresent;
- if (nupp)
- *nupp = nup;
-
- return nunits;
-}
-
-/*
* Get address of eager buffer from it's index (allocated in chunks, not
* contiguous).
*/
@@ -253,13 +163,34 @@ static inline void *get_egrbuf(const struct hfi1_ctxtdata *rcd, u64 rhf,
(offset * RCV_BUF_BLOCK_SIZE));
}
+static inline void *hfi1_get_header(struct hfi1_ctxtdata *rcd,
+ __le32 *rhf_addr)
+{
+ u32 offset = rhf_hdrq_offset(rhf_to_cpu(rhf_addr));
+
+ return (void *)(rhf_addr - rcd->rhf_offset + offset);
+}
+
+static inline struct ib_header *hfi1_get_msgheader(struct hfi1_ctxtdata *rcd,
+ __le32 *rhf_addr)
+{
+ return (struct ib_header *)hfi1_get_header(rcd, rhf_addr);
+}
+
+static inline struct hfi1_16b_header
+ *hfi1_get_16B_header(struct hfi1_ctxtdata *rcd,
+ __le32 *rhf_addr)
+{
+ return (struct hfi1_16b_header *)hfi1_get_header(rcd, rhf_addr);
+}
+
/*
* Validate and encode the a given RcvArray Buffer size.
* The function will check whether the given size falls within
* allowed size ranges for the respective type and, optionally,
* return the proper encoding.
*/
-inline int hfi1_rcvbuf_validate(u32 size, u8 type, u16 *encoded)
+int hfi1_rcvbuf_validate(u32 size, u8 type, u16 *encoded)
{
if (unlikely(!PAGE_ALIGNED(size)))
return 0;
@@ -278,45 +209,60 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
{
struct ib_header *rhdr = packet->hdr;
u32 rte = rhf_rcv_type_err(packet->rhf);
- int lnh = be16_to_cpu(rhdr->lrh[0]) & 3;
- struct hfi1_ibport *ibp = &ppd->ibport_data;
+ u32 mlid_base;
+ struct hfi1_ibport *ibp = rcd_to_iport(rcd);
struct hfi1_devdata *dd = ppd->dd;
- struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
+ struct hfi1_ibdev *verbs_dev = &dd->verbs_dev;
+ struct rvt_dev_info *rdi = &verbs_dev->rdi;
- if (packet->rhf & (RHF_VCRC_ERR | RHF_ICRC_ERR))
+ if ((packet->rhf & RHF_DC_ERR) &&
+ hfi1_dbg_fault_suppress_err(verbs_dev))
return;
+ if (packet->rhf & RHF_ICRC_ERR)
+ return;
+
+ if (packet->etype == RHF_RCV_TYPE_BYPASS) {
+ goto drop;
+ } else {
+ u8 lnh = ib_get_lnh(rhdr);
+
+ mlid_base = be16_to_cpu(IB_MULTICAST_LID_BASE);
+ if (lnh == HFI1_LRH_BTH) {
+ packet->ohdr = &rhdr->u.oth;
+ } else if (lnh == HFI1_LRH_GRH) {
+ packet->ohdr = &rhdr->u.l.oth;
+ packet->grh = &rhdr->u.l.grh;
+ } else {
+ goto drop;
+ }
+ }
+
if (packet->rhf & RHF_TID_ERR) {
/* For TIDERR and RC QPs preemptively schedule a NAK */
- struct ib_other_headers *ohdr = NULL;
u32 tlen = rhf_pkt_len(packet->rhf); /* in bytes */
- u16 lid = be16_to_cpu(rhdr->lrh[1]);
+ u32 dlid = ib_get_dlid(rhdr);
u32 qp_num;
- u32 rcv_flags = 0;
/* Sanity check packet */
if (tlen < 24)
goto drop;
/* Check for GRH */
- if (lnh == HFI1_LRH_BTH) {
- ohdr = &rhdr->u.oth;
- } else if (lnh == HFI1_LRH_GRH) {
+ if (packet->grh) {
u32 vtf;
+ struct ib_grh *grh = packet->grh;
- ohdr = &rhdr->u.l.oth;
- if (rhdr->u.l.grh.next_hdr != IB_GRH_NEXT_HDR)
+ if (grh->next_hdr != IB_GRH_NEXT_HDR)
goto drop;
- vtf = be32_to_cpu(rhdr->u.l.grh.version_tclass_flow);
+ vtf = be32_to_cpu(grh->version_tclass_flow);
if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION)
goto drop;
- rcv_flags |= HFI1_HAS_GRH;
- } else {
- goto drop;
}
+
/* Get the destination QP number. */
- qp_num = be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK;
- if (lid < be16_to_cpu(IB_MULTICAST_LID_BASE)) {
+ qp_num = ib_bth_get_qpn(packet->ohdr);
+ if (dlid < mlid_base) {
struct rvt_qp *qp;
unsigned long flags;
@@ -341,11 +287,7 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
switch (qp->ibqp.qp_type) {
case IB_QPT_RC:
- hfi1_rc_hdrerr(
- rcd,
- rhdr,
- rcv_flags,
- qp);
+ hfi1_rc_hdrerr(rcd, packet, qp);
break;
default:
/* For now don't handle any other QP types */
@@ -361,9 +303,8 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
switch (rte) {
case RHF_RTE_ERROR_OP_CODE_ERR:
{
- u32 opcode;
void *ebuf = NULL;
- __be32 *bth = NULL;
+ u8 opcode;
if (rhf_use_egr_bfr(packet->rhf))
ebuf = packet->ebuf;
@@ -371,16 +312,7 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
if (!ebuf)
goto drop; /* this should never happen */
- if (lnh == HFI1_LRH_BTH)
- bth = (__be32 *)ebuf;
- else if (lnh == HFI1_LRH_GRH)
- bth = (__be32 *)((char *)ebuf + sizeof(struct ib_grh));
- else
- goto drop;
-
- opcode = be32_to_cpu(bth[0]) >> 24;
- opcode &= 0xff;
-
+ opcode = ib_bth_get_opcode(packet->ohdr);
if (opcode == IB_OPCODE_CNP) {
/*
* Only in pre-B0 h/w is the CNP_OPCODE handled
@@ -391,10 +323,10 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
u16 rlid;
u8 svc_type, sl, sc5;
- sc5 = hdr2sc(rhdr, packet->rhf);
+ sc5 = hfi1_9B_get_sc5(rhdr, packet->rhf);
sl = ibp->sc_to_sl[sc5];
- lqpn = be32_to_cpu(bth[1]) & RVT_QPN_MASK;
+ lqpn = ib_bth_get_qpn(packet->ohdr);
rcu_read_lock();
qp = rvt_lookup_qpn(rdi, &ibp->rvp, lqpn);
if (!qp) {
@@ -409,11 +341,12 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
svc_type = IB_CC_SVCTYPE_UD;
break;
case IB_QPT_UC:
- rlid = be16_to_cpu(rhdr->lrh[3]);
+ rlid = ib_get_slid(rhdr);
rqpn = qp->remote_qpn;
svc_type = IB_CC_SVCTYPE_UC;
break;
default:
+ rcu_read_unlock();
goto drop;
}
@@ -435,74 +368,125 @@ drop:
static inline void init_packet(struct hfi1_ctxtdata *rcd,
struct hfi1_packet *packet)
{
- packet->rsize = rcd->rcvhdrqentsize; /* words */
- packet->maxcnt = rcd->rcvhdrq_cnt * packet->rsize; /* words */
+ packet->rsize = get_hdrqentsize(rcd); /* words */
+ packet->maxcnt = get_hdrq_cnt(rcd) * packet->rsize; /* words */
packet->rcd = rcd;
packet->updegr = 0;
packet->etail = -1;
packet->rhf_addr = get_rhf_addr(rcd);
packet->rhf = rhf_to_cpu(packet->rhf_addr);
- packet->rhqoff = rcd->head;
+ packet->rhqoff = hfi1_rcd_head(rcd);
packet->numpkt = 0;
- packet->rcv_flags = 0;
}
-void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt,
- bool do_cnp)
+/* We support only two types - 9B and 16B for now */
+static const hfi1_handle_cnp hfi1_handle_cnp_tbl[2] = {
+ [HFI1_PKT_TYPE_9B] = &return_cnp,
+ [HFI1_PKT_TYPE_16B] = &return_cnp_16B
+};
+
+/**
+ * hfi1_process_ecn_slowpath - Process FECN or BECN bits
+ * @qp: The packet's destination QP
+ * @pkt: The packet itself.
+ * @prescan: Is the caller the RXQ prescan
+ *
+ * Process the packet's FECN or BECN bits. By now, the packet
+ * has already been evaluated whether processing of those bit should
+ * be done.
+ * The significance of the @prescan argument is that if the caller
+ * is the RXQ prescan, a CNP will be send out instead of waiting for the
+ * normal packet processing to send an ACK with BECN set (or a CNP).
+ */
+bool hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt,
+ bool prescan)
{
struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
- struct ib_header *hdr = pkt->hdr;
+ struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
struct ib_other_headers *ohdr = pkt->ohdr;
- struct ib_grh *grh = NULL;
- u32 rqpn = 0, bth1;
- u16 rlid, dlid = be16_to_cpu(hdr->lrh[1]);
- u8 sc, svc_type;
- bool is_mcast = false;
-
- if (pkt->rcv_flags & HFI1_HAS_GRH)
- grh = &hdr->u.l.grh;
+ struct ib_grh *grh = pkt->grh;
+ u32 rqpn = 0;
+ u16 pkey;
+ u32 rlid, slid, dlid = 0;
+ u8 hdr_type, sc, svc_type, opcode;
+ bool is_mcast = false, ignore_fecn = false, do_cnp = false,
+ fecn, becn;
+
+ /* can be called from prescan */
+ if (pkt->etype == RHF_RCV_TYPE_BYPASS) {
+ pkey = hfi1_16B_get_pkey(pkt->hdr);
+ sc = hfi1_16B_get_sc(pkt->hdr);
+ dlid = hfi1_16B_get_dlid(pkt->hdr);
+ slid = hfi1_16B_get_slid(pkt->hdr);
+ is_mcast = hfi1_is_16B_mcast(dlid);
+ opcode = ib_bth_get_opcode(ohdr);
+ hdr_type = HFI1_PKT_TYPE_16B;
+ fecn = hfi1_16B_get_fecn(pkt->hdr);
+ becn = hfi1_16B_get_becn(pkt->hdr);
+ } else {
+ pkey = ib_bth_get_pkey(ohdr);
+ sc = hfi1_9B_get_sc5(pkt->hdr, pkt->rhf);
+ dlid = qp->ibqp.qp_type != IB_QPT_UD ? ib_get_dlid(pkt->hdr) :
+ ppd->lid;
+ slid = ib_get_slid(pkt->hdr);
+ is_mcast = (dlid > be16_to_cpu(IB_MULTICAST_LID_BASE)) &&
+ (dlid != be16_to_cpu(IB_LID_PERMISSIVE));
+ opcode = ib_bth_get_opcode(ohdr);
+ hdr_type = HFI1_PKT_TYPE_9B;
+ fecn = ib_bth_get_fecn(ohdr);
+ becn = ib_bth_get_becn(ohdr);
+ }
switch (qp->ibqp.qp_type) {
+ case IB_QPT_UD:
+ rlid = slid;
+ rqpn = ib_get_sqpn(pkt->ohdr);
+ svc_type = IB_CC_SVCTYPE_UD;
+ break;
case IB_QPT_SMI:
case IB_QPT_GSI:
- case IB_QPT_UD:
- rlid = be16_to_cpu(hdr->lrh[3]);
- rqpn = be32_to_cpu(ohdr->u.ud.deth[1]) & RVT_QPN_MASK;
+ rlid = slid;
+ rqpn = ib_get_sqpn(pkt->ohdr);
svc_type = IB_CC_SVCTYPE_UD;
- is_mcast = (dlid > be16_to_cpu(IB_MULTICAST_LID_BASE)) &&
- (dlid != be16_to_cpu(IB_LID_PERMISSIVE));
break;
case IB_QPT_UC:
- rlid = qp->remote_ah_attr.dlid;
+ rlid = rdma_ah_get_dlid(&qp->remote_ah_attr);
rqpn = qp->remote_qpn;
svc_type = IB_CC_SVCTYPE_UC;
break;
case IB_QPT_RC:
- rlid = qp->remote_ah_attr.dlid;
+ rlid = rdma_ah_get_dlid(&qp->remote_ah_attr);
rqpn = qp->remote_qpn;
svc_type = IB_CC_SVCTYPE_RC;
break;
default:
- return;
- }
-
- sc = hdr2sc(hdr, pkt->rhf);
-
- bth1 = be32_to_cpu(ohdr->bth[1]);
- if (do_cnp && (bth1 & HFI1_FECN_SMASK)) {
- u16 pkey = (u16)be32_to_cpu(ohdr->bth[0]);
-
- return_cnp(ibp, qp, rqpn, pkey, dlid, rlid, sc, grh);
+ return false;
}
- if (!is_mcast && (bth1 & HFI1_BECN_SMASK)) {
- struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
- u32 lqpn = bth1 & RVT_QPN_MASK;
+ ignore_fecn = is_mcast || (opcode == IB_OPCODE_CNP) ||
+ (opcode == IB_OPCODE_RC_ACKNOWLEDGE);
+ /*
+ * ACKNOWLEDGE packets do not get a CNP but this will be
+ * guarded by ignore_fecn above.
+ */
+ do_cnp = prescan ||
+ (opcode >= IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST &&
+ opcode <= IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE) ||
+ opcode == TID_OP(READ_RESP) ||
+ opcode == TID_OP(ACK);
+
+ /* Call appropriate CNP handler */
+ if (!ignore_fecn && do_cnp && fecn)
+ hfi1_handle_cnp_tbl[hdr_type](ibp, qp, rqpn, pkey,
+ dlid, rlid, sc, grh);
+
+ if (becn) {
+ u32 lqpn = be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK;
u8 sl = ibp->sc_to_sl[sc];
process_becn(ppd, sl, rlid, lqpn, rqpn, svc_type);
}
-
+ return !ignore_fecn && fecn;
}
struct ps_mdata {
@@ -524,22 +508,22 @@ static inline void init_ps_mdata(struct ps_mdata *mdata,
mdata->maxcnt = packet->maxcnt;
mdata->ps_head = packet->rhqoff;
- if (HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL)) {
+ if (get_dma_rtail_setting(rcd)) {
mdata->ps_tail = get_rcvhdrtail(rcd);
if (rcd->ctxt == HFI1_CTRL_CTXT)
- mdata->ps_seq = rcd->seq_cnt;
+ mdata->ps_seq = hfi1_seq_cnt(rcd);
else
mdata->ps_seq = 0; /* not used with DMA_RTAIL */
} else {
mdata->ps_tail = 0; /* used only with DMA_RTAIL*/
- mdata->ps_seq = rcd->seq_cnt;
+ mdata->ps_seq = hfi1_seq_cnt(rcd);
}
}
static inline int ps_done(struct ps_mdata *mdata, u64 rhf,
struct hfi1_ctxtdata *rcd)
{
- if (HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL))
+ if (get_dma_rtail_setting(rcd))
return mdata->ps_head == mdata->ps_tail;
return mdata->ps_seq != rhf_rcv_seq(rhf);
}
@@ -565,11 +549,9 @@ static inline void update_ps_mdata(struct ps_mdata *mdata,
mdata->ps_head = 0;
/* Control context must do seq counting */
- if (!HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL) ||
- (rcd->ctxt == HFI1_CTRL_CTXT)) {
- if (++mdata->ps_seq > 13)
- mdata->ps_seq = 1;
- }
+ if (!get_dma_rtail_setting(rcd) ||
+ rcd->ctxt == HFI1_CTRL_CTXT)
+ mdata->ps_seq = hfi1_seq_incr_wrap(mdata->ps_seq);
}
/*
@@ -593,16 +575,14 @@ static void __prescan_rxq(struct hfi1_packet *packet)
init_ps_mdata(&mdata, packet);
while (1) {
- struct hfi1_devdata *dd = rcd->dd;
- struct hfi1_ibport *ibp = &rcd->ppd->ibport_data;
+ struct hfi1_ibport *ibp = rcd_to_iport(rcd);
__le32 *rhf_addr = (__le32 *)rcd->rcvhdrq + mdata.ps_head +
- dd->rhf_offset;
+ packet->rcd->rhf_offset;
struct rvt_qp *qp;
struct ib_header *hdr;
- struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
+ struct rvt_dev_info *rdi = &rcd->dd->verbs_dev.rdi;
u64 rhf = rhf_to_cpu(rhf_addr);
u32 etype = rhf_rcv_type(rhf), qpn, bth1;
- int is_ecn = 0;
u8 lnh;
if (ps_done(&mdata, rhf, rcd))
@@ -614,26 +594,24 @@ static void __prescan_rxq(struct hfi1_packet *packet)
if (etype != RHF_RCV_TYPE_IB)
goto next;
- packet->hdr = hfi1_get_msgheader(dd, rhf_addr);
+ packet->hdr = hfi1_get_msgheader(packet->rcd, rhf_addr);
hdr = packet->hdr;
-
- lnh = be16_to_cpu(hdr->lrh[0]) & 3;
+ lnh = ib_get_lnh(hdr);
if (lnh == HFI1_LRH_BTH) {
packet->ohdr = &hdr->u.oth;
+ packet->grh = NULL;
} else if (lnh == HFI1_LRH_GRH) {
packet->ohdr = &hdr->u.l.oth;
- packet->rcv_flags |= HFI1_HAS_GRH;
+ packet->grh = &hdr->u.l.grh;
} else {
goto next; /* just in case */
}
- bth1 = be32_to_cpu(packet->ohdr->bth[1]);
- is_ecn = !!(bth1 & (HFI1_FECN_SMASK | HFI1_BECN_SMASK));
-
- if (!is_ecn)
+ if (!hfi1_may_ecn(packet))
goto next;
+ bth1 = be32_to_cpu(packet->ohdr->bth[1]);
qpn = bth1 & RVT_QPN_MASK;
rcu_read_lock();
qp = rvt_lookup_qpn(rdi, &ibp->rvp, qpn);
@@ -643,51 +621,129 @@ static void __prescan_rxq(struct hfi1_packet *packet)
goto next;
}
- process_ecn(qp, packet, true);
+ hfi1_process_ecn_slowpath(qp, packet, true);
rcu_read_unlock();
/* turn off BECN, FECN */
- bth1 &= ~(HFI1_FECN_SMASK | HFI1_BECN_SMASK);
+ bth1 &= ~(IB_FECN_SMASK | IB_BECN_SMASK);
packet->ohdr->bth[1] = cpu_to_be32(bth1);
next:
update_ps_mdata(&mdata, rcd);
}
}
-static inline int skip_rcv_packet(struct hfi1_packet *packet, int thread)
+static void process_rcv_qp_work(struct hfi1_packet *packet)
+{
+ struct rvt_qp *qp, *nqp;
+ struct hfi1_ctxtdata *rcd = packet->rcd;
+
+ /*
+ * Iterate over all QPs waiting to respond.
+ * The list won't change since the IRQ is only run on one CPU.
+ */
+ list_for_each_entry_safe(qp, nqp, &rcd->qp_wait_list, rspwait) {
+ list_del_init(&qp->rspwait);
+ if (qp->r_flags & RVT_R_RSP_NAK) {
+ qp->r_flags &= ~RVT_R_RSP_NAK;
+ packet->qp = qp;
+ hfi1_send_rc_ack(packet, 0);
+ }
+ if (qp->r_flags & RVT_R_RSP_SEND) {
+ unsigned long flags;
+
+ qp->r_flags &= ~RVT_R_RSP_SEND;
+ spin_lock_irqsave(&qp->s_lock, flags);
+ if (ib_rvt_state_ops[qp->state] &
+ RVT_PROCESS_OR_FLUSH_SEND)
+ hfi1_schedule_send(qp);
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+ }
+ rvt_put_qp(qp);
+ }
+}
+
+static noinline int max_packet_exceeded(struct hfi1_packet *packet, int thread)
+{
+ if (thread) {
+ if ((packet->numpkt & (MAX_PKT_RECV_THREAD - 1)) == 0)
+ /* allow defered processing */
+ process_rcv_qp_work(packet);
+ cond_resched();
+ return RCV_PKT_OK;
+ } else {
+ this_cpu_inc(*packet->rcd->dd->rcv_limit);
+ return RCV_PKT_LIMIT;
+ }
+}
+
+static inline int check_max_packet(struct hfi1_packet *packet, int thread)
{
int ret = RCV_PKT_OK;
+ if (unlikely((packet->numpkt & (MAX_PKT_RECV - 1)) == 0))
+ ret = max_packet_exceeded(packet, thread);
+ return ret;
+}
+
+static noinline int skip_rcv_packet(struct hfi1_packet *packet, int thread)
+{
+ int ret;
+
+ packet->rcd->dd->ctx0_seq_drop++;
/* Set up for the next packet */
packet->rhqoff += packet->rsize;
if (packet->rhqoff >= packet->maxcnt)
packet->rhqoff = 0;
packet->numpkt++;
- if (unlikely((packet->numpkt & (MAX_PKT_RECV - 1)) == 0)) {
- if (thread) {
- cond_resched();
- } else {
- ret = RCV_PKT_LIMIT;
- this_cpu_inc(*packet->rcd->dd->rcv_limit);
- }
- }
+ ret = check_max_packet(packet, thread);
packet->rhf_addr = (__le32 *)packet->rcd->rcvhdrq + packet->rhqoff +
- packet->rcd->dd->rhf_offset;
+ packet->rcd->rhf_offset;
packet->rhf = rhf_to_cpu(packet->rhf_addr);
return ret;
}
+static void process_rcv_packet_napi(struct hfi1_packet *packet)
+{
+ packet->etype = rhf_rcv_type(packet->rhf);
+
+ /* total length */
+ packet->tlen = rhf_pkt_len(packet->rhf); /* in bytes */
+ /* retrieve eager buffer details */
+ packet->etail = rhf_egr_index(packet->rhf);
+ packet->ebuf = get_egrbuf(packet->rcd, packet->rhf,
+ &packet->updegr);
+ /*
+ * Prefetch the contents of the eager buffer. It is
+ * OK to send a negative length to prefetch_range().
+ * The +2 is the size of the RHF.
+ */
+ prefetch_range(packet->ebuf,
+ packet->tlen - ((packet->rcd->rcvhdrqentsize -
+ (rhf_hdrq_offset(packet->rhf)
+ + 2)) * 4));
+
+ packet->rcd->rhf_rcv_function_map[packet->etype](packet);
+ packet->numpkt++;
+
+ /* Set up for the next packet */
+ packet->rhqoff += packet->rsize;
+ if (packet->rhqoff >= packet->maxcnt)
+ packet->rhqoff = 0;
+
+ packet->rhf_addr = (__le32 *)packet->rcd->rcvhdrq + packet->rhqoff +
+ packet->rcd->rhf_offset;
+ packet->rhf = rhf_to_cpu(packet->rhf_addr);
+}
+
static inline int process_rcv_packet(struct hfi1_packet *packet, int thread)
{
- int ret = RCV_PKT_OK;
+ int ret;
- packet->hdr = hfi1_get_msgheader(packet->rcd->dd,
- packet->rhf_addr);
- packet->hlen = (u8 *)packet->rhf_addr - (u8 *)packet->hdr;
packet->etype = rhf_rcv_type(packet->rhf);
+
/* total length */
packet->tlen = rhf_pkt_len(packet->rhf); /* in bytes */
/* retrieve eager buffer details */
@@ -702,7 +758,7 @@ static inline int process_rcv_packet(struct hfi1_packet *packet, int thread)
* The +2 is the size of the RHF.
*/
prefetch_range(packet->ebuf,
- packet->tlen - ((packet->rcd->rcvhdrqentsize -
+ packet->tlen - ((get_hdrqentsize(packet->rcd) -
(rhf_hdrq_offset(packet->rhf)
+ 2)) * 4));
}
@@ -715,7 +771,7 @@ static inline int process_rcv_packet(struct hfi1_packet *packet, int thread)
* crashing down. There is no need to eat another
* comparison in this performance critical code.
*/
- packet->rcd->dd->rhf_rcv_function_map[packet->etype](packet);
+ packet->rcd->rhf_rcv_function_map[packet->etype](packet);
packet->numpkt++;
/* Set up for the next packet */
@@ -723,17 +779,10 @@ static inline int process_rcv_packet(struct hfi1_packet *packet, int thread)
if (packet->rhqoff >= packet->maxcnt)
packet->rhqoff = 0;
- if (unlikely((packet->numpkt & (MAX_PKT_RECV - 1)) == 0)) {
- if (thread) {
- cond_resched();
- } else {
- ret = RCV_PKT_LIMIT;
- this_cpu_inc(*packet->rcd->dd->rcv_limit);
- }
- }
+ ret = check_max_packet(packet, thread);
packet->rhf_addr = (__le32 *)packet->rcd->rcvhdrq + packet->rhqoff +
- packet->rcd->dd->rhf_offset;
+ packet->rcd->rhf_offset;
packet->rhf = rhf_to_cpu(packet->rhf_addr);
return ret;
@@ -752,7 +801,7 @@ static inline void process_rcv_update(int last, struct hfi1_packet *packet)
packet->etail, 0, 0);
packet->updegr = 0;
}
- packet->rcv_flags = 0;
+ packet->grh = NULL;
}
static inline void finish_packet(struct hfi1_packet *packet)
@@ -763,40 +812,38 @@ static inline void finish_packet(struct hfi1_packet *packet)
* The only thing we need to do is a final update and call for an
* interrupt
*/
- update_usrhead(packet->rcd, packet->rcd->head, packet->updegr,
+ update_usrhead(packet->rcd, hfi1_rcd_head(packet->rcd), packet->updegr,
packet->etail, rcv_intr_dynamic, packet->numpkt);
}
-static inline void process_rcv_qp_work(struct hfi1_packet *packet)
+/*
+ * handle_receive_interrupt_napi_fp - receive a packet
+ * @rcd: the context
+ * @budget: polling budget
+ *
+ * Called from interrupt handler for receive interrupt.
+ * This is the fast path interrupt handler
+ * when executing napi soft irq environment.
+ */
+int handle_receive_interrupt_napi_fp(struct hfi1_ctxtdata *rcd, int budget)
{
- struct hfi1_ctxtdata *rcd;
- struct rvt_qp *qp, *nqp;
+ struct hfi1_packet packet;
- rcd = packet->rcd;
- rcd->head = packet->rhqoff;
+ init_packet(rcd, &packet);
+ if (last_rcv_seq(rcd, rhf_rcv_seq(packet.rhf)))
+ goto bail;
- /*
- * Iterate over all QPs waiting to respond.
- * The list won't change since the IRQ is only run on one CPU.
- */
- list_for_each_entry_safe(qp, nqp, &rcd->qp_wait_list, rspwait) {
- list_del_init(&qp->rspwait);
- if (qp->r_flags & RVT_R_RSP_NAK) {
- qp->r_flags &= ~RVT_R_RSP_NAK;
- hfi1_send_rc_ack(rcd, qp, 0);
- }
- if (qp->r_flags & RVT_R_RSP_SEND) {
- unsigned long flags;
+ while (packet.numpkt < budget) {
+ process_rcv_packet_napi(&packet);
+ if (hfi1_seq_incr(rcd, rhf_rcv_seq(packet.rhf)))
+ break;
- qp->r_flags &= ~RVT_R_RSP_SEND;
- spin_lock_irqsave(&qp->s_lock, flags);
- if (ib_rvt_state_ops[qp->state] &
- RVT_PROCESS_OR_FLUSH_SEND)
- hfi1_schedule_send(qp);
- spin_unlock_irqrestore(&qp->s_lock, flags);
- }
- rvt_put_qp(qp);
+ process_rcv_update(0, &packet);
}
+ hfi1_set_rcd_head(rcd, packet.rhqoff);
+bail:
+ finish_packet(&packet);
+ return packet.numpkt;
}
/*
@@ -804,13 +851,11 @@ static inline void process_rcv_qp_work(struct hfi1_packet *packet)
*/
int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *rcd, int thread)
{
- u32 seq;
int last = RCV_PKT_OK;
struct hfi1_packet packet;
init_packet(rcd, &packet);
- seq = rhf_rcv_seq(packet.rhf);
- if (seq != rcd->seq_cnt) {
+ if (last_rcv_seq(rcd, rhf_rcv_seq(packet.rhf))) {
last = RCV_PKT_DONE;
goto bail;
}
@@ -819,14 +864,12 @@ int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *rcd, int thread)
while (last == RCV_PKT_OK) {
last = process_rcv_packet(&packet, thread);
- seq = rhf_rcv_seq(packet.rhf);
- if (++rcd->seq_cnt > 13)
- rcd->seq_cnt = 1;
- if (seq != rcd->seq_cnt)
+ if (hfi1_seq_incr(rcd, rhf_rcv_seq(packet.rhf)))
last = RCV_PKT_DONE;
process_rcv_update(last, &packet);
}
process_rcv_qp_work(&packet);
+ hfi1_set_rcd_head(rcd, packet.rhqoff);
bail:
finish_packet(&packet);
return last;
@@ -855,59 +898,97 @@ int handle_receive_interrupt_dma_rtail(struct hfi1_ctxtdata *rcd, int thread)
process_rcv_update(last, &packet);
}
process_rcv_qp_work(&packet);
+ hfi1_set_rcd_head(rcd, packet.rhqoff);
bail:
finish_packet(&packet);
return last;
}
-static inline void set_all_nodma_rtail(struct hfi1_devdata *dd)
+static void set_all_fastpath(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
{
- int i;
+ u16 i;
- for (i = HFI1_CTRL_CTXT + 1; i < dd->first_user_ctxt; i++)
- dd->rcd[i]->do_interrupt =
- &handle_receive_interrupt_nodma_rtail;
-}
-
-static inline void set_all_dma_rtail(struct hfi1_devdata *dd)
-{
- int i;
+ /*
+ * For dynamically allocated kernel contexts (like vnic) switch
+ * interrupt handler only for that context. Otherwise, switch
+ * interrupt handler for all statically allocated kernel contexts.
+ */
+ if (rcd->ctxt >= dd->first_dyn_alloc_ctxt && !rcd->is_vnic) {
+ hfi1_rcd_get(rcd);
+ hfi1_set_fast(rcd);
+ hfi1_rcd_put(rcd);
+ return;
+ }
- for (i = HFI1_CTRL_CTXT + 1; i < dd->first_user_ctxt; i++)
- dd->rcd[i]->do_interrupt =
- &handle_receive_interrupt_dma_rtail;
+ for (i = HFI1_CTRL_CTXT + 1; i < dd->num_rcv_contexts; i++) {
+ rcd = hfi1_rcd_get_by_index(dd, i);
+ if (rcd && (i < dd->first_dyn_alloc_ctxt || rcd->is_vnic))
+ hfi1_set_fast(rcd);
+ hfi1_rcd_put(rcd);
+ }
}
void set_all_slowpath(struct hfi1_devdata *dd)
{
- int i;
+ struct hfi1_ctxtdata *rcd;
+ u16 i;
/* HFI1_CTRL_CTXT must always use the slow path interrupt handler */
- for (i = HFI1_CTRL_CTXT + 1; i < dd->first_user_ctxt; i++)
- dd->rcd[i]->do_interrupt = &handle_receive_interrupt;
+ for (i = HFI1_CTRL_CTXT + 1; i < dd->num_rcv_contexts; i++) {
+ rcd = hfi1_rcd_get_by_index(dd, i);
+ if (!rcd)
+ continue;
+ if (i < dd->first_dyn_alloc_ctxt || rcd->is_vnic)
+ rcd->do_interrupt = rcd->slow_handler;
+
+ hfi1_rcd_put(rcd);
+ }
}
-static inline int set_armed_to_active(struct hfi1_ctxtdata *rcd,
- struct hfi1_packet *packet,
- struct hfi1_devdata *dd)
+static bool __set_armed_to_active(struct hfi1_packet *packet)
{
- struct work_struct *lsaw = &rcd->ppd->linkstate_active_work;
- struct ib_header *hdr = hfi1_get_msgheader(packet->rcd->dd,
- packet->rhf_addr);
u8 etype = rhf_rcv_type(packet->rhf);
-
- if (etype == RHF_RCV_TYPE_IB && hdr2sc(hdr, packet->rhf) != 0xf) {
- int hwstate = read_logical_state(dd);
-
- if (hwstate != LSTATE_ACTIVE) {
- dd_dev_info(dd, "Unexpected link state %d\n", hwstate);
- return 0;
+ u8 sc = SC15_PACKET;
+
+ if (etype == RHF_RCV_TYPE_IB) {
+ struct ib_header *hdr = hfi1_get_msgheader(packet->rcd,
+ packet->rhf_addr);
+ sc = hfi1_9B_get_sc5(hdr, packet->rhf);
+ } else if (etype == RHF_RCV_TYPE_BYPASS) {
+ struct hfi1_16b_header *hdr = hfi1_get_16B_header(
+ packet->rcd,
+ packet->rhf_addr);
+ sc = hfi1_16B_get_sc(hdr);
+ }
+ if (sc != SC15_PACKET) {
+ int hwstate = driver_lstate(packet->rcd->ppd);
+ struct work_struct *lsaw =
+ &packet->rcd->ppd->linkstate_active_work;
+
+ if (hwstate != IB_PORT_ACTIVE) {
+ dd_dev_info(packet->rcd->dd,
+ "Unexpected link state %s\n",
+ ib_port_state_to_str(hwstate));
+ return false;
}
- queue_work(rcd->ppd->hfi1_wq, lsaw);
- return 1;
+ queue_work(packet->rcd->ppd->link_wq, lsaw);
+ return true;
}
- return 0;
+ return false;
+}
+
+/**
+ * set_armed_to_active - the fast path for armed to active
+ * @packet: the packet structure
+ *
+ * Return true if packet processing needs to bail.
+ */
+static bool set_armed_to_active(struct hfi1_packet *packet)
+{
+ if (likely(packet->rcd->ppd->host_link_state != HLS_UP_ARMED))
+ return false;
+ return __set_armed_to_active(packet);
}
/*
@@ -925,15 +1006,15 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread)
struct hfi1_packet packet;
int skip_pkt = 0;
+ if (!rcd->rcvhdrq)
+ return RCV_PKT_OK;
/* Control context will always use the slow path interrupt handler */
needset = (rcd->ctxt == HFI1_CTRL_CTXT) ? 0 : 1;
init_packet(rcd, &packet);
- if (!HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL)) {
- u32 seq = rhf_rcv_seq(packet.rhf);
-
- if (seq != rcd->seq_cnt) {
+ if (!get_dma_rtail_setting(rcd)) {
+ if (last_rcv_seq(rcd, rhf_rcv_seq(packet.rhf))) {
last = RCV_PKT_DONE;
goto bail;
}
@@ -950,53 +1031,34 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread)
* Control context can potentially receive an invalid
* rhf. Drop such packets.
*/
- if (rcd->ctxt == HFI1_CTRL_CTXT) {
- u32 seq = rhf_rcv_seq(packet.rhf);
-
- if (seq != rcd->seq_cnt)
+ if (rcd->ctxt == HFI1_CTRL_CTXT)
+ if (last_rcv_seq(rcd, rhf_rcv_seq(packet.rhf)))
skip_pkt = 1;
- }
}
prescan_rxq(rcd, &packet);
while (last == RCV_PKT_OK) {
- if (unlikely(dd->do_drop &&
- atomic_xchg(&dd->drop_packet, DROP_PACKET_OFF) ==
- DROP_PACKET_ON)) {
- dd->do_drop = 0;
-
+ if (hfi1_need_drop(dd)) {
/* On to the next packet */
packet.rhqoff += packet.rsize;
packet.rhf_addr = (__le32 *)rcd->rcvhdrq +
packet.rhqoff +
- dd->rhf_offset;
+ rcd->rhf_offset;
packet.rhf = rhf_to_cpu(packet.rhf_addr);
} else if (skip_pkt) {
last = skip_rcv_packet(&packet, thread);
skip_pkt = 0;
} else {
- /* Auto activate link on non-SC15 packet receive */
- if (unlikely(rcd->ppd->host_link_state ==
- HLS_UP_ARMED) &&
- set_armed_to_active(rcd, &packet, dd))
+ if (set_armed_to_active(&packet))
goto bail;
last = process_rcv_packet(&packet, thread);
}
- if (!HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL)) {
- u32 seq = rhf_rcv_seq(packet.rhf);
-
- if (++rcd->seq_cnt > 13)
- rcd->seq_cnt = 1;
- if (seq != rcd->seq_cnt)
+ if (!get_dma_rtail_setting(rcd)) {
+ if (hfi1_seq_incr(rcd, rhf_rcv_seq(packet.rhf)))
last = RCV_PKT_DONE;
- if (needset) {
- dd_dev_info(dd, "Switching to NO_DMA_RTAIL\n");
- set_all_nodma_rtail(dd);
- needset = 0;
- }
} else {
if (packet.rhqoff == hdrqtail)
last = RCV_PKT_DONE;
@@ -1005,26 +1067,24 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread)
* rhf. Drop such packets.
*/
if (rcd->ctxt == HFI1_CTRL_CTXT) {
- u32 seq = rhf_rcv_seq(packet.rhf);
+ bool lseq;
- if (++rcd->seq_cnt > 13)
- rcd->seq_cnt = 1;
- if (!last && (seq != rcd->seq_cnt))
+ lseq = hfi1_seq_incr(rcd,
+ rhf_rcv_seq(packet.rhf));
+ if (!last && lseq)
skip_pkt = 1;
}
-
- if (needset) {
- dd_dev_info(dd,
- "Switching to DMA_RTAIL\n");
- set_all_dma_rtail(dd);
- needset = 0;
- }
}
+ if (needset) {
+ needset = false;
+ set_all_fastpath(dd, rcd);
+ }
process_rcv_update(last, &packet);
}
process_rcv_qp_work(&packet);
+ hfi1_set_rcd_head(rcd, packet.rhqoff);
bail:
/*
@@ -1036,6 +1096,63 @@ bail:
}
/*
+ * handle_receive_interrupt_napi_sp - receive a packet
+ * @rcd: the context
+ * @budget: polling budget
+ *
+ * Called from interrupt handler for errors or receive interrupt.
+ * This is the slow path interrupt handler
+ * when executing napi soft irq environment.
+ */
+int handle_receive_interrupt_napi_sp(struct hfi1_ctxtdata *rcd, int budget)
+{
+ struct hfi1_devdata *dd = rcd->dd;
+ int last = RCV_PKT_OK;
+ bool needset = true;
+ struct hfi1_packet packet;
+
+ init_packet(rcd, &packet);
+ if (last_rcv_seq(rcd, rhf_rcv_seq(packet.rhf)))
+ goto bail;
+
+ while (last != RCV_PKT_DONE && packet.numpkt < budget) {
+ if (hfi1_need_drop(dd)) {
+ /* On to the next packet */
+ packet.rhqoff += packet.rsize;
+ packet.rhf_addr = (__le32 *)rcd->rcvhdrq +
+ packet.rhqoff +
+ rcd->rhf_offset;
+ packet.rhf = rhf_to_cpu(packet.rhf_addr);
+
+ } else {
+ if (set_armed_to_active(&packet))
+ goto bail;
+ process_rcv_packet_napi(&packet);
+ }
+
+ if (hfi1_seq_incr(rcd, rhf_rcv_seq(packet.rhf)))
+ last = RCV_PKT_DONE;
+
+ if (needset) {
+ needset = false;
+ set_all_fastpath(dd, rcd);
+ }
+
+ process_rcv_update(last, &packet);
+ }
+
+ hfi1_set_rcd_head(rcd, packet.rhqoff);
+
+bail:
+ /*
+ * Always write head at end, and setup rcv interrupt, even
+ * if no packets were processed.
+ */
+ finish_packet(&packet);
+ return packet.numpkt;
+}
+
+/*
* We may discover in the interrupt that the hardware link state has
* changed from ARMED to ACTIVE (due to the arrival of a non-SC15 packet),
* and we need to update the driver's notion of the link state. We cannot
@@ -1057,18 +1174,23 @@ void receive_interrupt_work(struct work_struct *work)
struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
linkstate_active_work);
struct hfi1_devdata *dd = ppd->dd;
- int i;
+ struct hfi1_ctxtdata *rcd;
+ u16 i;
/* Received non-SC15 packet implies neighbor_normal */
ppd->neighbor_normal = 1;
set_link_state(ppd, HLS_UP_ACTIVE);
/*
- * Interrupt all kernel contexts that could have had an
- * interrupt during auto activation.
+ * Interrupt all statically allocated kernel contexts that could
+ * have had an interrupt during auto activation.
*/
- for (i = HFI1_CTRL_CTXT; i < dd->first_user_ctxt; i++)
- force_recv_intr(dd->rcd[i]);
+ for (i = HFI1_CTRL_CTXT; i < dd->first_dyn_alloc_ctxt; i++) {
+ rcd = hfi1_rcd_get_by_index(dd, i);
+ if (rcd)
+ force_recv_intr(rcd);
+ hfi1_rcd_put(rcd);
+ }
}
/*
@@ -1181,7 +1303,7 @@ void shutdown_led_override(struct hfi1_pportdata *ppd)
*/
smp_rmb();
if (atomic_read(&ppd->led_override_timer_active)) {
- del_timer_sync(&ppd->led_override_timer);
+ timer_delete_sync(&ppd->led_override_timer);
atomic_set(&ppd->led_override_timer_active, 0);
/* Ensure the atomic_set is visible to all CPUs */
smp_wmb();
@@ -1191,9 +1313,10 @@ void shutdown_led_override(struct hfi1_pportdata *ppd)
write_csr(dd, DCC_CFG_LED_CNTRL, 0);
}
-static void run_led_override(unsigned long opaque)
+static void run_led_override(struct timer_list *t)
{
- struct hfi1_pportdata *ppd = (struct hfi1_pportdata *)opaque;
+ struct hfi1_pportdata *ppd = timer_container_of(ppd, t,
+ led_override_timer);
struct hfi1_devdata *dd = ppd->dd;
unsigned long timeout;
int phase_idx;
@@ -1237,8 +1360,7 @@ void hfi1_start_led_override(struct hfi1_pportdata *ppd, unsigned int timeon,
* timeout so the handler will be called soon to look at our request.
*/
if (!timer_pending(&ppd->led_override_timer)) {
- setup_timer(&ppd->led_override_timer, run_led_override,
- (unsigned long)ppd);
+ timer_setup(&ppd->led_override_timer, run_led_override, 0);
ppd->led_override_timer.expires = jiffies + 1;
add_timer(&ppd->led_override_timer);
atomic_set(&ppd->led_override_timer_active, 1);
@@ -1258,10 +1380,9 @@ void hfi1_start_led_override(struct hfi1_pportdata *ppd, unsigned int timeon,
*/
int hfi1_reset_device(int unit)
{
- int ret, i;
+ int ret;
struct hfi1_devdata *dd = hfi1_lookup(unit);
struct hfi1_pportdata *ppd;
- unsigned long flags;
int pidx;
if (!dd) {
@@ -1271,7 +1392,7 @@ int hfi1_reset_device(int unit)
dd_dev_info(dd, "Reset on unit %u requested\n", unit);
- if (!dd->kregbase || !(dd->flags & HFI1_PRESENT)) {
+ if (!dd->kregbase1 || !(dd->flags & HFI1_PRESENT)) {
dd_dev_info(dd,
"Invalid unit number %u or not initialized or not present\n",
unit);
@@ -1279,16 +1400,15 @@ int hfi1_reset_device(int unit)
goto bail;
}
- spin_lock_irqsave(&dd->uctxt_lock, flags);
+ /* If there are any user/vnic contexts, we cannot reset */
+ mutex_lock(&hfi1_mutex);
if (dd->rcd)
- for (i = dd->first_user_ctxt; i < dd->num_rcv_contexts; i++) {
- if (!dd->rcd[i] || !dd->rcd[i]->cnt)
- continue;
- spin_unlock_irqrestore(&dd->uctxt_lock, flags);
+ if (hfi1_stats.sps_ctxts) {
+ mutex_unlock(&hfi1_mutex);
ret = -EBUSY;
goto bail;
}
- spin_unlock_irqrestore(&dd->uctxt_lock, flags);
+ mutex_unlock(&hfi1_mutex);
for (pidx = 0; pidx < dd->num_pports; ++pidx) {
ppd = dd->pport + pidx;
@@ -1314,109 +1434,473 @@ bail:
return ret;
}
-void handle_eflags(struct hfi1_packet *packet)
+static inline void hfi1_setup_ib_header(struct hfi1_packet *packet)
+{
+ packet->hdr = (struct hfi1_ib_message_header *)
+ hfi1_get_msgheader(packet->rcd,
+ packet->rhf_addr);
+ packet->hlen = (u8 *)packet->rhf_addr - (u8 *)packet->hdr;
+}
+
+static int hfi1_bypass_ingress_pkt_check(struct hfi1_packet *packet)
+{
+ struct hfi1_pportdata *ppd = packet->rcd->ppd;
+
+ /* slid and dlid cannot be 0 */
+ if ((!packet->slid) || (!packet->dlid))
+ return -EINVAL;
+
+ /* Compare port lid with incoming packet dlid */
+ if ((!(hfi1_is_16B_mcast(packet->dlid))) &&
+ (packet->dlid !=
+ opa_get_lid(be32_to_cpu(OPA_LID_PERMISSIVE), 16B))) {
+ if ((packet->dlid & ~((1 << ppd->lmc) - 1)) != ppd->lid)
+ return -EINVAL;
+ }
+
+ /* No multicast packets with SC15 */
+ if ((hfi1_is_16B_mcast(packet->dlid)) && (packet->sc == 0xF))
+ return -EINVAL;
+
+ /* Packets with permissive DLID always on SC15 */
+ if ((packet->dlid == opa_get_lid(be32_to_cpu(OPA_LID_PERMISSIVE),
+ 16B)) &&
+ (packet->sc != 0xF))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int hfi1_setup_9B_packet(struct hfi1_packet *packet)
+{
+ struct hfi1_ibport *ibp = rcd_to_iport(packet->rcd);
+ struct ib_header *hdr;
+ u8 lnh;
+
+ hfi1_setup_ib_header(packet);
+ hdr = packet->hdr;
+
+ lnh = ib_get_lnh(hdr);
+ if (lnh == HFI1_LRH_BTH) {
+ packet->ohdr = &hdr->u.oth;
+ packet->grh = NULL;
+ } else if (lnh == HFI1_LRH_GRH) {
+ u32 vtf;
+
+ packet->ohdr = &hdr->u.l.oth;
+ packet->grh = &hdr->u.l.grh;
+ if (packet->grh->next_hdr != IB_GRH_NEXT_HDR)
+ goto drop;
+ vtf = be32_to_cpu(packet->grh->version_tclass_flow);
+ if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION)
+ goto drop;
+ } else {
+ goto drop;
+ }
+
+ /* Query commonly used fields from packet header */
+ packet->payload = packet->ebuf;
+ packet->opcode = ib_bth_get_opcode(packet->ohdr);
+ packet->slid = ib_get_slid(hdr);
+ packet->dlid = ib_get_dlid(hdr);
+ if (unlikely((packet->dlid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) &&
+ (packet->dlid != be16_to_cpu(IB_LID_PERMISSIVE))))
+ packet->dlid += opa_get_mcast_base(OPA_MCAST_NR) -
+ be16_to_cpu(IB_MULTICAST_LID_BASE);
+ packet->sl = ib_get_sl(hdr);
+ packet->sc = hfi1_9B_get_sc5(hdr, packet->rhf);
+ packet->pad = ib_bth_get_pad(packet->ohdr);
+ packet->extra_byte = 0;
+ packet->pkey = ib_bth_get_pkey(packet->ohdr);
+ packet->migrated = ib_bth_is_migration(packet->ohdr);
+
+ return 0;
+drop:
+ ibp->rvp.n_pkt_drops++;
+ return -EINVAL;
+}
+
+static int hfi1_setup_bypass_packet(struct hfi1_packet *packet)
+{
+ /*
+ * Bypass packets have a different header/payload split
+ * compared to an IB packet.
+ * Current split is set such that 16 bytes of the actual
+ * header is in the header buffer and the remining is in
+ * the eager buffer. We chose 16 since hfi1 driver only
+ * supports 16B bypass packets and we will be able to
+ * receive the entire LRH with such a split.
+ */
+
+ struct hfi1_ctxtdata *rcd = packet->rcd;
+ struct hfi1_pportdata *ppd = rcd->ppd;
+ struct hfi1_ibport *ibp = &ppd->ibport_data;
+ u8 l4;
+
+ packet->hdr = (struct hfi1_16b_header *)
+ hfi1_get_16B_header(packet->rcd,
+ packet->rhf_addr);
+ l4 = hfi1_16B_get_l4(packet->hdr);
+ if (l4 == OPA_16B_L4_IB_LOCAL) {
+ packet->ohdr = packet->ebuf;
+ packet->grh = NULL;
+ packet->opcode = ib_bth_get_opcode(packet->ohdr);
+ packet->pad = hfi1_16B_bth_get_pad(packet->ohdr);
+ /* hdr_len_by_opcode already has an IB LRH factored in */
+ packet->hlen = hdr_len_by_opcode[packet->opcode] +
+ (LRH_16B_BYTES - LRH_9B_BYTES);
+ packet->migrated = opa_bth_is_migration(packet->ohdr);
+ } else if (l4 == OPA_16B_L4_IB_GLOBAL) {
+ u32 vtf;
+ u8 grh_len = sizeof(struct ib_grh);
+
+ packet->ohdr = packet->ebuf + grh_len;
+ packet->grh = packet->ebuf;
+ packet->opcode = ib_bth_get_opcode(packet->ohdr);
+ packet->pad = hfi1_16B_bth_get_pad(packet->ohdr);
+ /* hdr_len_by_opcode already has an IB LRH factored in */
+ packet->hlen = hdr_len_by_opcode[packet->opcode] +
+ (LRH_16B_BYTES - LRH_9B_BYTES) + grh_len;
+ packet->migrated = opa_bth_is_migration(packet->ohdr);
+
+ if (packet->grh->next_hdr != IB_GRH_NEXT_HDR)
+ goto drop;
+ vtf = be32_to_cpu(packet->grh->version_tclass_flow);
+ if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION)
+ goto drop;
+ } else if (l4 == OPA_16B_L4_FM) {
+ packet->mgmt = packet->ebuf;
+ packet->ohdr = NULL;
+ packet->grh = NULL;
+ packet->opcode = IB_OPCODE_UD_SEND_ONLY;
+ packet->pad = OPA_16B_L4_FM_PAD;
+ packet->hlen = OPA_16B_L4_FM_HLEN;
+ packet->migrated = false;
+ } else {
+ goto drop;
+ }
+
+ /* Query commonly used fields from packet header */
+ packet->payload = packet->ebuf + packet->hlen - LRH_16B_BYTES;
+ packet->slid = hfi1_16B_get_slid(packet->hdr);
+ packet->dlid = hfi1_16B_get_dlid(packet->hdr);
+ if (unlikely(hfi1_is_16B_mcast(packet->dlid)))
+ packet->dlid += opa_get_mcast_base(OPA_MCAST_NR) -
+ opa_get_lid(opa_get_mcast_base(OPA_MCAST_NR),
+ 16B);
+ packet->sc = hfi1_16B_get_sc(packet->hdr);
+ packet->sl = ibp->sc_to_sl[packet->sc];
+ packet->extra_byte = SIZE_OF_LT;
+ packet->pkey = hfi1_16B_get_pkey(packet->hdr);
+
+ if (hfi1_bypass_ingress_pkt_check(packet))
+ goto drop;
+
+ return 0;
+drop:
+ hfi1_cdbg(PKT, "%s: packet dropped", __func__);
+ ibp->rvp.n_pkt_drops++;
+ return -EINVAL;
+}
+
+static void show_eflags_errs(struct hfi1_packet *packet)
{
struct hfi1_ctxtdata *rcd = packet->rcd;
u32 rte = rhf_rcv_type_err(packet->rhf);
+ dd_dev_err(rcd->dd,
+ "receive context %d: rhf 0x%016llx, errs [ %s%s%s%s%s%s%s] rte 0x%x\n",
+ rcd->ctxt, packet->rhf,
+ packet->rhf & RHF_K_HDR_LEN_ERR ? "k_hdr_len " : "",
+ packet->rhf & RHF_DC_UNC_ERR ? "dc_unc " : "",
+ packet->rhf & RHF_DC_ERR ? "dc " : "",
+ packet->rhf & RHF_TID_ERR ? "tid " : "",
+ packet->rhf & RHF_LEN_ERR ? "len " : "",
+ packet->rhf & RHF_ECC_ERR ? "ecc " : "",
+ packet->rhf & RHF_ICRC_ERR ? "icrc " : "",
+ rte);
+}
+
+void handle_eflags(struct hfi1_packet *packet)
+{
+ struct hfi1_ctxtdata *rcd = packet->rcd;
+
rcv_hdrerr(rcd, rcd->ppd, packet);
if (rhf_err_flags(packet->rhf))
- dd_dev_err(rcd->dd,
- "receive context %d: rhf 0x%016llx, errs [ %s%s%s%s%s%s%s%s] rte 0x%x\n",
- rcd->ctxt, packet->rhf,
- packet->rhf & RHF_K_HDR_LEN_ERR ? "k_hdr_len " : "",
- packet->rhf & RHF_DC_UNC_ERR ? "dc_unc " : "",
- packet->rhf & RHF_DC_ERR ? "dc " : "",
- packet->rhf & RHF_TID_ERR ? "tid " : "",
- packet->rhf & RHF_LEN_ERR ? "len " : "",
- packet->rhf & RHF_ECC_ERR ? "ecc " : "",
- packet->rhf & RHF_VCRC_ERR ? "vcrc " : "",
- packet->rhf & RHF_ICRC_ERR ? "icrc " : "",
- rte);
+ show_eflags_errs(packet);
+}
+
+static void hfi1_ipoib_ib_rcv(struct hfi1_packet *packet)
+{
+ struct hfi1_ibport *ibp;
+ struct net_device *netdev;
+ struct hfi1_ctxtdata *rcd = packet->rcd;
+ struct napi_struct *napi = rcd->napi;
+ struct sk_buff *skb;
+ struct hfi1_netdev_rxq *rxq = container_of(napi,
+ struct hfi1_netdev_rxq, napi);
+ u32 extra_bytes;
+ u32 tlen, qpnum;
+ bool do_work, do_cnp;
+
+ trace_hfi1_rcvhdr(packet);
+
+ hfi1_setup_ib_header(packet);
+
+ packet->ohdr = &((struct ib_header *)packet->hdr)->u.oth;
+ packet->grh = NULL;
+
+ if (unlikely(rhf_err_flags(packet->rhf))) {
+ handle_eflags(packet);
+ return;
+ }
+
+ qpnum = ib_bth_get_qpn(packet->ohdr);
+ netdev = hfi1_netdev_get_data(rcd->dd, qpnum);
+ if (!netdev)
+ goto drop_no_nd;
+
+ trace_input_ibhdr(rcd->dd, packet, !!(rhf_dc_info(packet->rhf)));
+ trace_ctxt_rsm_hist(rcd->ctxt);
+
+ /* handle congestion notifications */
+ do_work = hfi1_may_ecn(packet);
+ if (unlikely(do_work)) {
+ do_cnp = (packet->opcode != IB_OPCODE_CNP);
+ (void)hfi1_process_ecn_slowpath(hfi1_ipoib_priv(netdev)->qp,
+ packet, do_cnp);
+ }
+
+ /*
+ * We have split point after last byte of DETH
+ * lets strip padding and CRC and ICRC.
+ * tlen is whole packet len so we need to
+ * subtract header size as well.
+ */
+ tlen = packet->tlen;
+ extra_bytes = ib_bth_get_pad(packet->ohdr) + (SIZE_OF_CRC << 2) +
+ packet->hlen;
+ if (unlikely(tlen < extra_bytes))
+ goto drop;
+
+ tlen -= extra_bytes;
+
+ skb = hfi1_ipoib_prepare_skb(rxq, tlen, packet->ebuf);
+ if (unlikely(!skb))
+ goto drop;
+
+ dev_sw_netstats_rx_add(netdev, skb->len);
+
+ skb->dev = netdev;
+ skb->pkt_type = PACKET_HOST;
+ netif_receive_skb(skb);
+
+ return;
+
+drop:
+ ++netdev->stats.rx_dropped;
+drop_no_nd:
+ ibp = rcd_to_iport(packet->rcd);
+ ++ibp->rvp.n_pkt_drops;
}
/*
* The following functions are called by the interrupt handler. They are type
* specific handlers for each packet type.
*/
-int process_receive_ib(struct hfi1_packet *packet)
+static void process_receive_ib(struct hfi1_packet *packet)
{
- trace_hfi1_rcvhdr(packet->rcd->ppd->dd,
- packet->rcd->ctxt,
- rhf_err_flags(packet->rhf),
- RHF_RCV_TYPE_IB,
- packet->hlen,
- packet->tlen,
- packet->updegr,
- rhf_egr_index(packet->rhf));
+ if (hfi1_setup_9B_packet(packet))
+ return;
+
+ if (unlikely(hfi1_dbg_should_fault_rx(packet)))
+ return;
+
+ trace_hfi1_rcvhdr(packet);
if (unlikely(rhf_err_flags(packet->rhf))) {
handle_eflags(packet);
- return RHF_RCV_CONTINUE;
+ return;
}
hfi1_ib_rcv(packet);
- return RHF_RCV_CONTINUE;
}
-int process_receive_bypass(struct hfi1_packet *packet)
+static void process_receive_bypass(struct hfi1_packet *packet)
{
struct hfi1_devdata *dd = packet->rcd->dd;
- if (unlikely(rhf_err_flags(packet->rhf)))
- handle_eflags(packet);
+ if (hfi1_setup_bypass_packet(packet))
+ return;
- dd_dev_err(dd,
- "Bypass packets are not supported in normal operation. Dropping\n");
- incr_cntr64(&dd->sw_rcv_bypass_packet_errors);
- if (!(dd->err_info_rcvport.status_and_code & OPA_EI_STATUS_SMASK)) {
- u64 *flits = packet->ebuf;
+ trace_hfi1_rcvhdr(packet);
- if (flits && !(packet->rhf & RHF_LEN_ERR)) {
- dd->err_info_rcvport.packet_flit1 = flits[0];
- dd->err_info_rcvport.packet_flit2 =
- packet->tlen > sizeof(flits[0]) ? flits[1] : 0;
+ if (unlikely(rhf_err_flags(packet->rhf))) {
+ handle_eflags(packet);
+ return;
+ }
+
+ if (hfi1_16B_get_l2(packet->hdr) == 0x2) {
+ hfi1_16B_rcv(packet);
+ } else {
+ dd_dev_err(dd,
+ "Bypass packets other than 16B are not supported in normal operation. Dropping\n");
+ incr_cntr64(&dd->sw_rcv_bypass_packet_errors);
+ if (!(dd->err_info_rcvport.status_and_code &
+ OPA_EI_STATUS_SMASK)) {
+ u64 *flits = packet->ebuf;
+
+ if (flits && !(packet->rhf & RHF_LEN_ERR)) {
+ dd->err_info_rcvport.packet_flit1 = flits[0];
+ dd->err_info_rcvport.packet_flit2 =
+ packet->tlen > sizeof(flits[0]) ?
+ flits[1] : 0;
+ }
+ dd->err_info_rcvport.status_and_code |=
+ (OPA_EI_STATUS_SMASK | BAD_L2_ERR);
}
- dd->err_info_rcvport.status_and_code |=
- (OPA_EI_STATUS_SMASK | BAD_L2_ERR);
}
- return RHF_RCV_CONTINUE;
}
-int process_receive_error(struct hfi1_packet *packet)
+static void process_receive_error(struct hfi1_packet *packet)
{
+ /* KHdrHCRCErr -- KDETH packet with a bad HCRC */
+ if (unlikely(
+ hfi1_dbg_fault_suppress_err(&packet->rcd->dd->verbs_dev) &&
+ (rhf_rcv_type_err(packet->rhf) == RHF_RCV_TYPE_ERROR ||
+ packet->rhf & RHF_DC_ERR)))
+ return;
+
+ hfi1_setup_ib_header(packet);
handle_eflags(packet);
if (unlikely(rhf_err_flags(packet->rhf)))
dd_dev_err(packet->rcd->dd,
"Unhandled error packet received. Dropping.\n");
-
- return RHF_RCV_CONTINUE;
}
-int kdeth_process_expected(struct hfi1_packet *packet)
+static void kdeth_process_expected(struct hfi1_packet *packet)
{
- if (unlikely(rhf_err_flags(packet->rhf)))
- handle_eflags(packet);
+ hfi1_setup_9B_packet(packet);
+ if (unlikely(hfi1_dbg_should_fault_rx(packet)))
+ return;
+
+ if (unlikely(rhf_err_flags(packet->rhf))) {
+ struct hfi1_ctxtdata *rcd = packet->rcd;
- dd_dev_err(packet->rcd->dd,
- "Unhandled expected packet received. Dropping.\n");
- return RHF_RCV_CONTINUE;
+ if (hfi1_handle_kdeth_eflags(rcd, rcd->ppd, packet))
+ return;
+ }
+
+ hfi1_kdeth_expected_rcv(packet);
}
-int kdeth_process_eager(struct hfi1_packet *packet)
+static void kdeth_process_eager(struct hfi1_packet *packet)
{
- if (unlikely(rhf_err_flags(packet->rhf)))
- handle_eflags(packet);
+ hfi1_setup_9B_packet(packet);
+ if (unlikely(hfi1_dbg_should_fault_rx(packet)))
+ return;
+
+ trace_hfi1_rcvhdr(packet);
+ if (unlikely(rhf_err_flags(packet->rhf))) {
+ struct hfi1_ctxtdata *rcd = packet->rcd;
- dd_dev_err(packet->rcd->dd,
- "Unhandled eager packet received. Dropping.\n");
- return RHF_RCV_CONTINUE;
+ show_eflags_errs(packet);
+ if (hfi1_handle_kdeth_eflags(rcd, rcd->ppd, packet))
+ return;
+ }
+
+ hfi1_kdeth_eager_rcv(packet);
}
-int process_receive_invalid(struct hfi1_packet *packet)
+static void process_receive_invalid(struct hfi1_packet *packet)
{
dd_dev_err(packet->rcd->dd, "Invalid packet type %d. Dropping\n",
rhf_rcv_type(packet->rhf));
- return RHF_RCV_CONTINUE;
}
+
+#define HFI1_RCVHDR_DUMP_MAX 5
+
+void seqfile_dump_rcd(struct seq_file *s, struct hfi1_ctxtdata *rcd)
+{
+ struct hfi1_packet packet;
+ struct ps_mdata mdata;
+ int i;
+
+ seq_printf(s, "Rcd %u: RcvHdr cnt %u entsize %u %s ctrl 0x%08llx status 0x%08llx, head %llu tail %llu sw head %u\n",
+ rcd->ctxt, get_hdrq_cnt(rcd), get_hdrqentsize(rcd),
+ get_dma_rtail_setting(rcd) ?
+ "dma_rtail" : "nodma_rtail",
+ read_kctxt_csr(rcd->dd, rcd->ctxt, RCV_CTXT_CTRL),
+ read_kctxt_csr(rcd->dd, rcd->ctxt, RCV_CTXT_STATUS),
+ read_uctxt_csr(rcd->dd, rcd->ctxt, RCV_HDR_HEAD) &
+ RCV_HDR_HEAD_HEAD_MASK,
+ read_uctxt_csr(rcd->dd, rcd->ctxt, RCV_HDR_TAIL),
+ rcd->head);
+
+ init_packet(rcd, &packet);
+ init_ps_mdata(&mdata, &packet);
+
+ for (i = 0; i < HFI1_RCVHDR_DUMP_MAX; i++) {
+ __le32 *rhf_addr = (__le32 *)rcd->rcvhdrq + mdata.ps_head +
+ rcd->rhf_offset;
+ struct ib_header *hdr;
+ u64 rhf = rhf_to_cpu(rhf_addr);
+ u32 etype = rhf_rcv_type(rhf), qpn;
+ u8 opcode;
+ u32 psn;
+ u8 lnh;
+
+ if (ps_done(&mdata, rhf, rcd))
+ break;
+
+ if (ps_skip(&mdata, rhf, rcd))
+ goto next;
+
+ if (etype > RHF_RCV_TYPE_IB)
+ goto next;
+
+ packet.hdr = hfi1_get_msgheader(rcd, rhf_addr);
+ hdr = packet.hdr;
+
+ lnh = be16_to_cpu(hdr->lrh[0]) & 3;
+
+ if (lnh == HFI1_LRH_BTH)
+ packet.ohdr = &hdr->u.oth;
+ else if (lnh == HFI1_LRH_GRH)
+ packet.ohdr = &hdr->u.l.oth;
+ else
+ goto next; /* just in case */
+
+ opcode = (be32_to_cpu(packet.ohdr->bth[0]) >> 24);
+ qpn = be32_to_cpu(packet.ohdr->bth[1]) & RVT_QPN_MASK;
+ psn = mask_psn(be32_to_cpu(packet.ohdr->bth[2]));
+
+ seq_printf(s, "\tEnt %u: opcode 0x%x, qpn 0x%x, psn 0x%x\n",
+ mdata.ps_head, opcode, qpn, psn);
+next:
+ update_ps_mdata(&mdata, rcd);
+ }
+}
+
+const rhf_rcv_function_ptr normal_rhf_rcv_functions[] = {
+ [RHF_RCV_TYPE_EXPECTED] = kdeth_process_expected,
+ [RHF_RCV_TYPE_EAGER] = kdeth_process_eager,
+ [RHF_RCV_TYPE_IB] = process_receive_ib,
+ [RHF_RCV_TYPE_ERROR] = process_receive_error,
+ [RHF_RCV_TYPE_BYPASS] = process_receive_bypass,
+ [RHF_RCV_TYPE_INVALID5] = process_receive_invalid,
+ [RHF_RCV_TYPE_INVALID6] = process_receive_invalid,
+ [RHF_RCV_TYPE_INVALID7] = process_receive_invalid,
+};
+
+const rhf_rcv_function_ptr netdev_rhf_rcv_functions[] = {
+ [RHF_RCV_TYPE_EXPECTED] = process_receive_invalid,
+ [RHF_RCV_TYPE_EAGER] = process_receive_invalid,
+ [RHF_RCV_TYPE_IB] = hfi1_ipoib_ib_rcv,
+ [RHF_RCV_TYPE_ERROR] = process_receive_error,
+ [RHF_RCV_TYPE_BYPASS] = hfi1_vnic_bypass_rcv,
+ [RHF_RCV_TYPE_INVALID5] = process_receive_invalid,
+ [RHF_RCV_TYPE_INVALID6] = process_receive_invalid,
+ [RHF_RCV_TYPE_INVALID7] = process_receive_invalid,
+};
diff --git a/drivers/infiniband/hw/hfi1/efivar.c b/drivers/infiniband/hw/hfi1/efivar.c
index 106349fc1fb9..9ed05e10020e 100644
--- a/drivers/infiniband/hw/hfi1/efivar.c
+++ b/drivers/infiniband/hw/hfi1/efivar.c
@@ -1,50 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
* Copyright(c) 2015, 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
*/
+#include <linux/string.h>
+#include <linux/string_helpers.h>
+
#include "efivar.h"
/* GUID for HFI1 variables in EFI */
@@ -77,7 +38,7 @@ static int read_efi_var(const char *name, unsigned long *size,
*size = 0;
*return_data = NULL;
- if (!efi_enabled(EFI_RUNTIME_SERVICES))
+ if (!efi_rt_services_supported(EFI_RT_SUPPORTED_GET_VARIABLE))
return -EOPNOTSUPP;
uni_name = kcalloc(strlen(name) + 1, sizeof(efi_char16_t), GFP_KERNEL);
@@ -111,7 +72,7 @@ static int read_efi_var(const char *name, unsigned long *size,
* is in the EFIVAR_FS code and may not be compiled in.
* However, even that is insufficient since it does not cover
* EFI_BUFFER_TOO_SMALL which could be an important return.
- * For now, just split out succces or not found.
+ * For now, just split out success or not found.
*/
ret = status == EFI_SUCCESS ? 0 :
status == EFI_NOT_FOUND ? -ENOENT :
@@ -150,15 +111,28 @@ fail:
int read_hfi1_efi_var(struct hfi1_devdata *dd, const char *kind,
unsigned long *size, void **return_data)
{
- char name[64];
+ char prefix_name[64];
+ char name[128];
+ int result;
/* create a common prefix */
- snprintf(name, sizeof(name), "%04x:%02x:%02x.%x-%s",
+ snprintf(prefix_name, sizeof(prefix_name), "%04x:%02x:%02x.%x",
pci_domain_nr(dd->pcidev->bus),
dd->pcidev->bus->number,
PCI_SLOT(dd->pcidev->devfn),
- PCI_FUNC(dd->pcidev->devfn),
- kind);
+ PCI_FUNC(dd->pcidev->devfn));
+ snprintf(name, sizeof(name), "%s-%s", prefix_name, kind);
+ result = read_efi_var(name, size, return_data);
+
+ /*
+ * If reading the lowercase EFI variable fail, read the uppercase
+ * variable.
+ */
+ if (result) {
+ string_upper(prefix_name, prefix_name);
+ snprintf(name, sizeof(name), "%s-%s", prefix_name, kind);
+ result = read_efi_var(name, size, return_data);
+ }
- return read_efi_var(name, size, return_data);
+ return result;
}
diff --git a/drivers/infiniband/hw/hfi1/efivar.h b/drivers/infiniband/hw/hfi1/efivar.h
index 94e9e70de568..882240929a4b 100644
--- a/drivers/infiniband/hw/hfi1/efivar.h
+++ b/drivers/infiniband/hw/hfi1/efivar.h
@@ -1,49 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
* Copyright(c) 2015, 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
*/
+
#ifndef _HFI1_EFIVAR_H
#define _HFI1_EFIVAR_H
diff --git a/drivers/infiniband/hw/hfi1/eprom.c b/drivers/infiniband/hw/hfi1/eprom.c
index 26da124c88e2..f93a160d8d05 100644
--- a/drivers/infiniband/hw/hfi1/eprom.c
+++ b/drivers/infiniband/hw/hfi1/eprom.c
@@ -1,49 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
* Copyright(c) 2015, 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
*/
+
#include <linux/delay.h>
#include "hfi.h"
#include "common.h"
@@ -204,7 +163,10 @@ done_asic:
return ret;
}
-/* magic character sequence that trails an image */
+/* magic character sequence that begins an image */
+#define IMAGE_START_MAGIC "APO="
+
+/* magic character sequence that might trail an image */
#define IMAGE_TRAIL_MAGIC "egamiAPO"
/* EPROM file types */
@@ -263,6 +225,12 @@ static int read_partition_platform_config(struct hfi1_devdata *dd, void **data,
return ret;
}
+ /* config partition is valid only if it starts with IMAGE_START_MAGIC */
+ if (memcmp(buffer, IMAGE_START_MAGIC, strlen(IMAGE_START_MAGIC))) {
+ kfree(buffer);
+ return -ENOENT;
+ }
+
/* scan for image magic that may trail the actual data */
p = strnstr(buffer, IMAGE_TRAIL_MAGIC, P1_SIZE);
if (p)
diff --git a/drivers/infiniband/hw/hfi1/eprom.h b/drivers/infiniband/hw/hfi1/eprom.h
index e774184f1643..51648d1afcf1 100644
--- a/drivers/infiniband/hw/hfi1/eprom.h
+++ b/drivers/infiniband/hw/hfi1/eprom.h
@@ -1,48 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
* Copyright(c) 2015, 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
*/
struct hfi1_devdata;
diff --git a/drivers/infiniband/hw/hfi1/exp_rcv.c b/drivers/infiniband/hw/hfi1/exp_rcv.c
new file mode 100644
index 000000000000..879a66edbded
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/exp_rcv.c
@@ -0,0 +1,78 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+/*
+ * Copyright(c) 2017 Intel Corporation.
+ */
+
+#include "exp_rcv.h"
+#include "trace.h"
+
+/**
+ * hfi1_exp_tid_set_init - initialize exp_tid_set
+ * @set: the set
+ */
+static void hfi1_exp_tid_set_init(struct exp_tid_set *set)
+{
+ INIT_LIST_HEAD(&set->list);
+ set->count = 0;
+}
+
+/**
+ * hfi1_exp_tid_group_init - initialize rcd expected receive
+ * @rcd: the rcd
+ */
+void hfi1_exp_tid_group_init(struct hfi1_ctxtdata *rcd)
+{
+ hfi1_exp_tid_set_init(&rcd->tid_group_list);
+ hfi1_exp_tid_set_init(&rcd->tid_used_list);
+ hfi1_exp_tid_set_init(&rcd->tid_full_list);
+}
+
+/**
+ * hfi1_alloc_ctxt_rcv_groups - initialize expected receive groups
+ * @rcd: the context to add the groupings to
+ */
+int hfi1_alloc_ctxt_rcv_groups(struct hfi1_ctxtdata *rcd)
+{
+ struct hfi1_devdata *dd = rcd->dd;
+ u32 tidbase;
+ struct tid_group *grp;
+ int i;
+ u32 ngroups;
+
+ ngroups = rcd->expected_count / dd->rcv_entries.group_size;
+ rcd->groups =
+ kcalloc_node(ngroups, sizeof(*rcd->groups),
+ GFP_KERNEL, rcd->numa_id);
+ if (!rcd->groups)
+ return -ENOMEM;
+ tidbase = rcd->expected_base;
+ for (i = 0; i < ngroups; i++) {
+ grp = &rcd->groups[i];
+ grp->size = dd->rcv_entries.group_size;
+ grp->base = tidbase;
+ tid_group_add_tail(grp, &rcd->tid_group_list);
+ tidbase += dd->rcv_entries.group_size;
+ }
+
+ return 0;
+}
+
+/**
+ * hfi1_free_ctxt_rcv_groups - free expected receive groups
+ * @rcd: the context to free
+ *
+ * The routine dismantles the expect receive linked
+ * list and clears any tids associated with the receive
+ * context.
+ *
+ * This should only be called for kernel contexts and the
+ * a base user context.
+ */
+void hfi1_free_ctxt_rcv_groups(struct hfi1_ctxtdata *rcd)
+{
+ kfree(rcd->groups);
+ rcd->groups = NULL;
+ hfi1_exp_tid_group_init(rcd);
+
+ hfi1_clear_tids(rcd);
+}
diff --git a/drivers/infiniband/hw/hfi1/exp_rcv.h b/drivers/infiniband/hw/hfi1/exp_rcv.h
new file mode 100644
index 000000000000..141413d9fbc7
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/exp_rcv.h
@@ -0,0 +1,171 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
+/*
+ * Copyright(c) 2017 Intel Corporation.
+ */
+
+#ifndef _HFI1_EXP_RCV_H
+#define _HFI1_EXP_RCV_H
+#include "hfi.h"
+
+#define EXP_TID_SET_EMPTY(set) (set.count == 0 && list_empty(&set.list))
+
+#define EXP_TID_TIDLEN_MASK 0x7FFULL
+#define EXP_TID_TIDLEN_SHIFT 0
+#define EXP_TID_TIDCTRL_MASK 0x3ULL
+#define EXP_TID_TIDCTRL_SHIFT 20
+#define EXP_TID_TIDIDX_MASK 0x3FFULL
+#define EXP_TID_TIDIDX_SHIFT 22
+#define EXP_TID_GET(tid, field) \
+ (((tid) >> EXP_TID_TID##field##_SHIFT) & EXP_TID_TID##field##_MASK)
+
+#define EXP_TID_SET(field, value) \
+ (((value) & EXP_TID_TID##field##_MASK) << \
+ EXP_TID_TID##field##_SHIFT)
+#define EXP_TID_CLEAR(tid, field) ({ \
+ (tid) &= ~(EXP_TID_TID##field##_MASK << \
+ EXP_TID_TID##field##_SHIFT); \
+ })
+#define EXP_TID_RESET(tid, field, value) do { \
+ EXP_TID_CLEAR(tid, field); \
+ (tid) |= EXP_TID_SET(field, (value)); \
+ } while (0)
+
+/*
+ * Define fields in the KDETH header so we can update the header
+ * template.
+ */
+#define KDETH_OFFSET_SHIFT 0
+#define KDETH_OFFSET_MASK 0x7fff
+#define KDETH_OM_SHIFT 15
+#define KDETH_OM_MASK 0x1
+#define KDETH_TID_SHIFT 16
+#define KDETH_TID_MASK 0x3ff
+#define KDETH_TIDCTRL_SHIFT 26
+#define KDETH_TIDCTRL_MASK 0x3
+#define KDETH_INTR_SHIFT 28
+#define KDETH_INTR_MASK 0x1
+#define KDETH_SH_SHIFT 29
+#define KDETH_SH_MASK 0x1
+#define KDETH_KVER_SHIFT 30
+#define KDETH_KVER_MASK 0x3
+#define KDETH_JKEY_SHIFT 0x0
+#define KDETH_JKEY_MASK 0xff
+#define KDETH_HCRC_UPPER_SHIFT 16
+#define KDETH_HCRC_UPPER_MASK 0xff
+#define KDETH_HCRC_LOWER_SHIFT 24
+#define KDETH_HCRC_LOWER_MASK 0xff
+
+#define KDETH_GET(val, field) \
+ (((le32_to_cpu((val))) >> KDETH_##field##_SHIFT) & KDETH_##field##_MASK)
+#define KDETH_SET(dw, field, val) do { \
+ u32 dwval = le32_to_cpu(dw); \
+ dwval &= ~(KDETH_##field##_MASK << KDETH_##field##_SHIFT); \
+ dwval |= (((val) & KDETH_##field##_MASK) << \
+ KDETH_##field##_SHIFT); \
+ dw = cpu_to_le32(dwval); \
+ } while (0)
+
+#define KDETH_RESET(dw, field, val) ({ dw = 0; KDETH_SET(dw, field, val); })
+
+/* KDETH OM multipliers and switch over point */
+#define KDETH_OM_SMALL 4
+#define KDETH_OM_SMALL_SHIFT 2
+#define KDETH_OM_LARGE 64
+#define KDETH_OM_LARGE_SHIFT 6
+#define KDETH_OM_MAX_SIZE (1 << ((KDETH_OM_LARGE / KDETH_OM_SMALL) + 1))
+
+struct tid_group {
+ struct list_head list;
+ u32 base;
+ u8 size;
+ u8 used;
+ u8 map;
+};
+
+/*
+ * Write an "empty" RcvArray entry.
+ * This function exists so the TID registaration code can use it
+ * to write to unused/unneeded entries and still take advantage
+ * of the WC performance improvements. The HFI will ignore this
+ * write to the RcvArray entry.
+ */
+static inline void rcv_array_wc_fill(struct hfi1_devdata *dd, u32 index)
+{
+ /*
+ * Doing the WC fill writes only makes sense if the device is
+ * present and the RcvArray has been mapped as WC memory.
+ */
+ if ((dd->flags & HFI1_PRESENT) && dd->rcvarray_wc) {
+ writeq(0, dd->rcvarray_wc + (index * 8));
+ if ((index & 3) == 3)
+ flush_wc();
+ }
+}
+
+static inline void tid_group_add_tail(struct tid_group *grp,
+ struct exp_tid_set *set)
+{
+ list_add_tail(&grp->list, &set->list);
+ set->count++;
+}
+
+static inline void tid_group_remove(struct tid_group *grp,
+ struct exp_tid_set *set)
+{
+ list_del_init(&grp->list);
+ set->count--;
+}
+
+static inline void tid_group_move(struct tid_group *group,
+ struct exp_tid_set *s1,
+ struct exp_tid_set *s2)
+{
+ tid_group_remove(group, s1);
+ tid_group_add_tail(group, s2);
+}
+
+static inline struct tid_group *tid_group_pop(struct exp_tid_set *set)
+{
+ struct tid_group *grp =
+ list_first_entry(&set->list, struct tid_group, list);
+ list_del_init(&grp->list);
+ set->count--;
+ return grp;
+}
+
+static inline u32 create_tid(u32 rcventry, u32 npages)
+{
+ u32 pair = rcventry & ~0x1;
+
+ return EXP_TID_SET(IDX, pair >> 1) |
+ EXP_TID_SET(CTRL, 1 << (rcventry - pair)) |
+ EXP_TID_SET(LEN, npages);
+}
+
+/**
+ * hfi1_tid_group_to_idx - convert an index to a group
+ * @rcd - the receive context
+ * @grp - the group pointer
+ */
+static inline u16
+hfi1_tid_group_to_idx(struct hfi1_ctxtdata *rcd, struct tid_group *grp)
+{
+ return grp - &rcd->groups[0];
+}
+
+/**
+ * hfi1_idx_to_tid_group - convert a group to an index
+ * @rcd - the receive context
+ * @idx - the index
+ */
+static inline struct tid_group *
+hfi1_idx_to_tid_group(struct hfi1_ctxtdata *rcd, u16 idx)
+{
+ return &rcd->groups[idx];
+}
+
+int hfi1_alloc_ctxt_rcv_groups(struct hfi1_ctxtdata *rcd);
+void hfi1_free_ctxt_rcv_groups(struct hfi1_ctxtdata *rcd);
+void hfi1_exp_tid_group_init(struct hfi1_ctxtdata *rcd);
+
+#endif /* _HFI1_EXP_RCV_H */
diff --git a/drivers/infiniband/hw/hfi1/fault.c b/drivers/infiniband/hw/hfi1/fault.c
new file mode 100644
index 000000000000..a45cbffd52c7
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/fault.c
@@ -0,0 +1,319 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+/*
+ * Copyright(c) 2018 Intel Corporation.
+ */
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/bitmap.h>
+
+#include "debugfs.h"
+#include "fault.h"
+#include "trace.h"
+
+#define HFI1_FAULT_DIR_TX BIT(0)
+#define HFI1_FAULT_DIR_RX BIT(1)
+#define HFI1_FAULT_DIR_TXRX (HFI1_FAULT_DIR_TX | HFI1_FAULT_DIR_RX)
+
+static void *_fault_stats_seq_start(struct seq_file *s, loff_t *pos)
+{
+ struct hfi1_opcode_stats_perctx *opstats;
+
+ if (*pos >= ARRAY_SIZE(opstats->stats))
+ return NULL;
+ return pos;
+}
+
+static void *_fault_stats_seq_next(struct seq_file *s, void *v, loff_t *pos)
+{
+ struct hfi1_opcode_stats_perctx *opstats;
+
+ ++*pos;
+ if (*pos >= ARRAY_SIZE(opstats->stats))
+ return NULL;
+ return pos;
+}
+
+static void _fault_stats_seq_stop(struct seq_file *s, void *v)
+{
+}
+
+static int _fault_stats_seq_show(struct seq_file *s, void *v)
+{
+ loff_t *spos = v;
+ loff_t i = *spos, j;
+ u64 n_packets = 0, n_bytes = 0;
+ struct hfi1_ibdev *ibd = (struct hfi1_ibdev *)s->private;
+ struct hfi1_devdata *dd = dd_from_dev(ibd);
+ struct hfi1_ctxtdata *rcd;
+
+ for (j = 0; j < dd->first_dyn_alloc_ctxt; j++) {
+ rcd = hfi1_rcd_get_by_index(dd, j);
+ if (rcd) {
+ n_packets += rcd->opstats->stats[i].n_packets;
+ n_bytes += rcd->opstats->stats[i].n_bytes;
+ }
+ hfi1_rcd_put(rcd);
+ }
+ for_each_possible_cpu(j) {
+ struct hfi1_opcode_stats_perctx *sp =
+ per_cpu_ptr(dd->tx_opstats, j);
+
+ n_packets += sp->stats[i].n_packets;
+ n_bytes += sp->stats[i].n_bytes;
+ }
+ if (!n_packets && !n_bytes)
+ return SEQ_SKIP;
+ if (!ibd->fault->n_rxfaults[i] && !ibd->fault->n_txfaults[i])
+ return SEQ_SKIP;
+ seq_printf(s, "%02llx %llu/%llu (faults rx:%llu faults: tx:%llu)\n", i,
+ (unsigned long long)n_packets,
+ (unsigned long long)n_bytes,
+ (unsigned long long)ibd->fault->n_rxfaults[i],
+ (unsigned long long)ibd->fault->n_txfaults[i]);
+ return 0;
+}
+
+DEBUGFS_SEQ_FILE_OPS(fault_stats);
+DEBUGFS_SEQ_FILE_OPEN(fault_stats);
+DEBUGFS_FILE_OPS(fault_stats);
+
+static int fault_opcodes_open(struct inode *inode, struct file *file)
+{
+ file->private_data = inode->i_private;
+ return nonseekable_open(inode, file);
+}
+
+static ssize_t fault_opcodes_write(struct file *file, const char __user *buf,
+ size_t len, loff_t *pos)
+{
+ ssize_t ret = 0;
+ /* 1280 = 256 opcodes * 4 chars/opcode + 255 commas + NULL */
+ size_t copy, datalen = 1280;
+ char *data, *token, *ptr, *end;
+ struct fault *fault = file->private_data;
+
+ data = kcalloc(datalen, sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+ copy = min(len, datalen - 1);
+ if (copy_from_user(data, buf, copy)) {
+ ret = -EFAULT;
+ goto free_data;
+ }
+
+ ptr = data;
+ token = ptr;
+ for (ptr = data; *ptr; ptr = end + 1, token = ptr) {
+ char *dash;
+ unsigned long range_start, range_end, i;
+ bool remove = false;
+ unsigned long bound = 1U << BITS_PER_BYTE;
+
+ end = strchr(ptr, ',');
+ if (end)
+ *end = '\0';
+ if (token[0] == '-') {
+ remove = true;
+ token++;
+ }
+ dash = strchr(token, '-');
+ if (dash)
+ *dash = '\0';
+ if (kstrtoul(token, 0, &range_start))
+ break;
+ if (dash) {
+ token = dash + 1;
+ if (kstrtoul(token, 0, &range_end))
+ break;
+ } else {
+ range_end = range_start;
+ }
+ if (range_start == range_end && range_start == -1UL) {
+ bitmap_zero(fault->opcodes, sizeof(fault->opcodes) *
+ BITS_PER_BYTE);
+ break;
+ }
+ /* Check the inputs */
+ if (range_start >= bound || range_end >= bound)
+ break;
+
+ for (i = range_start; i <= range_end; i++) {
+ if (remove)
+ clear_bit(i, fault->opcodes);
+ else
+ set_bit(i, fault->opcodes);
+ }
+ if (!end)
+ break;
+ }
+ ret = len;
+
+free_data:
+ kfree(data);
+ return ret;
+}
+
+static ssize_t fault_opcodes_read(struct file *file, char __user *buf,
+ size_t len, loff_t *pos)
+{
+ ssize_t ret = 0;
+ char *data;
+ size_t datalen = 1280, size = 0; /* see fault_opcodes_write() */
+ unsigned long bit = 0, zero = 0;
+ struct fault *fault = file->private_data;
+ size_t bitsize = sizeof(fault->opcodes) * BITS_PER_BYTE;
+
+ data = kcalloc(datalen, sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+ bit = find_first_bit(fault->opcodes, bitsize);
+ while (bit < bitsize) {
+ zero = find_next_zero_bit(fault->opcodes, bitsize, bit);
+ if (zero - 1 != bit)
+ size += scnprintf(data + size,
+ datalen - size - 1,
+ "0x%lx-0x%lx,", bit, zero - 1);
+ else
+ size += scnprintf(data + size,
+ datalen - size - 1, "0x%lx,",
+ bit);
+ bit = find_next_bit(fault->opcodes, bitsize, zero);
+ }
+ data[size - 1] = '\n';
+ data[size] = '\0';
+ ret = simple_read_from_buffer(buf, len, pos, data, size);
+ kfree(data);
+ return ret;
+}
+
+static const struct file_operations __fault_opcodes_fops = {
+ .owner = THIS_MODULE,
+ .open = fault_opcodes_open,
+ .read = fault_opcodes_read,
+ .write = fault_opcodes_write,
+};
+
+void hfi1_fault_exit_debugfs(struct hfi1_ibdev *ibd)
+{
+ if (ibd->fault)
+ debugfs_remove_recursive(ibd->fault->dir);
+ kfree(ibd->fault);
+ ibd->fault = NULL;
+}
+
+int hfi1_fault_init_debugfs(struct hfi1_ibdev *ibd)
+{
+ struct dentry *parent = ibd->hfi1_ibdev_dbg;
+ struct dentry *fault_dir;
+
+ ibd->fault = kzalloc(sizeof(*ibd->fault), GFP_KERNEL);
+ if (!ibd->fault)
+ return -ENOMEM;
+
+ ibd->fault->attr.interval = 1;
+ ibd->fault->attr.require_end = ULONG_MAX;
+ ibd->fault->attr.stacktrace_depth = 32;
+ ibd->fault->attr.dname = NULL;
+ ibd->fault->attr.verbose = 0;
+ ibd->fault->enable = false;
+ ibd->fault->opcode = false;
+ ibd->fault->fault_skip = 0;
+ ibd->fault->skip = 0;
+ ibd->fault->direction = HFI1_FAULT_DIR_TXRX;
+ ibd->fault->suppress_err = false;
+ bitmap_zero(ibd->fault->opcodes,
+ sizeof(ibd->fault->opcodes) * BITS_PER_BYTE);
+
+ fault_dir =
+ fault_create_debugfs_attr("fault", parent, &ibd->fault->attr);
+ if (IS_ERR(fault_dir)) {
+ kfree(ibd->fault);
+ ibd->fault = NULL;
+ return -ENOENT;
+ }
+ ibd->fault->dir = fault_dir;
+
+ debugfs_create_file("fault_stats", 0444, fault_dir, ibd,
+ &_fault_stats_file_ops);
+ debugfs_create_bool("enable", 0600, fault_dir, &ibd->fault->enable);
+ debugfs_create_bool("suppress_err", 0600, fault_dir,
+ &ibd->fault->suppress_err);
+ debugfs_create_bool("opcode_mode", 0600, fault_dir,
+ &ibd->fault->opcode);
+ debugfs_create_file("opcodes", 0600, fault_dir, ibd->fault,
+ &__fault_opcodes_fops);
+ debugfs_create_u64("skip_pkts", 0600, fault_dir,
+ &ibd->fault->fault_skip);
+ debugfs_create_u64("skip_usec", 0600, fault_dir,
+ &ibd->fault->fault_skip_usec);
+ debugfs_create_u8("direction", 0600, fault_dir, &ibd->fault->direction);
+
+ return 0;
+}
+
+bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd)
+{
+ if (ibd->fault)
+ return ibd->fault->suppress_err;
+ return false;
+}
+
+static bool __hfi1_should_fault(struct hfi1_ibdev *ibd, u32 opcode,
+ u8 direction)
+{
+ bool ret = false;
+
+ if (!ibd->fault || !ibd->fault->enable)
+ return false;
+ if (!(ibd->fault->direction & direction))
+ return false;
+ if (ibd->fault->opcode) {
+ if (bitmap_empty(ibd->fault->opcodes,
+ (sizeof(ibd->fault->opcodes) *
+ BITS_PER_BYTE)))
+ return false;
+ if (!(test_bit(opcode, ibd->fault->opcodes)))
+ return false;
+ }
+ if (ibd->fault->fault_skip_usec &&
+ time_before(jiffies, ibd->fault->skip_usec))
+ return false;
+ if (ibd->fault->fault_skip && ibd->fault->skip) {
+ ibd->fault->skip--;
+ return false;
+ }
+ ret = should_fail(&ibd->fault->attr, 1);
+ if (ret) {
+ ibd->fault->skip = ibd->fault->fault_skip;
+ ibd->fault->skip_usec = jiffies +
+ usecs_to_jiffies(ibd->fault->fault_skip_usec);
+ }
+ return ret;
+}
+
+bool hfi1_dbg_should_fault_tx(struct rvt_qp *qp, u32 opcode)
+{
+ struct hfi1_ibdev *ibd = to_idev(qp->ibqp.device);
+
+ if (__hfi1_should_fault(ibd, opcode, HFI1_FAULT_DIR_TX)) {
+ trace_hfi1_fault_opcode(qp, opcode);
+ ibd->fault->n_txfaults[opcode]++;
+ return true;
+ }
+ return false;
+}
+
+bool hfi1_dbg_should_fault_rx(struct hfi1_packet *packet)
+{
+ struct hfi1_ibdev *ibd = &packet->rcd->dd->verbs_dev;
+
+ if (__hfi1_should_fault(ibd, packet->opcode, HFI1_FAULT_DIR_RX)) {
+ trace_hfi1_fault_packet(packet);
+ ibd->fault->n_rxfaults[packet->opcode]++;
+ return true;
+ }
+ return false;
+}
diff --git a/drivers/infiniband/hw/hfi1/fault.h b/drivers/infiniband/hw/hfi1/fault.h
new file mode 100644
index 000000000000..51adafe240d7
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/fault.h
@@ -0,0 +1,69 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
+/*
+ * Copyright(c) 2018 Intel Corporation.
+ */
+
+#ifndef _HFI1_FAULT_H
+#define _HFI1_FAULT_H
+
+#include <linux/fault-inject.h>
+#include <linux/dcache.h>
+#include <linux/bitops.h>
+#include <linux/kernel.h>
+#include <rdma/rdma_vt.h>
+
+#include "hfi.h"
+
+struct hfi1_ibdev;
+
+#if defined(CONFIG_FAULT_INJECTION) && defined(CONFIG_FAULT_INJECTION_DEBUG_FS)
+struct fault {
+ struct fault_attr attr;
+ struct dentry *dir;
+ u64 n_rxfaults[(1U << BITS_PER_BYTE)];
+ u64 n_txfaults[(1U << BITS_PER_BYTE)];
+ u64 fault_skip;
+ u64 skip;
+ u64 fault_skip_usec;
+ unsigned long skip_usec;
+ unsigned long opcodes[(1U << BITS_PER_BYTE) / BITS_PER_LONG];
+ bool enable;
+ bool suppress_err;
+ bool opcode;
+ u8 direction;
+};
+
+int hfi1_fault_init_debugfs(struct hfi1_ibdev *ibd);
+bool hfi1_dbg_should_fault_tx(struct rvt_qp *qp, u32 opcode);
+bool hfi1_dbg_should_fault_rx(struct hfi1_packet *packet);
+bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd);
+void hfi1_fault_exit_debugfs(struct hfi1_ibdev *ibd);
+
+#else
+
+static inline int hfi1_fault_init_debugfs(struct hfi1_ibdev *ibd)
+{
+ return 0;
+}
+
+static inline bool hfi1_dbg_should_fault_rx(struct hfi1_packet *packet)
+{
+ return false;
+}
+
+static inline bool hfi1_dbg_should_fault_tx(struct rvt_qp *qp,
+ u32 opcode)
+{
+ return false;
+}
+
+static inline bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd)
+{
+ return false;
+}
+
+static inline void hfi1_fault_exit_debugfs(struct hfi1_ibdev *ibd)
+{
+}
+#endif
+#endif /* _HFI1_FAULT_H */
diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c
index bd786b7bd30b..503abec709c9 100644
--- a/drivers/infiniband/hw/hfi1/file_ops.c
+++ b/drivers/infiniband/hw/hfi1/file_ops.c
@@ -1,53 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
+ * Copyright(c) 2020 Cornelis Networks, Inc.
+ * Copyright(c) 2015-2020 Intel Corporation.
*/
+
#include <linux/poll.h>
#include <linux/cdev.h>
#include <linux/vmalloc.h>
#include <linux/io.h>
+#include <linux/sched/mm.h>
+#include <linux/bitmap.h>
#include <rdma/ib.h>
@@ -56,10 +18,10 @@
#include "device.h"
#include "common.h"
#include "trace.h"
+#include "mmu_rb.h"
#include "user_sdma.h"
#include "user_exp_rcv.h"
#include "aspm.h"
-#include "mmu_rb.h"
#undef pr_fmt
#define pr_fmt(fmt) DRIVER_NAME ": " fmt
@@ -69,30 +31,46 @@
/*
* File operation functions
*/
-static int hfi1_file_open(struct inode *, struct file *);
-static int hfi1_file_close(struct inode *, struct file *);
-static ssize_t hfi1_write_iter(struct kiocb *, struct iov_iter *);
-static unsigned int hfi1_poll(struct file *, struct poll_table_struct *);
-static int hfi1_file_mmap(struct file *, struct vm_area_struct *);
-
-static u64 kvirt_to_phys(void *);
-static int assign_ctxt(struct file *, struct hfi1_user_info *);
-static int init_subctxts(struct hfi1_ctxtdata *, const struct hfi1_user_info *);
-static int user_init(struct file *);
-static int get_ctxt_info(struct file *, void __user *, __u32);
-static int get_base_info(struct file *, void __user *, __u32);
-static int setup_ctxt(struct file *);
-static int setup_subctxt(struct hfi1_ctxtdata *);
-static int get_user_context(struct file *, struct hfi1_user_info *, int);
-static int find_shared_ctxt(struct file *, const struct hfi1_user_info *);
-static int allocate_ctxt(struct file *, struct hfi1_devdata *,
- struct hfi1_user_info *);
-static unsigned int poll_urgent(struct file *, struct poll_table_struct *);
-static unsigned int poll_next(struct file *, struct poll_table_struct *);
-static int user_event_ack(struct hfi1_ctxtdata *, int, unsigned long);
-static int set_ctxt_pkey(struct hfi1_ctxtdata *, unsigned, u16);
-static int manage_rcvq(struct hfi1_ctxtdata *, unsigned, int);
-static int vma_fault(struct vm_area_struct *, struct vm_fault *);
+static int hfi1_file_open(struct inode *inode, struct file *fp);
+static int hfi1_file_close(struct inode *inode, struct file *fp);
+static ssize_t hfi1_write_iter(struct kiocb *kiocb, struct iov_iter *from);
+static __poll_t hfi1_poll(struct file *fp, struct poll_table_struct *pt);
+static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma);
+
+static u64 kvirt_to_phys(void *addr);
+static int assign_ctxt(struct hfi1_filedata *fd, unsigned long arg, u32 len);
+static void init_subctxts(struct hfi1_ctxtdata *uctxt,
+ const struct hfi1_user_info *uinfo);
+static int init_user_ctxt(struct hfi1_filedata *fd,
+ struct hfi1_ctxtdata *uctxt);
+static void user_init(struct hfi1_ctxtdata *uctxt);
+static int get_ctxt_info(struct hfi1_filedata *fd, unsigned long arg, u32 len);
+static int get_base_info(struct hfi1_filedata *fd, unsigned long arg, u32 len);
+static int user_exp_rcv_setup(struct hfi1_filedata *fd, unsigned long arg,
+ u32 len);
+static int user_exp_rcv_clear(struct hfi1_filedata *fd, unsigned long arg,
+ u32 len);
+static int user_exp_rcv_invalid(struct hfi1_filedata *fd, unsigned long arg,
+ u32 len);
+static int setup_base_ctxt(struct hfi1_filedata *fd,
+ struct hfi1_ctxtdata *uctxt);
+static int setup_subctxt(struct hfi1_ctxtdata *uctxt);
+
+static int find_sub_ctxt(struct hfi1_filedata *fd,
+ const struct hfi1_user_info *uinfo);
+static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd,
+ struct hfi1_user_info *uinfo,
+ struct hfi1_ctxtdata **cd);
+static void deallocate_ctxt(struct hfi1_ctxtdata *uctxt);
+static __poll_t poll_urgent(struct file *fp, struct poll_table_struct *pt);
+static __poll_t poll_next(struct file *fp, struct poll_table_struct *pt);
+static int user_event_ack(struct hfi1_ctxtdata *uctxt, u16 subctxt,
+ unsigned long arg);
+static int set_ctxt_pkey(struct hfi1_ctxtdata *uctxt, unsigned long arg);
+static int ctxt_reset(struct hfi1_ctxtdata *uctxt);
+static int manage_rcvq(struct hfi1_ctxtdata *uctxt, u16 subctxt,
+ unsigned long arg);
+static vm_fault_t vma_fault(struct vm_fault *vmf);
static long hfi1_file_ioctl(struct file *fp, unsigned int cmd,
unsigned long arg);
@@ -107,7 +85,7 @@ static const struct file_operations hfi1_file_ops = {
.llseek = noop_llseek,
};
-static struct vm_operations_struct vm_ops = {
+static const struct vm_operations_struct vm_ops = {
.fault = vma_fault,
};
@@ -172,31 +150,31 @@ static int hfi1_file_open(struct inode *inode, struct file *fp)
struct hfi1_devdata,
user_cdev);
- if (!atomic_inc_not_zero(&dd->user_refcount))
- return -ENXIO;
+ if (!((dd->flags & HFI1_PRESENT) && dd->kregbase1))
+ return -EINVAL;
- /* Just take a ref now. Not all opens result in a context assign */
- kobject_get(&dd->kobj);
+ if (!refcount_inc_not_zero(&dd->user_refcount))
+ return -ENXIO;
/* The real work is performed later in assign_ctxt() */
fd = kzalloc(sizeof(*fd), GFP_KERNEL);
- if (fd) {
- fd->rec_cpu_num = -1; /* no cpu affinity by default */
- fd->mm = current->mm;
- atomic_inc(&fd->mm->mm_count);
- fp->private_data = fd;
- } else {
- fp->private_data = NULL;
-
- if (atomic_dec_and_test(&dd->user_refcount))
- complete(&dd->user_comp);
-
- return -ENOMEM;
- }
-
+ if (!fd || init_srcu_struct(&fd->pq_srcu))
+ goto nomem;
+ spin_lock_init(&fd->pq_rcu_lock);
+ spin_lock_init(&fd->tid_lock);
+ spin_lock_init(&fd->invalid_lock);
+ fd->rec_cpu_num = -1; /* no cpu affinity by default */
+ fd->dd = dd;
+ fp->private_data = fd;
return 0;
+nomem:
+ kfree(fd);
+ fp->private_data = NULL;
+ if (refcount_dec_and_test(&dd->user_refcount))
+ complete(&dd->user_comp);
+ return -ENOMEM;
}
static long hfi1_file_ioctl(struct file *fp, unsigned int cmd,
@@ -204,13 +182,8 @@ static long hfi1_file_ioctl(struct file *fp, unsigned int cmd,
{
struct hfi1_filedata *fd = fp->private_data;
struct hfi1_ctxtdata *uctxt = fd->uctxt;
- struct hfi1_user_info uinfo;
- struct hfi1_tid_info tinfo;
int ret = 0;
- unsigned long addr;
int uval = 0;
- unsigned long ul_uval = 0;
- u16 uval16 = 0;
hfi1_cdbg(IOCTL, "IOCTL recv: 0x%x", cmd);
if (cmd != HFI1_IOCTL_ASSIGN_CTXT &&
@@ -220,176 +193,55 @@ static long hfi1_file_ioctl(struct file *fp, unsigned int cmd,
switch (cmd) {
case HFI1_IOCTL_ASSIGN_CTXT:
- if (uctxt)
- return -EINVAL;
-
- if (copy_from_user(&uinfo,
- (struct hfi1_user_info __user *)arg,
- sizeof(uinfo)))
- return -EFAULT;
-
- ret = assign_ctxt(fp, &uinfo);
- if (ret < 0)
- return ret;
- ret = setup_ctxt(fp);
- if (ret)
- return ret;
- ret = user_init(fp);
+ ret = assign_ctxt(fd, arg, _IOC_SIZE(cmd));
break;
+
case HFI1_IOCTL_CTXT_INFO:
- ret = get_ctxt_info(fp, (void __user *)(unsigned long)arg,
- sizeof(struct hfi1_ctxt_info));
+ ret = get_ctxt_info(fd, arg, _IOC_SIZE(cmd));
break;
+
case HFI1_IOCTL_USER_INFO:
- ret = get_base_info(fp, (void __user *)(unsigned long)arg,
- sizeof(struct hfi1_base_info));
+ ret = get_base_info(fd, arg, _IOC_SIZE(cmd));
break;
+
case HFI1_IOCTL_CREDIT_UPD:
if (uctxt)
sc_return_credits(uctxt->sc);
break;
case HFI1_IOCTL_TID_UPDATE:
- if (copy_from_user(&tinfo,
- (struct hfi11_tid_info __user *)arg,
- sizeof(tinfo)))
- return -EFAULT;
-
- ret = hfi1_user_exp_rcv_setup(fp, &tinfo);
- if (!ret) {
- /*
- * Copy the number of tidlist entries we used
- * and the length of the buffer we registered.
- * These fields are adjacent in the structure so
- * we can copy them at the same time.
- */
- addr = arg + offsetof(struct hfi1_tid_info, tidcnt);
- if (copy_to_user((void __user *)addr, &tinfo.tidcnt,
- sizeof(tinfo.tidcnt) +
- sizeof(tinfo.length)))
- ret = -EFAULT;
- }
+ ret = user_exp_rcv_setup(fd, arg, _IOC_SIZE(cmd));
break;
case HFI1_IOCTL_TID_FREE:
- if (copy_from_user(&tinfo,
- (struct hfi11_tid_info __user *)arg,
- sizeof(tinfo)))
- return -EFAULT;
-
- ret = hfi1_user_exp_rcv_clear(fp, &tinfo);
- if (ret)
- break;
- addr = arg + offsetof(struct hfi1_tid_info, tidcnt);
- if (copy_to_user((void __user *)addr, &tinfo.tidcnt,
- sizeof(tinfo.tidcnt)))
- ret = -EFAULT;
+ ret = user_exp_rcv_clear(fd, arg, _IOC_SIZE(cmd));
break;
case HFI1_IOCTL_TID_INVAL_READ:
- if (copy_from_user(&tinfo,
- (struct hfi11_tid_info __user *)arg,
- sizeof(tinfo)))
- return -EFAULT;
-
- ret = hfi1_user_exp_rcv_invalid(fp, &tinfo);
- if (ret)
- break;
- addr = arg + offsetof(struct hfi1_tid_info, tidcnt);
- if (copy_to_user((void __user *)addr, &tinfo.tidcnt,
- sizeof(tinfo.tidcnt)))
- ret = -EFAULT;
+ ret = user_exp_rcv_invalid(fd, arg, _IOC_SIZE(cmd));
break;
case HFI1_IOCTL_RECV_CTRL:
- ret = get_user(uval, (int __user *)arg);
- if (ret != 0)
- return -EFAULT;
- ret = manage_rcvq(uctxt, fd->subctxt, uval);
+ ret = manage_rcvq(uctxt, fd->subctxt, arg);
break;
case HFI1_IOCTL_POLL_TYPE:
- ret = get_user(uval, (int __user *)arg);
- if (ret != 0)
+ if (get_user(uval, (int __user *)arg))
return -EFAULT;
uctxt->poll_type = (typeof(uctxt->poll_type))uval;
break;
case HFI1_IOCTL_ACK_EVENT:
- ret = get_user(ul_uval, (unsigned long __user *)arg);
- if (ret != 0)
- return -EFAULT;
- ret = user_event_ack(uctxt, fd->subctxt, ul_uval);
+ ret = user_event_ack(uctxt, fd->subctxt, arg);
break;
case HFI1_IOCTL_SET_PKEY:
- ret = get_user(uval16, (u16 __user *)arg);
- if (ret != 0)
- return -EFAULT;
- if (HFI1_CAP_IS_USET(PKEY_CHECK))
- ret = set_ctxt_pkey(uctxt, fd->subctxt, uval16);
- else
- return -EPERM;
+ ret = set_ctxt_pkey(uctxt, arg);
break;
- case HFI1_IOCTL_CTXT_RESET: {
- struct send_context *sc;
- struct hfi1_devdata *dd;
-
- if (!uctxt || !uctxt->dd || !uctxt->sc)
- return -EINVAL;
-
- /*
- * There is no protection here. User level has to
- * guarantee that no one will be writing to the send
- * context while it is being re-initialized.
- * If user level breaks that guarantee, it will break
- * it's own context and no one else's.
- */
- dd = uctxt->dd;
- sc = uctxt->sc;
- /*
- * Wait until the interrupt handler has marked the
- * context as halted or frozen. Report error if we time
- * out.
- */
- wait_event_interruptible_timeout(
- sc->halt_wait, (sc->flags & SCF_HALTED),
- msecs_to_jiffies(SEND_CTXT_HALT_TIMEOUT));
- if (!(sc->flags & SCF_HALTED))
- return -ENOLCK;
-
- /*
- * If the send context was halted due to a Freeze,
- * wait until the device has been "unfrozen" before
- * resetting the context.
- */
- if (sc->flags & SCF_FROZEN) {
- wait_event_interruptible_timeout(
- dd->event_queue,
- !(ACCESS_ONCE(dd->flags) & HFI1_FROZEN),
- msecs_to_jiffies(SEND_CTXT_HALT_TIMEOUT));
- if (dd->flags & HFI1_FROZEN)
- return -ENOLCK;
-
- if (dd->flags & HFI1_FORCED_FREEZE)
- /*
- * Don't allow context reset if we are into
- * forced freeze
- */
- return -ENODEV;
-
- sc_disable(sc);
- ret = sc_enable(sc);
- hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_ENB,
- uctxt->ctxt);
- } else {
- ret = sc_restart(sc);
- }
- if (!ret)
- sc_return_credits(sc);
+ case HFI1_IOCTL_CTXT_RESET:
+ ret = ctxt_reset(uctxt);
break;
- }
case HFI1_IOCTL_GET_VERS:
uval = HFI1_USER_SWVERSION;
@@ -407,29 +259,37 @@ static long hfi1_file_ioctl(struct file *fp, unsigned int cmd,
static ssize_t hfi1_write_iter(struct kiocb *kiocb, struct iov_iter *from)
{
struct hfi1_filedata *fd = kiocb->ki_filp->private_data;
- struct hfi1_user_sdma_pkt_q *pq = fd->pq;
+ struct hfi1_user_sdma_pkt_q *pq;
struct hfi1_user_sdma_comp_q *cq = fd->cq;
int done = 0, reqs = 0;
unsigned long dim = from->nr_segs;
+ int idx;
- if (!cq || !pq)
- return -EIO;
-
- if (!iter_is_iovec(from) || !dim)
+ if (!HFI1_CAP_IS_KSET(SDMA))
return -EINVAL;
+ if (!user_backed_iter(from))
+ return -EINVAL;
+ idx = srcu_read_lock(&fd->pq_srcu);
+ pq = srcu_dereference(fd->pq, &fd->pq_srcu);
+ if (!cq || !pq) {
+ srcu_read_unlock(&fd->pq_srcu, idx);
+ return -EIO;
+ }
- hfi1_cdbg(SDMA, "SDMA request from %u:%u (%lu)",
- fd->uctxt->ctxt, fd->subctxt, dim);
+ trace_hfi1_sdma_request(fd->dd, fd->uctxt->ctxt, fd->subctxt, dim);
- if (atomic_read(&pq->n_reqs) == pq->n_max_reqs)
+ if (atomic_read(&pq->n_reqs) == pq->n_max_reqs) {
+ srcu_read_unlock(&fd->pq_srcu, idx);
return -ENOSPC;
+ }
while (dim) {
+ const struct iovec *iov = iter_iov(from);
int ret;
unsigned long count = 0;
ret = hfi1_user_sdma_process_request(
- kiocb->ki_filp, (struct iovec *)(from->iov + done),
+ fd, (struct iovec *)(iov + done),
dim, &count);
if (ret) {
reqs = ret;
@@ -440,9 +300,21 @@ static ssize_t hfi1_write_iter(struct kiocb *kiocb, struct iov_iter *from)
reqs++;
}
+ srcu_read_unlock(&fd->pq_srcu, idx);
return reqs;
}
+static inline void mmap_cdbg(u16 ctxt, u8 subctxt, u8 type, u8 mapio, u8 vmf,
+ u64 memaddr, void *memvirt, dma_addr_t memdma,
+ ssize_t memlen, struct vm_area_struct *vma)
+{
+ hfi1_cdbg(PROC,
+ "%u:%u type:%u io/vf/dma:%d/%d/%d, addr:0x%llx, len:%lu(%lu), flags:0x%lx",
+ ctxt, subctxt, type, mapio, vmf, !!memdma,
+ memaddr ?: (u64)memvirt, memlen,
+ vma->vm_end - vma->vm_start, vma->vm_flags);
+}
+
static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma)
{
struct hfi1_filedata *fd = fp->private_data;
@@ -452,6 +324,7 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma)
u64 token = vma->vm_pgoff << PAGE_SHIFT,
memaddr = 0;
void *memvirt = NULL;
+ dma_addr_t memdma = 0;
u8 subctxt, mapio = 0, vmf = 0, type;
ssize_t memlen = 0;
int ret = 0;
@@ -471,6 +344,11 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma)
goto done;
}
+ /*
+ * vm_pgoff is used as a buffer selector cookie. Always mmap from
+ * the beginning.
+ */
+ vma->vm_pgoff = 0;
flags = vma->vm_flags;
switch (type) {
@@ -492,7 +370,8 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma)
vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
mapio = 1;
break;
- case PIO_CRED:
+ case PIO_CRED: {
+ u64 cr_page_offset;
if (flags & VM_WRITE) {
ret = -EPERM;
goto done;
@@ -502,10 +381,11 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma)
* second or third page allocated for credit returns (if number
* of enabled contexts > 64 and 128 respectively).
*/
- memvirt = dd->cr_base[uctxt->numa_id].va;
- memaddr = virt_to_phys(memvirt) +
- (((u64)uctxt->sc->hw_free -
- (u64)dd->cr_base[uctxt->numa_id].va) & PAGE_MASK);
+ cr_page_offset = ((u64)uctxt->sc->hw_free -
+ (u64)dd->cr_base[uctxt->numa_id].va) &
+ PAGE_MASK;
+ memvirt = dd->cr_base[uctxt->numa_id].va + cr_page_offset;
+ memdma = dd->cr_base[uctxt->numa_id].dma + cr_page_offset;
memlen = PAGE_SIZE;
flags &= ~VM_MAYWRITE;
flags |= VM_DONTCOPY | VM_DONTEXPAND;
@@ -515,14 +395,16 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma)
* memory been flagged as non-cached?
*/
/* vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); */
- mapio = 1;
break;
+ }
case RCV_HDRQ:
- memlen = uctxt->rcvhdrq_size;
+ memlen = rcvhdrq_size(uctxt);
memvirt = uctxt->rcvhdrq;
+ memdma = uctxt->rcvhdrq_dma;
break;
case RCV_EGRBUF: {
- unsigned long addr;
+ unsigned long vm_start_save;
+ unsigned long vm_end_save;
int i;
/*
* The RcvEgr buffer need to be handled differently
@@ -540,25 +422,35 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma)
ret = -EPERM;
goto done;
}
- vma->vm_flags &= ~VM_MAYWRITE;
- addr = vma->vm_start;
+ vm_flags_clear(vma, VM_MAYWRITE);
+ /*
+ * Mmap multiple separate allocations into a single vma. From
+ * here, dma_mmap_coherent() calls dma_direct_mmap(), which
+ * requires the mmap to exactly fill the vma starting at
+ * vma_start. Adjust the vma start and end for each eager
+ * buffer segment mapped. Restore the originals when done.
+ */
+ vm_start_save = vma->vm_start;
+ vm_end_save = vma->vm_end;
+ vma->vm_end = vma->vm_start;
for (i = 0 ; i < uctxt->egrbufs.numbufs; i++) {
memlen = uctxt->egrbufs.buffers[i].len;
memvirt = uctxt->egrbufs.buffers[i].addr;
- ret = remap_pfn_range(
- vma, addr,
- /*
- * virt_to_pfn() does the same, but
- * it's not available on x86_64
- * when CONFIG_MMU is enabled.
- */
- PFN_DOWN(__pa(memvirt)),
- memlen,
- vma->vm_page_prot);
- if (ret < 0)
+ memdma = uctxt->egrbufs.buffers[i].dma;
+ vma->vm_end += memlen;
+ mmap_cdbg(ctxt, subctxt, type, mapio, vmf, memaddr,
+ memvirt, memdma, memlen, vma);
+ ret = dma_mmap_coherent(&dd->pcidev->dev, vma,
+ memvirt, memdma, memlen);
+ if (ret < 0) {
+ vma->vm_start = vm_start_save;
+ vma->vm_end = vm_end_save;
goto done;
- addr += memlen;
+ }
+ vma->vm_start += memlen;
}
+ vma->vm_start = vm_start_save;
+ vma->vm_end = vm_end_save;
ret = 0;
goto done;
}
@@ -584,9 +476,8 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma)
* Use the page where this context's flags are. User level
* knows where it's own bitmap is within the page.
*/
- memaddr = (unsigned long)(dd->events +
- ((uctxt->ctxt - dd->first_user_ctxt) *
- HFI1_MAX_SHARED_CTXTS)) & PAGE_MASK;
+ memaddr = (unsigned long)
+ (dd->events + uctxt_offset(uctxt)) & PAGE_MASK;
memlen = PAGE_SIZE;
/*
* v3.7 removes VM_RESERVED but the effect is kept by
@@ -596,6 +487,10 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma)
vmf = 1;
break;
case STATUS:
+ if (flags & VM_WRITE) {
+ ret = -EPERM;
+ goto done;
+ }
memaddr = kvirt_to_phys((void *)dd->status);
memlen = PAGE_SIZE;
flags |= VM_IO | VM_DONTEXPAND;
@@ -609,12 +504,13 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma)
ret = -EINVAL;
goto done;
}
- if (flags & VM_WRITE) {
+ if ((flags & VM_WRITE) || !hfi1_rcvhdrtail_kvaddr(uctxt)) {
ret = -EPERM;
goto done;
}
memlen = PAGE_SIZE;
- memvirt = (void *)uctxt->rcvhdrtail_kvaddr;
+ memvirt = (void *)hfi1_rcvhdrtail_kvaddr(uctxt);
+ memdma = uctxt->rcvhdrqtailaddr_dma;
flags &= ~VM_MAYWRITE;
break;
case SUBCTXT_UREGS:
@@ -625,7 +521,7 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma)
break;
case SUBCTXT_RCV_HDRQ:
memaddr = (u64)uctxt->subctxt_rcvhdr_base;
- memlen = uctxt->rcvhdrq_size * uctxt->subctxt_cnt;
+ memlen = rcvhdrq_size(uctxt) * uctxt->subctxt_cnt;
flags |= VM_IO | VM_DONTEXPAND;
vmf = 1;
break;
@@ -662,15 +558,16 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma)
goto done;
}
- vma->vm_flags = flags;
- hfi1_cdbg(PROC,
- "%u:%u type:%u io/vf:%d/%d, addr:0x%llx, len:%lu(%lu), flags:0x%lx\n",
- ctxt, subctxt, type, mapio, vmf, memaddr, memlen,
- vma->vm_end - vma->vm_start, vma->vm_flags);
+ vm_flags_reset(vma, flags);
+ mmap_cdbg(ctxt, subctxt, type, mapio, vmf, memaddr, memvirt, memdma,
+ memlen, vma);
if (vmf) {
vma->vm_pgoff = PFN_DOWN(memaddr);
vma->vm_ops = &vm_ops;
ret = 0;
+ } else if (memdma) {
+ ret = dma_mmap_coherent(&dd->pcidev->dev, vma,
+ memvirt, memdma, memlen);
} else if (mapio) {
ret = io_remap_pfn_range(vma, vma->vm_start,
PFN_DOWN(memaddr),
@@ -695,7 +592,7 @@ done:
* Local (non-chip) user memory is not mapped right away but as it is
* accessed by the user-level code.
*/
-static int vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static vm_fault_t vma_fault(struct vm_fault *vmf)
{
struct page *page;
@@ -709,20 +606,20 @@ static int vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
return 0;
}
-static unsigned int hfi1_poll(struct file *fp, struct poll_table_struct *pt)
+static __poll_t hfi1_poll(struct file *fp, struct poll_table_struct *pt)
{
struct hfi1_ctxtdata *uctxt;
- unsigned pollflag;
+ __poll_t pollflag;
uctxt = ((struct hfi1_filedata *)fp->private_data)->uctxt;
if (!uctxt)
- pollflag = POLLERR;
+ pollflag = EPOLLERR;
else if (uctxt->poll_type == HFI1_POLL_TYPE_URGENT)
pollflag = poll_urgent(fp, pt);
else if (uctxt->poll_type == HFI1_POLL_TYPE_ANYRCV)
pollflag = poll_next(fp, pt);
else /* invalid */
- pollflag = POLLERR;
+ pollflag = EPOLLERR;
return pollflag;
}
@@ -741,31 +638,40 @@ static int hfi1_file_close(struct inode *inode, struct file *fp)
if (!uctxt)
goto done;
- hfi1_cdbg(PROC, "freeing ctxt %u:%u", uctxt->ctxt, fdata->subctxt);
- mutex_lock(&hfi1_mutex);
+ hfi1_cdbg(PROC, "closing ctxt %u:%u", uctxt->ctxt, fdata->subctxt);
flush_wc();
/* drain user sdma queue */
- hfi1_user_sdma_free_queues(fdata);
+ hfi1_user_sdma_free_queues(fdata, uctxt);
/* release the cpu */
hfi1_put_proc_affinity(fdata->rec_cpu_num);
+ /* clean up rcv side */
+ hfi1_user_exp_rcv_free(fdata);
+
+ /*
+ * fdata->uctxt is used in the above cleanup. It is not ready to be
+ * removed until here.
+ */
+ fdata->uctxt = NULL;
+ hfi1_rcd_put(uctxt);
+
/*
* Clear any left over, unhandled events so the next process that
* gets this context doesn't get confused.
*/
- ev = dd->events + ((uctxt->ctxt - dd->first_user_ctxt) *
- HFI1_MAX_SHARED_CTXTS) + fdata->subctxt;
+ ev = dd->events + uctxt_offset(uctxt) + fdata->subctxt;
*ev = 0;
- if (--uctxt->cnt) {
- uctxt->active_slaves &= ~(1 << fdata->subctxt);
- mutex_unlock(&hfi1_mutex);
+ spin_lock_irqsave(&dd->uctxt_lock, flags);
+ __clear_bit(fdata->subctxt, uctxt->in_use_ctxts);
+ if (!bitmap_empty(uctxt->in_use_ctxts, HFI1_MAX_SHARED_CTXTS)) {
+ spin_unlock_irqrestore(&dd->uctxt_lock, flags);
goto done;
}
+ spin_unlock_irqrestore(&dd->uctxt_lock, flags);
- spin_lock_irqsave(&dd->uctxt_lock, flags);
/*
* Disable receive context and interrupt available, reset all
* RcvCtxtCtrl bits to default values.
@@ -776,41 +682,31 @@ static int hfi1_file_close(struct inode *inode, struct file *fp)
HFI1_RCVCTRL_TAILUPD_DIS |
HFI1_RCVCTRL_ONE_PKT_EGR_DIS |
HFI1_RCVCTRL_NO_RHQ_DROP_DIS |
- HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt->ctxt);
+ HFI1_RCVCTRL_NO_EGR_DROP_DIS |
+ HFI1_RCVCTRL_URGENT_DIS, uctxt);
/* Clear the context's J_KEY */
- hfi1_clear_ctxt_jkey(dd, uctxt->ctxt);
+ hfi1_clear_ctxt_jkey(dd, uctxt);
/*
- * Reset context integrity checks to default.
- * (writes to CSRs probably belong in chip.c)
+ * If a send context is allocated, reset context integrity
+ * checks to default and disable the send context.
*/
- write_kctxt_csr(dd, uctxt->sc->hw_context, SEND_CTXT_CHECK_ENABLE,
- hfi1_pkt_default_send_ctxt_mask(dd, uctxt->sc->type));
- sc_disable(uctxt->sc);
- spin_unlock_irqrestore(&dd->uctxt_lock, flags);
-
- dd->rcd[uctxt->ctxt] = NULL;
+ if (uctxt->sc) {
+ sc_disable(uctxt->sc);
+ set_pio_integrity(uctxt->sc);
+ }
- hfi1_user_exp_rcv_free(fdata);
- hfi1_clear_ctxt_pkey(dd, uctxt->ctxt);
+ hfi1_free_ctxt_rcv_groups(uctxt);
+ hfi1_clear_ctxt_pkey(dd, uctxt);
- uctxt->rcvwait_to = 0;
- uctxt->piowait_to = 0;
- uctxt->rcvnowait = 0;
- uctxt->pionowait = 0;
uctxt->event_flags = 0;
- hfi1_stats.sps_ctxts--;
- if (++dd->freectxts == dd->num_user_contexts)
- aspm_enable_all(dd);
- mutex_unlock(&hfi1_mutex);
- hfi1_free_ctxtdata(dd, uctxt);
+ deallocate_ctxt(uctxt);
done:
- mmdrop(fdata->mm);
- kobject_put(&dd->kobj);
- if (atomic_dec_and_test(&dd->user_refcount))
+ if (refcount_dec_and_test(&dd->user_refcount))
complete(&dd->user_comp);
+ cleanup_srcu_struct(&fdata->pq_srcu);
kfree(fdata);
return 0;
}
@@ -831,121 +727,212 @@ static u64 kvirt_to_phys(void *addr)
return paddr;
}
-static int assign_ctxt(struct file *fp, struct hfi1_user_info *uinfo)
+/**
+ * complete_subctxt - complete sub-context info
+ * @fd: valid filedata pointer
+ *
+ * Sub-context info can only be set up after the base context
+ * has been completed. This is indicated by the clearing of the
+ * HFI1_CTXT_BASE_UINIT bit.
+ *
+ * Wait for the bit to be cleared, and then complete the subcontext
+ * initialization.
+ *
+ */
+static int complete_subctxt(struct hfi1_filedata *fd)
{
- int i_minor, ret = 0;
- unsigned int swmajor, swminor;
+ int ret;
+ unsigned long flags;
- swmajor = uinfo->userversion >> 16;
- if (swmajor != HFI1_USER_SWMAJOR) {
- ret = -ENODEV;
- goto done;
- }
+ /*
+ * sub-context info can only be set up after the base context
+ * has been completed.
+ */
+ ret = wait_event_interruptible(
+ fd->uctxt->wait,
+ !test_bit(HFI1_CTXT_BASE_UNINIT, &fd->uctxt->event_flags));
- swminor = uinfo->userversion & 0xffff;
+ if (test_bit(HFI1_CTXT_BASE_FAILED, &fd->uctxt->event_flags))
+ ret = -ENOMEM;
- mutex_lock(&hfi1_mutex);
- /* First, lets check if we need to setup a shared context? */
- if (uinfo->subctxt_cnt) {
- struct hfi1_filedata *fd = fp->private_data;
+ /* Finish the sub-context init */
+ if (!ret) {
+ fd->rec_cpu_num = hfi1_get_proc_affinity(fd->uctxt->numa_id);
+ ret = init_user_ctxt(fd, fd->uctxt);
+ }
- ret = find_shared_ctxt(fp, uinfo);
- if (ret < 0)
- goto done_unlock;
- if (ret) {
- fd->rec_cpu_num =
- hfi1_get_proc_affinity(fd->uctxt->numa_id);
- }
+ if (ret) {
+ spin_lock_irqsave(&fd->dd->uctxt_lock, flags);
+ __clear_bit(fd->subctxt, fd->uctxt->in_use_ctxts);
+ spin_unlock_irqrestore(&fd->dd->uctxt_lock, flags);
+ hfi1_rcd_put(fd->uctxt);
+ fd->uctxt = NULL;
}
+ return ret;
+}
+
+static int assign_ctxt(struct hfi1_filedata *fd, unsigned long arg, u32 len)
+{
+ int ret;
+ unsigned int swmajor;
+ struct hfi1_ctxtdata *uctxt = NULL;
+ struct hfi1_user_info uinfo;
+
+ if (fd->uctxt)
+ return -EINVAL;
+
+ if (sizeof(uinfo) != len)
+ return -EINVAL;
+
+ if (copy_from_user(&uinfo, (void __user *)arg, sizeof(uinfo)))
+ return -EFAULT;
+
+ swmajor = uinfo.userversion >> 16;
+ if (swmajor != HFI1_USER_SWMAJOR)
+ return -ENODEV;
+
+ if (uinfo.subctxt_cnt > HFI1_MAX_SHARED_CTXTS)
+ return -EINVAL;
+
/*
- * We execute the following block if we couldn't find a
- * shared context or if context sharing is not required.
+ * Acquire the mutex to protect against multiple creations of what
+ * could be a shared base context.
*/
- if (!ret) {
- i_minor = iminor(file_inode(fp)) - HFI1_USER_MINOR_BASE;
- ret = get_user_context(fp, uinfo, i_minor);
- }
-done_unlock:
+ mutex_lock(&hfi1_mutex);
+ /*
+ * Get a sub context if available (fd->uctxt will be set).
+ * ret < 0 error, 0 no context, 1 sub-context found
+ */
+ ret = find_sub_ctxt(fd, &uinfo);
+
+ /*
+ * Allocate a base context if context sharing is not required or a
+ * sub context wasn't found.
+ */
+ if (!ret)
+ ret = allocate_ctxt(fd, fd->dd, &uinfo, &uctxt);
+
mutex_unlock(&hfi1_mutex);
-done:
+
+ /* Depending on the context type, finish the appropriate init */
+ switch (ret) {
+ case 0:
+ ret = setup_base_ctxt(fd, uctxt);
+ if (ret)
+ deallocate_ctxt(uctxt);
+ break;
+ case 1:
+ ret = complete_subctxt(fd);
+ break;
+ default:
+ break;
+ }
+
return ret;
}
-static int get_user_context(struct file *fp, struct hfi1_user_info *uinfo,
- int devno)
+/**
+ * match_ctxt - match context
+ * @fd: valid filedata pointer
+ * @uinfo: user info to compare base context with
+ * @uctxt: context to compare uinfo to.
+ *
+ * Compare the given context with the given information to see if it
+ * can be used for a sub context.
+ */
+static int match_ctxt(struct hfi1_filedata *fd,
+ const struct hfi1_user_info *uinfo,
+ struct hfi1_ctxtdata *uctxt)
{
- struct hfi1_devdata *dd = NULL;
- int devmax, npresent, nup;
+ struct hfi1_devdata *dd = fd->dd;
+ unsigned long flags;
+ u16 subctxt;
- devmax = hfi1_count_units(&npresent, &nup);
- if (!npresent)
- return -ENXIO;
+ /* Skip dynamically allocated kernel contexts */
+ if (uctxt->sc && (uctxt->sc->type == SC_KERNEL))
+ return 0;
- if (!nup)
- return -ENETDOWN;
+ /* Skip ctxt if it doesn't match the requested one */
+ if (memcmp(uctxt->uuid, uinfo->uuid, sizeof(uctxt->uuid)) ||
+ uctxt->jkey != generate_jkey(current_uid()) ||
+ uctxt->subctxt_id != uinfo->subctxt_id ||
+ uctxt->subctxt_cnt != uinfo->subctxt_cnt)
+ return 0;
- dd = hfi1_lookup(devno);
- if (!dd)
- return -ENODEV;
- else if (!dd->freectxts)
+ /* Verify the sharing process matches the base */
+ if (uctxt->userversion != uinfo->userversion)
+ return -EINVAL;
+
+ /* Find an unused sub context */
+ spin_lock_irqsave(&dd->uctxt_lock, flags);
+ if (bitmap_empty(uctxt->in_use_ctxts, HFI1_MAX_SHARED_CTXTS)) {
+ /* context is being closed, do not use */
+ spin_unlock_irqrestore(&dd->uctxt_lock, flags);
+ return 0;
+ }
+
+ subctxt = find_first_zero_bit(uctxt->in_use_ctxts,
+ HFI1_MAX_SHARED_CTXTS);
+ if (subctxt >= uctxt->subctxt_cnt) {
+ spin_unlock_irqrestore(&dd->uctxt_lock, flags);
return -EBUSY;
+ }
+
+ fd->subctxt = subctxt;
+ __set_bit(fd->subctxt, uctxt->in_use_ctxts);
+ spin_unlock_irqrestore(&dd->uctxt_lock, flags);
+
+ fd->uctxt = uctxt;
+ hfi1_rcd_get(uctxt);
- return allocate_ctxt(fp, dd, uinfo);
+ return 1;
}
-static int find_shared_ctxt(struct file *fp,
- const struct hfi1_user_info *uinfo)
+/**
+ * find_sub_ctxt - fund sub-context
+ * @fd: valid filedata pointer
+ * @uinfo: matching info to use to find a possible context to share.
+ *
+ * The hfi1_mutex must be held when this function is called. It is
+ * necessary to ensure serialized creation of shared contexts.
+ *
+ * Return:
+ * 0 No sub-context found
+ * 1 Subcontext found and allocated
+ * errno EINVAL (incorrect parameters)
+ * EBUSY (all sub contexts in use)
+ */
+static int find_sub_ctxt(struct hfi1_filedata *fd,
+ const struct hfi1_user_info *uinfo)
{
- int devmax, ndev, i;
- int ret = 0;
- struct hfi1_filedata *fd = fp->private_data;
-
- devmax = hfi1_count_units(NULL, NULL);
+ struct hfi1_ctxtdata *uctxt;
+ struct hfi1_devdata *dd = fd->dd;
+ u16 i;
+ int ret;
- for (ndev = 0; ndev < devmax; ndev++) {
- struct hfi1_devdata *dd = hfi1_lookup(ndev);
+ if (!uinfo->subctxt_cnt)
+ return 0;
- if (!(dd && (dd->flags & HFI1_PRESENT) && dd->kregbase))
- continue;
- for (i = dd->first_user_ctxt; i < dd->num_rcv_contexts; i++) {
- struct hfi1_ctxtdata *uctxt = dd->rcd[i];
-
- /* Skip ctxts which are not yet open */
- if (!uctxt || !uctxt->cnt)
- continue;
- /* Skip ctxt if it doesn't match the requested one */
- if (memcmp(uctxt->uuid, uinfo->uuid,
- sizeof(uctxt->uuid)) ||
- uctxt->jkey != generate_jkey(current_uid()) ||
- uctxt->subctxt_id != uinfo->subctxt_id ||
- uctxt->subctxt_cnt != uinfo->subctxt_cnt)
- continue;
-
- /* Verify the sharing process matches the master */
- if (uctxt->userversion != uinfo->userversion ||
- uctxt->cnt >= uctxt->subctxt_cnt) {
- ret = -EINVAL;
- goto done;
- }
- fd->uctxt = uctxt;
- fd->subctxt = uctxt->cnt++;
- uctxt->active_slaves |= 1 << fd->subctxt;
- ret = 1;
- goto done;
+ for (i = dd->first_dyn_alloc_ctxt; i < dd->num_rcv_contexts; i++) {
+ uctxt = hfi1_rcd_get_by_index(dd, i);
+ if (uctxt) {
+ ret = match_ctxt(fd, uinfo, uctxt);
+ hfi1_rcd_put(uctxt);
+ /* value of != 0 will return */
+ if (ret)
+ return ret;
}
}
-done:
- return ret;
+ return 0;
}
-static int allocate_ctxt(struct file *fp, struct hfi1_devdata *dd,
- struct hfi1_user_info *uinfo)
+static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd,
+ struct hfi1_user_info *uinfo,
+ struct hfi1_ctxtdata **rcd)
{
- struct hfi1_filedata *fd = fp->private_data;
struct hfi1_ctxtdata *uctxt;
- unsigned ctxt;
int ret, numa;
if (dd->flags & HFI1_FROZEN) {
@@ -959,11 +946,7 @@ static int allocate_ctxt(struct file *fp, struct hfi1_devdata *dd,
return -EIO;
}
- for (ctxt = dd->first_user_ctxt; ctxt < dd->num_rcv_contexts; ctxt++)
- if (!dd->rcd[ctxt])
- break;
-
- if (ctxt == dd->num_rcv_contexts)
+ if (!dd->freectxts)
return -EBUSY;
/*
@@ -975,11 +958,10 @@ static int allocate_ctxt(struct file *fp, struct hfi1_devdata *dd,
numa = cpu_to_node(fd->rec_cpu_num);
else
numa = numa_node_id();
- uctxt = hfi1_create_ctxtdata(dd->pport, ctxt, numa);
- if (!uctxt) {
- dd_dev_err(dd,
- "Unable to allocate ctxtdata memory, failing open\n");
- return -ENOMEM;
+ ret = hfi1_create_ctxtdata(dd->pport, numa, &uctxt);
+ if (ret < 0) {
+ dd_dev_err(dd, "user ctxtdata allocation failed\n");
+ return ret;
}
hfi1_cdbg(PROC, "[%u:%u] pid %u assigned to CPU %d (NUMA %u)",
uctxt->ctxt, fd->subctxt, current->pid, fd->rec_cpu_num,
@@ -988,41 +970,35 @@ static int allocate_ctxt(struct file *fp, struct hfi1_devdata *dd,
/*
* Allocate and enable a PIO send context.
*/
- uctxt->sc = sc_alloc(dd, SC_USER, uctxt->rcvhdrqentsize,
- uctxt->dd->node);
+ uctxt->sc = sc_alloc(dd, SC_USER, uctxt->rcvhdrqentsize, dd->node);
if (!uctxt->sc) {
ret = -ENOMEM;
goto ctxdata_free;
}
- hfi1_cdbg(PROC, "allocated send context %u(%u)\n", uctxt->sc->sw_index,
+ hfi1_cdbg(PROC, "allocated send context %u(%u)", uctxt->sc->sw_index,
uctxt->sc->hw_context);
ret = sc_enable(uctxt->sc);
if (ret)
goto ctxdata_free;
/*
- * Setup shared context resources if the user-level has requested
- * shared contexts and this is the 'master' process.
+ * Setup sub context information if the user-level has requested
+ * sub contexts.
* This has to be done here so the rest of the sub-contexts find the
- * proper master.
+ * proper base context.
+ * NOTE: _set_bit() can be used here because the context creation is
+ * protected by the mutex (rather than the spin_lock), and will be the
+ * very first instance of this context.
*/
- if (uinfo->subctxt_cnt && !fd->subctxt) {
- ret = init_subctxts(uctxt, uinfo);
- /*
- * On error, we don't need to disable and de-allocate the
- * send context because it will be done during file close
- */
- if (ret)
- goto ctxdata_free;
- }
+ __set_bit(0, uctxt->in_use_ctxts);
+ if (uinfo->subctxt_cnt)
+ init_subctxts(uctxt, uinfo);
uctxt->userversion = uinfo->userversion;
uctxt->flags = hfi1_cap_mask; /* save current flag state */
init_waitqueue_head(&uctxt->wait);
- strlcpy(uctxt->comm, current->comm, sizeof(uctxt->comm));
+ strscpy(uctxt->comm, current->comm, sizeof(uctxt->comm));
memcpy(uctxt->uuid, uinfo->uuid, sizeof(uctxt->uuid));
uctxt->jkey = generate_jkey(current_uid());
- INIT_LIST_HEAD(&uctxt->sdma_queues);
- spin_lock_init(&uctxt->sdma_qlock);
hfi1_stats.sps_ctxts++;
/*
* Disable ASPM when there are open user/PSM contexts to avoid
@@ -1030,46 +1006,46 @@ static int allocate_ctxt(struct file *fp, struct hfi1_devdata *dd,
*/
if (dd->freectxts-- == dd->num_user_contexts)
aspm_disable_all(dd);
- fd->uctxt = uctxt;
+
+ *rcd = uctxt;
return 0;
ctxdata_free:
- dd->rcd[ctxt] = NULL;
- hfi1_free_ctxtdata(dd, uctxt);
+ hfi1_free_ctxt(uctxt);
return ret;
}
-static int init_subctxts(struct hfi1_ctxtdata *uctxt,
- const struct hfi1_user_info *uinfo)
+static void deallocate_ctxt(struct hfi1_ctxtdata *uctxt)
{
- unsigned num_subctxts;
+ mutex_lock(&hfi1_mutex);
+ hfi1_stats.sps_ctxts--;
+ if (++uctxt->dd->freectxts == uctxt->dd->num_user_contexts)
+ aspm_enable_all(uctxt->dd);
+ mutex_unlock(&hfi1_mutex);
- num_subctxts = uinfo->subctxt_cnt;
- if (num_subctxts > HFI1_MAX_SHARED_CTXTS)
- return -EINVAL;
+ hfi1_free_ctxt(uctxt);
+}
+static void init_subctxts(struct hfi1_ctxtdata *uctxt,
+ const struct hfi1_user_info *uinfo)
+{
uctxt->subctxt_cnt = uinfo->subctxt_cnt;
uctxt->subctxt_id = uinfo->subctxt_id;
- uctxt->active_slaves = 1;
- uctxt->redirect_seq_cnt = 1;
- set_bit(HFI1_CTXT_MASTER_UNINIT, &uctxt->event_flags);
-
- return 0;
+ set_bit(HFI1_CTXT_BASE_UNINIT, &uctxt->event_flags);
}
static int setup_subctxt(struct hfi1_ctxtdata *uctxt)
{
int ret = 0;
- unsigned num_subctxts = uctxt->subctxt_cnt;
+ u16 num_subctxts = uctxt->subctxt_cnt;
uctxt->subctxt_uregbase = vmalloc_user(PAGE_SIZE);
- if (!uctxt->subctxt_uregbase) {
- ret = -ENOMEM;
- goto bail;
- }
+ if (!uctxt->subctxt_uregbase)
+ return -ENOMEM;
+
/* We can take the size of the RcvHdr Queue from the master */
- uctxt->subctxt_rcvhdr_base = vmalloc_user(uctxt->rcvhdrq_size *
+ uctxt->subctxt_rcvhdr_base = vmalloc_user(rcvhdrq_size(uctxt) *
num_subctxts);
if (!uctxt->subctxt_rcvhdr_base) {
ret = -ENOMEM;
@@ -1082,25 +1058,22 @@ static int setup_subctxt(struct hfi1_ctxtdata *uctxt)
ret = -ENOMEM;
goto bail_rhdr;
}
- goto bail;
+
+ return 0;
+
bail_rhdr:
vfree(uctxt->subctxt_rcvhdr_base);
+ uctxt->subctxt_rcvhdr_base = NULL;
bail_ureg:
vfree(uctxt->subctxt_uregbase);
uctxt->subctxt_uregbase = NULL;
-bail:
+
return ret;
}
-static int user_init(struct file *fp)
+static void user_init(struct hfi1_ctxtdata *uctxt)
{
unsigned int rcvctrl_ops = 0;
- struct hfi1_filedata *fd = fp->private_data;
- struct hfi1_ctxtdata *uctxt = fd->uctxt;
-
- /* make sure that the context has already been setup */
- if (!test_bit(HFI1_CTXT_SETUP_DONE, &uctxt->event_flags))
- return -EFAULT;
/* initialize poll variables... */
uctxt->urgent = 0;
@@ -1117,13 +1090,14 @@ static int user_init(struct file *fp)
* don't have to wait to be sure the DMA update has happened
* (chip resets head/tail to 0 on transition to enable).
*/
- if (uctxt->rcvhdrtail_kvaddr)
+ if (hfi1_rcvhdrtail_kvaddr(uctxt))
clear_rcvhdrtail(uctxt);
/* Setup J_KEY before enabling the context */
- hfi1_set_ctxt_jkey(uctxt->dd, uctxt->ctxt, uctxt->jkey);
+ hfi1_set_ctxt_jkey(uctxt->dd, uctxt, uctxt->jkey);
rcvctrl_ops = HFI1_RCVCTRL_CTXT_ENB;
+ rcvctrl_ops |= HFI1_RCVCTRL_URGENT_ENB;
if (HFI1_CAP_UGET_MASK(uctxt->flags, HDRSUPP))
rcvctrl_ops |= HFI1_RCVCTRL_TIDFLOW_ENB;
/*
@@ -1147,23 +1121,16 @@ static int user_init(struct file *fp)
rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_ENB;
else
rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_DIS;
- hfi1_rcvctrl(uctxt->dd, rcvctrl_ops, uctxt->ctxt);
-
- /* Notify any waiting slaves */
- if (uctxt->subctxt_cnt) {
- clear_bit(HFI1_CTXT_MASTER_UNINIT, &uctxt->event_flags);
- wake_up(&uctxt->wait);
- }
-
- return 0;
+ hfi1_rcvctrl(uctxt->dd, rcvctrl_ops, uctxt);
}
-static int get_ctxt_info(struct file *fp, void __user *ubase, __u32 len)
+static int get_ctxt_info(struct hfi1_filedata *fd, unsigned long arg, u32 len)
{
struct hfi1_ctxt_info cinfo;
- struct hfi1_filedata *fd = fp->private_data;
struct hfi1_ctxtdata *uctxt = fd->uctxt;
- int ret = 0;
+
+ if (sizeof(cinfo) != len)
+ return -EINVAL;
memset(&cinfo, 0, sizeof(cinfo));
cinfo.runtime_flags = (((uctxt->flags >> HFI1_CAP_MISC_SHIFT) &
@@ -1171,7 +1138,7 @@ static int get_ctxt_info(struct file *fp, void __user *ubase, __u32 len)
HFI1_CAP_UGET_MASK(uctxt->flags, MASK) |
HFI1_CAP_KGET_MASK(uctxt->flags, K2U);
/* adjust flag if this fd is not able to cache */
- if (!fd->handler)
+ if (!fd->use_mn)
cinfo.runtime_flags |= HFI1_CAP_TID_UNMAP; /* no caching */
cinfo.num_active = hfi1_count_active_units();
@@ -1187,92 +1154,109 @@ static int get_ctxt_info(struct file *fp, void __user *ubase, __u32 len)
cinfo.send_ctxt = uctxt->sc->hw_context;
cinfo.egrtids = uctxt->egrbufs.alloced;
- cinfo.rcvhdrq_cnt = uctxt->rcvhdrq_cnt;
- cinfo.rcvhdrq_entsize = uctxt->rcvhdrqentsize << 2;
+ cinfo.rcvhdrq_cnt = get_hdrq_cnt(uctxt);
+ cinfo.rcvhdrq_entsize = get_hdrqentsize(uctxt) << 2;
cinfo.sdma_ring_size = fd->cq->nentries;
cinfo.rcvegr_size = uctxt->egrbufs.rcvtid_size;
- trace_hfi1_ctxt_info(uctxt->dd, uctxt->ctxt, fd->subctxt, cinfo);
- if (copy_to_user(ubase, &cinfo, sizeof(cinfo)))
- ret = -EFAULT;
+ trace_hfi1_ctxt_info(uctxt->dd, uctxt->ctxt, fd->subctxt, &cinfo);
+ if (copy_to_user((void __user *)arg, &cinfo, len))
+ return -EFAULT;
+
+ return 0;
+}
+
+static int init_user_ctxt(struct hfi1_filedata *fd,
+ struct hfi1_ctxtdata *uctxt)
+{
+ int ret;
+
+ ret = hfi1_user_sdma_alloc_queues(uctxt, fd);
+ if (ret)
+ return ret;
+
+ ret = hfi1_user_exp_rcv_init(fd, uctxt);
+ if (ret)
+ hfi1_user_sdma_free_queues(fd, uctxt);
return ret;
}
-static int setup_ctxt(struct file *fp)
+static int setup_base_ctxt(struct hfi1_filedata *fd,
+ struct hfi1_ctxtdata *uctxt)
{
- struct hfi1_filedata *fd = fp->private_data;
- struct hfi1_ctxtdata *uctxt = fd->uctxt;
struct hfi1_devdata *dd = uctxt->dd;
int ret = 0;
- /*
- * Context should be set up only once, including allocation and
- * programming of eager buffers. This is done if context sharing
- * is not requested or by the master process.
- */
- if (!uctxt->subctxt_cnt || !fd->subctxt) {
- ret = hfi1_init_ctxt(uctxt->sc);
- if (ret)
- goto done;
+ hfi1_init_ctxt(uctxt->sc);
- /* Now allocate the RcvHdr queue and eager buffers. */
- ret = hfi1_create_rcvhdrq(dd, uctxt);
- if (ret)
- goto done;
- ret = hfi1_setup_eagerbufs(uctxt);
- if (ret)
- goto done;
- if (uctxt->subctxt_cnt && !fd->subctxt) {
- ret = setup_subctxt(uctxt);
- if (ret)
- goto done;
- }
- } else {
- ret = wait_event_interruptible(uctxt->wait, !test_bit(
- HFI1_CTXT_MASTER_UNINIT,
- &uctxt->event_flags));
- if (ret)
- goto done;
- }
+ /* Now allocate the RcvHdr queue and eager buffers. */
+ ret = hfi1_create_rcvhdrq(dd, uctxt);
+ if (ret)
+ goto done;
- ret = hfi1_user_sdma_alloc_queues(uctxt, fp);
+ ret = hfi1_setup_eagerbufs(uctxt);
if (ret)
goto done;
- /*
- * Expected receive has to be setup for all processes (including
- * shared contexts). However, it has to be done after the master
- * context has been fully configured as it depends on the
- * eager/expected split of the RcvArray entries.
- * Setting it up here ensures that the subcontexts will be waiting
- * (due to the above wait_event_interruptible() until the master
- * is setup.
- */
- ret = hfi1_user_exp_rcv_init(fp);
+
+ /* If sub-contexts are enabled, do the appropriate setup */
+ if (uctxt->subctxt_cnt)
+ ret = setup_subctxt(uctxt);
if (ret)
goto done;
- set_bit(HFI1_CTXT_SETUP_DONE, &uctxt->event_flags);
+ ret = hfi1_alloc_ctxt_rcv_groups(uctxt);
+ if (ret)
+ goto done;
+
+ ret = init_user_ctxt(fd, uctxt);
+ if (ret) {
+ hfi1_free_ctxt_rcv_groups(uctxt);
+ goto done;
+ }
+
+ user_init(uctxt);
+
+ /* Now that the context is set up, the fd can get a reference. */
+ fd->uctxt = uctxt;
+ hfi1_rcd_get(uctxt);
+
done:
+ if (uctxt->subctxt_cnt) {
+ /*
+ * On error, set the failed bit so sub-contexts will clean up
+ * correctly.
+ */
+ if (ret)
+ set_bit(HFI1_CTXT_BASE_FAILED, &uctxt->event_flags);
+
+ /*
+ * Base context is done (successfully or not), notify anybody
+ * using a sub-context that is waiting for this completion.
+ */
+ clear_bit(HFI1_CTXT_BASE_UNINIT, &uctxt->event_flags);
+ wake_up(&uctxt->wait);
+ }
+
return ret;
}
-static int get_base_info(struct file *fp, void __user *ubase, __u32 len)
+static int get_base_info(struct hfi1_filedata *fd, unsigned long arg, u32 len)
{
struct hfi1_base_info binfo;
- struct hfi1_filedata *fd = fp->private_data;
struct hfi1_ctxtdata *uctxt = fd->uctxt;
struct hfi1_devdata *dd = uctxt->dd;
- ssize_t sz;
unsigned offset;
- int ret = 0;
- trace_hfi1_uctxtdata(uctxt->dd, uctxt);
+ trace_hfi1_uctxtdata(uctxt->dd, uctxt, fd->subctxt);
+
+ if (sizeof(binfo) != len)
+ return -EINVAL;
memset(&binfo, 0, sizeof(binfo));
binfo.hw_version = dd->revision;
- binfo.sw_version = HFI1_KERN_SWVERSION;
- binfo.bthqp = kdeth_qp;
+ binfo.sw_version = HFI1_USER_SWVERSION;
+ binfo.bthqp = RVT_KDETH_QP_PREFIX;
binfo.jkey = uctxt->jkey;
/*
* If more than 64 contexts are enabled the allocated credit
@@ -1298,55 +1282,171 @@ static int get_base_info(struct file *fp, void __user *ubase, __u32 len)
fd->subctxt,
uctxt->egrbufs.rcvtids[0].dma);
binfo.sdma_comp_bufbase = HFI1_MMAP_TOKEN(SDMA_COMP, uctxt->ctxt,
- fd->subctxt, 0);
+ fd->subctxt, 0);
/*
* user regs are at
* (RXE_PER_CONTEXT_USER + (ctxt * RXE_PER_CONTEXT_SIZE))
*/
binfo.user_regbase = HFI1_MMAP_TOKEN(UREGS, uctxt->ctxt,
- fd->subctxt, 0);
- offset = offset_in_page((((uctxt->ctxt - dd->first_user_ctxt) *
- HFI1_MAX_SHARED_CTXTS) + fd->subctxt) *
- sizeof(*dd->events));
+ fd->subctxt, 0);
+ offset = offset_in_page((uctxt_offset(uctxt) + fd->subctxt) *
+ sizeof(*dd->events));
binfo.events_bufbase = HFI1_MMAP_TOKEN(EVENTS, uctxt->ctxt,
- fd->subctxt,
- offset);
+ fd->subctxt,
+ offset);
binfo.status_bufbase = HFI1_MMAP_TOKEN(STATUS, uctxt->ctxt,
- fd->subctxt,
- dd->status);
+ fd->subctxt,
+ dd->status);
if (HFI1_CAP_IS_USET(DMA_RTAIL))
binfo.rcvhdrtail_base = HFI1_MMAP_TOKEN(RTAIL, uctxt->ctxt,
- fd->subctxt, 0);
+ fd->subctxt, 0);
if (uctxt->subctxt_cnt) {
binfo.subctxt_uregbase = HFI1_MMAP_TOKEN(SUBCTXT_UREGS,
- uctxt->ctxt,
- fd->subctxt, 0);
- binfo.subctxt_rcvhdrbuf = HFI1_MMAP_TOKEN(SUBCTXT_RCV_HDRQ,
uctxt->ctxt,
fd->subctxt, 0);
+ binfo.subctxt_rcvhdrbuf = HFI1_MMAP_TOKEN(SUBCTXT_RCV_HDRQ,
+ uctxt->ctxt,
+ fd->subctxt, 0);
binfo.subctxt_rcvegrbuf = HFI1_MMAP_TOKEN(SUBCTXT_EGRBUF,
- uctxt->ctxt,
- fd->subctxt, 0);
+ uctxt->ctxt,
+ fd->subctxt, 0);
}
- sz = (len < sizeof(binfo)) ? len : sizeof(binfo);
- if (copy_to_user(ubase, &binfo, sz))
+
+ if (copy_to_user((void __user *)arg, &binfo, len))
+ return -EFAULT;
+
+ return 0;
+}
+
+/**
+ * user_exp_rcv_setup - Set up the given tid rcv list
+ * @fd: file data of the current driver instance
+ * @arg: ioctl argumnent for user space information
+ * @len: length of data structure associated with ioctl command
+ *
+ * Wrapper to validate ioctl information before doing _rcv_setup.
+ *
+ */
+static int user_exp_rcv_setup(struct hfi1_filedata *fd, unsigned long arg,
+ u32 len)
+{
+ int ret;
+ unsigned long addr;
+ struct hfi1_tid_info tinfo;
+
+ if (sizeof(tinfo) != len)
+ return -EINVAL;
+
+ if (copy_from_user(&tinfo, (void __user *)arg, (sizeof(tinfo))))
+ return -EFAULT;
+
+ ret = hfi1_user_exp_rcv_setup(fd, &tinfo);
+ if (!ret) {
+ /*
+ * Copy the number of tidlist entries we used
+ * and the length of the buffer we registered.
+ */
+ addr = arg + offsetof(struct hfi1_tid_info, tidcnt);
+ if (copy_to_user((void __user *)addr, &tinfo.tidcnt,
+ sizeof(tinfo.tidcnt)))
+ ret = -EFAULT;
+
+ addr = arg + offsetof(struct hfi1_tid_info, length);
+ if (!ret && copy_to_user((void __user *)addr, &tinfo.length,
+ sizeof(tinfo.length)))
+ ret = -EFAULT;
+
+ if (ret)
+ hfi1_user_exp_rcv_invalid(fd, &tinfo);
+ }
+
+ return ret;
+}
+
+/**
+ * user_exp_rcv_clear - Clear the given tid rcv list
+ * @fd: file data of the current driver instance
+ * @arg: ioctl argumnent for user space information
+ * @len: length of data structure associated with ioctl command
+ *
+ * The hfi1_user_exp_rcv_clear() can be called from the error path. Because
+ * of this, we need to use this wrapper to copy the user space information
+ * before doing the clear.
+ */
+static int user_exp_rcv_clear(struct hfi1_filedata *fd, unsigned long arg,
+ u32 len)
+{
+ int ret;
+ unsigned long addr;
+ struct hfi1_tid_info tinfo;
+
+ if (sizeof(tinfo) != len)
+ return -EINVAL;
+
+ if (copy_from_user(&tinfo, (void __user *)arg, (sizeof(tinfo))))
+ return -EFAULT;
+
+ ret = hfi1_user_exp_rcv_clear(fd, &tinfo);
+ if (!ret) {
+ addr = arg + offsetof(struct hfi1_tid_info, tidcnt);
+ if (copy_to_user((void __user *)addr, &tinfo.tidcnt,
+ sizeof(tinfo.tidcnt)))
+ return -EFAULT;
+ }
+
+ return ret;
+}
+
+/**
+ * user_exp_rcv_invalid - Invalidate the given tid rcv list
+ * @fd: file data of the current driver instance
+ * @arg: ioctl argumnent for user space information
+ * @len: length of data structure associated with ioctl command
+ *
+ * Wrapper to validate ioctl information before doing _rcv_invalid.
+ *
+ */
+static int user_exp_rcv_invalid(struct hfi1_filedata *fd, unsigned long arg,
+ u32 len)
+{
+ int ret;
+ unsigned long addr;
+ struct hfi1_tid_info tinfo;
+
+ if (sizeof(tinfo) != len)
+ return -EINVAL;
+
+ if (!fd->invalid_tids)
+ return -EINVAL;
+
+ if (copy_from_user(&tinfo, (void __user *)arg, (sizeof(tinfo))))
+ return -EFAULT;
+
+ ret = hfi1_user_exp_rcv_invalid(fd, &tinfo);
+ if (ret)
+ return ret;
+
+ addr = arg + offsetof(struct hfi1_tid_info, tidcnt);
+ if (copy_to_user((void __user *)addr, &tinfo.tidcnt,
+ sizeof(tinfo.tidcnt)))
ret = -EFAULT;
+
return ret;
}
-static unsigned int poll_urgent(struct file *fp,
+static __poll_t poll_urgent(struct file *fp,
struct poll_table_struct *pt)
{
struct hfi1_filedata *fd = fp->private_data;
struct hfi1_ctxtdata *uctxt = fd->uctxt;
struct hfi1_devdata *dd = uctxt->dd;
- unsigned pollflag;
+ __poll_t pollflag;
poll_wait(fp, &uctxt->wait, pt);
spin_lock_irq(&dd->uctxt_lock);
if (uctxt->urgent != uctxt->urgent_poll) {
- pollflag = POLLIN | POLLRDNORM;
+ pollflag = EPOLLIN | EPOLLRDNORM;
uctxt->urgent_poll = uctxt->urgent;
} else {
pollflag = 0;
@@ -1357,23 +1457,23 @@ static unsigned int poll_urgent(struct file *fp,
return pollflag;
}
-static unsigned int poll_next(struct file *fp,
+static __poll_t poll_next(struct file *fp,
struct poll_table_struct *pt)
{
struct hfi1_filedata *fd = fp->private_data;
struct hfi1_ctxtdata *uctxt = fd->uctxt;
struct hfi1_devdata *dd = uctxt->dd;
- unsigned pollflag;
+ __poll_t pollflag;
poll_wait(fp, &uctxt->wait, pt);
spin_lock_irq(&dd->uctxt_lock);
if (hdrqempty(uctxt)) {
set_bit(HFI1_CTXT_WAITING_RCV, &uctxt->event_flags);
- hfi1_rcvctrl(dd, HFI1_RCVCTRL_INTRAVAIL_ENB, uctxt->ctxt);
+ hfi1_rcvctrl(dd, HFI1_RCVCTRL_INTRAVAIL_ENB, uctxt);
pollflag = 0;
} else {
- pollflag = POLLIN | POLLRDNORM;
+ pollflag = EPOLLIN | EPOLLRDNORM;
}
spin_unlock_irq(&dd->uctxt_lock);
@@ -1389,56 +1489,55 @@ int hfi1_set_uevent_bits(struct hfi1_pportdata *ppd, const int evtbit)
{
struct hfi1_ctxtdata *uctxt;
struct hfi1_devdata *dd = ppd->dd;
- unsigned ctxt;
- int ret = 0;
- unsigned long flags;
+ u16 ctxt;
- if (!dd->events) {
- ret = -EINVAL;
- goto done;
- }
+ if (!dd->events)
+ return -EINVAL;
- spin_lock_irqsave(&dd->uctxt_lock, flags);
- for (ctxt = dd->first_user_ctxt; ctxt < dd->num_rcv_contexts;
+ for (ctxt = dd->first_dyn_alloc_ctxt; ctxt < dd->num_rcv_contexts;
ctxt++) {
- uctxt = dd->rcd[ctxt];
+ uctxt = hfi1_rcd_get_by_index(dd, ctxt);
if (uctxt) {
- unsigned long *evs = dd->events +
- (uctxt->ctxt - dd->first_user_ctxt) *
- HFI1_MAX_SHARED_CTXTS;
+ unsigned long *evs;
int i;
/*
* subctxt_cnt is 0 if not shared, so do base
* separately, first, then remaining subctxt, if any
*/
+ evs = dd->events + uctxt_offset(uctxt);
set_bit(evtbit, evs);
for (i = 1; i < uctxt->subctxt_cnt; i++)
set_bit(evtbit, evs + i);
+ hfi1_rcd_put(uctxt);
}
}
- spin_unlock_irqrestore(&dd->uctxt_lock, flags);
-done:
- return ret;
+
+ return 0;
}
/**
* manage_rcvq - manage a context's receive queue
* @uctxt: the context
* @subctxt: the sub-context
- * @start_stop: action to carry out
+ * @arg: start/stop action to carry out
*
* start_stop == 0 disables receive on the context, for use in queue
* overflow conditions. start_stop==1 re-enables, to be used to
* re-init the software copy of the head register
*/
-static int manage_rcvq(struct hfi1_ctxtdata *uctxt, unsigned subctxt,
- int start_stop)
+static int manage_rcvq(struct hfi1_ctxtdata *uctxt, u16 subctxt,
+ unsigned long arg)
{
struct hfi1_devdata *dd = uctxt->dd;
unsigned int rcvctrl_op;
+ int start_stop;
if (subctxt)
- goto bail;
+ return 0;
+
+ if (get_user(start_stop, (int __user *)arg))
+ return -EFAULT;
+
/* atomically clear receive enable ctxt. */
if (start_stop) {
/*
@@ -1449,15 +1548,15 @@ static int manage_rcvq(struct hfi1_ctxtdata *uctxt, unsigned subctxt,
* always resets it's tail register back to 0 on a
* transition from disabled to enabled.
*/
- if (uctxt->rcvhdrtail_kvaddr)
+ if (hfi1_rcvhdrtail_kvaddr(uctxt))
clear_rcvhdrtail(uctxt);
rcvctrl_op = HFI1_RCVCTRL_CTXT_ENB;
} else {
rcvctrl_op = HFI1_RCVCTRL_CTXT_DIS;
}
- hfi1_rcvctrl(dd, rcvctrl_op, uctxt->ctxt);
+ hfi1_rcvctrl(dd, rcvctrl_op, uctxt);
/* always; new head should be equal to new tail; see above */
-bail:
+
return 0;
}
@@ -1466,18 +1565,21 @@ bail:
* User process then performs actions appropriate to bit having been
* set, if desired, and checks again in future.
*/
-static int user_event_ack(struct hfi1_ctxtdata *uctxt, int subctxt,
- unsigned long events)
+static int user_event_ack(struct hfi1_ctxtdata *uctxt, u16 subctxt,
+ unsigned long arg)
{
int i;
struct hfi1_devdata *dd = uctxt->dd;
unsigned long *evs;
+ unsigned long events;
if (!dd->events)
return 0;
- evs = dd->events + ((uctxt->ctxt - dd->first_user_ctxt) *
- HFI1_MAX_SHARED_CTXTS) + subctxt;
+ if (get_user(events, (unsigned long __user *)arg))
+ return -EFAULT;
+
+ evs = dd->events + uctxt_offset(uctxt) + subctxt;
for (i = 0; i <= _HFI1_MAX_EVENT_BIT; i++) {
if (!test_bit(i, &events))
@@ -1487,27 +1589,89 @@ static int user_event_ack(struct hfi1_ctxtdata *uctxt, int subctxt,
return 0;
}
-static int set_ctxt_pkey(struct hfi1_ctxtdata *uctxt, unsigned subctxt,
- u16 pkey)
+static int set_ctxt_pkey(struct hfi1_ctxtdata *uctxt, unsigned long arg)
{
- int ret = -ENOENT, i, intable = 0;
+ int i;
struct hfi1_pportdata *ppd = uctxt->ppd;
struct hfi1_devdata *dd = uctxt->dd;
+ u16 pkey;
- if (pkey == LIM_MGMT_P_KEY || pkey == FULL_MGMT_P_KEY) {
- ret = -EINVAL;
- goto done;
- }
+ if (!HFI1_CAP_IS_USET(PKEY_CHECK))
+ return -EPERM;
+
+ if (get_user(pkey, (u16 __user *)arg))
+ return -EFAULT;
+
+ if (pkey == LIM_MGMT_P_KEY || pkey == FULL_MGMT_P_KEY)
+ return -EINVAL;
for (i = 0; i < ARRAY_SIZE(ppd->pkeys); i++)
- if (pkey == ppd->pkeys[i]) {
- intable = 1;
- break;
- }
+ if (pkey == ppd->pkeys[i])
+ return hfi1_set_ctxt_pkey(dd, uctxt, pkey);
+
+ return -ENOENT;
+}
+
+/**
+ * ctxt_reset - Reset the user context
+ * @uctxt: valid user context
+ */
+static int ctxt_reset(struct hfi1_ctxtdata *uctxt)
+{
+ struct send_context *sc;
+ struct hfi1_devdata *dd;
+ int ret = 0;
+
+ if (!uctxt || !uctxt->dd || !uctxt->sc)
+ return -EINVAL;
+
+ /*
+ * There is no protection here. User level has to guarantee that
+ * no one will be writing to the send context while it is being
+ * re-initialized. If user level breaks that guarantee, it will
+ * break it's own context and no one else's.
+ */
+ dd = uctxt->dd;
+ sc = uctxt->sc;
+
+ /*
+ * Wait until the interrupt handler has marked the context as
+ * halted or frozen. Report error if we time out.
+ */
+ wait_event_interruptible_timeout(
+ sc->halt_wait, (sc->flags & SCF_HALTED),
+ msecs_to_jiffies(SEND_CTXT_HALT_TIMEOUT));
+ if (!(sc->flags & SCF_HALTED))
+ return -ENOLCK;
+
+ /*
+ * If the send context was halted due to a Freeze, wait until the
+ * device has been "unfrozen" before resetting the context.
+ */
+ if (sc->flags & SCF_FROZEN) {
+ wait_event_interruptible_timeout(
+ dd->event_queue,
+ !(READ_ONCE(dd->flags) & HFI1_FROZEN),
+ msecs_to_jiffies(SEND_CTXT_HALT_TIMEOUT));
+ if (dd->flags & HFI1_FROZEN)
+ return -ENOLCK;
+
+ if (dd->flags & HFI1_FORCED_FREEZE)
+ /*
+ * Don't allow context reset if we are into
+ * forced freeze
+ */
+ return -ENODEV;
+
+ sc_disable(sc);
+ ret = sc_enable(sc);
+ hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_ENB, uctxt);
+ } else {
+ ret = sc_restart(sc);
+ }
+ if (!ret)
+ sc_return_credits(sc);
- if (intable)
- ret = hfi1_set_ctxt_pkey(dd, uctxt->ctxt, pkey);
-done:
return ret;
}
@@ -1525,7 +1689,7 @@ static int user_add(struct hfi1_devdata *dd)
snprintf(name, sizeof(name), "%s_%d", class_name(), dd->unit);
ret = hfi1_cdev_init(dd->unit, name, &hfi1_file_ops,
&dd->user_cdev, &dd->user_device,
- true, &dd->kobj);
+ true, &dd->verbs_dev.rdi.ibdev.dev.kobj);
if (ret)
user_remove(dd);
diff --git a/drivers/infiniband/hw/hfi1/firmware.c b/drivers/infiniband/hw/hfi1/firmware.c
index 0dd50cdb039a..3c228aeaaf81 100644
--- a/drivers/infiniband/hw/hfi1/firmware.c
+++ b/drivers/infiniband/hw/hfi1/firmware.c
@@ -1,53 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
+ * Copyright(c) 2015 - 2017 Intel Corporation.
*/
#include <linux/firmware.h>
#include <linux/mutex.h>
-#include <linux/module.h>
#include <linux/delay.h>
#include <linux/crc32.h>
@@ -64,30 +21,26 @@
#define DEFAULT_FW_FABRIC_NAME "hfi1_fabric.fw"
#define DEFAULT_FW_SBUS_NAME "hfi1_sbus.fw"
#define DEFAULT_FW_PCIE_NAME "hfi1_pcie.fw"
-#define DEFAULT_PLATFORM_CONFIG_NAME "hfi1_platform.dat"
#define ALT_FW_8051_NAME_ASIC "hfi1_dc8051_d.fw"
#define ALT_FW_FABRIC_NAME "hfi1_fabric_d.fw"
#define ALT_FW_SBUS_NAME "hfi1_sbus_d.fw"
#define ALT_FW_PCIE_NAME "hfi1_pcie_d.fw"
+MODULE_FIRMWARE(DEFAULT_FW_8051_NAME_ASIC);
+MODULE_FIRMWARE(DEFAULT_FW_FABRIC_NAME);
+MODULE_FIRMWARE(DEFAULT_FW_SBUS_NAME);
+MODULE_FIRMWARE(DEFAULT_FW_PCIE_NAME);
+
static uint fw_8051_load = 1;
static uint fw_fabric_serdes_load = 1;
static uint fw_pcie_serdes_load = 1;
static uint fw_sbus_load = 1;
-/*
- * Access required in platform.c
- * Maintains state of whether the platform config was fetched via the
- * fallback option
- */
-uint platform_config_load;
-
/* Firmware file names get set in hfi1_firmware_init() based on the above */
static char *fw_8051_name;
static char *fw_fabric_serdes_name;
static char *fw_sbus_name;
static char *fw_pcie_serdes_name;
-static char *platform_config_name;
#define SBUS_MAX_POLL_COUNT 100
#define SBUS_COUNTER(reg, name) \
@@ -121,6 +74,12 @@ struct css_header {
#define MU_SIZE 8
#define EXPONENT_SIZE 4
+/* size of platform configuration partition */
+#define MAX_PLATFORM_CONFIG_FILE_SIZE 4096
+
+/* size of file of plaform configuration encoded in format version 4 */
+#define PLATFORM_CONFIG_FORMAT_4_FILE_SIZE 528
+
/* the file itself */
struct firmware_file {
struct css_header css_header;
@@ -177,7 +136,6 @@ static struct firmware_details fw_8051;
static struct firmware_details fw_fabric;
static struct firmware_details fw_pcie;
static struct firmware_details fw_sbus;
-static const struct firmware *platform_config;
/* flags for turn_off_spicos() */
#define SPICO_SBUS 0x1
@@ -615,6 +573,14 @@ retry:
fw_fabric_serdes_name = ALT_FW_FABRIC_NAME;
fw_sbus_name = ALT_FW_SBUS_NAME;
fw_pcie_serdes_name = ALT_FW_PCIE_NAME;
+
+ /*
+ * Add a delay before obtaining and loading debug firmware.
+ * Authorization will fail if the delay between firmware
+ * authorization events is shorter than 50us. Add 100us to
+ * make a delay time safe.
+ */
+ usleep_range(100, 120);
}
if (fw_sbus_load) {
@@ -675,7 +641,6 @@ done:
static int obtain_firmware(struct hfi1_devdata *dd)
{
unsigned long timeout;
- int err = 0;
mutex_lock(&fw_mutex);
@@ -699,38 +664,11 @@ static int obtain_firmware(struct hfi1_devdata *dd)
}
/* not in FW_TRY state */
- if (fw_state == FW_FINAL) {
- if (platform_config) {
- dd->platform_config.data = platform_config->data;
- dd->platform_config.size = platform_config->size;
- }
- goto done; /* already acquired */
- } else if (fw_state == FW_ERR) {
- goto done; /* already tried and failed */
- }
- /* fw_state is FW_EMPTY */
-
/* set fw_state to FW_TRY, FW_FINAL, or FW_ERR, and fw_err */
- __obtain_firmware(dd);
-
- if (platform_config_load) {
- platform_config = NULL;
- err = request_firmware(&platform_config, platform_config_name,
- &dd->pcidev->dev);
- if (err) {
- platform_config = NULL;
- dd_dev_err(dd,
- "%s: No default platform config file found\n",
- __func__);
- goto done;
- }
- dd->platform_config.data = platform_config->data;
- dd->platform_config.size = platform_config->size;
- }
+ if (fw_state == FW_EMPTY)
+ __obtain_firmware(dd);
-done:
mutex_unlock(&fw_mutex);
-
return fw_err;
}
@@ -752,9 +690,6 @@ void dispose_firmware(void)
dispose_one_firmware(&fw_pcie);
dispose_one_firmware(&fw_sbus);
- release_firmware(platform_config);
- platform_config = NULL;
-
/* retain the error state, otherwise revert to empty */
if (fw_state != FW_ERR)
fw_state = FW_EMPTY;
@@ -1004,7 +939,9 @@ static int load_8051_firmware(struct hfi1_devdata *dd,
{
u64 reg;
int ret;
- u8 ver_a, ver_b;
+ u8 ver_major;
+ u8 ver_minor;
+ u8 ver_patch;
/*
* DC Reset sequence
@@ -1073,10 +1010,17 @@ static int load_8051_firmware(struct hfi1_devdata *dd,
return -ETIMEDOUT;
}
- read_misc_status(dd, &ver_a, &ver_b);
- dd_dev_info(dd, "8051 firmware version %d.%d\n",
- (int)ver_b, (int)ver_a);
- dd->dc8051_ver = dc8051_ver(ver_b, ver_a);
+ read_misc_status(dd, &ver_major, &ver_minor, &ver_patch);
+ dd_dev_info(dd, "8051 firmware version %d.%d.%d\n",
+ (int)ver_major, (int)ver_minor, (int)ver_patch);
+ dd->dc8051_ver = dc8051_ver(ver_major, ver_minor, ver_patch);
+ ret = write_host_interface_version(dd, HOST_INTERFACE_VERSION);
+ if (ret != HCMD_SUCCESS) {
+ dd_dev_err(dd,
+ "Failed to set host interface version, return 0x%x\n",
+ ret);
+ return -EIO;
+ }
return 0;
}
@@ -1170,7 +1114,7 @@ static void turn_off_spicos(struct hfi1_devdata *dd, int flags)
* Reset all of the fabric serdes for this HFI in preparation to take the
* link to Polling.
*
- * To do a reset, we need to write to to the serdes registers. Unfortunately,
+ * To do a reset, we need to write to the serdes registers. Unfortunately,
* the fabric serdes download to the other HFI on the ASIC will have turned
* off the firmware validation on this HFI. This means we can't write to the
* registers to reset the serdes. Work around this by performing a complete
@@ -1410,7 +1354,14 @@ int acquire_hw_mutex(struct hfi1_devdata *dd)
unsigned long timeout;
int try = 0;
u8 mask = 1 << dd->hfi1_id;
- u8 user;
+ u8 user = (u8)read_csr(dd, ASIC_CFG_MUTEX);
+
+ if (user == mask) {
+ dd_dev_info(dd,
+ "Hardware mutex already acquired, mutex mask %u\n",
+ (u32)mask);
+ return 0;
+ }
retry:
timeout = msecs_to_jiffies(HM_TIMEOUT) + jiffies;
@@ -1441,7 +1392,15 @@ retry:
void release_hw_mutex(struct hfi1_devdata *dd)
{
- write_csr(dd, ASIC_CFG_MUTEX, 0);
+ u8 mask = 1 << dd->hfi1_id;
+ u8 user = (u8)read_csr(dd, ASIC_CFG_MUTEX);
+
+ if (user != mask)
+ dd_dev_warn(dd,
+ "Unable to release hardware mutex, mutex mask %u, my mask %u\n",
+ (u32)user, (u32)mask);
+ else
+ write_csr(dd, ASIC_CFG_MUTEX, 0);
}
/* return the given resource bit(s) as a mask for the given HFI */
@@ -1707,10 +1666,8 @@ int hfi1_firmware_init(struct hfi1_devdata *dd)
}
/* no 8051 or QSFP on simulator */
- if (dd->icode == ICODE_FUNCTIONAL_SIMULATOR) {
+ if (dd->icode == ICODE_FUNCTIONAL_SIMULATOR)
fw_8051_load = 0;
- platform_config_load = 0;
- }
if (!fw_8051_name) {
if (dd->icode == ICODE_RTL_SILICON)
@@ -1724,8 +1681,6 @@ int hfi1_firmware_init(struct hfi1_devdata *dd)
fw_sbus_name = DEFAULT_FW_SBUS_NAME;
if (!fw_pcie_serdes_name)
fw_pcie_serdes_name = DEFAULT_FW_PCIE_NAME;
- if (!platform_config_name)
- platform_config_name = DEFAULT_PLATFORM_CONFIG_NAME;
return obtain_firmware(dd);
}
@@ -1760,7 +1715,7 @@ static int check_meta_version(struct hfi1_devdata *dd, u32 *system_table)
ver_start /= 8;
meta_ver = *((u8 *)system_table + ver_start) & ((1 << ver_len) - 1);
- if (meta_ver < 5) {
+ if (meta_ver < 4) {
dd_dev_info(
dd, "%s:Please update platform config\n", __func__);
return -EINVAL;
@@ -1771,6 +1726,7 @@ static int check_meta_version(struct hfi1_devdata *dd, u32 *system_table)
int parse_platform_config(struct hfi1_devdata *dd)
{
struct platform_config_cache *pcfgcache = &dd->pcfg_cache;
+ struct hfi1_pportdata *ppd = dd->pport;
u32 *ptr = NULL;
u32 header1 = 0, header2 = 0, magic_num = 0, crc = 0, file_length = 0;
u32 record_idx = 0, table_type = 0, table_length_dwords = 0;
@@ -1782,11 +1738,12 @@ int parse_platform_config(struct hfi1_devdata *dd)
* scratch register bitmap, thus there is no platform config to parse.
* Skip parsing in these situations.
*/
- if (is_integrated(dd) && !platform_config_load)
+ if (ppd->config_from_scratch)
return 0;
if (!dd->platform_config.data) {
dd_dev_err(dd, "%s: Missing config file\n", __func__);
+ ret = -EINVAL;
goto bail;
}
ptr = (u32 *)dd->platform_config.data;
@@ -1795,16 +1752,31 @@ int parse_platform_config(struct hfi1_devdata *dd)
ptr++;
if (magic_num != PLATFORM_CONFIG_MAGIC_NUM) {
dd_dev_err(dd, "%s: Bad config file\n", __func__);
+ ret = -EINVAL;
goto bail;
}
/* Field is file size in DWORDs */
file_length = (*ptr) * 4;
- ptr++;
+
+ /*
+ * Length can't be larger than partition size. Assume platform
+ * config format version 4 is being used. Interpret the file size
+ * field as header instead by not moving the pointer.
+ */
+ if (file_length > MAX_PLATFORM_CONFIG_FILE_SIZE) {
+ dd_dev_info(dd,
+ "%s:File length out of bounds, using alternative format\n",
+ __func__);
+ file_length = PLATFORM_CONFIG_FORMAT_4_FILE_SIZE;
+ } else {
+ ptr++;
+ }
if (file_length > dd->platform_config.size) {
dd_dev_info(dd, "%s:File claims to be larger than read size\n",
__func__);
+ ret = -EINVAL;
goto bail;
} else if (file_length < dd->platform_config.size) {
dd_dev_info(dd,
@@ -1815,7 +1787,8 @@ int parse_platform_config(struct hfi1_devdata *dd)
/*
* In both cases where we proceed, using the self-reported file length
- * is the safer option
+ * is the safer option. In case of old format a predefined value is
+ * being used.
*/
while (ptr < (u32 *)(dd->platform_config.data + file_length)) {
header1 = *ptr;
@@ -1824,6 +1797,7 @@ int parse_platform_config(struct hfi1_devdata *dd)
dd_dev_err(dd, "%s: Failed validation at offset %ld\n",
__func__, (ptr - (u32 *)
dd->platform_config.data));
+ ret = -EINVAL;
goto bail;
}
@@ -1855,11 +1829,8 @@ int parse_platform_config(struct hfi1_devdata *dd)
2;
break;
case PLATFORM_CONFIG_RX_PRESET_TABLE:
- /* fall through */
case PLATFORM_CONFIG_TX_PRESET_TABLE:
- /* fall through */
case PLATFORM_CONFIG_QSFP_ATTEN_TABLE:
- /* fall through */
case PLATFORM_CONFIG_VARIABLE_SETTINGS_TABLE:
pcfgcache->config_tables[table_type].num_table =
table_length_dwords;
@@ -1870,6 +1841,7 @@ int parse_platform_config(struct hfi1_devdata *dd)
__func__, table_type,
(ptr - (u32 *)
dd->platform_config.data));
+ ret = -EINVAL;
goto bail; /* We don't trust this file now */
}
pcfgcache->config_tables[table_type].table = ptr;
@@ -1877,15 +1849,10 @@ int parse_platform_config(struct hfi1_devdata *dd)
/* metadata table */
switch (table_type) {
case PLATFORM_CONFIG_SYSTEM_TABLE:
- /* fall through */
case PLATFORM_CONFIG_PORT_TABLE:
- /* fall through */
case PLATFORM_CONFIG_RX_PRESET_TABLE:
- /* fall through */
case PLATFORM_CONFIG_TX_PRESET_TABLE:
- /* fall through */
case PLATFORM_CONFIG_QSFP_ATTEN_TABLE:
- /* fall through */
case PLATFORM_CONFIG_VARIABLE_SETTINGS_TABLE:
break;
default:
@@ -1894,6 +1861,7 @@ int parse_platform_config(struct hfi1_devdata *dd)
__func__, table_type,
(ptr -
(u32 *)dd->platform_config.data));
+ ret = -EINVAL;
goto bail; /* We don't trust this file now */
}
pcfgcache->config_tables[table_type].table_metadata =
@@ -1911,6 +1879,7 @@ int parse_platform_config(struct hfi1_devdata *dd)
dd_dev_err(dd, "%s: Failed CRC check at offset %ld\n",
__func__, (ptr -
(u32 *)dd->platform_config.data));
+ ret = -EINVAL;
goto bail;
}
/* Jump the CRC DWORD */
@@ -2014,15 +1983,10 @@ static int get_platform_fw_field_metadata(struct hfi1_devdata *dd, int table,
switch (table) {
case PLATFORM_CONFIG_SYSTEM_TABLE:
- /* fall through */
case PLATFORM_CONFIG_PORT_TABLE:
- /* fall through */
case PLATFORM_CONFIG_RX_PRESET_TABLE:
- /* fall through */
case PLATFORM_CONFIG_TX_PRESET_TABLE:
- /* fall through */
case PLATFORM_CONFIG_QSFP_ATTEN_TABLE:
- /* fall through */
case PLATFORM_CONFIG_VARIABLE_SETTINGS_TABLE:
if (field && field < platform_config_table_limits[table])
src_ptr =
@@ -2071,13 +2035,14 @@ int get_platform_config_field(struct hfi1_devdata *dd,
int ret = 0, wlen = 0, seek = 0;
u32 field_len_bits = 0, field_start_bits = 0, *src_ptr = NULL;
struct platform_config_cache *pcfgcache = &dd->pcfg_cache;
+ struct hfi1_pportdata *ppd = dd->pport;
if (data)
memset(data, 0, len);
else
return -EINVAL;
- if (is_integrated(dd) && !platform_config_load) {
+ if (ppd->config_from_scratch) {
/*
* Use saved configuration from ppd for integrated platforms
*/
@@ -2124,11 +2089,8 @@ int get_platform_config_field(struct hfi1_devdata *dd,
pcfgcache->config_tables[table_type].table;
break;
case PLATFORM_CONFIG_RX_PRESET_TABLE:
- /* fall through */
case PLATFORM_CONFIG_TX_PRESET_TABLE:
- /* fall through */
case PLATFORM_CONFIG_QSFP_ATTEN_TABLE:
- /* fall through */
case PLATFORM_CONFIG_VARIABLE_SETTINGS_TABLE:
src_ptr = pcfgcache->config_tables[table_type].table;
diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h
index 751a0fb29fa5..cb630551cf1a 100644
--- a/drivers/infiniband/hw/hfi1/hfi.h
+++ b/drivers/infiniband/hw/hfi1/hfi.h
@@ -1,52 +1,13 @@
-#ifndef _HFI1_KERNEL_H
-#define _HFI1_KERNEL_H
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
+ * Copyright(c) 2020-2023 Cornelis Networks, Inc.
+ * Copyright(c) 2015-2020 Intel Corporation.
*/
+#ifndef _HFI1_KERNEL_H
+#define _HFI1_KERNEL_H
+
+#include <linux/refcount.h>
#include <linux/interrupt.h>
#include <linux/pci.h>
#include <linux/dma-mapping.h>
@@ -64,12 +25,15 @@
#include <linux/kthread.h>
#include <linux/i2c.h>
#include <linux/i2c-algo-bit.h>
+#include <linux/xarray.h>
#include <rdma/ib_hdrs.h>
+#include <rdma/opa_addr.h>
#include <linux/rhashtable.h>
#include <rdma/rdma_vt.h>
#include "chip_registers.h"
#include "common.h"
+#include "opfn.h"
#include "verbs.h"
#include "pio.h"
#include "chip.h"
@@ -77,6 +41,7 @@
#include "qsfp.h"
#include "platform.h"
#include "affinity.h"
+#include "msix.h"
/* bumped 1 from s/w major version of TrueScale */
#define HFI1_CHIP_VERS_MAJ 3U
@@ -91,6 +56,11 @@
#define DROP_PACKET_OFF 0
#define DROP_PACKET_ON 1
+#define NEIGHBOR_TYPE_HFI 0
+#define NEIGHBOR_TYPE_SWITCH 1
+
+#define HFI1_MAX_ACTIVE_WORKQUEUE_ENTRIES 5
+
extern unsigned long hfi1_cap_mask;
#define HFI1_CAP_KGET_MASK(mask, cap) ((mask) & HFI1_CAP_##cap)
#define HFI1_CAP_UGET_MASK(mask, cap) \
@@ -148,6 +118,8 @@ struct hfi1_ib_stats {
extern struct hfi1_ib_stats hfi1_stats;
extern const struct pci_error_handlers hfi1_pci_err_handler;
+extern int num_driver_cntrs;
+
/*
* First-cut criterion for "device is active" is
* two thousand dwords combined Tx, Rx traffic per
@@ -160,17 +132,9 @@ extern const struct pci_error_handlers hfi1_pci_err_handler;
* Below contains all data related to a single context (formerly called port).
*/
-#ifdef CONFIG_DEBUG_FS
struct hfi1_opcode_stats_perctx;
-#endif
struct ctxt_eager_bufs {
- ssize_t size; /* total size of eager buffers */
- u32 count; /* size of buffers array */
- u32 numbufs; /* number of buffers allocated */
- u32 alloced; /* number of rcvarray entries used */
- u32 rcvtid_size; /* size of each eager rcv tid */
- u32 threshold; /* head update threshold */
struct eager_buffer {
void *addr;
dma_addr_t dma;
@@ -180,6 +144,12 @@ struct ctxt_eager_bufs {
void *addr;
dma_addr_t dma;
} *rcvtids;
+ u32 size; /* total size of eager buffers */
+ u32 rcvtid_size; /* size of each eager rcv tid */
+ u16 count; /* size of buffers array */
+ u16 numbufs; /* number of buffers allocated */
+ u16 alloced; /* number of rcvarray entries used */
+ u16 threshold; /* head update threshold */
};
struct exp_tid_set {
@@ -187,158 +157,172 @@ struct exp_tid_set {
u32 count;
};
+struct hfi1_ctxtdata;
+typedef int (*intr_handler)(struct hfi1_ctxtdata *rcd, int data);
+typedef void (*rhf_rcv_function_ptr)(struct hfi1_packet *packet);
+
+struct tid_queue {
+ struct list_head queue_head;
+ /* queue head for QP TID resource waiters */
+ u32 enqueue; /* count of tid enqueues */
+ u32 dequeue; /* count of tid dequeues */
+};
+
struct hfi1_ctxtdata {
- /* shadow the ctxt's RcvCtrl register */
- u64 rcvctrl;
/* rcvhdrq base, needs mmap before useful */
void *rcvhdrq;
/* kernel virtual address where hdrqtail is updated */
volatile __le64 *rcvhdrtail_kvaddr;
+ /* so functions that need physical port can get it easily */
+ struct hfi1_pportdata *ppd;
+ /* so file ops can get at unit */
+ struct hfi1_devdata *dd;
+ /* this receive context's assigned PIO ACK send context */
+ struct send_context *sc;
+ /* per context recv functions */
+ const rhf_rcv_function_ptr *rhf_rcv_function_map;
/*
- * Shared page for kernel to signal user processes that send buffers
- * need disarming. The process should call HFI1_CMD_DISARM_BUFS
- * or HFI1_CMD_ACK_EVENT with IPATH_EVENT_DISARM_BUFS set.
+ * The interrupt handler for a particular receive context can vary
+ * throughout it's lifetime. This is not a lock protected data member so
+ * it must be updated atomically and the prev and new value must always
+ * be valid. Worst case is we process an extra interrupt and up to 64
+ * packets with the wrong interrupt handler.
*/
- unsigned long *user_event_mask;
- /* when waiting for rcv or pioavail */
- wait_queue_head_t wait;
- /* rcvhdrq size (for freeing) */
- size_t rcvhdrq_size;
+ intr_handler do_interrupt;
+ /** fast handler after autoactive */
+ intr_handler fast_handler;
+ /** slow handler */
+ intr_handler slow_handler;
+ /* napi pointer assiociated with netdev */
+ struct napi_struct *napi;
+ /* verbs rx_stats per rcd */
+ struct hfi1_opcode_stats_perctx *opstats;
+ /* clear interrupt mask */
+ u64 imask;
+ /* ctxt rcvhdrq head offset */
+ u32 head;
/* number of rcvhdrq entries */
u16 rcvhdrq_cnt;
+ u8 ireg; /* clear interrupt register */
+ /* receive packet sequence counter */
+ u8 seq_cnt;
/* size of each of the rcvhdrq entries */
- u16 rcvhdrqentsize;
+ u8 rcvhdrqentsize;
+ /* offset of RHF within receive header entry */
+ u8 rhf_offset;
+ /* dynamic receive available interrupt timeout */
+ u8 rcvavail_timeout;
+ /* Indicates that this is vnic context */
+ bool is_vnic;
+ /* vnic queue index this context is mapped to */
+ u8 vnic_q_idx;
+ /* Is ASPM interrupt supported for this context */
+ bool aspm_intr_supported;
+ /* ASPM state (enabled/disabled) for this context */
+ bool aspm_enabled;
+ /* Is ASPM processing enabled for this context (in intr context) */
+ bool aspm_intr_enable;
+ struct ctxt_eager_bufs egrbufs;
+ /* QPs waiting for context processing */
+ struct list_head qp_wait_list;
+ /* tid allocation lists */
+ struct exp_tid_set tid_group_list;
+ struct exp_tid_set tid_used_list;
+ struct exp_tid_set tid_full_list;
+
+ /* Timer for re-enabling ASPM if interrupt activity quiets down */
+ struct timer_list aspm_timer;
+ /* per-context configuration flags */
+ unsigned long flags;
+ /* array of tid_groups */
+ struct tid_group *groups;
/* mmap of hdrq, must fit in 44 bits */
dma_addr_t rcvhdrq_dma;
dma_addr_t rcvhdrqtailaddr_dma;
- struct ctxt_eager_bufs egrbufs;
- /* this receive context's assigned PIO ACK send context */
- struct send_context *sc;
-
- /* dynamic receive available interrupt timeout */
- u32 rcvavail_timeout;
- /*
- * number of opens (including slave sub-contexts) on this instance
- * (ignoring forks, dup, etc. for now)
- */
- int cnt;
- /*
- * how much space to leave at start of eager TID entries for
- * protocol use, on each TID
- */
- /* instead of calculating it */
- unsigned ctxt;
- /* non-zero if ctxt is being shared. */
- u16 subctxt_cnt;
- /* non-zero if ctxt is being shared. */
- u16 subctxt_id;
- u8 uuid[16];
+ /* Last interrupt timestamp */
+ ktime_t aspm_ts_last_intr;
+ /* Last timestamp at which we scheduled a timer for this context */
+ ktime_t aspm_ts_timer_sched;
+ /* Lock to serialize between intr, timer intr and user threads */
+ spinlock_t aspm_lock;
+ /* Reference count the base context usage */
+ struct kref kref;
+ /* numa node of this context */
+ int numa_id;
+ /* associated msix interrupt. */
+ s16 msix_intr;
/* job key */
u16 jkey;
/* number of RcvArray groups for this context. */
- u32 rcv_array_groups;
+ u16 rcv_array_groups;
/* index of first eager TID entry. */
- u32 eager_base;
+ u16 eager_base;
/* number of expected TID entries */
- u32 expected_count;
+ u16 expected_count;
/* index of first expected TID entry. */
- u32 expected_base;
-
- struct exp_tid_set tid_group_list;
- struct exp_tid_set tid_used_list;
- struct exp_tid_set tid_full_list;
+ u16 expected_base;
+ /* Device context index */
+ u8 ctxt;
+ /* PSM Specific fields */
/* lock protecting all Expected TID data */
- struct mutex exp_lock;
- /* number of pio bufs for this ctxt (all procs, if shared) */
- u32 piocnt;
- /* first pio buffer for this ctxt */
- u32 pio_base;
- /* chip offset of PIO buffers for this ctxt */
- u32 piobufs;
- /* per-context configuration flags */
- unsigned long flags;
- /* per-context event flags for fileops/intr communication */
- unsigned long event_flags;
- /* WAIT_RCV that timed out, no interrupt */
- u32 rcvwait_to;
- /* WAIT_PIO that timed out, no interrupt */
- u32 piowait_to;
- /* WAIT_RCV already happened, no wait */
- u32 rcvnowait;
- /* WAIT_PIO already happened, no wait */
- u32 pionowait;
- /* total number of polled urgent packets */
- u32 urgent;
- /* saved total number of polled urgent packets for poll edge trigger */
- u32 urgent_poll;
+ struct mutex exp_mutex;
+ /* lock protecting all Expected TID data of kernel contexts */
+ spinlock_t exp_lock;
+ /* Queue for QP's waiting for HW TID flows */
+ struct tid_queue flow_queue;
+ /* Queue for QP's waiting for HW receive array entries */
+ struct tid_queue rarr_queue;
+ /* when waiting for rcv or pioavail */
+ wait_queue_head_t wait;
+ /* uuid from PSM */
+ u8 uuid[16];
/* same size as task_struct .comm[], command that opened context */
char comm[TASK_COMM_LEN];
- /* so file ops can get at unit */
- struct hfi1_devdata *dd;
- /* so functions that need physical port can get it easily */
- struct hfi1_pportdata *ppd;
+ /* Bitmask of in use context(s) */
+ DECLARE_BITMAP(in_use_ctxts, HFI1_MAX_SHARED_CTXTS);
+ /* per-context event flags for fileops/intr communication */
+ unsigned long event_flags;
/* A page of memory for rcvhdrhead, rcvegrhead, rcvegrtail * N */
void *subctxt_uregbase;
/* An array of pages for the eager receive buffers * N */
void *subctxt_rcvegrbuf;
/* An array of pages for the eager header queue entries * N */
void *subctxt_rcvhdr_base;
- /* The version of the library which opened this ctxt */
- u32 userversion;
- /* Bitmask of active slaves */
- u32 active_slaves;
+ /* total number of polled urgent packets */
+ u32 urgent;
+ /* saved total number of polled urgent packets for poll edge trigger */
+ u32 urgent_poll;
/* Type of packets or conditions we want to poll for */
u16 poll_type;
- /* receive packet sequence counter */
- u8 seq_cnt;
- u8 redirect_seq_cnt;
- /* ctxt rcvhdrq head offset */
- u32 head;
- u32 pkt_count;
- /* QPs waiting for context processing */
- struct list_head qp_wait_list;
- /* interrupt handling */
- u64 imask; /* clear interrupt mask */
- int ireg; /* clear interrupt register */
- unsigned numa_id; /* numa node of this context */
- /* verbs stats per CTX */
- struct hfi1_opcode_stats_perctx *opstats;
+ /* non-zero if ctxt is being shared. */
+ u16 subctxt_id;
+ /* The version of the library which opened this ctxt */
+ u32 userversion;
/*
- * This is the kernel thread that will keep making
- * progress on the user sdma requests behind the scenes.
- * There is one per context (shared contexts use the master's).
+ * non-zero if ctxt can be shared, and defines the maximum number of
+ * sub-contexts for this device context.
*/
- struct task_struct *progress;
- struct list_head sdma_queues;
- /* protect sdma queues */
- spinlock_t sdma_qlock;
+ u8 subctxt_cnt;
- /* Is ASPM interrupt supported for this context */
- bool aspm_intr_supported;
- /* ASPM state (enabled/disabled) for this context */
- bool aspm_enabled;
- /* Timer for re-enabling ASPM if interrupt activity quietens down */
- struct timer_list aspm_timer;
- /* Lock to serialize between intr, timer intr and user threads */
- spinlock_t aspm_lock;
- /* Is ASPM processing enabled for this context (in intr context) */
- bool aspm_intr_enable;
- /* Last interrupt timestamp */
- ktime_t aspm_ts_last_intr;
- /* Last timestamp at which we scheduled a timer for this context */
- ktime_t aspm_ts_timer_sched;
-
- /*
- * The interrupt handler for a particular receive context can vary
- * throughout it's lifetime. This is not a lock protected data member so
- * it must be updated atomically and the prev and new value must always
- * be valid. Worst case is we process an extra interrupt and up to 64
- * packets with the wrong interrupt handler.
- */
- int (*do_interrupt)(struct hfi1_ctxtdata *rcd, int threaded);
+ /* Bit mask to track free TID RDMA HW flows */
+ unsigned long flow_mask;
+ struct tid_flow_state flows[RXE_NUM_TID_FLOWS];
};
+/**
+ * rcvhdrq_size - return total size in bytes for header queue
+ * @rcd: the receive context
+ *
+ * rcvhdrqentsize is in DWs, so we have to convert to bytes
+ *
+ */
+static inline u32 rcvhdrq_size(struct hfi1_ctxtdata *rcd)
+{
+ return PAGE_ALIGN(rcd->rcvhdrq_cnt *
+ rcd->rcvhdrqentsize * sizeof(u32));
+}
+
/*
* Represents a single packet at a high level. Put commonly computed things in
* here so we do not have to keep doing them over and over. The rule of thumb is
@@ -349,24 +333,211 @@ struct hfi1_ctxtdata {
struct hfi1_packet {
void *ebuf;
void *hdr;
+ void *payload;
struct hfi1_ctxtdata *rcd;
__le32 *rhf_addr;
struct rvt_qp *qp;
struct ib_other_headers *ohdr;
+ struct ib_grh *grh;
+ struct opa_16b_mgmt *mgmt;
u64 rhf;
u32 maxcnt;
u32 rhqoff;
- u32 hdrqtail;
+ u32 dlid;
+ u32 slid;
int numpkt;
u16 tlen;
- u16 hlen;
s16 etail;
- u16 rsize;
+ u16 pkey;
+ u8 hlen;
+ u8 rsize;
u8 updegr;
- u8 rcv_flags;
u8 etype;
+ u8 extra_byte;
+ u8 pad;
+ u8 sc;
+ u8 sl;
+ u8 opcode;
+ bool migrated;
};
+/* Packet types */
+#define HFI1_PKT_TYPE_9B 0
+#define HFI1_PKT_TYPE_16B 1
+
+/*
+ * OPA 16B Header
+ */
+#define OPA_16B_L4_MASK 0xFFull
+#define OPA_16B_SC_MASK 0x1F00000ull
+#define OPA_16B_SC_SHIFT 20
+#define OPA_16B_LID_MASK 0xFFFFFull
+#define OPA_16B_DLID_MASK 0xF000ull
+#define OPA_16B_DLID_SHIFT 20
+#define OPA_16B_DLID_HIGH_SHIFT 12
+#define OPA_16B_SLID_MASK 0xF00ull
+#define OPA_16B_SLID_SHIFT 20
+#define OPA_16B_SLID_HIGH_SHIFT 8
+#define OPA_16B_BECN_MASK 0x80000000ull
+#define OPA_16B_BECN_SHIFT 31
+#define OPA_16B_FECN_MASK 0x10000000ull
+#define OPA_16B_FECN_SHIFT 28
+#define OPA_16B_L2_MASK 0x60000000ull
+#define OPA_16B_L2_SHIFT 29
+#define OPA_16B_PKEY_MASK 0xFFFF0000ull
+#define OPA_16B_PKEY_SHIFT 16
+#define OPA_16B_LEN_MASK 0x7FF00000ull
+#define OPA_16B_LEN_SHIFT 20
+#define OPA_16B_RC_MASK 0xE000000ull
+#define OPA_16B_RC_SHIFT 25
+#define OPA_16B_AGE_MASK 0xFF0000ull
+#define OPA_16B_AGE_SHIFT 16
+#define OPA_16B_ENTROPY_MASK 0xFFFFull
+
+/*
+ * OPA 16B L2/L4 Encodings
+ */
+#define OPA_16B_L4_9B 0x00
+#define OPA_16B_L2_TYPE 0x02
+#define OPA_16B_L4_FM 0x08
+#define OPA_16B_L4_IB_LOCAL 0x09
+#define OPA_16B_L4_IB_GLOBAL 0x0A
+#define OPA_16B_L4_ETHR OPA_VNIC_L4_ETHR
+
+/*
+ * OPA 16B Management
+ */
+#define OPA_16B_L4_FM_PAD 3 /* fixed 3B pad */
+#define OPA_16B_L4_FM_HLEN 24 /* 16B(16) + L4_FM(8) */
+
+static inline u8 hfi1_16B_get_l4(struct hfi1_16b_header *hdr)
+{
+ return (u8)(hdr->lrh[2] & OPA_16B_L4_MASK);
+}
+
+static inline u8 hfi1_16B_get_sc(struct hfi1_16b_header *hdr)
+{
+ return (u8)((hdr->lrh[1] & OPA_16B_SC_MASK) >> OPA_16B_SC_SHIFT);
+}
+
+static inline u32 hfi1_16B_get_dlid(struct hfi1_16b_header *hdr)
+{
+ return (u32)((hdr->lrh[1] & OPA_16B_LID_MASK) |
+ (((hdr->lrh[2] & OPA_16B_DLID_MASK) >>
+ OPA_16B_DLID_HIGH_SHIFT) << OPA_16B_DLID_SHIFT));
+}
+
+static inline u32 hfi1_16B_get_slid(struct hfi1_16b_header *hdr)
+{
+ return (u32)((hdr->lrh[0] & OPA_16B_LID_MASK) |
+ (((hdr->lrh[2] & OPA_16B_SLID_MASK) >>
+ OPA_16B_SLID_HIGH_SHIFT) << OPA_16B_SLID_SHIFT));
+}
+
+static inline u8 hfi1_16B_get_becn(struct hfi1_16b_header *hdr)
+{
+ return (u8)((hdr->lrh[0] & OPA_16B_BECN_MASK) >> OPA_16B_BECN_SHIFT);
+}
+
+static inline u8 hfi1_16B_get_fecn(struct hfi1_16b_header *hdr)
+{
+ return (u8)((hdr->lrh[1] & OPA_16B_FECN_MASK) >> OPA_16B_FECN_SHIFT);
+}
+
+static inline u8 hfi1_16B_get_l2(struct hfi1_16b_header *hdr)
+{
+ return (u8)((hdr->lrh[1] & OPA_16B_L2_MASK) >> OPA_16B_L2_SHIFT);
+}
+
+static inline u16 hfi1_16B_get_pkey(struct hfi1_16b_header *hdr)
+{
+ return (u16)((hdr->lrh[2] & OPA_16B_PKEY_MASK) >> OPA_16B_PKEY_SHIFT);
+}
+
+static inline u8 hfi1_16B_get_rc(struct hfi1_16b_header *hdr)
+{
+ return (u8)((hdr->lrh[1] & OPA_16B_RC_MASK) >> OPA_16B_RC_SHIFT);
+}
+
+static inline u8 hfi1_16B_get_age(struct hfi1_16b_header *hdr)
+{
+ return (u8)((hdr->lrh[3] & OPA_16B_AGE_MASK) >> OPA_16B_AGE_SHIFT);
+}
+
+static inline u16 hfi1_16B_get_len(struct hfi1_16b_header *hdr)
+{
+ return (u16)((hdr->lrh[0] & OPA_16B_LEN_MASK) >> OPA_16B_LEN_SHIFT);
+}
+
+static inline u16 hfi1_16B_get_entropy(struct hfi1_16b_header *hdr)
+{
+ return (u16)(hdr->lrh[3] & OPA_16B_ENTROPY_MASK);
+}
+
+#define OPA_16B_MAKE_QW(low_dw, high_dw) (((u64)(high_dw) << 32) | (low_dw))
+
+/*
+ * BTH
+ */
+#define OPA_16B_BTH_PAD_MASK 7
+static inline u8 hfi1_16B_bth_get_pad(struct ib_other_headers *ohdr)
+{
+ return (u8)((be32_to_cpu(ohdr->bth[0]) >> IB_BTH_PAD_SHIFT) &
+ OPA_16B_BTH_PAD_MASK);
+}
+
+/*
+ * 16B Management
+ */
+#define OPA_16B_MGMT_QPN_MASK 0xFFFFFF
+static inline u32 hfi1_16B_get_dest_qpn(struct opa_16b_mgmt *mgmt)
+{
+ return be32_to_cpu(mgmt->dest_qpn) & OPA_16B_MGMT_QPN_MASK;
+}
+
+static inline u32 hfi1_16B_get_src_qpn(struct opa_16b_mgmt *mgmt)
+{
+ return be32_to_cpu(mgmt->src_qpn) & OPA_16B_MGMT_QPN_MASK;
+}
+
+static inline void hfi1_16B_set_qpn(struct opa_16b_mgmt *mgmt,
+ u32 dest_qp, u32 src_qp)
+{
+ mgmt->dest_qpn = cpu_to_be32(dest_qp & OPA_16B_MGMT_QPN_MASK);
+ mgmt->src_qpn = cpu_to_be32(src_qp & OPA_16B_MGMT_QPN_MASK);
+}
+
+/**
+ * hfi1_get_rc_ohdr - get extended header
+ * @opah - the opaheader
+ */
+static inline struct ib_other_headers *
+hfi1_get_rc_ohdr(struct hfi1_opa_header *opah)
+{
+ struct ib_other_headers *ohdr;
+ struct ib_header *hdr = NULL;
+ struct hfi1_16b_header *hdr_16b = NULL;
+
+ /* Find out where the BTH is */
+ if (opah->hdr_type == HFI1_PKT_TYPE_9B) {
+ hdr = &opah->ibh;
+ if (ib_get_lnh(hdr) == HFI1_LRH_BTH)
+ ohdr = &hdr->u.oth;
+ else
+ ohdr = &hdr->u.l.oth;
+ } else {
+ u8 l4;
+
+ hdr_16b = &opah->opah;
+ l4 = hfi1_16B_get_l4(hdr_16b);
+ if (l4 == OPA_16B_L4_IB_LOCAL)
+ ohdr = &hdr_16b->u.oth;
+ else
+ ohdr = &hdr_16b->u.l.oth;
+ }
+ return ohdr;
+}
+
struct rvt_sge_state;
/*
@@ -431,6 +602,8 @@ struct rvt_sge_state;
#define HLS_UP (HLS_UP_INIT | HLS_UP_ARMED | HLS_UP_ACTIVE)
#define HLS_DOWN ~(HLS_UP)
+#define HLS_DEFAULT HLS_DN_POLL
+
/* use this MTU size if none other is given */
#define HFI1_DEFAULT_ACTIVE_MTU 10240
/* use this MTU size as the default maximum */
@@ -469,13 +642,15 @@ struct rvt_sge_state;
#define HFI1_RCVCTRL_NO_RHQ_DROP_DIS 0x8000
#define HFI1_RCVCTRL_NO_EGR_DROP_ENB 0x10000
#define HFI1_RCVCTRL_NO_EGR_DROP_DIS 0x20000
+#define HFI1_RCVCTRL_URGENT_ENB 0x40000
+#define HFI1_RCVCTRL_URGENT_DIS 0x80000
/* partition enforcement flags */
#define HFI1_PART_ENFORCE_IN 0x1
#define HFI1_PART_ENFORCE_OUT 0x2
/* how often we check for synthetic counter wrap around */
-#define SYNTH_CNT_TIME 2
+#define SYNTH_CNT_TIME 3
/* Counter flags */
#define CNTR_NORMAL 0x0 /* Normal counters, just read register */
@@ -501,22 +676,23 @@ static inline void incr_cntr64(u64 *cntr)
(*cntr)++;
}
-static inline void incr_cntr32(u32 *cntr)
-{
- if (*cntr < (u32)-1LL)
- (*cntr)++;
-}
-
#define MAX_NAME_SIZE 64
struct hfi1_msix_entry {
enum irq_type type;
- struct msix_entry msix;
+ int irq;
void *arg;
- char name[MAX_NAME_SIZE];
cpumask_t mask;
struct irq_affinity_notify notify;
};
+struct hfi1_msix_info {
+ /* lock to synchronize in_use_msix access */
+ spinlock_t msix_lock;
+ DECLARE_BITMAP(in_use_msix, CCE_NUM_MSIX_VECTORS);
+ struct hfi1_msix_entry *msix_entries;
+ u16 max_requested;
+};
+
/* per-SL CCA information */
struct cca_timer {
struct hrtimer hrtimer;
@@ -556,10 +732,6 @@ struct hfi1_pportdata {
struct hfi1_ibport ibport_data;
struct hfi1_devdata *dd;
- struct kobject pport_cc_kobj;
- struct kobject sc2vl_kobj;
- struct kobject sl2sc_kobj;
- struct kobject vl2mtu_kobj;
/* PHY support */
struct qsfp_data qsfp_info;
@@ -573,6 +745,9 @@ struct hfi1_pportdata {
u8 default_atten;
u8 max_power_class;
+ /* did we read platform config from scratch registers? */
+ bool config_from_scratch;
+
/* GUIDs for this interface, in host order, guids[0] is a port guid */
u64 guids[HFI1_GUIDS_PER_PORT];
@@ -591,6 +766,7 @@ struct hfi1_pportdata {
/* SendDMA related entries */
struct workqueue_struct *hfi1_wq;
+ struct workqueue_struct *link_wq;
/* move out of interrupt context */
struct work_struct link_vc_work;
@@ -605,8 +781,6 @@ struct hfi1_pportdata {
struct mutex hls_lock;
u32 host_link_state;
- u32 lstate; /* logical link state */
-
/* these are the "32 bit" regs */
u32 ibmtu; /* The MTU programmed for this unit */
@@ -617,7 +791,7 @@ struct hfi1_pportdata {
u32 ibmaxlen;
u32 current_egress_rate; /* units [10^6 bits/sec] */
/* LID programmed for this instance */
- u16 lid;
+ u32 lid;
/* list of pkeys programmed; 0 if not set */
u16 pkeys[MAX_PKEY_VALUES];
u16 link_width_supported;
@@ -639,7 +813,7 @@ struct hfi1_pportdata {
u8 rx_pol_inv;
u8 hw_pidx; /* physical port index */
- u8 port; /* IB port number and index into dd->pports - 1 */
+ u32 port; /* IB port number and index into dd->pports - 1 */
/* type of neighbor node */
u8 neighbor_type;
u8 neighbor_normal;
@@ -652,12 +826,12 @@ struct hfi1_pportdata {
u8 link_enabled; /* link enabled? */
u8 linkinit_reason;
u8 local_tx_rate; /* rate given to 8051 firmware */
- u8 last_pstate; /* info only */
u8 qsfp_retry_count;
/* placeholders for IB MAD packet settings */
u8 overrun_threshold;
u8 phy_error_threshold;
+ unsigned int is_link_down_queued;
/* Used to override LED behavior for things like maintenance beaconing*/
/*
@@ -749,11 +923,21 @@ struct hfi1_pportdata {
struct work_struct linkstate_active_work;
/* Does this port need to prescan for FECNs */
bool cc_prescan;
+ /*
+ * Sample sendWaitCnt & sendWaitVlCnt during link transition
+ * and counter request.
+ */
+ u64 port_vl_xmit_wait_last[C_VL_COUNT + 1];
+ u16 prev_link_width;
+ u64 vl_xmit_flit_cnt[C_VL_COUNT + 1];
};
-typedef int (*rhf_rcv_function_ptr)(struct hfi1_packet *packet);
-
typedef void (*opcode_handler)(struct hfi1_packet *packet);
+typedef void (*hfi1_make_req)(struct rvt_qp *qp,
+ struct hfi1_pkt_state *ps,
+ struct rvt_swqe *wqe);
+extern const rhf_rcv_function_ptr normal_rhf_rcv_functions[];
+extern const rhf_rcv_function_ptr netdev_rhf_rcv_functions[];
/* return values for the RHF receive functions */
#define RHF_RCV_CONTINUE 0 /* keep going */
@@ -761,9 +945,9 @@ typedef void (*opcode_handler)(struct hfi1_packet *packet);
#define RHF_RCV_REPROCESS 2 /* stop. retain this packet */
struct rcv_array_data {
- u8 group_size;
u16 ngroups;
u16 nctxt_extra;
+ u8 group_size;
};
struct per_vl_data {
@@ -809,6 +993,18 @@ struct hfi1_asic_data {
struct hfi1_i2c_bus *i2c_bus1;
};
+/* sizes for both the QP and RSM map tables */
+#define NUM_MAP_ENTRIES 256
+#define NUM_MAP_REGS 32
+
+/* Virtual NIC information */
+struct hfi1_vnic_data {
+ struct kmem_cache *txreq_cache;
+ u8 num_vports;
+};
+
+struct hfi1_vnic_vport_info;
+
/* device data struct now contains only "general per-device" info.
* fields related to a physical IB port are in a hfi1_pportdata struct.
*/
@@ -819,9 +1015,9 @@ struct sdma_vl_map;
#define SERIAL_MAX 16 /* length of the serial number */
typedef int (*send_routine)(struct rvt_qp *, struct hfi1_pkt_state *, u64);
+struct hfi1_netdev_rx;
struct hfi1_devdata {
struct hfi1_ibdev verbs_dev; /* must be first */
- struct list_head list;
/* pointers to related structs for this device */
/* pci access data structure */
struct pci_dev *pcidev;
@@ -832,12 +1028,15 @@ struct hfi1_devdata {
struct device *diag_device;
struct device *ui_device;
- /* mem-mapped pointer to base of chip regs */
- u8 __iomem *kregbase;
- /* end of mem-mapped chip space excluding sendbuf and user regs */
- u8 __iomem *kregend;
- /* physical address of chip for io_remap, etc. */
+ /* first mapping up to RcvArray */
+ u8 __iomem *kregbase1;
resource_size_t physaddr;
+
+ /* second uncached mapping from RcvArray to pio send buffers */
+ u8 __iomem *kregbase2;
+ /* for detecting offset above kregbase2 address */
+ u32 base2_start;
+
/* Per VL data. Enough for all VLs but not all elements are set/used. */
struct per_vl_data vld[PER_VL_SEND_CONTEXTS];
/* send context data */
@@ -867,8 +1066,6 @@ struct hfi1_devdata {
dma_addr_t sdma_pad_phys;
/* for deallocation */
size_t sdma_heads_size;
- /* number from the chip */
- u32 chip_sdma_engines;
/* num used */
u32 num_sdma;
/* array of engines sized by num_sdma */
@@ -902,12 +1099,16 @@ struct hfi1_devdata {
char *boardname; /* human readable board info */
+ u64 ctx0_seq_drop;
+
/* reset value */
u64 z_int_counter;
u64 z_rcv_limit;
u64 z_send_schedule;
u64 __percpu *send_schedule;
+ /* number of reserved contexts for netdev usage */
+ u16 num_netdev_contexts;
/* number of receive contexts in use by the driver */
u32 num_rcv_contexts;
/* number of pio send contexts in use by the driver */
@@ -921,14 +1122,12 @@ struct hfi1_devdata {
/* base receive interrupt timeout, in CSR units */
u32 rcv_intr_timeout_csr;
- u32 freezelen; /* max length of freezemsg */
- u64 __iomem *egrtidbase;
spinlock_t sendctrl_lock; /* protect changes to SendCtrl */
spinlock_t rcvctrl_lock; /* protect changes to RcvCtrl */
- /* around rcd and (user ctxts) ctxt_cnt use (intr vs free) */
- spinlock_t uctxt_lock; /* rcd and user context changes */
- /* exclusive access to 8051 */
- spinlock_t dc8051_lock;
+ spinlock_t uctxt_lock; /* protect rcd changes */
+ struct mutex dc8051_lock; /* exclusive access to 8051 */
+ struct workqueue_struct *update_cntr_wq;
+ struct work_struct update_cntr_work;
/* exclusive access to 8051 memory */
spinlock_t dc8051_memlock;
int dc8051_timed_out; /* remember if the 8051 timed out */
@@ -949,29 +1148,9 @@ struct hfi1_devdata {
/* Base GUID for device (network order) */
u64 base_guid;
- /* these are the "32 bit" regs */
-
- /* value we put in kr_rcvhdrsize */
- u32 rcvhdrsize;
- /* number of receive contexts the chip supports */
- u32 chip_rcv_contexts;
- /* number of receive array entries */
- u32 chip_rcv_array_count;
- /* number of PIO send contexts the chip supports */
- u32 chip_send_contexts;
- /* number of bytes in the PIO memory buffer */
- u32 chip_pio_mem_size;
- /* number of bytes in the SDMA memory buffer */
- u32 chip_sdma_mem_size;
-
- /* size of each rcvegrbuffer */
- u32 rcvegrbufsize;
- /* log2 of above */
- u16 rcvegrbufsize_shift;
/* both sides of the PCIe link are gen3 capable */
u8 link_gen3_capable;
- /* default link down value (poll/sleep) */
- u8 link_default;
+ u8 dc_shutdown;
/* localbus width (1, 2,4,8,16,32) from config space */
u32 lbus_width;
/* localbus speed in MHz */
@@ -988,7 +1167,6 @@ struct hfi1_devdata {
u16 pcie_lnkctl;
u16 pcie_devctl2;
u32 pci_msix0;
- u32 pci_lnkctl3;
u32 pci_tph2;
/*
@@ -1016,12 +1194,20 @@ struct hfi1_devdata {
/* initial vl15 credits to use */
u16 vl15_init;
+ /*
+ * Cached value for vl15buf, read during verify cap interrupt. VL15
+ * credits are to be kept at 0 and set when handling the link-up
+ * interrupt. This removes the possibility of receiving VL15 MAD
+ * packets before this HFI is ready.
+ */
+ u16 vl15buf_cached;
+
/* Misc small ints */
u8 n_krcv_queues;
u8 qos_shift;
u16 irev; /* implementation revision */
- u16 dc8051_ver; /* 8051 firmware version */
+ u32 dc8051_ver; /* 8051 firmware version */
spinlock_t hfi1_diag_trans_lock; /* protect diag observer ops */
struct platform_config platform_config;
@@ -1029,14 +1215,6 @@ struct hfi1_devdata {
struct diag_client *diag_client;
- /* MSI-X information */
- struct hfi1_msix_entry *msix_entries;
- u32 num_msix_entries;
-
- /* INTx information */
- u32 requested_intx_irq; /* did we request one? */
- char intx_name[MAX_NAME_SIZE]; /* INTx name */
-
/* general interrupt: mask of handled interrupts */
u64 gi_mask[CCE_NUM_INT_CSRS];
@@ -1050,6 +1228,9 @@ struct hfi1_devdata {
*/
struct timer_list synth_stats_timer;
+ /* MSI-X information */
+ struct hfi1_msix_info msix_info;
+
/*
* device counters
*/
@@ -1077,7 +1258,7 @@ struct hfi1_devdata {
struct err_info_constraint err_info_xmit_constraint;
atomic_t drop_packet;
- u8 do_drop;
+ bool do_drop;
u8 err_info_uncorrectable;
u8 err_info_fmconfig;
@@ -1102,11 +1283,12 @@ struct hfi1_devdata {
u64 sw_cce_err_status_aggregate;
/* Software counter that aggregates all bypass packet rcv errors */
u64 sw_rcv_bypass_packet_errors;
- /* receive interrupt function */
- rhf_rcv_function_ptr normal_rhf_rcv_functions[8];
/* Save the enabled LCB error bits */
u64 lcb_err_en;
+ struct cpu_mask_set *comp_vect;
+ int *comp_vect_mappings;
+ u32 comp_vect_possible_cpus;
/*
* Capability to have different send engines simply by changing a
@@ -1116,6 +1298,9 @@ struct hfi1_devdata {
send_routine process_dma_send;
void (*pio_inline_send)(struct hfi1_devdata *dd, struct pio_buf *pbuf,
u64 pbc, const void *from, size_t count);
+ int (*process_vnic_dma_send)(struct hfi1_devdata *dd, u8 q_idx,
+ struct hfi1_vnic_vport_info *vinfo,
+ struct sk_buff *skb, u64 pbc, u8 plen);
/* hfi1_pportdata, points to array of (physical) port-specific
* data structs, indexed by pidx (0..n-1)
*/
@@ -1123,28 +1308,26 @@ struct hfi1_devdata {
/* receive context data */
struct hfi1_ctxtdata **rcd;
u64 __percpu *int_counter;
+ /* verbs tx opcode stats */
+ struct hfi1_opcode_stats_perctx __percpu *tx_opstats;
/* device (not port) flags, basically device capabilities */
u16 flags;
/* Number of physical ports available */
u8 num_pports;
- /* Lowest context number which can be used by user processes */
- u8 first_user_ctxt;
+ /* Lowest context number which can be used by user processes or VNIC */
+ u8 first_dyn_alloc_ctxt;
/* adding a new field here would make it part of this cacheline */
/* seqlock for sc2vl */
seqlock_t sc2vl_lock ____cacheline_aligned_in_smp;
u64 sc2vl[4];
- /* receive interrupt functions */
- rhf_rcv_function_ptr *rhf_rcv_function_map;
u64 __percpu *rcv_limit;
- u16 rhf_offset; /* offset of RHF within receive header entry */
/* adding a new field here would make it part of this cacheline */
/* OUI comes from the HW. Used everywhere as 3 separate bytes. */
u8 oui1;
u8 oui2;
u8 oui3;
- u8 dc_shutdown;
/* Timer and counter used to detect RcvBufOvflCnt changes */
struct timer_list rcverr_timer;
@@ -1161,42 +1344,55 @@ struct hfi1_devdata {
/* Number of verbs contexts which have disabled ASPM */
atomic_t aspm_disabled_cnt;
/* Keeps track of user space clients */
- atomic_t user_refcount;
+ refcount_t user_refcount;
/* Used to wait for outstanding user space clients before dev removal */
struct completion user_comp;
bool eprom_available; /* true if EPROM is available for this device */
bool aspm_supported; /* Does HW support ASPM */
bool aspm_enabled; /* ASPM state: enabled/disabled */
- struct rhashtable sdma_rht;
-
- struct kobject kobj;
+ struct rhashtable *sdma_rht;
+
+ /* vnic data */
+ struct hfi1_vnic_data vnic;
+ /* Lock to protect IRQ SRC register access */
+ spinlock_t irq_src_lock;
+ int vnic_num_vports;
+ struct hfi1_netdev_rx *netdev_rx;
+ struct hfi1_affinity_node *affinity_entry;
+
+ /* Keeps track of IPoIB RSM rule users */
+ atomic_t ipoib_rsm_usr_num;
};
/* 8051 firmware version helper */
-#define dc8051_ver(a, b) ((a) << 8 | (b))
-#define dc8051_ver_maj(a) ((a & 0xff00) >> 8)
-#define dc8051_ver_min(a) (a & 0x00ff)
+#define dc8051_ver(a, b, c) ((a) << 16 | (b) << 8 | (c))
+#define dc8051_ver_maj(a) (((a) & 0xff0000) >> 16)
+#define dc8051_ver_min(a) (((a) & 0x00ff00) >> 8)
+#define dc8051_ver_patch(a) ((a) & 0x0000ff)
/* f_put_tid types */
-#define PT_EXPECTED 0
-#define PT_EAGER 1
-#define PT_INVALID 2
+#define PT_EXPECTED 0
+#define PT_EAGER 1
+#define PT_INVALID_FLUSH 2
+#define PT_INVALID 3
struct tid_rb_node;
-struct mmu_rb_node;
-struct mmu_rb_handler;
/* Private data for file operations */
struct hfi1_filedata {
+ struct srcu_struct pq_srcu;
+ struct hfi1_devdata *dd;
struct hfi1_ctxtdata *uctxt;
- unsigned subctxt;
struct hfi1_user_sdma_comp_q *cq;
- struct hfi1_user_sdma_pkt_q *pq;
+ /* update side lock for SRCU */
+ spinlock_t pq_rcu_lock;
+ struct hfi1_user_sdma_pkt_q __rcu *pq;
+ u16 subctxt;
/* for cpu affinity; -1 if none */
int rec_cpu_num;
u32 tid_n_pinned;
- struct mmu_rb_handler *handler;
+ bool use_mn;
struct tid_rb_node **entry_to_rb;
spinlock_t tid_lock; /* protect tid_[limit,used] counters */
u32 tid_limit;
@@ -1205,66 +1401,211 @@ struct hfi1_filedata {
u32 invalid_tid_idx;
/* protect invalid_tids array and invalid_tid_idx */
spinlock_t invalid_lock;
- struct mm_struct *mm;
};
-extern struct list_head hfi1_dev_list;
-extern spinlock_t hfi1_devs_lock;
+extern struct xarray hfi1_dev_table;
struct hfi1_devdata *hfi1_lookup(int unit);
-extern u32 hfi1_cpulist_count;
-extern unsigned long *hfi1_cpulist;
-int hfi1_init(struct hfi1_devdata *, int);
-int hfi1_count_units(int *npresentp, int *nupp);
+static inline unsigned long uctxt_offset(struct hfi1_ctxtdata *uctxt)
+{
+ return (uctxt->ctxt - uctxt->dd->first_dyn_alloc_ctxt) *
+ HFI1_MAX_SHARED_CTXTS;
+}
+
+int hfi1_init(struct hfi1_devdata *dd, int reinit);
int hfi1_count_active_units(void);
-int hfi1_diag_add(struct hfi1_devdata *);
-void hfi1_diag_remove(struct hfi1_devdata *);
+int hfi1_diag_add(struct hfi1_devdata *dd);
+void hfi1_diag_remove(struct hfi1_devdata *dd);
void handle_linkup_change(struct hfi1_devdata *dd, u32 linkup);
void handle_user_interrupt(struct hfi1_ctxtdata *rcd);
-int hfi1_create_rcvhdrq(struct hfi1_devdata *, struct hfi1_ctxtdata *);
-int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *);
-int hfi1_create_ctxts(struct hfi1_devdata *dd);
-struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *, u32, int);
-void hfi1_init_pportdata(struct pci_dev *, struct hfi1_pportdata *,
- struct hfi1_devdata *, u8, u8);
-void hfi1_free_ctxtdata(struct hfi1_devdata *, struct hfi1_ctxtdata *);
-
-int handle_receive_interrupt(struct hfi1_ctxtdata *, int);
-int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *, int);
-int handle_receive_interrupt_dma_rtail(struct hfi1_ctxtdata *, int);
+int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd);
+int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd);
+int hfi1_create_kctxts(struct hfi1_devdata *dd);
+int hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, int numa,
+ struct hfi1_ctxtdata **rcd);
+void hfi1_free_ctxt(struct hfi1_ctxtdata *rcd);
+void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd,
+ struct hfi1_devdata *dd, u8 hw_pidx, u32 port);
+void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd);
+int hfi1_rcd_put(struct hfi1_ctxtdata *rcd);
+int hfi1_rcd_get(struct hfi1_ctxtdata *rcd);
+struct hfi1_ctxtdata *hfi1_rcd_get_by_index_safe(struct hfi1_devdata *dd,
+ u16 ctxt);
+struct hfi1_ctxtdata *hfi1_rcd_get_by_index(struct hfi1_devdata *dd, u16 ctxt);
+int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread);
+int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *rcd, int thread);
+int handle_receive_interrupt_dma_rtail(struct hfi1_ctxtdata *rcd, int thread);
+int handle_receive_interrupt_napi_fp(struct hfi1_ctxtdata *rcd, int budget);
+int handle_receive_interrupt_napi_sp(struct hfi1_ctxtdata *rcd, int budget);
void set_all_slowpath(struct hfi1_devdata *dd);
extern const struct pci_device_id hfi1_pci_tbl[];
+void hfi1_make_ud_req_9B(struct rvt_qp *qp,
+ struct hfi1_pkt_state *ps,
+ struct rvt_swqe *wqe);
+
+void hfi1_make_ud_req_16B(struct rvt_qp *qp,
+ struct hfi1_pkt_state *ps,
+ struct rvt_swqe *wqe);
/* receive packet handler dispositions */
#define RCV_PKT_OK 0x0 /* keep going */
#define RCV_PKT_LIMIT 0x1 /* stop, hit limit, start thread */
#define RCV_PKT_DONE 0x2 /* stop, no more packets detected */
+/**
+ * hfi1_rcd_head - add accessor for rcd head
+ * @rcd: the context
+ */
+static inline u32 hfi1_rcd_head(struct hfi1_ctxtdata *rcd)
+{
+ return rcd->head;
+}
+
+/**
+ * hfi1_set_rcd_head - add accessor for rcd head
+ * @rcd: the context
+ * @head: the new head
+ */
+static inline void hfi1_set_rcd_head(struct hfi1_ctxtdata *rcd, u32 head)
+{
+ rcd->head = head;
+}
+
/* calculate the current RHF address */
static inline __le32 *get_rhf_addr(struct hfi1_ctxtdata *rcd)
{
- return (__le32 *)rcd->rcvhdrq + rcd->head + rcd->dd->rhf_offset;
+ return (__le32 *)rcd->rcvhdrq + rcd->head + rcd->rhf_offset;
}
-int hfi1_reset_device(int);
+/* return DMA_RTAIL configuration */
+static inline bool get_dma_rtail_setting(struct hfi1_ctxtdata *rcd)
+{
+ return !!HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL);
+}
-/* return the driver's idea of the logical OPA port state */
-static inline u32 driver_lstate(struct hfi1_pportdata *ppd)
+/**
+ * hfi1_seq_incr_wrap - wrapping increment for sequence
+ * @seq: the current sequence number
+ *
+ * Returns: the incremented seq
+ */
+static inline u8 hfi1_seq_incr_wrap(u8 seq)
+{
+ if (++seq > RHF_MAX_SEQ)
+ seq = 1;
+ return seq;
+}
+
+/**
+ * hfi1_seq_cnt - return seq_cnt member
+ * @rcd: the receive context
+ *
+ * Return seq_cnt member
+ */
+static inline u8 hfi1_seq_cnt(struct hfi1_ctxtdata *rcd)
+{
+ return rcd->seq_cnt;
+}
+
+/**
+ * hfi1_set_seq_cnt - return seq_cnt member
+ * @rcd: the receive context
+ *
+ * Return seq_cnt member
+ */
+static inline void hfi1_set_seq_cnt(struct hfi1_ctxtdata *rcd, u8 cnt)
+{
+ rcd->seq_cnt = cnt;
+}
+
+/**
+ * last_rcv_seq - is last
+ * @rcd: the receive context
+ * @seq: sequence
+ *
+ * return true if last packet
+ */
+static inline bool last_rcv_seq(struct hfi1_ctxtdata *rcd, u32 seq)
+{
+ return seq != rcd->seq_cnt;
+}
+
+/**
+ * rcd_seq_incr - increment context sequence number
+ * @rcd: the receive context
+ * @seq: the current sequence number
+ *
+ * Returns: true if the this was the last packet
+ */
+static inline bool hfi1_seq_incr(struct hfi1_ctxtdata *rcd, u32 seq)
+{
+ rcd->seq_cnt = hfi1_seq_incr_wrap(rcd->seq_cnt);
+ return last_rcv_seq(rcd, seq);
+}
+
+/**
+ * get_hdrqentsize - return hdrq entry size
+ * @rcd: the receive context
+ */
+static inline u8 get_hdrqentsize(struct hfi1_ctxtdata *rcd)
+{
+ return rcd->rcvhdrqentsize;
+}
+
+/**
+ * get_hdrq_cnt - return hdrq count
+ * @rcd: the receive context
+ */
+static inline u16 get_hdrq_cnt(struct hfi1_ctxtdata *rcd)
{
- return ppd->lstate; /* use the cached value */
+ return rcd->rcvhdrq_cnt;
}
+/**
+ * hfi1_is_slowpath - check if this context is slow path
+ * @rcd: the receive context
+ */
+static inline bool hfi1_is_slowpath(struct hfi1_ctxtdata *rcd)
+{
+ return rcd->do_interrupt == rcd->slow_handler;
+}
+
+/**
+ * hfi1_is_fastpath - check if this context is fast path
+ * @rcd: the receive context
+ */
+static inline bool hfi1_is_fastpath(struct hfi1_ctxtdata *rcd)
+{
+ if (rcd->ctxt == HFI1_CTRL_CTXT)
+ return false;
+
+ return rcd->do_interrupt == rcd->fast_handler;
+}
+
+/**
+ * hfi1_set_fast - change to the fast handler
+ * @rcd: the receive context
+ */
+static inline void hfi1_set_fast(struct hfi1_ctxtdata *rcd)
+{
+ if (unlikely(!rcd))
+ return;
+ if (unlikely(!hfi1_is_fastpath(rcd)))
+ rcd->do_interrupt = rcd->fast_handler;
+}
+
+int hfi1_reset_device(int);
+
void receive_interrupt_work(struct work_struct *work);
/* extract service channel from header and rhf */
-static inline int hdr2sc(struct ib_header *hdr, u64 rhf)
+static inline int hfi1_9B_get_sc5(struct ib_header *hdr, u64 rhf)
{
- return ((be16_to_cpu(hdr->lrh[0]) >> 12) & 0xf) |
- ((!!(rhf_dc_info(rhf))) << 4);
+ return ib_get_sc(hdr) | ((!!(rhf_dc_info(rhf))) << 4);
}
#define HFI1_JKEY_WIDTH 16
@@ -1351,13 +1692,20 @@ static inline u32 egress_cycles(u32 len, u32 rate)
}
void set_link_ipg(struct hfi1_pportdata *ppd);
-void process_becn(struct hfi1_pportdata *ppd, u8 sl, u16 rlid, u32 lqpn,
+void process_becn(struct hfi1_pportdata *ppd, u8 sl, u32 rlid, u32 lqpn,
u32 rqpn, u8 svc_type);
void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn,
- u32 pkey, u32 slid, u32 dlid, u8 sc5,
+ u16 pkey, u32 slid, u32 dlid, u8 sc5,
const struct ib_grh *old_grh);
+void return_cnp_16B(struct hfi1_ibport *ibp, struct rvt_qp *qp,
+ u32 remote_qpn, u16 pkey, u32 slid, u32 dlid,
+ u8 sc5, const struct ib_grh *old_grh);
+typedef void (*hfi1_handle_cnp)(struct hfi1_ibport *ibp, struct rvt_qp *qp,
+ u32 remote_qpn, u16 pkey, u32 slid, u32 dlid,
+ u8 sc5, const struct ib_grh *old_grh);
+
#define PKEY_CHECK_INVALID -1
-int egress_pkey_check(struct hfi1_pportdata *ppd, __be16 *lrh, __be32 *bth,
+int egress_pkey_check(struct hfi1_pportdata *ppd, u32 slid, u16 pkey,
u8 sc5, int8_t s_pkey_index);
#define PACKET_EGRESS_TIMEOUT 350
@@ -1370,7 +1718,7 @@ static inline void pause_for_credit_return(struct hfi1_devdata *dd)
}
/**
- * sc_to_vlt() reverse lookup sc to vl
+ * sc_to_vlt() - reverse lookup sc to vl
* @dd - devdata
* @sc5 - 5 bit sc
*/
@@ -1439,7 +1787,7 @@ static int ingress_pkey_table_search(struct hfi1_pportdata *ppd, u16 pkey)
* the 'error info' for this failure.
*/
static void ingress_pkey_table_fail(struct hfi1_pportdata *ppd, u16 pkey,
- u16 slid)
+ u32 slid)
{
struct hfi1_devdata *dd = ppd->dd;
@@ -1460,9 +1808,9 @@ static void ingress_pkey_table_fail(struct hfi1_pportdata *ppd, u16 pkey,
* by HW and rcv_pkey_check function should be called instead.
*/
static inline int ingress_pkey_check(struct hfi1_pportdata *ppd, u16 pkey,
- u8 sc5, u8 idx, u16 slid)
+ u8 sc5, u8 idx, u32 slid, bool force)
{
- if (!(ppd->part_enforce & HFI1_PART_ENFORCE_IN))
+ if (!(force) && !(ppd->part_enforce & HFI1_PART_ENFORCE_IN))
return 0;
/* If SC15, pkey[0:14] must be 0x7fff */
@@ -1520,7 +1868,7 @@ bad:
u32 lrh_max_header_bytes(struct hfi1_devdata *dd);
int mtu_to_enum(u32 mtu, int default_if_bad);
-u16 enum_to_mtu(int);
+u16 enum_to_mtu(int mtu);
static inline int valid_ib_mtu(unsigned int mtu)
{
return mtu == 256 || mtu == 512 ||
@@ -1534,17 +1882,18 @@ static inline int valid_opa_max_mtu(unsigned int mtu)
(valid_ib_mtu(mtu) || mtu == 8192 || mtu == 10240);
}
-int set_mtu(struct hfi1_pportdata *);
+int set_mtu(struct hfi1_pportdata *ppd);
-int hfi1_set_lid(struct hfi1_pportdata *, u32, u8);
-void hfi1_disable_after_error(struct hfi1_devdata *);
-int hfi1_set_uevent_bits(struct hfi1_pportdata *, const int);
-int hfi1_rcvbuf_validate(u32, u8, u16 *);
+int hfi1_set_lid(struct hfi1_pportdata *ppd, u32 lid, u8 lmc);
+void hfi1_disable_after_error(struct hfi1_devdata *dd);
+int hfi1_set_uevent_bits(struct hfi1_pportdata *ppd, const int evtbit);
+int hfi1_rcvbuf_validate(u32 size, u8 type, u16 *encode);
-int fm_get_table(struct hfi1_pportdata *, int, void *);
-int fm_set_table(struct hfi1_pportdata *, int, void *);
+int fm_get_table(struct hfi1_pportdata *ppd, int which, void *t);
+int fm_set_table(struct hfi1_pportdata *ppd, int which, void *t);
-void set_up_vl15(struct hfi1_devdata *dd, u8 vau, u16 vl15buf);
+void set_up_vau(struct hfi1_devdata *dd, u8 vau);
+void set_up_vl15(struct hfi1_devdata *dd, u16 vl15buf);
void reset_link_credits(struct hfi1_devdata *dd);
void assign_remote_cm_au_table(struct hfi1_devdata *dd, u8 vcu);
@@ -1575,28 +1924,54 @@ static inline struct hfi1_ibdev *dev_from_rdi(struct rvt_dev_info *rdi)
return container_of(rdi, struct hfi1_ibdev, rdi);
}
-static inline struct hfi1_ibport *to_iport(struct ib_device *ibdev, u8 port)
+static inline struct hfi1_ibport *to_iport(struct ib_device *ibdev, u32 port)
{
struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
- unsigned pidx = port - 1; /* IB number port from 1, hdw from 0 */
+ u32 pidx = port - 1; /* IB number port from 1, hdw from 0 */
WARN_ON(pidx >= dd->num_pports);
return &dd->pport[pidx].ibport_data;
}
-void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt,
- bool do_cnp);
-static inline bool process_ecn(struct rvt_qp *qp, struct hfi1_packet *pkt,
- bool do_cnp)
+static inline struct hfi1_ibport *rcd_to_iport(struct hfi1_ctxtdata *rcd)
{
- struct ib_other_headers *ohdr = pkt->ohdr;
- u32 bth1;
+ return &rcd->ppd->ibport_data;
+}
- bth1 = be32_to_cpu(ohdr->bth[1]);
- if (unlikely(bth1 & (HFI1_BECN_SMASK | HFI1_FECN_SMASK))) {
- hfi1_process_ecn_slowpath(qp, pkt, do_cnp);
- return bth1 & HFI1_FECN_SMASK;
+/**
+ * hfi1_may_ecn - Check whether FECN or BECN processing should be done
+ * @pkt: the packet to be evaluated
+ *
+ * Check whether the FECN or BECN bits in the packet's header are
+ * enabled, depending on packet type.
+ *
+ * This function only checks for FECN and BECN bits. Additional checks
+ * are done in the slowpath (hfi1_process_ecn_slowpath()) in order to
+ * ensure correct handling.
+ */
+static inline bool hfi1_may_ecn(struct hfi1_packet *pkt)
+{
+ bool fecn, becn;
+
+ if (pkt->etype == RHF_RCV_TYPE_BYPASS) {
+ fecn = hfi1_16B_get_fecn(pkt->hdr);
+ becn = hfi1_16B_get_becn(pkt->hdr);
+ } else {
+ fecn = ib_bth_get_fecn(pkt->ohdr);
+ becn = ib_bth_get_becn(pkt->ohdr);
}
+ return fecn || becn;
+}
+
+bool hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt,
+ bool prescan);
+static inline bool process_ecn(struct rvt_qp *qp, struct hfi1_packet *pkt)
+{
+ bool do_work;
+
+ do_work = hfi1_may_ecn(pkt);
+ if (unlikely(do_work))
+ return hfi1_process_ecn_slowpath(qp, pkt, false);
return false;
}
@@ -1654,25 +2029,24 @@ struct cc_state *get_cc_state_protected(struct hfi1_pportdata *ppd)
#define HFI1_HAS_SDMA_TIMEOUT 0x8
#define HFI1_HAS_SEND_DMA 0x10 /* Supports Send DMA */
#define HFI1_FORCED_FREEZE 0x80 /* driver forced freeze mode */
+#define HFI1_SHUTDOWN 0x100 /* device is shutting down */
/* IB dword length mask in PBC (lower 11 bits); same for all chips */
#define HFI1_PBC_LENGTH_MASK ((1 << 11) - 1)
/* ctxt_flag bit offsets */
- /* context has been setup */
-#define HFI1_CTXT_SETUP_DONE 1
+ /* base context has not finished initializing */
+#define HFI1_CTXT_BASE_UNINIT 1
+ /* base context initaliation failed */
+#define HFI1_CTXT_BASE_FAILED 2
/* waiting for a packet to arrive */
-#define HFI1_CTXT_WAITING_RCV 2
- /* master has not finished initializing */
-#define HFI1_CTXT_MASTER_UNINIT 4
+#define HFI1_CTXT_WAITING_RCV 3
/* waiting for an urgent packet to arrive */
-#define HFI1_CTXT_WAITING_URG 5
+#define HFI1_CTXT_WAITING_URG 4
/* free up any allocated data at closes */
-struct hfi1_devdata *hfi1_init_dd(struct pci_dev *,
- const struct pci_device_id *);
-void hfi1_free_devdata(struct hfi1_devdata *);
-struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra);
+int hfi1_init_dd(struct hfi1_devdata *dd);
+void hfi1_free_devdata(struct hfi1_devdata *dd);
/* LED beaconing functions */
void hfi1_start_led_override(struct hfi1_pportdata *ppd, unsigned int timeon,
@@ -1726,9 +2100,21 @@ int hfi1_acquire_user_pages(struct mm_struct *mm, unsigned long vaddr,
void hfi1_release_user_pages(struct mm_struct *mm, struct page **p,
size_t npages, bool dirty);
+/**
+ * hfi1_rcvhdrtail_kvaddr - return tail kvaddr
+ * @rcd - the receive context
+ */
+static inline __le64 *hfi1_rcvhdrtail_kvaddr(const struct hfi1_ctxtdata *rcd)
+{
+ return (__le64 *)rcd->rcvhdrtail_kvaddr;
+}
+
static inline void clear_rcvhdrtail(const struct hfi1_ctxtdata *rcd)
{
- *((u64 *)rcd->rcvhdrtail_kvaddr) = 0ULL;
+ u64 *kv = (u64 *)hfi1_rcvhdrtail_kvaddr(rcd);
+
+ if (kv)
+ *kv = 0ULL;
}
static inline u32 get_rcvhdrtail(const struct hfi1_ctxtdata *rcd)
@@ -1737,7 +2123,17 @@ static inline u32 get_rcvhdrtail(const struct hfi1_ctxtdata *rcd)
* volatile because it's a DMA target from the chip, routine is
* inlined, and don't want register caching or reordering.
*/
- return (u32)le64_to_cpu(*rcd->rcvhdrtail_kvaddr);
+ return (u32)le64_to_cpu(*hfi1_rcvhdrtail_kvaddr(rcd));
+}
+
+static inline bool hfi1_packet_present(struct hfi1_ctxtdata *rcd)
+{
+ if (likely(!rcd->rcvhdrtail_kvaddr)) {
+ u32 seq = rhf_rcv_seq(rhf_to_cpu(get_rhf_addr(rcd)));
+
+ return !last_rcv_seq(rcd, seq);
+ }
+ return hfi1_rcd_head(rcd) != get_rcvhdrtail(rcd);
}
/*
@@ -1745,35 +2141,32 @@ static inline u32 get_rcvhdrtail(const struct hfi1_ctxtdata *rcd)
*/
extern const char ib_hfi1_version[];
+extern const struct attribute_group ib_hfi1_attr_group;
+extern const struct attribute_group *hfi1_attr_port_groups[];
-int hfi1_device_create(struct hfi1_devdata *);
-void hfi1_device_remove(struct hfi1_devdata *);
+int hfi1_device_create(struct hfi1_devdata *dd);
+void hfi1_device_remove(struct hfi1_devdata *dd);
-int hfi1_create_port_files(struct ib_device *ibdev, u8 port_num,
- struct kobject *kobj);
-int hfi1_verbs_register_sysfs(struct hfi1_devdata *);
-void hfi1_verbs_unregister_sysfs(struct hfi1_devdata *);
+int hfi1_verbs_register_sysfs(struct hfi1_devdata *dd);
+void hfi1_verbs_unregister_sysfs(struct hfi1_devdata *dd);
/* Hook for sysfs read of QSFP */
int qsfp_dump(struct hfi1_pportdata *ppd, char *buf, int len);
-int hfi1_pcie_init(struct pci_dev *, const struct pci_device_id *);
-void hfi1_pcie_cleanup(struct pci_dev *);
-int hfi1_pcie_ddinit(struct hfi1_devdata *, struct pci_dev *);
+int hfi1_pcie_init(struct hfi1_devdata *dd);
+void hfi1_pcie_cleanup(struct pci_dev *pdev);
+int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev);
void hfi1_pcie_ddcleanup(struct hfi1_devdata *);
-void hfi1_pcie_flr(struct hfi1_devdata *);
-int pcie_speeds(struct hfi1_devdata *);
-void request_msix(struct hfi1_devdata *, u32 *, struct hfi1_msix_entry *);
-void hfi1_enable_intx(struct pci_dev *);
-void restore_pci_variables(struct hfi1_devdata *dd);
+int pcie_speeds(struct hfi1_devdata *dd);
+int restore_pci_variables(struct hfi1_devdata *dd);
+int save_pci_variables(struct hfi1_devdata *dd);
int do_pcie_gen3_transition(struct hfi1_devdata *dd);
+void tune_pcie_caps(struct hfi1_devdata *dd);
int parse_platform_config(struct hfi1_devdata *dd);
int get_platform_config_field(struct hfi1_devdata *dd,
enum platform_config_table_type_encoding
table_type, int table_index, int field_index,
u32 *data, u32 len);
-const char *get_unit_name(int unit);
-const char *get_card_name(struct rvt_dev_info *rdi);
struct pci_dev *get_pci_dev(struct rvt_dev_info *rdi);
/*
@@ -1786,14 +2179,7 @@ static inline void flush_wc(void)
}
void handle_eflags(struct hfi1_packet *packet);
-int process_receive_ib(struct hfi1_packet *packet);
-int process_receive_bypass(struct hfi1_packet *packet);
-int process_receive_error(struct hfi1_packet *packet);
-int kdeth_process_expected(struct hfi1_packet *packet);
-int kdeth_process_eager(struct hfi1_packet *packet);
-int process_receive_invalid(struct hfi1_packet *packet);
-
-void update_sge(struct rvt_sge_state *ss, u32 length);
+void seqfile_dump_rcd(struct seq_file *s, struct hfi1_ctxtdata *rcd);
/* global module parameter variables */
extern unsigned int hfi1_max_mtu;
@@ -1803,7 +2189,6 @@ extern int num_user_contexts;
extern unsigned long n_krcvqs;
extern uint krcvqs[];
extern int krcvqsset;
-extern uint kdeth_qp;
extern uint loopback;
extern uint quick_linkup;
extern uint rcv_intr_timeout;
@@ -1849,7 +2234,9 @@ static inline u64 hfi1_pkt_default_send_ctxt_mask(struct hfi1_devdata *dd,
| SEND_CTXT_CHECK_ENABLE_DISALLOW_TOO_LONG_BYPASS_PACKETS_SMASK
| SEND_CTXT_CHECK_ENABLE_DISALLOW_TOO_LONG_IB_PACKETS_SMASK
| SEND_CTXT_CHECK_ENABLE_DISALLOW_BAD_PKT_LEN_SMASK
+#ifndef CONFIG_FAULT_INJECTION
| SEND_CTXT_CHECK_ENABLE_DISALLOW_PBC_TEST_SMASK
+#endif
| SEND_CTXT_CHECK_ENABLE_DISALLOW_TOO_SMALL_BYPASS_PACKETS_SMASK
| SEND_CTXT_CHECK_ENABLE_DISALLOW_TOO_SMALL_IB_PACKETS_SMASK
| SEND_CTXT_CHECK_ENABLE_DISALLOW_RAW_IPV6_SMASK
@@ -1862,8 +2249,12 @@ static inline u64 hfi1_pkt_default_send_ctxt_mask(struct hfi1_devdata *dd,
| SEND_CTXT_CHECK_ENABLE_CHECK_ENABLE_SMASK;
if (ctxt_type == SC_USER)
- base_sc_integrity |= HFI1_PKT_USER_SC_INTEGRITY;
- else
+ base_sc_integrity |=
+#ifndef CONFIG_FAULT_INJECTION
+ SEND_CTXT_CHECK_ENABLE_DISALLOW_PBC_TEST_SMASK |
+#endif
+ HFI1_PKT_USER_SC_INTEGRITY;
+ else if (ctxt_type != SC_KERNEL)
base_sc_integrity |= HFI1_PKT_KERNEL_SC_INTEGRITY;
/* turn on send-side job key checks if !A0 */
@@ -1909,58 +2300,44 @@ static inline u64 hfi1_pkt_base_sdma_integrity(struct hfi1_devdata *dd)
return base_sdma_integrity;
}
-/*
- * hfi1_early_err is used (only!) to print early errors before devdata is
- * allocated, or when dd->pcidev may not be valid, and at the tail end of
- * cleanup when devdata may have been freed, etc. hfi1_dev_porterr is
- * the same as dd_dev_err, but is used when the message really needs
- * the IB port# to be definitive as to what's happening..
- */
-#define hfi1_early_err(dev, fmt, ...) \
- dev_err(dev, fmt, ##__VA_ARGS__)
-
-#define hfi1_early_info(dev, fmt, ...) \
- dev_info(dev, fmt, ##__VA_ARGS__)
-
#define dd_dev_emerg(dd, fmt, ...) \
dev_emerg(&(dd)->pcidev->dev, "%s: " fmt, \
- get_unit_name((dd)->unit), ##__VA_ARGS__)
+ rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), ##__VA_ARGS__)
+
#define dd_dev_err(dd, fmt, ...) \
dev_err(&(dd)->pcidev->dev, "%s: " fmt, \
- get_unit_name((dd)->unit), ##__VA_ARGS__)
+ rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), ##__VA_ARGS__)
+
+#define dd_dev_err_ratelimited(dd, fmt, ...) \
+ dev_err_ratelimited(&(dd)->pcidev->dev, "%s: " fmt, \
+ rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), \
+ ##__VA_ARGS__)
+
#define dd_dev_warn(dd, fmt, ...) \
dev_warn(&(dd)->pcidev->dev, "%s: " fmt, \
- get_unit_name((dd)->unit), ##__VA_ARGS__)
+ rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), ##__VA_ARGS__)
#define dd_dev_warn_ratelimited(dd, fmt, ...) \
dev_warn_ratelimited(&(dd)->pcidev->dev, "%s: " fmt, \
- get_unit_name((dd)->unit), ##__VA_ARGS__)
+ rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), \
+ ##__VA_ARGS__)
#define dd_dev_info(dd, fmt, ...) \
dev_info(&(dd)->pcidev->dev, "%s: " fmt, \
- get_unit_name((dd)->unit), ##__VA_ARGS__)
+ rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), ##__VA_ARGS__)
+
+#define dd_dev_info_ratelimited(dd, fmt, ...) \
+ dev_info_ratelimited(&(dd)->pcidev->dev, "%s: " fmt, \
+ rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), \
+ ##__VA_ARGS__)
#define dd_dev_dbg(dd, fmt, ...) \
dev_dbg(&(dd)->pcidev->dev, "%s: " fmt, \
- get_unit_name((dd)->unit), ##__VA_ARGS__)
+ rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), ##__VA_ARGS__)
#define hfi1_dev_porterr(dd, port, fmt, ...) \
dev_err(&(dd)->pcidev->dev, "%s: port %u: " fmt, \
- get_unit_name((dd)->unit), (port), ##__VA_ARGS__)
-
-/*
- * this is used for formatting hw error messages...
- */
-struct hfi1_hwerror_msgs {
- u64 mask;
- const char *msg;
- size_t sz;
-};
-
-/* in intr.c... */
-void hfi1_format_hwerrors(u64 hwerrs,
- const struct hfi1_hwerror_msgs *hwerrmsgs,
- size_t nhwerrmsgs, char *msg, size_t lmsg);
+ rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), (port), ##__VA_ARGS__)
#define USER_OPCODE_CHECK_VAL 0xC0
#define USER_OPCODE_CHECK_MASK 0xC0
@@ -2012,57 +2389,244 @@ static inline bool is_integrated(struct hfi1_devdata *dd)
return dd->pcidev->device == PCI_DEVICE_ID_INTEL1;
}
+/**
+ * hfi1_need_drop - detect need for drop
+ * @dd: - the device
+ *
+ * In some cases, the first packet needs to be dropped.
+ *
+ * Return true is the current packet needs to be dropped and false otherwise.
+ */
+static inline bool hfi1_need_drop(struct hfi1_devdata *dd)
+{
+ if (unlikely(dd->do_drop &&
+ atomic_xchg(&dd->drop_packet, DROP_PACKET_OFF) ==
+ DROP_PACKET_ON)) {
+ dd->do_drop = false;
+ return true;
+ }
+ return false;
+}
+
int hfi1_tempsense_rd(struct hfi1_devdata *dd, struct hfi1_temp *temp);
#define DD_DEV_ENTRY(dd) __string(dev, dev_name(&(dd)->pcidev->dev))
-#define DD_DEV_ASSIGN(dd) __assign_str(dev, dev_name(&(dd)->pcidev->dev))
-
-#define packettype_name(etype) { RHF_RCV_TYPE_##etype, #etype }
-#define show_packettype(etype) \
-__print_symbolic(etype, \
- packettype_name(EXPECTED), \
- packettype_name(EAGER), \
- packettype_name(IB), \
- packettype_name(ERROR), \
- packettype_name(BYPASS))
-
-#define ib_opcode_name(opcode) { IB_OPCODE_##opcode, #opcode }
-#define show_ib_opcode(opcode) \
-__print_symbolic(opcode, \
- ib_opcode_name(RC_SEND_FIRST), \
- ib_opcode_name(RC_SEND_MIDDLE), \
- ib_opcode_name(RC_SEND_LAST), \
- ib_opcode_name(RC_SEND_LAST_WITH_IMMEDIATE), \
- ib_opcode_name(RC_SEND_ONLY), \
- ib_opcode_name(RC_SEND_ONLY_WITH_IMMEDIATE), \
- ib_opcode_name(RC_RDMA_WRITE_FIRST), \
- ib_opcode_name(RC_RDMA_WRITE_MIDDLE), \
- ib_opcode_name(RC_RDMA_WRITE_LAST), \
- ib_opcode_name(RC_RDMA_WRITE_LAST_WITH_IMMEDIATE), \
- ib_opcode_name(RC_RDMA_WRITE_ONLY), \
- ib_opcode_name(RC_RDMA_WRITE_ONLY_WITH_IMMEDIATE), \
- ib_opcode_name(RC_RDMA_READ_REQUEST), \
- ib_opcode_name(RC_RDMA_READ_RESPONSE_FIRST), \
- ib_opcode_name(RC_RDMA_READ_RESPONSE_MIDDLE), \
- ib_opcode_name(RC_RDMA_READ_RESPONSE_LAST), \
- ib_opcode_name(RC_RDMA_READ_RESPONSE_ONLY), \
- ib_opcode_name(RC_ACKNOWLEDGE), \
- ib_opcode_name(RC_ATOMIC_ACKNOWLEDGE), \
- ib_opcode_name(RC_COMPARE_SWAP), \
- ib_opcode_name(RC_FETCH_ADD), \
- ib_opcode_name(UC_SEND_FIRST), \
- ib_opcode_name(UC_SEND_MIDDLE), \
- ib_opcode_name(UC_SEND_LAST), \
- ib_opcode_name(UC_SEND_LAST_WITH_IMMEDIATE), \
- ib_opcode_name(UC_SEND_ONLY), \
- ib_opcode_name(UC_SEND_ONLY_WITH_IMMEDIATE), \
- ib_opcode_name(UC_RDMA_WRITE_FIRST), \
- ib_opcode_name(UC_RDMA_WRITE_MIDDLE), \
- ib_opcode_name(UC_RDMA_WRITE_LAST), \
- ib_opcode_name(UC_RDMA_WRITE_LAST_WITH_IMMEDIATE), \
- ib_opcode_name(UC_RDMA_WRITE_ONLY), \
- ib_opcode_name(UC_RDMA_WRITE_ONLY_WITH_IMMEDIATE), \
- ib_opcode_name(UD_SEND_ONLY), \
- ib_opcode_name(UD_SEND_ONLY_WITH_IMMEDIATE), \
- ib_opcode_name(CNP))
+#define DD_DEV_ASSIGN(dd) __assign_str(dev)
+
+static inline void hfi1_update_ah_attr(struct ib_device *ibdev,
+ struct rdma_ah_attr *attr)
+{
+ struct hfi1_pportdata *ppd;
+ struct hfi1_ibport *ibp;
+ u32 dlid = rdma_ah_get_dlid(attr);
+
+ /*
+ * Kernel clients may not have setup GRH information
+ * Set that here.
+ */
+ ibp = to_iport(ibdev, rdma_ah_get_port_num(attr));
+ ppd = ppd_from_ibp(ibp);
+ if ((((dlid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) ||
+ (ppd->lid >= be16_to_cpu(IB_MULTICAST_LID_BASE))) &&
+ (dlid != be32_to_cpu(OPA_LID_PERMISSIVE)) &&
+ (dlid != be16_to_cpu(IB_LID_PERMISSIVE)) &&
+ (!(rdma_ah_get_ah_flags(attr) & IB_AH_GRH))) ||
+ (rdma_ah_get_make_grd(attr))) {
+ rdma_ah_set_ah_flags(attr, IB_AH_GRH);
+ rdma_ah_set_interface_id(attr, OPA_MAKE_ID(dlid));
+ rdma_ah_set_subnet_prefix(attr, ibp->rvp.gid_prefix);
+ }
+}
+
+/*
+ * hfi1_check_mcast- Check if the given lid is
+ * in the OPA multicast range.
+ *
+ * The LID might either reside in ah.dlid or might be
+ * in the GRH of the address handle as DGID if extended
+ * addresses are in use.
+ */
+static inline bool hfi1_check_mcast(u32 lid)
+{
+ return ((lid >= opa_get_mcast_base(OPA_MCAST_NR)) &&
+ (lid != be32_to_cpu(OPA_LID_PERMISSIVE)));
+}
+
+#define opa_get_lid(lid, format) \
+ __opa_get_lid(lid, OPA_PORT_PACKET_FORMAT_##format)
+
+/* Convert a lid to a specific lid space */
+static inline u32 __opa_get_lid(u32 lid, u8 format)
+{
+ bool is_mcast = hfi1_check_mcast(lid);
+
+ switch (format) {
+ case OPA_PORT_PACKET_FORMAT_8B:
+ case OPA_PORT_PACKET_FORMAT_10B:
+ if (is_mcast)
+ return (lid - opa_get_mcast_base(OPA_MCAST_NR) +
+ 0xF0000);
+ return lid & 0xFFFFF;
+ case OPA_PORT_PACKET_FORMAT_16B:
+ if (is_mcast)
+ return (lid - opa_get_mcast_base(OPA_MCAST_NR) +
+ 0xF00000);
+ return lid & 0xFFFFFF;
+ case OPA_PORT_PACKET_FORMAT_9B:
+ if (is_mcast)
+ return (lid -
+ opa_get_mcast_base(OPA_MCAST_NR) +
+ be16_to_cpu(IB_MULTICAST_LID_BASE));
+ else
+ return lid & 0xFFFF;
+ default:
+ return lid;
+ }
+}
+
+/* Return true if the given lid is the OPA 16B multicast range */
+static inline bool hfi1_is_16B_mcast(u32 lid)
+{
+ return ((lid >=
+ opa_get_lid(opa_get_mcast_base(OPA_MCAST_NR), 16B)) &&
+ (lid != opa_get_lid(be32_to_cpu(OPA_LID_PERMISSIVE), 16B)));
+}
+
+static inline void hfi1_make_opa_lid(struct rdma_ah_attr *attr)
+{
+ const struct ib_global_route *grh = rdma_ah_read_grh(attr);
+ u32 dlid = rdma_ah_get_dlid(attr);
+
+ /* Modify ah_attr.dlid to be in the 32 bit LID space.
+ * This is how the address will be laid out:
+ * Assuming MCAST_NR to be 4,
+ * 32 bit permissive LID = 0xFFFFFFFF
+ * Multicast LID range = 0xFFFFFFFE to 0xF0000000
+ * Unicast LID range = 0xEFFFFFFF to 1
+ * Invalid LID = 0
+ */
+ if (ib_is_opa_gid(&grh->dgid))
+ dlid = opa_get_lid_from_gid(&grh->dgid);
+ else if ((dlid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) &&
+ (dlid != be16_to_cpu(IB_LID_PERMISSIVE)) &&
+ (dlid != be32_to_cpu(OPA_LID_PERMISSIVE)))
+ dlid = dlid - be16_to_cpu(IB_MULTICAST_LID_BASE) +
+ opa_get_mcast_base(OPA_MCAST_NR);
+ else if (dlid == be16_to_cpu(IB_LID_PERMISSIVE))
+ dlid = be32_to_cpu(OPA_LID_PERMISSIVE);
+
+ rdma_ah_set_dlid(attr, dlid);
+}
+
+static inline u8 hfi1_get_packet_type(u32 lid)
+{
+ /* 9B if lid > 0xF0000000 */
+ if (lid >= opa_get_mcast_base(OPA_MCAST_NR))
+ return HFI1_PKT_TYPE_9B;
+
+ /* 16B if lid > 0xC000 */
+ if (lid >= opa_get_lid(opa_get_mcast_base(OPA_MCAST_NR), 9B))
+ return HFI1_PKT_TYPE_16B;
+
+ return HFI1_PKT_TYPE_9B;
+}
+
+static inline bool hfi1_get_hdr_type(u32 lid, struct rdma_ah_attr *attr)
+{
+ /*
+ * If there was an incoming 16B packet with permissive
+ * LIDs, OPA GIDs would have been programmed when those
+ * packets were received. A 16B packet will have to
+ * be sent in response to that packet. Return a 16B
+ * header type if that's the case.
+ */
+ if (rdma_ah_get_dlid(attr) == be32_to_cpu(OPA_LID_PERMISSIVE))
+ return (ib_is_opa_gid(&rdma_ah_read_grh(attr)->dgid)) ?
+ HFI1_PKT_TYPE_16B : HFI1_PKT_TYPE_9B;
+
+ /*
+ * Return a 16B header type if either the destination
+ * or source lid is extended.
+ */
+ if (hfi1_get_packet_type(rdma_ah_get_dlid(attr)) == HFI1_PKT_TYPE_16B)
+ return HFI1_PKT_TYPE_16B;
+
+ return hfi1_get_packet_type(lid);
+}
+
+static inline void hfi1_make_ext_grh(struct hfi1_packet *packet,
+ struct ib_grh *grh, u32 slid,
+ u32 dlid)
+{
+ struct hfi1_ibport *ibp = &packet->rcd->ppd->ibport_data;
+ struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
+
+ if (!ibp)
+ return;
+
+ grh->hop_limit = 1;
+ grh->sgid.global.subnet_prefix = ibp->rvp.gid_prefix;
+ if (slid == opa_get_lid(be32_to_cpu(OPA_LID_PERMISSIVE), 16B))
+ grh->sgid.global.interface_id =
+ OPA_MAKE_ID(be32_to_cpu(OPA_LID_PERMISSIVE));
+ else
+ grh->sgid.global.interface_id = OPA_MAKE_ID(slid);
+
+ /*
+ * Upper layers (like mad) may compare the dgid in the
+ * wc that is obtained here with the sgid_index in
+ * the wr. Since sgid_index in wr is always 0 for
+ * extended lids, set the dgid here to the default
+ * IB gid.
+ */
+ grh->dgid.global.subnet_prefix = ibp->rvp.gid_prefix;
+ grh->dgid.global.interface_id =
+ cpu_to_be64(ppd->guids[HFI1_PORT_GUID_INDEX]);
+}
+
+static inline int hfi1_get_16b_padding(u32 hdr_size, u32 payload)
+{
+ return -(hdr_size + payload + (SIZE_OF_CRC << 2) +
+ SIZE_OF_LT) & 0x7;
+}
+
+static inline void hfi1_make_ib_hdr(struct ib_header *hdr,
+ u16 lrh0, u16 len,
+ u16 dlid, u16 slid)
+{
+ hdr->lrh[0] = cpu_to_be16(lrh0);
+ hdr->lrh[1] = cpu_to_be16(dlid);
+ hdr->lrh[2] = cpu_to_be16(len);
+ hdr->lrh[3] = cpu_to_be16(slid);
+}
+
+static inline void hfi1_make_16b_hdr(struct hfi1_16b_header *hdr,
+ u32 slid, u32 dlid,
+ u16 len, u16 pkey,
+ bool becn, bool fecn, u8 l4,
+ u8 sc)
+{
+ u32 lrh0 = 0;
+ u32 lrh1 = 0x40000000;
+ u32 lrh2 = 0;
+ u32 lrh3 = 0;
+
+ lrh0 = (lrh0 & ~OPA_16B_BECN_MASK) | (becn << OPA_16B_BECN_SHIFT);
+ lrh0 = (lrh0 & ~OPA_16B_LEN_MASK) | (len << OPA_16B_LEN_SHIFT);
+ lrh0 = (lrh0 & ~OPA_16B_LID_MASK) | (slid & OPA_16B_LID_MASK);
+ lrh1 = (lrh1 & ~OPA_16B_FECN_MASK) | (fecn << OPA_16B_FECN_SHIFT);
+ lrh1 = (lrh1 & ~OPA_16B_SC_MASK) | (sc << OPA_16B_SC_SHIFT);
+ lrh1 = (lrh1 & ~OPA_16B_LID_MASK) | (dlid & OPA_16B_LID_MASK);
+ lrh2 = (lrh2 & ~OPA_16B_SLID_MASK) |
+ ((slid >> OPA_16B_SLID_SHIFT) << OPA_16B_SLID_HIGH_SHIFT);
+ lrh2 = (lrh2 & ~OPA_16B_DLID_MASK) |
+ ((dlid >> OPA_16B_DLID_SHIFT) << OPA_16B_DLID_HIGH_SHIFT);
+ lrh2 = (lrh2 & ~OPA_16B_PKEY_MASK) | ((u32)pkey << OPA_16B_PKEY_SHIFT);
+ lrh2 = (lrh2 & ~OPA_16B_L4_MASK) | l4;
+
+ hdr->lrh[0] = lrh0;
+ hdr->lrh[1] = lrh1;
+ hdr->lrh[2] = lrh2;
+ hdr->lrh[3] = lrh3;
+}
#endif /* _HFI1_KERNEL_H */
diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c
index e3b5bc93bc70..e4aef102dac0 100644
--- a/drivers/infiniband/hw/hfi1/init.c
+++ b/drivers/infiniband/hw/hfi1/init.c
@@ -1,58 +1,19 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
+ * Copyright(c) 2015 - 2020 Intel Corporation.
+ * Copyright(c) 2021 Cornelis Networks.
*/
#include <linux/pci.h>
#include <linux/netdevice.h>
#include <linux/vmalloc.h>
#include <linux/delay.h>
-#include <linux/idr.h>
+#include <linux/xarray.h>
#include <linux/module.h>
#include <linux/printk.h>
#include <linux/hrtimer.h>
+#include <linux/bitmap.h>
+#include <linux/numa.h>
#include <rdma/rdma_vt.h>
#include "hfi.h"
@@ -65,6 +26,9 @@
#include "verbs.h"
#include "aspm.h"
#include "affinity.h"
+#include "vnic.h"
+#include "exp_rcv.h"
+#include "netdev.h"
#undef pr_fmt
#define pr_fmt(fmt) DRIVER_NAME ": " fmt
@@ -74,19 +38,19 @@
*/
#define HFI1_MIN_USER_CTXT_BUFCNT 7
-#define HFI1_MIN_HDRQ_EGRBUF_CNT 2
-#define HFI1_MAX_HDRQ_EGRBUF_CNT 16352
#define HFI1_MIN_EAGER_BUFFER_SIZE (4 * 1024) /* 4KB */
#define HFI1_MAX_EAGER_BUFFER_SIZE (256 * 1024) /* 256KB */
+#define NUM_IB_PORTS 1
+
/*
* Number of user receive contexts we are configured to use (to allow for more
* pio buffers per ctxt, etc.) Zero means use one user context per CPU.
*/
int num_user_contexts = -1;
-module_param_named(num_user_contexts, num_user_contexts, uint, S_IRUGO);
+module_param_named(num_user_contexts, num_user_contexts, int, 0444);
MODULE_PARM_DESC(
- num_user_contexts, "Set max number of user contexts to use");
+ num_user_contexts, "Set max number of user contexts to use (default: -1 will use the real (non-HT) CPU count)");
uint krcvqs[RXE_NUM_DATA_VL];
int krcvqsset;
@@ -100,113 +64,245 @@ static unsigned hfi1_rcvarr_split = 25;
module_param_named(rcvarr_split, hfi1_rcvarr_split, uint, S_IRUGO);
MODULE_PARM_DESC(rcvarr_split, "Percent of context's RcvArray entries used for Eager buffers");
-static uint eager_buffer_size = (2 << 20); /* 2MB */
+static uint eager_buffer_size = (8 << 20); /* 8MB */
module_param(eager_buffer_size, uint, S_IRUGO);
-MODULE_PARM_DESC(eager_buffer_size, "Size of the eager buffers, default: 2MB");
+MODULE_PARM_DESC(eager_buffer_size, "Size of the eager buffers, default: 8MB");
static uint rcvhdrcnt = 2048; /* 2x the max eager buffer count */
module_param_named(rcvhdrcnt, rcvhdrcnt, uint, S_IRUGO);
MODULE_PARM_DESC(rcvhdrcnt, "Receive header queue count (default 2048)");
static uint hfi1_hdrq_entsize = 32;
-module_param_named(hdrq_entsize, hfi1_hdrq_entsize, uint, S_IRUGO);
-MODULE_PARM_DESC(hdrq_entsize, "Size of header queue entries: 2 - 8B, 16 - 64B (default), 32 - 128B");
+module_param_named(hdrq_entsize, hfi1_hdrq_entsize, uint, 0444);
+MODULE_PARM_DESC(hdrq_entsize, "Size of header queue entries: 2 - 8B, 16 - 64B, 32 - 128B (default)");
unsigned int user_credit_return_threshold = 33; /* default is 33% */
module_param(user_credit_return_threshold, uint, S_IRUGO);
MODULE_PARM_DESC(user_credit_return_threshold, "Credit return threshold for user send contexts, return when unreturned credits passes this many blocks (in percent of allocated blocks, 0 is off)");
-static inline u64 encode_rcv_header_entry_size(u16);
-
-static struct idr hfi1_unit_table;
-u32 hfi1_cpulist_count;
-unsigned long *hfi1_cpulist;
+DEFINE_XARRAY_FLAGS(hfi1_dev_table, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ);
-/*
- * Common code for creating the receive context array.
- */
-int hfi1_create_ctxts(struct hfi1_devdata *dd)
+static int hfi1_create_kctxt(struct hfi1_devdata *dd,
+ struct hfi1_pportdata *ppd)
{
- unsigned i;
+ struct hfi1_ctxtdata *rcd;
int ret;
/* Control context has to be always 0 */
BUILD_BUG_ON(HFI1_CTRL_CTXT != 0);
- dd->rcd = kzalloc_node(dd->num_rcv_contexts * sizeof(*dd->rcd),
- GFP_KERNEL, dd->node);
- if (!dd->rcd)
- goto nomem;
+ ret = hfi1_create_ctxtdata(ppd, dd->node, &rcd);
+ if (ret < 0) {
+ dd_dev_err(dd, "Kernel receive context allocation failed\n");
+ return ret;
+ }
- /* create one or more kernel contexts */
- for (i = 0; i < dd->first_user_ctxt; ++i) {
- struct hfi1_pportdata *ppd;
- struct hfi1_ctxtdata *rcd;
+ /*
+ * Set up the kernel context flags here and now because they use
+ * default values for all receive side memories. User contexts will
+ * be handled as they are created.
+ */
+ rcd->flags = HFI1_CAP_KGET(MULTI_PKT_EGR) |
+ HFI1_CAP_KGET(NODROP_RHQ_FULL) |
+ HFI1_CAP_KGET(NODROP_EGR_FULL) |
+ HFI1_CAP_KGET(DMA_RTAIL);
+
+ /* Control context must use DMA_RTAIL */
+ if (rcd->ctxt == HFI1_CTRL_CTXT)
+ rcd->flags |= HFI1_CAP_DMA_RTAIL;
+ rcd->fast_handler = get_dma_rtail_setting(rcd) ?
+ handle_receive_interrupt_dma_rtail :
+ handle_receive_interrupt_nodma_rtail;
+
+ hfi1_set_seq_cnt(rcd, 1);
+
+ rcd->sc = sc_alloc(dd, SC_ACK, rcd->rcvhdrqentsize, dd->node);
+ if (!rcd->sc) {
+ dd_dev_err(dd, "Kernel send context allocation failed\n");
+ return -ENOMEM;
+ }
+ hfi1_init_ctxt(rcd->sc);
- ppd = dd->pport + (i % dd->num_pports);
+ return 0;
+}
- /* dd->rcd[i] gets assigned inside the callee */
- rcd = hfi1_create_ctxtdata(ppd, i, dd->node);
- if (!rcd) {
- dd_dev_err(dd,
- "Unable to allocate kernel receive context, failing\n");
- goto nomem;
- }
- /*
- * Set up the kernel context flags here and now because they
- * use default values for all receive side memories. User
- * contexts will be handled as they are created.
- */
- rcd->flags = HFI1_CAP_KGET(MULTI_PKT_EGR) |
- HFI1_CAP_KGET(NODROP_RHQ_FULL) |
- HFI1_CAP_KGET(NODROP_EGR_FULL) |
- HFI1_CAP_KGET(DMA_RTAIL);
-
- /* Control context must use DMA_RTAIL */
- if (rcd->ctxt == HFI1_CTRL_CTXT)
- rcd->flags |= HFI1_CAP_DMA_RTAIL;
- rcd->seq_cnt = 1;
-
- rcd->sc = sc_alloc(dd, SC_ACK, rcd->rcvhdrqentsize, dd->node);
- if (!rcd->sc) {
- dd_dev_err(dd,
- "Unable to allocate kernel send context, failing\n");
- goto nomem;
- }
+/*
+ * Create the receive context array and one or more kernel contexts
+ */
+int hfi1_create_kctxts(struct hfi1_devdata *dd)
+{
+ u16 i;
+ int ret;
- ret = hfi1_init_ctxt(rcd->sc);
- if (ret < 0) {
- dd_dev_err(dd,
- "Failed to setup kernel receive context, failing\n");
- ret = -EFAULT;
+ dd->rcd = kcalloc_node(dd->num_rcv_contexts, sizeof(*dd->rcd),
+ GFP_KERNEL, dd->node);
+ if (!dd->rcd)
+ return -ENOMEM;
+
+ for (i = 0; i < dd->first_dyn_alloc_ctxt; ++i) {
+ ret = hfi1_create_kctxt(dd, dd->pport);
+ if (ret)
goto bail;
- }
}
- /*
- * Initialize aspm, to be done after gen3 transition and setting up
- * contexts and before enabling interrupts
- */
- aspm_init(dd);
-
return 0;
-nomem:
- ret = -ENOMEM;
bail:
- if (dd->rcd) {
- for (i = 0; i < dd->num_rcv_contexts; ++i)
- hfi1_free_ctxtdata(dd, dd->rcd[i]);
- }
+ for (i = 0; dd->rcd && i < dd->first_dyn_alloc_ctxt; ++i)
+ hfi1_free_ctxt(dd->rcd[i]);
+
+ /* All the contexts should be freed, free the array */
kfree(dd->rcd);
dd->rcd = NULL;
return ret;
}
/*
- * Common code for user and kernel context setup.
+ * Helper routines for the receive context reference count (rcd and uctxt).
+ */
+static void hfi1_rcd_init(struct hfi1_ctxtdata *rcd)
+{
+ kref_init(&rcd->kref);
+}
+
+/**
+ * hfi1_rcd_free - When reference is zero clean up.
+ * @kref: pointer to an initialized rcd data structure
+ *
+ */
+static void hfi1_rcd_free(struct kref *kref)
+{
+ unsigned long flags;
+ struct hfi1_ctxtdata *rcd =
+ container_of(kref, struct hfi1_ctxtdata, kref);
+
+ spin_lock_irqsave(&rcd->dd->uctxt_lock, flags);
+ rcd->dd->rcd[rcd->ctxt] = NULL;
+ spin_unlock_irqrestore(&rcd->dd->uctxt_lock, flags);
+
+ hfi1_free_ctxtdata(rcd->dd, rcd);
+
+ kfree(rcd);
+}
+
+/**
+ * hfi1_rcd_put - decrement reference for rcd
+ * @rcd: pointer to an initialized rcd data structure
+ *
+ * Use this to put a reference after the init.
+ */
+int hfi1_rcd_put(struct hfi1_ctxtdata *rcd)
+{
+ if (rcd)
+ return kref_put(&rcd->kref, hfi1_rcd_free);
+
+ return 0;
+}
+
+/**
+ * hfi1_rcd_get - increment reference for rcd
+ * @rcd: pointer to an initialized rcd data structure
+ *
+ * Use this to get a reference after the init.
+ *
+ * Return : reflect kref_get_unless_zero(), which returns non-zero on
+ * increment, otherwise 0.
+ */
+int hfi1_rcd_get(struct hfi1_ctxtdata *rcd)
+{
+ return kref_get_unless_zero(&rcd->kref);
+}
+
+/**
+ * allocate_rcd_index - allocate an rcd index from the rcd array
+ * @dd: pointer to a valid devdata structure
+ * @rcd: rcd data structure to assign
+ * @index: pointer to index that is allocated
+ *
+ * Find an empty index in the rcd array, and assign the given rcd to it.
+ * If the array is full, we are EBUSY.
+ *
+ */
+static int allocate_rcd_index(struct hfi1_devdata *dd,
+ struct hfi1_ctxtdata *rcd, u16 *index)
+{
+ unsigned long flags;
+ u16 ctxt;
+
+ spin_lock_irqsave(&dd->uctxt_lock, flags);
+ for (ctxt = 0; ctxt < dd->num_rcv_contexts; ctxt++)
+ if (!dd->rcd[ctxt])
+ break;
+
+ if (ctxt < dd->num_rcv_contexts) {
+ rcd->ctxt = ctxt;
+ dd->rcd[ctxt] = rcd;
+ hfi1_rcd_init(rcd);
+ }
+ spin_unlock_irqrestore(&dd->uctxt_lock, flags);
+
+ if (ctxt >= dd->num_rcv_contexts)
+ return -EBUSY;
+
+ *index = ctxt;
+
+ return 0;
+}
+
+/**
+ * hfi1_rcd_get_by_index_safe - validate the ctxt index before accessing the
+ * array
+ * @dd: pointer to a valid devdata structure
+ * @ctxt: the index of an possilbe rcd
+ *
+ * This is a wrapper for hfi1_rcd_get_by_index() to validate that the given
+ * ctxt index is valid.
+ *
+ * The caller is responsible for making the _put().
+ *
+ */
+struct hfi1_ctxtdata *hfi1_rcd_get_by_index_safe(struct hfi1_devdata *dd,
+ u16 ctxt)
+{
+ if (ctxt < dd->num_rcv_contexts)
+ return hfi1_rcd_get_by_index(dd, ctxt);
+
+ return NULL;
+}
+
+/**
+ * hfi1_rcd_get_by_index - get by index
+ * @dd: pointer to a valid devdata structure
+ * @ctxt: the index of an possilbe rcd
+ *
+ * We need to protect access to the rcd array. If access is needed to
+ * one or more index, get the protecting spinlock and then increment the
+ * kref.
+ *
+ * The caller is responsible for making the _put().
+ *
+ */
+struct hfi1_ctxtdata *hfi1_rcd_get_by_index(struct hfi1_devdata *dd, u16 ctxt)
+{
+ unsigned long flags;
+ struct hfi1_ctxtdata *rcd = NULL;
+
+ spin_lock_irqsave(&dd->uctxt_lock, flags);
+ if (dd->rcd[ctxt]) {
+ rcd = dd->rcd[ctxt];
+ if (!hfi1_rcd_get(rcd))
+ rcd = NULL;
+ }
+ spin_unlock_irqrestore(&dd->uctxt_lock, flags);
+
+ return rcd;
+}
+
+/*
+ * Common code for user and kernel context create and setup.
+ * NOTE: the initial kref is done here (hf1_rcd_init()).
*/
-struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt,
- int numa)
+int hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, int numa,
+ struct hfi1_ctxtdata **context)
{
struct hfi1_devdata *dd = ppd->dd;
struct hfi1_ctxtdata *rcd;
@@ -214,56 +310,74 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt,
u32 base;
if (dd->rcv_entries.nctxt_extra >
- dd->num_rcv_contexts - dd->first_user_ctxt)
+ dd->num_rcv_contexts - dd->first_dyn_alloc_ctxt)
kctxt_ngroups = (dd->rcv_entries.nctxt_extra -
- (dd->num_rcv_contexts - dd->first_user_ctxt));
+ (dd->num_rcv_contexts - dd->first_dyn_alloc_ctxt));
rcd = kzalloc_node(sizeof(*rcd), GFP_KERNEL, numa);
if (rcd) {
u32 rcvtids, max_entries;
-
- hfi1_cdbg(PROC, "setting up context %u\n", ctxt);
+ u16 ctxt;
+ int ret;
+
+ ret = allocate_rcd_index(dd, rcd, &ctxt);
+ if (ret) {
+ *context = NULL;
+ kfree(rcd);
+ return ret;
+ }
INIT_LIST_HEAD(&rcd->qp_wait_list);
+ hfi1_exp_tid_group_init(rcd);
rcd->ppd = ppd;
rcd->dd = dd;
- rcd->cnt = 1;
- rcd->ctxt = ctxt;
- dd->rcd[ctxt] = rcd;
rcd->numa_id = numa;
rcd->rcv_array_groups = dd->rcv_entries.ngroups;
+ rcd->rhf_rcv_function_map = normal_rhf_rcv_functions;
+ rcd->slow_handler = handle_receive_interrupt;
+ rcd->do_interrupt = rcd->slow_handler;
+ rcd->msix_intr = CCE_NUM_MSIX_VECTORS;
+
+ mutex_init(&rcd->exp_mutex);
+ spin_lock_init(&rcd->exp_lock);
+ INIT_LIST_HEAD(&rcd->flow_queue.queue_head);
+ INIT_LIST_HEAD(&rcd->rarr_queue.queue_head);
- mutex_init(&rcd->exp_lock);
+ hfi1_cdbg(PROC, "setting up context %u", rcd->ctxt);
/*
* Calculate the context's RcvArray entry starting point.
* We do this here because we have to take into account all
* the RcvArray entries that previous context would have
- * taken and we have to account for any extra groups
- * assigned to the kernel or user contexts.
+ * taken and we have to account for any extra groups assigned
+ * to the static (kernel) or dynamic (vnic/user) contexts.
*/
- if (ctxt < dd->first_user_ctxt) {
+ if (ctxt < dd->first_dyn_alloc_ctxt) {
if (ctxt < kctxt_ngroups) {
base = ctxt * (dd->rcv_entries.ngroups + 1);
rcd->rcv_array_groups++;
- } else
+ } else {
base = kctxt_ngroups +
(ctxt * dd->rcv_entries.ngroups);
+ }
} else {
- u16 ct = ctxt - dd->first_user_ctxt;
+ u16 ct = ctxt - dd->first_dyn_alloc_ctxt;
base = ((dd->n_krcv_queues * dd->rcv_entries.ngroups) +
kctxt_ngroups);
if (ct < dd->rcv_entries.nctxt_extra) {
base += ct * (dd->rcv_entries.ngroups + 1);
rcd->rcv_array_groups++;
- } else
+ } else {
base += dd->rcv_entries.nctxt_extra +
(ct * dd->rcv_entries.ngroups);
+ }
}
rcd->eager_base = base * dd->rcv_entries.group_size;
rcd->rcvhdrq_cnt = rcvhdrcnt;
rcd->rcvhdrqentsize = hfi1_hdrq_entsize;
+ rcd->rhf_offset =
+ rcd->rcvhdrqentsize - sizeof(u64) / sizeof(u32);
/*
* Simple Eager buffer allocation: we have already pre-allocated
* the number of RcvArray entry groups. Each ctxtdata structure
@@ -286,7 +400,7 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt,
rcd->egrbufs.count = MAX_EAGER_ENTRIES;
}
hfi1_cdbg(PROC,
- "ctxt%u: max Eager buffer RcvArray entries: %u\n",
+ "ctxt%u: max Eager buffer RcvArray entries: %u",
rcd->ctxt, rcd->egrbufs.count);
/*
@@ -297,14 +411,16 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt,
* The resulting value will be rounded down to the closest
* multiple of dd->rcv_entries.group_size.
*/
- rcd->egrbufs.buffers = kcalloc(rcd->egrbufs.count,
- sizeof(*rcd->egrbufs.buffers),
- GFP_KERNEL);
+ rcd->egrbufs.buffers =
+ kcalloc_node(rcd->egrbufs.count,
+ sizeof(*rcd->egrbufs.buffers),
+ GFP_KERNEL, numa);
if (!rcd->egrbufs.buffers)
goto bail;
- rcd->egrbufs.rcvtids = kcalloc(rcd->egrbufs.count,
- sizeof(*rcd->egrbufs.rcvtids),
- GFP_KERNEL);
+ rcd->egrbufs.rcvtids =
+ kcalloc_node(rcd->egrbufs.count,
+ sizeof(*rcd->egrbufs.rcvtids),
+ GFP_KERNEL, numa);
if (!rcd->egrbufs.rcvtids)
goto bail;
rcd->egrbufs.size = eager_buffer_size;
@@ -316,42 +432,45 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt,
if (rcd->egrbufs.size < hfi1_max_mtu) {
rcd->egrbufs.size = __roundup_pow_of_two(hfi1_max_mtu);
hfi1_cdbg(PROC,
- "ctxt%u: eager bufs size too small. Adjusting to %zu\n",
+ "ctxt%u: eager bufs size too small. Adjusting to %u",
rcd->ctxt, rcd->egrbufs.size);
}
rcd->egrbufs.rcvtid_size = HFI1_MAX_EAGER_BUFFER_SIZE;
- if (ctxt < dd->first_user_ctxt) { /* N/A for PSM contexts */
- rcd->opstats = kzalloc(sizeof(*rcd->opstats),
- GFP_KERNEL);
+ /* Applicable only for statically created kernel contexts */
+ if (ctxt < dd->first_dyn_alloc_ctxt) {
+ rcd->opstats = kzalloc_node(sizeof(*rcd->opstats),
+ GFP_KERNEL, numa);
if (!rcd->opstats)
goto bail;
+
+ /* Initialize TID flow generations for the context */
+ hfi1_kern_init_ctxt_generations(rcd);
}
+
+ *context = rcd;
+ return 0;
}
- return rcd;
+
bail:
- dd->rcd[ctxt] = NULL;
- kfree(rcd->egrbufs.rcvtids);
- kfree(rcd->egrbufs.buffers);
- kfree(rcd);
- return NULL;
+ *context = NULL;
+ hfi1_free_ctxt(rcd);
+ return -ENOMEM;
}
-/*
- * Convert a receive header entry size that to the encoding used in the CSR.
+/**
+ * hfi1_free_ctxt - free context
+ * @rcd: pointer to an initialized rcd data structure
*
- * Return a zero if the given size is invalid.
+ * This wrapper is the free function that matches hfi1_create_ctxtdata().
+ * When a context is done being used (kernel or user), this function is called
+ * for the "final" put to match the kref init from hfi1_create_ctxtdata().
+ * Other users of the context do a get/put sequence to make sure that the
+ * structure isn't removed while in use.
*/
-static inline u64 encode_rcv_header_entry_size(u16 size)
+void hfi1_free_ctxt(struct hfi1_ctxtdata *rcd)
{
- /* there are only 3 valid receive header entry sizes */
- if (size == 2)
- return 1;
- if (size == 16)
- return 2;
- else if (size == 32)
- return 4;
- return 0; /* invalid */
+ hfi1_rcd_put(rcd);
}
/*
@@ -370,7 +489,7 @@ void set_link_ipg(struct hfi1_pportdata *ppd)
u16 shift, mult;
u64 src;
u32 current_egress_rate; /* Mbits /sec */
- u32 max_pkt_time;
+ u64 max_pkt_time;
/*
* max_pkt_time is the maximum packet egress time in units
* of the fabric clock period 1/(805 MHz).
@@ -468,7 +587,7 @@ static enum hrtimer_restart cca_timer_fn(struct hrtimer *t)
* Common code for initializing the physical port structure.
*/
void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd,
- struct hfi1_devdata *dd, u8 hw_pidx, u8 port)
+ struct hfi1_devdata *dd, u8 hw_pidx, u32 port)
{
int i;
uint default_pkey_idx;
@@ -477,16 +596,21 @@ void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd,
ppd->dd = dd;
ppd->hw_pidx = hw_pidx;
ppd->port = port; /* IB port number, not index */
+ ppd->prev_link_width = LINK_WIDTH_DEFAULT;
+ /*
+ * There are C_VL_COUNT number of PortVLXmitWait counters.
+ * Adding 1 to C_VL_COUNT to include the PortXmitWait counter.
+ */
+ for (i = 0; i < C_VL_COUNT + 1; i++) {
+ ppd->port_vl_xmit_wait_last[i] = 0;
+ ppd->vl_xmit_flit_cnt[i] = 0;
+ }
default_pkey_idx = 1;
ppd->pkeys[default_pkey_idx] = DEFAULT_P_KEY;
- if (loopback) {
- hfi1_early_err(&pdev->dev,
- "Faking data partition 0x8001 in idx %u\n",
- !default_pkey_idx);
- ppd->pkeys[!default_pkey_idx] = 0x8001;
- }
+ ppd->part_enforce |= HFI1_PART_ENFORCE_IN;
+ ppd->pkeys[0] = 0x8001;
INIT_WORK(&ppd->link_vc_work, handle_verify_cap);
INIT_WORK(&ppd->link_up_work, handle_link_up);
@@ -511,12 +635,11 @@ void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd,
spin_lock_init(&ppd->cca_timer_lock);
for (i = 0; i < OPA_MAX_SLS; i++) {
- hrtimer_init(&ppd->cca_timer[i].hrtimer, CLOCK_MONOTONIC,
- HRTIMER_MODE_REL);
ppd->cca_timer[i].ppd = ppd;
ppd->cca_timer[i].sl = i;
ppd->cca_timer[i].ccti = 0;
- ppd->cca_timer[i].hrtimer.function = cca_timer_fn;
+ hrtimer_setup(&ppd->cca_timer[i].hrtimer, cca_timer_fn, CLOCK_MONOTONIC,
+ HRTIMER_MODE_REL);
}
ppd->cc_max_table_entries = IB_CC_TABLE_CAP_DEFAULT;
@@ -530,9 +653,7 @@ void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd,
return;
bail:
-
- hfi1_early_err(&pdev->dev,
- "Congestion Control Agent disabled for port %d\n", port);
+ dd_dev_err(dd, "Congestion Control Agent disabled for port %d\n", port);
}
/*
@@ -555,16 +676,19 @@ static int loadtime_init(struct hfi1_devdata *dd)
static int init_after_reset(struct hfi1_devdata *dd)
{
int i;
-
+ struct hfi1_ctxtdata *rcd;
/*
* Ensure chip does no sends or receives, tail updates, or
* pioavail updates while we re-initialize. This is mostly
* for the driver data structures, not chip registers.
*/
- for (i = 0; i < dd->num_rcv_contexts; i++)
+ for (i = 0; i < dd->num_rcv_contexts; i++) {
+ rcd = hfi1_rcd_get_by_index(dd, i);
hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_DIS |
- HFI1_RCVCTRL_INTRAVAIL_DIS |
- HFI1_RCVCTRL_TAILUPD_DIS, i);
+ HFI1_RCVCTRL_INTRAVAIL_DIS |
+ HFI1_RCVCTRL_TAILUPD_DIS, rcd);
+ hfi1_rcd_put(rcd);
+ }
pio_send_control(dd, PSC_GLOBAL_DISABLE);
for (i = 0; i < dd->num_send_contexts; i++)
sc_disable(dd->send_contexts[i].sc);
@@ -574,8 +698,9 @@ static int init_after_reset(struct hfi1_devdata *dd)
static void enable_chip(struct hfi1_devdata *dd)
{
+ struct hfi1_ctxtdata *rcd;
u32 rcvmask;
- u32 i;
+ u16 i;
/* enable PIO send */
pio_send_control(dd, PSC_GLOBAL_ENABLE);
@@ -584,18 +709,24 @@ static void enable_chip(struct hfi1_devdata *dd)
* Enable kernel ctxts' receive and receive interrupt.
* Other ctxts done as user opens and initializes them.
*/
- for (i = 0; i < dd->first_user_ctxt; ++i) {
+ for (i = 0; i < dd->first_dyn_alloc_ctxt; ++i) {
+ rcd = hfi1_rcd_get_by_index(dd, i);
+ if (!rcd)
+ continue;
rcvmask = HFI1_RCVCTRL_CTXT_ENB | HFI1_RCVCTRL_INTRAVAIL_ENB;
- rcvmask |= HFI1_CAP_KGET_MASK(dd->rcd[i]->flags, DMA_RTAIL) ?
+ rcvmask |= HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL) ?
HFI1_RCVCTRL_TAILUPD_ENB : HFI1_RCVCTRL_TAILUPD_DIS;
- if (!HFI1_CAP_KGET_MASK(dd->rcd[i]->flags, MULTI_PKT_EGR))
+ if (!HFI1_CAP_KGET_MASK(rcd->flags, MULTI_PKT_EGR))
rcvmask |= HFI1_RCVCTRL_ONE_PKT_EGR_ENB;
- if (HFI1_CAP_KGET_MASK(dd->rcd[i]->flags, NODROP_RHQ_FULL))
+ if (HFI1_CAP_KGET_MASK(rcd->flags, NODROP_RHQ_FULL))
rcvmask |= HFI1_RCVCTRL_NO_RHQ_DROP_ENB;
- if (HFI1_CAP_KGET_MASK(dd->rcd[i]->flags, NODROP_EGR_FULL))
+ if (HFI1_CAP_KGET_MASK(rcd->flags, NODROP_EGR_FULL))
rcvmask |= HFI1_RCVCTRL_NO_EGR_DROP_ENB;
- hfi1_rcvctrl(dd, rcvmask, i);
- sc_enable(dd->rcd[i]->sc);
+ if (HFI1_CAP_IS_KSET(TID_RDMA))
+ rcvmask |= HFI1_RCVCTRL_TIDFLOW_ENB;
+ hfi1_rcvctrl(dd, rcvmask, rcd);
+ sc_enable(rcd->sc);
+ hfi1_rcd_put(rcd);
}
}
@@ -614,12 +745,27 @@ static int create_workqueues(struct hfi1_devdata *dd)
ppd->hfi1_wq =
alloc_workqueue(
"hfi%d_%d",
- WQ_SYSFS | WQ_HIGHPRI | WQ_CPU_INTENSIVE,
- dd->num_sdma,
+ WQ_SYSFS | WQ_HIGHPRI | WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM |
+ WQ_PERCPU,
+ HFI1_MAX_ACTIVE_WORKQUEUE_ENTRIES,
dd->unit, pidx);
if (!ppd->hfi1_wq)
goto wq_error;
}
+ if (!ppd->link_wq) {
+ /*
+ * Make the link workqueue single-threaded to enforce
+ * serialization.
+ */
+ ppd->link_wq =
+ alloc_workqueue(
+ "hfi_link_%d_%d",
+ WQ_SYSFS | WQ_MEM_RECLAIM | WQ_UNBOUND,
+ 1, /* max_active */
+ dd->unit, pidx);
+ if (!ppd->link_wq)
+ goto wq_error;
+ }
}
return 0;
wq_error:
@@ -630,11 +776,55 @@ wq_error:
destroy_workqueue(ppd->hfi1_wq);
ppd->hfi1_wq = NULL;
}
+ if (ppd->link_wq) {
+ destroy_workqueue(ppd->link_wq);
+ ppd->link_wq = NULL;
+ }
}
return -ENOMEM;
}
/**
+ * destroy_workqueues - destroy per port workqueues
+ * @dd: the hfi1_ib device
+ */
+static void destroy_workqueues(struct hfi1_devdata *dd)
+{
+ int pidx;
+ struct hfi1_pportdata *ppd;
+
+ for (pidx = 0; pidx < dd->num_pports; ++pidx) {
+ ppd = dd->pport + pidx;
+
+ if (ppd->hfi1_wq) {
+ destroy_workqueue(ppd->hfi1_wq);
+ ppd->hfi1_wq = NULL;
+ }
+ if (ppd->link_wq) {
+ destroy_workqueue(ppd->link_wq);
+ ppd->link_wq = NULL;
+ }
+ }
+}
+
+/**
+ * enable_general_intr() - Enable the IRQs that will be handled by the
+ * general interrupt handler.
+ * @dd: valid devdata
+ *
+ */
+static void enable_general_intr(struct hfi1_devdata *dd)
+{
+ set_intr_bits(dd, CCE_ERR_INT, MISC_ERR_INT, true);
+ set_intr_bits(dd, PIO_ERR_INT, TXE_ERR_INT, true);
+ set_intr_bits(dd, IS_SENDCTXT_ERR_START, IS_SENDCTXT_ERR_END, true);
+ set_intr_bits(dd, PBC_INT, GPIO_ASSERT_INT, true);
+ set_intr_bits(dd, TCRIT_INT, TCRIT_INT, true);
+ set_intr_bits(dd, IS_DC_START, IS_DC_END, true);
+ set_intr_bits(dd, IS_SENDCREDIT_START, IS_SENDCREDIT_END, true);
+}
+
+/**
* hfi1_init - do the actual initialization sequence on the chip
* @dd: the hfi1_ib device
* @reinit: re-initializing, so don't allocate new memory
@@ -652,39 +842,23 @@ wq_error:
int hfi1_init(struct hfi1_devdata *dd, int reinit)
{
int ret = 0, pidx, lastfail = 0;
- unsigned i, len;
+ unsigned long len;
+ u16 i;
struct hfi1_ctxtdata *rcd;
struct hfi1_pportdata *ppd;
- /* Set up recv low level handlers */
- dd->normal_rhf_rcv_functions[RHF_RCV_TYPE_EXPECTED] =
- kdeth_process_expected;
- dd->normal_rhf_rcv_functions[RHF_RCV_TYPE_EAGER] =
- kdeth_process_eager;
- dd->normal_rhf_rcv_functions[RHF_RCV_TYPE_IB] = process_receive_ib;
- dd->normal_rhf_rcv_functions[RHF_RCV_TYPE_ERROR] =
- process_receive_error;
- dd->normal_rhf_rcv_functions[RHF_RCV_TYPE_BYPASS] =
- process_receive_bypass;
- dd->normal_rhf_rcv_functions[RHF_RCV_TYPE_INVALID5] =
- process_receive_invalid;
- dd->normal_rhf_rcv_functions[RHF_RCV_TYPE_INVALID6] =
- process_receive_invalid;
- dd->normal_rhf_rcv_functions[RHF_RCV_TYPE_INVALID7] =
- process_receive_invalid;
- dd->rhf_rcv_function_map = dd->normal_rhf_rcv_functions;
-
/* Set up send low level handlers */
dd->process_pio_send = hfi1_verbs_send_pio;
dd->process_dma_send = hfi1_verbs_send_dma;
dd->pio_inline_send = pio_copy;
+ dd->process_vnic_dma_send = hfi1_vnic_send_dma;
if (is_ax(dd)) {
atomic_set(&dd->drop_packet, DROP_PACKET_ON);
- dd->do_drop = 1;
+ dd->do_drop = true;
} else {
atomic_set(&dd->drop_packet, DROP_PACKET_OFF);
- dd->do_drop = 0;
+ dd->do_drop = false;
}
/* make sure the link is not "up" */
@@ -700,44 +874,34 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit)
if (ret)
goto done;
- /* allocate dummy tail memory for all receive contexts */
- dd->rcvhdrtail_dummy_kvaddr = dma_zalloc_coherent(
- &dd->pcidev->dev, sizeof(u64),
- &dd->rcvhdrtail_dummy_dma,
- GFP_KERNEL);
-
- if (!dd->rcvhdrtail_dummy_kvaddr) {
- dd_dev_err(dd, "cannot allocate dummy tail memory\n");
- ret = -ENOMEM;
- goto done;
- }
-
/* dd->rcd can be NULL if early initialization failed */
- for (i = 0; dd->rcd && i < dd->first_user_ctxt; ++i) {
+ for (i = 0; dd->rcd && i < dd->first_dyn_alloc_ctxt; ++i) {
/*
* Set up the (kernel) rcvhdr queue and egr TIDs. If doing
* re-init, the simplest way to handle this is to free
* existing, and re-allocate.
* Need to re-create rest of ctxt 0 ctxtdata as well.
*/
- rcd = dd->rcd[i];
+ rcd = hfi1_rcd_get_by_index(dd, i);
if (!rcd)
continue;
- rcd->do_interrupt = &handle_receive_interrupt;
-
lastfail = hfi1_create_rcvhdrq(dd, rcd);
if (!lastfail)
lastfail = hfi1_setup_eagerbufs(rcd);
+ if (!lastfail)
+ lastfail = hfi1_kern_exp_rcv_init(rcd, reinit);
if (lastfail) {
dd_dev_err(dd,
"failed to allocate kernel ctxt's rcvhdrq and/or egr bufs\n");
ret = lastfail;
}
+ /* enable IRQ */
+ hfi1_rcd_put(rcd);
}
/* Allocate enough memory for user event notification. */
- len = PAGE_ALIGN(dd->chip_rcv_contexts * HFI1_MAX_SHARED_CTXTS *
+ len = PAGE_ALIGN(chip_rcv_contexts(dd) * HFI1_MAX_SHARED_CTXTS *
sizeof(*dd->events));
dd->events = vmalloc_user(len);
if (!dd->events)
@@ -749,9 +913,6 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit)
dd->status = vmalloc_user(PAGE_SIZE);
if (!dd->status)
dd_dev_err(dd, "Failed to allocate dev status page\n");
- else
- dd->freezelen = PAGE_SIZE - (sizeof(*dd->status) -
- sizeof(dd->status->freezemsg));
for (pidx = 0; pidx < dd->num_pports; ++pidx) {
ppd = dd->pport + pidx;
if (dd->status)
@@ -774,7 +935,8 @@ done:
HFI1_STATUS_INITTED;
if (!ret) {
/* enable all interrupts from the chip */
- set_intr_state(dd, 1);
+ enable_general_intr(dd);
+ init_qsfp_int(dd);
/* chip is OK for user apps; mark it as initialized */
for (pidx = 0; pidx < dd->num_pports; ++pidx) {
@@ -806,21 +968,9 @@ done:
return ret;
}
-static inline struct hfi1_devdata *__hfi1_lookup(int unit)
-{
- return idr_find(&hfi1_unit_table, unit);
-}
-
struct hfi1_devdata *hfi1_lookup(int unit)
{
- struct hfi1_devdata *dd;
- unsigned long flags;
-
- spin_lock_irqsave(&hfi1_devs_lock, flags);
- dd = __hfi1_lookup(unit);
- spin_unlock_irqrestore(&hfi1_devs_lock, flags);
-
- return dd;
+ return xa_load(&hfi1_dev_table, unit);
}
/*
@@ -834,8 +984,8 @@ static void stop_timers(struct hfi1_devdata *dd)
for (pidx = 0; pidx < dd->num_pports; ++pidx) {
ppd = dd->pport + pidx;
- if (ppd->led_override_timer.data) {
- del_timer_sync(&ppd->led_override_timer);
+ if (ppd->led_override_timer.function) {
+ timer_delete_sync(&ppd->led_override_timer);
atomic_set(&ppd->led_override_timer_active, 0);
}
}
@@ -853,9 +1003,14 @@ static void stop_timers(struct hfi1_devdata *dd)
static void shutdown_device(struct hfi1_devdata *dd)
{
struct hfi1_pportdata *ppd;
+ struct hfi1_ctxtdata *rcd;
unsigned pidx;
int i;
+ if (dd->flags & HFI1_SHUTDOWN)
+ return;
+ dd->flags |= HFI1_SHUTDOWN;
+
for (pidx = 0; pidx < dd->num_pports; ++pidx) {
ppd = dd->pport + pidx;
@@ -866,17 +1021,20 @@ static void shutdown_device(struct hfi1_devdata *dd)
}
dd->flags &= ~HFI1_INITTED;
- /* mask interrupts, but not errors */
- set_intr_state(dd, 0);
+ /* mask and clean up interrupts */
+ set_intr_bits(dd, IS_FIRST_SOURCE, IS_LAST_SOURCE, false);
+ msix_clean_up_interrupts(dd);
for (pidx = 0; pidx < dd->num_pports; ++pidx) {
- ppd = dd->pport + pidx;
- for (i = 0; i < dd->num_rcv_contexts; i++)
+ for (i = 0; i < dd->num_rcv_contexts; i++) {
+ rcd = hfi1_rcd_get_by_index(dd, i);
hfi1_rcvctrl(dd, HFI1_RCVCTRL_TAILUPD_DIS |
- HFI1_RCVCTRL_CTXT_DIS |
- HFI1_RCVCTRL_INTRAVAIL_DIS |
- HFI1_RCVCTRL_PKEY_DIS |
- HFI1_RCVCTRL_ONE_PKT_EGR_DIS, i);
+ HFI1_RCVCTRL_CTXT_DIS |
+ HFI1_RCVCTRL_INTRAVAIL_DIS |
+ HFI1_RCVCTRL_PKEY_DIS |
+ HFI1_RCVCTRL_ONE_PKT_EGR_DIS, rcd);
+ hfi1_rcd_put(rcd);
+ }
/*
* Gracefully stop all sends allowing any in progress to
* trickle out first.
@@ -907,11 +1065,10 @@ static void shutdown_device(struct hfi1_devdata *dd)
* We can't count on interrupts since we are stopping.
*/
hfi1_quiet_serdes(ppd);
-
- if (ppd->hfi1_wq) {
- destroy_workqueue(ppd->hfi1_wq);
- ppd->hfi1_wq = NULL;
- }
+ if (ppd->hfi1_wq)
+ flush_workqueue(ppd->hfi1_wq);
+ if (ppd->link_wq)
+ flush_workqueue(ppd->link_wq);
}
sdma_exit(dd);
}
@@ -922,25 +1079,22 @@ static void shutdown_device(struct hfi1_devdata *dd)
* @rcd: the ctxtdata structure
*
* free up any allocated data for a context
- * This should not touch anything that would affect a simultaneous
- * re-allocation of context data, because it is called after hfi1_mutex
- * is released (and can be called from reinit as well).
* It should never change any chip state, or global driver state.
*/
void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
{
- unsigned e;
+ u32 e;
if (!rcd)
return;
if (rcd->rcvhdrq) {
- dma_free_coherent(&dd->pcidev->dev, rcd->rcvhdrq_size,
+ dma_free_coherent(&dd->pcidev->dev, rcvhdrq_size(rcd),
rcd->rcvhdrq, rcd->rcvhdrq_dma);
rcd->rcvhdrq = NULL;
- if (rcd->rcvhdrtail_kvaddr) {
+ if (hfi1_rcvhdrtail_kvaddr(rcd)) {
dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE,
- (void *)rcd->rcvhdrtail_kvaddr,
+ (void *)hfi1_rcvhdrtail_kvaddr(rcd),
rcd->rcvhdrqtailaddr_dma);
rcd->rcvhdrtail_kvaddr = NULL;
}
@@ -948,29 +1102,37 @@ void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
/* all the RcvArray entries should have been cleared by now */
kfree(rcd->egrbufs.rcvtids);
+ rcd->egrbufs.rcvtids = NULL;
for (e = 0; e < rcd->egrbufs.alloced; e++) {
- if (rcd->egrbufs.buffers[e].dma)
+ if (rcd->egrbufs.buffers[e].addr)
dma_free_coherent(&dd->pcidev->dev,
rcd->egrbufs.buffers[e].len,
rcd->egrbufs.buffers[e].addr,
rcd->egrbufs.buffers[e].dma);
}
kfree(rcd->egrbufs.buffers);
+ rcd->egrbufs.alloced = 0;
+ rcd->egrbufs.buffers = NULL;
sc_free(rcd->sc);
- vfree(rcd->user_event_mask);
+ rcd->sc = NULL;
+
vfree(rcd->subctxt_uregbase);
vfree(rcd->subctxt_rcvegrbuf);
vfree(rcd->subctxt_rcvhdr_base);
kfree(rcd->opstats);
- kfree(rcd);
+
+ rcd->subctxt_uregbase = NULL;
+ rcd->subctxt_rcvegrbuf = NULL;
+ rcd->subctxt_rcvhdr_base = NULL;
+ rcd->opstats = NULL;
}
/*
* Release our hold on the shared asic data. If we are the last one,
* return the structure to be finalized outside the lock. Must be
- * holding hfi1_devs_lock.
+ * holding hfi1_dev_table lock.
*/
static struct hfi1_asic_data *release_asic_data(struct hfi1_devdata *dd)
{
@@ -994,48 +1156,57 @@ static void finalize_asic_data(struct hfi1_devdata *dd,
kfree(ad);
}
-static void __hfi1_free_devdata(struct kobject *kobj)
+/**
+ * hfi1_free_devdata - cleans up and frees per-unit data structure
+ * @dd: pointer to a valid devdata structure
+ *
+ * It cleans up and frees all data structures set up by
+ * by hfi1_alloc_devdata().
+ */
+void hfi1_free_devdata(struct hfi1_devdata *dd)
{
- struct hfi1_devdata *dd =
- container_of(kobj, struct hfi1_devdata, kobj);
struct hfi1_asic_data *ad;
unsigned long flags;
- spin_lock_irqsave(&hfi1_devs_lock, flags);
- idr_remove(&hfi1_unit_table, dd->unit);
- list_del(&dd->list);
+ xa_lock_irqsave(&hfi1_dev_table, flags);
+ __xa_erase(&hfi1_dev_table, dd->unit);
ad = release_asic_data(dd);
- spin_unlock_irqrestore(&hfi1_devs_lock, flags);
- if (ad)
- finalize_asic_data(dd, ad);
+ xa_unlock_irqrestore(&hfi1_dev_table, flags);
+
+ finalize_asic_data(dd, ad);
free_platform_config(dd);
rcu_barrier(); /* wait for rcu callbacks to complete */
free_percpu(dd->int_counter);
free_percpu(dd->rcv_limit);
free_percpu(dd->send_schedule);
+ free_percpu(dd->tx_opstats);
+ dd->int_counter = NULL;
+ dd->rcv_limit = NULL;
+ dd->send_schedule = NULL;
+ dd->tx_opstats = NULL;
+ kfree(dd->comp_vect);
+ dd->comp_vect = NULL;
+ if (dd->rcvhdrtail_dummy_kvaddr)
+ dma_free_coherent(&dd->pcidev->dev, sizeof(u64),
+ (void *)dd->rcvhdrtail_dummy_kvaddr,
+ dd->rcvhdrtail_dummy_dma);
+ dd->rcvhdrtail_dummy_kvaddr = NULL;
+ sdma_clean(dd, dd->num_sdma);
rvt_dealloc_device(&dd->verbs_dev.rdi);
}
-static struct kobj_type hfi1_devdata_type = {
- .release = __hfi1_free_devdata,
-};
-
-void hfi1_free_devdata(struct hfi1_devdata *dd)
-{
- kobject_put(&dd->kobj);
-}
-
-/*
- * Allocate our primary per-unit data structure. Must be done via verbs
- * allocator, because the verbs cleanup process both does cleanup and
- * free of the data structure.
- * "extra" is for chip-specific data.
+/**
+ * hfi1_alloc_devdata - Allocate our primary per-unit data structure.
+ * @pdev: Valid PCI device
+ * @extra: How many bytes to alloc past the default
*
- * Use the idr mechanism to get a unit number for this unit.
+ * Must be done via verbs allocator, because the verbs cleanup process
+ * both does cleanup and free of the data structure.
+ * "extra" is for chip-specific data.
*/
-struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra)
+static struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev,
+ size_t extra)
{
- unsigned long flags;
struct hfi1_devdata *dd;
int ret, nports;
@@ -1048,25 +1219,27 @@ struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra)
return ERR_PTR(-ENOMEM);
dd->num_pports = nports;
dd->pport = (struct hfi1_pportdata *)(dd + 1);
+ dd->pcidev = pdev;
+ pci_set_drvdata(pdev, dd);
- INIT_LIST_HEAD(&dd->list);
- idr_preload(GFP_KERNEL);
- spin_lock_irqsave(&hfi1_devs_lock, flags);
-
- ret = idr_alloc(&hfi1_unit_table, dd, 0, 0, GFP_NOWAIT);
- if (ret >= 0) {
- dd->unit = ret;
- list_add(&dd->list, &hfi1_dev_list);
- }
-
- spin_unlock_irqrestore(&hfi1_devs_lock, flags);
- idr_preload_end();
-
+ ret = xa_alloc_irq(&hfi1_dev_table, &dd->unit, dd, xa_limit_32b,
+ GFP_KERNEL);
if (ret < 0) {
- hfi1_early_err(&pdev->dev,
- "Could not allocate unit ID: error %d\n", -ret);
+ dev_err(&pdev->dev,
+ "Could not allocate unit ID: error %d\n", -ret);
goto bail;
}
+ rvt_set_ibdev_name(&dd->verbs_dev.rdi, "%s_%d", class_name(), dd->unit);
+ /*
+ * If the BIOS does not have the NUMA node information set, select
+ * NUMA 0 so we get consistent performance.
+ */
+ dd->node = pcibus_to_node(pdev->bus);
+ if (dd->node == NUMA_NO_NODE) {
+ dd_dev_err(dd, "Invalid PCI NUMA node. Performance may be affected\n");
+ dd->node = 0;
+ }
+
/*
* Initialize all locks for the device. This needs to be as early as
* possible so locks are usable.
@@ -1077,56 +1250,58 @@ struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra)
spin_lock_init(&dd->uctxt_lock);
spin_lock_init(&dd->hfi1_diag_trans_lock);
spin_lock_init(&dd->sc_init_lock);
- spin_lock_init(&dd->dc8051_lock);
spin_lock_init(&dd->dc8051_memlock);
seqlock_init(&dd->sc2vl_lock);
spin_lock_init(&dd->sde_map_lock);
spin_lock_init(&dd->pio_map_lock);
+ mutex_init(&dd->dc8051_lock);
init_waitqueue_head(&dd->event_queue);
+ spin_lock_init(&dd->irq_src_lock);
dd->int_counter = alloc_percpu(u64);
if (!dd->int_counter) {
ret = -ENOMEM;
- hfi1_early_err(&pdev->dev,
- "Could not allocate per-cpu int_counter\n");
goto bail;
}
dd->rcv_limit = alloc_percpu(u64);
if (!dd->rcv_limit) {
ret = -ENOMEM;
- hfi1_early_err(&pdev->dev,
- "Could not allocate per-cpu rcv_limit\n");
goto bail;
}
dd->send_schedule = alloc_percpu(u64);
if (!dd->send_schedule) {
ret = -ENOMEM;
- hfi1_early_err(&pdev->dev,
- "Could not allocate per-cpu int_counter\n");
goto bail;
}
- if (!hfi1_cpulist_count) {
- u32 count = num_online_cpus();
-
- hfi1_cpulist = kcalloc(BITS_TO_LONGS(count), sizeof(long),
- GFP_KERNEL);
- if (hfi1_cpulist)
- hfi1_cpulist_count = count;
- else
- hfi1_early_err(
- &pdev->dev,
- "Could not alloc cpulist info, cpu affinity might be wrong\n");
+ dd->tx_opstats = alloc_percpu(struct hfi1_opcode_stats_perctx);
+ if (!dd->tx_opstats) {
+ ret = -ENOMEM;
+ goto bail;
+ }
+
+ dd->comp_vect = kzalloc(sizeof(*dd->comp_vect), GFP_KERNEL);
+ if (!dd->comp_vect) {
+ ret = -ENOMEM;
+ goto bail;
}
- kobject_init(&dd->kobj, &hfi1_devdata_type);
+
+ /* allocate dummy tail memory for all receive contexts */
+ dd->rcvhdrtail_dummy_kvaddr =
+ dma_alloc_coherent(&dd->pcidev->dev, sizeof(u64),
+ &dd->rcvhdrtail_dummy_dma, GFP_KERNEL);
+ if (!dd->rcvhdrtail_dummy_kvaddr) {
+ ret = -ENOMEM;
+ goto bail;
+ }
+
+ atomic_set(&dd->ipoib_rsm_usr_num, 0);
return dd;
bail:
- if (!list_empty(&dd->list))
- list_del_init(&dd->list);
- rvt_dealloc_device(&dd->verbs_dev.rdi);
+ hfi1_free_devdata(dd);
return ERR_PTR(ret);
}
@@ -1165,8 +1340,9 @@ void hfi1_disable_after_error(struct hfi1_devdata *dd)
static void remove_one(struct pci_dev *);
static int init_one(struct pci_dev *, const struct pci_device_id *);
+static void shutdown_one(struct pci_dev *);
-#define DRIVER_LOAD_MSG "Intel " DRIVER_NAME " loaded: "
+#define DRIVER_LOAD_MSG "Cornelis " DRIVER_NAME " loaded: "
#define PFX DRIVER_NAME ": "
const struct pci_device_id hfi1_pci_tbl[] = {
@@ -1181,6 +1357,7 @@ static struct pci_driver hfi1_pci_driver = {
.name = DRIVER_NAME,
.probe = init_one,
.remove = remove_one,
+ .shutdown = shutdown_one,
.id_table = hfi1_pci_tbl,
.err_handler = &hfi1_pci_err_handler,
};
@@ -1254,16 +1431,17 @@ static int __init hfi1_mod_init(void)
/* sanitize link CRC options */
link_crc_mask &= SUPPORTED_CRCS;
+ ret = opfn_init();
+ if (ret < 0) {
+ pr_err("Failed to allocate opfn_wq");
+ goto bail_dev;
+ }
+
/*
* These must be called before the driver is registered with
* the PCI subsystem.
*/
- idr_init(&hfi1_unit_table);
-
hfi1_dbg_init();
- ret = hfi1_wss_init();
- if (ret < 0)
- goto bail_wss;
ret = pci_register_driver(&hfi1_pci_driver);
if (ret < 0) {
pr_err("Unable to register driver: error %d\n", -ret);
@@ -1272,10 +1450,7 @@ static int __init hfi1_mod_init(void)
goto bail; /* all OK */
bail_dev:
- hfi1_wss_exit();
-bail_wss:
hfi1_dbg_exit();
- idr_destroy(&hfi1_unit_table);
dev_cleanup();
bail:
return ret;
@@ -1289,13 +1464,11 @@ module_init(hfi1_mod_init);
static void __exit hfi1_mod_cleanup(void)
{
pci_unregister_driver(&hfi1_pci_driver);
- node_affinity_destroy();
- hfi1_wss_exit();
+ opfn_exit();
+ node_affinity_destroy_all();
hfi1_dbg_exit();
- hfi1_cpulist_count = 0;
- kfree(hfi1_cpulist);
- idr_destroy(&hfi1_unit_table);
+ WARN_ON(!xa_empty(&hfi1_dev_table));
dispose_firmware(); /* asymmetric with obtain_firmware() */
dev_cleanup();
}
@@ -1307,8 +1480,6 @@ static void cleanup_device_data(struct hfi1_devdata *dd)
{
int ctxt;
int pidx;
- struct hfi1_ctxtdata **tmp;
- unsigned long flags;
/* users can't do anything more with chip */
for (pidx = 0; pidx < dd->num_pports; ++pidx) {
@@ -1336,32 +1507,19 @@ static void cleanup_device_data(struct hfi1_devdata *dd)
/*
* Free any resources still in use (usually just kernel contexts)
* at unload; we do for ctxtcnt, because that's what we allocate.
- * We acquire lock to be really paranoid that rcd isn't being
- * accessed from some interrupt-related code (that should not happen,
- * but best to be sure).
*/
- spin_lock_irqsave(&dd->uctxt_lock, flags);
- tmp = dd->rcd;
- dd->rcd = NULL;
- spin_unlock_irqrestore(&dd->uctxt_lock, flags);
-
- if (dd->rcvhdrtail_dummy_kvaddr) {
- dma_free_coherent(&dd->pcidev->dev, sizeof(u64),
- (void *)dd->rcvhdrtail_dummy_kvaddr,
- dd->rcvhdrtail_dummy_dma);
- dd->rcvhdrtail_dummy_kvaddr = NULL;
- }
-
- for (ctxt = 0; tmp && ctxt < dd->num_rcv_contexts; ctxt++) {
- struct hfi1_ctxtdata *rcd = tmp[ctxt];
+ for (ctxt = 0; dd->rcd && ctxt < dd->num_rcv_contexts; ctxt++) {
+ struct hfi1_ctxtdata *rcd = dd->rcd[ctxt];
- tmp[ctxt] = NULL; /* debugging paranoia */
if (rcd) {
- hfi1_clear_tids(rcd);
- hfi1_free_ctxtdata(dd, rcd);
+ hfi1_free_ctxt_rcv_groups(rcd);
+ hfi1_free_ctxt(rcd);
}
}
- kfree(tmp);
+
+ kfree(dd->rcd);
+ dd->rcd = NULL;
+
free_pio_map(dd);
/* must follow rcv context free - need to remove rcv's hooks */
for (ctxt = 0; ctxt < dd->num_send_contexts; ctxt++)
@@ -1383,6 +1541,8 @@ static void cleanup_device_data(struct hfi1_devdata *dd)
static void postinit_cleanup(struct hfi1_devdata *dd)
{
hfi1_start_cleanup(dd);
+ hfi1_comp_vectors_clean_up(dd);
+ hfi1_dev_affinity_clean_up(dd);
hfi1_pcie_ddcleanup(dd);
hfi1_pcie_cleanup(dd->pcidev);
@@ -1392,29 +1552,6 @@ static void postinit_cleanup(struct hfi1_devdata *dd)
hfi1_free_devdata(dd);
}
-static int init_validate_rcvhdrcnt(struct device *dev, uint thecnt)
-{
- if (thecnt <= HFI1_MIN_HDRQ_EGRBUF_CNT) {
- hfi1_early_err(dev, "Receive header queue count too small\n");
- return -EINVAL;
- }
-
- if (thecnt > HFI1_MAX_HDRQ_EGRBUF_CNT) {
- hfi1_early_err(dev,
- "Receive header queue count cannot be greater than %u\n",
- HFI1_MAX_HDRQ_EGRBUF_CNT);
- return -EINVAL;
- }
-
- if (thecnt % HDRQ_INCREMENT) {
- hfi1_early_err(dev, "Receive header queue count %d must be divisible by %lu\n",
- thecnt, HDRQ_INCREMENT);
- return -EINVAL;
- }
-
- return 0;
-}
-
static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
{
int ret = 0, j, pidx, initfail;
@@ -1424,15 +1561,32 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
/* First, lock the non-writable module parameters */
HFI1_CAP_LOCK();
+ /* Validate dev ids */
+ if (!(ent->device == PCI_DEVICE_ID_INTEL0 ||
+ ent->device == PCI_DEVICE_ID_INTEL1)) {
+ dev_err(&pdev->dev, "Failing on unknown Intel deviceid 0x%x\n",
+ ent->device);
+ ret = -ENODEV;
+ goto bail;
+ }
+
+ /* Allocate the dd so we can get to work */
+ dd = hfi1_alloc_devdata(pdev, NUM_IB_PORTS *
+ sizeof(struct hfi1_pportdata));
+ if (IS_ERR(dd)) {
+ ret = PTR_ERR(dd);
+ goto bail;
+ }
+
/* Validate some global module parameters */
- ret = init_validate_rcvhdrcnt(&pdev->dev, rcvhdrcnt);
+ ret = hfi1_validate_rcvhdrcnt(dd, rcvhdrcnt);
if (ret)
goto bail;
/* use the encoding function as a sanitization check */
if (!encode_rcv_header_entry_size(hfi1_hdrq_entsize)) {
- hfi1_early_err(&pdev->dev, "Invalid HdrQ Entry size %u\n",
- hfi1_hdrq_entsize);
+ dd_dev_err(dd, "Invalid HdrQ Entry size %u\n",
+ hfi1_hdrq_entsize);
ret = -EINVAL;
goto bail;
}
@@ -1454,10 +1608,10 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
clamp_val(eager_buffer_size,
MIN_EAGER_BUFFER * 8,
MAX_EAGER_BUFFER_TOTAL);
- hfi1_early_info(&pdev->dev, "Eager buffer size %u\n",
- eager_buffer_size);
+ dd_dev_info(dd, "Eager buffer size %u\n",
+ eager_buffer_size);
} else {
- hfi1_early_err(&pdev->dev, "Invalid Eager buffer size of 0\n");
+ dd_dev_err(dd, "Invalid Eager buffer size of 0\n");
ret = -EINVAL;
goto bail;
}
@@ -1465,29 +1619,17 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
/* restrict value of hfi1_rcvarr_split */
hfi1_rcvarr_split = clamp_val(hfi1_rcvarr_split, 0, 100);
- ret = hfi1_pcie_init(pdev, ent);
+ ret = hfi1_pcie_init(dd);
if (ret)
goto bail;
- if (!(ent->device == PCI_DEVICE_ID_INTEL0 ||
- ent->device == PCI_DEVICE_ID_INTEL1)) {
- hfi1_early_err(&pdev->dev,
- "Failing on unknown Intel deviceid 0x%x\n",
- ent->device);
- ret = -ENODEV;
- goto clean_bail;
- }
-
/*
* Do device-specific initialization, function table setup, dd
* allocation, etc.
*/
- dd = hfi1_init_dd(pdev, ent);
-
- if (IS_ERR(dd)) {
- ret = PTR_ERR(dd);
+ ret = hfi1_init_dd(dd);
+ if (ret)
goto clean_bail; /* error already printed */
- }
ret = create_workqueues(dd);
if (ret)
@@ -1515,6 +1657,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
dd_dev_err(dd, "Failed to create /dev devices: %d\n", -j);
if (initfail || ret) {
+ msix_clean_up_interrupts(dd);
stop_timers(dd);
flush_workqueue(ib_wq);
for (pidx = 0; pidx < dd->num_pports; ++pidx) {
@@ -1524,6 +1667,10 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
destroy_workqueue(ppd->hfi1_wq);
ppd->hfi1_wq = NULL;
}
+ if (ppd->link_wq) {
+ destroy_workqueue(ppd->link_wq);
+ ppd->link_wq = NULL;
+ }
}
if (!j)
hfi1_device_remove(dd);
@@ -1551,7 +1698,7 @@ static void wait_for_clients(struct hfi1_devdata *dd)
* Remove the device init value and complete the device if there is
* no clients or wait for active clients to finish.
*/
- if (atomic_dec_and_test(&dd->user_refcount))
+ if (refcount_dec_and_test(&dd->user_refcount))
complete(&dd->user_comp);
wait_for_completion(&dd->user_comp);
@@ -1573,11 +1720,15 @@ static void remove_one(struct pci_dev *pdev)
/* unregister from IB core */
hfi1_unregister_ib_device(dd);
+ /* free netdev data */
+ hfi1_free_rx(dd);
+
/*
* Disable the IB link, disable interrupts on the device,
* clear dma engines, etc.
*/
shutdown_device(dd);
+ destroy_workqueues(dd);
stop_timers(dd);
@@ -1587,6 +1738,13 @@ static void remove_one(struct pci_dev *pdev)
postinit_cleanup(dd);
}
+static void shutdown_one(struct pci_dev *pdev)
+{
+ struct hfi1_devdata *dd = pci_get_drvdata(pdev);
+
+ shutdown_device(dd);
+}
+
/**
* hfi1_create_rcvhdrq - create a receive header queue
* @dd: the hfi1_ib device
@@ -1599,24 +1757,13 @@ static void remove_one(struct pci_dev *pdev)
int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
{
unsigned amt;
- u64 reg;
if (!rcd->rcvhdrq) {
- dma_addr_t dma_hdrqtail;
- gfp_t gfp_flags;
-
- /*
- * rcvhdrqentsize is in DWs, so we have to convert to bytes
- * (* sizeof(u32)).
- */
- amt = PAGE_ALIGN(rcd->rcvhdrq_cnt * rcd->rcvhdrqentsize *
- sizeof(u32));
+ amt = rcvhdrq_size(rcd);
- gfp_flags = (rcd->ctxt >= dd->first_user_ctxt) ?
- GFP_USER : GFP_KERNEL;
- rcd->rcvhdrq = dma_zalloc_coherent(
- &dd->pcidev->dev, amt, &rcd->rcvhdrq_dma,
- gfp_flags | __GFP_COMP);
+ rcd->rcvhdrq = dma_alloc_coherent(&dd->pcidev->dev, amt,
+ &rcd->rcvhdrq_dma,
+ GFP_KERNEL);
if (!rcd->rcvhdrq) {
dd_dev_err(dd,
@@ -1625,41 +1772,19 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
goto bail;
}
- if (HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL)) {
- rcd->rcvhdrtail_kvaddr = dma_zalloc_coherent(
- &dd->pcidev->dev, PAGE_SIZE, &dma_hdrqtail,
- gfp_flags);
+ if (HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL) ||
+ HFI1_CAP_UGET_MASK(rcd->flags, DMA_RTAIL)) {
+ rcd->rcvhdrtail_kvaddr = dma_alloc_coherent(&dd->pcidev->dev,
+ PAGE_SIZE,
+ &rcd->rcvhdrqtailaddr_dma,
+ GFP_KERNEL);
if (!rcd->rcvhdrtail_kvaddr)
goto bail_free;
- rcd->rcvhdrqtailaddr_dma = dma_hdrqtail;
}
-
- rcd->rcvhdrq_size = amt;
}
- /*
- * These values are per-context:
- * RcvHdrCnt
- * RcvHdrEntSize
- * RcvHdrSize
- */
- reg = ((u64)(rcd->rcvhdrq_cnt >> HDRQ_SIZE_SHIFT)
- & RCV_HDR_CNT_CNT_MASK)
- << RCV_HDR_CNT_CNT_SHIFT;
- write_kctxt_csr(dd, rcd->ctxt, RCV_HDR_CNT, reg);
- reg = (encode_rcv_header_entry_size(rcd->rcvhdrqentsize)
- & RCV_HDR_ENT_SIZE_ENT_SIZE_MASK)
- << RCV_HDR_ENT_SIZE_ENT_SIZE_SHIFT;
- write_kctxt_csr(dd, rcd->ctxt, RCV_HDR_ENT_SIZE, reg);
- reg = (dd->rcvhdrsize & RCV_HDR_SIZE_HDR_SIZE_MASK)
- << RCV_HDR_SIZE_HDR_SIZE_SHIFT;
- write_kctxt_csr(dd, rcd->ctxt, RCV_HDR_SIZE, reg);
- /*
- * Program dummy tail address for every receive context
- * before enabling any receive context
- */
- write_kctxt_csr(dd, rcd->ctxt, RCV_HDR_TAIL_ADDR,
- dd->rcvhdrtail_dummy_dma);
+ set_hdrq_regs(rcd->dd, rcd->ctxt, rcd->rcvhdrqentsize,
+ rcd->rcvhdrq_cnt);
return 0;
@@ -1667,8 +1792,6 @@ bail_free:
dd_dev_err(dd,
"attempt to allocate 1 page for ctxt %u rcvhdrqtailaddr failed\n",
rcd->ctxt);
- vfree(rcd->user_event_mask);
- rcd->user_event_mask = NULL;
dma_free_coherent(&dd->pcidev->dev, amt, rcd->rcvhdrq,
rcd->rcvhdrq_dma);
rcd->rcvhdrq = NULL;
@@ -1677,7 +1800,8 @@ bail:
}
/**
- * allocate eager buffers, both kernel and user contexts.
+ * hfi1_setup_eagerbufs - llocate eager buffers, both kernel and user
+ * contexts.
* @rcd: the context we are setting up.
*
* Allocate the eager TID buffers and program them into hip.
@@ -1688,21 +1812,12 @@ bail:
int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd)
{
struct hfi1_devdata *dd = rcd->dd;
- u32 max_entries, egrtop, alloced_bytes = 0, idx = 0;
- gfp_t gfp_flags;
- u16 order;
+ u32 max_entries, egrtop, alloced_bytes = 0;
+ u16 order, idx = 0;
int ret = 0;
u16 round_mtu = roundup_pow_of_two(hfi1_max_mtu);
/*
- * GFP_USER, but without GFP_FS, so buffer cache can be
- * coalesced (we hope); otherwise, even at order 4,
- * heavy filesystem activity makes these fail, and we can
- * use compound pages.
- */
- gfp_flags = __GFP_RECLAIM | __GFP_IO | __GFP_COMP;
-
- /*
* The minimum size of the eager buffers is a groups of MTU-sized
* buffers.
* The global eager_buffer_size parameter is checked against the
@@ -1729,10 +1844,10 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd)
while (alloced_bytes < rcd->egrbufs.size &&
rcd->egrbufs.alloced < rcd->egrbufs.count) {
rcd->egrbufs.buffers[idx].addr =
- dma_zalloc_coherent(&dd->pcidev->dev,
- rcd->egrbufs.rcvtid_size,
- &rcd->egrbufs.buffers[idx].dma,
- gfp_flags);
+ dma_alloc_coherent(&dd->pcidev->dev,
+ rcd->egrbufs.rcvtid_size,
+ &rcd->egrbufs.buffers[idx].dma,
+ GFP_KERNEL);
if (rcd->egrbufs.buffers[idx].addr) {
rcd->egrbufs.buffers[idx].len =
rcd->egrbufs.rcvtid_size;
@@ -1757,6 +1872,7 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd)
!HFI1_CAP_KGET_MASK(rcd->flags, MULTI_PKT_EGR)) {
dd_dev_err(dd, "ctxt%u: Failed to allocate eager buffers\n",
rcd->ctxt);
+ ret = -ENOMEM;
goto bail_rcvegrbuf_phys;
}
@@ -1802,7 +1918,7 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd)
rcd->egrbufs.size = alloced_bytes;
hfi1_cdbg(PROC,
- "ctxt%u: Alloced %u rcv tid entries @ %uKB, total %zuKB\n",
+ "ctxt%u: Alloced %u rcv tid entries @ %uKB, total %uKB",
rcd->ctxt, rcd->egrbufs.alloced,
rcd->egrbufs.rcvtid_size / 1024, rcd->egrbufs.size / 1024);
@@ -1825,16 +1941,16 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd)
rcd->expected_count = MAX_TID_PAIR_ENTRIES * 2;
rcd->expected_base = rcd->eager_base + egrtop;
- hfi1_cdbg(PROC, "ctxt%u: eager:%u, exp:%u, egrbase:%u, expbase:%u\n",
+ hfi1_cdbg(PROC, "ctxt%u: eager:%u, exp:%u, egrbase:%u, expbase:%u",
rcd->ctxt, rcd->egrbufs.alloced, rcd->expected_count,
rcd->eager_base, rcd->expected_base);
if (!hfi1_rcvbuf_validate(rcd->egrbufs.rcvtid_size, PT_EAGER, &order)) {
hfi1_cdbg(PROC,
- "ctxt%u: current Eager buffer size is invalid %u\n",
+ "ctxt%u: current Eager buffer size is invalid %u",
rcd->ctxt, rcd->egrbufs.rcvtid_size);
ret = -EINVAL;
- goto bail;
+ goto bail_rcvegrbuf_phys;
}
for (idx = 0; idx < rcd->egrbufs.alloced; idx++) {
@@ -1842,7 +1958,8 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd)
rcd->egrbufs.rcvtids[idx].dma, order);
cond_resched();
}
- goto bail;
+
+ return 0;
bail_rcvegrbuf_phys:
for (idx = 0; idx < rcd->egrbufs.alloced &&
@@ -1856,6 +1973,6 @@ bail_rcvegrbuf_phys:
rcd->egrbufs.buffers[idx].dma = 0;
rcd->egrbufs.buffers[idx].len = 0;
}
-bail:
+
return ret;
}
diff --git a/drivers/infiniband/hw/hfi1/intr.c b/drivers/infiniband/hw/hfi1/intr.c
index 65348d16ab2f..d8dd1a599631 100644
--- a/drivers/infiniband/hw/hfi1/intr.c
+++ b/drivers/infiniband/hw/hfi1/intr.c
@@ -1,86 +1,50 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
* Copyright(c) 2015, 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
*/
#include <linux/pci.h>
#include <linux/delay.h>
+#include <linux/bitmap.h>
#include "hfi.h"
#include "common.h"
#include "sdma.h"
-/**
- * format_hwmsg - format a single hwerror message
- * @msg message buffer
- * @msgl length of message buffer
- * @hwmsg message to add to message buffer
- */
-static void format_hwmsg(char *msg, size_t msgl, const char *hwmsg)
+#define LINK_UP_DELAY 500 /* in microseconds */
+
+static void set_mgmt_allowed(struct hfi1_pportdata *ppd)
{
- strlcat(msg, "[", msgl);
- strlcat(msg, hwmsg, msgl);
- strlcat(msg, "]", msgl);
+ u32 frame;
+ struct hfi1_devdata *dd = ppd->dd;
+
+ if (ppd->neighbor_type == NEIGHBOR_TYPE_HFI) {
+ ppd->mgmt_allowed = 1;
+ } else {
+ read_8051_config(dd, REMOTE_LNI_INFO, GENERAL_CONFIG, &frame);
+ ppd->mgmt_allowed = (frame >> MGMT_ALLOWED_SHIFT)
+ & MGMT_ALLOWED_MASK;
+ }
}
-/**
- * hfi1_format_hwerrors - format hardware error messages for display
- * @hwerrs hardware errors bit vector
- * @hwerrmsgs hardware error descriptions
- * @nhwerrmsgs number of hwerrmsgs
- * @msg message buffer
- * @msgl message buffer length
+/*
+ * Our neighbor has indicated that we are allowed to act as a fabric
+ * manager, so place the full management partition key in the second
+ * (0-based) pkey array position. Note that we should already have
+ * the limited management partition key in array element 1, and also
+ * that the port is not yet up when add_full_mgmt_pkey() is invoked.
*/
-void hfi1_format_hwerrors(u64 hwerrs, const struct hfi1_hwerror_msgs *hwerrmsgs,
- size_t nhwerrmsgs, char *msg, size_t msgl)
+static void add_full_mgmt_pkey(struct hfi1_pportdata *ppd)
{
- int i;
+ struct hfi1_devdata *dd = ppd->dd;
- for (i = 0; i < nhwerrmsgs; i++)
- if (hwerrs & hwerrmsgs[i].mask)
- format_hwmsg(msg, msgl, hwerrmsgs[i].msg);
+ /* Sanity check - ppd->pkeys[2] should be 0, or already initialized */
+ if (!((ppd->pkeys[2] == 0) || (ppd->pkeys[2] == FULL_MGMT_P_KEY)))
+ dd_dev_warn(dd, "%s pkey[2] already set to 0x%x, resetting it to 0x%x\n",
+ __func__, ppd->pkeys[2], FULL_MGMT_P_KEY);
+ ppd->pkeys[2] = FULL_MGMT_P_KEY;
+ (void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_PKEYS, 0);
+ hfi1_event_pkey_change(ppd->dd, ppd->port);
}
static void signal_ib_event(struct hfi1_pportdata *ppd, enum ib_event_type ev)
@@ -101,9 +65,16 @@ static void signal_ib_event(struct hfi1_pportdata *ppd, enum ib_event_type ev)
ib_dispatch_event(&event);
}
-/*
+/**
+ * handle_linkup_change - finish linkup/down state changes
+ * @dd: valid device
+ * @linkup: link state information
+ *
* Handle a linkup or link down notification.
+ * The HW needs time to finish its link up state change. Give it that chance.
+ *
* This is called outside an interrupt.
+ *
*/
void handle_linkup_change(struct hfi1_devdata *dd, u32 linkup)
{
@@ -129,21 +100,39 @@ void handle_linkup_change(struct hfi1_devdata *dd, u32 linkup)
* the remote values. Both sides must be using the values.
*/
if (quick_linkup || dd->icode == ICODE_FUNCTIONAL_SIMULATOR) {
- set_up_vl15(dd, dd->vau, dd->vl15_init);
+ set_up_vau(dd, dd->vau);
+ set_up_vl15(dd, dd->vl15_init);
assign_remote_cm_au_table(dd, dd->vcu);
- ppd->neighbor_guid =
- read_csr(dd, DC_DC8051_STS_REMOTE_GUID);
- ppd->neighbor_type =
- read_csr(dd, DC_DC8051_STS_REMOTE_NODE_TYPE) &
- DC_DC8051_STS_REMOTE_NODE_TYPE_VAL_MASK;
- ppd->neighbor_port_number =
- read_csr(dd, DC_DC8051_STS_REMOTE_PORT_NO) &
- DC_DC8051_STS_REMOTE_PORT_NO_VAL_SMASK;
- dd_dev_info(dd, "Neighbor GUID: %llx Neighbor type %d\n",
- ppd->neighbor_guid,
- ppd->neighbor_type);
}
+ ppd->neighbor_guid =
+ read_csr(dd, DC_DC8051_STS_REMOTE_GUID);
+ ppd->neighbor_type =
+ read_csr(dd, DC_DC8051_STS_REMOTE_NODE_TYPE) &
+ DC_DC8051_STS_REMOTE_NODE_TYPE_VAL_MASK;
+ ppd->neighbor_port_number =
+ read_csr(dd, DC_DC8051_STS_REMOTE_PORT_NO) &
+ DC_DC8051_STS_REMOTE_PORT_NO_VAL_SMASK;
+ ppd->neighbor_fm_security =
+ read_csr(dd, DC_DC8051_STS_REMOTE_FM_SECURITY) &
+ DC_DC8051_STS_LOCAL_FM_SECURITY_DISABLED_MASK;
+ dd_dev_info(dd,
+ "Neighbor Guid %llx, Type %d, Port Num %d\n",
+ ppd->neighbor_guid, ppd->neighbor_type,
+ ppd->neighbor_port_number);
+
+ /* HW needs LINK_UP_DELAY to settle, give it that chance */
+ udelay(LINK_UP_DELAY);
+
+ /*
+ * 'MgmtAllowed' information, which is exchanged during
+ * LNI, is available at this point.
+ */
+ set_mgmt_allowed(ppd);
+
+ if (ppd->mgmt_allowed)
+ add_full_mgmt_pkey(ppd);
+
/* physical link went up */
ppd->linkup = 1;
ppd->offline_disabled_reason =
@@ -157,6 +146,7 @@ void handle_linkup_change(struct hfi1_devdata *dd, u32 linkup)
ppd->linkup = 0;
/* clear HW details of the previous connection */
+ ppd->actual_vls_operational = 0;
reset_link_credits(dd);
/* freeze after a link down to guarantee a clean egress */
@@ -184,12 +174,12 @@ void handle_user_interrupt(struct hfi1_ctxtdata *rcd)
unsigned long flags;
spin_lock_irqsave(&dd->uctxt_lock, flags);
- if (!rcd->cnt)
+ if (bitmap_empty(rcd->in_use_ctxts, HFI1_MAX_SHARED_CTXTS))
goto done;
if (test_and_clear_bit(HFI1_CTXT_WAITING_RCV, &rcd->event_flags)) {
wake_up_interruptible(&rcd->wait);
- hfi1_rcvctrl(dd, HFI1_RCVCTRL_INTRAVAIL_DIS, rcd->ctxt);
+ hfi1_rcvctrl(dd, HFI1_RCVCTRL_INTRAVAIL_DIS, rcd);
} else if (test_and_clear_bit(HFI1_CTXT_WAITING_URG,
&rcd->event_flags)) {
rcd->urgent++;
diff --git a/drivers/infiniband/hw/hfi1/iowait.c b/drivers/infiniband/hw/hfi1/iowait.c
new file mode 100644
index 000000000000..111489802614
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/iowait.c
@@ -0,0 +1,128 @@
+// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
+/*
+ * Copyright(c) 2018 Intel Corporation.
+ *
+ */
+#include "iowait.h"
+#include "trace_iowait.h"
+
+/* 1 priority == 16 starve_cnt */
+#define IOWAIT_PRIORITY_STARVE_SHIFT 4
+
+void iowait_set_flag(struct iowait *wait, u32 flag)
+{
+ trace_hfi1_iowait_set(wait, flag);
+ set_bit(flag, &wait->flags);
+}
+
+bool iowait_flag_set(struct iowait *wait, u32 flag)
+{
+ return test_bit(flag, &wait->flags);
+}
+
+inline void iowait_clear_flag(struct iowait *wait, u32 flag)
+{
+ trace_hfi1_iowait_clear(wait, flag);
+ clear_bit(flag, &wait->flags);
+}
+
+/*
+ * iowait_init() - initialize wait structure
+ * @wait: wait struct to initialize
+ * @tx_limit: limit for overflow queuing
+ * @func: restart function for workqueue
+ * @sleep: sleep function for no space
+ * @resume: wakeup function for no space
+ *
+ * This function initializes the iowait
+ * structure embedded in the QP or PQ.
+ *
+ */
+void iowait_init(struct iowait *wait, u32 tx_limit,
+ void (*func)(struct work_struct *work),
+ void (*tidfunc)(struct work_struct *work),
+ int (*sleep)(struct sdma_engine *sde,
+ struct iowait_work *wait,
+ struct sdma_txreq *tx,
+ uint seq,
+ bool pkts_sent),
+ void (*wakeup)(struct iowait *wait, int reason),
+ void (*sdma_drained)(struct iowait *wait),
+ void (*init_priority)(struct iowait *wait))
+{
+ int i;
+
+ wait->count = 0;
+ INIT_LIST_HEAD(&wait->list);
+ init_waitqueue_head(&wait->wait_dma);
+ init_waitqueue_head(&wait->wait_pio);
+ atomic_set(&wait->sdma_busy, 0);
+ atomic_set(&wait->pio_busy, 0);
+ wait->tx_limit = tx_limit;
+ wait->sleep = sleep;
+ wait->wakeup = wakeup;
+ wait->sdma_drained = sdma_drained;
+ wait->init_priority = init_priority;
+ wait->flags = 0;
+ for (i = 0; i < IOWAIT_SES; i++) {
+ wait->wait[i].iow = wait;
+ INIT_LIST_HEAD(&wait->wait[i].tx_head);
+ if (i == IOWAIT_IB_SE)
+ INIT_WORK(&wait->wait[i].iowork, func);
+ else
+ INIT_WORK(&wait->wait[i].iowork, tidfunc);
+ }
+}
+
+/**
+ * iowait_cancel_work - cancel all work in iowait
+ * @w: the iowait struct
+ */
+void iowait_cancel_work(struct iowait *w)
+{
+ cancel_work_sync(&iowait_get_ib_work(w)->iowork);
+ /* Make sure that the iowork for TID RDMA is used */
+ if (iowait_get_tid_work(w)->iowork.func)
+ cancel_work_sync(&iowait_get_tid_work(w)->iowork);
+}
+
+/**
+ * iowait_set_work_flag - set work flag based on leg
+ * @w: the iowait work struct
+ */
+int iowait_set_work_flag(struct iowait_work *w)
+{
+ if (w == &w->iow->wait[IOWAIT_IB_SE]) {
+ iowait_set_flag(w->iow, IOWAIT_PENDING_IB);
+ return IOWAIT_IB_SE;
+ }
+ iowait_set_flag(w->iow, IOWAIT_PENDING_TID);
+ return IOWAIT_TID_SE;
+}
+
+/**
+ * iowait_priority_update_top - update the top priority entry
+ * @w: the iowait struct
+ * @top: a pointer to the top priority entry
+ * @idx: the index of the current iowait in an array
+ * @top_idx: the array index for the iowait entry that has the top priority
+ *
+ * This function is called to compare the priority of a given
+ * iowait with the given top priority entry. The top index will
+ * be returned.
+ */
+uint iowait_priority_update_top(struct iowait *w,
+ struct iowait *top,
+ uint idx, uint top_idx)
+{
+ u8 cnt, tcnt;
+
+ /* Convert priority into starve_cnt and compare the total.*/
+ cnt = (w->priority << IOWAIT_PRIORITY_STARVE_SHIFT) + w->starved_cnt;
+ tcnt = (top->priority << IOWAIT_PRIORITY_STARVE_SHIFT) +
+ top->starved_cnt;
+ if (cnt > tcnt)
+ return idx;
+ else
+ return top_idx;
+}
diff --git a/drivers/infiniband/hw/hfi1/iowait.h b/drivers/infiniband/hw/hfi1/iowait.h
index d9740ddea6f1..7259f4f55700 100644
--- a/drivers/infiniband/hw/hfi1/iowait.h
+++ b/drivers/infiniband/hw/hfi1/iowait.h
@@ -1,54 +1,14 @@
-#ifndef _HFI1_IOWAIT_H
-#define _HFI1_IOWAIT_H
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
+ * Copyright(c) 2015 - 2018 Intel Corporation.
*/
+#ifndef _HFI1_IOWAIT_H
+#define _HFI1_IOWAIT_H
+
#include <linux/list.h>
#include <linux/workqueue.h>
+#include <linux/wait.h>
#include <linux/sched.h>
#include "sdma_txreq.h"
@@ -59,16 +19,48 @@
*/
typedef void (*restart_t)(struct work_struct *work);
+#define IOWAIT_PENDING_IB 0x0
+#define IOWAIT_PENDING_TID 0x1
+
+/*
+ * A QP can have multiple Send Engines (SEs).
+ *
+ * The current use case is for supporting a TID RDMA
+ * packet build/xmit mechanism independent from verbs.
+ */
+#define IOWAIT_SES 2
+#define IOWAIT_IB_SE 0
+#define IOWAIT_TID_SE 1
+
struct sdma_txreq;
struct sdma_engine;
/**
- * struct iowait - linkage for delayed progress/waiting
+ * @iowork: the work struct
+ * @tx_head: list of prebuilt packets
+ * @iow: the parent iowait structure
+ *
+ * This structure is the work item (process) specific
+ * details associated with the each of the two SEs of the
+ * QP.
+ *
+ * The workstruct and the queued TXs are unique to each
+ * SE.
+ */
+struct iowait;
+struct iowait_work {
+ struct work_struct iowork;
+ struct list_head tx_head;
+ struct iowait *iow;
+};
+
+/**
* @list: used to add/insert into QP/PQ wait lists
- * @lock: uses to record the list head lock
* @tx_head: overflow list of sdma_txreq's
* @sleep: no space callback
* @wakeup: space callback wakeup
* @sdma_drained: sdma count drained
+ * @init_priority: callback to manipulate priority
+ * @lock: lock protected head of wait queue
* @iowork: workqueue overhead
* @wait_dma: wait for sdma_busy == 0
* @wait_pio: wait for pio_busy == 0
@@ -76,6 +68,8 @@ struct sdma_engine;
* @count: total number of descriptors in tx_head'ed list
* @tx_limit: limit for overflow queuing
* @tx_count: number of tx entry's in tx_head'ed list
+ * @flags: wait flags (one per QP)
+ * @wait: SE array for multiple legs
*
* This is to be embedded in user's state structure
* (QP or PQ).
@@ -86,31 +80,34 @@ struct sdma_engine;
* are callbacks for the ULP to implement
* what ever queuing/dequeuing of
* the embedded iowait and its containing struct
- * when a resource shortage like SDMA ring space is seen.
+ * when a resource shortage like SDMA ring space
+ * or PIO credit space is seen.
*
* Both potentially have locks help
- * so sleeping is not allowed.
+ * so sleeping is not allowed and it is not
+ * supported to submit txreqs from the wakeup
+ * call directly because of lock conflicts.
*
* The wait_dma member along with the iow
*
* The lock field is used by waiters to record
* the seqlock_t that guards the list head.
- * Waiters explicity know that, but the destroy
+ * Waiters explicitly know that, but the destroy
* code that unwaits QPs does not.
*/
-
struct iowait {
struct list_head list;
- struct list_head tx_head;
int (*sleep)(
struct sdma_engine *sde,
- struct iowait *wait,
+ struct iowait_work *wait,
struct sdma_txreq *tx,
- unsigned seq);
+ uint seq,
+ bool pkts_sent
+ );
void (*wakeup)(struct iowait *wait, int reason);
void (*sdma_drained)(struct iowait *wait);
+ void (*init_priority)(struct iowait *wait);
seqlock_t *lock;
- struct work_struct iowork;
wait_queue_head_t wait_dma;
wait_queue_head_t wait_pio;
atomic_t sdma_busy;
@@ -118,62 +115,52 @@ struct iowait {
u32 count;
u32 tx_limit;
u32 tx_count;
+ u8 starved_cnt;
+ u8 priority;
+ unsigned long flags;
+ struct iowait_work wait[IOWAIT_SES];
};
#define SDMA_AVAIL_REASON 0
-/**
- * iowait_init() - initialize wait structure
- * @wait: wait struct to initialize
- * @tx_limit: limit for overflow queuing
- * @func: restart function for workqueue
- * @sleep: sleep function for no space
- * @resume: wakeup function for no space
- *
- * This function initializes the iowait
- * structure embedded in the QP or PQ.
- *
- */
+void iowait_set_flag(struct iowait *wait, u32 flag);
+bool iowait_flag_set(struct iowait *wait, u32 flag);
+void iowait_clear_flag(struct iowait *wait, u32 flag);
-static inline void iowait_init(
- struct iowait *wait,
- u32 tx_limit,
- void (*func)(struct work_struct *work),
- int (*sleep)(
- struct sdma_engine *sde,
- struct iowait *wait,
- struct sdma_txreq *tx,
- unsigned seq),
- void (*wakeup)(struct iowait *wait, int reason),
- void (*sdma_drained)(struct iowait *wait))
-{
- wait->count = 0;
- wait->lock = NULL;
- INIT_LIST_HEAD(&wait->list);
- INIT_LIST_HEAD(&wait->tx_head);
- INIT_WORK(&wait->iowork, func);
- init_waitqueue_head(&wait->wait_dma);
- init_waitqueue_head(&wait->wait_pio);
- atomic_set(&wait->sdma_busy, 0);
- atomic_set(&wait->pio_busy, 0);
- wait->tx_limit = tx_limit;
- wait->sleep = sleep;
- wait->wakeup = wakeup;
- wait->sdma_drained = sdma_drained;
-}
+void iowait_init(struct iowait *wait, u32 tx_limit,
+ void (*func)(struct work_struct *work),
+ void (*tidfunc)(struct work_struct *work),
+ int (*sleep)(struct sdma_engine *sde,
+ struct iowait_work *wait,
+ struct sdma_txreq *tx,
+ uint seq,
+ bool pkts_sent),
+ void (*wakeup)(struct iowait *wait, int reason),
+ void (*sdma_drained)(struct iowait *wait),
+ void (*init_priority)(struct iowait *wait));
/**
- * iowait_schedule() - initialize wait structure
+ * iowait_schedule() - schedule the default send engine work
* @wait: wait struct to schedule
* @wq: workqueue for schedule
* @cpu: cpu
*/
-static inline void iowait_schedule(
- struct iowait *wait,
- struct workqueue_struct *wq,
- int cpu)
+static inline bool iowait_schedule(struct iowait *wait,
+ struct workqueue_struct *wq, int cpu)
{
- queue_work_on(cpu, wq, &wait->iowork);
+ return !!queue_work_on(cpu, wq, &wait->wait[IOWAIT_IB_SE].iowork);
+}
+
+/**
+ * iowait_tid_schedule - schedule the tid SE
+ * @wait: the iowait structure
+ * @wq: the work queue
+ * @cpu: the cpu
+ */
+static inline bool iowait_tid_schedule(struct iowait *wait,
+ struct workqueue_struct *wq, int cpu)
+{
+ return !!queue_work_on(cpu, wq, &wait->wait[IOWAIT_TID_SE].iowork);
}
/**
@@ -224,6 +211,8 @@ static inline void iowait_sdma_add(struct iowait *wait, int count)
*/
static inline int iowait_sdma_dec(struct iowait *wait)
{
+ if (!wait)
+ return 0;
return atomic_dec_and_test(&wait->sdma_busy);
}
@@ -263,11 +252,13 @@ static inline void iowait_pio_inc(struct iowait *wait)
}
/**
- * iowait_sdma_dec - note pio complete
+ * iowait_pio_dec - note pio complete
* @wait: iowait structure
*/
static inline int iowait_pio_dec(struct iowait *wait)
{
+ if (!wait)
+ return 0;
return atomic_dec_and_test(&wait->pio_busy);
}
@@ -289,9 +280,9 @@ static inline void iowait_drain_wakeup(struct iowait *wait)
/**
* iowait_get_txhead() - get packet off of iowait list
*
- * @wait wait struture
+ * @wait: iowait_work structure
*/
-static inline struct sdma_txreq *iowait_get_txhead(struct iowait *wait)
+static inline struct sdma_txreq *iowait_get_txhead(struct iowait_work *wait)
{
struct sdma_txreq *tx = NULL;
@@ -305,4 +296,162 @@ static inline struct sdma_txreq *iowait_get_txhead(struct iowait *wait)
return tx;
}
+static inline u16 iowait_get_desc(struct iowait_work *w)
+{
+ u16 num_desc = 0;
+ struct sdma_txreq *tx = NULL;
+
+ if (!list_empty(&w->tx_head)) {
+ tx = list_first_entry(&w->tx_head, struct sdma_txreq,
+ list);
+ num_desc = tx->num_desc;
+ if (tx->flags & SDMA_TXREQ_F_VIP)
+ w->iow->priority++;
+ }
+ return num_desc;
+}
+
+static inline u32 iowait_get_all_desc(struct iowait *w)
+{
+ u32 num_desc = 0;
+
+ num_desc = iowait_get_desc(&w->wait[IOWAIT_IB_SE]);
+ num_desc += iowait_get_desc(&w->wait[IOWAIT_TID_SE]);
+ return num_desc;
+}
+
+static inline void iowait_update_priority(struct iowait_work *w)
+{
+ struct sdma_txreq *tx = NULL;
+
+ if (!list_empty(&w->tx_head)) {
+ tx = list_first_entry(&w->tx_head, struct sdma_txreq,
+ list);
+ if (tx->flags & SDMA_TXREQ_F_VIP)
+ w->iow->priority++;
+ }
+}
+
+static inline void iowait_update_all_priority(struct iowait *w)
+{
+ iowait_update_priority(&w->wait[IOWAIT_IB_SE]);
+ iowait_update_priority(&w->wait[IOWAIT_TID_SE]);
+}
+
+static inline void iowait_init_priority(struct iowait *w)
+{
+ w->priority = 0;
+ if (w->init_priority)
+ w->init_priority(w);
+}
+
+static inline void iowait_get_priority(struct iowait *w)
+{
+ iowait_init_priority(w);
+ iowait_update_all_priority(w);
+}
+
+/**
+ * iowait_queue - Put the iowait on a wait queue
+ * @pkts_sent: have some packets been sent before queuing?
+ * @w: the iowait struct
+ * @wait_head: the wait queue
+ *
+ * This function is called to insert an iowait struct into a
+ * wait queue after a resource (eg, sdma descriptor or pio
+ * buffer) is run out.
+ */
+static inline void iowait_queue(bool pkts_sent, struct iowait *w,
+ struct list_head *wait_head)
+{
+ /*
+ * To play fair, insert the iowait at the tail of the wait queue if it
+ * has already sent some packets; Otherwise, put it at the head.
+ * However, if it has priority packets to send, also put it at the
+ * head.
+ */
+ if (pkts_sent)
+ w->starved_cnt = 0;
+ else
+ w->starved_cnt++;
+
+ if (w->priority > 0 || !pkts_sent)
+ list_add(&w->list, wait_head);
+ else
+ list_add_tail(&w->list, wait_head);
+}
+
+/**
+ * iowait_starve_clear - clear the wait queue's starve count
+ * @pkts_sent: have some packets been sent?
+ * @w: the iowait struct
+ *
+ * This function is called to clear the starve count. If no
+ * packets have been sent, the starve count will not be cleared.
+ */
+static inline void iowait_starve_clear(bool pkts_sent, struct iowait *w)
+{
+ if (pkts_sent)
+ w->starved_cnt = 0;
+}
+
+/* Update the top priority index */
+uint iowait_priority_update_top(struct iowait *w,
+ struct iowait *top,
+ uint idx, uint top_idx);
+
+/**
+ * iowait_packet_queued() - determine if a packet is queued
+ * @wait: the iowait_work structure
+ */
+static inline bool iowait_packet_queued(struct iowait_work *wait)
+{
+ return !list_empty(&wait->tx_head);
+}
+
+/**
+ * inc_wait_count - increment wait counts
+ * @w: the log work struct
+ * @n: the count
+ */
+static inline void iowait_inc_wait_count(struct iowait_work *w, u16 n)
+{
+ if (!w)
+ return;
+ w->iow->tx_count++;
+ w->iow->count += n;
+}
+
+/**
+ * iowait_get_tid_work - return iowait_work for tid SE
+ * @w: the iowait struct
+ */
+static inline struct iowait_work *iowait_get_tid_work(struct iowait *w)
+{
+ return &w->wait[IOWAIT_TID_SE];
+}
+
+/**
+ * iowait_get_ib_work - return iowait_work for ib SE
+ * @w: the iowait struct
+ */
+static inline struct iowait_work *iowait_get_ib_work(struct iowait *w)
+{
+ return &w->wait[IOWAIT_IB_SE];
+}
+
+/**
+ * iowait_ioww_to_iow - return iowait given iowait_work
+ * @w: the iowait_work struct
+ */
+static inline struct iowait *iowait_ioww_to_iow(struct iowait_work *w)
+{
+ if (likely(w))
+ return w->iow;
+ return NULL;
+}
+
+void iowait_cancel_work(struct iowait *w);
+int iowait_set_work_flag(struct iowait_work *w);
+
#endif
diff --git a/drivers/infiniband/hw/hfi1/ipoib.h b/drivers/infiniband/hw/hfi1/ipoib.h
new file mode 100644
index 000000000000..aec60d4888eb
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/ipoib.h
@@ -0,0 +1,171 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
+/*
+ * Copyright(c) 2020 Intel Corporation.
+ *
+ */
+
+/*
+ * This file contains HFI1 support for IPOIB functionality
+ */
+
+#ifndef HFI1_IPOIB_H
+#define HFI1_IPOIB_H
+
+#include <linux/types.h>
+#include <linux/stddef.h>
+#include <linux/atomic.h>
+#include <linux/netdevice.h>
+#include <linux/slab.h>
+#include <linux/skbuff.h>
+#include <linux/list.h>
+#include <linux/if_infiniband.h>
+
+#include "hfi.h"
+#include "iowait.h"
+#include "netdev.h"
+
+#include <rdma/ib_verbs.h>
+
+#define HFI1_IPOIB_ENTROPY_SHIFT 24
+
+#define HFI1_IPOIB_TXREQ_NAME_LEN 32
+
+#define HFI1_IPOIB_PSEUDO_LEN 20
+#define HFI1_IPOIB_ENCAP_LEN 4
+
+struct hfi1_ipoib_dev_priv;
+
+union hfi1_ipoib_flow {
+ u16 as_int;
+ struct {
+ u8 tx_queue;
+ u8 sc5;
+ } __attribute__((__packed__));
+};
+
+/**
+ * struct ipoib_txreq - IPOIB transmit descriptor
+ * @txreq: sdma transmit request
+ * @sdma_hdr: 9b ib headers
+ * @sdma_status: status returned by sdma engine
+ * @complete: non-zero implies complete
+ * @priv: ipoib netdev private data
+ * @txq: txq on which skb was output
+ * @skb: skb to send
+ */
+struct ipoib_txreq {
+ struct sdma_txreq txreq;
+ struct hfi1_sdma_header *sdma_hdr;
+ int sdma_status;
+ int complete;
+ struct hfi1_ipoib_dev_priv *priv;
+ struct hfi1_ipoib_txq *txq;
+ struct sk_buff *skb;
+};
+
+/**
+ * struct hfi1_ipoib_circ_buf - List of items to be processed
+ * @items: ring of items each a power of two size
+ * @max_items: max items + 1 that the ring can contain
+ * @shift: log2 of size for getting txreq
+ * @sent_txreqs: count of txreqs posted to sdma
+ * @tail: ring tail
+ * @stops: count of stops of queue
+ * @ring_full: ring has been filled
+ * @no_desc: descriptor shortage seen
+ * @complete_txreqs: count of txreqs completed by sdma
+ * @head: ring head
+ */
+struct hfi1_ipoib_circ_buf {
+ void *items;
+ u32 max_items;
+ u32 shift;
+ /* consumer cache line */
+ u64 ____cacheline_aligned_in_smp sent_txreqs;
+ u32 avail;
+ u32 tail;
+ atomic_t stops;
+ atomic_t ring_full;
+ atomic_t no_desc;
+ /* producer cache line */
+ u64 ____cacheline_aligned_in_smp complete_txreqs;
+ u32 head;
+};
+
+/**
+ * struct hfi1_ipoib_txq - IPOIB per Tx queue information
+ * @priv: private pointer
+ * @sde: sdma engine
+ * @tx_list: tx request list
+ * @sent_txreqs: count of txreqs posted to sdma
+ * @flow: tracks when list needs to be flushed for a flow change
+ * @q_idx: ipoib Tx queue index
+ * @pkts_sent: indicator packets have been sent from this queue
+ * @wait: iowait structure
+ * @napi: pointer to tx napi interface
+ * @tx_ring: ring of ipoib txreqs to be reaped by napi callback
+ */
+struct hfi1_ipoib_txq {
+ struct napi_struct napi;
+ struct hfi1_ipoib_dev_priv *priv;
+ struct sdma_engine *sde;
+ struct list_head tx_list;
+ union hfi1_ipoib_flow flow;
+ u8 q_idx;
+ bool pkts_sent;
+ struct iowait wait;
+
+ struct hfi1_ipoib_circ_buf ____cacheline_aligned_in_smp tx_ring;
+};
+
+struct hfi1_ipoib_dev_priv {
+ struct hfi1_devdata *dd;
+ struct net_device *netdev;
+ struct ib_device *device;
+ struct hfi1_ipoib_txq *txqs;
+ const struct net_device_ops *netdev_ops;
+ struct rvt_qp *qp;
+ u32 qkey;
+ u16 pkey;
+ u16 pkey_index;
+ u8 port_num;
+};
+
+/* hfi1 ipoib rdma netdev's private data structure */
+struct hfi1_ipoib_rdma_netdev {
+ struct rdma_netdev rn; /* keep this first */
+ /* followed by device private data */
+ struct hfi1_ipoib_dev_priv dev_priv;
+};
+
+static inline struct hfi1_ipoib_dev_priv *
+hfi1_ipoib_priv(const struct net_device *dev)
+{
+ return &((struct hfi1_ipoib_rdma_netdev *)netdev_priv(dev))->dev_priv;
+}
+
+int hfi1_ipoib_send(struct net_device *dev,
+ struct sk_buff *skb,
+ struct ib_ah *address,
+ u32 dqpn);
+
+int hfi1_ipoib_txreq_init(struct hfi1_ipoib_dev_priv *priv);
+void hfi1_ipoib_txreq_deinit(struct hfi1_ipoib_dev_priv *priv);
+
+int hfi1_ipoib_rxq_init(struct net_device *dev);
+void hfi1_ipoib_rxq_deinit(struct net_device *dev);
+
+void hfi1_ipoib_napi_tx_enable(struct net_device *dev);
+void hfi1_ipoib_napi_tx_disable(struct net_device *dev);
+
+struct sk_buff *hfi1_ipoib_prepare_skb(struct hfi1_netdev_rxq *rxq,
+ int size, void *data);
+
+int hfi1_ipoib_rn_get_params(struct ib_device *device,
+ u32 port_num,
+ enum rdma_netdev_t type,
+ struct rdma_netdev_alloc_params *params);
+
+void hfi1_ipoib_tx_timeout(struct net_device *dev, unsigned int q);
+
+#endif /* _IPOIB_H */
diff --git a/drivers/infiniband/hw/hfi1/ipoib_main.c b/drivers/infiniband/hw/hfi1/ipoib_main.c
new file mode 100644
index 000000000000..7c9d5203002b
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/ipoib_main.c
@@ -0,0 +1,250 @@
+// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
+/*
+ * Copyright(c) 2020 Intel Corporation.
+ *
+ */
+
+/*
+ * This file contains HFI1 support for ipoib functionality
+ */
+
+#include "ipoib.h"
+#include "hfi.h"
+
+static u32 qpn_from_mac(const u8 *mac_arr)
+{
+ return (u32)mac_arr[1] << 16 | mac_arr[2] << 8 | mac_arr[3];
+}
+
+static int hfi1_ipoib_dev_init(struct net_device *dev)
+{
+ struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
+ int ret;
+
+ ret = priv->netdev_ops->ndo_init(dev);
+ if (ret)
+ return ret;
+
+ ret = hfi1_netdev_add_data(priv->dd,
+ qpn_from_mac(priv->netdev->dev_addr),
+ dev);
+ if (ret < 0) {
+ priv->netdev_ops->ndo_uninit(dev);
+ return ret;
+ }
+
+ return 0;
+}
+
+static void hfi1_ipoib_dev_uninit(struct net_device *dev)
+{
+ struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
+
+ hfi1_netdev_remove_data(priv->dd, qpn_from_mac(priv->netdev->dev_addr));
+
+ priv->netdev_ops->ndo_uninit(dev);
+}
+
+static int hfi1_ipoib_dev_open(struct net_device *dev)
+{
+ struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
+ int ret;
+
+ ret = priv->netdev_ops->ndo_open(dev);
+ if (!ret) {
+ struct hfi1_ibport *ibp = to_iport(priv->device,
+ priv->port_num);
+ struct rvt_qp *qp;
+ u32 qpn = qpn_from_mac(priv->netdev->dev_addr);
+
+ rcu_read_lock();
+ qp = rvt_lookup_qpn(ib_to_rvt(priv->device), &ibp->rvp, qpn);
+ if (!qp) {
+ rcu_read_unlock();
+ priv->netdev_ops->ndo_stop(dev);
+ return -EINVAL;
+ }
+ rvt_get_qp(qp);
+ priv->qp = qp;
+ rcu_read_unlock();
+
+ hfi1_netdev_enable_queues(priv->dd);
+ hfi1_ipoib_napi_tx_enable(dev);
+ }
+
+ return ret;
+}
+
+static int hfi1_ipoib_dev_stop(struct net_device *dev)
+{
+ struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
+
+ if (!priv->qp)
+ return 0;
+
+ hfi1_ipoib_napi_tx_disable(dev);
+ hfi1_netdev_disable_queues(priv->dd);
+
+ rvt_put_qp(priv->qp);
+ priv->qp = NULL;
+
+ return priv->netdev_ops->ndo_stop(dev);
+}
+
+static const struct net_device_ops hfi1_ipoib_netdev_ops = {
+ .ndo_init = hfi1_ipoib_dev_init,
+ .ndo_uninit = hfi1_ipoib_dev_uninit,
+ .ndo_open = hfi1_ipoib_dev_open,
+ .ndo_stop = hfi1_ipoib_dev_stop,
+};
+
+static int hfi1_ipoib_mcast_attach(struct net_device *dev,
+ struct ib_device *device,
+ union ib_gid *mgid,
+ u16 mlid,
+ int set_qkey,
+ u32 qkey)
+{
+ struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
+ u32 qpn = (u32)qpn_from_mac(priv->netdev->dev_addr);
+ struct hfi1_ibport *ibp = to_iport(priv->device, priv->port_num);
+ struct rvt_qp *qp;
+ int ret = -EINVAL;
+
+ rcu_read_lock();
+
+ qp = rvt_lookup_qpn(ib_to_rvt(priv->device), &ibp->rvp, qpn);
+ if (qp) {
+ rvt_get_qp(qp);
+ rcu_read_unlock();
+ if (set_qkey)
+ priv->qkey = qkey;
+
+ /* attach QP to multicast group */
+ ret = ib_attach_mcast(&qp->ibqp, mgid, mlid);
+ rvt_put_qp(qp);
+ } else {
+ rcu_read_unlock();
+ }
+
+ return ret;
+}
+
+static int hfi1_ipoib_mcast_detach(struct net_device *dev,
+ struct ib_device *device,
+ union ib_gid *mgid,
+ u16 mlid)
+{
+ struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
+ u32 qpn = (u32)qpn_from_mac(priv->netdev->dev_addr);
+ struct hfi1_ibport *ibp = to_iport(priv->device, priv->port_num);
+ struct rvt_qp *qp;
+ int ret = -EINVAL;
+
+ rcu_read_lock();
+
+ qp = rvt_lookup_qpn(ib_to_rvt(priv->device), &ibp->rvp, qpn);
+ if (qp) {
+ rvt_get_qp(qp);
+ rcu_read_unlock();
+ ret = ib_detach_mcast(&qp->ibqp, mgid, mlid);
+ rvt_put_qp(qp);
+ } else {
+ rcu_read_unlock();
+ }
+ return ret;
+}
+
+static void hfi1_ipoib_netdev_dtor(struct net_device *dev)
+{
+ struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
+
+ hfi1_ipoib_txreq_deinit(priv);
+ hfi1_ipoib_rxq_deinit(priv->netdev);
+}
+
+static void hfi1_ipoib_set_id(struct net_device *dev, int id)
+{
+ struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
+
+ priv->pkey_index = (u16)id;
+ ib_query_pkey(priv->device,
+ priv->port_num,
+ priv->pkey_index,
+ &priv->pkey);
+}
+
+static int hfi1_ipoib_setup_rn(struct ib_device *device,
+ u32 port_num,
+ struct net_device *netdev,
+ void *param)
+{
+ struct hfi1_devdata *dd = dd_from_ibdev(device);
+ struct rdma_netdev *rn = netdev_priv(netdev);
+ struct hfi1_ipoib_dev_priv *priv;
+ int rc;
+
+ rn->send = hfi1_ipoib_send;
+ rn->tx_timeout = hfi1_ipoib_tx_timeout;
+ rn->attach_mcast = hfi1_ipoib_mcast_attach;
+ rn->detach_mcast = hfi1_ipoib_mcast_detach;
+ rn->set_id = hfi1_ipoib_set_id;
+ rn->hca = device;
+ rn->port_num = port_num;
+ rn->mtu = netdev->mtu;
+
+ priv = hfi1_ipoib_priv(netdev);
+ priv->dd = dd;
+ priv->netdev = netdev;
+ priv->device = device;
+ priv->port_num = port_num;
+ priv->netdev_ops = netdev->netdev_ops;
+
+ ib_query_pkey(device, port_num, priv->pkey_index, &priv->pkey);
+
+ rc = hfi1_ipoib_txreq_init(priv);
+ if (rc) {
+ dd_dev_err(dd, "IPoIB netdev TX init - failed(%d)\n", rc);
+ return rc;
+ }
+
+ rc = hfi1_ipoib_rxq_init(netdev);
+ if (rc) {
+ dd_dev_err(dd, "IPoIB netdev RX init - failed(%d)\n", rc);
+ hfi1_ipoib_txreq_deinit(priv);
+ return rc;
+ }
+
+ netdev->netdev_ops = &hfi1_ipoib_netdev_ops;
+
+ netdev->priv_destructor = hfi1_ipoib_netdev_dtor;
+ netdev->needs_free_netdev = true;
+ netdev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS;
+
+ return 0;
+}
+
+int hfi1_ipoib_rn_get_params(struct ib_device *device,
+ u32 port_num,
+ enum rdma_netdev_t type,
+ struct rdma_netdev_alloc_params *params)
+{
+ struct hfi1_devdata *dd = dd_from_ibdev(device);
+
+ if (type != RDMA_NETDEV_IPOIB)
+ return -EOPNOTSUPP;
+
+ if (!HFI1_CAP_IS_KSET(AIP) || !dd->num_netdev_contexts)
+ return -EOPNOTSUPP;
+
+ if (!port_num || port_num > dd->num_pports)
+ return -EINVAL;
+
+ params->sizeof_priv = sizeof(struct hfi1_ipoib_rdma_netdev);
+ params->txqs = dd->num_sdma;
+ params->rxqs = dd->num_netdev_contexts;
+ params->param = NULL;
+ params->initialize_rdma_netdev = hfi1_ipoib_setup_rn;
+
+ return 0;
+}
diff --git a/drivers/infiniband/hw/hfi1/ipoib_rx.c b/drivers/infiniband/hw/hfi1/ipoib_rx.c
new file mode 100644
index 000000000000..629691a572ef
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/ipoib_rx.c
@@ -0,0 +1,92 @@
+// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
+/*
+ * Copyright(c) 2020 Intel Corporation.
+ *
+ */
+
+#include "netdev.h"
+#include "ipoib.h"
+
+#define HFI1_IPOIB_SKB_PAD ((NET_SKB_PAD) + (NET_IP_ALIGN))
+
+static void copy_ipoib_buf(struct sk_buff *skb, void *data, int size)
+{
+ skb_checksum_none_assert(skb);
+ skb->protocol = *((__be16 *)data);
+
+ skb_put_data(skb, data, size);
+ skb->mac_header = HFI1_IPOIB_PSEUDO_LEN;
+ skb_pull(skb, HFI1_IPOIB_ENCAP_LEN);
+}
+
+static struct sk_buff *prepare_frag_skb(struct napi_struct *napi, int size)
+{
+ struct sk_buff *skb;
+ int skb_size = SKB_DATA_ALIGN(size + HFI1_IPOIB_SKB_PAD);
+ void *frag;
+
+ skb_size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+ skb_size = SKB_DATA_ALIGN(skb_size);
+ frag = napi_alloc_frag(skb_size);
+
+ if (unlikely(!frag))
+ return napi_alloc_skb(napi, size);
+
+ skb = build_skb(frag, skb_size);
+
+ if (unlikely(!skb)) {
+ skb_free_frag(frag);
+ return NULL;
+ }
+
+ skb_reserve(skb, HFI1_IPOIB_SKB_PAD);
+ return skb;
+}
+
+struct sk_buff *hfi1_ipoib_prepare_skb(struct hfi1_netdev_rxq *rxq,
+ int size, void *data)
+{
+ struct napi_struct *napi = &rxq->napi;
+ int skb_size = size + HFI1_IPOIB_ENCAP_LEN;
+ struct sk_buff *skb;
+
+ /*
+ * For smaller(4k + skb overhead) allocations we will go using
+ * napi cache. Otherwise we will try to use napi frag cache.
+ */
+ if (size <= SKB_WITH_OVERHEAD(PAGE_SIZE))
+ skb = napi_alloc_skb(napi, skb_size);
+ else
+ skb = prepare_frag_skb(napi, skb_size);
+
+ if (unlikely(!skb))
+ return NULL;
+
+ copy_ipoib_buf(skb, data, size);
+
+ return skb;
+}
+
+int hfi1_ipoib_rxq_init(struct net_device *netdev)
+{
+ struct hfi1_ipoib_dev_priv *ipoib_priv = hfi1_ipoib_priv(netdev);
+ struct hfi1_devdata *dd = ipoib_priv->dd;
+ int ret;
+
+ ret = hfi1_netdev_rx_init(dd);
+ if (ret)
+ return ret;
+
+ hfi1_init_aip_rsm(dd);
+
+ return ret;
+}
+
+void hfi1_ipoib_rxq_deinit(struct net_device *netdev)
+{
+ struct hfi1_ipoib_dev_priv *ipoib_priv = hfi1_ipoib_priv(netdev);
+ struct hfi1_devdata *dd = ipoib_priv->dd;
+
+ hfi1_deinit_aip_rsm(dd);
+ hfi1_netdev_rx_destroy(dd);
+}
diff --git a/drivers/infiniband/hw/hfi1/ipoib_tx.c b/drivers/infiniband/hw/hfi1/ipoib_tx.c
new file mode 100644
index 000000000000..8b9cc55db59d
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/ipoib_tx.c
@@ -0,0 +1,868 @@
+// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
+/*
+ * Copyright(c) 2020 Intel Corporation.
+ *
+ */
+
+/*
+ * This file contains HFI1 support for IPOIB SDMA functionality
+ */
+
+#include <linux/log2.h>
+#include <linux/circ_buf.h>
+
+#include "sdma.h"
+#include "verbs.h"
+#include "trace_ibhdrs.h"
+#include "ipoib.h"
+#include "trace_tx.h"
+
+/* Add a convenience helper */
+#define CIRC_ADD(val, add, size) (((val) + (add)) & ((size) - 1))
+#define CIRC_NEXT(val, size) CIRC_ADD(val, 1, size)
+#define CIRC_PREV(val, size) CIRC_ADD(val, -1, size)
+
+struct ipoib_txparms {
+ struct hfi1_devdata *dd;
+ struct rdma_ah_attr *ah_attr;
+ struct hfi1_ibport *ibp;
+ struct hfi1_ipoib_txq *txq;
+ union hfi1_ipoib_flow flow;
+ u32 dqpn;
+ u8 hdr_dwords;
+ u8 entropy;
+};
+
+static struct ipoib_txreq *
+hfi1_txreq_from_idx(struct hfi1_ipoib_circ_buf *r, u32 idx)
+{
+ return (struct ipoib_txreq *)(r->items + (idx << r->shift));
+}
+
+static u32 hfi1_ipoib_txreqs(const u64 sent, const u64 completed)
+{
+ return sent - completed;
+}
+
+static u64 hfi1_ipoib_used(struct hfi1_ipoib_txq *txq)
+{
+ return hfi1_ipoib_txreqs(txq->tx_ring.sent_txreqs,
+ txq->tx_ring.complete_txreqs);
+}
+
+static void hfi1_ipoib_stop_txq(struct hfi1_ipoib_txq *txq)
+{
+ trace_hfi1_txq_stop(txq);
+ if (atomic_inc_return(&txq->tx_ring.stops) == 1)
+ netif_stop_subqueue(txq->priv->netdev, txq->q_idx);
+}
+
+static void hfi1_ipoib_wake_txq(struct hfi1_ipoib_txq *txq)
+{
+ trace_hfi1_txq_wake(txq);
+ if (atomic_dec_and_test(&txq->tx_ring.stops))
+ netif_wake_subqueue(txq->priv->netdev, txq->q_idx);
+}
+
+static uint hfi1_ipoib_ring_hwat(struct hfi1_ipoib_txq *txq)
+{
+ return min_t(uint, txq->priv->netdev->tx_queue_len,
+ txq->tx_ring.max_items - 1);
+}
+
+static uint hfi1_ipoib_ring_lwat(struct hfi1_ipoib_txq *txq)
+{
+ return min_t(uint, txq->priv->netdev->tx_queue_len,
+ txq->tx_ring.max_items) >> 1;
+}
+
+static void hfi1_ipoib_check_queue_depth(struct hfi1_ipoib_txq *txq)
+{
+ ++txq->tx_ring.sent_txreqs;
+ if (hfi1_ipoib_used(txq) >= hfi1_ipoib_ring_hwat(txq) &&
+ !atomic_xchg(&txq->tx_ring.ring_full, 1)) {
+ trace_hfi1_txq_full(txq);
+ hfi1_ipoib_stop_txq(txq);
+ }
+}
+
+static void hfi1_ipoib_check_queue_stopped(struct hfi1_ipoib_txq *txq)
+{
+ struct net_device *dev = txq->priv->netdev;
+
+ /* If shutting down just return as queue state is irrelevant */
+ if (unlikely(dev->reg_state != NETREG_REGISTERED))
+ return;
+
+ /*
+ * When the queue has been drained to less than half full it will be
+ * restarted.
+ * The size of the txreq ring is fixed at initialization.
+ * The tx queue len can be adjusted upward while the interface is
+ * running.
+ * The tx queue len can be large enough to overflow the txreq_ring.
+ * Use the minimum of the current tx_queue_len or the rings max txreqs
+ * to protect against ring overflow.
+ */
+ if (hfi1_ipoib_used(txq) < hfi1_ipoib_ring_lwat(txq) &&
+ atomic_xchg(&txq->tx_ring.ring_full, 0)) {
+ trace_hfi1_txq_xmit_unstopped(txq);
+ hfi1_ipoib_wake_txq(txq);
+ }
+}
+
+static void hfi1_ipoib_free_tx(struct ipoib_txreq *tx, int budget)
+{
+ struct hfi1_ipoib_dev_priv *priv = tx->txq->priv;
+
+ if (likely(!tx->sdma_status)) {
+ dev_sw_netstats_tx_add(priv->netdev, 1, tx->skb->len);
+ } else {
+ ++priv->netdev->stats.tx_errors;
+ dd_dev_warn(priv->dd,
+ "%s: Status = 0x%x pbc 0x%llx txq = %d sde = %d\n",
+ __func__, tx->sdma_status,
+ le64_to_cpu(tx->sdma_hdr->pbc), tx->txq->q_idx,
+ tx->txq->sde->this_idx);
+ }
+
+ napi_consume_skb(tx->skb, budget);
+ tx->skb = NULL;
+ sdma_txclean(priv->dd, &tx->txreq);
+}
+
+static void hfi1_ipoib_drain_tx_ring(struct hfi1_ipoib_txq *txq)
+{
+ struct hfi1_ipoib_circ_buf *tx_ring = &txq->tx_ring;
+ int i;
+ struct ipoib_txreq *tx;
+
+ for (i = 0; i < tx_ring->max_items; i++) {
+ tx = hfi1_txreq_from_idx(tx_ring, i);
+ tx->complete = 0;
+ dev_kfree_skb_any(tx->skb);
+ tx->skb = NULL;
+ sdma_txclean(txq->priv->dd, &tx->txreq);
+ }
+ tx_ring->head = 0;
+ tx_ring->tail = 0;
+ tx_ring->complete_txreqs = 0;
+ tx_ring->sent_txreqs = 0;
+ tx_ring->avail = hfi1_ipoib_ring_hwat(txq);
+}
+
+static int hfi1_ipoib_poll_tx_ring(struct napi_struct *napi, int budget)
+{
+ struct hfi1_ipoib_txq *txq =
+ container_of(napi, struct hfi1_ipoib_txq, napi);
+ struct hfi1_ipoib_circ_buf *tx_ring = &txq->tx_ring;
+ u32 head = tx_ring->head;
+ u32 max_tx = tx_ring->max_items;
+ int work_done;
+ struct ipoib_txreq *tx = hfi1_txreq_from_idx(tx_ring, head);
+
+ trace_hfi1_txq_poll(txq);
+ for (work_done = 0; work_done < budget; work_done++) {
+ /* See hfi1_ipoib_sdma_complete() */
+ if (!smp_load_acquire(&tx->complete))
+ break;
+ tx->complete = 0;
+ trace_hfi1_tx_produce(tx, head);
+ hfi1_ipoib_free_tx(tx, budget);
+ head = CIRC_NEXT(head, max_tx);
+ tx = hfi1_txreq_from_idx(tx_ring, head);
+ }
+ tx_ring->complete_txreqs += work_done;
+
+ /* Finished freeing tx items so store the head value. */
+ smp_store_release(&tx_ring->head, head);
+
+ hfi1_ipoib_check_queue_stopped(txq);
+
+ if (work_done < budget)
+ napi_complete_done(napi, work_done);
+
+ return work_done;
+}
+
+static void hfi1_ipoib_sdma_complete(struct sdma_txreq *txreq, int status)
+{
+ struct ipoib_txreq *tx = container_of(txreq, struct ipoib_txreq, txreq);
+
+ trace_hfi1_txq_complete(tx->txq);
+ tx->sdma_status = status;
+ /* see hfi1_ipoib_poll_tx_ring */
+ smp_store_release(&tx->complete, 1);
+ napi_schedule_irqoff(&tx->txq->napi);
+}
+
+static int hfi1_ipoib_build_ulp_payload(struct ipoib_txreq *tx,
+ struct ipoib_txparms *txp)
+{
+ struct hfi1_devdata *dd = txp->dd;
+ struct sdma_txreq *txreq = &tx->txreq;
+ struct sk_buff *skb = tx->skb;
+ int ret = 0;
+ int i;
+
+ if (skb_headlen(skb)) {
+ ret = sdma_txadd_kvaddr(dd, txreq, skb->data, skb_headlen(skb));
+ if (unlikely(ret))
+ return ret;
+ }
+
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+ const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+ ret = sdma_txadd_page(dd,
+ txreq,
+ skb_frag_page(frag),
+ skb_frag_off(frag),
+ skb_frag_size(frag),
+ NULL, NULL, NULL);
+ if (unlikely(ret))
+ break;
+ }
+
+ return ret;
+}
+
+static int hfi1_ipoib_build_tx_desc(struct ipoib_txreq *tx,
+ struct ipoib_txparms *txp)
+{
+ struct hfi1_devdata *dd = txp->dd;
+ struct sdma_txreq *txreq = &tx->txreq;
+ struct hfi1_sdma_header *sdma_hdr = tx->sdma_hdr;
+ u16 pkt_bytes =
+ sizeof(sdma_hdr->pbc) + (txp->hdr_dwords << 2) + tx->skb->len;
+ int ret;
+
+ ret = sdma_txinit(txreq, 0, pkt_bytes, hfi1_ipoib_sdma_complete);
+ if (unlikely(ret))
+ return ret;
+
+ /* add pbc + headers */
+ ret = sdma_txadd_kvaddr(dd,
+ txreq,
+ sdma_hdr,
+ sizeof(sdma_hdr->pbc) + (txp->hdr_dwords << 2));
+ if (unlikely(ret))
+ return ret;
+
+ /* add the ulp payload */
+ return hfi1_ipoib_build_ulp_payload(tx, txp);
+}
+
+static void hfi1_ipoib_build_ib_tx_headers(struct ipoib_txreq *tx,
+ struct ipoib_txparms *txp)
+{
+ struct hfi1_ipoib_dev_priv *priv = tx->txq->priv;
+ struct hfi1_sdma_header *sdma_hdr = tx->sdma_hdr;
+ struct sk_buff *skb = tx->skb;
+ struct hfi1_pportdata *ppd = ppd_from_ibp(txp->ibp);
+ struct rdma_ah_attr *ah_attr = txp->ah_attr;
+ struct ib_other_headers *ohdr;
+ struct ib_grh *grh;
+ u16 dwords;
+ u16 slid;
+ u16 dlid;
+ u16 lrh0;
+ u32 bth0;
+ u32 sqpn = (u32)(priv->netdev->dev_addr[1] << 16 |
+ priv->netdev->dev_addr[2] << 8 |
+ priv->netdev->dev_addr[3]);
+ u16 payload_dwords;
+ u8 pad_cnt;
+
+ pad_cnt = -skb->len & 3;
+
+ /* Includes ICRC */
+ payload_dwords = ((skb->len + pad_cnt) >> 2) + SIZE_OF_CRC;
+
+ /* header size in dwords LRH+BTH+DETH = (8+12+8)/4. */
+ txp->hdr_dwords = 7;
+
+ if (rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH) {
+ grh = &sdma_hdr->hdr.ibh.u.l.grh;
+ txp->hdr_dwords +=
+ hfi1_make_grh(txp->ibp,
+ grh,
+ rdma_ah_read_grh(ah_attr),
+ txp->hdr_dwords - LRH_9B_DWORDS,
+ payload_dwords);
+ lrh0 = HFI1_LRH_GRH;
+ ohdr = &sdma_hdr->hdr.ibh.u.l.oth;
+ } else {
+ lrh0 = HFI1_LRH_BTH;
+ ohdr = &sdma_hdr->hdr.ibh.u.oth;
+ }
+
+ lrh0 |= (rdma_ah_get_sl(ah_attr) & 0xf) << 4;
+ lrh0 |= (txp->flow.sc5 & 0xf) << 12;
+
+ dlid = opa_get_lid(rdma_ah_get_dlid(ah_attr), 9B);
+ if (dlid == be16_to_cpu(IB_LID_PERMISSIVE)) {
+ slid = be16_to_cpu(IB_LID_PERMISSIVE);
+ } else {
+ u16 lid = (u16)ppd->lid;
+
+ if (lid) {
+ lid |= rdma_ah_get_path_bits(ah_attr) &
+ ((1 << ppd->lmc) - 1);
+ slid = lid;
+ } else {
+ slid = be16_to_cpu(IB_LID_PERMISSIVE);
+ }
+ }
+
+ /* Includes ICRC */
+ dwords = txp->hdr_dwords + payload_dwords;
+
+ /* Build the lrh */
+ sdma_hdr->hdr.hdr_type = HFI1_PKT_TYPE_9B;
+ hfi1_make_ib_hdr(&sdma_hdr->hdr.ibh, lrh0, dwords, dlid, slid);
+
+ /* Build the bth */
+ bth0 = (IB_OPCODE_UD_SEND_ONLY << 24) | (pad_cnt << 20) | priv->pkey;
+
+ ohdr->bth[0] = cpu_to_be32(bth0);
+ ohdr->bth[1] = cpu_to_be32(txp->dqpn);
+ ohdr->bth[2] = cpu_to_be32(mask_psn((u32)txp->txq->tx_ring.sent_txreqs));
+
+ /* Build the deth */
+ ohdr->u.ud.deth[0] = cpu_to_be32(priv->qkey);
+ ohdr->u.ud.deth[1] = cpu_to_be32((txp->entropy <<
+ HFI1_IPOIB_ENTROPY_SHIFT) | sqpn);
+
+ /* Construct the pbc. */
+ sdma_hdr->pbc =
+ cpu_to_le64(create_pbc(ppd,
+ ib_is_sc5(txp->flow.sc5) <<
+ PBC_DC_INFO_SHIFT,
+ 0,
+ sc_to_vlt(priv->dd, txp->flow.sc5),
+ dwords - SIZE_OF_CRC +
+ (sizeof(sdma_hdr->pbc) >> 2)));
+}
+
+static struct ipoib_txreq *hfi1_ipoib_send_dma_common(struct net_device *dev,
+ struct sk_buff *skb,
+ struct ipoib_txparms *txp)
+{
+ struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
+ struct hfi1_ipoib_txq *txq = txp->txq;
+ struct ipoib_txreq *tx;
+ struct hfi1_ipoib_circ_buf *tx_ring = &txq->tx_ring;
+ u32 tail = tx_ring->tail;
+ int ret;
+
+ if (unlikely(!tx_ring->avail)) {
+ u32 head;
+
+ if (hfi1_ipoib_used(txq) >= hfi1_ipoib_ring_hwat(txq))
+ /* This shouldn't happen with a stopped queue */
+ return ERR_PTR(-ENOMEM);
+ /* See hfi1_ipoib_poll_tx_ring() */
+ head = smp_load_acquire(&tx_ring->head);
+ tx_ring->avail =
+ min_t(u32, hfi1_ipoib_ring_hwat(txq),
+ CIRC_CNT(head, tail, tx_ring->max_items));
+ } else {
+ tx_ring->avail--;
+ }
+ tx = hfi1_txreq_from_idx(tx_ring, tail);
+ trace_hfi1_txq_alloc_tx(txq);
+
+ /* so that we can test if the sdma descriptors are there */
+ tx->txreq.num_desc = 0;
+ tx->txq = txq;
+ tx->skb = skb;
+ INIT_LIST_HEAD(&tx->txreq.list);
+
+ hfi1_ipoib_build_ib_tx_headers(tx, txp);
+
+ ret = hfi1_ipoib_build_tx_desc(tx, txp);
+ if (likely(!ret)) {
+ if (txq->flow.as_int != txp->flow.as_int) {
+ txq->flow.tx_queue = txp->flow.tx_queue;
+ txq->flow.sc5 = txp->flow.sc5;
+ txq->sde =
+ sdma_select_engine_sc(priv->dd,
+ txp->flow.tx_queue,
+ txp->flow.sc5);
+ trace_hfi1_flow_switch(txq);
+ }
+
+ return tx;
+ }
+
+ sdma_txclean(priv->dd, &tx->txreq);
+
+ return ERR_PTR(ret);
+}
+
+static int hfi1_ipoib_submit_tx_list(struct net_device *dev,
+ struct hfi1_ipoib_txq *txq)
+{
+ int ret;
+ u16 count_out;
+
+ ret = sdma_send_txlist(txq->sde,
+ iowait_get_ib_work(&txq->wait),
+ &txq->tx_list,
+ &count_out);
+ if (likely(!ret) || ret == -EBUSY || ret == -ECOMM)
+ return ret;
+
+ dd_dev_warn(txq->priv->dd, "cannot send skb tx list, err %d.\n", ret);
+
+ return ret;
+}
+
+static int hfi1_ipoib_flush_tx_list(struct net_device *dev,
+ struct hfi1_ipoib_txq *txq)
+{
+ int ret = 0;
+
+ if (!list_empty(&txq->tx_list)) {
+ /* Flush the current list */
+ ret = hfi1_ipoib_submit_tx_list(dev, txq);
+
+ if (unlikely(ret))
+ if (ret != -EBUSY)
+ ++dev->stats.tx_carrier_errors;
+ }
+
+ return ret;
+}
+
+static int hfi1_ipoib_submit_tx(struct hfi1_ipoib_txq *txq,
+ struct ipoib_txreq *tx)
+{
+ int ret;
+
+ ret = sdma_send_txreq(txq->sde,
+ iowait_get_ib_work(&txq->wait),
+ &tx->txreq,
+ txq->pkts_sent);
+ if (likely(!ret)) {
+ txq->pkts_sent = true;
+ iowait_starve_clear(txq->pkts_sent, &txq->wait);
+ }
+
+ return ret;
+}
+
+static int hfi1_ipoib_send_dma_single(struct net_device *dev,
+ struct sk_buff *skb,
+ struct ipoib_txparms *txp)
+{
+ struct hfi1_ipoib_txq *txq = txp->txq;
+ struct hfi1_ipoib_circ_buf *tx_ring;
+ struct ipoib_txreq *tx;
+ int ret;
+
+ tx = hfi1_ipoib_send_dma_common(dev, skb, txp);
+ if (IS_ERR(tx)) {
+ int ret = PTR_ERR(tx);
+
+ dev_kfree_skb_any(skb);
+
+ if (ret == -ENOMEM)
+ ++dev->stats.tx_errors;
+ else
+ ++dev->stats.tx_carrier_errors;
+
+ return NETDEV_TX_OK;
+ }
+
+ tx_ring = &txq->tx_ring;
+ trace_hfi1_tx_consume(tx, tx_ring->tail);
+ /* consume tx */
+ smp_store_release(&tx_ring->tail, CIRC_NEXT(tx_ring->tail, tx_ring->max_items));
+ ret = hfi1_ipoib_submit_tx(txq, tx);
+ if (likely(!ret)) {
+tx_ok:
+ trace_sdma_output_ibhdr(txq->priv->dd,
+ &tx->sdma_hdr->hdr,
+ ib_is_sc5(txp->flow.sc5));
+ hfi1_ipoib_check_queue_depth(txq);
+ return NETDEV_TX_OK;
+ }
+
+ txq->pkts_sent = false;
+
+ if (ret == -EBUSY || ret == -ECOMM)
+ goto tx_ok;
+
+ /* mark complete and kick napi tx */
+ smp_store_release(&tx->complete, 1);
+ napi_schedule(&tx->txq->napi);
+
+ ++dev->stats.tx_carrier_errors;
+
+ return NETDEV_TX_OK;
+}
+
+static int hfi1_ipoib_send_dma_list(struct net_device *dev,
+ struct sk_buff *skb,
+ struct ipoib_txparms *txp)
+{
+ struct hfi1_ipoib_txq *txq = txp->txq;
+ struct hfi1_ipoib_circ_buf *tx_ring;
+ struct ipoib_txreq *tx;
+
+ /* Has the flow change ? */
+ if (txq->flow.as_int != txp->flow.as_int) {
+ int ret;
+
+ trace_hfi1_flow_flush(txq);
+ ret = hfi1_ipoib_flush_tx_list(dev, txq);
+ if (unlikely(ret)) {
+ if (ret == -EBUSY)
+ ++dev->stats.tx_dropped;
+ dev_kfree_skb_any(skb);
+ return NETDEV_TX_OK;
+ }
+ }
+ tx = hfi1_ipoib_send_dma_common(dev, skb, txp);
+ if (IS_ERR(tx)) {
+ int ret = PTR_ERR(tx);
+
+ dev_kfree_skb_any(skb);
+
+ if (ret == -ENOMEM)
+ ++dev->stats.tx_errors;
+ else
+ ++dev->stats.tx_carrier_errors;
+
+ return NETDEV_TX_OK;
+ }
+
+ tx_ring = &txq->tx_ring;
+ trace_hfi1_tx_consume(tx, tx_ring->tail);
+ /* consume tx */
+ smp_store_release(&tx_ring->tail, CIRC_NEXT(tx_ring->tail, tx_ring->max_items));
+ list_add_tail(&tx->txreq.list, &txq->tx_list);
+
+ hfi1_ipoib_check_queue_depth(txq);
+
+ trace_sdma_output_ibhdr(txq->priv->dd,
+ &tx->sdma_hdr->hdr,
+ ib_is_sc5(txp->flow.sc5));
+
+ if (!netdev_xmit_more())
+ (void)hfi1_ipoib_flush_tx_list(dev, txq);
+
+ return NETDEV_TX_OK;
+}
+
+static u8 hfi1_ipoib_calc_entropy(struct sk_buff *skb)
+{
+ if (skb_transport_header_was_set(skb)) {
+ u8 *hdr = (u8 *)skb_transport_header(skb);
+
+ return (hdr[0] ^ hdr[1] ^ hdr[2] ^ hdr[3]);
+ }
+
+ return (u8)skb_get_queue_mapping(skb);
+}
+
+int hfi1_ipoib_send(struct net_device *dev,
+ struct sk_buff *skb,
+ struct ib_ah *address,
+ u32 dqpn)
+{
+ struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
+ struct ipoib_txparms txp;
+ struct rdma_netdev *rn = netdev_priv(dev);
+
+ if (unlikely(skb->len > rn->mtu + HFI1_IPOIB_ENCAP_LEN)) {
+ dd_dev_warn(priv->dd, "packet len %d (> %d) too long to send, dropping\n",
+ skb->len,
+ rn->mtu + HFI1_IPOIB_ENCAP_LEN);
+ ++dev->stats.tx_dropped;
+ ++dev->stats.tx_errors;
+ dev_kfree_skb_any(skb);
+ return NETDEV_TX_OK;
+ }
+
+ txp.dd = priv->dd;
+ txp.ah_attr = &ibah_to_rvtah(address)->attr;
+ txp.ibp = to_iport(priv->device, priv->port_num);
+ txp.txq = &priv->txqs[skb_get_queue_mapping(skb)];
+ txp.dqpn = dqpn;
+ txp.flow.sc5 = txp.ibp->sl_to_sc[rdma_ah_get_sl(txp.ah_attr)];
+ txp.flow.tx_queue = (u8)skb_get_queue_mapping(skb);
+ txp.entropy = hfi1_ipoib_calc_entropy(skb);
+
+ if (netdev_xmit_more() || !list_empty(&txp.txq->tx_list))
+ return hfi1_ipoib_send_dma_list(dev, skb, &txp);
+
+ return hfi1_ipoib_send_dma_single(dev, skb, &txp);
+}
+
+/*
+ * hfi1_ipoib_sdma_sleep - ipoib sdma sleep function
+ *
+ * This function gets called from sdma_send_txreq() when there are not enough
+ * sdma descriptors available to send the packet. It adds Tx queue's wait
+ * structure to sdma engine's dmawait list to be woken up when descriptors
+ * become available.
+ */
+static int hfi1_ipoib_sdma_sleep(struct sdma_engine *sde,
+ struct iowait_work *wait,
+ struct sdma_txreq *txreq,
+ uint seq,
+ bool pkts_sent)
+{
+ struct hfi1_ipoib_txq *txq =
+ container_of(wait->iow, struct hfi1_ipoib_txq, wait);
+
+ write_seqlock(&sde->waitlock);
+
+ if (likely(txq->priv->netdev->reg_state == NETREG_REGISTERED)) {
+ if (sdma_progress(sde, seq, txreq)) {
+ write_sequnlock(&sde->waitlock);
+ return -EAGAIN;
+ }
+
+ if (list_empty(&txreq->list))
+ /* came from non-list submit */
+ list_add_tail(&txreq->list, &txq->tx_list);
+ if (list_empty(&txq->wait.list)) {
+ struct hfi1_ibport *ibp = &sde->ppd->ibport_data;
+
+ if (!atomic_xchg(&txq->tx_ring.no_desc, 1)) {
+ trace_hfi1_txq_queued(txq);
+ hfi1_ipoib_stop_txq(txq);
+ }
+ ibp->rvp.n_dmawait++;
+ iowait_queue(pkts_sent, wait->iow, &sde->dmawait);
+ }
+
+ write_sequnlock(&sde->waitlock);
+ return -EBUSY;
+ }
+
+ write_sequnlock(&sde->waitlock);
+ return -EINVAL;
+}
+
+/*
+ * hfi1_ipoib_sdma_wakeup - ipoib sdma wakeup function
+ *
+ * This function gets called when SDMA descriptors becomes available and Tx
+ * queue's wait structure was previously added to sdma engine's dmawait list.
+ */
+static void hfi1_ipoib_sdma_wakeup(struct iowait *wait, int reason)
+{
+ struct hfi1_ipoib_txq *txq =
+ container_of(wait, struct hfi1_ipoib_txq, wait);
+
+ trace_hfi1_txq_wakeup(txq);
+ if (likely(txq->priv->netdev->reg_state == NETREG_REGISTERED))
+ iowait_schedule(wait, system_highpri_wq, WORK_CPU_UNBOUND);
+}
+
+static void hfi1_ipoib_flush_txq(struct work_struct *work)
+{
+ struct iowait_work *ioww =
+ container_of(work, struct iowait_work, iowork);
+ struct iowait *wait = iowait_ioww_to_iow(ioww);
+ struct hfi1_ipoib_txq *txq =
+ container_of(wait, struct hfi1_ipoib_txq, wait);
+ struct net_device *dev = txq->priv->netdev;
+
+ if (likely(dev->reg_state == NETREG_REGISTERED) &&
+ likely(!hfi1_ipoib_flush_tx_list(dev, txq)))
+ if (atomic_xchg(&txq->tx_ring.no_desc, 0))
+ hfi1_ipoib_wake_txq(txq);
+}
+
+int hfi1_ipoib_txreq_init(struct hfi1_ipoib_dev_priv *priv)
+{
+ struct net_device *dev = priv->netdev;
+ u32 tx_ring_size, tx_item_size;
+ struct hfi1_ipoib_circ_buf *tx_ring;
+ int i, j;
+
+ /*
+ * Ring holds 1 less than tx_ring_size
+ * Round up to next power of 2 in order to hold at least tx_queue_len
+ */
+ tx_ring_size = roundup_pow_of_two(dev->tx_queue_len + 1);
+ tx_item_size = roundup_pow_of_two(sizeof(struct ipoib_txreq));
+
+ priv->txqs = kcalloc_node(dev->num_tx_queues,
+ sizeof(struct hfi1_ipoib_txq),
+ GFP_KERNEL,
+ priv->dd->node);
+ if (!priv->txqs)
+ return -ENOMEM;
+
+ for (i = 0; i < dev->num_tx_queues; i++) {
+ struct hfi1_ipoib_txq *txq = &priv->txqs[i];
+ struct ipoib_txreq *tx;
+
+ tx_ring = &txq->tx_ring;
+ iowait_init(&txq->wait,
+ 0,
+ hfi1_ipoib_flush_txq,
+ NULL,
+ hfi1_ipoib_sdma_sleep,
+ hfi1_ipoib_sdma_wakeup,
+ NULL,
+ NULL);
+ txq->priv = priv;
+ txq->sde = NULL;
+ INIT_LIST_HEAD(&txq->tx_list);
+ atomic_set(&txq->tx_ring.stops, 0);
+ atomic_set(&txq->tx_ring.ring_full, 0);
+ atomic_set(&txq->tx_ring.no_desc, 0);
+ txq->q_idx = i;
+ txq->flow.tx_queue = 0xff;
+ txq->flow.sc5 = 0xff;
+ txq->pkts_sent = false;
+
+ netdev_queue_numa_node_write(netdev_get_tx_queue(dev, i),
+ priv->dd->node);
+
+ txq->tx_ring.items =
+ kvzalloc_node(array_size(tx_ring_size, tx_item_size),
+ GFP_KERNEL, priv->dd->node);
+ if (!txq->tx_ring.items)
+ goto free_txqs;
+
+ txq->tx_ring.max_items = tx_ring_size;
+ txq->tx_ring.shift = ilog2(tx_item_size);
+ txq->tx_ring.avail = hfi1_ipoib_ring_hwat(txq);
+ tx_ring = &txq->tx_ring;
+ for (j = 0; j < tx_ring_size; j++) {
+ hfi1_txreq_from_idx(tx_ring, j)->sdma_hdr =
+ kzalloc_node(sizeof(*tx->sdma_hdr),
+ GFP_KERNEL, priv->dd->node);
+ if (!hfi1_txreq_from_idx(tx_ring, j)->sdma_hdr)
+ goto free_txqs;
+ }
+
+ netif_napi_add_tx(dev, &txq->napi, hfi1_ipoib_poll_tx_ring);
+ }
+
+ return 0;
+
+free_txqs:
+ for (i--; i >= 0; i--) {
+ struct hfi1_ipoib_txq *txq = &priv->txqs[i];
+
+ netif_napi_del(&txq->napi);
+ tx_ring = &txq->tx_ring;
+ for (j = 0; j < tx_ring_size; j++)
+ kfree(hfi1_txreq_from_idx(tx_ring, j)->sdma_hdr);
+ kvfree(tx_ring->items);
+ }
+
+ kfree(priv->txqs);
+ priv->txqs = NULL;
+ return -ENOMEM;
+}
+
+static void hfi1_ipoib_drain_tx_list(struct hfi1_ipoib_txq *txq)
+{
+ struct sdma_txreq *txreq;
+ struct sdma_txreq *txreq_tmp;
+
+ list_for_each_entry_safe(txreq, txreq_tmp, &txq->tx_list, list) {
+ struct ipoib_txreq *tx =
+ container_of(txreq, struct ipoib_txreq, txreq);
+
+ list_del(&txreq->list);
+ sdma_txclean(txq->priv->dd, &tx->txreq);
+ dev_kfree_skb_any(tx->skb);
+ tx->skb = NULL;
+ txq->tx_ring.complete_txreqs++;
+ }
+
+ if (hfi1_ipoib_used(txq))
+ dd_dev_warn(txq->priv->dd,
+ "txq %d not empty found %u requests\n",
+ txq->q_idx,
+ hfi1_ipoib_txreqs(txq->tx_ring.sent_txreqs,
+ txq->tx_ring.complete_txreqs));
+}
+
+void hfi1_ipoib_txreq_deinit(struct hfi1_ipoib_dev_priv *priv)
+{
+ int i, j;
+
+ for (i = 0; i < priv->netdev->num_tx_queues; i++) {
+ struct hfi1_ipoib_txq *txq = &priv->txqs[i];
+ struct hfi1_ipoib_circ_buf *tx_ring = &txq->tx_ring;
+
+ iowait_cancel_work(&txq->wait);
+ iowait_sdma_drain(&txq->wait);
+ hfi1_ipoib_drain_tx_list(txq);
+ netif_napi_del(&txq->napi);
+ hfi1_ipoib_drain_tx_ring(txq);
+ for (j = 0; j < tx_ring->max_items; j++)
+ kfree(hfi1_txreq_from_idx(tx_ring, j)->sdma_hdr);
+ kvfree(tx_ring->items);
+ }
+
+ kfree(priv->txqs);
+ priv->txqs = NULL;
+}
+
+void hfi1_ipoib_napi_tx_enable(struct net_device *dev)
+{
+ struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
+ int i;
+
+ for (i = 0; i < dev->num_tx_queues; i++) {
+ struct hfi1_ipoib_txq *txq = &priv->txqs[i];
+
+ napi_enable(&txq->napi);
+ }
+}
+
+void hfi1_ipoib_napi_tx_disable(struct net_device *dev)
+{
+ struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
+ int i;
+
+ for (i = 0; i < dev->num_tx_queues; i++) {
+ struct hfi1_ipoib_txq *txq = &priv->txqs[i];
+
+ napi_disable(&txq->napi);
+ hfi1_ipoib_drain_tx_ring(txq);
+ }
+}
+
+void hfi1_ipoib_tx_timeout(struct net_device *dev, unsigned int q)
+{
+ struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
+ struct hfi1_ipoib_txq *txq = &priv->txqs[q];
+
+ dd_dev_info(priv->dd, "timeout txq %p q %u stopped %u stops %d no_desc %d ring_full %d\n",
+ txq, q,
+ __netif_subqueue_stopped(dev, txq->q_idx),
+ atomic_read(&txq->tx_ring.stops),
+ atomic_read(&txq->tx_ring.no_desc),
+ atomic_read(&txq->tx_ring.ring_full));
+ dd_dev_info(priv->dd, "sde %p engine %u\n",
+ txq->sde,
+ txq->sde ? txq->sde->this_idx : 0);
+ dd_dev_info(priv->dd, "flow %x\n", txq->flow.as_int);
+ dd_dev_info(priv->dd, "sent %llu completed %llu used %llu\n",
+ txq->tx_ring.sent_txreqs, txq->tx_ring.complete_txreqs,
+ hfi1_ipoib_used(txq));
+ dd_dev_info(priv->dd, "tx_queue_len %u max_items %u\n",
+ dev->tx_queue_len, txq->tx_ring.max_items);
+ dd_dev_info(priv->dd, "head %u tail %u\n",
+ txq->tx_ring.head, txq->tx_ring.tail);
+ dd_dev_info(priv->dd, "wait queued %u\n",
+ !list_empty(&txq->wait.list));
+ dd_dev_info(priv->dd, "tx_list empty %u\n",
+ list_empty(&txq->tx_list));
+}
+
diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c
index 6e595afca24c..961fa07116f0 100644
--- a/drivers/infiniband/hw/hfi1/mad.c
+++ b/drivers/infiniband/hw/hfi1/mad.c
@@ -1,51 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
+ * Copyright(c) 2015-2018 Intel Corporation.
*/
#include <linux/net.h>
+#include <rdma/opa_addr.h>
#define OPA_NUM_PKEY_BLOCKS_PER_SMP (OPA_SMP_DR_DATA_SIZE \
/ (OPA_PARTITION_TABLE_BLK_SIZE * sizeof(u16)))
@@ -53,11 +12,30 @@
#include "mad.h"
#include "trace.h"
#include "qp.h"
+#include "vnic.h"
/* the reset value from the FM is supposed to be 0xffff, handle both */
#define OPA_LINK_WIDTH_RESET_OLD 0x0fff
#define OPA_LINK_WIDTH_RESET 0xffff
+struct trap_node {
+ struct list_head list;
+ struct opa_mad_notice_attr data;
+ __be64 tid;
+ int len;
+ u32 retry;
+ u8 in_use;
+ u8 repress;
+};
+
+static int smp_length_check(u32 data_size, u32 request_len)
+{
+ if (unlikely(request_len < data_size))
+ return -EINVAL;
+
+ return 0;
+}
+
static int reply(struct ib_mad_hdr *smp)
{
/*
@@ -78,7 +56,17 @@ static inline void clear_opa_smp_data(struct opa_smp *smp)
memset(data, 0, size);
}
-void hfi1_event_pkey_change(struct hfi1_devdata *dd, u8 port)
+static u16 hfi1_lookup_pkey_value(struct hfi1_ibport *ibp, int pkey_idx)
+{
+ struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
+
+ if (pkey_idx < ARRAY_SIZE(ppd->pkeys))
+ return ppd->pkeys[pkey_idx];
+
+ return 0;
+}
+
+void hfi1_event_pkey_change(struct hfi1_devdata *dd, u32 port)
{
struct ib_event event;
@@ -88,28 +76,222 @@ void hfi1_event_pkey_change(struct hfi1_devdata *dd, u8 port)
ib_dispatch_event(&event);
}
-static void send_trap(struct hfi1_ibport *ibp, void *data, unsigned len)
+/*
+ * If the port is down, clean up all pending traps. We need to be careful
+ * with the given trap, because it may be queued.
+ */
+static void cleanup_traps(struct hfi1_ibport *ibp, struct trap_node *trap)
+{
+ struct trap_node *node, *q;
+ unsigned long flags;
+ struct list_head trap_list;
+ int i;
+
+ for (i = 0; i < RVT_MAX_TRAP_LISTS; i++) {
+ spin_lock_irqsave(&ibp->rvp.lock, flags);
+ list_replace_init(&ibp->rvp.trap_lists[i].list, &trap_list);
+ ibp->rvp.trap_lists[i].list_len = 0;
+ spin_unlock_irqrestore(&ibp->rvp.lock, flags);
+
+ /*
+ * Remove all items from the list, freeing all the non-given
+ * traps.
+ */
+ list_for_each_entry_safe(node, q, &trap_list, list) {
+ list_del(&node->list);
+ if (node != trap)
+ kfree(node);
+ }
+ }
+
+ /*
+ * If this wasn't on one of the lists it would not be freed. If it
+ * was on the list, it is now safe to free.
+ */
+ kfree(trap);
+}
+
+static struct trap_node *check_and_add_trap(struct hfi1_ibport *ibp,
+ struct trap_node *trap)
+{
+ struct trap_node *node;
+ struct trap_list *trap_list;
+ unsigned long flags;
+ unsigned long timeout;
+ int found = 0;
+ unsigned int queue_id;
+ static int trap_count;
+
+ queue_id = trap->data.generic_type & 0x0F;
+ if (queue_id >= RVT_MAX_TRAP_LISTS) {
+ trap_count++;
+ pr_err_ratelimited("hfi1: Invalid trap 0x%0x dropped. Total dropped: %d\n",
+ trap->data.generic_type, trap_count);
+ kfree(trap);
+ return NULL;
+ }
+
+ /*
+ * Since the retry (handle timeout) does not remove a trap request
+ * from the list, all we have to do is compare the node.
+ */
+ spin_lock_irqsave(&ibp->rvp.lock, flags);
+ trap_list = &ibp->rvp.trap_lists[queue_id];
+
+ list_for_each_entry(node, &trap_list->list, list) {
+ if (node == trap) {
+ node->retry++;
+ found = 1;
+ break;
+ }
+ }
+
+ /* If it is not on the list, add it, limited to RVT-MAX_TRAP_LEN. */
+ if (!found) {
+ if (trap_list->list_len < RVT_MAX_TRAP_LEN) {
+ trap_list->list_len++;
+ list_add_tail(&trap->list, &trap_list->list);
+ } else {
+ pr_warn_ratelimited("hfi1: Maximum trap limit reached for 0x%0x traps\n",
+ trap->data.generic_type);
+ kfree(trap);
+ }
+ }
+
+ /*
+ * Next check to see if there is a timer pending. If not, set it up
+ * and get the first trap from the list.
+ */
+ node = NULL;
+ if (!timer_pending(&ibp->rvp.trap_timer)) {
+ /*
+ * o14-2
+ * If the time out is set we have to wait until it expires
+ * before the trap can be sent.
+ * This should be > RVT_TRAP_TIMEOUT
+ */
+ timeout = (RVT_TRAP_TIMEOUT *
+ (1UL << ibp->rvp.subnet_timeout)) / 1000;
+ mod_timer(&ibp->rvp.trap_timer,
+ jiffies + usecs_to_jiffies(timeout));
+ node = list_first_entry(&trap_list->list, struct trap_node,
+ list);
+ node->in_use = 1;
+ }
+ spin_unlock_irqrestore(&ibp->rvp.lock, flags);
+
+ return node;
+}
+
+static void subn_handle_opa_trap_repress(struct hfi1_ibport *ibp,
+ struct opa_smp *smp)
+{
+ struct trap_list *trap_list;
+ struct trap_node *trap;
+ unsigned long flags;
+ int i;
+
+ if (smp->attr_id != IB_SMP_ATTR_NOTICE)
+ return;
+
+ spin_lock_irqsave(&ibp->rvp.lock, flags);
+ for (i = 0; i < RVT_MAX_TRAP_LISTS; i++) {
+ trap_list = &ibp->rvp.trap_lists[i];
+ trap = list_first_entry_or_null(&trap_list->list,
+ struct trap_node, list);
+ if (trap && trap->tid == smp->tid) {
+ if (trap->in_use) {
+ trap->repress = 1;
+ } else {
+ trap_list->list_len--;
+ list_del(&trap->list);
+ kfree(trap);
+ }
+ break;
+ }
+ }
+ spin_unlock_irqrestore(&ibp->rvp.lock, flags);
+}
+
+static void hfi1_update_sm_ah_attr(struct hfi1_ibport *ibp,
+ struct rdma_ah_attr *attr, u32 dlid)
+{
+ rdma_ah_set_dlid(attr, dlid);
+ rdma_ah_set_port_num(attr, ppd_from_ibp(ibp)->port);
+ if (dlid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) {
+ struct ib_global_route *grh = rdma_ah_retrieve_grh(attr);
+
+ rdma_ah_set_ah_flags(attr, IB_AH_GRH);
+ grh->sgid_index = 0;
+ grh->hop_limit = 1;
+ grh->dgid.global.subnet_prefix =
+ ibp->rvp.gid_prefix;
+ grh->dgid.global.interface_id = OPA_MAKE_ID(dlid);
+ }
+}
+
+static int hfi1_modify_qp0_ah(struct hfi1_ibport *ibp,
+ struct rvt_ah *ah, u32 dlid)
+{
+ struct rdma_ah_attr attr;
+ struct rvt_qp *qp0;
+ int ret = -EINVAL;
+
+ memset(&attr, 0, sizeof(attr));
+ attr.type = ah->ibah.type;
+ hfi1_update_sm_ah_attr(ibp, &attr, dlid);
+ rcu_read_lock();
+ qp0 = rcu_dereference(ibp->rvp.qp[0]);
+ if (qp0)
+ ret = rdma_modify_ah(&ah->ibah, &attr);
+ rcu_read_unlock();
+ return ret;
+}
+
+static struct ib_ah *hfi1_create_qp0_ah(struct hfi1_ibport *ibp, u32 dlid)
+{
+ struct rdma_ah_attr attr;
+ struct ib_ah *ah = ERR_PTR(-EINVAL);
+ struct rvt_qp *qp0;
+ struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
+ struct hfi1_devdata *dd = dd_from_ppd(ppd);
+ u32 port_num = ppd->port;
+
+ memset(&attr, 0, sizeof(attr));
+ attr.type = rdma_ah_find_type(&dd->verbs_dev.rdi.ibdev, port_num);
+ hfi1_update_sm_ah_attr(ibp, &attr, dlid);
+ rcu_read_lock();
+ qp0 = rcu_dereference(ibp->rvp.qp[0]);
+ if (qp0)
+ ah = rdma_create_ah(qp0->ibqp.pd, &attr, 0);
+ rcu_read_unlock();
+ return ah;
+}
+
+static void send_trap(struct hfi1_ibport *ibp, struct trap_node *trap)
{
struct ib_mad_send_buf *send_buf;
struct ib_mad_agent *agent;
struct opa_smp *smp;
- int ret;
unsigned long flags;
- unsigned long timeout;
int pkey_idx;
u32 qpn = ppd_from_ibp(ibp)->sm_trap_qp;
agent = ibp->rvp.send_agent;
- if (!agent)
+ if (!agent) {
+ cleanup_traps(ibp, trap);
return;
+ }
/* o14-3.2.1 */
- if (ppd_from_ibp(ibp)->lstate != IB_PORT_ACTIVE)
+ if (driver_lstate(ppd_from_ibp(ibp)) != IB_PORT_ACTIVE) {
+ cleanup_traps(ibp, trap);
return;
+ }
- /* o14-2 */
- if (ibp->rvp.trap_timeout && time_before(jiffies,
- ibp->rvp.trap_timeout))
+ /* Add the trap to the list if necessary and see if we can send it */
+ trap = check_and_add_trap(ibp, trap);
+ if (!trap)
return;
pkey_idx = hfi1_lookup_pkey_idx(ibp, LIM_MGMT_P_KEY);
@@ -130,11 +312,21 @@ static void send_trap(struct hfi1_ibport *ibp, void *data, unsigned len)
smp->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED;
smp->class_version = OPA_SM_CLASS_VERSION;
smp->method = IB_MGMT_METHOD_TRAP;
- ibp->rvp.tid++;
- smp->tid = cpu_to_be64(ibp->rvp.tid);
+
+ /* Only update the transaction ID for new traps (o13-5). */
+ if (trap->tid == 0) {
+ ibp->rvp.tid++;
+ /* make sure that tid != 0 */
+ if (ibp->rvp.tid == 0)
+ ibp->rvp.tid++;
+ trap->tid = cpu_to_be64(ibp->rvp.tid);
+ }
+ smp->tid = trap->tid;
+
smp->attr_id = IB_SMP_ATTR_NOTICE;
/* o14-1: smp->mkey = 0; */
- memcpy(smp->route.lid.data, data, len);
+
+ memcpy(smp->route.lid.data, &trap->data, trap->len);
spin_lock_irqsave(&ibp->rvp.lock, flags);
if (!ibp->rvp.sm_ah) {
@@ -143,65 +335,101 @@ static void send_trap(struct hfi1_ibport *ibp, void *data, unsigned len)
ah = hfi1_create_qp0_ah(ibp, ibp->rvp.sm_lid);
if (IS_ERR(ah)) {
- ret = PTR_ERR(ah);
- } else {
- send_buf->ah = ah;
- ibp->rvp.sm_ah = ibah_to_rvtah(ah);
- ret = 0;
+ spin_unlock_irqrestore(&ibp->rvp.lock, flags);
+ return;
}
+ send_buf->ah = ah;
+ ibp->rvp.sm_ah = ibah_to_rvtah(ah);
} else {
- ret = -EINVAL;
+ spin_unlock_irqrestore(&ibp->rvp.lock, flags);
+ return;
}
} else {
send_buf->ah = &ibp->rvp.sm_ah->ibah;
- ret = 0;
}
+
+ /*
+ * If the trap was repressed while things were getting set up, don't
+ * bother sending it. This could happen for a retry.
+ */
+ if (trap->repress) {
+ list_del(&trap->list);
+ spin_unlock_irqrestore(&ibp->rvp.lock, flags);
+ kfree(trap);
+ ib_free_send_mad(send_buf);
+ return;
+ }
+
+ trap->in_use = 0;
spin_unlock_irqrestore(&ibp->rvp.lock, flags);
- if (!ret)
- ret = ib_post_send_mad(send_buf, NULL);
- if (!ret) {
- /* 4.096 usec. */
- timeout = (4096 * (1UL << ibp->rvp.subnet_timeout)) / 1000;
- ibp->rvp.trap_timeout = jiffies + usecs_to_jiffies(timeout);
- } else {
+ if (ib_post_send_mad(send_buf, NULL))
ib_free_send_mad(send_buf);
- ibp->rvp.trap_timeout = 0;
+}
+
+void hfi1_handle_trap_timer(struct timer_list *t)
+{
+ struct hfi1_ibport *ibp = timer_container_of(ibp, t, rvp.trap_timer);
+ struct trap_node *trap = NULL;
+ unsigned long flags;
+ int i;
+
+ /* Find the trap with the highest priority */
+ spin_lock_irqsave(&ibp->rvp.lock, flags);
+ for (i = 0; !trap && i < RVT_MAX_TRAP_LISTS; i++) {
+ trap = list_first_entry_or_null(&ibp->rvp.trap_lists[i].list,
+ struct trap_node, list);
}
+ spin_unlock_irqrestore(&ibp->rvp.lock, flags);
+
+ if (trap)
+ send_trap(ibp, trap);
+}
+
+static struct trap_node *create_trap_node(u8 type, __be16 trap_num, u32 lid)
+{
+ struct trap_node *trap;
+
+ trap = kzalloc(sizeof(*trap), GFP_ATOMIC);
+ if (!trap)
+ return NULL;
+
+ INIT_LIST_HEAD(&trap->list);
+ trap->data.generic_type = type;
+ trap->data.prod_type_lsb = IB_NOTICE_PROD_CA;
+ trap->data.trap_num = trap_num;
+ trap->data.issuer_lid = cpu_to_be32(lid);
+
+ return trap;
}
/*
- * Send a bad [PQ]_Key trap (ch. 14.3.8).
+ * Send a bad P_Key trap (ch. 14.3.8).
*/
-void hfi1_bad_pqkey(struct hfi1_ibport *ibp, __be16 trap_num, u32 key, u32 sl,
- u32 qp1, u32 qp2, u16 lid1, u16 lid2)
+void hfi1_bad_pkey(struct hfi1_ibport *ibp, u32 key, u32 sl,
+ u32 qp1, u32 qp2, u32 lid1, u32 lid2)
{
- struct opa_mad_notice_attr data;
+ struct trap_node *trap;
u32 lid = ppd_from_ibp(ibp)->lid;
- u32 _lid1 = lid1;
- u32 _lid2 = lid2;
-
- memset(&data, 0, sizeof(data));
- if (trap_num == OPA_TRAP_BAD_P_KEY)
- ibp->rvp.pkey_violations++;
- else
- ibp->rvp.qkey_violations++;
ibp->rvp.n_pkt_drops++;
+ ibp->rvp.pkey_violations++;
+
+ trap = create_trap_node(IB_NOTICE_TYPE_SECURITY, OPA_TRAP_BAD_P_KEY,
+ lid);
+ if (!trap)
+ return;
/* Send violation trap */
- data.generic_type = IB_NOTICE_TYPE_SECURITY;
- data.prod_type_lsb = IB_NOTICE_PROD_CA;
- data.trap_num = trap_num;
- data.issuer_lid = cpu_to_be32(lid);
- data.ntc_257_258.lid1 = cpu_to_be32(_lid1);
- data.ntc_257_258.lid2 = cpu_to_be32(_lid2);
- data.ntc_257_258.key = cpu_to_be32(key);
- data.ntc_257_258.sl = sl << 3;
- data.ntc_257_258.qp1 = cpu_to_be32(qp1);
- data.ntc_257_258.qp2 = cpu_to_be32(qp2);
-
- send_trap(ibp, &data, sizeof(data));
+ trap->data.ntc_257_258.lid1 = cpu_to_be32(lid1);
+ trap->data.ntc_257_258.lid2 = cpu_to_be32(lid2);
+ trap->data.ntc_257_258.key = cpu_to_be32(key);
+ trap->data.ntc_257_258.sl = sl << 3;
+ trap->data.ntc_257_258.qp1 = cpu_to_be32(qp1);
+ trap->data.ntc_257_258.qp2 = cpu_to_be32(qp2);
+
+ trap->len = sizeof(trap->data);
+ send_trap(ibp, trap);
}
/*
@@ -210,57 +438,61 @@ void hfi1_bad_pqkey(struct hfi1_ibport *ibp, __be16 trap_num, u32 key, u32 sl,
static void bad_mkey(struct hfi1_ibport *ibp, struct ib_mad_hdr *mad,
__be64 mkey, __be32 dr_slid, u8 return_path[], u8 hop_cnt)
{
- struct opa_mad_notice_attr data;
+ struct trap_node *trap;
u32 lid = ppd_from_ibp(ibp)->lid;
- memset(&data, 0, sizeof(data));
+ trap = create_trap_node(IB_NOTICE_TYPE_SECURITY, OPA_TRAP_BAD_M_KEY,
+ lid);
+ if (!trap)
+ return;
+
/* Send violation trap */
- data.generic_type = IB_NOTICE_TYPE_SECURITY;
- data.prod_type_lsb = IB_NOTICE_PROD_CA;
- data.trap_num = OPA_TRAP_BAD_M_KEY;
- data.issuer_lid = cpu_to_be32(lid);
- data.ntc_256.lid = data.issuer_lid;
- data.ntc_256.method = mad->method;
- data.ntc_256.attr_id = mad->attr_id;
- data.ntc_256.attr_mod = mad->attr_mod;
- data.ntc_256.mkey = mkey;
+ trap->data.ntc_256.lid = trap->data.issuer_lid;
+ trap->data.ntc_256.method = mad->method;
+ trap->data.ntc_256.attr_id = mad->attr_id;
+ trap->data.ntc_256.attr_mod = mad->attr_mod;
+ trap->data.ntc_256.mkey = mkey;
if (mad->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
- data.ntc_256.dr_slid = dr_slid;
- data.ntc_256.dr_trunc_hop = IB_NOTICE_TRAP_DR_NOTICE;
- if (hop_cnt > ARRAY_SIZE(data.ntc_256.dr_rtn_path)) {
- data.ntc_256.dr_trunc_hop |=
+ trap->data.ntc_256.dr_slid = dr_slid;
+ trap->data.ntc_256.dr_trunc_hop = IB_NOTICE_TRAP_DR_NOTICE;
+ if (hop_cnt > ARRAY_SIZE(trap->data.ntc_256.dr_rtn_path)) {
+ trap->data.ntc_256.dr_trunc_hop |=
IB_NOTICE_TRAP_DR_TRUNC;
- hop_cnt = ARRAY_SIZE(data.ntc_256.dr_rtn_path);
+ hop_cnt = ARRAY_SIZE(trap->data.ntc_256.dr_rtn_path);
}
- data.ntc_256.dr_trunc_hop |= hop_cnt;
- memcpy(data.ntc_256.dr_rtn_path, return_path,
+ trap->data.ntc_256.dr_trunc_hop |= hop_cnt;
+ memcpy(trap->data.ntc_256.dr_rtn_path, return_path,
hop_cnt);
}
- send_trap(ibp, &data, sizeof(data));
+ trap->len = sizeof(trap->data);
+
+ send_trap(ibp, trap);
}
/*
* Send a Port Capability Mask Changed trap (ch. 14.3.11).
*/
-void hfi1_cap_mask_chg(struct rvt_dev_info *rdi, u8 port_num)
+void hfi1_cap_mask_chg(struct rvt_dev_info *rdi, u32 port_num)
{
- struct opa_mad_notice_attr data;
+ struct trap_node *trap;
struct hfi1_ibdev *verbs_dev = dev_from_rdi(rdi);
struct hfi1_devdata *dd = dd_from_dev(verbs_dev);
struct hfi1_ibport *ibp = &dd->pport[port_num - 1].ibport_data;
u32 lid = ppd_from_ibp(ibp)->lid;
- memset(&data, 0, sizeof(data));
+ trap = create_trap_node(IB_NOTICE_TYPE_INFO,
+ OPA_TRAP_CHANGE_CAPABILITY,
+ lid);
+ if (!trap)
+ return;
- data.generic_type = IB_NOTICE_TYPE_INFO;
- data.prod_type_lsb = IB_NOTICE_PROD_CA;
- data.trap_num = OPA_TRAP_CHANGE_CAPABILITY;
- data.issuer_lid = cpu_to_be32(lid);
- data.ntc_144.lid = data.issuer_lid;
- data.ntc_144.new_cap_mask = cpu_to_be32(ibp->rvp.port_cap_flags);
+ trap->data.ntc_144.lid = trap->data.issuer_lid;
+ trap->data.ntc_144.new_cap_mask = cpu_to_be32(ibp->rvp.port_cap_flags);
+ trap->data.ntc_144.cap_mask3 = cpu_to_be16(ibp->rvp.port_cap3_flags);
- send_trap(ibp, &data, sizeof(data));
+ trap->len = sizeof(trap->data);
+ send_trap(ibp, trap);
}
/*
@@ -268,19 +500,19 @@ void hfi1_cap_mask_chg(struct rvt_dev_info *rdi, u8 port_num)
*/
void hfi1_sys_guid_chg(struct hfi1_ibport *ibp)
{
- struct opa_mad_notice_attr data;
+ struct trap_node *trap;
u32 lid = ppd_from_ibp(ibp)->lid;
- memset(&data, 0, sizeof(data));
+ trap = create_trap_node(IB_NOTICE_TYPE_INFO, OPA_TRAP_CHANGE_SYSGUID,
+ lid);
+ if (!trap)
+ return;
- data.generic_type = IB_NOTICE_TYPE_INFO;
- data.prod_type_lsb = IB_NOTICE_PROD_CA;
- data.trap_num = OPA_TRAP_CHANGE_SYSGUID;
- data.issuer_lid = cpu_to_be32(lid);
- data.ntc_145.new_sys_guid = ib_hfi1_sys_image_guid;
- data.ntc_145.lid = data.issuer_lid;
+ trap->data.ntc_145.new_sys_guid = ib_hfi1_sys_image_guid;
+ trap->data.ntc_145.lid = trap->data.issuer_lid;
- send_trap(ibp, &data, sizeof(data));
+ trap->len = sizeof(trap->data);
+ send_trap(ibp, trap);
}
/*
@@ -288,29 +520,30 @@ void hfi1_sys_guid_chg(struct hfi1_ibport *ibp)
*/
void hfi1_node_desc_chg(struct hfi1_ibport *ibp)
{
- struct opa_mad_notice_attr data;
+ struct trap_node *trap;
u32 lid = ppd_from_ibp(ibp)->lid;
- memset(&data, 0, sizeof(data));
+ trap = create_trap_node(IB_NOTICE_TYPE_INFO,
+ OPA_TRAP_CHANGE_CAPABILITY,
+ lid);
+ if (!trap)
+ return;
- data.generic_type = IB_NOTICE_TYPE_INFO;
- data.prod_type_lsb = IB_NOTICE_PROD_CA;
- data.trap_num = OPA_TRAP_CHANGE_CAPABILITY;
- data.issuer_lid = cpu_to_be32(lid);
- data.ntc_144.lid = data.issuer_lid;
- data.ntc_144.change_flags =
+ trap->data.ntc_144.lid = trap->data.issuer_lid;
+ trap->data.ntc_144.change_flags =
cpu_to_be16(OPA_NOTICE_TRAP_NODE_DESC_CHG);
- send_trap(ibp, &data, sizeof(data));
+ trap->len = sizeof(trap->data);
+ send_trap(ibp, trap);
}
static int __subn_get_opa_nodedesc(struct opa_smp *smp, u32 am,
u8 *data, struct ib_device *ibdev,
- u8 port, u32 *resp_len)
+ u32 port, u32 *resp_len, u32 max_len)
{
struct opa_node_description *nd;
- if (am) {
+ if (am || smp_length_check(sizeof(*nd), max_len)) {
smp->status |= IB_SMP_INVALID_FIELD;
return reply((struct ib_mad_hdr *)smp);
}
@@ -326,17 +559,18 @@ static int __subn_get_opa_nodedesc(struct opa_smp *smp, u32 am,
}
static int __subn_get_opa_nodeinfo(struct opa_smp *smp, u32 am, u8 *data,
- struct ib_device *ibdev, u8 port,
- u32 *resp_len)
+ struct ib_device *ibdev, u32 port,
+ u32 *resp_len, u32 max_len)
{
struct opa_node_info *ni;
struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
- unsigned pidx = port - 1; /* IB number port from 1, hw from 0 */
+ u32 pidx = port - 1; /* IB number port from 1, hw from 0 */
ni = (struct opa_node_info *)data;
/* GUID 0 is illegal */
if (am || pidx >= dd->num_pports || ibdev->node_guid == 0 ||
+ smp_length_check(sizeof(*ni), max_len) ||
get_sguid(to_iport(ibdev, port), HFI1_PORT_GUID_INDEX) == 0) {
smp->status |= IB_SMP_INVALID_FIELD;
return reply((struct ib_mad_hdr *)smp);
@@ -365,11 +599,11 @@ static int __subn_get_opa_nodeinfo(struct opa_smp *smp, u32 am, u8 *data,
}
static int subn_get_nodeinfo(struct ib_smp *smp, struct ib_device *ibdev,
- u8 port)
+ u32 port)
{
struct ib_node_info *nip = (struct ib_node_info *)&smp->data;
struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
- unsigned pidx = port - 1; /* IB number port from 1, hw from 0 */
+ u32 pidx = port - 1; /* IB number port from 1, hw from 0 */
/* GUID 0 is illegal */
if (smp->attr_mod || pidx >= dd->num_pports ||
@@ -445,6 +679,7 @@ static int check_mkey(struct hfi1_ibport *ibp, struct ib_mad_hdr *mad,
/* Bad mkey not a violation below level 2 */
if (ibp->rvp.mkeyprot < 2)
break;
+ fallthrough;
case IB_MGMT_METHOD_SET:
case IB_MGMT_METHOD_TRAP_REPRESS:
if (ibp->rvp.mkey_violations != 0xFFFF)
@@ -517,8 +752,8 @@ void read_ltp_rtt(struct hfi1_devdata *dd)
}
static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
- struct ib_device *ibdev, u8 port,
- u32 *resp_len)
+ struct ib_device *ibdev, u32 port,
+ u32 *resp_len, u32 max_len)
{
int i;
struct hfi1_devdata *dd;
@@ -534,7 +769,7 @@ static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
u32 buffer_units;
u64 tmp = 0;
- if (num_ports != 1) {
+ if (num_ports != 1 || smp_length_check(sizeof(*pi), max_len)) {
smp->status |= IB_SMP_INVALID_FIELD;
return reply((struct ib_mad_hdr *)smp);
}
@@ -604,7 +839,7 @@ static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
ppd->offline_disabled_reason;
pi->port_states.portphysstate_portstate =
- (hfi1_ibphys_portstate(ppd) << 4) | state;
+ (driver_pstate(ppd) << 4) | state;
pi->mkeyprotect_lmc = (ibp->rvp.mkeyprot << 6) | ppd->lmc;
@@ -650,9 +885,11 @@ static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
OPA_PI_MASK_PORT_ACTIVE_OPTOMIZE : 0);
pi->port_packet_format.supported =
- cpu_to_be16(OPA_PORT_PACKET_FORMAT_9B);
+ cpu_to_be16(OPA_PORT_PACKET_FORMAT_9B |
+ OPA_PORT_PACKET_FORMAT_16B);
pi->port_packet_format.enabled =
- cpu_to_be16(OPA_PORT_PACKET_FORMAT_9B);
+ cpu_to_be16(OPA_PORT_PACKET_FORMAT_9B |
+ OPA_PORT_PACKET_FORMAT_16B);
/* flit_control.interleave is (OPA V1, version .76):
* bits use
@@ -701,7 +938,9 @@ static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
buffer_units |= (dd->vl15_init << 11) & OPA_PI_MASK_BUF_UNIT_VL15_INIT;
pi->buffer_units = cpu_to_be32(buffer_units);
- pi->opa_cap_mask = cpu_to_be16(OPA_CAP_MASK3_IsSharedSpaceSupported);
+ pi->opa_cap_mask = cpu_to_be16(ibp->rvp.port_cap3_flags);
+ pi->collectivemask_multicastmask = ((OPA_COLLECTIVE_NR & 0x7)
+ << 3 | (OPA_MCAST_NR & 0x7));
/* HFI supports a replay buffer 128 LTPs in size */
pi->replay_depth.buffer = 0x80;
@@ -728,7 +967,7 @@ static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
* @port: the IB port number
* @pkeys: the pkey table is placed here
*/
-static int get_pkeys(struct hfi1_devdata *dd, u8 port, u16 *pkeys)
+static int get_pkeys(struct hfi1_devdata *dd, u32 port, u16 *pkeys)
{
struct hfi1_pportdata *ppd = dd->pport + port - 1;
@@ -738,8 +977,8 @@ static int get_pkeys(struct hfi1_devdata *dd, u8 port, u16 *pkeys)
}
static int __subn_get_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data,
- struct ib_device *ibdev, u8 port,
- u32 *resp_len)
+ struct ib_device *ibdev, u32 port,
+ u32 *resp_len, u32 max_len)
{
struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
u32 n_blocks_req = OPA_AM_NBLK(am);
@@ -762,6 +1001,11 @@ static int __subn_get_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data,
size = (n_blocks_req * OPA_PARTITION_TABLE_BLK_SIZE) * sizeof(u16);
+ if (smp_length_check(size, max_len)) {
+ smp->status |= IB_SMP_INVALID_FIELD;
+ return reply((struct ib_mad_hdr *)smp);
+ }
+
if (start_block + n_blocks_req > n_blocks_avail ||
n_blocks_req > OPA_NUM_PKEY_BLOCKS_PER_SMP) {
pr_warn("OPA Get PKey AM Invalid : s 0x%x; req 0x%x; "
@@ -906,8 +1150,8 @@ static int physical_transition_allowed(int old, int new)
static int port_states_transition_allowed(struct hfi1_pportdata *ppd,
u32 logical_new, u32 physical_new)
{
- u32 physical_old = driver_physical_state(ppd);
- u32 logical_old = driver_logical_state(ppd);
+ u32 physical_old = driver_pstate(ppd);
+ u32 logical_old = driver_lstate(ppd);
int ret, logical_allowed, physical_allowed;
ret = logical_transition_allowed(logical_old, logical_new);
@@ -916,8 +1160,8 @@ static int port_states_transition_allowed(struct hfi1_pportdata *ppd,
if (ret == HFI_TRANSITION_DISALLOWED ||
ret == HFI_TRANSITION_UNDEFINED) {
pr_warn("invalid logical state transition %s -> %s\n",
- opa_lstate_name(logical_old),
- opa_lstate_name(logical_new));
+ ib_port_state_to_str(logical_old),
+ ib_port_state_to_str(logical_new));
return ret;
}
@@ -952,8 +1196,7 @@ static int port_states_transition_allowed(struct hfi1_pportdata *ppd,
}
static int set_port_states(struct hfi1_pportdata *ppd, struct opa_smp *smp,
- u32 logical_state, u32 phys_state,
- int suppress_idle_sma)
+ u32 logical_state, u32 phys_state, int local_mad)
{
struct hfi1_devdata *dd = ppd->dd;
u32 link_state;
@@ -987,7 +1230,7 @@ static int set_port_states(struct hfi1_pportdata *ppd, struct opa_smp *smp,
case IB_PORT_NOP:
if (phys_state == IB_PORTPHYSSTATE_NOP)
break;
- /* FALLTHROUGH */
+ fallthrough;
case IB_PORT_DOWN:
if (phys_state == IB_PORTPHYSSTATE_NOP) {
link_state = HLS_DN_DOWNDEF;
@@ -1029,12 +1272,12 @@ static int set_port_states(struct hfi1_pportdata *ppd, struct opa_smp *smp,
* Don't send a reply if the response would be sent
* through the disabled port.
*/
- if (link_state == HLS_DN_DISABLE && smp->hop_cnt)
+ if (link_state == HLS_DN_DISABLE && !local_mad)
return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
break;
case IB_PORT_ARMED:
ret = set_link_state(ppd, HLS_UP_ARMED);
- if ((ret == 0) && (suppress_idle_sma == 0))
+ if (!ret)
send_idle_sma(dd, SMA_IDLE_ARM);
break;
case IB_PORT_ACTIVE:
@@ -1056,7 +1299,7 @@ static int set_port_states(struct hfi1_pportdata *ppd, struct opa_smp *smp,
return 0;
}
-/**
+/*
* subn_set_opa_portinfo - set port information
* @smp: the incoming SM packet
* @ibdev: the infiniband device
@@ -1064,8 +1307,8 @@ static int set_port_states(struct hfi1_pportdata *ppd, struct opa_smp *smp,
*
*/
static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
- struct ib_device *ibdev, u8 port,
- u32 *resp_len)
+ struct ib_device *ibdev, u32 port,
+ u32 *resp_len, u32 max_len, int local_mad)
{
struct opa_port_info *pi = (struct opa_port_info *)data;
struct ib_event event;
@@ -1074,8 +1317,8 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
struct hfi1_ibport *ibp;
u8 clientrereg;
unsigned long flags;
- u32 smlid, opa_lid; /* tmp vars to hold LID values */
- u16 lid;
+ u32 smlid;
+ u32 lid;
u8 ls_old, ls_new, ps_new;
u8 vls;
u8 msl;
@@ -1086,27 +1329,26 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
int ret, i, invalid = 0, call_set_mtu = 0;
int call_link_downgrade_policy = 0;
- if (num_ports != 1) {
+ if (num_ports != 1 ||
+ smp_length_check(sizeof(*pi), max_len)) {
smp->status |= IB_SMP_INVALID_FIELD;
return reply((struct ib_mad_hdr *)smp);
}
- opa_lid = be32_to_cpu(pi->lid);
- if (opa_lid & 0xFFFF0000) {
- pr_warn("OPA_PortInfo lid out of range: %X\n", opa_lid);
+ lid = be32_to_cpu(pi->lid);
+ if (lid & 0xFF000000) {
+ pr_warn("OPA_PortInfo lid out of range: %X\n", lid);
smp->status |= IB_SMP_INVALID_FIELD;
goto get_only;
}
- lid = (u16)(opa_lid & 0x0000FFFF);
smlid = be32_to_cpu(pi->sm_lid);
- if (smlid & 0xFFFF0000) {
+ if (smlid & 0xFF000000) {
pr_warn("OPA_PortInfo SM lid out of range: %X\n", smlid);
smp->status |= IB_SMP_INVALID_FIELD;
goto get_only;
}
- smlid &= 0x0000FFFF;
clientrereg = (pi->clientrereg_subnettimeout &
OPA_PI_MASK_CLIENT_REREGISTER);
@@ -1121,12 +1363,16 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
ls_old = driver_lstate(ppd);
ibp->rvp.mkey = pi->mkey;
- ibp->rvp.gid_prefix = pi->subnet_prefix;
+ if (ibp->rvp.gid_prefix != pi->subnet_prefix) {
+ ibp->rvp.gid_prefix = pi->subnet_prefix;
+ event.event = IB_EVENT_GID_CHANGE;
+ ib_dispatch_event(&event);
+ }
ibp->rvp.mkey_lease_period = be16_to_cpu(pi->mkey_lease_period);
/* Must be a valid unicast LID address. */
if ((lid == 0 && ls_old > IB_PORT_INIT) ||
- lid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) {
+ (hfi1_is_16B_mcast(lid))) {
smp->status |= IB_SMP_INVALID_FIELD;
pr_warn("SubnSet(OPA_PortInfo) lid invalid 0x%x\n",
lid);
@@ -1139,6 +1385,16 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
hfi1_set_lid(ppd, lid, pi->mkeyprotect_lmc & OPA_PI_MASK_LMC);
event.event = IB_EVENT_LID_CHANGE;
ib_dispatch_event(&event);
+
+ if (HFI1_PORT_GUID_INDEX + 1 < HFI1_GUIDS_PER_PORT) {
+ /* Manufacture GID from LID to support extended
+ * addresses
+ */
+ ppd->guids[HFI1_PORT_GUID_INDEX + 1] =
+ be64_to_cpu(OPA_MAKE_ID(lid));
+ event.event = IB_EVENT_GID_CHANGE;
+ ib_dispatch_event(&event);
+ }
}
msl = pi->smsl & OPA_PI_MASK_SMSL;
@@ -1146,20 +1402,10 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
ppd->linkinit_reason =
(pi->partenforce_filterraw &
OPA_PI_MASK_LINKINIT_REASON);
- /* enable/disable SW pkey checking as per FM control */
- if (pi->partenforce_filterraw & OPA_PI_MASK_PARTITION_ENFORCE_IN)
- ppd->part_enforce |= HFI1_PART_ENFORCE_IN;
- else
- ppd->part_enforce &= ~HFI1_PART_ENFORCE_IN;
-
- if (pi->partenforce_filterraw & OPA_PI_MASK_PARTITION_ENFORCE_OUT)
- ppd->part_enforce |= HFI1_PART_ENFORCE_OUT;
- else
- ppd->part_enforce &= ~HFI1_PART_ENFORCE_OUT;
/* Must be a valid unicast LID address. */
if ((smlid == 0 && ls_old > IB_PORT_INIT) ||
- smlid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) {
+ (hfi1_is_16B_mcast(smlid))) {
smp->status |= IB_SMP_INVALID_FIELD;
pr_warn("SubnSet(OPA_PortInfo) smlid invalid 0x%x\n", smlid);
} else if (smlid != ibp->rvp.sm_lid || msl != ibp->rvp.sm_sl) {
@@ -1167,9 +1413,9 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
spin_lock_irqsave(&ibp->rvp.lock, flags);
if (ibp->rvp.sm_ah) {
if (smlid != ibp->rvp.sm_lid)
- ibp->rvp.sm_ah->attr.dlid = smlid;
+ hfi1_modify_qp0_ah(ibp, ibp->rvp.sm_ah, smlid);
if (msl != ibp->rvp.sm_sl)
- ibp->rvp.sm_ah->attr.sl = msl;
+ rdma_ah_set_sl(&ibp->rvp.sm_ah->attr, msl);
}
spin_unlock_irqrestore(&ibp->rvp.lock, flags);
if (smlid != ibp->rvp.sm_lid)
@@ -1325,8 +1571,10 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
if (ls_new == ls_old || (ls_new == IB_PORT_ARMED))
ppd->is_sm_config_started = 1;
} else if (ls_new == IB_PORT_ARMED) {
- if (ppd->is_sm_config_started == 0)
+ if (ppd->is_sm_config_started == 0) {
invalid = 1;
+ smp->status |= IB_SMP_INVALID_FIELD;
+ }
}
}
@@ -1343,11 +1591,14 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
* is down or is being set to down.
*/
- ret = set_port_states(ppd, smp, ls_new, ps_new, invalid);
- if (ret)
- return ret;
+ if (!invalid) {
+ ret = set_port_states(ppd, smp, ls_new, ps_new, local_mad);
+ if (ret)
+ return ret;
+ }
- ret = __subn_get_opa_portinfo(smp, am, data, ibdev, port, resp_len);
+ ret = __subn_get_opa_portinfo(smp, am, data, ibdev, port, resp_len,
+ max_len);
/* restore re-reg bit per o14-12.2.1 */
pi->clientrereg_subnettimeout |= clientrereg;
@@ -1364,7 +1615,8 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
return ret;
get_only:
- return __subn_get_opa_portinfo(smp, am, data, ibdev, port, resp_len);
+ return __subn_get_opa_portinfo(smp, am, data, ibdev, port, resp_len,
+ max_len);
}
/**
@@ -1373,7 +1625,7 @@ get_only:
* @port: the IB port number
* @pkeys: the PKEY table
*/
-static int set_pkeys(struct hfi1_devdata *dd, u8 port, u16 *pkeys)
+static int set_pkeys(struct hfi1_devdata *dd, u32 port, u16 *pkeys)
{
struct hfi1_pportdata *ppd;
int i;
@@ -1424,8 +1676,8 @@ static int set_pkeys(struct hfi1_devdata *dd, u8 port, u16 *pkeys)
}
static int __subn_set_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data,
- struct ib_device *ibdev, u8 port,
- u32 *resp_len)
+ struct ib_device *ibdev, u32 port,
+ u32 *resp_len, u32 max_len)
{
struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
u32 n_blocks_sent = OPA_AM_NBLK(am);
@@ -1435,9 +1687,10 @@ static int __subn_set_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data,
int i;
u16 n_blocks_avail;
unsigned npkeys = hfi1_get_npkeys(dd);
+ u32 size = 0;
if (n_blocks_sent == 0) {
- pr_warn("OPA Get PKey AM Invalid : P = %d; B = 0x%x; N = 0x%x\n",
+ pr_warn("OPA Get PKey AM Invalid : P = %u; B = 0x%x; N = 0x%x\n",
port, start_block, n_blocks_sent);
smp->status |= IB_SMP_INVALID_FIELD;
return reply((struct ib_mad_hdr *)smp);
@@ -1445,6 +1698,13 @@ static int __subn_set_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data,
n_blocks_avail = (u16)(npkeys / OPA_PARTITION_TABLE_BLK_SIZE) + 1;
+ size = sizeof(u16) * (n_blocks_sent * OPA_PARTITION_TABLE_BLK_SIZE);
+
+ if (smp_length_check(size, max_len)) {
+ smp->status |= IB_SMP_INVALID_FIELD;
+ return reply((struct ib_mad_hdr *)smp);
+ }
+
if (start_block + n_blocks_sent > n_blocks_avail ||
n_blocks_sent > OPA_NUM_PKEY_BLOCKS_PER_SMP) {
pr_warn("OPA Set PKey AM Invalid : s 0x%x; req 0x%x; avail 0x%x; blk/smp 0x%lx\n",
@@ -1462,18 +1722,8 @@ static int __subn_set_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data,
return reply((struct ib_mad_hdr *)smp);
}
- return __subn_get_opa_pkeytable(smp, am, data, ibdev, port, resp_len);
-}
-
-static int get_sc2vlt_tables(struct hfi1_devdata *dd, void *data)
-{
- u64 *val = data;
-
- *val++ = read_csr(dd, SEND_SC2VLT0);
- *val++ = read_csr(dd, SEND_SC2VLT1);
- *val++ = read_csr(dd, SEND_SC2VLT2);
- *val++ = read_csr(dd, SEND_SC2VLT3);
- return 0;
+ return __subn_get_opa_pkeytable(smp, am, data, ibdev, port, resp_len,
+ max_len);
}
#define ILLEGAL_VL 12
@@ -1482,8 +1732,9 @@ static int get_sc2vlt_tables(struct hfi1_devdata *dd, void *data)
* for SC15, which must map to VL15). If we don't remap things this
* way it is possible for VL15 counters to increment when we try to
* send on a SC which is mapped to an invalid VL.
+ * When getting the table convert ILLEGAL_VL back to VL15.
*/
-static void filter_sc2vlt(void *data)
+static void filter_sc2vlt(void *data, bool set)
{
int i;
u8 *pd = data;
@@ -1491,8 +1742,14 @@ static void filter_sc2vlt(void *data)
for (i = 0; i < OPA_MAX_SCS; i++) {
if (i == 15)
continue;
- if ((pd[i] & 0x1f) == 0xf)
- pd[i] = ILLEGAL_VL;
+
+ if (set) {
+ if ((pd[i] & 0x1f) == 0xf)
+ pd[i] = ILLEGAL_VL;
+ } else {
+ if ((pd[i] & 0x1f) == ILLEGAL_VL)
+ pd[i] = 0xf;
+ }
}
}
@@ -1500,7 +1757,7 @@ static int set_sc2vlt_tables(struct hfi1_devdata *dd, void *data)
{
u64 *val = data;
- filter_sc2vlt(data);
+ filter_sc2vlt(data, true);
write_csr(dd, SEND_SC2VLT0, *val++);
write_csr(dd, SEND_SC2VLT1, *val++);
@@ -1512,16 +1769,29 @@ static int set_sc2vlt_tables(struct hfi1_devdata *dd, void *data)
return 0;
}
+static int get_sc2vlt_tables(struct hfi1_devdata *dd, void *data)
+{
+ u64 *val = (u64 *)data;
+
+ *val++ = read_csr(dd, SEND_SC2VLT0);
+ *val++ = read_csr(dd, SEND_SC2VLT1);
+ *val++ = read_csr(dd, SEND_SC2VLT2);
+ *val++ = read_csr(dd, SEND_SC2VLT3);
+
+ filter_sc2vlt((u64 *)data, false);
+ return 0;
+}
+
static int __subn_get_opa_sl_to_sc(struct opa_smp *smp, u32 am, u8 *data,
- struct ib_device *ibdev, u8 port,
- u32 *resp_len)
+ struct ib_device *ibdev, u32 port,
+ u32 *resp_len, u32 max_len)
{
struct hfi1_ibport *ibp = to_iport(ibdev, port);
u8 *p = data;
size_t size = ARRAY_SIZE(ibp->sl_to_sc); /* == 32 */
unsigned i;
- if (am) {
+ if (am || smp_length_check(size, max_len)) {
smp->status |= IB_SMP_INVALID_FIELD;
return reply((struct ib_mad_hdr *)smp);
}
@@ -1536,15 +1806,16 @@ static int __subn_get_opa_sl_to_sc(struct opa_smp *smp, u32 am, u8 *data,
}
static int __subn_set_opa_sl_to_sc(struct opa_smp *smp, u32 am, u8 *data,
- struct ib_device *ibdev, u8 port,
- u32 *resp_len)
+ struct ib_device *ibdev, u32 port,
+ u32 *resp_len, u32 max_len)
{
struct hfi1_ibport *ibp = to_iport(ibdev, port);
u8 *p = data;
+ size_t size = ARRAY_SIZE(ibp->sl_to_sc);
int i;
u8 sc;
- if (am) {
+ if (am || smp_length_check(size, max_len)) {
smp->status |= IB_SMP_INVALID_FIELD;
return reply((struct ib_mad_hdr *)smp);
}
@@ -1559,19 +1830,20 @@ static int __subn_set_opa_sl_to_sc(struct opa_smp *smp, u32 am, u8 *data,
}
}
- return __subn_get_opa_sl_to_sc(smp, am, data, ibdev, port, resp_len);
+ return __subn_get_opa_sl_to_sc(smp, am, data, ibdev, port, resp_len,
+ max_len);
}
static int __subn_get_opa_sc_to_sl(struct opa_smp *smp, u32 am, u8 *data,
- struct ib_device *ibdev, u8 port,
- u32 *resp_len)
+ struct ib_device *ibdev, u32 port,
+ u32 *resp_len, u32 max_len)
{
struct hfi1_ibport *ibp = to_iport(ibdev, port);
u8 *p = data;
size_t size = ARRAY_SIZE(ibp->sc_to_sl); /* == 32 */
unsigned i;
- if (am) {
+ if (am || smp_length_check(size, max_len)) {
smp->status |= IB_SMP_INVALID_FIELD;
return reply((struct ib_mad_hdr *)smp);
}
@@ -1586,14 +1858,15 @@ static int __subn_get_opa_sc_to_sl(struct opa_smp *smp, u32 am, u8 *data,
}
static int __subn_set_opa_sc_to_sl(struct opa_smp *smp, u32 am, u8 *data,
- struct ib_device *ibdev, u8 port,
- u32 *resp_len)
+ struct ib_device *ibdev, u32 port,
+ u32 *resp_len, u32 max_len)
{
struct hfi1_ibport *ibp = to_iport(ibdev, port);
+ size_t size = ARRAY_SIZE(ibp->sc_to_sl);
u8 *p = data;
int i;
- if (am) {
+ if (am || smp_length_check(size, max_len)) {
smp->status |= IB_SMP_INVALID_FIELD;
return reply((struct ib_mad_hdr *)smp);
}
@@ -1601,19 +1874,20 @@ static int __subn_set_opa_sc_to_sl(struct opa_smp *smp, u32 am, u8 *data,
for (i = 0; i < ARRAY_SIZE(ibp->sc_to_sl); i++)
ibp->sc_to_sl[i] = *p++;
- return __subn_get_opa_sc_to_sl(smp, am, data, ibdev, port, resp_len);
+ return __subn_get_opa_sc_to_sl(smp, am, data, ibdev, port, resp_len,
+ max_len);
}
static int __subn_get_opa_sc_to_vlt(struct opa_smp *smp, u32 am, u8 *data,
- struct ib_device *ibdev, u8 port,
- u32 *resp_len)
+ struct ib_device *ibdev, u32 port,
+ u32 *resp_len, u32 max_len)
{
u32 n_blocks = OPA_AM_NBLK(am);
struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
void *vp = (void *)data;
size_t size = 4 * sizeof(u64);
- if (n_blocks != 1) {
+ if (n_blocks != 1 || smp_length_check(size, max_len)) {
smp->status |= IB_SMP_INVALID_FIELD;
return reply((struct ib_mad_hdr *)smp);
}
@@ -1627,8 +1901,8 @@ static int __subn_get_opa_sc_to_vlt(struct opa_smp *smp, u32 am, u8 *data,
}
static int __subn_set_opa_sc_to_vlt(struct opa_smp *smp, u32 am, u8 *data,
- struct ib_device *ibdev, u8 port,
- u32 *resp_len)
+ struct ib_device *ibdev, u32 port,
+ u32 *resp_len, u32 max_len)
{
u32 n_blocks = OPA_AM_NBLK(am);
int async_update = OPA_AM_ASYNC(am);
@@ -1636,8 +1910,15 @@ static int __subn_set_opa_sc_to_vlt(struct opa_smp *smp, u32 am, u8 *data,
void *vp = (void *)data;
struct hfi1_pportdata *ppd;
int lstate;
+ /*
+ * set_sc2vlt_tables writes the information contained in *data
+ * to four 64-bit registers SendSC2VLt[0-3]. We need to make
+ * sure *max_len is not greater than the total size of the four
+ * SendSC2VLt[0-3] registers.
+ */
+ size_t size = 4 * sizeof(u64);
- if (n_blocks != 1 || async_update) {
+ if (n_blocks != 1 || async_update || smp_length_check(size, max_len)) {
smp->status |= IB_SMP_INVALID_FIELD;
return reply((struct ib_mad_hdr *)smp);
}
@@ -1657,27 +1938,28 @@ static int __subn_set_opa_sc_to_vlt(struct opa_smp *smp, u32 am, u8 *data,
set_sc2vlt_tables(dd, vp);
- return __subn_get_opa_sc_to_vlt(smp, am, data, ibdev, port, resp_len);
+ return __subn_get_opa_sc_to_vlt(smp, am, data, ibdev, port, resp_len,
+ max_len);
}
static int __subn_get_opa_sc_to_vlnt(struct opa_smp *smp, u32 am, u8 *data,
- struct ib_device *ibdev, u8 port,
- u32 *resp_len)
+ struct ib_device *ibdev, u32 port,
+ u32 *resp_len, u32 max_len)
{
u32 n_blocks = OPA_AM_NPORT(am);
struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
struct hfi1_pportdata *ppd;
void *vp = (void *)data;
- int size;
+ int size = sizeof(struct sc2vlnt);
- if (n_blocks != 1) {
+ if (n_blocks != 1 || smp_length_check(size, max_len)) {
smp->status |= IB_SMP_INVALID_FIELD;
return reply((struct ib_mad_hdr *)smp);
}
ppd = dd->pport + (port - 1);
- size = fm_get_table(ppd, FM_TBL_SC2VLNT, vp);
+ fm_get_table(ppd, FM_TBL_SC2VLNT, vp);
if (resp_len)
*resp_len += size;
@@ -1686,16 +1968,17 @@ static int __subn_get_opa_sc_to_vlnt(struct opa_smp *smp, u32 am, u8 *data,
}
static int __subn_set_opa_sc_to_vlnt(struct opa_smp *smp, u32 am, u8 *data,
- struct ib_device *ibdev, u8 port,
- u32 *resp_len)
+ struct ib_device *ibdev, u32 port,
+ u32 *resp_len, u32 max_len)
{
u32 n_blocks = OPA_AM_NPORT(am);
struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
struct hfi1_pportdata *ppd;
void *vp = (void *)data;
int lstate;
+ int size = sizeof(struct sc2vlnt);
- if (n_blocks != 1) {
+ if (n_blocks != 1 || smp_length_check(size, max_len)) {
smp->status |= IB_SMP_INVALID_FIELD;
return reply((struct ib_mad_hdr *)smp);
}
@@ -1713,12 +1996,12 @@ static int __subn_set_opa_sc_to_vlnt(struct opa_smp *smp, u32 am, u8 *data,
fm_set_table(ppd, FM_TBL_SC2VLNT, vp);
return __subn_get_opa_sc_to_vlnt(smp, am, data, ibdev, port,
- resp_len);
+ resp_len, max_len);
}
static int __subn_get_opa_psi(struct opa_smp *smp, u32 am, u8 *data,
- struct ib_device *ibdev, u8 port,
- u32 *resp_len)
+ struct ib_device *ibdev, u32 port,
+ u32 *resp_len, u32 max_len)
{
u32 nports = OPA_AM_NPORT(am);
u32 start_of_sm_config = OPA_AM_START_SM_CFG(am);
@@ -1727,7 +2010,7 @@ static int __subn_get_opa_psi(struct opa_smp *smp, u32 am, u8 *data,
struct hfi1_pportdata *ppd;
struct opa_port_state_info *psi = (struct opa_port_state_info *)data;
- if (nports != 1) {
+ if (nports != 1 || smp_length_check(sizeof(*psi), max_len)) {
smp->status |= IB_SMP_INVALID_FIELD;
return reply((struct ib_mad_hdr *)smp);
}
@@ -1747,7 +2030,7 @@ static int __subn_get_opa_psi(struct opa_smp *smp, u32 am, u8 *data,
ppd->offline_disabled_reason;
psi->port_states.portphysstate_portstate =
- (hfi1_ibphys_portstate(ppd) << 4) | (lstate & 0xf);
+ (driver_pstate(ppd) << 4) | (lstate & 0xf);
psi->link_width_downgrade_tx_active =
cpu_to_be16(ppd->link_width_downgrade_tx_active);
psi->link_width_downgrade_rx_active =
@@ -1759,8 +2042,8 @@ static int __subn_get_opa_psi(struct opa_smp *smp, u32 am, u8 *data,
}
static int __subn_set_opa_psi(struct opa_smp *smp, u32 am, u8 *data,
- struct ib_device *ibdev, u8 port,
- u32 *resp_len)
+ struct ib_device *ibdev, u32 port,
+ u32 *resp_len, u32 max_len, int local_mad)
{
u32 nports = OPA_AM_NPORT(am);
u32 start_of_sm_config = OPA_AM_START_SM_CFG(am);
@@ -1771,7 +2054,7 @@ static int __subn_set_opa_psi(struct opa_smp *smp, u32 am, u8 *data,
struct opa_port_state_info *psi = (struct opa_port_state_info *)data;
int ret, invalid = 0;
- if (nports != 1) {
+ if (nports != 1 || smp_length_check(sizeof(*psi), max_len)) {
smp->status |= IB_SMP_INVALID_FIELD;
return reply((struct ib_mad_hdr *)smp);
}
@@ -1789,31 +2072,34 @@ static int __subn_set_opa_psi(struct opa_smp *smp, u32 am, u8 *data,
if (ls_new == ls_old || (ls_new == IB_PORT_ARMED))
ppd->is_sm_config_started = 1;
} else if (ls_new == IB_PORT_ARMED) {
- if (ppd->is_sm_config_started == 0)
+ if (ppd->is_sm_config_started == 0) {
invalid = 1;
+ smp->status |= IB_SMP_INVALID_FIELD;
+ }
}
}
- ret = set_port_states(ppd, smp, ls_new, ps_new, invalid);
- if (ret)
- return ret;
-
- if (invalid)
- smp->status |= IB_SMP_INVALID_FIELD;
+ if (!invalid) {
+ ret = set_port_states(ppd, smp, ls_new, ps_new, local_mad);
+ if (ret)
+ return ret;
+ }
- return __subn_get_opa_psi(smp, am, data, ibdev, port, resp_len);
+ return __subn_get_opa_psi(smp, am, data, ibdev, port, resp_len,
+ max_len);
}
static int __subn_get_opa_cable_info(struct opa_smp *smp, u32 am, u8 *data,
- struct ib_device *ibdev, u8 port,
- u32 *resp_len)
+ struct ib_device *ibdev, u32 port,
+ u32 *resp_len, u32 max_len)
{
struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
u32 addr = OPA_AM_CI_ADDR(am);
u32 len = OPA_AM_CI_LEN(am) + 1;
int ret;
- if (dd->pport->port_type != PORT_TYPE_QSFP) {
+ if (dd->pport->port_type != PORT_TYPE_QSFP ||
+ smp_length_check(len, max_len)) {
smp->status |= IB_SMP_INVALID_FIELD;
return reply((struct ib_mad_hdr *)smp);
}
@@ -1856,21 +2142,22 @@ static int __subn_get_opa_cable_info(struct opa_smp *smp, u32 am, u8 *data,
}
static int __subn_get_opa_bct(struct opa_smp *smp, u32 am, u8 *data,
- struct ib_device *ibdev, u8 port, u32 *resp_len)
+ struct ib_device *ibdev, u32 port, u32 *resp_len,
+ u32 max_len)
{
u32 num_ports = OPA_AM_NPORT(am);
struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
struct hfi1_pportdata *ppd;
struct buffer_control *p = (struct buffer_control *)data;
- int size;
+ int size = sizeof(struct buffer_control);
- if (num_ports != 1) {
+ if (num_ports != 1 || smp_length_check(size, max_len)) {
smp->status |= IB_SMP_INVALID_FIELD;
return reply((struct ib_mad_hdr *)smp);
}
ppd = dd->pport + (port - 1);
- size = fm_get_table(ppd, FM_TBL_BUFFER_CONTROL, p);
+ fm_get_table(ppd, FM_TBL_BUFFER_CONTROL, p);
trace_bct_get(dd, p);
if (resp_len)
*resp_len += size;
@@ -1879,14 +2166,15 @@ static int __subn_get_opa_bct(struct opa_smp *smp, u32 am, u8 *data,
}
static int __subn_set_opa_bct(struct opa_smp *smp, u32 am, u8 *data,
- struct ib_device *ibdev, u8 port, u32 *resp_len)
+ struct ib_device *ibdev, u32 port, u32 *resp_len,
+ u32 max_len)
{
u32 num_ports = OPA_AM_NPORT(am);
struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
struct hfi1_pportdata *ppd;
struct buffer_control *p = (struct buffer_control *)data;
- if (num_ports != 1) {
+ if (num_ports != 1 || smp_length_check(sizeof(*p), max_len)) {
smp->status |= IB_SMP_INVALID_FIELD;
return reply((struct ib_mad_hdr *)smp);
}
@@ -1897,41 +2185,43 @@ static int __subn_set_opa_bct(struct opa_smp *smp, u32 am, u8 *data,
return reply((struct ib_mad_hdr *)smp);
}
- return __subn_get_opa_bct(smp, am, data, ibdev, port, resp_len);
+ return __subn_get_opa_bct(smp, am, data, ibdev, port, resp_len,
+ max_len);
}
static int __subn_get_opa_vl_arb(struct opa_smp *smp, u32 am, u8 *data,
- struct ib_device *ibdev, u8 port,
- u32 *resp_len)
+ struct ib_device *ibdev, u32 port,
+ u32 *resp_len, u32 max_len)
{
struct hfi1_pportdata *ppd = ppd_from_ibp(to_iport(ibdev, port));
u32 num_ports = OPA_AM_NPORT(am);
u8 section = (am & 0x00ff0000) >> 16;
u8 *p = data;
- int size = 0;
+ int size = 256;
- if (num_ports != 1) {
+ if (num_ports != 1 || smp_length_check(size, max_len)) {
smp->status |= IB_SMP_INVALID_FIELD;
return reply((struct ib_mad_hdr *)smp);
}
switch (section) {
case OPA_VLARB_LOW_ELEMENTS:
- size = fm_get_table(ppd, FM_TBL_VL_LOW_ARB, p);
+ fm_get_table(ppd, FM_TBL_VL_LOW_ARB, p);
break;
case OPA_VLARB_HIGH_ELEMENTS:
- size = fm_get_table(ppd, FM_TBL_VL_HIGH_ARB, p);
+ fm_get_table(ppd, FM_TBL_VL_HIGH_ARB, p);
break;
case OPA_VLARB_PREEMPT_ELEMENTS:
- size = fm_get_table(ppd, FM_TBL_VL_PREEMPT_ELEMS, p);
+ fm_get_table(ppd, FM_TBL_VL_PREEMPT_ELEMS, p);
break;
case OPA_VLARB_PREEMPT_MATRIX:
- size = fm_get_table(ppd, FM_TBL_VL_PREEMPT_MATRIX, p);
+ fm_get_table(ppd, FM_TBL_VL_PREEMPT_MATRIX, p);
break;
default:
pr_warn("OPA SubnGet(VL Arb) AM Invalid : 0x%x\n",
be32_to_cpu(smp->attr_mod));
smp->status |= IB_SMP_INVALID_FIELD;
+ size = 0;
break;
}
@@ -1942,15 +2232,16 @@ static int __subn_get_opa_vl_arb(struct opa_smp *smp, u32 am, u8 *data,
}
static int __subn_set_opa_vl_arb(struct opa_smp *smp, u32 am, u8 *data,
- struct ib_device *ibdev, u8 port,
- u32 *resp_len)
+ struct ib_device *ibdev, u32 port,
+ u32 *resp_len, u32 max_len)
{
struct hfi1_pportdata *ppd = ppd_from_ibp(to_iport(ibdev, port));
u32 num_ports = OPA_AM_NPORT(am);
u8 section = (am & 0x00ff0000) >> 16;
u8 *p = data;
+ int size = 256;
- if (num_ports != 1) {
+ if (num_ports != 1 || smp_length_check(size, max_len)) {
smp->status |= IB_SMP_INVALID_FIELD;
return reply((struct ib_mad_hdr *)smp);
}
@@ -1967,7 +2258,6 @@ static int __subn_set_opa_vl_arb(struct opa_smp *smp, u32 am, u8 *data,
* can be changed from the default values
*/
case OPA_VLARB_PREEMPT_ELEMENTS:
- /* FALLTHROUGH */
case OPA_VLARB_PREEMPT_MATRIX:
smp->status |= IB_SMP_UNSUP_METH_ATTR;
break;
@@ -1978,7 +2268,8 @@ static int __subn_set_opa_vl_arb(struct opa_smp *smp, u32 am, u8 *data,
break;
}
- return __subn_get_opa_vl_arb(smp, am, data, ibdev, port, resp_len);
+ return __subn_get_opa_vl_arb(smp, am, data, ibdev, port, resp_len,
+ max_len);
}
struct opa_pma_mad {
@@ -1986,38 +2277,13 @@ struct opa_pma_mad {
u8 data[2024];
} __packed;
-struct opa_class_port_info {
- u8 base_version;
- u8 class_version;
- __be16 cap_mask;
- __be32 cap_mask2_resp_time;
-
- u8 redirect_gid[16];
- __be32 redirect_tc_fl;
- __be32 redirect_lid;
- __be32 redirect_sl_qp;
- __be32 redirect_qkey;
-
- u8 trap_gid[16];
- __be32 trap_tc_fl;
- __be32 trap_lid;
- __be32 trap_hl_qp;
- __be32 trap_qkey;
-
- __be16 trap_pkey;
- __be16 redirect_pkey;
-
- u8 trap_sl_rsvd;
- u8 reserved[3];
-} __packed;
-
struct opa_port_status_req {
__u8 port_num;
__u8 reserved[3];
__be32 vl_select_mask;
};
-#define VL_MASK_ALL 0x000080ff
+#define VL_MASK_ALL 0x00000000000080ffUL
struct opa_port_status_rsp {
__u8 port_num;
@@ -2072,7 +2338,7 @@ struct opa_port_status_rsp {
__be64 port_vl_rcv_bubble;
__be64 port_vl_mark_fecn;
__be64 port_vl_xmit_discards;
- } vls[0]; /* real array size defined by # bits set in vl_select_mask */
+ } vls[]; /* real array size defined by # bits set in vl_select_mask */
};
enum counter_selects {
@@ -2114,7 +2380,7 @@ struct opa_aggregate {
__be16 attr_id;
__be16 err_reqlength; /* 1 bit, 8 res, 7 bit */
__be32 attr_mod;
- u8 data[0];
+ u8 data[];
};
#define MSK_LLI 0x000000f0
@@ -2171,9 +2437,9 @@ struct opa_port_data_counters_msg {
__be64 port_vl_xmit_wait_data;
__be64 port_vl_rcv_bubble;
__be64 port_vl_mark_fecn;
- } vls[0];
+ } vls[];
/* array size defined by #bits set in vl_select_mask*/
- } port[1]; /* array size defined by #ports in attribute modifier */
+ } port;
};
struct opa_port_error_counters64_msg {
@@ -2204,9 +2470,9 @@ struct opa_port_error_counters64_msg {
u8 reserved3[7];
struct _vls_ectrs {
__be64 port_vl_xmit_discards;
- } vls[0];
+ } vls[];
/* array size defined by #bits set in vl_select_mask */
- } port[1]; /* array size defined by #ports in attribute modifier */
+ } port;
};
struct opa_port_error_info_msg {
@@ -2277,7 +2543,7 @@ struct opa_port_error_info_msg {
u8 error_info;
} __packed fm_config_ei;
__u32 reserved9;
- } port[1]; /* actual array size defined by #ports in attr modifier */
+ } port;
};
/* opa_port_error_info_msg error_info_select_mask bit definitions */
@@ -2316,15 +2582,14 @@ static int pma_get_opa_classportinfo(struct opa_pma_mad *pmp,
}
static void a0_portstatus(struct hfi1_pportdata *ppd,
- struct opa_port_status_rsp *rsp, u32 vl_select_mask)
+ struct opa_port_status_rsp *rsp)
{
if (!is_bx(ppd->dd)) {
unsigned long vl;
u64 sum_vl_xmit_wait = 0;
- u32 vl_all_mask = VL_MASK_ALL;
+ unsigned long vl_all_mask = VL_MASK_ALL;
- for_each_set_bit(vl, (unsigned long *)&(vl_all_mask),
- 8 * sizeof(vl_all_mask)) {
+ for_each_set_bit(vl, &vl_all_mask, BITS_PER_LONG) {
u64 tmp = sum_vl_xmit_wait +
read_port_cntr(ppd, C_TX_WAIT_VL,
idx_from_vl(vl));
@@ -2340,28 +2605,102 @@ static void a0_portstatus(struct hfi1_pportdata *ppd,
}
}
+/**
+ * tx_link_width - convert link width bitmask to integer
+ * value representing actual link width.
+ * @link_width: width of active link
+ * @return: return index of the bit set in link_width var
+ *
+ * The function convert and return the index of bit set
+ * that indicate the current link width.
+ */
+u16 tx_link_width(u16 link_width)
+{
+ int n = LINK_WIDTH_DEFAULT;
+ u16 tx_width = n;
+
+ while (link_width && n) {
+ if (link_width & (1 << (n - 1))) {
+ tx_width = n;
+ break;
+ }
+ n--;
+ }
+
+ return tx_width;
+}
+
+/**
+ * get_xmit_wait_counters - Convert HFI 's SendWaitCnt/SendWaitVlCnt
+ * counter in unit of TXE cycle times to flit times.
+ * @ppd: info of physical Hfi port
+ * @link_width: width of active link
+ * @link_speed: speed of active link
+ * @vl: represent VL0-VL7, VL15 for PortVLXmitWait counters request
+ * and if vl value is C_VL_COUNT, it represent SendWaitCnt
+ * counter request
+ * @return: return SendWaitCnt/SendWaitVlCnt counter value per vl.
+ *
+ * Convert SendWaitCnt/SendWaitVlCnt counter from TXE cycle times to
+ * flit times. Call this function to samples these counters. This
+ * function will calculate for previous state transition and update
+ * current state at end of function using ppd->prev_link_width and
+ * ppd->port_vl_xmit_wait_last to port_vl_xmit_wait_curr and link_width.
+ */
+u64 get_xmit_wait_counters(struct hfi1_pportdata *ppd,
+ u16 link_width, u16 link_speed, int vl)
+{
+ u64 port_vl_xmit_wait_curr;
+ u64 delta_vl_xmit_wait;
+ u64 xmit_wait_val;
+
+ if (vl > C_VL_COUNT)
+ return 0;
+ if (vl < C_VL_COUNT)
+ port_vl_xmit_wait_curr =
+ read_port_cntr(ppd, C_TX_WAIT_VL, vl);
+ else
+ port_vl_xmit_wait_curr =
+ read_port_cntr(ppd, C_TX_WAIT, CNTR_INVALID_VL);
+
+ xmit_wait_val =
+ port_vl_xmit_wait_curr -
+ ppd->port_vl_xmit_wait_last[vl];
+ delta_vl_xmit_wait =
+ convert_xmit_counter(xmit_wait_val,
+ ppd->prev_link_width,
+ link_speed);
+
+ ppd->vl_xmit_flit_cnt[vl] += delta_vl_xmit_wait;
+ ppd->port_vl_xmit_wait_last[vl] = port_vl_xmit_wait_curr;
+ ppd->prev_link_width = link_width;
+
+ return ppd->vl_xmit_flit_cnt[vl];
+}
+
static int pma_get_opa_portstatus(struct opa_pma_mad *pmp,
struct ib_device *ibdev,
- u8 port, u32 *resp_len)
+ u32 port, u32 *resp_len)
{
struct opa_port_status_req *req =
(struct opa_port_status_req *)pmp->data;
struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
struct opa_port_status_rsp *rsp;
- u32 vl_select_mask = be32_to_cpu(req->vl_select_mask);
+ unsigned long vl_select_mask = be32_to_cpu(req->vl_select_mask);
unsigned long vl;
size_t response_data_size;
u32 nports = be32_to_cpu(pmp->mad_hdr.attr_mod) >> 24;
- u8 port_num = req->port_num;
- u8 num_vls = hweight32(vl_select_mask);
+ u32 port_num = req->port_num;
+ u8 num_vls = hweight64(vl_select_mask);
struct _vls_pctrs *vlinfo;
struct hfi1_ibport *ibp = to_iport(ibdev, port);
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
int vfi;
u64 tmp, tmp2;
+ u16 link_width;
+ u16 link_speed;
- response_data_size = sizeof(struct opa_port_status_rsp) +
- num_vls * sizeof(struct _vls_pctrs);
+ response_data_size = struct_size(rsp, vls, num_vls);
if (response_data_size > sizeof(pmp->data)) {
pmp->mad_hdr.status |= OPA_PM_STATUS_REQUEST_TOO_LARGE;
return reply((struct ib_mad_hdr *)pmp);
@@ -2387,7 +2726,7 @@ static int pma_get_opa_portstatus(struct opa_pma_mad *pmp,
hfi1_read_link_quality(dd, &rsp->link_quality_indicator);
- rsp->vl_select_mask = cpu_to_be32(vl_select_mask);
+ rsp->vl_select_mask = cpu_to_be32((u32)vl_select_mask);
rsp->port_xmit_data = cpu_to_be64(read_dev_cntr(dd, C_DC_XMIT_FLITS,
CNTR_INVALID_VL));
rsp->port_rcv_data = cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FLITS,
@@ -2402,8 +2741,16 @@ static int pma_get_opa_portstatus(struct opa_pma_mad *pmp,
rsp->port_multicast_rcv_pkts =
cpu_to_be64(read_dev_cntr(dd, C_DC_MC_RCV_PKTS,
CNTR_INVALID_VL));
+ /*
+ * Convert PortXmitWait counter from TXE cycle times
+ * to flit times.
+ */
+ link_width =
+ tx_link_width(ppd->link_width_downgrade_tx_active);
+ link_speed = get_link_speed(ppd->link_speed_active);
rsp->port_xmit_wait =
- cpu_to_be64(read_port_cntr(ppd, C_TX_WAIT, CNTR_INVALID_VL));
+ cpu_to_be64(get_xmit_wait_counters(ppd, link_width,
+ link_speed, C_VL_COUNT));
rsp->port_rcv_fecn =
cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FCN, CNTR_INVALID_VL));
rsp->port_rcv_becn =
@@ -2450,8 +2797,7 @@ static int pma_get_opa_portstatus(struct opa_pma_mad *pmp,
* So in the for_each_set_bit() loop below, we don't need
* any additional checks for vl.
*/
- for_each_set_bit(vl, (unsigned long *)&(vl_select_mask),
- 8 * sizeof(vl_select_mask)) {
+ for_each_set_bit(vl, &vl_select_mask, BITS_PER_LONG) {
memset(vlinfo, 0, sizeof(*vlinfo));
tmp = read_dev_cntr(dd, C_DC_RX_FLIT_VL, idx_from_vl(vl));
@@ -2468,10 +2814,14 @@ static int pma_get_opa_portstatus(struct opa_pma_mad *pmp,
rsp->vls[vfi].port_vl_xmit_pkts =
cpu_to_be64(read_port_cntr(ppd, C_TX_PKT_VL,
idx_from_vl(vl)));
-
+ /*
+ * Convert PortVlXmitWait counter from TXE cycle
+ * times to flit times.
+ */
rsp->vls[vfi].port_vl_xmit_wait =
- cpu_to_be64(read_port_cntr(ppd, C_TX_WAIT_VL,
- idx_from_vl(vl)));
+ cpu_to_be64(get_xmit_wait_counters(ppd, link_width,
+ link_speed,
+ idx_from_vl(vl)));
rsp->vls[vfi].port_vl_rcv_fecn =
cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FCN_VL,
@@ -2488,7 +2838,7 @@ static int pma_get_opa_portstatus(struct opa_pma_mad *pmp,
vfi++;
}
- a0_portstatus(ppd, rsp, vl_select_mask);
+ a0_portstatus(ppd, rsp);
if (resp_len)
*resp_len += response_data_size;
@@ -2496,7 +2846,7 @@ static int pma_get_opa_portstatus(struct opa_pma_mad *pmp,
return reply((struct ib_mad_hdr *)pmp);
}
-static u64 get_error_counter_summary(struct ib_device *ibdev, u8 port,
+static u64 get_error_counter_summary(struct ib_device *ibdev, u32 port,
u8 res_lli, u8 res_ler)
{
struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
@@ -2535,16 +2885,14 @@ static u64 get_error_counter_summary(struct ib_device *ibdev, u8 port,
return error_counter_summary;
}
-static void a0_datacounters(struct hfi1_pportdata *ppd, struct _port_dctrs *rsp,
- u32 vl_select_mask)
+static void a0_datacounters(struct hfi1_pportdata *ppd, struct _port_dctrs *rsp)
{
if (!is_bx(ppd->dd)) {
unsigned long vl;
u64 sum_vl_xmit_wait = 0;
- u32 vl_all_mask = VL_MASK_ALL;
+ unsigned long vl_all_mask = VL_MASK_ALL;
- for_each_set_bit(vl, (unsigned long *)&(vl_all_mask),
- 8 * sizeof(vl_all_mask)) {
+ for_each_set_bit(vl, &vl_all_mask, BITS_PER_LONG) {
u64 tmp = sum_vl_xmit_wait +
read_port_cntr(ppd, C_TX_WAIT_VL,
idx_from_vl(vl));
@@ -2583,7 +2931,7 @@ static void pma_get_opa_port_dctrs(struct ib_device *ibdev,
static int pma_get_opa_datacounters(struct opa_pma_mad *pmp,
struct ib_device *ibdev,
- u8 port, u32 *resp_len)
+ u32 port, u32 *resp_len)
{
struct opa_port_data_counters_msg *req =
(struct opa_port_data_counters_msg *)pmp->data;
@@ -2594,17 +2942,17 @@ static int pma_get_opa_datacounters(struct opa_pma_mad *pmp,
struct _vls_dctrs *vlinfo;
size_t response_data_size;
u32 num_ports;
- u8 num_pslm;
u8 lq, num_vls;
u8 res_lli, res_ler;
u64 port_mask;
- u8 port_num;
+ u32 port_num;
unsigned long vl;
- u32 vl_select_mask;
+ unsigned long vl_select_mask;
int vfi;
+ u16 link_width;
+ u16 link_speed;
num_ports = be32_to_cpu(pmp->mad_hdr.attr_mod) >> 24;
- num_pslm = hweight64(be64_to_cpu(req->port_select_mask[3]));
num_vls = hweight32(be32_to_cpu(req->vl_select_mask));
vl_select_mask = be32_to_cpu(req->vl_select_mask);
res_lli = (u8)(be32_to_cpu(req->resolution) & MSK_LLI) >> MSK_LLI_SFT;
@@ -2618,8 +2966,7 @@ static int pma_get_opa_datacounters(struct opa_pma_mad *pmp,
}
/* Sanity check */
- response_data_size = sizeof(struct opa_port_data_counters_msg) +
- num_vls * sizeof(struct _vls_dctrs);
+ response_data_size = struct_size(req, port.vls, num_vls);
if (response_data_size > sizeof(pmp->data)) {
pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
@@ -2639,7 +2986,7 @@ static int pma_get_opa_datacounters(struct opa_pma_mad *pmp,
return reply((struct ib_mad_hdr *)pmp);
}
- rsp = &req->port[0];
+ rsp = &req->port;
memset(rsp, 0, sizeof(*rsp));
rsp->port_number = port;
@@ -2652,8 +2999,16 @@ static int pma_get_opa_datacounters(struct opa_pma_mad *pmp,
rsp->link_quality_indicator = cpu_to_be32((u32)lq);
pma_get_opa_port_dctrs(ibdev, rsp);
+ /*
+ * Convert PortXmitWait counter from TXE
+ * cycle times to flit times.
+ */
+ link_width =
+ tx_link_width(ppd->link_width_downgrade_tx_active);
+ link_speed = get_link_speed(ppd->link_speed_active);
rsp->port_xmit_wait =
- cpu_to_be64(read_port_cntr(ppd, C_TX_WAIT, CNTR_INVALID_VL));
+ cpu_to_be64(get_xmit_wait_counters(ppd, link_width,
+ link_speed, C_VL_COUNT));
rsp->port_rcv_fecn =
cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FCN, CNTR_INVALID_VL));
rsp->port_rcv_becn =
@@ -2669,8 +3024,7 @@ static int pma_get_opa_datacounters(struct opa_pma_mad *pmp,
* So in the for_each_set_bit() loop below, we don't need
* any additional checks for vl.
*/
- for_each_set_bit(vl, (unsigned long *)&(vl_select_mask),
- 8 * sizeof(req->vl_select_mask)) {
+ for_each_set_bit(vl, &vl_select_mask, BITS_PER_LONG) {
memset(vlinfo, 0, sizeof(*vlinfo));
rsp->vls[vfi].port_vl_xmit_data =
@@ -2689,9 +3043,14 @@ static int pma_get_opa_datacounters(struct opa_pma_mad *pmp,
cpu_to_be64(read_dev_cntr(dd, C_DC_RX_PKT_VL,
idx_from_vl(vl)));
+ /*
+ * Convert PortVlXmitWait counter from TXE
+ * cycle times to flit times.
+ */
rsp->vls[vfi].port_vl_xmit_wait =
- cpu_to_be64(read_port_cntr(ppd, C_TX_WAIT_VL,
- idx_from_vl(vl)));
+ cpu_to_be64(get_xmit_wait_counters(ppd, link_width,
+ link_speed,
+ idx_from_vl(vl)));
rsp->vls[vfi].port_vl_rcv_fecn =
cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FCN_VL,
@@ -2713,7 +3072,7 @@ static int pma_get_opa_datacounters(struct opa_pma_mad *pmp,
vfi++;
}
- a0_datacounters(ppd, rsp, vl_select_mask);
+ a0_datacounters(ppd, rsp);
if (resp_len)
*resp_len += response_data_size;
@@ -2722,7 +3081,7 @@ static int pma_get_opa_datacounters(struct opa_pma_mad *pmp,
}
static int pma_get_ib_portcounters_ext(struct ib_pma_mad *pmp,
- struct ib_device *ibdev, u8 port)
+ struct ib_device *ibdev, u32 port)
{
struct ib_pma_portcounters_ext *p = (struct ib_pma_portcounters_ext *)
pmp->data;
@@ -2750,7 +3109,7 @@ bail:
}
static void pma_get_opa_port_ectrs(struct ib_device *ibdev,
- struct _port_ectrs *rsp, u8 port)
+ struct _port_ectrs *rsp, u32 port)
{
u64 tmp, tmp2;
struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
@@ -2793,11 +3152,11 @@ static void pma_get_opa_port_ectrs(struct ib_device *ibdev,
static int pma_get_opa_porterrors(struct opa_pma_mad *pmp,
struct ib_device *ibdev,
- u8 port, u32 *resp_len)
+ u32 port, u32 *resp_len)
{
size_t response_data_size;
struct _port_ectrs *rsp;
- u8 port_num;
+ u32 port_num;
struct opa_port_error_counters64_msg *req;
struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
u32 num_ports;
@@ -2808,7 +3167,7 @@ static int pma_get_opa_porterrors(struct opa_pma_mad *pmp,
struct _vls_ectrs *vlinfo;
unsigned long vl;
u64 port_mask, tmp;
- u32 vl_select_mask;
+ unsigned long vl_select_mask;
int vfi;
req = (struct opa_port_error_counters64_msg *)pmp->data;
@@ -2823,8 +3182,7 @@ static int pma_get_opa_porterrors(struct opa_pma_mad *pmp,
return reply((struct ib_mad_hdr *)pmp);
}
- response_data_size = sizeof(struct opa_port_error_counters64_msg) +
- num_vls * sizeof(struct _vls_ectrs);
+ response_data_size = struct_size(req, port.vls, num_vls);
if (response_data_size > sizeof(pmp->data)) {
pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
@@ -2843,7 +3201,7 @@ static int pma_get_opa_porterrors(struct opa_pma_mad *pmp,
return reply((struct ib_mad_hdr *)pmp);
}
- rsp = &req->port[0];
+ rsp = &req->port;
ibp = to_iport(ibdev, port_num);
ppd = ppd_from_ibp(ibp);
@@ -2867,8 +3225,7 @@ static int pma_get_opa_porterrors(struct opa_pma_mad *pmp,
vlinfo = &rsp->vls[0];
vfi = 0;
vl_select_mask = be32_to_cpu(req->vl_select_mask);
- for_each_set_bit(vl, (unsigned long *)&(vl_select_mask),
- 8 * sizeof(req->vl_select_mask)) {
+ for_each_set_bit(vl, &vl_select_mask, BITS_PER_LONG) {
memset(vlinfo, 0, sizeof(*vlinfo));
rsp->vls[vfi].port_vl_xmit_discards =
cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_DSCD_VL,
@@ -2884,7 +3241,7 @@ static int pma_get_opa_porterrors(struct opa_pma_mad *pmp,
}
static int pma_get_ib_portcounters(struct ib_pma_mad *pmp,
- struct ib_device *ibdev, u8 port)
+ struct ib_device *ibdev, u32 port)
{
struct ib_pma_portcounters *p = (struct ib_pma_portcounters *)
pmp->data;
@@ -2970,7 +3327,7 @@ bail:
static int pma_get_opa_errorinfo(struct opa_pma_mad *pmp,
struct ib_device *ibdev,
- u8 port, u32 *resp_len)
+ u32 port, u32 *resp_len)
{
size_t response_data_size;
struct _port_ei *rsp;
@@ -2978,12 +3335,12 @@ static int pma_get_opa_errorinfo(struct opa_pma_mad *pmp,
struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
u64 port_mask;
u32 num_ports;
- u8 port_num;
+ u32 port_num;
u8 num_pslm;
u64 reg;
req = (struct opa_port_error_info_msg *)pmp->data;
- rsp = &req->port[0];
+ rsp = &req->port;
num_ports = OPA_AM_NPORT(be32_to_cpu(pmp->mad_hdr.attr_mod));
num_pslm = hweight64(be64_to_cpu(req->port_select_mask[3]));
@@ -3015,6 +3372,7 @@ static int pma_get_opa_errorinfo(struct opa_pma_mad *pmp,
pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
return reply((struct ib_mad_hdr *)pmp);
}
+ rsp->port_number = port;
/* PortRcvErrorInfo */
rsp->port_rcv_ei.status_and_code =
@@ -3068,7 +3426,7 @@ static int pma_get_opa_errorinfo(struct opa_pma_mad *pmp,
static int pma_set_opa_portstatus(struct opa_pma_mad *pmp,
struct ib_device *ibdev,
- u8 port, u32 *resp_len)
+ u32 port, u32 *resp_len)
{
struct opa_clear_port_status *req =
(struct opa_clear_port_status *)pmp->data;
@@ -3078,7 +3436,7 @@ static int pma_set_opa_portstatus(struct opa_pma_mad *pmp,
u32 nports = be32_to_cpu(pmp->mad_hdr.attr_mod) >> 24;
u64 portn = be64_to_cpu(req->port_select_mask[3]);
u32 counter_select = be32_to_cpu(req->counter_select_mask);
- u32 vl_select_mask = VL_MASK_ALL; /* clear all per-vl cnts */
+ unsigned long vl_select_mask = VL_MASK_ALL; /* clear all per-vl cnts */
unsigned long vl;
if ((nports != 1) || (portn != 1 << port)) {
@@ -3109,9 +3467,11 @@ static int pma_set_opa_portstatus(struct opa_pma_mad *pmp,
if (counter_select & CS_PORT_MCAST_RCV_PKTS)
write_dev_cntr(dd, C_DC_MC_RCV_PKTS, CNTR_INVALID_VL, 0);
- if (counter_select & CS_PORT_XMIT_WAIT)
+ if (counter_select & CS_PORT_XMIT_WAIT) {
write_port_cntr(ppd, C_TX_WAIT, CNTR_INVALID_VL, 0);
-
+ ppd->port_vl_xmit_wait_last[C_VL_COUNT] = 0;
+ ppd->vl_xmit_flit_cnt[C_VL_COUNT] = 0;
+ }
/* ignore cs_sw_portCongestion for HFIs */
if (counter_select & CS_PORT_RCV_FECN)
@@ -3170,8 +3530,7 @@ static int pma_set_opa_portstatus(struct opa_pma_mad *pmp,
if (counter_select & CS_UNCORRECTABLE_ERRORS)
write_dev_cntr(dd, C_DC_UNC_ERR, CNTR_INVALID_VL, 0);
- for_each_set_bit(vl, (unsigned long *)&(vl_select_mask),
- 8 * sizeof(vl_select_mask)) {
+ for_each_set_bit(vl, &vl_select_mask, BITS_PER_LONG) {
if (counter_select & CS_PORT_XMIT_DATA)
write_port_cntr(ppd, C_TX_FLIT_VL, idx_from_vl(vl), 0);
@@ -3184,8 +3543,11 @@ static int pma_set_opa_portstatus(struct opa_pma_mad *pmp,
if (counter_select & CS_PORT_RCV_PKTS)
write_dev_cntr(dd, C_DC_RX_PKT_VL, idx_from_vl(vl), 0);
- if (counter_select & CS_PORT_XMIT_WAIT)
+ if (counter_select & CS_PORT_XMIT_WAIT) {
write_port_cntr(ppd, C_TX_WAIT_VL, idx_from_vl(vl), 0);
+ ppd->port_vl_xmit_wait_last[idx_from_vl(vl)] = 0;
+ ppd->vl_xmit_flit_cnt[idx_from_vl(vl)] = 0;
+ }
/* sw_port_vl_congestion is 0 for HFIs */
if (counter_select & CS_PORT_RCV_FECN)
@@ -3216,19 +3578,19 @@ static int pma_set_opa_portstatus(struct opa_pma_mad *pmp,
static int pma_set_opa_errorinfo(struct opa_pma_mad *pmp,
struct ib_device *ibdev,
- u8 port, u32 *resp_len)
+ u32 port, u32 *resp_len)
{
struct _port_ei *rsp;
struct opa_port_error_info_msg *req;
struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
u64 port_mask;
u32 num_ports;
- u8 port_num;
+ u32 port_num;
u8 num_pslm;
u32 error_info_select;
req = (struct opa_port_error_info_msg *)pmp->data;
- rsp = &req->port[0];
+ rsp = &req->port;
num_ports = OPA_AM_NPORT(be32_to_cpu(pmp->mad_hdr.attr_mod));
num_pslm = hweight64(be64_to_cpu(req->port_select_mask[3]));
@@ -3298,14 +3660,19 @@ struct opa_congestion_info_attr {
} __packed;
static int __subn_get_opa_cong_info(struct opa_smp *smp, u32 am, u8 *data,
- struct ib_device *ibdev, u8 port,
- u32 *resp_len)
+ struct ib_device *ibdev, u32 port,
+ u32 *resp_len, u32 max_len)
{
struct opa_congestion_info_attr *p =
(struct opa_congestion_info_attr *)data;
struct hfi1_ibport *ibp = to_iport(ibdev, port);
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
+ if (smp_length_check(sizeof(*p), max_len)) {
+ smp->status |= IB_SMP_INVALID_FIELD;
+ return reply((struct ib_mad_hdr *)smp);
+ }
+
p->congestion_info = 0;
p->control_table_cap = ppd->cc_max_table_entries;
p->congestion_log_length = OPA_CONG_LOG_ELEMS;
@@ -3318,7 +3685,7 @@ static int __subn_get_opa_cong_info(struct opa_smp *smp, u32 am, u8 *data,
static int __subn_get_opa_cong_setting(struct opa_smp *smp, u32 am,
u8 *data, struct ib_device *ibdev,
- u8 port, u32 *resp_len)
+ u32 port, u32 *resp_len, u32 max_len)
{
int i;
struct opa_congestion_setting_attr *p =
@@ -3328,6 +3695,11 @@ static int __subn_get_opa_cong_setting(struct opa_smp *smp, u32 am,
struct opa_congestion_setting_entry_shadow *entries;
struct cc_state *cc_state;
+ if (smp_length_check(sizeof(*p), max_len)) {
+ smp->status |= IB_SMP_INVALID_FIELD;
+ return reply((struct ib_mad_hdr *)smp);
+ }
+
rcu_read_lock();
cc_state = get_cc_state(ppd);
@@ -3384,7 +3756,11 @@ static void apply_cc_state(struct hfi1_pportdata *ppd)
*new_cc_state = *old_cc_state;
- new_cc_state->cct.ccti_limit = ppd->total_cct_entry - 1;
+ if (ppd->total_cct_entry)
+ new_cc_state->cct.ccti_limit = ppd->total_cct_entry - 1;
+ else
+ new_cc_state->cct.ccti_limit = 0;
+
memcpy(new_cc_state->cct.entries, ppd->ccti_entries,
ppd->total_cct_entry * sizeof(struct ib_cc_table_entry));
@@ -3401,8 +3777,8 @@ static void apply_cc_state(struct hfi1_pportdata *ppd)
}
static int __subn_set_opa_cong_setting(struct opa_smp *smp, u32 am, u8 *data,
- struct ib_device *ibdev, u8 port,
- u32 *resp_len)
+ struct ib_device *ibdev, u32 port,
+ u32 *resp_len, u32 max_len)
{
struct opa_congestion_setting_attr *p =
(struct opa_congestion_setting_attr *)data;
@@ -3411,6 +3787,11 @@ static int __subn_set_opa_cong_setting(struct opa_smp *smp, u32 am, u8 *data,
struct opa_congestion_setting_entry_shadow *entries;
int i;
+ if (smp_length_check(sizeof(*p), max_len)) {
+ smp->status |= IB_SMP_INVALID_FIELD;
+ return reply((struct ib_mad_hdr *)smp);
+ }
+
/*
* Save details from packet into the ppd. Hold the cc_state_lock so
* our information is consistent with anyone trying to apply the state.
@@ -3432,20 +3813,20 @@ static int __subn_set_opa_cong_setting(struct opa_smp *smp, u32 am, u8 *data,
apply_cc_state(ppd);
return __subn_get_opa_cong_setting(smp, am, data, ibdev, port,
- resp_len);
+ resp_len, max_len);
}
static int __subn_get_opa_hfi1_cong_log(struct opa_smp *smp, u32 am,
u8 *data, struct ib_device *ibdev,
- u8 port, u32 *resp_len)
+ u32 port, u32 *resp_len, u32 max_len)
{
struct hfi1_ibport *ibp = to_iport(ibdev, port);
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
struct opa_hfi1_cong_log *cong_log = (struct opa_hfi1_cong_log *)data;
- s64 ts;
+ u64 ts;
int i;
- if (am != 0) {
+ if (am || smp_length_check(sizeof(*cong_log), max_len)) {
smp->status |= IB_SMP_INVALID_FIELD;
return reply((struct ib_mad_hdr *)smp);
}
@@ -3460,7 +3841,7 @@ static int __subn_get_opa_hfi1_cong_log(struct opa_smp *smp, u32 am,
ppd->threshold_cong_event_map,
sizeof(cong_log->threshold_cong_event_map));
/* keep timestamp in units of 1.024 usec */
- ts = ktime_to_ns(ktime_get()) / 1024;
+ ts = ktime_get_ns() / 1024;
cong_log->current_time_stamp = cpu_to_be32(ts);
for (i = 0; i < OPA_CONG_LOG_ELEMS; i++) {
struct opa_hfi1_cong_log_event_internal *cce =
@@ -3472,7 +3853,7 @@ static int __subn_get_opa_hfi1_cong_log(struct opa_smp *smp, u32 am,
* required to wrap the counter are supposed to
* be zeroed (CA10-49 IBTA, release 1.2.1, V1).
*/
- if ((u64)(ts - cce->timestamp) > (2 * UINT_MAX))
+ if ((ts - cce->timestamp) / 2 > U32_MAX)
continue;
memcpy(cong_log->events[i].local_qp_cn_entry, &cce->lqpn, 3);
memcpy(cong_log->events[i].remote_qp_number_cn_entry,
@@ -3502,8 +3883,8 @@ static int __subn_get_opa_hfi1_cong_log(struct opa_smp *smp, u32 am,
}
static int __subn_get_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data,
- struct ib_device *ibdev, u8 port,
- u32 *resp_len)
+ struct ib_device *ibdev, u32 port,
+ u32 *resp_len, u32 max_len)
{
struct ib_cc_table_attr *cc_table_attr =
(struct ib_cc_table_attr *)data;
@@ -3515,9 +3896,10 @@ static int __subn_get_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data,
int i, j;
u32 sentry, eentry;
struct cc_state *cc_state;
+ u32 size = sizeof(u16) * (IB_CCT_ENTRIES * n_blocks + 1);
/* sanity check n_blocks, start_block */
- if (n_blocks == 0 ||
+ if (n_blocks == 0 || smp_length_check(size, max_len) ||
start_block + n_blocks > ppd->cc_max_table_entries) {
smp->status |= IB_SMP_INVALID_FIELD;
return reply((struct ib_mad_hdr *)smp);
@@ -3547,14 +3929,14 @@ static int __subn_get_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data,
rcu_read_unlock();
if (resp_len)
- *resp_len += sizeof(u16) * (IB_CCT_ENTRIES * n_blocks + 1);
+ *resp_len += size;
return reply((struct ib_mad_hdr *)smp);
}
static int __subn_set_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data,
- struct ib_device *ibdev, u8 port,
- u32 *resp_len)
+ struct ib_device *ibdev, u32 port,
+ u32 *resp_len, u32 max_len)
{
struct ib_cc_table_attr *p = (struct ib_cc_table_attr *)data;
struct hfi1_ibport *ibp = to_iport(ibdev, port);
@@ -3565,9 +3947,10 @@ static int __subn_set_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data,
int i, j;
u32 sentry, eentry;
u16 ccti_limit;
+ u32 size = sizeof(u16) * (IB_CCT_ENTRIES * n_blocks + 1);
/* sanity check n_blocks, start_block */
- if (n_blocks == 0 ||
+ if (n_blocks == 0 || smp_length_check(size, max_len) ||
start_block + n_blocks > ppd->cc_max_table_entries) {
smp->status |= IB_SMP_INVALID_FIELD;
return reply((struct ib_mad_hdr *)smp);
@@ -3598,7 +3981,8 @@ static int __subn_set_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data,
/* now apply the information */
apply_cc_state(ppd);
- return __subn_get_opa_cc_table(smp, am, data, ibdev, port, resp_len);
+ return __subn_get_opa_cc_table(smp, am, data, ibdev, port, resp_len,
+ max_len);
}
struct opa_led_info {
@@ -3610,8 +3994,8 @@ struct opa_led_info {
#define OPA_LED_MASK BIT(OPA_LED_SHIFT)
static int __subn_get_opa_led_info(struct opa_smp *smp, u32 am, u8 *data,
- struct ib_device *ibdev, u8 port,
- u32 *resp_len)
+ struct ib_device *ibdev, u32 port,
+ u32 *resp_len, u32 max_len)
{
struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
struct hfi1_pportdata *ppd = dd->pport;
@@ -3619,7 +4003,7 @@ static int __subn_get_opa_led_info(struct opa_smp *smp, u32 am, u8 *data,
u32 nport = OPA_AM_NPORT(am);
u32 is_beaconing_active;
- if (nport != 1) {
+ if (nport != 1 || smp_length_check(sizeof(*p), max_len)) {
smp->status |= IB_SMP_INVALID_FIELD;
return reply((struct ib_mad_hdr *)smp);
}
@@ -3640,15 +4024,15 @@ static int __subn_get_opa_led_info(struct opa_smp *smp, u32 am, u8 *data,
}
static int __subn_set_opa_led_info(struct opa_smp *smp, u32 am, u8 *data,
- struct ib_device *ibdev, u8 port,
- u32 *resp_len)
+ struct ib_device *ibdev, u32 port,
+ u32 *resp_len, u32 max_len)
{
struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
struct opa_led_info *p = (struct opa_led_info *)data;
u32 nport = OPA_AM_NPORT(am);
int on = !!(be32_to_cpu(p->rsvd_led_mask) & OPA_LED_MASK);
- if (nport != 1) {
+ if (nport != 1 || smp_length_check(sizeof(*p), max_len)) {
smp->status |= IB_SMP_INVALID_FIELD;
return reply((struct ib_mad_hdr *)smp);
}
@@ -3658,12 +4042,13 @@ static int __subn_set_opa_led_info(struct opa_smp *smp, u32 am, u8 *data,
else
shutdown_led_override(dd->pport);
- return __subn_get_opa_led_info(smp, am, data, ibdev, port, resp_len);
+ return __subn_get_opa_led_info(smp, am, data, ibdev, port, resp_len,
+ max_len);
}
static int subn_get_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am,
- u8 *data, struct ib_device *ibdev, u8 port,
- u32 *resp_len)
+ u8 *data, struct ib_device *ibdev, u32 port,
+ u32 *resp_len, u32 max_len)
{
int ret;
struct hfi1_ibport *ibp = to_iport(ibdev, port);
@@ -3671,78 +4056,78 @@ static int subn_get_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am,
switch (attr_id) {
case IB_SMP_ATTR_NODE_DESC:
ret = __subn_get_opa_nodedesc(smp, am, data, ibdev, port,
- resp_len);
+ resp_len, max_len);
break;
case IB_SMP_ATTR_NODE_INFO:
ret = __subn_get_opa_nodeinfo(smp, am, data, ibdev, port,
- resp_len);
+ resp_len, max_len);
break;
case IB_SMP_ATTR_PORT_INFO:
ret = __subn_get_opa_portinfo(smp, am, data, ibdev, port,
- resp_len);
+ resp_len, max_len);
break;
case IB_SMP_ATTR_PKEY_TABLE:
ret = __subn_get_opa_pkeytable(smp, am, data, ibdev, port,
- resp_len);
+ resp_len, max_len);
break;
case OPA_ATTRIB_ID_SL_TO_SC_MAP:
ret = __subn_get_opa_sl_to_sc(smp, am, data, ibdev, port,
- resp_len);
+ resp_len, max_len);
break;
case OPA_ATTRIB_ID_SC_TO_SL_MAP:
ret = __subn_get_opa_sc_to_sl(smp, am, data, ibdev, port,
- resp_len);
+ resp_len, max_len);
break;
case OPA_ATTRIB_ID_SC_TO_VLT_MAP:
ret = __subn_get_opa_sc_to_vlt(smp, am, data, ibdev, port,
- resp_len);
+ resp_len, max_len);
break;
case OPA_ATTRIB_ID_SC_TO_VLNT_MAP:
ret = __subn_get_opa_sc_to_vlnt(smp, am, data, ibdev, port,
- resp_len);
+ resp_len, max_len);
break;
case OPA_ATTRIB_ID_PORT_STATE_INFO:
ret = __subn_get_opa_psi(smp, am, data, ibdev, port,
- resp_len);
+ resp_len, max_len);
break;
case OPA_ATTRIB_ID_BUFFER_CONTROL_TABLE:
ret = __subn_get_opa_bct(smp, am, data, ibdev, port,
- resp_len);
+ resp_len, max_len);
break;
case OPA_ATTRIB_ID_CABLE_INFO:
ret = __subn_get_opa_cable_info(smp, am, data, ibdev, port,
- resp_len);
+ resp_len, max_len);
break;
case IB_SMP_ATTR_VL_ARB_TABLE:
ret = __subn_get_opa_vl_arb(smp, am, data, ibdev, port,
- resp_len);
+ resp_len, max_len);
break;
case OPA_ATTRIB_ID_CONGESTION_INFO:
ret = __subn_get_opa_cong_info(smp, am, data, ibdev, port,
- resp_len);
+ resp_len, max_len);
break;
case OPA_ATTRIB_ID_HFI_CONGESTION_SETTING:
ret = __subn_get_opa_cong_setting(smp, am, data, ibdev,
- port, resp_len);
+ port, resp_len, max_len);
break;
case OPA_ATTRIB_ID_HFI_CONGESTION_LOG:
ret = __subn_get_opa_hfi1_cong_log(smp, am, data, ibdev,
- port, resp_len);
+ port, resp_len, max_len);
break;
case OPA_ATTRIB_ID_CONGESTION_CONTROL_TABLE:
ret = __subn_get_opa_cc_table(smp, am, data, ibdev, port,
- resp_len);
+ resp_len, max_len);
break;
case IB_SMP_ATTR_LED_INFO:
ret = __subn_get_opa_led_info(smp, am, data, ibdev, port,
- resp_len);
+ resp_len, max_len);
break;
case IB_SMP_ATTR_SM_INFO:
if (ibp->rvp.port_cap_flags & IB_PORT_SM_DISABLED)
return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
if (ibp->rvp.port_cap_flags & IB_PORT_SM)
return IB_MAD_RESULT_SUCCESS;
- /* FALLTHROUGH */
+ fallthrough;
default:
smp->status |= IB_SMP_UNSUP_METH_ATTR;
ret = reply((struct ib_mad_hdr *)smp);
@@ -3752,8 +4137,8 @@ static int subn_get_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am,
}
static int subn_set_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am,
- u8 *data, struct ib_device *ibdev, u8 port,
- u32 *resp_len)
+ u8 *data, struct ib_device *ibdev, u32 port,
+ u32 *resp_len, u32 max_len, int local_mad)
{
int ret;
struct hfi1_ibport *ibp = to_iport(ibdev, port);
@@ -3761,58 +4146,58 @@ static int subn_set_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am,
switch (attr_id) {
case IB_SMP_ATTR_PORT_INFO:
ret = __subn_set_opa_portinfo(smp, am, data, ibdev, port,
- resp_len);
+ resp_len, max_len, local_mad);
break;
case IB_SMP_ATTR_PKEY_TABLE:
ret = __subn_set_opa_pkeytable(smp, am, data, ibdev, port,
- resp_len);
+ resp_len, max_len);
break;
case OPA_ATTRIB_ID_SL_TO_SC_MAP:
ret = __subn_set_opa_sl_to_sc(smp, am, data, ibdev, port,
- resp_len);
+ resp_len, max_len);
break;
case OPA_ATTRIB_ID_SC_TO_SL_MAP:
ret = __subn_set_opa_sc_to_sl(smp, am, data, ibdev, port,
- resp_len);
+ resp_len, max_len);
break;
case OPA_ATTRIB_ID_SC_TO_VLT_MAP:
ret = __subn_set_opa_sc_to_vlt(smp, am, data, ibdev, port,
- resp_len);
+ resp_len, max_len);
break;
case OPA_ATTRIB_ID_SC_TO_VLNT_MAP:
ret = __subn_set_opa_sc_to_vlnt(smp, am, data, ibdev, port,
- resp_len);
+ resp_len, max_len);
break;
case OPA_ATTRIB_ID_PORT_STATE_INFO:
ret = __subn_set_opa_psi(smp, am, data, ibdev, port,
- resp_len);
+ resp_len, max_len, local_mad);
break;
case OPA_ATTRIB_ID_BUFFER_CONTROL_TABLE:
ret = __subn_set_opa_bct(smp, am, data, ibdev, port,
- resp_len);
+ resp_len, max_len);
break;
case IB_SMP_ATTR_VL_ARB_TABLE:
ret = __subn_set_opa_vl_arb(smp, am, data, ibdev, port,
- resp_len);
+ resp_len, max_len);
break;
case OPA_ATTRIB_ID_HFI_CONGESTION_SETTING:
ret = __subn_set_opa_cong_setting(smp, am, data, ibdev,
- port, resp_len);
+ port, resp_len, max_len);
break;
case OPA_ATTRIB_ID_CONGESTION_CONTROL_TABLE:
ret = __subn_set_opa_cc_table(smp, am, data, ibdev, port,
- resp_len);
+ resp_len, max_len);
break;
case IB_SMP_ATTR_LED_INFO:
ret = __subn_set_opa_led_info(smp, am, data, ibdev, port,
- resp_len);
+ resp_len, max_len);
break;
case IB_SMP_ATTR_SM_INFO:
if (ibp->rvp.port_cap_flags & IB_PORT_SM_DISABLED)
return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
if (ibp->rvp.port_cap_flags & IB_PORT_SM)
return IB_MAD_RESULT_SUCCESS;
- /* FALLTHROUGH */
+ fallthrough;
default:
smp->status |= IB_SMP_UNSUP_METH_ATTR;
ret = reply((struct ib_mad_hdr *)smp);
@@ -3827,7 +4212,7 @@ static inline void set_aggr_error(struct opa_aggregate *ag)
}
static int subn_get_opa_aggregate(struct opa_smp *smp,
- struct ib_device *ibdev, u8 port,
+ struct ib_device *ibdev, u32 port,
u32 *resp_len)
{
int i;
@@ -3861,7 +4246,10 @@ static int subn_get_opa_aggregate(struct opa_smp *smp,
memset(next_smp + sizeof(*agg), 0, agg_data_len);
(void)subn_get_opa_sma(agg->attr_id, smp, am, agg->data,
- ibdev, port, NULL);
+ ibdev, port, NULL, (u32)agg_data_len);
+
+ if (smp->status & IB_SMP_INVALID_FIELD)
+ break;
if (smp->status & ~IB_SMP_DIRECTION) {
set_aggr_error(agg);
return reply((struct ib_mad_hdr *)smp);
@@ -3873,8 +4261,8 @@ static int subn_get_opa_aggregate(struct opa_smp *smp,
}
static int subn_set_opa_aggregate(struct opa_smp *smp,
- struct ib_device *ibdev, u8 port,
- u32 *resp_len)
+ struct ib_device *ibdev, u32 port,
+ u32 *resp_len, int local_mad)
{
int i;
u32 num_attr = be32_to_cpu(smp->attr_mod) & 0x000000ff;
@@ -3904,7 +4292,11 @@ static int subn_set_opa_aggregate(struct opa_smp *smp,
}
(void)subn_set_opa_sma(agg->attr_id, smp, am, agg->data,
- ibdev, port, NULL);
+ ibdev, port, NULL, (u32)agg_data_len,
+ local_mad);
+
+ if (smp->status & IB_SMP_INVALID_FIELD)
+ break;
if (smp->status & ~IB_SMP_DIRECTION) {
set_aggr_error(agg);
return reply((struct ib_mad_hdr *)smp);
@@ -3942,6 +4334,18 @@ void clear_linkup_counters(struct hfi1_devdata *dd)
dd->err_info_xmit_constraint.status &= ~OPA_EI_STATUS_SMASK;
}
+static int is_full_mgmt_pkey_in_table(struct hfi1_ibport *ibp)
+{
+ unsigned int i;
+ struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
+
+ for (i = 0; i < ARRAY_SIZE(ppd->pkeys); ++i)
+ if (ppd->pkeys[i] == FULL_MGMT_P_KEY)
+ return 1;
+
+ return 0;
+}
+
/*
* is_local_mad() returns 1 if 'mad' is sent from, and destined to the
* local node, 0 otherwise.
@@ -3975,7 +4379,6 @@ static int opa_local_smp_check(struct hfi1_ibport *ibp,
const struct ib_wc *in_wc)
{
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
- u16 slid = in_wc->slid;
u16 pkey;
if (in_wc->pkey_index >= ARRAY_SIZE(ppd->pkeys))
@@ -4002,24 +4405,82 @@ static int opa_local_smp_check(struct hfi1_ibport *ibp,
*/
if (pkey == LIM_MGMT_P_KEY || pkey == FULL_MGMT_P_KEY)
return 0;
- ingress_pkey_table_fail(ppd, pkey, slid);
+ ingress_pkey_table_fail(ppd, pkey, in_wc->slid);
return 1;
}
+/**
+ * hfi1_pkey_validation_pma - It validates PKEYs for incoming PMA MAD packets.
+ * @ibp: IB port data
+ * @in_mad: MAD packet with header and data
+ * @in_wc: Work completion data such as source LID, port number, etc.
+ *
+ * These are all the possible logic rules for validating a pkey:
+ *
+ * a) If pkey neither FULL_MGMT_P_KEY nor LIM_MGMT_P_KEY,
+ * and NOT self-originated packet:
+ * Drop MAD packet as it should always be part of the
+ * management partition unless it's a self-originated packet.
+ *
+ * b) If pkey_index -> FULL_MGMT_P_KEY, and LIM_MGMT_P_KEY in pkey table:
+ * The packet is coming from a management node and the receiving node
+ * is also a management node, so it is safe for the packet to go through.
+ *
+ * c) If pkey_index -> FULL_MGMT_P_KEY, and LIM_MGMT_P_KEY is NOT in pkey table:
+ * Drop the packet as LIM_MGMT_P_KEY should always be in the pkey table.
+ * It could be an FM misconfiguration.
+ *
+ * d) If pkey_index -> LIM_MGMT_P_KEY and FULL_MGMT_P_KEY is NOT in pkey table:
+ * It is safe for the packet to go through since a non-management node is
+ * talking to another non-management node.
+ *
+ * e) If pkey_index -> LIM_MGMT_P_KEY and FULL_MGMT_P_KEY in pkey table:
+ * Drop the packet because a non-management node is talking to a
+ * management node, and it could be an attack.
+ *
+ * For the implementation, these rules can be simplied to only checking
+ * for (a) and (e). There's no need to check for rule (b) as
+ * the packet doesn't need to be dropped. Rule (c) is not possible in
+ * the driver as LIM_MGMT_P_KEY is always in the pkey table.
+ *
+ * Return:
+ * 0 - pkey is okay, -EINVAL it's a bad pkey
+ */
+static int hfi1_pkey_validation_pma(struct hfi1_ibport *ibp,
+ const struct opa_mad *in_mad,
+ const struct ib_wc *in_wc)
+{
+ u16 pkey_value = hfi1_lookup_pkey_value(ibp, in_wc->pkey_index);
+
+ /* Rule (a) from above */
+ if (!is_local_mad(ibp, in_mad, in_wc) &&
+ pkey_value != LIM_MGMT_P_KEY &&
+ pkey_value != FULL_MGMT_P_KEY)
+ return -EINVAL;
+
+ /* Rule (e) from above */
+ if (pkey_value == LIM_MGMT_P_KEY &&
+ is_full_mgmt_pkey_in_table(ibp))
+ return -EINVAL;
+
+ return 0;
+}
+
static int process_subn_opa(struct ib_device *ibdev, int mad_flags,
- u8 port, const struct opa_mad *in_mad,
+ u32 port, const struct opa_mad *in_mad,
struct opa_mad *out_mad,
- u32 *resp_len)
+ u32 *resp_len, int local_mad)
{
struct opa_smp *smp = (struct opa_smp *)out_mad;
struct hfi1_ibport *ibp = to_iport(ibdev, port);
u8 *data;
- u32 am;
+ u32 am, data_size;
__be16 attr_id;
int ret;
*out_mad = *in_mad;
data = opa_get_smp_data(smp);
+ data_size = (u32)opa_get_smp_data_size(smp);
am = be32_to_cpu(smp->attr_mod);
attr_id = smp->attr_id;
@@ -4063,7 +4524,8 @@ static int process_subn_opa(struct ib_device *ibdev, int mad_flags,
default:
clear_opa_smp_data(smp);
ret = subn_get_opa_sma(attr_id, smp, am, data,
- ibdev, port, resp_len);
+ ibdev, port, resp_len,
+ data_size);
break;
case OPA_ATTRIB_ID_AGGREGATE:
ret = subn_get_opa_aggregate(smp, ibdev, port,
@@ -4075,11 +4537,12 @@ static int process_subn_opa(struct ib_device *ibdev, int mad_flags,
switch (attr_id) {
default:
ret = subn_set_opa_sma(attr_id, smp, am, data,
- ibdev, port, resp_len);
+ ibdev, port, resp_len,
+ data_size, local_mad);
break;
case OPA_ATTRIB_ID_AGGREGATE:
ret = subn_set_opa_aggregate(smp, ibdev, port,
- resp_len);
+ resp_len, local_mad);
break;
}
break;
@@ -4094,6 +4557,11 @@ static int process_subn_opa(struct ib_device *ibdev, int mad_flags,
*/
ret = IB_MAD_RESULT_SUCCESS;
break;
+ case IB_MGMT_METHOD_TRAP_REPRESS:
+ subn_handle_opa_trap_repress(ibp, smp);
+ /* Always successful */
+ ret = IB_MAD_RESULT_SUCCESS;
+ break;
default:
smp->status |= IB_SMP_UNSUP_METHOD;
ret = reply((struct ib_mad_hdr *)smp);
@@ -4104,7 +4572,7 @@ static int process_subn_opa(struct ib_device *ibdev, int mad_flags,
}
static int process_subn(struct ib_device *ibdev, int mad_flags,
- u8 port, const struct ib_mad *in_mad,
+ u32 port, const struct ib_mad *in_mad,
struct ib_mad *out_mad)
{
struct ib_smp *smp = (struct ib_smp *)out_mad;
@@ -4162,7 +4630,7 @@ static int process_subn(struct ib_device *ibdev, int mad_flags,
return ret;
}
-static int process_perf(struct ib_device *ibdev, u8 port,
+static int process_perf(struct ib_device *ibdev, u32 port,
const struct ib_mad *in_mad,
struct ib_mad *out_mad)
{
@@ -4224,7 +4692,7 @@ static int process_perf(struct ib_device *ibdev, u8 port,
return ret;
}
-static int process_perf_opa(struct ib_device *ibdev, u8 port,
+static int process_perf_opa(struct ib_device *ibdev, u32 port,
const struct opa_mad *in_mad,
struct opa_mad *out_mad, u32 *resp_len)
{
@@ -4306,7 +4774,7 @@ static int process_perf_opa(struct ib_device *ibdev, u8 port,
}
static int hfi1_process_opa_mad(struct ib_device *ibdev, int mad_flags,
- u8 port, const struct ib_wc *in_wc,
+ u32 port, const struct ib_wc *in_wc,
const struct ib_grh *in_grh,
const struct opa_mad *in_mad,
struct opa_mad *out_mad, size_t *out_mad_size,
@@ -4314,7 +4782,8 @@ static int hfi1_process_opa_mad(struct ib_device *ibdev, int mad_flags,
{
int ret;
int pkey_idx;
- u32 resp_len = 0;
+ int local_mad = 0;
+ u32 resp_len = in_wc->byte_len - sizeof(*in_grh);
struct hfi1_ibport *ibp = to_iport(ibdev, port);
pkey_idx = hfi1_lookup_pkey_idx(ibp, LIM_MGMT_P_KEY);
@@ -4328,17 +4797,21 @@ static int hfi1_process_opa_mad(struct ib_device *ibdev, int mad_flags,
switch (in_mad->mad_hdr.mgmt_class) {
case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE:
case IB_MGMT_CLASS_SUBN_LID_ROUTED:
- if (is_local_mad(ibp, in_mad, in_wc)) {
+ local_mad = is_local_mad(ibp, in_mad, in_wc);
+ if (local_mad) {
ret = opa_local_smp_check(ibp, in_wc);
if (ret)
return IB_MAD_RESULT_FAILURE;
}
ret = process_subn_opa(ibdev, mad_flags, port, in_mad,
- out_mad, &resp_len);
+ out_mad, &resp_len, local_mad);
goto bail;
case IB_MGMT_CLASS_PERF_MGMT:
- ret = process_perf_opa(ibdev, port, in_mad, out_mad,
- &resp_len);
+ ret = hfi1_pkey_validation_pma(ibp, in_mad, in_wc);
+ if (ret)
+ return IB_MAD_RESULT_FAILURE;
+
+ ret = process_perf_opa(ibdev, port, in_mad, out_mad, &resp_len);
goto bail;
default:
@@ -4354,7 +4827,7 @@ bail:
return ret;
}
-static int hfi1_process_ib_mad(struct ib_device *ibdev, int mad_flags, u8 port,
+static int hfi1_process_ib_mad(struct ib_device *ibdev, int mad_flags, u32 port,
const struct ib_wc *in_wc,
const struct ib_grh *in_grh,
const struct ib_mad *in_mad,
@@ -4387,6 +4860,8 @@ static int hfi1_process_ib_mad(struct ib_device *ibdev, int mad_flags, u8 port,
* @in_grh: the global route header for this packet
* @in_mad: the incoming MAD
* @out_mad: any outgoing MAD reply
+ * @out_mad_size: size of the outgoing MAD reply
+ * @out_mad_pkey_index: used to apss back the packet key index
*
* Returns IB_MAD_RESULT_SUCCESS if this is a MAD that we are not
* interested in processing.
@@ -4397,18 +4872,13 @@ static int hfi1_process_ib_mad(struct ib_device *ibdev, int mad_flags, u8 port,
*
* This is called by the ib_mad module.
*/
-int hfi1_process_mad(struct ib_device *ibdev, int mad_flags, u8 port,
+int hfi1_process_mad(struct ib_device *ibdev, int mad_flags, u32 port,
const struct ib_wc *in_wc, const struct ib_grh *in_grh,
- const struct ib_mad_hdr *in_mad, size_t in_mad_size,
- struct ib_mad_hdr *out_mad, size_t *out_mad_size,
- u16 *out_mad_pkey_index)
+ const struct ib_mad *in_mad, struct ib_mad *out_mad,
+ size_t *out_mad_size, u16 *out_mad_pkey_index)
{
- switch (in_mad->base_version) {
+ switch (in_mad->mad_hdr.base_version) {
case OPA_MGMT_BASE_VERSION:
- if (unlikely(in_mad_size != sizeof(struct opa_mad))) {
- dev_err(ibdev->dma_device, "invalid in_mad_size\n");
- return IB_MAD_RESULT_FAILURE;
- }
return hfi1_process_opa_mad(ibdev, mad_flags, port,
in_wc, in_grh,
(struct opa_mad *)in_mad,
@@ -4416,10 +4886,8 @@ int hfi1_process_mad(struct ib_device *ibdev, int mad_flags, u8 port,
out_mad_size,
out_mad_pkey_index);
case IB_MGMT_BASE_VERSION:
- return hfi1_process_ib_mad(ibdev, mad_flags, port,
- in_wc, in_grh,
- (const struct ib_mad *)in_mad,
- (struct ib_mad *)out_mad);
+ return hfi1_process_ib_mad(ibdev, mad_flags, port, in_wc,
+ in_grh, in_mad, out_mad);
default:
break;
}
diff --git a/drivers/infiniband/hw/hfi1/mad.h b/drivers/infiniband/hw/hfi1/mad.h
index 5aa3fd1be653..d6dde762921a 100644
--- a/drivers/infiniband/hw/hfi1/mad.h
+++ b/drivers/infiniband/hw/hfi1/mad.h
@@ -1,49 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
+ * Copyright(c) 2015 - 2017 Intel Corporation.
*/
+
#ifndef _HFI1_MAD_H
#define _HFI1_MAD_H
@@ -115,7 +74,7 @@ struct opa_mad_notice_attr {
__be32 lid; /* LID where change occurred */
__be32 new_cap_mask; /* new capability mask */
__be16 reserved2;
- __be16 cap_mask;
+ __be16 cap_mask3;
__be16 change_flags; /* low 4 bits only */
} __packed ntc_144;
@@ -165,7 +124,6 @@ struct opa_mad_notice_attr {
} __packed ntc_2048;
};
- u8 class_data[0];
};
#define IB_VLARB_LOWPRI_0_31 1
@@ -180,6 +138,15 @@ struct opa_mad_notice_attr {
#define OPA_VLARB_PREEMPT_MATRIX 3
#define IB_PMA_PORT_COUNTERS_CONG cpu_to_be16(0xFF00)
+#define LINK_SPEED_25G 1
+#define LINK_SPEED_12_5G 2
+#define LINK_WIDTH_DEFAULT 4
+#define DECIMAL_FACTORING 1000
+/*
+ * The default link width is multiplied by 1000
+ * to get accurate value after division.
+ */
+#define FACTOR_LINK_WIDTH (LINK_WIDTH_DEFAULT * DECIMAL_FACTORING)
struct ib_pma_portcounters_cong {
u8 reserved;
@@ -239,7 +206,7 @@ struct opa_hfi1_cong_log_event_internal {
u8 sl;
u8 svc_type;
u32 rlid;
- s64 timestamp; /* wider than 32 bits to detect 32 bit rollover */
+ u64 timestamp; /* wider than 32 bits to detect 32 bit rollover */
};
struct opa_hfi1_cong_log_event {
@@ -427,6 +394,43 @@ struct sc2vlnt {
COUNTER_MASK(1, 3) | \
COUNTER_MASK(1, 4))
-void hfi1_event_pkey_change(struct hfi1_devdata *dd, u8 port);
-
+void hfi1_event_pkey_change(struct hfi1_devdata *dd, u32 port);
+void hfi1_handle_trap_timer(struct timer_list *t);
+u16 tx_link_width(u16 link_width);
+u64 get_xmit_wait_counters(struct hfi1_pportdata *ppd, u16 link_width,
+ u16 link_speed, int vl);
+/**
+ * get_link_speed - determine whether 12.5G or 25G speed
+ * @link_speed: the speed of active link
+ * @return: Return 2 if link speed identified as 12.5G
+ * or return 1 if link speed is 25G.
+ *
+ * The function indirectly calculate required link speed
+ * value for convert_xmit_counter function. If the link
+ * speed is 25G, the function return as 1 as it is required
+ * by xmit counter conversion formula :-( 25G / link_speed).
+ * This conversion will provide value 1 if current
+ * link speed is 25G or 2 if 12.5G.This is done to avoid
+ * 12.5 float number conversion.
+ */
+static inline u16 get_link_speed(u16 link_speed)
+{
+ return (link_speed == 1) ?
+ LINK_SPEED_12_5G : LINK_SPEED_25G;
+}
+
+/**
+ * convert_xmit_counter - calculate flit times for given xmit counter
+ * value
+ * @xmit_wait_val: current xmit counter value
+ * @link_width: width of active link
+ * @link_speed: speed of active link
+ * @return: return xmit counter value in flit times.
+ */
+static inline u64 convert_xmit_counter(u64 xmit_wait_val, u16 link_width,
+ u16 link_speed)
+{
+ return (xmit_wait_val * 2 * (FACTOR_LINK_WIDTH / link_width)
+ * link_speed) / DECIMAL_FACTORING;
+}
#endif /* _HFI1_MAD_H */
diff --git a/drivers/infiniband/hw/hfi1/mmu_rb.c b/drivers/infiniband/hw/hfi1/mmu_rb.c
index ccbf52c8ff6f..67a5c410fb5e 100644
--- a/drivers/infiniband/hw/hfi1/mmu_rb.c
+++ b/drivers/infiniband/hw/hfi1/mmu_rb.c
@@ -1,88 +1,28 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
- * Copyright(c) 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
+ * Copyright(c) 2020 Cornelis Networks, Inc.
+ * Copyright(c) 2016 - 2017 Intel Corporation.
*/
+
#include <linux/list.h>
#include <linux/rculist.h>
#include <linux/mmu_notifier.h>
#include <linux/interval_tree_generic.h>
+#include <linux/sched/mm.h>
#include "mmu_rb.h"
#include "trace.h"
-struct mmu_rb_handler {
- struct mmu_notifier mn;
- struct rb_root root;
- void *ops_arg;
- spinlock_t lock; /* protect the RB tree */
- struct mmu_rb_ops *ops;
- struct mm_struct *mm;
- struct list_head lru_list;
- struct work_struct del_work;
- struct list_head del_list;
- struct workqueue_struct *wq;
-};
-
static unsigned long mmu_node_start(struct mmu_rb_node *);
static unsigned long mmu_node_last(struct mmu_rb_node *);
-static inline void mmu_notifier_page(struct mmu_notifier *, struct mm_struct *,
- unsigned long);
-static inline void mmu_notifier_range_start(struct mmu_notifier *,
- struct mm_struct *,
- unsigned long, unsigned long);
-static void mmu_notifier_mem_invalidate(struct mmu_notifier *,
- struct mm_struct *,
- unsigned long, unsigned long);
+static int mmu_notifier_range_start(struct mmu_notifier *,
+ const struct mmu_notifier_range *);
static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *,
unsigned long, unsigned long);
-static void do_remove(struct mmu_rb_handler *handler,
- struct list_head *del_list);
+static void release_immediate(struct kref *refcount);
static void handle_remove(struct work_struct *work);
static const struct mmu_notifier_ops mn_opts = {
- .invalidate_page = mmu_notifier_page,
.invalidate_range_start = mmu_notifier_range_start,
};
@@ -99,37 +39,39 @@ static unsigned long mmu_node_last(struct mmu_rb_node *node)
return PAGE_ALIGN(node->addr + node->len) - 1;
}
-int hfi1_mmu_rb_register(void *ops_arg, struct mm_struct *mm,
- struct mmu_rb_ops *ops,
+int hfi1_mmu_rb_register(void *ops_arg,
+ const struct mmu_rb_ops *ops,
struct workqueue_struct *wq,
struct mmu_rb_handler **handler)
{
- struct mmu_rb_handler *handlr;
+ struct mmu_rb_handler *h;
+ void *free_ptr;
int ret;
- handlr = kmalloc(sizeof(*handlr), GFP_KERNEL);
- if (!handlr)
+ free_ptr = kzalloc(sizeof(*h) + cache_line_size() - 1, GFP_KERNEL);
+ if (!free_ptr)
return -ENOMEM;
- handlr->root = RB_ROOT;
- handlr->ops = ops;
- handlr->ops_arg = ops_arg;
- INIT_HLIST_NODE(&handlr->mn.hlist);
- spin_lock_init(&handlr->lock);
- handlr->mn.ops = &mn_opts;
- handlr->mm = mm;
- INIT_WORK(&handlr->del_work, handle_remove);
- INIT_LIST_HEAD(&handlr->del_list);
- INIT_LIST_HEAD(&handlr->lru_list);
- handlr->wq = wq;
-
- ret = mmu_notifier_register(&handlr->mn, handlr->mm);
+ h = PTR_ALIGN(free_ptr, cache_line_size());
+ h->root = RB_ROOT_CACHED;
+ h->ops = ops;
+ h->ops_arg = ops_arg;
+ INIT_HLIST_NODE(&h->mn.hlist);
+ spin_lock_init(&h->lock);
+ h->mn.ops = &mn_opts;
+ INIT_WORK(&h->del_work, handle_remove);
+ INIT_LIST_HEAD(&h->del_list);
+ INIT_LIST_HEAD(&h->lru_list);
+ h->wq = wq;
+ h->free_ptr = free_ptr;
+
+ ret = mmu_notifier_register(&h->mn, current->mm);
if (ret) {
- kfree(handlr);
+ kfree(free_ptr);
return ret;
}
- *handler = handlr;
+ *handler = h;
return 0;
}
@@ -140,8 +82,11 @@ void hfi1_mmu_rb_unregister(struct mmu_rb_handler *handler)
unsigned long flags;
struct list_head del_list;
+ /* Prevent freeing of mm until we are completely finished. */
+ mmgrab(handler->mn.mm);
+
/* Unregister first so we don't get any more notifications. */
- mmu_notifier_unregister(&handler->mn, handler->mm);
+ mmu_notifier_unregister(&handler->mn, handler->mn.mm);
/*
* Make sure the wq delete handler is finished running. It will not
@@ -152,17 +97,24 @@ void hfi1_mmu_rb_unregister(struct mmu_rb_handler *handler)
INIT_LIST_HEAD(&del_list);
spin_lock_irqsave(&handler->lock, flags);
- while ((node = rb_first(&handler->root))) {
+ while ((node = rb_first_cached(&handler->root))) {
rbnode = rb_entry(node, struct mmu_rb_node, node);
- rb_erase(node, &handler->root);
+ rb_erase_cached(node, &handler->root);
/* move from LRU list to delete list */
list_move(&rbnode->list, &del_list);
}
spin_unlock_irqrestore(&handler->lock, flags);
- do_remove(handler, &del_list);
+ while (!list_empty(&del_list)) {
+ rbnode = list_first_entry(&del_list, struct mmu_rb_node, list);
+ list_del(&rbnode->list);
+ kref_put(&rbnode->refcount, release_immediate);
+ }
+
+ /* Now the mm may be freed. */
+ mmdrop(handler->mn.mm);
- kfree(handler);
+ kfree(handler->free_ptr);
}
int hfi1_mmu_rb_insert(struct mmu_rb_handler *handler,
@@ -172,35 +124,46 @@ int hfi1_mmu_rb_insert(struct mmu_rb_handler *handler,
unsigned long flags;
int ret = 0;
+ trace_hfi1_mmu_rb_insert(mnode);
+
+ if (current->mm != handler->mn.mm)
+ return -EPERM;
+
spin_lock_irqsave(&handler->lock, flags);
- hfi1_cdbg(MMU, "Inserting node addr 0x%llx, len %u", mnode->addr,
- mnode->len);
node = __mmu_rb_search(handler, mnode->addr, mnode->len);
if (node) {
- ret = -EINVAL;
+ ret = -EEXIST;
goto unlock;
}
__mmu_int_rb_insert(mnode, &handler->root);
- list_add(&mnode->list, &handler->lru_list);
-
- ret = handler->ops->insert(handler->ops_arg, mnode);
- if (ret) {
- __mmu_int_rb_remove(mnode, &handler->root);
- list_del(&mnode->list); /* remove from LRU list */
- }
+ list_add_tail(&mnode->list, &handler->lru_list);
+ mnode->handler = handler;
unlock:
spin_unlock_irqrestore(&handler->lock, flags);
return ret;
}
/* Caller must hold handler lock */
+struct mmu_rb_node *hfi1_mmu_rb_get_first(struct mmu_rb_handler *handler,
+ unsigned long addr, unsigned long len)
+{
+ struct mmu_rb_node *node;
+
+ trace_hfi1_mmu_rb_search(addr, len);
+ node = __mmu_int_rb_iter_first(&handler->root, addr, (addr + len) - 1);
+ if (node)
+ list_move_tail(&node->list, &handler->lru_list);
+ return node;
+}
+
+/* Caller must hold handler lock */
static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *handler,
unsigned long addr,
unsigned long len)
{
struct mmu_rb_node *node = NULL;
- hfi1_cdbg(MMU, "Searching for addr 0x%llx, len %u", addr, len);
+ trace_hfi1_mmu_rb_search(addr, len);
if (!handler->ops->filter) {
node = __mmu_int_rb_iter_first(&handler->root, addr,
(addr + len) - 1);
@@ -217,21 +180,47 @@ static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *handler,
return node;
}
-struct mmu_rb_node *hfi1_mmu_rb_extract(struct mmu_rb_handler *handler,
- unsigned long addr, unsigned long len)
+/*
+ * Must NOT call while holding mnode->handler->lock.
+ * mnode->handler->ops->remove() may sleep and mnode->handler->lock is a
+ * spinlock.
+ */
+static void release_immediate(struct kref *refcount)
+{
+ struct mmu_rb_node *mnode =
+ container_of(refcount, struct mmu_rb_node, refcount);
+ trace_hfi1_mmu_release_node(mnode);
+ mnode->handler->ops->remove(mnode->handler->ops_arg, mnode);
+}
+
+/* Caller must hold mnode->handler->lock */
+static void release_nolock(struct kref *refcount)
{
- struct mmu_rb_node *node;
+ struct mmu_rb_node *mnode =
+ container_of(refcount, struct mmu_rb_node, refcount);
+ list_move(&mnode->list, &mnode->handler->del_list);
+ queue_work(mnode->handler->wq, &mnode->handler->del_work);
+}
+
+/*
+ * struct mmu_rb_node->refcount kref_put() callback.
+ * Adds mmu_rb_node to mmu_rb_node->handler->del_list and queues
+ * handler->del_work on handler->wq.
+ * Does not remove mmu_rb_node from handler->lru_list or handler->rb_root.
+ * Acquires mmu_rb_node->handler->lock; do not call while already holding
+ * handler->lock.
+ */
+void hfi1_mmu_rb_release(struct kref *refcount)
+{
+ struct mmu_rb_node *mnode =
+ container_of(refcount, struct mmu_rb_node, refcount);
+ struct mmu_rb_handler *handler = mnode->handler;
unsigned long flags;
spin_lock_irqsave(&handler->lock, flags);
- node = __mmu_rb_search(handler, addr, len);
- if (node) {
- __mmu_int_rb_remove(node, &handler->root);
- list_del(&node->list); /* remove from LRU list */
- }
+ list_move(&mnode->list, &mnode->handler->del_list);
spin_unlock_irqrestore(&handler->lock, flags);
-
- return node;
+ queue_work(handler->wq, &handler->del_work);
}
void hfi1_mmu_rb_evict(struct mmu_rb_handler *handler, void *evict_arg)
@@ -241,11 +230,17 @@ void hfi1_mmu_rb_evict(struct mmu_rb_handler *handler, void *evict_arg)
unsigned long flags;
bool stop = false;
+ if (current->mm != handler->mn.mm)
+ return;
+
INIT_LIST_HEAD(&del_list);
spin_lock_irqsave(&handler->lock, flags);
- list_for_each_entry_safe_reverse(rbnode, ptr, &handler->lru_list,
- list) {
+ list_for_each_entry_safe(rbnode, ptr, &handler->lru_list, list) {
+ /* refcount == 1 implies mmu_rb_handler has only rbnode ref */
+ if (kref_read(&rbnode->refcount) > 1)
+ continue;
+
if (handler->ops->evict(handler->ops_arg, rbnode, evict_arg,
&stop)) {
__mmu_int_rb_remove(rbnode, &handler->root);
@@ -257,99 +252,41 @@ void hfi1_mmu_rb_evict(struct mmu_rb_handler *handler, void *evict_arg)
}
spin_unlock_irqrestore(&handler->lock, flags);
- while (!list_empty(&del_list)) {
- rbnode = list_first_entry(&del_list, struct mmu_rb_node, list);
- list_del(&rbnode->list);
- handler->ops->remove(handler->ops_arg, rbnode);
+ list_for_each_entry_safe(rbnode, ptr, &del_list, list) {
+ trace_hfi1_mmu_rb_evict(rbnode);
+ kref_put(&rbnode->refcount, release_immediate);
}
}
-/*
- * It is up to the caller to ensure that this function does not race with the
- * mmu invalidate notifier which may be calling the users remove callback on
- * 'node'.
- */
-void hfi1_mmu_rb_remove(struct mmu_rb_handler *handler,
- struct mmu_rb_node *node)
-{
- unsigned long flags;
-
- /* Validity of handler and node pointers has been checked by caller. */
- hfi1_cdbg(MMU, "Removing node addr 0x%llx, len %u", node->addr,
- node->len);
- spin_lock_irqsave(&handler->lock, flags);
- __mmu_int_rb_remove(node, &handler->root);
- list_del(&node->list); /* remove from LRU list */
- spin_unlock_irqrestore(&handler->lock, flags);
-
- handler->ops->remove(handler->ops_arg, node);
-}
-
-static inline void mmu_notifier_page(struct mmu_notifier *mn,
- struct mm_struct *mm, unsigned long addr)
-{
- mmu_notifier_mem_invalidate(mn, mm, addr, addr + PAGE_SIZE);
-}
-
-static inline void mmu_notifier_range_start(struct mmu_notifier *mn,
- struct mm_struct *mm,
- unsigned long start,
- unsigned long end)
-{
- mmu_notifier_mem_invalidate(mn, mm, start, end);
-}
-
-static void mmu_notifier_mem_invalidate(struct mmu_notifier *mn,
- struct mm_struct *mm,
- unsigned long start, unsigned long end)
+static int mmu_notifier_range_start(struct mmu_notifier *mn,
+ const struct mmu_notifier_range *range)
{
struct mmu_rb_handler *handler =
container_of(mn, struct mmu_rb_handler, mn);
- struct rb_root *root = &handler->root;
+ struct rb_root_cached *root = &handler->root;
struct mmu_rb_node *node, *ptr = NULL;
unsigned long flags;
- bool added = false;
spin_lock_irqsave(&handler->lock, flags);
- for (node = __mmu_int_rb_iter_first(root, start, end - 1);
+ for (node = __mmu_int_rb_iter_first(root, range->start, range->end-1);
node; node = ptr) {
/* Guard against node removal. */
- ptr = __mmu_int_rb_iter_next(node, start, end - 1);
- hfi1_cdbg(MMU, "Invalidating node addr 0x%llx, len %u",
- node->addr, node->len);
- if (handler->ops->invalidate(handler->ops_arg, node)) {
- __mmu_int_rb_remove(node, root);
- /* move from LRU list to delete list */
- list_move(&node->list, &handler->del_list);
- added = true;
- }
+ ptr = __mmu_int_rb_iter_next(node, range->start,
+ range->end - 1);
+ trace_hfi1_mmu_mem_invalidate(node);
+ /* Remove from rb tree and lru_list. */
+ __mmu_int_rb_remove(node, root);
+ list_del_init(&node->list);
+ kref_put(&node->refcount, release_nolock);
}
spin_unlock_irqrestore(&handler->lock, flags);
- if (added)
- queue_work(handler->wq, &handler->del_work);
-}
-
-/*
- * Call the remove function for the given handler and the list. This
- * is expected to be called with a delete list extracted from handler.
- * The caller should not be holding the handler lock.
- */
-static void do_remove(struct mmu_rb_handler *handler,
- struct list_head *del_list)
-{
- struct mmu_rb_node *node;
-
- while (!list_empty(del_list)) {
- node = list_first_entry(del_list, struct mmu_rb_node, list);
- list_del(&node->list);
- handler->ops->remove(handler->ops_arg, node);
- }
+ return 0;
}
/*
* Work queue function to remove all nodes that have been queued up to
- * be removed. The key feature is that mm->mmap_sem is not being held
+ * be removed. The key feature is that mm->mmap_lock is not being held
* and the remove callback can sleep while taking it, if needed.
*/
static void handle_remove(struct work_struct *work)
@@ -359,11 +296,17 @@ static void handle_remove(struct work_struct *work)
del_work);
struct list_head del_list;
unsigned long flags;
+ struct mmu_rb_node *node;
/* remove anything that is queued to get removed */
spin_lock_irqsave(&handler->lock, flags);
list_replace_init(&handler->del_list, &del_list);
spin_unlock_irqrestore(&handler->lock, flags);
- do_remove(handler, &del_list);
+ while (!list_empty(&del_list)) {
+ node = list_first_entry(&del_list, struct mmu_rb_node, list);
+ list_del(&node->list);
+ trace_hfi1_mmu_release_node(node);
+ handler->ops->remove(handler->ops_arg, node);
+ }
}
diff --git a/drivers/infiniband/hw/hfi1/mmu_rb.h b/drivers/infiniband/hw/hfi1/mmu_rb.h
index 754f6ebf13fb..3fa50dd64db6 100644
--- a/drivers/infiniband/hw/hfi1/mmu_rb.h
+++ b/drivers/infiniband/hw/hfi1/mmu_rb.h
@@ -1,49 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
+ * Copyright(c) 2020 Cornelis Networks, Inc.
* Copyright(c) 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
*/
+
#ifndef _HFI1_MMU_RB_H
#define _HFI1_MMU_RB_H
@@ -54,34 +14,54 @@ struct mmu_rb_node {
unsigned long len;
unsigned long __last;
struct rb_node node;
+ struct mmu_rb_handler *handler;
struct list_head list;
+ struct kref refcount;
};
-/*
- * NOTE: filter, insert, invalidate, and evict must not sleep. Only remove is
- * allowed to sleep.
- */
+/* filter and evict must not sleep. Only remove is allowed to sleep. */
struct mmu_rb_ops {
bool (*filter)(struct mmu_rb_node *node, unsigned long addr,
unsigned long len);
- int (*insert)(void *ops_arg, struct mmu_rb_node *mnode);
void (*remove)(void *ops_arg, struct mmu_rb_node *mnode);
- int (*invalidate)(void *ops_arg, struct mmu_rb_node *node);
int (*evict)(void *ops_arg, struct mmu_rb_node *mnode,
void *evict_arg, bool *stop);
};
-int hfi1_mmu_rb_register(void *ops_arg, struct mm_struct *mm,
- struct mmu_rb_ops *ops,
+struct mmu_rb_handler {
+ /*
+ * struct mmu_notifier is 56 bytes, and spinlock_t is 4 bytes, so
+ * they fit together in one cache line. mn is relatively rarely
+ * accessed, so co-locating the spinlock with it achieves much of
+ * the cacheline contention reduction of giving the spinlock its own
+ * cacheline without the overhead of doing so.
+ */
+ struct mmu_notifier mn;
+ spinlock_t lock; /* protect the RB tree */
+
+ /* Begin on a new cachline boundary here */
+ struct rb_root_cached root ____cacheline_aligned_in_smp;
+ void *ops_arg;
+ const struct mmu_rb_ops *ops;
+ struct list_head lru_list;
+ struct work_struct del_work;
+ struct list_head del_list;
+ struct workqueue_struct *wq;
+ void *free_ptr;
+};
+
+int hfi1_mmu_rb_register(void *ops_arg,
+ const struct mmu_rb_ops *ops,
struct workqueue_struct *wq,
struct mmu_rb_handler **handler);
void hfi1_mmu_rb_unregister(struct mmu_rb_handler *handler);
int hfi1_mmu_rb_insert(struct mmu_rb_handler *handler,
struct mmu_rb_node *mnode);
+void hfi1_mmu_rb_release(struct kref *refcount);
+
void hfi1_mmu_rb_evict(struct mmu_rb_handler *handler, void *evict_arg);
-void hfi1_mmu_rb_remove(struct mmu_rb_handler *handler,
- struct mmu_rb_node *mnode);
-struct mmu_rb_node *hfi1_mmu_rb_extract(struct mmu_rb_handler *handler,
- unsigned long addr, unsigned long len);
+struct mmu_rb_node *hfi1_mmu_rb_get_first(struct mmu_rb_handler *handler,
+ unsigned long addr,
+ unsigned long len);
#endif /* _HFI1_MMU_RB_H */
diff --git a/drivers/infiniband/hw/hfi1/msix.c b/drivers/infiniband/hw/hfi1/msix.c
new file mode 100644
index 000000000000..77d2ece9a9cb
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/msix.c
@@ -0,0 +1,348 @@
+// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
+/*
+ * Copyright(c) 2018 - 2020 Intel Corporation.
+ */
+
+#include "hfi.h"
+#include "affinity.h"
+#include "sdma.h"
+#include "netdev.h"
+
+/**
+ * msix_initialize() - Calculate, request and configure MSIx IRQs
+ * @dd: valid hfi1 devdata
+ *
+ */
+int msix_initialize(struct hfi1_devdata *dd)
+{
+ u32 total;
+ int ret;
+ struct hfi1_msix_entry *entries;
+
+ /*
+ * MSIx interrupt count:
+ * one for the general, "slow path" interrupt
+ * one per used SDMA engine
+ * one per kernel receive context
+ * one for each VNIC context
+ * ...any new IRQs should be added here.
+ */
+ total = 1 + dd->num_sdma + dd->n_krcv_queues + dd->num_netdev_contexts;
+
+ if (total >= CCE_NUM_MSIX_VECTORS)
+ return -EINVAL;
+
+ ret = pci_alloc_irq_vectors(dd->pcidev, total, total, PCI_IRQ_MSIX);
+ if (ret < 0) {
+ dd_dev_err(dd, "pci_alloc_irq_vectors() failed: %d\n", ret);
+ return ret;
+ }
+
+ entries = kcalloc(total, sizeof(*dd->msix_info.msix_entries),
+ GFP_KERNEL);
+ if (!entries) {
+ pci_free_irq_vectors(dd->pcidev);
+ return -ENOMEM;
+ }
+
+ dd->msix_info.msix_entries = entries;
+ spin_lock_init(&dd->msix_info.msix_lock);
+ bitmap_zero(dd->msix_info.in_use_msix, total);
+ dd->msix_info.max_requested = total;
+ dd_dev_info(dd, "%u MSI-X interrupts allocated\n", total);
+
+ return 0;
+}
+
+/**
+ * msix_request_irq() - Allocate a free MSIx IRQ
+ * @dd: valid devdata
+ * @arg: context information for the IRQ
+ * @handler: IRQ handler
+ * @thread: IRQ thread handler (could be NULL)
+ * @type: affinty IRQ type
+ * @name: IRQ name
+ *
+ * Allocated an MSIx vector if available, and then create the appropriate
+ * meta data needed to keep track of the pci IRQ request.
+ *
+ * Return:
+ * < 0 Error
+ * >= 0 MSIx vector
+ *
+ */
+static int msix_request_irq(struct hfi1_devdata *dd, void *arg,
+ irq_handler_t handler, irq_handler_t thread,
+ enum irq_type type, const char *name)
+{
+ unsigned long nr;
+ int irq;
+ int ret;
+ struct hfi1_msix_entry *me;
+
+ /* Allocate an MSIx vector */
+ spin_lock(&dd->msix_info.msix_lock);
+ nr = find_first_zero_bit(dd->msix_info.in_use_msix,
+ dd->msix_info.max_requested);
+ if (nr < dd->msix_info.max_requested)
+ __set_bit(nr, dd->msix_info.in_use_msix);
+ spin_unlock(&dd->msix_info.msix_lock);
+
+ if (nr == dd->msix_info.max_requested)
+ return -ENOSPC;
+
+ if (type < IRQ_SDMA || type >= IRQ_OTHER)
+ return -EINVAL;
+
+ irq = pci_irq_vector(dd->pcidev, nr);
+ ret = pci_request_irq(dd->pcidev, nr, handler, thread, arg, name);
+ if (ret) {
+ dd_dev_err(dd,
+ "%s: request for IRQ %d failed, MSIx %lx, err %d\n",
+ name, irq, nr, ret);
+ spin_lock(&dd->msix_info.msix_lock);
+ __clear_bit(nr, dd->msix_info.in_use_msix);
+ spin_unlock(&dd->msix_info.msix_lock);
+ return ret;
+ }
+
+ /*
+ * assign arg after pci_request_irq call, so it will be
+ * cleaned up
+ */
+ me = &dd->msix_info.msix_entries[nr];
+ me->irq = irq;
+ me->arg = arg;
+ me->type = type;
+
+ /* This is a request, so a failure is not fatal */
+ ret = hfi1_get_irq_affinity(dd, me);
+ if (ret)
+ dd_dev_err(dd, "%s: unable to pin IRQ %d\n", name, ret);
+
+ return nr;
+}
+
+static int msix_request_rcd_irq_common(struct hfi1_ctxtdata *rcd,
+ irq_handler_t handler,
+ irq_handler_t thread,
+ const char *name)
+{
+ int nr = msix_request_irq(rcd->dd, rcd, handler, thread,
+ rcd->is_vnic ? IRQ_NETDEVCTXT : IRQ_RCVCTXT,
+ name);
+ if (nr < 0)
+ return nr;
+
+ /*
+ * Set the interrupt register and mask for this
+ * context's interrupt.
+ */
+ rcd->ireg = (IS_RCVAVAIL_START + rcd->ctxt) / 64;
+ rcd->imask = ((u64)1) << ((IS_RCVAVAIL_START + rcd->ctxt) % 64);
+ rcd->msix_intr = nr;
+ remap_intr(rcd->dd, IS_RCVAVAIL_START + rcd->ctxt, nr);
+
+ return 0;
+}
+
+/**
+ * msix_request_rcd_irq() - Helper function for RCVAVAIL IRQs
+ * @rcd: valid rcd context
+ *
+ */
+int msix_request_rcd_irq(struct hfi1_ctxtdata *rcd)
+{
+ char name[MAX_NAME_SIZE];
+
+ snprintf(name, sizeof(name), DRIVER_NAME "_%d kctxt%d",
+ rcd->dd->unit, rcd->ctxt);
+
+ return msix_request_rcd_irq_common(rcd, receive_context_interrupt,
+ receive_context_thread, name);
+}
+
+/**
+ * msix_netdev_request_rcd_irq - Helper function for RCVAVAIL IRQs
+ * for netdev context
+ * @rcd: valid netdev contexti
+ */
+int msix_netdev_request_rcd_irq(struct hfi1_ctxtdata *rcd)
+{
+ char name[MAX_NAME_SIZE];
+
+ snprintf(name, sizeof(name), DRIVER_NAME "_%d nd kctxt%d",
+ rcd->dd->unit, rcd->ctxt);
+ return msix_request_rcd_irq_common(rcd, receive_context_interrupt_napi,
+ NULL, name);
+}
+
+/**
+ * msix_request_sdma_irq - Helper for getting SDMA IRQ resources
+ * @sde: valid sdma engine
+ *
+ */
+int msix_request_sdma_irq(struct sdma_engine *sde)
+{
+ int nr;
+ char name[MAX_NAME_SIZE];
+
+ snprintf(name, sizeof(name), DRIVER_NAME "_%d sdma%d",
+ sde->dd->unit, sde->this_idx);
+ nr = msix_request_irq(sde->dd, sde, sdma_interrupt, NULL,
+ IRQ_SDMA, name);
+ if (nr < 0)
+ return nr;
+ sde->msix_intr = nr;
+ remap_sdma_interrupts(sde->dd, sde->this_idx, nr);
+
+ return 0;
+}
+
+/**
+ * msix_request_general_irq - Helper for getting general IRQ
+ * resources
+ * @dd: valid device data
+ */
+int msix_request_general_irq(struct hfi1_devdata *dd)
+{
+ int nr;
+ char name[MAX_NAME_SIZE];
+
+ snprintf(name, sizeof(name), DRIVER_NAME "_%d", dd->unit);
+ nr = msix_request_irq(dd, dd, general_interrupt, NULL, IRQ_GENERAL,
+ name);
+ if (nr < 0)
+ return nr;
+
+ /* general interrupt must be MSIx vector 0 */
+ if (nr) {
+ msix_free_irq(dd, (u8)nr);
+ dd_dev_err(dd, "Invalid index %d for GENERAL IRQ\n", nr);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/**
+ * enable_sdma_srcs - Helper to enable SDMA IRQ srcs
+ * @dd: valid devdata structure
+ * @i: index of SDMA engine
+ */
+static void enable_sdma_srcs(struct hfi1_devdata *dd, int i)
+{
+ set_intr_bits(dd, IS_SDMA_START + i, IS_SDMA_START + i, true);
+ set_intr_bits(dd, IS_SDMA_PROGRESS_START + i,
+ IS_SDMA_PROGRESS_START + i, true);
+ set_intr_bits(dd, IS_SDMA_IDLE_START + i, IS_SDMA_IDLE_START + i, true);
+ set_intr_bits(dd, IS_SDMAENG_ERR_START + i, IS_SDMAENG_ERR_START + i,
+ true);
+}
+
+/**
+ * msix_request_irqs() - Allocate all MSIx IRQs
+ * @dd: valid devdata structure
+ *
+ * Helper function to request the used MSIx IRQs.
+ *
+ */
+int msix_request_irqs(struct hfi1_devdata *dd)
+{
+ int i;
+ int ret = msix_request_general_irq(dd);
+
+ if (ret)
+ return ret;
+
+ for (i = 0; i < dd->num_sdma; i++) {
+ struct sdma_engine *sde = &dd->per_sdma[i];
+
+ ret = msix_request_sdma_irq(sde);
+ if (ret)
+ return ret;
+ enable_sdma_srcs(sde->dd, i);
+ }
+
+ for (i = 0; i < dd->n_krcv_queues; i++) {
+ struct hfi1_ctxtdata *rcd = hfi1_rcd_get_by_index_safe(dd, i);
+
+ if (rcd)
+ ret = msix_request_rcd_irq(rcd);
+ hfi1_rcd_put(rcd);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+/**
+ * msix_free_irq() - Free the specified MSIx resources and IRQ
+ * @dd: valid devdata
+ * @msix_intr: MSIx vector to free.
+ *
+ */
+void msix_free_irq(struct hfi1_devdata *dd, u8 msix_intr)
+{
+ struct hfi1_msix_entry *me;
+
+ if (msix_intr >= dd->msix_info.max_requested)
+ return;
+
+ me = &dd->msix_info.msix_entries[msix_intr];
+
+ if (!me->arg) /* => no irq, no affinity */
+ return;
+
+ hfi1_put_irq_affinity(dd, me);
+ pci_free_irq(dd->pcidev, msix_intr, me->arg);
+
+ me->arg = NULL;
+
+ spin_lock(&dd->msix_info.msix_lock);
+ __clear_bit(msix_intr, dd->msix_info.in_use_msix);
+ spin_unlock(&dd->msix_info.msix_lock);
+}
+
+/**
+ * msix_clean_up_interrupts - Free all MSIx IRQ resources
+ * @dd: valid device data data structure
+ *
+ * Free the MSIx and associated PCI resources, if they have been allocated.
+ */
+void msix_clean_up_interrupts(struct hfi1_devdata *dd)
+{
+ int i;
+ struct hfi1_msix_entry *me = dd->msix_info.msix_entries;
+
+ /* remove irqs - must happen before disabling/turning off */
+ for (i = 0; i < dd->msix_info.max_requested; i++, me++)
+ msix_free_irq(dd, i);
+
+ /* clean structures */
+ kfree(dd->msix_info.msix_entries);
+ dd->msix_info.msix_entries = NULL;
+ dd->msix_info.max_requested = 0;
+
+ pci_free_irq_vectors(dd->pcidev);
+}
+
+/**
+ * msix_netdev_synchronize_irq - netdev IRQ synchronize
+ * @dd: valid devdata
+ */
+void msix_netdev_synchronize_irq(struct hfi1_devdata *dd)
+{
+ int i;
+ int ctxt_count = hfi1_netdev_ctxt_count(dd);
+
+ for (i = 0; i < ctxt_count; i++) {
+ struct hfi1_ctxtdata *rcd = hfi1_netdev_get_ctxt(dd, i);
+ struct hfi1_msix_entry *me;
+
+ me = &dd->msix_info.msix_entries[rcd->msix_intr];
+
+ synchronize_irq(me->irq);
+ }
+}
diff --git a/drivers/infiniband/hw/hfi1/msix.h b/drivers/infiniband/hw/hfi1/msix.h
new file mode 100644
index 000000000000..9530ccb0a2ce
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/msix.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
+/*
+ * Copyright(c) 2018 - 2020 Intel Corporation.
+ */
+
+#ifndef _HFI1_MSIX_H
+#define _HFI1_MSIX_H
+
+#include "hfi.h"
+
+/* MSIx interface */
+int msix_initialize(struct hfi1_devdata *dd);
+int msix_request_irqs(struct hfi1_devdata *dd);
+void msix_clean_up_interrupts(struct hfi1_devdata *dd);
+int msix_request_general_irq(struct hfi1_devdata *dd);
+int msix_request_rcd_irq(struct hfi1_ctxtdata *rcd);
+int msix_request_sdma_irq(struct sdma_engine *sde);
+void msix_free_irq(struct hfi1_devdata *dd, u8 msix_intr);
+
+/* Netdev interface */
+void msix_netdev_synchronize_irq(struct hfi1_devdata *dd);
+int msix_netdev_request_rcd_irq(struct hfi1_ctxtdata *rcd);
+
+#endif
diff --git a/drivers/infiniband/hw/hfi1/netdev.h b/drivers/infiniband/hw/hfi1/netdev.h
new file mode 100644
index 000000000000..07c8f77c9181
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/netdev.h
@@ -0,0 +1,105 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
+/*
+ * Copyright(c) 2020 Intel Corporation.
+ *
+ */
+
+#ifndef HFI1_NETDEV_H
+#define HFI1_NETDEV_H
+
+#include "hfi.h"
+
+#include <linux/netdevice.h>
+#include <linux/xarray.h>
+
+/**
+ * struct hfi1_netdev_rxq - Receive Queue for HFI
+ * Both IPoIB and VNIC netdevices will be working on the rx abstraction.
+ * @napi: napi object
+ * @rx: ptr to netdev_rx
+ * @rcd: ptr to receive context data
+ */
+struct hfi1_netdev_rxq {
+ struct napi_struct napi;
+ struct hfi1_netdev_rx *rx;
+ struct hfi1_ctxtdata *rcd;
+};
+
+/*
+ * Number of netdev contexts used. Ensure it is less than or equal to
+ * max queues supported by VNIC (HFI1_VNIC_MAX_QUEUE).
+ */
+#define HFI1_MAX_NETDEV_CTXTS 8
+
+/* Number of NETDEV RSM entries */
+#define NUM_NETDEV_MAP_ENTRIES HFI1_MAX_NETDEV_CTXTS
+
+/**
+ * struct hfi1_netdev_rx: data required to setup and run HFI netdev.
+ * @rx_napi: the dummy netdevice to support "polling" the receive contexts
+ * @dd: hfi1_devdata
+ * @rxq: pointer to dummy netdev receive queues.
+ * @num_rx_q: number of receive queues
+ * @rmt_index: first free index in RMT Array
+ * @msix_start: first free MSI-X interrupt vector.
+ * @dev_tbl: netdev table for unique identifier VNIC and IPoIb VLANs.
+ * @enabled: atomic counter of netdevs enabling receive queues.
+ * When 0 NAPI will be disabled.
+ * @netdevs: atomic counter of netdevs using dummy netdev.
+ * When 0 receive queues will be freed.
+ */
+struct hfi1_netdev_rx {
+ struct net_device *rx_napi;
+ struct hfi1_devdata *dd;
+ struct hfi1_netdev_rxq *rxq;
+ int num_rx_q;
+ int rmt_start;
+ struct xarray dev_tbl;
+ /* count of enabled napi polls */
+ atomic_t enabled;
+ /* count of netdevs on top */
+ atomic_t netdevs;
+};
+
+static inline
+int hfi1_netdev_ctxt_count(struct hfi1_devdata *dd)
+{
+ return dd->netdev_rx->num_rx_q;
+}
+
+static inline
+struct hfi1_ctxtdata *hfi1_netdev_get_ctxt(struct hfi1_devdata *dd, int ctxt)
+{
+ return dd->netdev_rx->rxq[ctxt].rcd;
+}
+
+static inline
+int hfi1_netdev_get_free_rmt_idx(struct hfi1_devdata *dd)
+{
+ return dd->netdev_rx->rmt_start;
+}
+
+static inline
+void hfi1_netdev_set_free_rmt_idx(struct hfi1_devdata *dd, int rmt_idx)
+{
+ dd->netdev_rx->rmt_start = rmt_idx;
+}
+
+u32 hfi1_num_netdev_contexts(struct hfi1_devdata *dd, u32 available_contexts,
+ struct cpumask *cpu_mask);
+
+void hfi1_netdev_enable_queues(struct hfi1_devdata *dd);
+void hfi1_netdev_disable_queues(struct hfi1_devdata *dd);
+int hfi1_netdev_rx_init(struct hfi1_devdata *dd);
+int hfi1_netdev_rx_destroy(struct hfi1_devdata *dd);
+int hfi1_alloc_rx(struct hfi1_devdata *dd);
+void hfi1_free_rx(struct hfi1_devdata *dd);
+int hfi1_netdev_add_data(struct hfi1_devdata *dd, int id, void *data);
+void *hfi1_netdev_remove_data(struct hfi1_devdata *dd, int id);
+void *hfi1_netdev_get_data(struct hfi1_devdata *dd, int id);
+void *hfi1_netdev_get_first_data(struct hfi1_devdata *dd, int *start_id);
+
+/* chip.c */
+int hfi1_netdev_rx_napi(struct napi_struct *napi, int budget);
+
+#endif /* HFI1_NETDEV_H */
diff --git a/drivers/infiniband/hw/hfi1/netdev_rx.c b/drivers/infiniband/hw/hfi1/netdev_rx.c
new file mode 100644
index 000000000000..8608044203bb
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/netdev_rx.c
@@ -0,0 +1,487 @@
+// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
+/*
+ * Copyright(c) 2020 Intel Corporation.
+ *
+ */
+
+/*
+ * This file contains HFI1 support for netdev RX functionality
+ */
+
+#include "sdma.h"
+#include "verbs.h"
+#include "netdev.h"
+#include "hfi.h"
+
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <rdma/ib_verbs.h>
+
+static int hfi1_netdev_setup_ctxt(struct hfi1_netdev_rx *rx,
+ struct hfi1_ctxtdata *uctxt)
+{
+ unsigned int rcvctrl_ops;
+ struct hfi1_devdata *dd = rx->dd;
+ int ret;
+
+ uctxt->rhf_rcv_function_map = netdev_rhf_rcv_functions;
+ uctxt->do_interrupt = &handle_receive_interrupt_napi_sp;
+
+ /* Now allocate the RcvHdr queue and eager buffers. */
+ ret = hfi1_create_rcvhdrq(dd, uctxt);
+ if (ret)
+ goto done;
+
+ ret = hfi1_setup_eagerbufs(uctxt);
+ if (ret)
+ goto done;
+
+ clear_rcvhdrtail(uctxt);
+
+ rcvctrl_ops = HFI1_RCVCTRL_CTXT_DIS;
+ rcvctrl_ops |= HFI1_RCVCTRL_INTRAVAIL_DIS;
+
+ if (!HFI1_CAP_KGET_MASK(uctxt->flags, MULTI_PKT_EGR))
+ rcvctrl_ops |= HFI1_RCVCTRL_ONE_PKT_EGR_ENB;
+ if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_EGR_FULL))
+ rcvctrl_ops |= HFI1_RCVCTRL_NO_EGR_DROP_ENB;
+ if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_RHQ_FULL))
+ rcvctrl_ops |= HFI1_RCVCTRL_NO_RHQ_DROP_ENB;
+ if (HFI1_CAP_KGET_MASK(uctxt->flags, DMA_RTAIL))
+ rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_ENB;
+
+ hfi1_rcvctrl(uctxt->dd, rcvctrl_ops, uctxt);
+done:
+ return ret;
+}
+
+static int hfi1_netdev_allocate_ctxt(struct hfi1_devdata *dd,
+ struct hfi1_ctxtdata **ctxt)
+{
+ struct hfi1_ctxtdata *uctxt;
+ int ret;
+
+ if (dd->flags & HFI1_FROZEN)
+ return -EIO;
+
+ ret = hfi1_create_ctxtdata(dd->pport, dd->node, &uctxt);
+ if (ret < 0) {
+ dd_dev_err(dd, "Unable to create ctxtdata, failing open\n");
+ return -ENOMEM;
+ }
+
+ uctxt->flags = HFI1_CAP_KGET(MULTI_PKT_EGR) |
+ HFI1_CAP_KGET(NODROP_RHQ_FULL) |
+ HFI1_CAP_KGET(NODROP_EGR_FULL) |
+ HFI1_CAP_KGET(DMA_RTAIL);
+ /* Netdev contexts are always NO_RDMA_RTAIL */
+ uctxt->fast_handler = handle_receive_interrupt_napi_fp;
+ uctxt->slow_handler = handle_receive_interrupt_napi_sp;
+ hfi1_set_seq_cnt(uctxt, 1);
+ uctxt->is_vnic = true;
+
+ hfi1_stats.sps_ctxts++;
+
+ dd_dev_info(dd, "created netdev context %d\n", uctxt->ctxt);
+ *ctxt = uctxt;
+
+ return 0;
+}
+
+static void hfi1_netdev_deallocate_ctxt(struct hfi1_devdata *dd,
+ struct hfi1_ctxtdata *uctxt)
+{
+ flush_wc();
+
+ /*
+ * Disable receive context and interrupt available, reset all
+ * RcvCtxtCtrl bits to default values.
+ */
+ hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_DIS |
+ HFI1_RCVCTRL_TIDFLOW_DIS |
+ HFI1_RCVCTRL_INTRAVAIL_DIS |
+ HFI1_RCVCTRL_ONE_PKT_EGR_DIS |
+ HFI1_RCVCTRL_NO_RHQ_DROP_DIS |
+ HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt);
+
+ if (uctxt->msix_intr != CCE_NUM_MSIX_VECTORS)
+ msix_free_irq(dd, uctxt->msix_intr);
+
+ uctxt->msix_intr = CCE_NUM_MSIX_VECTORS;
+ uctxt->event_flags = 0;
+
+ hfi1_clear_tids(uctxt);
+ hfi1_clear_ctxt_pkey(dd, uctxt);
+
+ hfi1_stats.sps_ctxts--;
+
+ hfi1_free_ctxt(uctxt);
+}
+
+static int hfi1_netdev_allot_ctxt(struct hfi1_netdev_rx *rx,
+ struct hfi1_ctxtdata **ctxt)
+{
+ int rc;
+ struct hfi1_devdata *dd = rx->dd;
+
+ rc = hfi1_netdev_allocate_ctxt(dd, ctxt);
+ if (rc) {
+ dd_dev_err(dd, "netdev ctxt alloc failed %d\n", rc);
+ return rc;
+ }
+
+ rc = hfi1_netdev_setup_ctxt(rx, *ctxt);
+ if (rc) {
+ dd_dev_err(dd, "netdev ctxt setup failed %d\n", rc);
+ hfi1_netdev_deallocate_ctxt(dd, *ctxt);
+ *ctxt = NULL;
+ }
+
+ return rc;
+}
+
+/**
+ * hfi1_num_netdev_contexts - Count of netdev recv contexts to use.
+ * @dd: device on which to allocate netdev contexts
+ * @available_contexts: count of available receive contexts
+ * @cpu_mask: mask of possible cpus to include for contexts
+ *
+ * Return: count of physical cores on a node or the remaining available recv
+ * contexts for netdev recv context usage up to the maximum of
+ * HFI1_MAX_NETDEV_CTXTS.
+ * A value of 0 can be returned when acceleration is explicitly turned off,
+ * a memory allocation error occurs or when there are no available contexts.
+ *
+ */
+u32 hfi1_num_netdev_contexts(struct hfi1_devdata *dd, u32 available_contexts,
+ struct cpumask *cpu_mask)
+{
+ cpumask_var_t node_cpu_mask;
+ unsigned int available_cpus;
+
+ if (!HFI1_CAP_IS_KSET(AIP))
+ return 0;
+
+ /* Always give user contexts priority over netdev contexts */
+ if (available_contexts == 0) {
+ dd_dev_info(dd, "No receive contexts available for netdevs.\n");
+ return 0;
+ }
+
+ if (!zalloc_cpumask_var(&node_cpu_mask, GFP_KERNEL)) {
+ dd_dev_err(dd, "Unable to allocate cpu_mask for netdevs.\n");
+ return 0;
+ }
+
+ cpumask_and(node_cpu_mask, cpu_mask, cpumask_of_node(dd->node));
+
+ available_cpus = cpumask_weight(node_cpu_mask);
+
+ free_cpumask_var(node_cpu_mask);
+
+ return min3(available_cpus, available_contexts,
+ (u32)HFI1_MAX_NETDEV_CTXTS);
+}
+
+static int hfi1_netdev_rxq_init(struct hfi1_netdev_rx *rx)
+{
+ int i;
+ int rc;
+ struct hfi1_devdata *dd = rx->dd;
+ struct net_device *dev = rx->rx_napi;
+
+ rx->num_rx_q = dd->num_netdev_contexts;
+ rx->rxq = kcalloc_node(rx->num_rx_q, sizeof(*rx->rxq),
+ GFP_KERNEL, dd->node);
+
+ if (!rx->rxq) {
+ dd_dev_err(dd, "Unable to allocate netdev queue data\n");
+ return (-ENOMEM);
+ }
+
+ for (i = 0; i < rx->num_rx_q; i++) {
+ struct hfi1_netdev_rxq *rxq = &rx->rxq[i];
+
+ rc = hfi1_netdev_allot_ctxt(rx, &rxq->rcd);
+ if (rc)
+ goto bail_context_irq_failure;
+
+ hfi1_rcd_get(rxq->rcd);
+ rxq->rx = rx;
+ rxq->rcd->napi = &rxq->napi;
+ dd_dev_info(dd, "Setting rcv queue %d napi to context %d\n",
+ i, rxq->rcd->ctxt);
+ /*
+ * Disable BUSY_POLL on this NAPI as this is not supported
+ * right now.
+ */
+ set_bit(NAPI_STATE_NO_BUSY_POLL, &rxq->napi.state);
+ netif_napi_add(dev, &rxq->napi, hfi1_netdev_rx_napi);
+ rc = msix_netdev_request_rcd_irq(rxq->rcd);
+ if (rc)
+ goto bail_context_irq_failure;
+ }
+
+ return 0;
+
+bail_context_irq_failure:
+ dd_dev_err(dd, "Unable to allot receive context\n");
+ for (; i >= 0; i--) {
+ struct hfi1_netdev_rxq *rxq = &rx->rxq[i];
+
+ if (rxq->rcd) {
+ hfi1_netdev_deallocate_ctxt(dd, rxq->rcd);
+ hfi1_rcd_put(rxq->rcd);
+ rxq->rcd = NULL;
+ }
+ }
+ kfree(rx->rxq);
+ rx->rxq = NULL;
+
+ return rc;
+}
+
+static void hfi1_netdev_rxq_deinit(struct hfi1_netdev_rx *rx)
+{
+ int i;
+ struct hfi1_devdata *dd = rx->dd;
+
+ for (i = 0; i < rx->num_rx_q; i++) {
+ struct hfi1_netdev_rxq *rxq = &rx->rxq[i];
+
+ netif_napi_del(&rxq->napi);
+ hfi1_netdev_deallocate_ctxt(dd, rxq->rcd);
+ hfi1_rcd_put(rxq->rcd);
+ rxq->rcd = NULL;
+ }
+
+ kfree(rx->rxq);
+ rx->rxq = NULL;
+ rx->num_rx_q = 0;
+}
+
+static void enable_queues(struct hfi1_netdev_rx *rx)
+{
+ int i;
+
+ for (i = 0; i < rx->num_rx_q; i++) {
+ struct hfi1_netdev_rxq *rxq = &rx->rxq[i];
+
+ dd_dev_info(rx->dd, "enabling queue %d on context %d\n", i,
+ rxq->rcd->ctxt);
+ napi_enable(&rxq->napi);
+ hfi1_rcvctrl(rx->dd,
+ HFI1_RCVCTRL_CTXT_ENB | HFI1_RCVCTRL_INTRAVAIL_ENB,
+ rxq->rcd);
+ }
+}
+
+static void disable_queues(struct hfi1_netdev_rx *rx)
+{
+ int i;
+
+ msix_netdev_synchronize_irq(rx->dd);
+
+ for (i = 0; i < rx->num_rx_q; i++) {
+ struct hfi1_netdev_rxq *rxq = &rx->rxq[i];
+
+ dd_dev_info(rx->dd, "disabling queue %d on context %d\n", i,
+ rxq->rcd->ctxt);
+
+ /* wait for napi if it was scheduled */
+ hfi1_rcvctrl(rx->dd,
+ HFI1_RCVCTRL_CTXT_DIS | HFI1_RCVCTRL_INTRAVAIL_DIS,
+ rxq->rcd);
+ napi_synchronize(&rxq->napi);
+ napi_disable(&rxq->napi);
+ }
+}
+
+/**
+ * hfi1_netdev_rx_init - Incrememnts netdevs counter. When called first time,
+ * it allocates receive queue data and calls netif_napi_add
+ * for each queue.
+ *
+ * @dd: hfi1 dev data
+ */
+int hfi1_netdev_rx_init(struct hfi1_devdata *dd)
+{
+ struct hfi1_netdev_rx *rx = dd->netdev_rx;
+ int res;
+
+ if (atomic_fetch_inc(&rx->netdevs))
+ return 0;
+
+ mutex_lock(&hfi1_mutex);
+ res = hfi1_netdev_rxq_init(rx);
+ mutex_unlock(&hfi1_mutex);
+ return res;
+}
+
+/**
+ * hfi1_netdev_rx_destroy - Decrements netdevs counter, when it reaches 0
+ * napi is deleted and receive queses memory is freed.
+ *
+ * @dd: hfi1 dev data
+ */
+int hfi1_netdev_rx_destroy(struct hfi1_devdata *dd)
+{
+ struct hfi1_netdev_rx *rx = dd->netdev_rx;
+
+ /* destroy the RX queues only if it is the last netdev going away */
+ if (atomic_fetch_add_unless(&rx->netdevs, -1, 0) == 1) {
+ mutex_lock(&hfi1_mutex);
+ hfi1_netdev_rxq_deinit(rx);
+ mutex_unlock(&hfi1_mutex);
+ }
+
+ return 0;
+}
+
+/**
+ * hfi1_alloc_rx - Allocates the rx support structure
+ * @dd: hfi1 dev data
+ *
+ * Allocate the rx structure to support gathering the receive
+ * resources and the dummy netdev.
+ *
+ * Updates dd struct pointer upon success.
+ *
+ * Return: 0 (success) -error on failure
+ *
+ */
+int hfi1_alloc_rx(struct hfi1_devdata *dd)
+{
+ struct hfi1_netdev_rx *rx;
+
+ dd_dev_info(dd, "allocating rx size %ld\n", sizeof(*rx));
+ rx = kzalloc_node(sizeof(*rx), GFP_KERNEL, dd->node);
+
+ if (!rx)
+ return -ENOMEM;
+ rx->dd = dd;
+ rx->rx_napi = alloc_netdev_dummy(0);
+ if (!rx->rx_napi) {
+ kfree(rx);
+ return -ENOMEM;
+ }
+
+ xa_init(&rx->dev_tbl);
+ atomic_set(&rx->enabled, 0);
+ atomic_set(&rx->netdevs, 0);
+ dd->netdev_rx = rx;
+
+ return 0;
+}
+
+void hfi1_free_rx(struct hfi1_devdata *dd)
+{
+ if (dd->netdev_rx) {
+ dd_dev_info(dd, "hfi1 rx freed\n");
+ free_netdev(dd->netdev_rx->rx_napi);
+ kfree(dd->netdev_rx);
+ dd->netdev_rx = NULL;
+ }
+}
+
+/**
+ * hfi1_netdev_enable_queues - This is napi enable function.
+ * It enables napi objects associated with queues.
+ * When at least one device has called it it increments atomic counter.
+ * Disable function decrements counter and when it is 0,
+ * calls napi_disable for every queue.
+ *
+ * @dd: hfi1 dev data
+ */
+void hfi1_netdev_enable_queues(struct hfi1_devdata *dd)
+{
+ struct hfi1_netdev_rx *rx;
+
+ if (!dd->netdev_rx)
+ return;
+
+ rx = dd->netdev_rx;
+ if (atomic_fetch_inc(&rx->enabled))
+ return;
+
+ mutex_lock(&hfi1_mutex);
+ enable_queues(rx);
+ mutex_unlock(&hfi1_mutex);
+}
+
+void hfi1_netdev_disable_queues(struct hfi1_devdata *dd)
+{
+ struct hfi1_netdev_rx *rx;
+
+ if (!dd->netdev_rx)
+ return;
+
+ rx = dd->netdev_rx;
+ if (atomic_dec_if_positive(&rx->enabled))
+ return;
+
+ mutex_lock(&hfi1_mutex);
+ disable_queues(rx);
+ mutex_unlock(&hfi1_mutex);
+}
+
+/**
+ * hfi1_netdev_add_data - Registers data with unique identifier
+ * to be requested later this is needed for VNIC and IPoIB VLANs
+ * implementations.
+ * This call is protected by mutex idr_lock.
+ *
+ * @dd: hfi1 dev data
+ * @id: requested integer id up to INT_MAX
+ * @data: data to be associated with index
+ */
+int hfi1_netdev_add_data(struct hfi1_devdata *dd, int id, void *data)
+{
+ struct hfi1_netdev_rx *rx = dd->netdev_rx;
+
+ return xa_insert(&rx->dev_tbl, id, data, GFP_NOWAIT);
+}
+
+/**
+ * hfi1_netdev_remove_data - Removes data with previously given id.
+ * Returns the reference to removed entry.
+ *
+ * @dd: hfi1 dev data
+ * @id: requested integer id up to INT_MAX
+ */
+void *hfi1_netdev_remove_data(struct hfi1_devdata *dd, int id)
+{
+ struct hfi1_netdev_rx *rx = dd->netdev_rx;
+
+ return xa_erase(&rx->dev_tbl, id);
+}
+
+/**
+ * hfi1_netdev_get_data - Gets data with given id
+ *
+ * @dd: hfi1 dev data
+ * @id: requested integer id up to INT_MAX
+ */
+void *hfi1_netdev_get_data(struct hfi1_devdata *dd, int id)
+{
+ struct hfi1_netdev_rx *rx = dd->netdev_rx;
+
+ return xa_load(&rx->dev_tbl, id);
+}
+
+/**
+ * hfi1_netdev_get_first_data - Gets first entry with greater or equal id.
+ *
+ * @dd: hfi1 dev data
+ * @start_id: requested integer id up to INT_MAX
+ */
+void *hfi1_netdev_get_first_data(struct hfi1_devdata *dd, int *start_id)
+{
+ struct hfi1_netdev_rx *rx = dd->netdev_rx;
+ unsigned long index = *start_id;
+ void *ret;
+
+ ret = xa_find(&rx->dev_tbl, &index, UINT_MAX, XA_PRESENT);
+ *start_id = (int)index;
+ return ret;
+}
diff --git a/drivers/infiniband/hw/hfi1/opa_compat.h b/drivers/infiniband/hw/hfi1/opa_compat.h
index 6ef3c1cbdcd7..49f2da677b03 100644
--- a/drivers/infiniband/hw/hfi1/opa_compat.h
+++ b/drivers/infiniband/hw/hfi1/opa_compat.h
@@ -1,52 +1,10 @@
-#ifndef _LINUX_H
-#define _LINUX_H
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
* Copyright(c) 2015, 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
*/
+#ifndef _LINUX_H
+#define _LINUX_H
/*
* This header file is for OPA-specific definitions which are
* required by the HFI driver, and which aren't yet in the Linux
@@ -84,7 +42,8 @@ static inline u8 port_states_to_phys_state(struct opa_port_states *ps)
/*
* OPA port physical states
* IB Volume 1, Table 146 PortInfo/IB Volume 2 Section 5.4.2(1) PortPhysState
- * values.
+ * values are the same in OmniPath Architecture. OPA leverages some of the same
+ * concepts as InfiniBand, but has a few other states as well.
*
* When writing, only values 0-3 are valid, other values are ignored.
* When reading, 0 is reserved.
@@ -92,6 +51,8 @@ static inline u8 port_states_to_phys_state(struct opa_port_states *ps)
* Returned by the ibphys_portstate() routine.
*/
enum opa_port_phys_state {
+ /* Values 0-7 have the same meaning in OPA as in InfiniBand. */
+
IB_PORTPHYSSTATE_NOP = 0,
/* 1 is reserved */
IB_PORTPHYSSTATE_POLLING = 2,
@@ -101,9 +62,23 @@ enum opa_port_phys_state {
IB_PORTPHYSSTATE_LINK_ERROR_RECOVERY = 6,
IB_PORTPHYSSTATE_PHY_TEST = 7,
/* 8 is reserved */
+
+ /*
+ * Offline: Port is quiet (transmitters disabled) due to lack of
+ * physical media, unsupported media, or transition between link up
+ * and next link up attempt
+ */
OPA_PORTPHYSSTATE_OFFLINE = 9,
- OPA_PORTPHYSSTATE_GANGED = 10,
+
+ /* 10 is reserved */
+
+ /*
+ * Phy_Test: Specific test patterns are transmitted, and receiver BER
+ * can be monitored. This facilitates signal integrity testing for the
+ * physical layer of the port.
+ */
OPA_PORTPHYSSTATE_TEST = 11,
+
OPA_PORTPHYSSTATE_MAX = 11,
/* values 12-15 are reserved/ignored */
};
diff --git a/drivers/infiniband/hw/hfi1/opfn.c b/drivers/infiniband/hw/hfi1/opfn.c
new file mode 100644
index 000000000000..6e0e3458d202
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/opfn.c
@@ -0,0 +1,323 @@
+// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
+/*
+ * Copyright(c) 2018 Intel Corporation.
+ *
+ */
+#include "hfi.h"
+#include "trace.h"
+#include "qp.h"
+#include "opfn.h"
+
+#define IB_BTHE_E BIT(IB_BTHE_E_SHIFT)
+
+#define OPFN_CODE(code) BIT((code) - 1)
+#define OPFN_MASK(code) OPFN_CODE(STL_VERBS_EXTD_##code)
+
+struct hfi1_opfn_type {
+ bool (*request)(struct rvt_qp *qp, u64 *data);
+ bool (*response)(struct rvt_qp *qp, u64 *data);
+ bool (*reply)(struct rvt_qp *qp, u64 data);
+ void (*error)(struct rvt_qp *qp);
+};
+
+static struct hfi1_opfn_type hfi1_opfn_handlers[STL_VERBS_EXTD_MAX] = {
+ [STL_VERBS_EXTD_TID_RDMA] = {
+ .request = tid_rdma_conn_req,
+ .response = tid_rdma_conn_resp,
+ .reply = tid_rdma_conn_reply,
+ .error = tid_rdma_conn_error,
+ },
+};
+
+static struct workqueue_struct *opfn_wq;
+
+static void opfn_schedule_conn_request(struct rvt_qp *qp);
+
+static bool hfi1_opfn_extended(u32 bth1)
+{
+ return !!(bth1 & IB_BTHE_E);
+}
+
+static void opfn_conn_request(struct rvt_qp *qp)
+{
+ struct hfi1_qp_priv *priv = qp->priv;
+ struct ib_atomic_wr wr;
+ u16 mask, capcode;
+ struct hfi1_opfn_type *extd;
+ u64 data;
+ unsigned long flags;
+ int ret = 0;
+
+ trace_hfi1_opfn_state_conn_request(qp);
+ spin_lock_irqsave(&priv->opfn.lock, flags);
+ /*
+ * Exit if the extended bit is not set, or if nothing is requested, or
+ * if we have completed all requests, or if a previous request is in
+ * progress
+ */
+ if (!priv->opfn.extended || !priv->opfn.requested ||
+ priv->opfn.requested == priv->opfn.completed || priv->opfn.curr)
+ goto done;
+
+ mask = priv->opfn.requested & ~priv->opfn.completed;
+ capcode = ilog2(mask & ~(mask - 1)) + 1;
+ if (capcode >= STL_VERBS_EXTD_MAX) {
+ priv->opfn.completed |= OPFN_CODE(capcode);
+ goto done;
+ }
+
+ extd = &hfi1_opfn_handlers[capcode];
+ if (!extd || !extd->request || !extd->request(qp, &data)) {
+ /*
+ * Either there is no handler for this capability or the request
+ * packet could not be generated. Either way, mark it as done so
+ * we don't keep attempting to complete it.
+ */
+ priv->opfn.completed |= OPFN_CODE(capcode);
+ goto done;
+ }
+
+ trace_hfi1_opfn_data_conn_request(qp, capcode, data);
+ data = (data & ~0xf) | capcode;
+
+ memset(&wr, 0, sizeof(wr));
+ wr.wr.opcode = IB_WR_OPFN;
+ wr.remote_addr = HFI1_VERBS_E_ATOMIC_VADDR;
+ wr.compare_add = data;
+
+ priv->opfn.curr = capcode; /* A new request is now in progress */
+ /* Drop opfn.lock before calling ib_post_send() */
+ spin_unlock_irqrestore(&priv->opfn.lock, flags);
+
+ ret = ib_post_send(&qp->ibqp, &wr.wr, NULL);
+ if (ret)
+ goto err;
+ trace_hfi1_opfn_state_conn_request(qp);
+ return;
+err:
+ trace_hfi1_msg_opfn_conn_request(qp, "ib_ost_send failed: ret = ",
+ (u64)ret);
+ spin_lock_irqsave(&priv->opfn.lock, flags);
+ /*
+ * In case of an unexpected error return from ib_post_send
+ * clear opfn.curr and reschedule to try again
+ */
+ priv->opfn.curr = STL_VERBS_EXTD_NONE;
+ opfn_schedule_conn_request(qp);
+done:
+ spin_unlock_irqrestore(&priv->opfn.lock, flags);
+}
+
+void opfn_send_conn_request(struct work_struct *work)
+{
+ struct hfi1_opfn_data *od;
+ struct hfi1_qp_priv *qpriv;
+
+ od = container_of(work, struct hfi1_opfn_data, opfn_work);
+ qpriv = container_of(od, struct hfi1_qp_priv, opfn);
+
+ opfn_conn_request(qpriv->owner);
+}
+
+/*
+ * When QP s_lock is held in the caller, the OPFN request must be scheduled
+ * to a different workqueue to avoid double locking QP s_lock in call to
+ * ib_post_send in opfn_conn_request
+ */
+static void opfn_schedule_conn_request(struct rvt_qp *qp)
+{
+ struct hfi1_qp_priv *priv = qp->priv;
+
+ trace_hfi1_opfn_state_sched_conn_request(qp);
+ queue_work(opfn_wq, &priv->opfn.opfn_work);
+}
+
+void opfn_conn_response(struct rvt_qp *qp, struct rvt_ack_entry *e,
+ struct ib_atomic_eth *ateth)
+{
+ struct hfi1_qp_priv *priv = qp->priv;
+ u64 data = be64_to_cpu(ateth->compare_data);
+ struct hfi1_opfn_type *extd;
+ u8 capcode;
+ unsigned long flags;
+
+ trace_hfi1_opfn_state_conn_response(qp);
+ capcode = data & 0xf;
+ trace_hfi1_opfn_data_conn_response(qp, capcode, data);
+ if (!capcode || capcode >= STL_VERBS_EXTD_MAX)
+ return;
+
+ extd = &hfi1_opfn_handlers[capcode];
+
+ if (!extd || !extd->response) {
+ e->atomic_data = capcode;
+ return;
+ }
+
+ spin_lock_irqsave(&priv->opfn.lock, flags);
+ if (priv->opfn.completed & OPFN_CODE(capcode)) {
+ /*
+ * We are receiving a request for a feature that has already
+ * been negotiated. This may mean that the other side has reset
+ */
+ priv->opfn.completed &= ~OPFN_CODE(capcode);
+ if (extd->error)
+ extd->error(qp);
+ }
+
+ if (extd->response(qp, &data))
+ priv->opfn.completed |= OPFN_CODE(capcode);
+ e->atomic_data = (data & ~0xf) | capcode;
+ trace_hfi1_opfn_state_conn_response(qp);
+ spin_unlock_irqrestore(&priv->opfn.lock, flags);
+}
+
+void opfn_conn_reply(struct rvt_qp *qp, u64 data)
+{
+ struct hfi1_qp_priv *priv = qp->priv;
+ struct hfi1_opfn_type *extd;
+ u8 capcode;
+ unsigned long flags;
+
+ trace_hfi1_opfn_state_conn_reply(qp);
+ capcode = data & 0xf;
+ trace_hfi1_opfn_data_conn_reply(qp, capcode, data);
+ if (!capcode || capcode >= STL_VERBS_EXTD_MAX)
+ return;
+
+ spin_lock_irqsave(&priv->opfn.lock, flags);
+ /*
+ * Either there is no previous request or the reply is not for the
+ * current request
+ */
+ if (!priv->opfn.curr || capcode != priv->opfn.curr)
+ goto done;
+
+ extd = &hfi1_opfn_handlers[capcode];
+
+ if (!extd || !extd->reply)
+ goto clear;
+
+ if (extd->reply(qp, data))
+ priv->opfn.completed |= OPFN_CODE(capcode);
+clear:
+ /*
+ * Clear opfn.curr to indicate that the previous request is no longer in
+ * progress
+ */
+ priv->opfn.curr = STL_VERBS_EXTD_NONE;
+ trace_hfi1_opfn_state_conn_reply(qp);
+done:
+ spin_unlock_irqrestore(&priv->opfn.lock, flags);
+}
+
+void opfn_conn_error(struct rvt_qp *qp)
+{
+ struct hfi1_qp_priv *priv = qp->priv;
+ struct hfi1_opfn_type *extd = NULL;
+ unsigned long flags;
+ u16 capcode;
+
+ trace_hfi1_opfn_state_conn_error(qp);
+ trace_hfi1_msg_opfn_conn_error(qp, "error. qp state ", (u64)qp->state);
+ /*
+ * The QP has gone into the Error state. We have to invalidate all
+ * negotiated feature, including the one in progress (if any). The RC
+ * QP handling will clean the WQE for the connection request.
+ */
+ spin_lock_irqsave(&priv->opfn.lock, flags);
+ while (priv->opfn.completed) {
+ capcode = priv->opfn.completed & ~(priv->opfn.completed - 1);
+ extd = &hfi1_opfn_handlers[ilog2(capcode) + 1];
+ if (extd->error)
+ extd->error(qp);
+ priv->opfn.completed &= ~OPFN_CODE(capcode);
+ }
+ priv->opfn.extended = 0;
+ priv->opfn.requested = 0;
+ priv->opfn.curr = STL_VERBS_EXTD_NONE;
+ spin_unlock_irqrestore(&priv->opfn.lock, flags);
+}
+
+void opfn_qp_init(struct rvt_qp *qp, struct ib_qp_attr *attr, int attr_mask)
+{
+ struct ib_qp *ibqp = &qp->ibqp;
+ struct hfi1_qp_priv *priv = qp->priv;
+ unsigned long flags;
+
+ if (attr_mask & IB_QP_RETRY_CNT)
+ priv->s_retry = attr->retry_cnt;
+
+ spin_lock_irqsave(&priv->opfn.lock, flags);
+ if (ibqp->qp_type == IB_QPT_RC && HFI1_CAP_IS_KSET(TID_RDMA)) {
+ struct tid_rdma_params *local = &priv->tid_rdma.local;
+
+ if (attr_mask & IB_QP_TIMEOUT)
+ priv->tid_retry_timeout_jiffies = qp->timeout_jiffies;
+ if (qp->pmtu == enum_to_mtu(OPA_MTU_4096) ||
+ qp->pmtu == enum_to_mtu(OPA_MTU_8192)) {
+ tid_rdma_opfn_init(qp, local);
+ /*
+ * We only want to set the OPFN requested bit when the
+ * QP transitions to RTS.
+ */
+ if (attr_mask & IB_QP_STATE &&
+ attr->qp_state == IB_QPS_RTS) {
+ priv->opfn.requested |= OPFN_MASK(TID_RDMA);
+ /*
+ * If the QP is transitioning to RTS and the
+ * opfn.completed for TID RDMA has already been
+ * set, the QP is being moved *back* into RTS.
+ * We can now renegotiate the TID RDMA
+ * parameters.
+ */
+ if (priv->opfn.completed &
+ OPFN_MASK(TID_RDMA)) {
+ priv->opfn.completed &=
+ ~OPFN_MASK(TID_RDMA);
+ /*
+ * Since the opfn.completed bit was
+ * already set, it is safe to assume
+ * that the opfn.extended is also set.
+ */
+ opfn_schedule_conn_request(qp);
+ }
+ }
+ } else {
+ memset(local, 0, sizeof(*local));
+ }
+ }
+ spin_unlock_irqrestore(&priv->opfn.lock, flags);
+}
+
+void opfn_trigger_conn_request(struct rvt_qp *qp, u32 bth1)
+{
+ struct hfi1_qp_priv *priv = qp->priv;
+
+ if (!priv->opfn.extended && hfi1_opfn_extended(bth1) &&
+ HFI1_CAP_IS_KSET(OPFN)) {
+ priv->opfn.extended = 1;
+ if (qp->state == IB_QPS_RTS)
+ opfn_conn_request(qp);
+ }
+}
+
+int opfn_init(void)
+{
+ opfn_wq = alloc_workqueue("hfi_opfn",
+ WQ_SYSFS | WQ_HIGHPRI | WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM |
+ WQ_PERCPU,
+ HFI1_MAX_ACTIVE_WORKQUEUE_ENTRIES);
+ if (!opfn_wq)
+ return -ENOMEM;
+
+ return 0;
+}
+
+void opfn_exit(void)
+{
+ if (opfn_wq) {
+ destroy_workqueue(opfn_wq);
+ opfn_wq = NULL;
+ }
+}
diff --git a/drivers/infiniband/hw/hfi1/opfn.h b/drivers/infiniband/hw/hfi1/opfn.h
new file mode 100644
index 000000000000..62f93c1dc082
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/opfn.h
@@ -0,0 +1,87 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
+/*
+ * Copyright(c) 2018 Intel Corporation.
+ *
+ */
+#ifndef _HFI1_OPFN_H
+#define _HFI1_OPFN_H
+
+/**
+ * DOC: Omni Path Feature Negotion (OPFN)
+ *
+ * OPFN is a discovery protocol for Intel Omni-Path fabric that
+ * allows two RC QPs to negotiate a common feature that both QPs
+ * can support. Currently, the only OPA feature that OPFN
+ * supports is TID RDMA.
+ *
+ * Architecture
+ *
+ * OPFN involves the communication between two QPs on the HFI
+ * level on an Omni-Path fabric, and ULPs have no knowledge of
+ * OPFN at all.
+ *
+ * Implementation
+ *
+ * OPFN extends the existing IB RC protocol with the following
+ * changes:
+ * -- Uses Bit 24 (reserved) of DWORD 1 of Base Transport
+ * Header (BTH1) to indicate that the RC QP supports OPFN;
+ * -- Uses a combination of RC COMPARE_SWAP opcode (0x13) and
+ * the address U64_MAX (0xFFFFFFFFFFFFFFFF) as an OPFN
+ * request; The 64-bit data carried with the request/response
+ * contains the parameters for negotiation and will be
+ * defined in tid_rdma.c file;
+ * -- Defines IB_WR_RESERVED3 as IB_WR_OPFN.
+ *
+ * The OPFN communication will be triggered when an RC QP
+ * receives a request with Bit 24 of BTH1 set. The responder QP
+ * will then post send an OPFN request with its local
+ * parameters, which will be sent to the requester QP once all
+ * existing requests on the responder QP side have been sent.
+ * Once the requester QP receives the OPFN request, it will
+ * keep a copy of the responder QP's parameters, and return a
+ * response packet with its own local parameters. The responder
+ * QP receives the response packet and keeps a copy of the requester
+ * QP's parameters. After this exchange, each side has the parameters
+ * for both sides and therefore can select the right parameters
+ * for future transactions
+ */
+
+#include <linux/workqueue.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/rdmavt_qp.h>
+
+/* STL Verbs Extended */
+#define IB_BTHE_E_SHIFT 24
+#define HFI1_VERBS_E_ATOMIC_VADDR U64_MAX
+
+enum hfi1_opfn_codes {
+ STL_VERBS_EXTD_NONE = 0,
+ STL_VERBS_EXTD_TID_RDMA,
+ STL_VERBS_EXTD_MAX
+};
+
+struct hfi1_opfn_data {
+ u8 extended;
+ u16 requested;
+ u16 completed;
+ enum hfi1_opfn_codes curr;
+ /* serialize opfn function calls */
+ spinlock_t lock;
+ struct work_struct opfn_work;
+};
+
+/* WR opcode for OPFN */
+#define IB_WR_OPFN IB_WR_RESERVED3
+
+void opfn_send_conn_request(struct work_struct *work);
+void opfn_conn_response(struct rvt_qp *qp, struct rvt_ack_entry *e,
+ struct ib_atomic_eth *ateth);
+void opfn_conn_reply(struct rvt_qp *qp, u64 data);
+void opfn_conn_error(struct rvt_qp *qp);
+void opfn_qp_init(struct rvt_qp *qp, struct ib_qp_attr *attr, int attr_mask);
+void opfn_trigger_conn_request(struct rvt_qp *qp, u32 bth1);
+int opfn_init(void);
+void opfn_exit(void);
+
+#endif /* _HFI1_OPFN_H */
diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c
index ebd941fc8a92..7133964749f8 100644
--- a/drivers/infiniband/hw/hfi1/pcie.c
+++ b/drivers/infiniband/hw/hfi1/pcie.c
@@ -1,84 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
+ * Copyright(c) 2015 - 2019 Intel Corporation.
*/
+#include <linux/bitfield.h>
#include <linux/pci.h>
#include <linux/io.h>
#include <linux/delay.h>
#include <linux/vmalloc.h>
-#include <linux/aer.h>
#include <linux/module.h>
#include "hfi.h"
#include "chip_registers.h"
#include "aspm.h"
-/* link speed vector for Gen3 speed - not in Linux headers */
-#define GEN1_SPEED_VECTOR 0x1
-#define GEN2_SPEED_VECTOR 0x2
-#define GEN3_SPEED_VECTOR 0x3
-
/*
* This file contains PCIe utility routines.
*/
/*
- * Code to adjust PCIe capabilities.
- */
-static void tune_pcie_caps(struct hfi1_devdata *);
-
-/*
* Do all the common PCIe setup and initialization.
- * devdata is not yet allocated, and is not allocated until after this
- * routine returns success. Therefore dd_dev_err() can't be used for error
- * printing.
*/
-int hfi1_pcie_init(struct pci_dev *pdev, const struct pci_device_id *ent)
+int hfi1_pcie_init(struct hfi1_devdata *dd)
{
int ret;
+ struct pci_dev *pdev = dd->pcidev;
ret = pci_enable_device(pdev);
if (ret) {
@@ -94,48 +40,35 @@ int hfi1_pcie_init(struct pci_dev *pdev, const struct pci_device_id *ent)
* about that, it appears. If the original BAR was retained
* in the kernel data structures, this may be OK.
*/
- hfi1_early_err(&pdev->dev, "pci enable failed: error %d\n",
- -ret);
- goto done;
+ dd_dev_err(dd, "pci enable failed: error %d\n", -ret);
+ return ret;
}
ret = pci_request_regions(pdev, DRIVER_NAME);
if (ret) {
- hfi1_early_err(&pdev->dev,
- "pci_request_regions fails: err %d\n", -ret);
+ dd_dev_err(dd, "pci_request_regions fails: err %d\n", -ret);
goto bail;
}
- ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
+ ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
if (ret) {
/*
* If the 64 bit setup fails, try 32 bit. Some systems
* do not setup 64 bit maps on systems with 2GB or less
* memory installed.
*/
- ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+ ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
if (ret) {
- hfi1_early_err(&pdev->dev,
- "Unable to set DMA mask: %d\n", ret);
+ dd_dev_err(dd, "Unable to set DMA mask: %d\n", ret);
goto bail;
}
- ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
- } else {
- ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
- }
- if (ret) {
- hfi1_early_err(&pdev->dev,
- "Unable to set DMA consistent mask: %d\n", ret);
- goto bail;
}
pci_set_master(pdev);
- (void)pci_enable_pcie_error_reporting(pdev);
- goto done;
+ return 0;
bail:
hfi1_pcie_cleanup(pdev);
-done:
return ret;
}
@@ -161,9 +94,8 @@ int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev)
{
unsigned long len;
resource_size_t addr;
-
- dd->pcidev = pdev;
- pci_set_drvdata(pdev, dd);
+ int ret = 0;
+ u32 rcv_array_count;
addr = pci_resource_start(pdev, 0);
len = pci_resource_len(pdev, 0);
@@ -179,47 +111,62 @@ int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev)
return -EINVAL;
}
- dd->kregbase = ioremap_nocache(addr, TXE_PIO_SEND);
- if (!dd->kregbase)
+ dd->kregbase1 = ioremap(addr, RCV_ARRAY);
+ if (!dd->kregbase1) {
+ dd_dev_err(dd, "UC mapping of kregbase1 failed\n");
return -ENOMEM;
+ }
+ dd_dev_info(dd, "UC base1: %p for %x\n", dd->kregbase1, RCV_ARRAY);
+
+ /* verify that reads actually work, save revision for reset check */
+ dd->revision = readq(dd->kregbase1 + CCE_REVISION);
+ if (dd->revision == ~(u64)0) {
+ dd_dev_err(dd, "Cannot read chip CSRs\n");
+ goto nomem;
+ }
+
+ rcv_array_count = readq(dd->kregbase1 + RCV_ARRAY_CNT);
+ dd_dev_info(dd, "RcvArray count: %u\n", rcv_array_count);
+ dd->base2_start = RCV_ARRAY + rcv_array_count * 8;
+
+ dd->kregbase2 = ioremap(
+ addr + dd->base2_start,
+ TXE_PIO_SEND - dd->base2_start);
+ if (!dd->kregbase2) {
+ dd_dev_err(dd, "UC mapping of kregbase2 failed\n");
+ goto nomem;
+ }
+ dd_dev_info(dd, "UC base2: %p for %x\n", dd->kregbase2,
+ TXE_PIO_SEND - dd->base2_start);
dd->piobase = ioremap_wc(addr + TXE_PIO_SEND, TXE_PIO_SIZE);
if (!dd->piobase) {
- iounmap(dd->kregbase);
- return -ENOMEM;
+ dd_dev_err(dd, "WC mapping of send buffers failed\n");
+ goto nomem;
}
+ dd_dev_info(dd, "WC piobase: %p for %x\n", dd->piobase, TXE_PIO_SIZE);
- dd->flags |= HFI1_PRESENT; /* now register routines work */
-
- dd->kregend = dd->kregbase + TXE_PIO_SEND;
dd->physaddr = addr; /* used for io_remap, etc. */
/*
- * Re-map the chip's RcvArray as write-combining to allow us
+ * Map the chip's RcvArray as write-combining to allow us
* to write an entire cacheline worth of entries in one shot.
- * If this re-map fails, just continue - the RcvArray programming
- * function will handle both cases.
*/
- dd->chip_rcv_array_count = read_csr(dd, RCV_ARRAY_CNT);
dd->rcvarray_wc = ioremap_wc(addr + RCV_ARRAY,
- dd->chip_rcv_array_count * 8);
- dd_dev_info(dd, "WC Remapped RcvArray: %p\n", dd->rcvarray_wc);
- /*
- * Save BARs and command to rewrite after device reset.
- */
- dd->pcibar0 = addr;
- dd->pcibar1 = addr >> 32;
- pci_read_config_dword(dd->pcidev, PCI_ROM_ADDRESS, &dd->pci_rom);
- pci_read_config_word(dd->pcidev, PCI_COMMAND, &dd->pci_command);
- pcie_capability_read_word(dd->pcidev, PCI_EXP_DEVCTL, &dd->pcie_devctl);
- pcie_capability_read_word(dd->pcidev, PCI_EXP_LNKCTL, &dd->pcie_lnkctl);
- pcie_capability_read_word(dd->pcidev, PCI_EXP_DEVCTL2,
- &dd->pcie_devctl2);
- pci_read_config_dword(dd->pcidev, PCI_CFG_MSIX0, &dd->pci_msix0);
- pci_read_config_dword(dd->pcidev, PCIE_CFG_SPCIE1, &dd->pci_lnkctl3);
- pci_read_config_dword(dd->pcidev, PCIE_CFG_TPH2, &dd->pci_tph2);
+ rcv_array_count * 8);
+ if (!dd->rcvarray_wc) {
+ dd_dev_err(dd, "WC mapping of receive array failed\n");
+ goto nomem;
+ }
+ dd_dev_info(dd, "WC RcvArray: %p for %x\n",
+ dd->rcvarray_wc, rcv_array_count * 8);
+ dd->flags |= HFI1_PRESENT; /* chip.c CSR routines now work */
return 0;
+nomem:
+ ret = -ENOMEM;
+ hfi1_pcie_ddcleanup(dd);
+ return ret;
}
/*
@@ -229,89 +176,19 @@ int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev)
*/
void hfi1_pcie_ddcleanup(struct hfi1_devdata *dd)
{
- u64 __iomem *base = (void __iomem *)dd->kregbase;
-
dd->flags &= ~HFI1_PRESENT;
- dd->kregbase = NULL;
- iounmap(base);
+ if (dd->kregbase1)
+ iounmap(dd->kregbase1);
+ dd->kregbase1 = NULL;
+ if (dd->kregbase2)
+ iounmap(dd->kregbase2);
+ dd->kregbase2 = NULL;
if (dd->rcvarray_wc)
iounmap(dd->rcvarray_wc);
+ dd->rcvarray_wc = NULL;
if (dd->piobase)
iounmap(dd->piobase);
-}
-
-/*
- * Do a Function Level Reset (FLR) on the device.
- * Based on static function drivers/pci/pci.c:pcie_flr().
- */
-void hfi1_pcie_flr(struct hfi1_devdata *dd)
-{
- int i;
- u16 status;
-
- /* no need to check for the capability - we know the device has it */
-
- /* wait for Transaction Pending bit to clear, at most a few ms */
- for (i = 0; i < 4; i++) {
- if (i)
- msleep((1 << (i - 1)) * 100);
-
- pcie_capability_read_word(dd->pcidev, PCI_EXP_DEVSTA, &status);
- if (!(status & PCI_EXP_DEVSTA_TRPND))
- goto clear;
- }
-
- dd_dev_err(dd, "Transaction Pending bit is not clearing, proceeding with reset anyway\n");
-
-clear:
- pcie_capability_set_word(dd->pcidev, PCI_EXP_DEVCTL,
- PCI_EXP_DEVCTL_BCR_FLR);
- /* PCIe spec requires the function to be back within 100ms */
- msleep(100);
-}
-
-static void msix_setup(struct hfi1_devdata *dd, int pos, u32 *msixcnt,
- struct hfi1_msix_entry *hfi1_msix_entry)
-{
- int ret;
- int nvec = *msixcnt;
- struct msix_entry *msix_entry;
- int i;
-
- /*
- * We can't pass hfi1_msix_entry array to msix_setup
- * so use a dummy msix_entry array and copy the allocated
- * irq back to the hfi1_msix_entry array.
- */
- msix_entry = kmalloc_array(nvec, sizeof(*msix_entry), GFP_KERNEL);
- if (!msix_entry) {
- ret = -ENOMEM;
- goto do_intx;
- }
-
- for (i = 0; i < nvec; i++)
- msix_entry[i] = hfi1_msix_entry[i].msix;
-
- ret = pci_enable_msix_range(dd->pcidev, msix_entry, 1, nvec);
- if (ret < 0)
- goto free_msix_entry;
- nvec = ret;
-
- for (i = 0; i < nvec; i++)
- hfi1_msix_entry[i].msix = msix_entry[i];
-
- kfree(msix_entry);
- *msixcnt = nvec;
- return;
-
-free_msix_entry:
- kfree(msix_entry);
-
-do_intx:
- dd_dev_err(dd, "pci_enable_msix_range %d vectors failed: %d, falling back to INTx\n",
- nvec, ret);
- *msixcnt = 0;
- hfi1_enable_intx(dd->pcidev);
+ dd->piobase = NULL;
}
/* return the PCIe link speed from the given link status */
@@ -327,26 +204,26 @@ static u32 extract_speed(u16 linkstat)
case PCI_EXP_LNKSTA_CLS_5_0GB:
speed = 5000; /* Gen 2, 5GHz */
break;
- case GEN3_SPEED_VECTOR:
+ case PCI_EXP_LNKSTA_CLS_8_0GB:
speed = 8000; /* Gen 3, 8GHz */
break;
}
return speed;
}
-/* return the PCIe link speed from the given link status */
-static u32 extract_width(u16 linkstat)
-{
- return (linkstat & PCI_EXP_LNKSTA_NLW) >> PCI_EXP_LNKSTA_NLW_SHIFT;
-}
-
/* read the link status and set dd->{lbus_width,lbus_speed,lbus_info} */
static void update_lbus_info(struct hfi1_devdata *dd)
{
u16 linkstat;
+ int ret;
- pcie_capability_read_word(dd->pcidev, PCI_EXP_LNKSTA, &linkstat);
- dd->lbus_width = extract_width(linkstat);
+ ret = pcie_capability_read_word(dd->pcidev, PCI_EXP_LNKSTA, &linkstat);
+ if (ret) {
+ dd_dev_err(dd, "Unable to read from PCI config\n");
+ return;
+ }
+
+ dd->lbus_width = FIELD_GET(PCI_EXP_LNKSTA_NLW, linkstat);
dd->lbus_speed = extract_speed(linkstat);
snprintf(dd->lbus_info, sizeof(dd->lbus_info),
"PCIe,%uMHz,x%u", dd->lbus_speed, dd->lbus_width);
@@ -360,6 +237,7 @@ int pcie_speeds(struct hfi1_devdata *dd)
{
u32 linkcap;
struct pci_dev *parent = dd->pcidev->bus->self;
+ int ret;
if (!pci_is_pcie(dd->pcidev)) {
dd_dev_err(dd, "Can't find PCI Express capability!\n");
@@ -369,8 +247,13 @@ int pcie_speeds(struct hfi1_devdata *dd)
/* find if our max speed is Gen3 and parent supports Gen3 speeds */
dd->link_gen3_capable = 1;
- pcie_capability_read_dword(dd->pcidev, PCI_EXP_LNKCAP, &linkcap);
- if ((linkcap & PCI_EXP_LNKCAP_SLS) != GEN3_SPEED_VECTOR) {
+ ret = pcie_capability_read_dword(dd->pcidev, PCI_EXP_LNKCAP, &linkcap);
+ if (ret) {
+ dd_dev_err(dd, "Unable to read from PCI config\n");
+ return pcibios_err_to_errno(ret);
+ }
+
+ if ((linkcap & PCI_EXP_LNKCAP_SLS) != PCI_EXP_LNKCAP_SLS_8_0GB) {
dd_dev_info(dd,
"This HFI is not Gen3 capable, max speed 0x%x, need 0x3\n",
linkcap & PCI_EXP_LNKCAP_SLS);
@@ -380,7 +263,9 @@ int pcie_speeds(struct hfi1_devdata *dd)
/*
* bus->max_bus_speed is set from the bridge's linkcap Max Link Speed
*/
- if (parent && dd->pcidev->bus->max_bus_speed != PCIE_SPEED_8_0GT) {
+ if (parent &&
+ (dd->pcidev->bus->max_bus_speed == PCIE_SPEED_2_5GT ||
+ dd->pcidev->bus->max_bus_speed == PCIE_SPEED_5_0GT)) {
dd_dev_info(dd, "Parent PCIe bridge does not support Gen3\n");
dd->link_gen3_capable = 0;
}
@@ -394,49 +279,121 @@ int pcie_speeds(struct hfi1_devdata *dd)
}
/*
- * Returns in *nent:
- * - actual number of interrupts allocated
- * - 0 if fell back to INTx.
+ * Restore command and BARs after a reset has wiped them out
+ *
+ * Returns 0 on success, otherwise a negative error value
*/
-void request_msix(struct hfi1_devdata *dd, u32 *nent,
- struct hfi1_msix_entry *entry)
+int restore_pci_variables(struct hfi1_devdata *dd)
{
- int pos;
+ int ret;
- pos = dd->pcidev->msix_cap;
- if (*nent && pos) {
- msix_setup(dd, pos, nent, entry);
- /* did it, either MSI-X or INTx */
- } else {
- *nent = 0;
- hfi1_enable_intx(dd->pcidev);
+ ret = pci_write_config_word(dd->pcidev, PCI_COMMAND, dd->pci_command);
+ if (ret)
+ goto error;
+
+ ret = pci_write_config_dword(dd->pcidev, PCI_BASE_ADDRESS_0,
+ dd->pcibar0);
+ if (ret)
+ goto error;
+
+ ret = pci_write_config_dword(dd->pcidev, PCI_BASE_ADDRESS_1,
+ dd->pcibar1);
+ if (ret)
+ goto error;
+
+ ret = pci_write_config_dword(dd->pcidev, PCI_ROM_ADDRESS, dd->pci_rom);
+ if (ret)
+ goto error;
+
+ ret = pcie_capability_write_word(dd->pcidev, PCI_EXP_DEVCTL,
+ dd->pcie_devctl);
+ if (ret)
+ goto error;
+
+ ret = pcie_capability_write_word(dd->pcidev, PCI_EXP_LNKCTL,
+ dd->pcie_lnkctl);
+ if (ret)
+ goto error;
+
+ ret = pcie_capability_write_word(dd->pcidev, PCI_EXP_DEVCTL2,
+ dd->pcie_devctl2);
+ if (ret)
+ goto error;
+
+ ret = pci_write_config_dword(dd->pcidev, PCI_CFG_MSIX0, dd->pci_msix0);
+ if (ret)
+ goto error;
+
+ if (pci_find_ext_capability(dd->pcidev, PCI_EXT_CAP_ID_TPH)) {
+ ret = pci_write_config_dword(dd->pcidev, PCIE_CFG_TPH2,
+ dd->pci_tph2);
+ if (ret)
+ goto error;
}
+ return 0;
- tune_pcie_caps(dd);
+error:
+ dd_dev_err(dd, "Unable to write to PCI config\n");
+ return pcibios_err_to_errno(ret);
}
-void hfi1_enable_intx(struct pci_dev *pdev)
+/*
+ * Save BARs and command to rewrite after device reset
+ *
+ * Returns 0 on success, otherwise a negative error value
+ */
+int save_pci_variables(struct hfi1_devdata *dd)
{
- /* first, turn on INTx */
- pci_intx(pdev, 1);
- /* then turn off MSI-X */
- pci_disable_msix(pdev);
-}
+ int ret;
-/* restore command and BARs after a reset has wiped them out */
-void restore_pci_variables(struct hfi1_devdata *dd)
-{
- pci_write_config_word(dd->pcidev, PCI_COMMAND, dd->pci_command);
- pci_write_config_dword(dd->pcidev, PCI_BASE_ADDRESS_0, dd->pcibar0);
- pci_write_config_dword(dd->pcidev, PCI_BASE_ADDRESS_1, dd->pcibar1);
- pci_write_config_dword(dd->pcidev, PCI_ROM_ADDRESS, dd->pci_rom);
- pcie_capability_write_word(dd->pcidev, PCI_EXP_DEVCTL, dd->pcie_devctl);
- pcie_capability_write_word(dd->pcidev, PCI_EXP_LNKCTL, dd->pcie_lnkctl);
- pcie_capability_write_word(dd->pcidev, PCI_EXP_DEVCTL2,
- dd->pcie_devctl2);
- pci_write_config_dword(dd->pcidev, PCI_CFG_MSIX0, dd->pci_msix0);
- pci_write_config_dword(dd->pcidev, PCIE_CFG_SPCIE1, dd->pci_lnkctl3);
- pci_write_config_dword(dd->pcidev, PCIE_CFG_TPH2, dd->pci_tph2);
+ ret = pci_read_config_dword(dd->pcidev, PCI_BASE_ADDRESS_0,
+ &dd->pcibar0);
+ if (ret)
+ goto error;
+
+ ret = pci_read_config_dword(dd->pcidev, PCI_BASE_ADDRESS_1,
+ &dd->pcibar1);
+ if (ret)
+ goto error;
+
+ ret = pci_read_config_dword(dd->pcidev, PCI_ROM_ADDRESS, &dd->pci_rom);
+ if (ret)
+ goto error;
+
+ ret = pci_read_config_word(dd->pcidev, PCI_COMMAND, &dd->pci_command);
+ if (ret)
+ goto error;
+
+ ret = pcie_capability_read_word(dd->pcidev, PCI_EXP_DEVCTL,
+ &dd->pcie_devctl);
+ if (ret)
+ goto error;
+
+ ret = pcie_capability_read_word(dd->pcidev, PCI_EXP_LNKCTL,
+ &dd->pcie_lnkctl);
+ if (ret)
+ goto error;
+
+ ret = pcie_capability_read_word(dd->pcidev, PCI_EXP_DEVCTL2,
+ &dd->pcie_devctl2);
+ if (ret)
+ goto error;
+
+ ret = pci_read_config_dword(dd->pcidev, PCI_CFG_MSIX0, &dd->pci_msix0);
+ if (ret)
+ goto error;
+
+ if (pci_find_ext_capability(dd->pcidev, PCI_EXT_CAP_ID_TPH)) {
+ ret = pci_read_config_dword(dd->pcidev, PCIE_CFG_TPH2,
+ &dd->pci_tph2);
+ if (ret)
+ goto error;
+ }
+ return 0;
+
+error:
+ dd_dev_err(dd, "Unable to read from PCI config\n");
+ return pcibios_err_to_errno(ret);
}
/*
@@ -444,28 +401,33 @@ void restore_pci_variables(struct hfi1_devdata *dd)
* Check and optionally adjust them to maximize our throughput.
*/
static int hfi1_pcie_caps;
-module_param_named(pcie_caps, hfi1_pcie_caps, int, S_IRUGO);
+module_param_named(pcie_caps, hfi1_pcie_caps, int, 0444);
MODULE_PARM_DESC(pcie_caps, "Max PCIe tuning: Payload (0..3), ReadReq (4..7)");
-uint aspm_mode = ASPM_MODE_DISABLED;
-module_param_named(aspm, aspm_mode, uint, S_IRUGO);
-MODULE_PARM_DESC(aspm, "PCIe ASPM: 0: disable, 1: enable, 2: dynamic");
-
-static void tune_pcie_caps(struct hfi1_devdata *dd)
+/**
+ * tune_pcie_caps() - Code to adjust PCIe capabilities.
+ * @dd: Valid device data structure
+ *
+ */
+void tune_pcie_caps(struct hfi1_devdata *dd)
{
struct pci_dev *parent;
u16 rc_mpss, rc_mps, ep_mpss, ep_mps;
u16 rc_mrrs, ep_mrrs, max_mrrs, ectl;
+ int ret;
/*
* Turn on extended tags in DevCtl in case the BIOS has turned it off
* to improve WFR SDMA bandwidth
*/
- pcie_capability_read_word(dd->pcidev, PCI_EXP_DEVCTL, &ectl);
- if (!(ectl & PCI_EXP_DEVCTL_EXT_TAG)) {
+ ret = pcie_capability_read_word(dd->pcidev, PCI_EXP_DEVCTL, &ectl);
+ if ((!ret) && !(ectl & PCI_EXP_DEVCTL_EXT_TAG)) {
dd_dev_info(dd, "Enabling PCIe extended tags\n");
ectl |= PCI_EXP_DEVCTL_EXT_TAG;
- pcie_capability_write_word(dd->pcidev, PCI_EXP_DEVCTL, ectl);
+ ret = pcie_capability_write_word(dd->pcidev,
+ PCI_EXP_DEVCTL, ectl);
+ if (ret)
+ dd_dev_info(dd, "Unable to write to PCI config\n");
}
/* Find out supported and configured values for parent (root) */
parent = dd->pcidev->bus->self;
@@ -473,15 +435,22 @@ static void tune_pcie_caps(struct hfi1_devdata *dd)
* The driver cannot perform the tuning if it does not have
* access to the upstream component.
*/
- if (!parent)
+ if (!parent) {
+ dd_dev_info(dd, "Parent not found\n");
return;
+ }
if (!pci_is_root_bus(parent->bus)) {
dd_dev_info(dd, "Parent not root\n");
return;
}
-
- if (!pci_is_pcie(parent) || !pci_is_pcie(dd->pcidev))
+ if (!pci_is_pcie(parent)) {
+ dd_dev_info(dd, "Parent is not PCI Express capable\n");
+ return;
+ }
+ if (!pci_is_pcie(dd->pcidev)) {
+ dd_dev_info(dd, "PCI device is not PCI Express capable\n");
return;
+ }
rc_mpss = parent->pcie_mpss;
rc_mps = ffs(pcie_get_mps(parent)) - 8;
/* Find out supported and configured values for endpoint (us) */
@@ -583,7 +552,7 @@ pci_mmio_enabled(struct pci_dev *pdev)
if (words == ~0ULL)
ret = PCI_ERS_RESULT_NEED_RESET;
dd_dev_info(dd,
- "HFI1 mmio_enabled function called, read wordscntr %Lx, returning %d\n",
+ "HFI1 mmio_enabled function called, read wordscntr %llx, returning %d\n",
words, ret);
}
return ret;
@@ -604,7 +573,6 @@ pci_resume(struct pci_dev *pdev)
struct hfi1_devdata *dd = pci_get_drvdata(pdev);
dd_dev_info(dd, "HFI1 resume function called\n");
- pci_cleanup_aer_uncorrect_error_status(pdev);
/*
* Running jobs will fail, since it's asynchronous
* unlike sysfs-requested reset. Better than
@@ -646,9 +614,6 @@ const struct pci_error_handlers hfi1_pci_err_handler = {
/* gasket block secondary bus reset delay */
#define SBR_DELAY_US 200000 /* 200ms */
-/* mask for PCIe capability register lnkctl2 target link speed */
-#define LNKCTL2_TARGET_LINK_SPEED_MASK 0xf
-
static uint pcie_target = 3;
module_param(pcie_target, uint, S_IRUGO);
MODULE_PARM_DESC(pcie_target, "PCIe target speed (0 skip, 1-3 Gen1-3)");
@@ -663,12 +628,12 @@ MODULE_PARM_DESC(pcie_retry, "Driver will try this many times to reach requested
#define UNSET_PSET 255
#define DEFAULT_DISCRETE_PSET 2 /* discrete HFI */
-#define DEFAULT_MCP_PSET 4 /* MCP HFI */
+#define DEFAULT_MCP_PSET 6 /* MCP HFI */
static uint pcie_pset = UNSET_PSET;
module_param(pcie_pset, uint, S_IRUGO);
MODULE_PARM_DESC(pcie_pset, "PCIe Eq Pset value to use, range is 0-10");
-static uint pcie_ctle = 1; /* discrete on, integrated off */
+static uint pcie_ctle = 3; /* discrete on, integrated on */
module_param(pcie_ctle, uint, S_IRUGO);
MODULE_PARM_DESC(pcie_ctle, "PCIe static CTLE mode, bit 0 - discrete on/off, bit 1 - integrated on/off");
@@ -758,6 +723,7 @@ static int load_eq_table(struct hfi1_devdata *dd, const u8 eq[11][3], u8 fs,
u32 violation;
u32 i;
u8 c_minus1, c0, c_plus1;
+ int ret;
for (i = 0; i < 11; i++) {
/* set index */
@@ -769,8 +735,14 @@ static int load_eq_table(struct hfi1_devdata *dd, const u8 eq[11][3], u8 fs,
pci_write_config_dword(pdev, PCIE_CFG_REG_PL102,
eq_value(c_minus1, c0, c_plus1));
/* check if these coefficients violate EQ rules */
- pci_read_config_dword(dd->pcidev, PCIE_CFG_REG_PL105,
- &violation);
+ ret = pci_read_config_dword(dd->pcidev,
+ PCIE_CFG_REG_PL105, &violation);
+ if (ret) {
+ dd_dev_err(dd, "Unable to read from PCI config\n");
+ hit_error = 1;
+ break;
+ }
+
if (violation
& PCIE_CFG_REG_PL105_GEN3_EQ_VIOLATE_COEF_RULES_SMASK){
if (hit_error == 0) {
@@ -843,16 +815,11 @@ static int trigger_sbr(struct hfi1_devdata *dd)
}
/*
- * A secondary bus reset (SBR) issues a hot reset to our device.
- * The following routine does a 1s wait after the reset is dropped
- * per PCI Trhfa (recovery time). PCIe 3.0 section 6.6.1 -
- * Conventional Reset, paragraph 3, line 35 also says that a 1s
- * delay after a reset is required. Per spec requirements,
- * the link is either working or not after that point.
+ * This is an end around to do an SBR during probe time. A new API needs
+ * to be implemented to have cleaner interface but this fixes the
+ * current brokenness
*/
- pci_reset_bridge_secondary_bus(dev->bus->self);
-
- return 0;
+ return pci_bridge_secondary_bus_reset(dev->bus->self);
}
/*
@@ -976,6 +943,7 @@ int do_pcie_gen3_transition(struct hfi1_devdata *dd)
int do_retry, retry_count = 0;
int intnum = 0;
uint default_pset;
+ uint pset = pcie_pset;
u16 target_vector, target_speed;
u16 lnkctl2, vendor;
u8 div;
@@ -983,19 +951,20 @@ int do_pcie_gen3_transition(struct hfi1_devdata *dd)
const u8 (*ctle_tunings)[4];
uint static_ctle_mode;
int return_error = 0;
+ u32 target_width;
/* PCIe Gen3 is for the ASIC only */
if (dd->icode != ICODE_RTL_SILICON)
return 0;
if (pcie_target == 1) { /* target Gen1 */
- target_vector = GEN1_SPEED_VECTOR;
+ target_vector = PCI_EXP_LNKCTL2_TLS_2_5GT;
target_speed = 2500;
} else if (pcie_target == 2) { /* target Gen2 */
- target_vector = GEN2_SPEED_VECTOR;
+ target_vector = PCI_EXP_LNKCTL2_TLS_5_0GT;
target_speed = 5000;
} else if (pcie_target == 3) { /* target Gen3 */
- target_vector = GEN3_SPEED_VECTOR;
+ target_vector = PCI_EXP_LNKCTL2_TLS_8_0GT;
target_speed = 8000;
} else {
/* off or invalid target - skip */
@@ -1022,6 +991,9 @@ int do_pcie_gen3_transition(struct hfi1_devdata *dd)
return 0;
}
+ /* Previous Gen1/Gen2 bus width */
+ target_width = dd->lbus_width;
+
/*
* Do the Gen3 transition. Steps are those of the PCIe Gen3
* recipe.
@@ -1143,16 +1115,16 @@ retry:
*
* Set Gen3EqPsetReqVec, leave other fields 0.
*/
- if (pcie_pset == UNSET_PSET)
- pcie_pset = default_pset;
- if (pcie_pset > 10) { /* valid range is 0-10, inclusive */
+ if (pset == UNSET_PSET)
+ pset = default_pset;
+ if (pset > 10) { /* valid range is 0-10, inclusive */
dd_dev_err(dd, "%s: Invalid Eq Pset %u, setting to %d\n",
- __func__, pcie_pset, default_pset);
- pcie_pset = default_pset;
+ __func__, pset, default_pset);
+ pset = default_pset;
}
- dd_dev_info(dd, "%s: using EQ Pset %u\n", __func__, pcie_pset);
+ dd_dev_info(dd, "%s: using EQ Pset %u\n", __func__, pset);
pci_write_config_dword(dd->pcidev, PCIE_CFG_REG_PL106,
- ((1 << pcie_pset) <<
+ ((1 << pset) <<
PCIE_CFG_REG_PL106_GEN3_EQ_PSET_REQ_VEC_SHIFT) |
PCIE_CFG_REG_PL106_GEN3_EQ_EVAL2MS_DISABLE_SMASK |
PCIE_CFG_REG_PL106_GEN3_EQ_PHASE23_EXIT_MODE_SMASK);
@@ -1182,10 +1154,10 @@ retry:
/* apply static CTLE tunings */
u8 pcie_dc, pcie_lf, pcie_hf, pcie_bw;
- pcie_dc = ctle_tunings[pcie_pset][0];
- pcie_lf = ctle_tunings[pcie_pset][1];
- pcie_hf = ctle_tunings[pcie_pset][2];
- pcie_bw = ctle_tunings[pcie_pset][3];
+ pcie_dc = ctle_tunings[pset][0];
+ pcie_lf = ctle_tunings[pset][1];
+ pcie_hf = ctle_tunings[pset][2];
+ pcie_bw = ctle_tunings[pset][3];
write_gasket_interrupt(dd, intnum++, 0x0026, 0x0200 | pcie_dc);
write_gasket_interrupt(dd, intnum++, 0x0026, 0x0100 | pcie_lf);
write_gasket_interrupt(dd, intnum++, 0x0026, 0x0000 | pcie_hf);
@@ -1224,29 +1196,38 @@ retry:
* that it is Gen3 capable earlier.
*/
dd_dev_info(dd, "%s: setting parent target link speed\n", __func__);
- pcie_capability_read_word(parent, PCI_EXP_LNKCTL2, &lnkctl2);
+ ret = pcie_capability_read_word(parent, PCI_EXP_LNKCTL2, &lnkctl2);
+ if (ret) {
+ dd_dev_err(dd, "Unable to read from PCI config\n");
+ return_error = 1;
+ goto done;
+ }
+
dd_dev_info(dd, "%s: ..old link control2: 0x%x\n", __func__,
(u32)lnkctl2);
/* only write to parent if target is not as high as ours */
- if ((lnkctl2 & LNKCTL2_TARGET_LINK_SPEED_MASK) < target_vector) {
- lnkctl2 &= ~LNKCTL2_TARGET_LINK_SPEED_MASK;
- lnkctl2 |= target_vector;
- dd_dev_info(dd, "%s: ..new link control2: 0x%x\n", __func__,
- (u32)lnkctl2);
- pcie_capability_write_word(parent, PCI_EXP_LNKCTL2, lnkctl2);
+ if ((lnkctl2 & PCI_EXP_LNKCTL2_TLS) < target_vector) {
+ ret = pcie_capability_clear_and_set_word(parent, PCI_EXP_LNKCTL2,
+ PCI_EXP_LNKCTL2_TLS,
+ target_vector);
+ if (ret) {
+ dd_dev_err(dd, "Unable to change parent PCI target speed\n");
+ return_error = 1;
+ goto done;
+ }
} else {
dd_dev_info(dd, "%s: ..target speed is OK\n", __func__);
}
dd_dev_info(dd, "%s: setting target link speed\n", __func__);
- pcie_capability_read_word(dd->pcidev, PCI_EXP_LNKCTL2, &lnkctl2);
- dd_dev_info(dd, "%s: ..old link control2: 0x%x\n", __func__,
- (u32)lnkctl2);
- lnkctl2 &= ~LNKCTL2_TARGET_LINK_SPEED_MASK;
- lnkctl2 |= target_vector;
- dd_dev_info(dd, "%s: ..new link control2: 0x%x\n", __func__,
- (u32)lnkctl2);
- pcie_capability_write_word(dd->pcidev, PCI_EXP_LNKCTL2, lnkctl2);
+ ret = pcie_capability_clear_and_set_word(dd->pcidev, PCI_EXP_LNKCTL2,
+ PCI_EXP_LNKCTL2_TLS,
+ target_vector);
+ if (ret) {
+ dd_dev_err(dd, "Unable to change device PCI target speed\n");
+ return_error = 1;
+ goto done;
+ }
/* step 5h: arm gasket logic */
/* hold DC in reset across the SBR */
@@ -1296,7 +1277,14 @@ retry:
/* restore PCI space registers we know were reset */
dd_dev_info(dd, "%s: calling restore_pci_variables\n", __func__);
- restore_pci_variables(dd);
+ ret = restore_pci_variables(dd);
+ if (ret) {
+ dd_dev_err(dd, "%s: Could not restore PCI variables\n",
+ __func__);
+ return_error = 1;
+ goto done;
+ }
+
/* restore firmware control */
write_csr(dd, MISC_CFG_FW_CTRL, fw_ctrl);
@@ -1326,7 +1314,13 @@ retry:
setextled(dd, 0);
/* check for any per-lane errors */
- pci_read_config_dword(dd->pcidev, PCIE_CFG_SPCIE2, &reg32);
+ ret = pci_read_config_dword(dd->pcidev, PCIE_CFG_SPCIE2, &reg32);
+ if (ret) {
+ dd_dev_err(dd, "Unable to read from PCI config\n");
+ return_error = 1;
+ goto done;
+ }
+
dd_dev_info(dd, "%s: per-lane errors: 0x%x\n", __func__, reg32);
/* extract status, look for our HFI */
@@ -1354,11 +1348,12 @@ retry:
dd_dev_info(dd, "%s: new speed and width: %s\n", __func__,
dd->lbus_info);
- if (dd->lbus_speed != target_speed) { /* not target */
+ if (dd->lbus_speed != target_speed ||
+ dd->lbus_width < target_width) { /* not target */
/* maybe retry */
do_retry = retry_count < pcie_retry;
- dd_dev_err(dd, "PCIe link speed did not switch to Gen%d%s\n",
- pcie_target, do_retry ? ", retrying" : "");
+ dd_dev_err(dd, "PCIe link speed or width did not match target%s\n",
+ do_retry ? ", retrying" : "");
retry_count++;
if (do_retry) {
msleep(100); /* allow time to settle */
diff --git a/drivers/infiniband/hw/hfi1/pin_system.c b/drivers/infiniband/hw/hfi1/pin_system.c
new file mode 100644
index 000000000000..cce56134519b
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/pin_system.c
@@ -0,0 +1,474 @@
+// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
+/*
+ * Copyright(c) 2023 - Cornelis Networks, Inc.
+ */
+
+#include <linux/types.h>
+
+#include "hfi.h"
+#include "common.h"
+#include "device.h"
+#include "pinning.h"
+#include "mmu_rb.h"
+#include "user_sdma.h"
+#include "trace.h"
+
+struct sdma_mmu_node {
+ struct mmu_rb_node rb;
+ struct hfi1_user_sdma_pkt_q *pq;
+ struct page **pages;
+ unsigned int npages;
+};
+
+static bool sdma_rb_filter(struct mmu_rb_node *node, unsigned long addr,
+ unsigned long len);
+static int sdma_rb_evict(void *arg, struct mmu_rb_node *mnode, void *arg2,
+ bool *stop);
+static void sdma_rb_remove(void *arg, struct mmu_rb_node *mnode);
+
+static const struct mmu_rb_ops sdma_rb_ops = {
+ .filter = sdma_rb_filter,
+ .evict = sdma_rb_evict,
+ .remove = sdma_rb_remove,
+};
+
+int hfi1_init_system_pinning(struct hfi1_user_sdma_pkt_q *pq)
+{
+ struct hfi1_devdata *dd = pq->dd;
+ int ret;
+
+ ret = hfi1_mmu_rb_register(pq, &sdma_rb_ops, dd->pport->hfi1_wq,
+ &pq->handler);
+ if (ret)
+ dd_dev_err(dd,
+ "[%u:%u] Failed to register system memory DMA support with MMU: %d\n",
+ pq->ctxt, pq->subctxt, ret);
+ return ret;
+}
+
+void hfi1_free_system_pinning(struct hfi1_user_sdma_pkt_q *pq)
+{
+ if (pq->handler)
+ hfi1_mmu_rb_unregister(pq->handler);
+}
+
+static u32 sdma_cache_evict(struct hfi1_user_sdma_pkt_q *pq, u32 npages)
+{
+ struct evict_data evict_data;
+
+ evict_data.cleared = 0;
+ evict_data.target = npages;
+ hfi1_mmu_rb_evict(pq->handler, &evict_data);
+ return evict_data.cleared;
+}
+
+static void unpin_vector_pages(struct mm_struct *mm, struct page **pages,
+ unsigned int start, unsigned int npages)
+{
+ hfi1_release_user_pages(mm, pages + start, npages, false);
+ kfree(pages);
+}
+
+static inline struct mm_struct *mm_from_sdma_node(struct sdma_mmu_node *node)
+{
+ return node->rb.handler->mn.mm;
+}
+
+static void free_system_node(struct sdma_mmu_node *node)
+{
+ if (node->npages) {
+ unpin_vector_pages(mm_from_sdma_node(node), node->pages, 0,
+ node->npages);
+ atomic_sub(node->npages, &node->pq->n_locked);
+ }
+ kfree(node);
+}
+
+/*
+ * kref_get()'s an additional kref on the returned rb_node to prevent rb_node
+ * from being released until after rb_node is assigned to an SDMA descriptor
+ * (struct sdma_desc) under add_system_iovec_to_sdma_packet(), even if the
+ * virtual address range for rb_node is invalidated between now and then.
+ */
+static struct sdma_mmu_node *find_system_node(struct mmu_rb_handler *handler,
+ unsigned long start,
+ unsigned long end)
+{
+ struct mmu_rb_node *rb_node;
+ unsigned long flags;
+
+ spin_lock_irqsave(&handler->lock, flags);
+ rb_node = hfi1_mmu_rb_get_first(handler, start, (end - start));
+ if (!rb_node) {
+ spin_unlock_irqrestore(&handler->lock, flags);
+ return NULL;
+ }
+
+ /* "safety" kref to prevent release before add_system_iovec_to_sdma_packet() */
+ kref_get(&rb_node->refcount);
+ spin_unlock_irqrestore(&handler->lock, flags);
+
+ return container_of(rb_node, struct sdma_mmu_node, rb);
+}
+
+static int pin_system_pages(struct user_sdma_request *req,
+ uintptr_t start_address, size_t length,
+ struct sdma_mmu_node *node, int npages)
+{
+ struct hfi1_user_sdma_pkt_q *pq = req->pq;
+ int pinned, cleared;
+ struct page **pages;
+
+ pages = kcalloc(npages, sizeof(*pages), GFP_KERNEL);
+ if (!pages)
+ return -ENOMEM;
+
+retry:
+ if (!hfi1_can_pin_pages(pq->dd, current->mm, atomic_read(&pq->n_locked),
+ npages)) {
+ SDMA_DBG(req, "Evicting: nlocked %u npages %u",
+ atomic_read(&pq->n_locked), npages);
+ cleared = sdma_cache_evict(pq, npages);
+ if (cleared >= npages)
+ goto retry;
+ }
+
+ SDMA_DBG(req, "Acquire user pages start_address %lx node->npages %u npages %u",
+ start_address, node->npages, npages);
+ pinned = hfi1_acquire_user_pages(current->mm, start_address, npages, 0,
+ pages);
+
+ if (pinned < 0) {
+ kfree(pages);
+ SDMA_DBG(req, "pinned %d", pinned);
+ return pinned;
+ }
+ if (pinned != npages) {
+ unpin_vector_pages(current->mm, pages, node->npages, pinned);
+ SDMA_DBG(req, "npages %u pinned %d", npages, pinned);
+ return -EFAULT;
+ }
+ node->rb.addr = start_address;
+ node->rb.len = length;
+ node->pages = pages;
+ node->npages = npages;
+ atomic_add(pinned, &pq->n_locked);
+ SDMA_DBG(req, "done. pinned %d", pinned);
+ return 0;
+}
+
+/*
+ * kref refcount on *node_p will be 2 on successful addition: one kref from
+ * kref_init() for mmu_rb_handler and one kref to prevent *node_p from being
+ * released until after *node_p is assigned to an SDMA descriptor (struct
+ * sdma_desc) under add_system_iovec_to_sdma_packet(), even if the virtual
+ * address range for *node_p is invalidated between now and then.
+ */
+static int add_system_pinning(struct user_sdma_request *req,
+ struct sdma_mmu_node **node_p,
+ unsigned long start, unsigned long len)
+
+{
+ struct hfi1_user_sdma_pkt_q *pq = req->pq;
+ struct sdma_mmu_node *node;
+ int ret;
+
+ node = kzalloc(sizeof(*node), GFP_KERNEL);
+ if (!node)
+ return -ENOMEM;
+
+ /* First kref "moves" to mmu_rb_handler */
+ kref_init(&node->rb.refcount);
+
+ /* "safety" kref to prevent release before add_system_iovec_to_sdma_packet() */
+ kref_get(&node->rb.refcount);
+
+ node->pq = pq;
+ ret = pin_system_pages(req, start, len, node, PFN_DOWN(len));
+ if (ret == 0) {
+ ret = hfi1_mmu_rb_insert(pq->handler, &node->rb);
+ if (ret)
+ free_system_node(node);
+ else
+ *node_p = node;
+
+ return ret;
+ }
+
+ kfree(node);
+ return ret;
+}
+
+static int get_system_cache_entry(struct user_sdma_request *req,
+ struct sdma_mmu_node **node_p,
+ size_t req_start, size_t req_len)
+{
+ struct hfi1_user_sdma_pkt_q *pq = req->pq;
+ u64 start = ALIGN_DOWN(req_start, PAGE_SIZE);
+ u64 end = PFN_ALIGN(req_start + req_len);
+ int ret;
+
+ if ((end - start) == 0) {
+ SDMA_DBG(req,
+ "Request for empty cache entry req_start %lx req_len %lx start %llx end %llx",
+ req_start, req_len, start, end);
+ return -EINVAL;
+ }
+
+ SDMA_DBG(req, "req_start %lx req_len %lu", req_start, req_len);
+
+ while (1) {
+ struct sdma_mmu_node *node =
+ find_system_node(pq->handler, start, end);
+ u64 prepend_len = 0;
+
+ SDMA_DBG(req, "node %p start %llx end %llu", node, start, end);
+ if (!node) {
+ ret = add_system_pinning(req, node_p, start,
+ end - start);
+ if (ret == -EEXIST) {
+ /*
+ * Another execution context has inserted a
+ * conficting entry first.
+ */
+ continue;
+ }
+ return ret;
+ }
+
+ if (node->rb.addr <= start) {
+ /*
+ * This entry covers at least part of the region. If it doesn't extend
+ * to the end, then this will be called again for the next segment.
+ */
+ *node_p = node;
+ return 0;
+ }
+
+ SDMA_DBG(req, "prepend: node->rb.addr %lx, node->rb.refcount %d",
+ node->rb.addr, kref_read(&node->rb.refcount));
+ prepend_len = node->rb.addr - start;
+
+ /*
+ * This node will not be returned, instead a new node
+ * will be. So release the reference.
+ */
+ kref_put(&node->rb.refcount, hfi1_mmu_rb_release);
+
+ /* Prepend a node to cover the beginning of the allocation */
+ ret = add_system_pinning(req, node_p, start, prepend_len);
+ if (ret == -EEXIST) {
+ /* Another execution context has inserted a conficting entry first. */
+ continue;
+ }
+ return ret;
+ }
+}
+
+static void sdma_mmu_rb_node_get(void *ctx)
+{
+ struct mmu_rb_node *node = ctx;
+
+ kref_get(&node->refcount);
+}
+
+static void sdma_mmu_rb_node_put(void *ctx)
+{
+ struct sdma_mmu_node *node = ctx;
+
+ kref_put(&node->rb.refcount, hfi1_mmu_rb_release);
+}
+
+static int add_mapping_to_sdma_packet(struct user_sdma_request *req,
+ struct user_sdma_txreq *tx,
+ struct sdma_mmu_node *cache_entry,
+ size_t start,
+ size_t from_this_cache_entry)
+{
+ struct hfi1_user_sdma_pkt_q *pq = req->pq;
+ unsigned int page_offset;
+ unsigned int from_this_page;
+ size_t page_index;
+ void *ctx;
+ int ret;
+
+ /*
+ * Because the cache may be more fragmented than the memory that is being accessed,
+ * it's not strictly necessary to have a descriptor per cache entry.
+ */
+
+ while (from_this_cache_entry) {
+ page_index = PFN_DOWN(start - cache_entry->rb.addr);
+
+ if (page_index >= cache_entry->npages) {
+ SDMA_DBG(req,
+ "Request for page_index %zu >= cache_entry->npages %u",
+ page_index, cache_entry->npages);
+ return -EINVAL;
+ }
+
+ page_offset = start - ALIGN_DOWN(start, PAGE_SIZE);
+ from_this_page = PAGE_SIZE - page_offset;
+
+ if (from_this_page < from_this_cache_entry) {
+ ctx = NULL;
+ } else {
+ /*
+ * In the case they are equal the next line has no practical effect,
+ * but it's better to do a register to register copy than a conditional
+ * branch.
+ */
+ from_this_page = from_this_cache_entry;
+ ctx = cache_entry;
+ }
+
+ ret = sdma_txadd_page(pq->dd, &tx->txreq,
+ cache_entry->pages[page_index],
+ page_offset, from_this_page,
+ ctx,
+ sdma_mmu_rb_node_get,
+ sdma_mmu_rb_node_put);
+ if (ret) {
+ /*
+ * When there's a failure, the entire request is freed by
+ * user_sdma_send_pkts().
+ */
+ SDMA_DBG(req,
+ "sdma_txadd_page failed %d page_index %lu page_offset %u from_this_page %u",
+ ret, page_index, page_offset, from_this_page);
+ return ret;
+ }
+ start += from_this_page;
+ from_this_cache_entry -= from_this_page;
+ }
+ return 0;
+}
+
+static int add_system_iovec_to_sdma_packet(struct user_sdma_request *req,
+ struct user_sdma_txreq *tx,
+ struct user_sdma_iovec *iovec,
+ size_t from_this_iovec)
+{
+ while (from_this_iovec > 0) {
+ struct sdma_mmu_node *cache_entry;
+ size_t from_this_cache_entry;
+ size_t start;
+ int ret;
+
+ start = (uintptr_t)iovec->iov.iov_base + iovec->offset;
+ ret = get_system_cache_entry(req, &cache_entry, start,
+ from_this_iovec);
+ if (ret) {
+ SDMA_DBG(req, "pin system segment failed %d", ret);
+ return ret;
+ }
+
+ from_this_cache_entry = cache_entry->rb.len - (start - cache_entry->rb.addr);
+ if (from_this_cache_entry > from_this_iovec)
+ from_this_cache_entry = from_this_iovec;
+
+ ret = add_mapping_to_sdma_packet(req, tx, cache_entry, start,
+ from_this_cache_entry);
+
+ /*
+ * Done adding cache_entry to zero or more sdma_desc. Can
+ * kref_put() the "safety" kref taken under
+ * get_system_cache_entry().
+ */
+ kref_put(&cache_entry->rb.refcount, hfi1_mmu_rb_release);
+
+ if (ret) {
+ SDMA_DBG(req, "add system segment failed %d", ret);
+ return ret;
+ }
+
+ iovec->offset += from_this_cache_entry;
+ from_this_iovec -= from_this_cache_entry;
+ }
+
+ return 0;
+}
+
+/*
+ * Add up to pkt_data_remaining bytes to the txreq, starting at the current
+ * offset in the given iovec entry and continuing until all data has been added
+ * to the iovec or the iovec entry type changes.
+ *
+ * On success, prior to returning, adjust pkt_data_remaining, req->iov_idx, and
+ * the offset value in req->iov[req->iov_idx] to reflect the data that has been
+ * consumed.
+ */
+int hfi1_add_pages_to_sdma_packet(struct user_sdma_request *req,
+ struct user_sdma_txreq *tx,
+ struct user_sdma_iovec *iovec,
+ u32 *pkt_data_remaining)
+{
+ size_t remaining_to_add = *pkt_data_remaining;
+ /*
+ * Walk through iovec entries, ensure the associated pages
+ * are pinned and mapped, add data to the packet until no more
+ * data remains to be added or the iovec entry type changes.
+ */
+ while (remaining_to_add > 0) {
+ struct user_sdma_iovec *cur_iovec;
+ size_t from_this_iovec;
+ int ret;
+
+ cur_iovec = iovec;
+ from_this_iovec = iovec->iov.iov_len - iovec->offset;
+
+ if (from_this_iovec > remaining_to_add) {
+ from_this_iovec = remaining_to_add;
+ } else {
+ /* The current iovec entry will be consumed by this pass. */
+ req->iov_idx++;
+ iovec++;
+ }
+
+ ret = add_system_iovec_to_sdma_packet(req, tx, cur_iovec,
+ from_this_iovec);
+ if (ret)
+ return ret;
+
+ remaining_to_add -= from_this_iovec;
+ }
+ *pkt_data_remaining = remaining_to_add;
+
+ return 0;
+}
+
+static bool sdma_rb_filter(struct mmu_rb_node *node, unsigned long addr,
+ unsigned long len)
+{
+ return (bool)(node->addr == addr);
+}
+
+/*
+ * Return 1 to remove the node from the rb tree and call the remove op.
+ *
+ * Called with the rb tree lock held.
+ */
+static int sdma_rb_evict(void *arg, struct mmu_rb_node *mnode,
+ void *evict_arg, bool *stop)
+{
+ struct sdma_mmu_node *node =
+ container_of(mnode, struct sdma_mmu_node, rb);
+ struct evict_data *evict_data = evict_arg;
+
+ /* this node will be evicted, add its pages to our count */
+ evict_data->cleared += node->npages;
+
+ /* have enough pages been cleared? */
+ if (evict_data->cleared >= evict_data->target)
+ *stop = true;
+
+ return 1; /* remove this node */
+}
+
+static void sdma_rb_remove(void *arg, struct mmu_rb_node *mnode)
+{
+ struct sdma_mmu_node *node =
+ container_of(mnode, struct sdma_mmu_node, rb);
+
+ free_system_node(node);
+}
diff --git a/drivers/infiniband/hw/hfi1/pinning.h b/drivers/infiniband/hw/hfi1/pinning.h
new file mode 100644
index 000000000000..a814a3aa9654
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/pinning.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
+/*
+ * Copyright(c) 2023 Cornelis Networks, Inc.
+ */
+#ifndef _HFI1_PINNING_H
+#define _HFI1_PINNING_H
+
+struct hfi1_user_sdma_pkt_q;
+struct user_sdma_request;
+struct user_sdma_txreq;
+struct user_sdma_iovec;
+
+int hfi1_init_system_pinning(struct hfi1_user_sdma_pkt_q *pq);
+void hfi1_free_system_pinning(struct hfi1_user_sdma_pkt_q *pq);
+int hfi1_add_pages_to_sdma_packet(struct user_sdma_request *req,
+ struct user_sdma_txreq *tx,
+ struct user_sdma_iovec *iovec,
+ u32 *pkt_data_remaining);
+
+#endif /* _HFI1_PINNING_H */
diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c
index 615be68e40b3..764286da2ce8 100644
--- a/drivers/infiniband/hw/hfi1/pio.c
+++ b/drivers/infiniband/hw/hfi1/pio.c
@@ -1,48 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
+ * Copyright(c) 2015-2018 Intel Corporation.
*/
#include <linux/delay.h>
@@ -50,8 +8,6 @@
#include "qp.h"
#include "trace.h"
-#define SC_CTXT_PACKET_EGRESS_TIMEOUT 350 /* in chip cycles */
-
#define SC(name) SEND_CTXT_##name
/*
* Send Context functions
@@ -73,14 +29,6 @@ void __cm_reset(struct hfi1_devdata *dd, u64 sendctrl)
}
}
-/* defined in header release 48 and higher */
-#ifndef SEND_CTRL_UNSUPPORTED_VL_SHIFT
-#define SEND_CTRL_UNSUPPORTED_VL_SHIFT 3
-#define SEND_CTRL_UNSUPPORTED_VL_MASK 0xffull
-#define SEND_CTRL_UNSUPPORTED_VL_SMASK (SEND_CTRL_UNSUPPORTED_VL_MASK \
- << SEND_CTRL_UNSUPPORTED_VL_SHIFT)
-#endif
-
/* global control of PIO send */
void pio_send_control(struct hfi1_devdata *dd, int op)
{
@@ -88,6 +36,7 @@ void pio_send_control(struct hfi1_devdata *dd, int op)
unsigned long flags;
int write = 1; /* write sendctrl back */
int flush = 0; /* re-read sendctrl to make sure it is flushed */
+ int i;
spin_lock_irqsave(&dd->sendctrl_lock, flags);
@@ -95,11 +44,15 @@ void pio_send_control(struct hfi1_devdata *dd, int op)
switch (op) {
case PSC_GLOBAL_ENABLE:
reg |= SEND_CTRL_SEND_ENABLE_SMASK;
- /* Fall through */
+ fallthrough;
case PSC_DATA_VL_ENABLE:
+ mask = 0;
+ for (i = 0; i < ARRAY_SIZE(dd->vld); i++)
+ if (!dd->vld[i].mtu)
+ mask |= BIT_ULL(i);
/* Disallow sending on VLs not enabled */
- mask = (((~0ull) << num_vls) & SEND_CTRL_UNSUPPORTED_VL_MASK) <<
- SEND_CTRL_UNSUPPORTED_VL_SHIFT;
+ mask = (mask & SEND_CTRL_UNSUPPORTED_VL_MASK) <<
+ SEND_CTRL_UNSUPPORTED_VL_SHIFT;
reg = (reg & ~SEND_CTRL_UNSUPPORTED_VL_SMASK) | mask;
break;
case PSC_GLOBAL_DISABLE:
@@ -228,7 +181,7 @@ static const char *sc_type_name(int index)
int init_sc_pools_and_sizes(struct hfi1_devdata *dd)
{
struct mem_pool_info mem_pool_info[NUM_SC_POOLS] = { { 0 } };
- int total_blocks = (dd->chip_pio_mem_size / PIO_BLOCK_SIZE) - 1;
+ int total_blocks = (chip_pio_mem_size(dd) / PIO_BLOCK_SIZE) - 1;
int total_contexts = 0;
int fixed_blocks;
int pool_blocks;
@@ -345,8 +298,8 @@ int init_sc_pools_and_sizes(struct hfi1_devdata *dd)
sc_type_name(i), count);
return -EINVAL;
}
- if (total_contexts + count > dd->chip_send_contexts)
- count = dd->chip_send_contexts - total_contexts;
+ if (total_contexts + count > chip_send_contexts(dd))
+ count = chip_send_contexts(dd) - total_contexts;
total_contexts += count;
@@ -455,8 +408,8 @@ int init_send_contexts(struct hfi1_devdata *dd)
dd->hw_to_sw = kmalloc_array(TXE_NUM_CONTEXTS, sizeof(u8),
GFP_KERNEL);
dd->send_contexts = kcalloc(dd->num_send_contexts,
- sizeof(struct send_context_info),
- GFP_KERNEL);
+ sizeof(struct send_context_info),
+ GFP_KERNEL);
if (!dd->send_contexts || !dd->hw_to_sw) {
kfree(dd->hw_to_sw);
kfree(dd->send_contexts);
@@ -509,7 +462,7 @@ static int sc_hw_alloc(struct hfi1_devdata *dd, int type, u32 *sw_index,
if (sci->type == type && sci->allocated == 0) {
sci->allocated = 1;
/* use a 1:1 mapping, but make them non-equal */
- context = dd->chip_send_contexts - index - 1;
+ context = chip_send_contexts(dd) - index - 1;
dd->hw_to_sw[context] = index;
*sw_index = index;
*hw_context = context;
@@ -747,6 +700,7 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type,
spin_lock_init(&sc->alloc_lock);
spin_lock_init(&sc->release_lock);
spin_lock_init(&sc->credit_ctrl_lock);
+ seqlock_init(&sc->waitlock);
INIT_LIST_HEAD(&sc->piowait);
INIT_WORK(&sc->halt_work, sc_halted);
init_waitqueue_head(&sc->halt_wait);
@@ -856,8 +810,9 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type,
* so head == tail can mean empty.
*/
sc->sr_size = sci->credits + 1;
- sc->sr = kzalloc_node(sizeof(union pio_shadow_ring) *
- sc->sr_size, GFP_KERNEL, numa);
+ sc->sr = kcalloc_node(sc->sr_size,
+ sizeof(union pio_shadow_ring),
+ GFP_KERNEL, numa);
if (!sc->sr) {
sc_free(sc);
return NULL;
@@ -865,7 +820,7 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type,
}
hfi1_cdbg(PIO,
- "Send context %u(%u) %s group %u credits %u credit_ctrl 0x%llx threshold %u\n",
+ "Send context %u(%u) %s group %u credits %u credit_ctrl 0x%llx threshold %u",
sw_index,
hw_context,
sc_type_name(type),
@@ -922,20 +877,19 @@ void sc_free(struct send_context *sc)
void sc_disable(struct send_context *sc)
{
u64 reg;
- unsigned long flags;
struct pio_buf *pbuf;
+ LIST_HEAD(wake_list);
if (!sc)
return;
/* do all steps, even if already disabled */
- spin_lock_irqsave(&sc->alloc_lock, flags);
+ spin_lock_irq(&sc->alloc_lock);
reg = read_kctxt_csr(sc->dd, sc->hw_context, SC(CTRL));
reg &= ~SC(CTRL_CTXT_ENABLE_SMASK);
sc->flags &= ~SCF_ENABLED;
sc_wait_for_packet_egress(sc, 1);
write_kctxt_csr(sc->dd, sc->hw_context, SC(CTRL), reg);
- spin_unlock_irqrestore(&sc->alloc_lock, flags);
/*
* Flush any waiters. Once the context is disabled,
@@ -945,7 +899,7 @@ void sc_disable(struct send_context *sc)
* proceed with the flush.
*/
udelay(1);
- spin_lock_irqsave(&sc->release_lock, flags);
+ spin_lock(&sc->release_lock);
if (sc->sr) { /* this context has a shadow ring */
while (sc->sr_tail != sc->sr_head) {
pbuf = &sc->sr[sc->sr_tail].pbuf;
@@ -956,19 +910,62 @@ void sc_disable(struct send_context *sc)
sc->sr_tail = 0;
}
}
- spin_unlock_irqrestore(&sc->release_lock, flags);
+ spin_unlock(&sc->release_lock);
+
+ write_seqlock(&sc->waitlock);
+ list_splice_init(&sc->piowait, &wake_list);
+ write_sequnlock(&sc->waitlock);
+ while (!list_empty(&wake_list)) {
+ struct iowait *wait;
+ struct rvt_qp *qp;
+ struct hfi1_qp_priv *priv;
+
+ wait = list_first_entry(&wake_list, struct iowait, list);
+ qp = iowait_to_qp(wait);
+ priv = qp->priv;
+ list_del_init(&priv->s_iowait.list);
+ priv->s_iowait.lock = NULL;
+ hfi1_qp_wakeup(qp, RVT_S_WAIT_PIO | HFI1_S_WAIT_PIO_DRAIN);
+ }
+
+ spin_unlock_irq(&sc->alloc_lock);
}
/* return SendEgressCtxtStatus.PacketOccupancy */
-#define packet_occupancy(r) \
- (((r) & SEND_EGRESS_CTXT_STATUS_CTXT_EGRESS_PACKET_OCCUPANCY_SMASK)\
- >> SEND_EGRESS_CTXT_STATUS_CTXT_EGRESS_PACKET_OCCUPANCY_SHIFT)
+static u64 packet_occupancy(u64 reg)
+{
+ return (reg &
+ SEND_EGRESS_CTXT_STATUS_CTXT_EGRESS_PACKET_OCCUPANCY_SMASK)
+ >> SEND_EGRESS_CTXT_STATUS_CTXT_EGRESS_PACKET_OCCUPANCY_SHIFT;
+}
/* is egress halted on the context? */
-#define egress_halted(r) \
- ((r) & SEND_EGRESS_CTXT_STATUS_CTXT_EGRESS_HALT_STATUS_SMASK)
+static bool egress_halted(u64 reg)
+{
+ return !!(reg & SEND_EGRESS_CTXT_STATUS_CTXT_EGRESS_HALT_STATUS_SMASK);
+}
-/* wait for packet egress, optionally pause for credit return */
+/* is the send context halted? */
+static bool is_sc_halted(struct hfi1_devdata *dd, u32 hw_context)
+{
+ return !!(read_kctxt_csr(dd, hw_context, SC(STATUS)) &
+ SC(STATUS_CTXT_HALTED_SMASK));
+}
+
+/**
+ * sc_wait_for_packet_egress - wait for packet
+ * @sc: valid send context
+ * @pause: wait for credit return
+ *
+ * Wait for packet egress, optionally pause for credit return
+ *
+ * Egress halt and Context halt are not necessarily the same thing, so
+ * check for both.
+ *
+ * NOTE: The context halt bit may not be set immediately. Because of this,
+ * it is necessary to check the SW SFC_HALTED bit (set in the IRQ) and the HW
+ * context bit to determine if the context is halted.
+ */
static void sc_wait_for_packet_egress(struct send_context *sc, int pause)
{
struct hfi1_devdata *dd = sc->dd;
@@ -980,8 +977,9 @@ static void sc_wait_for_packet_egress(struct send_context *sc, int pause)
reg_prev = reg;
reg = read_csr(dd, sc->hw_context * 8 +
SEND_EGRESS_CTXT_STATUS);
- /* done if egress is stopped */
- if (egress_halted(reg))
+ /* done if any halt bits, SW or HW are set */
+ if (sc->flags & SCF_HALTED ||
+ is_sc_halted(dd, sc->hw_context) || egress_halted(reg))
break;
reg = packet_occupancy(reg);
if (reg == 0)
@@ -995,7 +993,7 @@ static void sc_wait_for_packet_egress(struct send_context *sc, int pause)
"%s: context %u(%u) timeout waiting for packets to egress, remaining count %u, bouncing link\n",
__func__, sc->sw_index,
sc->hw_context, (u32)reg);
- queue_work(dd->pport->hfi1_wq,
+ queue_work(dd->pport->link_wq,
&dd->pport->link_bounce_work);
break;
}
@@ -1153,11 +1151,39 @@ void pio_kernel_unfreeze(struct hfi1_devdata *dd)
sc = dd->send_contexts[i].sc;
if (!sc || !(sc->flags & SCF_FROZEN) || sc->type == SC_USER)
continue;
+ if (sc->flags & SCF_LINK_DOWN)
+ continue;
sc_enable(sc); /* will clear the sc frozen flag */
}
}
+/**
+ * pio_kernel_linkup() - Re-enable send contexts after linkup event
+ * @dd: valid devive data
+ *
+ * When the link goes down, the freeze path is taken. However, a link down
+ * event is different from a freeze because if the send context is re-enabled
+ * whowever is sending data will start sending data again, which will hang
+ * any QP that is sending data.
+ *
+ * The freeze path now looks at the type of event that occurs and takes this
+ * path for link down event.
+ */
+void pio_kernel_linkup(struct hfi1_devdata *dd)
+{
+ struct send_context *sc;
+ int i;
+
+ for (i = 0; i < dd->num_send_contexts; i++) {
+ sc = dd->send_contexts[i].sc;
+ if (!sc || !(sc->flags & SCF_LINK_DOWN) || sc->type == SC_USER)
+ continue;
+
+ sc_enable(sc); /* will clear the sc link down flag */
+ }
+}
+
/*
* Wait for the SendPioInitCtxt.PioInitInProgress bit to clear.
* Returns:
@@ -1335,16 +1361,6 @@ void sc_flush(struct send_context *sc)
sc_wait_for_packet_egress(sc, 1);
}
-/* drop all packets on the context, no waiting until they are sent */
-void sc_drop(struct send_context *sc)
-{
- if (!sc)
- return;
-
- dd_dev_info(sc->dd, "%s: context %u(%u) - not implemented\n",
- __func__, sc->sw_index, sc->hw_context);
-}
-
/*
* Start the software reaction to a context halt or SPC freeze:
* - mark the context as halted or frozen
@@ -1357,11 +1373,10 @@ void sc_stop(struct send_context *sc, int flag)
{
unsigned long flags;
- /* mark the context */
- sc->flags |= flag;
-
/* stop buffer allocations */
spin_lock_irqsave(&sc->alloc_lock, flags);
+ /* mark the context */
+ sc->flags |= flag;
sc->flags &= ~SCF_ENABLED;
spin_unlock_irqrestore(&sc->alloc_lock, flags);
wake_up(&sc->halt_wait);
@@ -1378,7 +1393,8 @@ void sc_stop(struct send_context *sc, int flag)
* @cb: optional callback to call when the buffer is finished sending
* @arg: argument for cb
*
- * Return a pointer to a PIO buffer if successful, NULL if not enough room.
+ * Return a pointer to a PIO buffer, NULL if not enough room, -ECOMM
+ * when link is down.
*/
struct pio_buf *sc_buffer_alloc(struct send_context *sc, u32 dw_len,
pio_release_cb cb, void *arg)
@@ -1394,7 +1410,7 @@ struct pio_buf *sc_buffer_alloc(struct send_context *sc, u32 dw_len,
spin_lock_irqsave(&sc->alloc_lock, flags);
if (!(sc->flags & SCF_ENABLED)) {
spin_unlock_irqrestore(&sc->alloc_lock, flags);
- goto done;
+ return ERR_PTR(-ECOMM);
}
retry:
@@ -1406,14 +1422,14 @@ retry:
goto done;
}
/* copy from receiver cache line and recalculate */
- sc->alloc_free = ACCESS_ONCE(sc->free);
+ sc->alloc_free = READ_ONCE(sc->free);
avail =
(unsigned long)sc->credits -
(sc->fill - sc->alloc_free);
if (blocks > avail) {
/* still no room, actively update */
sc_release_update(sc);
- sc->alloc_free = ACCESS_ONCE(sc->free);
+ sc->alloc_free = READ_ONCE(sc->free);
trycount++;
goto retry;
}
@@ -1528,10 +1544,8 @@ void hfi1_sc_wantpiobuf_intr(struct send_context *sc, u32 needint)
else
sc_del_credit_return_intr(sc);
trace_hfi1_wantpiointr(sc, needint, sc->credit_ctrl);
- if (needint) {
- mmiowb();
+ if (needint)
sc_return_credits(sc);
- }
}
/**
@@ -1545,13 +1559,12 @@ void hfi1_sc_wantpiobuf_intr(struct send_context *sc, u32 needint)
static void sc_piobufavail(struct send_context *sc)
{
struct hfi1_devdata *dd = sc->dd;
- struct hfi1_ibdev *dev = &dd->verbs_dev;
struct list_head *list;
struct rvt_qp *qps[PIO_WAIT_BATCH_SIZE];
struct rvt_qp *qp;
struct hfi1_qp_priv *priv;
unsigned long flags;
- unsigned i, n = 0;
+ uint i, n = 0, top_idx = 0;
if (dd->send_contexts[sc->sw_index].type != SC_KERNEL &&
dd->send_contexts[sc->sw_index].type != SC_VL15)
@@ -1563,17 +1576,25 @@ static void sc_piobufavail(struct send_context *sc)
* could end up with QPs on the wait list with the interrupt
* disabled.
*/
- write_seqlock_irqsave(&dev->iowait_lock, flags);
+ write_seqlock_irqsave(&sc->waitlock, flags);
while (!list_empty(list)) {
struct iowait *wait;
if (n == ARRAY_SIZE(qps))
break;
wait = list_first_entry(list, struct iowait, list);
+ iowait_get_priority(wait);
qp = iowait_to_qp(wait);
priv = qp->priv;
list_del_init(&priv->s_iowait.list);
priv->s_iowait.lock = NULL;
+ if (n) {
+ priv = qps[top_idx]->priv;
+ top_idx = iowait_priority_update_top(wait,
+ &priv->s_iowait,
+ n, top_idx);
+ }
+
/* refcount held until actual wake up */
qps[n++] = qp;
}
@@ -1586,11 +1607,16 @@ static void sc_piobufavail(struct send_context *sc)
if (!list_empty(list))
hfi1_sc_wantpiobuf_intr(sc, 1);
}
- write_sequnlock_irqrestore(&dev->iowait_lock, flags);
+ write_sequnlock_irqrestore(&sc->waitlock, flags);
+ /* Wake up the top-priority one first */
+ if (n)
+ hfi1_qp_wakeup(qps[top_idx],
+ RVT_S_WAIT_PIO | HFI1_S_WAIT_PIO_DRAIN);
for (i = 0; i < n; i++)
- hfi1_qp_wakeup(qps[i],
- RVT_S_WAIT_PIO | RVT_S_WAIT_PIO_DRAIN);
+ if (i != top_idx)
+ hfi1_qp_wakeup(qps[i],
+ RVT_S_WAIT_PIO | HFI1_S_WAIT_PIO_DRAIN);
}
/* translate a send credit update to a bit code of reasons */
@@ -1643,7 +1669,7 @@ void sc_release_update(struct send_context *sc)
/* call sent buffer callbacks */
code = -1; /* code not yet set */
- head = ACCESS_ONCE(sc->sr_head); /* snapshot the head */
+ head = READ_ONCE(sc->sr_head); /* snapshot the head */
tail = sc->sr_tail;
while (head != tail) {
pbuf = &sc->sr[tail].pbuf;
@@ -1857,9 +1883,7 @@ int pio_map_init(struct hfi1_devdata *dd, u8 port, u8 num_vls, u8 *vl_scontexts)
vl_scontexts[i] = sc_per_vl + (extra > 0 ? 1 : 0);
}
/* build new map */
- newmap = kzalloc(sizeof(*newmap) +
- roundup_pow_of_two(num_vls) *
- sizeof(struct pio_map_elem *),
+ newmap = kzalloc(struct_size(newmap, map, roundup_pow_of_two(num_vls)),
GFP_KERNEL);
if (!newmap)
goto bail;
@@ -1874,9 +1898,8 @@ int pio_map_init(struct hfi1_devdata *dd, u8 port, u8 num_vls, u8 *vl_scontexts)
int sz = roundup_pow_of_two(vl_scontexts[i]);
/* only allocate once */
- newmap->map[i] = kzalloc(sizeof(*newmap->map[i]) +
- sz * sizeof(struct
- send_context *),
+ newmap->map[i] = kzalloc(struct_size(newmap->map[i],
+ ksc, sz),
GFP_KERNEL);
if (!newmap->map[i])
goto bail;
@@ -1951,9 +1974,9 @@ int init_pervl_scs(struct hfi1_devdata *dd)
hfi1_init_ctxt(dd->vld[15].sc);
dd->vld[15].mtu = enum_to_mtu(OPA_MTU_2048);
- dd->kernel_send_context = kzalloc_node(dd->num_send_contexts *
- sizeof(struct send_context *),
- GFP_KERNEL, dd->node);
+ dd->kernel_send_context = kcalloc_node(dd->num_send_contexts,
+ sizeof(struct send_context *),
+ GFP_KERNEL, dd->node);
if (!dd->kernel_send_context)
goto freesc15;
@@ -2043,18 +2066,17 @@ int init_credit_return(struct hfi1_devdata *dd)
int bytes = TXE_NUM_CONTEXTS * sizeof(struct credit_return);
set_dev_node(&dd->pcidev->dev, i);
- dd->cr_base[i].va = dma_zalloc_coherent(
- &dd->pcidev->dev,
- bytes,
- &dd->cr_base[i].dma,
- GFP_KERNEL);
+ dd->cr_base[i].va = dma_alloc_coherent(&dd->pcidev->dev,
+ bytes,
+ &dd->cr_base[i].dma,
+ GFP_KERNEL);
if (!dd->cr_base[i].va) {
set_dev_node(&dd->pcidev->dev, dd->node);
dd_dev_err(dd,
"Unable to allocate credit return DMA range for NUMA %d\n",
i);
ret = -ENOMEM;
- goto done;
+ goto free_cr_base;
}
}
set_dev_node(&dd->pcidev->dev, dd->node);
@@ -2062,6 +2084,10 @@ int init_credit_return(struct hfi1_devdata *dd)
ret = 0;
done:
return ret;
+
+free_cr_base:
+ free_credit_return(dd);
+ goto done;
}
void free_credit_return(struct hfi1_devdata *dd)
@@ -2082,3 +2108,28 @@ void free_credit_return(struct hfi1_devdata *dd)
kfree(dd->cr_base);
dd->cr_base = NULL;
}
+
+void seqfile_dump_sci(struct seq_file *s, u32 i,
+ struct send_context_info *sci)
+{
+ struct send_context *sc = sci->sc;
+ u64 reg;
+
+ seq_printf(s, "SCI %u: type %u base %u credits %u\n",
+ i, sci->type, sci->base, sci->credits);
+ seq_printf(s, " flags 0x%x sw_inx %u hw_ctxt %u grp %u\n",
+ sc->flags, sc->sw_index, sc->hw_context, sc->group);
+ seq_printf(s, " sr_size %u credits %u sr_head %u sr_tail %u\n",
+ sc->sr_size, sc->credits, sc->sr_head, sc->sr_tail);
+ seq_printf(s, " fill %lu free %lu fill_wrap %u alloc_free %lu\n",
+ sc->fill, sc->free, sc->fill_wrap, sc->alloc_free);
+ seq_printf(s, " credit_intr_count %u credit_ctrl 0x%llx\n",
+ sc->credit_intr_count, sc->credit_ctrl);
+ reg = read_kctxt_csr(sc->dd, sc->hw_context, SC(CREDIT_STATUS));
+ seq_printf(s, " *hw_free %llu CurrentFree %llu LastReturned %llu\n",
+ (le64_to_cpu(*sc->hw_free) & CR_COUNTER_SMASK) >>
+ CR_COUNTER_SHIFT,
+ (reg >> SC(CREDIT_STATUS_CURRENT_FREE_COUNTER_SHIFT)) &
+ SC(CREDIT_STATUS_CURRENT_FREE_COUNTER_MASK),
+ reg & SC(CREDIT_STATUS_LAST_RETURNED_COUNTER_SMASK));
+}
diff --git a/drivers/infiniband/hw/hfi1/pio.h b/drivers/infiniband/hw/hfi1/pio.h
index 867e5ffc3595..ab0f9a3a8d12 100644
--- a/drivers/infiniband/hw/hfi1/pio.h
+++ b/drivers/infiniband/hw/hfi1/pio.h
@@ -1,52 +1,10 @@
-#ifndef _PIO_H
-#define _PIO_H
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
+ * Copyright(c) 2015-2017 Intel Corporation.
*/
+#ifndef _PIO_H
+#define _PIO_H
/* send context types */
#define SC_KERNEL 0
#define SC_VL15 1
@@ -127,6 +85,8 @@ struct send_context {
volatile __le64 *hw_free; /* HW free counter */
/* list for PIO waiters */
struct list_head piowait ____cacheline_aligned_in_smp;
+ seqlock_t waitlock;
+
spinlock_t credit_ctrl_lock ____cacheline_aligned_in_smp;
u32 credit_intr_count; /* count of credit intr users */
u64 credit_ctrl; /* cache for credit control */
@@ -139,6 +99,7 @@ struct send_context {
#define SCF_IN_FREE 0x02
#define SCF_HALTED 0x04
#define SCF_FROZEN 0x08
+#define SCF_LINK_DOWN 0x10
struct send_context_info {
struct send_context *sc; /* allocated working context */
@@ -195,7 +156,7 @@ struct sc_config_sizes {
* | mask | --/ |--------------------|
* |--------------------------| -/ | * |
* | actual_vls (max 8) | -/ |--------------------|
- * |--------------------------| --/ | ksc[n] -> sc n |
+ * |--------------------------| --/ | ksc[n-1] -> sc n |
* | vls (max 8) | -/ +--------------------+
* |--------------------------| --/
* | map[0] |-/
@@ -208,21 +169,21 @@ struct sc_config_sizes {
* |--------------------------| |--------------------|
* | map[vls - 1] |- | * |
* +--------------------------+ \- |--------------------|
- * \- | ksc[m] -> sc m+n |
+ * \- | ksc[m-1] -> sc m+n |
* \ +--------------------+
* \-
* \
- * \- +--------------------+
- * \- | mask |
- * \ |--------------------|
- * \- | ksc[0] -> sc 1+m+n |
- * \- |--------------------|
- * >| ksc[1] -> sc 2+m+n |
- * |--------------------|
- * | * |
- * |--------------------|
- * | ksc[o] -> sc o+m+n |
- * +--------------------+
+ * \- +----------------------+
+ * \- | mask |
+ * \ |----------------------|
+ * \- | ksc[0] -> sc 1+m+n |
+ * \- |----------------------|
+ * >| ksc[1] -> sc 2+m+n |
+ * |----------------------|
+ * | * |
+ * |----------------------|
+ * | ksc[o-1] -> sc o+m+n |
+ * +----------------------+
*
*/
@@ -240,7 +201,7 @@ struct sc_config_sizes {
*/
struct pio_map_elem {
u32 mask;
- struct send_context *ksc[0];
+ struct send_context *ksc[];
};
/*
@@ -260,7 +221,7 @@ struct pio_vl_map {
u32 mask;
u8 actual_vls;
u8 vls;
- struct pio_map_elem *map[0];
+ struct pio_map_elem *map[];
};
int pio_map_init(struct hfi1_devdata *dd, u8 port, u8 num_vls,
@@ -276,7 +237,6 @@ int init_credit_return(struct hfi1_devdata *dd);
void free_credit_return(struct hfi1_devdata *dd);
int init_sc_pools_and_sizes(struct hfi1_devdata *dd);
int init_send_contexts(struct hfi1_devdata *dd);
-int init_credit_return(struct hfi1_devdata *dd);
int init_pervl_scs(struct hfi1_devdata *dd);
struct send_context *sc_alloc(struct hfi1_devdata *dd, int type,
uint hdrqentsize, int numa);
@@ -286,12 +246,10 @@ void sc_disable(struct send_context *sc);
int sc_restart(struct send_context *sc);
void sc_return_credits(struct send_context *sc);
void sc_flush(struct send_context *sc);
-void sc_drop(struct send_context *sc);
void sc_stop(struct send_context *sc, int bit);
struct pio_buf *sc_buffer_alloc(struct send_context *sc, u32 dw_len,
pio_release_cb cb, void *arg);
void sc_release_update(struct send_context *sc);
-void sc_return_credits(struct send_context *sc);
void sc_group_release_update(struct hfi1_devdata *dd, u32 hw_context);
void sc_add_credit_return_intr(struct send_context *sc);
void sc_del_credit_return_intr(struct send_context *sc);
@@ -306,6 +264,7 @@ void set_pio_integrity(struct send_context *sc);
void pio_reset_all(struct hfi1_devdata *dd);
void pio_freeze(struct hfi1_devdata *dd);
void pio_kernel_unfreeze(struct hfi1_devdata *dd);
+void pio_kernel_linkup(struct hfi1_devdata *dd);
/* global PIO send control operations */
#define PSC_GLOBAL_ENABLE 0
@@ -327,4 +286,7 @@ void seg_pio_copy_start(struct pio_buf *pbuf, u64 pbc,
void seg_pio_copy_mid(struct pio_buf *pbuf, const void *from, size_t nbytes);
void seg_pio_copy_end(struct pio_buf *pbuf);
+void seqfile_dump_sci(struct seq_file *s, u32 i,
+ struct send_context_info *sci);
+
#endif /* _PIO_H */
diff --git a/drivers/infiniband/hw/hfi1/pio_copy.c b/drivers/infiniband/hw/hfi1/pio_copy.c
index 03024cec78dd..80fee812a930 100644
--- a/drivers/infiniband/hw/hfi1/pio_copy.c
+++ b/drivers/infiniband/hw/hfi1/pio_copy.c
@@ -1,48 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
* Copyright(c) 2015, 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
*/
#include "hfi.h"
@@ -55,6 +13,7 @@
/**
* pio_copy - copy data block to MMIO space
+ * @dd: hfi1 dev data
* @pbuf: a number of blocks allocated within a PIO send context
* @pbc: PBC to send
* @from: source, must be 8 byte aligned
@@ -191,30 +150,29 @@ static inline void jcopy(u8 *dest, const u8 *src, u32 n)
switch (n) {
case 7:
*dest++ = *src++;
- /* fall through */
+ fallthrough;
case 6:
*dest++ = *src++;
- /* fall through */
+ fallthrough;
case 5:
*dest++ = *src++;
- /* fall through */
+ fallthrough;
case 4:
*dest++ = *src++;
- /* fall through */
+ fallthrough;
case 3:
*dest++ = *src++;
- /* fall through */
+ fallthrough;
case 2:
*dest++ = *src++;
- /* fall through */
+ fallthrough;
case 1:
*dest++ = *src++;
- /* fall through */
}
}
/*
- * Read nbytes from "from" and and place them in the low bytes
+ * Read nbytes from "from" and place them in the low bytes
* of pbuf->carry. Other bytes are left as-is. Any previous
* value in pbuf->carry is lost.
*
diff --git a/drivers/infiniband/hw/hfi1/platform.c b/drivers/infiniband/hw/hfi1/platform.c
index 838fe84e285a..7bd0e9b6cb50 100644
--- a/drivers/infiniband/hw/hfi1/platform.c
+++ b/drivers/infiniband/hw/hfi1/platform.c
@@ -1,54 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
* Copyright(c) 2015, 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
*/
+#include <linux/firmware.h>
+
#include "hfi.h"
#include "efivar.h"
#include "eprom.h"
+#define DEFAULT_PLATFORM_CONFIG_NAME "hfi1_platform.dat"
+
static int validate_scratch_checksum(struct hfi1_devdata *dd)
{
u64 checksum = 0, temp_scratch = 0;
@@ -58,8 +20,13 @@ static int validate_scratch_checksum(struct hfi1_devdata *dd)
version = (temp_scratch & BITMAP_VERSION_SMASK) >> BITMAP_VERSION_SHIFT;
/* Prevent power on default of all zeroes from passing checksum */
- if (!version)
+ if (!version) {
+ dd_dev_err(dd, "%s: Config bitmap uninitialized\n", __func__);
+ dd_dev_err(dd,
+ "%s: Please update your BIOS to support active channels\n",
+ __func__);
return 0;
+ }
/*
* ASIC scratch 0 only contains the checksum and bitmap version as
@@ -84,6 +51,8 @@ static int validate_scratch_checksum(struct hfi1_devdata *dd)
if (checksum + temp_scratch == 0xFFFF)
return 1;
+
+ dd_dev_err(dd, "%s: Configuration bitmap corrupted\n", __func__);
return 0;
}
@@ -131,25 +100,22 @@ static void save_platform_config_fields(struct hfi1_devdata *dd)
ppd->max_power_class = (temp_scratch & QSFP_MAX_POWER_SMASK) >>
QSFP_MAX_POWER_SHIFT;
+
+ ppd->config_from_scratch = true;
}
void get_platform_config(struct hfi1_devdata *dd)
{
int ret = 0;
- unsigned long size = 0;
u8 *temp_platform_config = NULL;
u32 esize;
+ const struct firmware *platform_config_file = NULL;
if (is_integrated(dd)) {
if (validate_scratch_checksum(dd)) {
save_platform_config_fields(dd);
return;
}
- dd_dev_err(dd, "%s: Config bitmap corrupted/uninitialized\n",
- __func__);
- dd_dev_err(dd,
- "%s: Please update your BIOS to support active channels\n",
- __func__);
} else {
ret = eprom_read_platform_config(dd,
(void **)&temp_platform_config,
@@ -160,36 +126,38 @@ void get_platform_config(struct hfi1_devdata *dd)
dd->platform_config.size = esize;
return;
}
- /* fail, try EFI variable */
-
- ret = read_hfi1_efi_var(dd, "configuration", &size,
- (void **)&temp_platform_config);
- if (!ret) {
- dd->platform_config.data = temp_platform_config;
- dd->platform_config.size = size;
- return;
- }
}
dd_dev_err(dd,
"%s: Failed to get platform config, falling back to sub-optimal default file\n",
__func__);
- /* fall back to request firmware */
- platform_config_load = 1;
-}
-void free_platform_config(struct hfi1_devdata *dd)
-{
- if (!platform_config_load) {
- /*
- * was loaded from EFI or the EPROM, release memory
- * allocated by read_efi_var/eprom_read_platform_config
- */
- kfree(dd->platform_config.data);
+ ret = request_firmware(&platform_config_file,
+ DEFAULT_PLATFORM_CONFIG_NAME,
+ &dd->pcidev->dev);
+ if (ret) {
+ dd_dev_err(dd,
+ "%s: No default platform config file found\n",
+ __func__);
+ return;
}
+
/*
- * else do nothing, dispose_firmware will release
- * struct firmware platform_config on driver exit
+ * Allocate separate memory block to store data and free firmware
+ * structure. This allows free_platform_config to treat EPROM and
+ * fallback configs in the same manner.
*/
+ dd->platform_config.data = kmemdup(platform_config_file->data,
+ platform_config_file->size,
+ GFP_KERNEL);
+ dd->platform_config.size = platform_config_file->size;
+ release_firmware(platform_config_file);
+}
+
+void free_platform_config(struct hfi1_devdata *dd)
+{
+ /* Release memory allocated for eprom or fallback file read. */
+ kfree(dd->platform_config.data);
+ dd->platform_config.data = NULL;
}
void get_port_type(struct hfi1_pportdata *ppd)
@@ -242,7 +210,7 @@ static int qual_power(struct hfi1_pportdata *ppd)
if (ppd->offline_disabled_reason ==
HFI1_ODR_MASK(OPA_LINKDOWN_REASON_POWER_POLICY)) {
- dd_dev_info(
+ dd_dev_err(
ppd->dd,
"%s: Port disabled due to system power restrictions\n",
__func__);
@@ -268,7 +236,7 @@ static int qual_bitrate(struct hfi1_pportdata *ppd)
if (ppd->offline_disabled_reason ==
HFI1_ODR_MASK(OPA_LINKDOWN_REASON_LINKSPEED_POLICY)) {
- dd_dev_info(
+ dd_dev_err(
ppd->dd,
"%s: Cable failed bitrate check, disabling port\n",
__func__);
@@ -624,7 +592,7 @@ static void apply_tx_lanes(struct hfi1_pportdata *ppd, u8 field_id,
u32 config_data, const char *message)
{
u8 i;
- int ret = HCMD_SUCCESS;
+ int ret;
for (i = 0; i < 4; i++) {
ret = load_8051_config(ppd->dd, field_id, i, config_data);
@@ -658,8 +626,8 @@ static u8 aoc_low_power_setting(struct hfi1_pportdata *ppd)
/* active optical cables only */
switch ((cache[QSFP_MOD_TECH_OFFS] & 0xF0) >> 4) {
- case 0x0 ... 0x9: /* fallthrough */
- case 0xC: /* fallthrough */
+ case 0x0 ... 0x9: fallthrough;
+ case 0xC: fallthrough;
case 0xE:
/* active AOC */
power_class = get_qsfp_power_class(cache[QSFP_MOD_PWR_OFFS]);
@@ -709,15 +677,15 @@ static void apply_tunings(
ret = load_8051_config(ppd->dd, DC_HOST_COMM_SETTINGS,
GENERAL_CONFIG, config_data);
if (ret != HCMD_SUCCESS)
- dd_dev_info(ppd->dd,
- "%s: Failed set ext device config params\n",
- __func__);
+ dd_dev_err(ppd->dd,
+ "%s: Failed set ext device config params\n",
+ __func__);
}
if (tx_preset_index == OPA_INVALID_INDEX) {
if (ppd->port_type == PORT_TYPE_QSFP && limiting_active)
- dd_dev_info(ppd->dd, "%s: Invalid Tx preset index\n",
- __func__);
+ dd_dev_err(ppd->dd, "%s: Invalid Tx preset index\n",
+ __func__);
return;
}
@@ -781,7 +749,9 @@ static int tune_active_qsfp(struct hfi1_pportdata *ppd, u32 *ptr_tx_preset,
* reuse of stale settings established in our previous pass through.
*/
if (ppd->qsfp_info.reset_needed) {
- reset_qsfp(ppd);
+ ret = reset_qsfp(ppd);
+ if (ret)
+ return ret;
refresh_qsfp_cache(ppd, &ppd->qsfp_info);
} else {
ppd->qsfp_info.reset_needed = 1;
@@ -887,8 +857,8 @@ static int tune_qsfp(struct hfi1_pportdata *ppd,
*ptr_tuning_method = OPA_PASSIVE_TUNING;
break;
- case 0x0 ... 0x9: /* fallthrough */
- case 0xC: /* fallthrough */
+ case 0x0 ... 0x9: fallthrough;
+ case 0xC: fallthrough;
case 0xE:
ret = tune_active_qsfp(ppd, ptr_tx_preset, ptr_rx_preset,
ptr_total_atten);
@@ -897,10 +867,10 @@ static int tune_qsfp(struct hfi1_pportdata *ppd,
*ptr_tuning_method = OPA_ACTIVE_TUNING;
break;
- case 0xD: /* fallthrough */
+ case 0xD: fallthrough;
case 0xF:
default:
- dd_dev_info(ppd->dd, "%s: Unknown/unsupported cable\n",
+ dd_dev_warn(ppd->dd, "%s: Unknown/unsupported cable\n",
__func__);
break;
}
@@ -935,6 +905,21 @@ void tune_serdes(struct hfi1_pportdata *ppd)
if (loopback != LOOPBACK_NONE ||
ppd->dd->icode == ICODE_FUNCTIONAL_SIMULATOR) {
ppd->driver_link_ready = 1;
+
+ if (qsfp_mod_present(ppd)) {
+ ret = acquire_chip_resource(ppd->dd,
+ qsfp_resource(ppd->dd),
+ QSFP_WAIT);
+ if (ret) {
+ dd_dev_err(ppd->dd, "%s: hfi%d: cannot lock i2c chain\n",
+ __func__, (int)ppd->dd->hfi1_id);
+ goto bail;
+ }
+
+ refresh_qsfp_cache(ppd, &ppd->qsfp_info);
+ release_chip_resource(ppd->dd, qsfp_resource(ppd->dd));
+ }
+
return;
}
@@ -942,7 +927,7 @@ void tune_serdes(struct hfi1_pportdata *ppd)
case PORT_TYPE_DISCONNECTED:
ppd->offline_disabled_reason =
HFI1_ODR_MASK(OPA_LINKDOWN_REASON_DISCONNECTED);
- dd_dev_info(dd, "%s: Port disconnected, disabling port\n",
+ dd_dev_warn(dd, "%s: Port disconnected, disabling port\n",
__func__);
goto bail;
case PORT_TYPE_FIXED:
@@ -1027,7 +1012,7 @@ void tune_serdes(struct hfi1_pportdata *ppd)
}
break;
default:
- dd_dev_info(ppd->dd, "%s: Unknown port type\n", __func__);
+ dd_dev_warn(ppd->dd, "%s: Unknown port type\n", __func__);
ppd->port_type = PORT_TYPE_UNKNOWN;
tuning_method = OPA_UNKNOWN_TUNING;
total_atten = 0;
diff --git a/drivers/infiniband/hw/hfi1/platform.h b/drivers/infiniband/hw/hfi1/platform.h
index eed0aa9124fa..0631f9bf3a89 100644
--- a/drivers/infiniband/hw/hfi1/platform.h
+++ b/drivers/infiniband/hw/hfi1/platform.h
@@ -1,49 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
* Copyright(c) 2015, 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
*/
+
#ifndef __PLATFORM_H
#define __PLATFORM_H
diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c
index d752d6768a49..f3d8c0c193ac 100644
--- a/drivers/infiniband/hw/hfi1/qp.c
+++ b/drivers/infiniband/hw/hfi1/qp.c
@@ -1,48 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
+ * Copyright(c) 2015 - 2020 Intel Corporation.
*/
#include <linux/err.h>
@@ -66,56 +24,14 @@ MODULE_PARM_DESC(qp_table_size, "QP table size");
static void flush_tx_list(struct rvt_qp *qp);
static int iowait_sleep(
struct sdma_engine *sde,
- struct iowait *wait,
+ struct iowait_work *wait,
struct sdma_txreq *stx,
- unsigned seq);
+ unsigned int seq,
+ bool pkts_sent);
static void iowait_wakeup(struct iowait *wait, int reason);
static void iowait_sdma_drained(struct iowait *wait);
static void qp_pio_drain(struct rvt_qp *qp);
-static inline unsigned mk_qpn(struct rvt_qpn_table *qpt,
- struct rvt_qpn_map *map, unsigned off)
-{
- return (map - qpt->map) * RVT_BITS_PER_PAGE + off;
-}
-
-/*
- * Convert the AETH credit code into the number of credits.
- */
-static const u16 credit_table[31] = {
- 0, /* 0 */
- 1, /* 1 */
- 2, /* 2 */
- 3, /* 3 */
- 4, /* 4 */
- 6, /* 5 */
- 8, /* 6 */
- 12, /* 7 */
- 16, /* 8 */
- 24, /* 9 */
- 32, /* A */
- 48, /* B */
- 64, /* C */
- 96, /* D */
- 128, /* E */
- 192, /* F */
- 256, /* 10 */
- 384, /* 11 */
- 512, /* 12 */
- 768, /* 13 */
- 1024, /* 14 */
- 1536, /* 15 */
- 2048, /* 16 */
- 3072, /* 17 */
- 4096, /* 18 */
- 6144, /* 19 */
- 8192, /* 1A */
- 12288, /* 1B */
- 16384, /* 1C */
- 24576, /* 1D */
- 32768 /* 1E */
-};
-
const struct rvt_operation_params hfi1_post_parms[RVT_OPERATION_MAX] = {
[IB_WR_RDMA_WRITE] = {
.length = sizeof(struct ib_rdma_wr),
@@ -174,17 +90,27 @@ const struct rvt_operation_params hfi1_post_parms[RVT_OPERATION_MAX] = {
.qpt_support = BIT(IB_QPT_RC),
},
+[IB_WR_OPFN] = {
+ .length = sizeof(struct ib_atomic_wr),
+ .qpt_support = BIT(IB_QPT_RC),
+ .flags = RVT_OPERATION_USE_RESERVE,
+},
+
+[IB_WR_TID_RDMA_WRITE] = {
+ .length = sizeof(struct ib_rdma_wr),
+ .qpt_support = BIT(IB_QPT_RC),
+ .flags = RVT_OPERATION_IGN_RNR_CNT,
+},
+
};
-static void flush_tx_list(struct rvt_qp *qp)
+static void flush_list_head(struct list_head *l)
{
- struct hfi1_qp_priv *priv = qp->priv;
-
- while (!list_empty(&priv->s_iowait.tx_head)) {
+ while (!list_empty(l)) {
struct sdma_txreq *tx;
tx = list_first_entry(
- &priv->s_iowait.tx_head,
+ l,
struct sdma_txreq,
list);
list_del_init(&tx->list);
@@ -193,6 +119,14 @@ static void flush_tx_list(struct rvt_qp *qp)
}
}
+static void flush_tx_list(struct rvt_qp *qp)
+{
+ struct hfi1_qp_priv *priv = qp->priv;
+
+ flush_list_head(&iowait_get_ib_work(&priv->s_iowait)->tx_head);
+ flush_list_head(&iowait_get_tid_work(&priv->s_iowait)->tx_head);
+}
+
static void flush_iowait(struct rvt_qp *qp)
{
struct hfi1_qp_priv *priv = qp->priv;
@@ -210,31 +144,17 @@ static void flush_iowait(struct rvt_qp *qp)
write_sequnlock_irqrestore(lock, flags);
}
-static inline int opa_mtu_enum_to_int(int mtu)
-{
- switch (mtu) {
- case OPA_MTU_8192: return 8192;
- case OPA_MTU_10240: return 10240;
- default: return -1;
- }
-}
-
-/**
+/*
* This function is what we would push to the core layer if we wanted to be a
* "first class citizen". Instead we hide this here and rely on Verbs ULPs
* to blindly pass the MTU enum value from the PathRecord to us.
*/
static inline int verbs_mtu_enum_to_int(struct ib_device *dev, enum ib_mtu mtu)
{
- int val;
-
/* Constraining 10KB packets to 8KB packets */
if (mtu == (enum ib_mtu)OPA_MTU_10240)
- mtu = OPA_MTU_8192;
- val = opa_mtu_enum_to_int((int)mtu);
- if (val > 0)
- return val;
- return ib_mtu_enum_to_int(mtu);
+ mtu = (enum ib_mtu)OPA_MTU_8192;
+ return opa_mtu_enum_to_int((enum opa_mtu)mtu);
}
int hfi1_check_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
@@ -274,6 +194,31 @@ int hfi1_check_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
return 0;
}
+/*
+ * qp_set_16b - Set the hdr_type based on whether the slid or the
+ * dlid in the connection is extended. Only applicable for RC and UC
+ * QPs. UD QPs determine this on the fly from the ah in the wqe
+ */
+static inline void qp_set_16b(struct rvt_qp *qp)
+{
+ struct hfi1_pportdata *ppd;
+ struct hfi1_ibport *ibp;
+ struct hfi1_qp_priv *priv = qp->priv;
+
+ /* Update ah_attr to account for extended LIDs */
+ hfi1_update_ah_attr(qp->ibqp.device, &qp->remote_ah_attr);
+
+ /* Create 32 bit LIDs */
+ hfi1_make_opa_lid(&qp->remote_ah_attr);
+
+ if (!(rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH))
+ return;
+
+ ibp = to_iport(qp->ibqp.device, qp->port_num);
+ ppd = ppd_from_ibp(ibp);
+ priv->hdr_type = hfi1_get_hdr_type(ppd->lid, &qp->remote_ah_attr);
+}
+
void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
int attr_mask, struct ib_udata *udata)
{
@@ -284,121 +229,86 @@ void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
priv->s_sc = ah_to_sc(ibqp->device, &qp->remote_ah_attr);
priv->s_sde = qp_to_sdma_engine(qp, priv->s_sc);
priv->s_sendcontext = qp_to_send_context(qp, priv->s_sc);
+ qp_set_16b(qp);
}
if (attr_mask & IB_QP_PATH_MIG_STATE &&
attr->path_mig_state == IB_MIG_MIGRATED &&
qp->s_mig_state == IB_MIG_ARMED) {
- qp->s_flags |= RVT_S_AHG_CLEAR;
+ qp->s_flags |= HFI1_S_AHG_CLEAR;
priv->s_sc = ah_to_sc(ibqp->device, &qp->remote_ah_attr);
priv->s_sde = qp_to_sdma_engine(qp, priv->s_sc);
priv->s_sendcontext = qp_to_send_context(qp, priv->s_sc);
+ qp_set_16b(qp);
}
+
+ opfn_qp_init(qp, attr, attr_mask);
}
/**
- * hfi1_check_send_wqe - validate wqe
- * @qp - The qp
- * @wqe - The built wqe
+ * hfi1_setup_wqe - set up the wqe
+ * @qp: The qp
+ * @wqe: The built wqe
+ * @call_send: Determine if the send should be posted or scheduled.
*
- * validate wqe. This is called
- * prior to inserting the wqe into
- * the ring but after the wqe has been
- * setup.
+ * Perform setup of the wqe. This is called
+ * prior to inserting the wqe into the ring but after
+ * the wqe has been setup by RDMAVT. This function
+ * allows the driver the opportunity to perform
+ * validation and additional setup of the wqe.
*
* Returns 0 on success, -EINVAL on failure
*
*/
-int hfi1_check_send_wqe(struct rvt_qp *qp,
- struct rvt_swqe *wqe)
+int hfi1_setup_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe, bool *call_send)
{
struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
struct rvt_ah *ah;
+ struct hfi1_pportdata *ppd;
+ struct hfi1_devdata *dd;
switch (qp->ibqp.qp_type) {
case IB_QPT_RC:
+ hfi1_setup_tid_rdma_wqe(qp, wqe);
+ fallthrough;
case IB_QPT_UC:
if (wqe->length > 0x80000000U)
return -EINVAL;
+ if (wqe->length > qp->pmtu)
+ *call_send = false;
break;
case IB_QPT_SMI:
- ah = ibah_to_rvtah(wqe->ud_wr.ah);
- if (wqe->length > (1 << ah->log_pmtu))
+ /*
+ * SM packets should exclusively use VL15 and their SL is
+ * ignored (IBTA v1.3, Section 3.5.8.2). Therefore, when ah
+ * is created, SL is 0 in most cases and as a result some
+ * fields (vl and pmtu) in ah may not be set correctly,
+ * depending on the SL2SC and SC2VL tables at the time.
+ */
+ ppd = ppd_from_ibp(ibp);
+ dd = dd_from_ppd(ppd);
+ if (wqe->length > dd->vld[15].mtu)
return -EINVAL;
break;
case IB_QPT_GSI:
case IB_QPT_UD:
- ah = ibah_to_rvtah(wqe->ud_wr.ah);
+ ah = rvt_get_swqe_ah(wqe);
if (wqe->length > (1 << ah->log_pmtu))
return -EINVAL;
- if (ibp->sl_to_sc[ah->attr.sl] == 0xf)
+ if (ibp->sl_to_sc[rdma_ah_get_sl(&ah->attr)] == 0xf)
return -EINVAL;
+ break;
default:
break;
}
- return wqe->length <= piothreshold;
-}
-/**
- * hfi1_compute_aeth - compute the AETH (syndrome + MSN)
- * @qp: the queue pair to compute the AETH for
- *
- * Returns the AETH.
- */
-__be32 hfi1_compute_aeth(struct rvt_qp *qp)
-{
- u32 aeth = qp->r_msn & HFI1_MSN_MASK;
-
- if (qp->ibqp.srq) {
- /*
- * Shared receive queues don't generate credits.
- * Set the credit field to the invalid value.
- */
- aeth |= HFI1_AETH_CREDIT_INVAL << HFI1_AETH_CREDIT_SHIFT;
- } else {
- u32 min, max, x;
- u32 credits;
- struct rvt_rwq *wq = qp->r_rq.wq;
- u32 head;
- u32 tail;
-
- /* sanity check pointers before trusting them */
- head = wq->head;
- if (head >= qp->r_rq.size)
- head = 0;
- tail = wq->tail;
- if (tail >= qp->r_rq.size)
- tail = 0;
- /*
- * Compute the number of credits available (RWQEs).
- * There is a small chance that the pair of reads are
- * not atomic, which is OK, since the fuzziness is
- * resolved as further ACKs go out.
- */
- credits = head - tail;
- if ((int)credits < 0)
- credits += qp->r_rq.size;
- /*
- * Binary search the credit table to find the code to
- * use.
- */
- min = 0;
- max = 31;
- for (;;) {
- x = (min + max) / 2;
- if (credit_table[x] == credits)
- break;
- if (credit_table[x] > credits) {
- max = x;
- } else {
- if (min == x)
- break;
- min = x;
- }
- }
- aeth |= x << HFI1_AETH_CREDIT_SHIFT;
- }
- return cpu_to_be32(aeth);
+ /*
+ * System latency between send and schedule is large enough that
+ * forcing call_send to true for piothreshold packets is necessary.
+ */
+ if (wqe->length <= piothreshold)
+ *call_send = true;
+ return 0;
}
/**
@@ -410,36 +320,37 @@ __be32 hfi1_compute_aeth(struct rvt_qp *qp)
* It is only used in the post send, which doesn't hold
* the s_lock.
*/
-void _hfi1_schedule_send(struct rvt_qp *qp)
+bool _hfi1_schedule_send(struct rvt_qp *qp)
{
struct hfi1_qp_priv *priv = qp->priv;
struct hfi1_ibport *ibp =
to_iport(qp->ibqp.device, qp->port_num);
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
- struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
+ struct hfi1_devdata *dd = ppd->dd;
- iowait_schedule(&priv->s_iowait, ppd->hfi1_wq,
- priv->s_sde ?
- priv->s_sde->cpu :
- cpumask_first(cpumask_of_node(dd->node)));
+ if (dd->flags & HFI1_SHUTDOWN)
+ return true;
+
+ return iowait_schedule(&priv->s_iowait, ppd->hfi1_wq,
+ priv->s_sde ?
+ priv->s_sde->cpu :
+ cpumask_first(cpumask_of_node(dd->node)));
}
static void qp_pio_drain(struct rvt_qp *qp)
{
- struct hfi1_ibdev *dev;
struct hfi1_qp_priv *priv = qp->priv;
if (!priv->s_sendcontext)
return;
- dev = to_idev(qp->ibqp.device);
while (iowait_pio_pending(&priv->s_iowait)) {
- write_seqlock_irq(&dev->iowait_lock);
+ write_seqlock_irq(&priv->s_sendcontext->waitlock);
hfi1_sc_wantpiobuf_intr(priv->s_sendcontext, 1);
- write_sequnlock_irq(&dev->iowait_lock);
+ write_sequnlock_irq(&priv->s_sendcontext->waitlock);
iowait_pio_drain(&priv->s_iowait);
- write_seqlock_irq(&dev->iowait_lock);
+ write_seqlock_irq(&priv->s_sendcontext->waitlock);
hfi1_sc_wantpiobuf_intr(priv->s_sendcontext, 0);
- write_sequnlock_irq(&dev->iowait_lock);
+ write_sequnlock_irq(&priv->s_sendcontext->waitlock);
}
}
@@ -449,49 +360,36 @@ static void qp_pio_drain(struct rvt_qp *qp)
*
* This schedules qp progress and caller should hold
* the s_lock.
+ * @return true if the first leg is scheduled;
+ * false if the first leg is not scheduled.
*/
-void hfi1_schedule_send(struct rvt_qp *qp)
+bool hfi1_schedule_send(struct rvt_qp *qp)
{
lockdep_assert_held(&qp->s_lock);
- if (hfi1_send_ok(qp))
+ if (hfi1_send_ok(qp)) {
_hfi1_schedule_send(qp);
+ return true;
+ }
+ if (qp->s_flags & HFI1_S_ANY_WAIT_IO)
+ iowait_set_flag(&((struct hfi1_qp_priv *)qp->priv)->s_iowait,
+ IOWAIT_PENDING_IB);
+ return false;
}
-/**
- * hfi1_get_credit - handle credit in aeth
- * @qp: the qp
- * @aeth: the Acknowledge Extended Transport Header
- *
- * The QP s_lock should be held.
- */
-void hfi1_get_credit(struct rvt_qp *qp, u32 aeth)
+static void hfi1_qp_schedule(struct rvt_qp *qp)
{
- u32 credit = (aeth >> HFI1_AETH_CREDIT_SHIFT) & HFI1_AETH_CREDIT_MASK;
+ struct hfi1_qp_priv *priv = qp->priv;
+ bool ret;
- lockdep_assert_held(&qp->s_lock);
- /*
- * If the credit is invalid, we can send
- * as many packets as we like. Otherwise, we have to
- * honor the credit field.
- */
- if (credit == HFI1_AETH_CREDIT_INVAL) {
- if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT)) {
- qp->s_flags |= RVT_S_UNLIMITED_CREDIT;
- if (qp->s_flags & RVT_S_WAIT_SSN_CREDIT) {
- qp->s_flags &= ~RVT_S_WAIT_SSN_CREDIT;
- hfi1_schedule_send(qp);
- }
- }
- } else if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT)) {
- /* Compute new LSN (i.e., MSN + credit) */
- credit = (aeth + credit_table[credit]) & HFI1_MSN_MASK;
- if (cmp_msn(credit, qp->s_lsn) > 0) {
- qp->s_lsn = credit;
- if (qp->s_flags & RVT_S_WAIT_SSN_CREDIT) {
- qp->s_flags &= ~RVT_S_WAIT_SSN_CREDIT;
- hfi1_schedule_send(qp);
- }
- }
+ if (iowait_flag_set(&priv->s_iowait, IOWAIT_PENDING_IB)) {
+ ret = hfi1_schedule_send(qp);
+ if (ret)
+ iowait_clear_flag(&priv->s_iowait, IOWAIT_PENDING_IB);
+ }
+ if (iowait_flag_set(&priv->s_iowait, IOWAIT_PENDING_TID)) {
+ ret = hfi1_schedule_tid_send(qp);
+ if (ret)
+ iowait_clear_flag(&priv->s_iowait, IOWAIT_PENDING_TID);
}
}
@@ -503,25 +401,50 @@ void hfi1_qp_wakeup(struct rvt_qp *qp, u32 flag)
if (qp->s_flags & flag) {
qp->s_flags &= ~flag;
trace_hfi1_qpwakeup(qp, flag);
- hfi1_schedule_send(qp);
+ hfi1_qp_schedule(qp);
}
spin_unlock_irqrestore(&qp->s_lock, flags);
/* Notify hfi1_destroy_qp() if it is waiting. */
rvt_put_qp(qp);
}
+void hfi1_qp_unbusy(struct rvt_qp *qp, struct iowait_work *wait)
+{
+ struct hfi1_qp_priv *priv = qp->priv;
+
+ if (iowait_set_work_flag(wait) == IOWAIT_IB_SE) {
+ qp->s_flags &= ~RVT_S_BUSY;
+ /*
+ * If we are sending a first-leg packet from the second leg,
+ * we need to clear the busy flag from priv->s_flags to
+ * avoid a race condition when the qp wakes up before
+ * the call to hfi1_verbs_send() returns to the second
+ * leg. In that case, the second leg will terminate without
+ * being re-scheduled, resulting in failure to send TID RDMA
+ * WRITE DATA and TID RDMA ACK packets.
+ */
+ if (priv->s_flags & HFI1_S_TID_BUSY_SET) {
+ priv->s_flags &= ~(HFI1_S_TID_BUSY_SET |
+ RVT_S_BUSY);
+ iowait_set_flag(&priv->s_iowait, IOWAIT_PENDING_TID);
+ }
+ } else {
+ priv->s_flags &= ~RVT_S_BUSY;
+ }
+}
+
static int iowait_sleep(
struct sdma_engine *sde,
- struct iowait *wait,
+ struct iowait_work *wait,
struct sdma_txreq *stx,
- unsigned seq)
+ uint seq,
+ bool pkts_sent)
{
struct verbs_txreq *tx = container_of(stx, struct verbs_txreq, txreq);
struct rvt_qp *qp;
struct hfi1_qp_priv *priv;
unsigned long flags;
int ret = 0;
- struct hfi1_ibdev *dev;
qp = tx->qp;
priv = qp->priv;
@@ -534,9 +457,8 @@ static int iowait_sleep(
* buffer and undoing the side effects of the copy.
*/
/* Make a common routine? */
- dev = &sde->dd->verbs_dev;
list_add_tail(&stx->list, &wait->tx_head);
- write_seqlock(&dev->iowait_lock);
+ write_seqlock(&sde->waitlock);
if (sdma_progress(sde, seq, stx))
goto eagain;
if (list_empty(&priv->s_iowait.list)) {
@@ -545,13 +467,15 @@ static int iowait_sleep(
ibp->rvp.n_dmawait++;
qp->s_flags |= RVT_S_WAIT_DMA_DESC;
- list_add_tail(&priv->s_iowait.list, &sde->dmawait);
- priv->s_iowait.lock = &dev->iowait_lock;
+ iowait_get_priority(&priv->s_iowait);
+ iowait_queue(pkts_sent, &priv->s_iowait,
+ &sde->dmawait);
+ priv->s_iowait.lock = &sde->waitlock;
trace_hfi1_qpsleep(qp, RVT_S_WAIT_DMA_DESC);
rvt_get_qp(qp);
}
- write_sequnlock(&dev->iowait_lock);
- qp->s_flags &= ~RVT_S_BUSY;
+ write_sequnlock(&sde->waitlock);
+ hfi1_qp_unbusy(qp, wait);
spin_unlock_irqrestore(&qp->s_lock, flags);
ret = -EBUSY;
} else {
@@ -560,7 +484,7 @@ static int iowait_sleep(
}
return ret;
eagain:
- write_sequnlock(&dev->iowait_lock);
+ write_sequnlock(&sde->waitlock);
spin_unlock_irqrestore(&qp->s_lock, flags);
list_del_init(&stx->list);
return -EAGAIN;
@@ -593,8 +517,18 @@ static void iowait_sdma_drained(struct iowait *wait)
spin_unlock_irqrestore(&qp->s_lock, flags);
}
+static void hfi1_init_priority(struct iowait *w)
+{
+ struct rvt_qp *qp = iowait_to_qp(w);
+ struct hfi1_qp_priv *priv = qp->priv;
+
+ if (qp->s_flags & RVT_S_ACK_PENDING)
+ w->priority++;
+ if (priv->s_flags & RVT_S_ACK_PENDING)
+ w->priority++;
+}
+
/**
- *
* qp_to_sdma_engine - map a qp to a send engine
* @qp: the QP
* @sc5: the 5 bit sc
@@ -619,7 +553,7 @@ struct sdma_engine *qp_to_sdma_engine(struct rvt_qp *qp, u8 sc5)
return sde;
}
-/*
+/**
* qp_to_send_context - map a qp to a send context
* @qp: the QP
* @sc5: the 5 bit sc
@@ -643,82 +577,6 @@ struct send_context *qp_to_send_context(struct rvt_qp *qp, u8 sc5)
sc5);
}
-struct qp_iter {
- struct hfi1_ibdev *dev;
- struct rvt_qp *qp;
- int specials;
- int n;
-};
-
-struct qp_iter *qp_iter_init(struct hfi1_ibdev *dev)
-{
- struct qp_iter *iter;
-
- iter = kzalloc(sizeof(*iter), GFP_KERNEL);
- if (!iter)
- return NULL;
-
- iter->dev = dev;
- iter->specials = dev->rdi.ibdev.phys_port_cnt * 2;
-
- return iter;
-}
-
-int qp_iter_next(struct qp_iter *iter)
-{
- struct hfi1_ibdev *dev = iter->dev;
- int n = iter->n;
- int ret = 1;
- struct rvt_qp *pqp = iter->qp;
- struct rvt_qp *qp;
-
- /*
- * The approach is to consider the special qps
- * as an additional table entries before the
- * real hash table. Since the qp code sets
- * the qp->next hash link to NULL, this works just fine.
- *
- * iter->specials is 2 * # ports
- *
- * n = 0..iter->specials is the special qp indices
- *
- * n = iter->specials..dev->rdi.qp_dev->qp_table_size+iter->specials are
- * the potential hash bucket entries
- *
- */
- for (; n < dev->rdi.qp_dev->qp_table_size + iter->specials; n++) {
- if (pqp) {
- qp = rcu_dereference(pqp->next);
- } else {
- if (n < iter->specials) {
- struct hfi1_pportdata *ppd;
- struct hfi1_ibport *ibp;
- int pidx;
-
- pidx = n % dev->rdi.ibdev.phys_port_cnt;
- ppd = &dd_from_dev(dev)->pport[pidx];
- ibp = &ppd->ibport_data;
-
- if (!(n & 1))
- qp = rcu_dereference(ibp->rvp.qp[0]);
- else
- qp = rcu_dereference(ibp->rvp.qp[1]);
- } else {
- qp = rcu_dereference(
- dev->rdi.qp_dev->qp_table[
- (n - iter->specials)]);
- }
- }
- pqp = qp;
- if (qp) {
- iter->qp = qp;
- iter->n = n;
- return 0;
- }
- }
- return ret;
-}
-
static const char * const qp_type_str[] = {
"SMI", "GSI", "RC", "UC", "UD",
};
@@ -732,19 +590,29 @@ static int qp_idle(struct rvt_qp *qp)
qp->s_tail == qp->s_head;
}
-void qp_iter_print(struct seq_file *s, struct qp_iter *iter)
+/**
+ * qp_iter_print - print the qp information to seq_file
+ * @s: the seq_file to emit the qp information on
+ * @iter: the iterator for the qp hash list
+ */
+void qp_iter_print(struct seq_file *s, struct rvt_qp_iter *iter)
{
struct rvt_swqe *wqe;
struct rvt_qp *qp = iter->qp;
struct hfi1_qp_priv *priv = qp->priv;
struct sdma_engine *sde;
struct send_context *send_context;
+ struct rvt_ack_entry *e = NULL;
+ struct rvt_srq *srq = qp->ibqp.srq ?
+ ibsrq_to_rvtsrq(qp->ibqp.srq) : NULL;
sde = qp_to_sdma_engine(qp, priv->s_sc);
wqe = rvt_get_swqe_ptr(qp, qp->s_last);
send_context = qp_to_send_context(qp, priv->s_sc);
+ if (qp->s_ack_queue)
+ e = &qp->s_ack_queue[qp->s_tail_ack_queue];
seq_printf(s,
- "N %d %s QP %x R %u %s %u %u %u f=%x %u %u %u %u %u %u PSN %x %x %x %x %x (%u %u %u %u %u %u %u) RQP %x LID %x SL %u MTU %u %u %u %u SDE %p,%u SC %p,%u SCQ %u %u PID %d\n",
+ "N %d %s QP %x R %u %s %u %u f=%x %u %u %u %u %u %u SPSN %x %x %x %x %x RPSN %x S(%u %u %u %u %u %u %u) R(%u %u %u) RQP %x LID %x SL %u MTU %u %u %u %u %u SDE %p,%u SC %p,%u SCQ %u %u PID %d OS %x %x E %x %x %x RNR %d %s %d\n",
iter->n,
qp_idle(qp) ? "I" : "B",
qp->ibqp.qp_num,
@@ -752,7 +620,6 @@ void qp_iter_print(struct seq_file *s, struct qp_iter *iter)
qp_type_str[qp->ibqp.qp_type],
qp->state,
wqe ? wqe->wr.opcode : 0,
- qp->s_hdrwords,
qp->s_flags,
iowait_sdma_pending(&priv->s_iowait),
iowait_pio_pending(&priv->s_iowait),
@@ -763,50 +630,52 @@ void qp_iter_print(struct seq_file *s, struct qp_iter *iter)
qp->s_last_psn,
qp->s_psn, qp->s_next_psn,
qp->s_sending_psn, qp->s_sending_hpsn,
+ qp->r_psn,
qp->s_last, qp->s_acked, qp->s_cur,
qp->s_tail, qp->s_head, qp->s_size,
qp->s_avail,
+ /* ack_queue ring pointers, size */
+ qp->s_tail_ack_queue, qp->r_head_ack_queue,
+ rvt_max_atomic(&to_idev(qp->ibqp.device)->rdi),
+ /* remote QP info */
qp->remote_qpn,
- qp->remote_ah_attr.dlid,
- qp->remote_ah_attr.sl,
+ rdma_ah_get_dlid(&qp->remote_ah_attr),
+ rdma_ah_get_sl(&qp->remote_ah_attr),
qp->pmtu,
qp->s_retry,
qp->s_retry_cnt,
qp->s_rnr_retry_cnt,
+ qp->s_rnr_retry,
sde,
sde ? sde->this_idx : 0,
send_context,
send_context ? send_context->sw_index : 0,
- ibcq_to_rvtcq(qp->ibqp.send_cq)->queue->head,
- ibcq_to_rvtcq(qp->ibqp.send_cq)->queue->tail,
- qp->pid);
-}
-
-void qp_comm_est(struct rvt_qp *qp)
-{
- qp->r_flags |= RVT_R_COMM_EST;
- if (qp->ibqp.event_handler) {
- struct ib_event ev;
-
- ev.device = qp->ibqp.device;
- ev.element.qp = &qp->ibqp;
- ev.event = IB_EVENT_COMM_EST;
- qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
- }
-}
-
-void *qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp,
- gfp_t gfp)
+ ib_cq_head(qp->ibqp.send_cq),
+ ib_cq_tail(qp->ibqp.send_cq),
+ qp->pid,
+ qp->s_state,
+ qp->s_ack_state,
+ /* ack queue information */
+ e ? e->opcode : 0,
+ e ? e->psn : 0,
+ e ? e->lpsn : 0,
+ qp->r_min_rnr_timer,
+ srq ? "SRQ" : "RQ",
+ srq ? srq->rq.size : qp->r_rq.size
+ );
+}
+
+void *qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp)
{
struct hfi1_qp_priv *priv;
- priv = kzalloc_node(sizeof(*priv), gfp, rdi->dparms.node);
+ priv = kzalloc_node(sizeof(*priv), GFP_KERNEL, rdi->dparms.node);
if (!priv)
return ERR_PTR(-ENOMEM);
priv->owner = qp;
- priv->s_ahg = kzalloc_node(sizeof(*priv->s_ahg), gfp,
+ priv->s_ahg = kzalloc_node(sizeof(*priv->s_ahg), GFP_KERNEL,
rdi->dparms.node);
if (!priv->s_ahg) {
kfree(priv);
@@ -816,11 +685,13 @@ void *qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp,
&priv->s_iowait,
1,
_hfi1_do_send,
+ _hfi1_do_tid_send,
iowait_sleep,
iowait_wakeup,
- iowait_sdma_drained);
- setup_timer(&priv->s_rnr_timer, hfi1_rc_rnr_retry, (unsigned long)qp);
- qp->s_timer.function = hfi1_rc_timeout;
+ iowait_sdma_drained,
+ hfi1_init_priority);
+ /* Init to a value to start the running average correctly */
+ priv->s_running_pkt_size = piothreshold / 2;
return priv;
}
@@ -828,6 +699,7 @@ void qp_priv_free(struct rvt_dev_info *rdi, struct rvt_qp *qp)
{
struct hfi1_qp_priv *priv = qp->priv;
+ hfi1_qp_priv_tid_free(rdi, qp);
kfree(priv->s_ahg);
kfree(priv);
}
@@ -861,21 +733,24 @@ void flush_qp_waiters(struct rvt_qp *qp)
{
lockdep_assert_held(&qp->s_lock);
flush_iowait(qp);
- hfi1_stop_rc_timers(qp);
+ hfi1_tid_rdma_flush_wait(qp);
}
void stop_send_queue(struct rvt_qp *qp)
{
struct hfi1_qp_priv *priv = qp->priv;
- cancel_work_sync(&priv->s_iowait.iowork);
- hfi1_del_timers_sync(qp);
+ iowait_cancel_work(&priv->s_iowait);
+ if (cancel_work_sync(&priv->tid_rdma.trigger_work))
+ rvt_put_qp(qp);
}
void quiesce_qp(struct rvt_qp *qp)
{
struct hfi1_qp_priv *priv = qp->priv;
+ hfi1_del_tid_reap_timer(qp);
+ hfi1_del_tid_retry_timer(qp);
iowait_sdma_drain(&priv->s_iowait);
qp_pio_drain(qp);
flush_tx_list(qp);
@@ -883,10 +758,13 @@ void quiesce_qp(struct rvt_qp *qp)
void notify_qp_reset(struct rvt_qp *qp)
{
- struct hfi1_qp_priv *priv = qp->priv;
-
- priv->r_adefered = 0;
+ hfi1_qp_kern_exp_rcv_clear_all(qp);
+ qp->r_adefered = 0;
clear_ahg(qp);
+
+ /* Clear any OPFN state */
+ if (qp->ibqp.qp_type == IB_QPT_RC)
+ opfn_conn_error(qp);
}
/*
@@ -900,11 +778,12 @@ void hfi1_migrate_qp(struct rvt_qp *qp)
qp->s_mig_state = IB_MIG_MIGRATED;
qp->remote_ah_attr = qp->alt_ah_attr;
- qp->port_num = qp->alt_ah_attr.port_num;
+ qp->port_num = rdma_ah_get_port_num(&qp->alt_ah_attr);
qp->s_pkey_index = qp->s_alt_pkey_index;
- qp->s_flags |= RVT_S_AHG_CLEAR;
+ qp->s_flags |= HFI1_S_AHG_CLEAR;
priv->s_sc = ah_to_sc(qp->ibqp.device, &qp->remote_ah_attr);
priv->s_sde = qp_to_sdma_engine(qp, priv->s_sc);
+ qp_set_16b(qp);
ev.device = qp->ibqp.device;
ev.element.qp = &qp->ibqp;
@@ -930,7 +809,7 @@ u32 mtu_from_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, u32 pmtu)
u8 sc, vl;
ibp = &dd->pport[qp->port_num - 1].ibport_data;
- sc = ibp->sl_to_sc[qp->remote_ah_attr.sl];
+ sc = ibp->sl_to_sc[rdma_ah_get_sl(&qp->remote_ah_attr)];
vl = sc_to_vlt(dd, sc);
mtu = verbs_mtu_enum_to_int(qp->ibqp.device, pmtu);
@@ -961,19 +840,25 @@ int get_pmtu_from_attr(struct rvt_dev_info *rdi, struct rvt_qp *qp,
void notify_error_qp(struct rvt_qp *qp)
{
- struct hfi1_ibdev *dev = to_idev(qp->ibqp.device);
struct hfi1_qp_priv *priv = qp->priv;
+ seqlock_t *lock = priv->s_iowait.lock;
- write_seqlock(&dev->iowait_lock);
- if (!list_empty(&priv->s_iowait.list) && !(qp->s_flags & RVT_S_BUSY)) {
- qp->s_flags &= ~RVT_S_ANY_WAIT_IO;
- list_del_init(&priv->s_iowait.list);
- priv->s_iowait.lock = NULL;
- rvt_put_qp(qp);
+ if (lock) {
+ write_seqlock(lock);
+ if (!list_empty(&priv->s_iowait.list) &&
+ !(qp->s_flags & RVT_S_BUSY) &&
+ !(priv->s_flags & RVT_S_BUSY)) {
+ qp->s_flags &= ~HFI1_S_ANY_WAIT_IO;
+ iowait_clear_flag(&priv->s_iowait, IOWAIT_PENDING_IB);
+ iowait_clear_flag(&priv->s_iowait, IOWAIT_PENDING_TID);
+ list_del_init(&priv->s_iowait.list);
+ priv->s_iowait.lock = NULL;
+ rvt_put_qp(qp);
+ }
+ write_sequnlock(lock);
}
- write_sequnlock(&dev->iowait_lock);
- if (!(qp->s_flags & RVT_S_BUSY)) {
+ if (!(qp->s_flags & RVT_S_BUSY) && !(priv->s_flags & RVT_S_BUSY)) {
qp->s_hdrwords = 0;
if (qp->s_rdma_mr) {
rvt_put_mr(qp->s_rdma_mr);
@@ -984,6 +869,45 @@ void notify_error_qp(struct rvt_qp *qp)
}
/**
+ * hfi1_qp_iter_cb - callback for iterator
+ * @qp: the qp
+ * @v: the sl in low bits of v
+ *
+ * This is called from the iterator callback to work
+ * on an individual qp.
+ */
+static void hfi1_qp_iter_cb(struct rvt_qp *qp, u64 v)
+{
+ int lastwqe;
+ struct ib_event ev;
+ struct hfi1_ibport *ibp =
+ to_iport(qp->ibqp.device, qp->port_num);
+ struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
+ u8 sl = (u8)v;
+
+ if (qp->port_num != ppd->port ||
+ (qp->ibqp.qp_type != IB_QPT_UC &&
+ qp->ibqp.qp_type != IB_QPT_RC) ||
+ rdma_ah_get_sl(&qp->remote_ah_attr) != sl ||
+ !(ib_rvt_state_ops[qp->state] & RVT_POST_SEND_OK))
+ return;
+
+ spin_lock_irq(&qp->r_lock);
+ spin_lock(&qp->s_hlock);
+ spin_lock(&qp->s_lock);
+ lastwqe = rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
+ spin_unlock(&qp->s_lock);
+ spin_unlock(&qp->s_hlock);
+ spin_unlock_irq(&qp->r_lock);
+ if (lastwqe) {
+ ev.device = qp->ibqp.device;
+ ev.element.qp = &qp->ibqp;
+ ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
+ qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
+ }
+}
+
+/**
* hfi1_error_port_qps - put a port's RC/UC qps into error state
* @ibp: the ibport.
* @sl: the service level.
@@ -994,44 +918,8 @@ void notify_error_qp(struct rvt_qp *qp)
*/
void hfi1_error_port_qps(struct hfi1_ibport *ibp, u8 sl)
{
- struct rvt_qp *qp = NULL;
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
struct hfi1_ibdev *dev = &ppd->dd->verbs_dev;
- int n;
- int lastwqe;
- struct ib_event ev;
-
- rcu_read_lock();
-
- /* Deal only with RC/UC qps that use the given SL. */
- for (n = 0; n < dev->rdi.qp_dev->qp_table_size; n++) {
- for (qp = rcu_dereference(dev->rdi.qp_dev->qp_table[n]); qp;
- qp = rcu_dereference(qp->next)) {
- if (qp->port_num == ppd->port &&
- (qp->ibqp.qp_type == IB_QPT_UC ||
- qp->ibqp.qp_type == IB_QPT_RC) &&
- qp->remote_ah_attr.sl == sl &&
- (ib_rvt_state_ops[qp->state] &
- RVT_POST_SEND_OK)) {
- spin_lock_irq(&qp->r_lock);
- spin_lock(&qp->s_hlock);
- spin_lock(&qp->s_lock);
- lastwqe = rvt_error_qp(qp,
- IB_WC_WR_FLUSH_ERR);
- spin_unlock(&qp->s_lock);
- spin_unlock(&qp->s_hlock);
- spin_unlock_irq(&qp->r_lock);
- if (lastwqe) {
- ev.device = qp->ibqp.device;
- ev.element.qp = &qp->ibqp;
- ev.event =
- IB_EVENT_QP_LAST_WQE_REACHED;
- qp->ibqp.event_handler(&ev,
- qp->ibqp.qp_context);
- }
- }
- }
- }
- rcu_read_unlock();
+ rvt_qp_iter(&dev->rdi, sl, hfi1_qp_iter_cb);
}
diff --git a/drivers/infiniband/hw/hfi1/qp.h b/drivers/infiniband/hw/hfi1/qp.h
index 587d84d65bb8..870ff1a6e5c4 100644
--- a/drivers/infiniband/hw/hfi1/qp.h
+++ b/drivers/infiniband/hw/hfi1/qp.h
@@ -1,62 +1,62 @@
-#ifndef _QP_H
-#define _QP_H
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
+ * Copyright(c) 2015 - 2018 Intel Corporation.
*/
+#ifndef _QP_H
+#define _QP_H
#include <linux/hash.h>
#include <rdma/rdmavt_qp.h>
#include "verbs.h"
#include "sdma.h"
+#include "verbs_txreq.h"
extern unsigned int hfi1_qp_table_size;
extern const struct rvt_operation_params hfi1_post_parms[];
/*
+ * Driver specific s_flags starting at bit 31 down to HFI1_S_MIN_BIT_MASK
+ *
+ * HFI1_S_AHG_VALID - ahg header valid on chip
+ * HFI1_S_AHG_CLEAR - have send engine clear ahg state
+ * HFI1_S_WAIT_PIO_DRAIN - qp waiting for PIOs to drain
+ * HFI1_S_WAIT_TID_SPACE - a QP is waiting for TID resource
+ * HFI1_S_WAIT_TID_RESP - waiting for a TID RDMA WRITE response
+ * HFI1_S_WAIT_HALT - halt the first leg send engine
+ * HFI1_S_MIN_BIT_MASK - the lowest bit that can be used by hfi1
+ */
+#define HFI1_S_AHG_VALID 0x80000000
+#define HFI1_S_AHG_CLEAR 0x40000000
+#define HFI1_S_WAIT_PIO_DRAIN 0x20000000
+#define HFI1_S_WAIT_TID_SPACE 0x10000000
+#define HFI1_S_WAIT_TID_RESP 0x08000000
+#define HFI1_S_WAIT_HALT 0x04000000
+#define HFI1_S_MIN_BIT_MASK 0x01000000
+
+/*
+ * overload wait defines
+ */
+
+#define HFI1_S_ANY_WAIT_IO (RVT_S_ANY_WAIT_IO | HFI1_S_WAIT_PIO_DRAIN)
+#define HFI1_S_ANY_WAIT (HFI1_S_ANY_WAIT_IO | RVT_S_ANY_WAIT_SEND)
+#define HFI1_S_ANY_TID_WAIT_SEND (RVT_S_WAIT_SSN_CREDIT | RVT_S_WAIT_DMA)
+
+/*
+ * Send if not busy or waiting for I/O and either
+ * a RC response is pending or we can process send work requests.
+ */
+static inline int hfi1_send_ok(struct rvt_qp *qp)
+{
+ struct hfi1_qp_priv *priv = qp->priv;
+
+ return !(qp->s_flags & (RVT_S_BUSY | HFI1_S_ANY_WAIT_IO)) &&
+ (verbs_txreq_queued(iowait_get_ib_work(&priv->s_iowait)) ||
+ (qp->s_flags & RVT_S_RESP_PENDING) ||
+ !(qp->s_flags & RVT_S_ANY_WAIT_SEND));
+}
+
+/*
* free_ahg - clear ahg from QP
*/
static inline void clear_ahg(struct rvt_qp *qp)
@@ -64,43 +64,13 @@ static inline void clear_ahg(struct rvt_qp *qp)
struct hfi1_qp_priv *priv = qp->priv;
priv->s_ahg->ahgcount = 0;
- qp->s_flags &= ~(RVT_S_AHG_VALID | RVT_S_AHG_CLEAR);
+ qp->s_flags &= ~(HFI1_S_AHG_VALID | HFI1_S_AHG_CLEAR);
if (priv->s_sde && qp->s_ahgidx >= 0)
sdma_ahg_free(priv->s_sde, qp->s_ahgidx);
qp->s_ahgidx = -1;
}
/**
- * hfi1_compute_aeth - compute the AETH (syndrome + MSN)
- * @qp: the queue pair to compute the AETH for
- *
- * Returns the AETH.
- */
-__be32 hfi1_compute_aeth(struct rvt_qp *qp);
-
-/**
- * hfi1_create_qp - create a queue pair for a device
- * @ibpd: the protection domain who's device we create the queue pair for
- * @init_attr: the attributes of the queue pair
- * @udata: user data for libibverbs.so
- *
- * Returns the queue pair on success, otherwise returns an errno.
- *
- * Called by the ib_create_qp() core verbs function.
- */
-struct ib_qp *hfi1_create_qp(struct ib_pd *ibpd,
- struct ib_qp_init_attr *init_attr,
- struct ib_udata *udata);
-/**
- * hfi1_get_credit - flush the send work queue of a QP
- * @qp: the qp who's send work queue to flush
- * @aeth: the Acknowledge Extended Transport Header
- *
- * The QP s_lock should be held.
- */
-void hfi1_get_credit(struct rvt_qp *qp, u32 aeth);
-
-/**
* hfi1_qp_wakeup - wake up on the indicated event
* @qp: the QP
* @flag: flag the qp on which the qp is stalled
@@ -110,43 +80,17 @@ void hfi1_qp_wakeup(struct rvt_qp *qp, u32 flag);
struct sdma_engine *qp_to_sdma_engine(struct rvt_qp *qp, u8 sc5);
struct send_context *qp_to_send_context(struct rvt_qp *qp, u8 sc5);
-struct qp_iter;
-
-/**
- * qp_iter_init - initialize the iterator for the qp hash list
- * @dev: the hfi1_ibdev
- */
-struct qp_iter *qp_iter_init(struct hfi1_ibdev *dev);
-
-/**
- * qp_iter_next - Find the next qp in the hash list
- * @iter: the iterator for the qp hash list
- */
-int qp_iter_next(struct qp_iter *iter);
-
-/**
- * qp_iter_print - print the qp information to seq_file
- * @s: the seq_file to emit the qp information on
- * @iter: the iterator for the qp hash list
- */
-void qp_iter_print(struct seq_file *s, struct qp_iter *iter);
-
-/**
- * qp_comm_est - handle trap with QP established
- * @qp: the QP
- */
-void qp_comm_est(struct rvt_qp *qp);
+void qp_iter_print(struct seq_file *s, struct rvt_qp_iter *iter);
-void _hfi1_schedule_send(struct rvt_qp *qp);
-void hfi1_schedule_send(struct rvt_qp *qp);
+bool _hfi1_schedule_send(struct rvt_qp *qp);
+bool hfi1_schedule_send(struct rvt_qp *qp);
void hfi1_migrate_qp(struct rvt_qp *qp);
/*
* Functions provided by hfi1 driver for rdmavt to use
*/
-void *qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp,
- gfp_t gfp);
+void *qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp);
void qp_priv_free(struct rvt_dev_info *rdi, struct rvt_qp *qp);
unsigned free_all_qps(struct rvt_dev_info *rdi);
void notify_qp_reset(struct rvt_qp *qp);
@@ -159,4 +103,5 @@ void quiesce_qp(struct rvt_qp *qp);
u32 mtu_from_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, u32 pmtu);
int mtu_to_path_mtu(u32 mtu);
void hfi1_error_port_qps(struct hfi1_ibport *ibp, u8 sl);
+void hfi1_qp_unbusy(struct rvt_qp *qp, struct iowait_work *wait);
#endif /* _QP_H */
diff --git a/drivers/infiniband/hw/hfi1/qsfp.c b/drivers/infiniband/hw/hfi1/qsfp.c
index 1869f639c3ae..3b7842a7f634 100644
--- a/drivers/infiniband/hw/hfi1/qsfp.c
+++ b/drivers/infiniband/hw/hfi1/qsfp.c
@@ -1,48 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
* Copyright(c) 2015, 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
*/
#include <linux/delay.h>
@@ -204,6 +162,8 @@ static void clean_i2c_bus(struct hfi1_i2c_bus *bus)
void clean_up_i2c(struct hfi1_devdata *dd, struct hfi1_asic_data *ad)
{
+ if (!ad)
+ return;
clean_i2c_bus(ad->i2c_bus0);
ad->i2c_bus0 = NULL;
clean_i2c_bus(ad->i2c_bus1);
@@ -229,7 +189,7 @@ static int i2c_bus_write(struct hfi1_devdata *dd, struct hfi1_i2c_bus *i2c,
break;
case 2:
offset_bytes[1] = (offset >> 8) & 0xff;
- /* fall through */
+ fallthrough;
case 1:
num_msgs = 2;
offset_bytes[0] = offset & 0xff;
@@ -240,7 +200,7 @@ static int i2c_bus_write(struct hfi1_devdata *dd, struct hfi1_i2c_bus *i2c,
msgs[0].buf = offset_bytes;
msgs[1].addr = slave_addr;
- msgs[1].flags = I2C_M_NOSTART,
+ msgs[1].flags = I2C_M_NOSTART;
msgs[1].len = len;
msgs[1].buf = data;
break;
@@ -277,7 +237,7 @@ static int i2c_bus_read(struct hfi1_devdata *dd, struct hfi1_i2c_bus *bus,
break;
case 2:
offset_bytes[1] = (offset >> 8) & 0xff;
- /* fall through */
+ fallthrough;
case 1:
num_msgs = 2;
offset_bytes[0] = offset & 0xff;
@@ -288,7 +248,7 @@ static int i2c_bus_read(struct hfi1_devdata *dd, struct hfi1_i2c_bus *bus,
msgs[0].buf = offset_bytes;
msgs[1].addr = slave_addr;
- msgs[1].flags = I2C_M_RD,
+ msgs[1].flags = I2C_M_RD;
msgs[1].len = len;
msgs[1].buf = data;
break;
@@ -445,26 +405,6 @@ int qsfp_write(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
}
/*
- * Perform a stand-alone single QSFP write. Acquire the resource, do the
- * write, then release the resource.
- */
-int one_qsfp_write(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
- int len)
-{
- struct hfi1_devdata *dd = ppd->dd;
- u32 resource = qsfp_resource(dd);
- int ret;
-
- ret = acquire_chip_resource(dd, resource, QSFP_WAIT);
- if (ret)
- return ret;
- ret = qsfp_write(ppd, target, addr, bp, len);
- release_chip_resource(dd, resource);
-
- return ret;
-}
-
-/*
* Access page n, offset m of QSFP memory as defined by SFF 8636
* by reading @addr = ((256 * n) + m)
*
diff --git a/drivers/infiniband/hw/hfi1/qsfp.h b/drivers/infiniband/hw/hfi1/qsfp.h
index 36cf52359848..5c59d53fcb63 100644
--- a/drivers/infiniband/hw/hfi1/qsfp.h
+++ b/drivers/infiniband/hw/hfi1/qsfp.h
@@ -1,48 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
* Copyright(c) 2015, 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
*/
/* QSFP support common definitions, for hfi driver */
@@ -237,8 +195,6 @@ int qsfp_write(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
int len);
int qsfp_read(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
int len);
-int one_qsfp_write(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
- int len);
int one_qsfp_read(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
int len);
struct hfi1_asic_data;
diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c
index 809b26eb6d3c..b36242c9d42c 100644
--- a/drivers/infiniband/hw/hfi1/rc.c
+++ b/drivers/infiniband/hw/hfi1/rc.c
@@ -1,48 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
+ * Copyright(c) 2015 - 2018 Intel Corporation.
*/
#include <linux/io.h>
@@ -51,151 +9,48 @@
#include "hfi.h"
#include "qp.h"
+#include "rc.h"
#include "verbs_txreq.h"
#include "trace.h"
-/* cut down ridiculously long IB macro names */
-#define OP(x) RC_OP(x)
-
-/**
- * hfi1_add_retry_timer - add/start a retry timer
- * @qp - the QP
- *
- * add a retry timer on the QP
- */
-static inline void hfi1_add_retry_timer(struct rvt_qp *qp)
-{
- struct ib_qp *ibqp = &qp->ibqp;
- struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
-
- lockdep_assert_held(&qp->s_lock);
- qp->s_flags |= RVT_S_TIMER;
- /* 4.096 usec. * (1 << qp->timeout) */
- qp->s_timer.expires = jiffies + qp->timeout_jiffies +
- rdi->busy_jiffies;
- add_timer(&qp->s_timer);
-}
-
-/**
- * hfi1_add_rnr_timer - add/start an rnr timer
- * @qp - the QP
- * @to - timeout in usecs
- *
- * add an rnr timer on the QP
- */
-void hfi1_add_rnr_timer(struct rvt_qp *qp, u32 to)
-{
- struct hfi1_qp_priv *priv = qp->priv;
-
- lockdep_assert_held(&qp->s_lock);
- qp->s_flags |= RVT_S_WAIT_RNR;
- priv->s_rnr_timer.expires = jiffies + usecs_to_jiffies(to);
- add_timer(&priv->s_rnr_timer);
-}
-
-/**
- * hfi1_mod_retry_timer - mod a retry timer
- * @qp - the QP
- *
- * Modify a potentially already running retry
- * timer
- */
-static inline void hfi1_mod_retry_timer(struct rvt_qp *qp)
-{
- struct ib_qp *ibqp = &qp->ibqp;
- struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
-
- lockdep_assert_held(&qp->s_lock);
- qp->s_flags |= RVT_S_TIMER;
- /* 4.096 usec. * (1 << qp->timeout) */
- mod_timer(&qp->s_timer, jiffies + qp->timeout_jiffies +
- rdi->busy_jiffies);
-}
-
-/**
- * hfi1_stop_retry_timer - stop a retry timer
- * @qp - the QP
- *
- * stop a retry timer and return if the timer
- * had been pending.
- */
-static inline int hfi1_stop_retry_timer(struct rvt_qp *qp)
-{
- int rval = 0;
-
- lockdep_assert_held(&qp->s_lock);
- /* Remove QP from retry */
- if (qp->s_flags & RVT_S_TIMER) {
- qp->s_flags &= ~RVT_S_TIMER;
- rval = del_timer(&qp->s_timer);
- }
- return rval;
-}
-
-/**
- * hfi1_stop_rc_timers - stop all timers
- * @qp - the QP
- *
- * stop any pending timers
- */
-void hfi1_stop_rc_timers(struct rvt_qp *qp)
+struct rvt_ack_entry *find_prev_entry(struct rvt_qp *qp, u32 psn, u8 *prev,
+ u8 *prev_ack, bool *scheduled)
+ __must_hold(&qp->s_lock)
{
- struct hfi1_qp_priv *priv = qp->priv;
-
- lockdep_assert_held(&qp->s_lock);
- /* Remove QP from all timers */
- if (qp->s_flags & (RVT_S_TIMER | RVT_S_WAIT_RNR)) {
- qp->s_flags &= ~(RVT_S_TIMER | RVT_S_WAIT_RNR);
- del_timer(&qp->s_timer);
- del_timer(&priv->s_rnr_timer);
- }
-}
+ struct rvt_ack_entry *e = NULL;
+ u8 i, p;
+ bool s = true;
-/**
- * hfi1_stop_rnr_timer - stop an rnr timer
- * @qp - the QP
- *
- * stop an rnr timer and return if the timer
- * had been pending.
- */
-static inline int hfi1_stop_rnr_timer(struct rvt_qp *qp)
-{
- int rval = 0;
- struct hfi1_qp_priv *priv = qp->priv;
-
- lockdep_assert_held(&qp->s_lock);
- /* Remove QP from rnr timer */
- if (qp->s_flags & RVT_S_WAIT_RNR) {
- qp->s_flags &= ~RVT_S_WAIT_RNR;
- rval = del_timer(&priv->s_rnr_timer);
+ for (i = qp->r_head_ack_queue; ; i = p) {
+ if (i == qp->s_tail_ack_queue)
+ s = false;
+ if (i)
+ p = i - 1;
+ else
+ p = rvt_size_atomic(ib_to_rvt(qp->ibqp.device));
+ if (p == qp->r_head_ack_queue) {
+ e = NULL;
+ break;
+ }
+ e = &qp->s_ack_queue[p];
+ if (!e->opcode) {
+ e = NULL;
+ break;
+ }
+ if (cmp_psn(psn, e->psn) >= 0) {
+ if (p == qp->s_tail_ack_queue &&
+ cmp_psn(psn, e->lpsn) <= 0)
+ s = false;
+ break;
+ }
}
- return rval;
-}
-
-/**
- * hfi1_del_timers_sync - wait for any timeout routines to exit
- * @qp - the QP
- */
-void hfi1_del_timers_sync(struct rvt_qp *qp)
-{
- struct hfi1_qp_priv *priv = qp->priv;
-
- del_timer_sync(&qp->s_timer);
- del_timer_sync(&priv->s_rnr_timer);
-}
-
-static u32 restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe,
- u32 psn, u32 pmtu)
-{
- u32 len;
-
- len = delta_psn(psn, wqe->psn) * pmtu;
- ss->sge = wqe->sg_list[0];
- ss->sg_list = wqe->sg_list + 1;
- ss->num_sge = wqe->wr.num_sge;
- ss->total_len = wqe->length;
- hfi1_skip_sge(ss, len, 0);
- return wqe->length - len;
+ if (prev)
+ *prev = p;
+ if (prev_ack)
+ *prev_ack = i;
+ if (scheduled)
+ *scheduled = s;
+ return e;
}
/**
@@ -214,40 +69,56 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
struct hfi1_pkt_state *ps)
{
struct rvt_ack_entry *e;
- u32 hwords;
- u32 len;
- u32 bth0;
- u32 bth2;
+ u32 hwords, hdrlen;
+ u32 len = 0;
+ u32 bth0 = 0, bth2 = 0;
+ u32 bth1 = qp->remote_qpn | (HFI1_CAP_IS_KSET(OPFN) << IB_BTHE_E_SHIFT);
int middle = 0;
u32 pmtu = qp->pmtu;
- struct hfi1_qp_priv *priv = qp->priv;
+ struct hfi1_qp_priv *qpriv = qp->priv;
+ bool last_pkt;
+ u32 delta;
+ u8 next = qp->s_tail_ack_queue;
+ struct tid_rdma_request *req;
+ trace_hfi1_rsp_make_rc_ack(qp, 0);
lockdep_assert_held(&qp->s_lock);
/* Don't send an ACK if we aren't supposed to. */
if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK))
goto bail;
- /* header size in 32-bit words LRH+BTH = (8+12)/4. */
- hwords = 5;
+ if (qpriv->hdr_type == HFI1_PKT_TYPE_9B)
+ /* header size in 32-bit words LRH+BTH = (8+12)/4. */
+ hwords = 5;
+ else
+ /* header size in 32-bit words 16B LRH+BTH = (16+12)/4. */
+ hwords = 7;
switch (qp->s_ack_state) {
case OP(RDMA_READ_RESPONSE_LAST):
case OP(RDMA_READ_RESPONSE_ONLY):
e = &qp->s_ack_queue[qp->s_tail_ack_queue];
- if (e->rdma_sge.mr) {
- rvt_put_mr(e->rdma_sge.mr);
- e->rdma_sge.mr = NULL;
- }
- /* FALLTHROUGH */
+ release_rdma_sge_mr(e);
+ fallthrough;
case OP(ATOMIC_ACKNOWLEDGE):
/*
* We can increment the tail pointer now that the last
* response has been sent instead of only being
* constructed.
*/
- if (++qp->s_tail_ack_queue > HFI1_MAX_RDMA_ATOMIC)
- qp->s_tail_ack_queue = 0;
- /* FALLTHROUGH */
+ if (++next > rvt_size_atomic(&dev->rdi))
+ next = 0;
+ /*
+ * Only advance the s_acked_ack_queue pointer if there
+ * have been no TID RDMA requests.
+ */
+ e = &qp->s_ack_queue[qp->s_tail_ack_queue];
+ if (e->opcode != TID_OP(WRITE_REQ) &&
+ qp->s_acked_ack_queue == qp->s_tail_ack_queue)
+ qp->s_acked_ack_queue = next;
+ qp->s_tail_ack_queue = next;
+ trace_hfi1_rsp_make_rc_ack(qp, e->psn);
+ fallthrough;
case OP(SEND_ONLY):
case OP(ACKNOWLEDGE):
/* Check for no next entry in the queue. */
@@ -258,6 +129,12 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
}
e = &qp->s_ack_queue[qp->s_tail_ack_queue];
+ /* Check for tid write fence */
+ if ((qpriv->s_flags & HFI1_R_TID_WAIT_INTERLCK) ||
+ hfi1_tid_rdma_ack_interlock(qp, e)) {
+ iowait_set_flag(&qpriv->s_iowait, IOWAIT_PENDING_IB);
+ goto bail;
+ }
if (e->opcode == OP(RDMA_READ_REQUEST)) {
/*
* If a RDMA read response is being resent and
@@ -267,6 +144,10 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
*/
len = e->rdma_sge.sge_length;
if (len && !e->rdma_sge.mr) {
+ if (qp->s_acked_ack_queue ==
+ qp->s_tail_ack_queue)
+ qp->s_acked_ack_queue =
+ qp->r_head_ack_queue;
qp->s_tail_ack_queue = qp->r_head_ack_queue;
goto bail;
}
@@ -284,27 +165,67 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
qp->s_ack_state = OP(RDMA_READ_RESPONSE_ONLY);
e->sent = 1;
}
- ohdr->u.aeth = hfi1_compute_aeth(qp);
+ ohdr->u.aeth = rvt_compute_aeth(qp);
hwords++;
qp->s_ack_rdma_psn = e->psn;
bth2 = mask_psn(qp->s_ack_rdma_psn++);
+ } else if (e->opcode == TID_OP(WRITE_REQ)) {
+ /*
+ * If a TID RDMA WRITE RESP is being resent, we have to
+ * wait for the actual request. All requests that are to
+ * be resent will have their state set to
+ * TID_REQUEST_RESEND. When the new request arrives, the
+ * state will be changed to TID_REQUEST_RESEND_ACTIVE.
+ */
+ req = ack_to_tid_req(e);
+ if (req->state == TID_REQUEST_RESEND ||
+ req->state == TID_REQUEST_INIT_RESEND)
+ goto bail;
+ qp->s_ack_state = TID_OP(WRITE_RESP);
+ qp->s_ack_rdma_psn = mask_psn(e->psn + req->cur_seg);
+ goto write_resp;
+ } else if (e->opcode == TID_OP(READ_REQ)) {
+ /*
+ * If a TID RDMA read response is being resent and
+ * we haven't seen the duplicate request yet,
+ * then stop sending the remaining responses the
+ * responder has seen until the requester re-sends it.
+ */
+ len = e->rdma_sge.sge_length;
+ if (len && !e->rdma_sge.mr) {
+ if (qp->s_acked_ack_queue ==
+ qp->s_tail_ack_queue)
+ qp->s_acked_ack_queue =
+ qp->r_head_ack_queue;
+ qp->s_tail_ack_queue = qp->r_head_ack_queue;
+ goto bail;
+ }
+ /* Copy SGE state in case we need to resend */
+ ps->s_txreq->mr = e->rdma_sge.mr;
+ if (ps->s_txreq->mr)
+ rvt_get_mr(ps->s_txreq->mr);
+ qp->s_ack_rdma_sge.sge = e->rdma_sge;
+ qp->s_ack_rdma_sge.num_sge = 1;
+ qp->s_ack_state = TID_OP(READ_RESP);
+ goto read_resp;
} else {
/* COMPARE_SWAP or FETCH_ADD */
ps->s_txreq->ss = NULL;
len = 0;
qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE);
- ohdr->u.at.aeth = hfi1_compute_aeth(qp);
+ ohdr->u.at.aeth = rvt_compute_aeth(qp);
ib_u64_put(e->atomic_data, &ohdr->u.at.atomic_ack_eth);
hwords += sizeof(ohdr->u.at) / sizeof(u32);
bth2 = mask_psn(e->psn);
e->sent = 1;
}
+ trace_hfi1_tid_write_rsp_make_rc_ack(qp);
bth0 = qp->s_ack_state << 24;
break;
case OP(RDMA_READ_RESPONSE_FIRST):
qp->s_ack_state = OP(RDMA_READ_RESPONSE_MIDDLE);
- /* FALLTHROUGH */
+ fallthrough;
case OP(RDMA_READ_RESPONSE_MIDDLE):
ps->s_txreq->ss = &qp->s_ack_rdma_sge;
ps->s_txreq->mr = qp->s_ack_rdma_sge.sge.mr;
@@ -315,7 +236,7 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
len = pmtu;
middle = HFI1_CAP_IS_KSET(SDMA_AHG);
} else {
- ohdr->u.aeth = hfi1_compute_aeth(qp);
+ ohdr->u.aeth = rvt_compute_aeth(qp);
hwords++;
qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST);
e = &qp->s_ack_queue[qp->s_tail_ack_queue];
@@ -325,6 +246,84 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
bth2 = mask_psn(qp->s_ack_rdma_psn++);
break;
+ case TID_OP(WRITE_RESP):
+write_resp:
+ /*
+ * 1. Check if RVT_S_ACK_PENDING is set. If yes,
+ * goto normal.
+ * 2. Attempt to allocate TID resources.
+ * 3. Remove RVT_S_RESP_PENDING flags from s_flags
+ * 4. If resources not available:
+ * 4.1 Set RVT_S_WAIT_TID_SPACE
+ * 4.2 Queue QP on RCD TID queue
+ * 4.3 Put QP on iowait list.
+ * 4.4 Build IB RNR NAK with appropriate timeout value
+ * 4.5 Return indication progress made.
+ * 5. If resources are available:
+ * 5.1 Program HW flow CSRs
+ * 5.2 Build TID RDMA WRITE RESP packet
+ * 5.3 If more resources needed, do 2.1 - 2.3.
+ * 5.4 Wake up next QP on RCD TID queue.
+ * 5.5 Return indication progress made.
+ */
+
+ e = &qp->s_ack_queue[qp->s_tail_ack_queue];
+ req = ack_to_tid_req(e);
+
+ /*
+ * Send scheduled RNR NAK's. RNR NAK's need to be sent at
+ * segment boundaries, not at request boundaries. Don't change
+ * s_ack_state because we are still in the middle of a request
+ */
+ if (qpriv->rnr_nak_state == TID_RNR_NAK_SEND &&
+ qp->s_tail_ack_queue == qpriv->r_tid_alloc &&
+ req->cur_seg == req->alloc_seg) {
+ qpriv->rnr_nak_state = TID_RNR_NAK_SENT;
+ goto normal_no_state;
+ }
+
+ bth2 = mask_psn(qp->s_ack_rdma_psn);
+ hdrlen = hfi1_build_tid_rdma_write_resp(qp, e, ohdr, &bth1,
+ bth2, &len,
+ &ps->s_txreq->ss);
+ if (!hdrlen)
+ return 0;
+
+ hwords += hdrlen;
+ bth0 = qp->s_ack_state << 24;
+ qp->s_ack_rdma_psn++;
+ trace_hfi1_tid_req_make_rc_ack_write(qp, 0, e->opcode, e->psn,
+ e->lpsn, req);
+ if (req->cur_seg != req->total_segs)
+ break;
+
+ e->sent = 1;
+ /* Do not free e->rdma_sge until all data are received */
+ qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE);
+ break;
+
+ case TID_OP(READ_RESP):
+read_resp:
+ e = &qp->s_ack_queue[qp->s_tail_ack_queue];
+ ps->s_txreq->ss = &qp->s_ack_rdma_sge;
+ delta = hfi1_build_tid_rdma_read_resp(qp, e, ohdr, &bth0,
+ &bth1, &bth2, &len,
+ &last_pkt);
+ if (delta == 0)
+ goto error_qp;
+ hwords += delta;
+ if (last_pkt) {
+ e->sent = 1;
+ /*
+ * Increment qp->s_tail_ack_queue through s_ack_state
+ * transition.
+ */
+ qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST);
+ }
+ break;
+ case TID_OP(READ_REQ):
+ goto bail;
+
default:
normal:
/*
@@ -334,29 +333,36 @@ normal:
* (see above).
*/
qp->s_ack_state = OP(SEND_ONLY);
- qp->s_flags &= ~RVT_S_ACK_PENDING;
- ps->s_txreq->ss = NULL;
+normal_no_state:
if (qp->s_nak_state)
ohdr->u.aeth =
- cpu_to_be32((qp->r_msn & HFI1_MSN_MASK) |
+ cpu_to_be32((qp->r_msn & IB_MSN_MASK) |
(qp->s_nak_state <<
- HFI1_AETH_CREDIT_SHIFT));
+ IB_AETH_CREDIT_SHIFT));
else
- ohdr->u.aeth = hfi1_compute_aeth(qp);
+ ohdr->u.aeth = rvt_compute_aeth(qp);
hwords++;
len = 0;
bth0 = OP(ACKNOWLEDGE) << 24;
bth2 = mask_psn(qp->s_ack_psn);
+ qp->s_flags &= ~RVT_S_ACK_PENDING;
+ ps->s_txreq->txreq.flags |= SDMA_TXREQ_F_VIP;
+ ps->s_txreq->ss = NULL;
}
qp->s_rdma_ack_cnt++;
- qp->s_hdrwords = hwords;
- ps->s_txreq->sde = priv->s_sde;
+ ps->s_txreq->sde = qpriv->s_sde;
ps->s_txreq->s_cur_size = len;
- hfi1_make_ruc_header(qp, ohdr, bth0, bth2, middle, ps);
- /* pbc */
- ps->s_txreq->hdr_dwords = qp->s_hdrwords + 2;
+ ps->s_txreq->hdr_dwords = hwords;
+ hfi1_make_ruc_header(qp, ohdr, bth0, bth1, bth2, middle, ps);
return 1;
-
+error_qp:
+ spin_unlock_irqrestore(&qp->s_lock, ps->flags);
+ spin_lock_irqsave(&qp->r_lock, ps->flags);
+ spin_lock(&qp->s_lock);
+ rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
+ spin_unlock(&qp->s_lock);
+ spin_unlock_irqrestore(&qp->r_lock, ps->flags);
+ spin_lock_irqsave(&qp->s_lock, ps->flags);
bail:
qp->s_ack_state = OP(ACKNOWLEDGE);
/*
@@ -366,13 +372,14 @@ bail:
smp_wmb();
qp->s_flags &= ~(RVT_S_RESP_PENDING
| RVT_S_ACK_PENDING
- | RVT_S_AHG_VALID);
+ | HFI1_S_AHG_VALID);
return 0;
}
/**
* hfi1_make_rc_req - construct a request packet (SEND, RDMA r/w, ATOMIC)
* @qp: a pointer to the QP
+ * @ps: the current packet state
*
* Assumes s_lock is held.
*
@@ -383,26 +390,44 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
struct hfi1_qp_priv *priv = qp->priv;
struct hfi1_ibdev *dev = to_idev(qp->ibqp.device);
struct ib_other_headers *ohdr;
- struct rvt_sge_state *ss;
+ struct rvt_sge_state *ss = NULL;
struct rvt_swqe *wqe;
+ struct hfi1_swqe_priv *wpriv;
+ struct tid_rdma_request *req = NULL;
/* header size in 32-bit words LRH+BTH = (8+12)/4. */
u32 hwords = 5;
- u32 len;
- u32 bth0 = 0;
- u32 bth2;
+ u32 len = 0;
+ u32 bth0 = 0, bth2 = 0;
+ u32 bth1 = qp->remote_qpn | (HFI1_CAP_IS_KSET(OPFN) << IB_BTHE_E_SHIFT);
u32 pmtu = qp->pmtu;
char newreq;
int middle = 0;
int delta;
+ struct tid_rdma_flow *flow = NULL;
+ struct tid_rdma_params *remote;
+ trace_hfi1_sender_make_rc_req(qp);
lockdep_assert_held(&qp->s_lock);
ps->s_txreq = get_txreq(ps->dev, qp);
- if (IS_ERR(ps->s_txreq))
+ if (!ps->s_txreq)
goto bail_no_tx;
- ohdr = &ps->s_txreq->phdr.hdr.u.oth;
- if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
- ohdr = &ps->s_txreq->phdr.hdr.u.l.oth;
+ if (priv->hdr_type == HFI1_PKT_TYPE_9B) {
+ /* header size in 32-bit words LRH+BTH = (8+12)/4. */
+ hwords = 5;
+ if (rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH)
+ ohdr = &ps->s_txreq->phdr.hdr.ibh.u.l.oth;
+ else
+ ohdr = &ps->s_txreq->phdr.hdr.ibh.u.oth;
+ } else {
+ /* header size in 32-bit words 16B LRH+BTH = (16+12)/4. */
+ hwords = 7;
+ if ((rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH) &&
+ (hfi1_check_mcast(rdma_ah_get_dlid(&qp->remote_ah_attr))))
+ ohdr = &ps->s_txreq->phdr.hdr.opah.u.l.oth;
+ else
+ ohdr = &ps->s_txreq->phdr.hdr.opah.u.oth;
+ }
/* Sending responses has higher priority over sending requests. */
if ((qp->s_flags & RVT_S_RESP_PENDING) &&
@@ -413,8 +438,7 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND))
goto bail;
/* We are in the error state, flush the work request. */
- smp_read_barrier_depends(); /* see post_one_send() */
- if (qp->s_last == ACCESS_ONCE(qp->s_head))
+ if (qp->s_last == READ_ONCE(qp->s_head))
goto bail;
/* If DMAs are in progress, we can't flush immediately. */
if (iowait_sdma_pending(&priv->s_iowait)) {
@@ -423,13 +447,13 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
}
clear_ahg(qp);
wqe = rvt_get_swqe_ptr(qp, qp->s_last);
- hfi1_send_complete(qp, wqe, qp->s_last != qp->s_acked ?
- IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR);
+ hfi1_trdma_send_complete(qp, wqe, qp->s_last != qp->s_acked ?
+ IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR);
/* will get called again */
goto done_free_tx;
}
- if (qp->s_flags & (RVT_S_WAIT_RNR | RVT_S_WAIT_ACK))
+ if (qp->s_flags & (RVT_S_WAIT_RNR | RVT_S_WAIT_ACK | HFI1_S_WAIT_HALT))
goto bail;
if (cmp_psn(qp->s_psn, qp->s_sending_hpsn) <= 0) {
@@ -443,6 +467,7 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
/* Send a request. */
wqe = rvt_get_swqe_ptr(qp, qp->s_cur);
+check_s_state:
switch (qp->s_state) {
default:
if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_NEXT_SEND_OK))
@@ -457,16 +482,20 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
newreq = 0;
if (qp->s_cur == qp->s_tail) {
/* Check if send work queue is empty. */
- if (qp->s_tail == qp->s_head) {
+ if (qp->s_tail == READ_ONCE(qp->s_head)) {
clear_ahg(qp);
goto bail;
}
/*
* If a fence is requested, wait for previous
* RDMA read and atomic operations to finish.
+ * However, there is no need to guard against
+ * TID RDMA READ after TID RDMA READ.
*/
if ((wqe->wr.send_flags & IB_SEND_FENCE) &&
- qp->s_num_rd_atomic) {
+ qp->s_num_rd_atomic &&
+ (wqe->wr.opcode != IB_WR_TID_RDMA_READ ||
+ priv->pending_tid_r_segs < qp->s_num_rd_atomic)) {
qp->s_flags |= RVT_S_WAIT_FENCE;
goto bail;
}
@@ -492,12 +521,11 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
wqe->wr.ex.invalidate_rkey);
local_ops = 1;
}
- hfi1_send_complete(qp, wqe,
- err ? IB_WC_LOC_PROT_ERR
- : IB_WC_SUCCESS);
+ rvt_send_complete(qp, wqe,
+ err ? IB_WC_LOC_PROT_ERR
+ : IB_WC_SUCCESS);
if (local_ops)
atomic_dec(&qp->local_ops_pending);
- qp->s_hdrwords = 0;
goto done_free_tx;
}
@@ -512,16 +540,22 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
len = wqe->length;
ss = &qp->s_sge;
bth2 = mask_psn(qp->s_psn);
+
+ /*
+ * Interlock between various IB requests and TID RDMA
+ * if necessary.
+ */
+ if ((priv->s_flags & HFI1_S_TID_WAIT_INTERLCK) ||
+ hfi1_tid_rdma_wqe_interlock(qp, wqe))
+ goto bail;
+
switch (wqe->wr.opcode) {
case IB_WR_SEND:
case IB_WR_SEND_WITH_IMM:
case IB_WR_SEND_WITH_INV:
/* If no credit, return. */
- if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT) &&
- cmp_msn(wqe->ssn, qp->s_lsn + 1) > 0) {
- qp->s_flags |= RVT_S_WAIT_SSN_CREDIT;
+ if (!rvt_rc_credit_avail(qp, wqe))
goto bail;
- }
if (len > pmtu) {
qp->s_state = OP(SEND_FIRST);
len = pmtu;
@@ -551,14 +585,12 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
case IB_WR_RDMA_WRITE:
if (newreq && !(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
qp->s_lsn++;
- /* FALLTHROUGH */
+ goto no_flow_control;
case IB_WR_RDMA_WRITE_WITH_IMM:
/* If no credit, return. */
- if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT) &&
- cmp_msn(wqe->ssn, qp->s_lsn + 1) > 0) {
- qp->s_flags |= RVT_S_WAIT_SSN_CREDIT;
+ if (!rvt_rc_credit_avail(qp, wqe))
goto bail;
- }
+no_flow_control:
put_ib_reth_vaddr(
wqe->rdma_wr.remote_addr,
&ohdr->u.rc.reth);
@@ -587,21 +619,126 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
qp->s_cur = 0;
break;
+ case IB_WR_TID_RDMA_WRITE:
+ if (newreq) {
+ /*
+ * Limit the number of TID RDMA WRITE requests.
+ */
+ if (atomic_read(&priv->n_tid_requests) >=
+ HFI1_TID_RDMA_WRITE_CNT)
+ goto bail;
+
+ if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
+ qp->s_lsn++;
+ }
+
+ hwords += hfi1_build_tid_rdma_write_req(qp, wqe, ohdr,
+ &bth1, &bth2,
+ &len);
+ ss = NULL;
+ if (priv->s_tid_cur == HFI1_QP_WQE_INVALID) {
+ priv->s_tid_cur = qp->s_cur;
+ if (priv->s_tid_tail == HFI1_QP_WQE_INVALID) {
+ priv->s_tid_tail = qp->s_cur;
+ priv->s_state = TID_OP(WRITE_RESP);
+ }
+ } else if (priv->s_tid_cur == priv->s_tid_head) {
+ struct rvt_swqe *__w;
+ struct tid_rdma_request *__r;
+
+ __w = rvt_get_swqe_ptr(qp, priv->s_tid_cur);
+ __r = wqe_to_tid_req(__w);
+
+ /*
+ * The s_tid_cur pointer is advanced to s_cur if
+ * any of the following conditions about the WQE
+ * to which s_ti_cur currently points to are
+ * satisfied:
+ * 1. The request is not a TID RDMA WRITE
+ * request,
+ * 2. The request is in the INACTIVE or
+ * COMPLETE states (TID RDMA READ requests
+ * stay at INACTIVE and TID RDMA WRITE
+ * transition to COMPLETE when done),
+ * 3. The request is in the ACTIVE or SYNC
+ * state and the number of completed
+ * segments is equal to the total segment
+ * count.
+ * (If ACTIVE, the request is waiting for
+ * ACKs. If SYNC, the request has not
+ * received any responses because it's
+ * waiting on a sync point.)
+ */
+ if (__w->wr.opcode != IB_WR_TID_RDMA_WRITE ||
+ __r->state == TID_REQUEST_INACTIVE ||
+ __r->state == TID_REQUEST_COMPLETE ||
+ ((__r->state == TID_REQUEST_ACTIVE ||
+ __r->state == TID_REQUEST_SYNC) &&
+ __r->comp_seg == __r->total_segs)) {
+ if (priv->s_tid_tail ==
+ priv->s_tid_cur &&
+ priv->s_state ==
+ TID_OP(WRITE_DATA_LAST)) {
+ priv->s_tid_tail = qp->s_cur;
+ priv->s_state =
+ TID_OP(WRITE_RESP);
+ }
+ priv->s_tid_cur = qp->s_cur;
+ }
+ /*
+ * A corner case: when the last TID RDMA WRITE
+ * request was completed, s_tid_head,
+ * s_tid_cur, and s_tid_tail all point to the
+ * same location. Other requests are posted and
+ * s_cur wraps around to the same location,
+ * where a new TID RDMA WRITE is posted. In
+ * this case, none of the indices need to be
+ * updated. However, the priv->s_state should.
+ */
+ if (priv->s_tid_tail == qp->s_cur &&
+ priv->s_state == TID_OP(WRITE_DATA_LAST))
+ priv->s_state = TID_OP(WRITE_RESP);
+ }
+ req = wqe_to_tid_req(wqe);
+ if (newreq) {
+ priv->s_tid_head = qp->s_cur;
+ priv->pending_tid_w_resp += req->total_segs;
+ atomic_inc(&priv->n_tid_requests);
+ atomic_dec(&priv->n_requests);
+ } else {
+ req->state = TID_REQUEST_RESEND;
+ req->comp_seg = delta_psn(bth2, wqe->psn);
+ /*
+ * Pull back any segments since we are going
+ * to re-receive them.
+ */
+ req->setup_head = req->clear_tail;
+ priv->pending_tid_w_resp +=
+ delta_psn(wqe->lpsn, bth2) + 1;
+ }
+
+ trace_hfi1_tid_write_sender_make_req(qp, newreq);
+ trace_hfi1_tid_req_make_req_write(qp, newreq,
+ wqe->wr.opcode,
+ wqe->psn, wqe->lpsn,
+ req);
+ if (++qp->s_cur == qp->s_size)
+ qp->s_cur = 0;
+ break;
+
case IB_WR_RDMA_READ:
/*
* Don't allow more operations to be started
* than the QP limits allow.
*/
- if (newreq) {
- if (qp->s_num_rd_atomic >=
- qp->s_max_rd_atomic) {
- qp->s_flags |= RVT_S_WAIT_RDMAR;
- goto bail;
- }
- qp->s_num_rd_atomic++;
- if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
- qp->s_lsn++;
+ if (qp->s_num_rd_atomic >=
+ qp->s_max_rd_atomic) {
+ qp->s_flags |= RVT_S_WAIT_RDMAR;
+ goto bail;
}
+ qp->s_num_rd_atomic++;
+ if (newreq && !(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
+ qp->s_lsn++;
put_ib_reth_vaddr(
wqe->rdma_wr.remote_addr,
&ohdr->u.rc.reth);
@@ -617,23 +754,98 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
qp->s_cur = 0;
break;
+ case IB_WR_TID_RDMA_READ:
+ trace_hfi1_tid_read_sender_make_req(qp, newreq);
+ wpriv = wqe->priv;
+ req = wqe_to_tid_req(wqe);
+ trace_hfi1_tid_req_make_req_read(qp, newreq,
+ wqe->wr.opcode,
+ wqe->psn, wqe->lpsn,
+ req);
+ delta = cmp_psn(qp->s_psn, wqe->psn);
+
+ /*
+ * Don't allow more operations to be started
+ * than the QP limits allow. We could get here under
+ * three conditions; (1) It's a new request; (2) We are
+ * sending the second or later segment of a request,
+ * but the qp->s_state is set to OP(RDMA_READ_REQUEST)
+ * when the last segment of a previous request is
+ * received just before this; (3) We are re-sending a
+ * request.
+ */
+ if (qp->s_num_rd_atomic >= qp->s_max_rd_atomic) {
+ qp->s_flags |= RVT_S_WAIT_RDMAR;
+ goto bail;
+ }
+ if (newreq) {
+ struct tid_rdma_flow *flow =
+ &req->flows[req->setup_head];
+
+ /*
+ * Set up s_sge as it is needed for TID
+ * allocation. However, if the pages have been
+ * walked and mapped, skip it. An earlier try
+ * has failed to allocate the TID entries.
+ */
+ if (!flow->npagesets) {
+ qp->s_sge.sge = wqe->sg_list[0];
+ qp->s_sge.sg_list = wqe->sg_list + 1;
+ qp->s_sge.num_sge = wqe->wr.num_sge;
+ qp->s_sge.total_len = wqe->length;
+ qp->s_len = wqe->length;
+ req->isge = 0;
+ req->clear_tail = req->setup_head;
+ req->flow_idx = req->setup_head;
+ req->state = TID_REQUEST_ACTIVE;
+ }
+ } else if (delta == 0) {
+ /* Re-send a request */
+ req->cur_seg = 0;
+ req->comp_seg = 0;
+ req->ack_pending = 0;
+ req->flow_idx = req->clear_tail;
+ req->state = TID_REQUEST_RESEND;
+ }
+ req->s_next_psn = qp->s_psn;
+ /* Read one segment at a time */
+ len = min_t(u32, req->seg_len,
+ wqe->length - req->seg_len * req->cur_seg);
+ delta = hfi1_build_tid_rdma_read_req(qp, wqe, ohdr,
+ &bth1, &bth2,
+ &len);
+ if (delta <= 0) {
+ /* Wait for TID space */
+ goto bail;
+ }
+ if (newreq && !(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
+ qp->s_lsn++;
+ hwords += delta;
+ ss = &wpriv->ss;
+ /* Check if this is the last segment */
+ if (req->cur_seg >= req->total_segs &&
+ ++qp->s_cur == qp->s_size)
+ qp->s_cur = 0;
+ break;
+
case IB_WR_ATOMIC_CMP_AND_SWP:
case IB_WR_ATOMIC_FETCH_AND_ADD:
/*
* Don't allow more operations to be started
* than the QP limits allow.
*/
- if (newreq) {
- if (qp->s_num_rd_atomic >=
- qp->s_max_rd_atomic) {
- qp->s_flags |= RVT_S_WAIT_RDMAR;
- goto bail;
- }
- qp->s_num_rd_atomic++;
- if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
- qp->s_lsn++;
+ if (qp->s_num_rd_atomic >=
+ qp->s_max_rd_atomic) {
+ qp->s_flags |= RVT_S_WAIT_RDMAR;
+ goto bail;
}
- if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
+ qp->s_num_rd_atomic++;
+ fallthrough;
+ case IB_WR_OPFN:
+ if (newreq && !(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
+ qp->s_lsn++;
+ if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
+ wqe->wr.opcode == IB_WR_OPFN) {
qp->s_state = OP(COMPARE_SWAP);
put_ib_ateth_swap(wqe->atomic_wr.swap,
&ohdr->u.atomic_eth);
@@ -660,18 +872,23 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
default:
goto bail;
}
- qp->s_sge.sge = wqe->sg_list[0];
- qp->s_sge.sg_list = wqe->sg_list + 1;
- qp->s_sge.num_sge = wqe->wr.num_sge;
- qp->s_sge.total_len = wqe->length;
- qp->s_len = wqe->length;
+ if (wqe->wr.opcode != IB_WR_TID_RDMA_READ) {
+ qp->s_sge.sge = wqe->sg_list[0];
+ qp->s_sge.sg_list = wqe->sg_list + 1;
+ qp->s_sge.num_sge = wqe->wr.num_sge;
+ qp->s_sge.total_len = wqe->length;
+ qp->s_len = wqe->length;
+ }
if (newreq) {
qp->s_tail++;
if (qp->s_tail >= qp->s_size)
qp->s_tail = 0;
}
- if (wqe->wr.opcode == IB_WR_RDMA_READ)
+ if (wqe->wr.opcode == IB_WR_RDMA_READ ||
+ wqe->wr.opcode == IB_WR_TID_RDMA_WRITE)
qp->s_psn = wqe->lpsn + 1;
+ else if (wqe->wr.opcode == IB_WR_TID_RDMA_READ)
+ qp->s_psn = req->s_next_psn;
else
qp->s_psn++;
break;
@@ -687,10 +904,10 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
* See restart_rc().
*/
qp->s_len = restart_sge(&qp->s_sge, wqe, qp->s_psn, pmtu);
- /* FALLTHROUGH */
+ fallthrough;
case OP(SEND_FIRST):
qp->s_state = OP(SEND_MIDDLE);
- /* FALLTHROUGH */
+ fallthrough;
case OP(SEND_MIDDLE):
bth2 = mask_psn(qp->s_psn++);
ss = &qp->s_sge;
@@ -732,10 +949,10 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
* See restart_rc().
*/
qp->s_len = restart_sge(&qp->s_sge, wqe, qp->s_psn, pmtu);
- /* FALLTHROUGH */
+ fallthrough;
case OP(RDMA_WRITE_FIRST):
qp->s_state = OP(RDMA_WRITE_MIDDLE);
- /* FALLTHROUGH */
+ fallthrough;
case OP(RDMA_WRITE_MIDDLE):
bth2 = mask_psn(qp->s_psn++);
ss = &qp->s_sge;
@@ -788,10 +1005,137 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
if (qp->s_cur == qp->s_size)
qp->s_cur = 0;
break;
+
+ case TID_OP(WRITE_RESP):
+ /*
+ * This value for s_state is used for restarting a TID RDMA
+ * WRITE request. See comment in OP(RDMA_READ_RESPONSE_MIDDLE
+ * for more).
+ */
+ req = wqe_to_tid_req(wqe);
+ req->state = TID_REQUEST_RESEND;
+ rcu_read_lock();
+ remote = rcu_dereference(priv->tid_rdma.remote);
+ req->comp_seg = delta_psn(qp->s_psn, wqe->psn);
+ len = wqe->length - (req->comp_seg * remote->max_len);
+ rcu_read_unlock();
+
+ bth2 = mask_psn(qp->s_psn);
+ hwords += hfi1_build_tid_rdma_write_req(qp, wqe, ohdr, &bth1,
+ &bth2, &len);
+ qp->s_psn = wqe->lpsn + 1;
+ ss = NULL;
+ qp->s_state = TID_OP(WRITE_REQ);
+ priv->pending_tid_w_resp += delta_psn(wqe->lpsn, bth2) + 1;
+ priv->s_tid_cur = qp->s_cur;
+ if (++qp->s_cur == qp->s_size)
+ qp->s_cur = 0;
+ trace_hfi1_tid_req_make_req_write(qp, 0, wqe->wr.opcode,
+ wqe->psn, wqe->lpsn, req);
+ break;
+
+ case TID_OP(READ_RESP):
+ if (wqe->wr.opcode != IB_WR_TID_RDMA_READ)
+ goto bail;
+ /* This is used to restart a TID read request */
+ req = wqe_to_tid_req(wqe);
+ wpriv = wqe->priv;
+ /*
+ * Back down. The field qp->s_psn has been set to the psn with
+ * which the request should be restart. It's OK to use division
+ * as this is on the retry path.
+ */
+ req->cur_seg = delta_psn(qp->s_psn, wqe->psn) / priv->pkts_ps;
+
+ /*
+ * The following function need to be redefined to return the
+ * status to make sure that we find the flow. At the same
+ * time, we can use the req->state change to check if the
+ * call succeeds or not.
+ */
+ req->state = TID_REQUEST_RESEND;
+ hfi1_tid_rdma_restart_req(qp, wqe, &bth2);
+ if (req->state != TID_REQUEST_ACTIVE) {
+ /*
+ * Failed to find the flow. Release all allocated tid
+ * resources.
+ */
+ hfi1_kern_exp_rcv_clear_all(req);
+ hfi1_kern_clear_hw_flow(priv->rcd, qp);
+
+ hfi1_trdma_send_complete(qp, wqe, IB_WC_LOC_QP_OP_ERR);
+ goto bail;
+ }
+ req->state = TID_REQUEST_RESEND;
+ len = min_t(u32, req->seg_len,
+ wqe->length - req->seg_len * req->cur_seg);
+ flow = &req->flows[req->flow_idx];
+ len -= flow->sent;
+ req->s_next_psn = flow->flow_state.ib_lpsn + 1;
+ delta = hfi1_build_tid_rdma_read_packet(wqe, ohdr, &bth1,
+ &bth2, &len);
+ if (delta <= 0) {
+ /* Wait for TID space */
+ goto bail;
+ }
+ hwords += delta;
+ ss = &wpriv->ss;
+ /* Check if this is the last segment */
+ if (req->cur_seg >= req->total_segs &&
+ ++qp->s_cur == qp->s_size)
+ qp->s_cur = 0;
+ qp->s_psn = req->s_next_psn;
+ trace_hfi1_tid_req_make_req_read(qp, 0, wqe->wr.opcode,
+ wqe->psn, wqe->lpsn, req);
+ break;
+ case TID_OP(READ_REQ):
+ req = wqe_to_tid_req(wqe);
+ delta = cmp_psn(qp->s_psn, wqe->psn);
+ /*
+ * If the current WR is not TID RDMA READ, or this is the start
+ * of a new request, we need to change the qp->s_state so that
+ * the request can be set up properly.
+ */
+ if (wqe->wr.opcode != IB_WR_TID_RDMA_READ || delta == 0 ||
+ qp->s_cur == qp->s_tail) {
+ qp->s_state = OP(RDMA_READ_REQUEST);
+ if (delta == 0 || qp->s_cur == qp->s_tail)
+ goto check_s_state;
+ else
+ goto bail;
+ }
+
+ /* Rate limiting */
+ if (qp->s_num_rd_atomic >= qp->s_max_rd_atomic) {
+ qp->s_flags |= RVT_S_WAIT_RDMAR;
+ goto bail;
+ }
+
+ wpriv = wqe->priv;
+ /* Read one segment at a time */
+ len = min_t(u32, req->seg_len,
+ wqe->length - req->seg_len * req->cur_seg);
+ delta = hfi1_build_tid_rdma_read_req(qp, wqe, ohdr, &bth1,
+ &bth2, &len);
+ if (delta <= 0) {
+ /* Wait for TID space */
+ goto bail;
+ }
+ hwords += delta;
+ ss = &wpriv->ss;
+ /* Check if this is the last segment */
+ if (req->cur_seg >= req->total_segs &&
+ ++qp->s_cur == qp->s_size)
+ qp->s_cur = 0;
+ qp->s_psn = req->s_next_psn;
+ trace_hfi1_tid_req_make_req_read(qp, 0, wqe->wr.opcode,
+ wqe->psn, wqe->lpsn, req);
+ break;
}
qp->s_sending_hpsn = bth2;
delta = delta_psn(bth2, wqe->psn);
- if (delta && delta % HFI1_PSN_CREDIT == 0)
+ if (delta && delta % HFI1_PSN_CREDIT == 0 &&
+ wqe->wr.opcode != IB_WR_TID_RDMA_WRITE)
bth2 |= IB_BTH_REQ_ACK;
if (qp->s_flags & RVT_S_SEND_ONE) {
qp->s_flags &= ~RVT_S_SEND_ONE;
@@ -799,7 +1143,7 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
bth2 |= IB_BTH_REQ_ACK;
}
qp->s_len -= len;
- qp->s_hdrwords = hwords;
+ ps->s_txreq->hdr_dwords = hwords;
ps->s_txreq->sde = priv->s_sde;
ps->s_txreq->ss = ss;
ps->s_txreq->s_cur_size = len;
@@ -807,11 +1151,10 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
qp,
ohdr,
bth0 | (qp->s_state << 24),
+ bth1,
bth2,
middle,
ps);
- /* pbc */
- ps->s_txreq->hdr_dwords = qp->s_hdrwords + 2;
return 1;
done_free_tx:
@@ -825,121 +1168,282 @@ bail:
bail_no_tx:
ps->s_txreq = NULL;
qp->s_flags &= ~RVT_S_BUSY;
- qp->s_hdrwords = 0;
+ /*
+ * If we didn't get a txreq, the QP will be woken up later to try
+ * again. Set the flags to indicate which work item to wake
+ * up.
+ */
+ iowait_set_flag(&priv->s_iowait, IOWAIT_PENDING_IB);
return 0;
}
-/**
+static inline void hfi1_make_bth_aeth(struct rvt_qp *qp,
+ struct ib_other_headers *ohdr,
+ u32 bth0, u32 bth1)
+{
+ if (qp->r_nak_state)
+ ohdr->u.aeth = cpu_to_be32((qp->r_msn & IB_MSN_MASK) |
+ (qp->r_nak_state <<
+ IB_AETH_CREDIT_SHIFT));
+ else
+ ohdr->u.aeth = rvt_compute_aeth(qp);
+
+ ohdr->bth[0] = cpu_to_be32(bth0);
+ ohdr->bth[1] = cpu_to_be32(bth1 | qp->remote_qpn);
+ ohdr->bth[2] = cpu_to_be32(mask_psn(qp->r_ack_psn));
+}
+
+static inline void hfi1_queue_rc_ack(struct hfi1_packet *packet, bool is_fecn)
+{
+ struct rvt_qp *qp = packet->qp;
+ struct hfi1_ibport *ibp;
+ unsigned long flags;
+
+ spin_lock_irqsave(&qp->s_lock, flags);
+ if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK))
+ goto unlock;
+ ibp = rcd_to_iport(packet->rcd);
+ this_cpu_inc(*ibp->rvp.rc_qacks);
+ qp->s_flags |= RVT_S_ACK_PENDING | RVT_S_RESP_PENDING;
+ qp->s_nak_state = qp->r_nak_state;
+ qp->s_ack_psn = qp->r_ack_psn;
+ if (is_fecn)
+ qp->s_flags |= RVT_S_ECN;
+
+ /* Schedule the send tasklet. */
+ hfi1_schedule_send(qp);
+unlock:
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+}
+
+static inline void hfi1_make_rc_ack_9B(struct hfi1_packet *packet,
+ struct hfi1_opa_header *opa_hdr,
+ u8 sc5, bool is_fecn,
+ u64 *pbc_flags, u32 *hwords,
+ u32 *nwords)
+{
+ struct rvt_qp *qp = packet->qp;
+ struct hfi1_ibport *ibp = rcd_to_iport(packet->rcd);
+ struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
+ struct ib_header *hdr = &opa_hdr->ibh;
+ struct ib_other_headers *ohdr;
+ u16 lrh0 = HFI1_LRH_BTH;
+ u16 pkey;
+ u32 bth0, bth1;
+
+ opa_hdr->hdr_type = HFI1_PKT_TYPE_9B;
+ ohdr = &hdr->u.oth;
+ /* header size in 32-bit words LRH+BTH+AETH = (8+12+4)/4 */
+ *hwords = 6;
+
+ if (unlikely(rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH)) {
+ *hwords += hfi1_make_grh(ibp, &hdr->u.l.grh,
+ rdma_ah_read_grh(&qp->remote_ah_attr),
+ *hwords - 2, SIZE_OF_CRC);
+ ohdr = &hdr->u.l.oth;
+ lrh0 = HFI1_LRH_GRH;
+ }
+ /* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */
+ *pbc_flags |= ((!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT);
+
+ /* read pkey_index w/o lock (its atomic) */
+ pkey = hfi1_get_pkey(ibp, qp->s_pkey_index);
+
+ lrh0 |= (sc5 & IB_SC_MASK) << IB_SC_SHIFT |
+ (rdma_ah_get_sl(&qp->remote_ah_attr) & IB_SL_MASK) <<
+ IB_SL_SHIFT;
+
+ hfi1_make_ib_hdr(hdr, lrh0, *hwords + SIZE_OF_CRC,
+ opa_get_lid(rdma_ah_get_dlid(&qp->remote_ah_attr), 9B),
+ ppd->lid | rdma_ah_get_path_bits(&qp->remote_ah_attr));
+
+ bth0 = pkey | (OP(ACKNOWLEDGE) << 24);
+ if (qp->s_mig_state == IB_MIG_MIGRATED)
+ bth0 |= IB_BTH_MIG_REQ;
+ bth1 = (!!is_fecn) << IB_BECN_SHIFT;
+ /*
+ * Inline ACKs go out without the use of the Verbs send engine, so
+ * we need to set the STL Verbs Extended bit here
+ */
+ bth1 |= HFI1_CAP_IS_KSET(OPFN) << IB_BTHE_E_SHIFT;
+ hfi1_make_bth_aeth(qp, ohdr, bth0, bth1);
+}
+
+static inline void hfi1_make_rc_ack_16B(struct hfi1_packet *packet,
+ struct hfi1_opa_header *opa_hdr,
+ u8 sc5, bool is_fecn,
+ u64 *pbc_flags, u32 *hwords,
+ u32 *nwords)
+{
+ struct rvt_qp *qp = packet->qp;
+ struct hfi1_ibport *ibp = rcd_to_iport(packet->rcd);
+ struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
+ struct hfi1_16b_header *hdr = &opa_hdr->opah;
+ struct ib_other_headers *ohdr;
+ u32 bth0, bth1 = 0;
+ u16 len, pkey;
+ bool becn = is_fecn;
+ u8 l4 = OPA_16B_L4_IB_LOCAL;
+ u8 extra_bytes;
+
+ opa_hdr->hdr_type = HFI1_PKT_TYPE_16B;
+ ohdr = &hdr->u.oth;
+ /* header size in 32-bit words 16B LRH+BTH+AETH = (16+12+4)/4 */
+ *hwords = 8;
+ extra_bytes = hfi1_get_16b_padding(*hwords << 2, 0);
+ *nwords = SIZE_OF_CRC + ((extra_bytes + SIZE_OF_LT) >> 2);
+
+ if (unlikely(rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH) &&
+ hfi1_check_mcast(rdma_ah_get_dlid(&qp->remote_ah_attr))) {
+ *hwords += hfi1_make_grh(ibp, &hdr->u.l.grh,
+ rdma_ah_read_grh(&qp->remote_ah_attr),
+ *hwords - 4, *nwords);
+ ohdr = &hdr->u.l.oth;
+ l4 = OPA_16B_L4_IB_GLOBAL;
+ }
+ *pbc_flags |= PBC_PACKET_BYPASS | PBC_INSERT_BYPASS_ICRC;
+
+ /* read pkey_index w/o lock (its atomic) */
+ pkey = hfi1_get_pkey(ibp, qp->s_pkey_index);
+
+ /* Convert dwords to flits */
+ len = (*hwords + *nwords) >> 1;
+
+ hfi1_make_16b_hdr(hdr, ppd->lid |
+ (rdma_ah_get_path_bits(&qp->remote_ah_attr) &
+ ((1 << ppd->lmc) - 1)),
+ opa_get_lid(rdma_ah_get_dlid(&qp->remote_ah_attr),
+ 16B), len, pkey, becn, 0, l4, sc5);
+
+ bth0 = pkey | (OP(ACKNOWLEDGE) << 24);
+ bth0 |= extra_bytes << 20;
+ if (qp->s_mig_state == IB_MIG_MIGRATED)
+ bth1 = OPA_BTH_MIG_REQ;
+ hfi1_make_bth_aeth(qp, ohdr, bth0, bth1);
+}
+
+typedef void (*hfi1_make_rc_ack)(struct hfi1_packet *packet,
+ struct hfi1_opa_header *opa_hdr,
+ u8 sc5, bool is_fecn,
+ u64 *pbc_flags, u32 *hwords,
+ u32 *nwords);
+
+/* We support only two types - 9B and 16B for now */
+static const hfi1_make_rc_ack hfi1_make_rc_ack_tbl[2] = {
+ [HFI1_PKT_TYPE_9B] = &hfi1_make_rc_ack_9B,
+ [HFI1_PKT_TYPE_16B] = &hfi1_make_rc_ack_16B
+};
+
+/*
* hfi1_send_rc_ack - Construct an ACK packet and send it
- * @qp: a pointer to the QP
*
* This is called from hfi1_rc_rcv() and handle_receive_interrupt().
* Note that RDMA reads and atomics are handled in the
* send side QP state and send engine.
*/
-void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp,
- int is_fecn)
+void hfi1_send_rc_ack(struct hfi1_packet *packet, bool is_fecn)
{
- struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
+ struct hfi1_ctxtdata *rcd = packet->rcd;
+ struct rvt_qp *qp = packet->qp;
+ struct hfi1_ibport *ibp = rcd_to_iport(rcd);
+ struct hfi1_qp_priv *priv = qp->priv;
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
+ u8 sc5 = ibp->sl_to_sc[rdma_ah_get_sl(&qp->remote_ah_attr)];
u64 pbc, pbc_flags = 0;
- u16 lrh0;
- u16 sc5;
- u32 bth0;
- u32 hwords;
- u32 vl, plen;
- struct send_context *sc;
+ u32 hwords = 0;
+ u32 nwords = 0;
+ u32 plen;
struct pio_buf *pbuf;
- struct ib_header hdr;
- struct ib_other_headers *ohdr;
- unsigned long flags;
+ struct hfi1_opa_header opa_hdr;
+
+ /* clear the defer count */
+ qp->r_adefered = 0;
/* Don't send ACK or NAK if a RDMA read or atomic is pending. */
- if (qp->s_flags & RVT_S_RESP_PENDING)
- goto queue_ack;
+ if (qp->s_flags & RVT_S_RESP_PENDING) {
+ hfi1_queue_rc_ack(packet, is_fecn);
+ return;
+ }
/* Ensure s_rdma_ack_cnt changes are committed */
- smp_read_barrier_depends();
- if (qp->s_rdma_ack_cnt)
- goto queue_ack;
-
- /* Construct the header */
- /* header size in 32-bit words LRH+BTH+AETH = (8+12+4)/4 */
- hwords = 6;
- if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) {
- hwords += hfi1_make_grh(ibp, &hdr.u.l.grh,
- &qp->remote_ah_attr.grh, hwords, 0);
- ohdr = &hdr.u.l.oth;
- lrh0 = HFI1_LRH_GRH;
- } else {
- ohdr = &hdr.u.oth;
- lrh0 = HFI1_LRH_BTH;
+ if (qp->s_rdma_ack_cnt) {
+ hfi1_queue_rc_ack(packet, is_fecn);
+ return;
}
- /* read pkey_index w/o lock (its atomic) */
- bth0 = hfi1_get_pkey(ibp, qp->s_pkey_index) | (OP(ACKNOWLEDGE) << 24);
- if (qp->s_mig_state == IB_MIG_MIGRATED)
- bth0 |= IB_BTH_MIG_REQ;
- if (qp->r_nak_state)
- ohdr->u.aeth = cpu_to_be32((qp->r_msn & HFI1_MSN_MASK) |
- (qp->r_nak_state <<
- HFI1_AETH_CREDIT_SHIFT));
- else
- ohdr->u.aeth = hfi1_compute_aeth(qp);
- sc5 = ibp->sl_to_sc[qp->remote_ah_attr.sl];
- /* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */
- pbc_flags |= ((!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT);
- lrh0 |= (sc5 & 0xf) << 12 | (qp->remote_ah_attr.sl & 0xf) << 4;
- hdr.lrh[0] = cpu_to_be16(lrh0);
- hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
- hdr.lrh[2] = cpu_to_be16(hwords + SIZE_OF_CRC);
- hdr.lrh[3] = cpu_to_be16(ppd->lid | qp->remote_ah_attr.src_path_bits);
- ohdr->bth[0] = cpu_to_be32(bth0);
- ohdr->bth[1] = cpu_to_be32(qp->remote_qpn);
- ohdr->bth[1] |= cpu_to_be32((!!is_fecn) << HFI1_BECN_SHIFT);
- ohdr->bth[2] = cpu_to_be32(mask_psn(qp->r_ack_psn));
/* Don't try to send ACKs if the link isn't ACTIVE */
if (driver_lstate(ppd) != IB_PORT_ACTIVE)
return;
- sc = rcd->sc;
- plen = 2 /* PBC */ + hwords;
- vl = sc_to_vlt(ppd->dd, sc5);
- pbc = create_pbc(ppd, pbc_flags, qp->srate_mbps, vl, plen);
+ /* Make the appropriate header */
+ hfi1_make_rc_ack_tbl[priv->hdr_type](packet, &opa_hdr, sc5, is_fecn,
+ &pbc_flags, &hwords, &nwords);
- pbuf = sc_buffer_alloc(sc, plen, NULL, NULL);
- if (!pbuf) {
+ plen = 2 /* PBC */ + hwords + nwords;
+ pbc = create_pbc(ppd, pbc_flags, qp->srate_mbps,
+ sc_to_vlt(ppd->dd, sc5), plen);
+ pbuf = sc_buffer_alloc(rcd->sc, plen, NULL, NULL);
+ if (IS_ERR_OR_NULL(pbuf)) {
/*
* We have no room to send at the moment. Pass
* responsibility for sending the ACK to the send engine
* so that when enough buffer space becomes available,
* the ACK is sent ahead of other outgoing packets.
*/
- goto queue_ack;
+ hfi1_queue_rc_ack(packet, is_fecn);
+ return;
}
-
- trace_ack_output_ibhdr(dd_from_ibdev(qp->ibqp.device), &hdr);
+ trace_ack_output_ibhdr(dd_from_ibdev(qp->ibqp.device),
+ &opa_hdr, ib_is_sc5(sc5));
/* write the pbc and data */
- ppd->dd->pio_inline_send(ppd->dd, pbuf, pbc, &hdr, hwords);
-
+ ppd->dd->pio_inline_send(ppd->dd, pbuf, pbc,
+ (priv->hdr_type == HFI1_PKT_TYPE_9B ?
+ (void *)&opa_hdr.ibh :
+ (void *)&opa_hdr.opah), hwords);
return;
+}
-queue_ack:
- spin_lock_irqsave(&qp->s_lock, flags);
- if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK))
- goto unlock;
- this_cpu_inc(*ibp->rvp.rc_qacks);
- qp->s_flags |= RVT_S_ACK_PENDING | RVT_S_RESP_PENDING;
- qp->s_nak_state = qp->r_nak_state;
- qp->s_ack_psn = qp->r_ack_psn;
- if (is_fecn)
- qp->s_flags |= RVT_S_ECN;
+/**
+ * update_num_rd_atomic - update the qp->s_num_rd_atomic
+ * @qp: the QP
+ * @psn: the packet sequence number to restart at
+ * @wqe: the wqe
+ *
+ * This is called from reset_psn() to update qp->s_num_rd_atomic
+ * for the current wqe.
+ * Called at interrupt level with the QP s_lock held.
+ */
+static void update_num_rd_atomic(struct rvt_qp *qp, u32 psn,
+ struct rvt_swqe *wqe)
+{
+ u32 opcode = wqe->wr.opcode;
+
+ if (opcode == IB_WR_RDMA_READ ||
+ opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
+ opcode == IB_WR_ATOMIC_FETCH_AND_ADD) {
+ qp->s_num_rd_atomic++;
+ } else if (opcode == IB_WR_TID_RDMA_READ) {
+ struct tid_rdma_request *req = wqe_to_tid_req(wqe);
+ struct hfi1_qp_priv *priv = qp->priv;
- /* Schedule the send engine. */
- hfi1_schedule_send(qp);
-unlock:
- spin_unlock_irqrestore(&qp->s_lock, flags);
+ if (cmp_psn(psn, wqe->lpsn) <= 0) {
+ u32 cur_seg;
+
+ cur_seg = (psn - wqe->psn) / priv->pkts_ps;
+ req->ack_pending = cur_seg - req->comp_seg;
+ priv->pending_tid_r_segs += req->ack_pending;
+ qp->s_num_rd_atomic += req->ack_pending;
+ trace_hfi1_tid_req_update_num_rd_atomic(qp, 0,
+ wqe->wr.opcode,
+ wqe->psn,
+ wqe->lpsn,
+ req);
+ } else {
+ priv->pending_tid_r_segs += req->total_segs;
+ qp->s_num_rd_atomic += req->total_segs;
+ }
+ }
}
/**
@@ -956,9 +1460,13 @@ static void reset_psn(struct rvt_qp *qp, u32 psn)
u32 n = qp->s_acked;
struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, n);
u32 opcode;
+ struct hfi1_qp_priv *priv = qp->priv;
lockdep_assert_held(&qp->s_lock);
qp->s_cur = n;
+ priv->pending_tid_r_segs = 0;
+ priv->pending_tid_w_resp = 0;
+ qp->s_num_rd_atomic = 0;
/*
* If we are starting the request from the beginning,
@@ -968,9 +1476,9 @@ static void reset_psn(struct rvt_qp *qp, u32 psn)
qp->s_state = OP(SEND_LAST);
goto done;
}
+ update_num_rd_atomic(qp, psn, wqe);
/* Find the work request opcode corresponding to the given PSN. */
- opcode = wqe->wr.opcode;
for (;;) {
int diff;
@@ -980,8 +1488,11 @@ static void reset_psn(struct rvt_qp *qp, u32 psn)
break;
wqe = rvt_get_swqe_ptr(qp, n);
diff = cmp_psn(psn, wqe->psn);
- if (diff < 0)
+ if (diff < 0) {
+ /* Point wqe back to the previous one*/
+ wqe = rvt_get_swqe_ptr(qp, qp->s_cur);
break;
+ }
qp->s_cur = n;
/*
* If we are starting the request from the beginning,
@@ -991,8 +1502,10 @@ static void reset_psn(struct rvt_qp *qp, u32 psn)
qp->s_state = OP(SEND_LAST);
goto done;
}
- opcode = wqe->wr.opcode;
+
+ update_num_rd_atomic(qp, psn, wqe);
}
+ opcode = wqe->wr.opcode;
/*
* Set the state to restart in the middle of a request.
@@ -1010,10 +1523,18 @@ static void reset_psn(struct rvt_qp *qp, u32 psn)
qp->s_state = OP(RDMA_READ_RESPONSE_LAST);
break;
+ case IB_WR_TID_RDMA_WRITE:
+ qp->s_state = TID_OP(WRITE_RESP);
+ break;
+
case IB_WR_RDMA_READ:
qp->s_state = OP(RDMA_READ_RESPONSE_MIDDLE);
break;
+ case IB_WR_TID_RDMA_READ:
+ qp->s_state = TID_OP(READ_RESP);
+ break;
+
default:
/*
* This case shouldn't happen since its only
@@ -1022,6 +1543,7 @@ static void reset_psn(struct rvt_qp *qp, u32 psn)
qp->s_state = OP(SEND_LAST);
}
done:
+ priv->s_flags &= ~HFI1_S_TID_WAIT_INTERLCK;
qp->s_psn = psn;
/*
* Set RVT_S_WAIT_PSN as rc_complete() may start the timer
@@ -1031,27 +1553,57 @@ done:
if ((cmp_psn(qp->s_psn, qp->s_sending_hpsn) <= 0) &&
(cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) <= 0))
qp->s_flags |= RVT_S_WAIT_PSN;
- qp->s_flags &= ~RVT_S_AHG_VALID;
+ qp->s_flags &= ~HFI1_S_AHG_VALID;
+ trace_hfi1_sender_reset_psn(qp);
}
/*
* Back up requester to resend the last un-ACKed request.
* The QP r_lock and s_lock should be held and interrupts disabled.
*/
-static void restart_rc(struct rvt_qp *qp, u32 psn, int wait)
+void hfi1_restart_rc(struct rvt_qp *qp, u32 psn, int wait)
{
+ struct hfi1_qp_priv *priv = qp->priv;
struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
struct hfi1_ibport *ibp;
lockdep_assert_held(&qp->r_lock);
lockdep_assert_held(&qp->s_lock);
+ trace_hfi1_sender_restart_rc(qp);
if (qp->s_retry == 0) {
if (qp->s_mig_state == IB_MIG_ARMED) {
hfi1_migrate_qp(qp);
qp->s_retry = qp->s_retry_cnt;
} else if (qp->s_last == qp->s_acked) {
- hfi1_send_complete(qp, wqe, IB_WC_RETRY_EXC_ERR);
- rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
+ /*
+ * We need special handling for the OPFN request WQEs as
+ * they are not allowed to generate real user errors
+ */
+ if (wqe->wr.opcode == IB_WR_OPFN) {
+ struct hfi1_ibport *ibp =
+ to_iport(qp->ibqp.device, qp->port_num);
+ /*
+ * Call opfn_conn_reply() with capcode and
+ * remaining data as 0 to close out the
+ * current request
+ */
+ opfn_conn_reply(qp, priv->opfn.curr);
+ wqe = do_rc_completion(qp, wqe, ibp);
+ qp->s_flags &= ~RVT_S_WAIT_ACK;
+ } else {
+ trace_hfi1_tid_write_sender_restart_rc(qp, 0);
+ if (wqe->wr.opcode == IB_WR_TID_RDMA_READ) {
+ struct tid_rdma_request *req;
+
+ req = wqe_to_tid_req(wqe);
+ hfi1_kern_exp_rcv_clear_all(req);
+ hfi1_kern_clear_hw_flow(priv->rcd, qp);
+ }
+
+ hfi1_trdma_send_complete(qp, wqe,
+ IB_WC_RETRY_EXC_ERR);
+ rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
+ }
return;
} else { /* need to handle delayed completion */
return;
@@ -1061,60 +1613,24 @@ static void restart_rc(struct rvt_qp *qp, u32 psn, int wait)
}
ibp = to_iport(qp->ibqp.device, qp->port_num);
- if (wqe->wr.opcode == IB_WR_RDMA_READ)
+ if (wqe->wr.opcode == IB_WR_RDMA_READ ||
+ wqe->wr.opcode == IB_WR_TID_RDMA_READ)
ibp->rvp.n_rc_resends++;
else
ibp->rvp.n_rc_resends += delta_psn(qp->s_psn, psn);
qp->s_flags &= ~(RVT_S_WAIT_FENCE | RVT_S_WAIT_RDMAR |
RVT_S_WAIT_SSN_CREDIT | RVT_S_WAIT_PSN |
- RVT_S_WAIT_ACK);
+ RVT_S_WAIT_ACK | HFI1_S_WAIT_TID_RESP);
if (wait)
qp->s_flags |= RVT_S_SEND_ONE;
reset_psn(qp, psn);
}
/*
- * This is called from s_timer for missing responses.
- */
-void hfi1_rc_timeout(unsigned long arg)
-{
- struct rvt_qp *qp = (struct rvt_qp *)arg;
- struct hfi1_ibport *ibp;
- unsigned long flags;
-
- spin_lock_irqsave(&qp->r_lock, flags);
- spin_lock(&qp->s_lock);
- if (qp->s_flags & RVT_S_TIMER) {
- ibp = to_iport(qp->ibqp.device, qp->port_num);
- ibp->rvp.n_rc_timeouts++;
- qp->s_flags &= ~RVT_S_TIMER;
- del_timer(&qp->s_timer);
- trace_hfi1_timeout(qp, qp->s_last_psn + 1);
- restart_rc(qp, qp->s_last_psn + 1, 1);
- hfi1_schedule_send(qp);
- }
- spin_unlock(&qp->s_lock);
- spin_unlock_irqrestore(&qp->r_lock, flags);
-}
-
-/*
- * This is called from s_timer for RNR timeouts.
- */
-void hfi1_rc_rnr_retry(unsigned long arg)
-{
- struct rvt_qp *qp = (struct rvt_qp *)arg;
- unsigned long flags;
-
- spin_lock_irqsave(&qp->s_lock, flags);
- hfi1_stop_rnr_timer(qp);
- hfi1_schedule_send(qp);
- spin_unlock_irqrestore(&qp->s_lock, flags);
-}
-
-/*
* Set qp->s_sending_psn to the next PSN after the given one.
- * This would be psn+1 except when RDMA reads are present.
+ * This would be psn+1 except when RDMA reads or TID RDMA ops
+ * are present.
*/
static void reset_sending_psn(struct rvt_qp *qp, u32 psn)
{
@@ -1126,7 +1642,9 @@ static void reset_sending_psn(struct rvt_qp *qp, u32 psn)
for (;;) {
wqe = rvt_get_swqe_ptr(qp, n);
if (cmp_psn(psn, wqe->lpsn) <= 0) {
- if (wqe->wr.opcode == IB_WR_RDMA_READ)
+ if (wqe->wr.opcode == IB_WR_RDMA_READ ||
+ wqe->wr.opcode == IB_WR_TID_RDMA_READ ||
+ wqe->wr.opcode == IB_WR_TID_RDMA_WRITE)
qp->s_sending_psn = wqe->lpsn + 1;
else
qp->s_sending_psn = psn + 1;
@@ -1139,62 +1657,145 @@ static void reset_sending_psn(struct rvt_qp *qp, u32 psn)
}
}
+/**
+ * hfi1_rc_verbs_aborted - handle abort status
+ * @qp: the QP
+ * @opah: the opa header
+ *
+ * This code modifies both ACK bit in BTH[2]
+ * and the s_flags to go into send one mode.
+ *
+ * This serves to throttle the send engine to only
+ * send a single packet in the likely case the
+ * a link has gone down.
+ */
+void hfi1_rc_verbs_aborted(struct rvt_qp *qp, struct hfi1_opa_header *opah)
+{
+ struct ib_other_headers *ohdr = hfi1_get_rc_ohdr(opah);
+ u8 opcode = ib_bth_get_opcode(ohdr);
+ u32 psn;
+
+ /* ignore responses */
+ if ((opcode >= OP(RDMA_READ_RESPONSE_FIRST) &&
+ opcode <= OP(ATOMIC_ACKNOWLEDGE)) ||
+ opcode == TID_OP(READ_RESP) ||
+ opcode == TID_OP(WRITE_RESP))
+ return;
+
+ psn = ib_bth_get_psn(ohdr) | IB_BTH_REQ_ACK;
+ ohdr->bth[2] = cpu_to_be32(psn);
+ qp->s_flags |= RVT_S_SEND_ONE;
+}
+
/*
* This should be called with the QP s_lock held and interrupts disabled.
*/
-void hfi1_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr)
+void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_opa_header *opah)
{
struct ib_other_headers *ohdr;
+ struct hfi1_qp_priv *priv = qp->priv;
struct rvt_swqe *wqe;
- u32 opcode;
+ u32 opcode, head, tail;
u32 psn;
+ struct tid_rdma_request *req;
lockdep_assert_held(&qp->s_lock);
- if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_OR_FLUSH_SEND))
+ if (!(ib_rvt_state_ops[qp->state] & RVT_SEND_OR_FLUSH_OR_RECV_OK))
return;
- /* Find out where the BTH is */
- if ((be16_to_cpu(hdr->lrh[0]) & 3) == HFI1_LRH_BTH)
- ohdr = &hdr->u.oth;
- else
- ohdr = &hdr->u.l.oth;
-
- opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
- if (opcode >= OP(RDMA_READ_RESPONSE_FIRST) &&
- opcode <= OP(ATOMIC_ACKNOWLEDGE)) {
+ ohdr = hfi1_get_rc_ohdr(opah);
+ opcode = ib_bth_get_opcode(ohdr);
+ if ((opcode >= OP(RDMA_READ_RESPONSE_FIRST) &&
+ opcode <= OP(ATOMIC_ACKNOWLEDGE)) ||
+ opcode == TID_OP(READ_RESP) ||
+ opcode == TID_OP(WRITE_RESP)) {
WARN_ON(!qp->s_rdma_ack_cnt);
qp->s_rdma_ack_cnt--;
return;
}
- psn = be32_to_cpu(ohdr->bth[2]);
- reset_sending_psn(qp, psn);
+ psn = ib_bth_get_psn(ohdr);
+ /*
+ * Don't attempt to reset the sending PSN for packets in the
+ * KDETH PSN space since the PSN does not match anything.
+ */
+ if (opcode != TID_OP(WRITE_DATA) &&
+ opcode != TID_OP(WRITE_DATA_LAST) &&
+ opcode != TID_OP(ACK) && opcode != TID_OP(RESYNC))
+ reset_sending_psn(qp, psn);
+
+ /* Handle TID RDMA WRITE packets differently */
+ if (opcode >= TID_OP(WRITE_REQ) &&
+ opcode <= TID_OP(WRITE_DATA_LAST)) {
+ head = priv->s_tid_head;
+ tail = priv->s_tid_cur;
+ /*
+ * s_tid_cur is set to s_tid_head in the case, where
+ * a new TID RDMA request is being started and all
+ * previous ones have been completed.
+ * Therefore, we need to do a secondary check in order
+ * to properly determine whether we should start the
+ * RC timer.
+ */
+ wqe = rvt_get_swqe_ptr(qp, tail);
+ req = wqe_to_tid_req(wqe);
+ if (head == tail && req->comp_seg < req->total_segs) {
+ if (tail == 0)
+ tail = qp->s_size - 1;
+ else
+ tail -= 1;
+ }
+ } else {
+ head = qp->s_tail;
+ tail = qp->s_acked;
+ }
/*
* Start timer after a packet requesting an ACK has been sent and
* there are still requests that haven't been acked.
*/
- if ((psn & IB_BTH_REQ_ACK) && qp->s_acked != qp->s_tail &&
+ if ((psn & IB_BTH_REQ_ACK) && tail != head &&
+ opcode != TID_OP(WRITE_DATA) && opcode != TID_OP(WRITE_DATA_LAST) &&
+ opcode != TID_OP(RESYNC) &&
!(qp->s_flags &
- (RVT_S_TIMER | RVT_S_WAIT_RNR | RVT_S_WAIT_PSN)) &&
- (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK))
- hfi1_add_retry_timer(qp);
+ (RVT_S_TIMER | RVT_S_WAIT_RNR | RVT_S_WAIT_PSN)) &&
+ (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) {
+ if (opcode == TID_OP(READ_REQ))
+ rvt_add_retry_timer_ext(qp, priv->timeout_shift);
+ else
+ rvt_add_retry_timer(qp);
+ }
- while (qp->s_last != qp->s_acked) {
- u32 s_last;
+ /* Start TID RDMA ACK timer */
+ if ((opcode == TID_OP(WRITE_DATA) ||
+ opcode == TID_OP(WRITE_DATA_LAST) ||
+ opcode == TID_OP(RESYNC)) &&
+ (psn & IB_BTH_REQ_ACK) &&
+ !(priv->s_flags & HFI1_S_TID_RETRY_TIMER) &&
+ (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) {
+ /*
+ * The TID RDMA ACK packet could be received before this
+ * function is called. Therefore, add the timer only if TID
+ * RDMA ACK packets are actually pending.
+ */
+ wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
+ req = wqe_to_tid_req(wqe);
+ if (wqe->wr.opcode == IB_WR_TID_RDMA_WRITE &&
+ req->ack_seg < req->cur_seg)
+ hfi1_add_tid_retry_timer(qp);
+ }
+ while (qp->s_last != qp->s_acked) {
wqe = rvt_get_swqe_ptr(qp, qp->s_last);
if (cmp_psn(wqe->lpsn, qp->s_sending_psn) >= 0 &&
cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) <= 0)
break;
- s_last = qp->s_last;
- if (++s_last >= qp->s_size)
- s_last = 0;
- qp->s_last = s_last;
- /* see post_send() */
- barrier();
- rvt_put_swqe(wqe);
- rvt_qp_swqe_complete(qp, wqe, IB_WC_SUCCESS);
+ trdma_clean_swqe(qp, wqe);
+ trace_hfi1_qp_send_completion(qp, wqe, qp->s_last);
+ rvt_qp_complete_swqe(qp,
+ wqe,
+ ib_hfi1_wc_opcode[wqe->wr.opcode],
+ IB_WC_SUCCESS);
}
/*
* If we were waiting for sends to complete before re-sending,
@@ -1220,28 +1821,27 @@ static inline void update_last_psn(struct rvt_qp *qp, u32 psn)
* This is similar to hfi1_send_complete but has to check to be sure
* that the SGEs are not being referenced if the SWQE is being resent.
*/
-static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp,
- struct rvt_swqe *wqe,
- struct hfi1_ibport *ibp)
+struct rvt_swqe *do_rc_completion(struct rvt_qp *qp,
+ struct rvt_swqe *wqe,
+ struct hfi1_ibport *ibp)
{
+ struct hfi1_qp_priv *priv = qp->priv;
+
lockdep_assert_held(&qp->s_lock);
/*
* Don't decrement refcount and don't generate a
* completion if the SWQE is being resent until the send
* is finished.
*/
+ trace_hfi1_rc_completion(qp, wqe->lpsn);
if (cmp_psn(wqe->lpsn, qp->s_sending_psn) < 0 ||
cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) > 0) {
- u32 s_last;
-
- rvt_put_swqe(wqe);
- s_last = qp->s_last;
- if (++s_last >= qp->s_size)
- s_last = 0;
- qp->s_last = s_last;
- /* see post_send() */
- barrier();
- rvt_qp_swqe_complete(qp, wqe, IB_WC_SUCCESS);
+ trdma_clean_swqe(qp, wqe);
+ trace_hfi1_qp_send_completion(qp, wqe, qp->s_last);
+ rvt_qp_complete_swqe(qp,
+ wqe,
+ ib_hfi1_wc_opcode[wqe->wr.opcode],
+ IB_WC_SUCCESS);
} else {
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
@@ -1252,17 +1852,27 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp,
*/
if (ppd->dd->flags & HFI1_HAS_SEND_DMA) {
struct sdma_engine *engine;
+ u8 sl = rdma_ah_get_sl(&qp->remote_ah_attr);
u8 sc5;
/* For now use sc to find engine */
- sc5 = ibp->sl_to_sc[qp->remote_ah_attr.sl];
+ sc5 = ibp->sl_to_sc[sl];
engine = qp_to_sdma_engine(qp, sc5);
sdma_engine_progress_schedule(engine);
}
}
qp->s_retry = qp->s_retry_cnt;
- update_last_psn(qp, wqe->lpsn);
+ /*
+ * Don't update the last PSN if the request being completed is
+ * a TID RDMA WRITE request.
+ * Completion of the TID RDMA WRITE requests are done by the
+ * TID RDMA ACKs and as such could be for a request that has
+ * already been ACKed as far as the IB state machine is
+ * concerned.
+ */
+ if (wqe->wr.opcode != IB_WR_TID_RDMA_WRITE)
+ update_last_psn(qp, wqe->lpsn);
/*
* If we are completing a request which is in the process of
@@ -1285,10 +1895,62 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp,
qp->s_draining = 0;
wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
}
+ if (priv->s_flags & HFI1_S_TID_WAIT_INTERLCK) {
+ priv->s_flags &= ~HFI1_S_TID_WAIT_INTERLCK;
+ hfi1_schedule_send(qp);
+ }
return wqe;
}
+static void set_restart_qp(struct rvt_qp *qp, struct hfi1_ctxtdata *rcd)
+{
+ /* Retry this request. */
+ if (!(qp->r_flags & RVT_R_RDMAR_SEQ)) {
+ qp->r_flags |= RVT_R_RDMAR_SEQ;
+ hfi1_restart_rc(qp, qp->s_last_psn + 1, 0);
+ if (list_empty(&qp->rspwait)) {
+ qp->r_flags |= RVT_R_RSP_SEND;
+ rvt_get_qp(qp);
+ list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
+ }
+ }
+}
+
/**
+ * update_qp_retry_state - Update qp retry state.
+ * @qp: the QP
+ * @psn: the packet sequence number of the TID RDMA WRITE RESP.
+ * @spsn: The start psn for the given TID RDMA WRITE swqe.
+ * @lpsn: The last psn for the given TID RDMA WRITE swqe.
+ *
+ * This function is called to update the qp retry state upon
+ * receiving a TID WRITE RESP after the qp is scheduled to retry
+ * a request.
+ */
+static void update_qp_retry_state(struct rvt_qp *qp, u32 psn, u32 spsn,
+ u32 lpsn)
+{
+ struct hfi1_qp_priv *qpriv = qp->priv;
+
+ qp->s_psn = psn + 1;
+ /*
+ * If this is the first TID RDMA WRITE RESP packet for the current
+ * request, change the s_state so that the retry will be processed
+ * correctly. Similarly, if this is the last TID RDMA WRITE RESP
+ * packet, change the s_state and advance the s_cur.
+ */
+ if (cmp_psn(psn, lpsn) >= 0) {
+ qp->s_cur = qpriv->s_tid_cur + 1;
+ if (qp->s_cur >= qp->s_size)
+ qp->s_cur = 0;
+ qp->s_state = TID_OP(WRITE_REQ);
+ } else if (!cmp_psn(psn, spsn)) {
+ qp->s_cur = qpriv->s_tid_cur;
+ qp->s_state = TID_OP(WRITE_RESP);
+ }
+}
+
+/*
* do_rc_ack - process an incoming RC ACK
* @qp: the QP the ACK came in on
* @psn: the packet sequence number of the ACK
@@ -1299,16 +1961,17 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp,
* May be called at interrupt level, with the QP s_lock held.
* Returns 1 if OK, 0 if current operation should be aborted (NAK).
*/
-static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
- u64 val, struct hfi1_ctxtdata *rcd)
+int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
+ u64 val, struct hfi1_ctxtdata *rcd)
{
struct hfi1_ibport *ibp;
enum ib_wc_status status;
+ struct hfi1_qp_priv *qpriv = qp->priv;
struct rvt_swqe *wqe;
int ret = 0;
u32 ack_psn;
int diff;
- unsigned long to;
+ struct rvt_dev_info *rdi;
lockdep_assert_held(&qp->s_lock);
/*
@@ -1318,10 +1981,10 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
* request but will include an ACK'ed request(s).
*/
ack_psn = psn;
- if (aeth >> 29)
+ if (aeth >> IB_AETH_NAK_SHIFT)
ack_psn--;
wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
- ibp = to_iport(qp->ibqp.device, qp->port_num);
+ ibp = rcd_to_iport(rcd);
/*
* The MSN might be for a later WQE than the PSN indicates so
@@ -1351,20 +2014,14 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
*/
if ((wqe->wr.opcode == IB_WR_RDMA_READ &&
(opcode != OP(RDMA_READ_RESPONSE_LAST) || diff != 0)) ||
+ (wqe->wr.opcode == IB_WR_TID_RDMA_READ &&
+ (opcode != TID_OP(READ_RESP) || diff != 0)) ||
((wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) &&
- (opcode != OP(ATOMIC_ACKNOWLEDGE) || diff != 0))) {
- /* Retry this request. */
- if (!(qp->r_flags & RVT_R_RDMAR_SEQ)) {
- qp->r_flags |= RVT_R_RDMAR_SEQ;
- restart_rc(qp, qp->s_last_psn + 1, 0);
- if (list_empty(&qp->rspwait)) {
- qp->r_flags |= RVT_R_RSP_SEND;
- rvt_get_qp(qp);
- list_add_tail(&qp->rspwait,
- &rcd->qp_wait_list);
- }
- }
+ (opcode != OP(ATOMIC_ACKNOWLEDGE) || diff != 0)) ||
+ (wqe->wr.opcode == IB_WR_TID_RDMA_WRITE &&
+ (delta_psn(psn, qp->s_last_psn) != 1))) {
+ set_restart_qp(qp, rcd);
/*
* No need to process the ACK/NAK since we are
* restarting an earlier request.
@@ -1376,6 +2033,9 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
u64 *vaddr = wqe->sg_list[0].vaddr;
*vaddr = val;
}
+ if (wqe->wr.opcode == IB_WR_OPFN)
+ opfn_conn_reply(qp, val);
+
if (qp->s_num_rd_atomic &&
(wqe->wr.opcode == IB_WR_RDMA_READ ||
wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
@@ -1393,29 +2053,88 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
hfi1_schedule_send(qp);
}
}
+
+ /*
+ * TID RDMA WRITE requests will be completed by the TID RDMA
+ * ACK packet handler (see tid_rdma.c).
+ */
+ if (wqe->wr.opcode == IB_WR_TID_RDMA_WRITE)
+ break;
+
wqe = do_rc_completion(qp, wqe, ibp);
if (qp->s_acked == qp->s_tail)
break;
}
- switch (aeth >> 29) {
+ trace_hfi1_rc_ack_do(qp, aeth, psn, wqe);
+ trace_hfi1_sender_do_rc_ack(qp);
+ switch (aeth >> IB_AETH_NAK_SHIFT) {
case 0: /* ACK */
this_cpu_inc(*ibp->rvp.rc_acks);
- if (qp->s_acked != qp->s_tail) {
- /*
- * We are expecting more ACKs so
- * mod the retry timer.
- */
- hfi1_mod_retry_timer(qp);
+ if (wqe->wr.opcode == IB_WR_TID_RDMA_READ) {
+ if (wqe_to_tid_req(wqe)->ack_pending)
+ rvt_mod_retry_timer_ext(qp,
+ qpriv->timeout_shift);
+ else
+ rvt_stop_rc_timers(qp);
+ } else if (qp->s_acked != qp->s_tail) {
+ struct rvt_swqe *__w = NULL;
+
+ if (qpriv->s_tid_cur != HFI1_QP_WQE_INVALID)
+ __w = rvt_get_swqe_ptr(qp, qpriv->s_tid_cur);
+
/*
- * We can stop re-sending the earlier packets and
- * continue with the next packet the receiver wants.
+ * Stop timers if we've received all of the TID RDMA
+ * WRITE * responses.
*/
- if (cmp_psn(qp->s_psn, psn) <= 0)
- reset_psn(qp, psn + 1);
+ if (__w && __w->wr.opcode == IB_WR_TID_RDMA_WRITE &&
+ opcode == TID_OP(WRITE_RESP)) {
+ /*
+ * Normally, the loop above would correctly
+ * process all WQEs from s_acked onward and
+ * either complete them or check for correct
+ * PSN sequencing.
+ * However, for TID RDMA, due to pipelining,
+ * the response may not be for the request at
+ * s_acked so the above look would just be
+ * skipped. This does not allow for checking
+ * the PSN sequencing. It has to be done
+ * separately.
+ */
+ if (cmp_psn(psn, qp->s_last_psn + 1)) {
+ set_restart_qp(qp, rcd);
+ goto bail_stop;
+ }
+ /*
+ * If the psn is being resent, stop the
+ * resending.
+ */
+ if (qp->s_cur != qp->s_tail &&
+ cmp_psn(qp->s_psn, psn) <= 0)
+ update_qp_retry_state(qp, psn,
+ __w->psn,
+ __w->lpsn);
+ else if (--qpriv->pending_tid_w_resp)
+ rvt_mod_retry_timer(qp);
+ else
+ rvt_stop_rc_timers(qp);
+ } else {
+ /*
+ * We are expecting more ACKs so
+ * mod the retry timer.
+ */
+ rvt_mod_retry_timer(qp);
+ /*
+ * We can stop re-sending the earlier packets
+ * and continue with the next packet the
+ * receiver wants.
+ */
+ if (cmp_psn(qp->s_psn, psn) <= 0)
+ reset_psn(qp, psn + 1);
+ }
} else {
/* No more acks - kill all timers */
- hfi1_stop_rc_timers(qp);
+ rvt_stop_rc_timers(qp);
if (cmp_psn(qp->s_psn, psn) <= 0) {
qp->s_state = OP(SEND_LAST);
qp->s_psn = psn + 1;
@@ -1425,9 +2144,18 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
qp->s_flags &= ~RVT_S_WAIT_ACK;
hfi1_schedule_send(qp);
}
- hfi1_get_credit(qp, aeth);
+ rvt_get_credit(qp, aeth);
qp->s_rnr_retry = qp->s_rnr_retry_cnt;
qp->s_retry = qp->s_retry_cnt;
+ /*
+ * If the current request is a TID RDMA WRITE request and the
+ * response is not a TID RDMA WRITE RESP packet, s_last_psn
+ * can't be advanced.
+ */
+ if (wqe->wr.opcode == IB_WR_TID_RDMA_WRITE &&
+ opcode != TID_OP(WRITE_RESP) &&
+ cmp_psn(psn, wqe->psn) >= 0)
+ return 1;
update_last_psn(qp, psn);
return 1;
@@ -1437,26 +2165,34 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
goto bail_stop;
if (qp->s_flags & RVT_S_WAIT_RNR)
goto bail_stop;
- if (qp->s_rnr_retry == 0) {
- status = IB_WC_RNR_RETRY_EXC_ERR;
- goto class_b;
+ rdi = ib_to_rvt(qp->ibqp.device);
+ if (!(rdi->post_parms[wqe->wr.opcode].flags &
+ RVT_OPERATION_IGN_RNR_CNT)) {
+ if (qp->s_rnr_retry == 0) {
+ status = IB_WC_RNR_RETRY_EXC_ERR;
+ goto class_b;
+ }
+ if (qp->s_rnr_retry_cnt < 7 && qp->s_rnr_retry_cnt > 0)
+ qp->s_rnr_retry--;
}
- if (qp->s_rnr_retry_cnt < 7)
- qp->s_rnr_retry--;
- /* The last valid PSN is the previous PSN. */
- update_last_psn(qp, psn - 1);
+ /*
+ * The last valid PSN is the previous PSN. For TID RDMA WRITE
+ * request, s_last_psn should be incremented only when a TID
+ * RDMA WRITE RESP is received to avoid skipping lost TID RDMA
+ * WRITE RESP packets.
+ */
+ if (wqe->wr.opcode == IB_WR_TID_RDMA_WRITE) {
+ reset_psn(qp, qp->s_last_psn + 1);
+ } else {
+ update_last_psn(qp, psn - 1);
+ reset_psn(qp, psn);
+ }
ibp->rvp.n_rc_resends += delta_psn(qp->s_psn, psn);
-
- reset_psn(qp, psn);
-
qp->s_flags &= ~(RVT_S_WAIT_SSN_CREDIT | RVT_S_WAIT_ACK);
- hfi1_stop_rc_timers(qp);
- to =
- ib_hfi1_rnr_table[(aeth >> HFI1_AETH_CREDIT_SHIFT) &
- HFI1_AETH_CREDIT_MASK];
- hfi1_add_rnr_timer(qp, to);
+ rvt_stop_rc_timers(qp);
+ rvt_add_rnr_timer(qp, aeth);
return 0;
case 3: /* NAK */
@@ -1464,8 +2200,8 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
goto bail_stop;
/* The last valid PSN is the previous PSN. */
update_last_psn(qp, psn - 1);
- switch ((aeth >> HFI1_AETH_CREDIT_SHIFT) &
- HFI1_AETH_CREDIT_MASK) {
+ switch ((aeth >> IB_AETH_CREDIT_SHIFT) &
+ IB_AETH_CREDIT_MASK) {
case 0: /* PSN sequence error */
ibp->rvp.n_seq_naks++;
/*
@@ -1474,7 +2210,7 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
* RDMA READ response which terminates the RDMA
* READ.
*/
- restart_rc(qp, psn, 0);
+ hfi1_restart_rc(qp, psn, 0);
hfi1_schedule_send(qp);
break;
@@ -1493,7 +2229,10 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
ibp->rvp.n_other_naks++;
class_b:
if (qp->s_last == qp->s_acked) {
- hfi1_send_complete(qp, wqe, status);
+ if (wqe->wr.opcode == IB_WR_TID_RDMA_READ)
+ hfi1_kern_read_tid_flow_free(qp);
+
+ hfi1_trdma_send_complete(qp, wqe, status);
rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
}
break;
@@ -1513,7 +2252,7 @@ reserved:
}
/* cannot be reached */
bail_stop:
- hfi1_stop_rc_timers(qp);
+ rvt_stop_rc_timers(qp);
return ret;
}
@@ -1528,12 +2267,14 @@ static void rdma_seq_err(struct rvt_qp *qp, struct hfi1_ibport *ibp, u32 psn,
lockdep_assert_held(&qp->s_lock);
/* Remove QP from retry timer */
- hfi1_stop_rc_timers(qp);
+ rvt_stop_rc_timers(qp);
wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
while (cmp_psn(psn, wqe->lpsn) > 0) {
if (wqe->wr.opcode == IB_WR_RDMA_READ ||
+ wqe->wr.opcode == IB_WR_TID_RDMA_READ ||
+ wqe->wr.opcode == IB_WR_TID_RDMA_WRITE ||
wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
break;
@@ -1542,7 +2283,7 @@ static void rdma_seq_err(struct rvt_qp *qp, struct hfi1_ibport *ibp, u32 psn,
ibp->rvp.n_rdma_seq++;
qp->r_flags |= RVT_R_RDMAR_SEQ;
- restart_rc(qp, qp->s_last_psn + 1, 0);
+ hfi1_restart_rc(qp, qp->s_last_psn + 1, 0);
if (list_empty(&qp->rspwait)) {
qp->r_flags |= RVT_R_RSP_SEND;
rvt_get_qp(qp);
@@ -1552,41 +2293,38 @@ static void rdma_seq_err(struct rvt_qp *qp, struct hfi1_ibport *ibp, u32 psn,
/**
* rc_rcv_resp - process an incoming RC response packet
- * @ibp: the port this packet came in on
- * @ohdr: the other headers for this packet
- * @data: the packet data
- * @tlen: the packet length
- * @qp: the QP for this packet
- * @opcode: the opcode for this packet
- * @psn: the packet sequence number for this packet
- * @hdrsize: the header length
- * @pmtu: the path MTU
+ * @packet: data packet information
*
* This is called from hfi1_rc_rcv() to process an incoming RC response
* packet for the given QP.
* Called at interrupt level.
*/
-static void rc_rcv_resp(struct hfi1_ibport *ibp,
- struct ib_other_headers *ohdr,
- void *data, u32 tlen, struct rvt_qp *qp,
- u32 opcode, u32 psn, u32 hdrsize, u32 pmtu,
- struct hfi1_ctxtdata *rcd)
+static void rc_rcv_resp(struct hfi1_packet *packet)
{
+ struct hfi1_ctxtdata *rcd = packet->rcd;
+ void *data = packet->payload;
+ u32 tlen = packet->tlen;
+ struct rvt_qp *qp = packet->qp;
+ struct hfi1_ibport *ibp;
+ struct ib_other_headers *ohdr = packet->ohdr;
struct rvt_swqe *wqe;
enum ib_wc_status status;
unsigned long flags;
int diff;
- u32 pad;
- u32 aeth;
u64 val;
+ u32 aeth;
+ u32 psn = ib_bth_get_psn(packet->ohdr);
+ u32 pmtu = qp->pmtu;
+ u16 hdrsize = packet->hlen;
+ u8 opcode = packet->opcode;
+ u8 pad = packet->pad;
+ u8 extra_bytes = pad + packet->extra_byte + (SIZE_OF_CRC << 2);
spin_lock_irqsave(&qp->s_lock, flags);
-
trace_hfi1_ack(qp, psn);
/* Ignore invalid responses. */
- smp_read_barrier_depends(); /* see post_one_send */
- if (cmp_psn(psn, ACCESS_ONCE(qp->s_next_psn)) >= 0)
+ if (cmp_psn(psn, READ_ONCE(qp->s_next_psn)) >= 0)
goto ack_done;
/* Ignore duplicate responses. */
@@ -1595,8 +2333,8 @@ static void rc_rcv_resp(struct hfi1_ibport *ibp,
/* Update credits for "ghost" ACKs */
if (diff == 0 && opcode == OP(ACKNOWLEDGE)) {
aeth = be32_to_cpu(ohdr->u.aeth);
- if ((aeth >> 29) == 0)
- hfi1_get_credit(qp, aeth);
+ if ((aeth >> IB_AETH_NAK_SHIFT) == 0)
+ rvt_get_credit(qp, aeth);
}
goto ack_done;
}
@@ -1647,7 +2385,7 @@ static void rc_rcv_resp(struct hfi1_ibport *ibp,
if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
goto ack_op_err;
read_middle:
- if (unlikely(tlen != (hdrsize + pmtu + 4)))
+ if (unlikely(tlen != (hdrsize + pmtu + extra_bytes)))
goto ack_len_err;
if (unlikely(pmtu >= qp->s_rdma_read_len))
goto ack_len_err;
@@ -1656,8 +2394,7 @@ read_middle:
* We got a response so update the timeout.
* 4.096 usec. * (1 << qp->timeout)
*/
- qp->s_flags |= RVT_S_TIMER;
- mod_timer(&qp->s_timer, jiffies + qp->timeout_jiffies);
+ rvt_mod_retry_timer(qp);
if (qp->s_flags & RVT_S_WAIT_ACK) {
qp->s_flags &= ~RVT_S_WAIT_ACK;
hfi1_schedule_send(qp);
@@ -1673,20 +2410,19 @@ read_middle:
qp->s_rdma_read_len -= pmtu;
update_last_psn(qp, psn);
spin_unlock_irqrestore(&qp->s_lock, flags);
- hfi1_copy_sge(&qp->s_rdma_read_sge, data, pmtu, 0, 0);
+ rvt_copy_sge(qp, &qp->s_rdma_read_sge,
+ data, pmtu, false, false);
goto bail;
case OP(RDMA_READ_RESPONSE_ONLY):
aeth = be32_to_cpu(ohdr->u.aeth);
if (!do_rc_ack(qp, aeth, psn, opcode, 0, rcd))
goto ack_done;
- /* Get the number of bytes the message was padded by. */
- pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
/*
* Check that the data size is >= 0 && <= pmtu.
* Remember to account for ICRC (4).
*/
- if (unlikely(tlen < (hdrsize + pad + 4)))
+ if (unlikely(tlen < (hdrsize + extra_bytes)))
goto ack_len_err;
/*
* If this is a response to a resent RDMA read, we
@@ -1704,20 +2440,19 @@ read_middle:
goto ack_seq_err;
if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
goto ack_op_err;
- /* Get the number of bytes the message was padded by. */
- pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
/*
* Check that the data size is >= 1 && <= pmtu.
* Remember to account for ICRC (4).
*/
- if (unlikely(tlen <= (hdrsize + pad + 4)))
+ if (unlikely(tlen <= (hdrsize + extra_bytes)))
goto ack_len_err;
read_last:
- tlen -= hdrsize + pad + 4;
+ tlen -= hdrsize + extra_bytes;
if (unlikely(tlen != qp->s_rdma_read_len))
goto ack_len_err;
aeth = be32_to_cpu(ohdr->u.aeth);
- hfi1_copy_sge(&qp->s_rdma_read_sge, data, tlen, 0, 0);
+ rvt_copy_sge(qp, &qp->s_rdma_read_sge,
+ data, tlen, false, false);
WARN_ON(qp->s_rdma_read_sge.num_sge);
(void)do_rc_ack(qp, aeth, psn,
OP(RDMA_READ_RESPONSE_LAST), 0, rcd);
@@ -1729,6 +2464,7 @@ ack_op_err:
goto ack_err;
ack_seq_err:
+ ibp = rcd_to_iport(rcd);
rdma_seq_err(qp, ibp, psn, rcd);
goto ack_done;
@@ -1736,7 +2472,7 @@ ack_len_err:
status = IB_WC_LOC_LEN_ERR;
ack_err:
if (qp->s_last == qp->s_acked) {
- hfi1_send_complete(qp, wqe, status);
+ rvt_send_complete(qp, wqe, status);
rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
}
ack_done:
@@ -1745,21 +2481,9 @@ bail:
return;
}
-static inline void rc_defered_ack(struct hfi1_ctxtdata *rcd,
- struct rvt_qp *qp)
-{
- if (list_empty(&qp->rspwait)) {
- qp->r_flags |= RVT_R_RSP_NAK;
- rvt_get_qp(qp);
- list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
- }
-}
-
static inline void rc_cancel_ack(struct rvt_qp *qp)
{
- struct hfi1_qp_priv *priv = qp->priv;
-
- priv->r_adefered = 0;
+ qp->r_adefered = 0;
if (list_empty(&qp->rspwait))
return;
list_del_init(&qp->rspwait);
@@ -1775,6 +2499,7 @@ static inline void rc_cancel_ack(struct rvt_qp *qp)
* @opcode: the opcode for this packet
* @psn: the packet sequence number for this packet
* @diff: the difference between the PSN and the expected PSN
+ * @rcd: the receive context
*
* This is called from hfi1_rc_rcv() to process an unexpected
* incoming RC packet for the given QP.
@@ -1786,11 +2511,12 @@ static noinline int rc_rcv_error(struct ib_other_headers *ohdr, void *data,
struct rvt_qp *qp, u32 opcode, u32 psn,
int diff, struct hfi1_ctxtdata *rcd)
{
- struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
+ struct hfi1_ibport *ibp = rcd_to_iport(rcd);
struct rvt_ack_entry *e;
unsigned long flags;
- u8 i, prev;
- int old_req;
+ u8 prev;
+ u8 mra; /* most recent ACK */
+ bool old_req;
trace_hfi1_rcv_error(qp, psn);
if (diff > 0) {
@@ -1831,34 +2557,13 @@ static noinline int rc_rcv_error(struct ib_other_headers *ohdr, void *data,
* to be sent before sending this one.
*/
e = NULL;
- old_req = 1;
+ old_req = true;
ibp->rvp.n_rc_dupreq++;
spin_lock_irqsave(&qp->s_lock, flags);
- for (i = qp->r_head_ack_queue; ; i = prev) {
- if (i == qp->s_tail_ack_queue)
- old_req = 0;
- if (i)
- prev = i - 1;
- else
- prev = HFI1_MAX_RDMA_ATOMIC;
- if (prev == qp->r_head_ack_queue) {
- e = NULL;
- break;
- }
- e = &qp->s_ack_queue[prev];
- if (!e->opcode) {
- e = NULL;
- break;
- }
- if (cmp_psn(psn, e->psn) >= 0) {
- if (prev == qp->s_tail_ack_queue &&
- cmp_psn(psn, e->lpsn) <= 0)
- old_req = 0;
- break;
- }
- }
+ e = find_prev_entry(qp, psn, &prev, &mra, &old_req);
+
switch (opcode) {
case OP(RDMA_READ_REQUEST): {
struct ib_reth *reth;
@@ -1884,10 +2589,7 @@ static noinline int rc_rcv_error(struct ib_other_headers *ohdr, void *data,
len = be32_to_cpu(reth->length);
if (unlikely(offset + len != e->rdma_sge.sge_length))
goto unlock_done;
- if (e->rdma_sge.mr) {
- rvt_put_mr(e->rdma_sge.mr);
- e->rdma_sge.mr = NULL;
- }
+ release_rdma_sge_mr(e);
if (len != 0) {
u32 rkey = be32_to_cpu(reth->rkey);
u64 vaddr = get_ib_reth_vaddr(reth);
@@ -1905,6 +2607,8 @@ static noinline int rc_rcv_error(struct ib_other_headers *ohdr, void *data,
e->psn = psn;
if (old_req)
goto unlock_done;
+ if (qp->s_acked_ack_queue == qp->s_tail_ack_queue)
+ qp->s_acked_ack_queue = prev;
qp->s_tail_ack_queue = prev;
break;
}
@@ -1918,6 +2622,8 @@ static noinline int rc_rcv_error(struct ib_other_headers *ohdr, void *data,
*/
if (!e || e->opcode != (u8)opcode || old_req)
goto unlock_done;
+ if (qp->s_tail_ack_queue == qp->s_acked_ack_queue)
+ qp->s_acked_ack_queue = prev;
qp->s_tail_ack_queue = prev;
break;
}
@@ -1933,7 +2639,7 @@ static noinline int rc_rcv_error(struct ib_other_headers *ohdr, void *data,
* Resend the most recent ACK if this request is
* after all the previous RDMA reads and atomics.
*/
- if (i == qp->r_head_ack_queue) {
+ if (mra == qp->r_head_ack_queue) {
spin_unlock_irqrestore(&qp->s_lock, flags);
qp->r_nak_state = 0;
qp->r_ack_psn = qp->r_psn - 1;
@@ -1944,7 +2650,9 @@ static noinline int rc_rcv_error(struct ib_other_headers *ohdr, void *data,
* Resend the RDMA read or atomic op which
* ACKs this duplicate request.
*/
- qp->s_tail_ack_queue = i;
+ if (qp->s_tail_ack_queue == qp->s_acked_ack_queue)
+ qp->s_acked_ack_queue = mra;
+ qp->s_tail_ack_queue = mra;
break;
}
qp->s_ack_state = OP(ACKNOWLEDGE);
@@ -1961,36 +2669,6 @@ send_ack:
return 0;
}
-void hfi1_rc_error(struct rvt_qp *qp, enum ib_wc_status err)
-{
- unsigned long flags;
- int lastwqe;
-
- spin_lock_irqsave(&qp->s_lock, flags);
- lastwqe = rvt_error_qp(qp, err);
- spin_unlock_irqrestore(&qp->s_lock, flags);
-
- if (lastwqe) {
- struct ib_event ev;
-
- ev.device = qp->ibqp.device;
- ev.element.qp = &qp->ibqp;
- ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
- qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
- }
-}
-
-static inline void update_ack_queue(struct rvt_qp *qp, unsigned n)
-{
- unsigned next;
-
- next = n + 1;
- if (next > HFI1_MAX_RDMA_ATOMIC)
- next = 0;
- qp->s_tail_ack_queue = next;
- qp->s_ack_state = OP(ACKNOWLEDGE);
-}
-
static void log_cca_event(struct hfi1_pportdata *ppd, u8 sl, u32 rlid,
u32 lqpn, u32 rqpn, u8 svc_type)
{
@@ -2014,12 +2692,12 @@ static void log_cca_event(struct hfi1_pportdata *ppd, u8 sl, u32 rlid,
cc_event->svc_type = svc_type;
cc_event->rlid = rlid;
/* keep timestamp in units of 1.024 usec */
- cc_event->timestamp = ktime_to_ns(ktime_get()) / 1024;
+ cc_event->timestamp = ktime_get_ns() / 1024;
spin_unlock_irqrestore(&ppd->cc_log_lock, flags);
}
-void process_becn(struct hfi1_pportdata *ppd, u8 sl, u16 rlid, u32 lqpn,
+void process_becn(struct hfi1_pportdata *ppd, u8 sl, u32 rlid, u32 lqpn,
u32 rqpn, u8 svc_type)
{
struct cca_timer *cca_timer;
@@ -2065,7 +2743,7 @@ void process_becn(struct hfi1_pportdata *ppd, u8 sl, u16 rlid, u32 lqpn,
unsigned long nsec = 1024 * ccti_timer;
hrtimer_start(&cca_timer->hrtimer, ns_to_ktime(nsec),
- HRTIMER_MODE_REL);
+ HRTIMER_MODE_REL_PINNED);
}
spin_unlock_irqrestore(&ppd->cca_timer_lock, flags);
@@ -2076,12 +2754,7 @@ void process_becn(struct hfi1_pportdata *ppd, u8 sl, u16 rlid, u32 lqpn,
/**
* hfi1_rc_rcv - process an incoming RC packet
- * @rcd: the context pointer
- * @hdr: the header of this packet
- * @rcv_flags: flags relevant to rcv processing
- * @data: the packet data
- * @tlen: the packet length
- * @qp: the QP for this packet
+ * @packet: data packet information
*
* This is called from qp_rcv() to process an incoming RC packet
* for the given QP.
@@ -2090,35 +2763,33 @@ void process_becn(struct hfi1_pportdata *ppd, u8 sl, u16 rlid, u32 lqpn,
void hfi1_rc_rcv(struct hfi1_packet *packet)
{
struct hfi1_ctxtdata *rcd = packet->rcd;
- struct ib_header *hdr = packet->hdr;
- u32 rcv_flags = packet->rcv_flags;
- void *data = packet->ebuf;
+ void *data = packet->payload;
u32 tlen = packet->tlen;
struct rvt_qp *qp = packet->qp;
- struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
+ struct hfi1_qp_priv *qpriv = qp->priv;
+ struct hfi1_ibport *ibp = rcd_to_iport(rcd);
struct ib_other_headers *ohdr = packet->ohdr;
- u32 bth0, opcode;
+ u32 opcode = packet->opcode;
u32 hdrsize = packet->hlen;
- u32 psn;
- u32 pad;
+ u32 psn = ib_bth_get_psn(packet->ohdr);
+ u32 pad = packet->pad;
struct ib_wc wc;
u32 pmtu = qp->pmtu;
int diff;
struct ib_reth *reth;
unsigned long flags;
- int ret, is_fecn = 0;
- int copy_last = 0;
+ int ret;
+ bool copy_last = false, fecn;
u32 rkey;
+ u8 extra_bytes = pad + packet->extra_byte + (SIZE_OF_CRC << 2);
lockdep_assert_held(&qp->r_lock);
- bth0 = be32_to_cpu(ohdr->bth[0]);
- if (hfi1_ruc_check_hdr(ibp, hdr, rcv_flags & HFI1_HAS_GRH, qp, bth0))
- return;
- is_fecn = process_ecn(qp, packet, false);
+ if (hfi1_ruc_check_hdr(ibp, packet))
+ return;
- psn = be32_to_cpu(ohdr->bth[2]);
- opcode = (bth0 >> 24) & 0xff;
+ fecn = process_ecn(qp, packet);
+ opfn_trigger_conn_request(qp, be32_to_cpu(ohdr->bth[1]));
/*
* Process responses (ACKs) before anything else. Note that the
@@ -2128,10 +2799,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet)
*/
if (opcode >= OP(RDMA_READ_RESPONSE_FIRST) &&
opcode <= OP(ATOMIC_ACKNOWLEDGE)) {
- rc_rcv_resp(ibp, ohdr, data, tlen, qp, opcode, psn,
- hdrsize, pmtu, rcd);
- if (is_fecn)
- goto send_ack;
+ rc_rcv_resp(packet);
return;
}
@@ -2180,33 +2848,38 @@ void hfi1_rc_rcv(struct hfi1_packet *packet)
}
if (qp->state == IB_QPS_RTR && !(qp->r_flags & RVT_R_COMM_EST))
- qp_comm_est(qp);
+ rvt_comm_est(qp);
/* OK, process the packet. */
switch (opcode) {
case OP(SEND_FIRST):
- ret = hfi1_rvt_get_rwqe(qp, 0);
+ ret = rvt_get_rwqe(qp, false);
if (ret < 0)
goto nack_op_err;
if (!ret)
goto rnr_nak;
qp->r_rcv_len = 0;
- /* FALLTHROUGH */
+ fallthrough;
case OP(SEND_MIDDLE):
case OP(RDMA_WRITE_MIDDLE):
send_middle:
/* Check for invalid length PMTU or posted rwqe len. */
- if (unlikely(tlen != (hdrsize + pmtu + 4)))
+ /*
+ * There will be no padding for 9B packet but 16B packets
+ * will come in with some padding since we always add
+ * CRC and LT bytes which will need to be flit aligned
+ */
+ if (unlikely(tlen != (hdrsize + pmtu + extra_bytes)))
goto nack_inv;
qp->r_rcv_len += pmtu;
if (unlikely(qp->r_rcv_len > qp->r_len))
goto nack_inv;
- hfi1_copy_sge(&qp->r_sge, data, pmtu, 1, 0);
+ rvt_copy_sge(qp, &qp->r_sge, data, pmtu, true, false);
break;
case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
/* consume RWQE */
- ret = hfi1_rvt_get_rwqe(qp, 1);
+ ret = rvt_get_rwqe(qp, true);
if (ret < 0)
goto nack_op_err;
if (!ret)
@@ -2216,7 +2889,7 @@ send_middle:
case OP(SEND_ONLY):
case OP(SEND_ONLY_WITH_IMMEDIATE):
case OP(SEND_ONLY_WITH_INVALIDATE):
- ret = hfi1_rvt_get_rwqe(qp, 0);
+ ret = rvt_get_rwqe(qp, false);
if (ret < 0)
goto nack_op_err;
if (!ret)
@@ -2226,7 +2899,7 @@ send_middle:
goto no_immediate_data;
if (opcode == OP(SEND_ONLY_WITH_INVALIDATE))
goto send_last_inv;
- /* FALLTHROUGH for SEND_ONLY_WITH_IMMEDIATE */
+ fallthrough; /* for SEND_ONLY_WITH_IMMEDIATE */
case OP(SEND_LAST_WITH_IMMEDIATE):
send_last_imm:
wc.ex.imm_data = ohdr->u.imm_data;
@@ -2241,25 +2914,23 @@ send_last_inv:
wc.wc_flags = IB_WC_WITH_INVALIDATE;
goto send_last;
case OP(RDMA_WRITE_LAST):
- copy_last = ibpd_to_rvtpd(qp->ibqp.pd)->user;
- /* fall through */
+ copy_last = rvt_is_user_qp(qp);
+ fallthrough;
case OP(SEND_LAST):
no_immediate_data:
wc.wc_flags = 0;
wc.ex.imm_data = 0;
send_last:
- /* Get the number of bytes the message was padded by. */
- pad = (bth0 >> 20) & 3;
/* Check for invalid length. */
/* LAST len should be >= 1 */
- if (unlikely(tlen < (hdrsize + pad + 4)))
+ if (unlikely(tlen < (hdrsize + extra_bytes)))
goto nack_inv;
- /* Don't count the CRC. */
- tlen -= (hdrsize + pad + 4);
+ /* Don't count the CRC(and padding and LT byte for 16B). */
+ tlen -= (hdrsize + extra_bytes);
wc.byte_len = tlen + qp->r_rcv_len;
if (unlikely(wc.byte_len > qp->r_len))
goto nack_inv;
- hfi1_copy_sge(&qp->r_sge, data, tlen, 1, copy_last);
+ rvt_copy_sge(qp, &qp->r_sge, data, tlen, true, copy_last);
rvt_put_ss(&qp->r_sge);
qp->r_msn++;
if (!__test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
@@ -2273,7 +2944,7 @@ send_last:
wc.opcode = IB_WC_RECV;
wc.qp = &qp->ibqp;
wc.src_qp = qp->remote_qpn;
- wc.slid = qp->remote_ah_attr.dlid;
+ wc.slid = rdma_ah_get_dlid(&qp->remote_ah_attr) & U16_MAX;
/*
* It seems that IB mandates the presence of an SL in a
* work completion only for the UD transport (see section
@@ -2285,20 +2956,19 @@ send_last:
*
* See also OPA Vol. 1, section 9.7.6, and table 9-17.
*/
- wc.sl = qp->remote_ah_attr.sl;
+ wc.sl = rdma_ah_get_sl(&qp->remote_ah_attr);
/* zero fields that are N/A */
wc.vendor_err = 0;
wc.pkey_index = 0;
wc.dlid_path_bits = 0;
wc.port_num = 0;
/* Signal completion event if the solicited bit is set. */
- rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
- (bth0 & IB_BTH_SOLICITED) != 0);
+ rvt_recv_cq(qp, &wc, ib_bth_is_solicited(ohdr));
break;
case OP(RDMA_WRITE_ONLY):
- copy_last = 1;
- /* fall through */
+ copy_last = rvt_is_user_qp(qp);
+ fallthrough;
case OP(RDMA_WRITE_FIRST):
case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE):
if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
@@ -2330,11 +3000,14 @@ send_last:
goto send_middle;
else if (opcode == OP(RDMA_WRITE_ONLY))
goto no_immediate_data;
- ret = hfi1_rvt_get_rwqe(qp, 1);
+ ret = rvt_get_rwqe(qp, true);
if (ret < 0)
goto nack_op_err;
- if (!ret)
+ if (!ret) {
+ /* peer will send again */
+ rvt_put_ss(&qp->r_sge);
goto rnr_nak;
+ }
wc.ex.imm_data = ohdr->u.rc.imm_data;
wc.wc_flags = IB_WC_WITH_IMM;
goto send_last;
@@ -2347,20 +3020,17 @@ send_last:
if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
goto nack_inv;
next = qp->r_head_ack_queue + 1;
- /* s_ack_queue is size HFI1_MAX_RDMA_ATOMIC+1 so use > not >= */
- if (next > HFI1_MAX_RDMA_ATOMIC)
+ /* s_ack_queue is size rvt_size_atomic()+1 so use > not >= */
+ if (next > rvt_size_atomic(ib_to_rvt(qp->ibqp.device)))
next = 0;
spin_lock_irqsave(&qp->s_lock, flags);
- if (unlikely(next == qp->s_tail_ack_queue)) {
+ if (unlikely(next == qp->s_acked_ack_queue)) {
if (!qp->s_ack_queue[next].sent)
goto nack_inv_unlck;
update_ack_queue(qp, next);
}
e = &qp->s_ack_queue[qp->r_head_ack_queue];
- if (e->opcode == OP(RDMA_READ_REQUEST) && e->rdma_sge.mr) {
- rvt_put_mr(e->rdma_sge.mr);
- e->rdma_sge.mr = NULL;
- }
+ release_rdma_sge_mr(e);
reth = &ohdr->u.rc.reth;
len = be32_to_cpu(reth->length);
if (len) {
@@ -2398,45 +3068,49 @@ send_last:
qp->r_state = opcode;
qp->r_nak_state = 0;
qp->r_head_ack_queue = next;
+ qpriv->r_tid_alloc = qp->r_head_ack_queue;
/* Schedule the send engine. */
qp->s_flags |= RVT_S_RESP_PENDING;
+ if (fecn)
+ qp->s_flags |= RVT_S_ECN;
hfi1_schedule_send(qp);
spin_unlock_irqrestore(&qp->s_lock, flags);
- if (is_fecn)
- goto send_ack;
return;
}
case OP(COMPARE_SWAP):
case OP(FETCH_ADD): {
- struct ib_atomic_eth *ateth;
+ struct ib_atomic_eth *ateth = &ohdr->u.atomic_eth;
+ u64 vaddr = get_ib_ateth_vaddr(ateth);
+ bool opfn = opcode == OP(COMPARE_SWAP) &&
+ vaddr == HFI1_VERBS_E_ATOMIC_VADDR;
struct rvt_ack_entry *e;
- u64 vaddr;
atomic64_t *maddr;
u64 sdata;
u32 rkey;
u8 next;
- if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
+ if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
+ !opfn))
goto nack_inv;
next = qp->r_head_ack_queue + 1;
- if (next > HFI1_MAX_RDMA_ATOMIC)
+ if (next > rvt_size_atomic(ib_to_rvt(qp->ibqp.device)))
next = 0;
spin_lock_irqsave(&qp->s_lock, flags);
- if (unlikely(next == qp->s_tail_ack_queue)) {
+ if (unlikely(next == qp->s_acked_ack_queue)) {
if (!qp->s_ack_queue[next].sent)
goto nack_inv_unlck;
update_ack_queue(qp, next);
}
e = &qp->s_ack_queue[qp->r_head_ack_queue];
- if (e->opcode == OP(RDMA_READ_REQUEST) && e->rdma_sge.mr) {
- rvt_put_mr(e->rdma_sge.mr);
- e->rdma_sge.mr = NULL;
+ release_rdma_sge_mr(e);
+ /* Process OPFN special virtual address */
+ if (opfn) {
+ opfn_conn_response(qp, e, ateth);
+ goto ack;
}
- ateth = &ohdr->u.atomic_eth;
- vaddr = get_ib_ateth_vaddr(ateth);
if (unlikely(vaddr & (sizeof(u64) - 1)))
goto nack_inv_unlck;
rkey = be32_to_cpu(ateth->rkey);
@@ -2455,6 +3129,7 @@ send_last:
sdata);
rvt_put_mr(qp->r_sge.sge.mr);
qp->r_sge.num_sge = 0;
+ack:
e->opcode = opcode;
e->sent = 0;
e->psn = psn;
@@ -2464,14 +3139,15 @@ send_last:
qp->r_state = opcode;
qp->r_nak_state = 0;
qp->r_head_ack_queue = next;
+ qpriv->r_tid_alloc = qp->r_head_ack_queue;
/* Schedule the send engine. */
qp->s_flags |= RVT_S_RESP_PENDING;
+ if (fecn)
+ qp->s_flags |= RVT_S_ECN;
hfi1_schedule_send(qp);
spin_unlock_irqrestore(&qp->s_lock, flags);
- if (is_fecn)
- goto send_ack;
return;
}
@@ -2484,22 +3160,13 @@ send_last:
qp->r_ack_psn = psn;
qp->r_nak_state = 0;
/* Send an ACK if requested or required. */
- if (psn & IB_BTH_REQ_ACK) {
- struct hfi1_qp_priv *priv = qp->priv;
-
- if (packet->numpkt == 0) {
- rc_cancel_ack(qp);
- goto send_ack;
- }
- if (priv->r_adefered >= HFI1_PSN_CREDIT) {
+ if (psn & IB_BTH_REQ_ACK || fecn) {
+ if (packet->numpkt == 0 || fecn ||
+ qp->r_adefered >= HFI1_PSN_CREDIT) {
rc_cancel_ack(qp);
goto send_ack;
}
- if (unlikely(is_fecn)) {
- rc_cancel_ack(qp);
- goto send_ack;
- }
- priv->r_adefered++;
+ qp->r_adefered++;
rc_defered_ack(rcd, qp);
}
return;
@@ -2512,7 +3179,7 @@ rnr_nak:
return;
nack_op_err:
- hfi1_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
+ rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
qp->r_nak_state = IB_NAK_REMOTE_OPERATIONAL_ERROR;
qp->r_ack_psn = qp->r_psn;
/* Queue NAK for later */
@@ -2522,7 +3189,7 @@ nack_op_err:
nack_inv_unlck:
spin_unlock_irqrestore(&qp->s_lock, flags);
nack_inv:
- hfi1_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
+ rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
qp->r_nak_state = IB_NAK_INVALID_REQUEST;
qp->r_ack_psn = qp->r_psn;
/* Queue NAK for later */
@@ -2532,37 +3199,28 @@ nack_inv:
nack_acc_unlck:
spin_unlock_irqrestore(&qp->s_lock, flags);
nack_acc:
- hfi1_rc_error(qp, IB_WC_LOC_PROT_ERR);
+ rvt_rc_error(qp, IB_WC_LOC_PROT_ERR);
qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR;
qp->r_ack_psn = qp->r_psn;
send_ack:
- hfi1_send_rc_ack(rcd, qp, is_fecn);
+ hfi1_send_rc_ack(packet, fecn);
}
void hfi1_rc_hdrerr(
struct hfi1_ctxtdata *rcd,
- struct ib_header *hdr,
- u32 rcv_flags,
+ struct hfi1_packet *packet,
struct rvt_qp *qp)
{
- int has_grh = rcv_flags & HFI1_HAS_GRH;
- struct ib_other_headers *ohdr;
- struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
+ struct hfi1_ibport *ibp = rcd_to_iport(rcd);
int diff;
u32 opcode;
- u32 psn, bth0;
-
- /* Check for GRH */
- ohdr = &hdr->u.oth;
- if (has_grh)
- ohdr = &hdr->u.l.oth;
+ u32 psn;
- bth0 = be32_to_cpu(ohdr->bth[0]);
- if (hfi1_ruc_check_hdr(ibp, hdr, has_grh, qp, bth0))
+ if (hfi1_ruc_check_hdr(ibp, packet))
return;
- psn = be32_to_cpu(ohdr->bth[2]);
- opcode = (bth0 >> 24) & 0xff;
+ psn = ib_bth_get_psn(packet->ohdr);
+ opcode = ib_bth_get_opcode(packet->ohdr);
/* Only deal with RDMA Writes for now */
if (opcode < IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST) {
diff --git a/drivers/infiniband/hw/hfi1/rc.h b/drivers/infiniband/hw/hfi1/rc.h
new file mode 100644
index 000000000000..5ed5e85d5841
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/rc.h
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
+/*
+ * Copyright(c) 2018 Intel Corporation.
+ *
+ */
+
+#ifndef HFI1_RC_H
+#define HFI1_RC_H
+
+/* cut down ridiculously long IB macro names */
+#define OP(x) IB_OPCODE_RC_##x
+
+static inline void update_ack_queue(struct rvt_qp *qp, unsigned int n)
+{
+ unsigned int next;
+
+ next = n + 1;
+ if (next > rvt_size_atomic(ib_to_rvt(qp->ibqp.device)))
+ next = 0;
+ qp->s_tail_ack_queue = next;
+ qp->s_acked_ack_queue = next;
+ qp->s_ack_state = OP(ACKNOWLEDGE);
+}
+
+static inline void rc_defered_ack(struct hfi1_ctxtdata *rcd,
+ struct rvt_qp *qp)
+{
+ if (list_empty(&qp->rspwait)) {
+ qp->r_flags |= RVT_R_RSP_NAK;
+ rvt_get_qp(qp);
+ list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
+ }
+}
+
+static inline u32 restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe,
+ u32 psn, u32 pmtu)
+{
+ u32 len;
+
+ len = delta_psn(psn, wqe->psn) * pmtu;
+ return rvt_restart_sge(ss, wqe, len);
+}
+
+static inline void release_rdma_sge_mr(struct rvt_ack_entry *e)
+{
+ if (e->rdma_sge.mr) {
+ rvt_put_mr(e->rdma_sge.mr);
+ e->rdma_sge.mr = NULL;
+ }
+}
+
+struct rvt_ack_entry *find_prev_entry(struct rvt_qp *qp, u32 psn, u8 *prev,
+ u8 *prev_ack, bool *scheduled);
+int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode, u64 val,
+ struct hfi1_ctxtdata *rcd);
+struct rvt_swqe *do_rc_completion(struct rvt_qp *qp, struct rvt_swqe *wqe,
+ struct hfi1_ibport *ibp);
+
+#endif /* HFI1_RC_H */
diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c
index 717ed4b159d3..aafa4e03b179 100644
--- a/drivers/infiniband/hw/hfi1/ruc.c
+++ b/drivers/infiniband/hw/hfi1/ruc.c
@@ -1,48 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
+ * Copyright(c) 2015 - 2018 Intel Corporation.
*/
#include <linux/spinlock.h>
@@ -53,192 +11,6 @@
#include "verbs_txreq.h"
#include "trace.h"
-/*
- * Convert the AETH RNR timeout code into the number of microseconds.
- */
-const u32 ib_hfi1_rnr_table[32] = {
- 655360, /* 00: 655.36 */
- 10, /* 01: .01 */
- 20, /* 02 .02 */
- 30, /* 03: .03 */
- 40, /* 04: .04 */
- 60, /* 05: .06 */
- 80, /* 06: .08 */
- 120, /* 07: .12 */
- 160, /* 08: .16 */
- 240, /* 09: .24 */
- 320, /* 0A: .32 */
- 480, /* 0B: .48 */
- 640, /* 0C: .64 */
- 960, /* 0D: .96 */
- 1280, /* 0E: 1.28 */
- 1920, /* 0F: 1.92 */
- 2560, /* 10: 2.56 */
- 3840, /* 11: 3.84 */
- 5120, /* 12: 5.12 */
- 7680, /* 13: 7.68 */
- 10240, /* 14: 10.24 */
- 15360, /* 15: 15.36 */
- 20480, /* 16: 20.48 */
- 30720, /* 17: 30.72 */
- 40960, /* 18: 40.96 */
- 61440, /* 19: 61.44 */
- 81920, /* 1A: 81.92 */
- 122880, /* 1B: 122.88 */
- 163840, /* 1C: 163.84 */
- 245760, /* 1D: 245.76 */
- 327680, /* 1E: 327.68 */
- 491520 /* 1F: 491.52 */
-};
-
-/*
- * Validate a RWQE and fill in the SGE state.
- * Return 1 if OK.
- */
-static int init_sge(struct rvt_qp *qp, struct rvt_rwqe *wqe)
-{
- int i, j, ret;
- struct ib_wc wc;
- struct rvt_lkey_table *rkt;
- struct rvt_pd *pd;
- struct rvt_sge_state *ss;
-
- rkt = &to_idev(qp->ibqp.device)->rdi.lkey_table;
- pd = ibpd_to_rvtpd(qp->ibqp.srq ? qp->ibqp.srq->pd : qp->ibqp.pd);
- ss = &qp->r_sge;
- ss->sg_list = qp->r_sg_list;
- qp->r_len = 0;
- for (i = j = 0; i < wqe->num_sge; i++) {
- if (wqe->sg_list[i].length == 0)
- continue;
- /* Check LKEY */
- if (!rvt_lkey_ok(rkt, pd, j ? &ss->sg_list[j - 1] : &ss->sge,
- &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE))
- goto bad_lkey;
- qp->r_len += wqe->sg_list[i].length;
- j++;
- }
- ss->num_sge = j;
- ss->total_len = qp->r_len;
- ret = 1;
- goto bail;
-
-bad_lkey:
- while (j) {
- struct rvt_sge *sge = --j ? &ss->sg_list[j - 1] : &ss->sge;
-
- rvt_put_mr(sge->mr);
- }
- ss->num_sge = 0;
- memset(&wc, 0, sizeof(wc));
- wc.wr_id = wqe->wr_id;
- wc.status = IB_WC_LOC_PROT_ERR;
- wc.opcode = IB_WC_RECV;
- wc.qp = &qp->ibqp;
- /* Signal solicited completion event. */
- rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, 1);
- ret = 0;
-bail:
- return ret;
-}
-
-/**
- * hfi1_rvt_get_rwqe - copy the next RWQE into the QP's RWQE
- * @qp: the QP
- * @wr_id_only: update qp->r_wr_id only, not qp->r_sge
- *
- * Return -1 if there is a local error, 0 if no RWQE is available,
- * otherwise return 1.
- *
- * Can be called from interrupt level.
- */
-int hfi1_rvt_get_rwqe(struct rvt_qp *qp, int wr_id_only)
-{
- unsigned long flags;
- struct rvt_rq *rq;
- struct rvt_rwq *wq;
- struct rvt_srq *srq;
- struct rvt_rwqe *wqe;
- void (*handler)(struct ib_event *, void *);
- u32 tail;
- int ret;
-
- if (qp->ibqp.srq) {
- srq = ibsrq_to_rvtsrq(qp->ibqp.srq);
- handler = srq->ibsrq.event_handler;
- rq = &srq->rq;
- } else {
- srq = NULL;
- handler = NULL;
- rq = &qp->r_rq;
- }
-
- spin_lock_irqsave(&rq->lock, flags);
- if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) {
- ret = 0;
- goto unlock;
- }
-
- wq = rq->wq;
- tail = wq->tail;
- /* Validate tail before using it since it is user writable. */
- if (tail >= rq->size)
- tail = 0;
- if (unlikely(tail == wq->head)) {
- ret = 0;
- goto unlock;
- }
- /* Make sure entry is read after head index is read. */
- smp_rmb();
- wqe = rvt_get_rwqe_ptr(rq, tail);
- /*
- * Even though we update the tail index in memory, the verbs
- * consumer is not supposed to post more entries until a
- * completion is generated.
- */
- if (++tail >= rq->size)
- tail = 0;
- wq->tail = tail;
- if (!wr_id_only && !init_sge(qp, wqe)) {
- ret = -1;
- goto unlock;
- }
- qp->r_wr_id = wqe->wr_id;
-
- ret = 1;
- set_bit(RVT_R_WRID_VALID, &qp->r_aflags);
- if (handler) {
- u32 n;
-
- /*
- * Validate head pointer value and compute
- * the number of remaining WQEs.
- */
- n = wq->head;
- if (n >= rq->size)
- n = 0;
- if (n < tail)
- n += rq->size - tail;
- else
- n -= tail;
- if (n < srq->limit) {
- struct ib_event ev;
-
- srq->limit = 0;
- spin_unlock_irqrestore(&rq->lock, flags);
- ev.device = qp->ibqp.device;
- ev.element.srq = qp->ibqp.srq;
- ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
- handler(&ev, srq->ibsrq.srq_context);
- goto bail;
- }
- }
-unlock:
- spin_unlock_irqrestore(&rq->lock, flags);
-bail:
- return ret;
-}
-
static int gid_ok(union ib_gid *gid, __be64 gid_prefix, __be64 id)
{
return (gid->global.interface_id == id &&
@@ -252,417 +24,93 @@ static int gid_ok(union ib_gid *gid, __be64 gid_prefix, __be64 id)
*
* The s_lock will be acquired around the hfi1_migrate_qp() call.
*/
-int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct ib_header *hdr,
- int has_grh, struct rvt_qp *qp, u32 bth0)
+int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_packet *packet)
{
__be64 guid;
unsigned long flags;
- u8 sc5 = ibp->sl_to_sc[qp->remote_ah_attr.sl];
-
- if (qp->s_mig_state == IB_MIG_ARMED && (bth0 & IB_BTH_MIG_REQ)) {
- if (!has_grh) {
- if (qp->alt_ah_attr.ah_flags & IB_AH_GRH)
- goto err;
+ struct rvt_qp *qp = packet->qp;
+ u8 sc5 = ibp->sl_to_sc[rdma_ah_get_sl(&qp->remote_ah_attr)];
+ u32 dlid = packet->dlid;
+ u32 slid = packet->slid;
+ u32 sl = packet->sl;
+ bool migrated = packet->migrated;
+ u16 pkey = packet->pkey;
+
+ if (qp->s_mig_state == IB_MIG_ARMED && migrated) {
+ if (!packet->grh) {
+ if ((rdma_ah_get_ah_flags(&qp->alt_ah_attr) &
+ IB_AH_GRH) &&
+ (packet->etype != RHF_RCV_TYPE_BYPASS))
+ return 1;
} else {
- if (!(qp->alt_ah_attr.ah_flags & IB_AH_GRH))
- goto err;
- guid = get_sguid(ibp, qp->alt_ah_attr.grh.sgid_index);
- if (!gid_ok(&hdr->u.l.grh.dgid, ibp->rvp.gid_prefix,
+ const struct ib_global_route *grh;
+
+ if (!(rdma_ah_get_ah_flags(&qp->alt_ah_attr) &
+ IB_AH_GRH))
+ return 1;
+ grh = rdma_ah_read_grh(&qp->alt_ah_attr);
+ guid = get_sguid(ibp, grh->sgid_index);
+ if (!gid_ok(&packet->grh->dgid, ibp->rvp.gid_prefix,
guid))
- goto err;
+ return 1;
if (!gid_ok(
- &hdr->u.l.grh.sgid,
- qp->alt_ah_attr.grh.dgid.global.subnet_prefix,
- qp->alt_ah_attr.grh.dgid.global.interface_id))
- goto err;
+ &packet->grh->sgid,
+ grh->dgid.global.subnet_prefix,
+ grh->dgid.global.interface_id))
+ return 1;
}
- if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0,
- sc5, be16_to_cpu(hdr->lrh[3])))) {
- hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_P_KEY,
- (u16)bth0,
- (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF,
- 0, qp->ibqp.qp_num,
- be16_to_cpu(hdr->lrh[3]),
- be16_to_cpu(hdr->lrh[1]));
- goto err;
+ if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), pkey,
+ sc5, slid))) {
+ hfi1_bad_pkey(ibp, pkey, sl, 0, qp->ibqp.qp_num,
+ slid, dlid);
+ return 1;
}
/* Validate the SLID. See Ch. 9.6.1.5 and 17.2.8 */
- if (be16_to_cpu(hdr->lrh[3]) != qp->alt_ah_attr.dlid ||
- ppd_from_ibp(ibp)->port != qp->alt_ah_attr.port_num)
- goto err;
+ if (slid != rdma_ah_get_dlid(&qp->alt_ah_attr) ||
+ ppd_from_ibp(ibp)->port !=
+ rdma_ah_get_port_num(&qp->alt_ah_attr))
+ return 1;
spin_lock_irqsave(&qp->s_lock, flags);
hfi1_migrate_qp(qp);
spin_unlock_irqrestore(&qp->s_lock, flags);
} else {
- if (!has_grh) {
- if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
- goto err;
+ if (!packet->grh) {
+ if ((rdma_ah_get_ah_flags(&qp->remote_ah_attr) &
+ IB_AH_GRH) &&
+ (packet->etype != RHF_RCV_TYPE_BYPASS))
+ return 1;
} else {
- if (!(qp->remote_ah_attr.ah_flags & IB_AH_GRH))
- goto err;
- guid = get_sguid(ibp,
- qp->remote_ah_attr.grh.sgid_index);
- if (!gid_ok(&hdr->u.l.grh.dgid, ibp->rvp.gid_prefix,
+ const struct ib_global_route *grh;
+
+ if (!(rdma_ah_get_ah_flags(&qp->remote_ah_attr) &
+ IB_AH_GRH))
+ return 1;
+ grh = rdma_ah_read_grh(&qp->remote_ah_attr);
+ guid = get_sguid(ibp, grh->sgid_index);
+ if (!gid_ok(&packet->grh->dgid, ibp->rvp.gid_prefix,
guid))
- goto err;
+ return 1;
if (!gid_ok(
- &hdr->u.l.grh.sgid,
- qp->remote_ah_attr.grh.dgid.global.subnet_prefix,
- qp->remote_ah_attr.grh.dgid.global.interface_id))
- goto err;
+ &packet->grh->sgid,
+ grh->dgid.global.subnet_prefix,
+ grh->dgid.global.interface_id))
+ return 1;
}
- if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0,
- sc5, be16_to_cpu(hdr->lrh[3])))) {
- hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_P_KEY,
- (u16)bth0,
- (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF,
- 0, qp->ibqp.qp_num,
- be16_to_cpu(hdr->lrh[3]),
- be16_to_cpu(hdr->lrh[1]));
- goto err;
+ if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), pkey,
+ sc5, slid))) {
+ hfi1_bad_pkey(ibp, pkey, sl, 0, qp->ibqp.qp_num,
+ slid, dlid);
+ return 1;
}
/* Validate the SLID. See Ch. 9.6.1.5 */
- if (be16_to_cpu(hdr->lrh[3]) != qp->remote_ah_attr.dlid ||
+ if ((slid != rdma_ah_get_dlid(&qp->remote_ah_attr)) ||
ppd_from_ibp(ibp)->port != qp->port_num)
- goto err;
- if (qp->s_mig_state == IB_MIG_REARM &&
- !(bth0 & IB_BTH_MIG_REQ))
+ return 1;
+ if (qp->s_mig_state == IB_MIG_REARM && !migrated)
qp->s_mig_state = IB_MIG_ARMED;
}
return 0;
-
-err:
- return 1;
-}
-
-/**
- * ruc_loopback - handle UC and RC loopback requests
- * @sqp: the sending QP
- *
- * This is called from hfi1_do_send() to
- * forward a WQE addressed to the same HFI.
- * Note that although we are single threaded due to the send engine, we still
- * have to protect against post_send(). We don't have to worry about
- * receive interrupts since this is a connected protocol and all packets
- * will pass through here.
- */
-static void ruc_loopback(struct rvt_qp *sqp)
-{
- struct hfi1_ibport *ibp = to_iport(sqp->ibqp.device, sqp->port_num);
- struct rvt_qp *qp;
- struct rvt_swqe *wqe;
- struct rvt_sge *sge;
- unsigned long flags;
- struct ib_wc wc;
- u64 sdata;
- atomic64_t *maddr;
- enum ib_wc_status send_status;
- int release;
- int ret;
- int copy_last = 0;
- u32 to;
- int local_ops = 0;
-
- rcu_read_lock();
-
- /*
- * Note that we check the responder QP state after
- * checking the requester's state.
- */
- qp = rvt_lookup_qpn(ib_to_rvt(sqp->ibqp.device), &ibp->rvp,
- sqp->remote_qpn);
-
- spin_lock_irqsave(&sqp->s_lock, flags);
-
- /* Return if we are already busy processing a work request. */
- if ((sqp->s_flags & (RVT_S_BUSY | RVT_S_ANY_WAIT)) ||
- !(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_OR_FLUSH_SEND))
- goto unlock;
-
- sqp->s_flags |= RVT_S_BUSY;
-
-again:
- smp_read_barrier_depends(); /* see post_one_send() */
- if (sqp->s_last == ACCESS_ONCE(sqp->s_head))
- goto clr_busy;
- wqe = rvt_get_swqe_ptr(sqp, sqp->s_last);
-
- /* Return if it is not OK to start a new work request. */
- if (!(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_NEXT_SEND_OK)) {
- if (!(ib_rvt_state_ops[sqp->state] & RVT_FLUSH_SEND))
- goto clr_busy;
- /* We are in the error state, flush the work request. */
- send_status = IB_WC_WR_FLUSH_ERR;
- goto flush_send;
- }
-
- /*
- * We can rely on the entry not changing without the s_lock
- * being held until we update s_last.
- * We increment s_cur to indicate s_last is in progress.
- */
- if (sqp->s_last == sqp->s_cur) {
- if (++sqp->s_cur >= sqp->s_size)
- sqp->s_cur = 0;
- }
- spin_unlock_irqrestore(&sqp->s_lock, flags);
-
- if (!qp || !(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) ||
- qp->ibqp.qp_type != sqp->ibqp.qp_type) {
- ibp->rvp.n_pkt_drops++;
- /*
- * For RC, the requester would timeout and retry so
- * shortcut the timeouts and just signal too many retries.
- */
- if (sqp->ibqp.qp_type == IB_QPT_RC)
- send_status = IB_WC_RETRY_EXC_ERR;
- else
- send_status = IB_WC_SUCCESS;
- goto serr;
- }
-
- memset(&wc, 0, sizeof(wc));
- send_status = IB_WC_SUCCESS;
-
- release = 1;
- sqp->s_sge.sge = wqe->sg_list[0];
- sqp->s_sge.sg_list = wqe->sg_list + 1;
- sqp->s_sge.num_sge = wqe->wr.num_sge;
- sqp->s_len = wqe->length;
- switch (wqe->wr.opcode) {
- case IB_WR_REG_MR:
- goto send_comp;
-
- case IB_WR_LOCAL_INV:
- if (!(wqe->wr.send_flags & RVT_SEND_COMPLETION_ONLY)) {
- if (rvt_invalidate_rkey(sqp,
- wqe->wr.ex.invalidate_rkey))
- send_status = IB_WC_LOC_PROT_ERR;
- local_ops = 1;
- }
- goto send_comp;
-
- case IB_WR_SEND_WITH_INV:
- if (!rvt_invalidate_rkey(qp, wqe->wr.ex.invalidate_rkey)) {
- wc.wc_flags = IB_WC_WITH_INVALIDATE;
- wc.ex.invalidate_rkey = wqe->wr.ex.invalidate_rkey;
- }
- goto send;
-
- case IB_WR_SEND_WITH_IMM:
- wc.wc_flags = IB_WC_WITH_IMM;
- wc.ex.imm_data = wqe->wr.ex.imm_data;
- /* FALLTHROUGH */
- case IB_WR_SEND:
-send:
- ret = hfi1_rvt_get_rwqe(qp, 0);
- if (ret < 0)
- goto op_err;
- if (!ret)
- goto rnr_nak;
- break;
-
- case IB_WR_RDMA_WRITE_WITH_IMM:
- if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
- goto inv_err;
- wc.wc_flags = IB_WC_WITH_IMM;
- wc.ex.imm_data = wqe->wr.ex.imm_data;
- ret = hfi1_rvt_get_rwqe(qp, 1);
- if (ret < 0)
- goto op_err;
- if (!ret)
- goto rnr_nak;
- /* skip copy_last set and qp_access_flags recheck */
- goto do_write;
- case IB_WR_RDMA_WRITE:
- copy_last = ibpd_to_rvtpd(qp->ibqp.pd)->user;
- if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
- goto inv_err;
-do_write:
- if (wqe->length == 0)
- break;
- if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, wqe->length,
- wqe->rdma_wr.remote_addr,
- wqe->rdma_wr.rkey,
- IB_ACCESS_REMOTE_WRITE)))
- goto acc_err;
- qp->r_sge.sg_list = NULL;
- qp->r_sge.num_sge = 1;
- qp->r_sge.total_len = wqe->length;
- break;
-
- case IB_WR_RDMA_READ:
- if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
- goto inv_err;
- if (unlikely(!rvt_rkey_ok(qp, &sqp->s_sge.sge, wqe->length,
- wqe->rdma_wr.remote_addr,
- wqe->rdma_wr.rkey,
- IB_ACCESS_REMOTE_READ)))
- goto acc_err;
- release = 0;
- sqp->s_sge.sg_list = NULL;
- sqp->s_sge.num_sge = 1;
- qp->r_sge.sge = wqe->sg_list[0];
- qp->r_sge.sg_list = wqe->sg_list + 1;
- qp->r_sge.num_sge = wqe->wr.num_sge;
- qp->r_sge.total_len = wqe->length;
- break;
-
- case IB_WR_ATOMIC_CMP_AND_SWP:
- case IB_WR_ATOMIC_FETCH_AND_ADD:
- if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
- goto inv_err;
- if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64),
- wqe->atomic_wr.remote_addr,
- wqe->atomic_wr.rkey,
- IB_ACCESS_REMOTE_ATOMIC)))
- goto acc_err;
- /* Perform atomic OP and save result. */
- maddr = (atomic64_t *)qp->r_sge.sge.vaddr;
- sdata = wqe->atomic_wr.compare_add;
- *(u64 *)sqp->s_sge.sge.vaddr =
- (wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ?
- (u64)atomic64_add_return(sdata, maddr) - sdata :
- (u64)cmpxchg((u64 *)qp->r_sge.sge.vaddr,
- sdata, wqe->atomic_wr.swap);
- rvt_put_mr(qp->r_sge.sge.mr);
- qp->r_sge.num_sge = 0;
- goto send_comp;
-
- default:
- send_status = IB_WC_LOC_QP_OP_ERR;
- goto serr;
- }
-
- sge = &sqp->s_sge.sge;
- while (sqp->s_len) {
- u32 len = sqp->s_len;
-
- if (len > sge->length)
- len = sge->length;
- if (len > sge->sge_length)
- len = sge->sge_length;
- WARN_ON_ONCE(len == 0);
- hfi1_copy_sge(&qp->r_sge, sge->vaddr, len, release, copy_last);
- sge->vaddr += len;
- sge->length -= len;
- sge->sge_length -= len;
- if (sge->sge_length == 0) {
- if (!release)
- rvt_put_mr(sge->mr);
- if (--sqp->s_sge.num_sge)
- *sge = *sqp->s_sge.sg_list++;
- } else if (sge->length == 0 && sge->mr->lkey) {
- if (++sge->n >= RVT_SEGSZ) {
- if (++sge->m >= sge->mr->mapsz)
- break;
- sge->n = 0;
- }
- sge->vaddr =
- sge->mr->map[sge->m]->segs[sge->n].vaddr;
- sge->length =
- sge->mr->map[sge->m]->segs[sge->n].length;
- }
- sqp->s_len -= len;
- }
- if (release)
- rvt_put_ss(&qp->r_sge);
-
- if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
- goto send_comp;
-
- if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM)
- wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
- else
- wc.opcode = IB_WC_RECV;
- wc.wr_id = qp->r_wr_id;
- wc.status = IB_WC_SUCCESS;
- wc.byte_len = wqe->length;
- wc.qp = &qp->ibqp;
- wc.src_qp = qp->remote_qpn;
- wc.slid = qp->remote_ah_attr.dlid;
- wc.sl = qp->remote_ah_attr.sl;
- wc.port_num = 1;
- /* Signal completion event if the solicited bit is set. */
- rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
- wqe->wr.send_flags & IB_SEND_SOLICITED);
-
-send_comp:
- spin_lock_irqsave(&sqp->s_lock, flags);
- ibp->rvp.n_loop_pkts++;
-flush_send:
- sqp->s_rnr_retry = sqp->s_rnr_retry_cnt;
- hfi1_send_complete(sqp, wqe, send_status);
- if (local_ops) {
- atomic_dec(&sqp->local_ops_pending);
- local_ops = 0;
- }
- goto again;
-
-rnr_nak:
- /* Handle RNR NAK */
- if (qp->ibqp.qp_type == IB_QPT_UC)
- goto send_comp;
- ibp->rvp.n_rnr_naks++;
- /*
- * Note: we don't need the s_lock held since the BUSY flag
- * makes this single threaded.
- */
- if (sqp->s_rnr_retry == 0) {
- send_status = IB_WC_RNR_RETRY_EXC_ERR;
- goto serr;
- }
- if (sqp->s_rnr_retry_cnt < 7)
- sqp->s_rnr_retry--;
- spin_lock_irqsave(&sqp->s_lock, flags);
- if (!(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_RECV_OK))
- goto clr_busy;
- to = ib_hfi1_rnr_table[qp->r_min_rnr_timer];
- hfi1_add_rnr_timer(sqp, to);
- goto clr_busy;
-
-op_err:
- send_status = IB_WC_REM_OP_ERR;
- wc.status = IB_WC_LOC_QP_OP_ERR;
- goto err;
-
-inv_err:
- send_status = IB_WC_REM_INV_REQ_ERR;
- wc.status = IB_WC_LOC_QP_OP_ERR;
- goto err;
-
-acc_err:
- send_status = IB_WC_REM_ACCESS_ERR;
- wc.status = IB_WC_LOC_PROT_ERR;
-err:
- /* responder goes to error state */
- hfi1_rc_error(qp, wc.status);
-
-serr:
- spin_lock_irqsave(&sqp->s_lock, flags);
- hfi1_send_complete(sqp, wqe, send_status);
- if (sqp->ibqp.qp_type == IB_QPT_RC) {
- int lastwqe = rvt_error_qp(sqp, IB_WC_WR_FLUSH_ERR);
-
- sqp->s_flags &= ~RVT_S_BUSY;
- spin_unlock_irqrestore(&sqp->s_lock, flags);
- if (lastwqe) {
- struct ib_event ev;
-
- ev.device = sqp->ibqp.device;
- ev.element.qp = &sqp->ibqp;
- ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
- sqp->ibqp.event_handler(&ev, sqp->ibqp.qp_context);
- }
- goto done;
- }
-clr_busy:
- sqp->s_flags &= ~RVT_S_BUSY;
-unlock:
- spin_unlock_irqrestore(&sqp->s_lock, flags);
-done:
- rcu_read_unlock();
}
/**
@@ -670,19 +118,19 @@ done:
* @ibp: a pointer to the IB port
* @hdr: a pointer to the GRH header being constructed
* @grh: the global route address to send to
- * @hwords: the number of 32 bit words of header being sent
+ * @hwords: size of header after grh being sent in dwords
* @nwords: the number of 32 bit words of data being sent
*
* Return the size of the header in 32 bit words.
*/
u32 hfi1_make_grh(struct hfi1_ibport *ibp, struct ib_grh *hdr,
- struct ib_global_route *grh, u32 hwords, u32 nwords)
+ const struct ib_global_route *grh, u32 hwords, u32 nwords)
{
hdr->version_tclass_flow =
cpu_to_be32((IB_GRH_VERSION << IB_GRH_VERSION_SHIFT) |
(grh->traffic_class << IB_GRH_TCLASS_SHIFT) |
(grh->flow_label << IB_GRH_FLOW_SHIFT));
- hdr->paylen = cpu_to_be16((hwords - 2 + nwords + SIZE_OF_CRC) << 2);
+ hdr->paylen = cpu_to_be16((hwords + nwords) << 2);
/* next_hdr is defined by C8-7 in ch. 8.4.1 */
hdr->next_hdr = IB_GRH_NEXT_HDR;
hdr->hop_limit = grh->hop_limit;
@@ -698,7 +146,8 @@ u32 hfi1_make_grh(struct hfi1_ibport *ibp, struct ib_grh *hdr,
return sizeof(struct ib_grh) / sizeof(u32);
}
-#define BTH2_OFFSET (offsetof(struct hfi1_sdma_header, hdr.u.oth.bth[2]) / 4)
+#define BTH2_OFFSET (offsetof(struct hfi1_sdma_header, \
+ hdr.ibh.u.oth.bth[2]) / 4)
/**
* build_ahg - create ahg in s_ahg
@@ -715,9 +164,9 @@ static inline void build_ahg(struct rvt_qp *qp, u32 npsn)
{
struct hfi1_qp_priv *priv = qp->priv;
- if (unlikely(qp->s_flags & RVT_S_AHG_CLEAR))
+ if (unlikely(qp->s_flags & HFI1_S_AHG_CLEAR))
clear_ahg(qp);
- if (!(qp->s_flags & RVT_S_AHG_VALID)) {
+ if (!(qp->s_flags & HFI1_S_AHG_VALID)) {
/* first middle that needs copy */
if (qp->s_ahgidx < 0)
qp->s_ahgidx = sdma_ahg_alloc(priv->s_sde);
@@ -726,7 +175,7 @@ static inline void build_ahg(struct rvt_qp *qp, u32 npsn)
priv->s_ahg->tx_flags |= SDMA_TXREQ_F_AHG_COPY;
/* save to protect a change in another thread */
priv->s_ahg->ahgidx = qp->s_ahgidx;
- qp->s_flags |= RVT_S_AHG_VALID;
+ qp->s_flags |= HFI1_S_AHG_VALID;
}
} else {
/* subsequent middle after valid */
@@ -755,30 +204,191 @@ static inline void build_ahg(struct rvt_qp *qp, u32 npsn)
}
}
-void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr,
- u32 bth0, u32 bth2, int middle,
- struct hfi1_pkt_state *ps)
+static inline void hfi1_make_ruc_bth(struct rvt_qp *qp,
+ struct ib_other_headers *ohdr,
+ u32 bth0, u32 bth1, u32 bth2)
+{
+ ohdr->bth[0] = cpu_to_be32(bth0);
+ ohdr->bth[1] = cpu_to_be32(bth1);
+ ohdr->bth[2] = cpu_to_be32(bth2);
+}
+
+/**
+ * hfi1_make_ruc_header_16B - build a 16B header
+ * @qp: the queue pair
+ * @ohdr: a pointer to the destination header memory
+ * @bth0: bth0 passed in from the RC/UC builder
+ * @bth1: bth1 passed in from the RC/UC builder
+ * @bth2: bth2 passed in from the RC/UC builder
+ * @middle: non zero implies indicates ahg "could" be used
+ * @ps: the current packet state
+ *
+ * This routine may disarm ahg under these situations:
+ * - packet needs a GRH
+ * - BECN needed
+ * - migration state not IB_MIG_MIGRATED
+ */
+static inline void hfi1_make_ruc_header_16B(struct rvt_qp *qp,
+ struct ib_other_headers *ohdr,
+ u32 bth0, u32 bth1, u32 bth2,
+ int middle,
+ struct hfi1_pkt_state *ps)
+{
+ struct hfi1_qp_priv *priv = qp->priv;
+ struct hfi1_ibport *ibp = ps->ibp;
+ struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
+ u32 slid;
+ u16 pkey = hfi1_get_pkey(ibp, qp->s_pkey_index);
+ u8 l4 = OPA_16B_L4_IB_LOCAL;
+ u8 extra_bytes = hfi1_get_16b_padding(
+ (ps->s_txreq->hdr_dwords << 2),
+ ps->s_txreq->s_cur_size);
+ u32 nwords = SIZE_OF_CRC + ((ps->s_txreq->s_cur_size +
+ extra_bytes + SIZE_OF_LT) >> 2);
+ bool becn = false;
+
+ if (unlikely(rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH) &&
+ hfi1_check_mcast(rdma_ah_get_dlid(&qp->remote_ah_attr))) {
+ struct ib_grh *grh;
+ struct ib_global_route *grd =
+ rdma_ah_retrieve_grh(&qp->remote_ah_attr);
+ /*
+ * Ensure OPA GIDs are transformed to IB gids
+ * before creating the GRH.
+ */
+ if (grd->sgid_index == OPA_GID_INDEX)
+ grd->sgid_index = 0;
+ grh = &ps->s_txreq->phdr.hdr.opah.u.l.grh;
+ l4 = OPA_16B_L4_IB_GLOBAL;
+ ps->s_txreq->hdr_dwords +=
+ hfi1_make_grh(ibp, grh, grd,
+ ps->s_txreq->hdr_dwords - LRH_16B_DWORDS,
+ nwords);
+ middle = 0;
+ }
+
+ if (qp->s_mig_state == IB_MIG_MIGRATED)
+ bth1 |= OPA_BTH_MIG_REQ;
+ else
+ middle = 0;
+
+ if (qp->s_flags & RVT_S_ECN) {
+ qp->s_flags &= ~RVT_S_ECN;
+ /* we recently received a FECN, so return a BECN */
+ becn = true;
+ middle = 0;
+ }
+ if (middle)
+ build_ahg(qp, bth2);
+ else
+ qp->s_flags &= ~HFI1_S_AHG_VALID;
+
+ bth0 |= pkey;
+ bth0 |= extra_bytes << 20;
+ hfi1_make_ruc_bth(qp, ohdr, bth0, bth1, bth2);
+
+ if (!ppd->lid)
+ slid = be32_to_cpu(OPA_LID_PERMISSIVE);
+ else
+ slid = ppd->lid |
+ (rdma_ah_get_path_bits(&qp->remote_ah_attr) &
+ ((1 << ppd->lmc) - 1));
+
+ hfi1_make_16b_hdr(&ps->s_txreq->phdr.hdr.opah,
+ slid,
+ opa_get_lid(rdma_ah_get_dlid(&qp->remote_ah_attr),
+ 16B),
+ (ps->s_txreq->hdr_dwords + nwords) >> 1,
+ pkey, becn, 0, l4, priv->s_sc);
+}
+
+/**
+ * hfi1_make_ruc_header_9B - build a 9B header
+ * @qp: the queue pair
+ * @ohdr: a pointer to the destination header memory
+ * @bth0: bth0 passed in from the RC/UC builder
+ * @bth1: bth1 passed in from the RC/UC builder
+ * @bth2: bth2 passed in from the RC/UC builder
+ * @middle: non zero implies indicates ahg "could" be used
+ * @ps: the current packet state
+ *
+ * This routine may disarm ahg under these situations:
+ * - packet needs a GRH
+ * - BECN needed
+ * - migration state not IB_MIG_MIGRATED
+ */
+static inline void hfi1_make_ruc_header_9B(struct rvt_qp *qp,
+ struct ib_other_headers *ohdr,
+ u32 bth0, u32 bth1, u32 bth2,
+ int middle,
+ struct hfi1_pkt_state *ps)
{
struct hfi1_qp_priv *priv = qp->priv;
struct hfi1_ibport *ibp = ps->ibp;
- u16 lrh0;
- u32 nwords;
- u32 extra_bytes;
- u32 bth1;
-
- /* Construct the header. */
- extra_bytes = -ps->s_txreq->s_cur_size & 3;
- nwords = (ps->s_txreq->s_cur_size + extra_bytes) >> 2;
- lrh0 = HFI1_LRH_BTH;
- if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) {
- qp->s_hdrwords += hfi1_make_grh(ibp,
- &ps->s_txreq->phdr.hdr.u.l.grh,
- &qp->remote_ah_attr.grh,
- qp->s_hdrwords, nwords);
+ u16 pkey = hfi1_get_pkey(ibp, qp->s_pkey_index);
+ u16 lrh0 = HFI1_LRH_BTH;
+ u8 extra_bytes = -ps->s_txreq->s_cur_size & 3;
+ u32 nwords = SIZE_OF_CRC + ((ps->s_txreq->s_cur_size +
+ extra_bytes) >> 2);
+
+ if (unlikely(rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH)) {
+ struct ib_grh *grh = &ps->s_txreq->phdr.hdr.ibh.u.l.grh;
+
lrh0 = HFI1_LRH_GRH;
+ ps->s_txreq->hdr_dwords +=
+ hfi1_make_grh(ibp, grh,
+ rdma_ah_read_grh(&qp->remote_ah_attr),
+ ps->s_txreq->hdr_dwords - LRH_9B_DWORDS,
+ nwords);
+ middle = 0;
+ }
+ lrh0 |= (priv->s_sc & 0xf) << 12 |
+ (rdma_ah_get_sl(&qp->remote_ah_attr) & 0xf) << 4;
+
+ if (qp->s_mig_state == IB_MIG_MIGRATED)
+ bth0 |= IB_BTH_MIG_REQ;
+ else
+ middle = 0;
+
+ if (qp->s_flags & RVT_S_ECN) {
+ qp->s_flags &= ~RVT_S_ECN;
+ /* we recently received a FECN, so return a BECN */
+ bth1 |= (IB_BECN_MASK << IB_BECN_SHIFT);
middle = 0;
}
- lrh0 |= (priv->s_sc & 0xf) << 12 | (qp->remote_ah_attr.sl & 0xf) << 4;
+ if (middle)
+ build_ahg(qp, bth2);
+ else
+ qp->s_flags &= ~HFI1_S_AHG_VALID;
+
+ bth0 |= pkey;
+ bth0 |= extra_bytes << 20;
+ hfi1_make_ruc_bth(qp, ohdr, bth0, bth1, bth2);
+ hfi1_make_ib_hdr(&ps->s_txreq->phdr.hdr.ibh,
+ lrh0,
+ ps->s_txreq->hdr_dwords + nwords,
+ opa_get_lid(rdma_ah_get_dlid(&qp->remote_ah_attr), 9B),
+ ppd_from_ibp(ibp)->lid |
+ rdma_ah_get_path_bits(&qp->remote_ah_attr));
+}
+
+typedef void (*hfi1_make_ruc_hdr)(struct rvt_qp *qp,
+ struct ib_other_headers *ohdr,
+ u32 bth0, u32 bth1, u32 bth2, int middle,
+ struct hfi1_pkt_state *ps);
+
+/* We support only two types - 9B and 16B for now */
+static const hfi1_make_ruc_hdr hfi1_ruc_header_tbl[2] = {
+ [HFI1_PKT_TYPE_9B] = &hfi1_make_ruc_header_9B,
+ [HFI1_PKT_TYPE_16B] = &hfi1_make_ruc_header_16B
+};
+
+void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr,
+ u32 bth0, u32 bth1, u32 bth2, int middle,
+ struct hfi1_pkt_state *ps)
+{
+ struct hfi1_qp_priv *priv = qp->priv;
+
/*
* reset s_ahg/AHG fields
*
@@ -793,109 +403,158 @@ void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr,
priv->s_ahg->tx_flags = 0;
priv->s_ahg->ahgcount = 0;
priv->s_ahg->ahgidx = 0;
- if (qp->s_mig_state == IB_MIG_MIGRATED)
- bth0 |= IB_BTH_MIG_REQ;
- else
- middle = 0;
- if (middle)
- build_ahg(qp, bth2);
- else
- qp->s_flags &= ~RVT_S_AHG_VALID;
- ps->s_txreq->phdr.hdr.lrh[0] = cpu_to_be16(lrh0);
- ps->s_txreq->phdr.hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
- ps->s_txreq->phdr.hdr.lrh[2] =
- cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC);
- ps->s_txreq->phdr.hdr.lrh[3] = cpu_to_be16(ppd_from_ibp(ibp)->lid |
- qp->remote_ah_attr.src_path_bits);
- bth0 |= hfi1_get_pkey(ibp, qp->s_pkey_index);
- bth0 |= extra_bytes << 20;
- ohdr->bth[0] = cpu_to_be32(bth0);
- bth1 = qp->remote_qpn;
- if (qp->s_flags & RVT_S_ECN) {
- qp->s_flags &= ~RVT_S_ECN;
- /* we recently received a FECN, so return a BECN */
- bth1 |= (HFI1_BECN_MASK << HFI1_BECN_SHIFT);
- }
- ohdr->bth[1] = cpu_to_be32(bth1);
- ohdr->bth[2] = cpu_to_be32(bth2);
+
+ /* Make the appropriate header */
+ hfi1_ruc_header_tbl[priv->hdr_type](qp, ohdr, bth0, bth1, bth2, middle,
+ ps);
}
/* when sending, force a reschedule every one of these periods */
#define SEND_RESCHED_TIMEOUT (5 * HZ) /* 5s in jiffies */
+/**
+ * hfi1_schedule_send_yield - test for a yield required for QP
+ * send engine
+ * @qp: a pointer to QP
+ * @ps: a pointer to a structure with commonly lookup values for
+ * the send engine progress
+ * @tid: true if it is the tid leg
+ *
+ * This routine checks if the time slice for the QP has expired
+ * for RC QPs, if so an additional work entry is queued. At this
+ * point, other QPs have an opportunity to be scheduled. It
+ * returns true if a yield is required, otherwise, false
+ * is returned.
+ */
+bool hfi1_schedule_send_yield(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
+ bool tid)
+{
+ ps->pkts_sent = true;
+
+ if (unlikely(time_after(jiffies, ps->timeout))) {
+ if (!ps->in_thread ||
+ workqueue_congested(ps->cpu, ps->ppd->hfi1_wq)) {
+ spin_lock_irqsave(&qp->s_lock, ps->flags);
+ if (!tid) {
+ qp->s_flags &= ~RVT_S_BUSY;
+ hfi1_schedule_send(qp);
+ } else {
+ struct hfi1_qp_priv *priv = qp->priv;
+
+ if (priv->s_flags &
+ HFI1_S_TID_BUSY_SET) {
+ qp->s_flags &= ~RVT_S_BUSY;
+ priv->s_flags &=
+ ~(HFI1_S_TID_BUSY_SET |
+ RVT_S_BUSY);
+ } else {
+ priv->s_flags &= ~RVT_S_BUSY;
+ }
+ hfi1_schedule_tid_send(qp);
+ }
+
+ spin_unlock_irqrestore(&qp->s_lock, ps->flags);
+ this_cpu_inc(*ps->ppd->dd->send_schedule);
+ trace_hfi1_rc_expired_time_slice(qp, true);
+ return true;
+ }
+
+ cond_resched();
+ this_cpu_inc(*ps->ppd->dd->send_schedule);
+ ps->timeout = jiffies + ps->timeout_int;
+ }
+
+ trace_hfi1_rc_expired_time_slice(qp, false);
+ return false;
+}
+
+void hfi1_do_send_from_rvt(struct rvt_qp *qp)
+{
+ hfi1_do_send(qp, false);
+}
+
void _hfi1_do_send(struct work_struct *work)
{
- struct iowait *wait = container_of(work, struct iowait, iowork);
- struct rvt_qp *qp = iowait_to_qp(wait);
+ struct iowait_work *w = container_of(work, struct iowait_work, iowork);
+ struct rvt_qp *qp = iowait_to_qp(w->iow);
- hfi1_do_send(qp);
+ hfi1_do_send(qp, true);
}
/**
* hfi1_do_send - perform a send on a QP
- * @work: contains a pointer to the QP
+ * @qp: a pointer to the QP
+ * @in_thread: true if in a workqueue thread
*
* Process entries in the send work queue until credit or queue is
* exhausted. Only allow one CPU to send a packet per QP.
* Otherwise, two threads could send packets out of order.
*/
-void hfi1_do_send(struct rvt_qp *qp)
+void hfi1_do_send(struct rvt_qp *qp, bool in_thread)
{
struct hfi1_pkt_state ps;
struct hfi1_qp_priv *priv = qp->priv;
int (*make_req)(struct rvt_qp *qp, struct hfi1_pkt_state *ps);
- unsigned long timeout;
- unsigned long timeout_int;
- int cpu;
ps.dev = to_idev(qp->ibqp.device);
ps.ibp = to_iport(qp->ibqp.device, qp->port_num);
ps.ppd = ppd_from_ibp(ps.ibp);
+ ps.in_thread = in_thread;
+ ps.wait = iowait_get_ib_work(&priv->s_iowait);
+
+ trace_hfi1_rc_do_send(qp, in_thread);
switch (qp->ibqp.qp_type) {
case IB_QPT_RC:
- if (!loopback && ((qp->remote_ah_attr.dlid & ~((1 << ps.ppd->lmc
- ) - 1)) ==
- ps.ppd->lid)) {
- ruc_loopback(qp);
+ if (!loopback && ((rdma_ah_get_dlid(&qp->remote_ah_attr) &
+ ~((1 << ps.ppd->lmc) - 1)) ==
+ ps.ppd->lid)) {
+ rvt_ruc_loopback(qp);
return;
}
make_req = hfi1_make_rc_req;
- timeout_int = (qp->timeout_jiffies);
+ ps.timeout_int = qp->timeout_jiffies;
break;
case IB_QPT_UC:
- if (!loopback && ((qp->remote_ah_attr.dlid & ~((1 << ps.ppd->lmc
- ) - 1)) ==
- ps.ppd->lid)) {
- ruc_loopback(qp);
+ if (!loopback && ((rdma_ah_get_dlid(&qp->remote_ah_attr) &
+ ~((1 << ps.ppd->lmc) - 1)) ==
+ ps.ppd->lid)) {
+ rvt_ruc_loopback(qp);
return;
}
make_req = hfi1_make_uc_req;
- timeout_int = SEND_RESCHED_TIMEOUT;
+ ps.timeout_int = SEND_RESCHED_TIMEOUT;
break;
default:
make_req = hfi1_make_ud_req;
- timeout_int = SEND_RESCHED_TIMEOUT;
+ ps.timeout_int = SEND_RESCHED_TIMEOUT;
}
spin_lock_irqsave(&qp->s_lock, ps.flags);
/* Return if we are already busy processing a work request. */
if (!hfi1_send_ok(qp)) {
+ if (qp->s_flags & HFI1_S_ANY_WAIT_IO)
+ iowait_set_flag(&priv->s_iowait, IOWAIT_PENDING_IB);
spin_unlock_irqrestore(&qp->s_lock, ps.flags);
return;
}
qp->s_flags |= RVT_S_BUSY;
- timeout = jiffies + (timeout_int) / 8;
- cpu = priv->s_sde ? priv->s_sde->cpu :
+ ps.timeout_int = ps.timeout_int / 8;
+ ps.timeout = jiffies + ps.timeout_int;
+ ps.cpu = priv->s_sde ? priv->s_sde->cpu :
cpumask_first(cpumask_of_node(ps.ppd->dd->node));
+ ps.pkts_sent = false;
+
/* insure a pre-built packet is handled */
- ps.s_txreq = get_waiting_verbs_txreq(qp);
+ ps.s_txreq = get_waiting_verbs_txreq(ps.wait);
do {
/* Check for a constructed packet to be sent. */
- if (qp->s_hdrwords != 0) {
+ if (ps.s_txreq) {
+ if (priv->s_flags & HFI1_S_TID_BUSY_SET)
+ qp->s_flags |= RVT_S_BUSY;
spin_unlock_irqrestore(&qp->s_lock, ps.flags);
/*
* If the packet cannot be sent now, return and
@@ -903,70 +562,14 @@ void hfi1_do_send(struct rvt_qp *qp)
*/
if (hfi1_verbs_send(qp, &ps))
return;
- /* Record that s_ahg is empty. */
- qp->s_hdrwords = 0;
+
/* allow other tasks to run */
- if (unlikely(time_after(jiffies, timeout))) {
- if (workqueue_congested(cpu,
- ps.ppd->hfi1_wq)) {
- spin_lock_irqsave(
- &qp->s_lock,
- ps.flags);
- qp->s_flags &= ~RVT_S_BUSY;
- hfi1_schedule_send(qp);
- spin_unlock_irqrestore(
- &qp->s_lock,
- ps.flags);
- this_cpu_inc(
- *ps.ppd->dd->send_schedule);
- return;
- }
- if (!irqs_disabled()) {
- cond_resched();
- this_cpu_inc(
- *ps.ppd->dd->send_schedule);
- }
- timeout = jiffies + (timeout_int) / 8;
- }
+ if (hfi1_schedule_send_yield(qp, &ps, false))
+ return;
+
spin_lock_irqsave(&qp->s_lock, ps.flags);
}
} while (make_req(qp, &ps));
-
+ iowait_starve_clear(ps.pkts_sent, &priv->s_iowait);
spin_unlock_irqrestore(&qp->s_lock, ps.flags);
}
-
-/*
- * This should be called with s_lock held.
- */
-void hfi1_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
- enum ib_wc_status status)
-{
- u32 old_last, last;
-
- if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_OR_FLUSH_SEND))
- return;
-
- last = qp->s_last;
- old_last = last;
- if (++last >= qp->s_size)
- last = 0;
- qp->s_last = last;
- /* See post_send() */
- barrier();
- rvt_put_swqe(wqe);
- if (qp->ibqp.qp_type == IB_QPT_UD ||
- qp->ibqp.qp_type == IB_QPT_SMI ||
- qp->ibqp.qp_type == IB_QPT_GSI)
- atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount);
-
- rvt_qp_swqe_complete(qp, wqe, status);
-
- if (qp->s_acked == old_last)
- qp->s_acked = last;
- if (qp->s_cur == old_last)
- qp->s_cur = last;
- if (qp->s_tail == old_last)
- qp->s_tail = last;
- if (qp->state == IB_QPS_SQD && last == qp->s_cur)
- qp->s_draining = 0;
-}
diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c
index 1d81cac1fa6c..5cfa4f8fbf3d 100644
--- a/drivers/infiniband/hw/hfi1/sdma.c
+++ b/drivers/infiniband/hw/hfi1/sdma.c
@@ -1,48 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
+ * Copyright(c) 2015 - 2018 Intel Corporation.
*/
#include <linux/spinlock.h>
@@ -65,6 +23,7 @@
#define SDMA_DESCQ_CNT 2048
#define SDMA_DESC_INTR 64
#define INVALID_TAIL 0xffff
+#define SDMA_PAD max_t(size_t, MAX_16B_PADDING, sizeof(u32))
static uint sdma_descq_cnt = SDMA_DESCQ_CNT;
module_param(sdma_descq_cnt, uint, S_IRUGO);
@@ -231,11 +190,11 @@ static const struct sdma_set_state_action sdma_action_table[] = {
static void sdma_complete(struct kref *);
static void sdma_finalput(struct sdma_state *);
static void sdma_get(struct sdma_state *);
-static void sdma_hw_clean_up_task(unsigned long);
+static void sdma_hw_clean_up_task(struct tasklet_struct *);
static void sdma_put(struct sdma_state *);
static void sdma_set_state(struct sdma_engine *, enum sdma_states);
static void sdma_start_hw_clean_up(struct sdma_engine *);
-static void sdma_sw_clean_up_task(unsigned long);
+static void sdma_sw_clean_up_task(struct tasklet_struct *);
static void sdma_sendctrl(struct sdma_engine *, unsigned);
static void init_sdma_regs(struct sdma_engine *, u32, uint);
static void sdma_process_event(
@@ -246,7 +205,7 @@ static void __sdma_process_event(
enum sdma_events event);
static void dump_sdma_state(struct sdma_engine *sde);
static void sdma_make_progress(struct sdma_engine *sde, u64 status);
-static void sdma_desc_avail(struct sdma_engine *sde, unsigned avail);
+static void sdma_desc_avail(struct sdma_engine *sde, uint avail);
static void sdma_flush_descq(struct sdma_engine *sde);
/**
@@ -325,7 +284,7 @@ static void sdma_wait_for_packet_egress(struct sdma_engine *sde,
/* timed out - bounce the link */
dd_dev_err(dd, "%s: engine %u timeout waiting for packets to egress, remaining count %u, bouncing link\n",
__func__, sde->this_idx, (u32)reg);
- queue_work(dd->pport->hfi1_wq,
+ queue_work(dd->pport->link_wq,
&dd->pport->link_bounce_work);
break;
}
@@ -378,7 +337,7 @@ static inline void complete_tx(struct sdma_engine *sde,
__sdma_txclean(sde->dd, tx);
if (complete)
(*complete)(tx, res);
- if (wait && iowait_sdma_dec(wait))
+ if (iowait_sdma_dec(wait))
iowait_drain_wakeup(wait);
}
@@ -405,19 +364,33 @@ static void sdma_flush(struct sdma_engine *sde)
struct sdma_txreq *txp, *txp_next;
LIST_HEAD(flushlist);
unsigned long flags;
+ uint seq;
/* flush from head to tail */
sdma_flush_descq(sde);
spin_lock_irqsave(&sde->flushlist_lock, flags);
/* copy flush list */
- list_for_each_entry_safe(txp, txp_next, &sde->flushlist, list) {
- list_del_init(&txp->list);
- list_add_tail(&txp->list, &flushlist);
- }
+ list_splice_init(&sde->flushlist, &flushlist);
spin_unlock_irqrestore(&sde->flushlist_lock, flags);
/* flush from flush list */
list_for_each_entry_safe(txp, txp_next, &flushlist, list)
complete_tx(sde, txp, SDMA_TXREQ_S_ABORTED);
+ /* wakeup QPs orphaned on the dmawait list */
+ do {
+ struct iowait *w, *nw;
+
+ seq = read_seqbegin(&sde->waitlock);
+ if (!list_empty(&sde->dmawait)) {
+ write_seqlock(&sde->waitlock);
+ list_for_each_entry_safe(w, nw, &sde->dmawait, list) {
+ if (w->wakeup) {
+ w->wakeup(w, SDMA_AVAIL_REASON);
+ list_del_init(&w->list);
+ }
+ }
+ write_sequnlock(&sde->waitlock);
+ }
+ } while (read_seqretry(&sde->waitlock, seq));
}
/*
@@ -491,10 +464,11 @@ static void sdma_err_progress_check_schedule(struct sdma_engine *sde)
}
}
-static void sdma_err_progress_check(unsigned long data)
+static void sdma_err_progress_check(struct timer_list *t)
{
unsigned index;
- struct sdma_engine *sde = (struct sdma_engine *)data;
+ struct sdma_engine *sde = timer_container_of(sde, t,
+ err_progress_check_timer);
dd_dev_err(sde->dd, "SDE progress check event\n");
for (index = 0; index < sde->dd->num_sdma; index++) {
@@ -530,9 +504,10 @@ static void sdma_err_progress_check(unsigned long data)
schedule_work(&sde->err_halt_worker);
}
-static void sdma_hw_clean_up_task(unsigned long opaque)
+static void sdma_hw_clean_up_task(struct tasklet_struct *t)
{
- struct sdma_engine *sde = (struct sdma_engine *)opaque;
+ struct sdma_engine *sde = from_tasklet(sde, t,
+ sdma_hw_clean_up_task);
u64 statuscsr;
while (1) {
@@ -553,7 +528,6 @@ static void sdma_hw_clean_up_task(unsigned long opaque)
static inline struct sdma_txreq *get_txhead(struct sdma_engine *sde)
{
- smp_read_barrier_depends(); /* see sdma_update_tail() */
return sde->tx_ring[sde->tx_head & sde->sdma_mask];
}
@@ -590,9 +564,9 @@ static void sdma_flush_descq(struct sdma_engine *sde)
sdma_desc_avail(sde, sdma_descq_freecnt(sde));
}
-static void sdma_sw_clean_up_task(unsigned long opaque)
+static void sdma_sw_clean_up_task(struct tasklet_struct *t)
{
- struct sdma_engine *sde = (struct sdma_engine *)opaque;
+ struct sdma_engine *sde = from_tasklet(sde, t, sdma_sw_clean_up_task);
unsigned long flags;
spin_lock_irqsave(&sde->tail_lock, flags);
@@ -819,7 +793,7 @@ struct sdma_engine *sdma_select_engine_sc(
struct sdma_rht_map_elem {
u32 mask;
u8 ctr;
- struct sdma_engine *sde[0];
+ struct sdma_engine *sde[];
};
struct sdma_rht_node {
@@ -834,7 +808,7 @@ static const struct rhashtable_params sdma_rht_params = {
.nelem_hint = NR_CPUS_HINT,
.head_offset = offsetof(struct sdma_rht_node, node),
.key_offset = offsetof(struct sdma_rht_node, cpu_id),
- .key_len = FIELD_SIZEOF(struct sdma_rht_node, cpu_id),
+ .key_len = sizeof_field(struct sdma_rht_node, cpu_id),
.max_size = NR_CPUS,
.min_size = 8,
.automatic_shrinking = true,
@@ -856,20 +830,19 @@ struct sdma_engine *sdma_select_user_engine(struct hfi1_devdata *dd,
{
struct sdma_rht_node *rht_node;
struct sdma_engine *sde = NULL;
- const struct cpumask *current_mask = tsk_cpus_allowed(current);
unsigned long cpu_id;
/*
* To ensure that always the same sdma engine(s) will be
* selected make sure the process is pinned to this CPU only.
*/
- if (cpumask_weight(current_mask) != 1)
+ if (current->nr_cpus_allowed != 1)
goto out;
- cpu_id = smp_processor_id();
rcu_read_lock();
- rht_node = rhashtable_lookup_fast(&dd->sdma_rht, &cpu_id,
- sdma_rht_params);
+ cpu_id = smp_processor_id();
+ rht_node = rhashtable_lookup(dd->sdma_rht, &cpu_id,
+ sdma_rht_params);
if (rht_node && rht_node->map[vl]) {
struct sdma_rht_map_elem *map = rht_node->map[vl];
@@ -924,9 +897,10 @@ ssize_t sdma_set_cpu_to_sde_map(struct sdma_engine *sde, const char *buf,
cpumask_var_t mask, new_mask;
unsigned long cpu;
int ret, vl, sz;
+ struct sdma_rht_node *rht_node;
vl = sdma_engine_get_vl(sde);
- if (unlikely(vl < 0))
+ if (unlikely(vl < 0 || vl >= ARRAY_SIZE(rht_node->map)))
return -EINVAL;
ret = zalloc_cpumask_var(&mask, GFP_KERNEL);
@@ -954,15 +928,13 @@ ssize_t sdma_set_cpu_to_sde_map(struct sdma_engine *sde, const char *buf,
mutex_lock(&process_to_sde_mutex);
for_each_cpu(cpu, mask) {
- struct sdma_rht_node *rht_node;
-
/* Check if we have this already mapped */
if (cpumask_test_cpu(cpu, &sde->cpu_mask)) {
cpumask_set_cpu(cpu, new_mask);
continue;
}
- rht_node = rhashtable_lookup_fast(&dd->sdma_rht, &cpu,
+ rht_node = rhashtable_lookup_fast(dd->sdma_rht, &cpu,
sdma_rht_params);
if (!rht_node) {
rht_node = kzalloc(sizeof(*rht_node), GFP_KERNEL);
@@ -982,7 +954,7 @@ ssize_t sdma_set_cpu_to_sde_map(struct sdma_engine *sde, const char *buf,
rht_node->map[vl]->ctr = 1;
rht_node->map[vl]->sde[0] = sde;
- ret = rhashtable_insert_fast(&dd->sdma_rht,
+ ret = rhashtable_insert_fast(dd->sdma_rht,
&rht_node->node,
sdma_rht_params);
if (ret) {
@@ -1018,14 +990,14 @@ ssize_t sdma_set_cpu_to_sde_map(struct sdma_engine *sde, const char *buf,
}
/* Clean up old mappings */
- for_each_cpu(cpu, cpu_online_mask) {
+ for_each_online_cpu(cpu) {
struct sdma_rht_node *rht_node;
/* Don't cleanup sdes that are set in the new mask */
if (cpumask_test_cpu(cpu, mask))
continue;
- rht_node = rhashtable_lookup_fast(&dd->sdma_rht, &cpu,
+ rht_node = rhashtable_lookup_fast(dd->sdma_rht, &cpu,
sdma_rht_params);
if (rht_node) {
bool empty = true;
@@ -1049,7 +1021,7 @@ ssize_t sdma_set_cpu_to_sde_map(struct sdma_engine *sde, const char *buf,
}
if (empty) {
- ret = rhashtable_remove_fast(&dd->sdma_rht,
+ ret = rhashtable_remove_fast(dd->sdma_rht,
&rht_node->node,
sdma_rht_params);
WARN_ON(ret);
@@ -1108,7 +1080,7 @@ void sdma_seqfile_dump_cpu_list(struct seq_file *s,
struct sdma_rht_node *rht_node;
int i, j;
- rht_node = rhashtable_lookup_fast(&dd->sdma_rht, &cpuid,
+ rht_node = rhashtable_lookup_fast(dd->sdma_rht, &cpuid,
sdma_rht_params);
if (!rht_node)
return;
@@ -1271,19 +1243,21 @@ bail:
return -ENOMEM;
}
-/*
- * Clean up allocated memory.
- *
- * This routine is can be called regardless of the success of sdma_init()
+/**
+ * sdma_clean - Clean up allocated memory
+ * @dd: struct hfi1_devdata
+ * @num_engines: num sdma engines
*
+ * This routine can be called regardless of the success of
+ * sdma_init()
*/
-static void sdma_clean(struct hfi1_devdata *dd, size_t num_engines)
+void sdma_clean(struct hfi1_devdata *dd, size_t num_engines)
{
size_t i;
struct sdma_engine *sde;
if (dd->sdma_pad_dma) {
- dma_free_coherent(&dd->pcidev->dev, 4,
+ dma_free_coherent(&dd->pcidev->dev, SDMA_PAD,
(void *)dd->sdma_pad_dma,
dd->sdma_pad_phys);
dd->sdma_pad_dma = NULL;
@@ -1315,13 +1289,21 @@ static void sdma_clean(struct hfi1_devdata *dd, size_t num_engines)
kvfree(sde->tx_ring);
sde->tx_ring = NULL;
}
- spin_lock_irq(&dd->sde_map_lock);
- sdma_map_free(rcu_access_pointer(dd->sdma_map));
- RCU_INIT_POINTER(dd->sdma_map, NULL);
- spin_unlock_irq(&dd->sde_map_lock);
- synchronize_rcu();
+ if (rcu_access_pointer(dd->sdma_map)) {
+ spin_lock_irq(&dd->sde_map_lock);
+ sdma_map_free(rcu_access_pointer(dd->sdma_map));
+ RCU_INIT_POINTER(dd->sdma_map, NULL);
+ spin_unlock_irq(&dd->sde_map_lock);
+ synchronize_rcu();
+ }
kfree(dd->per_sdma);
dd->per_sdma = NULL;
+
+ if (dd->sdma_rht) {
+ rhashtable_free_and_destroy(dd->sdma_rht, sdma_rht_free, NULL);
+ kfree(dd->sdma_rht);
+ dd->sdma_rht = NULL;
+ }
}
/**
@@ -1329,10 +1311,8 @@ static void sdma_clean(struct hfi1_devdata *dd, size_t num_engines)
* @dd: hfi1_devdata
* @port: port number (currently only zero)
*
- * sdma_init initializes the specified number of engines.
- *
- * The code initializes each sde, its csrs. Interrupts
- * are not required to be enabled.
+ * Initializes each sde and its csrs.
+ * Interrupts are not required to be enabled.
*
* Returns:
* 0 - success, -errno on failure
@@ -1341,12 +1321,14 @@ int sdma_init(struct hfi1_devdata *dd, u8 port)
{
unsigned this_idx;
struct sdma_engine *sde;
+ struct rhashtable *tmp_sdma_rht;
u16 descq_cnt;
void *curr_head;
struct hfi1_pportdata *ppd = dd->pport + port;
u32 per_sdma_credits;
uint idle_cnt = sdma_idle_cnt;
- size_t num_engines = dd->chip_sdma_engines;
+ size_t num_engines = chip_sdma_engines(dd);
+ int ret = -ENOMEM;
if (!HFI1_CAP_IS_KSET(SDMA)) {
HFI1_CAP_CLEAR(SDMA_AHG);
@@ -1354,18 +1336,18 @@ int sdma_init(struct hfi1_devdata *dd, u8 port)
}
if (mod_num_sdma &&
/* can't exceed chip support */
- mod_num_sdma <= dd->chip_sdma_engines &&
+ mod_num_sdma <= chip_sdma_engines(dd) &&
/* count must be >= vls */
mod_num_sdma >= num_vls)
num_engines = mod_num_sdma;
dd_dev_info(dd, "SDMA mod_num_sdma: %u\n", mod_num_sdma);
- dd_dev_info(dd, "SDMA chip_sdma_engines: %u\n", dd->chip_sdma_engines);
+ dd_dev_info(dd, "SDMA chip_sdma_engines: %u\n", chip_sdma_engines(dd));
dd_dev_info(dd, "SDMA chip_sdma_mem_size: %u\n",
- dd->chip_sdma_mem_size);
+ chip_sdma_mem_size(dd));
per_sdma_credits =
- dd->chip_sdma_mem_size / (num_engines * SDMA_BLOCK_SIZE);
+ chip_sdma_mem_size(dd) / (num_engines * SDMA_BLOCK_SIZE);
/* set up freeze waitqueue */
init_waitqueue_head(&dd->sdma_unfreeze_wq);
@@ -1376,11 +1358,19 @@ int sdma_init(struct hfi1_devdata *dd, u8 port)
num_engines, descq_cnt);
/* alloc memory for array of send engines */
- dd->per_sdma = kcalloc(num_engines, sizeof(*dd->per_sdma), GFP_KERNEL);
+ dd->per_sdma = kcalloc_node(num_engines, sizeof(*dd->per_sdma),
+ GFP_KERNEL, dd->node);
if (!dd->per_sdma)
- return -ENOMEM;
+ return ret;
idle_cnt = ns_to_cclock(dd, idle_cnt);
+ if (idle_cnt)
+ dd->default_desc1 =
+ SDMA_DESC1_HEAD_TO_HOST_FLAG;
+ else
+ dd->default_desc1 =
+ SDMA_DESC1_INT_REQ_FLAG;
+
if (!sdma_desct_intr)
sdma_desct_intr = SDMA_DESC_INTR;
@@ -1410,6 +1400,7 @@ int sdma_init(struct hfi1_devdata *dd, u8 port)
seqlock_init(&sde->head_lock);
spin_lock_init(&sde->senddmactrl_lock);
spin_lock_init(&sde->flushlist_lock);
+ seqlock_init(&sde->waitlock);
/* insure there is always a zero bit */
sde->ahg_bits = 0xfffffffe00000000ULL;
@@ -1425,66 +1416,45 @@ int sdma_init(struct hfi1_devdata *dd, u8 port)
sde->tail_csr =
get_kctxt_csr_addr(dd, this_idx, SD(TAIL));
- if (idle_cnt)
- dd->default_desc1 =
- SDMA_DESC1_HEAD_TO_HOST_FLAG;
- else
- dd->default_desc1 =
- SDMA_DESC1_INT_REQ_FLAG;
-
- tasklet_init(&sde->sdma_hw_clean_up_task, sdma_hw_clean_up_task,
- (unsigned long)sde);
-
- tasklet_init(&sde->sdma_sw_clean_up_task, sdma_sw_clean_up_task,
- (unsigned long)sde);
+ tasklet_setup(&sde->sdma_hw_clean_up_task,
+ sdma_hw_clean_up_task);
+ tasklet_setup(&sde->sdma_sw_clean_up_task,
+ sdma_sw_clean_up_task);
INIT_WORK(&sde->err_halt_worker, sdma_err_halt_wait);
INIT_WORK(&sde->flush_worker, sdma_field_flush);
sde->progress_check_head = 0;
- setup_timer(&sde->err_progress_check_timer,
- sdma_err_progress_check, (unsigned long)sde);
+ timer_setup(&sde->err_progress_check_timer,
+ sdma_err_progress_check, 0);
- sde->descq = dma_zalloc_coherent(
- &dd->pcidev->dev,
- descq_cnt * sizeof(u64[2]),
- &sde->descq_phys,
- GFP_KERNEL
- );
+ sde->descq = dma_alloc_coherent(&dd->pcidev->dev,
+ descq_cnt * sizeof(u64[2]),
+ &sde->descq_phys, GFP_KERNEL);
if (!sde->descq)
goto bail;
sde->tx_ring =
- kcalloc(descq_cnt, sizeof(struct sdma_txreq *),
- GFP_KERNEL);
- if (!sde->tx_ring)
- sde->tx_ring =
- vzalloc(
- sizeof(struct sdma_txreq *) *
- descq_cnt);
+ kvzalloc_node(array_size(descq_cnt,
+ sizeof(struct sdma_txreq *)),
+ GFP_KERNEL, dd->node);
if (!sde->tx_ring)
goto bail;
}
dd->sdma_heads_size = L1_CACHE_BYTES * num_engines;
/* Allocate memory for DMA of head registers to memory */
- dd->sdma_heads_dma = dma_zalloc_coherent(
- &dd->pcidev->dev,
- dd->sdma_heads_size,
- &dd->sdma_heads_phys,
- GFP_KERNEL
- );
+ dd->sdma_heads_dma = dma_alloc_coherent(&dd->pcidev->dev,
+ dd->sdma_heads_size,
+ &dd->sdma_heads_phys,
+ GFP_KERNEL);
if (!dd->sdma_heads_dma) {
dd_dev_err(dd, "failed to allocate SendDMA head memory\n");
goto bail;
}
/* Allocate memory for pad */
- dd->sdma_pad_dma = dma_zalloc_coherent(
- &dd->pcidev->dev,
- sizeof(u32),
- &dd->sdma_pad_phys,
- GFP_KERNEL
- );
+ dd->sdma_pad_dma = dma_alloc_coherent(&dd->pcidev->dev, SDMA_PAD,
+ &dd->sdma_pad_phys, GFP_KERNEL);
if (!dd->sdma_pad_dma) {
dd_dev_err(dd, "failed to allocate SendDMA pad memory\n");
goto bail;
@@ -1507,18 +1477,30 @@ int sdma_init(struct hfi1_devdata *dd, u8 port)
dd->flags |= HFI1_HAS_SEND_DMA;
dd->flags |= idle_cnt ? HFI1_HAS_SDMA_TIMEOUT : 0;
dd->num_sdma = num_engines;
- if (sdma_map_init(dd, port, ppd->vls_operational, NULL))
+ ret = sdma_map_init(dd, port, ppd->vls_operational, NULL);
+ if (ret < 0)
goto bail;
- if (rhashtable_init(&dd->sdma_rht, &sdma_rht_params))
+ tmp_sdma_rht = kzalloc(sizeof(*tmp_sdma_rht), GFP_KERNEL);
+ if (!tmp_sdma_rht) {
+ ret = -ENOMEM;
+ goto bail;
+ }
+
+ ret = rhashtable_init(tmp_sdma_rht, &sdma_rht_params);
+ if (ret < 0) {
+ kfree(tmp_sdma_rht);
goto bail;
+ }
+
+ dd->sdma_rht = tmp_sdma_rht;
dd_dev_info(dd, "SDMA num_sdma: %u\n", dd->num_sdma);
return 0;
bail:
sdma_clean(dd, num_engines);
- return -ENOMEM;
+ return ret;
}
/**
@@ -1540,24 +1522,6 @@ void sdma_all_running(struct hfi1_devdata *dd)
}
/**
- * sdma_all_idle() - called when the link goes down
- * @dd: hfi1_devdata
- *
- * This routine moves all engines to the idle state.
- */
-void sdma_all_idle(struct hfi1_devdata *dd)
-{
- struct sdma_engine *sde;
- unsigned int i;
-
- /* idle all engines */
- for (i = 0; i < dd->num_sdma; ++i) {
- sde = &dd->per_sdma[i];
- sdma_process_event(sde, sdma_event_e70_go_idle);
- }
-}
-
-/**
* sdma_start() - called to kick off state processing for all engines
* @dd: hfi1_devdata
*
@@ -1594,7 +1558,7 @@ void sdma_exit(struct hfi1_devdata *dd)
sde->this_idx);
sdma_process_event(sde, sdma_event_e00_go_hw_down);
- del_timer_sync(&sde->err_progress_check_timer);
+ timer_delete_sync(&sde->err_progress_check_timer);
/*
* This waits for the state machine to exit so it is not
@@ -1603,8 +1567,6 @@ void sdma_exit(struct hfi1_devdata *dd)
*/
sdma_finalput(&sde->state);
}
- sdma_clean(dd, dd->num_sdma);
- rhashtable_free_and_destroy(&dd->sdma_rht, sdma_rht_free, NULL);
}
/*
@@ -1616,20 +1578,18 @@ static inline void sdma_unmap_desc(
{
switch (sdma_mapping_type(descp)) {
case SDMA_MAP_SINGLE:
- dma_unmap_single(
- &dd->pcidev->dev,
- sdma_mapping_addr(descp),
- sdma_mapping_len(descp),
- DMA_TO_DEVICE);
+ dma_unmap_single(&dd->pcidev->dev, sdma_mapping_addr(descp),
+ sdma_mapping_len(descp), DMA_TO_DEVICE);
break;
case SDMA_MAP_PAGE:
- dma_unmap_page(
- &dd->pcidev->dev,
- sdma_mapping_addr(descp),
- sdma_mapping_len(descp),
- DMA_TO_DEVICE);
+ dma_unmap_page(&dd->pcidev->dev, sdma_mapping_addr(descp),
+ sdma_mapping_len(descp), DMA_TO_DEVICE);
break;
}
+
+ if (descp->pinning_ctx && descp->ctx_put)
+ descp->ctx_put(descp->pinning_ctx);
+ descp->pinning_ctx = NULL;
}
/*
@@ -1706,7 +1666,7 @@ retry:
swhead = sde->descq_head & sde->sdma_mask;
/* this code is really bad for cache line trading */
- swtail = ACCESS_ONCE(sde->descq_tail) & sde->sdma_mask;
+ swtail = READ_ONCE(sde->descq_tail) & sde->sdma_mask;
cnt = sde->descq_cnt;
if (swhead < swtail)
@@ -1721,7 +1681,7 @@ retry:
sane = (hwhead == swhead);
if (unlikely(!sane)) {
- dd_dev_err(dd, "SDMA(%u) bad head (%s) hwhd=%hu swhd=%hu swtl=%hu cnt=%hu\n",
+ dd_dev_err(dd, "SDMA(%u) bad head (%s) hwhd=%u swhd=%u swtl=%u cnt=%u\n",
sde->this_idx,
use_dmahead ? "dma" : "kreg",
hwhead, swhead, swtail, cnt);
@@ -1743,13 +1703,11 @@ retry:
*
* This is called with head_lock held.
*/
-static void sdma_desc_avail(struct sdma_engine *sde, unsigned avail)
+static void sdma_desc_avail(struct sdma_engine *sde, uint avail)
{
- struct iowait *wait, *nw;
+ struct iowait *wait, *nw, *twait;
struct iowait *waits[SDMA_WAIT_BATCH_SIZE];
- unsigned i, n = 0, seq;
- struct sdma_txreq *stx;
- struct hfi1_ibdev *dev = &sde->dd->verbs_dev;
+ uint i, n = 0, seq, tidx = 0;
#ifdef CONFIG_SDMA_VERBOSITY
dd_dev_err(sde->dd, "CONFIG SDMA(%u) %s:%d %s()\n", sde->this_idx,
@@ -1758,42 +1716,51 @@ static void sdma_desc_avail(struct sdma_engine *sde, unsigned avail)
#endif
do {
- seq = read_seqbegin(&dev->iowait_lock);
+ seq = read_seqbegin(&sde->waitlock);
if (!list_empty(&sde->dmawait)) {
/* at least one item */
- write_seqlock(&dev->iowait_lock);
+ write_seqlock(&sde->waitlock);
/* Harvest waiters wanting DMA descriptors */
list_for_each_entry_safe(
wait,
nw,
&sde->dmawait,
list) {
- u16 num_desc = 0;
+ u32 num_desc;
if (!wait->wakeup)
continue;
if (n == ARRAY_SIZE(waits))
break;
- if (!list_empty(&wait->tx_head)) {
- stx = list_first_entry(
- &wait->tx_head,
- struct sdma_txreq,
- list);
- num_desc = stx->num_desc;
- }
+ iowait_init_priority(wait);
+ num_desc = iowait_get_all_desc(wait);
if (num_desc > avail)
break;
avail -= num_desc;
+ /* Find the top-priority wait memeber */
+ if (n) {
+ twait = waits[tidx];
+ tidx =
+ iowait_priority_update_top(wait,
+ twait,
+ n,
+ tidx);
+ }
list_del_init(&wait->list);
waits[n++] = wait;
}
- write_sequnlock(&dev->iowait_lock);
+ write_sequnlock(&sde->waitlock);
break;
}
- } while (read_seqretry(&dev->iowait_lock, seq));
+ } while (read_seqretry(&sde->waitlock, seq));
+
+ /* Schedule the top-priority entry first */
+ if (n)
+ waits[tidx]->wakeup(waits[tidx], SDMA_AVAIL_REASON);
for (i = 0; i < n; i++)
- waits[i]->wakeup(waits[i], SDMA_AVAIL_REASON);
+ if (i != tidx)
+ waits[i]->wakeup(waits[i], SDMA_AVAIL_REASON);
}
/* head_lock must be held */
@@ -1834,7 +1801,7 @@ retry:
/*
* The SDMA idle interrupt is not guaranteed to be ordered with respect
- * to updates to the the dma_head location in host memory. The head
+ * to updates to the dma_head location in host memory. The head
* value read might not be fully up to date. If there are pending
* descriptors and the SDMA idle interrupt fired then read from the
* CSR SDMA head instead to get the latest value from the hardware.
@@ -1844,7 +1811,7 @@ retry:
if ((status & sde->idle_mask) && !idle_check_done) {
u16 swtail;
- swtail = ACCESS_ONCE(sde->descq_tail) & sde->sdma_mask;
+ swtail = READ_ONCE(sde->descq_tail) & sde->sdma_mask;
if (swtail != hwhead) {
hwhead = (u16)read_sde_csr(sde, SD(HEAD));
idle_check_done = 1;
@@ -2116,7 +2083,6 @@ void sdma_dumpstate(struct sdma_engine *sde)
static void dump_sdma_state(struct sdma_engine *sde)
{
- struct hw_sdma_desc *descq;
struct hw_sdma_desc *descqp;
u64 desc[2];
u64 addr;
@@ -2127,7 +2093,6 @@ static void dump_sdma_state(struct sdma_engine *sde)
head = sde->descq_head & sde->sdma_mask;
tail = sde->descq_tail & sde->sdma_mask;
cnt = sdma_descq_freecnt(sde);
- descq = sde->descq;
dd_dev_err(sde->dd,
"SDMA (%u) descq_head: %u descq_tail: %u freecnt: %u FLE %d\n",
@@ -2194,7 +2159,7 @@ void sdma_seqfile_dump_sde(struct seq_file *s, struct sdma_engine *sde)
u16 len;
head = sde->descq_head & sde->sdma_mask;
- tail = ACCESS_ONCE(sde->descq_tail) & sde->sdma_mask;
+ tail = READ_ONCE(sde->descq_tail) & sde->sdma_mask;
seq_printf(s, SDE_FMT, sde->this_idx,
sde->cpu,
sdma_state_name(sde->state.current_state),
@@ -2329,8 +2294,9 @@ static inline u16 submit_tx(struct sdma_engine *sde, struct sdma_txreq *tx)
*/
static int sdma_check_progress(
struct sdma_engine *sde,
- struct iowait *wait,
- struct sdma_txreq *tx)
+ struct iowait_work *wait,
+ struct sdma_txreq *tx,
+ bool pkts_sent)
{
int ret;
@@ -2338,12 +2304,12 @@ static int sdma_check_progress(
if (tx->num_desc <= sde->desc_avail)
return -EAGAIN;
/* pulse the head_lock */
- if (wait && wait->sleep) {
+ if (wait && iowait_ioww_to_iow(wait)->sleep) {
unsigned seq;
seq = raw_seqcount_begin(
(const seqcount_t *)&sde->head_lock.seqcount);
- ret = wait->sleep(sde, wait, tx, seq);
+ ret = wait->iow->sleep(sde, wait, tx, seq, pkts_sent);
if (ret == -EAGAIN)
sde->desc_avail = sdma_descq_freecnt(sde);
} else {
@@ -2355,8 +2321,9 @@ static int sdma_check_progress(
/**
* sdma_send_txreq() - submit a tx req to ring
* @sde: sdma engine to use
- * @wait: wait structure to use when full (may be NULL)
+ * @wait: SE wait structure to use when full (may be NULL)
* @tx: sdma_txreq to submit
+ * @pkts_sent: has any packet been sent yet?
*
* The call submits the tx into the ring. If a iowait structure is non-NULL
* the packet will be queued to the list in wait.
@@ -2367,8 +2334,9 @@ static int sdma_check_progress(
* -EIOCBQUEUED - tx queued to iowait, -ECOMM bad sdma state
*/
int sdma_send_txreq(struct sdma_engine *sde,
- struct iowait *wait,
- struct sdma_txreq *tx)
+ struct iowait_work *wait,
+ struct sdma_txreq *tx,
+ bool pkts_sent)
{
int ret = 0;
u16 tail;
@@ -2377,7 +2345,7 @@ int sdma_send_txreq(struct sdma_engine *sde,
/* user should have supplied entire packet */
if (unlikely(tx->tlen))
return -EINVAL;
- tx->wait = wait;
+ tx->wait = iowait_ioww_to_iow(wait);
spin_lock_irqsave(&sde->tail_lock, flags);
retry:
if (unlikely(!__sdma_running(sde)))
@@ -2386,14 +2354,14 @@ retry:
goto nodesc;
tail = submit_tx(sde, tx);
if (wait)
- iowait_sdma_inc(wait);
+ iowait_sdma_inc(iowait_ioww_to_iow(wait));
sdma_update_tail(sde, tail);
unlock:
spin_unlock_irqrestore(&sde->tail_lock, flags);
return ret;
unlock_noconn:
if (wait)
- iowait_sdma_inc(wait);
+ iowait_sdma_inc(iowait_ioww_to_iow(wait));
tx->next_descq_idx = 0;
#ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
tx->sn = sde->tail_sn++;
@@ -2402,15 +2370,12 @@ unlock_noconn:
spin_lock(&sde->flushlist_lock);
list_add_tail(&tx->list, &sde->flushlist);
spin_unlock(&sde->flushlist_lock);
- if (wait) {
- wait->tx_count++;
- wait->count += tx->num_desc;
- }
- schedule_work(&sde->flush_worker);
+ iowait_inc_wait_count(wait, tx->num_desc);
+ queue_work_on(sde->cpu, system_highpri_wq, &sde->flush_worker);
ret = -ECOMM;
goto unlock;
nodesc:
- ret = sdma_check_progress(sde, wait, tx);
+ ret = sdma_check_progress(sde, wait, tx, pkts_sent);
if (ret == -EAGAIN) {
ret = 0;
goto retry;
@@ -2422,13 +2387,13 @@ nodesc:
/**
* sdma_send_txlist() - submit a list of tx req to ring
* @sde: sdma engine to use
- * @wait: wait structure to use when full (may be NULL)
+ * @wait: SE wait structure to use when full (may be NULL)
* @tx_list: list of sdma_txreqs to submit
- * @count: pointer to a u32 which, after return will contain the total number of
- * sdma_txreqs removed from the tx_list. This will include sdma_txreqs
- * whose SDMA descriptors are submitted to the ring and the sdma_txreqs
- * which are added to SDMA engine flush list if the SDMA engine state is
- * not running.
+ * @count_out: pointer to a u16 which, after return will contain the total number of
+ * sdma_txreqs removed from the tx_list. This will include sdma_txreqs
+ * whose SDMA descriptors are submitted to the ring and the sdma_txreqs
+ * which are added to SDMA engine flush list if the SDMA engine state is
+ * not running.
*
* The call submits the list into the ring.
*
@@ -2447,8 +2412,8 @@ nodesc:
* -EINVAL - sdma_txreq incomplete, -EBUSY - no space in ring (wait == NULL)
* -EIOCBQUEUED - tx queued to iowait, -ECOMM bad sdma state
*/
-int sdma_send_txlist(struct sdma_engine *sde, struct iowait *wait,
- struct list_head *tx_list, u32 *count_out)
+int sdma_send_txlist(struct sdma_engine *sde, struct iowait_work *wait,
+ struct list_head *tx_list, u16 *count_out)
{
struct sdma_txreq *tx, *tx_next;
int ret = 0;
@@ -2459,7 +2424,7 @@ int sdma_send_txlist(struct sdma_engine *sde, struct iowait *wait,
spin_lock_irqsave(&sde->tail_lock, flags);
retry:
list_for_each_entry_safe(tx, tx_next, tx_list, list) {
- tx->wait = wait;
+ tx->wait = iowait_ioww_to_iow(wait);
if (unlikely(!__sdma_running(sde)))
goto unlock_noconn;
if (unlikely(tx->num_desc > sde->desc_avail))
@@ -2479,8 +2444,11 @@ retry:
}
update_tail:
total_count = submit_count + flush_count;
- if (wait)
- iowait_sdma_add(wait, total_count);
+ if (wait) {
+ iowait_sdma_add(iowait_ioww_to_iow(wait), total_count);
+ iowait_starve_clear(submit_count > 0,
+ iowait_ioww_to_iow(wait));
+ }
if (tail != INVALID_TAIL)
sdma_update_tail(sde, tail);
spin_unlock_irqrestore(&sde->tail_lock, flags);
@@ -2489,7 +2457,7 @@ update_tail:
unlock_noconn:
spin_lock(&sde->flushlist_lock);
list_for_each_entry_safe(tx, tx_next, tx_list, list) {
- tx->wait = wait;
+ tx->wait = iowait_ioww_to_iow(wait);
list_del_init(&tx->list);
tx->next_descq_idx = 0;
#ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
@@ -2498,17 +2466,14 @@ unlock_noconn:
#endif
list_add_tail(&tx->list, &sde->flushlist);
flush_count++;
- if (wait) {
- wait->tx_count++;
- wait->count += tx->num_desc;
- }
+ iowait_inc_wait_count(wait, tx->num_desc);
}
spin_unlock(&sde->flushlist_lock);
- schedule_work(&sde->flush_worker);
+ queue_work_on(sde->cpu, system_highpri_wq, &sde->flush_worker);
ret = -ECOMM;
goto update_tail;
nodesc:
- ret = sdma_check_progress(sde, wait, tx);
+ ret = sdma_check_progress(sde, wait, tx, submit_count > 0);
if (ret == -EAGAIN) {
ret = 0;
goto retry;
@@ -2560,7 +2525,7 @@ static void __sdma_process_event(struct sdma_engine *sde,
* 7220, e.g.
*/
ss->go_s99_running = 1;
- /* fall through and start dma engine */
+ fallthrough; /* and start dma engine */
case sdma_event_e10_go_hw_start:
/* This reference means the state machine is started */
sdma_get(&sde->state);
@@ -2702,7 +2667,6 @@ static void __sdma_process_event(struct sdma_engine *sde,
case sdma_event_e70_go_idle:
break;
case sdma_event_e85_link_down:
- /* fall through */
case sdma_event_e80_hw_freeze:
sdma_set_state(sde, sdma_state_s80_hw_freeze);
atomic_dec(&sde->dd->sdma_unfreeze_count);
@@ -2983,6 +2947,7 @@ static void __sdma_process_event(struct sdma_engine *sde,
case sdma_event_e60_hw_halted:
need_progress = 1;
sdma_err_progress_check_schedule(sde);
+ fallthrough;
case sdma_event_e90_sw_halted:
/*
* SW initiated halt does not perform engines
@@ -2996,7 +2961,7 @@ static void __sdma_process_event(struct sdma_engine *sde,
break;
case sdma_event_e85_link_down:
ss->go_s99_running = 0;
- /* fall through */
+ fallthrough;
case sdma_event_e80_hw_freeze:
sdma_set_state(sde, sdma_state_s80_hw_freeze);
atomic_dec(&sde->dd->sdma_unfreeze_count);
@@ -3031,6 +2996,7 @@ static void __sdma_process_event(struct sdma_engine *sde,
static int _extend_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx)
{
int i;
+ struct sdma_desc *descp;
/* Handle last descriptor */
if (unlikely((tx->num_desc == (MAX_DESC - 1)))) {
@@ -3051,12 +3017,10 @@ static int _extend_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx)
if (unlikely(tx->num_desc == MAX_DESC))
goto enomem;
- tx->descp = kmalloc_array(
- MAX_DESC,
- sizeof(struct sdma_desc),
- GFP_ATOMIC);
- if (!tx->descp)
+ descp = kmalloc_array(MAX_DESC, sizeof(struct sdma_desc), GFP_ATOMIC);
+ if (!descp)
goto enomem;
+ tx->descp = descp;
/* reserve last descriptor for coalescing */
tx->desc_limit = MAX_DESC - 1;
@@ -3106,7 +3070,7 @@ int ext_coal_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx,
}
if (type == SDMA_MAP_PAGE) {
- kvaddr = kmap(page);
+ kvaddr = kmap_local_page(page);
kvaddr += offset;
} else if (WARN_ON(!kvaddr)) {
__sdma_txclean(dd, tx);
@@ -3116,7 +3080,7 @@ int ext_coal_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx,
memcpy(tx->coalesce_buf + tx->coalesce_idx, kvaddr, len);
tx->coalesce_idx += len;
if (type == SDMA_MAP_PAGE)
- kunmap(page);
+ kunmap_local(kvaddr);
/* If there is more data, return */
if (tx->tlen - tx->coalesce_idx)
@@ -3146,7 +3110,7 @@ int ext_coal_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx,
/* Add descriptor for coalesce buffer */
tx->desc_limit = MAX_DESC;
return _sdma_txadd_daddr(dd, SDMA_MAP_SINGLE, tx,
- addr, tx->tlen);
+ addr, tx->tlen, NULL, NULL, NULL);
}
return 1;
@@ -3177,7 +3141,6 @@ int _pad_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx)
{
int rval = 0;
- tx->num_desc++;
if ((unlikely(tx->num_desc == tx->desc_limit))) {
rval = _extend_sdma_tx_descs(dd, tx);
if (rval) {
@@ -3185,12 +3148,15 @@ int _pad_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx)
return rval;
}
}
+
/* finish the one just added */
make_tx_sdma_desc(
tx,
SDMA_MAP_NONE,
dd->sdma_pad_phys,
- sizeof(u32) - (tx->packet_len & (sizeof(u32) - 1)));
+ sizeof(u32) - (tx->packet_len & (sizeof(u32) - 1)),
+ NULL, NULL, NULL);
+ tx->num_desc++;
_sdma_close_tx(dd, tx);
return rval;
}
@@ -3227,7 +3193,7 @@ void _sdma_txreq_ahgadd(
tx->num_desc++;
tx->descs[2].qw[0] = 0;
tx->descs[2].qw[1] = 0;
- /* FALLTHROUGH */
+ fallthrough;
case SDMA_AHG_APPLY_UPDATE2:
tx->num_desc++;
tx->descs[1].qw[0] = 0;
@@ -3272,7 +3238,7 @@ int sdma_ahg_alloc(struct sdma_engine *sde)
return -EINVAL;
}
while (1) {
- nr = ffz(ACCESS_ONCE(sde->ahg_bits));
+ nr = ffz(READ_ONCE(sde->ahg_bits));
if (nr > 31) {
trace_hfi1_ahg_allocate(sde, -ENOSPC);
return -ENOSPC;
diff --git a/drivers/infiniband/hw/hfi1/sdma.h b/drivers/infiniband/hw/hfi1/sdma.h
index 21f1e2834f37..91dfd5d0c419 100644
--- a/drivers/infiniband/hw/hfi1/sdma.h
+++ b/drivers/infiniband/hw/hfi1/sdma.h
@@ -1,52 +1,11 @@
-#ifndef _HFI1_SDMA_H
-#define _HFI1_SDMA_H
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
+ * Copyright(c) 2015 - 2018 Intel Corporation.
*/
+#ifndef _HFI1_SDMA_H
+#define _HFI1_SDMA_H
+
#include <linux/types.h>
#include <linux/list.h>
#include <asm/byteorder.h>
@@ -62,16 +21,6 @@
/* Hardware limit for SDMA packet size */
#define MAX_SDMA_PKT_SIZE ((16 * 1024) - 1)
-#define SDMA_TXREQ_S_OK 0
-#define SDMA_TXREQ_S_SENDERROR 1
-#define SDMA_TXREQ_S_ABORTED 2
-#define SDMA_TXREQ_S_SHUTDOWN 3
-
-/* flags bits */
-#define SDMA_TXREQ_F_URGENT 0x0001
-#define SDMA_TXREQ_F_AHG_COPY 0x0002
-#define SDMA_TXREQ_F_USE_AHG 0x0004
-
#define SDMA_MAP_NONE 0
#define SDMA_MAP_SINGLE 1
#define SDMA_MAP_PAGE 2
@@ -392,6 +341,7 @@ struct sdma_engine {
u64 progress_int_cnt;
/* private: */
+ seqlock_t waitlock;
struct list_head dmawait;
/* CONFIG SDMA for now, just blindly duplicate */
@@ -415,13 +365,14 @@ struct sdma_engine {
struct list_head flushlist;
struct cpumask cpu_mask;
struct kobject kobj;
+ u32 msix_intr;
};
int sdma_init(struct hfi1_devdata *dd, u8 port);
void sdma_start(struct hfi1_devdata *dd);
void sdma_exit(struct hfi1_devdata *dd);
+void sdma_clean(struct hfi1_devdata *dd, size_t num_engines);
void sdma_all_running(struct hfi1_devdata *dd);
-void sdma_all_idle(struct hfi1_devdata *dd);
void sdma_freeze_notify(struct hfi1_devdata *dd, int go_idle);
void sdma_freeze(struct hfi1_devdata *dd);
void sdma_unfreeze(struct hfi1_devdata *dd);
@@ -445,7 +396,7 @@ static inline u16 sdma_descq_freecnt(struct sdma_engine *sde)
{
return sde->descq_cnt -
(sde->descq_tail -
- ACCESS_ONCE(sde->descq_head)) - 1;
+ READ_ONCE(sde->descq_head)) - 1;
}
static inline u16 sdma_descq_inprocess(struct sdma_engine *sde)
@@ -643,7 +594,10 @@ static inline void make_tx_sdma_desc(
struct sdma_txreq *tx,
int type,
dma_addr_t addr,
- size_t len)
+ size_t len,
+ void *pinning_ctx,
+ void (*ctx_get)(void *),
+ void (*ctx_put)(void *))
{
struct sdma_desc *desc = &tx->descp[tx->num_desc];
@@ -660,6 +614,11 @@ static inline void make_tx_sdma_desc(
<< SDMA_DESC0_PHY_ADDR_SHIFT) |
(((u64)len & SDMA_DESC0_BYTE_COUNT_MASK)
<< SDMA_DESC0_BYTE_COUNT_SHIFT);
+
+ desc->pinning_ctx = pinning_ctx;
+ desc->ctx_put = ctx_put;
+ if (pinning_ctx && ctx_get)
+ ctx_get(pinning_ctx);
}
/* helper to extend txreq */
@@ -679,14 +638,13 @@ static inline void sdma_txclean(struct hfi1_devdata *dd, struct sdma_txreq *tx)
static inline void _sdma_close_tx(struct hfi1_devdata *dd,
struct sdma_txreq *tx)
{
- tx->descp[tx->num_desc].qw[0] |=
- SDMA_DESC0_LAST_DESC_FLAG;
- tx->descp[tx->num_desc].qw[1] |=
- dd->default_desc1;
+ u16 last_desc = tx->num_desc - 1;
+
+ tx->descp[last_desc].qw[0] |= SDMA_DESC0_LAST_DESC_FLAG;
+ tx->descp[last_desc].qw[1] |= dd->default_desc1;
if (tx->flags & SDMA_TXREQ_F_URGENT)
- tx->descp[tx->num_desc].qw[1] |=
- (SDMA_DESC1_HEAD_TO_HOST_FLAG |
- SDMA_DESC1_INT_REQ_FLAG);
+ tx->descp[last_desc].qw[1] |= (SDMA_DESC1_HEAD_TO_HOST_FLAG |
+ SDMA_DESC1_INT_REQ_FLAG);
}
static inline int _sdma_txadd_daddr(
@@ -694,15 +652,20 @@ static inline int _sdma_txadd_daddr(
int type,
struct sdma_txreq *tx,
dma_addr_t addr,
- u16 len)
+ u16 len,
+ void *pinning_ctx,
+ void (*ctx_get)(void *),
+ void (*ctx_put)(void *))
{
int rval = 0;
make_tx_sdma_desc(
tx,
type,
- addr, len);
+ addr, len,
+ pinning_ctx, ctx_get, ctx_put);
WARN_ON(len > tx->tlen);
+ tx->num_desc++;
tx->tlen -= len;
/* special cases for last */
if (!tx->tlen) {
@@ -714,7 +677,6 @@ static inline int _sdma_txadd_daddr(
_sdma_close_tx(dd, tx);
}
}
- tx->num_desc++;
return rval;
}
@@ -725,6 +687,14 @@ static inline int _sdma_txadd_daddr(
* @page: page to map
* @offset: offset within the page
* @len: length in bytes
+ * @pinning_ctx: context to be stored on struct sdma_desc .pinning_ctx. Not
+ * added if coalesce buffer is used. E.g. pointer to pinned-page
+ * cache entry for the sdma_desc.
+ * @ctx_get: optional function to take reference to @pinning_ctx. Not called if
+ * @pinning_ctx is NULL.
+ * @ctx_put: optional function to release reference to @pinning_ctx after
+ * sdma_desc completes. May be called in interrupt context so must
+ * not sleep. Not called if @pinning_ctx is NULL.
*
* This is used to add a page/offset/length descriptor.
*
@@ -739,7 +709,10 @@ static inline int sdma_txadd_page(
struct sdma_txreq *tx,
struct page *page,
unsigned long offset,
- u16 len)
+ u16 len,
+ void *pinning_ctx,
+ void (*ctx_get)(void *),
+ void (*ctx_put)(void *))
{
dma_addr_t addr;
int rval;
@@ -763,8 +736,8 @@ static inline int sdma_txadd_page(
return -ENOSPC;
}
- return _sdma_txadd_daddr(
- dd, SDMA_MAP_PAGE, tx, addr, len);
+ return _sdma_txadd_daddr(dd, SDMA_MAP_PAGE, tx, addr, len,
+ pinning_ctx, ctx_get, ctx_put);
}
/**
@@ -798,7 +771,8 @@ static inline int sdma_txadd_daddr(
return rval;
}
- return _sdma_txadd_daddr(dd, SDMA_MAP_NONE, tx, addr, len);
+ return _sdma_txadd_daddr(dd, SDMA_MAP_NONE, tx, addr, len,
+ NULL, NULL, NULL);
}
/**
@@ -844,19 +818,20 @@ static inline int sdma_txadd_kvaddr(
return -ENOSPC;
}
- return _sdma_txadd_daddr(
- dd, SDMA_MAP_SINGLE, tx, addr, len);
+ return _sdma_txadd_daddr(dd, SDMA_MAP_SINGLE, tx, addr, len,
+ NULL, NULL, NULL);
}
-struct iowait;
+struct iowait_work;
int sdma_send_txreq(struct sdma_engine *sde,
- struct iowait *wait,
- struct sdma_txreq *tx);
+ struct iowait_work *wait,
+ struct sdma_txreq *tx,
+ bool pkts_sent);
int sdma_send_txlist(struct sdma_engine *sde,
- struct iowait *wait,
+ struct iowait_work *wait,
struct list_head *tx_list,
- u32 *count);
+ u16 *count_out);
int sdma_ahg_alloc(struct sdma_engine *sde);
void sdma_ahg_free(struct sdma_engine *sde, int ahg_index);
@@ -913,24 +888,6 @@ static inline unsigned sdma_progress(struct sdma_engine *sde, unsigned seq,
return 0;
}
-/**
- * sdma_iowait_schedule() - initialize wait structure
- * @sde: sdma_engine to schedule
- * @wait: wait struct to schedule
- *
- * This function initializes the iowait
- * structure embedded in the QP or PQ.
- *
- */
-static inline void sdma_iowait_schedule(
- struct sdma_engine *sde,
- struct iowait *wait)
-{
- struct hfi1_pportdata *ppd = sde->dd->pport;
-
- iowait_schedule(wait, ppd->hfi1_wq, sde->cpu);
-}
-
/* for use by interrupt handling */
void sdma_engine_error(struct sdma_engine *sde, u64 status);
void sdma_engine_interrupt(struct sdma_engine *sde, u64 status);
@@ -966,34 +923,34 @@ void sdma_engine_interrupt(struct sdma_engine *sde, u64 status);
* | mask | --/ |--------------------|
* |--------------------------| -/ | * |
* | actual_vls (max 8) | -/ |--------------------|
- * |--------------------------| --/ | sde[n] -> eng n |
+ * |--------------------------| --/ | sde[n-1] -> eng n |
* | vls (max 8) | -/ +--------------------+
* |--------------------------| --/
* | map[0] |-/
- * |--------------------------| +--------------------+
- * | map[1] |--- | mask |
- * |--------------------------| \---- |--------------------|
- * | * | \-- | sde[0] -> eng 1+n |
- * | * | \---- |--------------------|
- * | * | \->| sde[1] -> eng 2+n |
- * |--------------------------| |--------------------|
- * | map[vls - 1] |- | * |
- * +--------------------------+ \- |--------------------|
- * \- | sde[m] -> eng m+n |
- * \ +--------------------+
+ * |--------------------------| +---------------------+
+ * | map[1] |--- | mask |
+ * |--------------------------| \---- |---------------------|
+ * | * | \-- | sde[0] -> eng 1+n |
+ * | * | \---- |---------------------|
+ * | * | \->| sde[1] -> eng 2+n |
+ * |--------------------------| |---------------------|
+ * | map[vls - 1] |- | * |
+ * +--------------------------+ \- |---------------------|
+ * \- | sde[m-1] -> eng m+n |
+ * \ +---------------------+
* \-
* \
- * \- +--------------------+
- * \- | mask |
- * \ |--------------------|
- * \- | sde[0] -> eng 1+m+n|
- * \- |--------------------|
- * >| sde[1] -> eng 2+m+n|
- * |--------------------|
- * | * |
- * |--------------------|
- * | sde[o] -> eng o+m+n|
- * +--------------------+
+ * \- +----------------------+
+ * \- | mask |
+ * \ |----------------------|
+ * \- | sde[0] -> eng 1+m+n |
+ * \- |----------------------|
+ * >| sde[1] -> eng 2+m+n |
+ * |----------------------|
+ * | * |
+ * |----------------------|
+ * | sde[o-1] -> eng o+m+n|
+ * +----------------------+
*
*/
@@ -1008,7 +965,7 @@ void sdma_engine_interrupt(struct sdma_engine *sde, u64 status);
*/
struct sdma_map_elem {
u32 mask;
- struct sdma_engine *sde[0];
+ struct sdma_engine *sde[];
};
/**
@@ -1030,7 +987,7 @@ struct sdma_vl_map {
u32 mask;
u8 actual_vls;
u8 vls;
- struct sdma_map_elem *map[0];
+ struct sdma_map_elem *map[];
};
int sdma_map_init(
@@ -1095,5 +1052,4 @@ u16 sdma_get_descq_cnt(void);
extern uint mod_num_sdma;
void sdma_update_lmc(struct hfi1_devdata *dd, u64 mask, u32 lid);
-
#endif
diff --git a/drivers/infiniband/hw/hfi1/sdma_txreq.h b/drivers/infiniband/hw/hfi1/sdma_txreq.h
index bf7d777d756e..5782166d984c 100644
--- a/drivers/infiniband/hw/hfi1/sdma_txreq.h
+++ b/drivers/infiniband/hw/hfi1/sdma_txreq.h
@@ -1,48 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
* Copyright(c) 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
*/
#ifndef HFI1_SDMA_TXREQ_H
@@ -61,6 +19,9 @@
struct sdma_desc {
/* private: don't use directly */
u64 qw[2];
+ void *pinning_ctx;
+ /* Release reference to @pinning_ctx. May be called in interrupt context. Must not sleep. */
+ void (*ctx_put)(void *ctx);
};
/**
@@ -91,6 +52,7 @@ struct sdma_desc {
#define SDMA_TXREQ_F_URGENT 0x0001
#define SDMA_TXREQ_F_AHG_COPY 0x0002
#define SDMA_TXREQ_F_USE_AHG 0x0004
+#define SDMA_TXREQ_F_VIP 0x0010
struct sdma_txreq;
typedef void (*callback_t)(struct sdma_txreq *, int);
diff --git a/drivers/infiniband/hw/hfi1/sysfs.c b/drivers/infiniband/hw/hfi1/sysfs.c
index 919a5474e651..372cfd13dc61 100644
--- a/drivers/infiniband/hw/hfi1/sysfs.c
+++ b/drivers/infiniband/hw/hfi1/sysfs.c
@@ -1,55 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
+ * Copyright(c) 2015-2017 Intel Corporation.
*/
+
#include <linux/ctype.h>
+#include <rdma/ib_sysfs.h>
#include "hfi.h"
#include "mad.h"
#include "trace.h"
+static struct hfi1_pportdata *hfi1_get_pportdata_kobj(struct kobject *kobj)
+{
+ u32 port_num;
+ struct ib_device *ibdev = ib_port_sysfs_get_ibdev_kobj(kobj, &port_num);
+ struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
+
+ return &dd->pport[port_num - 1];
+}
+
/*
* Start of per-port congestion control structures and support code
*/
@@ -57,13 +26,12 @@
/*
* Congestion control table size followed by table entries
*/
-static ssize_t read_cc_table_bin(struct file *filp, struct kobject *kobj,
- struct bin_attribute *bin_attr,
+static ssize_t cc_table_bin_read(struct file *filp, struct kobject *kobj,
+ const struct bin_attribute *bin_attr,
char *buf, loff_t pos, size_t count)
{
int ret;
- struct hfi1_pportdata *ppd =
- container_of(kobj, struct hfi1_pportdata, pport_cc_kobj);
+ struct hfi1_pportdata *ppd = hfi1_get_pportdata_kobj(kobj);
struct cc_state *cc_state;
ret = ppd->total_cct_entry * sizeof(struct ib_cc_table_entry_shadow)
@@ -89,30 +57,19 @@ static ssize_t read_cc_table_bin(struct file *filp, struct kobject *kobj,
return count;
}
-
-static void port_release(struct kobject *kobj)
-{
- /* nothing to do since memory is freed by hfi1_free_devdata() */
-}
-
-static struct bin_attribute cc_table_bin_attr = {
- .attr = {.name = "cc_table_bin", .mode = 0444},
- .read = read_cc_table_bin,
- .size = PAGE_SIZE,
-};
+static const BIN_ATTR_RO(cc_table_bin, PAGE_SIZE);
/*
* Congestion settings: port control, control map and an array of 16
* entries for the congestion entries - increase, timer, event log
* trigger threshold and the minimum injection rate delay.
*/
-static ssize_t read_cc_setting_bin(struct file *filp, struct kobject *kobj,
- struct bin_attribute *bin_attr,
+static ssize_t cc_setting_bin_read(struct file *filp, struct kobject *kobj,
+ const struct bin_attribute *bin_attr,
char *buf, loff_t pos, size_t count)
{
+ struct hfi1_pportdata *ppd = hfi1_get_pportdata_kobj(kobj);
int ret;
- struct hfi1_pportdata *ppd =
- container_of(kobj, struct hfi1_pportdata, pport_cc_kobj);
struct cc_state *cc_state;
ret = sizeof(struct opa_congestion_setting_attr_shadow);
@@ -136,27 +93,30 @@ static ssize_t read_cc_setting_bin(struct file *filp, struct kobject *kobj,
return count;
}
+static const BIN_ATTR_RO(cc_setting_bin, PAGE_SIZE);
-static struct bin_attribute cc_setting_bin_attr = {
- .attr = {.name = "cc_settings_bin", .mode = 0444},
- .read = read_cc_setting_bin,
- .size = PAGE_SIZE,
-};
-
-struct hfi1_port_attr {
- struct attribute attr;
- ssize_t (*show)(struct hfi1_pportdata *, char *);
- ssize_t (*store)(struct hfi1_pportdata *, const char *, size_t);
+static const struct bin_attribute *const port_cc_bin_attributes[] = {
+ &bin_attr_cc_setting_bin,
+ &bin_attr_cc_table_bin,
+ NULL
};
-static ssize_t cc_prescan_show(struct hfi1_pportdata *ppd, char *buf)
+static ssize_t cc_prescan_show(struct ib_device *ibdev, u32 port_num,
+ struct ib_port_attribute *attr, char *buf)
{
- return sprintf(buf, "%s\n", ppd->cc_prescan ? "on" : "off");
+ struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
+ struct hfi1_pportdata *ppd = &dd->pport[port_num - 1];
+
+ return sysfs_emit(buf, "%s\n", ppd->cc_prescan ? "on" : "off");
}
-static ssize_t cc_prescan_store(struct hfi1_pportdata *ppd, const char *buf,
+static ssize_t cc_prescan_store(struct ib_device *ibdev, u32 port_num,
+ struct ib_port_attribute *attr, const char *buf,
size_t count)
{
+ struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
+ struct hfi1_pportdata *ppd = &dd->pport[port_num - 1];
+
if (!memcmp(buf, "on", 2))
ppd->cc_prescan = true;
else if (!memcmp(buf, "off", 3))
@@ -164,59 +124,41 @@ static ssize_t cc_prescan_store(struct hfi1_pportdata *ppd, const char *buf,
return count;
}
+static IB_PORT_ATTR_ADMIN_RW(cc_prescan);
-static struct hfi1_port_attr cc_prescan_attr =
- __ATTR(cc_prescan, 0600, cc_prescan_show, cc_prescan_store);
-
-static ssize_t cc_attr_show(struct kobject *kobj, struct attribute *attr,
- char *buf)
-{
- struct hfi1_port_attr *port_attr =
- container_of(attr, struct hfi1_port_attr, attr);
- struct hfi1_pportdata *ppd =
- container_of(kobj, struct hfi1_pportdata, pport_cc_kobj);
-
- return port_attr->show(ppd, buf);
-}
-
-static ssize_t cc_attr_store(struct kobject *kobj, struct attribute *attr,
- const char *buf, size_t count)
-{
- struct hfi1_port_attr *port_attr =
- container_of(attr, struct hfi1_port_attr, attr);
- struct hfi1_pportdata *ppd =
- container_of(kobj, struct hfi1_pportdata, pport_cc_kobj);
-
- return port_attr->store(ppd, buf, count);
-}
-
-static const struct sysfs_ops port_cc_sysfs_ops = {
- .show = cc_attr_show,
- .store = cc_attr_store
-};
-
-static struct attribute *port_cc_default_attributes[] = {
- &cc_prescan_attr.attr
+static struct attribute *port_cc_attributes[] = {
+ &ib_port_attr_cc_prescan.attr,
+ NULL
};
-static struct kobj_type port_cc_ktype = {
- .release = port_release,
- .sysfs_ops = &port_cc_sysfs_ops,
- .default_attrs = port_cc_default_attributes
+static const struct attribute_group port_cc_group = {
+ .name = "CCMgtA",
+ .attrs = port_cc_attributes,
+ .bin_attrs = port_cc_bin_attributes,
};
/* Start sc2vl */
-#define HFI1_SC2VL_ATTR(N) \
- static struct hfi1_sc2vl_attr hfi1_sc2vl_attr_##N = { \
- .attr = { .name = __stringify(N), .mode = 0444 }, \
- .sc = N \
- }
-
struct hfi1_sc2vl_attr {
- struct attribute attr;
+ struct ib_port_attribute attr;
int sc;
};
+static ssize_t sc2vl_attr_show(struct ib_device *ibdev, u32 port_num,
+ struct ib_port_attribute *attr, char *buf)
+{
+ struct hfi1_sc2vl_attr *sattr =
+ container_of(attr, struct hfi1_sc2vl_attr, attr);
+ struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
+
+ return sysfs_emit(buf, "%u\n", *((u8 *)dd->sc2vl + sattr->sc));
+}
+
+#define HFI1_SC2VL_ATTR(N) \
+ static struct hfi1_sc2vl_attr hfi1_sc2vl_attr_##N = { \
+ .attr = __ATTR(N, 0444, sc2vl_attr_show, NULL), \
+ .sc = N, \
+ }
+
HFI1_SC2VL_ATTR(0);
HFI1_SC2VL_ATTR(1);
HFI1_SC2VL_ATTR(2);
@@ -250,78 +192,70 @@ HFI1_SC2VL_ATTR(29);
HFI1_SC2VL_ATTR(30);
HFI1_SC2VL_ATTR(31);
-static struct attribute *sc2vl_default_attributes[] = {
- &hfi1_sc2vl_attr_0.attr,
- &hfi1_sc2vl_attr_1.attr,
- &hfi1_sc2vl_attr_2.attr,
- &hfi1_sc2vl_attr_3.attr,
- &hfi1_sc2vl_attr_4.attr,
- &hfi1_sc2vl_attr_5.attr,
- &hfi1_sc2vl_attr_6.attr,
- &hfi1_sc2vl_attr_7.attr,
- &hfi1_sc2vl_attr_8.attr,
- &hfi1_sc2vl_attr_9.attr,
- &hfi1_sc2vl_attr_10.attr,
- &hfi1_sc2vl_attr_11.attr,
- &hfi1_sc2vl_attr_12.attr,
- &hfi1_sc2vl_attr_13.attr,
- &hfi1_sc2vl_attr_14.attr,
- &hfi1_sc2vl_attr_15.attr,
- &hfi1_sc2vl_attr_16.attr,
- &hfi1_sc2vl_attr_17.attr,
- &hfi1_sc2vl_attr_18.attr,
- &hfi1_sc2vl_attr_19.attr,
- &hfi1_sc2vl_attr_20.attr,
- &hfi1_sc2vl_attr_21.attr,
- &hfi1_sc2vl_attr_22.attr,
- &hfi1_sc2vl_attr_23.attr,
- &hfi1_sc2vl_attr_24.attr,
- &hfi1_sc2vl_attr_25.attr,
- &hfi1_sc2vl_attr_26.attr,
- &hfi1_sc2vl_attr_27.attr,
- &hfi1_sc2vl_attr_28.attr,
- &hfi1_sc2vl_attr_29.attr,
- &hfi1_sc2vl_attr_30.attr,
- &hfi1_sc2vl_attr_31.attr,
+static struct attribute *port_sc2vl_attributes[] = {
+ &hfi1_sc2vl_attr_0.attr.attr,
+ &hfi1_sc2vl_attr_1.attr.attr,
+ &hfi1_sc2vl_attr_2.attr.attr,
+ &hfi1_sc2vl_attr_3.attr.attr,
+ &hfi1_sc2vl_attr_4.attr.attr,
+ &hfi1_sc2vl_attr_5.attr.attr,
+ &hfi1_sc2vl_attr_6.attr.attr,
+ &hfi1_sc2vl_attr_7.attr.attr,
+ &hfi1_sc2vl_attr_8.attr.attr,
+ &hfi1_sc2vl_attr_9.attr.attr,
+ &hfi1_sc2vl_attr_10.attr.attr,
+ &hfi1_sc2vl_attr_11.attr.attr,
+ &hfi1_sc2vl_attr_12.attr.attr,
+ &hfi1_sc2vl_attr_13.attr.attr,
+ &hfi1_sc2vl_attr_14.attr.attr,
+ &hfi1_sc2vl_attr_15.attr.attr,
+ &hfi1_sc2vl_attr_16.attr.attr,
+ &hfi1_sc2vl_attr_17.attr.attr,
+ &hfi1_sc2vl_attr_18.attr.attr,
+ &hfi1_sc2vl_attr_19.attr.attr,
+ &hfi1_sc2vl_attr_20.attr.attr,
+ &hfi1_sc2vl_attr_21.attr.attr,
+ &hfi1_sc2vl_attr_22.attr.attr,
+ &hfi1_sc2vl_attr_23.attr.attr,
+ &hfi1_sc2vl_attr_24.attr.attr,
+ &hfi1_sc2vl_attr_25.attr.attr,
+ &hfi1_sc2vl_attr_26.attr.attr,
+ &hfi1_sc2vl_attr_27.attr.attr,
+ &hfi1_sc2vl_attr_28.attr.attr,
+ &hfi1_sc2vl_attr_29.attr.attr,
+ &hfi1_sc2vl_attr_30.attr.attr,
+ &hfi1_sc2vl_attr_31.attr.attr,
NULL
};
-static ssize_t sc2vl_attr_show(struct kobject *kobj, struct attribute *attr,
- char *buf)
-{
- struct hfi1_sc2vl_attr *sattr =
- container_of(attr, struct hfi1_sc2vl_attr, attr);
- struct hfi1_pportdata *ppd =
- container_of(kobj, struct hfi1_pportdata, sc2vl_kobj);
- struct hfi1_devdata *dd = ppd->dd;
-
- return sprintf(buf, "%u\n", *((u8 *)dd->sc2vl + sattr->sc));
-}
-
-static const struct sysfs_ops hfi1_sc2vl_ops = {
- .show = sc2vl_attr_show,
+static const struct attribute_group port_sc2vl_group = {
+ .name = "sc2vl",
+ .attrs = port_sc2vl_attributes,
};
-
-static struct kobj_type hfi1_sc2vl_ktype = {
- .release = port_release,
- .sysfs_ops = &hfi1_sc2vl_ops,
- .default_attrs = sc2vl_default_attributes
-};
-
/* End sc2vl */
/* Start sl2sc */
-#define HFI1_SL2SC_ATTR(N) \
- static struct hfi1_sl2sc_attr hfi1_sl2sc_attr_##N = { \
- .attr = { .name = __stringify(N), .mode = 0444 }, \
- .sl = N \
- }
-
struct hfi1_sl2sc_attr {
- struct attribute attr;
+ struct ib_port_attribute attr;
int sl;
};
+static ssize_t sl2sc_attr_show(struct ib_device *ibdev, u32 port_num,
+ struct ib_port_attribute *attr, char *buf)
+{
+ struct hfi1_sl2sc_attr *sattr =
+ container_of(attr, struct hfi1_sl2sc_attr, attr);
+ struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
+ struct hfi1_ibport *ibp = &dd->pport[port_num - 1].ibport_data;
+
+ return sysfs_emit(buf, "%u\n", ibp->sl_to_sc[sattr->sl]);
+}
+
+#define HFI1_SL2SC_ATTR(N) \
+ static struct hfi1_sl2sc_attr hfi1_sl2sc_attr_##N = { \
+ .attr = __ATTR(N, 0444, sl2sc_attr_show, NULL), .sl = N \
+ }
+
HFI1_SL2SC_ATTR(0);
HFI1_SL2SC_ATTR(1);
HFI1_SL2SC_ATTR(2);
@@ -355,79 +289,72 @@ HFI1_SL2SC_ATTR(29);
HFI1_SL2SC_ATTR(30);
HFI1_SL2SC_ATTR(31);
-static struct attribute *sl2sc_default_attributes[] = {
- &hfi1_sl2sc_attr_0.attr,
- &hfi1_sl2sc_attr_1.attr,
- &hfi1_sl2sc_attr_2.attr,
- &hfi1_sl2sc_attr_3.attr,
- &hfi1_sl2sc_attr_4.attr,
- &hfi1_sl2sc_attr_5.attr,
- &hfi1_sl2sc_attr_6.attr,
- &hfi1_sl2sc_attr_7.attr,
- &hfi1_sl2sc_attr_8.attr,
- &hfi1_sl2sc_attr_9.attr,
- &hfi1_sl2sc_attr_10.attr,
- &hfi1_sl2sc_attr_11.attr,
- &hfi1_sl2sc_attr_12.attr,
- &hfi1_sl2sc_attr_13.attr,
- &hfi1_sl2sc_attr_14.attr,
- &hfi1_sl2sc_attr_15.attr,
- &hfi1_sl2sc_attr_16.attr,
- &hfi1_sl2sc_attr_17.attr,
- &hfi1_sl2sc_attr_18.attr,
- &hfi1_sl2sc_attr_19.attr,
- &hfi1_sl2sc_attr_20.attr,
- &hfi1_sl2sc_attr_21.attr,
- &hfi1_sl2sc_attr_22.attr,
- &hfi1_sl2sc_attr_23.attr,
- &hfi1_sl2sc_attr_24.attr,
- &hfi1_sl2sc_attr_25.attr,
- &hfi1_sl2sc_attr_26.attr,
- &hfi1_sl2sc_attr_27.attr,
- &hfi1_sl2sc_attr_28.attr,
- &hfi1_sl2sc_attr_29.attr,
- &hfi1_sl2sc_attr_30.attr,
- &hfi1_sl2sc_attr_31.attr,
+static struct attribute *port_sl2sc_attributes[] = {
+ &hfi1_sl2sc_attr_0.attr.attr,
+ &hfi1_sl2sc_attr_1.attr.attr,
+ &hfi1_sl2sc_attr_2.attr.attr,
+ &hfi1_sl2sc_attr_3.attr.attr,
+ &hfi1_sl2sc_attr_4.attr.attr,
+ &hfi1_sl2sc_attr_5.attr.attr,
+ &hfi1_sl2sc_attr_6.attr.attr,
+ &hfi1_sl2sc_attr_7.attr.attr,
+ &hfi1_sl2sc_attr_8.attr.attr,
+ &hfi1_sl2sc_attr_9.attr.attr,
+ &hfi1_sl2sc_attr_10.attr.attr,
+ &hfi1_sl2sc_attr_11.attr.attr,
+ &hfi1_sl2sc_attr_12.attr.attr,
+ &hfi1_sl2sc_attr_13.attr.attr,
+ &hfi1_sl2sc_attr_14.attr.attr,
+ &hfi1_sl2sc_attr_15.attr.attr,
+ &hfi1_sl2sc_attr_16.attr.attr,
+ &hfi1_sl2sc_attr_17.attr.attr,
+ &hfi1_sl2sc_attr_18.attr.attr,
+ &hfi1_sl2sc_attr_19.attr.attr,
+ &hfi1_sl2sc_attr_20.attr.attr,
+ &hfi1_sl2sc_attr_21.attr.attr,
+ &hfi1_sl2sc_attr_22.attr.attr,
+ &hfi1_sl2sc_attr_23.attr.attr,
+ &hfi1_sl2sc_attr_24.attr.attr,
+ &hfi1_sl2sc_attr_25.attr.attr,
+ &hfi1_sl2sc_attr_26.attr.attr,
+ &hfi1_sl2sc_attr_27.attr.attr,
+ &hfi1_sl2sc_attr_28.attr.attr,
+ &hfi1_sl2sc_attr_29.attr.attr,
+ &hfi1_sl2sc_attr_30.attr.attr,
+ &hfi1_sl2sc_attr_31.attr.attr,
NULL
};
-static ssize_t sl2sc_attr_show(struct kobject *kobj, struct attribute *attr,
- char *buf)
-{
- struct hfi1_sl2sc_attr *sattr =
- container_of(attr, struct hfi1_sl2sc_attr, attr);
- struct hfi1_pportdata *ppd =
- container_of(kobj, struct hfi1_pportdata, sl2sc_kobj);
- struct hfi1_ibport *ibp = &ppd->ibport_data;
-
- return sprintf(buf, "%u\n", ibp->sl_to_sc[sattr->sl]);
-}
-
-static const struct sysfs_ops hfi1_sl2sc_ops = {
- .show = sl2sc_attr_show,
-};
-
-static struct kobj_type hfi1_sl2sc_ktype = {
- .release = port_release,
- .sysfs_ops = &hfi1_sl2sc_ops,
- .default_attrs = sl2sc_default_attributes
+static const struct attribute_group port_sl2sc_group = {
+ .name = "sl2sc",
+ .attrs = port_sl2sc_attributes,
};
/* End sl2sc */
/* Start vl2mtu */
-#define HFI1_VL2MTU_ATTR(N) \
- static struct hfi1_vl2mtu_attr hfi1_vl2mtu_attr_##N = { \
- .attr = { .name = __stringify(N), .mode = 0444 }, \
- .vl = N \
- }
-
struct hfi1_vl2mtu_attr {
- struct attribute attr;
+ struct ib_port_attribute attr;
int vl;
};
+static ssize_t vl2mtu_attr_show(struct ib_device *ibdev, u32 port_num,
+ struct ib_port_attribute *attr, char *buf)
+{
+ struct hfi1_vl2mtu_attr *vlattr =
+ container_of(attr, struct hfi1_vl2mtu_attr, attr);
+ struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
+
+ return sysfs_emit(buf, "%u\n", dd->vld[vlattr->vl].mtu);
+}
+
+#define HFI1_VL2MTU_ATTR(N) \
+ static struct hfi1_vl2mtu_attr hfi1_vl2mtu_attr_##N = { \
+ .attr = __ATTR(N, 0444, vl2mtu_attr_show, NULL), \
+ .vl = N, \
+ }
+
HFI1_VL2MTU_ATTR(0);
HFI1_VL2MTU_ATTR(1);
HFI1_VL2MTU_ATTR(2);
@@ -445,46 +372,29 @@ HFI1_VL2MTU_ATTR(13);
HFI1_VL2MTU_ATTR(14);
HFI1_VL2MTU_ATTR(15);
-static struct attribute *vl2mtu_default_attributes[] = {
- &hfi1_vl2mtu_attr_0.attr,
- &hfi1_vl2mtu_attr_1.attr,
- &hfi1_vl2mtu_attr_2.attr,
- &hfi1_vl2mtu_attr_3.attr,
- &hfi1_vl2mtu_attr_4.attr,
- &hfi1_vl2mtu_attr_5.attr,
- &hfi1_vl2mtu_attr_6.attr,
- &hfi1_vl2mtu_attr_7.attr,
- &hfi1_vl2mtu_attr_8.attr,
- &hfi1_vl2mtu_attr_9.attr,
- &hfi1_vl2mtu_attr_10.attr,
- &hfi1_vl2mtu_attr_11.attr,
- &hfi1_vl2mtu_attr_12.attr,
- &hfi1_vl2mtu_attr_13.attr,
- &hfi1_vl2mtu_attr_14.attr,
- &hfi1_vl2mtu_attr_15.attr,
+static struct attribute *port_vl2mtu_attributes[] = {
+ &hfi1_vl2mtu_attr_0.attr.attr,
+ &hfi1_vl2mtu_attr_1.attr.attr,
+ &hfi1_vl2mtu_attr_2.attr.attr,
+ &hfi1_vl2mtu_attr_3.attr.attr,
+ &hfi1_vl2mtu_attr_4.attr.attr,
+ &hfi1_vl2mtu_attr_5.attr.attr,
+ &hfi1_vl2mtu_attr_6.attr.attr,
+ &hfi1_vl2mtu_attr_7.attr.attr,
+ &hfi1_vl2mtu_attr_8.attr.attr,
+ &hfi1_vl2mtu_attr_9.attr.attr,
+ &hfi1_vl2mtu_attr_10.attr.attr,
+ &hfi1_vl2mtu_attr_11.attr.attr,
+ &hfi1_vl2mtu_attr_12.attr.attr,
+ &hfi1_vl2mtu_attr_13.attr.attr,
+ &hfi1_vl2mtu_attr_14.attr.attr,
+ &hfi1_vl2mtu_attr_15.attr.attr,
NULL
};
-static ssize_t vl2mtu_attr_show(struct kobject *kobj, struct attribute *attr,
- char *buf)
-{
- struct hfi1_vl2mtu_attr *vlattr =
- container_of(attr, struct hfi1_vl2mtu_attr, attr);
- struct hfi1_pportdata *ppd =
- container_of(kobj, struct hfi1_pportdata, vl2mtu_kobj);
- struct hfi1_devdata *dd = ppd->dd;
-
- return sprintf(buf, "%u\n", dd->vld[vlattr->vl].mtu);
-}
-
-static const struct sysfs_ops hfi1_vl2mtu_ops = {
- .show = vl2mtu_attr_show,
-};
-
-static struct kobj_type hfi1_vl2mtu_ktype = {
- .release = port_release,
- .sysfs_ops = &hfi1_vl2mtu_ops,
- .default_attrs = vl2mtu_default_attributes
+static const struct attribute_group port_vl2mtu_group = {
+ .name = "vl2mtu",
+ .attrs = port_vl2mtu_attributes,
};
/* end of per-port file structures and support code */
@@ -493,46 +403,47 @@ static struct kobj_type hfi1_vl2mtu_ktype = {
* Start of per-unit (or driver, in some cases, but replicated
* per unit) functions (these get a device *)
*/
-static ssize_t show_rev(struct device *device, struct device_attribute *attr,
- char *buf)
+static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr,
+ char *buf)
{
struct hfi1_ibdev *dev =
- container_of(device, struct hfi1_ibdev, rdi.ibdev.dev);
+ rdma_device_to_drv_device(device, struct hfi1_ibdev, rdi.ibdev);
- return sprintf(buf, "%x\n", dd_from_dev(dev)->minrev);
+ return sysfs_emit(buf, "%x\n", dd_from_dev(dev)->minrev);
}
+static DEVICE_ATTR_RO(hw_rev);
-static ssize_t show_hfi(struct device *device, struct device_attribute *attr,
- char *buf)
+static ssize_t board_id_show(struct device *device,
+ struct device_attribute *attr, char *buf)
{
struct hfi1_ibdev *dev =
- container_of(device, struct hfi1_ibdev, rdi.ibdev.dev);
+ rdma_device_to_drv_device(device, struct hfi1_ibdev, rdi.ibdev);
struct hfi1_devdata *dd = dd_from_dev(dev);
- int ret;
if (!dd->boardname)
- ret = -EINVAL;
- else
- ret = scnprintf(buf, PAGE_SIZE, "%s\n", dd->boardname);
- return ret;
+ return -EINVAL;
+
+ return sysfs_emit(buf, "%s\n", dd->boardname);
}
+static DEVICE_ATTR_RO(board_id);
-static ssize_t show_boardversion(struct device *device,
+static ssize_t boardversion_show(struct device *device,
struct device_attribute *attr, char *buf)
{
struct hfi1_ibdev *dev =
- container_of(device, struct hfi1_ibdev, rdi.ibdev.dev);
+ rdma_device_to_drv_device(device, struct hfi1_ibdev, rdi.ibdev);
struct hfi1_devdata *dd = dd_from_dev(dev);
/* The string printed here is already newline-terminated. */
- return scnprintf(buf, PAGE_SIZE, "%s", dd->boardversion);
+ return sysfs_emit(buf, "%s", dd->boardversion);
}
+static DEVICE_ATTR_RO(boardversion);
-static ssize_t show_nctxts(struct device *device,
+static ssize_t nctxts_show(struct device *device,
struct device_attribute *attr, char *buf)
{
struct hfi1_ibdev *dev =
- container_of(device, struct hfi1_ibdev, rdi.ibdev.dev);
+ rdma_device_to_drv_device(device, struct hfi1_ibdev, rdi.ibdev);
struct hfi1_devdata *dd = dd_from_dev(dev);
/*
@@ -541,38 +452,42 @@ static ssize_t show_nctxts(struct device *device,
* and a receive context, so returning the smaller of the two counts
* give a more accurate picture of total contexts available.
*/
- return scnprintf(buf, PAGE_SIZE, "%u\n",
- min(dd->num_rcv_contexts - dd->first_user_ctxt,
- (u32)dd->sc_sizes[SC_USER].count));
+ return sysfs_emit(buf, "%u\n",
+ min(dd->num_user_contexts,
+ (u32)dd->sc_sizes[SC_USER].count));
}
+static DEVICE_ATTR_RO(nctxts);
-static ssize_t show_nfreectxts(struct device *device,
+static ssize_t nfreectxts_show(struct device *device,
struct device_attribute *attr, char *buf)
{
struct hfi1_ibdev *dev =
- container_of(device, struct hfi1_ibdev, rdi.ibdev.dev);
+ rdma_device_to_drv_device(device, struct hfi1_ibdev, rdi.ibdev);
struct hfi1_devdata *dd = dd_from_dev(dev);
/* Return the number of free user ports (contexts) available. */
- return scnprintf(buf, PAGE_SIZE, "%u\n", dd->freectxts);
+ return sysfs_emit(buf, "%u\n", dd->freectxts);
}
+static DEVICE_ATTR_RO(nfreectxts);
-static ssize_t show_serial(struct device *device,
+static ssize_t serial_show(struct device *device,
struct device_attribute *attr, char *buf)
{
struct hfi1_ibdev *dev =
- container_of(device, struct hfi1_ibdev, rdi.ibdev.dev);
+ rdma_device_to_drv_device(device, struct hfi1_ibdev, rdi.ibdev);
struct hfi1_devdata *dd = dd_from_dev(dev);
- return scnprintf(buf, PAGE_SIZE, "%s", dd->serial);
+ /* dd->serial is already newline terminated in chip.c */
+ return sysfs_emit(buf, "%s", dd->serial);
}
+static DEVICE_ATTR_RO(serial);
-static ssize_t store_chip_reset(struct device *device,
+static ssize_t chip_reset_store(struct device *device,
struct device_attribute *attr, const char *buf,
size_t count)
{
struct hfi1_ibdev *dev =
- container_of(device, struct hfi1_ibdev, rdi.ibdev.dev);
+ rdma_device_to_drv_device(device, struct hfi1_ibdev, rdi.ibdev);
struct hfi1_devdata *dd = dd_from_dev(dev);
int ret;
@@ -585,42 +500,41 @@ static ssize_t store_chip_reset(struct device *device,
bail:
return ret < 0 ? ret : count;
}
+static DEVICE_ATTR_WO(chip_reset);
/*
* Convert the reported temperature from an integer (reported in
* units of 0.25C) to a floating point number.
*/
-#define temp2str(temp, buf, size, idx) \
- scnprintf((buf) + (idx), (size) - (idx), "%u.%02u ", \
- ((temp) >> 2), ((temp) & 0x3) * 25)
+#define temp_d(t) ((t) >> 2)
+#define temp_f(t) (((t)&0x3) * 25u)
/*
* Dump tempsense values, in decimal, to ease shell-scripts.
*/
-static ssize_t show_tempsense(struct device *device,
+static ssize_t tempsense_show(struct device *device,
struct device_attribute *attr, char *buf)
{
struct hfi1_ibdev *dev =
- container_of(device, struct hfi1_ibdev, rdi.ibdev.dev);
+ rdma_device_to_drv_device(device, struct hfi1_ibdev, rdi.ibdev);
struct hfi1_devdata *dd = dd_from_dev(dev);
struct hfi1_temp temp;
int ret;
ret = hfi1_tempsense_rd(dd, &temp);
- if (!ret) {
- int idx = 0;
-
- idx += temp2str(temp.curr, buf, PAGE_SIZE, idx);
- idx += temp2str(temp.lo_lim, buf, PAGE_SIZE, idx);
- idx += temp2str(temp.hi_lim, buf, PAGE_SIZE, idx);
- idx += temp2str(temp.crit_lim, buf, PAGE_SIZE, idx);
- idx += scnprintf(buf + idx, PAGE_SIZE - idx,
- "%u %u %u\n", temp.triggers & 0x1,
- temp.triggers & 0x2, temp.triggers & 0x4);
- ret = idx;
- }
- return ret;
+ if (ret)
+ return ret;
+
+ return sysfs_emit(buf, "%u.%02u %u.%02u %u.%02u %u.%02u %u %u %u\n",
+ temp_d(temp.curr), temp_f(temp.curr),
+ temp_d(temp.lo_lim), temp_f(temp.lo_lim),
+ temp_d(temp.hi_lim), temp_f(temp.hi_lim),
+ temp_d(temp.crit_lim), temp_f(temp.crit_lim),
+ temp.triggers & 0x1,
+ temp.triggers & 0x2,
+ temp.triggers & 0x4);
}
+static DEVICE_ATTR_RO(tempsense);
/*
* end of per-unit (or driver, in some cases, but replicated
@@ -628,118 +542,29 @@ static ssize_t show_tempsense(struct device *device,
*/
/* start of per-unit file structures and support code */
-static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
-static DEVICE_ATTR(board_id, S_IRUGO, show_hfi, NULL);
-static DEVICE_ATTR(nctxts, S_IRUGO, show_nctxts, NULL);
-static DEVICE_ATTR(nfreectxts, S_IRUGO, show_nfreectxts, NULL);
-static DEVICE_ATTR(serial, S_IRUGO, show_serial, NULL);
-static DEVICE_ATTR(boardversion, S_IRUGO, show_boardversion, NULL);
-static DEVICE_ATTR(tempsense, S_IRUGO, show_tempsense, NULL);
-static DEVICE_ATTR(chip_reset, S_IWUSR, NULL, store_chip_reset);
-
-static struct device_attribute *hfi1_attributes[] = {
- &dev_attr_hw_rev,
- &dev_attr_board_id,
- &dev_attr_nctxts,
- &dev_attr_nfreectxts,
- &dev_attr_serial,
- &dev_attr_boardversion,
- &dev_attr_tempsense,
- &dev_attr_chip_reset,
+static struct attribute *hfi1_attributes[] = {
+ &dev_attr_hw_rev.attr,
+ &dev_attr_board_id.attr,
+ &dev_attr_nctxts.attr,
+ &dev_attr_nfreectxts.attr,
+ &dev_attr_serial.attr,
+ &dev_attr_boardversion.attr,
+ &dev_attr_tempsense.attr,
+ &dev_attr_chip_reset.attr,
+ NULL,
};
-int hfi1_create_port_files(struct ib_device *ibdev, u8 port_num,
- struct kobject *kobj)
-{
- struct hfi1_pportdata *ppd;
- struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
- int ret;
-
- if (!port_num || port_num > dd->num_pports) {
- dd_dev_err(dd,
- "Skipping infiniband class with invalid port %u\n",
- port_num);
- return -ENODEV;
- }
- ppd = &dd->pport[port_num - 1];
-
- ret = kobject_init_and_add(&ppd->sc2vl_kobj, &hfi1_sc2vl_ktype, kobj,
- "sc2vl");
- if (ret) {
- dd_dev_err(dd,
- "Skipping sc2vl sysfs info, (err %d) port %u\n",
- ret, port_num);
- goto bail;
- }
- kobject_uevent(&ppd->sc2vl_kobj, KOBJ_ADD);
-
- ret = kobject_init_and_add(&ppd->sl2sc_kobj, &hfi1_sl2sc_ktype, kobj,
- "sl2sc");
- if (ret) {
- dd_dev_err(dd,
- "Skipping sl2sc sysfs info, (err %d) port %u\n",
- ret, port_num);
- goto bail_sc2vl;
- }
- kobject_uevent(&ppd->sl2sc_kobj, KOBJ_ADD);
-
- ret = kobject_init_and_add(&ppd->vl2mtu_kobj, &hfi1_vl2mtu_ktype, kobj,
- "vl2mtu");
- if (ret) {
- dd_dev_err(dd,
- "Skipping vl2mtu sysfs info, (err %d) port %u\n",
- ret, port_num);
- goto bail_sl2sc;
- }
- kobject_uevent(&ppd->vl2mtu_kobj, KOBJ_ADD);
-
- ret = kobject_init_and_add(&ppd->pport_cc_kobj, &port_cc_ktype,
- kobj, "CCMgtA");
- if (ret) {
- dd_dev_err(dd,
- "Skipping Congestion Control sysfs info, (err %d) port %u\n",
- ret, port_num);
- goto bail_vl2mtu;
- }
-
- kobject_uevent(&ppd->pport_cc_kobj, KOBJ_ADD);
-
- ret = sysfs_create_bin_file(&ppd->pport_cc_kobj, &cc_setting_bin_attr);
- if (ret) {
- dd_dev_err(dd,
- "Skipping Congestion Control setting sysfs info, (err %d) port %u\n",
- ret, port_num);
- goto bail_cc;
- }
-
- ret = sysfs_create_bin_file(&ppd->pport_cc_kobj, &cc_table_bin_attr);
- if (ret) {
- dd_dev_err(dd,
- "Skipping Congestion Control table sysfs info, (err %d) port %u\n",
- ret, port_num);
- goto bail_cc_entry_bin;
- }
-
- dd_dev_info(dd,
- "Congestion Control Agent enabled for port %d\n",
- port_num);
-
- return 0;
+const struct attribute_group ib_hfi1_attr_group = {
+ .attrs = hfi1_attributes,
+};
-bail_cc_entry_bin:
- sysfs_remove_bin_file(&ppd->pport_cc_kobj,
- &cc_setting_bin_attr);
-bail_cc:
- kobject_put(&ppd->pport_cc_kobj);
-bail_vl2mtu:
- kobject_put(&ppd->vl2mtu_kobj);
-bail_sl2sc:
- kobject_put(&ppd->sl2sc_kobj);
-bail_sc2vl:
- kobject_put(&ppd->sc2vl_kobj);
-bail:
- return ret;
-}
+const struct attribute_group *hfi1_attr_port_groups[] = {
+ &port_cc_group,
+ &port_sc2vl_group,
+ &port_sl2sc_group,
+ &port_vl2mtu_group,
+ NULL,
+};
struct sde_attribute {
struct attribute attr;
@@ -809,7 +634,7 @@ static ssize_t sde_show_vl(struct sdma_engine *sde, char *buf)
if (vl < 0)
return vl;
- return snprintf(buf, PAGE_SIZE, "%d\n", vl);
+ return sysfs_emit(buf, "%d\n", vl);
}
static SDE_ATTR(cpu_list, S_IWUSR | S_IRUGO,
@@ -831,12 +656,6 @@ int hfi1_verbs_register_sysfs(struct hfi1_devdata *dd)
struct device *class_dev = &dev->dev;
int i, j, ret;
- for (i = 0; i < ARRAY_SIZE(hfi1_attributes); ++i) {
- ret = device_create_file(&dev->dev, hfi1_attributes[i]);
- if (ret)
- goto bail;
- }
-
for (i = 0; i < dd->num_sdma; i++) {
ret = kobject_init_and_add(&dd->per_sdma[i].kobj,
&sde_ktype, &class_dev->kobj,
@@ -854,11 +673,13 @@ int hfi1_verbs_register_sysfs(struct hfi1_devdata *dd)
return 0;
bail:
- for (i = 0; i < ARRAY_SIZE(hfi1_attributes); ++i)
- device_remove_file(&dev->dev, hfi1_attributes[i]);
-
- for (i = 0; i < dd->num_sdma; i++)
- kobject_del(&dd->per_sdma[i].kobj);
+ /*
+ * The function kobject_put() will call kobject_del() if the kobject
+ * has been added successfully. The sysfs files created under the
+ * kobject directory will also be removed during the process.
+ */
+ for (; i >= 0; i--)
+ kobject_put(&dd->per_sdma[i].kobj);
return ret;
}
@@ -868,19 +689,9 @@ bail:
*/
void hfi1_verbs_unregister_sysfs(struct hfi1_devdata *dd)
{
- struct hfi1_pportdata *ppd;
int i;
- for (i = 0; i < dd->num_pports; i++) {
- ppd = &dd->pport[i];
-
- sysfs_remove_bin_file(&ppd->pport_cc_kobj,
- &cc_setting_bin_attr);
- sysfs_remove_bin_file(&ppd->pport_cc_kobj,
- &cc_table_bin_attr);
- kobject_put(&ppd->pport_cc_kobj);
- kobject_put(&ppd->vl2mtu_kobj);
- kobject_put(&ppd->sl2sc_kobj);
- kobject_put(&ppd->sc2vl_kobj);
- }
+ /* Unwind operations in hfi1_verbs_register_sysfs() */
+ for (i = 0; i < dd->num_sdma; i++)
+ kobject_put(&dd->per_sdma[i].kobj);
}
diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c
new file mode 100644
index 000000000000..eafd2f157e32
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/tid_rdma.c
@@ -0,0 +1,5534 @@
+// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
+/*
+ * Copyright(c) 2018 - 2020 Intel Corporation.
+ *
+ */
+
+#include "hfi.h"
+#include "qp.h"
+#include "rc.h"
+#include "verbs.h"
+#include "tid_rdma.h"
+#include "exp_rcv.h"
+#include "trace.h"
+
+/**
+ * DOC: TID RDMA READ protocol
+ *
+ * This is an end-to-end protocol at the hfi1 level between two nodes that
+ * improves performance by avoiding data copy on the requester side. It
+ * converts a qualified RDMA READ request into a TID RDMA READ request on
+ * the requester side and thereafter handles the request and response
+ * differently. To be qualified, the RDMA READ request should meet the
+ * following:
+ * -- The total data length should be greater than 256K;
+ * -- The total data length should be a multiple of 4K page size;
+ * -- Each local scatter-gather entry should be 4K page aligned;
+ * -- Each local scatter-gather entry should be a multiple of 4K page size;
+ */
+
+#define RCV_TID_FLOW_TABLE_CTRL_FLOW_VALID_SMASK BIT_ULL(32)
+#define RCV_TID_FLOW_TABLE_CTRL_HDR_SUPP_EN_SMASK BIT_ULL(33)
+#define RCV_TID_FLOW_TABLE_CTRL_KEEP_AFTER_SEQ_ERR_SMASK BIT_ULL(34)
+#define RCV_TID_FLOW_TABLE_CTRL_KEEP_ON_GEN_ERR_SMASK BIT_ULL(35)
+#define RCV_TID_FLOW_TABLE_STATUS_SEQ_MISMATCH_SMASK BIT_ULL(37)
+#define RCV_TID_FLOW_TABLE_STATUS_GEN_MISMATCH_SMASK BIT_ULL(38)
+
+/* Maximum number of packets within a flow generation. */
+#define MAX_TID_FLOW_PSN BIT(HFI1_KDETH_BTH_SEQ_SHIFT)
+
+#define GENERATION_MASK 0xFFFFF
+
+static u32 mask_generation(u32 a)
+{
+ return a & GENERATION_MASK;
+}
+
+/* Reserved generation value to set to unused flows for kernel contexts */
+#define KERN_GENERATION_RESERVED mask_generation(U32_MAX)
+
+/*
+ * J_KEY for kernel contexts when TID RDMA is used.
+ * See generate_jkey() in hfi.h for more information.
+ */
+#define TID_RDMA_JKEY 32
+#define HFI1_KERNEL_MIN_JKEY HFI1_ADMIN_JKEY_RANGE
+#define HFI1_KERNEL_MAX_JKEY (2 * HFI1_ADMIN_JKEY_RANGE - 1)
+
+/* Maximum number of segments in flight per QP request. */
+#define TID_RDMA_MAX_READ_SEGS_PER_REQ 6
+#define TID_RDMA_MAX_WRITE_SEGS_PER_REQ 4
+#define MAX_REQ max_t(u16, TID_RDMA_MAX_READ_SEGS_PER_REQ, \
+ TID_RDMA_MAX_WRITE_SEGS_PER_REQ)
+#define MAX_FLOWS roundup_pow_of_two(MAX_REQ + 1)
+
+#define MAX_EXPECTED_PAGES (MAX_EXPECTED_BUFFER / PAGE_SIZE)
+
+#define TID_RDMA_DESTQP_FLOW_SHIFT 11
+#define TID_RDMA_DESTQP_FLOW_MASK 0x1f
+
+#define TID_OPFN_QP_CTXT_MASK 0xff
+#define TID_OPFN_QP_CTXT_SHIFT 56
+#define TID_OPFN_QP_KDETH_MASK 0xff
+#define TID_OPFN_QP_KDETH_SHIFT 48
+#define TID_OPFN_MAX_LEN_MASK 0x7ff
+#define TID_OPFN_MAX_LEN_SHIFT 37
+#define TID_OPFN_TIMEOUT_MASK 0x1f
+#define TID_OPFN_TIMEOUT_SHIFT 32
+#define TID_OPFN_RESERVED_MASK 0x3f
+#define TID_OPFN_RESERVED_SHIFT 26
+#define TID_OPFN_URG_MASK 0x1
+#define TID_OPFN_URG_SHIFT 25
+#define TID_OPFN_VER_MASK 0x7
+#define TID_OPFN_VER_SHIFT 22
+#define TID_OPFN_JKEY_MASK 0x3f
+#define TID_OPFN_JKEY_SHIFT 16
+#define TID_OPFN_MAX_READ_MASK 0x3f
+#define TID_OPFN_MAX_READ_SHIFT 10
+#define TID_OPFN_MAX_WRITE_MASK 0x3f
+#define TID_OPFN_MAX_WRITE_SHIFT 4
+
+/*
+ * OPFN TID layout
+ *
+ * 63 47 31 15
+ * NNNNNNNNKKKKKKKK MMMMMMMMMMMTTTTT DDDDDDUVVVJJJJJJ RRRRRRWWWWWWCCCC
+ * 3210987654321098 7654321098765432 1098765432109876 5432109876543210
+ * N - the context Number
+ * K - the Kdeth_qp
+ * M - Max_len
+ * T - Timeout
+ * D - reserveD
+ * V - version
+ * U - Urg capable
+ * J - Jkey
+ * R - max_Read
+ * W - max_Write
+ * C - Capcode
+ */
+
+static void tid_rdma_trigger_resume(struct work_struct *work);
+static void hfi1_kern_exp_rcv_free_flows(struct tid_rdma_request *req);
+static int hfi1_kern_exp_rcv_alloc_flows(struct tid_rdma_request *req,
+ gfp_t gfp);
+static void hfi1_init_trdma_req(struct rvt_qp *qp,
+ struct tid_rdma_request *req);
+static void hfi1_tid_write_alloc_resources(struct rvt_qp *qp, bool intr_ctx);
+static void hfi1_tid_timeout(struct timer_list *t);
+static void hfi1_add_tid_reap_timer(struct rvt_qp *qp);
+static void hfi1_mod_tid_reap_timer(struct rvt_qp *qp);
+static void hfi1_mod_tid_retry_timer(struct rvt_qp *qp);
+static int hfi1_stop_tid_retry_timer(struct rvt_qp *qp);
+static void hfi1_tid_retry_timeout(struct timer_list *t);
+static int make_tid_rdma_ack(struct rvt_qp *qp,
+ struct ib_other_headers *ohdr,
+ struct hfi1_pkt_state *ps);
+static void hfi1_do_tid_send(struct rvt_qp *qp);
+static u32 read_r_next_psn(struct hfi1_devdata *dd, u8 ctxt, u8 fidx);
+static void tid_rdma_rcv_err(struct hfi1_packet *packet,
+ struct ib_other_headers *ohdr,
+ struct rvt_qp *qp, u32 psn, int diff, bool fecn);
+static void update_r_next_psn_fecn(struct hfi1_packet *packet,
+ struct hfi1_qp_priv *priv,
+ struct hfi1_ctxtdata *rcd,
+ struct tid_rdma_flow *flow,
+ bool fecn);
+
+static void validate_r_tid_ack(struct hfi1_qp_priv *priv)
+{
+ if (priv->r_tid_ack == HFI1_QP_WQE_INVALID)
+ priv->r_tid_ack = priv->r_tid_tail;
+}
+
+static void tid_rdma_schedule_ack(struct rvt_qp *qp)
+{
+ struct hfi1_qp_priv *priv = qp->priv;
+
+ priv->s_flags |= RVT_S_ACK_PENDING;
+ hfi1_schedule_tid_send(qp);
+}
+
+static void tid_rdma_trigger_ack(struct rvt_qp *qp)
+{
+ validate_r_tid_ack(qp->priv);
+ tid_rdma_schedule_ack(qp);
+}
+
+static u64 tid_rdma_opfn_encode(struct tid_rdma_params *p)
+{
+ return
+ (((u64)p->qp & TID_OPFN_QP_CTXT_MASK) <<
+ TID_OPFN_QP_CTXT_SHIFT) |
+ ((((u64)p->qp >> 16) & TID_OPFN_QP_KDETH_MASK) <<
+ TID_OPFN_QP_KDETH_SHIFT) |
+ (((u64)((p->max_len >> PAGE_SHIFT) - 1) &
+ TID_OPFN_MAX_LEN_MASK) << TID_OPFN_MAX_LEN_SHIFT) |
+ (((u64)p->timeout & TID_OPFN_TIMEOUT_MASK) <<
+ TID_OPFN_TIMEOUT_SHIFT) |
+ (((u64)p->urg & TID_OPFN_URG_MASK) << TID_OPFN_URG_SHIFT) |
+ (((u64)p->jkey & TID_OPFN_JKEY_MASK) << TID_OPFN_JKEY_SHIFT) |
+ (((u64)p->max_read & TID_OPFN_MAX_READ_MASK) <<
+ TID_OPFN_MAX_READ_SHIFT) |
+ (((u64)p->max_write & TID_OPFN_MAX_WRITE_MASK) <<
+ TID_OPFN_MAX_WRITE_SHIFT);
+}
+
+static void tid_rdma_opfn_decode(struct tid_rdma_params *p, u64 data)
+{
+ p->max_len = (((data >> TID_OPFN_MAX_LEN_SHIFT) &
+ TID_OPFN_MAX_LEN_MASK) + 1) << PAGE_SHIFT;
+ p->jkey = (data >> TID_OPFN_JKEY_SHIFT) & TID_OPFN_JKEY_MASK;
+ p->max_write = (data >> TID_OPFN_MAX_WRITE_SHIFT) &
+ TID_OPFN_MAX_WRITE_MASK;
+ p->max_read = (data >> TID_OPFN_MAX_READ_SHIFT) &
+ TID_OPFN_MAX_READ_MASK;
+ p->qp =
+ ((((data >> TID_OPFN_QP_KDETH_SHIFT) & TID_OPFN_QP_KDETH_MASK)
+ << 16) |
+ ((data >> TID_OPFN_QP_CTXT_SHIFT) & TID_OPFN_QP_CTXT_MASK));
+ p->urg = (data >> TID_OPFN_URG_SHIFT) & TID_OPFN_URG_MASK;
+ p->timeout = (data >> TID_OPFN_TIMEOUT_SHIFT) & TID_OPFN_TIMEOUT_MASK;
+}
+
+void tid_rdma_opfn_init(struct rvt_qp *qp, struct tid_rdma_params *p)
+{
+ struct hfi1_qp_priv *priv = qp->priv;
+
+ p->qp = (RVT_KDETH_QP_PREFIX << 16) | priv->rcd->ctxt;
+ p->max_len = TID_RDMA_MAX_SEGMENT_SIZE;
+ p->jkey = priv->rcd->jkey;
+ p->max_read = TID_RDMA_MAX_READ_SEGS_PER_REQ;
+ p->max_write = TID_RDMA_MAX_WRITE_SEGS_PER_REQ;
+ p->timeout = qp->timeout;
+ p->urg = is_urg_masked(priv->rcd);
+}
+
+bool tid_rdma_conn_req(struct rvt_qp *qp, u64 *data)
+{
+ struct hfi1_qp_priv *priv = qp->priv;
+
+ *data = tid_rdma_opfn_encode(&priv->tid_rdma.local);
+ return true;
+}
+
+bool tid_rdma_conn_reply(struct rvt_qp *qp, u64 data)
+{
+ struct hfi1_qp_priv *priv = qp->priv;
+ struct tid_rdma_params *remote, *old;
+ bool ret = true;
+
+ old = rcu_dereference_protected(priv->tid_rdma.remote,
+ lockdep_is_held(&priv->opfn.lock));
+ data &= ~0xfULL;
+ /*
+ * If data passed in is zero, return true so as not to continue the
+ * negotiation process
+ */
+ if (!data || !HFI1_CAP_IS_KSET(TID_RDMA))
+ goto null;
+ /*
+ * If kzalloc fails, return false. This will result in:
+ * * at the requester a new OPFN request being generated to retry
+ * the negotiation
+ * * at the responder, 0 being returned to the requester so as to
+ * disable TID RDMA at both the requester and the responder
+ */
+ remote = kzalloc(sizeof(*remote), GFP_ATOMIC);
+ if (!remote) {
+ ret = false;
+ goto null;
+ }
+
+ tid_rdma_opfn_decode(remote, data);
+ priv->tid_timer_timeout_jiffies =
+ usecs_to_jiffies((((4096UL * (1UL << remote->timeout)) /
+ 1000UL) << 3) * 7);
+ trace_hfi1_opfn_param(qp, 0, &priv->tid_rdma.local);
+ trace_hfi1_opfn_param(qp, 1, remote);
+ rcu_assign_pointer(priv->tid_rdma.remote, remote);
+ /*
+ * A TID RDMA READ request's segment size is not equal to
+ * remote->max_len only when the request's data length is smaller
+ * than remote->max_len. In that case, there will be only one segment.
+ * Therefore, when priv->pkts_ps is used to calculate req->cur_seg
+ * during retry, it will lead to req->cur_seg = 0, which is exactly
+ * what is expected.
+ */
+ priv->pkts_ps = (u16)rvt_div_mtu(qp, remote->max_len);
+ priv->timeout_shift = ilog2(priv->pkts_ps - 1) + 1;
+ goto free;
+null:
+ RCU_INIT_POINTER(priv->tid_rdma.remote, NULL);
+ priv->timeout_shift = 0;
+free:
+ if (old)
+ kfree_rcu(old, rcu_head);
+ return ret;
+}
+
+bool tid_rdma_conn_resp(struct rvt_qp *qp, u64 *data)
+{
+ bool ret;
+
+ ret = tid_rdma_conn_reply(qp, *data);
+ *data = 0;
+ /*
+ * If tid_rdma_conn_reply() returns error, set *data as 0 to indicate
+ * TID RDMA could not be enabled. This will result in TID RDMA being
+ * disabled at the requester too.
+ */
+ if (ret)
+ (void)tid_rdma_conn_req(qp, data);
+ return ret;
+}
+
+void tid_rdma_conn_error(struct rvt_qp *qp)
+{
+ struct hfi1_qp_priv *priv = qp->priv;
+ struct tid_rdma_params *old;
+
+ old = rcu_dereference_protected(priv->tid_rdma.remote,
+ lockdep_is_held(&priv->opfn.lock));
+ RCU_INIT_POINTER(priv->tid_rdma.remote, NULL);
+ if (old)
+ kfree_rcu(old, rcu_head);
+}
+
+/* This is called at context initialization time */
+int hfi1_kern_exp_rcv_init(struct hfi1_ctxtdata *rcd, int reinit)
+{
+ if (reinit)
+ return 0;
+
+ BUILD_BUG_ON(TID_RDMA_JKEY < HFI1_KERNEL_MIN_JKEY);
+ BUILD_BUG_ON(TID_RDMA_JKEY > HFI1_KERNEL_MAX_JKEY);
+ rcd->jkey = TID_RDMA_JKEY;
+ hfi1_set_ctxt_jkey(rcd->dd, rcd, rcd->jkey);
+ return hfi1_alloc_ctxt_rcv_groups(rcd);
+}
+
+/**
+ * qp_to_rcd - determine the receive context used by a qp
+ * @rdi: rvt dev struct
+ * @qp: the qp
+ *
+ * This routine returns the receive context associated
+ * with a a qp's qpn.
+ *
+ * Return: the context.
+ */
+static struct hfi1_ctxtdata *qp_to_rcd(struct rvt_dev_info *rdi,
+ struct rvt_qp *qp)
+{
+ struct hfi1_ibdev *verbs_dev = container_of(rdi,
+ struct hfi1_ibdev,
+ rdi);
+ struct hfi1_devdata *dd = container_of(verbs_dev,
+ struct hfi1_devdata,
+ verbs_dev);
+ unsigned int ctxt;
+
+ if (qp->ibqp.qp_num == 0)
+ ctxt = 0;
+ else
+ ctxt = hfi1_get_qp_map(dd, qp->ibqp.qp_num >> dd->qos_shift);
+ return dd->rcd[ctxt];
+}
+
+int hfi1_qp_priv_init(struct rvt_dev_info *rdi, struct rvt_qp *qp,
+ struct ib_qp_init_attr *init_attr)
+{
+ struct hfi1_qp_priv *qpriv = qp->priv;
+ int i, ret;
+
+ qpriv->rcd = qp_to_rcd(rdi, qp);
+
+ spin_lock_init(&qpriv->opfn.lock);
+ INIT_WORK(&qpriv->opfn.opfn_work, opfn_send_conn_request);
+ INIT_WORK(&qpriv->tid_rdma.trigger_work, tid_rdma_trigger_resume);
+ qpriv->flow_state.psn = 0;
+ qpriv->flow_state.index = RXE_NUM_TID_FLOWS;
+ qpriv->flow_state.last_index = RXE_NUM_TID_FLOWS;
+ qpriv->flow_state.generation = KERN_GENERATION_RESERVED;
+ qpriv->s_state = TID_OP(WRITE_RESP);
+ qpriv->s_tid_cur = HFI1_QP_WQE_INVALID;
+ qpriv->s_tid_head = HFI1_QP_WQE_INVALID;
+ qpriv->s_tid_tail = HFI1_QP_WQE_INVALID;
+ qpriv->rnr_nak_state = TID_RNR_NAK_INIT;
+ qpriv->r_tid_head = HFI1_QP_WQE_INVALID;
+ qpriv->r_tid_tail = HFI1_QP_WQE_INVALID;
+ qpriv->r_tid_ack = HFI1_QP_WQE_INVALID;
+ qpriv->r_tid_alloc = HFI1_QP_WQE_INVALID;
+ atomic_set(&qpriv->n_requests, 0);
+ atomic_set(&qpriv->n_tid_requests, 0);
+ timer_setup(&qpriv->s_tid_timer, hfi1_tid_timeout, 0);
+ timer_setup(&qpriv->s_tid_retry_timer, hfi1_tid_retry_timeout, 0);
+ INIT_LIST_HEAD(&qpriv->tid_wait);
+
+ if (init_attr->qp_type == IB_QPT_RC && HFI1_CAP_IS_KSET(TID_RDMA)) {
+ struct hfi1_devdata *dd = qpriv->rcd->dd;
+
+ qpriv->pages = kzalloc_node(TID_RDMA_MAX_PAGES *
+ sizeof(*qpriv->pages),
+ GFP_KERNEL, dd->node);
+ if (!qpriv->pages)
+ return -ENOMEM;
+ for (i = 0; i < qp->s_size; i++) {
+ struct hfi1_swqe_priv *priv;
+ struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, i);
+
+ priv = kzalloc_node(sizeof(*priv), GFP_KERNEL,
+ dd->node);
+ if (!priv)
+ return -ENOMEM;
+
+ hfi1_init_trdma_req(qp, &priv->tid_req);
+ priv->tid_req.e.swqe = wqe;
+ wqe->priv = priv;
+ }
+ for (i = 0; i < rvt_max_atomic(rdi); i++) {
+ struct hfi1_ack_priv *priv;
+
+ priv = kzalloc_node(sizeof(*priv), GFP_KERNEL,
+ dd->node);
+ if (!priv)
+ return -ENOMEM;
+
+ hfi1_init_trdma_req(qp, &priv->tid_req);
+ priv->tid_req.e.ack = &qp->s_ack_queue[i];
+
+ ret = hfi1_kern_exp_rcv_alloc_flows(&priv->tid_req,
+ GFP_KERNEL);
+ if (ret) {
+ kfree(priv);
+ return ret;
+ }
+ qp->s_ack_queue[i].priv = priv;
+ }
+ }
+
+ return 0;
+}
+
+void hfi1_qp_priv_tid_free(struct rvt_dev_info *rdi, struct rvt_qp *qp)
+{
+ struct hfi1_qp_priv *qpriv = qp->priv;
+ struct rvt_swqe *wqe;
+ u32 i;
+
+ if (qp->ibqp.qp_type == IB_QPT_RC && HFI1_CAP_IS_KSET(TID_RDMA)) {
+ for (i = 0; i < qp->s_size; i++) {
+ wqe = rvt_get_swqe_ptr(qp, i);
+ kfree(wqe->priv);
+ wqe->priv = NULL;
+ }
+ for (i = 0; i < rvt_max_atomic(rdi); i++) {
+ struct hfi1_ack_priv *priv = qp->s_ack_queue[i].priv;
+
+ if (priv)
+ hfi1_kern_exp_rcv_free_flows(&priv->tid_req);
+ kfree(priv);
+ qp->s_ack_queue[i].priv = NULL;
+ }
+ cancel_work_sync(&qpriv->opfn.opfn_work);
+ kfree(qpriv->pages);
+ qpriv->pages = NULL;
+ }
+}
+
+/* Flow and tid waiter functions */
+/**
+ * DOC: lock ordering
+ *
+ * There are two locks involved with the queuing
+ * routines: the qp s_lock and the exp_lock.
+ *
+ * Since the tid space allocation is called from
+ * the send engine, the qp s_lock is already held.
+ *
+ * The allocation routines will get the exp_lock.
+ *
+ * The first_qp() call is provided to allow the head of
+ * the rcd wait queue to be fetched under the exp_lock and
+ * followed by a drop of the exp_lock.
+ *
+ * Any qp in the wait list will have the qp reference count held
+ * to hold the qp in memory.
+ */
+
+/*
+ * return head of rcd wait list
+ *
+ * Must hold the exp_lock.
+ *
+ * Get a reference to the QP to hold the QP in memory.
+ *
+ * The caller must release the reference when the local
+ * is no longer being used.
+ */
+static struct rvt_qp *first_qp(struct hfi1_ctxtdata *rcd,
+ struct tid_queue *queue)
+ __must_hold(&rcd->exp_lock)
+{
+ struct hfi1_qp_priv *priv;
+
+ lockdep_assert_held(&rcd->exp_lock);
+ priv = list_first_entry_or_null(&queue->queue_head,
+ struct hfi1_qp_priv,
+ tid_wait);
+ if (!priv)
+ return NULL;
+ rvt_get_qp(priv->owner);
+ return priv->owner;
+}
+
+/**
+ * kernel_tid_waiters - determine rcd wait
+ * @rcd: the receive context
+ * @queue: the queue to operate on
+ * @qp: the head of the qp being processed
+ *
+ * This routine will return false IFF
+ * the list is NULL or the head of the
+ * list is the indicated qp.
+ *
+ * Must hold the qp s_lock and the exp_lock.
+ *
+ * Return:
+ * false if either of the conditions below are satisfied:
+ * 1. The list is empty or
+ * 2. The indicated qp is at the head of the list and the
+ * HFI1_S_WAIT_TID_SPACE bit is set in qp->s_flags.
+ * true is returned otherwise.
+ */
+static bool kernel_tid_waiters(struct hfi1_ctxtdata *rcd,
+ struct tid_queue *queue, struct rvt_qp *qp)
+ __must_hold(&rcd->exp_lock) __must_hold(&qp->s_lock)
+{
+ struct rvt_qp *fqp;
+ bool ret = true;
+
+ lockdep_assert_held(&qp->s_lock);
+ lockdep_assert_held(&rcd->exp_lock);
+ fqp = first_qp(rcd, queue);
+ if (!fqp || (fqp == qp && (qp->s_flags & HFI1_S_WAIT_TID_SPACE)))
+ ret = false;
+ rvt_put_qp(fqp);
+ return ret;
+}
+
+/**
+ * dequeue_tid_waiter - dequeue the qp from the list
+ * @rcd: the receive context
+ * @queue: the queue to operate on
+ * @qp: the qp to remove the wait list
+ *
+ * This routine removes the indicated qp from the
+ * wait list if it is there.
+ *
+ * This should be done after the hardware flow and
+ * tid array resources have been allocated.
+ *
+ * Must hold the qp s_lock and the rcd exp_lock.
+ *
+ * It assumes the s_lock to protect the s_flags
+ * field and to reliably test the HFI1_S_WAIT_TID_SPACE flag.
+ */
+static void dequeue_tid_waiter(struct hfi1_ctxtdata *rcd,
+ struct tid_queue *queue, struct rvt_qp *qp)
+ __must_hold(&rcd->exp_lock) __must_hold(&qp->s_lock)
+{
+ struct hfi1_qp_priv *priv = qp->priv;
+
+ lockdep_assert_held(&qp->s_lock);
+ lockdep_assert_held(&rcd->exp_lock);
+ if (list_empty(&priv->tid_wait))
+ return;
+ list_del_init(&priv->tid_wait);
+ qp->s_flags &= ~HFI1_S_WAIT_TID_SPACE;
+ queue->dequeue++;
+ rvt_put_qp(qp);
+}
+
+/**
+ * queue_qp_for_tid_wait - suspend QP on tid space
+ * @rcd: the receive context
+ * @queue: the queue to operate on
+ * @qp: the qp
+ *
+ * The qp is inserted at the tail of the rcd
+ * wait queue and the HFI1_S_WAIT_TID_SPACE s_flag is set.
+ *
+ * Must hold the qp s_lock and the exp_lock.
+ */
+static void queue_qp_for_tid_wait(struct hfi1_ctxtdata *rcd,
+ struct tid_queue *queue, struct rvt_qp *qp)
+ __must_hold(&rcd->exp_lock) __must_hold(&qp->s_lock)
+{
+ struct hfi1_qp_priv *priv = qp->priv;
+
+ lockdep_assert_held(&qp->s_lock);
+ lockdep_assert_held(&rcd->exp_lock);
+ if (list_empty(&priv->tid_wait)) {
+ qp->s_flags |= HFI1_S_WAIT_TID_SPACE;
+ list_add_tail(&priv->tid_wait, &queue->queue_head);
+ priv->tid_enqueue = ++queue->enqueue;
+ rcd->dd->verbs_dev.n_tidwait++;
+ trace_hfi1_qpsleep(qp, HFI1_S_WAIT_TID_SPACE);
+ rvt_get_qp(qp);
+ }
+}
+
+/**
+ * __trigger_tid_waiter - trigger tid waiter
+ * @qp: the qp
+ *
+ * This is a private entrance to schedule the qp
+ * assuming the caller is holding the qp->s_lock.
+ */
+static void __trigger_tid_waiter(struct rvt_qp *qp)
+ __must_hold(&qp->s_lock)
+{
+ lockdep_assert_held(&qp->s_lock);
+ if (!(qp->s_flags & HFI1_S_WAIT_TID_SPACE))
+ return;
+ trace_hfi1_qpwakeup(qp, HFI1_S_WAIT_TID_SPACE);
+ hfi1_schedule_send(qp);
+}
+
+/**
+ * tid_rdma_schedule_tid_wakeup - schedule wakeup for a qp
+ * @qp: the qp
+ *
+ * trigger a schedule or a waiting qp in a deadlock
+ * safe manner. The qp reference is held prior
+ * to this call via first_qp().
+ *
+ * If the qp trigger was already scheduled (!rval)
+ * the reference is dropped, otherwise the resume
+ * or the destroy cancel will dispatch the reference.
+ */
+static void tid_rdma_schedule_tid_wakeup(struct rvt_qp *qp)
+{
+ struct hfi1_qp_priv *priv;
+ struct hfi1_ibport *ibp;
+ struct hfi1_pportdata *ppd;
+ struct hfi1_devdata *dd;
+ bool rval;
+
+ if (!qp)
+ return;
+
+ priv = qp->priv;
+ ibp = to_iport(qp->ibqp.device, qp->port_num);
+ ppd = ppd_from_ibp(ibp);
+ dd = dd_from_ibdev(qp->ibqp.device);
+
+ rval = queue_work_on(priv->s_sde ?
+ priv->s_sde->cpu :
+ cpumask_first(cpumask_of_node(dd->node)),
+ ppd->hfi1_wq,
+ &priv->tid_rdma.trigger_work);
+ if (!rval)
+ rvt_put_qp(qp);
+}
+
+/**
+ * tid_rdma_trigger_resume - field a trigger work request
+ * @work: the work item
+ *
+ * Complete the off qp trigger processing by directly
+ * calling the progress routine.
+ */
+static void tid_rdma_trigger_resume(struct work_struct *work)
+{
+ struct tid_rdma_qp_params *tr;
+ struct hfi1_qp_priv *priv;
+ struct rvt_qp *qp;
+
+ tr = container_of(work, struct tid_rdma_qp_params, trigger_work);
+ priv = container_of(tr, struct hfi1_qp_priv, tid_rdma);
+ qp = priv->owner;
+ spin_lock_irq(&qp->s_lock);
+ if (qp->s_flags & HFI1_S_WAIT_TID_SPACE) {
+ spin_unlock_irq(&qp->s_lock);
+ hfi1_do_send(priv->owner, true);
+ } else {
+ spin_unlock_irq(&qp->s_lock);
+ }
+ rvt_put_qp(qp);
+}
+
+/*
+ * tid_rdma_flush_wait - unwind any tid space wait
+ *
+ * This is called when resetting a qp to
+ * allow a destroy or reset to get rid
+ * of any tid space linkage and reference counts.
+ */
+static void _tid_rdma_flush_wait(struct rvt_qp *qp, struct tid_queue *queue)
+ __must_hold(&qp->s_lock)
+{
+ struct hfi1_qp_priv *priv;
+
+ if (!qp)
+ return;
+ lockdep_assert_held(&qp->s_lock);
+ priv = qp->priv;
+ qp->s_flags &= ~HFI1_S_WAIT_TID_SPACE;
+ spin_lock(&priv->rcd->exp_lock);
+ if (!list_empty(&priv->tid_wait)) {
+ list_del_init(&priv->tid_wait);
+ qp->s_flags &= ~HFI1_S_WAIT_TID_SPACE;
+ queue->dequeue++;
+ rvt_put_qp(qp);
+ }
+ spin_unlock(&priv->rcd->exp_lock);
+}
+
+void hfi1_tid_rdma_flush_wait(struct rvt_qp *qp)
+ __must_hold(&qp->s_lock)
+{
+ struct hfi1_qp_priv *priv = qp->priv;
+
+ _tid_rdma_flush_wait(qp, &priv->rcd->flow_queue);
+ _tid_rdma_flush_wait(qp, &priv->rcd->rarr_queue);
+}
+
+/* Flow functions */
+/**
+ * kern_reserve_flow - allocate a hardware flow
+ * @rcd: the context to use for allocation
+ * @last: the index of the preferred flow. Use RXE_NUM_TID_FLOWS to
+ * signify "don't care".
+ *
+ * Use a bit mask based allocation to reserve a hardware
+ * flow for use in receiving KDETH data packets. If a preferred flow is
+ * specified the function will attempt to reserve that flow again, if
+ * available.
+ *
+ * The exp_lock must be held.
+ *
+ * Return:
+ * On success: a value positive value between 0 and RXE_NUM_TID_FLOWS - 1
+ * On failure: -EAGAIN
+ */
+static int kern_reserve_flow(struct hfi1_ctxtdata *rcd, int last)
+ __must_hold(&rcd->exp_lock)
+{
+ int nr;
+
+ /* Attempt to reserve the preferred flow index */
+ if (last >= 0 && last < RXE_NUM_TID_FLOWS &&
+ !test_and_set_bit(last, &rcd->flow_mask))
+ return last;
+
+ nr = ffz(rcd->flow_mask);
+ BUILD_BUG_ON(RXE_NUM_TID_FLOWS >=
+ (sizeof(rcd->flow_mask) * BITS_PER_BYTE));
+ if (nr > (RXE_NUM_TID_FLOWS - 1))
+ return -EAGAIN;
+ set_bit(nr, &rcd->flow_mask);
+ return nr;
+}
+
+static void kern_set_hw_flow(struct hfi1_ctxtdata *rcd, u32 generation,
+ u32 flow_idx)
+{
+ u64 reg;
+
+ reg = ((u64)generation << HFI1_KDETH_BTH_SEQ_SHIFT) |
+ RCV_TID_FLOW_TABLE_CTRL_FLOW_VALID_SMASK |
+ RCV_TID_FLOW_TABLE_CTRL_KEEP_AFTER_SEQ_ERR_SMASK |
+ RCV_TID_FLOW_TABLE_CTRL_KEEP_ON_GEN_ERR_SMASK |
+ RCV_TID_FLOW_TABLE_STATUS_SEQ_MISMATCH_SMASK |
+ RCV_TID_FLOW_TABLE_STATUS_GEN_MISMATCH_SMASK;
+
+ if (generation != KERN_GENERATION_RESERVED)
+ reg |= RCV_TID_FLOW_TABLE_CTRL_HDR_SUPP_EN_SMASK;
+
+ write_uctxt_csr(rcd->dd, rcd->ctxt,
+ RCV_TID_FLOW_TABLE + 8 * flow_idx, reg);
+}
+
+static u32 kern_setup_hw_flow(struct hfi1_ctxtdata *rcd, u32 flow_idx)
+ __must_hold(&rcd->exp_lock)
+{
+ u32 generation = rcd->flows[flow_idx].generation;
+
+ kern_set_hw_flow(rcd, generation, flow_idx);
+ return generation;
+}
+
+static u32 kern_flow_generation_next(u32 gen)
+{
+ u32 generation = mask_generation(gen + 1);
+
+ if (generation == KERN_GENERATION_RESERVED)
+ generation = mask_generation(generation + 1);
+ return generation;
+}
+
+static void kern_clear_hw_flow(struct hfi1_ctxtdata *rcd, u32 flow_idx)
+ __must_hold(&rcd->exp_lock)
+{
+ rcd->flows[flow_idx].generation =
+ kern_flow_generation_next(rcd->flows[flow_idx].generation);
+ kern_set_hw_flow(rcd, KERN_GENERATION_RESERVED, flow_idx);
+}
+
+int hfi1_kern_setup_hw_flow(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp)
+{
+ struct hfi1_qp_priv *qpriv = (struct hfi1_qp_priv *)qp->priv;
+ struct tid_flow_state *fs = &qpriv->flow_state;
+ struct rvt_qp *fqp;
+ unsigned long flags;
+ int ret = 0;
+
+ /* The QP already has an allocated flow */
+ if (fs->index != RXE_NUM_TID_FLOWS)
+ return ret;
+
+ spin_lock_irqsave(&rcd->exp_lock, flags);
+ if (kernel_tid_waiters(rcd, &rcd->flow_queue, qp))
+ goto queue;
+
+ ret = kern_reserve_flow(rcd, fs->last_index);
+ if (ret < 0)
+ goto queue;
+ fs->index = ret;
+ fs->last_index = fs->index;
+
+ /* Generation received in a RESYNC overrides default flow generation */
+ if (fs->generation != KERN_GENERATION_RESERVED)
+ rcd->flows[fs->index].generation = fs->generation;
+ fs->generation = kern_setup_hw_flow(rcd, fs->index);
+ fs->psn = 0;
+ dequeue_tid_waiter(rcd, &rcd->flow_queue, qp);
+ /* get head before dropping lock */
+ fqp = first_qp(rcd, &rcd->flow_queue);
+ spin_unlock_irqrestore(&rcd->exp_lock, flags);
+
+ tid_rdma_schedule_tid_wakeup(fqp);
+ return 0;
+queue:
+ queue_qp_for_tid_wait(rcd, &rcd->flow_queue, qp);
+ spin_unlock_irqrestore(&rcd->exp_lock, flags);
+ return -EAGAIN;
+}
+
+void hfi1_kern_clear_hw_flow(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp)
+{
+ struct hfi1_qp_priv *qpriv = (struct hfi1_qp_priv *)qp->priv;
+ struct tid_flow_state *fs = &qpriv->flow_state;
+ struct rvt_qp *fqp;
+ unsigned long flags;
+
+ if (fs->index >= RXE_NUM_TID_FLOWS)
+ return;
+ spin_lock_irqsave(&rcd->exp_lock, flags);
+ kern_clear_hw_flow(rcd, fs->index);
+ clear_bit(fs->index, &rcd->flow_mask);
+ fs->index = RXE_NUM_TID_FLOWS;
+ fs->psn = 0;
+ fs->generation = KERN_GENERATION_RESERVED;
+
+ /* get head before dropping lock */
+ fqp = first_qp(rcd, &rcd->flow_queue);
+ spin_unlock_irqrestore(&rcd->exp_lock, flags);
+
+ if (fqp == qp) {
+ __trigger_tid_waiter(fqp);
+ rvt_put_qp(fqp);
+ } else {
+ tid_rdma_schedule_tid_wakeup(fqp);
+ }
+}
+
+void hfi1_kern_init_ctxt_generations(struct hfi1_ctxtdata *rcd)
+{
+ int i;
+
+ for (i = 0; i < RXE_NUM_TID_FLOWS; i++) {
+ rcd->flows[i].generation = mask_generation(get_random_u32());
+ kern_set_hw_flow(rcd, KERN_GENERATION_RESERVED, i);
+ }
+}
+
+/* TID allocation functions */
+static u8 trdma_pset_order(struct tid_rdma_pageset *s)
+{
+ u8 count = s->count;
+
+ return ilog2(count) + 1;
+}
+
+/**
+ * tid_rdma_find_phys_blocks_4k - get groups base on mr info
+ * @flow: overall info for a TID RDMA segment
+ * @pages: pointer to an array of page structs
+ * @npages: number of pages
+ * @list: page set array to return
+ *
+ * This routine returns the number of groups associated with
+ * the current sge information. This implementation is based
+ * on the expected receive find_phys_blocks() adjusted to
+ * use the MR information vs. the pfn.
+ *
+ * Return:
+ * the number of RcvArray entries
+ */
+static u32 tid_rdma_find_phys_blocks_4k(struct tid_rdma_flow *flow,
+ struct page **pages,
+ u32 npages,
+ struct tid_rdma_pageset *list)
+{
+ u32 pagecount, pageidx, setcount = 0, i;
+ void *vaddr, *this_vaddr;
+
+ if (!npages)
+ return 0;
+
+ /*
+ * Look for sets of physically contiguous pages in the user buffer.
+ * This will allow us to optimize Expected RcvArray entry usage by
+ * using the bigger supported sizes.
+ */
+ vaddr = page_address(pages[0]);
+ trace_hfi1_tid_flow_page(flow->req->qp, flow, 0, 0, 0, vaddr);
+ for (pageidx = 0, pagecount = 1, i = 1; i <= npages; i++) {
+ this_vaddr = i < npages ? page_address(pages[i]) : NULL;
+ trace_hfi1_tid_flow_page(flow->req->qp, flow, i, 0, 0,
+ this_vaddr);
+ /*
+ * If the vaddr's are not sequential, pages are not physically
+ * contiguous.
+ */
+ if (this_vaddr != (vaddr + PAGE_SIZE)) {
+ /*
+ * At this point we have to loop over the set of
+ * physically contiguous pages and break them down it
+ * sizes supported by the HW.
+ * There are two main constraints:
+ * 1. The max buffer size is MAX_EXPECTED_BUFFER.
+ * If the total set size is bigger than that
+ * program only a MAX_EXPECTED_BUFFER chunk.
+ * 2. The buffer size has to be a power of two. If
+ * it is not, round down to the closes power of
+ * 2 and program that size.
+ */
+ while (pagecount) {
+ int maxpages = pagecount;
+ u32 bufsize = pagecount * PAGE_SIZE;
+
+ if (bufsize > MAX_EXPECTED_BUFFER)
+ maxpages =
+ MAX_EXPECTED_BUFFER >>
+ PAGE_SHIFT;
+ else if (!is_power_of_2(bufsize))
+ maxpages =
+ rounddown_pow_of_two(bufsize) >>
+ PAGE_SHIFT;
+
+ list[setcount].idx = pageidx;
+ list[setcount].count = maxpages;
+ trace_hfi1_tid_pageset(flow->req->qp, setcount,
+ list[setcount].idx,
+ list[setcount].count);
+ pagecount -= maxpages;
+ pageidx += maxpages;
+ setcount++;
+ }
+ pageidx = i;
+ pagecount = 1;
+ vaddr = this_vaddr;
+ } else {
+ vaddr += PAGE_SIZE;
+ pagecount++;
+ }
+ }
+ /* insure we always return an even number of sets */
+ if (setcount & 1)
+ list[setcount++].count = 0;
+ return setcount;
+}
+
+/**
+ * tid_flush_pages - dump out pages into pagesets
+ * @list: list of pagesets
+ * @idx: pointer to current page index
+ * @pages: number of pages to dump
+ * @sets: current number of pagesset
+ *
+ * This routine flushes out accumuated pages.
+ *
+ * To insure an even number of sets the
+ * code may add a filler.
+ *
+ * This can happen with when pages is not
+ * a power of 2 or pages is a power of 2
+ * less than the maximum pages.
+ *
+ * Return:
+ * The new number of sets
+ */
+
+static u32 tid_flush_pages(struct tid_rdma_pageset *list,
+ u32 *idx, u32 pages, u32 sets)
+{
+ while (pages) {
+ u32 maxpages = pages;
+
+ if (maxpages > MAX_EXPECTED_PAGES)
+ maxpages = MAX_EXPECTED_PAGES;
+ else if (!is_power_of_2(maxpages))
+ maxpages = rounddown_pow_of_two(maxpages);
+ list[sets].idx = *idx;
+ list[sets++].count = maxpages;
+ *idx += maxpages;
+ pages -= maxpages;
+ }
+ /* might need a filler */
+ if (sets & 1)
+ list[sets++].count = 0;
+ return sets;
+}
+
+/**
+ * tid_rdma_find_phys_blocks_8k - get groups base on mr info
+ * @flow: overall info for a TID RDMA segment
+ * @pages: pointer to an array of page structs
+ * @npages: number of pages
+ * @list: page set array to return
+ *
+ * This routine parses an array of pages to compute pagesets
+ * in an 8k compatible way.
+ *
+ * pages are tested two at a time, i, i + 1 for contiguous
+ * pages and i - 1 and i contiguous pages.
+ *
+ * If any condition is false, any accumulated pages are flushed and
+ * v0,v1 are emitted as separate PAGE_SIZE pagesets
+ *
+ * Otherwise, the current 8k is totaled for a future flush.
+ *
+ * Return:
+ * The number of pagesets
+ * list set with the returned number of pagesets
+ *
+ */
+static u32 tid_rdma_find_phys_blocks_8k(struct tid_rdma_flow *flow,
+ struct page **pages,
+ u32 npages,
+ struct tid_rdma_pageset *list)
+{
+ u32 idx, sets = 0, i;
+ u32 pagecnt = 0;
+ void *v0, *v1, *vm1;
+
+ if (!npages)
+ return 0;
+ for (idx = 0, i = 0, vm1 = NULL; i < npages; i += 2) {
+ /* get a new v0 */
+ v0 = page_address(pages[i]);
+ trace_hfi1_tid_flow_page(flow->req->qp, flow, i, 1, 0, v0);
+ v1 = i + 1 < npages ?
+ page_address(pages[i + 1]) : NULL;
+ trace_hfi1_tid_flow_page(flow->req->qp, flow, i, 1, 1, v1);
+ /* compare i, i + 1 vaddr */
+ if (v1 != (v0 + PAGE_SIZE)) {
+ /* flush out pages */
+ sets = tid_flush_pages(list, &idx, pagecnt, sets);
+ /* output v0,v1 as two pagesets */
+ list[sets].idx = idx++;
+ list[sets++].count = 1;
+ if (v1) {
+ list[sets].count = 1;
+ list[sets++].idx = idx++;
+ } else {
+ list[sets++].count = 0;
+ }
+ vm1 = NULL;
+ pagecnt = 0;
+ continue;
+ }
+ /* i,i+1 consecutive, look at i-1,i */
+ if (vm1 && v0 != (vm1 + PAGE_SIZE)) {
+ /* flush out pages */
+ sets = tid_flush_pages(list, &idx, pagecnt, sets);
+ pagecnt = 0;
+ }
+ /* pages will always be a multiple of 8k */
+ pagecnt += 2;
+ /* save i-1 */
+ vm1 = v1;
+ /* move to next pair */
+ }
+ /* dump residual pages at end */
+ sets = tid_flush_pages(list, &idx, npages - idx, sets);
+ /* by design cannot be odd sets */
+ WARN_ON(sets & 1);
+ return sets;
+}
+
+/*
+ * Find pages for one segment of a sge array represented by @ss. The function
+ * does not check the sge, the sge must have been checked for alignment with a
+ * prior call to hfi1_kern_trdma_ok. Other sge checking is done as part of
+ * rvt_lkey_ok and rvt_rkey_ok. Also, the function only modifies the local sge
+ * copy maintained in @ss->sge, the original sge is not modified.
+ *
+ * Unlike IB RDMA WRITE, we can't decrement ss->num_sge here because we are not
+ * releasing the MR reference count at the same time. Otherwise, we'll "leak"
+ * references to the MR. This difference requires that we keep track of progress
+ * into the sg_list. This is done by the cur_seg cursor in the tid_rdma_request
+ * structure.
+ */
+static u32 kern_find_pages(struct tid_rdma_flow *flow,
+ struct page **pages,
+ struct rvt_sge_state *ss, bool *last)
+{
+ struct tid_rdma_request *req = flow->req;
+ struct rvt_sge *sge = &ss->sge;
+ u32 length = flow->req->seg_len;
+ u32 len = PAGE_SIZE;
+ u32 i = 0;
+
+ while (length && req->isge < ss->num_sge) {
+ pages[i++] = virt_to_page(sge->vaddr);
+
+ sge->vaddr += len;
+ sge->length -= len;
+ sge->sge_length -= len;
+ if (!sge->sge_length) {
+ if (++req->isge < ss->num_sge)
+ *sge = ss->sg_list[req->isge - 1];
+ } else if (sge->length == 0 && sge->mr->lkey) {
+ if (++sge->n >= RVT_SEGSZ) {
+ ++sge->m;
+ sge->n = 0;
+ }
+ sge->vaddr = sge->mr->map[sge->m]->segs[sge->n].vaddr;
+ sge->length = sge->mr->map[sge->m]->segs[sge->n].length;
+ }
+ length -= len;
+ }
+
+ flow->length = flow->req->seg_len - length;
+ *last = req->isge != ss->num_sge;
+ return i;
+}
+
+static void dma_unmap_flow(struct tid_rdma_flow *flow)
+{
+ struct hfi1_devdata *dd;
+ int i;
+ struct tid_rdma_pageset *pset;
+
+ dd = flow->req->rcd->dd;
+ for (i = 0, pset = &flow->pagesets[0]; i < flow->npagesets;
+ i++, pset++) {
+ if (pset->count && pset->addr) {
+ dma_unmap_page(&dd->pcidev->dev,
+ pset->addr,
+ PAGE_SIZE * pset->count,
+ DMA_FROM_DEVICE);
+ pset->mapped = 0;
+ }
+ }
+}
+
+static int dma_map_flow(struct tid_rdma_flow *flow, struct page **pages)
+{
+ int i;
+ struct hfi1_devdata *dd = flow->req->rcd->dd;
+ struct tid_rdma_pageset *pset;
+
+ for (i = 0, pset = &flow->pagesets[0]; i < flow->npagesets;
+ i++, pset++) {
+ if (pset->count) {
+ pset->addr = dma_map_page(&dd->pcidev->dev,
+ pages[pset->idx],
+ 0,
+ PAGE_SIZE * pset->count,
+ DMA_FROM_DEVICE);
+
+ if (dma_mapping_error(&dd->pcidev->dev, pset->addr)) {
+ dma_unmap_flow(flow);
+ return -ENOMEM;
+ }
+ pset->mapped = 1;
+ }
+ }
+ return 0;
+}
+
+static inline bool dma_mapped(struct tid_rdma_flow *flow)
+{
+ return !!flow->pagesets[0].mapped;
+}
+
+/*
+ * Get pages pointers and identify contiguous physical memory chunks for a
+ * segment. All segments are of length flow->req->seg_len.
+ */
+static int kern_get_phys_blocks(struct tid_rdma_flow *flow,
+ struct page **pages,
+ struct rvt_sge_state *ss, bool *last)
+{
+ u8 npages;
+
+ /* Reuse previously computed pagesets, if any */
+ if (flow->npagesets) {
+ trace_hfi1_tid_flow_alloc(flow->req->qp, flow->req->setup_head,
+ flow);
+ if (!dma_mapped(flow))
+ return dma_map_flow(flow, pages);
+ return 0;
+ }
+
+ npages = kern_find_pages(flow, pages, ss, last);
+
+ if (flow->req->qp->pmtu == enum_to_mtu(OPA_MTU_4096))
+ flow->npagesets =
+ tid_rdma_find_phys_blocks_4k(flow, pages, npages,
+ flow->pagesets);
+ else
+ flow->npagesets =
+ tid_rdma_find_phys_blocks_8k(flow, pages, npages,
+ flow->pagesets);
+
+ return dma_map_flow(flow, pages);
+}
+
+static inline void kern_add_tid_node(struct tid_rdma_flow *flow,
+ struct hfi1_ctxtdata *rcd, char *s,
+ struct tid_group *grp, u8 cnt)
+{
+ struct kern_tid_node *node = &flow->tnode[flow->tnode_cnt++];
+
+ WARN_ON_ONCE(flow->tnode_cnt >=
+ (TID_RDMA_MAX_SEGMENT_SIZE >> PAGE_SHIFT));
+ if (WARN_ON_ONCE(cnt & 1))
+ dd_dev_err(rcd->dd,
+ "unexpected odd allocation cnt %u map 0x%x used %u",
+ cnt, grp->map, grp->used);
+
+ node->grp = grp;
+ node->map = grp->map;
+ node->cnt = cnt;
+ trace_hfi1_tid_node_add(flow->req->qp, s, flow->tnode_cnt - 1,
+ grp->base, grp->map, grp->used, cnt);
+}
+
+/*
+ * Try to allocate pageset_count TID's from TID groups for a context
+ *
+ * This function allocates TID's without moving groups between lists or
+ * modifying grp->map. This is done as follows, being cogizant of the lists
+ * between which the TID groups will move:
+ * 1. First allocate complete groups of 8 TID's since this is more efficient,
+ * these groups will move from group->full without affecting used
+ * 2. If more TID's are needed allocate from used (will move from used->full or
+ * stay in used)
+ * 3. If we still don't have the required number of TID's go back and look again
+ * at a complete group (will move from group->used)
+ */
+static int kern_alloc_tids(struct tid_rdma_flow *flow)
+{
+ struct hfi1_ctxtdata *rcd = flow->req->rcd;
+ struct hfi1_devdata *dd = rcd->dd;
+ u32 ngroups, pageidx = 0;
+ struct tid_group *group = NULL, *used;
+ u8 use;
+
+ flow->tnode_cnt = 0;
+ ngroups = flow->npagesets / dd->rcv_entries.group_size;
+ if (!ngroups)
+ goto used_list;
+
+ /* First look at complete groups */
+ list_for_each_entry(group, &rcd->tid_group_list.list, list) {
+ kern_add_tid_node(flow, rcd, "complete groups", group,
+ group->size);
+
+ pageidx += group->size;
+ if (!--ngroups)
+ break;
+ }
+
+ if (pageidx >= flow->npagesets)
+ goto ok;
+
+used_list:
+ /* Now look at partially used groups */
+ list_for_each_entry(used, &rcd->tid_used_list.list, list) {
+ use = min_t(u32, flow->npagesets - pageidx,
+ used->size - used->used);
+ kern_add_tid_node(flow, rcd, "used groups", used, use);
+
+ pageidx += use;
+ if (pageidx >= flow->npagesets)
+ goto ok;
+ }
+
+ /*
+ * Look again at a complete group, continuing from where we left.
+ * However, if we are at the head, we have reached the end of the
+ * complete groups list from the first loop above
+ */
+ if (group && &group->list == &rcd->tid_group_list.list)
+ goto bail_eagain;
+ group = list_prepare_entry(group, &rcd->tid_group_list.list,
+ list);
+ if (list_is_last(&group->list, &rcd->tid_group_list.list))
+ goto bail_eagain;
+ group = list_next_entry(group, list);
+ use = min_t(u32, flow->npagesets - pageidx, group->size);
+ kern_add_tid_node(flow, rcd, "complete continue", group, use);
+ pageidx += use;
+ if (pageidx >= flow->npagesets)
+ goto ok;
+bail_eagain:
+ trace_hfi1_msg_alloc_tids(flow->req->qp, " insufficient tids: needed ",
+ (u64)flow->npagesets);
+ return -EAGAIN;
+ok:
+ return 0;
+}
+
+static void kern_program_rcv_group(struct tid_rdma_flow *flow, int grp_num,
+ u32 *pset_idx)
+{
+ struct hfi1_ctxtdata *rcd = flow->req->rcd;
+ struct hfi1_devdata *dd = rcd->dd;
+ struct kern_tid_node *node = &flow->tnode[grp_num];
+ struct tid_group *grp = node->grp;
+ struct tid_rdma_pageset *pset;
+ u32 pmtu_pg = flow->req->qp->pmtu >> PAGE_SHIFT;
+ u32 rcventry, npages = 0, pair = 0, tidctrl;
+ u8 i, cnt = 0;
+
+ for (i = 0; i < grp->size; i++) {
+ rcventry = grp->base + i;
+
+ if (node->map & BIT(i) || cnt >= node->cnt) {
+ rcv_array_wc_fill(dd, rcventry);
+ continue;
+ }
+ pset = &flow->pagesets[(*pset_idx)++];
+ if (pset->count) {
+ hfi1_put_tid(dd, rcventry, PT_EXPECTED,
+ pset->addr, trdma_pset_order(pset));
+ } else {
+ hfi1_put_tid(dd, rcventry, PT_INVALID, 0, 0);
+ }
+ npages += pset->count;
+
+ rcventry -= rcd->expected_base;
+ tidctrl = pair ? 0x3 : rcventry & 0x1 ? 0x2 : 0x1;
+ /*
+ * A single TID entry will be used to use a rcvarr pair (with
+ * tidctrl 0x3), if ALL these are true (a) the bit pos is even
+ * (b) the group map shows current and the next bits as free
+ * indicating two consecutive rcvarry entries are available (c)
+ * we actually need 2 more entries
+ */
+ pair = !(i & 0x1) && !((node->map >> i) & 0x3) &&
+ node->cnt >= cnt + 2;
+ if (!pair) {
+ if (!pset->count)
+ tidctrl = 0x1;
+ flow->tid_entry[flow->tidcnt++] =
+ EXP_TID_SET(IDX, rcventry >> 1) |
+ EXP_TID_SET(CTRL, tidctrl) |
+ EXP_TID_SET(LEN, npages);
+ trace_hfi1_tid_entry_alloc(/* entry */
+ flow->req->qp, flow->tidcnt - 1,
+ flow->tid_entry[flow->tidcnt - 1]);
+
+ /* Efficient DIV_ROUND_UP(npages, pmtu_pg) */
+ flow->npkts += (npages + pmtu_pg - 1) >> ilog2(pmtu_pg);
+ npages = 0;
+ }
+
+ if (grp->used == grp->size - 1)
+ tid_group_move(grp, &rcd->tid_used_list,
+ &rcd->tid_full_list);
+ else if (!grp->used)
+ tid_group_move(grp, &rcd->tid_group_list,
+ &rcd->tid_used_list);
+
+ grp->used++;
+ grp->map |= BIT(i);
+ cnt++;
+ }
+}
+
+static void kern_unprogram_rcv_group(struct tid_rdma_flow *flow, int grp_num)
+{
+ struct hfi1_ctxtdata *rcd = flow->req->rcd;
+ struct hfi1_devdata *dd = rcd->dd;
+ struct kern_tid_node *node = &flow->tnode[grp_num];
+ struct tid_group *grp = node->grp;
+ u32 rcventry;
+ u8 i, cnt = 0;
+
+ for (i = 0; i < grp->size; i++) {
+ rcventry = grp->base + i;
+
+ if (node->map & BIT(i) || cnt >= node->cnt) {
+ rcv_array_wc_fill(dd, rcventry);
+ continue;
+ }
+
+ hfi1_put_tid(dd, rcventry, PT_INVALID, 0, 0);
+
+ grp->used--;
+ grp->map &= ~BIT(i);
+ cnt++;
+
+ if (grp->used == grp->size - 1)
+ tid_group_move(grp, &rcd->tid_full_list,
+ &rcd->tid_used_list);
+ else if (!grp->used)
+ tid_group_move(grp, &rcd->tid_used_list,
+ &rcd->tid_group_list);
+ }
+ if (WARN_ON_ONCE(cnt & 1)) {
+ struct hfi1_ctxtdata *rcd = flow->req->rcd;
+ struct hfi1_devdata *dd = rcd->dd;
+
+ dd_dev_err(dd, "unexpected odd free cnt %u map 0x%x used %u",
+ cnt, grp->map, grp->used);
+ }
+}
+
+static void kern_program_rcvarray(struct tid_rdma_flow *flow)
+{
+ u32 pset_idx = 0;
+ int i;
+
+ flow->npkts = 0;
+ flow->tidcnt = 0;
+ for (i = 0; i < flow->tnode_cnt; i++)
+ kern_program_rcv_group(flow, i, &pset_idx);
+ trace_hfi1_tid_flow_alloc(flow->req->qp, flow->req->setup_head, flow);
+}
+
+/**
+ * hfi1_kern_exp_rcv_setup() - setup TID's and flow for one segment of a
+ * TID RDMA request
+ *
+ * @req: TID RDMA request for which the segment/flow is being set up
+ * @ss: sge state, maintains state across successive segments of a sge
+ * @last: set to true after the last sge segment has been processed
+ *
+ * This function
+ * (1) finds a free flow entry in the flow circular buffer
+ * (2) finds pages and continuous physical chunks constituing one segment
+ * of an sge
+ * (3) allocates TID group entries for those chunks
+ * (4) programs rcvarray entries in the hardware corresponding to those
+ * TID's
+ * (5) computes a tidarray with formatted TID entries which can be sent
+ * to the sender
+ * (6) Reserves and programs HW flows.
+ * (7) It also manages queueing the QP when TID/flow resources are not
+ * available.
+ *
+ * @req points to struct tid_rdma_request of which the segments are a part. The
+ * function uses qp, rcd and seg_len members of @req. In the absence of errors,
+ * req->flow_idx is the index of the flow which has been prepared in this
+ * invocation of function call. With flow = &req->flows[req->flow_idx],
+ * flow->tid_entry contains the TID array which the sender can use for TID RDMA
+ * sends and flow->npkts contains number of packets required to send the
+ * segment.
+ *
+ * hfi1_check_sge_align should be called prior to calling this function and if
+ * it signals error TID RDMA cannot be used for this sge and this function
+ * should not be called.
+ *
+ * For the queuing, caller must hold the flow->req->qp s_lock from the send
+ * engine and the function will procure the exp_lock.
+ *
+ * Return:
+ * The function returns -EAGAIN if sufficient number of TID/flow resources to
+ * map the segment could not be allocated. In this case the function should be
+ * called again with previous arguments to retry the TID allocation. There are
+ * no other error returns. The function returns 0 on success.
+ */
+int hfi1_kern_exp_rcv_setup(struct tid_rdma_request *req,
+ struct rvt_sge_state *ss, bool *last)
+ __must_hold(&req->qp->s_lock)
+{
+ struct tid_rdma_flow *flow = &req->flows[req->setup_head];
+ struct hfi1_ctxtdata *rcd = req->rcd;
+ struct hfi1_qp_priv *qpriv = req->qp->priv;
+ unsigned long flags;
+ struct rvt_qp *fqp;
+ u16 clear_tail = req->clear_tail;
+
+ lockdep_assert_held(&req->qp->s_lock);
+ /*
+ * We return error if either (a) we don't have space in the flow
+ * circular buffer, or (b) we already have max entries in the buffer.
+ * Max entries depend on the type of request we are processing and the
+ * negotiated TID RDMA parameters.
+ */
+ if (!CIRC_SPACE(req->setup_head, clear_tail, MAX_FLOWS) ||
+ CIRC_CNT(req->setup_head, clear_tail, MAX_FLOWS) >=
+ req->n_flows)
+ return -EINVAL;
+
+ /*
+ * Get pages, identify contiguous physical memory chunks for the segment
+ * If we can not determine a DMA address mapping we will treat it just
+ * like if we ran out of space above.
+ */
+ if (kern_get_phys_blocks(flow, qpriv->pages, ss, last)) {
+ hfi1_wait_kmem(flow->req->qp);
+ return -ENOMEM;
+ }
+
+ spin_lock_irqsave(&rcd->exp_lock, flags);
+ if (kernel_tid_waiters(rcd, &rcd->rarr_queue, flow->req->qp))
+ goto queue;
+
+ /*
+ * At this point we know the number of pagesets and hence the number of
+ * TID's to map the segment. Allocate the TID's from the TID groups. If
+ * we cannot allocate the required number we exit and try again later
+ */
+ if (kern_alloc_tids(flow))
+ goto queue;
+ /*
+ * Finally program the TID entries with the pagesets, compute the
+ * tidarray and enable the HW flow
+ */
+ kern_program_rcvarray(flow);
+
+ /*
+ * Setup the flow state with relevant information.
+ * This information is used for tracking the sequence of data packets
+ * for the segment.
+ * The flow is setup here as this is the most accurate time and place
+ * to do so. Doing at a later time runs the risk of the flow data in
+ * qpriv getting out of sync.
+ */
+ memset(&flow->flow_state, 0x0, sizeof(flow->flow_state));
+ flow->idx = qpriv->flow_state.index;
+ flow->flow_state.generation = qpriv->flow_state.generation;
+ flow->flow_state.spsn = qpriv->flow_state.psn;
+ flow->flow_state.lpsn = flow->flow_state.spsn + flow->npkts - 1;
+ flow->flow_state.r_next_psn =
+ full_flow_psn(flow, flow->flow_state.spsn);
+ qpriv->flow_state.psn += flow->npkts;
+
+ dequeue_tid_waiter(rcd, &rcd->rarr_queue, flow->req->qp);
+ /* get head before dropping lock */
+ fqp = first_qp(rcd, &rcd->rarr_queue);
+ spin_unlock_irqrestore(&rcd->exp_lock, flags);
+ tid_rdma_schedule_tid_wakeup(fqp);
+
+ req->setup_head = (req->setup_head + 1) & (MAX_FLOWS - 1);
+ return 0;
+queue:
+ queue_qp_for_tid_wait(rcd, &rcd->rarr_queue, flow->req->qp);
+ spin_unlock_irqrestore(&rcd->exp_lock, flags);
+ return -EAGAIN;
+}
+
+static void hfi1_tid_rdma_reset_flow(struct tid_rdma_flow *flow)
+{
+ flow->npagesets = 0;
+}
+
+/*
+ * This function is called after one segment has been successfully sent to
+ * release the flow and TID HW/SW resources for that segment. The segments for a
+ * TID RDMA request are setup and cleared in FIFO order which is managed using a
+ * circular buffer.
+ */
+int hfi1_kern_exp_rcv_clear(struct tid_rdma_request *req)
+ __must_hold(&req->qp->s_lock)
+{
+ struct tid_rdma_flow *flow = &req->flows[req->clear_tail];
+ struct hfi1_ctxtdata *rcd = req->rcd;
+ unsigned long flags;
+ int i;
+ struct rvt_qp *fqp;
+
+ lockdep_assert_held(&req->qp->s_lock);
+ /* Exit if we have nothing in the flow circular buffer */
+ if (!CIRC_CNT(req->setup_head, req->clear_tail, MAX_FLOWS))
+ return -EINVAL;
+
+ spin_lock_irqsave(&rcd->exp_lock, flags);
+
+ for (i = 0; i < flow->tnode_cnt; i++)
+ kern_unprogram_rcv_group(flow, i);
+ /* To prevent double unprogramming */
+ flow->tnode_cnt = 0;
+ /* get head before dropping lock */
+ fqp = first_qp(rcd, &rcd->rarr_queue);
+ spin_unlock_irqrestore(&rcd->exp_lock, flags);
+
+ dma_unmap_flow(flow);
+
+ hfi1_tid_rdma_reset_flow(flow);
+ req->clear_tail = (req->clear_tail + 1) & (MAX_FLOWS - 1);
+
+ if (fqp == req->qp) {
+ __trigger_tid_waiter(fqp);
+ rvt_put_qp(fqp);
+ } else {
+ tid_rdma_schedule_tid_wakeup(fqp);
+ }
+
+ return 0;
+}
+
+/*
+ * This function is called to release all the tid entries for
+ * a request.
+ */
+void hfi1_kern_exp_rcv_clear_all(struct tid_rdma_request *req)
+ __must_hold(&req->qp->s_lock)
+{
+ /* Use memory barrier for proper ordering */
+ while (CIRC_CNT(req->setup_head, req->clear_tail, MAX_FLOWS)) {
+ if (hfi1_kern_exp_rcv_clear(req))
+ break;
+ }
+}
+
+/**
+ * hfi1_kern_exp_rcv_free_flows - free previously allocated flow information
+ * @req: the tid rdma request to be cleaned
+ */
+static void hfi1_kern_exp_rcv_free_flows(struct tid_rdma_request *req)
+{
+ kfree(req->flows);
+ req->flows = NULL;
+}
+
+/**
+ * __trdma_clean_swqe - clean up for large sized QPs
+ * @qp: the queue patch
+ * @wqe: the send wqe
+ */
+void __trdma_clean_swqe(struct rvt_qp *qp, struct rvt_swqe *wqe)
+{
+ struct hfi1_swqe_priv *p = wqe->priv;
+
+ hfi1_kern_exp_rcv_free_flows(&p->tid_req);
+}
+
+/*
+ * This can be called at QP create time or in the data path.
+ */
+static int hfi1_kern_exp_rcv_alloc_flows(struct tid_rdma_request *req,
+ gfp_t gfp)
+{
+ struct tid_rdma_flow *flows;
+ int i;
+
+ if (likely(req->flows))
+ return 0;
+ flows = kmalloc_node(MAX_FLOWS * sizeof(*flows), gfp,
+ req->rcd->numa_id);
+ if (!flows)
+ return -ENOMEM;
+ /* mini init */
+ for (i = 0; i < MAX_FLOWS; i++) {
+ flows[i].req = req;
+ flows[i].npagesets = 0;
+ flows[i].pagesets[0].mapped = 0;
+ flows[i].resync_npkts = 0;
+ }
+ req->flows = flows;
+ return 0;
+}
+
+static void hfi1_init_trdma_req(struct rvt_qp *qp,
+ struct tid_rdma_request *req)
+{
+ struct hfi1_qp_priv *qpriv = qp->priv;
+
+ /*
+ * Initialize various TID RDMA request variables.
+ * These variables are "static", which is why they
+ * can be pre-initialized here before the WRs has
+ * even been submitted.
+ * However, non-NULL values for these variables do not
+ * imply that this WQE has been enabled for TID RDMA.
+ * Drivers should check the WQE's opcode to determine
+ * if a request is a TID RDMA one or not.
+ */
+ req->qp = qp;
+ req->rcd = qpriv->rcd;
+}
+
+u64 hfi1_access_sw_tid_wait(const struct cntr_entry *entry,
+ void *context, int vl, int mode, u64 data)
+{
+ struct hfi1_devdata *dd = context;
+
+ return dd->verbs_dev.n_tidwait;
+}
+
+static struct tid_rdma_flow *find_flow_ib(struct tid_rdma_request *req,
+ u32 psn, u16 *fidx)
+{
+ u16 head, tail;
+ struct tid_rdma_flow *flow;
+
+ head = req->setup_head;
+ tail = req->clear_tail;
+ for ( ; CIRC_CNT(head, tail, MAX_FLOWS);
+ tail = CIRC_NEXT(tail, MAX_FLOWS)) {
+ flow = &req->flows[tail];
+ if (cmp_psn(psn, flow->flow_state.ib_spsn) >= 0 &&
+ cmp_psn(psn, flow->flow_state.ib_lpsn) <= 0) {
+ if (fidx)
+ *fidx = tail;
+ return flow;
+ }
+ }
+ return NULL;
+}
+
+/* TID RDMA READ functions */
+u32 hfi1_build_tid_rdma_read_packet(struct rvt_swqe *wqe,
+ struct ib_other_headers *ohdr, u32 *bth1,
+ u32 *bth2, u32 *len)
+{
+ struct tid_rdma_request *req = wqe_to_tid_req(wqe);
+ struct tid_rdma_flow *flow = &req->flows[req->flow_idx];
+ struct rvt_qp *qp = req->qp;
+ struct hfi1_qp_priv *qpriv = qp->priv;
+ struct hfi1_swqe_priv *wpriv = wqe->priv;
+ struct tid_rdma_read_req *rreq = &ohdr->u.tid_rdma.r_req;
+ struct tid_rdma_params *remote;
+ u32 req_len = 0;
+ void *req_addr = NULL;
+
+ /* This is the IB psn used to send the request */
+ *bth2 = mask_psn(flow->flow_state.ib_spsn + flow->pkt);
+ trace_hfi1_tid_flow_build_read_pkt(qp, req->flow_idx, flow);
+
+ /* TID Entries for TID RDMA READ payload */
+ req_addr = &flow->tid_entry[flow->tid_idx];
+ req_len = sizeof(*flow->tid_entry) *
+ (flow->tidcnt - flow->tid_idx);
+
+ memset(&ohdr->u.tid_rdma.r_req, 0, sizeof(ohdr->u.tid_rdma.r_req));
+ wpriv->ss.sge.vaddr = req_addr;
+ wpriv->ss.sge.sge_length = req_len;
+ wpriv->ss.sge.length = wpriv->ss.sge.sge_length;
+ /*
+ * We can safely zero these out. Since the first SGE covers the
+ * entire packet, nothing else should even look at the MR.
+ */
+ wpriv->ss.sge.mr = NULL;
+ wpriv->ss.sge.m = 0;
+ wpriv->ss.sge.n = 0;
+
+ wpriv->ss.sg_list = NULL;
+ wpriv->ss.total_len = wpriv->ss.sge.sge_length;
+ wpriv->ss.num_sge = 1;
+
+ /* Construct the TID RDMA READ REQ packet header */
+ rcu_read_lock();
+ remote = rcu_dereference(qpriv->tid_rdma.remote);
+
+ KDETH_RESET(rreq->kdeth0, KVER, 0x1);
+ KDETH_RESET(rreq->kdeth1, JKEY, remote->jkey);
+ rreq->reth.vaddr = cpu_to_be64(wqe->rdma_wr.remote_addr +
+ req->cur_seg * req->seg_len + flow->sent);
+ rreq->reth.rkey = cpu_to_be32(wqe->rdma_wr.rkey);
+ rreq->reth.length = cpu_to_be32(*len);
+ rreq->tid_flow_psn =
+ cpu_to_be32((flow->flow_state.generation <<
+ HFI1_KDETH_BTH_SEQ_SHIFT) |
+ ((flow->flow_state.spsn + flow->pkt) &
+ HFI1_KDETH_BTH_SEQ_MASK));
+ rreq->tid_flow_qp =
+ cpu_to_be32(qpriv->tid_rdma.local.qp |
+ ((flow->idx & TID_RDMA_DESTQP_FLOW_MASK) <<
+ TID_RDMA_DESTQP_FLOW_SHIFT) |
+ qpriv->rcd->ctxt);
+ rreq->verbs_qp = cpu_to_be32(qp->remote_qpn);
+ *bth1 &= ~RVT_QPN_MASK;
+ *bth1 |= remote->qp;
+ *bth2 |= IB_BTH_REQ_ACK;
+ rcu_read_unlock();
+
+ /* We are done with this segment */
+ flow->sent += *len;
+ req->cur_seg++;
+ qp->s_state = TID_OP(READ_REQ);
+ req->ack_pending++;
+ req->flow_idx = (req->flow_idx + 1) & (MAX_FLOWS - 1);
+ qpriv->pending_tid_r_segs++;
+ qp->s_num_rd_atomic++;
+
+ /* Set the TID RDMA READ request payload size */
+ *len = req_len;
+
+ return sizeof(ohdr->u.tid_rdma.r_req) / sizeof(u32);
+}
+
+/*
+ * @len: contains the data length to read upon entry and the read request
+ * payload length upon exit.
+ */
+u32 hfi1_build_tid_rdma_read_req(struct rvt_qp *qp, struct rvt_swqe *wqe,
+ struct ib_other_headers *ohdr, u32 *bth1,
+ u32 *bth2, u32 *len)
+ __must_hold(&qp->s_lock)
+{
+ struct hfi1_qp_priv *qpriv = qp->priv;
+ struct tid_rdma_request *req = wqe_to_tid_req(wqe);
+ struct tid_rdma_flow *flow = NULL;
+ u32 hdwords = 0;
+ bool last;
+ bool retry = true;
+ u32 npkts = rvt_div_round_up_mtu(qp, *len);
+
+ trace_hfi1_tid_req_build_read_req(qp, 0, wqe->wr.opcode, wqe->psn,
+ wqe->lpsn, req);
+ /*
+ * Check sync conditions. Make sure that there are no pending
+ * segments before freeing the flow.
+ */
+sync_check:
+ if (req->state == TID_REQUEST_SYNC) {
+ if (qpriv->pending_tid_r_segs)
+ goto done;
+
+ hfi1_kern_clear_hw_flow(req->rcd, qp);
+ qpriv->s_flags &= ~HFI1_R_TID_SW_PSN;
+ req->state = TID_REQUEST_ACTIVE;
+ }
+
+ /*
+ * If the request for this segment is resent, the tid resources should
+ * have been allocated before. In this case, req->flow_idx should
+ * fall behind req->setup_head.
+ */
+ if (req->flow_idx == req->setup_head) {
+ retry = false;
+ if (req->state == TID_REQUEST_RESEND) {
+ /*
+ * This is the first new segment for a request whose
+ * earlier segments have been re-sent. We need to
+ * set up the sge pointer correctly.
+ */
+ restart_sge(&qp->s_sge, wqe, req->s_next_psn,
+ qp->pmtu);
+ req->isge = 0;
+ req->state = TID_REQUEST_ACTIVE;
+ }
+
+ /*
+ * Check sync. The last PSN of each generation is reserved for
+ * RESYNC.
+ */
+ if ((qpriv->flow_state.psn + npkts) > MAX_TID_FLOW_PSN - 1) {
+ req->state = TID_REQUEST_SYNC;
+ goto sync_check;
+ }
+
+ /* Allocate the flow if not yet */
+ if (hfi1_kern_setup_hw_flow(qpriv->rcd, qp))
+ goto done;
+
+ /*
+ * The following call will advance req->setup_head after
+ * allocating the tid entries.
+ */
+ if (hfi1_kern_exp_rcv_setup(req, &qp->s_sge, &last)) {
+ req->state = TID_REQUEST_QUEUED;
+
+ /*
+ * We don't have resources for this segment. The QP has
+ * already been queued.
+ */
+ goto done;
+ }
+ }
+
+ /* req->flow_idx should only be one slot behind req->setup_head */
+ flow = &req->flows[req->flow_idx];
+ flow->pkt = 0;
+ flow->tid_idx = 0;
+ flow->sent = 0;
+ if (!retry) {
+ /* Set the first and last IB PSN for the flow in use.*/
+ flow->flow_state.ib_spsn = req->s_next_psn;
+ flow->flow_state.ib_lpsn =
+ flow->flow_state.ib_spsn + flow->npkts - 1;
+ }
+
+ /* Calculate the next segment start psn.*/
+ req->s_next_psn += flow->npkts;
+
+ /* Build the packet header */
+ hdwords = hfi1_build_tid_rdma_read_packet(wqe, ohdr, bth1, bth2, len);
+done:
+ return hdwords;
+}
+
+/*
+ * Validate and accept the TID RDMA READ request parameters.
+ * Return 0 if the request is accepted successfully;
+ * Return 1 otherwise.
+ */
+static int tid_rdma_rcv_read_request(struct rvt_qp *qp,
+ struct rvt_ack_entry *e,
+ struct hfi1_packet *packet,
+ struct ib_other_headers *ohdr,
+ u32 bth0, u32 psn, u64 vaddr, u32 len)
+{
+ struct hfi1_qp_priv *qpriv = qp->priv;
+ struct tid_rdma_request *req;
+ struct tid_rdma_flow *flow;
+ u32 flow_psn, i, tidlen = 0, pktlen, tlen;
+
+ req = ack_to_tid_req(e);
+
+ /* Validate the payload first */
+ flow = &req->flows[req->setup_head];
+
+ /* payload length = packet length - (header length + ICRC length) */
+ pktlen = packet->tlen - (packet->hlen + 4);
+ if (pktlen > sizeof(flow->tid_entry))
+ return 1;
+ memcpy(flow->tid_entry, packet->ebuf, pktlen);
+ flow->tidcnt = pktlen / sizeof(*flow->tid_entry);
+
+ /*
+ * Walk the TID_ENTRY list to make sure we have enough space for a
+ * complete segment. Also calculate the number of required packets.
+ */
+ flow->npkts = rvt_div_round_up_mtu(qp, len);
+ for (i = 0; i < flow->tidcnt; i++) {
+ trace_hfi1_tid_entry_rcv_read_req(qp, i,
+ flow->tid_entry[i]);
+ tlen = EXP_TID_GET(flow->tid_entry[i], LEN);
+ if (!tlen)
+ return 1;
+
+ /*
+ * For tid pair (tidctr == 3), the buffer size of the pair
+ * should be the sum of the buffer size described by each
+ * tid entry. However, only the first entry needs to be
+ * specified in the request (see WFR HAS Section 8.5.7.1).
+ */
+ tidlen += tlen;
+ }
+ if (tidlen * PAGE_SIZE < len)
+ return 1;
+
+ /* Empty the flow array */
+ req->clear_tail = req->setup_head;
+ flow->pkt = 0;
+ flow->tid_idx = 0;
+ flow->tid_offset = 0;
+ flow->sent = 0;
+ flow->tid_qpn = be32_to_cpu(ohdr->u.tid_rdma.r_req.tid_flow_qp);
+ flow->idx = (flow->tid_qpn >> TID_RDMA_DESTQP_FLOW_SHIFT) &
+ TID_RDMA_DESTQP_FLOW_MASK;
+ flow_psn = mask_psn(be32_to_cpu(ohdr->u.tid_rdma.r_req.tid_flow_psn));
+ flow->flow_state.generation = flow_psn >> HFI1_KDETH_BTH_SEQ_SHIFT;
+ flow->flow_state.spsn = flow_psn & HFI1_KDETH_BTH_SEQ_MASK;
+ flow->length = len;
+
+ flow->flow_state.lpsn = flow->flow_state.spsn +
+ flow->npkts - 1;
+ flow->flow_state.ib_spsn = psn;
+ flow->flow_state.ib_lpsn = flow->flow_state.ib_spsn + flow->npkts - 1;
+
+ trace_hfi1_tid_flow_rcv_read_req(qp, req->setup_head, flow);
+ /* Set the initial flow index to the current flow. */
+ req->flow_idx = req->setup_head;
+
+ /* advance circular buffer head */
+ req->setup_head = (req->setup_head + 1) & (MAX_FLOWS - 1);
+
+ /*
+ * Compute last PSN for request.
+ */
+ e->opcode = (bth0 >> 24) & 0xff;
+ e->psn = psn;
+ e->lpsn = psn + flow->npkts - 1;
+ e->sent = 0;
+
+ req->n_flows = qpriv->tid_rdma.local.max_read;
+ req->state = TID_REQUEST_ACTIVE;
+ req->cur_seg = 0;
+ req->comp_seg = 0;
+ req->ack_seg = 0;
+ req->isge = 0;
+ req->seg_len = qpriv->tid_rdma.local.max_len;
+ req->total_len = len;
+ req->total_segs = 1;
+ req->r_flow_psn = e->psn;
+
+ trace_hfi1_tid_req_rcv_read_req(qp, 0, e->opcode, e->psn, e->lpsn,
+ req);
+ return 0;
+}
+
+static int tid_rdma_rcv_error(struct hfi1_packet *packet,
+ struct ib_other_headers *ohdr,
+ struct rvt_qp *qp, u32 psn, int diff)
+{
+ struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
+ struct hfi1_ctxtdata *rcd = ((struct hfi1_qp_priv *)qp->priv)->rcd;
+ struct hfi1_ibdev *dev = to_idev(qp->ibqp.device);
+ struct hfi1_qp_priv *qpriv = qp->priv;
+ struct rvt_ack_entry *e;
+ struct tid_rdma_request *req;
+ unsigned long flags;
+ u8 prev;
+ bool old_req;
+
+ trace_hfi1_rsp_tid_rcv_error(qp, psn);
+ trace_hfi1_tid_rdma_rcv_err(qp, 0, psn, diff);
+ if (diff > 0) {
+ /* sequence error */
+ if (!qp->r_nak_state) {
+ ibp->rvp.n_rc_seqnak++;
+ qp->r_nak_state = IB_NAK_PSN_ERROR;
+ qp->r_ack_psn = qp->r_psn;
+ rc_defered_ack(rcd, qp);
+ }
+ goto done;
+ }
+
+ ibp->rvp.n_rc_dupreq++;
+
+ spin_lock_irqsave(&qp->s_lock, flags);
+ e = find_prev_entry(qp, psn, &prev, NULL, &old_req);
+ if (!e || (e->opcode != TID_OP(READ_REQ) &&
+ e->opcode != TID_OP(WRITE_REQ)))
+ goto unlock;
+
+ req = ack_to_tid_req(e);
+ req->r_flow_psn = psn;
+ trace_hfi1_tid_req_rcv_err(qp, 0, e->opcode, e->psn, e->lpsn, req);
+ if (e->opcode == TID_OP(READ_REQ)) {
+ struct ib_reth *reth;
+ u32 len;
+ u32 rkey;
+ u64 vaddr;
+ int ok;
+ u32 bth0;
+
+ reth = &ohdr->u.tid_rdma.r_req.reth;
+ /*
+ * The requester always restarts from the start of the original
+ * request.
+ */
+ len = be32_to_cpu(reth->length);
+ if (psn != e->psn || len != req->total_len)
+ goto unlock;
+
+ release_rdma_sge_mr(e);
+
+ rkey = be32_to_cpu(reth->rkey);
+ vaddr = get_ib_reth_vaddr(reth);
+
+ qp->r_len = len;
+ ok = rvt_rkey_ok(qp, &e->rdma_sge, len, vaddr, rkey,
+ IB_ACCESS_REMOTE_READ);
+ if (unlikely(!ok))
+ goto unlock;
+
+ /*
+ * If all the response packets for the current request have
+ * been sent out and this request is complete (old_request
+ * == false) and the TID flow may be unusable (the
+ * req->clear_tail is advanced). However, when an earlier
+ * request is received, this request will not be complete any
+ * more (qp->s_tail_ack_queue is moved back, see below).
+ * Consequently, we need to update the TID flow info every time
+ * a duplicate request is received.
+ */
+ bth0 = be32_to_cpu(ohdr->bth[0]);
+ if (tid_rdma_rcv_read_request(qp, e, packet, ohdr, bth0, psn,
+ vaddr, len))
+ goto unlock;
+
+ /*
+ * True if the request is already scheduled (between
+ * qp->s_tail_ack_queue and qp->r_head_ack_queue);
+ */
+ if (old_req)
+ goto unlock;
+ } else {
+ struct flow_state *fstate;
+ bool schedule = false;
+ u8 i;
+
+ if (req->state == TID_REQUEST_RESEND) {
+ req->state = TID_REQUEST_RESEND_ACTIVE;
+ } else if (req->state == TID_REQUEST_INIT_RESEND) {
+ req->state = TID_REQUEST_INIT;
+ schedule = true;
+ }
+
+ /*
+ * True if the request is already scheduled (between
+ * qp->s_tail_ack_queue and qp->r_head_ack_queue).
+ * Also, don't change requests, which are at the SYNC
+ * point and haven't generated any responses yet.
+ * There is nothing to retransmit for them yet.
+ */
+ if (old_req || req->state == TID_REQUEST_INIT ||
+ (req->state == TID_REQUEST_SYNC && !req->cur_seg)) {
+ for (i = prev + 1; ; i++) {
+ if (i > rvt_size_atomic(&dev->rdi))
+ i = 0;
+ if (i == qp->r_head_ack_queue)
+ break;
+ e = &qp->s_ack_queue[i];
+ req = ack_to_tid_req(e);
+ if (e->opcode == TID_OP(WRITE_REQ) &&
+ req->state == TID_REQUEST_INIT)
+ req->state = TID_REQUEST_INIT_RESEND;
+ }
+ /*
+ * If the state of the request has been changed,
+ * the first leg needs to get scheduled in order to
+ * pick up the change. Otherwise, normal response
+ * processing should take care of it.
+ */
+ if (!schedule)
+ goto unlock;
+ }
+
+ /*
+ * If there is no more allocated segment, just schedule the qp
+ * without changing any state.
+ */
+ if (req->clear_tail == req->setup_head)
+ goto schedule;
+ /*
+ * If this request has sent responses for segments, which have
+ * not received data yet (flow_idx != clear_tail), the flow_idx
+ * pointer needs to be adjusted so the same responses can be
+ * re-sent.
+ */
+ if (CIRC_CNT(req->flow_idx, req->clear_tail, MAX_FLOWS)) {
+ fstate = &req->flows[req->clear_tail].flow_state;
+ qpriv->pending_tid_w_segs -=
+ CIRC_CNT(req->flow_idx, req->clear_tail,
+ MAX_FLOWS);
+ req->flow_idx =
+ CIRC_ADD(req->clear_tail,
+ delta_psn(psn, fstate->resp_ib_psn),
+ MAX_FLOWS);
+ qpriv->pending_tid_w_segs +=
+ delta_psn(psn, fstate->resp_ib_psn);
+ /*
+ * When flow_idx == setup_head, we've gotten a duplicate
+ * request for a segment, which has not been allocated
+ * yet. In that case, don't adjust this request.
+ * However, we still want to go through the loop below
+ * to adjust all subsequent requests.
+ */
+ if (CIRC_CNT(req->setup_head, req->flow_idx,
+ MAX_FLOWS)) {
+ req->cur_seg = delta_psn(psn, e->psn);
+ req->state = TID_REQUEST_RESEND_ACTIVE;
+ }
+ }
+
+ for (i = prev + 1; ; i++) {
+ /*
+ * Look at everything up to and including
+ * s_tail_ack_queue
+ */
+ if (i > rvt_size_atomic(&dev->rdi))
+ i = 0;
+ if (i == qp->r_head_ack_queue)
+ break;
+ e = &qp->s_ack_queue[i];
+ req = ack_to_tid_req(e);
+ trace_hfi1_tid_req_rcv_err(qp, 0, e->opcode, e->psn,
+ e->lpsn, req);
+ if (e->opcode != TID_OP(WRITE_REQ) ||
+ req->cur_seg == req->comp_seg ||
+ req->state == TID_REQUEST_INIT ||
+ req->state == TID_REQUEST_INIT_RESEND) {
+ if (req->state == TID_REQUEST_INIT)
+ req->state = TID_REQUEST_INIT_RESEND;
+ continue;
+ }
+ qpriv->pending_tid_w_segs -=
+ CIRC_CNT(req->flow_idx,
+ req->clear_tail,
+ MAX_FLOWS);
+ req->flow_idx = req->clear_tail;
+ req->state = TID_REQUEST_RESEND;
+ req->cur_seg = req->comp_seg;
+ }
+ qpriv->s_flags &= ~HFI1_R_TID_WAIT_INTERLCK;
+ }
+ /* Re-process old requests.*/
+ if (qp->s_acked_ack_queue == qp->s_tail_ack_queue)
+ qp->s_acked_ack_queue = prev;
+ qp->s_tail_ack_queue = prev;
+ /*
+ * Since the qp->s_tail_ack_queue is modified, the
+ * qp->s_ack_state must be changed to re-initialize
+ * qp->s_ack_rdma_sge; Otherwise, we will end up in
+ * wrong memory region.
+ */
+ qp->s_ack_state = OP(ACKNOWLEDGE);
+schedule:
+ /*
+ * It's possible to receive a retry psn that is earlier than an RNRNAK
+ * psn. In this case, the rnrnak state should be cleared.
+ */
+ if (qpriv->rnr_nak_state) {
+ qp->s_nak_state = 0;
+ qpriv->rnr_nak_state = TID_RNR_NAK_INIT;
+ qp->r_psn = e->lpsn + 1;
+ hfi1_tid_write_alloc_resources(qp, true);
+ }
+
+ qp->r_state = e->opcode;
+ qp->r_nak_state = 0;
+ qp->s_flags |= RVT_S_RESP_PENDING;
+ hfi1_schedule_send(qp);
+unlock:
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+done:
+ return 1;
+}
+
+void hfi1_rc_rcv_tid_rdma_read_req(struct hfi1_packet *packet)
+{
+ /* HANDLER FOR TID RDMA READ REQUEST packet (Responder side)*/
+
+ /*
+ * 1. Verify TID RDMA READ REQ as per IB_OPCODE_RC_RDMA_READ
+ * (see hfi1_rc_rcv())
+ * 2. Put TID RDMA READ REQ into the response queue (s_ack_queue)
+ * - Setup struct tid_rdma_req with request info
+ * - Initialize struct tid_rdma_flow info;
+ * - Copy TID entries;
+ * 3. Set the qp->s_ack_state.
+ * 4. Set RVT_S_RESP_PENDING in s_flags.
+ * 5. Kick the send engine (hfi1_schedule_send())
+ */
+ struct hfi1_ctxtdata *rcd = packet->rcd;
+ struct rvt_qp *qp = packet->qp;
+ struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
+ struct ib_other_headers *ohdr = packet->ohdr;
+ struct rvt_ack_entry *e;
+ unsigned long flags;
+ struct ib_reth *reth;
+ struct hfi1_qp_priv *qpriv = qp->priv;
+ u32 bth0, psn, len, rkey;
+ bool fecn;
+ u8 next;
+ u64 vaddr;
+ int diff;
+ u8 nack_state = IB_NAK_INVALID_REQUEST;
+
+ bth0 = be32_to_cpu(ohdr->bth[0]);
+ if (hfi1_ruc_check_hdr(ibp, packet))
+ return;
+
+ fecn = process_ecn(qp, packet);
+ psn = mask_psn(be32_to_cpu(ohdr->bth[2]));
+ trace_hfi1_rsp_rcv_tid_read_req(qp, psn);
+
+ if (qp->state == IB_QPS_RTR && !(qp->r_flags & RVT_R_COMM_EST))
+ rvt_comm_est(qp);
+
+ if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
+ goto nack_inv;
+
+ reth = &ohdr->u.tid_rdma.r_req.reth;
+ vaddr = be64_to_cpu(reth->vaddr);
+ len = be32_to_cpu(reth->length);
+ /* The length needs to be in multiples of PAGE_SIZE */
+ if (!len || len & ~PAGE_MASK || len > qpriv->tid_rdma.local.max_len)
+ goto nack_inv;
+
+ diff = delta_psn(psn, qp->r_psn);
+ if (unlikely(diff)) {
+ tid_rdma_rcv_err(packet, ohdr, qp, psn, diff, fecn);
+ return;
+ }
+
+ /* We've verified the request, insert it into the ack queue. */
+ next = qp->r_head_ack_queue + 1;
+ if (next > rvt_size_atomic(ib_to_rvt(qp->ibqp.device)))
+ next = 0;
+ spin_lock_irqsave(&qp->s_lock, flags);
+ if (unlikely(next == qp->s_tail_ack_queue)) {
+ if (!qp->s_ack_queue[next].sent) {
+ nack_state = IB_NAK_REMOTE_OPERATIONAL_ERROR;
+ goto nack_inv_unlock;
+ }
+ update_ack_queue(qp, next);
+ }
+ e = &qp->s_ack_queue[qp->r_head_ack_queue];
+ release_rdma_sge_mr(e);
+
+ rkey = be32_to_cpu(reth->rkey);
+ qp->r_len = len;
+
+ if (unlikely(!rvt_rkey_ok(qp, &e->rdma_sge, qp->r_len, vaddr,
+ rkey, IB_ACCESS_REMOTE_READ)))
+ goto nack_acc;
+
+ /* Accept the request parameters */
+ if (tid_rdma_rcv_read_request(qp, e, packet, ohdr, bth0, psn, vaddr,
+ len))
+ goto nack_inv_unlock;
+
+ qp->r_state = e->opcode;
+ qp->r_nak_state = 0;
+ /*
+ * We need to increment the MSN here instead of when we
+ * finish sending the result since a duplicate request would
+ * increment it more than once.
+ */
+ qp->r_msn++;
+ qp->r_psn += e->lpsn - e->psn + 1;
+
+ qp->r_head_ack_queue = next;
+
+ /*
+ * For all requests other than TID WRITE which are added to the ack
+ * queue, qpriv->r_tid_alloc follows qp->r_head_ack_queue. It is ok to
+ * do this because of interlocks between these and TID WRITE
+ * requests. The same change has also been made in hfi1_rc_rcv().
+ */
+ qpriv->r_tid_alloc = qp->r_head_ack_queue;
+
+ /* Schedule the send tasklet. */
+ qp->s_flags |= RVT_S_RESP_PENDING;
+ if (fecn)
+ qp->s_flags |= RVT_S_ECN;
+ hfi1_schedule_send(qp);
+
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+ return;
+
+nack_inv_unlock:
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+nack_inv:
+ rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
+ qp->r_nak_state = nack_state;
+ qp->r_ack_psn = qp->r_psn;
+ /* Queue NAK for later */
+ rc_defered_ack(rcd, qp);
+ return;
+nack_acc:
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+ rvt_rc_error(qp, IB_WC_LOC_PROT_ERR);
+ qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR;
+ qp->r_ack_psn = qp->r_psn;
+}
+
+u32 hfi1_build_tid_rdma_read_resp(struct rvt_qp *qp, struct rvt_ack_entry *e,
+ struct ib_other_headers *ohdr, u32 *bth0,
+ u32 *bth1, u32 *bth2, u32 *len, bool *last)
+{
+ struct hfi1_ack_priv *epriv = e->priv;
+ struct tid_rdma_request *req = &epriv->tid_req;
+ struct hfi1_qp_priv *qpriv = qp->priv;
+ struct tid_rdma_flow *flow = &req->flows[req->clear_tail];
+ u32 tidentry = flow->tid_entry[flow->tid_idx];
+ u32 tidlen = EXP_TID_GET(tidentry, LEN) << PAGE_SHIFT;
+ struct tid_rdma_read_resp *resp = &ohdr->u.tid_rdma.r_rsp;
+ u32 next_offset, om = KDETH_OM_LARGE;
+ bool last_pkt;
+ u32 hdwords = 0;
+ struct tid_rdma_params *remote;
+
+ *len = min_t(u32, qp->pmtu, tidlen - flow->tid_offset);
+ flow->sent += *len;
+ next_offset = flow->tid_offset + *len;
+ last_pkt = (flow->sent >= flow->length);
+
+ trace_hfi1_tid_entry_build_read_resp(qp, flow->tid_idx, tidentry);
+ trace_hfi1_tid_flow_build_read_resp(qp, req->clear_tail, flow);
+
+ rcu_read_lock();
+ remote = rcu_dereference(qpriv->tid_rdma.remote);
+ if (!remote) {
+ rcu_read_unlock();
+ goto done;
+ }
+ KDETH_RESET(resp->kdeth0, KVER, 0x1);
+ KDETH_SET(resp->kdeth0, SH, !last_pkt);
+ KDETH_SET(resp->kdeth0, INTR, !!(!last_pkt && remote->urg));
+ KDETH_SET(resp->kdeth0, TIDCTRL, EXP_TID_GET(tidentry, CTRL));
+ KDETH_SET(resp->kdeth0, TID, EXP_TID_GET(tidentry, IDX));
+ KDETH_SET(resp->kdeth0, OM, om == KDETH_OM_LARGE);
+ KDETH_SET(resp->kdeth0, OFFSET, flow->tid_offset / om);
+ KDETH_RESET(resp->kdeth1, JKEY, remote->jkey);
+ resp->verbs_qp = cpu_to_be32(qp->remote_qpn);
+ rcu_read_unlock();
+
+ resp->aeth = rvt_compute_aeth(qp);
+ resp->verbs_psn = cpu_to_be32(mask_psn(flow->flow_state.ib_spsn +
+ flow->pkt));
+
+ *bth0 = TID_OP(READ_RESP) << 24;
+ *bth1 = flow->tid_qpn;
+ *bth2 = mask_psn(((flow->flow_state.spsn + flow->pkt++) &
+ HFI1_KDETH_BTH_SEQ_MASK) |
+ (flow->flow_state.generation <<
+ HFI1_KDETH_BTH_SEQ_SHIFT));
+ *last = last_pkt;
+ if (last_pkt)
+ /* Advance to next flow */
+ req->clear_tail = (req->clear_tail + 1) &
+ (MAX_FLOWS - 1);
+
+ if (next_offset >= tidlen) {
+ flow->tid_offset = 0;
+ flow->tid_idx++;
+ } else {
+ flow->tid_offset = next_offset;
+ }
+
+ hdwords = sizeof(ohdr->u.tid_rdma.r_rsp) / sizeof(u32);
+
+done:
+ return hdwords;
+}
+
+static inline struct tid_rdma_request *
+find_tid_request(struct rvt_qp *qp, u32 psn, enum ib_wr_opcode opcode)
+ __must_hold(&qp->s_lock)
+{
+ struct rvt_swqe *wqe;
+ struct tid_rdma_request *req = NULL;
+ u32 i, end;
+
+ end = qp->s_cur + 1;
+ if (end == qp->s_size)
+ end = 0;
+ for (i = qp->s_acked; i != end;) {
+ wqe = rvt_get_swqe_ptr(qp, i);
+ if (cmp_psn(psn, wqe->psn) >= 0 &&
+ cmp_psn(psn, wqe->lpsn) <= 0) {
+ if (wqe->wr.opcode == opcode)
+ req = wqe_to_tid_req(wqe);
+ break;
+ }
+ if (++i == qp->s_size)
+ i = 0;
+ }
+
+ return req;
+}
+
+void hfi1_rc_rcv_tid_rdma_read_resp(struct hfi1_packet *packet)
+{
+ /* HANDLER FOR TID RDMA READ RESPONSE packet (Requester side) */
+
+ /*
+ * 1. Find matching SWQE
+ * 2. Check that the entire segment has been read.
+ * 3. Remove HFI1_S_WAIT_TID_RESP from s_flags.
+ * 4. Free the TID flow resources.
+ * 5. Kick the send engine (hfi1_schedule_send())
+ */
+ struct ib_other_headers *ohdr = packet->ohdr;
+ struct rvt_qp *qp = packet->qp;
+ struct hfi1_qp_priv *priv = qp->priv;
+ struct hfi1_ctxtdata *rcd = packet->rcd;
+ struct tid_rdma_request *req;
+ struct tid_rdma_flow *flow;
+ u32 opcode, aeth;
+ bool fecn;
+ unsigned long flags;
+ u32 kpsn, ipsn;
+
+ trace_hfi1_sender_rcv_tid_read_resp(qp);
+ fecn = process_ecn(qp, packet);
+ kpsn = mask_psn(be32_to_cpu(ohdr->bth[2]));
+ aeth = be32_to_cpu(ohdr->u.tid_rdma.r_rsp.aeth);
+ opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff;
+
+ spin_lock_irqsave(&qp->s_lock, flags);
+ ipsn = mask_psn(be32_to_cpu(ohdr->u.tid_rdma.r_rsp.verbs_psn));
+ req = find_tid_request(qp, ipsn, IB_WR_TID_RDMA_READ);
+ if (unlikely(!req))
+ goto ack_op_err;
+
+ flow = &req->flows[req->clear_tail];
+ /* When header suppression is disabled */
+ if (cmp_psn(ipsn, flow->flow_state.ib_lpsn)) {
+ update_r_next_psn_fecn(packet, priv, rcd, flow, fecn);
+
+ if (cmp_psn(kpsn, flow->flow_state.r_next_psn))
+ goto ack_done;
+ flow->flow_state.r_next_psn = mask_psn(kpsn + 1);
+ /*
+ * Copy the payload to destination buffer if this packet is
+ * delivered as an eager packet due to RSM rule and FECN.
+ * The RSM rule selects FECN bit in BTH and SH bit in
+ * KDETH header and therefore will not match the last
+ * packet of each segment that has SH bit cleared.
+ */
+ if (fecn && packet->etype == RHF_RCV_TYPE_EAGER) {
+ struct rvt_sge_state ss;
+ u32 len;
+ u32 tlen = packet->tlen;
+ u16 hdrsize = packet->hlen;
+ u8 pad = packet->pad;
+ u8 extra_bytes = pad + packet->extra_byte +
+ (SIZE_OF_CRC << 2);
+ u32 pmtu = qp->pmtu;
+
+ if (unlikely(tlen != (hdrsize + pmtu + extra_bytes)))
+ goto ack_op_err;
+ len = restart_sge(&ss, req->e.swqe, ipsn, pmtu);
+ if (unlikely(len < pmtu))
+ goto ack_op_err;
+ rvt_copy_sge(qp, &ss, packet->payload, pmtu, false,
+ false);
+ /* Raise the sw sequence check flag for next packet */
+ priv->s_flags |= HFI1_R_TID_SW_PSN;
+ }
+
+ goto ack_done;
+ }
+ flow->flow_state.r_next_psn = mask_psn(kpsn + 1);
+ req->ack_pending--;
+ priv->pending_tid_r_segs--;
+ qp->s_num_rd_atomic--;
+ if ((qp->s_flags & RVT_S_WAIT_FENCE) &&
+ !qp->s_num_rd_atomic) {
+ qp->s_flags &= ~(RVT_S_WAIT_FENCE |
+ RVT_S_WAIT_ACK);
+ hfi1_schedule_send(qp);
+ }
+ if (qp->s_flags & RVT_S_WAIT_RDMAR) {
+ qp->s_flags &= ~(RVT_S_WAIT_RDMAR | RVT_S_WAIT_ACK);
+ hfi1_schedule_send(qp);
+ }
+
+ trace_hfi1_ack(qp, ipsn);
+ trace_hfi1_tid_req_rcv_read_resp(qp, 0, req->e.swqe->wr.opcode,
+ req->e.swqe->psn, req->e.swqe->lpsn,
+ req);
+ trace_hfi1_tid_flow_rcv_read_resp(qp, req->clear_tail, flow);
+
+ /* Release the tid resources */
+ hfi1_kern_exp_rcv_clear(req);
+
+ if (!do_rc_ack(qp, aeth, ipsn, opcode, 0, rcd))
+ goto ack_done;
+
+ /* If not done yet, build next read request */
+ if (++req->comp_seg >= req->total_segs) {
+ priv->tid_r_comp++;
+ req->state = TID_REQUEST_COMPLETE;
+ }
+
+ /*
+ * Clear the hw flow under two conditions:
+ * 1. This request is a sync point and it is complete;
+ * 2. Current request is completed and there are no more requests.
+ */
+ if ((req->state == TID_REQUEST_SYNC &&
+ req->comp_seg == req->cur_seg) ||
+ priv->tid_r_comp == priv->tid_r_reqs) {
+ hfi1_kern_clear_hw_flow(priv->rcd, qp);
+ priv->s_flags &= ~HFI1_R_TID_SW_PSN;
+ if (req->state == TID_REQUEST_SYNC)
+ req->state = TID_REQUEST_ACTIVE;
+ }
+
+ hfi1_schedule_send(qp);
+ goto ack_done;
+
+ack_op_err:
+ /*
+ * The test indicates that the send engine has finished its cleanup
+ * after sending the request and it's now safe to put the QP into error
+ * state. However, if the wqe queue is empty (qp->s_acked == qp->s_tail
+ * == qp->s_head), it would be unsafe to complete the wqe pointed by
+ * qp->s_acked here. Putting the qp into error state will safely flush
+ * all remaining requests.
+ */
+ if (qp->s_last == qp->s_acked)
+ rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
+
+ack_done:
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+}
+
+void hfi1_kern_read_tid_flow_free(struct rvt_qp *qp)
+ __must_hold(&qp->s_lock)
+{
+ u32 n = qp->s_acked;
+ struct rvt_swqe *wqe;
+ struct tid_rdma_request *req;
+ struct hfi1_qp_priv *priv = qp->priv;
+
+ lockdep_assert_held(&qp->s_lock);
+ /* Free any TID entries */
+ while (n != qp->s_tail) {
+ wqe = rvt_get_swqe_ptr(qp, n);
+ if (wqe->wr.opcode == IB_WR_TID_RDMA_READ) {
+ req = wqe_to_tid_req(wqe);
+ hfi1_kern_exp_rcv_clear_all(req);
+ }
+
+ if (++n == qp->s_size)
+ n = 0;
+ }
+ /* Free flow */
+ hfi1_kern_clear_hw_flow(priv->rcd, qp);
+}
+
+static bool tid_rdma_tid_err(struct hfi1_packet *packet, u8 rcv_type)
+{
+ struct rvt_qp *qp = packet->qp;
+
+ if (rcv_type >= RHF_RCV_TYPE_IB)
+ goto done;
+
+ spin_lock(&qp->s_lock);
+
+ /*
+ * We've ran out of space in the eager buffer.
+ * Eagerly received KDETH packets which require space in the
+ * Eager buffer (packet that have payload) are TID RDMA WRITE
+ * response packets. In this case, we have to re-transmit the
+ * TID RDMA WRITE request.
+ */
+ if (rcv_type == RHF_RCV_TYPE_EAGER) {
+ hfi1_restart_rc(qp, qp->s_last_psn + 1, 1);
+ hfi1_schedule_send(qp);
+ }
+
+ /* Since no payload is delivered, just drop the packet */
+ spin_unlock(&qp->s_lock);
+done:
+ return true;
+}
+
+static void restart_tid_rdma_read_req(struct hfi1_ctxtdata *rcd,
+ struct rvt_qp *qp, struct rvt_swqe *wqe)
+{
+ struct tid_rdma_request *req;
+ struct tid_rdma_flow *flow;
+
+ /* Start from the right segment */
+ qp->r_flags |= RVT_R_RDMAR_SEQ;
+ req = wqe_to_tid_req(wqe);
+ flow = &req->flows[req->clear_tail];
+ hfi1_restart_rc(qp, flow->flow_state.ib_spsn, 0);
+ if (list_empty(&qp->rspwait)) {
+ qp->r_flags |= RVT_R_RSP_SEND;
+ rvt_get_qp(qp);
+ list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
+ }
+}
+
+/*
+ * Handle the KDETH eflags for TID RDMA READ response.
+ *
+ * Return true if the last packet for a segment has been received and it is
+ * time to process the response normally; otherwise, return true.
+ *
+ * The caller must hold the packet->qp->r_lock and the rcu_read_lock.
+ */
+static bool handle_read_kdeth_eflags(struct hfi1_ctxtdata *rcd,
+ struct hfi1_packet *packet, u8 rcv_type,
+ u8 rte, u32 psn, u32 ibpsn)
+ __must_hold(&packet->qp->r_lock) __must_hold(RCU)
+{
+ struct hfi1_pportdata *ppd = rcd->ppd;
+ struct hfi1_devdata *dd = ppd->dd;
+ struct hfi1_ibport *ibp;
+ struct rvt_swqe *wqe;
+ struct tid_rdma_request *req;
+ struct tid_rdma_flow *flow;
+ u32 ack_psn;
+ struct rvt_qp *qp = packet->qp;
+ struct hfi1_qp_priv *priv = qp->priv;
+ bool ret = true;
+ int diff = 0;
+ u32 fpsn;
+
+ lockdep_assert_held(&qp->r_lock);
+ trace_hfi1_rsp_read_kdeth_eflags(qp, ibpsn);
+ trace_hfi1_sender_read_kdeth_eflags(qp);
+ trace_hfi1_tid_read_sender_kdeth_eflags(qp, 0);
+ spin_lock(&qp->s_lock);
+ /* If the psn is out of valid range, drop the packet */
+ if (cmp_psn(ibpsn, qp->s_last_psn) < 0 ||
+ cmp_psn(ibpsn, qp->s_psn) > 0)
+ goto s_unlock;
+
+ /*
+ * Note that NAKs implicitly ACK outstanding SEND and RDMA write
+ * requests and implicitly NAK RDMA read and atomic requests issued
+ * before the NAK'ed request.
+ */
+ ack_psn = ibpsn - 1;
+ wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
+ ibp = to_iport(qp->ibqp.device, qp->port_num);
+
+ /* Complete WQEs that the PSN finishes. */
+ while ((int)delta_psn(ack_psn, wqe->lpsn) >= 0) {
+ /*
+ * If this request is a RDMA read or atomic, and the NACK is
+ * for a later operation, this NACK NAKs the RDMA read or
+ * atomic.
+ */
+ if (wqe->wr.opcode == IB_WR_RDMA_READ ||
+ wqe->wr.opcode == IB_WR_TID_RDMA_READ ||
+ wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
+ wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) {
+ /* Retry this request. */
+ if (!(qp->r_flags & RVT_R_RDMAR_SEQ)) {
+ qp->r_flags |= RVT_R_RDMAR_SEQ;
+ if (wqe->wr.opcode == IB_WR_TID_RDMA_READ) {
+ restart_tid_rdma_read_req(rcd, qp,
+ wqe);
+ } else {
+ hfi1_restart_rc(qp, qp->s_last_psn + 1,
+ 0);
+ if (list_empty(&qp->rspwait)) {
+ qp->r_flags |= RVT_R_RSP_SEND;
+ rvt_get_qp(qp);
+ list_add_tail(/* wait */
+ &qp->rspwait,
+ &rcd->qp_wait_list);
+ }
+ }
+ }
+ /*
+ * No need to process the NAK since we are
+ * restarting an earlier request.
+ */
+ break;
+ }
+
+ wqe = do_rc_completion(qp, wqe, ibp);
+ if (qp->s_acked == qp->s_tail)
+ goto s_unlock;
+ }
+
+ if (qp->s_acked == qp->s_tail)
+ goto s_unlock;
+
+ /* Handle the eflags for the request */
+ if (wqe->wr.opcode != IB_WR_TID_RDMA_READ)
+ goto s_unlock;
+
+ req = wqe_to_tid_req(wqe);
+ trace_hfi1_tid_req_read_kdeth_eflags(qp, 0, wqe->wr.opcode, wqe->psn,
+ wqe->lpsn, req);
+ switch (rcv_type) {
+ case RHF_RCV_TYPE_EXPECTED:
+ switch (rte) {
+ case RHF_RTE_EXPECTED_FLOW_SEQ_ERR:
+ /*
+ * On the first occurrence of a Flow Sequence error,
+ * the flag TID_FLOW_SW_PSN is set.
+ *
+ * After that, the flow is *not* reprogrammed and the
+ * protocol falls back to SW PSN checking. This is done
+ * to prevent continuous Flow Sequence errors for any
+ * packets that could be still in the fabric.
+ */
+ flow = &req->flows[req->clear_tail];
+ trace_hfi1_tid_flow_read_kdeth_eflags(qp,
+ req->clear_tail,
+ flow);
+ if (priv->s_flags & HFI1_R_TID_SW_PSN) {
+ diff = cmp_psn(psn,
+ flow->flow_state.r_next_psn);
+ if (diff > 0) {
+ /* Drop the packet.*/
+ goto s_unlock;
+ } else if (diff < 0) {
+ /*
+ * If a response packet for a restarted
+ * request has come back, reset the
+ * restart flag.
+ */
+ if (qp->r_flags & RVT_R_RDMAR_SEQ)
+ qp->r_flags &=
+ ~RVT_R_RDMAR_SEQ;
+
+ /* Drop the packet.*/
+ goto s_unlock;
+ }
+
+ /*
+ * If SW PSN verification is successful and
+ * this is the last packet in the segment, tell
+ * the caller to process it as a normal packet.
+ */
+ fpsn = full_flow_psn(flow,
+ flow->flow_state.lpsn);
+ if (cmp_psn(fpsn, psn) == 0) {
+ ret = false;
+ if (qp->r_flags & RVT_R_RDMAR_SEQ)
+ qp->r_flags &=
+ ~RVT_R_RDMAR_SEQ;
+ }
+ flow->flow_state.r_next_psn =
+ mask_psn(psn + 1);
+ } else {
+ u32 last_psn;
+
+ last_psn = read_r_next_psn(dd, rcd->ctxt,
+ flow->idx);
+ flow->flow_state.r_next_psn = last_psn;
+ priv->s_flags |= HFI1_R_TID_SW_PSN;
+ /*
+ * If no request has been restarted yet,
+ * restart the current one.
+ */
+ if (!(qp->r_flags & RVT_R_RDMAR_SEQ))
+ restart_tid_rdma_read_req(rcd, qp,
+ wqe);
+ }
+
+ break;
+
+ case RHF_RTE_EXPECTED_FLOW_GEN_ERR:
+ /*
+ * Since the TID flow is able to ride through
+ * generation mismatch, drop this stale packet.
+ */
+ break;
+
+ default:
+ break;
+ }
+ break;
+
+ case RHF_RCV_TYPE_ERROR:
+ switch (rte) {
+ case RHF_RTE_ERROR_OP_CODE_ERR:
+ case RHF_RTE_ERROR_KHDR_MIN_LEN_ERR:
+ case RHF_RTE_ERROR_KHDR_HCRC_ERR:
+ case RHF_RTE_ERROR_KHDR_KVER_ERR:
+ case RHF_RTE_ERROR_CONTEXT_ERR:
+ case RHF_RTE_ERROR_KHDR_TID_ERR:
+ default:
+ break;
+ }
+ break;
+ default:
+ break;
+ }
+s_unlock:
+ spin_unlock(&qp->s_lock);
+ return ret;
+}
+
+bool hfi1_handle_kdeth_eflags(struct hfi1_ctxtdata *rcd,
+ struct hfi1_pportdata *ppd,
+ struct hfi1_packet *packet)
+{
+ struct hfi1_ibport *ibp = &ppd->ibport_data;
+ struct hfi1_devdata *dd = ppd->dd;
+ struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
+ u8 rcv_type = rhf_rcv_type(packet->rhf);
+ u8 rte = rhf_rcv_type_err(packet->rhf);
+ struct ib_header *hdr = packet->hdr;
+ struct ib_other_headers *ohdr = NULL;
+ int lnh = be16_to_cpu(hdr->lrh[0]) & 3;
+ u16 lid = be16_to_cpu(hdr->lrh[1]);
+ u8 opcode;
+ u32 qp_num, psn, ibpsn;
+ struct rvt_qp *qp;
+ struct hfi1_qp_priv *qpriv;
+ unsigned long flags;
+ bool ret = true;
+ struct rvt_ack_entry *e;
+ struct tid_rdma_request *req;
+ struct tid_rdma_flow *flow;
+ int diff = 0;
+
+ trace_hfi1_msg_handle_kdeth_eflags(NULL, "Kdeth error: rhf ",
+ packet->rhf);
+ if (packet->rhf & RHF_ICRC_ERR)
+ return ret;
+
+ packet->ohdr = &hdr->u.oth;
+ ohdr = packet->ohdr;
+ trace_input_ibhdr(rcd->dd, packet, !!(rhf_dc_info(packet->rhf)));
+
+ /* Get the destination QP number. */
+ qp_num = be32_to_cpu(ohdr->u.tid_rdma.r_rsp.verbs_qp) &
+ RVT_QPN_MASK;
+ if (lid >= be16_to_cpu(IB_MULTICAST_LID_BASE))
+ goto drop;
+
+ psn = mask_psn(be32_to_cpu(ohdr->bth[2]));
+ opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff;
+
+ rcu_read_lock();
+ qp = rvt_lookup_qpn(rdi, &ibp->rvp, qp_num);
+ if (!qp)
+ goto rcu_unlock;
+
+ packet->qp = qp;
+
+ /* Check for valid receive state. */
+ spin_lock_irqsave(&qp->r_lock, flags);
+ if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) {
+ ibp->rvp.n_pkt_drops++;
+ goto r_unlock;
+ }
+
+ if (packet->rhf & RHF_TID_ERR) {
+ /* For TIDERR and RC QPs preemptively schedule a NAK */
+ u32 tlen = rhf_pkt_len(packet->rhf); /* in bytes */
+
+ /* Sanity check packet */
+ if (tlen < 24)
+ goto r_unlock;
+
+ /*
+ * Check for GRH. We should never get packets with GRH in this
+ * path.
+ */
+ if (lnh == HFI1_LRH_GRH)
+ goto r_unlock;
+
+ if (tid_rdma_tid_err(packet, rcv_type))
+ goto r_unlock;
+ }
+
+ /* handle TID RDMA READ */
+ if (opcode == TID_OP(READ_RESP)) {
+ ibpsn = be32_to_cpu(ohdr->u.tid_rdma.r_rsp.verbs_psn);
+ ibpsn = mask_psn(ibpsn);
+ ret = handle_read_kdeth_eflags(rcd, packet, rcv_type, rte, psn,
+ ibpsn);
+ goto r_unlock;
+ }
+
+ /*
+ * qp->s_tail_ack_queue points to the rvt_ack_entry currently being
+ * processed. These a completed sequentially so we can be sure that
+ * the pointer will not change until the entire request has completed.
+ */
+ spin_lock(&qp->s_lock);
+ qpriv = qp->priv;
+ if (qpriv->r_tid_tail == HFI1_QP_WQE_INVALID ||
+ qpriv->r_tid_tail == qpriv->r_tid_head)
+ goto unlock;
+ e = &qp->s_ack_queue[qpriv->r_tid_tail];
+ if (e->opcode != TID_OP(WRITE_REQ))
+ goto unlock;
+ req = ack_to_tid_req(e);
+ if (req->comp_seg == req->cur_seg)
+ goto unlock;
+ flow = &req->flows[req->clear_tail];
+ trace_hfi1_eflags_err_write(qp, rcv_type, rte, psn);
+ trace_hfi1_rsp_handle_kdeth_eflags(qp, psn);
+ trace_hfi1_tid_write_rsp_handle_kdeth_eflags(qp);
+ trace_hfi1_tid_req_handle_kdeth_eflags(qp, 0, e->opcode, e->psn,
+ e->lpsn, req);
+ trace_hfi1_tid_flow_handle_kdeth_eflags(qp, req->clear_tail, flow);
+
+ switch (rcv_type) {
+ case RHF_RCV_TYPE_EXPECTED:
+ switch (rte) {
+ case RHF_RTE_EXPECTED_FLOW_SEQ_ERR:
+ if (!(qpriv->s_flags & HFI1_R_TID_SW_PSN)) {
+ qpriv->s_flags |= HFI1_R_TID_SW_PSN;
+ flow->flow_state.r_next_psn =
+ read_r_next_psn(dd, rcd->ctxt,
+ flow->idx);
+ qpriv->r_next_psn_kdeth =
+ flow->flow_state.r_next_psn;
+ goto nak_psn;
+ } else {
+ /*
+ * If the received PSN does not match the next
+ * expected PSN, NAK the packet.
+ * However, only do that if we know that the a
+ * NAK has already been sent. Otherwise, this
+ * mismatch could be due to packets that were
+ * already in flight.
+ */
+ diff = cmp_psn(psn,
+ flow->flow_state.r_next_psn);
+ if (diff > 0)
+ goto nak_psn;
+ else if (diff < 0)
+ break;
+
+ qpriv->s_nak_state = 0;
+ /*
+ * If SW PSN verification is successful and this
+ * is the last packet in the segment, tell the
+ * caller to process it as a normal packet.
+ */
+ if (psn == full_flow_psn(flow,
+ flow->flow_state.lpsn))
+ ret = false;
+ flow->flow_state.r_next_psn =
+ mask_psn(psn + 1);
+ qpriv->r_next_psn_kdeth =
+ flow->flow_state.r_next_psn;
+ }
+ break;
+
+ case RHF_RTE_EXPECTED_FLOW_GEN_ERR:
+ goto nak_psn;
+
+ default:
+ break;
+ }
+ break;
+
+ case RHF_RCV_TYPE_ERROR:
+ switch (rte) {
+ case RHF_RTE_ERROR_OP_CODE_ERR:
+ case RHF_RTE_ERROR_KHDR_MIN_LEN_ERR:
+ case RHF_RTE_ERROR_KHDR_HCRC_ERR:
+ case RHF_RTE_ERROR_KHDR_KVER_ERR:
+ case RHF_RTE_ERROR_CONTEXT_ERR:
+ case RHF_RTE_ERROR_KHDR_TID_ERR:
+ default:
+ break;
+ }
+ break;
+ default:
+ break;
+ }
+
+unlock:
+ spin_unlock(&qp->s_lock);
+r_unlock:
+ spin_unlock_irqrestore(&qp->r_lock, flags);
+rcu_unlock:
+ rcu_read_unlock();
+drop:
+ return ret;
+nak_psn:
+ ibp->rvp.n_rc_seqnak++;
+ if (!qpriv->s_nak_state) {
+ qpriv->s_nak_state = IB_NAK_PSN_ERROR;
+ /* We are NAK'ing the next expected PSN */
+ qpriv->s_nak_psn = mask_psn(flow->flow_state.r_next_psn);
+ tid_rdma_trigger_ack(qp);
+ }
+ goto unlock;
+}
+
+/*
+ * "Rewind" the TID request information.
+ * This means that we reset the state back to ACTIVE,
+ * find the proper flow, set the flow index to that flow,
+ * and reset the flow information.
+ */
+void hfi1_tid_rdma_restart_req(struct rvt_qp *qp, struct rvt_swqe *wqe,
+ u32 *bth2)
+{
+ struct tid_rdma_request *req = wqe_to_tid_req(wqe);
+ struct tid_rdma_flow *flow;
+ struct hfi1_qp_priv *qpriv = qp->priv;
+ int diff, delta_pkts;
+ u32 tididx = 0, i;
+ u16 fidx;
+
+ if (wqe->wr.opcode == IB_WR_TID_RDMA_READ) {
+ *bth2 = mask_psn(qp->s_psn);
+ flow = find_flow_ib(req, *bth2, &fidx);
+ if (!flow) {
+ trace_hfi1_msg_tid_restart_req(/* msg */
+ qp, "!!!!!! Could not find flow to restart: bth2 ",
+ (u64)*bth2);
+ trace_hfi1_tid_req_restart_req(qp, 0, wqe->wr.opcode,
+ wqe->psn, wqe->lpsn,
+ req);
+ return;
+ }
+ } else {
+ fidx = req->acked_tail;
+ flow = &req->flows[fidx];
+ *bth2 = mask_psn(req->r_ack_psn);
+ }
+
+ if (wqe->wr.opcode == IB_WR_TID_RDMA_READ)
+ delta_pkts = delta_psn(*bth2, flow->flow_state.ib_spsn);
+ else
+ delta_pkts = delta_psn(*bth2,
+ full_flow_psn(flow,
+ flow->flow_state.spsn));
+
+ trace_hfi1_tid_flow_restart_req(qp, fidx, flow);
+ diff = delta_pkts + flow->resync_npkts;
+
+ flow->sent = 0;
+ flow->pkt = 0;
+ flow->tid_idx = 0;
+ flow->tid_offset = 0;
+ if (diff) {
+ for (tididx = 0; tididx < flow->tidcnt; tididx++) {
+ u32 tidentry = flow->tid_entry[tididx], tidlen,
+ tidnpkts, npkts;
+
+ flow->tid_offset = 0;
+ tidlen = EXP_TID_GET(tidentry, LEN) * PAGE_SIZE;
+ tidnpkts = rvt_div_round_up_mtu(qp, tidlen);
+ npkts = min_t(u32, diff, tidnpkts);
+ flow->pkt += npkts;
+ flow->sent += (npkts == tidnpkts ? tidlen :
+ npkts * qp->pmtu);
+ flow->tid_offset += npkts * qp->pmtu;
+ diff -= npkts;
+ if (!diff)
+ break;
+ }
+ }
+ if (wqe->wr.opcode == IB_WR_TID_RDMA_WRITE) {
+ rvt_skip_sge(&qpriv->tid_ss, (req->cur_seg * req->seg_len) +
+ flow->sent, 0);
+ /*
+ * Packet PSN is based on flow_state.spsn + flow->pkt. However,
+ * during a RESYNC, the generation is incremented and the
+ * sequence is reset to 0. Since we've adjusted the npkts in the
+ * flow and the SGE has been sufficiently advanced, we have to
+ * adjust flow->pkt in order to calculate the correct PSN.
+ */
+ flow->pkt -= flow->resync_npkts;
+ }
+
+ if (flow->tid_offset ==
+ EXP_TID_GET(flow->tid_entry[tididx], LEN) * PAGE_SIZE) {
+ tididx++;
+ flow->tid_offset = 0;
+ }
+ flow->tid_idx = tididx;
+ if (wqe->wr.opcode == IB_WR_TID_RDMA_READ)
+ /* Move flow_idx to correct index */
+ req->flow_idx = fidx;
+ else
+ req->clear_tail = fidx;
+
+ trace_hfi1_tid_flow_restart_req(qp, fidx, flow);
+ trace_hfi1_tid_req_restart_req(qp, 0, wqe->wr.opcode, wqe->psn,
+ wqe->lpsn, req);
+ req->state = TID_REQUEST_ACTIVE;
+ if (wqe->wr.opcode == IB_WR_TID_RDMA_WRITE) {
+ /* Reset all the flows that we are going to resend */
+ fidx = CIRC_NEXT(fidx, MAX_FLOWS);
+ i = qpriv->s_tid_tail;
+ do {
+ for (; CIRC_CNT(req->setup_head, fidx, MAX_FLOWS);
+ fidx = CIRC_NEXT(fidx, MAX_FLOWS)) {
+ req->flows[fidx].sent = 0;
+ req->flows[fidx].pkt = 0;
+ req->flows[fidx].tid_idx = 0;
+ req->flows[fidx].tid_offset = 0;
+ req->flows[fidx].resync_npkts = 0;
+ }
+ if (i == qpriv->s_tid_cur)
+ break;
+ do {
+ i = (++i == qp->s_size ? 0 : i);
+ wqe = rvt_get_swqe_ptr(qp, i);
+ } while (wqe->wr.opcode != IB_WR_TID_RDMA_WRITE);
+ req = wqe_to_tid_req(wqe);
+ req->cur_seg = req->ack_seg;
+ fidx = req->acked_tail;
+ /* Pull req->clear_tail back */
+ req->clear_tail = fidx;
+ } while (1);
+ }
+}
+
+void hfi1_qp_kern_exp_rcv_clear_all(struct rvt_qp *qp)
+{
+ int i, ret;
+ struct hfi1_qp_priv *qpriv = qp->priv;
+ struct tid_flow_state *fs;
+
+ if (qp->ibqp.qp_type != IB_QPT_RC || !HFI1_CAP_IS_KSET(TID_RDMA))
+ return;
+
+ /*
+ * First, clear the flow to help prevent any delayed packets from
+ * being delivered.
+ */
+ fs = &qpriv->flow_state;
+ if (fs->index != RXE_NUM_TID_FLOWS)
+ hfi1_kern_clear_hw_flow(qpriv->rcd, qp);
+
+ for (i = qp->s_acked; i != qp->s_head;) {
+ struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, i);
+
+ if (++i == qp->s_size)
+ i = 0;
+ /* Free only locally allocated TID entries */
+ if (wqe->wr.opcode != IB_WR_TID_RDMA_READ)
+ continue;
+ do {
+ struct hfi1_swqe_priv *priv = wqe->priv;
+
+ ret = hfi1_kern_exp_rcv_clear(&priv->tid_req);
+ } while (!ret);
+ }
+ for (i = qp->s_acked_ack_queue; i != qp->r_head_ack_queue;) {
+ struct rvt_ack_entry *e = &qp->s_ack_queue[i];
+
+ if (++i == rvt_max_atomic(ib_to_rvt(qp->ibqp.device)))
+ i = 0;
+ /* Free only locally allocated TID entries */
+ if (e->opcode != TID_OP(WRITE_REQ))
+ continue;
+ do {
+ struct hfi1_ack_priv *priv = e->priv;
+
+ ret = hfi1_kern_exp_rcv_clear(&priv->tid_req);
+ } while (!ret);
+ }
+}
+
+bool hfi1_tid_rdma_wqe_interlock(struct rvt_qp *qp, struct rvt_swqe *wqe)
+{
+ struct rvt_swqe *prev;
+ struct hfi1_qp_priv *priv = qp->priv;
+ u32 s_prev;
+ struct tid_rdma_request *req;
+
+ s_prev = (qp->s_cur == 0 ? qp->s_size : qp->s_cur) - 1;
+ prev = rvt_get_swqe_ptr(qp, s_prev);
+
+ switch (wqe->wr.opcode) {
+ case IB_WR_SEND:
+ case IB_WR_SEND_WITH_IMM:
+ case IB_WR_SEND_WITH_INV:
+ case IB_WR_ATOMIC_CMP_AND_SWP:
+ case IB_WR_ATOMIC_FETCH_AND_ADD:
+ case IB_WR_RDMA_WRITE:
+ case IB_WR_RDMA_WRITE_WITH_IMM:
+ switch (prev->wr.opcode) {
+ case IB_WR_TID_RDMA_WRITE:
+ req = wqe_to_tid_req(prev);
+ if (req->ack_seg != req->total_segs)
+ goto interlock;
+ break;
+ default:
+ break;
+ }
+ break;
+ case IB_WR_RDMA_READ:
+ if (prev->wr.opcode != IB_WR_TID_RDMA_WRITE)
+ break;
+ fallthrough;
+ case IB_WR_TID_RDMA_READ:
+ switch (prev->wr.opcode) {
+ case IB_WR_RDMA_READ:
+ if (qp->s_acked != qp->s_cur)
+ goto interlock;
+ break;
+ case IB_WR_TID_RDMA_WRITE:
+ req = wqe_to_tid_req(prev);
+ if (req->ack_seg != req->total_segs)
+ goto interlock;
+ break;
+ default:
+ break;
+ }
+ break;
+ default:
+ break;
+ }
+ return false;
+
+interlock:
+ priv->s_flags |= HFI1_S_TID_WAIT_INTERLCK;
+ return true;
+}
+
+/* Does @sge meet the alignment requirements for tid rdma? */
+static inline bool hfi1_check_sge_align(struct rvt_qp *qp,
+ struct rvt_sge *sge, int num_sge)
+{
+ int i;
+
+ for (i = 0; i < num_sge; i++, sge++) {
+ trace_hfi1_sge_check_align(qp, i, sge);
+ if ((u64)sge->vaddr & ~PAGE_MASK ||
+ sge->sge_length & ~PAGE_MASK)
+ return false;
+ }
+ return true;
+}
+
+void setup_tid_rdma_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe)
+{
+ struct hfi1_qp_priv *qpriv = (struct hfi1_qp_priv *)qp->priv;
+ struct hfi1_swqe_priv *priv = wqe->priv;
+ struct tid_rdma_params *remote;
+ enum ib_wr_opcode new_opcode;
+ bool do_tid_rdma = false;
+ struct hfi1_pportdata *ppd = qpriv->rcd->ppd;
+
+ if ((rdma_ah_get_dlid(&qp->remote_ah_attr) & ~((1 << ppd->lmc) - 1)) ==
+ ppd->lid)
+ return;
+ if (qpriv->hdr_type != HFI1_PKT_TYPE_9B)
+ return;
+
+ rcu_read_lock();
+ remote = rcu_dereference(qpriv->tid_rdma.remote);
+ /*
+ * If TID RDMA is disabled by the negotiation, don't
+ * use it.
+ */
+ if (!remote)
+ goto exit;
+
+ if (wqe->wr.opcode == IB_WR_RDMA_READ) {
+ if (hfi1_check_sge_align(qp, &wqe->sg_list[0],
+ wqe->wr.num_sge)) {
+ new_opcode = IB_WR_TID_RDMA_READ;
+ do_tid_rdma = true;
+ }
+ } else if (wqe->wr.opcode == IB_WR_RDMA_WRITE) {
+ /*
+ * TID RDMA is enabled for this RDMA WRITE request iff:
+ * 1. The remote address is page-aligned,
+ * 2. The length is larger than the minimum segment size,
+ * 3. The length is page-multiple.
+ */
+ if (!(wqe->rdma_wr.remote_addr & ~PAGE_MASK) &&
+ !(wqe->length & ~PAGE_MASK)) {
+ new_opcode = IB_WR_TID_RDMA_WRITE;
+ do_tid_rdma = true;
+ }
+ }
+
+ if (do_tid_rdma) {
+ if (hfi1_kern_exp_rcv_alloc_flows(&priv->tid_req, GFP_ATOMIC))
+ goto exit;
+ wqe->wr.opcode = new_opcode;
+ priv->tid_req.seg_len =
+ min_t(u32, remote->max_len, wqe->length);
+ priv->tid_req.total_segs =
+ DIV_ROUND_UP(wqe->length, priv->tid_req.seg_len);
+ /* Compute the last PSN of the request */
+ wqe->lpsn = wqe->psn;
+ if (wqe->wr.opcode == IB_WR_TID_RDMA_READ) {
+ priv->tid_req.n_flows = remote->max_read;
+ qpriv->tid_r_reqs++;
+ wqe->lpsn += rvt_div_round_up_mtu(qp, wqe->length) - 1;
+ } else {
+ wqe->lpsn += priv->tid_req.total_segs - 1;
+ atomic_inc(&qpriv->n_requests);
+ }
+
+ priv->tid_req.cur_seg = 0;
+ priv->tid_req.comp_seg = 0;
+ priv->tid_req.ack_seg = 0;
+ priv->tid_req.state = TID_REQUEST_INACTIVE;
+ /*
+ * Reset acked_tail.
+ * TID RDMA READ does not have ACKs so it does not
+ * update the pointer. We have to reset it so TID RDMA
+ * WRITE does not get confused.
+ */
+ priv->tid_req.acked_tail = priv->tid_req.setup_head;
+ trace_hfi1_tid_req_setup_tid_wqe(qp, 1, wqe->wr.opcode,
+ wqe->psn, wqe->lpsn,
+ &priv->tid_req);
+ }
+exit:
+ rcu_read_unlock();
+}
+
+/* TID RDMA WRITE functions */
+
+u32 hfi1_build_tid_rdma_write_req(struct rvt_qp *qp, struct rvt_swqe *wqe,
+ struct ib_other_headers *ohdr,
+ u32 *bth1, u32 *bth2, u32 *len)
+{
+ struct hfi1_qp_priv *qpriv = qp->priv;
+ struct tid_rdma_request *req = wqe_to_tid_req(wqe);
+ struct tid_rdma_params *remote;
+
+ rcu_read_lock();
+ remote = rcu_dereference(qpriv->tid_rdma.remote);
+ /*
+ * Set the number of flow to be used based on negotiated
+ * parameters.
+ */
+ req->n_flows = remote->max_write;
+ req->state = TID_REQUEST_ACTIVE;
+
+ KDETH_RESET(ohdr->u.tid_rdma.w_req.kdeth0, KVER, 0x1);
+ KDETH_RESET(ohdr->u.tid_rdma.w_req.kdeth1, JKEY, remote->jkey);
+ ohdr->u.tid_rdma.w_req.reth.vaddr =
+ cpu_to_be64(wqe->rdma_wr.remote_addr + (wqe->length - *len));
+ ohdr->u.tid_rdma.w_req.reth.rkey =
+ cpu_to_be32(wqe->rdma_wr.rkey);
+ ohdr->u.tid_rdma.w_req.reth.length = cpu_to_be32(*len);
+ ohdr->u.tid_rdma.w_req.verbs_qp = cpu_to_be32(qp->remote_qpn);
+ *bth1 &= ~RVT_QPN_MASK;
+ *bth1 |= remote->qp;
+ qp->s_state = TID_OP(WRITE_REQ);
+ qp->s_flags |= HFI1_S_WAIT_TID_RESP;
+ *bth2 |= IB_BTH_REQ_ACK;
+ *len = 0;
+
+ rcu_read_unlock();
+ return sizeof(ohdr->u.tid_rdma.w_req) / sizeof(u32);
+}
+
+static u32 hfi1_compute_tid_rdma_flow_wt(struct rvt_qp *qp)
+{
+ /*
+ * Heuristic for computing the RNR timeout when waiting on the flow
+ * queue. Rather than a computationaly expensive exact estimate of when
+ * a flow will be available, we assume that if a QP is at position N in
+ * the flow queue it has to wait approximately (N + 1) * (number of
+ * segments between two sync points). The rationale for this is that
+ * flows are released and recycled at each sync point.
+ */
+ return (MAX_TID_FLOW_PSN * qp->pmtu) >> TID_RDMA_SEGMENT_SHIFT;
+}
+
+static u32 position_in_queue(struct hfi1_qp_priv *qpriv,
+ struct tid_queue *queue)
+{
+ return qpriv->tid_enqueue - queue->dequeue;
+}
+
+/*
+ * @qp: points to rvt_qp context.
+ * @to_seg: desired RNR timeout in segments.
+ * Return: index of the next highest timeout in the ib_hfi1_rnr_table[]
+ */
+static u32 hfi1_compute_tid_rnr_timeout(struct rvt_qp *qp, u32 to_seg)
+{
+ struct hfi1_qp_priv *qpriv = qp->priv;
+ u64 timeout;
+ u32 bytes_per_us;
+ u8 i;
+
+ bytes_per_us = active_egress_rate(qpriv->rcd->ppd) / 8;
+ timeout = (to_seg * TID_RDMA_MAX_SEGMENT_SIZE) / bytes_per_us;
+ /*
+ * Find the next highest value in the RNR table to the required
+ * timeout. This gives the responder some padding.
+ */
+ for (i = 1; i <= IB_AETH_CREDIT_MASK; i++)
+ if (rvt_rnr_tbl_to_usec(i) >= timeout)
+ return i;
+ return 0;
+}
+
+/*
+ * Central place for resource allocation at TID write responder,
+ * is called from write_req and write_data interrupt handlers as
+ * well as the send thread when a queued QP is scheduled for
+ * resource allocation.
+ *
+ * Iterates over (a) segments of a request and then (b) queued requests
+ * themselves to allocate resources for up to local->max_write
+ * segments across multiple requests. Stop allocating when we
+ * hit a sync point, resume allocating after data packets at
+ * sync point have been received.
+ *
+ * Resource allocation and sending of responses is decoupled. The
+ * request/segment which are being allocated and sent are as follows.
+ * Resources are allocated for:
+ * [request: qpriv->r_tid_alloc, segment: req->alloc_seg]
+ * The send thread sends:
+ * [request: qp->s_tail_ack_queue, segment:req->cur_seg]
+ */
+static void hfi1_tid_write_alloc_resources(struct rvt_qp *qp, bool intr_ctx)
+{
+ struct tid_rdma_request *req;
+ struct hfi1_qp_priv *qpriv = qp->priv;
+ struct hfi1_ctxtdata *rcd = qpriv->rcd;
+ struct tid_rdma_params *local = &qpriv->tid_rdma.local;
+ struct rvt_ack_entry *e;
+ u32 npkts, to_seg;
+ bool last;
+ int ret = 0;
+
+ lockdep_assert_held(&qp->s_lock);
+
+ while (1) {
+ trace_hfi1_rsp_tid_write_alloc_res(qp, 0);
+ trace_hfi1_tid_write_rsp_alloc_res(qp);
+ /*
+ * Don't allocate more segments if a RNR NAK has already been
+ * scheduled to avoid messing up qp->r_psn: the RNR NAK will
+ * be sent only when all allocated segments have been sent.
+ * However, if more segments are allocated before that, TID RDMA
+ * WRITE RESP packets will be sent out for these new segments
+ * before the RNR NAK packet. When the requester receives the
+ * RNR NAK packet, it will restart with qp->s_last_psn + 1,
+ * which does not match qp->r_psn and will be dropped.
+ * Consequently, the requester will exhaust its retries and
+ * put the qp into error state.
+ */
+ if (qpriv->rnr_nak_state == TID_RNR_NAK_SEND)
+ break;
+
+ /* No requests left to process */
+ if (qpriv->r_tid_alloc == qpriv->r_tid_head) {
+ /* If all data has been received, clear the flow */
+ if (qpriv->flow_state.index < RXE_NUM_TID_FLOWS &&
+ !qpriv->alloc_w_segs) {
+ hfi1_kern_clear_hw_flow(rcd, qp);
+ qpriv->s_flags &= ~HFI1_R_TID_SW_PSN;
+ }
+ break;
+ }
+
+ e = &qp->s_ack_queue[qpriv->r_tid_alloc];
+ if (e->opcode != TID_OP(WRITE_REQ))
+ goto next_req;
+ req = ack_to_tid_req(e);
+ trace_hfi1_tid_req_write_alloc_res(qp, 0, e->opcode, e->psn,
+ e->lpsn, req);
+ /* Finished allocating for all segments of this request */
+ if (req->alloc_seg >= req->total_segs)
+ goto next_req;
+
+ /* Can allocate only a maximum of local->max_write for a QP */
+ if (qpriv->alloc_w_segs >= local->max_write)
+ break;
+
+ /* Don't allocate at a sync point with data packets pending */
+ if (qpriv->sync_pt && qpriv->alloc_w_segs)
+ break;
+
+ /* All data received at the sync point, continue */
+ if (qpriv->sync_pt && !qpriv->alloc_w_segs) {
+ hfi1_kern_clear_hw_flow(rcd, qp);
+ qpriv->sync_pt = false;
+ qpriv->s_flags &= ~HFI1_R_TID_SW_PSN;
+ }
+
+ /* Allocate flow if we don't have one */
+ if (qpriv->flow_state.index >= RXE_NUM_TID_FLOWS) {
+ ret = hfi1_kern_setup_hw_flow(qpriv->rcd, qp);
+ if (ret) {
+ to_seg = hfi1_compute_tid_rdma_flow_wt(qp) *
+ position_in_queue(qpriv,
+ &rcd->flow_queue);
+ break;
+ }
+ }
+
+ npkts = rvt_div_round_up_mtu(qp, req->seg_len);
+
+ /*
+ * We are at a sync point if we run out of KDETH PSN space.
+ * Last PSN of every generation is reserved for RESYNC.
+ */
+ if (qpriv->flow_state.psn + npkts > MAX_TID_FLOW_PSN - 1) {
+ qpriv->sync_pt = true;
+ break;
+ }
+
+ /*
+ * If overtaking req->acked_tail, send an RNR NAK. Because the
+ * QP is not queued in this case, and the issue can only be
+ * caused by a delay in scheduling the second leg which we
+ * cannot estimate, we use a rather arbitrary RNR timeout of
+ * (MAX_FLOWS / 2) segments
+ */
+ if (!CIRC_SPACE(req->setup_head, req->acked_tail,
+ MAX_FLOWS)) {
+ ret = -EAGAIN;
+ to_seg = MAX_FLOWS >> 1;
+ tid_rdma_trigger_ack(qp);
+ break;
+ }
+
+ /* Try to allocate rcv array / TID entries */
+ ret = hfi1_kern_exp_rcv_setup(req, &req->ss, &last);
+ if (ret == -EAGAIN)
+ to_seg = position_in_queue(qpriv, &rcd->rarr_queue);
+ if (ret)
+ break;
+
+ qpriv->alloc_w_segs++;
+ req->alloc_seg++;
+ continue;
+next_req:
+ /* Begin processing the next request */
+ if (++qpriv->r_tid_alloc >
+ rvt_size_atomic(ib_to_rvt(qp->ibqp.device)))
+ qpriv->r_tid_alloc = 0;
+ }
+
+ /*
+ * Schedule an RNR NAK to be sent if (a) flow or rcv array allocation
+ * has failed (b) we are called from the rcv handler interrupt context
+ * (c) an RNR NAK has not already been scheduled
+ */
+ if (ret == -EAGAIN && intr_ctx && !qp->r_nak_state)
+ goto send_rnr_nak;
+
+ return;
+
+send_rnr_nak:
+ lockdep_assert_held(&qp->r_lock);
+
+ /* Set r_nak_state to prevent unrelated events from generating NAK's */
+ qp->r_nak_state = hfi1_compute_tid_rnr_timeout(qp, to_seg) | IB_RNR_NAK;
+
+ /* Pull back r_psn to the segment being RNR NAK'd */
+ qp->r_psn = e->psn + req->alloc_seg;
+ qp->r_ack_psn = qp->r_psn;
+ /*
+ * Pull back r_head_ack_queue to the ack entry following the request
+ * being RNR NAK'd. This allows resources to be allocated to the request
+ * if the queued QP is scheduled.
+ */
+ qp->r_head_ack_queue = qpriv->r_tid_alloc + 1;
+ if (qp->r_head_ack_queue > rvt_size_atomic(ib_to_rvt(qp->ibqp.device)))
+ qp->r_head_ack_queue = 0;
+ qpriv->r_tid_head = qp->r_head_ack_queue;
+ /*
+ * These send side fields are used in make_rc_ack(). They are set in
+ * hfi1_send_rc_ack() but must be set here before dropping qp->s_lock
+ * for consistency
+ */
+ qp->s_nak_state = qp->r_nak_state;
+ qp->s_ack_psn = qp->r_ack_psn;
+ /*
+ * Clear the ACK PENDING flag to prevent unwanted ACK because we
+ * have modified qp->s_ack_psn here.
+ */
+ qp->s_flags &= ~(RVT_S_ACK_PENDING);
+
+ trace_hfi1_rsp_tid_write_alloc_res(qp, qp->r_psn);
+ /*
+ * qpriv->rnr_nak_state is used to determine when the scheduled RNR NAK
+ * has actually been sent. qp->s_flags RVT_S_ACK_PENDING bit cannot be
+ * used for this because qp->s_lock is dropped before calling
+ * hfi1_send_rc_ack() leading to inconsistency between the receive
+ * interrupt handlers and the send thread in make_rc_ack()
+ */
+ qpriv->rnr_nak_state = TID_RNR_NAK_SEND;
+
+ /*
+ * Schedule RNR NAK to be sent. RNR NAK's are scheduled from the receive
+ * interrupt handlers but will be sent from the send engine behind any
+ * previous responses that may have been scheduled
+ */
+ rc_defered_ack(rcd, qp);
+}
+
+void hfi1_rc_rcv_tid_rdma_write_req(struct hfi1_packet *packet)
+{
+ /* HANDLER FOR TID RDMA WRITE REQUEST packet (Responder side)*/
+
+ /*
+ * 1. Verify TID RDMA WRITE REQ as per IB_OPCODE_RC_RDMA_WRITE_FIRST
+ * (see hfi1_rc_rcv())
+ * - Don't allow 0-length requests.
+ * 2. Put TID RDMA WRITE REQ into the response queue (s_ack_queue)
+ * - Setup struct tid_rdma_req with request info
+ * - Prepare struct tid_rdma_flow array?
+ * 3. Set the qp->s_ack_state as state diagram in design doc.
+ * 4. Set RVT_S_RESP_PENDING in s_flags.
+ * 5. Kick the send engine (hfi1_schedule_send())
+ */
+ struct hfi1_ctxtdata *rcd = packet->rcd;
+ struct rvt_qp *qp = packet->qp;
+ struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
+ struct ib_other_headers *ohdr = packet->ohdr;
+ struct rvt_ack_entry *e;
+ unsigned long flags;
+ struct ib_reth *reth;
+ struct hfi1_qp_priv *qpriv = qp->priv;
+ struct tid_rdma_request *req;
+ u32 bth0, psn, len, rkey, num_segs;
+ bool fecn;
+ u8 next;
+ u64 vaddr;
+ int diff;
+
+ bth0 = be32_to_cpu(ohdr->bth[0]);
+ if (hfi1_ruc_check_hdr(ibp, packet))
+ return;
+
+ fecn = process_ecn(qp, packet);
+ psn = mask_psn(be32_to_cpu(ohdr->bth[2]));
+ trace_hfi1_rsp_rcv_tid_write_req(qp, psn);
+
+ if (qp->state == IB_QPS_RTR && !(qp->r_flags & RVT_R_COMM_EST))
+ rvt_comm_est(qp);
+
+ if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
+ goto nack_inv;
+
+ reth = &ohdr->u.tid_rdma.w_req.reth;
+ vaddr = be64_to_cpu(reth->vaddr);
+ len = be32_to_cpu(reth->length);
+
+ num_segs = DIV_ROUND_UP(len, qpriv->tid_rdma.local.max_len);
+ diff = delta_psn(psn, qp->r_psn);
+ if (unlikely(diff)) {
+ tid_rdma_rcv_err(packet, ohdr, qp, psn, diff, fecn);
+ return;
+ }
+
+ /*
+ * The resent request which was previously RNR NAK'd is inserted at the
+ * location of the original request, which is one entry behind
+ * r_head_ack_queue
+ */
+ if (qpriv->rnr_nak_state)
+ qp->r_head_ack_queue = qp->r_head_ack_queue ?
+ qp->r_head_ack_queue - 1 :
+ rvt_size_atomic(ib_to_rvt(qp->ibqp.device));
+
+ /* We've verified the request, insert it into the ack queue. */
+ next = qp->r_head_ack_queue + 1;
+ if (next > rvt_size_atomic(ib_to_rvt(qp->ibqp.device)))
+ next = 0;
+ spin_lock_irqsave(&qp->s_lock, flags);
+ if (unlikely(next == qp->s_acked_ack_queue)) {
+ if (!qp->s_ack_queue[next].sent)
+ goto nack_inv_unlock;
+ update_ack_queue(qp, next);
+ }
+ e = &qp->s_ack_queue[qp->r_head_ack_queue];
+ req = ack_to_tid_req(e);
+
+ /* Bring previously RNR NAK'd request back to life */
+ if (qpriv->rnr_nak_state) {
+ qp->r_nak_state = 0;
+ qp->s_nak_state = 0;
+ qpriv->rnr_nak_state = TID_RNR_NAK_INIT;
+ qp->r_psn = e->lpsn + 1;
+ req->state = TID_REQUEST_INIT;
+ goto update_head;
+ }
+
+ release_rdma_sge_mr(e);
+
+ /* The length needs to be in multiples of PAGE_SIZE */
+ if (!len || len & ~PAGE_MASK)
+ goto nack_inv_unlock;
+
+ rkey = be32_to_cpu(reth->rkey);
+ qp->r_len = len;
+
+ if (e->opcode == TID_OP(WRITE_REQ) &&
+ (req->setup_head != req->clear_tail ||
+ req->clear_tail != req->acked_tail))
+ goto nack_inv_unlock;
+
+ if (unlikely(!rvt_rkey_ok(qp, &e->rdma_sge, qp->r_len, vaddr,
+ rkey, IB_ACCESS_REMOTE_WRITE)))
+ goto nack_acc;
+
+ qp->r_psn += num_segs - 1;
+
+ e->opcode = (bth0 >> 24) & 0xff;
+ e->psn = psn;
+ e->lpsn = qp->r_psn;
+ e->sent = 0;
+
+ req->n_flows = min_t(u16, num_segs, qpriv->tid_rdma.local.max_write);
+ req->state = TID_REQUEST_INIT;
+ req->cur_seg = 0;
+ req->comp_seg = 0;
+ req->ack_seg = 0;
+ req->alloc_seg = 0;
+ req->isge = 0;
+ req->seg_len = qpriv->tid_rdma.local.max_len;
+ req->total_len = len;
+ req->total_segs = num_segs;
+ req->r_flow_psn = e->psn;
+ req->ss.sge = e->rdma_sge;
+ req->ss.num_sge = 1;
+
+ req->flow_idx = req->setup_head;
+ req->clear_tail = req->setup_head;
+ req->acked_tail = req->setup_head;
+
+ qp->r_state = e->opcode;
+ qp->r_nak_state = 0;
+ /*
+ * We need to increment the MSN here instead of when we
+ * finish sending the result since a duplicate request would
+ * increment it more than once.
+ */
+ qp->r_msn++;
+ qp->r_psn++;
+
+ trace_hfi1_tid_req_rcv_write_req(qp, 0, e->opcode, e->psn, e->lpsn,
+ req);
+
+ if (qpriv->r_tid_tail == HFI1_QP_WQE_INVALID) {
+ qpriv->r_tid_tail = qp->r_head_ack_queue;
+ } else if (qpriv->r_tid_tail == qpriv->r_tid_head) {
+ struct tid_rdma_request *ptr;
+
+ e = &qp->s_ack_queue[qpriv->r_tid_tail];
+ ptr = ack_to_tid_req(e);
+
+ if (e->opcode != TID_OP(WRITE_REQ) ||
+ ptr->comp_seg == ptr->total_segs) {
+ if (qpriv->r_tid_tail == qpriv->r_tid_ack)
+ qpriv->r_tid_ack = qp->r_head_ack_queue;
+ qpriv->r_tid_tail = qp->r_head_ack_queue;
+ }
+ }
+update_head:
+ qp->r_head_ack_queue = next;
+ qpriv->r_tid_head = qp->r_head_ack_queue;
+
+ hfi1_tid_write_alloc_resources(qp, true);
+ trace_hfi1_tid_write_rsp_rcv_req(qp);
+
+ /* Schedule the send tasklet. */
+ qp->s_flags |= RVT_S_RESP_PENDING;
+ if (fecn)
+ qp->s_flags |= RVT_S_ECN;
+ hfi1_schedule_send(qp);
+
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+ return;
+
+nack_inv_unlock:
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+nack_inv:
+ rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
+ qp->r_nak_state = IB_NAK_INVALID_REQUEST;
+ qp->r_ack_psn = qp->r_psn;
+ /* Queue NAK for later */
+ rc_defered_ack(rcd, qp);
+ return;
+nack_acc:
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+ rvt_rc_error(qp, IB_WC_LOC_PROT_ERR);
+ qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR;
+ qp->r_ack_psn = qp->r_psn;
+}
+
+u32 hfi1_build_tid_rdma_write_resp(struct rvt_qp *qp, struct rvt_ack_entry *e,
+ struct ib_other_headers *ohdr, u32 *bth1,
+ u32 bth2, u32 *len,
+ struct rvt_sge_state **ss)
+{
+ struct hfi1_ack_priv *epriv = e->priv;
+ struct tid_rdma_request *req = &epriv->tid_req;
+ struct hfi1_qp_priv *qpriv = qp->priv;
+ struct tid_rdma_flow *flow = NULL;
+ u32 resp_len = 0, hdwords = 0;
+ void *resp_addr = NULL;
+ struct tid_rdma_params *remote;
+
+ trace_hfi1_tid_req_build_write_resp(qp, 0, e->opcode, e->psn, e->lpsn,
+ req);
+ trace_hfi1_tid_write_rsp_build_resp(qp);
+ trace_hfi1_rsp_build_tid_write_resp(qp, bth2);
+ flow = &req->flows[req->flow_idx];
+ switch (req->state) {
+ default:
+ /*
+ * Try to allocate resources here in case QP was queued and was
+ * later scheduled when resources became available
+ */
+ hfi1_tid_write_alloc_resources(qp, false);
+
+ /* We've already sent everything which is ready */
+ if (req->cur_seg >= req->alloc_seg)
+ goto done;
+
+ /*
+ * Resources can be assigned but responses cannot be sent in
+ * rnr_nak state, till the resent request is received
+ */
+ if (qpriv->rnr_nak_state == TID_RNR_NAK_SENT)
+ goto done;
+
+ req->state = TID_REQUEST_ACTIVE;
+ trace_hfi1_tid_flow_build_write_resp(qp, req->flow_idx, flow);
+ req->flow_idx = CIRC_NEXT(req->flow_idx, MAX_FLOWS);
+ hfi1_add_tid_reap_timer(qp);
+ break;
+
+ case TID_REQUEST_RESEND_ACTIVE:
+ case TID_REQUEST_RESEND:
+ trace_hfi1_tid_flow_build_write_resp(qp, req->flow_idx, flow);
+ req->flow_idx = CIRC_NEXT(req->flow_idx, MAX_FLOWS);
+ if (!CIRC_CNT(req->setup_head, req->flow_idx, MAX_FLOWS))
+ req->state = TID_REQUEST_ACTIVE;
+
+ hfi1_mod_tid_reap_timer(qp);
+ break;
+ }
+ flow->flow_state.resp_ib_psn = bth2;
+ resp_addr = (void *)flow->tid_entry;
+ resp_len = sizeof(*flow->tid_entry) * flow->tidcnt;
+ req->cur_seg++;
+
+ memset(&ohdr->u.tid_rdma.w_rsp, 0, sizeof(ohdr->u.tid_rdma.w_rsp));
+ epriv->ss.sge.vaddr = resp_addr;
+ epriv->ss.sge.sge_length = resp_len;
+ epriv->ss.sge.length = epriv->ss.sge.sge_length;
+ /*
+ * We can safely zero these out. Since the first SGE covers the
+ * entire packet, nothing else should even look at the MR.
+ */
+ epriv->ss.sge.mr = NULL;
+ epriv->ss.sge.m = 0;
+ epriv->ss.sge.n = 0;
+
+ epriv->ss.sg_list = NULL;
+ epriv->ss.total_len = epriv->ss.sge.sge_length;
+ epriv->ss.num_sge = 1;
+
+ *ss = &epriv->ss;
+ *len = epriv->ss.total_len;
+
+ /* Construct the TID RDMA WRITE RESP packet header */
+ rcu_read_lock();
+ remote = rcu_dereference(qpriv->tid_rdma.remote);
+
+ KDETH_RESET(ohdr->u.tid_rdma.w_rsp.kdeth0, KVER, 0x1);
+ KDETH_RESET(ohdr->u.tid_rdma.w_rsp.kdeth1, JKEY, remote->jkey);
+ ohdr->u.tid_rdma.w_rsp.aeth = rvt_compute_aeth(qp);
+ ohdr->u.tid_rdma.w_rsp.tid_flow_psn =
+ cpu_to_be32((flow->flow_state.generation <<
+ HFI1_KDETH_BTH_SEQ_SHIFT) |
+ (flow->flow_state.spsn &
+ HFI1_KDETH_BTH_SEQ_MASK));
+ ohdr->u.tid_rdma.w_rsp.tid_flow_qp =
+ cpu_to_be32(qpriv->tid_rdma.local.qp |
+ ((flow->idx & TID_RDMA_DESTQP_FLOW_MASK) <<
+ TID_RDMA_DESTQP_FLOW_SHIFT) |
+ qpriv->rcd->ctxt);
+ ohdr->u.tid_rdma.w_rsp.verbs_qp = cpu_to_be32(qp->remote_qpn);
+ *bth1 = remote->qp;
+ rcu_read_unlock();
+ hdwords = sizeof(ohdr->u.tid_rdma.w_rsp) / sizeof(u32);
+ qpriv->pending_tid_w_segs++;
+done:
+ return hdwords;
+}
+
+static void hfi1_add_tid_reap_timer(struct rvt_qp *qp)
+{
+ struct hfi1_qp_priv *qpriv = qp->priv;
+
+ lockdep_assert_held(&qp->s_lock);
+ if (!(qpriv->s_flags & HFI1_R_TID_RSC_TIMER)) {
+ qpriv->s_flags |= HFI1_R_TID_RSC_TIMER;
+ qpriv->s_tid_timer.expires = jiffies +
+ qpriv->tid_timer_timeout_jiffies;
+ add_timer(&qpriv->s_tid_timer);
+ }
+}
+
+static void hfi1_mod_tid_reap_timer(struct rvt_qp *qp)
+{
+ struct hfi1_qp_priv *qpriv = qp->priv;
+
+ lockdep_assert_held(&qp->s_lock);
+ qpriv->s_flags |= HFI1_R_TID_RSC_TIMER;
+ mod_timer(&qpriv->s_tid_timer, jiffies +
+ qpriv->tid_timer_timeout_jiffies);
+}
+
+static int hfi1_stop_tid_reap_timer(struct rvt_qp *qp)
+{
+ struct hfi1_qp_priv *qpriv = qp->priv;
+ int rval = 0;
+
+ lockdep_assert_held(&qp->s_lock);
+ if (qpriv->s_flags & HFI1_R_TID_RSC_TIMER) {
+ rval = timer_delete(&qpriv->s_tid_timer);
+ qpriv->s_flags &= ~HFI1_R_TID_RSC_TIMER;
+ }
+ return rval;
+}
+
+void hfi1_del_tid_reap_timer(struct rvt_qp *qp)
+{
+ struct hfi1_qp_priv *qpriv = qp->priv;
+
+ timer_delete_sync(&qpriv->s_tid_timer);
+ qpriv->s_flags &= ~HFI1_R_TID_RSC_TIMER;
+}
+
+static void hfi1_tid_timeout(struct timer_list *t)
+{
+ struct hfi1_qp_priv *qpriv = timer_container_of(qpriv, t, s_tid_timer);
+ struct rvt_qp *qp = qpriv->owner;
+ struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
+ unsigned long flags;
+ u32 i;
+
+ spin_lock_irqsave(&qp->r_lock, flags);
+ spin_lock(&qp->s_lock);
+ if (qpriv->s_flags & HFI1_R_TID_RSC_TIMER) {
+ dd_dev_warn(dd_from_ibdev(qp->ibqp.device), "[QP%u] %s %d\n",
+ qp->ibqp.qp_num, __func__, __LINE__);
+ trace_hfi1_msg_tid_timeout(/* msg */
+ qp, "resource timeout = ",
+ (u64)qpriv->tid_timer_timeout_jiffies);
+ hfi1_stop_tid_reap_timer(qp);
+ /*
+ * Go though the entire ack queue and clear any outstanding
+ * HW flow and RcvArray resources.
+ */
+ hfi1_kern_clear_hw_flow(qpriv->rcd, qp);
+ for (i = 0; i < rvt_max_atomic(rdi); i++) {
+ struct tid_rdma_request *req =
+ ack_to_tid_req(&qp->s_ack_queue[i]);
+
+ hfi1_kern_exp_rcv_clear_all(req);
+ }
+ spin_unlock(&qp->s_lock);
+ if (qp->ibqp.event_handler) {
+ struct ib_event ev;
+
+ ev.device = qp->ibqp.device;
+ ev.element.qp = &qp->ibqp;
+ ev.event = IB_EVENT_QP_FATAL;
+ qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
+ }
+ rvt_rc_error(qp, IB_WC_RESP_TIMEOUT_ERR);
+ goto unlock_r_lock;
+ }
+ spin_unlock(&qp->s_lock);
+unlock_r_lock:
+ spin_unlock_irqrestore(&qp->r_lock, flags);
+}
+
+void hfi1_rc_rcv_tid_rdma_write_resp(struct hfi1_packet *packet)
+{
+ /* HANDLER FOR TID RDMA WRITE RESPONSE packet (Requester side) */
+
+ /*
+ * 1. Find matching SWQE
+ * 2. Check that TIDENTRY array has enough space for a complete
+ * segment. If not, put QP in error state.
+ * 3. Save response data in struct tid_rdma_req and struct tid_rdma_flow
+ * 4. Remove HFI1_S_WAIT_TID_RESP from s_flags.
+ * 5. Set qp->s_state
+ * 6. Kick the send engine (hfi1_schedule_send())
+ */
+ struct ib_other_headers *ohdr = packet->ohdr;
+ struct rvt_qp *qp = packet->qp;
+ struct hfi1_qp_priv *qpriv = qp->priv;
+ struct hfi1_ctxtdata *rcd = packet->rcd;
+ struct rvt_swqe *wqe;
+ struct tid_rdma_request *req;
+ struct tid_rdma_flow *flow;
+ enum ib_wc_status status;
+ u32 opcode, aeth, psn, flow_psn, i, tidlen = 0, pktlen;
+ bool fecn;
+ unsigned long flags;
+
+ fecn = process_ecn(qp, packet);
+ psn = mask_psn(be32_to_cpu(ohdr->bth[2]));
+ aeth = be32_to_cpu(ohdr->u.tid_rdma.w_rsp.aeth);
+ opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff;
+
+ spin_lock_irqsave(&qp->s_lock, flags);
+
+ /* Ignore invalid responses */
+ if (cmp_psn(psn, qp->s_next_psn) >= 0)
+ goto ack_done;
+
+ /* Ignore duplicate responses. */
+ if (unlikely(cmp_psn(psn, qp->s_last_psn) <= 0))
+ goto ack_done;
+
+ if (unlikely(qp->s_acked == qp->s_tail))
+ goto ack_done;
+
+ /*
+ * If we are waiting for a particular packet sequence number
+ * due to a request being resent, check for it. Otherwise,
+ * ensure that we haven't missed anything.
+ */
+ if (qp->r_flags & RVT_R_RDMAR_SEQ) {
+ if (cmp_psn(psn, qp->s_last_psn + 1) != 0)
+ goto ack_done;
+ qp->r_flags &= ~RVT_R_RDMAR_SEQ;
+ }
+
+ wqe = rvt_get_swqe_ptr(qp, qpriv->s_tid_cur);
+ if (unlikely(wqe->wr.opcode != IB_WR_TID_RDMA_WRITE))
+ goto ack_op_err;
+
+ req = wqe_to_tid_req(wqe);
+ /*
+ * If we've lost ACKs and our acked_tail pointer is too far
+ * behind, don't overwrite segments. Just drop the packet and
+ * let the reliability protocol take care of it.
+ */
+ if (!CIRC_SPACE(req->setup_head, req->acked_tail, MAX_FLOWS))
+ goto ack_done;
+
+ /*
+ * The call to do_rc_ack() should be last in the chain of
+ * packet checks because it will end up updating the QP state.
+ * Therefore, anything that would prevent the packet from
+ * being accepted as a successful response should be prior
+ * to it.
+ */
+ if (!do_rc_ack(qp, aeth, psn, opcode, 0, rcd))
+ goto ack_done;
+
+ trace_hfi1_ack(qp, psn);
+
+ flow = &req->flows[req->setup_head];
+ flow->pkt = 0;
+ flow->tid_idx = 0;
+ flow->tid_offset = 0;
+ flow->sent = 0;
+ flow->resync_npkts = 0;
+ flow->tid_qpn = be32_to_cpu(ohdr->u.tid_rdma.w_rsp.tid_flow_qp);
+ flow->idx = (flow->tid_qpn >> TID_RDMA_DESTQP_FLOW_SHIFT) &
+ TID_RDMA_DESTQP_FLOW_MASK;
+ flow_psn = mask_psn(be32_to_cpu(ohdr->u.tid_rdma.w_rsp.tid_flow_psn));
+ flow->flow_state.generation = flow_psn >> HFI1_KDETH_BTH_SEQ_SHIFT;
+ flow->flow_state.spsn = flow_psn & HFI1_KDETH_BTH_SEQ_MASK;
+ flow->flow_state.resp_ib_psn = psn;
+ flow->length = min_t(u32, req->seg_len,
+ (wqe->length - (req->comp_seg * req->seg_len)));
+
+ flow->npkts = rvt_div_round_up_mtu(qp, flow->length);
+ flow->flow_state.lpsn = flow->flow_state.spsn +
+ flow->npkts - 1;
+ /* payload length = packet length - (header length + ICRC length) */
+ pktlen = packet->tlen - (packet->hlen + 4);
+ if (pktlen > sizeof(flow->tid_entry)) {
+ status = IB_WC_LOC_LEN_ERR;
+ goto ack_err;
+ }
+ memcpy(flow->tid_entry, packet->ebuf, pktlen);
+ flow->tidcnt = pktlen / sizeof(*flow->tid_entry);
+ trace_hfi1_tid_flow_rcv_write_resp(qp, req->setup_head, flow);
+
+ req->comp_seg++;
+ trace_hfi1_tid_write_sender_rcv_resp(qp, 0);
+ /*
+ * Walk the TID_ENTRY list to make sure we have enough space for a
+ * complete segment.
+ */
+ for (i = 0; i < flow->tidcnt; i++) {
+ trace_hfi1_tid_entry_rcv_write_resp(/* entry */
+ qp, i, flow->tid_entry[i]);
+ if (!EXP_TID_GET(flow->tid_entry[i], LEN)) {
+ status = IB_WC_LOC_LEN_ERR;
+ goto ack_err;
+ }
+ tidlen += EXP_TID_GET(flow->tid_entry[i], LEN);
+ }
+ if (tidlen * PAGE_SIZE < flow->length) {
+ status = IB_WC_LOC_LEN_ERR;
+ goto ack_err;
+ }
+
+ trace_hfi1_tid_req_rcv_write_resp(qp, 0, wqe->wr.opcode, wqe->psn,
+ wqe->lpsn, req);
+ /*
+ * If this is the first response for this request, set the initial
+ * flow index to the current flow.
+ */
+ if (!cmp_psn(psn, wqe->psn)) {
+ req->r_last_acked = mask_psn(wqe->psn - 1);
+ /* Set acked flow index to head index */
+ req->acked_tail = req->setup_head;
+ }
+
+ /* advance circular buffer head */
+ req->setup_head = CIRC_NEXT(req->setup_head, MAX_FLOWS);
+ req->state = TID_REQUEST_ACTIVE;
+
+ /*
+ * If all responses for this TID RDMA WRITE request have been received
+ * advance the pointer to the next one.
+ * Since TID RDMA requests could be mixed in with regular IB requests,
+ * they might not appear sequentially in the queue. Therefore, the
+ * next request needs to be "found".
+ */
+ if (qpriv->s_tid_cur != qpriv->s_tid_head &&
+ req->comp_seg == req->total_segs) {
+ for (i = qpriv->s_tid_cur + 1; ; i++) {
+ if (i == qp->s_size)
+ i = 0;
+ wqe = rvt_get_swqe_ptr(qp, i);
+ if (i == qpriv->s_tid_head)
+ break;
+ if (wqe->wr.opcode == IB_WR_TID_RDMA_WRITE)
+ break;
+ }
+ qpriv->s_tid_cur = i;
+ }
+ qp->s_flags &= ~HFI1_S_WAIT_TID_RESP;
+ hfi1_schedule_tid_send(qp);
+ goto ack_done;
+
+ack_op_err:
+ status = IB_WC_LOC_QP_OP_ERR;
+ack_err:
+ rvt_error_qp(qp, status);
+ack_done:
+ if (fecn)
+ qp->s_flags |= RVT_S_ECN;
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+}
+
+bool hfi1_build_tid_rdma_packet(struct rvt_swqe *wqe,
+ struct ib_other_headers *ohdr,
+ u32 *bth1, u32 *bth2, u32 *len)
+{
+ struct tid_rdma_request *req = wqe_to_tid_req(wqe);
+ struct tid_rdma_flow *flow = &req->flows[req->clear_tail];
+ struct tid_rdma_params *remote;
+ struct rvt_qp *qp = req->qp;
+ struct hfi1_qp_priv *qpriv = qp->priv;
+ u32 tidentry = flow->tid_entry[flow->tid_idx];
+ u32 tidlen = EXP_TID_GET(tidentry, LEN) << PAGE_SHIFT;
+ struct tid_rdma_write_data *wd = &ohdr->u.tid_rdma.w_data;
+ u32 next_offset, om = KDETH_OM_LARGE;
+ bool last_pkt;
+
+ if (!tidlen) {
+ hfi1_trdma_send_complete(qp, wqe, IB_WC_REM_INV_RD_REQ_ERR);
+ rvt_error_qp(qp, IB_WC_REM_INV_RD_REQ_ERR);
+ }
+
+ *len = min_t(u32, qp->pmtu, tidlen - flow->tid_offset);
+ flow->sent += *len;
+ next_offset = flow->tid_offset + *len;
+ last_pkt = (flow->tid_idx == (flow->tidcnt - 1) &&
+ next_offset >= tidlen) || (flow->sent >= flow->length);
+ trace_hfi1_tid_entry_build_write_data(qp, flow->tid_idx, tidentry);
+ trace_hfi1_tid_flow_build_write_data(qp, req->clear_tail, flow);
+
+ rcu_read_lock();
+ remote = rcu_dereference(qpriv->tid_rdma.remote);
+ KDETH_RESET(wd->kdeth0, KVER, 0x1);
+ KDETH_SET(wd->kdeth0, SH, !last_pkt);
+ KDETH_SET(wd->kdeth0, INTR, !!(!last_pkt && remote->urg));
+ KDETH_SET(wd->kdeth0, TIDCTRL, EXP_TID_GET(tidentry, CTRL));
+ KDETH_SET(wd->kdeth0, TID, EXP_TID_GET(tidentry, IDX));
+ KDETH_SET(wd->kdeth0, OM, om == KDETH_OM_LARGE);
+ KDETH_SET(wd->kdeth0, OFFSET, flow->tid_offset / om);
+ KDETH_RESET(wd->kdeth1, JKEY, remote->jkey);
+ wd->verbs_qp = cpu_to_be32(qp->remote_qpn);
+ rcu_read_unlock();
+
+ *bth1 = flow->tid_qpn;
+ *bth2 = mask_psn(((flow->flow_state.spsn + flow->pkt++) &
+ HFI1_KDETH_BTH_SEQ_MASK) |
+ (flow->flow_state.generation <<
+ HFI1_KDETH_BTH_SEQ_SHIFT));
+ if (last_pkt) {
+ /* PSNs are zero-based, so +1 to count number of packets */
+ if (flow->flow_state.lpsn + 1 +
+ rvt_div_round_up_mtu(qp, req->seg_len) >
+ MAX_TID_FLOW_PSN)
+ req->state = TID_REQUEST_SYNC;
+ *bth2 |= IB_BTH_REQ_ACK;
+ }
+
+ if (next_offset >= tidlen) {
+ flow->tid_offset = 0;
+ flow->tid_idx++;
+ } else {
+ flow->tid_offset = next_offset;
+ }
+ return last_pkt;
+}
+
+void hfi1_rc_rcv_tid_rdma_write_data(struct hfi1_packet *packet)
+{
+ struct rvt_qp *qp = packet->qp;
+ struct hfi1_qp_priv *priv = qp->priv;
+ struct hfi1_ctxtdata *rcd = priv->rcd;
+ struct ib_other_headers *ohdr = packet->ohdr;
+ struct rvt_ack_entry *e;
+ struct tid_rdma_request *req;
+ struct tid_rdma_flow *flow;
+ struct hfi1_ibdev *dev = to_idev(qp->ibqp.device);
+ unsigned long flags;
+ u32 psn, next;
+ u8 opcode;
+ bool fecn;
+
+ fecn = process_ecn(qp, packet);
+ psn = mask_psn(be32_to_cpu(ohdr->bth[2]));
+ opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff;
+
+ /*
+ * All error handling should be done by now. If we are here, the packet
+ * is either good or been accepted by the error handler.
+ */
+ spin_lock_irqsave(&qp->s_lock, flags);
+ e = &qp->s_ack_queue[priv->r_tid_tail];
+ req = ack_to_tid_req(e);
+ flow = &req->flows[req->clear_tail];
+ if (cmp_psn(psn, full_flow_psn(flow, flow->flow_state.lpsn))) {
+ update_r_next_psn_fecn(packet, priv, rcd, flow, fecn);
+
+ if (cmp_psn(psn, flow->flow_state.r_next_psn))
+ goto send_nak;
+
+ flow->flow_state.r_next_psn = mask_psn(psn + 1);
+ /*
+ * Copy the payload to destination buffer if this packet is
+ * delivered as an eager packet due to RSM rule and FECN.
+ * The RSM rule selects FECN bit in BTH and SH bit in
+ * KDETH header and therefore will not match the last
+ * packet of each segment that has SH bit cleared.
+ */
+ if (fecn && packet->etype == RHF_RCV_TYPE_EAGER) {
+ struct rvt_sge_state ss;
+ u32 len;
+ u32 tlen = packet->tlen;
+ u16 hdrsize = packet->hlen;
+ u8 pad = packet->pad;
+ u8 extra_bytes = pad + packet->extra_byte +
+ (SIZE_OF_CRC << 2);
+ u32 pmtu = qp->pmtu;
+
+ if (unlikely(tlen != (hdrsize + pmtu + extra_bytes)))
+ goto send_nak;
+ len = req->comp_seg * req->seg_len;
+ len += delta_psn(psn,
+ full_flow_psn(flow, flow->flow_state.spsn)) *
+ pmtu;
+ if (unlikely(req->total_len - len < pmtu))
+ goto send_nak;
+
+ /*
+ * The e->rdma_sge field is set when TID RDMA WRITE REQ
+ * is first received and is never modified thereafter.
+ */
+ ss.sge = e->rdma_sge;
+ ss.sg_list = NULL;
+ ss.num_sge = 1;
+ ss.total_len = req->total_len;
+ rvt_skip_sge(&ss, len, false);
+ rvt_copy_sge(qp, &ss, packet->payload, pmtu, false,
+ false);
+ /* Raise the sw sequence check flag for next packet */
+ priv->r_next_psn_kdeth = mask_psn(psn + 1);
+ priv->s_flags |= HFI1_R_TID_SW_PSN;
+ }
+ goto exit;
+ }
+ flow->flow_state.r_next_psn = mask_psn(psn + 1);
+ hfi1_kern_exp_rcv_clear(req);
+ priv->alloc_w_segs--;
+ rcd->flows[flow->idx].psn = psn & HFI1_KDETH_BTH_SEQ_MASK;
+ req->comp_seg++;
+ priv->s_nak_state = 0;
+
+ /*
+ * Release the flow if one of the following conditions has been met:
+ * - The request has reached a sync point AND all outstanding
+ * segments have been completed, or
+ * - The entire request is complete and there are no more requests
+ * (of any kind) in the queue.
+ */
+ trace_hfi1_rsp_rcv_tid_write_data(qp, psn);
+ trace_hfi1_tid_req_rcv_write_data(qp, 0, e->opcode, e->psn, e->lpsn,
+ req);
+ trace_hfi1_tid_write_rsp_rcv_data(qp);
+ validate_r_tid_ack(priv);
+
+ if (opcode == TID_OP(WRITE_DATA_LAST)) {
+ release_rdma_sge_mr(e);
+ for (next = priv->r_tid_tail + 1; ; next++) {
+ if (next > rvt_size_atomic(&dev->rdi))
+ next = 0;
+ if (next == priv->r_tid_head)
+ break;
+ e = &qp->s_ack_queue[next];
+ if (e->opcode == TID_OP(WRITE_REQ))
+ break;
+ }
+ priv->r_tid_tail = next;
+ if (++qp->s_acked_ack_queue > rvt_size_atomic(&dev->rdi))
+ qp->s_acked_ack_queue = 0;
+ }
+
+ hfi1_tid_write_alloc_resources(qp, true);
+
+ /*
+ * If we need to generate more responses, schedule the
+ * send engine.
+ */
+ if (req->cur_seg < req->total_segs ||
+ qp->s_tail_ack_queue != qp->r_head_ack_queue) {
+ qp->s_flags |= RVT_S_RESP_PENDING;
+ hfi1_schedule_send(qp);
+ }
+
+ priv->pending_tid_w_segs--;
+ if (priv->s_flags & HFI1_R_TID_RSC_TIMER) {
+ if (priv->pending_tid_w_segs)
+ hfi1_mod_tid_reap_timer(req->qp);
+ else
+ hfi1_stop_tid_reap_timer(req->qp);
+ }
+
+done:
+ tid_rdma_schedule_ack(qp);
+exit:
+ priv->r_next_psn_kdeth = flow->flow_state.r_next_psn;
+ if (fecn)
+ qp->s_flags |= RVT_S_ECN;
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+ return;
+
+send_nak:
+ if (!priv->s_nak_state) {
+ priv->s_nak_state = IB_NAK_PSN_ERROR;
+ priv->s_nak_psn = flow->flow_state.r_next_psn;
+ tid_rdma_trigger_ack(qp);
+ }
+ goto done;
+}
+
+static bool hfi1_tid_rdma_is_resync_psn(u32 psn)
+{
+ return (bool)((psn & HFI1_KDETH_BTH_SEQ_MASK) ==
+ HFI1_KDETH_BTH_SEQ_MASK);
+}
+
+u32 hfi1_build_tid_rdma_write_ack(struct rvt_qp *qp, struct rvt_ack_entry *e,
+ struct ib_other_headers *ohdr, u16 iflow,
+ u32 *bth1, u32 *bth2)
+{
+ struct hfi1_qp_priv *qpriv = qp->priv;
+ struct tid_flow_state *fs = &qpriv->flow_state;
+ struct tid_rdma_request *req = ack_to_tid_req(e);
+ struct tid_rdma_flow *flow = &req->flows[iflow];
+ struct tid_rdma_params *remote;
+
+ rcu_read_lock();
+ remote = rcu_dereference(qpriv->tid_rdma.remote);
+ KDETH_RESET(ohdr->u.tid_rdma.ack.kdeth1, JKEY, remote->jkey);
+ ohdr->u.tid_rdma.ack.verbs_qp = cpu_to_be32(qp->remote_qpn);
+ *bth1 = remote->qp;
+ rcu_read_unlock();
+
+ if (qpriv->resync) {
+ *bth2 = mask_psn((fs->generation <<
+ HFI1_KDETH_BTH_SEQ_SHIFT) - 1);
+ ohdr->u.tid_rdma.ack.aeth = rvt_compute_aeth(qp);
+ } else if (qpriv->s_nak_state) {
+ *bth2 = mask_psn(qpriv->s_nak_psn);
+ ohdr->u.tid_rdma.ack.aeth =
+ cpu_to_be32((qp->r_msn & IB_MSN_MASK) |
+ (qpriv->s_nak_state <<
+ IB_AETH_CREDIT_SHIFT));
+ } else {
+ *bth2 = full_flow_psn(flow, flow->flow_state.lpsn);
+ ohdr->u.tid_rdma.ack.aeth = rvt_compute_aeth(qp);
+ }
+ KDETH_RESET(ohdr->u.tid_rdma.ack.kdeth0, KVER, 0x1);
+ ohdr->u.tid_rdma.ack.tid_flow_qp =
+ cpu_to_be32(qpriv->tid_rdma.local.qp |
+ ((flow->idx & TID_RDMA_DESTQP_FLOW_MASK) <<
+ TID_RDMA_DESTQP_FLOW_SHIFT) |
+ qpriv->rcd->ctxt);
+
+ ohdr->u.tid_rdma.ack.tid_flow_psn = 0;
+ ohdr->u.tid_rdma.ack.verbs_psn =
+ cpu_to_be32(flow->flow_state.resp_ib_psn);
+
+ if (qpriv->resync) {
+ /*
+ * If the PSN before the current expect KDETH PSN is the
+ * RESYNC PSN, then we never received a good TID RDMA WRITE
+ * DATA packet after a previous RESYNC.
+ * In this case, the next expected KDETH PSN stays the same.
+ */
+ if (hfi1_tid_rdma_is_resync_psn(qpriv->r_next_psn_kdeth - 1)) {
+ ohdr->u.tid_rdma.ack.tid_flow_psn =
+ cpu_to_be32(qpriv->r_next_psn_kdeth_save);
+ } else {
+ /*
+ * Because the KDETH PSNs jump during a RESYNC, it's
+ * not possible to infer (or compute) the previous value
+ * of r_next_psn_kdeth in the case of back-to-back
+ * RESYNC packets. Therefore, we save it.
+ */
+ qpriv->r_next_psn_kdeth_save =
+ qpriv->r_next_psn_kdeth - 1;
+ ohdr->u.tid_rdma.ack.tid_flow_psn =
+ cpu_to_be32(qpriv->r_next_psn_kdeth_save);
+ qpriv->r_next_psn_kdeth = mask_psn(*bth2 + 1);
+ }
+ qpriv->resync = false;
+ }
+
+ return sizeof(ohdr->u.tid_rdma.ack) / sizeof(u32);
+}
+
+void hfi1_rc_rcv_tid_rdma_ack(struct hfi1_packet *packet)
+{
+ struct ib_other_headers *ohdr = packet->ohdr;
+ struct rvt_qp *qp = packet->qp;
+ struct hfi1_qp_priv *qpriv = qp->priv;
+ struct rvt_swqe *wqe;
+ struct tid_rdma_request *req;
+ struct tid_rdma_flow *flow;
+ u32 aeth, psn, req_psn, ack_psn, flpsn, resync_psn, ack_kpsn;
+ unsigned long flags;
+ u16 fidx;
+
+ trace_hfi1_tid_write_sender_rcv_tid_ack(qp, 0);
+ process_ecn(qp, packet);
+ psn = mask_psn(be32_to_cpu(ohdr->bth[2]));
+ aeth = be32_to_cpu(ohdr->u.tid_rdma.ack.aeth);
+ req_psn = mask_psn(be32_to_cpu(ohdr->u.tid_rdma.ack.verbs_psn));
+ resync_psn = mask_psn(be32_to_cpu(ohdr->u.tid_rdma.ack.tid_flow_psn));
+
+ spin_lock_irqsave(&qp->s_lock, flags);
+ trace_hfi1_rcv_tid_ack(qp, aeth, psn, req_psn, resync_psn);
+
+ /* If we are waiting for an ACK to RESYNC, drop any other packets */
+ if ((qp->s_flags & HFI1_S_WAIT_HALT) &&
+ cmp_psn(psn, qpriv->s_resync_psn))
+ goto ack_op_err;
+
+ ack_psn = req_psn;
+ if (hfi1_tid_rdma_is_resync_psn(psn))
+ ack_kpsn = resync_psn;
+ else
+ ack_kpsn = psn;
+ if (aeth >> 29) {
+ ack_psn--;
+ ack_kpsn--;
+ }
+
+ if (unlikely(qp->s_acked == qp->s_tail))
+ goto ack_op_err;
+
+ wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
+
+ if (wqe->wr.opcode != IB_WR_TID_RDMA_WRITE)
+ goto ack_op_err;
+
+ req = wqe_to_tid_req(wqe);
+ trace_hfi1_tid_req_rcv_tid_ack(qp, 0, wqe->wr.opcode, wqe->psn,
+ wqe->lpsn, req);
+ flow = &req->flows[req->acked_tail];
+ trace_hfi1_tid_flow_rcv_tid_ack(qp, req->acked_tail, flow);
+
+ /* Drop stale ACK/NAK */
+ if (cmp_psn(psn, full_flow_psn(flow, flow->flow_state.spsn)) < 0 ||
+ cmp_psn(req_psn, flow->flow_state.resp_ib_psn) < 0)
+ goto ack_op_err;
+
+ while (cmp_psn(ack_kpsn,
+ full_flow_psn(flow, flow->flow_state.lpsn)) >= 0 &&
+ req->ack_seg < req->cur_seg) {
+ req->ack_seg++;
+ /* advance acked segment pointer */
+ req->acked_tail = CIRC_NEXT(req->acked_tail, MAX_FLOWS);
+ req->r_last_acked = flow->flow_state.resp_ib_psn;
+ trace_hfi1_tid_req_rcv_tid_ack(qp, 0, wqe->wr.opcode, wqe->psn,
+ wqe->lpsn, req);
+ if (req->ack_seg == req->total_segs) {
+ req->state = TID_REQUEST_COMPLETE;
+ wqe = do_rc_completion(qp, wqe,
+ to_iport(qp->ibqp.device,
+ qp->port_num));
+ trace_hfi1_sender_rcv_tid_ack(qp);
+ atomic_dec(&qpriv->n_tid_requests);
+ if (qp->s_acked == qp->s_tail)
+ break;
+ if (wqe->wr.opcode != IB_WR_TID_RDMA_WRITE)
+ break;
+ req = wqe_to_tid_req(wqe);
+ }
+ flow = &req->flows[req->acked_tail];
+ trace_hfi1_tid_flow_rcv_tid_ack(qp, req->acked_tail, flow);
+ }
+
+ trace_hfi1_tid_req_rcv_tid_ack(qp, 0, wqe->wr.opcode, wqe->psn,
+ wqe->lpsn, req);
+ switch (aeth >> 29) {
+ case 0: /* ACK */
+ if (qpriv->s_flags & RVT_S_WAIT_ACK)
+ qpriv->s_flags &= ~RVT_S_WAIT_ACK;
+ if (!hfi1_tid_rdma_is_resync_psn(psn)) {
+ /* Check if there is any pending TID ACK */
+ if (wqe->wr.opcode == IB_WR_TID_RDMA_WRITE &&
+ req->ack_seg < req->cur_seg)
+ hfi1_mod_tid_retry_timer(qp);
+ else
+ hfi1_stop_tid_retry_timer(qp);
+ hfi1_schedule_send(qp);
+ } else {
+ u32 spsn, fpsn, last_acked, generation;
+ struct tid_rdma_request *rptr;
+
+ /* ACK(RESYNC) */
+ hfi1_stop_tid_retry_timer(qp);
+ /* Allow new requests (see hfi1_make_tid_rdma_pkt) */
+ qp->s_flags &= ~HFI1_S_WAIT_HALT;
+ /*
+ * Clear RVT_S_SEND_ONE flag in case that the TID RDMA
+ * ACK is received after the TID retry timer is fired
+ * again. In this case, do not send any more TID
+ * RESYNC request or wait for any more TID ACK packet.
+ */
+ qpriv->s_flags &= ~RVT_S_SEND_ONE;
+ hfi1_schedule_send(qp);
+
+ if ((qp->s_acked == qpriv->s_tid_tail &&
+ req->ack_seg == req->total_segs) ||
+ qp->s_acked == qp->s_tail) {
+ qpriv->s_state = TID_OP(WRITE_DATA_LAST);
+ goto done;
+ }
+
+ if (req->ack_seg == req->comp_seg) {
+ qpriv->s_state = TID_OP(WRITE_DATA);
+ goto done;
+ }
+
+ /*
+ * The PSN to start with is the next PSN after the
+ * RESYNC PSN.
+ */
+ psn = mask_psn(psn + 1);
+ generation = psn >> HFI1_KDETH_BTH_SEQ_SHIFT;
+ spsn = 0;
+
+ /*
+ * Update to the correct WQE when we get an ACK(RESYNC)
+ * in the middle of a request.
+ */
+ if (delta_psn(ack_psn, wqe->lpsn))
+ wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
+ req = wqe_to_tid_req(wqe);
+ flow = &req->flows[req->acked_tail];
+ /*
+ * RESYNC re-numbers the PSN ranges of all remaining
+ * segments. Also, PSN's start from 0 in the middle of a
+ * segment and the first segment size is less than the
+ * default number of packets. flow->resync_npkts is used
+ * to track the number of packets from the start of the
+ * real segment to the point of 0 PSN after the RESYNC
+ * in order to later correctly rewind the SGE.
+ */
+ fpsn = full_flow_psn(flow, flow->flow_state.spsn);
+ req->r_ack_psn = psn;
+ /*
+ * If resync_psn points to the last flow PSN for a
+ * segment and the new segment (likely from a new
+ * request) starts with a new generation number, we
+ * need to adjust resync_psn accordingly.
+ */
+ if (flow->flow_state.generation !=
+ (resync_psn >> HFI1_KDETH_BTH_SEQ_SHIFT))
+ resync_psn = mask_psn(fpsn - 1);
+ flow->resync_npkts +=
+ delta_psn(mask_psn(resync_psn + 1), fpsn);
+ /*
+ * Renumber all packet sequence number ranges
+ * based on the new generation.
+ */
+ last_acked = qp->s_acked;
+ rptr = req;
+ while (1) {
+ /* start from last acked segment */
+ for (fidx = rptr->acked_tail;
+ CIRC_CNT(rptr->setup_head, fidx,
+ MAX_FLOWS);
+ fidx = CIRC_NEXT(fidx, MAX_FLOWS)) {
+ u32 lpsn;
+ u32 gen;
+
+ flow = &rptr->flows[fidx];
+ gen = flow->flow_state.generation;
+ if (WARN_ON(gen == generation &&
+ flow->flow_state.spsn !=
+ spsn))
+ continue;
+ lpsn = flow->flow_state.lpsn;
+ lpsn = full_flow_psn(flow, lpsn);
+ flow->npkts =
+ delta_psn(lpsn,
+ mask_psn(resync_psn)
+ );
+ flow->flow_state.generation =
+ generation;
+ flow->flow_state.spsn = spsn;
+ flow->flow_state.lpsn =
+ flow->flow_state.spsn +
+ flow->npkts - 1;
+ flow->pkt = 0;
+ spsn += flow->npkts;
+ resync_psn += flow->npkts;
+ trace_hfi1_tid_flow_rcv_tid_ack(qp,
+ fidx,
+ flow);
+ }
+ if (++last_acked == qpriv->s_tid_cur + 1)
+ break;
+ if (last_acked == qp->s_size)
+ last_acked = 0;
+ wqe = rvt_get_swqe_ptr(qp, last_acked);
+ rptr = wqe_to_tid_req(wqe);
+ }
+ req->cur_seg = req->ack_seg;
+ qpriv->s_tid_tail = qp->s_acked;
+ qpriv->s_state = TID_OP(WRITE_REQ);
+ hfi1_schedule_tid_send(qp);
+ }
+done:
+ qpriv->s_retry = qp->s_retry_cnt;
+ break;
+
+ case 3: /* NAK */
+ hfi1_stop_tid_retry_timer(qp);
+ switch ((aeth >> IB_AETH_CREDIT_SHIFT) &
+ IB_AETH_CREDIT_MASK) {
+ case 0: /* PSN sequence error */
+ if (!req->flows)
+ break;
+ flow = &req->flows[req->acked_tail];
+ flpsn = full_flow_psn(flow, flow->flow_state.lpsn);
+ if (cmp_psn(psn, flpsn) > 0)
+ break;
+ trace_hfi1_tid_flow_rcv_tid_ack(qp, req->acked_tail,
+ flow);
+ req->r_ack_psn = mask_psn(be32_to_cpu(ohdr->bth[2]));
+ req->cur_seg = req->ack_seg;
+ qpriv->s_tid_tail = qp->s_acked;
+ qpriv->s_state = TID_OP(WRITE_REQ);
+ qpriv->s_retry = qp->s_retry_cnt;
+ hfi1_schedule_tid_send(qp);
+ break;
+
+ default:
+ break;
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ack_op_err:
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+}
+
+void hfi1_add_tid_retry_timer(struct rvt_qp *qp)
+{
+ struct hfi1_qp_priv *priv = qp->priv;
+ struct ib_qp *ibqp = &qp->ibqp;
+ struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
+
+ lockdep_assert_held(&qp->s_lock);
+ if (!(priv->s_flags & HFI1_S_TID_RETRY_TIMER)) {
+ priv->s_flags |= HFI1_S_TID_RETRY_TIMER;
+ priv->s_tid_retry_timer.expires = jiffies +
+ priv->tid_retry_timeout_jiffies + rdi->busy_jiffies;
+ add_timer(&priv->s_tid_retry_timer);
+ }
+}
+
+static void hfi1_mod_tid_retry_timer(struct rvt_qp *qp)
+{
+ struct hfi1_qp_priv *priv = qp->priv;
+ struct ib_qp *ibqp = &qp->ibqp;
+ struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
+
+ lockdep_assert_held(&qp->s_lock);
+ priv->s_flags |= HFI1_S_TID_RETRY_TIMER;
+ mod_timer(&priv->s_tid_retry_timer, jiffies +
+ priv->tid_retry_timeout_jiffies + rdi->busy_jiffies);
+}
+
+static int hfi1_stop_tid_retry_timer(struct rvt_qp *qp)
+{
+ struct hfi1_qp_priv *priv = qp->priv;
+ int rval = 0;
+
+ lockdep_assert_held(&qp->s_lock);
+ if (priv->s_flags & HFI1_S_TID_RETRY_TIMER) {
+ rval = timer_delete(&priv->s_tid_retry_timer);
+ priv->s_flags &= ~HFI1_S_TID_RETRY_TIMER;
+ }
+ return rval;
+}
+
+void hfi1_del_tid_retry_timer(struct rvt_qp *qp)
+{
+ struct hfi1_qp_priv *priv = qp->priv;
+
+ timer_delete_sync(&priv->s_tid_retry_timer);
+ priv->s_flags &= ~HFI1_S_TID_RETRY_TIMER;
+}
+
+static void hfi1_tid_retry_timeout(struct timer_list *t)
+{
+ struct hfi1_qp_priv *priv = timer_container_of(priv, t,
+ s_tid_retry_timer);
+ struct rvt_qp *qp = priv->owner;
+ struct rvt_swqe *wqe;
+ unsigned long flags;
+ struct tid_rdma_request *req;
+
+ spin_lock_irqsave(&qp->r_lock, flags);
+ spin_lock(&qp->s_lock);
+ trace_hfi1_tid_write_sender_retry_timeout(qp, 0);
+ if (priv->s_flags & HFI1_S_TID_RETRY_TIMER) {
+ hfi1_stop_tid_retry_timer(qp);
+ if (!priv->s_retry) {
+ trace_hfi1_msg_tid_retry_timeout(/* msg */
+ qp,
+ "Exhausted retries. Tid retry timeout = ",
+ (u64)priv->tid_retry_timeout_jiffies);
+
+ wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
+ hfi1_trdma_send_complete(qp, wqe, IB_WC_RETRY_EXC_ERR);
+ rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
+ } else {
+ wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
+ req = wqe_to_tid_req(wqe);
+ trace_hfi1_tid_req_tid_retry_timeout(/* req */
+ qp, 0, wqe->wr.opcode, wqe->psn, wqe->lpsn, req);
+
+ priv->s_flags &= ~RVT_S_WAIT_ACK;
+ /* Only send one packet (the RESYNC) */
+ priv->s_flags |= RVT_S_SEND_ONE;
+ /*
+ * No additional request shall be made by this QP until
+ * the RESYNC has been complete.
+ */
+ qp->s_flags |= HFI1_S_WAIT_HALT;
+ priv->s_state = TID_OP(RESYNC);
+ priv->s_retry--;
+ hfi1_schedule_tid_send(qp);
+ }
+ }
+ spin_unlock(&qp->s_lock);
+ spin_unlock_irqrestore(&qp->r_lock, flags);
+}
+
+u32 hfi1_build_tid_rdma_resync(struct rvt_qp *qp, struct rvt_swqe *wqe,
+ struct ib_other_headers *ohdr, u32 *bth1,
+ u32 *bth2, u16 fidx)
+{
+ struct hfi1_qp_priv *qpriv = qp->priv;
+ struct tid_rdma_params *remote;
+ struct tid_rdma_request *req = wqe_to_tid_req(wqe);
+ struct tid_rdma_flow *flow = &req->flows[fidx];
+ u32 generation;
+
+ rcu_read_lock();
+ remote = rcu_dereference(qpriv->tid_rdma.remote);
+ KDETH_RESET(ohdr->u.tid_rdma.ack.kdeth1, JKEY, remote->jkey);
+ ohdr->u.tid_rdma.ack.verbs_qp = cpu_to_be32(qp->remote_qpn);
+ *bth1 = remote->qp;
+ rcu_read_unlock();
+
+ generation = kern_flow_generation_next(flow->flow_state.generation);
+ *bth2 = mask_psn((generation << HFI1_KDETH_BTH_SEQ_SHIFT) - 1);
+ qpriv->s_resync_psn = *bth2;
+ *bth2 |= IB_BTH_REQ_ACK;
+ KDETH_RESET(ohdr->u.tid_rdma.ack.kdeth0, KVER, 0x1);
+
+ return sizeof(ohdr->u.tid_rdma.resync) / sizeof(u32);
+}
+
+void hfi1_rc_rcv_tid_rdma_resync(struct hfi1_packet *packet)
+{
+ struct ib_other_headers *ohdr = packet->ohdr;
+ struct rvt_qp *qp = packet->qp;
+ struct hfi1_qp_priv *qpriv = qp->priv;
+ struct hfi1_ctxtdata *rcd = qpriv->rcd;
+ struct hfi1_ibdev *dev = to_idev(qp->ibqp.device);
+ struct rvt_ack_entry *e;
+ struct tid_rdma_request *req;
+ struct tid_rdma_flow *flow;
+ struct tid_flow_state *fs = &qpriv->flow_state;
+ u32 psn, generation, idx, gen_next;
+ bool fecn;
+ unsigned long flags;
+
+ fecn = process_ecn(qp, packet);
+ psn = mask_psn(be32_to_cpu(ohdr->bth[2]));
+
+ generation = mask_psn(psn + 1) >> HFI1_KDETH_BTH_SEQ_SHIFT;
+ spin_lock_irqsave(&qp->s_lock, flags);
+
+ gen_next = (fs->generation == KERN_GENERATION_RESERVED) ?
+ generation : kern_flow_generation_next(fs->generation);
+ /*
+ * RESYNC packet contains the "next" generation and can only be
+ * from the current or previous generations
+ */
+ if (generation != mask_generation(gen_next - 1) &&
+ generation != gen_next)
+ goto bail;
+ /* Already processing a resync */
+ if (qpriv->resync)
+ goto bail;
+
+ spin_lock(&rcd->exp_lock);
+ if (fs->index >= RXE_NUM_TID_FLOWS) {
+ /*
+ * If we don't have a flow, save the generation so it can be
+ * applied when a new flow is allocated
+ */
+ fs->generation = generation;
+ } else {
+ /* Reprogram the QP flow with new generation */
+ rcd->flows[fs->index].generation = generation;
+ fs->generation = kern_setup_hw_flow(rcd, fs->index);
+ }
+ fs->psn = 0;
+ /*
+ * Disable SW PSN checking since a RESYNC is equivalent to a
+ * sync point and the flow has/will be reprogrammed
+ */
+ qpriv->s_flags &= ~HFI1_R_TID_SW_PSN;
+ trace_hfi1_tid_write_rsp_rcv_resync(qp);
+
+ /*
+ * Reset all TID flow information with the new generation.
+ * This is done for all requests and segments after the
+ * last received segment
+ */
+ for (idx = qpriv->r_tid_tail; ; idx++) {
+ u16 flow_idx;
+
+ if (idx > rvt_size_atomic(&dev->rdi))
+ idx = 0;
+ e = &qp->s_ack_queue[idx];
+ if (e->opcode == TID_OP(WRITE_REQ)) {
+ req = ack_to_tid_req(e);
+ trace_hfi1_tid_req_rcv_resync(qp, 0, e->opcode, e->psn,
+ e->lpsn, req);
+
+ /* start from last unacked segment */
+ for (flow_idx = req->clear_tail;
+ CIRC_CNT(req->setup_head, flow_idx,
+ MAX_FLOWS);
+ flow_idx = CIRC_NEXT(flow_idx, MAX_FLOWS)) {
+ u32 lpsn;
+ u32 next;
+
+ flow = &req->flows[flow_idx];
+ lpsn = full_flow_psn(flow,
+ flow->flow_state.lpsn);
+ next = flow->flow_state.r_next_psn;
+ flow->npkts = delta_psn(lpsn, next - 1);
+ flow->flow_state.generation = fs->generation;
+ flow->flow_state.spsn = fs->psn;
+ flow->flow_state.lpsn =
+ flow->flow_state.spsn + flow->npkts - 1;
+ flow->flow_state.r_next_psn =
+ full_flow_psn(flow,
+ flow->flow_state.spsn);
+ fs->psn += flow->npkts;
+ trace_hfi1_tid_flow_rcv_resync(qp, flow_idx,
+ flow);
+ }
+ }
+ if (idx == qp->s_tail_ack_queue)
+ break;
+ }
+
+ spin_unlock(&rcd->exp_lock);
+ qpriv->resync = true;
+ /* RESYNC request always gets a TID RDMA ACK. */
+ qpriv->s_nak_state = 0;
+ tid_rdma_trigger_ack(qp);
+bail:
+ if (fecn)
+ qp->s_flags |= RVT_S_ECN;
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+}
+
+/*
+ * Call this function when the last TID RDMA WRITE DATA packet for a request
+ * is built.
+ */
+static void update_tid_tail(struct rvt_qp *qp)
+ __must_hold(&qp->s_lock)
+{
+ struct hfi1_qp_priv *priv = qp->priv;
+ u32 i;
+ struct rvt_swqe *wqe;
+
+ lockdep_assert_held(&qp->s_lock);
+ /* Can't move beyond s_tid_cur */
+ if (priv->s_tid_tail == priv->s_tid_cur)
+ return;
+ for (i = priv->s_tid_tail + 1; ; i++) {
+ if (i == qp->s_size)
+ i = 0;
+
+ if (i == priv->s_tid_cur)
+ break;
+ wqe = rvt_get_swqe_ptr(qp, i);
+ if (wqe->wr.opcode == IB_WR_TID_RDMA_WRITE)
+ break;
+ }
+ priv->s_tid_tail = i;
+ priv->s_state = TID_OP(WRITE_RESP);
+}
+
+int hfi1_make_tid_rdma_pkt(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
+ __must_hold(&qp->s_lock)
+{
+ struct hfi1_qp_priv *priv = qp->priv;
+ struct rvt_swqe *wqe;
+ u32 bth1 = 0, bth2 = 0, hwords = 5, len, middle = 0;
+ struct ib_other_headers *ohdr;
+ struct rvt_sge_state *ss = &qp->s_sge;
+ struct rvt_ack_entry *e = &qp->s_ack_queue[qp->s_tail_ack_queue];
+ struct tid_rdma_request *req = ack_to_tid_req(e);
+ bool last = false;
+ u8 opcode = TID_OP(WRITE_DATA);
+
+ lockdep_assert_held(&qp->s_lock);
+ trace_hfi1_tid_write_sender_make_tid_pkt(qp, 0);
+ /*
+ * Prioritize the sending of the requests and responses over the
+ * sending of the TID RDMA data packets.
+ */
+ if (((atomic_read(&priv->n_tid_requests) < HFI1_TID_RDMA_WRITE_CNT) &&
+ atomic_read(&priv->n_requests) &&
+ !(qp->s_flags & (RVT_S_BUSY | RVT_S_WAIT_ACK |
+ HFI1_S_ANY_WAIT_IO))) ||
+ (e->opcode == TID_OP(WRITE_REQ) && req->cur_seg < req->alloc_seg &&
+ !(qp->s_flags & (RVT_S_BUSY | HFI1_S_ANY_WAIT_IO)))) {
+ struct iowait_work *iowork;
+
+ iowork = iowait_get_ib_work(&priv->s_iowait);
+ ps->s_txreq = get_waiting_verbs_txreq(iowork);
+ if (ps->s_txreq || hfi1_make_rc_req(qp, ps)) {
+ priv->s_flags |= HFI1_S_TID_BUSY_SET;
+ return 1;
+ }
+ }
+
+ ps->s_txreq = get_txreq(ps->dev, qp);
+ if (!ps->s_txreq)
+ goto bail_no_tx;
+
+ ohdr = &ps->s_txreq->phdr.hdr.ibh.u.oth;
+
+ if ((priv->s_flags & RVT_S_ACK_PENDING) &&
+ make_tid_rdma_ack(qp, ohdr, ps))
+ return 1;
+
+ /*
+ * Bail out if we can't send data.
+ * Be reminded that this check must been done after the call to
+ * make_tid_rdma_ack() because the responding QP could be in
+ * RTR state where it can send TID RDMA ACK, not TID RDMA WRITE DATA.
+ */
+ if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_SEND_OK))
+ goto bail;
+
+ if (priv->s_flags & RVT_S_WAIT_ACK)
+ goto bail;
+
+ /* Check whether there is anything to do. */
+ if (priv->s_tid_tail == HFI1_QP_WQE_INVALID)
+ goto bail;
+ wqe = rvt_get_swqe_ptr(qp, priv->s_tid_tail);
+ req = wqe_to_tid_req(wqe);
+ trace_hfi1_tid_req_make_tid_pkt(qp, 0, wqe->wr.opcode, wqe->psn,
+ wqe->lpsn, req);
+ switch (priv->s_state) {
+ case TID_OP(WRITE_REQ):
+ case TID_OP(WRITE_RESP):
+ priv->tid_ss.sge = wqe->sg_list[0];
+ priv->tid_ss.sg_list = wqe->sg_list + 1;
+ priv->tid_ss.num_sge = wqe->wr.num_sge;
+ priv->tid_ss.total_len = wqe->length;
+
+ if (priv->s_state == TID_OP(WRITE_REQ))
+ hfi1_tid_rdma_restart_req(qp, wqe, &bth2);
+ priv->s_state = TID_OP(WRITE_DATA);
+ fallthrough;
+
+ case TID_OP(WRITE_DATA):
+ /*
+ * 1. Check whether TID RDMA WRITE RESP available.
+ * 2. If no:
+ * 2.1 If have more segments and no TID RDMA WRITE RESP,
+ * set HFI1_S_WAIT_TID_RESP
+ * 2.2 Return indicating no progress made.
+ * 3. If yes:
+ * 3.1 Build TID RDMA WRITE DATA packet.
+ * 3.2 If last packet in segment:
+ * 3.2.1 Change KDETH header bits
+ * 3.2.2 Advance RESP pointers.
+ * 3.3 Return indicating progress made.
+ */
+ trace_hfi1_sender_make_tid_pkt(qp);
+ trace_hfi1_tid_write_sender_make_tid_pkt(qp, 0);
+ wqe = rvt_get_swqe_ptr(qp, priv->s_tid_tail);
+ req = wqe_to_tid_req(wqe);
+ len = wqe->length;
+
+ if (!req->comp_seg || req->cur_seg == req->comp_seg)
+ goto bail;
+
+ trace_hfi1_tid_req_make_tid_pkt(qp, 0, wqe->wr.opcode,
+ wqe->psn, wqe->lpsn, req);
+ last = hfi1_build_tid_rdma_packet(wqe, ohdr, &bth1, &bth2,
+ &len);
+
+ if (last) {
+ /* move pointer to next flow */
+ req->clear_tail = CIRC_NEXT(req->clear_tail,
+ MAX_FLOWS);
+ if (++req->cur_seg < req->total_segs) {
+ if (!CIRC_CNT(req->setup_head, req->clear_tail,
+ MAX_FLOWS))
+ qp->s_flags |= HFI1_S_WAIT_TID_RESP;
+ } else {
+ priv->s_state = TID_OP(WRITE_DATA_LAST);
+ opcode = TID_OP(WRITE_DATA_LAST);
+
+ /* Advance the s_tid_tail now */
+ update_tid_tail(qp);
+ }
+ }
+ hwords += sizeof(ohdr->u.tid_rdma.w_data) / sizeof(u32);
+ ss = &priv->tid_ss;
+ break;
+
+ case TID_OP(RESYNC):
+ trace_hfi1_sender_make_tid_pkt(qp);
+ /* Use generation from the most recently received response */
+ wqe = rvt_get_swqe_ptr(qp, priv->s_tid_cur);
+ req = wqe_to_tid_req(wqe);
+ /* If no responses for this WQE look at the previous one */
+ if (!req->comp_seg) {
+ wqe = rvt_get_swqe_ptr(qp,
+ (!priv->s_tid_cur ? qp->s_size :
+ priv->s_tid_cur) - 1);
+ req = wqe_to_tid_req(wqe);
+ }
+ hwords += hfi1_build_tid_rdma_resync(qp, wqe, ohdr, &bth1,
+ &bth2,
+ CIRC_PREV(req->setup_head,
+ MAX_FLOWS));
+ ss = NULL;
+ len = 0;
+ opcode = TID_OP(RESYNC);
+ break;
+
+ default:
+ goto bail;
+ }
+ if (priv->s_flags & RVT_S_SEND_ONE) {
+ priv->s_flags &= ~RVT_S_SEND_ONE;
+ priv->s_flags |= RVT_S_WAIT_ACK;
+ bth2 |= IB_BTH_REQ_ACK;
+ }
+ qp->s_len -= len;
+ ps->s_txreq->hdr_dwords = hwords;
+ ps->s_txreq->sde = priv->s_sde;
+ ps->s_txreq->ss = ss;
+ ps->s_txreq->s_cur_size = len;
+ hfi1_make_ruc_header(qp, ohdr, (opcode << 24), bth1, bth2,
+ middle, ps);
+ return 1;
+bail:
+ hfi1_put_txreq(ps->s_txreq);
+bail_no_tx:
+ ps->s_txreq = NULL;
+ priv->s_flags &= ~RVT_S_BUSY;
+ /*
+ * If we didn't get a txreq, the QP will be woken up later to try
+ * again, set the flags to the wake up which work item to wake
+ * up.
+ * (A better algorithm should be found to do this and generalize the
+ * sleep/wakeup flags.)
+ */
+ iowait_set_flag(&priv->s_iowait, IOWAIT_PENDING_TID);
+ return 0;
+}
+
+static int make_tid_rdma_ack(struct rvt_qp *qp,
+ struct ib_other_headers *ohdr,
+ struct hfi1_pkt_state *ps)
+{
+ struct rvt_ack_entry *e;
+ struct hfi1_qp_priv *qpriv = qp->priv;
+ struct hfi1_ibdev *dev = to_idev(qp->ibqp.device);
+ u32 hwords, next;
+ u32 len = 0;
+ u32 bth1 = 0, bth2 = 0;
+ int middle = 0;
+ u16 flow;
+ struct tid_rdma_request *req, *nreq;
+
+ trace_hfi1_tid_write_rsp_make_tid_ack(qp);
+ /* Don't send an ACK if we aren't supposed to. */
+ if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK))
+ goto bail;
+
+ /* header size in 32-bit words LRH+BTH = (8+12)/4. */
+ hwords = 5;
+
+ e = &qp->s_ack_queue[qpriv->r_tid_ack];
+ req = ack_to_tid_req(e);
+ /*
+ * In the RESYNC case, we are exactly one segment past the
+ * previously sent ack or at the previously sent NAK. So to send
+ * the resync ack, we go back one segment (which might be part of
+ * the previous request) and let the do-while loop execute again.
+ * The advantage of executing the do-while loop is that any data
+ * received after the previous ack is automatically acked in the
+ * RESYNC ack. It turns out that for the do-while loop we only need
+ * to pull back qpriv->r_tid_ack, not the segment
+ * indices/counters. The scheme works even if the previous request
+ * was not a TID WRITE request.
+ */
+ if (qpriv->resync) {
+ if (!req->ack_seg || req->ack_seg == req->total_segs)
+ qpriv->r_tid_ack = !qpriv->r_tid_ack ?
+ rvt_size_atomic(&dev->rdi) :
+ qpriv->r_tid_ack - 1;
+ e = &qp->s_ack_queue[qpriv->r_tid_ack];
+ req = ack_to_tid_req(e);
+ }
+
+ trace_hfi1_rsp_make_tid_ack(qp, e->psn);
+ trace_hfi1_tid_req_make_tid_ack(qp, 0, e->opcode, e->psn, e->lpsn,
+ req);
+ /*
+ * If we've sent all the ACKs that we can, we are done
+ * until we get more segments...
+ */
+ if (!qpriv->s_nak_state && !qpriv->resync &&
+ req->ack_seg == req->comp_seg)
+ goto bail;
+
+ do {
+ /*
+ * To deal with coalesced ACKs, the acked_tail pointer
+ * into the flow array is used. The distance between it
+ * and the clear_tail is the number of flows that are
+ * being ACK'ed.
+ */
+ req->ack_seg +=
+ /* Get up-to-date value */
+ CIRC_CNT(req->clear_tail, req->acked_tail,
+ MAX_FLOWS);
+ /* Advance acked index */
+ req->acked_tail = req->clear_tail;
+
+ /*
+ * req->clear_tail points to the segment currently being
+ * received. So, when sending an ACK, the previous
+ * segment is being ACK'ed.
+ */
+ flow = CIRC_PREV(req->acked_tail, MAX_FLOWS);
+ if (req->ack_seg != req->total_segs)
+ break;
+ req->state = TID_REQUEST_COMPLETE;
+
+ next = qpriv->r_tid_ack + 1;
+ if (next > rvt_size_atomic(&dev->rdi))
+ next = 0;
+ qpriv->r_tid_ack = next;
+ if (qp->s_ack_queue[next].opcode != TID_OP(WRITE_REQ))
+ break;
+ nreq = ack_to_tid_req(&qp->s_ack_queue[next]);
+ if (!nreq->comp_seg || nreq->ack_seg == nreq->comp_seg)
+ break;
+
+ /* Move to the next ack entry now */
+ e = &qp->s_ack_queue[qpriv->r_tid_ack];
+ req = ack_to_tid_req(e);
+ } while (1);
+
+ /*
+ * At this point qpriv->r_tid_ack == qpriv->r_tid_tail but e and
+ * req could be pointing at the previous ack queue entry
+ */
+ if (qpriv->s_nak_state ||
+ (qpriv->resync &&
+ !hfi1_tid_rdma_is_resync_psn(qpriv->r_next_psn_kdeth - 1) &&
+ (cmp_psn(qpriv->r_next_psn_kdeth - 1,
+ full_flow_psn(&req->flows[flow],
+ req->flows[flow].flow_state.lpsn)) > 0))) {
+ /*
+ * A NAK will implicitly acknowledge all previous TID RDMA
+ * requests. Therefore, we NAK with the req->acked_tail
+ * segment for the request at qpriv->r_tid_ack (same at
+ * this point as the req->clear_tail segment for the
+ * qpriv->r_tid_tail request)
+ */
+ e = &qp->s_ack_queue[qpriv->r_tid_ack];
+ req = ack_to_tid_req(e);
+ flow = req->acked_tail;
+ } else if (req->ack_seg == req->total_segs &&
+ qpriv->s_flags & HFI1_R_TID_WAIT_INTERLCK)
+ qpriv->s_flags &= ~HFI1_R_TID_WAIT_INTERLCK;
+
+ trace_hfi1_tid_write_rsp_make_tid_ack(qp);
+ trace_hfi1_tid_req_make_tid_ack(qp, 0, e->opcode, e->psn, e->lpsn,
+ req);
+ hwords += hfi1_build_tid_rdma_write_ack(qp, e, ohdr, flow, &bth1,
+ &bth2);
+ len = 0;
+ qpriv->s_flags &= ~RVT_S_ACK_PENDING;
+ ps->s_txreq->hdr_dwords = hwords;
+ ps->s_txreq->sde = qpriv->s_sde;
+ ps->s_txreq->s_cur_size = len;
+ ps->s_txreq->ss = NULL;
+ hfi1_make_ruc_header(qp, ohdr, (TID_OP(ACK) << 24), bth1, bth2, middle,
+ ps);
+ ps->s_txreq->txreq.flags |= SDMA_TXREQ_F_VIP;
+ return 1;
+bail:
+ /*
+ * Ensure s_rdma_ack_cnt changes are committed prior to resetting
+ * RVT_S_RESP_PENDING
+ */
+ smp_wmb();
+ qpriv->s_flags &= ~RVT_S_ACK_PENDING;
+ return 0;
+}
+
+static int hfi1_send_tid_ok(struct rvt_qp *qp)
+{
+ struct hfi1_qp_priv *priv = qp->priv;
+
+ return !(priv->s_flags & RVT_S_BUSY ||
+ qp->s_flags & HFI1_S_ANY_WAIT_IO) &&
+ (verbs_txreq_queued(iowait_get_tid_work(&priv->s_iowait)) ||
+ (priv->s_flags & RVT_S_RESP_PENDING) ||
+ !(qp->s_flags & HFI1_S_ANY_TID_WAIT_SEND));
+}
+
+void _hfi1_do_tid_send(struct work_struct *work)
+{
+ struct iowait_work *w = container_of(work, struct iowait_work, iowork);
+ struct rvt_qp *qp = iowait_to_qp(w->iow);
+
+ hfi1_do_tid_send(qp);
+}
+
+static void hfi1_do_tid_send(struct rvt_qp *qp)
+{
+ struct hfi1_pkt_state ps;
+ struct hfi1_qp_priv *priv = qp->priv;
+
+ ps.dev = to_idev(qp->ibqp.device);
+ ps.ibp = to_iport(qp->ibqp.device, qp->port_num);
+ ps.ppd = ppd_from_ibp(ps.ibp);
+ ps.wait = iowait_get_tid_work(&priv->s_iowait);
+ ps.in_thread = false;
+ ps.timeout_int = qp->timeout_jiffies / 8;
+
+ trace_hfi1_rc_do_tid_send(qp, false);
+ spin_lock_irqsave(&qp->s_lock, ps.flags);
+
+ /* Return if we are already busy processing a work request. */
+ if (!hfi1_send_tid_ok(qp)) {
+ if (qp->s_flags & HFI1_S_ANY_WAIT_IO)
+ iowait_set_flag(&priv->s_iowait, IOWAIT_PENDING_TID);
+ spin_unlock_irqrestore(&qp->s_lock, ps.flags);
+ return;
+ }
+
+ priv->s_flags |= RVT_S_BUSY;
+
+ ps.timeout = jiffies + ps.timeout_int;
+ ps.cpu = priv->s_sde ? priv->s_sde->cpu :
+ cpumask_first(cpumask_of_node(ps.ppd->dd->node));
+ ps.pkts_sent = false;
+
+ /* insure a pre-built packet is handled */
+ ps.s_txreq = get_waiting_verbs_txreq(ps.wait);
+ do {
+ /* Check for a constructed packet to be sent. */
+ if (ps.s_txreq) {
+ if (priv->s_flags & HFI1_S_TID_BUSY_SET) {
+ qp->s_flags |= RVT_S_BUSY;
+ ps.wait = iowait_get_ib_work(&priv->s_iowait);
+ }
+ spin_unlock_irqrestore(&qp->s_lock, ps.flags);
+
+ /*
+ * If the packet cannot be sent now, return and
+ * the send tasklet will be woken up later.
+ */
+ if (hfi1_verbs_send(qp, &ps))
+ return;
+
+ /* allow other tasks to run */
+ if (hfi1_schedule_send_yield(qp, &ps, true))
+ return;
+
+ spin_lock_irqsave(&qp->s_lock, ps.flags);
+ if (priv->s_flags & HFI1_S_TID_BUSY_SET) {
+ qp->s_flags &= ~RVT_S_BUSY;
+ priv->s_flags &= ~HFI1_S_TID_BUSY_SET;
+ ps.wait = iowait_get_tid_work(&priv->s_iowait);
+ if (iowait_flag_set(&priv->s_iowait,
+ IOWAIT_PENDING_IB))
+ hfi1_schedule_send(qp);
+ }
+ }
+ } while (hfi1_make_tid_rdma_pkt(qp, &ps));
+ iowait_starve_clear(ps.pkts_sent, &priv->s_iowait);
+ spin_unlock_irqrestore(&qp->s_lock, ps.flags);
+}
+
+static bool _hfi1_schedule_tid_send(struct rvt_qp *qp)
+{
+ struct hfi1_qp_priv *priv = qp->priv;
+ struct hfi1_ibport *ibp =
+ to_iport(qp->ibqp.device, qp->port_num);
+ struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
+ struct hfi1_devdata *dd = ppd->dd;
+
+ if ((dd->flags & HFI1_SHUTDOWN))
+ return true;
+
+ return iowait_tid_schedule(&priv->s_iowait, ppd->hfi1_wq,
+ priv->s_sde ?
+ priv->s_sde->cpu :
+ cpumask_first(cpumask_of_node(dd->node)));
+}
+
+/**
+ * hfi1_schedule_tid_send - schedule progress on TID RDMA state machine
+ * @qp: the QP
+ *
+ * This schedules qp progress on the TID RDMA state machine. Caller
+ * should hold the s_lock.
+ * Unlike hfi1_schedule_send(), this cannot use hfi1_send_ok() because
+ * the two state machines can step on each other with respect to the
+ * RVT_S_BUSY flag.
+ * Therefore, a modified test is used.
+ *
+ * Return: %true if the second leg is scheduled;
+ * %false if the second leg is not scheduled.
+ */
+bool hfi1_schedule_tid_send(struct rvt_qp *qp)
+{
+ lockdep_assert_held(&qp->s_lock);
+ if (hfi1_send_tid_ok(qp)) {
+ /*
+ * The following call returns true if the qp is not on the
+ * queue and false if the qp is already on the queue before
+ * this call. Either way, the qp will be on the queue when the
+ * call returns.
+ */
+ _hfi1_schedule_tid_send(qp);
+ return true;
+ }
+ if (qp->s_flags & HFI1_S_ANY_WAIT_IO)
+ iowait_set_flag(&((struct hfi1_qp_priv *)qp->priv)->s_iowait,
+ IOWAIT_PENDING_TID);
+ return false;
+}
+
+bool hfi1_tid_rdma_ack_interlock(struct rvt_qp *qp, struct rvt_ack_entry *e)
+{
+ struct rvt_ack_entry *prev;
+ struct tid_rdma_request *req;
+ struct hfi1_ibdev *dev = to_idev(qp->ibqp.device);
+ struct hfi1_qp_priv *priv = qp->priv;
+ u32 s_prev;
+
+ s_prev = qp->s_tail_ack_queue == 0 ? rvt_size_atomic(&dev->rdi) :
+ (qp->s_tail_ack_queue - 1);
+ prev = &qp->s_ack_queue[s_prev];
+
+ if ((e->opcode == TID_OP(READ_REQ) ||
+ e->opcode == OP(RDMA_READ_REQUEST)) &&
+ prev->opcode == TID_OP(WRITE_REQ)) {
+ req = ack_to_tid_req(prev);
+ if (req->ack_seg != req->total_segs) {
+ priv->s_flags |= HFI1_R_TID_WAIT_INTERLCK;
+ return true;
+ }
+ }
+ return false;
+}
+
+static u32 read_r_next_psn(struct hfi1_devdata *dd, u8 ctxt, u8 fidx)
+{
+ u64 reg;
+
+ /*
+ * The only sane way to get the amount of
+ * progress is to read the HW flow state.
+ */
+ reg = read_uctxt_csr(dd, ctxt, RCV_TID_FLOW_TABLE + (8 * fidx));
+ return mask_psn(reg);
+}
+
+static void tid_rdma_rcv_err(struct hfi1_packet *packet,
+ struct ib_other_headers *ohdr,
+ struct rvt_qp *qp, u32 psn, int diff, bool fecn)
+{
+ unsigned long flags;
+
+ tid_rdma_rcv_error(packet, ohdr, qp, psn, diff);
+ if (fecn) {
+ spin_lock_irqsave(&qp->s_lock, flags);
+ qp->s_flags |= RVT_S_ECN;
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+ }
+}
+
+static void update_r_next_psn_fecn(struct hfi1_packet *packet,
+ struct hfi1_qp_priv *priv,
+ struct hfi1_ctxtdata *rcd,
+ struct tid_rdma_flow *flow,
+ bool fecn)
+{
+ /*
+ * If a start/middle packet is delivered here due to
+ * RSM rule and FECN, we need to update the r_next_psn.
+ */
+ if (fecn && packet->etype == RHF_RCV_TYPE_EAGER &&
+ !(priv->s_flags & HFI1_R_TID_SW_PSN)) {
+ struct hfi1_devdata *dd = rcd->dd;
+
+ flow->flow_state.r_next_psn =
+ read_r_next_psn(dd, rcd->ctxt, flow->idx);
+ }
+}
diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.h b/drivers/infiniband/hw/hfi1/tid_rdma.h
new file mode 100644
index 000000000000..6e82df2190b7
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/tid_rdma.h
@@ -0,0 +1,319 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
+/*
+ * Copyright(c) 2018 Intel Corporation.
+ *
+ */
+#ifndef HFI1_TID_RDMA_H
+#define HFI1_TID_RDMA_H
+
+#include <linux/circ_buf.h>
+#include "common.h"
+
+/* Add a convenience helper */
+#define CIRC_ADD(val, add, size) (((val) + (add)) & ((size) - 1))
+#define CIRC_NEXT(val, size) CIRC_ADD(val, 1, size)
+#define CIRC_PREV(val, size) CIRC_ADD(val, -1, size)
+
+#define TID_RDMA_MIN_SEGMENT_SIZE BIT(18) /* 256 KiB (for now) */
+#define TID_RDMA_MAX_SEGMENT_SIZE BIT(18) /* 256 KiB (for now) */
+#define TID_RDMA_MAX_PAGES (BIT(18) >> PAGE_SHIFT)
+#define TID_RDMA_SEGMENT_SHIFT 18
+
+/*
+ * Bit definitions for priv->s_flags.
+ * These bit flags overload the bit flags defined for the QP's s_flags.
+ * Due to the fact that these bit fields are used only for the QP priv
+ * s_flags, there are no collisions.
+ *
+ * HFI1_S_TID_WAIT_INTERLCK - QP is waiting for requester interlock
+ * HFI1_R_TID_WAIT_INTERLCK - QP is waiting for responder interlock
+ */
+#define HFI1_S_TID_BUSY_SET BIT(0)
+/* BIT(1) reserved for RVT_S_BUSY. */
+#define HFI1_R_TID_RSC_TIMER BIT(2)
+/* BIT(3) reserved for RVT_S_RESP_PENDING. */
+/* BIT(4) reserved for RVT_S_ACK_PENDING. */
+#define HFI1_S_TID_WAIT_INTERLCK BIT(5)
+#define HFI1_R_TID_WAIT_INTERLCK BIT(6)
+/* BIT(7) - BIT(15) reserved for RVT_S_WAIT_*. */
+/* BIT(16) reserved for RVT_S_SEND_ONE */
+#define HFI1_S_TID_RETRY_TIMER BIT(17)
+/* BIT(18) reserved for RVT_S_ECN. */
+#define HFI1_R_TID_SW_PSN BIT(19)
+/* BIT(26) reserved for HFI1_S_WAIT_HALT */
+/* BIT(27) reserved for HFI1_S_WAIT_TID_RESP */
+/* BIT(28) reserved for HFI1_S_WAIT_TID_SPACE */
+
+/*
+ * Unlike regular IB RDMA VERBS, which do not require an entry
+ * in the s_ack_queue, TID RDMA WRITE requests do because they
+ * generate responses.
+ * Therefore, the s_ack_queue needs to be extended by a certain
+ * amount. The key point is that the queue needs to be extended
+ * without letting the "user" know so they user doesn't end up
+ * using these extra entries.
+ */
+#define HFI1_TID_RDMA_WRITE_CNT 8
+
+struct tid_rdma_params {
+ struct rcu_head rcu_head;
+ u32 qp;
+ u32 max_len;
+ u16 jkey;
+ u8 max_read;
+ u8 max_write;
+ u8 timeout;
+ u8 urg;
+ u8 version;
+};
+
+struct tid_rdma_qp_params {
+ struct work_struct trigger_work;
+ struct tid_rdma_params local;
+ struct tid_rdma_params __rcu *remote;
+};
+
+/* Track state for each hardware flow */
+struct tid_flow_state {
+ u32 generation;
+ u32 psn;
+ u8 index;
+ u8 last_index;
+};
+
+enum tid_rdma_req_state {
+ TID_REQUEST_INACTIVE = 0,
+ TID_REQUEST_INIT,
+ TID_REQUEST_INIT_RESEND,
+ TID_REQUEST_ACTIVE,
+ TID_REQUEST_RESEND,
+ TID_REQUEST_RESEND_ACTIVE,
+ TID_REQUEST_QUEUED,
+ TID_REQUEST_SYNC,
+ TID_REQUEST_RNR_NAK,
+ TID_REQUEST_COMPLETE,
+};
+
+struct tid_rdma_request {
+ struct rvt_qp *qp;
+ struct hfi1_ctxtdata *rcd;
+ union {
+ struct rvt_swqe *swqe;
+ struct rvt_ack_entry *ack;
+ } e;
+
+ struct tid_rdma_flow *flows; /* array of tid flows */
+ struct rvt_sge_state ss; /* SGE state for TID RDMA requests */
+ u16 n_flows; /* size of the flow buffer window */
+ u16 setup_head; /* flow index we are setting up */
+ u16 clear_tail; /* flow index we are clearing */
+ u16 flow_idx; /* flow index most recently set up */
+ u16 acked_tail;
+
+ u32 seg_len;
+ u32 total_len;
+ u32 r_ack_psn; /* next expected ack PSN */
+ u32 r_flow_psn; /* IB PSN of next segment start */
+ u32 r_last_acked; /* IB PSN of last ACK'ed packet */
+ u32 s_next_psn; /* IB PSN of next segment start for read */
+
+ u32 total_segs; /* segments required to complete a request */
+ u32 cur_seg; /* index of current segment */
+ u32 comp_seg; /* index of last completed segment */
+ u32 ack_seg; /* index of last ack'ed segment */
+ u32 alloc_seg; /* index of next segment to be allocated */
+ u32 isge; /* index of "current" sge */
+ u32 ack_pending; /* num acks pending for this request */
+
+ enum tid_rdma_req_state state;
+};
+
+/*
+ * When header suppression is used, PSNs associated with a "flow" are
+ * relevant (and not the PSNs maintained by verbs). Track per-flow
+ * PSNs here for a TID RDMA segment.
+ *
+ */
+struct flow_state {
+ u32 flags;
+ u32 resp_ib_psn; /* The IB PSN of the response for this flow */
+ u32 generation; /* generation of flow */
+ u32 spsn; /* starting PSN in TID space */
+ u32 lpsn; /* last PSN in TID space */
+ u32 r_next_psn; /* next PSN to be received (in TID space) */
+
+ /* For tid rdma read */
+ u32 ib_spsn; /* starting PSN in Verbs space */
+ u32 ib_lpsn; /* last PSn in Verbs space */
+};
+
+struct tid_rdma_pageset {
+ dma_addr_t addr : 48; /* Only needed for the first page */
+ u8 idx: 8;
+ u8 count : 7;
+ u8 mapped: 1;
+};
+
+/**
+ * kern_tid_node - used for managing TID's in TID groups
+ *
+ * @grp_idx: rcd relative index to tid_group
+ * @map: grp->map captured prior to programming this TID group in HW
+ * @cnt: Only @cnt of available group entries are actually programmed
+ */
+struct kern_tid_node {
+ struct tid_group *grp;
+ u8 map;
+ u8 cnt;
+};
+
+/* Overall info for a TID RDMA segment */
+struct tid_rdma_flow {
+ /*
+ * While a TID RDMA segment is being transferred, it uses a QP number
+ * from the "KDETH section of QP numbers" (which is different from the
+ * QP number that originated the request). Bits 11-15 of these QP
+ * numbers identify the "TID flow" for the segment.
+ */
+ struct flow_state flow_state;
+ struct tid_rdma_request *req;
+ u32 tid_qpn;
+ u32 tid_offset;
+ u32 length;
+ u32 sent;
+ u8 tnode_cnt;
+ u8 tidcnt;
+ u8 tid_idx;
+ u8 idx;
+ u8 npagesets;
+ u8 npkts;
+ u8 pkt;
+ u8 resync_npkts;
+ struct kern_tid_node tnode[TID_RDMA_MAX_PAGES];
+ struct tid_rdma_pageset pagesets[TID_RDMA_MAX_PAGES];
+ u32 tid_entry[TID_RDMA_MAX_PAGES];
+};
+
+enum tid_rnr_nak_state {
+ TID_RNR_NAK_INIT = 0,
+ TID_RNR_NAK_SEND,
+ TID_RNR_NAK_SENT,
+};
+
+bool tid_rdma_conn_req(struct rvt_qp *qp, u64 *data);
+bool tid_rdma_conn_reply(struct rvt_qp *qp, u64 data);
+bool tid_rdma_conn_resp(struct rvt_qp *qp, u64 *data);
+void tid_rdma_conn_error(struct rvt_qp *qp);
+void tid_rdma_opfn_init(struct rvt_qp *qp, struct tid_rdma_params *p);
+
+int hfi1_kern_exp_rcv_init(struct hfi1_ctxtdata *rcd, int reinit);
+int hfi1_kern_exp_rcv_setup(struct tid_rdma_request *req,
+ struct rvt_sge_state *ss, bool *last);
+int hfi1_kern_exp_rcv_clear(struct tid_rdma_request *req);
+void hfi1_kern_exp_rcv_clear_all(struct tid_rdma_request *req);
+void __trdma_clean_swqe(struct rvt_qp *qp, struct rvt_swqe *wqe);
+
+/**
+ * trdma_clean_swqe - clean flows for swqe if large send queue
+ * @qp: the qp
+ * @wqe: the send wqe
+ */
+static inline void trdma_clean_swqe(struct rvt_qp *qp, struct rvt_swqe *wqe)
+{
+ if (!wqe->priv)
+ return;
+ __trdma_clean_swqe(qp, wqe);
+}
+
+void hfi1_kern_read_tid_flow_free(struct rvt_qp *qp);
+
+int hfi1_qp_priv_init(struct rvt_dev_info *rdi, struct rvt_qp *qp,
+ struct ib_qp_init_attr *init_attr);
+void hfi1_qp_priv_tid_free(struct rvt_dev_info *rdi, struct rvt_qp *qp);
+
+void hfi1_tid_rdma_flush_wait(struct rvt_qp *qp);
+
+int hfi1_kern_setup_hw_flow(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp);
+void hfi1_kern_clear_hw_flow(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp);
+void hfi1_kern_init_ctxt_generations(struct hfi1_ctxtdata *rcd);
+
+struct cntr_entry;
+u64 hfi1_access_sw_tid_wait(const struct cntr_entry *entry,
+ void *context, int vl, int mode, u64 data);
+
+u32 hfi1_build_tid_rdma_read_packet(struct rvt_swqe *wqe,
+ struct ib_other_headers *ohdr,
+ u32 *bth1, u32 *bth2, u32 *len);
+u32 hfi1_build_tid_rdma_read_req(struct rvt_qp *qp, struct rvt_swqe *wqe,
+ struct ib_other_headers *ohdr, u32 *bth1,
+ u32 *bth2, u32 *len);
+void hfi1_rc_rcv_tid_rdma_read_req(struct hfi1_packet *packet);
+u32 hfi1_build_tid_rdma_read_resp(struct rvt_qp *qp, struct rvt_ack_entry *e,
+ struct ib_other_headers *ohdr, u32 *bth0,
+ u32 *bth1, u32 *bth2, u32 *len, bool *last);
+void hfi1_rc_rcv_tid_rdma_read_resp(struct hfi1_packet *packet);
+bool hfi1_handle_kdeth_eflags(struct hfi1_ctxtdata *rcd,
+ struct hfi1_pportdata *ppd,
+ struct hfi1_packet *packet);
+void hfi1_tid_rdma_restart_req(struct rvt_qp *qp, struct rvt_swqe *wqe,
+ u32 *bth2);
+void hfi1_qp_kern_exp_rcv_clear_all(struct rvt_qp *qp);
+bool hfi1_tid_rdma_wqe_interlock(struct rvt_qp *qp, struct rvt_swqe *wqe);
+
+void setup_tid_rdma_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe);
+static inline void hfi1_setup_tid_rdma_wqe(struct rvt_qp *qp,
+ struct rvt_swqe *wqe)
+{
+ if (wqe->priv &&
+ (wqe->wr.opcode == IB_WR_RDMA_READ ||
+ wqe->wr.opcode == IB_WR_RDMA_WRITE) &&
+ wqe->length >= TID_RDMA_MIN_SEGMENT_SIZE)
+ setup_tid_rdma_wqe(qp, wqe);
+}
+
+u32 hfi1_build_tid_rdma_write_req(struct rvt_qp *qp, struct rvt_swqe *wqe,
+ struct ib_other_headers *ohdr,
+ u32 *bth1, u32 *bth2, u32 *len);
+
+void hfi1_rc_rcv_tid_rdma_write_req(struct hfi1_packet *packet);
+
+u32 hfi1_build_tid_rdma_write_resp(struct rvt_qp *qp, struct rvt_ack_entry *e,
+ struct ib_other_headers *ohdr, u32 *bth1,
+ u32 bth2, u32 *len,
+ struct rvt_sge_state **ss);
+
+void hfi1_del_tid_reap_timer(struct rvt_qp *qp);
+
+void hfi1_rc_rcv_tid_rdma_write_resp(struct hfi1_packet *packet);
+
+bool hfi1_build_tid_rdma_packet(struct rvt_swqe *wqe,
+ struct ib_other_headers *ohdr,
+ u32 *bth1, u32 *bth2, u32 *len);
+
+void hfi1_rc_rcv_tid_rdma_write_data(struct hfi1_packet *packet);
+
+u32 hfi1_build_tid_rdma_write_ack(struct rvt_qp *qp, struct rvt_ack_entry *e,
+ struct ib_other_headers *ohdr, u16 iflow,
+ u32 *bth1, u32 *bth2);
+
+void hfi1_rc_rcv_tid_rdma_ack(struct hfi1_packet *packet);
+
+void hfi1_add_tid_retry_timer(struct rvt_qp *qp);
+void hfi1_del_tid_retry_timer(struct rvt_qp *qp);
+
+u32 hfi1_build_tid_rdma_resync(struct rvt_qp *qp, struct rvt_swqe *wqe,
+ struct ib_other_headers *ohdr, u32 *bth1,
+ u32 *bth2, u16 fidx);
+
+void hfi1_rc_rcv_tid_rdma_resync(struct hfi1_packet *packet);
+
+struct hfi1_pkt_state;
+int hfi1_make_tid_rdma_pkt(struct rvt_qp *qp, struct hfi1_pkt_state *ps);
+
+void _hfi1_do_tid_send(struct work_struct *work);
+
+bool hfi1_schedule_tid_send(struct rvt_qp *qp);
+
+bool hfi1_tid_rdma_ack_interlock(struct rvt_qp *qp, struct rvt_ack_entry *e);
+
+#endif /* HFI1_TID_RDMA_H */
diff --git a/drivers/infiniband/hw/hfi1/trace.c b/drivers/infiniband/hw/hfi1/trace.c
index 01f525cd985a..10290ebf76b2 100644
--- a/drivers/infiniband/hw/hfi1/trace.c
+++ b/drivers/infiniband/hw/hfi1/trace.c
@@ -1,74 +1,103 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
+ * Copyright(c) 2015 - 2020 Intel Corporation.
*/
#define CREATE_TRACE_POINTS
#include "trace.h"
+#include "exp_rcv.h"
+#include "ipoib.h"
-u8 ibhdr_exhdr_len(struct ib_header *hdr)
+static u8 __get_ib_hdr_len(struct ib_header *hdr)
{
struct ib_other_headers *ohdr;
u8 opcode;
- u8 lnh = (u8)(be16_to_cpu(hdr->lrh[0]) & 3);
- if (lnh == HFI1_LRH_BTH)
+ if (ib_get_lnh(hdr) == HFI1_LRH_BTH)
ohdr = &hdr->u.oth;
else
ohdr = &hdr->u.l.oth;
- opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
+ opcode = ib_bth_get_opcode(ohdr);
return hdr_len_by_opcode[opcode] == 0 ?
0 : hdr_len_by_opcode[opcode] - (12 + 8);
}
-#define IMM_PRN "imm %d"
-#define RETH_PRN "reth vaddr 0x%.16llx rkey 0x%.8x dlen 0x%.8x"
-#define AETH_PRN "aeth syn 0x%.2x %s msn 0x%.8x"
-#define DETH_PRN "deth qkey 0x%.8x sqpn 0x%.6x"
-#define IETH_PRN "ieth rkey 0x%.8x"
-#define ATOMICACKETH_PRN "origdata %llx"
-#define ATOMICETH_PRN "vaddr 0x%llx rkey 0x%.8x sdata %llx cdata %llx"
+static u8 __get_16b_hdr_len(struct hfi1_16b_header *hdr)
+{
+ struct ib_other_headers *ohdr = NULL;
+ u8 opcode;
+ u8 l4 = hfi1_16B_get_l4(hdr);
+
+ if (l4 == OPA_16B_L4_FM) {
+ opcode = IB_OPCODE_UD_SEND_ONLY;
+ return (8 + 8); /* No BTH */
+ }
+
+ if (l4 == OPA_16B_L4_IB_LOCAL)
+ ohdr = &hdr->u.oth;
+ else
+ ohdr = &hdr->u.l.oth;
+
+ opcode = ib_bth_get_opcode(ohdr);
+ return hdr_len_by_opcode[opcode] == 0 ?
+ 0 : hdr_len_by_opcode[opcode] - (12 + 8 + 8);
+}
+
+u8 hfi1_trace_packet_hdr_len(struct hfi1_packet *packet)
+{
+ if (packet->etype != RHF_RCV_TYPE_BYPASS)
+ return __get_ib_hdr_len(packet->hdr);
+ else
+ return __get_16b_hdr_len(packet->hdr);
+}
+
+u8 hfi1_trace_opa_hdr_len(struct hfi1_opa_header *opa_hdr)
+{
+ if (!opa_hdr->hdr_type)
+ return __get_ib_hdr_len(&opa_hdr->ibh);
+ else
+ return __get_16b_hdr_len(&opa_hdr->opah);
+}
+
+const char *hfi1_trace_get_packet_l4_str(u8 l4)
+{
+ if (l4)
+ return "16B";
+ else
+ return "9B";
+}
+
+const char *hfi1_trace_get_packet_l2_str(u8 l2)
+{
+ switch (l2) {
+ case 0:
+ return "0";
+ case 1:
+ return "1";
+ case 2:
+ return "16B";
+ case 3:
+ return "9B";
+ }
+ return "";
+}
+
+#define IMM_PRN "imm:%d"
+#define RETH_PRN "reth vaddr:0x%.16llx rkey:0x%.8x dlen:0x%.8x"
+#define AETH_PRN "aeth syn:0x%.2x %s msn:0x%.8x"
+#define DETH_PRN "deth qkey:0x%.8x sqpn:0x%.6x"
+#define DETH_ENTROPY_PRN "deth qkey:0x%.8x sqpn:0x%.6x entropy:0x%.2x"
+#define IETH_PRN "ieth rkey:0x%.8x"
+#define ATOMICACKETH_PRN "origdata:%llx"
+#define ATOMICETH_PRN "vaddr:0x%llx rkey:0x%.8x sdata:%llx cdata:%llx"
+#define TID_RDMA_KDETH "kdeth0 0x%x kdeth1 0x%x"
+#define TID_RDMA_KDETH_DATA "kdeth0 0x%x: kver %u sh %u intr %u tidctrl %u tid %x offset %x kdeth1 0x%x: jkey %x"
+#define TID_READ_REQ_PRN "tid_flow_psn 0x%x tid_flow_qp 0x%x verbs_qp 0x%x"
+#define TID_READ_RSP_PRN "verbs_qp 0x%x"
+#define TID_WRITE_REQ_PRN "original_qp 0x%x"
+#define TID_WRITE_RSP_PRN "tid_flow_psn 0x%x tid_flow_qp 0x%x verbs_qp 0x%x"
+#define TID_WRITE_DATA_PRN "verbs_qp 0x%x"
+#define TID_ACK_PRN "tid_flow_psn 0x%x verbs_psn 0x%x tid_flow_qp 0x%x verbs_qp 0x%x"
+#define TID_RESYNC_PRN "verbs_qp 0x%x"
#define OP(transport, op) IB_OPCODE_## transport ## _ ## op
@@ -85,14 +114,150 @@ static const char *parse_syndrome(u8 syndrome)
return "";
}
+void hfi1_trace_parse_9b_bth(struct ib_other_headers *ohdr,
+ u8 *ack, bool *becn, bool *fecn, u8 *mig,
+ u8 *se, u8 *pad, u8 *opcode, u8 *tver,
+ u16 *pkey, u32 *psn, u32 *qpn)
+{
+ *ack = ib_bth_get_ackreq(ohdr);
+ *becn = ib_bth_get_becn(ohdr);
+ *fecn = ib_bth_get_fecn(ohdr);
+ *mig = ib_bth_get_migreq(ohdr);
+ *se = ib_bth_get_se(ohdr);
+ *pad = ib_bth_get_pad(ohdr);
+ *opcode = ib_bth_get_opcode(ohdr);
+ *tver = ib_bth_get_tver(ohdr);
+ *pkey = ib_bth_get_pkey(ohdr);
+ *psn = mask_psn(ib_bth_get_psn(ohdr));
+ *qpn = ib_bth_get_qpn(ohdr);
+}
+
+void hfi1_trace_parse_16b_bth(struct ib_other_headers *ohdr,
+ u8 *ack, u8 *mig, u8 *opcode,
+ u8 *pad, u8 *se, u8 *tver,
+ u32 *psn, u32 *qpn)
+{
+ *ack = ib_bth_get_ackreq(ohdr);
+ *mig = ib_bth_get_migreq(ohdr);
+ *opcode = ib_bth_get_opcode(ohdr);
+ *pad = ib_bth_get_pad(ohdr);
+ *se = ib_bth_get_se(ohdr);
+ *tver = ib_bth_get_tver(ohdr);
+ *psn = mask_psn(ib_bth_get_psn(ohdr));
+ *qpn = ib_bth_get_qpn(ohdr);
+}
+
+static u16 ib_get_len(const struct ib_header *hdr)
+{
+ return be16_to_cpu(hdr->lrh[2]);
+}
+
+void hfi1_trace_parse_9b_hdr(struct ib_header *hdr, bool sc5,
+ u8 *lnh, u8 *lver, u8 *sl, u8 *sc,
+ u16 *len, u32 *dlid, u32 *slid)
+{
+ *lnh = ib_get_lnh(hdr);
+ *lver = ib_get_lver(hdr);
+ *sl = ib_get_sl(hdr);
+ *sc = ib_get_sc(hdr) | (sc5 << 4);
+ *len = ib_get_len(hdr);
+ *dlid = ib_get_dlid(hdr);
+ *slid = ib_get_slid(hdr);
+}
+
+void hfi1_trace_parse_16b_hdr(struct hfi1_16b_header *hdr,
+ u8 *age, bool *becn, bool *fecn,
+ u8 *l4, u8 *rc, u8 *sc,
+ u16 *entropy, u16 *len, u16 *pkey,
+ u32 *dlid, u32 *slid)
+{
+ *age = hfi1_16B_get_age(hdr);
+ *becn = hfi1_16B_get_becn(hdr);
+ *fecn = hfi1_16B_get_fecn(hdr);
+ *l4 = hfi1_16B_get_l4(hdr);
+ *rc = hfi1_16B_get_rc(hdr);
+ *sc = hfi1_16B_get_sc(hdr);
+ *entropy = hfi1_16B_get_entropy(hdr);
+ *len = hfi1_16B_get_len(hdr);
+ *pkey = hfi1_16B_get_pkey(hdr);
+ *dlid = hfi1_16B_get_dlid(hdr);
+ *slid = hfi1_16B_get_slid(hdr);
+}
+
+#define LRH_PRN "len:%d sc:%d dlid:0x%.4x slid:0x%.4x "
+#define LRH_9B_PRN "lnh:%d,%s lver:%d sl:%d"
+#define LRH_16B_PRN "age:%d becn:%d fecn:%d l4:%d " \
+ "rc:%d sc:%d pkey:0x%.4x entropy:0x%.4x"
+const char *hfi1_trace_fmt_lrh(struct trace_seq *p, bool bypass,
+ u8 age, bool becn, bool fecn, u8 l4,
+ u8 lnh, const char *lnh_name, u8 lver,
+ u8 rc, u8 sc, u8 sl, u16 entropy,
+ u16 len, u16 pkey, u32 dlid, u32 slid)
+{
+ const char *ret = trace_seq_buffer_ptr(p);
+
+ trace_seq_printf(p, LRH_PRN, len, sc, dlid, slid);
+
+ if (bypass)
+ trace_seq_printf(p, LRH_16B_PRN,
+ age, becn, fecn, l4, rc, sc, pkey, entropy);
+
+ else
+ trace_seq_printf(p, LRH_9B_PRN,
+ lnh, lnh_name, lver, sl);
+ trace_seq_putc(p, 0);
+
+ return ret;
+}
+
+#define BTH_9B_PRN \
+ "op:0x%.2x,%s se:%d m:%d pad:%d tver:%d pkey:0x%.4x " \
+ "f:%d b:%d qpn:0x%.6x a:%d psn:0x%.8x"
+#define BTH_16B_PRN \
+ "op:0x%.2x,%s se:%d m:%d pad:%d tver:%d " \
+ "qpn:0x%.6x a:%d psn:0x%.8x"
+#define L4_FM_16B_PRN \
+ "op:0x%.2x,%s dest_qpn:0x%.6x src_qpn:0x%.6x"
+const char *hfi1_trace_fmt_rest(struct trace_seq *p, bool bypass, u8 l4,
+ u8 ack, bool becn, bool fecn, u8 mig,
+ u8 se, u8 pad, u8 opcode, const char *opname,
+ u8 tver, u16 pkey, u32 psn, u32 qpn,
+ u32 dest_qpn, u32 src_qpn)
+{
+ const char *ret = trace_seq_buffer_ptr(p);
+
+ if (bypass)
+ if (l4 == OPA_16B_L4_FM)
+ trace_seq_printf(p, L4_FM_16B_PRN,
+ opcode, opname, dest_qpn, src_qpn);
+ else
+ trace_seq_printf(p, BTH_16B_PRN,
+ opcode, opname,
+ se, mig, pad, tver, qpn, ack, psn);
+
+ else
+ trace_seq_printf(p, BTH_9B_PRN,
+ opcode, opname,
+ se, mig, pad, tver, pkey, fecn, becn,
+ qpn, ack, psn);
+ trace_seq_putc(p, 0);
+
+ return ret;
+}
+
const char *parse_everbs_hdrs(
struct trace_seq *p,
- u8 opcode,
+ u8 opcode, u8 l4, u32 dest_qpn, u32 src_qpn,
void *ehdrs)
{
union ib_ehdrs *eh = ehdrs;
const char *ret = trace_seq_buffer_ptr(p);
+ if (l4 == OPA_16B_L4_FM) {
+ trace_seq_printf(p, "mgmt pkt");
+ goto out;
+ }
+
switch (opcode) {
/* imm */
case OP(RC, SEND_LAST_WITH_IMMEDIATE):
@@ -130,14 +295,107 @@ const char *parse_everbs_hdrs(
case OP(RC, ACKNOWLEDGE):
trace_seq_printf(p, AETH_PRN, be32_to_cpu(eh->aeth) >> 24,
parse_syndrome(be32_to_cpu(eh->aeth) >> 24),
- be32_to_cpu(eh->aeth) & HFI1_MSN_MASK);
+ be32_to_cpu(eh->aeth) & IB_MSN_MASK);
+ break;
+ case OP(TID_RDMA, WRITE_REQ):
+ trace_seq_printf(p, TID_RDMA_KDETH " " RETH_PRN " "
+ TID_WRITE_REQ_PRN,
+ le32_to_cpu(eh->tid_rdma.w_req.kdeth0),
+ le32_to_cpu(eh->tid_rdma.w_req.kdeth1),
+ ib_u64_get(&eh->tid_rdma.w_req.reth.vaddr),
+ be32_to_cpu(eh->tid_rdma.w_req.reth.rkey),
+ be32_to_cpu(eh->tid_rdma.w_req.reth.length),
+ be32_to_cpu(eh->tid_rdma.w_req.verbs_qp));
+ break;
+ case OP(TID_RDMA, WRITE_RESP):
+ trace_seq_printf(p, TID_RDMA_KDETH " " AETH_PRN " "
+ TID_WRITE_RSP_PRN,
+ le32_to_cpu(eh->tid_rdma.w_rsp.kdeth0),
+ le32_to_cpu(eh->tid_rdma.w_rsp.kdeth1),
+ be32_to_cpu(eh->tid_rdma.w_rsp.aeth) >> 24,
+ parse_syndrome(/* aeth */
+ be32_to_cpu(eh->tid_rdma.w_rsp.aeth)
+ >> 24),
+ (be32_to_cpu(eh->tid_rdma.w_rsp.aeth) &
+ IB_MSN_MASK),
+ be32_to_cpu(eh->tid_rdma.w_rsp.tid_flow_psn),
+ be32_to_cpu(eh->tid_rdma.w_rsp.tid_flow_qp),
+ be32_to_cpu(eh->tid_rdma.w_rsp.verbs_qp));
+ break;
+ case OP(TID_RDMA, WRITE_DATA_LAST):
+ case OP(TID_RDMA, WRITE_DATA):
+ trace_seq_printf(p, TID_RDMA_KDETH_DATA " " TID_WRITE_DATA_PRN,
+ le32_to_cpu(eh->tid_rdma.w_data.kdeth0),
+ KDETH_GET(eh->tid_rdma.w_data.kdeth0, KVER),
+ KDETH_GET(eh->tid_rdma.w_data.kdeth0, SH),
+ KDETH_GET(eh->tid_rdma.w_data.kdeth0, INTR),
+ KDETH_GET(eh->tid_rdma.w_data.kdeth0, TIDCTRL),
+ KDETH_GET(eh->tid_rdma.w_data.kdeth0, TID),
+ KDETH_GET(eh->tid_rdma.w_data.kdeth0, OFFSET),
+ le32_to_cpu(eh->tid_rdma.w_data.kdeth1),
+ KDETH_GET(eh->tid_rdma.w_data.kdeth1, JKEY),
+ be32_to_cpu(eh->tid_rdma.w_data.verbs_qp));
+ break;
+ case OP(TID_RDMA, READ_REQ):
+ trace_seq_printf(p, TID_RDMA_KDETH " " RETH_PRN " "
+ TID_READ_REQ_PRN,
+ le32_to_cpu(eh->tid_rdma.r_req.kdeth0),
+ le32_to_cpu(eh->tid_rdma.r_req.kdeth1),
+ ib_u64_get(&eh->tid_rdma.r_req.reth.vaddr),
+ be32_to_cpu(eh->tid_rdma.r_req.reth.rkey),
+ be32_to_cpu(eh->tid_rdma.r_req.reth.length),
+ be32_to_cpu(eh->tid_rdma.r_req.tid_flow_psn),
+ be32_to_cpu(eh->tid_rdma.r_req.tid_flow_qp),
+ be32_to_cpu(eh->tid_rdma.r_req.verbs_qp));
+ break;
+ case OP(TID_RDMA, READ_RESP):
+ trace_seq_printf(p, TID_RDMA_KDETH_DATA " " AETH_PRN " "
+ TID_READ_RSP_PRN,
+ le32_to_cpu(eh->tid_rdma.r_rsp.kdeth0),
+ KDETH_GET(eh->tid_rdma.r_rsp.kdeth0, KVER),
+ KDETH_GET(eh->tid_rdma.r_rsp.kdeth0, SH),
+ KDETH_GET(eh->tid_rdma.r_rsp.kdeth0, INTR),
+ KDETH_GET(eh->tid_rdma.r_rsp.kdeth0, TIDCTRL),
+ KDETH_GET(eh->tid_rdma.r_rsp.kdeth0, TID),
+ KDETH_GET(eh->tid_rdma.r_rsp.kdeth0, OFFSET),
+ le32_to_cpu(eh->tid_rdma.r_rsp.kdeth1),
+ KDETH_GET(eh->tid_rdma.r_rsp.kdeth1, JKEY),
+ be32_to_cpu(eh->tid_rdma.r_rsp.aeth) >> 24,
+ parse_syndrome(/* aeth */
+ be32_to_cpu(eh->tid_rdma.r_rsp.aeth)
+ >> 24),
+ (be32_to_cpu(eh->tid_rdma.r_rsp.aeth) &
+ IB_MSN_MASK),
+ be32_to_cpu(eh->tid_rdma.r_rsp.verbs_qp));
+ break;
+ case OP(TID_RDMA, ACK):
+ trace_seq_printf(p, TID_RDMA_KDETH " " AETH_PRN " "
+ TID_ACK_PRN,
+ le32_to_cpu(eh->tid_rdma.ack.kdeth0),
+ le32_to_cpu(eh->tid_rdma.ack.kdeth1),
+ be32_to_cpu(eh->tid_rdma.ack.aeth) >> 24,
+ parse_syndrome(/* aeth */
+ be32_to_cpu(eh->tid_rdma.ack.aeth)
+ >> 24),
+ (be32_to_cpu(eh->tid_rdma.ack.aeth) &
+ IB_MSN_MASK),
+ be32_to_cpu(eh->tid_rdma.ack.tid_flow_psn),
+ be32_to_cpu(eh->tid_rdma.ack.verbs_psn),
+ be32_to_cpu(eh->tid_rdma.ack.tid_flow_qp),
+ be32_to_cpu(eh->tid_rdma.ack.verbs_qp));
+ break;
+ case OP(TID_RDMA, RESYNC):
+ trace_seq_printf(p, TID_RDMA_KDETH " " TID_RESYNC_PRN,
+ le32_to_cpu(eh->tid_rdma.resync.kdeth0),
+ le32_to_cpu(eh->tid_rdma.resync.kdeth1),
+ be32_to_cpu(eh->tid_rdma.resync.verbs_qp));
break;
/* aeth + atomicacketh */
case OP(RC, ATOMIC_ACKNOWLEDGE):
trace_seq_printf(p, AETH_PRN " " ATOMICACKETH_PRN,
be32_to_cpu(eh->at.aeth) >> 24,
parse_syndrome(be32_to_cpu(eh->at.aeth) >> 24),
- be32_to_cpu(eh->at.aeth) & HFI1_MSN_MASK,
+ be32_to_cpu(eh->at.aeth) & IB_MSN_MASK,
ib_u64_get(&eh->at.atomic_ack_eth));
break;
/* atomiceth */
@@ -151,6 +409,12 @@ const char *parse_everbs_hdrs(
break;
/* deth */
case OP(UD, SEND_ONLY):
+ trace_seq_printf(p, DETH_ENTROPY_PRN,
+ be32_to_cpu(eh->ud.deth[0]),
+ be32_to_cpu(eh->ud.deth[1]) & RVT_QPN_MASK,
+ be32_to_cpu(eh->ud.deth[1]) >>
+ HFI1_IPOIB_ENTROPY_SHIFT);
+ break;
case OP(UD, SEND_ONLY_WITH_IMMEDIATE):
trace_seq_printf(p, DETH_PRN,
be32_to_cpu(eh->ud.deth[0]),
@@ -163,6 +427,7 @@ const char *parse_everbs_hdrs(
be32_to_cpu(eh->ieth));
break;
}
+out:
trace_seq_putc(p, 0);
return ret;
}
@@ -203,6 +468,54 @@ const char *print_u32_array(
return ret;
}
+u8 hfi1_trace_get_tid_ctrl(u32 ent)
+{
+ return EXP_TID_GET(ent, CTRL);
+}
+
+u16 hfi1_trace_get_tid_len(u32 ent)
+{
+ return EXP_TID_GET(ent, LEN);
+}
+
+u16 hfi1_trace_get_tid_idx(u32 ent)
+{
+ return EXP_TID_GET(ent, IDX);
+}
+
+struct hfi1_ctxt_hist {
+ atomic_t count;
+ atomic_t data[255];
+};
+
+static struct hfi1_ctxt_hist hist = {
+ .count = ATOMIC_INIT(0)
+};
+
+const char *hfi1_trace_print_rsm_hist(struct trace_seq *p, unsigned int ctxt)
+{
+ int i, len = ARRAY_SIZE(hist.data);
+ const char *ret = trace_seq_buffer_ptr(p);
+ unsigned long packet_count = atomic_fetch_inc(&hist.count);
+
+ trace_seq_printf(p, "packet[%lu]", packet_count);
+ for (i = 0; i < len; ++i) {
+ unsigned long val;
+ atomic_t *count = &hist.data[i];
+
+ if (ctxt == i)
+ val = atomic_fetch_inc(count);
+ else
+ val = atomic_read(count);
+
+ if (val)
+ trace_seq_printf(p, "(%d:%lu)", i, val);
+ }
+ trace_seq_putc(p, 0);
+ return ret;
+}
+
+__hfi1_trace_fn(AFFINITY);
__hfi1_trace_fn(PKT);
__hfi1_trace_fn(PROC);
__hfi1_trace_fn(SDMA);
diff --git a/drivers/infiniband/hw/hfi1/trace.h b/drivers/infiniband/hw/hfi1/trace.h
index 92dc88f013c9..bb3cc006bacd 100644
--- a/drivers/infiniband/hw/hfi1/trace.h
+++ b/drivers/infiniband/hw/hfi1/trace.h
@@ -1,49 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
+ * Copyright(c) 2015 - 2018 Intel Corporation.
*/
+
+#define packettype_name(etype) { RHF_RCV_TYPE_##etype, #etype }
+#define show_packettype(etype) \
+__print_symbolic(etype, \
+ packettype_name(EXPECTED), \
+ packettype_name(EAGER), \
+ packettype_name(IB), \
+ packettype_name(ERROR), \
+ packettype_name(BYPASS))
+
#include "trace_dbg.h"
#include "trace_misc.h"
#include "trace_ctxts.h"
@@ -51,3 +19,6 @@
#include "trace_rc.h"
#include "trace_rx.h"
#include "trace_tx.h"
+#include "trace_mmu.h"
+#include "trace_iowait.h"
+#include "trace_tid.h"
diff --git a/drivers/infiniband/hw/hfi1/trace_ctxts.h b/drivers/infiniband/hw/hfi1/trace_ctxts.h
index 26ae789e47cf..76c41bd79071 100644
--- a/drivers/infiniband/hw/hfi1/trace_ctxts.h
+++ b/drivers/infiniband/hw/hfi1/trace_ctxts.h
@@ -1,49 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
-* Copyright(c) 2015, 2016 Intel Corporation.
-*
-* This file is provided under a dual BSD/GPLv2 license. When using or
-* redistributing this file, you may do so under either license.
-*
-* GPL LICENSE SUMMARY
-*
-* This program is free software; you can redistribute it and/or modify
-* it under the terms of version 2 of the GNU General Public License as
-* published by the Free Software Foundation.
-*
-* This program is distributed in the hope that it will be useful, but
-* WITHOUT ANY WARRANTY; without even the implied warranty of
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-* General Public License for more details.
-*
-* BSD LICENSE
-*
-* Redistribution and use in source and binary forms, with or without
-* modification, are permitted provided that the following conditions
-* are met:
-*
-* - Redistributions of source code must retain the above copyright
-* notice, this list of conditions and the following disclaimer.
-* - Redistributions in binary form must reproduce the above copyright
-* notice, this list of conditions and the following disclaimer in
-* the documentation and/or other materials provided with the
-* distribution.
-* - Neither the name of Intel Corporation nor the names of its
-* contributors may be used to endorse or promote products derived
-* from this software without specific prior written permission.
-*
-* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*
+* Copyright(c) 2015 - 2020 Intel Corporation.
*/
+
#if !defined(__HFI1_TRACE_CTXTS_H) || defined(TRACE_HEADER_MULTI_READ)
#define __HFI1_TRACE_CTXTS_H
@@ -57,12 +16,14 @@
#define UCTXT_FMT \
"cred:%u, credaddr:0x%llx, piobase:0x%p, rcvhdr_cnt:%u, " \
- "rcvbase:0x%llx, rcvegrc:%u, rcvegrb:0x%llx"
+ "rcvbase:0x%llx, rcvegrc:%u, rcvegrb:0x%llx, subctxt_cnt:%u"
TRACE_EVENT(hfi1_uctxtdata,
- TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ctxtdata *uctxt),
- TP_ARGS(dd, uctxt),
+ TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ctxtdata *uctxt,
+ unsigned int subctxt),
+ TP_ARGS(dd, uctxt, subctxt),
TP_STRUCT__entry(DD_DEV_ENTRY(dd)
__field(unsigned int, ctxt)
+ __field(unsigned int, subctxt)
__field(u32, credits)
__field(u64, hw_free)
__field(void __iomem *, piobase)
@@ -70,27 +31,32 @@ TRACE_EVENT(hfi1_uctxtdata,
__field(u64, rcvhdrq_dma)
__field(u32, eager_cnt)
__field(u64, rcvegr_dma)
+ __field(unsigned int, subctxt_cnt)
),
TP_fast_assign(DD_DEV_ASSIGN(dd);
__entry->ctxt = uctxt->ctxt;
+ __entry->subctxt = subctxt;
__entry->credits = uctxt->sc->credits;
__entry->hw_free = le64_to_cpu(*uctxt->sc->hw_free);
__entry->piobase = uctxt->sc->base_addr;
- __entry->rcvhdrq_cnt = uctxt->rcvhdrq_cnt;
+ __entry->rcvhdrq_cnt = get_hdrq_cnt(uctxt);
__entry->rcvhdrq_dma = uctxt->rcvhdrq_dma;
__entry->eager_cnt = uctxt->egrbufs.alloced;
__entry->rcvegr_dma = uctxt->egrbufs.rcvtids[0].dma;
+ __entry->subctxt_cnt = uctxt->subctxt_cnt;
),
- TP_printk("[%s] ctxt %u " UCTXT_FMT,
+ TP_printk("[%s] ctxt %u:%u " UCTXT_FMT,
__get_str(dev),
__entry->ctxt,
+ __entry->subctxt,
__entry->credits,
__entry->hw_free,
__entry->piobase,
__entry->rcvhdrq_cnt,
__entry->rcvhdrq_dma,
__entry->eager_cnt,
- __entry->rcvegr_dma
+ __entry->rcvegr_dma,
+ __entry->subctxt_cnt
)
);
@@ -99,7 +65,7 @@ TRACE_EVENT(hfi1_uctxtdata,
TRACE_EVENT(hfi1_ctxt_info,
TP_PROTO(struct hfi1_devdata *dd, unsigned int ctxt,
unsigned int subctxt,
- struct hfi1_ctxt_info cinfo),
+ struct hfi1_ctxt_info *cinfo),
TP_ARGS(dd, ctxt, subctxt, cinfo),
TP_STRUCT__entry(DD_DEV_ENTRY(dd)
__field(unsigned int, ctxt)
@@ -113,11 +79,11 @@ TRACE_EVENT(hfi1_ctxt_info,
TP_fast_assign(DD_DEV_ASSIGN(dd);
__entry->ctxt = ctxt;
__entry->subctxt = subctxt;
- __entry->egrtids = cinfo.egrtids;
- __entry->rcvhdrq_cnt = cinfo.rcvhdrq_cnt;
- __entry->rcvhdrq_size = cinfo.rcvhdrq_entsize;
- __entry->sdma_ring_size = cinfo.sdma_ring_size;
- __entry->rcvegr_size = cinfo.rcvegr_size;
+ __entry->egrtids = cinfo->egrtids;
+ __entry->rcvhdrq_cnt = cinfo->rcvhdrq_cnt;
+ __entry->rcvhdrq_size = cinfo->rcvhdrq_entsize;
+ __entry->sdma_ring_size = cinfo->sdma_ring_size;
+ __entry->rcvegr_size = cinfo->rcvegr_size;
),
TP_printk("[%s] ctxt %u:%u " CINFO_FMT,
__get_str(dev),
@@ -131,6 +97,15 @@ TRACE_EVENT(hfi1_ctxt_info,
)
);
+const char *hfi1_trace_print_rsm_hist(struct trace_seq *p, unsigned int ctxt);
+TRACE_EVENT(ctxt_rsm_hist,
+ TP_PROTO(unsigned int ctxt),
+ TP_ARGS(ctxt),
+ TP_STRUCT__entry(__field(unsigned int, ctxt)),
+ TP_fast_assign(__entry->ctxt = ctxt;),
+ TP_printk("%s", hfi1_trace_print_rsm_hist(p, __entry->ctxt))
+);
+
#endif /* __HFI1_TRACE_CTXTS_H */
#undef TRACE_INCLUDE_PATH
diff --git a/drivers/infiniband/hw/hfi1/trace_dbg.h b/drivers/infiniband/hw/hfi1/trace_dbg.h
index 0e7d929530c5..58304b91380f 100644
--- a/drivers/infiniband/hw/hfi1/trace_dbg.h
+++ b/drivers/infiniband/hw/hfi1/trace_dbg.h
@@ -1,49 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
-* Copyright(c) 2015, 2016 Intel Corporation.
-*
-* This file is provided under a dual BSD/GPLv2 license. When using or
-* redistributing this file, you may do so under either license.
-*
-* GPL LICENSE SUMMARY
-*
-* This program is free software; you can redistribute it and/or modify
-* it under the terms of version 2 of the GNU General Public License as
-* published by the Free Software Foundation.
-*
-* This program is distributed in the hope that it will be useful, but
-* WITHOUT ANY WARRANTY; without even the implied warranty of
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-* General Public License for more details.
-*
-* BSD LICENSE
-*
-* Redistribution and use in source and binary forms, with or without
-* modification, are permitted provided that the following conditions
-* are met:
-*
-* - Redistributions of source code must retain the above copyright
-* notice, this list of conditions and the following disclaimer.
-* - Redistributions in binary form must reproduce the above copyright
-* notice, this list of conditions and the following disclaimer in
-* the documentation and/or other materials provided with the
-* distribution.
-* - Neither the name of Intel Corporation nor the names of its
-* contributors may be used to endorse or promote products derived
-* from this software without specific prior written permission.
-*
-* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*
+* Copyright(c) 2015 - 2018 Intel Corporation.
*/
+
#if !defined(__HFI1_TRACE_EXTRA_H) || defined(TRACE_HEADER_MULTI_READ)
#define __HFI1_TRACE_EXTRA_H
@@ -63,37 +22,40 @@
#define MAX_MSG_LEN 512
+#pragma GCC diagnostic push
+#ifndef __clang__
+#pragma GCC diagnostic ignored "-Wsuggest-attribute=format"
+#endif
+
DECLARE_EVENT_CLASS(hfi1_trace_template,
TP_PROTO(const char *function, struct va_format *vaf),
TP_ARGS(function, vaf),
TP_STRUCT__entry(__string(function, function)
- __dynamic_array(char, msg, MAX_MSG_LEN)
+ __vstring(msg, vaf->fmt, vaf->va)
),
- TP_fast_assign(__assign_str(function, function);
- WARN_ON_ONCE(vsnprintf
- (__get_dynamic_array(msg),
- MAX_MSG_LEN, vaf->fmt,
- *vaf->va) >=
- MAX_MSG_LEN);
+ TP_fast_assign(__assign_str(function);
+ __assign_vstr(msg, vaf->fmt, vaf->va);
),
TP_printk("(%s) %s",
__get_str(function),
__get_str(msg))
);
+#pragma GCC diagnostic pop
+
/*
* It may be nice to macroize the __hfi1_trace but the va_* stuff requires an
* actual function to work and can not be in a macro.
*/
#define __hfi1_trace_def(lvl) \
-void __hfi1_trace_##lvl(const char *funct, char *fmt, ...); \
+void __printf(2, 3) __hfi1_trace_##lvl(const char *funct, char *fmt, ...); \
\
DEFINE_EVENT(hfi1_trace_template, hfi1_ ##lvl, \
TP_PROTO(const char *function, struct va_format *vaf), \
TP_ARGS(function, vaf))
#define __hfi1_trace_fn(lvl) \
-void __hfi1_trace_##lvl(const char *func, char *fmt, ...) \
+void __printf(2, 3) __hfi1_trace_##lvl(const char *func, char *fmt, ...)\
{ \
struct va_format vaf = { \
.fmt = fmt, \
@@ -113,6 +75,7 @@ void __hfi1_trace_##lvl(const char *func, char *fmt, ...) \
* hfi1_cdbg(LVL, fmt, ...); as well as take care of all
* the debugfs stuff.
*/
+__hfi1_trace_def(AFFINITY);
__hfi1_trace_def(PKT);
__hfi1_trace_def(PROC);
__hfi1_trace_def(SDMA);
diff --git a/drivers/infiniband/hw/hfi1/trace_ibhdrs.h b/drivers/infiniband/hw/hfi1/trace_ibhdrs.h
index 382fcda3a5f6..b21356abc9ec 100644
--- a/drivers/infiniband/hw/hfi1/trace_ibhdrs.h
+++ b/drivers/infiniband/hw/hfi1/trace_ibhdrs.h
@@ -1,49 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
+ * Copyright(c) 2015 - 2017 Intel Corporation.
*/
+
#if !defined(__HFI1_TRACE_IBHDRS_H) || defined(TRACE_HEADER_MULTI_READ)
#define __HFI1_TRACE_IBHDRS_H
@@ -55,10 +14,96 @@
#undef TRACE_SYSTEM
#define TRACE_SYSTEM hfi1_ibhdrs
+#define ib_opcode_name(opcode) { IB_OPCODE_##opcode, #opcode }
+#define show_ib_opcode(opcode) \
+__print_symbolic(opcode, \
+ ib_opcode_name(RC_SEND_FIRST), \
+ ib_opcode_name(RC_SEND_MIDDLE), \
+ ib_opcode_name(RC_SEND_LAST), \
+ ib_opcode_name(RC_SEND_LAST_WITH_IMMEDIATE), \
+ ib_opcode_name(RC_SEND_ONLY), \
+ ib_opcode_name(RC_SEND_ONLY_WITH_IMMEDIATE), \
+ ib_opcode_name(RC_RDMA_WRITE_FIRST), \
+ ib_opcode_name(RC_RDMA_WRITE_MIDDLE), \
+ ib_opcode_name(RC_RDMA_WRITE_LAST), \
+ ib_opcode_name(RC_RDMA_WRITE_LAST_WITH_IMMEDIATE), \
+ ib_opcode_name(RC_RDMA_WRITE_ONLY), \
+ ib_opcode_name(RC_RDMA_WRITE_ONLY_WITH_IMMEDIATE), \
+ ib_opcode_name(RC_RDMA_READ_REQUEST), \
+ ib_opcode_name(RC_RDMA_READ_RESPONSE_FIRST), \
+ ib_opcode_name(RC_RDMA_READ_RESPONSE_MIDDLE), \
+ ib_opcode_name(RC_RDMA_READ_RESPONSE_LAST), \
+ ib_opcode_name(RC_RDMA_READ_RESPONSE_ONLY), \
+ ib_opcode_name(RC_ACKNOWLEDGE), \
+ ib_opcode_name(RC_ATOMIC_ACKNOWLEDGE), \
+ ib_opcode_name(RC_COMPARE_SWAP), \
+ ib_opcode_name(RC_FETCH_ADD), \
+ ib_opcode_name(RC_SEND_LAST_WITH_INVALIDATE), \
+ ib_opcode_name(RC_SEND_ONLY_WITH_INVALIDATE), \
+ ib_opcode_name(TID_RDMA_WRITE_REQ), \
+ ib_opcode_name(TID_RDMA_WRITE_RESP), \
+ ib_opcode_name(TID_RDMA_WRITE_DATA), \
+ ib_opcode_name(TID_RDMA_WRITE_DATA_LAST), \
+ ib_opcode_name(TID_RDMA_READ_REQ), \
+ ib_opcode_name(TID_RDMA_READ_RESP), \
+ ib_opcode_name(TID_RDMA_RESYNC), \
+ ib_opcode_name(TID_RDMA_ACK), \
+ ib_opcode_name(UC_SEND_FIRST), \
+ ib_opcode_name(UC_SEND_MIDDLE), \
+ ib_opcode_name(UC_SEND_LAST), \
+ ib_opcode_name(UC_SEND_LAST_WITH_IMMEDIATE), \
+ ib_opcode_name(UC_SEND_ONLY), \
+ ib_opcode_name(UC_SEND_ONLY_WITH_IMMEDIATE), \
+ ib_opcode_name(UC_RDMA_WRITE_FIRST), \
+ ib_opcode_name(UC_RDMA_WRITE_MIDDLE), \
+ ib_opcode_name(UC_RDMA_WRITE_LAST), \
+ ib_opcode_name(UC_RDMA_WRITE_LAST_WITH_IMMEDIATE), \
+ ib_opcode_name(UC_RDMA_WRITE_ONLY), \
+ ib_opcode_name(UC_RDMA_WRITE_ONLY_WITH_IMMEDIATE), \
+ ib_opcode_name(UD_SEND_ONLY), \
+ ib_opcode_name(UD_SEND_ONLY_WITH_IMMEDIATE), \
+ ib_opcode_name(CNP))
+
u8 ibhdr_exhdr_len(struct ib_header *hdr);
-const char *parse_everbs_hdrs(struct trace_seq *p, u8 opcode, void *ehdrs);
+const char *parse_everbs_hdrs(struct trace_seq *p, u8 opcode,
+ u8 l4, u32 dest_qpn, u32 src_qpn,
+ void *ehdrs);
+u8 hfi1_trace_opa_hdr_len(struct hfi1_opa_header *opah);
+u8 hfi1_trace_packet_hdr_len(struct hfi1_packet *packet);
+const char *hfi1_trace_get_packet_l4_str(u8 l4);
+void hfi1_trace_parse_9b_bth(struct ib_other_headers *ohdr,
+ u8 *ack, bool *becn, bool *fecn, u8 *mig,
+ u8 *se, u8 *pad, u8 *opcode, u8 *tver,
+ u16 *pkey, u32 *psn, u32 *qpn);
+void hfi1_trace_parse_9b_hdr(struct ib_header *hdr, bool sc5,
+ u8 *lnh, u8 *lver, u8 *sl, u8 *sc,
+ u16 *len, u32 *dlid, u32 *slid);
+void hfi1_trace_parse_16b_bth(struct ib_other_headers *ohdr,
+ u8 *ack, u8 *mig, u8 *opcode,
+ u8 *pad, u8 *se, u8 *tver,
+ u32 *psn, u32 *qpn);
+void hfi1_trace_parse_16b_hdr(struct hfi1_16b_header *hdr,
+ u8 *age, bool *becn, bool *fecn,
+ u8 *l4, u8 *rc, u8 *sc,
+ u16 *entropy, u16 *len, u16 *pkey,
+ u32 *dlid, u32 *slid);
+
+const char *hfi1_trace_fmt_lrh(struct trace_seq *p, bool bypass,
+ u8 age, bool becn, bool fecn, u8 l4,
+ u8 lnh, const char *lnh_name, u8 lver,
+ u8 rc, u8 sc, u8 sl, u16 entropy,
+ u16 len, u16 pkey, u32 dlid, u32 slid);
+
+const char *hfi1_trace_fmt_rest(struct trace_seq *p, bool bypass, u8 l4,
+ u8 ack, bool becn, bool fecn, u8 mig,
+ u8 se, u8 pad, u8 opcode, const char *opname,
+ u8 tver, u16 pkey, u32 psn, u32 qpn,
+ u32 dest_qpn, u32 src_qpn);
+
+const char *hfi1_trace_get_packet_l2_str(u8 l2);
-#define __parse_ib_ehdrs(op, ehdrs) parse_everbs_hdrs(p, op, ehdrs)
+#define __parse_ib_ehdrs(op, l4, dest_qpn, src_qpn, ehdrs) \
+ parse_everbs_hdrs(p, op, l4, dest_qpn, src_qpn, ehdrs)
#define lrh_name(lrh) { HFI1_##lrh, #lrh }
#define show_lnh(lrh) \
@@ -66,139 +111,340 @@ __print_symbolic(lrh, \
lrh_name(LRH_BTH), \
lrh_name(LRH_GRH))
-#define LRH_PRN "vl %d lver %d sl %d lnh %d,%s dlid %.4x len %d slid %.4x"
-#define BTH_PRN \
- "op 0x%.2x,%s se %d m %d pad %d tver %d pkey 0x%.4x " \
- "f %d b %d qpn 0x%.6x a %d psn 0x%.8x"
-#define EHDR_PRN "%s"
-
-DECLARE_EVENT_CLASS(hfi1_ibhdr_template,
+DECLARE_EVENT_CLASS(hfi1_input_ibhdr_template,
TP_PROTO(struct hfi1_devdata *dd,
- struct ib_header *hdr),
- TP_ARGS(dd, hdr),
+ struct hfi1_packet *packet,
+ bool sc5),
+ TP_ARGS(dd, packet, sc5),
TP_STRUCT__entry(
DD_DEV_ENTRY(dd)
- /* LRH */
- __field(u8, vl)
+ __field(u8, etype)
+ __field(u8, ack)
+ __field(u8, age)
+ __field(bool, becn)
+ __field(bool, fecn)
+ __field(u8, l2)
+ __field(u8, l4)
+ __field(u8, lnh)
__field(u8, lver)
+ __field(u8, mig)
+ __field(u8, opcode)
+ __field(u8, pad)
+ __field(u8, rc)
+ __field(u8, sc)
+ __field(u8, se)
__field(u8, sl)
- __field(u8, lnh)
- __field(u16, dlid)
+ __field(u8, tver)
+ __field(u16, entropy)
__field(u16, len)
- __field(u16, slid)
- /* BTH */
+ __field(u16, pkey)
+ __field(u32, dlid)
+ __field(u32, psn)
+ __field(u32, qpn)
+ __field(u32, slid)
+ __field(u32, dest_qpn)
+ __field(u32, src_qpn)
+ /* extended headers */
+ __dynamic_array(u8, ehdrs,
+ hfi1_trace_packet_hdr_len(packet))
+ ),
+ TP_fast_assign(
+ DD_DEV_ASSIGN(dd);
+
+ __entry->etype = packet->etype;
+ __entry->l2 = hfi1_16B_get_l2(packet->hdr);
+ __entry->dest_qpn = 0;
+ __entry->src_qpn = 0;
+ if (__entry->etype == RHF_RCV_TYPE_BYPASS) {
+ hfi1_trace_parse_16b_hdr(packet->hdr,
+ &__entry->age,
+ &__entry->becn,
+ &__entry->fecn,
+ &__entry->l4,
+ &__entry->rc,
+ &__entry->sc,
+ &__entry->entropy,
+ &__entry->len,
+ &__entry->pkey,
+ &__entry->dlid,
+ &__entry->slid);
+
+ if (__entry->l4 == OPA_16B_L4_FM) {
+ __entry->opcode = IB_OPCODE_UD_SEND_ONLY;
+ __entry->dest_qpn = hfi1_16B_get_dest_qpn(packet->mgmt);
+ __entry->src_qpn = hfi1_16B_get_src_qpn(packet->mgmt);
+ } else {
+ hfi1_trace_parse_16b_bth(packet->ohdr,
+ &__entry->ack,
+ &__entry->mig,
+ &__entry->opcode,
+ &__entry->pad,
+ &__entry->se,
+ &__entry->tver,
+ &__entry->psn,
+ &__entry->qpn);
+ }
+ } else {
+ __entry->l4 = OPA_16B_L4_9B;
+ hfi1_trace_parse_9b_hdr(packet->hdr, sc5,
+ &__entry->lnh,
+ &__entry->lver,
+ &__entry->sl,
+ &__entry->sc,
+ &__entry->len,
+ &__entry->dlid,
+ &__entry->slid);
+
+ hfi1_trace_parse_9b_bth(packet->ohdr,
+ &__entry->ack,
+ &__entry->becn,
+ &__entry->fecn,
+ &__entry->mig,
+ &__entry->se,
+ &__entry->pad,
+ &__entry->opcode,
+ &__entry->tver,
+ &__entry->pkey,
+ &__entry->psn,
+ &__entry->qpn);
+ }
+ /* extended headers */
+ if (__entry->l4 != OPA_16B_L4_FM)
+ memcpy(__get_dynamic_array(ehdrs),
+ &packet->ohdr->u,
+ __get_dynamic_array_len(ehdrs));
+ ),
+ TP_printk("[%s] (%s) %s %s hlen:%d %s",
+ __get_str(dev),
+ __entry->etype != RHF_RCV_TYPE_BYPASS ?
+ show_packettype(__entry->etype) :
+ hfi1_trace_get_packet_l2_str(
+ __entry->l2),
+ hfi1_trace_fmt_lrh(p,
+ __entry->etype ==
+ RHF_RCV_TYPE_BYPASS,
+ __entry->age,
+ __entry->becn,
+ __entry->fecn,
+ __entry->l4,
+ __entry->lnh,
+ show_lnh(__entry->lnh),
+ __entry->lver,
+ __entry->rc,
+ __entry->sc,
+ __entry->sl,
+ __entry->entropy,
+ __entry->len,
+ __entry->pkey,
+ __entry->dlid,
+ __entry->slid),
+ hfi1_trace_fmt_rest(p,
+ __entry->etype ==
+ RHF_RCV_TYPE_BYPASS,
+ __entry->l4,
+ __entry->ack,
+ __entry->becn,
+ __entry->fecn,
+ __entry->mig,
+ __entry->se,
+ __entry->pad,
+ __entry->opcode,
+ show_ib_opcode(__entry->opcode),
+ __entry->tver,
+ __entry->pkey,
+ __entry->psn,
+ __entry->qpn,
+ __entry->dest_qpn,
+ __entry->src_qpn),
+ /* extended headers */
+ __get_dynamic_array_len(ehdrs),
+ __parse_ib_ehdrs(
+ __entry->opcode,
+ __entry->l4,
+ __entry->dest_qpn,
+ __entry->src_qpn,
+ (void *)__get_dynamic_array(ehdrs))
+ )
+);
+
+DEFINE_EVENT(hfi1_input_ibhdr_template, input_ibhdr,
+ TP_PROTO(struct hfi1_devdata *dd,
+ struct hfi1_packet *packet, bool sc5),
+ TP_ARGS(dd, packet, sc5));
+
+DECLARE_EVENT_CLASS(hfi1_output_ibhdr_template,
+ TP_PROTO(struct hfi1_devdata *dd,
+ struct hfi1_opa_header *opah, bool sc5),
+ TP_ARGS(dd, opah, sc5),
+ TP_STRUCT__entry(
+ DD_DEV_ENTRY(dd)
+ __field(u8, hdr_type)
+ __field(u8, ack)
+ __field(u8, age)
+ __field(bool, becn)
+ __field(bool, fecn)
+ __field(u8, l4)
+ __field(u8, lnh)
+ __field(u8, lver)
+ __field(u8, mig)
__field(u8, opcode)
- __field(u8, se)
- __field(u8, m)
__field(u8, pad)
+ __field(u8, rc)
+ __field(u8, sc)
+ __field(u8, se)
+ __field(u8, sl)
__field(u8, tver)
+ __field(u16, entropy)
+ __field(u16, len)
__field(u16, pkey)
- __field(u8, f)
- __field(u8, b)
- __field(u32, qpn)
- __field(u8, a)
+ __field(u32, dlid)
__field(u32, psn)
+ __field(u32, qpn)
+ __field(u32, slid)
+ __field(u32, dest_qpn)
+ __field(u32, src_qpn)
/* extended headers */
- __dynamic_array(u8, ehdrs, ibhdr_exhdr_len(hdr))
+ __dynamic_array(u8, ehdrs,
+ hfi1_trace_opa_hdr_len(opah))
),
- TP_fast_assign(
+ TP_fast_assign(
struct ib_other_headers *ohdr;
DD_DEV_ASSIGN(dd);
- /* LRH */
- __entry->vl =
- (u8)(be16_to_cpu(hdr->lrh[0]) >> 12);
- __entry->lver =
- (u8)(be16_to_cpu(hdr->lrh[0]) >> 8) & 0xf;
- __entry->sl =
- (u8)(be16_to_cpu(hdr->lrh[0]) >> 4) & 0xf;
- __entry->lnh =
- (u8)(be16_to_cpu(hdr->lrh[0]) & 3);
- __entry->dlid =
- be16_to_cpu(hdr->lrh[1]);
- /* allow for larger len */
- __entry->len =
- be16_to_cpu(hdr->lrh[2]);
- __entry->slid =
- be16_to_cpu(hdr->lrh[3]);
- /* BTH */
- if (__entry->lnh == HFI1_LRH_BTH)
- ohdr = &hdr->u.oth;
- else
- ohdr = &hdr->u.l.oth;
- __entry->opcode =
- (be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff;
- __entry->se =
- (be32_to_cpu(ohdr->bth[0]) >> 23) & 1;
- __entry->m =
- (be32_to_cpu(ohdr->bth[0]) >> 22) & 1;
- __entry->pad =
- (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
- __entry->tver =
- (be32_to_cpu(ohdr->bth[0]) >> 16) & 0xf;
- __entry->pkey =
- be32_to_cpu(ohdr->bth[0]) & 0xffff;
- __entry->f =
- (be32_to_cpu(ohdr->bth[1]) >> HFI1_FECN_SHIFT) &
- HFI1_FECN_MASK;
- __entry->b =
- (be32_to_cpu(ohdr->bth[1]) >> HFI1_BECN_SHIFT) &
- HFI1_BECN_MASK;
- __entry->qpn =
- be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK;
- __entry->a =
- (be32_to_cpu(ohdr->bth[2]) >> 31) & 1;
- /* allow for larger PSN */
- __entry->psn =
- be32_to_cpu(ohdr->bth[2]) & 0x7fffffff;
+
+ __entry->hdr_type = opah->hdr_type;
+ __entry->dest_qpn = 0;
+ __entry->src_qpn = 0;
+ if (__entry->hdr_type) {
+ hfi1_trace_parse_16b_hdr(&opah->opah,
+ &__entry->age,
+ &__entry->becn,
+ &__entry->fecn,
+ &__entry->l4,
+ &__entry->rc,
+ &__entry->sc,
+ &__entry->entropy,
+ &__entry->len,
+ &__entry->pkey,
+ &__entry->dlid,
+ &__entry->slid);
+
+ if (__entry->l4 == OPA_16B_L4_FM) {
+ ohdr = NULL;
+ __entry->opcode = IB_OPCODE_UD_SEND_ONLY;
+ __entry->dest_qpn = hfi1_16B_get_dest_qpn(&opah->opah.u.mgmt);
+ __entry->src_qpn = hfi1_16B_get_src_qpn(&opah->opah.u.mgmt);
+ } else {
+ if (__entry->l4 == OPA_16B_L4_IB_LOCAL)
+ ohdr = &opah->opah.u.oth;
+ else
+ ohdr = &opah->opah.u.l.oth;
+ hfi1_trace_parse_16b_bth(ohdr,
+ &__entry->ack,
+ &__entry->mig,
+ &__entry->opcode,
+ &__entry->pad,
+ &__entry->se,
+ &__entry->tver,
+ &__entry->psn,
+ &__entry->qpn);
+ }
+ } else {
+ __entry->l4 = OPA_16B_L4_9B;
+ hfi1_trace_parse_9b_hdr(&opah->ibh, sc5,
+ &__entry->lnh,
+ &__entry->lver,
+ &__entry->sl,
+ &__entry->sc,
+ &__entry->len,
+ &__entry->dlid,
+ &__entry->slid);
+ if (__entry->lnh == HFI1_LRH_BTH)
+ ohdr = &opah->ibh.u.oth;
+ else
+ ohdr = &opah->ibh.u.l.oth;
+ hfi1_trace_parse_9b_bth(ohdr,
+ &__entry->ack,
+ &__entry->becn,
+ &__entry->fecn,
+ &__entry->mig,
+ &__entry->se,
+ &__entry->pad,
+ &__entry->opcode,
+ &__entry->tver,
+ &__entry->pkey,
+ &__entry->psn,
+ &__entry->qpn);
+ }
+
/* extended headers */
- memcpy(__get_dynamic_array(ehdrs), &ohdr->u,
- ibhdr_exhdr_len(hdr));
- ),
- TP_printk("[%s] " LRH_PRN " " BTH_PRN " " EHDR_PRN,
- __get_str(dev),
- /* LRH */
- __entry->vl,
- __entry->lver,
- __entry->sl,
- __entry->lnh, show_lnh(__entry->lnh),
- __entry->dlid,
- __entry->len,
- __entry->slid,
- /* BTH */
- __entry->opcode, show_ib_opcode(__entry->opcode),
- __entry->se,
- __entry->m,
- __entry->pad,
- __entry->tver,
- __entry->pkey,
- __entry->f,
- __entry->b,
- __entry->qpn,
- __entry->a,
- __entry->psn,
- /* extended headers */
- __parse_ib_ehdrs(
- __entry->opcode,
- (void *)__get_dynamic_array(ehdrs))
- )
+ if (__entry->l4 != OPA_16B_L4_FM)
+ memcpy(__get_dynamic_array(ehdrs),
+ &ohdr->u, __get_dynamic_array_len(ehdrs));
+ ),
+ TP_printk("[%s] (%s) %s %s hlen:%d %s",
+ __get_str(dev),
+ hfi1_trace_get_packet_l4_str(__entry->l4),
+ hfi1_trace_fmt_lrh(p,
+ !!__entry->hdr_type,
+ __entry->age,
+ __entry->becn,
+ __entry->fecn,
+ __entry->l4,
+ __entry->lnh,
+ show_lnh(__entry->lnh),
+ __entry->lver,
+ __entry->rc,
+ __entry->sc,
+ __entry->sl,
+ __entry->entropy,
+ __entry->len,
+ __entry->pkey,
+ __entry->dlid,
+ __entry->slid),
+ hfi1_trace_fmt_rest(p,
+ !!__entry->hdr_type,
+ __entry->l4,
+ __entry->ack,
+ __entry->becn,
+ __entry->fecn,
+ __entry->mig,
+ __entry->se,
+ __entry->pad,
+ __entry->opcode,
+ show_ib_opcode(__entry->opcode),
+ __entry->tver,
+ __entry->pkey,
+ __entry->psn,
+ __entry->qpn,
+ __entry->dest_qpn,
+ __entry->src_qpn),
+ /* extended headers */
+ __get_dynamic_array_len(ehdrs),
+ __parse_ib_ehdrs(
+ __entry->opcode,
+ __entry->l4,
+ __entry->dest_qpn,
+ __entry->src_qpn,
+ (void *)__get_dynamic_array(ehdrs))
+ )
);
-DEFINE_EVENT(hfi1_ibhdr_template, input_ibhdr,
- TP_PROTO(struct hfi1_devdata *dd, struct ib_header *hdr),
- TP_ARGS(dd, hdr));
+DEFINE_EVENT(hfi1_output_ibhdr_template, pio_output_ibhdr,
+ TP_PROTO(struct hfi1_devdata *dd,
+ struct hfi1_opa_header *opah, bool sc5),
+ TP_ARGS(dd, opah, sc5));
-DEFINE_EVENT(hfi1_ibhdr_template, pio_output_ibhdr,
- TP_PROTO(struct hfi1_devdata *dd, struct ib_header *hdr),
- TP_ARGS(dd, hdr));
+DEFINE_EVENT(hfi1_output_ibhdr_template, ack_output_ibhdr,
+ TP_PROTO(struct hfi1_devdata *dd,
+ struct hfi1_opa_header *opah, bool sc5),
+ TP_ARGS(dd, opah, sc5));
-DEFINE_EVENT(hfi1_ibhdr_template, ack_output_ibhdr,
- TP_PROTO(struct hfi1_devdata *dd, struct ib_header *hdr),
- TP_ARGS(dd, hdr));
+DEFINE_EVENT(hfi1_output_ibhdr_template, sdma_output_ibhdr,
+ TP_PROTO(struct hfi1_devdata *dd,
+ struct hfi1_opa_header *opah, bool sc5),
+ TP_ARGS(dd, opah, sc5));
-DEFINE_EVENT(hfi1_ibhdr_template, sdma_output_ibhdr,
- TP_PROTO(struct hfi1_devdata *dd, struct ib_header *hdr),
- TP_ARGS(dd, hdr));
#endif /* __HFI1_TRACE_IBHDRS_H */
diff --git a/drivers/infiniband/hw/hfi1/trace_iowait.h b/drivers/infiniband/hw/hfi1/trace_iowait.h
new file mode 100644
index 000000000000..27f4334ece2b
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/trace_iowait.h
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
+/*
+ * Copyright(c) 2018 Intel Corporation.
+ *
+ */
+#if !defined(__HFI1_TRACE_IOWAIT_H) || defined(TRACE_HEADER_MULTI_READ)
+#define __HFI1_TRACE_IOWAIT_H
+
+#include <linux/tracepoint.h>
+#include "iowait.h"
+#include "verbs.h"
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM hfi1_iowait
+
+DECLARE_EVENT_CLASS(hfi1_iowait_template,
+ TP_PROTO(struct iowait *wait, u32 flag),
+ TP_ARGS(wait, flag),
+ TP_STRUCT__entry(/* entry */
+ __field(unsigned long, addr)
+ __field(unsigned long, flags)
+ __field(u32, flag)
+ __field(u32, qpn)
+ ),
+ TP_fast_assign(/* assign */
+ __entry->addr = (unsigned long)wait;
+ __entry->flags = wait->flags;
+ __entry->flag = (1 << flag);
+ __entry->qpn = iowait_to_qp(wait)->ibqp.qp_num;
+ ),
+ TP_printk(/* print */
+ "iowait 0x%lx qp %u flags 0x%lx flag 0x%x",
+ __entry->addr,
+ __entry->qpn,
+ __entry->flags,
+ __entry->flag
+ )
+ );
+
+DEFINE_EVENT(hfi1_iowait_template, hfi1_iowait_set,
+ TP_PROTO(struct iowait *wait, u32 flag),
+ TP_ARGS(wait, flag));
+
+DEFINE_EVENT(hfi1_iowait_template, hfi1_iowait_clear,
+ TP_PROTO(struct iowait *wait, u32 flag),
+ TP_ARGS(wait, flag));
+
+#endif /* __HFI1_TRACE_IOWAIT_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace_iowait
+#include <trace/define_trace.h>
diff --git a/drivers/infiniband/hw/hfi1/trace_misc.h b/drivers/infiniband/hw/hfi1/trace_misc.h
index d308454af7fd..8dc46b6891df 100644
--- a/drivers/infiniband/hw/hfi1/trace_misc.h
+++ b/drivers/infiniband/hw/hfi1/trace_misc.h
@@ -1,49 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
* Copyright(c) 2015, 2016 Intel Corporation.
-*
-* This file is provided under a dual BSD/GPLv2 license. When using or
-* redistributing this file, you may do so under either license.
-*
-* GPL LICENSE SUMMARY
-*
-* This program is free software; you can redistribute it and/or modify
-* it under the terms of version 2 of the GNU General Public License as
-* published by the Free Software Foundation.
-*
-* This program is distributed in the hope that it will be useful, but
-* WITHOUT ANY WARRANTY; without even the implied warranty of
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-* General Public License for more details.
-*
-* BSD LICENSE
-*
-* Redistribution and use in source and binary forms, with or without
-* modification, are permitted provided that the following conditions
-* are met:
-*
-* - Redistributions of source code must retain the above copyright
-* notice, this list of conditions and the following disclaimer.
-* - Redistributions in binary form must reproduce the above copyright
-* notice, this list of conditions and the following disclaimer in
-* the documentation and/or other materials provided with the
-* distribution.
-* - Neither the name of Intel Corporation nor the names of its
-* contributors may be used to endorse or promote products derived
-* from this software without specific prior written permission.
-*
-* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*
*/
+
#if !defined(__HFI1_TRACE_MISC_H) || defined(TRACE_HEADER_MULTI_READ)
#define __HFI1_TRACE_MISC_H
@@ -63,7 +22,7 @@ TRACE_EVENT(hfi1_interrupt,
__array(char, buf, 64)
__field(int, src)
),
- TP_fast_assign(DD_DEV_ASSIGN(dd)
+ TP_fast_assign(DD_DEV_ASSIGN(dd);
is_entry->is_name(__entry->buf, 64,
src - is_entry->start);
__entry->src = src;
@@ -72,6 +31,74 @@ TRACE_EVENT(hfi1_interrupt,
__entry->src)
);
+DECLARE_EVENT_CLASS(
+ hfi1_csr_template,
+ TP_PROTO(void __iomem *addr, u64 value),
+ TP_ARGS(addr, value),
+ TP_STRUCT__entry(
+ __field(void __iomem *, addr)
+ __field(u64, value)
+ ),
+ TP_fast_assign(
+ __entry->addr = addr;
+ __entry->value = value;
+ ),
+ TP_printk("addr %p value %llx", __entry->addr, __entry->value)
+);
+
+DEFINE_EVENT(
+ hfi1_csr_template, hfi1_write_rcvarray,
+ TP_PROTO(void __iomem *addr, u64 value),
+ TP_ARGS(addr, value));
+
+#ifdef CONFIG_FAULT_INJECTION
+TRACE_EVENT(hfi1_fault_opcode,
+ TP_PROTO(struct rvt_qp *qp, u8 opcode),
+ TP_ARGS(qp, opcode),
+ TP_STRUCT__entry(DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device))
+ __field(u32, qpn)
+ __field(u8, opcode)
+ ),
+ TP_fast_assign(DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device));
+ __entry->qpn = qp->ibqp.qp_num;
+ __entry->opcode = opcode;
+ ),
+ TP_printk("[%s] qpn 0x%x opcode 0x%x",
+ __get_str(dev), __entry->qpn, __entry->opcode)
+);
+
+TRACE_EVENT(hfi1_fault_packet,
+ TP_PROTO(struct hfi1_packet *packet),
+ TP_ARGS(packet),
+ TP_STRUCT__entry(DD_DEV_ENTRY(packet->rcd->ppd->dd)
+ __field(u64, eflags)
+ __field(u32, ctxt)
+ __field(u32, hlen)
+ __field(u32, tlen)
+ __field(u32, updegr)
+ __field(u32, etail)
+ ),
+ TP_fast_assign(DD_DEV_ASSIGN(packet->rcd->ppd->dd);
+ __entry->eflags = rhf_err_flags(packet->rhf);
+ __entry->ctxt = packet->rcd->ctxt;
+ __entry->hlen = packet->hlen;
+ __entry->tlen = packet->tlen;
+ __entry->updegr = packet->updegr;
+ __entry->etail = rhf_egr_index(packet->rhf);
+ ),
+ TP_printk(
+ "[%s] ctxt %d eflags 0x%llx hlen %d tlen %d updegr %d etail %d",
+ __get_str(dev),
+ __entry->ctxt,
+ __entry->eflags,
+ __entry->hlen,
+ __entry->tlen,
+ __entry->updegr,
+ __entry->etail
+ )
+);
+#endif
+
#endif /* __HFI1_TRACE_MISC_H */
#undef TRACE_INCLUDE_PATH
diff --git a/drivers/infiniband/hw/hfi1/trace_mmu.h b/drivers/infiniband/hw/hfi1/trace_mmu.h
new file mode 100644
index 000000000000..5a9dfd85e7f5
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/trace_mmu.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
+/*
+ * Copyright(c) 2017 Intel Corporation.
+ */
+
+#if !defined(__HFI1_TRACE_MMU_H) || defined(TRACE_HEADER_MULTI_READ)
+#define __HFI1_TRACE_MMU_H
+
+#include <linux/tracepoint.h>
+#include <linux/trace_seq.h>
+
+#include "hfi.h"
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM hfi1_mmu
+
+DECLARE_EVENT_CLASS(hfi1_mmu_rb_template,
+ TP_PROTO(struct mmu_rb_node *node),
+ TP_ARGS(node),
+ TP_STRUCT__entry(__field(unsigned long, addr)
+ __field(unsigned long, len)
+ __field(unsigned int, refcount)
+ ),
+ TP_fast_assign(__entry->addr = node->addr;
+ __entry->len = node->len;
+ __entry->refcount = kref_read(&node->refcount);
+ ),
+ TP_printk("MMU node addr 0x%lx, len %lu, refcount %u",
+ __entry->addr,
+ __entry->len,
+ __entry->refcount
+ )
+);
+
+DEFINE_EVENT(hfi1_mmu_rb_template, hfi1_mmu_rb_insert,
+ TP_PROTO(struct mmu_rb_node *node),
+ TP_ARGS(node));
+
+TRACE_EVENT(hfi1_mmu_rb_search,
+ TP_PROTO(unsigned long addr, unsigned long len),
+ TP_ARGS(addr, len),
+ TP_STRUCT__entry(__field(unsigned long, addr)
+ __field(unsigned long, len)
+ ),
+ TP_fast_assign(__entry->addr = addr;
+ __entry->len = len;
+ ),
+ TP_printk("MMU node addr 0x%lx, len %lu",
+ __entry->addr,
+ __entry->len
+ )
+);
+
+DEFINE_EVENT(hfi1_mmu_rb_template, hfi1_mmu_mem_invalidate,
+ TP_PROTO(struct mmu_rb_node *node),
+ TP_ARGS(node));
+
+DEFINE_EVENT(hfi1_mmu_rb_template, hfi1_mmu_rb_evict,
+ TP_PROTO(struct mmu_rb_node *node),
+ TP_ARGS(node));
+
+DEFINE_EVENT(hfi1_mmu_rb_template, hfi1_mmu_release_node,
+ TP_PROTO(struct mmu_rb_node *node),
+ TP_ARGS(node));
+
+#endif /* __HFI1_TRACE_RC_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace_mmu
+#include <trace/define_trace.h>
diff --git a/drivers/infiniband/hw/hfi1/trace_rc.h b/drivers/infiniband/hw/hfi1/trace_rc.h
index 5ea5005f9f41..fa254f9b9c42 100644
--- a/drivers/infiniband/hw/hfi1/trace_rc.h
+++ b/drivers/infiniband/hw/hfi1/trace_rc.h
@@ -1,49 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
-* Copyright(c) 2015, 2016 Intel Corporation.
-*
-* This file is provided under a dual BSD/GPLv2 license. When using or
-* redistributing this file, you may do so under either license.
-*
-* GPL LICENSE SUMMARY
-*
-* This program is free software; you can redistribute it and/or modify
-* it under the terms of version 2 of the GNU General Public License as
-* published by the Free Software Foundation.
-*
-* This program is distributed in the hope that it will be useful, but
-* WITHOUT ANY WARRANTY; without even the implied warranty of
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-* General Public License for more details.
-*
-* BSD LICENSE
-*
-* Redistribution and use in source and binary forms, with or without
-* modification, are permitted provided that the following conditions
-* are met:
-*
-* - Redistributions of source code must retain the above copyright
-* notice, this list of conditions and the following disclaimer.
-* - Redistributions in binary form must reproduce the above copyright
-* notice, this list of conditions and the following disclaimer in
-* the documentation and/or other materials provided with the
-* distribution.
-* - Neither the name of Intel Corporation nor the names of its
-* contributors may be used to endorse or promote products derived
-* from this software without specific prior written permission.
-*
-* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*
+* Copyright(c) 2015, 2016, 2017 Intel Corporation.
*/
+
#if !defined(__HFI1_TRACE_RC_H) || defined(TRACE_HEADER_MULTI_READ)
#define __HFI1_TRACE_RC_H
@@ -70,7 +29,7 @@ DECLARE_EVENT_CLASS(hfi1_rc_template,
__field(u32, r_psn)
),
TP_fast_assign(
- DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device))
+ DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device));
__entry->qpn = qp->ibqp.qp_num;
__entry->s_flags = qp->s_flags;
__entry->psn = psn;
@@ -104,14 +63,57 @@ DEFINE_EVENT(hfi1_rc_template, hfi1_ack,
TP_ARGS(qp, psn)
);
-DEFINE_EVENT(hfi1_rc_template, hfi1_timeout,
+DEFINE_EVENT(hfi1_rc_template, hfi1_rcv_error,
TP_PROTO(struct rvt_qp *qp, u32 psn),
TP_ARGS(qp, psn)
);
-DEFINE_EVENT(hfi1_rc_template, hfi1_rcv_error,
- TP_PROTO(struct rvt_qp *qp, u32 psn),
- TP_ARGS(qp, psn)
+DEFINE_EVENT(/* event */
+ hfi1_rc_template, hfi1_rc_completion,
+ TP_PROTO(struct rvt_qp *qp, u32 psn),
+ TP_ARGS(qp, psn)
+);
+
+DECLARE_EVENT_CLASS(/* rc_ack */
+ hfi1_rc_ack_template,
+ TP_PROTO(struct rvt_qp *qp, u32 aeth, u32 psn,
+ struct rvt_swqe *wqe),
+ TP_ARGS(qp, aeth, psn, wqe),
+ TP_STRUCT__entry(/* entry */
+ DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device))
+ __field(u32, qpn)
+ __field(u32, aeth)
+ __field(u32, psn)
+ __field(u8, opcode)
+ __field(u32, spsn)
+ __field(u32, lpsn)
+ ),
+ TP_fast_assign(/* assign */
+ DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device));
+ __entry->qpn = qp->ibqp.qp_num;
+ __entry->aeth = aeth;
+ __entry->psn = psn;
+ __entry->opcode = wqe->wr.opcode;
+ __entry->spsn = wqe->psn;
+ __entry->lpsn = wqe->lpsn;
+ ),
+ TP_printk(/* print */
+ "[%s] qpn 0x%x aeth 0x%x psn 0x%x opcode 0x%x spsn 0x%x lpsn 0x%x",
+ __get_str(dev),
+ __entry->qpn,
+ __entry->aeth,
+ __entry->psn,
+ __entry->opcode,
+ __entry->spsn,
+ __entry->lpsn
+ )
+);
+
+DEFINE_EVENT(/* do_rc_ack */
+ hfi1_rc_ack_template, hfi1_rc_ack_do,
+ TP_PROTO(struct rvt_qp *qp, u32 aeth, u32 psn,
+ struct rvt_swqe *wqe),
+ TP_ARGS(qp, aeth, psn, wqe)
);
#endif /* __HFI1_TRACE_RC_H */
diff --git a/drivers/infiniband/hw/hfi1/trace_rx.h b/drivers/infiniband/hw/hfi1/trace_rx.h
index f77e59fb43fe..8d5e12fe88a5 100644
--- a/drivers/infiniband/hw/hfi1/trace_rx.h
+++ b/drivers/infiniband/hw/hfi1/trace_rx.h
@@ -1,49 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
+ * Copyright(c) 2015 - 2018 Intel Corporation.
*/
+
#if !defined(__HFI1_TRACE_RX_H) || defined(TRACE_HEADER_MULTI_READ)
#define __HFI1_TRACE_RX_H
@@ -52,21 +11,20 @@
#include "hfi.h"
+#define tidtype_name(type) { PT_##type, #type }
+#define show_tidtype(type) \
+__print_symbolic(type, \
+ tidtype_name(EXPECTED), \
+ tidtype_name(EAGER), \
+ tidtype_name(INVALID)) \
+
#undef TRACE_SYSTEM
#define TRACE_SYSTEM hfi1_rx
TRACE_EVENT(hfi1_rcvhdr,
- TP_PROTO(struct hfi1_devdata *dd,
- u32 ctxt,
- u64 eflags,
- u32 etype,
- u32 hlen,
- u32 tlen,
- u32 updegr,
- u32 etail
- ),
- TP_ARGS(dd, ctxt, eflags, etype, hlen, tlen, updegr, etail),
- TP_STRUCT__entry(DD_DEV_ENTRY(dd)
+ TP_PROTO(struct hfi1_packet *packet),
+ TP_ARGS(packet),
+ TP_STRUCT__entry(DD_DEV_ENTRY(packet->rcd->dd)
__field(u64, eflags)
__field(u32, ctxt)
__field(u32, etype)
@@ -75,14 +33,14 @@ TRACE_EVENT(hfi1_rcvhdr,
__field(u32, updegr)
__field(u32, etail)
),
- TP_fast_assign(DD_DEV_ASSIGN(dd);
- __entry->eflags = eflags;
- __entry->ctxt = ctxt;
- __entry->etype = etype;
- __entry->hlen = hlen;
- __entry->tlen = tlen;
- __entry->updegr = updegr;
- __entry->etail = etail;
+ TP_fast_assign(DD_DEV_ASSIGN(packet->rcd->dd);
+ __entry->eflags = rhf_err_flags(packet->rhf);
+ __entry->ctxt = packet->rcd->ctxt;
+ __entry->etype = packet->etype;
+ __entry->hlen = packet->hlen;
+ __entry->tlen = packet->tlen;
+ __entry->updegr = packet->updegr;
+ __entry->etail = rhf_egr_index(packet->rhf);
),
TP_printk(
"[%s] ctxt %d eflags 0x%llx etype %d,%s hlen %d tlen %d updegr %d etail %d",
@@ -98,28 +56,17 @@ TRACE_EVENT(hfi1_rcvhdr,
);
TRACE_EVENT(hfi1_receive_interrupt,
- TP_PROTO(struct hfi1_devdata *dd, u32 ctxt),
- TP_ARGS(dd, ctxt),
+ TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd),
+ TP_ARGS(dd, rcd),
TP_STRUCT__entry(DD_DEV_ENTRY(dd)
__field(u32, ctxt)
__field(u8, slow_path)
__field(u8, dma_rtail)
),
TP_fast_assign(DD_DEV_ASSIGN(dd);
- __entry->ctxt = ctxt;
- if (dd->rcd[ctxt]->do_interrupt ==
- &handle_receive_interrupt) {
- __entry->slow_path = 1;
- __entry->dma_rtail = 0xFF;
- } else if (dd->rcd[ctxt]->do_interrupt ==
- &handle_receive_interrupt_dma_rtail){
- __entry->dma_rtail = 1;
- __entry->slow_path = 0;
- } else if (dd->rcd[ctxt]->do_interrupt ==
- &handle_receive_interrupt_nodma_rtail) {
- __entry->dma_rtail = 0;
- __entry->slow_path = 0;
- }
+ __entry->ctxt = rcd->ctxt;
+ __entry->slow_path = hfi1_is_slowpath(rcd);
+ __entry->dma_rtail = get_dma_rtail_setting(rcd);
),
TP_printk("[%s] ctxt %d SlowPath: %d DmaRtail: %d",
__get_str(dev),
@@ -129,103 +76,6 @@ TRACE_EVENT(hfi1_receive_interrupt,
)
);
-TRACE_EVENT(hfi1_exp_tid_reg,
- TP_PROTO(unsigned int ctxt, u16 subctxt, u32 rarr,
- u32 npages, unsigned long va, unsigned long pa,
- dma_addr_t dma),
- TP_ARGS(ctxt, subctxt, rarr, npages, va, pa, dma),
- TP_STRUCT__entry(
- __field(unsigned int, ctxt)
- __field(u16, subctxt)
- __field(u32, rarr)
- __field(u32, npages)
- __field(unsigned long, va)
- __field(unsigned long, pa)
- __field(dma_addr_t, dma)
- ),
- TP_fast_assign(
- __entry->ctxt = ctxt;
- __entry->subctxt = subctxt;
- __entry->rarr = rarr;
- __entry->npages = npages;
- __entry->va = va;
- __entry->pa = pa;
- __entry->dma = dma;
- ),
- TP_printk("[%u:%u] entry:%u, %u pages @ 0x%lx, va:0x%lx dma:0x%llx",
- __entry->ctxt,
- __entry->subctxt,
- __entry->rarr,
- __entry->npages,
- __entry->pa,
- __entry->va,
- __entry->dma
- )
- );
-
-TRACE_EVENT(hfi1_exp_tid_unreg,
- TP_PROTO(unsigned int ctxt, u16 subctxt, u32 rarr, u32 npages,
- unsigned long va, unsigned long pa, dma_addr_t dma),
- TP_ARGS(ctxt, subctxt, rarr, npages, va, pa, dma),
- TP_STRUCT__entry(
- __field(unsigned int, ctxt)
- __field(u16, subctxt)
- __field(u32, rarr)
- __field(u32, npages)
- __field(unsigned long, va)
- __field(unsigned long, pa)
- __field(dma_addr_t, dma)
- ),
- TP_fast_assign(
- __entry->ctxt = ctxt;
- __entry->subctxt = subctxt;
- __entry->rarr = rarr;
- __entry->npages = npages;
- __entry->va = va;
- __entry->pa = pa;
- __entry->dma = dma;
- ),
- TP_printk("[%u:%u] entry:%u, %u pages @ 0x%lx, va:0x%lx dma:0x%llx",
- __entry->ctxt,
- __entry->subctxt,
- __entry->rarr,
- __entry->npages,
- __entry->pa,
- __entry->va,
- __entry->dma
- )
- );
-
-TRACE_EVENT(hfi1_exp_tid_inval,
- TP_PROTO(unsigned int ctxt, u16 subctxt, unsigned long va, u32 rarr,
- u32 npages, dma_addr_t dma),
- TP_ARGS(ctxt, subctxt, va, rarr, npages, dma),
- TP_STRUCT__entry(
- __field(unsigned int, ctxt)
- __field(u16, subctxt)
- __field(unsigned long, va)
- __field(u32, rarr)
- __field(u32, npages)
- __field(dma_addr_t, dma)
- ),
- TP_fast_assign(
- __entry->ctxt = ctxt;
- __entry->subctxt = subctxt;
- __entry->va = va;
- __entry->rarr = rarr;
- __entry->npages = npages;
- __entry->dma = dma;
- ),
- TP_printk("[%u:%u] entry:%u, %u pages @ 0x%lx dma: 0x%llx",
- __entry->ctxt,
- __entry->subctxt,
- __entry->rarr,
- __entry->npages,
- __entry->va,
- __entry->dma
- )
- );
-
TRACE_EVENT(hfi1_mmu_invalidate,
TP_PROTO(unsigned int ctxt, u16 subctxt, const char *type,
unsigned long start, unsigned long end),
@@ -240,7 +90,7 @@ TRACE_EVENT(hfi1_mmu_invalidate,
TP_fast_assign(
__entry->ctxt = ctxt;
__entry->subctxt = subctxt;
- __assign_str(type, type);
+ __assign_str(type);
__entry->start = start;
__entry->end = end;
),
diff --git a/drivers/infiniband/hw/hfi1/trace_tid.h b/drivers/infiniband/hw/hfi1/trace_tid.h
new file mode 100644
index 000000000000..e358f5b885fa
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/trace_tid.h
@@ -0,0 +1,1642 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
+/*
+ * Copyright(c) 2018 Intel Corporation.
+ *
+ */
+#if !defined(__HFI1_TRACE_TID_H) || defined(TRACE_HEADER_MULTI_READ)
+#define __HFI1_TRACE_TID_H
+
+#include <linux/tracepoint.h>
+#include <linux/trace_seq.h>
+
+#include "hfi.h"
+
+#define tidtype_name(type) { PT_##type, #type }
+#define show_tidtype(type) \
+__print_symbolic(type, \
+ tidtype_name(EXPECTED), \
+ tidtype_name(EAGER), \
+ tidtype_name(INVALID)) \
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM hfi1_tid
+
+u8 hfi1_trace_get_tid_ctrl(u32 ent);
+u16 hfi1_trace_get_tid_len(u32 ent);
+u16 hfi1_trace_get_tid_idx(u32 ent);
+
+#define OPFN_PARAM_PRN "[%s] qpn 0x%x %s OPFN: qp 0x%x, max read %u, " \
+ "max write %u, max length %u, jkey 0x%x timeout %u " \
+ "urg %u"
+
+#define TID_FLOW_PRN "[%s] qpn 0x%x flow %d: idx %d resp_ib_psn 0x%x " \
+ "generation 0x%x fpsn 0x%x-%x r_next_psn 0x%x " \
+ "ib_psn 0x%x-%x npagesets %u tnode_cnt %u " \
+ "tidcnt %u tid_idx %u tid_offset %u length %u sent %u"
+
+#define TID_NODE_PRN "[%s] qpn 0x%x %s idx %u grp base 0x%x map 0x%x " \
+ "used %u cnt %u"
+
+#define RSP_INFO_PRN "[%s] qpn 0x%x state 0x%x s_state 0x%x psn 0x%x " \
+ "r_psn 0x%x r_state 0x%x r_flags 0x%x " \
+ "r_head_ack_queue %u s_tail_ack_queue %u " \
+ "s_acked_ack_queue %u s_ack_state 0x%x " \
+ "s_nak_state 0x%x s_flags 0x%x ps_flags 0x%x " \
+ "iow_flags 0x%lx"
+
+#define SENDER_INFO_PRN "[%s] qpn 0x%x state 0x%x s_cur %u s_tail %u " \
+ "s_head %u s_acked %u s_last %u s_psn 0x%x " \
+ "s_last_psn 0x%x s_flags 0x%x ps_flags 0x%x " \
+ "iow_flags 0x%lx s_state 0x%x s_num_rd %u s_retry %u"
+
+#define TID_READ_SENDER_PRN "[%s] qpn 0x%x newreq %u tid_r_reqs %u " \
+ "tid_r_comp %u pending_tid_r_segs %u " \
+ "s_flags 0x%x ps_flags 0x%x iow_flags 0x%lx " \
+ "s_state 0x%x hw_flow_index %u generation 0x%x " \
+ "fpsn 0x%x"
+
+#define TID_REQ_PRN "[%s] qpn 0x%x newreq %u opcode 0x%x psn 0x%x lpsn 0x%x " \
+ "cur_seg %u comp_seg %u ack_seg %u alloc_seg %u " \
+ "total_segs %u setup_head %u clear_tail %u flow_idx %u " \
+ "acked_tail %u state %u r_ack_psn 0x%x r_flow_psn 0x%x " \
+ "r_last_ackd 0x%x s_next_psn 0x%x"
+
+#define RCV_ERR_PRN "[%s] qpn 0x%x s_flags 0x%x state 0x%x " \
+ "s_acked_ack_queue %u s_tail_ack_queue %u " \
+ "r_head_ack_queue %u opcode 0x%x psn 0x%x r_psn 0x%x " \
+ " diff %d"
+
+#define TID_WRITE_RSPDR_PRN "[%s] qpn 0x%x r_tid_head %u r_tid_tail %u " \
+ "r_tid_ack %u r_tid_alloc %u alloc_w_segs %u " \
+ "pending_tid_w_segs %u sync_pt %s " \
+ "ps_nak_psn 0x%x ps_nak_state 0x%x " \
+ "prnr_nak_state 0x%x hw_flow_index %u generation "\
+ "0x%x fpsn 0x%x resync %s" \
+ "r_next_psn_kdeth 0x%x"
+
+#define TID_WRITE_SENDER_PRN "[%s] qpn 0x%x newreq %u s_tid_cur %u " \
+ "s_tid_tail %u s_tid_head %u " \
+ "pending_tid_w_resp %u n_requests %u " \
+ "n_tid_requests %u s_flags 0x%x ps_flags 0x%x "\
+ "iow_flags 0x%lx s_state 0x%x s_retry %u"
+
+#define KDETH_EFLAGS_ERR_PRN "[%s] qpn 0x%x TID ERR: RcvType 0x%x " \
+ "RcvTypeError 0x%x PSN 0x%x"
+
+DECLARE_EVENT_CLASS(/* class */
+ hfi1_exp_tid_reg_unreg,
+ TP_PROTO(unsigned int ctxt, u16 subctxt, u32 rarr, u32 npages,
+ unsigned long va, unsigned long pa, dma_addr_t dma),
+ TP_ARGS(ctxt, subctxt, rarr, npages, va, pa, dma),
+ TP_STRUCT__entry(/* entry */
+ __field(unsigned int, ctxt)
+ __field(u16, subctxt)
+ __field(u32, rarr)
+ __field(u32, npages)
+ __field(unsigned long, va)
+ __field(unsigned long, pa)
+ __field(dma_addr_t, dma)
+ ),
+ TP_fast_assign(/* assign */
+ __entry->ctxt = ctxt;
+ __entry->subctxt = subctxt;
+ __entry->rarr = rarr;
+ __entry->npages = npages;
+ __entry->va = va;
+ __entry->pa = pa;
+ __entry->dma = dma;
+ ),
+ TP_printk("[%u:%u] entry:%u, %u pages @ 0x%lx, va:0x%lx dma:0x%llx",
+ __entry->ctxt,
+ __entry->subctxt,
+ __entry->rarr,
+ __entry->npages,
+ __entry->pa,
+ __entry->va,
+ __entry->dma
+ )
+);
+
+DEFINE_EVENT(/* exp_tid_unreg */
+ hfi1_exp_tid_reg_unreg, hfi1_exp_tid_unreg,
+ TP_PROTO(unsigned int ctxt, u16 subctxt, u32 rarr, u32 npages,
+ unsigned long va, unsigned long pa, dma_addr_t dma),
+ TP_ARGS(ctxt, subctxt, rarr, npages, va, pa, dma)
+);
+
+DEFINE_EVENT(/* exp_tid_reg */
+ hfi1_exp_tid_reg_unreg, hfi1_exp_tid_reg,
+ TP_PROTO(unsigned int ctxt, u16 subctxt, u32 rarr, u32 npages,
+ unsigned long va, unsigned long pa, dma_addr_t dma),
+ TP_ARGS(ctxt, subctxt, rarr, npages, va, pa, dma)
+);
+
+TRACE_EVENT(/* put_tid */
+ hfi1_put_tid,
+ TP_PROTO(struct hfi1_devdata *dd,
+ u32 index, u32 type, unsigned long pa, u16 order),
+ TP_ARGS(dd, index, type, pa, order),
+ TP_STRUCT__entry(/* entry */
+ DD_DEV_ENTRY(dd)
+ __field(unsigned long, pa)
+ __field(u32, index)
+ __field(u32, type)
+ __field(u16, order)
+ ),
+ TP_fast_assign(/* assign */
+ DD_DEV_ASSIGN(dd);
+ __entry->pa = pa;
+ __entry->index = index;
+ __entry->type = type;
+ __entry->order = order;
+ ),
+ TP_printk("[%s] type %s pa %lx index %u order %u",
+ __get_str(dev),
+ show_tidtype(__entry->type),
+ __entry->pa,
+ __entry->index,
+ __entry->order
+ )
+);
+
+TRACE_EVENT(/* exp_tid_inval */
+ hfi1_exp_tid_inval,
+ TP_PROTO(unsigned int ctxt, u16 subctxt, unsigned long va, u32 rarr,
+ u32 npages, dma_addr_t dma),
+ TP_ARGS(ctxt, subctxt, va, rarr, npages, dma),
+ TP_STRUCT__entry(/* entry */
+ __field(unsigned int, ctxt)
+ __field(u16, subctxt)
+ __field(unsigned long, va)
+ __field(u32, rarr)
+ __field(u32, npages)
+ __field(dma_addr_t, dma)
+ ),
+ TP_fast_assign(/* assign */
+ __entry->ctxt = ctxt;
+ __entry->subctxt = subctxt;
+ __entry->va = va;
+ __entry->rarr = rarr;
+ __entry->npages = npages;
+ __entry->dma = dma;
+ ),
+ TP_printk("[%u:%u] entry:%u, %u pages @ 0x%lx dma: 0x%llx",
+ __entry->ctxt,
+ __entry->subctxt,
+ __entry->rarr,
+ __entry->npages,
+ __entry->va,
+ __entry->dma
+ )
+);
+
+DECLARE_EVENT_CLASS(/* opfn_state */
+ hfi1_opfn_state_template,
+ TP_PROTO(struct rvt_qp *qp),
+ TP_ARGS(qp),
+ TP_STRUCT__entry(/* entry */
+ DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device))
+ __field(u32, qpn)
+ __field(u16, requested)
+ __field(u16, completed)
+ __field(u8, curr)
+ ),
+ TP_fast_assign(/* assign */
+ struct hfi1_qp_priv *priv = qp->priv;
+
+ DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device));
+ __entry->qpn = qp->ibqp.qp_num;
+ __entry->requested = priv->opfn.requested;
+ __entry->completed = priv->opfn.completed;
+ __entry->curr = priv->opfn.curr;
+ ),
+ TP_printk(/* print */
+ "[%s] qpn 0x%x requested 0x%x completed 0x%x curr 0x%x",
+ __get_str(dev),
+ __entry->qpn,
+ __entry->requested,
+ __entry->completed,
+ __entry->curr
+ )
+);
+
+DEFINE_EVENT(/* event */
+ hfi1_opfn_state_template, hfi1_opfn_state_conn_request,
+ TP_PROTO(struct rvt_qp *qp),
+ TP_ARGS(qp)
+);
+
+DEFINE_EVENT(/* event */
+ hfi1_opfn_state_template, hfi1_opfn_state_sched_conn_request,
+ TP_PROTO(struct rvt_qp *qp),
+ TP_ARGS(qp)
+);
+
+DEFINE_EVENT(/* event */
+ hfi1_opfn_state_template, hfi1_opfn_state_conn_response,
+ TP_PROTO(struct rvt_qp *qp),
+ TP_ARGS(qp)
+);
+
+DEFINE_EVENT(/* event */
+ hfi1_opfn_state_template, hfi1_opfn_state_conn_reply,
+ TP_PROTO(struct rvt_qp *qp),
+ TP_ARGS(qp)
+);
+
+DEFINE_EVENT(/* event */
+ hfi1_opfn_state_template, hfi1_opfn_state_conn_error,
+ TP_PROTO(struct rvt_qp *qp),
+ TP_ARGS(qp)
+);
+
+DECLARE_EVENT_CLASS(/* opfn_data */
+ hfi1_opfn_data_template,
+ TP_PROTO(struct rvt_qp *qp, u8 capcode, u64 data),
+ TP_ARGS(qp, capcode, data),
+ TP_STRUCT__entry(/* entry */
+ DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device))
+ __field(u32, qpn)
+ __field(u32, state)
+ __field(u8, capcode)
+ __field(u64, data)
+ ),
+ TP_fast_assign(/* assign */
+ DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device));
+ __entry->qpn = qp->ibqp.qp_num;
+ __entry->state = qp->state;
+ __entry->capcode = capcode;
+ __entry->data = data;
+ ),
+ TP_printk(/* printk */
+ "[%s] qpn 0x%x (state 0x%x) Capcode %u data 0x%llx",
+ __get_str(dev),
+ __entry->qpn,
+ __entry->state,
+ __entry->capcode,
+ __entry->data
+ )
+);
+
+DEFINE_EVENT(/* event */
+ hfi1_opfn_data_template, hfi1_opfn_data_conn_request,
+ TP_PROTO(struct rvt_qp *qp, u8 capcode, u64 data),
+ TP_ARGS(qp, capcode, data)
+);
+
+DEFINE_EVENT(/* event */
+ hfi1_opfn_data_template, hfi1_opfn_data_conn_response,
+ TP_PROTO(struct rvt_qp *qp, u8 capcode, u64 data),
+ TP_ARGS(qp, capcode, data)
+);
+
+DEFINE_EVENT(/* event */
+ hfi1_opfn_data_template, hfi1_opfn_data_conn_reply,
+ TP_PROTO(struct rvt_qp *qp, u8 capcode, u64 data),
+ TP_ARGS(qp, capcode, data)
+);
+
+DECLARE_EVENT_CLASS(/* opfn_param */
+ hfi1_opfn_param_template,
+ TP_PROTO(struct rvt_qp *qp, char remote,
+ struct tid_rdma_params *param),
+ TP_ARGS(qp, remote, param),
+ TP_STRUCT__entry(/* entry */
+ DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device))
+ __field(u32, qpn)
+ __field(char, remote)
+ __field(u32, param_qp)
+ __field(u32, max_len)
+ __field(u16, jkey)
+ __field(u8, max_read)
+ __field(u8, max_write)
+ __field(u8, timeout)
+ __field(u8, urg)
+ ),
+ TP_fast_assign(/* assign */
+ DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device));
+ __entry->qpn = qp->ibqp.qp_num;
+ __entry->remote = remote;
+ __entry->param_qp = param->qp;
+ __entry->max_len = param->max_len;
+ __entry->jkey = param->jkey;
+ __entry->max_read = param->max_read;
+ __entry->max_write = param->max_write;
+ __entry->timeout = param->timeout;
+ __entry->urg = param->urg;
+ ),
+ TP_printk(/* print */
+ OPFN_PARAM_PRN,
+ __get_str(dev),
+ __entry->qpn,
+ __entry->remote ? "remote" : "local",
+ __entry->param_qp,
+ __entry->max_read,
+ __entry->max_write,
+ __entry->max_len,
+ __entry->jkey,
+ __entry->timeout,
+ __entry->urg
+ )
+);
+
+DEFINE_EVENT(/* event */
+ hfi1_opfn_param_template, hfi1_opfn_param,
+ TP_PROTO(struct rvt_qp *qp, char remote,
+ struct tid_rdma_params *param),
+ TP_ARGS(qp, remote, param)
+);
+
+DECLARE_EVENT_CLASS(/* msg */
+ hfi1_msg_template,
+ TP_PROTO(struct rvt_qp *qp, const char *msg, u64 more),
+ TP_ARGS(qp, msg, more),
+ TP_STRUCT__entry(/* entry */
+ __field(u32, qpn)
+ __string(msg, msg)
+ __field(u64, more)
+ ),
+ TP_fast_assign(/* assign */
+ __entry->qpn = qp ? qp->ibqp.qp_num : 0;
+ __assign_str(msg);
+ __entry->more = more;
+ ),
+ TP_printk(/* print */
+ "qpn 0x%x %s 0x%llx",
+ __entry->qpn,
+ __get_str(msg),
+ __entry->more
+ )
+);
+
+DEFINE_EVENT(/* event */
+ hfi1_msg_template, hfi1_msg_opfn_conn_request,
+ TP_PROTO(struct rvt_qp *qp, const char *msg, u64 more),
+ TP_ARGS(qp, msg, more)
+);
+
+DEFINE_EVENT(/* event */
+ hfi1_msg_template, hfi1_msg_opfn_conn_error,
+ TP_PROTO(struct rvt_qp *qp, const char *msg, u64 more),
+ TP_ARGS(qp, msg, more)
+);
+
+DEFINE_EVENT(/* event */
+ hfi1_msg_template, hfi1_msg_alloc_tids,
+ TP_PROTO(struct rvt_qp *qp, const char *msg, u64 more),
+ TP_ARGS(qp, msg, more)
+);
+
+DEFINE_EVENT(/* event */
+ hfi1_msg_template, hfi1_msg_tid_restart_req,
+ TP_PROTO(struct rvt_qp *qp, const char *msg, u64 more),
+ TP_ARGS(qp, msg, more)
+);
+
+DEFINE_EVENT(/* event */
+ hfi1_msg_template, hfi1_msg_handle_kdeth_eflags,
+ TP_PROTO(struct rvt_qp *qp, const char *msg, u64 more),
+ TP_ARGS(qp, msg, more)
+);
+
+DEFINE_EVENT(/* event */
+ hfi1_msg_template, hfi1_msg_tid_timeout,
+ TP_PROTO(struct rvt_qp *qp, const char *msg, u64 more),
+ TP_ARGS(qp, msg, more)
+);
+
+DEFINE_EVENT(/* event */
+ hfi1_msg_template, hfi1_msg_tid_retry_timeout,
+ TP_PROTO(struct rvt_qp *qp, const char *msg, u64 more),
+ TP_ARGS(qp, msg, more)
+);
+
+DECLARE_EVENT_CLASS(/* tid_flow_page */
+ hfi1_tid_flow_page_template,
+ TP_PROTO(struct rvt_qp *qp, struct tid_rdma_flow *flow, u32 index,
+ char mtu8k, char v1, void *vaddr),
+ TP_ARGS(qp, flow, index, mtu8k, v1, vaddr),
+ TP_STRUCT__entry(/* entry */
+ DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device))
+ __field(u32, qpn)
+ __field(char, mtu8k)
+ __field(char, v1)
+ __field(u32, index)
+ __field(u64, page)
+ __field(u64, vaddr)
+ ),
+ TP_fast_assign(/* assign */
+ DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device));
+ __entry->qpn = qp->ibqp.qp_num;
+ __entry->mtu8k = mtu8k;
+ __entry->v1 = v1;
+ __entry->index = index;
+ __entry->page = vaddr ? (u64)virt_to_page(vaddr) : 0ULL;
+ __entry->vaddr = (u64)vaddr;
+ ),
+ TP_printk(/* print */
+ "[%s] qpn 0x%x page[%u]: page 0x%llx %s 0x%llx",
+ __get_str(dev),
+ __entry->qpn,
+ __entry->index,
+ __entry->page,
+ __entry->mtu8k ? (__entry->v1 ? "v1" : "v0") : "vaddr",
+ __entry->vaddr
+ )
+);
+
+DEFINE_EVENT(/* event */
+ hfi1_tid_flow_page_template, hfi1_tid_flow_page,
+ TP_PROTO(struct rvt_qp *qp, struct tid_rdma_flow *flow, u32 index,
+ char mtu8k, char v1, void *vaddr),
+ TP_ARGS(qp, flow, index, mtu8k, v1, vaddr)
+);
+
+DECLARE_EVENT_CLASS(/* tid_pageset */
+ hfi1_tid_pageset_template,
+ TP_PROTO(struct rvt_qp *qp, u32 index, u16 idx, u16 count),
+ TP_ARGS(qp, index, idx, count),
+ TP_STRUCT__entry(/* entry */
+ DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device))
+ __field(u32, qpn)
+ __field(u32, index)
+ __field(u16, idx)
+ __field(u16, count)
+ ),
+ TP_fast_assign(/* assign */
+ DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device));
+ __entry->qpn = qp->ibqp.qp_num;
+ __entry->index = index;
+ __entry->idx = idx;
+ __entry->count = count;
+ ),
+ TP_printk(/* print */
+ "[%s] qpn 0x%x list[%u]: idx %u count %u",
+ __get_str(dev),
+ __entry->qpn,
+ __entry->index,
+ __entry->idx,
+ __entry->count
+ )
+);
+
+DEFINE_EVENT(/* event */
+ hfi1_tid_pageset_template, hfi1_tid_pageset,
+ TP_PROTO(struct rvt_qp *qp, u32 index, u16 idx, u16 count),
+ TP_ARGS(qp, index, idx, count)
+);
+
+DECLARE_EVENT_CLASS(/* tid_fow */
+ hfi1_tid_flow_template,
+ TP_PROTO(struct rvt_qp *qp, int index, struct tid_rdma_flow *flow),
+ TP_ARGS(qp, index, flow),
+ TP_STRUCT__entry(/* entry */
+ DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device))
+ __field(u32, qpn)
+ __field(int, index)
+ __field(int, idx)
+ __field(u32, resp_ib_psn)
+ __field(u32, generation)
+ __field(u32, fspsn)
+ __field(u32, flpsn)
+ __field(u32, r_next_psn)
+ __field(u32, ib_spsn)
+ __field(u32, ib_lpsn)
+ __field(u32, npagesets)
+ __field(u32, tnode_cnt)
+ __field(u32, tidcnt)
+ __field(u32, tid_idx)
+ __field(u32, tid_offset)
+ __field(u32, length)
+ __field(u32, sent)
+ ),
+ TP_fast_assign(/* assign */
+ DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device));
+ __entry->qpn = qp->ibqp.qp_num;
+ __entry->index = index;
+ __entry->idx = flow->idx;
+ __entry->resp_ib_psn = flow->flow_state.resp_ib_psn;
+ __entry->generation = flow->flow_state.generation;
+ __entry->fspsn = full_flow_psn(flow,
+ flow->flow_state.spsn);
+ __entry->flpsn = full_flow_psn(flow,
+ flow->flow_state.lpsn);
+ __entry->r_next_psn = flow->flow_state.r_next_psn;
+ __entry->ib_spsn = flow->flow_state.ib_spsn;
+ __entry->ib_lpsn = flow->flow_state.ib_lpsn;
+ __entry->npagesets = flow->npagesets;
+ __entry->tnode_cnt = flow->tnode_cnt;
+ __entry->tidcnt = flow->tidcnt;
+ __entry->tid_idx = flow->tid_idx;
+ __entry->tid_offset = flow->tid_offset;
+ __entry->length = flow->length;
+ __entry->sent = flow->sent;
+ ),
+ TP_printk(/* print */
+ TID_FLOW_PRN,
+ __get_str(dev),
+ __entry->qpn,
+ __entry->index,
+ __entry->idx,
+ __entry->resp_ib_psn,
+ __entry->generation,
+ __entry->fspsn,
+ __entry->flpsn,
+ __entry->r_next_psn,
+ __entry->ib_spsn,
+ __entry->ib_lpsn,
+ __entry->npagesets,
+ __entry->tnode_cnt,
+ __entry->tidcnt,
+ __entry->tid_idx,
+ __entry->tid_offset,
+ __entry->length,
+ __entry->sent
+ )
+);
+
+DEFINE_EVENT(/* event */
+ hfi1_tid_flow_template, hfi1_tid_flow_alloc,
+ TP_PROTO(struct rvt_qp *qp, int index, struct tid_rdma_flow *flow),
+ TP_ARGS(qp, index, flow)
+);
+
+DEFINE_EVENT(/* event */
+ hfi1_tid_flow_template, hfi1_tid_flow_build_read_pkt,
+ TP_PROTO(struct rvt_qp *qp, int index, struct tid_rdma_flow *flow),
+ TP_ARGS(qp, index, flow)
+);
+
+DEFINE_EVENT(/* event */
+ hfi1_tid_flow_template, hfi1_tid_flow_build_read_resp,
+ TP_PROTO(struct rvt_qp *qp, int index, struct tid_rdma_flow *flow),
+ TP_ARGS(qp, index, flow)
+);
+
+DEFINE_EVENT(/* event */
+ hfi1_tid_flow_template, hfi1_tid_flow_rcv_read_req,
+ TP_PROTO(struct rvt_qp *qp, int index, struct tid_rdma_flow *flow),
+ TP_ARGS(qp, index, flow)
+);
+
+DEFINE_EVENT(/* event */
+ hfi1_tid_flow_template, hfi1_tid_flow_rcv_read_resp,
+ TP_PROTO(struct rvt_qp *qp, int index, struct tid_rdma_flow *flow),
+ TP_ARGS(qp, index, flow)
+);
+
+DEFINE_EVENT(/* event */
+ hfi1_tid_flow_template, hfi1_tid_flow_restart_req,
+ TP_PROTO(struct rvt_qp *qp, int index, struct tid_rdma_flow *flow),
+ TP_ARGS(qp, index, flow)
+);
+
+DEFINE_EVENT(/* event */
+ hfi1_tid_flow_template, hfi1_tid_flow_build_write_resp,
+ TP_PROTO(struct rvt_qp *qp, int index, struct tid_rdma_flow *flow),
+ TP_ARGS(qp, index, flow)
+);
+
+DEFINE_EVENT(/* event */
+ hfi1_tid_flow_template, hfi1_tid_flow_rcv_write_resp,
+ TP_PROTO(struct rvt_qp *qp, int index, struct tid_rdma_flow *flow),
+ TP_ARGS(qp, index, flow)
+);
+
+DEFINE_EVENT(/* event */
+ hfi1_tid_flow_template, hfi1_tid_flow_build_write_data,
+ TP_PROTO(struct rvt_qp *qp, int index, struct tid_rdma_flow *flow),
+ TP_ARGS(qp, index, flow)
+);
+
+DEFINE_EVENT(/* event */
+ hfi1_tid_flow_template, hfi1_tid_flow_rcv_tid_ack,
+ TP_PROTO(struct rvt_qp *qp, int index, struct tid_rdma_flow *flow),
+ TP_ARGS(qp, index, flow)
+);
+
+DEFINE_EVENT(/* event */
+ hfi1_tid_flow_template, hfi1_tid_flow_rcv_resync,
+ TP_PROTO(struct rvt_qp *qp, int index, struct tid_rdma_flow *flow),
+ TP_ARGS(qp, index, flow)
+);
+
+DEFINE_EVENT(/* event */
+ hfi1_tid_flow_template, hfi1_tid_flow_handle_kdeth_eflags,
+ TP_PROTO(struct rvt_qp *qp, int index, struct tid_rdma_flow *flow),
+ TP_ARGS(qp, index, flow)
+);
+
+DEFINE_EVENT(/* event */
+ hfi1_tid_flow_template, hfi1_tid_flow_read_kdeth_eflags,
+ TP_PROTO(struct rvt_qp *qp, int index, struct tid_rdma_flow *flow),
+ TP_ARGS(qp, index, flow)
+);
+
+DECLARE_EVENT_CLASS(/* tid_node */
+ hfi1_tid_node_template,
+ TP_PROTO(struct rvt_qp *qp, const char *msg, u32 index, u32 base,
+ u8 map, u8 used, u8 cnt),
+ TP_ARGS(qp, msg, index, base, map, used, cnt),
+ TP_STRUCT__entry(/* entry */
+ DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device))
+ __field(u32, qpn)
+ __string(msg, msg)
+ __field(u32, index)
+ __field(u32, base)
+ __field(u8, map)
+ __field(u8, used)
+ __field(u8, cnt)
+ ),
+ TP_fast_assign(/* assign */
+ DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device));
+ __entry->qpn = qp->ibqp.qp_num;
+ __assign_str(msg);
+ __entry->index = index;
+ __entry->base = base;
+ __entry->map = map;
+ __entry->used = used;
+ __entry->cnt = cnt;
+ ),
+ TP_printk(/* print */
+ TID_NODE_PRN,
+ __get_str(dev),
+ __entry->qpn,
+ __get_str(msg),
+ __entry->index,
+ __entry->base,
+ __entry->map,
+ __entry->used,
+ __entry->cnt
+ )
+);
+
+DEFINE_EVENT(/* event */
+ hfi1_tid_node_template, hfi1_tid_node_add,
+ TP_PROTO(struct rvt_qp *qp, const char *msg, u32 index, u32 base,
+ u8 map, u8 used, u8 cnt),
+ TP_ARGS(qp, msg, index, base, map, used, cnt)
+);
+
+DECLARE_EVENT_CLASS(/* tid_entry */
+ hfi1_tid_entry_template,
+ TP_PROTO(struct rvt_qp *qp, int index, u32 ent),
+ TP_ARGS(qp, index, ent),
+ TP_STRUCT__entry(/* entry */
+ DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device))
+ __field(u32, qpn)
+ __field(int, index)
+ __field(u8, ctrl)
+ __field(u16, idx)
+ __field(u16, len)
+ ),
+ TP_fast_assign(/* assign */
+ DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device));
+ __entry->qpn = qp->ibqp.qp_num;
+ __entry->index = index;
+ __entry->ctrl = hfi1_trace_get_tid_ctrl(ent);
+ __entry->idx = hfi1_trace_get_tid_idx(ent);
+ __entry->len = hfi1_trace_get_tid_len(ent);
+ ),
+ TP_printk(/* print */
+ "[%s] qpn 0x%x TID entry %d: idx %u len %u ctrl 0x%x",
+ __get_str(dev),
+ __entry->qpn,
+ __entry->index,
+ __entry->idx,
+ __entry->len,
+ __entry->ctrl
+ )
+);
+
+DEFINE_EVENT(/* event */
+ hfi1_tid_entry_template, hfi1_tid_entry_alloc,
+ TP_PROTO(struct rvt_qp *qp, int index, u32 entry),
+ TP_ARGS(qp, index, entry)
+);
+
+DEFINE_EVENT(/* event */
+ hfi1_tid_entry_template, hfi1_tid_entry_build_read_resp,
+ TP_PROTO(struct rvt_qp *qp, int index, u32 ent),
+ TP_ARGS(qp, index, ent)
+);
+
+DEFINE_EVENT(/* event */
+ hfi1_tid_entry_template, hfi1_tid_entry_rcv_read_req,
+ TP_PROTO(struct rvt_qp *qp, int index, u32 ent),
+ TP_ARGS(qp, index, ent)
+);
+
+DEFINE_EVENT(/* event */
+ hfi1_tid_entry_template, hfi1_tid_entry_rcv_write_resp,
+ TP_PROTO(struct rvt_qp *qp, int index, u32 entry),
+ TP_ARGS(qp, index, entry)
+);
+
+DEFINE_EVENT(/* event */
+ hfi1_tid_entry_template, hfi1_tid_entry_build_write_data,
+ TP_PROTO(struct rvt_qp *qp, int index, u32 entry),
+ TP_ARGS(qp, index, entry)
+);
+
+DECLARE_EVENT_CLASS(/* rsp_info */
+ hfi1_responder_info_template,
+ TP_PROTO(struct rvt_qp *qp, u32 psn),
+ TP_ARGS(qp, psn),
+ TP_STRUCT__entry(/* entry */
+ DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device))
+ __field(u32, qpn)
+ __field(u8, state)
+ __field(u8, s_state)
+ __field(u32, psn)
+ __field(u32, r_psn)
+ __field(u8, r_state)
+ __field(u8, r_flags)
+ __field(u8, r_head_ack_queue)
+ __field(u8, s_tail_ack_queue)
+ __field(u8, s_acked_ack_queue)
+ __field(u8, s_ack_state)
+ __field(u8, s_nak_state)
+ __field(u8, r_nak_state)
+ __field(u32, s_flags)
+ __field(u32, ps_flags)
+ __field(unsigned long, iow_flags)
+ ),
+ TP_fast_assign(/* assign */
+ struct hfi1_qp_priv *priv = qp->priv;
+
+ DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device));
+ __entry->qpn = qp->ibqp.qp_num;
+ __entry->state = qp->state;
+ __entry->s_state = qp->s_state;
+ __entry->psn = psn;
+ __entry->r_psn = qp->r_psn;
+ __entry->r_state = qp->r_state;
+ __entry->r_flags = qp->r_flags;
+ __entry->r_head_ack_queue = qp->r_head_ack_queue;
+ __entry->s_tail_ack_queue = qp->s_tail_ack_queue;
+ __entry->s_acked_ack_queue = qp->s_acked_ack_queue;
+ __entry->s_ack_state = qp->s_ack_state;
+ __entry->s_nak_state = qp->s_nak_state;
+ __entry->s_flags = qp->s_flags;
+ __entry->ps_flags = priv->s_flags;
+ __entry->iow_flags = priv->s_iowait.flags;
+ ),
+ TP_printk(/* print */
+ RSP_INFO_PRN,
+ __get_str(dev),
+ __entry->qpn,
+ __entry->state,
+ __entry->s_state,
+ __entry->psn,
+ __entry->r_psn,
+ __entry->r_state,
+ __entry->r_flags,
+ __entry->r_head_ack_queue,
+ __entry->s_tail_ack_queue,
+ __entry->s_acked_ack_queue,
+ __entry->s_ack_state,
+ __entry->s_nak_state,
+ __entry->s_flags,
+ __entry->ps_flags,
+ __entry->iow_flags
+ )
+);
+
+DEFINE_EVENT(/* event */
+ hfi1_responder_info_template, hfi1_rsp_make_rc_ack,
+ TP_PROTO(struct rvt_qp *qp, u32 psn),
+ TP_ARGS(qp, psn)
+);
+
+DEFINE_EVENT(/* event */
+ hfi1_responder_info_template, hfi1_rsp_rcv_tid_read_req,
+ TP_PROTO(struct rvt_qp *qp, u32 psn),
+ TP_ARGS(qp, psn)
+);
+
+DEFINE_EVENT(/* event */
+ hfi1_responder_info_template, hfi1_rsp_tid_rcv_error,
+ TP_PROTO(struct rvt_qp *qp, u32 psn),
+ TP_ARGS(qp, psn)
+);
+
+DEFINE_EVENT(/* event */
+ hfi1_responder_info_template, hfi1_rsp_tid_write_alloc_res,
+ TP_PROTO(struct rvt_qp *qp, u32 psn),
+ TP_ARGS(qp, psn)
+);
+
+DEFINE_EVENT(/* event */
+ hfi1_responder_info_template, hfi1_rsp_rcv_tid_write_req,
+ TP_PROTO(struct rvt_qp *qp, u32 psn),
+ TP_ARGS(qp, psn)
+);
+
+DEFINE_EVENT(/* event */
+ hfi1_responder_info_template, hfi1_rsp_build_tid_write_resp,
+ TP_PROTO(struct rvt_qp *qp, u32 psn),
+ TP_ARGS(qp, psn)
+);
+
+DEFINE_EVENT(/* event */
+ hfi1_responder_info_template, hfi1_rsp_rcv_tid_write_data,
+ TP_PROTO(struct rvt_qp *qp, u32 psn),
+ TP_ARGS(qp, psn)
+);
+
+DEFINE_EVENT(/* event */
+ hfi1_responder_info_template, hfi1_rsp_make_tid_ack,
+ TP_PROTO(struct rvt_qp *qp, u32 psn),
+ TP_ARGS(qp, psn)
+);
+
+DEFINE_EVENT(/* event */
+ hfi1_responder_info_template, hfi1_rsp_handle_kdeth_eflags,
+ TP_PROTO(struct rvt_qp *qp, u32 psn),
+ TP_ARGS(qp, psn)
+);
+
+DEFINE_EVENT(/* event */
+ hfi1_responder_info_template, hfi1_rsp_read_kdeth_eflags,
+ TP_PROTO(struct rvt_qp *qp, u32 psn),
+ TP_ARGS(qp, psn)
+);
+
+DECLARE_EVENT_CLASS(/* sender_info */
+ hfi1_sender_info_template,
+ TP_PROTO(struct rvt_qp *qp),
+ TP_ARGS(qp),
+ TP_STRUCT__entry(/* entry */
+ DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device))
+ __field(u32, qpn)
+ __field(u8, state)
+ __field(u32, s_cur)
+ __field(u32, s_tail)
+ __field(u32, s_head)
+ __field(u32, s_acked)
+ __field(u32, s_last)
+ __field(u32, s_psn)
+ __field(u32, s_last_psn)
+ __field(u32, s_flags)
+ __field(u32, ps_flags)
+ __field(unsigned long, iow_flags)
+ __field(u8, s_state)
+ __field(u8, s_num_rd)
+ __field(u8, s_retry)
+ ),
+ TP_fast_assign(/* assign */
+ DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device));
+ __entry->qpn = qp->ibqp.qp_num;
+ __entry->state = qp->state;
+ __entry->s_cur = qp->s_cur;
+ __entry->s_tail = qp->s_tail;
+ __entry->s_head = qp->s_head;
+ __entry->s_acked = qp->s_acked;
+ __entry->s_last = qp->s_last;
+ __entry->s_psn = qp->s_psn;
+ __entry->s_last_psn = qp->s_last_psn;
+ __entry->s_flags = qp->s_flags;
+ __entry->ps_flags = ((struct hfi1_qp_priv *)qp->priv)->s_flags;
+ __entry->iow_flags =
+ ((struct hfi1_qp_priv *)qp->priv)->s_iowait.flags;
+ __entry->s_state = qp->s_state;
+ __entry->s_num_rd = qp->s_num_rd_atomic;
+ __entry->s_retry = qp->s_retry;
+ ),
+ TP_printk(/* print */
+ SENDER_INFO_PRN,
+ __get_str(dev),
+ __entry->qpn,
+ __entry->state,
+ __entry->s_cur,
+ __entry->s_tail,
+ __entry->s_head,
+ __entry->s_acked,
+ __entry->s_last,
+ __entry->s_psn,
+ __entry->s_last_psn,
+ __entry->s_flags,
+ __entry->ps_flags,
+ __entry->iow_flags,
+ __entry->s_state,
+ __entry->s_num_rd,
+ __entry->s_retry
+ )
+);
+
+DEFINE_EVENT(/* event */
+ hfi1_sender_info_template, hfi1_sender_make_rc_req,
+ TP_PROTO(struct rvt_qp *qp),
+ TP_ARGS(qp)
+);
+
+DEFINE_EVENT(/* event */
+ hfi1_sender_info_template, hfi1_sender_reset_psn,
+ TP_PROTO(struct rvt_qp *qp),
+ TP_ARGS(qp)
+);
+
+DEFINE_EVENT(/* event */
+ hfi1_sender_info_template, hfi1_sender_restart_rc,
+ TP_PROTO(struct rvt_qp *qp),
+ TP_ARGS(qp)
+);
+
+DEFINE_EVENT(/* event */
+ hfi1_sender_info_template, hfi1_sender_do_rc_ack,
+ TP_PROTO(struct rvt_qp *qp),
+ TP_ARGS(qp)
+);
+
+DEFINE_EVENT(/* event */
+ hfi1_sender_info_template, hfi1_sender_rcv_tid_read_resp,
+ TP_PROTO(struct rvt_qp *qp),
+ TP_ARGS(qp)
+);
+
+DEFINE_EVENT(/* event */
+ hfi1_sender_info_template, hfi1_sender_rcv_tid_ack,
+ TP_PROTO(struct rvt_qp *qp),
+ TP_ARGS(qp)
+);
+
+DEFINE_EVENT(/* event */
+ hfi1_sender_info_template, hfi1_sender_make_tid_pkt,
+ TP_PROTO(struct rvt_qp *qp),
+ TP_ARGS(qp)
+);
+
+DEFINE_EVENT(/* event */
+ hfi1_sender_info_template, hfi1_sender_read_kdeth_eflags,
+ TP_PROTO(struct rvt_qp *qp),
+ TP_ARGS(qp)
+);
+
+DECLARE_EVENT_CLASS(/* tid_read_sender */
+ hfi1_tid_read_sender_template,
+ TP_PROTO(struct rvt_qp *qp, char newreq),
+ TP_ARGS(qp, newreq),
+ TP_STRUCT__entry(/* entry */
+ DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device))
+ __field(u32, qpn)
+ __field(char, newreq)
+ __field(u32, tid_r_reqs)
+ __field(u32, tid_r_comp)
+ __field(u32, pending_tid_r_segs)
+ __field(u32, s_flags)
+ __field(u32, ps_flags)
+ __field(unsigned long, iow_flags)
+ __field(u8, s_state)
+ __field(u32, hw_flow_index)
+ __field(u32, generation)
+ __field(u32, fpsn)
+ ),
+ TP_fast_assign(/* assign */
+ struct hfi1_qp_priv *priv = qp->priv;
+
+ DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device));
+ __entry->qpn = qp->ibqp.qp_num;
+ __entry->newreq = newreq;
+ __entry->tid_r_reqs = priv->tid_r_reqs;
+ __entry->tid_r_comp = priv->tid_r_comp;
+ __entry->pending_tid_r_segs = priv->pending_tid_r_segs;
+ __entry->s_flags = qp->s_flags;
+ __entry->ps_flags = priv->s_flags;
+ __entry->iow_flags = priv->s_iowait.flags;
+ __entry->s_state = priv->s_state;
+ __entry->hw_flow_index = priv->flow_state.index;
+ __entry->generation = priv->flow_state.generation;
+ __entry->fpsn = priv->flow_state.psn;
+ ),
+ TP_printk(/* print */
+ TID_READ_SENDER_PRN,
+ __get_str(dev),
+ __entry->qpn,
+ __entry->newreq,
+ __entry->tid_r_reqs,
+ __entry->tid_r_comp,
+ __entry->pending_tid_r_segs,
+ __entry->s_flags,
+ __entry->ps_flags,
+ __entry->iow_flags,
+ __entry->s_state,
+ __entry->hw_flow_index,
+ __entry->generation,
+ __entry->fpsn
+ )
+);
+
+DEFINE_EVENT(/* event */
+ hfi1_tid_read_sender_template, hfi1_tid_read_sender_make_req,
+ TP_PROTO(struct rvt_qp *qp, char newreq),
+ TP_ARGS(qp, newreq)
+);
+
+DEFINE_EVENT(/* event */
+ hfi1_tid_read_sender_template, hfi1_tid_read_sender_kdeth_eflags,
+ TP_PROTO(struct rvt_qp *qp, char newreq),
+ TP_ARGS(qp, newreq)
+);
+
+DECLARE_EVENT_CLASS(/* tid_rdma_request */
+ hfi1_tid_rdma_request_template,
+ TP_PROTO(struct rvt_qp *qp, char newreq, u8 opcode, u32 psn, u32 lpsn,
+ struct tid_rdma_request *req),
+ TP_ARGS(qp, newreq, opcode, psn, lpsn, req),
+ TP_STRUCT__entry(/* entry */
+ DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device))
+ __field(u32, qpn)
+ __field(char, newreq)
+ __field(u8, opcode)
+ __field(u32, psn)
+ __field(u32, lpsn)
+ __field(u32, cur_seg)
+ __field(u32, comp_seg)
+ __field(u32, ack_seg)
+ __field(u32, alloc_seg)
+ __field(u32, total_segs)
+ __field(u16, setup_head)
+ __field(u16, clear_tail)
+ __field(u16, flow_idx)
+ __field(u16, acked_tail)
+ __field(u32, state)
+ __field(u32, r_ack_psn)
+ __field(u32, r_flow_psn)
+ __field(u32, r_last_acked)
+ __field(u32, s_next_psn)
+ ),
+ TP_fast_assign(/* assign */
+ DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device));
+ __entry->qpn = qp->ibqp.qp_num;
+ __entry->newreq = newreq;
+ __entry->opcode = opcode;
+ __entry->psn = psn;
+ __entry->lpsn = lpsn;
+ __entry->cur_seg = req->cur_seg;
+ __entry->comp_seg = req->comp_seg;
+ __entry->ack_seg = req->ack_seg;
+ __entry->alloc_seg = req->alloc_seg;
+ __entry->total_segs = req->total_segs;
+ __entry->setup_head = req->setup_head;
+ __entry->clear_tail = req->clear_tail;
+ __entry->flow_idx = req->flow_idx;
+ __entry->acked_tail = req->acked_tail;
+ __entry->state = req->state;
+ __entry->r_ack_psn = req->r_ack_psn;
+ __entry->r_flow_psn = req->r_flow_psn;
+ __entry->r_last_acked = req->r_last_acked;
+ __entry->s_next_psn = req->s_next_psn;
+ ),
+ TP_printk(/* print */
+ TID_REQ_PRN,
+ __get_str(dev),
+ __entry->qpn,
+ __entry->newreq,
+ __entry->opcode,
+ __entry->psn,
+ __entry->lpsn,
+ __entry->cur_seg,
+ __entry->comp_seg,
+ __entry->ack_seg,
+ __entry->alloc_seg,
+ __entry->total_segs,
+ __entry->setup_head,
+ __entry->clear_tail,
+ __entry->flow_idx,
+ __entry->acked_tail,
+ __entry->state,
+ __entry->r_ack_psn,
+ __entry->r_flow_psn,
+ __entry->r_last_acked,
+ __entry->s_next_psn
+ )
+);
+
+DEFINE_EVENT(/* event */
+ hfi1_tid_rdma_request_template, hfi1_tid_req_make_req_read,
+ TP_PROTO(struct rvt_qp *qp, char newreq, u8 opcode, u32 psn, u32 lpsn,
+ struct tid_rdma_request *req),
+ TP_ARGS(qp, newreq, opcode, psn, lpsn, req)
+);
+
+DEFINE_EVENT(/* event */
+ hfi1_tid_rdma_request_template, hfi1_tid_req_build_read_req,
+ TP_PROTO(struct rvt_qp *qp, char newreq, u8 opcode, u32 psn, u32 lpsn,
+ struct tid_rdma_request *req),
+ TP_ARGS(qp, newreq, opcode, psn, lpsn, req)
+);
+
+DEFINE_EVENT(/* event */
+ hfi1_tid_rdma_request_template, hfi1_tid_req_rcv_read_req,
+ TP_PROTO(struct rvt_qp *qp, char newreq, u8 opcode, u32 psn, u32 lpsn,
+ struct tid_rdma_request *req),
+ TP_ARGS(qp, newreq, opcode, psn, lpsn, req)
+);
+
+DEFINE_EVENT(/* event */
+ hfi1_tid_rdma_request_template, hfi1_tid_req_rcv_read_resp,
+ TP_PROTO(struct rvt_qp *qp, char newreq, u8 opcode, u32 psn, u32 lpsn,
+ struct tid_rdma_request *req),
+ TP_ARGS(qp, newreq, opcode, psn, lpsn, req)
+);
+
+DEFINE_EVENT(/* event */
+ hfi1_tid_rdma_request_template, hfi1_tid_req_rcv_err,
+ TP_PROTO(struct rvt_qp *qp, char newreq, u8 opcode, u32 psn, u32 lpsn,
+ struct tid_rdma_request *req),
+ TP_ARGS(qp, newreq, opcode, psn, lpsn, req)
+);
+
+DEFINE_EVENT(/* event */
+ hfi1_tid_rdma_request_template, hfi1_tid_req_restart_req,
+ TP_PROTO(struct rvt_qp *qp, char newreq, u8 opcode, u32 psn, u32 lpsn,
+ struct tid_rdma_request *req),
+ TP_ARGS(qp, newreq, opcode, psn, lpsn, req)
+);
+
+DEFINE_EVENT(/* event */
+ hfi1_tid_rdma_request_template, hfi1_tid_req_setup_tid_wqe,
+ TP_PROTO(struct rvt_qp *qp, char newreq, u8 opcode, u32 psn, u32 lpsn,
+ struct tid_rdma_request *req),
+ TP_ARGS(qp, newreq, opcode, psn, lpsn, req)
+);
+
+DEFINE_EVENT(/* event */
+ hfi1_tid_rdma_request_template, hfi1_tid_req_write_alloc_res,
+ TP_PROTO(struct rvt_qp *qp, char newreq, u8 opcode, u32 psn, u32 lpsn,
+ struct tid_rdma_request *req),
+ TP_ARGS(qp, newreq, opcode, psn, lpsn, req)
+);
+
+DEFINE_EVENT(/* event */
+ hfi1_tid_rdma_request_template, hfi1_tid_req_rcv_write_req,
+ TP_PROTO(struct rvt_qp *qp, char newreq, u8 opcode, u32 psn, u32 lpsn,
+ struct tid_rdma_request *req),
+ TP_ARGS(qp, newreq, opcode, psn, lpsn, req)
+);
+
+DEFINE_EVENT(/* event */
+ hfi1_tid_rdma_request_template, hfi1_tid_req_build_write_resp,
+ TP_PROTO(struct rvt_qp *qp, char newreq, u8 opcode, u32 psn, u32 lpsn,
+ struct tid_rdma_request *req),
+ TP_ARGS(qp, newreq, opcode, psn, lpsn, req)
+);
+
+DEFINE_EVENT(/* event */
+ hfi1_tid_rdma_request_template, hfi1_tid_req_rcv_write_resp,
+ TP_PROTO(struct rvt_qp *qp, char newreq, u8 opcode, u32 psn, u32 lpsn,
+ struct tid_rdma_request *req),
+ TP_ARGS(qp, newreq, opcode, psn, lpsn, req)
+);
+
+DEFINE_EVENT(/* event */
+ hfi1_tid_rdma_request_template, hfi1_tid_req_rcv_write_data,
+ TP_PROTO(struct rvt_qp *qp, char newreq, u8 opcode, u32 psn, u32 lpsn,
+ struct tid_rdma_request *req),
+ TP_ARGS(qp, newreq, opcode, psn, lpsn, req)
+);
+
+DEFINE_EVENT(/* event */
+ hfi1_tid_rdma_request_template, hfi1_tid_req_rcv_tid_ack,
+ TP_PROTO(struct rvt_qp *qp, char newreq, u8 opcode, u32 psn, u32 lpsn,
+ struct tid_rdma_request *req),
+ TP_ARGS(qp, newreq, opcode, psn, lpsn, req)
+);
+
+DEFINE_EVENT(/* event */
+ hfi1_tid_rdma_request_template, hfi1_tid_req_tid_retry_timeout,
+ TP_PROTO(struct rvt_qp *qp, char newreq, u8 opcode, u32 psn, u32 lpsn,
+ struct tid_rdma_request *req),
+ TP_ARGS(qp, newreq, opcode, psn, lpsn, req)
+);
+
+DEFINE_EVENT(/* event */
+ hfi1_tid_rdma_request_template, hfi1_tid_req_rcv_resync,
+ TP_PROTO(struct rvt_qp *qp, char newreq, u8 opcode, u32 psn, u32 lpsn,
+ struct tid_rdma_request *req),
+ TP_ARGS(qp, newreq, opcode, psn, lpsn, req)
+);
+
+DEFINE_EVENT(/* event */
+ hfi1_tid_rdma_request_template, hfi1_tid_req_make_tid_pkt,
+ TP_PROTO(struct rvt_qp *qp, char newreq, u8 opcode, u32 psn, u32 lpsn,
+ struct tid_rdma_request *req),
+ TP_ARGS(qp, newreq, opcode, psn, lpsn, req)
+);
+
+DEFINE_EVENT(/* event */
+ hfi1_tid_rdma_request_template, hfi1_tid_req_make_tid_ack,
+ TP_PROTO(struct rvt_qp *qp, char newreq, u8 opcode, u32 psn, u32 lpsn,
+ struct tid_rdma_request *req),
+ TP_ARGS(qp, newreq, opcode, psn, lpsn, req)
+);
+
+DEFINE_EVENT(/* event */
+ hfi1_tid_rdma_request_template, hfi1_tid_req_handle_kdeth_eflags,
+ TP_PROTO(struct rvt_qp *qp, char newreq, u8 opcode, u32 psn, u32 lpsn,
+ struct tid_rdma_request *req),
+ TP_ARGS(qp, newreq, opcode, psn, lpsn, req)
+);
+
+DEFINE_EVENT(/* event */
+ hfi1_tid_rdma_request_template, hfi1_tid_req_read_kdeth_eflags,
+ TP_PROTO(struct rvt_qp *qp, char newreq, u8 opcode, u32 psn, u32 lpsn,
+ struct tid_rdma_request *req),
+ TP_ARGS(qp, newreq, opcode, psn, lpsn, req)
+);
+
+DEFINE_EVENT(/* event */
+ hfi1_tid_rdma_request_template, hfi1_tid_req_make_rc_ack_write,
+ TP_PROTO(struct rvt_qp *qp, char newreq, u8 opcode, u32 psn, u32 lpsn,
+ struct tid_rdma_request *req),
+ TP_ARGS(qp, newreq, opcode, psn, lpsn, req)
+);
+
+DEFINE_EVENT(/* event */
+ hfi1_tid_rdma_request_template, hfi1_tid_req_make_req_write,
+ TP_PROTO(struct rvt_qp *qp, char newreq, u8 opcode, u32 psn, u32 lpsn,
+ struct tid_rdma_request *req),
+ TP_ARGS(qp, newreq, opcode, psn, lpsn, req)
+);
+
+DEFINE_EVENT(/* event */
+ hfi1_tid_rdma_request_template, hfi1_tid_req_update_num_rd_atomic,
+ TP_PROTO(struct rvt_qp *qp, char newreq, u8 opcode, u32 psn, u32 lpsn,
+ struct tid_rdma_request *req),
+ TP_ARGS(qp, newreq, opcode, psn, lpsn, req)
+);
+
+DECLARE_EVENT_CLASS(/* rc_rcv_err */
+ hfi1_rc_rcv_err_template,
+ TP_PROTO(struct rvt_qp *qp, u32 opcode, u32 psn, int diff),
+ TP_ARGS(qp, opcode, psn, diff),
+ TP_STRUCT__entry(/* entry */
+ DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device))
+ __field(u32, qpn)
+ __field(u32, s_flags)
+ __field(u8, state)
+ __field(u8, s_acked_ack_queue)
+ __field(u8, s_tail_ack_queue)
+ __field(u8, r_head_ack_queue)
+ __field(u32, opcode)
+ __field(u32, psn)
+ __field(u32, r_psn)
+ __field(int, diff)
+ ),
+ TP_fast_assign(/* assign */
+ DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device));
+ __entry->qpn = qp->ibqp.qp_num;
+ __entry->s_flags = qp->s_flags;
+ __entry->state = qp->state;
+ __entry->s_acked_ack_queue = qp->s_acked_ack_queue;
+ __entry->s_tail_ack_queue = qp->s_tail_ack_queue;
+ __entry->r_head_ack_queue = qp->r_head_ack_queue;
+ __entry->opcode = opcode;
+ __entry->psn = psn;
+ __entry->r_psn = qp->r_psn;
+ __entry->diff = diff;
+ ),
+ TP_printk(/* print */
+ RCV_ERR_PRN,
+ __get_str(dev),
+ __entry->qpn,
+ __entry->s_flags,
+ __entry->state,
+ __entry->s_acked_ack_queue,
+ __entry->s_tail_ack_queue,
+ __entry->r_head_ack_queue,
+ __entry->opcode,
+ __entry->psn,
+ __entry->r_psn,
+ __entry->diff
+ )
+);
+
+DEFINE_EVENT(/* event */
+ hfi1_rc_rcv_err_template, hfi1_tid_rdma_rcv_err,
+ TP_PROTO(struct rvt_qp *qp, u32 opcode, u32 psn, int diff),
+ TP_ARGS(qp, opcode, psn, diff)
+);
+
+DECLARE_EVENT_CLASS(/* sge */
+ hfi1_sge_template,
+ TP_PROTO(struct rvt_qp *qp, int index, struct rvt_sge *sge),
+ TP_ARGS(qp, index, sge),
+ TP_STRUCT__entry(/* entry */
+ DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device))
+ __field(u32, qpn)
+ __field(int, index)
+ __field(u64, vaddr)
+ __field(u32, sge_length)
+ ),
+ TP_fast_assign(/* assign */
+ DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device));
+ __entry->qpn = qp->ibqp.qp_num;
+ __entry->index = index;
+ __entry->vaddr = (u64)sge->vaddr;
+ __entry->sge_length = sge->sge_length;
+ ),
+ TP_printk(/* print */
+ "[%s] qpn 0x%x sge %d: vaddr 0x%llx sge_length %u",
+ __get_str(dev),
+ __entry->qpn,
+ __entry->index,
+ __entry->vaddr,
+ __entry->sge_length
+ )
+);
+
+DEFINE_EVENT(/* event */
+ hfi1_sge_template, hfi1_sge_check_align,
+ TP_PROTO(struct rvt_qp *qp, int index, struct rvt_sge *sge),
+ TP_ARGS(qp, index, sge)
+);
+
+DECLARE_EVENT_CLASS(/* tid_write_sp */
+ hfi1_tid_write_rsp_template,
+ TP_PROTO(struct rvt_qp *qp),
+ TP_ARGS(qp),
+ TP_STRUCT__entry(/* entry */
+ DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device))
+ __field(u32, qpn)
+ __field(u32, r_tid_head)
+ __field(u32, r_tid_tail)
+ __field(u32, r_tid_ack)
+ __field(u32, r_tid_alloc)
+ __field(u32, alloc_w_segs)
+ __field(u32, pending_tid_w_segs)
+ __field(bool, sync_pt)
+ __field(u32, ps_nak_psn)
+ __field(u8, ps_nak_state)
+ __field(u8, prnr_nak_state)
+ __field(u32, hw_flow_index)
+ __field(u32, generation)
+ __field(u32, fpsn)
+ __field(bool, resync)
+ __field(u32, r_next_psn_kdeth)
+ ),
+ TP_fast_assign(/* assign */
+ struct hfi1_qp_priv *priv = qp->priv;
+
+ DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device));
+ __entry->qpn = qp->ibqp.qp_num;
+ __entry->r_tid_head = priv->r_tid_head;
+ __entry->r_tid_tail = priv->r_tid_tail;
+ __entry->r_tid_ack = priv->r_tid_ack;
+ __entry->r_tid_alloc = priv->r_tid_alloc;
+ __entry->alloc_w_segs = priv->alloc_w_segs;
+ __entry->pending_tid_w_segs = priv->pending_tid_w_segs;
+ __entry->sync_pt = priv->sync_pt;
+ __entry->ps_nak_psn = priv->s_nak_psn;
+ __entry->ps_nak_state = priv->s_nak_state;
+ __entry->prnr_nak_state = priv->rnr_nak_state;
+ __entry->hw_flow_index = priv->flow_state.index;
+ __entry->generation = priv->flow_state.generation;
+ __entry->fpsn = priv->flow_state.psn;
+ __entry->resync = priv->resync;
+ __entry->r_next_psn_kdeth = priv->r_next_psn_kdeth;
+ ),
+ TP_printk(/* print */
+ TID_WRITE_RSPDR_PRN,
+ __get_str(dev),
+ __entry->qpn,
+ __entry->r_tid_head,
+ __entry->r_tid_tail,
+ __entry->r_tid_ack,
+ __entry->r_tid_alloc,
+ __entry->alloc_w_segs,
+ __entry->pending_tid_w_segs,
+ __entry->sync_pt ? "yes" : "no",
+ __entry->ps_nak_psn,
+ __entry->ps_nak_state,
+ __entry->prnr_nak_state,
+ __entry->hw_flow_index,
+ __entry->generation,
+ __entry->fpsn,
+ __entry->resync ? "yes" : "no",
+ __entry->r_next_psn_kdeth
+ )
+);
+
+DEFINE_EVENT(/* event */
+ hfi1_tid_write_rsp_template, hfi1_tid_write_rsp_alloc_res,
+ TP_PROTO(struct rvt_qp *qp),
+ TP_ARGS(qp)
+);
+
+DEFINE_EVENT(/* event */
+ hfi1_tid_write_rsp_template, hfi1_tid_write_rsp_rcv_req,
+ TP_PROTO(struct rvt_qp *qp),
+ TP_ARGS(qp)
+);
+
+DEFINE_EVENT(/* event */
+ hfi1_tid_write_rsp_template, hfi1_tid_write_rsp_build_resp,
+ TP_PROTO(struct rvt_qp *qp),
+ TP_ARGS(qp)
+);
+
+DEFINE_EVENT(/* event */
+ hfi1_tid_write_rsp_template, hfi1_tid_write_rsp_rcv_data,
+ TP_PROTO(struct rvt_qp *qp),
+ TP_ARGS(qp)
+);
+
+DEFINE_EVENT(/* event */
+ hfi1_tid_write_rsp_template, hfi1_tid_write_rsp_rcv_resync,
+ TP_PROTO(struct rvt_qp *qp),
+ TP_ARGS(qp)
+);
+
+DEFINE_EVENT(/* event */
+ hfi1_tid_write_rsp_template, hfi1_tid_write_rsp_make_tid_ack,
+ TP_PROTO(struct rvt_qp *qp),
+ TP_ARGS(qp)
+);
+
+DEFINE_EVENT(/* event */
+ hfi1_tid_write_rsp_template, hfi1_tid_write_rsp_handle_kdeth_eflags,
+ TP_PROTO(struct rvt_qp *qp),
+ TP_ARGS(qp)
+);
+
+DEFINE_EVENT(/* event */
+ hfi1_tid_write_rsp_template, hfi1_tid_write_rsp_make_rc_ack,
+ TP_PROTO(struct rvt_qp *qp),
+ TP_ARGS(qp)
+);
+
+DECLARE_EVENT_CLASS(/* tid_write_sender */
+ hfi1_tid_write_sender_template,
+ TP_PROTO(struct rvt_qp *qp, char newreq),
+ TP_ARGS(qp, newreq),
+ TP_STRUCT__entry(/* entry */
+ DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device))
+ __field(u32, qpn)
+ __field(char, newreq)
+ __field(u32, s_tid_cur)
+ __field(u32, s_tid_tail)
+ __field(u32, s_tid_head)
+ __field(u32, pending_tid_w_resp)
+ __field(u32, n_requests)
+ __field(u32, n_tid_requests)
+ __field(u32, s_flags)
+ __field(u32, ps_flags)
+ __field(unsigned long, iow_flags)
+ __field(u8, s_state)
+ __field(u8, s_retry)
+ ),
+ TP_fast_assign(/* assign */
+ struct hfi1_qp_priv *priv = qp->priv;
+
+ DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device));
+ __entry->qpn = qp->ibqp.qp_num;
+ __entry->newreq = newreq;
+ __entry->s_tid_cur = priv->s_tid_cur;
+ __entry->s_tid_tail = priv->s_tid_tail;
+ __entry->s_tid_head = priv->s_tid_head;
+ __entry->pending_tid_w_resp = priv->pending_tid_w_resp;
+ __entry->n_requests = atomic_read(&priv->n_requests);
+ __entry->n_tid_requests = atomic_read(&priv->n_tid_requests);
+ __entry->s_flags = qp->s_flags;
+ __entry->ps_flags = priv->s_flags;
+ __entry->iow_flags = priv->s_iowait.flags;
+ __entry->s_state = priv->s_state;
+ __entry->s_retry = priv->s_retry;
+ ),
+ TP_printk(/* print */
+ TID_WRITE_SENDER_PRN,
+ __get_str(dev),
+ __entry->qpn,
+ __entry->newreq,
+ __entry->s_tid_cur,
+ __entry->s_tid_tail,
+ __entry->s_tid_head,
+ __entry->pending_tid_w_resp,
+ __entry->n_requests,
+ __entry->n_tid_requests,
+ __entry->s_flags,
+ __entry->ps_flags,
+ __entry->iow_flags,
+ __entry->s_state,
+ __entry->s_retry
+ )
+);
+
+DEFINE_EVENT(/* event */
+ hfi1_tid_write_sender_template, hfi1_tid_write_sender_rcv_resp,
+ TP_PROTO(struct rvt_qp *qp, char newreq),
+ TP_ARGS(qp, newreq)
+);
+
+DEFINE_EVENT(/* event */
+ hfi1_tid_write_sender_template, hfi1_tid_write_sender_rcv_tid_ack,
+ TP_PROTO(struct rvt_qp *qp, char newreq),
+ TP_ARGS(qp, newreq)
+);
+
+DEFINE_EVENT(/* event */
+ hfi1_tid_write_sender_template, hfi1_tid_write_sender_retry_timeout,
+ TP_PROTO(struct rvt_qp *qp, char newreq),
+ TP_ARGS(qp, newreq)
+);
+
+DEFINE_EVENT(/* event */
+ hfi1_tid_write_sender_template, hfi1_tid_write_sender_make_tid_pkt,
+ TP_PROTO(struct rvt_qp *qp, char newreq),
+ TP_ARGS(qp, newreq)
+);
+
+DEFINE_EVENT(/* event */
+ hfi1_tid_write_sender_template, hfi1_tid_write_sender_make_req,
+ TP_PROTO(struct rvt_qp *qp, char newreq),
+ TP_ARGS(qp, newreq)
+);
+
+DEFINE_EVENT(/* event */
+ hfi1_tid_write_sender_template, hfi1_tid_write_sender_restart_rc,
+ TP_PROTO(struct rvt_qp *qp, char newreq),
+ TP_ARGS(qp, newreq)
+);
+
+DECLARE_EVENT_CLASS(/* tid_ack */
+ hfi1_tid_ack_template,
+ TP_PROTO(struct rvt_qp *qp, u32 aeth, u32 psn,
+ u32 req_psn, u32 resync_psn),
+ TP_ARGS(qp, aeth, psn, req_psn, resync_psn),
+ TP_STRUCT__entry(/* entry */
+ DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device))
+ __field(u32, qpn)
+ __field(u32, aeth)
+ __field(u32, psn)
+ __field(u32, req_psn)
+ __field(u32, resync_psn)
+ ),
+ TP_fast_assign(/* assign */
+ DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device));
+ __entry->qpn = qp->ibqp.qp_num;
+ __entry->aeth = aeth;
+ __entry->psn = psn;
+ __entry->req_psn = req_psn;
+ __entry->resync_psn = resync_psn;
+ ),
+ TP_printk(/* print */
+ "[%s] qpn 0x%x aeth 0x%x psn 0x%x req_psn 0x%x resync_psn 0x%x",
+ __get_str(dev),
+ __entry->qpn,
+ __entry->aeth,
+ __entry->psn,
+ __entry->req_psn,
+ __entry->resync_psn
+ )
+);
+
+DEFINE_EVENT(/* rcv_tid_ack */
+ hfi1_tid_ack_template, hfi1_rcv_tid_ack,
+ TP_PROTO(struct rvt_qp *qp, u32 aeth, u32 psn,
+ u32 req_psn, u32 resync_psn),
+ TP_ARGS(qp, aeth, psn, req_psn, resync_psn)
+);
+
+DECLARE_EVENT_CLASS(/* kdeth_eflags_error */
+ hfi1_kdeth_eflags_error_template,
+ TP_PROTO(struct rvt_qp *qp, u8 rcv_type, u8 rte, u32 psn),
+ TP_ARGS(qp, rcv_type, rte, psn),
+ TP_STRUCT__entry(/* entry */
+ DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device))
+ __field(u32, qpn)
+ __field(u8, rcv_type)
+ __field(u8, rte)
+ __field(u32, psn)
+ ),
+ TP_fast_assign(/* assign */
+ DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device));
+ __entry->qpn = qp->ibqp.qp_num;
+ __entry->rcv_type = rcv_type;
+ __entry->rte = rte;
+ __entry->psn = psn;
+ ),
+ TP_printk(/* print */
+ KDETH_EFLAGS_ERR_PRN,
+ __get_str(dev),
+ __entry->qpn,
+ __entry->rcv_type,
+ __entry->rte,
+ __entry->psn
+ )
+);
+
+DEFINE_EVENT(/* event */
+ hfi1_kdeth_eflags_error_template, hfi1_eflags_err_write,
+ TP_PROTO(struct rvt_qp *qp, u8 rcv_type, u8 rte, u32 psn),
+ TP_ARGS(qp, rcv_type, rte, psn)
+);
+
+#endif /* __HFI1_TRACE_TID_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace_tid
+#include <trace/define_trace.h>
diff --git a/drivers/infiniband/hw/hfi1/trace_tx.h b/drivers/infiniband/hw/hfi1/trace_tx.h
index 415d6be42c5d..c0ba6b0a2c4e 100644
--- a/drivers/infiniband/hw/hfi1/trace_tx.h
+++ b/drivers/infiniband/hw/hfi1/trace_tx.h
@@ -1,48 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
+ * Copyright(c) 2015 - 2017 Intel Corporation.
*/
#if !defined(__HFI1_TRACE_TX_H) || defined(TRACE_HEADER_MULTI_READ)
#define __HFI1_TRACE_TX_H
@@ -53,6 +11,8 @@
#include "hfi.h"
#include "mad.h"
#include "sdma.h"
+#include "ipoib.h"
+#include "user_sdma.h"
const char *parse_sdma_flags(struct trace_seq *p, u64 desc0, u64 desc1);
@@ -114,19 +74,27 @@ DECLARE_EVENT_CLASS(hfi1_qpsleepwakeup_template,
__field(u32, qpn)
__field(u32, flags)
__field(u32, s_flags)
+ __field(u32, ps_flags)
+ __field(unsigned long, iow_flags)
),
TP_fast_assign(
- DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device))
+ DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device));
__entry->flags = flags;
__entry->qpn = qp->ibqp.qp_num;
__entry->s_flags = qp->s_flags;
+ __entry->ps_flags =
+ ((struct hfi1_qp_priv *)qp->priv)->s_flags;
+ __entry->iow_flags =
+ ((struct hfi1_qp_priv *)qp->priv)->s_iowait.flags;
),
TP_printk(
- "[%s] qpn 0x%x flags 0x%x s_flags 0x%x",
+ "[%s] qpn 0x%x flags 0x%x s_flags 0x%x ps_flags 0x%x iow_flags 0x%lx",
__get_str(dev),
__entry->qpn,
__entry->flags,
- __entry->s_flags
+ __entry->s_flags,
+ __entry->ps_flags,
+ __entry->iow_flags
)
);
@@ -198,6 +166,140 @@ TRACE_EVENT(hfi1_sdma_engine_select,
)
);
+TRACE_EVENT(hfi1_sdma_user_free_queues,
+ TP_PROTO(struct hfi1_devdata *dd, u16 ctxt, u16 subctxt),
+ TP_ARGS(dd, ctxt, subctxt),
+ TP_STRUCT__entry(DD_DEV_ENTRY(dd)
+ __field(u16, ctxt)
+ __field(u16, subctxt)
+ ),
+ TP_fast_assign(DD_DEV_ASSIGN(dd);
+ __entry->ctxt = ctxt;
+ __entry->subctxt = subctxt;
+ ),
+ TP_printk("[%s] SDMA [%u:%u] Freeing user SDMA queues",
+ __get_str(dev),
+ __entry->ctxt,
+ __entry->subctxt
+ )
+);
+
+TRACE_EVENT(hfi1_sdma_user_process_request,
+ TP_PROTO(struct hfi1_devdata *dd, u16 ctxt, u16 subctxt,
+ u16 comp_idx),
+ TP_ARGS(dd, ctxt, subctxt, comp_idx),
+ TP_STRUCT__entry(DD_DEV_ENTRY(dd)
+ __field(u16, ctxt)
+ __field(u16, subctxt)
+ __field(u16, comp_idx)
+ ),
+ TP_fast_assign(DD_DEV_ASSIGN(dd);
+ __entry->ctxt = ctxt;
+ __entry->subctxt = subctxt;
+ __entry->comp_idx = comp_idx;
+ ),
+ TP_printk("[%s] SDMA [%u:%u] Using req/comp entry: %u",
+ __get_str(dev),
+ __entry->ctxt,
+ __entry->subctxt,
+ __entry->comp_idx
+ )
+);
+
+DECLARE_EVENT_CLASS(
+ hfi1_sdma_value_template,
+ TP_PROTO(struct hfi1_devdata *dd, u16 ctxt, u16 subctxt, u16 comp_idx,
+ u32 value),
+ TP_ARGS(dd, ctxt, subctxt, comp_idx, value),
+ TP_STRUCT__entry(DD_DEV_ENTRY(dd)
+ __field(u16, ctxt)
+ __field(u16, subctxt)
+ __field(u16, comp_idx)
+ __field(u32, value)
+ ),
+ TP_fast_assign(DD_DEV_ASSIGN(dd);
+ __entry->ctxt = ctxt;
+ __entry->subctxt = subctxt;
+ __entry->comp_idx = comp_idx;
+ __entry->value = value;
+ ),
+ TP_printk("[%s] SDMA [%u:%u:%u] value: %u",
+ __get_str(dev),
+ __entry->ctxt,
+ __entry->subctxt,
+ __entry->comp_idx,
+ __entry->value
+ )
+);
+
+DEFINE_EVENT(hfi1_sdma_value_template, hfi1_sdma_user_initial_tidoffset,
+ TP_PROTO(struct hfi1_devdata *dd, u16 ctxt, u16 subctxt,
+ u16 comp_idx, u32 tidoffset),
+ TP_ARGS(dd, ctxt, subctxt, comp_idx, tidoffset));
+
+DEFINE_EVENT(hfi1_sdma_value_template, hfi1_sdma_user_data_length,
+ TP_PROTO(struct hfi1_devdata *dd, u16 ctxt, u16 subctxt,
+ u16 comp_idx, u32 data_len),
+ TP_ARGS(dd, ctxt, subctxt, comp_idx, data_len));
+
+DEFINE_EVENT(hfi1_sdma_value_template, hfi1_sdma_user_compute_length,
+ TP_PROTO(struct hfi1_devdata *dd, u16 ctxt, u16 subctxt,
+ u16 comp_idx, u32 data_len),
+ TP_ARGS(dd, ctxt, subctxt, comp_idx, data_len));
+
+TRACE_EVENT(hfi1_sdma_user_tid_info,
+ TP_PROTO(struct hfi1_devdata *dd, u16 ctxt, u16 subctxt,
+ u16 comp_idx, u32 tidoffset, u32 units, u8 shift),
+ TP_ARGS(dd, ctxt, subctxt, comp_idx, tidoffset, units, shift),
+ TP_STRUCT__entry(DD_DEV_ENTRY(dd)
+ __field(u16, ctxt)
+ __field(u16, subctxt)
+ __field(u16, comp_idx)
+ __field(u32, tidoffset)
+ __field(u32, units)
+ __field(u8, shift)
+ ),
+ TP_fast_assign(DD_DEV_ASSIGN(dd);
+ __entry->ctxt = ctxt;
+ __entry->subctxt = subctxt;
+ __entry->comp_idx = comp_idx;
+ __entry->tidoffset = tidoffset;
+ __entry->units = units;
+ __entry->shift = shift;
+ ),
+ TP_printk("[%s] SDMA [%u:%u:%u] TID offset %ubytes %uunits om %u",
+ __get_str(dev),
+ __entry->ctxt,
+ __entry->subctxt,
+ __entry->comp_idx,
+ __entry->tidoffset,
+ __entry->units,
+ __entry->shift
+ )
+);
+
+TRACE_EVENT(hfi1_sdma_request,
+ TP_PROTO(struct hfi1_devdata *dd, u16 ctxt, u16 subctxt,
+ unsigned long dim),
+ TP_ARGS(dd, ctxt, subctxt, dim),
+ TP_STRUCT__entry(DD_DEV_ENTRY(dd)
+ __field(u16, ctxt)
+ __field(u16, subctxt)
+ __field(unsigned long, dim)
+ ),
+ TP_fast_assign(DD_DEV_ASSIGN(dd);
+ __entry->ctxt = ctxt;
+ __entry->subctxt = subctxt;
+ __entry->dim = dim;
+ ),
+ TP_printk("[%s] SDMA from %u:%u (%lu)",
+ __get_str(dev),
+ __entry->ctxt,
+ __entry->subctxt,
+ __entry->dim
+ )
+);
+
DECLARE_EVENT_CLASS(hfi1_sdma_engine_class,
TP_PROTO(struct sdma_engine *sde, u64 status),
TP_ARGS(sde, status),
@@ -446,7 +548,7 @@ TRACE_EVENT(hfi1_sdma_user_reqinfo,
TP_PROTO(struct hfi1_devdata *dd, u16 ctxt, u8 subctxt, u16 *i),
TP_ARGS(dd, ctxt, subctxt, i),
TP_STRUCT__entry(
- DD_DEV_ENTRY(dd);
+ DD_DEV_ENTRY(dd)
__field(u16, ctxt)
__field(u8, subctxt)
__field(u8, ver_opcode)
@@ -511,6 +613,80 @@ TRACE_EVENT(hfi1_sdma_user_completion,
__entry->code)
);
+TRACE_EVENT(hfi1_usdma_defer,
+ TP_PROTO(struct hfi1_user_sdma_pkt_q *pq,
+ struct sdma_engine *sde,
+ struct iowait *wait),
+ TP_ARGS(pq, sde, wait),
+ TP_STRUCT__entry(DD_DEV_ENTRY(pq->dd)
+ __field(struct hfi1_user_sdma_pkt_q *, pq)
+ __field(struct sdma_engine *, sde)
+ __field(struct iowait *, wait)
+ __field(int, engine)
+ __field(int, empty)
+ ),
+ TP_fast_assign(DD_DEV_ASSIGN(pq->dd);
+ __entry->pq = pq;
+ __entry->sde = sde;
+ __entry->wait = wait;
+ __entry->engine = sde->this_idx;
+ __entry->empty = list_empty(&__entry->wait->list);
+ ),
+ TP_printk("[%s] pq %llx sde %llx wait %llx engine %d empty %d",
+ __get_str(dev),
+ (unsigned long long)__entry->pq,
+ (unsigned long long)__entry->sde,
+ (unsigned long long)__entry->wait,
+ __entry->engine,
+ __entry->empty
+ )
+);
+
+TRACE_EVENT(hfi1_usdma_activate,
+ TP_PROTO(struct hfi1_user_sdma_pkt_q *pq,
+ struct iowait *wait,
+ int reason),
+ TP_ARGS(pq, wait, reason),
+ TP_STRUCT__entry(DD_DEV_ENTRY(pq->dd)
+ __field(struct hfi1_user_sdma_pkt_q *, pq)
+ __field(struct iowait *, wait)
+ __field(int, reason)
+ ),
+ TP_fast_assign(DD_DEV_ASSIGN(pq->dd);
+ __entry->pq = pq;
+ __entry->wait = wait;
+ __entry->reason = reason;
+ ),
+ TP_printk("[%s] pq %llx wait %llx reason %d",
+ __get_str(dev),
+ (unsigned long long)__entry->pq,
+ (unsigned long long)__entry->wait,
+ __entry->reason
+ )
+);
+
+TRACE_EVENT(hfi1_usdma_we,
+ TP_PROTO(struct hfi1_user_sdma_pkt_q *pq,
+ int we_ret),
+ TP_ARGS(pq, we_ret),
+ TP_STRUCT__entry(DD_DEV_ENTRY(pq->dd)
+ __field(struct hfi1_user_sdma_pkt_q *, pq)
+ __field(int, state)
+ __field(int, we_ret)
+ ),
+ TP_fast_assign(DD_DEV_ASSIGN(pq->dd);
+ __entry->pq = pq;
+ __entry->state = pq->state;
+ __entry->we_ret = we_ret;
+ ),
+ TP_printk("[%s] pq %llx state %d we_ret %d",
+ __get_str(dev),
+ (unsigned long long)__entry->pq,
+ __entry->state,
+ __entry->we_ret
+ )
+);
+
const char *print_u32_array(struct trace_seq *, u32 *, int);
#define __print_u32_hex(arr, len) print_u32_array(p, arr, len)
@@ -564,8 +740,8 @@ TRACE_EVENT(hfi1_sdma_state,
__string(newstate, nstate)
),
TP_fast_assign(DD_DEV_ASSIGN(sde->dd);
- __assign_str(curstate, cstate);
- __assign_str(newstate, nstate);
+ __assign_str(curstate);
+ __assign_str(newstate);
),
TP_printk("[%s] current state %s new state %s",
__get_str(dev),
@@ -633,6 +809,253 @@ DEFINE_EVENT(hfi1_bct_template, bct_get,
TP_PROTO(struct hfi1_devdata *dd, struct buffer_control *bc),
TP_ARGS(dd, bc));
+TRACE_EVENT(
+ hfi1_qp_send_completion,
+ TP_PROTO(struct rvt_qp *qp, struct rvt_swqe *wqe, u32 idx),
+ TP_ARGS(qp, wqe, idx),
+ TP_STRUCT__entry(
+ DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device))
+ __field(struct rvt_swqe *, wqe)
+ __field(u64, wr_id)
+ __field(u32, qpn)
+ __field(u32, qpt)
+ __field(u32, length)
+ __field(u32, idx)
+ __field(u32, ssn)
+ __field(enum ib_wr_opcode, opcode)
+ __field(int, send_flags)
+ ),
+ TP_fast_assign(
+ DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device));
+ __entry->wqe = wqe;
+ __entry->wr_id = wqe->wr.wr_id;
+ __entry->qpn = qp->ibqp.qp_num;
+ __entry->qpt = qp->ibqp.qp_type;
+ __entry->length = wqe->length;
+ __entry->idx = idx;
+ __entry->ssn = wqe->ssn;
+ __entry->opcode = wqe->wr.opcode;
+ __entry->send_flags = wqe->wr.send_flags;
+ ),
+ TP_printk(
+ "[%s] qpn 0x%x qpt %u wqe %p idx %u wr_id %llx length %u ssn %u opcode %x send_flags %x",
+ __get_str(dev),
+ __entry->qpn,
+ __entry->qpt,
+ __entry->wqe,
+ __entry->idx,
+ __entry->wr_id,
+ __entry->length,
+ __entry->ssn,
+ __entry->opcode,
+ __entry->send_flags
+ )
+);
+
+DECLARE_EVENT_CLASS(
+ hfi1_do_send_template,
+ TP_PROTO(struct rvt_qp *qp, bool flag),
+ TP_ARGS(qp, flag),
+ TP_STRUCT__entry(
+ DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device))
+ __field(u32, qpn)
+ __field(bool, flag)
+ ),
+ TP_fast_assign(
+ DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device));
+ __entry->qpn = qp->ibqp.qp_num;
+ __entry->flag = flag;
+ ),
+ TP_printk(
+ "[%s] qpn %x flag %d",
+ __get_str(dev),
+ __entry->qpn,
+ __entry->flag
+ )
+);
+
+DEFINE_EVENT(
+ hfi1_do_send_template, hfi1_rc_do_send,
+ TP_PROTO(struct rvt_qp *qp, bool flag),
+ TP_ARGS(qp, flag)
+);
+
+DEFINE_EVENT(/* event */
+ hfi1_do_send_template, hfi1_rc_do_tid_send,
+ TP_PROTO(struct rvt_qp *qp, bool flag),
+ TP_ARGS(qp, flag)
+);
+
+DEFINE_EVENT(
+ hfi1_do_send_template, hfi1_rc_expired_time_slice,
+ TP_PROTO(struct rvt_qp *qp, bool flag),
+ TP_ARGS(qp, flag)
+);
+
+DECLARE_EVENT_CLASS(/* AIP */
+ hfi1_ipoib_txq_template,
+ TP_PROTO(struct hfi1_ipoib_txq *txq),
+ TP_ARGS(txq),
+ TP_STRUCT__entry(/* entry */
+ DD_DEV_ENTRY(txq->priv->dd)
+ __field(struct hfi1_ipoib_txq *, txq)
+ __field(struct sdma_engine *, sde)
+ __field(ulong, head)
+ __field(ulong, tail)
+ __field(uint, used)
+ __field(uint, flow)
+ __field(int, stops)
+ __field(int, no_desc)
+ __field(u8, idx)
+ __field(u8, stopped)
+ ),
+ TP_fast_assign(/* assign */
+ DD_DEV_ASSIGN(txq->priv->dd);
+ __entry->txq = txq;
+ __entry->sde = txq->sde;
+ __entry->head = txq->tx_ring.head;
+ __entry->tail = txq->tx_ring.tail;
+ __entry->idx = txq->q_idx;
+ __entry->used =
+ txq->tx_ring.sent_txreqs -
+ txq->tx_ring.complete_txreqs;
+ __entry->flow = txq->flow.as_int;
+ __entry->stops = atomic_read(&txq->tx_ring.stops);
+ __entry->no_desc = atomic_read(&txq->tx_ring.no_desc);
+ __entry->stopped =
+ __netif_subqueue_stopped(txq->priv->netdev, txq->q_idx);
+ ),
+ TP_printk(/* print */
+ "[%s] txq %llx idx %u sde %llx:%u cpu %d head %lx tail %lx flow %x used %u stops %d no_desc %d stopped %u",
+ __get_str(dev),
+ (unsigned long long)__entry->txq,
+ __entry->idx,
+ (unsigned long long)__entry->sde,
+ __entry->sde ? __entry->sde->this_idx : 0,
+ __entry->sde ? __entry->sde->cpu : 0,
+ __entry->head,
+ __entry->tail,
+ __entry->flow,
+ __entry->used,
+ __entry->stops,
+ __entry->no_desc,
+ __entry->stopped
+ )
+);
+
+DEFINE_EVENT(/* queue stop */
+ hfi1_ipoib_txq_template, hfi1_txq_stop,
+ TP_PROTO(struct hfi1_ipoib_txq *txq),
+ TP_ARGS(txq)
+);
+
+DEFINE_EVENT(/* queue wake */
+ hfi1_ipoib_txq_template, hfi1_txq_wake,
+ TP_PROTO(struct hfi1_ipoib_txq *txq),
+ TP_ARGS(txq)
+);
+
+DEFINE_EVENT(/* flow flush */
+ hfi1_ipoib_txq_template, hfi1_flow_flush,
+ TP_PROTO(struct hfi1_ipoib_txq *txq),
+ TP_ARGS(txq)
+);
+
+DEFINE_EVENT(/* flow switch */
+ hfi1_ipoib_txq_template, hfi1_flow_switch,
+ TP_PROTO(struct hfi1_ipoib_txq *txq),
+ TP_ARGS(txq)
+);
+
+DEFINE_EVENT(/* wakeup */
+ hfi1_ipoib_txq_template, hfi1_txq_wakeup,
+ TP_PROTO(struct hfi1_ipoib_txq *txq),
+ TP_ARGS(txq)
+);
+
+DEFINE_EVENT(/* full */
+ hfi1_ipoib_txq_template, hfi1_txq_full,
+ TP_PROTO(struct hfi1_ipoib_txq *txq),
+ TP_ARGS(txq)
+);
+
+DEFINE_EVENT(/* queued */
+ hfi1_ipoib_txq_template, hfi1_txq_queued,
+ TP_PROTO(struct hfi1_ipoib_txq *txq),
+ TP_ARGS(txq)
+);
+
+DEFINE_EVENT(/* xmit_stopped */
+ hfi1_ipoib_txq_template, hfi1_txq_xmit_stopped,
+ TP_PROTO(struct hfi1_ipoib_txq *txq),
+ TP_ARGS(txq)
+);
+
+DEFINE_EVENT(/* xmit_unstopped */
+ hfi1_ipoib_txq_template, hfi1_txq_xmit_unstopped,
+ TP_PROTO(struct hfi1_ipoib_txq *txq),
+ TP_ARGS(txq)
+);
+
+DECLARE_EVENT_CLASS(/* AIP */
+ hfi1_ipoib_tx_template,
+ TP_PROTO(struct ipoib_txreq *tx, u32 idx),
+ TP_ARGS(tx, idx),
+ TP_STRUCT__entry(/* entry */
+ DD_DEV_ENTRY(tx->txq->priv->dd)
+ __field(struct ipoib_txreq *, tx)
+ __field(struct hfi1_ipoib_txq *, txq)
+ __field(struct sk_buff *, skb)
+ __field(ulong, idx)
+ ),
+ TP_fast_assign(/* assign */
+ DD_DEV_ASSIGN(tx->txq->priv->dd);
+ __entry->tx = tx;
+ __entry->skb = tx->skb;
+ __entry->txq = tx->txq;
+ __entry->idx = idx;
+ ),
+ TP_printk(/* print */
+ "[%s] tx %llx txq %llx,%u skb %llx idx %lu",
+ __get_str(dev),
+ (unsigned long long)__entry->tx,
+ (unsigned long long)__entry->txq,
+ __entry->txq ? __entry->txq->q_idx : 0,
+ (unsigned long long)__entry->skb,
+ __entry->idx
+ )
+);
+
+DEFINE_EVENT(/* produce */
+ hfi1_ipoib_tx_template, hfi1_tx_produce,
+ TP_PROTO(struct ipoib_txreq *tx, u32 idx),
+ TP_ARGS(tx, idx)
+);
+
+DEFINE_EVENT(/* consume */
+ hfi1_ipoib_tx_template, hfi1_tx_consume,
+ TP_PROTO(struct ipoib_txreq *tx, u32 idx),
+ TP_ARGS(tx, idx)
+);
+
+DEFINE_EVENT(/* alloc_tx */
+ hfi1_ipoib_txq_template, hfi1_txq_alloc_tx,
+ TP_PROTO(struct hfi1_ipoib_txq *txq),
+ TP_ARGS(txq)
+);
+
+DEFINE_EVENT(/* poll */
+ hfi1_ipoib_txq_template, hfi1_txq_poll,
+ TP_PROTO(struct hfi1_ipoib_txq *txq),
+ TP_ARGS(txq)
+);
+
+DEFINE_EVENT(/* complete */
+ hfi1_ipoib_txq_template, hfi1_txq_complete,
+ TP_PROTO(struct hfi1_ipoib_txq *txq),
+ TP_ARGS(txq)
+);
+
#endif /* __HFI1_TRACE_TX_H */
#undef TRACE_INCLUDE_PATH
diff --git a/drivers/infiniband/hw/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c
index b141a78ae38b..33d2c2a218e2 100644
--- a/drivers/infiniband/hw/hfi1/uc.c
+++ b/drivers/infiniband/hw/hfi1/uc.c
@@ -1,48 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
+ * Copyright(c) 2015 - 2018 Intel Corporation.
*/
#include "hfi.h"
@@ -55,6 +13,7 @@
/**
* hfi1_make_uc_req - construct a request packet (SEND, RDMA write)
* @qp: a pointer to the QP
+ * @ps: the current packet state
*
* Assume s_lock is held.
*
@@ -65,22 +24,21 @@ int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
struct hfi1_qp_priv *priv = qp->priv;
struct ib_other_headers *ohdr;
struct rvt_swqe *wqe;
- u32 hwords = 5;
+ u32 hwords;
u32 bth0 = 0;
u32 len;
u32 pmtu = qp->pmtu;
int middle = 0;
ps->s_txreq = get_txreq(ps->dev, qp);
- if (IS_ERR(ps->s_txreq))
+ if (!ps->s_txreq)
goto bail_no_tx;
if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_SEND_OK)) {
if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND))
goto bail;
/* We are in the error state, flush the work request. */
- smp_read_barrier_depends(); /* see post_one_send() */
- if (qp->s_last == ACCESS_ONCE(qp->s_head))
+ if (qp->s_last == READ_ONCE(qp->s_head))
goto bail;
/* If DMAs are in progress, we can't flush immediately. */
if (iowait_sdma_pending(&priv->s_iowait)) {
@@ -89,13 +47,26 @@ int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
}
clear_ahg(qp);
wqe = rvt_get_swqe_ptr(qp, qp->s_last);
- hfi1_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
+ rvt_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
goto done_free_tx;
}
- ohdr = &ps->s_txreq->phdr.hdr.u.oth;
- if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
- ohdr = &ps->s_txreq->phdr.hdr.u.l.oth;
+ if (priv->hdr_type == HFI1_PKT_TYPE_9B) {
+ /* header size in 32-bit words LRH+BTH = (8+12)/4. */
+ hwords = 5;
+ if (rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH)
+ ohdr = &ps->s_txreq->phdr.hdr.ibh.u.l.oth;
+ else
+ ohdr = &ps->s_txreq->phdr.hdr.ibh.u.oth;
+ } else {
+ /* header size in 32-bit words 16B LRH+BTH = (16+12)/4. */
+ hwords = 7;
+ if ((rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH) &&
+ (hfi1_check_mcast(rdma_ah_get_dlid(&qp->remote_ah_attr))))
+ ohdr = &ps->s_txreq->phdr.hdr.opah.u.l.oth;
+ else
+ ohdr = &ps->s_txreq->phdr.hdr.opah.u.oth;
+ }
/* Get the next send request. */
wqe = rvt_get_swqe_ptr(qp, qp->s_cur);
@@ -106,8 +77,7 @@ int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
RVT_PROCESS_NEXT_SEND_OK))
goto bail;
/* Check if send work queue is empty. */
- smp_read_barrier_depends(); /* see post_one_send() */
- if (qp->s_cur == ACCESS_ONCE(qp->s_head)) {
+ if (qp->s_cur == READ_ONCE(qp->s_head)) {
clear_ahg(qp);
goto bail;
}
@@ -129,11 +99,10 @@ int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
qp, wqe->wr.ex.invalidate_rkey);
local_ops = 1;
}
- hfi1_send_complete(qp, wqe, err ? IB_WC_LOC_PROT_ERR
+ rvt_send_complete(qp, wqe, err ? IB_WC_LOC_PROT_ERR
: IB_WC_SUCCESS);
if (local_ops)
atomic_dec(&qp->local_ops_pending);
- qp->s_hdrwords = 0;
goto done_free_tx;
}
/*
@@ -206,7 +175,7 @@ int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
case OP(SEND_FIRST):
qp->s_state = OP(SEND_MIDDLE);
- /* FALLTHROUGH */
+ fallthrough;
case OP(SEND_MIDDLE):
len = qp->s_len;
if (len > pmtu) {
@@ -231,7 +200,7 @@ int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
case OP(RDMA_WRITE_FIRST):
qp->s_state = OP(RDMA_WRITE_MIDDLE);
- /* FALLTHROUGH */
+ fallthrough;
case OP(RDMA_WRITE_MIDDLE):
len = qp->s_len;
if (len > pmtu) {
@@ -256,14 +225,13 @@ int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
break;
}
qp->s_len -= len;
- qp->s_hdrwords = hwords;
+ ps->s_txreq->hdr_dwords = hwords;
ps->s_txreq->sde = priv->s_sde;
ps->s_txreq->ss = &qp->s_sge;
ps->s_txreq->s_cur_size = len;
hfi1_make_ruc_header(qp, ohdr, bth0 | (qp->s_state << 24),
- mask_psn(qp->s_psn++), middle, ps);
- /* pbc */
- ps->s_txreq->hdr_dwords = qp->s_hdrwords + 2;
+ qp->remote_qpn, mask_psn(qp->s_psn++),
+ middle, ps);
return 1;
done_free_tx:
@@ -277,18 +245,12 @@ bail:
bail_no_tx:
ps->s_txreq = NULL;
qp->s_flags &= ~RVT_S_BUSY;
- qp->s_hdrwords = 0;
return 0;
}
/**
* hfi1_uc_rcv - handle an incoming UC packet
- * @ibp: the port the packet came in on
- * @hdr: the header of the packet
- * @rcv_flags: flags relevant to rcv processing
- * @data: the packet data
- * @tlen: the length of the packet
- * @qp: the QP for this packet.
+ * @packet: the packet structure
*
* This is called from qp_rcv() to process an incoming UC packet
* for the given QP.
@@ -296,32 +258,27 @@ bail_no_tx:
*/
void hfi1_uc_rcv(struct hfi1_packet *packet)
{
- struct hfi1_ibport *ibp = &packet->rcd->ppd->ibport_data;
- struct ib_header *hdr = packet->hdr;
- u32 rcv_flags = packet->rcv_flags;
- void *data = packet->ebuf;
+ struct hfi1_ibport *ibp = rcd_to_iport(packet->rcd);
+ void *data = packet->payload;
u32 tlen = packet->tlen;
struct rvt_qp *qp = packet->qp;
struct ib_other_headers *ohdr = packet->ohdr;
- u32 bth0, opcode;
+ u32 opcode = packet->opcode;
u32 hdrsize = packet->hlen;
u32 psn;
- u32 pad;
+ u32 pad = packet->pad;
struct ib_wc wc;
u32 pmtu = qp->pmtu;
struct ib_reth *reth;
- int has_grh = rcv_flags & HFI1_HAS_GRH;
int ret;
+ u8 extra_bytes = pad + packet->extra_byte + (SIZE_OF_CRC << 2);
- bth0 = be32_to_cpu(ohdr->bth[0]);
- if (hfi1_ruc_check_hdr(ibp, hdr, has_grh, qp, bth0))
+ if (hfi1_ruc_check_hdr(ibp, packet))
return;
- process_ecn(qp, packet, true);
-
- psn = be32_to_cpu(ohdr->bth[2]);
- opcode = (bth0 >> 24) & 0xff;
+ process_ecn(qp, packet);
+ psn = ib_bth_get_psn(ohdr);
/* Compare the PSN verses the expected PSN. */
if (unlikely(cmp_psn(psn, qp->r_psn) != 0)) {
/*
@@ -384,7 +341,7 @@ inv:
}
if (qp->state == IB_QPS_RTR && !(qp->r_flags & RVT_R_COMM_EST))
- qp_comm_est(qp);
+ rvt_comm_est(qp);
/* OK, process the packet. */
switch (opcode) {
@@ -395,7 +352,7 @@ send_first:
if (test_and_clear_bit(RVT_R_REWIND_SGE, &qp->r_aflags)) {
qp->r_sge = qp->s_rdma_read_sge;
} else {
- ret = hfi1_rvt_get_rwqe(qp, 0);
+ ret = rvt_get_rwqe(qp, false);
if (ret < 0)
goto op_err;
if (!ret)
@@ -411,15 +368,20 @@ send_first:
goto no_immediate_data;
else if (opcode == OP(SEND_ONLY_WITH_IMMEDIATE))
goto send_last_imm;
- /* FALLTHROUGH */
+ fallthrough;
case OP(SEND_MIDDLE):
/* Check for invalid length PMTU or posted rwqe len. */
- if (unlikely(tlen != (hdrsize + pmtu + 4)))
+ /*
+ * There will be no padding for 9B packet but 16B packets
+ * will come in with some padding since we always add
+ * CRC and LT bytes which will need to be flit aligned
+ */
+ if (unlikely(tlen != (hdrsize + pmtu + extra_bytes)))
goto rewind;
qp->r_rcv_len += pmtu;
if (unlikely(qp->r_rcv_len > qp->r_len))
goto rewind;
- hfi1_copy_sge(&qp->r_sge, data, pmtu, 0, 0);
+ rvt_copy_sge(qp, &qp->r_sge, data, pmtu, false, false);
break;
case OP(SEND_LAST_WITH_IMMEDIATE):
@@ -432,26 +394,24 @@ no_immediate_data:
wc.ex.imm_data = 0;
wc.wc_flags = 0;
send_last:
- /* Get the number of bytes the message was padded by. */
- pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
/* Check for invalid length. */
/* LAST len should be >= 1 */
- if (unlikely(tlen < (hdrsize + pad + 4)))
+ if (unlikely(tlen < (hdrsize + extra_bytes)))
goto rewind;
/* Don't count the CRC. */
- tlen -= (hdrsize + pad + 4);
+ tlen -= (hdrsize + extra_bytes);
wc.byte_len = tlen + qp->r_rcv_len;
if (unlikely(wc.byte_len > qp->r_len))
goto rewind;
wc.opcode = IB_WC_RECV;
- hfi1_copy_sge(&qp->r_sge, data, tlen, 0, 0);
+ rvt_copy_sge(qp, &qp->r_sge, data, tlen, false, false);
rvt_put_ss(&qp->s_rdma_read_sge);
last_imm:
wc.wr_id = qp->r_wr_id;
wc.status = IB_WC_SUCCESS;
wc.qp = &qp->ibqp;
wc.src_qp = qp->remote_qpn;
- wc.slid = qp->remote_ah_attr.dlid;
+ wc.slid = rdma_ah_get_dlid(&qp->remote_ah_attr) & U16_MAX;
/*
* It seems that IB mandates the presence of an SL in a
* work completion only for the UD transport (see section
@@ -463,16 +423,14 @@ last_imm:
*
* See also OPA Vol. 1, section 9.7.6, and table 9-17.
*/
- wc.sl = qp->remote_ah_attr.sl;
+ wc.sl = rdma_ah_get_sl(&qp->remote_ah_attr);
/* zero fields that are N/A */
wc.vendor_err = 0;
wc.pkey_index = 0;
wc.dlid_path_bits = 0;
wc.port_num = 0;
/* Signal completion event if the solicited bit is set. */
- rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
- (ohdr->bth[0] &
- cpu_to_be32(IB_BTH_SOLICITED)) != 0);
+ rvt_recv_cq(qp, &wc, ib_bth_is_solicited(ohdr));
break;
case OP(RDMA_WRITE_FIRST):
@@ -511,7 +469,7 @@ rdma_first:
wc.ex.imm_data = ohdr->u.rc.imm_data;
goto rdma_last_imm;
}
- /* FALLTHROUGH */
+ fallthrough;
case OP(RDMA_WRITE_MIDDLE):
/* Check for invalid length PMTU or posted rwqe len. */
if (unlikely(tlen != (hdrsize + pmtu + 4)))
@@ -519,7 +477,7 @@ rdma_first:
qp->r_rcv_len += pmtu;
if (unlikely(qp->r_rcv_len > qp->r_len))
goto drop;
- hfi1_copy_sge(&qp->r_sge, data, pmtu, 1, 0);
+ rvt_copy_sge(qp, &qp->r_sge, data, pmtu, true, false);
break;
case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
@@ -527,20 +485,18 @@ rdma_first:
rdma_last_imm:
wc.wc_flags = IB_WC_WITH_IMM;
- /* Get the number of bytes the message was padded by. */
- pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
/* Check for invalid length. */
/* LAST len should be >= 1 */
if (unlikely(tlen < (hdrsize + pad + 4)))
goto drop;
/* Don't count the CRC. */
- tlen -= (hdrsize + pad + 4);
+ tlen -= (hdrsize + extra_bytes);
if (unlikely(tlen + qp->r_rcv_len != qp->r_len))
goto drop;
if (test_and_clear_bit(RVT_R_REWIND_SGE, &qp->r_aflags)) {
rvt_put_ss(&qp->s_rdma_read_sge);
} else {
- ret = hfi1_rvt_get_rwqe(qp, 1);
+ ret = rvt_get_rwqe(qp, true);
if (ret < 0)
goto op_err;
if (!ret)
@@ -548,23 +504,21 @@ rdma_last_imm:
}
wc.byte_len = qp->r_len;
wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
- hfi1_copy_sge(&qp->r_sge, data, tlen, 1, 0);
+ rvt_copy_sge(qp, &qp->r_sge, data, tlen, true, false);
rvt_put_ss(&qp->r_sge);
goto last_imm;
case OP(RDMA_WRITE_LAST):
rdma_last:
- /* Get the number of bytes the message was padded by. */
- pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
/* Check for invalid length. */
/* LAST len should be >= 1 */
if (unlikely(tlen < (hdrsize + pad + 4)))
goto drop;
/* Don't count the CRC. */
- tlen -= (hdrsize + pad + 4);
+ tlen -= (hdrsize + extra_bytes);
if (unlikely(tlen + qp->r_rcv_len != qp->r_len))
goto drop;
- hfi1_copy_sge(&qp->r_sge, data, tlen, 1, 0);
+ rvt_copy_sge(qp, &qp->r_sge, data, tlen, true, false);
rvt_put_ss(&qp->r_sge);
break;
@@ -584,5 +538,5 @@ drop:
return;
op_err:
- hfi1_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
+ rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
}
diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c
index c071955c0272..89d1bae8f824 100644
--- a/drivers/infiniband/hw/hfi1/ud.c
+++ b/drivers/infiniband/hw/hfi1/ud.c
@@ -1,48 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
+ * Copyright(c) 2015 - 2019 Intel Corporation.
*/
#include <linux/net.h>
@@ -51,8 +9,15 @@
#include "hfi.h"
#include "mad.h"
#include "verbs_txreq.h"
+#include "trace_ibhdrs.h"
#include "qp.h"
+/* We support only two types - 9B and 16B for now */
+static const hfi1_make_req hfi1_make_ud_req_tbl[2] = {
+ [HFI1_PKT_TYPE_9B] = &hfi1_make_ud_req_9B,
+ [HFI1_PKT_TYPE_16B] = &hfi1_make_ud_req_16B
+};
+
/**
* ud_loopback - handle send on loopback QPs
* @sqp: the sending QP
@@ -67,8 +32,9 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
{
struct hfi1_ibport *ibp = to_iport(sqp->ibqp.device, sqp->port_num);
struct hfi1_pportdata *ppd;
+ struct hfi1_qp_priv *priv = sqp->priv;
struct rvt_qp *qp;
- struct ib_ah_attr *ah_attr;
+ struct rdma_ah_attr *ah_attr;
unsigned long flags;
struct rvt_sge_state ssge;
struct rvt_sge *sge;
@@ -79,7 +45,7 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
rcu_read_lock();
qp = rvt_lookup_qpn(ib_to_rvt(sqp->ibqp.device), &ibp->rvp,
- swqe->ud_wr.remote_qpn);
+ rvt_get_swqe_remote_qpn(swqe));
if (!qp) {
ibp->rvp.n_pkt_drops++;
rcu_read_unlock();
@@ -97,23 +63,24 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
goto drop;
}
- ah_attr = &ibah_to_rvtah(swqe->ud_wr.ah)->attr;
+ ah_attr = rvt_get_swqe_ah_attr(swqe);
ppd = ppd_from_ibp(ibp);
if (qp->ibqp.qp_num > 1) {
u16 pkey;
- u16 slid;
- u8 sc5 = ibp->sl_to_sc[ah_attr->sl];
+ u32 slid;
+ u8 sc5 = ibp->sl_to_sc[rdma_ah_get_sl(ah_attr)];
pkey = hfi1_get_pkey(ibp, sqp->s_pkey_index);
- slid = ppd->lid | (ah_attr->src_path_bits &
+ slid = ppd->lid | (rdma_ah_get_path_bits(ah_attr) &
((1 << ppd->lmc) - 1));
if (unlikely(ingress_pkey_check(ppd, pkey, sc5,
- qp->s_pkey_index, slid))) {
- hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_P_KEY, pkey,
- ah_attr->sl,
- sqp->ibqp.qp_num, qp->ibqp.qp_num,
- slid, ah_attr->dlid);
+ qp->s_pkey_index,
+ slid, false))) {
+ hfi1_bad_pkey(ibp, pkey,
+ rdma_ah_get_sl(ah_attr),
+ sqp->ibqp.qp_num, qp->ibqp.qp_num,
+ slid, rdma_ah_get_dlid(ah_attr));
goto drop;
}
}
@@ -126,20 +93,10 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
if (qp->ibqp.qp_num) {
u32 qkey;
- qkey = (int)swqe->ud_wr.remote_qkey < 0 ?
- sqp->qkey : swqe->ud_wr.remote_qkey;
- if (unlikely(qkey != qp->qkey)) {
- u16 lid;
-
- lid = ppd->lid | (ah_attr->src_path_bits &
- ((1 << ppd->lmc) - 1));
- hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_Q_KEY, qkey,
- ah_attr->sl,
- sqp->ibqp.qp_num, qp->ibqp.qp_num,
- lid,
- ah_attr->dlid);
- goto drop;
- }
+ qkey = (int)rvt_get_swqe_remote_qkey(swqe) < 0 ?
+ sqp->qkey : rvt_get_swqe_remote_qkey(swqe);
+ if (unlikely(qkey != qp->qkey))
+ goto drop; /* silently drop per IBTA spec */
}
/*
@@ -165,9 +122,9 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
} else {
int ret;
- ret = hfi1_rvt_get_rwqe(qp, 0);
+ ret = rvt_get_rwqe(qp, false);
if (ret < 0) {
- hfi1_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
+ rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
goto bail_unlock;
}
if (!ret) {
@@ -183,47 +140,51 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
goto bail_unlock;
}
- if (ah_attr->ah_flags & IB_AH_GRH) {
+ if (rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH) {
struct ib_grh grh;
- struct ib_global_route grd = ah_attr->grh;
+ struct ib_global_route grd = *(rdma_ah_read_grh(ah_attr));
+
+ /*
+ * For loopback packets with extended LIDs, the
+ * sgid_index in the GRH is 0 and the dgid is
+ * OPA GID of the sender. While creating a response
+ * to the loopback packet, IB core creates the new
+ * sgid_index from the DGID and that will be the
+ * OPA_GID_INDEX. The new dgid is from the sgid
+ * index and that will be in the IB GID format.
+ *
+ * We now have a case where the sent packet had a
+ * different sgid_index and dgid compared to the
+ * one that was received in response.
+ *
+ * Fix this inconsistency.
+ */
+ if (priv->hdr_type == HFI1_PKT_TYPE_16B) {
+ if (grd.sgid_index == 0)
+ grd.sgid_index = OPA_GID_INDEX;
+
+ if (ib_is_opa_gid(&grd.dgid))
+ grd.dgid.global.interface_id =
+ cpu_to_be64(ppd->guids[HFI1_PORT_GUID_INDEX]);
+ }
hfi1_make_grh(ibp, &grh, &grd, 0, 0);
- hfi1_copy_sge(&qp->r_sge, &grh,
- sizeof(grh), 1, 0);
+ rvt_copy_sge(qp, &qp->r_sge, &grh,
+ sizeof(grh), true, false);
wc.wc_flags |= IB_WC_GRH;
} else {
- hfi1_skip_sge(&qp->r_sge, sizeof(struct ib_grh), 1);
+ rvt_skip_sge(&qp->r_sge, sizeof(struct ib_grh), true);
}
ssge.sg_list = swqe->sg_list + 1;
ssge.sge = *swqe->sg_list;
ssge.num_sge = swqe->wr.num_sge;
sge = &ssge.sge;
while (length) {
- u32 len = sge->length;
+ u32 len = rvt_get_sge_length(sge, length);
- if (len > length)
- len = length;
- if (len > sge->sge_length)
- len = sge->sge_length;
WARN_ON_ONCE(len == 0);
- hfi1_copy_sge(&qp->r_sge, sge->vaddr, len, 1, 0);
- sge->vaddr += len;
- sge->length -= len;
- sge->sge_length -= len;
- if (sge->sge_length == 0) {
- if (--ssge.num_sge)
- *sge = *ssge.sg_list++;
- } else if (sge->length == 0 && sge->mr->lkey) {
- if (++sge->n >= RVT_SEGSZ) {
- if (++sge->m >= sge->mr->mapsz)
- break;
- sge->n = 0;
- }
- sge->vaddr =
- sge->mr->map[sge->m]->segs[sge->n].vaddr;
- sge->length =
- sge->mr->map[sge->m]->segs[sge->n].length;
- }
+ rvt_copy_sge(qp, &qp->r_sge, sge->vaddr, len, true, false);
+ rvt_update_sge(&ssge, len, false);
length -= len;
}
rvt_put_ss(&qp->r_sge);
@@ -237,22 +198,22 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
if (qp->ibqp.qp_type == IB_QPT_GSI || qp->ibqp.qp_type == IB_QPT_SMI) {
if (sqp->ibqp.qp_type == IB_QPT_GSI ||
sqp->ibqp.qp_type == IB_QPT_SMI)
- wc.pkey_index = swqe->ud_wr.pkey_index;
+ wc.pkey_index = rvt_get_swqe_pkey_index(swqe);
else
wc.pkey_index = sqp->s_pkey_index;
} else {
wc.pkey_index = 0;
}
- wc.slid = ppd->lid | (ah_attr->src_path_bits & ((1 << ppd->lmc) - 1));
+ wc.slid = (ppd->lid | (rdma_ah_get_path_bits(ah_attr) &
+ ((1 << ppd->lmc) - 1))) & U16_MAX;
/* Check for loopback when the port lid is not set */
if (wc.slid == 0 && sqp->ibqp.qp_type == IB_QPT_GSI)
wc.slid = be16_to_cpu(IB_LID_PERMISSIVE);
- wc.sl = ah_attr->sl;
- wc.dlid_path_bits = ah_attr->dlid & ((1 << ppd->lmc) - 1);
+ wc.sl = rdma_ah_get_sl(ah_attr);
+ wc.dlid_path_bits = rdma_ah_get_dlid(ah_attr) & ((1 << ppd->lmc) - 1);
wc.port_num = qp->port_num;
/* Signal completion event if the solicited bit is set. */
- rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
- swqe->wr.send_flags & IB_SEND_SOLICITED);
+ rvt_recv_cq(qp, &wc, swqe->wr.send_flags & IB_SEND_SOLICITED);
ibp->rvp.n_loop_pkts++;
bail_unlock:
spin_unlock_irqrestore(&qp->r_lock, flags);
@@ -260,9 +221,212 @@ drop:
rcu_read_unlock();
}
+static void hfi1_make_bth_deth(struct rvt_qp *qp, struct rvt_swqe *wqe,
+ struct ib_other_headers *ohdr,
+ u16 *pkey, u32 extra_bytes, bool bypass)
+{
+ u32 bth0;
+ struct hfi1_ibport *ibp;
+
+ ibp = to_iport(qp->ibqp.device, qp->port_num);
+ if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) {
+ ohdr->u.ud.imm_data = wqe->wr.ex.imm_data;
+ bth0 = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE << 24;
+ } else {
+ bth0 = IB_OPCODE_UD_SEND_ONLY << 24;
+ }
+
+ if (wqe->wr.send_flags & IB_SEND_SOLICITED)
+ bth0 |= IB_BTH_SOLICITED;
+ bth0 |= extra_bytes << 20;
+ if (qp->ibqp.qp_type == IB_QPT_GSI || qp->ibqp.qp_type == IB_QPT_SMI)
+ *pkey = hfi1_get_pkey(ibp, rvt_get_swqe_pkey_index(wqe));
+ else
+ *pkey = hfi1_get_pkey(ibp, qp->s_pkey_index);
+ if (!bypass)
+ bth0 |= *pkey;
+ ohdr->bth[0] = cpu_to_be32(bth0);
+ ohdr->bth[1] = cpu_to_be32(rvt_get_swqe_remote_qpn(wqe));
+ ohdr->bth[2] = cpu_to_be32(mask_psn(wqe->psn));
+ /*
+ * Qkeys with the high order bit set mean use the
+ * qkey from the QP context instead of the WR (see 10.2.5).
+ */
+ ohdr->u.ud.deth[0] =
+ cpu_to_be32((int)rvt_get_swqe_remote_qkey(wqe) < 0 ? qp->qkey :
+ rvt_get_swqe_remote_qkey(wqe));
+ ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num);
+}
+
+void hfi1_make_ud_req_9B(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
+ struct rvt_swqe *wqe)
+{
+ u32 nwords, extra_bytes;
+ u16 len, slid, dlid, pkey;
+ u16 lrh0 = 0;
+ u8 sc5;
+ struct hfi1_qp_priv *priv = qp->priv;
+ struct ib_other_headers *ohdr;
+ struct rdma_ah_attr *ah_attr;
+ struct hfi1_pportdata *ppd;
+ struct hfi1_ibport *ibp;
+ struct ib_grh *grh;
+
+ ibp = to_iport(qp->ibqp.device, qp->port_num);
+ ppd = ppd_from_ibp(ibp);
+ ah_attr = rvt_get_swqe_ah_attr(wqe);
+
+ extra_bytes = -wqe->length & 3;
+ nwords = ((wqe->length + extra_bytes) >> 2) + SIZE_OF_CRC;
+ /* header size in dwords LRH+BTH+DETH = (8+12+8)/4. */
+ ps->s_txreq->hdr_dwords = 7;
+ if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM)
+ ps->s_txreq->hdr_dwords++;
+
+ if (rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH) {
+ grh = &ps->s_txreq->phdr.hdr.ibh.u.l.grh;
+ ps->s_txreq->hdr_dwords +=
+ hfi1_make_grh(ibp, grh, rdma_ah_read_grh(ah_attr),
+ ps->s_txreq->hdr_dwords - LRH_9B_DWORDS,
+ nwords);
+ lrh0 = HFI1_LRH_GRH;
+ ohdr = &ps->s_txreq->phdr.hdr.ibh.u.l.oth;
+ } else {
+ lrh0 = HFI1_LRH_BTH;
+ ohdr = &ps->s_txreq->phdr.hdr.ibh.u.oth;
+ }
+
+ sc5 = ibp->sl_to_sc[rdma_ah_get_sl(ah_attr)];
+ lrh0 |= (rdma_ah_get_sl(ah_attr) & 0xf) << 4;
+ if (qp->ibqp.qp_type == IB_QPT_SMI) {
+ lrh0 |= 0xF000; /* Set VL (see ch. 13.5.3.1) */
+ priv->s_sc = 0xf;
+ } else {
+ lrh0 |= (sc5 & 0xf) << 12;
+ priv->s_sc = sc5;
+ }
+
+ dlid = opa_get_lid(rdma_ah_get_dlid(ah_attr), 9B);
+ if (dlid == be16_to_cpu(IB_LID_PERMISSIVE)) {
+ slid = be16_to_cpu(IB_LID_PERMISSIVE);
+ } else {
+ u16 lid = (u16)ppd->lid;
+
+ if (lid) {
+ lid |= rdma_ah_get_path_bits(ah_attr) &
+ ((1 << ppd->lmc) - 1);
+ slid = lid;
+ } else {
+ slid = be16_to_cpu(IB_LID_PERMISSIVE);
+ }
+ }
+ hfi1_make_bth_deth(qp, wqe, ohdr, &pkey, extra_bytes, false);
+ len = ps->s_txreq->hdr_dwords + nwords;
+
+ /* Setup the packet */
+ ps->s_txreq->phdr.hdr.hdr_type = HFI1_PKT_TYPE_9B;
+ hfi1_make_ib_hdr(&ps->s_txreq->phdr.hdr.ibh,
+ lrh0, len, dlid, slid);
+}
+
+void hfi1_make_ud_req_16B(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
+ struct rvt_swqe *wqe)
+{
+ struct hfi1_qp_priv *priv = qp->priv;
+ struct ib_other_headers *ohdr;
+ struct rdma_ah_attr *ah_attr;
+ struct hfi1_pportdata *ppd;
+ struct hfi1_ibport *ibp;
+ u32 dlid, slid, nwords, extra_bytes;
+ u32 dest_qp = rvt_get_swqe_remote_qpn(wqe);
+ u32 src_qp = qp->ibqp.qp_num;
+ u16 len, pkey;
+ u8 l4, sc5;
+ bool is_mgmt = false;
+
+ ibp = to_iport(qp->ibqp.device, qp->port_num);
+ ppd = ppd_from_ibp(ibp);
+ ah_attr = rvt_get_swqe_ah_attr(wqe);
+
+ /*
+ * Build 16B Management Packet if either the destination
+ * or source queue pair number is 0 or 1.
+ */
+ if (dest_qp == 0 || src_qp == 0 || dest_qp == 1 || src_qp == 1) {
+ /* header size in dwords 16B LRH+L4_FM = (16+8)/4. */
+ ps->s_txreq->hdr_dwords = 6;
+ is_mgmt = true;
+ } else {
+ /* header size in dwords 16B LRH+BTH+DETH = (16+12+8)/4. */
+ ps->s_txreq->hdr_dwords = 9;
+ if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM)
+ ps->s_txreq->hdr_dwords++;
+ }
+
+ /* SW provides space for CRC and LT for bypass packets. */
+ extra_bytes = hfi1_get_16b_padding((ps->s_txreq->hdr_dwords << 2),
+ wqe->length);
+ nwords = ((wqe->length + extra_bytes + SIZE_OF_LT) >> 2) + SIZE_OF_CRC;
+
+ if ((rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH) &&
+ hfi1_check_mcast(rdma_ah_get_dlid(ah_attr))) {
+ struct ib_grh *grh;
+ struct ib_global_route *grd = rdma_ah_retrieve_grh(ah_attr);
+ /*
+ * Ensure OPA GIDs are transformed to IB gids
+ * before creating the GRH.
+ */
+ if (grd->sgid_index == OPA_GID_INDEX) {
+ dd_dev_warn(ppd->dd, "Bad sgid_index. sgid_index: %d\n",
+ grd->sgid_index);
+ grd->sgid_index = 0;
+ }
+ grh = &ps->s_txreq->phdr.hdr.opah.u.l.grh;
+ ps->s_txreq->hdr_dwords += hfi1_make_grh(
+ ibp, grh, grd,
+ ps->s_txreq->hdr_dwords - LRH_16B_DWORDS,
+ nwords);
+ ohdr = &ps->s_txreq->phdr.hdr.opah.u.l.oth;
+ l4 = OPA_16B_L4_IB_GLOBAL;
+ } else {
+ ohdr = &ps->s_txreq->phdr.hdr.opah.u.oth;
+ l4 = OPA_16B_L4_IB_LOCAL;
+ }
+
+ sc5 = ibp->sl_to_sc[rdma_ah_get_sl(ah_attr)];
+ if (qp->ibqp.qp_type == IB_QPT_SMI)
+ priv->s_sc = 0xf;
+ else
+ priv->s_sc = sc5;
+
+ dlid = opa_get_lid(rdma_ah_get_dlid(ah_attr), 16B);
+ if (!ppd->lid)
+ slid = be32_to_cpu(OPA_LID_PERMISSIVE);
+ else
+ slid = ppd->lid | (rdma_ah_get_path_bits(ah_attr) &
+ ((1 << ppd->lmc) - 1));
+
+ if (is_mgmt) {
+ l4 = OPA_16B_L4_FM;
+ pkey = hfi1_get_pkey(ibp, rvt_get_swqe_pkey_index(wqe));
+ hfi1_16B_set_qpn(&ps->s_txreq->phdr.hdr.opah.u.mgmt,
+ dest_qp, src_qp);
+ } else {
+ hfi1_make_bth_deth(qp, wqe, ohdr, &pkey, extra_bytes, true);
+ }
+ /* Convert dwords to flits */
+ len = (ps->s_txreq->hdr_dwords + nwords) >> 1;
+
+ /* Setup the packet */
+ ps->s_txreq->phdr.hdr.hdr_type = HFI1_PKT_TYPE_16B;
+ hfi1_make_16b_hdr(&ps->s_txreq->phdr.hdr.opah,
+ slid, dlid, len, pkey, 0, 0, l4, priv->s_sc);
+}
+
/**
* hfi1_make_ud_req - construct a UD request packet
* @qp: the QP
+ * @ps: the current packet state
*
* Assume s_lock is held.
*
@@ -271,29 +435,22 @@ drop:
int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
{
struct hfi1_qp_priv *priv = qp->priv;
- struct ib_other_headers *ohdr;
- struct ib_ah_attr *ah_attr;
+ struct rdma_ah_attr *ah_attr;
struct hfi1_pportdata *ppd;
struct hfi1_ibport *ibp;
struct rvt_swqe *wqe;
- u32 nwords;
- u32 extra_bytes;
- u32 bth0;
- u16 lrh0;
- u16 lid;
int next_cur;
- u8 sc5;
+ u32 lid;
ps->s_txreq = get_txreq(ps->dev, qp);
- if (IS_ERR(ps->s_txreq))
+ if (!ps->s_txreq)
goto bail_no_tx;
if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_NEXT_SEND_OK)) {
if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND))
goto bail;
/* We are in the error state, flush the work request. */
- smp_read_barrier_depends(); /* see post_one_send */
- if (qp->s_last == ACCESS_ONCE(qp->s_head))
+ if (qp->s_last == READ_ONCE(qp->s_head))
goto bail;
/* If DMAs are in progress, we can't flush immediately. */
if (iowait_sdma_pending(&priv->s_iowait)) {
@@ -301,13 +458,12 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
goto bail;
}
wqe = rvt_get_swqe_ptr(qp, qp->s_last);
- hfi1_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
+ rvt_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
goto done_free_tx;
}
/* see post_one_send() */
- smp_read_barrier_depends();
- if (qp->s_cur == ACCESS_ONCE(qp->s_head))
+ if (qp->s_cur == READ_ONCE(qp->s_head))
goto bail;
wqe = rvt_get_swqe_ptr(qp, qp->s_cur);
@@ -318,14 +474,15 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
/* Construct the header. */
ibp = to_iport(qp->ibqp.device, qp->port_num);
ppd = ppd_from_ibp(ibp);
- ah_attr = &ibah_to_rvtah(wqe->ud_wr.ah)->attr;
- if (ah_attr->dlid < be16_to_cpu(IB_MULTICAST_LID_BASE) ||
- ah_attr->dlid == be16_to_cpu(IB_LID_PERMISSIVE)) {
- lid = ah_attr->dlid & ~((1 << ppd->lmc) - 1);
+ ah_attr = rvt_get_swqe_ah_attr(wqe);
+ priv->hdr_type = hfi1_get_hdr_type(ppd->lid, ah_attr);
+ if ((!hfi1_check_mcast(rdma_ah_get_dlid(ah_attr))) ||
+ (rdma_ah_get_dlid(ah_attr) == be32_to_cpu(OPA_LID_PERMISSIVE))) {
+ lid = rdma_ah_get_dlid(ah_attr) & ~((1 << ppd->lmc) - 1);
if (unlikely(!loopback &&
- (lid == ppd->lid ||
- (lid == be16_to_cpu(IB_LID_PERMISSIVE) &&
- qp->ibqp.qp_type == IB_QPT_GSI)))) {
+ ((lid == ppd->lid) ||
+ ((lid == be32_to_cpu(OPA_LID_PERMISSIVE)) &&
+ (qp->ibqp.qp_type == IB_QPT_GSI))))) {
unsigned long tflags = ps->flags;
/*
* If DMAs are in progress, we can't generate
@@ -343,20 +500,15 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
ud_loopback(qp, wqe);
spin_lock_irqsave(&qp->s_lock, tflags);
ps->flags = tflags;
- hfi1_send_complete(qp, wqe, IB_WC_SUCCESS);
+ rvt_send_complete(qp, wqe, IB_WC_SUCCESS);
goto done_free_tx;
}
}
qp->s_cur = next_cur;
- extra_bytes = -wqe->length & 3;
- nwords = (wqe->length + extra_bytes) >> 2;
-
- /* header size in 32-bit words LRH+BTH+DETH = (8+12+8)/4. */
- qp->s_hdrwords = 7;
ps->s_txreq->s_cur_size = wqe->length;
ps->s_txreq->ss = &qp->s_sge;
- qp->s_srate = ah_attr->static_rate;
+ qp->s_srate = rdma_ah_get_static_rate(ah_attr);
qp->srate_mbps = ib_rate_to_mbps(qp->s_srate);
qp->s_wqe = wqe;
qp->s_sge.sge = wqe->sg_list[0];
@@ -364,81 +516,16 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
qp->s_sge.num_sge = wqe->wr.num_sge;
qp->s_sge.total_len = wqe->length;
- if (ah_attr->ah_flags & IB_AH_GRH) {
- /* Header size in 32-bit words. */
- qp->s_hdrwords += hfi1_make_grh(ibp,
- &ps->s_txreq->phdr.hdr.u.l.grh,
- &ah_attr->grh,
- qp->s_hdrwords, nwords);
- lrh0 = HFI1_LRH_GRH;
- ohdr = &ps->s_txreq->phdr.hdr.u.l.oth;
- /*
- * Don't worry about sending to locally attached multicast
- * QPs. It is unspecified by the spec. what happens.
- */
- } else {
- /* Header size in 32-bit words. */
- lrh0 = HFI1_LRH_BTH;
- ohdr = &ps->s_txreq->phdr.hdr.u.oth;
- }
- if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) {
- qp->s_hdrwords++;
- ohdr->u.ud.imm_data = wqe->wr.ex.imm_data;
- bth0 = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE << 24;
- } else {
- bth0 = IB_OPCODE_UD_SEND_ONLY << 24;
- }
- sc5 = ibp->sl_to_sc[ah_attr->sl];
- lrh0 |= (ah_attr->sl & 0xf) << 4;
- if (qp->ibqp.qp_type == IB_QPT_SMI) {
- lrh0 |= 0xF000; /* Set VL (see ch. 13.5.3.1) */
- priv->s_sc = 0xf;
- } else {
- lrh0 |= (sc5 & 0xf) << 12;
- priv->s_sc = sc5;
- }
+ /* Make the appropriate header */
+ hfi1_make_ud_req_tbl[priv->hdr_type](qp, ps, qp->s_wqe);
priv->s_sde = qp_to_sdma_engine(qp, priv->s_sc);
ps->s_txreq->sde = priv->s_sde;
priv->s_sendcontext = qp_to_send_context(qp, priv->s_sc);
ps->s_txreq->psc = priv->s_sendcontext;
- ps->s_txreq->phdr.hdr.lrh[0] = cpu_to_be16(lrh0);
- ps->s_txreq->phdr.hdr.lrh[1] = cpu_to_be16(ah_attr->dlid);
- ps->s_txreq->phdr.hdr.lrh[2] =
- cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC);
- if (ah_attr->dlid == be16_to_cpu(IB_LID_PERMISSIVE)) {
- ps->s_txreq->phdr.hdr.lrh[3] = IB_LID_PERMISSIVE;
- } else {
- lid = ppd->lid;
- if (lid) {
- lid |= ah_attr->src_path_bits & ((1 << ppd->lmc) - 1);
- ps->s_txreq->phdr.hdr.lrh[3] = cpu_to_be16(lid);
- } else {
- ps->s_txreq->phdr.hdr.lrh[3] = IB_LID_PERMISSIVE;
- }
- }
- if (wqe->wr.send_flags & IB_SEND_SOLICITED)
- bth0 |= IB_BTH_SOLICITED;
- bth0 |= extra_bytes << 20;
- if (qp->ibqp.qp_type == IB_QPT_GSI || qp->ibqp.qp_type == IB_QPT_SMI)
- bth0 |= hfi1_get_pkey(ibp, wqe->ud_wr.pkey_index);
- else
- bth0 |= hfi1_get_pkey(ibp, qp->s_pkey_index);
- ohdr->bth[0] = cpu_to_be32(bth0);
- ohdr->bth[1] = cpu_to_be32(wqe->ud_wr.remote_qpn);
- ohdr->bth[2] = cpu_to_be32(mask_psn(wqe->psn));
- /*
- * Qkeys with the high order bit set mean use the
- * qkey from the QP context instead of the WR (see 10.2.5).
- */
- ohdr->u.ud.deth[0] = cpu_to_be32((int)wqe->ud_wr.remote_qkey < 0 ?
- qp->qkey : wqe->ud_wr.remote_qkey);
- ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num);
/* disarm any ahg */
priv->s_ahg->ahgcount = 0;
priv->s_ahg->ahgidx = 0;
priv->s_ahg->tx_flags = 0;
- /* pbc */
- ps->s_txreq->hdr_dwords = qp->s_hdrwords + 2;
return 1;
@@ -453,7 +540,6 @@ bail:
bail_no_tx:
ps->s_txreq = NULL;
qp->s_flags &= ~RVT_S_BUSY;
- qp->s_hdrwords = 0;
return 0;
}
@@ -502,33 +588,97 @@ int hfi1_lookup_pkey_idx(struct hfi1_ibport *ibp, u16 pkey)
return -1;
}
+void return_cnp_16B(struct hfi1_ibport *ibp, struct rvt_qp *qp,
+ u32 remote_qpn, u16 pkey, u32 slid, u32 dlid,
+ u8 sc5, const struct ib_grh *old_grh)
+{
+ u64 pbc, pbc_flags = 0;
+ u32 bth0, plen, vl, hwords = 7;
+ u16 len;
+ u8 l4;
+ struct hfi1_opa_header hdr;
+ struct ib_other_headers *ohdr;
+ struct pio_buf *pbuf;
+ struct send_context *ctxt = qp_to_send_context(qp, sc5);
+ struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
+ u32 nwords;
+
+ hdr.hdr_type = HFI1_PKT_TYPE_16B;
+ /* Populate length */
+ nwords = ((hfi1_get_16b_padding(hwords << 2, 0) +
+ SIZE_OF_LT) >> 2) + SIZE_OF_CRC;
+ if (old_grh) {
+ struct ib_grh *grh = &hdr.opah.u.l.grh;
+
+ grh->version_tclass_flow = old_grh->version_tclass_flow;
+ grh->paylen = cpu_to_be16(
+ (hwords - LRH_16B_DWORDS + nwords) << 2);
+ grh->hop_limit = 0xff;
+ grh->sgid = old_grh->dgid;
+ grh->dgid = old_grh->sgid;
+ ohdr = &hdr.opah.u.l.oth;
+ l4 = OPA_16B_L4_IB_GLOBAL;
+ hwords += sizeof(struct ib_grh) / sizeof(u32);
+ } else {
+ ohdr = &hdr.opah.u.oth;
+ l4 = OPA_16B_L4_IB_LOCAL;
+ }
+
+ /* BIT 16 to 19 is TVER. Bit 20 to 22 is pad cnt */
+ bth0 = (IB_OPCODE_CNP << 24) | (1 << 16) |
+ (hfi1_get_16b_padding(hwords << 2, 0) << 20);
+ ohdr->bth[0] = cpu_to_be32(bth0);
+
+ ohdr->bth[1] = cpu_to_be32(remote_qpn);
+ ohdr->bth[2] = 0; /* PSN 0 */
+
+ /* Convert dwords to flits */
+ len = (hwords + nwords) >> 1;
+ hfi1_make_16b_hdr(&hdr.opah, slid, dlid, len, pkey, 1, 0, l4, sc5);
+
+ plen = 2 /* PBC */ + hwords + nwords;
+ pbc_flags |= PBC_PACKET_BYPASS | PBC_INSERT_BYPASS_ICRC;
+ vl = sc_to_vlt(ppd->dd, sc5);
+ pbc = create_pbc(ppd, pbc_flags, qp->srate_mbps, vl, plen);
+ if (ctxt) {
+ pbuf = sc_buffer_alloc(ctxt, plen, NULL, NULL);
+ if (!IS_ERR_OR_NULL(pbuf)) {
+ trace_pio_output_ibhdr(ppd->dd, &hdr, sc5);
+ ppd->dd->pio_inline_send(ppd->dd, pbuf, pbc,
+ &hdr, hwords);
+ }
+ }
+}
+
void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn,
- u32 pkey, u32 slid, u32 dlid, u8 sc5,
+ u16 pkey, u32 slid, u32 dlid, u8 sc5,
const struct ib_grh *old_grh)
{
u64 pbc, pbc_flags = 0;
u32 bth0, plen, vl, hwords = 5;
u16 lrh0;
u8 sl = ibp->sc_to_sl[sc5];
- struct ib_header hdr;
+ struct hfi1_opa_header hdr;
struct ib_other_headers *ohdr;
struct pio_buf *pbuf;
struct send_context *ctxt = qp_to_send_context(qp, sc5);
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
+ hdr.hdr_type = HFI1_PKT_TYPE_9B;
if (old_grh) {
- struct ib_grh *grh = &hdr.u.l.grh;
+ struct ib_grh *grh = &hdr.ibh.u.l.grh;
grh->version_tclass_flow = old_grh->version_tclass_flow;
- grh->paylen = cpu_to_be16((hwords - 2 + SIZE_OF_CRC) << 2);
+ grh->paylen = cpu_to_be16(
+ (hwords - LRH_9B_DWORDS + SIZE_OF_CRC) << 2);
grh->hop_limit = 0xff;
grh->sgid = old_grh->dgid;
grh->dgid = old_grh->sgid;
- ohdr = &hdr.u.l.oth;
+ ohdr = &hdr.ibh.u.l.oth;
lrh0 = HFI1_LRH_GRH;
hwords += sizeof(struct ib_grh) / sizeof(u32);
} else {
- ohdr = &hdr.u.oth;
+ ohdr = &hdr.ibh.u.oth;
lrh0 = HFI1_LRH_BTH;
}
@@ -537,23 +687,21 @@ void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn,
bth0 = pkey | (IB_OPCODE_CNP << 24);
ohdr->bth[0] = cpu_to_be32(bth0);
- ohdr->bth[1] = cpu_to_be32(remote_qpn | (1 << HFI1_BECN_SHIFT));
+ ohdr->bth[1] = cpu_to_be32(remote_qpn | (1 << IB_BECN_SHIFT));
ohdr->bth[2] = 0; /* PSN 0 */
- hdr.lrh[0] = cpu_to_be16(lrh0);
- hdr.lrh[1] = cpu_to_be16(dlid);
- hdr.lrh[2] = cpu_to_be16(hwords + SIZE_OF_CRC);
- hdr.lrh[3] = cpu_to_be16(slid);
-
+ hfi1_make_ib_hdr(&hdr.ibh, lrh0, hwords + SIZE_OF_CRC, dlid, slid);
plen = 2 /* PBC */ + hwords;
- pbc_flags |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT;
+ pbc_flags |= (ib_is_sc5(sc5) << PBC_DC_INFO_SHIFT);
vl = sc_to_vlt(ppd->dd, sc5);
pbc = create_pbc(ppd, pbc_flags, qp->srate_mbps, vl, plen);
if (ctxt) {
pbuf = sc_buffer_alloc(ctxt, plen, NULL, NULL);
- if (pbuf)
+ if (!IS_ERR_OR_NULL(pbuf)) {
+ trace_pio_output_ibhdr(ppd->dd, &hdr, sc5);
ppd->dd->pio_inline_send(ppd->dd, pbuf, pbc,
&hdr, hwords);
+ }
}
}
@@ -651,12 +799,7 @@ static int opa_smp_check(struct hfi1_ibport *ibp, u16 pkey, u8 sc5,
/**
* hfi1_ud_rcv - receive an incoming UD packet
- * @ibp: the port the packet came in on
- * @hdr: the packet header
- * @rcv_flags: flags relevant to rcv processing
- * @data: the packet data
- * @tlen: the packet length
- * @qp: the QP the packet came on
+ * @packet: the packet structure
*
* This is called from qp_rcv() to process an incoming UD packet
* for the given QP.
@@ -664,43 +807,53 @@ static int opa_smp_check(struct hfi1_ibport *ibp, u16 pkey, u8 sc5,
*/
void hfi1_ud_rcv(struct hfi1_packet *packet)
{
- struct ib_other_headers *ohdr = packet->ohdr;
- int opcode;
u32 hdrsize = packet->hlen;
struct ib_wc wc;
- u32 qkey;
u32 src_qp;
- u16 dlid, pkey;
+ u16 pkey;
int mgmt_pkey_idx = -1;
- struct hfi1_ibport *ibp = &packet->rcd->ppd->ibport_data;
+ struct hfi1_ibport *ibp = rcd_to_iport(packet->rcd);
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
- struct ib_header *hdr = packet->hdr;
- u32 rcv_flags = packet->rcv_flags;
- void *data = packet->ebuf;
+ void *data = packet->payload;
u32 tlen = packet->tlen;
struct rvt_qp *qp = packet->qp;
- bool has_grh = rcv_flags & HFI1_HAS_GRH;
- u8 sc5 = hdr2sc(hdr, packet->rhf);
- u32 bth1;
- u8 sl_from_sc, sl;
- u16 slid;
+ u8 sc5 = packet->sc;
+ u8 sl_from_sc;
+ u8 opcode = packet->opcode;
+ u8 sl = packet->sl;
+ u32 dlid = packet->dlid;
+ u32 slid = packet->slid;
u8 extra_bytes;
+ u8 l4 = 0;
+ bool dlid_is_permissive;
+ bool slid_is_permissive;
+ bool solicited = false;
+
+ extra_bytes = packet->pad + packet->extra_byte + (SIZE_OF_CRC << 2);
+
+ if (packet->etype == RHF_RCV_TYPE_BYPASS) {
+ u32 permissive_lid =
+ opa_get_lid(be32_to_cpu(OPA_LID_PERMISSIVE), 16B);
- qkey = be32_to_cpu(ohdr->u.ud.deth[0]);
- src_qp = be32_to_cpu(ohdr->u.ud.deth[1]) & RVT_QPN_MASK;
- dlid = be16_to_cpu(hdr->lrh[1]);
- bth1 = be32_to_cpu(ohdr->bth[1]);
- slid = be16_to_cpu(hdr->lrh[3]);
- pkey = (u16)be32_to_cpu(ohdr->bth[0]);
- sl = (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xf;
- extra_bytes = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
- extra_bytes += (SIZE_OF_CRC << 2);
+ l4 = hfi1_16B_get_l4(packet->hdr);
+ pkey = hfi1_16B_get_pkey(packet->hdr);
+ dlid_is_permissive = (dlid == permissive_lid);
+ slid_is_permissive = (slid == permissive_lid);
+ } else {
+ pkey = ib_bth_get_pkey(packet->ohdr);
+ dlid_is_permissive = (dlid == be16_to_cpu(IB_LID_PERMISSIVE));
+ slid_is_permissive = (slid == be16_to_cpu(IB_LID_PERMISSIVE));
+ }
sl_from_sc = ibp->sc_to_sl[sc5];
- opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
- opcode &= 0xff;
+ if (likely(l4 != OPA_16B_L4_FM)) {
+ src_qp = ib_get_sqpn(packet->ohdr);
+ solicited = ib_bth_is_solicited(packet->ohdr);
+ } else {
+ src_qp = hfi1_16B_get_src_qpn(packet->mgmt);
+ }
- process_ecn(qp, packet, (opcode != IB_OPCODE_CNP));
+ process_ecn(qp, packet);
/*
* Get the number of bytes the message was padded by
* and drop incomplete packets.
@@ -715,8 +868,7 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
* and the QKEY matches (see 9.6.1.4.1 and 9.6.1.5.1).
*/
if (qp->ibqp.qp_num) {
- if (unlikely(hdr->lrh[1] == IB_LID_PERMISSIVE ||
- hdr->lrh[3] == IB_LID_PERMISSIVE))
+ if (unlikely(dlid_is_permissive || slid_is_permissive))
goto drop;
if (qp->ibqp.qp_num > 1) {
if (unlikely(rcv_pkey_check(ppd, pkey, sc5, slid))) {
@@ -726,10 +878,10 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
* for invalid pkeys is optional according to
* IB spec (release 1.3, section 10.9.4)
*/
- hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_P_KEY,
- pkey, sl,
- src_qp, qp->ibqp.qp_num,
- slid, dlid);
+ hfi1_bad_pkey(ibp,
+ pkey, sl,
+ src_qp, qp->ibqp.qp_num,
+ slid, dlid);
return;
}
} else {
@@ -738,12 +890,10 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
if (mgmt_pkey_idx < 0)
goto drop;
}
- if (unlikely(qkey != qp->qkey)) {
- hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_Q_KEY, qkey, sl,
- src_qp, qp->ibqp.qp_num,
- slid, dlid);
- return;
- }
+ if (unlikely(l4 != OPA_16B_L4_FM &&
+ ib_get_qkey(packet->ohdr) != qp->qkey))
+ return; /* Silent drop */
+
/* Drop invalid MAD packets (see 13.5.3.1). */
if (unlikely(qp->ibqp.qp_num == 1 &&
(tlen > 2048 || (sc5 == 0xF))))
@@ -757,8 +907,7 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
if (tlen > 2048)
goto drop;
- if ((hdr->lrh[1] == IB_LID_PERMISSIVE ||
- hdr->lrh[3] == IB_LID_PERMISSIVE) &&
+ if ((dlid_is_permissive || slid_is_permissive) &&
smp->mgmt_class != IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
goto drop;
@@ -770,9 +919,8 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
if (qp->ibqp.qp_num > 1 &&
opcode == IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE) {
- wc.ex.imm_data = ohdr->u.ud.imm_data;
+ wc.ex.imm_data = packet->ohdr->u.ud.imm_data;
wc.wc_flags = IB_WC_WITH_IMM;
- tlen -= sizeof(u32);
} else if (opcode == IB_OPCODE_UD_SEND_ONLY) {
wc.ex.imm_data = 0;
wc.wc_flags = 0;
@@ -794,9 +942,9 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
} else {
int ret;
- ret = hfi1_rvt_get_rwqe(qp, 0);
+ ret = rvt_get_rwqe(qp, false);
if (ret < 0) {
- hfi1_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
+ rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
return;
}
if (!ret) {
@@ -810,15 +958,26 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
qp->r_flags |= RVT_R_REUSE_SGE;
goto drop;
}
- if (has_grh) {
- hfi1_copy_sge(&qp->r_sge, &hdr->u.l.grh,
- sizeof(struct ib_grh), 1, 0);
+ if (packet->grh) {
+ rvt_copy_sge(qp, &qp->r_sge, packet->grh,
+ sizeof(struct ib_grh), true, false);
+ wc.wc_flags |= IB_WC_GRH;
+ } else if (packet->etype == RHF_RCV_TYPE_BYPASS) {
+ struct ib_grh grh;
+ /*
+ * Assuming we only created 16B on the send side
+ * if we want to use large LIDs, since GRH was stripped
+ * out when creating 16B, add back the GRH here.
+ */
+ hfi1_make_ext_grh(packet, &grh, slid, dlid);
+ rvt_copy_sge(qp, &qp->r_sge, &grh,
+ sizeof(struct ib_grh), true, false);
wc.wc_flags |= IB_WC_GRH;
} else {
- hfi1_skip_sge(&qp->r_sge, sizeof(struct ib_grh), 1);
+ rvt_skip_sge(&qp->r_sge, sizeof(struct ib_grh), true);
}
- hfi1_copy_sge(&qp->r_sge, data, wc.byte_len - sizeof(struct ib_grh),
- 1, 0);
+ rvt_copy_sge(qp, &qp->r_sge, data, wc.byte_len - sizeof(struct ib_grh),
+ true, false);
rvt_put_ss(&qp->r_sge);
if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
return;
@@ -844,20 +1003,19 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
} else {
wc.pkey_index = 0;
}
-
- wc.slid = slid;
+ if (slid_is_permissive)
+ slid = be32_to_cpu(OPA_LID_PERMISSIVE);
+ wc.slid = slid & U16_MAX;
wc.sl = sl_from_sc;
/*
* Save the LMC lower bits if the destination LID is a unicast LID.
*/
- wc.dlid_path_bits = dlid >= be16_to_cpu(IB_MULTICAST_LID_BASE) ? 0 :
+ wc.dlid_path_bits = hfi1_check_mcast(dlid) ? 0 :
dlid & ((1 << ppd_from_ibp(ibp)->lmc) - 1);
wc.port_num = qp->port_num;
/* Signal completion event if the solicited bit is set. */
- rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
- (ohdr->bth[0] &
- cpu_to_be32(IB_BTH_SOLICITED)) != 0);
+ rvt_recv_cq(qp, &wc, solicited);
return;
drop:
diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c
index 64d26525435a..62b4f16dab27 100644
--- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c
+++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c
@@ -1,224 +1,74 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
+ * Copyright(c) 2020 Cornelis Networks, Inc.
+ * Copyright(c) 2015-2018 Intel Corporation.
*/
#include <asm/page.h>
+#include <linux/string.h>
+#include "mmu_rb.h"
#include "user_exp_rcv.h"
#include "trace.h"
-#include "mmu_rb.h"
-
-struct tid_group {
- struct list_head list;
- unsigned base;
- u8 size;
- u8 used;
- u8 map;
-};
-struct tid_rb_node {
- struct mmu_rb_node mmu;
- unsigned long phys;
- struct tid_group *grp;
- u32 rcventry;
- dma_addr_t dma_addr;
- bool freed;
- unsigned npages;
- struct page *pages[0];
-};
-
-struct tid_pageset {
- u16 idx;
- u16 count;
-};
-
-#define EXP_TID_SET_EMPTY(set) (set.count == 0 && list_empty(&set.list))
-
-#define num_user_pages(vaddr, len) \
- (1 + (((((unsigned long)(vaddr) + \
- (unsigned long)(len) - 1) & PAGE_MASK) - \
- ((unsigned long)vaddr & PAGE_MASK)) >> PAGE_SHIFT))
-
-static void unlock_exp_tids(struct hfi1_ctxtdata *, struct exp_tid_set *,
- struct hfi1_filedata *);
-static u32 find_phys_blocks(struct page **, unsigned, struct tid_pageset *);
-static int set_rcvarray_entry(struct file *, unsigned long, u32,
- struct tid_group *, struct page **, unsigned);
-static int tid_rb_insert(void *, struct mmu_rb_node *);
+static void unlock_exp_tids(struct hfi1_ctxtdata *uctxt,
+ struct exp_tid_set *set,
+ struct hfi1_filedata *fd);
+static u32 find_phys_blocks(struct tid_user_buf *tidbuf, unsigned int npages);
+static int set_rcvarray_entry(struct hfi1_filedata *fd,
+ struct tid_user_buf *tbuf,
+ u32 rcventry, struct tid_group *grp,
+ u16 pageidx, unsigned int npages);
static void cacheless_tid_rb_remove(struct hfi1_filedata *fdata,
struct tid_rb_node *tnode);
-static void tid_rb_remove(void *, struct mmu_rb_node *);
-static int tid_rb_invalidate(void *, struct mmu_rb_node *);
-static int program_rcvarray(struct file *, unsigned long, struct tid_group *,
- struct tid_pageset *, unsigned, u16, struct page **,
- u32 *, unsigned *, unsigned *);
-static int unprogram_rcvarray(struct file *, u32, struct tid_group **);
+static bool tid_rb_invalidate(struct mmu_interval_notifier *mni,
+ const struct mmu_notifier_range *range,
+ unsigned long cur_seq);
+static bool tid_cover_invalidate(struct mmu_interval_notifier *mni,
+ const struct mmu_notifier_range *range,
+ unsigned long cur_seq);
+static int program_rcvarray(struct hfi1_filedata *fd, struct tid_user_buf *,
+ struct tid_group *grp, u16 count,
+ u32 *tidlist, unsigned int *tididx,
+ unsigned int *pmapped);
+static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo);
+static void __clear_tid_node(struct hfi1_filedata *fd,
+ struct tid_rb_node *node);
static void clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node);
-static struct mmu_rb_ops tid_rb_ops = {
- .insert = tid_rb_insert,
- .remove = tid_rb_remove,
- .invalidate = tid_rb_invalidate
+static const struct mmu_interval_notifier_ops tid_mn_ops = {
+ .invalidate = tid_rb_invalidate,
+};
+static const struct mmu_interval_notifier_ops tid_cover_ops = {
+ .invalidate = tid_cover_invalidate,
};
-
-static inline u32 rcventry2tidinfo(u32 rcventry)
-{
- u32 pair = rcventry & ~0x1;
-
- return EXP_TID_SET(IDX, pair >> 1) |
- EXP_TID_SET(CTRL, 1 << (rcventry - pair));
-}
-
-static inline void exp_tid_group_init(struct exp_tid_set *set)
-{
- INIT_LIST_HEAD(&set->list);
- set->count = 0;
-}
-
-static inline void tid_group_remove(struct tid_group *grp,
- struct exp_tid_set *set)
-{
- list_del_init(&grp->list);
- set->count--;
-}
-
-static inline void tid_group_add_tail(struct tid_group *grp,
- struct exp_tid_set *set)
-{
- list_add_tail(&grp->list, &set->list);
- set->count++;
-}
-
-static inline struct tid_group *tid_group_pop(struct exp_tid_set *set)
-{
- struct tid_group *grp =
- list_first_entry(&set->list, struct tid_group, list);
- list_del_init(&grp->list);
- set->count--;
- return grp;
-}
-
-static inline void tid_group_move(struct tid_group *group,
- struct exp_tid_set *s1,
- struct exp_tid_set *s2)
-{
- tid_group_remove(group, s1);
- tid_group_add_tail(group, s2);
-}
/*
* Initialize context and file private data needed for Expected
* receive caching. This needs to be done after the context has
* been configured with the eager/expected RcvEntry counts.
*/
-int hfi1_user_exp_rcv_init(struct file *fp)
+int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd,
+ struct hfi1_ctxtdata *uctxt)
{
- struct hfi1_filedata *fd = fp->private_data;
- struct hfi1_ctxtdata *uctxt = fd->uctxt;
- struct hfi1_devdata *dd = uctxt->dd;
- unsigned tidbase;
- int i, ret = 0;
-
- spin_lock_init(&fd->tid_lock);
- spin_lock_init(&fd->invalid_lock);
-
- if (!uctxt->subctxt_cnt || !fd->subctxt) {
- exp_tid_group_init(&uctxt->tid_group_list);
- exp_tid_group_init(&uctxt->tid_used_list);
- exp_tid_group_init(&uctxt->tid_full_list);
-
- tidbase = uctxt->expected_base;
- for (i = 0; i < uctxt->expected_count /
- dd->rcv_entries.group_size; i++) {
- struct tid_group *grp;
-
- grp = kzalloc(sizeof(*grp), GFP_KERNEL);
- if (!grp) {
- /*
- * If we fail here, the groups already
- * allocated will be freed by the close
- * call.
- */
- ret = -ENOMEM;
- goto done;
- }
- grp->size = dd->rcv_entries.group_size;
- grp->base = tidbase;
- tid_group_add_tail(grp, &uctxt->tid_group_list);
- tidbase += dd->rcv_entries.group_size;
- }
- }
+ int ret = 0;
fd->entry_to_rb = kcalloc(uctxt->expected_count,
- sizeof(struct rb_node *),
- GFP_KERNEL);
+ sizeof(*fd->entry_to_rb),
+ GFP_KERNEL);
if (!fd->entry_to_rb)
return -ENOMEM;
if (!HFI1_CAP_UGET_MASK(uctxt->flags, TID_UNMAP)) {
fd->invalid_tid_idx = 0;
- fd->invalid_tids = kzalloc(uctxt->expected_count *
- sizeof(u32), GFP_KERNEL);
+ fd->invalid_tids = kcalloc(uctxt->expected_count,
+ sizeof(*fd->invalid_tids),
+ GFP_KERNEL);
if (!fd->invalid_tids) {
- ret = -ENOMEM;
- goto done;
- }
-
- /*
- * Register MMU notifier callbacks. If the registration
- * fails, continue without TID caching for this context.
- */
- ret = hfi1_mmu_rb_register(fd, fd->mm, &tid_rb_ops,
- dd->pport->hfi1_wq,
- &fd->handler);
- if (ret) {
- dd_dev_info(dd,
- "Failed MMU notifier registration %d\n",
- ret);
- ret = 0;
+ kfree(fd->entry_to_rb);
+ fd->entry_to_rb = NULL;
+ return -ENOMEM;
}
+ fd->use_mn = true;
}
/*
@@ -235,7 +85,7 @@ int hfi1_user_exp_rcv_init(struct file *fp)
* init.
*/
spin_lock(&fd->tid_lock);
- if (uctxt->subctxt_cnt && fd->handler) {
+ if (uctxt->subctxt_cnt && fd->use_mn) {
u16 remainder;
fd->tid_limit = uctxt->expected_count / uctxt->subctxt_cnt;
@@ -246,58 +96,102 @@ int hfi1_user_exp_rcv_init(struct file *fp)
fd->tid_limit = uctxt->expected_count;
}
spin_unlock(&fd->tid_lock);
-done:
+
return ret;
}
-int hfi1_user_exp_rcv_free(struct hfi1_filedata *fd)
+void hfi1_user_exp_rcv_free(struct hfi1_filedata *fd)
{
struct hfi1_ctxtdata *uctxt = fd->uctxt;
- struct tid_group *grp, *gptr;
- if (!test_bit(HFI1_CTXT_SETUP_DONE, &uctxt->event_flags))
- return 0;
- /*
- * The notifier would have been removed when the process'es mm
- * was freed.
- */
- if (fd->handler)
- hfi1_mmu_rb_unregister(fd->handler);
+ mutex_lock(&uctxt->exp_mutex);
+ if (!EXP_TID_SET_EMPTY(uctxt->tid_full_list))
+ unlock_exp_tids(uctxt, &uctxt->tid_full_list, fd);
+ if (!EXP_TID_SET_EMPTY(uctxt->tid_used_list))
+ unlock_exp_tids(uctxt, &uctxt->tid_used_list, fd);
+ mutex_unlock(&uctxt->exp_mutex);
kfree(fd->invalid_tids);
-
- if (!uctxt->cnt) {
- if (!EXP_TID_SET_EMPTY(uctxt->tid_full_list))
- unlock_exp_tids(uctxt, &uctxt->tid_full_list, fd);
- if (!EXP_TID_SET_EMPTY(uctxt->tid_used_list))
- unlock_exp_tids(uctxt, &uctxt->tid_used_list, fd);
- list_for_each_entry_safe(grp, gptr, &uctxt->tid_group_list.list,
- list) {
- list_del_init(&grp->list);
- kfree(grp);
- }
- hfi1_clear_tids(uctxt);
- }
+ fd->invalid_tids = NULL;
kfree(fd->entry_to_rb);
- return 0;
+ fd->entry_to_rb = NULL;
+}
+
+/*
+ * Release pinned receive buffer pages.
+ *
+ * @mapped: true if the pages have been DMA mapped. false otherwise.
+ * @idx: Index of the first page to unpin.
+ * @npages: No of pages to unpin.
+ *
+ * If the pages have been DMA mapped (indicated by mapped parameter), their
+ * info will be passed via a struct tid_rb_node. If they haven't been mapped,
+ * their info will be passed via a struct tid_user_buf.
+ */
+static void unpin_rcv_pages(struct hfi1_filedata *fd,
+ struct tid_user_buf *tidbuf,
+ struct tid_rb_node *node,
+ unsigned int idx,
+ unsigned int npages,
+ bool mapped)
+{
+ struct page **pages;
+ struct hfi1_devdata *dd = fd->uctxt->dd;
+ struct mm_struct *mm;
+
+ if (mapped) {
+ dma_unmap_single(&dd->pcidev->dev, node->dma_addr,
+ node->npages * PAGE_SIZE, DMA_FROM_DEVICE);
+ pages = &node->pages[idx];
+ mm = mm_from_tid_node(node);
+ } else {
+ pages = &tidbuf->pages[idx];
+ mm = current->mm;
+ }
+ hfi1_release_user_pages(mm, pages, npages, mapped);
+ fd->tid_n_pinned -= npages;
}
/*
- * Write an "empty" RcvArray entry.
- * This function exists so the TID registaration code can use it
- * to write to unused/unneeded entries and still take advantage
- * of the WC performance improvements. The HFI will ignore this
- * write to the RcvArray entry.
+ * Pin receive buffer pages.
*/
-static inline void rcv_array_wc_fill(struct hfi1_devdata *dd, u32 index)
+static int pin_rcv_pages(struct hfi1_filedata *fd, struct tid_user_buf *tidbuf)
{
+ int pinned;
+ unsigned int npages = tidbuf->npages;
+ unsigned long vaddr = tidbuf->vaddr;
+ struct page **pages = NULL;
+ struct hfi1_devdata *dd = fd->uctxt->dd;
+
+ if (npages > fd->uctxt->expected_count) {
+ dd_dev_err(dd, "Expected buffer too big\n");
+ return -EINVAL;
+ }
+
+ /* Allocate the array of struct page pointers needed for pinning */
+ pages = kcalloc(npages, sizeof(*pages), GFP_KERNEL);
+ if (!pages)
+ return -ENOMEM;
+
/*
- * Doing the WC fill writes only makes sense if the device is
- * present and the RcvArray has been mapped as WC memory.
+ * Pin all the pages of the user buffer. If we can't pin all the
+ * pages, accept the amount pinned so far and program only that.
+ * User space knows how to deal with partially programmed buffers.
*/
- if ((dd->flags & HFI1_PRESENT) && dd->rcvarray_wc)
- writeq(0, dd->rcvarray_wc + (index * 8));
+ if (!hfi1_can_pin_pages(dd, current->mm, fd->tid_n_pinned, npages)) {
+ kfree(pages);
+ return -ENOMEM;
+ }
+
+ pinned = hfi1_acquire_user_pages(current->mm, vaddr, npages, true, pages);
+ if (pinned <= 0) {
+ kfree(pages);
+ return pinned;
+ }
+ tidbuf->pages = pages;
+ fd->tid_n_pinned += pinned;
+ return pinned;
}
/*
@@ -349,89 +243,76 @@ static inline void rcv_array_wc_fill(struct hfi1_devdata *dd, u32 index)
* can fit into the group. If the group becomes fully
* used, move it to tid_full_list.
*/
-int hfi1_user_exp_rcv_setup(struct file *fp, struct hfi1_tid_info *tinfo)
+int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd,
+ struct hfi1_tid_info *tinfo)
{
int ret = 0, need_group = 0, pinned;
- struct hfi1_filedata *fd = fp->private_data;
struct hfi1_ctxtdata *uctxt = fd->uctxt;
struct hfi1_devdata *dd = uctxt->dd;
- unsigned npages, ngroups, pageidx = 0, pageset_count, npagesets,
+ unsigned int ngroups, pageset_count,
tididx = 0, mapped, mapped_pages = 0;
- unsigned long vaddr = tinfo->vaddr;
- struct page **pages = NULL;
u32 *tidlist = NULL;
- struct tid_pageset *pagesets = NULL;
+ struct tid_user_buf *tidbuf;
+ unsigned long mmu_seq = 0;
- /* Get the number of pages the user buffer spans */
- npages = num_user_pages(vaddr, tinfo->length);
- if (!npages)
+ if (!PAGE_ALIGNED(tinfo->vaddr))
return -EINVAL;
-
- if (npages > uctxt->expected_count) {
- dd_dev_err(dd, "Expected buffer too big\n");
+ if (tinfo->length == 0)
return -EINVAL;
- }
- /* Verify that access is OK for the user buffer */
- if (!access_ok(VERIFY_WRITE, (void __user *)vaddr,
- npages * PAGE_SIZE)) {
- dd_dev_err(dd, "Fail vaddr %p, %u pages, !access_ok\n",
- (void *)vaddr, npages);
- return -EFAULT;
- }
-
- pagesets = kcalloc(uctxt->expected_count, sizeof(*pagesets),
- GFP_KERNEL);
- if (!pagesets)
+ tidbuf = kzalloc(sizeof(*tidbuf), GFP_KERNEL);
+ if (!tidbuf)
return -ENOMEM;
- /* Allocate the array of struct page pointers needed for pinning */
- pages = kcalloc(npages, sizeof(*pages), GFP_KERNEL);
- if (!pages) {
+ mutex_init(&tidbuf->cover_mutex);
+ tidbuf->vaddr = tinfo->vaddr;
+ tidbuf->length = tinfo->length;
+ tidbuf->npages = num_user_pages(tidbuf->vaddr, tidbuf->length);
+ tidbuf->psets = kcalloc(uctxt->expected_count, sizeof(*tidbuf->psets),
+ GFP_KERNEL);
+ if (!tidbuf->psets) {
ret = -ENOMEM;
- goto bail;
+ goto fail_release_mem;
}
- /*
- * Pin all the pages of the user buffer. If we can't pin all the
- * pages, accept the amount pinned so far and program only that.
- * User space knows how to deal with partially programmed buffers.
- */
- if (!hfi1_can_pin_pages(dd, fd->mm, fd->tid_n_pinned, npages)) {
- ret = -ENOMEM;
- goto bail;
+ if (fd->use_mn) {
+ ret = mmu_interval_notifier_insert(
+ &tidbuf->notifier, current->mm,
+ tidbuf->vaddr, tidbuf->npages * PAGE_SIZE,
+ &tid_cover_ops);
+ if (ret)
+ goto fail_release_mem;
+ mmu_seq = mmu_interval_read_begin(&tidbuf->notifier);
}
- pinned = hfi1_acquire_user_pages(fd->mm, vaddr, npages, true, pages);
+ pinned = pin_rcv_pages(fd, tidbuf);
if (pinned <= 0) {
- ret = pinned;
- goto bail;
+ ret = (pinned < 0) ? pinned : -ENOSPC;
+ goto fail_unpin;
}
- fd->tid_n_pinned += npages;
/* Find sets of physically contiguous pages */
- npagesets = find_phys_blocks(pages, pinned, pagesets);
+ tidbuf->n_psets = find_phys_blocks(tidbuf, pinned);
- /*
- * We don't need to access this under a lock since tid_used is per
- * process and the same process cannot be in hfi1_user_exp_rcv_clear()
- * and hfi1_user_exp_rcv_setup() at the same time.
- */
+ /* Reserve the number of expected tids to be used. */
spin_lock(&fd->tid_lock);
- if (fd->tid_used + npagesets > fd->tid_limit)
+ if (fd->tid_used + tidbuf->n_psets > fd->tid_limit)
pageset_count = fd->tid_limit - fd->tid_used;
else
- pageset_count = npagesets;
+ pageset_count = tidbuf->n_psets;
+ fd->tid_used += pageset_count;
spin_unlock(&fd->tid_lock);
- if (!pageset_count)
- goto bail;
+ if (!pageset_count) {
+ ret = -ENOSPC;
+ goto fail_unreserve;
+ }
ngroups = pageset_count / dd->rcv_entries.group_size;
tidlist = kcalloc(pageset_count, sizeof(*tidlist), GFP_KERNEL);
if (!tidlist) {
ret = -ENOMEM;
- goto nomem;
+ goto fail_unreserve;
}
tididx = 0;
@@ -440,7 +321,7 @@ int hfi1_user_exp_rcv_setup(struct file *fp, struct hfi1_tid_info *tinfo)
* From this point on, we are going to be using shared (between master
* and subcontexts) context resources. We need to take the lock.
*/
- mutex_lock(&uctxt->exp_lock);
+ mutex_lock(&uctxt->exp_mutex);
/*
* The first step is to program the RcvArray entries which are complete
* groups.
@@ -449,9 +330,9 @@ int hfi1_user_exp_rcv_setup(struct file *fp, struct hfi1_tid_info *tinfo)
struct tid_group *grp =
tid_group_pop(&uctxt->tid_group_list);
- ret = program_rcvarray(fp, vaddr, grp, pagesets,
- pageidx, dd->rcv_entries.group_size,
- pages, tidlist, &tididx, &mapped);
+ ret = program_rcvarray(fd, tidbuf, grp,
+ dd->rcv_entries.group_size,
+ tidlist, &tididx, &mapped);
/*
* If there was a failure to program the RcvArray
* entries for the entire group, reset the grp fields
@@ -466,11 +347,10 @@ int hfi1_user_exp_rcv_setup(struct file *fp, struct hfi1_tid_info *tinfo)
tid_group_add_tail(grp, &uctxt->tid_full_list);
ngroups--;
- pageidx += ret;
mapped_pages += mapped;
}
- while (pageidx < pageset_count) {
+ while (tididx < pageset_count) {
struct tid_group *grp, *ptr;
/*
* If we don't have any partially used tid groups, check
@@ -492,28 +372,26 @@ int hfi1_user_exp_rcv_setup(struct file *fp, struct hfi1_tid_info *tinfo)
*/
list_for_each_entry_safe(grp, ptr, &uctxt->tid_used_list.list,
list) {
- unsigned use = min_t(unsigned, pageset_count - pageidx,
+ unsigned use = min_t(unsigned, pageset_count - tididx,
grp->size - grp->used);
- ret = program_rcvarray(fp, vaddr, grp, pagesets,
- pageidx, use, pages, tidlist,
+ ret = program_rcvarray(fd, tidbuf, grp,
+ use, tidlist,
&tididx, &mapped);
if (ret < 0) {
hfi1_cdbg(TID,
"Failed to program RcvArray entries %d",
ret);
- ret = -EFAULT;
goto unlock;
} else if (ret > 0) {
if (grp->used == grp->size)
tid_group_move(grp,
&uctxt->tid_used_list,
&uctxt->tid_full_list);
- pageidx += ret;
mapped_pages += mapped;
need_group = 0;
/* Check if we are done so we break out early */
- if (pageidx >= pageset_count)
+ if (tididx >= pageset_count)
break;
} else if (WARN_ON(ret == 0)) {
/*
@@ -527,70 +405,100 @@ int hfi1_user_exp_rcv_setup(struct file *fp, struct hfi1_tid_info *tinfo)
}
}
unlock:
- mutex_unlock(&uctxt->exp_lock);
-nomem:
+ mutex_unlock(&uctxt->exp_mutex);
hfi1_cdbg(TID, "total mapped: tidpairs:%u pages:%u (%d)", tididx,
mapped_pages, ret);
- if (tididx) {
- spin_lock(&fd->tid_lock);
- fd->tid_used += tididx;
- spin_unlock(&fd->tid_lock);
- tinfo->tidcnt = tididx;
- tinfo->length = mapped_pages * PAGE_SIZE;
-
- if (copy_to_user((void __user *)(unsigned long)tinfo->tidlist,
- tidlist, sizeof(tidlist[0]) * tididx)) {
- /*
- * On failure to copy to the user level, we need to undo
- * everything done so far so we don't leak resources.
- */
- tinfo->tidlist = (unsigned long)&tidlist;
- hfi1_user_exp_rcv_clear(fp, tinfo);
- tinfo->tidlist = 0;
- ret = -EFAULT;
- goto bail;
+
+ /* fail if nothing was programmed, set error if none provided */
+ if (tididx == 0) {
+ if (ret >= 0)
+ ret = -ENOSPC;
+ goto fail_unreserve;
+ }
+
+ /* adjust reserved tid_used to actual count */
+ spin_lock(&fd->tid_lock);
+ fd->tid_used -= pageset_count - tididx;
+ spin_unlock(&fd->tid_lock);
+
+ /* unpin all pages not covered by a TID */
+ unpin_rcv_pages(fd, tidbuf, NULL, mapped_pages, pinned - mapped_pages,
+ false);
+
+ if (fd->use_mn) {
+ /* check for an invalidate during setup */
+ bool fail = false;
+
+ mutex_lock(&tidbuf->cover_mutex);
+ fail = mmu_interval_read_retry(&tidbuf->notifier, mmu_seq);
+ mutex_unlock(&tidbuf->cover_mutex);
+
+ if (fail) {
+ ret = -EBUSY;
+ goto fail_unprogram;
}
}
- /*
- * If not everything was mapped (due to insufficient RcvArray entries,
- * for example), unpin all unmapped pages so we can pin them nex time.
- */
- if (mapped_pages != pinned) {
- hfi1_release_user_pages(fd->mm, &pages[mapped_pages],
- pinned - mapped_pages,
- false);
- fd->tid_n_pinned -= pinned - mapped_pages;
+ tinfo->tidcnt = tididx;
+ tinfo->length = mapped_pages * PAGE_SIZE;
+
+ if (copy_to_user(u64_to_user_ptr(tinfo->tidlist),
+ tidlist, sizeof(tidlist[0]) * tididx)) {
+ ret = -EFAULT;
+ goto fail_unprogram;
}
-bail:
- kfree(pagesets);
- kfree(pages);
+
+ if (fd->use_mn)
+ mmu_interval_notifier_remove(&tidbuf->notifier);
+ kfree(tidbuf->pages);
+ kfree(tidbuf->psets);
+ kfree(tidbuf);
kfree(tidlist);
- return ret > 0 ? 0 : ret;
+ return 0;
+
+fail_unprogram:
+ /* unprogram, unmap, and unpin all allocated TIDs */
+ tinfo->tidlist = (unsigned long)tidlist;
+ hfi1_user_exp_rcv_clear(fd, tinfo);
+ tinfo->tidlist = 0;
+ pinned = 0; /* nothing left to unpin */
+ pageset_count = 0; /* nothing left reserved */
+fail_unreserve:
+ spin_lock(&fd->tid_lock);
+ fd->tid_used -= pageset_count;
+ spin_unlock(&fd->tid_lock);
+fail_unpin:
+ if (fd->use_mn)
+ mmu_interval_notifier_remove(&tidbuf->notifier);
+ if (pinned > 0)
+ unpin_rcv_pages(fd, tidbuf, NULL, 0, pinned, false);
+fail_release_mem:
+ kfree(tidbuf->pages);
+ kfree(tidbuf->psets);
+ kfree(tidbuf);
+ kfree(tidlist);
+ return ret;
}
-int hfi1_user_exp_rcv_clear(struct file *fp, struct hfi1_tid_info *tinfo)
+int hfi1_user_exp_rcv_clear(struct hfi1_filedata *fd,
+ struct hfi1_tid_info *tinfo)
{
int ret = 0;
- struct hfi1_filedata *fd = fp->private_data;
struct hfi1_ctxtdata *uctxt = fd->uctxt;
u32 *tidinfo;
unsigned tididx;
- tidinfo = kcalloc(tinfo->tidcnt, sizeof(*tidinfo), GFP_KERNEL);
- if (!tidinfo)
- return -ENOMEM;
+ if (unlikely(tinfo->tidcnt > fd->tid_used))
+ return -EINVAL;
- if (copy_from_user(tidinfo, (void __user *)(unsigned long)
- tinfo->tidlist, sizeof(tidinfo[0]) *
- tinfo->tidcnt)) {
- ret = -EFAULT;
- goto done;
- }
+ tidinfo = memdup_array_user(u64_to_user_ptr(tinfo->tidlist),
+ tinfo->tidcnt, sizeof(tidinfo[0]));
+ if (IS_ERR(tidinfo))
+ return PTR_ERR(tidinfo);
- mutex_lock(&uctxt->exp_lock);
+ mutex_lock(&uctxt->exp_mutex);
for (tididx = 0; tididx < tinfo->tidcnt; tididx++) {
- ret = unprogram_rcvarray(fp, tidinfo[tididx], NULL);
+ ret = unprogram_rcvarray(fd, tidinfo[tididx]);
if (ret) {
hfi1_cdbg(TID, "Failed to unprogram rcv array %d",
ret);
@@ -601,25 +509,21 @@ int hfi1_user_exp_rcv_clear(struct file *fp, struct hfi1_tid_info *tinfo)
fd->tid_used -= tididx;
spin_unlock(&fd->tid_lock);
tinfo->tidcnt = tididx;
- mutex_unlock(&uctxt->exp_lock);
-done:
+ mutex_unlock(&uctxt->exp_mutex);
+
kfree(tidinfo);
return ret;
}
-int hfi1_user_exp_rcv_invalid(struct file *fp, struct hfi1_tid_info *tinfo)
+int hfi1_user_exp_rcv_invalid(struct hfi1_filedata *fd,
+ struct hfi1_tid_info *tinfo)
{
- struct hfi1_filedata *fd = fp->private_data;
struct hfi1_ctxtdata *uctxt = fd->uctxt;
unsigned long *ev = uctxt->dd->events +
- (((uctxt->ctxt - uctxt->dd->first_user_ctxt) *
- HFI1_MAX_SHARED_CTXTS) + fd->subctxt);
+ (uctxt_offset(uctxt) + fd->subctxt);
u32 *array;
int ret = 0;
- if (!fd->invalid_tids)
- return -EINVAL;
-
/*
* copy_to_user() can sleep, which will leave the invalid_lock
* locked and cause the MMU notifier to be blocked on the lock
@@ -658,11 +562,12 @@ int hfi1_user_exp_rcv_invalid(struct file *fp, struct hfi1_tid_info *tinfo)
return ret;
}
-static u32 find_phys_blocks(struct page **pages, unsigned npages,
- struct tid_pageset *list)
+static u32 find_phys_blocks(struct tid_user_buf *tidbuf, unsigned int npages)
{
unsigned pagecount, pageidx, setcount = 0, i;
unsigned long pfn, this_pfn;
+ struct page **pages = tidbuf->pages;
+ struct tid_pageset *list = tidbuf->psets;
if (!npages)
return 0;
@@ -724,14 +629,13 @@ static u32 find_phys_blocks(struct page **pages, unsigned npages,
/**
* program_rcvarray() - program an RcvArray group with receive buffers
- * @fp: file pointer
- * @vaddr: starting user virtual address
+ * @fd: filedata pointer
+ * @tbuf: pointer to struct tid_user_buf that has the user buffer starting
+ * virtual address, buffer length, page pointers, pagesets (array of
+ * struct tid_pageset holding information on physically contiguous
+ * chunks from the user buffer), and other fields.
* @grp: RcvArray group
- * @sets: array of struct tid_pageset holding information on physically
- * contiguous chunks from the user buffer
- * @start: starting index into sets array
* @count: number of struct tid_pageset's to program
- * @pages: an array of struct page * for the user buffer
* @tidlist: the array of u32 elements when the information about the
* programmed RcvArray entries is to be encoded.
* @tididx: starting offset into tidlist
@@ -749,16 +653,15 @@ static u32 find_phys_blocks(struct page **pages, unsigned npages,
* -ENOMEM or -EFAULT on error from set_rcvarray_entry(), or
* number of RcvArray entries programmed.
*/
-static int program_rcvarray(struct file *fp, unsigned long vaddr,
- struct tid_group *grp,
- struct tid_pageset *sets,
- unsigned start, u16 count, struct page **pages,
- u32 *tidlist, unsigned *tididx, unsigned *pmapped)
+static int program_rcvarray(struct hfi1_filedata *fd, struct tid_user_buf *tbuf,
+ struct tid_group *grp, u16 count,
+ u32 *tidlist, unsigned int *tididx,
+ unsigned int *pmapped)
{
- struct hfi1_filedata *fd = fp->private_data;
struct hfi1_ctxtdata *uctxt = fd->uctxt;
struct hfi1_devdata *dd = uctxt->dd;
u16 idx;
+ unsigned int start = *tididx;
u32 tidinfo = 0, rcventry, useidx = 0;
int mapped = 0;
@@ -793,18 +696,17 @@ static int program_rcvarray(struct file *fp, unsigned long vaddr,
}
rcventry = grp->base + useidx;
- npages = sets[setidx].count;
- pageidx = sets[setidx].idx;
+ npages = tbuf->psets[setidx].count;
+ pageidx = tbuf->psets[setidx].idx;
- ret = set_rcvarray_entry(fp, vaddr + (pageidx * PAGE_SIZE),
- rcventry, grp, pages + pageidx,
+ ret = set_rcvarray_entry(fd, tbuf,
+ rcventry, grp, pageidx,
npages);
if (ret)
return ret;
mapped += npages;
- tidinfo = rcventry2tidinfo(rcventry - uctxt->expected_base) |
- EXP_TID_SET(LEN, npages);
+ tidinfo = create_tid(rcventry - uctxt->expected_base, npages);
tidlist[(*tididx)++] = tidinfo;
grp->used++;
grp->map |= 1 << useidx++;
@@ -818,29 +720,28 @@ static int program_rcvarray(struct file *fp, unsigned long vaddr,
return idx;
}
-static int set_rcvarray_entry(struct file *fp, unsigned long vaddr,
+static int set_rcvarray_entry(struct hfi1_filedata *fd,
+ struct tid_user_buf *tbuf,
u32 rcventry, struct tid_group *grp,
- struct page **pages, unsigned npages)
+ u16 pageidx, unsigned int npages)
{
int ret;
- struct hfi1_filedata *fd = fp->private_data;
struct hfi1_ctxtdata *uctxt = fd->uctxt;
struct tid_rb_node *node;
struct hfi1_devdata *dd = uctxt->dd;
dma_addr_t phys;
+ struct page **pages = tbuf->pages + pageidx;
/*
* Allocate the node first so we can handle a potential
* failure before we've programmed anything.
*/
- node = kzalloc(sizeof(*node) + (sizeof(struct page *) * npages),
- GFP_KERNEL);
+ node = kzalloc(struct_size(node, pages, npages), GFP_KERNEL);
if (!node)
return -ENOMEM;
- phys = pci_map_single(dd->pcidev,
- __va(page_to_phys(pages[0])),
- npages * PAGE_SIZE, PCI_DMA_FROMDEVICE);
+ phys = dma_map_single(&dd->pcidev->dev, __va(page_to_phys(pages[0])),
+ npages * PAGE_SIZE, DMA_FROM_DEVICE);
if (dma_mapping_error(&dd->pcidev->dev, phys)) {
dd_dev_err(dd, "Failed to DMA map Exp Rcv pages 0x%llx\n",
phys);
@@ -848,91 +749,100 @@ static int set_rcvarray_entry(struct file *fp, unsigned long vaddr,
return -EFAULT;
}
- node->mmu.addr = vaddr;
- node->mmu.len = npages * PAGE_SIZE;
+ node->fdata = fd;
+ mutex_init(&node->invalidate_mutex);
node->phys = page_to_phys(pages[0]);
node->npages = npages;
node->rcventry = rcventry;
node->dma_addr = phys;
node->grp = grp;
node->freed = false;
- memcpy(node->pages, pages, sizeof(struct page *) * npages);
-
- if (!fd->handler)
- ret = tid_rb_insert(fd, &node->mmu);
- else
- ret = hfi1_mmu_rb_insert(fd->handler, &node->mmu);
+ memcpy(node->pages, pages, flex_array_size(node, pages, npages));
- if (ret) {
- hfi1_cdbg(TID, "Failed to insert RB node %u 0x%lx, 0x%lx %d",
- node->rcventry, node->mmu.addr, node->phys, ret);
- pci_unmap_single(dd->pcidev, phys, npages * PAGE_SIZE,
- PCI_DMA_FROMDEVICE);
- kfree(node);
- return -EFAULT;
+ if (fd->use_mn) {
+ ret = mmu_interval_notifier_insert(
+ &node->notifier, current->mm,
+ tbuf->vaddr + (pageidx * PAGE_SIZE), npages * PAGE_SIZE,
+ &tid_mn_ops);
+ if (ret)
+ goto out_unmap;
}
+ fd->entry_to_rb[node->rcventry - uctxt->expected_base] = node;
+
hfi1_put_tid(dd, rcventry, PT_EXPECTED, phys, ilog2(npages) + 1);
trace_hfi1_exp_tid_reg(uctxt->ctxt, fd->subctxt, rcventry, npages,
- node->mmu.addr, node->phys, phys);
+ node->notifier.interval_tree.start, node->phys,
+ phys);
return 0;
+
+out_unmap:
+ hfi1_cdbg(TID, "Failed to insert RB node %u 0x%lx, 0x%lx %d",
+ node->rcventry, node->notifier.interval_tree.start,
+ node->phys, ret);
+ dma_unmap_single(&dd->pcidev->dev, phys, npages * PAGE_SIZE,
+ DMA_FROM_DEVICE);
+ kfree(node);
+ return -EFAULT;
}
-static int unprogram_rcvarray(struct file *fp, u32 tidinfo,
- struct tid_group **grp)
+static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo)
{
- struct hfi1_filedata *fd = fp->private_data;
struct hfi1_ctxtdata *uctxt = fd->uctxt;
struct hfi1_devdata *dd = uctxt->dd;
struct tid_rb_node *node;
- u8 tidctrl = EXP_TID_GET(tidinfo, CTRL);
+ u32 tidctrl = EXP_TID_GET(tidinfo, CTRL);
u32 tididx = EXP_TID_GET(tidinfo, IDX) << 1, rcventry;
- if (tididx >= uctxt->expected_count) {
- dd_dev_err(dd, "Invalid RcvArray entry (%u) index for ctxt %u\n",
- tididx, uctxt->ctxt);
- return -EINVAL;
- }
-
- if (tidctrl == 0x3)
+ if (tidctrl == 0x3 || tidctrl == 0x0)
return -EINVAL;
rcventry = tididx + (tidctrl - 1);
+ if (rcventry >= uctxt->expected_count) {
+ dd_dev_err(dd, "Invalid RcvArray entry (%u) index for ctxt %u\n",
+ rcventry, uctxt->ctxt);
+ return -EINVAL;
+ }
+
node = fd->entry_to_rb[rcventry];
if (!node || node->rcventry != (uctxt->expected_base + rcventry))
return -EBADF;
- if (grp)
- *grp = node->grp;
-
- if (!fd->handler)
- cacheless_tid_rb_remove(fd, node);
- else
- hfi1_mmu_rb_remove(fd->handler, &node->mmu);
+ if (fd->use_mn)
+ mmu_interval_notifier_remove(&node->notifier);
+ cacheless_tid_rb_remove(fd, node);
return 0;
}
-static void clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node)
+static void __clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node)
{
struct hfi1_ctxtdata *uctxt = fd->uctxt;
struct hfi1_devdata *dd = uctxt->dd;
+ mutex_lock(&node->invalidate_mutex);
+ if (node->freed)
+ goto done;
+ node->freed = true;
+
trace_hfi1_exp_tid_unreg(uctxt->ctxt, fd->subctxt, node->rcventry,
- node->npages, node->mmu.addr, node->phys,
+ node->npages,
+ node->notifier.interval_tree.start, node->phys,
node->dma_addr);
- hfi1_put_tid(dd, node->rcventry, PT_INVALID, 0, 0);
- /*
- * Make sure device has seen the write before we unpin the
- * pages.
- */
- flush_wc();
+ /* Make sure device has seen the write before pages are unpinned */
+ hfi1_put_tid(dd, node->rcventry, PT_INVALID_FLUSH, 0, 0);
+
+ unpin_rcv_pages(fd, NULL, node, 0, node->npages, true);
+done:
+ mutex_unlock(&node->invalidate_mutex);
+}
+
+static void clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node)
+{
+ struct hfi1_ctxtdata *uctxt = fd->uctxt;
- pci_unmap_single(dd->pcidev, node->dma_addr, node->mmu.len,
- PCI_DMA_FROMDEVICE);
- hfi1_release_user_pages(fd->mm, node->pages, node->npages, true);
- fd->tid_n_pinned -= node->npages;
+ __clear_tid_node(fd, node);
node->grp->used--;
node->grp->map &= ~(1 << (node->rcventry - node->grp->base));
@@ -970,39 +880,43 @@ static void unlock_exp_tids(struct hfi1_ctxtdata *uctxt,
if (!node || node->rcventry != rcventry)
continue;
+ if (fd->use_mn)
+ mmu_interval_notifier_remove(
+ &node->notifier);
cacheless_tid_rb_remove(fd, node);
}
}
}
}
-/*
- * Always return 0 from this function. A non-zero return indicates that the
- * remove operation will be called and that memory should be unpinned.
- * However, the driver cannot unpin out from under PSM. Instead, retain the
- * memory (by returning 0) and inform PSM that the memory is going away. PSM
- * will call back later when it has removed the memory from its list.
- */
-static int tid_rb_invalidate(void *arg, struct mmu_rb_node *mnode)
+static bool tid_rb_invalidate(struct mmu_interval_notifier *mni,
+ const struct mmu_notifier_range *range,
+ unsigned long cur_seq)
{
- struct hfi1_filedata *fdata = arg;
- struct hfi1_ctxtdata *uctxt = fdata->uctxt;
struct tid_rb_node *node =
- container_of(mnode, struct tid_rb_node, mmu);
+ container_of(mni, struct tid_rb_node, notifier);
+ struct hfi1_filedata *fdata = node->fdata;
+ struct hfi1_ctxtdata *uctxt = fdata->uctxt;
if (node->freed)
- return 0;
+ return true;
+
+ /* take action only if unmapping */
+ if (range->event != MMU_NOTIFY_UNMAP)
+ return true;
- trace_hfi1_exp_tid_inval(uctxt->ctxt, fdata->subctxt, node->mmu.addr,
+ trace_hfi1_exp_tid_inval(uctxt->ctxt, fdata->subctxt,
+ node->notifier.interval_tree.start,
node->rcventry, node->npages, node->dma_addr);
- node->freed = true;
+
+ /* clear the hardware rcvarray entry */
+ __clear_tid_node(fdata, node);
spin_lock(&fdata->invalid_lock);
if (fdata->invalid_tid_idx < uctxt->expected_count) {
fdata->invalid_tids[fdata->invalid_tid_idx] =
- rcventry2tidinfo(node->rcventry - uctxt->expected_base);
- fdata->invalid_tids[fdata->invalid_tid_idx] |=
- EXP_TID_SET(LEN, node->npages);
+ create_tid(node->rcventry - uctxt->expected_base,
+ node->npages);
if (!fdata->invalid_tid_idx) {
unsigned long *ev;
@@ -1016,25 +930,30 @@ static int tid_rb_invalidate(void *arg, struct mmu_rb_node *mnode)
* process in question.
*/
ev = uctxt->dd->events +
- (((uctxt->ctxt - uctxt->dd->first_user_ctxt) *
- HFI1_MAX_SHARED_CTXTS) + fdata->subctxt);
+ (uctxt_offset(uctxt) + fdata->subctxt);
set_bit(_HFI1_EVENT_TID_MMU_NOTIFY_BIT, ev);
}
fdata->invalid_tid_idx++;
}
spin_unlock(&fdata->invalid_lock);
- return 0;
+ return true;
}
-static int tid_rb_insert(void *arg, struct mmu_rb_node *node)
+static bool tid_cover_invalidate(struct mmu_interval_notifier *mni,
+ const struct mmu_notifier_range *range,
+ unsigned long cur_seq)
{
- struct hfi1_filedata *fdata = arg;
- struct tid_rb_node *tnode =
- container_of(node, struct tid_rb_node, mmu);
- u32 base = fdata->uctxt->expected_base;
+ struct tid_user_buf *tidbuf =
+ container_of(mni, struct tid_user_buf, notifier);
+
+ /* take action only if unmapping */
+ if (range->event == MMU_NOTIFY_UNMAP) {
+ mutex_lock(&tidbuf->cover_mutex);
+ mmu_interval_set_seq(mni, cur_seq);
+ mutex_unlock(&tidbuf->cover_mutex);
+ }
- fdata->entry_to_rb[tnode->rcventry - base] = tnode;
- return 0;
+ return true;
}
static void cacheless_tid_rb_remove(struct hfi1_filedata *fdata,
@@ -1045,12 +964,3 @@ static void cacheless_tid_rb_remove(struct hfi1_filedata *fdata,
fdata->entry_to_rb[tnode->rcventry - base] = NULL;
clear_tid_node(fdata, tnode);
}
-
-static void tid_rb_remove(void *arg, struct mmu_rb_node *node)
-{
- struct hfi1_filedata *fdata = arg;
- struct tid_rb_node *tnode =
- container_of(node, struct tid_rb_node, mmu);
-
- cacheless_tid_rb_remove(fdata, tnode);
-}
diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.h b/drivers/infiniband/hw/hfi1/user_exp_rcv.h
index 9bc8d9fba87e..055726f7c139 100644
--- a/drivers/infiniband/hw/hfi1/user_exp_rcv.h
+++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.h
@@ -1,79 +1,66 @@
-#ifndef _HFI1_USER_EXP_RCV_H
-#define _HFI1_USER_EXP_RCV_H
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
+ * Copyright(c) 2020 - Cornelis Networks, Inc.
+ * Copyright(c) 2015 - 2017 Intel Corporation.
*/
+#ifndef _HFI1_USER_EXP_RCV_H
+#define _HFI1_USER_EXP_RCV_H
+
#include "hfi.h"
+#include "exp_rcv.h"
+
+struct tid_pageset {
+ u16 idx;
+ u16 count;
+};
+
+struct tid_user_buf {
+ struct mmu_interval_notifier notifier;
+ struct mutex cover_mutex;
+ unsigned long vaddr;
+ unsigned long length;
+ unsigned int npages;
+ struct page **pages;
+ struct tid_pageset *psets;
+ unsigned int n_psets;
+};
+
+struct tid_rb_node {
+ struct mmu_interval_notifier notifier;
+ struct hfi1_filedata *fdata;
+ struct mutex invalidate_mutex; /* covers hw removal */
+ unsigned long phys;
+ struct tid_group *grp;
+ u32 rcventry;
+ dma_addr_t dma_addr;
+ bool freed;
+ unsigned int npages;
+ struct page *pages[] __counted_by(npages);
+};
+
+static inline int num_user_pages(unsigned long addr,
+ unsigned long len)
+{
+ const unsigned long spage = addr & PAGE_MASK;
+ const unsigned long epage = (addr + len - 1) & PAGE_MASK;
-#define EXP_TID_TIDLEN_MASK 0x7FFULL
-#define EXP_TID_TIDLEN_SHIFT 0
-#define EXP_TID_TIDCTRL_MASK 0x3ULL
-#define EXP_TID_TIDCTRL_SHIFT 20
-#define EXP_TID_TIDIDX_MASK 0x3FFULL
-#define EXP_TID_TIDIDX_SHIFT 22
-#define EXP_TID_GET(tid, field) \
- (((tid) >> EXP_TID_TID##field##_SHIFT) & EXP_TID_TID##field##_MASK)
+ return 1 + ((epage - spage) >> PAGE_SHIFT);
+}
-#define EXP_TID_SET(field, value) \
- (((value) & EXP_TID_TID##field##_MASK) << \
- EXP_TID_TID##field##_SHIFT)
-#define EXP_TID_CLEAR(tid, field) ({ \
- (tid) &= ~(EXP_TID_TID##field##_MASK << \
- EXP_TID_TID##field##_SHIFT); \
- })
-#define EXP_TID_RESET(tid, field, value) do { \
- EXP_TID_CLEAR(tid, field); \
- (tid) |= EXP_TID_SET(field, (value)); \
- } while (0)
+int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd,
+ struct hfi1_ctxtdata *uctxt);
+void hfi1_user_exp_rcv_free(struct hfi1_filedata *fd);
+int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd,
+ struct hfi1_tid_info *tinfo);
+int hfi1_user_exp_rcv_clear(struct hfi1_filedata *fd,
+ struct hfi1_tid_info *tinfo);
+int hfi1_user_exp_rcv_invalid(struct hfi1_filedata *fd,
+ struct hfi1_tid_info *tinfo);
-int hfi1_user_exp_rcv_init(struct file *);
-int hfi1_user_exp_rcv_free(struct hfi1_filedata *);
-int hfi1_user_exp_rcv_setup(struct file *, struct hfi1_tid_info *);
-int hfi1_user_exp_rcv_clear(struct file *, struct hfi1_tid_info *);
-int hfi1_user_exp_rcv_invalid(struct file *, struct hfi1_tid_info *);
+static inline struct mm_struct *mm_from_tid_node(struct tid_rb_node *node)
+{
+ return node->notifier.mm;
+}
#endif /* _HFI1_USER_EXP_RCV_H */
diff --git a/drivers/infiniband/hw/hfi1/user_pages.c b/drivers/infiniband/hw/hfi1/user_pages.c
index 20f4ddcac3b0..c77913a7920f 100644
--- a/drivers/infiniband/hw/hfi1/user_pages.c
+++ b/drivers/infiniband/hw/hfi1/user_pages.c
@@ -1,52 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
+ * Copyright(c) 2015-2017 Intel Corporation.
*/
#include <linux/mm.h>
-#include <linux/sched.h>
+#include <linux/sched/signal.h>
#include <linux/device.h>
#include <linux/module.h>
@@ -71,48 +29,65 @@ MODULE_PARM_DESC(cache_size, "Send and receive side cache size limit (in MB)");
bool hfi1_can_pin_pages(struct hfi1_devdata *dd, struct mm_struct *mm,
u32 nlocked, u32 npages)
{
- unsigned long ulimit = rlimit(RLIMIT_MEMLOCK), pinned, cache_limit,
- size = (cache_size * (1UL << 20)); /* convert to bytes */
- unsigned usr_ctxts = dd->num_rcv_contexts - dd->first_user_ctxt;
- bool can_lock = capable(CAP_IPC_LOCK);
+ unsigned long ulimit_pages;
+ unsigned long cache_limit_pages;
+ unsigned int usr_ctxts;
/*
- * Calculate per-cache size. The calculation below uses only a quarter
- * of the available per-context limit. This leaves space for other
- * pinning. Should we worry about shared ctxts?
+ * Perform RLIMIT_MEMLOCK based checks unless CAP_IPC_LOCK is present.
*/
- cache_limit = (ulimit / usr_ctxts) / 4;
-
- /* If ulimit isn't set to "unlimited" and is smaller than cache_size. */
- if (ulimit != (-1UL) && size > cache_limit)
- size = cache_limit;
-
- /* Convert to number of pages */
- size = DIV_ROUND_UP(size, PAGE_SIZE);
-
- down_read(&mm->mmap_sem);
- pinned = mm->pinned_vm;
- up_read(&mm->mmap_sem);
+ if (!capable(CAP_IPC_LOCK)) {
+ ulimit_pages =
+ DIV_ROUND_DOWN_ULL(rlimit(RLIMIT_MEMLOCK), PAGE_SIZE);
+
+ /*
+ * Pinning these pages would exceed this process's locked memory
+ * limit.
+ */
+ if (atomic64_read(&mm->pinned_vm) + npages > ulimit_pages)
+ return false;
+
+ /*
+ * Only allow 1/4 of the user's RLIMIT_MEMLOCK to be used for HFI
+ * caches. This fraction is then equally distributed among all
+ * existing user contexts. Note that if RLIMIT_MEMLOCK is
+ * 'unlimited' (-1), the value of this limit will be > 2^42 pages
+ * (2^64 / 2^12 / 2^8 / 2^2).
+ *
+ * The effectiveness of this check may be reduced if I/O occurs on
+ * some user contexts before all user contexts are created. This
+ * check assumes that this process is the only one using this
+ * context (e.g., the corresponding fd was not passed to another
+ * process for concurrent access) as there is no per-context,
+ * per-process tracking of pinned pages. It also assumes that each
+ * user context has only one cache to limit.
+ */
+ usr_ctxts = dd->num_rcv_contexts - dd->first_dyn_alloc_ctxt;
+ if (nlocked + npages > (ulimit_pages / usr_ctxts / 4))
+ return false;
+ }
- /* First, check the absolute limit against all pinned pages. */
- if (pinned + npages >= ulimit && !can_lock)
+ /*
+ * Pinning these pages would exceed the size limit for this cache.
+ */
+ cache_limit_pages = cache_size * (1024 * 1024) / PAGE_SIZE;
+ if (nlocked + npages > cache_limit_pages)
return false;
- return ((nlocked + npages) <= size) || can_lock;
+ return true;
}
int hfi1_acquire_user_pages(struct mm_struct *mm, unsigned long vaddr, size_t npages,
bool writable, struct page **pages)
{
int ret;
+ unsigned int gup_flags = FOLL_LONGTERM | (writable ? FOLL_WRITE : 0);
- ret = get_user_pages_fast(vaddr, npages, writable, pages);
+ ret = pin_user_pages_fast(vaddr, npages, gup_flags, pages);
if (ret < 0)
return ret;
- down_write(&mm->mmap_sem);
- mm->pinned_vm += ret;
- up_write(&mm->mmap_sem);
+ atomic64_add(ret, &mm->pinned_vm);
return ret;
}
@@ -120,17 +95,9 @@ int hfi1_acquire_user_pages(struct mm_struct *mm, unsigned long vaddr, size_t np
void hfi1_release_user_pages(struct mm_struct *mm, struct page **p,
size_t npages, bool dirty)
{
- size_t i;
-
- for (i = 0; i < npages; i++) {
- if (dirty)
- set_page_dirty_lock(p[i]);
- put_page(p[i]);
- }
+ unpin_user_pages_dirty_lock(p, npages, dirty);
if (mm) { /* during close after signal, mm can be NULL */
- down_write(&mm->mmap_sem);
- mm->pinned_vm -= npages;
- up_write(&mm->mmap_sem);
+ atomic64_sub(npages, &mm->pinned_vm);
}
}
diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c
index 7d22f8ee98ef..9b1aece1b080 100644
--- a/drivers/infiniband/hw/hfi1/user_sdma.c
+++ b/drivers/infiniband/hw/hfi1/user_sdma.c
@@ -1,49 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
+ * Copyright(c) 2020 - 2023 Cornelis Networks, Inc.
+ * Copyright(c) 2015 - 2018 Intel Corporation.
*/
+
#include <linux/mm.h>
#include <linux/types.h>
#include <linux/device.h>
@@ -60,6 +20,7 @@
#include <linux/mmu_context.h>
#include <linux/module.h>
#include <linux/vmalloc.h>
+#include <linux/string.h>
#include "hfi.h"
#include "sdma.h"
@@ -67,292 +28,68 @@
#include "verbs.h" /* for the headers */
#include "common.h" /* for struct hfi1_tid_info */
#include "trace.h"
-#include "mmu_rb.h"
static uint hfi1_sdma_comp_ring_size = 128;
module_param_named(sdma_comp_size, hfi1_sdma_comp_ring_size, uint, S_IRUGO);
MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 128");
-/* The maximum number of Data io vectors per message/request */
-#define MAX_VECTORS_PER_REQ 8
-/*
- * Maximum number of packet to send from each message/request
- * before moving to the next one.
- */
-#define MAX_PKTS_PER_QUEUE 16
-
-#define num_pages(x) (1 + ((((x) - 1) & PAGE_MASK) >> PAGE_SHIFT))
-
-#define req_opcode(x) \
- (((x) >> HFI1_SDMA_REQ_OPCODE_SHIFT) & HFI1_SDMA_REQ_OPCODE_MASK)
-#define req_version(x) \
- (((x) >> HFI1_SDMA_REQ_VERSION_SHIFT) & HFI1_SDMA_REQ_OPCODE_MASK)
-#define req_iovcnt(x) \
- (((x) >> HFI1_SDMA_REQ_IOVCNT_SHIFT) & HFI1_SDMA_REQ_IOVCNT_MASK)
-
-/* Number of BTH.PSN bits used for sequence number in expected rcvs */
-#define BTH_SEQ_MASK 0x7ffull
-
-/*
- * Define fields in the KDETH header so we can update the header
- * template.
- */
-#define KDETH_OFFSET_SHIFT 0
-#define KDETH_OFFSET_MASK 0x7fff
-#define KDETH_OM_SHIFT 15
-#define KDETH_OM_MASK 0x1
-#define KDETH_TID_SHIFT 16
-#define KDETH_TID_MASK 0x3ff
-#define KDETH_TIDCTRL_SHIFT 26
-#define KDETH_TIDCTRL_MASK 0x3
-#define KDETH_INTR_SHIFT 28
-#define KDETH_INTR_MASK 0x1
-#define KDETH_SH_SHIFT 29
-#define KDETH_SH_MASK 0x1
-#define KDETH_HCRC_UPPER_SHIFT 16
-#define KDETH_HCRC_UPPER_MASK 0xff
-#define KDETH_HCRC_LOWER_SHIFT 24
-#define KDETH_HCRC_LOWER_MASK 0xff
-
-#define AHG_KDETH_INTR_SHIFT 12
-#define AHG_KDETH_SH_SHIFT 13
-
-#define PBC2LRH(x) ((((x) & 0xfff) << 2) - 4)
-#define LRH2PBC(x) ((((x) >> 2) + 1) & 0xfff)
-
-#define KDETH_GET(val, field) \
- (((le32_to_cpu((val))) >> KDETH_##field##_SHIFT) & KDETH_##field##_MASK)
-#define KDETH_SET(dw, field, val) do { \
- u32 dwval = le32_to_cpu(dw); \
- dwval &= ~(KDETH_##field##_MASK << KDETH_##field##_SHIFT); \
- dwval |= (((val) & KDETH_##field##_MASK) << \
- KDETH_##field##_SHIFT); \
- dw = cpu_to_le32(dwval); \
- } while (0)
-
-#define AHG_HEADER_SET(arr, idx, dw, bit, width, value) \
- do { \
- if ((idx) < ARRAY_SIZE((arr))) \
- (arr)[(idx++)] = sdma_build_ahg_descriptor( \
- (__force u16)(value), (dw), (bit), \
- (width)); \
- else \
- return -ERANGE; \
- } while (0)
-
-/* KDETH OM multipliers and switch over point */
-#define KDETH_OM_SMALL 4
-#define KDETH_OM_LARGE 64
-#define KDETH_OM_MAX_SIZE (1 << ((KDETH_OM_LARGE / KDETH_OM_SMALL) + 1))
-
-/* Tx request flag bits */
-#define TXREQ_FLAGS_REQ_ACK BIT(0) /* Set the ACK bit in the header */
-#define TXREQ_FLAGS_REQ_DISABLE_SH BIT(1) /* Disable header suppression */
-
-/* SDMA request flag bits */
-#define SDMA_REQ_FOR_THREAD 1
-#define SDMA_REQ_SEND_DONE 2
-#define SDMA_REQ_HAVE_AHG 3
-#define SDMA_REQ_HAS_ERROR 4
-#define SDMA_REQ_DONE_ERROR 5
-
-#define SDMA_PKT_Q_INACTIVE BIT(0)
-#define SDMA_PKT_Q_ACTIVE BIT(1)
-#define SDMA_PKT_Q_DEFERRED BIT(2)
-
-/*
- * Maximum retry attempts to submit a TX request
- * before putting the process to sleep.
- */
-#define MAX_DEFER_RETRY_COUNT 1
-
static unsigned initial_pkt_count = 8;
-#define SDMA_IOWAIT_TIMEOUT 1000 /* in milliseconds */
-
-struct sdma_mmu_node;
-
-struct user_sdma_iovec {
- struct list_head list;
- struct iovec iov;
- /* number of pages in this vector */
- unsigned npages;
- /* array of pinned pages for this vector */
- struct page **pages;
- /*
- * offset into the virtual address space of the vector at
- * which we last left off.
- */
- u64 offset;
- struct sdma_mmu_node *node;
-};
-
-struct sdma_mmu_node {
- struct mmu_rb_node rb;
- struct hfi1_user_sdma_pkt_q *pq;
- atomic_t refcount;
- struct page **pages;
- unsigned npages;
-};
-
-/* evict operation argument */
-struct evict_data {
- u32 cleared; /* count evicted so far */
- u32 target; /* target count to evict */
-};
-
-struct user_sdma_request {
- struct sdma_req_info info;
- struct hfi1_user_sdma_pkt_q *pq;
- struct hfi1_user_sdma_comp_q *cq;
- /* This is the original header from user space */
- struct hfi1_pkt_header hdr;
- /*
- * Pointer to the SDMA engine for this request.
- * Since different request could be on different VLs,
- * each request will need it's own engine pointer.
- */
- struct sdma_engine *sde;
- u8 ahg_idx;
- u32 ahg[9];
- /*
- * KDETH.Offset (Eager) field
- * We need to remember the initial value so the headers
- * can be updated properly.
- */
- u32 koffset;
- /*
- * KDETH.OFFSET (TID) field
- * The offset can cover multiple packets, depending on the
- * size of the TID entry.
- */
- u32 tidoffset;
- /*
- * KDETH.OM
- * Remember this because the header template always sets it
- * to 0.
- */
- u8 omfactor;
- /*
- * We copy the iovs for this request (based on
- * info.iovcnt). These are only the data vectors
- */
- unsigned data_iovs;
- /* total length of the data in the request */
- u32 data_len;
- /* progress index moving along the iovs array */
- unsigned iov_idx;
- struct user_sdma_iovec iovs[MAX_VECTORS_PER_REQ];
- /* number of elements copied to the tids array */
- u16 n_tids;
- /* TID array values copied from the tid_iov vector */
- u32 *tids;
- u16 tididx;
- u32 sent;
- u64 seqnum;
- u64 seqcomp;
- u64 seqsubmitted;
- struct list_head txps;
- unsigned long flags;
- /* status of the last txreq completed */
- int status;
-};
-
-/*
- * A single txreq could span up to 3 physical pages when the MTU
- * is sufficiently large (> 4K). Each of the IOV pointers also
- * needs it's own set of flags so the vector has been handled
- * independently of each other.
- */
-struct user_sdma_txreq {
- /* Packet header for the txreq */
- struct hfi1_pkt_header hdr;
- struct sdma_txreq txreq;
- struct list_head list;
- struct user_sdma_request *req;
- u16 flags;
- unsigned busycount;
- u64 seqnum;
-};
-
-#define SDMA_DBG(req, fmt, ...) \
- hfi1_cdbg(SDMA, "[%u:%u:%u:%u] " fmt, (req)->pq->dd->unit, \
- (req)->pq->ctxt, (req)->pq->subctxt, (req)->info.comp_idx, \
- ##__VA_ARGS__)
-#define SDMA_Q_DBG(pq, fmt, ...) \
- hfi1_cdbg(SDMA, "[%u:%u:%u] " fmt, (pq)->dd->unit, (pq)->ctxt, \
- (pq)->subctxt, ##__VA_ARGS__)
-
-static int user_sdma_send_pkts(struct user_sdma_request *, unsigned);
-static int num_user_pages(const struct iovec *);
-static void user_sdma_txreq_cb(struct sdma_txreq *, int);
-static inline void pq_update(struct hfi1_user_sdma_pkt_q *);
-static void user_sdma_free_request(struct user_sdma_request *, bool);
-static int pin_vector_pages(struct user_sdma_request *,
- struct user_sdma_iovec *);
-static void unpin_vector_pages(struct mm_struct *, struct page **, unsigned,
- unsigned);
-static int check_header_template(struct user_sdma_request *,
- struct hfi1_pkt_header *, u32, u32);
-static int set_txreq_header(struct user_sdma_request *,
- struct user_sdma_txreq *, u32);
-static int set_txreq_header_ahg(struct user_sdma_request *,
- struct user_sdma_txreq *, u32);
-static inline void set_comp_state(struct hfi1_user_sdma_pkt_q *,
- struct hfi1_user_sdma_comp_q *,
- u16, enum hfi1_sdma_comp_state, int);
-static inline u32 set_pkt_bth_psn(__be32, u8, u32);
+static int user_sdma_send_pkts(struct user_sdma_request *req, u16 maxpkts);
+static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status);
+static inline void pq_update(struct hfi1_user_sdma_pkt_q *pq);
+static void user_sdma_free_request(struct user_sdma_request *req);
+static int check_header_template(struct user_sdma_request *req,
+ struct hfi1_pkt_header *hdr, u32 lrhlen,
+ u32 datalen);
+static int set_txreq_header(struct user_sdma_request *req,
+ struct user_sdma_txreq *tx, u32 datalen);
+static int set_txreq_header_ahg(struct user_sdma_request *req,
+ struct user_sdma_txreq *tx, u32 len);
+static inline void set_comp_state(struct hfi1_user_sdma_pkt_q *pq,
+ struct hfi1_user_sdma_comp_q *cq,
+ u16 idx, enum hfi1_sdma_comp_state state,
+ int ret);
+static inline u32 set_pkt_bth_psn(__be32 bthpsn, u8 expct, u32 frags);
static inline u32 get_lrh_len(struct hfi1_pkt_header, u32 len);
static int defer_packet_queue(
- struct sdma_engine *,
- struct iowait *,
- struct sdma_txreq *,
- unsigned seq);
-static void activate_packet_queue(struct iowait *, int);
-static bool sdma_rb_filter(struct mmu_rb_node *, unsigned long, unsigned long);
-static int sdma_rb_insert(void *, struct mmu_rb_node *);
-static int sdma_rb_evict(void *arg, struct mmu_rb_node *mnode,
- void *arg2, bool *stop);
-static void sdma_rb_remove(void *, struct mmu_rb_node *);
-static int sdma_rb_invalidate(void *, struct mmu_rb_node *);
-
-static struct mmu_rb_ops sdma_rb_ops = {
- .filter = sdma_rb_filter,
- .insert = sdma_rb_insert,
- .evict = sdma_rb_evict,
- .remove = sdma_rb_remove,
- .invalidate = sdma_rb_invalidate
-};
+ struct sdma_engine *sde,
+ struct iowait_work *wait,
+ struct sdma_txreq *txreq,
+ uint seq,
+ bool pkts_sent);
+static void activate_packet_queue(struct iowait *wait, int reason);
static int defer_packet_queue(
struct sdma_engine *sde,
- struct iowait *wait,
+ struct iowait_work *wait,
struct sdma_txreq *txreq,
- unsigned seq)
+ uint seq,
+ bool pkts_sent)
{
struct hfi1_user_sdma_pkt_q *pq =
- container_of(wait, struct hfi1_user_sdma_pkt_q, busy);
- struct hfi1_ibdev *dev = &pq->dd->verbs_dev;
- struct user_sdma_txreq *tx =
- container_of(txreq, struct user_sdma_txreq, txreq);
+ container_of(wait->iow, struct hfi1_user_sdma_pkt_q, busy);
- if (sdma_progress(sde, seq, txreq)) {
- if (tx->busycount++ < MAX_DEFER_RETRY_COUNT)
- goto eagain;
- }
+ write_seqlock(&sde->waitlock);
+ trace_hfi1_usdma_defer(pq, sde, &pq->busy);
+ if (sdma_progress(sde, seq, txreq))
+ goto eagain;
/*
* We are assuming that if the list is enqueued somewhere, it
* is to the dmawait list since that is the only place where
* it is supposed to be enqueued.
*/
xchg(&pq->state, SDMA_PKT_Q_DEFERRED);
- write_seqlock(&dev->iowait_lock);
- if (list_empty(&pq->busy.list))
- list_add_tail(&pq->busy.list, &sde->dmawait);
- write_sequnlock(&dev->iowait_lock);
+ if (list_empty(&pq->busy.list)) {
+ pq->busy.lock = &sde->waitlock;
+ iowait_get_priority(&pq->busy);
+ iowait_queue(pkts_sent, &pq->busy, &sde->dmawait);
+ }
+ write_sequnlock(&sde->waitlock);
return -EBUSY;
eagain:
+ write_sequnlock(&sde->waitlock);
return -EAGAIN;
}
@@ -360,150 +97,146 @@ static void activate_packet_queue(struct iowait *wait, int reason)
{
struct hfi1_user_sdma_pkt_q *pq =
container_of(wait, struct hfi1_user_sdma_pkt_q, busy);
+
+ trace_hfi1_usdma_activate(pq, wait, reason);
xchg(&pq->state, SDMA_PKT_Q_ACTIVE);
wake_up(&wait->wait_dma);
};
-static void sdma_kmem_cache_ctor(void *obj)
-{
- struct user_sdma_txreq *tx = obj;
-
- memset(tx, 0, sizeof(*tx));
-}
-
-int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, struct file *fp)
+int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt,
+ struct hfi1_filedata *fd)
{
- struct hfi1_filedata *fd;
- int ret = 0;
- unsigned memsize;
+ int ret = -ENOMEM;
char buf[64];
struct hfi1_devdata *dd;
struct hfi1_user_sdma_comp_q *cq;
struct hfi1_user_sdma_pkt_q *pq;
- unsigned long flags;
-
- if (!uctxt || !fp) {
- ret = -EBADF;
- goto done;
- }
- fd = fp->private_data;
+ if (!uctxt || !fd)
+ return -EBADF;
- if (!hfi1_sdma_comp_ring_size) {
- ret = -EINVAL;
- goto done;
- }
+ if (!hfi1_sdma_comp_ring_size)
+ return -EINVAL;
dd = uctxt->dd;
pq = kzalloc(sizeof(*pq), GFP_KERNEL);
if (!pq)
- goto pq_nomem;
-
- memsize = sizeof(*pq->reqs) * hfi1_sdma_comp_ring_size;
- pq->reqs = kzalloc(memsize, GFP_KERNEL);
- if (!pq->reqs)
- goto pq_reqs_nomem;
-
- memsize = BITS_TO_LONGS(hfi1_sdma_comp_ring_size) * sizeof(long);
- pq->req_in_use = kzalloc(memsize, GFP_KERNEL);
- if (!pq->req_in_use)
- goto pq_reqs_no_in_use;
-
- INIT_LIST_HEAD(&pq->list);
+ return -ENOMEM;
pq->dd = dd;
pq->ctxt = uctxt->ctxt;
pq->subctxt = fd->subctxt;
pq->n_max_reqs = hfi1_sdma_comp_ring_size;
- pq->state = SDMA_PKT_Q_INACTIVE;
atomic_set(&pq->n_reqs, 0);
init_waitqueue_head(&pq->wait);
atomic_set(&pq->n_locked, 0);
- pq->mm = fd->mm;
- iowait_init(&pq->busy, 0, NULL, defer_packet_queue,
- activate_packet_queue, NULL);
+ iowait_init(&pq->busy, 0, NULL, NULL, defer_packet_queue,
+ activate_packet_queue, NULL, NULL);
pq->reqidx = 0;
+
+ pq->reqs = kcalloc(hfi1_sdma_comp_ring_size,
+ sizeof(*pq->reqs),
+ GFP_KERNEL);
+ if (!pq->reqs)
+ goto pq_reqs_nomem;
+
+ pq->req_in_use = bitmap_zalloc(hfi1_sdma_comp_ring_size, GFP_KERNEL);
+ if (!pq->req_in_use)
+ goto pq_reqs_no_in_use;
+
snprintf(buf, 64, "txreq-kmem-cache-%u-%u-%u", dd->unit, uctxt->ctxt,
fd->subctxt);
pq->txreq_cache = kmem_cache_create(buf,
- sizeof(struct user_sdma_txreq),
+ sizeof(struct user_sdma_txreq),
L1_CACHE_BYTES,
SLAB_HWCACHE_ALIGN,
- sdma_kmem_cache_ctor);
+ NULL);
if (!pq->txreq_cache) {
dd_dev_err(dd, "[%u] Failed to allocate TxReq cache\n",
uctxt->ctxt);
goto pq_txreq_nomem;
}
- fd->pq = pq;
+
cq = kzalloc(sizeof(*cq), GFP_KERNEL);
if (!cq)
goto cq_nomem;
- memsize = PAGE_ALIGN(sizeof(*cq->comps) * hfi1_sdma_comp_ring_size);
- cq->comps = vmalloc_user(memsize);
+ cq->comps = vmalloc_user(PAGE_ALIGN(sizeof(*cq->comps)
+ * hfi1_sdma_comp_ring_size));
if (!cq->comps)
goto cq_comps_nomem;
cq->nentries = hfi1_sdma_comp_ring_size;
- fd->cq = cq;
- ret = hfi1_mmu_rb_register(pq, pq->mm, &sdma_rb_ops, dd->pport->hfi1_wq,
- &pq->handler);
- if (ret) {
- dd_dev_err(dd, "Failed to register with MMU %d", ret);
- goto done;
- }
+ ret = hfi1_init_system_pinning(pq);
+ if (ret)
+ goto pq_mmu_fail;
- spin_lock_irqsave(&uctxt->sdma_qlock, flags);
- list_add(&pq->list, &uctxt->sdma_queues);
- spin_unlock_irqrestore(&uctxt->sdma_qlock, flags);
- goto done;
+ rcu_assign_pointer(fd->pq, pq);
+ fd->cq = cq;
+
+ return 0;
+pq_mmu_fail:
+ vfree(cq->comps);
cq_comps_nomem:
kfree(cq);
cq_nomem:
kmem_cache_destroy(pq->txreq_cache);
pq_txreq_nomem:
- kfree(pq->req_in_use);
+ bitmap_free(pq->req_in_use);
pq_reqs_no_in_use:
kfree(pq->reqs);
pq_reqs_nomem:
kfree(pq);
- fd->pq = NULL;
-pq_nomem:
- ret = -ENOMEM;
-done:
+
return ret;
}
-int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd)
+static void flush_pq_iowait(struct hfi1_user_sdma_pkt_q *pq)
{
- struct hfi1_ctxtdata *uctxt = fd->uctxt;
- struct hfi1_user_sdma_pkt_q *pq;
unsigned long flags;
+ seqlock_t *lock = pq->busy.lock;
+
+ if (!lock)
+ return;
+ write_seqlock_irqsave(lock, flags);
+ if (!list_empty(&pq->busy.list)) {
+ list_del_init(&pq->busy.list);
+ pq->busy.lock = NULL;
+ }
+ write_sequnlock_irqrestore(lock, flags);
+}
+
+int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd,
+ struct hfi1_ctxtdata *uctxt)
+{
+ struct hfi1_user_sdma_pkt_q *pq;
- hfi1_cdbg(SDMA, "[%u:%u:%u] Freeing user SDMA queues", uctxt->dd->unit,
- uctxt->ctxt, fd->subctxt);
- pq = fd->pq;
+ trace_hfi1_sdma_user_free_queues(uctxt->dd, uctxt->ctxt, fd->subctxt);
+
+ spin_lock(&fd->pq_rcu_lock);
+ pq = srcu_dereference_check(fd->pq, &fd->pq_srcu,
+ lockdep_is_held(&fd->pq_rcu_lock));
if (pq) {
- if (pq->handler)
- hfi1_mmu_rb_unregister(pq->handler);
- spin_lock_irqsave(&uctxt->sdma_qlock, flags);
- if (!list_empty(&pq->list))
- list_del_init(&pq->list);
- spin_unlock_irqrestore(&uctxt->sdma_qlock, flags);
+ rcu_assign_pointer(fd->pq, NULL);
+ spin_unlock(&fd->pq_rcu_lock);
+ synchronize_srcu(&fd->pq_srcu);
+ /* at this point there can be no more new requests */
iowait_sdma_drain(&pq->busy);
/* Wait until all requests have been freed. */
wait_event_interruptible(
pq->wait,
- (ACCESS_ONCE(pq->state) == SDMA_PKT_Q_INACTIVE));
+ !atomic_read(&pq->n_reqs));
kfree(pq->reqs);
- kfree(pq->req_in_use);
+ hfi1_free_system_pinning(pq);
+ bitmap_free(pq->req_in_use);
kmem_cache_destroy(pq->txreq_cache);
+ flush_pq_iowait(pq);
kfree(pq);
- fd->pq = NULL;
+ } else {
+ spin_unlock(&fd->pq_rcu_lock);
}
if (fd->cq) {
vfree(fd->cq->comps);
@@ -534,13 +267,21 @@ static u8 dlid_to_selector(u16 dlid)
return mapping[hash];
}
-int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
- unsigned long dim, unsigned long *count)
+/**
+ * hfi1_user_sdma_process_request() - Process and start a user sdma request
+ * @fd: valid file descriptor
+ * @iovec: array of io vectors to process
+ * @dim: overall iovec array size
+ * @count: number of io vector array entries processed
+ */
+int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
+ struct iovec *iovec, unsigned long dim,
+ unsigned long *count)
{
int ret = 0, i;
- struct hfi1_filedata *fd = fp->private_data;
struct hfi1_ctxtdata *uctxt = fd->uctxt;
- struct hfi1_user_sdma_pkt_q *pq = fd->pq;
+ struct hfi1_user_sdma_pkt_q *pq =
+ srcu_dereference(fd->pq, &fd->pq_srcu);
struct hfi1_user_sdma_comp_q *cq = fd->cq;
struct hfi1_devdata *dd = pq->dd;
unsigned long idx = 0;
@@ -548,7 +289,8 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
struct sdma_req_info info;
struct user_sdma_request *req;
u8 opcode, sc, vl;
- int req_queued = 0;
+ u16 pkey;
+ u32 slid;
u16 dlid;
u32 selector;
@@ -569,7 +311,6 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
trace_hfi1_sdma_user_reqinfo(dd, uctxt->ctxt, fd->subctxt,
(u16 *)&info);
-
if (info.comp_idx >= hfi1_sdma_comp_ring_size) {
hfi1_cdbg(SDMA,
"[%u:%u:%u:%u] Invalid comp index",
@@ -606,18 +347,28 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
/*
* All safety checks have been done and this request has been claimed.
*/
- hfi1_cdbg(SDMA, "[%u:%u:%u] Using req/comp entry %u\n", dd->unit,
- uctxt->ctxt, fd->subctxt, info.comp_idx);
+ trace_hfi1_sdma_user_process_request(dd, uctxt->ctxt, fd->subctxt,
+ info.comp_idx);
req = pq->reqs + info.comp_idx;
- memset(req, 0, sizeof(*req));
req->data_iovs = req_iovcnt(info.ctrl) - 1; /* subtract header vector */
+ req->data_len = 0;
req->pq = pq;
req->cq = cq;
- req->status = -1;
+ req->ahg_idx = -1;
+ req->iov_idx = 0;
+ req->sent = 0;
+ req->seqnum = 0;
+ req->seqcomp = 0;
+ req->seqsubmitted = 0;
+ req->tids = NULL;
+ req->has_error = 0;
INIT_LIST_HEAD(&req->txps);
memcpy(&req->info, &info, sizeof(info));
+ /* The request is initialized, count it */
+ atomic_inc(&pq->n_reqs);
+
if (req_opcode(info.ctrl) == EXPECTED) {
/* expected must have a TID info and at least one data vector */
if (req->data_iovs < 2) {
@@ -635,6 +386,7 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
ret = -EINVAL;
goto free_req;
}
+
/* Copy the header from the user buffer */
ret = copy_from_user(&req->hdr, iovec[idx].iov_base + sizeof(info),
sizeof(req->hdr));
@@ -672,8 +424,9 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
}
/* Checking P_KEY for requests from user-space */
- if (egress_pkey_check(dd->pport, req->hdr.lrh, req->hdr.bth, sc,
- PKEY_CHECK_INVALID)) {
+ pkey = (u16)be32_to_cpu(req->hdr.bth[0]);
+ slid = be16_to_cpu(req->hdr.lrh[3]);
+ if (egress_pkey_check(dd->pport, slid, pkey, sc, PKEY_CHECK_INVALID)) {
ret = -EINVAL;
goto free_req;
}
@@ -697,22 +450,25 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
req->tidoffset = KDETH_GET(req->hdr.kdeth.ver_tid_offset, OFFSET) *
(KDETH_GET(req->hdr.kdeth.ver_tid_offset, OM) ?
KDETH_OM_LARGE : KDETH_OM_SMALL);
- SDMA_DBG(req, "Initial TID offset %u", req->tidoffset);
+ trace_hfi1_sdma_user_initial_tidoffset(dd, uctxt->ctxt, fd->subctxt,
+ info.comp_idx, req->tidoffset);
idx++;
/* Save all the IO vector structures */
for (i = 0; i < req->data_iovs; i++) {
+ req->iovs[i].offset = 0;
INIT_LIST_HEAD(&req->iovs[i].list);
- memcpy(&req->iovs[i].iov, iovec + idx++, sizeof(struct iovec));
- ret = pin_vector_pages(req, &req->iovs[i]);
- if (ret) {
- req->status = ret;
+ memcpy(&req->iovs[i].iov,
+ iovec + idx++,
+ sizeof(req->iovs[i].iov));
+ if (req->iovs[i].iov.iov_len == 0) {
+ ret = -EINVAL;
goto free_req;
}
req->data_len += req->iovs[i].iov.iov_len;
}
- SDMA_DBG(req, "total data length %u", req->data_len);
-
+ trace_hfi1_sdma_user_data_length(dd, uctxt->ctxt, fd->subctxt,
+ info.comp_idx, req->data_len);
if (pcount > req->info.npkts)
pcount = req->info.npkts;
/*
@@ -725,31 +481,30 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
*/
if (req_opcode(req->info.ctrl) == EXPECTED) {
u16 ntids = iovec[idx].iov_len / sizeof(*req->tids);
+ u32 *tmp;
if (!ntids || ntids > MAX_TID_PAIR_ENTRIES) {
ret = -EINVAL;
goto free_req;
}
- req->tids = kcalloc(ntids, sizeof(*req->tids), GFP_KERNEL);
- if (!req->tids) {
- ret = -ENOMEM;
- goto free_req;
- }
+
/*
* We have to copy all of the tids because they may vary
* in size and, therefore, the TID count might not be
* equal to the pkt count. However, there is no way to
* tell at this point.
*/
- ret = copy_from_user(req->tids, iovec[idx].iov_base,
- ntids * sizeof(*req->tids));
- if (ret) {
- SDMA_DBG(req, "Failed to copy %d TIDs (%d)",
- ntids, ret);
- ret = -EFAULT;
+ tmp = memdup_array_user(iovec[idx].iov_base,
+ ntids, sizeof(*req->tids));
+ if (IS_ERR(tmp)) {
+ ret = PTR_ERR(tmp);
+ SDMA_DBG(req, "Failed to copy %d TIDs (%pe)", ntids,
+ tmp);
goto free_req;
}
+ req->tids = tmp;
req->n_tids = ntids;
+ req->tididx = 0;
idx++;
}
@@ -764,33 +519,11 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
}
/* We don't need an AHG entry if the request contains only one packet */
- if (req->info.npkts > 1 && HFI1_CAP_IS_USET(SDMA_AHG)) {
- int ahg = sdma_ahg_alloc(req->sde);
-
- if (likely(ahg >= 0)) {
- req->ahg_idx = (u8)ahg;
- set_bit(SDMA_REQ_HAVE_AHG, &req->flags);
- }
- }
+ if (req->info.npkts > 1 && HFI1_CAP_IS_USET(SDMA_AHG))
+ req->ahg_idx = sdma_ahg_alloc(req->sde);
set_comp_state(pq, cq, info.comp_idx, QUEUED, 0);
- atomic_inc(&pq->n_reqs);
- req_queued = 1;
- /* Send the first N packets in the request to buy us some time */
- ret = user_sdma_send_pkts(req, pcount);
- if (unlikely(ret < 0 && ret != -EBUSY)) {
- req->status = ret;
- goto free_req;
- }
-
- /*
- * It is possible that the SDMA engine would have processed all the
- * submitted packets by the time we get here. Therefore, only set
- * packet queue state to ACTIVE if there are still uncompleted
- * requests.
- */
- if (atomic_read(&pq->n_reqs))
- xchg(&pq->state, SDMA_PKT_Q_ACTIVE);
+ pq->state = SDMA_PKT_Q_ACTIVE;
/*
* This is a somewhat blocking send implementation.
@@ -798,31 +531,39 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
* request have been submitted to the SDMA engine. However, it
* will not wait for send completions.
*/
- while (!test_bit(SDMA_REQ_SEND_DONE, &req->flags)) {
+ while (req->seqsubmitted != req->info.npkts) {
ret = user_sdma_send_pkts(req, pcount);
if (ret < 0) {
- if (ret != -EBUSY) {
- req->status = ret;
- set_bit(SDMA_REQ_DONE_ERROR, &req->flags);
- if (ACCESS_ONCE(req->seqcomp) ==
- req->seqsubmitted - 1)
- goto free_req;
- return ret;
- }
- wait_event_interruptible_timeout(
+ int we_ret;
+
+ if (ret != -EBUSY)
+ goto free_req;
+ we_ret = wait_event_interruptible_timeout(
pq->busy.wait_dma,
- (pq->state == SDMA_PKT_Q_ACTIVE),
+ pq->state == SDMA_PKT_Q_ACTIVE,
msecs_to_jiffies(
SDMA_IOWAIT_TIMEOUT));
+ trace_hfi1_usdma_we(pq, we_ret);
+ if (we_ret <= 0)
+ flush_pq_iowait(pq);
}
}
*count += idx;
return 0;
free_req:
- user_sdma_free_request(req, true);
- if (req_queued)
+ /*
+ * If the submitted seqsubmitted == npkts, the completion routine
+ * controls the final state. If sequbmitted < npkts, wait for any
+ * outstanding packets to finish before cleaning up.
+ */
+ if (req->seqsubmitted < req->info.npkts) {
+ if (req->seqsubmitted)
+ wait_event(pq->busy.wait_dma,
+ (req->seqcomp == req->seqsubmitted - 1));
+ user_sdma_free_request(req);
pq_update(pq);
- set_comp_state(pq, cq, info.comp_idx, ERROR, req->status);
+ set_comp_state(pq, cq, info.comp_idx, ERROR, ret);
+ }
return ret;
}
@@ -874,7 +615,11 @@ static inline u32 compute_data_length(struct user_sdma_request *req,
} else {
len = min(req->data_len - req->sent, (u32)req->info.fragsize);
}
- SDMA_DBG(req, "Data Length = %u", len);
+ trace_hfi1_sdma_user_compute_length(req->pq->dd,
+ req->pq->ctxt,
+ req->pq->subctxt,
+ req->info.comp_idx,
+ len);
return len;
}
@@ -891,9 +636,46 @@ static inline u32 get_lrh_len(struct hfi1_pkt_header hdr, u32 len)
return ((sizeof(hdr) - sizeof(hdr.pbc)) + 4 + len);
}
-static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
+static int user_sdma_txadd_ahg(struct user_sdma_request *req,
+ struct user_sdma_txreq *tx,
+ u32 datalen)
{
- int ret = 0, count;
+ int ret;
+ u16 pbclen = le16_to_cpu(req->hdr.pbc[0]);
+ u32 lrhlen = get_lrh_len(req->hdr, pad_len(datalen));
+ struct hfi1_user_sdma_pkt_q *pq = req->pq;
+
+ /*
+ * Copy the request header into the tx header
+ * because the HW needs a cacheline-aligned
+ * address.
+ * This copy can be optimized out if the hdr
+ * member of user_sdma_request were also
+ * cacheline aligned.
+ */
+ memcpy(&tx->hdr, &req->hdr, sizeof(tx->hdr));
+ if (PBC2LRH(pbclen) != lrhlen) {
+ pbclen = (pbclen & 0xf000) | LRH2PBC(lrhlen);
+ tx->hdr.pbc[0] = cpu_to_le16(pbclen);
+ }
+ ret = check_header_template(req, &tx->hdr, lrhlen, datalen);
+ if (ret)
+ return ret;
+ ret = sdma_txinit_ahg(&tx->txreq, SDMA_TXREQ_F_AHG_COPY,
+ sizeof(tx->hdr) + datalen, req->ahg_idx,
+ 0, NULL, 0, user_sdma_txreq_cb);
+ if (ret)
+ return ret;
+ ret = sdma_txadd_kvaddr(pq->dd, &tx->txreq, &tx->hdr, sizeof(tx->hdr));
+ if (ret)
+ sdma_txclean(pq->dd, &tx->txreq);
+ return ret;
+}
+
+static int user_sdma_send_pkts(struct user_sdma_request *req, u16 maxpkts)
+{
+ int ret = 0;
+ u16 count;
unsigned npkts = 0;
struct user_sdma_txreq *tx = NULL;
struct hfi1_user_sdma_pkt_q *pq = NULL;
@@ -905,10 +687,8 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
pq = req->pq;
/* If tx completion has reported an error, we are done. */
- if (test_bit(SDMA_REQ_HAS_ERROR, &req->flags)) {
- set_bit(SDMA_REQ_DONE_ERROR, &req->flags);
+ if (READ_ONCE(req->has_error))
return -EFAULT;
- }
/*
* Check if we might have sent the entire request already
@@ -923,18 +703,15 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
maxpkts = req->info.npkts - req->seqnum;
while (npkts < maxpkts) {
- u32 datalen = 0, queued = 0, data_sent = 0;
- u64 iov_offset = 0;
+ u32 datalen = 0;
/*
* Check whether any of the completions have come back
* with errors. If so, we are not going to process any
* more packets from this request.
*/
- if (test_bit(SDMA_REQ_HAS_ERROR, &req->flags)) {
- set_bit(SDMA_REQ_DONE_ERROR, &req->flags);
+ if (READ_ONCE(req->has_error))
return -EFAULT;
- }
tx = kmem_cache_alloc(pq->txreq_cache, GFP_KERNEL);
if (!tx)
@@ -942,7 +719,6 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
tx->flags = 0;
tx->req = req;
- tx->busycount = 0;
INIT_LIST_HEAD(&tx->list);
/*
@@ -960,10 +736,10 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
*/
if (req->data_len) {
iovec = &req->iovs[req->iov_idx];
- if (ACCESS_ONCE(iovec->offset) == iovec->iov.iov_len) {
+ if (READ_ONCE(iovec->offset) == iovec->iov.iov_len) {
if (++req->iov_idx == req->data_iovs) {
ret = -EFAULT;
- goto free_txreq;
+ goto free_tx;
}
iovec = &req->iovs[req->iov_idx];
WARN_ON(iovec->offset);
@@ -989,53 +765,20 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
}
}
- if (test_bit(SDMA_REQ_HAVE_AHG, &req->flags)) {
+ if (req->ahg_idx >= 0) {
if (!req->seqnum) {
- u16 pbclen = le16_to_cpu(req->hdr.pbc[0]);
- u32 lrhlen = get_lrh_len(req->hdr,
- pad_len(datalen));
- /*
- * Copy the request header into the tx header
- * because the HW needs a cacheline-aligned
- * address.
- * This copy can be optimized out if the hdr
- * member of user_sdma_request were also
- * cacheline aligned.
- */
- memcpy(&tx->hdr, &req->hdr, sizeof(tx->hdr));
- if (PBC2LRH(pbclen) != lrhlen) {
- pbclen = (pbclen & 0xf000) |
- LRH2PBC(lrhlen);
- tx->hdr.pbc[0] = cpu_to_le16(pbclen);
- }
- ret = check_header_template(req, &tx->hdr,
- lrhlen, datalen);
+ ret = user_sdma_txadd_ahg(req, tx, datalen);
if (ret)
goto free_tx;
- ret = sdma_txinit_ahg(&tx->txreq,
- SDMA_TXREQ_F_AHG_COPY,
- sizeof(tx->hdr) + datalen,
- req->ahg_idx, 0, NULL, 0,
- user_sdma_txreq_cb);
- if (ret)
- goto free_tx;
- ret = sdma_txadd_kvaddr(pq->dd, &tx->txreq,
- &tx->hdr,
- sizeof(tx->hdr));
- if (ret)
- goto free_txreq;
} else {
int changes;
changes = set_txreq_header_ahg(req, tx,
datalen);
- if (changes < 0)
+ if (changes < 0) {
+ ret = changes;
goto free_tx;
- sdma_txinit_ahg(&tx->txreq,
- SDMA_TXREQ_F_USE_AHG,
- datalen, req->ahg_idx, changes,
- req->ahg, sizeof(req->hdr),
- user_sdma_txreq_cb);
+ }
}
} else {
ret = sdma_txinit(&tx->txreq, 0, sizeof(req->hdr) +
@@ -1053,52 +796,17 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
goto free_txreq;
}
- /*
- * If the request contains any data vectors, add up to
- * fragsize bytes to the descriptor.
- */
- while (queued < datalen &&
- (req->sent + data_sent) < req->data_len) {
- unsigned long base, offset;
- unsigned pageidx, len;
-
- base = (unsigned long)iovec->iov.iov_base;
- offset = offset_in_page(base + iovec->offset +
- iov_offset);
- pageidx = (((iovec->offset + iov_offset +
- base) - (base & PAGE_MASK)) >> PAGE_SHIFT);
- len = offset + req->info.fragsize > PAGE_SIZE ?
- PAGE_SIZE - offset : req->info.fragsize;
- len = min((datalen - queued), len);
- ret = sdma_txadd_page(pq->dd, &tx->txreq,
- iovec->pages[pageidx],
- offset, len);
- if (ret) {
- SDMA_DBG(req, "SDMA txreq add page failed %d\n",
- ret);
- goto free_txreq;
- }
- iov_offset += len;
- queued += len;
- data_sent += len;
- if (unlikely(queued < datalen &&
- pageidx == iovec->npages &&
- req->iov_idx < req->data_iovs - 1)) {
- iovec->offset += iov_offset;
- iovec = &req->iovs[++req->iov_idx];
- iov_offset = 0;
- }
- }
- /*
- * The txreq was submitted successfully so we can update
- * the counters.
- */
req->koffset += datalen;
if (req_opcode(req->info.ctrl) == EXPECTED)
req->tidoffset += datalen;
- req->sent += data_sent;
- if (req->data_len)
- iovec->offset += iov_offset;
+ req->sent += datalen;
+ while (datalen) {
+ ret = hfi1_add_pages_to_sdma_packet(req, tx, iovec,
+ &datalen);
+ if (ret)
+ goto free_txreq;
+ iovec = &req->iovs[req->iov_idx];
+ }
list_add_tail(&tx->txreq.list, &req->txps);
/*
* It is important to increment this here as it is used to
@@ -1109,17 +817,18 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
npkts++;
}
dosend:
- ret = sdma_send_txlist(req->sde, &pq->busy, &req->txps, &count);
+ ret = sdma_send_txlist(req->sde,
+ iowait_get_ib_work(&pq->busy),
+ &req->txps, &count);
req->seqsubmitted += count;
if (req->seqsubmitted == req->info.npkts) {
- set_bit(SDMA_REQ_SEND_DONE, &req->flags);
/*
* The txreq has already been submitted to the HW queue
* so we can free the AHG entry now. Corruption will not
* happen due to the sequential manner in which
* descriptors are processed.
*/
- if (test_bit(SDMA_REQ_HAVE_AHG, &req->flags))
+ if (req->ahg_idx >= 0)
sdma_ahg_free(req->sde, req->ahg_idx);
}
return ret;
@@ -1131,122 +840,6 @@ free_tx:
return ret;
}
-/*
- * How many pages in this iovec element?
- */
-static inline int num_user_pages(const struct iovec *iov)
-{
- const unsigned long addr = (unsigned long)iov->iov_base;
- const unsigned long len = iov->iov_len;
- const unsigned long spage = addr & PAGE_MASK;
- const unsigned long epage = (addr + len - 1) & PAGE_MASK;
-
- return 1 + ((epage - spage) >> PAGE_SHIFT);
-}
-
-static u32 sdma_cache_evict(struct hfi1_user_sdma_pkt_q *pq, u32 npages)
-{
- struct evict_data evict_data;
-
- evict_data.cleared = 0;
- evict_data.target = npages;
- hfi1_mmu_rb_evict(pq->handler, &evict_data);
- return evict_data.cleared;
-}
-
-static int pin_vector_pages(struct user_sdma_request *req,
- struct user_sdma_iovec *iovec)
-{
- int ret = 0, pinned, npages, cleared;
- struct page **pages;
- struct hfi1_user_sdma_pkt_q *pq = req->pq;
- struct sdma_mmu_node *node = NULL;
- struct mmu_rb_node *rb_node;
-
- rb_node = hfi1_mmu_rb_extract(pq->handler,
- (unsigned long)iovec->iov.iov_base,
- iovec->iov.iov_len);
- if (rb_node)
- node = container_of(rb_node, struct sdma_mmu_node, rb);
- else
- rb_node = NULL;
-
- if (!node) {
- node = kzalloc(sizeof(*node), GFP_KERNEL);
- if (!node)
- return -ENOMEM;
-
- node->rb.addr = (unsigned long)iovec->iov.iov_base;
- node->pq = pq;
- atomic_set(&node->refcount, 0);
- }
-
- npages = num_user_pages(&iovec->iov);
- if (node->npages < npages) {
- pages = kcalloc(npages, sizeof(*pages), GFP_KERNEL);
- if (!pages) {
- SDMA_DBG(req, "Failed page array alloc");
- ret = -ENOMEM;
- goto bail;
- }
- memcpy(pages, node->pages, node->npages * sizeof(*pages));
-
- npages -= node->npages;
-
-retry:
- if (!hfi1_can_pin_pages(pq->dd, pq->mm,
- atomic_read(&pq->n_locked), npages)) {
- cleared = sdma_cache_evict(pq, npages);
- if (cleared >= npages)
- goto retry;
- }
- pinned = hfi1_acquire_user_pages(pq->mm,
- ((unsigned long)iovec->iov.iov_base +
- (node->npages * PAGE_SIZE)), npages, 0,
- pages + node->npages);
- if (pinned < 0) {
- kfree(pages);
- ret = pinned;
- goto bail;
- }
- if (pinned != npages) {
- unpin_vector_pages(pq->mm, pages, node->npages,
- pinned);
- ret = -EFAULT;
- goto bail;
- }
- kfree(node->pages);
- node->rb.len = iovec->iov.iov_len;
- node->pages = pages;
- node->npages += pinned;
- npages = node->npages;
- atomic_add(pinned, &pq->n_locked);
- }
- iovec->pages = node->pages;
- iovec->npages = npages;
- iovec->node = node;
-
- ret = hfi1_mmu_rb_insert(req->pq->handler, &node->rb);
- if (ret) {
- atomic_sub(node->npages, &pq->n_locked);
- iovec->node = NULL;
- goto bail;
- }
- return 0;
-bail:
- if (rb_node)
- unpin_vector_pages(pq->mm, node->pages, 0, node->npages);
- kfree(node);
- return ret;
-}
-
-static void unpin_vector_pages(struct mm_struct *mm, struct page **pages,
- unsigned start, unsigned npages)
-{
- hfi1_release_user_pages(mm, pages + start, npages, false);
- kfree(pages);
-}
-
static int check_header_template(struct user_sdma_request *req,
struct hfi1_pkt_header *hdr, u32 lrhlen,
u32 datalen)
@@ -1310,7 +903,8 @@ static inline u32 set_pkt_bth_psn(__be32 bthpsn, u8 expct, u32 frags)
0xffffffull),
psn = val & mask;
if (expct)
- psn = (psn & ~BTH_SEQ_MASK) | ((psn + frags) & BTH_SEQ_MASK);
+ psn = (psn & ~HFI1_KDETH_BTH_SEQ_MASK) |
+ ((psn + frags) & HFI1_KDETH_BTH_SEQ_MASK);
else
psn = psn + frags;
return psn & mask;
@@ -1321,6 +915,7 @@ static int set_txreq_header(struct user_sdma_request *req,
{
struct hfi1_user_sdma_pkt_q *pq = req->pq;
struct hfi1_pkt_header *hdr = &tx->hdr;
+ u8 omfactor; /* KDETH.OM */
u16 pbclen;
int ret;
u32 tidval = 0, lrhlen = get_lrh_len(*hdr, pad_len(datalen));
@@ -1398,8 +993,9 @@ static int set_txreq_header(struct user_sdma_request *req,
}
tidval = req->tids[req->tididx];
}
- req->omfactor = EXP_TID_GET(tidval, LEN) * PAGE_SIZE >=
- KDETH_OM_MAX_SIZE ? KDETH_OM_LARGE : KDETH_OM_SMALL;
+ omfactor = EXP_TID_GET(tidval, LEN) * PAGE_SIZE >=
+ KDETH_OM_MAX_SIZE ? KDETH_OM_LARGE_SHIFT :
+ KDETH_OM_SMALL_SHIFT;
/* Set KDETH.TIDCtrl based on value for this TID. */
KDETH_SET(hdr->kdeth.ver_tid_offset, TIDCTRL,
EXP_TID_GET(tidval, CTRL));
@@ -1413,13 +1009,14 @@ static int set_txreq_header(struct user_sdma_request *req,
* Set the KDETH.OFFSET and KDETH.OM based on size of
* transfer.
*/
- SDMA_DBG(req, "TID offset %ubytes %uunits om%u",
- req->tidoffset, req->tidoffset / req->omfactor,
- req->omfactor != KDETH_OM_SMALL);
+ trace_hfi1_sdma_user_tid_info(
+ pq->dd, pq->ctxt, pq->subctxt, req->info.comp_idx,
+ req->tidoffset, req->tidoffset >> omfactor,
+ omfactor != KDETH_OM_SMALL_SHIFT);
KDETH_SET(hdr->kdeth.ver_tid_offset, OFFSET,
- req->tidoffset / req->omfactor);
+ req->tidoffset >> omfactor);
KDETH_SET(hdr->kdeth.ver_tid_offset, OM,
- req->omfactor != KDETH_OM_SMALL);
+ omfactor != KDETH_OM_SMALL_SHIFT);
}
done:
trace_hfi1_sdma_user_header(pq->dd, pq->ctxt, pq->subctxt,
@@ -1428,21 +1025,28 @@ done:
}
static int set_txreq_header_ahg(struct user_sdma_request *req,
- struct user_sdma_txreq *tx, u32 len)
+ struct user_sdma_txreq *tx, u32 datalen)
{
- int diff = 0;
+ u32 ahg[AHG_KDETH_ARRAY_SIZE];
+ int idx = 0;
+ u8 omfactor; /* KDETH.OM */
struct hfi1_user_sdma_pkt_q *pq = req->pq;
struct hfi1_pkt_header *hdr = &req->hdr;
u16 pbclen = le16_to_cpu(hdr->pbc[0]);
- u32 val32, tidval = 0, lrhlen = get_lrh_len(*hdr, pad_len(len));
+ u32 val32, tidval = 0, lrhlen = get_lrh_len(*hdr, pad_len(datalen));
+ size_t array_size = ARRAY_SIZE(ahg);
if (PBC2LRH(pbclen) != lrhlen) {
/* PBC.PbcLengthDWs */
- AHG_HEADER_SET(req->ahg, diff, 0, 0, 12,
- cpu_to_le16(LRH2PBC(lrhlen)));
+ idx = ahg_header_set(ahg, idx, array_size, 0, 0, 12,
+ (__force u16)cpu_to_le16(LRH2PBC(lrhlen)));
+ if (idx < 0)
+ return idx;
/* LRH.PktLen (we need the full 16 bits due to byte swap) */
- AHG_HEADER_SET(req->ahg, diff, 3, 0, 16,
- cpu_to_be16(lrhlen >> 2));
+ idx = ahg_header_set(ahg, idx, array_size, 3, 0, 16,
+ (__force u16)cpu_to_be16(lrhlen >> 2));
+ if (idx < 0)
+ return idx;
}
/*
@@ -1453,13 +1057,23 @@ static int set_txreq_header_ahg(struct user_sdma_request *req,
(HFI1_CAP_IS_KSET(EXTENDED_PSN) ? 0x7fffffff : 0xffffff);
if (unlikely(tx->flags & TXREQ_FLAGS_REQ_ACK))
val32 |= 1UL << 31;
- AHG_HEADER_SET(req->ahg, diff, 6, 0, 16, cpu_to_be16(val32 >> 16));
- AHG_HEADER_SET(req->ahg, diff, 6, 16, 16, cpu_to_be16(val32 & 0xffff));
+ idx = ahg_header_set(ahg, idx, array_size, 6, 0, 16,
+ (__force u16)cpu_to_be16(val32 >> 16));
+ if (idx < 0)
+ return idx;
+ idx = ahg_header_set(ahg, idx, array_size, 6, 16, 16,
+ (__force u16)cpu_to_be16(val32 & 0xffff));
+ if (idx < 0)
+ return idx;
/* KDETH.Offset */
- AHG_HEADER_SET(req->ahg, diff, 15, 0, 16,
- cpu_to_le16(req->koffset & 0xffff));
- AHG_HEADER_SET(req->ahg, diff, 15, 16, 16,
- cpu_to_le16(req->koffset >> 16));
+ idx = ahg_header_set(ahg, idx, array_size, 15, 0, 16,
+ (__force u16)cpu_to_le16(req->koffset & 0xffff));
+ if (idx < 0)
+ return idx;
+ idx = ahg_header_set(ahg, idx, array_size, 15, 16, 16,
+ (__force u16)cpu_to_le16(req->koffset >> 16));
+ if (idx < 0)
+ return idx;
if (req_opcode(req->info.ctrl) == EXPECTED) {
__le16 val;
@@ -1477,19 +1091,22 @@ static int set_txreq_header_ahg(struct user_sdma_request *req,
* we have to check again.
*/
if (++req->tididx > req->n_tids - 1 ||
- !req->tids[req->tididx]) {
+ !req->tids[req->tididx])
return -EINVAL;
- }
tidval = req->tids[req->tididx];
}
- req->omfactor = ((EXP_TID_GET(tidval, LEN) *
+ omfactor = ((EXP_TID_GET(tidval, LEN) *
PAGE_SIZE) >=
- KDETH_OM_MAX_SIZE) ? KDETH_OM_LARGE :
- KDETH_OM_SMALL;
+ KDETH_OM_MAX_SIZE) ? KDETH_OM_LARGE_SHIFT :
+ KDETH_OM_SMALL_SHIFT;
/* KDETH.OM and KDETH.OFFSET (TID) */
- AHG_HEADER_SET(req->ahg, diff, 7, 0, 16,
- ((!!(req->omfactor - KDETH_OM_SMALL)) << 15 |
- ((req->tidoffset / req->omfactor) & 0x7fff)));
+ idx = ahg_header_set(
+ ahg, idx, array_size, 7, 0, 16,
+ ((!!(omfactor - KDETH_OM_SMALL_SHIFT)) << 15 |
+ ((req->tidoffset >> omfactor)
+ & 0x7fff)));
+ if (idx < 0)
+ return idx;
/* KDETH.TIDCtrl, KDETH.TID, KDETH.Intr, KDETH.SH */
val = cpu_to_le16(((EXP_TID_GET(tidval, CTRL) & 0x3) << 10) |
(EXP_TID_GET(tidval, IDX) & 0x3ff));
@@ -1506,20 +1123,33 @@ static int set_txreq_header_ahg(struct user_sdma_request *req,
AHG_KDETH_INTR_SHIFT));
}
- AHG_HEADER_SET(req->ahg, diff, 7, 16, 14, val);
+ idx = ahg_header_set(ahg, idx, array_size,
+ 7, 16, 14, (__force u16)val);
+ if (idx < 0)
+ return idx;
}
trace_hfi1_sdma_user_header_ahg(pq->dd, pq->ctxt, pq->subctxt,
req->info.comp_idx, req->sde->this_idx,
- req->ahg_idx, req->ahg, diff, tidval);
- return diff;
+ req->ahg_idx, ahg, idx, tidval);
+ sdma_txinit_ahg(&tx->txreq,
+ SDMA_TXREQ_F_USE_AHG,
+ datalen, req->ahg_idx, idx,
+ ahg, sizeof(req->hdr),
+ user_sdma_txreq_cb);
+
+ return idx;
}
-/*
- * SDMA tx request completion callback. Called when the SDMA progress
- * state machine gets notification that the SDMA descriptors for this
- * tx request have been processed by the DMA engine. Called in
- * interrupt context.
+/**
+ * user_sdma_txreq_cb() - SDMA tx request completion callback.
+ * @txreq: valid sdma tx request
+ * @status: success/failure of request
+ *
+ * Called when the SDMA progress state machine gets notification that
+ * the SDMA descriptors for this tx request have been processed by the
+ * DMA engine. Called in interrupt context.
+ * Only do work on completed sequences.
*/
static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status)
{
@@ -1528,7 +1158,7 @@ static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status)
struct user_sdma_request *req;
struct hfi1_user_sdma_pkt_q *pq;
struct hfi1_user_sdma_comp_q *cq;
- u16 idx;
+ enum hfi1_sdma_comp_state state = COMPLETE;
if (!tx->req)
return;
@@ -1540,43 +1170,29 @@ static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status)
if (status != SDMA_TXREQ_S_OK) {
SDMA_DBG(req, "SDMA completion with error %d",
status);
- set_bit(SDMA_REQ_HAS_ERROR, &req->flags);
+ WRITE_ONCE(req->has_error, 1);
+ state = ERROR;
}
req->seqcomp = tx->seqnum;
kmem_cache_free(pq->txreq_cache, tx);
- tx = NULL;
-
- idx = req->info.comp_idx;
- if (req->status == -1 && status == SDMA_TXREQ_S_OK) {
- if (req->seqcomp == req->info.npkts - 1) {
- req->status = 0;
- user_sdma_free_request(req, false);
- pq_update(pq);
- set_comp_state(pq, cq, idx, COMPLETE, 0);
- }
- } else {
- if (status != SDMA_TXREQ_S_OK)
- req->status = status;
- if (req->seqcomp == (ACCESS_ONCE(req->seqsubmitted) - 1) &&
- (test_bit(SDMA_REQ_SEND_DONE, &req->flags) ||
- test_bit(SDMA_REQ_DONE_ERROR, &req->flags))) {
- user_sdma_free_request(req, false);
- pq_update(pq);
- set_comp_state(pq, cq, idx, ERROR, req->status);
- }
- }
+
+ /* sequence isn't complete? We are done */
+ if (req->seqcomp != req->info.npkts - 1)
+ return;
+
+ user_sdma_free_request(req);
+ set_comp_state(pq, cq, req->info.comp_idx, state, status);
+ pq_update(pq);
}
static inline void pq_update(struct hfi1_user_sdma_pkt_q *pq)
{
- if (atomic_dec_and_test(&pq->n_reqs)) {
- xchg(&pq->state, SDMA_PKT_Q_INACTIVE);
+ if (atomic_dec_and_test(&pq->n_reqs))
wake_up(&pq->wait);
- }
}
-static void user_sdma_free_request(struct user_sdma_request *req, bool unpin)
+static void user_sdma_free_request(struct user_sdma_request *req)
{
if (!list_empty(&req->txps)) {
struct sdma_txreq *t, *p;
@@ -1589,22 +1205,7 @@ static void user_sdma_free_request(struct user_sdma_request *req, bool unpin)
kmem_cache_free(req->pq->txreq_cache, tx);
}
}
- if (req->data_iovs) {
- struct sdma_mmu_node *node;
- int i;
-
- for (i = 0; i < req->data_iovs; i++) {
- node = req->iovs[i].node;
- if (!node)
- continue;
-
- if (unpin)
- hfi1_mmu_rb_remove(req->pq->handler,
- &node->rb);
- else
- atomic_dec(&node->refcount);
- }
- }
+
kfree(req->tids);
clear_bit(req->info.comp_idx, req->pq->req_in_use);
}
@@ -1614,74 +1215,10 @@ static inline void set_comp_state(struct hfi1_user_sdma_pkt_q *pq,
u16 idx, enum hfi1_sdma_comp_state state,
int ret)
{
- hfi1_cdbg(SDMA, "[%u:%u:%u:%u] Setting completion status %u %d",
- pq->dd->unit, pq->ctxt, pq->subctxt, idx, state, ret);
- cq->comps[idx].status = state;
if (state == ERROR)
cq->comps[idx].errcode = -ret;
+ smp_wmb(); /* make sure errcode is visible first */
+ cq->comps[idx].status = state;
trace_hfi1_sdma_user_completion(pq->dd, pq->ctxt, pq->subctxt,
idx, state, ret);
}
-
-static bool sdma_rb_filter(struct mmu_rb_node *node, unsigned long addr,
- unsigned long len)
-{
- return (bool)(node->addr == addr);
-}
-
-static int sdma_rb_insert(void *arg, struct mmu_rb_node *mnode)
-{
- struct sdma_mmu_node *node =
- container_of(mnode, struct sdma_mmu_node, rb);
-
- atomic_inc(&node->refcount);
- return 0;
-}
-
-/*
- * Return 1 to remove the node from the rb tree and call the remove op.
- *
- * Called with the rb tree lock held.
- */
-static int sdma_rb_evict(void *arg, struct mmu_rb_node *mnode,
- void *evict_arg, bool *stop)
-{
- struct sdma_mmu_node *node =
- container_of(mnode, struct sdma_mmu_node, rb);
- struct evict_data *evict_data = evict_arg;
-
- /* is this node still being used? */
- if (atomic_read(&node->refcount))
- return 0; /* keep this node */
-
- /* this node will be evicted, add its pages to our count */
- evict_data->cleared += node->npages;
-
- /* have enough pages been cleared? */
- if (evict_data->cleared >= evict_data->target)
- *stop = true;
-
- return 1; /* remove this node */
-}
-
-static void sdma_rb_remove(void *arg, struct mmu_rb_node *mnode)
-{
- struct sdma_mmu_node *node =
- container_of(mnode, struct sdma_mmu_node, rb);
-
- atomic_sub(node->npages, &node->pq->n_locked);
-
- unpin_vector_pages(node->pq->mm, node->pages, 0, node->npages);
-
- kfree(node);
-}
-
-static int sdma_rb_invalidate(void *arg, struct mmu_rb_node *mnode)
-{
- struct sdma_mmu_node *node =
- container_of(mnode, struct sdma_mmu_node, rb);
-
- if (!atomic_read(&node->refcount))
- return 1;
- return 0;
-}
diff --git a/drivers/infiniband/hw/hfi1/user_sdma.h b/drivers/infiniband/hw/hfi1/user_sdma.h
index 39001714f551..8735524e3a9a 100644
--- a/drivers/infiniband/hw/hfi1/user_sdma.h
+++ b/drivers/infiniband/hw/hfi1/user_sdma.h
@@ -1,62 +1,88 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
+ * Copyright(c) 2023 - Cornelis Networks, Inc.
+ * Copyright(c) 2015 - 2018 Intel Corporation.
*/
+#ifndef _HFI1_USER_SDMA_H
+#define _HFI1_USER_SDMA_H
+
#include <linux/device.h>
#include <linux/wait.h>
#include "common.h"
#include "iowait.h"
#include "user_exp_rcv.h"
+#include "mmu_rb.h"
+#include "pinning.h"
+#include "sdma.h"
+
+/* The maximum number of Data io vectors per message/request */
+#define MAX_VECTORS_PER_REQ 8
+/*
+ * Maximum number of packet to send from each message/request
+ * before moving to the next one.
+ */
+#define MAX_PKTS_PER_QUEUE 16
+
+#define num_pages(x) (1 + ((((x) - 1) & PAGE_MASK) >> PAGE_SHIFT))
+
+#define req_opcode(x) \
+ (((x) >> HFI1_SDMA_REQ_OPCODE_SHIFT) & HFI1_SDMA_REQ_OPCODE_MASK)
+#define req_version(x) \
+ (((x) >> HFI1_SDMA_REQ_VERSION_SHIFT) & HFI1_SDMA_REQ_OPCODE_MASK)
+#define req_iovcnt(x) \
+ (((x) >> HFI1_SDMA_REQ_IOVCNT_SHIFT) & HFI1_SDMA_REQ_IOVCNT_MASK)
+
+/* Number of BTH.PSN bits used for sequence number in expected rcvs */
+#define BTH_SEQ_MASK 0x7ffull
+
+#define AHG_KDETH_INTR_SHIFT 12
+#define AHG_KDETH_SH_SHIFT 13
+#define AHG_KDETH_ARRAY_SIZE 9
-extern uint extended_psn;
+#define PBC2LRH(x) ((((x) & 0xfff) << 2) - 4)
+#define LRH2PBC(x) ((((x) >> 2) + 1) & 0xfff)
+
+/**
+ * Build an SDMA AHG header update descriptor and save it to an array.
+ * @arr - Array to save the descriptor to.
+ * @idx - Index of the array at which the descriptor will be saved.
+ * @array_size - Size of the array arr.
+ * @dw - Update index into the header in DWs.
+ * @bit - Start bit.
+ * @width - Field width.
+ * @value - 16 bits of immediate data to write into the field.
+ * Returns -ERANGE if idx is invalid. If successful, returns the next index
+ * (idx + 1) of the array to be used for the next descriptor.
+ */
+static inline int ahg_header_set(u32 *arr, int idx, size_t array_size,
+ u8 dw, u8 bit, u8 width, u16 value)
+{
+ if ((size_t)idx >= array_size)
+ return -ERANGE;
+ arr[idx++] = sdma_build_ahg_descriptor(value, dw, bit, width);
+ return idx;
+}
+
+/* Tx request flag bits */
+#define TXREQ_FLAGS_REQ_ACK BIT(0) /* Set the ACK bit in the header */
+#define TXREQ_FLAGS_REQ_DISABLE_SH BIT(1) /* Disable header suppression */
+
+enum pkt_q_sdma_state {
+ SDMA_PKT_Q_ACTIVE,
+ SDMA_PKT_Q_DEFERRED,
+};
+
+#define SDMA_IOWAIT_TIMEOUT 1000 /* in milliseconds */
+
+#define SDMA_DBG(req, fmt, ...) \
+ hfi1_cdbg(SDMA, "[%u:%u:%u:%u] " fmt, (req)->pq->dd->unit, \
+ (req)->pq->ctxt, (req)->pq->subctxt, (req)->info.comp_idx, \
+ ##__VA_ARGS__)
struct hfi1_user_sdma_pkt_q {
- struct list_head list;
- unsigned ctxt;
- unsigned subctxt;
+ u16 ctxt;
+ u16 subctxt;
u16 n_max_reqs;
atomic_t n_reqs;
u16 reqidx;
@@ -65,12 +91,11 @@ struct hfi1_user_sdma_pkt_q {
struct user_sdma_request *reqs;
unsigned long *req_in_use;
struct iowait busy;
- unsigned state;
+ enum pkt_q_sdma_state state;
wait_queue_head_t wait;
unsigned long unpinned;
struct mmu_rb_handler *handler;
atomic_t n_locked;
- struct mm_struct *mm;
};
struct hfi1_user_sdma_comp_q {
@@ -78,7 +103,99 @@ struct hfi1_user_sdma_comp_q {
struct hfi1_sdma_comp_entry *comps;
};
-int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *, struct file *);
-int hfi1_user_sdma_free_queues(struct hfi1_filedata *);
-int hfi1_user_sdma_process_request(struct file *, struct iovec *, unsigned long,
- unsigned long *);
+struct user_sdma_iovec {
+ struct list_head list;
+ struct iovec iov;
+ /*
+ * offset into the virtual address space of the vector at
+ * which we last left off.
+ */
+ u64 offset;
+};
+
+/* evict operation argument */
+struct evict_data {
+ u32 cleared; /* count evicted so far */
+ u32 target; /* target count to evict */
+};
+
+struct user_sdma_request {
+ /* This is the original header from user space */
+ struct hfi1_pkt_header hdr;
+
+ /* Read mostly fields */
+ struct hfi1_user_sdma_pkt_q *pq ____cacheline_aligned_in_smp;
+ struct hfi1_user_sdma_comp_q *cq;
+ /*
+ * Pointer to the SDMA engine for this request.
+ * Since different request could be on different VLs,
+ * each request will need it's own engine pointer.
+ */
+ struct sdma_engine *sde;
+ struct sdma_req_info info;
+ /* TID array values copied from the tid_iov vector */
+ u32 *tids;
+ /* total length of the data in the request */
+ u32 data_len;
+ /* number of elements copied to the tids array */
+ u16 n_tids;
+ /*
+ * We copy the iovs for this request (based on
+ * info.iovcnt). These are only the data vectors
+ */
+ u8 data_iovs;
+ s8 ahg_idx;
+
+ /* Writeable fields shared with interrupt */
+ u16 seqcomp ____cacheline_aligned_in_smp;
+ u16 seqsubmitted;
+
+ /* Send side fields */
+ struct list_head txps ____cacheline_aligned_in_smp;
+ u16 seqnum;
+ /*
+ * KDETH.OFFSET (TID) field
+ * The offset can cover multiple packets, depending on the
+ * size of the TID entry.
+ */
+ u32 tidoffset;
+ /*
+ * KDETH.Offset (Eager) field
+ * We need to remember the initial value so the headers
+ * can be updated properly.
+ */
+ u32 koffset;
+ u32 sent;
+ /* TID index copied from the tid_iov vector */
+ u16 tididx;
+ /* progress index moving along the iovs array */
+ u8 iov_idx;
+ u8 has_error;
+
+ struct user_sdma_iovec iovs[MAX_VECTORS_PER_REQ];
+} ____cacheline_aligned_in_smp;
+
+/*
+ * A single txreq could span up to 3 physical pages when the MTU
+ * is sufficiently large (> 4K). Each of the IOV pointers also
+ * needs it's own set of flags so the vector has been handled
+ * independently of each other.
+ */
+struct user_sdma_txreq {
+ /* Packet header for the txreq */
+ struct hfi1_pkt_header hdr;
+ struct sdma_txreq txreq;
+ struct list_head list;
+ struct user_sdma_request *req;
+ u16 flags;
+ u16 seqnum;
+};
+
+int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt,
+ struct hfi1_filedata *fd);
+int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd,
+ struct hfi1_ctxtdata *uctxt);
+int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
+ struct iovec *iovec, unsigned long dim,
+ unsigned long *count);
+#endif /* _HFI1_USER_SDMA_H */
diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c
index 95ed4d6da510..3cbbfccdd8cd 100644
--- a/drivers/infiniband/hw/hfi1/verbs.c
+++ b/drivers/infiniband/hw/hfi1/verbs.c
@@ -1,48 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
+ * Copyright(c) 2015 - 2020 Intel Corporation.
*/
#include <rdma/ib_mad.h>
@@ -53,6 +11,8 @@
#include <linux/rculist.h>
#include <linux/mm.h>
#include <linux/vmalloc.h>
+#include <rdma/opa_addr.h>
+#include <linux/nospec.h>
#include "hfi.h"
#include "common.h"
@@ -60,6 +20,11 @@
#include "trace.h"
#include "qp.h"
#include "verbs_txreq.h"
+#include "debugfs.h"
+#include "vnic.h"
+#include "fault.h"
+#include "affinity.h"
+#include "ipoib.h"
static unsigned int hfi1_lkey_table_size = 16;
module_param_named(lkey_table_size, hfi1_lkey_table_size, uint,
@@ -124,8 +89,6 @@ unsigned short piothreshold = 256;
module_param(piothreshold, ushort, S_IRUGO);
MODULE_PARM_DESC(piothreshold, "size used to determine sdma vs. pio");
-#define COPY_CACHELESS 1
-#define COPY_ADAPTIVE 2
static unsigned int sge_copy_mode;
module_param(sge_copy_mode, uint, S_IRUGO);
MODULE_PARM_DESC(sge_copy_mode,
@@ -143,158 +106,30 @@ static int pio_wait(struct rvt_qp *qp,
/* Length of buffer to create verbs txreq cache name */
#define TXREQ_NAME_LEN 24
-static uint wss_threshold;
+static uint wss_threshold = 80;
module_param(wss_threshold, uint, S_IRUGO);
MODULE_PARM_DESC(wss_threshold, "Percentage (1-100) of LLC to use as a threshold for a cacheless copy");
static uint wss_clean_period = 256;
module_param(wss_clean_period, uint, S_IRUGO);
MODULE_PARM_DESC(wss_clean_period, "Count of verbs copies before an entry in the page copy table is cleaned");
-/* memory working set size */
-struct hfi1_wss {
- unsigned long *entries;
- atomic_t total_count;
- atomic_t clean_counter;
- atomic_t clean_entry;
-
- int threshold;
- int num_entries;
- long pages_mask;
-};
-
-static struct hfi1_wss wss;
-
-int hfi1_wss_init(void)
-{
- long llc_size;
- long llc_bits;
- long table_size;
- long table_bits;
-
- /* check for a valid percent range - default to 80 if none or invalid */
- if (wss_threshold < 1 || wss_threshold > 100)
- wss_threshold = 80;
- /* reject a wildly large period */
- if (wss_clean_period > 1000000)
- wss_clean_period = 256;
- /* reject a zero period */
- if (wss_clean_period == 0)
- wss_clean_period = 1;
-
- /*
- * Calculate the table size - the next power of 2 larger than the
- * LLC size. LLC size is in KiB.
- */
- llc_size = wss_llc_size() * 1024;
- table_size = roundup_pow_of_two(llc_size);
-
- /* one bit per page in rounded up table */
- llc_bits = llc_size / PAGE_SIZE;
- table_bits = table_size / PAGE_SIZE;
- wss.pages_mask = table_bits - 1;
- wss.num_entries = table_bits / BITS_PER_LONG;
-
- wss.threshold = (llc_bits * wss_threshold) / 100;
- if (wss.threshold == 0)
- wss.threshold = 1;
-
- atomic_set(&wss.clean_counter, wss_clean_period);
-
- wss.entries = kcalloc(wss.num_entries, sizeof(*wss.entries),
- GFP_KERNEL);
- if (!wss.entries) {
- hfi1_wss_exit();
- return -ENOMEM;
- }
-
- return 0;
-}
-
-void hfi1_wss_exit(void)
-{
- /* coded to handle partially initialized and repeat callers */
- kfree(wss.entries);
- wss.entries = NULL;
-}
-
-/*
- * Advance the clean counter. When the clean period has expired,
- * clean an entry.
- *
- * This is implemented in atomics to avoid locking. Because multiple
- * variables are involved, it can be racy which can lead to slightly
- * inaccurate information. Since this is only a heuristic, this is
- * OK. Any innaccuracies will clean themselves out as the counter
- * advances. That said, it is unlikely the entry clean operation will
- * race - the next possible racer will not start until the next clean
- * period.
- *
- * The clean counter is implemented as a decrement to zero. When zero
- * is reached an entry is cleaned.
- */
-static void wss_advance_clean_counter(void)
-{
- int entry;
- int weight;
- unsigned long bits;
-
- /* become the cleaner if we decrement the counter to zero */
- if (atomic_dec_and_test(&wss.clean_counter)) {
- /*
- * Set, not add, the clean period. This avoids an issue
- * where the counter could decrement below the clean period.
- * Doing a set can result in lost decrements, slowing the
- * clean advance. Since this a heuristic, this possible
- * slowdown is OK.
- *
- * An alternative is to loop, advancing the counter by a
- * clean period until the result is > 0. However, this could
- * lead to several threads keeping another in the clean loop.
- * This could be mitigated by limiting the number of times
- * we stay in the loop.
- */
- atomic_set(&wss.clean_counter, wss_clean_period);
-
- /*
- * Uniquely grab the entry to clean and move to next.
- * The current entry is always the lower bits of
- * wss.clean_entry. The table size, wss.num_entries,
- * is always a power-of-2.
- */
- entry = (atomic_inc_return(&wss.clean_entry) - 1)
- & (wss.num_entries - 1);
-
- /* clear the entry and count the bits */
- bits = xchg(&wss.entries[entry], 0);
- weight = hweight64((u64)bits);
- /* only adjust the contended total count if needed */
- if (weight)
- atomic_sub(weight, &wss.total_count);
- }
-}
-
-/*
- * Insert the given address into the working set array.
- */
-static void wss_insert(void *address)
-{
- u32 page = ((unsigned long)address >> PAGE_SHIFT) & wss.pages_mask;
- u32 entry = page / BITS_PER_LONG; /* assumes this ends up a shift */
- u32 nr = page & (BITS_PER_LONG - 1);
-
- if (!test_and_set_bit(nr, &wss.entries[entry]))
- atomic_inc(&wss.total_count);
-
- wss_advance_clean_counter();
-}
-
/*
- * Is the working set larger than the threshold?
+ * Translate ib_wr_opcode into ib_wc_opcode.
*/
-static inline int wss_exceeds_threshold(void)
-{
- return atomic_read(&wss.total_count) >= wss.threshold;
-}
+const enum ib_wc_opcode ib_hfi1_wc_opcode[] = {
+ [IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE,
+ [IB_WR_TID_RDMA_WRITE] = IB_WC_RDMA_WRITE,
+ [IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE,
+ [IB_WR_SEND] = IB_WC_SEND,
+ [IB_WR_SEND_WITH_IMM] = IB_WC_SEND,
+ [IB_WR_RDMA_READ] = IB_WC_RDMA_READ,
+ [IB_WR_TID_RDMA_READ] = IB_WC_RDMA_READ,
+ [IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP,
+ [IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD,
+ [IB_WR_SEND_WITH_INV] = IB_WC_SEND,
+ [IB_WR_LOCAL_INV] = IB_WC_LOCAL_INV,
+ [IB_WR_REG_MR] = IB_WC_REG_MR
+};
/*
* Length of header by opcode, 0 --> not supported
@@ -324,6 +159,14 @@ const u8 hdr_len_by_opcode[256] = {
[IB_OPCODE_RC_FETCH_ADD] = 12 + 8 + 28,
[IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE] = 12 + 8 + 4,
[IB_OPCODE_RC_SEND_ONLY_WITH_INVALIDATE] = 12 + 8 + 4,
+ [IB_OPCODE_TID_RDMA_READ_REQ] = 12 + 8 + 36,
+ [IB_OPCODE_TID_RDMA_READ_RESP] = 12 + 8 + 36,
+ [IB_OPCODE_TID_RDMA_WRITE_REQ] = 12 + 8 + 36,
+ [IB_OPCODE_TID_RDMA_WRITE_RESP] = 12 + 8 + 36,
+ [IB_OPCODE_TID_RDMA_WRITE_DATA] = 12 + 8 + 36,
+ [IB_OPCODE_TID_RDMA_WRITE_DATA_LAST] = 12 + 8 + 36,
+ [IB_OPCODE_TID_RDMA_ACK] = 12 + 8 + 36,
+ [IB_OPCODE_TID_RDMA_RESYNC] = 12 + 8 + 36,
/* UC */
[IB_OPCODE_UC_SEND_FIRST] = 12 + 8,
[IB_OPCODE_UC_SEND_MIDDLE] = 12 + 8,
@@ -367,6 +210,17 @@ static const opcode_handler opcode_handler_tbl[256] = {
[IB_OPCODE_RC_FETCH_ADD] = &hfi1_rc_rcv,
[IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE] = &hfi1_rc_rcv,
[IB_OPCODE_RC_SEND_ONLY_WITH_INVALIDATE] = &hfi1_rc_rcv,
+
+ /* TID RDMA has separate handlers for different opcodes.*/
+ [IB_OPCODE_TID_RDMA_WRITE_REQ] = &hfi1_rc_rcv_tid_rdma_write_req,
+ [IB_OPCODE_TID_RDMA_WRITE_RESP] = &hfi1_rc_rcv_tid_rdma_write_resp,
+ [IB_OPCODE_TID_RDMA_WRITE_DATA] = &hfi1_rc_rcv_tid_rdma_write_data,
+ [IB_OPCODE_TID_RDMA_WRITE_DATA_LAST] = &hfi1_rc_rcv_tid_rdma_write_data,
+ [IB_OPCODE_TID_RDMA_READ_REQ] = &hfi1_rc_rcv_tid_rdma_read_req,
+ [IB_OPCODE_TID_RDMA_READ_RESP] = &hfi1_rc_rcv_tid_rdma_read_resp,
+ [IB_OPCODE_TID_RDMA_RESYNC] = &hfi1_rc_rcv_tid_rdma_resync,
+ [IB_OPCODE_TID_RDMA_ACK] = &hfi1_rc_rcv_tid_rdma_ack,
+
/* UC */
[IB_OPCODE_UC_SEND_FIRST] = &hfi1_uc_rcv,
[IB_OPCODE_UC_SEND_MIDDLE] = &hfi1_uc_rcv,
@@ -414,163 +268,63 @@ static const u32 pio_opmask[BIT(3)] = {
*/
__be64 ib_hfi1_sys_image_guid;
-/**
- * hfi1_copy_sge - copy data to SGE memory
- * @ss: the SGE state
- * @data: the data to copy
- * @length: the length of the data
- * @copy_last: do a separate copy of the last 8 bytes
+/*
+ * Make sure the QP is ready and able to accept the given opcode.
*/
-void hfi1_copy_sge(
- struct rvt_sge_state *ss,
- void *data, u32 length,
- int release,
- int copy_last)
+static inline opcode_handler qp_ok(struct hfi1_packet *packet)
{
- struct rvt_sge *sge = &ss->sge;
- int in_last = 0;
- int i;
- int cacheless_copy = 0;
-
- if (sge_copy_mode == COPY_CACHELESS) {
- cacheless_copy = length >= PAGE_SIZE;
- } else if (sge_copy_mode == COPY_ADAPTIVE) {
- if (length >= PAGE_SIZE) {
- /*
- * NOTE: this *assumes*:
- * o The first vaddr is the dest.
- * o If multiple pages, then vaddr is sequential.
- */
- wss_insert(sge->vaddr);
- if (length >= (2 * PAGE_SIZE))
- wss_insert(sge->vaddr + PAGE_SIZE);
-
- cacheless_copy = wss_exceeds_threshold();
- } else {
- wss_advance_clean_counter();
- }
- }
- if (copy_last) {
- if (length > 8) {
- length -= 8;
- } else {
- copy_last = 0;
- in_last = 1;
- }
- }
-
-again:
- while (length) {
- u32 len = sge->length;
-
- if (len > length)
- len = length;
- if (len > sge->sge_length)
- len = sge->sge_length;
- WARN_ON_ONCE(len == 0);
- if (unlikely(in_last)) {
- /* enforce byte transfer ordering */
- for (i = 0; i < len; i++)
- ((u8 *)sge->vaddr)[i] = ((u8 *)data)[i];
- } else if (cacheless_copy) {
- cacheless_memcpy(sge->vaddr, data, len);
- } else {
- memcpy(sge->vaddr, data, len);
- }
- sge->vaddr += len;
- sge->length -= len;
- sge->sge_length -= len;
- if (sge->sge_length == 0) {
- if (release)
- rvt_put_mr(sge->mr);
- if (--ss->num_sge)
- *sge = *ss->sg_list++;
- } else if (sge->length == 0 && sge->mr->lkey) {
- if (++sge->n >= RVT_SEGSZ) {
- if (++sge->m >= sge->mr->mapsz)
- break;
- sge->n = 0;
- }
- sge->vaddr =
- sge->mr->map[sge->m]->segs[sge->n].vaddr;
- sge->length =
- sge->mr->map[sge->m]->segs[sge->n].length;
- }
- data += len;
- length -= len;
- }
+ if (!(ib_rvt_state_ops[packet->qp->state] & RVT_PROCESS_RECV_OK))
+ return NULL;
+ if (((packet->opcode & RVT_OPCODE_QP_MASK) ==
+ packet->qp->allowed_ops) ||
+ (packet->opcode == IB_OPCODE_CNP))
+ return opcode_handler_tbl[packet->opcode];
- if (copy_last) {
- copy_last = 0;
- in_last = 1;
- length = 8;
- goto again;
- }
+ return NULL;
}
-/**
- * hfi1_skip_sge - skip over SGE memory
- * @ss: the SGE state
- * @length: the number of bytes to skip
- */
-void hfi1_skip_sge(struct rvt_sge_state *ss, u32 length, int release)
+static u64 hfi1_fault_tx(struct rvt_qp *qp, u8 opcode, u64 pbc)
{
- struct rvt_sge *sge = &ss->sge;
-
- while (length) {
- u32 len = sge->length;
-
- if (len > length)
- len = length;
- if (len > sge->sge_length)
- len = sge->sge_length;
- WARN_ON_ONCE(len == 0);
- sge->vaddr += len;
- sge->length -= len;
- sge->sge_length -= len;
- if (sge->sge_length == 0) {
- if (release)
- rvt_put_mr(sge->mr);
- if (--ss->num_sge)
- *sge = *ss->sg_list++;
- } else if (sge->length == 0 && sge->mr->lkey) {
- if (++sge->n >= RVT_SEGSZ) {
- if (++sge->m >= sge->mr->mapsz)
- break;
- sge->n = 0;
- }
- sge->vaddr =
- sge->mr->map[sge->m]->segs[sge->n].vaddr;
- sge->length =
- sge->mr->map[sge->m]->segs[sge->n].length;
- }
- length -= len;
+#ifdef CONFIG_FAULT_INJECTION
+ if ((opcode & IB_OPCODE_MSP) == IB_OPCODE_MSP) {
+ /*
+ * In order to drop non-IB traffic we
+ * set PbcInsertHrc to NONE (0x2).
+ * The packet will still be delivered
+ * to the receiving node but a
+ * KHdrHCRCErr (KDETH packet with a bad
+ * HCRC) will be triggered and the
+ * packet will not be delivered to the
+ * correct context.
+ */
+ pbc &= ~PBC_INSERT_HCRC_SMASK;
+ pbc |= (u64)PBC_IHCRC_NONE << PBC_INSERT_HCRC_SHIFT;
+ } else {
+ /*
+ * In order to drop regular verbs
+ * traffic we set the PbcTestEbp
+ * flag. The packet will still be
+ * delivered to the receiving node but
+ * a 'late ebp error' will be
+ * triggered and will be dropped.
+ */
+ pbc |= PBC_TEST_EBP;
}
+#endif
+ return pbc;
}
-/*
- * Make sure the QP is ready and able to accept the given opcode.
- */
-static inline opcode_handler qp_ok(int opcode, struct hfi1_packet *packet)
+static opcode_handler tid_qp_ok(int opcode, struct hfi1_packet *packet)
{
- if (!(ib_rvt_state_ops[packet->qp->state] & RVT_PROCESS_RECV_OK))
+ if (packet->qp->ibqp.qp_type != IB_QPT_RC ||
+ !(ib_rvt_state_ops[packet->qp->state] & RVT_PROCESS_RECV_OK))
return NULL;
- if (((opcode & RVT_OPCODE_QP_MASK) == packet->qp->allowed_ops) ||
- (opcode == IB_OPCODE_CNP))
+ if ((opcode & RVT_OPCODE_QP_MASK) == IB_OPCODE_TID_RDMA)
return opcode_handler_tbl[opcode];
-
return NULL;
}
-/**
- * hfi1_ib_rcv - process an incoming packet
- * @packet: data packet information
- *
- * This is called to process an incoming packet at interrupt level.
- *
- * Tlen is the length of the header + data + CRC in bytes.
- */
-void hfi1_ib_rcv(struct hfi1_packet *packet)
+void hfi1_kdeth_eager_rcv(struct hfi1_packet *packet)
{
struct hfi1_ctxtdata *rcd = packet->rcd;
struct ib_header *hdr = packet->hdr;
@@ -578,59 +332,162 @@ void hfi1_ib_rcv(struct hfi1_packet *packet)
struct hfi1_pportdata *ppd = rcd->ppd;
struct hfi1_ibport *ibp = &ppd->ibport_data;
struct rvt_dev_info *rdi = &ppd->dd->verbs_dev.rdi;
- opcode_handler packet_handler;
+ opcode_handler opcode_handler;
unsigned long flags;
u32 qp_num;
int lnh;
u8 opcode;
- u16 lid;
- /* Check for GRH */
+ /* DW == LRH (2) + BTH (3) + KDETH (9) + CRC (1) */
+ if (unlikely(tlen < 15 * sizeof(u32)))
+ goto drop;
+
lnh = be16_to_cpu(hdr->lrh[0]) & 3;
- if (lnh == HFI1_LRH_BTH) {
- packet->ohdr = &hdr->u.oth;
- } else if (lnh == HFI1_LRH_GRH) {
- u32 vtf;
+ if (lnh != HFI1_LRH_BTH)
+ goto drop;
- packet->ohdr = &hdr->u.l.oth;
- if (hdr->u.l.grh.next_hdr != IB_GRH_NEXT_HDR)
- goto drop;
- vtf = be32_to_cpu(hdr->u.l.grh.version_tclass_flow);
- if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION)
- goto drop;
- packet->rcv_flags |= HFI1_HAS_GRH;
- } else {
+ packet->ohdr = &hdr->u.oth;
+ trace_input_ibhdr(rcd->dd, packet, !!(rhf_dc_info(packet->rhf)));
+
+ opcode = (be32_to_cpu(packet->ohdr->bth[0]) >> 24);
+ inc_opstats(tlen, &rcd->opstats->stats[opcode]);
+
+ /* verbs_qp can be picked up from any tid_rdma header struct */
+ qp_num = be32_to_cpu(packet->ohdr->u.tid_rdma.r_req.verbs_qp) &
+ RVT_QPN_MASK;
+
+ rcu_read_lock();
+ packet->qp = rvt_lookup_qpn(rdi, &ibp->rvp, qp_num);
+ if (!packet->qp)
+ goto drop_rcu;
+ spin_lock_irqsave(&packet->qp->r_lock, flags);
+ opcode_handler = tid_qp_ok(opcode, packet);
+ if (likely(opcode_handler))
+ opcode_handler(packet);
+ else
+ goto drop_unlock;
+ spin_unlock_irqrestore(&packet->qp->r_lock, flags);
+ rcu_read_unlock();
+
+ return;
+drop_unlock:
+ spin_unlock_irqrestore(&packet->qp->r_lock, flags);
+drop_rcu:
+ rcu_read_unlock();
+drop:
+ ibp->rvp.n_pkt_drops++;
+}
+
+void hfi1_kdeth_expected_rcv(struct hfi1_packet *packet)
+{
+ struct hfi1_ctxtdata *rcd = packet->rcd;
+ struct ib_header *hdr = packet->hdr;
+ u32 tlen = packet->tlen;
+ struct hfi1_pportdata *ppd = rcd->ppd;
+ struct hfi1_ibport *ibp = &ppd->ibport_data;
+ struct rvt_dev_info *rdi = &ppd->dd->verbs_dev.rdi;
+ opcode_handler opcode_handler;
+ unsigned long flags;
+ u32 qp_num;
+ int lnh;
+ u8 opcode;
+
+ /* DW == LRH (2) + BTH (3) + KDETH (9) + CRC (1) */
+ if (unlikely(tlen < 15 * sizeof(u32)))
goto drop;
- }
- trace_input_ibhdr(rcd->dd, hdr);
+ lnh = be16_to_cpu(hdr->lrh[0]) & 3;
+ if (lnh != HFI1_LRH_BTH)
+ goto drop;
+
+ packet->ohdr = &hdr->u.oth;
+ trace_input_ibhdr(rcd->dd, packet, !!(rhf_dc_info(packet->rhf)));
opcode = (be32_to_cpu(packet->ohdr->bth[0]) >> 24);
inc_opstats(tlen, &rcd->opstats->stats[opcode]);
- /* Get the destination QP number. */
- qp_num = be32_to_cpu(packet->ohdr->bth[1]) & RVT_QPN_MASK;
- lid = be16_to_cpu(hdr->lrh[1]);
- if (unlikely((lid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) &&
- (lid != be16_to_cpu(IB_LID_PERMISSIVE)))) {
+ /* verbs_qp can be picked up from any tid_rdma header struct */
+ qp_num = be32_to_cpu(packet->ohdr->u.tid_rdma.r_rsp.verbs_qp) &
+ RVT_QPN_MASK;
+
+ rcu_read_lock();
+ packet->qp = rvt_lookup_qpn(rdi, &ibp->rvp, qp_num);
+ if (!packet->qp)
+ goto drop_rcu;
+ spin_lock_irqsave(&packet->qp->r_lock, flags);
+ opcode_handler = tid_qp_ok(opcode, packet);
+ if (likely(opcode_handler))
+ opcode_handler(packet);
+ else
+ goto drop_unlock;
+ spin_unlock_irqrestore(&packet->qp->r_lock, flags);
+ rcu_read_unlock();
+
+ return;
+drop_unlock:
+ spin_unlock_irqrestore(&packet->qp->r_lock, flags);
+drop_rcu:
+ rcu_read_unlock();
+drop:
+ ibp->rvp.n_pkt_drops++;
+}
+
+static int hfi1_do_pkey_check(struct hfi1_packet *packet)
+{
+ struct hfi1_ctxtdata *rcd = packet->rcd;
+ struct hfi1_pportdata *ppd = rcd->ppd;
+ struct hfi1_16b_header *hdr = packet->hdr;
+ u16 pkey;
+
+ /* Pkey check needed only for bypass packets */
+ if (packet->etype != RHF_RCV_TYPE_BYPASS)
+ return 0;
+
+ /* Perform pkey check */
+ pkey = hfi1_16B_get_pkey(hdr);
+ return ingress_pkey_check(ppd, pkey, packet->sc,
+ packet->qp->s_pkey_index,
+ packet->slid, true);
+}
+
+static inline void hfi1_handle_packet(struct hfi1_packet *packet,
+ bool is_mcast)
+{
+ u32 qp_num;
+ struct hfi1_ctxtdata *rcd = packet->rcd;
+ struct hfi1_pportdata *ppd = rcd->ppd;
+ struct hfi1_ibport *ibp = rcd_to_iport(rcd);
+ struct rvt_dev_info *rdi = &ppd->dd->verbs_dev.rdi;
+ opcode_handler packet_handler;
+ unsigned long flags;
+
+ inc_opstats(packet->tlen, &rcd->opstats->stats[packet->opcode]);
+
+ if (unlikely(is_mcast)) {
struct rvt_mcast *mcast;
struct rvt_mcast_qp *p;
- if (lnh != HFI1_LRH_GRH)
+ if (!packet->grh)
goto drop;
- mcast = rvt_mcast_find(&ibp->rvp, &hdr->u.l.grh.dgid);
+ mcast = rvt_mcast_find(&ibp->rvp,
+ &packet->grh->dgid,
+ opa_get_lid(packet->dlid, 9B));
if (!mcast)
goto drop;
+ rcu_read_lock();
list_for_each_entry_rcu(p, &mcast->qp_list, list) {
packet->qp = p->qp;
+ if (hfi1_do_pkey_check(packet))
+ goto unlock_drop;
spin_lock_irqsave(&packet->qp->r_lock, flags);
- packet_handler = qp_ok(opcode, packet);
+ packet_handler = qp_ok(packet);
if (likely(packet_handler))
packet_handler(packet);
else
ibp->rvp.n_pkt_drops++;
spin_unlock_irqrestore(&packet->qp->r_lock, flags);
}
+ rcu_read_unlock();
/*
* Notify rvt_multicast_detach() if it is waiting for us
* to finish.
@@ -638,14 +495,23 @@ void hfi1_ib_rcv(struct hfi1_packet *packet)
if (atomic_dec_return(&mcast->refcount) <= 1)
wake_up(&mcast->wait);
} else {
+ /* Get the destination QP number. */
+ if (packet->etype == RHF_RCV_TYPE_BYPASS &&
+ hfi1_16B_get_l4(packet->hdr) == OPA_16B_L4_FM)
+ qp_num = hfi1_16B_get_dest_qpn(packet->mgmt);
+ else
+ qp_num = ib_bth_get_qpn(packet->ohdr);
+
rcu_read_lock();
packet->qp = rvt_lookup_qpn(rdi, &ibp->rvp, qp_num);
- if (!packet->qp) {
- rcu_read_unlock();
- goto drop;
- }
+ if (!packet->qp)
+ goto unlock_drop;
+
+ if (hfi1_do_pkey_check(packet))
+ goto unlock_drop;
+
spin_lock_irqsave(&packet->qp->r_lock, flags);
- packet_handler = qp_ok(opcode, packet);
+ packet_handler = qp_ok(packet);
if (likely(packet_handler))
packet_handler(packet);
else
@@ -654,18 +520,41 @@ void hfi1_ib_rcv(struct hfi1_packet *packet)
rcu_read_unlock();
}
return;
-
+unlock_drop:
+ rcu_read_unlock();
drop:
ibp->rvp.n_pkt_drops++;
}
+/**
+ * hfi1_ib_rcv - process an incoming packet
+ * @packet: data packet information
+ *
+ * This is called to process an incoming packet at interrupt level.
+ */
+void hfi1_ib_rcv(struct hfi1_packet *packet)
+{
+ struct hfi1_ctxtdata *rcd = packet->rcd;
+
+ trace_input_ibhdr(rcd->dd, packet, !!(rhf_dc_info(packet->rhf)));
+ hfi1_handle_packet(packet, hfi1_check_mcast(packet->dlid));
+}
+
+void hfi1_16B_rcv(struct hfi1_packet *packet)
+{
+ struct hfi1_ctxtdata *rcd = packet->rcd;
+
+ trace_input_ibhdr(rcd->dd, packet, false);
+ hfi1_handle_packet(packet, hfi1_check_mcast(packet->dlid));
+}
+
/*
* This is called from a timer to check for QPs
* which need kernel memory in order to send a packet.
*/
-static void mem_timer(unsigned long data)
+static void mem_timer(struct timer_list *t)
{
- struct hfi1_ibdev *dev = (struct hfi1_ibdev *)data;
+ struct hfi1_ibdev *dev = timer_container_of(dev, t, mem_timer);
struct list_head *list = &dev->memwait;
struct rvt_qp *qp = NULL;
struct iowait *wait;
@@ -689,27 +578,6 @@ static void mem_timer(unsigned long data)
hfi1_qp_wakeup(qp, RVT_S_WAIT_KMEM);
}
-void update_sge(struct rvt_sge_state *ss, u32 length)
-{
- struct rvt_sge *sge = &ss->sge;
-
- sge->vaddr += length;
- sge->length -= length;
- sge->sge_length -= length;
- if (sge->sge_length == 0) {
- if (--ss->num_sge)
- *sge = *ss->sg_list++;
- } else if (sge->length == 0 && sge->mr->lkey) {
- if (++sge->n >= RVT_SEGSZ) {
- if (++sge->m >= sge->mr->mapsz)
- return;
- sge->n = 0;
- }
- sge->vaddr = sge->mr->map[sge->m]->segs[sge->n].vaddr;
- sge->length = sge->mr->map[sge->m]->segs[sge->n].length;
- }
-}
-
/*
* This is called with progress side lock held.
*/
@@ -724,11 +592,13 @@ static void verbs_sdma_complete(
spin_lock(&qp->s_lock);
if (tx->wqe) {
- hfi1_send_complete(qp, tx->wqe, IB_WC_SUCCESS);
+ rvt_send_complete(qp, tx->wqe, IB_WC_SUCCESS);
} else if (qp->ibqp.qp_type == IB_QPT_RC) {
- struct ib_header *hdr;
+ struct hfi1_opa_header *hdr;
hdr = &tx->phdr.hdr;
+ if (unlikely(status == SDMA_TXREQ_S_ABORTED))
+ hfi1_rc_verbs_aborted(qp, hdr);
hfi1_rc_send_complete(qp, hdr);
}
spin_unlock(&qp->s_lock);
@@ -736,11 +606,28 @@ static void verbs_sdma_complete(
hfi1_put_txreq(tx);
}
+void hfi1_wait_kmem(struct rvt_qp *qp)
+{
+ struct hfi1_qp_priv *priv = qp->priv;
+ struct ib_qp *ibqp = &qp->ibqp;
+ struct ib_device *ibdev = ibqp->device;
+ struct hfi1_ibdev *dev = to_idev(ibdev);
+
+ if (list_empty(&priv->s_iowait.list)) {
+ if (list_empty(&dev->memwait))
+ mod_timer(&dev->mem_timer, jiffies + 1);
+ qp->s_flags |= RVT_S_WAIT_KMEM;
+ list_add_tail(&priv->s_iowait.list, &dev->memwait);
+ priv->s_iowait.lock = &dev->iowait_lock;
+ trace_hfi1_qpsleep(qp, RVT_S_WAIT_KMEM);
+ rvt_get_qp(qp);
+ }
+}
+
static int wait_kmem(struct hfi1_ibdev *dev,
struct rvt_qp *qp,
struct hfi1_pkt_state *ps)
{
- struct hfi1_qp_priv *priv = qp->priv;
unsigned long flags;
int ret = 0;
@@ -748,18 +635,10 @@ static int wait_kmem(struct hfi1_ibdev *dev,
if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
write_seqlock(&dev->iowait_lock);
list_add_tail(&ps->s_txreq->txreq.list,
- &priv->s_iowait.tx_head);
- if (list_empty(&priv->s_iowait.list)) {
- if (list_empty(&dev->memwait))
- mod_timer(&dev->mem_timer, jiffies + 1);
- qp->s_flags |= RVT_S_WAIT_KMEM;
- list_add_tail(&priv->s_iowait.list, &dev->memwait);
- priv->s_iowait.lock = &dev->iowait_lock;
- trace_hfi1_qpsleep(qp, RVT_S_WAIT_KMEM);
- rvt_get_qp(qp);
- }
+ &ps->wait->tx_head);
+ hfi1_wait_kmem(qp);
write_sequnlock(&dev->iowait_lock);
- qp->s_flags &= ~RVT_S_BUSY;
+ hfi1_qp_unbusy(qp, ps->wait);
ret = -EBUSY;
}
spin_unlock_irqrestore(&qp->s_lock, flags);
@@ -785,11 +664,7 @@ static noinline int build_verbs_ulp_payload(
int ret = 0;
while (length) {
- len = ss->sge.length;
- if (len > length)
- len = length;
- if (len > ss->sge.sge_length)
- len = ss->sge.sge_length;
+ len = rvt_get_sge_length(&ss->sge, length);
WARN_ON_ONCE(len == 0);
ret = sdma_txadd_kvaddr(
sde->dd,
@@ -798,7 +673,7 @@ static noinline int build_verbs_ulp_payload(
len);
if (ret)
goto bail_txadd;
- update_sge(ss, len);
+ rvt_update_sge(ss, len, false);
length -= len;
}
return ret;
@@ -810,6 +685,27 @@ bail_txadd:
return ret;
}
+/**
+ * update_tx_opstats - record stats by opcode
+ * @qp: the qp
+ * @ps: transmit packet state
+ * @plen: the plen in dwords
+ *
+ * This is a routine to record the tx opstats after a
+ * packet has been presented to the egress mechanism.
+ */
+static void update_tx_opstats(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
+ u32 plen)
+{
+#ifdef CONFIG_DEBUG_FS
+ struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
+ struct hfi1_opcode_stats_perctx *s = get_cpu_ptr(dd->tx_opstats);
+
+ inc_opstats(plen * 4, &s->stats[ps->opcode]);
+ put_cpu_ptr(s);
+#endif
+}
+
/*
* Build the number of DMA descriptors needed to send length bytes of data.
*
@@ -828,13 +724,23 @@ static int build_verbs_tx_desc(
{
int ret = 0;
struct hfi1_sdma_header *phdr = &tx->phdr;
- u16 hdrbytes = tx->hdr_dwords << 2;
+ u16 hdrbytes = (tx->hdr_dwords + sizeof(pbc) / 4) << 2;
+ u8 extra_bytes = 0;
+ if (tx->phdr.hdr.hdr_type) {
+ /*
+ * hdrbytes accounts for PBC. Need to subtract 8 bytes
+ * before calculating padding.
+ */
+ extra_bytes = hfi1_get_16b_padding(hdrbytes - 8, length) +
+ (SIZE_OF_CRC << 2) + SIZE_OF_LT;
+ }
if (!ahg_info->ahgcount) {
ret = sdma_txinit_ahg(
&tx->txreq,
ahg_info->tx_flags,
- hdrbytes + length,
+ hdrbytes + length +
+ extra_bytes,
ahg_info->ahgidx,
0,
NULL,
@@ -863,57 +769,96 @@ static int build_verbs_tx_desc(
if (ret)
goto bail_txadd;
}
-
/* add the ulp payload - if any. tx->ss can be NULL for acks */
- if (tx->ss)
+ if (tx->ss) {
ret = build_verbs_ulp_payload(sde, length, tx);
+ if (ret)
+ goto bail_txadd;
+ }
+
+ /* add icrc, lt byte, and padding to flit */
+ if (extra_bytes)
+ ret = sdma_txadd_daddr(sde->dd, &tx->txreq, sde->dd->sdma_pad_phys,
+ extra_bytes);
+
bail_txadd:
return ret;
}
+static u64 update_hcrc(u8 opcode, u64 pbc)
+{
+ if ((opcode & IB_OPCODE_TID_RDMA) == IB_OPCODE_TID_RDMA) {
+ pbc &= ~PBC_INSERT_HCRC_SMASK;
+ pbc |= (u64)PBC_IHCRC_LKDETH << PBC_INSERT_HCRC_SHIFT;
+ }
+ return pbc;
+}
+
int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
u64 pbc)
{
struct hfi1_qp_priv *priv = qp->priv;
struct hfi1_ahg_info *ahg_info = priv->s_ahg;
- u32 hdrwords = qp->s_hdrwords;
+ u32 hdrwords = ps->s_txreq->hdr_dwords;
u32 len = ps->s_txreq->s_cur_size;
- u32 plen = hdrwords + ((len + 3) >> 2) + 2; /* includes pbc */
+ u32 plen;
struct hfi1_ibdev *dev = ps->dev;
struct hfi1_pportdata *ppd = ps->ppd;
struct verbs_txreq *tx;
- u64 pbc_flags = 0;
u8 sc5 = priv->s_sc;
-
int ret;
+ u32 dwords;
+
+ if (ps->s_txreq->phdr.hdr.hdr_type) {
+ u8 extra_bytes = hfi1_get_16b_padding((hdrwords << 2), len);
+
+ dwords = (len + extra_bytes + (SIZE_OF_CRC << 2) +
+ SIZE_OF_LT) >> 2;
+ } else {
+ dwords = (len + 3) >> 2;
+ }
+ plen = hdrwords + dwords + sizeof(pbc) / 4;
tx = ps->s_txreq;
if (!sdma_txreq_built(&tx->txreq)) {
if (likely(pbc == 0)) {
u32 vl = sc_to_vlt(dd_from_ibdev(qp->ibqp.device), sc5);
+
/* No vl15 here */
- /* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */
- pbc_flags |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT;
+ /* set PBC_DC_INFO bit (aka SC[4]) in pbc */
+ if (ps->s_txreq->phdr.hdr.hdr_type)
+ pbc |= PBC_PACKET_BYPASS |
+ PBC_INSERT_BYPASS_ICRC;
+ else
+ pbc |= (ib_is_sc5(sc5) << PBC_DC_INFO_SHIFT);
pbc = create_pbc(ppd,
- pbc_flags,
+ pbc,
qp->srate_mbps,
vl,
plen);
+
+ if (unlikely(hfi1_dbg_should_fault_tx(qp, ps->opcode)))
+ pbc = hfi1_fault_tx(qp, ps->opcode, pbc);
+ else
+ /* Update HCRC based on packet opcode */
+ pbc = update_hcrc(ps->opcode, pbc);
}
tx->wqe = qp->s_wqe;
ret = build_verbs_tx_desc(tx->sde, len, tx, ahg_info, pbc);
if (unlikely(ret))
goto bail_build;
}
- ret = sdma_send_txreq(tx->sde, &priv->s_iowait, &tx->txreq);
+ ret = sdma_send_txreq(tx->sde, ps->wait, &tx->txreq, ps->pkts_sent);
if (unlikely(ret < 0)) {
if (ret == -ECOMM)
goto bail_ecomm;
return ret;
}
+
+ update_tx_opstats(qp, ps, plen);
trace_sdma_output_ibhdr(dd_from_ibdev(qp->ibqp.device),
- &ps->s_txreq->phdr.hdr);
+ &ps->s_txreq->phdr.hdr, ib_is_sc5(sc5));
return ret;
bail_ecomm:
@@ -940,7 +885,6 @@ static int pio_wait(struct rvt_qp *qp,
{
struct hfi1_qp_priv *priv = qp->priv;
struct hfi1_devdata *dd = sc->dd;
- struct hfi1_ibdev *dev = &dd->verbs_dev;
unsigned long flags;
int ret = 0;
@@ -952,27 +896,29 @@ static int pio_wait(struct rvt_qp *qp,
*/
spin_lock_irqsave(&qp->s_lock, flags);
if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
- write_seqlock(&dev->iowait_lock);
+ write_seqlock(&sc->waitlock);
list_add_tail(&ps->s_txreq->txreq.list,
- &priv->s_iowait.tx_head);
+ &ps->wait->tx_head);
if (list_empty(&priv->s_iowait.list)) {
struct hfi1_ibdev *dev = &dd->verbs_dev;
int was_empty;
dev->n_piowait += !!(flag & RVT_S_WAIT_PIO);
- dev->n_piodrain += !!(flag & RVT_S_WAIT_PIO_DRAIN);
+ dev->n_piodrain += !!(flag & HFI1_S_WAIT_PIO_DRAIN);
qp->s_flags |= flag;
was_empty = list_empty(&sc->piowait);
- list_add_tail(&priv->s_iowait.list, &sc->piowait);
- priv->s_iowait.lock = &dev->iowait_lock;
+ iowait_get_priority(&priv->s_iowait);
+ iowait_queue(ps->pkts_sent, &priv->s_iowait,
+ &sc->piowait);
+ priv->s_iowait.lock = &sc->waitlock;
trace_hfi1_qpsleep(qp, RVT_S_WAIT_PIO);
rvt_get_qp(qp);
/* counting: only call wantpiobuf_intr if first user */
if (was_empty)
hfi1_sc_wantpiobuf_intr(sc, 1);
}
- write_sequnlock(&dev->iowait_lock);
- qp->s_flags &= ~RVT_S_BUSY;
+ write_sequnlock(&sc->waitlock);
+ hfi1_qp_unbusy(qp, ps->wait);
ret = -EBUSY;
}
spin_unlock_irqrestore(&qp->s_lock, flags);
@@ -992,14 +938,13 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
u64 pbc)
{
struct hfi1_qp_priv *priv = qp->priv;
- u32 hdrwords = qp->s_hdrwords;
+ u32 hdrwords = ps->s_txreq->hdr_dwords;
struct rvt_sge_state *ss = ps->s_txreq->ss;
u32 len = ps->s_txreq->s_cur_size;
- u32 dwords = (len + 3) >> 2;
- u32 plen = hdrwords + dwords + 2; /* includes pbc */
+ u32 dwords;
+ u32 plen;
struct hfi1_pportdata *ppd = ps->ppd;
- u32 *hdr = (u32 *)&ps->s_txreq->phdr.hdr;
- u64 pbc_flags = 0;
+ u32 *hdr;
u8 sc5;
unsigned long flags = 0;
struct send_context *sc;
@@ -1007,6 +952,19 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
int wc_status = IB_WC_SUCCESS;
int ret = 0;
pio_release_cb cb = NULL;
+ u8 extra_bytes = 0;
+
+ if (ps->s_txreq->phdr.hdr.hdr_type) {
+ u8 pad_size = hfi1_get_16b_padding((hdrwords << 2), len);
+
+ extra_bytes = pad_size + (SIZE_OF_CRC << 2) + SIZE_OF_LT;
+ dwords = (len + extra_bytes) >> 2;
+ hdr = (u32 *)&ps->s_txreq->phdr.hdr.opah;
+ } else {
+ dwords = (len + 3) >> 2;
+ hdr = (u32 *)&ps->s_txreq->phdr.hdr.ibh;
+ }
+ plen = hdrwords + dwords + sizeof(pbc) / 4;
/* only RC/UC use complete */
switch (qp->ibqp.qp_type) {
@@ -1024,17 +982,27 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
if (likely(pbc == 0)) {
u8 vl = sc_to_vlt(dd_from_ibdev(qp->ibqp.device), sc5);
- /* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */
- pbc_flags |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT;
- pbc = create_pbc(ppd, pbc_flags, qp->srate_mbps, vl, plen);
+
+ /* set PBC_DC_INFO bit (aka SC[4]) in pbc */
+ if (ps->s_txreq->phdr.hdr.hdr_type)
+ pbc |= PBC_PACKET_BYPASS | PBC_INSERT_BYPASS_ICRC;
+ else
+ pbc |= (ib_is_sc5(sc5) << PBC_DC_INFO_SHIFT);
+
+ pbc = create_pbc(ppd, pbc, qp->srate_mbps, vl, plen);
+ if (unlikely(hfi1_dbg_should_fault_tx(qp, ps->opcode)))
+ pbc = hfi1_fault_tx(qp, ps->opcode, pbc);
+ else
+ /* Update HCRC based on packet opcode */
+ pbc = update_hcrc(ps->opcode, pbc);
}
if (cb)
iowait_pio_inc(&priv->s_iowait);
pbuf = sc_buffer_alloc(sc, plen, cb, qp);
- if (unlikely(!pbuf)) {
+ if (IS_ERR_OR_NULL(pbuf)) {
if (cb)
verbs_pio_complete(qp, 0);
- if (ppd->host_link_state != HLS_UP_ACTIVE) {
+ if (IS_ERR(pbuf)) {
/*
* If we have filled the PIO buffers to capacity and are
* not in an active state this request is not going to
@@ -1062,38 +1030,43 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
}
}
- if (len == 0) {
+ if (dwords == 0) {
pio_copy(ppd->dd, pbuf, pbc, hdr, hdrwords);
} else {
+ seg_pio_copy_start(pbuf, pbc,
+ hdr, hdrwords * 4);
if (ss) {
- seg_pio_copy_start(pbuf, pbc, hdr, hdrwords * 4);
while (len) {
void *addr = ss->sge.vaddr;
- u32 slen = ss->sge.length;
+ u32 slen = rvt_get_sge_length(&ss->sge, len);
- if (slen > len)
- slen = len;
- update_sge(ss, slen);
+ rvt_update_sge(ss, slen, false);
seg_pio_copy_mid(pbuf, addr, slen);
len -= slen;
}
- seg_pio_copy_end(pbuf);
}
+ /* add icrc, lt byte, and padding to flit */
+ if (extra_bytes)
+ seg_pio_copy_mid(pbuf, ppd->dd->sdma_pad_dma,
+ extra_bytes);
+
+ seg_pio_copy_end(pbuf);
}
+ update_tx_opstats(qp, ps, plen);
trace_pio_output_ibhdr(dd_from_ibdev(qp->ibqp.device),
- &ps->s_txreq->phdr.hdr);
+ &ps->s_txreq->phdr.hdr, ib_is_sc5(sc5));
pio_bail:
+ spin_lock_irqsave(&qp->s_lock, flags);
if (qp->s_wqe) {
- spin_lock_irqsave(&qp->s_lock, flags);
- hfi1_send_complete(qp, qp->s_wqe, wc_status);
- spin_unlock_irqrestore(&qp->s_lock, flags);
+ rvt_send_complete(qp, qp->s_wqe, wc_status);
} else if (qp->ibqp.qp_type == IB_QPT_RC) {
- spin_lock_irqsave(&qp->s_lock, flags);
+ if (unlikely(wc_status == IB_WC_GENERAL_ERR))
+ hfi1_rc_verbs_aborted(qp, &ps->s_txreq->phdr.hdr);
hfi1_rc_send_complete(qp, &ps->s_txreq->phdr.hdr);
- spin_unlock_irqrestore(&qp->s_lock, flags);
}
+ spin_unlock_irqrestore(&qp->s_lock, flags);
ret = 0;
@@ -1128,10 +1101,10 @@ static inline int egress_pkey_matches_entry(u16 pkey, u16 ent)
/**
* egress_pkey_check - check P_KEY of a packet
- * @ppd: Physical IB port data
- * @lrh: Local route header
- * @bth: Base transport header
- * @sc5: SC for packet
+ * @ppd: Physical IB port data
+ * @slid: SLID for packet
+ * @pkey: PKEY for header
+ * @sc5: SC for packet
* @s_pkey_index: It will be used for look up optimization for kernel contexts
* only. If it is negative value, then it means user contexts is calling this
* function.
@@ -1140,19 +1113,16 @@ static inline int egress_pkey_matches_entry(u16 pkey, u16 ent)
*
* Return: 0 on success, otherwise, 1
*/
-int egress_pkey_check(struct hfi1_pportdata *ppd, __be16 *lrh, __be32 *bth,
+int egress_pkey_check(struct hfi1_pportdata *ppd, u32 slid, u16 pkey,
u8 sc5, int8_t s_pkey_index)
{
struct hfi1_devdata *dd;
int i;
- u16 pkey;
int is_user_ctxt_mechanism = (s_pkey_index < 0);
if (!(ppd->part_enforce & HFI1_PART_ENFORCE_OUT))
return 0;
- pkey = (u16)be32_to_cpu(bth[0]);
-
/* If SC15, pkey[0:14] must be 0x7fff */
if ((sc5 == 0xf) && ((pkey & PKEY_LOW_15_MASK) != PKEY_LOW_15_MASK))
goto bad;
@@ -1185,8 +1155,6 @@ bad:
dd = ppd->dd;
if (!(dd->err_info_xmit_constraint.status &
OPA_EI_STATUS_SMASK)) {
- u16 slid = be16_to_cpu(lrh[3]);
-
dd->err_info_xmit_constraint.status |=
OPA_EI_STATUS_SMASK;
dd->err_info_xmit_constraint.slid = slid;
@@ -1196,18 +1164,18 @@ bad:
return 1;
}
-/**
+/*
* get_send_routine - choose an egress routine
*
* Choose an egress routine based on QP type
* and size
*/
static inline send_routine get_send_routine(struct rvt_qp *qp,
- struct verbs_txreq *tx)
+ struct hfi1_pkt_state *ps)
{
struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
struct hfi1_qp_priv *priv = qp->priv;
- struct ib_header *h = &tx->phdr.hdr;
+ struct verbs_txreq *tx = ps->s_txreq;
if (unlikely(!(dd->flags & HFI1_HAS_SEND_DMA)))
return dd->process_pio_send;
@@ -1218,17 +1186,16 @@ static inline send_routine get_send_routine(struct rvt_qp *qp,
case IB_QPT_UD:
break;
case IB_QPT_UC:
- case IB_QPT_RC: {
- u8 op = get_opcode(h);
-
+ case IB_QPT_RC:
+ priv->s_running_pkt_size =
+ (tx->s_cur_size + priv->s_running_pkt_size) / 2;
if (piothreshold &&
- tx->s_cur_size <= min(piothreshold, qp->pmtu) &&
- (BIT(op & OPMASK) & pio_opmask[op >> 5]) &&
+ priv->s_running_pkt_size <= min(piothreshold, qp->pmtu) &&
+ (BIT(ps->opcode & OPMASK) & pio_opmask[ps->opcode >> 5]) &&
iowait_sdma_pending(&priv->s_iowait) == 0 &&
!sdma_txreq_built(&tx->txreq))
return dd->process_pio_send;
break;
- }
default:
break;
}
@@ -1247,26 +1214,45 @@ int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
{
struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
struct hfi1_qp_priv *priv = qp->priv;
- struct ib_other_headers *ohdr;
- struct ib_header *hdr;
+ struct ib_other_headers *ohdr = NULL;
send_routine sr;
int ret;
- u8 lnh;
+ u16 pkey;
+ u32 slid;
+ u8 l4 = 0;
- hdr = &ps->s_txreq->phdr.hdr;
/* locate the pkey within the headers */
- lnh = be16_to_cpu(hdr->lrh[0]) & 3;
- if (lnh == HFI1_LRH_GRH)
- ohdr = &hdr->u.l.oth;
+ if (ps->s_txreq->phdr.hdr.hdr_type) {
+ struct hfi1_16b_header *hdr = &ps->s_txreq->phdr.hdr.opah;
+
+ l4 = hfi1_16B_get_l4(hdr);
+ if (l4 == OPA_16B_L4_IB_LOCAL)
+ ohdr = &hdr->u.oth;
+ else if (l4 == OPA_16B_L4_IB_GLOBAL)
+ ohdr = &hdr->u.l.oth;
+
+ slid = hfi1_16B_get_slid(hdr);
+ pkey = hfi1_16B_get_pkey(hdr);
+ } else {
+ struct ib_header *hdr = &ps->s_txreq->phdr.hdr.ibh;
+ u8 lnh = ib_get_lnh(hdr);
+
+ if (lnh == HFI1_LRH_GRH)
+ ohdr = &hdr->u.l.oth;
+ else
+ ohdr = &hdr->u.oth;
+ slid = ib_get_slid(hdr);
+ pkey = ib_bth_get_pkey(ohdr);
+ }
+
+ if (likely(l4 != OPA_16B_L4_FM))
+ ps->opcode = ib_bth_get_opcode(ohdr);
else
- ohdr = &hdr->u.oth;
-
- sr = get_send_routine(qp, ps->s_txreq);
- ret = egress_pkey_check(dd->pport,
- hdr->lrh,
- ohdr->bth,
- priv->s_sc,
- qp->s_pkey_index);
+ ps->opcode = IB_OPCODE_UD_SEND_ONLY;
+
+ sr = get_send_routine(qp, ps);
+ ret = egress_pkey_check(dd->pport, slid, pkey,
+ priv->s_sc, qp->s_pkey_index);
if (unlikely(ret)) {
/*
* The value we are returning here does not get propagated to
@@ -1282,7 +1268,7 @@ int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
hfi1_cdbg(PIO, "%s() Failed. Completing with err",
__func__);
spin_lock_irqsave(&qp->s_lock, flags);
- hfi1_send_complete(qp, qp->s_wqe, IB_WC_GENERAL_ERR);
+ rvt_send_complete(qp, qp->s_wqe, IB_WC_GENERAL_ERR);
spin_unlock_irqrestore(&qp->s_lock, flags);
}
return -EINVAL;
@@ -1291,7 +1277,7 @@ int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
return pio_wait(qp,
ps->s_txreq->psc,
ps,
- RVT_S_WAIT_PIO_DRAIN);
+ HFI1_S_WAIT_PIO_DRAIN);
return sr(qp, ps, 0);
}
@@ -1302,17 +1288,20 @@ int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
static void hfi1_fill_device_attr(struct hfi1_devdata *dd)
{
struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
- u16 ver = dd->dc8051_ver;
+ u32 ver = dd->dc8051_ver;
memset(&rdi->dparms.props, 0, sizeof(rdi->dparms.props));
- rdi->dparms.props.fw_ver = ((u64)(dc8051_ver_maj(ver)) << 16) |
- (u64)dc8051_ver_min(ver);
+ rdi->dparms.props.fw_ver = ((u64)(dc8051_ver_maj(ver)) << 32) |
+ ((u64)(dc8051_ver_min(ver)) << 16) |
+ (u64)dc8051_ver_patch(ver);
+
rdi->dparms.props.device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR |
IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT |
IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN |
IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE |
IB_DEVICE_MEM_MGT_EXTENSIONS;
+ rdi->dparms.props.kernel_cap_flags = IBK_RDMA_NETDEV_OPA;
rdi->dparms.props.page_size_cap = PAGE_SIZE;
rdi->dparms.props.vendor_id = dd->oui1 << 16 | dd->oui2 << 8 | dd->oui3;
rdi->dparms.props.vendor_part_id = dd->pcidev->device;
@@ -1321,15 +1310,15 @@ static void hfi1_fill_device_attr(struct hfi1_devdata *dd)
rdi->dparms.props.max_mr_size = U64_MAX;
rdi->dparms.props.max_fast_reg_page_list_len = UINT_MAX;
rdi->dparms.props.max_qp = hfi1_max_qps;
- rdi->dparms.props.max_qp_wr = hfi1_max_qp_wrs;
- rdi->dparms.props.max_sge = hfi1_max_sges;
+ rdi->dparms.props.max_qp_wr =
+ (hfi1_max_qp_wrs >= HFI1_QP_WQE_INVALID ?
+ HFI1_QP_WQE_INVALID - 1 : hfi1_max_qp_wrs);
+ rdi->dparms.props.max_send_sge = hfi1_max_sges;
+ rdi->dparms.props.max_recv_sge = hfi1_max_sges;
rdi->dparms.props.max_sge_rd = hfi1_max_sges;
rdi->dparms.props.max_cq = hfi1_max_cqs;
rdi->dparms.props.max_ah = hfi1_max_ahs;
rdi->dparms.props.max_cqe = hfi1_max_cqes;
- rdi->dparms.props.max_mr = rdi->lkey_table.max;
- rdi->dparms.props.max_fmr = rdi->lkey_table.max;
- rdi->dparms.props.max_map_per_fmr = 32767;
rdi->dparms.props.max_pd = hfi1_max_pds;
rdi->dparms.props.max_qp_rd_atom = HFI1_MAX_RDMA_ATOMIC;
rdi->dparms.props.max_qp_init_rd_atom = 255;
@@ -1376,23 +1365,24 @@ static inline u16 opa_width_to_ib(u16 in)
}
}
-static int query_port(struct rvt_dev_info *rdi, u8 port_num,
+static int query_port(struct rvt_dev_info *rdi, u32 port_num,
struct ib_port_attr *props)
{
struct hfi1_ibdev *verbs_dev = dev_from_rdi(rdi);
struct hfi1_devdata *dd = dd_from_dev(verbs_dev);
struct hfi1_pportdata *ppd = &dd->pport[port_num - 1];
- u16 lid = ppd->lid;
+ u32 lid = ppd->lid;
+ /* props being zeroed by the caller, avoid zeroing it here */
props->lid = lid ? lid : 0;
props->lmc = ppd->lmc;
/* OPA logical states match IB logical states */
props->state = driver_lstate(ppd);
- props->phys_state = hfi1_ibphys_portstate(ppd);
+ props->phys_state = driver_pstate(ppd);
props->gid_tbl_len = HFI1_GUIDS_PER_PORT;
props->active_width = (u8)opa_width_to_ib(ppd->link_width_active);
/* see rate_show() in ib core/sysfs.c */
- props->active_speed = (u8)opa_speed_to_ib(ppd->link_speed_active);
+ props->active_speed = opa_speed_to_ib(ppd->link_speed_active);
props->max_vl_num = ppd->vls_supported;
/* Once we are a "first class" citizen and have added the OPA MTUs to
@@ -1406,7 +1396,8 @@ static int query_port(struct rvt_dev_info *rdi, u8 port_num,
props->max_mtu = mtu_to_enum((!valid_ib_mtu(hfi1_max_mtu) ?
4096 : hfi1_max_mtu), IB_MTU_4096);
props->active_mtu = !valid_ib_mtu(ppd->ibmtu) ? props->max_mtu :
- mtu_to_enum(ppd->ibmtu, IB_MTU_2048);
+ mtu_to_enum(ppd->ibmtu, IB_MTU_4096);
+ props->phys_mtu = hfi1_max_mtu;
return 0;
}
@@ -1451,17 +1442,15 @@ bail:
return ret;
}
-static int shut_down_port(struct rvt_dev_info *rdi, u8 port_num)
+static int shut_down_port(struct rvt_dev_info *rdi, u32 port_num)
{
struct hfi1_ibdev *verbs_dev = dev_from_rdi(rdi);
struct hfi1_devdata *dd = dd_from_dev(verbs_dev);
struct hfi1_pportdata *ppd = &dd->pport[port_num - 1];
- int ret;
set_link_down_reason(ppd, OPA_LINKDOWN_REASON_UNKNOWN, 0,
OPA_LINKDOWN_REASON_UNKNOWN);
- ret = set_link_state(ppd, HLS_DN_DOWNDEF);
- return ret;
+ return set_link_state(ppd, HLS_DN_DOWNDEF);
}
static int hfi1_get_guid_be(struct rvt_dev_info *rdi, struct rvt_ibport *rvp,
@@ -1479,70 +1468,67 @@ static int hfi1_get_guid_be(struct rvt_dev_info *rdi, struct rvt_ibport *rvp,
/*
* convert ah port,sl to sc
*/
-u8 ah_to_sc(struct ib_device *ibdev, struct ib_ah_attr *ah)
+u8 ah_to_sc(struct ib_device *ibdev, struct rdma_ah_attr *ah)
{
- struct hfi1_ibport *ibp = to_iport(ibdev, ah->port_num);
+ struct hfi1_ibport *ibp = to_iport(ibdev, rdma_ah_get_port_num(ah));
- return ibp->sl_to_sc[ah->sl];
+ return ibp->sl_to_sc[rdma_ah_get_sl(ah)];
}
-static int hfi1_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr)
+static int hfi1_check_ah(struct ib_device *ibdev, struct rdma_ah_attr *ah_attr)
{
struct hfi1_ibport *ibp;
struct hfi1_pportdata *ppd;
struct hfi1_devdata *dd;
u8 sc5;
+ u8 sl;
+
+ if (hfi1_check_mcast(rdma_ah_get_dlid(ah_attr)) &&
+ !(rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH))
+ return -EINVAL;
/* test the mapping for validity */
- ibp = to_iport(ibdev, ah_attr->port_num);
+ ibp = to_iport(ibdev, rdma_ah_get_port_num(ah_attr));
ppd = ppd_from_ibp(ibp);
- sc5 = ibp->sl_to_sc[ah_attr->sl];
dd = dd_from_ppd(ppd);
+
+ sl = rdma_ah_get_sl(ah_attr);
+ if (sl >= ARRAY_SIZE(ibp->sl_to_sc))
+ return -EINVAL;
+ sl = array_index_nospec(sl, ARRAY_SIZE(ibp->sl_to_sc));
+
+ sc5 = ibp->sl_to_sc[sl];
if (sc_to_vlt(dd, sc5) > num_vls && sc_to_vlt(dd, sc5) != 0xf)
return -EINVAL;
return 0;
}
static void hfi1_notify_new_ah(struct ib_device *ibdev,
- struct ib_ah_attr *ah_attr,
+ struct rdma_ah_attr *ah_attr,
struct rvt_ah *ah)
{
struct hfi1_ibport *ibp;
struct hfi1_pportdata *ppd;
struct hfi1_devdata *dd;
u8 sc5;
+ struct rdma_ah_attr *attr = &ah->attr;
/*
* Do not trust reading anything from rvt_ah at this point as it is not
* done being setup. We can however modify things which we need to set.
*/
- ibp = to_iport(ibdev, ah_attr->port_num);
+ ibp = to_iport(ibdev, rdma_ah_get_port_num(ah_attr));
ppd = ppd_from_ibp(ibp);
- sc5 = ibp->sl_to_sc[ah->attr.sl];
+ sc5 = ibp->sl_to_sc[rdma_ah_get_sl(&ah->attr)];
+ hfi1_update_ah_attr(ibdev, attr);
+ hfi1_make_opa_lid(attr);
dd = dd_from_ppd(ppd);
ah->vl = sc_to_vlt(dd, sc5);
if (ah->vl < num_vls || ah->vl == 15)
ah->log_pmtu = ilog2(dd->vld[ah->vl].mtu);
}
-struct ib_ah *hfi1_create_qp0_ah(struct hfi1_ibport *ibp, u16 dlid)
-{
- struct ib_ah_attr attr;
- struct ib_ah *ah = ERR_PTR(-EINVAL);
- struct rvt_qp *qp0;
-
- memset(&attr, 0, sizeof(attr));
- attr.dlid = dlid;
- attr.port_num = ppd_from_ibp(ibp)->port;
- rcu_read_lock();
- qp0 = rcu_dereference(ibp->rvp.qp[0]);
- if (qp0)
- ah = ib_create_ah(qp0->ibqp.pd, &attr);
- rcu_read_unlock();
- return ah;
-}
-
/**
* hfi1_get_npkeys - return the size of the PKEY table for context 0
* @dd: the hfi1_ib device
@@ -1563,13 +1549,21 @@ static void init_ibport(struct hfi1_pportdata *ppd)
ibp->sc_to_sl[i] = i;
}
+ for (i = 0; i < RVT_MAX_TRAP_LISTS ; i++)
+ INIT_LIST_HEAD(&ibp->rvp.trap_lists[i].list);
+ timer_setup(&ibp->rvp.trap_timer, hfi1_handle_trap_timer, 0);
+
spin_lock_init(&ibp->rvp.lock);
/* Set the prefix to the default value (see ch. 4.1.1) */
ibp->rvp.gid_prefix = IB_DEFAULT_GID_PREFIX;
ibp->rvp.sm_lid = 0;
- /* Below should only set bits defined in OPA PortInfo.CapabilityMask */
+ /*
+ * Below should only set bits defined in OPA PortInfo.CapabilityMask
+ * and PortInfo.CapabilityMask3
+ */
ibp->rvp.port_cap_flags = IB_PORT_AUTO_MIGR_SUP |
IB_PORT_CAP_MASK_NOTICE_SUP;
+ ibp->rvp.port_cap3_flags = OPA_CAP_MASK3_IsSharedSpaceSupported;
ibp->rvp.pma_counter_select[0] = IB_PMA_PORT_XMIT_DATA;
ibp->rvp.pma_counter_select[1] = IB_PMA_PORT_RCV_DATA;
ibp->rvp.pma_counter_select[2] = IB_PMA_PORT_XMIT_PKTS;
@@ -1580,15 +1574,14 @@ static void init_ibport(struct hfi1_pportdata *ppd)
RCU_INIT_POINTER(ibp->rvp.qp[1], NULL);
}
-static void hfi1_get_dev_fw_str(struct ib_device *ibdev, char *str,
- size_t str_len)
+static void hfi1_get_dev_fw_str(struct ib_device *ibdev, char *str)
{
struct rvt_dev_info *rdi = ib_to_rvt(ibdev);
struct hfi1_ibdev *dev = dev_from_rdi(rdi);
- u16 ver = dd_from_dev(dev)->dc8051_ver;
+ u32 ver = dd_from_dev(dev)->dc8051_ver;
- snprintf(str, str_len, "%u.%u", dc8051_ver_maj(ver),
- dc8051_ver_min(ver));
+ snprintf(str, IB_FW_VERSION_NAME_MAX, "%u.%u.%u", dc8051_ver_maj(ver),
+ dc8051_ver_min(ver), dc8051_ver_patch(ver));
}
static const char * const driver_cntr_names[] = {
@@ -1605,25 +1598,23 @@ static const char * const driver_cntr_names[] = {
"DRIVER_EgrHdrFull"
};
-static const char **dev_cntr_names;
-static const char **port_cntr_names;
-static int num_driver_cntrs = ARRAY_SIZE(driver_cntr_names);
+static struct rdma_stat_desc *dev_cntr_descs;
+static struct rdma_stat_desc *port_cntr_descs;
+int num_driver_cntrs = ARRAY_SIZE(driver_cntr_names);
static int num_dev_cntrs;
static int num_port_cntrs;
-static int cntr_names_initialized;
/*
* Convert a list of names separated by '\n' into an array of NULL terminated
* strings. Optionally some entries can be reserved in the array to hold extra
* external strings.
*/
-static int init_cntr_names(const char *names_in,
- const int names_len,
- int num_extra_names,
- int *num_cntrs,
- const char ***cntr_names)
+static int init_cntr_names(const char *names_in, const size_t names_len,
+ int num_extra_names, int *num_cntrs,
+ struct rdma_stat_desc **cntr_descs)
{
- char *names_out, *p, **q;
+ struct rdma_stat_desc *names_out;
+ char *p;
int i, n;
n = 0;
@@ -1631,90 +1622,83 @@ static int init_cntr_names(const char *names_in,
if (names_in[i] == '\n')
n++;
- names_out = kmalloc((n + num_extra_names) * sizeof(char *) + names_len,
+ names_out = kzalloc((n + num_extra_names) * sizeof(*names_out)
+ + names_len,
GFP_KERNEL);
if (!names_out) {
*num_cntrs = 0;
- *cntr_names = NULL;
+ *cntr_descs = NULL;
return -ENOMEM;
}
- p = names_out + (n + num_extra_names) * sizeof(char *);
+ p = (char *)&names_out[n + num_extra_names];
memcpy(p, names_in, names_len);
- q = (char **)names_out;
for (i = 0; i < n; i++) {
- q[i] = p;
+ names_out[i].name = p;
p = strchr(p, '\n');
*p++ = '\0';
}
*num_cntrs = n;
- *cntr_names = (const char **)names_out;
+ *cntr_descs = names_out;
return 0;
}
-static struct rdma_hw_stats *alloc_hw_stats(struct ib_device *ibdev,
- u8 port_num)
+static struct rdma_hw_stats *hfi1_alloc_hw_device_stats(struct ib_device *ibdev)
{
- int i, err;
-
- if (!cntr_names_initialized) {
+ if (!dev_cntr_descs) {
struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
+ int i, err;
- err = init_cntr_names(dd->cntrnames,
- dd->cntrnameslen,
+ err = init_cntr_names(dd->cntrnames, dd->cntrnameslen,
num_driver_cntrs,
- &num_dev_cntrs,
- &dev_cntr_names);
+ &num_dev_cntrs, &dev_cntr_descs);
if (err)
return NULL;
for (i = 0; i < num_driver_cntrs; i++)
- dev_cntr_names[num_dev_cntrs + i] =
- driver_cntr_names[i];
+ dev_cntr_descs[num_dev_cntrs + i].name =
+ driver_cntr_names[i];
+ }
+ return rdma_alloc_hw_stats_struct(dev_cntr_descs,
+ num_dev_cntrs + num_driver_cntrs,
+ RDMA_HW_STATS_DEFAULT_LIFESPAN);
+}
- err = init_cntr_names(dd->portcntrnames,
- dd->portcntrnameslen,
+static struct rdma_hw_stats *hfi_alloc_hw_port_stats(struct ib_device *ibdev,
+ u32 port_num)
+{
+ if (!port_cntr_descs) {
+ struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
+ int err;
+
+ err = init_cntr_names(dd->portcntrnames, dd->portcntrnameslen,
0,
- &num_port_cntrs,
- &port_cntr_names);
- if (err) {
- kfree(dev_cntr_names);
- dev_cntr_names = NULL;
+ &num_port_cntrs, &port_cntr_descs);
+ if (err)
return NULL;
- }
- cntr_names_initialized = 1;
}
-
- if (!port_num)
- return rdma_alloc_hw_stats_struct(
- dev_cntr_names,
- num_dev_cntrs + num_driver_cntrs,
- RDMA_HW_STATS_DEFAULT_LIFESPAN);
- else
- return rdma_alloc_hw_stats_struct(
- port_cntr_names,
- num_port_cntrs,
- RDMA_HW_STATS_DEFAULT_LIFESPAN);
+ return rdma_alloc_hw_stats_struct(port_cntr_descs, num_port_cntrs,
+ RDMA_HW_STATS_DEFAULT_LIFESPAN);
}
static u64 hfi1_sps_ints(void)
{
- unsigned long flags;
+ unsigned long index, flags;
struct hfi1_devdata *dd;
u64 sps_ints = 0;
- spin_lock_irqsave(&hfi1_devs_lock, flags);
- list_for_each_entry(dd, &hfi1_dev_list, list) {
+ xa_lock_irqsave(&hfi1_dev_table, flags);
+ xa_for_each(&hfi1_dev_table, index, dd) {
sps_ints += get_all_cpu_total(dd->int_counter);
}
- spin_unlock_irqrestore(&hfi1_devs_lock, flags);
+ xa_unlock_irqrestore(&hfi1_dev_table, flags);
return sps_ints;
}
static int get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats,
- u8 port, int index)
+ u32 port, int index)
{
u64 *values;
int count;
@@ -1739,6 +1723,23 @@ static int get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats,
return count;
}
+static const struct ib_device_ops hfi1_dev_ops = {
+ .owner = THIS_MODULE,
+ .driver_id = RDMA_DRIVER_HFI1,
+
+ .alloc_hw_device_stats = hfi1_alloc_hw_device_stats,
+ .alloc_hw_port_stats = hfi_alloc_hw_port_stats,
+ .alloc_rdma_netdev = hfi1_vnic_alloc_rn,
+ .device_group = &ib_hfi1_attr_group,
+ .get_dev_fw_str = hfi1_get_dev_fw_str,
+ .get_hw_stats = get_hw_stats,
+ .modify_device = modify_device,
+ .port_groups = hfi1_attr_port_groups,
+ /* keep process mad in the driver */
+ .process_mad = hfi1_process_mad,
+ .rdma_netdev_get_params = hfi1_ipoib_rn_get_params,
+};
+
/**
* hfi1_register_ib_device - register our device with the infiniband core
* @dd: the device data structure
@@ -1752,14 +1753,13 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
struct hfi1_ibport *ibp = &ppd->ibport_data;
unsigned i;
int ret;
- size_t lcpysz = IB_DEVICE_NAME_MAX;
for (i = 0; i < dd->num_pports; i++)
init_ibport(ppd + i);
/* Only need to initialize non-zero fields. */
- setup_timer(&dev->mem_timer, mem_timer, (unsigned long)dev);
+ timer_setup(&dev->mem_timer, mem_timer, 0);
seqlock_init(&dev->iowait_lock);
seqlock_init(&dev->txwait_lock);
@@ -1780,27 +1780,17 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
*/
if (!ib_hfi1_sys_image_guid)
ib_hfi1_sys_image_guid = ibdev->node_guid;
- lcpysz = strlcpy(ibdev->name, class_name(), lcpysz);
- strlcpy(ibdev->name + lcpysz, "_%d", IB_DEVICE_NAME_MAX - lcpysz);
- ibdev->owner = THIS_MODULE;
ibdev->phys_port_cnt = dd->num_pports;
- ibdev->dma_device = &dd->pcidev->dev;
- ibdev->modify_device = modify_device;
- ibdev->alloc_hw_stats = alloc_hw_stats;
- ibdev->get_hw_stats = get_hw_stats;
+ ibdev->dev.parent = &dd->pcidev->dev;
- /* keep process mad in the driver */
- ibdev->process_mad = hfi1_process_mad;
- ibdev->get_dev_fw_str = hfi1_get_dev_fw_str;
+ ib_set_device_ops(ibdev, &hfi1_dev_ops);
- strncpy(ibdev->node_desc, init_utsname()->nodename,
+ strscpy(ibdev->node_desc, init_utsname()->nodename,
sizeof(ibdev->node_desc));
/*
* Fill in rvt info object.
*/
- dd->verbs_dev.rdi.driver_f.port_callback = hfi1_create_port_files;
- dd->verbs_dev.rdi.driver_f.get_card_name = get_card_name;
dd->verbs_dev.rdi.driver_f.get_pci_dev = get_pci_dev;
dd->verbs_dev.rdi.driver_f.check_ah = hfi1_check_ah;
dd->verbs_dev.rdi.driver_f.notify_new_ah = hfi1_notify_new_ah;
@@ -1818,21 +1808,22 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
dd->verbs_dev.rdi.dparms.qpn_start = 0;
dd->verbs_dev.rdi.dparms.qpn_inc = 1;
dd->verbs_dev.rdi.dparms.qos_shift = dd->qos_shift;
- dd->verbs_dev.rdi.dparms.qpn_res_start = kdeth_qp << 16;
- dd->verbs_dev.rdi.dparms.qpn_res_end =
- dd->verbs_dev.rdi.dparms.qpn_res_start + 65535;
+ dd->verbs_dev.rdi.dparms.qpn_res_start = RVT_KDETH_QP_BASE;
+ dd->verbs_dev.rdi.dparms.qpn_res_end = RVT_AIP_QP_MAX;
dd->verbs_dev.rdi.dparms.max_rdma_atomic = HFI1_MAX_RDMA_ATOMIC;
dd->verbs_dev.rdi.dparms.psn_mask = PSN_MASK;
dd->verbs_dev.rdi.dparms.psn_shift = PSN_SHIFT;
dd->verbs_dev.rdi.dparms.psn_modify_mask = PSN_MODIFY_MASK;
- dd->verbs_dev.rdi.dparms.core_cap_flags = RDMA_CORE_PORT_INTEL_OPA;
+ dd->verbs_dev.rdi.dparms.core_cap_flags = RDMA_CORE_PORT_INTEL_OPA |
+ RDMA_CORE_CAP_OPA_AH;
dd->verbs_dev.rdi.dparms.max_mad_size = OPA_MGMT_MAD_SIZE;
dd->verbs_dev.rdi.driver_f.qp_priv_alloc = qp_priv_alloc;
+ dd->verbs_dev.rdi.driver_f.qp_priv_init = hfi1_qp_priv_init;
dd->verbs_dev.rdi.driver_f.qp_priv_free = qp_priv_free;
dd->verbs_dev.rdi.driver_f.free_all_qps = free_all_qps;
dd->verbs_dev.rdi.driver_f.notify_qp_reset = notify_qp_reset;
- dd->verbs_dev.rdi.driver_f.do_send = hfi1_do_send;
+ dd->verbs_dev.rdi.driver_f.do_send = hfi1_do_send_from_rvt;
dd->verbs_dev.rdi.driver_f.schedule_send = hfi1_schedule_send;
dd->verbs_dev.rdi.driver_f.schedule_send_no_lock = _hfi1_schedule_send;
dd->verbs_dev.rdi.driver_f.get_pmtu_from_attr = get_pmtu_from_attr;
@@ -1845,12 +1836,13 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
dd->verbs_dev.rdi.driver_f.mtu_to_path_mtu = mtu_to_path_mtu;
dd->verbs_dev.rdi.driver_f.check_modify_qp = hfi1_check_modify_qp;
dd->verbs_dev.rdi.driver_f.modify_qp = hfi1_modify_qp;
- dd->verbs_dev.rdi.driver_f.check_send_wqe = hfi1_check_send_wqe;
+ dd->verbs_dev.rdi.driver_f.notify_restart_rc = hfi1_restart_rc;
+ dd->verbs_dev.rdi.driver_f.setup_wqe = hfi1_setup_wqe;
+ dd->verbs_dev.rdi.driver_f.comp_vect_cpu_lookup =
+ hfi1_comp_vect_mappings_lookup;
/* completeion queue */
- snprintf(dd->verbs_dev.rdi.dparms.cq_name,
- sizeof(dd->verbs_dev.rdi.dparms.cq_name),
- "hfi1_cq%d", dd->unit);
+ dd->verbs_dev.rdi.ibdev.num_comp_vectors = dd->comp_vect_possible_cpus;
dd->verbs_dev.rdi.dparms.node = dd->node;
/* misc settings */
@@ -1858,10 +1850,18 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
dd->verbs_dev.rdi.dparms.lkey_table_size = hfi1_lkey_table_size;
dd->verbs_dev.rdi.dparms.nports = dd->num_pports;
dd->verbs_dev.rdi.dparms.npkeys = hfi1_get_npkeys(dd);
+ dd->verbs_dev.rdi.dparms.sge_copy_mode = sge_copy_mode;
+ dd->verbs_dev.rdi.dparms.wss_threshold = wss_threshold;
+ dd->verbs_dev.rdi.dparms.wss_clean_period = wss_clean_period;
+ dd->verbs_dev.rdi.dparms.reserved_operations = 1;
+ dd->verbs_dev.rdi.dparms.extra_rdma_atomic = HFI1_TID_RDMA_WRITE_CNT;
/* post send table */
dd->verbs_dev.rdi.post_parms = hfi1_post_parms;
+ /* opcode translation table */
+ dd->verbs_dev.rdi.wc_opcode = ib_hfi1_wc_opcode;
+
ppd = dd->pport;
for (i = 0; i < dd->num_pports; i++, ppd++)
rvt_init_port(&dd->verbs_dev.rdi,
@@ -1900,17 +1900,18 @@ void hfi1_unregister_ib_device(struct hfi1_devdata *dd)
if (!list_empty(&dev->memwait))
dd_dev_err(dd, "memwait list not empty!\n");
- del_timer_sync(&dev->mem_timer);
+ timer_delete_sync(&dev->mem_timer);
verbs_txreq_exit(dev);
- kfree(dev_cntr_names);
- kfree(port_cntr_names);
- cntr_names_initialized = 0;
+ kfree(dev_cntr_descs);
+ kfree(port_cntr_descs);
+ dev_cntr_descs = NULL;
+ port_cntr_descs = NULL;
}
void hfi1_cnp_rcv(struct hfi1_packet *packet)
{
- struct hfi1_ibport *ibp = &packet->rcd->ppd->ibport_data;
+ struct hfi1_ibport *ibp = rcd_to_iport(packet->rcd);
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
struct ib_header *hdr = packet->hdr;
struct rvt_qp *qp = packet->qp;
@@ -1920,12 +1921,12 @@ void hfi1_cnp_rcv(struct hfi1_packet *packet)
switch (packet->qp->ibqp.qp_type) {
case IB_QPT_UC:
- rlid = qp->remote_ah_attr.dlid;
+ rlid = rdma_ah_get_dlid(&qp->remote_ah_attr);
rqpn = qp->remote_qpn;
svc_type = IB_CC_SVCTYPE_UC;
break;
case IB_QPT_RC:
- rlid = qp->remote_ah_attr.dlid;
+ rlid = rdma_ah_get_dlid(&qp->remote_ah_attr);
rqpn = qp->remote_qpn;
svc_type = IB_CC_SVCTYPE_RC;
break;
@@ -1939,7 +1940,7 @@ void hfi1_cnp_rcv(struct hfi1_packet *packet)
return;
}
- sc5 = hdr2sc(hdr, packet->rhf);
+ sc5 = hfi1_9B_get_sc5(hdr, packet->rhf);
sl = ibp->sc_to_sl[sc5];
lqpn = qp->ibqp.qp_num;
diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h
index e6b893010e6d..070e4f0babe8 100644
--- a/drivers/infiniband/hw/hfi1/verbs.h
+++ b/drivers/infiniband/hw/hfi1/verbs.h
@@ -1,48 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
+ * Copyright(c) 2015 - 2018 Intel Corporation.
*/
#ifndef HFI1_VERBS_H
@@ -71,6 +29,8 @@ struct hfi1_devdata;
struct hfi1_packet;
#include "iowait.h"
+#include "tid_rdma.h"
+#include "opfn.h"
#define HFI1_MAX_RDMA_ATOMIC 16
@@ -95,6 +55,7 @@ struct hfi1_packet;
#define HFI1_VENDOR_IPG cpu_to_be16(0xFFA0)
#define IB_DEFAULT_GID_PREFIX cpu_to_be64(0xfe80000000000000ULL)
+#define OPA_BTH_MIG_REQ BIT(31)
#define RC_OP(x) IB_OPCODE_RC_##x
#define UC_OP(x) IB_OPCODE_UC_##x
@@ -104,6 +65,37 @@ enum {
HFI1_HAS_GRH = (1 << 0),
};
+#define LRH_16B_BYTES (sizeof_field(struct hfi1_16b_header, lrh))
+#define LRH_16B_DWORDS (LRH_16B_BYTES / sizeof(u32))
+#define LRH_9B_BYTES (sizeof_field(struct ib_header, lrh))
+#define LRH_9B_DWORDS (LRH_9B_BYTES / sizeof(u32))
+
+/* 24Bits for qpn, upper 8Bits reserved */
+struct opa_16b_mgmt {
+ __be32 dest_qpn;
+ __be32 src_qpn;
+};
+
+struct hfi1_16b_header {
+ u32 lrh[4];
+ union {
+ struct {
+ struct ib_grh grh;
+ struct ib_other_headers oth;
+ } l;
+ struct ib_other_headers oth;
+ struct opa_16b_mgmt mgmt;
+ } u;
+} __packed;
+
+struct hfi1_opa_header {
+ union {
+ struct ib_header ibh; /* 9B header */
+ struct hfi1_16b_header opah; /* 16B header */
+ };
+ u8 hdr_type; /* 9B or 16B */
+} __packed;
+
struct hfi1_ahg_info {
u32 ahgdesc[2];
u16 tx_flags;
@@ -113,7 +105,7 @@ struct hfi1_ahg_info {
struct hfi1_sdma_header {
__le64 pbc;
- struct ib_header hdr;
+ struct hfi1_opa_header hdr;
} __packed;
/*
@@ -124,23 +116,90 @@ struct hfi1_qp_priv {
struct hfi1_ahg_info *s_ahg; /* ahg info for next header */
struct sdma_engine *s_sde; /* current sde */
struct send_context *s_sendcontext; /* current sendcontext */
+ struct hfi1_ctxtdata *rcd; /* QP's receive context */
+ struct page **pages; /* for TID page scan */
+ u32 tid_enqueue; /* saved when tid waited */
u8 s_sc; /* SC[0..4] for next packet */
- u8 r_adefered; /* number of acks defered */
struct iowait s_iowait;
- struct timer_list s_rnr_timer;
+ struct timer_list s_tid_timer; /* for timing tid wait */
+ struct timer_list s_tid_retry_timer; /* for timing tid ack */
+ struct list_head tid_wait; /* for queueing tid space */
+ struct hfi1_opfn_data opfn;
+ struct tid_flow_state flow_state;
+ struct tid_rdma_qp_params tid_rdma;
struct rvt_qp *owner;
+ u16 s_running_pkt_size;
+ u8 hdr_type; /* 9B or 16B */
+ struct rvt_sge_state tid_ss; /* SGE state pointer for 2nd leg */
+ atomic_t n_requests; /* # of TID RDMA requests in the */
+ /* queue */
+ atomic_t n_tid_requests; /* # of sent TID RDMA requests */
+ unsigned long tid_timer_timeout_jiffies;
+ unsigned long tid_retry_timeout_jiffies;
+
+ /* variables for the TID RDMA SE state machine */
+ u8 s_state;
+ u8 s_retry;
+ u8 rnr_nak_state; /* RNR NAK state */
+ u8 s_nak_state;
+ u32 s_nak_psn;
+ u32 s_flags;
+ u32 s_tid_cur;
+ u32 s_tid_head;
+ u32 s_tid_tail;
+ u32 r_tid_head; /* Most recently added TID RDMA request */
+ u32 r_tid_tail; /* the last completed TID RDMA request */
+ u32 r_tid_ack; /* the TID RDMA request to be ACK'ed */
+ u32 r_tid_alloc; /* Request for which we are allocating resources */
+ u32 pending_tid_w_segs; /* Num of pending tid write segments */
+ u32 pending_tid_w_resp; /* Num of pending tid write responses */
+ u32 alloc_w_segs; /* Number of segments for which write */
+ /* resources have been allocated for this QP */
+
+ /* For TID RDMA READ */
+ u32 tid_r_reqs; /* Num of tid reads requested */
+ u32 tid_r_comp; /* Num of tid reads completed */
+ u32 pending_tid_r_segs; /* Num of pending tid read segments */
+ u16 pkts_ps; /* packets per segment */
+ u8 timeout_shift; /* account for number of packets per segment */
+
+ u32 r_next_psn_kdeth;
+ u32 r_next_psn_kdeth_save;
+ u32 s_resync_psn;
+ u8 sync_pt; /* Set when QP reaches sync point */
+ u8 resync;
+};
+
+#define HFI1_QP_WQE_INVALID ((u32)-1)
+
+struct hfi1_swqe_priv {
+ struct tid_rdma_request tid_req;
+ struct rvt_sge_state ss; /* Used for TID RDMA READ Request */
+};
+
+struct hfi1_ack_priv {
+ struct rvt_sge_state ss; /* used for TID WRITE RESP */
+ struct tid_rdma_request tid_req;
};
/*
* This structure is used to hold commonly lookedup and computed values during
* the send engine progress.
*/
+struct iowait_work;
struct hfi1_pkt_state {
struct hfi1_ibdev *dev;
struct hfi1_ibport *ibp;
struct hfi1_pportdata *ppd;
struct verbs_txreq *s_txreq;
+ struct iowait_work *wait;
unsigned long flags;
+ unsigned long timeout;
+ unsigned long timeout_int;
+ int cpu;
+ u8 opcode;
+ bool in_thread;
+ bool pkts_sent;
};
#define HFI1_PSN_CREDIT 16
@@ -184,6 +243,7 @@ struct hfi1_ibdev {
struct kmem_cache *verbs_txreq_cache;
u64 n_txwait;
u64 n_kmem_wait;
+ u64 n_tidwait;
/* protect iowait lists */
seqlock_t iowait_lock ____cacheline_aligned_in_smp;
@@ -196,6 +256,9 @@ struct hfi1_ibdev {
struct dentry *hfi1_ibdev_dbg;
/* per HFI symlinks to above */
struct dentry *hfi1_ibdev_link;
+#ifdef CONFIG_FAULT_INJECTION
+ struct fault *fault;
+#endif
#endif
};
@@ -207,7 +270,7 @@ static inline struct hfi1_ibdev *to_idev(struct ib_device *ibdev)
return container_of(rdi, struct hfi1_ibdev, rdi);
}
-static inline struct rvt_qp *iowait_to_qp(struct iowait *s_iowait)
+static inline struct rvt_qp *iowait_to_qp(struct iowait *s_iowait)
{
struct hfi1_qp_priv *priv;
@@ -216,29 +279,17 @@ static inline struct rvt_qp *iowait_to_qp(struct iowait *s_iowait)
}
/*
- * Send if not busy or waiting for I/O and either
- * a RC response is pending or we can process send work requests.
- */
-static inline int hfi1_send_ok(struct rvt_qp *qp)
-{
- return !(qp->s_flags & (RVT_S_BUSY | RVT_S_ANY_WAIT_IO)) &&
- (qp->s_hdrwords || (qp->s_flags & RVT_S_RESP_PENDING) ||
- !(qp->s_flags & RVT_S_ANY_WAIT_SEND));
-}
-
-/*
* This must be called with s_lock held.
*/
-void hfi1_bad_pqkey(struct hfi1_ibport *ibp, __be16 trap_num, u32 key, u32 sl,
- u32 qp1, u32 qp2, u16 lid1, u16 lid2);
-void hfi1_cap_mask_chg(struct rvt_dev_info *rdi, u8 port_num);
+void hfi1_bad_pkey(struct hfi1_ibport *ibp, u32 key, u32 sl,
+ u32 qp1, u32 qp2, u32 lid1, u32 lid2);
+void hfi1_cap_mask_chg(struct rvt_dev_info *rdi, u32 port_num);
void hfi1_sys_guid_chg(struct hfi1_ibport *ibp);
void hfi1_node_desc_chg(struct hfi1_ibport *ibp);
-int hfi1_process_mad(struct ib_device *ibdev, int mad_flags, u8 port,
+int hfi1_process_mad(struct ib_device *ibdev, int mad_flags, u32 port,
const struct ib_wc *in_wc, const struct ib_grh *in_grh,
- const struct ib_mad_hdr *in_mad, size_t in_mad_size,
- struct ib_mad_hdr *out_mad, size_t *out_mad_size,
- u16 *out_mad_pkey_index);
+ const struct ib_mad *in_mad, struct ib_mad *out_mad,
+ size_t *out_mad_size, u16 *out_mad_pkey_index);
/*
* The PSN_MASK and PSN_SHIFT allow for
@@ -250,25 +301,11 @@ int hfi1_process_mad(struct ib_device *ibdev, int mad_flags, u8 port,
* necessarily be at least one bit less than
* the container holding the PSN.
*/
-#ifndef CONFIG_HFI1_VERBS_31BIT_PSN
-#define PSN_MASK 0xFFFFFF
-#define PSN_SHIFT 8
-#else
#define PSN_MASK 0x7FFFFFFF
#define PSN_SHIFT 1
-#endif
#define PSN_MODIFY_MASK 0xFFFFFF
/*
- * Compare the lower 24 bits of the msn values.
- * Returns an integer <, ==, or > than zero.
- */
-static inline int cmp_msn(u32 a, u32 b)
-{
- return (((int)a) - ((int)b)) << 8;
-}
-
-/*
* Compare two PSNs
* Returns an integer <, ==, or > than zero.
*/
@@ -293,16 +330,36 @@ static inline u32 delta_psn(u32 a, u32 b)
return (((int)a - (int)b) << PSN_SHIFT) >> PSN_SHIFT;
}
+static inline struct tid_rdma_request *wqe_to_tid_req(struct rvt_swqe *wqe)
+{
+ return &((struct hfi1_swqe_priv *)wqe->priv)->tid_req;
+}
+
+static inline struct tid_rdma_request *ack_to_tid_req(struct rvt_ack_entry *e)
+{
+ return &((struct hfi1_ack_priv *)e->priv)->tid_req;
+}
+
+/*
+ * Look through all the active flows for a TID RDMA request and find
+ * the one (if it exists) that contains the specified PSN.
+ */
+static inline u32 __full_flow_psn(struct flow_state *state, u32 psn)
+{
+ return mask_psn((state->generation << HFI1_KDETH_BTH_SEQ_SHIFT) |
+ (psn & HFI1_KDETH_BTH_SEQ_MASK));
+}
+
+static inline u32 full_flow_psn(struct tid_rdma_flow *flow, u32 psn)
+{
+ return __full_flow_psn(&flow->flow_state, psn);
+}
+
struct verbs_txreq;
void hfi1_put_txreq(struct verbs_txreq *tx);
int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps);
-void hfi1_copy_sge(struct rvt_sge_state *ss, void *data, u32 length,
- int release, int copy_last);
-
-void hfi1_skip_sge(struct rvt_sge_state *ss, u32 length, int release);
-
void hfi1_cnp_rcv(struct hfi1_packet *packet);
void hfi1_uc_rcv(struct hfi1_packet *packet);
@@ -311,30 +368,18 @@ void hfi1_rc_rcv(struct hfi1_packet *packet);
void hfi1_rc_hdrerr(
struct hfi1_ctxtdata *rcd,
- struct ib_header *hdr,
- u32 rcv_flags,
+ struct hfi1_packet *packet,
struct rvt_qp *qp);
-u8 ah_to_sc(struct ib_device *ibdev, struct ib_ah_attr *ah_attr);
+u8 ah_to_sc(struct ib_device *ibdev, struct rdma_ah_attr *ah_attr);
-struct ib_ah *hfi1_create_qp0_ah(struct hfi1_ibport *ibp, u16 dlid);
-
-void hfi1_rc_rnr_retry(unsigned long arg);
-void hfi1_add_rnr_timer(struct rvt_qp *qp, u32 to);
-void hfi1_rc_timeout(unsigned long arg);
-void hfi1_del_timers_sync(struct rvt_qp *qp);
-void hfi1_stop_rc_timers(struct rvt_qp *qp);
-
-void hfi1_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr);
-
-void hfi1_rc_error(struct rvt_qp *qp, enum ib_wc_status err);
+void hfi1_rc_verbs_aborted(struct rvt_qp *qp, struct hfi1_opa_header *opah);
+void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_opa_header *opah);
void hfi1_ud_rcv(struct hfi1_packet *packet);
int hfi1_lookup_pkey_idx(struct hfi1_ibport *ibp, u16 pkey);
-int hfi1_rvt_get_rwqe(struct rvt_qp *qp, int wr_id_only);
-
void hfi1_migrate_qp(struct rvt_qp *qp);
int hfi1_check_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
@@ -342,40 +387,29 @@ int hfi1_check_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
int attr_mask, struct ib_udata *udata);
+void hfi1_restart_rc(struct rvt_qp *qp, u32 psn, int wait);
+int hfi1_setup_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe,
+ bool *call_send);
-int hfi1_check_send_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe);
-
-extern const u32 rc_only_opcode;
-extern const u32 uc_only_opcode;
-
-static inline u8 get_opcode(struct ib_header *h)
-{
- u16 lnh = be16_to_cpu(h->lrh[0]) & 3;
-
- if (lnh == IB_LNH_IBA_LOCAL)
- return be32_to_cpu(h->u.oth.bth[0]) >> 24;
- else
- return be32_to_cpu(h->u.l.oth.bth[0]) >> 24;
-}
-
-int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct ib_header *hdr,
- int has_grh, struct rvt_qp *qp, u32 bth0);
+int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_packet *packet);
u32 hfi1_make_grh(struct hfi1_ibport *ibp, struct ib_grh *hdr,
- struct ib_global_route *grh, u32 hwords, u32 nwords);
+ const struct ib_global_route *grh, u32 hwords, u32 nwords);
void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr,
- u32 bth0, u32 bth2, int middle,
+ u32 bth0, u32 bth1, u32 bth2, int middle,
struct hfi1_pkt_state *ps);
+bool hfi1_schedule_send_yield(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
+ bool tid);
+
void _hfi1_do_send(struct work_struct *work);
-void hfi1_do_send(struct rvt_qp *qp);
+void hfi1_do_send_from_rvt(struct rvt_qp *qp);
-void hfi1_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
- enum ib_wc_status status);
+void hfi1_do_send(struct rvt_qp *qp, bool in_thread);
-void hfi1_send_rc_ack(struct hfi1_ctxtdata *, struct rvt_qp *qp, int is_fecn);
+void hfi1_send_rc_ack(struct hfi1_packet *packet, bool is_fecn);
int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps);
@@ -387,8 +421,14 @@ int hfi1_register_ib_device(struct hfi1_devdata *);
void hfi1_unregister_ib_device(struct hfi1_devdata *);
+void hfi1_kdeth_eager_rcv(struct hfi1_packet *packet);
+
+void hfi1_kdeth_expected_rcv(struct hfi1_packet *packet);
+
void hfi1_ib_rcv(struct hfi1_packet *packet);
+void hfi1_16B_rcv(struct hfi1_packet *packet);
+
unsigned hfi1_get_npkeys(struct hfi1_devdata *);
int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
@@ -397,26 +437,19 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
u64 pbc);
-int hfi1_wss_init(void);
-void hfi1_wss_exit(void);
-
-/* platform specific: return the lowest level cache (llc) size, in KiB */
-static inline int wss_llc_size(void)
+static inline bool opa_bth_is_migration(struct ib_other_headers *ohdr)
{
- /* assume that the boot CPU value is universal for all CPUs */
- return boot_cpu_data.x86_cache_size;
+ return ohdr->bth[1] & cpu_to_be32(OPA_BTH_MIG_REQ);
}
-/* platform specific: cacheless copy */
-static inline void cacheless_memcpy(void *dst, void *src, size_t n)
+void hfi1_wait_kmem(struct rvt_qp *qp);
+
+static inline void hfi1_trdma_send_complete(struct rvt_qp *qp,
+ struct rvt_swqe *wqe,
+ enum ib_wc_status status)
{
- /*
- * Use the only available X64 cacheless copy. Add a __user cast
- * to quiet sparse. The src agument is already in the kernel so
- * there are no security issues. The extra fault recovery machinery
- * is not invoked.
- */
- __copy_user_nocache(dst, (void __user *)src, n, 0);
+ trdma_clean_swqe(qp, wqe);
+ rvt_send_complete(qp, wqe, status);
}
extern const enum ib_wc_opcode ib_hfi1_wc_opcode[];
diff --git a/drivers/infiniband/hw/hfi1/verbs_txreq.c b/drivers/infiniband/hw/hfi1/verbs_txreq.c
index 5d23172c470f..822f0d05bac8 100644
--- a/drivers/infiniband/hw/hfi1/verbs_txreq.c
+++ b/drivers/infiniband/hw/hfi1/verbs_txreq.c
@@ -1,48 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
- * Copyright(c) 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
+ * Copyright(c) 2016 - 2018 Intel Corporation.
*/
#include "hfi.h"
@@ -94,13 +52,13 @@ struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev,
struct rvt_qp *qp)
__must_hold(&qp->s_lock)
{
- struct verbs_txreq *tx = ERR_PTR(-EBUSY);
+ struct verbs_txreq *tx = NULL;
write_seqlock(&dev->txwait_lock);
if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
struct hfi1_qp_priv *priv;
- tx = kmem_cache_alloc(dev->verbs_txreq_cache, GFP_ATOMIC);
+ tx = kmem_cache_alloc(dev->verbs_txreq_cache, VERBS_TXREQ_GFP);
if (tx)
goto out;
priv = qp->priv;
@@ -119,13 +77,6 @@ out:
return tx;
}
-static void verbs_txreq_kmem_cache_ctor(void *obj)
-{
- struct verbs_txreq *tx = (struct verbs_txreq *)obj;
-
- memset(tx, 0, sizeof(*tx));
-}
-
int verbs_txreq_init(struct hfi1_ibdev *dev)
{
char buf[TXREQ_LEN];
@@ -135,7 +86,7 @@ int verbs_txreq_init(struct hfi1_ibdev *dev)
dev->verbs_txreq_cache = kmem_cache_create(buf,
sizeof(struct verbs_txreq),
0, SLAB_HWCACHE_ALIGN,
- verbs_txreq_kmem_cache_ctor);
+ NULL);
if (!dev->verbs_txreq_cache)
return -ENOMEM;
return 0;
diff --git a/drivers/infiniband/hw/hfi1/verbs_txreq.h b/drivers/infiniband/hw/hfi1/verbs_txreq.h
index 76216f2ef35a..56353c7676d0 100644
--- a/drivers/infiniband/hw/hfi1/verbs_txreq.h
+++ b/drivers/infiniband/hw/hfi1/verbs_txreq.h
@@ -1,48 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
- * Copyright(c) 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
+ * Copyright(c) 2016 - 2018 Intel Corporation.
*/
#ifndef HFI1_VERBS_TXREQ_H
@@ -72,6 +30,7 @@ struct hfi1_ibdev;
struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev,
struct rvt_qp *qp);
+#define VERBS_TXREQ_GFP (GFP_ATOMIC | __GFP_NOWARN)
static inline struct verbs_txreq *get_txreq(struct hfi1_ibdev *dev,
struct rvt_qp *qp)
__must_hold(&qp->slock)
@@ -79,38 +38,40 @@ static inline struct verbs_txreq *get_txreq(struct hfi1_ibdev *dev,
struct verbs_txreq *tx;
struct hfi1_qp_priv *priv = qp->priv;
- tx = kmem_cache_alloc(dev->verbs_txreq_cache, GFP_ATOMIC);
+ tx = kmem_cache_alloc(dev->verbs_txreq_cache, VERBS_TXREQ_GFP);
if (unlikely(!tx)) {
/* call slow path to get the lock */
tx = __get_txreq(dev, qp);
- if (IS_ERR(tx))
+ if (!tx)
return tx;
}
tx->qp = qp;
tx->mr = NULL;
tx->sde = priv->s_sde;
tx->psc = priv->s_sendcontext;
- /* so that we can test if the sdma decriptors are there */
+ /* so that we can test if the sdma descriptors are there */
tx->txreq.num_desc = 0;
+ /* Set the header type */
+ tx->phdr.hdr.hdr_type = priv->hdr_type;
+ tx->txreq.flags = 0;
return tx;
}
-static inline struct sdma_txreq *get_sdma_txreq(struct verbs_txreq *tx)
-{
- return &tx->txreq;
-}
-
-static inline struct verbs_txreq *get_waiting_verbs_txreq(struct rvt_qp *qp)
+static inline struct verbs_txreq *get_waiting_verbs_txreq(struct iowait_work *w)
{
struct sdma_txreq *stx;
- struct hfi1_qp_priv *priv = qp->priv;
- stx = iowait_get_txhead(&priv->s_iowait);
+ stx = iowait_get_txhead(w);
if (stx)
return container_of(stx, struct verbs_txreq, txreq);
return NULL;
}
+static inline bool verbs_txreq_queued(struct iowait_work *w)
+{
+ return iowait_packet_queued(w);
+}
+
void hfi1_put_txreq(struct verbs_txreq *tx);
int verbs_txreq_init(struct hfi1_ibdev *dev);
void verbs_txreq_exit(struct hfi1_ibdev *dev);
diff --git a/drivers/infiniband/hw/hfi1/vnic.h b/drivers/infiniband/hw/hfi1/vnic.h
new file mode 100644
index 000000000000..bbafeb5fc0ec
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/vnic.h
@@ -0,0 +1,126 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
+/*
+ * Copyright(c) 2017 - 2020 Intel Corporation.
+ */
+
+#ifndef _HFI1_VNIC_H
+#define _HFI1_VNIC_H
+#include <rdma/opa_vnic.h>
+#include "hfi.h"
+#include "sdma.h"
+
+#define HFI1_VNIC_MAX_TXQ 16
+#define HFI1_VNIC_MAX_PAD 12
+
+/* L4 header definitions */
+#define HFI1_VNIC_L4_HDR_OFFSET OPA_VNIC_L2_HDR_LEN
+
+#define HFI1_VNIC_GET_L4_HDR(data) \
+ (*((u16 *)((u8 *)(data) + HFI1_VNIC_L4_HDR_OFFSET)))
+
+#define HFI1_VNIC_GET_VESWID(data) \
+ (HFI1_VNIC_GET_L4_HDR(data) & 0xFFF)
+
+/* Service class */
+#define HFI1_VNIC_SC_OFFSET_LOW 6
+#define HFI1_VNIC_SC_OFFSET_HI 7
+#define HFI1_VNIC_SC_SHIFT 4
+
+#define HFI1_VNIC_MAX_QUEUE 16
+#define HFI1_NUM_VNIC_CTXT 8
+
+/**
+ * struct hfi1_vnic_sdma - VNIC per Tx ring SDMA information
+ * @dd - device data pointer
+ * @sde - sdma engine
+ * @vinfo - vnic info pointer
+ * @wait - iowait structure
+ * @stx - sdma tx request
+ * @state - vnic Tx ring SDMA state
+ * @q_idx - vnic Tx queue index
+ */
+struct hfi1_vnic_sdma {
+ struct hfi1_devdata *dd;
+ struct sdma_engine *sde;
+ struct hfi1_vnic_vport_info *vinfo;
+ struct iowait wait;
+ struct sdma_txreq stx;
+ unsigned int state;
+ u8 q_idx;
+ bool pkts_sent;
+};
+
+/**
+ * struct hfi1_vnic_rx_queue - HFI1 VNIC receive queue
+ * @idx: queue index
+ * @vinfo: pointer to vport information
+ * @netdev: network device
+ * @napi: netdev napi structure
+ * @skbq: queue of received socket buffers
+ */
+struct hfi1_vnic_rx_queue {
+ u8 idx;
+ struct hfi1_vnic_vport_info *vinfo;
+ struct net_device *netdev;
+ struct napi_struct napi;
+};
+
+/**
+ * struct hfi1_vnic_vport_info - HFI1 VNIC virtual port information
+ * @dd: device data pointer
+ * @netdev: net device pointer
+ * @flags: state flags
+ * @lock: vport lock
+ * @num_tx_q: number of transmit queues
+ * @num_rx_q: number of receive queues
+ * @vesw_id: virtual switch id
+ * @rxq: Array of receive queues
+ * @stats: per queue stats
+ * @sdma: VNIC SDMA structure per TXQ
+ */
+struct hfi1_vnic_vport_info {
+ struct hfi1_devdata *dd;
+ struct net_device *netdev;
+ unsigned long flags;
+
+ /* Lock used around state updates */
+ struct mutex lock;
+
+ u8 num_tx_q;
+ u8 num_rx_q;
+ u16 vesw_id;
+ struct hfi1_vnic_rx_queue rxq[HFI1_NUM_VNIC_CTXT];
+
+ struct opa_vnic_stats stats[HFI1_VNIC_MAX_QUEUE];
+ struct hfi1_vnic_sdma sdma[HFI1_VNIC_MAX_TXQ];
+};
+
+#define v_dbg(format, arg...) \
+ netdev_dbg(vinfo->netdev, format, ## arg)
+#define v_err(format, arg...) \
+ netdev_err(vinfo->netdev, format, ## arg)
+#define v_info(format, arg...) \
+ netdev_info(vinfo->netdev, format, ## arg)
+
+/* vnic hfi1 internal functions */
+void hfi1_vnic_setup(struct hfi1_devdata *dd);
+int hfi1_vnic_txreq_init(struct hfi1_devdata *dd);
+void hfi1_vnic_txreq_deinit(struct hfi1_devdata *dd);
+
+void hfi1_vnic_bypass_rcv(struct hfi1_packet *packet);
+void hfi1_vnic_sdma_init(struct hfi1_vnic_vport_info *vinfo);
+bool hfi1_vnic_sdma_write_avail(struct hfi1_vnic_vport_info *vinfo,
+ u8 q_idx);
+
+/* vnic rdma netdev operations */
+struct net_device *hfi1_vnic_alloc_rn(struct ib_device *device,
+ u32 port_num,
+ enum rdma_netdev_t type,
+ const char *name,
+ unsigned char name_assign_type,
+ void (*setup)(struct net_device *));
+int hfi1_vnic_send_dma(struct hfi1_devdata *dd, u8 q_idx,
+ struct hfi1_vnic_vport_info *vinfo,
+ struct sk_buff *skb, u64 pbc, u8 plen);
+
+#endif /* _HFI1_VNIC_H */
diff --git a/drivers/infiniband/hw/hfi1/vnic_main.c b/drivers/infiniband/hw/hfi1/vnic_main.c
new file mode 100644
index 000000000000..16a4c297a897
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/vnic_main.c
@@ -0,0 +1,615 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+/*
+ * Copyright(c) 2017 - 2020 Intel Corporation.
+ */
+
+/*
+ * This file contains HFI1 support for VNIC functionality
+ */
+
+#include <linux/io.h>
+#include <linux/if_vlan.h>
+
+#include "vnic.h"
+#include "netdev.h"
+
+#define HFI_TX_TIMEOUT_MS 1000
+
+#define HFI1_VNIC_RCV_Q_SIZE 1024
+
+#define HFI1_VNIC_UP 0
+
+static DEFINE_SPINLOCK(vport_cntr_lock);
+
+#define SUM_GRP_COUNTERS(stats, qstats, x_grp) do { \
+ u64 *src64, *dst64; \
+ for (src64 = &qstats->x_grp.unicast, \
+ dst64 = &stats->x_grp.unicast; \
+ dst64 <= &stats->x_grp.s_1519_max;) { \
+ *dst64++ += *src64++; \
+ } \
+ } while (0)
+
+#define VNIC_MASK (0xFF)
+#define VNIC_ID(val) ((1ull << 24) | ((val) & VNIC_MASK))
+
+/* hfi1_vnic_update_stats - update statistics */
+static void hfi1_vnic_update_stats(struct hfi1_vnic_vport_info *vinfo,
+ struct opa_vnic_stats *stats)
+{
+ struct net_device *netdev = vinfo->netdev;
+ u8 i;
+
+ /* add tx counters on different queues */
+ for (i = 0; i < vinfo->num_tx_q; i++) {
+ struct opa_vnic_stats *qstats = &vinfo->stats[i];
+ struct rtnl_link_stats64 *qnstats = &vinfo->stats[i].netstats;
+
+ stats->netstats.tx_fifo_errors += qnstats->tx_fifo_errors;
+ stats->netstats.tx_carrier_errors += qnstats->tx_carrier_errors;
+ stats->tx_drop_state += qstats->tx_drop_state;
+ stats->tx_dlid_zero += qstats->tx_dlid_zero;
+
+ SUM_GRP_COUNTERS(stats, qstats, tx_grp);
+ stats->netstats.tx_packets += qnstats->tx_packets;
+ stats->netstats.tx_bytes += qnstats->tx_bytes;
+ }
+
+ /* add rx counters on different queues */
+ for (i = 0; i < vinfo->num_rx_q; i++) {
+ struct opa_vnic_stats *qstats = &vinfo->stats[i];
+ struct rtnl_link_stats64 *qnstats = &vinfo->stats[i].netstats;
+
+ stats->netstats.rx_fifo_errors += qnstats->rx_fifo_errors;
+ stats->netstats.rx_nohandler += qnstats->rx_nohandler;
+ stats->rx_drop_state += qstats->rx_drop_state;
+ stats->rx_oversize += qstats->rx_oversize;
+ stats->rx_runt += qstats->rx_runt;
+
+ SUM_GRP_COUNTERS(stats, qstats, rx_grp);
+ stats->netstats.rx_packets += qnstats->rx_packets;
+ stats->netstats.rx_bytes += qnstats->rx_bytes;
+ }
+
+ stats->netstats.tx_errors = stats->netstats.tx_fifo_errors +
+ stats->netstats.tx_carrier_errors +
+ stats->tx_drop_state + stats->tx_dlid_zero;
+ stats->netstats.tx_dropped = stats->netstats.tx_errors;
+
+ stats->netstats.rx_errors = stats->netstats.rx_fifo_errors +
+ stats->netstats.rx_nohandler +
+ stats->rx_drop_state + stats->rx_oversize +
+ stats->rx_runt;
+ stats->netstats.rx_dropped = stats->netstats.rx_errors;
+
+ netdev->stats.tx_packets = stats->netstats.tx_packets;
+ netdev->stats.tx_bytes = stats->netstats.tx_bytes;
+ netdev->stats.tx_fifo_errors = stats->netstats.tx_fifo_errors;
+ netdev->stats.tx_carrier_errors = stats->netstats.tx_carrier_errors;
+ netdev->stats.tx_errors = stats->netstats.tx_errors;
+ netdev->stats.tx_dropped = stats->netstats.tx_dropped;
+
+ netdev->stats.rx_packets = stats->netstats.rx_packets;
+ netdev->stats.rx_bytes = stats->netstats.rx_bytes;
+ netdev->stats.rx_fifo_errors = stats->netstats.rx_fifo_errors;
+ netdev->stats.multicast = stats->rx_grp.mcastbcast;
+ netdev->stats.rx_length_errors = stats->rx_oversize + stats->rx_runt;
+ netdev->stats.rx_errors = stats->netstats.rx_errors;
+ netdev->stats.rx_dropped = stats->netstats.rx_dropped;
+}
+
+/* update_len_counters - update pkt's len histogram counters */
+static inline void update_len_counters(struct opa_vnic_grp_stats *grp,
+ int len)
+{
+ /* account for 4 byte FCS */
+ if (len >= 1515)
+ grp->s_1519_max++;
+ else if (len >= 1020)
+ grp->s_1024_1518++;
+ else if (len >= 508)
+ grp->s_512_1023++;
+ else if (len >= 252)
+ grp->s_256_511++;
+ else if (len >= 124)
+ grp->s_128_255++;
+ else if (len >= 61)
+ grp->s_65_127++;
+ else
+ grp->s_64++;
+}
+
+/* hfi1_vnic_update_tx_counters - update transmit counters */
+static void hfi1_vnic_update_tx_counters(struct hfi1_vnic_vport_info *vinfo,
+ u8 q_idx, struct sk_buff *skb, int err)
+{
+ struct ethhdr *mac_hdr = (struct ethhdr *)skb_mac_header(skb);
+ struct opa_vnic_stats *stats = &vinfo->stats[q_idx];
+ struct opa_vnic_grp_stats *tx_grp = &stats->tx_grp;
+ u16 vlan_tci;
+
+ stats->netstats.tx_packets++;
+ stats->netstats.tx_bytes += skb->len + ETH_FCS_LEN;
+
+ update_len_counters(tx_grp, skb->len);
+
+ /* rest of the counts are for good packets only */
+ if (unlikely(err))
+ return;
+
+ if (is_multicast_ether_addr(mac_hdr->h_dest))
+ tx_grp->mcastbcast++;
+ else
+ tx_grp->unicast++;
+
+ if (!__vlan_get_tag(skb, &vlan_tci))
+ tx_grp->vlan++;
+ else
+ tx_grp->untagged++;
+}
+
+/* hfi1_vnic_update_rx_counters - update receive counters */
+static void hfi1_vnic_update_rx_counters(struct hfi1_vnic_vport_info *vinfo,
+ u8 q_idx, struct sk_buff *skb, int err)
+{
+ struct ethhdr *mac_hdr = (struct ethhdr *)skb->data;
+ struct opa_vnic_stats *stats = &vinfo->stats[q_idx];
+ struct opa_vnic_grp_stats *rx_grp = &stats->rx_grp;
+ u16 vlan_tci;
+
+ stats->netstats.rx_packets++;
+ stats->netstats.rx_bytes += skb->len + ETH_FCS_LEN;
+
+ update_len_counters(rx_grp, skb->len);
+
+ /* rest of the counts are for good packets only */
+ if (unlikely(err))
+ return;
+
+ if (is_multicast_ether_addr(mac_hdr->h_dest))
+ rx_grp->mcastbcast++;
+ else
+ rx_grp->unicast++;
+
+ if (!__vlan_get_tag(skb, &vlan_tci))
+ rx_grp->vlan++;
+ else
+ rx_grp->untagged++;
+}
+
+/* This function is overloaded for opa_vnic specific implementation */
+static void hfi1_vnic_get_stats64(struct net_device *netdev,
+ struct rtnl_link_stats64 *stats)
+{
+ struct opa_vnic_stats *vstats = (struct opa_vnic_stats *)stats;
+ struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
+
+ hfi1_vnic_update_stats(vinfo, vstats);
+}
+
+static u64 create_bypass_pbc(u32 vl, u32 dw_len)
+{
+ u64 pbc;
+
+ pbc = ((u64)PBC_IHCRC_NONE << PBC_INSERT_HCRC_SHIFT)
+ | PBC_INSERT_BYPASS_ICRC | PBC_CREDIT_RETURN
+ | PBC_PACKET_BYPASS
+ | ((vl & PBC_VL_MASK) << PBC_VL_SHIFT)
+ | (dw_len & PBC_LENGTH_DWS_MASK) << PBC_LENGTH_DWS_SHIFT;
+
+ return pbc;
+}
+
+/* hfi1_vnic_maybe_stop_tx - stop tx queue if required */
+static void hfi1_vnic_maybe_stop_tx(struct hfi1_vnic_vport_info *vinfo,
+ u8 q_idx)
+{
+ netif_stop_subqueue(vinfo->netdev, q_idx);
+ if (!hfi1_vnic_sdma_write_avail(vinfo, q_idx))
+ return;
+
+ netif_start_subqueue(vinfo->netdev, q_idx);
+}
+
+static netdev_tx_t hfi1_netdev_start_xmit(struct sk_buff *skb,
+ struct net_device *netdev)
+{
+ struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
+ u8 pad_len, q_idx = skb->queue_mapping;
+ struct hfi1_devdata *dd = vinfo->dd;
+ struct opa_vnic_skb_mdata *mdata;
+ u32 pkt_len, total_len;
+ int err = -EINVAL;
+ u64 pbc;
+
+ v_dbg("xmit: queue %d skb len %d\n", q_idx, skb->len);
+ if (unlikely(!netif_oper_up(netdev))) {
+ vinfo->stats[q_idx].tx_drop_state++;
+ goto tx_finish;
+ }
+
+ /* take out meta data */
+ mdata = (struct opa_vnic_skb_mdata *)skb->data;
+ skb_pull(skb, sizeof(*mdata));
+ if (unlikely(mdata->flags & OPA_VNIC_SKB_MDATA_ENCAP_ERR)) {
+ vinfo->stats[q_idx].tx_dlid_zero++;
+ goto tx_finish;
+ }
+
+ /* add tail padding (for 8 bytes size alignment) and icrc */
+ pad_len = -(skb->len + OPA_VNIC_ICRC_TAIL_LEN) & 0x7;
+ pad_len += OPA_VNIC_ICRC_TAIL_LEN;
+
+ /*
+ * pkt_len is how much data we have to write, includes header and data.
+ * total_len is length of the packet in Dwords plus the PBC should not
+ * include the CRC.
+ */
+ pkt_len = (skb->len + pad_len) >> 2;
+ total_len = pkt_len + 2; /* PBC + packet */
+
+ pbc = create_bypass_pbc(mdata->vl, total_len);
+
+ skb_get(skb);
+ v_dbg("pbc 0x%016llX len %d pad_len %d\n", pbc, skb->len, pad_len);
+ err = dd->process_vnic_dma_send(dd, q_idx, vinfo, skb, pbc, pad_len);
+ if (unlikely(err)) {
+ if (err == -ENOMEM)
+ vinfo->stats[q_idx].netstats.tx_fifo_errors++;
+ else if (err != -EBUSY)
+ vinfo->stats[q_idx].netstats.tx_carrier_errors++;
+ }
+ /* remove the header before updating tx counters */
+ skb_pull(skb, OPA_VNIC_HDR_LEN);
+
+ if (unlikely(err == -EBUSY)) {
+ hfi1_vnic_maybe_stop_tx(vinfo, q_idx);
+ dev_kfree_skb_any(skb);
+ return NETDEV_TX_BUSY;
+ }
+
+tx_finish:
+ /* update tx counters */
+ hfi1_vnic_update_tx_counters(vinfo, q_idx, skb, err);
+ dev_kfree_skb_any(skb);
+ return NETDEV_TX_OK;
+}
+
+static u16 hfi1_vnic_select_queue(struct net_device *netdev,
+ struct sk_buff *skb,
+ struct net_device *sb_dev)
+{
+ struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
+ struct opa_vnic_skb_mdata *mdata;
+ struct sdma_engine *sde;
+
+ mdata = (struct opa_vnic_skb_mdata *)skb->data;
+ sde = sdma_select_engine_vl(vinfo->dd, mdata->entropy, mdata->vl);
+ return sde->this_idx;
+}
+
+/* hfi1_vnic_decap_skb - strip OPA header from the skb (ethernet) packet */
+static inline int hfi1_vnic_decap_skb(struct hfi1_vnic_rx_queue *rxq,
+ struct sk_buff *skb)
+{
+ struct hfi1_vnic_vport_info *vinfo = rxq->vinfo;
+ int max_len = vinfo->netdev->mtu + VLAN_ETH_HLEN;
+ int rc = -EFAULT;
+
+ skb_pull(skb, OPA_VNIC_HDR_LEN);
+
+ /* Validate Packet length */
+ if (unlikely(skb->len > max_len))
+ vinfo->stats[rxq->idx].rx_oversize++;
+ else if (unlikely(skb->len < ETH_ZLEN))
+ vinfo->stats[rxq->idx].rx_runt++;
+ else
+ rc = 0;
+ return rc;
+}
+
+static struct hfi1_vnic_vport_info *get_vnic_port(struct hfi1_devdata *dd,
+ int vesw_id)
+{
+ int vnic_id = VNIC_ID(vesw_id);
+
+ return hfi1_netdev_get_data(dd, vnic_id);
+}
+
+static struct hfi1_vnic_vport_info *get_first_vnic_port(struct hfi1_devdata *dd)
+{
+ struct hfi1_vnic_vport_info *vinfo;
+ int next_id = VNIC_ID(0);
+
+ vinfo = hfi1_netdev_get_first_data(dd, &next_id);
+
+ if (next_id > VNIC_ID(VNIC_MASK))
+ return NULL;
+
+ return vinfo;
+}
+
+void hfi1_vnic_bypass_rcv(struct hfi1_packet *packet)
+{
+ struct hfi1_devdata *dd = packet->rcd->dd;
+ struct hfi1_vnic_vport_info *vinfo = NULL;
+ struct hfi1_vnic_rx_queue *rxq;
+ struct sk_buff *skb;
+ int l4_type, vesw_id = -1, rc;
+ u8 q_idx;
+ unsigned char *pad_info;
+
+ l4_type = hfi1_16B_get_l4(packet->ebuf);
+ if (likely(l4_type == OPA_16B_L4_ETHR)) {
+ vesw_id = HFI1_VNIC_GET_VESWID(packet->ebuf);
+ vinfo = get_vnic_port(dd, vesw_id);
+
+ /*
+ * In case of invalid vesw id, count the error on
+ * the first available vport.
+ */
+ if (unlikely(!vinfo)) {
+ struct hfi1_vnic_vport_info *vinfo_tmp;
+
+ vinfo_tmp = get_first_vnic_port(dd);
+ if (vinfo_tmp) {
+ spin_lock(&vport_cntr_lock);
+ vinfo_tmp->stats[0].netstats.rx_nohandler++;
+ spin_unlock(&vport_cntr_lock);
+ }
+ }
+ }
+
+ if (unlikely(!vinfo)) {
+ dd_dev_warn(dd, "vnic rcv err: l4 %d vesw id %d ctx %d\n",
+ l4_type, vesw_id, packet->rcd->ctxt);
+ return;
+ }
+
+ q_idx = packet->rcd->vnic_q_idx;
+ rxq = &vinfo->rxq[q_idx];
+ if (unlikely(!netif_oper_up(vinfo->netdev))) {
+ vinfo->stats[q_idx].rx_drop_state++;
+ return;
+ }
+
+ skb = netdev_alloc_skb(vinfo->netdev, packet->tlen);
+ if (unlikely(!skb)) {
+ vinfo->stats[q_idx].netstats.rx_fifo_errors++;
+ return;
+ }
+
+ memcpy(skb->data, packet->ebuf, packet->tlen);
+ skb_put(skb, packet->tlen);
+
+ pad_info = skb->data + skb->len - 1;
+ skb_trim(skb, (skb->len - OPA_VNIC_ICRC_TAIL_LEN -
+ ((*pad_info) & 0x7)));
+
+ rc = hfi1_vnic_decap_skb(rxq, skb);
+
+ /* update rx counters */
+ hfi1_vnic_update_rx_counters(vinfo, rxq->idx, skb, rc);
+ if (unlikely(rc)) {
+ dev_kfree_skb_any(skb);
+ return;
+ }
+
+ skb_checksum_none_assert(skb);
+ skb->protocol = eth_type_trans(skb, rxq->netdev);
+
+ napi_gro_receive(&rxq->napi, skb);
+}
+
+static int hfi1_vnic_up(struct hfi1_vnic_vport_info *vinfo)
+{
+ struct hfi1_devdata *dd = vinfo->dd;
+ struct net_device *netdev = vinfo->netdev;
+ int rc;
+
+ /* ensure virtual eth switch id is valid */
+ if (!vinfo->vesw_id)
+ return -EINVAL;
+
+ rc = hfi1_netdev_add_data(dd, VNIC_ID(vinfo->vesw_id), vinfo);
+ if (rc < 0)
+ return rc;
+
+ rc = hfi1_netdev_rx_init(dd);
+ if (rc)
+ goto err_remove;
+
+ netif_carrier_on(netdev);
+ netif_tx_start_all_queues(netdev);
+ set_bit(HFI1_VNIC_UP, &vinfo->flags);
+
+ return 0;
+
+err_remove:
+ hfi1_netdev_remove_data(dd, VNIC_ID(vinfo->vesw_id));
+ return rc;
+}
+
+static void hfi1_vnic_down(struct hfi1_vnic_vport_info *vinfo)
+{
+ struct hfi1_devdata *dd = vinfo->dd;
+
+ clear_bit(HFI1_VNIC_UP, &vinfo->flags);
+ netif_carrier_off(vinfo->netdev);
+ netif_tx_disable(vinfo->netdev);
+ hfi1_netdev_remove_data(dd, VNIC_ID(vinfo->vesw_id));
+
+ hfi1_netdev_rx_destroy(dd);
+}
+
+static int hfi1_netdev_open(struct net_device *netdev)
+{
+ struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
+ int rc;
+
+ mutex_lock(&vinfo->lock);
+ rc = hfi1_vnic_up(vinfo);
+ mutex_unlock(&vinfo->lock);
+ return rc;
+}
+
+static int hfi1_netdev_close(struct net_device *netdev)
+{
+ struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
+
+ mutex_lock(&vinfo->lock);
+ if (test_bit(HFI1_VNIC_UP, &vinfo->flags))
+ hfi1_vnic_down(vinfo);
+ mutex_unlock(&vinfo->lock);
+ return 0;
+}
+
+static int hfi1_vnic_init(struct hfi1_vnic_vport_info *vinfo)
+{
+ struct hfi1_devdata *dd = vinfo->dd;
+ int rc = 0;
+
+ mutex_lock(&hfi1_mutex);
+ if (!dd->vnic_num_vports) {
+ rc = hfi1_vnic_txreq_init(dd);
+ if (rc)
+ goto txreq_fail;
+ }
+
+ rc = hfi1_netdev_rx_init(dd);
+ if (rc) {
+ dd_dev_err(dd, "Unable to initialize netdev contexts\n");
+ goto alloc_fail;
+ }
+
+ hfi1_init_vnic_rsm(dd);
+
+ dd->vnic_num_vports++;
+ hfi1_vnic_sdma_init(vinfo);
+
+alloc_fail:
+ if (!dd->vnic_num_vports)
+ hfi1_vnic_txreq_deinit(dd);
+txreq_fail:
+ mutex_unlock(&hfi1_mutex);
+ return rc;
+}
+
+static void hfi1_vnic_deinit(struct hfi1_vnic_vport_info *vinfo)
+{
+ struct hfi1_devdata *dd = vinfo->dd;
+
+ mutex_lock(&hfi1_mutex);
+ if (--dd->vnic_num_vports == 0) {
+ hfi1_deinit_vnic_rsm(dd);
+ hfi1_vnic_txreq_deinit(dd);
+ }
+ mutex_unlock(&hfi1_mutex);
+ hfi1_netdev_rx_destroy(dd);
+}
+
+static void hfi1_vnic_set_vesw_id(struct net_device *netdev, int id)
+{
+ struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
+ bool reopen = false;
+
+ /*
+ * If vesw_id is being changed, and if the vnic port is up,
+ * reset the vnic port to ensure new vesw_id gets picked up
+ */
+ if (id != vinfo->vesw_id) {
+ mutex_lock(&vinfo->lock);
+ if (test_bit(HFI1_VNIC_UP, &vinfo->flags)) {
+ hfi1_vnic_down(vinfo);
+ reopen = true;
+ }
+
+ vinfo->vesw_id = id;
+ if (reopen)
+ hfi1_vnic_up(vinfo);
+
+ mutex_unlock(&vinfo->lock);
+ }
+}
+
+/* netdev ops */
+static const struct net_device_ops hfi1_netdev_ops = {
+ .ndo_open = hfi1_netdev_open,
+ .ndo_stop = hfi1_netdev_close,
+ .ndo_start_xmit = hfi1_netdev_start_xmit,
+ .ndo_select_queue = hfi1_vnic_select_queue,
+ .ndo_get_stats64 = hfi1_vnic_get_stats64,
+};
+
+static void hfi1_vnic_free_rn(struct net_device *netdev)
+{
+ struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
+
+ hfi1_vnic_deinit(vinfo);
+ mutex_destroy(&vinfo->lock);
+ free_netdev(netdev);
+}
+
+struct net_device *hfi1_vnic_alloc_rn(struct ib_device *device,
+ u32 port_num,
+ enum rdma_netdev_t type,
+ const char *name,
+ unsigned char name_assign_type,
+ void (*setup)(struct net_device *))
+{
+ struct hfi1_devdata *dd = dd_from_ibdev(device);
+ struct hfi1_vnic_vport_info *vinfo;
+ struct net_device *netdev;
+ struct rdma_netdev *rn;
+ int i, size, rc;
+
+ if (!dd->num_netdev_contexts)
+ return ERR_PTR(-ENOMEM);
+
+ if (!port_num || (port_num > dd->num_pports))
+ return ERR_PTR(-EINVAL);
+
+ if (type != RDMA_NETDEV_OPA_VNIC)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ size = sizeof(struct opa_vnic_rdma_netdev) + sizeof(*vinfo);
+ netdev = alloc_netdev_mqs(size, name, name_assign_type, setup,
+ chip_sdma_engines(dd),
+ dd->num_netdev_contexts);
+ if (!netdev)
+ return ERR_PTR(-ENOMEM);
+
+ rn = netdev_priv(netdev);
+ vinfo = opa_vnic_dev_priv(netdev);
+ vinfo->dd = dd;
+ vinfo->num_tx_q = chip_sdma_engines(dd);
+ vinfo->num_rx_q = dd->num_netdev_contexts;
+ vinfo->netdev = netdev;
+ rn->free_rdma_netdev = hfi1_vnic_free_rn;
+ rn->set_id = hfi1_vnic_set_vesw_id;
+
+ netdev->features = NETIF_F_HIGHDMA | NETIF_F_SG;
+ netdev->hw_features = netdev->features;
+ netdev->vlan_features = netdev->features;
+ netdev->watchdog_timeo = msecs_to_jiffies(HFI_TX_TIMEOUT_MS);
+ netdev->netdev_ops = &hfi1_netdev_ops;
+ mutex_init(&vinfo->lock);
+
+ for (i = 0; i < vinfo->num_rx_q; i++) {
+ struct hfi1_vnic_rx_queue *rxq = &vinfo->rxq[i];
+
+ rxq->idx = i;
+ rxq->vinfo = vinfo;
+ rxq->netdev = netdev;
+ }
+
+ rc = hfi1_vnic_init(vinfo);
+ if (rc)
+ goto init_fail;
+
+ return netdev;
+init_fail:
+ mutex_destroy(&vinfo->lock);
+ free_netdev(netdev);
+ return ERR_PTR(rc);
+}
diff --git a/drivers/infiniband/hw/hfi1/vnic_sdma.c b/drivers/infiniband/hw/hfi1/vnic_sdma.c
new file mode 100644
index 000000000000..6caf01ba0bca
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/vnic_sdma.c
@@ -0,0 +1,282 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+/*
+ * Copyright(c) 2017 - 2018 Intel Corporation.
+ */
+
+/*
+ * This file contains HFI1 support for VNIC SDMA functionality
+ */
+
+#include "sdma.h"
+#include "vnic.h"
+
+#define HFI1_VNIC_SDMA_Q_ACTIVE BIT(0)
+#define HFI1_VNIC_SDMA_Q_DEFERRED BIT(1)
+
+#define HFI1_VNIC_TXREQ_NAME_LEN 32
+#define HFI1_VNIC_SDMA_DESC_WTRMRK 64
+
+/*
+ * struct vnic_txreq - VNIC transmit descriptor
+ * @txreq: sdma transmit request
+ * @sdma: vnic sdma pointer
+ * @skb: skb to send
+ * @pad: pad buffer
+ * @plen: pad length
+ * @pbc_val: pbc value
+ */
+struct vnic_txreq {
+ struct sdma_txreq txreq;
+ struct hfi1_vnic_sdma *sdma;
+
+ struct sk_buff *skb;
+ unsigned char pad[HFI1_VNIC_MAX_PAD];
+ u16 plen;
+ __le64 pbc_val;
+};
+
+static void vnic_sdma_complete(struct sdma_txreq *txreq,
+ int status)
+{
+ struct vnic_txreq *tx = container_of(txreq, struct vnic_txreq, txreq);
+ struct hfi1_vnic_sdma *vnic_sdma = tx->sdma;
+
+ sdma_txclean(vnic_sdma->dd, txreq);
+ dev_kfree_skb_any(tx->skb);
+ kmem_cache_free(vnic_sdma->dd->vnic.txreq_cache, tx);
+}
+
+static noinline int build_vnic_ulp_payload(struct sdma_engine *sde,
+ struct vnic_txreq *tx)
+{
+ int i, ret = 0;
+
+ ret = sdma_txadd_kvaddr(
+ sde->dd,
+ &tx->txreq,
+ tx->skb->data,
+ skb_headlen(tx->skb));
+ if (unlikely(ret))
+ goto bail_txadd;
+
+ for (i = 0; i < skb_shinfo(tx->skb)->nr_frags; i++) {
+ skb_frag_t *frag = &skb_shinfo(tx->skb)->frags[i];
+
+ /* combine physically continuous fragments later? */
+ ret = sdma_txadd_page(sde->dd,
+ &tx->txreq,
+ skb_frag_page(frag),
+ skb_frag_off(frag),
+ skb_frag_size(frag),
+ NULL, NULL, NULL);
+ if (unlikely(ret))
+ goto bail_txadd;
+ }
+
+ if (tx->plen)
+ ret = sdma_txadd_kvaddr(sde->dd, &tx->txreq,
+ tx->pad + HFI1_VNIC_MAX_PAD - tx->plen,
+ tx->plen);
+
+bail_txadd:
+ return ret;
+}
+
+static int build_vnic_tx_desc(struct sdma_engine *sde,
+ struct vnic_txreq *tx,
+ u64 pbc)
+{
+ int ret = 0;
+ u16 hdrbytes = 2 << 2; /* PBC */
+
+ ret = sdma_txinit_ahg(
+ &tx->txreq,
+ 0,
+ hdrbytes + tx->skb->len + tx->plen,
+ 0,
+ 0,
+ NULL,
+ 0,
+ vnic_sdma_complete);
+ if (unlikely(ret))
+ goto bail_txadd;
+
+ /* add pbc */
+ tx->pbc_val = cpu_to_le64(pbc);
+ ret = sdma_txadd_kvaddr(
+ sde->dd,
+ &tx->txreq,
+ &tx->pbc_val,
+ hdrbytes);
+ if (unlikely(ret))
+ goto bail_txadd;
+
+ /* add the ulp payload */
+ ret = build_vnic_ulp_payload(sde, tx);
+bail_txadd:
+ return ret;
+}
+
+/* setup the last plen bypes of pad */
+static inline void hfi1_vnic_update_pad(unsigned char *pad, u8 plen)
+{
+ pad[HFI1_VNIC_MAX_PAD - 1] = plen - OPA_VNIC_ICRC_TAIL_LEN;
+}
+
+int hfi1_vnic_send_dma(struct hfi1_devdata *dd, u8 q_idx,
+ struct hfi1_vnic_vport_info *vinfo,
+ struct sk_buff *skb, u64 pbc, u8 plen)
+{
+ struct hfi1_vnic_sdma *vnic_sdma = &vinfo->sdma[q_idx];
+ struct sdma_engine *sde = vnic_sdma->sde;
+ struct vnic_txreq *tx;
+ int ret = -ECOMM;
+
+ if (unlikely(READ_ONCE(vnic_sdma->state) != HFI1_VNIC_SDMA_Q_ACTIVE))
+ goto tx_err;
+
+ if (unlikely(!sde || !sdma_running(sde)))
+ goto tx_err;
+
+ tx = kmem_cache_alloc(dd->vnic.txreq_cache, GFP_ATOMIC);
+ if (unlikely(!tx)) {
+ ret = -ENOMEM;
+ goto tx_err;
+ }
+
+ tx->sdma = vnic_sdma;
+ tx->skb = skb;
+ hfi1_vnic_update_pad(tx->pad, plen);
+ tx->plen = plen;
+ ret = build_vnic_tx_desc(sde, tx, pbc);
+ if (unlikely(ret))
+ goto free_desc;
+
+ ret = sdma_send_txreq(sde, iowait_get_ib_work(&vnic_sdma->wait),
+ &tx->txreq, vnic_sdma->pkts_sent);
+ /* When -ECOMM, sdma callback will be called with ABORT status */
+ if (unlikely(ret && unlikely(ret != -ECOMM)))
+ goto free_desc;
+
+ if (!ret) {
+ vnic_sdma->pkts_sent = true;
+ iowait_starve_clear(vnic_sdma->pkts_sent, &vnic_sdma->wait);
+ }
+ return ret;
+
+free_desc:
+ sdma_txclean(dd, &tx->txreq);
+ kmem_cache_free(dd->vnic.txreq_cache, tx);
+tx_err:
+ if (ret != -EBUSY)
+ dev_kfree_skb_any(skb);
+ else
+ vnic_sdma->pkts_sent = false;
+ return ret;
+}
+
+/*
+ * hfi1_vnic_sdma_sleep - vnic sdma sleep function
+ *
+ * This function gets called from sdma_send_txreq() when there are not enough
+ * sdma descriptors available to send the packet. It adds Tx queue's wait
+ * structure to sdma engine's dmawait list to be woken up when descriptors
+ * become available.
+ */
+static int hfi1_vnic_sdma_sleep(struct sdma_engine *sde,
+ struct iowait_work *wait,
+ struct sdma_txreq *txreq,
+ uint seq,
+ bool pkts_sent)
+{
+ struct hfi1_vnic_sdma *vnic_sdma =
+ container_of(wait->iow, struct hfi1_vnic_sdma, wait);
+
+ write_seqlock(&sde->waitlock);
+ if (sdma_progress(sde, seq, txreq)) {
+ write_sequnlock(&sde->waitlock);
+ return -EAGAIN;
+ }
+
+ vnic_sdma->state = HFI1_VNIC_SDMA_Q_DEFERRED;
+ if (list_empty(&vnic_sdma->wait.list)) {
+ iowait_get_priority(wait->iow);
+ iowait_queue(pkts_sent, wait->iow, &sde->dmawait);
+ }
+ write_sequnlock(&sde->waitlock);
+ return -EBUSY;
+}
+
+/*
+ * hfi1_vnic_sdma_wakeup - vnic sdma wakeup function
+ *
+ * This function gets called when SDMA descriptors becomes available and Tx
+ * queue's wait structure was previously added to sdma engine's dmawait list.
+ * It notifies the upper driver about Tx queue wakeup.
+ */
+static void hfi1_vnic_sdma_wakeup(struct iowait *wait, int reason)
+{
+ struct hfi1_vnic_sdma *vnic_sdma =
+ container_of(wait, struct hfi1_vnic_sdma, wait);
+ struct hfi1_vnic_vport_info *vinfo = vnic_sdma->vinfo;
+
+ vnic_sdma->state = HFI1_VNIC_SDMA_Q_ACTIVE;
+ if (__netif_subqueue_stopped(vinfo->netdev, vnic_sdma->q_idx))
+ netif_wake_subqueue(vinfo->netdev, vnic_sdma->q_idx);
+};
+
+inline bool hfi1_vnic_sdma_write_avail(struct hfi1_vnic_vport_info *vinfo,
+ u8 q_idx)
+{
+ struct hfi1_vnic_sdma *vnic_sdma = &vinfo->sdma[q_idx];
+
+ return (READ_ONCE(vnic_sdma->state) == HFI1_VNIC_SDMA_Q_ACTIVE);
+}
+
+void hfi1_vnic_sdma_init(struct hfi1_vnic_vport_info *vinfo)
+{
+ int i;
+
+ for (i = 0; i < vinfo->num_tx_q; i++) {
+ struct hfi1_vnic_sdma *vnic_sdma = &vinfo->sdma[i];
+
+ iowait_init(&vnic_sdma->wait, 0, NULL, NULL,
+ hfi1_vnic_sdma_sleep,
+ hfi1_vnic_sdma_wakeup, NULL, NULL);
+ vnic_sdma->sde = &vinfo->dd->per_sdma[i];
+ vnic_sdma->dd = vinfo->dd;
+ vnic_sdma->vinfo = vinfo;
+ vnic_sdma->q_idx = i;
+ vnic_sdma->state = HFI1_VNIC_SDMA_Q_ACTIVE;
+
+ /* Add a free descriptor watermark for wakeups */
+ if (vnic_sdma->sde->descq_cnt > HFI1_VNIC_SDMA_DESC_WTRMRK) {
+ struct iowait_work *work;
+
+ INIT_LIST_HEAD(&vnic_sdma->stx.list);
+ vnic_sdma->stx.num_desc = HFI1_VNIC_SDMA_DESC_WTRMRK;
+ work = iowait_get_ib_work(&vnic_sdma->wait);
+ list_add_tail(&vnic_sdma->stx.list, &work->tx_head);
+ }
+ }
+}
+
+int hfi1_vnic_txreq_init(struct hfi1_devdata *dd)
+{
+ char buf[HFI1_VNIC_TXREQ_NAME_LEN];
+
+ snprintf(buf, sizeof(buf), "hfi1_%u_vnic_txreq_cache", dd->unit);
+ dd->vnic.txreq_cache = kmem_cache_create(buf,
+ sizeof(struct vnic_txreq),
+ 0, SLAB_HWCACHE_ALIGN,
+ NULL);
+ if (!dd->vnic.txreq_cache)
+ return -ENOMEM;
+ return 0;
+}
+
+void hfi1_vnic_txreq_deinit(struct hfi1_devdata *dd)
+{
+ kmem_cache_destroy(dd->vnic.txreq_cache);
+ dd->vnic.txreq_cache = NULL;
+}
diff --git a/drivers/infiniband/hw/hns/Kconfig b/drivers/infiniband/hw/hns/Kconfig
index e1a6e055cd60..44cdb706fe27 100644
--- a/drivers/infiniband/hw/hns/Kconfig
+++ b/drivers/infiniband/hw/hns/Kconfig
@@ -1,10 +1,11 @@
-config INFINIBAND_HNS
- tristate "HNS RoCE Driver"
- depends on NET_VENDOR_HISILICON
- depends on ARM64 && HNS && HNS_DSAF && HNS_ENET
- ---help---
- This is a RoCE/RDMA driver for the Hisilicon RoCE engine. The engine
- is used in Hisilicon Hi1610 and more further ICT SoC.
+# SPDX-License-Identifier: GPL-2.0-only
+config INFINIBAND_HNS_HIP08
+ tristate "Hisilicon Hip08 Family RoCE support"
+ depends on ARM64 || (COMPILE_TEST && 64BIT)
+ depends on PCI && HNS3
+ help
+ RoCE driver support for Hisilicon RoCE engine in Hisilicon Hip08 SoC.
+ The RoCE engine is a PCI device.
- To compile this driver as a module, choose M here: the module
- will be called hns-roce.
+ To compile this driver, choose M here. This module will be called
+ hns-roce-hw-v2.
diff --git a/drivers/infiniband/hw/hns/Makefile b/drivers/infiniband/hw/hns/Makefile
index 7e8ebd24dcae..d07ef02c5231 100644
--- a/drivers/infiniband/hw/hns/Makefile
+++ b/drivers/infiniband/hw/hns/Makefile
@@ -1,8 +1,16 @@
+# SPDX-License-Identifier: GPL-2.0-only
#
# Makefile for the Hisilicon RoCE drivers.
#
-obj-$(CONFIG_INFINIBAND_HNS) += hns-roce.o
-hns-roce-objs := hns_roce_main.o hns_roce_cmd.o hns_roce_eq.o hns_roce_pd.o \
+ccflags-y := -I $(srctree)/drivers/net/ethernet/hisilicon/hns3
+ccflags-y += -I $(srctree)/drivers/net/ethernet/hisilicon/hns3/hns3pf
+ccflags-y += -I $(srctree)/drivers/net/ethernet/hisilicon/hns3/hns3_common
+ccflags-y += -I $(src)
+
+hns-roce-hw-v2-objs := hns_roce_main.o hns_roce_cmd.o hns_roce_pd.o \
hns_roce_ah.o hns_roce_hem.o hns_roce_mr.o hns_roce_qp.o \
- hns_roce_cq.o hns_roce_alloc.o hns_roce_hw_v1.o
+ hns_roce_cq.o hns_roce_alloc.o hns_roce_db.o hns_roce_srq.o hns_roce_restrack.o \
+ hns_roce_debugfs.o hns_roce_hw_v2.o hns_roce_bond.o
+
+obj-$(CONFIG_INFINIBAND_HNS_HIP08) += hns-roce-hw-v2.o
diff --git a/drivers/infiniband/hw/hns/hns_roce_ah.c b/drivers/infiniband/hw/hns/hns_roce_ah.c
index 0ac294db3b29..0c1c32d23c88 100644
--- a/drivers/infiniband/hw/hns/hns_roce_ah.c
+++ b/drivers/infiniband/hw/hns/hns_roce_ah.c
@@ -30,100 +30,112 @@
* SOFTWARE.
*/
-#include <linux/platform_device.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_cache.h>
#include "hns_roce_device.h"
+#include "hns_roce_hw_v2.h"
-#define HNS_ROCE_PORT_NUM_SHIFT 24
-#define HNS_ROCE_VLAN_SL_BIT_MASK 7
-#define HNS_ROCE_VLAN_SL_SHIFT 13
+static inline u16 get_ah_udp_sport(const struct rdma_ah_attr *ah_attr)
+{
+ u32 fl = ah_attr->grh.flow_label;
+ u16 sport;
+
+ if (!fl)
+ sport = get_random_u32_inclusive(IB_ROCE_UDP_ENCAP_VALID_PORT_MIN,
+ IB_ROCE_UDP_ENCAP_VALID_PORT_MAX);
+ else
+ sport = rdma_flow_label_to_udp_sport(fl);
-struct ib_ah *hns_roce_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *ah_attr,
- struct ib_udata *udata)
+ return sport;
+}
+
+int hns_roce_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
+ struct ib_udata *udata)
{
- struct hns_roce_dev *hr_dev = to_hr_dev(ibpd->device);
- struct device *dev = &hr_dev->pdev->dev;
- struct ib_gid_attr gid_attr;
- struct hns_roce_ah *ah;
- u16 vlan_tag = 0xffff;
- struct in6_addr in6;
- union ib_gid sgid;
+ struct rdma_ah_attr *ah_attr = init_attr->ah_attr;
+ const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr);
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibah->device);
+ struct hns_roce_ib_create_ah_resp resp = {};
+ struct hns_roce_ah *ah = to_hr_ah(ibah);
+ u8 tclass = get_tclass(grh);
+ u8 priority = 0;
+ u8 tc_mode = 0;
int ret;
- ah = kzalloc(sizeof(*ah), GFP_ATOMIC);
- if (!ah)
- return ERR_PTR(-ENOMEM);
+ if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08 && udata) {
+ ret = -EOPNOTSUPP;
+ goto err_out;
+ }
+
+ ah->av.port = rdma_ah_get_port_num(ah_attr);
+ ah->av.gid_index = grh->sgid_index;
+
+ if (rdma_ah_get_static_rate(ah_attr))
+ ah->av.stat_rate = IB_RATE_10_GBPS;
+
+ ah->av.hop_limit = grh->hop_limit;
+ ah->av.flowlabel = grh->flow_label;
+ ah->av.udp_sport = get_ah_udp_sport(ah_attr);
+ ah->av.tclass = tclass;
+
+ ret = hr_dev->hw->get_dscp(hr_dev, tclass, &tc_mode, &priority);
+ if (ret == -EOPNOTSUPP)
+ ret = 0;
- /* Get mac address */
- memcpy(&in6, ah_attr->grh.dgid.raw, sizeof(ah_attr->grh.dgid.raw));
- if (rdma_is_multicast_addr(&in6))
- rdma_get_mcast_mac(&in6, ah->av.mac);
+ if (ret && grh->sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
+ goto err_out;
+
+ if (tc_mode == HNAE3_TC_MAP_MODE_DSCP &&
+ grh->sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
+ ah->av.sl = priority;
else
- memcpy(ah->av.mac, ah_attr->dmac, sizeof(ah_attr->dmac));
-
- /* Get source gid */
- ret = ib_get_cached_gid(ibpd->device, ah_attr->port_num,
- ah_attr->grh.sgid_index, &sgid, &gid_attr);
- if (ret) {
- dev_err(dev, "get sgid failed! ret = %d\n", ret);
- kfree(ah);
- return ERR_PTR(ret);
- }
+ ah->av.sl = rdma_ah_get_sl(ah_attr);
- if (gid_attr.ndev) {
- if (is_vlan_dev(gid_attr.ndev))
- vlan_tag = vlan_dev_vlan_id(gid_attr.ndev);
- dev_put(gid_attr.ndev);
+ if (!check_sl_valid(hr_dev, ah->av.sl)) {
+ ret = -EINVAL;
+ goto err_out;
}
- if (vlan_tag < 0x1000)
- vlan_tag |= (ah_attr->sl & HNS_ROCE_VLAN_SL_BIT_MASK) <<
- HNS_ROCE_VLAN_SL_SHIFT;
+ memcpy(ah->av.dgid, grh->dgid.raw, HNS_ROCE_GID_SIZE);
+ memcpy(ah->av.mac, ah_attr->roce.dmac, ETH_ALEN);
- ah->av.port_pd = cpu_to_be32(to_hr_pd(ibpd)->pdn | (ah_attr->port_num <<
- HNS_ROCE_PORT_NUM_SHIFT));
- ah->av.gid_index = ah_attr->grh.sgid_index;
- ah->av.vlan = cpu_to_le16(vlan_tag);
- dev_dbg(dev, "gid_index = 0x%x,vlan = 0x%x\n", ah->av.gid_index,
- ah->av.vlan);
+ /* HIP08 needs to record vlan info in Address Vector */
+ if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) {
+ ret = rdma_read_gid_l2_fields(ah_attr->grh.sgid_attr,
+ &ah->av.vlan_id, NULL);
+ if (ret)
+ goto err_out;
- if (ah_attr->static_rate)
- ah->av.stat_rate = IB_RATE_10_GBPS;
+ ah->av.vlan_en = ah->av.vlan_id < VLAN_N_VID;
+ }
+
+ if (udata) {
+ resp.priority = ah->av.sl;
+ resp.tc_mode = tc_mode;
+ memcpy(resp.dmac, ah_attr->roce.dmac, ETH_ALEN);
+ ret = ib_copy_to_udata(udata, &resp,
+ min(udata->outlen, sizeof(resp)));
+ }
- memcpy(ah->av.dgid, ah_attr->grh.dgid.raw, HNS_ROCE_GID_SIZE);
- ah->av.sl_tclass_flowlabel = cpu_to_le32(ah_attr->sl <<
- HNS_ROCE_SL_SHIFT);
+err_out:
+ if (ret)
+ atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_AH_CREATE_ERR_CNT]);
- return &ah->ibah;
+ return ret;
}
-int hns_roce_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
+int hns_roce_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr)
{
struct hns_roce_ah *ah = to_hr_ah(ibah);
memset(ah_attr, 0, sizeof(*ah_attr));
- ah_attr->sl = le32_to_cpu(ah->av.sl_tclass_flowlabel) >>
- HNS_ROCE_SL_SHIFT;
- ah_attr->port_num = le32_to_cpu(ah->av.port_pd) >>
- HNS_ROCE_PORT_NUM_SHIFT;
- ah_attr->static_rate = ah->av.stat_rate;
- ah_attr->ah_flags = IB_AH_GRH;
- ah_attr->grh.traffic_class = le32_to_cpu(ah->av.sl_tclass_flowlabel) >>
- HNS_ROCE_TCLASS_SHIFT;
- ah_attr->grh.flow_label = le32_to_cpu(ah->av.sl_tclass_flowlabel) &
- HNS_ROCE_FLOW_LABLE_MASK;
- ah_attr->grh.hop_limit = ah->av.hop_limit;
- ah_attr->grh.sgid_index = ah->av.gid_index;
- memcpy(ah_attr->grh.dgid.raw, ah->av.dgid, HNS_ROCE_GID_SIZE);
-
- return 0;
-}
-
-int hns_roce_destroy_ah(struct ib_ah *ah)
-{
- kfree(to_hr_ah(ah));
+ rdma_ah_set_sl(ah_attr, ah->av.sl);
+ rdma_ah_set_port_num(ah_attr, ah->av.port);
+ rdma_ah_set_static_rate(ah_attr, ah->av.stat_rate);
+ rdma_ah_set_grh(ah_attr, NULL, ah->av.flowlabel,
+ ah->av.gid_index, ah->av.hop_limit, ah->av.tclass);
+ rdma_ah_set_dgid_raw(ah_attr, ah->av.dgid);
return 0;
}
diff --git a/drivers/infiniband/hw/hns/hns_roce_alloc.c b/drivers/infiniband/hw/hns/hns_roce_alloc.c
index 605962f2828c..6ee911f6885b 100644
--- a/drivers/infiniband/hw/hns/hns_roce_alloc.c
+++ b/drivers/infiniband/hw/hns/hns_roce_alloc.c
@@ -31,230 +31,157 @@
* SOFTWARE.
*/
-#include <linux/platform_device.h>
+#include <linux/vmalloc.h>
+#include <rdma/ib_umem.h>
#include "hns_roce_device.h"
-int hns_roce_bitmap_alloc(struct hns_roce_bitmap *bitmap, unsigned long *obj)
+void hns_roce_buf_free(struct hns_roce_dev *hr_dev, struct hns_roce_buf *buf)
{
- int ret = 0;
+ struct hns_roce_buf_list *trunks;
+ u32 i;
- spin_lock(&bitmap->lock);
- *obj = find_next_zero_bit(bitmap->table, bitmap->max, bitmap->last);
- if (*obj >= bitmap->max) {
- bitmap->top = (bitmap->top + bitmap->max + bitmap->reserved_top)
- & bitmap->mask;
- *obj = find_first_zero_bit(bitmap->table, bitmap->max);
- }
+ if (!buf)
+ return;
- if (*obj < bitmap->max) {
- set_bit(*obj, bitmap->table);
- bitmap->last = (*obj + 1);
- if (bitmap->last == bitmap->max)
- bitmap->last = 0;
- *obj |= bitmap->top;
- } else {
- ret = -1;
- }
+ trunks = buf->trunk_list;
+ if (trunks) {
+ buf->trunk_list = NULL;
+ for (i = 0; i < buf->ntrunks; i++)
+ dma_free_coherent(hr_dev->dev, 1 << buf->trunk_shift,
+ trunks[i].buf, trunks[i].map);
- spin_unlock(&bitmap->lock);
+ kfree(trunks);
+ }
- return ret;
+ kfree(buf);
}
-void hns_roce_bitmap_free(struct hns_roce_bitmap *bitmap, unsigned long obj,
- int rr)
-{
- hns_roce_bitmap_free_range(bitmap, obj, 1, rr);
-}
-
-int hns_roce_bitmap_alloc_range(struct hns_roce_bitmap *bitmap, int cnt,
- int align, unsigned long *obj)
+/*
+ * Allocate the dma buffer for storing ROCEE table entries
+ *
+ * @size: required size
+ * @page_shift: the unit size in a continuous dma address range
+ * @flags: HNS_ROCE_BUF_ flags to control the allocation flow.
+ */
+struct hns_roce_buf *hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size,
+ u32 page_shift, u32 flags)
{
- int ret = 0;
- int i;
-
- if (likely(cnt == 1 && align == 1))
- return hns_roce_bitmap_alloc(bitmap, obj);
-
- spin_lock(&bitmap->lock);
-
- *obj = bitmap_find_next_zero_area(bitmap->table, bitmap->max,
- bitmap->last, cnt, align - 1);
- if (*obj >= bitmap->max) {
- bitmap->top = (bitmap->top + bitmap->max + bitmap->reserved_top)
- & bitmap->mask;
- *obj = bitmap_find_next_zero_area(bitmap->table, bitmap->max, 0,
- cnt, align - 1);
- }
-
- if (*obj < bitmap->max) {
- for (i = 0; i < cnt; i++)
- set_bit(*obj + i, bitmap->table);
-
- if (*obj == bitmap->last) {
- bitmap->last = (*obj + cnt);
- if (bitmap->last >= bitmap->max)
- bitmap->last = 0;
- }
- *obj |= bitmap->top;
+ u32 trunk_size, page_size, alloced_size;
+ struct hns_roce_buf_list *trunks;
+ struct hns_roce_buf *buf;
+ gfp_t gfp_flags;
+ u32 ntrunk, i;
+
+ /* The minimum shift of the page accessed by hw is HNS_HW_PAGE_SHIFT */
+ if (WARN_ON(page_shift < HNS_HW_PAGE_SHIFT))
+ return ERR_PTR(-EINVAL);
+
+ gfp_flags = (flags & HNS_ROCE_BUF_NOSLEEP) ? GFP_ATOMIC : GFP_KERNEL;
+ buf = kzalloc(sizeof(*buf), gfp_flags);
+ if (!buf)
+ return ERR_PTR(-ENOMEM);
+
+ buf->page_shift = page_shift;
+ page_size = 1 << buf->page_shift;
+
+ /* Calc the trunk size and num by required size and page_shift */
+ if (flags & HNS_ROCE_BUF_DIRECT) {
+ buf->trunk_shift = order_base_2(ALIGN(size, PAGE_SIZE));
+ ntrunk = 1;
} else {
- ret = -1;
+ buf->trunk_shift = order_base_2(ALIGN(page_size, PAGE_SIZE));
+ ntrunk = DIV_ROUND_UP(size, 1 << buf->trunk_shift);
}
- spin_unlock(&bitmap->lock);
-
- return ret;
-}
-
-void hns_roce_bitmap_free_range(struct hns_roce_bitmap *bitmap,
- unsigned long obj, int cnt,
- int rr)
-{
- int i;
-
- obj &= bitmap->max + bitmap->reserved_top - 1;
-
- spin_lock(&bitmap->lock);
- for (i = 0; i < cnt; i++)
- clear_bit(obj + i, bitmap->table);
+ trunks = kcalloc(ntrunk, sizeof(*trunks), gfp_flags);
+ if (!trunks) {
+ kfree(buf);
+ return ERR_PTR(-ENOMEM);
+ }
- if (!rr)
- bitmap->last = min(bitmap->last, obj);
- bitmap->top = (bitmap->top + bitmap->max + bitmap->reserved_top)
- & bitmap->mask;
- spin_unlock(&bitmap->lock);
-}
+ trunk_size = 1 << buf->trunk_shift;
+ alloced_size = 0;
+ for (i = 0; i < ntrunk; i++) {
+ trunks[i].buf = dma_alloc_coherent(hr_dev->dev, trunk_size,
+ &trunks[i].map, gfp_flags);
+ if (!trunks[i].buf)
+ break;
-int hns_roce_bitmap_init(struct hns_roce_bitmap *bitmap, u32 num, u32 mask,
- u32 reserved_bot, u32 reserved_top)
-{
- u32 i;
+ alloced_size += trunk_size;
+ }
- if (num != roundup_pow_of_two(num))
- return -EINVAL;
+ buf->ntrunks = i;
- bitmap->last = 0;
- bitmap->top = 0;
- bitmap->max = num - reserved_top;
- bitmap->mask = mask;
- bitmap->reserved_top = reserved_top;
- spin_lock_init(&bitmap->lock);
- bitmap->table = kcalloc(BITS_TO_LONGS(bitmap->max), sizeof(long),
- GFP_KERNEL);
- if (!bitmap->table)
- return -ENOMEM;
+ /* In nofail mode, it's only failed when the alloced size is 0 */
+ if ((flags & HNS_ROCE_BUF_NOFAIL) ? i == 0 : i != ntrunk) {
+ for (i = 0; i < buf->ntrunks; i++)
+ dma_free_coherent(hr_dev->dev, trunk_size,
+ trunks[i].buf, trunks[i].map);
- for (i = 0; i < reserved_bot; ++i)
- set_bit(i, bitmap->table);
+ kfree(trunks);
+ kfree(buf);
+ return ERR_PTR(-ENOMEM);
+ }
- return 0;
-}
+ buf->npages = DIV_ROUND_UP(alloced_size, page_size);
+ buf->trunk_list = trunks;
-void hns_roce_bitmap_cleanup(struct hns_roce_bitmap *bitmap)
-{
- kfree(bitmap->table);
+ return buf;
}
-void hns_roce_buf_free(struct hns_roce_dev *hr_dev, u32 size,
- struct hns_roce_buf *buf)
+int hns_roce_get_kmem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs,
+ int buf_cnt, struct hns_roce_buf *buf,
+ unsigned int page_shift)
{
+ unsigned int offset, max_size;
+ int total = 0;
int i;
- struct device *dev = &hr_dev->pdev->dev;
- u32 bits_per_long = BITS_PER_LONG;
- if (buf->nbufs == 1) {
- dma_free_coherent(dev, size, buf->direct.buf, buf->direct.map);
- } else {
- if (bits_per_long == 64)
- vunmap(buf->direct.buf);
+ if (page_shift > buf->trunk_shift) {
+ dev_err(hr_dev->dev, "failed to check kmem buf shift %u > %u\n",
+ page_shift, buf->trunk_shift);
+ return -EINVAL;
+ }
- for (i = 0; i < buf->nbufs; ++i)
- if (buf->page_list[i].buf)
- dma_free_coherent(&hr_dev->pdev->dev, PAGE_SIZE,
- buf->page_list[i].buf,
- buf->page_list[i].map);
- kfree(buf->page_list);
+ offset = 0;
+ max_size = buf->ntrunks << buf->trunk_shift;
+ for (i = 0; i < buf_cnt && offset < max_size; i++) {
+ bufs[total++] = hns_roce_buf_dma_addr(buf, offset);
+ offset += (1 << page_shift);
}
+
+ return total;
}
-int hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, u32 max_direct,
- struct hns_roce_buf *buf)
+int hns_roce_get_umem_bufs(dma_addr_t *bufs, int buf_cnt, struct ib_umem *umem,
+ unsigned int page_shift)
{
- int i = 0;
- dma_addr_t t;
- struct page **pages;
- struct device *dev = &hr_dev->pdev->dev;
- u32 bits_per_long = BITS_PER_LONG;
-
- /* SQ/RQ buf lease than one page, SQ + RQ = 8K */
- if (size <= max_direct) {
- buf->nbufs = 1;
- /* Npages calculated by page_size */
- buf->npages = 1 << get_order(size);
- buf->page_shift = PAGE_SHIFT;
- /* MTT PA must be recorded in 4k alignment, t is 4k aligned */
- buf->direct.buf = dma_alloc_coherent(dev, size, &t, GFP_KERNEL);
- if (!buf->direct.buf)
- return -ENOMEM;
-
- buf->direct.map = t;
-
- while (t & ((1 << buf->page_shift) - 1)) {
- --buf->page_shift;
- buf->npages *= 2;
- }
-
- memset(buf->direct.buf, 0, size);
- } else {
- buf->nbufs = (size + PAGE_SIZE - 1) / PAGE_SIZE;
- buf->npages = buf->nbufs;
- buf->page_shift = PAGE_SHIFT;
- buf->page_list = kcalloc(buf->nbufs, sizeof(*buf->page_list),
- GFP_KERNEL);
-
- if (!buf->page_list)
- return -ENOMEM;
-
- for (i = 0; i < buf->nbufs; ++i) {
- buf->page_list[i].buf = dma_alloc_coherent(dev,
- PAGE_SIZE, &t,
- GFP_KERNEL);
-
- if (!buf->page_list[i].buf)
- goto err_free;
-
- buf->page_list[i].map = t;
- memset(buf->page_list[i].buf, 0, PAGE_SIZE);
- }
- if (bits_per_long == 64) {
- pages = kmalloc_array(buf->nbufs, sizeof(*pages),
- GFP_KERNEL);
- if (!pages)
- goto err_free;
-
- for (i = 0; i < buf->nbufs; ++i)
- pages[i] = virt_to_page(buf->page_list[i].buf);
-
- buf->direct.buf = vmap(pages, buf->nbufs, VM_MAP,
- PAGE_KERNEL);
- kfree(pages);
- if (!buf->direct.buf)
- goto err_free;
- }
+ struct ib_block_iter biter;
+ int total = 0;
+
+ /* convert system page cnt to hw page cnt */
+ rdma_umem_for_each_dma_block(umem, &biter, 1 << page_shift) {
+ bufs[total++] = rdma_block_iter_dma_address(&biter);
+ if (total >= buf_cnt)
+ goto done;
}
- return 0;
-
-err_free:
- hns_roce_buf_free(hr_dev, size, buf);
- return -ENOMEM;
+done:
+ return total;
}
void hns_roce_cleanup_bitmap(struct hns_roce_dev *hr_dev)
{
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_XRC)
+ ida_destroy(&hr_dev->xrcd_ida.ida);
+
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) {
+ ida_destroy(&hr_dev->srq_table.srq_ida.ida);
+ xa_destroy(&hr_dev->srq_table.xa);
+ }
hns_roce_cleanup_qp_table(hr_dev);
hns_roce_cleanup_cq_table(hr_dev);
- hns_roce_cleanup_mr_table(hr_dev);
- hns_roce_cleanup_pd_table(hr_dev);
- hns_roce_cleanup_uar_table(hr_dev);
+ ida_destroy(&hr_dev->mr_table.mtpt_ida.ida);
+ ida_destroy(&hr_dev->pd_ida.ida);
+ ida_destroy(&hr_dev->uar_ida.ida);
}
diff --git a/drivers/infiniband/hw/hns/hns_roce_bond.c b/drivers/infiniband/hw/hns/hns_roce_bond.c
new file mode 100644
index 000000000000..cc85f3ce1f3e
--- /dev/null
+++ b/drivers/infiniband/hw/hns/hns_roce_bond.c
@@ -0,0 +1,1012 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (c) 2025 Hisilicon Limited.
+ */
+
+#include <net/lag.h>
+#include <net/bonding.h>
+#include "hns_roce_device.h"
+#include "hns_roce_hw_v2.h"
+#include "hns_roce_bond.h"
+
+static DEFINE_XARRAY(roce_bond_xa);
+
+static struct hns_roce_dev *hns_roce_get_hrdev_by_netdev(struct net_device *net_dev)
+{
+ struct ib_device *ibdev =
+ ib_device_get_by_netdev(net_dev, RDMA_DRIVER_HNS);
+
+ if (!ibdev)
+ return NULL;
+
+ return container_of(ibdev, struct hns_roce_dev, ib_dev);
+}
+
+static struct net_device *get_upper_dev_from_ndev(struct net_device *net_dev)
+{
+ struct net_device *upper_dev;
+
+ rcu_read_lock();
+ upper_dev = netdev_master_upper_dev_get_rcu(net_dev);
+ dev_hold(upper_dev);
+ rcu_read_unlock();
+
+ return upper_dev;
+}
+
+static int get_netdev_bond_slave_id(struct net_device *net_dev,
+ struct hns_roce_bond_group *bond_grp)
+{
+ int i;
+
+ for (i = 0; i < ROCE_BOND_FUNC_MAX; i++)
+ if (net_dev == bond_grp->bond_func_info[i].net_dev)
+ return i;
+
+ return -ENOENT;
+}
+
+struct hns_roce_bond_group *hns_roce_get_bond_grp(struct net_device *net_dev,
+ u8 bus_num)
+{
+ struct hns_roce_die_info *die_info = xa_load(&roce_bond_xa, bus_num);
+ struct hns_roce_bond_group *bond_grp;
+ struct net_device *upper_dev = NULL;
+ int i;
+
+ if (!die_info)
+ return NULL;
+
+ for (i = 0; i < ROCE_BOND_NUM_MAX; i++) {
+ bond_grp = die_info->bgrps[i];
+ if (!bond_grp)
+ continue;
+ if (get_netdev_bond_slave_id(net_dev, bond_grp) >= 0)
+ return bond_grp;
+ if (bond_grp->upper_dev) {
+ upper_dev = get_upper_dev_from_ndev(net_dev);
+ if (bond_grp->upper_dev == upper_dev) {
+ dev_put(upper_dev);
+ return bond_grp;
+ }
+ dev_put(upper_dev);
+ }
+ }
+
+ return NULL;
+}
+
+static int hns_roce_set_bond_netdev(struct hns_roce_bond_group *bond_grp,
+ struct hns_roce_dev *hr_dev)
+{
+ struct net_device *active_dev;
+ struct net_device *old_dev;
+ int i, ret = 0;
+
+ if (bond_grp->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
+ rcu_read_lock();
+ active_dev =
+ bond_option_active_slave_get_rcu(netdev_priv(bond_grp->upper_dev));
+ rcu_read_unlock();
+ } else {
+ for (i = 0; i < ROCE_BOND_FUNC_MAX; i++) {
+ active_dev = bond_grp->bond_func_info[i].net_dev;
+ if (active_dev &&
+ ib_get_curr_port_state(active_dev) == IB_PORT_ACTIVE)
+ break;
+ }
+ }
+
+ if (!active_dev || i == ROCE_BOND_FUNC_MAX)
+ active_dev = get_hr_netdev(hr_dev, 0);
+
+ old_dev = ib_device_get_netdev(&hr_dev->ib_dev, 1);
+ if (old_dev == active_dev)
+ goto out;
+
+ ret = ib_device_set_netdev(&hr_dev->ib_dev, active_dev, 1);
+ if (ret) {
+ dev_err(hr_dev->dev, "failed to set netdev for bond.\n");
+ goto out;
+ }
+
+ if (bond_grp->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
+ if (old_dev)
+ roce_del_all_netdev_gids(&hr_dev->ib_dev, 1, old_dev);
+ rdma_roce_rescan_port(&hr_dev->ib_dev, 1);
+ }
+out:
+ dev_put(old_dev);
+ return ret;
+}
+
+bool hns_roce_bond_is_active(struct hns_roce_dev *hr_dev)
+{
+ struct net_device *net_dev = get_hr_netdev(hr_dev, 0);
+ struct hns_roce_bond_group *bond_grp;
+ u8 bus_num = get_hr_bus_num(hr_dev);
+
+ bond_grp = hns_roce_get_bond_grp(net_dev, bus_num);
+ if (bond_grp && bond_grp->bond_state != HNS_ROCE_BOND_NOT_BONDED &&
+ bond_grp->bond_state != HNS_ROCE_BOND_NOT_ATTACHED)
+ return true;
+
+ return false;
+}
+
+static void hns_roce_bond_get_active_slave(struct hns_roce_bond_group *bond_grp)
+{
+ struct net_device *net_dev;
+ u32 active_slave_map = 0;
+ u8 active_slave_num = 0;
+ bool active;
+ u8 i;
+
+ for (i = 0; i < ROCE_BOND_FUNC_MAX; i++) {
+ net_dev = bond_grp->bond_func_info[i].net_dev;
+ if (!net_dev || !(bond_grp->slave_map & (1U << i)))
+ continue;
+
+ active = (bond_grp->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) ?
+ net_lag_port_dev_txable(net_dev) :
+ (ib_get_curr_port_state(net_dev) == IB_PORT_ACTIVE);
+ if (active) {
+ active_slave_num++;
+ active_slave_map |= (1U << i);
+ }
+ }
+
+ bond_grp->active_slave_num = active_slave_num;
+ bond_grp->active_slave_map = active_slave_map;
+}
+
+static int hns_roce_recover_bond(struct hns_roce_bond_group *bond_grp,
+ struct hns_roce_dev *hr_dev)
+{
+ bond_grp->main_hr_dev = hr_dev;
+ hns_roce_bond_get_active_slave(bond_grp);
+
+ return hns_roce_cmd_bond(bond_grp, HNS_ROCE_SET_BOND);
+}
+
+static void hns_roce_slave_uninit(struct hns_roce_bond_group *bond_grp,
+ u8 func_idx)
+{
+ struct hnae3_handle *handle;
+
+ handle = bond_grp->bond_func_info[func_idx].handle;
+ if (handle->priv)
+ hns_roce_bond_uninit_client(bond_grp, func_idx);
+}
+
+static struct hns_roce_dev
+ *hns_roce_slave_init(struct hns_roce_bond_group *bond_grp,
+ u8 func_idx, bool need_switch);
+
+static int switch_main_dev(struct hns_roce_bond_group *bond_grp,
+ u8 main_func_idx)
+{
+ struct hns_roce_dev *hr_dev;
+ struct net_device *net_dev;
+ u8 i;
+
+ bond_grp->main_hr_dev = NULL;
+ hns_roce_bond_uninit_client(bond_grp, main_func_idx);
+
+ for (i = 0; i < ROCE_BOND_FUNC_MAX; i++) {
+ net_dev = bond_grp->bond_func_info[i].net_dev;
+ if ((bond_grp->slave_map & (1U << i)) && net_dev) {
+ /* In case this slave is still being registered as
+ * a non-bonded PF, uninit it first and then re-init
+ * it as the main device.
+ */
+ hns_roce_slave_uninit(bond_grp, i);
+ hr_dev = hns_roce_slave_init(bond_grp, i, false);
+ if (hr_dev) {
+ bond_grp->main_hr_dev = hr_dev;
+ break;
+ }
+ }
+ }
+
+ if (!bond_grp->main_hr_dev)
+ return -ENODEV;
+
+ return 0;
+}
+
+static struct hns_roce_dev
+ *hns_roce_slave_init(struct hns_roce_bond_group *bond_grp,
+ u8 func_idx, bool need_switch)
+{
+ struct hns_roce_dev *hr_dev = NULL;
+ struct hnae3_handle *handle;
+ u8 main_func_idx;
+ int ret;
+
+ if (need_switch) {
+ main_func_idx = PCI_FUNC(bond_grp->main_hr_dev->pci_dev->devfn);
+ if (func_idx == main_func_idx) {
+ ret = switch_main_dev(bond_grp, main_func_idx);
+ if (ret == -ENODEV)
+ return NULL;
+ }
+ }
+
+ handle = bond_grp->bond_func_info[func_idx].handle;
+ if (handle) {
+ if (handle->priv)
+ return handle->priv;
+ /* Prevent this device from being initialized as a bond device */
+ if (need_switch)
+ bond_grp->bond_func_info[func_idx].net_dev = NULL;
+ hr_dev = hns_roce_bond_init_client(bond_grp, func_idx);
+ if (!hr_dev)
+ BOND_ERR_LOG("failed to init slave %u.\n", func_idx);
+ }
+
+ return hr_dev;
+}
+
+static struct hns_roce_die_info *alloc_die_info(int bus_num)
+{
+ struct hns_roce_die_info *die_info;
+ int ret;
+
+ die_info = kzalloc(sizeof(*die_info), GFP_KERNEL);
+ if (!die_info)
+ return NULL;
+
+ ret = xa_err(xa_store(&roce_bond_xa, bus_num, die_info, GFP_KERNEL));
+ if (ret) {
+ kfree(die_info);
+ return NULL;
+ }
+
+ mutex_init(&die_info->die_mutex);
+
+ return die_info;
+}
+
+static void dealloc_die_info(struct hns_roce_die_info *die_info, u8 bus_num)
+{
+ mutex_destroy(&die_info->die_mutex);
+ xa_erase(&roce_bond_xa, bus_num);
+ kfree(die_info);
+}
+
+static int alloc_bond_id(struct hns_roce_bond_group *bond_grp)
+{
+ u8 bus_num = bond_grp->bus_num;
+ struct hns_roce_die_info *die_info = xa_load(&roce_bond_xa, bus_num);
+ int i;
+
+ if (!die_info) {
+ die_info = alloc_die_info(bus_num);
+ if (!die_info)
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < ROCE_BOND_NUM_MAX; i++) {
+ if (die_info->bond_id_mask & BOND_ID(i))
+ continue;
+
+ die_info->bond_id_mask |= BOND_ID(i);
+ die_info->bgrps[i] = bond_grp;
+ bond_grp->bond_id = i;
+
+ return 0;
+ }
+
+ return -ENOSPC;
+}
+
+static int remove_bond_id(int bus_num, u8 bond_id)
+{
+ struct hns_roce_die_info *die_info = xa_load(&roce_bond_xa, bus_num);
+
+ if (bond_id >= ROCE_BOND_NUM_MAX)
+ return -EINVAL;
+
+ if (!die_info)
+ return -ENODEV;
+
+ die_info->bond_id_mask &= ~BOND_ID(bond_id);
+ die_info->bgrps[bond_id] = NULL;
+ if (!die_info->bond_id_mask)
+ dealloc_die_info(die_info, bus_num);
+
+ return 0;
+}
+
+static void hns_roce_set_bond(struct hns_roce_bond_group *bond_grp)
+{
+ struct hns_roce_dev *hr_dev;
+ int ret;
+ int i;
+
+ for (i = ROCE_BOND_FUNC_MAX - 1; i >= 0; i--) {
+ if (bond_grp->slave_map & (1 << i))
+ hns_roce_slave_uninit(bond_grp, i);
+ }
+
+ mutex_lock(&bond_grp->bond_mutex);
+ bond_grp->bond_state = HNS_ROCE_BOND_IS_BONDED;
+ mutex_unlock(&bond_grp->bond_mutex);
+ bond_grp->main_hr_dev = NULL;
+
+ for (i = 0; i < ROCE_BOND_FUNC_MAX; i++) {
+ if (bond_grp->slave_map & (1 << i)) {
+ hr_dev = hns_roce_slave_init(bond_grp, i, false);
+ if (hr_dev) {
+ bond_grp->main_hr_dev = hr_dev;
+ break;
+ }
+ }
+ }
+
+ if (!bond_grp->main_hr_dev) {
+ ret = -ENODEV;
+ goto out;
+ }
+
+ hns_roce_bond_get_active_slave(bond_grp);
+
+ ret = hns_roce_cmd_bond(bond_grp, HNS_ROCE_SET_BOND);
+
+out:
+ if (ret) {
+ BOND_ERR_LOG("failed to set RoCE bond, ret = %d.\n", ret);
+ hns_roce_cleanup_bond(bond_grp);
+ } else {
+ ibdev_info(&bond_grp->main_hr_dev->ib_dev,
+ "RoCE set bond finished!\n");
+ }
+}
+
+static void hns_roce_clear_bond(struct hns_roce_bond_group *bond_grp)
+{
+ u8 main_func_idx = PCI_FUNC(bond_grp->main_hr_dev->pci_dev->devfn);
+ struct hns_roce_dev *hr_dev;
+ u8 i;
+
+ if (bond_grp->bond_state == HNS_ROCE_BOND_NOT_BONDED)
+ goto out;
+
+ bond_grp->bond_state = HNS_ROCE_BOND_NOT_BONDED;
+ bond_grp->main_hr_dev = NULL;
+
+ hns_roce_slave_uninit(bond_grp, main_func_idx);
+
+ for (i = 0; i < ROCE_BOND_FUNC_MAX; i++) {
+ hr_dev = hns_roce_slave_init(bond_grp, i, false);
+ if (hr_dev)
+ bond_grp->main_hr_dev = hr_dev;
+ }
+
+out:
+ hns_roce_cleanup_bond(bond_grp);
+}
+
+static void hns_roce_slave_changestate(struct hns_roce_bond_group *bond_grp)
+{
+ int ret;
+
+ hns_roce_bond_get_active_slave(bond_grp);
+
+ ret = hns_roce_cmd_bond(bond_grp, HNS_ROCE_CHANGE_BOND);
+
+ mutex_lock(&bond_grp->bond_mutex);
+ if (bond_grp->bond_state == HNS_ROCE_BOND_SLAVE_CHANGESTATE)
+ bond_grp->bond_state = HNS_ROCE_BOND_IS_BONDED;
+ mutex_unlock(&bond_grp->bond_mutex);
+
+ if (ret)
+ ibdev_err(&bond_grp->main_hr_dev->ib_dev,
+ "failed to change RoCE bond slave state, ret = %d.\n",
+ ret);
+ else
+ ibdev_info(&bond_grp->main_hr_dev->ib_dev,
+ "RoCE slave changestate finished!\n");
+}
+
+static void hns_roce_slave_change_num(struct hns_roce_bond_group *bond_grp)
+{
+ int ret;
+ u8 i;
+
+ for (i = 0; i < ROCE_BOND_FUNC_MAX; i++) {
+ if (bond_grp->slave_map & (1U << i)) {
+ if (i == PCI_FUNC(bond_grp->main_hr_dev->pci_dev->devfn))
+ continue;
+ hns_roce_slave_uninit(bond_grp, i);
+ } else {
+ hns_roce_slave_init(bond_grp, i, true);
+ if (!bond_grp->main_hr_dev) {
+ ret = -ENODEV;
+ goto out;
+ }
+ bond_grp->bond_func_info[i].net_dev = NULL;
+ bond_grp->bond_func_info[i].handle = NULL;
+ }
+ }
+
+ hns_roce_bond_get_active_slave(bond_grp);
+
+ ret = hns_roce_cmd_bond(bond_grp, HNS_ROCE_CHANGE_BOND);
+
+out:
+ if (ret) {
+ BOND_ERR_LOG("failed to change RoCE bond slave num, ret = %d.\n", ret);
+ hns_roce_cleanup_bond(bond_grp);
+ } else {
+ mutex_lock(&bond_grp->bond_mutex);
+ if (bond_grp->bond_state == HNS_ROCE_BOND_SLAVE_CHANGE_NUM)
+ bond_grp->bond_state = HNS_ROCE_BOND_IS_BONDED;
+ mutex_unlock(&bond_grp->bond_mutex);
+ ibdev_info(&bond_grp->main_hr_dev->ib_dev,
+ "RoCE slave change num finished!\n");
+ }
+}
+
+static void hns_roce_bond_info_update_nolock(struct hns_roce_bond_group *bond_grp,
+ struct net_device *upper_dev)
+{
+ struct hns_roce_v2_priv *priv;
+ struct hns_roce_dev *hr_dev;
+ struct net_device *net_dev;
+ int func_idx;
+
+ bond_grp->slave_map = 0;
+ rcu_read_lock();
+ for_each_netdev_in_bond_rcu(upper_dev, net_dev) {
+ func_idx = get_netdev_bond_slave_id(net_dev, bond_grp);
+ if (func_idx < 0) {
+ hr_dev = hns_roce_get_hrdev_by_netdev(net_dev);
+ if (!hr_dev)
+ continue;
+ func_idx = PCI_FUNC(hr_dev->pci_dev->devfn);
+ if (!bond_grp->bond_func_info[func_idx].net_dev) {
+ priv = hr_dev->priv;
+ bond_grp->bond_func_info[func_idx].net_dev =
+ net_dev;
+ bond_grp->bond_func_info[func_idx].handle =
+ priv->handle;
+ }
+ ib_device_put(&hr_dev->ib_dev);
+ }
+
+ bond_grp->slave_map |= (1 << func_idx);
+ }
+ rcu_read_unlock();
+}
+
+static bool is_dev_bond_supported(struct hns_roce_bond_group *bond_grp,
+ struct net_device *net_dev)
+{
+ struct hns_roce_dev *hr_dev = hns_roce_get_hrdev_by_netdev(net_dev);
+ bool ret = true;
+
+ if (!hr_dev) {
+ if (bond_grp &&
+ get_netdev_bond_slave_id(net_dev, bond_grp) >= 0)
+ return true;
+ else
+ return false;
+ }
+
+ if (!(hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_BOND)) {
+ ret = false;
+ goto out;
+ }
+
+ if (hr_dev->is_vf || pci_num_vf(hr_dev->pci_dev) > 0) {
+ ret = false;
+ goto out;
+ }
+
+ if (bond_grp->bus_num != get_hr_bus_num(hr_dev))
+ ret = false;
+
+out:
+ ib_device_put(&hr_dev->ib_dev);
+ return ret;
+}
+
+static bool check_slave_support(struct hns_roce_bond_group *bond_grp,
+ struct net_device *upper_dev)
+{
+ struct net_device *net_dev;
+ u8 slave_num = 0;
+
+ rcu_read_lock();
+ for_each_netdev_in_bond_rcu(upper_dev, net_dev) {
+ if (is_dev_bond_supported(bond_grp, net_dev)) {
+ slave_num++;
+ continue;
+ }
+ rcu_read_unlock();
+ return false;
+ }
+ rcu_read_unlock();
+
+ return (slave_num > 1 && slave_num <= ROCE_BOND_FUNC_MAX);
+}
+
+static void hns_roce_bond_work(struct work_struct *work)
+{
+ struct delayed_work *delayed_work = to_delayed_work(work);
+ struct hns_roce_bond_group *bond_grp =
+ container_of(delayed_work, struct hns_roce_bond_group,
+ bond_work);
+ enum hns_roce_bond_state bond_state;
+ bool bond_ready;
+
+ mutex_lock(&bond_grp->bond_mutex);
+ bond_ready = check_slave_support(bond_grp, bond_grp->upper_dev);
+ hns_roce_bond_info_update_nolock(bond_grp, bond_grp->upper_dev);
+ bond_state = bond_grp->bond_state;
+ bond_grp->bond_ready = bond_ready;
+ mutex_unlock(&bond_grp->bond_mutex);
+
+ ibdev_info(&bond_grp->main_hr_dev->ib_dev,
+ "bond work: bond_ready - %d, bond_state - %d.\n",
+ bond_ready, bond_state);
+
+ if (!bond_ready) {
+ hns_roce_clear_bond(bond_grp);
+ return;
+ }
+
+ switch (bond_state) {
+ case HNS_ROCE_BOND_NOT_BONDED:
+ hns_roce_set_bond(bond_grp);
+ /* In set_bond flow, we don't need to set bond netdev here as
+ * it has been done when bond_grp->main_hr_dev is registered.
+ */
+ return;
+ case HNS_ROCE_BOND_SLAVE_CHANGESTATE:
+ hns_roce_slave_changestate(bond_grp);
+ break;
+ case HNS_ROCE_BOND_SLAVE_CHANGE_NUM:
+ hns_roce_slave_change_num(bond_grp);
+ break;
+ default:
+ return;
+ }
+ hns_roce_set_bond_netdev(bond_grp, bond_grp->main_hr_dev);
+}
+
+static void hns_roce_attach_bond_grp(struct hns_roce_bond_group *bond_grp,
+ struct hns_roce_dev *hr_dev,
+ struct net_device *upper_dev)
+{
+ bond_grp->upper_dev = upper_dev;
+ bond_grp->main_hr_dev = hr_dev;
+ bond_grp->bond_state = HNS_ROCE_BOND_NOT_BONDED;
+ bond_grp->bond_ready = false;
+}
+
+static void hns_roce_detach_bond_grp(struct hns_roce_bond_group *bond_grp)
+{
+ mutex_lock(&bond_grp->bond_mutex);
+
+ cancel_delayed_work(&bond_grp->bond_work);
+ bond_grp->upper_dev = NULL;
+ bond_grp->main_hr_dev = NULL;
+ bond_grp->bond_ready = false;
+ bond_grp->bond_state = HNS_ROCE_BOND_NOT_ATTACHED;
+ bond_grp->slave_map = 0;
+ memset(bond_grp->bond_func_info, 0, sizeof(bond_grp->bond_func_info));
+
+ mutex_unlock(&bond_grp->bond_mutex);
+}
+
+void hns_roce_cleanup_bond(struct hns_roce_bond_group *bond_grp)
+{
+ int ret;
+
+ ret = bond_grp->main_hr_dev ?
+ hns_roce_cmd_bond(bond_grp, HNS_ROCE_CLEAR_BOND) : -EIO;
+ if (ret)
+ BOND_ERR_LOG("failed to clear RoCE bond, ret = %d.\n", ret);
+ else
+ ibdev_info(&bond_grp->main_hr_dev->ib_dev,
+ "RoCE clear bond finished!\n");
+
+ hns_roce_detach_bond_grp(bond_grp);
+}
+
+static bool lowerstate_event_filter(struct hns_roce_bond_group *bond_grp,
+ struct net_device *net_dev)
+{
+ struct hns_roce_bond_group *bond_grp_tmp;
+
+ bond_grp_tmp = hns_roce_get_bond_grp(net_dev, bond_grp->bus_num);
+ return bond_grp_tmp == bond_grp;
+}
+
+static void lowerstate_event_setting(struct hns_roce_bond_group *bond_grp,
+ struct netdev_notifier_changelowerstate_info *info)
+{
+ mutex_lock(&bond_grp->bond_mutex);
+
+ if (bond_grp->bond_ready &&
+ bond_grp->bond_state == HNS_ROCE_BOND_IS_BONDED)
+ bond_grp->bond_state = HNS_ROCE_BOND_SLAVE_CHANGESTATE;
+
+ mutex_unlock(&bond_grp->bond_mutex);
+}
+
+static bool hns_roce_bond_lowerstate_event(struct hns_roce_bond_group *bond_grp,
+ struct netdev_notifier_changelowerstate_info *info)
+{
+ struct net_device *net_dev =
+ netdev_notifier_info_to_dev((struct netdev_notifier_info *)info);
+
+ if (!netif_is_lag_port(net_dev))
+ return false;
+
+ if (!lowerstate_event_filter(bond_grp, net_dev))
+ return false;
+
+ lowerstate_event_setting(bond_grp, info);
+
+ return true;
+}
+
+static bool is_bond_setting_supported(struct netdev_lag_upper_info *bond_info)
+{
+ if (!bond_info)
+ return false;
+
+ if (bond_info->tx_type != NETDEV_LAG_TX_TYPE_ACTIVEBACKUP &&
+ bond_info->tx_type != NETDEV_LAG_TX_TYPE_HASH)
+ return false;
+
+ if (bond_info->tx_type == NETDEV_LAG_TX_TYPE_HASH &&
+ bond_info->hash_type > NETDEV_LAG_HASH_L23)
+ return false;
+
+ return true;
+}
+
+static void upper_event_setting(struct hns_roce_bond_group *bond_grp,
+ struct netdev_notifier_changeupper_info *info)
+{
+ struct netdev_lag_upper_info *bond_upper_info = NULL;
+ bool slave_inc = info->linking;
+
+ if (slave_inc)
+ bond_upper_info = info->upper_info;
+
+ if (bond_upper_info) {
+ bond_grp->tx_type = bond_upper_info->tx_type;
+ bond_grp->hash_type = bond_upper_info->hash_type;
+ }
+}
+
+static bool check_unlinking_bond_support(struct hns_roce_bond_group *bond_grp)
+{
+ struct net_device *net_dev;
+ u8 slave_num = 0;
+
+ rcu_read_lock();
+ for_each_netdev_in_bond_rcu(bond_grp->upper_dev, net_dev) {
+ if (get_netdev_bond_slave_id(net_dev, bond_grp) >= 0)
+ slave_num++;
+ }
+ rcu_read_unlock();
+
+ return (slave_num > 1);
+}
+
+static bool check_linking_bond_support(struct netdev_lag_upper_info *bond_info,
+ struct hns_roce_bond_group *bond_grp,
+ struct net_device *upper_dev)
+{
+ if (!is_bond_setting_supported(bond_info))
+ return false;
+
+ return check_slave_support(bond_grp, upper_dev);
+}
+
+static enum bond_support_type
+ check_bond_support(struct hns_roce_bond_group *bond_grp,
+ struct net_device *upper_dev,
+ struct netdev_notifier_changeupper_info *info)
+{
+ bool bond_grp_exist = false;
+ bool support;
+
+ if (upper_dev == bond_grp->upper_dev)
+ bond_grp_exist = true;
+
+ if (!info->linking && !bond_grp_exist)
+ return BOND_NOT_SUPPORT;
+
+ if (info->linking)
+ support = check_linking_bond_support(info->upper_info, bond_grp,
+ upper_dev);
+ else
+ support = check_unlinking_bond_support(bond_grp);
+
+ if (support)
+ return BOND_SUPPORT;
+
+ return bond_grp_exist ? BOND_EXISTING_NOT_SUPPORT : BOND_NOT_SUPPORT;
+}
+
+static bool upper_event_filter(struct netdev_notifier_changeupper_info *info,
+ struct hns_roce_bond_group *bond_grp,
+ struct net_device *net_dev)
+{
+ struct net_device *upper_dev = info->upper_dev;
+ struct hns_roce_bond_group *bond_grp_tmp;
+ struct hns_roce_dev *hr_dev;
+ bool ret = true;
+ u8 bus_num;
+
+ if (!info->linking ||
+ bond_grp->bond_state != HNS_ROCE_BOND_NOT_ATTACHED)
+ return bond_grp->upper_dev == upper_dev;
+
+ hr_dev = hns_roce_get_hrdev_by_netdev(net_dev);
+ if (!hr_dev)
+ return false;
+
+ bus_num = get_hr_bus_num(hr_dev);
+ if (bond_grp->bus_num != bus_num) {
+ ret = false;
+ goto out;
+ }
+
+ bond_grp_tmp = hns_roce_get_bond_grp(net_dev, bus_num);
+ if (bond_grp_tmp && bond_grp_tmp != bond_grp)
+ ret = false;
+out:
+ ib_device_put(&hr_dev->ib_dev);
+ return ret;
+}
+
+static bool hns_roce_bond_upper_event(struct hns_roce_bond_group *bond_grp,
+ struct netdev_notifier_changeupper_info *info)
+{
+ struct net_device *net_dev =
+ netdev_notifier_info_to_dev((struct netdev_notifier_info *)info);
+ struct net_device *upper_dev = info->upper_dev;
+ enum bond_support_type support = BOND_SUPPORT;
+ struct hns_roce_dev *hr_dev;
+ int slave_id;
+
+ if (!upper_dev || !netif_is_lag_master(upper_dev))
+ return false;
+
+ if (!upper_event_filter(info, bond_grp, net_dev))
+ return false;
+
+ mutex_lock(&bond_grp->bond_mutex);
+ support = check_bond_support(bond_grp, upper_dev, info);
+ if (support == BOND_NOT_SUPPORT) {
+ mutex_unlock(&bond_grp->bond_mutex);
+ return false;
+ }
+
+ if (bond_grp->bond_state == HNS_ROCE_BOND_NOT_ATTACHED) {
+ hr_dev = hns_roce_get_hrdev_by_netdev(net_dev);
+ if (!hr_dev) {
+ mutex_unlock(&bond_grp->bond_mutex);
+ return false;
+ }
+ hns_roce_attach_bond_grp(bond_grp, hr_dev, upper_dev);
+ ib_device_put(&hr_dev->ib_dev);
+ }
+
+ /* In the case of netdev being unregistered, the roce
+ * instance shouldn't be inited.
+ */
+ if (net_dev->reg_state >= NETREG_UNREGISTERING) {
+ slave_id = get_netdev_bond_slave_id(net_dev, bond_grp);
+ if (slave_id >= 0) {
+ bond_grp->bond_func_info[slave_id].net_dev = NULL;
+ bond_grp->bond_func_info[slave_id].handle = NULL;
+ }
+ }
+
+ if (support == BOND_SUPPORT) {
+ bond_grp->bond_ready = true;
+ if (bond_grp->bond_state != HNS_ROCE_BOND_NOT_BONDED)
+ bond_grp->bond_state = HNS_ROCE_BOND_SLAVE_CHANGE_NUM;
+ }
+ mutex_unlock(&bond_grp->bond_mutex);
+ if (support == BOND_SUPPORT)
+ upper_event_setting(bond_grp, info);
+
+ return true;
+}
+
+static int hns_roce_bond_event(struct notifier_block *self,
+ unsigned long event, void *ptr)
+{
+ struct hns_roce_bond_group *bond_grp =
+ container_of(self, struct hns_roce_bond_group, bond_nb);
+ bool changed = false;
+
+ if (event == NETDEV_CHANGEUPPER)
+ changed = hns_roce_bond_upper_event(bond_grp, ptr);
+ if (event == NETDEV_CHANGELOWERSTATE)
+ changed = hns_roce_bond_lowerstate_event(bond_grp, ptr);
+
+ if (changed)
+ schedule_delayed_work(&bond_grp->bond_work, HZ);
+
+ return NOTIFY_DONE;
+}
+
+int hns_roce_alloc_bond_grp(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_bond_group *bgrps[ROCE_BOND_NUM_MAX];
+ struct hns_roce_bond_group *bond_grp;
+ u8 bus_num = get_hr_bus_num(hr_dev);
+ int ret;
+ int i;
+
+ if (xa_load(&roce_bond_xa, bus_num))
+ return 0;
+
+ for (i = 0; i < ROCE_BOND_NUM_MAX; i++) {
+ bond_grp = kvzalloc(sizeof(*bond_grp), GFP_KERNEL);
+ if (!bond_grp) {
+ ret = -ENOMEM;
+ goto mem_err;
+ }
+
+ mutex_init(&bond_grp->bond_mutex);
+ INIT_DELAYED_WORK(&bond_grp->bond_work, hns_roce_bond_work);
+
+ bond_grp->bond_ready = false;
+ bond_grp->bond_state = HNS_ROCE_BOND_NOT_ATTACHED;
+ bond_grp->bus_num = bus_num;
+
+ ret = alloc_bond_id(bond_grp);
+ if (ret) {
+ dev_err(hr_dev->dev,
+ "failed to alloc bond ID, ret = %d.\n", ret);
+ goto alloc_id_err;
+ }
+
+ bond_grp->bond_nb.notifier_call = hns_roce_bond_event;
+ ret = register_netdevice_notifier(&bond_grp->bond_nb);
+ if (ret) {
+ ibdev_err(&hr_dev->ib_dev,
+ "failed to register bond nb, ret = %d.\n", ret);
+ goto register_nb_err;
+ }
+ bgrps[i] = bond_grp;
+ }
+
+ return 0;
+
+register_nb_err:
+ remove_bond_id(bond_grp->bus_num, bond_grp->bond_id);
+alloc_id_err:
+ mutex_destroy(&bond_grp->bond_mutex);
+ kvfree(bond_grp);
+mem_err:
+ for (i--; i >= 0; i--) {
+ unregister_netdevice_notifier(&bgrps[i]->bond_nb);
+ cancel_delayed_work_sync(&bgrps[i]->bond_work);
+ remove_bond_id(bgrps[i]->bus_num, bgrps[i]->bond_id);
+ mutex_destroy(&bgrps[i]->bond_mutex);
+ kvfree(bgrps[i]);
+ }
+ return ret;
+}
+
+void hns_roce_dealloc_bond_grp(void)
+{
+ struct hns_roce_bond_group *bond_grp;
+ struct hns_roce_die_info *die_info;
+ unsigned long id;
+ int i;
+
+ xa_for_each(&roce_bond_xa, id, die_info) {
+ for (i = 0; i < ROCE_BOND_NUM_MAX; i++) {
+ bond_grp = die_info->bgrps[i];
+ if (!bond_grp)
+ continue;
+ unregister_netdevice_notifier(&bond_grp->bond_nb);
+ cancel_delayed_work_sync(&bond_grp->bond_work);
+ remove_bond_id(bond_grp->bus_num, bond_grp->bond_id);
+ mutex_destroy(&bond_grp->bond_mutex);
+ kvfree(bond_grp);
+ }
+ }
+}
+
+int hns_roce_bond_init(struct hns_roce_dev *hr_dev)
+{
+ struct net_device *net_dev = get_hr_netdev(hr_dev, 0);
+ struct hns_roce_v2_priv *priv = hr_dev->priv;
+ struct hns_roce_bond_group *bond_grp;
+ u8 bus_num = get_hr_bus_num(hr_dev);
+ int ret;
+
+ bond_grp = hns_roce_get_bond_grp(net_dev, bus_num);
+
+ if (priv->handle->rinfo.reset_state == HNS_ROCE_STATE_RST_INIT) {
+ ret = hns_roce_recover_bond(bond_grp, hr_dev);
+ if (ret) {
+ dev_err(hr_dev->dev,
+ "failed to recover RoCE bond, ret = %d.\n", ret);
+ return ret;
+ }
+ }
+
+ return hns_roce_set_bond_netdev(bond_grp, hr_dev);
+}
+
+void hns_roce_bond_suspend(struct hnae3_handle *handle)
+{
+ u8 bus_num = handle->pdev->bus->number;
+ struct hns_roce_bond_group *bond_grp;
+ struct hns_roce_die_info *die_info;
+ int i;
+
+ die_info = xa_load(&roce_bond_xa, bus_num);
+ if (!die_info)
+ return;
+
+ mutex_lock(&die_info->die_mutex);
+
+ /*
+ * Avoid duplicated processing when calling this function
+ * multiple times.
+ */
+ if (die_info->suspend_cnt)
+ goto out;
+
+ for (i = 0; i < ROCE_BOND_NUM_MAX; i++) {
+ bond_grp = die_info->bgrps[i];
+ if (!bond_grp)
+ continue;
+ unregister_netdevice_notifier(&bond_grp->bond_nb);
+ cancel_delayed_work_sync(&bond_grp->bond_work);
+ }
+
+out:
+ die_info->suspend_cnt++;
+ mutex_unlock(&die_info->die_mutex);
+}
+
+void hns_roce_bond_resume(struct hnae3_handle *handle)
+{
+ u8 bus_num = handle->pdev->bus->number;
+ struct hns_roce_bond_group *bond_grp;
+ struct hns_roce_die_info *die_info;
+ int i, ret;
+
+ die_info = xa_load(&roce_bond_xa, bus_num);
+ if (!die_info)
+ return;
+
+ mutex_lock(&die_info->die_mutex);
+
+ die_info->suspend_cnt--;
+ if (die_info->suspend_cnt)
+ goto out;
+
+ for (i = 0; i < ROCE_BOND_NUM_MAX; i++) {
+ bond_grp = die_info->bgrps[i];
+ if (!bond_grp)
+ continue;
+ ret = register_netdevice_notifier(&bond_grp->bond_nb);
+ if (ret)
+ dev_err(&handle->pdev->dev,
+ "failed to resume bond notifier(bus_num = %u, id = %u), ret = %d.\n",
+ bus_num, bond_grp->bond_id, ret);
+ }
+
+out:
+ mutex_unlock(&die_info->die_mutex);
+}
diff --git a/drivers/infiniband/hw/hns/hns_roce_bond.h b/drivers/infiniband/hw/hns/hns_roce_bond.h
new file mode 100644
index 000000000000..98c295d78ca1
--- /dev/null
+++ b/drivers/infiniband/hw/hns/hns_roce_bond.h
@@ -0,0 +1,95 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Copyright (c) 2025 Hisilicon Limited.
+ */
+
+#ifndef _HNS_ROCE_BOND_H
+#define _HNS_ROCE_BOND_H
+
+#include <linux/netdevice.h>
+#include <net/bonding.h>
+
+#define ROCE_BOND_FUNC_MAX 4
+#define ROCE_BOND_NUM_MAX 2
+
+#define BOND_ID(id) BIT(id)
+
+#define BOND_ERR_LOG(fmt, ...) \
+ pr_err("HNS RoCE Bonding: " fmt, ##__VA_ARGS__)
+
+enum {
+ BOND_MODE_1,
+ BOND_MODE_2_4,
+};
+
+enum hns_roce_bond_hashtype {
+ BOND_HASH_L2,
+ BOND_HASH_L34,
+ BOND_HASH_L23,
+};
+
+enum bond_support_type {
+ BOND_NOT_SUPPORT,
+ /*
+ * bond_grp already exists, but in the current
+ * conditions it's no longer supported
+ */
+ BOND_EXISTING_NOT_SUPPORT,
+ BOND_SUPPORT,
+};
+
+enum hns_roce_bond_state {
+ HNS_ROCE_BOND_NOT_ATTACHED,
+ HNS_ROCE_BOND_NOT_BONDED,
+ HNS_ROCE_BOND_IS_BONDED,
+ HNS_ROCE_BOND_SLAVE_CHANGE_NUM,
+ HNS_ROCE_BOND_SLAVE_CHANGESTATE,
+};
+
+enum hns_roce_bond_cmd_type {
+ HNS_ROCE_SET_BOND,
+ HNS_ROCE_CHANGE_BOND,
+ HNS_ROCE_CLEAR_BOND,
+};
+
+struct hns_roce_func_info {
+ struct net_device *net_dev;
+ struct hnae3_handle *handle;
+};
+
+struct hns_roce_bond_group {
+ struct net_device *upper_dev;
+ struct hns_roce_dev *main_hr_dev;
+ u8 active_slave_num;
+ u32 slave_map;
+ u32 active_slave_map;
+ u8 bond_id;
+ u8 bus_num;
+ struct hns_roce_func_info bond_func_info[ROCE_BOND_FUNC_MAX];
+ bool bond_ready;
+ enum hns_roce_bond_state bond_state;
+ enum netdev_lag_tx_type tx_type;
+ enum netdev_lag_hash hash_type;
+ struct mutex bond_mutex;
+ struct notifier_block bond_nb;
+ struct delayed_work bond_work;
+};
+
+struct hns_roce_die_info {
+ u8 bond_id_mask;
+ struct hns_roce_bond_group *bgrps[ROCE_BOND_NUM_MAX];
+ struct mutex die_mutex;
+ u8 suspend_cnt;
+};
+
+struct hns_roce_bond_group *hns_roce_get_bond_grp(struct net_device *net_dev,
+ u8 bus_num);
+int hns_roce_alloc_bond_grp(struct hns_roce_dev *hr_dev);
+void hns_roce_dealloc_bond_grp(void);
+void hns_roce_cleanup_bond(struct hns_roce_bond_group *bond_grp);
+bool hns_roce_bond_is_active(struct hns_roce_dev *hr_dev);
+int hns_roce_bond_init(struct hns_roce_dev *hr_dev);
+void hns_roce_bond_suspend(struct hnae3_handle *handle);
+void hns_roce_bond_resume(struct hnae3_handle *handle);
+
+#endif
diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.c b/drivers/infiniband/hw/hns/hns_roce_cmd.c
index 8c1f7a6f84d2..873e8a69a1b9 100644
--- a/drivers/infiniband/hw/hns/hns_roce_cmd.c
+++ b/drivers/infiniband/hw/hns/hns_roce_cmd.c
@@ -31,145 +31,57 @@
*/
#include <linux/dmapool.h>
-#include <linux/platform_device.h>
#include "hns_roce_common.h"
#include "hns_roce_device.h"
#include "hns_roce_cmd.h"
-#define CMD_POLL_TOKEN 0xffff
-#define CMD_MAX_NUM 32
-#define STATUS_MASK 0xff
-#define CMD_TOKEN_MASK 0x1f
-#define GO_BIT_TIMEOUT_MSECS 10000
+#define CMD_POLL_TOKEN 0xffff
+#define CMD_MAX_NUM 32
-enum {
- HCR_TOKEN_OFFSET = 0x14,
- HCR_STATUS_OFFSET = 0x18,
- HCR_GO_BIT = 15,
-};
-
-static int cmd_pending(struct hns_roce_dev *hr_dev)
-{
- u32 status = readl(hr_dev->cmd.hcr + HCR_TOKEN_OFFSET);
-
- return (!!(status & (1 << HCR_GO_BIT)));
-}
-
-/* this function should be serialized with "hcr_mutex" */
-static int __hns_roce_cmd_mbox_post_hw(struct hns_roce_dev *hr_dev,
- u64 in_param, u64 out_param,
- u32 in_modifier, u8 op_modifier, u16 op,
- u16 token, int event)
+static int hns_roce_cmd_mbox_post_hw(struct hns_roce_dev *hr_dev,
+ struct hns_roce_mbox_msg *mbox_msg)
{
- struct hns_roce_cmdq *cmd = &hr_dev->cmd;
- struct device *dev = &hr_dev->pdev->dev;
- u32 __iomem *hcr = (u32 *)cmd->hcr;
- int ret = -EAGAIN;
- unsigned long end;
- u32 val = 0;
-
- end = msecs_to_jiffies(GO_BIT_TIMEOUT_MSECS) + jiffies;
- while (cmd_pending(hr_dev)) {
- if (time_after(jiffies, end)) {
- dev_dbg(dev, "jiffies=%d end=%d\n", (int)jiffies,
- (int)end);
- goto out;
- }
- cond_resched();
- }
-
- roce_set_field(val, ROCEE_MB6_ROCEE_MB_CMD_M, ROCEE_MB6_ROCEE_MB_CMD_S,
- op);
- roce_set_field(val, ROCEE_MB6_ROCEE_MB_CMD_MDF_M,
- ROCEE_MB6_ROCEE_MB_CMD_MDF_S, op_modifier);
- roce_set_bit(val, ROCEE_MB6_ROCEE_MB_EVENT_S, event);
- roce_set_bit(val, ROCEE_MB6_ROCEE_MB_HW_RUN_S, 1);
- roce_set_field(val, ROCEE_MB6_ROCEE_MB_TOKEN_M,
- ROCEE_MB6_ROCEE_MB_TOKEN_S, token);
-
- __raw_writeq(cpu_to_le64(in_param), hcr + 0);
- __raw_writeq(cpu_to_le64(out_param), hcr + 2);
- __raw_writel(cpu_to_le32(in_modifier), hcr + 4);
- /* Memory barrier */
- wmb();
-
- __raw_writel(cpu_to_le32(val), hcr + 5);
-
- mmiowb();
- ret = 0;
-
-out:
- return ret;
-}
+ int ret;
-static int hns_roce_cmd_mbox_post_hw(struct hns_roce_dev *hr_dev, u64 in_param,
- u64 out_param, u32 in_modifier,
- u8 op_modifier, u16 op, u16 token,
- int event)
-{
- struct hns_roce_cmdq *cmd = &hr_dev->cmd;
- int ret = -EAGAIN;
+ ret = hr_dev->hw->post_mbox(hr_dev, mbox_msg);
+ if (ret)
+ return ret;
- mutex_lock(&cmd->hcr_mutex);
- ret = __hns_roce_cmd_mbox_post_hw(hr_dev, in_param, out_param,
- in_modifier, op_modifier, op, token,
- event);
- mutex_unlock(&cmd->hcr_mutex);
+ atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_MBX_POSTED_CNT]);
- return ret;
+ return 0;
}
/* this should be called with "poll_sem" */
-static int __hns_roce_cmd_mbox_poll(struct hns_roce_dev *hr_dev, u64 in_param,
- u64 out_param, unsigned long in_modifier,
- u8 op_modifier, u16 op,
- unsigned long timeout)
+static int __hns_roce_cmd_mbox_poll(struct hns_roce_dev *hr_dev,
+ struct hns_roce_mbox_msg *mbox_msg)
{
- struct device *dev = &hr_dev->pdev->dev;
- u8 __iomem *hcr = hr_dev->cmd.hcr;
- unsigned long end = 0;
- u32 status = 0;
int ret;
- ret = hns_roce_cmd_mbox_post_hw(hr_dev, in_param, out_param,
- in_modifier, op_modifier, op,
- CMD_POLL_TOKEN, 0);
+ ret = hns_roce_cmd_mbox_post_hw(hr_dev, mbox_msg);
if (ret) {
- dev_err(dev, "[cmd_poll]hns_roce_cmd_mbox_post_hw failed\n");
- goto out;
+ dev_err_ratelimited(hr_dev->dev,
+ "failed to post mailbox 0x%x in poll mode, ret = %d.\n",
+ mbox_msg->cmd, ret);
+ return ret;
}
- end = msecs_to_jiffies(timeout) + jiffies;
- while (cmd_pending(hr_dev) && time_before(jiffies, end))
- cond_resched();
+ ret = hr_dev->hw->poll_mbox_done(hr_dev);
+ if (ret)
+ return ret;
- if (cmd_pending(hr_dev)) {
- dev_err(dev, "[cmd_poll]hw run cmd TIMEDOUT!\n");
- ret = -ETIMEDOUT;
- goto out;
- }
+ atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_MBX_POLLED_CNT]);
- status = le32_to_cpu((__force __be32)
- __raw_readl(hcr + HCR_STATUS_OFFSET));
- if ((status & STATUS_MASK) != 0x1) {
- dev_err(dev, "mailbox status 0x%x!\n", status);
- ret = -EBUSY;
- goto out;
- }
-
-out:
- return ret;
+ return 0;
}
-static int hns_roce_cmd_mbox_poll(struct hns_roce_dev *hr_dev, u64 in_param,
- u64 out_param, unsigned long in_modifier,
- u8 op_modifier, u16 op, unsigned long timeout)
+static int hns_roce_cmd_mbox_poll(struct hns_roce_dev *hr_dev,
+ struct hns_roce_mbox_msg *mbox_msg)
{
int ret;
down(&hr_dev->cmd.poll_sem);
- ret = __hns_roce_cmd_mbox_poll(hr_dev, in_param, out_param, in_modifier,
- op_modifier, op, timeout);
+ ret = __hns_roce_cmd_mbox_poll(hr_dev, mbox_msg);
up(&hr_dev->cmd.poll_sem);
return ret;
@@ -178,109 +90,116 @@ static int hns_roce_cmd_mbox_poll(struct hns_roce_dev *hr_dev, u64 in_param,
void hns_roce_cmd_event(struct hns_roce_dev *hr_dev, u16 token, u8 status,
u64 out_param)
{
- struct hns_roce_cmd_context
- *context = &hr_dev->cmd.context[token & hr_dev->cmd.token_mask];
+ struct hns_roce_cmd_context *context =
+ &hr_dev->cmd.context[token % hr_dev->cmd.max_cmds];
- if (token != context->token)
+ if (unlikely(token != context->token)) {
+ dev_err_ratelimited(hr_dev->dev,
+ "[cmd] invalid ae token 0x%x, context token is 0x%x.\n",
+ token, context->token);
return;
+ }
context->result = (status == HNS_ROCE_CMD_SUCCESS) ? 0 : (-EIO);
context->out_param = out_param;
complete(&context->done);
+ atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_MBX_EVENT_CNT]);
}
-/* this should be called with "use_events" */
-static int __hns_roce_cmd_mbox_wait(struct hns_roce_dev *hr_dev, u64 in_param,
- u64 out_param, unsigned long in_modifier,
- u8 op_modifier, u16 op,
- unsigned long timeout)
+static int __hns_roce_cmd_mbox_wait(struct hns_roce_dev *hr_dev,
+ struct hns_roce_mbox_msg *mbox_msg)
{
struct hns_roce_cmdq *cmd = &hr_dev->cmd;
- struct device *dev = &hr_dev->pdev->dev;
struct hns_roce_cmd_context *context;
- int ret = 0;
+ struct device *dev = hr_dev->dev;
+ int ret;
spin_lock(&cmd->context_lock);
- WARN_ON(cmd->free_head < 0);
- context = &cmd->context[cmd->free_head];
- context->token += cmd->token_mask + 1;
- cmd->free_head = context->next;
+
+ do {
+ context = &cmd->context[cmd->free_head];
+ cmd->free_head = context->next;
+ } while (context->busy);
+
+ context->busy = 1;
+ context->token += cmd->max_cmds;
+
spin_unlock(&cmd->context_lock);
- init_completion(&context->done);
+ reinit_completion(&context->done);
- ret = hns_roce_cmd_mbox_post_hw(hr_dev, in_param, out_param,
- in_modifier, op_modifier, op,
- context->token, 1);
- if (ret)
+ mbox_msg->token = context->token;
+ ret = hns_roce_cmd_mbox_post_hw(hr_dev, mbox_msg);
+ if (ret) {
+ dev_err_ratelimited(dev,
+ "failed to post mailbox 0x%x in event mode, ret = %d.\n",
+ mbox_msg->cmd, ret);
goto out;
+ }
- /*
- * It is timeout when wait_for_completion_timeout return 0
- * The return value is the time limit set in advance
- * how many seconds showing
- */
if (!wait_for_completion_timeout(&context->done,
- msecs_to_jiffies(timeout))) {
- dev_err(dev, "[cmd]wait_for_completion_timeout timeout\n");
+ msecs_to_jiffies(HNS_ROCE_CMD_TIMEOUT_MSECS))) {
+ dev_err_ratelimited(dev, "[cmd] token 0x%x mailbox 0x%x timeout.\n",
+ context->token, mbox_msg->cmd);
ret = -EBUSY;
goto out;
}
ret = context->result;
- if (ret) {
- dev_err(dev, "[cmd]event mod cmd process error!err=%d\n", ret);
- goto out;
- }
+ if (ret)
+ dev_err_ratelimited(dev, "[cmd] token 0x%x mailbox 0x%x error %d.\n",
+ context->token, mbox_msg->cmd, ret);
out:
- spin_lock(&cmd->context_lock);
- context->next = cmd->free_head;
- cmd->free_head = context - cmd->context;
- spin_unlock(&cmd->context_lock);
-
+ context->busy = 0;
return ret;
}
-static int hns_roce_cmd_mbox_wait(struct hns_roce_dev *hr_dev, u64 in_param,
- u64 out_param, unsigned long in_modifier,
- u8 op_modifier, u16 op, unsigned long timeout)
+static int hns_roce_cmd_mbox_wait(struct hns_roce_dev *hr_dev,
+ struct hns_roce_mbox_msg *mbox_msg)
{
- int ret = 0;
+ int ret;
down(&hr_dev->cmd.event_sem);
- ret = __hns_roce_cmd_mbox_wait(hr_dev, in_param, out_param,
- in_modifier, op_modifier, op, timeout);
+ ret = __hns_roce_cmd_mbox_wait(hr_dev, mbox_msg);
up(&hr_dev->cmd.event_sem);
return ret;
}
int hns_roce_cmd_mbox(struct hns_roce_dev *hr_dev, u64 in_param, u64 out_param,
- unsigned long in_modifier, u8 op_modifier, u16 op,
- unsigned long timeout)
+ u8 cmd, unsigned long tag)
{
- if (hr_dev->cmd.use_events)
- return hns_roce_cmd_mbox_wait(hr_dev, in_param, out_param,
- in_modifier, op_modifier, op,
- timeout);
- else
- return hns_roce_cmd_mbox_poll(hr_dev, in_param, out_param,
- in_modifier, op_modifier, op,
- timeout);
+ struct hns_roce_mbox_msg mbox_msg = {};
+ bool is_busy;
+
+ if (hr_dev->hw->chk_mbox_avail)
+ if (!hr_dev->hw->chk_mbox_avail(hr_dev, &is_busy))
+ return is_busy ? -EBUSY : 0;
+
+ mbox_msg.in_param = in_param;
+ mbox_msg.out_param = out_param;
+ mbox_msg.cmd = cmd;
+ mbox_msg.tag = tag;
+
+ if (hr_dev->cmd.use_events) {
+ mbox_msg.event_en = 1;
+
+ return hns_roce_cmd_mbox_wait(hr_dev, &mbox_msg);
+ } else {
+ mbox_msg.event_en = 0;
+ mbox_msg.token = CMD_POLL_TOKEN;
+
+ return hns_roce_cmd_mbox_poll(hr_dev, &mbox_msg);
+ }
}
int hns_roce_cmd_init(struct hns_roce_dev *hr_dev)
{
- struct device *dev = &hr_dev->pdev->dev;
-
- mutex_init(&hr_dev->cmd.hcr_mutex);
sema_init(&hr_dev->cmd.poll_sem, 1);
hr_dev->cmd.use_events = 0;
- hr_dev->cmd.toggle = 1;
hr_dev->cmd.max_cmds = CMD_MAX_NUM;
- hr_dev->cmd.hcr = hr_dev->reg_base + ROCEE_MB1_REG;
- hr_dev->cmd.pool = dma_pool_create("hns_roce_cmd", dev,
+ hr_dev->cmd.pool = dma_pool_create("hns_roce_cmd", hr_dev->dev,
HNS_ROCE_MAILBOX_SIZE,
HNS_ROCE_MAILBOX_SIZE, 0);
if (!hr_dev->cmd.pool)
@@ -299,47 +218,39 @@ int hns_roce_cmd_use_events(struct hns_roce_dev *hr_dev)
struct hns_roce_cmdq *hr_cmd = &hr_dev->cmd;
int i;
- hr_cmd->context = kmalloc(hr_cmd->max_cmds *
- sizeof(struct hns_roce_cmd_context),
- GFP_KERNEL);
- if (!hr_cmd->context)
+ hr_cmd->context =
+ kcalloc(hr_cmd->max_cmds, sizeof(*hr_cmd->context), GFP_KERNEL);
+ if (!hr_cmd->context) {
+ hr_dev->cmd_mod = 0;
return -ENOMEM;
+ }
for (i = 0; i < hr_cmd->max_cmds; ++i) {
hr_cmd->context[i].token = i;
hr_cmd->context[i].next = i + 1;
+ init_completion(&hr_cmd->context[i].done);
}
-
- hr_cmd->context[hr_cmd->max_cmds - 1].next = -1;
+ hr_cmd->context[hr_cmd->max_cmds - 1].next = 0;
hr_cmd->free_head = 0;
sema_init(&hr_cmd->event_sem, hr_cmd->max_cmds);
spin_lock_init(&hr_cmd->context_lock);
- hr_cmd->token_mask = CMD_TOKEN_MASK;
hr_cmd->use_events = 1;
- down(&hr_cmd->poll_sem);
-
return 0;
}
void hns_roce_cmd_use_polling(struct hns_roce_dev *hr_dev)
{
struct hns_roce_cmdq *hr_cmd = &hr_dev->cmd;
- int i;
-
- hr_cmd->use_events = 0;
-
- for (i = 0; i < hr_cmd->max_cmds; ++i)
- down(&hr_cmd->event_sem);
kfree(hr_cmd->context);
- up(&hr_cmd->poll_sem);
+ hr_cmd->use_events = 0;
}
-struct hns_roce_cmd_mailbox
- *hns_roce_alloc_cmd_mailbox(struct hns_roce_dev *hr_dev)
+struct hns_roce_cmd_mailbox *
+hns_roce_alloc_cmd_mailbox(struct hns_roce_dev *hr_dev)
{
struct hns_roce_cmd_mailbox *mailbox;
@@ -347,8 +258,8 @@ struct hns_roce_cmd_mailbox
if (!mailbox)
return ERR_PTR(-ENOMEM);
- mailbox->buf = dma_pool_alloc(hr_dev->cmd.pool, GFP_KERNEL,
- &mailbox->dma);
+ mailbox->buf =
+ dma_pool_alloc(hr_dev->cmd.pool, GFP_KERNEL, &mailbox->dma);
if (!mailbox->buf) {
kfree(mailbox);
return ERR_PTR(-ENOMEM);
@@ -366,3 +277,15 @@ void hns_roce_free_cmd_mailbox(struct hns_roce_dev *hr_dev,
dma_pool_free(hr_dev->cmd.pool, mailbox->buf, mailbox->dma);
kfree(mailbox);
}
+
+int hns_roce_create_hw_ctx(struct hns_roce_dev *dev,
+ struct hns_roce_cmd_mailbox *mailbox,
+ u8 cmd, unsigned long idx)
+{
+ return hns_roce_cmd_mbox(dev, mailbox->dma, 0, cmd, idx);
+}
+
+int hns_roce_destroy_hw_ctx(struct hns_roce_dev *dev, u8 cmd, unsigned long idx)
+{
+ return hns_roce_cmd_mbox(dev, 0, 0, cmd, idx);
+}
diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.h b/drivers/infiniband/hw/hns/hns_roce_cmd.h
index f5a9ee2fc53d..11dbbabebdc9 100644
--- a/drivers/infiniband/hw/hns/hns_roce_cmd.h
+++ b/drivers/infiniband/hw/hns/hns_roce_cmd.h
@@ -37,13 +37,93 @@
#define HNS_ROCE_CMD_TIMEOUT_MSECS 10000
enum {
+ /* QPC BT commands */
+ HNS_ROCE_CMD_WRITE_QPC_BT0 = 0x0,
+ HNS_ROCE_CMD_WRITE_QPC_BT1 = 0x1,
+ HNS_ROCE_CMD_WRITE_QPC_BT2 = 0x2,
+ HNS_ROCE_CMD_READ_QPC_BT0 = 0x4,
+ HNS_ROCE_CMD_READ_QPC_BT1 = 0x5,
+ HNS_ROCE_CMD_READ_QPC_BT2 = 0x6,
+ HNS_ROCE_CMD_DESTROY_QPC_BT0 = 0x8,
+ HNS_ROCE_CMD_DESTROY_QPC_BT1 = 0x9,
+ HNS_ROCE_CMD_DESTROY_QPC_BT2 = 0xa,
+
+ /* QPC operation */
+ HNS_ROCE_CMD_MODIFY_QPC = 0x41,
+ HNS_ROCE_CMD_QUERY_QPC = 0x42,
+
+ HNS_ROCE_CMD_MODIFY_CQC = 0x52,
+ HNS_ROCE_CMD_QUERY_CQC = 0x53,
+ /* CQC BT commands */
+ HNS_ROCE_CMD_WRITE_CQC_BT0 = 0x10,
+ HNS_ROCE_CMD_WRITE_CQC_BT1 = 0x11,
+ HNS_ROCE_CMD_WRITE_CQC_BT2 = 0x12,
+ HNS_ROCE_CMD_READ_CQC_BT0 = 0x14,
+ HNS_ROCE_CMD_READ_CQC_BT1 = 0x15,
+ HNS_ROCE_CMD_READ_CQC_BT2 = 0x1b,
+ HNS_ROCE_CMD_DESTROY_CQC_BT0 = 0x18,
+ HNS_ROCE_CMD_DESTROY_CQC_BT1 = 0x19,
+ HNS_ROCE_CMD_DESTROY_CQC_BT2 = 0x1a,
+
+ /* MPT BT commands */
+ HNS_ROCE_CMD_WRITE_MPT_BT0 = 0x20,
+ HNS_ROCE_CMD_WRITE_MPT_BT1 = 0x21,
+ HNS_ROCE_CMD_WRITE_MPT_BT2 = 0x22,
+ HNS_ROCE_CMD_READ_MPT_BT0 = 0x24,
+ HNS_ROCE_CMD_READ_MPT_BT1 = 0x25,
+ HNS_ROCE_CMD_READ_MPT_BT2 = 0x26,
+ HNS_ROCE_CMD_DESTROY_MPT_BT0 = 0x28,
+ HNS_ROCE_CMD_DESTROY_MPT_BT1 = 0x29,
+ HNS_ROCE_CMD_DESTROY_MPT_BT2 = 0x2a,
+
+ /* CQC TIMER commands */
+ HNS_ROCE_CMD_WRITE_CQC_TIMER_BT0 = 0x23,
+ HNS_ROCE_CMD_READ_CQC_TIMER_BT0 = 0x27,
+
+ /* MPT commands */
+ HNS_ROCE_CMD_QUERY_MPT = 0x62,
+
+ /* SRQC BT commands */
+ HNS_ROCE_CMD_WRITE_SRQC_BT0 = 0x30,
+ HNS_ROCE_CMD_WRITE_SRQC_BT1 = 0x31,
+ HNS_ROCE_CMD_WRITE_SRQC_BT2 = 0x32,
+ HNS_ROCE_CMD_READ_SRQC_BT0 = 0x34,
+ HNS_ROCE_CMD_READ_SRQC_BT1 = 0x35,
+ HNS_ROCE_CMD_READ_SRQC_BT2 = 0x36,
+ HNS_ROCE_CMD_DESTROY_SRQC_BT0 = 0x38,
+ HNS_ROCE_CMD_DESTROY_SRQC_BT1 = 0x39,
+ HNS_ROCE_CMD_DESTROY_SRQC_BT2 = 0x3a,
+
+ /* QPC TIMER commands */
+ HNS_ROCE_CMD_WRITE_QPC_TIMER_BT0 = 0x33,
+ HNS_ROCE_CMD_READ_QPC_TIMER_BT0 = 0x37,
+
+ /* EQC commands */
+ HNS_ROCE_CMD_CREATE_AEQC = 0x80,
+ HNS_ROCE_CMD_MODIFY_AEQC = 0x81,
+ HNS_ROCE_CMD_QUERY_AEQC = 0x82,
+ HNS_ROCE_CMD_DESTROY_AEQC = 0x83,
+ HNS_ROCE_CMD_CREATE_CEQC = 0x90,
+ HNS_ROCE_CMD_MODIFY_CEQC = 0x91,
+ HNS_ROCE_CMD_QUERY_CEQC = 0x92,
+ HNS_ROCE_CMD_DESTROY_CEQC = 0x93,
+
+ /* SCC CTX commands */
+ HNS_ROCE_CMD_QUERY_SCCC = 0xa2,
+
+ /* SCC CTX BT commands */
+ HNS_ROCE_CMD_READ_SCCC_BT0 = 0xa4,
+ HNS_ROCE_CMD_WRITE_SCCC_BT0 = 0xa5,
+};
+
+enum {
/* TPT commands */
- HNS_ROCE_CMD_SW2HW_MPT = 0xd,
- HNS_ROCE_CMD_HW2SW_MPT = 0xf,
+ HNS_ROCE_CMD_CREATE_MPT = 0xd,
+ HNS_ROCE_CMD_DESTROY_MPT = 0xf,
/* CQ commands */
- HNS_ROCE_CMD_SW2HW_CQ = 0x16,
- HNS_ROCE_CMD_HW2SW_CQ = 0x17,
+ HNS_ROCE_CMD_CREATE_CQC = 0x16,
+ HNS_ROCE_CMD_DESTROY_CQC = 0x17,
/* QP/EE commands */
HNS_ROCE_CMD_RST2INIT_QP = 0x19,
@@ -52,19 +132,27 @@ enum {
HNS_ROCE_CMD_RTS2RTS_QP = 0x1c,
HNS_ROCE_CMD_2ERR_QP = 0x1e,
HNS_ROCE_CMD_RTS2SQD_QP = 0x1f,
- HNS_ROCE_CMD_SQD2SQD_QP = 0x38,
HNS_ROCE_CMD_SQD2RTS_QP = 0x20,
HNS_ROCE_CMD_2RST_QP = 0x21,
HNS_ROCE_CMD_QUERY_QP = 0x22,
+ HNS_ROCE_CMD_SQD2SQD_QP = 0x38,
+ HNS_ROCE_CMD_CREATE_SRQ = 0x70,
+ HNS_ROCE_CMD_MODIFY_SRQC = 0x72,
+ HNS_ROCE_CMD_QUERY_SRQC = 0x73,
+ HNS_ROCE_CMD_DESTROY_SRQ = 0x74,
};
int hns_roce_cmd_mbox(struct hns_roce_dev *hr_dev, u64 in_param, u64 out_param,
- unsigned long in_modifier, u8 op_modifier, u16 op,
- unsigned long timeout);
+ u8 cmd, unsigned long tag);
-struct hns_roce_cmd_mailbox
- *hns_roce_alloc_cmd_mailbox(struct hns_roce_dev *hr_dev);
+struct hns_roce_cmd_mailbox *
+hns_roce_alloc_cmd_mailbox(struct hns_roce_dev *hr_dev);
void hns_roce_free_cmd_mailbox(struct hns_roce_dev *hr_dev,
struct hns_roce_cmd_mailbox *mailbox);
+int hns_roce_create_hw_ctx(struct hns_roce_dev *dev,
+ struct hns_roce_cmd_mailbox *mailbox,
+ u8 cmd, unsigned long idx);
+int hns_roce_destroy_hw_ctx(struct hns_roce_dev *dev, u8 cmd,
+ unsigned long idx);
#endif /* _HNS_ROCE_CMD_H */
diff --git a/drivers/infiniband/hw/hns/hns_roce_common.h b/drivers/infiniband/hw/hns/hns_roce_common.h
index 4af403e1348c..465d1f914b6c 100644
--- a/drivers/infiniband/hw/hns/hns_roce_common.h
+++ b/drivers/infiniband/hw/hns/hns_roce_common.h
@@ -32,260 +32,77 @@
#ifndef _HNS_ROCE_COMMON_H
#define _HNS_ROCE_COMMON_H
-
-#ifndef assert
-#define assert(cond)
-#endif
+#include <linux/bitfield.h>
#define roce_write(dev, reg, val) writel((val), (dev)->reg_base + (reg))
#define roce_read(dev, reg) readl((dev)->reg_base + (reg))
#define roce_raw_write(value, addr) \
__raw_writel((__force u32)cpu_to_le32(value), (addr))
-#define roce_get_field(origin, mask, shift) \
- (((origin) & (mask)) >> (shift))
+#define roce_get_field(origin, mask, shift) \
+ ((le32_to_cpu(origin) & (mask)) >> (u32)(shift))
#define roce_get_bit(origin, shift) \
roce_get_field((origin), (1ul << (shift)), (shift))
-#define roce_set_field(origin, mask, shift, val) \
- do { \
- (origin) &= (~(mask)); \
- (origin) |= (((u32)(val) << (shift)) & (mask)); \
+#define roce_set_field(origin, mask, shift, val) \
+ do { \
+ (origin) &= ~cpu_to_le32(mask); \
+ (origin) |= \
+ cpu_to_le32(((u32)(val) << (u32)(shift)) & (mask)); \
} while (0)
-#define roce_set_bit(origin, shift, val) \
+#define roce_set_bit(origin, shift, val) \
roce_set_field((origin), (1ul << (shift)), (shift), (val))
-/*
- * roce_hw_index_cmp_lt - Compare two hardware index values in hisilicon
- * SOC, check if a is less than b.
- * @a: hardware index value
- * @b: hardware index value
- * @bits: the number of bits of a and b, range: 0~31.
- *
- * Hardware index increases continuously till max value, and then restart
- * from zero, again and again. Because the bits of reg field is often
- * limited, the reg field can only hold the low bits of the hardware index
- * in hisilicon SOC.
- * In some scenes we need to compare two values(a,b) getted from two reg
- * fields in this driver, for example:
- * If a equals 0xfffe, b equals 0x1 and bits equals 16, we think b has
- * incresed from 0xffff to 0x1 and a is less than b.
- * If a equals 0xfffe, b equals 0x0xf001 and bits equals 16, we think a
- * is bigger than b.
- *
- * Return true on a less than b, otherwise false.
- */
-#define roce_hw_index_mask(bits) ((1ul << (bits)) - 1)
-#define roce_hw_index_shift(bits) (32 - (bits))
-#define roce_hw_index_cmp_lt(a, b, bits) \
- ((int)((((a) - (b)) & roce_hw_index_mask(bits)) << \
- roce_hw_index_shift(bits)) < 0)
-
-#define ROCEE_GLB_CFG_ROCEE_DB_SQ_MODE_S 3
-#define ROCEE_GLB_CFG_ROCEE_DB_OTH_MODE_S 4
-
-#define ROCEE_GLB_CFG_SQ_EXT_DB_MODE_S 5
-
-#define ROCEE_GLB_CFG_OTH_EXT_DB_MODE_S 6
-
-#define ROCEE_GLB_CFG_ROCEE_PORT_ST_S 10
-#define ROCEE_GLB_CFG_ROCEE_PORT_ST_M \
- (((1UL << 6) - 1) << ROCEE_GLB_CFG_ROCEE_PORT_ST_S)
-
-#define ROCEE_GLB_CFG_TRP_RAQ_DROP_EN_S 16
-
-#define ROCEE_DMAE_USER_CFG1_ROCEE_STREAM_ID_TB_CFG_S 0
-#define ROCEE_DMAE_USER_CFG1_ROCEE_STREAM_ID_TB_CFG_M \
- (((1UL << 24) - 1) << ROCEE_DMAE_USER_CFG1_ROCEE_STREAM_ID_TB_CFG_S)
-
-#define ROCEE_DMAE_USER_CFG1_ROCEE_CACHE_TB_CFG_S 24
-#define ROCEE_DMAE_USER_CFG1_ROCEE_CACHE_TB_CFG_M \
- (((1UL << 4) - 1) << ROCEE_DMAE_USER_CFG1_ROCEE_CACHE_TB_CFG_S)
-
-#define ROCEE_DMAE_USER_CFG2_ROCEE_STREAM_ID_PKT_CFG_S 0
-#define ROCEE_DMAE_USER_CFG2_ROCEE_STREAM_ID_PKT_CFG_M \
- (((1UL << 24) - 1) << ROCEE_DMAE_USER_CFG2_ROCEE_STREAM_ID_PKT_CFG_S)
-
-#define ROCEE_DMAE_USER_CFG2_ROCEE_CACHE_PKT_CFG_S 24
-#define ROCEE_DMAE_USER_CFG2_ROCEE_CACHE_PKT_CFG_M \
- (((1UL << 4) - 1) << ROCEE_DMAE_USER_CFG2_ROCEE_CACHE_PKT_CFG_S)
-
-#define ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_S 0
-#define ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_M \
- (((1UL << 16) - 1) << ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_S)
-
-#define ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_EMPTY_S 16
-#define ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_EMPTY_M \
- (((1UL << 16) - 1) << ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_EMPTY_S)
-
-#define ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_S 0
-#define ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_M \
- (((1UL << 16) - 1) << ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_S)
-
-#define ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_EMPTY_S 16
-#define ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_EMPTY_M \
- (((1UL << 16) - 1) << ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_EMPTY_S)
-
-#define ROCEE_RAQ_WL_ROCEE_RAQ_WL_S 0
-#define ROCEE_RAQ_WL_ROCEE_RAQ_WL_M \
- (((1UL << 8) - 1) << ROCEE_RAQ_WL_ROCEE_RAQ_WL_S)
-
-#define ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_POL_TIME_INTERVAL_S 0
-#define ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_POL_TIME_INTERVAL_M \
- (((1UL << 15) - 1) << \
- ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_POL_TIME_INTERVAL_S)
-
-#define ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_RAQ_TIMEOUT_CHK_CFG_S 16
-#define ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_RAQ_TIMEOUT_CHK_CFG_M \
- (((1UL << 4) - 1) << \
- ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_RAQ_TIMEOUT_CHK_CFG_S)
-
-#define ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_RAQ_TIMEOUT_CHK_EN_S 20
-
-#define ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_EXT_RAQ_MODE 21
-
-#define ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_SHIFT_S 0
-#define ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_SHIFT_M \
- (((1UL << 5) - 1) << ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_SHIFT_S)
-
-#define ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_BA_H_S 5
-#define ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_BA_H_M \
- (((1UL << 5) - 1) << ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_BA_H_S)
-
-#define ROCEE_EXT_DB_OTH_H_EXT_DB_OTH_SHIFT_S 0
-#define ROCEE_EXT_DB_OTH_H_EXT_DB_OTH_SHIFT_M \
- (((1UL << 5) - 1) << ROCEE_EXT_DB_OTH_H_EXT_DB_OTH_SHIFT_S)
-
-#define ROCEE_EXT_DB_SQ_H_EXT_DB_OTH_BA_H_S 5
-#define ROCEE_EXT_DB_SQ_H_EXT_DB_OTH_BA_H_M \
- (((1UL << 5) - 1) << ROCEE_EXT_DB_SQ_H_EXT_DB_OTH_BA_H_S)
-
-#define ROCEE_EXT_RAQ_H_EXT_RAQ_SHIFT_S 0
-#define ROCEE_EXT_RAQ_H_EXT_RAQ_SHIFT_M \
- (((1UL << 5) - 1) << ROCEE_EXT_RAQ_H_EXT_RAQ_SHIFT_S)
+#define FIELD_LOC(field_type, field_h, field_l) field_type, field_h, field_l
-#define ROCEE_EXT_RAQ_H_EXT_RAQ_BA_H_S 8
-#define ROCEE_EXT_RAQ_H_EXT_RAQ_BA_H_M \
- (((1UL << 5) - 1) << ROCEE_EXT_RAQ_H_EXT_RAQ_BA_H_S)
+#define _hr_reg_enable(ptr, field_type, field_h, field_l) \
+ ({ \
+ const field_type *_ptr = ptr; \
+ *((__le32 *)_ptr + (field_h) / 32) |= cpu_to_le32( \
+ BIT((field_l) % 32) + \
+ BUILD_BUG_ON_ZERO((field_h) != (field_l))); \
+ })
-#define ROCEE_BT_CMD_H_ROCEE_BT_CMD_IN_MDF_S 0
-#define ROCEE_BT_CMD_H_ROCEE_BT_CMD_IN_MDF_M \
- (((1UL << 19) - 1) << ROCEE_BT_CMD_H_ROCEE_BT_CMD_IN_MDF_S)
+#define hr_reg_enable(ptr, field) _hr_reg_enable(ptr, field)
-#define ROCEE_BT_CMD_H_ROCEE_BT_CMD_S 19
+#define _hr_reg_clear(ptr, field_type, field_h, field_l) \
+ ({ \
+ const field_type *_ptr = ptr; \
+ BUILD_BUG_ON(((field_h) / 32) != ((field_l) / 32)); \
+ *((__le32 *)_ptr + (field_h) / 32) &= \
+ ~cpu_to_le32(GENMASK((field_h) % 32, (field_l) % 32)); \
+ })
-#define ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_S 20
-#define ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_M \
- (((1UL << 2) - 1) << ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_S)
+#define hr_reg_clear(ptr, field) _hr_reg_clear(ptr, field)
-#define ROCEE_BT_CMD_H_ROCEE_BT_CMD_BA_H_S 22
-#define ROCEE_BT_CMD_H_ROCEE_BT_CMD_BA_H_M \
- (((1UL << 5) - 1) << ROCEE_BT_CMD_H_ROCEE_BT_CMD_BA_H_S)
+#define _hr_reg_write_bool(ptr, field_type, field_h, field_l, val) \
+ ({ \
+ (val) ? _hr_reg_enable(ptr, field_type, field_h, field_l) : \
+ _hr_reg_clear(ptr, field_type, field_h, field_l); \
+ })
-#define ROCEE_BT_CMD_H_ROCEE_BT_CMD_HW_SYNS_S 31
+#define hr_reg_write_bool(ptr, field, val) _hr_reg_write_bool(ptr, field, val)
-#define ROCEE_QP1C_CFG0_0_ROCEE_QP1C_QP_ST_S 0
-#define ROCEE_QP1C_CFG0_0_ROCEE_QP1C_QP_ST_M \
- (((1UL << 3) - 1) << ROCEE_QP1C_CFG0_0_ROCEE_QP1C_QP_ST_S)
+#define _hr_reg_write(ptr, field_type, field_h, field_l, val) \
+ ({ \
+ _hr_reg_clear(ptr, field_type, field_h, field_l); \
+ *((__le32 *)ptr + (field_h) / 32) |= cpu_to_le32(FIELD_PREP( \
+ GENMASK((field_h) % 32, (field_l) % 32), val)); \
+ })
-#define ROCEE_QP1C_CFG3_0_ROCEE_QP1C_RQ_HEAD_S 0
-#define ROCEE_QP1C_CFG3_0_ROCEE_QP1C_RQ_HEAD_M \
- (((1UL << 15) - 1) << ROCEE_QP1C_CFG3_0_ROCEE_QP1C_RQ_HEAD_S)
+#define hr_reg_write(ptr, field, val) _hr_reg_write(ptr, field, val)
-#define ROCEE_MB6_ROCEE_MB_CMD_S 0
-#define ROCEE_MB6_ROCEE_MB_CMD_M \
- (((1UL << 8) - 1) << ROCEE_MB6_ROCEE_MB_CMD_S)
+#define _hr_reg_read(ptr, field_type, field_h, field_l) \
+ ({ \
+ const field_type *_ptr = ptr; \
+ BUILD_BUG_ON(((field_h) / 32) != ((field_l) / 32)); \
+ FIELD_GET(GENMASK((field_h) % 32, (field_l) % 32), \
+ le32_to_cpu(*((__le32 *)_ptr + (field_h) / 32))); \
+ })
-#define ROCEE_MB6_ROCEE_MB_CMD_MDF_S 8
-#define ROCEE_MB6_ROCEE_MB_CMD_MDF_M \
- (((1UL << 4) - 1) << ROCEE_MB6_ROCEE_MB_CMD_MDF_S)
-
-#define ROCEE_MB6_ROCEE_MB_EVENT_S 14
-
-#define ROCEE_MB6_ROCEE_MB_HW_RUN_S 15
-
-#define ROCEE_MB6_ROCEE_MB_TOKEN_S 16
-#define ROCEE_MB6_ROCEE_MB_TOKEN_M \
- (((1UL << 16) - 1) << ROCEE_MB6_ROCEE_MB_TOKEN_S)
-
-#define ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_INP_H_S 0
-#define ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_INP_H_M \
- (((1UL << 24) - 1) << ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_INP_H_S)
-
-#define ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_MDF_S 24
-#define ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_MDF_M \
- (((1UL << 4) - 1) << ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_MDF_S)
-
-#define ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_S 28
-#define ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_M \
- (((1UL << 3) - 1) << ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_S)
-
-#define ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_HW_SYNS_S 31
-
-#define ROCEE_SMAC_H_ROCEE_SMAC_H_S 0
-#define ROCEE_SMAC_H_ROCEE_SMAC_H_M \
- (((1UL << 16) - 1) << ROCEE_SMAC_H_ROCEE_SMAC_H_S)
-
-#define ROCEE_SMAC_H_ROCEE_PORT_MTU_S 16
-#define ROCEE_SMAC_H_ROCEE_PORT_MTU_M \
- (((1UL << 4) - 1) << ROCEE_SMAC_H_ROCEE_PORT_MTU_S)
-
-#define ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_S 0
-#define ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_M \
- (((1UL << 2) - 1) << ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_S)
-
-#define ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_AEQE_SHIFT_S 8
-#define ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_AEQE_SHIFT_M \
- (((1UL << 4) - 1) << ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_AEQE_SHIFT_S)
-
-#define ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQ_ALM_OVF_INT_ST_S 17
-
-#define ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQ_BT_H_S 0
-#define ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQ_BT_H_M \
- (((1UL << 5) - 1) << ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQ_BT_H_S)
-
-#define ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQE_CUR_IDX_S 16
-#define ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQE_CUR_IDX_M \
- (((1UL << 16) - 1) << ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQE_CUR_IDX_S)
-
-#define ROCEE_CAEP_AEQE_CONS_IDX_CAEP_AEQE_CONS_IDX_S 0
-#define ROCEE_CAEP_AEQE_CONS_IDX_CAEP_AEQE_CONS_IDX_M \
- (((1UL << 16) - 1) << ROCEE_CAEP_AEQE_CONS_IDX_CAEP_AEQE_CONS_IDX_S)
-
-#define ROCEE_CAEP_CEQC_SHIFT_CAEP_CEQ_ALM_OVF_INT_ST_S 16
-#define ROCEE_CAEP_CE_IRQ_MASK_CAEP_CEQ_ALM_OVF_MASK_S 1
-#define ROCEE_CAEP_CEQ_ALM_OVF_CAEP_CEQ_ALM_OVF_S 0
-
-#define ROCEE_CAEP_AE_MASK_CAEP_AEQ_ALM_OVF_MASK_S 0
-#define ROCEE_CAEP_AE_MASK_CAEP_AE_IRQ_MASK_S 1
-
-#define ROCEE_CAEP_AE_ST_CAEP_AEQ_ALM_OVF_S 0
-
-#define ROCEE_SDB_ISSUE_PTR_SDB_ISSUE_PTR_S 0
-#define ROCEE_SDB_ISSUE_PTR_SDB_ISSUE_PTR_M \
- (((1UL << 28) - 1) << ROCEE_SDB_ISSUE_PTR_SDB_ISSUE_PTR_S)
-
-#define ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S 0
-#define ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M \
- (((1UL << 28) - 1) << ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S)
-
-#define ROCEE_SDB_PTR_CMP_BITS 28
-
-#define ROCEE_SDB_INV_CNT_SDB_INV_CNT_S 0
-#define ROCEE_SDB_INV_CNT_SDB_INV_CNT_M \
- (((1UL << 16) - 1) << ROCEE_SDB_INV_CNT_SDB_INV_CNT_S)
-
-#define ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_S 0
-#define ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_M \
- (((1UL << 16) - 1) << ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_S)
-
-#define ROCEE_SDB_CNT_CMP_BITS 16
-
-#define ROCEE_TSP_BP_ST_QH_FIFO_ENTRY_S 20
-
-#define ROCEE_CNT_CLR_CE_CNT_CLR_CE_S 0
+#define hr_reg_read(ptr, field) _hr_reg_read(ptr, field)
/*************ROCEE_REG DEFINITION****************/
#define ROCEE_VENDOR_ID_REG 0x0
@@ -341,6 +158,7 @@
#define ROCEE_BT_CMD_L_REG 0x200
#define ROCEE_MB1_REG 0x210
+#define ROCEE_MB6_REG 0x224
#define ROCEE_DB_SQ_L_0_REG 0x230
#define ROCEE_DB_OTHERS_L_0_REG 0x238
#define ROCEE_QP1C_CFG0_0_REG 0x270
@@ -352,14 +170,30 @@
#define ROCEE_CAEP_AE_MASK_REG 0x6C8
#define ROCEE_CAEP_AE_ST_REG 0x6CC
-#define ROCEE_SDB_ISSUE_PTR_REG 0x758
-#define ROCEE_SDB_SEND_PTR_REG 0x75C
#define ROCEE_CAEP_CQE_WCMD_EMPTY 0x850
#define ROCEE_SCAEP_WR_CQE_CNT 0x8D0
-#define ROCEE_SDB_INV_CNT_REG 0x9A4
-#define ROCEE_SDB_RETRY_CNT_REG 0x9AC
-#define ROCEE_TSP_BP_ST_REG 0x9EC
#define ROCEE_ECC_UCERR_ALM0_REG 0xB34
#define ROCEE_ECC_CERR_ALM0_REG 0xB40
+/* V2 ROCEE REG */
+#define ROCEE_TX_CMQ_BASEADDR_L_REG 0x07000
+#define ROCEE_TX_CMQ_BASEADDR_H_REG 0x07004
+#define ROCEE_TX_CMQ_DEPTH_REG 0x07008
+#define ROCEE_TX_CMQ_PI_REG 0x07010
+#define ROCEE_TX_CMQ_CI_REG 0x07014
+
+#define ROCEE_RX_CMQ_BASEADDR_L_REG 0x07018
+#define ROCEE_RX_CMQ_BASEADDR_H_REG 0x0701c
+#define ROCEE_RX_CMQ_DEPTH_REG 0x07020
+#define ROCEE_RX_CMQ_TAIL_REG 0x07024
+#define ROCEE_RX_CMQ_HEAD_REG 0x07028
+
+#define ROCEE_VF_EQ_DB_CFG0_REG 0x238
+#define ROCEE_VF_EQ_DB_CFG1_REG 0x23C
+
+#define ROCEE_VF_ABN_INT_CFG_REG 0x13000
+#define ROCEE_VF_ABN_INT_ST_REG 0x13004
+#define ROCEE_VF_ABN_INT_EN_REG 0x13008
+#define ROCEE_VF_EVENT_INT_EN_REG 0x1300c
+
#endif /* _HNS_ROCE_COMMON_H */
diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c
index 589496c8fb9e..6aa82fe9dd3d 100644
--- a/drivers/infiniband/hw/hns/hns_roce_cq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_cq.c
@@ -30,438 +30,565 @@
* SOFTWARE.
*/
-#include <linux/platform_device.h>
+#include <linux/pci.h>
#include <rdma/ib_umem.h>
+#include <rdma/uverbs_ioctl.h>
#include "hns_roce_device.h"
#include "hns_roce_cmd.h"
#include "hns_roce_hem.h"
-#include <rdma/hns-abi.h>
#include "hns_roce_common.h"
-static void hns_roce_ib_cq_comp(struct hns_roce_cq *hr_cq)
+void hns_roce_put_cq_bankid_for_uctx(struct hns_roce_ucontext *uctx)
{
- struct ib_cq *ibcq = &hr_cq->ib_cq;
+ struct hns_roce_dev *hr_dev = to_hr_dev(uctx->ibucontext.device);
+ struct hns_roce_cq_table *cq_table = &hr_dev->cq_table;
+
+ if (hr_dev->pci_dev->revision < PCI_REVISION_ID_HIP09)
+ return;
- ibcq->comp_handler(ibcq, ibcq->cq_context);
+ mutex_lock(&cq_table->bank_mutex);
+ cq_table->ctx_num[uctx->cq_bank_id]--;
+ mutex_unlock(&cq_table->bank_mutex);
}
-static void hns_roce_ib_cq_event(struct hns_roce_cq *hr_cq,
- enum hns_roce_event event_type)
+void hns_roce_get_cq_bankid_for_uctx(struct hns_roce_ucontext *uctx)
{
- struct hns_roce_dev *hr_dev;
- struct ib_event event;
- struct ib_cq *ibcq;
-
- ibcq = &hr_cq->ib_cq;
- hr_dev = to_hr_dev(ibcq->device);
+ struct hns_roce_dev *hr_dev = to_hr_dev(uctx->ibucontext.device);
+ struct hns_roce_cq_table *cq_table = &hr_dev->cq_table;
+ u32 least_load = cq_table->ctx_num[0];
+ u8 bankid = 0;
+ u8 i;
- if (event_type != HNS_ROCE_EVENT_TYPE_CQ_ID_INVALID &&
- event_type != HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR &&
- event_type != HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW) {
- dev_err(&hr_dev->pdev->dev,
- "hns_roce_ib: Unexpected event type 0x%x on CQ %06lx\n",
- event_type, hr_cq->cqn);
+ if (hr_dev->pci_dev->revision < PCI_REVISION_ID_HIP09)
return;
+
+ mutex_lock(&cq_table->bank_mutex);
+ for (i = 1; i < HNS_ROCE_CQ_BANK_NUM; i++) {
+ if (cq_table->ctx_num[i] < least_load) {
+ least_load = cq_table->ctx_num[i];
+ bankid = i;
+ }
}
+ cq_table->ctx_num[bankid]++;
+ mutex_unlock(&cq_table->bank_mutex);
- if (ibcq->event_handler) {
- event.device = ibcq->device;
- event.event = IB_EVENT_CQ_ERR;
- event.element.cq = ibcq;
- ibcq->event_handler(&event, ibcq->cq_context);
+ uctx->cq_bank_id = bankid;
+}
+
+static u8 get_least_load_bankid_for_cq(struct hns_roce_bank *bank)
+{
+ u32 least_load = bank[0].inuse;
+ u8 bankid = 0;
+ u32 bankcnt;
+ u8 i;
+
+ for (i = 1; i < HNS_ROCE_CQ_BANK_NUM; i++) {
+ bankcnt = bank[i].inuse;
+ if (bankcnt < least_load) {
+ least_load = bankcnt;
+ bankid = i;
+ }
}
+
+ return bankid;
}
-static int hns_roce_sw2hw_cq(struct hns_roce_dev *dev,
- struct hns_roce_cmd_mailbox *mailbox,
- unsigned long cq_num)
+static u8 select_cq_bankid(struct hns_roce_dev *hr_dev,
+ struct hns_roce_bank *bank, struct ib_udata *udata)
{
- return hns_roce_cmd_mbox(dev, mailbox->dma, 0, cq_num, 0,
- HNS_ROCE_CMD_SW2HW_CQ, HNS_ROCE_CMD_TIMEOUT_MSECS);
+ struct hns_roce_ucontext *uctx = udata ?
+ rdma_udata_to_drv_context(udata, struct hns_roce_ucontext,
+ ibucontext) : NULL;
+
+ if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
+ return uctx ? uctx->cq_bank_id : 0;
+
+ return get_least_load_bankid_for_cq(bank);
}
-static int hns_roce_cq_alloc(struct hns_roce_dev *hr_dev, int nent,
- struct hns_roce_mtt *hr_mtt,
- struct hns_roce_uar *hr_uar,
- struct hns_roce_cq *hr_cq, int vector)
+static int alloc_cqn(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq,
+ struct ib_udata *udata)
{
- struct hns_roce_cmd_mailbox *mailbox = NULL;
- struct hns_roce_cq_table *cq_table = NULL;
- struct device *dev = &hr_dev->pdev->dev;
- dma_addr_t dma_handle;
- u64 *mtts = NULL;
- int ret = 0;
-
- cq_table = &hr_dev->cq_table;
-
- /* Get the physical address of cq buf */
- mtts = hns_roce_table_find(&hr_dev->mr_table.mtt_table,
- hr_mtt->first_seg, &dma_handle);
- if (!mtts) {
- dev_err(dev, "CQ alloc.Failed to find cq buf addr.\n");
- return -EINVAL;
+ struct hns_roce_cq_table *cq_table = &hr_dev->cq_table;
+ struct hns_roce_bank *bank;
+ u8 bankid;
+ int id;
+
+ mutex_lock(&cq_table->bank_mutex);
+ bankid = select_cq_bankid(hr_dev, cq_table->bank, udata);
+ bank = &cq_table->bank[bankid];
+
+ id = ida_alloc_range(&bank->ida, bank->min, bank->max, GFP_KERNEL);
+ if (id < 0) {
+ mutex_unlock(&cq_table->bank_mutex);
+ return id;
}
- if (vector >= hr_dev->caps.num_comp_vectors) {
- dev_err(dev, "CQ alloc.Invalid vector.\n");
- return -EINVAL;
- }
- hr_cq->vector = vector;
+ /* the lower 2 bits is bankid */
+ hr_cq->cqn = (id << CQ_BANKID_SHIFT) | bankid;
+ bank->inuse++;
+ mutex_unlock(&cq_table->bank_mutex);
- ret = hns_roce_bitmap_alloc(&cq_table->bitmap, &hr_cq->cqn);
- if (ret == -1) {
- dev_err(dev, "CQ alloc.Failed to alloc index.\n");
- return -ENOMEM;
- }
+ return 0;
+}
- /* Get CQC memory HEM(Hardware Entry Memory) table */
- ret = hns_roce_table_get(hr_dev, &cq_table->table, hr_cq->cqn);
- if (ret) {
- dev_err(dev, "CQ alloc.Failed to get context mem.\n");
- goto err_out;
- }
+static inline u8 get_cq_bankid(unsigned long cqn)
+{
+ /* The lower 2 bits of CQN are used to hash to different banks */
+ return (u8)(cqn & GENMASK(1, 0));
+}
- /* The cq insert radix tree */
- spin_lock_irq(&cq_table->lock);
- /* Radix_tree: The associated pointer and long integer key value like */
- ret = radix_tree_insert(&cq_table->tree, hr_cq->cqn, hr_cq);
- spin_unlock_irq(&cq_table->lock);
- if (ret) {
- dev_err(dev, "CQ alloc.Failed to radix_tree_insert.\n");
- goto err_put;
- }
+static void free_cqn(struct hns_roce_dev *hr_dev, unsigned long cqn)
+{
+ struct hns_roce_cq_table *cq_table = &hr_dev->cq_table;
+ struct hns_roce_bank *bank;
+
+ bank = &cq_table->bank[get_cq_bankid(cqn)];
+
+ ida_free(&bank->ida, cqn >> CQ_BANKID_SHIFT);
+
+ mutex_lock(&cq_table->bank_mutex);
+ bank->inuse--;
+ mutex_unlock(&cq_table->bank_mutex);
+}
+
+static int hns_roce_create_cqc(struct hns_roce_dev *hr_dev,
+ struct hns_roce_cq *hr_cq,
+ u64 *mtts, dma_addr_t dma_handle)
+{
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ struct hns_roce_cmd_mailbox *mailbox;
+ int ret;
- /* Allocate mailbox memory */
mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
if (IS_ERR(mailbox)) {
- ret = PTR_ERR(mailbox);
- goto err_radix;
+ ibdev_err(ibdev, "failed to alloc mailbox for CQC.\n");
+ return PTR_ERR(mailbox);
}
- hr_dev->hw->write_cqc(hr_dev, hr_cq, mailbox->buf, mtts, dma_handle,
- nent, vector);
+ hr_dev->hw->write_cqc(hr_dev, hr_cq, mailbox->buf, mtts, dma_handle);
+
+ ret = hns_roce_create_hw_ctx(hr_dev, mailbox, HNS_ROCE_CMD_CREATE_CQC,
+ hr_cq->cqn);
+ if (ret)
+ ibdev_err(ibdev,
+ "failed to send create cmd for CQ(0x%lx), ret = %d.\n",
+ hr_cq->cqn, ret);
- /* Send mailbox to hw */
- ret = hns_roce_sw2hw_cq(hr_dev, mailbox, hr_cq->cqn);
hns_roce_free_cmd_mailbox(hr_dev, mailbox);
+
+ return ret;
+}
+
+static int alloc_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq)
+{
+ struct hns_roce_cq_table *cq_table = &hr_dev->cq_table;
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ u64 mtts[MTT_MIN_COUNT] = {};
+ int ret;
+
+ ret = hns_roce_mtr_find(hr_dev, &hr_cq->mtr, 0, mtts, ARRAY_SIZE(mtts));
if (ret) {
- dev_err(dev, "CQ alloc.Failed to cmd mailbox.\n");
- goto err_radix;
+ ibdev_err(ibdev, "failed to find CQ mtr, ret = %d.\n", ret);
+ return ret;
}
- hr_cq->cons_index = 0;
- hr_cq->uar = hr_uar;
+ /* Get CQC memory HEM(Hardware Entry Memory) table */
+ ret = hns_roce_table_get(hr_dev, &cq_table->table, hr_cq->cqn);
+ if (ret) {
+ ibdev_err(ibdev, "failed to get CQ(0x%lx) context, ret = %d.\n",
+ hr_cq->cqn, ret);
+ return ret;
+ }
- atomic_set(&hr_cq->refcount, 1);
- init_completion(&hr_cq->free);
+ ret = xa_err(xa_store_irq(&cq_table->array, hr_cq->cqn, hr_cq, GFP_KERNEL));
+ if (ret) {
+ ibdev_err(ibdev, "failed to xa_store CQ, ret = %d.\n", ret);
+ goto err_put;
+ }
- return 0;
+ ret = hns_roce_create_cqc(hr_dev, hr_cq, mtts,
+ hns_roce_get_mtr_ba(&hr_cq->mtr));
+ if (ret)
+ goto err_xa;
-err_radix:
- spin_lock_irq(&cq_table->lock);
- radix_tree_delete(&cq_table->tree, hr_cq->cqn);
- spin_unlock_irq(&cq_table->lock);
+ return 0;
+err_xa:
+ xa_erase_irq(&cq_table->array, hr_cq->cqn);
err_put:
hns_roce_table_put(hr_dev, &cq_table->table, hr_cq->cqn);
-err_out:
- hns_roce_bitmap_free(&cq_table->bitmap, hr_cq->cqn, BITMAP_NO_RR);
return ret;
}
-static int hns_roce_hw2sw_cq(struct hns_roce_dev *dev,
- struct hns_roce_cmd_mailbox *mailbox,
- unsigned long cq_num)
-{
- return hns_roce_cmd_mbox(dev, 0, mailbox ? mailbox->dma : 0, cq_num,
- mailbox ? 0 : 1, HNS_ROCE_CMD_HW2SW_CQ,
- HNS_ROCE_CMD_TIMEOUT_MSECS);
-}
-
-void hns_roce_free_cq(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq)
+static void free_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq)
{
struct hns_roce_cq_table *cq_table = &hr_dev->cq_table;
- struct device *dev = &hr_dev->pdev->dev;
+ struct device *dev = hr_dev->dev;
int ret;
- ret = hns_roce_hw2sw_cq(hr_dev, NULL, hr_cq->cqn);
+ ret = hns_roce_destroy_hw_ctx(hr_dev, HNS_ROCE_CMD_DESTROY_CQC,
+ hr_cq->cqn);
if (ret)
- dev_err(dev, "HW2SW_CQ failed (%d) for CQN %06lx\n", ret,
- hr_cq->cqn);
+ dev_err_ratelimited(dev, "DESTROY_CQ failed (%d) for CQN %06lx\n",
+ ret, hr_cq->cqn);
+
+ xa_erase_irq(&cq_table->array, hr_cq->cqn);
/* Waiting interrupt process procedure carried out */
synchronize_irq(hr_dev->eq_table.eq[hr_cq->vector].irq);
/* wait for all interrupt processed */
- if (atomic_dec_and_test(&hr_cq->refcount))
+ if (refcount_dec_and_test(&hr_cq->refcount))
complete(&hr_cq->free);
wait_for_completion(&hr_cq->free);
- spin_lock_irq(&cq_table->lock);
- radix_tree_delete(&cq_table->tree, hr_cq->cqn);
- spin_unlock_irq(&cq_table->lock);
-
hns_roce_table_put(hr_dev, &cq_table->table, hr_cq->cqn);
- hns_roce_bitmap_free(&cq_table->bitmap, hr_cq->cqn, BITMAP_NO_RR);
}
-static int hns_roce_ib_get_cq_umem(struct hns_roce_dev *hr_dev,
- struct ib_ucontext *context,
- struct hns_roce_cq_buf *buf,
- struct ib_umem **umem, u64 buf_addr, int cqe)
+static int alloc_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq,
+ struct ib_udata *udata, unsigned long addr)
{
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ struct hns_roce_buf_attr buf_attr = {};
int ret;
- *umem = ib_umem_get(context, buf_addr, cqe * hr_dev->caps.cq_entry_sz,
- IB_ACCESS_LOCAL_WRITE, 1);
- if (IS_ERR(*umem))
- return PTR_ERR(*umem);
+ buf_attr.page_shift = hr_dev->caps.cqe_buf_pg_sz + PAGE_SHIFT;
+ buf_attr.region[0].size = hr_cq->cq_depth * hr_cq->cqe_size;
+ buf_attr.region[0].hopnum = hr_dev->caps.cqe_hop_num;
+ buf_attr.region_count = 1;
- ret = hns_roce_mtt_init(hr_dev, ib_umem_page_count(*umem),
- ilog2((unsigned int)(*umem)->page_size),
- &buf->hr_mtt);
+ ret = hns_roce_mtr_create(hr_dev, &hr_cq->mtr, &buf_attr,
+ hr_dev->caps.cqe_ba_pg_sz + PAGE_SHIFT,
+ udata, addr);
if (ret)
- goto err_buf;
+ ibdev_err(ibdev, "failed to alloc CQ mtr, ret = %d.\n", ret);
- ret = hns_roce_ib_umem_write_mtt(hr_dev, &buf->hr_mtt, *umem);
- if (ret)
- goto err_mtt;
+ return ret;
+}
- return 0;
+static void free_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq)
+{
+ hns_roce_mtr_destroy(hr_dev, &hr_cq->mtr);
+}
-err_mtt:
- hns_roce_mtt_cleanup(hr_dev, &buf->hr_mtt);
+static int alloc_cq_db(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq,
+ struct ib_udata *udata, unsigned long addr,
+ struct hns_roce_ib_create_cq_resp *resp)
+{
+ bool has_db = hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_CQ_RECORD_DB;
+ struct hns_roce_ucontext *uctx;
+ int err;
+
+ if (udata) {
+ if (has_db &&
+ udata->outlen >= offsetofend(typeof(*resp), cap_flags)) {
+ uctx = rdma_udata_to_drv_context(udata,
+ struct hns_roce_ucontext, ibucontext);
+ err = hns_roce_db_map_user(uctx, addr, &hr_cq->db);
+ if (err)
+ return err;
+ hr_cq->flags |= HNS_ROCE_CQ_FLAG_RECORD_DB;
+ resp->cap_flags |= HNS_ROCE_CQ_FLAG_RECORD_DB;
+ }
+ } else {
+ if (has_db) {
+ err = hns_roce_alloc_db(hr_dev, &hr_cq->db, 1);
+ if (err)
+ return err;
+ hr_cq->set_ci_db = hr_cq->db.db_record;
+ *hr_cq->set_ci_db = 0;
+ hr_cq->flags |= HNS_ROCE_CQ_FLAG_RECORD_DB;
+ }
+ hr_cq->db_reg = hr_dev->reg_base + hr_dev->odb_offset +
+ DB_REG_OFFSET * hr_dev->priv_uar.index;
+ }
-err_buf:
- ib_umem_release(*umem);
- return ret;
+ return 0;
}
-static int hns_roce_ib_alloc_cq_buf(struct hns_roce_dev *hr_dev,
- struct hns_roce_cq_buf *buf, u32 nent)
+static void free_cq_db(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq,
+ struct ib_udata *udata)
{
- int ret;
+ struct hns_roce_ucontext *uctx;
- ret = hns_roce_buf_alloc(hr_dev, nent * hr_dev->caps.cq_entry_sz,
- PAGE_SIZE * 2, &buf->hr_buf);
- if (ret)
- goto out;
+ if (!(hr_cq->flags & HNS_ROCE_CQ_FLAG_RECORD_DB))
+ return;
- ret = hns_roce_mtt_init(hr_dev, buf->hr_buf.npages,
- buf->hr_buf.page_shift, &buf->hr_mtt);
- if (ret)
- goto err_buf;
+ hr_cq->flags &= ~HNS_ROCE_CQ_FLAG_RECORD_DB;
+ if (udata) {
+ uctx = rdma_udata_to_drv_context(udata,
+ struct hns_roce_ucontext,
+ ibucontext);
+ hns_roce_db_unmap_user(uctx, &hr_cq->db);
+ } else {
+ hns_roce_free_db(hr_dev, &hr_cq->db);
+ }
+}
- ret = hns_roce_buf_write_mtt(hr_dev, &buf->hr_mtt, &buf->hr_buf);
- if (ret)
- goto err_mtt;
+static int verify_cq_create_attr(struct hns_roce_dev *hr_dev,
+ const struct ib_cq_init_attr *attr)
+{
+ struct ib_device *ibdev = &hr_dev->ib_dev;
- return 0;
+ if (!attr->cqe || attr->cqe > hr_dev->caps.max_cqes) {
+ ibdev_err(ibdev, "failed to check CQ count %u, max = %u.\n",
+ attr->cqe, hr_dev->caps.max_cqes);
+ return -EINVAL;
+ }
-err_mtt:
- hns_roce_mtt_cleanup(hr_dev, &buf->hr_mtt);
+ if (attr->comp_vector >= hr_dev->caps.num_comp_vectors) {
+ ibdev_err(ibdev, "failed to check CQ vector = %u, max = %d.\n",
+ attr->comp_vector, hr_dev->caps.num_comp_vectors);
+ return -EINVAL;
+ }
-err_buf:
- hns_roce_buf_free(hr_dev, nent * hr_dev->caps.cq_entry_sz,
- &buf->hr_buf);
-out:
- return ret;
+ return 0;
}
-static void hns_roce_ib_free_cq_buf(struct hns_roce_dev *hr_dev,
- struct hns_roce_cq_buf *buf, int cqe)
+static int get_cq_ucmd(struct hns_roce_cq *hr_cq, struct ib_udata *udata,
+ struct hns_roce_ib_create_cq *ucmd)
{
- hns_roce_buf_free(hr_dev, (cqe + 1) * hr_dev->caps.cq_entry_sz,
- &buf->hr_buf);
-}
+ struct ib_device *ibdev = hr_cq->ib_cq.device;
+ int ret;
-struct ib_cq *hns_roce_ib_create_cq(struct ib_device *ib_dev,
- const struct ib_cq_init_attr *attr,
- struct ib_ucontext *context,
- struct ib_udata *udata)
-{
- struct hns_roce_dev *hr_dev = to_hr_dev(ib_dev);
- struct device *dev = &hr_dev->pdev->dev;
- struct hns_roce_ib_create_cq ucmd;
- struct hns_roce_cq *hr_cq = NULL;
- struct hns_roce_uar *uar = NULL;
- int vector = attr->comp_vector;
- int cq_entries = attr->cqe;
- int ret = 0;
-
- if (cq_entries < 1 || cq_entries > hr_dev->caps.max_cqes) {
- dev_err(dev, "Creat CQ failed. entries=%d, max=%d\n",
- cq_entries, hr_dev->caps.max_cqes);
- return ERR_PTR(-EINVAL);
+ ret = ib_copy_from_udata(ucmd, udata, min(udata->inlen, sizeof(*ucmd)));
+ if (ret) {
+ ibdev_err(ibdev, "failed to copy CQ udata, ret = %d.\n", ret);
+ return ret;
}
- hr_cq = kmalloc(sizeof(*hr_cq), GFP_KERNEL);
- if (!hr_cq)
- return ERR_PTR(-ENOMEM);
+ return 0;
+}
+
+static void set_cq_param(struct hns_roce_cq *hr_cq, u32 cq_entries, int vector,
+ struct hns_roce_ib_create_cq *ucmd)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(hr_cq->ib_cq.device);
- /* In v1 engine, parameter verification */
- if (cq_entries < HNS_ROCE_MIN_CQE_NUM)
- cq_entries = HNS_ROCE_MIN_CQE_NUM;
+ cq_entries = max(cq_entries, hr_dev->caps.min_cqes);
+ cq_entries = roundup_pow_of_two(cq_entries);
+ hr_cq->ib_cq.cqe = cq_entries - 1; /* used as cqe index */
+ hr_cq->cq_depth = cq_entries;
+ hr_cq->vector = vector;
- cq_entries = roundup_pow_of_two((unsigned int)cq_entries);
- hr_cq->ib_cq.cqe = cq_entries - 1;
spin_lock_init(&hr_cq->lock);
+ INIT_LIST_HEAD(&hr_cq->sq_list);
+ INIT_LIST_HEAD(&hr_cq->rq_list);
+}
- if (context) {
- if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) {
- dev_err(dev, "Failed to copy_from_udata.\n");
- ret = -EFAULT;
- goto err_cq;
- }
+static int set_cqe_size(struct hns_roce_cq *hr_cq, struct ib_udata *udata,
+ struct hns_roce_ib_create_cq *ucmd)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(hr_cq->ib_cq.device);
+
+ if (!udata) {
+ hr_cq->cqe_size = hr_dev->caps.cqe_sz;
+ return 0;
+ }
- /* Get user space address, write it into mtt table */
- ret = hns_roce_ib_get_cq_umem(hr_dev, context, &hr_cq->hr_buf,
- &hr_cq->umem, ucmd.buf_addr,
- cq_entries);
- if (ret) {
- dev_err(dev, "Failed to get_cq_umem.\n");
- goto err_cq;
+ if (udata->inlen >= offsetofend(typeof(*ucmd), cqe_size)) {
+ if (ucmd->cqe_size != HNS_ROCE_V2_CQE_SIZE &&
+ ucmd->cqe_size != HNS_ROCE_V3_CQE_SIZE) {
+ ibdev_err(&hr_dev->ib_dev,
+ "invalid cqe size %u.\n", ucmd->cqe_size);
+ return -EINVAL;
}
- /* Get user space parameters */
- uar = &to_hr_ucontext(context)->uar;
+ hr_cq->cqe_size = ucmd->cqe_size;
} else {
- /* Init mmt table and write buff address to mtt table */
- ret = hns_roce_ib_alloc_cq_buf(hr_dev, &hr_cq->hr_buf,
- cq_entries);
- if (ret) {
- dev_err(dev, "Failed to alloc_cq_buf.\n");
- goto err_cq;
- }
+ hr_cq->cqe_size = HNS_ROCE_V2_CQE_SIZE;
+ }
+
+ return 0;
+}
+
+int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ib_cq->device);
+ struct ib_udata *udata = &attrs->driver_udata;
+ struct hns_roce_ib_create_cq_resp resp = {};
+ struct hns_roce_cq *hr_cq = to_hr_cq(ib_cq);
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ struct hns_roce_ib_create_cq ucmd = {};
+ int ret;
+
+ if (attr->flags) {
+ ret = -EOPNOTSUPP;
+ goto err_out;
+ }
+
+ ret = verify_cq_create_attr(hr_dev, attr);
+ if (ret)
+ goto err_out;
- uar = &hr_dev->priv_uar;
- hr_cq->cq_db_l = hr_dev->reg_base + ROCEE_DB_OTHERS_L_0_REG +
- 0x1000 * uar->index;
+ if (udata) {
+ ret = get_cq_ucmd(hr_cq, udata, &ucmd);
+ if (ret)
+ goto err_out;
}
- /* Allocate cq index, fill cq_context */
- ret = hns_roce_cq_alloc(hr_dev, cq_entries, &hr_cq->hr_buf.hr_mtt, uar,
- hr_cq, vector);
+ set_cq_param(hr_cq, attr->cqe, attr->comp_vector, &ucmd);
+
+ ret = set_cqe_size(hr_cq, udata, &ucmd);
+ if (ret)
+ goto err_out;
+
+ ret = alloc_cq_buf(hr_dev, hr_cq, udata, ucmd.buf_addr);
if (ret) {
- dev_err(dev, "Creat CQ .Failed to cq_alloc.\n");
- goto err_mtt;
+ ibdev_err(ibdev, "failed to alloc CQ buf, ret = %d.\n", ret);
+ goto err_out;
}
- /*
- * For the QP created by kernel space, tptr value should be initialized
- * to zero; For the QP created by user space, it will cause synchronous
- * problems if tptr is set to zero here, so we initialze it in user
- * space.
- */
- if (!context)
- *hr_cq->tptr_addr = 0;
-
- /* Get created cq handler and carry out event */
- hr_cq->comp = hns_roce_ib_cq_comp;
- hr_cq->event = hns_roce_ib_cq_event;
- hr_cq->cq_depth = cq_entries;
+ ret = alloc_cq_db(hr_dev, hr_cq, udata, ucmd.db_addr, &resp);
+ if (ret) {
+ ibdev_err(ibdev, "failed to alloc CQ db, ret = %d.\n", ret);
+ goto err_cq_buf;
+ }
+
+ ret = alloc_cqn(hr_dev, hr_cq, udata);
+ if (ret) {
+ ibdev_err(ibdev, "failed to alloc CQN, ret = %d.\n", ret);
+ goto err_cq_db;
+ }
+
+ ret = alloc_cqc(hr_dev, hr_cq);
+ if (ret) {
+ ibdev_err(ibdev,
+ "failed to alloc CQ context, ret = %d.\n", ret);
+ goto err_cqn;
+ }
- if (context) {
- if (ib_copy_to_udata(udata, &hr_cq->cqn, sizeof(u64))) {
- ret = -EFAULT;
+ if (udata) {
+ resp.cqn = hr_cq->cqn;
+ ret = ib_copy_to_udata(udata, &resp,
+ min(udata->outlen, sizeof(resp)));
+ if (ret)
goto err_cqc;
- }
}
- return &hr_cq->ib_cq;
+ hr_cq->cons_index = 0;
+ hr_cq->arm_sn = 1;
+ refcount_set(&hr_cq->refcount, 1);
+ init_completion(&hr_cq->free);
+
+ return 0;
err_cqc:
- hns_roce_free_cq(hr_dev, hr_cq);
-
-err_mtt:
- hns_roce_mtt_cleanup(hr_dev, &hr_cq->hr_buf.hr_mtt);
- if (context)
- ib_umem_release(hr_cq->umem);
- else
- hns_roce_ib_free_cq_buf(hr_dev, &hr_cq->hr_buf,
- hr_cq->ib_cq.cqe);
-
-err_cq:
- kfree(hr_cq);
- return ERR_PTR(ret);
+ free_cqc(hr_dev, hr_cq);
+err_cqn:
+ free_cqn(hr_dev, hr_cq->cqn);
+err_cq_db:
+ free_cq_db(hr_dev, hr_cq, udata);
+err_cq_buf:
+ free_cq_buf(hr_dev, hr_cq);
+err_out:
+ atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_CQ_CREATE_ERR_CNT]);
+
+ return ret;
}
-int hns_roce_ib_destroy_cq(struct ib_cq *ib_cq)
+int hns_roce_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
{
struct hns_roce_dev *hr_dev = to_hr_dev(ib_cq->device);
struct hns_roce_cq *hr_cq = to_hr_cq(ib_cq);
- int ret = 0;
- if (hr_dev->hw->destroy_cq) {
- ret = hr_dev->hw->destroy_cq(ib_cq);
- } else {
- hns_roce_free_cq(hr_dev, hr_cq);
- hns_roce_mtt_cleanup(hr_dev, &hr_cq->hr_buf.hr_mtt);
-
- if (ib_cq->uobject)
- ib_umem_release(hr_cq->umem);
- else
- /* Free the buff of stored cq */
- hns_roce_ib_free_cq_buf(hr_dev, &hr_cq->hr_buf,
- ib_cq->cqe);
-
- kfree(hr_cq);
- }
+ free_cqc(hr_dev, hr_cq);
+ free_cqn(hr_dev, hr_cq->cqn);
+ free_cq_db(hr_dev, hr_cq, udata);
+ free_cq_buf(hr_dev, hr_cq);
- return ret;
+ return 0;
}
void hns_roce_cq_completion(struct hns_roce_dev *hr_dev, u32 cqn)
{
- struct device *dev = &hr_dev->pdev->dev;
- struct hns_roce_cq *cq;
+ struct hns_roce_cq *hr_cq;
+ struct ib_cq *ibcq;
- cq = radix_tree_lookup(&hr_dev->cq_table.tree,
- cqn & (hr_dev->caps.num_cqs - 1));
- if (!cq) {
- dev_warn(dev, "Completion event for bogus CQ 0x%08x\n", cqn);
+ hr_cq = xa_load(&hr_dev->cq_table.array,
+ cqn & (hr_dev->caps.num_cqs - 1));
+ if (!hr_cq) {
+ dev_warn(hr_dev->dev, "completion event for bogus CQ 0x%06x\n",
+ cqn);
return;
}
- cq->comp(cq);
+ ++hr_cq->arm_sn;
+ ibcq = &hr_cq->ib_cq;
+ if (ibcq->comp_handler)
+ ibcq->comp_handler(ibcq, ibcq->cq_context);
}
void hns_roce_cq_event(struct hns_roce_dev *hr_dev, u32 cqn, int event_type)
{
- struct hns_roce_cq_table *cq_table = &hr_dev->cq_table;
- struct device *dev = &hr_dev->pdev->dev;
- struct hns_roce_cq *cq;
+ struct device *dev = hr_dev->dev;
+ struct hns_roce_cq *hr_cq;
+ struct ib_event event;
+ struct ib_cq *ibcq;
- cq = radix_tree_lookup(&cq_table->tree,
- cqn & (hr_dev->caps.num_cqs - 1));
- if (cq)
- atomic_inc(&cq->refcount);
+ if (event_type != HNS_ROCE_EVENT_TYPE_CQ_ID_INVALID &&
+ event_type != HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR &&
+ event_type != HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW) {
+ dev_err(dev, "unexpected event type 0x%x on CQ 0x%06x\n",
+ event_type, cqn);
+ return;
+ }
- if (!cq) {
- dev_warn(dev, "Async event for bogus CQ %08x\n", cqn);
+ xa_lock(&hr_dev->cq_table.array);
+ hr_cq = xa_load(&hr_dev->cq_table.array,
+ cqn & (hr_dev->caps.num_cqs - 1));
+ if (hr_cq)
+ refcount_inc(&hr_cq->refcount);
+ xa_unlock(&hr_dev->cq_table.array);
+ if (!hr_cq) {
+ dev_warn(dev, "async event for bogus CQ 0x%06x\n", cqn);
return;
}
- cq->event(cq, (enum hns_roce_event)event_type);
+ ibcq = &hr_cq->ib_cq;
+ if (ibcq->event_handler) {
+ event.device = ibcq->device;
+ event.element.cq = ibcq;
+ event.event = IB_EVENT_CQ_ERR;
+ ibcq->event_handler(&event, ibcq->cq_context);
+ }
- if (atomic_dec_and_test(&cq->refcount))
- complete(&cq->free);
+ if (refcount_dec_and_test(&hr_cq->refcount))
+ complete(&hr_cq->free);
}
-int hns_roce_init_cq_table(struct hns_roce_dev *hr_dev)
+void hns_roce_init_cq_table(struct hns_roce_dev *hr_dev)
{
struct hns_roce_cq_table *cq_table = &hr_dev->cq_table;
+ unsigned int reserved_from_bot;
+ unsigned int i;
- spin_lock_init(&cq_table->lock);
- INIT_RADIX_TREE(&cq_table->tree, GFP_ATOMIC);
+ mutex_init(&cq_table->bank_mutex);
+ xa_init(&cq_table->array);
- return hns_roce_bitmap_init(&cq_table->bitmap, hr_dev->caps.num_cqs,
- hr_dev->caps.num_cqs - 1,
- hr_dev->caps.reserved_cqs, 0);
+ reserved_from_bot = hr_dev->caps.reserved_cqs;
+
+ for (i = 0; i < reserved_from_bot; i++) {
+ cq_table->bank[get_cq_bankid(i)].inuse++;
+ cq_table->bank[get_cq_bankid(i)].min++;
+ }
+
+ for (i = 0; i < HNS_ROCE_CQ_BANK_NUM; i++) {
+ ida_init(&cq_table->bank[i].ida);
+ cq_table->bank[i].max = hr_dev->caps.num_cqs /
+ HNS_ROCE_CQ_BANK_NUM - 1;
+ }
}
void hns_roce_cleanup_cq_table(struct hns_roce_dev *hr_dev)
{
- hns_roce_bitmap_cleanup(&hr_dev->cq_table.bitmap);
+ int i;
+
+ for (i = 0; i < HNS_ROCE_CQ_BANK_NUM; i++)
+ ida_destroy(&hr_dev->cq_table.bank[i].ida);
+ xa_destroy(&hr_dev->cq_table.array);
+ mutex_destroy(&hr_dev->cq_table.bank_mutex);
}
diff --git a/drivers/infiniband/hw/hns/hns_roce_db.c b/drivers/infiniband/hw/hns/hns_roce_db.c
new file mode 100644
index 000000000000..5c4c0480832b
--- /dev/null
+++ b/drivers/infiniband/hw/hns/hns_roce_db.c
@@ -0,0 +1,180 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) */
+/*
+ * Copyright (c) 2017 Hisilicon Limited.
+ * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
+ */
+
+#include <rdma/ib_umem.h>
+#include "hns_roce_device.h"
+
+int hns_roce_db_map_user(struct hns_roce_ucontext *context, unsigned long virt,
+ struct hns_roce_db *db)
+{
+ unsigned long page_addr = virt & PAGE_MASK;
+ struct hns_roce_user_db_page *page;
+ unsigned int offset;
+ int ret = 0;
+
+ mutex_lock(&context->page_mutex);
+
+ list_for_each_entry(page, &context->page_list, list)
+ if (page->user_virt == page_addr)
+ goto found;
+
+ page = kmalloc(sizeof(*page), GFP_KERNEL);
+ if (!page) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ refcount_set(&page->refcount, 1);
+ page->user_virt = page_addr;
+ page->umem = ib_umem_get(context->ibucontext.device, page_addr,
+ PAGE_SIZE, 0);
+ if (IS_ERR(page->umem)) {
+ ret = PTR_ERR(page->umem);
+ kfree(page);
+ goto out;
+ }
+
+ list_add(&page->list, &context->page_list);
+
+found:
+ offset = virt - page_addr;
+ db->dma = sg_dma_address(page->umem->sgt_append.sgt.sgl) + offset;
+ db->virt_addr = sg_virt(page->umem->sgt_append.sgt.sgl) + offset;
+ db->u.user_page = page;
+ refcount_inc(&page->refcount);
+
+out:
+ mutex_unlock(&context->page_mutex);
+
+ return ret;
+}
+
+void hns_roce_db_unmap_user(struct hns_roce_ucontext *context,
+ struct hns_roce_db *db)
+{
+ mutex_lock(&context->page_mutex);
+
+ refcount_dec(&db->u.user_page->refcount);
+ if (refcount_dec_if_one(&db->u.user_page->refcount)) {
+ list_del(&db->u.user_page->list);
+ ib_umem_release(db->u.user_page->umem);
+ kfree(db->u.user_page);
+ }
+
+ mutex_unlock(&context->page_mutex);
+}
+
+static struct hns_roce_db_pgdir *hns_roce_alloc_db_pgdir(
+ struct device *dma_device)
+{
+ struct hns_roce_db_pgdir *pgdir;
+
+ pgdir = kzalloc(sizeof(*pgdir), GFP_KERNEL);
+ if (!pgdir)
+ return NULL;
+
+ bitmap_fill(pgdir->order1,
+ HNS_ROCE_DB_PER_PAGE / HNS_ROCE_DB_TYPE_COUNT);
+ pgdir->bits[0] = pgdir->order0;
+ pgdir->bits[1] = pgdir->order1;
+ pgdir->page = dma_alloc_coherent(dma_device, PAGE_SIZE,
+ &pgdir->db_dma, GFP_KERNEL);
+ if (!pgdir->page) {
+ kfree(pgdir);
+ return NULL;
+ }
+
+ return pgdir;
+}
+
+static int hns_roce_alloc_db_from_pgdir(struct hns_roce_db_pgdir *pgdir,
+ struct hns_roce_db *db, int order)
+{
+ unsigned long o;
+ unsigned long i;
+
+ for (o = order; o <= 1; ++o) {
+ i = find_first_bit(pgdir->bits[o], HNS_ROCE_DB_PER_PAGE >> o);
+ if (i < HNS_ROCE_DB_PER_PAGE >> o)
+ goto found;
+ }
+
+ return -ENOMEM;
+
+found:
+ clear_bit(i, pgdir->bits[o]);
+
+ i <<= o;
+
+ if (o > order)
+ set_bit(i ^ 1, pgdir->bits[order]);
+
+ db->u.pgdir = pgdir;
+ db->index = i;
+ db->db_record = pgdir->page + db->index;
+ db->dma = pgdir->db_dma + db->index * HNS_ROCE_DB_UNIT_SIZE;
+ db->order = order;
+
+ return 0;
+}
+
+int hns_roce_alloc_db(struct hns_roce_dev *hr_dev, struct hns_roce_db *db,
+ int order)
+{
+ struct hns_roce_db_pgdir *pgdir;
+ int ret = 0;
+
+ mutex_lock(&hr_dev->pgdir_mutex);
+
+ list_for_each_entry(pgdir, &hr_dev->pgdir_list, list)
+ if (!hns_roce_alloc_db_from_pgdir(pgdir, db, order))
+ goto out;
+
+ pgdir = hns_roce_alloc_db_pgdir(hr_dev->dev);
+ if (!pgdir) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ list_add(&pgdir->list, &hr_dev->pgdir_list);
+
+ /* This should never fail -- we just allocated an empty page: */
+ WARN_ON(hns_roce_alloc_db_from_pgdir(pgdir, db, order));
+
+out:
+ mutex_unlock(&hr_dev->pgdir_mutex);
+
+ return ret;
+}
+
+void hns_roce_free_db(struct hns_roce_dev *hr_dev, struct hns_roce_db *db)
+{
+ unsigned long o;
+ unsigned long i;
+
+ mutex_lock(&hr_dev->pgdir_mutex);
+
+ o = db->order;
+ i = db->index;
+
+ if (db->order == 0 && test_bit(i ^ 1, db->u.pgdir->order0)) {
+ clear_bit(i ^ 1, db->u.pgdir->order0);
+ ++o;
+ }
+
+ i >>= o;
+ set_bit(i, db->u.pgdir->bits[o]);
+
+ if (bitmap_full(db->u.pgdir->order1,
+ HNS_ROCE_DB_PER_PAGE / HNS_ROCE_DB_TYPE_COUNT)) {
+ dma_free_coherent(hr_dev->dev, PAGE_SIZE, db->u.pgdir->page,
+ db->u.pgdir->db_dma);
+ list_del(&db->u.pgdir->list);
+ kfree(db->u.pgdir);
+ }
+
+ mutex_unlock(&hr_dev->pgdir_mutex);
+}
diff --git a/drivers/infiniband/hw/hns/hns_roce_debugfs.c b/drivers/infiniband/hw/hns/hns_roce_debugfs.c
new file mode 100644
index 000000000000..b869cdc54118
--- /dev/null
+++ b/drivers/infiniband/hw/hns/hns_roce_debugfs.c
@@ -0,0 +1,111 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (c) 2023 Hisilicon Limited.
+ */
+
+#include <linux/debugfs.h>
+#include <linux/device.h>
+#include <linux/pci.h>
+
+#include "hns_roce_device.h"
+
+static struct dentry *hns_roce_dbgfs_root;
+
+static int hns_debugfs_seqfile_open(struct inode *inode, struct file *f)
+{
+ struct hns_debugfs_seqfile *seqfile = inode->i_private;
+
+ return single_open(f, seqfile->read, seqfile->data);
+}
+
+static const struct file_operations hns_debugfs_seqfile_fops = {
+ .owner = THIS_MODULE,
+ .open = hns_debugfs_seqfile_open,
+ .release = single_release,
+ .read = seq_read,
+ .llseek = seq_lseek
+};
+
+static void init_debugfs_seqfile(struct hns_debugfs_seqfile *seq,
+ const char *name, struct dentry *parent,
+ int (*read_fn)(struct seq_file *, void *),
+ void *data)
+{
+ debugfs_create_file(name, 0400, parent, seq, &hns_debugfs_seqfile_fops);
+
+ seq->read = read_fn;
+ seq->data = data;
+}
+
+static const char * const sw_stat_info[] = {
+ [HNS_ROCE_DFX_AEQE_CNT] = "aeqe",
+ [HNS_ROCE_DFX_CEQE_CNT] = "ceqe",
+ [HNS_ROCE_DFX_CMDS_CNT] = "cmds",
+ [HNS_ROCE_DFX_CMDS_ERR_CNT] = "cmds_err",
+ [HNS_ROCE_DFX_MBX_POSTED_CNT] = "posted_mbx",
+ [HNS_ROCE_DFX_MBX_POLLED_CNT] = "polled_mbx",
+ [HNS_ROCE_DFX_MBX_EVENT_CNT] = "mbx_event",
+ [HNS_ROCE_DFX_QP_CREATE_ERR_CNT] = "qp_create_err",
+ [HNS_ROCE_DFX_QP_MODIFY_ERR_CNT] = "qp_modify_err",
+ [HNS_ROCE_DFX_CQ_CREATE_ERR_CNT] = "cq_create_err",
+ [HNS_ROCE_DFX_CQ_MODIFY_ERR_CNT] = "cq_modify_err",
+ [HNS_ROCE_DFX_SRQ_CREATE_ERR_CNT] = "srq_create_err",
+ [HNS_ROCE_DFX_SRQ_MODIFY_ERR_CNT] = "srq_modify_err",
+ [HNS_ROCE_DFX_XRCD_ALLOC_ERR_CNT] = "xrcd_alloc_err",
+ [HNS_ROCE_DFX_MR_REG_ERR_CNT] = "mr_reg_err",
+ [HNS_ROCE_DFX_MR_REREG_ERR_CNT] = "mr_rereg_err",
+ [HNS_ROCE_DFX_AH_CREATE_ERR_CNT] = "ah_create_err",
+ [HNS_ROCE_DFX_MMAP_ERR_CNT] = "mmap_err",
+ [HNS_ROCE_DFX_UCTX_ALLOC_ERR_CNT] = "uctx_alloc_err",
+};
+
+static int sw_stat_debugfs_show(struct seq_file *file, void *offset)
+{
+ struct hns_roce_dev *hr_dev = file->private;
+ int i;
+
+ for (i = 0; i < HNS_ROCE_DFX_CNT_TOTAL; i++)
+ seq_printf(file, "%-20s --- %lld\n", sw_stat_info[i],
+ atomic64_read(&hr_dev->dfx_cnt[i]));
+
+ return 0;
+}
+
+static void create_sw_stat_debugfs(struct hns_roce_dev *hr_dev,
+ struct dentry *parent)
+{
+ struct hns_sw_stat_debugfs *dbgfs = &hr_dev->dbgfs.sw_stat_root;
+
+ dbgfs->root = debugfs_create_dir("sw_stat", parent);
+
+ init_debugfs_seqfile(&dbgfs->sw_stat, "sw_stat", dbgfs->root,
+ sw_stat_debugfs_show, hr_dev);
+}
+
+/* debugfs for device */
+void hns_roce_register_debugfs(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_dev_debugfs *dbgfs = &hr_dev->dbgfs;
+
+ dbgfs->root = debugfs_create_dir(pci_name(hr_dev->pci_dev),
+ hns_roce_dbgfs_root);
+
+ create_sw_stat_debugfs(hr_dev, dbgfs->root);
+}
+
+void hns_roce_unregister_debugfs(struct hns_roce_dev *hr_dev)
+{
+ debugfs_remove_recursive(hr_dev->dbgfs.root);
+}
+
+/* debugfs for hns module */
+void hns_roce_init_debugfs(void)
+{
+ hns_roce_dbgfs_root = debugfs_create_dir("hns_roce", NULL);
+}
+
+void hns_roce_cleanup_debugfs(void)
+{
+ debugfs_remove_recursive(hns_roce_dbgfs_root);
+ hns_roce_dbgfs_root = NULL;
+}
diff --git a/drivers/infiniband/hw/hns/hns_roce_debugfs.h b/drivers/infiniband/hw/hns/hns_roce_debugfs.h
new file mode 100644
index 000000000000..98e87bd3161e
--- /dev/null
+++ b/drivers/infiniband/hw/hns/hns_roce_debugfs.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Copyright (c) 2023 Hisilicon Limited.
+ */
+
+#ifndef __HNS_ROCE_DEBUGFS_H
+#define __HNS_ROCE_DEBUGFS_H
+
+/* debugfs seqfile */
+struct hns_debugfs_seqfile {
+ int (*read)(struct seq_file *seq, void *data);
+ void *data;
+};
+
+struct hns_sw_stat_debugfs {
+ struct dentry *root;
+ struct hns_debugfs_seqfile sw_stat;
+};
+
+/* Debugfs for device */
+struct hns_roce_dev_debugfs {
+ struct dentry *root;
+ struct hns_sw_stat_debugfs sw_stat_root;
+};
+
+struct hns_roce_dev;
+
+void hns_roce_init_debugfs(void);
+void hns_roce_cleanup_debugfs(void);
+void hns_roce_register_debugfs(struct hns_roce_dev *hr_dev);
+void hns_roce_unregister_debugfs(struct hns_roce_dev *hr_dev);
+
+#endif
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index 1a6cb5d7a0dd..318f18cf37aa 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -33,80 +33,86 @@
#ifndef _HNS_ROCE_DEVICE_H
#define _HNS_ROCE_DEVICE_H
+#include <linux/pci.h>
#include <rdma/ib_verbs.h>
+#include <rdma/hns-abi.h>
+#include "hns_roce_debugfs.h"
-#define DRV_NAME "hns_roce"
+#define PCI_REVISION_ID_HIP08 0x21
+#define PCI_REVISION_ID_HIP09 0x30
-#define HNS_ROCE_HW_VER1 ('h' << 24 | 'i' << 16 | '0' << 8 | '6')
-
-#define MAC_ADDR_OCTET_NUM 6
#define HNS_ROCE_MAX_MSG_LEN 0x80000000
-#define HNS_ROCE_ALOGN_UP(a, b) ((((a) + (b) - 1) / (b)) * (b))
-
#define HNS_ROCE_IB_MIN_SQ_STRIDE 6
-#define HNS_ROCE_BA_SIZE (32 * 4096)
+#define BA_BYTE_LEN 8
-/* Hardware specification only for v1 engine */
#define HNS_ROCE_MIN_CQE_NUM 0x40
-#define HNS_ROCE_MIN_WQE_NUM 0x20
+#define HNS_ROCE_MIN_SRQ_WQE_NUM 1
+
+#define HNS_ROCE_MAX_IRQ_NUM 128
-/* Hardware specification only for v1 engine */
-#define HNS_ROCE_MAX_INNER_MTPT_NUM 0x7
-#define HNS_ROCE_MAX_MTPT_PBL_NUM 0x100000
+#define HNS_ROCE_SGE_IN_WQE 2
+#define HNS_ROCE_SGE_SHIFT 4
-#define HNS_ROCE_EACH_FREE_CQ_WAIT_MSECS 20
-#define HNS_ROCE_MAX_FREE_CQ_WAIT_CNT \
- (5000 / HNS_ROCE_EACH_FREE_CQ_WAIT_MSECS)
-#define HNS_ROCE_CQE_WCMD_EMPTY_BIT 0x2
-#define HNS_ROCE_MIN_CQE_CNT 16
+#define EQ_ENABLE 1
+#define EQ_DISABLE 0
-#define HNS_ROCE_MAX_IRQ_NUM 34
+#define HNS_ROCE_CEQ 0
+#define HNS_ROCE_AEQ 1
-#define HNS_ROCE_COMP_VEC_NUM 32
+#define HNS_ROCE_CEQE_SIZE 0x4
+#define HNS_ROCE_AEQE_SIZE 0x10
-#define HNS_ROCE_AEQE_VEC_NUM 1
-#define HNS_ROCE_AEQE_OF_VEC_NUM 1
+#define HNS_ROCE_V3_EQE_SIZE 0x40
-/* 4G/4K = 1M */
-#define HNS_ROCE_SL_SHIFT 28
-#define HNS_ROCE_TCLASS_SHIFT 20
-#define HNS_ROCE_FLOW_LABLE_MASK 0xfffff
+#define HNS_ROCE_V2_CQE_SIZE 32
+#define HNS_ROCE_V3_CQE_SIZE 64
+
+#define HNS_ROCE_V2_QPC_SZ 256
+#define HNS_ROCE_V3_QPC_SZ 512
#define HNS_ROCE_MAX_PORTS 6
-#define HNS_ROCE_MAX_GID_NUM 16
#define HNS_ROCE_GID_SIZE 16
+#define HNS_ROCE_SGE_SIZE 16
+#define HNS_ROCE_DWQE_SIZE 65536
-#define BITMAP_NO_RR 0
-#define BITMAP_RR 1
+#define HNS_ROCE_HOP_NUM_0 0xff
#define MR_TYPE_MR 0x00
+#define MR_TYPE_FRMR 0x01
#define MR_TYPE_DMA 0x03
+#define HNS_ROCE_FRMR_MAX_PA 512
+#define HNS_ROCE_FRMR_ALIGN_SIZE 128
+
#define PKEY_ID 0xffff
-#define GUID_LEN 8
#define NODE_DESC_SIZE 64
#define DB_REG_OFFSET 0x1000
-#define SERV_TYPE_RC 0
-#define SERV_TYPE_RD 1
-#define SERV_TYPE_UC 2
-#define SERV_TYPE_UD 3
+/* Configure to HW for PAGE_SIZE larger than 4KB */
+#define PG_SHIFT_OFFSET (PAGE_SHIFT - 12)
-#define PAGES_SHIFT_8 8
-#define PAGES_SHIFT_16 16
-#define PAGES_SHIFT_24 24
-#define PAGES_SHIFT_32 32
+#define ATOMIC_WR_LEN 8
-enum hns_roce_qp_state {
- HNS_ROCE_QP_STATE_RST,
- HNS_ROCE_QP_STATE_INIT,
- HNS_ROCE_QP_STATE_RTR,
- HNS_ROCE_QP_STATE_RTS,
- HNS_ROCE_QP_STATE_SQD,
- HNS_ROCE_QP_STATE_ERR,
- HNS_ROCE_QP_NUM_STATE,
+#define HNS_ROCE_IDX_QUE_ENTRY_SZ 4
+#define SRQ_DB_REG 0x230
+
+#define HNS_ROCE_QP_BANK_NUM 8
+#define HNS_ROCE_CQ_BANK_NUM 4
+
+#define CQ_BANKID_SHIFT 2
+#define CQ_BANKID_MASK GENMASK(1, 0)
+
+#define HNS_ROCE_MAX_CQ_COUNT 0xFFFF
+#define HNS_ROCE_MAX_CQ_PERIOD 0xFFFF
+
+enum {
+ SERV_TYPE_RC,
+ SERV_TYPE_UC,
+ SERV_TYPE_RD,
+ SERV_TYPE_UD,
+ SERV_TYPE_XRC = 5,
};
enum hns_roce_event {
@@ -127,64 +133,94 @@ enum hns_roce_event {
/* 0x10 and 0x11 is unused in currently application case */
HNS_ROCE_EVENT_TYPE_DB_OVERFLOW = 0x12,
HNS_ROCE_EVENT_TYPE_MB = 0x13,
- HNS_ROCE_EVENT_TYPE_CEQ_OVERFLOW = 0x14,
+ HNS_ROCE_EVENT_TYPE_FLR = 0x15,
+ HNS_ROCE_EVENT_TYPE_XRCD_VIOLATION = 0x16,
+ HNS_ROCE_EVENT_TYPE_INVALID_XRCETH = 0x17,
};
-/* Local Work Queue Catastrophic Error,SUBTYPE 0x5 */
enum {
- HNS_ROCE_LWQCE_QPC_ERROR = 1,
- HNS_ROCE_LWQCE_MTU_ERROR = 2,
- HNS_ROCE_LWQCE_WQE_BA_ADDR_ERROR = 3,
- HNS_ROCE_LWQCE_WQE_ADDR_ERROR = 4,
- HNS_ROCE_LWQCE_SQ_WQE_SHIFT_ERROR = 5,
- HNS_ROCE_LWQCE_SL_ERROR = 6,
- HNS_ROCE_LWQCE_PORT_ERROR = 7,
+ HNS_ROCE_CAP_FLAG_REREG_MR = BIT(0),
+ HNS_ROCE_CAP_FLAG_ROCE_V1_V2 = BIT(1),
+ HNS_ROCE_CAP_FLAG_RQ_INLINE = BIT(2),
+ HNS_ROCE_CAP_FLAG_CQ_RECORD_DB = BIT(3),
+ HNS_ROCE_CAP_FLAG_QP_RECORD_DB = BIT(4),
+ HNS_ROCE_CAP_FLAG_SRQ = BIT(5),
+ HNS_ROCE_CAP_FLAG_XRC = BIT(6),
+ HNS_ROCE_CAP_FLAG_MW = BIT(7),
+ HNS_ROCE_CAP_FLAG_FRMR = BIT(8),
+ HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL = BIT(9),
+ HNS_ROCE_CAP_FLAG_ATOMIC = BIT(10),
+ HNS_ROCE_CAP_FLAG_DIRECT_WQE = BIT(12),
+ HNS_ROCE_CAP_FLAG_SDI_MODE = BIT(14),
+ HNS_ROCE_CAP_FLAG_STASH = BIT(17),
+ HNS_ROCE_CAP_FLAG_CQE_INLINE = BIT(19),
+ HNS_ROCE_CAP_FLAG_BOND = BIT(21),
+ HNS_ROCE_CAP_FLAG_SRQ_RECORD_DB = BIT(22),
};
-/* Local Access Violation Work Queue Error,SUBTYPE 0x7 */
+#define HNS_ROCE_DB_TYPE_COUNT 2
+#define HNS_ROCE_DB_UNIT_SIZE 4
+
enum {
- HNS_ROCE_LAVWQE_R_KEY_VIOLATION = 1,
- HNS_ROCE_LAVWQE_LENGTH_ERROR = 2,
- HNS_ROCE_LAVWQE_VA_ERROR = 3,
- HNS_ROCE_LAVWQE_PD_ERROR = 4,
- HNS_ROCE_LAVWQE_RW_ACC_ERROR = 5,
- HNS_ROCE_LAVWQE_KEY_STATE_ERROR = 6,
- HNS_ROCE_LAVWQE_MR_OPERATION_ERROR = 7,
+ HNS_ROCE_DB_PER_PAGE = PAGE_SIZE / 4
};
-/* DOORBELL overflow subtype */
-enum {
- HNS_ROCE_DB_SUBTYPE_SDB_OVF = 1,
- HNS_ROCE_DB_SUBTYPE_SDB_ALM_OVF = 2,
- HNS_ROCE_DB_SUBTYPE_ODB_OVF = 3,
- HNS_ROCE_DB_SUBTYPE_ODB_ALM_OVF = 4,
- HNS_ROCE_DB_SUBTYPE_SDB_ALM_EMP = 5,
- HNS_ROCE_DB_SUBTYPE_ODB_ALM_EMP = 6,
+enum hns_roce_reset_stage {
+ HNS_ROCE_STATE_NON_RST,
+ HNS_ROCE_STATE_RST_BEF_DOWN,
+ HNS_ROCE_STATE_RST_DOWN,
+ HNS_ROCE_STATE_RST_UNINIT,
+ HNS_ROCE_STATE_RST_INIT,
+ HNS_ROCE_STATE_RST_INITED,
+};
+
+enum hns_roce_instance_state {
+ HNS_ROCE_STATE_NON_INIT,
+ HNS_ROCE_STATE_INIT,
+ HNS_ROCE_STATE_INITED,
+ HNS_ROCE_STATE_UNINIT,
+ HNS_ROCE_STATE_BOND_UNINIT,
};
enum {
- /* RQ&SRQ related operations */
- HNS_ROCE_OPCODE_SEND_DATA_RECEIVE = 0x06,
- HNS_ROCE_OPCODE_RDMA_WITH_IMM_RECEIVE = 0x07,
+ HNS_ROCE_RST_DIRECT_RETURN = 0,
};
#define HNS_ROCE_CMD_SUCCESS 1
-#define HNS_ROCE_PORT_DOWN 0
-#define HNS_ROCE_PORT_UP 1
-
-#define HNS_ROCE_MTT_ENTRY_PER_SEG 8
+#define HNS_ROCE_MAX_HOP_NUM 3
+/* The minimum page size is 4K for hardware */
+#define HNS_HW_PAGE_SHIFT 12
+#define HNS_HW_PAGE_SIZE (1 << HNS_HW_PAGE_SHIFT)
-#define PAGE_ADDR_SHIFT 12
+#define HNS_HW_MAX_PAGE_SHIFT 27
+#define HNS_HW_MAX_PAGE_SIZE (1 << HNS_HW_MAX_PAGE_SHIFT)
struct hns_roce_uar {
u64 pfn;
unsigned long index;
+ unsigned long logic_idx;
+};
+
+enum hns_roce_mmap_type {
+ HNS_ROCE_MMAP_TYPE_DB = 1,
+ HNS_ROCE_MMAP_TYPE_DWQE,
+};
+
+struct hns_user_mmap_entry {
+ struct rdma_user_mmap_entry rdma_entry;
+ enum hns_roce_mmap_type mmap_type;
+ u64 address;
};
struct hns_roce_ucontext {
struct ib_ucontext ibucontext;
struct hns_roce_uar uar;
+ struct list_head page_list;
+ struct mutex page_mutex;
+ struct hns_user_mmap_entry *db_mmap_entry;
+ u32 config;
+ u8 cq_bank_id;
};
struct hns_roce_pd {
@@ -192,6 +228,11 @@ struct hns_roce_pd {
unsigned long pdn;
};
+struct hns_roce_xrcd {
+ struct ib_xrcd ibxrcd;
+ u32 xrcdn;
+};
+
struct hns_roce_bitmap {
/* Bitmap Traversal last a bit which is 1 */
unsigned long last;
@@ -203,20 +244,10 @@ struct hns_roce_bitmap {
unsigned long *table;
};
-/* Order bitmap length -- bit num compute formula: 1 << (max_order - order) */
-/* Order = 0: bitmap is biggest, order = max bitmap is least (only a bit) */
-/* Every bit repesent to a partner free/used status in bitmap */
-/*
- * Initial, bits of other bitmap are all 0 except that a bit of max_order is 1
- * Bit = 1 represent to idle and available; bit = 0: not available
- */
-struct hns_roce_buddy {
- /* Members point to every order level bitmap */
- unsigned long **bits;
- /* Represent to avail bits of the order level bitmap */
- u32 *num_free;
- int max_order;
- spinlock_t lock;
+struct hns_roce_ida {
+ struct ida ida;
+ u32 min; /* Lowest ID to allocate. */
+ u32 max; /* Highest ID to allocate. */
};
/* For Hardware Entry Memory */
@@ -225,56 +256,108 @@ struct hns_roce_hem_table {
u32 type;
/* HEM array elment num */
unsigned long num_hem;
- /* HEM entry record obj total num */
- unsigned long num_obj;
- /*Single obj size */
+ /* Single obj size */
unsigned long obj_size;
- int lowmem;
+ unsigned long table_chunk_size;
struct mutex mutex;
struct hns_roce_hem **hem;
+ u64 **bt_l1;
+ dma_addr_t *bt_l1_dma_addr;
+ u64 **bt_l0;
+ dma_addr_t *bt_l0_dma_addr;
+};
+
+struct hns_roce_buf_region {
+ u32 offset; /* page offset */
+ u32 count; /* page count */
+ int hopnum; /* addressing hop num */
+};
+
+#define HNS_ROCE_MAX_BT_REGION 3
+#define HNS_ROCE_MAX_BT_LEVEL 3
+struct hns_roce_hem_list {
+ struct list_head root_bt;
+ /* link all bt dma mem by hop config */
+ struct list_head mid_bt[HNS_ROCE_MAX_BT_REGION][HNS_ROCE_MAX_BT_LEVEL];
+ struct list_head btm_bt; /* link all bottom bt in @mid_bt */
+ dma_addr_t root_ba; /* pointer to the root ba table */
+};
+
+enum mtr_type {
+ MTR_DEFAULT = 0,
+ MTR_PBL,
};
-struct hns_roce_mtt {
- unsigned long first_seg;
- int order;
- int page_shift;
+struct hns_roce_buf_attr {
+ struct {
+ size_t size; /* region size */
+ int hopnum; /* multi-hop addressing hop num */
+ } region[HNS_ROCE_MAX_BT_REGION];
+ unsigned int region_count; /* valid region count */
+ unsigned int page_shift; /* buffer page shift */
+ unsigned int user_access; /* umem access flag */
+ u64 iova;
+ enum mtr_type type;
+ bool mtt_only; /* only alloc buffer-required MTT memory */
+ bool adaptive; /* adaptive for page_shift and hopnum */
};
-/* Only support 4K page size for mr register */
-#define MR_SIZE_4K 0
+struct hns_roce_hem_cfg {
+ dma_addr_t root_ba; /* root BA table's address */
+ bool is_direct; /* addressing without BA table */
+ unsigned int ba_pg_shift; /* BA table page shift */
+ unsigned int buf_pg_shift; /* buffer page shift */
+ unsigned int buf_pg_count; /* buffer page count */
+ struct hns_roce_buf_region region[HNS_ROCE_MAX_BT_REGION];
+ unsigned int region_count;
+};
+
+/* memory translate region */
+struct hns_roce_mtr {
+ struct hns_roce_hem_list hem_list; /* multi-hop addressing resource */
+ struct ib_umem *umem; /* user space buffer */
+ struct hns_roce_buf *kmem; /* kernel space buffer */
+ struct hns_roce_hem_cfg hem_cfg; /* config for hardware addressing */
+};
struct hns_roce_mr {
struct ib_mr ibmr;
- struct ib_umem *umem;
- u64 iova; /* MR's virtual orignal addr */
+ u64 iova; /* MR's virtual original addr */
u64 size; /* Address range of MR */
u32 key; /* Key of MR */
u32 pd; /* PD num of MR */
- u32 access;/* Access permission of MR */
+ u32 access; /* Access permission of MR */
int enabled; /* MR's active status */
- int type; /* MR's register type */
- u64 *pbl_buf;/* MR's PBL space */
- dma_addr_t pbl_dma_addr; /* MR's PBL space PA */
+ int type; /* MR's register type */
+ u32 pbl_hop_num; /* multi-hop number */
+ struct hns_roce_mtr pbl_mtr;
+ u32 npages;
+ dma_addr_t *page_list;
};
struct hns_roce_mr_table {
- struct hns_roce_bitmap mtpt_bitmap;
- struct hns_roce_buddy mtt_buddy;
- struct hns_roce_hem_table mtt_table;
+ struct hns_roce_ida mtpt_ida;
struct hns_roce_hem_table mtpt_table;
};
struct hns_roce_wq {
u64 *wrid; /* Work request ID */
spinlock_t lock;
- int wqe_cnt; /* WQE num */
- u32 max_post;
- int max_gs;
- int offset;
- int wqe_shift;/* WQE size */
+ u32 wqe_cnt; /* WQE num */
+ u32 max_gs;
+ u32 rsv_sge;
+ u32 offset;
+ u32 wqe_shift; /* WQE size */
u32 head;
u32 tail;
- void __iomem *db_reg_l;
+ void __iomem *db_reg;
+ u32 ext_sge_cnt;
+};
+
+struct hns_roce_sge {
+ unsigned int sge_cnt; /* SGE num */
+ u32 offset;
+ u32 sge_shift; /* SGE size */
};
struct hns_roce_buf_list {
@@ -282,74 +365,162 @@ struct hns_roce_buf_list {
dma_addr_t map;
};
+/*
+ * %HNS_ROCE_BUF_DIRECT indicates that the all memory must be in a continuous
+ * dma address range.
+ *
+ * %HNS_ROCE_BUF_NOSLEEP indicates that the caller cannot sleep.
+ *
+ * %HNS_ROCE_BUF_NOFAIL allocation only failed when allocated size is zero, even
+ * the allocated size is smaller than the required size.
+ */
+enum {
+ HNS_ROCE_BUF_DIRECT = BIT(0),
+ HNS_ROCE_BUF_NOSLEEP = BIT(1),
+ HNS_ROCE_BUF_NOFAIL = BIT(2),
+};
+
struct hns_roce_buf {
- struct hns_roce_buf_list direct;
- struct hns_roce_buf_list *page_list;
- int nbufs;
+ struct hns_roce_buf_list *trunk_list;
+ u32 ntrunks;
u32 npages;
- int page_shift;
+ unsigned int trunk_shift;
+ unsigned int page_shift;
+};
+
+struct hns_roce_db_pgdir {
+ struct list_head list;
+ DECLARE_BITMAP(order0, HNS_ROCE_DB_PER_PAGE);
+ DECLARE_BITMAP(order1, HNS_ROCE_DB_PER_PAGE / HNS_ROCE_DB_TYPE_COUNT);
+ unsigned long *bits[HNS_ROCE_DB_TYPE_COUNT];
+ u32 *page;
+ dma_addr_t db_dma;
+};
+
+struct hns_roce_user_db_page {
+ struct list_head list;
+ struct ib_umem *umem;
+ unsigned long user_virt;
+ refcount_t refcount;
};
-struct hns_roce_cq_buf {
- struct hns_roce_buf hr_buf;
- struct hns_roce_mtt hr_mtt;
+struct hns_roce_db {
+ u32 *db_record;
+ union {
+ struct hns_roce_db_pgdir *pgdir;
+ struct hns_roce_user_db_page *user_page;
+ } u;
+ dma_addr_t dma;
+ void *virt_addr;
+ unsigned long index;
+ unsigned long order;
};
struct hns_roce_cq {
struct ib_cq ib_cq;
- struct hns_roce_cq_buf hr_buf;
+ struct hns_roce_mtr mtr;
+ struct hns_roce_db db;
+ u32 flags;
spinlock_t lock;
- struct ib_umem *umem;
- void (*comp)(struct hns_roce_cq *);
- void (*event)(struct hns_roce_cq *, enum hns_roce_event);
-
- struct hns_roce_uar *uar;
u32 cq_depth;
u32 cons_index;
- void __iomem *cq_db_l;
- u16 *tptr_addr;
+ u32 *set_ci_db;
+ void __iomem *db_reg;
+ int arm_sn;
+ int cqe_size;
unsigned long cqn;
u32 vector;
- atomic_t refcount;
+ refcount_t refcount;
struct completion free;
+ struct list_head sq_list; /* all qps on this send cq */
+ struct list_head rq_list; /* all qps on this recv cq */
+ int is_armed; /* cq is armed */
+ struct list_head node; /* all armed cqs are on a list */
+};
+
+struct hns_roce_idx_que {
+ struct hns_roce_mtr mtr;
+ u32 entry_shift;
+ unsigned long *bitmap;
+ u32 head;
+ u32 tail;
};
struct hns_roce_srq {
struct ib_srq ibsrq;
- int srqn;
+ unsigned long srqn;
+ u32 wqe_cnt;
+ int max_gs;
+ u32 rsv_sge;
+ u32 wqe_shift;
+ u32 cqn;
+ u32 xrcdn;
+ void __iomem *db_reg;
+
+ refcount_t refcount;
+ struct completion free;
+
+ struct hns_roce_mtr buf_mtr;
+
+ u64 *wrid;
+ struct hns_roce_idx_que idx_que;
+ spinlock_t lock;
+ struct mutex mutex;
+ void (*event)(struct hns_roce_srq *srq, enum hns_roce_event event);
+ struct hns_roce_db rdb;
+ u32 cap_flags;
};
struct hns_roce_uar_table {
struct hns_roce_bitmap bitmap;
};
+struct hns_roce_bank {
+ struct ida ida;
+ u32 inuse; /* Number of IDs allocated */
+ u32 min; /* Lowest ID to allocate. */
+ u32 max; /* Highest ID to allocate. */
+ u32 next; /* Next ID to allocate. */
+};
+
struct hns_roce_qp_table {
- struct hns_roce_bitmap bitmap;
- spinlock_t lock;
struct hns_roce_hem_table qp_table;
struct hns_roce_hem_table irrl_table;
+ struct hns_roce_hem_table trrl_table;
+ struct hns_roce_hem_table sccc_table;
+ struct mutex scc_mutex;
+ struct hns_roce_bank bank[HNS_ROCE_QP_BANK_NUM];
+ struct mutex bank_mutex;
+ struct xarray dip_xa;
};
struct hns_roce_cq_table {
- struct hns_roce_bitmap bitmap;
- spinlock_t lock;
- struct radix_tree_root tree;
+ struct xarray array;
struct hns_roce_hem_table table;
+ struct hns_roce_bank bank[HNS_ROCE_CQ_BANK_NUM];
+ struct mutex bank_mutex;
+ u32 ctx_num[HNS_ROCE_CQ_BANK_NUM];
};
-struct hns_roce_raq_table {
- struct hns_roce_buf_list *e_raq_buf;
+struct hns_roce_srq_table {
+ struct hns_roce_ida srq_ida;
+ struct xarray xa;
+ struct hns_roce_hem_table table;
};
struct hns_roce_av {
- __le32 port_pd;
- u8 gid_index;
- u8 stat_rate;
- u8 hop_limit;
- __le32 sl_tclass_flowlabel;
- u8 dgid[HNS_ROCE_GID_SIZE];
- u8 mac[6];
- __le16 vlan;
+ u8 port;
+ u8 gid_index;
+ u8 stat_rate;
+ u8 hop_limit;
+ u32 flowlabel;
+ u16 udp_sport;
+ u8 sl;
+ u8 tclass;
+ u8 dgid[HNS_ROCE_GID_SIZE];
+ u8 mac[ETH_ALEN];
+ u16 vlan_id;
+ u8 vlan_en;
};
struct hns_roce_ah {
@@ -363,12 +534,16 @@ struct hns_roce_cmd_context {
int next;
u64 out_param;
u16 token;
+ u16 busy;
+};
+
+enum hns_roce_cmdq_state {
+ HNS_ROCE_CMDQ_STATE_NORMAL,
+ HNS_ROCE_CMDQ_STATE_FATAL_ERR,
};
struct hns_roce_cmdq {
struct dma_pool *pool;
- u8 __iomem *hcr;
- struct mutex hcr_mutex;
struct semaphore poll_sem;
/*
* Event mode: cmd register mutex protection,
@@ -380,18 +555,13 @@ struct hns_roce_cmdq {
int free_head;
struct hns_roce_cmd_context *context;
/*
- * Result of get integer part
- * which max_comds compute according a power of 2
- */
- u16 token_mask;
- /*
* Process whether use event mode, init default non-zero
* After the event queue of cmd event ready,
* can switch into event mode
* close device, switch into poll mode(non event mode)
*/
u8 use_events;
- u8 toggle;
+ enum hns_roce_cmdq_state state;
};
struct hns_roce_cmd_mailbox {
@@ -399,21 +569,48 @@ struct hns_roce_cmd_mailbox {
dma_addr_t dma;
};
+struct hns_roce_mbox_msg {
+ u64 in_param;
+ u64 out_param;
+ u8 cmd;
+ u32 tag;
+ u16 token;
+ u8 event_en;
+};
+
struct hns_roce_dev;
+enum {
+ HNS_ROCE_FLUSH_FLAG = 0,
+ HNS_ROCE_STOP_FLUSH_FLAG = 1,
+};
+
+struct hns_roce_work {
+ struct hns_roce_dev *hr_dev;
+ struct work_struct work;
+ int event_type;
+ int sub_type;
+ u32 queue_num;
+};
+
+enum hns_roce_cong_type {
+ CONG_TYPE_DCQCN,
+ CONG_TYPE_LDCP,
+ CONG_TYPE_HC3,
+ CONG_TYPE_DIP,
+};
+
struct hns_roce_qp {
struct ib_qp ibqp;
- struct hns_roce_buf hr_buf;
struct hns_roce_wq rq;
- __le64 doorbell_qpn;
- __le32 sq_signal_bits;
- u32 sq_next_wqe;
- int sq_max_wqes_per_wr;
- int sq_spare_wqes;
+ struct hns_roce_db rdb;
+ struct hns_roce_db sdb;
+ unsigned long en_flags;
+ enum ib_sig_type sq_signal_bits;
struct hns_roce_wq sq;
- struct ib_umem *umem;
- struct hns_roce_mtt mtt;
+ struct hns_roce_mtr mtr;
+
u32 buff_size;
struct mutex mutex;
u8 port;
@@ -421,138 +618,372 @@ struct hns_roce_qp {
u8 sl;
u8 resp_depth;
u8 state;
- u32 access_flags;
- u32 pkey_index;
- void (*event)(struct hns_roce_qp *,
- enum hns_roce_event);
+ u32 atomic_rd_en;
+ u32 qkey;
+ void (*event)(struct hns_roce_qp *qp,
+ enum hns_roce_event event_type);
unsigned long qpn;
- atomic_t refcount;
+ u32 xrcdn;
+
+ refcount_t refcount;
struct completion free;
-};
-struct hns_roce_sqp {
- struct hns_roce_qp hr_qp;
+ struct hns_roce_sge sge;
+ u32 next_sge;
+ enum ib_mtu path_mtu;
+ u32 max_inline_data;
+ u8 free_mr_en;
+
+ /* 0: flush needed, 1: unneeded */
+ unsigned long flush_flag;
+ struct hns_roce_work flush_work;
+ struct list_head node; /* all qps are on a list */
+ struct list_head rq_node; /* all recv qps are on a list */
+ struct list_head sq_node; /* all send qps are on a list */
+ struct hns_user_mmap_entry *dwqe_mmap_entry;
+ u32 config;
+ enum hns_roce_cong_type cong_type;
+ u8 tc_mode;
+ u8 priority;
+ spinlock_t flush_lock;
+ struct hns_roce_dip *dip;
};
struct hns_roce_ib_iboe {
spinlock_t lock;
struct net_device *netdevs[HNS_ROCE_MAX_PORTS];
struct notifier_block nb;
- struct notifier_block nb_inet;
u8 phy_port[HNS_ROCE_MAX_PORTS];
};
+struct hns_roce_ceqe {
+ __le32 comp;
+ __le32 rsv[15];
+};
+
+#define CEQE_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_ceqe, h, l)
+
+#define CEQE_CQN CEQE_FIELD_LOC(23, 0)
+#define CEQE_OWNER CEQE_FIELD_LOC(31, 31)
+
+struct hns_roce_aeqe {
+ __le32 asyn;
+ union {
+ struct {
+ __le32 num;
+ u32 rsv0;
+ u32 rsv1;
+ } queue_event;
+
+ struct {
+ __le64 out_param;
+ __le16 token;
+ u8 status;
+ u8 rsv0;
+ } __packed cmd;
+ } event;
+ __le32 rsv[12];
+};
+
+#define AEQE_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_aeqe, h, l)
+
+#define AEQE_EVENT_TYPE AEQE_FIELD_LOC(7, 0)
+#define AEQE_SUB_TYPE AEQE_FIELD_LOC(15, 8)
+#define AEQE_OWNER AEQE_FIELD_LOC(31, 31)
+#define AEQE_EVENT_QUEUE_NUM AEQE_FIELD_LOC(55, 32)
+
struct hns_roce_eq {
struct hns_roce_dev *hr_dev;
- void __iomem *doorbell;
+ void __iomem *db_reg;
- int type_flag;/* Aeq:1 ceq:0 */
+ int type_flag; /* Aeq:1 ceq:0 */
int eqn;
u32 entries;
- int log_entries;
int eqe_size;
int irq;
- int log_page_size;
- int cons_index;
- struct hns_roce_buf_list *buf_list;
+ u32 cons_index;
+ int over_ignore;
+ int coalesce;
+ int arm_st;
+ int hop_num;
+ struct hns_roce_mtr mtr;
+ u16 eq_max_cnt;
+ u32 eq_period;
+ int shift;
+ int event_type;
+ int sub_type;
+ struct work_struct work;
};
struct hns_roce_eq_table {
struct hns_roce_eq *eq;
- void __iomem **eqc_base;
};
struct hns_roce_caps {
+ u64 fw_ver;
u8 num_ports;
int gid_table_len[HNS_ROCE_MAX_PORTS];
int pkey_table_len[HNS_ROCE_MAX_PORTS];
int local_ca_ack_delay;
int num_uars;
u32 phy_num_uars;
- u32 max_sq_sg; /* 2 */
- u32 max_sq_inline; /* 32 */
- u32 max_rq_sg; /* 2 */
- int num_qps; /* 256k */
- u32 max_wqes; /* 16k */
- u32 max_sq_desc_sz; /* 64 */
- u32 max_rq_desc_sz; /* 64 */
+ u32 max_sq_sg;
+ u32 max_sq_inline;
+ u32 max_rq_sg;
+ u32 rsv0;
+ u32 num_qps;
+ u32 reserved_qps;
+ u32 num_srqs;
+ u32 max_wqes;
+ u32 max_srq_wrs;
+ u32 max_srq_sges;
+ u32 max_sq_desc_sz;
+ u32 max_rq_desc_sz;
+ u32 rsv2;
int max_qp_init_rdma;
int max_qp_dest_rdma;
- int num_cqs;
- int max_cqes;
- int reserved_cqs;
- int num_aeq_vectors; /* 1 */
- int num_comp_vectors; /* 32 ceq */
+ u32 num_cqs;
+ u32 max_cqes;
+ u32 min_cqes;
+ u32 min_wqes;
+ u32 reserved_cqs;
+ u32 reserved_srqs;
+ int num_aeq_vectors;
+ int num_comp_vectors;
int num_other_vectors;
- int num_mtpts;
- u32 num_mtt_segs;
+ u32 num_mtpts;
+ u32 rsv1;
+ u32 num_srqwqe_segs;
+ u32 num_idx_segs;
int reserved_mrws;
int reserved_uars;
int num_pds;
int reserved_pds;
+ u32 num_xrcds;
+ u32 reserved_xrcds;
u32 mtt_entry_sz;
- u32 cq_entry_sz;
+ u32 cqe_sz;
u32 page_size_cap;
u32 reserved_lkey;
int mtpt_entry_sz;
- int qpc_entry_sz;
+ int qpc_sz;
int irrl_entry_sz;
+ int trrl_entry_sz;
int cqc_entry_sz;
+ int sccc_sz;
+ int qpc_timer_entry_sz;
+ int cqc_timer_entry_sz;
+ int srqc_entry_sz;
+ int idx_entry_sz;
+ u32 pbl_ba_pg_sz;
+ u32 pbl_buf_pg_sz;
+ u32 pbl_hop_num;
int aeqe_depth;
- int ceqe_depth[HNS_ROCE_COMP_VEC_NUM];
+ int ceqe_depth;
+ u32 aeqe_size;
+ u32 ceqe_size;
enum ib_mtu max_mtu;
+ u32 qpc_bt_num;
+ u32 qpc_timer_bt_num;
+ u32 srqc_bt_num;
+ u32 cqc_bt_num;
+ u32 cqc_timer_bt_num;
+ u32 mpt_bt_num;
+ u32 eqc_bt_num;
+ u32 smac_bt_num;
+ u32 sgid_bt_num;
+ u32 sccc_bt_num;
+ u32 gmv_bt_num;
+ u32 qpc_ba_pg_sz;
+ u32 qpc_buf_pg_sz;
+ u32 qpc_hop_num;
+ u32 srqc_ba_pg_sz;
+ u32 srqc_buf_pg_sz;
+ u32 srqc_hop_num;
+ u32 cqc_ba_pg_sz;
+ u32 cqc_buf_pg_sz;
+ u32 cqc_hop_num;
+ u32 mpt_ba_pg_sz;
+ u32 mpt_buf_pg_sz;
+ u32 mpt_hop_num;
+ u32 mtt_ba_pg_sz;
+ u32 mtt_buf_pg_sz;
+ u32 mtt_hop_num;
+ u32 wqe_sq_hop_num;
+ u32 wqe_sge_hop_num;
+ u32 wqe_rq_hop_num;
+ u32 sccc_ba_pg_sz;
+ u32 sccc_buf_pg_sz;
+ u32 sccc_hop_num;
+ u32 qpc_timer_ba_pg_sz;
+ u32 qpc_timer_buf_pg_sz;
+ u32 qpc_timer_hop_num;
+ u32 cqc_timer_ba_pg_sz;
+ u32 cqc_timer_buf_pg_sz;
+ u32 cqc_timer_hop_num;
+ u32 cqe_ba_pg_sz; /* page_size = 4K*(2^cqe_ba_pg_sz) */
+ u32 cqe_buf_pg_sz;
+ u32 cqe_hop_num;
+ u32 srqwqe_ba_pg_sz;
+ u32 srqwqe_buf_pg_sz;
+ u32 srqwqe_hop_num;
+ u32 idx_ba_pg_sz;
+ u32 idx_buf_pg_sz;
+ u32 idx_hop_num;
+ u32 eqe_ba_pg_sz;
+ u32 eqe_buf_pg_sz;
+ u32 eqe_hop_num;
+ u32 gmv_entry_num;
+ u32 gmv_entry_sz;
+ u32 gmv_ba_pg_sz;
+ u32 gmv_buf_pg_sz;
+ u32 gmv_hop_num;
+ u32 sl_num;
+ u32 llm_buf_pg_sz;
+ u32 chunk_sz; /* chunk size in non multihop mode */
+ u64 flags;
+ u16 default_ceq_max_cnt;
+ u16 default_ceq_period;
+ u16 default_aeq_max_cnt;
+ u16 default_aeq_period;
+ u16 default_aeq_arm_st;
+ u16 default_ceq_arm_st;
+ u8 cong_cap;
+ enum hns_roce_cong_type default_cong_type;
+ u32 max_ack_req_msg_len;
+};
+
+enum hns_roce_device_state {
+ HNS_ROCE_DEVICE_STATE_INITED,
+ HNS_ROCE_DEVICE_STATE_RST_DOWN,
+ HNS_ROCE_DEVICE_STATE_UNINIT,
+};
+
+enum hns_roce_hw_pkt_stat_index {
+ HNS_ROCE_HW_RX_RC_PKT_CNT,
+ HNS_ROCE_HW_RX_UC_PKT_CNT,
+ HNS_ROCE_HW_RX_UD_PKT_CNT,
+ HNS_ROCE_HW_RX_XRC_PKT_CNT,
+ HNS_ROCE_HW_RX_PKT_CNT,
+ HNS_ROCE_HW_RX_ERR_PKT_CNT,
+ HNS_ROCE_HW_RX_CNP_PKT_CNT,
+ HNS_ROCE_HW_TX_RC_PKT_CNT,
+ HNS_ROCE_HW_TX_UC_PKT_CNT,
+ HNS_ROCE_HW_TX_UD_PKT_CNT,
+ HNS_ROCE_HW_TX_XRC_PKT_CNT,
+ HNS_ROCE_HW_TX_PKT_CNT,
+ HNS_ROCE_HW_TX_ERR_PKT_CNT,
+ HNS_ROCE_HW_TX_CNP_PKT_CNT,
+ HNS_ROCE_HW_TRP_GET_MPT_ERR_PKT_CNT,
+ HNS_ROCE_HW_TRP_GET_IRRL_ERR_PKT_CNT,
+ HNS_ROCE_HW_ECN_DB_CNT,
+ HNS_ROCE_HW_RX_BUF_CNT,
+ HNS_ROCE_HW_TRP_RX_SOF_CNT,
+ HNS_ROCE_HW_CQ_CQE_CNT,
+ HNS_ROCE_HW_CQ_POE_CNT,
+ HNS_ROCE_HW_CQ_NOTIFY_CNT,
+ HNS_ROCE_HW_CNT_TOTAL
+};
+
+enum hns_roce_sw_dfx_stat_index {
+ HNS_ROCE_DFX_AEQE_CNT,
+ HNS_ROCE_DFX_CEQE_CNT,
+ HNS_ROCE_DFX_CMDS_CNT,
+ HNS_ROCE_DFX_CMDS_ERR_CNT,
+ HNS_ROCE_DFX_MBX_POSTED_CNT,
+ HNS_ROCE_DFX_MBX_POLLED_CNT,
+ HNS_ROCE_DFX_MBX_EVENT_CNT,
+ HNS_ROCE_DFX_QP_CREATE_ERR_CNT,
+ HNS_ROCE_DFX_QP_MODIFY_ERR_CNT,
+ HNS_ROCE_DFX_CQ_CREATE_ERR_CNT,
+ HNS_ROCE_DFX_CQ_MODIFY_ERR_CNT,
+ HNS_ROCE_DFX_SRQ_CREATE_ERR_CNT,
+ HNS_ROCE_DFX_SRQ_MODIFY_ERR_CNT,
+ HNS_ROCE_DFX_XRCD_ALLOC_ERR_CNT,
+ HNS_ROCE_DFX_MR_REG_ERR_CNT,
+ HNS_ROCE_DFX_MR_REREG_ERR_CNT,
+ HNS_ROCE_DFX_AH_CREATE_ERR_CNT,
+ HNS_ROCE_DFX_MMAP_ERR_CNT,
+ HNS_ROCE_DFX_UCTX_ALLOC_ERR_CNT,
+ HNS_ROCE_DFX_CNT_TOTAL
};
struct hns_roce_hw {
- int (*reset)(struct hns_roce_dev *hr_dev, bool enable);
- void (*hw_profile)(struct hns_roce_dev *hr_dev);
+ int (*cmq_init)(struct hns_roce_dev *hr_dev);
+ void (*cmq_exit)(struct hns_roce_dev *hr_dev);
+ int (*hw_profile)(struct hns_roce_dev *hr_dev);
int (*hw_init)(struct hns_roce_dev *hr_dev);
void (*hw_exit)(struct hns_roce_dev *hr_dev);
- void (*set_gid)(struct hns_roce_dev *hr_dev, u8 port, int gid_index,
- union ib_gid *gid);
- void (*set_mac)(struct hns_roce_dev *hr_dev, u8 phy_port, u8 *addr);
- void (*set_mtu)(struct hns_roce_dev *hr_dev, u8 phy_port,
- enum ib_mtu mtu);
- int (*write_mtpt)(void *mb_buf, struct hns_roce_mr *mr,
- unsigned long mtpt_idx);
+ int (*post_mbox)(struct hns_roce_dev *hr_dev,
+ struct hns_roce_mbox_msg *mbox_msg);
+ int (*poll_mbox_done)(struct hns_roce_dev *hr_dev);
+ bool (*chk_mbox_avail)(struct hns_roce_dev *hr_dev, bool *is_busy);
+ int (*set_gid)(struct hns_roce_dev *hr_dev, int gid_index,
+ const union ib_gid *gid, const struct ib_gid_attr *attr);
+ int (*set_mac)(struct hns_roce_dev *hr_dev, u8 phy_port,
+ const u8 *addr);
+ int (*write_mtpt)(struct hns_roce_dev *hr_dev, void *mb_buf,
+ struct hns_roce_mr *mr);
+ int (*rereg_write_mtpt)(struct hns_roce_dev *hr_dev,
+ struct hns_roce_mr *mr, int flags,
+ void *mb_buf);
+ int (*frmr_write_mtpt)(void *mb_buf, struct hns_roce_mr *mr);
void (*write_cqc)(struct hns_roce_dev *hr_dev,
struct hns_roce_cq *hr_cq, void *mb_buf, u64 *mtts,
- dma_addr_t dma_handle, int nent, u32 vector);
+ dma_addr_t dma_handle);
+ int (*set_hem)(struct hns_roce_dev *hr_dev,
+ struct hns_roce_hem_table *table, int obj, u32 step_idx);
int (*clear_hem)(struct hns_roce_dev *hr_dev,
- struct hns_roce_hem_table *table, int obj);
- int (*query_qp)(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
- int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr);
+ struct hns_roce_hem_table *table, int obj,
+ u32 step_idx);
int (*modify_qp)(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
int attr_mask, enum ib_qp_state cur_state,
- enum ib_qp_state new_state);
- int (*destroy_qp)(struct ib_qp *ibqp);
- int (*post_send)(struct ib_qp *ibqp, struct ib_send_wr *wr,
- struct ib_send_wr **bad_wr);
- int (*post_recv)(struct ib_qp *qp, struct ib_recv_wr *recv_wr,
- struct ib_recv_wr **bad_recv_wr);
- int (*req_notify_cq)(struct ib_cq *ibcq, enum ib_cq_notify_flags flags);
- int (*poll_cq)(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
- int (*dereg_mr)(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr);
- int (*destroy_cq)(struct ib_cq *ibcq);
- void *priv;
+ enum ib_qp_state new_state, struct ib_udata *udata);
+ int (*qp_flow_control_init)(struct hns_roce_dev *hr_dev,
+ struct hns_roce_qp *hr_qp);
+ void (*dereg_mr)(struct hns_roce_dev *hr_dev);
+ int (*init_eq)(struct hns_roce_dev *hr_dev);
+ void (*cleanup_eq)(struct hns_roce_dev *hr_dev);
+ int (*write_srqc)(struct hns_roce_srq *srq, void *mb_buf);
+ int (*query_cqc)(struct hns_roce_dev *hr_dev, u32 cqn, void *buffer);
+ int (*query_qpc)(struct hns_roce_dev *hr_dev, u32 qpn, void *buffer);
+ int (*query_mpt)(struct hns_roce_dev *hr_dev, u32 key, void *buffer);
+ int (*query_srqc)(struct hns_roce_dev *hr_dev, u32 srqn, void *buffer);
+ int (*query_sccc)(struct hns_roce_dev *hr_dev, u32 qpn, void *buffer);
+ int (*query_hw_counter)(struct hns_roce_dev *hr_dev,
+ u64 *stats, u32 port, int *hw_counters);
+ int (*get_dscp)(struct hns_roce_dev *hr_dev, u8 dscp,
+ u8 *tc_mode, u8 *priority);
+ const struct ib_device_ops *hns_roce_dev_ops;
+ const struct ib_device_ops *hns_roce_dev_srq_ops;
};
struct hns_roce_dev {
struct ib_device ib_dev;
- struct platform_device *pdev;
+ struct pci_dev *pci_dev;
+ struct device *dev;
struct hns_roce_uar priv_uar;
const char *irq_names[HNS_ROCE_MAX_IRQ_NUM];
spinlock_t sm_lock;
- spinlock_t bt_cmd_lock;
+ bool active;
+ bool is_reset;
+ bool dis_db;
+ unsigned long reset_cnt;
struct hns_roce_ib_iboe iboe;
+ enum hns_roce_device_state state;
+ struct list_head qp_list; /* list of all qps on this dev */
+ spinlock_t qp_list_lock; /* protect qp_list */
+ struct list_head pgdir_list;
+ struct mutex pgdir_mutex;
int irq[HNS_ROCE_MAX_IRQ_NUM];
u8 __iomem *reg_base;
+ void __iomem *mem_base;
struct hns_roce_caps caps;
- struct radix_tree_root qp_table_tree;
+ struct xarray qp_table_xa;
- unsigned char dev_addr[HNS_ROCE_MAX_PORTS][MAC_ADDR_OCTET_NUM];
+ unsigned char dev_addr[HNS_ROCE_MAX_PORTS][ETH_ALEN];
u64 sys_image_guid;
u32 vendor_id;
u32 vendor_part_id;
@@ -560,20 +991,57 @@ struct hns_roce_dev {
void __iomem *priv_addr;
struct hns_roce_cmdq cmd;
- struct hns_roce_bitmap pd_bitmap;
- struct hns_roce_uar_table uar_table;
+ struct hns_roce_ida pd_ida;
+ struct hns_roce_ida xrcd_ida;
+ struct hns_roce_ida uar_ida;
struct hns_roce_mr_table mr_table;
struct hns_roce_cq_table cq_table;
+ struct hns_roce_srq_table srq_table;
struct hns_roce_qp_table qp_table;
struct hns_roce_eq_table eq_table;
+ struct hns_roce_hem_table qpc_timer_table;
+ struct hns_roce_hem_table cqc_timer_table;
+ /* GMV is the memory area that the driver allocates for the hardware
+ * to store SGID, SMAC and VLAN information.
+ */
+ struct hns_roce_hem_table gmv_table;
int cmd_mod;
int loop_idc;
- dma_addr_t tptr_dma_addr; /*only for hw v1*/
- u32 tptr_size; /*only for hw v1*/
- struct hns_roce_hw *hw;
+ u32 sdb_offset;
+ u32 odb_offset;
+ const struct hns_roce_hw *hw;
+ void *priv;
+ struct workqueue_struct *irq_workq;
+ struct work_struct ecc_work;
+ u32 func_num;
+ u32 is_vf;
+ u32 cong_algo_tmpl_id;
+ u64 dwqe_page;
+ struct hns_roce_dev_debugfs dbgfs;
+ atomic64_t *dfx_cnt;
};
+enum hns_roce_trace_type {
+ TRACE_SQ,
+ TRACE_RQ,
+ TRACE_SRQ,
+};
+
+static inline const char *trace_type_to_str(enum hns_roce_trace_type type)
+{
+ switch (type) {
+ case TRACE_SQ:
+ return "SQ";
+ case TRACE_RQ:
+ return "RQ";
+ case TRACE_SRQ:
+ return "SRQ";
+ default:
+ return "UNKNOWN";
+ }
+}
+
static inline struct hns_roce_dev *to_hr_dev(struct ib_device *ib_dev)
{
return container_of(ib_dev, struct hns_roce_dev, ib_dev);
@@ -590,6 +1058,11 @@ static inline struct hns_roce_pd *to_hr_pd(struct ib_pd *ibpd)
return container_of(ibpd, struct hns_roce_pd, ibpd);
}
+static inline struct hns_roce_xrcd *to_hr_xrcd(struct ib_xrcd *ibxrcd)
+{
+ return container_of(ibxrcd, struct hns_roce_xrcd, ibxrcd);
+}
+
static inline struct hns_roce_ah *to_hr_ah(struct ib_ah *ibah)
{
return container_of(ibah, struct hns_roce_ah, ibah);
@@ -615,38 +1088,101 @@ static inline struct hns_roce_srq *to_hr_srq(struct ib_srq *ibsrq)
return container_of(ibsrq, struct hns_roce_srq, ibsrq);
}
-static inline struct hns_roce_sqp *hr_to_hr_sqp(struct hns_roce_qp *hr_qp)
+static inline struct hns_user_mmap_entry *
+to_hns_mmap(struct rdma_user_mmap_entry *rdma_entry)
{
- return container_of(hr_qp, struct hns_roce_sqp, hr_qp);
+ return container_of(rdma_entry, struct hns_user_mmap_entry, rdma_entry);
}
-static inline void hns_roce_write64_k(__be32 val[2], void __iomem *dest)
+static inline void hns_roce_write64_k(__le32 val[2], void __iomem *dest)
{
- __raw_writeq(*(u64 *) val, dest);
+ writeq(*(u64 *)val, dest);
}
static inline struct hns_roce_qp
*__hns_roce_qp_lookup(struct hns_roce_dev *hr_dev, u32 qpn)
{
- return radix_tree_lookup(&hr_dev->qp_table_tree,
- qpn & (hr_dev->caps.num_qps - 1));
+ return xa_load(&hr_dev->qp_table_xa, qpn);
+}
+
+static inline void *hns_roce_buf_offset(struct hns_roce_buf *buf,
+ unsigned int offset)
+{
+ return (char *)(buf->trunk_list[offset >> buf->trunk_shift].buf) +
+ (offset & ((1 << buf->trunk_shift) - 1));
}
-static inline void *hns_roce_buf_offset(struct hns_roce_buf *buf, int offset)
+static inline dma_addr_t hns_roce_buf_dma_addr(struct hns_roce_buf *buf,
+ unsigned int offset)
{
- u32 bits_per_long_val = BITS_PER_LONG;
+ return buf->trunk_list[offset >> buf->trunk_shift].map +
+ (offset & ((1 << buf->trunk_shift) - 1));
+}
- if (bits_per_long_val == 64 || buf->nbufs == 1)
- return (char *)(buf->direct.buf) + offset;
- else
- return (char *)(buf->page_list[offset >> PAGE_SHIFT].buf) +
- (offset & (PAGE_SIZE - 1));
+static inline dma_addr_t hns_roce_buf_page(struct hns_roce_buf *buf, u32 idx)
+{
+ return hns_roce_buf_dma_addr(buf, idx << buf->page_shift);
}
-int hns_roce_init_uar_table(struct hns_roce_dev *dev);
+#define hr_hw_page_align(x) ALIGN(x, 1 << HNS_HW_PAGE_SHIFT)
+
+static inline u64 to_hr_hw_page_addr(u64 addr)
+{
+ return addr >> HNS_HW_PAGE_SHIFT;
+}
+
+static inline u32 to_hr_hw_page_shift(u32 page_shift)
+{
+ return page_shift - HNS_HW_PAGE_SHIFT;
+}
+
+static inline u32 to_hr_hem_hopnum(u32 hopnum, u32 count)
+{
+ if (count > 0)
+ return hopnum == HNS_ROCE_HOP_NUM_0 ? 0 : hopnum;
+
+ return 0;
+}
+
+static inline u32 to_hr_hem_entries_size(u32 count, u32 buf_shift)
+{
+ return hr_hw_page_align(count << buf_shift);
+}
+
+static inline u32 to_hr_hem_entries_count(u32 count, u32 buf_shift)
+{
+ return hr_hw_page_align(count << buf_shift) >> buf_shift;
+}
+
+static inline u32 to_hr_hem_entries_shift(u32 count, u32 buf_shift)
+{
+ if (!count)
+ return 0;
+
+ return ilog2(to_hr_hem_entries_count(count, buf_shift));
+}
+
+#define DSCP_SHIFT 2
+
+static inline u8 get_tclass(const struct ib_global_route *grh)
+{
+ return grh->sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP ?
+ grh->traffic_class >> DSCP_SHIFT : grh->traffic_class;
+}
+
+static inline struct net_device *get_hr_netdev(struct hns_roce_dev *hr_dev,
+ u8 port)
+{
+ return hr_dev->iboe.netdevs[port];
+}
+
+static inline u8 get_hr_bus_num(struct hns_roce_dev *hr_dev)
+{
+ return hr_dev->pci_dev->bus->number;
+}
+
+void hns_roce_init_uar_table(struct hns_roce_dev *dev);
int hns_roce_uar_alloc(struct hns_roce_dev *dev, struct hns_roce_uar *uar);
-void hns_roce_uar_free(struct hns_roce_dev *dev, struct hns_roce_uar *uar);
-void hns_roce_cleanup_uar_table(struct hns_roce_dev *dev);
int hns_roce_cmd_init(struct hns_roce_dev *hr_dev);
void hns_roce_cmd_cleanup(struct hns_roce_dev *hr_dev);
@@ -655,100 +1191,137 @@ void hns_roce_cmd_event(struct hns_roce_dev *hr_dev, u16 token, u8 status,
int hns_roce_cmd_use_events(struct hns_roce_dev *hr_dev);
void hns_roce_cmd_use_polling(struct hns_roce_dev *hr_dev);
-int hns_roce_mtt_init(struct hns_roce_dev *hr_dev, int npages, int page_shift,
- struct hns_roce_mtt *mtt);
-void hns_roce_mtt_cleanup(struct hns_roce_dev *hr_dev,
- struct hns_roce_mtt *mtt);
-int hns_roce_buf_write_mtt(struct hns_roce_dev *hr_dev,
- struct hns_roce_mtt *mtt, struct hns_roce_buf *buf);
-
-int hns_roce_init_pd_table(struct hns_roce_dev *hr_dev);
-int hns_roce_init_mr_table(struct hns_roce_dev *hr_dev);
-int hns_roce_init_eq_table(struct hns_roce_dev *hr_dev);
-int hns_roce_init_cq_table(struct hns_roce_dev *hr_dev);
+/* hns roce hw need current block and next block addr from mtt */
+#define MTT_MIN_COUNT 2
+static inline dma_addr_t hns_roce_get_mtr_ba(struct hns_roce_mtr *mtr)
+{
+ return mtr->hem_cfg.root_ba;
+}
+
+int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
+ u32 offset, u64 *mtt_buf, int mtt_max);
+int hns_roce_mtr_create(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
+ struct hns_roce_buf_attr *buf_attr,
+ unsigned int page_shift, struct ib_udata *udata,
+ unsigned long user_addr);
+void hns_roce_mtr_destroy(struct hns_roce_dev *hr_dev,
+ struct hns_roce_mtr *mtr);
+int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
+ dma_addr_t *pages, unsigned int page_cnt);
+
+void hns_roce_init_pd_table(struct hns_roce_dev *hr_dev);
+void hns_roce_init_mr_table(struct hns_roce_dev *hr_dev);
+void hns_roce_init_cq_table(struct hns_roce_dev *hr_dev);
int hns_roce_init_qp_table(struct hns_roce_dev *hr_dev);
+void hns_roce_init_srq_table(struct hns_roce_dev *hr_dev);
+void hns_roce_init_xrcd_table(struct hns_roce_dev *hr_dev);
-void hns_roce_cleanup_pd_table(struct hns_roce_dev *hr_dev);
-void hns_roce_cleanup_mr_table(struct hns_roce_dev *hr_dev);
-void hns_roce_cleanup_eq_table(struct hns_roce_dev *hr_dev);
void hns_roce_cleanup_cq_table(struct hns_roce_dev *hr_dev);
void hns_roce_cleanup_qp_table(struct hns_roce_dev *hr_dev);
-int hns_roce_bitmap_alloc(struct hns_roce_bitmap *bitmap, unsigned long *obj);
-void hns_roce_bitmap_free(struct hns_roce_bitmap *bitmap, unsigned long obj,
- int rr);
-int hns_roce_bitmap_init(struct hns_roce_bitmap *bitmap, u32 num, u32 mask,
- u32 reserved_bot, u32 resetrved_top);
-void hns_roce_bitmap_cleanup(struct hns_roce_bitmap *bitmap);
void hns_roce_cleanup_bitmap(struct hns_roce_dev *hr_dev);
-int hns_roce_bitmap_alloc_range(struct hns_roce_bitmap *bitmap, int cnt,
- int align, unsigned long *obj);
-void hns_roce_bitmap_free_range(struct hns_roce_bitmap *bitmap,
- unsigned long obj, int cnt,
- int rr);
-
-struct ib_ah *hns_roce_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
- struct ib_udata *udata);
-int hns_roce_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr);
-int hns_roce_destroy_ah(struct ib_ah *ah);
-
-struct ib_pd *hns_roce_alloc_pd(struct ib_device *ib_dev,
- struct ib_ucontext *context,
- struct ib_udata *udata);
-int hns_roce_dealloc_pd(struct ib_pd *pd);
+
+int hns_roce_create_ah(struct ib_ah *ah, struct rdma_ah_init_attr *init_attr,
+ struct ib_udata *udata);
+int hns_roce_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr);
+static inline int hns_roce_destroy_ah(struct ib_ah *ah, u32 flags)
+{
+ return 0;
+}
+
+int hns_roce_alloc_pd(struct ib_pd *pd, struct ib_udata *udata);
+int hns_roce_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata);
struct ib_mr *hns_roce_get_dma_mr(struct ib_pd *pd, int acc);
struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt_addr, int access_flags,
+ struct ib_dmah *dmah,
struct ib_udata *udata);
-int hns_roce_dereg_mr(struct ib_mr *ibmr);
-int hns_roce_hw2sw_mpt(struct hns_roce_dev *hr_dev,
- struct hns_roce_cmd_mailbox *mailbox,
- unsigned long mpt_index);
+struct ib_mr *hns_roce_rereg_user_mr(struct ib_mr *mr, int flags, u64 start,
+ u64 length, u64 virt_addr,
+ int mr_access_flags, struct ib_pd *pd,
+ struct ib_udata *udata);
+struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
+ u32 max_num_sg);
+int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
+ unsigned int *sg_offset);
+int hns_roce_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata);
unsigned long key_to_hw_index(u32 key);
-void hns_roce_buf_free(struct hns_roce_dev *hr_dev, u32 size,
- struct hns_roce_buf *buf);
-int hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, u32 max_direct,
- struct hns_roce_buf *buf);
+void hns_roce_buf_free(struct hns_roce_dev *hr_dev, struct hns_roce_buf *buf);
+struct hns_roce_buf *hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size,
+ u32 page_shift, u32 flags);
+
+int hns_roce_get_kmem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs,
+ int buf_cnt, struct hns_roce_buf *buf,
+ unsigned int page_shift);
+int hns_roce_get_umem_bufs(dma_addr_t *bufs,
+ int buf_cnt, struct ib_umem *umem,
+ unsigned int page_shift);
-int hns_roce_ib_umem_write_mtt(struct hns_roce_dev *hr_dev,
- struct hns_roce_mtt *mtt, struct ib_umem *umem);
+int hns_roce_create_srq(struct ib_srq *srq,
+ struct ib_srq_init_attr *srq_init_attr,
+ struct ib_udata *udata);
+int hns_roce_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata);
-struct ib_qp *hns_roce_create_qp(struct ib_pd *ib_pd,
- struct ib_qp_init_attr *init_attr,
- struct ib_udata *udata);
+int hns_roce_alloc_xrcd(struct ib_xrcd *ib_xrcd, struct ib_udata *udata);
+int hns_roce_dealloc_xrcd(struct ib_xrcd *ib_xrcd, struct ib_udata *udata);
+
+int hns_roce_create_qp(struct ib_qp *ib_qp, struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata);
int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
int attr_mask, struct ib_udata *udata);
-void *get_recv_wqe(struct hns_roce_qp *hr_qp, int n);
-void *get_send_wqe(struct hns_roce_qp *hr_qp, int n);
-bool hns_roce_wq_overflow(struct hns_roce_wq *hr_wq, int nreq,
+void init_flush_work(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp);
+void *hns_roce_get_recv_wqe(struct hns_roce_qp *hr_qp, unsigned int n);
+void *hns_roce_get_send_wqe(struct hns_roce_qp *hr_qp, unsigned int n);
+void *hns_roce_get_extend_sge(struct hns_roce_qp *hr_qp, unsigned int n);
+bool hns_roce_wq_overflow(struct hns_roce_wq *hr_wq, u32 nreq,
struct ib_cq *ib_cq);
-enum hns_roce_qp_state to_hns_roce_state(enum ib_qp_state state);
void hns_roce_lock_cqs(struct hns_roce_cq *send_cq,
struct hns_roce_cq *recv_cq);
void hns_roce_unlock_cqs(struct hns_roce_cq *send_cq,
struct hns_roce_cq *recv_cq);
void hns_roce_qp_remove(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp);
-void hns_roce_qp_free(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp);
-void hns_roce_release_range_qp(struct hns_roce_dev *hr_dev, int base_qpn,
- int cnt);
-__be32 send_ieth(struct ib_send_wr *wr);
+void hns_roce_qp_destroy(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
+ struct ib_udata *udata);
+__be32 send_ieth(const struct ib_send_wr *wr);
int to_hr_qp_type(int qp_type);
-struct ib_cq *hns_roce_ib_create_cq(struct ib_device *ib_dev,
- const struct ib_cq_init_attr *attr,
- struct ib_ucontext *context,
- struct ib_udata *udata);
+int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr,
+ struct uverbs_attr_bundle *attrs);
-int hns_roce_ib_destroy_cq(struct ib_cq *ib_cq);
-void hns_roce_free_cq(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq);
+int hns_roce_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata);
+int hns_roce_db_map_user(struct hns_roce_ucontext *context, unsigned long virt,
+ struct hns_roce_db *db);
+void hns_roce_db_unmap_user(struct hns_roce_ucontext *context,
+ struct hns_roce_db *db);
+int hns_roce_alloc_db(struct hns_roce_dev *hr_dev, struct hns_roce_db *db,
+ int order);
+void hns_roce_free_db(struct hns_roce_dev *hr_dev, struct hns_roce_db *db);
void hns_roce_cq_completion(struct hns_roce_dev *hr_dev, u32 cqn);
void hns_roce_cq_event(struct hns_roce_dev *hr_dev, u32 cqn, int event_type);
+void flush_cqe(struct hns_roce_dev *dev, struct hns_roce_qp *qp);
void hns_roce_qp_event(struct hns_roce_dev *hr_dev, u32 qpn, int event_type);
-int hns_get_gid_index(struct hns_roce_dev *hr_dev, u8 port, int gid_index);
-
-extern struct hns_roce_hw hns_roce_hw_v1;
+void hns_roce_flush_cqe(struct hns_roce_dev *hr_dev, u32 qpn);
+void hns_roce_srq_event(struct hns_roce_dev *hr_dev, u32 srqn, int event_type);
+void hns_roce_handle_device_err(struct hns_roce_dev *hr_dev);
+int hns_roce_init(struct hns_roce_dev *hr_dev);
+void hns_roce_exit(struct hns_roce_dev *hr_dev, bool bond_cleanup);
+int hns_roce_fill_res_cq_entry(struct sk_buff *msg, struct ib_cq *ib_cq);
+int hns_roce_fill_res_cq_entry_raw(struct sk_buff *msg, struct ib_cq *ib_cq);
+int hns_roce_fill_res_qp_entry(struct sk_buff *msg, struct ib_qp *ib_qp);
+int hns_roce_fill_res_qp_entry_raw(struct sk_buff *msg, struct ib_qp *ib_qp);
+int hns_roce_fill_res_mr_entry(struct sk_buff *msg, struct ib_mr *ib_mr);
+int hns_roce_fill_res_mr_entry_raw(struct sk_buff *msg, struct ib_mr *ib_mr);
+int hns_roce_fill_res_srq_entry(struct sk_buff *msg, struct ib_srq *ib_srq);
+int hns_roce_fill_res_srq_entry_raw(struct sk_buff *msg, struct ib_srq *ib_srq);
+struct hns_user_mmap_entry *
+hns_roce_user_mmap_entry_insert(struct ib_ucontext *ucontext, u64 address,
+ size_t length,
+ enum hns_roce_mmap_type mmap_type);
+bool check_sl_valid(struct hns_roce_dev *hr_dev, u8 sl);
+void hns_roce_put_cq_bankid_for_uctx(struct hns_roce_ucontext *uctx);
+void hns_roce_get_cq_bankid_for_uctx(struct hns_roce_ucontext *uctx);
#endif /* _HNS_ROCE_DEVICE_H */
diff --git a/drivers/infiniband/hw/hns/hns_roce_eq.c b/drivers/infiniband/hw/hns/hns_roce_eq.c
deleted file mode 100644
index 50f864935a0e..000000000000
--- a/drivers/infiniband/hw/hns/hns_roce_eq.c
+++ /dev/null
@@ -1,758 +0,0 @@
-/*
- * Copyright (c) 2016 Hisilicon Limited.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/platform_device.h>
-#include "hns_roce_common.h"
-#include "hns_roce_device.h"
-#include "hns_roce_eq.h"
-
-static void eq_set_cons_index(struct hns_roce_eq *eq, int req_not)
-{
- roce_raw_write((eq->cons_index & CONS_INDEX_MASK) |
- (req_not << eq->log_entries), eq->doorbell);
- /* Memory barrier */
- mb();
-}
-
-static struct hns_roce_aeqe *get_aeqe(struct hns_roce_eq *eq, u32 entry)
-{
- unsigned long off = (entry & (eq->entries - 1)) *
- HNS_ROCE_AEQ_ENTRY_SIZE;
-
- return (struct hns_roce_aeqe *)((u8 *)
- (eq->buf_list[off / HNS_ROCE_BA_SIZE].buf) +
- off % HNS_ROCE_BA_SIZE);
-}
-
-static struct hns_roce_aeqe *next_aeqe_sw(struct hns_roce_eq *eq)
-{
- struct hns_roce_aeqe *aeqe = get_aeqe(eq, eq->cons_index);
-
- return (roce_get_bit(aeqe->asyn, HNS_ROCE_AEQE_U32_4_OWNER_S) ^
- !!(eq->cons_index & eq->entries)) ? aeqe : NULL;
-}
-
-static void hns_roce_wq_catas_err_handle(struct hns_roce_dev *hr_dev,
- struct hns_roce_aeqe *aeqe, int qpn)
-{
- struct device *dev = &hr_dev->pdev->dev;
-
- dev_warn(dev, "Local Work Queue Catastrophic Error.\n");
- switch (roce_get_field(aeqe->asyn, HNS_ROCE_AEQE_U32_4_EVENT_SUB_TYPE_M,
- HNS_ROCE_AEQE_U32_4_EVENT_SUB_TYPE_S)) {
- case HNS_ROCE_LWQCE_QPC_ERROR:
- dev_warn(dev, "QP %d, QPC error.\n", qpn);
- break;
- case HNS_ROCE_LWQCE_MTU_ERROR:
- dev_warn(dev, "QP %d, MTU error.\n", qpn);
- break;
- case HNS_ROCE_LWQCE_WQE_BA_ADDR_ERROR:
- dev_warn(dev, "QP %d, WQE BA addr error.\n", qpn);
- break;
- case HNS_ROCE_LWQCE_WQE_ADDR_ERROR:
- dev_warn(dev, "QP %d, WQE addr error.\n", qpn);
- break;
- case HNS_ROCE_LWQCE_SQ_WQE_SHIFT_ERROR:
- dev_warn(dev, "QP %d, WQE shift error\n", qpn);
- break;
- case HNS_ROCE_LWQCE_SL_ERROR:
- dev_warn(dev, "QP %d, SL error.\n", qpn);
- break;
- case HNS_ROCE_LWQCE_PORT_ERROR:
- dev_warn(dev, "QP %d, port error.\n", qpn);
- break;
- default:
- break;
- }
-}
-
-static void hns_roce_local_wq_access_err_handle(struct hns_roce_dev *hr_dev,
- struct hns_roce_aeqe *aeqe,
- int qpn)
-{
- struct device *dev = &hr_dev->pdev->dev;
-
- dev_warn(dev, "Local Access Violation Work Queue Error.\n");
- switch (roce_get_field(aeqe->asyn, HNS_ROCE_AEQE_U32_4_EVENT_SUB_TYPE_M,
- HNS_ROCE_AEQE_U32_4_EVENT_SUB_TYPE_S)) {
- case HNS_ROCE_LAVWQE_R_KEY_VIOLATION:
- dev_warn(dev, "QP %d, R_key violation.\n", qpn);
- break;
- case HNS_ROCE_LAVWQE_LENGTH_ERROR:
- dev_warn(dev, "QP %d, length error.\n", qpn);
- break;
- case HNS_ROCE_LAVWQE_VA_ERROR:
- dev_warn(dev, "QP %d, VA error.\n", qpn);
- break;
- case HNS_ROCE_LAVWQE_PD_ERROR:
- dev_err(dev, "QP %d, PD error.\n", qpn);
- break;
- case HNS_ROCE_LAVWQE_RW_ACC_ERROR:
- dev_warn(dev, "QP %d, rw acc error.\n", qpn);
- break;
- case HNS_ROCE_LAVWQE_KEY_STATE_ERROR:
- dev_warn(dev, "QP %d, key state error.\n", qpn);
- break;
- case HNS_ROCE_LAVWQE_MR_OPERATION_ERROR:
- dev_warn(dev, "QP %d, MR operation error.\n", qpn);
- break;
- default:
- break;
- }
-}
-
-static void hns_roce_qp_err_handle(struct hns_roce_dev *hr_dev,
- struct hns_roce_aeqe *aeqe,
- int event_type)
-{
- struct device *dev = &hr_dev->pdev->dev;
- int phy_port;
- int qpn;
-
- qpn = roce_get_field(aeqe->event.qp_event.qp,
- HNS_ROCE_AEQE_EVENT_QP_EVENT_QP_QPN_M,
- HNS_ROCE_AEQE_EVENT_QP_EVENT_QP_QPN_S);
- phy_port = roce_get_field(aeqe->event.qp_event.qp,
- HNS_ROCE_AEQE_EVENT_QP_EVENT_PORT_NUM_M,
- HNS_ROCE_AEQE_EVENT_QP_EVENT_PORT_NUM_S);
- if (qpn <= 1)
- qpn = HNS_ROCE_MAX_PORTS * qpn + phy_port;
-
- switch (event_type) {
- case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR:
- dev_warn(dev, "Invalid Req Local Work Queue Error.\n"
- "QP %d, phy_port %d.\n", qpn, phy_port);
- break;
- case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR:
- hns_roce_wq_catas_err_handle(hr_dev, aeqe, qpn);
- break;
- case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR:
- hns_roce_local_wq_access_err_handle(hr_dev, aeqe, qpn);
- break;
- default:
- break;
- }
-
- hns_roce_qp_event(hr_dev, qpn, event_type);
-}
-
-static void hns_roce_cq_err_handle(struct hns_roce_dev *hr_dev,
- struct hns_roce_aeqe *aeqe,
- int event_type)
-{
- struct device *dev = &hr_dev->pdev->dev;
- u32 cqn;
-
- cqn = le32_to_cpu(roce_get_field(aeqe->event.cq_event.cq,
- HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_M,
- HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_S));
-
- switch (event_type) {
- case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR:
- dev_warn(dev, "CQ 0x%x access err.\n", cqn);
- break;
- case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW:
- dev_warn(dev, "CQ 0x%x overflow\n", cqn);
- break;
- case HNS_ROCE_EVENT_TYPE_CQ_ID_INVALID:
- dev_warn(dev, "CQ 0x%x ID invalid.\n", cqn);
- break;
- default:
- break;
- }
-
- hns_roce_cq_event(hr_dev, cqn, event_type);
-}
-
-static void hns_roce_db_overflow_handle(struct hns_roce_dev *hr_dev,
- struct hns_roce_aeqe *aeqe)
-{
- struct device *dev = &hr_dev->pdev->dev;
-
- switch (roce_get_field(aeqe->asyn, HNS_ROCE_AEQE_U32_4_EVENT_SUB_TYPE_M,
- HNS_ROCE_AEQE_U32_4_EVENT_SUB_TYPE_S)) {
- case HNS_ROCE_DB_SUBTYPE_SDB_OVF:
- dev_warn(dev, "SDB overflow.\n");
- break;
- case HNS_ROCE_DB_SUBTYPE_SDB_ALM_OVF:
- dev_warn(dev, "SDB almost overflow.\n");
- break;
- case HNS_ROCE_DB_SUBTYPE_SDB_ALM_EMP:
- dev_warn(dev, "SDB almost empty.\n");
- break;
- case HNS_ROCE_DB_SUBTYPE_ODB_OVF:
- dev_warn(dev, "ODB overflow.\n");
- break;
- case HNS_ROCE_DB_SUBTYPE_ODB_ALM_OVF:
- dev_warn(dev, "ODB almost overflow.\n");
- break;
- case HNS_ROCE_DB_SUBTYPE_ODB_ALM_EMP:
- dev_warn(dev, "SDB almost empty.\n");
- break;
- default:
- break;
- }
-}
-
-static int hns_roce_aeq_int(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq)
-{
- struct device *dev = &hr_dev->pdev->dev;
- struct hns_roce_aeqe *aeqe;
- int aeqes_found = 0;
- int event_type;
-
- while ((aeqe = next_aeqe_sw(eq))) {
- dev_dbg(dev, "aeqe = %p, aeqe->asyn.event_type = 0x%lx\n", aeqe,
- roce_get_field(aeqe->asyn,
- HNS_ROCE_AEQE_U32_4_EVENT_TYPE_M,
- HNS_ROCE_AEQE_U32_4_EVENT_TYPE_S));
- /* Memory barrier */
- rmb();
-
- event_type = roce_get_field(aeqe->asyn,
- HNS_ROCE_AEQE_U32_4_EVENT_TYPE_M,
- HNS_ROCE_AEQE_U32_4_EVENT_TYPE_S);
- switch (event_type) {
- case HNS_ROCE_EVENT_TYPE_PATH_MIG:
- dev_warn(dev, "PATH MIG not supported\n");
- break;
- case HNS_ROCE_EVENT_TYPE_COMM_EST:
- dev_warn(dev, "COMMUNICATION established\n");
- break;
- case HNS_ROCE_EVENT_TYPE_SQ_DRAINED:
- dev_warn(dev, "SQ DRAINED not supported\n");
- break;
- case HNS_ROCE_EVENT_TYPE_PATH_MIG_FAILED:
- dev_warn(dev, "PATH MIG failed\n");
- break;
- case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR:
- case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR:
- case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR:
- hns_roce_qp_err_handle(hr_dev, aeqe, event_type);
- break;
- case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH:
- case HNS_ROCE_EVENT_TYPE_SRQ_CATAS_ERROR:
- case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH:
- dev_warn(dev, "SRQ not support!\n");
- break;
- case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR:
- case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW:
- case HNS_ROCE_EVENT_TYPE_CQ_ID_INVALID:
- hns_roce_cq_err_handle(hr_dev, aeqe, event_type);
- break;
- case HNS_ROCE_EVENT_TYPE_PORT_CHANGE:
- dev_warn(dev, "port change.\n");
- break;
- case HNS_ROCE_EVENT_TYPE_MB:
- hns_roce_cmd_event(hr_dev,
- le16_to_cpu(aeqe->event.cmd.token),
- aeqe->event.cmd.status,
- le64_to_cpu(aeqe->event.cmd.out_param
- ));
- break;
- case HNS_ROCE_EVENT_TYPE_DB_OVERFLOW:
- hns_roce_db_overflow_handle(hr_dev, aeqe);
- break;
- case HNS_ROCE_EVENT_TYPE_CEQ_OVERFLOW:
- dev_warn(dev, "CEQ 0x%lx overflow.\n",
- roce_get_field(aeqe->event.ce_event.ceqe,
- HNS_ROCE_AEQE_EVENT_CE_EVENT_CEQE_CEQN_M,
- HNS_ROCE_AEQE_EVENT_CE_EVENT_CEQE_CEQN_S));
- break;
- default:
- dev_warn(dev, "Unhandled event %d on EQ %d at index %u\n",
- event_type, eq->eqn, eq->cons_index);
- break;
- };
-
- eq->cons_index++;
- aeqes_found = 1;
-
- if (eq->cons_index > 2 * hr_dev->caps.aeqe_depth - 1) {
- dev_warn(dev, "cons_index overflow, set back to zero\n"
- );
- eq->cons_index = 0;
- }
- }
-
- eq_set_cons_index(eq, 0);
-
- return aeqes_found;
-}
-
-static struct hns_roce_ceqe *get_ceqe(struct hns_roce_eq *eq, u32 entry)
-{
- unsigned long off = (entry & (eq->entries - 1)) *
- HNS_ROCE_CEQ_ENTRY_SIZE;
-
- return (struct hns_roce_ceqe *)((u8 *)
- (eq->buf_list[off / HNS_ROCE_BA_SIZE].buf) +
- off % HNS_ROCE_BA_SIZE);
-}
-
-static struct hns_roce_ceqe *next_ceqe_sw(struct hns_roce_eq *eq)
-{
- struct hns_roce_ceqe *ceqe = get_ceqe(eq, eq->cons_index);
-
- return (!!(roce_get_bit(ceqe->ceqe.comp,
- HNS_ROCE_CEQE_CEQE_COMP_OWNER_S))) ^
- (!!(eq->cons_index & eq->entries)) ? ceqe : NULL;
-}
-
-static int hns_roce_ceq_int(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq)
-{
- struct hns_roce_ceqe *ceqe;
- int ceqes_found = 0;
- u32 cqn;
-
- while ((ceqe = next_ceqe_sw(eq))) {
- /* Memory barrier */
- rmb();
- cqn = roce_get_field(ceqe->ceqe.comp,
- HNS_ROCE_CEQE_CEQE_COMP_CQN_M,
- HNS_ROCE_CEQE_CEQE_COMP_CQN_S);
- hns_roce_cq_completion(hr_dev, cqn);
-
- ++eq->cons_index;
- ceqes_found = 1;
-
- if (eq->cons_index > 2 * hr_dev->caps.ceqe_depth[eq->eqn] - 1) {
- dev_warn(&eq->hr_dev->pdev->dev,
- "cons_index overflow, set back to zero\n");
- eq->cons_index = 0;
- }
- }
-
- eq_set_cons_index(eq, 0);
-
- return ceqes_found;
-}
-
-static int hns_roce_aeq_ovf_int(struct hns_roce_dev *hr_dev,
- struct hns_roce_eq *eq)
-{
- struct device *dev = &eq->hr_dev->pdev->dev;
- int eqovf_found = 0;
- u32 caepaemask_val;
- u32 cealmovf_val;
- u32 caepaest_val;
- u32 aeshift_val;
- u32 ceshift_val;
- u32 cemask_val;
- int i = 0;
-
- /**
- * AEQ overflow ECC mult bit err CEQ overflow alarm
- * must clear interrupt, mask irq, clear irq, cancel mask operation
- */
- aeshift_val = roce_read(hr_dev, ROCEE_CAEP_AEQC_AEQE_SHIFT_REG);
-
- if (roce_get_bit(aeshift_val,
- ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQ_ALM_OVF_INT_ST_S) == 1) {
- dev_warn(dev, "AEQ overflow!\n");
-
- /* Set mask */
- caepaemask_val = roce_read(hr_dev, ROCEE_CAEP_AE_MASK_REG);
- roce_set_bit(caepaemask_val,
- ROCEE_CAEP_AE_MASK_CAEP_AEQ_ALM_OVF_MASK_S,
- HNS_ROCE_INT_MASK_ENABLE);
- roce_write(hr_dev, ROCEE_CAEP_AE_MASK_REG, caepaemask_val);
-
- /* Clear int state(INT_WC : write 1 clear) */
- caepaest_val = roce_read(hr_dev, ROCEE_CAEP_AE_ST_REG);
- roce_set_bit(caepaest_val,
- ROCEE_CAEP_AE_ST_CAEP_AEQ_ALM_OVF_S, 1);
- roce_write(hr_dev, ROCEE_CAEP_AE_ST_REG, caepaest_val);
-
- /* Clear mask */
- caepaemask_val = roce_read(hr_dev, ROCEE_CAEP_AE_MASK_REG);
- roce_set_bit(caepaemask_val,
- ROCEE_CAEP_AE_MASK_CAEP_AEQ_ALM_OVF_MASK_S,
- HNS_ROCE_INT_MASK_DISABLE);
- roce_write(hr_dev, ROCEE_CAEP_AE_MASK_REG, caepaemask_val);
- }
-
- /* CEQ almost overflow */
- for (i = 0; i < hr_dev->caps.num_comp_vectors; i++) {
- ceshift_val = roce_read(hr_dev, ROCEE_CAEP_CEQC_SHIFT_0_REG +
- i * CEQ_REG_OFFSET);
-
- if (roce_get_bit(ceshift_val,
- ROCEE_CAEP_CEQC_SHIFT_CAEP_CEQ_ALM_OVF_INT_ST_S) == 1) {
- dev_warn(dev, "CEQ[%d] almost overflow!\n", i);
- eqovf_found++;
-
- /* Set mask */
- cemask_val = roce_read(hr_dev,
- ROCEE_CAEP_CE_IRQ_MASK_0_REG +
- i * CEQ_REG_OFFSET);
- roce_set_bit(cemask_val,
- ROCEE_CAEP_CE_IRQ_MASK_CAEP_CEQ_ALM_OVF_MASK_S,
- HNS_ROCE_INT_MASK_ENABLE);
- roce_write(hr_dev, ROCEE_CAEP_CE_IRQ_MASK_0_REG +
- i * CEQ_REG_OFFSET, cemask_val);
-
- /* Clear int state(INT_WC : write 1 clear) */
- cealmovf_val = roce_read(hr_dev,
- ROCEE_CAEP_CEQ_ALM_OVF_0_REG +
- i * CEQ_REG_OFFSET);
- roce_set_bit(cealmovf_val,
- ROCEE_CAEP_CEQ_ALM_OVF_CAEP_CEQ_ALM_OVF_S,
- 1);
- roce_write(hr_dev, ROCEE_CAEP_CEQ_ALM_OVF_0_REG +
- i * CEQ_REG_OFFSET, cealmovf_val);
-
- /* Clear mask */
- cemask_val = roce_read(hr_dev,
- ROCEE_CAEP_CE_IRQ_MASK_0_REG +
- i * CEQ_REG_OFFSET);
- roce_set_bit(cemask_val,
- ROCEE_CAEP_CE_IRQ_MASK_CAEP_CEQ_ALM_OVF_MASK_S,
- HNS_ROCE_INT_MASK_DISABLE);
- roce_write(hr_dev, ROCEE_CAEP_CE_IRQ_MASK_0_REG +
- i * CEQ_REG_OFFSET, cemask_val);
- }
- }
-
- /* ECC multi-bit error alarm */
- dev_warn(dev, "ECC UCERR ALARM: 0x%x, 0x%x, 0x%x\n",
- roce_read(hr_dev, ROCEE_ECC_UCERR_ALM0_REG),
- roce_read(hr_dev, ROCEE_ECC_UCERR_ALM1_REG),
- roce_read(hr_dev, ROCEE_ECC_UCERR_ALM2_REG));
-
- dev_warn(dev, "ECC CERR ALARM: 0x%x, 0x%x, 0x%x\n",
- roce_read(hr_dev, ROCEE_ECC_CERR_ALM0_REG),
- roce_read(hr_dev, ROCEE_ECC_CERR_ALM1_REG),
- roce_read(hr_dev, ROCEE_ECC_CERR_ALM2_REG));
-
- return eqovf_found;
-}
-
-static int hns_roce_eq_int(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq)
-{
- int eqes_found = 0;
-
- if (likely(eq->type_flag == HNS_ROCE_CEQ))
- /* CEQ irq routine, CEQ is pulse irq, not clear */
- eqes_found = hns_roce_ceq_int(hr_dev, eq);
- else if (likely(eq->type_flag == HNS_ROCE_AEQ))
- /* AEQ irq routine, AEQ is pulse irq, not clear */
- eqes_found = hns_roce_aeq_int(hr_dev, eq);
- else
- /* AEQ queue overflow irq */
- eqes_found = hns_roce_aeq_ovf_int(hr_dev, eq);
-
- return eqes_found;
-}
-
-static irqreturn_t hns_roce_msi_x_interrupt(int irq, void *eq_ptr)
-{
- int int_work = 0;
- struct hns_roce_eq *eq = eq_ptr;
- struct hns_roce_dev *hr_dev = eq->hr_dev;
-
- int_work = hns_roce_eq_int(hr_dev, eq);
-
- return IRQ_RETVAL(int_work);
-}
-
-static void hns_roce_enable_eq(struct hns_roce_dev *hr_dev, int eq_num,
- int enable_flag)
-{
- void __iomem *eqc = hr_dev->eq_table.eqc_base[eq_num];
- u32 val;
-
- val = readl(eqc);
-
- if (enable_flag)
- roce_set_field(val,
- ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_M,
- ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_S,
- HNS_ROCE_EQ_STAT_VALID);
- else
- roce_set_field(val,
- ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_M,
- ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_S,
- HNS_ROCE_EQ_STAT_INVALID);
- writel(val, eqc);
-}
-
-static int hns_roce_create_eq(struct hns_roce_dev *hr_dev,
- struct hns_roce_eq *eq)
-{
- void __iomem *eqc = hr_dev->eq_table.eqc_base[eq->eqn];
- struct device *dev = &hr_dev->pdev->dev;
- dma_addr_t tmp_dma_addr;
- u32 eqconsindx_val = 0;
- u32 eqcuridx_val = 0;
- u32 eqshift_val = 0;
- int num_bas = 0;
- int ret;
- int i;
-
- num_bas = (PAGE_ALIGN(eq->entries * eq->eqe_size) +
- HNS_ROCE_BA_SIZE - 1) / HNS_ROCE_BA_SIZE;
-
- if ((eq->entries * eq->eqe_size) > HNS_ROCE_BA_SIZE) {
- dev_err(dev, "[error]eq buf %d gt ba size(%d) need bas=%d\n",
- (eq->entries * eq->eqe_size), HNS_ROCE_BA_SIZE,
- num_bas);
- return -EINVAL;
- }
-
- eq->buf_list = kcalloc(num_bas, sizeof(*eq->buf_list), GFP_KERNEL);
- if (!eq->buf_list)
- return -ENOMEM;
-
- for (i = 0; i < num_bas; ++i) {
- eq->buf_list[i].buf = dma_alloc_coherent(dev, HNS_ROCE_BA_SIZE,
- &tmp_dma_addr,
- GFP_KERNEL);
- if (!eq->buf_list[i].buf) {
- ret = -ENOMEM;
- goto err_out_free_pages;
- }
-
- eq->buf_list[i].map = tmp_dma_addr;
- memset(eq->buf_list[i].buf, 0, HNS_ROCE_BA_SIZE);
- }
- eq->cons_index = 0;
- roce_set_field(eqshift_val,
- ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_M,
- ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_S,
- HNS_ROCE_EQ_STAT_INVALID);
- roce_set_field(eqshift_val,
- ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_AEQE_SHIFT_M,
- ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_AEQE_SHIFT_S,
- eq->log_entries);
- writel(eqshift_val, eqc);
-
- /* Configure eq extended address 12~44bit */
- writel((u32)(eq->buf_list[0].map >> 12), (u8 *)eqc + 4);
-
- /*
- * Configure eq extended address 45~49 bit.
- * 44 = 32 + 12, When evaluating addr to hardware, shift 12 because of
- * using 4K page, and shift more 32 because of
- * caculating the high 32 bit value evaluated to hardware.
- */
- roce_set_field(eqcuridx_val, ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQ_BT_H_M,
- ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQ_BT_H_S,
- eq->buf_list[0].map >> 44);
- roce_set_field(eqcuridx_val,
- ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQE_CUR_IDX_M,
- ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQE_CUR_IDX_S, 0);
- writel(eqcuridx_val, (u8 *)eqc + 8);
-
- /* Configure eq consumer index */
- roce_set_field(eqconsindx_val,
- ROCEE_CAEP_AEQE_CONS_IDX_CAEP_AEQE_CONS_IDX_M,
- ROCEE_CAEP_AEQE_CONS_IDX_CAEP_AEQE_CONS_IDX_S, 0);
- writel(eqconsindx_val, (u8 *)eqc + 0xc);
-
- return 0;
-
-err_out_free_pages:
- for (i = i - 1; i >= 0; i--)
- dma_free_coherent(dev, HNS_ROCE_BA_SIZE, eq->buf_list[i].buf,
- eq->buf_list[i].map);
-
- kfree(eq->buf_list);
- return ret;
-}
-
-static void hns_roce_free_eq(struct hns_roce_dev *hr_dev,
- struct hns_roce_eq *eq)
-{
- int i = 0;
- int npages = (PAGE_ALIGN(eq->eqe_size * eq->entries) +
- HNS_ROCE_BA_SIZE - 1) / HNS_ROCE_BA_SIZE;
-
- if (!eq->buf_list)
- return;
-
- for (i = 0; i < npages; ++i)
- dma_free_coherent(&hr_dev->pdev->dev, HNS_ROCE_BA_SIZE,
- eq->buf_list[i].buf, eq->buf_list[i].map);
-
- kfree(eq->buf_list);
-}
-
-static void hns_roce_int_mask_en(struct hns_roce_dev *hr_dev)
-{
- int i = 0;
- u32 aemask_val;
- int masken = 0;
-
- /* AEQ INT */
- aemask_val = roce_read(hr_dev, ROCEE_CAEP_AE_MASK_REG);
- roce_set_bit(aemask_val, ROCEE_CAEP_AE_MASK_CAEP_AEQ_ALM_OVF_MASK_S,
- masken);
- roce_set_bit(aemask_val, ROCEE_CAEP_AE_MASK_CAEP_AE_IRQ_MASK_S, masken);
- roce_write(hr_dev, ROCEE_CAEP_AE_MASK_REG, aemask_val);
-
- /* CEQ INT */
- for (i = 0; i < hr_dev->caps.num_comp_vectors; i++) {
- /* IRQ mask */
- roce_write(hr_dev, ROCEE_CAEP_CE_IRQ_MASK_0_REG +
- i * CEQ_REG_OFFSET, masken);
- }
-}
-
-static void hns_roce_ce_int_default_cfg(struct hns_roce_dev *hr_dev)
-{
- /* Configure ce int interval */
- roce_write(hr_dev, ROCEE_CAEP_CE_INTERVAL_CFG_REG,
- HNS_ROCE_CEQ_DEFAULT_INTERVAL);
-
- /* Configure ce int burst num */
- roce_write(hr_dev, ROCEE_CAEP_CE_BURST_NUM_CFG_REG,
- HNS_ROCE_CEQ_DEFAULT_BURST_NUM);
-}
-
-int hns_roce_init_eq_table(struct hns_roce_dev *hr_dev)
-{
- struct hns_roce_eq_table *eq_table = &hr_dev->eq_table;
- struct device *dev = &hr_dev->pdev->dev;
- struct hns_roce_eq *eq = NULL;
- int eq_num = 0;
- int ret = 0;
- int i = 0;
- int j = 0;
-
- eq_num = hr_dev->caps.num_comp_vectors + hr_dev->caps.num_aeq_vectors;
- eq_table->eq = kcalloc(eq_num, sizeof(*eq_table->eq), GFP_KERNEL);
- if (!eq_table->eq)
- return -ENOMEM;
-
- eq_table->eqc_base = kcalloc(eq_num, sizeof(*eq_table->eqc_base),
- GFP_KERNEL);
- if (!eq_table->eqc_base) {
- ret = -ENOMEM;
- goto err_eqc_base_alloc_fail;
- }
-
- for (i = 0; i < eq_num; i++) {
- eq = &eq_table->eq[i];
- eq->hr_dev = hr_dev;
- eq->eqn = i;
- eq->irq = hr_dev->irq[i];
- eq->log_page_size = PAGE_SHIFT;
-
- if (i < hr_dev->caps.num_comp_vectors) {
- /* CEQ */
- eq_table->eqc_base[i] = hr_dev->reg_base +
- ROCEE_CAEP_CEQC_SHIFT_0_REG +
- HNS_ROCE_CEQC_REG_OFFSET * i;
- eq->type_flag = HNS_ROCE_CEQ;
- eq->doorbell = hr_dev->reg_base +
- ROCEE_CAEP_CEQC_CONS_IDX_0_REG +
- HNS_ROCE_CEQC_REG_OFFSET * i;
- eq->entries = hr_dev->caps.ceqe_depth[i];
- eq->log_entries = ilog2(eq->entries);
- eq->eqe_size = sizeof(struct hns_roce_ceqe);
- } else {
- /* AEQ */
- eq_table->eqc_base[i] = hr_dev->reg_base +
- ROCEE_CAEP_AEQC_AEQE_SHIFT_REG;
- eq->type_flag = HNS_ROCE_AEQ;
- eq->doorbell = hr_dev->reg_base +
- ROCEE_CAEP_AEQE_CONS_IDX_REG;
- eq->entries = hr_dev->caps.aeqe_depth;
- eq->log_entries = ilog2(eq->entries);
- eq->eqe_size = sizeof(struct hns_roce_aeqe);
- }
- }
-
- /* Disable irq */
- hns_roce_int_mask_en(hr_dev);
-
- /* Configure CE irq interval and burst num */
- hns_roce_ce_int_default_cfg(hr_dev);
-
- for (i = 0; i < eq_num; i++) {
- ret = hns_roce_create_eq(hr_dev, &eq_table->eq[i]);
- if (ret) {
- dev_err(dev, "eq create failed\n");
- goto err_create_eq_fail;
- }
- }
-
- for (j = 0; j < eq_num; j++) {
- ret = request_irq(eq_table->eq[j].irq, hns_roce_msi_x_interrupt,
- 0, hr_dev->irq_names[j], eq_table->eq + j);
- if (ret) {
- dev_err(dev, "request irq error!\n");
- goto err_request_irq_fail;
- }
- }
-
- for (i = 0; i < eq_num; i++)
- hns_roce_enable_eq(hr_dev, i, EQ_ENABLE);
-
- return 0;
-
-err_request_irq_fail:
- for (j = j - 1; j >= 0; j--)
- free_irq(eq_table->eq[j].irq, eq_table->eq + j);
-
-err_create_eq_fail:
- for (i = i - 1; i >= 0; i--)
- hns_roce_free_eq(hr_dev, &eq_table->eq[i]);
-
- kfree(eq_table->eqc_base);
-
-err_eqc_base_alloc_fail:
- kfree(eq_table->eq);
-
- return ret;
-}
-
-void hns_roce_cleanup_eq_table(struct hns_roce_dev *hr_dev)
-{
- int i;
- int eq_num;
- struct hns_roce_eq_table *eq_table = &hr_dev->eq_table;
-
- eq_num = hr_dev->caps.num_comp_vectors + hr_dev->caps.num_aeq_vectors;
- for (i = 0; i < eq_num; i++) {
- /* Disable EQ */
- hns_roce_enable_eq(hr_dev, i, EQ_DISABLE);
-
- free_irq(eq_table->eq[i].irq, eq_table->eq + i);
-
- hns_roce_free_eq(hr_dev, &eq_table->eq[i]);
- }
-
- kfree(eq_table->eqc_base);
- kfree(eq_table->eq);
-}
diff --git a/drivers/infiniband/hw/hns/hns_roce_eq.h b/drivers/infiniband/hw/hns/hns_roce_eq.h
deleted file mode 100644
index c6d212d12e03..000000000000
--- a/drivers/infiniband/hw/hns/hns_roce_eq.h
+++ /dev/null
@@ -1,134 +0,0 @@
-/*
- * Copyright (c) 2016 Hisilicon Limited.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef _HNS_ROCE_EQ_H
-#define _HNS_ROCE_EQ_H
-
-#define HNS_ROCE_CEQ 1
-#define HNS_ROCE_AEQ 2
-
-#define HNS_ROCE_CEQ_ENTRY_SIZE 0x4
-#define HNS_ROCE_AEQ_ENTRY_SIZE 0x10
-#define HNS_ROCE_CEQC_REG_OFFSET 0x18
-
-#define HNS_ROCE_CEQ_DEFAULT_INTERVAL 0x10
-#define HNS_ROCE_CEQ_DEFAULT_BURST_NUM 0x10
-
-#define HNS_ROCE_INT_MASK_DISABLE 0
-#define HNS_ROCE_INT_MASK_ENABLE 1
-
-#define EQ_ENABLE 1
-#define EQ_DISABLE 0
-#define CONS_INDEX_MASK 0xffff
-
-#define CEQ_REG_OFFSET 0x18
-
-enum {
- HNS_ROCE_EQ_STAT_INVALID = 0,
- HNS_ROCE_EQ_STAT_VALID = 2,
-};
-
-struct hns_roce_aeqe {
- u32 asyn;
- union {
- struct {
- u32 qp;
- u32 rsv0;
- u32 rsv1;
- } qp_event;
-
- struct {
- u32 cq;
- u32 rsv0;
- u32 rsv1;
- } cq_event;
-
- struct {
- u32 port;
- u32 rsv0;
- u32 rsv1;
- } port_event;
-
- struct {
- u32 ceqe;
- u32 rsv0;
- u32 rsv1;
- } ce_event;
-
- struct {
- __le64 out_param;
- __le16 token;
- u8 status;
- u8 rsv0;
- } __packed cmd;
- } event;
-};
-
-#define HNS_ROCE_AEQE_U32_4_EVENT_TYPE_S 16
-#define HNS_ROCE_AEQE_U32_4_EVENT_TYPE_M \
- (((1UL << 8) - 1) << HNS_ROCE_AEQE_U32_4_EVENT_TYPE_S)
-
-#define HNS_ROCE_AEQE_U32_4_EVENT_SUB_TYPE_S 24
-#define HNS_ROCE_AEQE_U32_4_EVENT_SUB_TYPE_M \
- (((1UL << 7) - 1) << HNS_ROCE_AEQE_U32_4_EVENT_SUB_TYPE_S)
-
-#define HNS_ROCE_AEQE_U32_4_OWNER_S 31
-
-#define HNS_ROCE_AEQE_EVENT_QP_EVENT_QP_QPN_S 0
-#define HNS_ROCE_AEQE_EVENT_QP_EVENT_QP_QPN_M \
- (((1UL << 24) - 1) << HNS_ROCE_AEQE_EVENT_QP_EVENT_QP_QPN_S)
-
-#define HNS_ROCE_AEQE_EVENT_QP_EVENT_PORT_NUM_S 25
-#define HNS_ROCE_AEQE_EVENT_QP_EVENT_PORT_NUM_M \
- (((1UL << 3) - 1) << HNS_ROCE_AEQE_EVENT_QP_EVENT_PORT_NUM_S)
-
-#define HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_S 0
-#define HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_M \
- (((1UL << 16) - 1) << HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_S)
-
-#define HNS_ROCE_AEQE_EVENT_CE_EVENT_CEQE_CEQN_S 0
-#define HNS_ROCE_AEQE_EVENT_CE_EVENT_CEQE_CEQN_M \
- (((1UL << 5) - 1) << HNS_ROCE_AEQE_EVENT_CE_EVENT_CEQE_CEQN_S)
-
-struct hns_roce_ceqe {
- union {
- int comp;
- } ceqe;
-};
-
-#define HNS_ROCE_CEQE_CEQE_COMP_OWNER_S 0
-
-#define HNS_ROCE_CEQE_CEQE_COMP_CQN_S 16
-#define HNS_ROCE_CEQE_CEQE_COMP_CQN_M \
- (((1UL << 16) - 1) << HNS_ROCE_CEQE_CEQE_COMP_CQN_S)
-
-#endif /* _HNS_ROCE_EQ_H */
diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c
index c5104e0b2916..3d479c63b117 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hem.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hem.c
@@ -31,374 +31,1437 @@
* SOFTWARE.
*/
-#include <linux/platform_device.h>
#include "hns_roce_device.h"
#include "hns_roce_hem.h"
#include "hns_roce_common.h"
-#define HNS_ROCE_HEM_ALLOC_SIZE (1 << 17)
-#define HNS_ROCE_TABLE_CHUNK_SIZE (1 << 17)
+#define HEM_INDEX_BUF BIT(0)
+#define HEM_INDEX_L0 BIT(1)
+#define HEM_INDEX_L1 BIT(2)
+struct hns_roce_hem_index {
+ u64 buf;
+ u64 l0;
+ u64 l1;
+ u32 inited; /* indicate which index is available */
+};
+
+bool hns_roce_check_whether_mhop(struct hns_roce_dev *hr_dev, u32 type)
+{
+ int hop_num = 0;
+
+ switch (type) {
+ case HEM_TYPE_QPC:
+ hop_num = hr_dev->caps.qpc_hop_num;
+ break;
+ case HEM_TYPE_MTPT:
+ hop_num = hr_dev->caps.mpt_hop_num;
+ break;
+ case HEM_TYPE_CQC:
+ hop_num = hr_dev->caps.cqc_hop_num;
+ break;
+ case HEM_TYPE_SRQC:
+ hop_num = hr_dev->caps.srqc_hop_num;
+ break;
+ case HEM_TYPE_SCCC:
+ hop_num = hr_dev->caps.sccc_hop_num;
+ break;
+ case HEM_TYPE_QPC_TIMER:
+ hop_num = hr_dev->caps.qpc_timer_hop_num;
+ break;
+ case HEM_TYPE_CQC_TIMER:
+ hop_num = hr_dev->caps.cqc_timer_hop_num;
+ break;
+ case HEM_TYPE_GMV:
+ hop_num = hr_dev->caps.gmv_hop_num;
+ break;
+ default:
+ return false;
+ }
-#define DMA_ADDR_T_SHIFT 12
-#define BT_BA_SHIFT 32
+ return hop_num;
+}
-struct hns_roce_hem *hns_roce_alloc_hem(struct hns_roce_dev *hr_dev, int npages,
- gfp_t gfp_mask)
+static bool hns_roce_check_hem_null(struct hns_roce_hem **hem, u64 hem_idx,
+ u32 bt_chunk_num, u64 hem_max_num)
{
- struct hns_roce_hem_chunk *chunk = NULL;
- struct hns_roce_hem *hem;
- struct scatterlist *mem;
- int order;
- void *buf;
+ u64 start_idx = round_down(hem_idx, bt_chunk_num);
+ u64 check_max_num = start_idx + bt_chunk_num;
+ u64 i;
- WARN_ON(gfp_mask & __GFP_HIGHMEM);
+ for (i = start_idx; (i < check_max_num) && (i < hem_max_num); i++)
+ if (i != hem_idx && hem[i])
+ return false;
- hem = kmalloc(sizeof(*hem),
- gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN));
- if (!hem)
- return NULL;
+ return true;
+}
- hem->refcount = 0;
- INIT_LIST_HEAD(&hem->chunk_list);
+static bool hns_roce_check_bt_null(u64 **bt, u64 ba_idx, u32 bt_chunk_num)
+{
+ u64 start_idx = round_down(ba_idx, bt_chunk_num);
+ int i;
- order = get_order(HNS_ROCE_HEM_ALLOC_SIZE);
+ for (i = 0; i < bt_chunk_num; i++)
+ if (i != ba_idx && bt[start_idx + i])
+ return false;
- while (npages > 0) {
- if (!chunk) {
- chunk = kmalloc(sizeof(*chunk),
- gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN));
- if (!chunk)
- goto fail;
+ return true;
+}
- sg_init_table(chunk->mem, HNS_ROCE_HEM_CHUNK_LEN);
- chunk->npages = 0;
- chunk->nsg = 0;
- list_add_tail(&chunk->list, &hem->chunk_list);
- }
+static int hns_roce_get_bt_num(u32 table_type, u32 hop_num)
+{
+ if (check_whether_bt_num_3(table_type, hop_num))
+ return 3;
+ else if (check_whether_bt_num_2(table_type, hop_num))
+ return 2;
+ else if (check_whether_bt_num_1(table_type, hop_num))
+ return 1;
+ else
+ return 0;
+}
- while (1 << order > npages)
- --order;
+static int get_hem_table_config(struct hns_roce_dev *hr_dev,
+ struct hns_roce_hem_mhop *mhop,
+ u32 type)
+{
+ struct device *dev = hr_dev->dev;
- /*
- * Alloc memory one time. If failed, don't alloc small block
- * memory, directly return fail.
- */
- mem = &chunk->mem[chunk->npages];
- buf = dma_alloc_coherent(&hr_dev->pdev->dev, PAGE_SIZE << order,
- &sg_dma_address(mem), gfp_mask);
- if (!buf)
- goto fail;
+ switch (type) {
+ case HEM_TYPE_QPC:
+ mhop->buf_chunk_size = 1 << (hr_dev->caps.qpc_buf_pg_sz
+ + PAGE_SHIFT);
+ mhop->bt_chunk_size = 1 << (hr_dev->caps.qpc_ba_pg_sz
+ + PAGE_SHIFT);
+ mhop->ba_l0_num = hr_dev->caps.qpc_bt_num;
+ mhop->hop_num = hr_dev->caps.qpc_hop_num;
+ break;
+ case HEM_TYPE_MTPT:
+ mhop->buf_chunk_size = 1 << (hr_dev->caps.mpt_buf_pg_sz
+ + PAGE_SHIFT);
+ mhop->bt_chunk_size = 1 << (hr_dev->caps.mpt_ba_pg_sz
+ + PAGE_SHIFT);
+ mhop->ba_l0_num = hr_dev->caps.mpt_bt_num;
+ mhop->hop_num = hr_dev->caps.mpt_hop_num;
+ break;
+ case HEM_TYPE_CQC:
+ mhop->buf_chunk_size = 1 << (hr_dev->caps.cqc_buf_pg_sz
+ + PAGE_SHIFT);
+ mhop->bt_chunk_size = 1 << (hr_dev->caps.cqc_ba_pg_sz
+ + PAGE_SHIFT);
+ mhop->ba_l0_num = hr_dev->caps.cqc_bt_num;
+ mhop->hop_num = hr_dev->caps.cqc_hop_num;
+ break;
+ case HEM_TYPE_SCCC:
+ mhop->buf_chunk_size = 1 << (hr_dev->caps.sccc_buf_pg_sz
+ + PAGE_SHIFT);
+ mhop->bt_chunk_size = 1 << (hr_dev->caps.sccc_ba_pg_sz
+ + PAGE_SHIFT);
+ mhop->ba_l0_num = hr_dev->caps.sccc_bt_num;
+ mhop->hop_num = hr_dev->caps.sccc_hop_num;
+ break;
+ case HEM_TYPE_QPC_TIMER:
+ mhop->buf_chunk_size = 1 << (hr_dev->caps.qpc_timer_buf_pg_sz
+ + PAGE_SHIFT);
+ mhop->bt_chunk_size = 1 << (hr_dev->caps.qpc_timer_ba_pg_sz
+ + PAGE_SHIFT);
+ mhop->ba_l0_num = hr_dev->caps.qpc_timer_bt_num;
+ mhop->hop_num = hr_dev->caps.qpc_timer_hop_num;
+ break;
+ case HEM_TYPE_CQC_TIMER:
+ mhop->buf_chunk_size = 1 << (hr_dev->caps.cqc_timer_buf_pg_sz
+ + PAGE_SHIFT);
+ mhop->bt_chunk_size = 1 << (hr_dev->caps.cqc_timer_ba_pg_sz
+ + PAGE_SHIFT);
+ mhop->ba_l0_num = hr_dev->caps.cqc_timer_bt_num;
+ mhop->hop_num = hr_dev->caps.cqc_timer_hop_num;
+ break;
+ case HEM_TYPE_SRQC:
+ mhop->buf_chunk_size = 1 << (hr_dev->caps.srqc_buf_pg_sz
+ + PAGE_SHIFT);
+ mhop->bt_chunk_size = 1 << (hr_dev->caps.srqc_ba_pg_sz
+ + PAGE_SHIFT);
+ mhop->ba_l0_num = hr_dev->caps.srqc_bt_num;
+ mhop->hop_num = hr_dev->caps.srqc_hop_num;
+ break;
+ case HEM_TYPE_GMV:
+ mhop->buf_chunk_size = 1 << (hr_dev->caps.gmv_buf_pg_sz +
+ PAGE_SHIFT);
+ mhop->bt_chunk_size = 1 << (hr_dev->caps.gmv_ba_pg_sz +
+ PAGE_SHIFT);
+ mhop->ba_l0_num = hr_dev->caps.gmv_bt_num;
+ mhop->hop_num = hr_dev->caps.gmv_hop_num;
+ break;
+ default:
+ dev_err(dev, "table %u not support multi-hop addressing!\n",
+ type);
+ return -EINVAL;
+ }
- sg_set_buf(mem, buf, PAGE_SIZE << order);
- WARN_ON(mem->offset);
- sg_dma_len(mem) = PAGE_SIZE << order;
+ return 0;
+}
- ++chunk->npages;
- ++chunk->nsg;
- npages -= 1 << order;
+int hns_roce_calc_hem_mhop(struct hns_roce_dev *hr_dev,
+ struct hns_roce_hem_table *table, unsigned long *obj,
+ struct hns_roce_hem_mhop *mhop)
+{
+ struct device *dev = hr_dev->dev;
+ u32 chunk_ba_num;
+ u32 chunk_size;
+ u32 table_idx;
+ u32 bt_num;
+
+ if (get_hem_table_config(hr_dev, mhop, table->type))
+ return -EINVAL;
+
+ if (!obj)
+ return 0;
+
+ /*
+ * QPC/MTPT/CQC/SRQC/SCCC alloc hem for buffer pages.
+ * MTT/CQE alloc hem for bt pages.
+ */
+ bt_num = hns_roce_get_bt_num(table->type, mhop->hop_num);
+ chunk_ba_num = mhop->bt_chunk_size / BA_BYTE_LEN;
+ chunk_size = table->type < HEM_TYPE_MTT ? mhop->buf_chunk_size :
+ mhop->bt_chunk_size;
+ table_idx = *obj / (chunk_size / table->obj_size);
+ switch (bt_num) {
+ case 3:
+ mhop->l2_idx = table_idx & (chunk_ba_num - 1);
+ mhop->l1_idx = table_idx / chunk_ba_num & (chunk_ba_num - 1);
+ mhop->l0_idx = (table_idx / chunk_ba_num) / chunk_ba_num;
+ break;
+ case 2:
+ mhop->l1_idx = table_idx & (chunk_ba_num - 1);
+ mhop->l0_idx = table_idx / chunk_ba_num;
+ break;
+ case 1:
+ mhop->l0_idx = table_idx;
+ break;
+ default:
+ dev_err(dev, "table %u not support hop_num = %u!\n",
+ table->type, mhop->hop_num);
+ return -EINVAL;
+ }
+ if (mhop->l0_idx >= mhop->ba_l0_num)
+ mhop->l0_idx %= mhop->ba_l0_num;
+
+ return 0;
+}
+
+static struct hns_roce_hem *hns_roce_alloc_hem(struct hns_roce_dev *hr_dev,
+ unsigned long hem_alloc_size)
+{
+ struct hns_roce_hem *hem;
+ int order;
+ void *buf;
+
+ order = get_order(hem_alloc_size);
+ if (PAGE_SIZE << order != hem_alloc_size) {
+ dev_err(hr_dev->dev, "invalid hem_alloc_size: %lu!\n",
+ hem_alloc_size);
+ return NULL;
}
+ hem = kmalloc(sizeof(*hem), GFP_KERNEL);
+ if (!hem)
+ return NULL;
+
+ buf = dma_alloc_coherent(hr_dev->dev, hem_alloc_size,
+ &hem->dma, GFP_KERNEL);
+ if (!buf)
+ goto fail;
+
+ hem->buf = buf;
+ hem->size = hem_alloc_size;
+
return hem;
fail:
- hns_roce_free_hem(hr_dev, hem);
+ kfree(hem);
return NULL;
}
void hns_roce_free_hem(struct hns_roce_dev *hr_dev, struct hns_roce_hem *hem)
{
- struct hns_roce_hem_chunk *chunk, *tmp;
- int i;
-
if (!hem)
return;
- list_for_each_entry_safe(chunk, tmp, &hem->chunk_list, list) {
- for (i = 0; i < chunk->npages; ++i)
- dma_free_coherent(&hr_dev->pdev->dev,
- chunk->mem[i].length,
- lowmem_page_address(sg_page(&chunk->mem[i])),
- sg_dma_address(&chunk->mem[i]));
- kfree(chunk);
- }
+ dma_free_coherent(hr_dev->dev, hem->size, hem->buf, hem->dma);
kfree(hem);
}
-static int hns_roce_set_hem(struct hns_roce_dev *hr_dev,
- struct hns_roce_hem_table *table, unsigned long obj)
+static int calc_hem_config(struct hns_roce_dev *hr_dev,
+ struct hns_roce_hem_table *table, unsigned long obj,
+ struct hns_roce_hem_mhop *mhop,
+ struct hns_roce_hem_index *index)
{
- struct device *dev = &hr_dev->pdev->dev;
- spinlock_t *lock = &hr_dev->bt_cmd_lock;
- unsigned long end = 0;
- unsigned long flags;
- struct hns_roce_hem_iter iter;
- void __iomem *bt_cmd;
- u32 bt_cmd_h_val = 0;
- u32 bt_cmd_val[2];
- u32 bt_cmd_l = 0;
- u64 bt_ba = 0;
- int ret = 0;
-
- /* Find the HEM(Hardware Entry Memory) entry */
- unsigned long i = (obj & (table->num_obj - 1)) /
- (HNS_ROCE_TABLE_CHUNK_SIZE / table->obj_size);
+ struct device *dev = hr_dev->dev;
+ unsigned long mhop_obj = obj;
+ u32 l0_idx, l1_idx, l2_idx;
+ u32 chunk_ba_num;
+ u32 bt_num;
+ int ret;
+
+ ret = hns_roce_calc_hem_mhop(hr_dev, table, &mhop_obj, mhop);
+ if (ret)
+ return ret;
- switch (table->type) {
- case HEM_TYPE_QPC:
- roce_set_field(bt_cmd_h_val, ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_M,
- ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_S, HEM_TYPE_QPC);
- break;
- case HEM_TYPE_MTPT:
- roce_set_field(bt_cmd_h_val, ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_M,
- ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_S,
- HEM_TYPE_MTPT);
+ l0_idx = mhop->l0_idx;
+ l1_idx = mhop->l1_idx;
+ l2_idx = mhop->l2_idx;
+ chunk_ba_num = mhop->bt_chunk_size / BA_BYTE_LEN;
+ bt_num = hns_roce_get_bt_num(table->type, mhop->hop_num);
+ switch (bt_num) {
+ case 3:
+ index->l1 = l0_idx * chunk_ba_num + l1_idx;
+ index->l0 = l0_idx;
+ index->buf = l0_idx * chunk_ba_num * chunk_ba_num +
+ l1_idx * chunk_ba_num + l2_idx;
break;
- case HEM_TYPE_CQC:
- roce_set_field(bt_cmd_h_val, ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_M,
- ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_S, HEM_TYPE_CQC);
+ case 2:
+ index->l0 = l0_idx;
+ index->buf = l0_idx * chunk_ba_num + l1_idx;
break;
- case HEM_TYPE_SRQC:
- roce_set_field(bt_cmd_h_val, ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_M,
- ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_S,
- HEM_TYPE_SRQC);
+ case 1:
+ index->buf = l0_idx;
break;
default:
- return ret;
+ dev_err(dev, "table %u not support mhop.hop_num = %u!\n",
+ table->type, mhop->hop_num);
+ return -EINVAL;
}
- roce_set_field(bt_cmd_h_val, ROCEE_BT_CMD_H_ROCEE_BT_CMD_IN_MDF_M,
- ROCEE_BT_CMD_H_ROCEE_BT_CMD_IN_MDF_S, obj);
- roce_set_bit(bt_cmd_h_val, ROCEE_BT_CMD_H_ROCEE_BT_CMD_S, 0);
- roce_set_bit(bt_cmd_h_val, ROCEE_BT_CMD_H_ROCEE_BT_CMD_HW_SYNS_S, 1);
-
- /* Currently iter only a chunk */
- for (hns_roce_hem_first(table->hem[i], &iter);
- !hns_roce_hem_last(&iter); hns_roce_hem_next(&iter)) {
- bt_ba = hns_roce_hem_addr(&iter) >> DMA_ADDR_T_SHIFT;
-
- spin_lock_irqsave(lock, flags);
-
- bt_cmd = hr_dev->reg_base + ROCEE_BT_CMD_H_REG;
-
- end = msecs_to_jiffies(HW_SYNC_TIMEOUT_MSECS) + jiffies;
- while (1) {
- if (readl(bt_cmd) >> BT_CMD_SYNC_SHIFT) {
- if (!(time_before(jiffies, end))) {
- dev_err(dev, "Write bt_cmd err,hw_sync is not zero.\n");
- spin_unlock_irqrestore(lock, flags);
- return -EBUSY;
- }
- } else {
- break;
- }
- msleep(HW_SYNC_SLEEP_TIME_INTERVAL);
+
+ if (unlikely(index->buf >= table->num_hem)) {
+ dev_err(dev, "table %u exceed hem limt idx %llu, max %lu!\n",
+ table->type, index->buf, table->num_hem);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static void free_mhop_hem(struct hns_roce_dev *hr_dev,
+ struct hns_roce_hem_table *table,
+ struct hns_roce_hem_mhop *mhop,
+ struct hns_roce_hem_index *index)
+{
+ u32 bt_size = mhop->bt_chunk_size;
+ struct device *dev = hr_dev->dev;
+
+ if (index->inited & HEM_INDEX_BUF) {
+ hns_roce_free_hem(hr_dev, table->hem[index->buf]);
+ table->hem[index->buf] = NULL;
+ }
+
+ if (index->inited & HEM_INDEX_L1) {
+ dma_free_coherent(dev, bt_size, table->bt_l1[index->l1],
+ table->bt_l1_dma_addr[index->l1]);
+ table->bt_l1[index->l1] = NULL;
+ }
+
+ if (index->inited & HEM_INDEX_L0) {
+ dma_free_coherent(dev, bt_size, table->bt_l0[index->l0],
+ table->bt_l0_dma_addr[index->l0]);
+ table->bt_l0[index->l0] = NULL;
+ }
+}
+
+static int alloc_mhop_hem(struct hns_roce_dev *hr_dev,
+ struct hns_roce_hem_table *table,
+ struct hns_roce_hem_mhop *mhop,
+ struct hns_roce_hem_index *index)
+{
+ u32 bt_size = mhop->bt_chunk_size;
+ struct device *dev = hr_dev->dev;
+ u64 bt_ba;
+ u32 size;
+ int ret;
+
+ /* alloc L1 BA's chunk */
+ if ((check_whether_bt_num_3(table->type, mhop->hop_num) ||
+ check_whether_bt_num_2(table->type, mhop->hop_num)) &&
+ !table->bt_l0[index->l0]) {
+ table->bt_l0[index->l0] = dma_alloc_coherent(dev, bt_size,
+ &table->bt_l0_dma_addr[index->l0],
+ GFP_KERNEL);
+ if (!table->bt_l0[index->l0]) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ index->inited |= HEM_INDEX_L0;
+ }
+
+ /* alloc L2 BA's chunk */
+ if (check_whether_bt_num_3(table->type, mhop->hop_num) &&
+ !table->bt_l1[index->l1]) {
+ table->bt_l1[index->l1] = dma_alloc_coherent(dev, bt_size,
+ &table->bt_l1_dma_addr[index->l1],
+ GFP_KERNEL);
+ if (!table->bt_l1[index->l1]) {
+ ret = -ENOMEM;
+ goto err_alloc_hem;
+ }
+ index->inited |= HEM_INDEX_L1;
+ *(table->bt_l0[index->l0] + mhop->l1_idx) =
+ table->bt_l1_dma_addr[index->l1];
+ }
+
+ /*
+ * alloc buffer space chunk for QPC/MTPT/CQC/SRQC/SCCC.
+ * alloc bt space chunk for MTT/CQE.
+ */
+ size = table->type < HEM_TYPE_MTT ? mhop->buf_chunk_size : bt_size;
+ table->hem[index->buf] = hns_roce_alloc_hem(hr_dev, size);
+ if (!table->hem[index->buf]) {
+ ret = -ENOMEM;
+ goto err_alloc_hem;
+ }
+
+ index->inited |= HEM_INDEX_BUF;
+ bt_ba = table->hem[index->buf]->dma;
+
+ if (table->type < HEM_TYPE_MTT) {
+ if (mhop->hop_num == 2)
+ *(table->bt_l1[index->l1] + mhop->l2_idx) = bt_ba;
+ else if (mhop->hop_num == 1)
+ *(table->bt_l0[index->l0] + mhop->l1_idx) = bt_ba;
+ } else if (mhop->hop_num == 2) {
+ *(table->bt_l0[index->l0] + mhop->l1_idx) = bt_ba;
+ }
+
+ return 0;
+err_alloc_hem:
+ free_mhop_hem(hr_dev, table, mhop, index);
+out:
+ return ret;
+}
+
+static int set_mhop_hem(struct hns_roce_dev *hr_dev,
+ struct hns_roce_hem_table *table, unsigned long obj,
+ struct hns_roce_hem_mhop *mhop,
+ struct hns_roce_hem_index *index)
+{
+ struct device *dev = hr_dev->dev;
+ u32 step_idx;
+ int ret = 0;
+
+ if (index->inited & HEM_INDEX_L0) {
+ ret = hr_dev->hw->set_hem(hr_dev, table, obj, 0);
+ if (ret) {
+ dev_err(dev, "set HEM step 0 failed!\n");
+ goto out;
+ }
+ }
+
+ if (index->inited & HEM_INDEX_L1) {
+ ret = hr_dev->hw->set_hem(hr_dev, table, obj, 1);
+ if (ret) {
+ dev_err(dev, "set HEM step 1 failed!\n");
+ goto out;
}
+ }
+
+ if (index->inited & HEM_INDEX_BUF) {
+ if (mhop->hop_num == HNS_ROCE_HOP_NUM_0)
+ step_idx = 0;
+ else
+ step_idx = mhop->hop_num;
+ ret = hr_dev->hw->set_hem(hr_dev, table, obj, step_idx);
+ if (ret)
+ dev_err(dev, "set HEM step last failed!\n");
+ }
+out:
+ return ret;
+}
+
+static int hns_roce_table_mhop_get(struct hns_roce_dev *hr_dev,
+ struct hns_roce_hem_table *table,
+ unsigned long obj)
+{
+ struct hns_roce_hem_index index = {};
+ struct hns_roce_hem_mhop mhop = {};
+ struct device *dev = hr_dev->dev;
+ int ret;
+
+ ret = calc_hem_config(hr_dev, table, obj, &mhop, &index);
+ if (ret) {
+ dev_err(dev, "calc hem config failed!\n");
+ return ret;
+ }
+
+ mutex_lock(&table->mutex);
+ if (table->hem[index.buf]) {
+ refcount_inc(&table->hem[index.buf]->refcount);
+ goto out;
+ }
- bt_cmd_l = (u32)bt_ba;
- roce_set_field(bt_cmd_h_val, ROCEE_BT_CMD_H_ROCEE_BT_CMD_BA_H_M,
- ROCEE_BT_CMD_H_ROCEE_BT_CMD_BA_H_S,
- bt_ba >> BT_BA_SHIFT);
+ ret = alloc_mhop_hem(hr_dev, table, &mhop, &index);
+ if (ret) {
+ dev_err(dev, "alloc mhop hem failed!\n");
+ goto out;
+ }
- bt_cmd_val[0] = bt_cmd_l;
- bt_cmd_val[1] = bt_cmd_h_val;
- hns_roce_write64_k(bt_cmd_val,
- hr_dev->reg_base + ROCEE_BT_CMD_L_REG);
- spin_unlock_irqrestore(lock, flags);
+ /* set HEM base address to hardware */
+ if (table->type < HEM_TYPE_MTT) {
+ ret = set_mhop_hem(hr_dev, table, obj, &mhop, &index);
+ if (ret) {
+ dev_err(dev, "set HEM address to HW failed!\n");
+ goto err_alloc;
+ }
}
+ refcount_set(&table->hem[index.buf]->refcount, 1);
+ goto out;
+
+err_alloc:
+ free_mhop_hem(hr_dev, table, &mhop, &index);
+out:
+ mutex_unlock(&table->mutex);
return ret;
}
int hns_roce_table_get(struct hns_roce_dev *hr_dev,
struct hns_roce_hem_table *table, unsigned long obj)
{
- struct device *dev = &hr_dev->pdev->dev;
- int ret = 0;
+ struct device *dev = hr_dev->dev;
unsigned long i;
+ int ret = 0;
+
+ if (hns_roce_check_whether_mhop(hr_dev, table->type))
+ return hns_roce_table_mhop_get(hr_dev, table, obj);
- i = (obj & (table->num_obj - 1)) / (HNS_ROCE_TABLE_CHUNK_SIZE /
- table->obj_size);
+ i = obj / (table->table_chunk_size / table->obj_size);
mutex_lock(&table->mutex);
if (table->hem[i]) {
- ++table->hem[i]->refcount;
+ refcount_inc(&table->hem[i]->refcount);
goto out;
}
- table->hem[i] = hns_roce_alloc_hem(hr_dev,
- HNS_ROCE_TABLE_CHUNK_SIZE >> PAGE_SHIFT,
- (table->lowmem ? GFP_KERNEL :
- GFP_HIGHUSER) | __GFP_NOWARN);
+ table->hem[i] = hns_roce_alloc_hem(hr_dev, table->table_chunk_size);
if (!table->hem[i]) {
ret = -ENOMEM;
goto out;
}
/* Set HEM base address(128K/page, pa) to Hardware */
- if (hns_roce_set_hem(hr_dev, table, obj)) {
- ret = -ENODEV;
- dev_err(dev, "set HEM base address to HW failed.\n");
+ ret = hr_dev->hw->set_hem(hr_dev, table, obj, HEM_HOP_STEP_DIRECT);
+ if (ret) {
+ hns_roce_free_hem(hr_dev, table->hem[i]);
+ table->hem[i] = NULL;
+ dev_err(dev, "set HEM base address to HW failed, ret = %d.\n",
+ ret);
goto out;
}
- ++table->hem[i]->refcount;
+ refcount_set(&table->hem[i]->refcount, 1);
out:
mutex_unlock(&table->mutex);
return ret;
}
-void hns_roce_table_put(struct hns_roce_dev *hr_dev,
- struct hns_roce_hem_table *table, unsigned long obj)
+static void clear_mhop_hem(struct hns_roce_dev *hr_dev,
+ struct hns_roce_hem_table *table, unsigned long obj,
+ struct hns_roce_hem_mhop *mhop,
+ struct hns_roce_hem_index *index)
{
- struct device *dev = &hr_dev->pdev->dev;
- unsigned long i;
+ struct device *dev = hr_dev->dev;
+ u32 hop_num = mhop->hop_num;
+ u32 chunk_ba_num;
+ u32 step_idx;
+ int ret;
+
+ index->inited = HEM_INDEX_BUF;
+ chunk_ba_num = mhop->bt_chunk_size / BA_BYTE_LEN;
+ if (check_whether_bt_num_2(table->type, hop_num)) {
+ if (hns_roce_check_hem_null(table->hem, index->buf,
+ chunk_ba_num, table->num_hem))
+ index->inited |= HEM_INDEX_L0;
+ } else if (check_whether_bt_num_3(table->type, hop_num)) {
+ if (hns_roce_check_hem_null(table->hem, index->buf,
+ chunk_ba_num, table->num_hem)) {
+ index->inited |= HEM_INDEX_L1;
+ if (hns_roce_check_bt_null(table->bt_l1, index->l1,
+ chunk_ba_num))
+ index->inited |= HEM_INDEX_L0;
+ }
+ }
- i = (obj & (table->num_obj - 1)) /
- (HNS_ROCE_TABLE_CHUNK_SIZE / table->obj_size);
+ if (table->type < HEM_TYPE_MTT) {
+ if (hop_num == HNS_ROCE_HOP_NUM_0)
+ step_idx = 0;
+ else
+ step_idx = hop_num;
- mutex_lock(&table->mutex);
+ ret = hr_dev->hw->clear_hem(hr_dev, table, obj, step_idx);
+ if (ret)
+ dev_warn(dev, "failed to clear hop%u HEM, ret = %d.\n",
+ hop_num, ret);
+
+ if (index->inited & HEM_INDEX_L1) {
+ ret = hr_dev->hw->clear_hem(hr_dev, table, obj, 1);
+ if (ret)
+ dev_warn(dev, "failed to clear HEM step 1, ret = %d.\n",
+ ret);
+ }
- if (--table->hem[i]->refcount == 0) {
- /* Clear HEM base address */
- if (hr_dev->hw->clear_hem(hr_dev, table, obj))
- dev_warn(dev, "Clear HEM base address failed.\n");
+ if (index->inited & HEM_INDEX_L0) {
+ ret = hr_dev->hw->clear_hem(hr_dev, table, obj, 0);
+ if (ret)
+ dev_warn(dev, "failed to clear HEM step 0, ret = %d.\n",
+ ret);
+ }
+ }
+}
- hns_roce_free_hem(hr_dev, table->hem[i]);
- table->hem[i] = NULL;
+static void hns_roce_table_mhop_put(struct hns_roce_dev *hr_dev,
+ struct hns_roce_hem_table *table,
+ unsigned long obj,
+ int check_refcount)
+{
+ struct hns_roce_hem_index index = {};
+ struct hns_roce_hem_mhop mhop = {};
+ struct device *dev = hr_dev->dev;
+ int ret;
+
+ ret = calc_hem_config(hr_dev, table, obj, &mhop, &index);
+ if (ret) {
+ dev_err(dev, "calc hem config failed!\n");
+ return;
}
+ if (!check_refcount)
+ mutex_lock(&table->mutex);
+ else if (!refcount_dec_and_mutex_lock(&table->hem[index.buf]->refcount,
+ &table->mutex))
+ return;
+
+ clear_mhop_hem(hr_dev, table, obj, &mhop, &index);
+ free_mhop_hem(hr_dev, table, &mhop, &index);
+
mutex_unlock(&table->mutex);
}
-void *hns_roce_table_find(struct hns_roce_hem_table *table, unsigned long obj,
- dma_addr_t *dma_handle)
+void hns_roce_table_put(struct hns_roce_dev *hr_dev,
+ struct hns_roce_hem_table *table, unsigned long obj)
{
- struct hns_roce_hem_chunk *chunk;
- unsigned long idx;
- int i;
- int offset, dma_offset;
- struct hns_roce_hem *hem;
- struct page *page = NULL;
+ struct device *dev = hr_dev->dev;
+ unsigned long i;
+ int ret;
- if (!table->lowmem)
- return NULL;
+ if (hns_roce_check_whether_mhop(hr_dev, table->type)) {
+ hns_roce_table_mhop_put(hr_dev, table, obj, 1);
+ return;
+ }
- mutex_lock(&table->mutex);
- idx = (obj & (table->num_obj - 1)) * table->obj_size;
- hem = table->hem[idx / HNS_ROCE_TABLE_CHUNK_SIZE];
- dma_offset = offset = idx % HNS_ROCE_TABLE_CHUNK_SIZE;
+ i = obj / (table->table_chunk_size / table->obj_size);
- if (!hem)
- goto out;
+ if (!refcount_dec_and_mutex_lock(&table->hem[i]->refcount,
+ &table->mutex))
+ return;
- list_for_each_entry(chunk, &hem->chunk_list, list) {
- for (i = 0; i < chunk->npages; ++i) {
- if (dma_handle && dma_offset >= 0) {
- if (sg_dma_len(&chunk->mem[i]) >
- (u32)dma_offset)
- *dma_handle = sg_dma_address(
- &chunk->mem[i]) + dma_offset;
- dma_offset -= sg_dma_len(&chunk->mem[i]);
- }
+ ret = hr_dev->hw->clear_hem(hr_dev, table, obj, HEM_HOP_STEP_DIRECT);
+ if (ret)
+ dev_warn_ratelimited(dev, "failed to clear HEM base address, ret = %d.\n",
+ ret);
- if (chunk->mem[i].length > (u32)offset) {
- page = sg_page(&chunk->mem[i]);
- goto out;
- }
- offset -= chunk->mem[i].length;
- }
- }
+ hns_roce_free_hem(hr_dev, table->hem[i]);
+ table->hem[i] = NULL;
-out:
mutex_unlock(&table->mutex);
- return page ? lowmem_page_address(page) + offset : NULL;
}
-int hns_roce_table_get_range(struct hns_roce_dev *hr_dev,
- struct hns_roce_hem_table *table,
- unsigned long start, unsigned long end)
+void *hns_roce_table_find(struct hns_roce_dev *hr_dev,
+ struct hns_roce_hem_table *table,
+ unsigned long obj, dma_addr_t *dma_handle)
{
- unsigned long inc = HNS_ROCE_TABLE_CHUNK_SIZE / table->obj_size;
- unsigned long i = 0;
- int ret = 0;
-
- /* Allocate MTT entry memory according to chunk(128K) */
- for (i = start; i <= end; i += inc) {
- ret = hns_roce_table_get(hr_dev, table, i);
- if (ret)
- goto fail;
- }
+ struct hns_roce_hem_mhop mhop;
+ struct hns_roce_hem *hem;
+ unsigned long mhop_obj = obj;
+ unsigned long obj_per_chunk;
+ unsigned long idx_offset;
+ int offset, dma_offset;
+ void *addr = NULL;
+ u32 hem_idx = 0;
+ int i, j;
- return 0;
+ mutex_lock(&table->mutex);
-fail:
- while (i > start) {
- i -= inc;
- hns_roce_table_put(hr_dev, table, i);
+ if (!hns_roce_check_whether_mhop(hr_dev, table->type)) {
+ obj_per_chunk = table->table_chunk_size / table->obj_size;
+ hem = table->hem[obj / obj_per_chunk];
+ idx_offset = obj % obj_per_chunk;
+ dma_offset = offset = idx_offset * table->obj_size;
+ } else {
+ u32 seg_size = 64; /* 8 bytes per BA and 8 BA per segment */
+
+ if (hns_roce_calc_hem_mhop(hr_dev, table, &mhop_obj, &mhop))
+ goto out;
+ /* mtt mhop */
+ i = mhop.l0_idx;
+ j = mhop.l1_idx;
+ if (mhop.hop_num == 2)
+ hem_idx = i * (mhop.bt_chunk_size / BA_BYTE_LEN) + j;
+ else if (mhop.hop_num == 1 ||
+ mhop.hop_num == HNS_ROCE_HOP_NUM_0)
+ hem_idx = i;
+
+ hem = table->hem[hem_idx];
+ dma_offset = offset = obj * seg_size % mhop.bt_chunk_size;
+ if (mhop.hop_num == 2)
+ dma_offset = offset = 0;
}
- return ret;
-}
-void hns_roce_table_put_range(struct hns_roce_dev *hr_dev,
- struct hns_roce_hem_table *table,
- unsigned long start, unsigned long end)
-{
- unsigned long i;
+ if (!hem)
+ goto out;
- for (i = start; i <= end;
- i += HNS_ROCE_TABLE_CHUNK_SIZE / table->obj_size)
- hns_roce_table_put(hr_dev, table, i);
+ *dma_handle = hem->dma + dma_offset;
+ addr = hem->buf + offset;
+
+out:
+ mutex_unlock(&table->mutex);
+ return addr;
}
int hns_roce_init_hem_table(struct hns_roce_dev *hr_dev,
struct hns_roce_hem_table *table, u32 type,
- unsigned long obj_size, unsigned long nobj,
- int use_lowmem)
+ unsigned long obj_size, unsigned long nobj)
{
unsigned long obj_per_chunk;
unsigned long num_hem;
- obj_per_chunk = HNS_ROCE_TABLE_CHUNK_SIZE / obj_size;
- num_hem = (nobj + obj_per_chunk - 1) / obj_per_chunk;
+ if (!hns_roce_check_whether_mhop(hr_dev, type)) {
+ table->table_chunk_size = hr_dev->caps.chunk_sz;
+ obj_per_chunk = table->table_chunk_size / obj_size;
+ num_hem = DIV_ROUND_UP(nobj, obj_per_chunk);
+
+ table->hem = kcalloc(num_hem, sizeof(*table->hem), GFP_KERNEL);
+ if (!table->hem)
+ return -ENOMEM;
+ } else {
+ struct hns_roce_hem_mhop mhop = {};
+ unsigned long buf_chunk_size;
+ unsigned long bt_chunk_size;
+ unsigned long bt_chunk_num;
+ unsigned long num_bt_l0;
+ u32 hop_num;
+
+ if (get_hem_table_config(hr_dev, &mhop, type))
+ return -EINVAL;
+
+ buf_chunk_size = mhop.buf_chunk_size;
+ bt_chunk_size = mhop.bt_chunk_size;
+ num_bt_l0 = mhop.ba_l0_num;
+ hop_num = mhop.hop_num;
+
+ obj_per_chunk = buf_chunk_size / obj_size;
+ num_hem = DIV_ROUND_UP(nobj, obj_per_chunk);
+ bt_chunk_num = bt_chunk_size / BA_BYTE_LEN;
+
+ if (type >= HEM_TYPE_MTT)
+ num_bt_l0 = bt_chunk_num;
+
+ table->hem = kcalloc(num_hem, sizeof(*table->hem),
+ GFP_KERNEL);
+ if (!table->hem)
+ goto err_kcalloc_hem_buf;
+
+ if (check_whether_bt_num_3(type, hop_num)) {
+ unsigned long num_bt_l1;
+
+ num_bt_l1 = DIV_ROUND_UP(num_hem, bt_chunk_num);
+ table->bt_l1 = kcalloc(num_bt_l1,
+ sizeof(*table->bt_l1),
+ GFP_KERNEL);
+ if (!table->bt_l1)
+ goto err_kcalloc_bt_l1;
+
+ table->bt_l1_dma_addr = kcalloc(num_bt_l1,
+ sizeof(*table->bt_l1_dma_addr),
+ GFP_KERNEL);
+
+ if (!table->bt_l1_dma_addr)
+ goto err_kcalloc_l1_dma;
+ }
- table->hem = kcalloc(num_hem, sizeof(*table->hem), GFP_KERNEL);
- if (!table->hem)
- return -ENOMEM;
+ if (check_whether_bt_num_2(type, hop_num) ||
+ check_whether_bt_num_3(type, hop_num)) {
+ table->bt_l0 = kcalloc(num_bt_l0, sizeof(*table->bt_l0),
+ GFP_KERNEL);
+ if (!table->bt_l0)
+ goto err_kcalloc_bt_l0;
+
+ table->bt_l0_dma_addr = kcalloc(num_bt_l0,
+ sizeof(*table->bt_l0_dma_addr),
+ GFP_KERNEL);
+ if (!table->bt_l0_dma_addr)
+ goto err_kcalloc_l0_dma;
+ }
+ }
table->type = type;
table->num_hem = num_hem;
- table->num_obj = nobj;
table->obj_size = obj_size;
- table->lowmem = use_lowmem;
mutex_init(&table->mutex);
return 0;
+
+err_kcalloc_l0_dma:
+ kfree(table->bt_l0);
+ table->bt_l0 = NULL;
+
+err_kcalloc_bt_l0:
+ kfree(table->bt_l1_dma_addr);
+ table->bt_l1_dma_addr = NULL;
+
+err_kcalloc_l1_dma:
+ kfree(table->bt_l1);
+ table->bt_l1 = NULL;
+
+err_kcalloc_bt_l1:
+ kfree(table->hem);
+ table->hem = NULL;
+
+err_kcalloc_hem_buf:
+ return -ENOMEM;
+}
+
+static void hns_roce_cleanup_mhop_hem_table(struct hns_roce_dev *hr_dev,
+ struct hns_roce_hem_table *table)
+{
+ struct hns_roce_hem_mhop mhop;
+ u32 buf_chunk_size;
+ u64 obj;
+ int i;
+
+ if (hns_roce_calc_hem_mhop(hr_dev, table, NULL, &mhop))
+ return;
+ buf_chunk_size = table->type < HEM_TYPE_MTT ? mhop.buf_chunk_size :
+ mhop.bt_chunk_size;
+
+ for (i = 0; i < table->num_hem; ++i) {
+ obj = i * buf_chunk_size / table->obj_size;
+ if (table->hem[i])
+ hns_roce_table_mhop_put(hr_dev, table, obj, 0);
+ }
+
+ kfree(table->hem);
+ table->hem = NULL;
+ kfree(table->bt_l1);
+ table->bt_l1 = NULL;
+ kfree(table->bt_l1_dma_addr);
+ table->bt_l1_dma_addr = NULL;
+ kfree(table->bt_l0);
+ table->bt_l0 = NULL;
+ kfree(table->bt_l0_dma_addr);
+ table->bt_l0_dma_addr = NULL;
}
void hns_roce_cleanup_hem_table(struct hns_roce_dev *hr_dev,
struct hns_roce_hem_table *table)
{
- struct device *dev = &hr_dev->pdev->dev;
+ struct device *dev = hr_dev->dev;
unsigned long i;
+ int obj;
+ int ret;
+
+ if (hns_roce_check_whether_mhop(hr_dev, table->type)) {
+ hns_roce_cleanup_mhop_hem_table(hr_dev, table);
+ mutex_destroy(&table->mutex);
+ return;
+ }
for (i = 0; i < table->num_hem; ++i)
if (table->hem[i]) {
- if (hr_dev->hw->clear_hem(hr_dev, table,
- i * HNS_ROCE_TABLE_CHUNK_SIZE / table->obj_size))
- dev_err(dev, "Clear HEM base address failed.\n");
+ obj = i * table->table_chunk_size / table->obj_size;
+ ret = hr_dev->hw->clear_hem(hr_dev, table, obj, 0);
+ if (ret)
+ dev_err(dev, "clear HEM base address failed, ret = %d.\n",
+ ret);
hns_roce_free_hem(hr_dev, table->hem[i]);
}
+ mutex_destroy(&table->mutex);
kfree(table->hem);
}
void hns_roce_cleanup_hem(struct hns_roce_dev *hr_dev)
{
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ)
+ hns_roce_cleanup_hem_table(hr_dev,
+ &hr_dev->srq_table.table);
hns_roce_cleanup_hem_table(hr_dev, &hr_dev->cq_table.table);
+ if (hr_dev->caps.qpc_timer_entry_sz)
+ hns_roce_cleanup_hem_table(hr_dev,
+ &hr_dev->qpc_timer_table);
+ if (hr_dev->caps.cqc_timer_entry_sz)
+ hns_roce_cleanup_hem_table(hr_dev,
+ &hr_dev->cqc_timer_table);
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL)
+ hns_roce_cleanup_hem_table(hr_dev,
+ &hr_dev->qp_table.sccc_table);
+ if (hr_dev->caps.trrl_entry_sz)
+ hns_roce_cleanup_hem_table(hr_dev,
+ &hr_dev->qp_table.trrl_table);
+
+ if (hr_dev->caps.gmv_entry_sz)
+ hns_roce_cleanup_hem_table(hr_dev, &hr_dev->gmv_table);
+
hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qp_table.irrl_table);
hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qp_table.qp_table);
hns_roce_cleanup_hem_table(hr_dev, &hr_dev->mr_table.mtpt_table);
- hns_roce_cleanup_hem_table(hr_dev, &hr_dev->mr_table.mtt_table);
+}
+
+struct hns_roce_hem_item {
+ struct list_head list; /* link all hems in the same bt level */
+ struct list_head sibling; /* link all hems in last hop for mtt */
+ void *addr;
+ dma_addr_t dma_addr;
+ size_t count; /* max ba numbers */
+ int start; /* start buf offset in this hem */
+ int end; /* end buf offset in this hem */
+ bool exist_bt;
+};
+
+/* All HEM items are linked in a tree structure */
+struct hns_roce_hem_head {
+ struct list_head branch[HNS_ROCE_MAX_BT_REGION];
+ struct list_head root;
+ struct list_head leaf;
+};
+
+static struct hns_roce_hem_item *
+hem_list_alloc_item(struct hns_roce_dev *hr_dev, int start, int end, int count,
+ bool exist_bt)
+{
+ struct hns_roce_hem_item *hem;
+
+ hem = kzalloc(sizeof(*hem), GFP_KERNEL);
+ if (!hem)
+ return NULL;
+
+ if (exist_bt) {
+ hem->addr = dma_alloc_coherent(hr_dev->dev, count * BA_BYTE_LEN,
+ &hem->dma_addr, GFP_KERNEL);
+ if (!hem->addr) {
+ kfree(hem);
+ return NULL;
+ }
+ }
+
+ hem->exist_bt = exist_bt;
+ hem->count = count;
+ hem->start = start;
+ hem->end = end;
+ INIT_LIST_HEAD(&hem->list);
+ INIT_LIST_HEAD(&hem->sibling);
+
+ return hem;
+}
+
+static void hem_list_free_item(struct hns_roce_dev *hr_dev,
+ struct hns_roce_hem_item *hem)
+{
+ if (hem->exist_bt)
+ dma_free_coherent(hr_dev->dev, hem->count * BA_BYTE_LEN,
+ hem->addr, hem->dma_addr);
+ kfree(hem);
+}
+
+static void hem_list_free_all(struct hns_roce_dev *hr_dev,
+ struct list_head *head)
+{
+ struct hns_roce_hem_item *hem, *temp_hem;
+
+ list_for_each_entry_safe(hem, temp_hem, head, list) {
+ list_del(&hem->list);
+ hem_list_free_item(hr_dev, hem);
+ }
+}
+
+static void hem_list_link_bt(void *base_addr, u64 table_addr)
+{
+ *(u64 *)(base_addr) = table_addr;
+}
+
+/* assign L0 table address to hem from root bt */
+static void hem_list_assign_bt(struct hns_roce_hem_item *hem, void *cpu_addr,
+ u64 phy_addr)
+{
+ hem->addr = cpu_addr;
+ hem->dma_addr = (dma_addr_t)phy_addr;
+}
+
+static inline bool hem_list_page_is_in_range(struct hns_roce_hem_item *hem,
+ int offset)
+{
+ return (hem->start <= offset && offset <= hem->end);
+}
+
+static struct hns_roce_hem_item *hem_list_search_item(struct list_head *ba_list,
+ int page_offset)
+{
+ struct hns_roce_hem_item *hem, *temp_hem;
+ struct hns_roce_hem_item *found = NULL;
+
+ list_for_each_entry_safe(hem, temp_hem, ba_list, list) {
+ if (hem_list_page_is_in_range(hem, page_offset)) {
+ found = hem;
+ break;
+ }
+ }
+
+ return found;
+}
+
+static bool hem_list_is_bottom_bt(int hopnum, int bt_level)
+{
+ /*
+ * hopnum base address table levels
+ * 0 L0(buf)
+ * 1 L0 -> buf
+ * 2 L0 -> L1 -> buf
+ * 3 L0 -> L1 -> L2 -> buf
+ */
+ return bt_level >= (hopnum ? hopnum - 1 : hopnum);
+}
+
+/*
+ * calc base address entries num
+ * @hopnum: num of mutihop addressing
+ * @bt_level: base address table level
+ * @unit: ba entries per bt page
+ */
+static u64 hem_list_calc_ba_range(int hopnum, int bt_level, int unit)
+{
+ u64 step;
+ int max;
+ int i;
+
+ if (hopnum <= bt_level)
+ return 0;
+ /*
+ * hopnum bt_level range
+ * 1 0 unit
+ * ------------
+ * 2 0 unit * unit
+ * 2 1 unit
+ * ------------
+ * 3 0 unit * unit * unit
+ * 3 1 unit * unit
+ * 3 2 unit
+ */
+ step = 1;
+ max = hopnum - bt_level;
+ for (i = 0; i < max; i++)
+ step = step * unit;
+
+ return step;
+}
+
+/*
+ * calc the root ba entries which could cover all regions
+ * @regions: buf region array
+ * @region_cnt: array size of @regions
+ * @unit: ba entries per bt page
+ */
+int hns_roce_hem_list_calc_root_ba(const struct hns_roce_buf_region *regions,
+ int region_cnt, int unit)
+{
+ struct hns_roce_buf_region *r;
+ int total = 0;
+ u64 step;
+ int i;
+
+ for (i = 0; i < region_cnt; i++) {
+ r = (struct hns_roce_buf_region *)&regions[i];
+ /* when r->hopnum = 0, the region should not occupy root_ba. */
+ if (!r->hopnum)
+ continue;
+
+ if (r->hopnum > 1) {
+ step = hem_list_calc_ba_range(r->hopnum, 1, unit);
+ if (step > 0)
+ total += (r->count + step - 1) / step;
+ } else {
+ total += r->count;
+ }
+ }
+
+ return total;
+}
+
+static int hem_list_alloc_mid_bt(struct hns_roce_dev *hr_dev,
+ const struct hns_roce_buf_region *r, int unit,
+ int offset, struct list_head *mid_bt,
+ struct list_head *btm_bt)
+{
+ struct hns_roce_hem_item *hem_ptrs[HNS_ROCE_MAX_BT_LEVEL] = { NULL };
+ struct list_head temp_list[HNS_ROCE_MAX_BT_LEVEL];
+ struct hns_roce_hem_item *cur, *pre;
+ const int hopnum = r->hopnum;
+ int start_aligned;
+ int distance;
+ int ret = 0;
+ int max_ofs;
+ int level;
+ u64 step;
+ int end;
+
+ if (hopnum <= 1)
+ return 0;
+
+ if (hopnum > HNS_ROCE_MAX_BT_LEVEL) {
+ dev_err(hr_dev->dev, "invalid hopnum %d!\n", hopnum);
+ return -EINVAL;
+ }
+
+ if (offset < r->offset) {
+ dev_err(hr_dev->dev, "invalid offset %d, min %u!\n",
+ offset, r->offset);
+ return -EINVAL;
+ }
+
+ distance = offset - r->offset;
+ max_ofs = r->offset + r->count - 1;
+ for (level = 0; level < hopnum; level++)
+ INIT_LIST_HEAD(&temp_list[level]);
+
+ /* config L1 bt to last bt and link them to corresponding parent */
+ for (level = 1; level < hopnum; level++) {
+ if (!hem_list_is_bottom_bt(hopnum, level)) {
+ cur = hem_list_search_item(&mid_bt[level], offset);
+ if (cur) {
+ hem_ptrs[level] = cur;
+ continue;
+ }
+ }
+
+ step = hem_list_calc_ba_range(hopnum, level, unit);
+ if (step < 1) {
+ ret = -EINVAL;
+ goto err_exit;
+ }
+
+ start_aligned = (distance / step) * step + r->offset;
+ end = min_t(u64, start_aligned + step - 1, max_ofs);
+ cur = hem_list_alloc_item(hr_dev, start_aligned, end, unit,
+ true);
+ if (!cur) {
+ ret = -ENOMEM;
+ goto err_exit;
+ }
+ hem_ptrs[level] = cur;
+ list_add(&cur->list, &temp_list[level]);
+ if (hem_list_is_bottom_bt(hopnum, level))
+ list_add(&cur->sibling, &temp_list[0]);
+
+ /* link bt to parent bt */
+ if (level > 1) {
+ pre = hem_ptrs[level - 1];
+ step = (cur->start - pre->start) / step * BA_BYTE_LEN;
+ hem_list_link_bt(pre->addr + step, cur->dma_addr);
+ }
+ }
+
+ list_splice(&temp_list[0], btm_bt);
+ for (level = 1; level < hopnum; level++)
+ list_splice(&temp_list[level], &mid_bt[level]);
+
+ return 0;
+
+err_exit:
+ for (level = 1; level < hopnum; level++)
+ hem_list_free_all(hr_dev, &temp_list[level]);
+
+ return ret;
+}
+
+static struct hns_roce_hem_item *
+alloc_root_hem(struct hns_roce_dev *hr_dev, int unit, int *max_ba_num,
+ const struct hns_roce_buf_region *regions, int region_cnt)
+{
+ const struct hns_roce_buf_region *r;
+ struct hns_roce_hem_item *hem;
+ int ba_num;
+ int offset;
+
+ ba_num = hns_roce_hem_list_calc_root_ba(regions, region_cnt, unit);
+ if (ba_num < 1)
+ return ERR_PTR(-ENOMEM);
+
+ if (ba_num > unit)
+ return ERR_PTR(-ENOBUFS);
+
+ offset = regions[0].offset;
+ /* indicate to last region */
+ r = &regions[region_cnt - 1];
+ hem = hem_list_alloc_item(hr_dev, offset, r->offset + r->count - 1,
+ ba_num, true);
+ if (!hem)
+ return ERR_PTR(-ENOMEM);
+
+ *max_ba_num = ba_num;
+
+ return hem;
+}
+
+static int alloc_fake_root_bt(struct hns_roce_dev *hr_dev, void *cpu_base,
+ u64 phy_base, const struct hns_roce_buf_region *r,
+ struct list_head *branch_head,
+ struct list_head *leaf_head)
+{
+ struct hns_roce_hem_item *hem;
+
+ /* This is on the has_mtt branch, if r->hopnum
+ * is 0, there is no root_ba to reuse for the
+ * region's fake hem, so a dma_alloc request is
+ * necessary here.
+ */
+ hem = hem_list_alloc_item(hr_dev, r->offset, r->offset + r->count - 1,
+ r->count, !r->hopnum);
+ if (!hem)
+ return -ENOMEM;
+
+ /* The root_ba can be reused only when r->hopnum > 0. */
+ if (r->hopnum)
+ hem_list_assign_bt(hem, cpu_base, phy_base);
+ list_add(&hem->list, branch_head);
+ list_add(&hem->sibling, leaf_head);
+
+ /* If r->hopnum == 0, 0 is returned,
+ * so that the root_bt entry is not occupied.
+ */
+ return r->hopnum ? r->count : 0;
+}
+
+static int setup_middle_bt(struct hns_roce_dev *hr_dev, void *cpu_base,
+ int unit, const struct hns_roce_buf_region *r,
+ const struct list_head *branch_head)
+{
+ struct hns_roce_hem_item *hem, *temp_hem;
+ int total = 0;
+ int offset;
+ u64 step;
+
+ step = hem_list_calc_ba_range(r->hopnum, 1, unit);
+ if (step < 1)
+ return -EINVAL;
+
+ /* if exist mid bt, link L1 to L0 */
+ list_for_each_entry_safe(hem, temp_hem, branch_head, list) {
+ offset = (hem->start - r->offset) / step * BA_BYTE_LEN;
+ hem_list_link_bt(cpu_base + offset, hem->dma_addr);
+ total++;
+ }
+
+ return total;
+}
+
+static int
+setup_root_hem(struct hns_roce_dev *hr_dev, struct hns_roce_hem_list *hem_list,
+ int unit, int max_ba_num, struct hns_roce_hem_head *head,
+ const struct hns_roce_buf_region *regions, int region_cnt)
+{
+ const struct hns_roce_buf_region *r;
+ struct hns_roce_hem_item *root_hem;
+ void *cpu_base;
+ u64 phy_base;
+ int i, total;
+ int ret;
+
+ root_hem = list_first_entry(&head->root,
+ struct hns_roce_hem_item, list);
+ if (!root_hem)
+ return -ENOMEM;
+
+ total = 0;
+ for (i = 0; i < region_cnt && total <= max_ba_num; i++) {
+ r = &regions[i];
+ if (!r->count)
+ continue;
+
+ /* all regions's mid[x][0] shared the root_bt's trunk */
+ cpu_base = root_hem->addr + total * BA_BYTE_LEN;
+ phy_base = root_hem->dma_addr + total * BA_BYTE_LEN;
+
+ /* if hopnum is 0 or 1, cut a new fake hem from the root bt
+ * which's address share to all regions.
+ */
+ if (hem_list_is_bottom_bt(r->hopnum, 0))
+ ret = alloc_fake_root_bt(hr_dev, cpu_base, phy_base, r,
+ &head->branch[i], &head->leaf);
+ else
+ ret = setup_middle_bt(hr_dev, cpu_base, unit, r,
+ &hem_list->mid_bt[i][1]);
+
+ if (ret < 0)
+ return ret;
+
+ total += ret;
+ }
+
+ list_splice(&head->leaf, &hem_list->btm_bt);
+ list_splice(&head->root, &hem_list->root_bt);
+ for (i = 0; i < region_cnt; i++)
+ list_splice(&head->branch[i], &hem_list->mid_bt[i][0]);
+
+ return 0;
+}
+
+static int hem_list_alloc_root_bt(struct hns_roce_dev *hr_dev,
+ struct hns_roce_hem_list *hem_list, int unit,
+ const struct hns_roce_buf_region *regions,
+ int region_cnt)
+{
+ struct hns_roce_hem_item *root_hem;
+ struct hns_roce_hem_head head;
+ int max_ba_num;
+ int ret;
+ int i;
+
+ root_hem = hem_list_search_item(&hem_list->root_bt, regions[0].offset);
+ if (root_hem)
+ return 0;
+
+ max_ba_num = 0;
+ root_hem = alloc_root_hem(hr_dev, unit, &max_ba_num, regions,
+ region_cnt);
+ if (IS_ERR(root_hem))
+ return PTR_ERR(root_hem);
+
+ /* List head for storing all allocated HEM items */
+ INIT_LIST_HEAD(&head.root);
+ INIT_LIST_HEAD(&head.leaf);
+ for (i = 0; i < region_cnt; i++)
+ INIT_LIST_HEAD(&head.branch[i]);
+
+ hem_list->root_ba = root_hem->dma_addr;
+ list_add(&root_hem->list, &head.root);
+ ret = setup_root_hem(hr_dev, hem_list, unit, max_ba_num, &head, regions,
+ region_cnt);
+ if (ret) {
+ for (i = 0; i < region_cnt; i++)
+ hem_list_free_all(hr_dev, &head.branch[i]);
+
+ hem_list_free_all(hr_dev, &head.root);
+ }
+
+ return ret;
+}
+
+/* This is the bottom bt pages number of a 100G MR on 4K OS, assuming
+ * the bt page size is not expanded by cal_best_bt_pg_sz()
+ */
+#define RESCHED_LOOP_CNT_THRESHOLD_ON_4K 12800
+
+/* construct the base address table and link them by address hop config */
+int hns_roce_hem_list_request(struct hns_roce_dev *hr_dev,
+ struct hns_roce_hem_list *hem_list,
+ const struct hns_roce_buf_region *regions,
+ int region_cnt, unsigned int bt_pg_shift)
+{
+ const struct hns_roce_buf_region *r;
+ int ofs, end;
+ int loop;
+ int unit;
+ int ret;
+ int i;
+
+ if (region_cnt > HNS_ROCE_MAX_BT_REGION) {
+ dev_err(hr_dev->dev, "invalid region region_cnt %d!\n",
+ region_cnt);
+ return -EINVAL;
+ }
+
+ unit = (1 << bt_pg_shift) / BA_BYTE_LEN;
+ for (i = 0; i < region_cnt; i++) {
+ r = &regions[i];
+ if (!r->count)
+ continue;
+
+ end = r->offset + r->count;
+ for (ofs = r->offset, loop = 1; ofs < end; ofs += unit, loop++) {
+ if (!(loop % RESCHED_LOOP_CNT_THRESHOLD_ON_4K))
+ cond_resched();
+
+ ret = hem_list_alloc_mid_bt(hr_dev, r, unit, ofs,
+ hem_list->mid_bt[i],
+ &hem_list->btm_bt);
+ if (ret) {
+ dev_err(hr_dev->dev,
+ "alloc hem trunk fail ret = %d!\n", ret);
+ goto err_alloc;
+ }
+ }
+ }
+
+ ret = hem_list_alloc_root_bt(hr_dev, hem_list, unit, regions,
+ region_cnt);
+ if (ret)
+ dev_err(hr_dev->dev, "alloc hem root fail ret = %d!\n", ret);
+ else
+ return 0;
+
+err_alloc:
+ hns_roce_hem_list_release(hr_dev, hem_list);
+
+ return ret;
+}
+
+void hns_roce_hem_list_release(struct hns_roce_dev *hr_dev,
+ struct hns_roce_hem_list *hem_list)
+{
+ int i, j;
+
+ for (i = 0; i < HNS_ROCE_MAX_BT_REGION; i++)
+ for (j = 0; j < HNS_ROCE_MAX_BT_LEVEL; j++)
+ hem_list_free_all(hr_dev, &hem_list->mid_bt[i][j]);
+
+ hem_list_free_all(hr_dev, &hem_list->root_bt);
+ INIT_LIST_HEAD(&hem_list->btm_bt);
+ hem_list->root_ba = 0;
+}
+
+void hns_roce_hem_list_init(struct hns_roce_hem_list *hem_list)
+{
+ int i, j;
+
+ INIT_LIST_HEAD(&hem_list->root_bt);
+ INIT_LIST_HEAD(&hem_list->btm_bt);
+ for (i = 0; i < HNS_ROCE_MAX_BT_REGION; i++)
+ for (j = 0; j < HNS_ROCE_MAX_BT_LEVEL; j++)
+ INIT_LIST_HEAD(&hem_list->mid_bt[i][j]);
+}
+
+void *hns_roce_hem_list_find_mtt(struct hns_roce_dev *hr_dev,
+ struct hns_roce_hem_list *hem_list,
+ int offset, int *mtt_cnt)
+{
+ struct list_head *head = &hem_list->btm_bt;
+ struct hns_roce_hem_item *hem, *temp_hem;
+ void *cpu_base = NULL;
+ int loop = 1;
+ int nr = 0;
+
+ list_for_each_entry_safe(hem, temp_hem, head, sibling) {
+ if (!(loop % RESCHED_LOOP_CNT_THRESHOLD_ON_4K))
+ cond_resched();
+ loop++;
+
+ if (hem_list_page_is_in_range(hem, offset)) {
+ nr = offset - hem->start;
+ cpu_base = hem->addr + nr * BA_BYTE_LEN;
+ nr = hem->end + 1 - offset;
+ break;
+ }
+ }
+
+ if (mtt_cnt)
+ *mtt_cnt = nr;
+
+ return cpu_base;
}
diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.h b/drivers/infiniband/hw/hns/hns_roce_hem.h
index 435748858252..9c415b2541af 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hem.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hem.h
@@ -34,9 +34,7 @@
#ifndef _HNS_ROCE_HEM_H
#define _HNS_ROCE_HEM_H
-#define HW_SYNC_TIMEOUT_MSECS 500
-#define HW_SYNC_SLEEP_TIME_INTERVAL 20
-#define BT_CMD_SYNC_SHIFT 31
+#define HEM_HOP_STEP_DIRECT 0xff
enum {
/* MAP HEM(Hardware Entry Memory) */
@@ -44,37 +42,47 @@ enum {
HEM_TYPE_MTPT,
HEM_TYPE_CQC,
HEM_TYPE_SRQC,
+ HEM_TYPE_SCCC,
+ HEM_TYPE_QPC_TIMER,
+ HEM_TYPE_CQC_TIMER,
+ HEM_TYPE_GMV,
/* UNMAP HEM */
HEM_TYPE_MTT,
+ HEM_TYPE_CQE,
+ HEM_TYPE_SRQWQE,
+ HEM_TYPE_IDX,
HEM_TYPE_IRRL,
+ HEM_TYPE_TRRL,
};
-#define HNS_ROCE_HEM_CHUNK_LEN \
- ((256 - sizeof(struct list_head) - 2 * sizeof(int)) / \
- (sizeof(struct scatterlist)))
+#define check_whether_bt_num_3(type, hop_num) \
+ ((type) < HEM_TYPE_MTT && (hop_num) == 2)
-enum {
- HNS_ROCE_HEM_PAGE_SHIFT = 12,
- HNS_ROCE_HEM_PAGE_SIZE = 1 << HNS_ROCE_HEM_PAGE_SHIFT,
-};
+#define check_whether_bt_num_2(type, hop_num) \
+ (((type) < HEM_TYPE_MTT && (hop_num) == 1) || \
+ ((type) >= HEM_TYPE_MTT && (hop_num) == 2))
-struct hns_roce_hem_chunk {
- struct list_head list;
- int npages;
- int nsg;
- struct scatterlist mem[HNS_ROCE_HEM_CHUNK_LEN];
-};
+#define check_whether_bt_num_1(type, hop_num) \
+ (((type) < HEM_TYPE_MTT && (hop_num) == HNS_ROCE_HOP_NUM_0) || \
+ ((type) >= HEM_TYPE_MTT && (hop_num) == 1) || \
+ ((type) >= HEM_TYPE_MTT && (hop_num) == HNS_ROCE_HOP_NUM_0))
struct hns_roce_hem {
- struct list_head chunk_list;
- int refcount;
+ void *buf;
+ dma_addr_t dma;
+ unsigned long size;
+ refcount_t refcount;
};
-struct hns_roce_hem_iter {
- struct hns_roce_hem *hem;
- struct hns_roce_hem_chunk *chunk;
- int page_idx;
+struct hns_roce_hem_mhop {
+ u32 hop_num;
+ u32 buf_chunk_size;
+ u32 bt_chunk_size;
+ u32 ba_l0_num;
+ u32 l0_idx; /* level 0 base address table index */
+ u32 l1_idx; /* level 1 base address table index */
+ u32 l2_idx; /* level 2 base address table index */
};
void hns_roce_free_hem(struct hns_roce_dev *hr_dev, struct hns_roce_hem *hem);
@@ -82,54 +90,31 @@ int hns_roce_table_get(struct hns_roce_dev *hr_dev,
struct hns_roce_hem_table *table, unsigned long obj);
void hns_roce_table_put(struct hns_roce_dev *hr_dev,
struct hns_roce_hem_table *table, unsigned long obj);
-void *hns_roce_table_find(struct hns_roce_hem_table *table, unsigned long obj,
+void *hns_roce_table_find(struct hns_roce_dev *hr_dev,
+ struct hns_roce_hem_table *table, unsigned long obj,
dma_addr_t *dma_handle);
-int hns_roce_table_get_range(struct hns_roce_dev *hr_dev,
- struct hns_roce_hem_table *table,
- unsigned long start, unsigned long end);
-void hns_roce_table_put_range(struct hns_roce_dev *hr_dev,
- struct hns_roce_hem_table *table,
- unsigned long start, unsigned long end);
int hns_roce_init_hem_table(struct hns_roce_dev *hr_dev,
struct hns_roce_hem_table *table, u32 type,
- unsigned long obj_size, unsigned long nobj,
- int use_lowmem);
+ unsigned long obj_size, unsigned long nobj);
void hns_roce_cleanup_hem_table(struct hns_roce_dev *hr_dev,
struct hns_roce_hem_table *table);
void hns_roce_cleanup_hem(struct hns_roce_dev *hr_dev);
-
-static inline void hns_roce_hem_first(struct hns_roce_hem *hem,
- struct hns_roce_hem_iter *iter)
-{
- iter->hem = hem;
- iter->chunk = list_empty(&hem->chunk_list) ? NULL :
- list_entry(hem->chunk_list.next,
- struct hns_roce_hem_chunk, list);
- iter->page_idx = 0;
-}
-
-static inline int hns_roce_hem_last(struct hns_roce_hem_iter *iter)
-{
- return !iter->chunk;
-}
-
-static inline void hns_roce_hem_next(struct hns_roce_hem_iter *iter)
-{
- if (++iter->page_idx >= iter->chunk->nsg) {
- if (iter->chunk->list.next == &iter->hem->chunk_list) {
- iter->chunk = NULL;
- return;
- }
-
- iter->chunk = list_entry(iter->chunk->list.next,
- struct hns_roce_hem_chunk, list);
- iter->page_idx = 0;
- }
-}
-
-static inline dma_addr_t hns_roce_hem_addr(struct hns_roce_hem_iter *iter)
-{
- return sg_dma_address(&iter->chunk->mem[iter->page_idx]);
-}
-
-#endif /*_HNS_ROCE_HEM_H*/
+int hns_roce_calc_hem_mhop(struct hns_roce_dev *hr_dev,
+ struct hns_roce_hem_table *table, unsigned long *obj,
+ struct hns_roce_hem_mhop *mhop);
+bool hns_roce_check_whether_mhop(struct hns_roce_dev *hr_dev, u32 type);
+
+void hns_roce_hem_list_init(struct hns_roce_hem_list *hem_list);
+int hns_roce_hem_list_calc_root_ba(const struct hns_roce_buf_region *regions,
+ int region_cnt, int unit);
+int hns_roce_hem_list_request(struct hns_roce_dev *hr_dev,
+ struct hns_roce_hem_list *hem_list,
+ const struct hns_roce_buf_region *regions,
+ int region_cnt, unsigned int bt_pg_shift);
+void hns_roce_hem_list_release(struct hns_roce_dev *hr_dev,
+ struct hns_roce_hem_list *hem_list);
+void *hns_roce_hem_list_find_mtt(struct hns_roce_dev *hr_dev,
+ struct hns_roce_hem_list *hem_list,
+ int offset, int *mtt_cnt);
+
+#endif /* _HNS_ROCE_HEM_H */
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
deleted file mode 100644
index b8111b0c8877..000000000000
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
+++ /dev/null
@@ -1,3841 +0,0 @@
-/*
- * Copyright (c) 2016 Hisilicon Limited.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/platform_device.h>
-#include <linux/acpi.h>
-#include <linux/etherdevice.h>
-#include <rdma/ib_umem.h>
-#include "hns_roce_common.h"
-#include "hns_roce_device.h"
-#include "hns_roce_cmd.h"
-#include "hns_roce_hem.h"
-#include "hns_roce_hw_v1.h"
-
-static void set_data_seg(struct hns_roce_wqe_data_seg *dseg, struct ib_sge *sg)
-{
- dseg->lkey = cpu_to_le32(sg->lkey);
- dseg->addr = cpu_to_le64(sg->addr);
- dseg->len = cpu_to_le32(sg->length);
-}
-
-static void set_raddr_seg(struct hns_roce_wqe_raddr_seg *rseg, u64 remote_addr,
- u32 rkey)
-{
- rseg->raddr = cpu_to_le64(remote_addr);
- rseg->rkey = cpu_to_le32(rkey);
- rseg->len = 0;
-}
-
-int hns_roce_v1_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
- struct ib_send_wr **bad_wr)
-{
- struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
- struct hns_roce_ah *ah = to_hr_ah(ud_wr(wr)->ah);
- struct hns_roce_ud_send_wqe *ud_sq_wqe = NULL;
- struct hns_roce_wqe_ctrl_seg *ctrl = NULL;
- struct hns_roce_wqe_data_seg *dseg = NULL;
- struct hns_roce_qp *qp = to_hr_qp(ibqp);
- struct device *dev = &hr_dev->pdev->dev;
- struct hns_roce_sq_db sq_db;
- int ps_opcode = 0, i = 0;
- unsigned long flags = 0;
- void *wqe = NULL;
- u32 doorbell[2];
- int nreq = 0;
- u32 ind = 0;
- int ret = 0;
- u8 *smac;
- int loopback;
-
- if (unlikely(ibqp->qp_type != IB_QPT_GSI &&
- ibqp->qp_type != IB_QPT_RC)) {
- dev_err(dev, "un-supported QP type\n");
- *bad_wr = NULL;
- return -EOPNOTSUPP;
- }
-
- spin_lock_irqsave(&qp->sq.lock, flags);
- ind = qp->sq_next_wqe;
- for (nreq = 0; wr; ++nreq, wr = wr->next) {
- if (hns_roce_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) {
- ret = -ENOMEM;
- *bad_wr = wr;
- goto out;
- }
-
- if (unlikely(wr->num_sge > qp->sq.max_gs)) {
- dev_err(dev, "num_sge=%d > qp->sq.max_gs=%d\n",
- wr->num_sge, qp->sq.max_gs);
- ret = -EINVAL;
- *bad_wr = wr;
- goto out;
- }
-
- wqe = get_send_wqe(qp, ind & (qp->sq.wqe_cnt - 1));
- qp->sq.wrid[(qp->sq.head + nreq) & (qp->sq.wqe_cnt - 1)] =
- wr->wr_id;
-
- /* Corresponding to the RC and RD type wqe process separately */
- if (ibqp->qp_type == IB_QPT_GSI) {
- ud_sq_wqe = wqe;
- roce_set_field(ud_sq_wqe->dmac_h,
- UD_SEND_WQE_U32_4_DMAC_0_M,
- UD_SEND_WQE_U32_4_DMAC_0_S,
- ah->av.mac[0]);
- roce_set_field(ud_sq_wqe->dmac_h,
- UD_SEND_WQE_U32_4_DMAC_1_M,
- UD_SEND_WQE_U32_4_DMAC_1_S,
- ah->av.mac[1]);
- roce_set_field(ud_sq_wqe->dmac_h,
- UD_SEND_WQE_U32_4_DMAC_2_M,
- UD_SEND_WQE_U32_4_DMAC_2_S,
- ah->av.mac[2]);
- roce_set_field(ud_sq_wqe->dmac_h,
- UD_SEND_WQE_U32_4_DMAC_3_M,
- UD_SEND_WQE_U32_4_DMAC_3_S,
- ah->av.mac[3]);
-
- roce_set_field(ud_sq_wqe->u32_8,
- UD_SEND_WQE_U32_8_DMAC_4_M,
- UD_SEND_WQE_U32_8_DMAC_4_S,
- ah->av.mac[4]);
- roce_set_field(ud_sq_wqe->u32_8,
- UD_SEND_WQE_U32_8_DMAC_5_M,
- UD_SEND_WQE_U32_8_DMAC_5_S,
- ah->av.mac[5]);
-
- smac = (u8 *)hr_dev->dev_addr[qp->port];
- loopback = ether_addr_equal_unaligned(ah->av.mac,
- smac) ? 1 : 0;
- roce_set_bit(ud_sq_wqe->u32_8,
- UD_SEND_WQE_U32_8_LOOPBACK_INDICATOR_S,
- loopback);
-
- roce_set_field(ud_sq_wqe->u32_8,
- UD_SEND_WQE_U32_8_OPERATION_TYPE_M,
- UD_SEND_WQE_U32_8_OPERATION_TYPE_S,
- HNS_ROCE_WQE_OPCODE_SEND);
- roce_set_field(ud_sq_wqe->u32_8,
- UD_SEND_WQE_U32_8_NUMBER_OF_DATA_SEG_M,
- UD_SEND_WQE_U32_8_NUMBER_OF_DATA_SEG_S,
- 2);
- roce_set_bit(ud_sq_wqe->u32_8,
- UD_SEND_WQE_U32_8_SEND_GL_ROUTING_HDR_FLAG_S,
- 1);
-
- ud_sq_wqe->u32_8 |= (wr->send_flags & IB_SEND_SIGNALED ?
- cpu_to_le32(HNS_ROCE_WQE_CQ_NOTIFY) : 0) |
- (wr->send_flags & IB_SEND_SOLICITED ?
- cpu_to_le32(HNS_ROCE_WQE_SE) : 0) |
- ((wr->opcode == IB_WR_SEND_WITH_IMM) ?
- cpu_to_le32(HNS_ROCE_WQE_IMM) : 0);
-
- roce_set_field(ud_sq_wqe->u32_16,
- UD_SEND_WQE_U32_16_DEST_QP_M,
- UD_SEND_WQE_U32_16_DEST_QP_S,
- ud_wr(wr)->remote_qpn);
- roce_set_field(ud_sq_wqe->u32_16,
- UD_SEND_WQE_U32_16_MAX_STATIC_RATE_M,
- UD_SEND_WQE_U32_16_MAX_STATIC_RATE_S,
- ah->av.stat_rate);
-
- roce_set_field(ud_sq_wqe->u32_36,
- UD_SEND_WQE_U32_36_FLOW_LABEL_M,
- UD_SEND_WQE_U32_36_FLOW_LABEL_S, 0);
- roce_set_field(ud_sq_wqe->u32_36,
- UD_SEND_WQE_U32_36_PRIORITY_M,
- UD_SEND_WQE_U32_36_PRIORITY_S,
- ah->av.sl_tclass_flowlabel >>
- HNS_ROCE_SL_SHIFT);
- roce_set_field(ud_sq_wqe->u32_36,
- UD_SEND_WQE_U32_36_SGID_INDEX_M,
- UD_SEND_WQE_U32_36_SGID_INDEX_S,
- hns_get_gid_index(hr_dev, qp->phy_port,
- ah->av.gid_index));
-
- roce_set_field(ud_sq_wqe->u32_40,
- UD_SEND_WQE_U32_40_HOP_LIMIT_M,
- UD_SEND_WQE_U32_40_HOP_LIMIT_S,
- ah->av.hop_limit);
- roce_set_field(ud_sq_wqe->u32_40,
- UD_SEND_WQE_U32_40_TRAFFIC_CLASS_M,
- UD_SEND_WQE_U32_40_TRAFFIC_CLASS_S, 0);
-
- memcpy(&ud_sq_wqe->dgid[0], &ah->av.dgid[0], GID_LEN);
-
- ud_sq_wqe->va0_l = (u32)wr->sg_list[0].addr;
- ud_sq_wqe->va0_h = (wr->sg_list[0].addr) >> 32;
- ud_sq_wqe->l_key0 = wr->sg_list[0].lkey;
-
- ud_sq_wqe->va1_l = (u32)wr->sg_list[1].addr;
- ud_sq_wqe->va1_h = (wr->sg_list[1].addr) >> 32;
- ud_sq_wqe->l_key1 = wr->sg_list[1].lkey;
- ind++;
- } else if (ibqp->qp_type == IB_QPT_RC) {
- ctrl = wqe;
- memset(ctrl, 0, sizeof(struct hns_roce_wqe_ctrl_seg));
- for (i = 0; i < wr->num_sge; i++)
- ctrl->msg_length += wr->sg_list[i].length;
-
- ctrl->sgl_pa_h = 0;
- ctrl->flag = 0;
- ctrl->imm_data = send_ieth(wr);
-
- /*Ctrl field, ctrl set type: sig, solic, imm, fence */
- /* SO wait for conforming application scenarios */
- ctrl->flag |= (wr->send_flags & IB_SEND_SIGNALED ?
- cpu_to_le32(HNS_ROCE_WQE_CQ_NOTIFY) : 0) |
- (wr->send_flags & IB_SEND_SOLICITED ?
- cpu_to_le32(HNS_ROCE_WQE_SE) : 0) |
- ((wr->opcode == IB_WR_SEND_WITH_IMM ||
- wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) ?
- cpu_to_le32(HNS_ROCE_WQE_IMM) : 0) |
- (wr->send_flags & IB_SEND_FENCE ?
- (cpu_to_le32(HNS_ROCE_WQE_FENCE)) : 0);
-
- wqe += sizeof(struct hns_roce_wqe_ctrl_seg);
-
- switch (wr->opcode) {
- case IB_WR_RDMA_READ:
- ps_opcode = HNS_ROCE_WQE_OPCODE_RDMA_READ;
- set_raddr_seg(wqe, atomic_wr(wr)->remote_addr,
- atomic_wr(wr)->rkey);
- break;
- case IB_WR_RDMA_WRITE:
- case IB_WR_RDMA_WRITE_WITH_IMM:
- ps_opcode = HNS_ROCE_WQE_OPCODE_RDMA_WRITE;
- set_raddr_seg(wqe, atomic_wr(wr)->remote_addr,
- atomic_wr(wr)->rkey);
- break;
- case IB_WR_SEND:
- case IB_WR_SEND_WITH_INV:
- case IB_WR_SEND_WITH_IMM:
- ps_opcode = HNS_ROCE_WQE_OPCODE_SEND;
- break;
- case IB_WR_LOCAL_INV:
- break;
- case IB_WR_ATOMIC_CMP_AND_SWP:
- case IB_WR_ATOMIC_FETCH_AND_ADD:
- case IB_WR_LSO:
- default:
- ps_opcode = HNS_ROCE_WQE_OPCODE_MASK;
- break;
- }
- ctrl->flag |= cpu_to_le32(ps_opcode);
- wqe += sizeof(struct hns_roce_wqe_raddr_seg);
-
- dseg = wqe;
- if (wr->send_flags & IB_SEND_INLINE && wr->num_sge) {
- if (ctrl->msg_length >
- hr_dev->caps.max_sq_inline) {
- ret = -EINVAL;
- *bad_wr = wr;
- dev_err(dev, "inline len(1-%d)=%d, illegal",
- ctrl->msg_length,
- hr_dev->caps.max_sq_inline);
- goto out;
- }
- for (i = 0; i < wr->num_sge; i++) {
- memcpy(wqe, ((void *) (uintptr_t)
- wr->sg_list[i].addr),
- wr->sg_list[i].length);
- wqe += wr->sg_list[i].length;
- }
- ctrl->flag |= HNS_ROCE_WQE_INLINE;
- } else {
- /*sqe num is two */
- for (i = 0; i < wr->num_sge; i++)
- set_data_seg(dseg + i, wr->sg_list + i);
-
- ctrl->flag |= cpu_to_le32(wr->num_sge <<
- HNS_ROCE_WQE_SGE_NUM_BIT);
- }
- ind++;
- }
- }
-
-out:
- /* Set DB return */
- if (likely(nreq)) {
- qp->sq.head += nreq;
- /* Memory barrier */
- wmb();
-
- sq_db.u32_4 = 0;
- sq_db.u32_8 = 0;
- roce_set_field(sq_db.u32_4, SQ_DOORBELL_U32_4_SQ_HEAD_M,
- SQ_DOORBELL_U32_4_SQ_HEAD_S,
- (qp->sq.head & ((qp->sq.wqe_cnt << 1) - 1)));
- roce_set_field(sq_db.u32_4, SQ_DOORBELL_U32_4_SL_M,
- SQ_DOORBELL_U32_4_SL_S, qp->sl);
- roce_set_field(sq_db.u32_4, SQ_DOORBELL_U32_4_PORT_M,
- SQ_DOORBELL_U32_4_PORT_S, qp->phy_port);
- roce_set_field(sq_db.u32_8, SQ_DOORBELL_U32_8_QPN_M,
- SQ_DOORBELL_U32_8_QPN_S, qp->doorbell_qpn);
- roce_set_bit(sq_db.u32_8, SQ_DOORBELL_HW_SYNC_S, 1);
-
- doorbell[0] = sq_db.u32_4;
- doorbell[1] = sq_db.u32_8;
-
- hns_roce_write64_k(doorbell, qp->sq.db_reg_l);
- qp->sq_next_wqe = ind;
- }
-
- spin_unlock_irqrestore(&qp->sq.lock, flags);
-
- return ret;
-}
-
-int hns_roce_v1_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
- struct ib_recv_wr **bad_wr)
-{
- int ret = 0;
- int nreq = 0;
- int ind = 0;
- int i = 0;
- u32 reg_val = 0;
- unsigned long flags = 0;
- struct hns_roce_rq_wqe_ctrl *ctrl = NULL;
- struct hns_roce_wqe_data_seg *scat = NULL;
- struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
- struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
- struct device *dev = &hr_dev->pdev->dev;
- struct hns_roce_rq_db rq_db;
- uint32_t doorbell[2] = {0};
-
- spin_lock_irqsave(&hr_qp->rq.lock, flags);
- ind = hr_qp->rq.head & (hr_qp->rq.wqe_cnt - 1);
-
- for (nreq = 0; wr; ++nreq, wr = wr->next) {
- if (hns_roce_wq_overflow(&hr_qp->rq, nreq,
- hr_qp->ibqp.recv_cq)) {
- ret = -ENOMEM;
- *bad_wr = wr;
- goto out;
- }
-
- if (unlikely(wr->num_sge > hr_qp->rq.max_gs)) {
- dev_err(dev, "rq:num_sge=%d > qp->sq.max_gs=%d\n",
- wr->num_sge, hr_qp->rq.max_gs);
- ret = -EINVAL;
- *bad_wr = wr;
- goto out;
- }
-
- ctrl = get_recv_wqe(hr_qp, ind);
-
- roce_set_field(ctrl->rwqe_byte_12,
- RQ_WQE_CTRL_RWQE_BYTE_12_RWQE_SGE_NUM_M,
- RQ_WQE_CTRL_RWQE_BYTE_12_RWQE_SGE_NUM_S,
- wr->num_sge);
-
- scat = (struct hns_roce_wqe_data_seg *)(ctrl + 1);
-
- for (i = 0; i < wr->num_sge; i++)
- set_data_seg(scat + i, wr->sg_list + i);
-
- hr_qp->rq.wrid[ind] = wr->wr_id;
-
- ind = (ind + 1) & (hr_qp->rq.wqe_cnt - 1);
- }
-
-out:
- if (likely(nreq)) {
- hr_qp->rq.head += nreq;
- /* Memory barrier */
- wmb();
-
- if (ibqp->qp_type == IB_QPT_GSI) {
- /* SW update GSI rq header */
- reg_val = roce_read(to_hr_dev(ibqp->device),
- ROCEE_QP1C_CFG3_0_REG +
- QP1C_CFGN_OFFSET * hr_qp->phy_port);
- roce_set_field(reg_val,
- ROCEE_QP1C_CFG3_0_ROCEE_QP1C_RQ_HEAD_M,
- ROCEE_QP1C_CFG3_0_ROCEE_QP1C_RQ_HEAD_S,
- hr_qp->rq.head);
- roce_write(to_hr_dev(ibqp->device),
- ROCEE_QP1C_CFG3_0_REG +
- QP1C_CFGN_OFFSET * hr_qp->phy_port, reg_val);
- } else {
- rq_db.u32_4 = 0;
- rq_db.u32_8 = 0;
-
- roce_set_field(rq_db.u32_4, RQ_DOORBELL_U32_4_RQ_HEAD_M,
- RQ_DOORBELL_U32_4_RQ_HEAD_S,
- hr_qp->rq.head);
- roce_set_field(rq_db.u32_8, RQ_DOORBELL_U32_8_QPN_M,
- RQ_DOORBELL_U32_8_QPN_S, hr_qp->qpn);
- roce_set_field(rq_db.u32_8, RQ_DOORBELL_U32_8_CMD_M,
- RQ_DOORBELL_U32_8_CMD_S, 1);
- roce_set_bit(rq_db.u32_8, RQ_DOORBELL_U32_8_HW_SYNC_S,
- 1);
-
- doorbell[0] = rq_db.u32_4;
- doorbell[1] = rq_db.u32_8;
-
- hns_roce_write64_k(doorbell, hr_qp->rq.db_reg_l);
- }
- }
- spin_unlock_irqrestore(&hr_qp->rq.lock, flags);
-
- return ret;
-}
-
-static void hns_roce_set_db_event_mode(struct hns_roce_dev *hr_dev,
- int sdb_mode, int odb_mode)
-{
- u32 val;
-
- val = roce_read(hr_dev, ROCEE_GLB_CFG_REG);
- roce_set_bit(val, ROCEE_GLB_CFG_ROCEE_DB_SQ_MODE_S, sdb_mode);
- roce_set_bit(val, ROCEE_GLB_CFG_ROCEE_DB_OTH_MODE_S, odb_mode);
- roce_write(hr_dev, ROCEE_GLB_CFG_REG, val);
-}
-
-static void hns_roce_set_db_ext_mode(struct hns_roce_dev *hr_dev, u32 sdb_mode,
- u32 odb_mode)
-{
- u32 val;
-
- /* Configure SDB/ODB extend mode */
- val = roce_read(hr_dev, ROCEE_GLB_CFG_REG);
- roce_set_bit(val, ROCEE_GLB_CFG_SQ_EXT_DB_MODE_S, sdb_mode);
- roce_set_bit(val, ROCEE_GLB_CFG_OTH_EXT_DB_MODE_S, odb_mode);
- roce_write(hr_dev, ROCEE_GLB_CFG_REG, val);
-}
-
-static void hns_roce_set_sdb(struct hns_roce_dev *hr_dev, u32 sdb_alept,
- u32 sdb_alful)
-{
- u32 val;
-
- /* Configure SDB */
- val = roce_read(hr_dev, ROCEE_DB_SQ_WL_REG);
- roce_set_field(val, ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_M,
- ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_S, sdb_alful);
- roce_set_field(val, ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_EMPTY_M,
- ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_EMPTY_S, sdb_alept);
- roce_write(hr_dev, ROCEE_DB_SQ_WL_REG, val);
-}
-
-static void hns_roce_set_odb(struct hns_roce_dev *hr_dev, u32 odb_alept,
- u32 odb_alful)
-{
- u32 val;
-
- /* Configure ODB */
- val = roce_read(hr_dev, ROCEE_DB_OTHERS_WL_REG);
- roce_set_field(val, ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_M,
- ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_S, odb_alful);
- roce_set_field(val, ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_EMPTY_M,
- ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_EMPTY_S, odb_alept);
- roce_write(hr_dev, ROCEE_DB_OTHERS_WL_REG, val);
-}
-
-static void hns_roce_set_sdb_ext(struct hns_roce_dev *hr_dev, u32 ext_sdb_alept,
- u32 ext_sdb_alful)
-{
- struct device *dev = &hr_dev->pdev->dev;
- struct hns_roce_v1_priv *priv;
- struct hns_roce_db_table *db;
- dma_addr_t sdb_dma_addr;
- u32 val;
-
- priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv;
- db = &priv->db_table;
-
- /* Configure extend SDB threshold */
- roce_write(hr_dev, ROCEE_EXT_DB_SQ_WL_EMPTY_REG, ext_sdb_alept);
- roce_write(hr_dev, ROCEE_EXT_DB_SQ_WL_REG, ext_sdb_alful);
-
- /* Configure extend SDB base addr */
- sdb_dma_addr = db->ext_db->sdb_buf_list->map;
- roce_write(hr_dev, ROCEE_EXT_DB_SQ_REG, (u32)(sdb_dma_addr >> 12));
-
- /* Configure extend SDB depth */
- val = roce_read(hr_dev, ROCEE_EXT_DB_SQ_H_REG);
- roce_set_field(val, ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_SHIFT_M,
- ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_SHIFT_S,
- db->ext_db->esdb_dep);
- /*
- * 44 = 32 + 12, When evaluating addr to hardware, shift 12 because of
- * using 4K page, and shift more 32 because of
- * caculating the high 32 bit value evaluated to hardware.
- */
- roce_set_field(val, ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_BA_H_M,
- ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_BA_H_S, sdb_dma_addr >> 44);
- roce_write(hr_dev, ROCEE_EXT_DB_SQ_H_REG, val);
-
- dev_dbg(dev, "ext SDB depth: 0x%x\n", db->ext_db->esdb_dep);
- dev_dbg(dev, "ext SDB threshold: epmty: 0x%x, ful: 0x%x\n",
- ext_sdb_alept, ext_sdb_alful);
-}
-
-static void hns_roce_set_odb_ext(struct hns_roce_dev *hr_dev, u32 ext_odb_alept,
- u32 ext_odb_alful)
-{
- struct device *dev = &hr_dev->pdev->dev;
- struct hns_roce_v1_priv *priv;
- struct hns_roce_db_table *db;
- dma_addr_t odb_dma_addr;
- u32 val;
-
- priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv;
- db = &priv->db_table;
-
- /* Configure extend ODB threshold */
- roce_write(hr_dev, ROCEE_EXT_DB_OTHERS_WL_EMPTY_REG, ext_odb_alept);
- roce_write(hr_dev, ROCEE_EXT_DB_OTHERS_WL_REG, ext_odb_alful);
-
- /* Configure extend ODB base addr */
- odb_dma_addr = db->ext_db->odb_buf_list->map;
- roce_write(hr_dev, ROCEE_EXT_DB_OTH_REG, (u32)(odb_dma_addr >> 12));
-
- /* Configure extend ODB depth */
- val = roce_read(hr_dev, ROCEE_EXT_DB_OTH_H_REG);
- roce_set_field(val, ROCEE_EXT_DB_OTH_H_EXT_DB_OTH_SHIFT_M,
- ROCEE_EXT_DB_OTH_H_EXT_DB_OTH_SHIFT_S,
- db->ext_db->eodb_dep);
- roce_set_field(val, ROCEE_EXT_DB_SQ_H_EXT_DB_OTH_BA_H_M,
- ROCEE_EXT_DB_SQ_H_EXT_DB_OTH_BA_H_S,
- db->ext_db->eodb_dep);
- roce_write(hr_dev, ROCEE_EXT_DB_OTH_H_REG, val);
-
- dev_dbg(dev, "ext ODB depth: 0x%x\n", db->ext_db->eodb_dep);
- dev_dbg(dev, "ext ODB threshold: empty: 0x%x, ful: 0x%x\n",
- ext_odb_alept, ext_odb_alful);
-}
-
-static int hns_roce_db_ext_init(struct hns_roce_dev *hr_dev, u32 sdb_ext_mod,
- u32 odb_ext_mod)
-{
- struct device *dev = &hr_dev->pdev->dev;
- struct hns_roce_v1_priv *priv;
- struct hns_roce_db_table *db;
- dma_addr_t sdb_dma_addr;
- dma_addr_t odb_dma_addr;
- int ret = 0;
-
- priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv;
- db = &priv->db_table;
-
- db->ext_db = kmalloc(sizeof(*db->ext_db), GFP_KERNEL);
- if (!db->ext_db)
- return -ENOMEM;
-
- if (sdb_ext_mod) {
- db->ext_db->sdb_buf_list = kmalloc(
- sizeof(*db->ext_db->sdb_buf_list), GFP_KERNEL);
- if (!db->ext_db->sdb_buf_list) {
- ret = -ENOMEM;
- goto ext_sdb_buf_fail_out;
- }
-
- db->ext_db->sdb_buf_list->buf = dma_alloc_coherent(dev,
- HNS_ROCE_V1_EXT_SDB_SIZE,
- &sdb_dma_addr, GFP_KERNEL);
- if (!db->ext_db->sdb_buf_list->buf) {
- ret = -ENOMEM;
- goto alloc_sq_db_buf_fail;
- }
- db->ext_db->sdb_buf_list->map = sdb_dma_addr;
-
- db->ext_db->esdb_dep = ilog2(HNS_ROCE_V1_EXT_SDB_DEPTH);
- hns_roce_set_sdb_ext(hr_dev, HNS_ROCE_V1_EXT_SDB_ALEPT,
- HNS_ROCE_V1_EXT_SDB_ALFUL);
- } else
- hns_roce_set_sdb(hr_dev, HNS_ROCE_V1_SDB_ALEPT,
- HNS_ROCE_V1_SDB_ALFUL);
-
- if (odb_ext_mod) {
- db->ext_db->odb_buf_list = kmalloc(
- sizeof(*db->ext_db->odb_buf_list), GFP_KERNEL);
- if (!db->ext_db->odb_buf_list) {
- ret = -ENOMEM;
- goto ext_odb_buf_fail_out;
- }
-
- db->ext_db->odb_buf_list->buf = dma_alloc_coherent(dev,
- HNS_ROCE_V1_EXT_ODB_SIZE,
- &odb_dma_addr, GFP_KERNEL);
- if (!db->ext_db->odb_buf_list->buf) {
- ret = -ENOMEM;
- goto alloc_otr_db_buf_fail;
- }
- db->ext_db->odb_buf_list->map = odb_dma_addr;
-
- db->ext_db->eodb_dep = ilog2(HNS_ROCE_V1_EXT_ODB_DEPTH);
- hns_roce_set_odb_ext(hr_dev, HNS_ROCE_V1_EXT_ODB_ALEPT,
- HNS_ROCE_V1_EXT_ODB_ALFUL);
- } else
- hns_roce_set_odb(hr_dev, HNS_ROCE_V1_ODB_ALEPT,
- HNS_ROCE_V1_ODB_ALFUL);
-
- hns_roce_set_db_ext_mode(hr_dev, sdb_ext_mod, odb_ext_mod);
-
- return 0;
-
-alloc_otr_db_buf_fail:
- kfree(db->ext_db->odb_buf_list);
-
-ext_odb_buf_fail_out:
- if (sdb_ext_mod) {
- dma_free_coherent(dev, HNS_ROCE_V1_EXT_SDB_SIZE,
- db->ext_db->sdb_buf_list->buf,
- db->ext_db->sdb_buf_list->map);
- }
-
-alloc_sq_db_buf_fail:
- if (sdb_ext_mod)
- kfree(db->ext_db->sdb_buf_list);
-
-ext_sdb_buf_fail_out:
- kfree(db->ext_db);
- return ret;
-}
-
-static struct hns_roce_qp *hns_roce_v1_create_lp_qp(struct hns_roce_dev *hr_dev,
- struct ib_pd *pd)
-{
- struct device *dev = &hr_dev->pdev->dev;
- struct ib_qp_init_attr init_attr;
- struct ib_qp *qp;
-
- memset(&init_attr, 0, sizeof(struct ib_qp_init_attr));
- init_attr.qp_type = IB_QPT_RC;
- init_attr.sq_sig_type = IB_SIGNAL_ALL_WR;
- init_attr.cap.max_recv_wr = HNS_ROCE_MIN_WQE_NUM;
- init_attr.cap.max_send_wr = HNS_ROCE_MIN_WQE_NUM;
-
- qp = hns_roce_create_qp(pd, &init_attr, NULL);
- if (IS_ERR(qp)) {
- dev_err(dev, "Create loop qp for mr free failed!");
- return NULL;
- }
-
- return to_hr_qp(qp);
-}
-
-static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev)
-{
- struct hns_roce_caps *caps = &hr_dev->caps;
- struct device *dev = &hr_dev->pdev->dev;
- struct ib_cq_init_attr cq_init_attr;
- struct hns_roce_free_mr *free_mr;
- struct ib_qp_attr attr = { 0 };
- struct hns_roce_v1_priv *priv;
- struct hns_roce_qp *hr_qp;
- struct ib_cq *cq;
- struct ib_pd *pd;
- u64 subnet_prefix;
- int attr_mask = 0;
- int i;
- int ret;
- u8 phy_port;
- u8 sl;
-
- priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv;
- free_mr = &priv->free_mr;
-
- /* Reserved cq for loop qp */
- cq_init_attr.cqe = HNS_ROCE_MIN_WQE_NUM * 2;
- cq_init_attr.comp_vector = 0;
- cq = hns_roce_ib_create_cq(&hr_dev->ib_dev, &cq_init_attr, NULL, NULL);
- if (IS_ERR(cq)) {
- dev_err(dev, "Create cq for reseved loop qp failed!");
- return -ENOMEM;
- }
- free_mr->mr_free_cq = to_hr_cq(cq);
- free_mr->mr_free_cq->ib_cq.device = &hr_dev->ib_dev;
- free_mr->mr_free_cq->ib_cq.uobject = NULL;
- free_mr->mr_free_cq->ib_cq.comp_handler = NULL;
- free_mr->mr_free_cq->ib_cq.event_handler = NULL;
- free_mr->mr_free_cq->ib_cq.cq_context = NULL;
- atomic_set(&free_mr->mr_free_cq->ib_cq.usecnt, 0);
-
- pd = hns_roce_alloc_pd(&hr_dev->ib_dev, NULL, NULL);
- if (IS_ERR(pd)) {
- dev_err(dev, "Create pd for reseved loop qp failed!");
- ret = -ENOMEM;
- goto alloc_pd_failed;
- }
- free_mr->mr_free_pd = to_hr_pd(pd);
- free_mr->mr_free_pd->ibpd.device = &hr_dev->ib_dev;
- free_mr->mr_free_pd->ibpd.uobject = NULL;
- atomic_set(&free_mr->mr_free_pd->ibpd.usecnt, 0);
-
- attr.qp_access_flags = IB_ACCESS_REMOTE_WRITE;
- attr.pkey_index = 0;
- attr.min_rnr_timer = 0;
- /* Disable read ability */
- attr.max_dest_rd_atomic = 0;
- attr.max_rd_atomic = 0;
- /* Use arbitrary values as rq_psn and sq_psn */
- attr.rq_psn = 0x0808;
- attr.sq_psn = 0x0808;
- attr.retry_cnt = 7;
- attr.rnr_retry = 7;
- attr.timeout = 0x12;
- attr.path_mtu = IB_MTU_256;
- attr.ah_attr.ah_flags = 1;
- attr.ah_attr.static_rate = 3;
- attr.ah_attr.grh.sgid_index = 0;
- attr.ah_attr.grh.hop_limit = 1;
- attr.ah_attr.grh.flow_label = 0;
- attr.ah_attr.grh.traffic_class = 0;
-
- subnet_prefix = cpu_to_be64(0xfe80000000000000LL);
- for (i = 0; i < HNS_ROCE_V1_RESV_QP; i++) {
- free_mr->mr_free_qp[i] = hns_roce_v1_create_lp_qp(hr_dev, pd);
- if (IS_ERR(free_mr->mr_free_qp[i])) {
- dev_err(dev, "Create loop qp failed!\n");
- goto create_lp_qp_failed;
- }
- hr_qp = free_mr->mr_free_qp[i];
-
- sl = i / caps->num_ports;
-
- if (caps->num_ports == HNS_ROCE_MAX_PORTS)
- phy_port = (i >= HNS_ROCE_MAX_PORTS) ? (i - 2) :
- (i % caps->num_ports);
- else
- phy_port = i % caps->num_ports;
-
- hr_qp->port = phy_port + 1;
- hr_qp->phy_port = phy_port;
- hr_qp->ibqp.qp_type = IB_QPT_RC;
- hr_qp->ibqp.device = &hr_dev->ib_dev;
- hr_qp->ibqp.uobject = NULL;
- atomic_set(&hr_qp->ibqp.usecnt, 0);
- hr_qp->ibqp.pd = pd;
- hr_qp->ibqp.recv_cq = cq;
- hr_qp->ibqp.send_cq = cq;
-
- attr.ah_attr.port_num = phy_port + 1;
- attr.ah_attr.sl = sl;
- attr.port_num = phy_port + 1;
-
- attr.dest_qp_num = hr_qp->qpn;
- memcpy(attr.ah_attr.dmac, hr_dev->dev_addr[phy_port],
- MAC_ADDR_OCTET_NUM);
-
- memcpy(attr.ah_attr.grh.dgid.raw,
- &subnet_prefix, sizeof(u64));
- memcpy(&attr.ah_attr.grh.dgid.raw[8],
- hr_dev->dev_addr[phy_port], 3);
- memcpy(&attr.ah_attr.grh.dgid.raw[13],
- hr_dev->dev_addr[phy_port] + 3, 3);
- attr.ah_attr.grh.dgid.raw[11] = 0xff;
- attr.ah_attr.grh.dgid.raw[12] = 0xfe;
- attr.ah_attr.grh.dgid.raw[8] ^= 2;
-
- attr_mask |= IB_QP_PORT;
-
- ret = hr_dev->hw->modify_qp(&hr_qp->ibqp, &attr, attr_mask,
- IB_QPS_RESET, IB_QPS_INIT);
- if (ret) {
- dev_err(dev, "modify qp failed(%d)!\n", ret);
- goto create_lp_qp_failed;
- }
-
- ret = hr_dev->hw->modify_qp(&hr_qp->ibqp, &attr, attr_mask,
- IB_QPS_INIT, IB_QPS_RTR);
- if (ret) {
- dev_err(dev, "modify qp failed(%d)!\n", ret);
- goto create_lp_qp_failed;
- }
-
- ret = hr_dev->hw->modify_qp(&hr_qp->ibqp, &attr, attr_mask,
- IB_QPS_RTR, IB_QPS_RTS);
- if (ret) {
- dev_err(dev, "modify qp failed(%d)!\n", ret);
- goto create_lp_qp_failed;
- }
- }
-
- return 0;
-
-create_lp_qp_failed:
- for (i -= 1; i >= 0; i--) {
- hr_qp = free_mr->mr_free_qp[i];
- if (hns_roce_v1_destroy_qp(&hr_qp->ibqp))
- dev_err(dev, "Destroy qp %d for mr free failed!\n", i);
- }
-
- if (hns_roce_dealloc_pd(pd))
- dev_err(dev, "Destroy pd for create_lp_qp failed!\n");
-
-alloc_pd_failed:
- if (hns_roce_ib_destroy_cq(cq))
- dev_err(dev, "Destroy cq for create_lp_qp failed!\n");
-
- return -EINVAL;
-}
-
-static void hns_roce_v1_release_lp_qp(struct hns_roce_dev *hr_dev)
-{
- struct device *dev = &hr_dev->pdev->dev;
- struct hns_roce_free_mr *free_mr;
- struct hns_roce_v1_priv *priv;
- struct hns_roce_qp *hr_qp;
- int ret;
- int i;
-
- priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv;
- free_mr = &priv->free_mr;
-
- for (i = 0; i < HNS_ROCE_V1_RESV_QP; i++) {
- hr_qp = free_mr->mr_free_qp[i];
- ret = hns_roce_v1_destroy_qp(&hr_qp->ibqp);
- if (ret)
- dev_err(dev, "Destroy qp %d for mr free failed(%d)!\n",
- i, ret);
- }
-
- ret = hns_roce_ib_destroy_cq(&free_mr->mr_free_cq->ib_cq);
- if (ret)
- dev_err(dev, "Destroy cq for mr_free failed(%d)!\n", ret);
-
- ret = hns_roce_dealloc_pd(&free_mr->mr_free_pd->ibpd);
- if (ret)
- dev_err(dev, "Destroy pd for mr_free failed(%d)!\n", ret);
-}
-
-static int hns_roce_db_init(struct hns_roce_dev *hr_dev)
-{
- struct device *dev = &hr_dev->pdev->dev;
- struct hns_roce_v1_priv *priv;
- struct hns_roce_db_table *db;
- u32 sdb_ext_mod;
- u32 odb_ext_mod;
- u32 sdb_evt_mod;
- u32 odb_evt_mod;
- int ret = 0;
-
- priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv;
- db = &priv->db_table;
-
- memset(db, 0, sizeof(*db));
-
- /* Default DB mode */
- sdb_ext_mod = HNS_ROCE_SDB_EXTEND_MODE;
- odb_ext_mod = HNS_ROCE_ODB_EXTEND_MODE;
- sdb_evt_mod = HNS_ROCE_SDB_NORMAL_MODE;
- odb_evt_mod = HNS_ROCE_ODB_POLL_MODE;
-
- db->sdb_ext_mod = sdb_ext_mod;
- db->odb_ext_mod = odb_ext_mod;
-
- /* Init extend DB */
- ret = hns_roce_db_ext_init(hr_dev, sdb_ext_mod, odb_ext_mod);
- if (ret) {
- dev_err(dev, "Failed in extend DB configuration.\n");
- return ret;
- }
-
- hns_roce_set_db_event_mode(hr_dev, sdb_evt_mod, odb_evt_mod);
-
- return 0;
-}
-
-void hns_roce_v1_recreate_lp_qp_work_fn(struct work_struct *work)
-{
- struct hns_roce_recreate_lp_qp_work *lp_qp_work;
- struct hns_roce_dev *hr_dev;
-
- lp_qp_work = container_of(work, struct hns_roce_recreate_lp_qp_work,
- work);
- hr_dev = to_hr_dev(lp_qp_work->ib_dev);
-
- hns_roce_v1_release_lp_qp(hr_dev);
-
- if (hns_roce_v1_rsv_lp_qp(hr_dev))
- dev_err(&hr_dev->pdev->dev, "create reserver qp failed\n");
-
- if (lp_qp_work->comp_flag)
- complete(lp_qp_work->comp);
-
- kfree(lp_qp_work);
-}
-
-static int hns_roce_v1_recreate_lp_qp(struct hns_roce_dev *hr_dev)
-{
- struct device *dev = &hr_dev->pdev->dev;
- struct hns_roce_recreate_lp_qp_work *lp_qp_work;
- struct hns_roce_free_mr *free_mr;
- struct hns_roce_v1_priv *priv;
- struct completion comp;
- unsigned long end =
- msecs_to_jiffies(HNS_ROCE_V1_RECREATE_LP_QP_TIMEOUT_MSECS) + jiffies;
-
- priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv;
- free_mr = &priv->free_mr;
-
- lp_qp_work = kzalloc(sizeof(struct hns_roce_recreate_lp_qp_work),
- GFP_KERNEL);
-
- INIT_WORK(&(lp_qp_work->work), hns_roce_v1_recreate_lp_qp_work_fn);
-
- lp_qp_work->ib_dev = &(hr_dev->ib_dev);
- lp_qp_work->comp = &comp;
- lp_qp_work->comp_flag = 1;
-
- init_completion(lp_qp_work->comp);
-
- queue_work(free_mr->free_mr_wq, &(lp_qp_work->work));
-
- while (time_before_eq(jiffies, end)) {
- if (try_wait_for_completion(&comp))
- return 0;
- msleep(HNS_ROCE_V1_RECREATE_LP_QP_WAIT_VALUE);
- }
-
- lp_qp_work->comp_flag = 0;
- if (try_wait_for_completion(&comp))
- return 0;
-
- dev_warn(dev, "recreate lp qp failed 20s timeout and return failed!\n");
- return -ETIMEDOUT;
-}
-
-static int hns_roce_v1_send_lp_wqe(struct hns_roce_qp *hr_qp)
-{
- struct hns_roce_dev *hr_dev = to_hr_dev(hr_qp->ibqp.device);
- struct device *dev = &hr_dev->pdev->dev;
- struct ib_send_wr send_wr, *bad_wr;
- int ret;
-
- memset(&send_wr, 0, sizeof(send_wr));
- send_wr.next = NULL;
- send_wr.num_sge = 0;
- send_wr.send_flags = 0;
- send_wr.sg_list = NULL;
- send_wr.wr_id = (unsigned long long)&send_wr;
- send_wr.opcode = IB_WR_RDMA_WRITE;
-
- ret = hns_roce_v1_post_send(&hr_qp->ibqp, &send_wr, &bad_wr);
- if (ret) {
- dev_err(dev, "Post write wqe for mr free failed(%d)!", ret);
- return ret;
- }
-
- return 0;
-}
-
-static void hns_roce_v1_mr_free_work_fn(struct work_struct *work)
-{
- struct hns_roce_mr_free_work *mr_work;
- struct ib_wc wc[HNS_ROCE_V1_RESV_QP];
- struct hns_roce_free_mr *free_mr;
- struct hns_roce_cq *mr_free_cq;
- struct hns_roce_v1_priv *priv;
- struct hns_roce_dev *hr_dev;
- struct hns_roce_mr *hr_mr;
- struct hns_roce_qp *hr_qp;
- struct device *dev;
- unsigned long end =
- msecs_to_jiffies(HNS_ROCE_V1_FREE_MR_TIMEOUT_MSECS) + jiffies;
- int i;
- int ret;
- int ne;
-
- mr_work = container_of(work, struct hns_roce_mr_free_work, work);
- hr_mr = (struct hns_roce_mr *)mr_work->mr;
- hr_dev = to_hr_dev(mr_work->ib_dev);
- dev = &hr_dev->pdev->dev;
-
- priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv;
- free_mr = &priv->free_mr;
- mr_free_cq = free_mr->mr_free_cq;
-
- for (i = 0; i < HNS_ROCE_V1_RESV_QP; i++) {
- hr_qp = free_mr->mr_free_qp[i];
- ret = hns_roce_v1_send_lp_wqe(hr_qp);
- if (ret) {
- dev_err(dev,
- "Send wqe (qp:0x%lx) for mr free failed(%d)!\n",
- hr_qp->qpn, ret);
- goto free_work;
- }
- }
-
- ne = HNS_ROCE_V1_RESV_QP;
- do {
- ret = hns_roce_v1_poll_cq(&mr_free_cq->ib_cq, ne, wc);
- if (ret < 0) {
- dev_err(dev,
- "(qp:0x%lx) starts, Poll cqe failed(%d) for mr 0x%x free! Remain %d cqe\n",
- hr_qp->qpn, ret, hr_mr->key, ne);
- goto free_work;
- }
- ne -= ret;
- msleep(HNS_ROCE_V1_FREE_MR_WAIT_VALUE);
- } while (ne && time_before_eq(jiffies, end));
-
- if (ne != 0)
- dev_err(dev,
- "Poll cqe for mr 0x%x free timeout! Remain %d cqe\n",
- hr_mr->key, ne);
-
-free_work:
- if (mr_work->comp_flag)
- complete(mr_work->comp);
- kfree(mr_work);
-}
-
-int hns_roce_v1_dereg_mr(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr)
-{
- struct device *dev = &hr_dev->pdev->dev;
- struct hns_roce_mr_free_work *mr_work;
- struct hns_roce_free_mr *free_mr;
- struct hns_roce_v1_priv *priv;
- struct completion comp;
- unsigned long end =
- msecs_to_jiffies(HNS_ROCE_V1_FREE_MR_TIMEOUT_MSECS) + jiffies;
- unsigned long start = jiffies;
- int npages;
- int ret = 0;
-
- priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv;
- free_mr = &priv->free_mr;
-
- if (mr->enabled) {
- if (hns_roce_hw2sw_mpt(hr_dev, NULL, key_to_hw_index(mr->key)
- & (hr_dev->caps.num_mtpts - 1)))
- dev_warn(dev, "HW2SW_MPT failed!\n");
- }
-
- mr_work = kzalloc(sizeof(*mr_work), GFP_KERNEL);
- if (!mr_work) {
- ret = -ENOMEM;
- goto free_mr;
- }
-
- INIT_WORK(&(mr_work->work), hns_roce_v1_mr_free_work_fn);
-
- mr_work->ib_dev = &(hr_dev->ib_dev);
- mr_work->comp = &comp;
- mr_work->comp_flag = 1;
- mr_work->mr = (void *)mr;
- init_completion(mr_work->comp);
-
- queue_work(free_mr->free_mr_wq, &(mr_work->work));
-
- while (time_before_eq(jiffies, end)) {
- if (try_wait_for_completion(&comp))
- goto free_mr;
- msleep(HNS_ROCE_V1_FREE_MR_WAIT_VALUE);
- }
-
- mr_work->comp_flag = 0;
- if (try_wait_for_completion(&comp))
- goto free_mr;
-
- dev_warn(dev, "Free mr work 0x%x over 50s and failed!\n", mr->key);
- ret = -ETIMEDOUT;
-
-free_mr:
- dev_dbg(dev, "Free mr 0x%x use 0x%x us.\n",
- mr->key, jiffies_to_usecs(jiffies) - jiffies_to_usecs(start));
-
- if (mr->size != ~0ULL) {
- npages = ib_umem_page_count(mr->umem);
- dma_free_coherent(dev, npages * 8, mr->pbl_buf,
- mr->pbl_dma_addr);
- }
-
- hns_roce_bitmap_free(&hr_dev->mr_table.mtpt_bitmap,
- key_to_hw_index(mr->key), 0);
-
- if (mr->umem)
- ib_umem_release(mr->umem);
-
- kfree(mr);
-
- return ret;
-}
-
-static void hns_roce_db_free(struct hns_roce_dev *hr_dev)
-{
- struct device *dev = &hr_dev->pdev->dev;
- struct hns_roce_v1_priv *priv;
- struct hns_roce_db_table *db;
-
- priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv;
- db = &priv->db_table;
-
- if (db->sdb_ext_mod) {
- dma_free_coherent(dev, HNS_ROCE_V1_EXT_SDB_SIZE,
- db->ext_db->sdb_buf_list->buf,
- db->ext_db->sdb_buf_list->map);
- kfree(db->ext_db->sdb_buf_list);
- }
-
- if (db->odb_ext_mod) {
- dma_free_coherent(dev, HNS_ROCE_V1_EXT_ODB_SIZE,
- db->ext_db->odb_buf_list->buf,
- db->ext_db->odb_buf_list->map);
- kfree(db->ext_db->odb_buf_list);
- }
-
- kfree(db->ext_db);
-}
-
-static int hns_roce_raq_init(struct hns_roce_dev *hr_dev)
-{
- int ret;
- int raq_shift = 0;
- dma_addr_t addr;
- u32 val;
- struct hns_roce_v1_priv *priv;
- struct hns_roce_raq_table *raq;
- struct device *dev = &hr_dev->pdev->dev;
-
- priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv;
- raq = &priv->raq_table;
-
- raq->e_raq_buf = kzalloc(sizeof(*(raq->e_raq_buf)), GFP_KERNEL);
- if (!raq->e_raq_buf)
- return -ENOMEM;
-
- raq->e_raq_buf->buf = dma_alloc_coherent(dev, HNS_ROCE_V1_RAQ_SIZE,
- &addr, GFP_KERNEL);
- if (!raq->e_raq_buf->buf) {
- ret = -ENOMEM;
- goto err_dma_alloc_raq;
- }
- raq->e_raq_buf->map = addr;
-
- /* Configure raq extended address. 48bit 4K align*/
- roce_write(hr_dev, ROCEE_EXT_RAQ_REG, raq->e_raq_buf->map >> 12);
-
- /* Configure raq_shift */
- raq_shift = ilog2(HNS_ROCE_V1_RAQ_SIZE / HNS_ROCE_V1_RAQ_ENTRY);
- val = roce_read(hr_dev, ROCEE_EXT_RAQ_H_REG);
- roce_set_field(val, ROCEE_EXT_RAQ_H_EXT_RAQ_SHIFT_M,
- ROCEE_EXT_RAQ_H_EXT_RAQ_SHIFT_S, raq_shift);
- /*
- * 44 = 32 + 12, When evaluating addr to hardware, shift 12 because of
- * using 4K page, and shift more 32 because of
- * caculating the high 32 bit value evaluated to hardware.
- */
- roce_set_field(val, ROCEE_EXT_RAQ_H_EXT_RAQ_BA_H_M,
- ROCEE_EXT_RAQ_H_EXT_RAQ_BA_H_S,
- raq->e_raq_buf->map >> 44);
- roce_write(hr_dev, ROCEE_EXT_RAQ_H_REG, val);
- dev_dbg(dev, "Configure raq_shift 0x%x.\n", val);
-
- /* Configure raq threshold */
- val = roce_read(hr_dev, ROCEE_RAQ_WL_REG);
- roce_set_field(val, ROCEE_RAQ_WL_ROCEE_RAQ_WL_M,
- ROCEE_RAQ_WL_ROCEE_RAQ_WL_S,
- HNS_ROCE_V1_EXT_RAQ_WF);
- roce_write(hr_dev, ROCEE_RAQ_WL_REG, val);
- dev_dbg(dev, "Configure raq_wl 0x%x.\n", val);
-
- /* Enable extend raq */
- val = roce_read(hr_dev, ROCEE_WRMS_POL_TIME_INTERVAL_REG);
- roce_set_field(val,
- ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_POL_TIME_INTERVAL_M,
- ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_POL_TIME_INTERVAL_S,
- POL_TIME_INTERVAL_VAL);
- roce_set_bit(val, ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_EXT_RAQ_MODE, 1);
- roce_set_field(val,
- ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_RAQ_TIMEOUT_CHK_CFG_M,
- ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_RAQ_TIMEOUT_CHK_CFG_S,
- 2);
- roce_set_bit(val,
- ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_RAQ_TIMEOUT_CHK_EN_S, 1);
- roce_write(hr_dev, ROCEE_WRMS_POL_TIME_INTERVAL_REG, val);
- dev_dbg(dev, "Configure WrmsPolTimeInterval 0x%x.\n", val);
-
- /* Enable raq drop */
- val = roce_read(hr_dev, ROCEE_GLB_CFG_REG);
- roce_set_bit(val, ROCEE_GLB_CFG_TRP_RAQ_DROP_EN_S, 1);
- roce_write(hr_dev, ROCEE_GLB_CFG_REG, val);
- dev_dbg(dev, "Configure GlbCfg = 0x%x.\n", val);
-
- return 0;
-
-err_dma_alloc_raq:
- kfree(raq->e_raq_buf);
- return ret;
-}
-
-static void hns_roce_raq_free(struct hns_roce_dev *hr_dev)
-{
- struct device *dev = &hr_dev->pdev->dev;
- struct hns_roce_v1_priv *priv;
- struct hns_roce_raq_table *raq;
-
- priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv;
- raq = &priv->raq_table;
-
- dma_free_coherent(dev, HNS_ROCE_V1_RAQ_SIZE, raq->e_raq_buf->buf,
- raq->e_raq_buf->map);
- kfree(raq->e_raq_buf);
-}
-
-static void hns_roce_port_enable(struct hns_roce_dev *hr_dev, int enable_flag)
-{
- u32 val;
-
- if (enable_flag) {
- val = roce_read(hr_dev, ROCEE_GLB_CFG_REG);
- /* Open all ports */
- roce_set_field(val, ROCEE_GLB_CFG_ROCEE_PORT_ST_M,
- ROCEE_GLB_CFG_ROCEE_PORT_ST_S,
- ALL_PORT_VAL_OPEN);
- roce_write(hr_dev, ROCEE_GLB_CFG_REG, val);
- } else {
- val = roce_read(hr_dev, ROCEE_GLB_CFG_REG);
- /* Close all ports */
- roce_set_field(val, ROCEE_GLB_CFG_ROCEE_PORT_ST_M,
- ROCEE_GLB_CFG_ROCEE_PORT_ST_S, 0x0);
- roce_write(hr_dev, ROCEE_GLB_CFG_REG, val);
- }
-}
-
-static int hns_roce_bt_init(struct hns_roce_dev *hr_dev)
-{
- struct device *dev = &hr_dev->pdev->dev;
- struct hns_roce_v1_priv *priv;
- int ret;
-
- priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv;
-
- priv->bt_table.qpc_buf.buf = dma_alloc_coherent(dev,
- HNS_ROCE_BT_RSV_BUF_SIZE, &priv->bt_table.qpc_buf.map,
- GFP_KERNEL);
- if (!priv->bt_table.qpc_buf.buf)
- return -ENOMEM;
-
- priv->bt_table.mtpt_buf.buf = dma_alloc_coherent(dev,
- HNS_ROCE_BT_RSV_BUF_SIZE, &priv->bt_table.mtpt_buf.map,
- GFP_KERNEL);
- if (!priv->bt_table.mtpt_buf.buf) {
- ret = -ENOMEM;
- goto err_failed_alloc_mtpt_buf;
- }
-
- priv->bt_table.cqc_buf.buf = dma_alloc_coherent(dev,
- HNS_ROCE_BT_RSV_BUF_SIZE, &priv->bt_table.cqc_buf.map,
- GFP_KERNEL);
- if (!priv->bt_table.cqc_buf.buf) {
- ret = -ENOMEM;
- goto err_failed_alloc_cqc_buf;
- }
-
- return 0;
-
-err_failed_alloc_cqc_buf:
- dma_free_coherent(dev, HNS_ROCE_BT_RSV_BUF_SIZE,
- priv->bt_table.mtpt_buf.buf, priv->bt_table.mtpt_buf.map);
-
-err_failed_alloc_mtpt_buf:
- dma_free_coherent(dev, HNS_ROCE_BT_RSV_BUF_SIZE,
- priv->bt_table.qpc_buf.buf, priv->bt_table.qpc_buf.map);
-
- return ret;
-}
-
-static void hns_roce_bt_free(struct hns_roce_dev *hr_dev)
-{
- struct device *dev = &hr_dev->pdev->dev;
- struct hns_roce_v1_priv *priv;
-
- priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv;
-
- dma_free_coherent(dev, HNS_ROCE_BT_RSV_BUF_SIZE,
- priv->bt_table.cqc_buf.buf, priv->bt_table.cqc_buf.map);
-
- dma_free_coherent(dev, HNS_ROCE_BT_RSV_BUF_SIZE,
- priv->bt_table.mtpt_buf.buf, priv->bt_table.mtpt_buf.map);
-
- dma_free_coherent(dev, HNS_ROCE_BT_RSV_BUF_SIZE,
- priv->bt_table.qpc_buf.buf, priv->bt_table.qpc_buf.map);
-}
-
-static int hns_roce_tptr_init(struct hns_roce_dev *hr_dev)
-{
- struct device *dev = &hr_dev->pdev->dev;
- struct hns_roce_buf_list *tptr_buf;
- struct hns_roce_v1_priv *priv;
-
- priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv;
- tptr_buf = &priv->tptr_table.tptr_buf;
-
- /*
- * This buffer will be used for CQ's tptr(tail pointer), also
- * named ci(customer index). Every CQ will use 2 bytes to save
- * cqe ci in hip06. Hardware will read this area to get new ci
- * when the queue is almost full.
- */
- tptr_buf->buf = dma_alloc_coherent(dev, HNS_ROCE_V1_TPTR_BUF_SIZE,
- &tptr_buf->map, GFP_KERNEL);
- if (!tptr_buf->buf)
- return -ENOMEM;
-
- hr_dev->tptr_dma_addr = tptr_buf->map;
- hr_dev->tptr_size = HNS_ROCE_V1_TPTR_BUF_SIZE;
-
- return 0;
-}
-
-static void hns_roce_tptr_free(struct hns_roce_dev *hr_dev)
-{
- struct device *dev = &hr_dev->pdev->dev;
- struct hns_roce_buf_list *tptr_buf;
- struct hns_roce_v1_priv *priv;
-
- priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv;
- tptr_buf = &priv->tptr_table.tptr_buf;
-
- dma_free_coherent(dev, HNS_ROCE_V1_TPTR_BUF_SIZE,
- tptr_buf->buf, tptr_buf->map);
-}
-
-static int hns_roce_free_mr_init(struct hns_roce_dev *hr_dev)
-{
- struct device *dev = &hr_dev->pdev->dev;
- struct hns_roce_free_mr *free_mr;
- struct hns_roce_v1_priv *priv;
- int ret = 0;
-
- priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv;
- free_mr = &priv->free_mr;
-
- free_mr->free_mr_wq = create_singlethread_workqueue("hns_roce_free_mr");
- if (!free_mr->free_mr_wq) {
- dev_err(dev, "Create free mr workqueue failed!\n");
- return -ENOMEM;
- }
-
- ret = hns_roce_v1_rsv_lp_qp(hr_dev);
- if (ret) {
- dev_err(dev, "Reserved loop qp failed(%d)!\n", ret);
- flush_workqueue(free_mr->free_mr_wq);
- destroy_workqueue(free_mr->free_mr_wq);
- }
-
- return ret;
-}
-
-static void hns_roce_free_mr_free(struct hns_roce_dev *hr_dev)
-{
- struct hns_roce_free_mr *free_mr;
- struct hns_roce_v1_priv *priv;
-
- priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv;
- free_mr = &priv->free_mr;
-
- flush_workqueue(free_mr->free_mr_wq);
- destroy_workqueue(free_mr->free_mr_wq);
-
- hns_roce_v1_release_lp_qp(hr_dev);
-}
-
-/**
- * hns_roce_v1_reset - reset RoCE
- * @hr_dev: RoCE device struct pointer
- * @enable: true -- drop reset, false -- reset
- * return 0 - success , negative --fail
- */
-int hns_roce_v1_reset(struct hns_roce_dev *hr_dev, bool dereset)
-{
- struct device_node *dsaf_node;
- struct device *dev = &hr_dev->pdev->dev;
- struct device_node *np = dev->of_node;
- struct fwnode_handle *fwnode;
- int ret;
-
- /* check if this is DT/ACPI case */
- if (dev_of_node(dev)) {
- dsaf_node = of_parse_phandle(np, "dsaf-handle", 0);
- if (!dsaf_node) {
- dev_err(dev, "could not find dsaf-handle\n");
- return -EINVAL;
- }
- fwnode = &dsaf_node->fwnode;
- } else if (is_acpi_device_node(dev->fwnode)) {
- struct acpi_reference_args args;
-
- ret = acpi_node_get_property_reference(dev->fwnode,
- "dsaf-handle", 0, &args);
- if (ret) {
- dev_err(dev, "could not find dsaf-handle\n");
- return ret;
- }
- fwnode = acpi_fwnode_handle(args.adev);
- } else {
- dev_err(dev, "cannot read data from DT or ACPI\n");
- return -ENXIO;
- }
-
- ret = hns_dsaf_roce_reset(fwnode, false);
- if (ret)
- return ret;
-
- if (dereset) {
- msleep(SLEEP_TIME_INTERVAL);
- ret = hns_dsaf_roce_reset(fwnode, true);
- }
-
- return ret;
-}
-
-static int hns_roce_des_qp_init(struct hns_roce_dev *hr_dev)
-{
- struct device *dev = &hr_dev->pdev->dev;
- struct hns_roce_v1_priv *priv;
- struct hns_roce_des_qp *des_qp;
-
- priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv;
- des_qp = &priv->des_qp;
-
- des_qp->requeue_flag = 1;
- des_qp->qp_wq = create_singlethread_workqueue("hns_roce_destroy_qp");
- if (!des_qp->qp_wq) {
- dev_err(dev, "Create destroy qp workqueue failed!\n");
- return -ENOMEM;
- }
-
- return 0;
-}
-
-static void hns_roce_des_qp_free(struct hns_roce_dev *hr_dev)
-{
- struct hns_roce_v1_priv *priv;
- struct hns_roce_des_qp *des_qp;
-
- priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv;
- des_qp = &priv->des_qp;
-
- des_qp->requeue_flag = 0;
- flush_workqueue(des_qp->qp_wq);
- destroy_workqueue(des_qp->qp_wq);
-}
-
-void hns_roce_v1_profile(struct hns_roce_dev *hr_dev)
-{
- int i = 0;
- struct hns_roce_caps *caps = &hr_dev->caps;
-
- hr_dev->vendor_id = le32_to_cpu(roce_read(hr_dev, ROCEE_VENDOR_ID_REG));
- hr_dev->vendor_part_id = le32_to_cpu(roce_read(hr_dev,
- ROCEE_VENDOR_PART_ID_REG));
- hr_dev->sys_image_guid = le32_to_cpu(roce_read(hr_dev,
- ROCEE_SYS_IMAGE_GUID_L_REG)) |
- ((u64)le32_to_cpu(roce_read(hr_dev,
- ROCEE_SYS_IMAGE_GUID_H_REG)) << 32);
- hr_dev->hw_rev = HNS_ROCE_HW_VER1;
-
- caps->num_qps = HNS_ROCE_V1_MAX_QP_NUM;
- caps->max_wqes = HNS_ROCE_V1_MAX_WQE_NUM;
- caps->num_cqs = HNS_ROCE_V1_MAX_CQ_NUM;
- caps->max_cqes = HNS_ROCE_V1_MAX_CQE_NUM;
- caps->max_sq_sg = HNS_ROCE_V1_SG_NUM;
- caps->max_rq_sg = HNS_ROCE_V1_SG_NUM;
- caps->max_sq_inline = HNS_ROCE_V1_INLINE_SIZE;
- caps->num_uars = HNS_ROCE_V1_UAR_NUM;
- caps->phy_num_uars = HNS_ROCE_V1_PHY_UAR_NUM;
- caps->num_aeq_vectors = HNS_ROCE_AEQE_VEC_NUM;
- caps->num_comp_vectors = HNS_ROCE_COMP_VEC_NUM;
- caps->num_other_vectors = HNS_ROCE_AEQE_OF_VEC_NUM;
- caps->num_mtpts = HNS_ROCE_V1_MAX_MTPT_NUM;
- caps->num_mtt_segs = HNS_ROCE_V1_MAX_MTT_SEGS;
- caps->num_pds = HNS_ROCE_V1_MAX_PD_NUM;
- caps->max_qp_init_rdma = HNS_ROCE_V1_MAX_QP_INIT_RDMA;
- caps->max_qp_dest_rdma = HNS_ROCE_V1_MAX_QP_DEST_RDMA;
- caps->max_sq_desc_sz = HNS_ROCE_V1_MAX_SQ_DESC_SZ;
- caps->max_rq_desc_sz = HNS_ROCE_V1_MAX_RQ_DESC_SZ;
- caps->qpc_entry_sz = HNS_ROCE_V1_QPC_ENTRY_SIZE;
- caps->irrl_entry_sz = HNS_ROCE_V1_IRRL_ENTRY_SIZE;
- caps->cqc_entry_sz = HNS_ROCE_V1_CQC_ENTRY_SIZE;
- caps->mtpt_entry_sz = HNS_ROCE_V1_MTPT_ENTRY_SIZE;
- caps->mtt_entry_sz = HNS_ROCE_V1_MTT_ENTRY_SIZE;
- caps->cq_entry_sz = HNS_ROCE_V1_CQE_ENTRY_SIZE;
- caps->page_size_cap = HNS_ROCE_V1_PAGE_SIZE_SUPPORT;
- caps->reserved_lkey = 0;
- caps->reserved_pds = 0;
- caps->reserved_mrws = 1;
- caps->reserved_uars = 0;
- caps->reserved_cqs = 0;
-
- for (i = 0; i < caps->num_ports; i++)
- caps->pkey_table_len[i] = 1;
-
- for (i = 0; i < caps->num_ports; i++) {
- /* Six ports shared 16 GID in v1 engine */
- if (i >= (HNS_ROCE_V1_GID_NUM % caps->num_ports))
- caps->gid_table_len[i] = HNS_ROCE_V1_GID_NUM /
- caps->num_ports;
- else
- caps->gid_table_len[i] = HNS_ROCE_V1_GID_NUM /
- caps->num_ports + 1;
- }
-
- for (i = 0; i < caps->num_comp_vectors; i++)
- caps->ceqe_depth[i] = HNS_ROCE_V1_NUM_COMP_EQE;
-
- caps->aeqe_depth = HNS_ROCE_V1_NUM_ASYNC_EQE;
- caps->local_ca_ack_delay = le32_to_cpu(roce_read(hr_dev,
- ROCEE_ACK_DELAY_REG));
- caps->max_mtu = IB_MTU_2048;
-}
-
-int hns_roce_v1_init(struct hns_roce_dev *hr_dev)
-{
- int ret;
- u32 val;
- struct device *dev = &hr_dev->pdev->dev;
-
- /* DMAE user config */
- val = roce_read(hr_dev, ROCEE_DMAE_USER_CFG1_REG);
- roce_set_field(val, ROCEE_DMAE_USER_CFG1_ROCEE_CACHE_TB_CFG_M,
- ROCEE_DMAE_USER_CFG1_ROCEE_CACHE_TB_CFG_S, 0xf);
- roce_set_field(val, ROCEE_DMAE_USER_CFG1_ROCEE_STREAM_ID_TB_CFG_M,
- ROCEE_DMAE_USER_CFG1_ROCEE_STREAM_ID_TB_CFG_S,
- 1 << PAGES_SHIFT_16);
- roce_write(hr_dev, ROCEE_DMAE_USER_CFG1_REG, val);
-
- val = roce_read(hr_dev, ROCEE_DMAE_USER_CFG2_REG);
- roce_set_field(val, ROCEE_DMAE_USER_CFG2_ROCEE_CACHE_PKT_CFG_M,
- ROCEE_DMAE_USER_CFG2_ROCEE_CACHE_PKT_CFG_S, 0xf);
- roce_set_field(val, ROCEE_DMAE_USER_CFG2_ROCEE_STREAM_ID_PKT_CFG_M,
- ROCEE_DMAE_USER_CFG2_ROCEE_STREAM_ID_PKT_CFG_S,
- 1 << PAGES_SHIFT_16);
-
- ret = hns_roce_db_init(hr_dev);
- if (ret) {
- dev_err(dev, "doorbell init failed!\n");
- return ret;
- }
-
- ret = hns_roce_raq_init(hr_dev);
- if (ret) {
- dev_err(dev, "raq init failed!\n");
- goto error_failed_raq_init;
- }
-
- ret = hns_roce_bt_init(hr_dev);
- if (ret) {
- dev_err(dev, "bt init failed!\n");
- goto error_failed_bt_init;
- }
-
- ret = hns_roce_tptr_init(hr_dev);
- if (ret) {
- dev_err(dev, "tptr init failed!\n");
- goto error_failed_tptr_init;
- }
-
- ret = hns_roce_des_qp_init(hr_dev);
- if (ret) {
- dev_err(dev, "des qp init failed!\n");
- goto error_failed_des_qp_init;
- }
-
- ret = hns_roce_free_mr_init(hr_dev);
- if (ret) {
- dev_err(dev, "free mr init failed!\n");
- goto error_failed_free_mr_init;
- }
-
- hns_roce_port_enable(hr_dev, HNS_ROCE_PORT_UP);
-
- return 0;
-
-error_failed_free_mr_init:
- hns_roce_des_qp_free(hr_dev);
-
-error_failed_des_qp_init:
- hns_roce_tptr_free(hr_dev);
-
-error_failed_tptr_init:
- hns_roce_bt_free(hr_dev);
-
-error_failed_bt_init:
- hns_roce_raq_free(hr_dev);
-
-error_failed_raq_init:
- hns_roce_db_free(hr_dev);
- return ret;
-}
-
-void hns_roce_v1_exit(struct hns_roce_dev *hr_dev)
-{
- hns_roce_port_enable(hr_dev, HNS_ROCE_PORT_DOWN);
- hns_roce_free_mr_free(hr_dev);
- hns_roce_des_qp_free(hr_dev);
- hns_roce_tptr_free(hr_dev);
- hns_roce_bt_free(hr_dev);
- hns_roce_raq_free(hr_dev);
- hns_roce_db_free(hr_dev);
-}
-
-void hns_roce_v1_set_gid(struct hns_roce_dev *hr_dev, u8 port, int gid_index,
- union ib_gid *gid)
-{
- u32 *p = NULL;
- u8 gid_idx = 0;
-
- gid_idx = hns_get_gid_index(hr_dev, port, gid_index);
-
- p = (u32 *)&gid->raw[0];
- roce_raw_write(*p, hr_dev->reg_base + ROCEE_PORT_GID_L_0_REG +
- (HNS_ROCE_V1_GID_NUM * gid_idx));
-
- p = (u32 *)&gid->raw[4];
- roce_raw_write(*p, hr_dev->reg_base + ROCEE_PORT_GID_ML_0_REG +
- (HNS_ROCE_V1_GID_NUM * gid_idx));
-
- p = (u32 *)&gid->raw[8];
- roce_raw_write(*p, hr_dev->reg_base + ROCEE_PORT_GID_MH_0_REG +
- (HNS_ROCE_V1_GID_NUM * gid_idx));
-
- p = (u32 *)&gid->raw[0xc];
- roce_raw_write(*p, hr_dev->reg_base + ROCEE_PORT_GID_H_0_REG +
- (HNS_ROCE_V1_GID_NUM * gid_idx));
-}
-
-void hns_roce_v1_set_mac(struct hns_roce_dev *hr_dev, u8 phy_port, u8 *addr)
-{
- u32 reg_smac_l;
- u16 reg_smac_h;
- u16 *p_h;
- u32 *p;
- u32 val;
-
- /*
- * When mac changed, loopback may fail
- * because of smac not equal to dmac.
- * We Need to release and create reserved qp again.
- */
- if (hr_dev->hw->dereg_mr && hns_roce_v1_recreate_lp_qp(hr_dev))
- dev_warn(&hr_dev->pdev->dev, "recreate lp qp timeout!\n");
-
- p = (u32 *)(&addr[0]);
- reg_smac_l = *p;
- roce_raw_write(reg_smac_l, hr_dev->reg_base + ROCEE_SMAC_L_0_REG +
- PHY_PORT_OFFSET * phy_port);
-
- val = roce_read(hr_dev,
- ROCEE_SMAC_H_0_REG + phy_port * PHY_PORT_OFFSET);
- p_h = (u16 *)(&addr[4]);
- reg_smac_h = *p_h;
- roce_set_field(val, ROCEE_SMAC_H_ROCEE_SMAC_H_M,
- ROCEE_SMAC_H_ROCEE_SMAC_H_S, reg_smac_h);
- roce_write(hr_dev, ROCEE_SMAC_H_0_REG + phy_port * PHY_PORT_OFFSET,
- val);
-}
-
-void hns_roce_v1_set_mtu(struct hns_roce_dev *hr_dev, u8 phy_port,
- enum ib_mtu mtu)
-{
- u32 val;
-
- val = roce_read(hr_dev,
- ROCEE_SMAC_H_0_REG + phy_port * PHY_PORT_OFFSET);
- roce_set_field(val, ROCEE_SMAC_H_ROCEE_PORT_MTU_M,
- ROCEE_SMAC_H_ROCEE_PORT_MTU_S, mtu);
- roce_write(hr_dev, ROCEE_SMAC_H_0_REG + phy_port * PHY_PORT_OFFSET,
- val);
-}
-
-int hns_roce_v1_write_mtpt(void *mb_buf, struct hns_roce_mr *mr,
- unsigned long mtpt_idx)
-{
- struct hns_roce_v1_mpt_entry *mpt_entry;
- struct scatterlist *sg;
- u64 *pages;
- int entry;
- int i;
-
- /* MPT filled into mailbox buf */
- mpt_entry = (struct hns_roce_v1_mpt_entry *)mb_buf;
- memset(mpt_entry, 0, sizeof(*mpt_entry));
-
- roce_set_field(mpt_entry->mpt_byte_4, MPT_BYTE_4_KEY_STATE_M,
- MPT_BYTE_4_KEY_STATE_S, KEY_VALID);
- roce_set_field(mpt_entry->mpt_byte_4, MPT_BYTE_4_KEY_M,
- MPT_BYTE_4_KEY_S, mr->key);
- roce_set_field(mpt_entry->mpt_byte_4, MPT_BYTE_4_PAGE_SIZE_M,
- MPT_BYTE_4_PAGE_SIZE_S, MR_SIZE_4K);
- roce_set_bit(mpt_entry->mpt_byte_4, MPT_BYTE_4_MW_TYPE_S, 0);
- roce_set_bit(mpt_entry->mpt_byte_4, MPT_BYTE_4_MW_BIND_ENABLE_S,
- (mr->access & IB_ACCESS_MW_BIND ? 1 : 0));
- roce_set_bit(mpt_entry->mpt_byte_4, MPT_BYTE_4_OWN_S, 0);
- roce_set_field(mpt_entry->mpt_byte_4, MPT_BYTE_4_MEMORY_LOCATION_TYPE_M,
- MPT_BYTE_4_MEMORY_LOCATION_TYPE_S, mr->type);
- roce_set_bit(mpt_entry->mpt_byte_4, MPT_BYTE_4_REMOTE_ATOMIC_S, 0);
- roce_set_bit(mpt_entry->mpt_byte_4, MPT_BYTE_4_LOCAL_WRITE_S,
- (mr->access & IB_ACCESS_LOCAL_WRITE ? 1 : 0));
- roce_set_bit(mpt_entry->mpt_byte_4, MPT_BYTE_4_REMOTE_WRITE_S,
- (mr->access & IB_ACCESS_REMOTE_WRITE ? 1 : 0));
- roce_set_bit(mpt_entry->mpt_byte_4, MPT_BYTE_4_REMOTE_READ_S,
- (mr->access & IB_ACCESS_REMOTE_READ ? 1 : 0));
- roce_set_bit(mpt_entry->mpt_byte_4, MPT_BYTE_4_REMOTE_INVAL_ENABLE_S,
- 0);
- roce_set_bit(mpt_entry->mpt_byte_4, MPT_BYTE_4_ADDRESS_TYPE_S, 0);
-
- roce_set_field(mpt_entry->mpt_byte_12, MPT_BYTE_12_PBL_ADDR_H_M,
- MPT_BYTE_12_PBL_ADDR_H_S, 0);
- roce_set_field(mpt_entry->mpt_byte_12, MPT_BYTE_12_MW_BIND_COUNTER_M,
- MPT_BYTE_12_MW_BIND_COUNTER_S, 0);
-
- mpt_entry->virt_addr_l = (u32)mr->iova;
- mpt_entry->virt_addr_h = (u32)(mr->iova >> 32);
- mpt_entry->length = (u32)mr->size;
-
- roce_set_field(mpt_entry->mpt_byte_28, MPT_BYTE_28_PD_M,
- MPT_BYTE_28_PD_S, mr->pd);
- roce_set_field(mpt_entry->mpt_byte_28, MPT_BYTE_28_L_KEY_IDX_L_M,
- MPT_BYTE_28_L_KEY_IDX_L_S, mtpt_idx);
- roce_set_field(mpt_entry->mpt_byte_64, MPT_BYTE_64_L_KEY_IDX_H_M,
- MPT_BYTE_64_L_KEY_IDX_H_S, mtpt_idx >> MTPT_IDX_SHIFT);
-
- /* DMA momery regsiter */
- if (mr->type == MR_TYPE_DMA)
- return 0;
-
- pages = (u64 *) __get_free_page(GFP_KERNEL);
- if (!pages)
- return -ENOMEM;
-
- i = 0;
- for_each_sg(mr->umem->sg_head.sgl, sg, mr->umem->nmap, entry) {
- pages[i] = ((u64)sg_dma_address(sg)) >> 12;
-
- /* Directly record to MTPT table firstly 7 entry */
- if (i >= HNS_ROCE_MAX_INNER_MTPT_NUM)
- break;
- i++;
- }
-
- /* Register user mr */
- for (i = 0; i < HNS_ROCE_MAX_INNER_MTPT_NUM; i++) {
- switch (i) {
- case 0:
- mpt_entry->pa0_l = cpu_to_le32((u32)(pages[i]));
- roce_set_field(mpt_entry->mpt_byte_36,
- MPT_BYTE_36_PA0_H_M,
- MPT_BYTE_36_PA0_H_S,
- cpu_to_le32((u32)(pages[i] >> PAGES_SHIFT_32)));
- break;
- case 1:
- roce_set_field(mpt_entry->mpt_byte_36,
- MPT_BYTE_36_PA1_L_M,
- MPT_BYTE_36_PA1_L_S,
- cpu_to_le32((u32)(pages[i])));
- roce_set_field(mpt_entry->mpt_byte_40,
- MPT_BYTE_40_PA1_H_M,
- MPT_BYTE_40_PA1_H_S,
- cpu_to_le32((u32)(pages[i] >> PAGES_SHIFT_24)));
- break;
- case 2:
- roce_set_field(mpt_entry->mpt_byte_40,
- MPT_BYTE_40_PA2_L_M,
- MPT_BYTE_40_PA2_L_S,
- cpu_to_le32((u32)(pages[i])));
- roce_set_field(mpt_entry->mpt_byte_44,
- MPT_BYTE_44_PA2_H_M,
- MPT_BYTE_44_PA2_H_S,
- cpu_to_le32((u32)(pages[i] >> PAGES_SHIFT_16)));
- break;
- case 3:
- roce_set_field(mpt_entry->mpt_byte_44,
- MPT_BYTE_44_PA3_L_M,
- MPT_BYTE_44_PA3_L_S,
- cpu_to_le32((u32)(pages[i])));
- roce_set_field(mpt_entry->mpt_byte_48,
- MPT_BYTE_48_PA3_H_M,
- MPT_BYTE_48_PA3_H_S,
- cpu_to_le32((u32)(pages[i] >> PAGES_SHIFT_8)));
- break;
- case 4:
- mpt_entry->pa4_l = cpu_to_le32((u32)(pages[i]));
- roce_set_field(mpt_entry->mpt_byte_56,
- MPT_BYTE_56_PA4_H_M,
- MPT_BYTE_56_PA4_H_S,
- cpu_to_le32((u32)(pages[i] >> PAGES_SHIFT_32)));
- break;
- case 5:
- roce_set_field(mpt_entry->mpt_byte_56,
- MPT_BYTE_56_PA5_L_M,
- MPT_BYTE_56_PA5_L_S,
- cpu_to_le32((u32)(pages[i])));
- roce_set_field(mpt_entry->mpt_byte_60,
- MPT_BYTE_60_PA5_H_M,
- MPT_BYTE_60_PA5_H_S,
- cpu_to_le32((u32)(pages[i] >> PAGES_SHIFT_24)));
- break;
- case 6:
- roce_set_field(mpt_entry->mpt_byte_60,
- MPT_BYTE_60_PA6_L_M,
- MPT_BYTE_60_PA6_L_S,
- cpu_to_le32((u32)(pages[i])));
- roce_set_field(mpt_entry->mpt_byte_64,
- MPT_BYTE_64_PA6_H_M,
- MPT_BYTE_64_PA6_H_S,
- cpu_to_le32((u32)(pages[i] >> PAGES_SHIFT_16)));
- break;
- default:
- break;
- }
- }
-
- free_page((unsigned long) pages);
-
- mpt_entry->pbl_addr_l = (u32)(mr->pbl_dma_addr);
-
- roce_set_field(mpt_entry->mpt_byte_12, MPT_BYTE_12_PBL_ADDR_H_M,
- MPT_BYTE_12_PBL_ADDR_H_S,
- ((u32)(mr->pbl_dma_addr >> 32)));
-
- return 0;
-}
-
-static void *get_cqe(struct hns_roce_cq *hr_cq, int n)
-{
- return hns_roce_buf_offset(&hr_cq->hr_buf.hr_buf,
- n * HNS_ROCE_V1_CQE_ENTRY_SIZE);
-}
-
-static void *get_sw_cqe(struct hns_roce_cq *hr_cq, int n)
-{
- struct hns_roce_cqe *hr_cqe = get_cqe(hr_cq, n & hr_cq->ib_cq.cqe);
-
- /* Get cqe when Owner bit is Conversely with the MSB of cons_idx */
- return (roce_get_bit(hr_cqe->cqe_byte_4, CQE_BYTE_4_OWNER_S) ^
- !!(n & (hr_cq->ib_cq.cqe + 1))) ? hr_cqe : NULL;
-}
-
-static struct hns_roce_cqe *next_cqe_sw(struct hns_roce_cq *hr_cq)
-{
- return get_sw_cqe(hr_cq, hr_cq->cons_index);
-}
-
-void hns_roce_v1_cq_set_ci(struct hns_roce_cq *hr_cq, u32 cons_index)
-{
- u32 doorbell[2];
-
- doorbell[0] = cons_index & ((hr_cq->cq_depth << 1) - 1);
- roce_set_bit(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_HW_SYNS_S, 1);
- roce_set_field(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_M,
- ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_S, 3);
- roce_set_field(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_MDF_M,
- ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_MDF_S, 0);
- roce_set_field(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_INP_H_M,
- ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_INP_H_S, hr_cq->cqn);
-
- hns_roce_write64_k(doorbell, hr_cq->cq_db_l);
-}
-
-static void __hns_roce_v1_cq_clean(struct hns_roce_cq *hr_cq, u32 qpn,
- struct hns_roce_srq *srq)
-{
- struct hns_roce_cqe *cqe, *dest;
- u32 prod_index;
- int nfreed = 0;
- u8 owner_bit;
-
- for (prod_index = hr_cq->cons_index; get_sw_cqe(hr_cq, prod_index);
- ++prod_index) {
- if (prod_index == hr_cq->cons_index + hr_cq->ib_cq.cqe)
- break;
- }
-
- /*
- * Now backwards through the CQ, removing CQ entries
- * that match our QP by overwriting them with next entries.
- */
- while ((int) --prod_index - (int) hr_cq->cons_index >= 0) {
- cqe = get_cqe(hr_cq, prod_index & hr_cq->ib_cq.cqe);
- if ((roce_get_field(cqe->cqe_byte_16, CQE_BYTE_16_LOCAL_QPN_M,
- CQE_BYTE_16_LOCAL_QPN_S) &
- HNS_ROCE_CQE_QPN_MASK) == qpn) {
- /* In v1 engine, not support SRQ */
- ++nfreed;
- } else if (nfreed) {
- dest = get_cqe(hr_cq, (prod_index + nfreed) &
- hr_cq->ib_cq.cqe);
- owner_bit = roce_get_bit(dest->cqe_byte_4,
- CQE_BYTE_4_OWNER_S);
- memcpy(dest, cqe, sizeof(*cqe));
- roce_set_bit(dest->cqe_byte_4, CQE_BYTE_4_OWNER_S,
- owner_bit);
- }
- }
-
- if (nfreed) {
- hr_cq->cons_index += nfreed;
- /*
- * Make sure update of buffer contents is done before
- * updating consumer index.
- */
- wmb();
-
- hns_roce_v1_cq_set_ci(hr_cq, hr_cq->cons_index);
- }
-}
-
-static void hns_roce_v1_cq_clean(struct hns_roce_cq *hr_cq, u32 qpn,
- struct hns_roce_srq *srq)
-{
- spin_lock_irq(&hr_cq->lock);
- __hns_roce_v1_cq_clean(hr_cq, qpn, srq);
- spin_unlock_irq(&hr_cq->lock);
-}
-
-void hns_roce_v1_write_cqc(struct hns_roce_dev *hr_dev,
- struct hns_roce_cq *hr_cq, void *mb_buf, u64 *mtts,
- dma_addr_t dma_handle, int nent, u32 vector)
-{
- struct hns_roce_cq_context *cq_context = NULL;
- struct hns_roce_buf_list *tptr_buf;
- struct hns_roce_v1_priv *priv;
- dma_addr_t tptr_dma_addr;
- int offset;
-
- priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv;
- tptr_buf = &priv->tptr_table.tptr_buf;
-
- cq_context = mb_buf;
- memset(cq_context, 0, sizeof(*cq_context));
-
- /* Get the tptr for this CQ. */
- offset = hr_cq->cqn * HNS_ROCE_V1_TPTR_ENTRY_SIZE;
- tptr_dma_addr = tptr_buf->map + offset;
- hr_cq->tptr_addr = (u16 *)(tptr_buf->buf + offset);
-
- /* Register cq_context members */
- roce_set_field(cq_context->cqc_byte_4,
- CQ_CONTEXT_CQC_BYTE_4_CQC_STATE_M,
- CQ_CONTEXT_CQC_BYTE_4_CQC_STATE_S, CQ_STATE_VALID);
- roce_set_field(cq_context->cqc_byte_4, CQ_CONTEXT_CQC_BYTE_4_CQN_M,
- CQ_CONTEXT_CQC_BYTE_4_CQN_S, hr_cq->cqn);
- cq_context->cqc_byte_4 = cpu_to_le32(cq_context->cqc_byte_4);
-
- cq_context->cq_bt_l = (u32)dma_handle;
- cq_context->cq_bt_l = cpu_to_le32(cq_context->cq_bt_l);
-
- roce_set_field(cq_context->cqc_byte_12,
- CQ_CONTEXT_CQC_BYTE_12_CQ_BT_H_M,
- CQ_CONTEXT_CQC_BYTE_12_CQ_BT_H_S,
- ((u64)dma_handle >> 32));
- roce_set_field(cq_context->cqc_byte_12,
- CQ_CONTEXT_CQC_BYTE_12_CQ_CQE_SHIFT_M,
- CQ_CONTEXT_CQC_BYTE_12_CQ_CQE_SHIFT_S,
- ilog2((unsigned int)nent));
- roce_set_field(cq_context->cqc_byte_12, CQ_CONTEXT_CQC_BYTE_12_CEQN_M,
- CQ_CONTEXT_CQC_BYTE_12_CEQN_S, vector);
- cq_context->cqc_byte_12 = cpu_to_le32(cq_context->cqc_byte_12);
-
- cq_context->cur_cqe_ba0_l = (u32)(mtts[0]);
- cq_context->cur_cqe_ba0_l = cpu_to_le32(cq_context->cur_cqe_ba0_l);
-
- roce_set_field(cq_context->cqc_byte_20,
- CQ_CONTEXT_CQC_BYTE_20_CUR_CQE_BA0_H_M,
- CQ_CONTEXT_CQC_BYTE_20_CUR_CQE_BA0_H_S,
- cpu_to_le32((mtts[0]) >> 32));
- /* Dedicated hardware, directly set 0 */
- roce_set_field(cq_context->cqc_byte_20,
- CQ_CONTEXT_CQC_BYTE_20_CQ_CUR_INDEX_M,
- CQ_CONTEXT_CQC_BYTE_20_CQ_CUR_INDEX_S, 0);
- /**
- * 44 = 32 + 12, When evaluating addr to hardware, shift 12 because of
- * using 4K page, and shift more 32 because of
- * caculating the high 32 bit value evaluated to hardware.
- */
- roce_set_field(cq_context->cqc_byte_20,
- CQ_CONTEXT_CQC_BYTE_20_CQE_TPTR_ADDR_H_M,
- CQ_CONTEXT_CQC_BYTE_20_CQE_TPTR_ADDR_H_S,
- tptr_dma_addr >> 44);
- cq_context->cqc_byte_20 = cpu_to_le32(cq_context->cqc_byte_20);
-
- cq_context->cqe_tptr_addr_l = (u32)(tptr_dma_addr >> 12);
-
- roce_set_field(cq_context->cqc_byte_32,
- CQ_CONTEXT_CQC_BYTE_32_CUR_CQE_BA1_H_M,
- CQ_CONTEXT_CQC_BYTE_32_CUR_CQE_BA1_H_S, 0);
- roce_set_bit(cq_context->cqc_byte_32,
- CQ_CONTEXT_CQC_BYTE_32_SE_FLAG_S, 0);
- roce_set_bit(cq_context->cqc_byte_32,
- CQ_CONTEXT_CQC_BYTE_32_CE_FLAG_S, 0);
- roce_set_bit(cq_context->cqc_byte_32,
- CQ_CONTEXT_CQC_BYTE_32_NOTIFICATION_FLAG_S, 0);
- roce_set_bit(cq_context->cqc_byte_32,
- CQ_CQNTEXT_CQC_BYTE_32_TYPE_OF_COMPLETION_NOTIFICATION_S,
- 0);
- /* The initial value of cq's ci is 0 */
- roce_set_field(cq_context->cqc_byte_32,
- CQ_CONTEXT_CQC_BYTE_32_CQ_CONS_IDX_M,
- CQ_CONTEXT_CQC_BYTE_32_CQ_CONS_IDX_S, 0);
- cq_context->cqc_byte_32 = cpu_to_le32(cq_context->cqc_byte_32);
-}
-
-int hns_roce_v1_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
-{
- struct hns_roce_cq *hr_cq = to_hr_cq(ibcq);
- u32 notification_flag;
- u32 doorbell[2];
- int ret = 0;
-
- notification_flag = (flags & IB_CQ_SOLICITED_MASK) ==
- IB_CQ_SOLICITED ? CQ_DB_REQ_NOT : CQ_DB_REQ_NOT_SOL;
- /*
- * flags = 0; Notification Flag = 1, next
- * flags = 1; Notification Flag = 0, solocited
- */
- doorbell[0] = hr_cq->cons_index & ((hr_cq->cq_depth << 1) - 1);
- roce_set_bit(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_HW_SYNS_S, 1);
- roce_set_field(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_M,
- ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_S, 3);
- roce_set_field(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_MDF_M,
- ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_MDF_S, 1);
- roce_set_field(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_INP_H_M,
- ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_INP_H_S,
- hr_cq->cqn | notification_flag);
-
- hns_roce_write64_k(doorbell, hr_cq->cq_db_l);
-
- return ret;
-}
-
-static int hns_roce_v1_poll_one(struct hns_roce_cq *hr_cq,
- struct hns_roce_qp **cur_qp, struct ib_wc *wc)
-{
- int qpn;
- int is_send;
- u16 wqe_ctr;
- u32 status;
- u32 opcode;
- struct hns_roce_cqe *cqe;
- struct hns_roce_qp *hr_qp;
- struct hns_roce_wq *wq;
- struct hns_roce_wqe_ctrl_seg *sq_wqe;
- struct hns_roce_dev *hr_dev = to_hr_dev(hr_cq->ib_cq.device);
- struct device *dev = &hr_dev->pdev->dev;
-
- /* Find cqe according consumer index */
- cqe = next_cqe_sw(hr_cq);
- if (!cqe)
- return -EAGAIN;
-
- ++hr_cq->cons_index;
- /* Memory barrier */
- rmb();
- /* 0->SQ, 1->RQ */
- is_send = !(roce_get_bit(cqe->cqe_byte_4, CQE_BYTE_4_SQ_RQ_FLAG_S));
-
- /* Local_qpn in UD cqe is always 1, so it needs to compute new qpn */
- if (roce_get_field(cqe->cqe_byte_16, CQE_BYTE_16_LOCAL_QPN_M,
- CQE_BYTE_16_LOCAL_QPN_S) <= 1) {
- qpn = roce_get_field(cqe->cqe_byte_20, CQE_BYTE_20_PORT_NUM_M,
- CQE_BYTE_20_PORT_NUM_S) +
- roce_get_field(cqe->cqe_byte_16, CQE_BYTE_16_LOCAL_QPN_M,
- CQE_BYTE_16_LOCAL_QPN_S) *
- HNS_ROCE_MAX_PORTS;
- } else {
- qpn = roce_get_field(cqe->cqe_byte_16, CQE_BYTE_16_LOCAL_QPN_M,
- CQE_BYTE_16_LOCAL_QPN_S);
- }
-
- if (!*cur_qp || (qpn & HNS_ROCE_CQE_QPN_MASK) != (*cur_qp)->qpn) {
- hr_qp = __hns_roce_qp_lookup(hr_dev, qpn);
- if (unlikely(!hr_qp)) {
- dev_err(dev, "CQ %06lx with entry for unknown QPN %06x\n",
- hr_cq->cqn, (qpn & HNS_ROCE_CQE_QPN_MASK));
- return -EINVAL;
- }
-
- *cur_qp = hr_qp;
- }
-
- wc->qp = &(*cur_qp)->ibqp;
- wc->vendor_err = 0;
-
- status = roce_get_field(cqe->cqe_byte_4,
- CQE_BYTE_4_STATUS_OF_THE_OPERATION_M,
- CQE_BYTE_4_STATUS_OF_THE_OPERATION_S) &
- HNS_ROCE_CQE_STATUS_MASK;
- switch (status) {
- case HNS_ROCE_CQE_SUCCESS:
- wc->status = IB_WC_SUCCESS;
- break;
- case HNS_ROCE_CQE_SYNDROME_LOCAL_LENGTH_ERR:
- wc->status = IB_WC_LOC_LEN_ERR;
- break;
- case HNS_ROCE_CQE_SYNDROME_LOCAL_QP_OP_ERR:
- wc->status = IB_WC_LOC_QP_OP_ERR;
- break;
- case HNS_ROCE_CQE_SYNDROME_LOCAL_PROT_ERR:
- wc->status = IB_WC_LOC_PROT_ERR;
- break;
- case HNS_ROCE_CQE_SYNDROME_WR_FLUSH_ERR:
- wc->status = IB_WC_WR_FLUSH_ERR;
- break;
- case HNS_ROCE_CQE_SYNDROME_MEM_MANAGE_OPERATE_ERR:
- wc->status = IB_WC_MW_BIND_ERR;
- break;
- case HNS_ROCE_CQE_SYNDROME_BAD_RESP_ERR:
- wc->status = IB_WC_BAD_RESP_ERR;
- break;
- case HNS_ROCE_CQE_SYNDROME_LOCAL_ACCESS_ERR:
- wc->status = IB_WC_LOC_ACCESS_ERR;
- break;
- case HNS_ROCE_CQE_SYNDROME_REMOTE_INVAL_REQ_ERR:
- wc->status = IB_WC_REM_INV_REQ_ERR;
- break;
- case HNS_ROCE_CQE_SYNDROME_REMOTE_ACCESS_ERR:
- wc->status = IB_WC_REM_ACCESS_ERR;
- break;
- case HNS_ROCE_CQE_SYNDROME_REMOTE_OP_ERR:
- wc->status = IB_WC_REM_OP_ERR;
- break;
- case HNS_ROCE_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR:
- wc->status = IB_WC_RETRY_EXC_ERR;
- break;
- case HNS_ROCE_CQE_SYNDROME_RNR_RETRY_EXC_ERR:
- wc->status = IB_WC_RNR_RETRY_EXC_ERR;
- break;
- default:
- wc->status = IB_WC_GENERAL_ERR;
- break;
- }
-
- /* CQE status error, directly return */
- if (wc->status != IB_WC_SUCCESS)
- return 0;
-
- if (is_send) {
- /* SQ conrespond to CQE */
- sq_wqe = get_send_wqe(*cur_qp, roce_get_field(cqe->cqe_byte_4,
- CQE_BYTE_4_WQE_INDEX_M,
- CQE_BYTE_4_WQE_INDEX_S)&
- ((*cur_qp)->sq.wqe_cnt-1));
- switch (sq_wqe->flag & HNS_ROCE_WQE_OPCODE_MASK) {
- case HNS_ROCE_WQE_OPCODE_SEND:
- wc->opcode = IB_WC_SEND;
- break;
- case HNS_ROCE_WQE_OPCODE_RDMA_READ:
- wc->opcode = IB_WC_RDMA_READ;
- wc->byte_len = le32_to_cpu(cqe->byte_cnt);
- break;
- case HNS_ROCE_WQE_OPCODE_RDMA_WRITE:
- wc->opcode = IB_WC_RDMA_WRITE;
- break;
- case HNS_ROCE_WQE_OPCODE_LOCAL_INV:
- wc->opcode = IB_WC_LOCAL_INV;
- break;
- case HNS_ROCE_WQE_OPCODE_UD_SEND:
- wc->opcode = IB_WC_SEND;
- break;
- default:
- wc->status = IB_WC_GENERAL_ERR;
- break;
- }
- wc->wc_flags = (sq_wqe->flag & HNS_ROCE_WQE_IMM ?
- IB_WC_WITH_IMM : 0);
-
- wq = &(*cur_qp)->sq;
- if ((*cur_qp)->sq_signal_bits) {
- /*
- * If sg_signal_bit is 1,
- * firstly tail pointer updated to wqe
- * which current cqe correspond to
- */
- wqe_ctr = (u16)roce_get_field(cqe->cqe_byte_4,
- CQE_BYTE_4_WQE_INDEX_M,
- CQE_BYTE_4_WQE_INDEX_S);
- wq->tail += (wqe_ctr - (u16)wq->tail) &
- (wq->wqe_cnt - 1);
- }
- wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
- ++wq->tail;
- } else {
- /* RQ conrespond to CQE */
- wc->byte_len = le32_to_cpu(cqe->byte_cnt);
- opcode = roce_get_field(cqe->cqe_byte_4,
- CQE_BYTE_4_OPERATION_TYPE_M,
- CQE_BYTE_4_OPERATION_TYPE_S) &
- HNS_ROCE_CQE_OPCODE_MASK;
- switch (opcode) {
- case HNS_ROCE_OPCODE_RDMA_WITH_IMM_RECEIVE:
- wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
- wc->wc_flags = IB_WC_WITH_IMM;
- wc->ex.imm_data = le32_to_cpu(cqe->immediate_data);
- break;
- case HNS_ROCE_OPCODE_SEND_DATA_RECEIVE:
- if (roce_get_bit(cqe->cqe_byte_4,
- CQE_BYTE_4_IMM_INDICATOR_S)) {
- wc->opcode = IB_WC_RECV;
- wc->wc_flags = IB_WC_WITH_IMM;
- wc->ex.imm_data = le32_to_cpu(
- cqe->immediate_data);
- } else {
- wc->opcode = IB_WC_RECV;
- wc->wc_flags = 0;
- }
- break;
- default:
- wc->status = IB_WC_GENERAL_ERR;
- break;
- }
-
- /* Update tail pointer, record wr_id */
- wq = &(*cur_qp)->rq;
- wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
- ++wq->tail;
- wc->sl = (u8)roce_get_field(cqe->cqe_byte_20, CQE_BYTE_20_SL_M,
- CQE_BYTE_20_SL_S);
- wc->src_qp = (u8)roce_get_field(cqe->cqe_byte_20,
- CQE_BYTE_20_REMOTE_QPN_M,
- CQE_BYTE_20_REMOTE_QPN_S);
- wc->wc_flags |= (roce_get_bit(cqe->cqe_byte_20,
- CQE_BYTE_20_GRH_PRESENT_S) ?
- IB_WC_GRH : 0);
- wc->pkey_index = (u16)roce_get_field(cqe->cqe_byte_28,
- CQE_BYTE_28_P_KEY_IDX_M,
- CQE_BYTE_28_P_KEY_IDX_S);
- }
-
- return 0;
-}
-
-int hns_roce_v1_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
-{
- struct hns_roce_cq *hr_cq = to_hr_cq(ibcq);
- struct hns_roce_qp *cur_qp = NULL;
- unsigned long flags;
- int npolled;
- int ret = 0;
-
- spin_lock_irqsave(&hr_cq->lock, flags);
-
- for (npolled = 0; npolled < num_entries; ++npolled) {
- ret = hns_roce_v1_poll_one(hr_cq, &cur_qp, wc + npolled);
- if (ret)
- break;
- }
-
- if (npolled) {
- *hr_cq->tptr_addr = hr_cq->cons_index &
- ((hr_cq->cq_depth << 1) - 1);
-
- /* Memroy barrier */
- wmb();
- hns_roce_v1_cq_set_ci(hr_cq, hr_cq->cons_index);
- }
-
- spin_unlock_irqrestore(&hr_cq->lock, flags);
-
- if (ret == 0 || ret == -EAGAIN)
- return npolled;
- else
- return ret;
-}
-
-int hns_roce_v1_clear_hem(struct hns_roce_dev *hr_dev,
- struct hns_roce_hem_table *table, int obj)
-{
- struct device *dev = &hr_dev->pdev->dev;
- struct hns_roce_v1_priv *priv;
- unsigned long end = 0, flags = 0;
- uint32_t bt_cmd_val[2] = {0};
- void __iomem *bt_cmd;
- u64 bt_ba = 0;
-
- priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv;
-
- switch (table->type) {
- case HEM_TYPE_QPC:
- roce_set_field(bt_cmd_val[1], ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_M,
- ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_S, HEM_TYPE_QPC);
- bt_ba = priv->bt_table.qpc_buf.map >> 12;
- break;
- case HEM_TYPE_MTPT:
- roce_set_field(bt_cmd_val[1], ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_M,
- ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_S, HEM_TYPE_MTPT);
- bt_ba = priv->bt_table.mtpt_buf.map >> 12;
- break;
- case HEM_TYPE_CQC:
- roce_set_field(bt_cmd_val[1], ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_M,
- ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_S, HEM_TYPE_CQC);
- bt_ba = priv->bt_table.cqc_buf.map >> 12;
- break;
- case HEM_TYPE_SRQC:
- dev_dbg(dev, "HEM_TYPE_SRQC not support.\n");
- return -EINVAL;
- default:
- return 0;
- }
- roce_set_field(bt_cmd_val[1], ROCEE_BT_CMD_H_ROCEE_BT_CMD_IN_MDF_M,
- ROCEE_BT_CMD_H_ROCEE_BT_CMD_IN_MDF_S, obj);
- roce_set_bit(bt_cmd_val[1], ROCEE_BT_CMD_H_ROCEE_BT_CMD_S, 0);
- roce_set_bit(bt_cmd_val[1], ROCEE_BT_CMD_H_ROCEE_BT_CMD_HW_SYNS_S, 1);
-
- spin_lock_irqsave(&hr_dev->bt_cmd_lock, flags);
-
- bt_cmd = hr_dev->reg_base + ROCEE_BT_CMD_H_REG;
-
- end = msecs_to_jiffies(HW_SYNC_TIMEOUT_MSECS) + jiffies;
- while (1) {
- if (readl(bt_cmd) >> BT_CMD_SYNC_SHIFT) {
- if (!(time_before(jiffies, end))) {
- dev_err(dev, "Write bt_cmd err,hw_sync is not zero.\n");
- spin_unlock_irqrestore(&hr_dev->bt_cmd_lock,
- flags);
- return -EBUSY;
- }
- } else {
- break;
- }
- msleep(HW_SYNC_SLEEP_TIME_INTERVAL);
- }
-
- bt_cmd_val[0] = (uint32_t)bt_ba;
- roce_set_field(bt_cmd_val[1], ROCEE_BT_CMD_H_ROCEE_BT_CMD_BA_H_M,
- ROCEE_BT_CMD_H_ROCEE_BT_CMD_BA_H_S, bt_ba >> 32);
- hns_roce_write64_k(bt_cmd_val, hr_dev->reg_base + ROCEE_BT_CMD_L_REG);
-
- spin_unlock_irqrestore(&hr_dev->bt_cmd_lock, flags);
-
- return 0;
-}
-
-static int hns_roce_v1_qp_modify(struct hns_roce_dev *hr_dev,
- struct hns_roce_mtt *mtt,
- enum hns_roce_qp_state cur_state,
- enum hns_roce_qp_state new_state,
- struct hns_roce_qp_context *context,
- struct hns_roce_qp *hr_qp)
-{
- static const u16
- op[HNS_ROCE_QP_NUM_STATE][HNS_ROCE_QP_NUM_STATE] = {
- [HNS_ROCE_QP_STATE_RST] = {
- [HNS_ROCE_QP_STATE_RST] = HNS_ROCE_CMD_2RST_QP,
- [HNS_ROCE_QP_STATE_ERR] = HNS_ROCE_CMD_2ERR_QP,
- [HNS_ROCE_QP_STATE_INIT] = HNS_ROCE_CMD_RST2INIT_QP,
- },
- [HNS_ROCE_QP_STATE_INIT] = {
- [HNS_ROCE_QP_STATE_RST] = HNS_ROCE_CMD_2RST_QP,
- [HNS_ROCE_QP_STATE_ERR] = HNS_ROCE_CMD_2ERR_QP,
- /* Note: In v1 engine, HW doesn't support RST2INIT.
- * We use RST2INIT cmd instead of INIT2INIT.
- */
- [HNS_ROCE_QP_STATE_INIT] = HNS_ROCE_CMD_RST2INIT_QP,
- [HNS_ROCE_QP_STATE_RTR] = HNS_ROCE_CMD_INIT2RTR_QP,
- },
- [HNS_ROCE_QP_STATE_RTR] = {
- [HNS_ROCE_QP_STATE_RST] = HNS_ROCE_CMD_2RST_QP,
- [HNS_ROCE_QP_STATE_ERR] = HNS_ROCE_CMD_2ERR_QP,
- [HNS_ROCE_QP_STATE_RTS] = HNS_ROCE_CMD_RTR2RTS_QP,
- },
- [HNS_ROCE_QP_STATE_RTS] = {
- [HNS_ROCE_QP_STATE_RST] = HNS_ROCE_CMD_2RST_QP,
- [HNS_ROCE_QP_STATE_ERR] = HNS_ROCE_CMD_2ERR_QP,
- [HNS_ROCE_QP_STATE_RTS] = HNS_ROCE_CMD_RTS2RTS_QP,
- [HNS_ROCE_QP_STATE_SQD] = HNS_ROCE_CMD_RTS2SQD_QP,
- },
- [HNS_ROCE_QP_STATE_SQD] = {
- [HNS_ROCE_QP_STATE_RST] = HNS_ROCE_CMD_2RST_QP,
- [HNS_ROCE_QP_STATE_ERR] = HNS_ROCE_CMD_2ERR_QP,
- [HNS_ROCE_QP_STATE_RTS] = HNS_ROCE_CMD_SQD2RTS_QP,
- [HNS_ROCE_QP_STATE_SQD] = HNS_ROCE_CMD_SQD2SQD_QP,
- },
- [HNS_ROCE_QP_STATE_ERR] = {
- [HNS_ROCE_QP_STATE_RST] = HNS_ROCE_CMD_2RST_QP,
- [HNS_ROCE_QP_STATE_ERR] = HNS_ROCE_CMD_2ERR_QP,
- }
- };
-
- struct hns_roce_cmd_mailbox *mailbox;
- struct device *dev = &hr_dev->pdev->dev;
- int ret = 0;
-
- if (cur_state >= HNS_ROCE_QP_NUM_STATE ||
- new_state >= HNS_ROCE_QP_NUM_STATE ||
- !op[cur_state][new_state]) {
- dev_err(dev, "[modify_qp]not support state %d to %d\n",
- cur_state, new_state);
- return -EINVAL;
- }
-
- if (op[cur_state][new_state] == HNS_ROCE_CMD_2RST_QP)
- return hns_roce_cmd_mbox(hr_dev, 0, 0, hr_qp->qpn, 2,
- HNS_ROCE_CMD_2RST_QP,
- HNS_ROCE_CMD_TIMEOUT_MSECS);
-
- if (op[cur_state][new_state] == HNS_ROCE_CMD_2ERR_QP)
- return hns_roce_cmd_mbox(hr_dev, 0, 0, hr_qp->qpn, 2,
- HNS_ROCE_CMD_2ERR_QP,
- HNS_ROCE_CMD_TIMEOUT_MSECS);
-
- mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
- if (IS_ERR(mailbox))
- return PTR_ERR(mailbox);
-
- memcpy(mailbox->buf, context, sizeof(*context));
-
- ret = hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, hr_qp->qpn, 0,
- op[cur_state][new_state],
- HNS_ROCE_CMD_TIMEOUT_MSECS);
-
- hns_roce_free_cmd_mailbox(hr_dev, mailbox);
- return ret;
-}
-
-static int hns_roce_v1_m_sqp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
- int attr_mask, enum ib_qp_state cur_state,
- enum ib_qp_state new_state)
-{
- struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
- struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
- struct hns_roce_sqp_context *context;
- struct device *dev = &hr_dev->pdev->dev;
- dma_addr_t dma_handle = 0;
- int rq_pa_start;
- u32 reg_val;
- u64 *mtts;
- u32 *addr;
-
- context = kzalloc(sizeof(*context), GFP_KERNEL);
- if (!context)
- return -ENOMEM;
-
- /* Search QP buf's MTTs */
- mtts = hns_roce_table_find(&hr_dev->mr_table.mtt_table,
- hr_qp->mtt.first_seg, &dma_handle);
- if (!mtts) {
- dev_err(dev, "qp buf pa find failed\n");
- goto out;
- }
-
- if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
- roce_set_field(context->qp1c_bytes_4,
- QP1C_BYTES_4_SQ_WQE_SHIFT_M,
- QP1C_BYTES_4_SQ_WQE_SHIFT_S,
- ilog2((unsigned int)hr_qp->sq.wqe_cnt));
- roce_set_field(context->qp1c_bytes_4,
- QP1C_BYTES_4_RQ_WQE_SHIFT_M,
- QP1C_BYTES_4_RQ_WQE_SHIFT_S,
- ilog2((unsigned int)hr_qp->rq.wqe_cnt));
- roce_set_field(context->qp1c_bytes_4, QP1C_BYTES_4_PD_M,
- QP1C_BYTES_4_PD_S, to_hr_pd(ibqp->pd)->pdn);
-
- context->sq_rq_bt_l = (u32)(dma_handle);
- roce_set_field(context->qp1c_bytes_12,
- QP1C_BYTES_12_SQ_RQ_BT_H_M,
- QP1C_BYTES_12_SQ_RQ_BT_H_S,
- ((u32)(dma_handle >> 32)));
-
- roce_set_field(context->qp1c_bytes_16, QP1C_BYTES_16_RQ_HEAD_M,
- QP1C_BYTES_16_RQ_HEAD_S, hr_qp->rq.head);
- roce_set_field(context->qp1c_bytes_16, QP1C_BYTES_16_PORT_NUM_M,
- QP1C_BYTES_16_PORT_NUM_S, hr_qp->phy_port);
- roce_set_bit(context->qp1c_bytes_16,
- QP1C_BYTES_16_SIGNALING_TYPE_S,
- hr_qp->sq_signal_bits);
- roce_set_bit(context->qp1c_bytes_16, QP1C_BYTES_16_RQ_BA_FLG_S,
- 1);
- roce_set_bit(context->qp1c_bytes_16, QP1C_BYTES_16_SQ_BA_FLG_S,
- 1);
- roce_set_bit(context->qp1c_bytes_16, QP1C_BYTES_16_QP1_ERR_S,
- 0);
-
- roce_set_field(context->qp1c_bytes_20, QP1C_BYTES_20_SQ_HEAD_M,
- QP1C_BYTES_20_SQ_HEAD_S, hr_qp->sq.head);
- roce_set_field(context->qp1c_bytes_20, QP1C_BYTES_20_PKEY_IDX_M,
- QP1C_BYTES_20_PKEY_IDX_S, attr->pkey_index);
-
- rq_pa_start = (u32)hr_qp->rq.offset / PAGE_SIZE;
- context->cur_rq_wqe_ba_l = (u32)(mtts[rq_pa_start]);
-
- roce_set_field(context->qp1c_bytes_28,
- QP1C_BYTES_28_CUR_RQ_WQE_BA_H_M,
- QP1C_BYTES_28_CUR_RQ_WQE_BA_H_S,
- (mtts[rq_pa_start]) >> 32);
- roce_set_field(context->qp1c_bytes_28,
- QP1C_BYTES_28_RQ_CUR_IDX_M,
- QP1C_BYTES_28_RQ_CUR_IDX_S, 0);
-
- roce_set_field(context->qp1c_bytes_32,
- QP1C_BYTES_32_RX_CQ_NUM_M,
- QP1C_BYTES_32_RX_CQ_NUM_S,
- to_hr_cq(ibqp->recv_cq)->cqn);
- roce_set_field(context->qp1c_bytes_32,
- QP1C_BYTES_32_TX_CQ_NUM_M,
- QP1C_BYTES_32_TX_CQ_NUM_S,
- to_hr_cq(ibqp->send_cq)->cqn);
-
- context->cur_sq_wqe_ba_l = (u32)mtts[0];
-
- roce_set_field(context->qp1c_bytes_40,
- QP1C_BYTES_40_CUR_SQ_WQE_BA_H_M,
- QP1C_BYTES_40_CUR_SQ_WQE_BA_H_S,
- (mtts[0]) >> 32);
- roce_set_field(context->qp1c_bytes_40,
- QP1C_BYTES_40_SQ_CUR_IDX_M,
- QP1C_BYTES_40_SQ_CUR_IDX_S, 0);
-
- /* Copy context to QP1C register */
- addr = (u32 *)(hr_dev->reg_base + ROCEE_QP1C_CFG0_0_REG +
- hr_qp->phy_port * sizeof(*context));
-
- writel(context->qp1c_bytes_4, addr);
- writel(context->sq_rq_bt_l, addr + 1);
- writel(context->qp1c_bytes_12, addr + 2);
- writel(context->qp1c_bytes_16, addr + 3);
- writel(context->qp1c_bytes_20, addr + 4);
- writel(context->cur_rq_wqe_ba_l, addr + 5);
- writel(context->qp1c_bytes_28, addr + 6);
- writel(context->qp1c_bytes_32, addr + 7);
- writel(context->cur_sq_wqe_ba_l, addr + 8);
- writel(context->qp1c_bytes_40, addr + 9);
- }
-
- /* Modify QP1C status */
- reg_val = roce_read(hr_dev, ROCEE_QP1C_CFG0_0_REG +
- hr_qp->phy_port * sizeof(*context));
- roce_set_field(reg_val, ROCEE_QP1C_CFG0_0_ROCEE_QP1C_QP_ST_M,
- ROCEE_QP1C_CFG0_0_ROCEE_QP1C_QP_ST_S, new_state);
- roce_write(hr_dev, ROCEE_QP1C_CFG0_0_REG +
- hr_qp->phy_port * sizeof(*context), reg_val);
-
- hr_qp->state = new_state;
- if (new_state == IB_QPS_RESET) {
- hns_roce_v1_cq_clean(to_hr_cq(ibqp->recv_cq), hr_qp->qpn,
- ibqp->srq ? to_hr_srq(ibqp->srq) : NULL);
- if (ibqp->send_cq != ibqp->recv_cq)
- hns_roce_v1_cq_clean(to_hr_cq(ibqp->send_cq),
- hr_qp->qpn, NULL);
-
- hr_qp->rq.head = 0;
- hr_qp->rq.tail = 0;
- hr_qp->sq.head = 0;
- hr_qp->sq.tail = 0;
- hr_qp->sq_next_wqe = 0;
- }
-
- kfree(context);
- return 0;
-
-out:
- kfree(context);
- return -EINVAL;
-}
-
-static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
- int attr_mask, enum ib_qp_state cur_state,
- enum ib_qp_state new_state)
-{
- struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
- struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
- struct device *dev = &hr_dev->pdev->dev;
- struct hns_roce_qp_context *context;
- dma_addr_t dma_handle_2 = 0;
- dma_addr_t dma_handle = 0;
- uint32_t doorbell[2] = {0};
- int rq_pa_start = 0;
- u64 *mtts_2 = NULL;
- int ret = -EINVAL;
- u64 *mtts = NULL;
- int port;
- u8 *dmac;
- u8 *smac;
-
- context = kzalloc(sizeof(*context), GFP_KERNEL);
- if (!context)
- return -ENOMEM;
-
- /* Search qp buf's mtts */
- mtts = hns_roce_table_find(&hr_dev->mr_table.mtt_table,
- hr_qp->mtt.first_seg, &dma_handle);
- if (mtts == NULL) {
- dev_err(dev, "qp buf pa find failed\n");
- goto out;
- }
-
- /* Search IRRL's mtts */
- mtts_2 = hns_roce_table_find(&hr_dev->qp_table.irrl_table, hr_qp->qpn,
- &dma_handle_2);
- if (mtts_2 == NULL) {
- dev_err(dev, "qp irrl_table find failed\n");
- goto out;
- }
-
- /*
- * Reset to init
- * Mandatory param:
- * IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_ACCESS_FLAGS
- * Optional param: NA
- */
- if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
- roce_set_field(context->qpc_bytes_4,
- QP_CONTEXT_QPC_BYTES_4_TRANSPORT_SERVICE_TYPE_M,
- QP_CONTEXT_QPC_BYTES_4_TRANSPORT_SERVICE_TYPE_S,
- to_hr_qp_type(hr_qp->ibqp.qp_type));
-
- roce_set_bit(context->qpc_bytes_4,
- QP_CONTEXT_QPC_BYTE_4_ENABLE_FPMR_S, 0);
- roce_set_bit(context->qpc_bytes_4,
- QP_CONTEXT_QPC_BYTE_4_RDMA_READ_ENABLE_S,
- !!(attr->qp_access_flags & IB_ACCESS_REMOTE_READ));
- roce_set_bit(context->qpc_bytes_4,
- QP_CONTEXT_QPC_BYTE_4_RDMA_WRITE_ENABLE_S,
- !!(attr->qp_access_flags & IB_ACCESS_REMOTE_WRITE)
- );
- roce_set_bit(context->qpc_bytes_4,
- QP_CONTEXT_QPC_BYTE_4_ATOMIC_OPERATION_ENABLE_S,
- !!(attr->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)
- );
- roce_set_bit(context->qpc_bytes_4,
- QP_CONTEXT_QPC_BYTE_4_RDMAR_USE_S, 1);
- roce_set_field(context->qpc_bytes_4,
- QP_CONTEXT_QPC_BYTES_4_SQ_WQE_SHIFT_M,
- QP_CONTEXT_QPC_BYTES_4_SQ_WQE_SHIFT_S,
- ilog2((unsigned int)hr_qp->sq.wqe_cnt));
- roce_set_field(context->qpc_bytes_4,
- QP_CONTEXT_QPC_BYTES_4_RQ_WQE_SHIFT_M,
- QP_CONTEXT_QPC_BYTES_4_RQ_WQE_SHIFT_S,
- ilog2((unsigned int)hr_qp->rq.wqe_cnt));
- roce_set_field(context->qpc_bytes_4,
- QP_CONTEXT_QPC_BYTES_4_PD_M,
- QP_CONTEXT_QPC_BYTES_4_PD_S,
- to_hr_pd(ibqp->pd)->pdn);
- hr_qp->access_flags = attr->qp_access_flags;
- roce_set_field(context->qpc_bytes_8,
- QP_CONTEXT_QPC_BYTES_8_TX_COMPLETION_M,
- QP_CONTEXT_QPC_BYTES_8_TX_COMPLETION_S,
- to_hr_cq(ibqp->send_cq)->cqn);
- roce_set_field(context->qpc_bytes_8,
- QP_CONTEXT_QPC_BYTES_8_RX_COMPLETION_M,
- QP_CONTEXT_QPC_BYTES_8_RX_COMPLETION_S,
- to_hr_cq(ibqp->recv_cq)->cqn);
-
- if (ibqp->srq)
- roce_set_field(context->qpc_bytes_12,
- QP_CONTEXT_QPC_BYTES_12_SRQ_NUMBER_M,
- QP_CONTEXT_QPC_BYTES_12_SRQ_NUMBER_S,
- to_hr_srq(ibqp->srq)->srqn);
-
- roce_set_field(context->qpc_bytes_12,
- QP_CONTEXT_QPC_BYTES_12_P_KEY_INDEX_M,
- QP_CONTEXT_QPC_BYTES_12_P_KEY_INDEX_S,
- attr->pkey_index);
- hr_qp->pkey_index = attr->pkey_index;
- roce_set_field(context->qpc_bytes_16,
- QP_CONTEXT_QPC_BYTES_16_QP_NUM_M,
- QP_CONTEXT_QPC_BYTES_16_QP_NUM_S, hr_qp->qpn);
-
- } else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_INIT) {
- roce_set_field(context->qpc_bytes_4,
- QP_CONTEXT_QPC_BYTES_4_TRANSPORT_SERVICE_TYPE_M,
- QP_CONTEXT_QPC_BYTES_4_TRANSPORT_SERVICE_TYPE_S,
- to_hr_qp_type(hr_qp->ibqp.qp_type));
- roce_set_bit(context->qpc_bytes_4,
- QP_CONTEXT_QPC_BYTE_4_ENABLE_FPMR_S, 0);
- if (attr_mask & IB_QP_ACCESS_FLAGS) {
- roce_set_bit(context->qpc_bytes_4,
- QP_CONTEXT_QPC_BYTE_4_RDMA_READ_ENABLE_S,
- !!(attr->qp_access_flags &
- IB_ACCESS_REMOTE_READ));
- roce_set_bit(context->qpc_bytes_4,
- QP_CONTEXT_QPC_BYTE_4_RDMA_WRITE_ENABLE_S,
- !!(attr->qp_access_flags &
- IB_ACCESS_REMOTE_WRITE));
- } else {
- roce_set_bit(context->qpc_bytes_4,
- QP_CONTEXT_QPC_BYTE_4_RDMA_READ_ENABLE_S,
- !!(hr_qp->access_flags &
- IB_ACCESS_REMOTE_READ));
- roce_set_bit(context->qpc_bytes_4,
- QP_CONTEXT_QPC_BYTE_4_RDMA_WRITE_ENABLE_S,
- !!(hr_qp->access_flags &
- IB_ACCESS_REMOTE_WRITE));
- }
-
- roce_set_bit(context->qpc_bytes_4,
- QP_CONTEXT_QPC_BYTE_4_RDMAR_USE_S, 1);
- roce_set_field(context->qpc_bytes_4,
- QP_CONTEXT_QPC_BYTES_4_SQ_WQE_SHIFT_M,
- QP_CONTEXT_QPC_BYTES_4_SQ_WQE_SHIFT_S,
- ilog2((unsigned int)hr_qp->sq.wqe_cnt));
- roce_set_field(context->qpc_bytes_4,
- QP_CONTEXT_QPC_BYTES_4_RQ_WQE_SHIFT_M,
- QP_CONTEXT_QPC_BYTES_4_RQ_WQE_SHIFT_S,
- ilog2((unsigned int)hr_qp->rq.wqe_cnt));
- roce_set_field(context->qpc_bytes_4,
- QP_CONTEXT_QPC_BYTES_4_PD_M,
- QP_CONTEXT_QPC_BYTES_4_PD_S,
- to_hr_pd(ibqp->pd)->pdn);
-
- roce_set_field(context->qpc_bytes_8,
- QP_CONTEXT_QPC_BYTES_8_TX_COMPLETION_M,
- QP_CONTEXT_QPC_BYTES_8_TX_COMPLETION_S,
- to_hr_cq(ibqp->send_cq)->cqn);
- roce_set_field(context->qpc_bytes_8,
- QP_CONTEXT_QPC_BYTES_8_RX_COMPLETION_M,
- QP_CONTEXT_QPC_BYTES_8_RX_COMPLETION_S,
- to_hr_cq(ibqp->recv_cq)->cqn);
-
- if (ibqp->srq)
- roce_set_field(context->qpc_bytes_12,
- QP_CONTEXT_QPC_BYTES_12_SRQ_NUMBER_M,
- QP_CONTEXT_QPC_BYTES_12_SRQ_NUMBER_S,
- to_hr_srq(ibqp->srq)->srqn);
- if (attr_mask & IB_QP_PKEY_INDEX)
- roce_set_field(context->qpc_bytes_12,
- QP_CONTEXT_QPC_BYTES_12_P_KEY_INDEX_M,
- QP_CONTEXT_QPC_BYTES_12_P_KEY_INDEX_S,
- attr->pkey_index);
- else
- roce_set_field(context->qpc_bytes_12,
- QP_CONTEXT_QPC_BYTES_12_P_KEY_INDEX_M,
- QP_CONTEXT_QPC_BYTES_12_P_KEY_INDEX_S,
- hr_qp->pkey_index);
-
- roce_set_field(context->qpc_bytes_16,
- QP_CONTEXT_QPC_BYTES_16_QP_NUM_M,
- QP_CONTEXT_QPC_BYTES_16_QP_NUM_S, hr_qp->qpn);
- } else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) {
- if ((attr_mask & IB_QP_ALT_PATH) ||
- (attr_mask & IB_QP_ACCESS_FLAGS) ||
- (attr_mask & IB_QP_PKEY_INDEX) ||
- (attr_mask & IB_QP_QKEY)) {
- dev_err(dev, "INIT2RTR attr_mask error\n");
- goto out;
- }
-
- dmac = (u8 *)attr->ah_attr.dmac;
-
- context->sq_rq_bt_l = (u32)(dma_handle);
- roce_set_field(context->qpc_bytes_24,
- QP_CONTEXT_QPC_BYTES_24_SQ_RQ_BT_H_M,
- QP_CONTEXT_QPC_BYTES_24_SQ_RQ_BT_H_S,
- ((u32)(dma_handle >> 32)));
- roce_set_bit(context->qpc_bytes_24,
- QP_CONTEXT_QPC_BYTE_24_REMOTE_ENABLE_E2E_CREDITS_S,
- 1);
- roce_set_field(context->qpc_bytes_24,
- QP_CONTEXT_QPC_BYTES_24_MINIMUM_RNR_NAK_TIMER_M,
- QP_CONTEXT_QPC_BYTES_24_MINIMUM_RNR_NAK_TIMER_S,
- attr->min_rnr_timer);
- context->irrl_ba_l = (u32)(dma_handle_2);
- roce_set_field(context->qpc_bytes_32,
- QP_CONTEXT_QPC_BYTES_32_IRRL_BA_H_M,
- QP_CONTEXT_QPC_BYTES_32_IRRL_BA_H_S,
- ((u32)(dma_handle_2 >> 32)) &
- QP_CONTEXT_QPC_BYTES_32_IRRL_BA_H_M);
- roce_set_field(context->qpc_bytes_32,
- QP_CONTEXT_QPC_BYTES_32_MIG_STATE_M,
- QP_CONTEXT_QPC_BYTES_32_MIG_STATE_S, 0);
- roce_set_bit(context->qpc_bytes_32,
- QP_CONTEXT_QPC_BYTE_32_LOCAL_ENABLE_E2E_CREDITS_S,
- 1);
- roce_set_bit(context->qpc_bytes_32,
- QP_CONTEXT_QPC_BYTE_32_SIGNALING_TYPE_S,
- hr_qp->sq_signal_bits);
-
- port = (attr_mask & IB_QP_PORT) ? (attr->port_num - 1) :
- hr_qp->port;
- smac = (u8 *)hr_dev->dev_addr[port];
- /* when dmac equals smac or loop_idc is 1, it should loopback */
- if (ether_addr_equal_unaligned(dmac, smac) ||
- hr_dev->loop_idc == 0x1)
- roce_set_bit(context->qpc_bytes_32,
- QP_CONTEXT_QPC_BYTE_32_LOOPBACK_INDICATOR_S, 1);
-
- roce_set_bit(context->qpc_bytes_32,
- QP_CONTEXT_QPC_BYTE_32_GLOBAL_HEADER_S,
- attr->ah_attr.ah_flags);
- roce_set_field(context->qpc_bytes_32,
- QP_CONTEXT_QPC_BYTES_32_RESPONDER_RESOURCES_M,
- QP_CONTEXT_QPC_BYTES_32_RESPONDER_RESOURCES_S,
- ilog2((unsigned int)attr->max_dest_rd_atomic));
-
- roce_set_field(context->qpc_bytes_36,
- QP_CONTEXT_QPC_BYTES_36_DEST_QP_M,
- QP_CONTEXT_QPC_BYTES_36_DEST_QP_S,
- attr->dest_qp_num);
-
- /* Configure GID index */
- roce_set_field(context->qpc_bytes_36,
- QP_CONTEXT_QPC_BYTES_36_SGID_INDEX_M,
- QP_CONTEXT_QPC_BYTES_36_SGID_INDEX_S,
- hns_get_gid_index(hr_dev,
- attr->ah_attr.port_num - 1,
- attr->ah_attr.grh.sgid_index));
-
- memcpy(&(context->dmac_l), dmac, 4);
-
- roce_set_field(context->qpc_bytes_44,
- QP_CONTEXT_QPC_BYTES_44_DMAC_H_M,
- QP_CONTEXT_QPC_BYTES_44_DMAC_H_S,
- *((u16 *)(&dmac[4])));
- roce_set_field(context->qpc_bytes_44,
- QP_CONTEXT_QPC_BYTES_44_MAXIMUM_STATIC_RATE_M,
- QP_CONTEXT_QPC_BYTES_44_MAXIMUM_STATIC_RATE_S,
- attr->ah_attr.static_rate);
- roce_set_field(context->qpc_bytes_44,
- QP_CONTEXT_QPC_BYTES_44_HOPLMT_M,
- QP_CONTEXT_QPC_BYTES_44_HOPLMT_S,
- attr->ah_attr.grh.hop_limit);
-
- roce_set_field(context->qpc_bytes_48,
- QP_CONTEXT_QPC_BYTES_48_FLOWLABEL_M,
- QP_CONTEXT_QPC_BYTES_48_FLOWLABEL_S,
- attr->ah_attr.grh.flow_label);
- roce_set_field(context->qpc_bytes_48,
- QP_CONTEXT_QPC_BYTES_48_TCLASS_M,
- QP_CONTEXT_QPC_BYTES_48_TCLASS_S,
- attr->ah_attr.grh.traffic_class);
- roce_set_field(context->qpc_bytes_48,
- QP_CONTEXT_QPC_BYTES_48_MTU_M,
- QP_CONTEXT_QPC_BYTES_48_MTU_S, attr->path_mtu);
-
- memcpy(context->dgid, attr->ah_attr.grh.dgid.raw,
- sizeof(attr->ah_attr.grh.dgid.raw));
-
- dev_dbg(dev, "dmac:%x :%lx\n", context->dmac_l,
- roce_get_field(context->qpc_bytes_44,
- QP_CONTEXT_QPC_BYTES_44_DMAC_H_M,
- QP_CONTEXT_QPC_BYTES_44_DMAC_H_S));
-
- roce_set_field(context->qpc_bytes_68,
- QP_CONTEXT_QPC_BYTES_68_RQ_HEAD_M,
- QP_CONTEXT_QPC_BYTES_68_RQ_HEAD_S,
- hr_qp->rq.head);
- roce_set_field(context->qpc_bytes_68,
- QP_CONTEXT_QPC_BYTES_68_RQ_CUR_INDEX_M,
- QP_CONTEXT_QPC_BYTES_68_RQ_CUR_INDEX_S, 0);
-
- rq_pa_start = (u32)hr_qp->rq.offset / PAGE_SIZE;
- context->cur_rq_wqe_ba_l = (u32)(mtts[rq_pa_start]);
-
- roce_set_field(context->qpc_bytes_76,
- QP_CONTEXT_QPC_BYTES_76_CUR_RQ_WQE_BA_H_M,
- QP_CONTEXT_QPC_BYTES_76_CUR_RQ_WQE_BA_H_S,
- mtts[rq_pa_start] >> 32);
- roce_set_field(context->qpc_bytes_76,
- QP_CONTEXT_QPC_BYTES_76_RX_REQ_MSN_M,
- QP_CONTEXT_QPC_BYTES_76_RX_REQ_MSN_S, 0);
-
- context->rx_rnr_time = 0;
-
- roce_set_field(context->qpc_bytes_84,
- QP_CONTEXT_QPC_BYTES_84_LAST_ACK_PSN_M,
- QP_CONTEXT_QPC_BYTES_84_LAST_ACK_PSN_S,
- attr->rq_psn - 1);
- roce_set_field(context->qpc_bytes_84,
- QP_CONTEXT_QPC_BYTES_84_TRRL_HEAD_M,
- QP_CONTEXT_QPC_BYTES_84_TRRL_HEAD_S, 0);
-
- roce_set_field(context->qpc_bytes_88,
- QP_CONTEXT_QPC_BYTES_88_RX_REQ_EPSN_M,
- QP_CONTEXT_QPC_BYTES_88_RX_REQ_EPSN_S,
- attr->rq_psn);
- roce_set_bit(context->qpc_bytes_88,
- QP_CONTEXT_QPC_BYTES_88_RX_REQ_PSN_ERR_FLAG_S, 0);
- roce_set_bit(context->qpc_bytes_88,
- QP_CONTEXT_QPC_BYTES_88_RX_LAST_OPCODE_FLG_S, 0);
- roce_set_field(context->qpc_bytes_88,
- QP_CONTEXT_QPC_BYTES_88_RQ_REQ_LAST_OPERATION_TYPE_M,
- QP_CONTEXT_QPC_BYTES_88_RQ_REQ_LAST_OPERATION_TYPE_S,
- 0);
- roce_set_field(context->qpc_bytes_88,
- QP_CONTEXT_QPC_BYTES_88_RQ_REQ_RDMA_WR_FLAG_M,
- QP_CONTEXT_QPC_BYTES_88_RQ_REQ_RDMA_WR_FLAG_S,
- 0);
-
- context->dma_length = 0;
- context->r_key = 0;
- context->va_l = 0;
- context->va_h = 0;
-
- roce_set_field(context->qpc_bytes_108,
- QP_CONTEXT_QPC_BYTES_108_TRRL_SDB_PSN_M,
- QP_CONTEXT_QPC_BYTES_108_TRRL_SDB_PSN_S, 0);
- roce_set_bit(context->qpc_bytes_108,
- QP_CONTEXT_QPC_BYTES_108_TRRL_SDB_PSN_FLG_S, 0);
- roce_set_bit(context->qpc_bytes_108,
- QP_CONTEXT_QPC_BYTES_108_TRRL_TDB_PSN_FLG_S, 0);
-
- roce_set_field(context->qpc_bytes_112,
- QP_CONTEXT_QPC_BYTES_112_TRRL_TDB_PSN_M,
- QP_CONTEXT_QPC_BYTES_112_TRRL_TDB_PSN_S, 0);
- roce_set_field(context->qpc_bytes_112,
- QP_CONTEXT_QPC_BYTES_112_TRRL_TAIL_M,
- QP_CONTEXT_QPC_BYTES_112_TRRL_TAIL_S, 0);
-
- /* For chip resp ack */
- roce_set_field(context->qpc_bytes_156,
- QP_CONTEXT_QPC_BYTES_156_PORT_NUM_M,
- QP_CONTEXT_QPC_BYTES_156_PORT_NUM_S,
- hr_qp->phy_port);
- roce_set_field(context->qpc_bytes_156,
- QP_CONTEXT_QPC_BYTES_156_SL_M,
- QP_CONTEXT_QPC_BYTES_156_SL_S, attr->ah_attr.sl);
- hr_qp->sl = attr->ah_attr.sl;
- } else if (cur_state == IB_QPS_RTR &&
- new_state == IB_QPS_RTS) {
- /* If exist optional param, return error */
- if ((attr_mask & IB_QP_ALT_PATH) ||
- (attr_mask & IB_QP_ACCESS_FLAGS) ||
- (attr_mask & IB_QP_QKEY) ||
- (attr_mask & IB_QP_PATH_MIG_STATE) ||
- (attr_mask & IB_QP_CUR_STATE) ||
- (attr_mask & IB_QP_MIN_RNR_TIMER)) {
- dev_err(dev, "RTR2RTS attr_mask error\n");
- goto out;
- }
-
- context->rx_cur_sq_wqe_ba_l = (u32)(mtts[0]);
-
- roce_set_field(context->qpc_bytes_120,
- QP_CONTEXT_QPC_BYTES_120_RX_CUR_SQ_WQE_BA_H_M,
- QP_CONTEXT_QPC_BYTES_120_RX_CUR_SQ_WQE_BA_H_S,
- (mtts[0]) >> 32);
-
- roce_set_field(context->qpc_bytes_124,
- QP_CONTEXT_QPC_BYTES_124_RX_ACK_MSN_M,
- QP_CONTEXT_QPC_BYTES_124_RX_ACK_MSN_S, 0);
- roce_set_field(context->qpc_bytes_124,
- QP_CONTEXT_QPC_BYTES_124_IRRL_MSG_IDX_M,
- QP_CONTEXT_QPC_BYTES_124_IRRL_MSG_IDX_S, 0);
-
- roce_set_field(context->qpc_bytes_128,
- QP_CONTEXT_QPC_BYTES_128_RX_ACK_EPSN_M,
- QP_CONTEXT_QPC_BYTES_128_RX_ACK_EPSN_S,
- attr->sq_psn);
- roce_set_bit(context->qpc_bytes_128,
- QP_CONTEXT_QPC_BYTES_128_RX_ACK_PSN_ERR_FLG_S, 0);
- roce_set_field(context->qpc_bytes_128,
- QP_CONTEXT_QPC_BYTES_128_ACK_LAST_OPERATION_TYPE_M,
- QP_CONTEXT_QPC_BYTES_128_ACK_LAST_OPERATION_TYPE_S,
- 0);
- roce_set_bit(context->qpc_bytes_128,
- QP_CONTEXT_QPC_BYTES_128_IRRL_PSN_VLD_FLG_S, 0);
-
- roce_set_field(context->qpc_bytes_132,
- QP_CONTEXT_QPC_BYTES_132_IRRL_PSN_M,
- QP_CONTEXT_QPC_BYTES_132_IRRL_PSN_S, 0);
- roce_set_field(context->qpc_bytes_132,
- QP_CONTEXT_QPC_BYTES_132_IRRL_TAIL_M,
- QP_CONTEXT_QPC_BYTES_132_IRRL_TAIL_S, 0);
-
- roce_set_field(context->qpc_bytes_136,
- QP_CONTEXT_QPC_BYTES_136_RETRY_MSG_PSN_M,
- QP_CONTEXT_QPC_BYTES_136_RETRY_MSG_PSN_S,
- attr->sq_psn);
- roce_set_field(context->qpc_bytes_136,
- QP_CONTEXT_QPC_BYTES_136_RETRY_MSG_FPKT_PSN_L_M,
- QP_CONTEXT_QPC_BYTES_136_RETRY_MSG_FPKT_PSN_L_S,
- attr->sq_psn);
-
- roce_set_field(context->qpc_bytes_140,
- QP_CONTEXT_QPC_BYTES_140_RETRY_MSG_FPKT_PSN_H_M,
- QP_CONTEXT_QPC_BYTES_140_RETRY_MSG_FPKT_PSN_H_S,
- (attr->sq_psn >> SQ_PSN_SHIFT));
- roce_set_field(context->qpc_bytes_140,
- QP_CONTEXT_QPC_BYTES_140_RETRY_MSG_MSN_M,
- QP_CONTEXT_QPC_BYTES_140_RETRY_MSG_MSN_S, 0);
- roce_set_bit(context->qpc_bytes_140,
- QP_CONTEXT_QPC_BYTES_140_RNR_RETRY_FLG_S, 0);
-
- roce_set_field(context->qpc_bytes_148,
- QP_CONTEXT_QPC_BYTES_148_CHECK_FLAG_M,
- QP_CONTEXT_QPC_BYTES_148_CHECK_FLAG_S, 0);
- roce_set_field(context->qpc_bytes_148,
- QP_CONTEXT_QPC_BYTES_148_RETRY_COUNT_M,
- QP_CONTEXT_QPC_BYTES_148_RETRY_COUNT_S,
- attr->retry_cnt);
- roce_set_field(context->qpc_bytes_148,
- QP_CONTEXT_QPC_BYTES_148_RNR_RETRY_COUNT_M,
- QP_CONTEXT_QPC_BYTES_148_RNR_RETRY_COUNT_S,
- attr->rnr_retry);
- roce_set_field(context->qpc_bytes_148,
- QP_CONTEXT_QPC_BYTES_148_LSN_M,
- QP_CONTEXT_QPC_BYTES_148_LSN_S, 0x100);
-
- context->rnr_retry = 0;
-
- roce_set_field(context->qpc_bytes_156,
- QP_CONTEXT_QPC_BYTES_156_RETRY_COUNT_INIT_M,
- QP_CONTEXT_QPC_BYTES_156_RETRY_COUNT_INIT_S,
- attr->retry_cnt);
- if (attr->timeout < 0x12) {
- dev_info(dev, "ack timeout value(0x%x) must bigger than 0x12.\n",
- attr->timeout);
- roce_set_field(context->qpc_bytes_156,
- QP_CONTEXT_QPC_BYTES_156_ACK_TIMEOUT_M,
- QP_CONTEXT_QPC_BYTES_156_ACK_TIMEOUT_S,
- 0x12);
- } else {
- roce_set_field(context->qpc_bytes_156,
- QP_CONTEXT_QPC_BYTES_156_ACK_TIMEOUT_M,
- QP_CONTEXT_QPC_BYTES_156_ACK_TIMEOUT_S,
- attr->timeout);
- }
- roce_set_field(context->qpc_bytes_156,
- QP_CONTEXT_QPC_BYTES_156_RNR_RETRY_COUNT_INIT_M,
- QP_CONTEXT_QPC_BYTES_156_RNR_RETRY_COUNT_INIT_S,
- attr->rnr_retry);
- roce_set_field(context->qpc_bytes_156,
- QP_CONTEXT_QPC_BYTES_156_PORT_NUM_M,
- QP_CONTEXT_QPC_BYTES_156_PORT_NUM_S,
- hr_qp->phy_port);
- roce_set_field(context->qpc_bytes_156,
- QP_CONTEXT_QPC_BYTES_156_SL_M,
- QP_CONTEXT_QPC_BYTES_156_SL_S, attr->ah_attr.sl);
- hr_qp->sl = attr->ah_attr.sl;
- roce_set_field(context->qpc_bytes_156,
- QP_CONTEXT_QPC_BYTES_156_INITIATOR_DEPTH_M,
- QP_CONTEXT_QPC_BYTES_156_INITIATOR_DEPTH_S,
- ilog2((unsigned int)attr->max_rd_atomic));
- roce_set_field(context->qpc_bytes_156,
- QP_CONTEXT_QPC_BYTES_156_ACK_REQ_IND_M,
- QP_CONTEXT_QPC_BYTES_156_ACK_REQ_IND_S, 0);
- context->pkt_use_len = 0;
-
- roce_set_field(context->qpc_bytes_164,
- QP_CONTEXT_QPC_BYTES_164_SQ_PSN_M,
- QP_CONTEXT_QPC_BYTES_164_SQ_PSN_S, attr->sq_psn);
- roce_set_field(context->qpc_bytes_164,
- QP_CONTEXT_QPC_BYTES_164_IRRL_HEAD_M,
- QP_CONTEXT_QPC_BYTES_164_IRRL_HEAD_S, 0);
-
- roce_set_field(context->qpc_bytes_168,
- QP_CONTEXT_QPC_BYTES_168_RETRY_SQ_PSN_M,
- QP_CONTEXT_QPC_BYTES_168_RETRY_SQ_PSN_S,
- attr->sq_psn);
- roce_set_field(context->qpc_bytes_168,
- QP_CONTEXT_QPC_BYTES_168_SGE_USE_FLA_M,
- QP_CONTEXT_QPC_BYTES_168_SGE_USE_FLA_S, 0);
- roce_set_field(context->qpc_bytes_168,
- QP_CONTEXT_QPC_BYTES_168_DB_TYPE_M,
- QP_CONTEXT_QPC_BYTES_168_DB_TYPE_S, 0);
- roce_set_bit(context->qpc_bytes_168,
- QP_CONTEXT_QPC_BYTES_168_MSG_LP_IND_S, 0);
- roce_set_bit(context->qpc_bytes_168,
- QP_CONTEXT_QPC_BYTES_168_CSDB_LP_IND_S, 0);
- roce_set_bit(context->qpc_bytes_168,
- QP_CONTEXT_QPC_BYTES_168_QP_ERR_FLG_S, 0);
- context->sge_use_len = 0;
-
- roce_set_field(context->qpc_bytes_176,
- QP_CONTEXT_QPC_BYTES_176_DB_CUR_INDEX_M,
- QP_CONTEXT_QPC_BYTES_176_DB_CUR_INDEX_S, 0);
- roce_set_field(context->qpc_bytes_176,
- QP_CONTEXT_QPC_BYTES_176_RETRY_DB_CUR_INDEX_M,
- QP_CONTEXT_QPC_BYTES_176_RETRY_DB_CUR_INDEX_S,
- 0);
- roce_set_field(context->qpc_bytes_180,
- QP_CONTEXT_QPC_BYTES_180_SQ_CUR_INDEX_M,
- QP_CONTEXT_QPC_BYTES_180_SQ_CUR_INDEX_S, 0);
- roce_set_field(context->qpc_bytes_180,
- QP_CONTEXT_QPC_BYTES_180_SQ_HEAD_M,
- QP_CONTEXT_QPC_BYTES_180_SQ_HEAD_S, 0);
-
- context->tx_cur_sq_wqe_ba_l = (u32)(mtts[0]);
-
- roce_set_field(context->qpc_bytes_188,
- QP_CONTEXT_QPC_BYTES_188_TX_CUR_SQ_WQE_BA_H_M,
- QP_CONTEXT_QPC_BYTES_188_TX_CUR_SQ_WQE_BA_H_S,
- (mtts[0]) >> 32);
- roce_set_bit(context->qpc_bytes_188,
- QP_CONTEXT_QPC_BYTES_188_PKT_RETRY_FLG_S, 0);
- roce_set_field(context->qpc_bytes_188,
- QP_CONTEXT_QPC_BYTES_188_TX_RETRY_CUR_INDEX_M,
- QP_CONTEXT_QPC_BYTES_188_TX_RETRY_CUR_INDEX_S,
- 0);
- } else if (!((cur_state == IB_QPS_INIT && new_state == IB_QPS_RESET) ||
- (cur_state == IB_QPS_INIT && new_state == IB_QPS_ERR) ||
- (cur_state == IB_QPS_RTR && new_state == IB_QPS_RESET) ||
- (cur_state == IB_QPS_RTR && new_state == IB_QPS_ERR) ||
- (cur_state == IB_QPS_RTS && new_state == IB_QPS_RESET) ||
- (cur_state == IB_QPS_RTS && new_state == IB_QPS_ERR) ||
- (cur_state == IB_QPS_ERR && new_state == IB_QPS_RESET) ||
- (cur_state == IB_QPS_ERR && new_state == IB_QPS_ERR))) {
- dev_err(dev, "not support this status migration\n");
- goto out;
- }
-
- /* Every status migrate must change state */
- roce_set_field(context->qpc_bytes_144,
- QP_CONTEXT_QPC_BYTES_144_QP_STATE_M,
- QP_CONTEXT_QPC_BYTES_144_QP_STATE_S, new_state);
-
- /* SW pass context to HW */
- ret = hns_roce_v1_qp_modify(hr_dev, &hr_qp->mtt,
- to_hns_roce_state(cur_state),
- to_hns_roce_state(new_state), context,
- hr_qp);
- if (ret) {
- dev_err(dev, "hns_roce_qp_modify failed\n");
- goto out;
- }
-
- /*
- * Use rst2init to instead of init2init with drv,
- * need to hw to flash RQ HEAD by DB again
- */
- if (cur_state == IB_QPS_INIT && new_state == IB_QPS_INIT) {
- /* Memory barrier */
- wmb();
-
- roce_set_field(doorbell[0], RQ_DOORBELL_U32_4_RQ_HEAD_M,
- RQ_DOORBELL_U32_4_RQ_HEAD_S, hr_qp->rq.head);
- roce_set_field(doorbell[1], RQ_DOORBELL_U32_8_QPN_M,
- RQ_DOORBELL_U32_8_QPN_S, hr_qp->qpn);
- roce_set_field(doorbell[1], RQ_DOORBELL_U32_8_CMD_M,
- RQ_DOORBELL_U32_8_CMD_S, 1);
- roce_set_bit(doorbell[1], RQ_DOORBELL_U32_8_HW_SYNC_S, 1);
-
- if (ibqp->uobject) {
- hr_qp->rq.db_reg_l = hr_dev->reg_base +
- ROCEE_DB_OTHERS_L_0_REG +
- DB_REG_OFFSET * hr_dev->priv_uar.index;
- }
-
- hns_roce_write64_k(doorbell, hr_qp->rq.db_reg_l);
- }
-
- hr_qp->state = new_state;
-
- if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
- hr_qp->resp_depth = attr->max_dest_rd_atomic;
- if (attr_mask & IB_QP_PORT) {
- hr_qp->port = attr->port_num - 1;
- hr_qp->phy_port = hr_dev->iboe.phy_port[hr_qp->port];
- }
-
- if (new_state == IB_QPS_RESET && !ibqp->uobject) {
- hns_roce_v1_cq_clean(to_hr_cq(ibqp->recv_cq), hr_qp->qpn,
- ibqp->srq ? to_hr_srq(ibqp->srq) : NULL);
- if (ibqp->send_cq != ibqp->recv_cq)
- hns_roce_v1_cq_clean(to_hr_cq(ibqp->send_cq),
- hr_qp->qpn, NULL);
-
- hr_qp->rq.head = 0;
- hr_qp->rq.tail = 0;
- hr_qp->sq.head = 0;
- hr_qp->sq.tail = 0;
- hr_qp->sq_next_wqe = 0;
- }
-out:
- kfree(context);
- return ret;
-}
-
-int hns_roce_v1_modify_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
- int attr_mask, enum ib_qp_state cur_state,
- enum ib_qp_state new_state)
-{
-
- if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI)
- return hns_roce_v1_m_sqp(ibqp, attr, attr_mask, cur_state,
- new_state);
- else
- return hns_roce_v1_m_qp(ibqp, attr, attr_mask, cur_state,
- new_state);
-}
-
-static enum ib_qp_state to_ib_qp_state(enum hns_roce_qp_state state)
-{
- switch (state) {
- case HNS_ROCE_QP_STATE_RST:
- return IB_QPS_RESET;
- case HNS_ROCE_QP_STATE_INIT:
- return IB_QPS_INIT;
- case HNS_ROCE_QP_STATE_RTR:
- return IB_QPS_RTR;
- case HNS_ROCE_QP_STATE_RTS:
- return IB_QPS_RTS;
- case HNS_ROCE_QP_STATE_SQD:
- return IB_QPS_SQD;
- case HNS_ROCE_QP_STATE_ERR:
- return IB_QPS_ERR;
- default:
- return IB_QPS_ERR;
- }
-}
-
-static int hns_roce_v1_query_qpc(struct hns_roce_dev *hr_dev,
- struct hns_roce_qp *hr_qp,
- struct hns_roce_qp_context *hr_context)
-{
- struct hns_roce_cmd_mailbox *mailbox;
- int ret;
-
- mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
- if (IS_ERR(mailbox))
- return PTR_ERR(mailbox);
-
- ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, hr_qp->qpn, 0,
- HNS_ROCE_CMD_QUERY_QP,
- HNS_ROCE_CMD_TIMEOUT_MSECS);
- if (!ret)
- memcpy(hr_context, mailbox->buf, sizeof(*hr_context));
- else
- dev_err(&hr_dev->pdev->dev, "QUERY QP cmd process error\n");
-
- hns_roce_free_cmd_mailbox(hr_dev, mailbox);
-
- return ret;
-}
-
-static int hns_roce_v1_q_sqp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
- int qp_attr_mask,
- struct ib_qp_init_attr *qp_init_attr)
-{
- struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
- struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
- struct hns_roce_sqp_context context;
- u32 addr;
-
- mutex_lock(&hr_qp->mutex);
-
- if (hr_qp->state == IB_QPS_RESET) {
- qp_attr->qp_state = IB_QPS_RESET;
- goto done;
- }
-
- addr = ROCEE_QP1C_CFG0_0_REG +
- hr_qp->port * sizeof(struct hns_roce_sqp_context);
- context.qp1c_bytes_4 = roce_read(hr_dev, addr);
- context.sq_rq_bt_l = roce_read(hr_dev, addr + 1);
- context.qp1c_bytes_12 = roce_read(hr_dev, addr + 2);
- context.qp1c_bytes_16 = roce_read(hr_dev, addr + 3);
- context.qp1c_bytes_20 = roce_read(hr_dev, addr + 4);
- context.cur_rq_wqe_ba_l = roce_read(hr_dev, addr + 5);
- context.qp1c_bytes_28 = roce_read(hr_dev, addr + 6);
- context.qp1c_bytes_32 = roce_read(hr_dev, addr + 7);
- context.cur_sq_wqe_ba_l = roce_read(hr_dev, addr + 8);
- context.qp1c_bytes_40 = roce_read(hr_dev, addr + 9);
-
- hr_qp->state = roce_get_field(context.qp1c_bytes_4,
- QP1C_BYTES_4_QP_STATE_M,
- QP1C_BYTES_4_QP_STATE_S);
- qp_attr->qp_state = hr_qp->state;
- qp_attr->path_mtu = IB_MTU_256;
- qp_attr->path_mig_state = IB_MIG_ARMED;
- qp_attr->qkey = QKEY_VAL;
- qp_attr->rq_psn = 0;
- qp_attr->sq_psn = 0;
- qp_attr->dest_qp_num = 1;
- qp_attr->qp_access_flags = 6;
-
- qp_attr->pkey_index = roce_get_field(context.qp1c_bytes_20,
- QP1C_BYTES_20_PKEY_IDX_M,
- QP1C_BYTES_20_PKEY_IDX_S);
- qp_attr->port_num = hr_qp->port + 1;
- qp_attr->sq_draining = 0;
- qp_attr->max_rd_atomic = 0;
- qp_attr->max_dest_rd_atomic = 0;
- qp_attr->min_rnr_timer = 0;
- qp_attr->timeout = 0;
- qp_attr->retry_cnt = 0;
- qp_attr->rnr_retry = 0;
- qp_attr->alt_timeout = 0;
-
-done:
- qp_attr->cur_qp_state = qp_attr->qp_state;
- qp_attr->cap.max_recv_wr = hr_qp->rq.wqe_cnt;
- qp_attr->cap.max_recv_sge = hr_qp->rq.max_gs;
- qp_attr->cap.max_send_wr = hr_qp->sq.wqe_cnt;
- qp_attr->cap.max_send_sge = hr_qp->sq.max_gs;
- qp_attr->cap.max_inline_data = 0;
- qp_init_attr->cap = qp_attr->cap;
- qp_init_attr->create_flags = 0;
-
- mutex_unlock(&hr_qp->mutex);
-
- return 0;
-}
-
-static int hns_roce_v1_q_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
- int qp_attr_mask,
- struct ib_qp_init_attr *qp_init_attr)
-{
- struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
- struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
- struct device *dev = &hr_dev->pdev->dev;
- struct hns_roce_qp_context *context;
- int tmp_qp_state = 0;
- int ret = 0;
- int state;
-
- context = kzalloc(sizeof(*context), GFP_KERNEL);
- if (!context)
- return -ENOMEM;
-
- memset(qp_attr, 0, sizeof(*qp_attr));
- memset(qp_init_attr, 0, sizeof(*qp_init_attr));
-
- mutex_lock(&hr_qp->mutex);
-
- if (hr_qp->state == IB_QPS_RESET) {
- qp_attr->qp_state = IB_QPS_RESET;
- goto done;
- }
-
- ret = hns_roce_v1_query_qpc(hr_dev, hr_qp, context);
- if (ret) {
- dev_err(dev, "query qpc error\n");
- ret = -EINVAL;
- goto out;
- }
-
- state = roce_get_field(context->qpc_bytes_144,
- QP_CONTEXT_QPC_BYTES_144_QP_STATE_M,
- QP_CONTEXT_QPC_BYTES_144_QP_STATE_S);
- tmp_qp_state = (int)to_ib_qp_state((enum hns_roce_qp_state)state);
- if (tmp_qp_state == -1) {
- dev_err(dev, "to_ib_qp_state error\n");
- ret = -EINVAL;
- goto out;
- }
- hr_qp->state = (u8)tmp_qp_state;
- qp_attr->qp_state = (enum ib_qp_state)hr_qp->state;
- qp_attr->path_mtu = (enum ib_mtu)roce_get_field(context->qpc_bytes_48,
- QP_CONTEXT_QPC_BYTES_48_MTU_M,
- QP_CONTEXT_QPC_BYTES_48_MTU_S);
- qp_attr->path_mig_state = IB_MIG_ARMED;
- if (hr_qp->ibqp.qp_type == IB_QPT_UD)
- qp_attr->qkey = QKEY_VAL;
-
- qp_attr->rq_psn = roce_get_field(context->qpc_bytes_88,
- QP_CONTEXT_QPC_BYTES_88_RX_REQ_EPSN_M,
- QP_CONTEXT_QPC_BYTES_88_RX_REQ_EPSN_S);
- qp_attr->sq_psn = (u32)roce_get_field(context->qpc_bytes_164,
- QP_CONTEXT_QPC_BYTES_164_SQ_PSN_M,
- QP_CONTEXT_QPC_BYTES_164_SQ_PSN_S);
- qp_attr->dest_qp_num = (u8)roce_get_field(context->qpc_bytes_36,
- QP_CONTEXT_QPC_BYTES_36_DEST_QP_M,
- QP_CONTEXT_QPC_BYTES_36_DEST_QP_S);
- qp_attr->qp_access_flags = ((roce_get_bit(context->qpc_bytes_4,
- QP_CONTEXT_QPC_BYTE_4_RDMA_READ_ENABLE_S)) << 2) |
- ((roce_get_bit(context->qpc_bytes_4,
- QP_CONTEXT_QPC_BYTE_4_RDMA_WRITE_ENABLE_S)) << 1) |
- ((roce_get_bit(context->qpc_bytes_4,
- QP_CONTEXT_QPC_BYTE_4_ATOMIC_OPERATION_ENABLE_S)) << 3);
-
- if (hr_qp->ibqp.qp_type == IB_QPT_RC ||
- hr_qp->ibqp.qp_type == IB_QPT_UC) {
- qp_attr->ah_attr.sl = roce_get_field(context->qpc_bytes_156,
- QP_CONTEXT_QPC_BYTES_156_SL_M,
- QP_CONTEXT_QPC_BYTES_156_SL_S);
- qp_attr->ah_attr.grh.flow_label = roce_get_field(
- context->qpc_bytes_48,
- QP_CONTEXT_QPC_BYTES_48_FLOWLABEL_M,
- QP_CONTEXT_QPC_BYTES_48_FLOWLABEL_S);
- qp_attr->ah_attr.grh.sgid_index = roce_get_field(
- context->qpc_bytes_36,
- QP_CONTEXT_QPC_BYTES_36_SGID_INDEX_M,
- QP_CONTEXT_QPC_BYTES_36_SGID_INDEX_S);
- qp_attr->ah_attr.grh.hop_limit = roce_get_field(
- context->qpc_bytes_44,
- QP_CONTEXT_QPC_BYTES_44_HOPLMT_M,
- QP_CONTEXT_QPC_BYTES_44_HOPLMT_S);
- qp_attr->ah_attr.grh.traffic_class = roce_get_field(
- context->qpc_bytes_48,
- QP_CONTEXT_QPC_BYTES_48_TCLASS_M,
- QP_CONTEXT_QPC_BYTES_48_TCLASS_S);
-
- memcpy(qp_attr->ah_attr.grh.dgid.raw, context->dgid,
- sizeof(qp_attr->ah_attr.grh.dgid.raw));
- }
-
- qp_attr->pkey_index = roce_get_field(context->qpc_bytes_12,
- QP_CONTEXT_QPC_BYTES_12_P_KEY_INDEX_M,
- QP_CONTEXT_QPC_BYTES_12_P_KEY_INDEX_S);
- qp_attr->port_num = hr_qp->port + 1;
- qp_attr->sq_draining = 0;
- qp_attr->max_rd_atomic = roce_get_field(context->qpc_bytes_156,
- QP_CONTEXT_QPC_BYTES_156_INITIATOR_DEPTH_M,
- QP_CONTEXT_QPC_BYTES_156_INITIATOR_DEPTH_S);
- qp_attr->max_dest_rd_atomic = roce_get_field(context->qpc_bytes_32,
- QP_CONTEXT_QPC_BYTES_32_RESPONDER_RESOURCES_M,
- QP_CONTEXT_QPC_BYTES_32_RESPONDER_RESOURCES_S);
- qp_attr->min_rnr_timer = (u8)(roce_get_field(context->qpc_bytes_24,
- QP_CONTEXT_QPC_BYTES_24_MINIMUM_RNR_NAK_TIMER_M,
- QP_CONTEXT_QPC_BYTES_24_MINIMUM_RNR_NAK_TIMER_S));
- qp_attr->timeout = (u8)(roce_get_field(context->qpc_bytes_156,
- QP_CONTEXT_QPC_BYTES_156_ACK_TIMEOUT_M,
- QP_CONTEXT_QPC_BYTES_156_ACK_TIMEOUT_S));
- qp_attr->retry_cnt = roce_get_field(context->qpc_bytes_148,
- QP_CONTEXT_QPC_BYTES_148_RETRY_COUNT_M,
- QP_CONTEXT_QPC_BYTES_148_RETRY_COUNT_S);
- qp_attr->rnr_retry = context->rnr_retry;
-
-done:
- qp_attr->cur_qp_state = qp_attr->qp_state;
- qp_attr->cap.max_recv_wr = hr_qp->rq.wqe_cnt;
- qp_attr->cap.max_recv_sge = hr_qp->rq.max_gs;
-
- if (!ibqp->uobject) {
- qp_attr->cap.max_send_wr = hr_qp->sq.wqe_cnt;
- qp_attr->cap.max_send_sge = hr_qp->sq.max_gs;
- } else {
- qp_attr->cap.max_send_wr = 0;
- qp_attr->cap.max_send_sge = 0;
- }
-
- qp_init_attr->cap = qp_attr->cap;
-
-out:
- mutex_unlock(&hr_qp->mutex);
- kfree(context);
- return ret;
-}
-
-int hns_roce_v1_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
- int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr)
-{
- struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
-
- return hr_qp->doorbell_qpn <= 1 ?
- hns_roce_v1_q_sqp(ibqp, qp_attr, qp_attr_mask, qp_init_attr) :
- hns_roce_v1_q_qp(ibqp, qp_attr, qp_attr_mask, qp_init_attr);
-}
-
-static int check_qp_db_process_status(struct hns_roce_dev *hr_dev,
- struct hns_roce_qp *hr_qp,
- u32 sdb_issue_ptr,
- u32 *sdb_inv_cnt,
- u32 *wait_stage)
-{
- struct device *dev = &hr_dev->pdev->dev;
- u32 sdb_retry_cnt, old_retry;
- u32 sdb_send_ptr, old_send;
- u32 success_flags = 0;
- u32 cur_cnt, old_cnt;
- unsigned long end;
- u32 send_ptr;
- u32 inv_cnt;
- u32 tsp_st;
-
- if (*wait_stage > HNS_ROCE_V1_DB_STAGE2 ||
- *wait_stage < HNS_ROCE_V1_DB_STAGE1) {
- dev_err(dev, "QP(0x%lx) db status wait stage(%d) error!\n",
- hr_qp->qpn, *wait_stage);
- return -EINVAL;
- }
-
- /* Calculate the total timeout for the entire verification process */
- end = msecs_to_jiffies(HNS_ROCE_V1_CHECK_DB_TIMEOUT_MSECS) + jiffies;
-
- if (*wait_stage == HNS_ROCE_V1_DB_STAGE1) {
- /* Query db process status, until hw process completely */
- sdb_send_ptr = roce_read(hr_dev, ROCEE_SDB_SEND_PTR_REG);
- while (roce_hw_index_cmp_lt(sdb_send_ptr, sdb_issue_ptr,
- ROCEE_SDB_PTR_CMP_BITS)) {
- if (!time_before(jiffies, end)) {
- dev_dbg(dev, "QP(0x%lx) db process stage1 timeout. issue 0x%x send 0x%x.\n",
- hr_qp->qpn, sdb_issue_ptr,
- sdb_send_ptr);
- return 0;
- }
-
- msleep(HNS_ROCE_V1_CHECK_DB_SLEEP_MSECS);
- sdb_send_ptr = roce_read(hr_dev,
- ROCEE_SDB_SEND_PTR_REG);
- }
-
- if (roce_get_field(sdb_issue_ptr,
- ROCEE_SDB_ISSUE_PTR_SDB_ISSUE_PTR_M,
- ROCEE_SDB_ISSUE_PTR_SDB_ISSUE_PTR_S) ==
- roce_get_field(sdb_send_ptr,
- ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M,
- ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S)) {
- old_send = roce_read(hr_dev, ROCEE_SDB_SEND_PTR_REG);
- old_retry = roce_read(hr_dev, ROCEE_SDB_RETRY_CNT_REG);
-
- do {
- tsp_st = roce_read(hr_dev, ROCEE_TSP_BP_ST_REG);
- if (roce_get_bit(tsp_st,
- ROCEE_TSP_BP_ST_QH_FIFO_ENTRY_S) == 1) {
- *wait_stage = HNS_ROCE_V1_DB_WAIT_OK;
- return 0;
- }
-
- if (!time_before(jiffies, end)) {
- dev_dbg(dev, "QP(0x%lx) db process stage1 timeout when send ptr equals issue ptr.\n"
- "issue 0x%x send 0x%x.\n",
- hr_qp->qpn, sdb_issue_ptr,
- sdb_send_ptr);
- return 0;
- }
-
- msleep(HNS_ROCE_V1_CHECK_DB_SLEEP_MSECS);
-
- sdb_send_ptr = roce_read(hr_dev,
- ROCEE_SDB_SEND_PTR_REG);
- sdb_retry_cnt = roce_read(hr_dev,
- ROCEE_SDB_RETRY_CNT_REG);
- cur_cnt = roce_get_field(sdb_send_ptr,
- ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M,
- ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S) +
- roce_get_field(sdb_retry_cnt,
- ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_M,
- ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_S);
- if (!roce_get_bit(tsp_st,
- ROCEE_CNT_CLR_CE_CNT_CLR_CE_S)) {
- old_cnt = roce_get_field(old_send,
- ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M,
- ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S) +
- roce_get_field(old_retry,
- ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_M,
- ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_S);
- if (cur_cnt - old_cnt > SDB_ST_CMP_VAL)
- success_flags = 1;
- } else {
- old_cnt = roce_get_field(old_send,
- ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M,
- ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S);
- if (cur_cnt - old_cnt > SDB_ST_CMP_VAL)
- success_flags = 1;
- else {
- send_ptr = roce_get_field(old_send,
- ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M,
- ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S) +
- roce_get_field(sdb_retry_cnt,
- ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_M,
- ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_S);
- roce_set_field(old_send,
- ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M,
- ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S,
- send_ptr);
- }
- }
- } while (!success_flags);
- }
-
- *wait_stage = HNS_ROCE_V1_DB_STAGE2;
-
- /* Get list pointer */
- *sdb_inv_cnt = roce_read(hr_dev, ROCEE_SDB_INV_CNT_REG);
- dev_dbg(dev, "QP(0x%lx) db process stage2. inv cnt = 0x%x.\n",
- hr_qp->qpn, *sdb_inv_cnt);
- }
-
- if (*wait_stage == HNS_ROCE_V1_DB_STAGE2) {
- /* Query db's list status, until hw reversal */
- inv_cnt = roce_read(hr_dev, ROCEE_SDB_INV_CNT_REG);
- while (roce_hw_index_cmp_lt(inv_cnt,
- *sdb_inv_cnt + SDB_INV_CNT_OFFSET,
- ROCEE_SDB_CNT_CMP_BITS)) {
- if (!time_before(jiffies, end)) {
- dev_dbg(dev, "QP(0x%lx) db process stage2 timeout. inv cnt 0x%x.\n",
- hr_qp->qpn, inv_cnt);
- return 0;
- }
-
- msleep(HNS_ROCE_V1_CHECK_DB_SLEEP_MSECS);
- inv_cnt = roce_read(hr_dev, ROCEE_SDB_INV_CNT_REG);
- }
-
- *wait_stage = HNS_ROCE_V1_DB_WAIT_OK;
- }
-
- return 0;
-}
-
-static int check_qp_reset_state(struct hns_roce_dev *hr_dev,
- struct hns_roce_qp *hr_qp,
- struct hns_roce_qp_work *qp_work_entry,
- int *is_timeout)
-{
- struct device *dev = &hr_dev->pdev->dev;
- u32 sdb_issue_ptr;
- int ret;
-
- if (hr_qp->state != IB_QPS_RESET) {
- /* Set qp to ERR, waiting for hw complete processing all dbs */
- ret = hns_roce_v1_modify_qp(&hr_qp->ibqp, NULL, 0, hr_qp->state,
- IB_QPS_ERR);
- if (ret) {
- dev_err(dev, "Modify QP(0x%lx) to ERR failed!\n",
- hr_qp->qpn);
- return ret;
- }
-
- /* Record issued doorbell */
- sdb_issue_ptr = roce_read(hr_dev, ROCEE_SDB_ISSUE_PTR_REG);
- qp_work_entry->sdb_issue_ptr = sdb_issue_ptr;
- qp_work_entry->db_wait_stage = HNS_ROCE_V1_DB_STAGE1;
-
- /* Query db process status, until hw process completely */
- ret = check_qp_db_process_status(hr_dev, hr_qp, sdb_issue_ptr,
- &qp_work_entry->sdb_inv_cnt,
- &qp_work_entry->db_wait_stage);
- if (ret) {
- dev_err(dev, "Check QP(0x%lx) db process status failed!\n",
- hr_qp->qpn);
- return ret;
- }
-
- if (qp_work_entry->db_wait_stage != HNS_ROCE_V1_DB_WAIT_OK) {
- qp_work_entry->sche_cnt = 0;
- *is_timeout = 1;
- return 0;
- }
-
- /* Modify qp to reset before destroying qp */
- ret = hns_roce_v1_modify_qp(&hr_qp->ibqp, NULL, 0, hr_qp->state,
- IB_QPS_RESET);
- if (ret) {
- dev_err(dev, "Modify QP(0x%lx) to RST failed!\n",
- hr_qp->qpn);
- return ret;
- }
- }
-
- return 0;
-}
-
-static void hns_roce_v1_destroy_qp_work_fn(struct work_struct *work)
-{
- struct hns_roce_qp_work *qp_work_entry;
- struct hns_roce_v1_priv *priv;
- struct hns_roce_dev *hr_dev;
- struct hns_roce_qp *hr_qp;
- struct device *dev;
- int ret;
-
- qp_work_entry = container_of(work, struct hns_roce_qp_work, work);
- hr_dev = to_hr_dev(qp_work_entry->ib_dev);
- dev = &hr_dev->pdev->dev;
- priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv;
- hr_qp = qp_work_entry->qp;
-
- dev_dbg(dev, "Schedule destroy QP(0x%lx) work.\n", hr_qp->qpn);
-
- qp_work_entry->sche_cnt++;
-
- /* Query db process status, until hw process completely */
- ret = check_qp_db_process_status(hr_dev, hr_qp,
- qp_work_entry->sdb_issue_ptr,
- &qp_work_entry->sdb_inv_cnt,
- &qp_work_entry->db_wait_stage);
- if (ret) {
- dev_err(dev, "Check QP(0x%lx) db process status failed!\n",
- hr_qp->qpn);
- return;
- }
-
- if (qp_work_entry->db_wait_stage != HNS_ROCE_V1_DB_WAIT_OK &&
- priv->des_qp.requeue_flag) {
- queue_work(priv->des_qp.qp_wq, work);
- return;
- }
-
- /* Modify qp to reset before destroying qp */
- ret = hns_roce_v1_modify_qp(&hr_qp->ibqp, NULL, 0, hr_qp->state,
- IB_QPS_RESET);
- if (ret) {
- dev_err(dev, "Modify QP(0x%lx) to RST failed!\n", hr_qp->qpn);
- return;
- }
-
- hns_roce_qp_remove(hr_dev, hr_qp);
- hns_roce_qp_free(hr_dev, hr_qp);
-
- if (hr_qp->ibqp.qp_type == IB_QPT_RC) {
- /* RC QP, release QPN */
- hns_roce_release_range_qp(hr_dev, hr_qp->qpn, 1);
- kfree(hr_qp);
- } else
- kfree(hr_to_hr_sqp(hr_qp));
-
- kfree(qp_work_entry);
-
- dev_dbg(dev, "Accomplished destroy QP(0x%lx) work.\n", hr_qp->qpn);
-}
-
-int hns_roce_v1_destroy_qp(struct ib_qp *ibqp)
-{
- struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
- struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
- struct device *dev = &hr_dev->pdev->dev;
- struct hns_roce_qp_work qp_work_entry;
- struct hns_roce_qp_work *qp_work;
- struct hns_roce_v1_priv *priv;
- struct hns_roce_cq *send_cq, *recv_cq;
- int is_user = !!ibqp->pd->uobject;
- int is_timeout = 0;
- int ret;
-
- ret = check_qp_reset_state(hr_dev, hr_qp, &qp_work_entry, &is_timeout);
- if (ret) {
- dev_err(dev, "QP reset state check failed(%d)!\n", ret);
- return ret;
- }
-
- send_cq = to_hr_cq(hr_qp->ibqp.send_cq);
- recv_cq = to_hr_cq(hr_qp->ibqp.recv_cq);
-
- hns_roce_lock_cqs(send_cq, recv_cq);
- if (!is_user) {
- __hns_roce_v1_cq_clean(recv_cq, hr_qp->qpn, hr_qp->ibqp.srq ?
- to_hr_srq(hr_qp->ibqp.srq) : NULL);
- if (send_cq != recv_cq)
- __hns_roce_v1_cq_clean(send_cq, hr_qp->qpn, NULL);
- }
- hns_roce_unlock_cqs(send_cq, recv_cq);
-
- if (!is_timeout) {
- hns_roce_qp_remove(hr_dev, hr_qp);
- hns_roce_qp_free(hr_dev, hr_qp);
-
- /* RC QP, release QPN */
- if (hr_qp->ibqp.qp_type == IB_QPT_RC)
- hns_roce_release_range_qp(hr_dev, hr_qp->qpn, 1);
- }
-
- hns_roce_mtt_cleanup(hr_dev, &hr_qp->mtt);
-
- if (is_user)
- ib_umem_release(hr_qp->umem);
- else {
- kfree(hr_qp->sq.wrid);
- kfree(hr_qp->rq.wrid);
-
- hns_roce_buf_free(hr_dev, hr_qp->buff_size, &hr_qp->hr_buf);
- }
-
- if (!is_timeout) {
- if (hr_qp->ibqp.qp_type == IB_QPT_RC)
- kfree(hr_qp);
- else
- kfree(hr_to_hr_sqp(hr_qp));
- } else {
- qp_work = kzalloc(sizeof(*qp_work), GFP_KERNEL);
- if (!qp_work)
- return -ENOMEM;
-
- INIT_WORK(&qp_work->work, hns_roce_v1_destroy_qp_work_fn);
- qp_work->ib_dev = &hr_dev->ib_dev;
- qp_work->qp = hr_qp;
- qp_work->db_wait_stage = qp_work_entry.db_wait_stage;
- qp_work->sdb_issue_ptr = qp_work_entry.sdb_issue_ptr;
- qp_work->sdb_inv_cnt = qp_work_entry.sdb_inv_cnt;
- qp_work->sche_cnt = qp_work_entry.sche_cnt;
-
- priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv;
- queue_work(priv->des_qp.qp_wq, &qp_work->work);
- dev_dbg(dev, "Begin destroy QP(0x%lx) work.\n", hr_qp->qpn);
- }
-
- return 0;
-}
-
-int hns_roce_v1_destroy_cq(struct ib_cq *ibcq)
-{
- struct hns_roce_dev *hr_dev = to_hr_dev(ibcq->device);
- struct hns_roce_cq *hr_cq = to_hr_cq(ibcq);
- struct device *dev = &hr_dev->pdev->dev;
- u32 cqe_cnt_ori;
- u32 cqe_cnt_cur;
- u32 cq_buf_size;
- int wait_time = 0;
- int ret = 0;
-
- hns_roce_free_cq(hr_dev, hr_cq);
-
- /*
- * Before freeing cq buffer, we need to ensure that the outstanding CQE
- * have been written by checking the CQE counter.
- */
- cqe_cnt_ori = roce_read(hr_dev, ROCEE_SCAEP_WR_CQE_CNT);
- while (1) {
- if (roce_read(hr_dev, ROCEE_CAEP_CQE_WCMD_EMPTY) &
- HNS_ROCE_CQE_WCMD_EMPTY_BIT)
- break;
-
- cqe_cnt_cur = roce_read(hr_dev, ROCEE_SCAEP_WR_CQE_CNT);
- if ((cqe_cnt_cur - cqe_cnt_ori) >= HNS_ROCE_MIN_CQE_CNT)
- break;
-
- msleep(HNS_ROCE_EACH_FREE_CQ_WAIT_MSECS);
- if (wait_time > HNS_ROCE_MAX_FREE_CQ_WAIT_CNT) {
- dev_warn(dev, "Destroy cq 0x%lx timeout!\n",
- hr_cq->cqn);
- ret = -ETIMEDOUT;
- break;
- }
- wait_time++;
- }
-
- hns_roce_mtt_cleanup(hr_dev, &hr_cq->hr_buf.hr_mtt);
-
- if (ibcq->uobject)
- ib_umem_release(hr_cq->umem);
- else {
- /* Free the buff of stored cq */
- cq_buf_size = (ibcq->cqe + 1) * hr_dev->caps.cq_entry_sz;
- hns_roce_buf_free(hr_dev, cq_buf_size, &hr_cq->hr_buf.hr_buf);
- }
-
- kfree(hr_cq);
-
- return ret;
-}
-
-struct hns_roce_v1_priv hr_v1_priv;
-
-struct hns_roce_hw hns_roce_hw_v1 = {
- .reset = hns_roce_v1_reset,
- .hw_profile = hns_roce_v1_profile,
- .hw_init = hns_roce_v1_init,
- .hw_exit = hns_roce_v1_exit,
- .set_gid = hns_roce_v1_set_gid,
- .set_mac = hns_roce_v1_set_mac,
- .set_mtu = hns_roce_v1_set_mtu,
- .write_mtpt = hns_roce_v1_write_mtpt,
- .write_cqc = hns_roce_v1_write_cqc,
- .clear_hem = hns_roce_v1_clear_hem,
- .modify_qp = hns_roce_v1_modify_qp,
- .query_qp = hns_roce_v1_query_qp,
- .destroy_qp = hns_roce_v1_destroy_qp,
- .post_send = hns_roce_v1_post_send,
- .post_recv = hns_roce_v1_post_recv,
- .req_notify_cq = hns_roce_v1_req_notify_cq,
- .poll_cq = hns_roce_v1_poll_cq,
- .dereg_mr = hns_roce_v1_dereg_mr,
- .destroy_cq = hns_roce_v1_destroy_cq,
- .priv = &hr_v1_priv,
-};
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h
deleted file mode 100644
index b213b5e6fef1..000000000000
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h
+++ /dev/null
@@ -1,1062 +0,0 @@
-/*
- * Copyright (c) 2016 Hisilicon Limited.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef _HNS_ROCE_HW_V1_H
-#define _HNS_ROCE_HW_V1_H
-
-#define CQ_STATE_VALID 2
-
-#define HNS_ROCE_V1_MAX_PD_NUM 0x8000
-#define HNS_ROCE_V1_MAX_CQ_NUM 0x10000
-#define HNS_ROCE_V1_MAX_CQE_NUM 0x8000
-
-#define HNS_ROCE_V1_MAX_QP_NUM 0x40000
-#define HNS_ROCE_V1_MAX_WQE_NUM 0x4000
-
-#define HNS_ROCE_V1_MAX_MTPT_NUM 0x80000
-
-#define HNS_ROCE_V1_MAX_MTT_SEGS 0x100000
-
-#define HNS_ROCE_V1_MAX_QP_INIT_RDMA 128
-#define HNS_ROCE_V1_MAX_QP_DEST_RDMA 128
-
-#define HNS_ROCE_V1_MAX_SQ_DESC_SZ 64
-#define HNS_ROCE_V1_MAX_RQ_DESC_SZ 64
-#define HNS_ROCE_V1_SG_NUM 2
-#define HNS_ROCE_V1_INLINE_SIZE 32
-
-#define HNS_ROCE_V1_UAR_NUM 256
-#define HNS_ROCE_V1_PHY_UAR_NUM 8
-
-#define HNS_ROCE_V1_GID_NUM 16
-#define HNS_ROCE_V1_RESV_QP 8
-
-#define HNS_ROCE_V1_NUM_COMP_EQE 0x8000
-#define HNS_ROCE_V1_NUM_ASYNC_EQE 0x400
-
-#define HNS_ROCE_V1_QPC_ENTRY_SIZE 256
-#define HNS_ROCE_V1_IRRL_ENTRY_SIZE 8
-#define HNS_ROCE_V1_CQC_ENTRY_SIZE 64
-#define HNS_ROCE_V1_MTPT_ENTRY_SIZE 64
-#define HNS_ROCE_V1_MTT_ENTRY_SIZE 64
-
-#define HNS_ROCE_V1_CQE_ENTRY_SIZE 32
-#define HNS_ROCE_V1_PAGE_SIZE_SUPPORT 0xFFFFF000
-
-#define HNS_ROCE_V1_EXT_RAQ_WF 8
-#define HNS_ROCE_V1_RAQ_ENTRY 64
-#define HNS_ROCE_V1_RAQ_DEPTH 32768
-#define HNS_ROCE_V1_RAQ_SIZE (HNS_ROCE_V1_RAQ_ENTRY * HNS_ROCE_V1_RAQ_DEPTH)
-
-#define HNS_ROCE_V1_SDB_DEPTH 0x400
-#define HNS_ROCE_V1_ODB_DEPTH 0x400
-
-#define HNS_ROCE_V1_DB_RSVD 0x80
-
-#define HNS_ROCE_V1_SDB_ALEPT HNS_ROCE_V1_DB_RSVD
-#define HNS_ROCE_V1_SDB_ALFUL (HNS_ROCE_V1_SDB_DEPTH - HNS_ROCE_V1_DB_RSVD)
-#define HNS_ROCE_V1_ODB_ALEPT HNS_ROCE_V1_DB_RSVD
-#define HNS_ROCE_V1_ODB_ALFUL (HNS_ROCE_V1_ODB_DEPTH - HNS_ROCE_V1_DB_RSVD)
-
-#define HNS_ROCE_V1_EXT_SDB_DEPTH 0x4000
-#define HNS_ROCE_V1_EXT_ODB_DEPTH 0x4000
-#define HNS_ROCE_V1_EXT_SDB_ENTRY 16
-#define HNS_ROCE_V1_EXT_ODB_ENTRY 16
-#define HNS_ROCE_V1_EXT_SDB_SIZE \
- (HNS_ROCE_V1_EXT_SDB_DEPTH * HNS_ROCE_V1_EXT_SDB_ENTRY)
-#define HNS_ROCE_V1_EXT_ODB_SIZE \
- (HNS_ROCE_V1_EXT_ODB_DEPTH * HNS_ROCE_V1_EXT_ODB_ENTRY)
-
-#define HNS_ROCE_V1_EXT_SDB_ALEPT HNS_ROCE_V1_DB_RSVD
-#define HNS_ROCE_V1_EXT_SDB_ALFUL \
- (HNS_ROCE_V1_EXT_SDB_DEPTH - HNS_ROCE_V1_DB_RSVD)
-#define HNS_ROCE_V1_EXT_ODB_ALEPT HNS_ROCE_V1_DB_RSVD
-#define HNS_ROCE_V1_EXT_ODB_ALFUL \
- (HNS_ROCE_V1_EXT_ODB_DEPTH - HNS_ROCE_V1_DB_RSVD)
-
-#define HNS_ROCE_V1_DB_WAIT_OK 0
-#define HNS_ROCE_V1_DB_STAGE1 1
-#define HNS_ROCE_V1_DB_STAGE2 2
-#define HNS_ROCE_V1_CHECK_DB_TIMEOUT_MSECS 10000
-#define HNS_ROCE_V1_CHECK_DB_SLEEP_MSECS 20
-#define HNS_ROCE_V1_FREE_MR_TIMEOUT_MSECS 50000
-#define HNS_ROCE_V1_RECREATE_LP_QP_TIMEOUT_MSECS 10000
-#define HNS_ROCE_V1_FREE_MR_WAIT_VALUE 5
-#define HNS_ROCE_V1_RECREATE_LP_QP_WAIT_VALUE 20
-
-#define HNS_ROCE_BT_RSV_BUF_SIZE (1 << 17)
-
-#define HNS_ROCE_V1_TPTR_ENTRY_SIZE 2
-#define HNS_ROCE_V1_TPTR_BUF_SIZE \
- (HNS_ROCE_V1_TPTR_ENTRY_SIZE * HNS_ROCE_V1_MAX_CQ_NUM)
-
-#define HNS_ROCE_ODB_POLL_MODE 0
-
-#define HNS_ROCE_SDB_NORMAL_MODE 0
-#define HNS_ROCE_SDB_EXTEND_MODE 1
-
-#define HNS_ROCE_ODB_EXTEND_MODE 1
-
-#define KEY_VALID 0x02
-
-#define HNS_ROCE_CQE_QPN_MASK 0x3ffff
-#define HNS_ROCE_CQE_STATUS_MASK 0x1f
-#define HNS_ROCE_CQE_OPCODE_MASK 0xf
-
-#define HNS_ROCE_CQE_SUCCESS 0x00
-#define HNS_ROCE_CQE_SYNDROME_LOCAL_LENGTH_ERR 0x01
-#define HNS_ROCE_CQE_SYNDROME_LOCAL_QP_OP_ERR 0x02
-#define HNS_ROCE_CQE_SYNDROME_LOCAL_PROT_ERR 0x03
-#define HNS_ROCE_CQE_SYNDROME_WR_FLUSH_ERR 0x04
-#define HNS_ROCE_CQE_SYNDROME_MEM_MANAGE_OPERATE_ERR 0x05
-#define HNS_ROCE_CQE_SYNDROME_BAD_RESP_ERR 0x06
-#define HNS_ROCE_CQE_SYNDROME_LOCAL_ACCESS_ERR 0x07
-#define HNS_ROCE_CQE_SYNDROME_REMOTE_INVAL_REQ_ERR 0x08
-#define HNS_ROCE_CQE_SYNDROME_REMOTE_ACCESS_ERR 0x09
-#define HNS_ROCE_CQE_SYNDROME_REMOTE_OP_ERR 0x0a
-#define HNS_ROCE_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR 0x0b
-#define HNS_ROCE_CQE_SYNDROME_RNR_RETRY_EXC_ERR 0x0c
-
-#define QP1C_CFGN_OFFSET 0x28
-#define PHY_PORT_OFFSET 0x8
-#define MTPT_IDX_SHIFT 16
-#define ALL_PORT_VAL_OPEN 0x3f
-#define POL_TIME_INTERVAL_VAL 0x80
-#define SLEEP_TIME_INTERVAL 20
-#define SQ_PSN_SHIFT 8
-#define QKEY_VAL 0x80010000
-#define SDB_INV_CNT_OFFSET 8
-#define SDB_ST_CMP_VAL 8
-
-struct hns_roce_cq_context {
- u32 cqc_byte_4;
- u32 cq_bt_l;
- u32 cqc_byte_12;
- u32 cur_cqe_ba0_l;
- u32 cqc_byte_20;
- u32 cqe_tptr_addr_l;
- u32 cur_cqe_ba1_l;
- u32 cqc_byte_32;
-};
-
-#define CQ_CONTEXT_CQC_BYTE_4_CQC_STATE_S 0
-#define CQ_CONTEXT_CQC_BYTE_4_CQC_STATE_M \
- (((1UL << 2) - 1) << CQ_CONTEXT_CQC_BYTE_4_CQC_STATE_S)
-
-#define CQ_CONTEXT_CQC_BYTE_4_CQN_S 16
-#define CQ_CONTEXT_CQC_BYTE_4_CQN_M \
- (((1UL << 16) - 1) << CQ_CONTEXT_CQC_BYTE_4_CQN_S)
-
-#define CQ_CONTEXT_CQC_BYTE_12_CQ_BT_H_S 0
-#define CQ_CONTEXT_CQC_BYTE_12_CQ_BT_H_M \
- (((1UL << 17) - 1) << CQ_CONTEXT_CQC_BYTE_12_CQ_BT_H_S)
-
-#define CQ_CONTEXT_CQC_BYTE_12_CQ_CQE_SHIFT_S 20
-#define CQ_CONTEXT_CQC_BYTE_12_CQ_CQE_SHIFT_M \
- (((1UL << 4) - 1) << CQ_CONTEXT_CQC_BYTE_12_CQ_CQE_SHIFT_S)
-
-#define CQ_CONTEXT_CQC_BYTE_12_CEQN_S 24
-#define CQ_CONTEXT_CQC_BYTE_12_CEQN_M \
- (((1UL << 5) - 1) << CQ_CONTEXT_CQC_BYTE_12_CEQN_S)
-
-#define CQ_CONTEXT_CQC_BYTE_20_CUR_CQE_BA0_H_S 0
-#define CQ_CONTEXT_CQC_BYTE_20_CUR_CQE_BA0_H_M \
- (((1UL << 5) - 1) << CQ_CONTEXT_CQC_BYTE_20_CUR_CQE_BA0_H_S)
-
-#define CQ_CONTEXT_CQC_BYTE_20_CQ_CUR_INDEX_S 16
-#define CQ_CONTEXT_CQC_BYTE_20_CQ_CUR_INDEX_M \
- (((1UL << 16) - 1) << CQ_CONTEXT_CQC_BYTE_20_CQ_CUR_INDEX_S)
-
-#define CQ_CONTEXT_CQC_BYTE_20_CQE_TPTR_ADDR_H_S 8
-#define CQ_CONTEXT_CQC_BYTE_20_CQE_TPTR_ADDR_H_M \
- (((1UL << 5) - 1) << CQ_CONTEXT_CQC_BYTE_20_CQE_TPTR_ADDR_H_S)
-
-#define CQ_CONTEXT_CQC_BYTE_32_CUR_CQE_BA1_H_S 0
-#define CQ_CONTEXT_CQC_BYTE_32_CUR_CQE_BA1_H_M \
- (((1UL << 5) - 1) << CQ_CONTEXT_CQC_BYTE_32_CUR_CQE_BA1_H_S)
-
-#define CQ_CONTEXT_CQC_BYTE_32_SE_FLAG_S 9
-
-#define CQ_CONTEXT_CQC_BYTE_32_CE_FLAG_S 8
-#define CQ_CONTEXT_CQC_BYTE_32_NOTIFICATION_FLAG_S 14
-#define CQ_CQNTEXT_CQC_BYTE_32_TYPE_OF_COMPLETION_NOTIFICATION_S 15
-
-#define CQ_CONTEXT_CQC_BYTE_32_CQ_CONS_IDX_S 16
-#define CQ_CONTEXT_CQC_BYTE_32_CQ_CONS_IDX_M \
- (((1UL << 16) - 1) << CQ_CONTEXT_CQC_BYTE_32_CQ_CONS_IDX_S)
-
-struct hns_roce_cqe {
- u32 cqe_byte_4;
- union {
- u32 r_key;
- u32 immediate_data;
- };
- u32 byte_cnt;
- u32 cqe_byte_16;
- u32 cqe_byte_20;
- u32 s_mac_l;
- u32 cqe_byte_28;
- u32 reserved;
-};
-
-#define CQE_BYTE_4_OWNER_S 7
-#define CQE_BYTE_4_SQ_RQ_FLAG_S 14
-
-#define CQE_BYTE_4_STATUS_OF_THE_OPERATION_S 8
-#define CQE_BYTE_4_STATUS_OF_THE_OPERATION_M \
- (((1UL << 5) - 1) << CQE_BYTE_4_STATUS_OF_THE_OPERATION_S)
-
-#define CQE_BYTE_4_WQE_INDEX_S 16
-#define CQE_BYTE_4_WQE_INDEX_M (((1UL << 14) - 1) << CQE_BYTE_4_WQE_INDEX_S)
-
-#define CQE_BYTE_4_OPERATION_TYPE_S 0
-#define CQE_BYTE_4_OPERATION_TYPE_M \
- (((1UL << 4) - 1) << CQE_BYTE_4_OPERATION_TYPE_S)
-
-#define CQE_BYTE_4_IMM_INDICATOR_S 15
-
-#define CQE_BYTE_16_LOCAL_QPN_S 0
-#define CQE_BYTE_16_LOCAL_QPN_M (((1UL << 24) - 1) << CQE_BYTE_16_LOCAL_QPN_S)
-
-#define CQE_BYTE_20_PORT_NUM_S 26
-#define CQE_BYTE_20_PORT_NUM_M (((1UL << 3) - 1) << CQE_BYTE_20_PORT_NUM_S)
-
-#define CQE_BYTE_20_SL_S 24
-#define CQE_BYTE_20_SL_M (((1UL << 2) - 1) << CQE_BYTE_20_SL_S)
-
-#define CQE_BYTE_20_REMOTE_QPN_S 0
-#define CQE_BYTE_20_REMOTE_QPN_M \
- (((1UL << 24) - 1) << CQE_BYTE_20_REMOTE_QPN_S)
-
-#define CQE_BYTE_20_GRH_PRESENT_S 29
-
-#define CQE_BYTE_28_P_KEY_IDX_S 16
-#define CQE_BYTE_28_P_KEY_IDX_M (((1UL << 16) - 1) << CQE_BYTE_28_P_KEY_IDX_S)
-
-#define CQ_DB_REQ_NOT_SOL 0
-#define CQ_DB_REQ_NOT (1 << 16)
-
-struct hns_roce_v1_mpt_entry {
- u32 mpt_byte_4;
- u32 pbl_addr_l;
- u32 mpt_byte_12;
- u32 virt_addr_l;
- u32 virt_addr_h;
- u32 length;
- u32 mpt_byte_28;
- u32 pa0_l;
- u32 mpt_byte_36;
- u32 mpt_byte_40;
- u32 mpt_byte_44;
- u32 mpt_byte_48;
- u32 pa4_l;
- u32 mpt_byte_56;
- u32 mpt_byte_60;
- u32 mpt_byte_64;
-};
-
-#define MPT_BYTE_4_KEY_STATE_S 0
-#define MPT_BYTE_4_KEY_STATE_M (((1UL << 2) - 1) << MPT_BYTE_4_KEY_STATE_S)
-
-#define MPT_BYTE_4_KEY_S 8
-#define MPT_BYTE_4_KEY_M (((1UL << 8) - 1) << MPT_BYTE_4_KEY_S)
-
-#define MPT_BYTE_4_PAGE_SIZE_S 16
-#define MPT_BYTE_4_PAGE_SIZE_M (((1UL << 2) - 1) << MPT_BYTE_4_PAGE_SIZE_S)
-
-#define MPT_BYTE_4_MW_TYPE_S 20
-
-#define MPT_BYTE_4_MW_BIND_ENABLE_S 21
-
-#define MPT_BYTE_4_OWN_S 22
-
-#define MPT_BYTE_4_MEMORY_LOCATION_TYPE_S 24
-#define MPT_BYTE_4_MEMORY_LOCATION_TYPE_M \
- (((1UL << 2) - 1) << MPT_BYTE_4_MEMORY_LOCATION_TYPE_S)
-
-#define MPT_BYTE_4_REMOTE_ATOMIC_S 26
-#define MPT_BYTE_4_LOCAL_WRITE_S 27
-#define MPT_BYTE_4_REMOTE_WRITE_S 28
-#define MPT_BYTE_4_REMOTE_READ_S 29
-#define MPT_BYTE_4_REMOTE_INVAL_ENABLE_S 30
-#define MPT_BYTE_4_ADDRESS_TYPE_S 31
-
-#define MPT_BYTE_12_PBL_ADDR_H_S 0
-#define MPT_BYTE_12_PBL_ADDR_H_M \
- (((1UL << 17) - 1) << MPT_BYTE_12_PBL_ADDR_H_S)
-
-#define MPT_BYTE_12_MW_BIND_COUNTER_S 17
-#define MPT_BYTE_12_MW_BIND_COUNTER_M \
- (((1UL << 15) - 1) << MPT_BYTE_12_MW_BIND_COUNTER_S)
-
-#define MPT_BYTE_28_PD_S 0
-#define MPT_BYTE_28_PD_M (((1UL << 16) - 1) << MPT_BYTE_28_PD_S)
-
-#define MPT_BYTE_28_L_KEY_IDX_L_S 16
-#define MPT_BYTE_28_L_KEY_IDX_L_M \
- (((1UL << 16) - 1) << MPT_BYTE_28_L_KEY_IDX_L_S)
-
-#define MPT_BYTE_36_PA0_H_S 0
-#define MPT_BYTE_36_PA0_H_M (((1UL << 5) - 1) << MPT_BYTE_36_PA0_H_S)
-
-#define MPT_BYTE_36_PA1_L_S 8
-#define MPT_BYTE_36_PA1_L_M (((1UL << 24) - 1) << MPT_BYTE_36_PA1_L_S)
-
-#define MPT_BYTE_40_PA1_H_S 0
-#define MPT_BYTE_40_PA1_H_M (((1UL << 13) - 1) << MPT_BYTE_40_PA1_H_S)
-
-#define MPT_BYTE_40_PA2_L_S 16
-#define MPT_BYTE_40_PA2_L_M (((1UL << 16) - 1) << MPT_BYTE_40_PA2_L_S)
-
-#define MPT_BYTE_44_PA2_H_S 0
-#define MPT_BYTE_44_PA2_H_M (((1UL << 21) - 1) << MPT_BYTE_44_PA2_H_S)
-
-#define MPT_BYTE_44_PA3_L_S 24
-#define MPT_BYTE_44_PA3_L_M (((1UL << 8) - 1) << MPT_BYTE_44_PA3_L_S)
-
-#define MPT_BYTE_48_PA3_H_S 0
-#define MPT_BYTE_48_PA3_H_M (((1UL << 29) - 1) << MPT_BYTE_48_PA3_H_S)
-
-#define MPT_BYTE_56_PA4_H_S 0
-#define MPT_BYTE_56_PA4_H_M (((1UL << 5) - 1) << MPT_BYTE_56_PA4_H_S)
-
-#define MPT_BYTE_56_PA5_L_S 8
-#define MPT_BYTE_56_PA5_L_M (((1UL << 24) - 1) << MPT_BYTE_56_PA5_L_S)
-
-#define MPT_BYTE_60_PA5_H_S 0
-#define MPT_BYTE_60_PA5_H_M (((1UL << 13) - 1) << MPT_BYTE_60_PA5_H_S)
-
-#define MPT_BYTE_60_PA6_L_S 16
-#define MPT_BYTE_60_PA6_L_M (((1UL << 16) - 1) << MPT_BYTE_60_PA6_L_S)
-
-#define MPT_BYTE_64_PA6_H_S 0
-#define MPT_BYTE_64_PA6_H_M (((1UL << 21) - 1) << MPT_BYTE_64_PA6_H_S)
-
-#define MPT_BYTE_64_L_KEY_IDX_H_S 24
-#define MPT_BYTE_64_L_KEY_IDX_H_M \
- (((1UL << 8) - 1) << MPT_BYTE_64_L_KEY_IDX_H_S)
-
-struct hns_roce_wqe_ctrl_seg {
- __be32 sgl_pa_h;
- __be32 flag;
- __be32 imm_data;
- __be32 msg_length;
-};
-
-struct hns_roce_wqe_data_seg {
- __be64 addr;
- __be32 lkey;
- __be32 len;
-};
-
-struct hns_roce_wqe_raddr_seg {
- __be32 rkey;
- __be32 len;/* reserved */
- __be64 raddr;
-};
-
-struct hns_roce_rq_wqe_ctrl {
-
- u32 rwqe_byte_4;
- u32 rocee_sgl_ba_l;
- u32 rwqe_byte_12;
- u32 reserved[5];
-};
-
-#define RQ_WQE_CTRL_RWQE_BYTE_12_RWQE_SGE_NUM_S 16
-#define RQ_WQE_CTRL_RWQE_BYTE_12_RWQE_SGE_NUM_M \
- (((1UL << 6) - 1) << RQ_WQE_CTRL_RWQE_BYTE_12_RWQE_SGE_NUM_S)
-
-#define HNS_ROCE_QP_DESTROY_TIMEOUT_MSECS 10000
-
-#define GID_LEN 16
-
-struct hns_roce_ud_send_wqe {
- u32 dmac_h;
- u32 u32_8;
- u32 immediate_data;
-
- u32 u32_16;
- union {
- unsigned char dgid[GID_LEN];
- struct {
- u32 u32_20;
- u32 u32_24;
- u32 u32_28;
- u32 u32_32;
- };
- };
-
- u32 u32_36;
- u32 u32_40;
-
- u32 va0_l;
- u32 va0_h;
- u32 l_key0;
-
- u32 va1_l;
- u32 va1_h;
- u32 l_key1;
-};
-
-#define UD_SEND_WQE_U32_4_DMAC_0_S 0
-#define UD_SEND_WQE_U32_4_DMAC_0_M \
- (((1UL << 8) - 1) << UD_SEND_WQE_U32_4_DMAC_0_S)
-
-#define UD_SEND_WQE_U32_4_DMAC_1_S 8
-#define UD_SEND_WQE_U32_4_DMAC_1_M \
- (((1UL << 8) - 1) << UD_SEND_WQE_U32_4_DMAC_1_S)
-
-#define UD_SEND_WQE_U32_4_DMAC_2_S 16
-#define UD_SEND_WQE_U32_4_DMAC_2_M \
- (((1UL << 8) - 1) << UD_SEND_WQE_U32_4_DMAC_2_S)
-
-#define UD_SEND_WQE_U32_4_DMAC_3_S 24
-#define UD_SEND_WQE_U32_4_DMAC_3_M \
- (((1UL << 8) - 1) << UD_SEND_WQE_U32_4_DMAC_3_S)
-
-#define UD_SEND_WQE_U32_8_DMAC_4_S 0
-#define UD_SEND_WQE_U32_8_DMAC_4_M \
- (((1UL << 8) - 1) << UD_SEND_WQE_U32_8_DMAC_4_S)
-
-#define UD_SEND_WQE_U32_8_DMAC_5_S 8
-#define UD_SEND_WQE_U32_8_DMAC_5_M \
- (((1UL << 8) - 1) << UD_SEND_WQE_U32_8_DMAC_5_S)
-
-#define UD_SEND_WQE_U32_8_LOOPBACK_INDICATOR_S 22
-
-#define UD_SEND_WQE_U32_8_OPERATION_TYPE_S 16
-#define UD_SEND_WQE_U32_8_OPERATION_TYPE_M \
- (((1UL << 4) - 1) << UD_SEND_WQE_U32_8_OPERATION_TYPE_S)
-
-#define UD_SEND_WQE_U32_8_NUMBER_OF_DATA_SEG_S 24
-#define UD_SEND_WQE_U32_8_NUMBER_OF_DATA_SEG_M \
- (((1UL << 6) - 1) << UD_SEND_WQE_U32_8_NUMBER_OF_DATA_SEG_S)
-
-#define UD_SEND_WQE_U32_8_SEND_GL_ROUTING_HDR_FLAG_S 31
-
-#define UD_SEND_WQE_U32_16_DEST_QP_S 0
-#define UD_SEND_WQE_U32_16_DEST_QP_M \
- (((1UL << 24) - 1) << UD_SEND_WQE_U32_16_DEST_QP_S)
-
-#define UD_SEND_WQE_U32_16_MAX_STATIC_RATE_S 24
-#define UD_SEND_WQE_U32_16_MAX_STATIC_RATE_M \
- (((1UL << 8) - 1) << UD_SEND_WQE_U32_16_MAX_STATIC_RATE_S)
-
-#define UD_SEND_WQE_U32_36_FLOW_LABEL_S 0
-#define UD_SEND_WQE_U32_36_FLOW_LABEL_M \
- (((1UL << 20) - 1) << UD_SEND_WQE_U32_36_FLOW_LABEL_S)
-
-#define UD_SEND_WQE_U32_36_PRIORITY_S 20
-#define UD_SEND_WQE_U32_36_PRIORITY_M \
- (((1UL << 4) - 1) << UD_SEND_WQE_U32_36_PRIORITY_S)
-
-#define UD_SEND_WQE_U32_36_SGID_INDEX_S 24
-#define UD_SEND_WQE_U32_36_SGID_INDEX_M \
- (((1UL << 8) - 1) << UD_SEND_WQE_U32_36_SGID_INDEX_S)
-
-#define UD_SEND_WQE_U32_40_HOP_LIMIT_S 0
-#define UD_SEND_WQE_U32_40_HOP_LIMIT_M \
- (((1UL << 8) - 1) << UD_SEND_WQE_U32_40_HOP_LIMIT_S)
-
-#define UD_SEND_WQE_U32_40_TRAFFIC_CLASS_S 8
-#define UD_SEND_WQE_U32_40_TRAFFIC_CLASS_M \
- (((1UL << 8) - 1) << UD_SEND_WQE_U32_40_TRAFFIC_CLASS_S)
-
-struct hns_roce_sqp_context {
- u32 qp1c_bytes_4;
- u32 sq_rq_bt_l;
- u32 qp1c_bytes_12;
- u32 qp1c_bytes_16;
- u32 qp1c_bytes_20;
- u32 cur_rq_wqe_ba_l;
- u32 qp1c_bytes_28;
- u32 qp1c_bytes_32;
- u32 cur_sq_wqe_ba_l;
- u32 qp1c_bytes_40;
-};
-
-#define QP1C_BYTES_4_QP_STATE_S 0
-#define QP1C_BYTES_4_QP_STATE_M \
- (((1UL << 3) - 1) << QP1C_BYTES_4_QP_STATE_S)
-
-#define QP1C_BYTES_4_SQ_WQE_SHIFT_S 8
-#define QP1C_BYTES_4_SQ_WQE_SHIFT_M \
- (((1UL << 4) - 1) << QP1C_BYTES_4_SQ_WQE_SHIFT_S)
-
-#define QP1C_BYTES_4_RQ_WQE_SHIFT_S 12
-#define QP1C_BYTES_4_RQ_WQE_SHIFT_M \
- (((1UL << 4) - 1) << QP1C_BYTES_4_RQ_WQE_SHIFT_S)
-
-#define QP1C_BYTES_4_PD_S 16
-#define QP1C_BYTES_4_PD_M (((1UL << 16) - 1) << QP1C_BYTES_4_PD_S)
-
-#define QP1C_BYTES_12_SQ_RQ_BT_H_S 0
-#define QP1C_BYTES_12_SQ_RQ_BT_H_M \
- (((1UL << 17) - 1) << QP1C_BYTES_12_SQ_RQ_BT_H_S)
-
-#define QP1C_BYTES_16_RQ_HEAD_S 0
-#define QP1C_BYTES_16_RQ_HEAD_M (((1UL << 15) - 1) << QP1C_BYTES_16_RQ_HEAD_S)
-
-#define QP1C_BYTES_16_PORT_NUM_S 16
-#define QP1C_BYTES_16_PORT_NUM_M \
- (((1UL << 3) - 1) << QP1C_BYTES_16_PORT_NUM_S)
-
-#define QP1C_BYTES_16_SIGNALING_TYPE_S 27
-#define QP1C_BYTES_16_LOCAL_ENABLE_E2E_CREDIT_S 28
-#define QP1C_BYTES_16_RQ_BA_FLG_S 29
-#define QP1C_BYTES_16_SQ_BA_FLG_S 30
-#define QP1C_BYTES_16_QP1_ERR_S 31
-
-#define QP1C_BYTES_20_SQ_HEAD_S 0
-#define QP1C_BYTES_20_SQ_HEAD_M (((1UL << 15) - 1) << QP1C_BYTES_20_SQ_HEAD_S)
-
-#define QP1C_BYTES_20_PKEY_IDX_S 16
-#define QP1C_BYTES_20_PKEY_IDX_M \
- (((1UL << 16) - 1) << QP1C_BYTES_20_PKEY_IDX_S)
-
-#define QP1C_BYTES_28_CUR_RQ_WQE_BA_H_S 0
-#define QP1C_BYTES_28_CUR_RQ_WQE_BA_H_M \
- (((1UL << 5) - 1) << QP1C_BYTES_28_CUR_RQ_WQE_BA_H_S)
-
-#define QP1C_BYTES_28_RQ_CUR_IDX_S 16
-#define QP1C_BYTES_28_RQ_CUR_IDX_M \
- (((1UL << 15) - 1) << QP1C_BYTES_28_RQ_CUR_IDX_S)
-
-#define QP1C_BYTES_32_TX_CQ_NUM_S 0
-#define QP1C_BYTES_32_TX_CQ_NUM_M \
- (((1UL << 16) - 1) << QP1C_BYTES_32_TX_CQ_NUM_S)
-
-#define QP1C_BYTES_32_RX_CQ_NUM_S 16
-#define QP1C_BYTES_32_RX_CQ_NUM_M \
- (((1UL << 16) - 1) << QP1C_BYTES_32_RX_CQ_NUM_S)
-
-#define QP1C_BYTES_40_CUR_SQ_WQE_BA_H_S 0
-#define QP1C_BYTES_40_CUR_SQ_WQE_BA_H_M \
- (((1UL << 5) - 1) << QP1C_BYTES_40_CUR_SQ_WQE_BA_H_S)
-
-#define QP1C_BYTES_40_SQ_CUR_IDX_S 16
-#define QP1C_BYTES_40_SQ_CUR_IDX_M \
- (((1UL << 15) - 1) << QP1C_BYTES_40_SQ_CUR_IDX_S)
-
-#define HNS_ROCE_WQE_INLINE (1UL<<31)
-#define HNS_ROCE_WQE_SE (1UL<<30)
-
-#define HNS_ROCE_WQE_SGE_NUM_BIT 24
-#define HNS_ROCE_WQE_IMM (1UL<<23)
-#define HNS_ROCE_WQE_FENCE (1UL<<21)
-#define HNS_ROCE_WQE_CQ_NOTIFY (1UL<<20)
-
-#define HNS_ROCE_WQE_OPCODE_SEND (0<<16)
-#define HNS_ROCE_WQE_OPCODE_RDMA_READ (1<<16)
-#define HNS_ROCE_WQE_OPCODE_RDMA_WRITE (2<<16)
-#define HNS_ROCE_WQE_OPCODE_LOCAL_INV (4<<16)
-#define HNS_ROCE_WQE_OPCODE_UD_SEND (7<<16)
-#define HNS_ROCE_WQE_OPCODE_MASK (15<<16)
-
-struct hns_roce_qp_context {
- u32 qpc_bytes_4;
- u32 qpc_bytes_8;
- u32 qpc_bytes_12;
- u32 qpc_bytes_16;
- u32 sq_rq_bt_l;
- u32 qpc_bytes_24;
- u32 irrl_ba_l;
- u32 qpc_bytes_32;
- u32 qpc_bytes_36;
- u32 dmac_l;
- u32 qpc_bytes_44;
- u32 qpc_bytes_48;
- u8 dgid[16];
- u32 qpc_bytes_68;
- u32 cur_rq_wqe_ba_l;
- u32 qpc_bytes_76;
- u32 rx_rnr_time;
- u32 qpc_bytes_84;
- u32 qpc_bytes_88;
- union {
- u32 rx_sge_len;
- u32 dma_length;
- };
- union {
- u32 rx_sge_num;
- u32 rx_send_pktn;
- u32 r_key;
- };
- u32 va_l;
- u32 va_h;
- u32 qpc_bytes_108;
- u32 qpc_bytes_112;
- u32 rx_cur_sq_wqe_ba_l;
- u32 qpc_bytes_120;
- u32 qpc_bytes_124;
- u32 qpc_bytes_128;
- u32 qpc_bytes_132;
- u32 qpc_bytes_136;
- u32 qpc_bytes_140;
- u32 qpc_bytes_144;
- u32 qpc_bytes_148;
- union {
- u32 rnr_retry;
- u32 ack_time;
- };
- u32 qpc_bytes_156;
- u32 pkt_use_len;
- u32 qpc_bytes_164;
- u32 qpc_bytes_168;
- union {
- u32 sge_use_len;
- u32 pa_use_len;
- };
- u32 qpc_bytes_176;
- u32 qpc_bytes_180;
- u32 tx_cur_sq_wqe_ba_l;
- u32 qpc_bytes_188;
- u32 rvd21;
-};
-
-#define QP_CONTEXT_QPC_BYTES_4_TRANSPORT_SERVICE_TYPE_S 0
-#define QP_CONTEXT_QPC_BYTES_4_TRANSPORT_SERVICE_TYPE_M \
- (((1UL << 3) - 1) << QP_CONTEXT_QPC_BYTES_4_TRANSPORT_SERVICE_TYPE_S)
-
-#define QP_CONTEXT_QPC_BYTE_4_ENABLE_FPMR_S 3
-#define QP_CONTEXT_QPC_BYTE_4_RDMA_READ_ENABLE_S 4
-#define QP_CONTEXT_QPC_BYTE_4_RDMA_WRITE_ENABLE_S 5
-#define QP_CONTEXT_QPC_BYTE_4_ATOMIC_OPERATION_ENABLE_S 6
-#define QP_CONTEXT_QPC_BYTE_4_RDMAR_USE_S 7
-
-#define QP_CONTEXT_QPC_BYTES_4_SQ_WQE_SHIFT_S 8
-#define QP_CONTEXT_QPC_BYTES_4_SQ_WQE_SHIFT_M \
- (((1UL << 4) - 1) << QP_CONTEXT_QPC_BYTES_4_SQ_WQE_SHIFT_S)
-
-#define QP_CONTEXT_QPC_BYTES_4_RQ_WQE_SHIFT_S 12
-#define QP_CONTEXT_QPC_BYTES_4_RQ_WQE_SHIFT_M \
- (((1UL << 4) - 1) << QP_CONTEXT_QPC_BYTES_4_RQ_WQE_SHIFT_S)
-
-#define QP_CONTEXT_QPC_BYTES_4_PD_S 16
-#define QP_CONTEXT_QPC_BYTES_4_PD_M \
- (((1UL << 16) - 1) << QP_CONTEXT_QPC_BYTES_4_PD_S)
-
-#define QP_CONTEXT_QPC_BYTES_8_TX_COMPLETION_S 0
-#define QP_CONTEXT_QPC_BYTES_8_TX_COMPLETION_M \
- (((1UL << 16) - 1) << QP_CONTEXT_QPC_BYTES_8_TX_COMPLETION_S)
-
-#define QP_CONTEXT_QPC_BYTES_8_RX_COMPLETION_S 16
-#define QP_CONTEXT_QPC_BYTES_8_RX_COMPLETION_M \
- (((1UL << 16) - 1) << QP_CONTEXT_QPC_BYTES_8_RX_COMPLETION_S)
-
-#define QP_CONTEXT_QPC_BYTES_12_SRQ_NUMBER_S 0
-#define QP_CONTEXT_QPC_BYTES_12_SRQ_NUMBER_M \
- (((1UL << 16) - 1) << QP_CONTEXT_QPC_BYTES_12_SRQ_NUMBER_S)
-
-#define QP_CONTEXT_QPC_BYTES_12_P_KEY_INDEX_S 16
-#define QP_CONTEXT_QPC_BYTES_12_P_KEY_INDEX_M \
- (((1UL << 16) - 1) << QP_CONTEXT_QPC_BYTES_12_P_KEY_INDEX_S)
-
-#define QP_CONTEXT_QPC_BYTES_16_QP_NUM_S 0
-#define QP_CONTEXT_QPC_BYTES_16_QP_NUM_M \
- (((1UL << 24) - 1) << QP_CONTEXT_QPC_BYTES_16_QP_NUM_S)
-
-#define QP_CONTEXT_QPC_BYTES_24_SQ_RQ_BT_H_S 0
-#define QP_CONTEXT_QPC_BYTES_24_SQ_RQ_BT_H_M \
- (((1UL << 17) - 1) << QP_CONTEXT_QPC_BYTES_24_SQ_RQ_BT_H_S)
-
-#define QP_CONTEXT_QPC_BYTES_24_MINIMUM_RNR_NAK_TIMER_S 18
-#define QP_CONTEXT_QPC_BYTES_24_MINIMUM_RNR_NAK_TIMER_M \
- (((1UL << 5) - 1) << QP_CONTEXT_QPC_BYTES_24_MINIMUM_RNR_NAK_TIMER_S)
-
-#define QP_CONTEXT_QPC_BYTE_24_REMOTE_ENABLE_E2E_CREDITS_S 23
-
-#define QP_CONTEXT_QPC_BYTES_32_IRRL_BA_H_S 0
-#define QP_CONTEXT_QPC_BYTES_32_IRRL_BA_H_M \
- (((1UL << 17) - 1) << QP_CONTEXT_QPC_BYTES_32_IRRL_BA_H_S)
-
-#define QP_CONTEXT_QPC_BYTES_32_MIG_STATE_S 18
-#define QP_CONTEXT_QPC_BYTES_32_MIG_STATE_M \
- (((1UL << 2) - 1) << QP_CONTEXT_QPC_BYTES_32_MIG_STATE_S)
-
-#define QP_CONTEXT_QPC_BYTE_32_LOCAL_ENABLE_E2E_CREDITS_S 20
-#define QP_CONTEXT_QPC_BYTE_32_SIGNALING_TYPE_S 21
-#define QP_CONTEXT_QPC_BYTE_32_LOOPBACK_INDICATOR_S 22
-#define QP_CONTEXT_QPC_BYTE_32_GLOBAL_HEADER_S 23
-
-#define QP_CONTEXT_QPC_BYTES_32_RESPONDER_RESOURCES_S 24
-#define QP_CONTEXT_QPC_BYTES_32_RESPONDER_RESOURCES_M \
- (((1UL << 8) - 1) << QP_CONTEXT_QPC_BYTES_32_RESPONDER_RESOURCES_S)
-
-#define QP_CONTEXT_QPC_BYTES_36_DEST_QP_S 0
-#define QP_CONTEXT_QPC_BYTES_36_DEST_QP_M \
- (((1UL << 24) - 1) << QP_CONTEXT_QPC_BYTES_36_DEST_QP_S)
-
-#define QP_CONTEXT_QPC_BYTES_36_SGID_INDEX_S 24
-#define QP_CONTEXT_QPC_BYTES_36_SGID_INDEX_M \
- (((1UL << 8) - 1) << QP_CONTEXT_QPC_BYTES_36_SGID_INDEX_S)
-
-#define QP_CONTEXT_QPC_BYTES_44_DMAC_H_S 0
-#define QP_CONTEXT_QPC_BYTES_44_DMAC_H_M \
- (((1UL << 16) - 1) << QP_CONTEXT_QPC_BYTES_44_DMAC_H_S)
-
-#define QP_CONTEXT_QPC_BYTES_44_MAXIMUM_STATIC_RATE_S 16
-#define QP_CONTEXT_QPC_BYTES_44_MAXIMUM_STATIC_RATE_M \
- (((1UL << 8) - 1) << QP_CONTEXT_QPC_BYTES_44_MAXIMUM_STATIC_RATE_S)
-
-#define QP_CONTEXT_QPC_BYTES_44_HOPLMT_S 24
-#define QP_CONTEXT_QPC_BYTES_44_HOPLMT_M \
- (((1UL << 8) - 1) << QP_CONTEXT_QPC_BYTES_44_HOPLMT_S)
-
-#define QP_CONTEXT_QPC_BYTES_48_FLOWLABEL_S 0
-#define QP_CONTEXT_QPC_BYTES_48_FLOWLABEL_M \
- (((1UL << 20) - 1) << QP_CONTEXT_QPC_BYTES_48_FLOWLABEL_S)
-
-#define QP_CONTEXT_QPC_BYTES_48_TCLASS_S 20
-#define QP_CONTEXT_QPC_BYTES_48_TCLASS_M \
- (((1UL << 8) - 1) << QP_CONTEXT_QPC_BYTES_48_TCLASS_S)
-
-#define QP_CONTEXT_QPC_BYTES_48_MTU_S 28
-#define QP_CONTEXT_QPC_BYTES_48_MTU_M \
- (((1UL << 4) - 1) << QP_CONTEXT_QPC_BYTES_48_MTU_S)
-
-#define QP_CONTEXT_QPC_BYTES_68_RQ_HEAD_S 0
-#define QP_CONTEXT_QPC_BYTES_68_RQ_HEAD_M \
- (((1UL << 15) - 1) << QP_CONTEXT_QPC_BYTES_68_RQ_HEAD_S)
-
-#define QP_CONTEXT_QPC_BYTES_68_RQ_CUR_INDEX_S 16
-#define QP_CONTEXT_QPC_BYTES_68_RQ_CUR_INDEX_M \
- (((1UL << 15) - 1) << QP_CONTEXT_QPC_BYTES_68_RQ_CUR_INDEX_S)
-
-#define QP_CONTEXT_QPC_BYTES_76_CUR_RQ_WQE_BA_H_S 0
-#define QP_CONTEXT_QPC_BYTES_76_CUR_RQ_WQE_BA_H_M \
- (((1UL << 5) - 1) << QP_CONTEXT_QPC_BYTES_76_CUR_RQ_WQE_BA_H_S)
-
-#define QP_CONTEXT_QPC_BYTES_76_RX_REQ_MSN_S 8
-#define QP_CONTEXT_QPC_BYTES_76_RX_REQ_MSN_M \
- (((1UL << 24) - 1) << QP_CONTEXT_QPC_BYTES_76_RX_REQ_MSN_S)
-
-#define QP_CONTEXT_QPC_BYTES_84_LAST_ACK_PSN_S 0
-#define QP_CONTEXT_QPC_BYTES_84_LAST_ACK_PSN_M \
- (((1UL << 24) - 1) << QP_CONTEXT_QPC_BYTES_84_LAST_ACK_PSN_S)
-
-#define QP_CONTEXT_QPC_BYTES_84_TRRL_HEAD_S 24
-#define QP_CONTEXT_QPC_BYTES_84_TRRL_HEAD_M \
- (((1UL << 8) - 1) << QP_CONTEXT_QPC_BYTES_84_TRRL_HEAD_S)
-
-#define QP_CONTEXT_QPC_BYTES_88_RX_REQ_EPSN_S 0
-#define QP_CONTEXT_QPC_BYTES_88_RX_REQ_EPSN_M \
- (((1UL << 24) - 1) << QP_CONTEXT_QPC_BYTES_88_RX_REQ_EPSN_S)
-
-#define QP_CONTEXT_QPC_BYTES_88_RX_REQ_PSN_ERR_FLAG_S 24
-#define QP_CONTEXT_QPC_BYTES_88_RX_LAST_OPCODE_FLG_S 25
-
-#define QP_CONTEXT_QPC_BYTES_88_RQ_REQ_LAST_OPERATION_TYPE_S 26
-#define QP_CONTEXT_QPC_BYTES_88_RQ_REQ_LAST_OPERATION_TYPE_M \
- (((1UL << 2) - 1) << \
- QP_CONTEXT_QPC_BYTES_88_RQ_REQ_LAST_OPERATION_TYPE_S)
-
-#define QP_CONTEXT_QPC_BYTES_88_RQ_REQ_RDMA_WR_FLAG_S 29
-#define QP_CONTEXT_QPC_BYTES_88_RQ_REQ_RDMA_WR_FLAG_M \
- (((1UL << 2) - 1) << QP_CONTEXT_QPC_BYTES_88_RQ_REQ_RDMA_WR_FLAG_S)
-
-#define QP_CONTEXT_QPC_BYTES_108_TRRL_SDB_PSN_S 0
-#define QP_CONTEXT_QPC_BYTES_108_TRRL_SDB_PSN_M \
- (((1UL << 24) - 1) << QP_CONTEXT_QPC_BYTES_108_TRRL_SDB_PSN_S)
-
-#define QP_CONTEXT_QPC_BYTES_108_TRRL_SDB_PSN_FLG_S 24
-#define QP_CONTEXT_QPC_BYTES_108_TRRL_TDB_PSN_FLG_S 25
-
-#define QP_CONTEXT_QPC_BYTES_112_TRRL_TDB_PSN_S 0
-#define QP_CONTEXT_QPC_BYTES_112_TRRL_TDB_PSN_M \
- (((1UL << 24) - 1) << QP_CONTEXT_QPC_BYTES_112_TRRL_TDB_PSN_S)
-
-#define QP_CONTEXT_QPC_BYTES_112_TRRL_TAIL_S 24
-#define QP_CONTEXT_QPC_BYTES_112_TRRL_TAIL_M \
- (((1UL << 8) - 1) << QP_CONTEXT_QPC_BYTES_112_TRRL_TAIL_S)
-
-#define QP_CONTEXT_QPC_BYTES_120_RX_CUR_SQ_WQE_BA_H_S 0
-#define QP_CONTEXT_QPC_BYTES_120_RX_CUR_SQ_WQE_BA_H_M \
- (((1UL << 5) - 1) << QP_CONTEXT_QPC_BYTES_120_RX_CUR_SQ_WQE_BA_H_S)
-
-#define QP_CONTEXT_QPC_BYTES_124_RX_ACK_MSN_S 0
-#define QP_CONTEXT_QPC_BYTES_124_RX_ACK_MSN_M \
- (((1UL << 15) - 1) << QP_CONTEXT_QPC_BYTES_124_RX_ACK_MSN_S)
-
-#define QP_CONTEXT_QPC_BYTES_124_IRRL_MSG_IDX_S 16
-#define QP_CONTEXT_QPC_BYTES_124_IRRL_MSG_IDX_M \
- (((1UL << 15) - 1) << QP_CONTEXT_QPC_BYTES_124_IRRL_MSG_IDX_S)
-
-#define QP_CONTEXT_QPC_BYTES_128_RX_ACK_EPSN_S 0
-#define QP_CONTEXT_QPC_BYTES_128_RX_ACK_EPSN_M \
- (((1UL << 24) - 1) << QP_CONTEXT_QPC_BYTES_128_RX_ACK_EPSN_S)
-
-#define QP_CONTEXT_QPC_BYTES_128_RX_ACK_PSN_ERR_FLG_S 24
-
-#define QP_CONTEXT_QPC_BYTES_128_ACK_LAST_OPERATION_TYPE_S 25
-#define QP_CONTEXT_QPC_BYTES_128_ACK_LAST_OPERATION_TYPE_M \
- (((1UL << 2) - 1) << QP_CONTEXT_QPC_BYTES_128_ACK_LAST_OPERATION_TYPE_S)
-
-#define QP_CONTEXT_QPC_BYTES_128_IRRL_PSN_VLD_FLG_S 27
-
-#define QP_CONTEXT_QPC_BYTES_132_IRRL_PSN_S 0
-#define QP_CONTEXT_QPC_BYTES_132_IRRL_PSN_M \
- (((1UL << 24) - 1) << QP_CONTEXT_QPC_BYTES_132_IRRL_PSN_S)
-
-#define QP_CONTEXT_QPC_BYTES_132_IRRL_TAIL_S 24
-#define QP_CONTEXT_QPC_BYTES_132_IRRL_TAIL_M \
- (((1UL << 8) - 1) << QP_CONTEXT_QPC_BYTES_132_IRRL_TAIL_S)
-
-#define QP_CONTEXT_QPC_BYTES_136_RETRY_MSG_PSN_S 0
-#define QP_CONTEXT_QPC_BYTES_136_RETRY_MSG_PSN_M \
- (((1UL << 24) - 1) << QP_CONTEXT_QPC_BYTES_136_RETRY_MSG_PSN_S)
-
-#define QP_CONTEXT_QPC_BYTES_136_RETRY_MSG_FPKT_PSN_L_S 24
-#define QP_CONTEXT_QPC_BYTES_136_RETRY_MSG_FPKT_PSN_L_M \
- (((1UL << 8) - 1) << QP_CONTEXT_QPC_BYTES_136_RETRY_MSG_FPKT_PSN_L_S)
-
-#define QP_CONTEXT_QPC_BYTES_140_RETRY_MSG_FPKT_PSN_H_S 0
-#define QP_CONTEXT_QPC_BYTES_140_RETRY_MSG_FPKT_PSN_H_M \
- (((1UL << 16) - 1) << QP_CONTEXT_QPC_BYTES_140_RETRY_MSG_FPKT_PSN_H_S)
-
-#define QP_CONTEXT_QPC_BYTES_140_RETRY_MSG_MSN_S 16
-#define QP_CONTEXT_QPC_BYTES_140_RETRY_MSG_MSN_M \
- (((1UL << 15) - 1) << QP_CONTEXT_QPC_BYTES_140_RETRY_MSG_MSN_S)
-
-#define QP_CONTEXT_QPC_BYTES_140_RNR_RETRY_FLG_S 31
-
-#define QP_CONTEXT_QPC_BYTES_144_QP_STATE_S 0
-#define QP_CONTEXT_QPC_BYTES_144_QP_STATE_M \
- (((1UL << 3) - 1) << QP_CONTEXT_QPC_BYTES_144_QP_STATE_S)
-
-#define QP_CONTEXT_QPC_BYTES_148_CHECK_FLAG_S 0
-#define QP_CONTEXT_QPC_BYTES_148_CHECK_FLAG_M \
- (((1UL << 2) - 1) << QP_CONTEXT_QPC_BYTES_148_CHECK_FLAG_S)
-
-#define QP_CONTEXT_QPC_BYTES_148_RETRY_COUNT_S 2
-#define QP_CONTEXT_QPC_BYTES_148_RETRY_COUNT_M \
- (((1UL << 3) - 1) << QP_CONTEXT_QPC_BYTES_148_RETRY_COUNT_S)
-
-#define QP_CONTEXT_QPC_BYTES_148_RNR_RETRY_COUNT_S 5
-#define QP_CONTEXT_QPC_BYTES_148_RNR_RETRY_COUNT_M \
- (((1UL << 3) - 1) << QP_CONTEXT_QPC_BYTES_148_RNR_RETRY_COUNT_S)
-
-#define QP_CONTEXT_QPC_BYTES_148_LSN_S 8
-#define QP_CONTEXT_QPC_BYTES_148_LSN_M \
- (((1UL << 16) - 1) << QP_CONTEXT_QPC_BYTES_148_LSN_S)
-
-#define QP_CONTEXT_QPC_BYTES_156_RETRY_COUNT_INIT_S 0
-#define QP_CONTEXT_QPC_BYTES_156_RETRY_COUNT_INIT_M \
- (((1UL << 3) - 1) << QP_CONTEXT_QPC_BYTES_156_RETRY_COUNT_INIT_S)
-
-#define QP_CONTEXT_QPC_BYTES_156_ACK_TIMEOUT_S 3
-#define QP_CONTEXT_QPC_BYTES_156_ACK_TIMEOUT_M \
- (((1UL << 5) - 1) << QP_CONTEXT_QPC_BYTES_156_ACK_TIMEOUT_S)
-
-#define QP_CONTEXT_QPC_BYTES_156_RNR_RETRY_COUNT_INIT_S 8
-#define QP_CONTEXT_QPC_BYTES_156_RNR_RETRY_COUNT_INIT_M \
- (((1UL << 3) - 1) << QP_CONTEXT_QPC_BYTES_156_RNR_RETRY_COUNT_INIT_S)
-
-#define QP_CONTEXT_QPC_BYTES_156_PORT_NUM_S 11
-#define QP_CONTEXT_QPC_BYTES_156_PORT_NUM_M \
- (((1UL << 3) - 1) << QP_CONTEXT_QPC_BYTES_156_PORT_NUM_S)
-
-#define QP_CONTEXT_QPC_BYTES_156_SL_S 14
-#define QP_CONTEXT_QPC_BYTES_156_SL_M \
- (((1UL << 2) - 1) << QP_CONTEXT_QPC_BYTES_156_SL_S)
-
-#define QP_CONTEXT_QPC_BYTES_156_INITIATOR_DEPTH_S 16
-#define QP_CONTEXT_QPC_BYTES_156_INITIATOR_DEPTH_M \
- (((1UL << 8) - 1) << QP_CONTEXT_QPC_BYTES_156_INITIATOR_DEPTH_S)
-
-#define QP_CONTEXT_QPC_BYTES_156_ACK_REQ_IND_S 24
-#define QP_CONTEXT_QPC_BYTES_156_ACK_REQ_IND_M \
- (((1UL << 2) - 1) << QP_CONTEXT_QPC_BYTES_156_ACK_REQ_IND_S)
-
-#define QP_CONTEXT_QPC_BYTES_164_SQ_PSN_S 0
-#define QP_CONTEXT_QPC_BYTES_164_SQ_PSN_M \
- (((1UL << 24) - 1) << QP_CONTEXT_QPC_BYTES_164_SQ_PSN_S)
-
-#define QP_CONTEXT_QPC_BYTES_164_IRRL_HEAD_S 24
-#define QP_CONTEXT_QPC_BYTES_164_IRRL_HEAD_M \
- (((1UL << 8) - 1) << QP_CONTEXT_QPC_BYTES_164_IRRL_HEAD_S)
-
-#define QP_CONTEXT_QPC_BYTES_168_RETRY_SQ_PSN_S 0
-#define QP_CONTEXT_QPC_BYTES_168_RETRY_SQ_PSN_M \
- (((1UL << 24) - 1) << QP_CONTEXT_QPC_BYTES_168_RETRY_SQ_PSN_S)
-
-#define QP_CONTEXT_QPC_BYTES_168_SGE_USE_FLA_S 24
-#define QP_CONTEXT_QPC_BYTES_168_SGE_USE_FLA_M \
- (((1UL << 2) - 1) << QP_CONTEXT_QPC_BYTES_168_SGE_USE_FLA_S)
-
-#define QP_CONTEXT_QPC_BYTES_168_DB_TYPE_S 26
-#define QP_CONTEXT_QPC_BYTES_168_DB_TYPE_M \
- (((1UL << 2) - 1) << QP_CONTEXT_QPC_BYTES_168_DB_TYPE_S)
-
-#define QP_CONTEXT_QPC_BYTES_168_MSG_LP_IND_S 28
-#define QP_CONTEXT_QPC_BYTES_168_CSDB_LP_IND_S 29
-#define QP_CONTEXT_QPC_BYTES_168_QP_ERR_FLG_S 30
-
-#define QP_CONTEXT_QPC_BYTES_176_DB_CUR_INDEX_S 0
-#define QP_CONTEXT_QPC_BYTES_176_DB_CUR_INDEX_M \
- (((1UL << 15) - 1) << QP_CONTEXT_QPC_BYTES_176_DB_CUR_INDEX_S)
-
-#define QP_CONTEXT_QPC_BYTES_176_RETRY_DB_CUR_INDEX_S 16
-#define QP_CONTEXT_QPC_BYTES_176_RETRY_DB_CUR_INDEX_M \
- (((1UL << 15) - 1) << QP_CONTEXT_QPC_BYTES_176_RETRY_DB_CUR_INDEX_S)
-
-#define QP_CONTEXT_QPC_BYTES_180_SQ_HEAD_S 0
-#define QP_CONTEXT_QPC_BYTES_180_SQ_HEAD_M \
- (((1UL << 15) - 1) << QP_CONTEXT_QPC_BYTES_180_SQ_HEAD_S)
-
-#define QP_CONTEXT_QPC_BYTES_180_SQ_CUR_INDEX_S 16
-#define QP_CONTEXT_QPC_BYTES_180_SQ_CUR_INDEX_M \
- (((1UL << 15) - 1) << QP_CONTEXT_QPC_BYTES_180_SQ_CUR_INDEX_S)
-
-#define QP_CONTEXT_QPC_BYTES_188_TX_CUR_SQ_WQE_BA_H_S 0
-#define QP_CONTEXT_QPC_BYTES_188_TX_CUR_SQ_WQE_BA_H_M \
- (((1UL << 5) - 1) << QP_CONTEXT_QPC_BYTES_188_TX_CUR_SQ_WQE_BA_H_S)
-
-#define QP_CONTEXT_QPC_BYTES_188_PKT_RETRY_FLG_S 8
-
-#define QP_CONTEXT_QPC_BYTES_188_TX_RETRY_CUR_INDEX_S 16
-#define QP_CONTEXT_QPC_BYTES_188_TX_RETRY_CUR_INDEX_M \
- (((1UL << 15) - 1) << QP_CONTEXT_QPC_BYTES_188_TX_RETRY_CUR_INDEX_S)
-
-struct hns_roce_rq_db {
- u32 u32_4;
- u32 u32_8;
-};
-
-#define RQ_DOORBELL_U32_4_RQ_HEAD_S 0
-#define RQ_DOORBELL_U32_4_RQ_HEAD_M \
- (((1UL << 15) - 1) << RQ_DOORBELL_U32_4_RQ_HEAD_S)
-
-#define RQ_DOORBELL_U32_8_QPN_S 0
-#define RQ_DOORBELL_U32_8_QPN_M (((1UL << 24) - 1) << RQ_DOORBELL_U32_8_QPN_S)
-
-#define RQ_DOORBELL_U32_8_CMD_S 28
-#define RQ_DOORBELL_U32_8_CMD_M (((1UL << 3) - 1) << RQ_DOORBELL_U32_8_CMD_S)
-
-#define RQ_DOORBELL_U32_8_HW_SYNC_S 31
-
-struct hns_roce_sq_db {
- u32 u32_4;
- u32 u32_8;
-};
-
-#define SQ_DOORBELL_U32_4_SQ_HEAD_S 0
-#define SQ_DOORBELL_U32_4_SQ_HEAD_M \
- (((1UL << 15) - 1) << SQ_DOORBELL_U32_4_SQ_HEAD_S)
-
-#define SQ_DOORBELL_U32_4_SL_S 16
-#define SQ_DOORBELL_U32_4_SL_M \
- (((1UL << 2) - 1) << SQ_DOORBELL_U32_4_SL_S)
-
-#define SQ_DOORBELL_U32_4_PORT_S 18
-#define SQ_DOORBELL_U32_4_PORT_M (((1UL << 3) - 1) << SQ_DOORBELL_U32_4_PORT_S)
-
-#define SQ_DOORBELL_U32_8_QPN_S 0
-#define SQ_DOORBELL_U32_8_QPN_M (((1UL << 24) - 1) << SQ_DOORBELL_U32_8_QPN_S)
-
-#define SQ_DOORBELL_HW_SYNC_S 31
-
-struct hns_roce_ext_db {
- int esdb_dep;
- int eodb_dep;
- struct hns_roce_buf_list *sdb_buf_list;
- struct hns_roce_buf_list *odb_buf_list;
-};
-
-struct hns_roce_db_table {
- int sdb_ext_mod;
- int odb_ext_mod;
- struct hns_roce_ext_db *ext_db;
-};
-
-struct hns_roce_bt_table {
- struct hns_roce_buf_list qpc_buf;
- struct hns_roce_buf_list mtpt_buf;
- struct hns_roce_buf_list cqc_buf;
-};
-
-struct hns_roce_tptr_table {
- struct hns_roce_buf_list tptr_buf;
-};
-
-struct hns_roce_qp_work {
- struct work_struct work;
- struct ib_device *ib_dev;
- struct hns_roce_qp *qp;
- u32 db_wait_stage;
- u32 sdb_issue_ptr;
- u32 sdb_inv_cnt;
- u32 sche_cnt;
-};
-
-struct hns_roce_des_qp {
- struct workqueue_struct *qp_wq;
- int requeue_flag;
-};
-
-struct hns_roce_mr_free_work {
- struct work_struct work;
- struct ib_device *ib_dev;
- struct completion *comp;
- int comp_flag;
- void *mr;
-};
-
-struct hns_roce_recreate_lp_qp_work {
- struct work_struct work;
- struct ib_device *ib_dev;
- struct completion *comp;
- int comp_flag;
-};
-
-struct hns_roce_free_mr {
- struct workqueue_struct *free_mr_wq;
- struct hns_roce_qp *mr_free_qp[HNS_ROCE_V1_RESV_QP];
- struct hns_roce_cq *mr_free_cq;
- struct hns_roce_pd *mr_free_pd;
-};
-
-struct hns_roce_v1_priv {
- struct hns_roce_db_table db_table;
- struct hns_roce_raq_table raq_table;
- struct hns_roce_bt_table bt_table;
- struct hns_roce_tptr_table tptr_table;
- struct hns_roce_des_qp des_qp;
- struct hns_roce_free_mr free_mr;
-};
-
-int hns_dsaf_roce_reset(struct fwnode_handle *dsaf_fwnode, bool dereset);
-int hns_roce_v1_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
-int hns_roce_v1_destroy_qp(struct ib_qp *ibqp);
-
-#endif
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
new file mode 100644
index 000000000000..2d6ae89e525b
--- /dev/null
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -0,0 +1,7409 @@
+/*
+ * Copyright (c) 2016-2017 Hisilicon Limited.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/acpi.h>
+#include <linux/etherdevice.h>
+#include <linux/interrupt.h>
+#include <linux/iopoll.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/workqueue.h>
+#include <net/addrconf.h>
+#include <rdma/ib_addr.h>
+#include <rdma/ib_cache.h>
+#include <rdma/ib_umem.h>
+#include <rdma/uverbs_ioctl.h>
+
+#include "hclge_main.h"
+#include "hns_roce_common.h"
+#include "hns_roce_device.h"
+#include "hns_roce_cmd.h"
+#include "hns_roce_hem.h"
+#include "hns_roce_hw_v2.h"
+#include "hns_roce_bond.h"
+
+#define CREATE_TRACE_POINTS
+#include "hns_roce_trace.h"
+
+enum {
+ CMD_RST_PRC_OTHERS,
+ CMD_RST_PRC_SUCCESS,
+ CMD_RST_PRC_EBUSY,
+};
+
+enum ecc_resource_type {
+ ECC_RESOURCE_QPC,
+ ECC_RESOURCE_CQC,
+ ECC_RESOURCE_MPT,
+ ECC_RESOURCE_SRQC,
+ ECC_RESOURCE_GMV,
+ ECC_RESOURCE_QPC_TIMER,
+ ECC_RESOURCE_CQC_TIMER,
+ ECC_RESOURCE_SCCC,
+ ECC_RESOURCE_COUNT,
+};
+
+static const struct {
+ const char *name;
+ u8 read_bt0_op;
+ u8 write_bt0_op;
+} fmea_ram_res[] = {
+ { "ECC_RESOURCE_QPC",
+ HNS_ROCE_CMD_READ_QPC_BT0, HNS_ROCE_CMD_WRITE_QPC_BT0 },
+ { "ECC_RESOURCE_CQC",
+ HNS_ROCE_CMD_READ_CQC_BT0, HNS_ROCE_CMD_WRITE_CQC_BT0 },
+ { "ECC_RESOURCE_MPT",
+ HNS_ROCE_CMD_READ_MPT_BT0, HNS_ROCE_CMD_WRITE_MPT_BT0 },
+ { "ECC_RESOURCE_SRQC",
+ HNS_ROCE_CMD_READ_SRQC_BT0, HNS_ROCE_CMD_WRITE_SRQC_BT0 },
+ /* ECC_RESOURCE_GMV is handled by cmdq, not mailbox */
+ { "ECC_RESOURCE_GMV",
+ 0, 0 },
+ { "ECC_RESOURCE_QPC_TIMER",
+ HNS_ROCE_CMD_READ_QPC_TIMER_BT0, HNS_ROCE_CMD_WRITE_QPC_TIMER_BT0 },
+ { "ECC_RESOURCE_CQC_TIMER",
+ HNS_ROCE_CMD_READ_CQC_TIMER_BT0, HNS_ROCE_CMD_WRITE_CQC_TIMER_BT0 },
+ { "ECC_RESOURCE_SCCC",
+ HNS_ROCE_CMD_READ_SCCC_BT0, HNS_ROCE_CMD_WRITE_SCCC_BT0 },
+};
+
+static inline void set_data_seg_v2(struct hns_roce_v2_wqe_data_seg *dseg,
+ struct ib_sge *sg)
+{
+ dseg->lkey = cpu_to_le32(sg->lkey);
+ dseg->addr = cpu_to_le64(sg->addr);
+ dseg->len = cpu_to_le32(sg->length);
+}
+
+/*
+ * mapped-value = 1 + real-value
+ * The hns wr opcode real value is start from 0, In order to distinguish between
+ * initialized and uninitialized map values, we plus 1 to the actual value when
+ * defining the mapping, so that the validity can be identified by checking the
+ * mapped value is greater than 0.
+ */
+#define HR_OPC_MAP(ib_key, hr_key) \
+ [IB_WR_ ## ib_key] = 1 + HNS_ROCE_V2_WQE_OP_ ## hr_key
+
+static const u32 hns_roce_op_code[] = {
+ HR_OPC_MAP(RDMA_WRITE, RDMA_WRITE),
+ HR_OPC_MAP(RDMA_WRITE_WITH_IMM, RDMA_WRITE_WITH_IMM),
+ HR_OPC_MAP(SEND, SEND),
+ HR_OPC_MAP(SEND_WITH_IMM, SEND_WITH_IMM),
+ HR_OPC_MAP(RDMA_READ, RDMA_READ),
+ HR_OPC_MAP(ATOMIC_CMP_AND_SWP, ATOM_CMP_AND_SWAP),
+ HR_OPC_MAP(ATOMIC_FETCH_AND_ADD, ATOM_FETCH_AND_ADD),
+ HR_OPC_MAP(SEND_WITH_INV, SEND_WITH_INV),
+ HR_OPC_MAP(MASKED_ATOMIC_CMP_AND_SWP, ATOM_MSK_CMP_AND_SWAP),
+ HR_OPC_MAP(MASKED_ATOMIC_FETCH_AND_ADD, ATOM_MSK_FETCH_AND_ADD),
+ HR_OPC_MAP(REG_MR, FAST_REG_PMR),
+};
+
+static u32 to_hr_opcode(u32 ib_opcode)
+{
+ if (ib_opcode >= ARRAY_SIZE(hns_roce_op_code))
+ return HNS_ROCE_V2_WQE_OP_MASK;
+
+ return hns_roce_op_code[ib_opcode] ? hns_roce_op_code[ib_opcode] - 1 :
+ HNS_ROCE_V2_WQE_OP_MASK;
+}
+
+static void set_frmr_seg(struct hns_roce_v2_rc_send_wqe *rc_sq_wqe,
+ const struct ib_reg_wr *wr)
+{
+ struct hns_roce_wqe_frmr_seg *fseg =
+ (void *)rc_sq_wqe + sizeof(struct hns_roce_v2_rc_send_wqe);
+ struct hns_roce_mr *mr = to_hr_mr(wr->mr);
+ u64 pbl_ba;
+
+ /* use ib_access_flags */
+ hr_reg_write_bool(fseg, FRMR_BIND_EN, 0);
+ hr_reg_write_bool(fseg, FRMR_ATOMIC,
+ wr->access & IB_ACCESS_REMOTE_ATOMIC);
+ hr_reg_write_bool(fseg, FRMR_RR, wr->access & IB_ACCESS_REMOTE_READ);
+ hr_reg_write_bool(fseg, FRMR_RW, wr->access & IB_ACCESS_REMOTE_WRITE);
+ hr_reg_write_bool(fseg, FRMR_LW, wr->access & IB_ACCESS_LOCAL_WRITE);
+
+ /* Data structure reuse may lead to confusion */
+ pbl_ba = mr->pbl_mtr.hem_cfg.root_ba;
+ rc_sq_wqe->msg_len = cpu_to_le32(lower_32_bits(pbl_ba));
+ rc_sq_wqe->inv_key = cpu_to_le32(upper_32_bits(pbl_ba));
+
+ rc_sq_wqe->byte_16 = cpu_to_le32(wr->mr->length & 0xffffffff);
+ rc_sq_wqe->byte_20 = cpu_to_le32(wr->mr->length >> 32);
+ rc_sq_wqe->rkey = cpu_to_le32(wr->key);
+ rc_sq_wqe->va = cpu_to_le64(wr->mr->iova);
+
+ hr_reg_write(fseg, FRMR_PBL_SIZE, mr->npages);
+ hr_reg_write(fseg, FRMR_PBL_BUF_PG_SZ,
+ to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.buf_pg_shift));
+ hr_reg_clear(fseg, FRMR_BLK_MODE);
+ hr_reg_clear(fseg, FRMR_BLOCK_SIZE);
+ hr_reg_clear(fseg, FRMR_ZBVA);
+}
+
+static void set_atomic_seg(const struct ib_send_wr *wr,
+ struct hns_roce_v2_rc_send_wqe *rc_sq_wqe,
+ unsigned int valid_num_sge)
+{
+ struct hns_roce_v2_wqe_data_seg *dseg =
+ (void *)rc_sq_wqe + sizeof(struct hns_roce_v2_rc_send_wqe);
+ struct hns_roce_wqe_atomic_seg *aseg =
+ (void *)dseg + sizeof(struct hns_roce_v2_wqe_data_seg);
+
+ set_data_seg_v2(dseg, wr->sg_list);
+
+ if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
+ aseg->fetchadd_swap_data = cpu_to_le64(atomic_wr(wr)->swap);
+ aseg->cmp_data = cpu_to_le64(atomic_wr(wr)->compare_add);
+ } else {
+ aseg->fetchadd_swap_data =
+ cpu_to_le64(atomic_wr(wr)->compare_add);
+ aseg->cmp_data = 0;
+ }
+
+ hr_reg_write(rc_sq_wqe, RC_SEND_WQE_SGE_NUM, valid_num_sge);
+}
+
+static int fill_ext_sge_inl_data(struct hns_roce_qp *qp,
+ const struct ib_send_wr *wr,
+ unsigned int *sge_idx, u32 msg_len)
+{
+ struct ib_device *ibdev = &(to_hr_dev(qp->ibqp.device))->ib_dev;
+ unsigned int left_len_in_pg;
+ unsigned int idx = *sge_idx;
+ unsigned int i = 0;
+ unsigned int len;
+ void *addr;
+ void *dseg;
+
+ if (msg_len > qp->sq.ext_sge_cnt * HNS_ROCE_SGE_SIZE) {
+ ibdev_err(ibdev,
+ "no enough extended sge space for inline data.\n");
+ return -EINVAL;
+ }
+
+ dseg = hns_roce_get_extend_sge(qp, idx & (qp->sge.sge_cnt - 1));
+ left_len_in_pg = hr_hw_page_align((uintptr_t)dseg) - (uintptr_t)dseg;
+ len = wr->sg_list[0].length;
+ addr = (void *)(unsigned long)(wr->sg_list[0].addr);
+
+ /* When copying data to extended sge space, the left length in page may
+ * not long enough for current user's sge. So the data should be
+ * splited into several parts, one in the first page, and the others in
+ * the subsequent pages.
+ */
+ while (1) {
+ if (len <= left_len_in_pg) {
+ memcpy(dseg, addr, len);
+
+ idx += len / HNS_ROCE_SGE_SIZE;
+
+ i++;
+ if (i >= wr->num_sge)
+ break;
+
+ left_len_in_pg -= len;
+ len = wr->sg_list[i].length;
+ addr = (void *)(unsigned long)(wr->sg_list[i].addr);
+ dseg += len;
+ } else {
+ memcpy(dseg, addr, left_len_in_pg);
+
+ len -= left_len_in_pg;
+ addr += left_len_in_pg;
+ idx += left_len_in_pg / HNS_ROCE_SGE_SIZE;
+ dseg = hns_roce_get_extend_sge(qp,
+ idx & (qp->sge.sge_cnt - 1));
+ left_len_in_pg = 1 << HNS_HW_PAGE_SHIFT;
+ }
+ }
+
+ *sge_idx = idx;
+
+ return 0;
+}
+
+static void set_extend_sge(struct hns_roce_qp *qp, struct ib_sge *sge,
+ unsigned int *sge_ind, unsigned int cnt)
+{
+ struct hns_roce_v2_wqe_data_seg *dseg;
+ unsigned int idx = *sge_ind;
+
+ while (cnt > 0) {
+ dseg = hns_roce_get_extend_sge(qp, idx & (qp->sge.sge_cnt - 1));
+ if (likely(sge->length)) {
+ set_data_seg_v2(dseg, sge);
+ idx++;
+ cnt--;
+ }
+ sge++;
+ }
+
+ *sge_ind = idx;
+}
+
+static bool check_inl_data_len(struct hns_roce_qp *qp, unsigned int len)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(qp->ibqp.device);
+ int mtu = ib_mtu_enum_to_int(qp->path_mtu);
+
+ if (mtu < 0 || len > qp->max_inline_data || len > mtu) {
+ ibdev_err(&hr_dev->ib_dev,
+ "invalid length of data, data len = %u, max inline len = %u, path mtu = %d.\n",
+ len, qp->max_inline_data, mtu);
+ return false;
+ }
+
+ return true;
+}
+
+static int set_rc_inl(struct hns_roce_qp *qp, const struct ib_send_wr *wr,
+ struct hns_roce_v2_rc_send_wqe *rc_sq_wqe,
+ unsigned int *sge_idx)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(qp->ibqp.device);
+ u32 msg_len = le32_to_cpu(rc_sq_wqe->msg_len);
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ unsigned int curr_idx = *sge_idx;
+ void *dseg = rc_sq_wqe;
+ unsigned int i;
+ int ret;
+
+ if (unlikely(wr->opcode == IB_WR_RDMA_READ)) {
+ ibdev_err(ibdev, "invalid inline parameters!\n");
+ return -EINVAL;
+ }
+
+ if (!check_inl_data_len(qp, msg_len))
+ return -EINVAL;
+
+ dseg += sizeof(struct hns_roce_v2_rc_send_wqe);
+
+ if (msg_len <= HNS_ROCE_V2_MAX_RC_INL_INN_SZ) {
+ hr_reg_clear(rc_sq_wqe, RC_SEND_WQE_INL_TYPE);
+
+ for (i = 0; i < wr->num_sge; i++) {
+ memcpy(dseg, ((void *)wr->sg_list[i].addr),
+ wr->sg_list[i].length);
+ dseg += wr->sg_list[i].length;
+ }
+ } else {
+ hr_reg_enable(rc_sq_wqe, RC_SEND_WQE_INL_TYPE);
+
+ ret = fill_ext_sge_inl_data(qp, wr, &curr_idx, msg_len);
+ if (ret)
+ return ret;
+
+ hr_reg_write(rc_sq_wqe, RC_SEND_WQE_SGE_NUM, curr_idx - *sge_idx);
+ }
+
+ *sge_idx = curr_idx;
+
+ return 0;
+}
+
+static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+ struct hns_roce_v2_rc_send_wqe *rc_sq_wqe,
+ unsigned int *sge_ind,
+ unsigned int valid_num_sge)
+{
+ struct hns_roce_v2_wqe_data_seg *dseg =
+ (void *)rc_sq_wqe + sizeof(struct hns_roce_v2_rc_send_wqe);
+ struct hns_roce_qp *qp = to_hr_qp(ibqp);
+ int j = 0;
+ int i;
+
+ hr_reg_write(rc_sq_wqe, RC_SEND_WQE_INLINE,
+ !!(wr->send_flags & IB_SEND_INLINE));
+ if (wr->send_flags & IB_SEND_INLINE)
+ return set_rc_inl(qp, wr, rc_sq_wqe, sge_ind);
+
+ if (valid_num_sge <= HNS_ROCE_SGE_IN_WQE) {
+ for (i = 0; i < wr->num_sge; i++) {
+ if (likely(wr->sg_list[i].length)) {
+ set_data_seg_v2(dseg, wr->sg_list + i);
+ dseg++;
+ }
+ }
+ } else {
+ for (i = 0; i < wr->num_sge && j < HNS_ROCE_SGE_IN_WQE; i++) {
+ if (likely(wr->sg_list[i].length)) {
+ set_data_seg_v2(dseg, wr->sg_list + i);
+ dseg++;
+ j++;
+ }
+ }
+
+ set_extend_sge(qp, wr->sg_list + i, sge_ind,
+ valid_num_sge - HNS_ROCE_SGE_IN_WQE);
+ }
+
+ hr_reg_write(rc_sq_wqe, RC_SEND_WQE_SGE_NUM, valid_num_sge);
+
+ return 0;
+}
+
+static int check_send_valid(struct hns_roce_dev *hr_dev,
+ struct hns_roce_qp *hr_qp)
+{
+ if (unlikely(hr_qp->state == IB_QPS_RESET ||
+ hr_qp->state == IB_QPS_INIT ||
+ hr_qp->state == IB_QPS_RTR))
+ return -EINVAL;
+ else if (unlikely(hr_dev->state >= HNS_ROCE_DEVICE_STATE_RST_DOWN))
+ return -EIO;
+
+ return 0;
+}
+
+static unsigned int calc_wr_sge_num(const struct ib_send_wr *wr,
+ unsigned int *sge_len)
+{
+ unsigned int valid_num = 0;
+ unsigned int len = 0;
+ int i;
+
+ for (i = 0; i < wr->num_sge; i++) {
+ if (likely(wr->sg_list[i].length)) {
+ len += wr->sg_list[i].length;
+ valid_num++;
+ }
+ }
+
+ *sge_len = len;
+ return valid_num;
+}
+
+static __le32 get_immtdata(const struct ib_send_wr *wr)
+{
+ switch (wr->opcode) {
+ case IB_WR_SEND_WITH_IMM:
+ case IB_WR_RDMA_WRITE_WITH_IMM:
+ return cpu_to_le32(be32_to_cpu(wr->ex.imm_data));
+ default:
+ return 0;
+ }
+}
+
+static int set_ud_opcode(struct hns_roce_v2_ud_send_wqe *ud_sq_wqe,
+ const struct ib_send_wr *wr)
+{
+ u32 ib_op = wr->opcode;
+
+ if (ib_op != IB_WR_SEND && ib_op != IB_WR_SEND_WITH_IMM)
+ return -EINVAL;
+
+ ud_sq_wqe->immtdata = get_immtdata(wr);
+
+ hr_reg_write(ud_sq_wqe, UD_SEND_WQE_OPCODE, to_hr_opcode(ib_op));
+
+ return 0;
+}
+
+static int fill_ud_av(struct hns_roce_v2_ud_send_wqe *ud_sq_wqe,
+ struct hns_roce_ah *ah)
+{
+ struct ib_device *ib_dev = ah->ibah.device;
+ struct hns_roce_dev *hr_dev = to_hr_dev(ib_dev);
+
+ hr_reg_write(ud_sq_wqe, UD_SEND_WQE_UDPSPN, ah->av.udp_sport);
+ hr_reg_write(ud_sq_wqe, UD_SEND_WQE_HOPLIMIT, ah->av.hop_limit);
+ hr_reg_write(ud_sq_wqe, UD_SEND_WQE_TCLASS, ah->av.tclass);
+ hr_reg_write(ud_sq_wqe, UD_SEND_WQE_FLOW_LABEL, ah->av.flowlabel);
+ hr_reg_write(ud_sq_wqe, UD_SEND_WQE_SL, ah->av.sl);
+
+ ud_sq_wqe->sgid_index = ah->av.gid_index;
+
+ memcpy(ud_sq_wqe->dmac, ah->av.mac, ETH_ALEN);
+ memcpy(ud_sq_wqe->dgid, ah->av.dgid, GID_LEN_V2);
+
+ if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
+ return 0;
+
+ hr_reg_write(ud_sq_wqe, UD_SEND_WQE_VLAN_EN, ah->av.vlan_en);
+ hr_reg_write(ud_sq_wqe, UD_SEND_WQE_VLAN, ah->av.vlan_id);
+
+ return 0;
+}
+
+static inline int set_ud_wqe(struct hns_roce_qp *qp,
+ const struct ib_send_wr *wr,
+ void *wqe, unsigned int *sge_idx,
+ unsigned int owner_bit)
+{
+ struct hns_roce_ah *ah = to_hr_ah(ud_wr(wr)->ah);
+ struct hns_roce_v2_ud_send_wqe *ud_sq_wqe = wqe;
+ unsigned int curr_idx = *sge_idx;
+ unsigned int valid_num_sge;
+ u32 msg_len = 0;
+ int ret;
+
+ valid_num_sge = calc_wr_sge_num(wr, &msg_len);
+
+ ret = set_ud_opcode(ud_sq_wqe, wr);
+ if (WARN_ON_ONCE(ret))
+ return ret;
+
+ ud_sq_wqe->msg_len = cpu_to_le32(msg_len);
+
+ hr_reg_write(ud_sq_wqe, UD_SEND_WQE_CQE,
+ !!(wr->send_flags & IB_SEND_SIGNALED));
+ hr_reg_write(ud_sq_wqe, UD_SEND_WQE_SE,
+ !!(wr->send_flags & IB_SEND_SOLICITED));
+
+ hr_reg_write(ud_sq_wqe, UD_SEND_WQE_PD, to_hr_pd(qp->ibqp.pd)->pdn);
+ hr_reg_write(ud_sq_wqe, UD_SEND_WQE_SGE_NUM, valid_num_sge);
+ hr_reg_write(ud_sq_wqe, UD_SEND_WQE_MSG_START_SGE_IDX,
+ curr_idx & (qp->sge.sge_cnt - 1));
+
+ ud_sq_wqe->qkey = cpu_to_le32(ud_wr(wr)->remote_qkey & 0x80000000 ?
+ qp->qkey : ud_wr(wr)->remote_qkey);
+ hr_reg_write(ud_sq_wqe, UD_SEND_WQE_DQPN, ud_wr(wr)->remote_qpn);
+
+ ret = fill_ud_av(ud_sq_wqe, ah);
+ if (ret)
+ return ret;
+
+ qp->sl = to_hr_ah(ud_wr(wr)->ah)->av.sl;
+
+ set_extend_sge(qp, wr->sg_list, &curr_idx, valid_num_sge);
+
+ /*
+ * The pipeline can sequentially post all valid WQEs into WQ buffer,
+ * including new WQEs waiting for the doorbell to update the PI again.
+ * Therefore, the owner bit of WQE MUST be updated after all fields
+ * and extSGEs have been written into DDR instead of cache.
+ */
+ if (qp->en_flags & HNS_ROCE_QP_CAP_OWNER_DB)
+ dma_wmb();
+
+ *sge_idx = curr_idx;
+ hr_reg_write(ud_sq_wqe, UD_SEND_WQE_OWNER, owner_bit);
+
+ return 0;
+}
+
+static int set_rc_opcode(struct hns_roce_dev *hr_dev,
+ struct hns_roce_v2_rc_send_wqe *rc_sq_wqe,
+ const struct ib_send_wr *wr)
+{
+ u32 ib_op = wr->opcode;
+ int ret = 0;
+
+ rc_sq_wqe->immtdata = get_immtdata(wr);
+
+ switch (ib_op) {
+ case IB_WR_RDMA_READ:
+ case IB_WR_RDMA_WRITE:
+ case IB_WR_RDMA_WRITE_WITH_IMM:
+ rc_sq_wqe->rkey = cpu_to_le32(rdma_wr(wr)->rkey);
+ rc_sq_wqe->va = cpu_to_le64(rdma_wr(wr)->remote_addr);
+ break;
+ case IB_WR_SEND:
+ case IB_WR_SEND_WITH_IMM:
+ break;
+ case IB_WR_ATOMIC_CMP_AND_SWP:
+ case IB_WR_ATOMIC_FETCH_AND_ADD:
+ rc_sq_wqe->rkey = cpu_to_le32(atomic_wr(wr)->rkey);
+ rc_sq_wqe->va = cpu_to_le64(atomic_wr(wr)->remote_addr);
+ break;
+ case IB_WR_REG_MR:
+ if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
+ set_frmr_seg(rc_sq_wqe, reg_wr(wr));
+ else
+ ret = -EOPNOTSUPP;
+ break;
+ case IB_WR_SEND_WITH_INV:
+ rc_sq_wqe->inv_key = cpu_to_le32(wr->ex.invalidate_rkey);
+ break;
+ default:
+ ret = -EINVAL;
+ }
+
+ if (unlikely(ret))
+ return ret;
+
+ hr_reg_write(rc_sq_wqe, RC_SEND_WQE_OPCODE, to_hr_opcode(ib_op));
+
+ return ret;
+}
+
+static inline int set_rc_wqe(struct hns_roce_qp *qp,
+ const struct ib_send_wr *wr,
+ void *wqe, unsigned int *sge_idx,
+ unsigned int owner_bit)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(qp->ibqp.device);
+ struct hns_roce_v2_rc_send_wqe *rc_sq_wqe = wqe;
+ unsigned int curr_idx = *sge_idx;
+ unsigned int valid_num_sge;
+ u32 msg_len = 0;
+ int ret;
+
+ valid_num_sge = calc_wr_sge_num(wr, &msg_len);
+
+ rc_sq_wqe->msg_len = cpu_to_le32(msg_len);
+
+ ret = set_rc_opcode(hr_dev, rc_sq_wqe, wr);
+ if (WARN_ON_ONCE(ret))
+ return ret;
+
+ hr_reg_write(rc_sq_wqe, RC_SEND_WQE_SO,
+ (wr->send_flags & IB_SEND_FENCE) ? 1 : 0);
+
+ hr_reg_write(rc_sq_wqe, RC_SEND_WQE_SE,
+ (wr->send_flags & IB_SEND_SOLICITED) ? 1 : 0);
+
+ hr_reg_write(rc_sq_wqe, RC_SEND_WQE_CQE,
+ (wr->send_flags & IB_SEND_SIGNALED) ? 1 : 0);
+
+ hr_reg_write(rc_sq_wqe, RC_SEND_WQE_MSG_START_SGE_IDX,
+ curr_idx & (qp->sge.sge_cnt - 1));
+
+ if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
+ wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) {
+ if (msg_len != ATOMIC_WR_LEN)
+ return -EINVAL;
+ set_atomic_seg(wr, rc_sq_wqe, valid_num_sge);
+ } else if (wr->opcode != IB_WR_REG_MR) {
+ ret = set_rwqe_data_seg(&qp->ibqp, wr, rc_sq_wqe,
+ &curr_idx, valid_num_sge);
+ if (ret)
+ return ret;
+ }
+
+ /*
+ * The pipeline can sequentially post all valid WQEs into WQ buffer,
+ * including new WQEs waiting for the doorbell to update the PI again.
+ * Therefore, the owner bit of WQE MUST be updated after all fields
+ * and extSGEs have been written into DDR instead of cache.
+ */
+ if (qp->en_flags & HNS_ROCE_QP_CAP_OWNER_DB)
+ dma_wmb();
+
+ *sge_idx = curr_idx;
+ hr_reg_write(rc_sq_wqe, RC_SEND_WQE_OWNER, owner_bit);
+
+ return ret;
+}
+
+static inline void update_sq_db(struct hns_roce_dev *hr_dev,
+ struct hns_roce_qp *qp)
+{
+ if (unlikely(qp->state == IB_QPS_ERR)) {
+ flush_cqe(hr_dev, qp);
+ } else {
+ struct hns_roce_v2_db sq_db = {};
+
+ hr_reg_write(&sq_db, DB_TAG, qp->qpn);
+ hr_reg_write(&sq_db, DB_CMD, HNS_ROCE_V2_SQ_DB);
+ hr_reg_write(&sq_db, DB_PI, qp->sq.head);
+ hr_reg_write(&sq_db, DB_SL, qp->sl);
+
+ hns_roce_write64(hr_dev, (__le32 *)&sq_db, qp->sq.db_reg);
+ }
+}
+
+static inline void update_rq_db(struct hns_roce_dev *hr_dev,
+ struct hns_roce_qp *qp)
+{
+ if (unlikely(qp->state == IB_QPS_ERR)) {
+ flush_cqe(hr_dev, qp);
+ } else {
+ if (likely(qp->en_flags & HNS_ROCE_QP_CAP_RQ_RECORD_DB)) {
+ *qp->rdb.db_record =
+ qp->rq.head & V2_DB_PRODUCER_IDX_M;
+ } else {
+ struct hns_roce_v2_db rq_db = {};
+
+ hr_reg_write(&rq_db, DB_TAG, qp->qpn);
+ hr_reg_write(&rq_db, DB_CMD, HNS_ROCE_V2_RQ_DB);
+ hr_reg_write(&rq_db, DB_PI, qp->rq.head);
+
+ hns_roce_write64(hr_dev, (__le32 *)&rq_db,
+ qp->rq.db_reg);
+ }
+ }
+}
+
+static void hns_roce_write512(struct hns_roce_dev *hr_dev, u64 *val,
+ u64 __iomem *dest)
+{
+#define HNS_ROCE_WRITE_TIMES 8
+ struct hns_roce_v2_priv *priv = (struct hns_roce_v2_priv *)hr_dev->priv;
+ struct hnae3_handle *handle = priv->handle;
+ const struct hnae3_ae_ops *ops = handle->ae_algo->ops;
+ int i;
+
+ if (!hr_dev->dis_db && !ops->get_hw_reset_stat(handle))
+ for (i = 0; i < HNS_ROCE_WRITE_TIMES; i++)
+ writeq_relaxed(*(val + i), dest + i);
+}
+
+static void write_dwqe(struct hns_roce_dev *hr_dev, struct hns_roce_qp *qp,
+ void *wqe)
+{
+#define HNS_ROCE_SL_SHIFT 2
+ struct hns_roce_v2_rc_send_wqe *rc_sq_wqe = wqe;
+
+ if (unlikely(qp->state == IB_QPS_ERR)) {
+ flush_cqe(hr_dev, qp);
+ return;
+ }
+ /* All kinds of DirectWQE have the same header field layout */
+ hr_reg_enable(rc_sq_wqe, RC_SEND_WQE_FLAG);
+ hr_reg_write(rc_sq_wqe, RC_SEND_WQE_DB_SL_L, qp->sl);
+ hr_reg_write(rc_sq_wqe, RC_SEND_WQE_DB_SL_H,
+ qp->sl >> HNS_ROCE_SL_SHIFT);
+ hr_reg_write(rc_sq_wqe, RC_SEND_WQE_WQE_INDEX, qp->sq.head);
+
+ hns_roce_write512(hr_dev, wqe, qp->sq.db_reg);
+}
+
+static int hns_roce_v2_post_send(struct ib_qp *ibqp,
+ const struct ib_send_wr *wr,
+ const struct ib_send_wr **bad_wr)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ struct hns_roce_qp *qp = to_hr_qp(ibqp);
+ unsigned long flags = 0;
+ unsigned int owner_bit;
+ unsigned int sge_idx;
+ unsigned int wqe_idx;
+ void *wqe = NULL;
+ u32 nreq;
+ int ret;
+
+ spin_lock_irqsave(&qp->sq.lock, flags);
+
+ ret = check_send_valid(hr_dev, qp);
+ if (unlikely(ret)) {
+ *bad_wr = wr;
+ nreq = 0;
+ goto out;
+ }
+
+ sge_idx = qp->next_sge;
+
+ for (nreq = 0; wr; ++nreq, wr = wr->next) {
+ if (hns_roce_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) {
+ ret = -ENOMEM;
+ *bad_wr = wr;
+ goto out;
+ }
+
+ wqe_idx = (qp->sq.head + nreq) & (qp->sq.wqe_cnt - 1);
+
+ if (unlikely(wr->num_sge > qp->sq.max_gs)) {
+ ibdev_err(ibdev, "num_sge = %d > qp->sq.max_gs = %u.\n",
+ wr->num_sge, qp->sq.max_gs);
+ ret = -EINVAL;
+ *bad_wr = wr;
+ goto out;
+ }
+
+ wqe = hns_roce_get_send_wqe(qp, wqe_idx);
+ qp->sq.wrid[wqe_idx] = wr->wr_id;
+ owner_bit =
+ ~(((qp->sq.head + nreq) >> ilog2(qp->sq.wqe_cnt)) & 0x1);
+
+ /* RC and UD share the same DirectWQE field layout */
+ ((struct hns_roce_v2_rc_send_wqe *)wqe)->byte_4 = 0;
+
+ /* Corresponding to the QP type, wqe process separately */
+ if (ibqp->qp_type == IB_QPT_RC)
+ ret = set_rc_wqe(qp, wr, wqe, &sge_idx, owner_bit);
+ else
+ ret = set_ud_wqe(qp, wr, wqe, &sge_idx, owner_bit);
+
+ trace_hns_sq_wqe(qp->qpn, wqe_idx, wqe, 1 << qp->sq.wqe_shift,
+ wr->wr_id, TRACE_SQ);
+ if (unlikely(ret)) {
+ *bad_wr = wr;
+ goto out;
+ }
+ }
+
+out:
+ if (likely(nreq)) {
+ qp->sq.head += nreq;
+ qp->next_sge = sge_idx;
+
+ if (nreq == 1 && !ret &&
+ (qp->en_flags & HNS_ROCE_QP_CAP_DIRECT_WQE))
+ write_dwqe(hr_dev, qp, wqe);
+ else
+ update_sq_db(hr_dev, qp);
+ }
+
+ spin_unlock_irqrestore(&qp->sq.lock, flags);
+
+ return ret;
+}
+
+static int check_recv_valid(struct hns_roce_dev *hr_dev,
+ struct hns_roce_qp *hr_qp)
+{
+ if (unlikely(hr_dev->state >= HNS_ROCE_DEVICE_STATE_RST_DOWN))
+ return -EIO;
+
+ if (hr_qp->state == IB_QPS_RESET)
+ return -EINVAL;
+
+ return 0;
+}
+
+static void fill_recv_sge_to_wqe(const struct ib_recv_wr *wr, void *wqe,
+ u32 max_sge, bool rsv)
+{
+ struct hns_roce_v2_wqe_data_seg *dseg = wqe;
+ u32 i, cnt;
+
+ for (i = 0, cnt = 0; i < wr->num_sge; i++) {
+ /* Skip zero-length sge */
+ if (!wr->sg_list[i].length)
+ continue;
+ set_data_seg_v2(dseg + cnt, wr->sg_list + i);
+ cnt++;
+ }
+
+ /* Fill a reserved sge to make hw stop reading remaining segments */
+ if (rsv) {
+ dseg[cnt].lkey = cpu_to_le32(HNS_ROCE_INVALID_LKEY);
+ dseg[cnt].addr = 0;
+ dseg[cnt].len = cpu_to_le32(HNS_ROCE_INVALID_SGE_LENGTH);
+ } else {
+ /* Clear remaining segments to make ROCEE ignore sges */
+ if (cnt < max_sge)
+ memset(dseg + cnt, 0,
+ (max_sge - cnt) * HNS_ROCE_SGE_SIZE);
+ }
+}
+
+static void fill_rq_wqe(struct hns_roce_qp *hr_qp, const struct ib_recv_wr *wr,
+ u32 wqe_idx, u32 max_sge)
+{
+ void *wqe = NULL;
+
+ wqe = hns_roce_get_recv_wqe(hr_qp, wqe_idx);
+ fill_recv_sge_to_wqe(wr, wqe, max_sge, hr_qp->rq.rsv_sge);
+
+ trace_hns_rq_wqe(hr_qp->qpn, wqe_idx, wqe, 1 << hr_qp->rq.wqe_shift,
+ wr->wr_id, TRACE_RQ);
+}
+
+static int hns_roce_v2_post_recv(struct ib_qp *ibqp,
+ const struct ib_recv_wr *wr,
+ const struct ib_recv_wr **bad_wr)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
+ struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ u32 wqe_idx, nreq, max_sge;
+ unsigned long flags;
+ int ret;
+
+ spin_lock_irqsave(&hr_qp->rq.lock, flags);
+
+ ret = check_recv_valid(hr_dev, hr_qp);
+ if (unlikely(ret)) {
+ *bad_wr = wr;
+ nreq = 0;
+ goto out;
+ }
+
+ max_sge = hr_qp->rq.max_gs - hr_qp->rq.rsv_sge;
+ for (nreq = 0; wr; ++nreq, wr = wr->next) {
+ if (unlikely(hns_roce_wq_overflow(&hr_qp->rq, nreq,
+ hr_qp->ibqp.recv_cq))) {
+ ret = -ENOMEM;
+ *bad_wr = wr;
+ goto out;
+ }
+
+ if (unlikely(wr->num_sge > max_sge)) {
+ ibdev_err(ibdev, "num_sge = %d >= max_sge = %u.\n",
+ wr->num_sge, max_sge);
+ ret = -EINVAL;
+ *bad_wr = wr;
+ goto out;
+ }
+
+ wqe_idx = (hr_qp->rq.head + nreq) & (hr_qp->rq.wqe_cnt - 1);
+ fill_rq_wqe(hr_qp, wr, wqe_idx, max_sge);
+ hr_qp->rq.wrid[wqe_idx] = wr->wr_id;
+ }
+
+out:
+ if (likely(nreq)) {
+ hr_qp->rq.head += nreq;
+
+ update_rq_db(hr_dev, hr_qp);
+ }
+ spin_unlock_irqrestore(&hr_qp->rq.lock, flags);
+
+ return ret;
+}
+
+static void *get_srq_wqe_buf(struct hns_roce_srq *srq, u32 n)
+{
+ return hns_roce_buf_offset(srq->buf_mtr.kmem, n << srq->wqe_shift);
+}
+
+static void *get_idx_buf(struct hns_roce_idx_que *idx_que, u32 n)
+{
+ return hns_roce_buf_offset(idx_que->mtr.kmem,
+ n << idx_que->entry_shift);
+}
+
+static void hns_roce_free_srq_wqe(struct hns_roce_srq *srq, u32 wqe_index)
+{
+ /* always called with interrupts disabled. */
+ spin_lock(&srq->lock);
+
+ bitmap_clear(srq->idx_que.bitmap, wqe_index, 1);
+ srq->idx_que.tail++;
+
+ spin_unlock(&srq->lock);
+}
+
+static int hns_roce_srqwq_overflow(struct hns_roce_srq *srq)
+{
+ struct hns_roce_idx_que *idx_que = &srq->idx_que;
+
+ return idx_que->head - idx_que->tail >= srq->wqe_cnt;
+}
+
+static int check_post_srq_valid(struct hns_roce_srq *srq, u32 max_sge,
+ const struct ib_recv_wr *wr)
+{
+ struct ib_device *ib_dev = srq->ibsrq.device;
+
+ if (unlikely(wr->num_sge > max_sge)) {
+ ibdev_err(ib_dev,
+ "failed to check sge, wr->num_sge = %d, max_sge = %u.\n",
+ wr->num_sge, max_sge);
+ return -EINVAL;
+ }
+
+ if (unlikely(hns_roce_srqwq_overflow(srq))) {
+ ibdev_err(ib_dev,
+ "failed to check srqwq status, srqwq is full.\n");
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static int get_srq_wqe_idx(struct hns_roce_srq *srq, u32 *wqe_idx)
+{
+ struct hns_roce_idx_que *idx_que = &srq->idx_que;
+ u32 pos;
+
+ pos = find_first_zero_bit(idx_que->bitmap, srq->wqe_cnt);
+ if (unlikely(pos == srq->wqe_cnt))
+ return -ENOSPC;
+
+ bitmap_set(idx_que->bitmap, pos, 1);
+ *wqe_idx = pos;
+ return 0;
+}
+
+static void fill_wqe_idx(struct hns_roce_srq *srq, unsigned int wqe_idx)
+{
+ struct hns_roce_idx_que *idx_que = &srq->idx_que;
+ unsigned int head;
+ __le32 *buf;
+
+ head = idx_que->head & (srq->wqe_cnt - 1);
+
+ buf = get_idx_buf(idx_que, head);
+ *buf = cpu_to_le32(wqe_idx);
+
+ idx_que->head++;
+}
+
+static void update_srq_db(struct hns_roce_srq *srq)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(srq->ibsrq.device);
+ struct hns_roce_v2_db db = {};
+
+ hr_reg_write(&db, DB_TAG, srq->srqn);
+ hr_reg_write(&db, DB_CMD, HNS_ROCE_V2_SRQ_DB);
+ hr_reg_write(&db, DB_PI, srq->idx_que.head);
+
+ hns_roce_write64(hr_dev, (__le32 *)&db, srq->db_reg);
+}
+
+static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq,
+ const struct ib_recv_wr *wr,
+ const struct ib_recv_wr **bad_wr)
+{
+ struct hns_roce_srq *srq = to_hr_srq(ibsrq);
+ unsigned long flags;
+ int ret = 0;
+ u32 max_sge;
+ u32 wqe_idx;
+ void *wqe;
+ u32 nreq;
+
+ spin_lock_irqsave(&srq->lock, flags);
+
+ max_sge = srq->max_gs - srq->rsv_sge;
+ for (nreq = 0; wr; ++nreq, wr = wr->next) {
+ ret = check_post_srq_valid(srq, max_sge, wr);
+ if (ret) {
+ *bad_wr = wr;
+ break;
+ }
+
+ ret = get_srq_wqe_idx(srq, &wqe_idx);
+ if (unlikely(ret)) {
+ *bad_wr = wr;
+ break;
+ }
+
+ wqe = get_srq_wqe_buf(srq, wqe_idx);
+ fill_recv_sge_to_wqe(wr, wqe, max_sge, srq->rsv_sge);
+ fill_wqe_idx(srq, wqe_idx);
+ srq->wrid[wqe_idx] = wr->wr_id;
+
+ trace_hns_srq_wqe(srq->srqn, wqe_idx, wqe, 1 << srq->wqe_shift,
+ wr->wr_id, TRACE_SRQ);
+ }
+
+ if (likely(nreq)) {
+ if (srq->cap_flags & HNS_ROCE_SRQ_CAP_RECORD_DB)
+ *srq->rdb.db_record = srq->idx_que.head &
+ V2_DB_PRODUCER_IDX_M;
+ else
+ update_srq_db(srq);
+ }
+
+ spin_unlock_irqrestore(&srq->lock, flags);
+
+ return ret;
+}
+
+static u32 hns_roce_v2_cmd_hw_reseted(struct hns_roce_dev *hr_dev,
+ unsigned long instance_stage,
+ unsigned long reset_stage)
+{
+ /* When hardware reset has been completed once or more, we should stop
+ * sending mailbox&cmq&doorbell to hardware. If now in .init_instance()
+ * function, we should exit with error. If now at HNAE3_INIT_CLIENT
+ * stage of soft reset process, we should exit with error, and then
+ * HNAE3_INIT_CLIENT related process can rollback the operation like
+ * notifing hardware to free resources, HNAE3_INIT_CLIENT related
+ * process will exit with error to notify NIC driver to reschedule soft
+ * reset process once again.
+ */
+ hr_dev->is_reset = true;
+ hr_dev->dis_db = true;
+
+ if (reset_stage == HNS_ROCE_STATE_RST_INIT ||
+ instance_stage == HNS_ROCE_STATE_INIT)
+ return CMD_RST_PRC_EBUSY;
+
+ return CMD_RST_PRC_SUCCESS;
+}
+
+static u32 hns_roce_v2_cmd_hw_resetting(struct hns_roce_dev *hr_dev,
+ unsigned long instance_stage,
+ unsigned long reset_stage)
+{
+#define HW_RESET_TIMEOUT_US 1000000
+#define HW_RESET_SLEEP_US 1000
+
+ struct hns_roce_v2_priv *priv = hr_dev->priv;
+ struct hnae3_handle *handle = priv->handle;
+ const struct hnae3_ae_ops *ops = handle->ae_algo->ops;
+ unsigned long val;
+ int ret;
+
+ /* When hardware reset is detected, we should stop sending mailbox&cmq&
+ * doorbell to hardware. If now in .init_instance() function, we should
+ * exit with error. If now at HNAE3_INIT_CLIENT stage of soft reset
+ * process, we should exit with error, and then HNAE3_INIT_CLIENT
+ * related process can rollback the operation like notifing hardware to
+ * free resources, HNAE3_INIT_CLIENT related process will exit with
+ * error to notify NIC driver to reschedule soft reset process once
+ * again.
+ */
+ hr_dev->dis_db = true;
+
+ ret = read_poll_timeout(ops->ae_dev_reset_cnt, val,
+ val > hr_dev->reset_cnt, HW_RESET_SLEEP_US,
+ HW_RESET_TIMEOUT_US, false, handle);
+ if (!ret)
+ hr_dev->is_reset = true;
+
+ if (!hr_dev->is_reset || reset_stage == HNS_ROCE_STATE_RST_INIT ||
+ instance_stage == HNS_ROCE_STATE_INIT)
+ return CMD_RST_PRC_EBUSY;
+
+ return CMD_RST_PRC_SUCCESS;
+}
+
+static u32 hns_roce_v2_cmd_sw_resetting(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_v2_priv *priv = hr_dev->priv;
+ struct hnae3_handle *handle = priv->handle;
+ const struct hnae3_ae_ops *ops = handle->ae_algo->ops;
+
+ /* When software reset is detected at .init_instance() function, we
+ * should stop sending mailbox&cmq&doorbell to hardware, and exit
+ * with error.
+ */
+ hr_dev->dis_db = true;
+ if (ops->ae_dev_reset_cnt(handle) != hr_dev->reset_cnt)
+ hr_dev->is_reset = true;
+
+ return CMD_RST_PRC_EBUSY;
+}
+
+static u32 check_aedev_reset_status(struct hns_roce_dev *hr_dev,
+ struct hnae3_handle *handle)
+{
+ const struct hnae3_ae_ops *ops = handle->ae_algo->ops;
+ unsigned long instance_stage; /* the current instance stage */
+ unsigned long reset_stage; /* the current reset stage */
+ unsigned long reset_cnt;
+ bool sw_resetting;
+ bool hw_resetting;
+
+ /* Get information about reset from NIC driver or RoCE driver itself,
+ * the meaning of the following variables from NIC driver are described
+ * as below:
+ * reset_cnt -- The count value of completed hardware reset.
+ * hw_resetting -- Whether hardware device is resetting now.
+ * sw_resetting -- Whether NIC's software reset process is running now.
+ */
+ instance_stage = handle->rinfo.instance_state;
+ reset_stage = handle->rinfo.reset_state;
+ reset_cnt = ops->ae_dev_reset_cnt(handle);
+ if (reset_cnt != hr_dev->reset_cnt)
+ return hns_roce_v2_cmd_hw_reseted(hr_dev, instance_stage,
+ reset_stage);
+
+ hw_resetting = ops->get_cmdq_stat(handle);
+ if (hw_resetting)
+ return hns_roce_v2_cmd_hw_resetting(hr_dev, instance_stage,
+ reset_stage);
+
+ sw_resetting = ops->ae_dev_resetting(handle);
+ if (sw_resetting && instance_stage == HNS_ROCE_STATE_INIT)
+ return hns_roce_v2_cmd_sw_resetting(hr_dev);
+
+ return CMD_RST_PRC_OTHERS;
+}
+
+static bool check_device_is_in_reset(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_v2_priv *priv = hr_dev->priv;
+ struct hnae3_handle *handle = priv->handle;
+ const struct hnae3_ae_ops *ops = handle->ae_algo->ops;
+
+ if (hr_dev->reset_cnt != ops->ae_dev_reset_cnt(handle))
+ return true;
+
+ if (ops->get_hw_reset_stat(handle))
+ return true;
+
+ if (ops->ae_dev_resetting(handle))
+ return true;
+
+ return false;
+}
+
+static bool v2_chk_mbox_is_avail(struct hns_roce_dev *hr_dev, bool *busy)
+{
+ struct hns_roce_v2_priv *priv = hr_dev->priv;
+ u32 status;
+
+ if (hr_dev->is_reset)
+ status = CMD_RST_PRC_SUCCESS;
+ else
+ status = check_aedev_reset_status(hr_dev, priv->handle);
+
+ *busy = (status == CMD_RST_PRC_EBUSY);
+
+ return status == CMD_RST_PRC_OTHERS;
+}
+
+static int hns_roce_alloc_cmq_desc(struct hns_roce_dev *hr_dev,
+ struct hns_roce_v2_cmq_ring *ring)
+{
+ int size = ring->desc_num * sizeof(struct hns_roce_cmq_desc);
+
+ ring->desc = dma_alloc_coherent(hr_dev->dev, size,
+ &ring->desc_dma_addr, GFP_KERNEL);
+ if (!ring->desc)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static void hns_roce_free_cmq_desc(struct hns_roce_dev *hr_dev,
+ struct hns_roce_v2_cmq_ring *ring)
+{
+ dma_free_coherent(hr_dev->dev,
+ ring->desc_num * sizeof(struct hns_roce_cmq_desc),
+ ring->desc, ring->desc_dma_addr);
+
+ ring->desc_dma_addr = 0;
+}
+
+static int init_csq(struct hns_roce_dev *hr_dev,
+ struct hns_roce_v2_cmq_ring *csq)
+{
+ dma_addr_t dma;
+ int ret;
+
+ csq->desc_num = CMD_CSQ_DESC_NUM;
+ spin_lock_init(&csq->lock);
+ csq->flag = TYPE_CSQ;
+ csq->head = 0;
+
+ ret = hns_roce_alloc_cmq_desc(hr_dev, csq);
+ if (ret)
+ return ret;
+
+ dma = csq->desc_dma_addr;
+ roce_write(hr_dev, ROCEE_TX_CMQ_BASEADDR_L_REG, lower_32_bits(dma));
+ roce_write(hr_dev, ROCEE_TX_CMQ_BASEADDR_H_REG, upper_32_bits(dma));
+ roce_write(hr_dev, ROCEE_TX_CMQ_DEPTH_REG,
+ (u32)csq->desc_num >> HNS_ROCE_CMQ_DESC_NUM_S);
+
+ /* Make sure to write CI first and then PI */
+ roce_write(hr_dev, ROCEE_TX_CMQ_CI_REG, 0);
+ roce_write(hr_dev, ROCEE_TX_CMQ_PI_REG, 0);
+
+ return 0;
+}
+
+static int hns_roce_v2_cmq_init(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_v2_priv *priv = hr_dev->priv;
+ int ret;
+
+ priv->cmq.tx_timeout = HNS_ROCE_CMQ_TX_TIMEOUT;
+
+ ret = init_csq(hr_dev, &priv->cmq.csq);
+ if (ret)
+ dev_err(hr_dev->dev, "failed to init CSQ, ret = %d.\n", ret);
+
+ return ret;
+}
+
+static void hns_roce_v2_cmq_exit(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_v2_priv *priv = hr_dev->priv;
+
+ hns_roce_free_cmq_desc(hr_dev, &priv->cmq.csq);
+}
+
+static void hns_roce_cmq_setup_basic_desc(struct hns_roce_cmq_desc *desc,
+ enum hns_roce_opcode_type opcode,
+ bool is_read)
+{
+ memset((void *)desc, 0, sizeof(struct hns_roce_cmq_desc));
+ desc->opcode = cpu_to_le16(opcode);
+ desc->flag = cpu_to_le16(HNS_ROCE_CMD_FLAG_IN);
+ if (is_read)
+ desc->flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_WR);
+ else
+ desc->flag &= cpu_to_le16(~HNS_ROCE_CMD_FLAG_WR);
+}
+
+static int hns_roce_cmq_csq_done(struct hns_roce_dev *hr_dev)
+{
+ u32 tail = roce_read(hr_dev, ROCEE_TX_CMQ_CI_REG);
+ struct hns_roce_v2_priv *priv = hr_dev->priv;
+
+ return tail == priv->cmq.csq.head;
+}
+
+static void update_cmdq_status(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_v2_priv *priv = hr_dev->priv;
+ struct hnae3_handle *handle = priv->handle;
+
+ if (handle->rinfo.reset_state == HNS_ROCE_STATE_RST_INIT ||
+ handle->rinfo.instance_state == HNS_ROCE_STATE_INIT)
+ hr_dev->cmd.state = HNS_ROCE_CMDQ_STATE_FATAL_ERR;
+}
+
+static int hns_roce_cmd_err_convert_errno(u16 desc_ret)
+{
+ struct hns_roce_cmd_errcode errcode_table[] = {
+ {CMD_EXEC_SUCCESS, 0},
+ {CMD_NO_AUTH, -EPERM},
+ {CMD_NOT_EXIST, -EOPNOTSUPP},
+ {CMD_CRQ_FULL, -EXFULL},
+ {CMD_NEXT_ERR, -ENOSR},
+ {CMD_NOT_EXEC, -ENOTBLK},
+ {CMD_PARA_ERR, -EINVAL},
+ {CMD_RESULT_ERR, -ERANGE},
+ {CMD_TIMEOUT, -ETIME},
+ {CMD_HILINK_ERR, -ENOLINK},
+ {CMD_INFO_ILLEGAL, -ENXIO},
+ {CMD_INVALID, -EBADR},
+ };
+ u16 i;
+
+ for (i = 0; i < ARRAY_SIZE(errcode_table); i++)
+ if (desc_ret == errcode_table[i].return_status)
+ return errcode_table[i].errno;
+ return -EIO;
+}
+
+static u32 hns_roce_cmdq_tx_timeout(u16 opcode, u32 tx_timeout)
+{
+ static const struct hns_roce_cmdq_tx_timeout_map cmdq_tx_timeout[] = {
+ {HNS_ROCE_OPC_POST_MB, HNS_ROCE_OPC_POST_MB_TIMEOUT},
+ };
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(cmdq_tx_timeout); i++)
+ if (cmdq_tx_timeout[i].opcode == opcode)
+ return cmdq_tx_timeout[i].tx_timeout;
+
+ return tx_timeout;
+}
+
+static void hns_roce_wait_csq_done(struct hns_roce_dev *hr_dev, u32 tx_timeout)
+{
+ u32 timeout = 0;
+
+ do {
+ if (hns_roce_cmq_csq_done(hr_dev))
+ break;
+ udelay(1);
+ } while (++timeout < tx_timeout);
+}
+
+static int __hns_roce_cmq_send_one(struct hns_roce_dev *hr_dev,
+ struct hns_roce_cmq_desc *desc,
+ int num, u32 tx_timeout)
+{
+ struct hns_roce_v2_priv *priv = hr_dev->priv;
+ struct hns_roce_v2_cmq_ring *csq = &priv->cmq.csq;
+ u16 desc_ret;
+ u32 tail;
+ int ret;
+ int i;
+
+ tail = csq->head;
+
+ for (i = 0; i < num; i++) {
+ trace_hns_cmdq_req(hr_dev, &desc[i]);
+
+ csq->desc[csq->head++] = desc[i];
+ if (csq->head == csq->desc_num)
+ csq->head = 0;
+ }
+
+ /* Write to hardware */
+ roce_write(hr_dev, ROCEE_TX_CMQ_PI_REG, csq->head);
+
+ atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_CMDS_CNT]);
+
+ hns_roce_wait_csq_done(hr_dev, tx_timeout);
+ if (hns_roce_cmq_csq_done(hr_dev)) {
+ ret = 0;
+ for (i = 0; i < num; i++) {
+ trace_hns_cmdq_resp(hr_dev, &csq->desc[tail]);
+
+ /* check the result of hardware write back */
+ desc_ret = le16_to_cpu(csq->desc[tail++].retval);
+ if (tail == csq->desc_num)
+ tail = 0;
+ if (likely(desc_ret == CMD_EXEC_SUCCESS))
+ continue;
+
+ ret = hns_roce_cmd_err_convert_errno(desc_ret);
+ }
+ } else {
+ /* FW/HW reset or incorrect number of desc */
+ tail = roce_read(hr_dev, ROCEE_TX_CMQ_CI_REG);
+ dev_warn(hr_dev->dev, "CMDQ move tail from %u to %u.\n",
+ csq->head, tail);
+ csq->head = tail;
+
+ update_cmdq_status(hr_dev);
+
+ ret = -EAGAIN;
+ }
+
+ if (ret)
+ atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_CMDS_ERR_CNT]);
+
+ return ret;
+}
+
+static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
+ struct hns_roce_cmq_desc *desc, int num)
+{
+ struct hns_roce_v2_priv *priv = hr_dev->priv;
+ struct hns_roce_v2_cmq_ring *csq = &priv->cmq.csq;
+ u16 opcode = le16_to_cpu(desc->opcode);
+ u32 tx_timeout = hns_roce_cmdq_tx_timeout(opcode, priv->cmq.tx_timeout);
+ u8 try_cnt = HNS_ROCE_OPC_POST_MB_TRY_CNT;
+ u32 rsv_tail;
+ int ret;
+ int i;
+
+ while (try_cnt) {
+ try_cnt--;
+
+ spin_lock_bh(&csq->lock);
+ rsv_tail = csq->head;
+ ret = __hns_roce_cmq_send_one(hr_dev, desc, num, tx_timeout);
+ if (opcode == HNS_ROCE_OPC_POST_MB && ret == -ETIME &&
+ try_cnt) {
+ spin_unlock_bh(&csq->lock);
+ mdelay(HNS_ROCE_OPC_POST_MB_RETRY_GAP_MSEC);
+ continue;
+ }
+
+ for (i = 0; i < num; i++) {
+ desc[i] = csq->desc[rsv_tail++];
+ if (rsv_tail == csq->desc_num)
+ rsv_tail = 0;
+ }
+ spin_unlock_bh(&csq->lock);
+ break;
+ }
+
+ if (ret)
+ dev_err_ratelimited(hr_dev->dev,
+ "Cmdq IO error, opcode = 0x%x, return = %d.\n",
+ opcode, ret);
+
+ return ret;
+}
+
+static int hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
+ struct hns_roce_cmq_desc *desc, int num)
+{
+ bool busy;
+ int ret;
+
+ if (hr_dev->cmd.state == HNS_ROCE_CMDQ_STATE_FATAL_ERR)
+ return -EIO;
+
+ if (!v2_chk_mbox_is_avail(hr_dev, &busy))
+ return busy ? -EBUSY : 0;
+
+ ret = __hns_roce_cmq_send(hr_dev, desc, num);
+ if (ret) {
+ if (!v2_chk_mbox_is_avail(hr_dev, &busy))
+ return busy ? -EBUSY : 0;
+ }
+
+ return ret;
+}
+
+static enum hns_roce_opcode_type
+ get_bond_opcode(enum hns_roce_bond_cmd_type bond_type)
+{
+ switch (bond_type) {
+ case HNS_ROCE_SET_BOND:
+ return HNS_ROCE_OPC_SET_BOND_INFO;
+ case HNS_ROCE_CHANGE_BOND:
+ return HNS_ROCE_OPC_CHANGE_ACTIVE_PORT;
+ case HNS_ROCE_CLEAR_BOND:
+ return HNS_ROCE_OPC_CLEAR_BOND_INFO;
+ default:
+ WARN(true, "Invalid bond type %d!\n", bond_type);
+ return HNS_ROCE_OPC_SET_BOND_INFO;
+ }
+}
+
+static enum hns_roce_bond_hashtype
+ get_bond_hashtype(enum netdev_lag_hash netdev_hashtype)
+{
+ switch (netdev_hashtype) {
+ case NETDEV_LAG_HASH_L2:
+ return BOND_HASH_L2;
+ case NETDEV_LAG_HASH_L34:
+ return BOND_HASH_L34;
+ case NETDEV_LAG_HASH_L23:
+ return BOND_HASH_L23;
+ default:
+ WARN(true, "Invalid hash type %d!\n", netdev_hashtype);
+ return BOND_HASH_L2;
+ }
+}
+
+int hns_roce_cmd_bond(struct hns_roce_bond_group *bond_grp,
+ enum hns_roce_bond_cmd_type bond_type)
+{
+ enum hns_roce_opcode_type opcode = get_bond_opcode(bond_type);
+ struct hns_roce_bond_info *slave_info;
+ struct hns_roce_cmq_desc desc = {};
+ int ret;
+
+ slave_info = (struct hns_roce_bond_info *)desc.data;
+ hns_roce_cmq_setup_basic_desc(&desc, opcode, false);
+
+ slave_info->bond_id = cpu_to_le32(bond_grp->bond_id);
+ if (bond_type == HNS_ROCE_CLEAR_BOND)
+ goto out;
+
+ if (bond_grp->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
+ slave_info->bond_mode = cpu_to_le32(BOND_MODE_1);
+ if (bond_grp->active_slave_num != 1)
+ ibdev_warn(&bond_grp->main_hr_dev->ib_dev,
+ "active slave cnt(%u) in Mode 1 is invalid.\n",
+ bond_grp->active_slave_num);
+ } else {
+ slave_info->bond_mode = cpu_to_le32(BOND_MODE_2_4);
+ slave_info->hash_policy =
+ cpu_to_le32(get_bond_hashtype(bond_grp->hash_type));
+ }
+
+ slave_info->active_slave_cnt = cpu_to_le32(bond_grp->active_slave_num);
+ slave_info->active_slave_mask = cpu_to_le32(bond_grp->active_slave_map);
+ slave_info->slave_mask = cpu_to_le32(bond_grp->slave_map);
+
+out:
+ ret = hns_roce_cmq_send(bond_grp->main_hr_dev, &desc, 1);
+ if (ret)
+ ibdev_err(&bond_grp->main_hr_dev->ib_dev,
+ "cmq bond type(%d) failed, ret = %d.\n",
+ bond_type, ret);
+
+ return ret;
+}
+
+static int config_hem_ba_to_hw(struct hns_roce_dev *hr_dev,
+ dma_addr_t base_addr, u8 cmd, unsigned long tag)
+{
+ struct hns_roce_cmd_mailbox *mbox;
+ int ret;
+
+ mbox = hns_roce_alloc_cmd_mailbox(hr_dev);
+ if (IS_ERR(mbox))
+ return PTR_ERR(mbox);
+
+ ret = hns_roce_cmd_mbox(hr_dev, base_addr, mbox->dma, cmd, tag);
+ hns_roce_free_cmd_mailbox(hr_dev, mbox);
+ return ret;
+}
+
+static int hns_roce_cmq_query_hw_info(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_query_version *resp;
+ struct hns_roce_cmq_desc desc;
+ int ret;
+
+ hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_QUERY_HW_VER, true);
+ ret = hns_roce_cmq_send(hr_dev, &desc, 1);
+ if (ret)
+ return ret;
+
+ resp = (struct hns_roce_query_version *)desc.data;
+ hr_dev->hw_rev = le16_to_cpu(resp->rocee_hw_version);
+ hr_dev->vendor_id = hr_dev->pci_dev->vendor;
+
+ return 0;
+}
+
+static void func_clr_hw_resetting_state(struct hns_roce_dev *hr_dev,
+ struct hnae3_handle *handle)
+{
+ const struct hnae3_ae_ops *ops = handle->ae_algo->ops;
+ unsigned long end;
+
+ hr_dev->dis_db = true;
+
+ dev_warn(hr_dev->dev,
+ "func clear is pending, device in resetting state.\n");
+ end = HNS_ROCE_V2_HW_RST_TIMEOUT;
+ while (end) {
+ if (!ops->get_hw_reset_stat(handle)) {
+ hr_dev->is_reset = true;
+ dev_info(hr_dev->dev,
+ "func clear success after reset.\n");
+ return;
+ }
+ msleep(HNS_ROCE_V2_HW_RST_COMPLETION_WAIT);
+ end -= HNS_ROCE_V2_HW_RST_COMPLETION_WAIT;
+ }
+
+ dev_warn(hr_dev->dev, "func clear failed.\n");
+}
+
+static void func_clr_sw_resetting_state(struct hns_roce_dev *hr_dev,
+ struct hnae3_handle *handle)
+{
+ const struct hnae3_ae_ops *ops = handle->ae_algo->ops;
+ unsigned long end;
+
+ hr_dev->dis_db = true;
+
+ dev_warn(hr_dev->dev,
+ "func clear is pending, device in resetting state.\n");
+ end = HNS_ROCE_V2_HW_RST_TIMEOUT;
+ while (end) {
+ if (ops->ae_dev_reset_cnt(handle) !=
+ hr_dev->reset_cnt) {
+ hr_dev->is_reset = true;
+ dev_info(hr_dev->dev,
+ "func clear success after sw reset\n");
+ return;
+ }
+ msleep(HNS_ROCE_V2_HW_RST_COMPLETION_WAIT);
+ end -= HNS_ROCE_V2_HW_RST_COMPLETION_WAIT;
+ }
+
+ dev_warn(hr_dev->dev, "func clear failed because of unfinished sw reset\n");
+}
+
+static void hns_roce_func_clr_rst_proc(struct hns_roce_dev *hr_dev, int retval,
+ int flag)
+{
+ struct hns_roce_v2_priv *priv = hr_dev->priv;
+ struct hnae3_handle *handle = priv->handle;
+ const struct hnae3_ae_ops *ops = handle->ae_algo->ops;
+
+ if (ops->ae_dev_reset_cnt(handle) != hr_dev->reset_cnt) {
+ hr_dev->dis_db = true;
+ hr_dev->is_reset = true;
+ dev_info(hr_dev->dev, "func clear success after reset.\n");
+ return;
+ }
+
+ if (ops->get_hw_reset_stat(handle)) {
+ func_clr_hw_resetting_state(hr_dev, handle);
+ return;
+ }
+
+ if (ops->ae_dev_resetting(handle) &&
+ handle->rinfo.instance_state == HNS_ROCE_STATE_INIT) {
+ func_clr_sw_resetting_state(hr_dev, handle);
+ return;
+ }
+
+ if (retval && !flag)
+ dev_warn(hr_dev->dev,
+ "func clear read failed, ret = %d.\n", retval);
+
+ dev_warn(hr_dev->dev, "func clear failed.\n");
+}
+
+static void __hns_roce_function_clear(struct hns_roce_dev *hr_dev, int vf_id)
+{
+ bool fclr_write_fail_flag = false;
+ struct hns_roce_func_clear *resp;
+ struct hns_roce_cmq_desc desc;
+ unsigned long end;
+ int ret = 0;
+
+ if (check_device_is_in_reset(hr_dev))
+ goto out;
+
+ hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_FUNC_CLEAR, false);
+ resp = (struct hns_roce_func_clear *)desc.data;
+ resp->rst_funcid_en = cpu_to_le32(vf_id);
+
+ ret = hns_roce_cmq_send(hr_dev, &desc, 1);
+ if (ret) {
+ fclr_write_fail_flag = true;
+ dev_err(hr_dev->dev, "func clear write failed, ret = %d.\n",
+ ret);
+ goto out;
+ }
+
+ msleep(HNS_ROCE_V2_READ_FUNC_CLEAR_FLAG_INTERVAL);
+ end = HNS_ROCE_V2_FUNC_CLEAR_TIMEOUT_MSECS;
+ while (end) {
+ if (check_device_is_in_reset(hr_dev))
+ goto out;
+ msleep(HNS_ROCE_V2_READ_FUNC_CLEAR_FLAG_FAIL_WAIT);
+ end -= HNS_ROCE_V2_READ_FUNC_CLEAR_FLAG_FAIL_WAIT;
+
+ hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_FUNC_CLEAR,
+ true);
+
+ resp->rst_funcid_en = cpu_to_le32(vf_id);
+ ret = hns_roce_cmq_send(hr_dev, &desc, 1);
+ if (ret)
+ continue;
+
+ if (hr_reg_read(resp, FUNC_CLEAR_RST_FUN_DONE)) {
+ if (vf_id == 0)
+ hr_dev->is_reset = true;
+ return;
+ }
+ }
+
+out:
+ hns_roce_func_clr_rst_proc(hr_dev, ret, fclr_write_fail_flag);
+}
+
+static int hns_roce_free_vf_resource(struct hns_roce_dev *hr_dev, int vf_id)
+{
+ enum hns_roce_opcode_type opcode = HNS_ROCE_OPC_ALLOC_VF_RES;
+ struct hns_roce_cmq_desc desc[2];
+ struct hns_roce_cmq_req *req_a;
+
+ req_a = (struct hns_roce_cmq_req *)desc[0].data;
+ hns_roce_cmq_setup_basic_desc(&desc[0], opcode, false);
+ desc[0].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT);
+ hns_roce_cmq_setup_basic_desc(&desc[1], opcode, false);
+ hr_reg_write(req_a, FUNC_RES_A_VF_ID, vf_id);
+
+ return hns_roce_cmq_send(hr_dev, desc, 2);
+}
+
+static void hns_roce_function_clear(struct hns_roce_dev *hr_dev)
+{
+ int ret;
+ int i;
+
+ if (hr_dev->cmd.state == HNS_ROCE_CMDQ_STATE_FATAL_ERR)
+ return;
+
+ for (i = hr_dev->func_num - 1; i >= 0; i--) {
+ __hns_roce_function_clear(hr_dev, i);
+
+ if (i == 0)
+ continue;
+
+ ret = hns_roce_free_vf_resource(hr_dev, i);
+ if (ret)
+ ibdev_err(&hr_dev->ib_dev,
+ "failed to free vf resource, vf_id = %d, ret = %d.\n",
+ i, ret);
+ }
+}
+
+static int hns_roce_clear_extdb_list_info(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_cmq_desc desc;
+ int ret;
+
+ hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CLEAR_EXTDB_LIST_INFO,
+ false);
+ ret = hns_roce_cmq_send(hr_dev, &desc, 1);
+ if (ret)
+ ibdev_err(&hr_dev->ib_dev,
+ "failed to clear extended doorbell info, ret = %d.\n",
+ ret);
+
+ return ret;
+}
+
+static int hns_roce_query_fw_ver(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_query_fw_info *resp;
+ struct hns_roce_cmq_desc desc;
+ int ret;
+
+ hns_roce_cmq_setup_basic_desc(&desc, HNS_QUERY_FW_VER, true);
+ ret = hns_roce_cmq_send(hr_dev, &desc, 1);
+ if (ret)
+ return ret;
+
+ resp = (struct hns_roce_query_fw_info *)desc.data;
+ hr_dev->caps.fw_ver = (u64)(le32_to_cpu(resp->fw_ver));
+
+ return 0;
+}
+
+static int hns_roce_query_func_info(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_cmq_desc desc;
+ int ret;
+
+ if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) {
+ hr_dev->func_num = 1;
+ return 0;
+ }
+
+ hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_QUERY_FUNC_INFO,
+ true);
+ ret = hns_roce_cmq_send(hr_dev, &desc, 1);
+ if (ret) {
+ hr_dev->func_num = 1;
+ return ret;
+ }
+
+ hr_dev->func_num = le32_to_cpu(desc.func_info.own_func_num);
+ hr_dev->cong_algo_tmpl_id = le32_to_cpu(desc.func_info.own_mac_id);
+
+ return 0;
+}
+
+static int hns_roce_hw_v2_query_counter(struct hns_roce_dev *hr_dev,
+ u64 *stats, u32 port, int *num_counters)
+{
+#define CNT_PER_DESC 3
+ struct hns_roce_cmq_desc *desc;
+ int bd_idx, cnt_idx;
+ __le64 *cnt_data;
+ int desc_num;
+ int ret;
+ int i;
+
+ if (port > hr_dev->caps.num_ports)
+ return -EINVAL;
+
+ desc_num = DIV_ROUND_UP(HNS_ROCE_HW_CNT_TOTAL, CNT_PER_DESC);
+ desc = kcalloc(desc_num, sizeof(*desc), GFP_KERNEL);
+ if (!desc)
+ return -ENOMEM;
+
+ for (i = 0; i < desc_num; i++) {
+ hns_roce_cmq_setup_basic_desc(&desc[i],
+ HNS_ROCE_OPC_QUERY_COUNTER, true);
+ if (i != desc_num - 1)
+ desc[i].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT);
+ }
+
+ ret = hns_roce_cmq_send(hr_dev, desc, desc_num);
+ if (ret) {
+ ibdev_err(&hr_dev->ib_dev,
+ "failed to get counter, ret = %d.\n", ret);
+ goto err_out;
+ }
+
+ for (i = 0; i < HNS_ROCE_HW_CNT_TOTAL && i < *num_counters; i++) {
+ bd_idx = i / CNT_PER_DESC;
+ if (bd_idx != HNS_ROCE_HW_CNT_TOTAL / CNT_PER_DESC &&
+ !(desc[bd_idx].flag & cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT)))
+ break;
+
+ cnt_data = (__le64 *)&desc[bd_idx].data[0];
+ cnt_idx = i % CNT_PER_DESC;
+ stats[i] = le64_to_cpu(cnt_data[cnt_idx]);
+ }
+ *num_counters = i;
+
+err_out:
+ kfree(desc);
+ return ret;
+}
+
+static int hns_roce_config_global_param(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_cmq_desc desc;
+ struct hns_roce_cmq_req *req = (struct hns_roce_cmq_req *)desc.data;
+ u32 clock_cycles_of_1us;
+
+ hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CFG_GLOBAL_PARAM,
+ false);
+
+ if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08)
+ clock_cycles_of_1us = HNS_ROCE_1NS_CFG;
+ else
+ clock_cycles_of_1us = HNS_ROCE_1US_CFG;
+
+ hr_reg_write(req, CFG_GLOBAL_PARAM_1US_CYCLES, clock_cycles_of_1us);
+ hr_reg_write(req, CFG_GLOBAL_PARAM_UDP_PORT, ROCE_V2_UDP_DPORT);
+
+ return hns_roce_cmq_send(hr_dev, &desc, 1);
+}
+
+static int load_func_res_caps(struct hns_roce_dev *hr_dev, bool is_vf)
+{
+ struct hns_roce_cmq_desc desc[2];
+ struct hns_roce_cmq_req *r_a = (struct hns_roce_cmq_req *)desc[0].data;
+ struct hns_roce_cmq_req *r_b = (struct hns_roce_cmq_req *)desc[1].data;
+ struct hns_roce_caps *caps = &hr_dev->caps;
+ enum hns_roce_opcode_type opcode;
+ u32 func_num;
+ int ret;
+
+ if (is_vf) {
+ opcode = HNS_ROCE_OPC_QUERY_VF_RES;
+ func_num = 1;
+ } else {
+ opcode = HNS_ROCE_OPC_QUERY_PF_RES;
+ func_num = hr_dev->func_num;
+ }
+
+ hns_roce_cmq_setup_basic_desc(&desc[0], opcode, true);
+ desc[0].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT);
+ hns_roce_cmq_setup_basic_desc(&desc[1], opcode, true);
+
+ ret = hns_roce_cmq_send(hr_dev, desc, 2);
+ if (ret)
+ return ret;
+
+ caps->qpc_bt_num = hr_reg_read(r_a, FUNC_RES_A_QPC_BT_NUM) / func_num;
+ caps->srqc_bt_num = hr_reg_read(r_a, FUNC_RES_A_SRQC_BT_NUM) / func_num;
+ caps->cqc_bt_num = hr_reg_read(r_a, FUNC_RES_A_CQC_BT_NUM) / func_num;
+ caps->mpt_bt_num = hr_reg_read(r_a, FUNC_RES_A_MPT_BT_NUM) / func_num;
+ caps->eqc_bt_num = hr_reg_read(r_a, FUNC_RES_A_EQC_BT_NUM) / func_num;
+ caps->smac_bt_num = hr_reg_read(r_b, FUNC_RES_B_SMAC_NUM) / func_num;
+ caps->sgid_bt_num = hr_reg_read(r_b, FUNC_RES_B_SGID_NUM) / func_num;
+ caps->sccc_bt_num = hr_reg_read(r_b, FUNC_RES_B_SCCC_BT_NUM) / func_num;
+
+ if (is_vf) {
+ caps->sl_num = hr_reg_read(r_b, FUNC_RES_V_QID_NUM) / func_num;
+ caps->gmv_bt_num = hr_reg_read(r_b, FUNC_RES_V_GMV_BT_NUM) /
+ func_num;
+ } else {
+ caps->sl_num = hr_reg_read(r_b, FUNC_RES_B_QID_NUM) / func_num;
+ caps->gmv_bt_num = hr_reg_read(r_b, FUNC_RES_B_GMV_BT_NUM) /
+ func_num;
+ }
+
+ return 0;
+}
+
+static int load_pf_timer_res_caps(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_cmq_desc desc;
+ struct hns_roce_cmq_req *req = (struct hns_roce_cmq_req *)desc.data;
+ struct hns_roce_caps *caps = &hr_dev->caps;
+ int ret;
+
+ hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_QUERY_PF_TIMER_RES,
+ true);
+
+ ret = hns_roce_cmq_send(hr_dev, &desc, 1);
+ if (ret)
+ return ret;
+
+ caps->qpc_timer_bt_num = hr_reg_read(req, PF_TIMER_RES_QPC_ITEM_NUM);
+ caps->cqc_timer_bt_num = hr_reg_read(req, PF_TIMER_RES_CQC_ITEM_NUM);
+
+ return 0;
+}
+
+static int hns_roce_query_pf_resource(struct hns_roce_dev *hr_dev)
+{
+ struct device *dev = hr_dev->dev;
+ int ret;
+
+ ret = load_func_res_caps(hr_dev, false);
+ if (ret) {
+ dev_err(dev, "failed to load pf res caps, ret = %d.\n", ret);
+ return ret;
+ }
+
+ ret = load_pf_timer_res_caps(hr_dev);
+ if (ret)
+ dev_err(dev, "failed to load pf timer resource, ret = %d.\n",
+ ret);
+
+ return ret;
+}
+
+static int hns_roce_query_vf_resource(struct hns_roce_dev *hr_dev)
+{
+ struct device *dev = hr_dev->dev;
+ int ret;
+
+ ret = load_func_res_caps(hr_dev, true);
+ if (ret)
+ dev_err(dev, "failed to load vf res caps, ret = %d.\n", ret);
+
+ return ret;
+}
+
+static int __hns_roce_set_vf_switch_param(struct hns_roce_dev *hr_dev,
+ u32 vf_id)
+{
+ struct hns_roce_vf_switch *swt;
+ struct hns_roce_cmq_desc desc;
+ int ret;
+
+ swt = (struct hns_roce_vf_switch *)desc.data;
+ hns_roce_cmq_setup_basic_desc(&desc, HNS_SWITCH_PARAMETER_CFG, true);
+ swt->rocee_sel |= cpu_to_le32(HNS_ICL_SWITCH_CMD_ROCEE_SEL);
+ hr_reg_write(swt, VF_SWITCH_VF_ID, vf_id);
+ ret = hns_roce_cmq_send(hr_dev, &desc, 1);
+ if (ret)
+ return ret;
+
+ desc.flag = cpu_to_le16(HNS_ROCE_CMD_FLAG_IN);
+ desc.flag &= cpu_to_le16(~HNS_ROCE_CMD_FLAG_WR);
+ hr_reg_enable(swt, VF_SWITCH_ALW_LPBK);
+ hr_reg_clear(swt, VF_SWITCH_ALW_LCL_LPBK);
+ hr_reg_enable(swt, VF_SWITCH_ALW_DST_OVRD);
+
+ return hns_roce_cmq_send(hr_dev, &desc, 1);
+}
+
+static int hns_roce_set_vf_switch_param(struct hns_roce_dev *hr_dev)
+{
+ u32 vf_id;
+ int ret;
+
+ for (vf_id = 0; vf_id < hr_dev->func_num; vf_id++) {
+ ret = __hns_roce_set_vf_switch_param(hr_dev, vf_id);
+ if (ret)
+ return ret;
+ }
+ return 0;
+}
+
+static int config_vf_hem_resource(struct hns_roce_dev *hr_dev, int vf_id)
+{
+ struct hns_roce_cmq_desc desc[2];
+ struct hns_roce_cmq_req *r_a = (struct hns_roce_cmq_req *)desc[0].data;
+ struct hns_roce_cmq_req *r_b = (struct hns_roce_cmq_req *)desc[1].data;
+ enum hns_roce_opcode_type opcode = HNS_ROCE_OPC_ALLOC_VF_RES;
+ struct hns_roce_caps *caps = &hr_dev->caps;
+
+ hns_roce_cmq_setup_basic_desc(&desc[0], opcode, false);
+ desc[0].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT);
+ hns_roce_cmq_setup_basic_desc(&desc[1], opcode, false);
+
+ hr_reg_write(r_a, FUNC_RES_A_VF_ID, vf_id);
+
+ hr_reg_write(r_a, FUNC_RES_A_QPC_BT_NUM, caps->qpc_bt_num);
+ hr_reg_write(r_a, FUNC_RES_A_QPC_BT_IDX, vf_id * caps->qpc_bt_num);
+ hr_reg_write(r_a, FUNC_RES_A_SRQC_BT_NUM, caps->srqc_bt_num);
+ hr_reg_write(r_a, FUNC_RES_A_SRQC_BT_IDX, vf_id * caps->srqc_bt_num);
+ hr_reg_write(r_a, FUNC_RES_A_CQC_BT_NUM, caps->cqc_bt_num);
+ hr_reg_write(r_a, FUNC_RES_A_CQC_BT_IDX, vf_id * caps->cqc_bt_num);
+ hr_reg_write(r_a, FUNC_RES_A_MPT_BT_NUM, caps->mpt_bt_num);
+ hr_reg_write(r_a, FUNC_RES_A_MPT_BT_IDX, vf_id * caps->mpt_bt_num);
+ hr_reg_write(r_a, FUNC_RES_A_EQC_BT_NUM, caps->eqc_bt_num);
+ hr_reg_write(r_a, FUNC_RES_A_EQC_BT_IDX, vf_id * caps->eqc_bt_num);
+ hr_reg_write(r_b, FUNC_RES_V_QID_NUM, caps->sl_num);
+ hr_reg_write(r_b, FUNC_RES_B_QID_IDX, vf_id * caps->sl_num);
+ hr_reg_write(r_b, FUNC_RES_B_SCCC_BT_NUM, caps->sccc_bt_num);
+ hr_reg_write(r_b, FUNC_RES_B_SCCC_BT_IDX, vf_id * caps->sccc_bt_num);
+
+ if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) {
+ hr_reg_write(r_b, FUNC_RES_V_GMV_BT_NUM, caps->gmv_bt_num);
+ hr_reg_write(r_b, FUNC_RES_B_GMV_BT_IDX,
+ vf_id * caps->gmv_bt_num);
+ } else {
+ hr_reg_write(r_b, FUNC_RES_B_SGID_NUM, caps->sgid_bt_num);
+ hr_reg_write(r_b, FUNC_RES_B_SGID_IDX,
+ vf_id * caps->sgid_bt_num);
+ hr_reg_write(r_b, FUNC_RES_B_SMAC_NUM, caps->smac_bt_num);
+ hr_reg_write(r_b, FUNC_RES_B_SMAC_IDX,
+ vf_id * caps->smac_bt_num);
+ }
+
+ return hns_roce_cmq_send(hr_dev, desc, 2);
+}
+
+static int hns_roce_alloc_vf_resource(struct hns_roce_dev *hr_dev)
+{
+ u32 func_num = max_t(u32, 1, hr_dev->func_num);
+ u32 vf_id;
+ int ret;
+
+ for (vf_id = 0; vf_id < func_num; vf_id++) {
+ ret = config_vf_hem_resource(hr_dev, vf_id);
+ if (ret) {
+ dev_err(hr_dev->dev,
+ "failed to config vf-%u hem res, ret = %d.\n",
+ vf_id, ret);
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+static int hns_roce_v2_set_bt(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_cmq_desc desc;
+ struct hns_roce_cmq_req *req = (struct hns_roce_cmq_req *)desc.data;
+ struct hns_roce_caps *caps = &hr_dev->caps;
+
+ hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CFG_BT_ATTR, false);
+
+ hr_reg_write(req, CFG_BT_ATTR_QPC_BA_PGSZ,
+ caps->qpc_ba_pg_sz + PG_SHIFT_OFFSET);
+ hr_reg_write(req, CFG_BT_ATTR_QPC_BUF_PGSZ,
+ caps->qpc_buf_pg_sz + PG_SHIFT_OFFSET);
+ hr_reg_write(req, CFG_BT_ATTR_QPC_HOPNUM,
+ to_hr_hem_hopnum(caps->qpc_hop_num, caps->num_qps));
+
+ hr_reg_write(req, CFG_BT_ATTR_SRQC_BA_PGSZ,
+ caps->srqc_ba_pg_sz + PG_SHIFT_OFFSET);
+ hr_reg_write(req, CFG_BT_ATTR_SRQC_BUF_PGSZ,
+ caps->srqc_buf_pg_sz + PG_SHIFT_OFFSET);
+ hr_reg_write(req, CFG_BT_ATTR_SRQC_HOPNUM,
+ to_hr_hem_hopnum(caps->srqc_hop_num, caps->num_srqs));
+
+ hr_reg_write(req, CFG_BT_ATTR_CQC_BA_PGSZ,
+ caps->cqc_ba_pg_sz + PG_SHIFT_OFFSET);
+ hr_reg_write(req, CFG_BT_ATTR_CQC_BUF_PGSZ,
+ caps->cqc_buf_pg_sz + PG_SHIFT_OFFSET);
+ hr_reg_write(req, CFG_BT_ATTR_CQC_HOPNUM,
+ to_hr_hem_hopnum(caps->cqc_hop_num, caps->num_cqs));
+
+ hr_reg_write(req, CFG_BT_ATTR_MPT_BA_PGSZ,
+ caps->mpt_ba_pg_sz + PG_SHIFT_OFFSET);
+ hr_reg_write(req, CFG_BT_ATTR_MPT_BUF_PGSZ,
+ caps->mpt_buf_pg_sz + PG_SHIFT_OFFSET);
+ hr_reg_write(req, CFG_BT_ATTR_MPT_HOPNUM,
+ to_hr_hem_hopnum(caps->mpt_hop_num, caps->num_mtpts));
+
+ hr_reg_write(req, CFG_BT_ATTR_SCCC_BA_PGSZ,
+ caps->sccc_ba_pg_sz + PG_SHIFT_OFFSET);
+ hr_reg_write(req, CFG_BT_ATTR_SCCC_BUF_PGSZ,
+ caps->sccc_buf_pg_sz + PG_SHIFT_OFFSET);
+ hr_reg_write(req, CFG_BT_ATTR_SCCC_HOPNUM,
+ to_hr_hem_hopnum(caps->sccc_hop_num, caps->num_qps));
+
+ return hns_roce_cmq_send(hr_dev, &desc, 1);
+}
+
+static void calc_pg_sz(u32 obj_num, u32 obj_size, u32 hop_num, u32 ctx_bt_num,
+ u32 *buf_page_size, u32 *bt_page_size, u32 hem_type)
+{
+ u64 obj_per_chunk;
+ u64 bt_chunk_size = PAGE_SIZE;
+ u64 buf_chunk_size = PAGE_SIZE;
+ u64 obj_per_chunk_default = buf_chunk_size / obj_size;
+
+ *buf_page_size = 0;
+ *bt_page_size = 0;
+
+ switch (hop_num) {
+ case 3:
+ obj_per_chunk = ctx_bt_num * (bt_chunk_size / BA_BYTE_LEN) *
+ (bt_chunk_size / BA_BYTE_LEN) *
+ (bt_chunk_size / BA_BYTE_LEN) *
+ obj_per_chunk_default;
+ break;
+ case 2:
+ obj_per_chunk = ctx_bt_num * (bt_chunk_size / BA_BYTE_LEN) *
+ (bt_chunk_size / BA_BYTE_LEN) *
+ obj_per_chunk_default;
+ break;
+ case 1:
+ obj_per_chunk = ctx_bt_num * (bt_chunk_size / BA_BYTE_LEN) *
+ obj_per_chunk_default;
+ break;
+ case HNS_ROCE_HOP_NUM_0:
+ obj_per_chunk = ctx_bt_num * obj_per_chunk_default;
+ break;
+ default:
+ pr_err("table %u not support hop_num = %u!\n", hem_type,
+ hop_num);
+ return;
+ }
+
+ if (hem_type >= HEM_TYPE_MTT)
+ *bt_page_size = ilog2(DIV_ROUND_UP(obj_num, obj_per_chunk));
+ else
+ *buf_page_size = ilog2(DIV_ROUND_UP(obj_num, obj_per_chunk));
+}
+
+static void set_hem_page_size(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_caps *caps = &hr_dev->caps;
+
+ /* EQ */
+ caps->eqe_ba_pg_sz = 0;
+ caps->eqe_buf_pg_sz = 0;
+
+ /* Link Table */
+ caps->llm_buf_pg_sz = 0;
+
+ /* MR */
+ caps->mpt_ba_pg_sz = 0;
+ caps->mpt_buf_pg_sz = 0;
+ caps->pbl_ba_pg_sz = HNS_ROCE_BA_PG_SZ_SUPPORTED_16K;
+ caps->pbl_buf_pg_sz = 0;
+ calc_pg_sz(caps->num_mtpts, caps->mtpt_entry_sz, caps->mpt_hop_num,
+ caps->mpt_bt_num, &caps->mpt_buf_pg_sz, &caps->mpt_ba_pg_sz,
+ HEM_TYPE_MTPT);
+
+ /* QP */
+ caps->qpc_ba_pg_sz = 0;
+ caps->qpc_buf_pg_sz = 0;
+ caps->qpc_timer_ba_pg_sz = 0;
+ caps->qpc_timer_buf_pg_sz = 0;
+ caps->sccc_ba_pg_sz = 0;
+ caps->sccc_buf_pg_sz = 0;
+ caps->mtt_ba_pg_sz = 0;
+ caps->mtt_buf_pg_sz = 0;
+ calc_pg_sz(caps->num_qps, caps->qpc_sz, caps->qpc_hop_num,
+ caps->qpc_bt_num, &caps->qpc_buf_pg_sz, &caps->qpc_ba_pg_sz,
+ HEM_TYPE_QPC);
+
+ if (caps->flags & HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL)
+ calc_pg_sz(caps->num_qps, caps->sccc_sz, caps->sccc_hop_num,
+ caps->sccc_bt_num, &caps->sccc_buf_pg_sz,
+ &caps->sccc_ba_pg_sz, HEM_TYPE_SCCC);
+
+ /* CQ */
+ caps->cqc_ba_pg_sz = 0;
+ caps->cqc_buf_pg_sz = 0;
+ caps->cqc_timer_ba_pg_sz = 0;
+ caps->cqc_timer_buf_pg_sz = 0;
+ caps->cqe_ba_pg_sz = HNS_ROCE_BA_PG_SZ_SUPPORTED_256K;
+ caps->cqe_buf_pg_sz = 0;
+ calc_pg_sz(caps->num_cqs, caps->cqc_entry_sz, caps->cqc_hop_num,
+ caps->cqc_bt_num, &caps->cqc_buf_pg_sz, &caps->cqc_ba_pg_sz,
+ HEM_TYPE_CQC);
+ calc_pg_sz(caps->max_cqes, caps->cqe_sz, caps->cqe_hop_num,
+ 1, &caps->cqe_buf_pg_sz, &caps->cqe_ba_pg_sz, HEM_TYPE_CQE);
+
+ /* SRQ */
+ if (caps->flags & HNS_ROCE_CAP_FLAG_SRQ) {
+ caps->srqc_ba_pg_sz = 0;
+ caps->srqc_buf_pg_sz = 0;
+ caps->srqwqe_ba_pg_sz = 0;
+ caps->srqwqe_buf_pg_sz = 0;
+ caps->idx_ba_pg_sz = 0;
+ caps->idx_buf_pg_sz = 0;
+ calc_pg_sz(caps->num_srqs, caps->srqc_entry_sz,
+ caps->srqc_hop_num, caps->srqc_bt_num,
+ &caps->srqc_buf_pg_sz, &caps->srqc_ba_pg_sz,
+ HEM_TYPE_SRQC);
+ calc_pg_sz(caps->num_srqwqe_segs, caps->mtt_entry_sz,
+ caps->srqwqe_hop_num, 1, &caps->srqwqe_buf_pg_sz,
+ &caps->srqwqe_ba_pg_sz, HEM_TYPE_SRQWQE);
+ calc_pg_sz(caps->num_idx_segs, caps->idx_entry_sz,
+ caps->idx_hop_num, 1, &caps->idx_buf_pg_sz,
+ &caps->idx_ba_pg_sz, HEM_TYPE_IDX);
+ }
+
+ /* GMV */
+ caps->gmv_ba_pg_sz = 0;
+ caps->gmv_buf_pg_sz = 0;
+}
+
+/* Apply all loaded caps before setting to hardware */
+static void apply_func_caps(struct hns_roce_dev *hr_dev)
+{
+#define MAX_GID_TBL_LEN 256
+ struct hns_roce_caps *caps = &hr_dev->caps;
+ struct hns_roce_v2_priv *priv = hr_dev->priv;
+
+ /* The following configurations don't need to be got from firmware. */
+ caps->qpc_timer_entry_sz = HNS_ROCE_V2_QPC_TIMER_ENTRY_SZ;
+ caps->cqc_timer_entry_sz = HNS_ROCE_V2_CQC_TIMER_ENTRY_SZ;
+ caps->mtt_entry_sz = HNS_ROCE_V2_MTT_ENTRY_SZ;
+
+ caps->pbl_hop_num = HNS_ROCE_PBL_HOP_NUM;
+ caps->qpc_timer_hop_num = HNS_ROCE_HOP_NUM_0;
+ caps->cqc_timer_hop_num = HNS_ROCE_HOP_NUM_0;
+
+ caps->num_srqwqe_segs = HNS_ROCE_V2_MAX_SRQWQE_SEGS;
+ caps->num_idx_segs = HNS_ROCE_V2_MAX_IDX_SEGS;
+
+ if (!caps->num_comp_vectors)
+ caps->num_comp_vectors =
+ min_t(u32, caps->eqc_bt_num - HNS_ROCE_V2_AEQE_VEC_NUM,
+ (u32)priv->handle->rinfo.num_vectors -
+ (HNS_ROCE_V2_AEQE_VEC_NUM + HNS_ROCE_V2_ABNORMAL_VEC_NUM));
+
+ if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) {
+ caps->eqe_hop_num = HNS_ROCE_V3_EQE_HOP_NUM;
+ caps->ceqe_size = HNS_ROCE_V3_EQE_SIZE;
+ caps->aeqe_size = HNS_ROCE_V3_EQE_SIZE;
+
+ /* The following configurations will be overwritten */
+ caps->qpc_sz = HNS_ROCE_V3_QPC_SZ;
+ caps->cqe_sz = HNS_ROCE_V3_CQE_SIZE;
+ caps->sccc_sz = HNS_ROCE_V3_SCCC_SZ;
+
+ /* The following configurations are not got from firmware */
+ caps->gmv_entry_sz = HNS_ROCE_V3_GMV_ENTRY_SZ;
+
+ caps->gmv_hop_num = HNS_ROCE_HOP_NUM_0;
+
+ /* It's meaningless to support excessively large gid_table_len,
+ * as the type of sgid_index in kernel struct ib_global_route
+ * and userspace struct ibv_global_route are u8/uint8_t (0-255).
+ */
+ caps->gid_table_len[0] = min_t(u32, MAX_GID_TBL_LEN,
+ caps->gmv_bt_num *
+ (HNS_HW_PAGE_SIZE / caps->gmv_entry_sz));
+
+ caps->gmv_entry_num = caps->gmv_bt_num * (HNS_HW_PAGE_SIZE /
+ caps->gmv_entry_sz);
+ } else {
+ u32 func_num = max_t(u32, 1, hr_dev->func_num);
+
+ caps->eqe_hop_num = HNS_ROCE_V2_EQE_HOP_NUM;
+ caps->ceqe_size = HNS_ROCE_CEQE_SIZE;
+ caps->aeqe_size = HNS_ROCE_AEQE_SIZE;
+ caps->gid_table_len[0] /= func_num;
+ }
+
+ if (hr_dev->is_vf) {
+ caps->default_aeq_arm_st = 0x3;
+ caps->default_ceq_arm_st = 0x3;
+ caps->default_ceq_max_cnt = 0x1;
+ caps->default_ceq_period = 0x10;
+ caps->default_aeq_max_cnt = 0x1;
+ caps->default_aeq_period = 0x10;
+ }
+
+ set_hem_page_size(hr_dev);
+}
+
+static int hns_roce_query_caps(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_cmq_desc desc[HNS_ROCE_QUERY_PF_CAPS_CMD_NUM] = {};
+ struct hns_roce_caps *caps = &hr_dev->caps;
+ struct hns_roce_query_pf_caps_a *resp_a;
+ struct hns_roce_query_pf_caps_b *resp_b;
+ struct hns_roce_query_pf_caps_c *resp_c;
+ struct hns_roce_query_pf_caps_d *resp_d;
+ struct hns_roce_query_pf_caps_e *resp_e;
+ struct hns_roce_query_pf_caps_f *resp_f;
+ enum hns_roce_opcode_type cmd;
+ int ctx_hop_num;
+ int pbl_hop_num;
+ int cmd_num;
+ int ret;
+ int i;
+
+ cmd = hr_dev->is_vf ? HNS_ROCE_OPC_QUERY_VF_CAPS_NUM :
+ HNS_ROCE_OPC_QUERY_PF_CAPS_NUM;
+ cmd_num = hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08 ?
+ HNS_ROCE_QUERY_PF_CAPS_CMD_NUM_HIP08 :
+ HNS_ROCE_QUERY_PF_CAPS_CMD_NUM;
+
+ for (i = 0; i < cmd_num - 1; i++) {
+ hns_roce_cmq_setup_basic_desc(&desc[i], cmd, true);
+ desc[i].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT);
+ }
+
+ hns_roce_cmq_setup_basic_desc(&desc[cmd_num - 1], cmd, true);
+ desc[cmd_num - 1].flag &= ~cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT);
+
+ ret = hns_roce_cmq_send(hr_dev, desc, cmd_num);
+ if (ret)
+ return ret;
+
+ resp_a = (struct hns_roce_query_pf_caps_a *)desc[0].data;
+ resp_b = (struct hns_roce_query_pf_caps_b *)desc[1].data;
+ resp_c = (struct hns_roce_query_pf_caps_c *)desc[2].data;
+ resp_d = (struct hns_roce_query_pf_caps_d *)desc[3].data;
+ resp_e = (struct hns_roce_query_pf_caps_e *)desc[4].data;
+ resp_f = (struct hns_roce_query_pf_caps_f *)desc[5].data;
+
+ caps->local_ca_ack_delay = resp_a->local_ca_ack_delay;
+ caps->max_sq_sg = le16_to_cpu(resp_a->max_sq_sg);
+ caps->max_sq_inline = le16_to_cpu(resp_a->max_sq_inline);
+ caps->max_rq_sg = le16_to_cpu(resp_a->max_rq_sg);
+ caps->max_rq_sg = roundup_pow_of_two(caps->max_rq_sg);
+ caps->max_srq_sges = le16_to_cpu(resp_a->max_srq_sges);
+ caps->max_srq_sges = roundup_pow_of_two(caps->max_srq_sges);
+ caps->num_aeq_vectors = resp_a->num_aeq_vectors;
+ caps->num_other_vectors = resp_a->num_other_vectors;
+ caps->max_sq_desc_sz = resp_a->max_sq_desc_sz;
+ caps->max_rq_desc_sz = resp_a->max_rq_desc_sz;
+
+ caps->mtpt_entry_sz = resp_b->mtpt_entry_sz;
+ caps->irrl_entry_sz = resp_b->irrl_entry_sz;
+ caps->trrl_entry_sz = resp_b->trrl_entry_sz;
+ caps->cqc_entry_sz = resp_b->cqc_entry_sz;
+ caps->srqc_entry_sz = resp_b->srqc_entry_sz;
+ caps->idx_entry_sz = resp_b->idx_entry_sz;
+ caps->sccc_sz = resp_b->sccc_sz;
+ caps->max_mtu = resp_b->max_mtu;
+ caps->min_cqes = resp_b->min_cqes;
+ caps->min_wqes = resp_b->min_wqes;
+ caps->page_size_cap = le32_to_cpu(resp_b->page_size_cap);
+ caps->pkey_table_len[0] = resp_b->pkey_table_len;
+ caps->phy_num_uars = resp_b->phy_num_uars;
+ ctx_hop_num = resp_b->ctx_hop_num;
+ pbl_hop_num = resp_b->pbl_hop_num;
+
+ caps->num_pds = 1 << hr_reg_read(resp_c, PF_CAPS_C_NUM_PDS);
+
+ caps->flags = hr_reg_read(resp_c, PF_CAPS_C_CAP_FLAGS);
+ caps->flags |= le16_to_cpu(resp_d->cap_flags_ex) <<
+ HNS_ROCE_CAP_FLAGS_EX_SHIFT;
+
+ if (hr_dev->is_vf)
+ caps->flags &= ~HNS_ROCE_CAP_FLAG_BOND;
+
+ caps->num_cqs = 1 << hr_reg_read(resp_c, PF_CAPS_C_NUM_CQS);
+ caps->gid_table_len[0] = hr_reg_read(resp_c, PF_CAPS_C_MAX_GID);
+ caps->max_cqes = 1 << hr_reg_read(resp_c, PF_CAPS_C_CQ_DEPTH);
+ caps->num_xrcds = 1 << hr_reg_read(resp_c, PF_CAPS_C_NUM_XRCDS);
+ caps->num_mtpts = 1 << hr_reg_read(resp_c, PF_CAPS_C_NUM_MRWS);
+ caps->num_qps = 1 << hr_reg_read(resp_c, PF_CAPS_C_NUM_QPS);
+ caps->max_qp_init_rdma = hr_reg_read(resp_c, PF_CAPS_C_MAX_ORD);
+ caps->max_qp_dest_rdma = caps->max_qp_init_rdma;
+ caps->max_wqes = 1 << le16_to_cpu(resp_c->sq_depth);
+
+ caps->num_srqs = 1 << hr_reg_read(resp_d, PF_CAPS_D_NUM_SRQS);
+ caps->cong_cap = hr_reg_read(resp_d, PF_CAPS_D_CONG_CAP);
+ caps->max_srq_wrs = 1 << le16_to_cpu(resp_d->srq_depth);
+ caps->ceqe_depth = 1 << hr_reg_read(resp_d, PF_CAPS_D_CEQ_DEPTH);
+ caps->num_comp_vectors = hr_reg_read(resp_d, PF_CAPS_D_NUM_CEQS);
+ caps->aeqe_depth = 1 << hr_reg_read(resp_d, PF_CAPS_D_AEQ_DEPTH);
+ caps->default_cong_type = hr_reg_read(resp_d, PF_CAPS_D_DEFAULT_ALG);
+ caps->reserved_pds = hr_reg_read(resp_d, PF_CAPS_D_RSV_PDS);
+ caps->num_uars = 1 << hr_reg_read(resp_d, PF_CAPS_D_NUM_UARS);
+ caps->reserved_qps = hr_reg_read(resp_d, PF_CAPS_D_RSV_QPS);
+ caps->reserved_uars = hr_reg_read(resp_d, PF_CAPS_D_RSV_UARS);
+
+ caps->reserved_mrws = hr_reg_read(resp_e, PF_CAPS_E_RSV_MRWS);
+ caps->chunk_sz = 1 << hr_reg_read(resp_e, PF_CAPS_E_CHUNK_SIZE_SHIFT);
+ caps->reserved_cqs = hr_reg_read(resp_e, PF_CAPS_E_RSV_CQS);
+ caps->reserved_xrcds = hr_reg_read(resp_e, PF_CAPS_E_RSV_XRCDS);
+ caps->reserved_srqs = hr_reg_read(resp_e, PF_CAPS_E_RSV_SRQS);
+ caps->reserved_lkey = hr_reg_read(resp_e, PF_CAPS_E_RSV_LKEYS);
+
+ caps->max_ack_req_msg_len = le32_to_cpu(resp_f->max_ack_req_msg_len);
+
+ caps->qpc_hop_num = ctx_hop_num;
+ caps->sccc_hop_num = ctx_hop_num;
+ caps->srqc_hop_num = ctx_hop_num;
+ caps->cqc_hop_num = ctx_hop_num;
+ caps->mpt_hop_num = ctx_hop_num;
+ caps->mtt_hop_num = pbl_hop_num;
+ caps->cqe_hop_num = pbl_hop_num;
+ caps->srqwqe_hop_num = pbl_hop_num;
+ caps->idx_hop_num = pbl_hop_num;
+ caps->wqe_sq_hop_num = hr_reg_read(resp_d, PF_CAPS_D_SQWQE_HOP_NUM);
+ caps->wqe_sge_hop_num = hr_reg_read(resp_d, PF_CAPS_D_EX_SGE_HOP_NUM);
+ caps->wqe_rq_hop_num = hr_reg_read(resp_d, PF_CAPS_D_RQWQE_HOP_NUM);
+
+ if (!(caps->page_size_cap & PAGE_SIZE))
+ caps->page_size_cap = HNS_ROCE_V2_PAGE_SIZE_SUPPORTED;
+
+ if (!hr_dev->is_vf) {
+ caps->cqe_sz = resp_a->cqe_sz;
+ caps->qpc_sz = le16_to_cpu(resp_b->qpc_sz);
+ caps->default_aeq_arm_st =
+ hr_reg_read(resp_d, PF_CAPS_D_AEQ_ARM_ST);
+ caps->default_ceq_arm_st =
+ hr_reg_read(resp_d, PF_CAPS_D_CEQ_ARM_ST);
+ caps->default_ceq_max_cnt = le16_to_cpu(resp_e->ceq_max_cnt);
+ caps->default_ceq_period = le16_to_cpu(resp_e->ceq_period);
+ caps->default_aeq_max_cnt = le16_to_cpu(resp_e->aeq_max_cnt);
+ caps->default_aeq_period = le16_to_cpu(resp_e->aeq_period);
+ }
+
+ return 0;
+}
+
+static int config_hem_entry_size(struct hns_roce_dev *hr_dev, u32 type, u32 val)
+{
+ struct hns_roce_cmq_desc desc;
+ struct hns_roce_cmq_req *req = (struct hns_roce_cmq_req *)desc.data;
+
+ hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CFG_ENTRY_SIZE,
+ false);
+
+ hr_reg_write(req, CFG_HEM_ENTRY_SIZE_TYPE, type);
+ hr_reg_write(req, CFG_HEM_ENTRY_SIZE_VALUE, val);
+
+ return hns_roce_cmq_send(hr_dev, &desc, 1);
+}
+
+static int hns_roce_config_entry_size(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_caps *caps = &hr_dev->caps;
+ int ret;
+
+ if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08)
+ return 0;
+
+ ret = config_hem_entry_size(hr_dev, HNS_ROCE_CFG_QPC_SIZE,
+ caps->qpc_sz);
+ if (ret) {
+ dev_err(hr_dev->dev, "failed to cfg qpc sz, ret = %d.\n", ret);
+ return ret;
+ }
+
+ ret = config_hem_entry_size(hr_dev, HNS_ROCE_CFG_SCCC_SIZE,
+ caps->sccc_sz);
+ if (ret)
+ dev_err(hr_dev->dev, "failed to cfg sccc sz, ret = %d.\n", ret);
+
+ return ret;
+}
+
+static int hns_roce_v2_vf_profile(struct hns_roce_dev *hr_dev)
+{
+ struct device *dev = hr_dev->dev;
+ int ret;
+
+ hr_dev->func_num = 1;
+
+ ret = hns_roce_query_caps(hr_dev);
+ if (ret) {
+ dev_err(dev, "failed to query VF caps, ret = %d.\n", ret);
+ return ret;
+ }
+
+ ret = hns_roce_query_vf_resource(hr_dev);
+ if (ret) {
+ dev_err(dev, "failed to query VF resource, ret = %d.\n", ret);
+ return ret;
+ }
+
+ apply_func_caps(hr_dev);
+
+ ret = hns_roce_v2_set_bt(hr_dev);
+ if (ret)
+ dev_err(dev, "failed to config VF BA table, ret = %d.\n", ret);
+
+ return ret;
+}
+
+static int hns_roce_v2_pf_profile(struct hns_roce_dev *hr_dev)
+{
+ struct device *dev = hr_dev->dev;
+ int ret;
+
+ ret = hns_roce_query_func_info(hr_dev);
+ if (ret) {
+ dev_err(dev, "failed to query func info, ret = %d.\n", ret);
+ return ret;
+ }
+
+ ret = hns_roce_config_global_param(hr_dev);
+ if (ret) {
+ dev_err(dev, "failed to config global param, ret = %d.\n", ret);
+ return ret;
+ }
+
+ ret = hns_roce_set_vf_switch_param(hr_dev);
+ if (ret) {
+ dev_err(dev, "failed to set switch param, ret = %d.\n", ret);
+ return ret;
+ }
+
+ ret = hns_roce_query_caps(hr_dev);
+ if (ret) {
+ dev_err(dev, "failed to query PF caps, ret = %d.\n", ret);
+ return ret;
+ }
+
+ ret = hns_roce_query_pf_resource(hr_dev);
+ if (ret) {
+ dev_err(dev, "failed to query pf resource, ret = %d.\n", ret);
+ return ret;
+ }
+
+ apply_func_caps(hr_dev);
+
+ ret = hns_roce_alloc_vf_resource(hr_dev);
+ if (ret) {
+ dev_err(dev, "failed to alloc vf resource, ret = %d.\n", ret);
+ return ret;
+ }
+
+ ret = hns_roce_v2_set_bt(hr_dev);
+ if (ret) {
+ dev_err(dev, "failed to config BA table, ret = %d.\n", ret);
+ return ret;
+ }
+
+ /* Configure the size of QPC, SCCC, etc. */
+ return hns_roce_config_entry_size(hr_dev);
+}
+
+static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
+{
+ struct device *dev = hr_dev->dev;
+ int ret;
+
+ ret = hns_roce_cmq_query_hw_info(hr_dev);
+ if (ret) {
+ dev_err(dev, "failed to query hardware info, ret = %d.\n", ret);
+ return ret;
+ }
+
+ ret = hns_roce_query_fw_ver(hr_dev);
+ if (ret) {
+ dev_err(dev, "failed to query firmware info, ret = %d.\n", ret);
+ return ret;
+ }
+
+ hr_dev->vendor_part_id = hr_dev->pci_dev->device;
+ hr_dev->sys_image_guid = be64_to_cpu(hr_dev->ib_dev.node_guid);
+
+ if (hr_dev->is_vf)
+ return hns_roce_v2_vf_profile(hr_dev);
+ else
+ return hns_roce_v2_pf_profile(hr_dev);
+}
+
+static void config_llm_table(struct hns_roce_buf *data_buf, void *cfg_buf)
+{
+ u32 i, next_ptr, page_num;
+ __le64 *entry = cfg_buf;
+ dma_addr_t addr;
+ u64 val;
+
+ page_num = data_buf->npages;
+ for (i = 0; i < page_num; i++) {
+ addr = hns_roce_buf_page(data_buf, i);
+ if (i == (page_num - 1))
+ next_ptr = 0;
+ else
+ next_ptr = i + 1;
+
+ val = HNS_ROCE_EXT_LLM_ENTRY(addr, (u64)next_ptr);
+ entry[i] = cpu_to_le64(val);
+ }
+}
+
+static int set_llm_cfg_to_hw(struct hns_roce_dev *hr_dev,
+ struct hns_roce_link_table *table)
+{
+ struct hns_roce_cmq_desc desc[2];
+ struct hns_roce_cmq_req *r_a = (struct hns_roce_cmq_req *)desc[0].data;
+ struct hns_roce_cmq_req *r_b = (struct hns_roce_cmq_req *)desc[1].data;
+ struct hns_roce_buf *buf = table->buf;
+ enum hns_roce_opcode_type opcode;
+ dma_addr_t addr;
+
+ opcode = HNS_ROCE_OPC_CFG_EXT_LLM;
+ hns_roce_cmq_setup_basic_desc(&desc[0], opcode, false);
+ desc[0].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT);
+ hns_roce_cmq_setup_basic_desc(&desc[1], opcode, false);
+
+ hr_reg_write(r_a, CFG_LLM_A_BA_L, lower_32_bits(table->table.map));
+ hr_reg_write(r_a, CFG_LLM_A_BA_H, upper_32_bits(table->table.map));
+ hr_reg_write(r_a, CFG_LLM_A_DEPTH, buf->npages);
+ hr_reg_write(r_a, CFG_LLM_A_PGSZ, to_hr_hw_page_shift(buf->page_shift));
+ hr_reg_enable(r_a, CFG_LLM_A_INIT_EN);
+
+ addr = to_hr_hw_page_addr(hns_roce_buf_page(buf, 0));
+ hr_reg_write(r_a, CFG_LLM_A_HEAD_BA_L, lower_32_bits(addr));
+ hr_reg_write(r_a, CFG_LLM_A_HEAD_BA_H, upper_32_bits(addr));
+ hr_reg_write(r_a, CFG_LLM_A_HEAD_NXTPTR, 1);
+ hr_reg_write(r_a, CFG_LLM_A_HEAD_PTR, 0);
+
+ addr = to_hr_hw_page_addr(hns_roce_buf_page(buf, buf->npages - 1));
+ hr_reg_write(r_b, CFG_LLM_B_TAIL_BA_L, lower_32_bits(addr));
+ hr_reg_write(r_b, CFG_LLM_B_TAIL_BA_H, upper_32_bits(addr));
+ hr_reg_write(r_b, CFG_LLM_B_TAIL_PTR, buf->npages - 1);
+
+ return hns_roce_cmq_send(hr_dev, desc, 2);
+}
+
+static struct hns_roce_link_table *
+alloc_link_table_buf(struct hns_roce_dev *hr_dev)
+{
+ u16 total_sl = hr_dev->caps.sl_num * hr_dev->func_num;
+ struct hns_roce_v2_priv *priv = hr_dev->priv;
+ struct hns_roce_link_table *link_tbl;
+ u32 pg_shift, size, min_size;
+
+ link_tbl = &priv->ext_llm;
+ pg_shift = hr_dev->caps.llm_buf_pg_sz + PAGE_SHIFT;
+ size = hr_dev->caps.num_qps * hr_dev->func_num *
+ HNS_ROCE_V2_EXT_LLM_ENTRY_SZ;
+ min_size = HNS_ROCE_EXT_LLM_MIN_PAGES(total_sl) << pg_shift;
+
+ /* Alloc data table */
+ size = max(size, min_size);
+ link_tbl->buf = hns_roce_buf_alloc(hr_dev, size, pg_shift, 0);
+ if (IS_ERR(link_tbl->buf))
+ return ERR_PTR(-ENOMEM);
+
+ /* Alloc config table */
+ size = link_tbl->buf->npages * sizeof(u64);
+ link_tbl->table.buf = dma_alloc_coherent(hr_dev->dev, size,
+ &link_tbl->table.map,
+ GFP_KERNEL);
+ if (!link_tbl->table.buf) {
+ hns_roce_buf_free(hr_dev, link_tbl->buf);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ return link_tbl;
+}
+
+static void free_link_table_buf(struct hns_roce_dev *hr_dev,
+ struct hns_roce_link_table *tbl)
+{
+ if (tbl->buf) {
+ u32 size = tbl->buf->npages * sizeof(u64);
+
+ dma_free_coherent(hr_dev->dev, size, tbl->table.buf,
+ tbl->table.map);
+ }
+
+ hns_roce_buf_free(hr_dev, tbl->buf);
+}
+
+static int hns_roce_init_link_table(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_link_table *link_tbl;
+ int ret;
+
+ link_tbl = alloc_link_table_buf(hr_dev);
+ if (IS_ERR(link_tbl))
+ return -ENOMEM;
+
+ if (WARN_ON(link_tbl->buf->npages > HNS_ROCE_V2_EXT_LLM_MAX_DEPTH)) {
+ ret = -EINVAL;
+ goto err_alloc;
+ }
+
+ config_llm_table(link_tbl->buf, link_tbl->table.buf);
+ ret = set_llm_cfg_to_hw(hr_dev, link_tbl);
+ if (ret)
+ goto err_alloc;
+
+ return 0;
+
+err_alloc:
+ free_link_table_buf(hr_dev, link_tbl);
+ return ret;
+}
+
+static void hns_roce_free_link_table(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_v2_priv *priv = hr_dev->priv;
+
+ free_link_table_buf(hr_dev, &priv->ext_llm);
+}
+
+static void free_dip_entry(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_dip *hr_dip;
+ unsigned long idx;
+
+ xa_lock(&hr_dev->qp_table.dip_xa);
+
+ xa_for_each(&hr_dev->qp_table.dip_xa, idx, hr_dip) {
+ __xa_erase(&hr_dev->qp_table.dip_xa, hr_dip->dip_idx);
+ kfree(hr_dip);
+ }
+
+ xa_unlock(&hr_dev->qp_table.dip_xa);
+}
+
+static struct ib_pd *free_mr_init_pd(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_v2_priv *priv = hr_dev->priv;
+ struct hns_roce_v2_free_mr *free_mr = &priv->free_mr;
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ struct hns_roce_pd *hr_pd;
+ struct ib_pd *pd;
+
+ hr_pd = kzalloc(sizeof(*hr_pd), GFP_KERNEL);
+ if (!hr_pd)
+ return NULL;
+ pd = &hr_pd->ibpd;
+ pd->device = ibdev;
+
+ if (hns_roce_alloc_pd(pd, NULL)) {
+ ibdev_err(ibdev, "failed to create pd for free mr.\n");
+ kfree(hr_pd);
+ return NULL;
+ }
+ free_mr->rsv_pd = to_hr_pd(pd);
+ free_mr->rsv_pd->ibpd.device = &hr_dev->ib_dev;
+ free_mr->rsv_pd->ibpd.uobject = NULL;
+ free_mr->rsv_pd->ibpd.__internal_mr = NULL;
+ atomic_set(&free_mr->rsv_pd->ibpd.usecnt, 0);
+
+ return pd;
+}
+
+static struct ib_cq *free_mr_init_cq(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_v2_priv *priv = hr_dev->priv;
+ struct hns_roce_v2_free_mr *free_mr = &priv->free_mr;
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ struct ib_cq_init_attr cq_init_attr = {};
+ struct hns_roce_cq *hr_cq;
+ struct ib_cq *cq;
+
+ cq_init_attr.cqe = HNS_ROCE_FREE_MR_USED_CQE_NUM;
+
+ hr_cq = kzalloc(sizeof(*hr_cq), GFP_KERNEL);
+ if (!hr_cq)
+ return NULL;
+
+ cq = &hr_cq->ib_cq;
+ cq->device = ibdev;
+
+ if (hns_roce_create_cq(cq, &cq_init_attr, NULL)) {
+ ibdev_err(ibdev, "failed to create cq for free mr.\n");
+ kfree(hr_cq);
+ return NULL;
+ }
+ free_mr->rsv_cq = to_hr_cq(cq);
+ free_mr->rsv_cq->ib_cq.device = &hr_dev->ib_dev;
+ free_mr->rsv_cq->ib_cq.uobject = NULL;
+ free_mr->rsv_cq->ib_cq.comp_handler = NULL;
+ free_mr->rsv_cq->ib_cq.event_handler = NULL;
+ free_mr->rsv_cq->ib_cq.cq_context = NULL;
+ atomic_set(&free_mr->rsv_cq->ib_cq.usecnt, 0);
+
+ return cq;
+}
+
+static int free_mr_init_qp(struct hns_roce_dev *hr_dev, struct ib_cq *cq,
+ struct ib_qp_init_attr *init_attr, int i)
+{
+ struct hns_roce_v2_priv *priv = hr_dev->priv;
+ struct hns_roce_v2_free_mr *free_mr = &priv->free_mr;
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ struct hns_roce_qp *hr_qp;
+ struct ib_qp *qp;
+ int ret;
+
+ hr_qp = kzalloc(sizeof(*hr_qp), GFP_KERNEL);
+ if (!hr_qp)
+ return -ENOMEM;
+
+ qp = &hr_qp->ibqp;
+ qp->device = ibdev;
+
+ ret = hns_roce_create_qp(qp, init_attr, NULL);
+ if (ret) {
+ ibdev_err(ibdev, "failed to create qp for free mr.\n");
+ kfree(hr_qp);
+ return ret;
+ }
+
+ free_mr->rsv_qp[i] = hr_qp;
+ free_mr->rsv_qp[i]->ibqp.recv_cq = cq;
+ free_mr->rsv_qp[i]->ibqp.send_cq = cq;
+
+ return 0;
+}
+
+static void free_mr_exit(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_v2_priv *priv = hr_dev->priv;
+ struct hns_roce_v2_free_mr *free_mr = &priv->free_mr;
+ struct ib_qp *qp;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(free_mr->rsv_qp); i++) {
+ if (free_mr->rsv_qp[i]) {
+ qp = &free_mr->rsv_qp[i]->ibqp;
+ hns_roce_v2_destroy_qp(qp, NULL);
+ kfree(free_mr->rsv_qp[i]);
+ free_mr->rsv_qp[i] = NULL;
+ }
+ }
+
+ if (free_mr->rsv_cq) {
+ hns_roce_destroy_cq(&free_mr->rsv_cq->ib_cq, NULL);
+ kfree(free_mr->rsv_cq);
+ free_mr->rsv_cq = NULL;
+ }
+
+ if (free_mr->rsv_pd) {
+ hns_roce_dealloc_pd(&free_mr->rsv_pd->ibpd, NULL);
+ kfree(free_mr->rsv_pd);
+ free_mr->rsv_pd = NULL;
+ }
+
+ mutex_destroy(&free_mr->mutex);
+}
+
+static int free_mr_alloc_res(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_v2_priv *priv = hr_dev->priv;
+ struct hns_roce_v2_free_mr *free_mr = &priv->free_mr;
+ struct ib_qp_init_attr qp_init_attr = {};
+ struct ib_pd *pd;
+ struct ib_cq *cq;
+ int ret;
+ int i;
+
+ pd = free_mr_init_pd(hr_dev);
+ if (!pd)
+ return -ENOMEM;
+
+ cq = free_mr_init_cq(hr_dev);
+ if (!cq) {
+ ret = -ENOMEM;
+ goto create_failed_cq;
+ }
+
+ qp_init_attr.qp_type = IB_QPT_RC;
+ qp_init_attr.sq_sig_type = IB_SIGNAL_ALL_WR;
+ qp_init_attr.send_cq = cq;
+ qp_init_attr.recv_cq = cq;
+ for (i = 0; i < ARRAY_SIZE(free_mr->rsv_qp); i++) {
+ qp_init_attr.cap.max_send_wr = HNS_ROCE_FREE_MR_USED_SQWQE_NUM;
+ qp_init_attr.cap.max_send_sge = HNS_ROCE_FREE_MR_USED_SQSGE_NUM;
+ qp_init_attr.cap.max_recv_wr = HNS_ROCE_FREE_MR_USED_RQWQE_NUM;
+ qp_init_attr.cap.max_recv_sge = HNS_ROCE_FREE_MR_USED_RQSGE_NUM;
+
+ ret = free_mr_init_qp(hr_dev, cq, &qp_init_attr, i);
+ if (ret)
+ goto create_failed_qp;
+ }
+
+ return 0;
+
+create_failed_qp:
+ for (i--; i >= 0; i--) {
+ hns_roce_v2_destroy_qp(&free_mr->rsv_qp[i]->ibqp, NULL);
+ kfree(free_mr->rsv_qp[i]);
+ }
+ hns_roce_destroy_cq(cq, NULL);
+ kfree(cq);
+
+create_failed_cq:
+ hns_roce_dealloc_pd(pd, NULL);
+ kfree(pd);
+
+ return ret;
+}
+
+static int free_mr_modify_rsv_qp(struct hns_roce_dev *hr_dev,
+ struct ib_qp_attr *attr, int sl_num)
+{
+ struct hns_roce_v2_priv *priv = hr_dev->priv;
+ struct hns_roce_v2_free_mr *free_mr = &priv->free_mr;
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ struct hns_roce_qp *hr_qp;
+ int loopback;
+ int mask;
+ int ret;
+
+ hr_qp = to_hr_qp(&free_mr->rsv_qp[sl_num]->ibqp);
+ hr_qp->free_mr_en = 1;
+ hr_qp->ibqp.device = ibdev;
+ hr_qp->ibqp.qp_type = IB_QPT_RC;
+
+ mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_ACCESS_FLAGS;
+ attr->qp_state = IB_QPS_INIT;
+ attr->port_num = 1;
+ attr->qp_access_flags = IB_ACCESS_REMOTE_WRITE;
+ ret = hr_dev->hw->modify_qp(&hr_qp->ibqp, attr, mask, IB_QPS_INIT,
+ IB_QPS_INIT, NULL);
+ if (ret) {
+ ibdev_err_ratelimited(ibdev, "failed to modify qp to init, ret = %d.\n",
+ ret);
+ return ret;
+ }
+
+ loopback = hr_dev->loop_idc;
+ /* Set qpc lbi = 1 incidate loopback IO */
+ hr_dev->loop_idc = 1;
+
+ mask = IB_QP_STATE | IB_QP_AV | IB_QP_PATH_MTU | IB_QP_DEST_QPN |
+ IB_QP_RQ_PSN | IB_QP_MAX_DEST_RD_ATOMIC | IB_QP_MIN_RNR_TIMER;
+ attr->qp_state = IB_QPS_RTR;
+ attr->ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE;
+ attr->path_mtu = IB_MTU_256;
+ attr->dest_qp_num = hr_qp->qpn;
+ attr->rq_psn = HNS_ROCE_FREE_MR_USED_PSN;
+
+ rdma_ah_set_sl(&attr->ah_attr, (u8)sl_num);
+
+ ret = hr_dev->hw->modify_qp(&hr_qp->ibqp, attr, mask, IB_QPS_INIT,
+ IB_QPS_RTR, NULL);
+ hr_dev->loop_idc = loopback;
+ if (ret) {
+ ibdev_err(ibdev, "failed to modify qp to rtr, ret = %d.\n",
+ ret);
+ return ret;
+ }
+
+ mask = IB_QP_STATE | IB_QP_SQ_PSN | IB_QP_RETRY_CNT | IB_QP_TIMEOUT |
+ IB_QP_RNR_RETRY | IB_QP_MAX_QP_RD_ATOMIC;
+ attr->qp_state = IB_QPS_RTS;
+ attr->sq_psn = HNS_ROCE_FREE_MR_USED_PSN;
+ attr->retry_cnt = HNS_ROCE_FREE_MR_USED_QP_RETRY_CNT;
+ attr->timeout = HNS_ROCE_FREE_MR_USED_QP_TIMEOUT;
+ ret = hr_dev->hw->modify_qp(&hr_qp->ibqp, attr, mask, IB_QPS_RTR,
+ IB_QPS_RTS, NULL);
+ if (ret)
+ ibdev_err(ibdev, "failed to modify qp to rts, ret = %d.\n",
+ ret);
+
+ return ret;
+}
+
+static int free_mr_modify_qp(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_v2_priv *priv = hr_dev->priv;
+ struct hns_roce_v2_free_mr *free_mr = &priv->free_mr;
+ struct ib_qp_attr attr = {};
+ int ret;
+ int i;
+
+ rdma_ah_set_grh(&attr.ah_attr, NULL, 0, 0, 1, 0);
+ rdma_ah_set_static_rate(&attr.ah_attr, 3);
+ rdma_ah_set_port_num(&attr.ah_attr, 1);
+
+ for (i = 0; i < ARRAY_SIZE(free_mr->rsv_qp); i++) {
+ ret = free_mr_modify_rsv_qp(hr_dev, &attr, i);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int free_mr_init(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_v2_priv *priv = hr_dev->priv;
+ struct hns_roce_v2_free_mr *free_mr = &priv->free_mr;
+ int ret;
+
+ mutex_init(&free_mr->mutex);
+
+ ret = free_mr_alloc_res(hr_dev);
+ if (ret) {
+ mutex_destroy(&free_mr->mutex);
+ return ret;
+ }
+
+ ret = free_mr_modify_qp(hr_dev);
+ if (ret)
+ goto err_modify_qp;
+
+ return 0;
+
+err_modify_qp:
+ free_mr_exit(hr_dev);
+
+ return ret;
+}
+
+static int get_hem_table(struct hns_roce_dev *hr_dev)
+{
+ unsigned int qpc_count;
+ unsigned int cqc_count;
+ unsigned int gmv_count;
+ int ret;
+ int i;
+
+ /* Alloc memory for source address table buffer space chunk */
+ for (gmv_count = 0; gmv_count < hr_dev->caps.gmv_entry_num;
+ gmv_count++) {
+ ret = hns_roce_table_get(hr_dev, &hr_dev->gmv_table, gmv_count);
+ if (ret)
+ goto err_gmv_failed;
+ }
+
+ if (hr_dev->is_vf)
+ return 0;
+
+ /* Alloc memory for QPC Timer buffer space chunk */
+ for (qpc_count = 0; qpc_count < hr_dev->caps.qpc_timer_bt_num;
+ qpc_count++) {
+ ret = hns_roce_table_get(hr_dev, &hr_dev->qpc_timer_table,
+ qpc_count);
+ if (ret) {
+ dev_err(hr_dev->dev, "QPC Timer get failed\n");
+ goto err_qpc_timer_failed;
+ }
+ }
+
+ /* Alloc memory for CQC Timer buffer space chunk */
+ for (cqc_count = 0; cqc_count < hr_dev->caps.cqc_timer_bt_num;
+ cqc_count++) {
+ ret = hns_roce_table_get(hr_dev, &hr_dev->cqc_timer_table,
+ cqc_count);
+ if (ret) {
+ dev_err(hr_dev->dev, "CQC Timer get failed\n");
+ goto err_cqc_timer_failed;
+ }
+ }
+
+ return 0;
+
+err_cqc_timer_failed:
+ for (i = 0; i < cqc_count; i++)
+ hns_roce_table_put(hr_dev, &hr_dev->cqc_timer_table, i);
+
+err_qpc_timer_failed:
+ for (i = 0; i < qpc_count; i++)
+ hns_roce_table_put(hr_dev, &hr_dev->qpc_timer_table, i);
+
+err_gmv_failed:
+ for (i = 0; i < gmv_count; i++)
+ hns_roce_table_put(hr_dev, &hr_dev->gmv_table, i);
+
+ return ret;
+}
+
+static void put_hem_table(struct hns_roce_dev *hr_dev)
+{
+ int i;
+
+ for (i = 0; i < hr_dev->caps.gmv_entry_num; i++)
+ hns_roce_table_put(hr_dev, &hr_dev->gmv_table, i);
+
+ if (hr_dev->is_vf)
+ return;
+
+ for (i = 0; i < hr_dev->caps.qpc_timer_bt_num; i++)
+ hns_roce_table_put(hr_dev, &hr_dev->qpc_timer_table, i);
+
+ for (i = 0; i < hr_dev->caps.cqc_timer_bt_num; i++)
+ hns_roce_table_put(hr_dev, &hr_dev->cqc_timer_table, i);
+}
+
+static int hns_roce_v2_init(struct hns_roce_dev *hr_dev)
+{
+ int ret;
+
+ if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) {
+ ret = free_mr_init(hr_dev);
+ if (ret) {
+ dev_err(hr_dev->dev, "failed to init free mr!\n");
+ return ret;
+ }
+ }
+
+ /* The hns ROCEE requires the extdb info to be cleared before using */
+ ret = hns_roce_clear_extdb_list_info(hr_dev);
+ if (ret)
+ goto err_clear_extdb_failed;
+
+ ret = get_hem_table(hr_dev);
+ if (ret)
+ goto err_get_hem_table_failed;
+
+ if (hr_dev->is_vf)
+ return 0;
+
+ ret = hns_roce_init_link_table(hr_dev);
+ if (ret) {
+ dev_err(hr_dev->dev, "failed to init llm, ret = %d.\n", ret);
+ goto err_llm_init_failed;
+ }
+
+ return 0;
+
+err_llm_init_failed:
+ put_hem_table(hr_dev);
+err_get_hem_table_failed:
+ hns_roce_function_clear(hr_dev);
+err_clear_extdb_failed:
+ if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08)
+ free_mr_exit(hr_dev);
+
+ return ret;
+}
+
+static void hns_roce_v2_exit(struct hns_roce_dev *hr_dev)
+{
+ if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08)
+ free_mr_exit(hr_dev);
+
+ hns_roce_function_clear(hr_dev);
+
+ if (!hr_dev->is_vf)
+ hns_roce_free_link_table(hr_dev);
+
+ if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
+ free_dip_entry(hr_dev);
+}
+
+static int hns_roce_mbox_post(struct hns_roce_dev *hr_dev,
+ struct hns_roce_mbox_msg *mbox_msg)
+{
+ struct hns_roce_cmq_desc desc;
+ struct hns_roce_post_mbox *mb = (struct hns_roce_post_mbox *)desc.data;
+
+ hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_POST_MB, false);
+
+ mb->in_param_l = cpu_to_le32(mbox_msg->in_param);
+ mb->in_param_h = cpu_to_le32(mbox_msg->in_param >> 32);
+ mb->out_param_l = cpu_to_le32(mbox_msg->out_param);
+ mb->out_param_h = cpu_to_le32(mbox_msg->out_param >> 32);
+ mb->cmd_tag = cpu_to_le32(mbox_msg->tag << 8 | mbox_msg->cmd);
+ mb->token_event_en = cpu_to_le32(mbox_msg->event_en << 16 |
+ mbox_msg->token);
+
+ return hns_roce_cmq_send(hr_dev, &desc, 1);
+}
+
+static int v2_wait_mbox_complete(struct hns_roce_dev *hr_dev, u32 timeout,
+ u8 *complete_status)
+{
+ struct hns_roce_mbox_status *mb_st;
+ struct hns_roce_cmq_desc desc;
+ unsigned long end;
+ int ret = -EBUSY;
+ u32 status;
+ bool busy;
+
+ mb_st = (struct hns_roce_mbox_status *)desc.data;
+ end = msecs_to_jiffies(timeout) + jiffies;
+ while (v2_chk_mbox_is_avail(hr_dev, &busy)) {
+ if (hr_dev->cmd.state == HNS_ROCE_CMDQ_STATE_FATAL_ERR)
+ return -EIO;
+
+ status = 0;
+ hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_QUERY_MB_ST,
+ true);
+ ret = __hns_roce_cmq_send(hr_dev, &desc, 1);
+ if (!ret) {
+ status = le32_to_cpu(mb_st->mb_status_hw_run);
+ /* No pending message exists in ROCEE mbox. */
+ if (!(status & MB_ST_HW_RUN_M))
+ break;
+ } else if (!v2_chk_mbox_is_avail(hr_dev, &busy)) {
+ break;
+ }
+
+ if (time_after(jiffies, end)) {
+ dev_err_ratelimited(hr_dev->dev,
+ "failed to wait mbox status 0x%x\n",
+ status);
+ return -ETIMEDOUT;
+ }
+
+ cond_resched();
+ ret = -EBUSY;
+ }
+
+ if (!ret) {
+ *complete_status = (u8)(status & MB_ST_COMPLETE_M);
+ } else if (!v2_chk_mbox_is_avail(hr_dev, &busy)) {
+ /* Ignore all errors if the mbox is unavailable. */
+ ret = 0;
+ *complete_status = MB_ST_COMPLETE_M;
+ }
+
+ return ret;
+}
+
+static int v2_post_mbox(struct hns_roce_dev *hr_dev,
+ struct hns_roce_mbox_msg *mbox_msg)
+{
+ u8 status = 0;
+ int ret;
+
+ /* Waiting for the mbox to be idle */
+ ret = v2_wait_mbox_complete(hr_dev, HNS_ROCE_V2_GO_BIT_TIMEOUT_MSECS,
+ &status);
+ if (unlikely(ret)) {
+ dev_err_ratelimited(hr_dev->dev,
+ "failed to check post mbox status = 0x%x, ret = %d.\n",
+ status, ret);
+ return ret;
+ }
+
+ /* Post new message to mbox */
+ ret = hns_roce_mbox_post(hr_dev, mbox_msg);
+ if (ret)
+ dev_err_ratelimited(hr_dev->dev,
+ "failed to post mailbox, ret = %d.\n", ret);
+
+ return ret;
+}
+
+static int v2_poll_mbox_done(struct hns_roce_dev *hr_dev)
+{
+ u8 status = 0;
+ int ret;
+
+ ret = v2_wait_mbox_complete(hr_dev, HNS_ROCE_CMD_TIMEOUT_MSECS,
+ &status);
+ if (!ret) {
+ if (status != MB_ST_COMPLETE_SUCC)
+ return -EBUSY;
+ } else {
+ dev_err_ratelimited(hr_dev->dev,
+ "failed to check mbox status = 0x%x, ret = %d.\n",
+ status, ret);
+ }
+
+ return ret;
+}
+
+static void copy_gid(void *dest, const union ib_gid *gid)
+{
+#define GID_SIZE 4
+ const union ib_gid *src = gid;
+ __le32 (*p)[GID_SIZE] = dest;
+ int i;
+
+ if (!gid)
+ src = &zgid;
+
+ for (i = 0; i < GID_SIZE; i++)
+ (*p)[i] = cpu_to_le32(*(u32 *)&src->raw[i * sizeof(u32)]);
+}
+
+static int config_sgid_table(struct hns_roce_dev *hr_dev,
+ int gid_index, const union ib_gid *gid,
+ enum hns_roce_sgid_type sgid_type)
+{
+ struct hns_roce_cmq_desc desc;
+ struct hns_roce_cfg_sgid_tb *sgid_tb =
+ (struct hns_roce_cfg_sgid_tb *)desc.data;
+
+ hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CFG_SGID_TB, false);
+
+ hr_reg_write(sgid_tb, CFG_SGID_TB_TABLE_IDX, gid_index);
+ hr_reg_write(sgid_tb, CFG_SGID_TB_VF_SGID_TYPE, sgid_type);
+
+ copy_gid(&sgid_tb->vf_sgid_l, gid);
+
+ return hns_roce_cmq_send(hr_dev, &desc, 1);
+}
+
+static int config_gmv_table(struct hns_roce_dev *hr_dev,
+ int gid_index, const union ib_gid *gid,
+ enum hns_roce_sgid_type sgid_type,
+ const struct ib_gid_attr *attr)
+{
+ struct hns_roce_cmq_desc desc[2];
+ struct hns_roce_cfg_gmv_tb_a *tb_a =
+ (struct hns_roce_cfg_gmv_tb_a *)desc[0].data;
+ struct hns_roce_cfg_gmv_tb_b *tb_b =
+ (struct hns_roce_cfg_gmv_tb_b *)desc[1].data;
+
+ u16 vlan_id = VLAN_CFI_MASK;
+ u8 mac[ETH_ALEN] = {};
+ int ret;
+
+ if (gid) {
+ ret = rdma_read_gid_l2_fields(attr, &vlan_id, mac);
+ if (ret)
+ return ret;
+ }
+
+ hns_roce_cmq_setup_basic_desc(&desc[0], HNS_ROCE_OPC_CFG_GMV_TBL, false);
+ desc[0].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT);
+
+ hns_roce_cmq_setup_basic_desc(&desc[1], HNS_ROCE_OPC_CFG_GMV_TBL, false);
+
+ copy_gid(&tb_a->vf_sgid_l, gid);
+
+ hr_reg_write(tb_a, GMV_TB_A_VF_SGID_TYPE, sgid_type);
+ hr_reg_write(tb_a, GMV_TB_A_VF_VLAN_EN, vlan_id < VLAN_CFI_MASK);
+ hr_reg_write(tb_a, GMV_TB_A_VF_VLAN_ID, vlan_id);
+
+ tb_b->vf_smac_l = cpu_to_le32(*(u32 *)mac);
+
+ hr_reg_write(tb_b, GMV_TB_B_SMAC_H, *(u16 *)&mac[4]);
+ hr_reg_write(tb_b, GMV_TB_B_SGID_IDX, gid_index);
+
+ return hns_roce_cmq_send(hr_dev, desc, 2);
+}
+
+static int hns_roce_v2_set_gid(struct hns_roce_dev *hr_dev, int gid_index,
+ const union ib_gid *gid,
+ const struct ib_gid_attr *attr)
+{
+ enum hns_roce_sgid_type sgid_type = GID_TYPE_FLAG_ROCE_V1;
+ int ret;
+
+ if (gid) {
+ if (attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) {
+ if (ipv6_addr_v4mapped((void *)gid))
+ sgid_type = GID_TYPE_FLAG_ROCE_V2_IPV4;
+ else
+ sgid_type = GID_TYPE_FLAG_ROCE_V2_IPV6;
+ } else if (attr->gid_type == IB_GID_TYPE_ROCE) {
+ sgid_type = GID_TYPE_FLAG_ROCE_V1;
+ }
+ }
+
+ if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
+ ret = config_gmv_table(hr_dev, gid_index, gid, sgid_type, attr);
+ else
+ ret = config_sgid_table(hr_dev, gid_index, gid, sgid_type);
+
+ if (ret)
+ ibdev_err(&hr_dev->ib_dev, "failed to set gid, ret = %d!\n",
+ ret);
+
+ return ret;
+}
+
+static int hns_roce_v2_set_mac(struct hns_roce_dev *hr_dev, u8 phy_port,
+ const u8 *addr)
+{
+ struct hns_roce_cmq_desc desc;
+ struct hns_roce_cfg_smac_tb *smac_tb =
+ (struct hns_roce_cfg_smac_tb *)desc.data;
+ u16 reg_smac_h;
+ u32 reg_smac_l;
+
+ hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CFG_SMAC_TB, false);
+
+ reg_smac_l = *(u32 *)(&addr[0]);
+ reg_smac_h = *(u16 *)(&addr[4]);
+
+ hr_reg_write(smac_tb, CFG_SMAC_TB_IDX, phy_port);
+ hr_reg_write(smac_tb, CFG_SMAC_TB_VF_SMAC_H, reg_smac_h);
+ smac_tb->vf_smac_l = cpu_to_le32(reg_smac_l);
+
+ return hns_roce_cmq_send(hr_dev, &desc, 1);
+}
+
+static int set_mtpt_pbl(struct hns_roce_dev *hr_dev,
+ struct hns_roce_v2_mpt_entry *mpt_entry,
+ struct hns_roce_mr *mr)
+{
+ u64 pages[HNS_ROCE_V2_MAX_INNER_MTPT_NUM] = { 0 };
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ dma_addr_t pbl_ba;
+ int ret;
+ int i;
+
+ ret = hns_roce_mtr_find(hr_dev, &mr->pbl_mtr, 0, pages,
+ min_t(int, ARRAY_SIZE(pages), mr->npages));
+ if (ret) {
+ ibdev_err(ibdev, "failed to find PBL mtr, ret = %d.\n", ret);
+ return ret;
+ }
+
+ /* Aligned to the hardware address access unit */
+ for (i = 0; i < ARRAY_SIZE(pages); i++)
+ pages[i] >>= MPT_PBL_BUF_ADDR_S;
+
+ pbl_ba = hns_roce_get_mtr_ba(&mr->pbl_mtr);
+
+ mpt_entry->pbl_size = cpu_to_le32(mr->npages);
+ mpt_entry->pbl_ba_l = cpu_to_le32(pbl_ba >> MPT_PBL_BA_ADDR_S);
+ hr_reg_write(mpt_entry, MPT_PBL_BA_H,
+ upper_32_bits(pbl_ba >> MPT_PBL_BA_ADDR_S));
+
+ mpt_entry->pa0_l = cpu_to_le32(lower_32_bits(pages[0]));
+ hr_reg_write(mpt_entry, MPT_PA0_H, upper_32_bits(pages[0]));
+
+ mpt_entry->pa1_l = cpu_to_le32(lower_32_bits(pages[1]));
+ hr_reg_write(mpt_entry, MPT_PA1_H, upper_32_bits(pages[1]));
+ hr_reg_write(mpt_entry, MPT_PBL_BUF_PG_SZ,
+ to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.buf_pg_shift));
+
+ return 0;
+}
+
+static int hns_roce_v2_write_mtpt(struct hns_roce_dev *hr_dev,
+ void *mb_buf, struct hns_roce_mr *mr)
+{
+ struct hns_roce_v2_mpt_entry *mpt_entry;
+
+ mpt_entry = mb_buf;
+ memset(mpt_entry, 0, sizeof(*mpt_entry));
+
+ hr_reg_write(mpt_entry, MPT_ST, V2_MPT_ST_VALID);
+ hr_reg_write(mpt_entry, MPT_PD, mr->pd);
+
+ hr_reg_write_bool(mpt_entry, MPT_ATOMIC_EN,
+ mr->access & IB_ACCESS_REMOTE_ATOMIC);
+ hr_reg_write_bool(mpt_entry, MPT_RR_EN,
+ mr->access & IB_ACCESS_REMOTE_READ);
+ hr_reg_write_bool(mpt_entry, MPT_RW_EN,
+ mr->access & IB_ACCESS_REMOTE_WRITE);
+ hr_reg_write_bool(mpt_entry, MPT_LW_EN,
+ mr->access & IB_ACCESS_LOCAL_WRITE);
+
+ mpt_entry->len_l = cpu_to_le32(lower_32_bits(mr->size));
+ mpt_entry->len_h = cpu_to_le32(upper_32_bits(mr->size));
+ mpt_entry->lkey = cpu_to_le32(mr->key);
+ mpt_entry->va_l = cpu_to_le32(lower_32_bits(mr->iova));
+ mpt_entry->va_h = cpu_to_le32(upper_32_bits(mr->iova));
+
+ if (mr->type != MR_TYPE_MR)
+ hr_reg_enable(mpt_entry, MPT_PA);
+
+ if (mr->type == MR_TYPE_DMA)
+ return 0;
+
+ if (mr->pbl_hop_num != HNS_ROCE_HOP_NUM_0)
+ hr_reg_write(mpt_entry, MPT_PBL_HOP_NUM, mr->pbl_hop_num);
+
+ hr_reg_write(mpt_entry, MPT_PBL_BA_PG_SZ,
+ to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.ba_pg_shift));
+ hr_reg_enable(mpt_entry, MPT_INNER_PA_VLD);
+
+ return set_mtpt_pbl(hr_dev, mpt_entry, mr);
+}
+
+static int hns_roce_v2_rereg_write_mtpt(struct hns_roce_dev *hr_dev,
+ struct hns_roce_mr *mr, int flags,
+ void *mb_buf)
+{
+ struct hns_roce_v2_mpt_entry *mpt_entry = mb_buf;
+ u32 mr_access_flags = mr->access;
+ int ret = 0;
+
+ hr_reg_write(mpt_entry, MPT_ST, V2_MPT_ST_VALID);
+ hr_reg_write(mpt_entry, MPT_PD, mr->pd);
+
+ if (flags & IB_MR_REREG_ACCESS) {
+ hr_reg_write(mpt_entry, MPT_ATOMIC_EN,
+ mr_access_flags & IB_ACCESS_REMOTE_ATOMIC ? 1 : 0);
+ hr_reg_write(mpt_entry, MPT_RR_EN,
+ mr_access_flags & IB_ACCESS_REMOTE_READ ? 1 : 0);
+ hr_reg_write(mpt_entry, MPT_RW_EN,
+ mr_access_flags & IB_ACCESS_REMOTE_WRITE ? 1 : 0);
+ hr_reg_write(mpt_entry, MPT_LW_EN,
+ mr_access_flags & IB_ACCESS_LOCAL_WRITE ? 1 : 0);
+ }
+
+ if (flags & IB_MR_REREG_TRANS) {
+ mpt_entry->va_l = cpu_to_le32(lower_32_bits(mr->iova));
+ mpt_entry->va_h = cpu_to_le32(upper_32_bits(mr->iova));
+ mpt_entry->len_l = cpu_to_le32(lower_32_bits(mr->size));
+ mpt_entry->len_h = cpu_to_le32(upper_32_bits(mr->size));
+
+ ret = set_mtpt_pbl(hr_dev, mpt_entry, mr);
+ }
+
+ return ret;
+}
+
+static int hns_roce_v2_frmr_write_mtpt(void *mb_buf, struct hns_roce_mr *mr)
+{
+ dma_addr_t pbl_ba = hns_roce_get_mtr_ba(&mr->pbl_mtr);
+ struct hns_roce_v2_mpt_entry *mpt_entry;
+
+ mpt_entry = mb_buf;
+ memset(mpt_entry, 0, sizeof(*mpt_entry));
+
+ hr_reg_write(mpt_entry, MPT_ST, V2_MPT_ST_FREE);
+ hr_reg_write(mpt_entry, MPT_PD, mr->pd);
+
+ hr_reg_enable(mpt_entry, MPT_RA_EN);
+ hr_reg_enable(mpt_entry, MPT_R_INV_EN);
+
+ hr_reg_enable(mpt_entry, MPT_FRE);
+ hr_reg_enable(mpt_entry, MPT_BPD);
+ hr_reg_clear(mpt_entry, MPT_PA);
+
+ hr_reg_write(mpt_entry, MPT_PBL_HOP_NUM, 1);
+ hr_reg_write(mpt_entry, MPT_PBL_BA_PG_SZ,
+ to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.ba_pg_shift));
+ hr_reg_write(mpt_entry, MPT_PBL_BUF_PG_SZ,
+ to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.buf_pg_shift));
+
+ mpt_entry->pbl_size = cpu_to_le32(mr->npages);
+
+ mpt_entry->pbl_ba_l = cpu_to_le32(lower_32_bits(pbl_ba >>
+ MPT_PBL_BA_ADDR_S));
+ hr_reg_write(mpt_entry, MPT_PBL_BA_H,
+ upper_32_bits(pbl_ba >> MPT_PBL_BA_ADDR_S));
+
+ return 0;
+}
+
+static int free_mr_post_send_lp_wqe(struct hns_roce_qp *hr_qp)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(hr_qp->ibqp.device);
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ const struct ib_send_wr *bad_wr;
+ struct ib_rdma_wr rdma_wr = {};
+ struct ib_send_wr *send_wr;
+ int ret;
+
+ send_wr = &rdma_wr.wr;
+ send_wr->opcode = IB_WR_RDMA_WRITE;
+
+ ret = hns_roce_v2_post_send(&hr_qp->ibqp, send_wr, &bad_wr);
+ if (ret) {
+ ibdev_err_ratelimited(ibdev, "failed to post wqe for free mr, ret = %d.\n",
+ ret);
+ return ret;
+ }
+
+ return 0;
+}
+
+static int hns_roce_v2_poll_cq(struct ib_cq *ibcq, int num_entries,
+ struct ib_wc *wc);
+
+static void free_mr_send_cmd_to_hw(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_v2_priv *priv = hr_dev->priv;
+ struct hns_roce_v2_free_mr *free_mr = &priv->free_mr;
+ struct ib_wc wc[ARRAY_SIZE(free_mr->rsv_qp)];
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ struct hns_roce_qp *hr_qp;
+ unsigned long end;
+ int cqe_cnt = 0;
+ int npolled;
+ int ret;
+ int i;
+
+ /*
+ * If the device initialization is not complete or in the uninstall
+ * process, then there is no need to execute free mr.
+ */
+ if (priv->handle->rinfo.reset_state == HNS_ROCE_STATE_RST_INIT ||
+ priv->handle->rinfo.instance_state == HNS_ROCE_STATE_INIT ||
+ hr_dev->state == HNS_ROCE_DEVICE_STATE_UNINIT)
+ return;
+
+ mutex_lock(&free_mr->mutex);
+
+ for (i = 0; i < ARRAY_SIZE(free_mr->rsv_qp); i++) {
+ hr_qp = free_mr->rsv_qp[i];
+
+ ret = free_mr_post_send_lp_wqe(hr_qp);
+ if (ret) {
+ ibdev_err_ratelimited(ibdev,
+ "failed to send wqe (qp:0x%lx) for free mr, ret = %d.\n",
+ hr_qp->qpn, ret);
+ break;
+ }
+
+ cqe_cnt++;
+ }
+
+ end = msecs_to_jiffies(HNS_ROCE_V2_FREE_MR_TIMEOUT) + jiffies;
+ while (cqe_cnt) {
+ npolled = hns_roce_v2_poll_cq(&free_mr->rsv_cq->ib_cq, cqe_cnt, wc);
+ if (npolled < 0) {
+ ibdev_err_ratelimited(ibdev,
+ "failed to poll cqe for free mr, remain %d cqe.\n",
+ cqe_cnt);
+ goto out;
+ }
+
+ if (time_after(jiffies, end)) {
+ ibdev_err_ratelimited(ibdev,
+ "failed to poll cqe for free mr and timeout, remain %d cqe.\n",
+ cqe_cnt);
+ goto out;
+ }
+ cqe_cnt -= npolled;
+ }
+
+out:
+ mutex_unlock(&free_mr->mutex);
+}
+
+static void hns_roce_v2_dereg_mr(struct hns_roce_dev *hr_dev)
+{
+ if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08)
+ free_mr_send_cmd_to_hw(hr_dev);
+}
+
+static void *get_cqe_v2(struct hns_roce_cq *hr_cq, int n)
+{
+ return hns_roce_buf_offset(hr_cq->mtr.kmem, n * hr_cq->cqe_size);
+}
+
+static void *get_sw_cqe_v2(struct hns_roce_cq *hr_cq, unsigned int n)
+{
+ struct hns_roce_v2_cqe *cqe = get_cqe_v2(hr_cq, n & hr_cq->ib_cq.cqe);
+
+ /* Get cqe when Owner bit is Conversely with the MSB of cons_idx */
+ return (hr_reg_read(cqe, CQE_OWNER) ^ !!(n & hr_cq->cq_depth)) ? cqe :
+ NULL;
+}
+
+static inline void update_cq_db(struct hns_roce_dev *hr_dev,
+ struct hns_roce_cq *hr_cq)
+{
+ if (likely(hr_cq->flags & HNS_ROCE_CQ_FLAG_RECORD_DB)) {
+ *hr_cq->set_ci_db = hr_cq->cons_index & V2_CQ_DB_CONS_IDX_M;
+ } else {
+ struct hns_roce_v2_db cq_db = {};
+
+ hr_reg_write(&cq_db, DB_TAG, hr_cq->cqn);
+ hr_reg_write(&cq_db, DB_CMD, HNS_ROCE_V2_CQ_DB);
+ hr_reg_write(&cq_db, DB_CQ_CI, hr_cq->cons_index);
+ hr_reg_write(&cq_db, DB_CQ_CMD_SN, 1);
+
+ hns_roce_write64(hr_dev, (__le32 *)&cq_db, hr_cq->db_reg);
+ }
+}
+
+static void __hns_roce_v2_cq_clean(struct hns_roce_cq *hr_cq, u32 qpn,
+ struct hns_roce_srq *srq)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(hr_cq->ib_cq.device);
+ struct hns_roce_v2_cqe *cqe, *dest;
+ u32 prod_index;
+ int nfreed = 0;
+ int wqe_index;
+ u8 owner_bit;
+
+ for (prod_index = hr_cq->cons_index; get_sw_cqe_v2(hr_cq, prod_index);
+ ++prod_index) {
+ if (prod_index > hr_cq->cons_index + hr_cq->ib_cq.cqe)
+ break;
+ }
+
+ /*
+ * Now backwards through the CQ, removing CQ entries
+ * that match our QP by overwriting them with next entries.
+ */
+ while ((int) --prod_index - (int) hr_cq->cons_index >= 0) {
+ cqe = get_cqe_v2(hr_cq, prod_index & hr_cq->ib_cq.cqe);
+ if (hr_reg_read(cqe, CQE_LCL_QPN) == qpn) {
+ if (srq && hr_reg_read(cqe, CQE_S_R)) {
+ wqe_index = hr_reg_read(cqe, CQE_WQE_IDX);
+ hns_roce_free_srq_wqe(srq, wqe_index);
+ }
+ ++nfreed;
+ } else if (nfreed) {
+ dest = get_cqe_v2(hr_cq, (prod_index + nfreed) &
+ hr_cq->ib_cq.cqe);
+ owner_bit = hr_reg_read(dest, CQE_OWNER);
+ memcpy(dest, cqe, hr_cq->cqe_size);
+ hr_reg_write(dest, CQE_OWNER, owner_bit);
+ }
+ }
+
+ if (nfreed) {
+ hr_cq->cons_index += nfreed;
+ update_cq_db(hr_dev, hr_cq);
+ }
+}
+
+static void hns_roce_v2_cq_clean(struct hns_roce_cq *hr_cq, u32 qpn,
+ struct hns_roce_srq *srq)
+{
+ spin_lock_irq(&hr_cq->lock);
+ __hns_roce_v2_cq_clean(hr_cq, qpn, srq);
+ spin_unlock_irq(&hr_cq->lock);
+}
+
+static void hns_roce_v2_write_cqc(struct hns_roce_dev *hr_dev,
+ struct hns_roce_cq *hr_cq, void *mb_buf,
+ u64 *mtts, dma_addr_t dma_handle)
+{
+ struct hns_roce_v2_cq_context *cq_context;
+
+ cq_context = mb_buf;
+ memset(cq_context, 0, sizeof(*cq_context));
+
+ hr_reg_write(cq_context, CQC_CQ_ST, V2_CQ_STATE_VALID);
+ hr_reg_write(cq_context, CQC_ARM_ST, NO_ARMED);
+ hr_reg_write(cq_context, CQC_SHIFT, ilog2(hr_cq->cq_depth));
+ hr_reg_write(cq_context, CQC_CEQN, hr_cq->vector);
+ hr_reg_write(cq_context, CQC_CQN, hr_cq->cqn);
+
+ if (hr_cq->cqe_size == HNS_ROCE_V3_CQE_SIZE)
+ hr_reg_write(cq_context, CQC_CQE_SIZE, CQE_SIZE_64B);
+
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_STASH)
+ hr_reg_enable(cq_context, CQC_STASH);
+
+ hr_reg_write(cq_context, CQC_CQE_CUR_BLK_ADDR_L,
+ to_hr_hw_page_addr(mtts[0]));
+ hr_reg_write(cq_context, CQC_CQE_CUR_BLK_ADDR_H,
+ upper_32_bits(to_hr_hw_page_addr(mtts[0])));
+ hr_reg_write(cq_context, CQC_CQE_HOP_NUM, hr_dev->caps.cqe_hop_num ==
+ HNS_ROCE_HOP_NUM_0 ? 0 : hr_dev->caps.cqe_hop_num);
+ hr_reg_write(cq_context, CQC_CQE_NEX_BLK_ADDR_L,
+ to_hr_hw_page_addr(mtts[1]));
+ hr_reg_write(cq_context, CQC_CQE_NEX_BLK_ADDR_H,
+ upper_32_bits(to_hr_hw_page_addr(mtts[1])));
+ hr_reg_write(cq_context, CQC_CQE_BAR_PG_SZ,
+ to_hr_hw_page_shift(hr_cq->mtr.hem_cfg.ba_pg_shift));
+ hr_reg_write(cq_context, CQC_CQE_BUF_PG_SZ,
+ to_hr_hw_page_shift(hr_cq->mtr.hem_cfg.buf_pg_shift));
+ hr_reg_write(cq_context, CQC_CQE_BA_L, dma_handle >> CQC_CQE_BA_L_S);
+ hr_reg_write(cq_context, CQC_CQE_BA_H, dma_handle >> CQC_CQE_BA_H_S);
+ hr_reg_write_bool(cq_context, CQC_DB_RECORD_EN,
+ hr_cq->flags & HNS_ROCE_CQ_FLAG_RECORD_DB);
+ hr_reg_write(cq_context, CQC_CQE_DB_RECORD_ADDR_L,
+ ((u32)hr_cq->db.dma) >> 1);
+ hr_reg_write(cq_context, CQC_CQE_DB_RECORD_ADDR_H,
+ hr_cq->db.dma >> CQC_CQE_DB_RECORD_ADDR_H_S);
+ hr_reg_write(cq_context, CQC_CQ_MAX_CNT,
+ HNS_ROCE_V2_CQ_DEFAULT_BURST_NUM);
+ hr_reg_write(cq_context, CQC_CQ_PERIOD,
+ HNS_ROCE_V2_CQ_DEFAULT_INTERVAL);
+}
+
+static int hns_roce_v2_req_notify_cq(struct ib_cq *ibcq,
+ enum ib_cq_notify_flags flags)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibcq->device);
+ struct hns_roce_cq *hr_cq = to_hr_cq(ibcq);
+ struct hns_roce_v2_db cq_db = {};
+ u32 notify_flag;
+
+ /*
+ * flags = 0, then notify_flag : next
+ * flags = 1, then notify flag : solocited
+ */
+ notify_flag = (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ?
+ V2_CQ_DB_REQ_NOT : V2_CQ_DB_REQ_NOT_SOL;
+
+ hr_reg_write(&cq_db, DB_TAG, hr_cq->cqn);
+ hr_reg_write(&cq_db, DB_CMD, HNS_ROCE_V2_CQ_DB_NOTIFY);
+ hr_reg_write(&cq_db, DB_CQ_CI, hr_cq->cons_index);
+ hr_reg_write(&cq_db, DB_CQ_CMD_SN, hr_cq->arm_sn);
+ hr_reg_write(&cq_db, DB_CQ_NOTIFY, notify_flag);
+
+ hns_roce_write64(hr_dev, (__le32 *)&cq_db, hr_cq->db_reg);
+
+ return 0;
+}
+
+static int sw_comp(struct hns_roce_qp *hr_qp, struct hns_roce_wq *wq,
+ int num_entries, struct ib_wc *wc)
+{
+ unsigned int left;
+ int npolled = 0;
+
+ left = wq->head - wq->tail;
+ if (left == 0)
+ return 0;
+
+ left = min_t(unsigned int, (unsigned int)num_entries, left);
+ while (npolled < left) {
+ wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
+ wc->status = IB_WC_WR_FLUSH_ERR;
+ wc->vendor_err = 0;
+ wc->qp = &hr_qp->ibqp;
+
+ wq->tail++;
+ wc++;
+ npolled++;
+ }
+
+ return npolled;
+}
+
+static int hns_roce_v2_sw_poll_cq(struct hns_roce_cq *hr_cq, int num_entries,
+ struct ib_wc *wc)
+{
+ struct hns_roce_qp *hr_qp;
+ int npolled = 0;
+
+ list_for_each_entry(hr_qp, &hr_cq->sq_list, sq_node) {
+ npolled += sw_comp(hr_qp, &hr_qp->sq,
+ num_entries - npolled, wc + npolled);
+ if (npolled >= num_entries)
+ goto out;
+ }
+
+ list_for_each_entry(hr_qp, &hr_cq->rq_list, rq_node) {
+ npolled += sw_comp(hr_qp, &hr_qp->rq,
+ num_entries - npolled, wc + npolled);
+ if (npolled >= num_entries)
+ goto out;
+ }
+
+out:
+ return npolled;
+}
+
+static void get_cqe_status(struct hns_roce_dev *hr_dev, struct hns_roce_qp *qp,
+ struct hns_roce_cq *cq, struct hns_roce_v2_cqe *cqe,
+ struct ib_wc *wc)
+{
+ static const struct {
+ u32 cqe_status;
+ enum ib_wc_status wc_status;
+ } map[] = {
+ { HNS_ROCE_CQE_V2_SUCCESS, IB_WC_SUCCESS },
+ { HNS_ROCE_CQE_V2_LOCAL_LENGTH_ERR, IB_WC_LOC_LEN_ERR },
+ { HNS_ROCE_CQE_V2_LOCAL_QP_OP_ERR, IB_WC_LOC_QP_OP_ERR },
+ { HNS_ROCE_CQE_V2_LOCAL_PROT_ERR, IB_WC_LOC_PROT_ERR },
+ { HNS_ROCE_CQE_V2_WR_FLUSH_ERR, IB_WC_WR_FLUSH_ERR },
+ { HNS_ROCE_CQE_V2_MW_BIND_ERR, IB_WC_MW_BIND_ERR },
+ { HNS_ROCE_CQE_V2_BAD_RESP_ERR, IB_WC_BAD_RESP_ERR },
+ { HNS_ROCE_CQE_V2_LOCAL_ACCESS_ERR, IB_WC_LOC_ACCESS_ERR },
+ { HNS_ROCE_CQE_V2_REMOTE_INVAL_REQ_ERR, IB_WC_REM_INV_REQ_ERR },
+ { HNS_ROCE_CQE_V2_REMOTE_ACCESS_ERR, IB_WC_REM_ACCESS_ERR },
+ { HNS_ROCE_CQE_V2_REMOTE_OP_ERR, IB_WC_REM_OP_ERR },
+ { HNS_ROCE_CQE_V2_TRANSPORT_RETRY_EXC_ERR,
+ IB_WC_RETRY_EXC_ERR },
+ { HNS_ROCE_CQE_V2_RNR_RETRY_EXC_ERR, IB_WC_RNR_RETRY_EXC_ERR },
+ { HNS_ROCE_CQE_V2_REMOTE_ABORT_ERR, IB_WC_REM_ABORT_ERR },
+ { HNS_ROCE_CQE_V2_GENERAL_ERR, IB_WC_GENERAL_ERR}
+ };
+
+ u32 cqe_status = hr_reg_read(cqe, CQE_STATUS);
+ int i;
+
+ wc->status = IB_WC_GENERAL_ERR;
+ for (i = 0; i < ARRAY_SIZE(map); i++)
+ if (cqe_status == map[i].cqe_status) {
+ wc->status = map[i].wc_status;
+ break;
+ }
+
+ if (likely(wc->status == IB_WC_SUCCESS ||
+ wc->status == IB_WC_WR_FLUSH_ERR))
+ return;
+
+ ibdev_err_ratelimited(&hr_dev->ib_dev, "error cqe status 0x%x:\n",
+ cqe_status);
+ print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_NONE, 16, 4, cqe,
+ cq->cqe_size, false);
+ wc->vendor_err = hr_reg_read(cqe, CQE_SUB_STATUS);
+
+ /*
+ * For hns ROCEE, GENERAL_ERR is an error type that is not defined in
+ * the standard protocol, the driver must ignore it and needn't to set
+ * the QP to an error state.
+ */
+ if (cqe_status == HNS_ROCE_CQE_V2_GENERAL_ERR)
+ return;
+
+ flush_cqe(hr_dev, qp);
+}
+
+static int get_cur_qp(struct hns_roce_cq *hr_cq, struct hns_roce_v2_cqe *cqe,
+ struct hns_roce_qp **cur_qp)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(hr_cq->ib_cq.device);
+ struct hns_roce_qp *hr_qp = *cur_qp;
+ u32 qpn;
+
+ qpn = hr_reg_read(cqe, CQE_LCL_QPN);
+
+ if (!hr_qp || qpn != hr_qp->qpn) {
+ hr_qp = __hns_roce_qp_lookup(hr_dev, qpn);
+ if (unlikely(!hr_qp)) {
+ ibdev_err(&hr_dev->ib_dev,
+ "CQ %06lx with entry for unknown QPN %06x\n",
+ hr_cq->cqn, qpn);
+ return -EINVAL;
+ }
+ *cur_qp = hr_qp;
+ }
+
+ return 0;
+}
+
+/*
+ * mapped-value = 1 + real-value
+ * The ib wc opcode's real value is start from 0, In order to distinguish
+ * between initialized and uninitialized map values, we plus 1 to the actual
+ * value when defining the mapping, so that the validity can be identified by
+ * checking whether the mapped value is greater than 0.
+ */
+#define HR_WC_OP_MAP(hr_key, ib_key) \
+ [HNS_ROCE_V2_WQE_OP_ ## hr_key] = 1 + IB_WC_ ## ib_key
+
+static const u32 wc_send_op_map[] = {
+ HR_WC_OP_MAP(SEND, SEND),
+ HR_WC_OP_MAP(SEND_WITH_INV, SEND),
+ HR_WC_OP_MAP(SEND_WITH_IMM, SEND),
+ HR_WC_OP_MAP(RDMA_READ, RDMA_READ),
+ HR_WC_OP_MAP(RDMA_WRITE, RDMA_WRITE),
+ HR_WC_OP_MAP(RDMA_WRITE_WITH_IMM, RDMA_WRITE),
+ HR_WC_OP_MAP(ATOM_CMP_AND_SWAP, COMP_SWAP),
+ HR_WC_OP_MAP(ATOM_FETCH_AND_ADD, FETCH_ADD),
+ HR_WC_OP_MAP(ATOM_MSK_CMP_AND_SWAP, MASKED_COMP_SWAP),
+ HR_WC_OP_MAP(ATOM_MSK_FETCH_AND_ADD, MASKED_FETCH_ADD),
+ HR_WC_OP_MAP(FAST_REG_PMR, REG_MR),
+};
+
+static int to_ib_wc_send_op(u32 hr_opcode)
+{
+ if (hr_opcode >= ARRAY_SIZE(wc_send_op_map))
+ return -EINVAL;
+
+ return wc_send_op_map[hr_opcode] ? wc_send_op_map[hr_opcode] - 1 :
+ -EINVAL;
+}
+
+static const u32 wc_recv_op_map[] = {
+ HR_WC_OP_MAP(RDMA_WRITE_WITH_IMM, WITH_IMM),
+ HR_WC_OP_MAP(SEND, RECV),
+ HR_WC_OP_MAP(SEND_WITH_IMM, WITH_IMM),
+ HR_WC_OP_MAP(SEND_WITH_INV, RECV),
+};
+
+static int to_ib_wc_recv_op(u32 hr_opcode)
+{
+ if (hr_opcode >= ARRAY_SIZE(wc_recv_op_map))
+ return -EINVAL;
+
+ return wc_recv_op_map[hr_opcode] ? wc_recv_op_map[hr_opcode] - 1 :
+ -EINVAL;
+}
+
+static void fill_send_wc(struct ib_wc *wc, struct hns_roce_v2_cqe *cqe)
+{
+ u32 hr_opcode;
+ int ib_opcode;
+
+ wc->wc_flags = 0;
+
+ hr_opcode = hr_reg_read(cqe, CQE_OPCODE);
+ switch (hr_opcode) {
+ case HNS_ROCE_V2_WQE_OP_RDMA_READ:
+ wc->byte_len = le32_to_cpu(cqe->byte_cnt);
+ break;
+ case HNS_ROCE_V2_WQE_OP_SEND_WITH_IMM:
+ case HNS_ROCE_V2_WQE_OP_RDMA_WRITE_WITH_IMM:
+ wc->wc_flags |= IB_WC_WITH_IMM;
+ break;
+ case HNS_ROCE_V2_WQE_OP_ATOM_CMP_AND_SWAP:
+ case HNS_ROCE_V2_WQE_OP_ATOM_FETCH_AND_ADD:
+ case HNS_ROCE_V2_WQE_OP_ATOM_MSK_CMP_AND_SWAP:
+ case HNS_ROCE_V2_WQE_OP_ATOM_MSK_FETCH_AND_ADD:
+ wc->byte_len = 8;
+ break;
+ default:
+ break;
+ }
+
+ ib_opcode = to_ib_wc_send_op(hr_opcode);
+ if (ib_opcode < 0)
+ wc->status = IB_WC_GENERAL_ERR;
+ else
+ wc->opcode = ib_opcode;
+}
+
+static int fill_recv_wc(struct ib_wc *wc, struct hns_roce_v2_cqe *cqe)
+{
+ u32 hr_opcode;
+ int ib_opcode;
+
+ wc->byte_len = le32_to_cpu(cqe->byte_cnt);
+
+ hr_opcode = hr_reg_read(cqe, CQE_OPCODE);
+ switch (hr_opcode) {
+ case HNS_ROCE_V2_OPCODE_RDMA_WRITE_IMM:
+ case HNS_ROCE_V2_OPCODE_SEND_WITH_IMM:
+ wc->wc_flags = IB_WC_WITH_IMM;
+ wc->ex.imm_data = cpu_to_be32(le32_to_cpu(cqe->immtdata));
+ break;
+ case HNS_ROCE_V2_OPCODE_SEND_WITH_INV:
+ wc->wc_flags = IB_WC_WITH_INVALIDATE;
+ wc->ex.invalidate_rkey = le32_to_cpu(cqe->rkey);
+ break;
+ default:
+ wc->wc_flags = 0;
+ }
+
+ ib_opcode = to_ib_wc_recv_op(hr_opcode);
+ if (ib_opcode < 0)
+ wc->status = IB_WC_GENERAL_ERR;
+ else
+ wc->opcode = ib_opcode;
+
+ wc->sl = hr_reg_read(cqe, CQE_SL);
+ wc->src_qp = hr_reg_read(cqe, CQE_RMT_QPN);
+ wc->slid = 0;
+ wc->wc_flags |= hr_reg_read(cqe, CQE_GRH) ? IB_WC_GRH : 0;
+ wc->port_num = hr_reg_read(cqe, CQE_PORTN);
+ wc->pkey_index = 0;
+
+ if (hr_reg_read(cqe, CQE_VID_VLD)) {
+ wc->vlan_id = hr_reg_read(cqe, CQE_VID);
+ wc->wc_flags |= IB_WC_WITH_VLAN;
+ } else {
+ wc->vlan_id = 0xffff;
+ }
+
+ wc->network_hdr_type = hr_reg_read(cqe, CQE_PORT_TYPE);
+
+ return 0;
+}
+
+static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq,
+ struct hns_roce_qp **cur_qp, struct ib_wc *wc)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(hr_cq->ib_cq.device);
+ struct hns_roce_qp *qp = *cur_qp;
+ struct hns_roce_srq *srq = NULL;
+ struct hns_roce_v2_cqe *cqe;
+ struct hns_roce_wq *wq;
+ int is_send;
+ u16 wqe_idx;
+ int ret;
+
+ cqe = get_sw_cqe_v2(hr_cq, hr_cq->cons_index);
+ if (!cqe)
+ return -EAGAIN;
+
+ ++hr_cq->cons_index;
+ /* Memory barrier */
+ rmb();
+
+ ret = get_cur_qp(hr_cq, cqe, &qp);
+ if (ret)
+ return ret;
+
+ wc->qp = &qp->ibqp;
+ wc->vendor_err = 0;
+
+ wqe_idx = hr_reg_read(cqe, CQE_WQE_IDX);
+
+ is_send = !hr_reg_read(cqe, CQE_S_R);
+ if (is_send) {
+ wq = &qp->sq;
+
+ /* If sg_signal_bit is set, tail pointer will be updated to
+ * the WQE corresponding to the current CQE.
+ */
+ if (qp->sq_signal_bits)
+ wq->tail += (wqe_idx - (u16)wq->tail) &
+ (wq->wqe_cnt - 1);
+
+ wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
+ ++wq->tail;
+
+ fill_send_wc(wc, cqe);
+ } else {
+ if (qp->ibqp.srq) {
+ srq = to_hr_srq(qp->ibqp.srq);
+ wc->wr_id = srq->wrid[wqe_idx];
+ hns_roce_free_srq_wqe(srq, wqe_idx);
+ } else {
+ wq = &qp->rq;
+ wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
+ ++wq->tail;
+ }
+
+ ret = fill_recv_wc(wc, cqe);
+ }
+
+ get_cqe_status(hr_dev, qp, hr_cq, cqe, wc);
+ if (unlikely(wc->status != IB_WC_SUCCESS))
+ return 0;
+
+ return ret;
+}
+
+static int hns_roce_v2_poll_cq(struct ib_cq *ibcq, int num_entries,
+ struct ib_wc *wc)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibcq->device);
+ struct hns_roce_cq *hr_cq = to_hr_cq(ibcq);
+ struct hns_roce_qp *cur_qp = NULL;
+ unsigned long flags;
+ int npolled;
+
+ spin_lock_irqsave(&hr_cq->lock, flags);
+
+ /*
+ * When the device starts to reset, the state is RST_DOWN. At this time,
+ * there may still be some valid CQEs in the hardware that are not
+ * polled. Therefore, it is not allowed to switch to the software mode
+ * immediately. When the state changes to UNINIT, CQE no longer exists
+ * in the hardware, and then switch to software mode.
+ */
+ if (hr_dev->state == HNS_ROCE_DEVICE_STATE_UNINIT) {
+ npolled = hns_roce_v2_sw_poll_cq(hr_cq, num_entries, wc);
+ goto out;
+ }
+
+ for (npolled = 0; npolled < num_entries; ++npolled) {
+ if (hns_roce_v2_poll_one(hr_cq, &cur_qp, wc + npolled))
+ break;
+ }
+
+ if (npolled)
+ update_cq_db(hr_dev, hr_cq);
+
+out:
+ spin_unlock_irqrestore(&hr_cq->lock, flags);
+
+ return npolled;
+}
+
+static int get_op_for_set_hem(struct hns_roce_dev *hr_dev, u32 type,
+ u32 step_idx, u8 *mbox_cmd)
+{
+ u8 cmd;
+
+ switch (type) {
+ case HEM_TYPE_QPC:
+ cmd = HNS_ROCE_CMD_WRITE_QPC_BT0;
+ break;
+ case HEM_TYPE_MTPT:
+ cmd = HNS_ROCE_CMD_WRITE_MPT_BT0;
+ break;
+ case HEM_TYPE_CQC:
+ cmd = HNS_ROCE_CMD_WRITE_CQC_BT0;
+ break;
+ case HEM_TYPE_SRQC:
+ cmd = HNS_ROCE_CMD_WRITE_SRQC_BT0;
+ break;
+ case HEM_TYPE_SCCC:
+ cmd = HNS_ROCE_CMD_WRITE_SCCC_BT0;
+ break;
+ case HEM_TYPE_QPC_TIMER:
+ cmd = HNS_ROCE_CMD_WRITE_QPC_TIMER_BT0;
+ break;
+ case HEM_TYPE_CQC_TIMER:
+ cmd = HNS_ROCE_CMD_WRITE_CQC_TIMER_BT0;
+ break;
+ default:
+ dev_warn(hr_dev->dev, "failed to check hem type %u.\n", type);
+ return -EINVAL;
+ }
+
+ *mbox_cmd = cmd + step_idx;
+
+ return 0;
+}
+
+static int config_gmv_ba_to_hw(struct hns_roce_dev *hr_dev, unsigned long obj,
+ dma_addr_t base_addr)
+{
+ struct hns_roce_cmq_desc desc;
+ struct hns_roce_cmq_req *req = (struct hns_roce_cmq_req *)desc.data;
+ u32 idx = obj / (HNS_HW_PAGE_SIZE / hr_dev->caps.gmv_entry_sz);
+ u64 addr = to_hr_hw_page_addr(base_addr);
+
+ hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CFG_GMV_BT, false);
+
+ hr_reg_write(req, CFG_GMV_BT_BA_L, lower_32_bits(addr));
+ hr_reg_write(req, CFG_GMV_BT_BA_H, upper_32_bits(addr));
+ hr_reg_write(req, CFG_GMV_BT_IDX, idx);
+
+ return hns_roce_cmq_send(hr_dev, &desc, 1);
+}
+
+static int set_hem_to_hw(struct hns_roce_dev *hr_dev, int obj,
+ dma_addr_t base_addr, u32 hem_type, u32 step_idx)
+{
+ int ret;
+ u8 cmd;
+
+ if (unlikely(hem_type == HEM_TYPE_GMV))
+ return config_gmv_ba_to_hw(hr_dev, obj, base_addr);
+
+ if (unlikely(hem_type == HEM_TYPE_SCCC && step_idx))
+ return 0;
+
+ ret = get_op_for_set_hem(hr_dev, hem_type, step_idx, &cmd);
+ if (ret < 0)
+ return ret;
+
+ return config_hem_ba_to_hw(hr_dev, base_addr, cmd, obj);
+}
+
+static int hns_roce_v2_set_hem(struct hns_roce_dev *hr_dev,
+ struct hns_roce_hem_table *table, int obj,
+ u32 step_idx)
+{
+ struct hns_roce_hem_mhop mhop;
+ struct hns_roce_hem *hem;
+ unsigned long mhop_obj = obj;
+ int i, j, k;
+ int ret = 0;
+ u64 hem_idx = 0;
+ u64 l1_idx = 0;
+ u64 bt_ba = 0;
+ u32 chunk_ba_num;
+ u32 hop_num;
+
+ if (!hns_roce_check_whether_mhop(hr_dev, table->type))
+ return 0;
+
+ hns_roce_calc_hem_mhop(hr_dev, table, &mhop_obj, &mhop);
+ i = mhop.l0_idx;
+ j = mhop.l1_idx;
+ k = mhop.l2_idx;
+ hop_num = mhop.hop_num;
+ chunk_ba_num = mhop.bt_chunk_size / 8;
+
+ if (hop_num == 2) {
+ hem_idx = i * chunk_ba_num * chunk_ba_num + j * chunk_ba_num +
+ k;
+ l1_idx = i * chunk_ba_num + j;
+ } else if (hop_num == 1) {
+ hem_idx = i * chunk_ba_num + j;
+ } else if (hop_num == HNS_ROCE_HOP_NUM_0) {
+ hem_idx = i;
+ }
+
+ if (table->type == HEM_TYPE_SCCC)
+ obj = mhop.l0_idx;
+
+ if (check_whether_last_step(hop_num, step_idx)) {
+ hem = table->hem[hem_idx];
+
+ ret = set_hem_to_hw(hr_dev, obj, hem->dma, table->type, step_idx);
+ } else {
+ if (step_idx == 0)
+ bt_ba = table->bt_l0_dma_addr[i];
+ else if (step_idx == 1 && hop_num == 2)
+ bt_ba = table->bt_l1_dma_addr[l1_idx];
+
+ ret = set_hem_to_hw(hr_dev, obj, bt_ba, table->type, step_idx);
+ }
+
+ return ret;
+}
+
+static int hns_roce_v2_clear_hem(struct hns_roce_dev *hr_dev,
+ struct hns_roce_hem_table *table,
+ int tag, u32 step_idx)
+{
+ struct hns_roce_cmd_mailbox *mailbox;
+ struct device *dev = hr_dev->dev;
+ u8 cmd = 0xff;
+ int ret;
+
+ if (!hns_roce_check_whether_mhop(hr_dev, table->type))
+ return 0;
+
+ switch (table->type) {
+ case HEM_TYPE_QPC:
+ cmd = HNS_ROCE_CMD_DESTROY_QPC_BT0;
+ break;
+ case HEM_TYPE_MTPT:
+ cmd = HNS_ROCE_CMD_DESTROY_MPT_BT0;
+ break;
+ case HEM_TYPE_CQC:
+ cmd = HNS_ROCE_CMD_DESTROY_CQC_BT0;
+ break;
+ case HEM_TYPE_SRQC:
+ cmd = HNS_ROCE_CMD_DESTROY_SRQC_BT0;
+ break;
+ case HEM_TYPE_SCCC:
+ case HEM_TYPE_QPC_TIMER:
+ case HEM_TYPE_CQC_TIMER:
+ case HEM_TYPE_GMV:
+ return 0;
+ default:
+ dev_warn(dev, "table %u not to be destroyed by mailbox!\n",
+ table->type);
+ return 0;
+ }
+
+ cmd += step_idx;
+
+ mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+
+ ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, cmd, tag);
+
+ hns_roce_free_cmd_mailbox(hr_dev, mailbox);
+ return ret;
+}
+
+static int hns_roce_v2_qp_modify(struct hns_roce_dev *hr_dev,
+ struct hns_roce_v2_qp_context *context,
+ struct hns_roce_v2_qp_context *qpc_mask,
+ struct hns_roce_qp *hr_qp)
+{
+ struct hns_roce_cmd_mailbox *mailbox;
+ int qpc_size;
+ int ret;
+
+ mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+
+ /* The qpc size of HIP08 is only 256B, which is half of HIP09 */
+ qpc_size = hr_dev->caps.qpc_sz;
+ memcpy(mailbox->buf, context, qpc_size);
+ memcpy(mailbox->buf + qpc_size, qpc_mask, qpc_size);
+
+ ret = hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0,
+ HNS_ROCE_CMD_MODIFY_QPC, hr_qp->qpn);
+
+ hns_roce_free_cmd_mailbox(hr_dev, mailbox);
+
+ return ret;
+}
+
+static void set_access_flags(struct hns_roce_qp *hr_qp,
+ struct hns_roce_v2_qp_context *context,
+ struct hns_roce_v2_qp_context *qpc_mask,
+ const struct ib_qp_attr *attr, int attr_mask)
+{
+ u8 dest_rd_atomic;
+ u32 access_flags;
+
+ dest_rd_atomic = (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) ?
+ attr->max_dest_rd_atomic : hr_qp->resp_depth;
+
+ access_flags = (attr_mask & IB_QP_ACCESS_FLAGS) ?
+ attr->qp_access_flags : hr_qp->atomic_rd_en;
+
+ if (!dest_rd_atomic)
+ access_flags &= IB_ACCESS_REMOTE_WRITE;
+
+ hr_reg_write_bool(context, QPC_RRE,
+ access_flags & IB_ACCESS_REMOTE_READ);
+ hr_reg_clear(qpc_mask, QPC_RRE);
+
+ hr_reg_write_bool(context, QPC_RWE,
+ access_flags & IB_ACCESS_REMOTE_WRITE);
+ hr_reg_clear(qpc_mask, QPC_RWE);
+
+ hr_reg_write_bool(context, QPC_ATE,
+ access_flags & IB_ACCESS_REMOTE_ATOMIC);
+ hr_reg_clear(qpc_mask, QPC_ATE);
+ hr_reg_write_bool(context, QPC_EXT_ATE,
+ access_flags & IB_ACCESS_REMOTE_ATOMIC);
+ hr_reg_clear(qpc_mask, QPC_EXT_ATE);
+}
+
+static void set_qpc_wqe_cnt(struct hns_roce_qp *hr_qp,
+ struct hns_roce_v2_qp_context *context)
+{
+ hr_reg_write(context, QPC_SGE_SHIFT,
+ to_hr_hem_entries_shift(hr_qp->sge.sge_cnt,
+ hr_qp->sge.sge_shift));
+
+ hr_reg_write(context, QPC_SQ_SHIFT, ilog2(hr_qp->sq.wqe_cnt));
+
+ hr_reg_write(context, QPC_RQ_SHIFT, ilog2(hr_qp->rq.wqe_cnt));
+}
+
+static inline int get_cqn(struct ib_cq *ib_cq)
+{
+ return ib_cq ? to_hr_cq(ib_cq)->cqn : 0;
+}
+
+static inline int get_pdn(struct ib_pd *ib_pd)
+{
+ return ib_pd ? to_hr_pd(ib_pd)->pdn : 0;
+}
+
+static void modify_qp_reset_to_init(struct ib_qp *ibqp,
+ struct hns_roce_v2_qp_context *context)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
+ struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
+
+ /*
+ * In v2 engine, software pass context and context mask to hardware
+ * when modifying qp. If software need modify some fields in context,
+ * we should set all bits of the relevant fields in context mask to
+ * 0 at the same time, else set them to 0x1.
+ */
+ hr_reg_write(context, QPC_TST, to_hr_qp_type(ibqp->qp_type));
+
+ hr_reg_write(context, QPC_PD, get_pdn(ibqp->pd));
+
+ hr_reg_write(context, QPC_RQWS, ilog2(hr_qp->rq.max_gs));
+
+ set_qpc_wqe_cnt(hr_qp, context);
+
+ /* No VLAN need to set 0xFFF */
+ hr_reg_write(context, QPC_VLAN_ID, 0xfff);
+
+ if (ibqp->qp_type == IB_QPT_XRC_TGT) {
+ context->qkey_xrcd = cpu_to_le32(hr_qp->xrcdn);
+
+ hr_reg_enable(context, QPC_XRC_QP_TYPE);
+ }
+
+ if (hr_qp->en_flags & HNS_ROCE_QP_CAP_RQ_RECORD_DB)
+ hr_reg_enable(context, QPC_RQ_RECORD_EN);
+
+ if (hr_qp->en_flags & HNS_ROCE_QP_CAP_OWNER_DB)
+ hr_reg_enable(context, QPC_OWNER_MODE);
+
+ hr_reg_write(context, QPC_RQ_DB_RECORD_ADDR_L,
+ lower_32_bits(hr_qp->rdb.dma) >> 1);
+ hr_reg_write(context, QPC_RQ_DB_RECORD_ADDR_H,
+ upper_32_bits(hr_qp->rdb.dma));
+
+ hr_reg_write(context, QPC_RX_CQN, get_cqn(ibqp->recv_cq));
+
+ if (ibqp->srq) {
+ hr_reg_enable(context, QPC_SRQ_EN);
+ hr_reg_write(context, QPC_SRQN, to_hr_srq(ibqp->srq)->srqn);
+ }
+
+ hr_reg_enable(context, QPC_FRE);
+
+ hr_reg_write(context, QPC_TX_CQN, get_cqn(ibqp->send_cq));
+
+ if (hr_dev->caps.qpc_sz < HNS_ROCE_V3_QPC_SZ)
+ return;
+
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_STASH)
+ hr_reg_enable(&context->ext, QPCEX_STASH);
+}
+
+static void modify_qp_init_to_init(struct ib_qp *ibqp,
+ struct hns_roce_v2_qp_context *context,
+ struct hns_roce_v2_qp_context *qpc_mask)
+{
+ /*
+ * In v2 engine, software pass context and context mask to hardware
+ * when modifying qp. If software need modify some fields in context,
+ * we should set all bits of the relevant fields in context mask to
+ * 0 at the same time, else set them to 0x1.
+ */
+ hr_reg_write(context, QPC_TST, to_hr_qp_type(ibqp->qp_type));
+ hr_reg_clear(qpc_mask, QPC_TST);
+
+ hr_reg_write(context, QPC_PD, get_pdn(ibqp->pd));
+ hr_reg_clear(qpc_mask, QPC_PD);
+
+ hr_reg_write(context, QPC_RX_CQN, get_cqn(ibqp->recv_cq));
+ hr_reg_clear(qpc_mask, QPC_RX_CQN);
+
+ hr_reg_write(context, QPC_TX_CQN, get_cqn(ibqp->send_cq));
+ hr_reg_clear(qpc_mask, QPC_TX_CQN);
+
+ if (ibqp->srq) {
+ hr_reg_enable(context, QPC_SRQ_EN);
+ hr_reg_clear(qpc_mask, QPC_SRQ_EN);
+ hr_reg_write(context, QPC_SRQN, to_hr_srq(ibqp->srq)->srqn);
+ hr_reg_clear(qpc_mask, QPC_SRQN);
+ }
+}
+
+static int config_qp_rq_buf(struct hns_roce_dev *hr_dev,
+ struct hns_roce_qp *hr_qp,
+ struct hns_roce_v2_qp_context *context,
+ struct hns_roce_v2_qp_context *qpc_mask)
+{
+ u64 mtts[MTT_MIN_COUNT] = { 0 };
+ u64 wqe_sge_ba;
+ int ret;
+
+ /* Search qp buf's mtts */
+ ret = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, hr_qp->rq.offset, mtts,
+ MTT_MIN_COUNT);
+ if (hr_qp->rq.wqe_cnt && ret) {
+ ibdev_err(&hr_dev->ib_dev,
+ "failed to find QP(0x%lx) RQ WQE buf, ret = %d.\n",
+ hr_qp->qpn, ret);
+ return ret;
+ }
+
+ wqe_sge_ba = hns_roce_get_mtr_ba(&hr_qp->mtr);
+
+ context->wqe_sge_ba = cpu_to_le32(wqe_sge_ba >> 3);
+ qpc_mask->wqe_sge_ba = 0;
+
+ /*
+ * In v2 engine, software pass context and context mask to hardware
+ * when modifying qp. If software need modify some fields in context,
+ * we should set all bits of the relevant fields in context mask to
+ * 0 at the same time, else set them to 0x1.
+ */
+ hr_reg_write(context, QPC_WQE_SGE_BA_H, wqe_sge_ba >> (32 + 3));
+ hr_reg_clear(qpc_mask, QPC_WQE_SGE_BA_H);
+
+ hr_reg_write(context, QPC_SQ_HOP_NUM,
+ to_hr_hem_hopnum(hr_dev->caps.wqe_sq_hop_num,
+ hr_qp->sq.wqe_cnt));
+ hr_reg_clear(qpc_mask, QPC_SQ_HOP_NUM);
+
+ hr_reg_write(context, QPC_SGE_HOP_NUM,
+ to_hr_hem_hopnum(hr_dev->caps.wqe_sge_hop_num,
+ hr_qp->sge.sge_cnt));
+ hr_reg_clear(qpc_mask, QPC_SGE_HOP_NUM);
+
+ hr_reg_write(context, QPC_RQ_HOP_NUM,
+ to_hr_hem_hopnum(hr_dev->caps.wqe_rq_hop_num,
+ hr_qp->rq.wqe_cnt));
+
+ hr_reg_clear(qpc_mask, QPC_RQ_HOP_NUM);
+
+ hr_reg_write(context, QPC_WQE_SGE_BA_PG_SZ,
+ to_hr_hw_page_shift(hr_qp->mtr.hem_cfg.ba_pg_shift));
+ hr_reg_clear(qpc_mask, QPC_WQE_SGE_BA_PG_SZ);
+
+ hr_reg_write(context, QPC_WQE_SGE_BUF_PG_SZ,
+ to_hr_hw_page_shift(hr_qp->mtr.hem_cfg.buf_pg_shift));
+ hr_reg_clear(qpc_mask, QPC_WQE_SGE_BUF_PG_SZ);
+
+ context->rq_cur_blk_addr = cpu_to_le32(to_hr_hw_page_addr(mtts[0]));
+ qpc_mask->rq_cur_blk_addr = 0;
+
+ hr_reg_write(context, QPC_RQ_CUR_BLK_ADDR_H,
+ upper_32_bits(to_hr_hw_page_addr(mtts[0])));
+ hr_reg_clear(qpc_mask, QPC_RQ_CUR_BLK_ADDR_H);
+
+ if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) {
+ context->rq_nxt_blk_addr =
+ cpu_to_le32(to_hr_hw_page_addr(mtts[1]));
+ qpc_mask->rq_nxt_blk_addr = 0;
+ hr_reg_write(context, QPC_RQ_NXT_BLK_ADDR_H,
+ upper_32_bits(to_hr_hw_page_addr(mtts[1])));
+ hr_reg_clear(qpc_mask, QPC_RQ_NXT_BLK_ADDR_H);
+ }
+
+ return 0;
+}
+
+static int config_qp_sq_buf(struct hns_roce_dev *hr_dev,
+ struct hns_roce_qp *hr_qp,
+ struct hns_roce_v2_qp_context *context,
+ struct hns_roce_v2_qp_context *qpc_mask)
+{
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ u64 sge_cur_blk = 0;
+ u64 sq_cur_blk = 0;
+ int ret;
+
+ /* search qp buf's mtts */
+ ret = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, hr_qp->sq.offset,
+ &sq_cur_blk, 1);
+ if (ret) {
+ ibdev_err(ibdev, "failed to find QP(0x%lx) SQ WQE buf, ret = %d.\n",
+ hr_qp->qpn, ret);
+ return ret;
+ }
+ if (hr_qp->sge.sge_cnt > 0) {
+ ret = hns_roce_mtr_find(hr_dev, &hr_qp->mtr,
+ hr_qp->sge.offset, &sge_cur_blk, 1);
+ if (ret) {
+ ibdev_err(ibdev, "failed to find QP(0x%lx) SGE buf, ret = %d.\n",
+ hr_qp->qpn, ret);
+ return ret;
+ }
+ }
+
+ /*
+ * In v2 engine, software pass context and context mask to hardware
+ * when modifying qp. If software need modify some fields in context,
+ * we should set all bits of the relevant fields in context mask to
+ * 0 at the same time, else set them to 0x1.
+ */
+ hr_reg_write(context, QPC_SQ_CUR_BLK_ADDR_L,
+ lower_32_bits(to_hr_hw_page_addr(sq_cur_blk)));
+ hr_reg_write(context, QPC_SQ_CUR_BLK_ADDR_H,
+ upper_32_bits(to_hr_hw_page_addr(sq_cur_blk)));
+ hr_reg_clear(qpc_mask, QPC_SQ_CUR_BLK_ADDR_L);
+ hr_reg_clear(qpc_mask, QPC_SQ_CUR_BLK_ADDR_H);
+
+ hr_reg_write(context, QPC_SQ_CUR_SGE_BLK_ADDR_L,
+ lower_32_bits(to_hr_hw_page_addr(sge_cur_blk)));
+ hr_reg_write(context, QPC_SQ_CUR_SGE_BLK_ADDR_H,
+ upper_32_bits(to_hr_hw_page_addr(sge_cur_blk)));
+ hr_reg_clear(qpc_mask, QPC_SQ_CUR_SGE_BLK_ADDR_L);
+ hr_reg_clear(qpc_mask, QPC_SQ_CUR_SGE_BLK_ADDR_H);
+
+ hr_reg_write(context, QPC_RX_SQ_CUR_BLK_ADDR_L,
+ lower_32_bits(to_hr_hw_page_addr(sq_cur_blk)));
+ hr_reg_write(context, QPC_RX_SQ_CUR_BLK_ADDR_H,
+ upper_32_bits(to_hr_hw_page_addr(sq_cur_blk)));
+ hr_reg_clear(qpc_mask, QPC_RX_SQ_CUR_BLK_ADDR_L);
+ hr_reg_clear(qpc_mask, QPC_RX_SQ_CUR_BLK_ADDR_H);
+
+ return 0;
+}
+
+static inline enum ib_mtu get_mtu(struct ib_qp *ibqp,
+ const struct ib_qp_attr *attr)
+{
+ if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_UD)
+ return IB_MTU_4096;
+
+ return attr->path_mtu;
+}
+
+static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
+ const struct ib_qp_attr *attr, int attr_mask,
+ struct hns_roce_v2_qp_context *context,
+ struct hns_roce_v2_qp_context *qpc_mask,
+ struct ib_udata *udata)
+{
+ struct hns_roce_ucontext *uctx = rdma_udata_to_drv_context(udata,
+ struct hns_roce_ucontext, ibucontext);
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
+ struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ dma_addr_t trrl_ba;
+ dma_addr_t irrl_ba;
+ enum ib_mtu ib_mtu;
+ u8 ack_req_freq;
+ const u8 *smac;
+ int lp_msg_len;
+ u8 lp_pktn_ini;
+ u64 *mtts;
+ u8 *dmac;
+ u32 port;
+ int mtu;
+ int ret;
+
+ ret = config_qp_rq_buf(hr_dev, hr_qp, context, qpc_mask);
+ if (ret) {
+ ibdev_err(ibdev, "failed to config rq buf, ret = %d.\n", ret);
+ return ret;
+ }
+
+ /* Search IRRL's mtts */
+ mtts = hns_roce_table_find(hr_dev, &hr_dev->qp_table.irrl_table,
+ hr_qp->qpn, &irrl_ba);
+ if (!mtts) {
+ ibdev_err(ibdev, "failed to find qp irrl_table.\n");
+ return -EINVAL;
+ }
+
+ /* Search TRRL's mtts */
+ mtts = hns_roce_table_find(hr_dev, &hr_dev->qp_table.trrl_table,
+ hr_qp->qpn, &trrl_ba);
+ if (!mtts) {
+ ibdev_err(ibdev, "failed to find qp trrl_table.\n");
+ return -EINVAL;
+ }
+
+ if (attr_mask & IB_QP_ALT_PATH) {
+ ibdev_err(ibdev, "INIT2RTR attr_mask (0x%x) error.\n",
+ attr_mask);
+ return -EINVAL;
+ }
+
+ hr_reg_write(context, QPC_TRRL_BA_L, trrl_ba >> QPC_TRRL_BA_L_S);
+ hr_reg_clear(qpc_mask, QPC_TRRL_BA_L);
+ context->trrl_ba = cpu_to_le32(trrl_ba >> QPC_TRRL_BA_M_S);
+ qpc_mask->trrl_ba = 0;
+ hr_reg_write(context, QPC_TRRL_BA_H, trrl_ba >> QPC_TRRL_BA_H_S);
+ hr_reg_clear(qpc_mask, QPC_TRRL_BA_H);
+
+ context->irrl_ba = cpu_to_le32(irrl_ba >> QPC_IRRL_BA_L_S);
+ qpc_mask->irrl_ba = 0;
+ hr_reg_write(context, QPC_IRRL_BA_H, irrl_ba >> QPC_IRRL_BA_H_S);
+ hr_reg_clear(qpc_mask, QPC_IRRL_BA_H);
+
+ hr_reg_enable(context, QPC_RMT_E2E);
+ hr_reg_clear(qpc_mask, QPC_RMT_E2E);
+
+ hr_reg_write(context, QPC_SIG_TYPE, hr_qp->sq_signal_bits);
+ hr_reg_clear(qpc_mask, QPC_SIG_TYPE);
+
+ port = (attr_mask & IB_QP_PORT) ? (attr->port_num - 1) : hr_qp->port;
+
+ smac = (const u8 *)hr_dev->dev_addr[port];
+ dmac = (u8 *)attr->ah_attr.roce.dmac;
+ /* when dmac equals smac or loop_idc is 1, it should loopback */
+ if (ether_addr_equal_unaligned(dmac, smac) ||
+ hr_dev->loop_idc == 0x1) {
+ hr_reg_write(context, QPC_LBI, hr_dev->loop_idc);
+ hr_reg_clear(qpc_mask, QPC_LBI);
+ }
+
+ if (attr_mask & IB_QP_DEST_QPN) {
+ hr_reg_write(context, QPC_DQPN, attr->dest_qp_num);
+ hr_reg_clear(qpc_mask, QPC_DQPN);
+ }
+
+ memcpy(&context->dmac, dmac, sizeof(u32));
+ hr_reg_write(context, QPC_DMAC_H, *((u16 *)(&dmac[4])));
+ qpc_mask->dmac = 0;
+ hr_reg_clear(qpc_mask, QPC_DMAC_H);
+
+ ib_mtu = get_mtu(ibqp, attr);
+ hr_qp->path_mtu = ib_mtu;
+
+ mtu = ib_mtu_enum_to_int(ib_mtu);
+ if (WARN_ON(mtu <= 0))
+ return -EINVAL;
+#define MIN_LP_MSG_LEN 1024
+ /* mtu * (2 ^ lp_pktn_ini) should be in the range of 1024 to mtu */
+ lp_msg_len = max(mtu, MIN_LP_MSG_LEN);
+ lp_pktn_ini = ilog2(lp_msg_len / mtu);
+
+ if (attr_mask & IB_QP_PATH_MTU) {
+ hr_reg_write(context, QPC_MTU, ib_mtu);
+ hr_reg_clear(qpc_mask, QPC_MTU);
+ }
+
+ hr_reg_write(context, QPC_LP_PKTN_INI, lp_pktn_ini);
+ hr_reg_clear(qpc_mask, QPC_LP_PKTN_INI);
+
+ /*
+ * There are several constraints for ACK_REQ_FREQ:
+ * 1. mtu * (2 ^ ACK_REQ_FREQ) should not be too large, otherwise
+ * it may cause some unexpected retries when sending large
+ * payload.
+ * 2. ACK_REQ_FREQ should be larger than or equal to LP_PKTN_INI.
+ * 3. ACK_REQ_FREQ must be equal to LP_PKTN_INI when using LDCP
+ * or HC3 congestion control algorithm.
+ */
+ if (hr_qp->cong_type == CONG_TYPE_LDCP ||
+ hr_qp->cong_type == CONG_TYPE_HC3 ||
+ hr_dev->caps.max_ack_req_msg_len < lp_msg_len)
+ ack_req_freq = lp_pktn_ini;
+ else
+ ack_req_freq = ilog2(hr_dev->caps.max_ack_req_msg_len / mtu);
+ hr_reg_write(context, QPC_ACK_REQ_FREQ, ack_req_freq);
+ hr_reg_clear(qpc_mask, QPC_ACK_REQ_FREQ);
+
+ hr_reg_clear(qpc_mask, QPC_RX_REQ_PSN_ERR);
+ hr_reg_clear(qpc_mask, QPC_RX_REQ_MSN);
+ hr_reg_clear(qpc_mask, QPC_RX_REQ_LAST_OPTYPE);
+
+ context->rq_rnr_timer = 0;
+ qpc_mask->rq_rnr_timer = 0;
+
+ hr_reg_clear(qpc_mask, QPC_TRRL_HEAD_MAX);
+ hr_reg_clear(qpc_mask, QPC_TRRL_TAIL_MAX);
+
+#define MAX_LP_SGEN 3
+ /* rocee send 2^lp_sgen_ini segs every time */
+ hr_reg_write(context, QPC_LP_SGEN_INI, MAX_LP_SGEN);
+ hr_reg_clear(qpc_mask, QPC_LP_SGEN_INI);
+
+ if (udata && ibqp->qp_type == IB_QPT_RC &&
+ (uctx->config & HNS_ROCE_RQ_INLINE_FLAGS)) {
+ hr_reg_write_bool(context, QPC_RQIE,
+ hr_dev->caps.flags &
+ HNS_ROCE_CAP_FLAG_RQ_INLINE);
+ hr_reg_clear(qpc_mask, QPC_RQIE);
+ }
+
+ if (udata &&
+ (ibqp->qp_type == IB_QPT_RC || ibqp->qp_type == IB_QPT_XRC_TGT) &&
+ (uctx->config & HNS_ROCE_CQE_INLINE_FLAGS)) {
+ hr_reg_write_bool(context, QPC_CQEIE,
+ hr_dev->caps.flags &
+ HNS_ROCE_CAP_FLAG_CQE_INLINE);
+ hr_reg_clear(qpc_mask, QPC_CQEIE);
+
+ hr_reg_write(context, QPC_CQEIS, 0);
+ hr_reg_clear(qpc_mask, QPC_CQEIS);
+ }
+
+ return 0;
+}
+
+static int modify_qp_rtr_to_rts(struct ib_qp *ibqp, int attr_mask,
+ struct hns_roce_v2_qp_context *context,
+ struct hns_roce_v2_qp_context *qpc_mask)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
+ struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ int ret;
+
+ /* Not support alternate path and path migration */
+ if (attr_mask & (IB_QP_ALT_PATH | IB_QP_PATH_MIG_STATE)) {
+ ibdev_err(ibdev, "RTR2RTS attr_mask (0x%x)error\n", attr_mask);
+ return -EINVAL;
+ }
+
+ ret = config_qp_sq_buf(hr_dev, hr_qp, context, qpc_mask);
+ if (ret) {
+ ibdev_err(ibdev, "failed to config sq buf, ret = %d.\n", ret);
+ return ret;
+ }
+
+ /*
+ * Set some fields in context to zero, Because the default values
+ * of all fields in context are zero, we need not set them to 0 again.
+ * but we should set the relevant fields of context mask to 0.
+ */
+ hr_reg_clear(qpc_mask, QPC_IRRL_SGE_IDX);
+
+ hr_reg_clear(qpc_mask, QPC_RX_ACK_MSN);
+
+ hr_reg_clear(qpc_mask, QPC_ACK_LAST_OPTYPE);
+ hr_reg_clear(qpc_mask, QPC_IRRL_PSN_VLD);
+ hr_reg_clear(qpc_mask, QPC_IRRL_PSN);
+
+ hr_reg_clear(qpc_mask, QPC_IRRL_TAIL_REAL);
+
+ hr_reg_clear(qpc_mask, QPC_RETRY_MSG_MSN);
+
+ hr_reg_clear(qpc_mask, QPC_RNR_RETRY_FLAG);
+
+ hr_reg_clear(qpc_mask, QPC_CHECK_FLG);
+
+ hr_reg_clear(qpc_mask, QPC_V2_IRRL_HEAD);
+
+ return 0;
+}
+
+static int alloc_dip_entry(struct xarray *dip_xa, u32 qpn)
+{
+ struct hns_roce_dip *hr_dip;
+ int ret;
+
+ hr_dip = xa_load(dip_xa, qpn);
+ if (hr_dip)
+ return 0;
+
+ hr_dip = kzalloc(sizeof(*hr_dip), GFP_KERNEL);
+ if (!hr_dip)
+ return -ENOMEM;
+
+ ret = xa_err(xa_store(dip_xa, qpn, hr_dip, GFP_KERNEL));
+ if (ret)
+ kfree(hr_dip);
+
+ return ret;
+}
+
+static int get_dip_ctx_idx(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
+ u32 *dip_idx)
+{
+ const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
+ struct xarray *dip_xa = &hr_dev->qp_table.dip_xa;
+ struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
+ struct hns_roce_dip *hr_dip;
+ unsigned long idx;
+ int ret = 0;
+
+ ret = alloc_dip_entry(dip_xa, ibqp->qp_num);
+ if (ret)
+ return ret;
+
+ xa_lock(dip_xa);
+
+ xa_for_each(dip_xa, idx, hr_dip) {
+ if (hr_dip->qp_cnt &&
+ !memcmp(grh->dgid.raw, hr_dip->dgid, GID_LEN_V2)) {
+ *dip_idx = hr_dip->dip_idx;
+ hr_dip->qp_cnt++;
+ hr_qp->dip = hr_dip;
+ goto out;
+ }
+ }
+
+ /* If no dgid is found, a new dip and a mapping between dgid and
+ * dip_idx will be created.
+ */
+ xa_for_each(dip_xa, idx, hr_dip) {
+ if (hr_dip->qp_cnt)
+ continue;
+
+ *dip_idx = idx;
+ memcpy(hr_dip->dgid, grh->dgid.raw, sizeof(grh->dgid.raw));
+ hr_dip->dip_idx = idx;
+ hr_dip->qp_cnt++;
+ hr_qp->dip = hr_dip;
+ break;
+ }
+
+ /* This should never happen. */
+ if (WARN_ON_ONCE(!hr_qp->dip))
+ ret = -ENOSPC;
+
+out:
+ xa_unlock(dip_xa);
+ return ret;
+}
+
+enum {
+ CONG_DCQCN,
+ CONG_WINDOW,
+};
+
+enum {
+ UNSUPPORT_CONG_LEVEL,
+ SUPPORT_CONG_LEVEL,
+};
+
+enum {
+ CONG_LDCP,
+ CONG_HC3,
+};
+
+enum {
+ DIP_INVALID,
+ DIP_VALID,
+};
+
+enum {
+ WND_LIMIT,
+ WND_UNLIMIT,
+};
+
+static int check_cong_type(struct ib_qp *ibqp,
+ struct hns_roce_congestion_algorithm *cong_alg)
+{
+ struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
+
+ /* different congestion types match different configurations */
+ switch (hr_qp->cong_type) {
+ case CONG_TYPE_DCQCN:
+ cong_alg->alg_sel = CONG_DCQCN;
+ cong_alg->alg_sub_sel = UNSUPPORT_CONG_LEVEL;
+ cong_alg->dip_vld = DIP_INVALID;
+ cong_alg->wnd_mode_sel = WND_LIMIT;
+ break;
+ case CONG_TYPE_LDCP:
+ cong_alg->alg_sel = CONG_WINDOW;
+ cong_alg->alg_sub_sel = CONG_LDCP;
+ cong_alg->dip_vld = DIP_INVALID;
+ cong_alg->wnd_mode_sel = WND_UNLIMIT;
+ break;
+ case CONG_TYPE_HC3:
+ cong_alg->alg_sel = CONG_WINDOW;
+ cong_alg->alg_sub_sel = CONG_HC3;
+ cong_alg->dip_vld = DIP_INVALID;
+ cong_alg->wnd_mode_sel = WND_LIMIT;
+ break;
+ case CONG_TYPE_DIP:
+ cong_alg->alg_sel = CONG_DCQCN;
+ cong_alg->alg_sub_sel = UNSUPPORT_CONG_LEVEL;
+ cong_alg->dip_vld = DIP_VALID;
+ cong_alg->wnd_mode_sel = WND_LIMIT;
+ break;
+ default:
+ hr_qp->cong_type = CONG_TYPE_DCQCN;
+ cong_alg->alg_sel = CONG_DCQCN;
+ cong_alg->alg_sub_sel = UNSUPPORT_CONG_LEVEL;
+ cong_alg->dip_vld = DIP_INVALID;
+ cong_alg->wnd_mode_sel = WND_LIMIT;
+ break;
+ }
+
+ return 0;
+}
+
+static int fill_cong_field(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
+ struct hns_roce_v2_qp_context *context,
+ struct hns_roce_v2_qp_context *qpc_mask)
+{
+ const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
+ struct hns_roce_congestion_algorithm cong_field;
+ struct ib_device *ibdev = ibqp->device;
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibdev);
+ struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
+ u32 dip_idx = 0;
+ int ret;
+
+ if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08 ||
+ grh->sgid_attr->gid_type == IB_GID_TYPE_ROCE)
+ return 0;
+
+ ret = check_cong_type(ibqp, &cong_field);
+ if (ret)
+ return ret;
+
+ hr_reg_write(context, QPC_CONG_ALGO_TMPL_ID, hr_dev->cong_algo_tmpl_id +
+ hr_qp->cong_type * HNS_ROCE_CONG_SIZE);
+ hr_reg_clear(qpc_mask, QPC_CONG_ALGO_TMPL_ID);
+ hr_reg_write(&context->ext, QPCEX_CONG_ALG_SEL, cong_field.alg_sel);
+ hr_reg_clear(&qpc_mask->ext, QPCEX_CONG_ALG_SEL);
+ hr_reg_write(&context->ext, QPCEX_CONG_ALG_SUB_SEL,
+ cong_field.alg_sub_sel);
+ hr_reg_clear(&qpc_mask->ext, QPCEX_CONG_ALG_SUB_SEL);
+ hr_reg_write(&context->ext, QPCEX_DIP_CTX_IDX_VLD, cong_field.dip_vld);
+ hr_reg_clear(&qpc_mask->ext, QPCEX_DIP_CTX_IDX_VLD);
+ hr_reg_write(&context->ext, QPCEX_SQ_RQ_NOT_FORBID_EN,
+ cong_field.wnd_mode_sel);
+ hr_reg_clear(&qpc_mask->ext, QPCEX_SQ_RQ_NOT_FORBID_EN);
+
+ /* if dip is disabled, there is no need to set dip idx */
+ if (cong_field.dip_vld == 0)
+ return 0;
+
+ ret = get_dip_ctx_idx(ibqp, attr, &dip_idx);
+ if (ret) {
+ ibdev_err(ibdev, "failed to fill cong field, ret = %d.\n", ret);
+ return ret;
+ }
+
+ hr_reg_write(&context->ext, QPCEX_DIP_CTX_IDX, dip_idx);
+ hr_reg_write(&qpc_mask->ext, QPCEX_DIP_CTX_IDX, 0);
+
+ return 0;
+}
+
+static int hns_roce_hw_v2_get_dscp(struct hns_roce_dev *hr_dev, u8 dscp,
+ u8 *tc_mode, u8 *priority)
+{
+ struct hns_roce_v2_priv *priv = hr_dev->priv;
+ struct hnae3_handle *handle = priv->handle;
+ const struct hnae3_ae_ops *ops = handle->ae_algo->ops;
+
+ if (!ops->get_dscp_prio)
+ return -EOPNOTSUPP;
+
+ return ops->get_dscp_prio(handle, dscp, tc_mode, priority);
+}
+
+bool check_sl_valid(struct hns_roce_dev *hr_dev, u8 sl)
+{
+ u32 max_sl;
+
+ max_sl = min_t(u32, MAX_SERVICE_LEVEL, hr_dev->caps.sl_num - 1);
+ if (unlikely(sl > max_sl)) {
+ ibdev_err_ratelimited(&hr_dev->ib_dev,
+ "failed to set SL(%u). Shouldn't be larger than %u.\n",
+ sl, max_sl);
+ return false;
+ }
+
+ return true;
+}
+
+static int hns_roce_set_sl(struct ib_qp *ibqp,
+ const struct ib_qp_attr *attr,
+ struct hns_roce_v2_qp_context *context,
+ struct hns_roce_v2_qp_context *qpc_mask)
+{
+ const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
+ struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ int ret;
+
+ ret = hns_roce_hw_v2_get_dscp(hr_dev, get_tclass(&attr->ah_attr.grh),
+ &hr_qp->tc_mode, &hr_qp->priority);
+ if (ret && ret != -EOPNOTSUPP &&
+ grh->sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) {
+ ibdev_err_ratelimited(ibdev,
+ "failed to get dscp, ret = %d.\n", ret);
+ return ret;
+ }
+
+ if (hr_qp->tc_mode == HNAE3_TC_MAP_MODE_DSCP &&
+ grh->sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
+ hr_qp->sl = hr_qp->priority;
+ else
+ hr_qp->sl = rdma_ah_get_sl(&attr->ah_attr);
+
+ if (!check_sl_valid(hr_dev, hr_qp->sl))
+ return -EINVAL;
+
+ hr_reg_write(context, QPC_SL, hr_qp->sl);
+ hr_reg_clear(qpc_mask, QPC_SL);
+
+ return 0;
+}
+
+static int hns_roce_v2_set_path(struct ib_qp *ibqp,
+ const struct ib_qp_attr *attr,
+ int attr_mask,
+ struct hns_roce_v2_qp_context *context,
+ struct hns_roce_v2_qp_context *qpc_mask)
+{
+ const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
+ struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ const struct ib_gid_attr *gid_attr = NULL;
+ u8 sl = rdma_ah_get_sl(&attr->ah_attr);
+ int is_roce_protocol;
+ u16 vlan_id = 0xffff;
+ bool is_udp = false;
+ u8 ib_port;
+ u8 hr_port;
+ int ret;
+
+ /*
+ * If free_mr_en of qp is set, it means that this qp comes from
+ * free mr. This qp will perform the loopback operation.
+ * In the loopback scenario, only sl needs to be set.
+ */
+ if (hr_qp->free_mr_en) {
+ if (!check_sl_valid(hr_dev, sl))
+ return -EINVAL;
+ hr_reg_write(context, QPC_SL, sl);
+ hr_reg_clear(qpc_mask, QPC_SL);
+ hr_qp->sl = sl;
+ return 0;
+ }
+
+ ib_port = (attr_mask & IB_QP_PORT) ? attr->port_num : hr_qp->port + 1;
+ hr_port = ib_port - 1;
+ is_roce_protocol = rdma_cap_eth_ah(&hr_dev->ib_dev, ib_port) &&
+ rdma_ah_get_ah_flags(&attr->ah_attr) & IB_AH_GRH;
+
+ if (is_roce_protocol) {
+ gid_attr = attr->ah_attr.grh.sgid_attr;
+ ret = rdma_read_gid_l2_fields(gid_attr, &vlan_id, NULL);
+ if (ret)
+ return ret;
+
+ is_udp = (gid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP);
+ }
+
+ /* Only HIP08 needs to set the vlan_en bits in QPC */
+ if (vlan_id < VLAN_N_VID &&
+ hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) {
+ hr_reg_enable(context, QPC_RQ_VLAN_EN);
+ hr_reg_clear(qpc_mask, QPC_RQ_VLAN_EN);
+ hr_reg_enable(context, QPC_SQ_VLAN_EN);
+ hr_reg_clear(qpc_mask, QPC_SQ_VLAN_EN);
+ }
+
+ hr_reg_write(context, QPC_VLAN_ID, vlan_id);
+ hr_reg_clear(qpc_mask, QPC_VLAN_ID);
+
+ if (grh->sgid_index >= hr_dev->caps.gid_table_len[hr_port]) {
+ ibdev_err(ibdev, "sgid_index(%u) too large. max is %d\n",
+ grh->sgid_index, hr_dev->caps.gid_table_len[hr_port]);
+ return -EINVAL;
+ }
+
+ if (attr->ah_attr.type != RDMA_AH_ATTR_TYPE_ROCE) {
+ ibdev_err(ibdev, "ah attr is not RDMA roce type\n");
+ return -EINVAL;
+ }
+
+ hr_reg_write(context, QPC_UDPSPN,
+ is_udp ? rdma_get_udp_sport(grh->flow_label, ibqp->qp_num,
+ attr->dest_qp_num) :
+ 0);
+
+ hr_reg_clear(qpc_mask, QPC_UDPSPN);
+
+ hr_reg_write(context, QPC_GMV_IDX, grh->sgid_index);
+
+ hr_reg_clear(qpc_mask, QPC_GMV_IDX);
+
+ hr_reg_write(context, QPC_HOPLIMIT, grh->hop_limit);
+ hr_reg_clear(qpc_mask, QPC_HOPLIMIT);
+
+ ret = fill_cong_field(ibqp, attr, context, qpc_mask);
+ if (ret)
+ return ret;
+
+ hr_reg_write(context, QPC_TC, get_tclass(&attr->ah_attr.grh));
+ hr_reg_clear(qpc_mask, QPC_TC);
+
+ hr_reg_write(context, QPC_FL, grh->flow_label);
+ hr_reg_clear(qpc_mask, QPC_FL);
+ memcpy(context->dgid, grh->dgid.raw, sizeof(grh->dgid.raw));
+ memset(qpc_mask->dgid, 0, sizeof(grh->dgid.raw));
+
+ return hns_roce_set_sl(ibqp, attr, context, qpc_mask);
+}
+
+static bool check_qp_state(enum ib_qp_state cur_state,
+ enum ib_qp_state new_state)
+{
+ static const bool sm[][IB_QPS_ERR + 1] = {
+ [IB_QPS_RESET] = { [IB_QPS_RESET] = true,
+ [IB_QPS_INIT] = true },
+ [IB_QPS_INIT] = { [IB_QPS_RESET] = true,
+ [IB_QPS_INIT] = true,
+ [IB_QPS_RTR] = true,
+ [IB_QPS_ERR] = true },
+ [IB_QPS_RTR] = { [IB_QPS_RESET] = true,
+ [IB_QPS_RTS] = true,
+ [IB_QPS_ERR] = true },
+ [IB_QPS_RTS] = { [IB_QPS_RESET] = true,
+ [IB_QPS_RTS] = true,
+ [IB_QPS_ERR] = true },
+ [IB_QPS_SQD] = {},
+ [IB_QPS_SQE] = {},
+ [IB_QPS_ERR] = { [IB_QPS_RESET] = true,
+ [IB_QPS_ERR] = true }
+ };
+
+ return sm[cur_state][new_state];
+}
+
+static int hns_roce_v2_set_abs_fields(struct ib_qp *ibqp,
+ const struct ib_qp_attr *attr,
+ int attr_mask,
+ enum ib_qp_state cur_state,
+ enum ib_qp_state new_state,
+ struct hns_roce_v2_qp_context *context,
+ struct hns_roce_v2_qp_context *qpc_mask,
+ struct ib_udata *udata)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
+ int ret = 0;
+
+ if (!check_qp_state(cur_state, new_state))
+ return -EINVAL;
+
+ if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
+ memset(qpc_mask, 0, hr_dev->caps.qpc_sz);
+ modify_qp_reset_to_init(ibqp, context);
+ } else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_INIT) {
+ modify_qp_init_to_init(ibqp, context, qpc_mask);
+ } else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) {
+ ret = modify_qp_init_to_rtr(ibqp, attr, attr_mask, context,
+ qpc_mask, udata);
+ } else if (cur_state == IB_QPS_RTR && new_state == IB_QPS_RTS) {
+ ret = modify_qp_rtr_to_rts(ibqp, attr_mask, context, qpc_mask);
+ }
+
+ return ret;
+}
+
+static bool check_qp_timeout_cfg_range(struct hns_roce_dev *hr_dev, u8 *timeout)
+{
+#define QP_ACK_TIMEOUT_MAX_HIP08 20
+#define QP_ACK_TIMEOUT_MAX 31
+
+ if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) {
+ if (*timeout > QP_ACK_TIMEOUT_MAX_HIP08) {
+ ibdev_warn(&hr_dev->ib_dev,
+ "local ACK timeout shall be 0 to 20.\n");
+ return false;
+ }
+ *timeout += HNS_ROCE_V2_QP_ACK_TIMEOUT_OFS_HIP08;
+ } else if (hr_dev->pci_dev->revision > PCI_REVISION_ID_HIP08) {
+ if (*timeout > QP_ACK_TIMEOUT_MAX) {
+ ibdev_warn(&hr_dev->ib_dev,
+ "local ACK timeout shall be 0 to 31.\n");
+ return false;
+ }
+ }
+
+ return true;
+}
+
+static int hns_roce_v2_set_opt_fields(struct ib_qp *ibqp,
+ const struct ib_qp_attr *attr,
+ int attr_mask,
+ struct hns_roce_v2_qp_context *context,
+ struct hns_roce_v2_qp_context *qpc_mask)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
+ struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
+ int ret = 0;
+ u8 timeout;
+
+ if (attr_mask & IB_QP_AV) {
+ ret = hns_roce_v2_set_path(ibqp, attr, attr_mask, context,
+ qpc_mask);
+ if (ret)
+ return ret;
+ }
+
+ if (attr_mask & IB_QP_TIMEOUT) {
+ timeout = attr->timeout;
+ if (check_qp_timeout_cfg_range(hr_dev, &timeout)) {
+ hr_reg_write(context, QPC_AT, timeout);
+ hr_reg_clear(qpc_mask, QPC_AT);
+ }
+ }
+
+ if (attr_mask & IB_QP_RETRY_CNT) {
+ hr_reg_write(context, QPC_RETRY_NUM_INIT, attr->retry_cnt);
+ hr_reg_clear(qpc_mask, QPC_RETRY_NUM_INIT);
+
+ hr_reg_write(context, QPC_RETRY_CNT, attr->retry_cnt);
+ hr_reg_clear(qpc_mask, QPC_RETRY_CNT);
+ }
+
+ if (attr_mask & IB_QP_RNR_RETRY) {
+ hr_reg_write(context, QPC_RNR_NUM_INIT, attr->rnr_retry);
+ hr_reg_clear(qpc_mask, QPC_RNR_NUM_INIT);
+
+ hr_reg_write(context, QPC_RNR_CNT, attr->rnr_retry);
+ hr_reg_clear(qpc_mask, QPC_RNR_CNT);
+ }
+
+ if (attr_mask & IB_QP_SQ_PSN) {
+ hr_reg_write(context, QPC_SQ_CUR_PSN, attr->sq_psn);
+ hr_reg_clear(qpc_mask, QPC_SQ_CUR_PSN);
+
+ hr_reg_write(context, QPC_SQ_MAX_PSN, attr->sq_psn);
+ hr_reg_clear(qpc_mask, QPC_SQ_MAX_PSN);
+
+ hr_reg_write(context, QPC_RETRY_MSG_PSN_L, attr->sq_psn);
+ hr_reg_clear(qpc_mask, QPC_RETRY_MSG_PSN_L);
+
+ hr_reg_write(context, QPC_RETRY_MSG_PSN_H,
+ attr->sq_psn >> RETRY_MSG_PSN_SHIFT);
+ hr_reg_clear(qpc_mask, QPC_RETRY_MSG_PSN_H);
+
+ hr_reg_write(context, QPC_RETRY_MSG_FPKT_PSN, attr->sq_psn);
+ hr_reg_clear(qpc_mask, QPC_RETRY_MSG_FPKT_PSN);
+
+ hr_reg_write(context, QPC_RX_ACK_EPSN, attr->sq_psn);
+ hr_reg_clear(qpc_mask, QPC_RX_ACK_EPSN);
+ }
+
+ if ((attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) &&
+ attr->max_dest_rd_atomic) {
+ hr_reg_write(context, QPC_RR_MAX,
+ fls(attr->max_dest_rd_atomic - 1));
+ hr_reg_clear(qpc_mask, QPC_RR_MAX);
+ }
+
+ if ((attr_mask & IB_QP_MAX_QP_RD_ATOMIC) && attr->max_rd_atomic) {
+ hr_reg_write(context, QPC_SR_MAX, fls(attr->max_rd_atomic - 1));
+ hr_reg_clear(qpc_mask, QPC_SR_MAX);
+ }
+
+ if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC))
+ set_access_flags(hr_qp, context, qpc_mask, attr, attr_mask);
+
+ if (attr_mask & IB_QP_MIN_RNR_TIMER) {
+ hr_reg_write(context, QPC_MIN_RNR_TIME,
+ hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08 ?
+ HNS_ROCE_RNR_TIMER_10NS : attr->min_rnr_timer);
+ hr_reg_clear(qpc_mask, QPC_MIN_RNR_TIME);
+ }
+
+ if (attr_mask & IB_QP_RQ_PSN) {
+ hr_reg_write(context, QPC_RX_REQ_EPSN, attr->rq_psn);
+ hr_reg_clear(qpc_mask, QPC_RX_REQ_EPSN);
+
+ hr_reg_write(context, QPC_RAQ_PSN, attr->rq_psn - 1);
+ hr_reg_clear(qpc_mask, QPC_RAQ_PSN);
+ }
+
+ if (attr_mask & IB_QP_QKEY) {
+ context->qkey_xrcd = cpu_to_le32(attr->qkey);
+ qpc_mask->qkey_xrcd = 0;
+ hr_qp->qkey = attr->qkey;
+ }
+
+ return ret;
+}
+
+static void hns_roce_v2_record_opt_fields(struct ib_qp *ibqp,
+ const struct ib_qp_attr *attr,
+ int attr_mask)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
+ struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
+
+ if (attr_mask & IB_QP_ACCESS_FLAGS)
+ hr_qp->atomic_rd_en = attr->qp_access_flags;
+
+ if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
+ hr_qp->resp_depth = attr->max_dest_rd_atomic;
+ if (attr_mask & IB_QP_PORT) {
+ hr_qp->port = attr->port_num - 1;
+ hr_qp->phy_port = hr_dev->iboe.phy_port[hr_qp->port];
+ }
+}
+
+static void clear_qp(struct hns_roce_qp *hr_qp)
+{
+ struct ib_qp *ibqp = &hr_qp->ibqp;
+
+ if (ibqp->send_cq)
+ hns_roce_v2_cq_clean(to_hr_cq(ibqp->send_cq),
+ hr_qp->qpn, NULL);
+
+ if (ibqp->recv_cq && ibqp->recv_cq != ibqp->send_cq)
+ hns_roce_v2_cq_clean(to_hr_cq(ibqp->recv_cq),
+ hr_qp->qpn, ibqp->srq ?
+ to_hr_srq(ibqp->srq) : NULL);
+
+ if (hr_qp->en_flags & HNS_ROCE_QP_CAP_RQ_RECORD_DB)
+ *hr_qp->rdb.db_record = 0;
+
+ hr_qp->rq.head = 0;
+ hr_qp->rq.tail = 0;
+ hr_qp->sq.head = 0;
+ hr_qp->sq.tail = 0;
+ hr_qp->next_sge = 0;
+}
+
+static void v2_set_flushed_fields(struct ib_qp *ibqp,
+ struct hns_roce_v2_qp_context *context,
+ struct hns_roce_v2_qp_context *qpc_mask)
+{
+ struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
+ unsigned long sq_flag = 0;
+ unsigned long rq_flag = 0;
+
+ if (ibqp->qp_type == IB_QPT_XRC_TGT)
+ return;
+
+ spin_lock_irqsave(&hr_qp->sq.lock, sq_flag);
+ trace_hns_sq_flush_cqe(hr_qp->qpn, hr_qp->sq.head, TRACE_SQ);
+ hr_reg_write(context, QPC_SQ_PRODUCER_IDX, hr_qp->sq.head);
+ hr_reg_clear(qpc_mask, QPC_SQ_PRODUCER_IDX);
+ hr_qp->state = IB_QPS_ERR;
+ spin_unlock_irqrestore(&hr_qp->sq.lock, sq_flag);
+
+ if (ibqp->srq || ibqp->qp_type == IB_QPT_XRC_INI) /* no RQ */
+ return;
+
+ spin_lock_irqsave(&hr_qp->rq.lock, rq_flag);
+ trace_hns_rq_flush_cqe(hr_qp->qpn, hr_qp->rq.head, TRACE_RQ);
+ hr_reg_write(context, QPC_RQ_PRODUCER_IDX, hr_qp->rq.head);
+ hr_reg_clear(qpc_mask, QPC_RQ_PRODUCER_IDX);
+ spin_unlock_irqrestore(&hr_qp->rq.lock, rq_flag);
+}
+
+static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
+ const struct ib_qp_attr *attr,
+ int attr_mask, enum ib_qp_state cur_state,
+ enum ib_qp_state new_state, struct ib_udata *udata)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
+ struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
+ struct hns_roce_v2_qp_context *context;
+ struct hns_roce_v2_qp_context *qpc_mask;
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ int ret = -ENOMEM;
+
+ if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
+ return -EOPNOTSUPP;
+
+ /*
+ * In v2 engine, software pass context and context mask to hardware
+ * when modifying qp. If software need modify some fields in context,
+ * we should set all bits of the relevant fields in context mask to
+ * 0 at the same time, else set them to 0x1.
+ */
+ context = kvzalloc(sizeof(*context), GFP_KERNEL);
+ qpc_mask = kvzalloc(sizeof(*qpc_mask), GFP_KERNEL);
+ if (!context || !qpc_mask)
+ goto out;
+
+ memset(qpc_mask, 0xff, hr_dev->caps.qpc_sz);
+
+ ret = hns_roce_v2_set_abs_fields(ibqp, attr, attr_mask, cur_state,
+ new_state, context, qpc_mask, udata);
+ if (ret)
+ goto out;
+
+ /* When QP state is err, SQ and RQ WQE should be flushed */
+ if (new_state == IB_QPS_ERR)
+ v2_set_flushed_fields(ibqp, context, qpc_mask);
+
+ /* Configure the optional fields */
+ ret = hns_roce_v2_set_opt_fields(ibqp, attr, attr_mask, context,
+ qpc_mask);
+ if (ret)
+ goto out;
+
+ hr_reg_write_bool(context, QPC_INV_CREDIT,
+ to_hr_qp_type(hr_qp->ibqp.qp_type) == SERV_TYPE_XRC ||
+ ibqp->srq);
+ hr_reg_clear(qpc_mask, QPC_INV_CREDIT);
+
+ /* Every status migrate must change state */
+ hr_reg_write(context, QPC_QP_ST, new_state);
+ hr_reg_clear(qpc_mask, QPC_QP_ST);
+
+ /* SW pass context to HW */
+ ret = hns_roce_v2_qp_modify(hr_dev, context, qpc_mask, hr_qp);
+ if (ret) {
+ ibdev_err_ratelimited(ibdev, "failed to modify QP, ret = %d.\n", ret);
+ goto out;
+ }
+
+ hr_qp->state = new_state;
+
+ hns_roce_v2_record_opt_fields(ibqp, attr, attr_mask);
+
+ if (new_state == IB_QPS_RESET && !ibqp->uobject)
+ clear_qp(hr_qp);
+
+out:
+ kvfree(qpc_mask);
+ kvfree(context);
+ return ret;
+}
+
+static int to_ib_qp_st(enum hns_roce_v2_qp_state state)
+{
+ static const enum ib_qp_state map[] = {
+ [HNS_ROCE_QP_ST_RST] = IB_QPS_RESET,
+ [HNS_ROCE_QP_ST_INIT] = IB_QPS_INIT,
+ [HNS_ROCE_QP_ST_RTR] = IB_QPS_RTR,
+ [HNS_ROCE_QP_ST_RTS] = IB_QPS_RTS,
+ [HNS_ROCE_QP_ST_SQD] = IB_QPS_SQD,
+ [HNS_ROCE_QP_ST_SQER] = IB_QPS_SQE,
+ [HNS_ROCE_QP_ST_ERR] = IB_QPS_ERR,
+ [HNS_ROCE_QP_ST_SQ_DRAINING] = IB_QPS_SQD
+ };
+
+ return (state < ARRAY_SIZE(map)) ? map[state] : -1;
+}
+
+static int hns_roce_v2_query_qpc(struct hns_roce_dev *hr_dev, u32 qpn,
+ void *buffer)
+{
+ struct hns_roce_cmd_mailbox *mailbox;
+ int ret;
+
+ mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+
+ ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, HNS_ROCE_CMD_QUERY_QPC,
+ qpn);
+ if (ret)
+ goto out;
+
+ memcpy(buffer, mailbox->buf, hr_dev->caps.qpc_sz);
+
+out:
+ hns_roce_free_cmd_mailbox(hr_dev, mailbox);
+ return ret;
+}
+
+static int hns_roce_v2_query_srqc(struct hns_roce_dev *hr_dev, u32 srqn,
+ void *buffer)
+{
+ struct hns_roce_srq_context *context;
+ struct hns_roce_cmd_mailbox *mailbox;
+ int ret;
+
+ mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+
+ context = mailbox->buf;
+ ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, HNS_ROCE_CMD_QUERY_SRQC,
+ srqn);
+ if (ret)
+ goto out;
+
+ memcpy(buffer, context, sizeof(*context));
+
+out:
+ hns_roce_free_cmd_mailbox(hr_dev, mailbox);
+ return ret;
+}
+
+static int hns_roce_v2_query_sccc(struct hns_roce_dev *hr_dev, u32 sccn,
+ void *buffer)
+{
+ struct hns_roce_v2_scc_context *context;
+ struct hns_roce_cmd_mailbox *mailbox;
+ int ret;
+
+ mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+
+ ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, HNS_ROCE_CMD_QUERY_SCCC,
+ sccn);
+ if (ret)
+ goto out;
+
+ context = mailbox->buf;
+ memcpy(buffer, context, sizeof(*context));
+
+out:
+ hns_roce_free_cmd_mailbox(hr_dev, mailbox);
+ return ret;
+}
+
+static u8 get_qp_timeout_attr(struct hns_roce_dev *hr_dev,
+ struct hns_roce_v2_qp_context *context)
+{
+ u8 timeout;
+
+ timeout = (u8)hr_reg_read(context, QPC_AT);
+ if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08)
+ timeout -= HNS_ROCE_V2_QP_ACK_TIMEOUT_OFS_HIP08;
+
+ return timeout;
+}
+
+static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
+ int qp_attr_mask,
+ struct ib_qp_init_attr *qp_init_attr)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
+ struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
+ struct hns_roce_v2_qp_context context = {};
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ int tmp_qp_state;
+ int state;
+ int ret;
+
+ memset(qp_attr, 0, sizeof(*qp_attr));
+ memset(qp_init_attr, 0, sizeof(*qp_init_attr));
+
+ mutex_lock(&hr_qp->mutex);
+
+ if (hr_qp->state == IB_QPS_RESET) {
+ qp_attr->qp_state = IB_QPS_RESET;
+ ret = 0;
+ goto done;
+ }
+
+ ret = hns_roce_v2_query_qpc(hr_dev, hr_qp->qpn, &context);
+ if (ret) {
+ ibdev_err_ratelimited(ibdev,
+ "failed to query QPC, ret = %d.\n",
+ ret);
+ ret = -EINVAL;
+ goto out;
+ }
+
+ state = hr_reg_read(&context, QPC_QP_ST);
+ tmp_qp_state = to_ib_qp_st((enum hns_roce_v2_qp_state)state);
+ if (tmp_qp_state == -1) {
+ ibdev_err_ratelimited(ibdev, "Illegal ib_qp_state\n");
+ ret = -EINVAL;
+ goto out;
+ }
+ hr_qp->state = (u8)tmp_qp_state;
+ qp_attr->qp_state = (enum ib_qp_state)hr_qp->state;
+ qp_attr->path_mtu = (enum ib_mtu)hr_reg_read(&context, QPC_MTU);
+ qp_attr->path_mig_state = IB_MIG_ARMED;
+ qp_attr->ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE;
+ if (hr_qp->ibqp.qp_type == IB_QPT_UD)
+ qp_attr->qkey = le32_to_cpu(context.qkey_xrcd);
+
+ qp_attr->rq_psn = hr_reg_read(&context, QPC_RX_REQ_EPSN);
+ qp_attr->sq_psn = (u32)hr_reg_read(&context, QPC_SQ_CUR_PSN);
+ qp_attr->dest_qp_num = hr_reg_read(&context, QPC_DQPN);
+ qp_attr->qp_access_flags =
+ ((hr_reg_read(&context, QPC_RRE)) << V2_QP_RRE_S) |
+ ((hr_reg_read(&context, QPC_RWE)) << V2_QP_RWE_S) |
+ ((hr_reg_read(&context, QPC_ATE)) << V2_QP_ATE_S);
+
+ if (hr_qp->ibqp.qp_type == IB_QPT_RC ||
+ hr_qp->ibqp.qp_type == IB_QPT_XRC_INI ||
+ hr_qp->ibqp.qp_type == IB_QPT_XRC_TGT) {
+ struct ib_global_route *grh =
+ rdma_ah_retrieve_grh(&qp_attr->ah_attr);
+
+ rdma_ah_set_sl(&qp_attr->ah_attr,
+ hr_reg_read(&context, QPC_SL));
+ rdma_ah_set_port_num(&qp_attr->ah_attr, hr_qp->port + 1);
+ rdma_ah_set_ah_flags(&qp_attr->ah_attr, IB_AH_GRH);
+ grh->flow_label = hr_reg_read(&context, QPC_FL);
+ grh->sgid_index = hr_reg_read(&context, QPC_GMV_IDX);
+ grh->hop_limit = hr_reg_read(&context, QPC_HOPLIMIT);
+ grh->traffic_class = hr_reg_read(&context, QPC_TC);
+
+ memcpy(grh->dgid.raw, context.dgid, sizeof(grh->dgid.raw));
+ }
+
+ qp_attr->port_num = hr_qp->port + 1;
+ qp_attr->sq_draining = 0;
+ qp_attr->max_rd_atomic = 1 << hr_reg_read(&context, QPC_SR_MAX);
+ qp_attr->max_dest_rd_atomic = 1 << hr_reg_read(&context, QPC_RR_MAX);
+
+ qp_attr->min_rnr_timer = (u8)hr_reg_read(&context, QPC_MIN_RNR_TIME);
+ qp_attr->timeout = get_qp_timeout_attr(hr_dev, &context);
+ qp_attr->retry_cnt = hr_reg_read(&context, QPC_RETRY_NUM_INIT);
+ qp_attr->rnr_retry = hr_reg_read(&context, QPC_RNR_NUM_INIT);
+
+done:
+ qp_attr->cur_qp_state = qp_attr->qp_state;
+ qp_attr->cap.max_recv_wr = hr_qp->rq.wqe_cnt;
+ qp_attr->cap.max_recv_sge = hr_qp->rq.max_gs - hr_qp->rq.rsv_sge;
+ qp_attr->cap.max_inline_data = hr_qp->max_inline_data;
+
+ qp_attr->cap.max_send_wr = hr_qp->sq.wqe_cnt;
+ qp_attr->cap.max_send_sge = hr_qp->sq.max_gs;
+
+ qp_init_attr->qp_context = ibqp->qp_context;
+ qp_init_attr->qp_type = ibqp->qp_type;
+ qp_init_attr->recv_cq = ibqp->recv_cq;
+ qp_init_attr->send_cq = ibqp->send_cq;
+ qp_init_attr->srq = ibqp->srq;
+ qp_init_attr->cap = qp_attr->cap;
+ qp_init_attr->sq_sig_type = hr_qp->sq_signal_bits;
+
+out:
+ mutex_unlock(&hr_qp->mutex);
+ return ret;
+}
+
+static inline int modify_qp_is_ok(struct hns_roce_qp *hr_qp)
+{
+ return ((hr_qp->ibqp.qp_type == IB_QPT_RC ||
+ hr_qp->ibqp.qp_type == IB_QPT_UD ||
+ hr_qp->ibqp.qp_type == IB_QPT_XRC_INI ||
+ hr_qp->ibqp.qp_type == IB_QPT_XRC_TGT) &&
+ hr_qp->state != IB_QPS_RESET);
+}
+
+static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev,
+ struct hns_roce_qp *hr_qp,
+ struct ib_udata *udata)
+{
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ struct hns_roce_cq *send_cq, *recv_cq;
+ unsigned long flags;
+ int ret = 0;
+
+ if (modify_qp_is_ok(hr_qp)) {
+ /* Modify qp to reset before destroying qp */
+ ret = hns_roce_v2_modify_qp(&hr_qp->ibqp, NULL, 0,
+ hr_qp->state, IB_QPS_RESET, udata);
+ if (ret)
+ ibdev_err_ratelimited(ibdev,
+ "failed to modify QP to RST, ret = %d.\n",
+ ret);
+ }
+
+ send_cq = hr_qp->ibqp.send_cq ? to_hr_cq(hr_qp->ibqp.send_cq) : NULL;
+ recv_cq = hr_qp->ibqp.recv_cq ? to_hr_cq(hr_qp->ibqp.recv_cq) : NULL;
+
+ spin_lock_irqsave(&hr_dev->qp_list_lock, flags);
+ hns_roce_lock_cqs(send_cq, recv_cq);
+
+ if (!udata) {
+ if (recv_cq)
+ __hns_roce_v2_cq_clean(recv_cq, hr_qp->qpn,
+ (hr_qp->ibqp.srq ?
+ to_hr_srq(hr_qp->ibqp.srq) :
+ NULL));
+
+ if (send_cq && send_cq != recv_cq)
+ __hns_roce_v2_cq_clean(send_cq, hr_qp->qpn, NULL);
+ }
+
+ hns_roce_qp_remove(hr_dev, hr_qp);
+
+ hns_roce_unlock_cqs(send_cq, recv_cq);
+ spin_unlock_irqrestore(&hr_dev->qp_list_lock, flags);
+
+ return ret;
+}
+
+static void put_dip_ctx_idx(struct hns_roce_dev *hr_dev,
+ struct hns_roce_qp *hr_qp)
+{
+ struct hns_roce_dip *hr_dip = hr_qp->dip;
+
+ if (!hr_dip)
+ return;
+
+ xa_lock(&hr_dev->qp_table.dip_xa);
+
+ hr_dip->qp_cnt--;
+ if (!hr_dip->qp_cnt)
+ memset(hr_dip->dgid, 0, GID_LEN_V2);
+
+ xa_unlock(&hr_dev->qp_table.dip_xa);
+}
+
+int hns_roce_v2_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
+ struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
+ unsigned long flags;
+ int ret;
+
+ /* Make sure flush_cqe() is completed */
+ spin_lock_irqsave(&hr_qp->flush_lock, flags);
+ set_bit(HNS_ROCE_STOP_FLUSH_FLAG, &hr_qp->flush_flag);
+ spin_unlock_irqrestore(&hr_qp->flush_lock, flags);
+ flush_work(&hr_qp->flush_work.work);
+
+ if (hr_qp->cong_type == CONG_TYPE_DIP)
+ put_dip_ctx_idx(hr_dev, hr_qp);
+
+ ret = hns_roce_v2_destroy_qp_common(hr_dev, hr_qp, udata);
+ if (ret)
+ ibdev_err_ratelimited(&hr_dev->ib_dev,
+ "failed to destroy QP, QPN = 0x%06lx, ret = %d.\n",
+ hr_qp->qpn, ret);
+
+ hns_roce_qp_destroy(hr_dev, hr_qp, udata);
+
+ return 0;
+}
+
+static int hns_roce_v2_qp_flow_control_init(struct hns_roce_dev *hr_dev,
+ struct hns_roce_qp *hr_qp)
+{
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ struct hns_roce_sccc_clr_done *resp;
+ struct hns_roce_sccc_clr *clr;
+ struct hns_roce_cmq_desc desc;
+ int ret, i;
+
+ if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
+ return 0;
+
+ mutex_lock(&hr_dev->qp_table.scc_mutex);
+
+ /* set scc ctx clear done flag */
+ hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_RESET_SCCC, false);
+ ret = hns_roce_cmq_send(hr_dev, &desc, 1);
+ if (ret) {
+ ibdev_err(ibdev, "failed to reset SCC ctx, ret = %d.\n", ret);
+ goto out;
+ }
+
+ /* clear scc context */
+ hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CLR_SCCC, false);
+ clr = (struct hns_roce_sccc_clr *)desc.data;
+ clr->qpn = cpu_to_le32(hr_qp->qpn);
+ ret = hns_roce_cmq_send(hr_dev, &desc, 1);
+ if (ret) {
+ ibdev_err(ibdev, "failed to clear SCC ctx, ret = %d.\n", ret);
+ goto out;
+ }
+
+ /* query scc context clear is done or not */
+ resp = (struct hns_roce_sccc_clr_done *)desc.data;
+ for (i = 0; i <= HNS_ROCE_CMQ_SCC_CLR_DONE_CNT; i++) {
+ hns_roce_cmq_setup_basic_desc(&desc,
+ HNS_ROCE_OPC_QUERY_SCCC, true);
+ ret = hns_roce_cmq_send(hr_dev, &desc, 1);
+ if (ret) {
+ ibdev_err(ibdev, "failed to query clr cmq, ret = %d\n",
+ ret);
+ goto out;
+ }
+
+ if (resp->clr_done)
+ goto out;
+
+ msleep(20);
+ }
+
+ ibdev_err(ibdev, "query SCC clr done flag overtime.\n");
+ ret = -ETIMEDOUT;
+
+out:
+ mutex_unlock(&hr_dev->qp_table.scc_mutex);
+ return ret;
+}
+
+#define DMA_IDX_SHIFT 3
+#define DMA_WQE_SHIFT 3
+
+static int hns_roce_v2_write_srqc_index_queue(struct hns_roce_srq *srq,
+ struct hns_roce_srq_context *ctx)
+{
+ struct hns_roce_idx_que *idx_que = &srq->idx_que;
+ struct ib_device *ibdev = srq->ibsrq.device;
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibdev);
+ u64 mtts_idx[MTT_MIN_COUNT] = {};
+ dma_addr_t dma_handle_idx;
+ int ret;
+
+ /* Get physical address of idx que buf */
+ ret = hns_roce_mtr_find(hr_dev, &idx_que->mtr, 0, mtts_idx,
+ ARRAY_SIZE(mtts_idx));
+ if (ret) {
+ ibdev_err(ibdev, "failed to find mtr for SRQ idx, ret = %d.\n",
+ ret);
+ return ret;
+ }
+
+ dma_handle_idx = hns_roce_get_mtr_ba(&idx_que->mtr);
+
+ hr_reg_write(ctx, SRQC_IDX_HOP_NUM,
+ to_hr_hem_hopnum(hr_dev->caps.idx_hop_num, srq->wqe_cnt));
+
+ hr_reg_write(ctx, SRQC_IDX_BT_BA_L, dma_handle_idx >> DMA_IDX_SHIFT);
+ hr_reg_write(ctx, SRQC_IDX_BT_BA_H,
+ upper_32_bits(dma_handle_idx >> DMA_IDX_SHIFT));
+
+ hr_reg_write(ctx, SRQC_IDX_BA_PG_SZ,
+ to_hr_hw_page_shift(idx_que->mtr.hem_cfg.ba_pg_shift));
+ hr_reg_write(ctx, SRQC_IDX_BUF_PG_SZ,
+ to_hr_hw_page_shift(idx_que->mtr.hem_cfg.buf_pg_shift));
+
+ hr_reg_write(ctx, SRQC_IDX_CUR_BLK_ADDR_L,
+ to_hr_hw_page_addr(mtts_idx[0]));
+ hr_reg_write(ctx, SRQC_IDX_CUR_BLK_ADDR_H,
+ upper_32_bits(to_hr_hw_page_addr(mtts_idx[0])));
+
+ hr_reg_write(ctx, SRQC_IDX_NXT_BLK_ADDR_L,
+ to_hr_hw_page_addr(mtts_idx[1]));
+ hr_reg_write(ctx, SRQC_IDX_NXT_BLK_ADDR_H,
+ upper_32_bits(to_hr_hw_page_addr(mtts_idx[1])));
+
+ return 0;
+}
+
+static int hns_roce_v2_write_srqc(struct hns_roce_srq *srq, void *mb_buf)
+{
+ struct ib_device *ibdev = srq->ibsrq.device;
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibdev);
+ struct hns_roce_srq_context *ctx = mb_buf;
+ u64 mtts_wqe[MTT_MIN_COUNT] = {};
+ dma_addr_t dma_handle_wqe;
+ int ret;
+
+ memset(ctx, 0, sizeof(*ctx));
+
+ /* Get the physical address of srq buf */
+ ret = hns_roce_mtr_find(hr_dev, &srq->buf_mtr, 0, mtts_wqe,
+ ARRAY_SIZE(mtts_wqe));
+ if (ret) {
+ ibdev_err(ibdev, "failed to find mtr for SRQ WQE, ret = %d.\n",
+ ret);
+ return ret;
+ }
+
+ dma_handle_wqe = hns_roce_get_mtr_ba(&srq->buf_mtr);
+
+ hr_reg_write(ctx, SRQC_SRQ_ST, 1);
+ hr_reg_write_bool(ctx, SRQC_SRQ_TYPE,
+ srq->ibsrq.srq_type == IB_SRQT_XRC);
+ hr_reg_write(ctx, SRQC_PD, to_hr_pd(srq->ibsrq.pd)->pdn);
+ hr_reg_write(ctx, SRQC_SRQN, srq->srqn);
+ hr_reg_write(ctx, SRQC_XRCD, srq->xrcdn);
+ hr_reg_write(ctx, SRQC_XRC_CQN, srq->cqn);
+ hr_reg_write(ctx, SRQC_SHIFT, ilog2(srq->wqe_cnt));
+ hr_reg_write(ctx, SRQC_RQWS,
+ srq->max_gs <= 0 ? 0 : fls(srq->max_gs - 1));
+
+ hr_reg_write(ctx, SRQC_WQE_HOP_NUM,
+ to_hr_hem_hopnum(hr_dev->caps.srqwqe_hop_num,
+ srq->wqe_cnt));
+
+ hr_reg_write(ctx, SRQC_WQE_BT_BA_L, dma_handle_wqe >> DMA_WQE_SHIFT);
+ hr_reg_write(ctx, SRQC_WQE_BT_BA_H,
+ upper_32_bits(dma_handle_wqe >> DMA_WQE_SHIFT));
+
+ hr_reg_write(ctx, SRQC_WQE_BA_PG_SZ,
+ to_hr_hw_page_shift(srq->buf_mtr.hem_cfg.ba_pg_shift));
+ hr_reg_write(ctx, SRQC_WQE_BUF_PG_SZ,
+ to_hr_hw_page_shift(srq->buf_mtr.hem_cfg.buf_pg_shift));
+
+ if (srq->cap_flags & HNS_ROCE_SRQ_CAP_RECORD_DB) {
+ hr_reg_enable(ctx, SRQC_DB_RECORD_EN);
+ hr_reg_write(ctx, SRQC_DB_RECORD_ADDR_L,
+ lower_32_bits(srq->rdb.dma) >> 1);
+ hr_reg_write(ctx, SRQC_DB_RECORD_ADDR_H,
+ upper_32_bits(srq->rdb.dma));
+ }
+
+ return hns_roce_v2_write_srqc_index_queue(srq, ctx);
+}
+
+static int hns_roce_v2_modify_srq(struct ib_srq *ibsrq,
+ struct ib_srq_attr *srq_attr,
+ enum ib_srq_attr_mask srq_attr_mask,
+ struct ib_udata *udata)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibsrq->device);
+ struct hns_roce_srq *srq = to_hr_srq(ibsrq);
+ struct hns_roce_srq_context *srq_context;
+ struct hns_roce_srq_context *srqc_mask;
+ struct hns_roce_cmd_mailbox *mailbox;
+ int ret = 0;
+
+ /* Resizing SRQs is not supported yet */
+ if (srq_attr_mask & IB_SRQ_MAX_WR) {
+ ret = -EOPNOTSUPP;
+ goto out;
+ }
+
+ if (srq_attr_mask & IB_SRQ_LIMIT) {
+ if (srq_attr->srq_limit > srq->wqe_cnt) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
+ if (IS_ERR(mailbox)) {
+ ret = PTR_ERR(mailbox);
+ goto out;
+ }
+
+ srq_context = mailbox->buf;
+ srqc_mask = (struct hns_roce_srq_context *)mailbox->buf + 1;
+
+ memset(srqc_mask, 0xff, sizeof(*srqc_mask));
+
+ hr_reg_write(srq_context, SRQC_LIMIT_WL, srq_attr->srq_limit);
+ hr_reg_clear(srqc_mask, SRQC_LIMIT_WL);
+
+ ret = hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0,
+ HNS_ROCE_CMD_MODIFY_SRQC, srq->srqn);
+ hns_roce_free_cmd_mailbox(hr_dev, mailbox);
+ if (ret)
+ ibdev_err(&hr_dev->ib_dev,
+ "failed to handle cmd of modifying SRQ, ret = %d.\n",
+ ret);
+ }
+
+out:
+ if (ret)
+ atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_SRQ_MODIFY_ERR_CNT]);
+
+ return ret;
+}
+
+static int hns_roce_v2_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibsrq->device);
+ struct hns_roce_srq *srq = to_hr_srq(ibsrq);
+ struct hns_roce_srq_context *srq_context;
+ struct hns_roce_cmd_mailbox *mailbox;
+ int ret;
+
+ mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+
+ srq_context = mailbox->buf;
+ ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma,
+ HNS_ROCE_CMD_QUERY_SRQC, srq->srqn);
+ if (ret) {
+ ibdev_err(&hr_dev->ib_dev,
+ "failed to process cmd of querying SRQ, ret = %d.\n",
+ ret);
+ goto out;
+ }
+
+ attr->srq_limit = hr_reg_read(srq_context, SRQC_LIMIT_WL);
+ attr->max_wr = srq->wqe_cnt;
+ attr->max_sge = srq->max_gs - srq->rsv_sge;
+
+out:
+ hns_roce_free_cmd_mailbox(hr_dev, mailbox);
+ return ret;
+}
+
+static int hns_roce_v2_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(cq->device);
+ struct hns_roce_v2_cq_context *cq_context;
+ struct hns_roce_cq *hr_cq = to_hr_cq(cq);
+ struct hns_roce_v2_cq_context *cqc_mask;
+ struct hns_roce_cmd_mailbox *mailbox;
+ int ret;
+
+ mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
+ ret = PTR_ERR_OR_ZERO(mailbox);
+ if (ret)
+ goto err_out;
+
+ cq_context = mailbox->buf;
+ cqc_mask = (struct hns_roce_v2_cq_context *)mailbox->buf + 1;
+
+ memset(cqc_mask, 0xff, sizeof(*cqc_mask));
+
+ hr_reg_write(cq_context, CQC_CQ_MAX_CNT, cq_count);
+ hr_reg_clear(cqc_mask, CQC_CQ_MAX_CNT);
+
+ if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) {
+ if (cq_period * HNS_ROCE_CLOCK_ADJUST > USHRT_MAX) {
+ dev_info(hr_dev->dev,
+ "cq_period(%u) reached the upper limit, adjusted to 65.\n",
+ cq_period);
+ cq_period = HNS_ROCE_MAX_CQ_PERIOD_HIP08;
+ }
+ cq_period *= HNS_ROCE_CLOCK_ADJUST;
+ }
+ hr_reg_write(cq_context, CQC_CQ_PERIOD, cq_period);
+ hr_reg_clear(cqc_mask, CQC_CQ_PERIOD);
+
+ ret = hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0,
+ HNS_ROCE_CMD_MODIFY_CQC, hr_cq->cqn);
+ hns_roce_free_cmd_mailbox(hr_dev, mailbox);
+ if (ret)
+ ibdev_err_ratelimited(&hr_dev->ib_dev,
+ "failed to process cmd when modifying CQ, ret = %d.\n",
+ ret);
+
+err_out:
+ if (ret)
+ atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_CQ_MODIFY_ERR_CNT]);
+
+ return ret;
+}
+
+static int hns_roce_v2_query_cqc(struct hns_roce_dev *hr_dev, u32 cqn,
+ void *buffer)
+{
+ struct hns_roce_v2_cq_context *context;
+ struct hns_roce_cmd_mailbox *mailbox;
+ int ret;
+
+ mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+
+ context = mailbox->buf;
+ ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma,
+ HNS_ROCE_CMD_QUERY_CQC, cqn);
+ if (ret) {
+ ibdev_err_ratelimited(&hr_dev->ib_dev,
+ "failed to process cmd when querying CQ, ret = %d.\n",
+ ret);
+ goto err_mailbox;
+ }
+
+ memcpy(buffer, context, sizeof(*context));
+
+err_mailbox:
+ hns_roce_free_cmd_mailbox(hr_dev, mailbox);
+
+ return ret;
+}
+
+static int hns_roce_v2_query_mpt(struct hns_roce_dev *hr_dev, u32 key,
+ void *buffer)
+{
+ struct hns_roce_v2_mpt_entry *context;
+ struct hns_roce_cmd_mailbox *mailbox;
+ int ret;
+
+ mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+
+ context = mailbox->buf;
+ ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, HNS_ROCE_CMD_QUERY_MPT,
+ key_to_hw_index(key));
+ if (ret) {
+ ibdev_err(&hr_dev->ib_dev,
+ "failed to process cmd when querying MPT, ret = %d.\n",
+ ret);
+ goto err_mailbox;
+ }
+
+ memcpy(buffer, context, sizeof(*context));
+
+err_mailbox:
+ hns_roce_free_cmd_mailbox(hr_dev, mailbox);
+
+ return ret;
+}
+
+static void dump_aeqe_log(struct hns_roce_work *irq_work)
+{
+ struct hns_roce_dev *hr_dev = irq_work->hr_dev;
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+
+ switch (irq_work->event_type) {
+ case HNS_ROCE_EVENT_TYPE_PATH_MIG:
+ ibdev_info(ibdev, "path migrated succeeded.\n");
+ break;
+ case HNS_ROCE_EVENT_TYPE_PATH_MIG_FAILED:
+ ibdev_warn(ibdev, "path migration failed.\n");
+ break;
+ case HNS_ROCE_EVENT_TYPE_COMM_EST:
+ break;
+ case HNS_ROCE_EVENT_TYPE_SQ_DRAINED:
+ ibdev_dbg(ibdev, "send queue drained.\n");
+ break;
+ case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR:
+ ibdev_err(ibdev, "local work queue 0x%x catast error, sub_event type is: %d\n",
+ irq_work->queue_num, irq_work->sub_type);
+ break;
+ case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR:
+ ibdev_err(ibdev, "invalid request local work queue 0x%x error.\n",
+ irq_work->queue_num);
+ break;
+ case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR:
+ ibdev_err(ibdev, "local access violation work queue 0x%x error, sub_event type is: %d\n",
+ irq_work->queue_num, irq_work->sub_type);
+ break;
+ case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH:
+ ibdev_dbg(ibdev, "SRQ limit reach.\n");
+ break;
+ case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH:
+ ibdev_dbg(ibdev, "SRQ last wqe reach.\n");
+ break;
+ case HNS_ROCE_EVENT_TYPE_SRQ_CATAS_ERROR:
+ ibdev_err(ibdev, "SRQ catas error.\n");
+ break;
+ case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR:
+ ibdev_err(ibdev, "CQ 0x%x access err.\n", irq_work->queue_num);
+ break;
+ case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW:
+ ibdev_warn(ibdev, "CQ 0x%x overflow\n", irq_work->queue_num);
+ break;
+ case HNS_ROCE_EVENT_TYPE_DB_OVERFLOW:
+ ibdev_warn(ibdev, "DB overflow.\n");
+ break;
+ case HNS_ROCE_EVENT_TYPE_MB:
+ break;
+ case HNS_ROCE_EVENT_TYPE_FLR:
+ ibdev_warn(ibdev, "function level reset.\n");
+ break;
+ case HNS_ROCE_EVENT_TYPE_XRCD_VIOLATION:
+ ibdev_err(ibdev, "xrc domain violation error.\n");
+ break;
+ case HNS_ROCE_EVENT_TYPE_INVALID_XRCETH:
+ ibdev_err(ibdev, "invalid xrceth error.\n");
+ break;
+ default:
+ ibdev_info(ibdev, "Undefined event %d.\n",
+ irq_work->event_type);
+ break;
+ }
+}
+
+static void hns_roce_irq_work_handle(struct work_struct *work)
+{
+ struct hns_roce_work *irq_work =
+ container_of(work, struct hns_roce_work, work);
+ struct hns_roce_dev *hr_dev = irq_work->hr_dev;
+ int event_type = irq_work->event_type;
+ u32 queue_num = irq_work->queue_num;
+
+ switch (event_type) {
+ case HNS_ROCE_EVENT_TYPE_PATH_MIG:
+ case HNS_ROCE_EVENT_TYPE_PATH_MIG_FAILED:
+ case HNS_ROCE_EVENT_TYPE_COMM_EST:
+ case HNS_ROCE_EVENT_TYPE_SQ_DRAINED:
+ case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR:
+ case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH:
+ case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR:
+ case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR:
+ case HNS_ROCE_EVENT_TYPE_XRCD_VIOLATION:
+ case HNS_ROCE_EVENT_TYPE_INVALID_XRCETH:
+ hns_roce_qp_event(hr_dev, queue_num, event_type);
+ break;
+ case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH:
+ case HNS_ROCE_EVENT_TYPE_SRQ_CATAS_ERROR:
+ hns_roce_srq_event(hr_dev, queue_num, event_type);
+ break;
+ case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR:
+ case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW:
+ hns_roce_cq_event(hr_dev, queue_num, event_type);
+ break;
+ default:
+ break;
+ }
+
+ dump_aeqe_log(irq_work);
+
+ kfree(irq_work);
+}
+
+static void hns_roce_v2_init_irq_work(struct hns_roce_dev *hr_dev,
+ struct hns_roce_eq *eq, u32 queue_num)
+{
+ struct hns_roce_work *irq_work;
+
+ irq_work = kzalloc(sizeof(struct hns_roce_work), GFP_ATOMIC);
+ if (!irq_work)
+ return;
+
+ INIT_WORK(&irq_work->work, hns_roce_irq_work_handle);
+ irq_work->hr_dev = hr_dev;
+ irq_work->event_type = eq->event_type;
+ irq_work->sub_type = eq->sub_type;
+ irq_work->queue_num = queue_num;
+ queue_work(hr_dev->irq_workq, &irq_work->work);
+}
+
+static void update_eq_db(struct hns_roce_eq *eq)
+{
+ struct hns_roce_dev *hr_dev = eq->hr_dev;
+ struct hns_roce_v2_db eq_db = {};
+
+ if (eq->type_flag == HNS_ROCE_AEQ) {
+ hr_reg_write(&eq_db, EQ_DB_CMD,
+ eq->arm_st == HNS_ROCE_V2_EQ_ALWAYS_ARMED ?
+ HNS_ROCE_EQ_DB_CMD_AEQ :
+ HNS_ROCE_EQ_DB_CMD_AEQ_ARMED);
+ } else {
+ hr_reg_write(&eq_db, EQ_DB_TAG, eq->eqn);
+
+ hr_reg_write(&eq_db, EQ_DB_CMD,
+ eq->arm_st == HNS_ROCE_V2_EQ_ALWAYS_ARMED ?
+ HNS_ROCE_EQ_DB_CMD_CEQ :
+ HNS_ROCE_EQ_DB_CMD_CEQ_ARMED);
+ }
+
+ hr_reg_write(&eq_db, EQ_DB_CI, eq->cons_index);
+
+ hns_roce_write64(hr_dev, (__le32 *)&eq_db, eq->db_reg);
+}
+
+static struct hns_roce_aeqe *next_aeqe_sw_v2(struct hns_roce_eq *eq)
+{
+ struct hns_roce_aeqe *aeqe;
+
+ aeqe = hns_roce_buf_offset(eq->mtr.kmem,
+ (eq->cons_index & (eq->entries - 1)) *
+ eq->eqe_size);
+
+ return (hr_reg_read(aeqe, AEQE_OWNER) ^
+ !!(eq->cons_index & eq->entries)) ? aeqe : NULL;
+}
+
+static irqreturn_t hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
+ struct hns_roce_eq *eq)
+{
+ struct hns_roce_aeqe *aeqe = next_aeqe_sw_v2(eq);
+ irqreturn_t aeqe_found = IRQ_NONE;
+ int num_aeqes = 0;
+ int event_type;
+ u32 queue_num;
+ int sub_type;
+
+ while (aeqe && num_aeqes < HNS_AEQ_POLLING_BUDGET) {
+ /* Make sure we read AEQ entry after we have checked the
+ * ownership bit
+ */
+ dma_rmb();
+
+ event_type = hr_reg_read(aeqe, AEQE_EVENT_TYPE);
+ sub_type = hr_reg_read(aeqe, AEQE_SUB_TYPE);
+ queue_num = hr_reg_read(aeqe, AEQE_EVENT_QUEUE_NUM);
+
+ switch (event_type) {
+ case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR:
+ case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR:
+ case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR:
+ case HNS_ROCE_EVENT_TYPE_XRCD_VIOLATION:
+ case HNS_ROCE_EVENT_TYPE_INVALID_XRCETH:
+ hns_roce_flush_cqe(hr_dev, queue_num);
+ break;
+ case HNS_ROCE_EVENT_TYPE_MB:
+ hns_roce_cmd_event(hr_dev,
+ le16_to_cpu(aeqe->event.cmd.token),
+ aeqe->event.cmd.status,
+ le64_to_cpu(aeqe->event.cmd.out_param));
+ break;
+ default:
+ break;
+ }
+
+ eq->event_type = event_type;
+ eq->sub_type = sub_type;
+ ++eq->cons_index;
+ aeqe_found = IRQ_HANDLED;
+ trace_hns_ae_info(event_type, aeqe, eq->eqe_size);
+
+ atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_AEQE_CNT]);
+
+ hns_roce_v2_init_irq_work(hr_dev, eq, queue_num);
+
+ aeqe = next_aeqe_sw_v2(eq);
+ ++num_aeqes;
+ }
+
+ update_eq_db(eq);
+
+ return IRQ_RETVAL(aeqe_found);
+}
+
+static struct hns_roce_ceqe *next_ceqe_sw_v2(struct hns_roce_eq *eq)
+{
+ struct hns_roce_ceqe *ceqe;
+
+ ceqe = hns_roce_buf_offset(eq->mtr.kmem,
+ (eq->cons_index & (eq->entries - 1)) *
+ eq->eqe_size);
+
+ return (hr_reg_read(ceqe, CEQE_OWNER) ^
+ !!(eq->cons_index & eq->entries)) ? ceqe : NULL;
+}
+
+static irqreturn_t hns_roce_v2_ceq_int(struct hns_roce_eq *eq)
+{
+ queue_work(system_bh_wq, &eq->work);
+
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t hns_roce_v2_msix_interrupt_eq(int irq, void *eq_ptr)
+{
+ struct hns_roce_eq *eq = eq_ptr;
+ struct hns_roce_dev *hr_dev = eq->hr_dev;
+ irqreturn_t int_work;
+
+ if (eq->type_flag == HNS_ROCE_CEQ)
+ /* Completion event interrupt */
+ int_work = hns_roce_v2_ceq_int(eq);
+ else
+ /* Asynchronous event interrupt */
+ int_work = hns_roce_v2_aeq_int(hr_dev, eq);
+
+ return IRQ_RETVAL(int_work);
+}
+
+static irqreturn_t abnormal_interrupt_basic(struct hns_roce_dev *hr_dev,
+ u32 int_st)
+{
+ struct pci_dev *pdev = hr_dev->pci_dev;
+ struct hnae3_ae_dev *ae_dev = pci_get_drvdata(pdev);
+ const struct hnae3_ae_ops *ops = ae_dev->ops;
+ enum hnae3_reset_type reset_type;
+ irqreturn_t int_work = IRQ_NONE;
+ u32 int_en;
+
+ int_en = roce_read(hr_dev, ROCEE_VF_ABN_INT_EN_REG);
+
+ if (int_st & BIT(HNS_ROCE_V2_VF_INT_ST_AEQ_OVERFLOW_S)) {
+ dev_err(hr_dev->dev, "AEQ overflow!\n");
+
+ roce_write(hr_dev, ROCEE_VF_ABN_INT_ST_REG,
+ 1 << HNS_ROCE_V2_VF_INT_ST_AEQ_OVERFLOW_S);
+
+ reset_type = hr_dev->is_vf ?
+ HNAE3_VF_FUNC_RESET : HNAE3_FUNC_RESET;
+
+ /* Set reset level for reset_event() */
+ if (ops->set_default_reset_request)
+ ops->set_default_reset_request(ae_dev, reset_type);
+ if (ops->reset_event)
+ ops->reset_event(pdev, NULL);
+
+ int_en |= 1 << HNS_ROCE_V2_VF_ABN_INT_EN_S;
+ roce_write(hr_dev, ROCEE_VF_ABN_INT_EN_REG, int_en);
+
+ int_work = IRQ_HANDLED;
+ } else {
+ dev_err(hr_dev->dev, "there is no basic abn irq found.\n");
+ }
+
+ return IRQ_RETVAL(int_work);
+}
+
+static int fmea_ram_ecc_query(struct hns_roce_dev *hr_dev,
+ struct fmea_ram_ecc *ecc_info)
+{
+ struct hns_roce_cmq_desc desc;
+ struct hns_roce_cmq_req *req = (struct hns_roce_cmq_req *)desc.data;
+ int ret;
+
+ hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_QUERY_RAM_ECC, true);
+ ret = hns_roce_cmq_send(hr_dev, &desc, 1);
+ if (ret)
+ return ret;
+
+ ecc_info->is_ecc_err = hr_reg_read(req, QUERY_RAM_ECC_1BIT_ERR);
+ ecc_info->res_type = hr_reg_read(req, QUERY_RAM_ECC_RES_TYPE);
+ ecc_info->index = hr_reg_read(req, QUERY_RAM_ECC_TAG);
+
+ return 0;
+}
+
+static int fmea_recover_gmv(struct hns_roce_dev *hr_dev, u32 idx)
+{
+ struct hns_roce_cmq_desc desc;
+ struct hns_roce_cmq_req *req = (struct hns_roce_cmq_req *)desc.data;
+ u32 addr_upper;
+ u32 addr_low;
+ int ret;
+
+ hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CFG_GMV_BT, true);
+ hr_reg_write(req, CFG_GMV_BT_IDX, idx);
+
+ ret = hns_roce_cmq_send(hr_dev, &desc, 1);
+ if (ret) {
+ dev_err(hr_dev->dev,
+ "failed to execute cmd to read gmv, ret = %d.\n", ret);
+ return ret;
+ }
+
+ addr_low = hr_reg_read(req, CFG_GMV_BT_BA_L);
+ addr_upper = hr_reg_read(req, CFG_GMV_BT_BA_H);
+
+ hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CFG_GMV_BT, false);
+ hr_reg_write(req, CFG_GMV_BT_BA_L, addr_low);
+ hr_reg_write(req, CFG_GMV_BT_BA_H, addr_upper);
+ hr_reg_write(req, CFG_GMV_BT_IDX, idx);
+
+ return hns_roce_cmq_send(hr_dev, &desc, 1);
+}
+
+static u64 fmea_get_ram_res_addr(u32 res_type, __le64 *data)
+{
+ if (res_type == ECC_RESOURCE_QPC_TIMER ||
+ res_type == ECC_RESOURCE_CQC_TIMER ||
+ res_type == ECC_RESOURCE_SCCC)
+ return le64_to_cpu(*data);
+
+ return le64_to_cpu(*data) << HNS_HW_PAGE_SHIFT;
+}
+
+static int fmea_recover_others(struct hns_roce_dev *hr_dev, u32 res_type,
+ u32 index)
+{
+ u8 write_bt0_op = fmea_ram_res[res_type].write_bt0_op;
+ u8 read_bt0_op = fmea_ram_res[res_type].read_bt0_op;
+ struct hns_roce_cmd_mailbox *mailbox;
+ u64 addr;
+ int ret;
+
+ mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+
+ ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, read_bt0_op, index);
+ if (ret) {
+ dev_err(hr_dev->dev,
+ "failed to execute cmd to read fmea ram, ret = %d.\n",
+ ret);
+ goto out;
+ }
+
+ addr = fmea_get_ram_res_addr(res_type, mailbox->buf);
+
+ ret = hns_roce_cmd_mbox(hr_dev, addr, 0, write_bt0_op, index);
+ if (ret)
+ dev_err(hr_dev->dev,
+ "failed to execute cmd to write fmea ram, ret = %d.\n",
+ ret);
+
+out:
+ hns_roce_free_cmd_mailbox(hr_dev, mailbox);
+ return ret;
+}
+
+static void fmea_ram_ecc_recover(struct hns_roce_dev *hr_dev,
+ struct fmea_ram_ecc *ecc_info)
+{
+ u32 res_type = ecc_info->res_type;
+ u32 index = ecc_info->index;
+ int ret;
+
+ BUILD_BUG_ON(ARRAY_SIZE(fmea_ram_res) != ECC_RESOURCE_COUNT);
+
+ if (res_type >= ECC_RESOURCE_COUNT) {
+ dev_err(hr_dev->dev, "unsupported fmea ram ecc type %u.\n",
+ res_type);
+ return;
+ }
+
+ if (res_type == ECC_RESOURCE_GMV)
+ ret = fmea_recover_gmv(hr_dev, index);
+ else
+ ret = fmea_recover_others(hr_dev, res_type, index);
+ if (ret)
+ dev_err(hr_dev->dev,
+ "failed to recover %s, index = %u, ret = %d.\n",
+ fmea_ram_res[res_type].name, index, ret);
+}
+
+static void fmea_ram_ecc_work(struct work_struct *ecc_work)
+{
+ struct hns_roce_dev *hr_dev =
+ container_of(ecc_work, struct hns_roce_dev, ecc_work);
+ struct fmea_ram_ecc ecc_info = {};
+
+ if (fmea_ram_ecc_query(hr_dev, &ecc_info)) {
+ dev_err(hr_dev->dev, "failed to query fmea ram ecc.\n");
+ return;
+ }
+
+ if (!ecc_info.is_ecc_err) {
+ dev_err(hr_dev->dev, "there is no fmea ram ecc err found.\n");
+ return;
+ }
+
+ fmea_ram_ecc_recover(hr_dev, &ecc_info);
+}
+
+static irqreturn_t hns_roce_v2_msix_interrupt_abn(int irq, void *dev_id)
+{
+ struct hns_roce_dev *hr_dev = dev_id;
+ irqreturn_t int_work = IRQ_NONE;
+ u32 int_st;
+
+ int_st = roce_read(hr_dev, ROCEE_VF_ABN_INT_ST_REG);
+
+ if (int_st) {
+ int_work = abnormal_interrupt_basic(hr_dev, int_st);
+ } else if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) {
+ queue_work(hr_dev->irq_workq, &hr_dev->ecc_work);
+ int_work = IRQ_HANDLED;
+ } else {
+ dev_err(hr_dev->dev, "there is no abnormal irq found.\n");
+ }
+
+ return IRQ_RETVAL(int_work);
+}
+
+static void hns_roce_v2_int_mask_enable(struct hns_roce_dev *hr_dev,
+ int eq_num, u32 enable_flag)
+{
+ int i;
+
+ for (i = 0; i < eq_num; i++)
+ roce_write(hr_dev, ROCEE_VF_EVENT_INT_EN_REG +
+ i * EQ_REG_OFFSET, enable_flag);
+
+ roce_write(hr_dev, ROCEE_VF_ABN_INT_EN_REG, enable_flag);
+ roce_write(hr_dev, ROCEE_VF_ABN_INT_CFG_REG, enable_flag);
+}
+
+static void free_eq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq)
+{
+ hns_roce_mtr_destroy(hr_dev, &eq->mtr);
+}
+
+static void hns_roce_v2_destroy_eqc(struct hns_roce_dev *hr_dev,
+ struct hns_roce_eq *eq)
+{
+ struct device *dev = hr_dev->dev;
+ int eqn = eq->eqn;
+ int ret;
+ u8 cmd;
+
+ if (eqn < hr_dev->caps.num_comp_vectors)
+ cmd = HNS_ROCE_CMD_DESTROY_CEQC;
+ else
+ cmd = HNS_ROCE_CMD_DESTROY_AEQC;
+
+ ret = hns_roce_destroy_hw_ctx(hr_dev, cmd, eqn & HNS_ROCE_V2_EQN_M);
+ if (ret)
+ dev_err(dev, "[mailbox cmd] destroy eqc(%d) failed.\n", eqn);
+
+ free_eq_buf(hr_dev, eq);
+}
+
+static void init_eq_config(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq)
+{
+ eq->db_reg = hr_dev->reg_base + ROCEE_VF_EQ_DB_CFG0_REG;
+ eq->cons_index = 0;
+ eq->over_ignore = HNS_ROCE_V2_EQ_OVER_IGNORE_0;
+ eq->coalesce = HNS_ROCE_V2_EQ_COALESCE_0;
+ eq->arm_st = HNS_ROCE_V2_EQ_ALWAYS_ARMED;
+ eq->shift = ilog2((unsigned int)eq->entries);
+}
+
+static int config_eqc(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq,
+ void *mb_buf)
+{
+ u64 eqe_ba[MTT_MIN_COUNT] = { 0 };
+ struct hns_roce_eq_context *eqc;
+ u64 bt_ba = 0;
+ int ret;
+
+ eqc = mb_buf;
+ memset(eqc, 0, sizeof(struct hns_roce_eq_context));
+
+ init_eq_config(hr_dev, eq);
+
+ /* if not multi-hop, eqe buffer only use one trunk */
+ ret = hns_roce_mtr_find(hr_dev, &eq->mtr, 0, eqe_ba,
+ ARRAY_SIZE(eqe_ba));
+ if (ret) {
+ dev_err(hr_dev->dev, "failed to find EQE mtr, ret = %d\n", ret);
+ return ret;
+ }
+
+ bt_ba = hns_roce_get_mtr_ba(&eq->mtr);
+
+ hr_reg_write(eqc, EQC_EQ_ST, HNS_ROCE_V2_EQ_STATE_VALID);
+ hr_reg_write(eqc, EQC_EQE_HOP_NUM, eq->hop_num);
+ hr_reg_write(eqc, EQC_OVER_IGNORE, eq->over_ignore);
+ hr_reg_write(eqc, EQC_COALESCE, eq->coalesce);
+ hr_reg_write(eqc, EQC_ARM_ST, eq->arm_st);
+ hr_reg_write(eqc, EQC_EQN, eq->eqn);
+ hr_reg_write(eqc, EQC_EQE_CNT, HNS_ROCE_EQ_INIT_EQE_CNT);
+ hr_reg_write(eqc, EQC_EQE_BA_PG_SZ,
+ to_hr_hw_page_shift(eq->mtr.hem_cfg.ba_pg_shift));
+ hr_reg_write(eqc, EQC_EQE_BUF_PG_SZ,
+ to_hr_hw_page_shift(eq->mtr.hem_cfg.buf_pg_shift));
+ hr_reg_write(eqc, EQC_EQ_PROD_INDX, HNS_ROCE_EQ_INIT_PROD_IDX);
+ hr_reg_write(eqc, EQC_EQ_MAX_CNT, eq->eq_max_cnt);
+
+ if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) {
+ if (eq->eq_period * HNS_ROCE_CLOCK_ADJUST > USHRT_MAX) {
+ dev_info(hr_dev->dev, "eq_period(%u) reached the upper limit, adjusted to 65.\n",
+ eq->eq_period);
+ eq->eq_period = HNS_ROCE_MAX_EQ_PERIOD;
+ }
+ eq->eq_period *= HNS_ROCE_CLOCK_ADJUST;
+ }
+
+ hr_reg_write(eqc, EQC_EQ_PERIOD, eq->eq_period);
+ hr_reg_write(eqc, EQC_EQE_REPORT_TIMER, HNS_ROCE_EQ_INIT_REPORT_TIMER);
+ hr_reg_write(eqc, EQC_EQE_BA_L, bt_ba >> 3);
+ hr_reg_write(eqc, EQC_EQE_BA_H, bt_ba >> 35);
+ hr_reg_write(eqc, EQC_SHIFT, eq->shift);
+ hr_reg_write(eqc, EQC_MSI_INDX, HNS_ROCE_EQ_INIT_MSI_IDX);
+ hr_reg_write(eqc, EQC_CUR_EQE_BA_L, eqe_ba[0] >> 12);
+ hr_reg_write(eqc, EQC_CUR_EQE_BA_M, eqe_ba[0] >> 28);
+ hr_reg_write(eqc, EQC_CUR_EQE_BA_H, eqe_ba[0] >> 60);
+ hr_reg_write(eqc, EQC_EQ_CONS_INDX, HNS_ROCE_EQ_INIT_CONS_IDX);
+ hr_reg_write(eqc, EQC_NEX_EQE_BA_L, eqe_ba[1] >> 12);
+ hr_reg_write(eqc, EQC_NEX_EQE_BA_H, eqe_ba[1] >> 44);
+ hr_reg_write(eqc, EQC_EQE_SIZE, eq->eqe_size == HNS_ROCE_V3_EQE_SIZE);
+
+ return 0;
+}
+
+static int alloc_eq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq)
+{
+ struct hns_roce_buf_attr buf_attr = {};
+ int err;
+
+ if (hr_dev->caps.eqe_hop_num == HNS_ROCE_HOP_NUM_0)
+ eq->hop_num = 0;
+ else
+ eq->hop_num = hr_dev->caps.eqe_hop_num;
+
+ buf_attr.page_shift = hr_dev->caps.eqe_buf_pg_sz + PAGE_SHIFT;
+ buf_attr.region[0].size = eq->entries * eq->eqe_size;
+ buf_attr.region[0].hopnum = eq->hop_num;
+ buf_attr.region_count = 1;
+
+ err = hns_roce_mtr_create(hr_dev, &eq->mtr, &buf_attr,
+ hr_dev->caps.eqe_ba_pg_sz + PAGE_SHIFT, NULL,
+ 0);
+ if (err)
+ dev_err(hr_dev->dev, "failed to alloc EQE mtr, err %d\n", err);
+
+ return err;
+}
+
+static int hns_roce_v2_create_eq(struct hns_roce_dev *hr_dev,
+ struct hns_roce_eq *eq, u8 eq_cmd)
+{
+ struct hns_roce_cmd_mailbox *mailbox;
+ int ret;
+
+ /* Allocate mailbox memory */
+ mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+
+ ret = alloc_eq_buf(hr_dev, eq);
+ if (ret)
+ goto free_cmd_mbox;
+
+ ret = config_eqc(hr_dev, eq, mailbox->buf);
+ if (ret)
+ goto err_cmd_mbox;
+
+ ret = hns_roce_create_hw_ctx(hr_dev, mailbox, eq_cmd, eq->eqn);
+ if (ret) {
+ dev_err(hr_dev->dev, "[mailbox cmd] create eqc failed.\n");
+ goto err_cmd_mbox;
+ }
+
+ hns_roce_free_cmd_mailbox(hr_dev, mailbox);
+
+ return 0;
+
+err_cmd_mbox:
+ free_eq_buf(hr_dev, eq);
+
+free_cmd_mbox:
+ hns_roce_free_cmd_mailbox(hr_dev, mailbox);
+
+ return ret;
+}
+
+static void hns_roce_ceq_work(struct work_struct *work)
+{
+ struct hns_roce_eq *eq = from_work(eq, work, work);
+ struct hns_roce_ceqe *ceqe = next_ceqe_sw_v2(eq);
+ struct hns_roce_dev *hr_dev = eq->hr_dev;
+ int ceqe_num = 0;
+ u32 cqn;
+
+ while (ceqe && ceqe_num < hr_dev->caps.ceqe_depth) {
+ /* Make sure we read CEQ entry after we have checked the
+ * ownership bit
+ */
+ dma_rmb();
+
+ cqn = hr_reg_read(ceqe, CEQE_CQN);
+
+ hns_roce_cq_completion(hr_dev, cqn);
+
+ ++eq->cons_index;
+ ++ceqe_num;
+ atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_CEQE_CNT]);
+
+ ceqe = next_ceqe_sw_v2(eq);
+ }
+
+ update_eq_db(eq);
+}
+
+static int __hns_roce_request_irq(struct hns_roce_dev *hr_dev, int irq_num,
+ int comp_num, int aeq_num, int other_num)
+{
+ struct hns_roce_eq_table *eq_table = &hr_dev->eq_table;
+ int i, j;
+ int ret;
+
+ for (i = 0; i < irq_num; i++) {
+ hr_dev->irq_names[i] = kzalloc(HNS_ROCE_INT_NAME_LEN,
+ GFP_KERNEL);
+ if (!hr_dev->irq_names[i]) {
+ ret = -ENOMEM;
+ goto err_kzalloc_failed;
+ }
+ }
+
+ /* irq contains: abnormal + AEQ + CEQ */
+ for (j = 0; j < other_num; j++)
+ snprintf((char *)hr_dev->irq_names[j], HNS_ROCE_INT_NAME_LEN,
+ "hns-%s-abn-%d", pci_name(hr_dev->pci_dev), j);
+
+ for (j = other_num; j < (other_num + aeq_num); j++)
+ snprintf((char *)hr_dev->irq_names[j], HNS_ROCE_INT_NAME_LEN,
+ "hns-%s-aeq-%d", pci_name(hr_dev->pci_dev), j - other_num);
+
+ for (j = (other_num + aeq_num); j < irq_num; j++)
+ snprintf((char *)hr_dev->irq_names[j], HNS_ROCE_INT_NAME_LEN,
+ "hns-%s-ceq-%d", pci_name(hr_dev->pci_dev),
+ j - other_num - aeq_num);
+
+ for (j = 0; j < irq_num; j++) {
+ if (j < other_num) {
+ ret = request_irq(hr_dev->irq[j],
+ hns_roce_v2_msix_interrupt_abn,
+ 0, hr_dev->irq_names[j], hr_dev);
+ } else if (j < (other_num + comp_num)) {
+ INIT_WORK(&eq_table->eq[j - other_num].work,
+ hns_roce_ceq_work);
+ ret = request_irq(eq_table->eq[j - other_num].irq,
+ hns_roce_v2_msix_interrupt_eq,
+ 0, hr_dev->irq_names[j + aeq_num],
+ &eq_table->eq[j - other_num]);
+ } else {
+ ret = request_irq(eq_table->eq[j - other_num].irq,
+ hns_roce_v2_msix_interrupt_eq,
+ 0, hr_dev->irq_names[j - comp_num],
+ &eq_table->eq[j - other_num]);
+ }
+
+ if (ret) {
+ dev_err(hr_dev->dev, "request irq error!\n");
+ goto err_request_failed;
+ }
+ }
+
+ return 0;
+
+err_request_failed:
+ for (j -= 1; j >= 0; j--) {
+ if (j < other_num) {
+ free_irq(hr_dev->irq[j], hr_dev);
+ continue;
+ }
+ free_irq(eq_table->eq[j - other_num].irq,
+ &eq_table->eq[j - other_num]);
+ if (j < other_num + comp_num)
+ cancel_work_sync(&eq_table->eq[j - other_num].work);
+ }
+
+err_kzalloc_failed:
+ for (i -= 1; i >= 0; i--)
+ kfree(hr_dev->irq_names[i]);
+
+ return ret;
+}
+
+static void __hns_roce_free_irq(struct hns_roce_dev *hr_dev)
+{
+ int irq_num;
+ int eq_num;
+ int i;
+
+ eq_num = hr_dev->caps.num_comp_vectors + hr_dev->caps.num_aeq_vectors;
+ irq_num = eq_num + hr_dev->caps.num_other_vectors;
+
+ for (i = 0; i < hr_dev->caps.num_other_vectors; i++)
+ free_irq(hr_dev->irq[i], hr_dev);
+
+ for (i = 0; i < eq_num; i++) {
+ free_irq(hr_dev->eq_table.eq[i].irq, &hr_dev->eq_table.eq[i]);
+ if (i < hr_dev->caps.num_comp_vectors)
+ cancel_work_sync(&hr_dev->eq_table.eq[i].work);
+ }
+
+ for (i = 0; i < irq_num; i++)
+ kfree(hr_dev->irq_names[i]);
+}
+
+static int hns_roce_v2_init_eq_table(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_eq_table *eq_table = &hr_dev->eq_table;
+ struct device *dev = hr_dev->dev;
+ struct hns_roce_eq *eq;
+ int other_num;
+ int comp_num;
+ int aeq_num;
+ int irq_num;
+ int eq_num;
+ u8 eq_cmd;
+ int ret;
+ int i;
+
+ if (hr_dev->caps.aeqe_depth < HNS_AEQ_POLLING_BUDGET)
+ return -EINVAL;
+
+ other_num = hr_dev->caps.num_other_vectors;
+ comp_num = hr_dev->caps.num_comp_vectors;
+ aeq_num = hr_dev->caps.num_aeq_vectors;
+
+ eq_num = comp_num + aeq_num;
+ irq_num = eq_num + other_num;
+
+ eq_table->eq = kcalloc(eq_num, sizeof(*eq_table->eq), GFP_KERNEL);
+ if (!eq_table->eq)
+ return -ENOMEM;
+
+ /* create eq */
+ for (i = 0; i < eq_num; i++) {
+ eq = &eq_table->eq[i];
+ eq->hr_dev = hr_dev;
+ eq->eqn = i;
+ if (i < comp_num) {
+ /* CEQ */
+ eq_cmd = HNS_ROCE_CMD_CREATE_CEQC;
+ eq->type_flag = HNS_ROCE_CEQ;
+ eq->entries = hr_dev->caps.ceqe_depth;
+ eq->eqe_size = hr_dev->caps.ceqe_size;
+ eq->irq = hr_dev->irq[i + other_num + aeq_num];
+ eq->eq_max_cnt = HNS_ROCE_CEQ_DEFAULT_BURST_NUM;
+ eq->eq_period = HNS_ROCE_CEQ_DEFAULT_INTERVAL;
+ } else {
+ /* AEQ */
+ eq_cmd = HNS_ROCE_CMD_CREATE_AEQC;
+ eq->type_flag = HNS_ROCE_AEQ;
+ eq->entries = hr_dev->caps.aeqe_depth;
+ eq->eqe_size = hr_dev->caps.aeqe_size;
+ eq->irq = hr_dev->irq[i - comp_num + other_num];
+ eq->eq_max_cnt = HNS_ROCE_AEQ_DEFAULT_BURST_NUM;
+ eq->eq_period = HNS_ROCE_AEQ_DEFAULT_INTERVAL;
+ }
+
+ ret = hns_roce_v2_create_eq(hr_dev, eq, eq_cmd);
+ if (ret) {
+ dev_err(dev, "failed to create eq.\n");
+ goto err_create_eq_fail;
+ }
+ }
+
+ INIT_WORK(&hr_dev->ecc_work, fmea_ram_ecc_work);
+
+ hr_dev->irq_workq = alloc_ordered_workqueue("hns_roce_irq_workq", 0);
+ if (!hr_dev->irq_workq) {
+ dev_err(dev, "failed to create irq workqueue.\n");
+ ret = -ENOMEM;
+ goto err_create_eq_fail;
+ }
+
+ ret = __hns_roce_request_irq(hr_dev, irq_num, comp_num, aeq_num,
+ other_num);
+ if (ret) {
+ dev_err(dev, "failed to request irq.\n");
+ goto err_request_irq_fail;
+ }
+
+ /* enable irq */
+ hns_roce_v2_int_mask_enable(hr_dev, eq_num, EQ_ENABLE);
+
+ return 0;
+
+err_request_irq_fail:
+ destroy_workqueue(hr_dev->irq_workq);
+
+err_create_eq_fail:
+ for (i -= 1; i >= 0; i--)
+ hns_roce_v2_destroy_eqc(hr_dev, &eq_table->eq[i]);
+ kfree(eq_table->eq);
+
+ return ret;
+}
+
+static void hns_roce_v2_cleanup_eq_table(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_eq_table *eq_table = &hr_dev->eq_table;
+ int eq_num;
+ int i;
+
+ eq_num = hr_dev->caps.num_comp_vectors + hr_dev->caps.num_aeq_vectors;
+
+ /* Disable irq */
+ hns_roce_v2_int_mask_enable(hr_dev, eq_num, EQ_DISABLE);
+
+ __hns_roce_free_irq(hr_dev);
+ destroy_workqueue(hr_dev->irq_workq);
+
+ for (i = 0; i < eq_num; i++)
+ hns_roce_v2_destroy_eqc(hr_dev, &eq_table->eq[i]);
+
+ kfree(eq_table->eq);
+}
+
+static const struct ib_device_ops hns_roce_v2_dev_ops = {
+ .destroy_qp = hns_roce_v2_destroy_qp,
+ .modify_cq = hns_roce_v2_modify_cq,
+ .poll_cq = hns_roce_v2_poll_cq,
+ .post_recv = hns_roce_v2_post_recv,
+ .post_send = hns_roce_v2_post_send,
+ .query_qp = hns_roce_v2_query_qp,
+ .req_notify_cq = hns_roce_v2_req_notify_cq,
+};
+
+static const struct ib_device_ops hns_roce_v2_dev_srq_ops = {
+ .modify_srq = hns_roce_v2_modify_srq,
+ .post_srq_recv = hns_roce_v2_post_srq_recv,
+ .query_srq = hns_roce_v2_query_srq,
+};
+
+static const struct hns_roce_hw hns_roce_hw_v2 = {
+ .cmq_init = hns_roce_v2_cmq_init,
+ .cmq_exit = hns_roce_v2_cmq_exit,
+ .hw_profile = hns_roce_v2_profile,
+ .hw_init = hns_roce_v2_init,
+ .hw_exit = hns_roce_v2_exit,
+ .post_mbox = v2_post_mbox,
+ .poll_mbox_done = v2_poll_mbox_done,
+ .chk_mbox_avail = v2_chk_mbox_is_avail,
+ .set_gid = hns_roce_v2_set_gid,
+ .set_mac = hns_roce_v2_set_mac,
+ .write_mtpt = hns_roce_v2_write_mtpt,
+ .rereg_write_mtpt = hns_roce_v2_rereg_write_mtpt,
+ .frmr_write_mtpt = hns_roce_v2_frmr_write_mtpt,
+ .write_cqc = hns_roce_v2_write_cqc,
+ .set_hem = hns_roce_v2_set_hem,
+ .clear_hem = hns_roce_v2_clear_hem,
+ .modify_qp = hns_roce_v2_modify_qp,
+ .dereg_mr = hns_roce_v2_dereg_mr,
+ .qp_flow_control_init = hns_roce_v2_qp_flow_control_init,
+ .init_eq = hns_roce_v2_init_eq_table,
+ .cleanup_eq = hns_roce_v2_cleanup_eq_table,
+ .write_srqc = hns_roce_v2_write_srqc,
+ .query_cqc = hns_roce_v2_query_cqc,
+ .query_qpc = hns_roce_v2_query_qpc,
+ .query_mpt = hns_roce_v2_query_mpt,
+ .query_srqc = hns_roce_v2_query_srqc,
+ .query_sccc = hns_roce_v2_query_sccc,
+ .query_hw_counter = hns_roce_hw_v2_query_counter,
+ .get_dscp = hns_roce_hw_v2_get_dscp,
+ .hns_roce_dev_ops = &hns_roce_v2_dev_ops,
+ .hns_roce_dev_srq_ops = &hns_roce_v2_dev_srq_ops,
+};
+
+static const struct pci_device_id hns_roce_hw_v2_pci_tbl[] = {
+ {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_25GE_RDMA), 0},
+ {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_25GE_RDMA_MACSEC), 0},
+ {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_50GE_RDMA), 0},
+ {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_50GE_RDMA_MACSEC), 0},
+ {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_100G_RDMA_MACSEC), 0},
+ {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_200G_RDMA), 0},
+ {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_RDMA_DCB_PFC_VF),
+ HNAE3_DEV_SUPPORT_ROCE_DCB_BITS},
+ /* required last entry */
+ {0, }
+};
+
+MODULE_DEVICE_TABLE(pci, hns_roce_hw_v2_pci_tbl);
+
+static void hns_roce_hw_v2_get_cfg(struct hns_roce_dev *hr_dev,
+ struct hnae3_handle *handle)
+{
+ struct hns_roce_v2_priv *priv = hr_dev->priv;
+ const struct pci_device_id *id;
+ int i;
+
+ hr_dev->pci_dev = handle->pdev;
+ id = pci_match_id(hns_roce_hw_v2_pci_tbl, hr_dev->pci_dev);
+ hr_dev->is_vf = id->driver_data;
+ hr_dev->dev = &handle->pdev->dev;
+ hr_dev->hw = &hns_roce_hw_v2;
+ hr_dev->sdb_offset = ROCEE_DB_SQ_L_0_REG;
+ hr_dev->odb_offset = hr_dev->sdb_offset;
+
+ /* Get info from NIC driver. */
+ hr_dev->reg_base = handle->rinfo.roce_io_base;
+ hr_dev->mem_base = handle->rinfo.roce_mem_base;
+ hr_dev->caps.num_ports = 1;
+ hr_dev->iboe.netdevs[0] = handle->rinfo.netdev;
+ hr_dev->iboe.phy_port[0] = 0;
+
+ addrconf_addr_eui48((u8 *)&hr_dev->ib_dev.node_guid,
+ hr_dev->iboe.netdevs[0]->dev_addr);
+
+ for (i = 0; i < handle->rinfo.num_vectors; i++)
+ hr_dev->irq[i] = pci_irq_vector(handle->pdev,
+ i + handle->rinfo.base_vector);
+
+ /* cmd issue mode: 0 is poll, 1 is event */
+ hr_dev->cmd_mod = 1;
+ hr_dev->loop_idc = 0;
+
+ hr_dev->reset_cnt = handle->ae_algo->ops->ae_dev_reset_cnt(handle);
+ priv->handle = handle;
+}
+
+static int __hns_roce_hw_v2_init_instance(struct hnae3_handle *handle)
+{
+ struct hns_roce_dev *hr_dev;
+ int ret;
+
+ hr_dev = ib_alloc_device(hns_roce_dev, ib_dev);
+ if (!hr_dev)
+ return -ENOMEM;
+
+ hr_dev->priv = kzalloc(sizeof(struct hns_roce_v2_priv), GFP_KERNEL);
+ if (!hr_dev->priv) {
+ ret = -ENOMEM;
+ goto error_failed_kzalloc;
+ }
+
+ hns_roce_hw_v2_get_cfg(hr_dev, handle);
+
+ ret = hns_roce_init(hr_dev);
+ if (ret) {
+ dev_err(hr_dev->dev, "RoCE Engine init failed!\n");
+ goto error_failed_roce_init;
+ }
+
+ handle->priv = hr_dev;
+
+ return 0;
+
+error_failed_roce_init:
+ kfree(hr_dev->priv);
+
+error_failed_kzalloc:
+ ib_dealloc_device(&hr_dev->ib_dev);
+
+ return ret;
+}
+
+static void __hns_roce_hw_v2_uninit_instance(struct hnae3_handle *handle,
+ bool reset, bool bond_cleanup)
+{
+ struct hns_roce_dev *hr_dev = handle->priv;
+
+ if (!hr_dev)
+ return;
+
+ handle->priv = NULL;
+
+ hr_dev->state = HNS_ROCE_DEVICE_STATE_UNINIT;
+ hns_roce_handle_device_err(hr_dev);
+
+ hns_roce_exit(hr_dev, bond_cleanup);
+ kfree(hr_dev->priv);
+ ib_dealloc_device(&hr_dev->ib_dev);
+}
+
+static int hns_roce_hw_v2_init_instance(struct hnae3_handle *handle)
+{
+ const struct hnae3_ae_ops *ops = handle->ae_algo->ops;
+ const struct pci_device_id *id;
+ struct device *dev = &handle->pdev->dev;
+ int ret;
+
+ handle->rinfo.instance_state = HNS_ROCE_STATE_INIT;
+
+ if (ops->ae_dev_resetting(handle) || ops->get_hw_reset_stat(handle)) {
+ handle->rinfo.instance_state = HNS_ROCE_STATE_NON_INIT;
+ goto reset_chk_err;
+ }
+
+ id = pci_match_id(hns_roce_hw_v2_pci_tbl, handle->pdev);
+ if (!id)
+ return 0;
+
+ if (id->driver_data && handle->pdev->revision == PCI_REVISION_ID_HIP08)
+ return 0;
+
+ ret = __hns_roce_hw_v2_init_instance(handle);
+ if (ret) {
+ handle->rinfo.instance_state = HNS_ROCE_STATE_NON_INIT;
+ dev_err(dev, "RoCE instance init failed! ret = %d\n", ret);
+ if (ops->ae_dev_resetting(handle) ||
+ ops->get_hw_reset_stat(handle))
+ goto reset_chk_err;
+ else
+ return ret;
+ }
+
+ handle->rinfo.instance_state = HNS_ROCE_STATE_INITED;
+
+ return 0;
+
+reset_chk_err:
+ dev_err(dev, "Device is busy in resetting state.\n"
+ "please retry later.\n");
+
+ return -EBUSY;
+}
+
+static void hns_roce_hw_v2_uninit_instance(struct hnae3_handle *handle,
+ bool reset)
+{
+ /* Suspend bond to avoid concurrency */
+ hns_roce_bond_suspend(handle);
+
+ if (handle->rinfo.instance_state != HNS_ROCE_STATE_INITED)
+ goto out;
+
+ handle->rinfo.instance_state = HNS_ROCE_STATE_UNINIT;
+
+ __hns_roce_hw_v2_uninit_instance(handle, reset, true);
+
+ handle->rinfo.instance_state = HNS_ROCE_STATE_NON_INIT;
+
+out:
+ hns_roce_bond_resume(handle);
+}
+
+struct hns_roce_dev
+ *hns_roce_bond_init_client(struct hns_roce_bond_group *bond_grp,
+ int func_idx)
+{
+ struct hnae3_handle *handle;
+ int ret;
+
+ handle = bond_grp->bond_func_info[func_idx].handle;
+ if (!handle || !handle->client)
+ return NULL;
+
+ ret = hns_roce_hw_v2_init_instance(handle);
+ if (ret)
+ return NULL;
+
+ return handle->priv;
+}
+
+void hns_roce_bond_uninit_client(struct hns_roce_bond_group *bond_grp,
+ int func_idx)
+{
+ struct hnae3_handle *handle = bond_grp->bond_func_info[func_idx].handle;
+
+ if (handle->rinfo.instance_state != HNS_ROCE_STATE_INITED)
+ return;
+
+ handle->rinfo.instance_state = HNS_ROCE_STATE_BOND_UNINIT;
+
+ __hns_roce_hw_v2_uninit_instance(handle, false, false);
+
+ handle->rinfo.instance_state = HNS_ROCE_STATE_NON_INIT;
+}
+
+static int hns_roce_hw_v2_reset_notify_down(struct hnae3_handle *handle)
+{
+ struct hns_roce_dev *hr_dev;
+
+ /* Suspend bond to avoid concurrency */
+ hns_roce_bond_suspend(handle);
+
+ if (handle->rinfo.instance_state != HNS_ROCE_STATE_INITED) {
+ set_bit(HNS_ROCE_RST_DIRECT_RETURN, &handle->rinfo.state);
+ return 0;
+ }
+
+ handle->rinfo.reset_state = HNS_ROCE_STATE_RST_DOWN;
+ clear_bit(HNS_ROCE_RST_DIRECT_RETURN, &handle->rinfo.state);
+
+ hr_dev = handle->priv;
+ if (!hr_dev)
+ return 0;
+
+ hr_dev->active = false;
+ hr_dev->dis_db = true;
+
+ rdma_user_mmap_disassociate(&hr_dev->ib_dev);
+
+ hr_dev->state = HNS_ROCE_DEVICE_STATE_RST_DOWN;
+
+ return 0;
+}
+
+static int hns_roce_hw_v2_reset_notify_init(struct hnae3_handle *handle)
+{
+ struct device *dev = &handle->pdev->dev;
+ int ret;
+
+ if (test_and_clear_bit(HNS_ROCE_RST_DIRECT_RETURN,
+ &handle->rinfo.state)) {
+ handle->rinfo.reset_state = HNS_ROCE_STATE_RST_INITED;
+ hns_roce_bond_resume(handle);
+ return 0;
+ }
+
+ handle->rinfo.reset_state = HNS_ROCE_STATE_RST_INIT;
+
+ dev_info(&handle->pdev->dev, "In reset process RoCE client reinit.\n");
+ ret = __hns_roce_hw_v2_init_instance(handle);
+ if (ret) {
+ /* when reset notify type is HNAE3_INIT_CLIENT In reset notify
+ * callback function, RoCE Engine reinitialize. If RoCE reinit
+ * failed, we should inform NIC driver.
+ */
+ handle->priv = NULL;
+ dev_err(dev, "In reset process RoCE reinit failed %d.\n", ret);
+ } else {
+ handle->rinfo.reset_state = HNS_ROCE_STATE_RST_INITED;
+ dev_info(dev, "reset done, RoCE client reinit finished.\n");
+ }
+
+ hns_roce_bond_resume(handle);
+ return ret;
+}
+
+static int hns_roce_hw_v2_reset_notify_uninit(struct hnae3_handle *handle)
+{
+ if (test_bit(HNS_ROCE_RST_DIRECT_RETURN, &handle->rinfo.state))
+ return 0;
+
+ handle->rinfo.reset_state = HNS_ROCE_STATE_RST_UNINIT;
+ dev_info(&handle->pdev->dev, "In reset process RoCE client uninit.\n");
+ msleep(HNS_ROCE_V2_HW_RST_UNINT_DELAY);
+ __hns_roce_hw_v2_uninit_instance(handle, false, false);
+
+ return 0;
+}
+
+static int hns_roce_hw_v2_reset_notify(struct hnae3_handle *handle,
+ enum hnae3_reset_notify_type type)
+{
+ int ret = 0;
+
+ switch (type) {
+ case HNAE3_DOWN_CLIENT:
+ ret = hns_roce_hw_v2_reset_notify_down(handle);
+ break;
+ case HNAE3_INIT_CLIENT:
+ ret = hns_roce_hw_v2_reset_notify_init(handle);
+ break;
+ case HNAE3_UNINIT_CLIENT:
+ ret = hns_roce_hw_v2_reset_notify_uninit(handle);
+ break;
+ default:
+ break;
+ }
+
+ return ret;
+}
+
+static void hns_roce_hw_v2_link_status_change(struct hnae3_handle *handle,
+ bool linkup)
+{
+ struct hns_roce_dev *hr_dev = (struct hns_roce_dev *)handle->priv;
+ struct net_device *netdev = handle->rinfo.netdev;
+
+ if (linkup || !hr_dev)
+ return;
+
+ /* For bond device, the link status depends on the upper netdev,
+ * and the upper device's link status depends on all the slaves'
+ * netdev but not only one. So bond device cannot get a correct
+ * link status from this path.
+ */
+ if (hns_roce_get_bond_grp(netdev, get_hr_bus_num(hr_dev)))
+ return;
+
+ ib_dispatch_port_state_event(&hr_dev->ib_dev, netdev);
+}
+
+static const struct hnae3_client_ops hns_roce_hw_v2_ops = {
+ .init_instance = hns_roce_hw_v2_init_instance,
+ .uninit_instance = hns_roce_hw_v2_uninit_instance,
+ .link_status_change = hns_roce_hw_v2_link_status_change,
+ .reset_notify = hns_roce_hw_v2_reset_notify,
+};
+
+static struct hnae3_client hns_roce_hw_v2_client = {
+ .name = "hns_roce_hw_v2",
+ .type = HNAE3_CLIENT_ROCE,
+ .ops = &hns_roce_hw_v2_ops,
+};
+
+static int __init hns_roce_hw_v2_init(void)
+{
+ hns_roce_init_debugfs();
+ return hnae3_register_client(&hns_roce_hw_v2_client);
+}
+
+static void __exit hns_roce_hw_v2_exit(void)
+{
+ hns_roce_dealloc_bond_grp();
+ hnae3_unregister_client(&hns_roce_hw_v2_client);
+ hns_roce_cleanup_debugfs();
+}
+
+module_init(hns_roce_hw_v2_init);
+module_exit(hns_roce_hw_v2_exit);
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_AUTHOR("Wei Hu <xavier.huwei@huawei.com>");
+MODULE_AUTHOR("Lijun Ou <oulijun@huawei.com>");
+MODULE_AUTHOR("Shaobo Xu <xushaobo2@huawei.com>");
+MODULE_DESCRIPTION("Hisilicon Hip08 Family RoCE Driver");
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
new file mode 100644
index 000000000000..285fe0875fac
--- /dev/null
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
@@ -0,0 +1,1501 @@
+/*
+ * Copyright (c) 2016-2017 Hisilicon Limited.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _HNS_ROCE_HW_V2_H
+#define _HNS_ROCE_HW_V2_H
+
+#include <linux/bitops.h>
+#include "hnae3.h"
+#include "hns_roce_bond.h"
+
+#define HNS_ROCE_V2_MAX_RC_INL_INN_SZ 32
+#define HNS_ROCE_V2_MTT_ENTRY_SZ 64
+#define HNS_ROCE_V2_AEQE_VEC_NUM 1
+#define HNS_ROCE_V2_ABNORMAL_VEC_NUM 1
+#define HNS_ROCE_V2_MAX_SRQWQE_SEGS 0x1000000
+#define HNS_ROCE_V2_MAX_IDX_SEGS 0x1000000
+#define HNS_ROCE_V2_MAX_XRCD_NUM 0x1000000
+
+#define HNS_ROCE_V2_QP_ACK_TIMEOUT_OFS_HIP08 10
+
+#define HNS_ROCE_V3_SCCC_SZ 64
+#define HNS_ROCE_V3_GMV_ENTRY_SZ 32
+
+#define HNS_ROCE_V2_EXT_LLM_ENTRY_SZ 8
+#define HNS_ROCE_V2_EXT_LLM_MAX_DEPTH 4096
+
+#define HNS_ROCE_V2_QPC_TIMER_ENTRY_SZ PAGE_SIZE
+#define HNS_ROCE_V2_CQC_TIMER_ENTRY_SZ PAGE_SIZE
+#define HNS_ROCE_V2_PAGE_SIZE_SUPPORTED 0xFFFF000
+#define HNS_ROCE_V2_MAX_INNER_MTPT_NUM 2
+#define HNS_ROCE_INVALID_LKEY 0x0
+#define HNS_ROCE_INVALID_SGE_LENGTH 0x80000000
+#define HNS_ROCE_CMQ_TX_TIMEOUT 30000
+#define HNS_ROCE_V2_RSV_QPS 8
+
+#define HNS_ROCE_V2_HW_RST_TIMEOUT 1000
+#define HNS_ROCE_V2_HW_RST_UNINT_DELAY 100
+
+#define HNS_ROCE_V2_HW_RST_COMPLETION_WAIT 20
+
+#define HNS_ROCE_CONTEXT_HOP_NUM 1
+#define HNS_ROCE_SCCC_HOP_NUM 1
+#define HNS_ROCE_MTT_HOP_NUM 1
+#define HNS_ROCE_CQE_HOP_NUM 1
+#define HNS_ROCE_SRQWQE_HOP_NUM 1
+#define HNS_ROCE_PBL_HOP_NUM 2
+#define HNS_ROCE_IDX_HOP_NUM 1
+#define HNS_ROCE_SQWQE_HOP_NUM 2
+#define HNS_ROCE_EXT_SGE_HOP_NUM 1
+#define HNS_ROCE_RQWQE_HOP_NUM 2
+
+#define HNS_ROCE_V2_EQE_HOP_NUM 2
+#define HNS_ROCE_V3_EQE_HOP_NUM 1
+
+#define HNS_ROCE_BA_PG_SZ_SUPPORTED_256K 6
+#define HNS_ROCE_BA_PG_SZ_SUPPORTED_16K 2
+#define HNS_ROCE_V2_GID_INDEX_NUM 16
+
+#define HNS_ROCE_V2_TABLE_CHUNK_SIZE (1 << 18)
+
+/* budget must be smaller than aeqe_depth to guarantee that we update
+ * the ci before we polled all the entries in the EQ.
+ */
+#define HNS_AEQ_POLLING_BUDGET 64
+
+enum {
+ HNS_ROCE_CMD_FLAG_IN = BIT(0),
+ HNS_ROCE_CMD_FLAG_OUT = BIT(1),
+ HNS_ROCE_CMD_FLAG_NEXT = BIT(2),
+ HNS_ROCE_CMD_FLAG_WR = BIT(3),
+ HNS_ROCE_CMD_FLAG_ERR_INTR = BIT(5),
+};
+
+#define HNS_ROCE_CMQ_DESC_NUM_S 3
+
+#define HNS_ROCE_CMQ_SCC_CLR_DONE_CNT 5
+
+#define HNS_ROCE_CONG_SIZE 64
+
+#define check_whether_last_step(hop_num, step_idx) \
+ ((step_idx == 0 && hop_num == HNS_ROCE_HOP_NUM_0) || \
+ (step_idx == 1 && hop_num == 1) || \
+ (step_idx == 2 && hop_num == 2))
+#define HNS_ICL_SWITCH_CMD_ROCEE_SEL_SHIFT 0
+#define HNS_ICL_SWITCH_CMD_ROCEE_SEL BIT(HNS_ICL_SWITCH_CMD_ROCEE_SEL_SHIFT)
+
+#define CMD_CSQ_DESC_NUM 1024
+#define CMD_CRQ_DESC_NUM 1024
+
+/* Free mr used parameters */
+#define HNS_ROCE_FREE_MR_USED_CQE_NUM 128
+#define HNS_ROCE_FREE_MR_USED_QP_NUM 0x8
+#define HNS_ROCE_FREE_MR_USED_PSN 0x0808
+#define HNS_ROCE_FREE_MR_USED_QP_RETRY_CNT 0x7
+#define HNS_ROCE_FREE_MR_USED_QP_TIMEOUT 0x12
+#define HNS_ROCE_FREE_MR_USED_SQWQE_NUM 128
+#define HNS_ROCE_FREE_MR_USED_SQSGE_NUM 0x2
+#define HNS_ROCE_FREE_MR_USED_RQWQE_NUM 128
+#define HNS_ROCE_FREE_MR_USED_RQSGE_NUM 0x2
+#define HNS_ROCE_V2_FREE_MR_TIMEOUT 4500
+
+enum {
+ NO_ARMED = 0x0,
+ REG_NXT_CEQE = 0x2,
+ REG_NXT_SE_CEQE = 0x3
+};
+
+enum {
+ CQE_SIZE_32B = 0x0,
+ CQE_SIZE_64B = 0x1
+};
+
+#define V2_CQ_DB_REQ_NOT_SOL 0
+#define V2_CQ_DB_REQ_NOT 1
+
+#define V2_CQ_STATE_VALID 1
+#define V2_QKEY_VAL 0x80010000
+
+#define GID_LEN_V2 16
+
+enum {
+ HNS_ROCE_V2_WQE_OP_SEND = 0x0,
+ HNS_ROCE_V2_WQE_OP_SEND_WITH_INV = 0x1,
+ HNS_ROCE_V2_WQE_OP_SEND_WITH_IMM = 0x2,
+ HNS_ROCE_V2_WQE_OP_RDMA_WRITE = 0x3,
+ HNS_ROCE_V2_WQE_OP_RDMA_WRITE_WITH_IMM = 0x4,
+ HNS_ROCE_V2_WQE_OP_RDMA_READ = 0x5,
+ HNS_ROCE_V2_WQE_OP_ATOM_CMP_AND_SWAP = 0x6,
+ HNS_ROCE_V2_WQE_OP_ATOM_FETCH_AND_ADD = 0x7,
+ HNS_ROCE_V2_WQE_OP_ATOM_MSK_CMP_AND_SWAP = 0x8,
+ HNS_ROCE_V2_WQE_OP_ATOM_MSK_FETCH_AND_ADD = 0x9,
+ HNS_ROCE_V2_WQE_OP_FAST_REG_PMR = 0xa,
+ HNS_ROCE_V2_WQE_OP_BIND_MW = 0xc,
+ HNS_ROCE_V2_WQE_OP_MASK = 0x1f,
+};
+
+enum {
+ /* rq operations */
+ HNS_ROCE_V2_OPCODE_RDMA_WRITE_IMM = 0x0,
+ HNS_ROCE_V2_OPCODE_SEND = 0x1,
+ HNS_ROCE_V2_OPCODE_SEND_WITH_IMM = 0x2,
+ HNS_ROCE_V2_OPCODE_SEND_WITH_INV = 0x3,
+};
+
+enum {
+ HNS_ROCE_V2_SQ_DB,
+ HNS_ROCE_V2_RQ_DB,
+ HNS_ROCE_V2_SRQ_DB,
+ HNS_ROCE_V2_CQ_DB,
+ HNS_ROCE_V2_CQ_DB_NOTIFY
+};
+
+enum {
+ HNS_ROCE_CQE_V2_SUCCESS = 0x00,
+ HNS_ROCE_CQE_V2_LOCAL_LENGTH_ERR = 0x01,
+ HNS_ROCE_CQE_V2_LOCAL_QP_OP_ERR = 0x02,
+ HNS_ROCE_CQE_V2_LOCAL_PROT_ERR = 0x04,
+ HNS_ROCE_CQE_V2_WR_FLUSH_ERR = 0x05,
+ HNS_ROCE_CQE_V2_MW_BIND_ERR = 0x06,
+ HNS_ROCE_CQE_V2_BAD_RESP_ERR = 0x10,
+ HNS_ROCE_CQE_V2_LOCAL_ACCESS_ERR = 0x11,
+ HNS_ROCE_CQE_V2_REMOTE_INVAL_REQ_ERR = 0x12,
+ HNS_ROCE_CQE_V2_REMOTE_ACCESS_ERR = 0x13,
+ HNS_ROCE_CQE_V2_REMOTE_OP_ERR = 0x14,
+ HNS_ROCE_CQE_V2_TRANSPORT_RETRY_EXC_ERR = 0x15,
+ HNS_ROCE_CQE_V2_RNR_RETRY_EXC_ERR = 0x16,
+ HNS_ROCE_CQE_V2_REMOTE_ABORT_ERR = 0x22,
+ HNS_ROCE_CQE_V2_GENERAL_ERR = 0x23,
+
+ HNS_ROCE_V2_CQE_STATUS_MASK = 0xff,
+};
+
+/* CMQ command */
+enum hns_roce_opcode_type {
+ HNS_QUERY_FW_VER = 0x0001,
+ HNS_ROCE_OPC_QUERY_HW_VER = 0x8000,
+ HNS_ROCE_OPC_CFG_GLOBAL_PARAM = 0x8001,
+ HNS_ROCE_OPC_ALLOC_PF_RES = 0x8004,
+ HNS_ROCE_OPC_QUERY_COUNTER = 0x8206,
+ HNS_ROCE_OPC_QUERY_PF_RES = 0x8400,
+ HNS_ROCE_OPC_ALLOC_VF_RES = 0x8401,
+ HNS_ROCE_OPC_CFG_EXT_LLM = 0x8403,
+ HNS_ROCE_OPC_QUERY_PF_TIMER_RES = 0x8406,
+ HNS_ROCE_OPC_QUERY_FUNC_INFO = 0x8407,
+ HNS_ROCE_OPC_QUERY_PF_CAPS_NUM = 0x8408,
+ HNS_ROCE_OPC_CFG_ENTRY_SIZE = 0x8409,
+ HNS_ROCE_OPC_QUERY_VF_CAPS_NUM = 0x8410,
+ HNS_ROCE_OPC_CFG_SGID_TB = 0x8500,
+ HNS_ROCE_OPC_CFG_SMAC_TB = 0x8501,
+ HNS_ROCE_OPC_POST_MB = 0x8504,
+ HNS_ROCE_OPC_QUERY_MB_ST = 0x8505,
+ HNS_ROCE_OPC_CFG_BT_ATTR = 0x8506,
+ HNS_ROCE_OPC_FUNC_CLEAR = 0x8508,
+ HNS_ROCE_OPC_CLR_SCCC = 0x8509,
+ HNS_ROCE_OPC_QUERY_SCCC = 0x850a,
+ HNS_ROCE_OPC_RESET_SCCC = 0x850b,
+ HNS_ROCE_OPC_CLEAR_EXTDB_LIST_INFO = 0x850d,
+ HNS_ROCE_OPC_QUERY_VF_RES = 0x850e,
+ HNS_ROCE_OPC_CFG_GMV_TBL = 0x850f,
+ HNS_ROCE_OPC_CFG_GMV_BT = 0x8510,
+ HNS_ROCE_QUERY_RAM_ECC = 0x8513,
+ HNS_SWITCH_PARAMETER_CFG = 0x1033,
+ HNS_ROCE_OPC_SET_BOND_INFO = 0x8601,
+ HNS_ROCE_OPC_CLEAR_BOND_INFO = 0x8602,
+ HNS_ROCE_OPC_CHANGE_ACTIVE_PORT = 0x8603,
+};
+
+#define HNS_ROCE_OPC_POST_MB_TIMEOUT 35000
+#define HNS_ROCE_OPC_POST_MB_TRY_CNT 8
+#define HNS_ROCE_OPC_POST_MB_RETRY_GAP_MSEC 5
+struct hns_roce_cmdq_tx_timeout_map {
+ u16 opcode;
+ u32 tx_timeout;
+};
+
+enum {
+ TYPE_CRQ,
+ TYPE_CSQ,
+};
+
+enum hns_roce_cmd_return_status {
+ CMD_EXEC_SUCCESS,
+ CMD_NO_AUTH,
+ CMD_NOT_EXIST,
+ CMD_CRQ_FULL,
+ CMD_NEXT_ERR,
+ CMD_NOT_EXEC,
+ CMD_PARA_ERR,
+ CMD_RESULT_ERR,
+ CMD_TIMEOUT,
+ CMD_HILINK_ERR,
+ CMD_INFO_ILLEGAL,
+ CMD_INVALID,
+ CMD_ROH_CHECK_FAIL,
+ CMD_OTHER_ERR = 0xff
+};
+
+struct hns_roce_cmd_errcode {
+ enum hns_roce_cmd_return_status return_status;
+ int errno;
+};
+
+enum hns_roce_sgid_type {
+ GID_TYPE_FLAG_ROCE_V1 = 0,
+ GID_TYPE_FLAG_ROCE_V2_IPV4,
+ GID_TYPE_FLAG_ROCE_V2_IPV6,
+};
+
+struct hns_roce_v2_cq_context {
+ __le32 byte_4_pg_ceqn;
+ __le32 byte_8_cqn;
+ __le32 cqe_cur_blk_addr;
+ __le32 byte_16_hop_addr;
+ __le32 cqe_nxt_blk_addr;
+ __le32 byte_24_pgsz_addr;
+ __le32 byte_28_cq_pi;
+ __le32 byte_32_cq_ci;
+ __le32 cqe_ba;
+ __le32 byte_40_cqe_ba;
+ __le32 byte_44_db_record;
+ __le32 db_record_addr;
+ __le32 byte_52_cqe_cnt;
+ __le32 byte_56_cqe_period_maxcnt;
+ __le32 cqe_report_timer;
+ __le32 byte_64_se_cqe_idx;
+};
+
+#define CQC_CQE_BA_L_S 3
+#define CQC_CQE_BA_H_S (32 + CQC_CQE_BA_L_S)
+#define CQC_CQE_DB_RECORD_ADDR_H_S 32
+
+#define HNS_ROCE_V2_CQ_DEFAULT_BURST_NUM 0x0
+#define HNS_ROCE_V2_CQ_DEFAULT_INTERVAL 0x0
+
+#define CQC_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_v2_cq_context, h, l)
+
+#define CQC_CQ_ST CQC_FIELD_LOC(1, 0)
+#define CQC_POLL CQC_FIELD_LOC(2, 2)
+#define CQC_SE CQC_FIELD_LOC(3, 3)
+#define CQC_OVER_IGNORE CQC_FIELD_LOC(4, 4)
+#define CQC_ARM_ST CQC_FIELD_LOC(7, 6)
+#define CQC_SHIFT CQC_FIELD_LOC(12, 8)
+#define CQC_CMD_SN CQC_FIELD_LOC(14, 13)
+#define CQC_CEQN CQC_FIELD_LOC(23, 15)
+#define CQC_CQN CQC_FIELD_LOC(55, 32)
+#define CQC_POE_EN CQC_FIELD_LOC(56, 56)
+#define CQC_POE_NUM CQC_FIELD_LOC(58, 57)
+#define CQC_CQE_SIZE CQC_FIELD_LOC(60, 59)
+#define CQC_CQ_CNT_MODE CQC_FIELD_LOC(61, 61)
+#define CQC_STASH CQC_FIELD_LOC(63, 63)
+#define CQC_CQE_CUR_BLK_ADDR_L CQC_FIELD_LOC(95, 64)
+#define CQC_CQE_CUR_BLK_ADDR_H CQC_FIELD_LOC(115, 96)
+#define CQC_POE_QID CQC_FIELD_LOC(125, 116)
+#define CQC_CQE_HOP_NUM CQC_FIELD_LOC(127, 126)
+#define CQC_CQE_NEX_BLK_ADDR_L CQC_FIELD_LOC(159, 128)
+#define CQC_CQE_NEX_BLK_ADDR_H CQC_FIELD_LOC(179, 160)
+#define CQC_CQE_BAR_PG_SZ CQC_FIELD_LOC(187, 184)
+#define CQC_CQE_BUF_PG_SZ CQC_FIELD_LOC(191, 188)
+#define CQC_CQ_PRODUCER_IDX CQC_FIELD_LOC(215, 192)
+#define CQC_CQ_CONSUMER_IDX CQC_FIELD_LOC(247, 224)
+#define CQC_CQE_BA_L CQC_FIELD_LOC(287, 256)
+#define CQC_CQE_BA_H CQC_FIELD_LOC(316, 288)
+#define CQC_POE_QID_H_0 CQC_FIELD_LOC(319, 317)
+#define CQC_DB_RECORD_EN CQC_FIELD_LOC(320, 320)
+#define CQC_CQE_DB_RECORD_ADDR_L CQC_FIELD_LOC(351, 321)
+#define CQC_CQE_DB_RECORD_ADDR_H CQC_FIELD_LOC(383, 352)
+#define CQC_CQE_CNT CQC_FIELD_LOC(407, 384)
+#define CQC_CQ_MAX_CNT CQC_FIELD_LOC(431, 416)
+#define CQC_CQ_PERIOD CQC_FIELD_LOC(447, 432)
+#define CQC_CQE_REPORT_TIMER CQC_FIELD_LOC(471, 448)
+#define CQC_WR_CQE_IDX CQC_FIELD_LOC(479, 472)
+#define CQC_SE_CQE_IDX CQC_FIELD_LOC(503, 480)
+#define CQC_POE_QID_H_1 CQC_FIELD_LOC(511, 511)
+
+struct hns_roce_srq_context {
+ __le32 data[16];
+};
+
+#define SRQC_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_srq_context, h, l)
+
+#define SRQC_SRQ_ST SRQC_FIELD_LOC(1, 0)
+#define SRQC_WQE_HOP_NUM SRQC_FIELD_LOC(3, 2)
+#define SRQC_SHIFT SRQC_FIELD_LOC(7, 4)
+#define SRQC_SRQN SRQC_FIELD_LOC(31, 8)
+#define SRQC_LIMIT_WL SRQC_FIELD_LOC(47, 32)
+#define SRQC_RSV0 SRQC_FIELD_LOC(63, 48)
+#define SRQC_XRCD SRQC_FIELD_LOC(87, 64)
+#define SRQC_RSV1 SRQC_FIELD_LOC(95, 88)
+#define SRQC_PRODUCER_IDX SRQC_FIELD_LOC(111, 96)
+#define SRQC_CONSUMER_IDX SRQC_FIELD_LOC(127, 112)
+#define SRQC_WQE_BT_BA_L SRQC_FIELD_LOC(159, 128)
+#define SRQC_WQE_BT_BA_H SRQC_FIELD_LOC(188, 160)
+#define SRQC_RSV2 SRQC_FIELD_LOC(190, 189)
+#define SRQC_SRQ_TYPE SRQC_FIELD_LOC(191, 191)
+#define SRQC_PD SRQC_FIELD_LOC(215, 192)
+#define SRQC_RQWS SRQC_FIELD_LOC(219, 216)
+#define SRQC_RSV3 SRQC_FIELD_LOC(223, 220)
+#define SRQC_IDX_BT_BA_L SRQC_FIELD_LOC(255, 224)
+#define SRQC_IDX_BT_BA_H SRQC_FIELD_LOC(284, 256)
+#define SRQC_RSV4 SRQC_FIELD_LOC(287, 285)
+#define SRQC_IDX_CUR_BLK_ADDR_L SRQC_FIELD_LOC(319, 288)
+#define SRQC_IDX_CUR_BLK_ADDR_H SRQC_FIELD_LOC(339, 320)
+#define SRQC_RSV5 SRQC_FIELD_LOC(341, 340)
+#define SRQC_IDX_HOP_NUM SRQC_FIELD_LOC(343, 342)
+#define SRQC_IDX_BA_PG_SZ SRQC_FIELD_LOC(347, 344)
+#define SRQC_IDX_BUF_PG_SZ SRQC_FIELD_LOC(351, 348)
+#define SRQC_IDX_NXT_BLK_ADDR_L SRQC_FIELD_LOC(383, 352)
+#define SRQC_IDX_NXT_BLK_ADDR_H SRQC_FIELD_LOC(403, 384)
+#define SRQC_RSV6 SRQC_FIELD_LOC(415, 404)
+#define SRQC_XRC_CQN SRQC_FIELD_LOC(439, 416)
+#define SRQC_WQE_BA_PG_SZ SRQC_FIELD_LOC(443, 440)
+#define SRQC_WQE_BUF_PG_SZ SRQC_FIELD_LOC(447, 444)
+#define SRQC_DB_RECORD_EN SRQC_FIELD_LOC(448, 448)
+#define SRQC_DB_RECORD_ADDR_L SRQC_FIELD_LOC(479, 449)
+#define SRQC_DB_RECORD_ADDR_H SRQC_FIELD_LOC(511, 480)
+
+enum {
+ V2_MPT_ST_VALID = 0x1,
+ V2_MPT_ST_FREE = 0x2,
+};
+
+enum hns_roce_v2_qp_state {
+ HNS_ROCE_QP_ST_RST,
+ HNS_ROCE_QP_ST_INIT,
+ HNS_ROCE_QP_ST_RTR,
+ HNS_ROCE_QP_ST_RTS,
+ HNS_ROCE_QP_ST_SQD,
+ HNS_ROCE_QP_ST_SQER,
+ HNS_ROCE_QP_ST_ERR,
+ HNS_ROCE_QP_ST_SQ_DRAINING,
+ HNS_ROCE_QP_NUM_ST
+};
+
+struct hns_roce_v2_qp_context_ex {
+ __le32 data[64];
+};
+
+struct hns_roce_v2_qp_context {
+ __le32 byte_4_sqpn_tst;
+ __le32 wqe_sge_ba;
+ __le32 byte_12_sq_hop;
+ __le32 byte_16_buf_ba_pg_sz;
+ __le32 byte_20_smac_sgid_idx;
+ __le32 byte_24_mtu_tc;
+ __le32 byte_28_at_fl;
+ u8 dgid[GID_LEN_V2];
+ __le32 dmac;
+ __le32 byte_52_udpspn_dmac;
+ __le32 byte_56_dqpn_err;
+ __le32 byte_60_qpst_tempid;
+ __le32 qkey_xrcd;
+ __le32 byte_68_rq_db;
+ __le32 rq_db_record_addr;
+ __le32 byte_76_srqn_op_en;
+ __le32 byte_80_rnr_rx_cqn;
+ __le32 byte_84_rq_ci_pi;
+ __le32 rq_cur_blk_addr;
+ __le32 byte_92_srq_info;
+ __le32 byte_96_rx_reqmsn;
+ __le32 rq_nxt_blk_addr;
+ __le32 byte_104_rq_sge;
+ __le32 byte_108_rx_reqepsn;
+ __le32 rq_rnr_timer;
+ __le32 rx_msg_len;
+ __le32 rx_rkey_pkt_info;
+ __le64 rx_va;
+ __le32 byte_132_trrl;
+ __le32 trrl_ba;
+ __le32 byte_140_raq;
+ __le32 byte_144_raq;
+ __le32 byte_148_raq;
+ __le32 byte_152_raq;
+ __le32 byte_156_raq;
+ __le32 byte_160_sq_ci_pi;
+ __le32 sq_cur_blk_addr;
+ __le32 byte_168_irrl_idx;
+ __le32 byte_172_sq_psn;
+ __le32 byte_176_msg_pktn;
+ __le32 sq_cur_sge_blk_addr;
+ __le32 byte_184_irrl_idx;
+ __le32 cur_sge_offset;
+ __le32 byte_192_ext_sge;
+ __le32 byte_196_sq_psn;
+ __le32 byte_200_sq_max;
+ __le32 irrl_ba;
+ __le32 byte_208_irrl;
+ __le32 byte_212_lsn;
+ __le32 sq_timer;
+ __le32 byte_220_retry_psn_msn;
+ __le32 byte_224_retry_msg;
+ __le32 rx_sq_cur_blk_addr;
+ __le32 byte_232_irrl_sge;
+ __le32 irrl_cur_sge_offset;
+ __le32 byte_240_irrl_tail;
+ __le32 byte_244_rnr_rxack;
+ __le32 byte_248_ack_psn;
+ __le32 byte_252_err_txcqn;
+ __le32 byte_256_sqflush_rqcqe;
+
+ struct hns_roce_v2_qp_context_ex ext;
+};
+
+#define QPC_TRRL_BA_L_S 4
+#define QPC_TRRL_BA_M_S (16 + QPC_TRRL_BA_L_S)
+#define QPC_TRRL_BA_H_S (32 + QPC_TRRL_BA_M_S)
+#define QPC_IRRL_BA_L_S 6
+#define QPC_IRRL_BA_H_S (32 + QPC_IRRL_BA_L_S)
+
+#define QPC_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_v2_qp_context, h, l)
+
+#define QPC_TST QPC_FIELD_LOC(2, 0)
+#define QPC_SGE_SHIFT QPC_FIELD_LOC(7, 3)
+#define QPC_CNP_TIMER QPC_FIELD_LOC(31, 8)
+#define QPC_WQE_SGE_BA_L QPC_FIELD_LOC(63, 32)
+#define QPC_WQE_SGE_BA_H QPC_FIELD_LOC(92, 64)
+#define QPC_SQ_HOP_NUM QPC_FIELD_LOC(94, 93)
+#define QPC_CIRE_EN QPC_FIELD_LOC(95, 95)
+#define QPC_WQE_SGE_BA_PG_SZ QPC_FIELD_LOC(99, 96)
+#define QPC_WQE_SGE_BUF_PG_SZ QPC_FIELD_LOC(103, 100)
+#define QPC_PD QPC_FIELD_LOC(127, 104)
+#define QPC_RQ_HOP_NUM QPC_FIELD_LOC(129, 128)
+#define QPC_SGE_HOP_NUM QPC_FIELD_LOC(131, 130)
+#define QPC_RQWS QPC_FIELD_LOC(135, 132)
+#define QPC_SQ_SHIFT QPC_FIELD_LOC(139, 136)
+#define QPC_RQ_SHIFT QPC_FIELD_LOC(143, 140)
+#define QPC_GMV_IDX QPC_FIELD_LOC(159, 144)
+#define QPC_HOPLIMIT QPC_FIELD_LOC(167, 160)
+#define QPC_TC QPC_FIELD_LOC(175, 168)
+#define QPC_VLAN_ID QPC_FIELD_LOC(187, 176)
+#define QPC_MTU QPC_FIELD_LOC(191, 188)
+#define QPC_FL QPC_FIELD_LOC(211, 192)
+#define QPC_SL QPC_FIELD_LOC(215, 212)
+#define QPC_CNP_TX_FLAG QPC_FIELD_LOC(216, 216)
+#define QPC_CE_FLAG QPC_FIELD_LOC(217, 217)
+#define QPC_LBI QPC_FIELD_LOC(218, 218)
+#define QPC_AT QPC_FIELD_LOC(223, 219)
+#define QPC_DGID QPC_FIELD_LOC(351, 224)
+#define QPC_DMAC_L QPC_FIELD_LOC(383, 352)
+#define QPC_DMAC_H QPC_FIELD_LOC(399, 384)
+#define QPC_UDPSPN QPC_FIELD_LOC(415, 400)
+#define QPC_DQPN QPC_FIELD_LOC(439, 416)
+#define QPC_SQ_TX_ERR QPC_FIELD_LOC(440, 440)
+#define QPC_SQ_RX_ERR QPC_FIELD_LOC(441, 441)
+#define QPC_RQ_TX_ERR QPC_FIELD_LOC(442, 442)
+#define QPC_RQ_RX_ERR QPC_FIELD_LOC(443, 443)
+#define QPC_LP_PKTN_INI QPC_FIELD_LOC(447, 444)
+#define QPC_CONG_ALGO_TMPL_ID QPC_FIELD_LOC(455, 448)
+#define QPC_SCC_TOKEN QPC_FIELD_LOC(474, 456)
+#define QPC_SQ_DB_DOING QPC_FIELD_LOC(475, 475)
+#define QPC_RQ_DB_DOING QPC_FIELD_LOC(476, 476)
+#define QPC_QP_ST QPC_FIELD_LOC(479, 477)
+#define QPC_QKEY_XRCD QPC_FIELD_LOC(511, 480)
+#define QPC_RQ_RECORD_EN QPC_FIELD_LOC(512, 512)
+#define QPC_RQ_DB_RECORD_ADDR_L QPC_FIELD_LOC(543, 513)
+#define QPC_RQ_DB_RECORD_ADDR_H QPC_FIELD_LOC(575, 544)
+#define QPC_SRQN QPC_FIELD_LOC(599, 576)
+#define QPC_SRQ_EN QPC_FIELD_LOC(600, 600)
+#define QPC_RRE QPC_FIELD_LOC(601, 601)
+#define QPC_RWE QPC_FIELD_LOC(602, 602)
+#define QPC_ATE QPC_FIELD_LOC(603, 603)
+#define QPC_RQIE QPC_FIELD_LOC(604, 604)
+#define QPC_EXT_ATE QPC_FIELD_LOC(605, 605)
+#define QPC_RQ_VLAN_EN QPC_FIELD_LOC(606, 606)
+#define QPC_RQ_RTY_TX_ERR QPC_FIELD_LOC(607, 607)
+#define QPC_RX_CQN QPC_FIELD_LOC(631, 608)
+#define QPC_XRC_QP_TYPE QPC_FIELD_LOC(632, 632)
+#define QPC_CQEIE QPC_FIELD_LOC(633, 633)
+#define QPC_CQEIS QPC_FIELD_LOC(634, 634)
+#define QPC_MIN_RNR_TIME QPC_FIELD_LOC(639, 635)
+#define QPC_RQ_PRODUCER_IDX QPC_FIELD_LOC(655, 640)
+#define QPC_RQ_CONSUMER_IDX QPC_FIELD_LOC(671, 656)
+#define QPC_RQ_CUR_BLK_ADDR_L QPC_FIELD_LOC(703, 672)
+#define QPC_RQ_CUR_BLK_ADDR_H QPC_FIELD_LOC(723, 704)
+#define QPC_SRQ_INFO QPC_FIELD_LOC(735, 724)
+#define QPC_RX_REQ_MSN QPC_FIELD_LOC(759, 736)
+#define QPC_REDUCE_CODE QPC_FIELD_LOC(766, 760)
+#define QPC_RX_XRC_PKT_CQE_FLG QPC_FIELD_LOC(767, 767)
+#define QPC_RQ_NXT_BLK_ADDR_L QPC_FIELD_LOC(799, 768)
+#define QPC_RQ_NXT_BLK_ADDR_H QPC_FIELD_LOC(819, 800)
+#define QPC_REDUCE_EN QPC_FIELD_LOC(820, 820)
+#define QPC_FLUSH_EN QPC_FIELD_LOC(821, 821)
+#define QPC_AW_EN QPC_FIELD_LOC(822, 822)
+#define QPC_WN_EN QPC_FIELD_LOC(823, 823)
+#define QPC_RQ_CUR_WQE_SGE_NUM QPC_FIELD_LOC(831, 824)
+#define QPC_INV_CREDIT QPC_FIELD_LOC(832, 832)
+#define QPC_LAST_WRITE_TYPE QPC_FIELD_LOC(834, 833)
+#define QPC_RX_REQ_PSN_ERR QPC_FIELD_LOC(835, 835)
+#define QPC_RX_REQ_LAST_OPTYPE QPC_FIELD_LOC(838, 836)
+#define QPC_RX_REQ_RNR QPC_FIELD_LOC(839, 839)
+#define QPC_RX_REQ_EPSN QPC_FIELD_LOC(863, 840)
+#define QPC_RQ_RNR_TIMER QPC_FIELD_LOC(895, 864)
+#define QPC_RX_MSG_LEN QPC_FIELD_LOC(927, 896)
+#define QPC_RX_RKEY_PKT_INFO QPC_FIELD_LOC(959, 928)
+#define QPC_RX_VA QPC_FIELD_LOC(1023, 960)
+#define QPC_TRRL_HEAD_MAX QPC_FIELD_LOC(1031, 1024)
+#define QPC_TRRL_TAIL_MAX QPC_FIELD_LOC(1039, 1032)
+#define QPC_TRRL_BA_L QPC_FIELD_LOC(1055, 1040)
+#define QPC_TRRL_BA_M QPC_FIELD_LOC(1087, 1056)
+#define QPC_TRRL_BA_H QPC_FIELD_LOC(1099, 1088)
+#define QPC_RR_MAX QPC_FIELD_LOC(1102, 1100)
+#define QPC_RQ_RTY_WAIT_DO QPC_FIELD_LOC(1103, 1103)
+#define QPC_RAQ_TRRL_HEAD QPC_FIELD_LOC(1111, 1104)
+#define QPC_RAQ_TRRL_TAIL QPC_FIELD_LOC(1119, 1112)
+#define QPC_RAQ_RTY_INI_PSN QPC_FIELD_LOC(1143, 1120)
+#define QPC_CIRE_SLV_RQ_EN QPC_FIELD_LOC(1144, 1144)
+#define QPC_RAQ_CREDIT QPC_FIELD_LOC(1149, 1145)
+#define QPC_RQ_DB_IN_EXT QPC_FIELD_LOC(1150, 1150)
+#define QPC_RESP_RTY_FLG QPC_FIELD_LOC(1151, 1151)
+#define QPC_RAQ_MSN QPC_FIELD_LOC(1175, 1152)
+#define QPC_RAQ_SYNDROME QPC_FIELD_LOC(1183, 1176)
+#define QPC_RAQ_PSN QPC_FIELD_LOC(1207, 1184)
+#define QPC_RAQ_TRRL_RTY_HEAD QPC_FIELD_LOC(1215, 1208)
+#define QPC_RAQ_USE_PKTN QPC_FIELD_LOC(1239, 1216)
+#define QPC_RQ_SCC_TOKEN QPC_FIELD_LOC(1245, 1240)
+#define QPC_RVD10 QPC_FIELD_LOC(1247, 1246)
+#define QPC_SQ_PRODUCER_IDX QPC_FIELD_LOC(1263, 1248)
+#define QPC_SQ_CONSUMER_IDX QPC_FIELD_LOC(1279, 1264)
+#define QPC_SQ_CUR_BLK_ADDR_L QPC_FIELD_LOC(1311, 1280)
+#define QPC_SQ_CUR_BLK_ADDR_H QPC_FIELD_LOC(1331, 1312)
+#define QPC_MSG_RTY_LP_FLG QPC_FIELD_LOC(1332, 1332)
+#define QPC_SQ_INVLD_FLG QPC_FIELD_LOC(1333, 1333)
+#define QPC_LP_SGEN_INI QPC_FIELD_LOC(1335, 1334)
+#define QPC_SQ_VLAN_EN QPC_FIELD_LOC(1336, 1336)
+#define QPC_POLL_DB_WAIT_DO QPC_FIELD_LOC(1337, 1337)
+#define QPC_SCC_TOKEN_FORBID_SQ_DEQ QPC_FIELD_LOC(1338, 1338)
+#define QPC_WAIT_ACK_TIMEOUT QPC_FIELD_LOC(1339, 1339)
+#define QPC_IRRL_IDX_LSB QPC_FIELD_LOC(1343, 1340)
+#define QPC_ACK_REQ_FREQ QPC_FIELD_LOC(1349, 1344)
+#define QPC_MSG_RNR_FLG QPC_FIELD_LOC(1350, 1350)
+#define QPC_FRE QPC_FIELD_LOC(1351, 1351)
+#define QPC_SQ_CUR_PSN QPC_FIELD_LOC(1375, 1352)
+#define QPC_MSG_USE_PKTN QPC_FIELD_LOC(1399, 1376)
+#define QPC_IRRL_HEAD_PRE QPC_FIELD_LOC(1407, 1400)
+#define QPC_SQ_CUR_SGE_BLK_ADDR_L QPC_FIELD_LOC(1439, 1408)
+#define QPC_SQ_CUR_SGE_BLK_ADDR_H QPC_FIELD_LOC(1459, 1440)
+#define QPC_IRRL_IDX_MSB QPC_FIELD_LOC(1471, 1460)
+#define QPC_CUR_SGE_OFFSET QPC_FIELD_LOC(1503, 1472)
+#define QPC_CUR_SGE_IDX QPC_FIELD_LOC(1527, 1504)
+#define QPC_EXT_SGE_NUM_LEFT QPC_FIELD_LOC(1535, 1528)
+#define QPC_OWNER_MODE QPC_FIELD_LOC(1536, 1536)
+#define QPC_CIRE_SLV_SQ_EN QPC_FIELD_LOC(1537, 1537)
+#define QPC_CIRE_DOING QPC_FIELD_LOC(1538, 1538)
+#define QPC_CIRE_RESULT QPC_FIELD_LOC(1539, 1539)
+#define QPC_OWNER_DB_WAIT_DO QPC_FIELD_LOC(1540, 1540)
+#define QPC_SQ_WQE_INVLD QPC_FIELD_LOC(1541, 1541)
+#define QPC_DCA_MODE QPC_FIELD_LOC(1542, 1542)
+#define QPC_RTY_OWNER_NOCHK QPC_FIELD_LOC(1543, 1543)
+#define QPC_V2_IRRL_HEAD QPC_FIELD_LOC(1543, 1536)
+#define QPC_SQ_MAX_PSN QPC_FIELD_LOC(1567, 1544)
+#define QPC_SQ_MAX_IDX QPC_FIELD_LOC(1583, 1568)
+#define QPC_LCL_OPERATED_CNT QPC_FIELD_LOC(1599, 1584)
+#define QPC_IRRL_BA_L QPC_FIELD_LOC(1631, 1600)
+#define QPC_IRRL_BA_H QPC_FIELD_LOC(1657, 1632)
+#define QPC_PKT_RNR_FLG QPC_FIELD_LOC(1658, 1658)
+#define QPC_PKT_RTY_FLG QPC_FIELD_LOC(1659, 1659)
+#define QPC_RMT_E2E QPC_FIELD_LOC(1660, 1660)
+#define QPC_SR_MAX QPC_FIELD_LOC(1663, 1661)
+#define QPC_LSN QPC_FIELD_LOC(1687, 1664)
+#define QPC_RETRY_NUM_INIT QPC_FIELD_LOC(1690, 1688)
+#define QPC_CHECK_FLG QPC_FIELD_LOC(1692, 1691)
+#define QPC_RETRY_CNT QPC_FIELD_LOC(1695, 1693)
+#define QPC_SQ_TIMER QPC_FIELD_LOC(1727, 1696)
+#define QPC_RETRY_MSG_MSN QPC_FIELD_LOC(1743, 1728)
+#define QPC_RETRY_MSG_PSN_L QPC_FIELD_LOC(1759, 1744)
+#define QPC_RETRY_MSG_PSN_H QPC_FIELD_LOC(1767, 1760)
+#define QPC_RETRY_MSG_FPKT_PSN QPC_FIELD_LOC(1791, 1768)
+#define QPC_RX_SQ_CUR_BLK_ADDR_L QPC_FIELD_LOC(1823, 1792)
+#define QPC_RX_SQ_CUR_BLK_ADDR_H QPC_FIELD_LOC(1843, 1824)
+#define QPC_IRRL_SGE_IDX QPC_FIELD_LOC(1851, 1844)
+#define QPC_LSAN_EN QPC_FIELD_LOC(1852, 1852)
+#define QPC_SO_LP_VLD QPC_FIELD_LOC(1853, 1853)
+#define QPC_FENCE_LP_VLD QPC_FIELD_LOC(1854, 1854)
+#define QPC_IRRL_LP_VLD QPC_FIELD_LOC(1855, 1855)
+#define QPC_IRRL_CUR_SGE_OFFSET QPC_FIELD_LOC(1887, 1856)
+#define QPC_IRRL_TAIL_REAL QPC_FIELD_LOC(1895, 1888)
+#define QPC_IRRL_TAIL_RD QPC_FIELD_LOC(1903, 1896)
+#define QPC_RX_ACK_MSN QPC_FIELD_LOC(1919, 1904)
+#define QPC_RX_ACK_EPSN QPC_FIELD_LOC(1943, 1920)
+#define QPC_RNR_NUM_INIT QPC_FIELD_LOC(1946, 1944)
+#define QPC_RNR_CNT QPC_FIELD_LOC(1949, 1947)
+#define QPC_LCL_OP_FLG QPC_FIELD_LOC(1950, 1950)
+#define QPC_IRRL_RD_FLG QPC_FIELD_LOC(1951, 1951)
+#define QPC_IRRL_PSN QPC_FIELD_LOC(1975, 1952)
+#define QPC_ACK_PSN_ERR QPC_FIELD_LOC(1976, 1976)
+#define QPC_ACK_LAST_OPTYPE QPC_FIELD_LOC(1978, 1977)
+#define QPC_IRRL_PSN_VLD QPC_FIELD_LOC(1979, 1979)
+#define QPC_RNR_RETRY_FLAG QPC_FIELD_LOC(1980, 1980)
+#define QPC_SQ_RTY_TX_ERR QPC_FIELD_LOC(1981, 1981)
+#define QPC_LAST_IND QPC_FIELD_LOC(1982, 1982)
+#define QPC_CQ_ERR_IND QPC_FIELD_LOC(1983, 1983)
+#define QPC_TX_CQN QPC_FIELD_LOC(2007, 1984)
+#define QPC_SIG_TYPE QPC_FIELD_LOC(2008, 2008)
+#define QPC_ERR_TYPE QPC_FIELD_LOC(2015, 2009)
+#define QPC_RQ_CQE_IDX QPC_FIELD_LOC(2031, 2016)
+#define QPC_SQ_FLUSH_IDX QPC_FIELD_LOC(2047, 2032)
+
+#define RETRY_MSG_PSN_SHIFT 16
+
+#define QPCEX_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_v2_qp_context_ex, h, l)
+
+#define QPCEX_CONG_ALG_SEL QPCEX_FIELD_LOC(0, 0)
+#define QPCEX_CONG_ALG_SUB_SEL QPCEX_FIELD_LOC(1, 1)
+#define QPCEX_DIP_CTX_IDX_VLD QPCEX_FIELD_LOC(2, 2)
+#define QPCEX_DIP_CTX_IDX QPCEX_FIELD_LOC(22, 3)
+#define QPCEX_SQ_RQ_NOT_FORBID_EN QPCEX_FIELD_LOC(23, 23)
+#define QPCEX_STASH QPCEX_FIELD_LOC(82, 82)
+
+#define SCC_CONTEXT_SIZE 16
+
+struct hns_roce_v2_scc_context {
+ __le32 data[SCC_CONTEXT_SIZE];
+};
+
+#define V2_QP_RWE_S 1 /* rdma write enable */
+#define V2_QP_RRE_S 2 /* rdma read enable */
+#define V2_QP_ATE_S 3 /* rdma atomic enable */
+
+struct hns_roce_v2_cqe {
+ __le32 byte_4;
+ union {
+ __le32 rkey;
+ __le32 immtdata;
+ };
+ __le32 byte_12;
+ __le32 byte_16;
+ __le32 byte_cnt;
+ u8 smac[4];
+ __le32 byte_28;
+ __le32 byte_32;
+ __le32 rsv[8];
+};
+
+#define CQE_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_v2_cqe, h, l)
+
+#define CQE_OPCODE CQE_FIELD_LOC(4, 0)
+#define CQE_RQ_INLINE CQE_FIELD_LOC(5, 5)
+#define CQE_S_R CQE_FIELD_LOC(6, 6)
+#define CQE_OWNER CQE_FIELD_LOC(7, 7)
+#define CQE_STATUS CQE_FIELD_LOC(15, 8)
+#define CQE_WQE_IDX CQE_FIELD_LOC(31, 16)
+#define CQE_RKEY_IMMTDATA CQE_FIELD_LOC(63, 32)
+#define CQE_XRC_SRQN CQE_FIELD_LOC(87, 64)
+#define CQE_RSV0 CQE_FIELD_LOC(95, 88)
+#define CQE_LCL_QPN CQE_FIELD_LOC(119, 96)
+#define CQE_SUB_STATUS CQE_FIELD_LOC(127, 120)
+#define CQE_BYTE_CNT CQE_FIELD_LOC(159, 128)
+#define CQE_SMAC CQE_FIELD_LOC(207, 160)
+#define CQE_PORT_TYPE CQE_FIELD_LOC(209, 208)
+#define CQE_VID CQE_FIELD_LOC(221, 210)
+#define CQE_VID_VLD CQE_FIELD_LOC(222, 222)
+#define CQE_RSV2 CQE_FIELD_LOC(223, 223)
+#define CQE_RMT_QPN CQE_FIELD_LOC(247, 224)
+#define CQE_SL CQE_FIELD_LOC(250, 248)
+#define CQE_PORTN CQE_FIELD_LOC(253, 251)
+#define CQE_GRH CQE_FIELD_LOC(254, 254)
+#define CQE_LPK CQE_FIELD_LOC(255, 255)
+#define CQE_RSV3 CQE_FIELD_LOC(511, 256)
+
+struct hns_roce_v2_mpt_entry {
+ __le32 byte_4_pd_hop_st;
+ __le32 byte_8_mw_cnt_en;
+ __le32 byte_12_mw_pa;
+ __le32 bound_lkey;
+ __le32 len_l;
+ __le32 len_h;
+ __le32 lkey;
+ __le32 va_l;
+ __le32 va_h;
+ __le32 pbl_size;
+ __le32 pbl_ba_l;
+ __le32 byte_48_mode_ba;
+ __le32 pa0_l;
+ __le32 byte_56_pa0_h;
+ __le32 pa1_l;
+ __le32 byte_64_buf_pa1;
+};
+
+#define MPT_PBL_BUF_ADDR_S 6
+#define MPT_PBL_BA_ADDR_S 3
+
+#define MPT_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_v2_mpt_entry, h, l)
+
+#define MPT_ST MPT_FIELD_LOC(1, 0)
+#define MPT_PBL_HOP_NUM MPT_FIELD_LOC(3, 2)
+#define MPT_PBL_BA_PG_SZ MPT_FIELD_LOC(7, 4)
+#define MPT_PD MPT_FIELD_LOC(31, 8)
+#define MPT_RA_EN MPT_FIELD_LOC(32, 32)
+#define MPT_R_INV_EN MPT_FIELD_LOC(33, 33)
+#define MPT_L_INV_EN MPT_FIELD_LOC(34, 34)
+#define MPT_BIND_EN MPT_FIELD_LOC(35, 35)
+#define MPT_ATOMIC_EN MPT_FIELD_LOC(36, 36)
+#define MPT_RR_EN MPT_FIELD_LOC(37, 37)
+#define MPT_RW_EN MPT_FIELD_LOC(38, 38)
+#define MPT_LW_EN MPT_FIELD_LOC(39, 39)
+#define MPT_MW_CNT MPT_FIELD_LOC(63, 40)
+#define MPT_FRE MPT_FIELD_LOC(64, 64)
+#define MPT_PA MPT_FIELD_LOC(65, 65)
+#define MPT_ZBVA MPT_FIELD_LOC(66, 66)
+#define MPT_SHARE MPT_FIELD_LOC(67, 67)
+#define MPT_MR_MW MPT_FIELD_LOC(68, 68)
+#define MPT_BPD MPT_FIELD_LOC(69, 69)
+#define MPT_BQP MPT_FIELD_LOC(70, 70)
+#define MPT_INNER_PA_VLD MPT_FIELD_LOC(71, 71)
+#define MPT_MW_BIND_QPN MPT_FIELD_LOC(95, 72)
+#define MPT_BOUND_LKEY MPT_FIELD_LOC(127, 96)
+#define MPT_LEN_L MPT_FIELD_LOC(159, 128)
+#define MPT_LEN_H MPT_FIELD_LOC(191, 160)
+#define MPT_LKEY MPT_FIELD_LOC(223, 192)
+#define MPT_VA MPT_FIELD_LOC(287, 224)
+#define MPT_PBL_SIZE MPT_FIELD_LOC(319, 288)
+#define MPT_PBL_BA_L MPT_FIELD_LOC(351, 320)
+#define MPT_PBL_BA_H MPT_FIELD_LOC(380, 352)
+#define MPT_BLK_MODE MPT_FIELD_LOC(381, 381)
+#define MPT_RSV0 MPT_FIELD_LOC(383, 382)
+#define MPT_PA0_L MPT_FIELD_LOC(415, 384)
+#define MPT_PA0_H MPT_FIELD_LOC(441, 416)
+#define MPT_BOUND_VA MPT_FIELD_LOC(447, 442)
+#define MPT_PA1_L MPT_FIELD_LOC(479, 448)
+#define MPT_PA1_H MPT_FIELD_LOC(505, 480)
+#define MPT_PERSIST_EN MPT_FIELD_LOC(506, 506)
+#define MPT_RSV2 MPT_FIELD_LOC(507, 507)
+#define MPT_PBL_BUF_PG_SZ MPT_FIELD_LOC(511, 508)
+
+#define V2_MPT_BYTE_4_MPT_ST_S 0
+#define V2_MPT_BYTE_4_MPT_ST_M GENMASK(1, 0)
+
+#define V2_MPT_BYTE_4_PBL_HOP_NUM_S 2
+#define V2_MPT_BYTE_4_PBL_HOP_NUM_M GENMASK(3, 2)
+
+#define V2_MPT_BYTE_4_PBL_BA_PG_SZ_S 4
+#define V2_MPT_BYTE_4_PBL_BA_PG_SZ_M GENMASK(7, 4)
+
+#define V2_MPT_BYTE_4_PD_S 8
+#define V2_MPT_BYTE_4_PD_M GENMASK(31, 8)
+
+#define V2_MPT_BYTE_8_RA_EN_S 0
+
+#define V2_MPT_BYTE_8_R_INV_EN_S 1
+
+#define V2_MPT_BYTE_8_L_INV_EN_S 2
+
+#define V2_MPT_BYTE_8_BIND_EN_S 3
+
+#define V2_MPT_BYTE_8_ATOMIC_EN_S 4
+
+#define V2_MPT_BYTE_8_RR_EN_S 5
+
+#define V2_MPT_BYTE_8_RW_EN_S 6
+
+#define V2_MPT_BYTE_8_LW_EN_S 7
+
+#define V2_MPT_BYTE_12_FRE_S 0
+
+#define V2_MPT_BYTE_12_PA_S 1
+
+#define V2_MPT_BYTE_12_BPD_S 5
+
+#define V2_MPT_BYTE_12_BQP_S 6
+
+#define V2_MPT_BYTE_12_INNER_PA_VLD_S 7
+
+#define V2_MPT_BYTE_48_PBL_BA_H_S 0
+#define V2_MPT_BYTE_48_PBL_BA_H_M GENMASK(28, 0)
+
+#define V2_MPT_BYTE_48_BLK_MODE_S 29
+
+#define V2_MPT_BYTE_56_PA0_H_S 0
+#define V2_MPT_BYTE_56_PA0_H_M GENMASK(25, 0)
+
+#define V2_MPT_BYTE_64_PA1_H_S 0
+#define V2_MPT_BYTE_64_PA1_H_M GENMASK(25, 0)
+
+#define V2_MPT_BYTE_64_PBL_BUF_PG_SZ_S 28
+#define V2_MPT_BYTE_64_PBL_BUF_PG_SZ_M GENMASK(31, 28)
+
+struct hns_roce_v2_db {
+ __le32 data[2];
+};
+
+#define DB_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_v2_db, h, l)
+
+#define DB_TAG DB_FIELD_LOC(23, 0)
+#define DB_CMD DB_FIELD_LOC(27, 24)
+#define DB_FLAG DB_FIELD_LOC(31, 31)
+#define DB_PI DB_FIELD_LOC(47, 32)
+#define DB_SL DB_FIELD_LOC(50, 48)
+#define DB_CQ_CI DB_FIELD_LOC(55, 32)
+#define DB_CQ_NOTIFY DB_FIELD_LOC(56, 56)
+#define DB_CQ_CMD_SN DB_FIELD_LOC(58, 57)
+#define EQ_DB_TAG DB_FIELD_LOC(7, 0)
+#define EQ_DB_CMD DB_FIELD_LOC(17, 16)
+#define EQ_DB_CI DB_FIELD_LOC(55, 32)
+
+#define V2_DB_PRODUCER_IDX_S 0
+#define V2_DB_PRODUCER_IDX_M GENMASK(15, 0)
+
+#define V2_CQ_DB_CONS_IDX_S 0
+#define V2_CQ_DB_CONS_IDX_M GENMASK(23, 0)
+
+struct hns_roce_v2_ud_send_wqe {
+ __le32 byte_4;
+ __le32 msg_len;
+ __le32 immtdata;
+ __le32 byte_16;
+ __le32 byte_20;
+ __le32 byte_24;
+ __le32 qkey;
+ __le32 byte_32;
+ __le32 byte_36;
+ __le32 byte_40;
+ u8 dmac[ETH_ALEN];
+ u8 sgid_index;
+ u8 smac_index;
+ u8 dgid[GID_LEN_V2];
+};
+
+#define UD_SEND_WQE_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_v2_ud_send_wqe, h, l)
+
+#define UD_SEND_WQE_OPCODE UD_SEND_WQE_FIELD_LOC(4, 0)
+#define UD_SEND_WQE_OWNER UD_SEND_WQE_FIELD_LOC(7, 7)
+#define UD_SEND_WQE_CQE UD_SEND_WQE_FIELD_LOC(8, 8)
+#define UD_SEND_WQE_SE UD_SEND_WQE_FIELD_LOC(11, 11)
+#define UD_SEND_WQE_PD UD_SEND_WQE_FIELD_LOC(119, 96)
+#define UD_SEND_WQE_SGE_NUM UD_SEND_WQE_FIELD_LOC(127, 120)
+#define UD_SEND_WQE_MSG_START_SGE_IDX UD_SEND_WQE_FIELD_LOC(151, 128)
+#define UD_SEND_WQE_UDPSPN UD_SEND_WQE_FIELD_LOC(191, 176)
+#define UD_SEND_WQE_DQPN UD_SEND_WQE_FIELD_LOC(247, 224)
+#define UD_SEND_WQE_VLAN UD_SEND_WQE_FIELD_LOC(271, 256)
+#define UD_SEND_WQE_HOPLIMIT UD_SEND_WQE_FIELD_LOC(279, 272)
+#define UD_SEND_WQE_TCLASS UD_SEND_WQE_FIELD_LOC(287, 280)
+#define UD_SEND_WQE_FLOW_LABEL UD_SEND_WQE_FIELD_LOC(307, 288)
+#define UD_SEND_WQE_SL UD_SEND_WQE_FIELD_LOC(311, 308)
+#define UD_SEND_WQE_VLAN_EN UD_SEND_WQE_FIELD_LOC(318, 318)
+#define UD_SEND_WQE_LBI UD_SEND_WQE_FIELD_LOC(319, 319)
+
+struct hns_roce_v2_rc_send_wqe {
+ __le32 byte_4;
+ __le32 msg_len;
+ union {
+ __le32 inv_key;
+ __le32 immtdata;
+ };
+ __le32 byte_16;
+ __le32 byte_20;
+ __le32 rkey;
+ __le64 va;
+};
+
+#define RC_SEND_WQE_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_v2_rc_send_wqe, h, l)
+
+#define RC_SEND_WQE_OPCODE RC_SEND_WQE_FIELD_LOC(4, 0)
+#define RC_SEND_WQE_DB_SL_L RC_SEND_WQE_FIELD_LOC(6, 5)
+#define RC_SEND_WQE_DB_SL_H RC_SEND_WQE_FIELD_LOC(14, 13)
+#define RC_SEND_WQE_OWNER RC_SEND_WQE_FIELD_LOC(7, 7)
+#define RC_SEND_WQE_CQE RC_SEND_WQE_FIELD_LOC(8, 8)
+#define RC_SEND_WQE_FENCE RC_SEND_WQE_FIELD_LOC(9, 9)
+#define RC_SEND_WQE_SO RC_SEND_WQE_FIELD_LOC(10, 10)
+#define RC_SEND_WQE_SE RC_SEND_WQE_FIELD_LOC(11, 11)
+#define RC_SEND_WQE_INLINE RC_SEND_WQE_FIELD_LOC(12, 12)
+#define RC_SEND_WQE_WQE_INDEX RC_SEND_WQE_FIELD_LOC(30, 15)
+#define RC_SEND_WQE_FLAG RC_SEND_WQE_FIELD_LOC(31, 31)
+#define RC_SEND_WQE_XRC_SRQN RC_SEND_WQE_FIELD_LOC(119, 96)
+#define RC_SEND_WQE_SGE_NUM RC_SEND_WQE_FIELD_LOC(127, 120)
+#define RC_SEND_WQE_MSG_START_SGE_IDX RC_SEND_WQE_FIELD_LOC(151, 128)
+#define RC_SEND_WQE_INL_TYPE RC_SEND_WQE_FIELD_LOC(159, 159)
+
+struct hns_roce_wqe_frmr_seg {
+ __le32 pbl_size;
+ __le32 byte_40;
+};
+
+#define FRMR_WQE_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_wqe_frmr_seg, h, l)
+
+#define FRMR_PBL_SIZE FRMR_WQE_FIELD_LOC(31, 0)
+#define FRMR_BLOCK_SIZE FRMR_WQE_FIELD_LOC(35, 32)
+#define FRMR_PBL_BUF_PG_SZ FRMR_WQE_FIELD_LOC(39, 36)
+#define FRMR_BLK_MODE FRMR_WQE_FIELD_LOC(40, 40)
+#define FRMR_ZBVA FRMR_WQE_FIELD_LOC(41, 41)
+#define FRMR_BIND_EN FRMR_WQE_FIELD_LOC(42, 42)
+#define FRMR_ATOMIC FRMR_WQE_FIELD_LOC(43, 43)
+#define FRMR_RR FRMR_WQE_FIELD_LOC(44, 44)
+#define FRMR_RW FRMR_WQE_FIELD_LOC(45, 45)
+#define FRMR_LW FRMR_WQE_FIELD_LOC(46, 46)
+
+struct hns_roce_v2_wqe_data_seg {
+ __le32 len;
+ __le32 lkey;
+ __le64 addr;
+};
+
+struct hns_roce_query_version {
+ __le16 rocee_vendor_id;
+ __le16 rocee_hw_version;
+ __le32 rsv[5];
+};
+
+struct hns_roce_query_fw_info {
+ __le32 fw_ver;
+ __le32 rsv[5];
+};
+
+struct hns_roce_func_clear {
+ __le32 rst_funcid_en;
+ __le32 func_done;
+ __le32 rsv[4];
+};
+
+#define FUNC_CLEAR_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_func_clear, h, l)
+
+#define FUNC_CLEAR_RST_FUN_DONE FUNC_CLEAR_FIELD_LOC(32, 32)
+
+/* Each physical function manages up to 248 virtual functions, it takes up to
+ * 100ms for each function to execute clear. If an abnormal reset occurs, it is
+ * executed twice at most, so it takes up to 249 * 2 * 100ms.
+ */
+#define HNS_ROCE_V2_FUNC_CLEAR_TIMEOUT_MSECS (249 * 2 * 100)
+#define HNS_ROCE_V2_READ_FUNC_CLEAR_FLAG_INTERVAL 40
+#define HNS_ROCE_V2_READ_FUNC_CLEAR_FLAG_FAIL_WAIT 20
+
+#define CFG_LLM_A_BA_L CMQ_REQ_FIELD_LOC(31, 0)
+#define CFG_LLM_A_BA_H CMQ_REQ_FIELD_LOC(63, 32)
+#define CFG_LLM_A_DEPTH CMQ_REQ_FIELD_LOC(76, 64)
+#define CFG_LLM_A_PGSZ CMQ_REQ_FIELD_LOC(83, 80)
+#define CFG_LLM_A_INIT_EN CMQ_REQ_FIELD_LOC(84, 84)
+#define CFG_LLM_A_HEAD_BA_L CMQ_REQ_FIELD_LOC(127, 96)
+#define CFG_LLM_A_HEAD_BA_H CMQ_REQ_FIELD_LOC(147, 128)
+#define CFG_LLM_A_HEAD_NXTPTR CMQ_REQ_FIELD_LOC(159, 148)
+#define CFG_LLM_A_HEAD_PTR CMQ_REQ_FIELD_LOC(171, 160)
+#define CFG_LLM_B_TAIL_BA_L CMQ_REQ_FIELD_LOC(31, 0)
+#define CFG_LLM_B_TAIL_BA_H CMQ_REQ_FIELD_LOC(63, 32)
+#define CFG_LLM_B_TAIL_PTR CMQ_REQ_FIELD_LOC(75, 64)
+
+/* Fields of HNS_ROCE_OPC_CFG_GLOBAL_PARAM */
+#define CFG_GLOBAL_PARAM_1US_CYCLES CMQ_REQ_FIELD_LOC(9, 0)
+#define CFG_GLOBAL_PARAM_UDP_PORT CMQ_REQ_FIELD_LOC(31, 16)
+
+/*
+ * Fields of HNS_ROCE_OPC_QUERY_PF_RES, HNS_ROCE_OPC_QUERY_VF_RES
+ * and HNS_ROCE_OPC_ALLOC_VF_RES
+ */
+#define FUNC_RES_A_VF_ID CMQ_REQ_FIELD_LOC(7, 0)
+#define FUNC_RES_A_QPC_BT_IDX CMQ_REQ_FIELD_LOC(42, 32)
+#define FUNC_RES_A_QPC_BT_NUM CMQ_REQ_FIELD_LOC(59, 48)
+#define FUNC_RES_A_SRQC_BT_IDX CMQ_REQ_FIELD_LOC(72, 64)
+#define FUNC_RES_A_SRQC_BT_NUM CMQ_REQ_FIELD_LOC(89, 80)
+#define FUNC_RES_A_CQC_BT_IDX CMQ_REQ_FIELD_LOC(104, 96)
+#define FUNC_RES_A_CQC_BT_NUM CMQ_REQ_FIELD_LOC(121, 112)
+#define FUNC_RES_A_MPT_BT_IDX CMQ_REQ_FIELD_LOC(136, 128)
+#define FUNC_RES_A_MPT_BT_NUM CMQ_REQ_FIELD_LOC(153, 144)
+#define FUNC_RES_A_EQC_BT_IDX CMQ_REQ_FIELD_LOC(168, 160)
+#define FUNC_RES_A_EQC_BT_NUM CMQ_REQ_FIELD_LOC(185, 176)
+#define FUNC_RES_B_SMAC_IDX CMQ_REQ_FIELD_LOC(39, 32)
+#define FUNC_RES_B_SMAC_NUM CMQ_REQ_FIELD_LOC(48, 40)
+#define FUNC_RES_B_SGID_IDX CMQ_REQ_FIELD_LOC(71, 64)
+#define FUNC_RES_B_SGID_NUM CMQ_REQ_FIELD_LOC(80, 72)
+#define FUNC_RES_B_QID_IDX CMQ_REQ_FIELD_LOC(105, 96)
+#define FUNC_RES_B_QID_NUM CMQ_REQ_FIELD_LOC(122, 112)
+#define FUNC_RES_V_QID_NUM CMQ_REQ_FIELD_LOC(115, 112)
+
+#define FUNC_RES_B_SCCC_BT_IDX CMQ_REQ_FIELD_LOC(136, 128)
+#define FUNC_RES_B_SCCC_BT_NUM CMQ_REQ_FIELD_LOC(145, 137)
+#define FUNC_RES_B_GMV_BT_IDX CMQ_REQ_FIELD_LOC(167, 160)
+#define FUNC_RES_B_GMV_BT_NUM CMQ_REQ_FIELD_LOC(176, 168)
+#define FUNC_RES_V_GMV_BT_NUM CMQ_REQ_FIELD_LOC(184, 176)
+
+/* Fields of HNS_ROCE_OPC_QUERY_PF_TIMER_RES */
+#define PF_TIMER_RES_QPC_ITEM_IDX CMQ_REQ_FIELD_LOC(43, 32)
+#define PF_TIMER_RES_QPC_ITEM_NUM CMQ_REQ_FIELD_LOC(60, 48)
+#define PF_TIMER_RES_CQC_ITEM_IDX CMQ_REQ_FIELD_LOC(74, 64)
+#define PF_TIMER_RES_CQC_ITEM_NUM CMQ_REQ_FIELD_LOC(91, 80)
+
+struct hns_roce_vf_switch {
+ __le32 rocee_sel;
+ __le32 fun_id;
+ __le32 cfg;
+ __le32 resv1;
+ __le32 resv2;
+ __le32 resv3;
+};
+
+#define VF_SWITCH_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_vf_switch, h, l)
+
+#define VF_SWITCH_VF_ID VF_SWITCH_FIELD_LOC(42, 35)
+#define VF_SWITCH_ALW_LPBK VF_SWITCH_FIELD_LOC(65, 65)
+#define VF_SWITCH_ALW_LCL_LPBK VF_SWITCH_FIELD_LOC(66, 66)
+#define VF_SWITCH_ALW_DST_OVRD VF_SWITCH_FIELD_LOC(67, 67)
+
+struct hns_roce_post_mbox {
+ __le32 in_param_l;
+ __le32 in_param_h;
+ __le32 out_param_l;
+ __le32 out_param_h;
+ __le32 cmd_tag;
+ __le32 token_event_en;
+};
+
+struct hns_roce_mbox_status {
+ __le32 mb_status_hw_run;
+ __le32 rsv[5];
+};
+
+#define HNS_ROCE_V2_GO_BIT_TIMEOUT_MSECS 10000
+
+#define MB_ST_HW_RUN_M BIT(31)
+#define MB_ST_COMPLETE_M GENMASK(7, 0)
+
+#define MB_ST_COMPLETE_SUCC 1
+
+/* Fields of HNS_ROCE_OPC_CFG_BT_ATTR */
+#define CFG_BT_ATTR_QPC_BA_PGSZ CMQ_REQ_FIELD_LOC(3, 0)
+#define CFG_BT_ATTR_QPC_BUF_PGSZ CMQ_REQ_FIELD_LOC(7, 4)
+#define CFG_BT_ATTR_QPC_HOPNUM CMQ_REQ_FIELD_LOC(9, 8)
+#define CFG_BT_ATTR_SRQC_BA_PGSZ CMQ_REQ_FIELD_LOC(35, 32)
+#define CFG_BT_ATTR_SRQC_BUF_PGSZ CMQ_REQ_FIELD_LOC(39, 36)
+#define CFG_BT_ATTR_SRQC_HOPNUM CMQ_REQ_FIELD_LOC(41, 40)
+#define CFG_BT_ATTR_CQC_BA_PGSZ CMQ_REQ_FIELD_LOC(67, 64)
+#define CFG_BT_ATTR_CQC_BUF_PGSZ CMQ_REQ_FIELD_LOC(71, 68)
+#define CFG_BT_ATTR_CQC_HOPNUM CMQ_REQ_FIELD_LOC(73, 72)
+#define CFG_BT_ATTR_MPT_BA_PGSZ CMQ_REQ_FIELD_LOC(99, 96)
+#define CFG_BT_ATTR_MPT_BUF_PGSZ CMQ_REQ_FIELD_LOC(103, 100)
+#define CFG_BT_ATTR_MPT_HOPNUM CMQ_REQ_FIELD_LOC(105, 104)
+#define CFG_BT_ATTR_SCCC_BA_PGSZ CMQ_REQ_FIELD_LOC(131, 128)
+#define CFG_BT_ATTR_SCCC_BUF_PGSZ CMQ_REQ_FIELD_LOC(135, 132)
+#define CFG_BT_ATTR_SCCC_HOPNUM CMQ_REQ_FIELD_LOC(137, 136)
+
+/* Fields of HNS_ROCE_OPC_CFG_ENTRY_SIZE */
+#define CFG_HEM_ENTRY_SIZE_TYPE CMQ_REQ_FIELD_LOC(31, 0)
+enum {
+ HNS_ROCE_CFG_QPC_SIZE = BIT(0),
+ HNS_ROCE_CFG_SCCC_SIZE = BIT(1),
+};
+
+#define CFG_HEM_ENTRY_SIZE_VALUE CMQ_REQ_FIELD_LOC(191, 160)
+
+/* Fields of HNS_ROCE_OPC_CFG_GMV_BT */
+#define CFG_GMV_BT_BA_L CMQ_REQ_FIELD_LOC(31, 0)
+#define CFG_GMV_BT_BA_H CMQ_REQ_FIELD_LOC(51, 32)
+#define CFG_GMV_BT_IDX CMQ_REQ_FIELD_LOC(95, 64)
+
+/* Fields of HNS_ROCE_QUERY_RAM_ECC */
+#define QUERY_RAM_ECC_1BIT_ERR CMQ_REQ_FIELD_LOC(31, 0)
+#define QUERY_RAM_ECC_RES_TYPE CMQ_REQ_FIELD_LOC(63, 32)
+#define QUERY_RAM_ECC_TAG CMQ_REQ_FIELD_LOC(95, 64)
+
+struct hns_roce_cfg_sgid_tb {
+ __le32 table_idx_rsv;
+ __le32 vf_sgid_l;
+ __le32 vf_sgid_ml;
+ __le32 vf_sgid_mh;
+ __le32 vf_sgid_h;
+ __le32 vf_sgid_type_rsv;
+};
+
+#define SGID_TB_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_cfg_sgid_tb, h, l)
+
+#define CFG_SGID_TB_TABLE_IDX SGID_TB_FIELD_LOC(7, 0)
+#define CFG_SGID_TB_VF_SGID_TYPE SGID_TB_FIELD_LOC(161, 160)
+
+struct hns_roce_cfg_smac_tb {
+ __le32 tb_idx_rsv;
+ __le32 vf_smac_l;
+ __le32 vf_smac_h_rsv;
+ __le32 rsv[3];
+};
+
+#define SMAC_TB_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_cfg_smac_tb, h, l)
+
+#define CFG_SMAC_TB_IDX SMAC_TB_FIELD_LOC(7, 0)
+#define CFG_SMAC_TB_VF_SMAC_H SMAC_TB_FIELD_LOC(79, 64)
+
+struct hns_roce_cfg_gmv_tb_a {
+ __le32 vf_sgid_l;
+ __le32 vf_sgid_ml;
+ __le32 vf_sgid_mh;
+ __le32 vf_sgid_h;
+ __le32 vf_sgid_type_vlan;
+ __le32 resv;
+};
+
+#define GMV_TB_A_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_cfg_gmv_tb_a, h, l)
+
+#define GMV_TB_A_VF_SGID_TYPE GMV_TB_A_FIELD_LOC(129, 128)
+#define GMV_TB_A_VF_VLAN_EN GMV_TB_A_FIELD_LOC(130, 130)
+#define GMV_TB_A_VF_VLAN_ID GMV_TB_A_FIELD_LOC(155, 144)
+
+struct hns_roce_cfg_gmv_tb_b {
+ __le32 vf_smac_l;
+ __le32 vf_smac_h;
+ __le32 table_idx_rsv;
+ __le32 resv[3];
+};
+
+#define GMV_TB_B_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_cfg_gmv_tb_b, h, l)
+
+#define GMV_TB_B_SMAC_H GMV_TB_B_FIELD_LOC(47, 32)
+#define GMV_TB_B_SGID_IDX GMV_TB_B_FIELD_LOC(71, 64)
+
+#define HNS_ROCE_QUERY_PF_CAPS_CMD_NUM_HIP08 5
+#define HNS_ROCE_QUERY_PF_CAPS_CMD_NUM 6
+struct hns_roce_query_pf_caps_a {
+ u8 number_ports;
+ u8 local_ca_ack_delay;
+ __le16 max_sq_sg;
+ __le16 max_sq_inline;
+ __le16 max_rq_sg;
+ __le32 rsv0;
+ __le16 num_qpc_timer;
+ __le16 num_cqc_timer;
+ __le16 max_srq_sges;
+ u8 num_aeq_vectors;
+ u8 num_other_vectors;
+ u8 max_sq_desc_sz;
+ u8 max_rq_desc_sz;
+ u8 rsv1;
+ u8 cqe_sz;
+};
+
+struct hns_roce_query_pf_caps_b {
+ u8 mtpt_entry_sz;
+ u8 irrl_entry_sz;
+ u8 trrl_entry_sz;
+ u8 cqc_entry_sz;
+ u8 srqc_entry_sz;
+ u8 idx_entry_sz;
+ u8 sccc_sz;
+ u8 max_mtu;
+ __le16 qpc_sz;
+ __le16 qpc_timer_entry_sz;
+ __le16 cqc_timer_entry_sz;
+ u8 min_cqes;
+ u8 min_wqes;
+ __le32 page_size_cap;
+ u8 pkey_table_len;
+ u8 phy_num_uars;
+ u8 ctx_hop_num;
+ u8 pbl_hop_num;
+};
+
+struct hns_roce_query_pf_caps_c {
+ __le32 cap_flags_num_pds;
+ __le32 max_gid_num_cqs;
+ __le32 cq_depth;
+ __le32 num_mrws;
+ __le32 ord_num_qps;
+ __le16 sq_depth;
+ __le16 rq_depth;
+};
+
+#define PF_CAPS_C_FIELD_LOC(h, l) \
+ FIELD_LOC(struct hns_roce_query_pf_caps_c, h, l)
+
+#define PF_CAPS_C_NUM_PDS PF_CAPS_C_FIELD_LOC(19, 0)
+#define PF_CAPS_C_CAP_FLAGS PF_CAPS_C_FIELD_LOC(31, 20)
+#define PF_CAPS_C_NUM_CQS PF_CAPS_C_FIELD_LOC(51, 32)
+#define PF_CAPS_C_MAX_GID PF_CAPS_C_FIELD_LOC(60, 52)
+#define PF_CAPS_C_CQ_DEPTH PF_CAPS_C_FIELD_LOC(86, 64)
+#define PF_CAPS_C_NUM_XRCDS PF_CAPS_C_FIELD_LOC(91, 87)
+#define PF_CAPS_C_NUM_MRWS PF_CAPS_C_FIELD_LOC(115, 96)
+#define PF_CAPS_C_NUM_QPS PF_CAPS_C_FIELD_LOC(147, 128)
+#define PF_CAPS_C_MAX_ORD PF_CAPS_C_FIELD_LOC(155, 148)
+
+struct hns_roce_query_pf_caps_d {
+ __le32 wq_hop_num_max_srqs;
+ __le16 srq_depth;
+ __le16 cap_flags_ex;
+ __le32 num_ceqs_ceq_depth;
+ __le32 arm_st_aeq_depth;
+ __le32 num_uars_rsv_pds;
+ __le32 rsv_uars_rsv_qps;
+};
+
+#define PF_CAPS_D_FIELD_LOC(h, l) \
+ FIELD_LOC(struct hns_roce_query_pf_caps_d, h, l)
+
+#define PF_CAPS_D_NUM_SRQS PF_CAPS_D_FIELD_LOC(19, 0)
+#define PF_CAPS_D_RQWQE_HOP_NUM PF_CAPS_D_FIELD_LOC(21, 20)
+#define PF_CAPS_D_EX_SGE_HOP_NUM PF_CAPS_D_FIELD_LOC(23, 22)
+#define PF_CAPS_D_SQWQE_HOP_NUM PF_CAPS_D_FIELD_LOC(25, 24)
+#define PF_CAPS_D_CONG_CAP PF_CAPS_D_FIELD_LOC(29, 26)
+#define PF_CAPS_D_CEQ_DEPTH PF_CAPS_D_FIELD_LOC(85, 64)
+#define PF_CAPS_D_NUM_CEQS PF_CAPS_D_FIELD_LOC(95, 86)
+#define PF_CAPS_D_AEQ_DEPTH PF_CAPS_D_FIELD_LOC(117, 96)
+#define PF_CAPS_D_AEQ_ARM_ST PF_CAPS_D_FIELD_LOC(119, 118)
+#define PF_CAPS_D_CEQ_ARM_ST PF_CAPS_D_FIELD_LOC(121, 120)
+#define PF_CAPS_D_DEFAULT_ALG PF_CAPS_D_FIELD_LOC(127, 122)
+#define PF_CAPS_D_RSV_PDS PF_CAPS_D_FIELD_LOC(147, 128)
+#define PF_CAPS_D_NUM_UARS PF_CAPS_D_FIELD_LOC(155, 148)
+#define PF_CAPS_D_RSV_QPS PF_CAPS_D_FIELD_LOC(179, 160)
+#define PF_CAPS_D_RSV_UARS PF_CAPS_D_FIELD_LOC(187, 180)
+
+#define HNS_ROCE_CAP_FLAGS_EX_SHIFT 12
+
+struct hns_roce_congestion_algorithm {
+ u8 alg_sel;
+ u8 alg_sub_sel;
+ u8 dip_vld;
+ u8 wnd_mode_sel;
+};
+
+struct hns_roce_query_pf_caps_e {
+ __le32 chunk_size_shift_rsv_mrws;
+ __le32 rsv_cqs;
+ __le32 rsv_srqs;
+ __le32 rsv_lkey;
+ __le16 ceq_max_cnt;
+ __le16 ceq_period;
+ __le16 aeq_max_cnt;
+ __le16 aeq_period;
+};
+
+struct hns_roce_query_pf_caps_f {
+ __le32 max_ack_req_msg_len;
+ __le32 rsv[5];
+};
+
+#define PF_CAPS_E_FIELD_LOC(h, l) \
+ FIELD_LOC(struct hns_roce_query_pf_caps_e, h, l)
+
+#define PF_CAPS_E_RSV_MRWS PF_CAPS_E_FIELD_LOC(19, 0)
+#define PF_CAPS_E_CHUNK_SIZE_SHIFT PF_CAPS_E_FIELD_LOC(31, 20)
+#define PF_CAPS_E_RSV_CQS PF_CAPS_E_FIELD_LOC(51, 32)
+#define PF_CAPS_E_RSV_XRCDS PF_CAPS_E_FIELD_LOC(63, 52)
+#define PF_CAPS_E_RSV_SRQS PF_CAPS_E_FIELD_LOC(83, 64)
+#define PF_CAPS_E_RSV_LKEYS PF_CAPS_E_FIELD_LOC(115, 96)
+
+struct hns_roce_cmq_req {
+ __le32 data[6];
+};
+
+#define CMQ_REQ_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_cmq_req, h, l)
+
+struct hns_roce_cmq_desc {
+ __le16 opcode;
+ __le16 flag;
+ __le16 retval;
+ __le16 rsv;
+ union {
+ __le32 data[6];
+ struct {
+ __le32 own_func_num;
+ __le32 own_mac_id;
+ __le32 rsv[4];
+ } func_info;
+ };
+};
+
+struct hns_roce_v2_cmq_ring {
+ dma_addr_t desc_dma_addr;
+ struct hns_roce_cmq_desc *desc;
+ u32 head;
+ u16 buf_size;
+ u16 desc_num;
+ u8 flag;
+ spinlock_t lock; /* command queue lock */
+};
+
+struct hns_roce_v2_cmq {
+ struct hns_roce_v2_cmq_ring csq;
+ u16 tx_timeout;
+};
+
+struct hns_roce_link_table {
+ struct hns_roce_buf_list table;
+ struct hns_roce_buf *buf;
+};
+
+#define HNS_ROCE_EXT_LLM_ENTRY(addr, id) (((id) << (64 - 12)) | ((addr) >> 12))
+#define HNS_ROCE_EXT_LLM_MIN_PAGES(que_num) ((que_num) * 4 + 2)
+
+struct hns_roce_v2_free_mr {
+ struct hns_roce_qp *rsv_qp[HNS_ROCE_FREE_MR_USED_QP_NUM];
+ struct hns_roce_cq *rsv_cq;
+ struct hns_roce_pd *rsv_pd;
+ struct mutex mutex;
+};
+
+struct hns_roce_v2_priv {
+ struct hnae3_handle *handle;
+ struct hns_roce_v2_cmq cmq;
+ struct hns_roce_link_table ext_llm;
+ struct hns_roce_v2_free_mr free_mr;
+};
+
+struct hns_roce_dip {
+ u8 dgid[GID_LEN_V2];
+ u32 dip_idx;
+ u32 qp_cnt;
+};
+
+struct fmea_ram_ecc {
+ u32 is_ecc_err;
+ u32 res_type;
+ u32 index;
+};
+
+/* only for RNR timeout issue of HIP08 */
+#define HNS_ROCE_CLOCK_ADJUST 1000
+#define HNS_ROCE_MAX_CQ_PERIOD_HIP08 65
+#define HNS_ROCE_MAX_EQ_PERIOD 65
+#define HNS_ROCE_RNR_TIMER_10NS 1
+#define HNS_ROCE_1US_CFG 999
+#define HNS_ROCE_1NS_CFG 0
+
+#define HNS_ROCE_AEQ_DEFAULT_BURST_NUM 0x0
+#define HNS_ROCE_AEQ_DEFAULT_INTERVAL 0x0
+#define HNS_ROCE_CEQ_DEFAULT_BURST_NUM 0x0
+#define HNS_ROCE_CEQ_DEFAULT_INTERVAL 0x0
+
+#define HNS_ROCE_V2_EQ_STATE_INVALID 0
+#define HNS_ROCE_V2_EQ_STATE_VALID 1
+#define HNS_ROCE_V2_EQ_STATE_OVERFLOW 2
+#define HNS_ROCE_V2_EQ_STATE_FAILURE 3
+
+#define HNS_ROCE_V2_EQ_OVER_IGNORE_0 0
+#define HNS_ROCE_V2_EQ_OVER_IGNORE_1 1
+
+#define HNS_ROCE_V2_EQ_COALESCE_0 0
+#define HNS_ROCE_V2_EQ_COALESCE_1 1
+
+#define HNS_ROCE_V2_EQ_FIRED 0
+#define HNS_ROCE_V2_EQ_ARMED 1
+#define HNS_ROCE_V2_EQ_ALWAYS_ARMED 3
+
+#define HNS_ROCE_EQ_INIT_EQE_CNT 0
+#define HNS_ROCE_EQ_INIT_PROD_IDX 0
+#define HNS_ROCE_EQ_INIT_REPORT_TIMER 0
+#define HNS_ROCE_EQ_INIT_MSI_IDX 0
+#define HNS_ROCE_EQ_INIT_CONS_IDX 0
+#define HNS_ROCE_EQ_INIT_NXT_EQE_BA 0
+
+#define HNS_ROCE_V2_COMP_EQE_NUM 0x1000
+#define HNS_ROCE_V2_ASYNC_EQE_NUM 0x1000
+
+#define HNS_ROCE_V2_VF_INT_ST_AEQ_OVERFLOW_S 0
+
+#define HNS_ROCE_EQ_DB_CMD_AEQ 0x0
+#define HNS_ROCE_EQ_DB_CMD_AEQ_ARMED 0x1
+#define HNS_ROCE_EQ_DB_CMD_CEQ 0x2
+#define HNS_ROCE_EQ_DB_CMD_CEQ_ARMED 0x3
+
+#define EQ_ENABLE 1
+#define EQ_DISABLE 0
+
+#define EQ_REG_OFFSET 0x4
+
+#define HNS_ROCE_INT_NAME_LEN 32
+#define HNS_ROCE_V2_EQN_M GENMASK(23, 0)
+
+#define HNS_ROCE_V2_VF_ABN_INT_EN_S 0
+#define HNS_ROCE_V2_VF_ABN_INT_EN_M GENMASK(0, 0)
+#define HNS_ROCE_V2_VF_ABN_INT_ST_M GENMASK(2, 0)
+#define HNS_ROCE_V2_VF_ABN_INT_CFG_M GENMASK(2, 0)
+#define HNS_ROCE_V2_VF_EVENT_INT_EN_M GENMASK(0, 0)
+
+struct hns_roce_eq_context {
+ __le32 data[16];
+};
+
+#define EQC_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_eq_context, h, l)
+
+#define EQC_EQ_ST EQC_FIELD_LOC(1, 0)
+#define EQC_EQE_HOP_NUM EQC_FIELD_LOC(3, 2)
+#define EQC_OVER_IGNORE EQC_FIELD_LOC(4, 4)
+#define EQC_COALESCE EQC_FIELD_LOC(5, 5)
+#define EQC_ARM_ST EQC_FIELD_LOC(7, 6)
+#define EQC_EQN EQC_FIELD_LOC(15, 8)
+#define EQC_EQE_CNT EQC_FIELD_LOC(31, 16)
+#define EQC_EQE_BA_PG_SZ EQC_FIELD_LOC(35, 32)
+#define EQC_EQE_BUF_PG_SZ EQC_FIELD_LOC(39, 36)
+#define EQC_EQ_PROD_INDX EQC_FIELD_LOC(63, 40)
+#define EQC_EQ_MAX_CNT EQC_FIELD_LOC(79, 64)
+#define EQC_EQ_PERIOD EQC_FIELD_LOC(95, 80)
+#define EQC_EQE_REPORT_TIMER EQC_FIELD_LOC(127, 96)
+#define EQC_EQE_BA_L EQC_FIELD_LOC(159, 128)
+#define EQC_EQE_BA_H EQC_FIELD_LOC(188, 160)
+#define EQC_SHIFT EQC_FIELD_LOC(199, 192)
+#define EQC_MSI_INDX EQC_FIELD_LOC(207, 200)
+#define EQC_CUR_EQE_BA_L EQC_FIELD_LOC(223, 208)
+#define EQC_CUR_EQE_BA_M EQC_FIELD_LOC(255, 224)
+#define EQC_CUR_EQE_BA_H EQC_FIELD_LOC(259, 256)
+#define EQC_EQ_CONS_INDX EQC_FIELD_LOC(287, 264)
+#define EQC_NEX_EQE_BA_L EQC_FIELD_LOC(319, 288)
+#define EQC_NEX_EQE_BA_H EQC_FIELD_LOC(339, 320)
+#define EQC_EQE_SIZE EQC_FIELD_LOC(341, 340)
+
+#define MAX_SERVICE_LEVEL 0x7
+
+struct hns_roce_wqe_atomic_seg {
+ __le64 fetchadd_swap_data;
+ __le64 cmp_data;
+};
+
+struct hns_roce_sccc_clr {
+ __le32 qpn;
+ __le32 rsv[5];
+};
+
+struct hns_roce_sccc_clr_done {
+ __le32 clr_done;
+ __le32 rsv[5];
+};
+
+struct hns_roce_bond_info {
+ __le32 bond_id;
+ __le32 bond_mode;
+ __le32 active_slave_cnt;
+ __le32 active_slave_mask;
+ __le32 slave_mask;
+ __le32 hash_policy;
+};
+
+struct hns_roce_dev
+ *hns_roce_bond_init_client(struct hns_roce_bond_group *bond_grp,
+ int func_idx);
+void hns_roce_bond_uninit_client(struct hns_roce_bond_group *bond_grp,
+ int func_idx);
+int hns_roce_v2_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata);
+int hns_roce_cmd_bond(struct hns_roce_bond_group *bond_grp,
+ enum hns_roce_bond_cmd_type bond_type);
+
+static inline void hns_roce_write64(struct hns_roce_dev *hr_dev, __le32 val[2],
+ void __iomem *dest)
+{
+ struct hns_roce_v2_priv *priv = hr_dev->priv;
+ struct hnae3_handle *handle = priv->handle;
+ const struct hnae3_ae_ops *ops = handle->ae_algo->ops;
+
+ if (!hr_dev->dis_db && !ops->get_hw_reset_stat(handle))
+ hns_roce_write64_k(val, dest);
+}
+
+#endif
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index 4953d9cb83a7..2f4864ab7d4e 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -31,137 +31,165 @@
* SOFTWARE.
*/
#include <linux/acpi.h>
-#include <linux/of_platform.h>
+#include <linux/module.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_smi.h>
#include <rdma/ib_user_verbs.h>
#include <rdma/ib_cache.h>
#include "hns_roce_common.h"
#include "hns_roce_device.h"
-#include <rdma/hns-abi.h>
#include "hns_roce_hem.h"
+#include "hns_roce_hw_v2.h"
+#include "hns_roce_bond.h"
-/**
- * hns_get_gid_index - Get gid index.
- * @hr_dev: pointer to structure hns_roce_dev.
- * @port: port, value range: 0 ~ MAX
- * @gid_index: gid_index, value range: 0 ~ MAX
- * Description:
- * N ports shared gids, allocation method as follow:
- * GID[0][0], GID[1][0],.....GID[N - 1][0],
- * GID[0][0], GID[1][0],.....GID[N - 1][0],
- * And so on
- */
-int hns_get_gid_index(struct hns_roce_dev *hr_dev, u8 port, int gid_index)
-{
- return gid_index * hr_dev->caps.num_ports + port;
-}
-
-static void hns_roce_set_mac(struct hns_roce_dev *hr_dev, u8 port, u8 *addr)
+static int hns_roce_set_mac(struct hns_roce_dev *hr_dev, u32 port,
+ const u8 *addr)
{
u8 phy_port;
- u32 i = 0;
+ u32 i;
+
+ if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
+ return 0;
- if (!memcmp(hr_dev->dev_addr[port], addr, MAC_ADDR_OCTET_NUM))
- return;
+ if (!memcmp(hr_dev->dev_addr[port], addr, ETH_ALEN))
+ return 0;
- for (i = 0; i < MAC_ADDR_OCTET_NUM; i++)
+ for (i = 0; i < ETH_ALEN; i++)
hr_dev->dev_addr[port][i] = addr[i];
phy_port = hr_dev->iboe.phy_port[port];
- hr_dev->hw->set_mac(hr_dev, phy_port, addr);
+ return hr_dev->hw->set_mac(hr_dev, phy_port, addr);
}
-static int hns_roce_add_gid(struct ib_device *device, u8 port_num,
- unsigned int index, const union ib_gid *gid,
- const struct ib_gid_attr *attr, void **context)
+static int hns_roce_add_gid(const struct ib_gid_attr *attr, void **context)
{
- struct hns_roce_dev *hr_dev = to_hr_dev(device);
- u8 port = port_num - 1;
- unsigned long flags;
+ struct hns_roce_dev *hr_dev = to_hr_dev(attr->device);
+ u32 port = attr->port_num - 1;
+ int ret;
if (port >= hr_dev->caps.num_ports)
return -EINVAL;
- spin_lock_irqsave(&hr_dev->iboe.lock, flags);
+ ret = hr_dev->hw->set_gid(hr_dev, attr->index, &attr->gid, attr);
- hr_dev->hw->set_gid(hr_dev, port, index, (union ib_gid *)gid);
-
- spin_unlock_irqrestore(&hr_dev->iboe.lock, flags);
-
- return 0;
+ return ret;
}
-static int hns_roce_del_gid(struct ib_device *device, u8 port_num,
- unsigned int index, void **context)
+static int hns_roce_del_gid(const struct ib_gid_attr *attr, void **context)
{
- struct hns_roce_dev *hr_dev = to_hr_dev(device);
- union ib_gid zgid = { {0} };
- u8 port = port_num - 1;
- unsigned long flags;
+ struct hns_roce_dev *hr_dev = to_hr_dev(attr->device);
+ u32 port = attr->port_num - 1;
+ int ret;
if (port >= hr_dev->caps.num_ports)
return -EINVAL;
- spin_lock_irqsave(&hr_dev->iboe.lock, flags);
+ ret = hr_dev->hw->set_gid(hr_dev, attr->index, NULL, NULL);
+
+ return ret;
+}
- hr_dev->hw->set_gid(hr_dev, port, index, &zgid);
+static int hns_roce_get_port_state(struct hns_roce_dev *hr_dev, u32 port_num,
+ enum ib_port_state *state)
+{
+ struct hns_roce_bond_group *bond_grp;
+ u8 bus_num = get_hr_bus_num(hr_dev);
+ struct net_device *net_dev;
- spin_unlock_irqrestore(&hr_dev->iboe.lock, flags);
+ net_dev = ib_device_get_netdev(&hr_dev->ib_dev, port_num);
+ if (!net_dev)
+ return -ENODEV;
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_BOND) {
+ bond_grp = hns_roce_get_bond_grp(net_dev, bus_num);
+ if (bond_grp) {
+ *state = ib_get_curr_port_state(bond_grp->upper_dev);
+ goto out;
+ }
+ }
+
+ *state = ib_get_curr_port_state(net_dev);
+out:
+ dev_put(net_dev);
return 0;
}
-static int handle_en_event(struct hns_roce_dev *hr_dev, u8 port,
- unsigned long event)
+static int handle_en_event(struct net_device *netdev,
+ struct hns_roce_dev *hr_dev,
+ u32 port, unsigned long event)
{
- struct device *dev = &hr_dev->pdev->dev;
- struct net_device *netdev;
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ struct device *dev = hr_dev->dev;
+ enum ib_port_state curr_state;
+ struct ib_event ibevent;
+ int ret = 0;
- netdev = hr_dev->iboe.netdevs[port];
if (!netdev) {
- dev_err(dev, "port(%d) can't find netdev\n", port);
+ dev_err(dev, "can't find netdev on port(%u)!\n", port);
return -ENODEV;
}
- spin_lock_bh(&hr_dev->iboe.lock);
-
switch (event) {
- case NETDEV_UP:
- case NETDEV_CHANGE:
case NETDEV_REGISTER:
case NETDEV_CHANGEADDR:
- hns_roce_set_mac(hr_dev, port, netdev->dev_addr);
+ ret = hns_roce_set_mac(hr_dev, port, netdev->dev_addr);
break;
+ case NETDEV_UP:
+ case NETDEV_CHANGE:
+ ret = hns_roce_set_mac(hr_dev, port, netdev->dev_addr);
+ if (ret)
+ return ret;
+ fallthrough;
case NETDEV_DOWN:
- /*
- * In v1 engine, only support all ports closed together.
- */
+ if (!netif_is_lag_master(netdev))
+ break;
+ curr_state = ib_get_curr_port_state(netdev);
+
+ write_lock_irq(&ibdev->cache_lock);
+ if (ibdev->port_data[port].cache.last_port_state == curr_state) {
+ write_unlock_irq(&ibdev->cache_lock);
+ return 0;
+ }
+ ibdev->port_data[port].cache.last_port_state = curr_state;
+ write_unlock_irq(&ibdev->cache_lock);
+
+ ibevent.event = (curr_state == IB_PORT_DOWN) ?
+ IB_EVENT_PORT_ERR : IB_EVENT_PORT_ACTIVE;
+ ibevent.device = ibdev;
+ ibevent.element.port_num = port + 1;
+ ib_dispatch_event(&ibevent);
break;
default:
dev_dbg(dev, "NETDEV event = 0x%x!\n", (u32)(event));
break;
}
- spin_unlock_bh(&hr_dev->iboe.lock);
- return 0;
+ return ret;
}
static int hns_roce_netdev_event(struct notifier_block *self,
unsigned long event, void *ptr)
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ struct hns_roce_bond_group *bond_grp;
struct hns_roce_ib_iboe *iboe = NULL;
struct hns_roce_dev *hr_dev = NULL;
- u8 port = 0;
- int ret = 0;
+ struct net_device *upper = NULL;
+ int ret;
+ u32 port;
hr_dev = container_of(self, struct hns_roce_dev, iboe.nb);
iboe = &hr_dev->iboe;
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_BOND) {
+ bond_grp = hns_roce_get_bond_grp(get_hr_netdev(hr_dev, 0),
+ get_hr_bus_num(hr_dev));
+ upper = bond_grp ? bond_grp->upper_dev : NULL;
+ }
for (port = 0; port < hr_dev->caps.num_ports; port++) {
- if (dev == iboe->netdevs[port]) {
- ret = handle_en_event(hr_dev, port, event);
+ if ((!upper && dev == iboe->netdevs[port]) ||
+ (upper && dev == upper)) {
+ ret = handle_en_event(dev, hr_dev, port, event);
if (ret)
return NOTIFY_DONE;
break;
@@ -173,12 +201,15 @@ static int hns_roce_netdev_event(struct notifier_block *self,
static int hns_roce_setup_mtu_mac(struct hns_roce_dev *hr_dev)
{
+ struct net_device *net_dev;
+ int ret;
u8 i;
for (i = 0; i < hr_dev->caps.num_ports; i++) {
- hr_dev->hw->set_mtu(hr_dev, hr_dev->iboe.phy_port[i],
- hr_dev->caps.max_mtu);
- hns_roce_set_mac(hr_dev, i, hr_dev->iboe.netdevs[i]->dev_addr);
+ net_dev = get_hr_netdev(hr_dev, i);
+ ret = hns_roce_set_mac(hr_dev, i, net_dev->dev_addr);
+ if (ret)
+ return ret;
}
return 0;
@@ -192,7 +223,8 @@ static int hns_roce_query_device(struct ib_device *ib_dev,
memset(props, 0, sizeof(*props));
- props->sys_image_guid = hr_dev->sys_image_guid;
+ props->fw_ver = hr_dev->caps.fw_ver;
+ props->sys_image_guid = cpu_to_be64(hr_dev->sys_image_guid);
props->max_mr_size = (u64)(~(0ULL));
props->page_size_cap = hr_dev->caps.page_size_cap;
props->vendor_id = hr_dev->vendor_id;
@@ -202,54 +234,55 @@ static int hns_roce_query_device(struct ib_device *ib_dev,
props->max_qp_wr = hr_dev->caps.max_wqes;
props->device_cap_flags = IB_DEVICE_PORT_ACTIVE_EVENT |
IB_DEVICE_RC_RNR_NAK_GEN;
- props->max_sge = hr_dev->caps.max_sq_sg;
- props->max_sge_rd = 1;
+ props->max_send_sge = hr_dev->caps.max_sq_sg;
+ props->max_recv_sge = hr_dev->caps.max_rq_sg;
+ props->max_sge_rd = hr_dev->caps.max_sq_sg;
props->max_cq = hr_dev->caps.num_cqs;
props->max_cqe = hr_dev->caps.max_cqes;
props->max_mr = hr_dev->caps.num_mtpts;
props->max_pd = hr_dev->caps.num_pds;
props->max_qp_rd_atom = hr_dev->caps.max_qp_dest_rdma;
props->max_qp_init_rd_atom = hr_dev->caps.max_qp_init_rdma;
- props->atomic_cap = IB_ATOMIC_NONE;
+ props->atomic_cap = hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_ATOMIC ?
+ IB_ATOMIC_HCA : IB_ATOMIC_NONE;
props->max_pkeys = 1;
props->local_ca_ack_delay = hr_dev->caps.local_ca_ack_delay;
+ props->max_ah = INT_MAX;
+ props->cq_caps.max_cq_moderation_period = HNS_ROCE_MAX_CQ_PERIOD;
+ props->cq_caps.max_cq_moderation_count = HNS_ROCE_MAX_CQ_COUNT;
+ if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08)
+ props->cq_caps.max_cq_moderation_period = HNS_ROCE_MAX_CQ_PERIOD_HIP08;
+
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) {
+ props->max_srq = hr_dev->caps.num_srqs;
+ props->max_srq_wr = hr_dev->caps.max_srq_wrs;
+ props->max_srq_sge = hr_dev->caps.max_srq_sges;
+ }
- return 0;
-}
-
-static struct net_device *hns_roce_get_netdev(struct ib_device *ib_dev,
- u8 port_num)
-{
- struct hns_roce_dev *hr_dev = to_hr_dev(ib_dev);
- struct net_device *ndev;
-
- if (port_num < 1 || port_num > hr_dev->caps.num_ports)
- return NULL;
-
- rcu_read_lock();
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_FRMR &&
+ hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) {
+ props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
+ props->max_fast_reg_page_list_len = HNS_ROCE_FRMR_MAX_PA;
+ }
- ndev = hr_dev->iboe.netdevs[port_num - 1];
- if (ndev)
- dev_hold(ndev);
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_XRC)
+ props->device_cap_flags |= IB_DEVICE_XRC;
- rcu_read_unlock();
- return ndev;
+ return 0;
}
-static int hns_roce_query_port(struct ib_device *ib_dev, u8 port_num,
+static int hns_roce_query_port(struct ib_device *ib_dev, u32 port_num,
struct ib_port_attr *props)
{
struct hns_roce_dev *hr_dev = to_hr_dev(ib_dev);
- struct device *dev = &hr_dev->pdev->dev;
struct net_device *net_dev;
- unsigned long flags;
enum ib_mtu mtu;
- u8 port;
+ u32 port;
+ int ret;
- assert(port_num > 0);
port = port_num - 1;
- memset(props, 0, sizeof(*props));
+ /* props being zeroed by the caller, avoid zeroing it here */
props->max_mtu = hr_dev->caps.max_mtu;
props->gid_tbl_len = hr_dev->caps.gid_table_len[port];
@@ -258,44 +291,46 @@ static int hns_roce_query_port(struct ib_device *ib_dev, u8 port_num,
IB_PORT_BOOT_MGMT_SUP;
props->max_msg_sz = HNS_ROCE_MAX_MSG_LEN;
props->pkey_tbl_len = 1;
- props->active_width = IB_WIDTH_4X;
- props->active_speed = 1;
-
- spin_lock_irqsave(&hr_dev->iboe.lock, flags);
+ ret = ib_get_eth_speed(ib_dev, port_num, &props->active_speed,
+ &props->active_width);
+ if (ret)
+ ibdev_warn(ib_dev, "failed to get speed, ret = %d.\n", ret);
- net_dev = hr_dev->iboe.netdevs[port];
+ net_dev = ib_device_get_netdev(ib_dev, port_num);
if (!net_dev) {
- spin_unlock_irqrestore(&hr_dev->iboe.lock, flags);
- dev_err(dev, "find netdev %d failed!\r\n", port);
+ ibdev_err(ib_dev, "find netdev %u failed!\n", port);
return -EINVAL;
}
mtu = iboe_get_mtu(net_dev->mtu);
props->active_mtu = mtu ? min(props->max_mtu, mtu) : IB_MTU_256;
- props->state = (netif_running(net_dev) && netif_carrier_ok(net_dev)) ?
- IB_PORT_ACTIVE : IB_PORT_DOWN;
- props->phys_state = (props->state == IB_PORT_ACTIVE) ? 5 : 3;
- spin_unlock_irqrestore(&hr_dev->iboe.lock, flags);
+ dev_put(net_dev);
+
+ ret = hns_roce_get_port_state(hr_dev, port_num, &props->state);
+ if (ret) {
+ ibdev_err(ib_dev, "failed to get port state.\n");
+ return ret;
+ }
+ props->phys_state = props->state == IB_PORT_ACTIVE ?
+ IB_PORT_PHYS_STATE_LINK_UP :
+ IB_PORT_PHYS_STATE_DISABLED;
return 0;
}
static enum rdma_link_layer hns_roce_get_link_layer(struct ib_device *device,
- u8 port_num)
+ u32 port_num)
{
return IB_LINK_LAYER_ETHERNET;
}
-static int hns_roce_query_gid(struct ib_device *ib_dev, u8 port_num, int index,
- union ib_gid *gid)
-{
- return 0;
-}
-
-static int hns_roce_query_pkey(struct ib_device *ib_dev, u8 port, u16 index,
+static int hns_roce_query_pkey(struct ib_device *ib_dev, u32 port, u16 index,
u16 *pkey)
{
+ if (index > 0)
+ return -EINVAL;
+
*pkey = PKEY_ID;
return 0;
@@ -318,409 +353,561 @@ static int hns_roce_modify_device(struct ib_device *ib_dev, int mask,
return 0;
}
-static int hns_roce_modify_port(struct ib_device *ib_dev, u8 port_num, int mask,
- struct ib_port_modify *props)
+struct hns_user_mmap_entry *
+hns_roce_user_mmap_entry_insert(struct ib_ucontext *ucontext, u64 address,
+ size_t length,
+ enum hns_roce_mmap_type mmap_type)
{
+ struct hns_user_mmap_entry *entry;
+ int ret;
+
+ entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+ if (!entry)
+ return NULL;
+
+ entry->address = address;
+ entry->mmap_type = mmap_type;
+
+ switch (mmap_type) {
+ /* pgoff 0 must be used by DB for compatibility */
+ case HNS_ROCE_MMAP_TYPE_DB:
+ ret = rdma_user_mmap_entry_insert_exact(
+ ucontext, &entry->rdma_entry, length, 0);
+ break;
+ case HNS_ROCE_MMAP_TYPE_DWQE:
+ ret = rdma_user_mmap_entry_insert_range(
+ ucontext, &entry->rdma_entry, length, 1,
+ U32_MAX);
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ if (ret) {
+ kfree(entry);
+ return NULL;
+ }
+
+ return entry;
+}
+
+static void hns_roce_dealloc_uar_entry(struct hns_roce_ucontext *context)
+{
+ if (context->db_mmap_entry)
+ rdma_user_mmap_entry_remove(
+ &context->db_mmap_entry->rdma_entry);
+}
+
+static int hns_roce_alloc_uar_entry(struct ib_ucontext *uctx)
+{
+ struct hns_roce_ucontext *context = to_hr_ucontext(uctx);
+ u64 address;
+
+ address = context->uar.pfn << PAGE_SHIFT;
+ context->db_mmap_entry = hns_roce_user_mmap_entry_insert(
+ uctx, address, PAGE_SIZE, HNS_ROCE_MMAP_TYPE_DB);
+ if (!context->db_mmap_entry)
+ return -ENOMEM;
+
return 0;
}
-static struct ib_ucontext *hns_roce_alloc_ucontext(struct ib_device *ib_dev,
- struct ib_udata *udata)
+static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx,
+ struct ib_udata *udata)
{
- int ret = 0;
- struct hns_roce_ucontext *context;
- struct hns_roce_ib_alloc_ucontext_resp resp;
- struct hns_roce_dev *hr_dev = to_hr_dev(ib_dev);
+ struct hns_roce_ucontext *context = to_hr_ucontext(uctx);
+ struct hns_roce_dev *hr_dev = to_hr_dev(uctx->device);
+ struct hns_roce_ib_alloc_ucontext_resp resp = {};
+ struct hns_roce_ib_alloc_ucontext ucmd = {};
+ int ret = -EAGAIN;
+
+ if (!hr_dev->active)
+ goto error_out;
resp.qp_tab_size = hr_dev->caps.num_qps;
+ resp.srq_tab_size = hr_dev->caps.num_srqs;
- context = kmalloc(sizeof(*context), GFP_KERNEL);
- if (!context)
- return ERR_PTR(-ENOMEM);
+ ret = ib_copy_from_udata(&ucmd, udata,
+ min(udata->inlen, sizeof(ucmd)));
+ if (ret)
+ goto error_out;
+
+ if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
+ context->config = ucmd.config & HNS_ROCE_EXSGE_FLAGS;
+
+ if (context->config & HNS_ROCE_EXSGE_FLAGS) {
+ resp.config |= HNS_ROCE_RSP_EXSGE_FLAGS;
+ resp.max_inline_data = hr_dev->caps.max_sq_inline;
+ }
+
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) {
+ context->config |= ucmd.config & HNS_ROCE_RQ_INLINE_FLAGS;
+ if (context->config & HNS_ROCE_RQ_INLINE_FLAGS)
+ resp.config |= HNS_ROCE_RSP_RQ_INLINE_FLAGS;
+ }
+
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_CQE_INLINE) {
+ context->config |= ucmd.config & HNS_ROCE_CQE_INLINE_FLAGS;
+ if (context->config & HNS_ROCE_CQE_INLINE_FLAGS)
+ resp.config |= HNS_ROCE_RSP_CQE_INLINE_FLAGS;
+ }
+
+ if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
+ resp.congest_type = hr_dev->caps.cong_cap;
ret = hns_roce_uar_alloc(hr_dev, &context->uar);
if (ret)
- goto error_fail_uar_alloc;
+ goto error_out;
+
+ ret = hns_roce_alloc_uar_entry(uctx);
+ if (ret)
+ goto error_fail_uar_entry;
+
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_CQ_RECORD_DB ||
+ hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_RECORD_DB) {
+ INIT_LIST_HEAD(&context->page_list);
+ mutex_init(&context->page_mutex);
+ }
+
+ resp.cqe_size = hr_dev->caps.cqe_sz;
- ret = ib_copy_to_udata(udata, &resp, sizeof(resp));
+ ret = ib_copy_to_udata(udata, &resp,
+ min(udata->outlen, sizeof(resp)));
if (ret)
goto error_fail_copy_to_udata;
- return &context->ibucontext;
+ hns_roce_get_cq_bankid_for_uctx(context);
+
+ return 0;
error_fail_copy_to_udata:
- hns_roce_uar_free(hr_dev, &context->uar);
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_CQ_RECORD_DB ||
+ hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_RECORD_DB)
+ mutex_destroy(&context->page_mutex);
+ hns_roce_dealloc_uar_entry(context);
+
+error_fail_uar_entry:
+ ida_free(&hr_dev->uar_ida.ida, (int)context->uar.logic_idx);
-error_fail_uar_alloc:
- kfree(context);
+error_out:
+ atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_UCTX_ALLOC_ERR_CNT]);
- return ERR_PTR(ret);
+ return ret;
}
-static int hns_roce_dealloc_ucontext(struct ib_ucontext *ibcontext)
+static void hns_roce_dealloc_ucontext(struct ib_ucontext *ibcontext)
{
struct hns_roce_ucontext *context = to_hr_ucontext(ibcontext);
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibcontext->device);
- hns_roce_uar_free(to_hr_dev(ibcontext->device), &context->uar);
- kfree(context);
+ hns_roce_put_cq_bankid_for_uctx(context);
- return 0;
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_CQ_RECORD_DB ||
+ hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_RECORD_DB)
+ mutex_destroy(&context->page_mutex);
+
+ hns_roce_dealloc_uar_entry(context);
+
+ ida_free(&hr_dev->uar_ida.ida, (int)context->uar.logic_idx);
}
-static int hns_roce_mmap(struct ib_ucontext *context,
- struct vm_area_struct *vma)
+static int hns_roce_mmap(struct ib_ucontext *uctx, struct vm_area_struct *vma)
{
- struct hns_roce_dev *hr_dev = to_hr_dev(context->device);
+ struct hns_roce_dev *hr_dev = to_hr_dev(uctx->device);
+ struct rdma_user_mmap_entry *rdma_entry;
+ struct hns_user_mmap_entry *entry;
+ phys_addr_t pfn;
+ pgprot_t prot;
+ int ret;
- if (((vma->vm_end - vma->vm_start) % PAGE_SIZE) != 0)
- return -EINVAL;
+ if (hr_dev->dis_db) {
+ atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_MMAP_ERR_CNT]);
+ return -EPERM;
+ }
- if (vma->vm_pgoff == 0) {
- vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
- if (io_remap_pfn_range(vma, vma->vm_start,
- to_hr_ucontext(context)->uar.pfn,
- PAGE_SIZE, vma->vm_page_prot))
- return -EAGAIN;
- } else if (vma->vm_pgoff == 1 && hr_dev->hw_rev == HNS_ROCE_HW_VER1) {
- /* vm_pgoff: 1 -- TPTR */
- if (io_remap_pfn_range(vma, vma->vm_start,
- hr_dev->tptr_dma_addr >> PAGE_SHIFT,
- hr_dev->tptr_size,
- vma->vm_page_prot))
- return -EAGAIN;
- } else
+ rdma_entry = rdma_user_mmap_entry_get_pgoff(uctx, vma->vm_pgoff);
+ if (!rdma_entry) {
+ atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_MMAP_ERR_CNT]);
return -EINVAL;
+ }
- return 0;
+ entry = to_hns_mmap(rdma_entry);
+ pfn = entry->address >> PAGE_SHIFT;
+
+ switch (entry->mmap_type) {
+ case HNS_ROCE_MMAP_TYPE_DB:
+ case HNS_ROCE_MMAP_TYPE_DWQE:
+ prot = pgprot_device(vma->vm_page_prot);
+ break;
+ default:
+ ret = -EINVAL;
+ goto out;
+ }
+
+ ret = rdma_user_mmap_io(uctx, vma, pfn, rdma_entry->npages * PAGE_SIZE,
+ prot, rdma_entry);
+
+out:
+ rdma_user_mmap_entry_put(rdma_entry);
+ if (ret)
+ atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_MMAP_ERR_CNT]);
+
+ return ret;
}
-static int hns_roce_port_immutable(struct ib_device *ib_dev, u8 port_num,
+static void hns_roce_free_mmap(struct rdma_user_mmap_entry *rdma_entry)
+{
+ struct hns_user_mmap_entry *entry = to_hns_mmap(rdma_entry);
+
+ kfree(entry);
+}
+
+static int hns_roce_port_immutable(struct ib_device *ib_dev, u32 port_num,
struct ib_port_immutable *immutable)
{
struct ib_port_attr attr;
int ret;
- ret = hns_roce_query_port(ib_dev, port_num, &attr);
+ ret = ib_query_port(ib_dev, port_num, &attr);
if (ret)
return ret;
immutable->pkey_tbl_len = attr.pkey_tbl_len;
immutable->gid_tbl_len = attr.gid_tbl_len;
- immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
immutable->max_mad_size = IB_MGMT_MAD_SIZE;
+ immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
+ if (to_hr_dev(ib_dev)->caps.flags & HNS_ROCE_CAP_FLAG_ROCE_V1_V2)
+ immutable->core_cap_flags |= RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
return 0;
}
-static void hns_roce_unregister_device(struct hns_roce_dev *hr_dev)
+static void hns_roce_disassociate_ucontext(struct ib_ucontext *ibcontext)
{
- struct hns_roce_ib_iboe *iboe = &hr_dev->iboe;
+}
- unregister_inetaddr_notifier(&iboe->nb_inet);
- unregister_netdevice_notifier(&iboe->nb);
- ib_unregister_device(&hr_dev->ib_dev);
+static void hns_roce_get_fw_ver(struct ib_device *device, char *str)
+{
+ u64 fw_ver = to_hr_dev(device)->caps.fw_ver;
+ unsigned int major, minor, sub_minor;
+
+ major = upper_32_bits(fw_ver);
+ minor = high_16_bits(lower_32_bits(fw_ver));
+ sub_minor = low_16_bits(fw_ver);
+
+ snprintf(str, IB_FW_VERSION_NAME_MAX, "%u.%u.%04u", major, minor,
+ sub_minor);
}
-static int hns_roce_register_device(struct hns_roce_dev *hr_dev)
+#define HNS_ROCE_HW_CNT(ename, cname) \
+ [HNS_ROCE_HW_##ename##_CNT].name = cname
+
+static const struct rdma_stat_desc hns_roce_port_stats_descs[] = {
+ HNS_ROCE_HW_CNT(RX_RC_PKT, "rx_rc_pkt"),
+ HNS_ROCE_HW_CNT(RX_UC_PKT, "rx_uc_pkt"),
+ HNS_ROCE_HW_CNT(RX_UD_PKT, "rx_ud_pkt"),
+ HNS_ROCE_HW_CNT(RX_XRC_PKT, "rx_xrc_pkt"),
+ HNS_ROCE_HW_CNT(RX_PKT, "rx_pkt"),
+ HNS_ROCE_HW_CNT(RX_ERR_PKT, "rx_err_pkt"),
+ HNS_ROCE_HW_CNT(RX_CNP_PKT, "rx_cnp_pkt"),
+ HNS_ROCE_HW_CNT(TX_RC_PKT, "tx_rc_pkt"),
+ HNS_ROCE_HW_CNT(TX_UC_PKT, "tx_uc_pkt"),
+ HNS_ROCE_HW_CNT(TX_UD_PKT, "tx_ud_pkt"),
+ HNS_ROCE_HW_CNT(TX_XRC_PKT, "tx_xrc_pkt"),
+ HNS_ROCE_HW_CNT(TX_PKT, "tx_pkt"),
+ HNS_ROCE_HW_CNT(TX_ERR_PKT, "tx_err_pkt"),
+ HNS_ROCE_HW_CNT(TX_CNP_PKT, "tx_cnp_pkt"),
+ HNS_ROCE_HW_CNT(TRP_GET_MPT_ERR_PKT, "trp_get_mpt_err_pkt"),
+ HNS_ROCE_HW_CNT(TRP_GET_IRRL_ERR_PKT, "trp_get_irrl_err_pkt"),
+ HNS_ROCE_HW_CNT(ECN_DB, "ecn_doorbell"),
+ HNS_ROCE_HW_CNT(RX_BUF, "rx_buffer"),
+ HNS_ROCE_HW_CNT(TRP_RX_SOF, "trp_rx_sof"),
+ HNS_ROCE_HW_CNT(CQ_CQE, "cq_cqe"),
+ HNS_ROCE_HW_CNT(CQ_POE, "cq_poe"),
+ HNS_ROCE_HW_CNT(CQ_NOTIFY, "cq_notify"),
+};
+
+static struct rdma_hw_stats *hns_roce_alloc_hw_port_stats(
+ struct ib_device *device, u32 port_num)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(device);
+
+ if (port_num > hr_dev->caps.num_ports) {
+ ibdev_err(device, "invalid port num.\n");
+ return NULL;
+ }
+
+ return rdma_alloc_hw_stats_struct(hns_roce_port_stats_descs,
+ ARRAY_SIZE(hns_roce_port_stats_descs),
+ RDMA_HW_STATS_DEFAULT_LIFESPAN);
+}
+
+static int hns_roce_get_hw_stats(struct ib_device *device,
+ struct rdma_hw_stats *stats,
+ u32 port, int index)
{
+ struct hns_roce_dev *hr_dev = to_hr_dev(device);
+ int num_counters = HNS_ROCE_HW_CNT_TOTAL;
int ret;
- struct hns_roce_ib_iboe *iboe = NULL;
- struct ib_device *ib_dev = NULL;
- struct device *dev = &hr_dev->pdev->dev;
- iboe = &hr_dev->iboe;
- spin_lock_init(&iboe->lock);
+ if (port == 0)
+ return 0;
- ib_dev = &hr_dev->ib_dev;
- strlcpy(ib_dev->name, "hns_%d", IB_DEVICE_NAME_MAX);
-
- ib_dev->owner = THIS_MODULE;
- ib_dev->node_type = RDMA_NODE_IB_CA;
- ib_dev->dma_device = dev;
-
- ib_dev->phys_port_cnt = hr_dev->caps.num_ports;
- ib_dev->local_dma_lkey = hr_dev->caps.reserved_lkey;
- ib_dev->num_comp_vectors = hr_dev->caps.num_comp_vectors;
- ib_dev->uverbs_abi_ver = 1;
- ib_dev->uverbs_cmd_mask =
- (1ULL << IB_USER_VERBS_CMD_GET_CONTEXT) |
- (1ULL << IB_USER_VERBS_CMD_QUERY_DEVICE) |
- (1ULL << IB_USER_VERBS_CMD_QUERY_PORT) |
- (1ULL << IB_USER_VERBS_CMD_ALLOC_PD) |
- (1ULL << IB_USER_VERBS_CMD_DEALLOC_PD) |
- (1ULL << IB_USER_VERBS_CMD_REG_MR) |
- (1ULL << IB_USER_VERBS_CMD_DEREG_MR) |
- (1ULL << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
- (1ULL << IB_USER_VERBS_CMD_CREATE_CQ) |
- (1ULL << IB_USER_VERBS_CMD_DESTROY_CQ) |
- (1ULL << IB_USER_VERBS_CMD_CREATE_QP) |
- (1ULL << IB_USER_VERBS_CMD_MODIFY_QP) |
- (1ULL << IB_USER_VERBS_CMD_QUERY_QP) |
- (1ULL << IB_USER_VERBS_CMD_DESTROY_QP);
-
- /* HCA||device||port */
- ib_dev->modify_device = hns_roce_modify_device;
- ib_dev->query_device = hns_roce_query_device;
- ib_dev->query_port = hns_roce_query_port;
- ib_dev->modify_port = hns_roce_modify_port;
- ib_dev->get_link_layer = hns_roce_get_link_layer;
- ib_dev->get_netdev = hns_roce_get_netdev;
- ib_dev->query_gid = hns_roce_query_gid;
- ib_dev->add_gid = hns_roce_add_gid;
- ib_dev->del_gid = hns_roce_del_gid;
- ib_dev->query_pkey = hns_roce_query_pkey;
- ib_dev->alloc_ucontext = hns_roce_alloc_ucontext;
- ib_dev->dealloc_ucontext = hns_roce_dealloc_ucontext;
- ib_dev->mmap = hns_roce_mmap;
-
- /* PD */
- ib_dev->alloc_pd = hns_roce_alloc_pd;
- ib_dev->dealloc_pd = hns_roce_dealloc_pd;
-
- /* AH */
- ib_dev->create_ah = hns_roce_create_ah;
- ib_dev->query_ah = hns_roce_query_ah;
- ib_dev->destroy_ah = hns_roce_destroy_ah;
-
- /* QP */
- ib_dev->create_qp = hns_roce_create_qp;
- ib_dev->modify_qp = hns_roce_modify_qp;
- ib_dev->query_qp = hr_dev->hw->query_qp;
- ib_dev->destroy_qp = hr_dev->hw->destroy_qp;
- ib_dev->post_send = hr_dev->hw->post_send;
- ib_dev->post_recv = hr_dev->hw->post_recv;
-
- /* CQ */
- ib_dev->create_cq = hns_roce_ib_create_cq;
- ib_dev->destroy_cq = hns_roce_ib_destroy_cq;
- ib_dev->req_notify_cq = hr_dev->hw->req_notify_cq;
- ib_dev->poll_cq = hr_dev->hw->poll_cq;
-
- /* MR */
- ib_dev->get_dma_mr = hns_roce_get_dma_mr;
- ib_dev->reg_user_mr = hns_roce_reg_user_mr;
- ib_dev->dereg_mr = hns_roce_dereg_mr;
-
- /* OTHERS */
- ib_dev->get_port_immutable = hns_roce_port_immutable;
-
- ret = ib_register_device(ib_dev, NULL);
+ if (port > hr_dev->caps.num_ports)
+ return -EINVAL;
+
+ ret = hr_dev->hw->query_hw_counter(hr_dev, stats->value, port,
+ &num_counters);
if (ret) {
- dev_err(dev, "ib_register_device failed!\n");
+ ibdev_err(device, "failed to query hw counter, ret = %d\n",
+ ret);
return ret;
}
- ret = hns_roce_setup_mtu_mac(hr_dev);
- if (ret) {
- dev_err(dev, "setup_mtu_mac failed!\n");
- goto error_failed_setup_mtu_mac;
- }
+ return num_counters;
+}
- iboe->nb.notifier_call = hns_roce_netdev_event;
- ret = register_netdevice_notifier(&iboe->nb);
- if (ret) {
- dev_err(dev, "register_netdevice_notifier failed!\n");
- goto error_failed_setup_mtu_mac;
+static void
+ hns_roce_unregister_bond_cleanup(struct hns_roce_dev *hr_dev,
+ struct hns_roce_bond_group *bond_grp)
+{
+ struct net_device *net_dev;
+ int i;
+
+ /* To avoid the loss of other slave devices when main_hr_dev
+ * is unregistered, re-initialize the remaining slaves before
+ * the bond resources cleanup.
+ */
+ bond_grp->bond_state = HNS_ROCE_BOND_NOT_BONDED;
+ for (i = 0; i < ROCE_BOND_FUNC_MAX; i++) {
+ net_dev = bond_grp->bond_func_info[i].net_dev;
+ if (net_dev && net_dev != get_hr_netdev(hr_dev, 0))
+ hns_roce_bond_init_client(bond_grp, i);
}
- return 0;
+ hns_roce_cleanup_bond(bond_grp);
+}
-error_failed_setup_mtu_mac:
- ib_unregister_device(ib_dev);
+static void hns_roce_unregister_device(struct hns_roce_dev *hr_dev,
+ bool bond_cleanup)
+{
+ struct net_device *net_dev = get_hr_netdev(hr_dev, 0);
+ struct hns_roce_ib_iboe *iboe = &hr_dev->iboe;
+ struct hns_roce_bond_group *bond_grp;
+ u8 bus_num = get_hr_bus_num(hr_dev);
- return ret;
+ if (bond_cleanup && hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_BOND) {
+ bond_grp = hns_roce_get_bond_grp(net_dev, bus_num);
+ if (bond_grp)
+ hns_roce_unregister_bond_cleanup(hr_dev, bond_grp);
+ }
+
+ hr_dev->active = false;
+ unregister_netdevice_notifier(&iboe->nb);
+ ib_unregister_device(&hr_dev->ib_dev);
}
-static const struct of_device_id hns_roce_of_match[] = {
- { .compatible = "hisilicon,hns-roce-v1", .data = &hns_roce_hw_v1, },
- {},
+static const struct ib_device_ops hns_roce_dev_ops = {
+ .owner = THIS_MODULE,
+ .driver_id = RDMA_DRIVER_HNS,
+ .uverbs_abi_ver = 1,
+ .uverbs_no_driver_id_binding = 1,
+
+ .get_dev_fw_str = hns_roce_get_fw_ver,
+ .add_gid = hns_roce_add_gid,
+ .alloc_pd = hns_roce_alloc_pd,
+ .alloc_ucontext = hns_roce_alloc_ucontext,
+ .create_ah = hns_roce_create_ah,
+ .create_user_ah = hns_roce_create_ah,
+ .create_cq = hns_roce_create_cq,
+ .create_qp = hns_roce_create_qp,
+ .dealloc_pd = hns_roce_dealloc_pd,
+ .dealloc_ucontext = hns_roce_dealloc_ucontext,
+ .del_gid = hns_roce_del_gid,
+ .dereg_mr = hns_roce_dereg_mr,
+ .destroy_ah = hns_roce_destroy_ah,
+ .destroy_cq = hns_roce_destroy_cq,
+ .disassociate_ucontext = hns_roce_disassociate_ucontext,
+ .get_dma_mr = hns_roce_get_dma_mr,
+ .get_link_layer = hns_roce_get_link_layer,
+ .get_port_immutable = hns_roce_port_immutable,
+ .mmap = hns_roce_mmap,
+ .mmap_free = hns_roce_free_mmap,
+ .modify_device = hns_roce_modify_device,
+ .modify_qp = hns_roce_modify_qp,
+ .query_ah = hns_roce_query_ah,
+ .query_device = hns_roce_query_device,
+ .query_pkey = hns_roce_query_pkey,
+ .query_port = hns_roce_query_port,
+ .reg_user_mr = hns_roce_reg_user_mr,
+
+ INIT_RDMA_OBJ_SIZE(ib_ah, hns_roce_ah, ibah),
+ INIT_RDMA_OBJ_SIZE(ib_cq, hns_roce_cq, ib_cq),
+ INIT_RDMA_OBJ_SIZE(ib_pd, hns_roce_pd, ibpd),
+ INIT_RDMA_OBJ_SIZE(ib_qp, hns_roce_qp, ibqp),
+ INIT_RDMA_OBJ_SIZE(ib_ucontext, hns_roce_ucontext, ibucontext),
};
-MODULE_DEVICE_TABLE(of, hns_roce_of_match);
-static const struct acpi_device_id hns_roce_acpi_match[] = {
- { "HISI00D1", (kernel_ulong_t)&hns_roce_hw_v1 },
- {},
+static const struct ib_device_ops hns_roce_dev_hw_stats_ops = {
+ .alloc_hw_port_stats = hns_roce_alloc_hw_port_stats,
+ .get_hw_stats = hns_roce_get_hw_stats,
};
-MODULE_DEVICE_TABLE(acpi, hns_roce_acpi_match);
-static int hns_roce_node_match(struct device *dev, void *fwnode)
-{
- return dev->fwnode == fwnode;
-}
+static const struct ib_device_ops hns_roce_dev_mr_ops = {
+ .rereg_user_mr = hns_roce_rereg_user_mr,
+};
-static struct
-platform_device *hns_roce_find_pdev(struct fwnode_handle *fwnode)
-{
- struct device *dev;
+static const struct ib_device_ops hns_roce_dev_frmr_ops = {
+ .alloc_mr = hns_roce_alloc_mr,
+ .map_mr_sg = hns_roce_map_mr_sg,
+};
- /* get the 'device'corresponding to matching 'fwnode' */
- dev = bus_find_device(&platform_bus_type, NULL,
- fwnode, hns_roce_node_match);
- /* get the platform device */
- return dev ? to_platform_device(dev) : NULL;
-}
+static const struct ib_device_ops hns_roce_dev_srq_ops = {
+ .create_srq = hns_roce_create_srq,
+ .destroy_srq = hns_roce_destroy_srq,
+
+ INIT_RDMA_OBJ_SIZE(ib_srq, hns_roce_srq, ibsrq),
+};
+
+static const struct ib_device_ops hns_roce_dev_xrcd_ops = {
+ .alloc_xrcd = hns_roce_alloc_xrcd,
+ .dealloc_xrcd = hns_roce_dealloc_xrcd,
+
+ INIT_RDMA_OBJ_SIZE(ib_xrcd, hns_roce_xrcd, ibxrcd),
+};
-static int hns_roce_get_cfg(struct hns_roce_dev *hr_dev)
+static const struct ib_device_ops hns_roce_dev_restrack_ops = {
+ .fill_res_cq_entry = hns_roce_fill_res_cq_entry,
+ .fill_res_cq_entry_raw = hns_roce_fill_res_cq_entry_raw,
+ .fill_res_qp_entry = hns_roce_fill_res_qp_entry,
+ .fill_res_qp_entry_raw = hns_roce_fill_res_qp_entry_raw,
+ .fill_res_mr_entry = hns_roce_fill_res_mr_entry,
+ .fill_res_mr_entry_raw = hns_roce_fill_res_mr_entry_raw,
+ .fill_res_srq_entry = hns_roce_fill_res_srq_entry,
+ .fill_res_srq_entry_raw = hns_roce_fill_res_srq_entry_raw,
+};
+
+static int hns_roce_register_device(struct hns_roce_dev *hr_dev)
{
- int i;
+ struct hns_roce_ib_iboe *iboe = NULL;
+ struct device *dev = hr_dev->dev;
+ struct ib_device *ib_dev = NULL;
+ struct net_device *net_dev;
+ unsigned int i;
int ret;
- u8 phy_port;
- int port_cnt = 0;
- struct device *dev = &hr_dev->pdev->dev;
- struct device_node *net_node;
- struct net_device *netdev = NULL;
- struct platform_device *pdev = NULL;
- struct resource *res;
-
- /* check if we are compatible with the underlying SoC */
- if (dev_of_node(dev)) {
- const struct of_device_id *of_id;
-
- of_id = of_match_node(hns_roce_of_match, dev->of_node);
- if (!of_id) {
- dev_err(dev, "device is not compatible!\n");
- return -ENXIO;
- }
- hr_dev->hw = (struct hns_roce_hw *)of_id->data;
- if (!hr_dev->hw) {
- dev_err(dev, "couldn't get H/W specific DT data!\n");
- return -ENXIO;
- }
- } else if (is_acpi_device_node(dev->fwnode)) {
- const struct acpi_device_id *acpi_id;
- acpi_id = acpi_match_device(hns_roce_acpi_match, dev);
- if (!acpi_id) {
- dev_err(dev, "device is not compatible!\n");
- return -ENXIO;
- }
- hr_dev->hw = (struct hns_roce_hw *) acpi_id->driver_data;
- if (!hr_dev->hw) {
- dev_err(dev, "couldn't get H/W specific ACPI data!\n");
- return -ENXIO;
- }
- } else {
- dev_err(dev, "can't read compatibility data from DT or ACPI\n");
- return -ENXIO;
- }
+ iboe = &hr_dev->iboe;
+ spin_lock_init(&iboe->lock);
- /* get the mapped register base address */
- res = platform_get_resource(hr_dev->pdev, IORESOURCE_MEM, 0);
- if (!res) {
- dev_err(dev, "memory resource not found!\n");
- return -EINVAL;
+ ib_dev = &hr_dev->ib_dev;
+
+ ib_dev->node_type = RDMA_NODE_IB_CA;
+ ib_dev->dev.parent = dev;
+
+ ib_dev->phys_port_cnt = hr_dev->caps.num_ports;
+ ib_dev->local_dma_lkey = hr_dev->caps.reserved_lkey;
+ ib_dev->num_comp_vectors = hr_dev->caps.num_comp_vectors;
+
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_REREG_MR)
+ ib_set_device_ops(ib_dev, &hns_roce_dev_mr_ops);
+
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_FRMR)
+ ib_set_device_ops(ib_dev, &hns_roce_dev_frmr_ops);
+
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) {
+ ib_set_device_ops(ib_dev, &hns_roce_dev_srq_ops);
+ ib_set_device_ops(ib_dev, hr_dev->hw->hns_roce_dev_srq_ops);
}
- hr_dev->reg_base = devm_ioremap_resource(dev, res);
- if (IS_ERR(hr_dev->reg_base))
- return PTR_ERR(hr_dev->reg_base);
- /* read the node_guid of IB device from the DT or ACPI */
- ret = device_property_read_u8_array(dev, "node-guid",
- (u8 *)&hr_dev->ib_dev.node_guid,
- GUID_LEN);
- if (ret) {
- dev_err(dev, "couldn't get node_guid from DT or ACPI!\n");
- return ret;
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_XRC)
+ ib_set_device_ops(ib_dev, &hns_roce_dev_xrcd_ops);
+
+ if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09 &&
+ !hr_dev->is_vf)
+ ib_set_device_ops(ib_dev, &hns_roce_dev_hw_stats_ops);
+
+ ib_set_device_ops(ib_dev, hr_dev->hw->hns_roce_dev_ops);
+ ib_set_device_ops(ib_dev, &hns_roce_dev_ops);
+ ib_set_device_ops(ib_dev, &hns_roce_dev_restrack_ops);
+
+ dma_set_max_seg_size(dev, SZ_2G);
+
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_BOND) {
+ ret = hns_roce_alloc_bond_grp(hr_dev);
+ if (ret) {
+ dev_err(dev, "failed to alloc bond_grp for bus %u, ret = %d\n",
+ get_hr_bus_num(hr_dev), ret);
+ return ret;
+ }
}
- /* get the RoCE associated ethernet ports or netdevices */
- for (i = 0; i < HNS_ROCE_MAX_PORTS; i++) {
- if (dev_of_node(dev)) {
- net_node = of_parse_phandle(dev->of_node, "eth-handle",
- i);
- if (!net_node)
- continue;
- pdev = of_find_device_by_node(net_node);
- } else if (is_acpi_device_node(dev->fwnode)) {
- struct acpi_reference_args args;
- struct fwnode_handle *fwnode;
-
- ret = acpi_node_get_property_reference(dev->fwnode,
- "eth-handle",
- i, &args);
- if (ret)
- continue;
- fwnode = acpi_fwnode_handle(args.adev);
- pdev = hns_roce_find_pdev(fwnode);
- } else {
- dev_err(dev, "cannot read data from DT or ACPI\n");
- return -ENXIO;
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_BOND &&
+ hns_roce_bond_is_active(hr_dev)) {
+ ret = hns_roce_bond_init(hr_dev);
+ if (ret) {
+ dev_err(dev, "failed to init bond!\n");
+ return ret;
}
+ ret = ib_register_device(ib_dev, "hns_bond_%d", dev);
+ } else {
+ for (i = 0; i < hr_dev->caps.num_ports; i++) {
+ net_dev = get_hr_netdev(hr_dev, i);
+ if (!net_dev)
+ continue;
- if (pdev) {
- netdev = platform_get_drvdata(pdev);
- phy_port = (u8)i;
- if (netdev) {
- hr_dev->iboe.netdevs[port_cnt] = netdev;
- hr_dev->iboe.phy_port[port_cnt] = phy_port;
- } else {
- dev_err(dev, "no netdev found with pdev %s\n",
- pdev->name);
- return -ENODEV;
- }
- port_cnt++;
+ ret = ib_device_set_netdev(ib_dev, net_dev, i + 1);
+ if (ret)
+ return ret;
}
+ ret = ib_register_device(ib_dev, "hns_%d", dev);
}
-
- if (port_cnt == 0) {
- dev_err(dev, "unable to get eth-handle for available ports!\n");
- return -EINVAL;
+ if (ret) {
+ dev_err(dev, "ib_register_device failed!\n");
+ return ret;
}
- hr_dev->caps.num_ports = port_cnt;
-
- /* cmd issue mode: 0 is poll, 1 is event */
- hr_dev->cmd_mod = 1;
- hr_dev->loop_idc = 0;
-
- /* read the interrupt names from the DT or ACPI */
- ret = device_property_read_string_array(dev, "interrupt-names",
- hr_dev->irq_names,
- HNS_ROCE_MAX_IRQ_NUM);
- if (ret < 0) {
- dev_err(dev, "couldn't get interrupt names from DT or ACPI!\n");
- return ret;
+ ret = hns_roce_setup_mtu_mac(hr_dev);
+ if (ret) {
+ dev_err(dev, "setup_mtu_mac failed!\n");
+ goto error_failed_setup_mtu_mac;
}
- /* fetch the interrupt numbers */
- for (i = 0; i < HNS_ROCE_MAX_IRQ_NUM; i++) {
- hr_dev->irq[i] = platform_get_irq(hr_dev->pdev, i);
- if (hr_dev->irq[i] <= 0) {
- dev_err(dev, "platform get of irq[=%d] failed!\n", i);
- return -EINVAL;
- }
+ iboe->nb.notifier_call = hns_roce_netdev_event;
+ ret = register_netdevice_notifier(&iboe->nb);
+ if (ret) {
+ dev_err(dev, "register_netdevice_notifier failed!\n");
+ goto error_failed_setup_mtu_mac;
}
+ hr_dev->active = true;
return 0;
+
+error_failed_setup_mtu_mac:
+ ib_unregister_device(ib_dev);
+
+ return ret;
}
static int hns_roce_init_hem(struct hns_roce_dev *hr_dev)
{
+ struct device *dev = hr_dev->dev;
int ret;
- struct device *dev = &hr_dev->pdev->dev;
-
- ret = hns_roce_init_hem_table(hr_dev, &hr_dev->mr_table.mtt_table,
- HEM_TYPE_MTT, hr_dev->caps.mtt_entry_sz,
- hr_dev->caps.num_mtt_segs, 1);
- if (ret) {
- dev_err(dev, "Failed to init MTT context memory, aborting.\n");
- return ret;
- }
ret = hns_roce_init_hem_table(hr_dev, &hr_dev->mr_table.mtpt_table,
HEM_TYPE_MTPT, hr_dev->caps.mtpt_entry_sz,
- hr_dev->caps.num_mtpts, 1);
+ hr_dev->caps.num_mtpts);
if (ret) {
- dev_err(dev, "Failed to init MTPT context memory, aborting.\n");
- goto err_unmap_mtt;
+ dev_err(dev, "failed to init MTPT context memory, aborting.\n");
+ return ret;
}
ret = hns_roce_init_hem_table(hr_dev, &hr_dev->qp_table.qp_table,
- HEM_TYPE_QPC, hr_dev->caps.qpc_entry_sz,
- hr_dev->caps.num_qps, 1);
+ HEM_TYPE_QPC, hr_dev->caps.qpc_sz,
+ hr_dev->caps.num_qps);
if (ret) {
- dev_err(dev, "Failed to init QP context memory, aborting.\n");
+ dev_err(dev, "failed to init QP context memory, aborting.\n");
goto err_unmap_dmpt;
}
@@ -728,22 +915,122 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev)
HEM_TYPE_IRRL,
hr_dev->caps.irrl_entry_sz *
hr_dev->caps.max_qp_init_rdma,
- hr_dev->caps.num_qps, 1);
+ hr_dev->caps.num_qps);
if (ret) {
- dev_err(dev, "Failed to init irrl_table memory, aborting.\n");
+ dev_err(dev, "failed to init irrl_table memory, aborting.\n");
goto err_unmap_qp;
}
+ if (hr_dev->caps.trrl_entry_sz) {
+ ret = hns_roce_init_hem_table(hr_dev,
+ &hr_dev->qp_table.trrl_table,
+ HEM_TYPE_TRRL,
+ hr_dev->caps.trrl_entry_sz *
+ hr_dev->caps.max_qp_dest_rdma,
+ hr_dev->caps.num_qps);
+ if (ret) {
+ dev_err(dev,
+ "failed to init trrl_table memory, aborting.\n");
+ goto err_unmap_irrl;
+ }
+ }
+
ret = hns_roce_init_hem_table(hr_dev, &hr_dev->cq_table.table,
HEM_TYPE_CQC, hr_dev->caps.cqc_entry_sz,
- hr_dev->caps.num_cqs, 1);
+ hr_dev->caps.num_cqs);
if (ret) {
- dev_err(dev, "Failed to init CQ context memory, aborting.\n");
- goto err_unmap_irrl;
+ dev_err(dev, "failed to init CQ context memory, aborting.\n");
+ goto err_unmap_trrl;
+ }
+
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) {
+ ret = hns_roce_init_hem_table(hr_dev, &hr_dev->srq_table.table,
+ HEM_TYPE_SRQC,
+ hr_dev->caps.srqc_entry_sz,
+ hr_dev->caps.num_srqs);
+ if (ret) {
+ dev_err(dev,
+ "failed to init SRQ context memory, aborting.\n");
+ goto err_unmap_cq;
+ }
+ }
+
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL) {
+ ret = hns_roce_init_hem_table(hr_dev,
+ &hr_dev->qp_table.sccc_table,
+ HEM_TYPE_SCCC,
+ hr_dev->caps.sccc_sz,
+ hr_dev->caps.num_qps);
+ if (ret) {
+ dev_err(dev,
+ "failed to init SCC context memory, aborting.\n");
+ goto err_unmap_srq;
+ }
+ }
+
+ if (hr_dev->caps.qpc_timer_entry_sz) {
+ ret = hns_roce_init_hem_table(hr_dev, &hr_dev->qpc_timer_table,
+ HEM_TYPE_QPC_TIMER,
+ hr_dev->caps.qpc_timer_entry_sz,
+ hr_dev->caps.qpc_timer_bt_num);
+ if (ret) {
+ dev_err(dev,
+ "failed to init QPC timer memory, aborting.\n");
+ goto err_unmap_ctx;
+ }
+ }
+
+ if (hr_dev->caps.cqc_timer_entry_sz) {
+ ret = hns_roce_init_hem_table(hr_dev, &hr_dev->cqc_timer_table,
+ HEM_TYPE_CQC_TIMER,
+ hr_dev->caps.cqc_timer_entry_sz,
+ hr_dev->caps.cqc_timer_bt_num);
+ if (ret) {
+ dev_err(dev,
+ "failed to init CQC timer memory, aborting.\n");
+ goto err_unmap_qpc_timer;
+ }
+ }
+
+ if (hr_dev->caps.gmv_entry_sz) {
+ ret = hns_roce_init_hem_table(hr_dev, &hr_dev->gmv_table,
+ HEM_TYPE_GMV,
+ hr_dev->caps.gmv_entry_sz,
+ hr_dev->caps.gmv_entry_num);
+ if (ret) {
+ dev_err(dev,
+ "failed to init gmv table memory, ret = %d\n",
+ ret);
+ goto err_unmap_cqc_timer;
+ }
}
return 0;
+err_unmap_cqc_timer:
+ if (hr_dev->caps.cqc_timer_entry_sz)
+ hns_roce_cleanup_hem_table(hr_dev, &hr_dev->cqc_timer_table);
+
+err_unmap_qpc_timer:
+ if (hr_dev->caps.qpc_timer_entry_sz)
+ hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qpc_timer_table);
+
+err_unmap_ctx:
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL)
+ hns_roce_cleanup_hem_table(hr_dev,
+ &hr_dev->qp_table.sccc_table);
+err_unmap_srq:
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ)
+ hns_roce_cleanup_hem_table(hr_dev, &hr_dev->srq_table.table);
+
+err_unmap_cq:
+ hns_roce_cleanup_hem_table(hr_dev, &hr_dev->cq_table.table);
+
+err_unmap_trrl:
+ if (hr_dev->caps.trrl_entry_sz)
+ hns_roce_cleanup_hem_table(hr_dev,
+ &hr_dev->qp_table.trrl_table);
+
err_unmap_irrl:
hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qp_table.irrl_table);
@@ -753,12 +1040,15 @@ err_unmap_qp:
err_unmap_dmpt:
hns_roce_cleanup_hem_table(hr_dev, &hr_dev->mr_table.mtpt_table);
-err_unmap_mtt:
- hns_roce_cleanup_hem_table(hr_dev, &hr_dev->mr_table.mtt_table);
-
return ret;
}
+static void hns_roce_teardown_hca(struct hns_roce_dev *hr_dev)
+{
+ hns_roce_cleanup_bitmap(hr_dev);
+ mutex_destroy(&hr_dev->pgdir_mutex);
+}
+
/**
* hns_roce_setup_hca - setup host channel adapter
* @hr_dev: pointer to hns roce device
@@ -766,114 +1056,144 @@ err_unmap_mtt:
*/
static int hns_roce_setup_hca(struct hns_roce_dev *hr_dev)
{
+ struct device *dev = hr_dev->dev;
int ret;
- struct device *dev = &hr_dev->pdev->dev;
spin_lock_init(&hr_dev->sm_lock);
- spin_lock_init(&hr_dev->bt_cmd_lock);
- ret = hns_roce_init_uar_table(hr_dev);
- if (ret) {
- dev_err(dev, "Failed to initialize uar table. aborting\n");
- return ret;
- }
+ INIT_LIST_HEAD(&hr_dev->qp_list);
+ spin_lock_init(&hr_dev->qp_list_lock);
- ret = hns_roce_uar_alloc(hr_dev, &hr_dev->priv_uar);
- if (ret) {
- dev_err(dev, "Failed to allocate priv_uar.\n");
- goto err_uar_table_free;
- }
-
- ret = hns_roce_init_pd_table(hr_dev);
- if (ret) {
- dev_err(dev, "Failed to init protected domain table.\n");
- goto err_uar_alloc_free;
- }
+ INIT_LIST_HEAD(&hr_dev->pgdir_list);
+ mutex_init(&hr_dev->pgdir_mutex);
- ret = hns_roce_init_mr_table(hr_dev);
- if (ret) {
- dev_err(dev, "Failed to init memory region table.\n");
- goto err_pd_table_free;
- }
+ hns_roce_init_uar_table(hr_dev);
- ret = hns_roce_init_cq_table(hr_dev);
+ ret = hns_roce_uar_alloc(hr_dev, &hr_dev->priv_uar);
if (ret) {
- dev_err(dev, "Failed to init completion queue table.\n");
- goto err_mr_table_free;
+ dev_err(dev, "failed to allocate priv_uar.\n");
+ goto err_uar_table_free;
}
ret = hns_roce_init_qp_table(hr_dev);
if (ret) {
- dev_err(dev, "Failed to init queue pair table.\n");
- goto err_cq_table_free;
+ dev_err(dev, "failed to init qp_table.\n");
+ goto err_uar_table_free;
}
- return 0;
+ hns_roce_init_pd_table(hr_dev);
+
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_XRC)
+ hns_roce_init_xrcd_table(hr_dev);
-err_cq_table_free:
- hns_roce_cleanup_cq_table(hr_dev);
+ hns_roce_init_mr_table(hr_dev);
-err_mr_table_free:
- hns_roce_cleanup_mr_table(hr_dev);
+ hns_roce_init_cq_table(hr_dev);
-err_pd_table_free:
- hns_roce_cleanup_pd_table(hr_dev);
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ)
+ hns_roce_init_srq_table(hr_dev);
-err_uar_alloc_free:
- hns_roce_uar_free(hr_dev, &hr_dev->priv_uar);
+ return 0;
err_uar_table_free:
- hns_roce_cleanup_uar_table(hr_dev);
+ ida_destroy(&hr_dev->uar_ida.ida);
+ mutex_destroy(&hr_dev->pgdir_mutex);
+
return ret;
}
-/**
- * hns_roce_probe - RoCE driver entrance
- * @pdev: pointer to platform device
- * Return : int
- *
- */
-static int hns_roce_probe(struct platform_device *pdev)
+static void check_and_get_armed_cq(struct list_head *cq_list, struct ib_cq *cq)
{
- int ret;
- struct hns_roce_dev *hr_dev;
- struct device *dev = &pdev->dev;
+ struct hns_roce_cq *hr_cq = to_hr_cq(cq);
+ unsigned long flags;
- hr_dev = (struct hns_roce_dev *)ib_alloc_device(sizeof(*hr_dev));
- if (!hr_dev)
- return -ENOMEM;
+ spin_lock_irqsave(&hr_cq->lock, flags);
+ if (cq->comp_handler) {
+ if (!hr_cq->is_armed) {
+ hr_cq->is_armed = 1;
+ list_add_tail(&hr_cq->node, cq_list);
+ }
+ }
+ spin_unlock_irqrestore(&hr_cq->lock, flags);
+}
- hr_dev->pdev = pdev;
- platform_set_drvdata(pdev, hr_dev);
+void hns_roce_handle_device_err(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_qp *hr_qp;
+ struct hns_roce_cq *hr_cq;
+ struct list_head cq_list;
+ unsigned long flags_qp;
+ unsigned long flags;
+
+ INIT_LIST_HEAD(&cq_list);
- if (dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64ULL)) &&
- dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32ULL))) {
- dev_err(dev, "Not usable DMA addressing mode\n");
- ret = -EIO;
- goto error_failed_get_cfg;
+ spin_lock_irqsave(&hr_dev->qp_list_lock, flags);
+ list_for_each_entry(hr_qp, &hr_dev->qp_list, node) {
+ spin_lock_irqsave(&hr_qp->sq.lock, flags_qp);
+ if (hr_qp->sq.tail != hr_qp->sq.head)
+ check_and_get_armed_cq(&cq_list, hr_qp->ibqp.send_cq);
+ spin_unlock_irqrestore(&hr_qp->sq.lock, flags_qp);
+
+ spin_lock_irqsave(&hr_qp->rq.lock, flags_qp);
+ if ((!hr_qp->ibqp.srq) && (hr_qp->rq.tail != hr_qp->rq.head))
+ check_and_get_armed_cq(&cq_list, hr_qp->ibqp.recv_cq);
+ spin_unlock_irqrestore(&hr_qp->rq.lock, flags_qp);
}
- ret = hns_roce_get_cfg(hr_dev);
- if (ret) {
- dev_err(dev, "Get Configuration failed!\n");
- goto error_failed_get_cfg;
+ list_for_each_entry(hr_cq, &cq_list, node)
+ hns_roce_cq_completion(hr_dev, hr_cq->cqn);
+
+ spin_unlock_irqrestore(&hr_dev->qp_list_lock, flags);
+}
+
+static int hns_roce_alloc_dfx_cnt(struct hns_roce_dev *hr_dev)
+{
+ hr_dev->dfx_cnt = kvcalloc(HNS_ROCE_DFX_CNT_TOTAL, sizeof(atomic64_t),
+ GFP_KERNEL);
+ if (!hr_dev->dfx_cnt)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static void hns_roce_dealloc_dfx_cnt(struct hns_roce_dev *hr_dev)
+{
+ kvfree(hr_dev->dfx_cnt);
+}
+
+int hns_roce_init(struct hns_roce_dev *hr_dev)
+{
+ struct device *dev = hr_dev->dev;
+ int ret;
+
+ hr_dev->is_reset = false;
+
+ ret = hns_roce_alloc_dfx_cnt(hr_dev);
+ if (ret)
+ return ret;
+
+ if (hr_dev->hw->cmq_init) {
+ ret = hr_dev->hw->cmq_init(hr_dev);
+ if (ret) {
+ dev_err(dev, "init RoCE Command Queue failed!\n");
+ goto error_failed_alloc_dfx_cnt;
+ }
}
- ret = hr_dev->hw->reset(hr_dev, true);
+ ret = hr_dev->hw->hw_profile(hr_dev);
if (ret) {
- dev_err(dev, "Reset RoCE engine failed!\n");
- goto error_failed_get_cfg;
+ dev_err(dev, "get RoCE engine profile failed!\n");
+ goto error_failed_cmd_init;
}
- hr_dev->hw->hw_profile(hr_dev);
-
ret = hns_roce_cmd_init(hr_dev);
if (ret) {
dev_err(dev, "cmd init failed!\n");
goto error_failed_cmd_init;
}
- ret = hns_roce_init_eq_table(hr_dev);
+ /* EQ depends on poll mode, event mode depends on EQ */
+ ret = hr_dev->hw->init_eq(hr_dev);
if (ret) {
dev_err(dev, "eq init failed!\n");
goto error_failed_eq_table;
@@ -881,10 +1201,9 @@ static int hns_roce_probe(struct platform_device *pdev)
if (hr_dev->cmd_mod) {
ret = hns_roce_cmd_use_events(hr_dev);
- if (ret) {
- dev_err(dev, "Switch to event-driven cmd failed!\n");
- goto error_failed_use_event;
- }
+ if (ret)
+ dev_warn(dev,
+ "Cmd event mode failed, set back to poll!\n");
}
ret = hns_roce_init_hem(hr_dev);
@@ -899,23 +1218,28 @@ static int hns_roce_probe(struct platform_device *pdev)
goto error_failed_setup_hca;
}
- ret = hr_dev->hw->hw_init(hr_dev);
- if (ret) {
- dev_err(dev, "hw_init failed!\n");
- goto error_failed_engine_init;
+ if (hr_dev->hw->hw_init) {
+ ret = hr_dev->hw->hw_init(hr_dev);
+ if (ret) {
+ dev_err(dev, "hw_init failed!\n");
+ goto error_failed_engine_init;
+ }
}
ret = hns_roce_register_device(hr_dev);
if (ret)
goto error_failed_register_device;
+ hns_roce_register_debugfs(hr_dev);
+
return 0;
error_failed_register_device:
- hr_dev->hw->hw_exit(hr_dev);
+ if (hr_dev->hw->hw_exit)
+ hr_dev->hw->hw_exit(hr_dev);
error_failed_engine_init:
- hns_roce_cleanup_bitmap(hr_dev);
+ hns_roce_teardown_hca(hr_dev);
error_failed_setup_hca:
hns_roce_cleanup_hem(hr_dev);
@@ -923,61 +1247,41 @@ error_failed_setup_hca:
error_failed_init_hem:
if (hr_dev->cmd_mod)
hns_roce_cmd_use_polling(hr_dev);
-
-error_failed_use_event:
- hns_roce_cleanup_eq_table(hr_dev);
+ hr_dev->hw->cleanup_eq(hr_dev);
error_failed_eq_table:
hns_roce_cmd_cleanup(hr_dev);
error_failed_cmd_init:
- ret = hr_dev->hw->reset(hr_dev, false);
- if (ret)
- dev_err(&hr_dev->pdev->dev, "roce_engine reset fail\n");
+ if (hr_dev->hw->cmq_exit)
+ hr_dev->hw->cmq_exit(hr_dev);
-error_failed_get_cfg:
- ib_dealloc_device(&hr_dev->ib_dev);
+error_failed_alloc_dfx_cnt:
+ hns_roce_dealloc_dfx_cnt(hr_dev);
return ret;
}
-/**
- * hns_roce_remove - remove RoCE device
- * @pdev: pointer to platform device
- */
-static int hns_roce_remove(struct platform_device *pdev)
+void hns_roce_exit(struct hns_roce_dev *hr_dev, bool bond_cleanup)
{
- struct hns_roce_dev *hr_dev = platform_get_drvdata(pdev);
+ hns_roce_unregister_debugfs(hr_dev);
+ hns_roce_unregister_device(hr_dev, bond_cleanup);
- hns_roce_unregister_device(hr_dev);
- hr_dev->hw->hw_exit(hr_dev);
- hns_roce_cleanup_bitmap(hr_dev);
+ if (hr_dev->hw->hw_exit)
+ hr_dev->hw->hw_exit(hr_dev);
+ hns_roce_teardown_hca(hr_dev);
hns_roce_cleanup_hem(hr_dev);
if (hr_dev->cmd_mod)
hns_roce_cmd_use_polling(hr_dev);
- hns_roce_cleanup_eq_table(hr_dev);
+ hr_dev->hw->cleanup_eq(hr_dev);
hns_roce_cmd_cleanup(hr_dev);
- hr_dev->hw->reset(hr_dev, false);
-
- ib_dealloc_device(&hr_dev->ib_dev);
-
- return 0;
+ if (hr_dev->hw->cmq_exit)
+ hr_dev->hw->cmq_exit(hr_dev);
+ hns_roce_dealloc_dfx_cnt(hr_dev);
}
-static struct platform_driver hns_roce_driver = {
- .probe = hns_roce_probe,
- .remove = hns_roce_remove,
- .driver = {
- .name = DRV_NAME,
- .of_match_table = hns_roce_of_match,
- .acpi_match_table = ACPI_PTR(hns_roce_acpi_match),
- },
-};
-
-module_platform_driver(hns_roce_driver);
-
MODULE_LICENSE("Dual BSD/GPL");
MODULE_AUTHOR("Wei Hu <xavier.huwei@huawei.com>");
MODULE_AUTHOR("Nenglong Zhao <zhaonenglong@hisilicon.com>");
diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c
index 4139abee3b54..31cb8699e198 100644
--- a/drivers/infiniband/hw/hns/hns_roce_mr.c
+++ b/drivers/infiniband/hw/hns/hns_roce_mr.c
@@ -31,15 +31,18 @@
* SOFTWARE.
*/
-#include <linux/platform_device.h>
+#include <linux/vmalloc.h>
+#include <linux/count_zeros.h>
#include <rdma/ib_umem.h>
+#include <linux/math.h>
#include "hns_roce_device.h"
#include "hns_roce_cmd.h"
#include "hns_roce_hem.h"
+#include "hns_roce_trace.h"
-static u32 hw_index_to_key(unsigned long ind)
+static u32 hw_index_to_key(int ind)
{
- return (u32)(ind >> 24) | (ind << 8);
+ return ((u32)ind >> 24) | ((u32)ind << 8);
}
unsigned long key_to_hw_index(u32 key)
@@ -47,580 +50,1055 @@ unsigned long key_to_hw_index(u32 key)
return (key << 24) | (key >> 8);
}
-static int hns_roce_sw2hw_mpt(struct hns_roce_dev *hr_dev,
- struct hns_roce_cmd_mailbox *mailbox,
- unsigned long mpt_index)
+static int alloc_mr_key(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr)
{
- return hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, mpt_index, 0,
- HNS_ROCE_CMD_SW2HW_MPT,
- HNS_ROCE_CMD_TIMEOUT_MSECS);
+ struct hns_roce_ida *mtpt_ida = &hr_dev->mr_table.mtpt_ida;
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ int err;
+ int id;
+
+ /* Allocate a key for mr from mr_table */
+ id = ida_alloc_range(&mtpt_ida->ida, mtpt_ida->min, mtpt_ida->max,
+ GFP_KERNEL);
+ if (id < 0) {
+ ibdev_err(ibdev, "failed to alloc id for MR key, id(%d)\n", id);
+ return -ENOMEM;
+ }
+
+ mr->key = hw_index_to_key(id); /* MR key */
+
+ err = hns_roce_table_get(hr_dev, &hr_dev->mr_table.mtpt_table,
+ (unsigned long)id);
+ if (err) {
+ ibdev_err(ibdev, "failed to alloc mtpt, ret = %d.\n", err);
+ goto err_free_bitmap;
+ }
+
+ return 0;
+err_free_bitmap:
+ ida_free(&mtpt_ida->ida, id);
+ return err;
}
-int hns_roce_hw2sw_mpt(struct hns_roce_dev *hr_dev,
- struct hns_roce_cmd_mailbox *mailbox,
- unsigned long mpt_index)
+static void free_mr_key(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr)
{
- return hns_roce_cmd_mbox(hr_dev, 0, mailbox ? mailbox->dma : 0,
- mpt_index, !mailbox, HNS_ROCE_CMD_HW2SW_MPT,
- HNS_ROCE_CMD_TIMEOUT_MSECS);
+ unsigned long obj = key_to_hw_index(mr->key);
+
+ hns_roce_table_put(hr_dev, &hr_dev->mr_table.mtpt_table, obj);
+ ida_free(&hr_dev->mr_table.mtpt_ida.ida, (int)obj);
}
-static int hns_roce_buddy_alloc(struct hns_roce_buddy *buddy, int order,
- unsigned long *seg)
+static int alloc_mr_pbl(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr,
+ struct ib_udata *udata, u64 start)
{
- int o;
- u32 m;
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ bool is_fast = mr->type == MR_TYPE_FRMR;
+ struct hns_roce_buf_attr buf_attr = {};
+ int err;
+
+ mr->pbl_hop_num = is_fast ? 1 : hr_dev->caps.pbl_hop_num;
+ buf_attr.page_shift = is_fast ? PAGE_SHIFT :
+ hr_dev->caps.pbl_buf_pg_sz + PAGE_SHIFT;
+ buf_attr.region[0].size = mr->size;
+ buf_attr.region[0].hopnum = mr->pbl_hop_num;
+ buf_attr.region_count = 1;
+ buf_attr.user_access = mr->access;
+ /* fast MR's buffer is alloced before mapping, not at creation */
+ buf_attr.mtt_only = is_fast;
+ buf_attr.iova = mr->iova;
+ /* pagesize and hopnum is fixed for fast MR */
+ buf_attr.adaptive = !is_fast;
+ buf_attr.type = MTR_PBL;
+
+ err = hns_roce_mtr_create(hr_dev, &mr->pbl_mtr, &buf_attr,
+ hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT,
+ udata, start);
+ if (err) {
+ ibdev_err(ibdev, "failed to alloc pbl mtr, ret = %d.\n", err);
+ return err;
+ }
- spin_lock(&buddy->lock);
+ mr->npages = mr->pbl_mtr.hem_cfg.buf_pg_count;
+ mr->pbl_hop_num = buf_attr.region[0].hopnum;
- for (o = order; o <= buddy->max_order; ++o) {
- if (buddy->num_free[o]) {
- m = 1 << (buddy->max_order - o);
- *seg = find_first_bit(buddy->bits[o], m);
- if (*seg < m)
- goto found;
- }
+ return err;
+}
+
+static void free_mr_pbl(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr)
+{
+ hns_roce_mtr_destroy(hr_dev, &mr->pbl_mtr);
+}
+
+static void hns_roce_mr_free(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr)
+{
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ int ret;
+
+ if (mr->enabled) {
+ ret = hns_roce_destroy_hw_ctx(hr_dev, HNS_ROCE_CMD_DESTROY_MPT,
+ key_to_hw_index(mr->key) &
+ (hr_dev->caps.num_mtpts - 1));
+ if (ret)
+ ibdev_warn_ratelimited(ibdev, "failed to destroy mpt, ret = %d.\n",
+ ret);
}
- spin_unlock(&buddy->lock);
- return -1;
- found:
- clear_bit(*seg, buddy->bits[o]);
- --buddy->num_free[o];
+ free_mr_pbl(hr_dev, mr);
+ free_mr_key(hr_dev, mr);
+}
- while (o > order) {
- --o;
- *seg <<= 1;
- set_bit(*seg ^ 1, buddy->bits[o]);
- ++buddy->num_free[o];
+static int hns_roce_mr_enable(struct hns_roce_dev *hr_dev,
+ struct hns_roce_mr *mr)
+{
+ unsigned long mtpt_idx = key_to_hw_index(mr->key);
+ struct hns_roce_cmd_mailbox *mailbox;
+ struct device *dev = hr_dev->dev;
+ int ret;
+
+ /* Allocate mailbox memory */
+ mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+
+ trace_hns_mr(mr);
+ if (mr->type != MR_TYPE_FRMR)
+ ret = hr_dev->hw->write_mtpt(hr_dev, mailbox->buf, mr);
+ else
+ ret = hr_dev->hw->frmr_write_mtpt(mailbox->buf, mr);
+ if (ret) {
+ dev_err(dev, "failed to write mtpt, ret = %d.\n", ret);
+ goto err_page;
}
- spin_unlock(&buddy->lock);
+ ret = hns_roce_create_hw_ctx(hr_dev, mailbox, HNS_ROCE_CMD_CREATE_MPT,
+ mtpt_idx & (hr_dev->caps.num_mtpts - 1));
+ if (ret) {
+ dev_err(dev, "failed to create mpt, ret = %d.\n", ret);
+ goto err_page;
+ }
- *seg <<= order;
- return 0;
+ mr->enabled = 1;
+
+err_page:
+ hns_roce_free_cmd_mailbox(hr_dev, mailbox);
+
+ return ret;
+}
+
+void hns_roce_init_mr_table(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_ida *mtpt_ida = &hr_dev->mr_table.mtpt_ida;
+
+ ida_init(&mtpt_ida->ida);
+ mtpt_ida->max = hr_dev->caps.num_mtpts - 1;
+ mtpt_ida->min = hr_dev->caps.reserved_mrws;
}
-static void hns_roce_buddy_free(struct hns_roce_buddy *buddy, unsigned long seg,
- int order)
+struct ib_mr *hns_roce_get_dma_mr(struct ib_pd *pd, int acc)
{
- seg >>= order;
+ struct hns_roce_dev *hr_dev = to_hr_dev(pd->device);
+ struct hns_roce_mr *mr;
+ int ret;
+
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+ mr->type = MR_TYPE_DMA;
+ mr->pd = to_hr_pd(pd)->pdn;
+ mr->access = acc;
+
+ /* Allocate memory region key */
+ hns_roce_hem_list_init(&mr->pbl_mtr.hem_list);
+ ret = alloc_mr_key(hr_dev, mr);
+ if (ret)
+ goto err_free;
- spin_lock(&buddy->lock);
+ ret = hns_roce_mr_enable(hr_dev, mr);
+ if (ret)
+ goto err_mr;
+
+ mr->ibmr.rkey = mr->ibmr.lkey = mr->key;
+
+ return &mr->ibmr;
+err_mr:
+ free_mr_key(hr_dev, mr);
- while (test_bit(seg ^ 1, buddy->bits[order])) {
- clear_bit(seg ^ 1, buddy->bits[order]);
- --buddy->num_free[order];
- seg >>= 1;
- ++order;
+err_free:
+ kfree(mr);
+ return ERR_PTR(ret);
+}
+
+struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+ u64 virt_addr, int access_flags,
+ struct ib_dmah *dmah,
+ struct ib_udata *udata)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(pd->device);
+ struct hns_roce_mr *mr;
+ int ret;
+
+ if (dmah) {
+ ret = -EOPNOTSUPP;
+ goto err_out;
+ }
+
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr) {
+ ret = -ENOMEM;
+ goto err_out;
}
- set_bit(seg, buddy->bits[order]);
- ++buddy->num_free[order];
+ mr->iova = virt_addr;
+ mr->size = length;
+ mr->pd = to_hr_pd(pd)->pdn;
+ mr->access = access_flags;
+ mr->type = MR_TYPE_MR;
+
+ ret = alloc_mr_key(hr_dev, mr);
+ if (ret)
+ goto err_alloc_mr;
+
+ ret = alloc_mr_pbl(hr_dev, mr, udata, start);
+ if (ret)
+ goto err_alloc_key;
+
+ ret = hns_roce_mr_enable(hr_dev, mr);
+ if (ret)
+ goto err_alloc_pbl;
+
+ mr->ibmr.rkey = mr->ibmr.lkey = mr->key;
+
+ return &mr->ibmr;
+
+err_alloc_pbl:
+ free_mr_pbl(hr_dev, mr);
+err_alloc_key:
+ free_mr_key(hr_dev, mr);
+err_alloc_mr:
+ kfree(mr);
+err_out:
+ atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_MR_REG_ERR_CNT]);
- spin_unlock(&buddy->lock);
+ return ERR_PTR(ret);
}
-static int hns_roce_buddy_init(struct hns_roce_buddy *buddy, int max_order)
+struct ib_mr *hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start,
+ u64 length, u64 virt_addr,
+ int mr_access_flags, struct ib_pd *pd,
+ struct ib_udata *udata)
{
- int i, s;
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device);
+ struct ib_device *ib_dev = &hr_dev->ib_dev;
+ struct hns_roce_mr *mr = to_hr_mr(ibmr);
+ struct hns_roce_cmd_mailbox *mailbox;
+ unsigned long mtpt_idx;
+ int ret;
- buddy->max_order = max_order;
- spin_lock_init(&buddy->lock);
+ if (!mr->enabled) {
+ ret = -EINVAL;
+ goto err_out;
+ }
- buddy->bits = kzalloc((buddy->max_order + 1) * sizeof(long *),
- GFP_KERNEL);
- buddy->num_free = kzalloc((buddy->max_order + 1) * sizeof(int *),
- GFP_KERNEL);
- if (!buddy->bits || !buddy->num_free)
+ mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
+ ret = PTR_ERR_OR_ZERO(mailbox);
+ if (ret)
goto err_out;
- for (i = 0; i <= buddy->max_order; ++i) {
- s = BITS_TO_LONGS(1 << (buddy->max_order - i));
- buddy->bits[i] = kcalloc(s, sizeof(long), GFP_KERNEL |
- __GFP_NOWARN);
- if (!buddy->bits[i]) {
- buddy->bits[i] = vzalloc(s * sizeof(long));
- if (!buddy->bits[i])
- goto err_out_free;
+ mtpt_idx = key_to_hw_index(mr->key) & (hr_dev->caps.num_mtpts - 1);
+
+ ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, HNS_ROCE_CMD_QUERY_MPT,
+ mtpt_idx);
+ if (ret)
+ goto free_cmd_mbox;
+
+ ret = hns_roce_destroy_hw_ctx(hr_dev, HNS_ROCE_CMD_DESTROY_MPT,
+ mtpt_idx);
+ if (ret)
+ ibdev_warn(ib_dev, "failed to destroy MPT, ret = %d.\n", ret);
+
+ mr->enabled = 0;
+ mr->iova = virt_addr;
+ mr->size = length;
+
+ if (flags & IB_MR_REREG_PD)
+ mr->pd = to_hr_pd(pd)->pdn;
+
+ if (flags & IB_MR_REREG_ACCESS)
+ mr->access = mr_access_flags;
+
+ if (flags & IB_MR_REREG_TRANS) {
+ free_mr_pbl(hr_dev, mr);
+ ret = alloc_mr_pbl(hr_dev, mr, udata, start);
+ if (ret) {
+ ibdev_err(ib_dev, "failed to alloc mr PBL, ret = %d.\n",
+ ret);
+ goto free_cmd_mbox;
}
}
- set_bit(0, buddy->bits[buddy->max_order]);
- buddy->num_free[buddy->max_order] = 1;
+ ret = hr_dev->hw->rereg_write_mtpt(hr_dev, mr, flags, mailbox->buf);
+ if (ret) {
+ ibdev_err(ib_dev, "failed to write mtpt, ret = %d.\n", ret);
+ goto free_cmd_mbox;
+ }
- return 0;
+ ret = hns_roce_create_hw_ctx(hr_dev, mailbox, HNS_ROCE_CMD_CREATE_MPT,
+ mtpt_idx);
+ if (ret) {
+ ibdev_err(ib_dev, "failed to create MPT, ret = %d.\n", ret);
+ goto free_cmd_mbox;
+ }
+
+ mr->enabled = 1;
-err_out_free:
- for (i = 0; i <= buddy->max_order; ++i)
- kvfree(buddy->bits[i]);
+free_cmd_mbox:
+ hns_roce_free_cmd_mailbox(hr_dev, mailbox);
err_out:
- kfree(buddy->bits);
- kfree(buddy->num_free);
- return -ENOMEM;
+ if (ret) {
+ atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_MR_REREG_ERR_CNT]);
+ return ERR_PTR(ret);
+ }
+
+ return NULL;
}
-static void hns_roce_buddy_cleanup(struct hns_roce_buddy *buddy)
+int hns_roce_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
{
- int i;
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device);
+ struct hns_roce_mr *mr = to_hr_mr(ibmr);
- for (i = 0; i <= buddy->max_order; ++i)
- kvfree(buddy->bits[i]);
+ if (hr_dev->hw->dereg_mr)
+ hr_dev->hw->dereg_mr(hr_dev);
- kfree(buddy->bits);
- kfree(buddy->num_free);
+ hns_roce_mr_free(hr_dev, mr);
+ kfree(mr);
+
+ return 0;
}
-static int hns_roce_alloc_mtt_range(struct hns_roce_dev *hr_dev, int order,
- unsigned long *seg)
+struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
+ u32 max_num_sg)
{
- struct hns_roce_mr_table *mr_table = &hr_dev->mr_table;
- int ret = 0;
+ struct hns_roce_dev *hr_dev = to_hr_dev(pd->device);
+ struct device *dev = hr_dev->dev;
+ struct hns_roce_mr *mr;
+ int ret;
- ret = hns_roce_buddy_alloc(&mr_table->mtt_buddy, order, seg);
- if (ret == -1)
- return -1;
+ if (mr_type != IB_MR_TYPE_MEM_REG)
+ return ERR_PTR(-EINVAL);
- if (hns_roce_table_get_range(hr_dev, &mr_table->mtt_table, *seg,
- *seg + (1 << order) - 1)) {
- hns_roce_buddy_free(&mr_table->mtt_buddy, *seg, order);
- return -1;
+ if (max_num_sg > HNS_ROCE_FRMR_MAX_PA) {
+ dev_err(dev, "max_num_sg larger than %d\n",
+ HNS_ROCE_FRMR_MAX_PA);
+ return ERR_PTR(-EINVAL);
}
- return 0;
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+ mr->type = MR_TYPE_FRMR;
+ mr->pd = to_hr_pd(pd)->pdn;
+ mr->size = max_num_sg * (1 << PAGE_SHIFT);
+
+ /* Allocate memory region key */
+ ret = alloc_mr_key(hr_dev, mr);
+ if (ret)
+ goto err_free;
+
+ ret = alloc_mr_pbl(hr_dev, mr, NULL, 0);
+ if (ret)
+ goto err_key;
+
+ ret = hns_roce_mr_enable(hr_dev, mr);
+ if (ret)
+ goto err_pbl;
+
+ mr->ibmr.rkey = mr->ibmr.lkey = mr->key;
+ mr->ibmr.length = mr->size;
+
+ return &mr->ibmr;
+
+err_pbl:
+ free_mr_pbl(hr_dev, mr);
+err_key:
+ free_mr_key(hr_dev, mr);
+err_free:
+ kfree(mr);
+ return ERR_PTR(ret);
}
-int hns_roce_mtt_init(struct hns_roce_dev *hr_dev, int npages, int page_shift,
- struct hns_roce_mtt *mtt)
+static int hns_roce_set_page(struct ib_mr *ibmr, u64 addr)
{
- int ret = 0;
- int i;
+ struct hns_roce_mr *mr = to_hr_mr(ibmr);
- /* Page num is zero, correspond to DMA memory register */
- if (!npages) {
- mtt->order = -1;
- mtt->page_shift = HNS_ROCE_HEM_PAGE_SHIFT;
+ if (likely(mr->npages < mr->pbl_mtr.hem_cfg.buf_pg_count)) {
+ mr->page_list[mr->npages++] = addr;
return 0;
}
- /* Note: if page_shift is zero, FAST memory regsiter */
- mtt->page_shift = page_shift;
+ return -ENOBUFS;
+}
- /* Compute MTT entry necessary */
- for (mtt->order = 0, i = HNS_ROCE_MTT_ENTRY_PER_SEG; i < npages;
- i <<= 1)
- ++mtt->order;
+int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
+ unsigned int *sg_offset_p)
+{
+ unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0;
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device);
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ struct hns_roce_mr *mr = to_hr_mr(ibmr);
+ struct hns_roce_mtr *mtr = &mr->pbl_mtr;
+ int ret, sg_num = 0;
+
+ if (!IS_ALIGNED(sg_offset, HNS_ROCE_FRMR_ALIGN_SIZE) ||
+ ibmr->page_size < HNS_HW_PAGE_SIZE ||
+ ibmr->page_size > HNS_HW_MAX_PAGE_SIZE)
+ return sg_num;
+
+ mr->npages = 0;
+ mr->page_list = kvcalloc(mr->pbl_mtr.hem_cfg.buf_pg_count,
+ sizeof(dma_addr_t), GFP_KERNEL);
+ if (!mr->page_list)
+ return sg_num;
+
+ sg_num = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset_p, hns_roce_set_page);
+ if (sg_num < 1) {
+ ibdev_err(ibdev, "failed to store sg pages %u %u, cnt = %d.\n",
+ mr->npages, mr->pbl_mtr.hem_cfg.buf_pg_count, sg_num);
+ goto err_page_list;
+ }
- /* Allocate MTT entry */
- ret = hns_roce_alloc_mtt_range(hr_dev, mtt->order, &mtt->first_seg);
- if (ret == -1)
- return -ENOMEM;
+ mtr->hem_cfg.region[0].offset = 0;
+ mtr->hem_cfg.region[0].count = mr->npages;
+ mtr->hem_cfg.region[0].hopnum = mr->pbl_hop_num;
+ mtr->hem_cfg.region_count = 1;
+ ret = hns_roce_mtr_map(hr_dev, mtr, mr->page_list, mr->npages);
+ if (ret) {
+ ibdev_err(ibdev, "failed to map sg mtr, ret = %d.\n", ret);
+ sg_num = 0;
+ } else {
+ mr->pbl_mtr.hem_cfg.buf_pg_shift = (u32)ilog2(ibmr->page_size);
+ }
- return 0;
+err_page_list:
+ kvfree(mr->page_list);
+ mr->page_list = NULL;
+
+ return sg_num;
}
-void hns_roce_mtt_cleanup(struct hns_roce_dev *hr_dev, struct hns_roce_mtt *mtt)
+static int mtr_map_region(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
+ struct hns_roce_buf_region *region, dma_addr_t *pages,
+ int max_count)
{
- struct hns_roce_mr_table *mr_table = &hr_dev->mr_table;
+ int count, npage;
+ int offset, end;
+ __le64 *mtts;
+ u64 addr;
+ int i;
- if (mtt->order < 0)
- return;
+ offset = region->offset;
+ end = offset + region->count;
+ npage = 0;
+ while (offset < end && npage < max_count) {
+ count = 0;
+ mtts = hns_roce_hem_list_find_mtt(hr_dev, &mtr->hem_list,
+ offset, &count);
+ if (!mtts)
+ return -ENOBUFS;
+
+ for (i = 0; i < count && npage < max_count; i++) {
+ addr = pages[npage];
+
+ mtts[i] = cpu_to_le64(addr);
+ npage++;
+ }
+ offset += count;
+ }
- hns_roce_buddy_free(&mr_table->mtt_buddy, mtt->first_seg, mtt->order);
- hns_roce_table_put_range(hr_dev, &mr_table->mtt_table, mtt->first_seg,
- mtt->first_seg + (1 << mtt->order) - 1);
+ return npage;
}
-static int hns_roce_mr_alloc(struct hns_roce_dev *hr_dev, u32 pd, u64 iova,
- u64 size, u32 access, int npages,
- struct hns_roce_mr *mr)
+static inline bool mtr_has_mtt(struct hns_roce_buf_attr *attr)
{
- unsigned long index = 0;
- int ret = 0;
- struct device *dev = &hr_dev->pdev->dev;
+ int i;
- /* Allocate a key for mr from mr_table */
- ret = hns_roce_bitmap_alloc(&hr_dev->mr_table.mtpt_bitmap, &index);
- if (ret == -1)
- return -ENOMEM;
+ for (i = 0; i < attr->region_count; i++)
+ if (attr->region[i].hopnum != HNS_ROCE_HOP_NUM_0 &&
+ attr->region[i].hopnum > 0)
+ return true;
- mr->iova = iova; /* MR va starting addr */
- mr->size = size; /* MR addr range */
- mr->pd = pd; /* MR num */
- mr->access = access; /* MR access permit */
- mr->enabled = 0; /* MR active status */
- mr->key = hw_index_to_key(index); /* MR key */
-
- if (size == ~0ull) {
- mr->type = MR_TYPE_DMA;
- mr->pbl_buf = NULL;
- mr->pbl_dma_addr = 0;
- } else {
- mr->type = MR_TYPE_MR;
- mr->pbl_buf = dma_alloc_coherent(dev, npages * 8,
- &(mr->pbl_dma_addr),
- GFP_KERNEL);
- if (!mr->pbl_buf)
+ /* because the mtr only one root base address, when hopnum is 0 means
+ * root base address equals the first buffer address, thus all alloced
+ * memory must in a continuous space accessed by direct mode.
+ */
+ return false;
+}
+
+static inline size_t mtr_bufs_size(struct hns_roce_buf_attr *attr)
+{
+ size_t size = 0;
+ int i;
+
+ for (i = 0; i < attr->region_count; i++)
+ size += attr->region[i].size;
+
+ return size;
+}
+
+/*
+ * check the given pages in continuous address space
+ * Returns 0 on success, or the error page num.
+ */
+static inline int mtr_check_direct_pages(dma_addr_t *pages, int page_count,
+ unsigned int page_shift)
+{
+ size_t page_size = 1 << page_shift;
+ int i;
+
+ for (i = 1; i < page_count; i++)
+ if (pages[i] - pages[i - 1] != page_size)
+ return i;
+
+ return 0;
+}
+
+static void mtr_free_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr)
+{
+ /* release user buffers */
+ if (mtr->umem) {
+ ib_umem_release(mtr->umem);
+ mtr->umem = NULL;
+ }
+
+ /* release kernel buffers */
+ if (mtr->kmem) {
+ hns_roce_buf_free(hr_dev, mtr->kmem);
+ mtr->kmem = NULL;
+ }
+}
+
+static int mtr_alloc_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
+ struct hns_roce_buf_attr *buf_attr,
+ struct ib_udata *udata, unsigned long user_addr)
+{
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ size_t total_size;
+
+ total_size = mtr_bufs_size(buf_attr);
+
+ if (udata) {
+ mtr->kmem = NULL;
+ mtr->umem = ib_umem_get(ibdev, user_addr, total_size,
+ buf_attr->user_access);
+ if (IS_ERR(mtr->umem)) {
+ ibdev_err(ibdev, "failed to get umem, ret = %pe.\n",
+ mtr->umem);
return -ENOMEM;
+ }
+ } else {
+ mtr->umem = NULL;
+ mtr->kmem = hns_roce_buf_alloc(hr_dev, total_size,
+ buf_attr->page_shift,
+ !mtr_has_mtt(buf_attr) ?
+ HNS_ROCE_BUF_DIRECT : 0);
+ if (IS_ERR(mtr->kmem)) {
+ ibdev_err(ibdev, "failed to alloc kmem, ret = %pe.\n",
+ mtr->kmem);
+ return PTR_ERR(mtr->kmem);
+ }
}
return 0;
}
-static void hns_roce_mr_free(struct hns_roce_dev *hr_dev,
- struct hns_roce_mr *mr)
+static int cal_mtr_pg_cnt(struct hns_roce_mtr *mtr)
{
- struct device *dev = &hr_dev->pdev->dev;
- int npages = 0;
- int ret;
+ struct hns_roce_buf_region *region;
+ int page_cnt = 0;
+ int i;
- if (mr->enabled) {
- ret = hns_roce_hw2sw_mpt(hr_dev, NULL, key_to_hw_index(mr->key)
- & (hr_dev->caps.num_mtpts - 1));
- if (ret)
- dev_warn(dev, "HW2SW_MPT failed (%d)\n", ret);
+ for (i = 0; i < mtr->hem_cfg.region_count; i++) {
+ region = &mtr->hem_cfg.region[i];
+ page_cnt += region->count;
}
- if (mr->size != ~0ULL) {
- npages = ib_umem_page_count(mr->umem);
- dma_free_coherent(dev, (unsigned int)(npages * 8), mr->pbl_buf,
- mr->pbl_dma_addr);
- }
+ return page_cnt;
+}
- hns_roce_bitmap_free(&hr_dev->mr_table.mtpt_bitmap,
- key_to_hw_index(mr->key), BITMAP_NO_RR);
+static bool need_split_huge_page(struct hns_roce_mtr *mtr)
+{
+ /* When HEM buffer uses 0-level addressing, the page size is
+ * equal to the whole buffer size. If the current MTR has multiple
+ * regions, we split the buffer into small pages(4k, required by hns
+ * ROCEE). These pages will be used in multiple regions.
+ */
+ return mtr->hem_cfg.is_direct && mtr->hem_cfg.region_count > 1;
}
-static int hns_roce_mr_enable(struct hns_roce_dev *hr_dev,
- struct hns_roce_mr *mr)
+static int mtr_map_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr)
{
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ int page_count = cal_mtr_pg_cnt(mtr);
+ unsigned int page_shift;
+ dma_addr_t *pages;
+ int npage;
int ret;
- unsigned long mtpt_idx = key_to_hw_index(mr->key);
- struct device *dev = &hr_dev->pdev->dev;
- struct hns_roce_cmd_mailbox *mailbox;
- struct hns_roce_mr_table *mr_table = &hr_dev->mr_table;
- /* Prepare HEM entry memory */
- ret = hns_roce_table_get(hr_dev, &mr_table->mtpt_table, mtpt_idx);
- if (ret)
- return ret;
+ page_shift = need_split_huge_page(mtr) ? HNS_HW_PAGE_SHIFT :
+ mtr->hem_cfg.buf_pg_shift;
+ /* alloc a tmp array to store buffer's dma address */
+ pages = kvcalloc(page_count, sizeof(dma_addr_t), GFP_KERNEL);
+ if (!pages)
+ return -ENOMEM;
- /* Allocate mailbox memory */
- mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
- if (IS_ERR(mailbox)) {
- ret = PTR_ERR(mailbox);
- goto err_table;
+ if (mtr->umem)
+ npage = hns_roce_get_umem_bufs(pages, page_count,
+ mtr->umem, page_shift);
+ else
+ npage = hns_roce_get_kmem_bufs(hr_dev, pages, page_count,
+ mtr->kmem, page_shift);
+
+ if (npage != page_count) {
+ ibdev_err(ibdev, "failed to get mtr page %d != %d.\n", npage,
+ page_count);
+ ret = -ENOBUFS;
+ goto err_alloc_list;
}
- ret = hr_dev->hw->write_mtpt(mailbox->buf, mr, mtpt_idx);
- if (ret) {
- dev_err(dev, "Write mtpt fail!\n");
- goto err_page;
- }
-
- ret = hns_roce_sw2hw_mpt(hr_dev, mailbox,
- mtpt_idx & (hr_dev->caps.num_mtpts - 1));
- if (ret) {
- dev_err(dev, "SW2HW_MPT failed (%d)\n", ret);
- goto err_page;
+ if (need_split_huge_page(mtr) && npage > 1) {
+ ret = mtr_check_direct_pages(pages, npage, page_shift);
+ if (ret) {
+ ibdev_err(ibdev, "failed to check %s page: %d / %d.\n",
+ mtr->umem ? "umtr" : "kmtr", ret, npage);
+ ret = -ENOBUFS;
+ goto err_alloc_list;
+ }
}
- mr->enabled = 1;
- hns_roce_free_cmd_mailbox(hr_dev, mailbox);
-
- return 0;
+ ret = hns_roce_mtr_map(hr_dev, mtr, pages, page_count);
+ if (ret)
+ ibdev_err(ibdev, "failed to map mtr pages, ret = %d.\n", ret);
-err_page:
- hns_roce_free_cmd_mailbox(hr_dev, mailbox);
+err_alloc_list:
+ kvfree(pages);
-err_table:
- hns_roce_table_put(hr_dev, &mr_table->mtpt_table, mtpt_idx);
return ret;
}
-static int hns_roce_write_mtt_chunk(struct hns_roce_dev *hr_dev,
- struct hns_roce_mtt *mtt, u32 start_index,
- u32 npages, u64 *page_list)
+int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
+ dma_addr_t *pages, unsigned int page_cnt)
{
- u32 i = 0;
- __le64 *mtts = NULL;
- dma_addr_t dma_handle;
- u32 s = start_index * sizeof(u64);
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ struct hns_roce_buf_region *r;
+ unsigned int i, mapped_cnt;
+ int ret = 0;
- /* All MTTs must fit in the same page */
- if (start_index / (PAGE_SIZE / sizeof(u64)) !=
- (start_index + npages - 1) / (PAGE_SIZE / sizeof(u64)))
- return -EINVAL;
+ /*
+ * Only use the first page address as root ba when hopnum is 0, this
+ * is because the addresses of all pages are consecutive in this case.
+ */
+ if (mtr->hem_cfg.is_direct) {
+ mtr->hem_cfg.root_ba = pages[0];
+ return 0;
+ }
- if (start_index & (HNS_ROCE_MTT_ENTRY_PER_SEG - 1))
- return -EINVAL;
+ for (i = 0, mapped_cnt = 0; i < mtr->hem_cfg.region_count &&
+ mapped_cnt < page_cnt; i++) {
+ r = &mtr->hem_cfg.region[i];
- mtts = hns_roce_table_find(&hr_dev->mr_table.mtt_table,
- mtt->first_seg + s / hr_dev->caps.mtt_entry_sz,
- &dma_handle);
- if (!mtts)
- return -ENOMEM;
+ if (r->offset + r->count > page_cnt) {
+ ret = -EINVAL;
+ ibdev_err(ibdev,
+ "failed to check mtr%u count %u + %u > %u.\n",
+ i, r->offset, r->count, page_cnt);
+ return ret;
+ }
+
+ ret = mtr_map_region(hr_dev, mtr, r, &pages[r->offset],
+ page_cnt - mapped_cnt);
+ if (ret < 0) {
+ ibdev_err(ibdev,
+ "failed to map mtr%u offset %u, ret = %d.\n",
+ i, r->offset, ret);
+ return ret;
+ }
+ mapped_cnt += ret;
+ ret = 0;
+ }
- /* Save page addr, low 12 bits : 0 */
- for (i = 0; i < npages; ++i)
- mtts[i] = (cpu_to_le64(page_list[i])) >> PAGE_ADDR_SHIFT;
+ if (mapped_cnt < page_cnt) {
+ ret = -ENOBUFS;
+ ibdev_err(ibdev, "failed to map mtr pages count: %u < %u.\n",
+ mapped_cnt, page_cnt);
+ }
- return 0;
+ return ret;
}
-static int hns_roce_write_mtt(struct hns_roce_dev *hr_dev,
- struct hns_roce_mtt *mtt, u32 start_index,
- u32 npages, u64 *page_list)
+static int hns_roce_get_direct_addr_mtt(struct hns_roce_hem_cfg *cfg,
+ u32 start_index, u64 *mtt_buf,
+ int mtt_cnt)
{
- int chunk;
- int ret;
+ int mtt_count;
+ int total = 0;
+ u32 npage;
+ u64 addr;
- if (mtt->order < 0)
+ if (mtt_cnt > cfg->region_count)
return -EINVAL;
- while (npages > 0) {
- chunk = min_t(int, PAGE_SIZE / sizeof(u64), npages);
+ for (mtt_count = 0; mtt_count < cfg->region_count && total < mtt_cnt;
+ mtt_count++) {
+ npage = cfg->region[mtt_count].offset;
+ if (npage < start_index)
+ continue;
- ret = hns_roce_write_mtt_chunk(hr_dev, mtt, start_index, chunk,
- page_list);
- if (ret)
- return ret;
+ addr = cfg->root_ba + (npage << HNS_HW_PAGE_SHIFT);
+ mtt_buf[total] = addr;
- npages -= chunk;
- start_index += chunk;
- page_list += chunk;
+ total++;
}
+ if (!total)
+ return -ENOENT;
+
return 0;
}
-int hns_roce_buf_write_mtt(struct hns_roce_dev *hr_dev,
- struct hns_roce_mtt *mtt, struct hns_roce_buf *buf)
+static int hns_roce_get_mhop_mtt(struct hns_roce_dev *hr_dev,
+ struct hns_roce_mtr *mtr, u32 start_index,
+ u64 *mtt_buf, int mtt_cnt)
{
- u32 i = 0;
- int ret = 0;
- u64 *page_list = NULL;
+ int left = mtt_cnt;
+ int total = 0;
+ int mtt_count;
+ __le64 *mtts;
+ u32 npage;
+
+ while (left > 0) {
+ mtt_count = 0;
+ mtts = hns_roce_hem_list_find_mtt(hr_dev, &mtr->hem_list,
+ start_index + total,
+ &mtt_count);
+ if (!mtts || !mtt_count)
+ break;
+
+ npage = min(mtt_count, left);
+ left -= npage;
+ for (mtt_count = 0; mtt_count < npage; mtt_count++)
+ mtt_buf[total++] = le64_to_cpu(mtts[mtt_count]);
+ }
- page_list = kmalloc_array(buf->npages, sizeof(*page_list), GFP_KERNEL);
- if (!page_list)
- return -ENOMEM;
+ if (!total)
+ return -ENOENT;
- for (i = 0; i < buf->npages; ++i) {
- if (buf->nbufs == 1)
- page_list[i] = buf->direct.map + (i << buf->page_shift);
- else
- page_list[i] = buf->page_list[i].map;
+ return 0;
+}
- }
- ret = hns_roce_write_mtt(hr_dev, mtt, 0, buf->npages, page_list);
+int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
+ u32 offset, u64 *mtt_buf, int mtt_max)
+{
+ struct hns_roce_hem_cfg *cfg = &mtr->hem_cfg;
+ u32 start_index;
+ int ret;
- kfree(page_list);
+ if (!mtt_buf || mtt_max < 1)
+ return -EINVAL;
+ /* no mtt memory in direct mode, so just return the buffer address */
+ if (cfg->is_direct) {
+ start_index = offset >> HNS_HW_PAGE_SHIFT;
+ ret = hns_roce_get_direct_addr_mtt(cfg, start_index,
+ mtt_buf, mtt_max);
+ } else {
+ start_index = offset >> cfg->buf_pg_shift;
+ ret = hns_roce_get_mhop_mtt(hr_dev, mtr, start_index,
+ mtt_buf, mtt_max);
+ }
return ret;
}
-int hns_roce_init_mr_table(struct hns_roce_dev *hr_dev)
+static int get_best_page_shift(struct hns_roce_dev *hr_dev,
+ struct hns_roce_mtr *mtr,
+ struct hns_roce_buf_attr *buf_attr)
{
- struct hns_roce_mr_table *mr_table = &hr_dev->mr_table;
- int ret = 0;
+ unsigned int page_sz;
- ret = hns_roce_bitmap_init(&mr_table->mtpt_bitmap,
- hr_dev->caps.num_mtpts,
- hr_dev->caps.num_mtpts - 1,
- hr_dev->caps.reserved_mrws, 0);
- if (ret)
- return ret;
+ if (!buf_attr->adaptive || buf_attr->type != MTR_PBL || !mtr->umem)
+ return 0;
- ret = hns_roce_buddy_init(&mr_table->mtt_buddy,
- ilog2(hr_dev->caps.num_mtt_segs));
- if (ret)
- goto err_buddy;
+ page_sz = ib_umem_find_best_pgsz(mtr->umem,
+ hr_dev->caps.page_size_cap,
+ buf_attr->iova);
+ if (!page_sz)
+ return -EINVAL;
+ buf_attr->page_shift = order_base_2(page_sz);
return 0;
-
-err_buddy:
- hns_roce_bitmap_cleanup(&mr_table->mtpt_bitmap);
- return ret;
}
-void hns_roce_cleanup_mr_table(struct hns_roce_dev *hr_dev)
+static int get_best_hop_num(struct hns_roce_dev *hr_dev,
+ struct hns_roce_mtr *mtr,
+ struct hns_roce_buf_attr *buf_attr,
+ unsigned int ba_pg_shift)
{
- struct hns_roce_mr_table *mr_table = &hr_dev->mr_table;
+#define INVALID_HOPNUM -1
+#define MIN_BA_CNT 1
+ size_t buf_pg_sz = 1 << buf_attr->page_shift;
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ size_t ba_pg_sz = 1 << ba_pg_shift;
+ int hop_num = INVALID_HOPNUM;
+ size_t unit = MIN_BA_CNT;
+ size_t ba_cnt;
+ int j;
+
+ if (!buf_attr->adaptive || buf_attr->type != MTR_PBL)
+ return 0;
+
+ /* Caculating the number of buf pages, each buf page need a BA */
+ if (mtr->umem)
+ ba_cnt = ib_umem_num_dma_blocks(mtr->umem, buf_pg_sz);
+ else
+ ba_cnt = DIV_ROUND_UP(buf_attr->region[0].size, buf_pg_sz);
+
+ for (j = 0; j <= HNS_ROCE_MAX_HOP_NUM; j++) {
+ if (ba_cnt <= unit) {
+ hop_num = j;
+ break;
+ }
+ /* Number of BAs can be represented at per hop */
+ unit *= ba_pg_sz / BA_BYTE_LEN;
+ }
+
+ if (hop_num < 0) {
+ ibdev_err(ibdev,
+ "failed to calculate a valid hopnum.\n");
+ return -EINVAL;
+ }
- hns_roce_buddy_cleanup(&mr_table->mtt_buddy);
- hns_roce_bitmap_cleanup(&mr_table->mtpt_bitmap);
+ buf_attr->region[0].hopnum = hop_num;
+
+ return 0;
}
-struct ib_mr *hns_roce_get_dma_mr(struct ib_pd *pd, int acc)
+static bool is_buf_attr_valid(struct hns_roce_dev *hr_dev,
+ struct hns_roce_buf_attr *attr)
{
- int ret = 0;
- struct hns_roce_mr *mr = NULL;
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+
+ if (attr->region_count > ARRAY_SIZE(attr->region) ||
+ attr->region_count < 1 || attr->page_shift < HNS_HW_PAGE_SHIFT) {
+ ibdev_err(ibdev,
+ "invalid buf attr, region count %u, page shift %u.\n",
+ attr->region_count, attr->page_shift);
+ return false;
+ }
- mr = kmalloc(sizeof(*mr), GFP_KERNEL);
- if (mr == NULL)
- return ERR_PTR(-ENOMEM);
+ return true;
+}
- /* Allocate memory region key */
- ret = hns_roce_mr_alloc(to_hr_dev(pd->device), to_hr_pd(pd)->pdn, 0,
- ~0ULL, acc, 0, mr);
- if (ret)
- goto err_free;
+static int mtr_init_buf_cfg(struct hns_roce_dev *hr_dev,
+ struct hns_roce_mtr *mtr,
+ struct hns_roce_buf_attr *attr)
+{
+ struct hns_roce_hem_cfg *cfg = &mtr->hem_cfg;
+ struct hns_roce_buf_region *r;
+ size_t buf_pg_sz;
+ size_t buf_size;
+ int page_cnt, i;
+ u64 pgoff = 0;
+
+ if (!is_buf_attr_valid(hr_dev, attr))
+ return -EINVAL;
- ret = hns_roce_mr_enable(to_hr_dev(pd->device), mr);
- if (ret)
- goto err_mr;
+ /* If mtt is disabled, all pages must be within a continuous range */
+ cfg->is_direct = !mtr_has_mtt(attr);
+ cfg->region_count = attr->region_count;
+ buf_size = mtr_bufs_size(attr);
+ if (need_split_huge_page(mtr)) {
+ buf_pg_sz = HNS_HW_PAGE_SIZE;
+ cfg->buf_pg_count = 1;
+ /* The ROCEE requires the page size to be 4K * 2 ^ N. */
+ cfg->buf_pg_shift = HNS_HW_PAGE_SHIFT +
+ order_base_2(DIV_ROUND_UP(buf_size, HNS_HW_PAGE_SIZE));
+ } else {
+ buf_pg_sz = 1 << attr->page_shift;
+ cfg->buf_pg_count = mtr->umem ?
+ ib_umem_num_dma_blocks(mtr->umem, buf_pg_sz) :
+ DIV_ROUND_UP(buf_size, buf_pg_sz);
+ cfg->buf_pg_shift = attr->page_shift;
+ pgoff = mtr->umem ? mtr->umem->address & ~PAGE_MASK : 0;
+ }
- mr->ibmr.rkey = mr->ibmr.lkey = mr->key;
- mr->umem = NULL;
+ /* Convert buffer size to page index and page count for each region and
+ * the buffer's offset needs to be appended to the first region.
+ */
+ for (page_cnt = 0, i = 0; i < attr->region_count; i++) {
+ r = &cfg->region[i];
+ r->offset = page_cnt;
+ buf_size = hr_hw_page_align(attr->region[i].size + pgoff);
+ if (attr->type == MTR_PBL && mtr->umem)
+ r->count = ib_umem_num_dma_blocks(mtr->umem, buf_pg_sz);
+ else
+ r->count = DIV_ROUND_UP(buf_size, buf_pg_sz);
- return &mr->ibmr;
+ pgoff = 0;
+ page_cnt += r->count;
+ r->hopnum = to_hr_hem_hopnum(attr->region[i].hopnum, r->count);
+ }
-err_mr:
- hns_roce_mr_free(to_hr_dev(pd->device), mr);
+ return 0;
+}
-err_free:
- kfree(mr);
- return ERR_PTR(ret);
+static u64 cal_pages_per_l1ba(unsigned int ba_per_bt, unsigned int hopnum)
+{
+ return int_pow(ba_per_bt, hopnum - 1);
}
-int hns_roce_ib_umem_write_mtt(struct hns_roce_dev *hr_dev,
- struct hns_roce_mtt *mtt, struct ib_umem *umem)
+static unsigned int cal_best_bt_pg_sz(struct hns_roce_dev *hr_dev,
+ struct hns_roce_mtr *mtr,
+ unsigned int pg_shift)
{
- struct scatterlist *sg;
- int i, k, entry;
- int ret = 0;
- u64 *pages;
- u32 n;
- int len;
+ unsigned long cap = hr_dev->caps.page_size_cap;
+ struct hns_roce_buf_region *re;
+ unsigned int pgs_per_l1ba;
+ unsigned int ba_per_bt;
+ unsigned int ba_num;
+ int i;
- pages = (u64 *) __get_free_page(GFP_KERNEL);
- if (!pages)
- return -ENOMEM;
+ for_each_set_bit_from(pg_shift, &cap, sizeof(cap) * BITS_PER_BYTE) {
+ if (!(BIT(pg_shift) & cap))
+ continue;
+
+ ba_per_bt = BIT(pg_shift) / BA_BYTE_LEN;
+ ba_num = 0;
+ for (i = 0; i < mtr->hem_cfg.region_count; i++) {
+ re = &mtr->hem_cfg.region[i];
+ if (re->hopnum == 0)
+ continue;
- i = n = 0;
-
- for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
- len = sg_dma_len(sg) >> mtt->page_shift;
- for (k = 0; k < len; ++k) {
- pages[i++] = sg_dma_address(sg) + umem->page_size * k;
- if (i == PAGE_SIZE / sizeof(u64)) {
- ret = hns_roce_write_mtt(hr_dev, mtt, n, i,
- pages);
- if (ret)
- goto out;
- n += i;
- i = 0;
- }
+ pgs_per_l1ba = cal_pages_per_l1ba(ba_per_bt, re->hopnum);
+ ba_num += DIV_ROUND_UP(re->count, pgs_per_l1ba);
}
- }
- if (i)
- ret = hns_roce_write_mtt(hr_dev, mtt, n, i, pages);
+ if (ba_num <= ba_per_bt)
+ return pg_shift;
+ }
-out:
- free_page((unsigned long) pages);
- return ret;
+ return 0;
}
-static int hns_roce_ib_umem_write_mr(struct hns_roce_mr *mr,
- struct ib_umem *umem)
+static int mtr_alloc_mtt(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
+ unsigned int ba_page_shift)
{
- int i = 0;
- int entry;
- struct scatterlist *sg;
+ struct hns_roce_hem_cfg *cfg = &mtr->hem_cfg;
+ int ret;
- for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
- mr->pbl_buf[i] = ((u64)sg_dma_address(sg)) >> 12;
- i++;
- }
+ hns_roce_hem_list_init(&mtr->hem_list);
+ if (!cfg->is_direct) {
+ ba_page_shift = cal_best_bt_pg_sz(hr_dev, mtr, ba_page_shift);
+ if (!ba_page_shift)
+ return -ERANGE;
- /* Memory barrier */
- mb();
+ ret = hns_roce_hem_list_request(hr_dev, &mtr->hem_list,
+ cfg->region, cfg->region_count,
+ ba_page_shift);
+ if (ret)
+ return ret;
+ cfg->root_ba = mtr->hem_list.root_ba;
+ cfg->ba_pg_shift = ba_page_shift;
+ } else {
+ cfg->ba_pg_shift = cfg->buf_pg_shift;
+ }
return 0;
}
-struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
- u64 virt_addr, int access_flags,
- struct ib_udata *udata)
+static void mtr_free_mtt(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr)
{
- struct hns_roce_dev *hr_dev = to_hr_dev(pd->device);
- struct device *dev = &hr_dev->pdev->dev;
- struct hns_roce_mr *mr = NULL;
- int ret = 0;
- int n = 0;
+ hns_roce_hem_list_release(hr_dev, &mtr->hem_list);
+}
- mr = kmalloc(sizeof(*mr), GFP_KERNEL);
- if (!mr)
- return ERR_PTR(-ENOMEM);
+/**
+ * hns_roce_mtr_create - Create hns memory translate region.
+ *
+ * @hr_dev: RoCE device struct pointer
+ * @mtr: memory translate region
+ * @buf_attr: buffer attribute for creating mtr
+ * @ba_page_shift: page shift for multi-hop base address table
+ * @udata: user space context, if it's NULL, means kernel space
+ * @user_addr: userspace virtual address to start at
+ */
+int hns_roce_mtr_create(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
+ struct hns_roce_buf_attr *buf_attr,
+ unsigned int ba_page_shift, struct ib_udata *udata,
+ unsigned long user_addr)
+{
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ int ret;
- mr->umem = ib_umem_get(pd->uobject->context, start, length,
- access_flags, 0);
- if (IS_ERR(mr->umem)) {
- ret = PTR_ERR(mr->umem);
- goto err_free;
- }
+ trace_hns_buf_attr(buf_attr);
+ /* The caller has its own buffer list and invokes the hns_roce_mtr_map()
+ * to finish the MTT configuration.
+ */
+ if (buf_attr->mtt_only) {
+ mtr->umem = NULL;
+ mtr->kmem = NULL;
+ } else {
+ ret = mtr_alloc_bufs(hr_dev, mtr, buf_attr, udata, user_addr);
+ if (ret) {
+ ibdev_err(ibdev,
+ "failed to alloc mtr bufs, ret = %d.\n", ret);
+ return ret;
+ }
- n = ib_umem_page_count(mr->umem);
- if (mr->umem->page_size != HNS_ROCE_HEM_PAGE_SIZE) {
- dev_err(dev, "Just support 4K page size but is 0x%x now!\n",
- mr->umem->page_size);
- ret = -EINVAL;
- goto err_umem;
- }
+ ret = get_best_page_shift(hr_dev, mtr, buf_attr);
+ if (ret)
+ goto err_init_buf;
- if (n > HNS_ROCE_MAX_MTPT_PBL_NUM) {
- dev_err(dev, " MR len %lld err. MR is limited to 4G at most!\n",
- length);
- ret = -EINVAL;
- goto err_umem;
+ ret = get_best_hop_num(hr_dev, mtr, buf_attr, ba_page_shift);
+ if (ret)
+ goto err_init_buf;
}
- ret = hns_roce_mr_alloc(hr_dev, to_hr_pd(pd)->pdn, virt_addr, length,
- access_flags, n, mr);
+ ret = mtr_init_buf_cfg(hr_dev, mtr, buf_attr);
if (ret)
- goto err_umem;
+ goto err_init_buf;
- ret = hns_roce_ib_umem_write_mr(mr, mr->umem);
- if (ret)
- goto err_mr;
-
- ret = hns_roce_mr_enable(hr_dev, mr);
- if (ret)
- goto err_mr;
+ ret = mtr_alloc_mtt(hr_dev, mtr, ba_page_shift);
+ if (ret) {
+ ibdev_err(ibdev, "failed to alloc mtr mtt, ret = %d.\n", ret);
+ goto err_init_buf;
+ }
- mr->ibmr.rkey = mr->ibmr.lkey = mr->key;
+ if (buf_attr->mtt_only)
+ return 0;
- return &mr->ibmr;
+ /* Write buffer's dma address to MTT */
+ ret = mtr_map_bufs(hr_dev, mtr);
+ if (ret) {
+ ibdev_err(ibdev, "failed to map mtr bufs, ret = %d.\n", ret);
+ goto err_alloc_mtt;
+ }
-err_mr:
- hns_roce_mr_free(hr_dev, mr);
+ return 0;
-err_umem:
- ib_umem_release(mr->umem);
+err_alloc_mtt:
+ mtr_free_mtt(hr_dev, mtr);
+err_init_buf:
+ mtr_free_bufs(hr_dev, mtr);
-err_free:
- kfree(mr);
- return ERR_PTR(ret);
+ return ret;
}
-int hns_roce_dereg_mr(struct ib_mr *ibmr)
+void hns_roce_mtr_destroy(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr)
{
- struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device);
- struct hns_roce_mr *mr = to_hr_mr(ibmr);
- int ret = 0;
+ /* release multi-hop addressing resource */
+ hns_roce_hem_list_release(hr_dev, &mtr->hem_list);
- if (hr_dev->hw->dereg_mr) {
- ret = hr_dev->hw->dereg_mr(hr_dev, mr);
- } else {
- hns_roce_mr_free(hr_dev, mr);
-
- if (mr->umem)
- ib_umem_release(mr->umem);
-
- kfree(mr);
- }
-
- return ret;
+ /* free buffers */
+ mtr_free_bufs(hr_dev, mtr);
}
diff --git a/drivers/infiniband/hw/hns/hns_roce_pd.c b/drivers/infiniband/hw/hns/hns_roce_pd.c
index a64500fa1145..225c3e328e0e 100644
--- a/drivers/infiniband/hw/hns/hns_roce_pd.c
+++ b/drivers/infiniband/hw/hns/hns_roce_pd.c
@@ -30,110 +30,144 @@
* SOFTWARE.
*/
-#include <linux/platform_device.h>
#include "hns_roce_device.h"
-static int hns_roce_pd_alloc(struct hns_roce_dev *hr_dev, unsigned long *pdn)
+void hns_roce_init_pd_table(struct hns_roce_dev *hr_dev)
{
- return hns_roce_bitmap_alloc(&hr_dev->pd_bitmap, pdn);
-}
-
-static void hns_roce_pd_free(struct hns_roce_dev *hr_dev, unsigned long pdn)
-{
- hns_roce_bitmap_free(&hr_dev->pd_bitmap, pdn, BITMAP_NO_RR);
-}
-
-int hns_roce_init_pd_table(struct hns_roce_dev *hr_dev)
-{
- return hns_roce_bitmap_init(&hr_dev->pd_bitmap, hr_dev->caps.num_pds,
- hr_dev->caps.num_pds - 1,
- hr_dev->caps.reserved_pds, 0);
-}
+ struct hns_roce_ida *pd_ida = &hr_dev->pd_ida;
-void hns_roce_cleanup_pd_table(struct hns_roce_dev *hr_dev)
-{
- hns_roce_bitmap_cleanup(&hr_dev->pd_bitmap);
+ ida_init(&pd_ida->ida);
+ pd_ida->max = hr_dev->caps.num_pds - 1;
+ pd_ida->min = hr_dev->caps.reserved_pds;
}
-struct ib_pd *hns_roce_alloc_pd(struct ib_device *ib_dev,
- struct ib_ucontext *context,
- struct ib_udata *udata)
+int hns_roce_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
{
+ struct ib_device *ib_dev = ibpd->device;
struct hns_roce_dev *hr_dev = to_hr_dev(ib_dev);
- struct device *dev = &hr_dev->pdev->dev;
- struct hns_roce_pd *pd;
- int ret;
-
- pd = kmalloc(sizeof(*pd), GFP_KERNEL);
- if (!pd)
- return ERR_PTR(-ENOMEM);
+ struct hns_roce_ida *pd_ida = &hr_dev->pd_ida;
+ struct hns_roce_pd *pd = to_hr_pd(ibpd);
+ int ret = 0;
+ int id;
- ret = hns_roce_pd_alloc(to_hr_dev(ib_dev), &pd->pdn);
- if (ret) {
- kfree(pd);
- dev_err(dev, "[alloc_pd]hns_roce_pd_alloc failed!\n");
- return ERR_PTR(ret);
+ id = ida_alloc_range(&pd_ida->ida, pd_ida->min, pd_ida->max,
+ GFP_KERNEL);
+ if (id < 0) {
+ ibdev_err(ib_dev, "failed to alloc pd, id = %d.\n", id);
+ return -ENOMEM;
}
+ pd->pdn = (unsigned long)id;
+
+ if (udata) {
+ struct hns_roce_ib_alloc_pd_resp resp = {.pdn = pd->pdn};
- if (context) {
- if (ib_copy_to_udata(udata, &pd->pdn, sizeof(u64))) {
- hns_roce_pd_free(to_hr_dev(ib_dev), pd->pdn);
- dev_err(dev, "[alloc_pd]ib_copy_to_udata failed!\n");
- kfree(pd);
- return ERR_PTR(-EFAULT);
+ ret = ib_copy_to_udata(udata, &resp,
+ min(udata->outlen, sizeof(resp)));
+ if (ret) {
+ ida_free(&pd_ida->ida, id);
+ ibdev_err(ib_dev, "failed to copy to udata, ret = %d\n", ret);
}
}
- return &pd->ibpd;
+ return ret;
}
-int hns_roce_dealloc_pd(struct ib_pd *pd)
+int hns_roce_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata)
{
- hns_roce_pd_free(to_hr_dev(pd->device), to_hr_pd(pd)->pdn);
- kfree(to_hr_pd(pd));
+ struct hns_roce_dev *hr_dev = to_hr_dev(pd->device);
+
+ ida_free(&hr_dev->pd_ida.ida, (int)to_hr_pd(pd)->pdn);
return 0;
}
int hns_roce_uar_alloc(struct hns_roce_dev *hr_dev, struct hns_roce_uar *uar)
{
- struct resource *res;
- int ret = 0;
+ struct hns_roce_ida *uar_ida = &hr_dev->uar_ida;
+ int id;
/* Using bitmap to manager UAR index */
- ret = hns_roce_bitmap_alloc(&hr_dev->uar_table.bitmap, &uar->index);
- if (ret == -1)
+ id = ida_alloc_range(&uar_ida->ida, uar_ida->min, uar_ida->max,
+ GFP_KERNEL);
+ if (id < 0) {
+ ibdev_err(&hr_dev->ib_dev, "failed to alloc uar id(%d).\n", id);
return -ENOMEM;
+ }
+ uar->logic_idx = (unsigned long)id;
- if (uar->index > 0)
- uar->index = (uar->index - 1) %
+ if (uar->logic_idx > 0 && hr_dev->caps.phy_num_uars > 1)
+ uar->index = (uar->logic_idx - 1) %
(hr_dev->caps.phy_num_uars - 1) + 1;
+ else
+ uar->index = 0;
+
+ uar->pfn = ((pci_resource_start(hr_dev->pci_dev, 2)) >> PAGE_SHIFT);
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_DIRECT_WQE)
+ hr_dev->dwqe_page = pci_resource_start(hr_dev->pci_dev, 4);
- res = platform_get_resource(hr_dev->pdev, IORESOURCE_MEM, 0);
- if (!res) {
- dev_err(&hr_dev->pdev->dev, "memory resource not found!\n");
- return -EINVAL;
+ return 0;
+}
+
+void hns_roce_init_uar_table(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_ida *uar_ida = &hr_dev->uar_ida;
+
+ ida_init(&uar_ida->ida);
+ uar_ida->max = hr_dev->caps.num_uars - 1;
+ uar_ida->min = hr_dev->caps.reserved_uars;
+}
+
+static int hns_roce_xrcd_alloc(struct hns_roce_dev *hr_dev, u32 *xrcdn)
+{
+ struct hns_roce_ida *xrcd_ida = &hr_dev->xrcd_ida;
+ int id;
+
+ id = ida_alloc_range(&xrcd_ida->ida, xrcd_ida->min, xrcd_ida->max,
+ GFP_KERNEL);
+ if (id < 0) {
+ ibdev_err(&hr_dev->ib_dev, "failed to alloc xrcdn(%d).\n", id);
+ return -ENOMEM;
}
- uar->pfn = ((res->start) >> PAGE_SHIFT) + uar->index;
+ *xrcdn = (u32)id;
return 0;
}
-void hns_roce_uar_free(struct hns_roce_dev *hr_dev, struct hns_roce_uar *uar)
+void hns_roce_init_xrcd_table(struct hns_roce_dev *hr_dev)
{
- hns_roce_bitmap_free(&hr_dev->uar_table.bitmap, uar->index,
- BITMAP_NO_RR);
+ struct hns_roce_ida *xrcd_ida = &hr_dev->xrcd_ida;
+
+ ida_init(&xrcd_ida->ida);
+ xrcd_ida->max = hr_dev->caps.num_xrcds - 1;
+ xrcd_ida->min = hr_dev->caps.reserved_xrcds;
}
-int hns_roce_init_uar_table(struct hns_roce_dev *hr_dev)
+int hns_roce_alloc_xrcd(struct ib_xrcd *ib_xrcd, struct ib_udata *udata)
{
- return hns_roce_bitmap_init(&hr_dev->uar_table.bitmap,
- hr_dev->caps.num_uars,
- hr_dev->caps.num_uars - 1,
- hr_dev->caps.reserved_uars, 0);
+ struct hns_roce_dev *hr_dev = to_hr_dev(ib_xrcd->device);
+ struct hns_roce_xrcd *xrcd = to_hr_xrcd(ib_xrcd);
+ int ret;
+
+ if (!(hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_XRC)) {
+ ret = -EOPNOTSUPP;
+ goto err_out;
+ }
+
+ ret = hns_roce_xrcd_alloc(hr_dev, &xrcd->xrcdn);
+
+err_out:
+ if (ret)
+ atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_XRCD_ALLOC_ERR_CNT]);
+
+ return ret;
}
-void hns_roce_cleanup_uar_table(struct hns_roce_dev *hr_dev)
+int hns_roce_dealloc_xrcd(struct ib_xrcd *ib_xrcd, struct ib_udata *udata)
{
- hns_roce_bitmap_cleanup(&hr_dev->uar_table.bitmap);
+ struct hns_roce_dev *hr_dev = to_hr_dev(ib_xrcd->device);
+ u32 xrcdn = to_hr_xrcd(ib_xrcd)->xrcdn;
+
+ ida_free(&hr_dev->xrcd_ida.ida, (int)xrcdn);
+
+ return 0;
}
diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c
index f036f32f15d3..d1640c5fbaab 100644
--- a/drivers/infiniband/hw/hns/hns_roce_qp.c
+++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
@@ -31,46 +31,127 @@
* SOFTWARE.
*/
-#include <linux/platform_device.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_umem.h>
+#include <rdma/uverbs_ioctl.h>
#include "hns_roce_common.h"
#include "hns_roce_device.h"
#include "hns_roce_hem.h"
-#include <rdma/hns-abi.h>
-#define SQP_NUM (2 * HNS_ROCE_MAX_PORTS)
-
-void hns_roce_qp_event(struct hns_roce_dev *hr_dev, u32 qpn, int event_type)
+static struct hns_roce_qp *hns_roce_qp_lookup(struct hns_roce_dev *hr_dev,
+ u32 qpn)
{
- struct hns_roce_qp_table *qp_table = &hr_dev->qp_table;
- struct device *dev = &hr_dev->pdev->dev;
+ struct device *dev = hr_dev->dev;
struct hns_roce_qp *qp;
+ unsigned long flags;
- spin_lock(&qp_table->lock);
-
+ xa_lock_irqsave(&hr_dev->qp_table_xa, flags);
qp = __hns_roce_qp_lookup(hr_dev, qpn);
if (qp)
- atomic_inc(&qp->refcount);
+ refcount_inc(&qp->refcount);
+ xa_unlock_irqrestore(&hr_dev->qp_table_xa, flags);
- spin_unlock(&qp_table->lock);
+ if (!qp)
+ dev_warn(dev, "async event for bogus QP %08x\n", qpn);
- if (!qp) {
- dev_warn(dev, "Async event for bogus QP %08x\n", qpn);
+ return qp;
+}
+
+static void flush_work_handle(struct work_struct *work)
+{
+ struct hns_roce_work *flush_work = container_of(work,
+ struct hns_roce_work, work);
+ struct hns_roce_qp *hr_qp = container_of(flush_work,
+ struct hns_roce_qp, flush_work);
+ struct device *dev = flush_work->hr_dev->dev;
+ struct ib_qp_attr attr;
+ int attr_mask;
+ int ret;
+
+ attr_mask = IB_QP_STATE;
+ attr.qp_state = IB_QPS_ERR;
+
+ if (test_and_clear_bit(HNS_ROCE_FLUSH_FLAG, &hr_qp->flush_flag)) {
+ ret = hns_roce_modify_qp(&hr_qp->ibqp, &attr, attr_mask, NULL);
+ if (ret)
+ dev_err(dev, "modify QP to error state failed(%d) during CQE flush\n",
+ ret);
+ }
+
+ /*
+ * make sure we signal QP destroy leg that flush QP was completed
+ * so that it can safely proceed ahead now and destroy QP
+ */
+ if (refcount_dec_and_test(&hr_qp->refcount))
+ complete(&hr_qp->free);
+}
+
+void init_flush_work(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
+{
+ struct hns_roce_work *flush_work = &hr_qp->flush_work;
+ unsigned long flags;
+
+ spin_lock_irqsave(&hr_qp->flush_lock, flags);
+ /* Exit directly after destroy_qp() */
+ if (test_bit(HNS_ROCE_STOP_FLUSH_FLAG, &hr_qp->flush_flag)) {
+ spin_unlock_irqrestore(&hr_qp->flush_lock, flags);
return;
}
+ refcount_inc(&hr_qp->refcount);
+ queue_work(hr_dev->irq_workq, &flush_work->work);
+ spin_unlock_irqrestore(&hr_qp->flush_lock, flags);
+}
+
+void flush_cqe(struct hns_roce_dev *dev, struct hns_roce_qp *qp)
+{
+ /*
+ * Hip08 hardware cannot flush the WQEs in SQ/RQ if the QP state
+ * gets into errored mode. Hence, as a workaround to this
+ * hardware limitation, driver needs to assist in flushing. But
+ * the flushing operation uses mailbox to convey the QP state to
+ * the hardware and which can sleep due to the mutex protection
+ * around the mailbox calls. Hence, use the deferred flush for
+ * now.
+ */
+ if (!test_and_set_bit(HNS_ROCE_FLUSH_FLAG, &qp->flush_flag))
+ init_flush_work(dev, qp);
+}
+
+void hns_roce_qp_event(struct hns_roce_dev *hr_dev, u32 qpn, int event_type)
+{
+ struct hns_roce_qp *qp;
+
+ qp = hns_roce_qp_lookup(hr_dev, qpn);
+ if (!qp)
+ return;
+
qp->event(qp, (enum hns_roce_event)event_type);
- if (atomic_dec_and_test(&qp->refcount))
+ if (refcount_dec_and_test(&qp->refcount))
+ complete(&qp->free);
+}
+
+void hns_roce_flush_cqe(struct hns_roce_dev *hr_dev, u32 qpn)
+{
+ struct hns_roce_qp *qp;
+
+ qp = hns_roce_qp_lookup(hr_dev, qpn);
+ if (!qp)
+ return;
+
+ qp->state = IB_QPS_ERR;
+ flush_cqe(hr_dev, qp);
+
+ if (refcount_dec_and_test(&qp->refcount))
complete(&qp->free);
}
static void hns_roce_ib_qp_event(struct hns_roce_qp *hr_qp,
enum hns_roce_event type)
{
- struct ib_event event;
struct ib_qp *ibqp = &hr_qp->ibqp;
+ struct ib_event event;
if (ibqp->event_handler) {
event.device = ibqp->device;
@@ -98,10 +179,12 @@ static void hns_roce_ib_qp_event(struct hns_roce_qp *hr_qp,
event.event = IB_EVENT_QP_REQ_ERR;
break;
case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR:
+ case HNS_ROCE_EVENT_TYPE_XRCD_VIOLATION:
+ case HNS_ROCE_EVENT_TYPE_INVALID_XRCETH:
event.event = IB_EVENT_QP_ACCESS_ERR;
break;
default:
- dev_dbg(ibqp->device->dma_device, "roce_ib: Unexpected event type %d on QP %06lx\n",
+ dev_dbg(ibqp->device->dev.parent, "roce_ib: Unexpected event type %d on QP %06lx\n",
type, hr_qp->qpn);
return;
}
@@ -109,104 +192,185 @@ static void hns_roce_ib_qp_event(struct hns_roce_qp *hr_qp,
}
}
-static int hns_roce_reserve_range_qp(struct hns_roce_dev *hr_dev, int cnt,
- int align, unsigned long *base)
+static u8 get_affinity_cq_bank(u8 qp_bank)
{
- struct hns_roce_qp_table *qp_table = &hr_dev->qp_table;
-
- return hns_roce_bitmap_alloc_range(&qp_table->bitmap, cnt, align, base);
+ return (qp_bank >> 1) & CQ_BANKID_MASK;
}
-enum hns_roce_qp_state to_hns_roce_state(enum ib_qp_state state)
+static u8 get_least_load_bankid_for_qp(struct ib_qp_init_attr *init_attr,
+ struct hns_roce_bank *bank)
{
- switch (state) {
- case IB_QPS_RESET:
- return HNS_ROCE_QP_STATE_RST;
- case IB_QPS_INIT:
- return HNS_ROCE_QP_STATE_INIT;
- case IB_QPS_RTR:
- return HNS_ROCE_QP_STATE_RTR;
- case IB_QPS_RTS:
- return HNS_ROCE_QP_STATE_RTS;
- case IB_QPS_SQD:
- return HNS_ROCE_QP_STATE_SQD;
- case IB_QPS_ERR:
- return HNS_ROCE_QP_STATE_ERR;
- default:
- return HNS_ROCE_QP_NUM_STATE;
+#define INVALID_LOAD_QPNUM 0xFFFFFFFF
+ struct ib_cq *scq = init_attr->send_cq;
+ u32 least_load = INVALID_LOAD_QPNUM;
+ unsigned long cqn = 0;
+ u8 bankid = 0;
+ u32 bankcnt;
+ u8 i;
+
+ if (scq)
+ cqn = to_hr_cq(scq)->cqn;
+
+ for (i = 0; i < HNS_ROCE_QP_BANK_NUM; i++) {
+ if (scq && (get_affinity_cq_bank(i) != (cqn & CQ_BANKID_MASK)))
+ continue;
+
+ bankcnt = bank[i].inuse;
+ if (bankcnt < least_load) {
+ least_load = bankcnt;
+ bankid = i;
+ }
}
+
+ return bankid;
}
-static int hns_roce_gsi_qp_alloc(struct hns_roce_dev *hr_dev, unsigned long qpn,
- struct hns_roce_qp *hr_qp)
+static int alloc_qpn_with_bankid(struct hns_roce_bank *bank, u8 bankid,
+ unsigned long *qpn)
+{
+ int id;
+
+ id = ida_alloc_range(&bank->ida, bank->next, bank->max, GFP_KERNEL);
+ if (id < 0) {
+ id = ida_alloc_range(&bank->ida, bank->min, bank->max,
+ GFP_KERNEL);
+ if (id < 0)
+ return id;
+ }
+
+ /* the QPN should keep increasing until the max value is reached. */
+ bank->next = (id + 1) > bank->max ? bank->min : id + 1;
+
+ /* the lower 3 bits is bankid */
+ *qpn = (id << 3) | bankid;
+
+ return 0;
+}
+static int alloc_qpn(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
+ struct ib_qp_init_attr *init_attr)
{
struct hns_roce_qp_table *qp_table = &hr_dev->qp_table;
+ unsigned long num = 0;
+ u8 bankid;
int ret;
- if (!qpn)
- return -EINVAL;
+ if (hr_qp->ibqp.qp_type == IB_QPT_GSI) {
+ num = 1;
+ } else {
+ mutex_lock(&qp_table->bank_mutex);
+ bankid = get_least_load_bankid_for_qp(init_attr, qp_table->bank);
- hr_qp->qpn = qpn;
+ ret = alloc_qpn_with_bankid(&qp_table->bank[bankid], bankid,
+ &num);
+ if (ret) {
+ ibdev_err(&hr_dev->ib_dev,
+ "failed to alloc QPN, ret = %d\n", ret);
+ mutex_unlock(&qp_table->bank_mutex);
+ return ret;
+ }
- spin_lock_irq(&qp_table->lock);
- ret = radix_tree_insert(&hr_dev->qp_table_tree,
- hr_qp->qpn & (hr_dev->caps.num_qps - 1), hr_qp);
- spin_unlock_irq(&qp_table->lock);
- if (ret) {
- dev_err(&hr_dev->pdev->dev, "QPC radix_tree_insert failed\n");
- goto err_put_irrl;
+ qp_table->bank[bankid].inuse++;
+ mutex_unlock(&qp_table->bank_mutex);
}
- atomic_set(&hr_qp->refcount, 1);
- init_completion(&hr_qp->free);
+ hr_qp->qpn = num;
return 0;
+}
-err_put_irrl:
+static void add_qp_to_list(struct hns_roce_dev *hr_dev,
+ struct hns_roce_qp *hr_qp,
+ struct ib_cq *send_cq, struct ib_cq *recv_cq)
+{
+ struct hns_roce_cq *hr_send_cq, *hr_recv_cq;
+ unsigned long flags;
+
+ hr_send_cq = send_cq ? to_hr_cq(send_cq) : NULL;
+ hr_recv_cq = recv_cq ? to_hr_cq(recv_cq) : NULL;
+
+ spin_lock_irqsave(&hr_dev->qp_list_lock, flags);
+ hns_roce_lock_cqs(hr_send_cq, hr_recv_cq);
+
+ list_add_tail(&hr_qp->node, &hr_dev->qp_list);
+ if (hr_send_cq)
+ list_add_tail(&hr_qp->sq_node, &hr_send_cq->sq_list);
+ if (hr_recv_cq)
+ list_add_tail(&hr_qp->rq_node, &hr_recv_cq->rq_list);
+
+ hns_roce_unlock_cqs(hr_send_cq, hr_recv_cq);
+ spin_unlock_irqrestore(&hr_dev->qp_list_lock, flags);
+}
+
+static int hns_roce_qp_store(struct hns_roce_dev *hr_dev,
+ struct hns_roce_qp *hr_qp,
+ struct ib_qp_init_attr *init_attr)
+{
+ struct xarray *xa = &hr_dev->qp_table_xa;
+ int ret;
+
+ if (!hr_qp->qpn)
+ return -EINVAL;
+
+ ret = xa_err(xa_store_irq(xa, hr_qp->qpn, hr_qp, GFP_KERNEL));
+ if (ret)
+ dev_err(hr_dev->dev, "failed to xa store for QPC\n");
+ else
+ /* add QP to device's QP list for softwc */
+ add_qp_to_list(hr_dev, hr_qp, init_attr->send_cq,
+ init_attr->recv_cq);
return ret;
}
-static int hns_roce_qp_alloc(struct hns_roce_dev *hr_dev, unsigned long qpn,
- struct hns_roce_qp *hr_qp)
+static int alloc_qpc(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
{
struct hns_roce_qp_table *qp_table = &hr_dev->qp_table;
- struct device *dev = &hr_dev->pdev->dev;
+ struct device *dev = hr_dev->dev;
int ret;
- if (!qpn)
+ if (!hr_qp->qpn)
return -EINVAL;
- hr_qp->qpn = qpn;
-
/* Alloc memory for QPC */
ret = hns_roce_table_get(hr_dev, &qp_table->qp_table, hr_qp->qpn);
if (ret) {
- dev_err(dev, "QPC table get failed\n");
+ dev_err(dev, "failed to get QPC table\n");
goto err_out;
}
/* Alloc memory for IRRL */
ret = hns_roce_table_get(hr_dev, &qp_table->irrl_table, hr_qp->qpn);
if (ret) {
- dev_err(dev, "IRRL table get failed\n");
+ dev_err(dev, "failed to get IRRL table\n");
goto err_put_qp;
}
- spin_lock_irq(&qp_table->lock);
- ret = radix_tree_insert(&hr_dev->qp_table_tree,
- hr_qp->qpn & (hr_dev->caps.num_qps - 1), hr_qp);
- spin_unlock_irq(&qp_table->lock);
- if (ret) {
- dev_err(dev, "QPC radix_tree_insert failed\n");
- goto err_put_irrl;
+ if (hr_dev->caps.trrl_entry_sz) {
+ /* Alloc memory for TRRL */
+ ret = hns_roce_table_get(hr_dev, &qp_table->trrl_table,
+ hr_qp->qpn);
+ if (ret) {
+ dev_err(dev, "failed to get TRRL table\n");
+ goto err_put_irrl;
+ }
}
- atomic_set(&hr_qp->refcount, 1);
- init_completion(&hr_qp->free);
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL) {
+ /* Alloc memory for SCC CTX */
+ ret = hns_roce_table_get(hr_dev, &qp_table->sccc_table,
+ hr_qp->qpn);
+ if (ret) {
+ dev_err(dev, "failed to get SCC CTX table\n");
+ goto err_put_trrl;
+ }
+ }
return 0;
+err_put_trrl:
+ if (hr_dev->caps.trrl_entry_sz)
+ hns_roce_table_put(hr_dev, &qp_table->trrl_table, hr_qp->qpn);
+
err_put_irrl:
hns_roce_table_put(hr_dev, &qp_table->irrl_table, hr_qp->qpn);
@@ -217,517 +381,1101 @@ err_out:
return ret;
}
+static void qp_user_mmap_entry_remove(struct hns_roce_qp *hr_qp)
+{
+ rdma_user_mmap_entry_remove(&hr_qp->dwqe_mmap_entry->rdma_entry);
+}
+
void hns_roce_qp_remove(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
{
- struct hns_roce_qp_table *qp_table = &hr_dev->qp_table;
+ struct xarray *xa = &hr_dev->qp_table_xa;
unsigned long flags;
- spin_lock_irqsave(&qp_table->lock, flags);
- radix_tree_delete(&hr_dev->qp_table_tree,
- hr_qp->qpn & (hr_dev->caps.num_qps - 1));
- spin_unlock_irqrestore(&qp_table->lock, flags);
+ list_del(&hr_qp->node);
+
+ if (hr_qp->ibqp.qp_type != IB_QPT_XRC_TGT)
+ list_del(&hr_qp->sq_node);
+
+ if (hr_qp->ibqp.qp_type != IB_QPT_XRC_INI &&
+ hr_qp->ibqp.qp_type != IB_QPT_XRC_TGT)
+ list_del(&hr_qp->rq_node);
+
+ xa_lock_irqsave(xa, flags);
+ __xa_erase(xa, hr_qp->qpn);
+ xa_unlock_irqrestore(xa, flags);
}
-void hns_roce_qp_free(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
+static void free_qpc(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
{
struct hns_roce_qp_table *qp_table = &hr_dev->qp_table;
- if (atomic_dec_and_test(&hr_qp->refcount))
- complete(&hr_qp->free);
- wait_for_completion(&hr_qp->free);
+ if (hr_dev->caps.trrl_entry_sz)
+ hns_roce_table_put(hr_dev, &qp_table->trrl_table, hr_qp->qpn);
+ hns_roce_table_put(hr_dev, &qp_table->irrl_table, hr_qp->qpn);
+}
- if ((hr_qp->ibqp.qp_type) != IB_QPT_GSI) {
- hns_roce_table_put(hr_dev, &qp_table->irrl_table, hr_qp->qpn);
- hns_roce_table_put(hr_dev, &qp_table->qp_table, hr_qp->qpn);
- }
+static inline u8 get_qp_bankid(unsigned long qpn)
+{
+ /* The lower 3 bits of QPN are used to hash to different banks */
+ return (u8)(qpn & GENMASK(2, 0));
}
-void hns_roce_release_range_qp(struct hns_roce_dev *hr_dev, int base_qpn,
- int cnt)
+static void free_qpn(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
{
- struct hns_roce_qp_table *qp_table = &hr_dev->qp_table;
+ u8 bankid;
+
+ if (hr_qp->ibqp.qp_type == IB_QPT_GSI)
+ return;
- if (base_qpn < SQP_NUM)
+ if (hr_qp->qpn < hr_dev->caps.reserved_qps)
return;
- hns_roce_bitmap_free_range(&qp_table->bitmap, base_qpn, cnt, BITMAP_RR);
+ bankid = get_qp_bankid(hr_qp->qpn);
+
+ ida_free(&hr_dev->qp_table.bank[bankid].ida,
+ hr_qp->qpn / HNS_ROCE_QP_BANK_NUM);
+
+ mutex_lock(&hr_dev->qp_table.bank_mutex);
+ hr_dev->qp_table.bank[bankid].inuse--;
+ mutex_unlock(&hr_dev->qp_table.bank_mutex);
+}
+
+static u32 proc_rq_sge(struct hns_roce_dev *dev, struct hns_roce_qp *hr_qp,
+ bool user)
+{
+ u32 max_sge = dev->caps.max_rq_sg;
+
+ if (dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
+ return max_sge;
+
+ /* Reserve SGEs only for HIP08 in kernel; The userspace driver will
+ * calculate number of max_sge with reserved SGEs when allocating wqe
+ * buf, so there is no need to do this again in kernel. But the number
+ * may exceed the capacity of SGEs recorded in the firmware, so the
+ * kernel driver should just adapt the value accordingly.
+ */
+ if (user)
+ max_sge = roundup_pow_of_two(max_sge + 1);
+ else
+ hr_qp->rq.rsv_sge = 1;
+
+ return max_sge;
}
-static int hns_roce_set_rq_size(struct hns_roce_dev *hr_dev,
- struct ib_qp_cap *cap, int is_user, int has_srq,
- struct hns_roce_qp *hr_qp)
+static int set_rq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap,
+ struct hns_roce_qp *hr_qp, int has_rq, bool user)
{
- u32 max_cnt;
- struct device *dev = &hr_dev->pdev->dev;
+ u32 max_sge = proc_rq_sge(hr_dev, hr_qp, user);
+ u32 cnt;
+
+ /* If srq exist, set zero for relative number of rq */
+ if (!has_rq) {
+ hr_qp->rq.wqe_cnt = 0;
+ hr_qp->rq.max_gs = 0;
+ cap->max_recv_wr = 0;
+ cap->max_recv_sge = 0;
+
+ return 0;
+ }
/* Check the validity of QP support capacity */
- if (cap->max_recv_wr > hr_dev->caps.max_wqes ||
- cap->max_recv_sge > hr_dev->caps.max_rq_sg) {
- dev_err(dev, "RQ WR or sge error!max_recv_wr=%d max_recv_sge=%d\n",
- cap->max_recv_wr, cap->max_recv_sge);
+ if (!cap->max_recv_wr || cap->max_recv_wr > hr_dev->caps.max_wqes ||
+ cap->max_recv_sge > max_sge) {
+ ibdev_err(&hr_dev->ib_dev,
+ "RQ config error, depth = %u, sge = %u\n",
+ cap->max_recv_wr, cap->max_recv_sge);
return -EINVAL;
}
- /* If srq exit, set zero for relative number of rq */
- if (has_srq) {
- if (cap->max_recv_wr) {
- dev_dbg(dev, "srq no need config max_recv_wr\n");
- return -EINVAL;
- }
+ cnt = roundup_pow_of_two(max(cap->max_recv_wr, hr_dev->caps.min_wqes));
+ if (cnt > hr_dev->caps.max_wqes) {
+ ibdev_err(&hr_dev->ib_dev, "rq depth %u too large\n",
+ cap->max_recv_wr);
+ return -EINVAL;
+ }
- hr_qp->rq.wqe_cnt = hr_qp->rq.max_gs = 0;
- } else {
- if (is_user && (!cap->max_recv_wr || !cap->max_recv_sge)) {
- dev_err(dev, "user space no need config max_recv_wr max_recv_sge\n");
- return -EINVAL;
- }
+ hr_qp->rq.max_gs = roundup_pow_of_two(max(1U, cap->max_recv_sge) +
+ hr_qp->rq.rsv_sge);
- /* In v1 engine, parameter verification procession */
- max_cnt = cap->max_recv_wr > HNS_ROCE_MIN_WQE_NUM ?
- cap->max_recv_wr : HNS_ROCE_MIN_WQE_NUM;
- hr_qp->rq.wqe_cnt = roundup_pow_of_two(max_cnt);
+ hr_qp->rq.wqe_shift = ilog2(hr_dev->caps.max_rq_desc_sz *
+ hr_qp->rq.max_gs);
- if ((u32)hr_qp->rq.wqe_cnt > hr_dev->caps.max_wqes) {
- dev_err(dev, "hns_roce_set_rq_size rq.wqe_cnt too large\n");
- return -EINVAL;
- }
+ hr_qp->rq.wqe_cnt = cnt;
- max_cnt = max(1U, cap->max_recv_sge);
- hr_qp->rq.max_gs = roundup_pow_of_two(max_cnt);
- /* WQE is fixed for 64B */
- hr_qp->rq.wqe_shift = ilog2(hr_dev->caps.max_rq_desc_sz);
- }
+ cap->max_recv_wr = cnt;
+ cap->max_recv_sge = hr_qp->rq.max_gs - hr_qp->rq.rsv_sge;
+
+ return 0;
+}
- cap->max_recv_wr = hr_qp->rq.max_post = hr_qp->rq.wqe_cnt;
- cap->max_recv_sge = hr_qp->rq.max_gs;
+static u32 get_max_inline_data(struct hns_roce_dev *hr_dev,
+ struct ib_qp_cap *cap)
+{
+ if (cap->max_inline_data) {
+ cap->max_inline_data = roundup_pow_of_two(cap->max_inline_data);
+ return min(cap->max_inline_data,
+ hr_dev->caps.max_sq_inline);
+ }
return 0;
}
-static int hns_roce_set_user_sq_size(struct hns_roce_dev *hr_dev,
- struct hns_roce_qp *hr_qp,
- struct hns_roce_ib_create_qp *ucmd)
+static void update_inline_data(struct hns_roce_qp *hr_qp,
+ struct ib_qp_cap *cap)
+{
+ u32 sge_num = hr_qp->sq.ext_sge_cnt;
+
+ if (hr_qp->config & HNS_ROCE_EXSGE_FLAGS) {
+ if (!(hr_qp->ibqp.qp_type == IB_QPT_GSI ||
+ hr_qp->ibqp.qp_type == IB_QPT_UD))
+ sge_num = max((u32)HNS_ROCE_SGE_IN_WQE, sge_num);
+
+ cap->max_inline_data = max(cap->max_inline_data,
+ sge_num * HNS_ROCE_SGE_SIZE);
+ }
+
+ hr_qp->max_inline_data = cap->max_inline_data;
+}
+
+static u32 get_sge_num_from_max_send_sge(bool is_ud_or_gsi,
+ u32 max_send_sge)
+{
+ unsigned int std_sge_num;
+ unsigned int min_sge;
+
+ std_sge_num = is_ud_or_gsi ? 0 : HNS_ROCE_SGE_IN_WQE;
+ min_sge = is_ud_or_gsi ? 1 : 0;
+ return max_send_sge > std_sge_num ? (max_send_sge - std_sge_num) :
+ min_sge;
+}
+
+static unsigned int get_sge_num_from_max_inl_data(bool is_ud_or_gsi,
+ u32 max_inline_data)
+{
+ unsigned int inline_sge;
+
+ if (!max_inline_data)
+ return 0;
+
+ /*
+ * if max_inline_data less than
+ * HNS_ROCE_SGE_IN_WQE * HNS_ROCE_SGE_SIZE,
+ * In addition to ud's mode, no need to extend sge.
+ */
+ inline_sge = roundup_pow_of_two(max_inline_data) / HNS_ROCE_SGE_SIZE;
+ if (!is_ud_or_gsi && inline_sge <= HNS_ROCE_SGE_IN_WQE)
+ inline_sge = 0;
+
+ return inline_sge;
+}
+
+static void set_ext_sge_param(struct hns_roce_dev *hr_dev, u32 sq_wqe_cnt,
+ struct hns_roce_qp *hr_qp, struct ib_qp_cap *cap)
+{
+ bool is_ud_or_gsi = (hr_qp->ibqp.qp_type == IB_QPT_GSI ||
+ hr_qp->ibqp.qp_type == IB_QPT_UD);
+ unsigned int std_sge_num;
+ u32 inline_ext_sge = 0;
+ u32 ext_wqe_sge_cnt;
+ u32 total_sge_cnt;
+
+ cap->max_inline_data = get_max_inline_data(hr_dev, cap);
+
+ hr_qp->sge.sge_shift = HNS_ROCE_SGE_SHIFT;
+ std_sge_num = is_ud_or_gsi ? 0 : HNS_ROCE_SGE_IN_WQE;
+ ext_wqe_sge_cnt = get_sge_num_from_max_send_sge(is_ud_or_gsi,
+ cap->max_send_sge);
+
+ if (hr_qp->config & HNS_ROCE_EXSGE_FLAGS) {
+ inline_ext_sge = max(ext_wqe_sge_cnt,
+ get_sge_num_from_max_inl_data(is_ud_or_gsi,
+ cap->max_inline_data));
+ hr_qp->sq.ext_sge_cnt = inline_ext_sge ?
+ roundup_pow_of_two(inline_ext_sge) : 0;
+
+ hr_qp->sq.max_gs = max(1U, (hr_qp->sq.ext_sge_cnt + std_sge_num));
+ hr_qp->sq.max_gs = min(hr_qp->sq.max_gs, hr_dev->caps.max_sq_sg);
+
+ ext_wqe_sge_cnt = hr_qp->sq.ext_sge_cnt;
+ } else {
+ hr_qp->sq.max_gs = max(1U, cap->max_send_sge);
+ hr_qp->sq.max_gs = min(hr_qp->sq.max_gs, hr_dev->caps.max_sq_sg);
+ hr_qp->sq.ext_sge_cnt = hr_qp->sq.max_gs;
+ }
+
+ /* If the number of extended sge is not zero, they MUST use the
+ * space of HNS_HW_PAGE_SIZE at least.
+ */
+ if (ext_wqe_sge_cnt) {
+ total_sge_cnt = roundup_pow_of_two(sq_wqe_cnt * ext_wqe_sge_cnt);
+ hr_qp->sge.sge_cnt = max(total_sge_cnt,
+ (u32)HNS_HW_PAGE_SIZE / HNS_ROCE_SGE_SIZE);
+ }
+
+ update_inline_data(hr_qp, cap);
+}
+
+static int check_sq_size_with_integrity(struct hns_roce_dev *hr_dev,
+ struct ib_qp_cap *cap,
+ struct hns_roce_ib_create_qp *ucmd)
{
u32 roundup_sq_stride = roundup_pow_of_two(hr_dev->caps.max_sq_desc_sz);
u8 max_sq_stride = ilog2(roundup_sq_stride);
/* Sanity check SQ size before proceeding */
- if ((u32)(1 << ucmd->log_sq_bb_count) > hr_dev->caps.max_wqes ||
- ucmd->log_sq_stride > max_sq_stride ||
- ucmd->log_sq_stride < HNS_ROCE_IB_MIN_SQ_STRIDE) {
- dev_err(&hr_dev->pdev->dev, "check SQ size error!\n");
+ if (ucmd->log_sq_stride > max_sq_stride ||
+ ucmd->log_sq_stride < HNS_ROCE_IB_MIN_SQ_STRIDE) {
+ ibdev_err(&hr_dev->ib_dev, "failed to check SQ stride size.\n");
return -EINVAL;
}
- hr_qp->sq.wqe_cnt = 1 << ucmd->log_sq_bb_count;
+ if (cap->max_send_sge > hr_dev->caps.max_sq_sg) {
+ ibdev_err(&hr_dev->ib_dev, "failed to check SQ SGE size %u.\n",
+ cap->max_send_sge);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int set_user_sq_size(struct hns_roce_dev *hr_dev,
+ struct ib_qp_cap *cap, struct hns_roce_qp *hr_qp,
+ struct hns_roce_ib_create_qp *ucmd)
+{
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ u32 cnt = 0;
+ int ret;
+
+ if (check_shl_overflow(1, ucmd->log_sq_bb_count, &cnt) ||
+ cnt > hr_dev->caps.max_wqes)
+ return -EINVAL;
+
+ ret = check_sq_size_with_integrity(hr_dev, cap, ucmd);
+ if (ret) {
+ ibdev_err(ibdev, "failed to check user SQ size, ret = %d.\n",
+ ret);
+ return ret;
+ }
+
+ set_ext_sge_param(hr_dev, cnt, hr_qp, cap);
+
hr_qp->sq.wqe_shift = ucmd->log_sq_stride;
+ hr_qp->sq.wqe_cnt = cnt;
- /* Get buf size, SQ and RQ are aligned to page_szie */
- hr_qp->buff_size = HNS_ROCE_ALOGN_UP((hr_qp->rq.wqe_cnt <<
- hr_qp->rq.wqe_shift), PAGE_SIZE) +
- HNS_ROCE_ALOGN_UP((hr_qp->sq.wqe_cnt <<
- hr_qp->sq.wqe_shift), PAGE_SIZE);
+ return 0;
+}
+
+static int set_wqe_buf_attr(struct hns_roce_dev *hr_dev,
+ struct hns_roce_qp *hr_qp,
+ struct hns_roce_buf_attr *buf_attr)
+{
+ int buf_size;
+ int idx = 0;
+ hr_qp->buff_size = 0;
+
+ /* SQ WQE */
hr_qp->sq.offset = 0;
- hr_qp->rq.offset = HNS_ROCE_ALOGN_UP((hr_qp->sq.wqe_cnt <<
- hr_qp->sq.wqe_shift), PAGE_SIZE);
+ buf_size = to_hr_hem_entries_size(hr_qp->sq.wqe_cnt,
+ hr_qp->sq.wqe_shift);
+ if (buf_size > 0 && idx < ARRAY_SIZE(buf_attr->region)) {
+ buf_attr->region[idx].size = buf_size;
+ buf_attr->region[idx].hopnum = hr_dev->caps.wqe_sq_hop_num;
+ idx++;
+ hr_qp->buff_size += buf_size;
+ }
+
+ /* extend SGE WQE in SQ */
+ hr_qp->sge.offset = hr_qp->buff_size;
+ buf_size = to_hr_hem_entries_size(hr_qp->sge.sge_cnt,
+ hr_qp->sge.sge_shift);
+ if (buf_size > 0 && idx < ARRAY_SIZE(buf_attr->region)) {
+ buf_attr->region[idx].size = buf_size;
+ buf_attr->region[idx].hopnum = hr_dev->caps.wqe_sge_hop_num;
+ idx++;
+ hr_qp->buff_size += buf_size;
+ }
+
+ /* RQ WQE */
+ hr_qp->rq.offset = hr_qp->buff_size;
+ buf_size = to_hr_hem_entries_size(hr_qp->rq.wqe_cnt,
+ hr_qp->rq.wqe_shift);
+ if (buf_size > 0 && idx < ARRAY_SIZE(buf_attr->region)) {
+ buf_attr->region[idx].size = buf_size;
+ buf_attr->region[idx].hopnum = hr_dev->caps.wqe_rq_hop_num;
+ idx++;
+ hr_qp->buff_size += buf_size;
+ }
+
+ if (hr_qp->buff_size < 1)
+ return -EINVAL;
+
+ buf_attr->page_shift = HNS_HW_PAGE_SHIFT + hr_dev->caps.mtt_buf_pg_sz;
+ buf_attr->region_count = idx;
return 0;
}
-static int hns_roce_set_kernel_sq_size(struct hns_roce_dev *hr_dev,
- struct ib_qp_cap *cap,
- struct hns_roce_qp *hr_qp)
+static int set_kernel_sq_size(struct hns_roce_dev *hr_dev,
+ struct ib_qp_cap *cap, struct hns_roce_qp *hr_qp)
{
- struct device *dev = &hr_dev->pdev->dev;
- u32 max_cnt;
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ u32 cnt;
- if (cap->max_send_wr > hr_dev->caps.max_wqes ||
- cap->max_send_sge > hr_dev->caps.max_sq_sg ||
- cap->max_inline_data > hr_dev->caps.max_sq_inline) {
- dev_err(dev, "hns_roce_set_kernel_sq_size error1\n");
+ if (!cap->max_send_wr || cap->max_send_wr > hr_dev->caps.max_wqes ||
+ cap->max_send_sge > hr_dev->caps.max_sq_sg) {
+ ibdev_err(ibdev, "failed to check SQ WR or SGE num.\n");
return -EINVAL;
}
- hr_qp->sq.wqe_shift = ilog2(hr_dev->caps.max_sq_desc_sz);
- hr_qp->sq_max_wqes_per_wr = 1;
- hr_qp->sq_spare_wqes = 0;
-
- /* In v1 engine, parameter verification procession */
- max_cnt = cap->max_send_wr > HNS_ROCE_MIN_WQE_NUM ?
- cap->max_send_wr : HNS_ROCE_MIN_WQE_NUM;
- hr_qp->sq.wqe_cnt = roundup_pow_of_two(max_cnt);
- if ((u32)hr_qp->sq.wqe_cnt > hr_dev->caps.max_wqes) {
- dev_err(dev, "hns_roce_set_kernel_sq_size sq.wqe_cnt too large\n");
+ cnt = roundup_pow_of_two(max(cap->max_send_wr, hr_dev->caps.min_wqes));
+ if (cnt > hr_dev->caps.max_wqes) {
+ ibdev_err(ibdev, "failed to check WQE num, WQE num = %u.\n",
+ cnt);
return -EINVAL;
}
- /* Get data_seg numbers */
- max_cnt = max(1U, cap->max_send_sge);
- hr_qp->sq.max_gs = roundup_pow_of_two(max_cnt);
-
- /* Get buf size, SQ and RQ are aligned to page_szie */
- hr_qp->buff_size = HNS_ROCE_ALOGN_UP((hr_qp->rq.wqe_cnt <<
- hr_qp->rq.wqe_shift), PAGE_SIZE) +
- HNS_ROCE_ALOGN_UP((hr_qp->sq.wqe_cnt <<
- hr_qp->sq.wqe_shift), PAGE_SIZE);
- hr_qp->sq.offset = 0;
- hr_qp->rq.offset = HNS_ROCE_ALOGN_UP((hr_qp->sq.wqe_cnt <<
- hr_qp->sq.wqe_shift), PAGE_SIZE);
+ hr_qp->sq.wqe_shift = ilog2(hr_dev->caps.max_sq_desc_sz);
+ hr_qp->sq.wqe_cnt = cnt;
- /* Get wr and sge number which send */
- cap->max_send_wr = hr_qp->sq.max_post = hr_qp->sq.wqe_cnt;
- cap->max_send_sge = hr_qp->sq.max_gs;
+ set_ext_sge_param(hr_dev, cnt, hr_qp, cap);
- /* We don't support inline sends for kernel QPs (yet) */
- cap->max_inline_data = 0;
+ /* sync the parameters of kernel QP to user's configuration */
+ cap->max_send_wr = cnt;
return 0;
}
-static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
- struct ib_pd *ib_pd,
- struct ib_qp_init_attr *init_attr,
- struct ib_udata *udata, unsigned long sqpn,
- struct hns_roce_qp *hr_qp)
+static int hns_roce_qp_has_sq(struct ib_qp_init_attr *attr)
{
- struct device *dev = &hr_dev->pdev->dev;
- struct hns_roce_ib_create_qp ucmd;
- unsigned long qpn = 0;
- int ret = 0;
+ if (attr->qp_type == IB_QPT_XRC_TGT || !attr->cap.max_send_wr)
+ return 0;
- mutex_init(&hr_qp->mutex);
- spin_lock_init(&hr_qp->sq.lock);
- spin_lock_init(&hr_qp->rq.lock);
+ return 1;
+}
- hr_qp->state = IB_QPS_RESET;
+static int hns_roce_qp_has_rq(struct ib_qp_init_attr *attr)
+{
+ if (attr->qp_type == IB_QPT_XRC_INI ||
+ attr->qp_type == IB_QPT_XRC_TGT || attr->srq ||
+ !attr->cap.max_recv_wr)
+ return 0;
- if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
- hr_qp->sq_signal_bits = IB_SIGNAL_ALL_WR;
- else
- hr_qp->sq_signal_bits = IB_SIGNAL_REQ_WR;
+ return 1;
+}
+
+static int alloc_qp_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
+ struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata, unsigned long addr)
+{
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ struct hns_roce_buf_attr buf_attr = {};
+ int ret;
- ret = hns_roce_set_rq_size(hr_dev, &init_attr->cap, !!ib_pd->uobject,
- !!init_attr->srq, hr_qp);
+ ret = set_wqe_buf_attr(hr_dev, hr_qp, &buf_attr);
if (ret) {
- dev_err(dev, "hns_roce_set_rq_size failed\n");
- goto err_out;
+ ibdev_err(ibdev, "failed to split WQE buf, ret = %d.\n", ret);
+ goto err_inline;
+ }
+ ret = hns_roce_mtr_create(hr_dev, &hr_qp->mtr, &buf_attr,
+ PAGE_SHIFT + hr_dev->caps.mtt_ba_pg_sz,
+ udata, addr);
+ if (ret) {
+ ibdev_err(ibdev, "failed to create WQE mtr, ret = %d.\n", ret);
+ goto err_inline;
}
- if (ib_pd->uobject) {
- if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) {
- dev_err(dev, "ib_copy_from_udata error for create qp\n");
- ret = -EFAULT;
- goto err_out;
- }
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_DIRECT_WQE)
+ hr_qp->en_flags |= HNS_ROCE_QP_CAP_DIRECT_WQE;
- ret = hns_roce_set_user_sq_size(hr_dev, hr_qp, &ucmd);
- if (ret) {
- dev_err(dev, "hns_roce_set_user_sq_size error for create qp\n");
- goto err_out;
- }
+ return 0;
- hr_qp->umem = ib_umem_get(ib_pd->uobject->context,
- ucmd.buf_addr, hr_qp->buff_size, 0,
- 0);
- if (IS_ERR(hr_qp->umem)) {
- dev_err(dev, "ib_umem_get error for create qp\n");
- ret = PTR_ERR(hr_qp->umem);
- goto err_out;
- }
+err_inline:
- ret = hns_roce_mtt_init(hr_dev, ib_umem_page_count(hr_qp->umem),
- ilog2((unsigned int)hr_qp->umem->page_size),
- &hr_qp->mtt);
- if (ret) {
- dev_err(dev, "hns_roce_mtt_init error for create qp\n");
- goto err_buf;
- }
+ return ret;
+}
- ret = hns_roce_ib_umem_write_mtt(hr_dev, &hr_qp->mtt,
- hr_qp->umem);
- if (ret) {
- dev_err(dev, "hns_roce_ib_umem_write_mtt error for create qp\n");
- goto err_mtt;
- }
- } else {
- if (init_attr->create_flags &
- IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) {
- dev_err(dev, "init_attr->create_flags error!\n");
- ret = -EINVAL;
- goto err_out;
- }
+static void free_qp_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
+{
+ hns_roce_mtr_destroy(hr_dev, &hr_qp->mtr);
+}
- if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO) {
- dev_err(dev, "init_attr->create_flags error!\n");
- ret = -EINVAL;
- goto err_out;
- }
+static inline bool user_qp_has_sdb(struct hns_roce_dev *hr_dev,
+ struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata,
+ struct hns_roce_ib_create_qp_resp *resp,
+ struct hns_roce_ib_create_qp *ucmd)
+{
+ return ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_RECORD_DB) &&
+ udata->outlen >= offsetofend(typeof(*resp), cap_flags) &&
+ hns_roce_qp_has_sq(init_attr) &&
+ udata->inlen >= offsetofend(typeof(*ucmd), sdb_addr));
+}
- /* Set SQ size */
- ret = hns_roce_set_kernel_sq_size(hr_dev, &init_attr->cap,
- hr_qp);
- if (ret) {
- dev_err(dev, "hns_roce_set_kernel_sq_size error!\n");
- goto err_out;
- }
+static inline bool user_qp_has_rdb(struct hns_roce_dev *hr_dev,
+ struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata,
+ struct hns_roce_ib_create_qp_resp *resp)
+{
+ return ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_RECORD_DB) &&
+ udata->outlen >= offsetofend(typeof(*resp), cap_flags) &&
+ hns_roce_qp_has_rq(init_attr));
+}
- /* QP doorbell register address */
- hr_qp->sq.db_reg_l = hr_dev->reg_base + ROCEE_DB_SQ_L_0_REG +
- DB_REG_OFFSET * hr_dev->priv_uar.index;
- hr_qp->rq.db_reg_l = hr_dev->reg_base +
- ROCEE_DB_OTHERS_L_0_REG +
- DB_REG_OFFSET * hr_dev->priv_uar.index;
-
- /* Allocate QP buf */
- if (hns_roce_buf_alloc(hr_dev, hr_qp->buff_size, PAGE_SIZE * 2,
- &hr_qp->hr_buf)) {
- dev_err(dev, "hns_roce_buf_alloc error!\n");
- ret = -ENOMEM;
+static inline bool kernel_qp_has_rdb(struct hns_roce_dev *hr_dev,
+ struct ib_qp_init_attr *init_attr)
+{
+ return ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_RECORD_DB) &&
+ hns_roce_qp_has_rq(init_attr));
+}
+
+static int qp_mmap_entry(struct hns_roce_qp *hr_qp,
+ struct hns_roce_dev *hr_dev,
+ struct ib_udata *udata,
+ struct hns_roce_ib_create_qp_resp *resp)
+{
+ struct hns_roce_ucontext *uctx =
+ rdma_udata_to_drv_context(udata,
+ struct hns_roce_ucontext, ibucontext);
+ struct rdma_user_mmap_entry *rdma_entry;
+ u64 address;
+
+ address = hr_dev->dwqe_page + hr_qp->qpn * HNS_ROCE_DWQE_SIZE;
+
+ hr_qp->dwqe_mmap_entry =
+ hns_roce_user_mmap_entry_insert(&uctx->ibucontext, address,
+ HNS_ROCE_DWQE_SIZE,
+ HNS_ROCE_MMAP_TYPE_DWQE);
+
+ if (!hr_qp->dwqe_mmap_entry) {
+ ibdev_err(&hr_dev->ib_dev, "failed to get dwqe mmap entry.\n");
+ return -ENOMEM;
+ }
+
+ rdma_entry = &hr_qp->dwqe_mmap_entry->rdma_entry;
+ resp->dwqe_mmap_key = rdma_user_mmap_get_offset(rdma_entry);
+
+ return 0;
+}
+
+static int alloc_user_qp_db(struct hns_roce_dev *hr_dev,
+ struct hns_roce_qp *hr_qp,
+ struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata,
+ struct hns_roce_ib_create_qp *ucmd,
+ struct hns_roce_ib_create_qp_resp *resp)
+{
+ bool has_sdb = user_qp_has_sdb(hr_dev, init_attr, udata, resp, ucmd);
+ struct hns_roce_ucontext *uctx = rdma_udata_to_drv_context(udata,
+ struct hns_roce_ucontext, ibucontext);
+ bool has_rdb = user_qp_has_rdb(hr_dev, init_attr, udata, resp);
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ int ret;
+
+ if (has_sdb) {
+ ret = hns_roce_db_map_user(uctx, ucmd->sdb_addr, &hr_qp->sdb);
+ if (ret) {
+ ibdev_err(ibdev,
+ "failed to map user SQ doorbell, ret = %d.\n",
+ ret);
goto err_out;
}
+ hr_qp->en_flags |= HNS_ROCE_QP_CAP_SQ_RECORD_DB;
+ }
- /* Write MTT */
- ret = hns_roce_mtt_init(hr_dev, hr_qp->hr_buf.npages,
- hr_qp->hr_buf.page_shift, &hr_qp->mtt);
+ if (has_rdb) {
+ ret = hns_roce_db_map_user(uctx, ucmd->db_addr, &hr_qp->rdb);
if (ret) {
- dev_err(dev, "hns_roce_mtt_init error for kernel create qp\n");
- goto err_buf;
+ ibdev_err(ibdev,
+ "failed to map user RQ doorbell, ret = %d.\n",
+ ret);
+ goto err_sdb;
}
+ hr_qp->en_flags |= HNS_ROCE_QP_CAP_RQ_RECORD_DB;
+ }
- ret = hns_roce_buf_write_mtt(hr_dev, &hr_qp->mtt,
- &hr_qp->hr_buf);
+ return 0;
+
+err_sdb:
+ if (has_sdb)
+ hns_roce_db_unmap_user(uctx, &hr_qp->sdb);
+err_out:
+ return ret;
+}
+
+static int alloc_kernel_qp_db(struct hns_roce_dev *hr_dev,
+ struct hns_roce_qp *hr_qp,
+ struct ib_qp_init_attr *init_attr)
+{
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ int ret;
+
+ if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
+ hr_qp->sq.db_reg = hr_dev->mem_base +
+ HNS_ROCE_DWQE_SIZE * hr_qp->qpn;
+ else
+ hr_qp->sq.db_reg = hr_dev->reg_base + hr_dev->sdb_offset +
+ DB_REG_OFFSET * hr_dev->priv_uar.index;
+
+ hr_qp->rq.db_reg = hr_dev->reg_base + hr_dev->odb_offset +
+ DB_REG_OFFSET * hr_dev->priv_uar.index;
+
+ if (kernel_qp_has_rdb(hr_dev, init_attr)) {
+ ret = hns_roce_alloc_db(hr_dev, &hr_qp->rdb, 0);
if (ret) {
- dev_err(dev, "hns_roce_buf_write_mtt error for kernel create qp\n");
- goto err_mtt;
+ ibdev_err(ibdev,
+ "failed to alloc kernel RQ doorbell, ret = %d.\n",
+ ret);
+ return ret;
}
+ *hr_qp->rdb.db_record = 0;
+ hr_qp->en_flags |= HNS_ROCE_QP_CAP_RQ_RECORD_DB;
+ }
- hr_qp->sq.wrid = kmalloc_array(hr_qp->sq.wqe_cnt, sizeof(u64),
- GFP_KERNEL);
- hr_qp->rq.wrid = kmalloc_array(hr_qp->rq.wqe_cnt, sizeof(u64),
- GFP_KERNEL);
- if (!hr_qp->sq.wrid || !hr_qp->rq.wrid) {
- ret = -ENOMEM;
- goto err_wrid;
+ return 0;
+}
+
+static int alloc_qp_db(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
+ struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata,
+ struct hns_roce_ib_create_qp *ucmd,
+ struct hns_roce_ib_create_qp_resp *resp)
+{
+ int ret;
+
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SDI_MODE)
+ hr_qp->en_flags |= HNS_ROCE_QP_CAP_OWNER_DB;
+
+ if (udata) {
+ if (hr_qp->en_flags & HNS_ROCE_QP_CAP_DIRECT_WQE) {
+ ret = qp_mmap_entry(hr_qp, hr_dev, udata, resp);
+ if (ret)
+ return ret;
}
- }
- if (sqpn) {
- qpn = sqpn;
+ ret = alloc_user_qp_db(hr_dev, hr_qp, init_attr, udata, ucmd,
+ resp);
+ if (ret)
+ goto err_remove_qp;
} else {
- /* Get QPN */
- ret = hns_roce_reserve_range_qp(hr_dev, 1, 1, &qpn);
- if (ret) {
- dev_err(dev, "hns_roce_reserve_range_qp alloc qpn error\n");
- goto err_wrid;
- }
+ ret = alloc_kernel_qp_db(hr_dev, hr_qp, init_attr);
+ if (ret)
+ return ret;
}
- if ((init_attr->qp_type) == IB_QPT_GSI) {
- ret = hns_roce_gsi_qp_alloc(hr_dev, qpn, hr_qp);
- if (ret) {
- dev_err(dev, "hns_roce_qp_alloc failed!\n");
- goto err_qpn;
- }
+ return 0;
+
+err_remove_qp:
+ if (hr_qp->en_flags & HNS_ROCE_QP_CAP_DIRECT_WQE)
+ qp_user_mmap_entry_remove(hr_qp);
+
+ return ret;
+}
+
+static void free_qp_db(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
+ struct ib_udata *udata)
+{
+ struct hns_roce_ucontext *uctx = rdma_udata_to_drv_context(
+ udata, struct hns_roce_ucontext, ibucontext);
+
+ if (udata) {
+ if (hr_qp->en_flags & HNS_ROCE_QP_CAP_RQ_RECORD_DB)
+ hns_roce_db_unmap_user(uctx, &hr_qp->rdb);
+ if (hr_qp->en_flags & HNS_ROCE_QP_CAP_SQ_RECORD_DB)
+ hns_roce_db_unmap_user(uctx, &hr_qp->sdb);
+ if (hr_qp->en_flags & HNS_ROCE_QP_CAP_DIRECT_WQE)
+ qp_user_mmap_entry_remove(hr_qp);
} else {
- ret = hns_roce_qp_alloc(hr_dev, qpn, hr_qp);
- if (ret) {
- dev_err(dev, "hns_roce_qp_alloc failed!\n");
- goto err_qpn;
+ if (hr_qp->en_flags & HNS_ROCE_QP_CAP_RQ_RECORD_DB)
+ hns_roce_free_db(hr_dev, &hr_qp->rdb);
+ }
+}
+
+static int alloc_kernel_wrid(struct hns_roce_dev *hr_dev,
+ struct hns_roce_qp *hr_qp)
+{
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ u64 *sq_wrid = NULL;
+ u64 *rq_wrid = NULL;
+ int ret;
+
+ sq_wrid = kcalloc(hr_qp->sq.wqe_cnt, sizeof(u64), GFP_KERNEL);
+ if (!sq_wrid) {
+ ibdev_err(ibdev, "failed to alloc SQ wrid.\n");
+ return -ENOMEM;
+ }
+
+ if (hr_qp->rq.wqe_cnt) {
+ rq_wrid = kcalloc(hr_qp->rq.wqe_cnt, sizeof(u64), GFP_KERNEL);
+ if (!rq_wrid) {
+ ibdev_err(ibdev, "failed to alloc RQ wrid.\n");
+ ret = -ENOMEM;
+ goto err_sq;
}
}
- if (sqpn)
- hr_qp->doorbell_qpn = 1;
+ hr_qp->sq.wrid = sq_wrid;
+ hr_qp->rq.wrid = rq_wrid;
+ return 0;
+err_sq:
+ kfree(sq_wrid);
+
+ return ret;
+}
+
+static void free_kernel_wrid(struct hns_roce_qp *hr_qp)
+{
+ kfree(hr_qp->rq.wrid);
+ kfree(hr_qp->sq.wrid);
+}
+
+static void default_congest_type(struct hns_roce_dev *hr_dev,
+ struct hns_roce_qp *hr_qp)
+{
+ if (hr_qp->ibqp.qp_type == IB_QPT_UD ||
+ hr_qp->ibqp.qp_type == IB_QPT_GSI)
+ hr_qp->cong_type = CONG_TYPE_DCQCN;
else
- hr_qp->doorbell_qpn = cpu_to_le64(hr_qp->qpn);
+ hr_qp->cong_type = hr_dev->caps.default_cong_type;
+}
- hr_qp->event = hns_roce_ib_qp_event;
+static int set_congest_type(struct hns_roce_qp *hr_qp,
+ struct hns_roce_ib_create_qp *ucmd)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(hr_qp->ibqp.device);
+
+ switch (ucmd->cong_type_flags) {
+ case HNS_ROCE_CREATE_QP_FLAGS_DCQCN:
+ hr_qp->cong_type = CONG_TYPE_DCQCN;
+ break;
+ case HNS_ROCE_CREATE_QP_FLAGS_LDCP:
+ hr_qp->cong_type = CONG_TYPE_LDCP;
+ break;
+ case HNS_ROCE_CREATE_QP_FLAGS_HC3:
+ hr_qp->cong_type = CONG_TYPE_HC3;
+ break;
+ case HNS_ROCE_CREATE_QP_FLAGS_DIP:
+ hr_qp->cong_type = CONG_TYPE_DIP;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if (!test_bit(hr_qp->cong_type, (unsigned long *)&hr_dev->caps.cong_cap))
+ return -EOPNOTSUPP;
+
+ if (hr_qp->ibqp.qp_type == IB_QPT_UD &&
+ hr_qp->cong_type != CONG_TYPE_DCQCN)
+ return -EOPNOTSUPP;
return 0;
+}
-err_qpn:
- if (!sqpn)
- hns_roce_release_range_qp(hr_dev, qpn, 1);
+static int set_congest_param(struct hns_roce_dev *hr_dev,
+ struct hns_roce_qp *hr_qp,
+ struct hns_roce_ib_create_qp *ucmd)
+{
+ if (ucmd->comp_mask & HNS_ROCE_CREATE_QP_MASK_CONGEST_TYPE)
+ return set_congest_type(hr_qp, ucmd);
-err_wrid:
- kfree(hr_qp->sq.wrid);
- kfree(hr_qp->rq.wrid);
+ default_congest_type(hr_dev, hr_qp);
-err_mtt:
- hns_roce_mtt_cleanup(hr_dev, &hr_qp->mtt);
+ return 0;
+}
-err_buf:
- if (ib_pd->uobject)
- ib_umem_release(hr_qp->umem);
+static int set_qp_param(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
+ struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata,
+ struct hns_roce_ib_create_qp *ucmd)
+{
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ struct hns_roce_ucontext *uctx;
+ int ret;
+
+ if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
+ hr_qp->sq_signal_bits = IB_SIGNAL_ALL_WR;
else
- hns_roce_buf_free(hr_dev, hr_qp->buff_size, &hr_qp->hr_buf);
+ hr_qp->sq_signal_bits = IB_SIGNAL_REQ_WR;
+
+ ret = set_rq_size(hr_dev, &init_attr->cap, hr_qp,
+ hns_roce_qp_has_rq(init_attr), !!udata);
+ if (ret) {
+ ibdev_err(ibdev, "failed to set user RQ size, ret = %d.\n",
+ ret);
+ return ret;
+ }
+
+ if (udata) {
+ ret = ib_copy_from_udata(ucmd, udata,
+ min(udata->inlen, sizeof(*ucmd)));
+ if (ret) {
+ ibdev_err(ibdev,
+ "failed to copy QP ucmd, ret = %d\n", ret);
+ return ret;
+ }
+
+ uctx = rdma_udata_to_drv_context(udata, struct hns_roce_ucontext,
+ ibucontext);
+ hr_qp->config = uctx->config;
+ ret = set_user_sq_size(hr_dev, &init_attr->cap, hr_qp, ucmd);
+ if (ret) {
+ ibdev_err(ibdev,
+ "failed to set user SQ size, ret = %d.\n",
+ ret);
+ return ret;
+ }
+
+ ret = set_congest_param(hr_dev, hr_qp, ucmd);
+ } else {
+ if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
+ hr_qp->config = HNS_ROCE_EXSGE_FLAGS;
+ default_congest_type(hr_dev, hr_qp);
+ ret = set_kernel_sq_size(hr_dev, &init_attr->cap, hr_qp);
+ if (ret)
+ ibdev_err(ibdev,
+ "failed to set kernel SQ size, ret = %d.\n",
+ ret);
+ }
-err_out:
return ret;
}
-struct ib_qp *hns_roce_create_qp(struct ib_pd *pd,
- struct ib_qp_init_attr *init_attr,
- struct ib_udata *udata)
+static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
+ struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata,
+ struct hns_roce_qp *hr_qp)
{
- struct hns_roce_dev *hr_dev = to_hr_dev(pd->device);
- struct device *dev = &hr_dev->pdev->dev;
- struct hns_roce_sqp *hr_sqp;
- struct hns_roce_qp *hr_qp;
+ struct hns_roce_work *flush_work = &hr_qp->flush_work;
+ struct hns_roce_ib_create_qp_resp resp = {};
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ struct hns_roce_ib_create_qp ucmd = {};
int ret;
- switch (init_attr->qp_type) {
- case IB_QPT_RC: {
- hr_qp = kzalloc(sizeof(*hr_qp), GFP_KERNEL);
- if (!hr_qp)
- return ERR_PTR(-ENOMEM);
+ mutex_init(&hr_qp->mutex);
+ spin_lock_init(&hr_qp->sq.lock);
+ spin_lock_init(&hr_qp->rq.lock);
+ spin_lock_init(&hr_qp->flush_lock);
+
+ hr_qp->state = IB_QPS_RESET;
+ hr_qp->flush_flag = 0;
+ flush_work->hr_dev = hr_dev;
+ INIT_WORK(&flush_work->work, flush_work_handle);
+
+ if (init_attr->create_flags)
+ return -EOPNOTSUPP;
+
+ ret = set_qp_param(hr_dev, hr_qp, init_attr, udata, &ucmd);
+ if (ret) {
+ ibdev_err(ibdev, "failed to set QP param, ret = %d.\n", ret);
+ goto err_out;
+ }
- ret = hns_roce_create_qp_common(hr_dev, pd, init_attr, udata, 0,
- hr_qp);
+ if (!udata) {
+ ret = alloc_kernel_wrid(hr_dev, hr_qp);
if (ret) {
- dev_err(dev, "Create RC QP failed\n");
- kfree(hr_qp);
- return ERR_PTR(ret);
+ ibdev_err(ibdev, "failed to alloc wrid, ret = %d.\n",
+ ret);
+ goto err_out;
}
+ }
- hr_qp->ibqp.qp_num = hr_qp->qpn;
+ ret = alloc_qp_buf(hr_dev, hr_qp, init_attr, udata, ucmd.buf_addr);
+ if (ret) {
+ ibdev_err(ibdev, "failed to alloc QP buffer, ret = %d.\n", ret);
+ goto err_buf;
+ }
- break;
+ ret = alloc_qpn(hr_dev, hr_qp, init_attr);
+ if (ret) {
+ ibdev_err(ibdev, "failed to alloc QPN, ret = %d.\n", ret);
+ goto err_qpn;
}
- case IB_QPT_GSI: {
- /* Userspace is not allowed to create special QPs: */
- if (pd->uobject) {
- dev_err(dev, "not support usr space GSI\n");
- return ERR_PTR(-EINVAL);
- }
- hr_sqp = kzalloc(sizeof(*hr_sqp), GFP_KERNEL);
- if (!hr_sqp)
- return ERR_PTR(-ENOMEM);
+ ret = alloc_qp_db(hr_dev, hr_qp, init_attr, udata, &ucmd, &resp);
+ if (ret) {
+ ibdev_err(ibdev, "failed to alloc QP doorbell, ret = %d.\n",
+ ret);
+ goto err_db;
+ }
- hr_qp = &hr_sqp->hr_qp;
- hr_qp->port = init_attr->port_num - 1;
- hr_qp->phy_port = hr_dev->iboe.phy_port[hr_qp->port];
- hr_qp->ibqp.qp_num = HNS_ROCE_MAX_PORTS +
- hr_dev->iboe.phy_port[hr_qp->port];
+ ret = alloc_qpc(hr_dev, hr_qp);
+ if (ret) {
+ ibdev_err(ibdev, "failed to alloc QP context, ret = %d.\n",
+ ret);
+ goto err_qpc;
+ }
+
+ ret = hns_roce_qp_store(hr_dev, hr_qp, init_attr);
+ if (ret) {
+ ibdev_err(ibdev, "failed to store QP, ret = %d.\n", ret);
+ goto err_store;
+ }
- ret = hns_roce_create_qp_common(hr_dev, pd, init_attr, udata,
- hr_qp->ibqp.qp_num, hr_qp);
+ if (udata) {
+ resp.cap_flags = hr_qp->en_flags;
+ ret = ib_copy_to_udata(udata, &resp,
+ min(udata->outlen, sizeof(resp)));
if (ret) {
- dev_err(dev, "Create GSI QP failed!\n");
- kfree(hr_sqp);
- return ERR_PTR(ret);
+ ibdev_err(ibdev, "copy qp resp failed!\n");
+ goto err_flow_ctrl;
}
+ }
- break;
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL) {
+ ret = hr_dev->hw->qp_flow_control_init(hr_dev, hr_qp);
+ if (ret)
+ goto err_flow_ctrl;
}
- default:{
- dev_err(dev, "not support QP type %d\n", init_attr->qp_type);
- return ERR_PTR(-EINVAL);
+
+ hr_qp->ibqp.qp_num = hr_qp->qpn;
+ hr_qp->event = hns_roce_ib_qp_event;
+ refcount_set(&hr_qp->refcount, 1);
+ init_completion(&hr_qp->free);
+
+ return 0;
+
+err_flow_ctrl:
+ hns_roce_qp_remove(hr_dev, hr_qp);
+err_store:
+ free_qpc(hr_dev, hr_qp);
+err_qpc:
+ free_qp_db(hr_dev, hr_qp, udata);
+err_db:
+ free_qpn(hr_dev, hr_qp);
+err_qpn:
+ free_qp_buf(hr_dev, hr_qp);
+err_buf:
+ free_kernel_wrid(hr_qp);
+err_out:
+ mutex_destroy(&hr_qp->mutex);
+ return ret;
+}
+
+void hns_roce_qp_destroy(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
+ struct ib_udata *udata)
+{
+ if (refcount_dec_and_test(&hr_qp->refcount))
+ complete(&hr_qp->free);
+ wait_for_completion(&hr_qp->free);
+
+ free_qpc(hr_dev, hr_qp);
+ free_qpn(hr_dev, hr_qp);
+ free_qp_buf(hr_dev, hr_qp);
+ free_kernel_wrid(hr_qp);
+ free_qp_db(hr_dev, hr_qp, udata);
+ mutex_destroy(&hr_qp->mutex);
+}
+
+static int check_qp_type(struct hns_roce_dev *hr_dev, enum ib_qp_type type,
+ bool is_user)
+{
+ switch (type) {
+ case IB_QPT_XRC_INI:
+ case IB_QPT_XRC_TGT:
+ if (!(hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_XRC))
+ goto out;
+ break;
+ case IB_QPT_UD:
+ if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08 &&
+ is_user)
+ goto out;
+ break;
+ case IB_QPT_RC:
+ case IB_QPT_GSI:
+ break;
+ default:
+ goto out;
}
+
+ return 0;
+
+out:
+ ibdev_err(&hr_dev->ib_dev, "not support QP type %d\n", type);
+
+ return -EOPNOTSUPP;
+}
+
+int hns_roce_create_qp(struct ib_qp *qp, struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata)
+{
+ struct ib_device *ibdev = qp->device;
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibdev);
+ struct hns_roce_qp *hr_qp = to_hr_qp(qp);
+ int ret;
+
+ ret = check_qp_type(hr_dev, init_attr->qp_type, !!udata);
+ if (ret)
+ goto err_out;
+
+ if (init_attr->qp_type == IB_QPT_XRC_TGT)
+ hr_qp->xrcdn = to_hr_xrcd(init_attr->xrcd)->xrcdn;
+
+ if (init_attr->qp_type == IB_QPT_GSI) {
+ hr_qp->port = init_attr->port_num - 1;
+ hr_qp->phy_port = hr_dev->iboe.phy_port[hr_qp->port];
}
- return &hr_qp->ibqp;
+ ret = hns_roce_create_qp_common(hr_dev, init_attr, udata, hr_qp);
+ if (ret)
+ ibdev_err(ibdev, "create QP type %d failed(%d)\n",
+ init_attr->qp_type, ret);
+
+err_out:
+ if (ret)
+ atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_QP_CREATE_ERR_CNT]);
+
+ return ret;
}
int to_hr_qp_type(int qp_type)
{
- int transport_type;
+ switch (qp_type) {
+ case IB_QPT_RC:
+ return SERV_TYPE_RC;
+ case IB_QPT_UD:
+ case IB_QPT_GSI:
+ return SERV_TYPE_UD;
+ case IB_QPT_XRC_INI:
+ case IB_QPT_XRC_TGT:
+ return SERV_TYPE_XRC;
+ default:
+ return -1;
+ }
+}
- if (qp_type == IB_QPT_RC)
- transport_type = SERV_TYPE_RC;
- else if (qp_type == IB_QPT_UC)
- transport_type = SERV_TYPE_UC;
- else if (qp_type == IB_QPT_UD)
- transport_type = SERV_TYPE_UD;
- else if (qp_type == IB_QPT_GSI)
- transport_type = SERV_TYPE_UD;
- else
- transport_type = -1;
+static int check_mtu_validate(struct hns_roce_dev *hr_dev,
+ struct hns_roce_qp *hr_qp,
+ struct ib_qp_attr *attr, int attr_mask)
+{
+ struct net_device *net_dev;
+ enum ib_mtu active_mtu;
+ int p;
+
+ p = attr_mask & IB_QP_PORT ? (attr->port_num - 1) : hr_qp->port;
+ net_dev = get_hr_netdev(hr_dev, p);
+ active_mtu = iboe_get_mtu(net_dev->mtu);
- return transport_type;
+ if ((hr_dev->caps.max_mtu >= IB_MTU_2048 &&
+ attr->path_mtu > hr_dev->caps.max_mtu) ||
+ attr->path_mtu < IB_MTU_256 || attr->path_mtu > active_mtu) {
+ ibdev_err(&hr_dev->ib_dev,
+ "attr path_mtu(%d)invalid while modify qp",
+ attr->path_mtu);
+ return -EINVAL;
+ }
+
+ return 0;
}
-int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
- int attr_mask, struct ib_udata *udata)
+static int hns_roce_check_qp_attr(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ int attr_mask)
{
struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
- enum ib_qp_state cur_state, new_state;
- struct device *dev = &hr_dev->pdev->dev;
- int ret = -EINVAL;
int p;
- enum ib_mtu active_mtu;
-
- mutex_lock(&hr_qp->mutex);
-
- cur_state = attr_mask & IB_QP_CUR_STATE ?
- attr->cur_qp_state : (enum ib_qp_state)hr_qp->state;
- new_state = attr_mask & IB_QP_STATE ?
- attr->qp_state : cur_state;
-
- if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask,
- IB_LINK_LAYER_ETHERNET)) {
- dev_err(dev, "ib_modify_qp_is_ok failed\n");
- goto out;
- }
if ((attr_mask & IB_QP_PORT) &&
(attr->port_num == 0 || attr->port_num > hr_dev->caps.num_ports)) {
- dev_err(dev, "attr port_num invalid.attr->port_num=%d\n",
- attr->port_num);
- goto out;
+ ibdev_err(&hr_dev->ib_dev, "invalid attr, port_num = %u.\n",
+ attr->port_num);
+ return -EINVAL;
}
if (attr_mask & IB_QP_PKEY_INDEX) {
p = attr_mask & IB_QP_PORT ? (attr->port_num - 1) : hr_qp->port;
if (attr->pkey_index >= hr_dev->caps.pkey_table_len[p]) {
- dev_err(dev, "attr pkey_index invalid.attr->pkey_index=%d\n",
- attr->pkey_index);
- goto out;
- }
- }
-
- if (attr_mask & IB_QP_PATH_MTU) {
- p = attr_mask & IB_QP_PORT ? (attr->port_num - 1) : hr_qp->port;
- active_mtu = iboe_get_mtu(hr_dev->iboe.netdevs[p]->mtu);
-
- if (attr->path_mtu > IB_MTU_2048 ||
- attr->path_mtu < IB_MTU_256 ||
- attr->path_mtu > active_mtu) {
- dev_err(dev, "attr path_mtu(%d)invalid while modify qp",
- attr->path_mtu);
- goto out;
+ ibdev_err(&hr_dev->ib_dev,
+ "invalid attr, pkey_index = %u.\n",
+ attr->pkey_index);
+ return -EINVAL;
}
}
if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
attr->max_rd_atomic > hr_dev->caps.max_qp_init_rdma) {
- dev_err(dev, "attr max_rd_atomic invalid.attr->max_rd_atomic=%d\n",
- attr->max_rd_atomic);
- goto out;
+ ibdev_err(&hr_dev->ib_dev,
+ "invalid attr, max_rd_atomic = %u.\n",
+ attr->max_rd_atomic);
+ return -EINVAL;
}
if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
attr->max_dest_rd_atomic > hr_dev->caps.max_qp_dest_rdma) {
- dev_err(dev, "attr max_dest_rd_atomic invalid.attr->max_dest_rd_atomic=%d\n",
- attr->max_dest_rd_atomic);
+ ibdev_err(&hr_dev->ib_dev,
+ "invalid attr, max_dest_rd_atomic = %u.\n",
+ attr->max_dest_rd_atomic);
+ return -EINVAL;
+ }
+
+ if (attr_mask & IB_QP_PATH_MTU)
+ return check_mtu_validate(hr_dev, hr_qp, attr, attr_mask);
+
+ return 0;
+}
+
+int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_udata *udata)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
+ struct hns_roce_ib_modify_qp_resp resp = {};
+ struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
+ enum ib_qp_state cur_state, new_state;
+ int ret = -EINVAL;
+
+ mutex_lock(&hr_qp->mutex);
+
+ if (attr_mask & IB_QP_CUR_STATE && attr->cur_qp_state != hr_qp->state)
goto out;
+
+ cur_state = hr_qp->state;
+ new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
+
+ if (ibqp->uobject &&
+ (attr_mask & IB_QP_STATE) && new_state == IB_QPS_ERR) {
+ if (hr_qp->en_flags & HNS_ROCE_QP_CAP_SQ_RECORD_DB) {
+ hr_qp->sq.head = *(int *)(hr_qp->sdb.virt_addr);
+
+ if (hr_qp->en_flags & HNS_ROCE_QP_CAP_RQ_RECORD_DB)
+ hr_qp->rq.head = *(int *)(hr_qp->rdb.virt_addr);
+ } else {
+ ibdev_warn(&hr_dev->ib_dev,
+ "flush cqe is not supported in userspace!\n");
+ goto out;
+ }
}
- if (cur_state == new_state && cur_state == IB_QPS_RESET) {
- ret = -EPERM;
- dev_err(dev, "cur_state=%d new_state=%d\n", cur_state,
- new_state);
+ if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
+ attr_mask)) {
+ ibdev_err(&hr_dev->ib_dev, "ib_modify_qp_is_ok failed\n");
goto out;
}
+ ret = hns_roce_check_qp_attr(ibqp, attr, attr_mask);
+ if (ret)
+ goto out;
+
+ if (cur_state == new_state && cur_state == IB_QPS_RESET)
+ goto out;
+
ret = hr_dev->hw->modify_qp(ibqp, attr, attr_mask, cur_state,
- new_state);
+ new_state, udata);
+ if (ret)
+ goto out;
+
+ if (udata && udata->outlen) {
+ resp.tc_mode = hr_qp->tc_mode;
+ resp.priority = hr_qp->sl;
+ ret = ib_copy_to_udata(udata, &resp,
+ min(udata->outlen, sizeof(resp)));
+ if (ret)
+ ibdev_err_ratelimited(&hr_dev->ib_dev,
+ "failed to copy modify qp resp.\n");
+ }
out:
mutex_unlock(&hr_qp->mutex);
+ if (ret)
+ atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_QP_MODIFY_ERR_CNT]);
return ret;
}
@@ -735,14 +1483,23 @@ out:
void hns_roce_lock_cqs(struct hns_roce_cq *send_cq, struct hns_roce_cq *recv_cq)
__acquires(&send_cq->lock) __acquires(&recv_cq->lock)
{
- if (send_cq == recv_cq) {
- spin_lock_irq(&send_cq->lock);
+ if (unlikely(send_cq == NULL && recv_cq == NULL)) {
+ __acquire(&send_cq->lock);
+ __acquire(&recv_cq->lock);
+ } else if (unlikely(send_cq != NULL && recv_cq == NULL)) {
+ spin_lock(&send_cq->lock);
+ __acquire(&recv_cq->lock);
+ } else if (unlikely(send_cq == NULL && recv_cq != NULL)) {
+ spin_lock(&recv_cq->lock);
+ __acquire(&send_cq->lock);
+ } else if (send_cq == recv_cq) {
+ spin_lock(&send_cq->lock);
__acquire(&recv_cq->lock);
} else if (send_cq->cqn < recv_cq->cqn) {
- spin_lock_irq(&send_cq->lock);
+ spin_lock(&send_cq->lock);
spin_lock_nested(&recv_cq->lock, SINGLE_DEPTH_NESTING);
} else {
- spin_lock_irq(&recv_cq->lock);
+ spin_lock(&recv_cq->lock);
spin_lock_nested(&send_cq->lock, SINGLE_DEPTH_NESTING);
}
}
@@ -751,82 +1508,88 @@ void hns_roce_unlock_cqs(struct hns_roce_cq *send_cq,
struct hns_roce_cq *recv_cq) __releases(&send_cq->lock)
__releases(&recv_cq->lock)
{
- if (send_cq == recv_cq) {
+ if (unlikely(send_cq == NULL && recv_cq == NULL)) {
+ __release(&recv_cq->lock);
+ __release(&send_cq->lock);
+ } else if (unlikely(send_cq != NULL && recv_cq == NULL)) {
+ __release(&recv_cq->lock);
+ spin_unlock(&send_cq->lock);
+ } else if (unlikely(send_cq == NULL && recv_cq != NULL)) {
+ __release(&send_cq->lock);
+ spin_unlock(&recv_cq->lock);
+ } else if (send_cq == recv_cq) {
__release(&recv_cq->lock);
- spin_unlock_irq(&send_cq->lock);
+ spin_unlock(&send_cq->lock);
} else if (send_cq->cqn < recv_cq->cqn) {
spin_unlock(&recv_cq->lock);
- spin_unlock_irq(&send_cq->lock);
+ spin_unlock(&send_cq->lock);
} else {
spin_unlock(&send_cq->lock);
- spin_unlock_irq(&recv_cq->lock);
+ spin_unlock(&recv_cq->lock);
}
}
-__be32 send_ieth(struct ib_send_wr *wr)
+static inline void *get_wqe(struct hns_roce_qp *hr_qp, u32 offset)
{
- switch (wr->opcode) {
- case IB_WR_SEND_WITH_IMM:
- case IB_WR_RDMA_WRITE_WITH_IMM:
- return cpu_to_le32(wr->ex.imm_data);
- case IB_WR_SEND_WITH_INV:
- return cpu_to_le32(wr->ex.invalidate_rkey);
- default:
- return 0;
- }
+ return hns_roce_buf_offset(hr_qp->mtr.kmem, offset);
}
-static void *get_wqe(struct hns_roce_qp *hr_qp, int offset)
+void *hns_roce_get_recv_wqe(struct hns_roce_qp *hr_qp, unsigned int n)
{
-
- return hns_roce_buf_offset(&hr_qp->hr_buf, offset);
+ return get_wqe(hr_qp, hr_qp->rq.offset + (n << hr_qp->rq.wqe_shift));
}
-void *get_recv_wqe(struct hns_roce_qp *hr_qp, int n)
+void *hns_roce_get_send_wqe(struct hns_roce_qp *hr_qp, unsigned int n)
{
- return get_wqe(hr_qp, hr_qp->rq.offset + (n << hr_qp->rq.wqe_shift));
+ return get_wqe(hr_qp, hr_qp->sq.offset + (n << hr_qp->sq.wqe_shift));
}
-void *get_send_wqe(struct hns_roce_qp *hr_qp, int n)
+void *hns_roce_get_extend_sge(struct hns_roce_qp *hr_qp, unsigned int n)
{
- return get_wqe(hr_qp, hr_qp->sq.offset + (n << hr_qp->sq.wqe_shift));
+ return get_wqe(hr_qp, hr_qp->sge.offset + (n << hr_qp->sge.sge_shift));
}
-bool hns_roce_wq_overflow(struct hns_roce_wq *hr_wq, int nreq,
+bool hns_roce_wq_overflow(struct hns_roce_wq *hr_wq, u32 nreq,
struct ib_cq *ib_cq)
{
struct hns_roce_cq *hr_cq;
u32 cur;
cur = hr_wq->head - hr_wq->tail;
- if (likely(cur + nreq < hr_wq->max_post))
- return 0;
+ if (likely(cur + nreq < hr_wq->wqe_cnt))
+ return false;
hr_cq = to_hr_cq(ib_cq);
spin_lock(&hr_cq->lock);
cur = hr_wq->head - hr_wq->tail;
spin_unlock(&hr_cq->lock);
- return cur + nreq >= hr_wq->max_post;
+ return cur + nreq >= hr_wq->wqe_cnt;
}
int hns_roce_init_qp_table(struct hns_roce_dev *hr_dev)
{
struct hns_roce_qp_table *qp_table = &hr_dev->qp_table;
- int reserved_from_top = 0;
- int ret;
+ unsigned int reserved_from_bot;
+ unsigned int i;
- spin_lock_init(&qp_table->lock);
- INIT_RADIX_TREE(&hr_dev->qp_table_tree, GFP_ATOMIC);
+ mutex_init(&qp_table->scc_mutex);
+ mutex_init(&qp_table->bank_mutex);
+ xa_init(&hr_dev->qp_table_xa);
+ xa_init(&qp_table->dip_xa);
- /* A port include two SQP, six port total 12 */
- ret = hns_roce_bitmap_init(&qp_table->bitmap, hr_dev->caps.num_qps,
- hr_dev->caps.num_qps - 1, SQP_NUM,
- reserved_from_top);
- if (ret) {
- dev_err(&hr_dev->pdev->dev, "qp bitmap init failed!error=%d\n",
- ret);
- return ret;
+ reserved_from_bot = hr_dev->caps.reserved_qps;
+
+ for (i = 0; i < reserved_from_bot; i++) {
+ hr_dev->qp_table.bank[get_qp_bankid(i)].inuse++;
+ hr_dev->qp_table.bank[get_qp_bankid(i)].min++;
+ }
+
+ for (i = 0; i < HNS_ROCE_QP_BANK_NUM; i++) {
+ ida_init(&hr_dev->qp_table.bank[i].ida);
+ hr_dev->qp_table.bank[i].max = hr_dev->caps.num_qps /
+ HNS_ROCE_QP_BANK_NUM - 1;
+ hr_dev->qp_table.bank[i].next = hr_dev->qp_table.bank[i].min;
}
return 0;
@@ -834,5 +1597,12 @@ int hns_roce_init_qp_table(struct hns_roce_dev *hr_dev)
void hns_roce_cleanup_qp_table(struct hns_roce_dev *hr_dev)
{
- hns_roce_bitmap_cleanup(&hr_dev->qp_table.bitmap);
+ int i;
+
+ for (i = 0; i < HNS_ROCE_QP_BANK_NUM; i++)
+ ida_destroy(&hr_dev->qp_table.bank[i].ida);
+ xa_destroy(&hr_dev->qp_table.dip_xa);
+ xa_destroy(&hr_dev->qp_table_xa);
+ mutex_destroy(&hr_dev->qp_table.bank_mutex);
+ mutex_destroy(&hr_dev->qp_table.scc_mutex);
}
diff --git a/drivers/infiniband/hw/hns/hns_roce_restrack.c b/drivers/infiniband/hw/hns/hns_roce_restrack.c
new file mode 100644
index 000000000000..230187dda6a0
--- /dev/null
+++ b/drivers/infiniband/hw/hns/hns_roce_restrack.c
@@ -0,0 +1,234 @@
+// SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+// Copyright (c) 2019 Hisilicon Limited.
+
+#include <rdma/rdma_cm.h>
+#include <rdma/restrack.h>
+#include <uapi/rdma/rdma_netlink.h>
+#include "hns_roce_common.h"
+#include "hns_roce_device.h"
+#include "hns_roce_hw_v2.h"
+
+int hns_roce_fill_res_cq_entry(struct sk_buff *msg, struct ib_cq *ib_cq)
+{
+ struct hns_roce_cq *hr_cq = to_hr_cq(ib_cq);
+ struct nlattr *table_attr;
+
+ table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER);
+ if (!table_attr)
+ return -EMSGSIZE;
+
+ if (rdma_nl_put_driver_u32(msg, "cq_depth", hr_cq->cq_depth))
+ goto err;
+
+ if (rdma_nl_put_driver_u32(msg, "cons_index", hr_cq->cons_index))
+ goto err;
+
+ if (rdma_nl_put_driver_u32(msg, "cqe_size", hr_cq->cqe_size))
+ goto err;
+
+ if (rdma_nl_put_driver_u32(msg, "arm_sn", hr_cq->arm_sn))
+ goto err;
+
+ nla_nest_end(msg, table_attr);
+
+ return 0;
+
+err:
+ nla_nest_cancel(msg, table_attr);
+
+ return -EMSGSIZE;
+}
+
+int hns_roce_fill_res_cq_entry_raw(struct sk_buff *msg, struct ib_cq *ib_cq)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ib_cq->device);
+ struct hns_roce_cq *hr_cq = to_hr_cq(ib_cq);
+ struct hns_roce_v2_cq_context context;
+ int ret;
+
+ if (!hr_dev->hw->query_cqc)
+ return -EINVAL;
+
+ ret = hr_dev->hw->query_cqc(hr_dev, hr_cq->cqn, &context);
+ if (ret)
+ return -EINVAL;
+
+ ret = nla_put(msg, RDMA_NLDEV_ATTR_RES_RAW, sizeof(context), &context);
+
+ return ret;
+}
+
+int hns_roce_fill_res_qp_entry(struct sk_buff *msg, struct ib_qp *ib_qp)
+{
+ struct hns_roce_qp *hr_qp = to_hr_qp(ib_qp);
+ struct nlattr *table_attr;
+
+ table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER);
+ if (!table_attr)
+ return -EMSGSIZE;
+
+ if (rdma_nl_put_driver_u32_hex(msg, "sq_wqe_cnt", hr_qp->sq.wqe_cnt))
+ goto err;
+
+ if (rdma_nl_put_driver_u32_hex(msg, "sq_max_gs", hr_qp->sq.max_gs))
+ goto err;
+
+ if (rdma_nl_put_driver_u32_hex(msg, "rq_wqe_cnt", hr_qp->rq.wqe_cnt))
+ goto err;
+
+ if (rdma_nl_put_driver_u32_hex(msg, "rq_max_gs", hr_qp->rq.max_gs))
+ goto err;
+
+ if (rdma_nl_put_driver_u32_hex(msg, "ext_sge_sge_cnt", hr_qp->sge.sge_cnt))
+ goto err;
+
+ nla_nest_end(msg, table_attr);
+
+ return 0;
+
+err:
+ nla_nest_cancel(msg, table_attr);
+
+ return -EMSGSIZE;
+}
+
+int hns_roce_fill_res_qp_entry_raw(struct sk_buff *msg, struct ib_qp *ib_qp)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ib_qp->device);
+ struct hns_roce_qp *hr_qp = to_hr_qp(ib_qp);
+ struct hns_roce_full_qp_ctx {
+ struct hns_roce_v2_qp_context qpc;
+ struct hns_roce_v2_scc_context sccc;
+ } context = {};
+ u32 sccn = hr_qp->qpn;
+ int ret;
+
+ if (!hr_dev->hw->query_qpc)
+ return -EINVAL;
+
+ ret = hr_dev->hw->query_qpc(hr_dev, hr_qp->qpn, &context.qpc);
+ if (ret)
+ return ret;
+
+ /* If SCC is disabled or the query fails, the queried SCCC will
+ * be all 0.
+ */
+ if (!(hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL) ||
+ !hr_dev->hw->query_sccc)
+ goto out;
+
+ if (hr_qp->cong_type == CONG_TYPE_DIP) {
+ if (!hr_qp->dip)
+ goto out;
+ sccn = hr_qp->dip->dip_idx;
+ }
+
+ ret = hr_dev->hw->query_sccc(hr_dev, sccn, &context.sccc);
+ if (ret)
+ ibdev_warn_ratelimited(&hr_dev->ib_dev,
+ "failed to query SCCC, ret = %d.\n",
+ ret);
+
+out:
+ ret = nla_put(msg, RDMA_NLDEV_ATTR_RES_RAW, sizeof(context), &context);
+
+ return ret;
+}
+
+int hns_roce_fill_res_mr_entry(struct sk_buff *msg, struct ib_mr *ib_mr)
+{
+ struct hns_roce_mr *hr_mr = to_hr_mr(ib_mr);
+ struct nlattr *table_attr;
+
+ table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER);
+ if (!table_attr)
+ return -EMSGSIZE;
+
+ if (rdma_nl_put_driver_u32_hex(msg, "pbl_hop_num", hr_mr->pbl_hop_num))
+ goto err;
+
+ if (rdma_nl_put_driver_u32_hex(msg, "ba_pg_shift",
+ hr_mr->pbl_mtr.hem_cfg.ba_pg_shift))
+ goto err;
+
+ if (rdma_nl_put_driver_u32_hex(msg, "buf_pg_shift",
+ hr_mr->pbl_mtr.hem_cfg.buf_pg_shift))
+ goto err;
+
+ nla_nest_end(msg, table_attr);
+
+ return 0;
+
+err:
+ nla_nest_cancel(msg, table_attr);
+
+ return -EMSGSIZE;
+}
+
+int hns_roce_fill_res_mr_entry_raw(struct sk_buff *msg, struct ib_mr *ib_mr)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ib_mr->device);
+ struct hns_roce_mr *hr_mr = to_hr_mr(ib_mr);
+ struct hns_roce_v2_mpt_entry context;
+ int ret;
+
+ if (!hr_dev->hw->query_mpt)
+ return -EINVAL;
+
+ ret = hr_dev->hw->query_mpt(hr_dev, hr_mr->key, &context);
+ if (ret)
+ return -EINVAL;
+
+ ret = nla_put(msg, RDMA_NLDEV_ATTR_RES_RAW, sizeof(context), &context);
+
+ return ret;
+}
+
+int hns_roce_fill_res_srq_entry(struct sk_buff *msg, struct ib_srq *ib_srq)
+{
+ struct hns_roce_srq *hr_srq = to_hr_srq(ib_srq);
+ struct nlattr *table_attr;
+
+ table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER);
+ if (!table_attr)
+ return -EMSGSIZE;
+
+ if (rdma_nl_put_driver_u32_hex(msg, "srqn", hr_srq->srqn))
+ goto err;
+
+ if (rdma_nl_put_driver_u32_hex(msg, "wqe_cnt", hr_srq->wqe_cnt))
+ goto err;
+
+ if (rdma_nl_put_driver_u32_hex(msg, "max_gs", hr_srq->max_gs))
+ goto err;
+
+ if (rdma_nl_put_driver_u32_hex(msg, "xrcdn", hr_srq->xrcdn))
+ goto err;
+
+ nla_nest_end(msg, table_attr);
+
+ return 0;
+
+err:
+ nla_nest_cancel(msg, table_attr);
+ return -EMSGSIZE;
+}
+
+int hns_roce_fill_res_srq_entry_raw(struct sk_buff *msg, struct ib_srq *ib_srq)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ib_srq->device);
+ struct hns_roce_srq *hr_srq = to_hr_srq(ib_srq);
+ struct hns_roce_srq_context context;
+ int ret;
+
+ if (!hr_dev->hw->query_srqc)
+ return -EINVAL;
+
+ ret = hr_dev->hw->query_srqc(hr_dev, hr_srq->srqn, &context);
+ if (ret)
+ return ret;
+
+ ret = nla_put(msg, RDMA_NLDEV_ATTR_RES_RAW, sizeof(context), &context);
+
+ return ret;
+}
diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c
new file mode 100644
index 000000000000..8a6efb6b9c9e
--- /dev/null
+++ b/drivers/infiniband/hw/hns/hns_roce_srq.c
@@ -0,0 +1,549 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2018 Hisilicon Limited.
+ */
+
+#include <rdma/ib_umem.h>
+#include <rdma/uverbs_ioctl.h>
+#include "hns_roce_device.h"
+#include "hns_roce_cmd.h"
+#include "hns_roce_hem.h"
+
+void hns_roce_srq_event(struct hns_roce_dev *hr_dev, u32 srqn, int event_type)
+{
+ struct hns_roce_srq_table *srq_table = &hr_dev->srq_table;
+ struct hns_roce_srq *srq;
+
+ xa_lock(&srq_table->xa);
+ srq = xa_load(&srq_table->xa, srqn & (hr_dev->caps.num_srqs - 1));
+ if (srq)
+ refcount_inc(&srq->refcount);
+ xa_unlock(&srq_table->xa);
+
+ if (!srq) {
+ dev_warn(hr_dev->dev, "Async event for bogus SRQ %08x\n", srqn);
+ return;
+ }
+
+ srq->event(srq, event_type);
+
+ if (refcount_dec_and_test(&srq->refcount))
+ complete(&srq->free);
+}
+
+static void hns_roce_ib_srq_event(struct hns_roce_srq *srq,
+ enum hns_roce_event event_type)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(srq->ibsrq.device);
+ struct ib_srq *ibsrq = &srq->ibsrq;
+ struct ib_event event;
+
+ if (ibsrq->event_handler) {
+ event.device = ibsrq->device;
+ event.element.srq = ibsrq;
+ switch (event_type) {
+ case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH:
+ event.event = IB_EVENT_SRQ_LIMIT_REACHED;
+ break;
+ case HNS_ROCE_EVENT_TYPE_SRQ_CATAS_ERROR:
+ event.event = IB_EVENT_SRQ_ERR;
+ break;
+ default:
+ dev_err(hr_dev->dev,
+ "hns_roce:Unexpected event type %d on SRQ %06lx\n",
+ event_type, srq->srqn);
+ return;
+ }
+
+ ibsrq->event_handler(&event, ibsrq->srq_context);
+ }
+}
+
+static int alloc_srqn(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq)
+{
+ struct hns_roce_ida *srq_ida = &hr_dev->srq_table.srq_ida;
+ int id;
+
+ id = ida_alloc_range(&srq_ida->ida, srq_ida->min, srq_ida->max,
+ GFP_KERNEL);
+ if (id < 0) {
+ ibdev_err(&hr_dev->ib_dev, "failed to alloc srq(%d).\n", id);
+ return -ENOMEM;
+ }
+
+ srq->srqn = id;
+
+ return 0;
+}
+
+static void free_srqn(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq)
+{
+ ida_free(&hr_dev->srq_table.srq_ida.ida, (int)srq->srqn);
+}
+
+static int hns_roce_create_srqc(struct hns_roce_dev *hr_dev,
+ struct hns_roce_srq *srq)
+{
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ struct hns_roce_cmd_mailbox *mailbox;
+ int ret;
+
+ mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
+ if (IS_ERR(mailbox)) {
+ ibdev_err(ibdev, "failed to alloc mailbox for SRQC.\n");
+ return PTR_ERR(mailbox);
+ }
+
+ ret = hr_dev->hw->write_srqc(srq, mailbox->buf);
+ if (ret) {
+ ibdev_err(ibdev, "failed to write SRQC.\n");
+ goto err_mbox;
+ }
+
+ ret = hns_roce_create_hw_ctx(hr_dev, mailbox, HNS_ROCE_CMD_CREATE_SRQ,
+ srq->srqn);
+ if (ret)
+ ibdev_err(ibdev, "failed to config SRQC, ret = %d.\n", ret);
+
+err_mbox:
+ hns_roce_free_cmd_mailbox(hr_dev, mailbox);
+ return ret;
+}
+
+static int alloc_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq)
+{
+ struct hns_roce_srq_table *srq_table = &hr_dev->srq_table;
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ int ret;
+
+ ret = hns_roce_table_get(hr_dev, &srq_table->table, srq->srqn);
+ if (ret) {
+ ibdev_err(ibdev, "failed to get SRQC table, ret = %d.\n", ret);
+ return ret;
+ }
+
+ ret = xa_err(xa_store_irq(&srq_table->xa, srq->srqn, srq, GFP_KERNEL));
+ if (ret) {
+ ibdev_err(ibdev, "failed to store SRQC, ret = %d.\n", ret);
+ goto err_put;
+ }
+
+ ret = hns_roce_create_srqc(hr_dev, srq);
+ if (ret)
+ goto err_xa;
+
+ return 0;
+
+err_xa:
+ xa_erase_irq(&srq_table->xa, srq->srqn);
+err_put:
+ hns_roce_table_put(hr_dev, &srq_table->table, srq->srqn);
+
+ return ret;
+}
+
+static void free_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq)
+{
+ struct hns_roce_srq_table *srq_table = &hr_dev->srq_table;
+ int ret;
+
+ ret = hns_roce_destroy_hw_ctx(hr_dev, HNS_ROCE_CMD_DESTROY_SRQ,
+ srq->srqn);
+ if (ret)
+ dev_err_ratelimited(hr_dev->dev, "DESTROY_SRQ failed (%d) for SRQN %06lx\n",
+ ret, srq->srqn);
+
+ xa_erase_irq(&srq_table->xa, srq->srqn);
+
+ if (refcount_dec_and_test(&srq->refcount))
+ complete(&srq->free);
+ wait_for_completion(&srq->free);
+
+ hns_roce_table_put(hr_dev, &srq_table->table, srq->srqn);
+}
+
+static int alloc_srq_idx(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq,
+ struct ib_udata *udata, unsigned long addr)
+{
+ struct hns_roce_idx_que *idx_que = &srq->idx_que;
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ struct hns_roce_buf_attr buf_attr = {};
+ int ret;
+
+ srq->idx_que.entry_shift = ilog2(HNS_ROCE_IDX_QUE_ENTRY_SZ);
+
+ buf_attr.page_shift = hr_dev->caps.idx_buf_pg_sz + PAGE_SHIFT;
+ buf_attr.region[0].size = to_hr_hem_entries_size(srq->wqe_cnt,
+ srq->idx_que.entry_shift);
+ buf_attr.region[0].hopnum = hr_dev->caps.idx_hop_num;
+ buf_attr.region_count = 1;
+
+ ret = hns_roce_mtr_create(hr_dev, &idx_que->mtr, &buf_attr,
+ hr_dev->caps.idx_ba_pg_sz + PAGE_SHIFT,
+ udata, addr);
+ if (ret) {
+ ibdev_err(ibdev,
+ "failed to alloc SRQ idx mtr, ret = %d.\n", ret);
+ return ret;
+ }
+
+ if (!udata) {
+ idx_que->bitmap = bitmap_zalloc(srq->wqe_cnt, GFP_KERNEL);
+ if (!idx_que->bitmap) {
+ ibdev_err(ibdev, "failed to alloc SRQ idx bitmap.\n");
+ ret = -ENOMEM;
+ goto err_idx_mtr;
+ }
+ }
+
+ idx_que->head = 0;
+ idx_que->tail = 0;
+
+ return 0;
+err_idx_mtr:
+ hns_roce_mtr_destroy(hr_dev, &idx_que->mtr);
+
+ return ret;
+}
+
+static void free_srq_idx(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq)
+{
+ struct hns_roce_idx_que *idx_que = &srq->idx_que;
+
+ bitmap_free(idx_que->bitmap);
+ idx_que->bitmap = NULL;
+ hns_roce_mtr_destroy(hr_dev, &idx_que->mtr);
+}
+
+static int alloc_srq_wqe_buf(struct hns_roce_dev *hr_dev,
+ struct hns_roce_srq *srq,
+ struct ib_udata *udata, unsigned long addr)
+{
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ struct hns_roce_buf_attr buf_attr = {};
+ int ret;
+
+ srq->wqe_shift = ilog2(roundup_pow_of_two(max(HNS_ROCE_SGE_SIZE,
+ HNS_ROCE_SGE_SIZE *
+ srq->max_gs)));
+
+ buf_attr.page_shift = hr_dev->caps.srqwqe_buf_pg_sz + PAGE_SHIFT;
+ buf_attr.region[0].size = to_hr_hem_entries_size(srq->wqe_cnt,
+ srq->wqe_shift);
+ buf_attr.region[0].hopnum = hr_dev->caps.srqwqe_hop_num;
+ buf_attr.region_count = 1;
+
+ ret = hns_roce_mtr_create(hr_dev, &srq->buf_mtr, &buf_attr,
+ hr_dev->caps.srqwqe_ba_pg_sz + PAGE_SHIFT,
+ udata, addr);
+ if (ret)
+ ibdev_err(ibdev,
+ "failed to alloc SRQ buf mtr, ret = %d.\n", ret);
+
+ return ret;
+}
+
+static void free_srq_wqe_buf(struct hns_roce_dev *hr_dev,
+ struct hns_roce_srq *srq)
+{
+ hns_roce_mtr_destroy(hr_dev, &srq->buf_mtr);
+}
+
+static int alloc_srq_wrid(struct hns_roce_srq *srq)
+{
+ srq->wrid = kvmalloc_array(srq->wqe_cnt, sizeof(u64), GFP_KERNEL);
+ if (!srq->wrid)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static void free_srq_wrid(struct hns_roce_srq *srq)
+{
+ kvfree(srq->wrid);
+ srq->wrid = NULL;
+}
+
+static u32 proc_srq_sge(struct hns_roce_dev *dev, struct hns_roce_srq *hr_srq,
+ bool user)
+{
+ u32 max_sge = dev->caps.max_srq_sges;
+
+ if (dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
+ return max_sge;
+
+ /* Reserve SGEs only for HIP08 in kernel; The userspace driver will
+ * calculate number of max_sge with reserved SGEs when allocating wqe
+ * buf, so there is no need to do this again in kernel. But the number
+ * may exceed the capacity of SGEs recorded in the firmware, so the
+ * kernel driver should just adapt the value accordingly.
+ */
+ if (user)
+ max_sge = roundup_pow_of_two(max_sge + 1);
+ else
+ hr_srq->rsv_sge = 1;
+
+ return max_sge;
+}
+
+static int set_srq_basic_param(struct hns_roce_srq *srq,
+ struct ib_srq_init_attr *init_attr,
+ struct ib_udata *udata)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(srq->ibsrq.device);
+ struct ib_srq_attr *attr = &init_attr->attr;
+ u32 max_sge;
+
+ max_sge = proc_srq_sge(hr_dev, srq, !!udata);
+ if (attr->max_wr > hr_dev->caps.max_srq_wrs ||
+ attr->max_sge > max_sge || !attr->max_sge) {
+ ibdev_err(&hr_dev->ib_dev,
+ "invalid SRQ attr, depth = %u, sge = %u.\n",
+ attr->max_wr, attr->max_sge);
+ return -EINVAL;
+ }
+
+ attr->max_wr = max_t(u32, attr->max_wr, HNS_ROCE_MIN_SRQ_WQE_NUM);
+ srq->wqe_cnt = roundup_pow_of_two(attr->max_wr);
+ srq->max_gs = roundup_pow_of_two(attr->max_sge + srq->rsv_sge);
+
+ attr->max_wr = srq->wqe_cnt;
+ attr->max_sge = srq->max_gs - srq->rsv_sge;
+ attr->srq_limit = 0;
+
+ return 0;
+}
+
+static void set_srq_ext_param(struct hns_roce_srq *srq,
+ struct ib_srq_init_attr *init_attr)
+{
+ srq->cqn = ib_srq_has_cq(init_attr->srq_type) ?
+ to_hr_cq(init_attr->ext.cq)->cqn : 0;
+
+ srq->xrcdn = (init_attr->srq_type == IB_SRQT_XRC) ?
+ to_hr_xrcd(init_attr->ext.xrc.xrcd)->xrcdn : 0;
+}
+
+static int set_srq_param(struct hns_roce_srq *srq,
+ struct ib_srq_init_attr *init_attr,
+ struct ib_udata *udata)
+{
+ int ret;
+
+ ret = set_srq_basic_param(srq, init_attr, udata);
+ if (ret)
+ return ret;
+
+ set_srq_ext_param(srq, init_attr);
+
+ return 0;
+}
+
+static int alloc_srq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq,
+ struct ib_udata *udata)
+{
+ struct hns_roce_ib_create_srq ucmd = {};
+ int ret;
+
+ if (udata) {
+ ret = ib_copy_from_udata(&ucmd, udata,
+ min(udata->inlen, sizeof(ucmd)));
+ if (ret) {
+ ibdev_err(&hr_dev->ib_dev,
+ "failed to copy SRQ udata, ret = %d.\n",
+ ret);
+ return ret;
+ }
+ }
+
+ ret = alloc_srq_idx(hr_dev, srq, udata, ucmd.que_addr);
+ if (ret)
+ return ret;
+
+ ret = alloc_srq_wqe_buf(hr_dev, srq, udata, ucmd.buf_addr);
+ if (ret)
+ goto err_idx;
+
+ if (!udata) {
+ ret = alloc_srq_wrid(srq);
+ if (ret)
+ goto err_wqe_buf;
+ }
+
+ return 0;
+
+err_wqe_buf:
+ free_srq_wqe_buf(hr_dev, srq);
+err_idx:
+ free_srq_idx(hr_dev, srq);
+
+ return ret;
+}
+
+static void free_srq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq)
+{
+ free_srq_wrid(srq);
+ free_srq_wqe_buf(hr_dev, srq);
+ free_srq_idx(hr_dev, srq);
+}
+
+static int get_srq_ucmd(struct hns_roce_srq *srq, struct ib_udata *udata,
+ struct hns_roce_ib_create_srq *ucmd)
+{
+ struct ib_device *ibdev = srq->ibsrq.device;
+ int ret;
+
+ ret = ib_copy_from_udata(ucmd, udata, min(udata->inlen, sizeof(*ucmd)));
+ if (ret) {
+ ibdev_err(ibdev, "failed to copy SRQ udata, ret = %d.\n", ret);
+ return ret;
+ }
+
+ return 0;
+}
+
+static void free_srq_db(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq,
+ struct ib_udata *udata)
+{
+ struct hns_roce_ucontext *uctx;
+
+ if (!(srq->cap_flags & HNS_ROCE_SRQ_CAP_RECORD_DB))
+ return;
+
+ srq->cap_flags &= ~HNS_ROCE_SRQ_CAP_RECORD_DB;
+ if (udata) {
+ uctx = rdma_udata_to_drv_context(udata,
+ struct hns_roce_ucontext,
+ ibucontext);
+ hns_roce_db_unmap_user(uctx, &srq->rdb);
+ } else {
+ hns_roce_free_db(hr_dev, &srq->rdb);
+ }
+}
+
+static int alloc_srq_db(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq,
+ struct ib_udata *udata,
+ struct hns_roce_ib_create_srq_resp *resp)
+{
+ struct hns_roce_ib_create_srq ucmd = {};
+ struct hns_roce_ucontext *uctx;
+ int ret;
+
+ if (udata) {
+ ret = get_srq_ucmd(srq, udata, &ucmd);
+ if (ret)
+ return ret;
+
+ if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ_RECORD_DB) &&
+ (ucmd.req_cap_flags & HNS_ROCE_SRQ_CAP_RECORD_DB)) {
+ uctx = rdma_udata_to_drv_context(udata,
+ struct hns_roce_ucontext, ibucontext);
+ ret = hns_roce_db_map_user(uctx, ucmd.db_addr,
+ &srq->rdb);
+ if (ret)
+ return ret;
+
+ srq->cap_flags |= HNS_ROCE_RSP_SRQ_CAP_RECORD_DB;
+ }
+ } else {
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ_RECORD_DB) {
+ ret = hns_roce_alloc_db(hr_dev, &srq->rdb, 1);
+ if (ret)
+ return ret;
+
+ *srq->rdb.db_record = 0;
+ srq->cap_flags |= HNS_ROCE_RSP_SRQ_CAP_RECORD_DB;
+ }
+ srq->db_reg = hr_dev->reg_base + SRQ_DB_REG;
+ }
+
+ return 0;
+}
+
+int hns_roce_create_srq(struct ib_srq *ib_srq,
+ struct ib_srq_init_attr *init_attr,
+ struct ib_udata *udata)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ib_srq->device);
+ struct hns_roce_ib_create_srq_resp resp = {};
+ struct hns_roce_srq *srq = to_hr_srq(ib_srq);
+ int ret;
+
+ mutex_init(&srq->mutex);
+ spin_lock_init(&srq->lock);
+
+ ret = set_srq_param(srq, init_attr, udata);
+ if (ret)
+ goto err_out;
+
+ ret = alloc_srq_buf(hr_dev, srq, udata);
+ if (ret)
+ goto err_out;
+
+ ret = alloc_srq_db(hr_dev, srq, udata, &resp);
+ if (ret)
+ goto err_srq_buf;
+
+ ret = alloc_srqn(hr_dev, srq);
+ if (ret)
+ goto err_srq_db;
+
+ ret = alloc_srqc(hr_dev, srq);
+ if (ret)
+ goto err_srqn;
+
+ if (udata) {
+ resp.cap_flags = srq->cap_flags;
+ resp.srqn = srq->srqn;
+ if (ib_copy_to_udata(udata, &resp,
+ min(udata->outlen, sizeof(resp)))) {
+ ret = -EFAULT;
+ goto err_srqc;
+ }
+ }
+
+ srq->event = hns_roce_ib_srq_event;
+ refcount_set(&srq->refcount, 1);
+ init_completion(&srq->free);
+
+ return 0;
+
+err_srqc:
+ free_srqc(hr_dev, srq);
+err_srqn:
+ free_srqn(hr_dev, srq);
+err_srq_db:
+ free_srq_db(hr_dev, srq, udata);
+err_srq_buf:
+ free_srq_buf(hr_dev, srq);
+err_out:
+ mutex_destroy(&srq->mutex);
+ atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_SRQ_CREATE_ERR_CNT]);
+
+ return ret;
+}
+
+int hns_roce_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibsrq->device);
+ struct hns_roce_srq *srq = to_hr_srq(ibsrq);
+
+ free_srqc(hr_dev, srq);
+ free_srqn(hr_dev, srq);
+ free_srq_db(hr_dev, srq, udata);
+ free_srq_buf(hr_dev, srq);
+ mutex_destroy(&srq->mutex);
+ return 0;
+}
+
+void hns_roce_init_srq_table(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_srq_table *srq_table = &hr_dev->srq_table;
+ struct hns_roce_ida *srq_ida = &srq_table->srq_ida;
+
+ xa_init(&srq_table->xa);
+
+ ida_init(&srq_ida->ida);
+ srq_ida->max = hr_dev->caps.num_srqs - 1;
+ srq_ida->min = hr_dev->caps.reserved_srqs;
+}
diff --git a/drivers/infiniband/hw/hns/hns_roce_trace.h b/drivers/infiniband/hw/hns/hns_roce_trace.h
new file mode 100644
index 000000000000..59ceb591b3a1
--- /dev/null
+++ b/drivers/infiniband/hw/hns/hns_roce_trace.h
@@ -0,0 +1,216 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Copyright (c) 2025 Hisilicon Limited.
+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM hns_roce
+
+#if !defined(__HNS_ROCE_TRACE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define __HNS_ROCE_TRACE_H
+
+#include <linux/tracepoint.h>
+#include <linux/string_choices.h>
+#include "hns_roce_device.h"
+#include "hns_roce_hw_v2.h"
+
+DECLARE_EVENT_CLASS(flush_head_template,
+ TP_PROTO(unsigned long qpn, u32 pi,
+ enum hns_roce_trace_type type),
+ TP_ARGS(qpn, pi, type),
+
+ TP_STRUCT__entry(__field(unsigned long, qpn)
+ __field(u32, pi)
+ __field(enum hns_roce_trace_type, type)
+ ),
+
+ TP_fast_assign(__entry->qpn = qpn;
+ __entry->pi = pi;
+ __entry->type = type;
+ ),
+
+ TP_printk("%s 0x%lx flush head 0x%x.",
+ trace_type_to_str(__entry->type),
+ __entry->qpn, __entry->pi)
+);
+
+DEFINE_EVENT(flush_head_template, hns_sq_flush_cqe,
+ TP_PROTO(unsigned long qpn, u32 pi,
+ enum hns_roce_trace_type type),
+ TP_ARGS(qpn, pi, type));
+DEFINE_EVENT(flush_head_template, hns_rq_flush_cqe,
+ TP_PROTO(unsigned long qpn, u32 pi,
+ enum hns_roce_trace_type type),
+ TP_ARGS(qpn, pi, type));
+
+#define MAX_SGE_PER_WQE 64
+#define MAX_WQE_SIZE (MAX_SGE_PER_WQE * HNS_ROCE_SGE_SIZE)
+DECLARE_EVENT_CLASS(wqe_template,
+ TP_PROTO(unsigned long qpn, u32 idx, void *wqe, u32 len,
+ u64 id, enum hns_roce_trace_type type),
+ TP_ARGS(qpn, idx, wqe, len, id, type),
+
+ TP_STRUCT__entry(__field(unsigned long, qpn)
+ __field(u32, idx)
+ __array(u32, wqe,
+ MAX_WQE_SIZE / sizeof(__le32))
+ __field(u32, len)
+ __field(u64, id)
+ __field(enum hns_roce_trace_type, type)
+ ),
+
+ TP_fast_assign(__entry->qpn = qpn;
+ __entry->idx = idx;
+ __entry->id = id;
+ __entry->len = len / sizeof(__le32);
+ __entry->type = type;
+ for (int i = 0; i < __entry->len; i++)
+ __entry->wqe[i] = le32_to_cpu(((__le32 *)wqe)[i]);
+ ),
+
+ TP_printk("%s 0x%lx wqe(0x%x/0x%llx): %s",
+ trace_type_to_str(__entry->type),
+ __entry->qpn, __entry->idx, __entry->id,
+ __print_array(__entry->wqe, __entry->len,
+ sizeof(__le32)))
+);
+
+DEFINE_EVENT(wqe_template, hns_sq_wqe,
+ TP_PROTO(unsigned long qpn, u32 idx, void *wqe, u32 len, u64 id,
+ enum hns_roce_trace_type type),
+ TP_ARGS(qpn, idx, wqe, len, id, type));
+DEFINE_EVENT(wqe_template, hns_rq_wqe,
+ TP_PROTO(unsigned long qpn, u32 idx, void *wqe, u32 len, u64 id,
+ enum hns_roce_trace_type type),
+ TP_ARGS(qpn, idx, wqe, len, id, type));
+DEFINE_EVENT(wqe_template, hns_srq_wqe,
+ TP_PROTO(unsigned long qpn, u32 idx, void *wqe, u32 len, u64 id,
+ enum hns_roce_trace_type type),
+ TP_ARGS(qpn, idx, wqe, len, id, type));
+
+TRACE_EVENT(hns_ae_info,
+ TP_PROTO(int event_type, void *aeqe, unsigned int len),
+ TP_ARGS(event_type, aeqe, len),
+
+ TP_STRUCT__entry(__field(int, event_type)
+ __array(u32, aeqe,
+ HNS_ROCE_V3_EQE_SIZE / sizeof(__le32))
+ __field(u32, len)
+ ),
+
+ TP_fast_assign(__entry->event_type = event_type;
+ __entry->len = len / sizeof(__le32);
+ for (int i = 0; i < __entry->len; i++)
+ __entry->aeqe[i] = le32_to_cpu(((__le32 *)aeqe)[i]);
+ ),
+
+ TP_printk("event %2d aeqe: %s", __entry->event_type,
+ __print_array(__entry->aeqe, __entry->len, sizeof(__le32)))
+);
+
+TRACE_EVENT(hns_mr,
+ TP_PROTO(struct hns_roce_mr *mr),
+ TP_ARGS(mr),
+
+ TP_STRUCT__entry(__field(u64, iova)
+ __field(u64, size)
+ __field(u32, key)
+ __field(u32, pd)
+ __field(u32, pbl_hop_num)
+ __field(u32, npages)
+ __field(int, type)
+ __field(int, enabled)
+ ),
+
+ TP_fast_assign(__entry->iova = mr->iova;
+ __entry->size = mr->size;
+ __entry->key = mr->key;
+ __entry->pd = mr->pd;
+ __entry->pbl_hop_num = mr->pbl_hop_num;
+ __entry->npages = mr->npages;
+ __entry->type = mr->type;
+ __entry->enabled = mr->enabled;
+ ),
+
+ TP_printk("iova:0x%llx, size:%llu, key:%u, pd:%u, pbl_hop:%u, npages:%u, type:%d, status:%d",
+ __entry->iova, __entry->size, __entry->key,
+ __entry->pd, __entry->pbl_hop_num, __entry->npages,
+ __entry->type, __entry->enabled)
+);
+
+TRACE_EVENT(hns_buf_attr,
+ TP_PROTO(struct hns_roce_buf_attr *attr),
+ TP_ARGS(attr),
+
+ TP_STRUCT__entry(__field(unsigned int, region_count)
+ __field(unsigned int, region0_size)
+ __field(int, region0_hopnum)
+ __field(unsigned int, region1_size)
+ __field(int, region1_hopnum)
+ __field(unsigned int, region2_size)
+ __field(int, region2_hopnum)
+ __field(unsigned int, page_shift)
+ __field(bool, mtt_only)
+ ),
+
+ TP_fast_assign(__entry->region_count = attr->region_count;
+ __entry->region0_size = attr->region[0].size;
+ __entry->region0_hopnum = attr->region[0].hopnum;
+ __entry->region1_size = attr->region[1].size;
+ __entry->region1_hopnum = attr->region[1].hopnum;
+ __entry->region2_size = attr->region[2].size;
+ __entry->region2_hopnum = attr->region[2].hopnum;
+ __entry->page_shift = attr->page_shift;
+ __entry->mtt_only = attr->mtt_only;
+ ),
+
+ TP_printk("rg cnt:%u, pg_sft:0x%x, mtt_only:%s, rg 0 (sz:%u, hop:%u), rg 1 (sz:%u, hop:%u), rg 2 (sz:%u, hop:%u)\n",
+ __entry->region_count, __entry->page_shift,
+ str_yes_no(__entry->mtt_only),
+ __entry->region0_size, __entry->region0_hopnum,
+ __entry->region1_size, __entry->region1_hopnum,
+ __entry->region2_size, __entry->region2_hopnum)
+);
+
+DECLARE_EVENT_CLASS(cmdq,
+ TP_PROTO(struct hns_roce_dev *hr_dev,
+ struct hns_roce_cmq_desc *desc),
+ TP_ARGS(hr_dev, desc),
+
+ TP_STRUCT__entry(__string(dev_name, dev_name(hr_dev->dev))
+ __field(u16, opcode)
+ __field(u16, flag)
+ __field(u16, retval)
+ __array(u32, data, 6)
+ ),
+
+ TP_fast_assign(__assign_str(dev_name);
+ __entry->opcode = le16_to_cpu(desc->opcode);
+ __entry->flag = le16_to_cpu(desc->flag);
+ __entry->retval = le16_to_cpu(desc->retval);
+ for (int i = 0; i < 6; i++)
+ __entry->data[i] = le32_to_cpu(desc->data[i]);
+ ),
+
+ TP_printk("%s cmdq opcode:0x%x, flag:0x%x, retval:0x%x, data:%s\n",
+ __get_str(dev_name), __entry->opcode,
+ __entry->flag, __entry->retval,
+ __print_array(__entry->data, 6, sizeof(__le32)))
+);
+
+DEFINE_EVENT(cmdq, hns_cmdq_req,
+ TP_PROTO(struct hns_roce_dev *hr_dev,
+ struct hns_roce_cmq_desc *desc),
+ TP_ARGS(hr_dev, desc));
+DEFINE_EVENT(cmdq, hns_cmdq_resp,
+ TP_PROTO(struct hns_roce_dev *hr_dev,
+ struct hns_roce_cmq_desc *desc),
+ TP_ARGS(hr_dev, desc));
+
+#endif /* __HNS_ROCE_TRACE_H */
+
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE hns_roce_trace
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#include <trace/define_trace.h>
diff --git a/drivers/infiniband/hw/i40iw/Kconfig b/drivers/infiniband/hw/i40iw/Kconfig
deleted file mode 100644
index 6e7d27a14061..000000000000
--- a/drivers/infiniband/hw/i40iw/Kconfig
+++ /dev/null
@@ -1,7 +0,0 @@
-config INFINIBAND_I40IW
- tristate "Intel(R) Ethernet X722 iWARP Driver"
- depends on INET && I40E
- select GENERIC_ALLOCATOR
- ---help---
- Intel(R) Ethernet X722 iWARP Driver
- INET && I40IW && INFINIBAND && I40E
diff --git a/drivers/infiniband/hw/i40iw/Makefile b/drivers/infiniband/hw/i40iw/Makefile
deleted file mode 100644
index 90068c03d217..000000000000
--- a/drivers/infiniband/hw/i40iw/Makefile
+++ /dev/null
@@ -1,9 +0,0 @@
-ccflags-y := -Idrivers/net/ethernet/intel/i40e
-
-obj-$(CONFIG_INFINIBAND_I40IW) += i40iw.o
-
-i40iw-objs :=\
- i40iw_cm.o i40iw_ctrl.o \
- i40iw_hmc.o i40iw_hw.o i40iw_main.o \
- i40iw_pble.o i40iw_puda.o i40iw_uk.o i40iw_utils.o \
- i40iw_verbs.o i40iw_virtchnl.o i40iw_vf.o
diff --git a/drivers/infiniband/hw/i40iw/i40iw.h b/drivers/infiniband/hw/i40iw/i40iw.h
deleted file mode 100644
index da2eb5a281fa..000000000000
--- a/drivers/infiniband/hw/i40iw/i40iw.h
+++ /dev/null
@@ -1,594 +0,0 @@
-/*******************************************************************************
-*
-* Copyright (c) 2015-2016 Intel Corporation. All rights reserved.
-*
-* This software is available to you under a choice of one of two
-* licenses. You may choose to be licensed under the terms of the GNU
-* General Public License (GPL) Version 2, available from the file
-* COPYING in the main directory of this source tree, or the
-* OpenFabrics.org BSD license below:
-*
-* Redistribution and use in source and binary forms, with or
-* without modification, are permitted provided that the following
-* conditions are met:
-*
-* - Redistributions of source code must retain the above
-* copyright notice, this list of conditions and the following
-* disclaimer.
-*
-* - Redistributions in binary form must reproduce the above
-* copyright notice, this list of conditions and the following
-* disclaimer in the documentation and/or other materials
-* provided with the distribution.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-* SOFTWARE.
-*
-*******************************************************************************/
-
-#ifndef I40IW_IW_H
-#define I40IW_IW_H
-#include <linux/netdevice.h>
-#include <linux/inetdevice.h>
-#include <linux/spinlock.h>
-#include <linux/kernel.h>
-#include <linux/delay.h>
-#include <linux/pci.h>
-#include <linux/dma-mapping.h>
-#include <linux/workqueue.h>
-#include <linux/slab.h>
-#include <linux/io.h>
-#include <linux/crc32c.h>
-#include <rdma/ib_smi.h>
-#include <rdma/ib_verbs.h>
-#include <rdma/ib_pack.h>
-#include <rdma/rdma_cm.h>
-#include <rdma/iw_cm.h>
-#include <crypto/hash.h>
-
-#include "i40iw_status.h"
-#include "i40iw_osdep.h"
-#include "i40iw_d.h"
-#include "i40iw_hmc.h"
-
-#include <i40e_client.h>
-#include "i40iw_type.h"
-#include "i40iw_p.h"
-#include "i40iw_ucontext.h"
-#include "i40iw_pble.h"
-#include "i40iw_verbs.h"
-#include "i40iw_cm.h"
-#include "i40iw_user.h"
-#include "i40iw_puda.h"
-
-#define I40IW_FW_VERSION 2
-#define I40IW_HW_VERSION 2
-
-#define I40IW_ARP_ADD 1
-#define I40IW_ARP_DELETE 2
-#define I40IW_ARP_RESOLVE 3
-
-#define I40IW_MACIP_ADD 1
-#define I40IW_MACIP_DELETE 2
-
-#define IW_CCQ_SIZE (I40IW_CQP_SW_SQSIZE_2048 + 1)
-#define IW_CEQ_SIZE 2048
-#define IW_AEQ_SIZE 2048
-
-#define RX_BUF_SIZE (1536 + 8)
-#define IW_REG0_SIZE (4 * 1024)
-#define IW_TX_TIMEOUT (6 * HZ)
-#define IW_FIRST_QPN 1
-#define IW_SW_CONTEXT_ALIGN 1024
-
-#define MAX_DPC_ITERATIONS 128
-
-#define I40IW_EVENT_TIMEOUT 100000
-#define I40IW_VCHNL_EVENT_TIMEOUT 100000
-
-#define I40IW_NO_VLAN 0xffff
-#define I40IW_NO_QSET 0xffff
-
-/* access to mcast filter list */
-#define IW_ADD_MCAST false
-#define IW_DEL_MCAST true
-
-#define I40IW_DRV_OPT_ENABLE_MPA_VER_0 0x00000001
-#define I40IW_DRV_OPT_DISABLE_MPA_CRC 0x00000002
-#define I40IW_DRV_OPT_DISABLE_FIRST_WRITE 0x00000004
-#define I40IW_DRV_OPT_DISABLE_INTF 0x00000008
-#define I40IW_DRV_OPT_ENABLE_MSI 0x00000010
-#define I40IW_DRV_OPT_DUAL_LOGICAL_PORT 0x00000020
-#define I40IW_DRV_OPT_NO_INLINE_DATA 0x00000080
-#define I40IW_DRV_OPT_DISABLE_INT_MOD 0x00000100
-#define I40IW_DRV_OPT_DISABLE_VIRT_WQ 0x00000200
-#define I40IW_DRV_OPT_ENABLE_PAU 0x00000400
-#define I40IW_DRV_OPT_MCAST_LOGPORT_MAP 0x00000800
-
-#define IW_HMC_OBJ_TYPE_NUM ARRAY_SIZE(iw_hmc_obj_types)
-#define IW_CFG_FPM_QP_COUNT 32768
-#define I40IW_MAX_PAGES_PER_FMR 512
-#define I40IW_MIN_PAGES_PER_FMR 1
-#define I40IW_CQP_COMPL_RQ_WQE_FLUSHED 2
-#define I40IW_CQP_COMPL_SQ_WQE_FLUSHED 3
-#define I40IW_CQP_COMPL_RQ_SQ_WQE_FLUSHED 4
-
-#define I40IW_MTU_TO_MSS 40
-#define I40IW_DEFAULT_MSS 1460
-
-struct i40iw_cqp_compl_info {
- u32 op_ret_val;
- u16 maj_err_code;
- u16 min_err_code;
- bool error;
- u8 op_code;
-};
-
-#define i40iw_pr_err(fmt, args ...) pr_err("%s: "fmt, __func__, ## args)
-
-#define i40iw_pr_info(fmt, args ...) pr_info("%s: " fmt, __func__, ## args)
-
-#define i40iw_pr_warn(fmt, args ...) pr_warn("%s: " fmt, __func__, ## args)
-
-struct i40iw_cqp_request {
- struct cqp_commands_info info;
- wait_queue_head_t waitq;
- struct list_head list;
- atomic_t refcount;
- void (*callback_fcn)(struct i40iw_cqp_request*, u32);
- void *param;
- struct i40iw_cqp_compl_info compl_info;
- bool waiting;
- bool request_done;
- bool dynamic;
-};
-
-struct i40iw_cqp {
- struct i40iw_sc_cqp sc_cqp;
- spinlock_t req_lock; /*cqp request list */
- wait_queue_head_t waitq;
- struct i40iw_dma_mem sq;
- struct i40iw_dma_mem host_ctx;
- u64 *scratch_array;
- struct i40iw_cqp_request *cqp_requests;
- struct list_head cqp_avail_reqs;
- struct list_head cqp_pending_reqs;
-};
-
-struct i40iw_device;
-
-struct i40iw_ccq {
- struct i40iw_sc_cq sc_cq;
- spinlock_t lock; /* ccq control */
- wait_queue_head_t waitq;
- struct i40iw_dma_mem mem_cq;
- struct i40iw_dma_mem shadow_area;
-};
-
-struct i40iw_ceq {
- struct i40iw_sc_ceq sc_ceq;
- struct i40iw_dma_mem mem;
- u32 irq;
- u32 msix_idx;
- struct i40iw_device *iwdev;
- struct tasklet_struct dpc_tasklet;
-};
-
-struct i40iw_aeq {
- struct i40iw_sc_aeq sc_aeq;
- struct i40iw_dma_mem mem;
-};
-
-struct i40iw_arp_entry {
- u32 ip_addr[4];
- u8 mac_addr[ETH_ALEN];
-};
-
-enum init_completion_state {
- INVALID_STATE = 0,
- INITIAL_STATE,
- CQP_CREATED,
- HMC_OBJS_CREATED,
- PBLE_CHUNK_MEM,
- CCQ_CREATED,
- AEQ_CREATED,
- CEQ_CREATED,
- ILQ_CREATED,
- IEQ_CREATED,
- INET_NOTIFIER,
- IP_ADDR_REGISTERED,
- RDMA_DEV_REGISTERED
-};
-
-struct i40iw_msix_vector {
- u32 idx;
- u32 irq;
- u32 cpu_affinity;
- u32 ceq_id;
-};
-
-struct l2params_work {
- struct work_struct work;
- struct i40iw_device *iwdev;
- struct i40iw_l2params l2params;
-};
-
-#define I40IW_MSIX_TABLE_SIZE 65
-
-struct virtchnl_work {
- struct work_struct work;
- union {
- struct i40iw_cqp_request *cqp_request;
- struct i40iw_virtchnl_work_info work_info;
- };
-};
-
-struct i40e_qvlist_info;
-
-struct i40iw_device {
- struct i40iw_ib_device *iwibdev;
- struct net_device *netdev;
- wait_queue_head_t vchnl_waitq;
- struct i40iw_sc_dev sc_dev;
- struct i40iw_sc_vsi vsi;
- struct i40iw_handler *hdl;
- struct i40e_info *ldev;
- struct i40e_client *client;
- struct i40iw_hw hw;
- struct i40iw_cm_core cm_core;
- u8 *mem_resources;
- unsigned long *allocated_qps;
- unsigned long *allocated_cqs;
- unsigned long *allocated_mrs;
- unsigned long *allocated_pds;
- unsigned long *allocated_arps;
- struct i40iw_qp **qp_table;
- bool msix_shared;
- u32 msix_count;
- struct i40iw_msix_vector *iw_msixtbl;
- struct i40e_qvlist_info *iw_qvlist;
-
- struct i40iw_hmc_pble_rsrc *pble_rsrc;
- struct i40iw_arp_entry *arp_table;
- struct i40iw_cqp cqp;
- struct i40iw_ccq ccq;
- u32 ceqs_count;
- struct i40iw_ceq *ceqlist;
- struct i40iw_aeq aeq;
- u32 arp_table_size;
- u32 next_arp_index;
- spinlock_t resource_lock; /* hw resource access */
- spinlock_t qptable_lock;
- u32 vendor_id;
- u32 vendor_part_id;
- u32 of_device_registered;
-
- u32 device_cap_flags;
- unsigned long db_start;
- u8 resource_profile;
- u8 max_rdma_vfs;
- u8 max_enabled_vfs;
- u8 max_sge;
- u8 iw_status;
- u8 send_term_ok;
- bool push_mode; /* Initialized from parameter passed to driver */
-
- /* x710 specific */
- struct mutex pbl_mutex;
- struct tasklet_struct dpc_tasklet;
- struct workqueue_struct *virtchnl_wq;
- struct virtchnl_work virtchnl_w[I40IW_MAX_PE_ENABLED_VF_COUNT];
- struct i40iw_dma_mem obj_mem;
- struct i40iw_dma_mem obj_next;
- u8 *hmc_info_mem;
- u32 sd_type;
- struct workqueue_struct *param_wq;
- atomic_t params_busy;
- enum init_completion_state init_state;
- u16 mac_ip_table_idx;
- atomic_t vchnl_msgs;
- u32 max_mr;
- u32 max_qp;
- u32 max_cq;
- u32 max_pd;
- u32 next_qp;
- u32 next_cq;
- u32 next_pd;
- u32 max_mr_size;
- u32 max_qp_wr;
- u32 max_cqe;
- u32 mr_stagmask;
- u32 mpa_version;
- bool dcb;
- bool closing;
- bool reset;
- u32 used_pds;
- u32 used_cqs;
- u32 used_mrs;
- u32 used_qps;
- wait_queue_head_t close_wq;
- atomic64_t use_count;
-};
-
-struct i40iw_ib_device {
- struct ib_device ibdev;
- struct i40iw_device *iwdev;
-};
-
-struct i40iw_handler {
- struct list_head list;
- struct i40e_client *client;
- struct i40iw_device device;
- struct i40e_info ldev;
-};
-
-/**
- * to_iwdev - get device
- * @ibdev: ib device
- **/
-static inline struct i40iw_device *to_iwdev(struct ib_device *ibdev)
-{
- return container_of(ibdev, struct i40iw_ib_device, ibdev)->iwdev;
-}
-
-/**
- * to_ucontext - get user context
- * @ibucontext: ib user context
- **/
-static inline struct i40iw_ucontext *to_ucontext(struct ib_ucontext *ibucontext)
-{
- return container_of(ibucontext, struct i40iw_ucontext, ibucontext);
-}
-
-/**
- * to_iwpd - get protection domain
- * @ibpd: ib pd
- **/
-static inline struct i40iw_pd *to_iwpd(struct ib_pd *ibpd)
-{
- return container_of(ibpd, struct i40iw_pd, ibpd);
-}
-
-/**
- * to_iwmr - get device memory region
- * @ibdev: ib memory region
- **/
-static inline struct i40iw_mr *to_iwmr(struct ib_mr *ibmr)
-{
- return container_of(ibmr, struct i40iw_mr, ibmr);
-}
-
-/**
- * to_iwmr_from_ibfmr - get device memory region
- * @ibfmr: ib fmr
- **/
-static inline struct i40iw_mr *to_iwmr_from_ibfmr(struct ib_fmr *ibfmr)
-{
- return container_of(ibfmr, struct i40iw_mr, ibfmr);
-}
-
-/**
- * to_iwmw - get device memory window
- * @ibmw: ib memory window
- **/
-static inline struct i40iw_mr *to_iwmw(struct ib_mw *ibmw)
-{
- return container_of(ibmw, struct i40iw_mr, ibmw);
-}
-
-/**
- * to_iwcq - get completion queue
- * @ibcq: ib cqdevice
- **/
-static inline struct i40iw_cq *to_iwcq(struct ib_cq *ibcq)
-{
- return container_of(ibcq, struct i40iw_cq, ibcq);
-}
-
-/**
- * to_iwqp - get device qp
- * @ibqp: ib qp
- **/
-static inline struct i40iw_qp *to_iwqp(struct ib_qp *ibqp)
-{
- return container_of(ibqp, struct i40iw_qp, ibqp);
-}
-
-/* i40iw.c */
-void i40iw_add_ref(struct ib_qp *);
-void i40iw_rem_ref(struct ib_qp *);
-struct ib_qp *i40iw_get_qp(struct ib_device *, int);
-
-void i40iw_flush_wqes(struct i40iw_device *iwdev,
- struct i40iw_qp *qp);
-
-void i40iw_manage_arp_cache(struct i40iw_device *iwdev,
- unsigned char *mac_addr,
- u32 *ip_addr,
- bool ipv4,
- u32 action);
-
-int i40iw_manage_apbvt(struct i40iw_device *iwdev,
- u16 accel_local_port,
- bool add_port);
-
-struct i40iw_cqp_request *i40iw_get_cqp_request(struct i40iw_cqp *cqp, bool wait);
-void i40iw_free_cqp_request(struct i40iw_cqp *cqp, struct i40iw_cqp_request *cqp_request);
-void i40iw_put_cqp_request(struct i40iw_cqp *cqp, struct i40iw_cqp_request *cqp_request);
-
-/**
- * i40iw_alloc_resource - allocate a resource
- * @iwdev: device pointer
- * @resource_array: resource bit array:
- * @max_resources: maximum resource number
- * @req_resources_num: Allocated resource number
- * @next: next free id
- **/
-static inline int i40iw_alloc_resource(struct i40iw_device *iwdev,
- unsigned long *resource_array,
- u32 max_resources,
- u32 *req_resource_num,
- u32 *next)
-{
- u32 resource_num;
- unsigned long flags;
-
- spin_lock_irqsave(&iwdev->resource_lock, flags);
- resource_num = find_next_zero_bit(resource_array, max_resources, *next);
- if (resource_num >= max_resources) {
- resource_num = find_first_zero_bit(resource_array, max_resources);
- if (resource_num >= max_resources) {
- spin_unlock_irqrestore(&iwdev->resource_lock, flags);
- return -EOVERFLOW;
- }
- }
- set_bit(resource_num, resource_array);
- *next = resource_num + 1;
- if (*next == max_resources)
- *next = 0;
- *req_resource_num = resource_num;
- spin_unlock_irqrestore(&iwdev->resource_lock, flags);
-
- return 0;
-}
-
-/**
- * i40iw_is_resource_allocated - detrmine if resource is
- * allocated
- * @iwdev: device pointer
- * @resource_array: resource array for the resource_num
- * @resource_num: resource number to check
- **/
-static inline bool i40iw_is_resource_allocated(struct i40iw_device *iwdev,
- unsigned long *resource_array,
- u32 resource_num)
-{
- bool bit_is_set;
- unsigned long flags;
-
- spin_lock_irqsave(&iwdev->resource_lock, flags);
-
- bit_is_set = test_bit(resource_num, resource_array);
- spin_unlock_irqrestore(&iwdev->resource_lock, flags);
-
- return bit_is_set;
-}
-
-/**
- * i40iw_free_resource - free a resource
- * @iwdev: device pointer
- * @resource_array: resource array for the resource_num
- * @resource_num: resource number to free
- **/
-static inline void i40iw_free_resource(struct i40iw_device *iwdev,
- unsigned long *resource_array,
- u32 resource_num)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&iwdev->resource_lock, flags);
- clear_bit(resource_num, resource_array);
- spin_unlock_irqrestore(&iwdev->resource_lock, flags);
-}
-
-/**
- * to_iwhdl - Get the handler from the device pointer
- * @iwdev: device pointer
- **/
-static inline struct i40iw_handler *to_iwhdl(struct i40iw_device *iw_dev)
-{
- return container_of(iw_dev, struct i40iw_handler, device);
-}
-
-struct i40iw_handler *i40iw_find_netdev(struct net_device *netdev);
-
-/**
- * iw_init_resources -
- */
-u32 i40iw_initialize_hw_resources(struct i40iw_device *iwdev);
-
-int i40iw_register_rdma_device(struct i40iw_device *iwdev);
-void i40iw_port_ibevent(struct i40iw_device *iwdev);
-void i40iw_cm_disconn(struct i40iw_qp *iwqp);
-void i40iw_cm_disconn_worker(void *);
-int mini_cm_recv_pkt(struct i40iw_cm_core *, struct i40iw_device *,
- struct sk_buff *);
-
-enum i40iw_status_code i40iw_handle_cqp_op(struct i40iw_device *iwdev,
- struct i40iw_cqp_request *cqp_request);
-enum i40iw_status_code i40iw_add_mac_addr(struct i40iw_device *iwdev,
- u8 *mac_addr, u8 *mac_index);
-int i40iw_modify_qp(struct ib_qp *, struct ib_qp_attr *, int, struct ib_udata *);
-void i40iw_cq_wq_destroy(struct i40iw_device *iwdev, struct i40iw_sc_cq *cq);
-
-void i40iw_rem_pdusecount(struct i40iw_pd *iwpd, struct i40iw_device *iwdev);
-void i40iw_add_pdusecount(struct i40iw_pd *iwpd);
-void i40iw_rem_devusecount(struct i40iw_device *iwdev);
-void i40iw_add_devusecount(struct i40iw_device *iwdev);
-void i40iw_hw_modify_qp(struct i40iw_device *iwdev, struct i40iw_qp *iwqp,
- struct i40iw_modify_qp_info *info, bool wait);
-
-void i40iw_qp_suspend_resume(struct i40iw_sc_dev *dev,
- struct i40iw_sc_qp *qp,
- bool suspend);
-enum i40iw_status_code i40iw_manage_qhash(struct i40iw_device *iwdev,
- struct i40iw_cm_info *cminfo,
- enum i40iw_quad_entry_type etype,
- enum i40iw_quad_hash_manage_type mtype,
- void *cmnode,
- bool wait);
-void i40iw_receive_ilq(struct i40iw_sc_vsi *vsi, struct i40iw_puda_buf *rbuf);
-void i40iw_free_sqbuf(struct i40iw_sc_vsi *vsi, void *bufp);
-void i40iw_free_qp_resources(struct i40iw_device *iwdev,
- struct i40iw_qp *iwqp,
- u32 qp_num);
-enum i40iw_status_code i40iw_obj_aligned_mem(struct i40iw_device *iwdev,
- struct i40iw_dma_mem *memptr,
- u32 size, u32 mask);
-
-void i40iw_request_reset(struct i40iw_device *iwdev);
-void i40iw_destroy_rdma_device(struct i40iw_ib_device *iwibdev);
-void i40iw_setup_cm_core(struct i40iw_device *iwdev);
-void i40iw_cleanup_cm_core(struct i40iw_cm_core *cm_core);
-void i40iw_process_ceq(struct i40iw_device *, struct i40iw_ceq *iwceq);
-void i40iw_process_aeq(struct i40iw_device *);
-void i40iw_next_iw_state(struct i40iw_qp *iwqp,
- u8 state, u8 del_hash,
- u8 term, u8 term_len);
-int i40iw_send_syn(struct i40iw_cm_node *cm_node, u32 sendack);
-struct i40iw_cm_node *i40iw_find_node(struct i40iw_cm_core *cm_core,
- u16 rem_port,
- u32 *rem_addr,
- u16 loc_port,
- u32 *loc_addr,
- bool add_refcnt);
-
-enum i40iw_status_code i40iw_hw_flush_wqes(struct i40iw_device *iwdev,
- struct i40iw_sc_qp *qp,
- struct i40iw_qp_flush_info *info,
- bool wait);
-
-void i40iw_copy_ip_ntohl(u32 *dst, __be32 *src);
-struct ib_mr *i40iw_reg_phys_mr(struct ib_pd *ib_pd,
- u64 addr,
- u64 size,
- int acc,
- u64 *iova_start);
-
-int i40iw_inetaddr_event(struct notifier_block *notifier,
- unsigned long event,
- void *ptr);
-int i40iw_inet6addr_event(struct notifier_block *notifier,
- unsigned long event,
- void *ptr);
-int i40iw_net_event(struct notifier_block *notifier,
- unsigned long event,
- void *ptr);
-
-#endif
diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c
deleted file mode 100644
index 95a0586a4da8..000000000000
--- a/drivers/infiniband/hw/i40iw/i40iw_cm.c
+++ /dev/null
@@ -1,4329 +0,0 @@
-/*******************************************************************************
-*
-* Copyright (c) 2015-2016 Intel Corporation. All rights reserved.
-*
-* This software is available to you under a choice of one of two
-* licenses. You may choose to be licensed under the terms of the GNU
-* General Public License (GPL) Version 2, available from the file
-* COPYING in the main directory of this source tree, or the
-* OpenFabrics.org BSD license below:
-*
-* Redistribution and use in source and binary forms, with or
-* without modification, are permitted provided that the following
-* conditions are met:
-*
-* - Redistributions of source code must retain the above
-* copyright notice, this list of conditions and the following
-* disclaimer.
-*
-* - Redistributions in binary form must reproduce the above
-* copyright notice, this list of conditions and the following
-* disclaimer in the documentation and/or other materials
-* provided with the distribution.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-* SOFTWARE.
-*
-*******************************************************************************/
-
-#include <linux/atomic.h>
-#include <linux/ip.h>
-#include <linux/tcp.h>
-#include <linux/init.h>
-#include <linux/if_arp.h>
-#include <linux/if_vlan.h>
-#include <linux/notifier.h>
-#include <linux/net.h>
-#include <linux/types.h>
-#include <linux/timer.h>
-#include <linux/time.h>
-#include <linux/delay.h>
-#include <linux/etherdevice.h>
-#include <linux/netdevice.h>
-#include <linux/random.h>
-#include <linux/list.h>
-#include <linux/threads.h>
-#include <linux/highmem.h>
-#include <net/arp.h>
-#include <net/ndisc.h>
-#include <net/neighbour.h>
-#include <net/route.h>
-#include <net/addrconf.h>
-#include <net/ip6_route.h>
-#include <net/ip_fib.h>
-#include <net/tcp.h>
-#include <asm/checksum.h>
-
-#include "i40iw.h"
-
-static void i40iw_rem_ref_cm_node(struct i40iw_cm_node *);
-static void i40iw_cm_post_event(struct i40iw_cm_event *event);
-static void i40iw_disconnect_worker(struct work_struct *work);
-
-/**
- * i40iw_free_sqbuf - put back puda buffer if refcount = 0
- * @vsi: pointer to vsi structure
- * @buf: puda buffer to free
- */
-void i40iw_free_sqbuf(struct i40iw_sc_vsi *vsi, void *bufp)
-{
- struct i40iw_puda_buf *buf = (struct i40iw_puda_buf *)bufp;
- struct i40iw_puda_rsrc *ilq = vsi->ilq;
-
- if (!atomic_dec_return(&buf->refcount))
- i40iw_puda_ret_bufpool(ilq, buf);
-}
-
-/**
- * i40iw_derive_hw_ird_setting - Calculate IRD
- *
- * @cm_ird: IRD of connection's node
- *
- * The ird from the connection is rounded to a supported HW
- * setting (2,8,32,64) and then encoded for ird_size field of
- * qp_ctx
- */
-static u8 i40iw_derive_hw_ird_setting(u16 cm_ird)
-{
- u8 encoded_ird_size;
- u8 pof2_cm_ird = 1;
-
- /* round-off to next powerof2 */
- while (pof2_cm_ird < cm_ird)
- pof2_cm_ird *= 2;
-
- /* ird_size field is encoded in qp_ctx */
- switch (pof2_cm_ird) {
- case I40IW_HW_IRD_SETTING_64:
- encoded_ird_size = 3;
- break;
- case I40IW_HW_IRD_SETTING_32:
- case I40IW_HW_IRD_SETTING_16:
- encoded_ird_size = 2;
- break;
- case I40IW_HW_IRD_SETTING_8:
- case I40IW_HW_IRD_SETTING_4:
- encoded_ird_size = 1;
- break;
- case I40IW_HW_IRD_SETTING_2:
- default:
- encoded_ird_size = 0;
- break;
- }
- return encoded_ird_size;
-}
-
-/**
- * i40iw_record_ird_ord - Record IRD/ORD passed in
- * @cm_node: connection's node
- * @conn_ird: connection IRD
- * @conn_ord: connection ORD
- */
-static void i40iw_record_ird_ord(struct i40iw_cm_node *cm_node, u16 conn_ird, u16 conn_ord)
-{
- if (conn_ird > I40IW_MAX_IRD_SIZE)
- conn_ird = I40IW_MAX_IRD_SIZE;
-
- if (conn_ord > I40IW_MAX_ORD_SIZE)
- conn_ord = I40IW_MAX_ORD_SIZE;
-
- cm_node->ird_size = conn_ird;
- cm_node->ord_size = conn_ord;
-}
-
-/**
- * i40iw_copy_ip_ntohl - change network to host ip
- * @dst: host ip
- * @src: big endian
- */
-void i40iw_copy_ip_ntohl(u32 *dst, __be32 *src)
-{
- *dst++ = ntohl(*src++);
- *dst++ = ntohl(*src++);
- *dst++ = ntohl(*src++);
- *dst = ntohl(*src);
-}
-
-/**
- * i40iw_copy_ip_htonl - change host addr to network ip
- * @dst: host ip
- * @src: little endian
- */
-static inline void i40iw_copy_ip_htonl(__be32 *dst, u32 *src)
-{
- *dst++ = htonl(*src++);
- *dst++ = htonl(*src++);
- *dst++ = htonl(*src++);
- *dst = htonl(*src);
-}
-
-/**
- * i40iw_fill_sockaddr4 - get addr info for passive connection
- * @cm_node: connection's node
- * @event: upper layer's cm event
- */
-static inline void i40iw_fill_sockaddr4(struct i40iw_cm_node *cm_node,
- struct iw_cm_event *event)
-{
- struct sockaddr_in *laddr = (struct sockaddr_in *)&event->local_addr;
- struct sockaddr_in *raddr = (struct sockaddr_in *)&event->remote_addr;
-
- laddr->sin_family = AF_INET;
- raddr->sin_family = AF_INET;
-
- laddr->sin_port = htons(cm_node->loc_port);
- raddr->sin_port = htons(cm_node->rem_port);
-
- laddr->sin_addr.s_addr = htonl(cm_node->loc_addr[0]);
- raddr->sin_addr.s_addr = htonl(cm_node->rem_addr[0]);
-}
-
-/**
- * i40iw_fill_sockaddr6 - get ipv6 addr info for passive side
- * @cm_node: connection's node
- * @event: upper layer's cm event
- */
-static inline void i40iw_fill_sockaddr6(struct i40iw_cm_node *cm_node,
- struct iw_cm_event *event)
-{
- struct sockaddr_in6 *laddr6 = (struct sockaddr_in6 *)&event->local_addr;
- struct sockaddr_in6 *raddr6 = (struct sockaddr_in6 *)&event->remote_addr;
-
- laddr6->sin6_family = AF_INET6;
- raddr6->sin6_family = AF_INET6;
-
- laddr6->sin6_port = htons(cm_node->loc_port);
- raddr6->sin6_port = htons(cm_node->rem_port);
-
- i40iw_copy_ip_htonl(laddr6->sin6_addr.in6_u.u6_addr32,
- cm_node->loc_addr);
- i40iw_copy_ip_htonl(raddr6->sin6_addr.in6_u.u6_addr32,
- cm_node->rem_addr);
-}
-
-/**
- * i40iw_get_addr_info
- * @cm_node: contains ip/tcp info
- * @cm_info: to get a copy of the cm_node ip/tcp info
-*/
-static void i40iw_get_addr_info(struct i40iw_cm_node *cm_node,
- struct i40iw_cm_info *cm_info)
-{
- cm_info->ipv4 = cm_node->ipv4;
- cm_info->vlan_id = cm_node->vlan_id;
- memcpy(cm_info->loc_addr, cm_node->loc_addr, sizeof(cm_info->loc_addr));
- memcpy(cm_info->rem_addr, cm_node->rem_addr, sizeof(cm_info->rem_addr));
- cm_info->loc_port = cm_node->loc_port;
- cm_info->rem_port = cm_node->rem_port;
- cm_info->user_pri = cm_node->user_pri;
-}
-
-/**
- * i40iw_get_cmevent_info - for cm event upcall
- * @cm_node: connection's node
- * @cm_id: upper layers cm struct for the event
- * @event: upper layer's cm event
- */
-static inline void i40iw_get_cmevent_info(struct i40iw_cm_node *cm_node,
- struct iw_cm_id *cm_id,
- struct iw_cm_event *event)
-{
- memcpy(&event->local_addr, &cm_id->m_local_addr,
- sizeof(event->local_addr));
- memcpy(&event->remote_addr, &cm_id->m_remote_addr,
- sizeof(event->remote_addr));
- if (cm_node) {
- event->private_data = (void *)cm_node->pdata_buf;
- event->private_data_len = (u8)cm_node->pdata.size;
- event->ird = cm_node->ird_size;
- event->ord = cm_node->ord_size;
- }
-}
-
-/**
- * i40iw_send_cm_event - upcall cm's event handler
- * @cm_node: connection's node
- * @cm_id: upper layer's cm info struct
- * @type: Event type to indicate
- * @status: status for the event type
- */
-static int i40iw_send_cm_event(struct i40iw_cm_node *cm_node,
- struct iw_cm_id *cm_id,
- enum iw_cm_event_type type,
- int status)
-{
- struct iw_cm_event event;
-
- memset(&event, 0, sizeof(event));
- event.event = type;
- event.status = status;
- switch (type) {
- case IW_CM_EVENT_CONNECT_REQUEST:
- if (cm_node->ipv4)
- i40iw_fill_sockaddr4(cm_node, &event);
- else
- i40iw_fill_sockaddr6(cm_node, &event);
- event.provider_data = (void *)cm_node;
- event.private_data = (void *)cm_node->pdata_buf;
- event.private_data_len = (u8)cm_node->pdata.size;
- event.ird = cm_node->ird_size;
- break;
- case IW_CM_EVENT_CONNECT_REPLY:
- i40iw_get_cmevent_info(cm_node, cm_id, &event);
- break;
- case IW_CM_EVENT_ESTABLISHED:
- event.ird = cm_node->ird_size;
- event.ord = cm_node->ord_size;
- break;
- case IW_CM_EVENT_DISCONNECT:
- break;
- case IW_CM_EVENT_CLOSE:
- break;
- default:
- i40iw_pr_err("event type received type = %d\n", type);
- return -1;
- }
- return cm_id->event_handler(cm_id, &event);
-}
-
-/**
- * i40iw_create_event - create cm event
- * @cm_node: connection's node
- * @type: Event type to generate
- */
-static struct i40iw_cm_event *i40iw_create_event(struct i40iw_cm_node *cm_node,
- enum i40iw_cm_event_type type)
-{
- struct i40iw_cm_event *event;
-
- if (!cm_node->cm_id)
- return NULL;
-
- event = kzalloc(sizeof(*event), GFP_ATOMIC);
-
- if (!event)
- return NULL;
-
- event->type = type;
- event->cm_node = cm_node;
- memcpy(event->cm_info.rem_addr, cm_node->rem_addr, sizeof(event->cm_info.rem_addr));
- memcpy(event->cm_info.loc_addr, cm_node->loc_addr, sizeof(event->cm_info.loc_addr));
- event->cm_info.rem_port = cm_node->rem_port;
- event->cm_info.loc_port = cm_node->loc_port;
- event->cm_info.cm_id = cm_node->cm_id;
-
- i40iw_debug(cm_node->dev,
- I40IW_DEBUG_CM,
- "node=%p event=%p type=%u dst=%pI4 src=%pI4\n",
- cm_node,
- event,
- type,
- event->cm_info.loc_addr,
- event->cm_info.rem_addr);
-
- i40iw_cm_post_event(event);
- return event;
-}
-
-/**
- * i40iw_free_retrans_entry - free send entry
- * @cm_node: connection's node
- */
-static void i40iw_free_retrans_entry(struct i40iw_cm_node *cm_node)
-{
- struct i40iw_device *iwdev = cm_node->iwdev;
- struct i40iw_timer_entry *send_entry;
-
- send_entry = cm_node->send_entry;
- if (send_entry) {
- cm_node->send_entry = NULL;
- i40iw_free_sqbuf(&iwdev->vsi, (void *)send_entry->sqbuf);
- kfree(send_entry);
- atomic_dec(&cm_node->ref_count);
- }
-}
-
-/**
- * i40iw_cleanup_retrans_entry - free send entry with lock
- * @cm_node: connection's node
- */
-static void i40iw_cleanup_retrans_entry(struct i40iw_cm_node *cm_node)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
- i40iw_free_retrans_entry(cm_node);
- spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
-}
-
-/**
- * i40iw_form_cm_frame - get a free packet and build frame
- * @cm_node: connection's node ionfo to use in frame
- * @options: pointer to options info
- * @hdr: pointer mpa header
- * @pdata: pointer to private data
- * @flags: indicates FIN or ACK
- */
-static struct i40iw_puda_buf *i40iw_form_cm_frame(struct i40iw_cm_node *cm_node,
- struct i40iw_kmem_info *options,
- struct i40iw_kmem_info *hdr,
- struct i40iw_kmem_info *pdata,
- u8 flags)
-{
- struct i40iw_puda_buf *sqbuf;
- struct i40iw_sc_vsi *vsi = &cm_node->iwdev->vsi;
- u8 *buf;
-
- struct tcphdr *tcph;
- struct iphdr *iph;
- struct ipv6hdr *ip6h;
- struct ethhdr *ethh;
- u16 packetsize;
- u16 eth_hlen = ETH_HLEN;
- u32 opts_len = 0;
- u32 pd_len = 0;
- u32 hdr_len = 0;
- u16 vtag;
-
- sqbuf = i40iw_puda_get_bufpool(vsi->ilq);
- if (!sqbuf)
- return NULL;
- buf = sqbuf->mem.va;
-
- if (options)
- opts_len = (u32)options->size;
-
- if (hdr)
- hdr_len = hdr->size;
-
- if (pdata)
- pd_len = pdata->size;
-
- if (cm_node->vlan_id < VLAN_TAG_PRESENT)
- eth_hlen += 4;
-
- if (cm_node->ipv4)
- packetsize = sizeof(*iph) + sizeof(*tcph);
- else
- packetsize = sizeof(*ip6h) + sizeof(*tcph);
- packetsize += opts_len + hdr_len + pd_len;
-
- memset(buf, 0x00, eth_hlen + packetsize);
-
- sqbuf->totallen = packetsize + eth_hlen;
- sqbuf->maclen = eth_hlen;
- sqbuf->tcphlen = sizeof(*tcph) + opts_len;
- sqbuf->scratch = (void *)cm_node;
-
- ethh = (struct ethhdr *)buf;
- buf += eth_hlen;
-
- if (cm_node->ipv4) {
- sqbuf->ipv4 = true;
-
- iph = (struct iphdr *)buf;
- buf += sizeof(*iph);
- tcph = (struct tcphdr *)buf;
- buf += sizeof(*tcph);
-
- ether_addr_copy(ethh->h_dest, cm_node->rem_mac);
- ether_addr_copy(ethh->h_source, cm_node->loc_mac);
- if (cm_node->vlan_id < VLAN_TAG_PRESENT) {
- ((struct vlan_ethhdr *)ethh)->h_vlan_proto = htons(ETH_P_8021Q);
- vtag = (cm_node->user_pri << VLAN_PRIO_SHIFT) | cm_node->vlan_id;
- ((struct vlan_ethhdr *)ethh)->h_vlan_TCI = htons(vtag);
-
- ((struct vlan_ethhdr *)ethh)->h_vlan_encapsulated_proto = htons(ETH_P_IP);
- } else {
- ethh->h_proto = htons(ETH_P_IP);
- }
-
- iph->version = IPVERSION;
- iph->ihl = 5; /* 5 * 4Byte words, IP headr len */
- iph->tos = cm_node->tos;
- iph->tot_len = htons(packetsize);
- iph->id = htons(++cm_node->tcp_cntxt.loc_id);
-
- iph->frag_off = htons(0x4000);
- iph->ttl = 0x40;
- iph->protocol = IPPROTO_TCP;
- iph->saddr = htonl(cm_node->loc_addr[0]);
- iph->daddr = htonl(cm_node->rem_addr[0]);
- } else {
- sqbuf->ipv4 = false;
- ip6h = (struct ipv6hdr *)buf;
- buf += sizeof(*ip6h);
- tcph = (struct tcphdr *)buf;
- buf += sizeof(*tcph);
-
- ether_addr_copy(ethh->h_dest, cm_node->rem_mac);
- ether_addr_copy(ethh->h_source, cm_node->loc_mac);
- if (cm_node->vlan_id < VLAN_TAG_PRESENT) {
- ((struct vlan_ethhdr *)ethh)->h_vlan_proto = htons(ETH_P_8021Q);
- vtag = (cm_node->user_pri << VLAN_PRIO_SHIFT) | cm_node->vlan_id;
- ((struct vlan_ethhdr *)ethh)->h_vlan_TCI = htons(vtag);
- ((struct vlan_ethhdr *)ethh)->h_vlan_encapsulated_proto = htons(ETH_P_IPV6);
- } else {
- ethh->h_proto = htons(ETH_P_IPV6);
- }
- ip6h->version = 6;
- ip6h->priority = cm_node->tos >> 4;
- ip6h->flow_lbl[0] = cm_node->tos << 4;
- ip6h->flow_lbl[1] = 0;
- ip6h->flow_lbl[2] = 0;
- ip6h->payload_len = htons(packetsize - sizeof(*ip6h));
- ip6h->nexthdr = 6;
- ip6h->hop_limit = 128;
- i40iw_copy_ip_htonl(ip6h->saddr.in6_u.u6_addr32,
- cm_node->loc_addr);
- i40iw_copy_ip_htonl(ip6h->daddr.in6_u.u6_addr32,
- cm_node->rem_addr);
- }
-
- tcph->source = htons(cm_node->loc_port);
- tcph->dest = htons(cm_node->rem_port);
-
- tcph->seq = htonl(cm_node->tcp_cntxt.loc_seq_num);
-
- if (flags & SET_ACK) {
- cm_node->tcp_cntxt.loc_ack_num = cm_node->tcp_cntxt.rcv_nxt;
- tcph->ack_seq = htonl(cm_node->tcp_cntxt.loc_ack_num);
- tcph->ack = 1;
- } else {
- tcph->ack_seq = 0;
- }
-
- if (flags & SET_SYN) {
- cm_node->tcp_cntxt.loc_seq_num++;
- tcph->syn = 1;
- } else {
- cm_node->tcp_cntxt.loc_seq_num += hdr_len + pd_len;
- }
-
- if (flags & SET_FIN) {
- cm_node->tcp_cntxt.loc_seq_num++;
- tcph->fin = 1;
- }
-
- if (flags & SET_RST)
- tcph->rst = 1;
-
- tcph->doff = (u16)((sizeof(*tcph) + opts_len + 3) >> 2);
- sqbuf->tcphlen = tcph->doff << 2;
- tcph->window = htons(cm_node->tcp_cntxt.rcv_wnd);
- tcph->urg_ptr = 0;
-
- if (opts_len) {
- memcpy(buf, options->addr, opts_len);
- buf += opts_len;
- }
-
- if (hdr_len) {
- memcpy(buf, hdr->addr, hdr_len);
- buf += hdr_len;
- }
-
- if (pdata && pdata->addr)
- memcpy(buf, pdata->addr, pdata->size);
-
- atomic_set(&sqbuf->refcount, 1);
-
- return sqbuf;
-}
-
-/**
- * i40iw_send_reset - Send RST packet
- * @cm_node: connection's node
- */
-static int i40iw_send_reset(struct i40iw_cm_node *cm_node)
-{
- struct i40iw_puda_buf *sqbuf;
- int flags = SET_RST | SET_ACK;
-
- sqbuf = i40iw_form_cm_frame(cm_node, NULL, NULL, NULL, flags);
- if (!sqbuf) {
- i40iw_pr_err("no sqbuf\n");
- return -1;
- }
-
- return i40iw_schedule_cm_timer(cm_node, sqbuf, I40IW_TIMER_TYPE_SEND, 0, 1);
-}
-
-/**
- * i40iw_active_open_err - send event for active side cm error
- * @cm_node: connection's node
- * @reset: Flag to send reset or not
- */
-static void i40iw_active_open_err(struct i40iw_cm_node *cm_node, bool reset)
-{
- i40iw_cleanup_retrans_entry(cm_node);
- cm_node->cm_core->stats_connect_errs++;
- if (reset) {
- i40iw_debug(cm_node->dev,
- I40IW_DEBUG_CM,
- "%s cm_node=%p state=%d\n",
- __func__,
- cm_node,
- cm_node->state);
- atomic_inc(&cm_node->ref_count);
- i40iw_send_reset(cm_node);
- }
-
- cm_node->state = I40IW_CM_STATE_CLOSED;
- i40iw_create_event(cm_node, I40IW_CM_EVENT_ABORTED);
-}
-
-/**
- * i40iw_passive_open_err - handle passive side cm error
- * @cm_node: connection's node
- * @reset: send reset or just free cm_node
- */
-static void i40iw_passive_open_err(struct i40iw_cm_node *cm_node, bool reset)
-{
- i40iw_cleanup_retrans_entry(cm_node);
- cm_node->cm_core->stats_passive_errs++;
- cm_node->state = I40IW_CM_STATE_CLOSED;
- i40iw_debug(cm_node->dev,
- I40IW_DEBUG_CM,
- "%s cm_node=%p state =%d\n",
- __func__,
- cm_node,
- cm_node->state);
- if (reset)
- i40iw_send_reset(cm_node);
- else
- i40iw_rem_ref_cm_node(cm_node);
-}
-
-/**
- * i40iw_event_connect_error - to create connect error event
- * @event: cm information for connect event
- */
-static void i40iw_event_connect_error(struct i40iw_cm_event *event)
-{
- struct i40iw_qp *iwqp;
- struct iw_cm_id *cm_id;
-
- cm_id = event->cm_node->cm_id;
- if (!cm_id)
- return;
-
- iwqp = cm_id->provider_data;
-
- if (!iwqp || !iwqp->iwdev)
- return;
-
- iwqp->cm_id = NULL;
- cm_id->provider_data = NULL;
- i40iw_send_cm_event(event->cm_node, cm_id,
- IW_CM_EVENT_CONNECT_REPLY,
- -ECONNRESET);
- cm_id->rem_ref(cm_id);
- i40iw_rem_ref_cm_node(event->cm_node);
-}
-
-/**
- * i40iw_process_options
- * @cm_node: connection's node
- * @optionsloc: point to start of options
- * @optionsize: size of all options
- * @syn_packet: flag if syn packet
- */
-static int i40iw_process_options(struct i40iw_cm_node *cm_node,
- u8 *optionsloc,
- u32 optionsize,
- u32 syn_packet)
-{
- u32 tmp;
- u32 offset = 0;
- union all_known_options *all_options;
- char got_mss_option = 0;
-
- while (offset < optionsize) {
- all_options = (union all_known_options *)(optionsloc + offset);
- switch (all_options->as_base.optionnum) {
- case OPTION_NUMBER_END:
- offset = optionsize;
- break;
- case OPTION_NUMBER_NONE:
- offset += 1;
- continue;
- case OPTION_NUMBER_MSS:
- i40iw_debug(cm_node->dev,
- I40IW_DEBUG_CM,
- "%s: MSS Length: %d Offset: %d Size: %d\n",
- __func__,
- all_options->as_mss.length,
- offset,
- optionsize);
- got_mss_option = 1;
- if (all_options->as_mss.length != 4)
- return -1;
- tmp = ntohs(all_options->as_mss.mss);
- if (tmp > 0 && tmp < cm_node->tcp_cntxt.mss)
- cm_node->tcp_cntxt.mss = tmp;
- break;
- case OPTION_NUMBER_WINDOW_SCALE:
- cm_node->tcp_cntxt.snd_wscale =
- all_options->as_windowscale.shiftcount;
- break;
- default:
- i40iw_debug(cm_node->dev,
- I40IW_DEBUG_CM,
- "TCP Option not understood: %x\n",
- all_options->as_base.optionnum);
- break;
- }
- offset += all_options->as_base.length;
- }
- if (!got_mss_option && syn_packet)
- cm_node->tcp_cntxt.mss = I40IW_CM_DEFAULT_MSS;
- return 0;
-}
-
-/**
- * i40iw_handle_tcp_options -
- * @cm_node: connection's node
- * @tcph: pointer tcp header
- * @optionsize: size of options rcvd
- * @passive: active or passive flag
- */
-static int i40iw_handle_tcp_options(struct i40iw_cm_node *cm_node,
- struct tcphdr *tcph,
- int optionsize,
- int passive)
-{
- u8 *optionsloc = (u8 *)&tcph[1];
-
- if (optionsize) {
- if (i40iw_process_options(cm_node,
- optionsloc,
- optionsize,
- (u32)tcph->syn)) {
- i40iw_debug(cm_node->dev,
- I40IW_DEBUG_CM,
- "%s: Node %p, Sending RESET\n",
- __func__,
- cm_node);
- if (passive)
- i40iw_passive_open_err(cm_node, true);
- else
- i40iw_active_open_err(cm_node, true);
- return -1;
- }
- }
-
- cm_node->tcp_cntxt.snd_wnd = ntohs(tcph->window) <<
- cm_node->tcp_cntxt.snd_wscale;
-
- if (cm_node->tcp_cntxt.snd_wnd > cm_node->tcp_cntxt.max_snd_wnd)
- cm_node->tcp_cntxt.max_snd_wnd = cm_node->tcp_cntxt.snd_wnd;
- return 0;
-}
-
-/**
- * i40iw_build_mpa_v1 - build a MPA V1 frame
- * @cm_node: connection's node
- * @mpa_key: to do read0 or write0
- */
-static void i40iw_build_mpa_v1(struct i40iw_cm_node *cm_node,
- void *start_addr,
- u8 mpa_key)
-{
- struct ietf_mpa_v1 *mpa_frame = (struct ietf_mpa_v1 *)start_addr;
-
- switch (mpa_key) {
- case MPA_KEY_REQUEST:
- memcpy(mpa_frame->key, IEFT_MPA_KEY_REQ, IETF_MPA_KEY_SIZE);
- break;
- case MPA_KEY_REPLY:
- memcpy(mpa_frame->key, IEFT_MPA_KEY_REP, IETF_MPA_KEY_SIZE);
- break;
- default:
- break;
- }
- mpa_frame->flags = IETF_MPA_FLAGS_CRC;
- mpa_frame->rev = cm_node->mpa_frame_rev;
- mpa_frame->priv_data_len = htons(cm_node->pdata.size);
-}
-
-/**
- * i40iw_build_mpa_v2 - build a MPA V2 frame
- * @cm_node: connection's node
- * @start_addr: buffer start address
- * @mpa_key: to do read0 or write0
- */
-static void i40iw_build_mpa_v2(struct i40iw_cm_node *cm_node,
- void *start_addr,
- u8 mpa_key)
-{
- struct ietf_mpa_v2 *mpa_frame = (struct ietf_mpa_v2 *)start_addr;
- struct ietf_rtr_msg *rtr_msg = &mpa_frame->rtr_msg;
- u16 ctrl_ird, ctrl_ord;
-
- /* initialize the upper 5 bytes of the frame */
- i40iw_build_mpa_v1(cm_node, start_addr, mpa_key);
- mpa_frame->flags |= IETF_MPA_V2_FLAG;
- mpa_frame->priv_data_len += htons(IETF_RTR_MSG_SIZE);
-
- /* initialize RTR msg */
- if (cm_node->mpav2_ird_ord == IETF_NO_IRD_ORD) {
- ctrl_ird = IETF_NO_IRD_ORD;
- ctrl_ord = IETF_NO_IRD_ORD;
- } else {
- ctrl_ird = (cm_node->ird_size > IETF_NO_IRD_ORD) ?
- IETF_NO_IRD_ORD : cm_node->ird_size;
- ctrl_ord = (cm_node->ord_size > IETF_NO_IRD_ORD) ?
- IETF_NO_IRD_ORD : cm_node->ord_size;
- }
-
- ctrl_ird |= IETF_PEER_TO_PEER;
- ctrl_ird |= IETF_FLPDU_ZERO_LEN;
-
- switch (mpa_key) {
- case MPA_KEY_REQUEST:
- ctrl_ord |= IETF_RDMA0_WRITE;
- ctrl_ord |= IETF_RDMA0_READ;
- break;
- case MPA_KEY_REPLY:
- switch (cm_node->send_rdma0_op) {
- case SEND_RDMA_WRITE_ZERO:
- ctrl_ord |= IETF_RDMA0_WRITE;
- break;
- case SEND_RDMA_READ_ZERO:
- ctrl_ord |= IETF_RDMA0_READ;
- break;
- }
- break;
- default:
- break;
- }
- rtr_msg->ctrl_ird = htons(ctrl_ird);
- rtr_msg->ctrl_ord = htons(ctrl_ord);
-}
-
-/**
- * i40iw_cm_build_mpa_frame - build mpa frame for mpa version 1 or version 2
- * @cm_node: connection's node
- * @mpa: mpa: data buffer
- * @mpa_key: to do read0 or write0
- */
-static int i40iw_cm_build_mpa_frame(struct i40iw_cm_node *cm_node,
- struct i40iw_kmem_info *mpa,
- u8 mpa_key)
-{
- int hdr_len = 0;
-
- switch (cm_node->mpa_frame_rev) {
- case IETF_MPA_V1:
- hdr_len = sizeof(struct ietf_mpa_v1);
- i40iw_build_mpa_v1(cm_node, mpa->addr, mpa_key);
- break;
- case IETF_MPA_V2:
- hdr_len = sizeof(struct ietf_mpa_v2);
- i40iw_build_mpa_v2(cm_node, mpa->addr, mpa_key);
- break;
- default:
- break;
- }
-
- return hdr_len;
-}
-
-/**
- * i40iw_send_mpa_request - active node send mpa request to passive node
- * @cm_node: connection's node
- */
-static int i40iw_send_mpa_request(struct i40iw_cm_node *cm_node)
-{
- struct i40iw_puda_buf *sqbuf;
-
- if (!cm_node) {
- i40iw_pr_err("cm_node == NULL\n");
- return -1;
- }
-
- cm_node->mpa_hdr.addr = &cm_node->mpa_frame;
- cm_node->mpa_hdr.size = i40iw_cm_build_mpa_frame(cm_node,
- &cm_node->mpa_hdr,
- MPA_KEY_REQUEST);
- if (!cm_node->mpa_hdr.size) {
- i40iw_pr_err("mpa size = %d\n", cm_node->mpa_hdr.size);
- return -1;
- }
-
- sqbuf = i40iw_form_cm_frame(cm_node,
- NULL,
- &cm_node->mpa_hdr,
- &cm_node->pdata,
- SET_ACK);
- if (!sqbuf) {
- i40iw_pr_err("sq_buf == NULL\n");
- return -1;
- }
- return i40iw_schedule_cm_timer(cm_node, sqbuf, I40IW_TIMER_TYPE_SEND, 1, 0);
-}
-
-/**
- * i40iw_send_mpa_reject -
- * @cm_node: connection's node
- * @pdata: reject data for connection
- * @plen: length of reject data
- */
-static int i40iw_send_mpa_reject(struct i40iw_cm_node *cm_node,
- const void *pdata,
- u8 plen)
-{
- struct i40iw_puda_buf *sqbuf;
- struct i40iw_kmem_info priv_info;
-
- cm_node->mpa_hdr.addr = &cm_node->mpa_frame;
- cm_node->mpa_hdr.size = i40iw_cm_build_mpa_frame(cm_node,
- &cm_node->mpa_hdr,
- MPA_KEY_REPLY);
-
- cm_node->mpa_frame.flags |= IETF_MPA_FLAGS_REJECT;
- priv_info.addr = (void *)pdata;
- priv_info.size = plen;
-
- sqbuf = i40iw_form_cm_frame(cm_node,
- NULL,
- &cm_node->mpa_hdr,
- &priv_info,
- SET_ACK | SET_FIN);
- if (!sqbuf) {
- i40iw_pr_err("no sqbuf\n");
- return -ENOMEM;
- }
- cm_node->state = I40IW_CM_STATE_FIN_WAIT1;
- return i40iw_schedule_cm_timer(cm_node, sqbuf, I40IW_TIMER_TYPE_SEND, 1, 0);
-}
-
-/**
- * recv_mpa - process an IETF MPA frame
- * @cm_node: connection's node
- * @buffer: Data pointer
- * @type: to return accept or reject
- * @len: Len of mpa buffer
- */
-static int i40iw_parse_mpa(struct i40iw_cm_node *cm_node, u8 *buffer, u32 *type, u32 len)
-{
- struct ietf_mpa_v1 *mpa_frame;
- struct ietf_mpa_v2 *mpa_v2_frame;
- struct ietf_rtr_msg *rtr_msg;
- int mpa_hdr_len;
- int priv_data_len;
-
- *type = I40IW_MPA_REQUEST_ACCEPT;
-
- if (len < sizeof(struct ietf_mpa_v1)) {
- i40iw_pr_err("ietf buffer small (%x)\n", len);
- return -1;
- }
-
- mpa_frame = (struct ietf_mpa_v1 *)buffer;
- mpa_hdr_len = sizeof(struct ietf_mpa_v1);
- priv_data_len = ntohs(mpa_frame->priv_data_len);
-
- if (priv_data_len > IETF_MAX_PRIV_DATA_LEN) {
- i40iw_pr_err("large pri_data %d\n", priv_data_len);
- return -1;
- }
- if (mpa_frame->rev != IETF_MPA_V1 && mpa_frame->rev != IETF_MPA_V2) {
- i40iw_pr_err("unsupported mpa rev = %d\n", mpa_frame->rev);
- return -1;
- }
- if (mpa_frame->rev > cm_node->mpa_frame_rev) {
- i40iw_pr_err("rev %d\n", mpa_frame->rev);
- return -1;
- }
- cm_node->mpa_frame_rev = mpa_frame->rev;
-
- if (cm_node->state != I40IW_CM_STATE_MPAREQ_SENT) {
- if (memcmp(mpa_frame->key, IEFT_MPA_KEY_REQ, IETF_MPA_KEY_SIZE)) {
- i40iw_pr_err("Unexpected MPA Key received\n");
- return -1;
- }
- } else {
- if (memcmp(mpa_frame->key, IEFT_MPA_KEY_REP, IETF_MPA_KEY_SIZE)) {
- i40iw_pr_err("Unexpected MPA Key received\n");
- return -1;
- }
- }
-
- if (priv_data_len + mpa_hdr_len > len) {
- i40iw_pr_err("ietf buffer len(%x + %x != %x)\n",
- priv_data_len, mpa_hdr_len, len);
- return -1;
- }
- if (len > MAX_CM_BUFFER) {
- i40iw_pr_err("ietf buffer large len = %d\n", len);
- return -1;
- }
-
- switch (mpa_frame->rev) {
- case IETF_MPA_V2:{
- u16 ird_size;
- u16 ord_size;
- u16 ctrl_ord;
- u16 ctrl_ird;
-
- mpa_v2_frame = (struct ietf_mpa_v2 *)buffer;
- mpa_hdr_len += IETF_RTR_MSG_SIZE;
- rtr_msg = &mpa_v2_frame->rtr_msg;
-
- /* parse rtr message */
- ctrl_ord = ntohs(rtr_msg->ctrl_ord);
- ctrl_ird = ntohs(rtr_msg->ctrl_ird);
- ird_size = ctrl_ird & IETF_NO_IRD_ORD;
- ord_size = ctrl_ord & IETF_NO_IRD_ORD;
-
- if (!(ctrl_ird & IETF_PEER_TO_PEER))
- return -1;
-
- if (ird_size == IETF_NO_IRD_ORD || ord_size == IETF_NO_IRD_ORD) {
- cm_node->mpav2_ird_ord = IETF_NO_IRD_ORD;
- goto negotiate_done;
- }
-
- if (cm_node->state != I40IW_CM_STATE_MPAREQ_SENT) {
- /* responder */
- if (!ord_size && (ctrl_ord & IETF_RDMA0_READ))
- cm_node->ird_size = 1;
- if (cm_node->ord_size > ird_size)
- cm_node->ord_size = ird_size;
- } else {
- /* initiator */
- if (!ird_size && (ctrl_ord & IETF_RDMA0_READ))
- return -1;
- if (cm_node->ord_size > ird_size)
- cm_node->ord_size = ird_size;
-
- if (cm_node->ird_size < ord_size)
- /* no resources available */
- return -1;
- }
-
-negotiate_done:
- if (ctrl_ord & IETF_RDMA0_READ)
- cm_node->send_rdma0_op = SEND_RDMA_READ_ZERO;
- else if (ctrl_ord & IETF_RDMA0_WRITE)
- cm_node->send_rdma0_op = SEND_RDMA_WRITE_ZERO;
- else /* Not supported RDMA0 operation */
- return -1;
- i40iw_debug(cm_node->dev, I40IW_DEBUG_CM,
- "MPAV2: Negotiated ORD: %d, IRD: %d\n",
- cm_node->ord_size, cm_node->ird_size);
- break;
- }
- break;
- case IETF_MPA_V1:
- default:
- break;
- }
-
- memcpy(cm_node->pdata_buf, buffer + mpa_hdr_len, priv_data_len);
- cm_node->pdata.size = priv_data_len;
-
- if (mpa_frame->flags & IETF_MPA_FLAGS_REJECT)
- *type = I40IW_MPA_REQUEST_REJECT;
-
- if (mpa_frame->flags & IETF_MPA_FLAGS_MARKERS)
- cm_node->snd_mark_en = true;
-
- return 0;
-}
-
-/**
- * i40iw_schedule_cm_timer
- * @@cm_node: connection's node
- * @sqbuf: buffer to send
- * @type: if it es send ot close
- * @send_retrans: if rexmits to be done
- * @close_when_complete: is cm_node to be removed
- *
- * note - cm_node needs to be protected before calling this. Encase in:
- * i40iw_rem_ref_cm_node(cm_core, cm_node);
- * i40iw_schedule_cm_timer(...)
- * atomic_inc(&cm_node->ref_count);
- */
-int i40iw_schedule_cm_timer(struct i40iw_cm_node *cm_node,
- struct i40iw_puda_buf *sqbuf,
- enum i40iw_timer_type type,
- int send_retrans,
- int close_when_complete)
-{
- struct i40iw_sc_vsi *vsi = &cm_node->iwdev->vsi;
- struct i40iw_cm_core *cm_core = cm_node->cm_core;
- struct i40iw_timer_entry *new_send;
- int ret = 0;
- u32 was_timer_set;
- unsigned long flags;
-
- new_send = kzalloc(sizeof(*new_send), GFP_ATOMIC);
- if (!new_send) {
- i40iw_free_sqbuf(vsi, (void *)sqbuf);
- return -ENOMEM;
- }
- new_send->retrycount = I40IW_DEFAULT_RETRYS;
- new_send->retranscount = I40IW_DEFAULT_RETRANS;
- new_send->sqbuf = sqbuf;
- new_send->timetosend = jiffies;
- new_send->type = type;
- new_send->send_retrans = send_retrans;
- new_send->close_when_complete = close_when_complete;
-
- if (type == I40IW_TIMER_TYPE_CLOSE) {
- new_send->timetosend += (HZ / 10);
- if (cm_node->close_entry) {
- kfree(new_send);
- i40iw_free_sqbuf(vsi, (void *)sqbuf);
- i40iw_pr_err("already close entry\n");
- return -EINVAL;
- }
- cm_node->close_entry = new_send;
- }
-
- if (type == I40IW_TIMER_TYPE_SEND) {
- spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
- cm_node->send_entry = new_send;
- atomic_inc(&cm_node->ref_count);
- spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
- new_send->timetosend = jiffies + I40IW_RETRY_TIMEOUT;
-
- atomic_inc(&sqbuf->refcount);
- i40iw_puda_send_buf(vsi->ilq, sqbuf);
- if (!send_retrans) {
- i40iw_cleanup_retrans_entry(cm_node);
- if (close_when_complete)
- i40iw_rem_ref_cm_node(cm_node);
- return ret;
- }
- }
-
- spin_lock_irqsave(&cm_core->ht_lock, flags);
- was_timer_set = timer_pending(&cm_core->tcp_timer);
-
- if (!was_timer_set) {
- cm_core->tcp_timer.expires = new_send->timetosend;
- add_timer(&cm_core->tcp_timer);
- }
- spin_unlock_irqrestore(&cm_core->ht_lock, flags);
-
- return ret;
-}
-
-/**
- * i40iw_retrans_expired - Could not rexmit the packet
- * @cm_node: connection's node
- */
-static void i40iw_retrans_expired(struct i40iw_cm_node *cm_node)
-{
- struct iw_cm_id *cm_id = cm_node->cm_id;
- enum i40iw_cm_node_state state = cm_node->state;
-
- cm_node->state = I40IW_CM_STATE_CLOSED;
- switch (state) {
- case I40IW_CM_STATE_SYN_RCVD:
- case I40IW_CM_STATE_CLOSING:
- i40iw_rem_ref_cm_node(cm_node);
- break;
- case I40IW_CM_STATE_FIN_WAIT1:
- case I40IW_CM_STATE_LAST_ACK:
- if (cm_node->cm_id)
- cm_id->rem_ref(cm_id);
- i40iw_send_reset(cm_node);
- break;
- default:
- atomic_inc(&cm_node->ref_count);
- i40iw_send_reset(cm_node);
- i40iw_create_event(cm_node, I40IW_CM_EVENT_ABORTED);
- break;
- }
-}
-
-/**
- * i40iw_handle_close_entry - for handling retry/timeouts
- * @cm_node: connection's node
- * @rem_node: flag for remove cm_node
- */
-static void i40iw_handle_close_entry(struct i40iw_cm_node *cm_node, u32 rem_node)
-{
- struct i40iw_timer_entry *close_entry = cm_node->close_entry;
- struct iw_cm_id *cm_id = cm_node->cm_id;
- struct i40iw_qp *iwqp;
- unsigned long flags;
-
- if (!close_entry)
- return;
- iwqp = (struct i40iw_qp *)close_entry->sqbuf;
- if (iwqp) {
- spin_lock_irqsave(&iwqp->lock, flags);
- if (iwqp->cm_id) {
- iwqp->hw_tcp_state = I40IW_TCP_STATE_CLOSED;
- iwqp->hw_iwarp_state = I40IW_QP_STATE_ERROR;
- iwqp->last_aeq = I40IW_AE_RESET_SENT;
- iwqp->ibqp_state = IB_QPS_ERR;
- spin_unlock_irqrestore(&iwqp->lock, flags);
- i40iw_cm_disconn(iwqp);
- } else {
- spin_unlock_irqrestore(&iwqp->lock, flags);
- }
- } else if (rem_node) {
- /* TIME_WAIT state */
- i40iw_rem_ref_cm_node(cm_node);
- }
- if (cm_id)
- cm_id->rem_ref(cm_id);
- kfree(close_entry);
- cm_node->close_entry = NULL;
-}
-
-/**
- * i40iw_cm_timer_tick - system's timer expired callback
- * @pass: Pointing to cm_core
- */
-static void i40iw_cm_timer_tick(unsigned long pass)
-{
- unsigned long nexttimeout = jiffies + I40IW_LONG_TIME;
- struct i40iw_cm_node *cm_node;
- struct i40iw_timer_entry *send_entry, *close_entry;
- struct list_head *list_core_temp;
- struct i40iw_sc_vsi *vsi;
- struct list_head *list_node;
- struct i40iw_cm_core *cm_core = (struct i40iw_cm_core *)pass;
- u32 settimer = 0;
- unsigned long timetosend;
- struct i40iw_sc_dev *dev;
- unsigned long flags;
-
- struct list_head timer_list;
-
- INIT_LIST_HEAD(&timer_list);
- spin_lock_irqsave(&cm_core->ht_lock, flags);
-
- list_for_each_safe(list_node, list_core_temp, &cm_core->connected_nodes) {
- cm_node = container_of(list_node, struct i40iw_cm_node, list);
- if (cm_node->close_entry || cm_node->send_entry) {
- atomic_inc(&cm_node->ref_count);
- list_add(&cm_node->timer_entry, &timer_list);
- }
- }
- spin_unlock_irqrestore(&cm_core->ht_lock, flags);
-
- list_for_each_safe(list_node, list_core_temp, &timer_list) {
- cm_node = container_of(list_node,
- struct i40iw_cm_node,
- timer_entry);
- close_entry = cm_node->close_entry;
-
- if (close_entry) {
- if (time_after(close_entry->timetosend, jiffies)) {
- if (nexttimeout > close_entry->timetosend ||
- !settimer) {
- nexttimeout = close_entry->timetosend;
- settimer = 1;
- }
- } else {
- i40iw_handle_close_entry(cm_node, 1);
- }
- }
-
- spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
-
- send_entry = cm_node->send_entry;
- if (!send_entry)
- goto done;
- if (time_after(send_entry->timetosend, jiffies)) {
- if (cm_node->state != I40IW_CM_STATE_OFFLOADED) {
- if ((nexttimeout > send_entry->timetosend) ||
- !settimer) {
- nexttimeout = send_entry->timetosend;
- settimer = 1;
- }
- } else {
- i40iw_free_retrans_entry(cm_node);
- }
- goto done;
- }
-
- if ((cm_node->state == I40IW_CM_STATE_OFFLOADED) ||
- (cm_node->state == I40IW_CM_STATE_CLOSED)) {
- i40iw_free_retrans_entry(cm_node);
- goto done;
- }
-
- if (!send_entry->retranscount || !send_entry->retrycount) {
- i40iw_free_retrans_entry(cm_node);
-
- spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
- i40iw_retrans_expired(cm_node);
- cm_node->state = I40IW_CM_STATE_CLOSED;
- spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
- goto done;
- }
- cm_node->cm_core->stats_pkt_retrans++;
- spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
-
- vsi = &cm_node->iwdev->vsi;
- dev = cm_node->dev;
- atomic_inc(&send_entry->sqbuf->refcount);
- i40iw_puda_send_buf(vsi->ilq, send_entry->sqbuf);
- spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
- if (send_entry->send_retrans) {
- send_entry->retranscount--;
- timetosend = (I40IW_RETRY_TIMEOUT <<
- (I40IW_DEFAULT_RETRANS -
- send_entry->retranscount));
-
- send_entry->timetosend = jiffies +
- min(timetosend, I40IW_MAX_TIMEOUT);
- if (nexttimeout > send_entry->timetosend || !settimer) {
- nexttimeout = send_entry->timetosend;
- settimer = 1;
- }
- } else {
- int close_when_complete;
-
- close_when_complete = send_entry->close_when_complete;
- i40iw_debug(cm_node->dev,
- I40IW_DEBUG_CM,
- "cm_node=%p state=%d\n",
- cm_node,
- cm_node->state);
- i40iw_free_retrans_entry(cm_node);
- if (close_when_complete)
- i40iw_rem_ref_cm_node(cm_node);
- }
-done:
- spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
- i40iw_rem_ref_cm_node(cm_node);
- }
-
- if (settimer) {
- spin_lock_irqsave(&cm_core->ht_lock, flags);
- if (!timer_pending(&cm_core->tcp_timer)) {
- cm_core->tcp_timer.expires = nexttimeout;
- add_timer(&cm_core->tcp_timer);
- }
- spin_unlock_irqrestore(&cm_core->ht_lock, flags);
- }
-}
-
-/**
- * i40iw_send_syn - send SYN packet
- * @cm_node: connection's node
- * @sendack: flag to set ACK bit or not
- */
-int i40iw_send_syn(struct i40iw_cm_node *cm_node, u32 sendack)
-{
- struct i40iw_puda_buf *sqbuf;
- int flags = SET_SYN;
- char optionsbuffer[sizeof(struct option_mss) +
- sizeof(struct option_windowscale) +
- sizeof(struct option_base) + TCP_OPTIONS_PADDING];
- struct i40iw_kmem_info opts;
-
- int optionssize = 0;
- /* Sending MSS option */
- union all_known_options *options;
-
- opts.addr = optionsbuffer;
- if (!cm_node) {
- i40iw_pr_err("no cm_node\n");
- return -EINVAL;
- }
-
- options = (union all_known_options *)&optionsbuffer[optionssize];
- options->as_mss.optionnum = OPTION_NUMBER_MSS;
- options->as_mss.length = sizeof(struct option_mss);
- options->as_mss.mss = htons(cm_node->tcp_cntxt.mss);
- optionssize += sizeof(struct option_mss);
-
- options = (union all_known_options *)&optionsbuffer[optionssize];
- options->as_windowscale.optionnum = OPTION_NUMBER_WINDOW_SCALE;
- options->as_windowscale.length = sizeof(struct option_windowscale);
- options->as_windowscale.shiftcount = cm_node->tcp_cntxt.rcv_wscale;
- optionssize += sizeof(struct option_windowscale);
- options = (union all_known_options *)&optionsbuffer[optionssize];
- options->as_end = OPTION_NUMBER_END;
- optionssize += 1;
-
- if (sendack)
- flags |= SET_ACK;
-
- opts.size = optionssize;
-
- sqbuf = i40iw_form_cm_frame(cm_node, &opts, NULL, NULL, flags);
- if (!sqbuf) {
- i40iw_pr_err("no sqbuf\n");
- return -1;
- }
- return i40iw_schedule_cm_timer(cm_node, sqbuf, I40IW_TIMER_TYPE_SEND, 1, 0);
-}
-
-/**
- * i40iw_send_ack - Send ACK packet
- * @cm_node: connection's node
- */
-static void i40iw_send_ack(struct i40iw_cm_node *cm_node)
-{
- struct i40iw_puda_buf *sqbuf;
- struct i40iw_sc_vsi *vsi = &cm_node->iwdev->vsi;
-
- sqbuf = i40iw_form_cm_frame(cm_node, NULL, NULL, NULL, SET_ACK);
- if (sqbuf)
- i40iw_puda_send_buf(vsi->ilq, sqbuf);
- else
- i40iw_pr_err("no sqbuf\n");
-}
-
-/**
- * i40iw_send_fin - Send FIN pkt
- * @cm_node: connection's node
- */
-static int i40iw_send_fin(struct i40iw_cm_node *cm_node)
-{
- struct i40iw_puda_buf *sqbuf;
-
- sqbuf = i40iw_form_cm_frame(cm_node, NULL, NULL, NULL, SET_ACK | SET_FIN);
- if (!sqbuf) {
- i40iw_pr_err("no sqbuf\n");
- return -1;
- }
- return i40iw_schedule_cm_timer(cm_node, sqbuf, I40IW_TIMER_TYPE_SEND, 1, 0);
-}
-
-/**
- * i40iw_find_node - find a cm node that matches the reference cm node
- * @cm_core: cm's core
- * @rem_port: remote tcp port num
- * @rem_addr: remote ip addr
- * @loc_port: local tcp port num
- * @loc_addr: loc ip addr
- * @add_refcnt: flag to increment refcount of cm_node
- */
-struct i40iw_cm_node *i40iw_find_node(struct i40iw_cm_core *cm_core,
- u16 rem_port,
- u32 *rem_addr,
- u16 loc_port,
- u32 *loc_addr,
- bool add_refcnt)
-{
- struct list_head *hte;
- struct i40iw_cm_node *cm_node;
- unsigned long flags;
-
- hte = &cm_core->connected_nodes;
-
- /* walk list and find cm_node associated with this session ID */
- spin_lock_irqsave(&cm_core->ht_lock, flags);
- list_for_each_entry(cm_node, hte, list) {
- if (!memcmp(cm_node->loc_addr, loc_addr, sizeof(cm_node->loc_addr)) &&
- (cm_node->loc_port == loc_port) &&
- !memcmp(cm_node->rem_addr, rem_addr, sizeof(cm_node->rem_addr)) &&
- (cm_node->rem_port == rem_port)) {
- if (add_refcnt)
- atomic_inc(&cm_node->ref_count);
- spin_unlock_irqrestore(&cm_core->ht_lock, flags);
- return cm_node;
- }
- }
- spin_unlock_irqrestore(&cm_core->ht_lock, flags);
-
- /* no owner node */
- return NULL;
-}
-
-/**
- * i40iw_find_listener - find a cm node listening on this addr-port pair
- * @cm_core: cm's core
- * @dst_port: listener tcp port num
- * @dst_addr: listener ip addr
- * @listener_state: state to match with listen node's
- */
-static struct i40iw_cm_listener *i40iw_find_listener(
- struct i40iw_cm_core *cm_core,
- u32 *dst_addr,
- u16 dst_port,
- u16 vlan_id,
- enum i40iw_cm_listener_state
- listener_state)
-{
- struct i40iw_cm_listener *listen_node;
- static const u32 ip_zero[4] = { 0, 0, 0, 0 };
- u32 listen_addr[4];
- u16 listen_port;
- unsigned long flags;
-
- /* walk list and find cm_node associated with this session ID */
- spin_lock_irqsave(&cm_core->listen_list_lock, flags);
- list_for_each_entry(listen_node, &cm_core->listen_nodes, list) {
- memcpy(listen_addr, listen_node->loc_addr, sizeof(listen_addr));
- listen_port = listen_node->loc_port;
- /* compare node pair, return node handle if a match */
- if ((!memcmp(listen_addr, dst_addr, sizeof(listen_addr)) ||
- !memcmp(listen_addr, ip_zero, sizeof(listen_addr))) &&
- (listen_port == dst_port) &&
- (listener_state & listen_node->listener_state)) {
- atomic_inc(&listen_node->ref_count);
- spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
- return listen_node;
- }
- }
- spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
- return NULL;
-}
-
-/**
- * i40iw_add_hte_node - add a cm node to the hash table
- * @cm_core: cm's core
- * @cm_node: connection's node
- */
-static void i40iw_add_hte_node(struct i40iw_cm_core *cm_core,
- struct i40iw_cm_node *cm_node)
-{
- struct list_head *hte;
- unsigned long flags;
-
- if (!cm_node || !cm_core) {
- i40iw_pr_err("cm_node or cm_core == NULL\n");
- return;
- }
- spin_lock_irqsave(&cm_core->ht_lock, flags);
-
- /* get a handle on the hash table element (list head for this slot) */
- hte = &cm_core->connected_nodes;
- list_add_tail(&cm_node->list, hte);
- spin_unlock_irqrestore(&cm_core->ht_lock, flags);
-}
-
-/**
- * listen_port_in_use - determine if port is in use
- * @port: Listen port number
- */
-static bool i40iw_listen_port_in_use(struct i40iw_cm_core *cm_core, u16 port)
-{
- struct i40iw_cm_listener *listen_node;
- unsigned long flags;
- bool ret = false;
-
- spin_lock_irqsave(&cm_core->listen_list_lock, flags);
- list_for_each_entry(listen_node, &cm_core->listen_nodes, list) {
- if (listen_node->loc_port == port) {
- ret = true;
- break;
- }
- }
- spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
- return ret;
-}
-
-/**
- * i40iw_del_multiple_qhash - Remove qhash and child listens
- * @iwdev: iWarp device
- * @cm_info: CM info for parent listen node
- * @cm_parent_listen_node: The parent listen node
- */
-static enum i40iw_status_code i40iw_del_multiple_qhash(
- struct i40iw_device *iwdev,
- struct i40iw_cm_info *cm_info,
- struct i40iw_cm_listener *cm_parent_listen_node)
-{
- struct i40iw_cm_listener *child_listen_node;
- enum i40iw_status_code ret = I40IW_ERR_CONFIG;
- struct list_head *pos, *tpos;
- unsigned long flags;
-
- spin_lock_irqsave(&iwdev->cm_core.listen_list_lock, flags);
- list_for_each_safe(pos, tpos, &cm_parent_listen_node->child_listen_list) {
- child_listen_node = list_entry(pos, struct i40iw_cm_listener, child_listen_list);
- if (child_listen_node->ipv4)
- i40iw_debug(&iwdev->sc_dev,
- I40IW_DEBUG_CM,
- "removing child listen for IP=%pI4, port=%d, vlan=%d\n",
- child_listen_node->loc_addr,
- child_listen_node->loc_port,
- child_listen_node->vlan_id);
- else
- i40iw_debug(&iwdev->sc_dev, I40IW_DEBUG_CM,
- "removing child listen for IP=%pI6, port=%d, vlan=%d\n",
- child_listen_node->loc_addr,
- child_listen_node->loc_port,
- child_listen_node->vlan_id);
- list_del(pos);
- memcpy(cm_info->loc_addr, child_listen_node->loc_addr,
- sizeof(cm_info->loc_addr));
- cm_info->vlan_id = child_listen_node->vlan_id;
- if (child_listen_node->qhash_set) {
- ret = i40iw_manage_qhash(iwdev, cm_info,
- I40IW_QHASH_TYPE_TCP_SYN,
- I40IW_QHASH_MANAGE_TYPE_DELETE,
- NULL, false);
- child_listen_node->qhash_set = false;
- } else {
- ret = I40IW_SUCCESS;
- }
- i40iw_debug(&iwdev->sc_dev,
- I40IW_DEBUG_CM,
- "freed pointer = %p\n",
- child_listen_node);
- kfree(child_listen_node);
- cm_parent_listen_node->cm_core->stats_listen_nodes_destroyed++;
- }
- spin_unlock_irqrestore(&iwdev->cm_core.listen_list_lock, flags);
-
- return ret;
-}
-
-/**
- * i40iw_netdev_vlan_ipv6 - Gets the netdev and mac
- * @addr: local IPv6 address
- * @vlan_id: vlan id for the given IPv6 address
- * @mac: mac address for the given IPv6 address
- *
- * Returns the net_device of the IPv6 address and also sets the
- * vlan id and mac for that address.
- */
-static struct net_device *i40iw_netdev_vlan_ipv6(u32 *addr, u16 *vlan_id, u8 *mac)
-{
- struct net_device *ip_dev = NULL;
- struct in6_addr laddr6;
-
- if (!IS_ENABLED(CONFIG_IPV6))
- return NULL;
- i40iw_copy_ip_htonl(laddr6.in6_u.u6_addr32, addr);
- if (vlan_id)
- *vlan_id = I40IW_NO_VLAN;
- if (mac)
- eth_zero_addr(mac);
- rcu_read_lock();
- for_each_netdev_rcu(&init_net, ip_dev) {
- if (ipv6_chk_addr(&init_net, &laddr6, ip_dev, 1)) {
- if (vlan_id)
- *vlan_id = rdma_vlan_dev_vlan_id(ip_dev);
- if (ip_dev->dev_addr && mac)
- ether_addr_copy(mac, ip_dev->dev_addr);
- break;
- }
- }
- rcu_read_unlock();
- return ip_dev;
-}
-
-/**
- * i40iw_get_vlan_ipv4 - Returns the vlan_id for IPv4 address
- * @addr: local IPv4 address
- */
-static u16 i40iw_get_vlan_ipv4(u32 *addr)
-{
- struct net_device *netdev;
- u16 vlan_id = I40IW_NO_VLAN;
-
- netdev = ip_dev_find(&init_net, htonl(addr[0]));
- if (netdev) {
- vlan_id = rdma_vlan_dev_vlan_id(netdev);
- dev_put(netdev);
- }
- return vlan_id;
-}
-
-/**
- * i40iw_add_mqh_6 - Adds multiple qhashes for IPv6
- * @iwdev: iWarp device
- * @cm_info: CM info for parent listen node
- * @cm_parent_listen_node: The parent listen node
- *
- * Adds a qhash and a child listen node for every IPv6 address
- * on the adapter and adds the associated qhash filter
- */
-static enum i40iw_status_code i40iw_add_mqh_6(struct i40iw_device *iwdev,
- struct i40iw_cm_info *cm_info,
- struct i40iw_cm_listener *cm_parent_listen_node)
-{
- struct net_device *ip_dev;
- struct inet6_dev *idev;
- struct inet6_ifaddr *ifp, *tmp;
- enum i40iw_status_code ret = 0;
- struct i40iw_cm_listener *child_listen_node;
- unsigned long flags;
-
- rtnl_lock();
- for_each_netdev_rcu(&init_net, ip_dev) {
- if ((((rdma_vlan_dev_vlan_id(ip_dev) < I40IW_NO_VLAN) &&
- (rdma_vlan_dev_real_dev(ip_dev) == iwdev->netdev)) ||
- (ip_dev == iwdev->netdev)) && (ip_dev->flags & IFF_UP)) {
- idev = __in6_dev_get(ip_dev);
- if (!idev) {
- i40iw_pr_err("idev == NULL\n");
- break;
- }
- list_for_each_entry_safe(ifp, tmp, &idev->addr_list, if_list) {
- i40iw_debug(&iwdev->sc_dev,
- I40IW_DEBUG_CM,
- "IP=%pI6, vlan_id=%d, MAC=%pM\n",
- &ifp->addr,
- rdma_vlan_dev_vlan_id(ip_dev),
- ip_dev->dev_addr);
- child_listen_node =
- kzalloc(sizeof(*child_listen_node), GFP_ATOMIC);
- i40iw_debug(&iwdev->sc_dev,
- I40IW_DEBUG_CM,
- "Allocating child listener %p\n",
- child_listen_node);
- if (!child_listen_node) {
- ret = I40IW_ERR_NO_MEMORY;
- goto exit;
- }
- cm_info->vlan_id = rdma_vlan_dev_vlan_id(ip_dev);
- cm_parent_listen_node->vlan_id = cm_info->vlan_id;
-
- memcpy(child_listen_node, cm_parent_listen_node,
- sizeof(*child_listen_node));
-
- i40iw_copy_ip_ntohl(child_listen_node->loc_addr,
- ifp->addr.in6_u.u6_addr32);
- memcpy(cm_info->loc_addr, child_listen_node->loc_addr,
- sizeof(cm_info->loc_addr));
-
- ret = i40iw_manage_qhash(iwdev, cm_info,
- I40IW_QHASH_TYPE_TCP_SYN,
- I40IW_QHASH_MANAGE_TYPE_ADD,
- NULL, true);
- if (!ret) {
- child_listen_node->qhash_set = true;
- spin_lock_irqsave(&iwdev->cm_core.listen_list_lock, flags);
- list_add(&child_listen_node->child_listen_list,
- &cm_parent_listen_node->child_listen_list);
- spin_unlock_irqrestore(&iwdev->cm_core.listen_list_lock, flags);
- cm_parent_listen_node->cm_core->stats_listen_nodes_created++;
- } else {
- kfree(child_listen_node);
- }
- }
- }
- }
-exit:
- rtnl_unlock();
- return ret;
-}
-
-/**
- * i40iw_add_mqh_4 - Adds multiple qhashes for IPv4
- * @iwdev: iWarp device
- * @cm_info: CM info for parent listen node
- * @cm_parent_listen_node: The parent listen node
- *
- * Adds a qhash and a child listen node for every IPv4 address
- * on the adapter and adds the associated qhash filter
- */
-static enum i40iw_status_code i40iw_add_mqh_4(
- struct i40iw_device *iwdev,
- struct i40iw_cm_info *cm_info,
- struct i40iw_cm_listener *cm_parent_listen_node)
-{
- struct net_device *dev;
- struct in_device *idev;
- struct i40iw_cm_listener *child_listen_node;
- enum i40iw_status_code ret = 0;
- unsigned long flags;
-
- rtnl_lock();
- for_each_netdev(&init_net, dev) {
- if ((((rdma_vlan_dev_vlan_id(dev) < I40IW_NO_VLAN) &&
- (rdma_vlan_dev_real_dev(dev) == iwdev->netdev)) ||
- (dev == iwdev->netdev)) && (dev->flags & IFF_UP)) {
- idev = in_dev_get(dev);
- for_ifa(idev) {
- i40iw_debug(&iwdev->sc_dev,
- I40IW_DEBUG_CM,
- "Allocating child CM Listener forIP=%pI4, vlan_id=%d, MAC=%pM\n",
- &ifa->ifa_address,
- rdma_vlan_dev_vlan_id(dev),
- dev->dev_addr);
- child_listen_node = kzalloc(sizeof(*child_listen_node), GFP_ATOMIC);
- cm_parent_listen_node->cm_core->stats_listen_nodes_created++;
- i40iw_debug(&iwdev->sc_dev,
- I40IW_DEBUG_CM,
- "Allocating child listener %p\n",
- child_listen_node);
- if (!child_listen_node) {
- in_dev_put(idev);
- ret = I40IW_ERR_NO_MEMORY;
- goto exit;
- }
- cm_info->vlan_id = rdma_vlan_dev_vlan_id(dev);
- cm_parent_listen_node->vlan_id = cm_info->vlan_id;
- memcpy(child_listen_node,
- cm_parent_listen_node,
- sizeof(*child_listen_node));
-
- child_listen_node->loc_addr[0] = ntohl(ifa->ifa_address);
- memcpy(cm_info->loc_addr, child_listen_node->loc_addr,
- sizeof(cm_info->loc_addr));
-
- ret = i40iw_manage_qhash(iwdev,
- cm_info,
- I40IW_QHASH_TYPE_TCP_SYN,
- I40IW_QHASH_MANAGE_TYPE_ADD,
- NULL,
- true);
- if (!ret) {
- child_listen_node->qhash_set = true;
- spin_lock_irqsave(&iwdev->cm_core.listen_list_lock, flags);
- list_add(&child_listen_node->child_listen_list,
- &cm_parent_listen_node->child_listen_list);
- spin_unlock_irqrestore(&iwdev->cm_core.listen_list_lock, flags);
- } else {
- kfree(child_listen_node);
- cm_parent_listen_node->cm_core->stats_listen_nodes_created--;
- }
- }
- endfor_ifa(idev);
- in_dev_put(idev);
- }
- }
-exit:
- rtnl_unlock();
- return ret;
-}
-
-/**
- * i40iw_dec_refcnt_listen - delete listener and associated cm nodes
- * @cm_core: cm's core
- * @free_hanging_nodes: to free associated cm_nodes
- * @apbvt_del: flag to delete the apbvt
- */
-static int i40iw_dec_refcnt_listen(struct i40iw_cm_core *cm_core,
- struct i40iw_cm_listener *listener,
- int free_hanging_nodes, bool apbvt_del)
-{
- int ret = -EINVAL;
- int err = 0;
- struct list_head *list_pos;
- struct list_head *list_temp;
- struct i40iw_cm_node *cm_node;
- struct list_head reset_list;
- struct i40iw_cm_info nfo;
- struct i40iw_cm_node *loopback;
- enum i40iw_cm_node_state old_state;
- unsigned long flags;
-
- /* free non-accelerated child nodes for this listener */
- INIT_LIST_HEAD(&reset_list);
- if (free_hanging_nodes) {
- spin_lock_irqsave(&cm_core->ht_lock, flags);
- list_for_each_safe(list_pos, list_temp, &cm_core->connected_nodes) {
- cm_node = container_of(list_pos, struct i40iw_cm_node, list);
- if ((cm_node->listener == listener) && !cm_node->accelerated) {
- atomic_inc(&cm_node->ref_count);
- list_add(&cm_node->reset_entry, &reset_list);
- }
- }
- spin_unlock_irqrestore(&cm_core->ht_lock, flags);
- }
-
- list_for_each_safe(list_pos, list_temp, &reset_list) {
- cm_node = container_of(list_pos, struct i40iw_cm_node, reset_entry);
- loopback = cm_node->loopbackpartner;
- if (cm_node->state >= I40IW_CM_STATE_FIN_WAIT1) {
- i40iw_rem_ref_cm_node(cm_node);
- } else {
- if (!loopback) {
- i40iw_cleanup_retrans_entry(cm_node);
- err = i40iw_send_reset(cm_node);
- if (err) {
- cm_node->state = I40IW_CM_STATE_CLOSED;
- i40iw_pr_err("send reset\n");
- } else {
- old_state = cm_node->state;
- cm_node->state = I40IW_CM_STATE_LISTENER_DESTROYED;
- if (old_state != I40IW_CM_STATE_MPAREQ_RCVD)
- i40iw_rem_ref_cm_node(cm_node);
- }
- } else {
- struct i40iw_cm_event event;
-
- event.cm_node = loopback;
- memcpy(event.cm_info.rem_addr,
- loopback->rem_addr, sizeof(event.cm_info.rem_addr));
- memcpy(event.cm_info.loc_addr,
- loopback->loc_addr, sizeof(event.cm_info.loc_addr));
- event.cm_info.rem_port = loopback->rem_port;
- event.cm_info.loc_port = loopback->loc_port;
- event.cm_info.cm_id = loopback->cm_id;
- event.cm_info.ipv4 = loopback->ipv4;
- atomic_inc(&loopback->ref_count);
- loopback->state = I40IW_CM_STATE_CLOSED;
- i40iw_event_connect_error(&event);
- cm_node->state = I40IW_CM_STATE_LISTENER_DESTROYED;
- i40iw_rem_ref_cm_node(cm_node);
- }
- }
- }
-
- if (!atomic_dec_return(&listener->ref_count)) {
- spin_lock_irqsave(&cm_core->listen_list_lock, flags);
- list_del(&listener->list);
- spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
-
- if (listener->iwdev) {
- if (apbvt_del && !i40iw_listen_port_in_use(cm_core, listener->loc_port))
- i40iw_manage_apbvt(listener->iwdev,
- listener->loc_port,
- I40IW_MANAGE_APBVT_DEL);
-
- memcpy(nfo.loc_addr, listener->loc_addr, sizeof(nfo.loc_addr));
- nfo.loc_port = listener->loc_port;
- nfo.ipv4 = listener->ipv4;
- nfo.vlan_id = listener->vlan_id;
- nfo.user_pri = listener->user_pri;
-
- if (!list_empty(&listener->child_listen_list)) {
- i40iw_del_multiple_qhash(listener->iwdev, &nfo, listener);
- } else {
- if (listener->qhash_set)
- i40iw_manage_qhash(listener->iwdev,
- &nfo,
- I40IW_QHASH_TYPE_TCP_SYN,
- I40IW_QHASH_MANAGE_TYPE_DELETE,
- NULL,
- false);
- }
- }
-
- cm_core->stats_listen_destroyed++;
- kfree(listener);
- cm_core->stats_listen_nodes_destroyed++;
- listener = NULL;
- ret = 0;
- }
-
- if (listener) {
- if (atomic_read(&listener->pend_accepts_cnt) > 0)
- i40iw_debug(cm_core->dev,
- I40IW_DEBUG_CM,
- "%s: listener (%p) pending accepts=%u\n",
- __func__,
- listener,
- atomic_read(&listener->pend_accepts_cnt));
- }
-
- return ret;
-}
-
-/**
- * i40iw_cm_del_listen - delete a linstener
- * @cm_core: cm's core
- * @listener: passive connection's listener
- * @apbvt_del: flag to delete apbvt
- */
-static int i40iw_cm_del_listen(struct i40iw_cm_core *cm_core,
- struct i40iw_cm_listener *listener,
- bool apbvt_del)
-{
- listener->listener_state = I40IW_CM_LISTENER_PASSIVE_STATE;
- listener->cm_id = NULL; /* going to be destroyed pretty soon */
- return i40iw_dec_refcnt_listen(cm_core, listener, 1, apbvt_del);
-}
-
-/**
- * i40iw_addr_resolve_neigh - resolve neighbor address
- * @iwdev: iwarp device structure
- * @src_ip: local ip address
- * @dst_ip: remote ip address
- * @arpindex: if there is an arp entry
- */
-static int i40iw_addr_resolve_neigh(struct i40iw_device *iwdev,
- u32 src_ip,
- u32 dst_ip,
- int arpindex)
-{
- struct rtable *rt;
- struct neighbour *neigh;
- int rc = arpindex;
- struct net_device *netdev = iwdev->netdev;
- __be32 dst_ipaddr = htonl(dst_ip);
- __be32 src_ipaddr = htonl(src_ip);
-
- rt = ip_route_output(&init_net, dst_ipaddr, src_ipaddr, 0, 0);
- if (IS_ERR(rt)) {
- i40iw_pr_err("ip_route_output\n");
- return rc;
- }
-
- if (netif_is_bond_slave(netdev))
- netdev = netdev_master_upper_dev_get(netdev);
-
- neigh = dst_neigh_lookup(&rt->dst, &dst_ipaddr);
-
- rcu_read_lock();
- if (neigh) {
- if (neigh->nud_state & NUD_VALID) {
- if (arpindex >= 0) {
- if (ether_addr_equal(iwdev->arp_table[arpindex].mac_addr,
- neigh->ha))
- /* Mac address same as arp table */
- goto resolve_neigh_exit;
- i40iw_manage_arp_cache(iwdev,
- iwdev->arp_table[arpindex].mac_addr,
- &dst_ip,
- true,
- I40IW_ARP_DELETE);
- }
-
- i40iw_manage_arp_cache(iwdev, neigh->ha, &dst_ip, true, I40IW_ARP_ADD);
- rc = i40iw_arp_table(iwdev, &dst_ip, true, NULL, I40IW_ARP_RESOLVE);
- } else {
- neigh_event_send(neigh, NULL);
- }
- }
- resolve_neigh_exit:
-
- rcu_read_unlock();
- if (neigh)
- neigh_release(neigh);
-
- ip_rt_put(rt);
- return rc;
-}
-
-/**
- * i40iw_get_dst_ipv6
- */
-static struct dst_entry *i40iw_get_dst_ipv6(struct sockaddr_in6 *src_addr,
- struct sockaddr_in6 *dst_addr)
-{
- struct dst_entry *dst;
- struct flowi6 fl6;
-
- memset(&fl6, 0, sizeof(fl6));
- fl6.daddr = dst_addr->sin6_addr;
- fl6.saddr = src_addr->sin6_addr;
- if (ipv6_addr_type(&fl6.daddr) & IPV6_ADDR_LINKLOCAL)
- fl6.flowi6_oif = dst_addr->sin6_scope_id;
-
- dst = ip6_route_output(&init_net, NULL, &fl6);
- return dst;
-}
-
-/**
- * i40iw_addr_resolve_neigh_ipv6 - resolve neighbor ipv6 address
- * @iwdev: iwarp device structure
- * @dst_ip: remote ip address
- * @arpindex: if there is an arp entry
- */
-static int i40iw_addr_resolve_neigh_ipv6(struct i40iw_device *iwdev,
- u32 *src,
- u32 *dest,
- int arpindex)
-{
- struct neighbour *neigh;
- int rc = arpindex;
- struct net_device *netdev = iwdev->netdev;
- struct dst_entry *dst;
- struct sockaddr_in6 dst_addr;
- struct sockaddr_in6 src_addr;
-
- memset(&dst_addr, 0, sizeof(dst_addr));
- dst_addr.sin6_family = AF_INET6;
- i40iw_copy_ip_htonl(dst_addr.sin6_addr.in6_u.u6_addr32, dest);
- memset(&src_addr, 0, sizeof(src_addr));
- src_addr.sin6_family = AF_INET6;
- i40iw_copy_ip_htonl(src_addr.sin6_addr.in6_u.u6_addr32, src);
- dst = i40iw_get_dst_ipv6(&src_addr, &dst_addr);
- if (!dst || dst->error) {
- if (dst) {
- dst_release(dst);
- i40iw_pr_err("ip6_route_output returned dst->error = %d\n",
- dst->error);
- }
- return rc;
- }
-
- if (netif_is_bond_slave(netdev))
- netdev = netdev_master_upper_dev_get(netdev);
-
- neigh = dst_neigh_lookup(dst, &dst_addr);
-
- rcu_read_lock();
- if (neigh) {
- i40iw_debug(&iwdev->sc_dev, I40IW_DEBUG_CM, "dst_neigh_lookup MAC=%pM\n", neigh->ha);
- if (neigh->nud_state & NUD_VALID) {
- if (arpindex >= 0) {
- if (ether_addr_equal
- (iwdev->arp_table[arpindex].mac_addr,
- neigh->ha)) {
- /* Mac address same as in arp table */
- goto resolve_neigh_exit6;
- }
- i40iw_manage_arp_cache(iwdev,
- iwdev->arp_table[arpindex].mac_addr,
- dest,
- false,
- I40IW_ARP_DELETE);
- }
- i40iw_manage_arp_cache(iwdev,
- neigh->ha,
- dest,
- false,
- I40IW_ARP_ADD);
- rc = i40iw_arp_table(iwdev,
- dest,
- false,
- NULL,
- I40IW_ARP_RESOLVE);
- } else {
- neigh_event_send(neigh, NULL);
- }
- }
-
- resolve_neigh_exit6:
- rcu_read_unlock();
- if (neigh)
- neigh_release(neigh);
- dst_release(dst);
- return rc;
-}
-
-/**
- * i40iw_ipv4_is_loopback - check if loopback
- * @loc_addr: local addr to compare
- * @rem_addr: remote address
- */
-static bool i40iw_ipv4_is_loopback(u32 loc_addr, u32 rem_addr)
-{
- return ipv4_is_loopback(htonl(rem_addr)) || (loc_addr == rem_addr);
-}
-
-/**
- * i40iw_ipv6_is_loopback - check if loopback
- * @loc_addr: local addr to compare
- * @rem_addr: remote address
- */
-static bool i40iw_ipv6_is_loopback(u32 *loc_addr, u32 *rem_addr)
-{
- struct in6_addr raddr6;
-
- i40iw_copy_ip_htonl(raddr6.in6_u.u6_addr32, rem_addr);
- return !memcmp(loc_addr, rem_addr, 16) || ipv6_addr_loopback(&raddr6);
-}
-
-/**
- * i40iw_make_cm_node - create a new instance of a cm node
- * @cm_core: cm's core
- * @iwdev: iwarp device structure
- * @cm_info: quad info for connection
- * @listener: passive connection's listener
- */
-static struct i40iw_cm_node *i40iw_make_cm_node(
- struct i40iw_cm_core *cm_core,
- struct i40iw_device *iwdev,
- struct i40iw_cm_info *cm_info,
- struct i40iw_cm_listener *listener)
-{
- struct i40iw_cm_node *cm_node;
- struct timespec ts;
- int oldarpindex;
- int arpindex;
- struct net_device *netdev = iwdev->netdev;
-
- /* create an hte and cm_node for this instance */
- cm_node = kzalloc(sizeof(*cm_node), GFP_ATOMIC);
- if (!cm_node)
- return NULL;
-
- /* set our node specific transport info */
- cm_node->ipv4 = cm_info->ipv4;
- cm_node->vlan_id = cm_info->vlan_id;
- if ((cm_node->vlan_id == I40IW_NO_VLAN) && iwdev->dcb)
- cm_node->vlan_id = 0;
- cm_node->tos = cm_info->tos;
- cm_node->user_pri = cm_info->user_pri;
- if (listener) {
- if (listener->tos != cm_info->tos)
- i40iw_debug(&iwdev->sc_dev, I40IW_DEBUG_DCB,
- "application TOS[%d] and remote client TOS[%d] mismatch\n",
- listener->tos, cm_info->tos);
- cm_node->tos = max(listener->tos, cm_info->tos);
- cm_node->user_pri = rt_tos2priority(cm_node->tos);
- i40iw_debug(&iwdev->sc_dev, I40IW_DEBUG_DCB, "listener: TOS:[%d] UP:[%d]\n",
- cm_node->tos, cm_node->user_pri);
- }
- memcpy(cm_node->loc_addr, cm_info->loc_addr, sizeof(cm_node->loc_addr));
- memcpy(cm_node->rem_addr, cm_info->rem_addr, sizeof(cm_node->rem_addr));
- cm_node->loc_port = cm_info->loc_port;
- cm_node->rem_port = cm_info->rem_port;
-
- cm_node->mpa_frame_rev = iwdev->mpa_version;
- cm_node->send_rdma0_op = SEND_RDMA_READ_ZERO;
- cm_node->ird_size = I40IW_MAX_IRD_SIZE;
- cm_node->ord_size = I40IW_MAX_ORD_SIZE;
-
- cm_node->listener = listener;
- cm_node->cm_id = cm_info->cm_id;
- ether_addr_copy(cm_node->loc_mac, netdev->dev_addr);
- spin_lock_init(&cm_node->retrans_list_lock);
-
- atomic_set(&cm_node->ref_count, 1);
- /* associate our parent CM core */
- cm_node->cm_core = cm_core;
- cm_node->tcp_cntxt.loc_id = I40IW_CM_DEF_LOCAL_ID;
- cm_node->tcp_cntxt.rcv_wscale = I40IW_CM_DEFAULT_RCV_WND_SCALE;
- cm_node->tcp_cntxt.rcv_wnd =
- I40IW_CM_DEFAULT_RCV_WND_SCALED >> I40IW_CM_DEFAULT_RCV_WND_SCALE;
- ts = current_kernel_time();
- cm_node->tcp_cntxt.loc_seq_num = ts.tv_nsec;
- cm_node->tcp_cntxt.mss = iwdev->vsi.mss;
-
- cm_node->iwdev = iwdev;
- cm_node->dev = &iwdev->sc_dev;
-
- if ((cm_node->ipv4 &&
- i40iw_ipv4_is_loopback(cm_node->loc_addr[0], cm_node->rem_addr[0])) ||
- (!cm_node->ipv4 && i40iw_ipv6_is_loopback(cm_node->loc_addr,
- cm_node->rem_addr))) {
- arpindex = i40iw_arp_table(iwdev,
- cm_node->rem_addr,
- false,
- NULL,
- I40IW_ARP_RESOLVE);
- } else {
- oldarpindex = i40iw_arp_table(iwdev,
- cm_node->rem_addr,
- false,
- NULL,
- I40IW_ARP_RESOLVE);
- if (cm_node->ipv4)
- arpindex = i40iw_addr_resolve_neigh(iwdev,
- cm_info->loc_addr[0],
- cm_info->rem_addr[0],
- oldarpindex);
- else if (IS_ENABLED(CONFIG_IPV6))
- arpindex = i40iw_addr_resolve_neigh_ipv6(iwdev,
- cm_info->loc_addr,
- cm_info->rem_addr,
- oldarpindex);
- else
- arpindex = -EINVAL;
- }
- if (arpindex < 0) {
- i40iw_pr_err("cm_node arpindex\n");
- kfree(cm_node);
- return NULL;
- }
- ether_addr_copy(cm_node->rem_mac, iwdev->arp_table[arpindex].mac_addr);
- i40iw_add_hte_node(cm_core, cm_node);
- cm_core->stats_nodes_created++;
- return cm_node;
-}
-
-/**
- * i40iw_rem_ref_cm_node - destroy an instance of a cm node
- * @cm_node: connection's node
- */
-static void i40iw_rem_ref_cm_node(struct i40iw_cm_node *cm_node)
-{
- struct i40iw_cm_core *cm_core = cm_node->cm_core;
- struct i40iw_qp *iwqp;
- struct i40iw_cm_info nfo;
- unsigned long flags;
-
- spin_lock_irqsave(&cm_node->cm_core->ht_lock, flags);
- if (atomic_dec_return(&cm_node->ref_count)) {
- spin_unlock_irqrestore(&cm_node->cm_core->ht_lock, flags);
- return;
- }
- list_del(&cm_node->list);
- spin_unlock_irqrestore(&cm_node->cm_core->ht_lock, flags);
-
- /* if the node is destroyed before connection was accelerated */
- if (!cm_node->accelerated && cm_node->accept_pend) {
- pr_err("node destroyed before established\n");
- atomic_dec(&cm_node->listener->pend_accepts_cnt);
- }
- if (cm_node->close_entry)
- i40iw_handle_close_entry(cm_node, 0);
- if (cm_node->listener) {
- i40iw_dec_refcnt_listen(cm_core, cm_node->listener, 0, true);
- } else {
- if (!i40iw_listen_port_in_use(cm_core, cm_node->loc_port) &&
- cm_node->apbvt_set) {
- i40iw_manage_apbvt(cm_node->iwdev,
- cm_node->loc_port,
- I40IW_MANAGE_APBVT_DEL);
- i40iw_get_addr_info(cm_node, &nfo);
- if (cm_node->qhash_set) {
- i40iw_manage_qhash(cm_node->iwdev,
- &nfo,
- I40IW_QHASH_TYPE_TCP_ESTABLISHED,
- I40IW_QHASH_MANAGE_TYPE_DELETE,
- NULL,
- false);
- cm_node->qhash_set = 0;
- }
- }
- }
-
- iwqp = cm_node->iwqp;
- if (iwqp) {
- iwqp->cm_node = NULL;
- i40iw_rem_ref(&iwqp->ibqp);
- cm_node->iwqp = NULL;
- } else if (cm_node->qhash_set) {
- i40iw_get_addr_info(cm_node, &nfo);
- i40iw_manage_qhash(cm_node->iwdev,
- &nfo,
- I40IW_QHASH_TYPE_TCP_ESTABLISHED,
- I40IW_QHASH_MANAGE_TYPE_DELETE,
- NULL,
- false);
- cm_node->qhash_set = 0;
- }
-
- cm_node->cm_core->stats_nodes_destroyed++;
- kfree(cm_node);
-}
-
-/**
- * i40iw_handle_fin_pkt - FIN packet received
- * @cm_node: connection's node
- */
-static void i40iw_handle_fin_pkt(struct i40iw_cm_node *cm_node)
-{
- u32 ret;
-
- switch (cm_node->state) {
- case I40IW_CM_STATE_SYN_RCVD:
- case I40IW_CM_STATE_SYN_SENT:
- case I40IW_CM_STATE_ESTABLISHED:
- case I40IW_CM_STATE_MPAREJ_RCVD:
- cm_node->tcp_cntxt.rcv_nxt++;
- i40iw_cleanup_retrans_entry(cm_node);
- cm_node->state = I40IW_CM_STATE_LAST_ACK;
- i40iw_send_fin(cm_node);
- break;
- case I40IW_CM_STATE_MPAREQ_SENT:
- i40iw_create_event(cm_node, I40IW_CM_EVENT_ABORTED);
- cm_node->tcp_cntxt.rcv_nxt++;
- i40iw_cleanup_retrans_entry(cm_node);
- cm_node->state = I40IW_CM_STATE_CLOSED;
- atomic_inc(&cm_node->ref_count);
- i40iw_send_reset(cm_node);
- break;
- case I40IW_CM_STATE_FIN_WAIT1:
- cm_node->tcp_cntxt.rcv_nxt++;
- i40iw_cleanup_retrans_entry(cm_node);
- cm_node->state = I40IW_CM_STATE_CLOSING;
- i40iw_send_ack(cm_node);
- /*
- * Wait for ACK as this is simultaneous close.
- * After we receive ACK, do not send anything.
- * Just rm the node.
- */
- break;
- case I40IW_CM_STATE_FIN_WAIT2:
- cm_node->tcp_cntxt.rcv_nxt++;
- i40iw_cleanup_retrans_entry(cm_node);
- cm_node->state = I40IW_CM_STATE_TIME_WAIT;
- i40iw_send_ack(cm_node);
- ret =
- i40iw_schedule_cm_timer(cm_node, NULL, I40IW_TIMER_TYPE_CLOSE, 1, 0);
- if (ret)
- i40iw_pr_err("node %p state = %d\n", cm_node, cm_node->state);
- break;
- case I40IW_CM_STATE_TIME_WAIT:
- cm_node->tcp_cntxt.rcv_nxt++;
- i40iw_cleanup_retrans_entry(cm_node);
- cm_node->state = I40IW_CM_STATE_CLOSED;
- i40iw_rem_ref_cm_node(cm_node);
- break;
- case I40IW_CM_STATE_OFFLOADED:
- default:
- i40iw_pr_err("bad state node %p state = %d\n", cm_node, cm_node->state);
- break;
- }
-}
-
-/**
- * i40iw_handle_rst_pkt - process received RST packet
- * @cm_node: connection's node
- * @rbuf: receive buffer
- */
-static void i40iw_handle_rst_pkt(struct i40iw_cm_node *cm_node,
- struct i40iw_puda_buf *rbuf)
-{
- i40iw_cleanup_retrans_entry(cm_node);
- switch (cm_node->state) {
- case I40IW_CM_STATE_SYN_SENT:
- case I40IW_CM_STATE_MPAREQ_SENT:
- switch (cm_node->mpa_frame_rev) {
- case IETF_MPA_V2:
- cm_node->mpa_frame_rev = IETF_MPA_V1;
- /* send a syn and goto syn sent state */
- cm_node->state = I40IW_CM_STATE_SYN_SENT;
- if (i40iw_send_syn(cm_node, 0))
- i40iw_active_open_err(cm_node, false);
- break;
- case IETF_MPA_V1:
- default:
- i40iw_active_open_err(cm_node, false);
- break;
- }
- break;
- case I40IW_CM_STATE_MPAREQ_RCVD:
- atomic_add_return(1, &cm_node->passive_state);
- break;
- case I40IW_CM_STATE_ESTABLISHED:
- case I40IW_CM_STATE_SYN_RCVD:
- case I40IW_CM_STATE_LISTENING:
- i40iw_pr_err("Bad state state = %d\n", cm_node->state);
- i40iw_passive_open_err(cm_node, false);
- break;
- case I40IW_CM_STATE_OFFLOADED:
- i40iw_active_open_err(cm_node, false);
- break;
- case I40IW_CM_STATE_CLOSED:
- break;
- case I40IW_CM_STATE_FIN_WAIT2:
- case I40IW_CM_STATE_FIN_WAIT1:
- case I40IW_CM_STATE_LAST_ACK:
- cm_node->cm_id->rem_ref(cm_node->cm_id);
- case I40IW_CM_STATE_TIME_WAIT:
- cm_node->state = I40IW_CM_STATE_CLOSED;
- i40iw_rem_ref_cm_node(cm_node);
- break;
- default:
- break;
- }
-}
-
-/**
- * i40iw_handle_rcv_mpa - Process a recv'd mpa buffer
- * @cm_node: connection's node
- * @rbuf: receive buffer
- */
-static void i40iw_handle_rcv_mpa(struct i40iw_cm_node *cm_node,
- struct i40iw_puda_buf *rbuf)
-{
- int ret;
- int datasize = rbuf->datalen;
- u8 *dataloc = rbuf->data;
-
- enum i40iw_cm_event_type type = I40IW_CM_EVENT_UNKNOWN;
- u32 res_type;
-
- ret = i40iw_parse_mpa(cm_node, dataloc, &res_type, datasize);
- if (ret) {
- if (cm_node->state == I40IW_CM_STATE_MPAREQ_SENT)
- i40iw_active_open_err(cm_node, true);
- else
- i40iw_passive_open_err(cm_node, true);
- return;
- }
-
- switch (cm_node->state) {
- case I40IW_CM_STATE_ESTABLISHED:
- if (res_type == I40IW_MPA_REQUEST_REJECT)
- i40iw_pr_err("state for reject\n");
- cm_node->state = I40IW_CM_STATE_MPAREQ_RCVD;
- type = I40IW_CM_EVENT_MPA_REQ;
- i40iw_send_ack(cm_node); /* ACK received MPA request */
- atomic_set(&cm_node->passive_state,
- I40IW_PASSIVE_STATE_INDICATED);
- break;
- case I40IW_CM_STATE_MPAREQ_SENT:
- i40iw_cleanup_retrans_entry(cm_node);
- if (res_type == I40IW_MPA_REQUEST_REJECT) {
- type = I40IW_CM_EVENT_MPA_REJECT;
- cm_node->state = I40IW_CM_STATE_MPAREJ_RCVD;
- } else {
- type = I40IW_CM_EVENT_CONNECTED;
- cm_node->state = I40IW_CM_STATE_OFFLOADED;
- i40iw_send_ack(cm_node);
- }
- break;
- default:
- pr_err("%s wrong cm_node state =%d\n", __func__, cm_node->state);
- break;
- }
- i40iw_create_event(cm_node, type);
-}
-
-/**
- * i40iw_indicate_pkt_err - Send up err event to cm
- * @cm_node: connection's node
- */
-static void i40iw_indicate_pkt_err(struct i40iw_cm_node *cm_node)
-{
- switch (cm_node->state) {
- case I40IW_CM_STATE_SYN_SENT:
- case I40IW_CM_STATE_MPAREQ_SENT:
- i40iw_active_open_err(cm_node, true);
- break;
- case I40IW_CM_STATE_ESTABLISHED:
- case I40IW_CM_STATE_SYN_RCVD:
- i40iw_passive_open_err(cm_node, true);
- break;
- case I40IW_CM_STATE_OFFLOADED:
- default:
- break;
- }
-}
-
-/**
- * i40iw_check_syn - Check for error on received syn ack
- * @cm_node: connection's node
- * @tcph: pointer tcp header
- */
-static int i40iw_check_syn(struct i40iw_cm_node *cm_node, struct tcphdr *tcph)
-{
- int err = 0;
-
- if (ntohl(tcph->ack_seq) != cm_node->tcp_cntxt.loc_seq_num) {
- err = 1;
- i40iw_active_open_err(cm_node, true);
- }
- return err;
-}
-
-/**
- * i40iw_check_seq - check seq numbers if OK
- * @cm_node: connection's node
- * @tcph: pointer tcp header
- */
-static int i40iw_check_seq(struct i40iw_cm_node *cm_node, struct tcphdr *tcph)
-{
- int err = 0;
- u32 seq;
- u32 ack_seq;
- u32 loc_seq_num = cm_node->tcp_cntxt.loc_seq_num;
- u32 rcv_nxt = cm_node->tcp_cntxt.rcv_nxt;
- u32 rcv_wnd;
-
- seq = ntohl(tcph->seq);
- ack_seq = ntohl(tcph->ack_seq);
- rcv_wnd = cm_node->tcp_cntxt.rcv_wnd;
- if (ack_seq != loc_seq_num)
- err = -1;
- else if (!between(seq, rcv_nxt, (rcv_nxt + rcv_wnd)))
- err = -1;
- if (err) {
- i40iw_pr_err("seq number\n");
- i40iw_indicate_pkt_err(cm_node);
- }
- return err;
-}
-
-/**
- * i40iw_handle_syn_pkt - is for Passive node
- * @cm_node: connection's node
- * @rbuf: receive buffer
- */
-static void i40iw_handle_syn_pkt(struct i40iw_cm_node *cm_node,
- struct i40iw_puda_buf *rbuf)
-{
- struct tcphdr *tcph = (struct tcphdr *)rbuf->tcph;
- int ret;
- u32 inc_sequence;
- int optionsize;
- struct i40iw_cm_info nfo;
-
- optionsize = (tcph->doff << 2) - sizeof(struct tcphdr);
- inc_sequence = ntohl(tcph->seq);
-
- switch (cm_node->state) {
- case I40IW_CM_STATE_SYN_SENT:
- case I40IW_CM_STATE_MPAREQ_SENT:
- /* Rcvd syn on active open connection */
- i40iw_active_open_err(cm_node, 1);
- break;
- case I40IW_CM_STATE_LISTENING:
- /* Passive OPEN */
- if (atomic_read(&cm_node->listener->pend_accepts_cnt) >
- cm_node->listener->backlog) {
- cm_node->cm_core->stats_backlog_drops++;
- i40iw_passive_open_err(cm_node, false);
- break;
- }
- ret = i40iw_handle_tcp_options(cm_node, tcph, optionsize, 1);
- if (ret) {
- i40iw_passive_open_err(cm_node, false);
- /* drop pkt */
- break;
- }
- cm_node->tcp_cntxt.rcv_nxt = inc_sequence + 1;
- cm_node->accept_pend = 1;
- atomic_inc(&cm_node->listener->pend_accepts_cnt);
-
- cm_node->state = I40IW_CM_STATE_SYN_RCVD;
- i40iw_get_addr_info(cm_node, &nfo);
- ret = i40iw_manage_qhash(cm_node->iwdev,
- &nfo,
- I40IW_QHASH_TYPE_TCP_ESTABLISHED,
- I40IW_QHASH_MANAGE_TYPE_ADD,
- (void *)cm_node,
- false);
- cm_node->qhash_set = true;
- break;
- case I40IW_CM_STATE_CLOSED:
- i40iw_cleanup_retrans_entry(cm_node);
- atomic_inc(&cm_node->ref_count);
- i40iw_send_reset(cm_node);
- break;
- case I40IW_CM_STATE_OFFLOADED:
- case I40IW_CM_STATE_ESTABLISHED:
- case I40IW_CM_STATE_FIN_WAIT1:
- case I40IW_CM_STATE_FIN_WAIT2:
- case I40IW_CM_STATE_MPAREQ_RCVD:
- case I40IW_CM_STATE_LAST_ACK:
- case I40IW_CM_STATE_CLOSING:
- case I40IW_CM_STATE_UNKNOWN:
- default:
- break;
- }
-}
-
-/**
- * i40iw_handle_synack_pkt - Process SYN+ACK packet (active side)
- * @cm_node: connection's node
- * @rbuf: receive buffer
- */
-static void i40iw_handle_synack_pkt(struct i40iw_cm_node *cm_node,
- struct i40iw_puda_buf *rbuf)
-{
- struct tcphdr *tcph = (struct tcphdr *)rbuf->tcph;
- int ret;
- u32 inc_sequence;
- int optionsize;
-
- optionsize = (tcph->doff << 2) - sizeof(struct tcphdr);
- inc_sequence = ntohl(tcph->seq);
- switch (cm_node->state) {
- case I40IW_CM_STATE_SYN_SENT:
- i40iw_cleanup_retrans_entry(cm_node);
- /* active open */
- if (i40iw_check_syn(cm_node, tcph)) {
- i40iw_pr_err("check syn fail\n");
- return;
- }
- cm_node->tcp_cntxt.rem_ack_num = ntohl(tcph->ack_seq);
- /* setup options */
- ret = i40iw_handle_tcp_options(cm_node, tcph, optionsize, 0);
- if (ret) {
- i40iw_debug(cm_node->dev,
- I40IW_DEBUG_CM,
- "cm_node=%p tcp_options failed\n",
- cm_node);
- break;
- }
- i40iw_cleanup_retrans_entry(cm_node);
- cm_node->tcp_cntxt.rcv_nxt = inc_sequence + 1;
- i40iw_send_ack(cm_node); /* ACK for the syn_ack */
- ret = i40iw_send_mpa_request(cm_node);
- if (ret) {
- i40iw_debug(cm_node->dev,
- I40IW_DEBUG_CM,
- "cm_node=%p i40iw_send_mpa_request failed\n",
- cm_node);
- break;
- }
- cm_node->state = I40IW_CM_STATE_MPAREQ_SENT;
- break;
- case I40IW_CM_STATE_MPAREQ_RCVD:
- i40iw_passive_open_err(cm_node, true);
- break;
- case I40IW_CM_STATE_LISTENING:
- cm_node->tcp_cntxt.loc_seq_num = ntohl(tcph->ack_seq);
- i40iw_cleanup_retrans_entry(cm_node);
- cm_node->state = I40IW_CM_STATE_CLOSED;
- i40iw_send_reset(cm_node);
- break;
- case I40IW_CM_STATE_CLOSED:
- cm_node->tcp_cntxt.loc_seq_num = ntohl(tcph->ack_seq);
- i40iw_cleanup_retrans_entry(cm_node);
- atomic_inc(&cm_node->ref_count);
- i40iw_send_reset(cm_node);
- break;
- case I40IW_CM_STATE_ESTABLISHED:
- case I40IW_CM_STATE_FIN_WAIT1:
- case I40IW_CM_STATE_FIN_WAIT2:
- case I40IW_CM_STATE_LAST_ACK:
- case I40IW_CM_STATE_OFFLOADED:
- case I40IW_CM_STATE_CLOSING:
- case I40IW_CM_STATE_UNKNOWN:
- case I40IW_CM_STATE_MPAREQ_SENT:
- default:
- break;
- }
-}
-
-/**
- * i40iw_handle_ack_pkt - process packet with ACK
- * @cm_node: connection's node
- * @rbuf: receive buffer
- */
-static int i40iw_handle_ack_pkt(struct i40iw_cm_node *cm_node,
- struct i40iw_puda_buf *rbuf)
-{
- struct tcphdr *tcph = (struct tcphdr *)rbuf->tcph;
- u32 inc_sequence;
- int ret = 0;
- int optionsize;
- u32 datasize = rbuf->datalen;
-
- optionsize = (tcph->doff << 2) - sizeof(struct tcphdr);
-
- if (i40iw_check_seq(cm_node, tcph))
- return -EINVAL;
-
- inc_sequence = ntohl(tcph->seq);
- switch (cm_node->state) {
- case I40IW_CM_STATE_SYN_RCVD:
- i40iw_cleanup_retrans_entry(cm_node);
- ret = i40iw_handle_tcp_options(cm_node, tcph, optionsize, 1);
- if (ret)
- break;
- cm_node->tcp_cntxt.rem_ack_num = ntohl(tcph->ack_seq);
- cm_node->state = I40IW_CM_STATE_ESTABLISHED;
- if (datasize) {
- cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize;
- i40iw_handle_rcv_mpa(cm_node, rbuf);
- }
- break;
- case I40IW_CM_STATE_ESTABLISHED:
- i40iw_cleanup_retrans_entry(cm_node);
- if (datasize) {
- cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize;
- i40iw_handle_rcv_mpa(cm_node, rbuf);
- }
- break;
- case I40IW_CM_STATE_MPAREQ_SENT:
- cm_node->tcp_cntxt.rem_ack_num = ntohl(tcph->ack_seq);
- if (datasize) {
- cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize;
- i40iw_handle_rcv_mpa(cm_node, rbuf);
- }
- break;
- case I40IW_CM_STATE_LISTENING:
- i40iw_cleanup_retrans_entry(cm_node);
- cm_node->state = I40IW_CM_STATE_CLOSED;
- i40iw_send_reset(cm_node);
- break;
- case I40IW_CM_STATE_CLOSED:
- i40iw_cleanup_retrans_entry(cm_node);
- atomic_inc(&cm_node->ref_count);
- i40iw_send_reset(cm_node);
- break;
- case I40IW_CM_STATE_LAST_ACK:
- case I40IW_CM_STATE_CLOSING:
- i40iw_cleanup_retrans_entry(cm_node);
- cm_node->state = I40IW_CM_STATE_CLOSED;
- if (!cm_node->accept_pend)
- cm_node->cm_id->rem_ref(cm_node->cm_id);
- i40iw_rem_ref_cm_node(cm_node);
- break;
- case I40IW_CM_STATE_FIN_WAIT1:
- i40iw_cleanup_retrans_entry(cm_node);
- cm_node->state = I40IW_CM_STATE_FIN_WAIT2;
- break;
- case I40IW_CM_STATE_SYN_SENT:
- case I40IW_CM_STATE_FIN_WAIT2:
- case I40IW_CM_STATE_OFFLOADED:
- case I40IW_CM_STATE_MPAREQ_RCVD:
- case I40IW_CM_STATE_UNKNOWN:
- default:
- i40iw_cleanup_retrans_entry(cm_node);
- break;
- }
- return ret;
-}
-
-/**
- * i40iw_process_packet - process cm packet
- * @cm_node: connection's node
- * @rbuf: receive buffer
- */
-static void i40iw_process_packet(struct i40iw_cm_node *cm_node,
- struct i40iw_puda_buf *rbuf)
-{
- enum i40iw_tcpip_pkt_type pkt_type = I40IW_PKT_TYPE_UNKNOWN;
- struct tcphdr *tcph = (struct tcphdr *)rbuf->tcph;
- u32 fin_set = 0;
- int ret;
-
- if (tcph->rst) {
- pkt_type = I40IW_PKT_TYPE_RST;
- } else if (tcph->syn) {
- pkt_type = I40IW_PKT_TYPE_SYN;
- if (tcph->ack)
- pkt_type = I40IW_PKT_TYPE_SYNACK;
- } else if (tcph->ack) {
- pkt_type = I40IW_PKT_TYPE_ACK;
- }
- if (tcph->fin)
- fin_set = 1;
-
- switch (pkt_type) {
- case I40IW_PKT_TYPE_SYN:
- i40iw_handle_syn_pkt(cm_node, rbuf);
- break;
- case I40IW_PKT_TYPE_SYNACK:
- i40iw_handle_synack_pkt(cm_node, rbuf);
- break;
- case I40IW_PKT_TYPE_ACK:
- ret = i40iw_handle_ack_pkt(cm_node, rbuf);
- if (fin_set && !ret)
- i40iw_handle_fin_pkt(cm_node);
- break;
- case I40IW_PKT_TYPE_RST:
- i40iw_handle_rst_pkt(cm_node, rbuf);
- break;
- default:
- if (fin_set &&
- (!i40iw_check_seq(cm_node, (struct tcphdr *)rbuf->tcph)))
- i40iw_handle_fin_pkt(cm_node);
- break;
- }
-}
-
-/**
- * i40iw_make_listen_node - create a listen node with params
- * @cm_core: cm's core
- * @iwdev: iwarp device structure
- * @cm_info: quad info for connection
- */
-static struct i40iw_cm_listener *i40iw_make_listen_node(
- struct i40iw_cm_core *cm_core,
- struct i40iw_device *iwdev,
- struct i40iw_cm_info *cm_info)
-{
- struct i40iw_cm_listener *listener;
- unsigned long flags;
-
- /* cannot have multiple matching listeners */
- listener = i40iw_find_listener(cm_core, cm_info->loc_addr,
- cm_info->loc_port,
- cm_info->vlan_id,
- I40IW_CM_LISTENER_EITHER_STATE);
- if (listener &&
- (listener->listener_state == I40IW_CM_LISTENER_ACTIVE_STATE)) {
- atomic_dec(&listener->ref_count);
- i40iw_debug(cm_core->dev,
- I40IW_DEBUG_CM,
- "Not creating listener since it already exists\n");
- return NULL;
- }
-
- if (!listener) {
- /* create a CM listen node (1/2 node to compare incoming traffic to) */
- listener = kzalloc(sizeof(*listener), GFP_ATOMIC);
- if (!listener)
- return NULL;
- cm_core->stats_listen_nodes_created++;
- memcpy(listener->loc_addr, cm_info->loc_addr, sizeof(listener->loc_addr));
- listener->loc_port = cm_info->loc_port;
-
- INIT_LIST_HEAD(&listener->child_listen_list);
-
- atomic_set(&listener->ref_count, 1);
- } else {
- listener->reused_node = 1;
- }
-
- listener->cm_id = cm_info->cm_id;
- listener->ipv4 = cm_info->ipv4;
- listener->vlan_id = cm_info->vlan_id;
- atomic_set(&listener->pend_accepts_cnt, 0);
- listener->cm_core = cm_core;
- listener->iwdev = iwdev;
-
- listener->backlog = cm_info->backlog;
- listener->listener_state = I40IW_CM_LISTENER_ACTIVE_STATE;
-
- if (!listener->reused_node) {
- spin_lock_irqsave(&cm_core->listen_list_lock, flags);
- list_add(&listener->list, &cm_core->listen_nodes);
- spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
- }
-
- return listener;
-}
-
-/**
- * i40iw_create_cm_node - make a connection node with params
- * @cm_core: cm's core
- * @iwdev: iwarp device structure
- * @private_data_len: len to provate data for mpa request
- * @private_data: pointer to private data for connection
- * @cm_info: quad info for connection
- */
-static struct i40iw_cm_node *i40iw_create_cm_node(
- struct i40iw_cm_core *cm_core,
- struct i40iw_device *iwdev,
- u16 private_data_len,
- void *private_data,
- struct i40iw_cm_info *cm_info)
-{
- struct i40iw_cm_node *cm_node;
- struct i40iw_cm_listener *loopback_remotelistener;
- struct i40iw_cm_node *loopback_remotenode;
- struct i40iw_cm_info loopback_cm_info;
-
- /* create a CM connection node */
- cm_node = i40iw_make_cm_node(cm_core, iwdev, cm_info, NULL);
- if (!cm_node)
- return ERR_PTR(-ENOMEM);
- /* set our node side to client (active) side */
- cm_node->tcp_cntxt.client = 1;
- cm_node->tcp_cntxt.rcv_wscale = I40IW_CM_DEFAULT_RCV_WND_SCALE;
-
- if (!memcmp(cm_info->loc_addr, cm_info->rem_addr, sizeof(cm_info->loc_addr))) {
- loopback_remotelistener = i40iw_find_listener(
- cm_core,
- cm_info->rem_addr,
- cm_node->rem_port,
- cm_node->vlan_id,
- I40IW_CM_LISTENER_ACTIVE_STATE);
- if (!loopback_remotelistener) {
- i40iw_rem_ref_cm_node(cm_node);
- return ERR_PTR(-ECONNREFUSED);
- } else {
- loopback_cm_info = *cm_info;
- loopback_cm_info.loc_port = cm_info->rem_port;
- loopback_cm_info.rem_port = cm_info->loc_port;
- loopback_cm_info.cm_id = loopback_remotelistener->cm_id;
- loopback_cm_info.ipv4 = cm_info->ipv4;
- loopback_remotenode = i40iw_make_cm_node(cm_core,
- iwdev,
- &loopback_cm_info,
- loopback_remotelistener);
- if (!loopback_remotenode) {
- i40iw_rem_ref_cm_node(cm_node);
- return ERR_PTR(-ENOMEM);
- }
- cm_core->stats_loopbacks++;
- loopback_remotenode->loopbackpartner = cm_node;
- loopback_remotenode->tcp_cntxt.rcv_wscale =
- I40IW_CM_DEFAULT_RCV_WND_SCALE;
- cm_node->loopbackpartner = loopback_remotenode;
- memcpy(loopback_remotenode->pdata_buf, private_data,
- private_data_len);
- loopback_remotenode->pdata.size = private_data_len;
-
- cm_node->state = I40IW_CM_STATE_OFFLOADED;
- cm_node->tcp_cntxt.rcv_nxt =
- loopback_remotenode->tcp_cntxt.loc_seq_num;
- loopback_remotenode->tcp_cntxt.rcv_nxt =
- cm_node->tcp_cntxt.loc_seq_num;
- cm_node->tcp_cntxt.max_snd_wnd =
- loopback_remotenode->tcp_cntxt.rcv_wnd;
- loopback_remotenode->tcp_cntxt.max_snd_wnd = cm_node->tcp_cntxt.rcv_wnd;
- cm_node->tcp_cntxt.snd_wnd = loopback_remotenode->tcp_cntxt.rcv_wnd;
- loopback_remotenode->tcp_cntxt.snd_wnd = cm_node->tcp_cntxt.rcv_wnd;
- cm_node->tcp_cntxt.snd_wscale = loopback_remotenode->tcp_cntxt.rcv_wscale;
- loopback_remotenode->tcp_cntxt.snd_wscale = cm_node->tcp_cntxt.rcv_wscale;
- loopback_remotenode->state = I40IW_CM_STATE_MPAREQ_RCVD;
- i40iw_create_event(loopback_remotenode, I40IW_CM_EVENT_MPA_REQ);
- }
- return cm_node;
- }
-
- cm_node->pdata.size = private_data_len;
- cm_node->pdata.addr = cm_node->pdata_buf;
-
- memcpy(cm_node->pdata_buf, private_data, private_data_len);
-
- cm_node->state = I40IW_CM_STATE_SYN_SENT;
- return cm_node;
-}
-
-/**
- * i40iw_cm_reject - reject and teardown a connection
- * @cm_node: connection's node
- * @pdate: ptr to private data for reject
- * @plen: size of private data
- */
-static int i40iw_cm_reject(struct i40iw_cm_node *cm_node, const void *pdata, u8 plen)
-{
- int ret = 0;
- int err;
- int passive_state;
- struct iw_cm_id *cm_id = cm_node->cm_id;
- struct i40iw_cm_node *loopback = cm_node->loopbackpartner;
-
- if (cm_node->tcp_cntxt.client)
- return ret;
- i40iw_cleanup_retrans_entry(cm_node);
-
- if (!loopback) {
- passive_state = atomic_add_return(1, &cm_node->passive_state);
- if (passive_state == I40IW_SEND_RESET_EVENT) {
- cm_node->state = I40IW_CM_STATE_CLOSED;
- i40iw_rem_ref_cm_node(cm_node);
- } else {
- if (cm_node->state == I40IW_CM_STATE_LISTENER_DESTROYED) {
- i40iw_rem_ref_cm_node(cm_node);
- } else {
- ret = i40iw_send_mpa_reject(cm_node, pdata, plen);
- if (ret) {
- cm_node->state = I40IW_CM_STATE_CLOSED;
- err = i40iw_send_reset(cm_node);
- if (err)
- i40iw_pr_err("send reset failed\n");
- } else {
- cm_id->add_ref(cm_id);
- }
- }
- }
- } else {
- cm_node->cm_id = NULL;
- if (cm_node->state == I40IW_CM_STATE_LISTENER_DESTROYED) {
- i40iw_rem_ref_cm_node(cm_node);
- i40iw_rem_ref_cm_node(loopback);
- } else {
- ret = i40iw_send_cm_event(loopback,
- loopback->cm_id,
- IW_CM_EVENT_CONNECT_REPLY,
- -ECONNREFUSED);
- i40iw_rem_ref_cm_node(cm_node);
- loopback->state = I40IW_CM_STATE_CLOSING;
-
- cm_id = loopback->cm_id;
- i40iw_rem_ref_cm_node(loopback);
- cm_id->rem_ref(cm_id);
- }
- }
-
- return ret;
-}
-
-/**
- * i40iw_cm_close - close of cm connection
- * @cm_node: connection's node
- */
-static int i40iw_cm_close(struct i40iw_cm_node *cm_node)
-{
- int ret = 0;
-
- if (!cm_node)
- return -EINVAL;
-
- switch (cm_node->state) {
- case I40IW_CM_STATE_SYN_RCVD:
- case I40IW_CM_STATE_SYN_SENT:
- case I40IW_CM_STATE_ONE_SIDE_ESTABLISHED:
- case I40IW_CM_STATE_ESTABLISHED:
- case I40IW_CM_STATE_ACCEPTING:
- case I40IW_CM_STATE_MPAREQ_SENT:
- case I40IW_CM_STATE_MPAREQ_RCVD:
- i40iw_cleanup_retrans_entry(cm_node);
- i40iw_send_reset(cm_node);
- break;
- case I40IW_CM_STATE_CLOSE_WAIT:
- cm_node->state = I40IW_CM_STATE_LAST_ACK;
- i40iw_send_fin(cm_node);
- break;
- case I40IW_CM_STATE_FIN_WAIT1:
- case I40IW_CM_STATE_FIN_WAIT2:
- case I40IW_CM_STATE_LAST_ACK:
- case I40IW_CM_STATE_TIME_WAIT:
- case I40IW_CM_STATE_CLOSING:
- ret = -1;
- break;
- case I40IW_CM_STATE_LISTENING:
- i40iw_cleanup_retrans_entry(cm_node);
- i40iw_send_reset(cm_node);
- break;
- case I40IW_CM_STATE_MPAREJ_RCVD:
- case I40IW_CM_STATE_UNKNOWN:
- case I40IW_CM_STATE_INITED:
- case I40IW_CM_STATE_CLOSED:
- case I40IW_CM_STATE_LISTENER_DESTROYED:
- i40iw_rem_ref_cm_node(cm_node);
- break;
- case I40IW_CM_STATE_OFFLOADED:
- if (cm_node->send_entry)
- i40iw_pr_err("send_entry\n");
- i40iw_rem_ref_cm_node(cm_node);
- break;
- }
- return ret;
-}
-
-/**
- * i40iw_receive_ilq - recv an ETHERNET packet, and process it
- * through CM
- * @vsi: pointer to the vsi structure
- * @rbuf: receive buffer
- */
-void i40iw_receive_ilq(struct i40iw_sc_vsi *vsi, struct i40iw_puda_buf *rbuf)
-{
- struct i40iw_cm_node *cm_node;
- struct i40iw_cm_listener *listener;
- struct iphdr *iph;
- struct ipv6hdr *ip6h;
- struct tcphdr *tcph;
- struct i40iw_cm_info cm_info;
- struct i40iw_sc_dev *dev = vsi->dev;
- struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev;
- struct i40iw_cm_core *cm_core = &iwdev->cm_core;
- struct vlan_ethhdr *ethh;
- u16 vtag;
-
- /* if vlan, then maclen = 18 else 14 */
- iph = (struct iphdr *)rbuf->iph;
- memset(&cm_info, 0, sizeof(cm_info));
-
- i40iw_debug_buf(dev,
- I40IW_DEBUG_ILQ,
- "RECEIVE ILQ BUFFER",
- rbuf->mem.va,
- rbuf->totallen);
- ethh = (struct vlan_ethhdr *)rbuf->mem.va;
-
- if (ethh->h_vlan_proto == htons(ETH_P_8021Q)) {
- vtag = ntohs(ethh->h_vlan_TCI);
- cm_info.user_pri = (vtag & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
- cm_info.vlan_id = vtag & VLAN_VID_MASK;
- i40iw_debug(cm_core->dev,
- I40IW_DEBUG_CM,
- "%s vlan_id=%d\n",
- __func__,
- cm_info.vlan_id);
- } else {
- cm_info.vlan_id = I40IW_NO_VLAN;
- }
- tcph = (struct tcphdr *)rbuf->tcph;
-
- if (rbuf->ipv4) {
- cm_info.loc_addr[0] = ntohl(iph->daddr);
- cm_info.rem_addr[0] = ntohl(iph->saddr);
- cm_info.ipv4 = true;
- cm_info.tos = iph->tos;
- } else {
- ip6h = (struct ipv6hdr *)rbuf->iph;
- i40iw_copy_ip_ntohl(cm_info.loc_addr,
- ip6h->daddr.in6_u.u6_addr32);
- i40iw_copy_ip_ntohl(cm_info.rem_addr,
- ip6h->saddr.in6_u.u6_addr32);
- cm_info.ipv4 = false;
- cm_info.tos = (ip6h->priority << 4) | (ip6h->flow_lbl[0] >> 4);
- }
- cm_info.loc_port = ntohs(tcph->dest);
- cm_info.rem_port = ntohs(tcph->source);
- cm_node = i40iw_find_node(cm_core,
- cm_info.rem_port,
- cm_info.rem_addr,
- cm_info.loc_port,
- cm_info.loc_addr,
- true);
-
- if (!cm_node) {
- /* Only type of packet accepted are for */
- /* the PASSIVE open (syn only) */
- if (!tcph->syn || tcph->ack)
- return;
- listener =
- i40iw_find_listener(cm_core,
- cm_info.loc_addr,
- cm_info.loc_port,
- cm_info.vlan_id,
- I40IW_CM_LISTENER_ACTIVE_STATE);
- if (!listener) {
- cm_info.cm_id = NULL;
- i40iw_debug(cm_core->dev,
- I40IW_DEBUG_CM,
- "%s no listener found\n",
- __func__);
- return;
- }
- cm_info.cm_id = listener->cm_id;
- cm_node = i40iw_make_cm_node(cm_core, iwdev, &cm_info, listener);
- if (!cm_node) {
- i40iw_debug(cm_core->dev,
- I40IW_DEBUG_CM,
- "%s allocate node failed\n",
- __func__);
- atomic_dec(&listener->ref_count);
- return;
- }
- if (!tcph->rst && !tcph->fin) {
- cm_node->state = I40IW_CM_STATE_LISTENING;
- } else {
- i40iw_rem_ref_cm_node(cm_node);
- return;
- }
- atomic_inc(&cm_node->ref_count);
- } else if (cm_node->state == I40IW_CM_STATE_OFFLOADED) {
- i40iw_rem_ref_cm_node(cm_node);
- return;
- }
- i40iw_process_packet(cm_node, rbuf);
- i40iw_rem_ref_cm_node(cm_node);
-}
-
-/**
- * i40iw_setup_cm_core - allocate a top level instance of a cm
- * core
- * @iwdev: iwarp device structure
- */
-void i40iw_setup_cm_core(struct i40iw_device *iwdev)
-{
- struct i40iw_cm_core *cm_core = &iwdev->cm_core;
-
- cm_core->iwdev = iwdev;
- cm_core->dev = &iwdev->sc_dev;
-
- INIT_LIST_HEAD(&cm_core->connected_nodes);
- INIT_LIST_HEAD(&cm_core->listen_nodes);
-
- init_timer(&cm_core->tcp_timer);
- cm_core->tcp_timer.function = i40iw_cm_timer_tick;
- cm_core->tcp_timer.data = (unsigned long)cm_core;
-
- spin_lock_init(&cm_core->ht_lock);
- spin_lock_init(&cm_core->listen_list_lock);
-
- cm_core->event_wq = alloc_ordered_workqueue("iwewq",
- WQ_MEM_RECLAIM);
-
- cm_core->disconn_wq = alloc_ordered_workqueue("iwdwq",
- WQ_MEM_RECLAIM);
-}
-
-/**
- * i40iw_cleanup_cm_core - deallocate a top level instance of a
- * cm core
- * @cm_core: cm's core
- */
-void i40iw_cleanup_cm_core(struct i40iw_cm_core *cm_core)
-{
- unsigned long flags;
-
- if (!cm_core)
- return;
-
- spin_lock_irqsave(&cm_core->ht_lock, flags);
- if (timer_pending(&cm_core->tcp_timer))
- del_timer_sync(&cm_core->tcp_timer);
- spin_unlock_irqrestore(&cm_core->ht_lock, flags);
-
- destroy_workqueue(cm_core->event_wq);
- destroy_workqueue(cm_core->disconn_wq);
-}
-
-/**
- * i40iw_init_tcp_ctx - setup qp context
- * @cm_node: connection's node
- * @tcp_info: offload info for tcp
- * @iwqp: associate qp for the connection
- */
-static void i40iw_init_tcp_ctx(struct i40iw_cm_node *cm_node,
- struct i40iw_tcp_offload_info *tcp_info,
- struct i40iw_qp *iwqp)
-{
- tcp_info->ipv4 = cm_node->ipv4;
- tcp_info->drop_ooo_seg = true;
- tcp_info->wscale = true;
- tcp_info->ignore_tcp_opt = true;
- tcp_info->ignore_tcp_uns_opt = true;
- tcp_info->no_nagle = false;
-
- tcp_info->ttl = I40IW_DEFAULT_TTL;
- tcp_info->rtt_var = cpu_to_le32(I40IW_DEFAULT_RTT_VAR);
- tcp_info->ss_thresh = cpu_to_le32(I40IW_DEFAULT_SS_THRESH);
- tcp_info->rexmit_thresh = I40IW_DEFAULT_REXMIT_THRESH;
-
- tcp_info->tcp_state = I40IW_TCP_STATE_ESTABLISHED;
- tcp_info->snd_wscale = cm_node->tcp_cntxt.snd_wscale;
- tcp_info->rcv_wscale = cm_node->tcp_cntxt.rcv_wscale;
-
- tcp_info->snd_nxt = cpu_to_le32(cm_node->tcp_cntxt.loc_seq_num);
- tcp_info->snd_wnd = cpu_to_le32(cm_node->tcp_cntxt.snd_wnd);
- tcp_info->rcv_nxt = cpu_to_le32(cm_node->tcp_cntxt.rcv_nxt);
- tcp_info->snd_max = cpu_to_le32(cm_node->tcp_cntxt.loc_seq_num);
-
- tcp_info->snd_una = cpu_to_le32(cm_node->tcp_cntxt.loc_seq_num);
- tcp_info->cwnd = cpu_to_le32(2 * cm_node->tcp_cntxt.mss);
- tcp_info->snd_wl1 = cpu_to_le32(cm_node->tcp_cntxt.rcv_nxt);
- tcp_info->snd_wl2 = cpu_to_le32(cm_node->tcp_cntxt.loc_seq_num);
- tcp_info->max_snd_window = cpu_to_le32(cm_node->tcp_cntxt.max_snd_wnd);
- tcp_info->rcv_wnd = cpu_to_le32(cm_node->tcp_cntxt.rcv_wnd <<
- cm_node->tcp_cntxt.rcv_wscale);
-
- tcp_info->flow_label = 0;
- tcp_info->snd_mss = cpu_to_le32(((u32)cm_node->tcp_cntxt.mss));
- if (cm_node->vlan_id < VLAN_TAG_PRESENT) {
- tcp_info->insert_vlan_tag = true;
- tcp_info->vlan_tag = cpu_to_le16(cm_node->vlan_id);
- }
- if (cm_node->ipv4) {
- tcp_info->src_port = cpu_to_le16(cm_node->loc_port);
- tcp_info->dst_port = cpu_to_le16(cm_node->rem_port);
-
- tcp_info->dest_ip_addr3 = cpu_to_le32(cm_node->rem_addr[0]);
- tcp_info->local_ipaddr3 = cpu_to_le32(cm_node->loc_addr[0]);
- tcp_info->arp_idx =
- cpu_to_le16((u16)i40iw_arp_table(
- iwqp->iwdev,
- &tcp_info->dest_ip_addr3,
- true,
- NULL,
- I40IW_ARP_RESOLVE));
- } else {
- tcp_info->src_port = cpu_to_le16(cm_node->loc_port);
- tcp_info->dst_port = cpu_to_le16(cm_node->rem_port);
- tcp_info->dest_ip_addr0 = cpu_to_le32(cm_node->rem_addr[0]);
- tcp_info->dest_ip_addr1 = cpu_to_le32(cm_node->rem_addr[1]);
- tcp_info->dest_ip_addr2 = cpu_to_le32(cm_node->rem_addr[2]);
- tcp_info->dest_ip_addr3 = cpu_to_le32(cm_node->rem_addr[3]);
- tcp_info->local_ipaddr0 = cpu_to_le32(cm_node->loc_addr[0]);
- tcp_info->local_ipaddr1 = cpu_to_le32(cm_node->loc_addr[1]);
- tcp_info->local_ipaddr2 = cpu_to_le32(cm_node->loc_addr[2]);
- tcp_info->local_ipaddr3 = cpu_to_le32(cm_node->loc_addr[3]);
- tcp_info->arp_idx =
- cpu_to_le16((u16)i40iw_arp_table(
- iwqp->iwdev,
- &tcp_info->dest_ip_addr0,
- false,
- NULL,
- I40IW_ARP_RESOLVE));
- }
-}
-
-/**
- * i40iw_cm_init_tsa_conn - setup qp for RTS
- * @iwqp: associate qp for the connection
- * @cm_node: connection's node
- */
-static void i40iw_cm_init_tsa_conn(struct i40iw_qp *iwqp,
- struct i40iw_cm_node *cm_node)
-{
- struct i40iw_tcp_offload_info tcp_info;
- struct i40iwarp_offload_info *iwarp_info;
- struct i40iw_qp_host_ctx_info *ctx_info;
- struct i40iw_device *iwdev = iwqp->iwdev;
- struct i40iw_sc_dev *dev = &iwqp->iwdev->sc_dev;
-
- memset(&tcp_info, 0x00, sizeof(struct i40iw_tcp_offload_info));
- iwarp_info = &iwqp->iwarp_info;
- ctx_info = &iwqp->ctx_info;
-
- ctx_info->tcp_info = &tcp_info;
- ctx_info->send_cq_num = iwqp->iwscq->sc_cq.cq_uk.cq_id;
- ctx_info->rcv_cq_num = iwqp->iwrcq->sc_cq.cq_uk.cq_id;
-
- iwarp_info->ord_size = cm_node->ord_size;
- iwarp_info->ird_size = i40iw_derive_hw_ird_setting(cm_node->ird_size);
-
- if (iwarp_info->ord_size == 1)
- iwarp_info->ord_size = 2;
-
- iwarp_info->rd_enable = true;
- iwarp_info->rdmap_ver = 1;
- iwarp_info->ddp_ver = 1;
-
- iwarp_info->pd_id = iwqp->iwpd->sc_pd.pd_id;
-
- ctx_info->tcp_info_valid = true;
- ctx_info->iwarp_info_valid = true;
- ctx_info->add_to_qoslist = true;
- ctx_info->user_pri = cm_node->user_pri;
-
- i40iw_init_tcp_ctx(cm_node, &tcp_info, iwqp);
- if (cm_node->snd_mark_en) {
- iwarp_info->snd_mark_en = true;
- iwarp_info->snd_mark_offset = (tcp_info.snd_nxt &
- SNDMARKER_SEQNMASK) + cm_node->lsmm_size;
- }
-
- cm_node->state = I40IW_CM_STATE_OFFLOADED;
- tcp_info.tcp_state = I40IW_TCP_STATE_ESTABLISHED;
- tcp_info.src_mac_addr_idx = iwdev->mac_ip_table_idx;
- tcp_info.tos = cm_node->tos;
-
- dev->iw_priv_qp_ops->qp_setctx(&iwqp->sc_qp, (u64 *)(iwqp->host_ctx.va), ctx_info);
-
- /* once tcp_info is set, no need to do it again */
- ctx_info->tcp_info_valid = false;
- ctx_info->iwarp_info_valid = false;
- ctx_info->add_to_qoslist = false;
-}
-
-/**
- * i40iw_cm_disconn - when a connection is being closed
- * @iwqp: associate qp for the connection
- */
-void i40iw_cm_disconn(struct i40iw_qp *iwqp)
-{
- struct disconn_work *work;
- struct i40iw_device *iwdev = iwqp->iwdev;
- struct i40iw_cm_core *cm_core = &iwdev->cm_core;
- unsigned long flags;
-
- work = kzalloc(sizeof(*work), GFP_ATOMIC);
- if (!work)
- return; /* Timer will clean up */
-
- spin_lock_irqsave(&iwdev->qptable_lock, flags);
- if (!iwdev->qp_table[iwqp->ibqp.qp_num]) {
- spin_unlock_irqrestore(&iwdev->qptable_lock, flags);
- i40iw_debug(&iwdev->sc_dev, I40IW_DEBUG_CM,
- "%s qp_id %d is already freed\n",
- __func__, iwqp->ibqp.qp_num);
- kfree(work);
- return;
- }
- i40iw_add_ref(&iwqp->ibqp);
- spin_unlock_irqrestore(&iwdev->qptable_lock, flags);
-
- work->iwqp = iwqp;
- INIT_WORK(&work->work, i40iw_disconnect_worker);
- queue_work(cm_core->disconn_wq, &work->work);
- return;
-}
-
-/**
- * i40iw_qp_disconnect - free qp and close cm
- * @iwqp: associate qp for the connection
- */
-static void i40iw_qp_disconnect(struct i40iw_qp *iwqp)
-{
- struct i40iw_device *iwdev;
- struct i40iw_ib_device *iwibdev;
-
- iwdev = to_iwdev(iwqp->ibqp.device);
- if (!iwdev) {
- i40iw_pr_err("iwdev == NULL\n");
- return;
- }
-
- iwibdev = iwdev->iwibdev;
-
- if (iwqp->active_conn) {
- /* indicate this connection is NOT active */
- iwqp->active_conn = 0;
- } else {
- /* Need to free the Last Streaming Mode Message */
- if (iwqp->ietf_mem.va) {
- if (iwqp->lsmm_mr)
- iwibdev->ibdev.dereg_mr(iwqp->lsmm_mr);
- i40iw_free_dma_mem(iwdev->sc_dev.hw, &iwqp->ietf_mem);
- }
- }
-
- /* close the CM node down if it is still active */
- if (iwqp->cm_node) {
- i40iw_debug(&iwdev->sc_dev, I40IW_DEBUG_CM, "%s Call close API\n", __func__);
- i40iw_cm_close(iwqp->cm_node);
- }
-}
-
-/**
- * i40iw_cm_disconn_true - called by worker thread to disconnect qp
- * @iwqp: associate qp for the connection
- */
-static void i40iw_cm_disconn_true(struct i40iw_qp *iwqp)
-{
- struct iw_cm_id *cm_id;
- struct i40iw_device *iwdev;
- struct i40iw_sc_qp *qp = &iwqp->sc_qp;
- u16 last_ae;
- u8 original_hw_tcp_state;
- u8 original_ibqp_state;
- int disconn_status = 0;
- int issue_disconn = 0;
- int issue_close = 0;
- int issue_flush = 0;
- struct ib_event ibevent;
- unsigned long flags;
- int ret;
-
- if (!iwqp) {
- i40iw_pr_err("iwqp == NULL\n");
- return;
- }
-
- spin_lock_irqsave(&iwqp->lock, flags);
- cm_id = iwqp->cm_id;
- /* make sure we havent already closed this connection */
- if (!cm_id) {
- spin_unlock_irqrestore(&iwqp->lock, flags);
- return;
- }
-
- iwdev = to_iwdev(iwqp->ibqp.device);
-
- original_hw_tcp_state = iwqp->hw_tcp_state;
- original_ibqp_state = iwqp->ibqp_state;
- last_ae = iwqp->last_aeq;
-
- if (qp->term_flags) {
- issue_disconn = 1;
- issue_close = 1;
- iwqp->cm_id = NULL;
- /*When term timer expires after cm_timer, don't want
- *terminate-handler to issue cm_disconn which can re-free
- *a QP even after its refcnt=0.
- */
- i40iw_terminate_del_timer(qp);
- if (!iwqp->flush_issued) {
- iwqp->flush_issued = 1;
- issue_flush = 1;
- }
- } else if ((original_hw_tcp_state == I40IW_TCP_STATE_CLOSE_WAIT) ||
- ((original_ibqp_state == IB_QPS_RTS) &&
- (last_ae == I40IW_AE_LLP_CONNECTION_RESET))) {
- issue_disconn = 1;
- if (last_ae == I40IW_AE_LLP_CONNECTION_RESET)
- disconn_status = -ECONNRESET;
- }
-
- if (((original_hw_tcp_state == I40IW_TCP_STATE_CLOSED) ||
- (original_hw_tcp_state == I40IW_TCP_STATE_TIME_WAIT) ||
- (last_ae == I40IW_AE_RDMAP_ROE_BAD_LLP_CLOSE) ||
- (last_ae == I40IW_AE_LLP_CONNECTION_RESET))) {
- issue_close = 1;
- iwqp->cm_id = NULL;
- if (!iwqp->flush_issued) {
- iwqp->flush_issued = 1;
- issue_flush = 1;
- }
- }
-
- spin_unlock_irqrestore(&iwqp->lock, flags);
- if (issue_flush && !iwqp->destroyed) {
- /* Flush the queues */
- i40iw_flush_wqes(iwdev, iwqp);
-
- if (qp->term_flags && iwqp->ibqp.event_handler) {
- ibevent.device = iwqp->ibqp.device;
- ibevent.event = (qp->eventtype == TERM_EVENT_QP_FATAL) ?
- IB_EVENT_QP_FATAL : IB_EVENT_QP_ACCESS_ERR;
- ibevent.element.qp = &iwqp->ibqp;
- iwqp->ibqp.event_handler(&ibevent, iwqp->ibqp.qp_context);
- }
- }
-
- if (cm_id && cm_id->event_handler) {
- if (issue_disconn) {
- ret = i40iw_send_cm_event(NULL,
- cm_id,
- IW_CM_EVENT_DISCONNECT,
- disconn_status);
-
- if (ret)
- i40iw_debug(&iwdev->sc_dev,
- I40IW_DEBUG_CM,
- "disconnect event failed %s: - cm_id = %p\n",
- __func__, cm_id);
- }
- if (issue_close) {
- i40iw_qp_disconnect(iwqp);
- cm_id->provider_data = iwqp;
- ret = i40iw_send_cm_event(NULL, cm_id, IW_CM_EVENT_CLOSE, 0);
- if (ret)
- i40iw_debug(&iwdev->sc_dev,
- I40IW_DEBUG_CM,
- "close event failed %s: - cm_id = %p\n",
- __func__, cm_id);
- cm_id->rem_ref(cm_id);
- }
- }
-}
-
-/**
- * i40iw_disconnect_worker - worker for connection close
- * @work: points or disconn structure
- */
-static void i40iw_disconnect_worker(struct work_struct *work)
-{
- struct disconn_work *dwork = container_of(work, struct disconn_work, work);
- struct i40iw_qp *iwqp = dwork->iwqp;
-
- kfree(dwork);
- i40iw_cm_disconn_true(iwqp);
- i40iw_rem_ref(&iwqp->ibqp);
-}
-
-/**
- * i40iw_accept - registered call for connection to be accepted
- * @cm_id: cm information for passive connection
- * @conn_param: accpet parameters
- */
-int i40iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
-{
- struct ib_qp *ibqp;
- struct i40iw_qp *iwqp;
- struct i40iw_device *iwdev;
- struct i40iw_sc_dev *dev;
- struct i40iw_cm_node *cm_node;
- struct ib_qp_attr attr;
- int passive_state;
- struct ib_mr *ibmr;
- struct i40iw_pd *iwpd;
- u16 buf_len = 0;
- struct i40iw_kmem_info accept;
- enum i40iw_status_code status;
- u64 tagged_offset;
-
- memset(&attr, 0, sizeof(attr));
- ibqp = i40iw_get_qp(cm_id->device, conn_param->qpn);
- if (!ibqp)
- return -EINVAL;
-
- iwqp = to_iwqp(ibqp);
- iwdev = iwqp->iwdev;
- dev = &iwdev->sc_dev;
- cm_node = (struct i40iw_cm_node *)cm_id->provider_data;
-
- if (((struct sockaddr_in *)&cm_id->local_addr)->sin_family == AF_INET) {
- cm_node->ipv4 = true;
- cm_node->vlan_id = i40iw_get_vlan_ipv4(cm_node->loc_addr);
- } else {
- cm_node->ipv4 = false;
- i40iw_netdev_vlan_ipv6(cm_node->loc_addr, &cm_node->vlan_id, NULL);
- }
- i40iw_debug(cm_node->dev,
- I40IW_DEBUG_CM,
- "Accept vlan_id=%d\n",
- cm_node->vlan_id);
- if (cm_node->state == I40IW_CM_STATE_LISTENER_DESTROYED) {
- if (cm_node->loopbackpartner)
- i40iw_rem_ref_cm_node(cm_node->loopbackpartner);
- i40iw_rem_ref_cm_node(cm_node);
- return -EINVAL;
- }
-
- passive_state = atomic_add_return(1, &cm_node->passive_state);
- if (passive_state == I40IW_SEND_RESET_EVENT) {
- i40iw_rem_ref_cm_node(cm_node);
- return -ECONNRESET;
- }
-
- cm_node->cm_core->stats_accepts++;
- iwqp->cm_node = (void *)cm_node;
- cm_node->iwqp = iwqp;
-
- buf_len = conn_param->private_data_len + I40IW_MAX_IETF_SIZE;
-
- status = i40iw_allocate_dma_mem(dev->hw, &iwqp->ietf_mem, buf_len, 1);
-
- if (status)
- return -ENOMEM;
- cm_node->pdata.size = conn_param->private_data_len;
- accept.addr = iwqp->ietf_mem.va;
- accept.size = i40iw_cm_build_mpa_frame(cm_node, &accept, MPA_KEY_REPLY);
- memcpy(accept.addr + accept.size, conn_param->private_data,
- conn_param->private_data_len);
-
- /* setup our first outgoing iWarp send WQE (the IETF frame response) */
- if ((cm_node->ipv4 &&
- !i40iw_ipv4_is_loopback(cm_node->loc_addr[0], cm_node->rem_addr[0])) ||
- (!cm_node->ipv4 &&
- !i40iw_ipv6_is_loopback(cm_node->loc_addr, cm_node->rem_addr))) {
- iwpd = iwqp->iwpd;
- tagged_offset = (uintptr_t)iwqp->ietf_mem.va;
- ibmr = i40iw_reg_phys_mr(&iwpd->ibpd,
- iwqp->ietf_mem.pa,
- buf_len,
- IB_ACCESS_LOCAL_WRITE,
- &tagged_offset);
- if (IS_ERR(ibmr)) {
- i40iw_free_dma_mem(dev->hw, &iwqp->ietf_mem);
- return -ENOMEM;
- }
-
- ibmr->pd = &iwpd->ibpd;
- ibmr->device = iwpd->ibpd.device;
- iwqp->lsmm_mr = ibmr;
- if (iwqp->page)
- iwqp->sc_qp.qp_uk.sq_base = kmap(iwqp->page);
- dev->iw_priv_qp_ops->qp_send_lsmm(&iwqp->sc_qp,
- iwqp->ietf_mem.va,
- (accept.size + conn_param->private_data_len),
- ibmr->lkey);
-
- } else {
- if (iwqp->page)
- iwqp->sc_qp.qp_uk.sq_base = kmap(iwqp->page);
- dev->iw_priv_qp_ops->qp_send_lsmm(&iwqp->sc_qp, NULL, 0, 0);
- }
-
- if (iwqp->page)
- kunmap(iwqp->page);
-
- iwqp->cm_id = cm_id;
- cm_node->cm_id = cm_id;
-
- cm_id->provider_data = (void *)iwqp;
- iwqp->active_conn = 0;
-
- cm_node->lsmm_size = accept.size + conn_param->private_data_len;
- i40iw_cm_init_tsa_conn(iwqp, cm_node);
- cm_id->add_ref(cm_id);
- i40iw_add_ref(&iwqp->ibqp);
-
- i40iw_send_cm_event(cm_node, cm_id, IW_CM_EVENT_ESTABLISHED, 0);
-
- attr.qp_state = IB_QPS_RTS;
- cm_node->qhash_set = false;
- i40iw_modify_qp(&iwqp->ibqp, &attr, IB_QP_STATE, NULL);
- if (cm_node->loopbackpartner) {
- cm_node->loopbackpartner->pdata.size = conn_param->private_data_len;
-
- /* copy entire MPA frame to our cm_node's frame */
- memcpy(cm_node->loopbackpartner->pdata_buf,
- conn_param->private_data,
- conn_param->private_data_len);
- i40iw_create_event(cm_node->loopbackpartner, I40IW_CM_EVENT_CONNECTED);
- }
-
- cm_node->accelerated = 1;
- if (cm_node->accept_pend) {
- if (!cm_node->listener)
- i40iw_pr_err("cm_node->listener NULL for passive node\n");
- atomic_dec(&cm_node->listener->pend_accepts_cnt);
- cm_node->accept_pend = 0;
- }
- return 0;
-}
-
-/**
- * i40iw_reject - registered call for connection to be rejected
- * @cm_id: cm information for passive connection
- * @pdata: private data to be sent
- * @pdata_len: private data length
- */
-int i40iw_reject(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
-{
- struct i40iw_device *iwdev;
- struct i40iw_cm_node *cm_node;
- struct i40iw_cm_node *loopback;
-
- cm_node = (struct i40iw_cm_node *)cm_id->provider_data;
- loopback = cm_node->loopbackpartner;
- cm_node->cm_id = cm_id;
- cm_node->pdata.size = pdata_len;
-
- iwdev = to_iwdev(cm_id->device);
- if (!iwdev)
- return -EINVAL;
- cm_node->cm_core->stats_rejects++;
-
- if (pdata_len + sizeof(struct ietf_mpa_v2) > MAX_CM_BUFFER)
- return -EINVAL;
-
- if (loopback) {
- memcpy(&loopback->pdata_buf, pdata, pdata_len);
- loopback->pdata.size = pdata_len;
- }
-
- return i40iw_cm_reject(cm_node, pdata, pdata_len);
-}
-
-/**
- * i40iw_connect - registered call for connection to be established
- * @cm_id: cm information for passive connection
- * @conn_param: Information about the connection
- */
-int i40iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
-{
- struct ib_qp *ibqp;
- struct i40iw_qp *iwqp;
- struct i40iw_device *iwdev;
- struct i40iw_cm_node *cm_node;
- struct i40iw_cm_info cm_info;
- struct sockaddr_in *laddr;
- struct sockaddr_in *raddr;
- struct sockaddr_in6 *laddr6;
- struct sockaddr_in6 *raddr6;
- bool qhash_set = false;
- int apbvt_set = 0;
- int err = 0;
- enum i40iw_status_code status;
-
- ibqp = i40iw_get_qp(cm_id->device, conn_param->qpn);
- if (!ibqp)
- return -EINVAL;
- iwqp = to_iwqp(ibqp);
- if (!iwqp)
- return -EINVAL;
- iwdev = to_iwdev(iwqp->ibqp.device);
- if (!iwdev)
- return -EINVAL;
-
- laddr = (struct sockaddr_in *)&cm_id->m_local_addr;
- raddr = (struct sockaddr_in *)&cm_id->m_remote_addr;
- laddr6 = (struct sockaddr_in6 *)&cm_id->m_local_addr;
- raddr6 = (struct sockaddr_in6 *)&cm_id->m_remote_addr;
-
- if (!(laddr->sin_port) || !(raddr->sin_port))
- return -EINVAL;
-
- iwqp->active_conn = 1;
- iwqp->cm_id = NULL;
- cm_id->provider_data = iwqp;
-
- /* set up the connection params for the node */
- if (cm_id->remote_addr.ss_family == AF_INET) {
- cm_info.ipv4 = true;
- memset(cm_info.loc_addr, 0, sizeof(cm_info.loc_addr));
- memset(cm_info.rem_addr, 0, sizeof(cm_info.rem_addr));
- cm_info.loc_addr[0] = ntohl(laddr->sin_addr.s_addr);
- cm_info.rem_addr[0] = ntohl(raddr->sin_addr.s_addr);
- cm_info.loc_port = ntohs(laddr->sin_port);
- cm_info.rem_port = ntohs(raddr->sin_port);
- cm_info.vlan_id = i40iw_get_vlan_ipv4(cm_info.loc_addr);
- } else {
- cm_info.ipv4 = false;
- i40iw_copy_ip_ntohl(cm_info.loc_addr,
- laddr6->sin6_addr.in6_u.u6_addr32);
- i40iw_copy_ip_ntohl(cm_info.rem_addr,
- raddr6->sin6_addr.in6_u.u6_addr32);
- cm_info.loc_port = ntohs(laddr6->sin6_port);
- cm_info.rem_port = ntohs(raddr6->sin6_port);
- i40iw_netdev_vlan_ipv6(cm_info.loc_addr, &cm_info.vlan_id, NULL);
- }
- cm_info.cm_id = cm_id;
- cm_info.tos = cm_id->tos;
- cm_info.user_pri = rt_tos2priority(cm_id->tos);
- i40iw_debug(&iwdev->sc_dev, I40IW_DEBUG_DCB, "%s TOS:[%d] UP:[%d]\n",
- __func__, cm_id->tos, cm_info.user_pri);
- if ((cm_info.ipv4 && (laddr->sin_addr.s_addr != raddr->sin_addr.s_addr)) ||
- (!cm_info.ipv4 && memcmp(laddr6->sin6_addr.in6_u.u6_addr32,
- raddr6->sin6_addr.in6_u.u6_addr32,
- sizeof(laddr6->sin6_addr.in6_u.u6_addr32)))) {
- status = i40iw_manage_qhash(iwdev,
- &cm_info,
- I40IW_QHASH_TYPE_TCP_ESTABLISHED,
- I40IW_QHASH_MANAGE_TYPE_ADD,
- NULL,
- true);
- if (status)
- return -EINVAL;
- qhash_set = true;
- }
- status = i40iw_manage_apbvt(iwdev, cm_info.loc_port, I40IW_MANAGE_APBVT_ADD);
- if (status) {
- i40iw_manage_qhash(iwdev,
- &cm_info,
- I40IW_QHASH_TYPE_TCP_ESTABLISHED,
- I40IW_QHASH_MANAGE_TYPE_DELETE,
- NULL,
- false);
- return -EINVAL;
- }
-
- apbvt_set = 1;
- cm_id->add_ref(cm_id);
- cm_node = i40iw_create_cm_node(&iwdev->cm_core, iwdev,
- conn_param->private_data_len,
- (void *)conn_param->private_data,
- &cm_info);
-
- if (IS_ERR(cm_node)) {
- err = PTR_ERR(cm_node);
- goto err_out;
- }
-
- i40iw_record_ird_ord(cm_node, (u16)conn_param->ird, (u16)conn_param->ord);
- if (cm_node->send_rdma0_op == SEND_RDMA_READ_ZERO &&
- !cm_node->ord_size)
- cm_node->ord_size = 1;
-
- cm_node->apbvt_set = apbvt_set;
- cm_node->qhash_set = qhash_set;
- iwqp->cm_node = cm_node;
- cm_node->iwqp = iwqp;
- iwqp->cm_id = cm_id;
- i40iw_add_ref(&iwqp->ibqp);
-
- if (cm_node->state != I40IW_CM_STATE_OFFLOADED) {
- cm_node->state = I40IW_CM_STATE_SYN_SENT;
- err = i40iw_send_syn(cm_node, 0);
- if (err) {
- i40iw_rem_ref_cm_node(cm_node);
- goto err_out;
- }
- }
-
- i40iw_debug(cm_node->dev,
- I40IW_DEBUG_CM,
- "Api - connect(): port=0x%04x, cm_node=%p, cm_id = %p.\n",
- cm_node->rem_port,
- cm_node,
- cm_node->cm_id);
- return 0;
-
-err_out:
- if (cm_info.ipv4)
- i40iw_debug(&iwdev->sc_dev,
- I40IW_DEBUG_CM,
- "Api - connect() FAILED: dest addr=%pI4",
- cm_info.rem_addr);
- else
- i40iw_debug(&iwdev->sc_dev,
- I40IW_DEBUG_CM,
- "Api - connect() FAILED: dest addr=%pI6",
- cm_info.rem_addr);
-
- if (qhash_set)
- i40iw_manage_qhash(iwdev,
- &cm_info,
- I40IW_QHASH_TYPE_TCP_ESTABLISHED,
- I40IW_QHASH_MANAGE_TYPE_DELETE,
- NULL,
- false);
-
- if (apbvt_set && !i40iw_listen_port_in_use(&iwdev->cm_core,
- cm_info.loc_port))
- i40iw_manage_apbvt(iwdev,
- cm_info.loc_port,
- I40IW_MANAGE_APBVT_DEL);
- cm_id->rem_ref(cm_id);
- iwdev->cm_core.stats_connect_errs++;
- return err;
-}
-
-/**
- * i40iw_create_listen - registered call creating listener
- * @cm_id: cm information for passive connection
- * @backlog: to max accept pending count
- */
-int i40iw_create_listen(struct iw_cm_id *cm_id, int backlog)
-{
- struct i40iw_device *iwdev;
- struct i40iw_cm_listener *cm_listen_node;
- struct i40iw_cm_info cm_info;
- enum i40iw_status_code ret;
- struct sockaddr_in *laddr;
- struct sockaddr_in6 *laddr6;
- bool wildcard = false;
-
- iwdev = to_iwdev(cm_id->device);
- if (!iwdev)
- return -EINVAL;
-
- laddr = (struct sockaddr_in *)&cm_id->m_local_addr;
- laddr6 = (struct sockaddr_in6 *)&cm_id->m_local_addr;
- memset(&cm_info, 0, sizeof(cm_info));
- if (laddr->sin_family == AF_INET) {
- cm_info.ipv4 = true;
- cm_info.loc_addr[0] = ntohl(laddr->sin_addr.s_addr);
- cm_info.loc_port = ntohs(laddr->sin_port);
-
- if (laddr->sin_addr.s_addr != INADDR_ANY)
- cm_info.vlan_id = i40iw_get_vlan_ipv4(cm_info.loc_addr);
- else
- wildcard = true;
-
- } else {
- cm_info.ipv4 = false;
- i40iw_copy_ip_ntohl(cm_info.loc_addr,
- laddr6->sin6_addr.in6_u.u6_addr32);
- cm_info.loc_port = ntohs(laddr6->sin6_port);
- if (ipv6_addr_type(&laddr6->sin6_addr) != IPV6_ADDR_ANY)
- i40iw_netdev_vlan_ipv6(cm_info.loc_addr,
- &cm_info.vlan_id,
- NULL);
- else
- wildcard = true;
- }
- cm_info.backlog = backlog;
- cm_info.cm_id = cm_id;
-
- cm_listen_node = i40iw_make_listen_node(&iwdev->cm_core, iwdev, &cm_info);
- if (!cm_listen_node) {
- i40iw_pr_err("cm_listen_node == NULL\n");
- return -ENOMEM;
- }
-
- cm_id->provider_data = cm_listen_node;
-
- cm_listen_node->tos = cm_id->tos;
- cm_listen_node->user_pri = rt_tos2priority(cm_id->tos);
- cm_info.user_pri = cm_listen_node->user_pri;
-
- if (!cm_listen_node->reused_node) {
- if (wildcard) {
- if (cm_info.ipv4)
- ret = i40iw_add_mqh_4(iwdev,
- &cm_info,
- cm_listen_node);
- else
- ret = i40iw_add_mqh_6(iwdev,
- &cm_info,
- cm_listen_node);
- if (ret)
- goto error;
-
- ret = i40iw_manage_apbvt(iwdev,
- cm_info.loc_port,
- I40IW_MANAGE_APBVT_ADD);
-
- if (ret)
- goto error;
- } else {
- ret = i40iw_manage_qhash(iwdev,
- &cm_info,
- I40IW_QHASH_TYPE_TCP_SYN,
- I40IW_QHASH_MANAGE_TYPE_ADD,
- NULL,
- true);
- if (ret)
- goto error;
- cm_listen_node->qhash_set = true;
- ret = i40iw_manage_apbvt(iwdev,
- cm_info.loc_port,
- I40IW_MANAGE_APBVT_ADD);
- if (ret)
- goto error;
- }
- }
- cm_id->add_ref(cm_id);
- cm_listen_node->cm_core->stats_listen_created++;
- return 0;
- error:
- i40iw_cm_del_listen(&iwdev->cm_core, (void *)cm_listen_node, false);
- return -EINVAL;
-}
-
-/**
- * i40iw_destroy_listen - registered call to destroy listener
- * @cm_id: cm information for passive connection
- */
-int i40iw_destroy_listen(struct iw_cm_id *cm_id)
-{
- struct i40iw_device *iwdev;
-
- iwdev = to_iwdev(cm_id->device);
- if (cm_id->provider_data)
- i40iw_cm_del_listen(&iwdev->cm_core, cm_id->provider_data, true);
- else
- i40iw_pr_err("cm_id->provider_data was NULL\n");
-
- cm_id->rem_ref(cm_id);
-
- return 0;
-}
-
-/**
- * i40iw_cm_event_connected - handle connected active node
- * @event: the info for cm_node of connection
- */
-static void i40iw_cm_event_connected(struct i40iw_cm_event *event)
-{
- struct i40iw_qp *iwqp;
- struct i40iw_device *iwdev;
- struct i40iw_cm_node *cm_node;
- struct i40iw_sc_dev *dev;
- struct ib_qp_attr attr;
- struct iw_cm_id *cm_id;
- int status;
- bool read0;
-
- cm_node = event->cm_node;
- cm_id = cm_node->cm_id;
- iwqp = (struct i40iw_qp *)cm_id->provider_data;
- iwdev = to_iwdev(iwqp->ibqp.device);
- dev = &iwdev->sc_dev;
-
- if (iwqp->destroyed) {
- status = -ETIMEDOUT;
- goto error;
- }
- i40iw_cm_init_tsa_conn(iwqp, cm_node);
- read0 = (cm_node->send_rdma0_op == SEND_RDMA_READ_ZERO);
- if (iwqp->page)
- iwqp->sc_qp.qp_uk.sq_base = kmap(iwqp->page);
- dev->iw_priv_qp_ops->qp_send_rtt(&iwqp->sc_qp, read0);
- if (iwqp->page)
- kunmap(iwqp->page);
- status = i40iw_send_cm_event(cm_node, cm_id, IW_CM_EVENT_CONNECT_REPLY, 0);
- if (status)
- i40iw_pr_err("send cm event\n");
-
- memset(&attr, 0, sizeof(attr));
- attr.qp_state = IB_QPS_RTS;
- cm_node->qhash_set = false;
- i40iw_modify_qp(&iwqp->ibqp, &attr, IB_QP_STATE, NULL);
-
- cm_node->accelerated = 1;
- if (cm_node->accept_pend) {
- if (!cm_node->listener)
- i40iw_pr_err("listener is null for passive node\n");
- atomic_dec(&cm_node->listener->pend_accepts_cnt);
- cm_node->accept_pend = 0;
- }
- return;
-
-error:
- iwqp->cm_id = NULL;
- cm_id->provider_data = NULL;
- i40iw_send_cm_event(event->cm_node,
- cm_id,
- IW_CM_EVENT_CONNECT_REPLY,
- status);
- cm_id->rem_ref(cm_id);
- i40iw_rem_ref_cm_node(event->cm_node);
-}
-
-/**
- * i40iw_cm_event_reset - handle reset
- * @event: the info for cm_node of connection
- */
-static void i40iw_cm_event_reset(struct i40iw_cm_event *event)
-{
- struct i40iw_cm_node *cm_node = event->cm_node;
- struct iw_cm_id *cm_id = cm_node->cm_id;
- struct i40iw_qp *iwqp;
-
- if (!cm_id)
- return;
-
- iwqp = cm_id->provider_data;
- if (!iwqp)
- return;
-
- i40iw_debug(cm_node->dev,
- I40IW_DEBUG_CM,
- "reset event %p - cm_id = %p\n",
- event->cm_node, cm_id);
- iwqp->cm_id = NULL;
-
- i40iw_send_cm_event(cm_node, cm_node->cm_id, IW_CM_EVENT_DISCONNECT, -ECONNRESET);
- i40iw_send_cm_event(cm_node, cm_node->cm_id, IW_CM_EVENT_CLOSE, 0);
-}
-
-/**
- * i40iw_cm_event_handler - worker thread callback to send event to cm upper layer
- * @work: pointer of cm event info.
- */
-static void i40iw_cm_event_handler(struct work_struct *work)
-{
- struct i40iw_cm_event *event = container_of(work,
- struct i40iw_cm_event,
- event_work);
- struct i40iw_cm_node *cm_node;
-
- if (!event || !event->cm_node || !event->cm_node->cm_core)
- return;
-
- cm_node = event->cm_node;
-
- switch (event->type) {
- case I40IW_CM_EVENT_MPA_REQ:
- i40iw_send_cm_event(cm_node,
- cm_node->cm_id,
- IW_CM_EVENT_CONNECT_REQUEST,
- 0);
- break;
- case I40IW_CM_EVENT_RESET:
- i40iw_cm_event_reset(event);
- break;
- case I40IW_CM_EVENT_CONNECTED:
- if (!event->cm_node->cm_id ||
- (event->cm_node->state != I40IW_CM_STATE_OFFLOADED))
- break;
- i40iw_cm_event_connected(event);
- break;
- case I40IW_CM_EVENT_MPA_REJECT:
- if (!event->cm_node->cm_id ||
- (cm_node->state == I40IW_CM_STATE_OFFLOADED))
- break;
- i40iw_send_cm_event(cm_node,
- cm_node->cm_id,
- IW_CM_EVENT_CONNECT_REPLY,
- -ECONNREFUSED);
- break;
- case I40IW_CM_EVENT_ABORTED:
- if (!event->cm_node->cm_id ||
- (event->cm_node->state == I40IW_CM_STATE_OFFLOADED))
- break;
- i40iw_event_connect_error(event);
- break;
- default:
- i40iw_pr_err("event type = %d\n", event->type);
- break;
- }
-
- event->cm_info.cm_id->rem_ref(event->cm_info.cm_id);
- i40iw_rem_ref_cm_node(event->cm_node);
- kfree(event);
-}
-
-/**
- * i40iw_cm_post_event - queue event request for worker thread
- * @event: cm node's info for up event call
- */
-static void i40iw_cm_post_event(struct i40iw_cm_event *event)
-{
- atomic_inc(&event->cm_node->ref_count);
- event->cm_info.cm_id->add_ref(event->cm_info.cm_id);
- INIT_WORK(&event->event_work, i40iw_cm_event_handler);
-
- queue_work(event->cm_node->cm_core->event_wq, &event->event_work);
-}
-
-/**
- * i40iw_qhash_ctrl - enable/disable qhash for list
- * @iwdev: device pointer
- * @parent_listen_node: parent listen node
- * @nfo: cm info node
- * @ipaddr: Pointer to IPv4 or IPv6 address
- * @ipv4: flag indicating IPv4 when true
- * @ifup: flag indicating interface up when true
- *
- * Enables or disables the qhash for the node in the child
- * listen list that matches ipaddr. If no matching IP was found
- * it will allocate and add a new child listen node to the
- * parent listen node. The listen_list_lock is assumed to be
- * held when called.
- */
-static void i40iw_qhash_ctrl(struct i40iw_device *iwdev,
- struct i40iw_cm_listener *parent_listen_node,
- struct i40iw_cm_info *nfo,
- u32 *ipaddr, bool ipv4, bool ifup)
-{
- struct list_head *child_listen_list = &parent_listen_node->child_listen_list;
- struct i40iw_cm_listener *child_listen_node;
- struct list_head *pos, *tpos;
- enum i40iw_status_code ret;
- bool node_allocated = false;
- enum i40iw_quad_hash_manage_type op =
- ifup ? I40IW_QHASH_MANAGE_TYPE_ADD : I40IW_QHASH_MANAGE_TYPE_DELETE;
-
- list_for_each_safe(pos, tpos, child_listen_list) {
- child_listen_node =
- list_entry(pos,
- struct i40iw_cm_listener,
- child_listen_list);
- if (!memcmp(child_listen_node->loc_addr, ipaddr, ipv4 ? 4 : 16))
- goto set_qhash;
- }
-
- /* if not found then add a child listener if interface is going up */
- if (!ifup)
- return;
- child_listen_node = kzalloc(sizeof(*child_listen_node), GFP_ATOMIC);
- if (!child_listen_node)
- return;
- node_allocated = true;
- memcpy(child_listen_node, parent_listen_node, sizeof(*child_listen_node));
-
- memcpy(child_listen_node->loc_addr, ipaddr, ipv4 ? 4 : 16);
-
-set_qhash:
- memcpy(nfo->loc_addr,
- child_listen_node->loc_addr,
- sizeof(nfo->loc_addr));
- nfo->vlan_id = child_listen_node->vlan_id;
- ret = i40iw_manage_qhash(iwdev, nfo,
- I40IW_QHASH_TYPE_TCP_SYN,
- op,
- NULL, false);
- if (!ret) {
- child_listen_node->qhash_set = ifup;
- if (node_allocated)
- list_add(&child_listen_node->child_listen_list,
- &parent_listen_node->child_listen_list);
- } else if (node_allocated) {
- kfree(child_listen_node);
- }
-}
-
-/**
- * i40iw_cm_disconnect_all - disconnect all connected qp's
- * @iwdev: device pointer
- */
-void i40iw_cm_disconnect_all(struct i40iw_device *iwdev)
-{
- struct i40iw_cm_core *cm_core = &iwdev->cm_core;
- struct list_head *list_core_temp;
- struct list_head *list_node;
- struct i40iw_cm_node *cm_node;
- unsigned long flags;
- struct list_head connected_list;
- struct ib_qp_attr attr;
-
- INIT_LIST_HEAD(&connected_list);
- spin_lock_irqsave(&cm_core->ht_lock, flags);
- list_for_each_safe(list_node, list_core_temp, &cm_core->connected_nodes) {
- cm_node = container_of(list_node, struct i40iw_cm_node, list);
- atomic_inc(&cm_node->ref_count);
- list_add(&cm_node->connected_entry, &connected_list);
- }
- spin_unlock_irqrestore(&cm_core->ht_lock, flags);
-
- list_for_each_safe(list_node, list_core_temp, &connected_list) {
- cm_node = container_of(list_node, struct i40iw_cm_node, connected_entry);
- attr.qp_state = IB_QPS_ERR;
- i40iw_modify_qp(&cm_node->iwqp->ibqp, &attr, IB_QP_STATE, NULL);
- i40iw_rem_ref_cm_node(cm_node);
- }
-}
-
-/**
- * i40iw_ifdown_notify - process an ifdown on an interface
- * @iwdev: device pointer
- * @ipaddr: Pointer to IPv4 or IPv6 address
- * @ipv4: flag indicating IPv4 when true
- * @ifup: flag indicating interface up when true
- */
-void i40iw_if_notify(struct i40iw_device *iwdev, struct net_device *netdev,
- u32 *ipaddr, bool ipv4, bool ifup)
-{
- struct i40iw_cm_core *cm_core = &iwdev->cm_core;
- unsigned long flags;
- struct i40iw_cm_listener *listen_node;
- static const u32 ip_zero[4] = { 0, 0, 0, 0 };
- struct i40iw_cm_info nfo;
- u16 vlan_id = rdma_vlan_dev_vlan_id(netdev);
- enum i40iw_status_code ret;
- enum i40iw_quad_hash_manage_type op =
- ifup ? I40IW_QHASH_MANAGE_TYPE_ADD : I40IW_QHASH_MANAGE_TYPE_DELETE;
-
- /* Disable or enable qhash for listeners */
- spin_lock_irqsave(&cm_core->listen_list_lock, flags);
- list_for_each_entry(listen_node, &cm_core->listen_nodes, list) {
- if (vlan_id == listen_node->vlan_id &&
- (!memcmp(listen_node->loc_addr, ipaddr, ipv4 ? 4 : 16) ||
- !memcmp(listen_node->loc_addr, ip_zero, ipv4 ? 4 : 16))) {
- memcpy(nfo.loc_addr, listen_node->loc_addr,
- sizeof(nfo.loc_addr));
- nfo.loc_port = listen_node->loc_port;
- nfo.ipv4 = listen_node->ipv4;
- nfo.vlan_id = listen_node->vlan_id;
- nfo.user_pri = listen_node->user_pri;
- if (!list_empty(&listen_node->child_listen_list)) {
- i40iw_qhash_ctrl(iwdev,
- listen_node,
- &nfo,
- ipaddr, ipv4, ifup);
- } else if (memcmp(listen_node->loc_addr, ip_zero,
- ipv4 ? 4 : 16)) {
- ret = i40iw_manage_qhash(iwdev,
- &nfo,
- I40IW_QHASH_TYPE_TCP_SYN,
- op,
- NULL,
- false);
- if (!ret)
- listen_node->qhash_set = ifup;
- }
- }
- }
- spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
-
- /* disconnect any connected qp's on ifdown */
- if (!ifup)
- i40iw_cm_disconnect_all(iwdev);
-}
diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.h b/drivers/infiniband/hw/i40iw/i40iw_cm.h
deleted file mode 100644
index 2e52e38ffcf3..000000000000
--- a/drivers/infiniband/hw/i40iw/i40iw_cm.h
+++ /dev/null
@@ -1,453 +0,0 @@
-/*******************************************************************************
-*
-* Copyright (c) 2015-2016 Intel Corporation. All rights reserved.
-*
-* This software is available to you under a choice of one of two
-* licenses. You may choose to be licensed under the terms of the GNU
-* General Public License (GPL) Version 2, available from the file
-* COPYING in the main directory of this source tree, or the
-* OpenFabrics.org BSD license below:
-*
-* Redistribution and use in source and binary forms, with or
-* without modification, are permitted provided that the following
-* conditions are met:
-*
-* - Redistributions of source code must retain the above
-* copyright notice, this list of conditions and the following
-* disclaimer.
-*
-* - Redistributions in binary form must reproduce the above
-* copyright notice, this list of conditions and the following
-* disclaimer in the documentation and/or other materials
-* provided with the distribution.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-* SOFTWARE.
-*
-*******************************************************************************/
-
-#ifndef I40IW_CM_H
-#define I40IW_CM_H
-
-#define QUEUE_EVENTS
-
-#define I40IW_MANAGE_APBVT_DEL 0
-#define I40IW_MANAGE_APBVT_ADD 1
-
-#define I40IW_MPA_REQUEST_ACCEPT 1
-#define I40IW_MPA_REQUEST_REJECT 2
-
-/* IETF MPA -- defines, enums, structs */
-#define IEFT_MPA_KEY_REQ "MPA ID Req Frame"
-#define IEFT_MPA_KEY_REP "MPA ID Rep Frame"
-#define IETF_MPA_KEY_SIZE 16
-#define IETF_MPA_VERSION 1
-#define IETF_MAX_PRIV_DATA_LEN 512
-#define IETF_MPA_FRAME_SIZE 20
-#define IETF_RTR_MSG_SIZE 4
-#define IETF_MPA_V2_FLAG 0x10
-#define SNDMARKER_SEQNMASK 0x000001FF
-
-#define I40IW_MAX_IETF_SIZE 32
-
-/* IETF RTR MSG Fields */
-#define IETF_PEER_TO_PEER 0x8000
-#define IETF_FLPDU_ZERO_LEN 0x4000
-#define IETF_RDMA0_WRITE 0x8000
-#define IETF_RDMA0_READ 0x4000
-#define IETF_NO_IRD_ORD 0x3FFF
-
-/* HW-supported IRD sizes*/
-#define I40IW_HW_IRD_SETTING_2 2
-#define I40IW_HW_IRD_SETTING_4 4
-#define I40IW_HW_IRD_SETTING_8 8
-#define I40IW_HW_IRD_SETTING_16 16
-#define I40IW_HW_IRD_SETTING_32 32
-#define I40IW_HW_IRD_SETTING_64 64
-
-enum ietf_mpa_flags {
- IETF_MPA_FLAGS_MARKERS = 0x80, /* receive Markers */
- IETF_MPA_FLAGS_CRC = 0x40, /* receive Markers */
- IETF_MPA_FLAGS_REJECT = 0x20, /* Reject */
-};
-
-struct ietf_mpa_v1 {
- u8 key[IETF_MPA_KEY_SIZE];
- u8 flags;
- u8 rev;
- __be16 priv_data_len;
- u8 priv_data[0];
-};
-
-#define ietf_mpa_req_resp_frame ietf_mpa_frame
-
-struct ietf_rtr_msg {
- __be16 ctrl_ird;
- __be16 ctrl_ord;
-};
-
-struct ietf_mpa_v2 {
- u8 key[IETF_MPA_KEY_SIZE];
- u8 flags;
- u8 rev;
- __be16 priv_data_len;
- struct ietf_rtr_msg rtr_msg;
- u8 priv_data[0];
-};
-
-struct i40iw_cm_node;
-enum i40iw_timer_type {
- I40IW_TIMER_TYPE_SEND,
- I40IW_TIMER_TYPE_RECV,
- I40IW_TIMER_NODE_CLEANUP,
- I40IW_TIMER_TYPE_CLOSE,
-};
-
-#define I40IW_PASSIVE_STATE_INDICATED 0
-#define I40IW_DO_NOT_SEND_RESET_EVENT 1
-#define I40IW_SEND_RESET_EVENT 2
-
-#define MAX_I40IW_IFS 4
-
-#define SET_ACK 0x1
-#define SET_SYN 0x2
-#define SET_FIN 0x4
-#define SET_RST 0x8
-
-#define TCP_OPTIONS_PADDING 3
-
-struct option_base {
- u8 optionnum;
- u8 length;
-};
-
-enum option_numbers {
- OPTION_NUMBER_END,
- OPTION_NUMBER_NONE,
- OPTION_NUMBER_MSS,
- OPTION_NUMBER_WINDOW_SCALE,
- OPTION_NUMBER_SACK_PERM,
- OPTION_NUMBER_SACK,
- OPTION_NUMBER_WRITE0 = 0xbc
-};
-
-struct option_mss {
- u8 optionnum;
- u8 length;
- __be16 mss;
-};
-
-struct option_windowscale {
- u8 optionnum;
- u8 length;
- u8 shiftcount;
-};
-
-union all_known_options {
- char as_end;
- struct option_base as_base;
- struct option_mss as_mss;
- struct option_windowscale as_windowscale;
-};
-
-struct i40iw_timer_entry {
- struct list_head list;
- unsigned long timetosend; /* jiffies */
- struct i40iw_puda_buf *sqbuf;
- u32 type;
- u32 retrycount;
- u32 retranscount;
- u32 context;
- u32 send_retrans;
- int close_when_complete;
-};
-
-#define I40IW_DEFAULT_RETRYS 64
-#define I40IW_DEFAULT_RETRANS 8
-#define I40IW_DEFAULT_TTL 0x40
-#define I40IW_DEFAULT_RTT_VAR 0x6
-#define I40IW_DEFAULT_SS_THRESH 0x3FFFFFFF
-#define I40IW_DEFAULT_REXMIT_THRESH 8
-
-#define I40IW_RETRY_TIMEOUT HZ
-#define I40IW_SHORT_TIME 10
-#define I40IW_LONG_TIME (2 * HZ)
-#define I40IW_MAX_TIMEOUT ((unsigned long)(12 * HZ))
-
-#define I40IW_CM_HASHTABLE_SIZE 1024
-#define I40IW_CM_TCP_TIMER_INTERVAL 3000
-#define I40IW_CM_DEFAULT_MTU 1540
-#define I40IW_CM_DEFAULT_FRAME_CNT 10
-#define I40IW_CM_THREAD_STACK_SIZE 256
-#define I40IW_CM_DEFAULT_RCV_WND 64240
-#define I40IW_CM_DEFAULT_RCV_WND_SCALED 0x3fffc
-#define I40IW_CM_DEFAULT_RCV_WND_SCALE 2
-#define I40IW_CM_DEFAULT_FREE_PKTS 0x000A
-#define I40IW_CM_FREE_PKT_LO_WATERMARK 2
-
-#define I40IW_CM_DEFAULT_MSS 536
-
-#define I40IW_CM_DEF_SEQ 0x159bf75f
-#define I40IW_CM_DEF_LOCAL_ID 0x3b47
-
-#define I40IW_CM_DEF_SEQ2 0x18ed5740
-#define I40IW_CM_DEF_LOCAL_ID2 0xb807
-#define MAX_CM_BUFFER (I40IW_MAX_IETF_SIZE + IETF_MAX_PRIV_DATA_LEN)
-
-typedef u32 i40iw_addr_t;
-
-#define i40iw_cm_tsa_context i40iw_qp_context
-
-struct i40iw_qp;
-
-/* cm node transition states */
-enum i40iw_cm_node_state {
- I40IW_CM_STATE_UNKNOWN,
- I40IW_CM_STATE_INITED,
- I40IW_CM_STATE_LISTENING,
- I40IW_CM_STATE_SYN_RCVD,
- I40IW_CM_STATE_SYN_SENT,
- I40IW_CM_STATE_ONE_SIDE_ESTABLISHED,
- I40IW_CM_STATE_ESTABLISHED,
- I40IW_CM_STATE_ACCEPTING,
- I40IW_CM_STATE_MPAREQ_SENT,
- I40IW_CM_STATE_MPAREQ_RCVD,
- I40IW_CM_STATE_MPAREJ_RCVD,
- I40IW_CM_STATE_OFFLOADED,
- I40IW_CM_STATE_FIN_WAIT1,
- I40IW_CM_STATE_FIN_WAIT2,
- I40IW_CM_STATE_CLOSE_WAIT,
- I40IW_CM_STATE_TIME_WAIT,
- I40IW_CM_STATE_LAST_ACK,
- I40IW_CM_STATE_CLOSING,
- I40IW_CM_STATE_LISTENER_DESTROYED,
- I40IW_CM_STATE_CLOSED
-};
-
-enum mpa_frame_version {
- IETF_MPA_V1 = 1,
- IETF_MPA_V2 = 2
-};
-
-enum mpa_frame_key {
- MPA_KEY_REQUEST,
- MPA_KEY_REPLY
-};
-
-enum send_rdma0 {
- SEND_RDMA_READ_ZERO = 1,
- SEND_RDMA_WRITE_ZERO = 2
-};
-
-enum i40iw_tcpip_pkt_type {
- I40IW_PKT_TYPE_UNKNOWN,
- I40IW_PKT_TYPE_SYN,
- I40IW_PKT_TYPE_SYNACK,
- I40IW_PKT_TYPE_ACK,
- I40IW_PKT_TYPE_FIN,
- I40IW_PKT_TYPE_RST
-};
-
-/* CM context params */
-struct i40iw_cm_tcp_context {
- u8 client;
-
- u32 loc_seq_num;
- u32 loc_ack_num;
- u32 rem_ack_num;
- u32 rcv_nxt;
-
- u32 loc_id;
- u32 rem_id;
-
- u32 snd_wnd;
- u32 max_snd_wnd;
-
- u32 rcv_wnd;
- u32 mss;
- u8 snd_wscale;
- u8 rcv_wscale;
-
- struct timeval sent_ts;
-};
-
-enum i40iw_cm_listener_state {
- I40IW_CM_LISTENER_PASSIVE_STATE = 1,
- I40IW_CM_LISTENER_ACTIVE_STATE = 2,
- I40IW_CM_LISTENER_EITHER_STATE = 3
-};
-
-struct i40iw_cm_listener {
- struct list_head list;
- struct i40iw_cm_core *cm_core;
- u8 loc_mac[ETH_ALEN];
- u32 loc_addr[4];
- u16 loc_port;
- struct iw_cm_id *cm_id;
- atomic_t ref_count;
- struct i40iw_device *iwdev;
- atomic_t pend_accepts_cnt;
- int backlog;
- enum i40iw_cm_listener_state listener_state;
- u32 reused_node;
- u8 user_pri;
- u8 tos;
- u16 vlan_id;
- bool qhash_set;
- bool ipv4;
- struct list_head child_listen_list;
-
-};
-
-struct i40iw_kmem_info {
- void *addr;
- u32 size;
-};
-
-/* per connection node and node state information */
-struct i40iw_cm_node {
- u32 loc_addr[4], rem_addr[4];
- u16 loc_port, rem_port;
- u16 vlan_id;
- enum i40iw_cm_node_state state;
- u8 loc_mac[ETH_ALEN];
- u8 rem_mac[ETH_ALEN];
- atomic_t ref_count;
- struct i40iw_qp *iwqp;
- struct i40iw_device *iwdev;
- struct i40iw_sc_dev *dev;
- struct i40iw_cm_tcp_context tcp_cntxt;
- struct i40iw_cm_core *cm_core;
- struct i40iw_cm_node *loopbackpartner;
- struct i40iw_timer_entry *send_entry;
- struct i40iw_timer_entry *close_entry;
- spinlock_t retrans_list_lock; /* cm transmit packet */
- enum send_rdma0 send_rdma0_op;
- u16 ird_size;
- u16 ord_size;
- u16 mpav2_ird_ord;
- struct iw_cm_id *cm_id;
- struct list_head list;
- int accelerated;
- struct i40iw_cm_listener *listener;
- int apbvt_set;
- int accept_pend;
- struct list_head timer_entry;
- struct list_head reset_entry;
- struct list_head connected_entry;
- atomic_t passive_state;
- bool qhash_set;
- u8 user_pri;
- u8 tos;
- bool ipv4;
- bool snd_mark_en;
- u16 lsmm_size;
- enum mpa_frame_version mpa_frame_rev;
- struct i40iw_kmem_info pdata;
- union {
- struct ietf_mpa_v1 mpa_frame;
- struct ietf_mpa_v2 mpa_v2_frame;
- };
-
- u8 pdata_buf[IETF_MAX_PRIV_DATA_LEN];
- struct i40iw_kmem_info mpa_hdr;
-};
-
-/* structure for client or CM to fill when making CM api calls. */
-/* - only need to set relevant data, based on op. */
-struct i40iw_cm_info {
- struct iw_cm_id *cm_id;
- u16 loc_port;
- u16 rem_port;
- u32 loc_addr[4];
- u32 rem_addr[4];
- u16 vlan_id;
- int backlog;
- u8 user_pri;
- u8 tos;
- bool ipv4;
-};
-
-/* CM event codes */
-enum i40iw_cm_event_type {
- I40IW_CM_EVENT_UNKNOWN,
- I40IW_CM_EVENT_ESTABLISHED,
- I40IW_CM_EVENT_MPA_REQ,
- I40IW_CM_EVENT_MPA_CONNECT,
- I40IW_CM_EVENT_MPA_ACCEPT,
- I40IW_CM_EVENT_MPA_REJECT,
- I40IW_CM_EVENT_MPA_ESTABLISHED,
- I40IW_CM_EVENT_CONNECTED,
- I40IW_CM_EVENT_RESET,
- I40IW_CM_EVENT_ABORTED
-};
-
-/* event to post to CM event handler */
-struct i40iw_cm_event {
- enum i40iw_cm_event_type type;
- struct i40iw_cm_info cm_info;
- struct work_struct event_work;
- struct i40iw_cm_node *cm_node;
-};
-
-struct i40iw_cm_core {
- struct i40iw_device *iwdev;
- struct i40iw_sc_dev *dev;
-
- struct list_head listen_nodes;
- struct list_head connected_nodes;
-
- struct timer_list tcp_timer;
-
- struct workqueue_struct *event_wq;
- struct workqueue_struct *disconn_wq;
-
- spinlock_t ht_lock; /* manage hash table */
- spinlock_t listen_list_lock; /* listen list */
-
- u64 stats_nodes_created;
- u64 stats_nodes_destroyed;
- u64 stats_listen_created;
- u64 stats_listen_destroyed;
- u64 stats_listen_nodes_created;
- u64 stats_listen_nodes_destroyed;
- u64 stats_loopbacks;
- u64 stats_accepts;
- u64 stats_rejects;
- u64 stats_connect_errs;
- u64 stats_passive_errs;
- u64 stats_pkt_retrans;
- u64 stats_backlog_drops;
-};
-
-int i40iw_schedule_cm_timer(struct i40iw_cm_node *cm_node,
- struct i40iw_puda_buf *sqbuf,
- enum i40iw_timer_type type,
- int send_retrans,
- int close_when_complete);
-
-int i40iw_accept(struct iw_cm_id *, struct iw_cm_conn_param *);
-int i40iw_reject(struct iw_cm_id *, const void *, u8);
-int i40iw_connect(struct iw_cm_id *, struct iw_cm_conn_param *);
-int i40iw_create_listen(struct iw_cm_id *, int);
-int i40iw_destroy_listen(struct iw_cm_id *);
-
-int i40iw_cm_start(struct i40iw_device *);
-int i40iw_cm_stop(struct i40iw_device *);
-
-int i40iw_arp_table(struct i40iw_device *iwdev,
- u32 *ip_addr,
- bool ipv4,
- u8 *mac_addr,
- u32 action);
-
-void i40iw_if_notify(struct i40iw_device *iwdev, struct net_device *netdev,
- u32 *ipaddr, bool ipv4, bool ifup);
-void i40iw_cm_disconnect_all(struct i40iw_device *iwdev);
-#endif /* I40IW_CM_H */
diff --git a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c
deleted file mode 100644
index dced4f48cbdd..000000000000
--- a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c
+++ /dev/null
@@ -1,5047 +0,0 @@
-/*******************************************************************************
-*
-* Copyright (c) 2015-2016 Intel Corporation. All rights reserved.
-*
-* This software is available to you under a choice of one of two
-* licenses. You may choose to be licensed under the terms of the GNU
-* General Public License (GPL) Version 2, available from the file
-* COPYING in the main directory of this source tree, or the
-* OpenFabrics.org BSD license below:
-*
-* Redistribution and use in source and binary forms, with or
-* without modification, are permitted provided that the following
-* conditions are met:
-*
-* - Redistributions of source code must retain the above
-* copyright notice, this list of conditions and the following
-* disclaimer.
-*
-* - Redistributions in binary form must reproduce the above
-* copyright notice, this list of conditions and the following
-* disclaimer in the documentation and/or other materials
-* provided with the distribution.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-* SOFTWARE.
-*
-*******************************************************************************/
-
-#include "i40iw_osdep.h"
-#include "i40iw_register.h"
-#include "i40iw_status.h"
-#include "i40iw_hmc.h"
-
-#include "i40iw_d.h"
-#include "i40iw_type.h"
-#include "i40iw_p.h"
-#include "i40iw_vf.h"
-#include "i40iw_virtchnl.h"
-
-/**
- * i40iw_insert_wqe_hdr - write wqe header
- * @wqe: cqp wqe for header
- * @header: header for the cqp wqe
- */
-static inline void i40iw_insert_wqe_hdr(u64 *wqe, u64 header)
-{
- wmb(); /* make sure WQE is populated before polarity is set */
- set_64bit_val(wqe, 24, header);
-}
-
-/**
- * i40iw_get_cqp_reg_info - get head and tail for cqp using registers
- * @cqp: struct for cqp hw
- * @val: cqp tail register value
- * @tail:wqtail register value
- * @error: cqp processing err
- */
-static inline void i40iw_get_cqp_reg_info(struct i40iw_sc_cqp *cqp,
- u32 *val,
- u32 *tail,
- u32 *error)
-{
- if (cqp->dev->is_pf) {
- *val = i40iw_rd32(cqp->dev->hw, I40E_PFPE_CQPTAIL);
- *tail = RS_32(*val, I40E_PFPE_CQPTAIL_WQTAIL);
- *error = RS_32(*val, I40E_PFPE_CQPTAIL_CQP_OP_ERR);
- } else {
- *val = i40iw_rd32(cqp->dev->hw, I40E_VFPE_CQPTAIL1);
- *tail = RS_32(*val, I40E_VFPE_CQPTAIL_WQTAIL);
- *error = RS_32(*val, I40E_VFPE_CQPTAIL_CQP_OP_ERR);
- }
-}
-
-/**
- * i40iw_cqp_poll_registers - poll cqp registers
- * @cqp: struct for cqp hw
- * @tail:wqtail register value
- * @count: how many times to try for completion
- */
-static enum i40iw_status_code i40iw_cqp_poll_registers(
- struct i40iw_sc_cqp *cqp,
- u32 tail,
- u32 count)
-{
- u32 i = 0;
- u32 newtail, error, val;
-
- while (i < count) {
- i++;
- i40iw_get_cqp_reg_info(cqp, &val, &newtail, &error);
- if (error) {
- error = (cqp->dev->is_pf) ?
- i40iw_rd32(cqp->dev->hw, I40E_PFPE_CQPERRCODES) :
- i40iw_rd32(cqp->dev->hw, I40E_VFPE_CQPERRCODES1);
- return I40IW_ERR_CQP_COMPL_ERROR;
- }
- if (newtail != tail) {
- /* SUCCESS */
- I40IW_RING_MOVE_TAIL(cqp->sq_ring);
- cqp->dev->cqp_cmd_stats[OP_COMPLETED_COMMANDS]++;
- return 0;
- }
- udelay(I40IW_SLEEP_COUNT);
- }
- return I40IW_ERR_TIMEOUT;
-}
-
-/**
- * i40iw_sc_parse_fpm_commit_buf - parse fpm commit buffer
- * @buf: ptr to fpm commit buffer
- * @info: ptr to i40iw_hmc_obj_info struct
- * @sd: number of SDs for HMC objects
- *
- * parses fpm commit info and copy base value
- * of hmc objects in hmc_info
- */
-static enum i40iw_status_code i40iw_sc_parse_fpm_commit_buf(
- u64 *buf,
- struct i40iw_hmc_obj_info *info,
- u32 *sd)
-{
- u64 temp;
- u64 size;
- u64 base = 0;
- u32 i, j;
- u32 k = 0;
- u32 low;
-
- /* copy base values in obj_info */
- for (i = I40IW_HMC_IW_QP, j = 0;
- i <= I40IW_HMC_IW_PBLE; i++, j += 8) {
- get_64bit_val(buf, j, &temp);
- info[i].base = RS_64_1(temp, 32) * 512;
- if (info[i].base > base) {
- base = info[i].base;
- k = i;
- }
- low = (u32)(temp);
- if (low)
- info[i].cnt = low;
- }
- size = info[k].cnt * info[k].size + info[k].base;
- if (size & 0x1FFFFF)
- *sd = (u32)((size >> 21) + 1); /* add 1 for remainder */
- else
- *sd = (u32)(size >> 21);
-
- return 0;
-}
-
-/**
- * i40iw_sc_parse_fpm_query_buf() - parses fpm query buffer
- * @buf: ptr to fpm query buffer
- * @info: ptr to i40iw_hmc_obj_info struct
- * @hmc_fpm_misc: ptr to fpm data
- *
- * parses fpm query buffer and copy max_cnt and
- * size value of hmc objects in hmc_info
- */
-static enum i40iw_status_code i40iw_sc_parse_fpm_query_buf(
- u64 *buf,
- struct i40iw_hmc_info *hmc_info,
- struct i40iw_hmc_fpm_misc *hmc_fpm_misc)
-{
- u64 temp;
- struct i40iw_hmc_obj_info *obj_info;
- u32 i, j, size;
- u16 max_pe_sds;
-
- obj_info = hmc_info->hmc_obj;
-
- get_64bit_val(buf, 0, &temp);
- hmc_info->first_sd_index = (u16)RS_64(temp, I40IW_QUERY_FPM_FIRST_PE_SD_INDEX);
- max_pe_sds = (u16)RS_64(temp, I40IW_QUERY_FPM_MAX_PE_SDS);
-
- /* Reduce SD count for VFs by 1 to account for PBLE backing page rounding */
- if (hmc_info->hmc_fn_id >= I40IW_FIRST_VF_FPM_ID)
- max_pe_sds--;
- hmc_fpm_misc->max_sds = max_pe_sds;
- hmc_info->sd_table.sd_cnt = max_pe_sds + hmc_info->first_sd_index;
-
- for (i = I40IW_HMC_IW_QP, j = 8;
- i <= I40IW_HMC_IW_ARP; i++, j += 8) {
- get_64bit_val(buf, j, &temp);
- if (i == I40IW_HMC_IW_QP)
- obj_info[i].max_cnt = (u32)RS_64(temp, I40IW_QUERY_FPM_MAX_QPS);
- else if (i == I40IW_HMC_IW_CQ)
- obj_info[i].max_cnt = (u32)RS_64(temp, I40IW_QUERY_FPM_MAX_CQS);
- else
- obj_info[i].max_cnt = (u32)temp;
-
- size = (u32)RS_64_1(temp, 32);
- obj_info[i].size = ((u64)1 << size);
- }
- for (i = I40IW_HMC_IW_MR, j = 48;
- i <= I40IW_HMC_IW_PBLE; i++, j += 8) {
- get_64bit_val(buf, j, &temp);
- obj_info[i].max_cnt = (u32)temp;
- size = (u32)RS_64_1(temp, 32);
- obj_info[i].size = LS_64_1(1, size);
- }
-
- get_64bit_val(buf, 120, &temp);
- hmc_fpm_misc->max_ceqs = (u8)RS_64(temp, I40IW_QUERY_FPM_MAX_CEQS);
- get_64bit_val(buf, 120, &temp);
- hmc_fpm_misc->ht_multiplier = RS_64(temp, I40IW_QUERY_FPM_HTMULTIPLIER);
- get_64bit_val(buf, 120, &temp);
- hmc_fpm_misc->timer_bucket = RS_64(temp, I40IW_QUERY_FPM_TIMERBUCKET);
- get_64bit_val(buf, 64, &temp);
- hmc_fpm_misc->xf_block_size = RS_64(temp, I40IW_QUERY_FPM_XFBLOCKSIZE);
- if (!hmc_fpm_misc->xf_block_size)
- return I40IW_ERR_INVALID_SIZE;
- get_64bit_val(buf, 80, &temp);
- hmc_fpm_misc->q1_block_size = RS_64(temp, I40IW_QUERY_FPM_Q1BLOCKSIZE);
- if (!hmc_fpm_misc->q1_block_size)
- return I40IW_ERR_INVALID_SIZE;
- return 0;
-}
-
-/**
- * i40iw_fill_qos_list - Change all unknown qs handles to available ones
- * @qs_list: list of qs_handles to be fixed with valid qs_handles
- */
-static void i40iw_fill_qos_list(u16 *qs_list)
-{
- u16 qshandle = qs_list[0];
- int i;
-
- for (i = 0; i < I40IW_MAX_USER_PRIORITY; i++) {
- if (qs_list[i] == QS_HANDLE_UNKNOWN)
- qs_list[i] = qshandle;
- else
- qshandle = qs_list[i];
- }
-}
-
-/**
- * i40iw_qp_from_entry - Given entry, get to the qp structure
- * @entry: Points to list of qp structure
- */
-static struct i40iw_sc_qp *i40iw_qp_from_entry(struct list_head *entry)
-{
- if (!entry)
- return NULL;
-
- return (struct i40iw_sc_qp *)((char *)entry - offsetof(struct i40iw_sc_qp, list));
-}
-
-/**
- * i40iw_get_qp - get the next qp from the list given current qp
- * @head: Listhead of qp's
- * @qp: current qp
- */
-static struct i40iw_sc_qp *i40iw_get_qp(struct list_head *head, struct i40iw_sc_qp *qp)
-{
- struct list_head *entry = NULL;
- struct list_head *lastentry;
-
- if (list_empty(head))
- return NULL;
-
- if (!qp) {
- entry = head->next;
- } else {
- lastentry = &qp->list;
- entry = (lastentry != head) ? lastentry->next : NULL;
- }
-
- return i40iw_qp_from_entry(entry);
-}
-
-/**
- * i40iw_change_l2params - given the new l2 parameters, change all qp
- * @vsi: pointer to the vsi structure
- * @l2params: New paramaters from l2
- */
-void i40iw_change_l2params(struct i40iw_sc_vsi *vsi, struct i40iw_l2params *l2params)
-{
- struct i40iw_sc_dev *dev = vsi->dev;
- struct i40iw_sc_qp *qp = NULL;
- bool qs_handle_change = false;
- bool mss_change = false;
- unsigned long flags;
- u16 qs_handle;
- int i;
-
- if (vsi->mss != l2params->mss) {
- mss_change = true;
- vsi->mss = l2params->mss;
- }
-
- i40iw_fill_qos_list(l2params->qs_handle_list);
- for (i = 0; i < I40IW_MAX_USER_PRIORITY; i++) {
- qs_handle = l2params->qs_handle_list[i];
- if (vsi->qos[i].qs_handle != qs_handle)
- qs_handle_change = true;
- else if (!mss_change)
- continue; /* no MSS nor qs handle change */
- spin_lock_irqsave(&vsi->qos[i].lock, flags);
- qp = i40iw_get_qp(&vsi->qos[i].qplist, qp);
- while (qp) {
- if (mss_change)
- i40iw_qp_mss_modify(dev, qp);
- if (qs_handle_change) {
- qp->qs_handle = qs_handle;
- /* issue cqp suspend command */
- i40iw_qp_suspend_resume(dev, qp, true);
- }
- qp = i40iw_get_qp(&vsi->qos[i].qplist, qp);
- }
- spin_unlock_irqrestore(&vsi->qos[i].lock, flags);
- vsi->qos[i].qs_handle = qs_handle;
- }
-}
-
-/**
- * i40iw_qp_rem_qos - remove qp from qos lists during destroy qp
- * @qp: qp to be removed from qos
- */
-static void i40iw_qp_rem_qos(struct i40iw_sc_qp *qp)
-{
- struct i40iw_sc_vsi *vsi = qp->vsi;
- unsigned long flags;
-
- if (!qp->on_qoslist)
- return;
- spin_lock_irqsave(&vsi->qos[qp->user_pri].lock, flags);
- list_del(&qp->list);
- spin_unlock_irqrestore(&vsi->qos[qp->user_pri].lock, flags);
-}
-
-/**
- * i40iw_qp_add_qos - called during setctx fot qp to be added to qos
- * @qp: qp to be added to qos
- */
-void i40iw_qp_add_qos(struct i40iw_sc_qp *qp)
-{
- struct i40iw_sc_vsi *vsi = qp->vsi;
- unsigned long flags;
-
- if (qp->on_qoslist)
- return;
- spin_lock_irqsave(&vsi->qos[qp->user_pri].lock, flags);
- qp->qs_handle = vsi->qos[qp->user_pri].qs_handle;
- list_add(&qp->list, &vsi->qos[qp->user_pri].qplist);
- qp->on_qoslist = true;
- spin_unlock_irqrestore(&vsi->qos[qp->user_pri].lock, flags);
-}
-
-/**
- * i40iw_sc_pd_init - initialize sc pd struct
- * @dev: sc device struct
- * @pd: sc pd ptr
- * @pd_id: pd_id for allocated pd
- * @abi_ver: ABI version from user context, -1 if not valid
- */
-static void i40iw_sc_pd_init(struct i40iw_sc_dev *dev,
- struct i40iw_sc_pd *pd,
- u16 pd_id,
- int abi_ver)
-{
- pd->size = sizeof(*pd);
- pd->pd_id = pd_id;
- pd->abi_ver = abi_ver;
- pd->dev = dev;
-}
-
-/**
- * i40iw_get_encoded_wqe_size - given wq size, returns hardware encoded size
- * @wqsize: size of the wq (sq, rq, srq) to encoded_size
- * @cqpsq: encoded size for sq for cqp as its encoded size is 1+ other wq's
- */
-u8 i40iw_get_encoded_wqe_size(u32 wqsize, bool cqpsq)
-{
- u8 encoded_size = 0;
-
- /* cqp sq's hw coded value starts from 1 for size of 4
- * while it starts from 0 for qp' wq's.
- */
- if (cqpsq)
- encoded_size = 1;
- wqsize >>= 2;
- while (wqsize >>= 1)
- encoded_size++;
- return encoded_size;
-}
-
-/**
- * i40iw_sc_cqp_init - Initialize buffers for a control Queue Pair
- * @cqp: IWARP control queue pair pointer
- * @info: IWARP control queue pair init info pointer
- *
- * Initializes the object and context buffers for a control Queue Pair.
- */
-static enum i40iw_status_code i40iw_sc_cqp_init(struct i40iw_sc_cqp *cqp,
- struct i40iw_cqp_init_info *info)
-{
- u8 hw_sq_size;
-
- if ((info->sq_size > I40IW_CQP_SW_SQSIZE_2048) ||
- (info->sq_size < I40IW_CQP_SW_SQSIZE_4) ||
- ((info->sq_size & (info->sq_size - 1))))
- return I40IW_ERR_INVALID_SIZE;
-
- hw_sq_size = i40iw_get_encoded_wqe_size(info->sq_size, true);
- cqp->size = sizeof(*cqp);
- cqp->sq_size = info->sq_size;
- cqp->hw_sq_size = hw_sq_size;
- cqp->sq_base = info->sq;
- cqp->host_ctx = info->host_ctx;
- cqp->sq_pa = info->sq_pa;
- cqp->host_ctx_pa = info->host_ctx_pa;
- cqp->dev = info->dev;
- cqp->struct_ver = info->struct_ver;
- cqp->scratch_array = info->scratch_array;
- cqp->polarity = 0;
- cqp->en_datacenter_tcp = info->en_datacenter_tcp;
- cqp->enabled_vf_count = info->enabled_vf_count;
- cqp->hmc_profile = info->hmc_profile;
- info->dev->cqp = cqp;
-
- I40IW_RING_INIT(cqp->sq_ring, cqp->sq_size);
- cqp->dev->cqp_cmd_stats[OP_REQUESTED_COMMANDS] = 0;
- cqp->dev->cqp_cmd_stats[OP_COMPLETED_COMMANDS] = 0;
-
- i40iw_debug(cqp->dev, I40IW_DEBUG_WQE,
- "%s: sq_size[%04d] hw_sq_size[%04d] sq_base[%p] sq_pa[%llxh] cqp[%p] polarity[x%04X]\n",
- __func__, cqp->sq_size, cqp->hw_sq_size,
- cqp->sq_base, cqp->sq_pa, cqp, cqp->polarity);
- return 0;
-}
-
-/**
- * i40iw_sc_cqp_create - create cqp during bringup
- * @cqp: struct for cqp hw
- * @maj_err: If error, major err number
- * @min_err: If error, minor err number
- */
-static enum i40iw_status_code i40iw_sc_cqp_create(struct i40iw_sc_cqp *cqp,
- u16 *maj_err,
- u16 *min_err)
-{
- u64 temp;
- u32 cnt = 0, p1, p2, val = 0, err_code;
- enum i40iw_status_code ret_code;
-
- ret_code = i40iw_allocate_dma_mem(cqp->dev->hw,
- &cqp->sdbuf,
- 128,
- I40IW_SD_BUF_ALIGNMENT);
-
- if (ret_code)
- goto exit;
-
- temp = LS_64(cqp->hw_sq_size, I40IW_CQPHC_SQSIZE) |
- LS_64(cqp->struct_ver, I40IW_CQPHC_SVER);
-
- set_64bit_val(cqp->host_ctx, 0, temp);
- set_64bit_val(cqp->host_ctx, 8, cqp->sq_pa);
- temp = LS_64(cqp->enabled_vf_count, I40IW_CQPHC_ENABLED_VFS) |
- LS_64(cqp->hmc_profile, I40IW_CQPHC_HMC_PROFILE);
- set_64bit_val(cqp->host_ctx, 16, temp);
- set_64bit_val(cqp->host_ctx, 24, (uintptr_t)cqp);
- set_64bit_val(cqp->host_ctx, 32, 0);
- set_64bit_val(cqp->host_ctx, 40, 0);
- set_64bit_val(cqp->host_ctx, 48, 0);
- set_64bit_val(cqp->host_ctx, 56, 0);
-
- i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "CQP_HOST_CTX",
- cqp->host_ctx, I40IW_CQP_CTX_SIZE * 8);
-
- p1 = RS_32_1(cqp->host_ctx_pa, 32);
- p2 = (u32)cqp->host_ctx_pa;
-
- if (cqp->dev->is_pf) {
- i40iw_wr32(cqp->dev->hw, I40E_PFPE_CCQPHIGH, p1);
- i40iw_wr32(cqp->dev->hw, I40E_PFPE_CCQPLOW, p2);
- } else {
- i40iw_wr32(cqp->dev->hw, I40E_VFPE_CCQPHIGH1, p1);
- i40iw_wr32(cqp->dev->hw, I40E_VFPE_CCQPLOW1, p2);
- }
- do {
- if (cnt++ > I40IW_DONE_COUNT) {
- i40iw_free_dma_mem(cqp->dev->hw, &cqp->sdbuf);
- ret_code = I40IW_ERR_TIMEOUT;
- /*
- * read PFPE_CQPERRORCODES register to get the minor
- * and major error code
- */
- if (cqp->dev->is_pf)
- err_code = i40iw_rd32(cqp->dev->hw, I40E_PFPE_CQPERRCODES);
- else
- err_code = i40iw_rd32(cqp->dev->hw, I40E_VFPE_CQPERRCODES1);
- *min_err = RS_32(err_code, I40E_PFPE_CQPERRCODES_CQP_MINOR_CODE);
- *maj_err = RS_32(err_code, I40E_PFPE_CQPERRCODES_CQP_MAJOR_CODE);
- goto exit;
- }
- udelay(I40IW_SLEEP_COUNT);
- if (cqp->dev->is_pf)
- val = i40iw_rd32(cqp->dev->hw, I40E_PFPE_CCQPSTATUS);
- else
- val = i40iw_rd32(cqp->dev->hw, I40E_VFPE_CCQPSTATUS1);
- } while (!val);
-
-exit:
- if (!ret_code)
- cqp->process_cqp_sds = i40iw_update_sds_noccq;
- return ret_code;
-}
-
-/**
- * i40iw_sc_cqp_post_sq - post of cqp's sq
- * @cqp: struct for cqp hw
- */
-void i40iw_sc_cqp_post_sq(struct i40iw_sc_cqp *cqp)
-{
- if (cqp->dev->is_pf)
- i40iw_wr32(cqp->dev->hw, I40E_PFPE_CQPDB, I40IW_RING_GETCURRENT_HEAD(cqp->sq_ring));
- else
- i40iw_wr32(cqp->dev->hw, I40E_VFPE_CQPDB1, I40IW_RING_GETCURRENT_HEAD(cqp->sq_ring));
-
- i40iw_debug(cqp->dev,
- I40IW_DEBUG_WQE,
- "%s: HEAD_TAIL[%04d,%04d,%04d]\n",
- __func__,
- cqp->sq_ring.head,
- cqp->sq_ring.tail,
- cqp->sq_ring.size);
-}
-
-/**
- * i40iw_sc_cqp_get_next_send_wqe - get next wqe on cqp sq
- * @cqp: struct for cqp hw
- * @wqe_idx: we index of cqp ring
- */
-u64 *i40iw_sc_cqp_get_next_send_wqe(struct i40iw_sc_cqp *cqp, u64 scratch)
-{
- u64 *wqe = NULL;
- u32 wqe_idx;
- enum i40iw_status_code ret_code;
-
- if (I40IW_RING_FULL_ERR(cqp->sq_ring)) {
- i40iw_debug(cqp->dev,
- I40IW_DEBUG_WQE,
- "%s: ring is full head %x tail %x size %x\n",
- __func__,
- cqp->sq_ring.head,
- cqp->sq_ring.tail,
- cqp->sq_ring.size);
- return NULL;
- }
- I40IW_ATOMIC_RING_MOVE_HEAD(cqp->sq_ring, wqe_idx, ret_code);
- cqp->dev->cqp_cmd_stats[OP_REQUESTED_COMMANDS]++;
- if (ret_code)
- return NULL;
- if (!wqe_idx)
- cqp->polarity = !cqp->polarity;
-
- wqe = cqp->sq_base[wqe_idx].elem;
- cqp->scratch_array[wqe_idx] = scratch;
- I40IW_CQP_INIT_WQE(wqe);
-
- return wqe;
-}
-
-/**
- * i40iw_sc_cqp_destroy - destroy cqp during close
- * @cqp: struct for cqp hw
- */
-static enum i40iw_status_code i40iw_sc_cqp_destroy(struct i40iw_sc_cqp *cqp)
-{
- u32 cnt = 0, val = 1;
- enum i40iw_status_code ret_code = 0;
- u32 cqpstat_addr;
-
- if (cqp->dev->is_pf) {
- i40iw_wr32(cqp->dev->hw, I40E_PFPE_CCQPHIGH, 0);
- i40iw_wr32(cqp->dev->hw, I40E_PFPE_CCQPLOW, 0);
- cqpstat_addr = I40E_PFPE_CCQPSTATUS;
- } else {
- i40iw_wr32(cqp->dev->hw, I40E_VFPE_CCQPHIGH1, 0);
- i40iw_wr32(cqp->dev->hw, I40E_VFPE_CCQPLOW1, 0);
- cqpstat_addr = I40E_VFPE_CCQPSTATUS1;
- }
- do {
- if (cnt++ > I40IW_DONE_COUNT) {
- ret_code = I40IW_ERR_TIMEOUT;
- break;
- }
- udelay(I40IW_SLEEP_COUNT);
- val = i40iw_rd32(cqp->dev->hw, cqpstat_addr);
- } while (val);
-
- i40iw_free_dma_mem(cqp->dev->hw, &cqp->sdbuf);
- return ret_code;
-}
-
-/**
- * i40iw_sc_ccq_arm - enable intr for control cq
- * @ccq: ccq sc struct
- */
-static void i40iw_sc_ccq_arm(struct i40iw_sc_cq *ccq)
-{
- u64 temp_val;
- u16 sw_cq_sel;
- u8 arm_next_se;
- u8 arm_seq_num;
-
- /* write to cq doorbell shadow area */
- /* arm next se should always be zero */
- get_64bit_val(ccq->cq_uk.shadow_area, 32, &temp_val);
-
- sw_cq_sel = (u16)RS_64(temp_val, I40IW_CQ_DBSA_SW_CQ_SELECT);
- arm_next_se = (u8)RS_64(temp_val, I40IW_CQ_DBSA_ARM_NEXT_SE);
-
- arm_seq_num = (u8)RS_64(temp_val, I40IW_CQ_DBSA_ARM_SEQ_NUM);
- arm_seq_num++;
-
- temp_val = LS_64(arm_seq_num, I40IW_CQ_DBSA_ARM_SEQ_NUM) |
- LS_64(sw_cq_sel, I40IW_CQ_DBSA_SW_CQ_SELECT) |
- LS_64(arm_next_se, I40IW_CQ_DBSA_ARM_NEXT_SE) |
- LS_64(1, I40IW_CQ_DBSA_ARM_NEXT);
-
- set_64bit_val(ccq->cq_uk.shadow_area, 32, temp_val);
-
- wmb(); /* make sure shadow area is updated before arming */
-
- if (ccq->dev->is_pf)
- i40iw_wr32(ccq->dev->hw, I40E_PFPE_CQARM, ccq->cq_uk.cq_id);
- else
- i40iw_wr32(ccq->dev->hw, I40E_VFPE_CQARM1, ccq->cq_uk.cq_id);
-}
-
-/**
- * i40iw_sc_ccq_get_cqe_info - get ccq's cq entry
- * @ccq: ccq sc struct
- * @info: completion q entry to return
- */
-static enum i40iw_status_code i40iw_sc_ccq_get_cqe_info(
- struct i40iw_sc_cq *ccq,
- struct i40iw_ccq_cqe_info *info)
-{
- u64 qp_ctx, temp, temp1;
- u64 *cqe;
- struct i40iw_sc_cqp *cqp;
- u32 wqe_idx;
- u8 polarity;
- enum i40iw_status_code ret_code = 0;
-
- if (ccq->cq_uk.avoid_mem_cflct)
- cqe = (u64 *)I40IW_GET_CURRENT_EXTENDED_CQ_ELEMENT(&ccq->cq_uk);
- else
- cqe = (u64 *)I40IW_GET_CURRENT_CQ_ELEMENT(&ccq->cq_uk);
-
- get_64bit_val(cqe, 24, &temp);
- polarity = (u8)RS_64(temp, I40IW_CQ_VALID);
- if (polarity != ccq->cq_uk.polarity)
- return I40IW_ERR_QUEUE_EMPTY;
-
- get_64bit_val(cqe, 8, &qp_ctx);
- cqp = (struct i40iw_sc_cqp *)(unsigned long)qp_ctx;
- info->error = (bool)RS_64(temp, I40IW_CQ_ERROR);
- info->min_err_code = (u16)RS_64(temp, I40IW_CQ_MINERR);
- if (info->error) {
- info->maj_err_code = (u16)RS_64(temp, I40IW_CQ_MAJERR);
- info->min_err_code = (u16)RS_64(temp, I40IW_CQ_MINERR);
- }
- wqe_idx = (u32)RS_64(temp, I40IW_CQ_WQEIDX);
- info->scratch = cqp->scratch_array[wqe_idx];
-
- get_64bit_val(cqe, 16, &temp1);
- info->op_ret_val = (u32)RS_64(temp1, I40IW_CCQ_OPRETVAL);
- get_64bit_val(cqp->sq_base[wqe_idx].elem, 24, &temp1);
- info->op_code = (u8)RS_64(temp1, I40IW_CQPSQ_OPCODE);
- info->cqp = cqp;
-
- /* move the head for cq */
- I40IW_RING_MOVE_HEAD(ccq->cq_uk.cq_ring, ret_code);
- if (I40IW_RING_GETCURRENT_HEAD(ccq->cq_uk.cq_ring) == 0)
- ccq->cq_uk.polarity ^= 1;
-
- /* update cq tail in cq shadow memory also */
- I40IW_RING_MOVE_TAIL(ccq->cq_uk.cq_ring);
- set_64bit_val(ccq->cq_uk.shadow_area,
- 0,
- I40IW_RING_GETCURRENT_HEAD(ccq->cq_uk.cq_ring));
- wmb(); /* write shadow area before tail */
- I40IW_RING_MOVE_TAIL(cqp->sq_ring);
- ccq->dev->cqp_cmd_stats[OP_COMPLETED_COMMANDS]++;
-
- return ret_code;
-}
-
-/**
- * i40iw_sc_poll_for_cqp_op_done - Waits for last write to complete in CQP SQ
- * @cqp: struct for cqp hw
- * @op_code: cqp opcode for completion
- * @info: completion q entry to return
- */
-static enum i40iw_status_code i40iw_sc_poll_for_cqp_op_done(
- struct i40iw_sc_cqp *cqp,
- u8 op_code,
- struct i40iw_ccq_cqe_info *compl_info)
-{
- struct i40iw_ccq_cqe_info info;
- struct i40iw_sc_cq *ccq;
- enum i40iw_status_code ret_code = 0;
- u32 cnt = 0;
-
- memset(&info, 0, sizeof(info));
- ccq = cqp->dev->ccq;
- while (1) {
- if (cnt++ > I40IW_DONE_COUNT)
- return I40IW_ERR_TIMEOUT;
-
- if (i40iw_sc_ccq_get_cqe_info(ccq, &info)) {
- udelay(I40IW_SLEEP_COUNT);
- continue;
- }
-
- if (info.error) {
- ret_code = I40IW_ERR_CQP_COMPL_ERROR;
- break;
- }
- /* check if opcode is cq create */
- if (op_code != info.op_code) {
- i40iw_debug(cqp->dev, I40IW_DEBUG_WQE,
- "%s: opcode mismatch for my op code 0x%x, returned opcode %x\n",
- __func__, op_code, info.op_code);
- }
- /* success, exit out of the loop */
- if (op_code == info.op_code)
- break;
- }
-
- if (compl_info)
- memcpy(compl_info, &info, sizeof(*compl_info));
-
- return ret_code;
-}
-
-/**
- * i40iw_sc_manage_push_page - Handle push page
- * @cqp: struct for cqp hw
- * @info: push page info
- * @scratch: u64 saved to be used during cqp completion
- * @post_sq: flag for cqp db to ring
- */
-static enum i40iw_status_code i40iw_sc_manage_push_page(
- struct i40iw_sc_cqp *cqp,
- struct i40iw_cqp_manage_push_page_info *info,
- u64 scratch,
- bool post_sq)
-{
- u64 *wqe;
- u64 header;
-
- if (info->push_idx >= I40IW_MAX_PUSH_PAGE_COUNT)
- return I40IW_ERR_INVALID_PUSH_PAGE_INDEX;
-
- wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
- if (!wqe)
- return I40IW_ERR_RING_FULL;
-
- set_64bit_val(wqe, 16, info->qs_handle);
-
- header = LS_64(info->push_idx, I40IW_CQPSQ_MPP_PPIDX) |
- LS_64(I40IW_CQP_OP_MANAGE_PUSH_PAGES, I40IW_CQPSQ_OPCODE) |
- LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID) |
- LS_64(info->free_page, I40IW_CQPSQ_MPP_FREE_PAGE);
-
- i40iw_insert_wqe_hdr(wqe, header);
-
- i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "MANAGE_PUSH_PAGES WQE",
- wqe, I40IW_CQP_WQE_SIZE * 8);
-
- if (post_sq)
- i40iw_sc_cqp_post_sq(cqp);
- return 0;
-}
-
-/**
- * i40iw_sc_manage_hmc_pm_func_table - manage of function table
- * @cqp: struct for cqp hw
- * @scratch: u64 saved to be used during cqp completion
- * @vf_index: vf index for cqp
- * @free_pm_fcn: function number
- * @post_sq: flag for cqp db to ring
- */
-static enum i40iw_status_code i40iw_sc_manage_hmc_pm_func_table(
- struct i40iw_sc_cqp *cqp,
- u64 scratch,
- u8 vf_index,
- bool free_pm_fcn,
- bool post_sq)
-{
- u64 *wqe;
- u64 header;
-
- if (vf_index >= I40IW_MAX_VF_PER_PF)
- return I40IW_ERR_INVALID_VF_ID;
- wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
- if (!wqe)
- return I40IW_ERR_RING_FULL;
-
- header = LS_64(vf_index, I40IW_CQPSQ_MHMC_VFIDX) |
- LS_64(I40IW_CQP_OP_MANAGE_HMC_PM_FUNC_TABLE, I40IW_CQPSQ_OPCODE) |
- LS_64(free_pm_fcn, I40IW_CQPSQ_MHMC_FREEPMFN) |
- LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
-
- i40iw_insert_wqe_hdr(wqe, header);
- i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "MANAGE_HMC_PM_FUNC_TABLE WQE",
- wqe, I40IW_CQP_WQE_SIZE * 8);
- if (post_sq)
- i40iw_sc_cqp_post_sq(cqp);
- return 0;
-}
-
-/**
- * i40iw_sc_set_hmc_resource_profile - cqp wqe for hmc profile
- * @cqp: struct for cqp hw
- * @scratch: u64 saved to be used during cqp completion
- * @hmc_profile_type: type of profile to set
- * @vf_num: vf number for profile
- * @post_sq: flag for cqp db to ring
- * @poll_registers: flag to poll register for cqp completion
- */
-static enum i40iw_status_code i40iw_sc_set_hmc_resource_profile(
- struct i40iw_sc_cqp *cqp,
- u64 scratch,
- u8 hmc_profile_type,
- u8 vf_num, bool post_sq,
- bool poll_registers)
-{
- u64 *wqe;
- u64 header;
- u32 val, tail, error;
- enum i40iw_status_code ret_code = 0;
-
- wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
- if (!wqe)
- return I40IW_ERR_RING_FULL;
-
- set_64bit_val(wqe, 16,
- (LS_64(hmc_profile_type, I40IW_CQPSQ_SHMCRP_HMC_PROFILE) |
- LS_64(vf_num, I40IW_CQPSQ_SHMCRP_VFNUM)));
-
- header = LS_64(I40IW_CQP_OP_SET_HMC_RESOURCE_PROFILE, I40IW_CQPSQ_OPCODE) |
- LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
-
- i40iw_insert_wqe_hdr(wqe, header);
-
- i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "MANAGE_HMC_PM_FUNC_TABLE WQE",
- wqe, I40IW_CQP_WQE_SIZE * 8);
-
- i40iw_get_cqp_reg_info(cqp, &val, &tail, &error);
- if (error)
- return I40IW_ERR_CQP_COMPL_ERROR;
-
- if (post_sq) {
- i40iw_sc_cqp_post_sq(cqp);
- if (poll_registers)
- ret_code = i40iw_cqp_poll_registers(cqp, tail, 1000000);
- else
- ret_code = i40iw_sc_poll_for_cqp_op_done(cqp,
- I40IW_CQP_OP_SHMC_PAGES_ALLOCATED,
- NULL);
- }
-
- return ret_code;
-}
-
-/**
- * i40iw_sc_manage_hmc_pm_func_table_done - wait for cqp wqe completion for function table
- * @cqp: struct for cqp hw
- */
-static enum i40iw_status_code i40iw_sc_manage_hmc_pm_func_table_done(struct i40iw_sc_cqp *cqp)
-{
- return i40iw_sc_poll_for_cqp_op_done(cqp, I40IW_CQP_OP_MANAGE_HMC_PM_FUNC_TABLE, NULL);
-}
-
-/**
- * i40iw_sc_commit_fpm_values_done - wait for cqp eqe completion for fpm commit
- * @cqp: struct for cqp hw
- */
-static enum i40iw_status_code i40iw_sc_commit_fpm_values_done(struct i40iw_sc_cqp *cqp)
-{
- return i40iw_sc_poll_for_cqp_op_done(cqp, I40IW_CQP_OP_COMMIT_FPM_VALUES, NULL);
-}
-
-/**
- * i40iw_sc_commit_fpm_values - cqp wqe for commit fpm values
- * @cqp: struct for cqp hw
- * @scratch: u64 saved to be used during cqp completion
- * @hmc_fn_id: hmc function id
- * @commit_fpm_mem; Memory for fpm values
- * @post_sq: flag for cqp db to ring
- * @wait_type: poll ccq or cqp registers for cqp completion
- */
-static enum i40iw_status_code i40iw_sc_commit_fpm_values(
- struct i40iw_sc_cqp *cqp,
- u64 scratch,
- u8 hmc_fn_id,
- struct i40iw_dma_mem *commit_fpm_mem,
- bool post_sq,
- u8 wait_type)
-{
- u64 *wqe;
- u64 header;
- u32 tail, val, error;
- enum i40iw_status_code ret_code = 0;
-
- wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
- if (!wqe)
- return I40IW_ERR_RING_FULL;
-
- set_64bit_val(wqe, 16, hmc_fn_id);
- set_64bit_val(wqe, 32, commit_fpm_mem->pa);
-
- header = LS_64(I40IW_CQP_OP_COMMIT_FPM_VALUES, I40IW_CQPSQ_OPCODE) |
- LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
-
- i40iw_insert_wqe_hdr(wqe, header);
-
- i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "COMMIT_FPM_VALUES WQE",
- wqe, I40IW_CQP_WQE_SIZE * 8);
-
- i40iw_get_cqp_reg_info(cqp, &val, &tail, &error);
- if (error)
- return I40IW_ERR_CQP_COMPL_ERROR;
-
- if (post_sq) {
- i40iw_sc_cqp_post_sq(cqp);
-
- if (wait_type == I40IW_CQP_WAIT_POLL_REGS)
- ret_code = i40iw_cqp_poll_registers(cqp, tail, I40IW_DONE_COUNT);
- else if (wait_type == I40IW_CQP_WAIT_POLL_CQ)
- ret_code = i40iw_sc_commit_fpm_values_done(cqp);
- }
-
- return ret_code;
-}
-
-/**
- * i40iw_sc_query_fpm_values_done - poll for cqp wqe completion for query fpm
- * @cqp: struct for cqp hw
- */
-static enum i40iw_status_code i40iw_sc_query_fpm_values_done(struct i40iw_sc_cqp *cqp)
-{
- return i40iw_sc_poll_for_cqp_op_done(cqp, I40IW_CQP_OP_QUERY_FPM_VALUES, NULL);
-}
-
-/**
- * i40iw_sc_query_fpm_values - cqp wqe query fpm values
- * @cqp: struct for cqp hw
- * @scratch: u64 saved to be used during cqp completion
- * @hmc_fn_id: hmc function id
- * @query_fpm_mem: memory for return fpm values
- * @post_sq: flag for cqp db to ring
- * @wait_type: poll ccq or cqp registers for cqp completion
- */
-static enum i40iw_status_code i40iw_sc_query_fpm_values(
- struct i40iw_sc_cqp *cqp,
- u64 scratch,
- u8 hmc_fn_id,
- struct i40iw_dma_mem *query_fpm_mem,
- bool post_sq,
- u8 wait_type)
-{
- u64 *wqe;
- u64 header;
- u32 tail, val, error;
- enum i40iw_status_code ret_code = 0;
-
- wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
- if (!wqe)
- return I40IW_ERR_RING_FULL;
-
- set_64bit_val(wqe, 16, hmc_fn_id);
- set_64bit_val(wqe, 32, query_fpm_mem->pa);
-
- header = LS_64(I40IW_CQP_OP_QUERY_FPM_VALUES, I40IW_CQPSQ_OPCODE) |
- LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
-
- i40iw_insert_wqe_hdr(wqe, header);
-
- i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "QUERY_FPM WQE",
- wqe, I40IW_CQP_WQE_SIZE * 8);
-
- /* read the tail from CQP_TAIL register */
- i40iw_get_cqp_reg_info(cqp, &val, &tail, &error);
-
- if (error)
- return I40IW_ERR_CQP_COMPL_ERROR;
-
- if (post_sq) {
- i40iw_sc_cqp_post_sq(cqp);
- if (wait_type == I40IW_CQP_WAIT_POLL_REGS)
- ret_code = i40iw_cqp_poll_registers(cqp, tail, I40IW_DONE_COUNT);
- else if (wait_type == I40IW_CQP_WAIT_POLL_CQ)
- ret_code = i40iw_sc_query_fpm_values_done(cqp);
- }
-
- return ret_code;
-}
-
-/**
- * i40iw_sc_add_arp_cache_entry - cqp wqe add arp cache entry
- * @cqp: struct for cqp hw
- * @info: arp entry information
- * @scratch: u64 saved to be used during cqp completion
- * @post_sq: flag for cqp db to ring
- */
-static enum i40iw_status_code i40iw_sc_add_arp_cache_entry(
- struct i40iw_sc_cqp *cqp,
- struct i40iw_add_arp_cache_entry_info *info,
- u64 scratch,
- bool post_sq)
-{
- u64 *wqe;
- u64 temp, header;
-
- wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
- if (!wqe)
- return I40IW_ERR_RING_FULL;
- set_64bit_val(wqe, 8, info->reach_max);
-
- temp = info->mac_addr[5] |
- LS_64_1(info->mac_addr[4], 8) |
- LS_64_1(info->mac_addr[3], 16) |
- LS_64_1(info->mac_addr[2], 24) |
- LS_64_1(info->mac_addr[1], 32) |
- LS_64_1(info->mac_addr[0], 40);
-
- set_64bit_val(wqe, 16, temp);
-
- header = info->arp_index |
- LS_64(I40IW_CQP_OP_MANAGE_ARP, I40IW_CQPSQ_OPCODE) |
- LS_64((info->permanent ? 1 : 0), I40IW_CQPSQ_MAT_PERMANENT) |
- LS_64(1, I40IW_CQPSQ_MAT_ENTRYVALID) |
- LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
-
- i40iw_insert_wqe_hdr(wqe, header);
-
- i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "ARP_CACHE_ENTRY WQE",
- wqe, I40IW_CQP_WQE_SIZE * 8);
-
- if (post_sq)
- i40iw_sc_cqp_post_sq(cqp);
- return 0;
-}
-
-/**
- * i40iw_sc_del_arp_cache_entry - dele arp cache entry
- * @cqp: struct for cqp hw
- * @scratch: u64 saved to be used during cqp completion
- * @arp_index: arp index to delete arp entry
- * @post_sq: flag for cqp db to ring
- */
-static enum i40iw_status_code i40iw_sc_del_arp_cache_entry(
- struct i40iw_sc_cqp *cqp,
- u64 scratch,
- u16 arp_index,
- bool post_sq)
-{
- u64 *wqe;
- u64 header;
-
- wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
- if (!wqe)
- return I40IW_ERR_RING_FULL;
-
- header = arp_index |
- LS_64(I40IW_CQP_OP_MANAGE_ARP, I40IW_CQPSQ_OPCODE) |
- LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
- i40iw_insert_wqe_hdr(wqe, header);
-
- i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "ARP_CACHE_DEL_ENTRY WQE",
- wqe, I40IW_CQP_WQE_SIZE * 8);
-
- if (post_sq)
- i40iw_sc_cqp_post_sq(cqp);
- return 0;
-}
-
-/**
- * i40iw_sc_query_arp_cache_entry - cqp wqe to query arp and arp index
- * @cqp: struct for cqp hw
- * @scratch: u64 saved to be used during cqp completion
- * @arp_index: arp index to delete arp entry
- * @post_sq: flag for cqp db to ring
- */
-static enum i40iw_status_code i40iw_sc_query_arp_cache_entry(
- struct i40iw_sc_cqp *cqp,
- u64 scratch,
- u16 arp_index,
- bool post_sq)
-{
- u64 *wqe;
- u64 header;
-
- wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
- if (!wqe)
- return I40IW_ERR_RING_FULL;
-
- header = arp_index |
- LS_64(I40IW_CQP_OP_MANAGE_ARP, I40IW_CQPSQ_OPCODE) |
- LS_64(1, I40IW_CQPSQ_MAT_QUERY) |
- LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
-
- i40iw_insert_wqe_hdr(wqe, header);
-
- i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "QUERY_ARP_CACHE_ENTRY WQE",
- wqe, I40IW_CQP_WQE_SIZE * 8);
-
- if (post_sq)
- i40iw_sc_cqp_post_sq(cqp);
- return 0;
-}
-
-/**
- * i40iw_sc_manage_apbvt_entry - for adding and deleting apbvt entries
- * @cqp: struct for cqp hw
- * @info: info for apbvt entry to add or delete
- * @scratch: u64 saved to be used during cqp completion
- * @post_sq: flag for cqp db to ring
- */
-static enum i40iw_status_code i40iw_sc_manage_apbvt_entry(
- struct i40iw_sc_cqp *cqp,
- struct i40iw_apbvt_info *info,
- u64 scratch,
- bool post_sq)
-{
- u64 *wqe;
- u64 header;
-
- wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
- if (!wqe)
- return I40IW_ERR_RING_FULL;
-
- set_64bit_val(wqe, 16, info->port);
-
- header = LS_64(I40IW_CQP_OP_MANAGE_APBVT, I40IW_CQPSQ_OPCODE) |
- LS_64(info->add, I40IW_CQPSQ_MAPT_ADDPORT) |
- LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
-
- i40iw_insert_wqe_hdr(wqe, header);
-
- i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "MANAGE_APBVT WQE",
- wqe, I40IW_CQP_WQE_SIZE * 8);
-
- if (post_sq)
- i40iw_sc_cqp_post_sq(cqp);
- return 0;
-}
-
-/**
- * i40iw_sc_manage_qhash_table_entry - manage quad hash entries
- * @cqp: struct for cqp hw
- * @info: info for quad hash to manage
- * @scratch: u64 saved to be used during cqp completion
- * @post_sq: flag for cqp db to ring
- *
- * This is called before connection establishment is started. For passive connections, when
- * listener is created, it will call with entry type of I40IW_QHASH_TYPE_TCP_SYN with local
- * ip address and tcp port. When SYN is received (passive connections) or
- * sent (active connections), this routine is called with entry type of
- * I40IW_QHASH_TYPE_TCP_ESTABLISHED and quad is passed in info.
- *
- * When iwarp connection is done and its state moves to RTS, the quad hash entry in
- * the hardware will point to iwarp's qp number and requires no calls from the driver.
- */
-static enum i40iw_status_code i40iw_sc_manage_qhash_table_entry(
- struct i40iw_sc_cqp *cqp,
- struct i40iw_qhash_table_info *info,
- u64 scratch,
- bool post_sq)
-{
- u64 *wqe;
- u64 qw1 = 0;
- u64 qw2 = 0;
- u64 temp;
- struct i40iw_sc_vsi *vsi = info->vsi;
-
- wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
- if (!wqe)
- return I40IW_ERR_RING_FULL;
-
- temp = info->mac_addr[5] |
- LS_64_1(info->mac_addr[4], 8) |
- LS_64_1(info->mac_addr[3], 16) |
- LS_64_1(info->mac_addr[2], 24) |
- LS_64_1(info->mac_addr[1], 32) |
- LS_64_1(info->mac_addr[0], 40);
-
- set_64bit_val(wqe, 0, temp);
-
- qw1 = LS_64(info->qp_num, I40IW_CQPSQ_QHASH_QPN) |
- LS_64(info->dest_port, I40IW_CQPSQ_QHASH_DEST_PORT);
- if (info->ipv4_valid) {
- set_64bit_val(wqe,
- 48,
- LS_64(info->dest_ip[0], I40IW_CQPSQ_QHASH_ADDR3));
- } else {
- set_64bit_val(wqe,
- 56,
- LS_64(info->dest_ip[0], I40IW_CQPSQ_QHASH_ADDR0) |
- LS_64(info->dest_ip[1], I40IW_CQPSQ_QHASH_ADDR1));
-
- set_64bit_val(wqe,
- 48,
- LS_64(info->dest_ip[2], I40IW_CQPSQ_QHASH_ADDR2) |
- LS_64(info->dest_ip[3], I40IW_CQPSQ_QHASH_ADDR3));
- }
- qw2 = LS_64(vsi->qos[info->user_pri].qs_handle, I40IW_CQPSQ_QHASH_QS_HANDLE);
- if (info->vlan_valid)
- qw2 |= LS_64(info->vlan_id, I40IW_CQPSQ_QHASH_VLANID);
- set_64bit_val(wqe, 16, qw2);
- if (info->entry_type == I40IW_QHASH_TYPE_TCP_ESTABLISHED) {
- qw1 |= LS_64(info->src_port, I40IW_CQPSQ_QHASH_SRC_PORT);
- if (!info->ipv4_valid) {
- set_64bit_val(wqe,
- 40,
- LS_64(info->src_ip[0], I40IW_CQPSQ_QHASH_ADDR0) |
- LS_64(info->src_ip[1], I40IW_CQPSQ_QHASH_ADDR1));
- set_64bit_val(wqe,
- 32,
- LS_64(info->src_ip[2], I40IW_CQPSQ_QHASH_ADDR2) |
- LS_64(info->src_ip[3], I40IW_CQPSQ_QHASH_ADDR3));
- } else {
- set_64bit_val(wqe,
- 32,
- LS_64(info->src_ip[0], I40IW_CQPSQ_QHASH_ADDR3));
- }
- }
-
- set_64bit_val(wqe, 8, qw1);
- temp = LS_64(cqp->polarity, I40IW_CQPSQ_QHASH_WQEVALID) |
- LS_64(I40IW_CQP_OP_MANAGE_QUAD_HASH_TABLE_ENTRY, I40IW_CQPSQ_QHASH_OPCODE) |
- LS_64(info->manage, I40IW_CQPSQ_QHASH_MANAGE) |
- LS_64(info->ipv4_valid, I40IW_CQPSQ_QHASH_IPV4VALID) |
- LS_64(info->vlan_valid, I40IW_CQPSQ_QHASH_VLANVALID) |
- LS_64(info->entry_type, I40IW_CQPSQ_QHASH_ENTRYTYPE);
-
- i40iw_insert_wqe_hdr(wqe, temp);
-
- i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "MANAGE_QHASH WQE",
- wqe, I40IW_CQP_WQE_SIZE * 8);
-
- if (post_sq)
- i40iw_sc_cqp_post_sq(cqp);
- return 0;
-}
-
-/**
- * i40iw_sc_alloc_local_mac_ipaddr_entry - cqp wqe for loc mac entry
- * @cqp: struct for cqp hw
- * @scratch: u64 saved to be used during cqp completion
- * @post_sq: flag for cqp db to ring
- */
-static enum i40iw_status_code i40iw_sc_alloc_local_mac_ipaddr_entry(
- struct i40iw_sc_cqp *cqp,
- u64 scratch,
- bool post_sq)
-{
- u64 *wqe;
- u64 header;
-
- wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
- if (!wqe)
- return I40IW_ERR_RING_FULL;
- header = LS_64(I40IW_CQP_OP_ALLOCATE_LOC_MAC_IP_TABLE_ENTRY, I40IW_CQPSQ_OPCODE) |
- LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
-
- i40iw_insert_wqe_hdr(wqe, header);
- i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "ALLOCATE_LOCAL_MAC_IPADDR WQE",
- wqe, I40IW_CQP_WQE_SIZE * 8);
- if (post_sq)
- i40iw_sc_cqp_post_sq(cqp);
- return 0;
-}
-
-/**
- * i40iw_sc_add_local_mac_ipaddr_entry - add mac enry
- * @cqp: struct for cqp hw
- * @info:mac addr info
- * @scratch: u64 saved to be used during cqp completion
- * @post_sq: flag for cqp db to ring
- */
-static enum i40iw_status_code i40iw_sc_add_local_mac_ipaddr_entry(
- struct i40iw_sc_cqp *cqp,
- struct i40iw_local_mac_ipaddr_entry_info *info,
- u64 scratch,
- bool post_sq)
-{
- u64 *wqe;
- u64 temp, header;
-
- wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
- if (!wqe)
- return I40IW_ERR_RING_FULL;
- temp = info->mac_addr[5] |
- LS_64_1(info->mac_addr[4], 8) |
- LS_64_1(info->mac_addr[3], 16) |
- LS_64_1(info->mac_addr[2], 24) |
- LS_64_1(info->mac_addr[1], 32) |
- LS_64_1(info->mac_addr[0], 40);
-
- set_64bit_val(wqe, 32, temp);
-
- header = LS_64(info->entry_idx, I40IW_CQPSQ_MLIPA_IPTABLEIDX) |
- LS_64(I40IW_CQP_OP_MANAGE_LOC_MAC_IP_TABLE, I40IW_CQPSQ_OPCODE) |
- LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
-
- i40iw_insert_wqe_hdr(wqe, header);
-
- i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "ADD_LOCAL_MAC_IPADDR WQE",
- wqe, I40IW_CQP_WQE_SIZE * 8);
-
- if (post_sq)
- i40iw_sc_cqp_post_sq(cqp);
- return 0;
-}
-
-/**
- * i40iw_sc_del_local_mac_ipaddr_entry - cqp wqe to dele local mac
- * @cqp: struct for cqp hw
- * @scratch: u64 saved to be used during cqp completion
- * @entry_idx: index of mac entry
- * @ ignore_ref_count: to force mac adde delete
- * @post_sq: flag for cqp db to ring
- */
-static enum i40iw_status_code i40iw_sc_del_local_mac_ipaddr_entry(
- struct i40iw_sc_cqp *cqp,
- u64 scratch,
- u8 entry_idx,
- u8 ignore_ref_count,
- bool post_sq)
-{
- u64 *wqe;
- u64 header;
-
- wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
- if (!wqe)
- return I40IW_ERR_RING_FULL;
- header = LS_64(entry_idx, I40IW_CQPSQ_MLIPA_IPTABLEIDX) |
- LS_64(I40IW_CQP_OP_MANAGE_LOC_MAC_IP_TABLE, I40IW_CQPSQ_OPCODE) |
- LS_64(1, I40IW_CQPSQ_MLIPA_FREEENTRY) |
- LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID) |
- LS_64(ignore_ref_count, I40IW_CQPSQ_MLIPA_IGNORE_REF_CNT);
-
- i40iw_insert_wqe_hdr(wqe, header);
-
- i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "DEL_LOCAL_MAC_IPADDR WQE",
- wqe, I40IW_CQP_WQE_SIZE * 8);
-
- if (post_sq)
- i40iw_sc_cqp_post_sq(cqp);
- return 0;
-}
-
-/**
- * i40iw_sc_cqp_nop - send a nop wqe
- * @cqp: struct for cqp hw
- * @scratch: u64 saved to be used during cqp completion
- * @post_sq: flag for cqp db to ring
- */
-static enum i40iw_status_code i40iw_sc_cqp_nop(struct i40iw_sc_cqp *cqp,
- u64 scratch,
- bool post_sq)
-{
- u64 *wqe;
- u64 header;
-
- wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
- if (!wqe)
- return I40IW_ERR_RING_FULL;
- header = LS_64(I40IW_CQP_OP_NOP, I40IW_CQPSQ_OPCODE) |
- LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
- i40iw_insert_wqe_hdr(wqe, header);
- i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "NOP WQE",
- wqe, I40IW_CQP_WQE_SIZE * 8);
-
- if (post_sq)
- i40iw_sc_cqp_post_sq(cqp);
- return 0;
-}
-
-/**
- * i40iw_sc_ceq_init - initialize ceq
- * @ceq: ceq sc structure
- * @info: ceq initialization info
- */
-static enum i40iw_status_code i40iw_sc_ceq_init(struct i40iw_sc_ceq *ceq,
- struct i40iw_ceq_init_info *info)
-{
- u32 pble_obj_cnt;
-
- if ((info->elem_cnt < I40IW_MIN_CEQ_ENTRIES) ||
- (info->elem_cnt > I40IW_MAX_CEQ_ENTRIES))
- return I40IW_ERR_INVALID_SIZE;
-
- if (info->ceq_id >= I40IW_MAX_CEQID)
- return I40IW_ERR_INVALID_CEQ_ID;
-
- pble_obj_cnt = info->dev->hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].cnt;
-
- if (info->virtual_map && (info->first_pm_pbl_idx >= pble_obj_cnt))
- return I40IW_ERR_INVALID_PBLE_INDEX;
-
- ceq->size = sizeof(*ceq);
- ceq->ceqe_base = (struct i40iw_ceqe *)info->ceqe_base;
- ceq->ceq_id = info->ceq_id;
- ceq->dev = info->dev;
- ceq->elem_cnt = info->elem_cnt;
- ceq->ceq_elem_pa = info->ceqe_pa;
- ceq->virtual_map = info->virtual_map;
-
- ceq->pbl_chunk_size = (ceq->virtual_map ? info->pbl_chunk_size : 0);
- ceq->first_pm_pbl_idx = (ceq->virtual_map ? info->first_pm_pbl_idx : 0);
- ceq->pbl_list = (ceq->virtual_map ? info->pbl_list : NULL);
-
- ceq->tph_en = info->tph_en;
- ceq->tph_val = info->tph_val;
- ceq->polarity = 1;
- I40IW_RING_INIT(ceq->ceq_ring, ceq->elem_cnt);
- ceq->dev->ceq[info->ceq_id] = ceq;
-
- return 0;
-}
-
-/**
- * i40iw_sc_ceq_create - create ceq wqe
- * @ceq: ceq sc structure
- * @scratch: u64 saved to be used during cqp completion
- * @post_sq: flag for cqp db to ring
- */
-static enum i40iw_status_code i40iw_sc_ceq_create(struct i40iw_sc_ceq *ceq,
- u64 scratch,
- bool post_sq)
-{
- struct i40iw_sc_cqp *cqp;
- u64 *wqe;
- u64 header;
-
- cqp = ceq->dev->cqp;
- wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
- if (!wqe)
- return I40IW_ERR_RING_FULL;
- set_64bit_val(wqe, 16, ceq->elem_cnt);
- set_64bit_val(wqe, 32, (ceq->virtual_map ? 0 : ceq->ceq_elem_pa));
- set_64bit_val(wqe, 48, (ceq->virtual_map ? ceq->first_pm_pbl_idx : 0));
- set_64bit_val(wqe, 56, LS_64(ceq->tph_val, I40IW_CQPSQ_TPHVAL));
-
- header = ceq->ceq_id |
- LS_64(I40IW_CQP_OP_CREATE_CEQ, I40IW_CQPSQ_OPCODE) |
- LS_64(ceq->pbl_chunk_size, I40IW_CQPSQ_CEQ_LPBLSIZE) |
- LS_64(ceq->virtual_map, I40IW_CQPSQ_CEQ_VMAP) |
- LS_64(ceq->tph_en, I40IW_CQPSQ_TPHEN) |
- LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
-
- i40iw_insert_wqe_hdr(wqe, header);
-
- i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "CEQ_CREATE WQE",
- wqe, I40IW_CQP_WQE_SIZE * 8);
-
- if (post_sq)
- i40iw_sc_cqp_post_sq(cqp);
- return 0;
-}
-
-/**
- * i40iw_sc_cceq_create_done - poll for control ceq wqe to complete
- * @ceq: ceq sc structure
- */
-static enum i40iw_status_code i40iw_sc_cceq_create_done(struct i40iw_sc_ceq *ceq)
-{
- struct i40iw_sc_cqp *cqp;
-
- cqp = ceq->dev->cqp;
- return i40iw_sc_poll_for_cqp_op_done(cqp, I40IW_CQP_OP_CREATE_CEQ, NULL);
-}
-
-/**
- * i40iw_sc_cceq_destroy_done - poll for destroy cceq to complete
- * @ceq: ceq sc structure
- */
-static enum i40iw_status_code i40iw_sc_cceq_destroy_done(struct i40iw_sc_ceq *ceq)
-{
- struct i40iw_sc_cqp *cqp;
-
- cqp = ceq->dev->cqp;
- cqp->process_cqp_sds = i40iw_update_sds_noccq;
- return i40iw_sc_poll_for_cqp_op_done(cqp, I40IW_CQP_OP_DESTROY_CEQ, NULL);
-}
-
-/**
- * i40iw_sc_cceq_create - create cceq
- * @ceq: ceq sc structure
- * @scratch: u64 saved to be used during cqp completion
- */
-static enum i40iw_status_code i40iw_sc_cceq_create(struct i40iw_sc_ceq *ceq, u64 scratch)
-{
- enum i40iw_status_code ret_code;
-
- ret_code = i40iw_sc_ceq_create(ceq, scratch, true);
- if (!ret_code)
- ret_code = i40iw_sc_cceq_create_done(ceq);
- return ret_code;
-}
-
-/**
- * i40iw_sc_ceq_destroy - destroy ceq
- * @ceq: ceq sc structure
- * @scratch: u64 saved to be used during cqp completion
- * @post_sq: flag for cqp db to ring
- */
-static enum i40iw_status_code i40iw_sc_ceq_destroy(struct i40iw_sc_ceq *ceq,
- u64 scratch,
- bool post_sq)
-{
- struct i40iw_sc_cqp *cqp;
- u64 *wqe;
- u64 header;
-
- cqp = ceq->dev->cqp;
- wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
- if (!wqe)
- return I40IW_ERR_RING_FULL;
- set_64bit_val(wqe, 16, ceq->elem_cnt);
- set_64bit_val(wqe, 48, ceq->first_pm_pbl_idx);
- header = ceq->ceq_id |
- LS_64(I40IW_CQP_OP_DESTROY_CEQ, I40IW_CQPSQ_OPCODE) |
- LS_64(ceq->pbl_chunk_size, I40IW_CQPSQ_CEQ_LPBLSIZE) |
- LS_64(ceq->virtual_map, I40IW_CQPSQ_CEQ_VMAP) |
- LS_64(ceq->tph_en, I40IW_CQPSQ_TPHEN) |
- LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
- i40iw_insert_wqe_hdr(wqe, header);
- i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "CEQ_DESTROY WQE",
- wqe, I40IW_CQP_WQE_SIZE * 8);
-
- if (post_sq)
- i40iw_sc_cqp_post_sq(cqp);
- return 0;
-}
-
-/**
- * i40iw_sc_process_ceq - process ceq
- * @dev: sc device struct
- * @ceq: ceq sc structure
- */
-static void *i40iw_sc_process_ceq(struct i40iw_sc_dev *dev, struct i40iw_sc_ceq *ceq)
-{
- u64 temp;
- u64 *ceqe;
- struct i40iw_sc_cq *cq = NULL;
- u8 polarity;
-
- ceqe = (u64 *)I40IW_GET_CURRENT_CEQ_ELEMENT(ceq);
- get_64bit_val(ceqe, 0, &temp);
- polarity = (u8)RS_64(temp, I40IW_CEQE_VALID);
- if (polarity != ceq->polarity)
- return cq;
-
- cq = (struct i40iw_sc_cq *)(unsigned long)LS_64_1(temp, 1);
-
- I40IW_RING_MOVE_TAIL(ceq->ceq_ring);
- if (I40IW_RING_GETCURRENT_TAIL(ceq->ceq_ring) == 0)
- ceq->polarity ^= 1;
-
- if (dev->is_pf)
- i40iw_wr32(dev->hw, I40E_PFPE_CQACK, cq->cq_uk.cq_id);
- else
- i40iw_wr32(dev->hw, I40E_VFPE_CQACK1, cq->cq_uk.cq_id);
-
- return cq;
-}
-
-/**
- * i40iw_sc_aeq_init - initialize aeq
- * @aeq: aeq structure ptr
- * @info: aeq initialization info
- */
-static enum i40iw_status_code i40iw_sc_aeq_init(struct i40iw_sc_aeq *aeq,
- struct i40iw_aeq_init_info *info)
-{
- u32 pble_obj_cnt;
-
- if ((info->elem_cnt < I40IW_MIN_AEQ_ENTRIES) ||
- (info->elem_cnt > I40IW_MAX_AEQ_ENTRIES))
- return I40IW_ERR_INVALID_SIZE;
- pble_obj_cnt = info->dev->hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].cnt;
-
- if (info->virtual_map && (info->first_pm_pbl_idx >= pble_obj_cnt))
- return I40IW_ERR_INVALID_PBLE_INDEX;
-
- aeq->size = sizeof(*aeq);
- aeq->polarity = 1;
- aeq->aeqe_base = (struct i40iw_sc_aeqe *)info->aeqe_base;
- aeq->dev = info->dev;
- aeq->elem_cnt = info->elem_cnt;
-
- aeq->aeq_elem_pa = info->aeq_elem_pa;
- I40IW_RING_INIT(aeq->aeq_ring, aeq->elem_cnt);
- info->dev->aeq = aeq;
-
- aeq->virtual_map = info->virtual_map;
- aeq->pbl_list = (aeq->virtual_map ? info->pbl_list : NULL);
- aeq->pbl_chunk_size = (aeq->virtual_map ? info->pbl_chunk_size : 0);
- aeq->first_pm_pbl_idx = (aeq->virtual_map ? info->first_pm_pbl_idx : 0);
- info->dev->aeq = aeq;
- return 0;
-}
-
-/**
- * i40iw_sc_aeq_create - create aeq
- * @aeq: aeq structure ptr
- * @scratch: u64 saved to be used during cqp completion
- * @post_sq: flag for cqp db to ring
- */
-static enum i40iw_status_code i40iw_sc_aeq_create(struct i40iw_sc_aeq *aeq,
- u64 scratch,
- bool post_sq)
-{
- u64 *wqe;
- struct i40iw_sc_cqp *cqp;
- u64 header;
-
- cqp = aeq->dev->cqp;
- wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
- if (!wqe)
- return I40IW_ERR_RING_FULL;
- set_64bit_val(wqe, 16, aeq->elem_cnt);
- set_64bit_val(wqe, 32,
- (aeq->virtual_map ? 0 : aeq->aeq_elem_pa));
- set_64bit_val(wqe, 48,
- (aeq->virtual_map ? aeq->first_pm_pbl_idx : 0));
-
- header = LS_64(I40IW_CQP_OP_CREATE_AEQ, I40IW_CQPSQ_OPCODE) |
- LS_64(aeq->pbl_chunk_size, I40IW_CQPSQ_AEQ_LPBLSIZE) |
- LS_64(aeq->virtual_map, I40IW_CQPSQ_AEQ_VMAP) |
- LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
-
- i40iw_insert_wqe_hdr(wqe, header);
- i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "AEQ_CREATE WQE",
- wqe, I40IW_CQP_WQE_SIZE * 8);
- if (post_sq)
- i40iw_sc_cqp_post_sq(cqp);
- return 0;
-}
-
-/**
- * i40iw_sc_aeq_destroy - destroy aeq during close
- * @aeq: aeq structure ptr
- * @scratch: u64 saved to be used during cqp completion
- * @post_sq: flag for cqp db to ring
- */
-static enum i40iw_status_code i40iw_sc_aeq_destroy(struct i40iw_sc_aeq *aeq,
- u64 scratch,
- bool post_sq)
-{
- u64 *wqe;
- struct i40iw_sc_cqp *cqp;
- u64 header;
-
- cqp = aeq->dev->cqp;
- wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
- if (!wqe)
- return I40IW_ERR_RING_FULL;
- set_64bit_val(wqe, 16, aeq->elem_cnt);
- set_64bit_val(wqe, 48, aeq->first_pm_pbl_idx);
- header = LS_64(I40IW_CQP_OP_DESTROY_AEQ, I40IW_CQPSQ_OPCODE) |
- LS_64(aeq->pbl_chunk_size, I40IW_CQPSQ_AEQ_LPBLSIZE) |
- LS_64(aeq->virtual_map, I40IW_CQPSQ_AEQ_VMAP) |
- LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
- i40iw_insert_wqe_hdr(wqe, header);
-
- i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "AEQ_DESTROY WQE",
- wqe, I40IW_CQP_WQE_SIZE * 8);
- if (post_sq)
- i40iw_sc_cqp_post_sq(cqp);
- return 0;
-}
-
-/**
- * i40iw_sc_get_next_aeqe - get next aeq entry
- * @aeq: aeq structure ptr
- * @info: aeqe info to be returned
- */
-static enum i40iw_status_code i40iw_sc_get_next_aeqe(struct i40iw_sc_aeq *aeq,
- struct i40iw_aeqe_info *info)
-{
- u64 temp, compl_ctx;
- u64 *aeqe;
- u16 wqe_idx;
- u8 ae_src;
- u8 polarity;
-
- aeqe = (u64 *)I40IW_GET_CURRENT_AEQ_ELEMENT(aeq);
- get_64bit_val(aeqe, 0, &compl_ctx);
- get_64bit_val(aeqe, 8, &temp);
- polarity = (u8)RS_64(temp, I40IW_AEQE_VALID);
-
- if (aeq->polarity != polarity)
- return I40IW_ERR_QUEUE_EMPTY;
-
- i40iw_debug_buf(aeq->dev, I40IW_DEBUG_WQE, "AEQ_ENTRY", aeqe, 16);
-
- ae_src = (u8)RS_64(temp, I40IW_AEQE_AESRC);
- wqe_idx = (u16)RS_64(temp, I40IW_AEQE_WQDESCIDX);
- info->qp_cq_id = (u32)RS_64(temp, I40IW_AEQE_QPCQID);
- info->ae_id = (u16)RS_64(temp, I40IW_AEQE_AECODE);
- info->tcp_state = (u8)RS_64(temp, I40IW_AEQE_TCPSTATE);
- info->iwarp_state = (u8)RS_64(temp, I40IW_AEQE_IWSTATE);
- info->q2_data_written = (u8)RS_64(temp, I40IW_AEQE_Q2DATA);
- info->aeqe_overflow = (bool)RS_64(temp, I40IW_AEQE_OVERFLOW);
- switch (ae_src) {
- case I40IW_AE_SOURCE_RQ:
- case I40IW_AE_SOURCE_RQ_0011:
- info->qp = true;
- info->wqe_idx = wqe_idx;
- info->compl_ctx = compl_ctx;
- break;
- case I40IW_AE_SOURCE_CQ:
- case I40IW_AE_SOURCE_CQ_0110:
- case I40IW_AE_SOURCE_CQ_1010:
- case I40IW_AE_SOURCE_CQ_1110:
- info->cq = true;
- info->compl_ctx = LS_64_1(compl_ctx, 1);
- break;
- case I40IW_AE_SOURCE_SQ:
- case I40IW_AE_SOURCE_SQ_0111:
- info->qp = true;
- info->sq = true;
- info->wqe_idx = wqe_idx;
- info->compl_ctx = compl_ctx;
- break;
- case I40IW_AE_SOURCE_IN_RR_WR:
- case I40IW_AE_SOURCE_IN_RR_WR_1011:
- info->qp = true;
- info->compl_ctx = compl_ctx;
- info->in_rdrsp_wr = true;
- break;
- case I40IW_AE_SOURCE_OUT_RR:
- case I40IW_AE_SOURCE_OUT_RR_1111:
- info->qp = true;
- info->compl_ctx = compl_ctx;
- info->out_rdrsp = true;
- break;
- default:
- break;
- }
- I40IW_RING_MOVE_TAIL(aeq->aeq_ring);
- if (I40IW_RING_GETCURRENT_TAIL(aeq->aeq_ring) == 0)
- aeq->polarity ^= 1;
- return 0;
-}
-
-/**
- * i40iw_sc_repost_aeq_entries - repost completed aeq entries
- * @dev: sc device struct
- * @count: allocate count
- */
-static enum i40iw_status_code i40iw_sc_repost_aeq_entries(struct i40iw_sc_dev *dev,
- u32 count)
-{
- if (count > I40IW_MAX_AEQ_ALLOCATE_COUNT)
- return I40IW_ERR_INVALID_SIZE;
-
- if (dev->is_pf)
- i40iw_wr32(dev->hw, I40E_PFPE_AEQALLOC, count);
- else
- i40iw_wr32(dev->hw, I40E_VFPE_AEQALLOC1, count);
-
- return 0;
-}
-
-/**
- * i40iw_sc_aeq_create_done - create aeq
- * @aeq: aeq structure ptr
- */
-static enum i40iw_status_code i40iw_sc_aeq_create_done(struct i40iw_sc_aeq *aeq)
-{
- struct i40iw_sc_cqp *cqp;
-
- cqp = aeq->dev->cqp;
- return i40iw_sc_poll_for_cqp_op_done(cqp, I40IW_CQP_OP_CREATE_AEQ, NULL);
-}
-
-/**
- * i40iw_sc_aeq_destroy_done - destroy of aeq during close
- * @aeq: aeq structure ptr
- */
-static enum i40iw_status_code i40iw_sc_aeq_destroy_done(struct i40iw_sc_aeq *aeq)
-{
- struct i40iw_sc_cqp *cqp;
-
- cqp = aeq->dev->cqp;
- return i40iw_sc_poll_for_cqp_op_done(cqp, I40IW_CQP_OP_DESTROY_AEQ, NULL);
-}
-
-/**
- * i40iw_sc_ccq_init - initialize control cq
- * @cq: sc's cq ctruct
- * @info: info for control cq initialization
- */
-static enum i40iw_status_code i40iw_sc_ccq_init(struct i40iw_sc_cq *cq,
- struct i40iw_ccq_init_info *info)
-{
- u32 pble_obj_cnt;
-
- if (info->num_elem < I40IW_MIN_CQ_SIZE || info->num_elem > I40IW_MAX_CQ_SIZE)
- return I40IW_ERR_INVALID_SIZE;
-
- if (info->ceq_id > I40IW_MAX_CEQID)
- return I40IW_ERR_INVALID_CEQ_ID;
-
- pble_obj_cnt = info->dev->hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].cnt;
-
- if (info->virtual_map && (info->first_pm_pbl_idx >= pble_obj_cnt))
- return I40IW_ERR_INVALID_PBLE_INDEX;
-
- cq->cq_pa = info->cq_pa;
- cq->cq_uk.cq_base = info->cq_base;
- cq->shadow_area_pa = info->shadow_area_pa;
- cq->cq_uk.shadow_area = info->shadow_area;
- cq->shadow_read_threshold = info->shadow_read_threshold;
- cq->dev = info->dev;
- cq->ceq_id = info->ceq_id;
- cq->cq_uk.cq_size = info->num_elem;
- cq->cq_type = I40IW_CQ_TYPE_CQP;
- cq->ceqe_mask = info->ceqe_mask;
- I40IW_RING_INIT(cq->cq_uk.cq_ring, info->num_elem);
-
- cq->cq_uk.cq_id = 0; /* control cq is id 0 always */
- cq->ceq_id_valid = info->ceq_id_valid;
- cq->tph_en = info->tph_en;
- cq->tph_val = info->tph_val;
- cq->cq_uk.avoid_mem_cflct = info->avoid_mem_cflct;
-
- cq->pbl_list = info->pbl_list;
- cq->virtual_map = info->virtual_map;
- cq->pbl_chunk_size = info->pbl_chunk_size;
- cq->first_pm_pbl_idx = info->first_pm_pbl_idx;
- cq->cq_uk.polarity = true;
-
- /* following are only for iw cqs so initialize them to zero */
- cq->cq_uk.cqe_alloc_reg = NULL;
- info->dev->ccq = cq;
- return 0;
-}
-
-/**
- * i40iw_sc_ccq_create_done - poll cqp for ccq create
- * @ccq: ccq sc struct
- */
-static enum i40iw_status_code i40iw_sc_ccq_create_done(struct i40iw_sc_cq *ccq)
-{
- struct i40iw_sc_cqp *cqp;
-
- cqp = ccq->dev->cqp;
- return i40iw_sc_poll_for_cqp_op_done(cqp, I40IW_CQP_OP_CREATE_CQ, NULL);
-}
-
-/**
- * i40iw_sc_ccq_create - create control cq
- * @ccq: ccq sc struct
- * @scratch: u64 saved to be used during cqp completion
- * @check_overflow: overlow flag for ccq
- * @post_sq: flag for cqp db to ring
- */
-static enum i40iw_status_code i40iw_sc_ccq_create(struct i40iw_sc_cq *ccq,
- u64 scratch,
- bool check_overflow,
- bool post_sq)
-{
- u64 *wqe;
- struct i40iw_sc_cqp *cqp;
- u64 header;
- enum i40iw_status_code ret_code;
-
- cqp = ccq->dev->cqp;
- wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
- if (!wqe)
- return I40IW_ERR_RING_FULL;
- set_64bit_val(wqe, 0, ccq->cq_uk.cq_size);
- set_64bit_val(wqe, 8, RS_64_1(ccq, 1));
- set_64bit_val(wqe, 16,
- LS_64(ccq->shadow_read_threshold, I40IW_CQPSQ_CQ_SHADOW_READ_THRESHOLD));
- set_64bit_val(wqe, 32, (ccq->virtual_map ? 0 : ccq->cq_pa));
- set_64bit_val(wqe, 40, ccq->shadow_area_pa);
- set_64bit_val(wqe, 48,
- (ccq->virtual_map ? ccq->first_pm_pbl_idx : 0));
- set_64bit_val(wqe, 56,
- LS_64(ccq->tph_val, I40IW_CQPSQ_TPHVAL));
-
- header = ccq->cq_uk.cq_id |
- LS_64((ccq->ceq_id_valid ? ccq->ceq_id : 0), I40IW_CQPSQ_CQ_CEQID) |
- LS_64(I40IW_CQP_OP_CREATE_CQ, I40IW_CQPSQ_OPCODE) |
- LS_64(ccq->pbl_chunk_size, I40IW_CQPSQ_CQ_LPBLSIZE) |
- LS_64(check_overflow, I40IW_CQPSQ_CQ_CHKOVERFLOW) |
- LS_64(ccq->virtual_map, I40IW_CQPSQ_CQ_VIRTMAP) |
- LS_64(ccq->ceqe_mask, I40IW_CQPSQ_CQ_ENCEQEMASK) |
- LS_64(ccq->ceq_id_valid, I40IW_CQPSQ_CQ_CEQIDVALID) |
- LS_64(ccq->tph_en, I40IW_CQPSQ_TPHEN) |
- LS_64(ccq->cq_uk.avoid_mem_cflct, I40IW_CQPSQ_CQ_AVOIDMEMCNFLCT) |
- LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
-
- i40iw_insert_wqe_hdr(wqe, header);
-
- i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "CCQ_CREATE WQE",
- wqe, I40IW_CQP_WQE_SIZE * 8);
-
- if (post_sq) {
- i40iw_sc_cqp_post_sq(cqp);
- ret_code = i40iw_sc_ccq_create_done(ccq);
- if (ret_code)
- return ret_code;
- }
- cqp->process_cqp_sds = i40iw_cqp_sds_cmd;
-
- return 0;
-}
-
-/**
- * i40iw_sc_ccq_destroy - destroy ccq during close
- * @ccq: ccq sc struct
- * @scratch: u64 saved to be used during cqp completion
- * @post_sq: flag for cqp db to ring
- */
-static enum i40iw_status_code i40iw_sc_ccq_destroy(struct i40iw_sc_cq *ccq,
- u64 scratch,
- bool post_sq)
-{
- struct i40iw_sc_cqp *cqp;
- u64 *wqe;
- u64 header;
- enum i40iw_status_code ret_code = 0;
- u32 tail, val, error;
-
- cqp = ccq->dev->cqp;
- wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
- if (!wqe)
- return I40IW_ERR_RING_FULL;
- set_64bit_val(wqe, 0, ccq->cq_uk.cq_size);
- set_64bit_val(wqe, 8, RS_64_1(ccq, 1));
- set_64bit_val(wqe, 40, ccq->shadow_area_pa);
-
- header = ccq->cq_uk.cq_id |
- LS_64((ccq->ceq_id_valid ? ccq->ceq_id : 0), I40IW_CQPSQ_CQ_CEQID) |
- LS_64(I40IW_CQP_OP_DESTROY_CQ, I40IW_CQPSQ_OPCODE) |
- LS_64(ccq->ceqe_mask, I40IW_CQPSQ_CQ_ENCEQEMASK) |
- LS_64(ccq->ceq_id_valid, I40IW_CQPSQ_CQ_CEQIDVALID) |
- LS_64(ccq->tph_en, I40IW_CQPSQ_TPHEN) |
- LS_64(ccq->cq_uk.avoid_mem_cflct, I40IW_CQPSQ_CQ_AVOIDMEMCNFLCT) |
- LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
-
- i40iw_insert_wqe_hdr(wqe, header);
-
- i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "CCQ_DESTROY WQE",
- wqe, I40IW_CQP_WQE_SIZE * 8);
-
- i40iw_get_cqp_reg_info(cqp, &val, &tail, &error);
- if (error)
- return I40IW_ERR_CQP_COMPL_ERROR;
-
- if (post_sq) {
- i40iw_sc_cqp_post_sq(cqp);
- ret_code = i40iw_cqp_poll_registers(cqp, tail, 1000);
- }
-
- return ret_code;
-}
-
-/**
- * i40iw_sc_cq_init - initialize completion q
- * @cq: cq struct
- * @info: cq initialization info
- */
-static enum i40iw_status_code i40iw_sc_cq_init(struct i40iw_sc_cq *cq,
- struct i40iw_cq_init_info *info)
-{
- u32 __iomem *cqe_alloc_reg = NULL;
- enum i40iw_status_code ret_code;
- u32 pble_obj_cnt;
- u32 arm_offset;
-
- pble_obj_cnt = info->dev->hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].cnt;
-
- if (info->virtual_map && (info->first_pm_pbl_idx >= pble_obj_cnt))
- return I40IW_ERR_INVALID_PBLE_INDEX;
-
- cq->cq_pa = info->cq_base_pa;
- cq->dev = info->dev;
- cq->ceq_id = info->ceq_id;
- arm_offset = (info->dev->is_pf) ? I40E_PFPE_CQARM : I40E_VFPE_CQARM1;
- if (i40iw_get_hw_addr(cq->dev))
- cqe_alloc_reg = (u32 __iomem *)(i40iw_get_hw_addr(cq->dev) +
- arm_offset);
- info->cq_uk_init_info.cqe_alloc_reg = cqe_alloc_reg;
- ret_code = i40iw_cq_uk_init(&cq->cq_uk, &info->cq_uk_init_info);
- if (ret_code)
- return ret_code;
- cq->virtual_map = info->virtual_map;
- cq->pbl_chunk_size = info->pbl_chunk_size;
- cq->ceqe_mask = info->ceqe_mask;
- cq->cq_type = (info->type) ? info->type : I40IW_CQ_TYPE_IWARP;
-
- cq->shadow_area_pa = info->shadow_area_pa;
- cq->shadow_read_threshold = info->shadow_read_threshold;
-
- cq->ceq_id_valid = info->ceq_id_valid;
- cq->tph_en = info->tph_en;
- cq->tph_val = info->tph_val;
-
- cq->first_pm_pbl_idx = info->first_pm_pbl_idx;
-
- return 0;
-}
-
-/**
- * i40iw_sc_cq_create - create completion q
- * @cq: cq struct
- * @scratch: u64 saved to be used during cqp completion
- * @check_overflow: flag for overflow check
- * @post_sq: flag for cqp db to ring
- */
-static enum i40iw_status_code i40iw_sc_cq_create(struct i40iw_sc_cq *cq,
- u64 scratch,
- bool check_overflow,
- bool post_sq)
-{
- u64 *wqe;
- struct i40iw_sc_cqp *cqp;
- u64 header;
-
- if (cq->cq_uk.cq_id > I40IW_MAX_CQID)
- return I40IW_ERR_INVALID_CQ_ID;
-
- if (cq->ceq_id > I40IW_MAX_CEQID)
- return I40IW_ERR_INVALID_CEQ_ID;
-
- cqp = cq->dev->cqp;
- wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
- if (!wqe)
- return I40IW_ERR_RING_FULL;
-
- set_64bit_val(wqe, 0, cq->cq_uk.cq_size);
- set_64bit_val(wqe, 8, RS_64_1(cq, 1));
- set_64bit_val(wqe,
- 16,
- LS_64(cq->shadow_read_threshold, I40IW_CQPSQ_CQ_SHADOW_READ_THRESHOLD));
-
- set_64bit_val(wqe, 32, (cq->virtual_map ? 0 : cq->cq_pa));
-
- set_64bit_val(wqe, 40, cq->shadow_area_pa);
- set_64bit_val(wqe, 48, (cq->virtual_map ? cq->first_pm_pbl_idx : 0));
- set_64bit_val(wqe, 56, LS_64(cq->tph_val, I40IW_CQPSQ_TPHVAL));
-
- header = cq->cq_uk.cq_id |
- LS_64((cq->ceq_id_valid ? cq->ceq_id : 0), I40IW_CQPSQ_CQ_CEQID) |
- LS_64(I40IW_CQP_OP_CREATE_CQ, I40IW_CQPSQ_OPCODE) |
- LS_64(cq->pbl_chunk_size, I40IW_CQPSQ_CQ_LPBLSIZE) |
- LS_64(check_overflow, I40IW_CQPSQ_CQ_CHKOVERFLOW) |
- LS_64(cq->virtual_map, I40IW_CQPSQ_CQ_VIRTMAP) |
- LS_64(cq->ceqe_mask, I40IW_CQPSQ_CQ_ENCEQEMASK) |
- LS_64(cq->ceq_id_valid, I40IW_CQPSQ_CQ_CEQIDVALID) |
- LS_64(cq->tph_en, I40IW_CQPSQ_TPHEN) |
- LS_64(cq->cq_uk.avoid_mem_cflct, I40IW_CQPSQ_CQ_AVOIDMEMCNFLCT) |
- LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
-
- i40iw_insert_wqe_hdr(wqe, header);
-
- i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "CQ_CREATE WQE",
- wqe, I40IW_CQP_WQE_SIZE * 8);
-
- if (post_sq)
- i40iw_sc_cqp_post_sq(cqp);
- return 0;
-}
-
-/**
- * i40iw_sc_cq_destroy - destroy completion q
- * @cq: cq struct
- * @scratch: u64 saved to be used during cqp completion
- * @post_sq: flag for cqp db to ring
- */
-static enum i40iw_status_code i40iw_sc_cq_destroy(struct i40iw_sc_cq *cq,
- u64 scratch,
- bool post_sq)
-{
- struct i40iw_sc_cqp *cqp;
- u64 *wqe;
- u64 header;
-
- cqp = cq->dev->cqp;
- wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
- if (!wqe)
- return I40IW_ERR_RING_FULL;
- set_64bit_val(wqe, 0, cq->cq_uk.cq_size);
- set_64bit_val(wqe, 8, RS_64_1(cq, 1));
- set_64bit_val(wqe, 40, cq->shadow_area_pa);
- set_64bit_val(wqe, 48, (cq->virtual_map ? cq->first_pm_pbl_idx : 0));
-
- header = cq->cq_uk.cq_id |
- LS_64((cq->ceq_id_valid ? cq->ceq_id : 0), I40IW_CQPSQ_CQ_CEQID) |
- LS_64(I40IW_CQP_OP_DESTROY_CQ, I40IW_CQPSQ_OPCODE) |
- LS_64(cq->pbl_chunk_size, I40IW_CQPSQ_CQ_LPBLSIZE) |
- LS_64(cq->virtual_map, I40IW_CQPSQ_CQ_VIRTMAP) |
- LS_64(cq->ceqe_mask, I40IW_CQPSQ_CQ_ENCEQEMASK) |
- LS_64(cq->ceq_id_valid, I40IW_CQPSQ_CQ_CEQIDVALID) |
- LS_64(cq->tph_en, I40IW_CQPSQ_TPHEN) |
- LS_64(cq->cq_uk.avoid_mem_cflct, I40IW_CQPSQ_CQ_AVOIDMEMCNFLCT) |
- LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
-
- i40iw_insert_wqe_hdr(wqe, header);
-
- i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "CQ_DESTROY WQE",
- wqe, I40IW_CQP_WQE_SIZE * 8);
-
- if (post_sq)
- i40iw_sc_cqp_post_sq(cqp);
- return 0;
-}
-
-/**
- * i40iw_sc_cq_modify - modify a Completion Queue
- * @cq: cq struct
- * @info: modification info struct
- * @scratch:
- * @post_sq: flag to post to sq
- */
-static enum i40iw_status_code i40iw_sc_cq_modify(struct i40iw_sc_cq *cq,
- struct i40iw_modify_cq_info *info,
- u64 scratch,
- bool post_sq)
-{
- struct i40iw_sc_cqp *cqp;
- u64 *wqe;
- u64 header;
- u32 cq_size, ceq_id, first_pm_pbl_idx;
- u8 pbl_chunk_size;
- bool virtual_map, ceq_id_valid, check_overflow;
- u32 pble_obj_cnt;
-
- if (info->ceq_valid && (info->ceq_id > I40IW_MAX_CEQID))
- return I40IW_ERR_INVALID_CEQ_ID;
-
- pble_obj_cnt = cq->dev->hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].cnt;
-
- if (info->cq_resize && info->virtual_map &&
- (info->first_pm_pbl_idx >= pble_obj_cnt))
- return I40IW_ERR_INVALID_PBLE_INDEX;
-
- cqp = cq->dev->cqp;
- wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
- if (!wqe)
- return I40IW_ERR_RING_FULL;
-
- cq->pbl_list = info->pbl_list;
- cq->cq_pa = info->cq_pa;
- cq->first_pm_pbl_idx = info->first_pm_pbl_idx;
-
- cq_size = info->cq_resize ? info->cq_size : cq->cq_uk.cq_size;
- if (info->ceq_change) {
- ceq_id_valid = true;
- ceq_id = info->ceq_id;
- } else {
- ceq_id_valid = cq->ceq_id_valid;
- ceq_id = ceq_id_valid ? cq->ceq_id : 0;
- }
- virtual_map = info->cq_resize ? info->virtual_map : cq->virtual_map;
- first_pm_pbl_idx = (info->cq_resize ?
- (info->virtual_map ? info->first_pm_pbl_idx : 0) :
- (cq->virtual_map ? cq->first_pm_pbl_idx : 0));
- pbl_chunk_size = (info->cq_resize ?
- (info->virtual_map ? info->pbl_chunk_size : 0) :
- (cq->virtual_map ? cq->pbl_chunk_size : 0));
- check_overflow = info->check_overflow_change ? info->check_overflow :
- cq->check_overflow;
- cq->cq_uk.cq_size = cq_size;
- cq->ceq_id_valid = ceq_id_valid;
- cq->ceq_id = ceq_id;
- cq->virtual_map = virtual_map;
- cq->first_pm_pbl_idx = first_pm_pbl_idx;
- cq->pbl_chunk_size = pbl_chunk_size;
- cq->check_overflow = check_overflow;
-
- set_64bit_val(wqe, 0, cq_size);
- set_64bit_val(wqe, 8, RS_64_1(cq, 1));
- set_64bit_val(wqe, 16,
- LS_64(info->shadow_read_threshold, I40IW_CQPSQ_CQ_SHADOW_READ_THRESHOLD));
- set_64bit_val(wqe, 32, (cq->virtual_map ? 0 : cq->cq_pa));
- set_64bit_val(wqe, 40, cq->shadow_area_pa);
- set_64bit_val(wqe, 48, (cq->virtual_map ? first_pm_pbl_idx : 0));
- set_64bit_val(wqe, 56, LS_64(cq->tph_val, I40IW_CQPSQ_TPHVAL));
-
- header = cq->cq_uk.cq_id |
- LS_64(ceq_id, I40IW_CQPSQ_CQ_CEQID) |
- LS_64(I40IW_CQP_OP_MODIFY_CQ, I40IW_CQPSQ_OPCODE) |
- LS_64(info->cq_resize, I40IW_CQPSQ_CQ_CQRESIZE) |
- LS_64(pbl_chunk_size, I40IW_CQPSQ_CQ_LPBLSIZE) |
- LS_64(check_overflow, I40IW_CQPSQ_CQ_CHKOVERFLOW) |
- LS_64(virtual_map, I40IW_CQPSQ_CQ_VIRTMAP) |
- LS_64(cq->ceqe_mask, I40IW_CQPSQ_CQ_ENCEQEMASK) |
- LS_64(ceq_id_valid, I40IW_CQPSQ_CQ_CEQIDVALID) |
- LS_64(cq->tph_en, I40IW_CQPSQ_TPHEN) |
- LS_64(cq->cq_uk.avoid_mem_cflct, I40IW_CQPSQ_CQ_AVOIDMEMCNFLCT) |
- LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
-
- i40iw_insert_wqe_hdr(wqe, header);
-
- i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "CQ_MODIFY WQE",
- wqe, I40IW_CQP_WQE_SIZE * 8);
-
- if (post_sq)
- i40iw_sc_cqp_post_sq(cqp);
- return 0;
-}
-
-/**
- * i40iw_sc_qp_init - initialize qp
- * @qp: sc qp
- * @info: initialization qp info
- */
-static enum i40iw_status_code i40iw_sc_qp_init(struct i40iw_sc_qp *qp,
- struct i40iw_qp_init_info *info)
-{
- u32 __iomem *wqe_alloc_reg = NULL;
- enum i40iw_status_code ret_code;
- u32 pble_obj_cnt;
- u8 wqe_size;
- u32 offset;
-
- qp->dev = info->pd->dev;
- qp->vsi = info->vsi;
- qp->sq_pa = info->sq_pa;
- qp->rq_pa = info->rq_pa;
- qp->hw_host_ctx_pa = info->host_ctx_pa;
- qp->q2_pa = info->q2_pa;
- qp->shadow_area_pa = info->shadow_area_pa;
-
- qp->q2_buf = info->q2;
- qp->pd = info->pd;
- qp->hw_host_ctx = info->host_ctx;
- offset = (qp->pd->dev->is_pf) ? I40E_PFPE_WQEALLOC : I40E_VFPE_WQEALLOC1;
- if (i40iw_get_hw_addr(qp->pd->dev))
- wqe_alloc_reg = (u32 __iomem *)(i40iw_get_hw_addr(qp->pd->dev) +
- offset);
-
- info->qp_uk_init_info.wqe_alloc_reg = wqe_alloc_reg;
- info->qp_uk_init_info.abi_ver = qp->pd->abi_ver;
- ret_code = i40iw_qp_uk_init(&qp->qp_uk, &info->qp_uk_init_info);
- if (ret_code)
- return ret_code;
- qp->virtual_map = info->virtual_map;
-
- pble_obj_cnt = info->pd->dev->hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].cnt;
-
- if ((info->virtual_map && (info->sq_pa >= pble_obj_cnt)) ||
- (info->virtual_map && (info->rq_pa >= pble_obj_cnt)))
- return I40IW_ERR_INVALID_PBLE_INDEX;
-
- qp->llp_stream_handle = (void *)(-1);
- qp->qp_type = (info->type) ? info->type : I40IW_QP_TYPE_IWARP;
-
- qp->hw_sq_size = i40iw_get_encoded_wqe_size(qp->qp_uk.sq_ring.size,
- false);
- i40iw_debug(qp->dev, I40IW_DEBUG_WQE, "%s: hw_sq_size[%04d] sq_ring.size[%04d]\n",
- __func__, qp->hw_sq_size, qp->qp_uk.sq_ring.size);
-
- switch (qp->pd->abi_ver) {
- case 4:
- ret_code = i40iw_fragcnt_to_wqesize_rq(qp->qp_uk.max_rq_frag_cnt,
- &wqe_size);
- if (ret_code)
- return ret_code;
- break;
- case 5: /* fallthrough until next ABI version */
- default:
- if (qp->qp_uk.max_rq_frag_cnt > I40IW_MAX_WQ_FRAGMENT_COUNT)
- return I40IW_ERR_INVALID_FRAG_COUNT;
- wqe_size = I40IW_MAX_WQE_SIZE_RQ;
- break;
- }
- qp->hw_rq_size = i40iw_get_encoded_wqe_size(qp->qp_uk.rq_size *
- (wqe_size / I40IW_QP_WQE_MIN_SIZE), false);
- i40iw_debug(qp->dev, I40IW_DEBUG_WQE,
- "%s: hw_rq_size[%04d] qp_uk.rq_size[%04d] wqe_size[%04d]\n",
- __func__, qp->hw_rq_size, qp->qp_uk.rq_size, wqe_size);
- qp->sq_tph_val = info->sq_tph_val;
- qp->rq_tph_val = info->rq_tph_val;
- qp->sq_tph_en = info->sq_tph_en;
- qp->rq_tph_en = info->rq_tph_en;
- qp->rcv_tph_en = info->rcv_tph_en;
- qp->xmit_tph_en = info->xmit_tph_en;
- qp->qs_handle = qp->vsi->qos[qp->user_pri].qs_handle;
- qp->exception_lan_queue = qp->pd->dev->exception_lan_queue;
-
- return 0;
-}
-
-/**
- * i40iw_sc_qp_create - create qp
- * @qp: sc qp
- * @info: qp create info
- * @scratch: u64 saved to be used during cqp completion
- * @post_sq: flag for cqp db to ring
- */
-static enum i40iw_status_code i40iw_sc_qp_create(
- struct i40iw_sc_qp *qp,
- struct i40iw_create_qp_info *info,
- u64 scratch,
- bool post_sq)
-{
- struct i40iw_sc_cqp *cqp;
- u64 *wqe;
- u64 header;
-
- if ((qp->qp_uk.qp_id < I40IW_MIN_IW_QP_ID) ||
- (qp->qp_uk.qp_id > I40IW_MAX_IW_QP_ID))
- return I40IW_ERR_INVALID_QP_ID;
-
- cqp = qp->pd->dev->cqp;
- wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
- if (!wqe)
- return I40IW_ERR_RING_FULL;
-
- set_64bit_val(wqe, 16, qp->hw_host_ctx_pa);
-
- set_64bit_val(wqe, 40, qp->shadow_area_pa);
-
- header = qp->qp_uk.qp_id |
- LS_64(I40IW_CQP_OP_CREATE_QP, I40IW_CQPSQ_OPCODE) |
- LS_64((info->ord_valid ? 1 : 0), I40IW_CQPSQ_QP_ORDVALID) |
- LS_64(info->tcp_ctx_valid, I40IW_CQPSQ_QP_TOECTXVALID) |
- LS_64(qp->qp_type, I40IW_CQPSQ_QP_QPTYPE) |
- LS_64(qp->virtual_map, I40IW_CQPSQ_QP_VQ) |
- LS_64(info->cq_num_valid, I40IW_CQPSQ_QP_CQNUMVALID) |
- LS_64(info->static_rsrc, I40IW_CQPSQ_QP_STATRSRC) |
- LS_64(info->arp_cache_idx_valid, I40IW_CQPSQ_QP_ARPTABIDXVALID) |
- LS_64(info->next_iwarp_state, I40IW_CQPSQ_QP_NEXTIWSTATE) |
- LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
-
- i40iw_insert_wqe_hdr(wqe, header);
- i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "QP_CREATE WQE",
- wqe, I40IW_CQP_WQE_SIZE * 8);
-
- if (post_sq)
- i40iw_sc_cqp_post_sq(cqp);
- return 0;
-}
-
-/**
- * i40iw_sc_qp_modify - modify qp cqp wqe
- * @qp: sc qp
- * @info: modify qp info
- * @scratch: u64 saved to be used during cqp completion
- * @post_sq: flag for cqp db to ring
- */
-static enum i40iw_status_code i40iw_sc_qp_modify(
- struct i40iw_sc_qp *qp,
- struct i40iw_modify_qp_info *info,
- u64 scratch,
- bool post_sq)
-{
- u64 *wqe;
- struct i40iw_sc_cqp *cqp;
- u64 header;
- u8 term_actions = 0;
- u8 term_len = 0;
-
- cqp = qp->pd->dev->cqp;
- wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
- if (!wqe)
- return I40IW_ERR_RING_FULL;
- if (info->next_iwarp_state == I40IW_QP_STATE_TERMINATE) {
- if (info->dont_send_fin)
- term_actions += I40IWQP_TERM_SEND_TERM_ONLY;
- if (info->dont_send_term)
- term_actions += I40IWQP_TERM_SEND_FIN_ONLY;
- if ((term_actions == I40IWQP_TERM_SEND_TERM_AND_FIN) ||
- (term_actions == I40IWQP_TERM_SEND_TERM_ONLY))
- term_len = info->termlen;
- }
-
- set_64bit_val(wqe,
- 8,
- LS_64(info->new_mss, I40IW_CQPSQ_QP_NEWMSS) |
- LS_64(term_len, I40IW_CQPSQ_QP_TERMLEN));
-
- set_64bit_val(wqe, 16, qp->hw_host_ctx_pa);
- set_64bit_val(wqe, 40, qp->shadow_area_pa);
-
- header = qp->qp_uk.qp_id |
- LS_64(I40IW_CQP_OP_MODIFY_QP, I40IW_CQPSQ_OPCODE) |
- LS_64(info->ord_valid, I40IW_CQPSQ_QP_ORDVALID) |
- LS_64(info->tcp_ctx_valid, I40IW_CQPSQ_QP_TOECTXVALID) |
- LS_64(info->cached_var_valid, I40IW_CQPSQ_QP_CACHEDVARVALID) |
- LS_64(qp->virtual_map, I40IW_CQPSQ_QP_VQ) |
- LS_64(info->cq_num_valid, I40IW_CQPSQ_QP_CQNUMVALID) |
- LS_64(info->force_loopback, I40IW_CQPSQ_QP_FORCELOOPBACK) |
- LS_64(qp->qp_type, I40IW_CQPSQ_QP_QPTYPE) |
- LS_64(info->mss_change, I40IW_CQPSQ_QP_MSSCHANGE) |
- LS_64(info->static_rsrc, I40IW_CQPSQ_QP_STATRSRC) |
- LS_64(info->remove_hash_idx, I40IW_CQPSQ_QP_REMOVEHASHENTRY) |
- LS_64(term_actions, I40IW_CQPSQ_QP_TERMACT) |
- LS_64(info->reset_tcp_conn, I40IW_CQPSQ_QP_RESETCON) |
- LS_64(info->arp_cache_idx_valid, I40IW_CQPSQ_QP_ARPTABIDXVALID) |
- LS_64(info->next_iwarp_state, I40IW_CQPSQ_QP_NEXTIWSTATE) |
- LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
-
- i40iw_insert_wqe_hdr(wqe, header);
-
- i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "QP_MODIFY WQE",
- wqe, I40IW_CQP_WQE_SIZE * 8);
-
- if (post_sq)
- i40iw_sc_cqp_post_sq(cqp);
- return 0;
-}
-
-/**
- * i40iw_sc_qp_destroy - cqp destroy qp
- * @qp: sc qp
- * @scratch: u64 saved to be used during cqp completion
- * @remove_hash_idx: flag if to remove hash idx
- * @ignore_mw_bnd: memory window bind flag
- * @post_sq: flag for cqp db to ring
- */
-static enum i40iw_status_code i40iw_sc_qp_destroy(
- struct i40iw_sc_qp *qp,
- u64 scratch,
- bool remove_hash_idx,
- bool ignore_mw_bnd,
- bool post_sq)
-{
- u64 *wqe;
- struct i40iw_sc_cqp *cqp;
- u64 header;
-
- i40iw_qp_rem_qos(qp);
- cqp = qp->pd->dev->cqp;
- wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
- if (!wqe)
- return I40IW_ERR_RING_FULL;
- set_64bit_val(wqe, 16, qp->hw_host_ctx_pa);
- set_64bit_val(wqe, 40, qp->shadow_area_pa);
-
- header = qp->qp_uk.qp_id |
- LS_64(I40IW_CQP_OP_DESTROY_QP, I40IW_CQPSQ_OPCODE) |
- LS_64(qp->qp_type, I40IW_CQPSQ_QP_QPTYPE) |
- LS_64(ignore_mw_bnd, I40IW_CQPSQ_QP_IGNOREMWBOUND) |
- LS_64(remove_hash_idx, I40IW_CQPSQ_QP_REMOVEHASHENTRY) |
- LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
-
- i40iw_insert_wqe_hdr(wqe, header);
- i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "QP_DESTROY WQE",
- wqe, I40IW_CQP_WQE_SIZE * 8);
-
- if (post_sq)
- i40iw_sc_cqp_post_sq(cqp);
- return 0;
-}
-
-/**
- * i40iw_sc_qp_flush_wqes - flush qp's wqe
- * @qp: sc qp
- * @info: dlush information
- * @scratch: u64 saved to be used during cqp completion
- * @post_sq: flag for cqp db to ring
- */
-static enum i40iw_status_code i40iw_sc_qp_flush_wqes(
- struct i40iw_sc_qp *qp,
- struct i40iw_qp_flush_info *info,
- u64 scratch,
- bool post_sq)
-{
- u64 temp = 0;
- u64 *wqe;
- struct i40iw_sc_cqp *cqp;
- u64 header;
- bool flush_sq = false, flush_rq = false;
-
- if (info->rq && !qp->flush_rq)
- flush_rq = true;
-
- if (info->sq && !qp->flush_sq)
- flush_sq = true;
-
- qp->flush_sq |= flush_sq;
- qp->flush_rq |= flush_rq;
- if (!flush_sq && !flush_rq) {
- if (info->ae_code != I40IW_AE_LLP_RECEIVED_MPA_CRC_ERROR)
- return 0;
- }
-
- cqp = qp->pd->dev->cqp;
- wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
- if (!wqe)
- return I40IW_ERR_RING_FULL;
- if (info->userflushcode) {
- if (flush_rq) {
- temp |= LS_64(info->rq_minor_code, I40IW_CQPSQ_FWQE_RQMNERR) |
- LS_64(info->rq_major_code, I40IW_CQPSQ_FWQE_RQMJERR);
- }
- if (flush_sq) {
- temp |= LS_64(info->sq_minor_code, I40IW_CQPSQ_FWQE_SQMNERR) |
- LS_64(info->sq_major_code, I40IW_CQPSQ_FWQE_SQMJERR);
- }
- }
- set_64bit_val(wqe, 16, temp);
-
- temp = (info->generate_ae) ?
- info->ae_code | LS_64(info->ae_source, I40IW_CQPSQ_FWQE_AESOURCE) : 0;
-
- set_64bit_val(wqe, 8, temp);
-
- header = qp->qp_uk.qp_id |
- LS_64(I40IW_CQP_OP_FLUSH_WQES, I40IW_CQPSQ_OPCODE) |
- LS_64(info->generate_ae, I40IW_CQPSQ_FWQE_GENERATE_AE) |
- LS_64(info->userflushcode, I40IW_CQPSQ_FWQE_USERFLCODE) |
- LS_64(flush_sq, I40IW_CQPSQ_FWQE_FLUSHSQ) |
- LS_64(flush_rq, I40IW_CQPSQ_FWQE_FLUSHRQ) |
- LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
-
- i40iw_insert_wqe_hdr(wqe, header);
-
- i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "QP_FLUSH WQE",
- wqe, I40IW_CQP_WQE_SIZE * 8);
-
- if (post_sq)
- i40iw_sc_cqp_post_sq(cqp);
- return 0;
-}
-
-/**
- * i40iw_sc_qp_upload_context - upload qp's context
- * @dev: sc device struct
- * @info: upload context info ptr for return
- * @scratch: u64 saved to be used during cqp completion
- * @post_sq: flag for cqp db to ring
- */
-static enum i40iw_status_code i40iw_sc_qp_upload_context(
- struct i40iw_sc_dev *dev,
- struct i40iw_upload_context_info *info,
- u64 scratch,
- bool post_sq)
-{
- u64 *wqe;
- struct i40iw_sc_cqp *cqp;
- u64 header;
-
- cqp = dev->cqp;
- wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
- if (!wqe)
- return I40IW_ERR_RING_FULL;
- set_64bit_val(wqe, 16, info->buf_pa);
-
- header = LS_64(info->qp_id, I40IW_CQPSQ_UCTX_QPID) |
- LS_64(I40IW_CQP_OP_UPLOAD_CONTEXT, I40IW_CQPSQ_OPCODE) |
- LS_64(info->qp_type, I40IW_CQPSQ_UCTX_QPTYPE) |
- LS_64(info->raw_format, I40IW_CQPSQ_UCTX_RAWFORMAT) |
- LS_64(info->freeze_qp, I40IW_CQPSQ_UCTX_FREEZEQP) |
- LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
-
- i40iw_insert_wqe_hdr(wqe, header);
-
- i40iw_debug_buf(dev, I40IW_DEBUG_WQE, "QP_UPLOAD_CTX WQE",
- wqe, I40IW_CQP_WQE_SIZE * 8);
-
- if (post_sq)
- i40iw_sc_cqp_post_sq(cqp);
- return 0;
-}
-
-/**
- * i40iw_sc_qp_setctx - set qp's context
- * @qp: sc qp
- * @qp_ctx: context ptr
- * @info: ctx info
- */
-static enum i40iw_status_code i40iw_sc_qp_setctx(
- struct i40iw_sc_qp *qp,
- u64 *qp_ctx,
- struct i40iw_qp_host_ctx_info *info)
-{
- struct i40iwarp_offload_info *iw;
- struct i40iw_tcp_offload_info *tcp;
- struct i40iw_sc_vsi *vsi;
- struct i40iw_sc_dev *dev;
- u64 qw0, qw3, qw7 = 0;
-
- iw = info->iwarp_info;
- tcp = info->tcp_info;
- vsi = qp->vsi;
- dev = qp->dev;
- if (info->add_to_qoslist) {
- qp->user_pri = info->user_pri;
- i40iw_qp_add_qos(qp);
- i40iw_debug(qp->dev, I40IW_DEBUG_DCB, "%s qp[%d] UP[%d] qset[%d]\n",
- __func__, qp->qp_uk.qp_id, qp->user_pri, qp->qs_handle);
- }
- qw0 = LS_64(qp->qp_uk.rq_wqe_size, I40IWQPC_RQWQESIZE) |
- LS_64(info->err_rq_idx_valid, I40IWQPC_ERR_RQ_IDX_VALID) |
- LS_64(qp->rcv_tph_en, I40IWQPC_RCVTPHEN) |
- LS_64(qp->xmit_tph_en, I40IWQPC_XMITTPHEN) |
- LS_64(qp->rq_tph_en, I40IWQPC_RQTPHEN) |
- LS_64(qp->sq_tph_en, I40IWQPC_SQTPHEN) |
- LS_64(info->push_idx, I40IWQPC_PPIDX) |
- LS_64(info->push_mode_en, I40IWQPC_PMENA);
-
- set_64bit_val(qp_ctx, 8, qp->sq_pa);
- set_64bit_val(qp_ctx, 16, qp->rq_pa);
-
- qw3 = LS_64(qp->src_mac_addr_idx, I40IWQPC_SRCMACADDRIDX) |
- LS_64(qp->hw_rq_size, I40IWQPC_RQSIZE) |
- LS_64(qp->hw_sq_size, I40IWQPC_SQSIZE);
-
- set_64bit_val(qp_ctx,
- 128,
- LS_64(info->err_rq_idx, I40IWQPC_ERR_RQ_IDX));
-
- set_64bit_val(qp_ctx,
- 136,
- LS_64(info->send_cq_num, I40IWQPC_TXCQNUM) |
- LS_64(info->rcv_cq_num, I40IWQPC_RXCQNUM));
-
- set_64bit_val(qp_ctx,
- 168,
- LS_64(info->qp_compl_ctx, I40IWQPC_QPCOMPCTX));
- set_64bit_val(qp_ctx,
- 176,
- LS_64(qp->sq_tph_val, I40IWQPC_SQTPHVAL) |
- LS_64(qp->rq_tph_val, I40IWQPC_RQTPHVAL) |
- LS_64(qp->qs_handle, I40IWQPC_QSHANDLE) |
- LS_64(qp->exception_lan_queue, I40IWQPC_EXCEPTION_LAN_QUEUE));
-
- if (info->iwarp_info_valid) {
- qw0 |= LS_64(iw->ddp_ver, I40IWQPC_DDP_VER) |
- LS_64(iw->rdmap_ver, I40IWQPC_RDMAP_VER);
-
- qw7 |= LS_64(iw->pd_id, I40IWQPC_PDIDX);
- set_64bit_val(qp_ctx,
- 144,
- LS_64(qp->q2_pa, I40IWQPC_Q2ADDR) |
- LS_64(vsi->fcn_id, I40IWQPC_STAT_INDEX));
- set_64bit_val(qp_ctx,
- 152,
- LS_64(iw->last_byte_sent, I40IWQPC_LASTBYTESENT));
-
- set_64bit_val(qp_ctx,
- 160,
- LS_64(iw->ord_size, I40IWQPC_ORDSIZE) |
- LS_64(iw->ird_size, I40IWQPC_IRDSIZE) |
- LS_64(iw->wr_rdresp_en, I40IWQPC_WRRDRSPOK) |
- LS_64(iw->rd_enable, I40IWQPC_RDOK) |
- LS_64(iw->snd_mark_en, I40IWQPC_SNDMARKERS) |
- LS_64(iw->bind_en, I40IWQPC_BINDEN) |
- LS_64(iw->fast_reg_en, I40IWQPC_FASTREGEN) |
- LS_64(iw->priv_mode_en, I40IWQPC_PRIVEN) |
- LS_64((((vsi->stats_fcn_id_alloc) &&
- (dev->is_pf) && (vsi->fcn_id >= I40IW_FIRST_NON_PF_STAT)) ? 1 : 0),
- I40IWQPC_USESTATSINSTANCE) |
- LS_64(1, I40IWQPC_IWARPMODE) |
- LS_64(iw->rcv_mark_en, I40IWQPC_RCVMARKERS) |
- LS_64(iw->align_hdrs, I40IWQPC_ALIGNHDRS) |
- LS_64(iw->rcv_no_mpa_crc, I40IWQPC_RCVNOMPACRC) |
- LS_64(iw->rcv_mark_offset, I40IWQPC_RCVMARKOFFSET) |
- LS_64(iw->snd_mark_offset, I40IWQPC_SNDMARKOFFSET));
- }
- if (info->tcp_info_valid) {
- qw0 |= LS_64(tcp->ipv4, I40IWQPC_IPV4) |
- LS_64(tcp->no_nagle, I40IWQPC_NONAGLE) |
- LS_64(tcp->insert_vlan_tag, I40IWQPC_INSERTVLANTAG) |
- LS_64(tcp->time_stamp, I40IWQPC_TIMESTAMP) |
- LS_64(tcp->cwnd_inc_limit, I40IWQPC_LIMIT) |
- LS_64(tcp->drop_ooo_seg, I40IWQPC_DROPOOOSEG) |
- LS_64(tcp->dup_ack_thresh, I40IWQPC_DUPACK_THRESH);
-
- qw3 |= LS_64(tcp->ttl, I40IWQPC_TTL) |
- LS_64(tcp->src_mac_addr_idx, I40IWQPC_SRCMACADDRIDX) |
- LS_64(tcp->avoid_stretch_ack, I40IWQPC_AVOIDSTRETCHACK) |
- LS_64(tcp->tos, I40IWQPC_TOS) |
- LS_64(tcp->src_port, I40IWQPC_SRCPORTNUM) |
- LS_64(tcp->dst_port, I40IWQPC_DESTPORTNUM);
-
- qp->src_mac_addr_idx = tcp->src_mac_addr_idx;
- set_64bit_val(qp_ctx,
- 32,
- LS_64(tcp->dest_ip_addr2, I40IWQPC_DESTIPADDR2) |
- LS_64(tcp->dest_ip_addr3, I40IWQPC_DESTIPADDR3));
-
- set_64bit_val(qp_ctx,
- 40,
- LS_64(tcp->dest_ip_addr0, I40IWQPC_DESTIPADDR0) |
- LS_64(tcp->dest_ip_addr1, I40IWQPC_DESTIPADDR1));
-
- set_64bit_val(qp_ctx,
- 48,
- LS_64(tcp->snd_mss, I40IWQPC_SNDMSS) |
- LS_64(tcp->vlan_tag, I40IWQPC_VLANTAG) |
- LS_64(tcp->arp_idx, I40IWQPC_ARPIDX));
-
- qw7 |= LS_64(tcp->flow_label, I40IWQPC_FLOWLABEL) |
- LS_64(tcp->wscale, I40IWQPC_WSCALE) |
- LS_64(tcp->ignore_tcp_opt, I40IWQPC_IGNORE_TCP_OPT) |
- LS_64(tcp->ignore_tcp_uns_opt, I40IWQPC_IGNORE_TCP_UNS_OPT) |
- LS_64(tcp->tcp_state, I40IWQPC_TCPSTATE) |
- LS_64(tcp->rcv_wscale, I40IWQPC_RCVSCALE) |
- LS_64(tcp->snd_wscale, I40IWQPC_SNDSCALE);
-
- set_64bit_val(qp_ctx,
- 72,
- LS_64(tcp->time_stamp_recent, I40IWQPC_TIMESTAMP_RECENT) |
- LS_64(tcp->time_stamp_age, I40IWQPC_TIMESTAMP_AGE));
- set_64bit_val(qp_ctx,
- 80,
- LS_64(tcp->snd_nxt, I40IWQPC_SNDNXT) |
- LS_64(tcp->snd_wnd, I40IWQPC_SNDWND));
-
- set_64bit_val(qp_ctx,
- 88,
- LS_64(tcp->rcv_nxt, I40IWQPC_RCVNXT) |
- LS_64(tcp->rcv_wnd, I40IWQPC_RCVWND));
- set_64bit_val(qp_ctx,
- 96,
- LS_64(tcp->snd_max, I40IWQPC_SNDMAX) |
- LS_64(tcp->snd_una, I40IWQPC_SNDUNA));
- set_64bit_val(qp_ctx,
- 104,
- LS_64(tcp->srtt, I40IWQPC_SRTT) |
- LS_64(tcp->rtt_var, I40IWQPC_RTTVAR));
- set_64bit_val(qp_ctx,
- 112,
- LS_64(tcp->ss_thresh, I40IWQPC_SSTHRESH) |
- LS_64(tcp->cwnd, I40IWQPC_CWND));
- set_64bit_val(qp_ctx,
- 120,
- LS_64(tcp->snd_wl1, I40IWQPC_SNDWL1) |
- LS_64(tcp->snd_wl2, I40IWQPC_SNDWL2));
- set_64bit_val(qp_ctx,
- 128,
- LS_64(tcp->max_snd_window, I40IWQPC_MAXSNDWND) |
- LS_64(tcp->rexmit_thresh, I40IWQPC_REXMIT_THRESH));
- set_64bit_val(qp_ctx,
- 184,
- LS_64(tcp->local_ipaddr3, I40IWQPC_LOCAL_IPADDR3) |
- LS_64(tcp->local_ipaddr2, I40IWQPC_LOCAL_IPADDR2));
- set_64bit_val(qp_ctx,
- 192,
- LS_64(tcp->local_ipaddr1, I40IWQPC_LOCAL_IPADDR1) |
- LS_64(tcp->local_ipaddr0, I40IWQPC_LOCAL_IPADDR0));
- }
-
- set_64bit_val(qp_ctx, 0, qw0);
- set_64bit_val(qp_ctx, 24, qw3);
- set_64bit_val(qp_ctx, 56, qw7);
-
- i40iw_debug_buf(qp->dev, I40IW_DEBUG_WQE, "QP_HOST)CTX WQE",
- qp_ctx, I40IW_QP_CTX_SIZE);
- return 0;
-}
-
-/**
- * i40iw_sc_alloc_stag - mr stag alloc
- * @dev: sc device struct
- * @info: stag info
- * @scratch: u64 saved to be used during cqp completion
- * @post_sq: flag for cqp db to ring
- */
-static enum i40iw_status_code i40iw_sc_alloc_stag(
- struct i40iw_sc_dev *dev,
- struct i40iw_allocate_stag_info *info,
- u64 scratch,
- bool post_sq)
-{
- u64 *wqe;
- struct i40iw_sc_cqp *cqp;
- u64 header;
- enum i40iw_page_size page_size;
-
- page_size = (info->page_size == 0x200000) ? I40IW_PAGE_SIZE_2M : I40IW_PAGE_SIZE_4K;
- cqp = dev->cqp;
- wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
- if (!wqe)
- return I40IW_ERR_RING_FULL;
- set_64bit_val(wqe,
- 8,
- LS_64(info->pd_id, I40IW_CQPSQ_STAG_PDID) |
- LS_64(info->total_len, I40IW_CQPSQ_STAG_STAGLEN));
- set_64bit_val(wqe,
- 16,
- LS_64(info->stag_idx, I40IW_CQPSQ_STAG_IDX));
- set_64bit_val(wqe,
- 40,
- LS_64(info->hmc_fcn_index, I40IW_CQPSQ_STAG_HMCFNIDX));
-
- header = LS_64(I40IW_CQP_OP_ALLOC_STAG, I40IW_CQPSQ_OPCODE) |
- LS_64(1, I40IW_CQPSQ_STAG_MR) |
- LS_64(info->access_rights, I40IW_CQPSQ_STAG_ARIGHTS) |
- LS_64(info->chunk_size, I40IW_CQPSQ_STAG_LPBLSIZE) |
- LS_64(page_size, I40IW_CQPSQ_STAG_HPAGESIZE) |
- LS_64(info->remote_access, I40IW_CQPSQ_STAG_REMACCENABLED) |
- LS_64(info->use_hmc_fcn_index, I40IW_CQPSQ_STAG_USEHMCFNIDX) |
- LS_64(info->use_pf_rid, I40IW_CQPSQ_STAG_USEPFRID) |
- LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
-
- i40iw_insert_wqe_hdr(wqe, header);
-
- i40iw_debug_buf(dev, I40IW_DEBUG_WQE, "ALLOC_STAG WQE",
- wqe, I40IW_CQP_WQE_SIZE * 8);
-
- if (post_sq)
- i40iw_sc_cqp_post_sq(cqp);
- return 0;
-}
-
-/**
- * i40iw_sc_mr_reg_non_shared - non-shared mr registration
- * @dev: sc device struct
- * @info: mr info
- * @scratch: u64 saved to be used during cqp completion
- * @post_sq: flag for cqp db to ring
- */
-static enum i40iw_status_code i40iw_sc_mr_reg_non_shared(
- struct i40iw_sc_dev *dev,
- struct i40iw_reg_ns_stag_info *info,
- u64 scratch,
- bool post_sq)
-{
- u64 *wqe;
- u64 temp;
- struct i40iw_sc_cqp *cqp;
- u64 header;
- u32 pble_obj_cnt;
- bool remote_access;
- u8 addr_type;
- enum i40iw_page_size page_size;
-
- page_size = (info->page_size == 0x200000) ? I40IW_PAGE_SIZE_2M : I40IW_PAGE_SIZE_4K;
- if (info->access_rights & (I40IW_ACCESS_FLAGS_REMOTEREAD_ONLY |
- I40IW_ACCESS_FLAGS_REMOTEWRITE_ONLY))
- remote_access = true;
- else
- remote_access = false;
-
- pble_obj_cnt = dev->hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].cnt;
-
- if (info->chunk_size && (info->first_pm_pbl_index >= pble_obj_cnt))
- return I40IW_ERR_INVALID_PBLE_INDEX;
-
- cqp = dev->cqp;
- wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
- if (!wqe)
- return I40IW_ERR_RING_FULL;
-
- temp = (info->addr_type == I40IW_ADDR_TYPE_VA_BASED) ? (uintptr_t)info->va : info->fbo;
- set_64bit_val(wqe, 0, temp);
-
- set_64bit_val(wqe,
- 8,
- LS_64(info->total_len, I40IW_CQPSQ_STAG_STAGLEN) |
- LS_64(info->pd_id, I40IW_CQPSQ_STAG_PDID));
-
- set_64bit_val(wqe,
- 16,
- LS_64(info->stag_key, I40IW_CQPSQ_STAG_KEY) |
- LS_64(info->stag_idx, I40IW_CQPSQ_STAG_IDX));
- if (!info->chunk_size) {
- set_64bit_val(wqe, 32, info->reg_addr_pa);
- set_64bit_val(wqe, 48, 0);
- } else {
- set_64bit_val(wqe, 32, 0);
- set_64bit_val(wqe, 48, info->first_pm_pbl_index);
- }
- set_64bit_val(wqe, 40, info->hmc_fcn_index);
- set_64bit_val(wqe, 56, 0);
-
- addr_type = (info->addr_type == I40IW_ADDR_TYPE_VA_BASED) ? 1 : 0;
- header = LS_64(I40IW_CQP_OP_REG_MR, I40IW_CQPSQ_OPCODE) |
- LS_64(1, I40IW_CQPSQ_STAG_MR) |
- LS_64(info->chunk_size, I40IW_CQPSQ_STAG_LPBLSIZE) |
- LS_64(page_size, I40IW_CQPSQ_STAG_HPAGESIZE) |
- LS_64(info->access_rights, I40IW_CQPSQ_STAG_ARIGHTS) |
- LS_64(remote_access, I40IW_CQPSQ_STAG_REMACCENABLED) |
- LS_64(addr_type, I40IW_CQPSQ_STAG_VABASEDTO) |
- LS_64(info->use_hmc_fcn_index, I40IW_CQPSQ_STAG_USEHMCFNIDX) |
- LS_64(info->use_pf_rid, I40IW_CQPSQ_STAG_USEPFRID) |
- LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
-
- i40iw_insert_wqe_hdr(wqe, header);
-
- i40iw_debug_buf(dev, I40IW_DEBUG_WQE, "MR_REG_NS WQE",
- wqe, I40IW_CQP_WQE_SIZE * 8);
-
- if (post_sq)
- i40iw_sc_cqp_post_sq(cqp);
- return 0;
-}
-
-/**
- * i40iw_sc_mr_reg_shared - registered shared memory region
- * @dev: sc device struct
- * @info: info for shared memory registeration
- * @scratch: u64 saved to be used during cqp completion
- * @post_sq: flag for cqp db to ring
- */
-static enum i40iw_status_code i40iw_sc_mr_reg_shared(
- struct i40iw_sc_dev *dev,
- struct i40iw_register_shared_stag *info,
- u64 scratch,
- bool post_sq)
-{
- u64 *wqe;
- struct i40iw_sc_cqp *cqp;
- u64 temp, va64, fbo, header;
- u32 va32;
- bool remote_access;
- u8 addr_type;
-
- if (info->access_rights & (I40IW_ACCESS_FLAGS_REMOTEREAD_ONLY |
- I40IW_ACCESS_FLAGS_REMOTEWRITE_ONLY))
- remote_access = true;
- else
- remote_access = false;
- cqp = dev->cqp;
- wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
- if (!wqe)
- return I40IW_ERR_RING_FULL;
- va64 = (uintptr_t)(info->va);
- va32 = (u32)(va64 & 0x00000000FFFFFFFF);
- fbo = (u64)(va32 & (4096 - 1));
-
- set_64bit_val(wqe,
- 0,
- (info->addr_type == I40IW_ADDR_TYPE_VA_BASED ? (uintptr_t)info->va : fbo));
-
- set_64bit_val(wqe,
- 8,
- LS_64(info->pd_id, I40IW_CQPSQ_STAG_PDID));
- temp = LS_64(info->new_stag_key, I40IW_CQPSQ_STAG_KEY) |
- LS_64(info->new_stag_idx, I40IW_CQPSQ_STAG_IDX) |
- LS_64(info->parent_stag_idx, I40IW_CQPSQ_STAG_PARENTSTAGIDX);
- set_64bit_val(wqe, 16, temp);
-
- addr_type = (info->addr_type == I40IW_ADDR_TYPE_VA_BASED) ? 1 : 0;
- header = LS_64(I40IW_CQP_OP_REG_SMR, I40IW_CQPSQ_OPCODE) |
- LS_64(1, I40IW_CQPSQ_STAG_MR) |
- LS_64(info->access_rights, I40IW_CQPSQ_STAG_ARIGHTS) |
- LS_64(remote_access, I40IW_CQPSQ_STAG_REMACCENABLED) |
- LS_64(addr_type, I40IW_CQPSQ_STAG_VABASEDTO) |
- LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
-
- i40iw_insert_wqe_hdr(wqe, header);
-
- i40iw_debug_buf(dev, I40IW_DEBUG_WQE, "MR_REG_SHARED WQE",
- wqe, I40IW_CQP_WQE_SIZE * 8);
-
- if (post_sq)
- i40iw_sc_cqp_post_sq(cqp);
- return 0;
-}
-
-/**
- * i40iw_sc_dealloc_stag - deallocate stag
- * @dev: sc device struct
- * @info: dealloc stag info
- * @scratch: u64 saved to be used during cqp completion
- * @post_sq: flag for cqp db to ring
- */
-static enum i40iw_status_code i40iw_sc_dealloc_stag(
- struct i40iw_sc_dev *dev,
- struct i40iw_dealloc_stag_info *info,
- u64 scratch,
- bool post_sq)
-{
- u64 header;
- u64 *wqe;
- struct i40iw_sc_cqp *cqp;
-
- cqp = dev->cqp;
- wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
- if (!wqe)
- return I40IW_ERR_RING_FULL;
- set_64bit_val(wqe,
- 8,
- LS_64(info->pd_id, I40IW_CQPSQ_STAG_PDID));
- set_64bit_val(wqe,
- 16,
- LS_64(info->stag_idx, I40IW_CQPSQ_STAG_IDX));
-
- header = LS_64(I40IW_CQP_OP_DEALLOC_STAG, I40IW_CQPSQ_OPCODE) |
- LS_64(info->mr, I40IW_CQPSQ_STAG_MR) |
- LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
-
- i40iw_insert_wqe_hdr(wqe, header);
-
- i40iw_debug_buf(dev, I40IW_DEBUG_WQE, "DEALLOC_STAG WQE",
- wqe, I40IW_CQP_WQE_SIZE * 8);
-
- if (post_sq)
- i40iw_sc_cqp_post_sq(cqp);
- return 0;
-}
-
-/**
- * i40iw_sc_query_stag - query hardware for stag
- * @dev: sc device struct
- * @scratch: u64 saved to be used during cqp completion
- * @stag_index: stag index for query
- * @post_sq: flag for cqp db to ring
- */
-static enum i40iw_status_code i40iw_sc_query_stag(struct i40iw_sc_dev *dev,
- u64 scratch,
- u32 stag_index,
- bool post_sq)
-{
- u64 header;
- u64 *wqe;
- struct i40iw_sc_cqp *cqp;
-
- cqp = dev->cqp;
- wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
- if (!wqe)
- return I40IW_ERR_RING_FULL;
- set_64bit_val(wqe,
- 16,
- LS_64(stag_index, I40IW_CQPSQ_QUERYSTAG_IDX));
-
- header = LS_64(I40IW_CQP_OP_QUERY_STAG, I40IW_CQPSQ_OPCODE) |
- LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
-
- i40iw_insert_wqe_hdr(wqe, header);
-
- i40iw_debug_buf(dev, I40IW_DEBUG_WQE, "QUERY_STAG WQE",
- wqe, I40IW_CQP_WQE_SIZE * 8);
-
- if (post_sq)
- i40iw_sc_cqp_post_sq(cqp);
- return 0;
-}
-
-/**
- * i40iw_sc_mw_alloc - mw allocate
- * @dev: sc device struct
- * @scratch: u64 saved to be used during cqp completion
- * @mw_stag_index:stag index
- * @pd_id: pd is for this mw
- * @post_sq: flag for cqp db to ring
- */
-static enum i40iw_status_code i40iw_sc_mw_alloc(
- struct i40iw_sc_dev *dev,
- u64 scratch,
- u32 mw_stag_index,
- u16 pd_id,
- bool post_sq)
-{
- u64 header;
- struct i40iw_sc_cqp *cqp;
- u64 *wqe;
-
- cqp = dev->cqp;
- wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
- if (!wqe)
- return I40IW_ERR_RING_FULL;
- set_64bit_val(wqe, 8, LS_64(pd_id, I40IW_CQPSQ_STAG_PDID));
- set_64bit_val(wqe,
- 16,
- LS_64(mw_stag_index, I40IW_CQPSQ_STAG_IDX));
-
- header = LS_64(I40IW_CQP_OP_ALLOC_STAG, I40IW_CQPSQ_OPCODE) |
- LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
-
- i40iw_insert_wqe_hdr(wqe, header);
-
- i40iw_debug_buf(dev, I40IW_DEBUG_WQE, "MW_ALLOC WQE",
- wqe, I40IW_CQP_WQE_SIZE * 8);
-
- if (post_sq)
- i40iw_sc_cqp_post_sq(cqp);
- return 0;
-}
-
-/**
- * i40iw_sc_mr_fast_register - Posts RDMA fast register mr WR to iwarp qp
- * @qp: sc qp struct
- * @info: fast mr info
- * @post_sq: flag for cqp db to ring
- */
-enum i40iw_status_code i40iw_sc_mr_fast_register(
- struct i40iw_sc_qp *qp,
- struct i40iw_fast_reg_stag_info *info,
- bool post_sq)
-{
- u64 temp, header;
- u64 *wqe;
- u32 wqe_idx;
- enum i40iw_page_size page_size;
-
- page_size = (info->page_size == 0x200000) ? I40IW_PAGE_SIZE_2M : I40IW_PAGE_SIZE_4K;
- wqe = i40iw_qp_get_next_send_wqe(&qp->qp_uk, &wqe_idx, I40IW_QP_WQE_MIN_SIZE,
- 0, info->wr_id);
- if (!wqe)
- return I40IW_ERR_QP_TOOMANY_WRS_POSTED;
-
- i40iw_debug(qp->dev, I40IW_DEBUG_MR, "%s: wr_id[%llxh] wqe_idx[%04d] location[%p]\n",
- __func__, info->wr_id, wqe_idx,
- &qp->qp_uk.sq_wrtrk_array[wqe_idx].wrid);
- temp = (info->addr_type == I40IW_ADDR_TYPE_VA_BASED) ? (uintptr_t)info->va : info->fbo;
- set_64bit_val(wqe, 0, temp);
-
- temp = RS_64(info->first_pm_pbl_index >> 16, I40IWQPSQ_FIRSTPMPBLIDXHI);
- set_64bit_val(wqe,
- 8,
- LS_64(temp, I40IWQPSQ_FIRSTPMPBLIDXHI) |
- LS_64(info->reg_addr_pa >> I40IWQPSQ_PBLADDR_SHIFT, I40IWQPSQ_PBLADDR));
-
- set_64bit_val(wqe,
- 16,
- info->total_len |
- LS_64(info->first_pm_pbl_index, I40IWQPSQ_FIRSTPMPBLIDXLO));
-
- header = LS_64(info->stag_key, I40IWQPSQ_STAGKEY) |
- LS_64(info->stag_idx, I40IWQPSQ_STAGINDEX) |
- LS_64(I40IWQP_OP_FAST_REGISTER, I40IWQPSQ_OPCODE) |
- LS_64(info->chunk_size, I40IWQPSQ_LPBLSIZE) |
- LS_64(page_size, I40IWQPSQ_HPAGESIZE) |
- LS_64(info->access_rights, I40IWQPSQ_STAGRIGHTS) |
- LS_64(info->addr_type, I40IWQPSQ_VABASEDTO) |
- LS_64(info->read_fence, I40IWQPSQ_READFENCE) |
- LS_64(info->local_fence, I40IWQPSQ_LOCALFENCE) |
- LS_64(info->signaled, I40IWQPSQ_SIGCOMPL) |
- LS_64(qp->qp_uk.swqe_polarity, I40IWQPSQ_VALID);
-
- i40iw_insert_wqe_hdr(wqe, header);
-
- i40iw_debug_buf(qp->dev, I40IW_DEBUG_WQE, "FAST_REG WQE",
- wqe, I40IW_QP_WQE_MIN_SIZE);
-
- if (post_sq)
- i40iw_qp_post_wr(&qp->qp_uk);
- return 0;
-}
-
-/**
- * i40iw_sc_send_lsmm - send last streaming mode message
- * @qp: sc qp struct
- * @lsmm_buf: buffer with lsmm message
- * @size: size of lsmm buffer
- * @stag: stag of lsmm buffer
- */
-static void i40iw_sc_send_lsmm(struct i40iw_sc_qp *qp,
- void *lsmm_buf,
- u32 size,
- i40iw_stag stag)
-{
- u64 *wqe;
- u64 header;
- struct i40iw_qp_uk *qp_uk;
-
- qp_uk = &qp->qp_uk;
- wqe = qp_uk->sq_base->elem;
-
- set_64bit_val(wqe, 0, (uintptr_t)lsmm_buf);
-
- set_64bit_val(wqe, 8, (size | LS_64(stag, I40IWQPSQ_FRAG_STAG)));
-
- set_64bit_val(wqe, 16, 0);
-
- header = LS_64(I40IWQP_OP_RDMA_SEND, I40IWQPSQ_OPCODE) |
- LS_64(1, I40IWQPSQ_STREAMMODE) |
- LS_64(1, I40IWQPSQ_WAITFORRCVPDU) |
- LS_64(qp->qp_uk.swqe_polarity, I40IWQPSQ_VALID);
-
- i40iw_insert_wqe_hdr(wqe, header);
-
- i40iw_debug_buf(qp->dev, I40IW_DEBUG_QP, "SEND_LSMM WQE",
- wqe, I40IW_QP_WQE_MIN_SIZE);
-}
-
-/**
- * i40iw_sc_send_lsmm_nostag - for privilege qp
- * @qp: sc qp struct
- * @lsmm_buf: buffer with lsmm message
- * @size: size of lsmm buffer
- */
-static void i40iw_sc_send_lsmm_nostag(struct i40iw_sc_qp *qp,
- void *lsmm_buf,
- u32 size)
-{
- u64 *wqe;
- u64 header;
- struct i40iw_qp_uk *qp_uk;
-
- qp_uk = &qp->qp_uk;
- wqe = qp_uk->sq_base->elem;
-
- set_64bit_val(wqe, 0, (uintptr_t)lsmm_buf);
-
- set_64bit_val(wqe, 8, size);
-
- set_64bit_val(wqe, 16, 0);
-
- header = LS_64(I40IWQP_OP_RDMA_SEND, I40IWQPSQ_OPCODE) |
- LS_64(1, I40IWQPSQ_STREAMMODE) |
- LS_64(1, I40IWQPSQ_WAITFORRCVPDU) |
- LS_64(qp->qp_uk.swqe_polarity, I40IWQPSQ_VALID);
-
- i40iw_insert_wqe_hdr(wqe, header);
-
- i40iw_debug_buf(qp->dev, I40IW_DEBUG_WQE, "SEND_LSMM_NOSTAG WQE",
- wqe, I40IW_QP_WQE_MIN_SIZE);
-}
-
-/**
- * i40iw_sc_send_rtt - send last read0 or write0
- * @qp: sc qp struct
- * @read: Do read0 or write0
- */
-static void i40iw_sc_send_rtt(struct i40iw_sc_qp *qp, bool read)
-{
- u64 *wqe;
- u64 header;
- struct i40iw_qp_uk *qp_uk;
-
- qp_uk = &qp->qp_uk;
- wqe = qp_uk->sq_base->elem;
-
- set_64bit_val(wqe, 0, 0);
- set_64bit_val(wqe, 8, 0);
- set_64bit_val(wqe, 16, 0);
- if (read) {
- header = LS_64(0x1234, I40IWQPSQ_REMSTAG) |
- LS_64(I40IWQP_OP_RDMA_READ, I40IWQPSQ_OPCODE) |
- LS_64(qp->qp_uk.swqe_polarity, I40IWQPSQ_VALID);
- set_64bit_val(wqe, 8, ((u64)0xabcd << 32));
- } else {
- header = LS_64(I40IWQP_OP_RDMA_WRITE, I40IWQPSQ_OPCODE) |
- LS_64(qp->qp_uk.swqe_polarity, I40IWQPSQ_VALID);
- }
-
- i40iw_insert_wqe_hdr(wqe, header);
-
- i40iw_debug_buf(qp->dev, I40IW_DEBUG_WQE, "RTR WQE",
- wqe, I40IW_QP_WQE_MIN_SIZE);
-}
-
-/**
- * i40iw_sc_post_wqe0 - send wqe with opcode
- * @qp: sc qp struct
- * @opcode: opcode to use for wqe0
- */
-static enum i40iw_status_code i40iw_sc_post_wqe0(struct i40iw_sc_qp *qp, u8 opcode)
-{
- u64 *wqe;
- u64 header;
- struct i40iw_qp_uk *qp_uk;
-
- qp_uk = &qp->qp_uk;
- wqe = qp_uk->sq_base->elem;
-
- if (!wqe)
- return I40IW_ERR_QP_TOOMANY_WRS_POSTED;
- switch (opcode) {
- case I40IWQP_OP_NOP:
- set_64bit_val(wqe, 0, 0);
- set_64bit_val(wqe, 8, 0);
- set_64bit_val(wqe, 16, 0);
- header = LS_64(I40IWQP_OP_NOP, I40IWQPSQ_OPCODE) |
- LS_64(qp->qp_uk.swqe_polarity, I40IWQPSQ_VALID);
-
- i40iw_insert_wqe_hdr(wqe, header);
- break;
- case I40IWQP_OP_RDMA_SEND:
- set_64bit_val(wqe, 0, 0);
- set_64bit_val(wqe, 8, 0);
- set_64bit_val(wqe, 16, 0);
- header = LS_64(I40IWQP_OP_RDMA_SEND, I40IWQPSQ_OPCODE) |
- LS_64(qp->qp_uk.swqe_polarity, I40IWQPSQ_VALID) |
- LS_64(1, I40IWQPSQ_STREAMMODE) |
- LS_64(1, I40IWQPSQ_WAITFORRCVPDU);
-
- i40iw_insert_wqe_hdr(wqe, header);
- break;
- default:
- i40iw_debug(qp->dev, I40IW_DEBUG_QP, "%s: Invalid WQE zero opcode\n",
- __func__);
- break;
- }
- return 0;
-}
-
-/**
- * i40iw_sc_init_iw_hmc() - queries fpm values using cqp and populates hmc_info
- * @dev : ptr to i40iw_dev struct
- * @hmc_fn_id: hmc function id
- */
-enum i40iw_status_code i40iw_sc_init_iw_hmc(struct i40iw_sc_dev *dev, u8 hmc_fn_id)
-{
- struct i40iw_hmc_info *hmc_info;
- struct i40iw_dma_mem query_fpm_mem;
- struct i40iw_virt_mem virt_mem;
- struct i40iw_vfdev *vf_dev = NULL;
- u32 mem_size;
- enum i40iw_status_code ret_code = 0;
- bool poll_registers = true;
- u16 iw_vf_idx;
- u8 wait_type;
-
- if (hmc_fn_id >= I40IW_MAX_VF_FPM_ID ||
- (dev->hmc_fn_id != hmc_fn_id && hmc_fn_id < I40IW_FIRST_VF_FPM_ID))
- return I40IW_ERR_INVALID_HMCFN_ID;
-
- i40iw_debug(dev, I40IW_DEBUG_HMC, "hmc_fn_id %u, dev->hmc_fn_id %u\n", hmc_fn_id,
- dev->hmc_fn_id);
- if (hmc_fn_id == dev->hmc_fn_id) {
- hmc_info = dev->hmc_info;
- query_fpm_mem.pa = dev->fpm_query_buf_pa;
- query_fpm_mem.va = dev->fpm_query_buf;
- } else {
- vf_dev = i40iw_vfdev_from_fpm(dev, hmc_fn_id);
- if (!vf_dev)
- return I40IW_ERR_INVALID_VF_ID;
-
- hmc_info = &vf_dev->hmc_info;
- iw_vf_idx = vf_dev->iw_vf_idx;
- i40iw_debug(dev, I40IW_DEBUG_HMC, "vf_dev %p, hmc_info %p, hmc_obj %p\n", vf_dev,
- hmc_info, hmc_info->hmc_obj);
- if (!vf_dev->fpm_query_buf) {
- if (!dev->vf_fpm_query_buf[iw_vf_idx].va) {
- ret_code = i40iw_alloc_query_fpm_buf(dev,
- &dev->vf_fpm_query_buf[iw_vf_idx]);
- if (ret_code)
- return ret_code;
- }
- vf_dev->fpm_query_buf = dev->vf_fpm_query_buf[iw_vf_idx].va;
- vf_dev->fpm_query_buf_pa = dev->vf_fpm_query_buf[iw_vf_idx].pa;
- }
- query_fpm_mem.pa = vf_dev->fpm_query_buf_pa;
- query_fpm_mem.va = vf_dev->fpm_query_buf;
- /**
- * It is HARDWARE specific:
- * this call is done by PF for VF and
- * i40iw_sc_query_fpm_values needs ccq poll
- * because PF ccq is already created.
- */
- poll_registers = false;
- }
-
- hmc_info->hmc_fn_id = hmc_fn_id;
-
- if (hmc_fn_id != dev->hmc_fn_id) {
- ret_code =
- i40iw_cqp_query_fpm_values_cmd(dev, &query_fpm_mem, hmc_fn_id);
- } else {
- wait_type = poll_registers ? (u8)I40IW_CQP_WAIT_POLL_REGS :
- (u8)I40IW_CQP_WAIT_POLL_CQ;
-
- ret_code = i40iw_sc_query_fpm_values(
- dev->cqp,
- 0,
- hmc_info->hmc_fn_id,
- &query_fpm_mem,
- true,
- wait_type);
- }
- if (ret_code)
- return ret_code;
-
- /* parse the fpm_query_buf and fill hmc obj info */
- ret_code =
- i40iw_sc_parse_fpm_query_buf((u64 *)query_fpm_mem.va,
- hmc_info,
- &dev->hmc_fpm_misc);
- if (ret_code)
- return ret_code;
- i40iw_debug_buf(dev, I40IW_DEBUG_HMC, "QUERY FPM BUFFER",
- query_fpm_mem.va, I40IW_QUERY_FPM_BUF_SIZE);
-
- if (hmc_fn_id != dev->hmc_fn_id) {
- i40iw_cqp_commit_fpm_values_cmd(dev, &query_fpm_mem, hmc_fn_id);
-
- /* parse the fpm_commit_buf and fill hmc obj info */
- i40iw_sc_parse_fpm_commit_buf((u64 *)query_fpm_mem.va, hmc_info->hmc_obj, &hmc_info->sd_table.sd_cnt);
- mem_size = sizeof(struct i40iw_hmc_sd_entry) *
- (hmc_info->sd_table.sd_cnt + hmc_info->first_sd_index);
- ret_code = i40iw_allocate_virt_mem(dev->hw, &virt_mem, mem_size);
- if (ret_code)
- return ret_code;
- hmc_info->sd_table.sd_entry = virt_mem.va;
- }
-
- /* fill size of objects which are fixed */
- hmc_info->hmc_obj[I40IW_HMC_IW_XFFL].size = 4;
- hmc_info->hmc_obj[I40IW_HMC_IW_Q1FL].size = 4;
- hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].size = 8;
- hmc_info->hmc_obj[I40IW_HMC_IW_APBVT_ENTRY].size = 8192;
- hmc_info->hmc_obj[I40IW_HMC_IW_APBVT_ENTRY].max_cnt = 1;
-
- return ret_code;
-}
-
-/**
- * i40iw_sc_configure_iw_fpm() - commits hmc obj cnt values using cqp command and
- * populates fpm base address in hmc_info
- * @dev : ptr to i40iw_dev struct
- * @hmc_fn_id: hmc function id
- */
-static enum i40iw_status_code i40iw_sc_configure_iw_fpm(struct i40iw_sc_dev *dev,
- u8 hmc_fn_id)
-{
- struct i40iw_hmc_info *hmc_info;
- struct i40iw_hmc_obj_info *obj_info;
- u64 *buf;
- struct i40iw_dma_mem commit_fpm_mem;
- u32 i, j;
- enum i40iw_status_code ret_code = 0;
- bool poll_registers = true;
- u8 wait_type;
-
- if (hmc_fn_id >= I40IW_MAX_VF_FPM_ID ||
- (dev->hmc_fn_id != hmc_fn_id && hmc_fn_id < I40IW_FIRST_VF_FPM_ID))
- return I40IW_ERR_INVALID_HMCFN_ID;
-
- if (hmc_fn_id == dev->hmc_fn_id) {
- hmc_info = dev->hmc_info;
- } else {
- hmc_info = i40iw_vf_hmcinfo_from_fpm(dev, hmc_fn_id);
- poll_registers = false;
- }
- if (!hmc_info)
- return I40IW_ERR_BAD_PTR;
-
- obj_info = hmc_info->hmc_obj;
- buf = dev->fpm_commit_buf;
-
- /* copy cnt values in commit buf */
- for (i = I40IW_HMC_IW_QP, j = 0; i <= I40IW_HMC_IW_PBLE;
- i++, j += 8)
- set_64bit_val(buf, j, (u64)obj_info[i].cnt);
-
- set_64bit_val(buf, 40, 0); /* APBVT rsvd */
-
- commit_fpm_mem.pa = dev->fpm_commit_buf_pa;
- commit_fpm_mem.va = dev->fpm_commit_buf;
- wait_type = poll_registers ? (u8)I40IW_CQP_WAIT_POLL_REGS :
- (u8)I40IW_CQP_WAIT_POLL_CQ;
- ret_code = i40iw_sc_commit_fpm_values(
- dev->cqp,
- 0,
- hmc_info->hmc_fn_id,
- &commit_fpm_mem,
- true,
- wait_type);
-
- /* parse the fpm_commit_buf and fill hmc obj info */
- if (!ret_code)
- ret_code = i40iw_sc_parse_fpm_commit_buf(dev->fpm_commit_buf,
- hmc_info->hmc_obj,
- &hmc_info->sd_table.sd_cnt);
-
- i40iw_debug_buf(dev, I40IW_DEBUG_HMC, "COMMIT FPM BUFFER",
- commit_fpm_mem.va, I40IW_COMMIT_FPM_BUF_SIZE);
-
- return ret_code;
-}
-
-/**
- * cqp_sds_wqe_fill - fill cqp wqe doe sd
- * @cqp: struct for cqp hw
- * @info; sd info for wqe
- * @scratch: u64 saved to be used during cqp completion
- */
-static enum i40iw_status_code cqp_sds_wqe_fill(struct i40iw_sc_cqp *cqp,
- struct i40iw_update_sds_info *info,
- u64 scratch)
-{
- u64 data;
- u64 header;
- u64 *wqe;
- int mem_entries, wqe_entries;
- struct i40iw_dma_mem *sdbuf = &cqp->sdbuf;
-
- wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
- if (!wqe)
- return I40IW_ERR_RING_FULL;
-
- I40IW_CQP_INIT_WQE(wqe);
- wqe_entries = (info->cnt > 3) ? 3 : info->cnt;
- mem_entries = info->cnt - wqe_entries;
-
- header = LS_64(I40IW_CQP_OP_UPDATE_PE_SDS, I40IW_CQPSQ_OPCODE) |
- LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID) |
- LS_64(mem_entries, I40IW_CQPSQ_UPESD_ENTRY_COUNT);
-
- if (mem_entries) {
- memcpy(sdbuf->va, &info->entry[3], (mem_entries << 4));
- data = sdbuf->pa;
- } else {
- data = 0;
- }
- data |= LS_64(info->hmc_fn_id, I40IW_CQPSQ_UPESD_HMCFNID);
-
- set_64bit_val(wqe, 16, data);
-
- switch (wqe_entries) {
- case 3:
- set_64bit_val(wqe, 48,
- (LS_64(info->entry[2].cmd, I40IW_CQPSQ_UPESD_SDCMD) |
- LS_64(1, I40IW_CQPSQ_UPESD_ENTRY_VALID)));
-
- set_64bit_val(wqe, 56, info->entry[2].data);
- /* fallthrough */
- case 2:
- set_64bit_val(wqe, 32,
- (LS_64(info->entry[1].cmd, I40IW_CQPSQ_UPESD_SDCMD) |
- LS_64(1, I40IW_CQPSQ_UPESD_ENTRY_VALID)));
-
- set_64bit_val(wqe, 40, info->entry[1].data);
- /* fallthrough */
- case 1:
- set_64bit_val(wqe, 0,
- LS_64(info->entry[0].cmd, I40IW_CQPSQ_UPESD_SDCMD));
-
- set_64bit_val(wqe, 8, info->entry[0].data);
- break;
- default:
- break;
- }
-
- i40iw_insert_wqe_hdr(wqe, header);
-
- i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "UPDATE_PE_SDS WQE",
- wqe, I40IW_CQP_WQE_SIZE * 8);
- return 0;
-}
-
-/**
- * i40iw_update_pe_sds - cqp wqe for sd
- * @dev: ptr to i40iw_dev struct
- * @info: sd info for sd's
- * @scratch: u64 saved to be used during cqp completion
- */
-static enum i40iw_status_code i40iw_update_pe_sds(struct i40iw_sc_dev *dev,
- struct i40iw_update_sds_info *info,
- u64 scratch)
-{
- struct i40iw_sc_cqp *cqp = dev->cqp;
- enum i40iw_status_code ret_code;
-
- ret_code = cqp_sds_wqe_fill(cqp, info, scratch);
- if (!ret_code)
- i40iw_sc_cqp_post_sq(cqp);
-
- return ret_code;
-}
-
-/**
- * i40iw_update_sds_noccq - update sd before ccq created
- * @dev: sc device struct
- * @info: sd info for sd's
- */
-enum i40iw_status_code i40iw_update_sds_noccq(struct i40iw_sc_dev *dev,
- struct i40iw_update_sds_info *info)
-{
- u32 error, val, tail;
- struct i40iw_sc_cqp *cqp = dev->cqp;
- enum i40iw_status_code ret_code;
-
- ret_code = cqp_sds_wqe_fill(cqp, info, 0);
- if (ret_code)
- return ret_code;
- i40iw_get_cqp_reg_info(cqp, &val, &tail, &error);
- if (error)
- return I40IW_ERR_CQP_COMPL_ERROR;
-
- i40iw_sc_cqp_post_sq(cqp);
- ret_code = i40iw_cqp_poll_registers(cqp, tail, I40IW_DONE_COUNT);
-
- return ret_code;
-}
-
-/**
- * i40iw_sc_suspend_qp - suspend qp for param change
- * @cqp: struct for cqp hw
- * @qp: sc qp struct
- * @scratch: u64 saved to be used during cqp completion
- */
-enum i40iw_status_code i40iw_sc_suspend_qp(struct i40iw_sc_cqp *cqp,
- struct i40iw_sc_qp *qp,
- u64 scratch)
-{
- u64 header;
- u64 *wqe;
-
- wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
- if (!wqe)
- return I40IW_ERR_RING_FULL;
- header = LS_64(qp->qp_uk.qp_id, I40IW_CQPSQ_SUSPENDQP_QPID) |
- LS_64(I40IW_CQP_OP_SUSPEND_QP, I40IW_CQPSQ_OPCODE) |
- LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
-
- i40iw_insert_wqe_hdr(wqe, header);
-
- i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "SUSPEND_QP WQE",
- wqe, I40IW_CQP_WQE_SIZE * 8);
-
- i40iw_sc_cqp_post_sq(cqp);
- return 0;
-}
-
-/**
- * i40iw_sc_resume_qp - resume qp after suspend
- * @cqp: struct for cqp hw
- * @qp: sc qp struct
- * @scratch: u64 saved to be used during cqp completion
- */
-enum i40iw_status_code i40iw_sc_resume_qp(struct i40iw_sc_cqp *cqp,
- struct i40iw_sc_qp *qp,
- u64 scratch)
-{
- u64 header;
- u64 *wqe;
-
- wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
- if (!wqe)
- return I40IW_ERR_RING_FULL;
- set_64bit_val(wqe,
- 16,
- LS_64(qp->qs_handle, I40IW_CQPSQ_RESUMEQP_QSHANDLE));
-
- header = LS_64(qp->qp_uk.qp_id, I40IW_CQPSQ_RESUMEQP_QPID) |
- LS_64(I40IW_CQP_OP_RESUME_QP, I40IW_CQPSQ_OPCODE) |
- LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
-
- i40iw_insert_wqe_hdr(wqe, header);
-
- i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "RESUME_QP WQE",
- wqe, I40IW_CQP_WQE_SIZE * 8);
-
- i40iw_sc_cqp_post_sq(cqp);
- return 0;
-}
-
-/**
- * i40iw_sc_static_hmc_pages_allocated - cqp wqe to allocate hmc pages
- * @cqp: struct for cqp hw
- * @scratch: u64 saved to be used during cqp completion
- * @hmc_fn_id: hmc function id
- * @post_sq: flag for cqp db to ring
- * @poll_registers: flag to poll register for cqp completion
- */
-enum i40iw_status_code i40iw_sc_static_hmc_pages_allocated(
- struct i40iw_sc_cqp *cqp,
- u64 scratch,
- u8 hmc_fn_id,
- bool post_sq,
- bool poll_registers)
-{
- u64 header;
- u64 *wqe;
- u32 tail, val, error;
- enum i40iw_status_code ret_code = 0;
-
- wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
- if (!wqe)
- return I40IW_ERR_RING_FULL;
- set_64bit_val(wqe,
- 16,
- LS_64(hmc_fn_id, I40IW_SHMC_PAGE_ALLOCATED_HMC_FN_ID));
-
- header = LS_64(I40IW_CQP_OP_SHMC_PAGES_ALLOCATED, I40IW_CQPSQ_OPCODE) |
- LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
-
- i40iw_insert_wqe_hdr(wqe, header);
-
- i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "SHMC_PAGES_ALLOCATED WQE",
- wqe, I40IW_CQP_WQE_SIZE * 8);
- i40iw_get_cqp_reg_info(cqp, &val, &tail, &error);
- if (error) {
- ret_code = I40IW_ERR_CQP_COMPL_ERROR;
- return ret_code;
- }
- if (post_sq) {
- i40iw_sc_cqp_post_sq(cqp);
- if (poll_registers)
- /* check for cqp sq tail update */
- ret_code = i40iw_cqp_poll_registers(cqp, tail, 1000);
- else
- ret_code = i40iw_sc_poll_for_cqp_op_done(cqp,
- I40IW_CQP_OP_SHMC_PAGES_ALLOCATED,
- NULL);
- }
-
- return ret_code;
-}
-
-/**
- * i40iw_ring_full - check if cqp ring is full
- * @cqp: struct for cqp hw
- */
-static bool i40iw_ring_full(struct i40iw_sc_cqp *cqp)
-{
- return I40IW_RING_FULL_ERR(cqp->sq_ring);
-}
-
-/**
- * i40iw_est_sd - returns approximate number of SDs for HMC
- * @dev: sc device struct
- * @hmc_info: hmc structure, size and count for HMC objects
- */
-static u64 i40iw_est_sd(struct i40iw_sc_dev *dev, struct i40iw_hmc_info *hmc_info)
-{
- int i;
- u64 size = 0;
- u64 sd;
-
- for (i = I40IW_HMC_IW_QP; i < I40IW_HMC_IW_PBLE; i++)
- size += hmc_info->hmc_obj[i].cnt * hmc_info->hmc_obj[i].size;
-
- if (dev->is_pf)
- size += hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].cnt * hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].size;
-
- if (size & 0x1FFFFF)
- sd = (size >> 21) + 1; /* add 1 for remainder */
- else
- sd = size >> 21;
-
- if (!dev->is_pf) {
- /* 2MB alignment for VF PBLE HMC */
- size = hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].cnt * hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].size;
- if (size & 0x1FFFFF)
- sd += (size >> 21) + 1; /* add 1 for remainder */
- else
- sd += size >> 21;
- }
-
- return sd;
-}
-
-/**
- * i40iw_config_fpm_values - configure HMC objects
- * @dev: sc device struct
- * @qp_count: desired qp count
- */
-enum i40iw_status_code i40iw_config_fpm_values(struct i40iw_sc_dev *dev, u32 qp_count)
-{
- struct i40iw_virt_mem virt_mem;
- u32 i, mem_size;
- u32 qpwantedoriginal, qpwanted, mrwanted, pblewanted;
- u32 powerof2;
- u64 sd_needed;
- u32 loop_count = 0;
-
- struct i40iw_hmc_info *hmc_info;
- struct i40iw_hmc_fpm_misc *hmc_fpm_misc;
- enum i40iw_status_code ret_code = 0;
-
- hmc_info = dev->hmc_info;
- hmc_fpm_misc = &dev->hmc_fpm_misc;
-
- ret_code = i40iw_sc_init_iw_hmc(dev, dev->hmc_fn_id);
- if (ret_code) {
- i40iw_debug(dev, I40IW_DEBUG_HMC,
- "i40iw_sc_init_iw_hmc returned error_code = %d\n",
- ret_code);
- return ret_code;
- }
-
- for (i = I40IW_HMC_IW_QP; i < I40IW_HMC_IW_MAX; i++)
- hmc_info->hmc_obj[i].cnt = hmc_info->hmc_obj[i].max_cnt;
- sd_needed = i40iw_est_sd(dev, hmc_info);
- i40iw_debug(dev, I40IW_DEBUG_HMC,
- "%s: FW initial max sd_count[%08lld] first_sd_index[%04d]\n",
- __func__, sd_needed, hmc_info->first_sd_index);
- i40iw_debug(dev, I40IW_DEBUG_HMC,
- "%s: sd count %d where max sd is %d\n",
- __func__, hmc_info->sd_table.sd_cnt,
- hmc_fpm_misc->max_sds);
-
- qpwanted = min(qp_count, hmc_info->hmc_obj[I40IW_HMC_IW_QP].max_cnt);
- qpwantedoriginal = qpwanted;
- mrwanted = hmc_info->hmc_obj[I40IW_HMC_IW_MR].max_cnt;
- pblewanted = hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].max_cnt;
-
- i40iw_debug(dev, I40IW_DEBUG_HMC,
- "req_qp=%d max_sd=%d, max_qp = %d, max_cq=%d, max_mr=%d, max_pble=%d\n",
- qp_count, hmc_fpm_misc->max_sds,
- hmc_info->hmc_obj[I40IW_HMC_IW_QP].max_cnt,
- hmc_info->hmc_obj[I40IW_HMC_IW_CQ].max_cnt,
- hmc_info->hmc_obj[I40IW_HMC_IW_MR].max_cnt,
- hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].max_cnt);
-
- do {
- ++loop_count;
- hmc_info->hmc_obj[I40IW_HMC_IW_QP].cnt = qpwanted;
- hmc_info->hmc_obj[I40IW_HMC_IW_CQ].cnt =
- min(2 * qpwanted, hmc_info->hmc_obj[I40IW_HMC_IW_CQ].cnt);
- hmc_info->hmc_obj[I40IW_HMC_IW_SRQ].cnt = 0x00; /* Reserved */
- hmc_info->hmc_obj[I40IW_HMC_IW_HTE].cnt =
- qpwanted * hmc_fpm_misc->ht_multiplier;
- hmc_info->hmc_obj[I40IW_HMC_IW_ARP].cnt =
- hmc_info->hmc_obj[I40IW_HMC_IW_ARP].max_cnt;
- hmc_info->hmc_obj[I40IW_HMC_IW_APBVT_ENTRY].cnt = 1;
- hmc_info->hmc_obj[I40IW_HMC_IW_MR].cnt = mrwanted;
-
- hmc_info->hmc_obj[I40IW_HMC_IW_XF].cnt = I40IW_MAX_WQ_ENTRIES * qpwanted;
- hmc_info->hmc_obj[I40IW_HMC_IW_Q1].cnt = 4 * I40IW_MAX_IRD_SIZE * qpwanted;
- hmc_info->hmc_obj[I40IW_HMC_IW_XFFL].cnt =
- hmc_info->hmc_obj[I40IW_HMC_IW_XF].cnt / hmc_fpm_misc->xf_block_size;
- hmc_info->hmc_obj[I40IW_HMC_IW_Q1FL].cnt =
- hmc_info->hmc_obj[I40IW_HMC_IW_Q1].cnt / hmc_fpm_misc->q1_block_size;
- hmc_info->hmc_obj[I40IW_HMC_IW_TIMER].cnt =
- ((qpwanted) / 512 + 1) * hmc_fpm_misc->timer_bucket;
- hmc_info->hmc_obj[I40IW_HMC_IW_FSIMC].cnt = 0x00;
- hmc_info->hmc_obj[I40IW_HMC_IW_FSIAV].cnt = 0x00;
- hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].cnt = pblewanted;
-
- /* How much memory is needed for all the objects. */
- sd_needed = i40iw_est_sd(dev, hmc_info);
- if ((loop_count > 1000) ||
- ((!(loop_count % 10)) &&
- (qpwanted > qpwantedoriginal * 2 / 3))) {
- if (qpwanted > FPM_MULTIPLIER) {
- qpwanted -= FPM_MULTIPLIER;
- powerof2 = 1;
- while (powerof2 < qpwanted)
- powerof2 *= 2;
- powerof2 /= 2;
- qpwanted = powerof2;
- } else {
- qpwanted /= 2;
- }
- }
- if (mrwanted > FPM_MULTIPLIER * 10)
- mrwanted -= FPM_MULTIPLIER * 10;
- if (pblewanted > FPM_MULTIPLIER * 1000)
- pblewanted -= FPM_MULTIPLIER * 1000;
- } while (sd_needed > hmc_fpm_misc->max_sds && loop_count < 2000);
-
- sd_needed = i40iw_est_sd(dev, hmc_info);
-
- i40iw_debug(dev, I40IW_DEBUG_HMC,
- "loop_cnt=%d, sd_needed=%lld, qpcnt = %d, cqcnt=%d, mrcnt=%d, pblecnt=%d\n",
- loop_count, sd_needed,
- hmc_info->hmc_obj[I40IW_HMC_IW_QP].cnt,
- hmc_info->hmc_obj[I40IW_HMC_IW_CQ].cnt,
- hmc_info->hmc_obj[I40IW_HMC_IW_MR].cnt,
- hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].cnt);
-
- ret_code = i40iw_sc_configure_iw_fpm(dev, dev->hmc_fn_id);
- if (ret_code) {
- i40iw_debug(dev, I40IW_DEBUG_HMC,
- "configure_iw_fpm returned error_code[x%08X]\n",
- i40iw_rd32(dev->hw, dev->is_pf ? I40E_PFPE_CQPERRCODES : I40E_VFPE_CQPERRCODES1));
- return ret_code;
- }
-
- mem_size = sizeof(struct i40iw_hmc_sd_entry) *
- (hmc_info->sd_table.sd_cnt + hmc_info->first_sd_index + 1);
- ret_code = i40iw_allocate_virt_mem(dev->hw, &virt_mem, mem_size);
- if (ret_code) {
- i40iw_debug(dev, I40IW_DEBUG_HMC,
- "%s: failed to allocate memory for sd_entry buffer\n",
- __func__);
- return ret_code;
- }
- hmc_info->sd_table.sd_entry = virt_mem.va;
-
- return ret_code;
-}
-
-/**
- * i40iw_exec_cqp_cmd - execute cqp cmd when wqe are available
- * @dev: rdma device
- * @pcmdinfo: cqp command info
- */
-static enum i40iw_status_code i40iw_exec_cqp_cmd(struct i40iw_sc_dev *dev,
- struct cqp_commands_info *pcmdinfo)
-{
- enum i40iw_status_code status;
- struct i40iw_dma_mem values_mem;
-
- dev->cqp_cmd_stats[pcmdinfo->cqp_cmd]++;
- switch (pcmdinfo->cqp_cmd) {
- case OP_DELETE_LOCAL_MAC_IPADDR_ENTRY:
- status = i40iw_sc_del_local_mac_ipaddr_entry(
- pcmdinfo->in.u.del_local_mac_ipaddr_entry.cqp,
- pcmdinfo->in.u.del_local_mac_ipaddr_entry.scratch,
- pcmdinfo->in.u.del_local_mac_ipaddr_entry.entry_idx,
- pcmdinfo->in.u.del_local_mac_ipaddr_entry.ignore_ref_count,
- pcmdinfo->post_sq);
- break;
- case OP_CEQ_DESTROY:
- status = i40iw_sc_ceq_destroy(pcmdinfo->in.u.ceq_destroy.ceq,
- pcmdinfo->in.u.ceq_destroy.scratch,
- pcmdinfo->post_sq);
- break;
- case OP_AEQ_DESTROY:
- status = i40iw_sc_aeq_destroy(pcmdinfo->in.u.aeq_destroy.aeq,
- pcmdinfo->in.u.aeq_destroy.scratch,
- pcmdinfo->post_sq);
-
- break;
- case OP_DELETE_ARP_CACHE_ENTRY:
- status = i40iw_sc_del_arp_cache_entry(
- pcmdinfo->in.u.del_arp_cache_entry.cqp,
- pcmdinfo->in.u.del_arp_cache_entry.scratch,
- pcmdinfo->in.u.del_arp_cache_entry.arp_index,
- pcmdinfo->post_sq);
- break;
- case OP_MANAGE_APBVT_ENTRY:
- status = i40iw_sc_manage_apbvt_entry(
- pcmdinfo->in.u.manage_apbvt_entry.cqp,
- &pcmdinfo->in.u.manage_apbvt_entry.info,
- pcmdinfo->in.u.manage_apbvt_entry.scratch,
- pcmdinfo->post_sq);
- break;
- case OP_CEQ_CREATE:
- status = i40iw_sc_ceq_create(pcmdinfo->in.u.ceq_create.ceq,
- pcmdinfo->in.u.ceq_create.scratch,
- pcmdinfo->post_sq);
- break;
- case OP_AEQ_CREATE:
- status = i40iw_sc_aeq_create(pcmdinfo->in.u.aeq_create.aeq,
- pcmdinfo->in.u.aeq_create.scratch,
- pcmdinfo->post_sq);
- break;
- case OP_ALLOC_LOCAL_MAC_IPADDR_ENTRY:
- status = i40iw_sc_alloc_local_mac_ipaddr_entry(
- pcmdinfo->in.u.alloc_local_mac_ipaddr_entry.cqp,
- pcmdinfo->in.u.alloc_local_mac_ipaddr_entry.scratch,
- pcmdinfo->post_sq);
- break;
- case OP_ADD_LOCAL_MAC_IPADDR_ENTRY:
- status = i40iw_sc_add_local_mac_ipaddr_entry(
- pcmdinfo->in.u.add_local_mac_ipaddr_entry.cqp,
- &pcmdinfo->in.u.add_local_mac_ipaddr_entry.info,
- pcmdinfo->in.u.add_local_mac_ipaddr_entry.scratch,
- pcmdinfo->post_sq);
- break;
- case OP_MANAGE_QHASH_TABLE_ENTRY:
- status = i40iw_sc_manage_qhash_table_entry(
- pcmdinfo->in.u.manage_qhash_table_entry.cqp,
- &pcmdinfo->in.u.manage_qhash_table_entry.info,
- pcmdinfo->in.u.manage_qhash_table_entry.scratch,
- pcmdinfo->post_sq);
-
- break;
- case OP_QP_MODIFY:
- status = i40iw_sc_qp_modify(
- pcmdinfo->in.u.qp_modify.qp,
- &pcmdinfo->in.u.qp_modify.info,
- pcmdinfo->in.u.qp_modify.scratch,
- pcmdinfo->post_sq);
-
- break;
- case OP_QP_UPLOAD_CONTEXT:
- status = i40iw_sc_qp_upload_context(
- pcmdinfo->in.u.qp_upload_context.dev,
- &pcmdinfo->in.u.qp_upload_context.info,
- pcmdinfo->in.u.qp_upload_context.scratch,
- pcmdinfo->post_sq);
-
- break;
- case OP_CQ_CREATE:
- status = i40iw_sc_cq_create(
- pcmdinfo->in.u.cq_create.cq,
- pcmdinfo->in.u.cq_create.scratch,
- pcmdinfo->in.u.cq_create.check_overflow,
- pcmdinfo->post_sq);
- break;
- case OP_CQ_DESTROY:
- status = i40iw_sc_cq_destroy(
- pcmdinfo->in.u.cq_destroy.cq,
- pcmdinfo->in.u.cq_destroy.scratch,
- pcmdinfo->post_sq);
-
- break;
- case OP_QP_CREATE:
- status = i40iw_sc_qp_create(
- pcmdinfo->in.u.qp_create.qp,
- &pcmdinfo->in.u.qp_create.info,
- pcmdinfo->in.u.qp_create.scratch,
- pcmdinfo->post_sq);
- break;
- case OP_QP_DESTROY:
- status = i40iw_sc_qp_destroy(
- pcmdinfo->in.u.qp_destroy.qp,
- pcmdinfo->in.u.qp_destroy.scratch,
- pcmdinfo->in.u.qp_destroy.remove_hash_idx,
- pcmdinfo->in.u.qp_destroy.
- ignore_mw_bnd,
- pcmdinfo->post_sq);
-
- break;
- case OP_ALLOC_STAG:
- status = i40iw_sc_alloc_stag(
- pcmdinfo->in.u.alloc_stag.dev,
- &pcmdinfo->in.u.alloc_stag.info,
- pcmdinfo->in.u.alloc_stag.scratch,
- pcmdinfo->post_sq);
- break;
- case OP_MR_REG_NON_SHARED:
- status = i40iw_sc_mr_reg_non_shared(
- pcmdinfo->in.u.mr_reg_non_shared.dev,
- &pcmdinfo->in.u.mr_reg_non_shared.info,
- pcmdinfo->in.u.mr_reg_non_shared.scratch,
- pcmdinfo->post_sq);
-
- break;
- case OP_DEALLOC_STAG:
- status = i40iw_sc_dealloc_stag(
- pcmdinfo->in.u.dealloc_stag.dev,
- &pcmdinfo->in.u.dealloc_stag.info,
- pcmdinfo->in.u.dealloc_stag.scratch,
- pcmdinfo->post_sq);
-
- break;
- case OP_MW_ALLOC:
- status = i40iw_sc_mw_alloc(
- pcmdinfo->in.u.mw_alloc.dev,
- pcmdinfo->in.u.mw_alloc.scratch,
- pcmdinfo->in.u.mw_alloc.mw_stag_index,
- pcmdinfo->in.u.mw_alloc.pd_id,
- pcmdinfo->post_sq);
-
- break;
- case OP_QP_FLUSH_WQES:
- status = i40iw_sc_qp_flush_wqes(
- pcmdinfo->in.u.qp_flush_wqes.qp,
- &pcmdinfo->in.u.qp_flush_wqes.info,
- pcmdinfo->in.u.qp_flush_wqes.
- scratch, pcmdinfo->post_sq);
- break;
- case OP_ADD_ARP_CACHE_ENTRY:
- status = i40iw_sc_add_arp_cache_entry(
- pcmdinfo->in.u.add_arp_cache_entry.cqp,
- &pcmdinfo->in.u.add_arp_cache_entry.info,
- pcmdinfo->in.u.add_arp_cache_entry.scratch,
- pcmdinfo->post_sq);
- break;
- case OP_MANAGE_PUSH_PAGE:
- status = i40iw_sc_manage_push_page(
- pcmdinfo->in.u.manage_push_page.cqp,
- &pcmdinfo->in.u.manage_push_page.info,
- pcmdinfo->in.u.manage_push_page.scratch,
- pcmdinfo->post_sq);
- break;
- case OP_UPDATE_PE_SDS:
- /* case I40IW_CQP_OP_UPDATE_PE_SDS */
- status = i40iw_update_pe_sds(
- pcmdinfo->in.u.update_pe_sds.dev,
- &pcmdinfo->in.u.update_pe_sds.info,
- pcmdinfo->in.u.update_pe_sds.
- scratch);
-
- break;
- case OP_MANAGE_HMC_PM_FUNC_TABLE:
- status = i40iw_sc_manage_hmc_pm_func_table(
- pcmdinfo->in.u.manage_hmc_pm.dev->cqp,
- pcmdinfo->in.u.manage_hmc_pm.scratch,
- (u8)pcmdinfo->in.u.manage_hmc_pm.info.vf_id,
- pcmdinfo->in.u.manage_hmc_pm.info.free_fcn,
- true);
- break;
- case OP_SUSPEND:
- status = i40iw_sc_suspend_qp(
- pcmdinfo->in.u.suspend_resume.cqp,
- pcmdinfo->in.u.suspend_resume.qp,
- pcmdinfo->in.u.suspend_resume.scratch);
- break;
- case OP_RESUME:
- status = i40iw_sc_resume_qp(
- pcmdinfo->in.u.suspend_resume.cqp,
- pcmdinfo->in.u.suspend_resume.qp,
- pcmdinfo->in.u.suspend_resume.scratch);
- break;
- case OP_MANAGE_VF_PBLE_BP:
- status = i40iw_manage_vf_pble_bp(
- pcmdinfo->in.u.manage_vf_pble_bp.cqp,
- &pcmdinfo->in.u.manage_vf_pble_bp.info,
- pcmdinfo->in.u.manage_vf_pble_bp.scratch, true);
- break;
- case OP_QUERY_FPM_VALUES:
- values_mem.pa = pcmdinfo->in.u.query_fpm_values.fpm_values_pa;
- values_mem.va = pcmdinfo->in.u.query_fpm_values.fpm_values_va;
- status = i40iw_sc_query_fpm_values(
- pcmdinfo->in.u.query_fpm_values.cqp,
- pcmdinfo->in.u.query_fpm_values.scratch,
- pcmdinfo->in.u.query_fpm_values.hmc_fn_id,
- &values_mem, true, I40IW_CQP_WAIT_EVENT);
- break;
- case OP_COMMIT_FPM_VALUES:
- values_mem.pa = pcmdinfo->in.u.commit_fpm_values.fpm_values_pa;
- values_mem.va = pcmdinfo->in.u.commit_fpm_values.fpm_values_va;
- status = i40iw_sc_commit_fpm_values(
- pcmdinfo->in.u.commit_fpm_values.cqp,
- pcmdinfo->in.u.commit_fpm_values.scratch,
- pcmdinfo->in.u.commit_fpm_values.hmc_fn_id,
- &values_mem,
- true,
- I40IW_CQP_WAIT_EVENT);
- break;
- default:
- status = I40IW_NOT_SUPPORTED;
- break;
- }
-
- return status;
-}
-
-/**
- * i40iw_process_cqp_cmd - process all cqp commands
- * @dev: sc device struct
- * @pcmdinfo: cqp command info
- */
-enum i40iw_status_code i40iw_process_cqp_cmd(struct i40iw_sc_dev *dev,
- struct cqp_commands_info *pcmdinfo)
-{
- enum i40iw_status_code status = 0;
- unsigned long flags;
-
- spin_lock_irqsave(&dev->cqp_lock, flags);
- if (list_empty(&dev->cqp_cmd_head) && !i40iw_ring_full(dev->cqp))
- status = i40iw_exec_cqp_cmd(dev, pcmdinfo);
- else
- list_add_tail(&pcmdinfo->cqp_cmd_entry, &dev->cqp_cmd_head);
- spin_unlock_irqrestore(&dev->cqp_lock, flags);
- return status;
-}
-
-/**
- * i40iw_process_bh - called from tasklet for cqp list
- * @dev: sc device struct
- */
-enum i40iw_status_code i40iw_process_bh(struct i40iw_sc_dev *dev)
-{
- enum i40iw_status_code status = 0;
- struct cqp_commands_info *pcmdinfo;
- unsigned long flags;
-
- spin_lock_irqsave(&dev->cqp_lock, flags);
- while (!list_empty(&dev->cqp_cmd_head) && !i40iw_ring_full(dev->cqp)) {
- pcmdinfo = (struct cqp_commands_info *)i40iw_remove_head(&dev->cqp_cmd_head);
-
- status = i40iw_exec_cqp_cmd(dev, pcmdinfo);
- if (status)
- break;
- }
- spin_unlock_irqrestore(&dev->cqp_lock, flags);
- return status;
-}
-
-/**
- * i40iw_iwarp_opcode - determine if incoming is rdma layer
- * @info: aeq info for the packet
- * @pkt: packet for error
- */
-static u32 i40iw_iwarp_opcode(struct i40iw_aeqe_info *info, u8 *pkt)
-{
- __be16 *mpa;
- u32 opcode = 0xffffffff;
-
- if (info->q2_data_written) {
- mpa = (__be16 *)pkt;
- opcode = ntohs(mpa[1]) & 0xf;
- }
- return opcode;
-}
-
-/**
- * i40iw_locate_mpa - return pointer to mpa in the pkt
- * @pkt: packet with data
- */
-static u8 *i40iw_locate_mpa(u8 *pkt)
-{
- /* skip over ethernet header */
- pkt += I40IW_MAC_HLEN;
-
- /* Skip over IP and TCP headers */
- pkt += 4 * (pkt[0] & 0x0f);
- pkt += 4 * ((pkt[12] >> 4) & 0x0f);
- return pkt;
-}
-
-/**
- * i40iw_setup_termhdr - termhdr for terminate pkt
- * @qp: sc qp ptr for pkt
- * @hdr: term hdr
- * @opcode: flush opcode for termhdr
- * @layer_etype: error layer + error type
- * @err: error cod ein the header
- */
-static void i40iw_setup_termhdr(struct i40iw_sc_qp *qp,
- struct i40iw_terminate_hdr *hdr,
- enum i40iw_flush_opcode opcode,
- u8 layer_etype,
- u8 err)
-{
- qp->flush_code = opcode;
- hdr->layer_etype = layer_etype;
- hdr->error_code = err;
-}
-
-/**
- * i40iw_bld_terminate_hdr - build terminate message header
- * @qp: qp associated with received terminate AE
- * @info: the struct contiaing AE information
- */
-static int i40iw_bld_terminate_hdr(struct i40iw_sc_qp *qp,
- struct i40iw_aeqe_info *info)
-{
- u8 *pkt = qp->q2_buf + Q2_BAD_FRAME_OFFSET;
- u16 ddp_seg_len;
- int copy_len = 0;
- u8 is_tagged = 0;
- u32 opcode;
- struct i40iw_terminate_hdr *termhdr;
-
- termhdr = (struct i40iw_terminate_hdr *)qp->q2_buf;
- memset(termhdr, 0, Q2_BAD_FRAME_OFFSET);
-
- if (info->q2_data_written) {
- /* Use data from offending packet to fill in ddp & rdma hdrs */
- pkt = i40iw_locate_mpa(pkt);
- ddp_seg_len = ntohs(*(__be16 *)pkt);
- if (ddp_seg_len) {
- copy_len = 2;
- termhdr->hdrct = DDP_LEN_FLAG;
- if (pkt[2] & 0x80) {
- is_tagged = 1;
- if (ddp_seg_len >= TERM_DDP_LEN_TAGGED) {
- copy_len += TERM_DDP_LEN_TAGGED;
- termhdr->hdrct |= DDP_HDR_FLAG;
- }
- } else {
- if (ddp_seg_len >= TERM_DDP_LEN_UNTAGGED) {
- copy_len += TERM_DDP_LEN_UNTAGGED;
- termhdr->hdrct |= DDP_HDR_FLAG;
- }
-
- if (ddp_seg_len >= (TERM_DDP_LEN_UNTAGGED + TERM_RDMA_LEN)) {
- if ((pkt[3] & RDMA_OPCODE_MASK) == RDMA_READ_REQ_OPCODE) {
- copy_len += TERM_RDMA_LEN;
- termhdr->hdrct |= RDMA_HDR_FLAG;
- }
- }
- }
- }
- }
-
- opcode = i40iw_iwarp_opcode(info, pkt);
-
- switch (info->ae_id) {
- case I40IW_AE_AMP_UNALLOCATED_STAG:
- qp->eventtype = TERM_EVENT_QP_ACCESS_ERR;
- if (opcode == I40IW_OP_TYPE_RDMA_WRITE)
- i40iw_setup_termhdr(qp, termhdr, FLUSH_PROT_ERR,
- (LAYER_DDP << 4) | DDP_TAGGED_BUFFER, DDP_TAGGED_INV_STAG);
- else
- i40iw_setup_termhdr(qp, termhdr, FLUSH_REM_ACCESS_ERR,
- (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT, RDMAP_INV_STAG);
- break;
- case I40IW_AE_AMP_BOUNDS_VIOLATION:
- qp->eventtype = TERM_EVENT_QP_ACCESS_ERR;
- if (info->q2_data_written)
- i40iw_setup_termhdr(qp, termhdr, FLUSH_PROT_ERR,
- (LAYER_DDP << 4) | DDP_TAGGED_BUFFER, DDP_TAGGED_BOUNDS);
- else
- i40iw_setup_termhdr(qp, termhdr, FLUSH_REM_ACCESS_ERR,
- (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT, RDMAP_INV_BOUNDS);
- break;
- case I40IW_AE_AMP_BAD_PD:
- switch (opcode) {
- case I40IW_OP_TYPE_RDMA_WRITE:
- i40iw_setup_termhdr(qp, termhdr, FLUSH_PROT_ERR,
- (LAYER_DDP << 4) | DDP_TAGGED_BUFFER, DDP_TAGGED_UNASSOC_STAG);
- break;
- case I40IW_OP_TYPE_SEND_INV:
- case I40IW_OP_TYPE_SEND_SOL_INV:
- i40iw_setup_termhdr(qp, termhdr, FLUSH_REM_ACCESS_ERR,
- (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT, RDMAP_CANT_INV_STAG);
- break;
- default:
- i40iw_setup_termhdr(qp, termhdr, FLUSH_REM_ACCESS_ERR,
- (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT, RDMAP_UNASSOC_STAG);
- }
- break;
- case I40IW_AE_AMP_INVALID_STAG:
- qp->eventtype = TERM_EVENT_QP_ACCESS_ERR;
- i40iw_setup_termhdr(qp, termhdr, FLUSH_REM_ACCESS_ERR,
- (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT, RDMAP_INV_STAG);
- break;
- case I40IW_AE_AMP_BAD_QP:
- i40iw_setup_termhdr(qp, termhdr, FLUSH_LOC_QP_OP_ERR,
- (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER, DDP_UNTAGGED_INV_QN);
- break;
- case I40IW_AE_AMP_BAD_STAG_KEY:
- case I40IW_AE_AMP_BAD_STAG_INDEX:
- qp->eventtype = TERM_EVENT_QP_ACCESS_ERR;
- switch (opcode) {
- case I40IW_OP_TYPE_SEND_INV:
- case I40IW_OP_TYPE_SEND_SOL_INV:
- i40iw_setup_termhdr(qp, termhdr, FLUSH_REM_OP_ERR,
- (LAYER_RDMA << 4) | RDMAP_REMOTE_OP, RDMAP_CANT_INV_STAG);
- break;
- default:
- i40iw_setup_termhdr(qp, termhdr, FLUSH_REM_ACCESS_ERR,
- (LAYER_RDMA << 4) | RDMAP_REMOTE_OP, RDMAP_INV_STAG);
- }
- break;
- case I40IW_AE_AMP_RIGHTS_VIOLATION:
- case I40IW_AE_AMP_INVALIDATE_NO_REMOTE_ACCESS_RIGHTS:
- case I40IW_AE_PRIV_OPERATION_DENIED:
- qp->eventtype = TERM_EVENT_QP_ACCESS_ERR;
- i40iw_setup_termhdr(qp, termhdr, FLUSH_REM_ACCESS_ERR,
- (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT, RDMAP_ACCESS);
- break;
- case I40IW_AE_AMP_TO_WRAP:
- qp->eventtype = TERM_EVENT_QP_ACCESS_ERR;
- i40iw_setup_termhdr(qp, termhdr, FLUSH_REM_ACCESS_ERR,
- (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT, RDMAP_TO_WRAP);
- break;
- case I40IW_AE_LLP_RECEIVED_MARKER_AND_LENGTH_FIELDS_DONT_MATCH:
- i40iw_setup_termhdr(qp, termhdr, FLUSH_LOC_LEN_ERR,
- (LAYER_MPA << 4) | DDP_LLP, MPA_MARKER);
- break;
- case I40IW_AE_LLP_RECEIVED_MPA_CRC_ERROR:
- i40iw_setup_termhdr(qp, termhdr, FLUSH_GENERAL_ERR,
- (LAYER_MPA << 4) | DDP_LLP, MPA_CRC);
- break;
- case I40IW_AE_LLP_SEGMENT_TOO_LARGE:
- case I40IW_AE_LLP_SEGMENT_TOO_SMALL:
- i40iw_setup_termhdr(qp, termhdr, FLUSH_LOC_LEN_ERR,
- (LAYER_DDP << 4) | DDP_CATASTROPHIC, DDP_CATASTROPHIC_LOCAL);
- break;
- case I40IW_AE_LCE_QP_CATASTROPHIC:
- case I40IW_AE_DDP_NO_L_BIT:
- i40iw_setup_termhdr(qp, termhdr, FLUSH_FATAL_ERR,
- (LAYER_DDP << 4) | DDP_CATASTROPHIC, DDP_CATASTROPHIC_LOCAL);
- break;
- case I40IW_AE_DDP_INVALID_MSN_GAP_IN_MSN:
- case I40IW_AE_DDP_INVALID_MSN_RANGE_IS_NOT_VALID:
- i40iw_setup_termhdr(qp, termhdr, FLUSH_GENERAL_ERR,
- (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER, DDP_UNTAGGED_INV_MSN_RANGE);
- break;
- case I40IW_AE_DDP_UBE_DDP_MESSAGE_TOO_LONG_FOR_AVAILABLE_BUFFER:
- qp->eventtype = TERM_EVENT_QP_ACCESS_ERR;
- i40iw_setup_termhdr(qp, termhdr, FLUSH_LOC_LEN_ERR,
- (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER, DDP_UNTAGGED_INV_TOO_LONG);
- break;
- case I40IW_AE_DDP_UBE_INVALID_DDP_VERSION:
- if (is_tagged)
- i40iw_setup_termhdr(qp, termhdr, FLUSH_GENERAL_ERR,
- (LAYER_DDP << 4) | DDP_TAGGED_BUFFER, DDP_TAGGED_INV_DDP_VER);
- else
- i40iw_setup_termhdr(qp, termhdr, FLUSH_GENERAL_ERR,
- (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER, DDP_UNTAGGED_INV_DDP_VER);
- break;
- case I40IW_AE_DDP_UBE_INVALID_MO:
- i40iw_setup_termhdr(qp, termhdr, FLUSH_GENERAL_ERR,
- (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER, DDP_UNTAGGED_INV_MO);
- break;
- case I40IW_AE_DDP_UBE_INVALID_MSN_NO_BUFFER_AVAILABLE:
- i40iw_setup_termhdr(qp, termhdr, FLUSH_REM_OP_ERR,
- (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER, DDP_UNTAGGED_INV_MSN_NO_BUF);
- break;
- case I40IW_AE_DDP_UBE_INVALID_QN:
- i40iw_setup_termhdr(qp, termhdr, FLUSH_GENERAL_ERR,
- (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER, DDP_UNTAGGED_INV_QN);
- break;
- case I40IW_AE_RDMAP_ROE_INVALID_RDMAP_VERSION:
- i40iw_setup_termhdr(qp, termhdr, FLUSH_GENERAL_ERR,
- (LAYER_RDMA << 4) | RDMAP_REMOTE_OP, RDMAP_INV_RDMAP_VER);
- break;
- case I40IW_AE_RDMAP_ROE_UNEXPECTED_OPCODE:
- i40iw_setup_termhdr(qp, termhdr, FLUSH_LOC_QP_OP_ERR,
- (LAYER_RDMA << 4) | RDMAP_REMOTE_OP, RDMAP_UNEXPECTED_OP);
- break;
- default:
- i40iw_setup_termhdr(qp, termhdr, FLUSH_FATAL_ERR,
- (LAYER_RDMA << 4) | RDMAP_REMOTE_OP, RDMAP_UNSPECIFIED);
- break;
- }
-
- if (copy_len)
- memcpy(termhdr + 1, pkt, copy_len);
-
- return sizeof(struct i40iw_terminate_hdr) + copy_len;
-}
-
-/**
- * i40iw_terminate_send_fin() - Send fin for terminate message
- * @qp: qp associated with received terminate AE
- */
-void i40iw_terminate_send_fin(struct i40iw_sc_qp *qp)
-{
- /* Send the fin only */
- i40iw_term_modify_qp(qp,
- I40IW_QP_STATE_TERMINATE,
- I40IWQP_TERM_SEND_FIN_ONLY,
- 0);
-}
-
-/**
- * i40iw_terminate_connection() - Bad AE and send terminate to remote QP
- * @qp: qp associated with received terminate AE
- * @info: the struct contiaing AE information
- */
-void i40iw_terminate_connection(struct i40iw_sc_qp *qp, struct i40iw_aeqe_info *info)
-{
- u8 termlen = 0;
-
- if (qp->term_flags & I40IW_TERM_SENT)
- return; /* Sanity check */
-
- /* Eventtype can change from bld_terminate_hdr */
- qp->eventtype = TERM_EVENT_QP_FATAL;
- termlen = i40iw_bld_terminate_hdr(qp, info);
- i40iw_terminate_start_timer(qp);
- qp->term_flags |= I40IW_TERM_SENT;
- i40iw_term_modify_qp(qp, I40IW_QP_STATE_TERMINATE,
- I40IWQP_TERM_SEND_TERM_ONLY, termlen);
-}
-
-/**
- * i40iw_terminate_received - handle terminate received AE
- * @qp: qp associated with received terminate AE
- * @info: the struct contiaing AE information
- */
-void i40iw_terminate_received(struct i40iw_sc_qp *qp, struct i40iw_aeqe_info *info)
-{
- u8 *pkt = qp->q2_buf + Q2_BAD_FRAME_OFFSET;
- __be32 *mpa;
- u8 ddp_ctl;
- u8 rdma_ctl;
- u16 aeq_id = 0;
- struct i40iw_terminate_hdr *termhdr;
-
- mpa = (__be32 *)i40iw_locate_mpa(pkt);
- if (info->q2_data_written) {
- /* did not validate the frame - do it now */
- ddp_ctl = (ntohl(mpa[0]) >> 8) & 0xff;
- rdma_ctl = ntohl(mpa[0]) & 0xff;
- if ((ddp_ctl & 0xc0) != 0x40)
- aeq_id = I40IW_AE_LCE_QP_CATASTROPHIC;
- else if ((ddp_ctl & 0x03) != 1)
- aeq_id = I40IW_AE_DDP_UBE_INVALID_DDP_VERSION;
- else if (ntohl(mpa[2]) != 2)
- aeq_id = I40IW_AE_DDP_UBE_INVALID_QN;
- else if (ntohl(mpa[3]) != 1)
- aeq_id = I40IW_AE_DDP_INVALID_MSN_GAP_IN_MSN;
- else if (ntohl(mpa[4]) != 0)
- aeq_id = I40IW_AE_DDP_UBE_INVALID_MO;
- else if ((rdma_ctl & 0xc0) != 0x40)
- aeq_id = I40IW_AE_RDMAP_ROE_INVALID_RDMAP_VERSION;
-
- info->ae_id = aeq_id;
- if (info->ae_id) {
- /* Bad terminate recvd - send back a terminate */
- i40iw_terminate_connection(qp, info);
- return;
- }
- }
-
- qp->term_flags |= I40IW_TERM_RCVD;
- qp->eventtype = TERM_EVENT_QP_FATAL;
- termhdr = (struct i40iw_terminate_hdr *)&mpa[5];
- if (termhdr->layer_etype == RDMAP_REMOTE_PROT ||
- termhdr->layer_etype == RDMAP_REMOTE_OP) {
- i40iw_terminate_done(qp, 0);
- } else {
- i40iw_terminate_start_timer(qp);
- i40iw_terminate_send_fin(qp);
- }
-}
-
-/**
- * i40iw_sc_vsi_init - Initialize virtual device
- * @vsi: pointer to the vsi structure
- * @info: parameters to initialize vsi
- **/
-void i40iw_sc_vsi_init(struct i40iw_sc_vsi *vsi, struct i40iw_vsi_init_info *info)
-{
- int i;
-
- vsi->dev = info->dev;
- vsi->back_vsi = info->back_vsi;
- vsi->mss = info->params->mss;
- i40iw_fill_qos_list(info->params->qs_handle_list);
-
- for (i = 0; i < I40IW_MAX_USER_PRIORITY; i++) {
- vsi->qos[i].qs_handle =
- info->params->qs_handle_list[i];
- i40iw_debug(vsi->dev, I40IW_DEBUG_DCB, "qset[%d]: %d\n", i, vsi->qos[i].qs_handle);
- spin_lock_init(&vsi->qos[i].lock);
- INIT_LIST_HEAD(&vsi->qos[i].qplist);
- }
-}
-
-/**
- * i40iw_hw_stats_init - Initiliaze HW stats table
- * @stats: pestat struct
- * @fcn_idx: PCI fn id
- * @is_pf: Is it a PF?
- *
- * Populate the HW stats table with register offset addr for each
- * stats. And start the perioidic stats timer.
- */
-void i40iw_hw_stats_init(struct i40iw_vsi_pestat *stats, u8 fcn_idx, bool is_pf)
-{
- u32 stats_reg_offset;
- u32 stats_index;
- struct i40iw_dev_hw_stats_offsets *stats_table =
- &stats->hw_stats_offsets;
- struct i40iw_dev_hw_stats *last_rd_stats = &stats->last_read_hw_stats;
-
- if (is_pf) {
- stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_IP4RXDISCARD] =
- I40E_GLPES_PFIP4RXDISCARD(fcn_idx);
- stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_IP4RXTRUNC] =
- I40E_GLPES_PFIP4RXTRUNC(fcn_idx);
- stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_IP4TXNOROUTE] =
- I40E_GLPES_PFIP4TXNOROUTE(fcn_idx);
- stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_IP6RXDISCARD] =
- I40E_GLPES_PFIP6RXDISCARD(fcn_idx);
- stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_IP6RXTRUNC] =
- I40E_GLPES_PFIP6RXTRUNC(fcn_idx);
- stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_IP6TXNOROUTE] =
- I40E_GLPES_PFIP6TXNOROUTE(fcn_idx);
- stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_TCPRTXSEG] =
- I40E_GLPES_PFTCPRTXSEG(fcn_idx);
- stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_TCPRXOPTERR] =
- I40E_GLPES_PFTCPRXOPTERR(fcn_idx);
- stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_TCPRXPROTOERR] =
- I40E_GLPES_PFTCPRXPROTOERR(fcn_idx);
-
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4RXOCTS] =
- I40E_GLPES_PFIP4RXOCTSLO(fcn_idx);
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4RXPKTS] =
- I40E_GLPES_PFIP4RXPKTSLO(fcn_idx);
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4RXFRAGS] =
- I40E_GLPES_PFIP4RXFRAGSLO(fcn_idx);
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4RXMCPKTS] =
- I40E_GLPES_PFIP4RXMCPKTSLO(fcn_idx);
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4TXOCTS] =
- I40E_GLPES_PFIP4TXOCTSLO(fcn_idx);
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4TXPKTS] =
- I40E_GLPES_PFIP4TXPKTSLO(fcn_idx);
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4TXFRAGS] =
- I40E_GLPES_PFIP4TXFRAGSLO(fcn_idx);
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4TXMCPKTS] =
- I40E_GLPES_PFIP4TXMCPKTSLO(fcn_idx);
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6RXOCTS] =
- I40E_GLPES_PFIP6RXOCTSLO(fcn_idx);
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6RXPKTS] =
- I40E_GLPES_PFIP6RXPKTSLO(fcn_idx);
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6RXFRAGS] =
- I40E_GLPES_PFIP6RXFRAGSLO(fcn_idx);
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6RXMCPKTS] =
- I40E_GLPES_PFIP6RXMCPKTSLO(fcn_idx);
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6TXOCTS] =
- I40E_GLPES_PFIP6TXOCTSLO(fcn_idx);
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6TXPKTS] =
- I40E_GLPES_PFIP6TXPKTSLO(fcn_idx);
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6TXPKTS] =
- I40E_GLPES_PFIP6TXPKTSLO(fcn_idx);
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6TXFRAGS] =
- I40E_GLPES_PFIP6TXFRAGSLO(fcn_idx);
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_TCPRXSEGS] =
- I40E_GLPES_PFTCPRXSEGSLO(fcn_idx);
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_TCPTXSEG] =
- I40E_GLPES_PFTCPTXSEGLO(fcn_idx);
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMARXRDS] =
- I40E_GLPES_PFRDMARXRDSLO(fcn_idx);
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMARXSNDS] =
- I40E_GLPES_PFRDMARXSNDSLO(fcn_idx);
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMARXWRS] =
- I40E_GLPES_PFRDMARXWRSLO(fcn_idx);
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMATXRDS] =
- I40E_GLPES_PFRDMATXRDSLO(fcn_idx);
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMATXSNDS] =
- I40E_GLPES_PFRDMATXSNDSLO(fcn_idx);
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMATXWRS] =
- I40E_GLPES_PFRDMATXWRSLO(fcn_idx);
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMAVBND] =
- I40E_GLPES_PFRDMAVBNDLO(fcn_idx);
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMAVINV] =
- I40E_GLPES_PFRDMAVINVLO(fcn_idx);
- } else {
- stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_IP4RXDISCARD] =
- I40E_GLPES_VFIP4RXDISCARD(fcn_idx);
- stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_IP4RXTRUNC] =
- I40E_GLPES_VFIP4RXTRUNC(fcn_idx);
- stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_IP4TXNOROUTE] =
- I40E_GLPES_VFIP4TXNOROUTE(fcn_idx);
- stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_IP6RXDISCARD] =
- I40E_GLPES_VFIP6RXDISCARD(fcn_idx);
- stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_IP6RXTRUNC] =
- I40E_GLPES_VFIP6RXTRUNC(fcn_idx);
- stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_IP6TXNOROUTE] =
- I40E_GLPES_VFIP6TXNOROUTE(fcn_idx);
- stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_TCPRTXSEG] =
- I40E_GLPES_VFTCPRTXSEG(fcn_idx);
- stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_TCPRXOPTERR] =
- I40E_GLPES_VFTCPRXOPTERR(fcn_idx);
- stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_TCPRXPROTOERR] =
- I40E_GLPES_VFTCPRXPROTOERR(fcn_idx);
-
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4RXOCTS] =
- I40E_GLPES_VFIP4RXOCTSLO(fcn_idx);
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4RXPKTS] =
- I40E_GLPES_VFIP4RXPKTSLO(fcn_idx);
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4RXFRAGS] =
- I40E_GLPES_VFIP4RXFRAGSLO(fcn_idx);
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4RXMCPKTS] =
- I40E_GLPES_VFIP4RXMCPKTSLO(fcn_idx);
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4TXOCTS] =
- I40E_GLPES_VFIP4TXOCTSLO(fcn_idx);
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4TXPKTS] =
- I40E_GLPES_VFIP4TXPKTSLO(fcn_idx);
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4TXFRAGS] =
- I40E_GLPES_VFIP4TXFRAGSLO(fcn_idx);
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4TXMCPKTS] =
- I40E_GLPES_VFIP4TXMCPKTSLO(fcn_idx);
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6RXOCTS] =
- I40E_GLPES_VFIP6RXOCTSLO(fcn_idx);
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6RXPKTS] =
- I40E_GLPES_VFIP6RXPKTSLO(fcn_idx);
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6RXFRAGS] =
- I40E_GLPES_VFIP6RXFRAGSLO(fcn_idx);
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6RXMCPKTS] =
- I40E_GLPES_VFIP6RXMCPKTSLO(fcn_idx);
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6TXOCTS] =
- I40E_GLPES_VFIP6TXOCTSLO(fcn_idx);
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6TXPKTS] =
- I40E_GLPES_VFIP6TXPKTSLO(fcn_idx);
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6TXPKTS] =
- I40E_GLPES_VFIP6TXPKTSLO(fcn_idx);
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6TXFRAGS] =
- I40E_GLPES_VFIP6TXFRAGSLO(fcn_idx);
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_TCPRXSEGS] =
- I40E_GLPES_VFTCPRXSEGSLO(fcn_idx);
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_TCPTXSEG] =
- I40E_GLPES_VFTCPTXSEGLO(fcn_idx);
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMARXRDS] =
- I40E_GLPES_VFRDMARXRDSLO(fcn_idx);
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMARXSNDS] =
- I40E_GLPES_VFRDMARXSNDSLO(fcn_idx);
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMARXWRS] =
- I40E_GLPES_VFRDMARXWRSLO(fcn_idx);
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMATXRDS] =
- I40E_GLPES_VFRDMATXRDSLO(fcn_idx);
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMATXSNDS] =
- I40E_GLPES_VFRDMATXSNDSLO(fcn_idx);
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMATXWRS] =
- I40E_GLPES_VFRDMATXWRSLO(fcn_idx);
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMAVBND] =
- I40E_GLPES_VFRDMAVBNDLO(fcn_idx);
- stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMAVINV] =
- I40E_GLPES_VFRDMAVINVLO(fcn_idx);
- }
-
- for (stats_index = 0; stats_index < I40IW_HW_STAT_INDEX_MAX_64;
- stats_index++) {
- stats_reg_offset = stats_table->stats_offset_64[stats_index];
- last_rd_stats->stats_value_64[stats_index] =
- readq(stats->hw->hw_addr + stats_reg_offset);
- }
-
- for (stats_index = 0; stats_index < I40IW_HW_STAT_INDEX_MAX_32;
- stats_index++) {
- stats_reg_offset = stats_table->stats_offset_32[stats_index];
- last_rd_stats->stats_value_32[stats_index] =
- i40iw_rd32(stats->hw, stats_reg_offset);
- }
-}
-
-/**
- * i40iw_hw_stats_read_32 - Read 32-bit HW stats counters and accommodates for roll-overs.
- * @stat: pestat struct
- * @index: index in HW stats table which contains offset reg-addr
- * @value: hw stats value
- */
-void i40iw_hw_stats_read_32(struct i40iw_vsi_pestat *stats,
- enum i40iw_hw_stats_index_32b index,
- u64 *value)
-{
- struct i40iw_dev_hw_stats_offsets *stats_table =
- &stats->hw_stats_offsets;
- struct i40iw_dev_hw_stats *last_rd_stats = &stats->last_read_hw_stats;
- struct i40iw_dev_hw_stats *hw_stats = &stats->hw_stats;
- u64 new_stats_value = 0;
- u32 stats_reg_offset = stats_table->stats_offset_32[index];
-
- new_stats_value = i40iw_rd32(stats->hw, stats_reg_offset);
- /*roll-over case */
- if (new_stats_value < last_rd_stats->stats_value_32[index])
- hw_stats->stats_value_32[index] += new_stats_value;
- else
- hw_stats->stats_value_32[index] +=
- new_stats_value - last_rd_stats->stats_value_32[index];
- last_rd_stats->stats_value_32[index] = new_stats_value;
- *value = hw_stats->stats_value_32[index];
-}
-
-/**
- * i40iw_hw_stats_read_64 - Read HW stats counters (greater than 32-bit) and accommodates for roll-overs.
- * @stats: pestat struct
- * @index: index in HW stats table which contains offset reg-addr
- * @value: hw stats value
- */
-void i40iw_hw_stats_read_64(struct i40iw_vsi_pestat *stats,
- enum i40iw_hw_stats_index_64b index,
- u64 *value)
-{
- struct i40iw_dev_hw_stats_offsets *stats_table =
- &stats->hw_stats_offsets;
- struct i40iw_dev_hw_stats *last_rd_stats = &stats->last_read_hw_stats;
- struct i40iw_dev_hw_stats *hw_stats = &stats->hw_stats;
- u64 new_stats_value = 0;
- u32 stats_reg_offset = stats_table->stats_offset_64[index];
-
- new_stats_value = readq(stats->hw->hw_addr + stats_reg_offset);
- /*roll-over case */
- if (new_stats_value < last_rd_stats->stats_value_64[index])
- hw_stats->stats_value_64[index] += new_stats_value;
- else
- hw_stats->stats_value_64[index] +=
- new_stats_value - last_rd_stats->stats_value_64[index];
- last_rd_stats->stats_value_64[index] = new_stats_value;
- *value = hw_stats->stats_value_64[index];
-}
-
-/**
- * i40iw_hw_stats_read_all - read all HW stat counters
- * @stats: pestat struct
- * @stats_values: hw stats structure
- *
- * Read all the HW stat counters and populates hw_stats structure
- * of passed-in vsi's pestat as well as copy created in stat_values.
- */
-void i40iw_hw_stats_read_all(struct i40iw_vsi_pestat *stats,
- struct i40iw_dev_hw_stats *stats_values)
-{
- u32 stats_index;
- unsigned long flags;
-
- spin_lock_irqsave(&stats->lock, flags);
-
- for (stats_index = 0; stats_index < I40IW_HW_STAT_INDEX_MAX_32;
- stats_index++)
- i40iw_hw_stats_read_32(stats, stats_index,
- &stats_values->stats_value_32[stats_index]);
- for (stats_index = 0; stats_index < I40IW_HW_STAT_INDEX_MAX_64;
- stats_index++)
- i40iw_hw_stats_read_64(stats, stats_index,
- &stats_values->stats_value_64[stats_index]);
- spin_unlock_irqrestore(&stats->lock, flags);
-}
-
-/**
- * i40iw_hw_stats_refresh_all - Update all HW stats structs
- * @stats: pestat struct
- *
- * Read all the HW stats counters to refresh values in hw_stats structure
- * of passed-in dev's pestat
- */
-void i40iw_hw_stats_refresh_all(struct i40iw_vsi_pestat *stats)
-{
- u64 stats_value;
- u32 stats_index;
- unsigned long flags;
-
- spin_lock_irqsave(&stats->lock, flags);
-
- for (stats_index = 0; stats_index < I40IW_HW_STAT_INDEX_MAX_32;
- stats_index++)
- i40iw_hw_stats_read_32(stats, stats_index, &stats_value);
- for (stats_index = 0; stats_index < I40IW_HW_STAT_INDEX_MAX_64;
- stats_index++)
- i40iw_hw_stats_read_64(stats, stats_index, &stats_value);
- spin_unlock_irqrestore(&stats->lock, flags);
-}
-
-/**
- * i40iw_get_fcn_id - Return the function id
- * @dev: pointer to the device
- */
-static u8 i40iw_get_fcn_id(struct i40iw_sc_dev *dev)
-{
- u8 fcn_id = I40IW_INVALID_FCN_ID;
- u8 i;
-
- for (i = I40IW_FIRST_NON_PF_STAT; i < I40IW_MAX_STATS_COUNT; i++)
- if (!dev->fcn_id_array[i]) {
- fcn_id = i;
- dev->fcn_id_array[i] = true;
- break;
- }
- return fcn_id;
-}
-
-/**
- * i40iw_vsi_stats_init - Initialize the vsi statistics
- * @vsi: pointer to the vsi structure
- * @info: The info structure used for initialization
- */
-enum i40iw_status_code i40iw_vsi_stats_init(struct i40iw_sc_vsi *vsi, struct i40iw_vsi_stats_info *info)
-{
- u8 fcn_id = info->fcn_id;
-
- if (info->alloc_fcn_id)
- fcn_id = i40iw_get_fcn_id(vsi->dev);
-
- if (fcn_id == I40IW_INVALID_FCN_ID)
- return I40IW_ERR_NOT_READY;
-
- vsi->pestat = info->pestat;
- vsi->pestat->hw = vsi->dev->hw;
-
- if (info->stats_initialize) {
- i40iw_hw_stats_init(vsi->pestat, fcn_id, true);
- spin_lock_init(&vsi->pestat->lock);
- i40iw_hw_stats_start_timer(vsi);
- }
- vsi->stats_fcn_id_alloc = info->alloc_fcn_id;
- vsi->fcn_id = fcn_id;
- return I40IW_SUCCESS;
-}
-
-/**
- * i40iw_vsi_stats_free - Free the vsi stats
- * @vsi: pointer to the vsi structure
- */
-void i40iw_vsi_stats_free(struct i40iw_sc_vsi *vsi)
-{
- u8 fcn_id = vsi->fcn_id;
-
- if ((vsi->stats_fcn_id_alloc) && (fcn_id != I40IW_INVALID_FCN_ID))
- vsi->dev->fcn_id_array[fcn_id] = false;
- i40iw_hw_stats_stop_timer(vsi);
-}
-
-static struct i40iw_cqp_ops iw_cqp_ops = {
- .cqp_init = i40iw_sc_cqp_init,
- .cqp_create = i40iw_sc_cqp_create,
- .cqp_post_sq = i40iw_sc_cqp_post_sq,
- .cqp_get_next_send_wqe = i40iw_sc_cqp_get_next_send_wqe,
- .cqp_destroy = i40iw_sc_cqp_destroy,
- .poll_for_cqp_op_done = i40iw_sc_poll_for_cqp_op_done
-};
-
-static struct i40iw_ccq_ops iw_ccq_ops = {
- .ccq_init = i40iw_sc_ccq_init,
- .ccq_create = i40iw_sc_ccq_create,
- .ccq_destroy = i40iw_sc_ccq_destroy,
- .ccq_create_done = i40iw_sc_ccq_create_done,
- .ccq_get_cqe_info = i40iw_sc_ccq_get_cqe_info,
- .ccq_arm = i40iw_sc_ccq_arm
-};
-
-static struct i40iw_ceq_ops iw_ceq_ops = {
- .ceq_init = i40iw_sc_ceq_init,
- .ceq_create = i40iw_sc_ceq_create,
- .cceq_create_done = i40iw_sc_cceq_create_done,
- .cceq_destroy_done = i40iw_sc_cceq_destroy_done,
- .cceq_create = i40iw_sc_cceq_create,
- .ceq_destroy = i40iw_sc_ceq_destroy,
- .process_ceq = i40iw_sc_process_ceq
-};
-
-static struct i40iw_aeq_ops iw_aeq_ops = {
- .aeq_init = i40iw_sc_aeq_init,
- .aeq_create = i40iw_sc_aeq_create,
- .aeq_destroy = i40iw_sc_aeq_destroy,
- .get_next_aeqe = i40iw_sc_get_next_aeqe,
- .repost_aeq_entries = i40iw_sc_repost_aeq_entries,
- .aeq_create_done = i40iw_sc_aeq_create_done,
- .aeq_destroy_done = i40iw_sc_aeq_destroy_done
-};
-
-/* iwarp pd ops */
-static struct i40iw_pd_ops iw_pd_ops = {
- .pd_init = i40iw_sc_pd_init,
-};
-
-static struct i40iw_priv_qp_ops iw_priv_qp_ops = {
- .qp_init = i40iw_sc_qp_init,
- .qp_create = i40iw_sc_qp_create,
- .qp_modify = i40iw_sc_qp_modify,
- .qp_destroy = i40iw_sc_qp_destroy,
- .qp_flush_wqes = i40iw_sc_qp_flush_wqes,
- .qp_upload_context = i40iw_sc_qp_upload_context,
- .qp_setctx = i40iw_sc_qp_setctx,
- .qp_send_lsmm = i40iw_sc_send_lsmm,
- .qp_send_lsmm_nostag = i40iw_sc_send_lsmm_nostag,
- .qp_send_rtt = i40iw_sc_send_rtt,
- .qp_post_wqe0 = i40iw_sc_post_wqe0,
- .iw_mr_fast_register = i40iw_sc_mr_fast_register
-};
-
-static struct i40iw_priv_cq_ops iw_priv_cq_ops = {
- .cq_init = i40iw_sc_cq_init,
- .cq_create = i40iw_sc_cq_create,
- .cq_destroy = i40iw_sc_cq_destroy,
- .cq_modify = i40iw_sc_cq_modify,
-};
-
-static struct i40iw_mr_ops iw_mr_ops = {
- .alloc_stag = i40iw_sc_alloc_stag,
- .mr_reg_non_shared = i40iw_sc_mr_reg_non_shared,
- .mr_reg_shared = i40iw_sc_mr_reg_shared,
- .dealloc_stag = i40iw_sc_dealloc_stag,
- .query_stag = i40iw_sc_query_stag,
- .mw_alloc = i40iw_sc_mw_alloc
-};
-
-static struct i40iw_cqp_misc_ops iw_cqp_misc_ops = {
- .manage_push_page = i40iw_sc_manage_push_page,
- .manage_hmc_pm_func_table = i40iw_sc_manage_hmc_pm_func_table,
- .set_hmc_resource_profile = i40iw_sc_set_hmc_resource_profile,
- .commit_fpm_values = i40iw_sc_commit_fpm_values,
- .query_fpm_values = i40iw_sc_query_fpm_values,
- .static_hmc_pages_allocated = i40iw_sc_static_hmc_pages_allocated,
- .add_arp_cache_entry = i40iw_sc_add_arp_cache_entry,
- .del_arp_cache_entry = i40iw_sc_del_arp_cache_entry,
- .query_arp_cache_entry = i40iw_sc_query_arp_cache_entry,
- .manage_apbvt_entry = i40iw_sc_manage_apbvt_entry,
- .manage_qhash_table_entry = i40iw_sc_manage_qhash_table_entry,
- .alloc_local_mac_ipaddr_table_entry = i40iw_sc_alloc_local_mac_ipaddr_entry,
- .add_local_mac_ipaddr_entry = i40iw_sc_add_local_mac_ipaddr_entry,
- .del_local_mac_ipaddr_entry = i40iw_sc_del_local_mac_ipaddr_entry,
- .cqp_nop = i40iw_sc_cqp_nop,
- .commit_fpm_values_done = i40iw_sc_commit_fpm_values_done,
- .query_fpm_values_done = i40iw_sc_query_fpm_values_done,
- .manage_hmc_pm_func_table_done = i40iw_sc_manage_hmc_pm_func_table_done,
- .update_suspend_qp = i40iw_sc_suspend_qp,
- .update_resume_qp = i40iw_sc_resume_qp
-};
-
-static struct i40iw_hmc_ops iw_hmc_ops = {
- .init_iw_hmc = i40iw_sc_init_iw_hmc,
- .parse_fpm_query_buf = i40iw_sc_parse_fpm_query_buf,
- .configure_iw_fpm = i40iw_sc_configure_iw_fpm,
- .parse_fpm_commit_buf = i40iw_sc_parse_fpm_commit_buf,
- .create_hmc_object = i40iw_sc_create_hmc_obj,
- .del_hmc_object = i40iw_sc_del_hmc_obj
-};
-
-/**
- * i40iw_device_init - Initialize IWARP device
- * @dev: IWARP device pointer
- * @info: IWARP init info
- */
-enum i40iw_status_code i40iw_device_init(struct i40iw_sc_dev *dev,
- struct i40iw_device_init_info *info)
-{
- u32 val;
- u32 vchnl_ver = 0;
- u16 hmc_fcn = 0;
- enum i40iw_status_code ret_code = 0;
- u8 db_size;
-
- spin_lock_init(&dev->cqp_lock);
- INIT_LIST_HEAD(&dev->cqp_cmd_head); /* for the cqp commands backlog. */
-
- i40iw_device_init_uk(&dev->dev_uk);
-
- dev->debug_mask = info->debug_mask;
-
- dev->hmc_fn_id = info->hmc_fn_id;
- dev->exception_lan_queue = info->exception_lan_queue;
- dev->is_pf = info->is_pf;
-
- dev->fpm_query_buf_pa = info->fpm_query_buf_pa;
- dev->fpm_query_buf = info->fpm_query_buf;
-
- dev->fpm_commit_buf_pa = info->fpm_commit_buf_pa;
- dev->fpm_commit_buf = info->fpm_commit_buf;
-
- dev->hw = info->hw;
- dev->hw->hw_addr = info->bar0;
-
- if (dev->is_pf) {
- val = i40iw_rd32(dev->hw, I40E_GLPCI_DREVID);
- dev->hw_rev = (u8)RS_32(val, I40E_GLPCI_DREVID_DEFAULT_REVID);
-
- val = i40iw_rd32(dev->hw, I40E_GLPCI_LBARCTRL);
- db_size = (u8)RS_32(val, I40E_GLPCI_LBARCTRL_PE_DB_SIZE);
- if ((db_size != I40IW_PE_DB_SIZE_4M) &&
- (db_size != I40IW_PE_DB_SIZE_8M)) {
- i40iw_debug(dev, I40IW_DEBUG_DEV,
- "%s: PE doorbell is not enabled in CSR val 0x%x\n",
- __func__, val);
- ret_code = I40IW_ERR_PE_DOORBELL_NOT_ENABLED;
- return ret_code;
- }
- dev->db_addr = dev->hw->hw_addr + I40IW_DB_ADDR_OFFSET;
- dev->vchnl_if.vchnl_recv = i40iw_vchnl_recv_pf;
- } else {
- dev->db_addr = dev->hw->hw_addr + I40IW_VF_DB_ADDR_OFFSET;
- }
-
- dev->cqp_ops = &iw_cqp_ops;
- dev->ccq_ops = &iw_ccq_ops;
- dev->ceq_ops = &iw_ceq_ops;
- dev->aeq_ops = &iw_aeq_ops;
- dev->cqp_misc_ops = &iw_cqp_misc_ops;
- dev->iw_pd_ops = &iw_pd_ops;
- dev->iw_priv_qp_ops = &iw_priv_qp_ops;
- dev->iw_priv_cq_ops = &iw_priv_cq_ops;
- dev->mr_ops = &iw_mr_ops;
- dev->hmc_ops = &iw_hmc_ops;
- dev->vchnl_if.vchnl_send = info->vchnl_send;
- if (dev->vchnl_if.vchnl_send)
- dev->vchnl_up = true;
- else
- dev->vchnl_up = false;
- if (!dev->is_pf) {
- dev->vchnl_if.vchnl_recv = i40iw_vchnl_recv_vf;
- ret_code = i40iw_vchnl_vf_get_ver(dev, &vchnl_ver);
- if (!ret_code) {
- i40iw_debug(dev, I40IW_DEBUG_DEV,
- "%s: Get Channel version rc = 0x%0x, version is %u\n",
- __func__, ret_code, vchnl_ver);
- ret_code = i40iw_vchnl_vf_get_hmc_fcn(dev, &hmc_fcn);
- if (!ret_code) {
- i40iw_debug(dev, I40IW_DEBUG_DEV,
- "%s Get HMC function rc = 0x%0x, hmc fcn is %u\n",
- __func__, ret_code, hmc_fcn);
- dev->hmc_fn_id = (u8)hmc_fcn;
- }
- }
- }
- dev->iw_vf_cqp_ops = &iw_vf_cqp_ops;
-
- return ret_code;
-}
diff --git a/drivers/infiniband/hw/i40iw/i40iw_d.h b/drivers/infiniband/hw/i40iw/i40iw_d.h
deleted file mode 100644
index a39ac12b6a7e..000000000000
--- a/drivers/infiniband/hw/i40iw/i40iw_d.h
+++ /dev/null
@@ -1,1727 +0,0 @@
-/*******************************************************************************
-*
-* Copyright (c) 2015-2016 Intel Corporation. All rights reserved.
-*
-* This software is available to you under a choice of one of two
-* licenses. You may choose to be licensed under the terms of the GNU
-* General Public License (GPL) Version 2, available from the file
-* COPYING in the main directory of this source tree, or the
-* OpenFabrics.org BSD license below:
-*
-* Redistribution and use in source and binary forms, with or
-* without modification, are permitted provided that the following
-* conditions are met:
-*
-* - Redistributions of source code must retain the above
-* copyright notice, this list of conditions and the following
-* disclaimer.
-*
-* - Redistributions in binary form must reproduce the above
-* copyright notice, this list of conditions and the following
-* disclaimer in the documentation and/or other materials
-* provided with the distribution.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-* SOFTWARE.
-*
-*******************************************************************************/
-
-#ifndef I40IW_D_H
-#define I40IW_D_H
-
-#define I40IW_FIRST_USER_QP_ID 2
-
-#define I40IW_DB_ADDR_OFFSET (4 * 1024 * 1024 - 64 * 1024)
-#define I40IW_VF_DB_ADDR_OFFSET (64 * 1024)
-
-#define I40IW_PUSH_OFFSET (4 * 1024 * 1024)
-#define I40IW_PF_FIRST_PUSH_PAGE_INDEX 16
-#define I40IW_VF_PUSH_OFFSET ((8 + 64) * 1024)
-#define I40IW_VF_FIRST_PUSH_PAGE_INDEX 2
-
-#define I40IW_PE_DB_SIZE_4M 1
-#define I40IW_PE_DB_SIZE_8M 2
-
-#define I40IW_DDP_VER 1
-#define I40IW_RDMAP_VER 1
-
-#define I40IW_RDMA_MODE_RDMAC 0
-#define I40IW_RDMA_MODE_IETF 1
-
-#define I40IW_QP_STATE_INVALID 0
-#define I40IW_QP_STATE_IDLE 1
-#define I40IW_QP_STATE_RTS 2
-#define I40IW_QP_STATE_CLOSING 3
-#define I40IW_QP_STATE_RESERVED 4
-#define I40IW_QP_STATE_TERMINATE 5
-#define I40IW_QP_STATE_ERROR 6
-
-#define I40IW_STAG_STATE_INVALID 0
-#define I40IW_STAG_STATE_VALID 1
-
-#define I40IW_STAG_TYPE_SHARED 0
-#define I40IW_STAG_TYPE_NONSHARED 1
-
-#define I40IW_MAX_USER_PRIORITY 8
-#define I40IW_MAX_STATS_COUNT 16
-#define I40IW_FIRST_NON_PF_STAT 4
-
-
-#define LS_64_1(val, bits) ((u64)(uintptr_t)val << bits)
-#define RS_64_1(val, bits) ((u64)(uintptr_t)val >> bits)
-#define LS_32_1(val, bits) (u32)(val << bits)
-#define RS_32_1(val, bits) (u32)(val >> bits)
-#define I40E_HI_DWORD(x) ((u32)((((x) >> 16) >> 16) & 0xFFFFFFFF))
-
-#define QS_HANDLE_UNKNOWN 0xffff
-
-#define LS_64(val, field) (((u64)val << field ## _SHIFT) & (field ## _MASK))
-
-#define RS_64(val, field) ((u64)(val & field ## _MASK) >> field ## _SHIFT)
-#define LS_32(val, field) ((val << field ## _SHIFT) & (field ## _MASK))
-#define RS_32(val, field) ((val & field ## _MASK) >> field ## _SHIFT)
-
-#define TERM_DDP_LEN_TAGGED 14
-#define TERM_DDP_LEN_UNTAGGED 18
-#define TERM_RDMA_LEN 28
-#define RDMA_OPCODE_MASK 0x0f
-#define RDMA_READ_REQ_OPCODE 1
-#define Q2_BAD_FRAME_OFFSET 72
-#define CQE_MAJOR_DRV 0x8000
-
-#define I40IW_TERM_SENT 0x01
-#define I40IW_TERM_RCVD 0x02
-#define I40IW_TERM_DONE 0x04
-#define I40IW_MAC_HLEN 14
-
-#define I40IW_INVALID_WQE_INDEX 0xffffffff
-
-#define I40IW_CQP_WAIT_POLL_REGS 1
-#define I40IW_CQP_WAIT_POLL_CQ 2
-#define I40IW_CQP_WAIT_EVENT 3
-
-#define I40IW_CQP_INIT_WQE(wqe) memset(wqe, 0, 64)
-
-#define I40IW_GET_CURRENT_CQ_ELEMENT(_cq) \
- ( \
- &((_cq)->cq_base[I40IW_RING_GETCURRENT_HEAD((_cq)->cq_ring)]) \
- )
-#define I40IW_GET_CURRENT_EXTENDED_CQ_ELEMENT(_cq) \
- ( \
- &(((struct i40iw_extended_cqe *) \
- ((_cq)->cq_base))[I40IW_RING_GETCURRENT_HEAD((_cq)->cq_ring)]) \
- )
-
-#define I40IW_GET_CURRENT_AEQ_ELEMENT(_aeq) \
- ( \
- &_aeq->aeqe_base[I40IW_RING_GETCURRENT_TAIL(_aeq->aeq_ring)] \
- )
-
-#define I40IW_GET_CURRENT_CEQ_ELEMENT(_ceq) \
- ( \
- &_ceq->ceqe_base[I40IW_RING_GETCURRENT_TAIL(_ceq->ceq_ring)] \
- )
-
-#define I40IW_AE_SOURCE_RQ 0x1
-#define I40IW_AE_SOURCE_RQ_0011 0x3
-
-#define I40IW_AE_SOURCE_CQ 0x2
-#define I40IW_AE_SOURCE_CQ_0110 0x6
-#define I40IW_AE_SOURCE_CQ_1010 0xA
-#define I40IW_AE_SOURCE_CQ_1110 0xE
-
-#define I40IW_AE_SOURCE_SQ 0x5
-#define I40IW_AE_SOURCE_SQ_0111 0x7
-
-#define I40IW_AE_SOURCE_IN_RR_WR 0x9
-#define I40IW_AE_SOURCE_IN_RR_WR_1011 0xB
-#define I40IW_AE_SOURCE_OUT_RR 0xD
-#define I40IW_AE_SOURCE_OUT_RR_1111 0xF
-
-#define I40IW_TCP_STATE_NON_EXISTENT 0
-#define I40IW_TCP_STATE_CLOSED 1
-#define I40IW_TCP_STATE_LISTEN 2
-#define I40IW_STATE_SYN_SEND 3
-#define I40IW_TCP_STATE_SYN_RECEIVED 4
-#define I40IW_TCP_STATE_ESTABLISHED 5
-#define I40IW_TCP_STATE_CLOSE_WAIT 6
-#define I40IW_TCP_STATE_FIN_WAIT_1 7
-#define I40IW_TCP_STATE_CLOSING 8
-#define I40IW_TCP_STATE_LAST_ACK 9
-#define I40IW_TCP_STATE_FIN_WAIT_2 10
-#define I40IW_TCP_STATE_TIME_WAIT 11
-#define I40IW_TCP_STATE_RESERVED_1 12
-#define I40IW_TCP_STATE_RESERVED_2 13
-#define I40IW_TCP_STATE_RESERVED_3 14
-#define I40IW_TCP_STATE_RESERVED_4 15
-
-/* ILQ CQP hash table fields */
-#define I40IW_CQPSQ_QHASH_VLANID_SHIFT 32
-#define I40IW_CQPSQ_QHASH_VLANID_MASK \
- ((u64)0xfff << I40IW_CQPSQ_QHASH_VLANID_SHIFT)
-
-#define I40IW_CQPSQ_QHASH_QPN_SHIFT 32
-#define I40IW_CQPSQ_QHASH_QPN_MASK \
- ((u64)0x3ffff << I40IW_CQPSQ_QHASH_QPN_SHIFT)
-
-#define I40IW_CQPSQ_QHASH_QS_HANDLE_SHIFT 0
-#define I40IW_CQPSQ_QHASH_QS_HANDLE_MASK ((u64)0x3ff << I40IW_CQPSQ_QHASH_QS_HANDLE_SHIFT)
-
-#define I40IW_CQPSQ_QHASH_SRC_PORT_SHIFT 16
-#define I40IW_CQPSQ_QHASH_SRC_PORT_MASK \
- ((u64)0xffff << I40IW_CQPSQ_QHASH_SRC_PORT_SHIFT)
-
-#define I40IW_CQPSQ_QHASH_DEST_PORT_SHIFT 0
-#define I40IW_CQPSQ_QHASH_DEST_PORT_MASK \
- ((u64)0xffff << I40IW_CQPSQ_QHASH_DEST_PORT_SHIFT)
-
-#define I40IW_CQPSQ_QHASH_ADDR0_SHIFT 32
-#define I40IW_CQPSQ_QHASH_ADDR0_MASK \
- ((u64)0xffffffff << I40IW_CQPSQ_QHASH_ADDR0_SHIFT)
-
-#define I40IW_CQPSQ_QHASH_ADDR1_SHIFT 0
-#define I40IW_CQPSQ_QHASH_ADDR1_MASK \
- ((u64)0xffffffff << I40IW_CQPSQ_QHASH_ADDR1_SHIFT)
-
-#define I40IW_CQPSQ_QHASH_ADDR2_SHIFT 32
-#define I40IW_CQPSQ_QHASH_ADDR2_MASK \
- ((u64)0xffffffff << I40IW_CQPSQ_QHASH_ADDR2_SHIFT)
-
-#define I40IW_CQPSQ_QHASH_ADDR3_SHIFT 0
-#define I40IW_CQPSQ_QHASH_ADDR3_MASK \
- ((u64)0xffffffff << I40IW_CQPSQ_QHASH_ADDR3_SHIFT)
-
-#define I40IW_CQPSQ_QHASH_WQEVALID_SHIFT 63
-#define I40IW_CQPSQ_QHASH_WQEVALID_MASK \
- ((u64)0x1 << I40IW_CQPSQ_QHASH_WQEVALID_SHIFT)
-#define I40IW_CQPSQ_QHASH_OPCODE_SHIFT 32
-#define I40IW_CQPSQ_QHASH_OPCODE_MASK \
- ((u64)0x3f << I40IW_CQPSQ_QHASH_OPCODE_SHIFT)
-
-#define I40IW_CQPSQ_QHASH_MANAGE_SHIFT 61
-#define I40IW_CQPSQ_QHASH_MANAGE_MASK \
- ((u64)0x3 << I40IW_CQPSQ_QHASH_MANAGE_SHIFT)
-
-#define I40IW_CQPSQ_QHASH_IPV4VALID_SHIFT 60
-#define I40IW_CQPSQ_QHASH_IPV4VALID_MASK \
- ((u64)0x1 << I40IW_CQPSQ_QHASH_IPV4VALID_SHIFT)
-
-#define I40IW_CQPSQ_QHASH_VLANVALID_SHIFT 59
-#define I40IW_CQPSQ_QHASH_VLANVALID_MASK \
- ((u64)0x1 << I40IW_CQPSQ_QHASH_VLANVALID_SHIFT)
-
-#define I40IW_CQPSQ_QHASH_ENTRYTYPE_SHIFT 42
-#define I40IW_CQPSQ_QHASH_ENTRYTYPE_MASK \
- ((u64)0x7 << I40IW_CQPSQ_QHASH_ENTRYTYPE_SHIFT)
-/* CQP Host Context */
-#define I40IW_CQPHC_EN_DC_TCP_SHIFT 0
-#define I40IW_CQPHC_EN_DC_TCP_MASK (1UL << I40IW_CQPHC_EN_DC_TCP_SHIFT)
-
-#define I40IW_CQPHC_SQSIZE_SHIFT 8
-#define I40IW_CQPHC_SQSIZE_MASK (0xfUL << I40IW_CQPHC_SQSIZE_SHIFT)
-
-#define I40IW_CQPHC_DISABLE_PFPDUS_SHIFT 1
-#define I40IW_CQPHC_DISABLE_PFPDUS_MASK (0x1UL << I40IW_CQPHC_DISABLE_PFPDUS_SHIFT)
-
-#define I40IW_CQPHC_ENABLED_VFS_SHIFT 32
-#define I40IW_CQPHC_ENABLED_VFS_MASK (0x3fULL << I40IW_CQPHC_ENABLED_VFS_SHIFT)
-
-#define I40IW_CQPHC_HMC_PROFILE_SHIFT 0
-#define I40IW_CQPHC_HMC_PROFILE_MASK (0x7ULL << I40IW_CQPHC_HMC_PROFILE_SHIFT)
-
-#define I40IW_CQPHC_SVER_SHIFT 24
-#define I40IW_CQPHC_SVER_MASK (0xffUL << I40IW_CQPHC_SVER_SHIFT)
-
-#define I40IW_CQPHC_SQBASE_SHIFT 9
-#define I40IW_CQPHC_SQBASE_MASK \
- (0xfffffffffffffeULL << I40IW_CQPHC_SQBASE_SHIFT)
-
-#define I40IW_CQPHC_QPCTX_SHIFT 0
-#define I40IW_CQPHC_QPCTX_MASK \
- (0xffffffffffffffffULL << I40IW_CQPHC_QPCTX_SHIFT)
-#define I40IW_CQPHC_SVER 1
-
-#define I40IW_CQP_SW_SQSIZE_4 4
-#define I40IW_CQP_SW_SQSIZE_2048 2048
-
-/* iWARP QP Doorbell shadow area */
-#define I40IW_QP_DBSA_HW_SQ_TAIL_SHIFT 0
-#define I40IW_QP_DBSA_HW_SQ_TAIL_MASK \
- (0x3fffUL << I40IW_QP_DBSA_HW_SQ_TAIL_SHIFT)
-
-/* Completion Queue Doorbell shadow area */
-#define I40IW_CQ_DBSA_CQEIDX_SHIFT 0
-#define I40IW_CQ_DBSA_CQEIDX_MASK (0xfffffUL << I40IW_CQ_DBSA_CQEIDX_SHIFT)
-
-#define I40IW_CQ_DBSA_SW_CQ_SELECT_SHIFT 0
-#define I40IW_CQ_DBSA_SW_CQ_SELECT_MASK \
- (0x3fffUL << I40IW_CQ_DBSA_SW_CQ_SELECT_SHIFT)
-
-#define I40IW_CQ_DBSA_ARM_NEXT_SHIFT 14
-#define I40IW_CQ_DBSA_ARM_NEXT_MASK (1UL << I40IW_CQ_DBSA_ARM_NEXT_SHIFT)
-
-#define I40IW_CQ_DBSA_ARM_NEXT_SE_SHIFT 15
-#define I40IW_CQ_DBSA_ARM_NEXT_SE_MASK (1UL << I40IW_CQ_DBSA_ARM_NEXT_SE_SHIFT)
-
-#define I40IW_CQ_DBSA_ARM_SEQ_NUM_SHIFT 16
-#define I40IW_CQ_DBSA_ARM_SEQ_NUM_MASK \
- (0x3UL << I40IW_CQ_DBSA_ARM_SEQ_NUM_SHIFT)
-
-/* CQP and iWARP Completion Queue */
-#define I40IW_CQ_QPCTX_SHIFT I40IW_CQPHC_QPCTX_SHIFT
-#define I40IW_CQ_QPCTX_MASK I40IW_CQPHC_QPCTX_MASK
-
-#define I40IW_CCQ_OPRETVAL_SHIFT 0
-#define I40IW_CCQ_OPRETVAL_MASK (0xffffffffUL << I40IW_CCQ_OPRETVAL_SHIFT)
-
-#define I40IW_CQ_MINERR_SHIFT 0
-#define I40IW_CQ_MINERR_MASK (0xffffUL << I40IW_CQ_MINERR_SHIFT)
-
-#define I40IW_CQ_MAJERR_SHIFT 16
-#define I40IW_CQ_MAJERR_MASK (0xffffUL << I40IW_CQ_MAJERR_SHIFT)
-
-#define I40IW_CQ_WQEIDX_SHIFT 32
-#define I40IW_CQ_WQEIDX_MASK (0x3fffULL << I40IW_CQ_WQEIDX_SHIFT)
-
-#define I40IW_CQ_ERROR_SHIFT 55
-#define I40IW_CQ_ERROR_MASK (1ULL << I40IW_CQ_ERROR_SHIFT)
-
-#define I40IW_CQ_SQ_SHIFT 62
-#define I40IW_CQ_SQ_MASK (1ULL << I40IW_CQ_SQ_SHIFT)
-
-#define I40IW_CQ_VALID_SHIFT 63
-#define I40IW_CQ_VALID_MASK (1ULL << I40IW_CQ_VALID_SHIFT)
-
-#define I40IWCQ_PAYLDLEN_SHIFT 0
-#define I40IWCQ_PAYLDLEN_MASK (0xffffffffUL << I40IWCQ_PAYLDLEN_SHIFT)
-
-#define I40IWCQ_TCPSEQNUM_SHIFT 32
-#define I40IWCQ_TCPSEQNUM_MASK (0xffffffffULL << I40IWCQ_TCPSEQNUM_SHIFT)
-
-#define I40IWCQ_INVSTAG_SHIFT 0
-#define I40IWCQ_INVSTAG_MASK (0xffffffffUL << I40IWCQ_INVSTAG_SHIFT)
-
-#define I40IWCQ_QPID_SHIFT 32
-#define I40IWCQ_QPID_MASK (0x3ffffULL << I40IWCQ_QPID_SHIFT)
-
-#define I40IWCQ_PSHDROP_SHIFT 51
-#define I40IWCQ_PSHDROP_MASK (1ULL << I40IWCQ_PSHDROP_SHIFT)
-
-#define I40IWCQ_SRQ_SHIFT 52
-#define I40IWCQ_SRQ_MASK (1ULL << I40IWCQ_SRQ_SHIFT)
-
-#define I40IWCQ_STAG_SHIFT 53
-#define I40IWCQ_STAG_MASK (1ULL << I40IWCQ_STAG_SHIFT)
-
-#define I40IWCQ_SOEVENT_SHIFT 54
-#define I40IWCQ_SOEVENT_MASK (1ULL << I40IWCQ_SOEVENT_SHIFT)
-
-#define I40IWCQ_OP_SHIFT 56
-#define I40IWCQ_OP_MASK (0x3fULL << I40IWCQ_OP_SHIFT)
-
-/* CEQE format */
-#define I40IW_CEQE_CQCTX_SHIFT 0
-#define I40IW_CEQE_CQCTX_MASK \
- (0x7fffffffffffffffULL << I40IW_CEQE_CQCTX_SHIFT)
-
-#define I40IW_CEQE_VALID_SHIFT 63
-#define I40IW_CEQE_VALID_MASK (1ULL << I40IW_CEQE_VALID_SHIFT)
-
-/* AEQE format */
-#define I40IW_AEQE_COMPCTX_SHIFT I40IW_CQPHC_QPCTX_SHIFT
-#define I40IW_AEQE_COMPCTX_MASK I40IW_CQPHC_QPCTX_MASK
-
-#define I40IW_AEQE_QPCQID_SHIFT 0
-#define I40IW_AEQE_QPCQID_MASK (0x3ffffUL << I40IW_AEQE_QPCQID_SHIFT)
-
-#define I40IW_AEQE_WQDESCIDX_SHIFT 18
-#define I40IW_AEQE_WQDESCIDX_MASK (0x3fffULL << I40IW_AEQE_WQDESCIDX_SHIFT)
-
-#define I40IW_AEQE_OVERFLOW_SHIFT 33
-#define I40IW_AEQE_OVERFLOW_MASK (1ULL << I40IW_AEQE_OVERFLOW_SHIFT)
-
-#define I40IW_AEQE_AECODE_SHIFT 34
-#define I40IW_AEQE_AECODE_MASK (0xffffULL << I40IW_AEQE_AECODE_SHIFT)
-
-#define I40IW_AEQE_AESRC_SHIFT 50
-#define I40IW_AEQE_AESRC_MASK (0xfULL << I40IW_AEQE_AESRC_SHIFT)
-
-#define I40IW_AEQE_IWSTATE_SHIFT 54
-#define I40IW_AEQE_IWSTATE_MASK (0x7ULL << I40IW_AEQE_IWSTATE_SHIFT)
-
-#define I40IW_AEQE_TCPSTATE_SHIFT 57
-#define I40IW_AEQE_TCPSTATE_MASK (0xfULL << I40IW_AEQE_TCPSTATE_SHIFT)
-
-#define I40IW_AEQE_Q2DATA_SHIFT 61
-#define I40IW_AEQE_Q2DATA_MASK (0x3ULL << I40IW_AEQE_Q2DATA_SHIFT)
-
-#define I40IW_AEQE_VALID_SHIFT 63
-#define I40IW_AEQE_VALID_MASK (1ULL << I40IW_AEQE_VALID_SHIFT)
-
-/* CQP SQ WQES */
-#define I40IW_QP_TYPE_IWARP 1
-#define I40IW_QP_TYPE_UDA 2
-#define I40IW_QP_TYPE_CQP 4
-
-#define I40IW_CQ_TYPE_IWARP 1
-#define I40IW_CQ_TYPE_ILQ 2
-#define I40IW_CQ_TYPE_IEQ 3
-#define I40IW_CQ_TYPE_CQP 4
-
-#define I40IWQP_TERM_SEND_TERM_AND_FIN 0
-#define I40IWQP_TERM_SEND_TERM_ONLY 1
-#define I40IWQP_TERM_SEND_FIN_ONLY 2
-#define I40IWQP_TERM_DONOT_SEND_TERM_OR_FIN 3
-
-#define I40IW_CQP_OP_CREATE_QP 0
-#define I40IW_CQP_OP_MODIFY_QP 0x1
-#define I40IW_CQP_OP_DESTROY_QP 0x02
-#define I40IW_CQP_OP_CREATE_CQ 0x03
-#define I40IW_CQP_OP_MODIFY_CQ 0x04
-#define I40IW_CQP_OP_DESTROY_CQ 0x05
-#define I40IW_CQP_OP_CREATE_SRQ 0x06
-#define I40IW_CQP_OP_MODIFY_SRQ 0x07
-#define I40IW_CQP_OP_DESTROY_SRQ 0x08
-#define I40IW_CQP_OP_ALLOC_STAG 0x09
-#define I40IW_CQP_OP_REG_MR 0x0a
-#define I40IW_CQP_OP_QUERY_STAG 0x0b
-#define I40IW_CQP_OP_REG_SMR 0x0c
-#define I40IW_CQP_OP_DEALLOC_STAG 0x0d
-#define I40IW_CQP_OP_MANAGE_LOC_MAC_IP_TABLE 0x0e
-#define I40IW_CQP_OP_MANAGE_ARP 0x0f
-#define I40IW_CQP_OP_MANAGE_VF_PBLE_BP 0x10
-#define I40IW_CQP_OP_MANAGE_PUSH_PAGES 0x11
-#define I40IW_CQP_OP_MANAGE_PE_TEAM 0x12
-#define I40IW_CQP_OP_UPLOAD_CONTEXT 0x13
-#define I40IW_CQP_OP_ALLOCATE_LOC_MAC_IP_TABLE_ENTRY 0x14
-#define I40IW_CQP_OP_MANAGE_HMC_PM_FUNC_TABLE 0x15
-#define I40IW_CQP_OP_CREATE_CEQ 0x16
-#define I40IW_CQP_OP_DESTROY_CEQ 0x18
-#define I40IW_CQP_OP_CREATE_AEQ 0x19
-#define I40IW_CQP_OP_DESTROY_AEQ 0x1b
-#define I40IW_CQP_OP_CREATE_ADDR_VECT 0x1c
-#define I40IW_CQP_OP_MODIFY_ADDR_VECT 0x1d
-#define I40IW_CQP_OP_DESTROY_ADDR_VECT 0x1e
-#define I40IW_CQP_OP_UPDATE_PE_SDS 0x1f
-#define I40IW_CQP_OP_QUERY_FPM_VALUES 0x20
-#define I40IW_CQP_OP_COMMIT_FPM_VALUES 0x21
-#define I40IW_CQP_OP_FLUSH_WQES 0x22
-#define I40IW_CQP_OP_MANAGE_APBVT 0x23
-#define I40IW_CQP_OP_NOP 0x24
-#define I40IW_CQP_OP_MANAGE_QUAD_HASH_TABLE_ENTRY 0x25
-#define I40IW_CQP_OP_CREATE_UDA_MCAST_GROUP 0x26
-#define I40IW_CQP_OP_MODIFY_UDA_MCAST_GROUP 0x27
-#define I40IW_CQP_OP_DESTROY_UDA_MCAST_GROUP 0x28
-#define I40IW_CQP_OP_SUSPEND_QP 0x29
-#define I40IW_CQP_OP_RESUME_QP 0x2a
-#define I40IW_CQP_OP_SHMC_PAGES_ALLOCATED 0x2b
-#define I40IW_CQP_OP_SET_HMC_RESOURCE_PROFILE 0x2d
-
-#define I40IW_UDA_QPSQ_NEXT_HEADER_SHIFT 16
-#define I40IW_UDA_QPSQ_NEXT_HEADER_MASK ((u64)0xff << I40IW_UDA_QPSQ_NEXT_HEADER_SHIFT)
-
-#define I40IW_UDA_QPSQ_OPCODE_SHIFT 32
-#define I40IW_UDA_QPSQ_OPCODE_MASK ((u64)0x3f << I40IW_UDA_QPSQ_OPCODE_SHIFT)
-
-#define I40IW_UDA_QPSQ_MACLEN_SHIFT 56
-#define I40IW_UDA_QPSQ_MACLEN_MASK \
- ((u64)0x7f << I40IW_UDA_QPSQ_MACLEN_SHIFT)
-
-#define I40IW_UDA_QPSQ_IPLEN_SHIFT 48
-#define I40IW_UDA_QPSQ_IPLEN_MASK \
- ((u64)0x7f << I40IW_UDA_QPSQ_IPLEN_SHIFT)
-
-#define I40IW_UDA_QPSQ_L4T_SHIFT 30
-#define I40IW_UDA_QPSQ_L4T_MASK \
- ((u64)0x3 << I40IW_UDA_QPSQ_L4T_SHIFT)
-
-#define I40IW_UDA_QPSQ_IIPT_SHIFT 28
-#define I40IW_UDA_QPSQ_IIPT_MASK \
- ((u64)0x3 << I40IW_UDA_QPSQ_IIPT_SHIFT)
-
-#define I40IW_UDA_QPSQ_L4LEN_SHIFT 24
-#define I40IW_UDA_QPSQ_L4LEN_MASK ((u64)0xf << I40IW_UDA_QPSQ_L4LEN_SHIFT)
-
-#define I40IW_UDA_QPSQ_AVIDX_SHIFT 0
-#define I40IW_UDA_QPSQ_AVIDX_MASK ((u64)0xffff << I40IW_UDA_QPSQ_AVIDX_SHIFT)
-
-#define I40IW_UDA_QPSQ_VALID_SHIFT 63
-#define I40IW_UDA_QPSQ_VALID_MASK \
- ((u64)0x1 << I40IW_UDA_QPSQ_VALID_SHIFT)
-
-#define I40IW_UDA_QPSQ_SIGCOMPL_SHIFT 62
-#define I40IW_UDA_QPSQ_SIGCOMPL_MASK ((u64)0x1 << I40IW_UDA_QPSQ_SIGCOMPL_SHIFT)
-
-#define I40IW_UDA_PAYLOADLEN_SHIFT 0
-#define I40IW_UDA_PAYLOADLEN_MASK ((u64)0x3fff << I40IW_UDA_PAYLOADLEN_SHIFT)
-
-#define I40IW_UDA_HDRLEN_SHIFT 16
-#define I40IW_UDA_HDRLEN_MASK ((u64)0x1ff << I40IW_UDA_HDRLEN_SHIFT)
-
-#define I40IW_VLAN_TAG_VALID_SHIFT 50
-#define I40IW_VLAN_TAG_VALID_MASK ((u64)0x1 << I40IW_VLAN_TAG_VALID_SHIFT)
-
-#define I40IW_UDA_L3PROTO_SHIFT 0
-#define I40IW_UDA_L3PROTO_MASK ((u64)0x3 << I40IW_UDA_L3PROTO_SHIFT)
-
-#define I40IW_UDA_L4PROTO_SHIFT 16
-#define I40IW_UDA_L4PROTO_MASK ((u64)0x3 << I40IW_UDA_L4PROTO_SHIFT)
-
-#define I40IW_UDA_QPSQ_DOLOOPBACK_SHIFT 44
-#define I40IW_UDA_QPSQ_DOLOOPBACK_MASK \
- ((u64)0x1 << I40IW_UDA_QPSQ_DOLOOPBACK_SHIFT)
-
-/* CQP SQ WQE common fields */
-#define I40IW_CQPSQ_OPCODE_SHIFT 32
-#define I40IW_CQPSQ_OPCODE_MASK (0x3fULL << I40IW_CQPSQ_OPCODE_SHIFT)
-
-#define I40IW_CQPSQ_WQEVALID_SHIFT 63
-#define I40IW_CQPSQ_WQEVALID_MASK (1ULL << I40IW_CQPSQ_WQEVALID_SHIFT)
-
-#define I40IW_CQPSQ_TPHVAL_SHIFT 0
-#define I40IW_CQPSQ_TPHVAL_MASK (0xffUL << I40IW_CQPSQ_TPHVAL_SHIFT)
-
-#define I40IW_CQPSQ_TPHEN_SHIFT 60
-#define I40IW_CQPSQ_TPHEN_MASK (1ULL << I40IW_CQPSQ_TPHEN_SHIFT)
-
-#define I40IW_CQPSQ_PBUFADDR_SHIFT I40IW_CQPHC_QPCTX_SHIFT
-#define I40IW_CQPSQ_PBUFADDR_MASK I40IW_CQPHC_QPCTX_MASK
-
-/* Create/Modify/Destroy QP */
-
-#define I40IW_CQPSQ_QP_NEWMSS_SHIFT 32
-#define I40IW_CQPSQ_QP_NEWMSS_MASK (0x3fffULL << I40IW_CQPSQ_QP_NEWMSS_SHIFT)
-
-#define I40IW_CQPSQ_QP_TERMLEN_SHIFT 48
-#define I40IW_CQPSQ_QP_TERMLEN_MASK (0xfULL << I40IW_CQPSQ_QP_TERMLEN_SHIFT)
-
-#define I40IW_CQPSQ_QP_QPCTX_SHIFT I40IW_CQPHC_QPCTX_SHIFT
-#define I40IW_CQPSQ_QP_QPCTX_MASK I40IW_CQPHC_QPCTX_MASK
-
-#define I40IW_CQPSQ_QP_QPID_SHIFT 0
-#define I40IW_CQPSQ_QP_QPID_MASK (0x3FFFFUL)
-/* I40IWCQ_QPID_MASK */
-
-#define I40IW_CQPSQ_QP_OP_SHIFT 32
-#define I40IW_CQPSQ_QP_OP_MASK I40IWCQ_OP_MASK
-
-#define I40IW_CQPSQ_QP_ORDVALID_SHIFT 42
-#define I40IW_CQPSQ_QP_ORDVALID_MASK (1ULL << I40IW_CQPSQ_QP_ORDVALID_SHIFT)
-
-#define I40IW_CQPSQ_QP_TOECTXVALID_SHIFT 43
-#define I40IW_CQPSQ_QP_TOECTXVALID_MASK \
- (1ULL << I40IW_CQPSQ_QP_TOECTXVALID_SHIFT)
-
-#define I40IW_CQPSQ_QP_CACHEDVARVALID_SHIFT 44
-#define I40IW_CQPSQ_QP_CACHEDVARVALID_MASK \
- (1ULL << I40IW_CQPSQ_QP_CACHEDVARVALID_SHIFT)
-
-#define I40IW_CQPSQ_QP_VQ_SHIFT 45
-#define I40IW_CQPSQ_QP_VQ_MASK (1ULL << I40IW_CQPSQ_QP_VQ_SHIFT)
-
-#define I40IW_CQPSQ_QP_FORCELOOPBACK_SHIFT 46
-#define I40IW_CQPSQ_QP_FORCELOOPBACK_MASK \
- (1ULL << I40IW_CQPSQ_QP_FORCELOOPBACK_SHIFT)
-
-#define I40IW_CQPSQ_QP_CQNUMVALID_SHIFT 47
-#define I40IW_CQPSQ_QP_CQNUMVALID_MASK \
- (1ULL << I40IW_CQPSQ_QP_CQNUMVALID_SHIFT)
-
-#define I40IW_CQPSQ_QP_QPTYPE_SHIFT 48
-#define I40IW_CQPSQ_QP_QPTYPE_MASK (0x3ULL << I40IW_CQPSQ_QP_QPTYPE_SHIFT)
-
-#define I40IW_CQPSQ_QP_MSSCHANGE_SHIFT 52
-#define I40IW_CQPSQ_QP_MSSCHANGE_MASK (1ULL << I40IW_CQPSQ_QP_MSSCHANGE_SHIFT)
-
-#define I40IW_CQPSQ_QP_STATRSRC_SHIFT 53
-#define I40IW_CQPSQ_QP_STATRSRC_MASK (1ULL << I40IW_CQPSQ_QP_STATRSRC_SHIFT)
-
-#define I40IW_CQPSQ_QP_IGNOREMWBOUND_SHIFT 54
-#define I40IW_CQPSQ_QP_IGNOREMWBOUND_MASK \
- (1ULL << I40IW_CQPSQ_QP_IGNOREMWBOUND_SHIFT)
-
-#define I40IW_CQPSQ_QP_REMOVEHASHENTRY_SHIFT 55
-#define I40IW_CQPSQ_QP_REMOVEHASHENTRY_MASK \
- (1ULL << I40IW_CQPSQ_QP_REMOVEHASHENTRY_SHIFT)
-
-#define I40IW_CQPSQ_QP_TERMACT_SHIFT 56
-#define I40IW_CQPSQ_QP_TERMACT_MASK (0x3ULL << I40IW_CQPSQ_QP_TERMACT_SHIFT)
-
-#define I40IW_CQPSQ_QP_RESETCON_SHIFT 58
-#define I40IW_CQPSQ_QP_RESETCON_MASK (1ULL << I40IW_CQPSQ_QP_RESETCON_SHIFT)
-
-#define I40IW_CQPSQ_QP_ARPTABIDXVALID_SHIFT 59
-#define I40IW_CQPSQ_QP_ARPTABIDXVALID_MASK \
- (1ULL << I40IW_CQPSQ_QP_ARPTABIDXVALID_SHIFT)
-
-#define I40IW_CQPSQ_QP_NEXTIWSTATE_SHIFT 60
-#define I40IW_CQPSQ_QP_NEXTIWSTATE_MASK \
- (0x7ULL << I40IW_CQPSQ_QP_NEXTIWSTATE_SHIFT)
-
-#define I40IW_CQPSQ_QP_DBSHADOWADDR_SHIFT I40IW_CQPHC_QPCTX_SHIFT
-#define I40IW_CQPSQ_QP_DBSHADOWADDR_MASK I40IW_CQPHC_QPCTX_MASK
-
-/* Create/Modify/Destroy CQ */
-#define I40IW_CQPSQ_CQ_CQSIZE_SHIFT 0
-#define I40IW_CQPSQ_CQ_CQSIZE_MASK (0x3ffffUL << I40IW_CQPSQ_CQ_CQSIZE_SHIFT)
-
-#define I40IW_CQPSQ_CQ_CQCTX_SHIFT 0
-#define I40IW_CQPSQ_CQ_CQCTX_MASK \
- (0x7fffffffffffffffULL << I40IW_CQPSQ_CQ_CQCTX_SHIFT)
-
-#define I40IW_CQPSQ_CQ_CQCTX_SHIFT 0
-#define I40IW_CQPSQ_CQ_CQCTX_MASK \
- (0x7fffffffffffffffULL << I40IW_CQPSQ_CQ_CQCTX_SHIFT)
-
-#define I40IW_CQPSQ_CQ_SHADOW_READ_THRESHOLD_SHIFT 0
-#define I40IW_CQPSQ_CQ_SHADOW_READ_THRESHOLD_MASK \
- (0x3ffff << I40IW_CQPSQ_CQ_SHADOW_READ_THRESHOLD_SHIFT)
-
-#define I40IW_CQPSQ_CQ_CEQID_SHIFT 24
-#define I40IW_CQPSQ_CQ_CEQID_MASK (0x7fUL << I40IW_CQPSQ_CQ_CEQID_SHIFT)
-
-#define I40IW_CQPSQ_CQ_OP_SHIFT 32
-#define I40IW_CQPSQ_CQ_OP_MASK (0x3fULL << I40IW_CQPSQ_CQ_OP_SHIFT)
-
-#define I40IW_CQPSQ_CQ_CQRESIZE_SHIFT 43
-#define I40IW_CQPSQ_CQ_CQRESIZE_MASK (1ULL << I40IW_CQPSQ_CQ_CQRESIZE_SHIFT)
-
-#define I40IW_CQPSQ_CQ_LPBLSIZE_SHIFT 44
-#define I40IW_CQPSQ_CQ_LPBLSIZE_MASK (3ULL << I40IW_CQPSQ_CQ_LPBLSIZE_SHIFT)
-
-#define I40IW_CQPSQ_CQ_CHKOVERFLOW_SHIFT 46
-#define I40IW_CQPSQ_CQ_CHKOVERFLOW_MASK \
- (1ULL << I40IW_CQPSQ_CQ_CHKOVERFLOW_SHIFT)
-
-#define I40IW_CQPSQ_CQ_VIRTMAP_SHIFT 47
-#define I40IW_CQPSQ_CQ_VIRTMAP_MASK (1ULL << I40IW_CQPSQ_CQ_VIRTMAP_SHIFT)
-
-#define I40IW_CQPSQ_CQ_ENCEQEMASK_SHIFT 48
-#define I40IW_CQPSQ_CQ_ENCEQEMASK_MASK \
- (1ULL << I40IW_CQPSQ_CQ_ENCEQEMASK_SHIFT)
-
-#define I40IW_CQPSQ_CQ_CEQIDVALID_SHIFT 49
-#define I40IW_CQPSQ_CQ_CEQIDVALID_MASK \
- (1ULL << I40IW_CQPSQ_CQ_CEQIDVALID_SHIFT)
-
-#define I40IW_CQPSQ_CQ_AVOIDMEMCNFLCT_SHIFT 61
-#define I40IW_CQPSQ_CQ_AVOIDMEMCNFLCT_MASK \
- (1ULL << I40IW_CQPSQ_CQ_AVOIDMEMCNFLCT_SHIFT)
-
-/* Create/Modify/Destroy Shared Receive Queue */
-
-#define I40IW_CQPSQ_SRQ_RQSIZE_SHIFT 0
-#define I40IW_CQPSQ_SRQ_RQSIZE_MASK (0xfUL << I40IW_CQPSQ_SRQ_RQSIZE_SHIFT)
-
-#define I40IW_CQPSQ_SRQ_RQWQESIZE_SHIFT 4
-#define I40IW_CQPSQ_SRQ_RQWQESIZE_MASK \
- (0x7UL << I40IW_CQPSQ_SRQ_RQWQESIZE_SHIFT)
-
-#define I40IW_CQPSQ_SRQ_SRQLIMIT_SHIFT 32
-#define I40IW_CQPSQ_SRQ_SRQLIMIT_MASK \
- (0xfffULL << I40IW_CQPSQ_SRQ_SRQLIMIT_SHIFT)
-
-#define I40IW_CQPSQ_SRQ_SRQCTX_SHIFT I40IW_CQPHC_QPCTX_SHIFT
-#define I40IW_CQPSQ_SRQ_SRQCTX_MASK I40IW_CQPHC_QPCTX_MASK
-
-#define I40IW_CQPSQ_SRQ_PDID_SHIFT 16
-#define I40IW_CQPSQ_SRQ_PDID_MASK \
- (0x7fffULL << I40IW_CQPSQ_SRQ_PDID_SHIFT)
-
-#define I40IW_CQPSQ_SRQ_SRQID_SHIFT 0
-#define I40IW_CQPSQ_SRQ_SRQID_MASK (0x7fffUL << I40IW_CQPSQ_SRQ_SRQID_SHIFT)
-
-#define I40IW_CQPSQ_SRQ_LPBLSIZE_SHIFT I40IW_CQPSQ_CQ_LPBLSIZE_SHIFT
-#define I40IW_CQPSQ_SRQ_LPBLSIZE_MASK I40IW_CQPSQ_CQ_LPBLSIZE_MASK
-
-#define I40IW_CQPSQ_SRQ_VIRTMAP_SHIFT I40IW_CQPSQ_CQ_VIRTMAP_SHIFT
-#define I40IW_CQPSQ_SRQ_VIRTMAP_MASK I40IW_CQPSQ_CQ_VIRTMAP_MASK
-
-#define I40IW_CQPSQ_SRQ_TPHEN_SHIFT I40IW_CQPSQ_TPHEN_SHIFT
-#define I40IW_CQPSQ_SRQ_TPHEN_MASK I40IW_CQPSQ_TPHEN_MASK
-
-#define I40IW_CQPSQ_SRQ_ARMLIMITEVENT_SHIFT 61
-#define I40IW_CQPSQ_SRQ_ARMLIMITEVENT_MASK \
- (1ULL << I40IW_CQPSQ_SRQ_ARMLIMITEVENT_SHIFT)
-
-#define I40IW_CQPSQ_SRQ_DBSHADOWAREA_SHIFT 6
-#define I40IW_CQPSQ_SRQ_DBSHADOWAREA_MASK \
- (0x3ffffffffffffffULL << I40IW_CQPSQ_SRQ_DBSHADOWAREA_SHIFT)
-
-#define I40IW_CQPSQ_SRQ_FIRSTPMPBLIDX_SHIFT 0
-#define I40IW_CQPSQ_SRQ_FIRSTPMPBLIDX_MASK \
- (0xfffffffUL << I40IW_CQPSQ_SRQ_FIRSTPMPBLIDX_SHIFT)
-
-/* Allocate/Register/Register Shared/Deallocate Stag */
-#define I40IW_CQPSQ_STAG_VA_FBO_SHIFT I40IW_CQPHC_QPCTX_SHIFT
-#define I40IW_CQPSQ_STAG_VA_FBO_MASK I40IW_CQPHC_QPCTX_MASK
-
-#define I40IW_CQPSQ_STAG_STAGLEN_SHIFT 0
-#define I40IW_CQPSQ_STAG_STAGLEN_MASK \
- (0x3fffffffffffULL << I40IW_CQPSQ_STAG_STAGLEN_SHIFT)
-
-#define I40IW_CQPSQ_STAG_PDID_SHIFT 48
-#define I40IW_CQPSQ_STAG_PDID_MASK (0x7fffULL << I40IW_CQPSQ_STAG_PDID_SHIFT)
-
-#define I40IW_CQPSQ_STAG_KEY_SHIFT 0
-#define I40IW_CQPSQ_STAG_KEY_MASK (0xffUL << I40IW_CQPSQ_STAG_KEY_SHIFT)
-
-#define I40IW_CQPSQ_STAG_IDX_SHIFT 8
-#define I40IW_CQPSQ_STAG_IDX_MASK (0xffffffUL << I40IW_CQPSQ_STAG_IDX_SHIFT)
-
-#define I40IW_CQPSQ_STAG_PARENTSTAGIDX_SHIFT 32
-#define I40IW_CQPSQ_STAG_PARENTSTAGIDX_MASK \
- (0xffffffULL << I40IW_CQPSQ_STAG_PARENTSTAGIDX_SHIFT)
-
-#define I40IW_CQPSQ_STAG_MR_SHIFT 43
-#define I40IW_CQPSQ_STAG_MR_MASK (1ULL << I40IW_CQPSQ_STAG_MR_SHIFT)
-
-#define I40IW_CQPSQ_STAG_LPBLSIZE_SHIFT I40IW_CQPSQ_CQ_LPBLSIZE_SHIFT
-#define I40IW_CQPSQ_STAG_LPBLSIZE_MASK I40IW_CQPSQ_CQ_LPBLSIZE_MASK
-
-#define I40IW_CQPSQ_STAG_HPAGESIZE_SHIFT 46
-#define I40IW_CQPSQ_STAG_HPAGESIZE_MASK \
- (1ULL << I40IW_CQPSQ_STAG_HPAGESIZE_SHIFT)
-
-#define I40IW_CQPSQ_STAG_ARIGHTS_SHIFT 48
-#define I40IW_CQPSQ_STAG_ARIGHTS_MASK \
- (0x1fULL << I40IW_CQPSQ_STAG_ARIGHTS_SHIFT)
-
-#define I40IW_CQPSQ_STAG_REMACCENABLED_SHIFT 53
-#define I40IW_CQPSQ_STAG_REMACCENABLED_MASK \
- (1ULL << I40IW_CQPSQ_STAG_REMACCENABLED_SHIFT)
-
-#define I40IW_CQPSQ_STAG_VABASEDTO_SHIFT 59
-#define I40IW_CQPSQ_STAG_VABASEDTO_MASK \
- (1ULL << I40IW_CQPSQ_STAG_VABASEDTO_SHIFT)
-
-#define I40IW_CQPSQ_STAG_USEHMCFNIDX_SHIFT 60
-#define I40IW_CQPSQ_STAG_USEHMCFNIDX_MASK \
- (1ULL << I40IW_CQPSQ_STAG_USEHMCFNIDX_SHIFT)
-
-#define I40IW_CQPSQ_STAG_USEPFRID_SHIFT 61
-#define I40IW_CQPSQ_STAG_USEPFRID_MASK \
- (1ULL << I40IW_CQPSQ_STAG_USEPFRID_SHIFT)
-
-#define I40IW_CQPSQ_STAG_PBA_SHIFT I40IW_CQPHC_QPCTX_SHIFT
-#define I40IW_CQPSQ_STAG_PBA_MASK I40IW_CQPHC_QPCTX_MASK
-
-#define I40IW_CQPSQ_STAG_HMCFNIDX_SHIFT 0
-#define I40IW_CQPSQ_STAG_HMCFNIDX_MASK \
- (0x3fUL << I40IW_CQPSQ_STAG_HMCFNIDX_SHIFT)
-
-#define I40IW_CQPSQ_STAG_FIRSTPMPBLIDX_SHIFT 0
-#define I40IW_CQPSQ_STAG_FIRSTPMPBLIDX_MASK \
- (0xfffffffUL << I40IW_CQPSQ_STAG_FIRSTPMPBLIDX_SHIFT)
-
-/* Query stag */
-#define I40IW_CQPSQ_QUERYSTAG_IDX_SHIFT I40IW_CQPSQ_STAG_IDX_SHIFT
-#define I40IW_CQPSQ_QUERYSTAG_IDX_MASK I40IW_CQPSQ_STAG_IDX_MASK
-
-/* Allocate Local IP Address Entry */
-
-/* Manage Local IP Address Table - MLIPA */
-#define I40IW_CQPSQ_MLIPA_IPV6LO_SHIFT I40IW_CQPHC_QPCTX_SHIFT
-#define I40IW_CQPSQ_MLIPA_IPV6LO_MASK I40IW_CQPHC_QPCTX_MASK
-
-#define I40IW_CQPSQ_MLIPA_IPV6HI_SHIFT I40IW_CQPHC_QPCTX_SHIFT
-#define I40IW_CQPSQ_MLIPA_IPV6HI_MASK I40IW_CQPHC_QPCTX_MASK
-
-#define I40IW_CQPSQ_MLIPA_IPV4_SHIFT 0
-#define I40IW_CQPSQ_MLIPA_IPV4_MASK \
- (0xffffffffUL << I40IW_CQPSQ_MLIPA_IPV4_SHIFT)
-
-#define I40IW_CQPSQ_MLIPA_IPTABLEIDX_SHIFT 0
-#define I40IW_CQPSQ_MLIPA_IPTABLEIDX_MASK \
- (0x3fUL << I40IW_CQPSQ_MLIPA_IPTABLEIDX_SHIFT)
-
-#define I40IW_CQPSQ_MLIPA_IPV4VALID_SHIFT 42
-#define I40IW_CQPSQ_MLIPA_IPV4VALID_MASK \
- (1ULL << I40IW_CQPSQ_MLIPA_IPV4VALID_SHIFT)
-
-#define I40IW_CQPSQ_MLIPA_IPV6VALID_SHIFT 43
-#define I40IW_CQPSQ_MLIPA_IPV6VALID_MASK \
- (1ULL << I40IW_CQPSQ_MLIPA_IPV6VALID_SHIFT)
-
-#define I40IW_CQPSQ_MLIPA_FREEENTRY_SHIFT 62
-#define I40IW_CQPSQ_MLIPA_FREEENTRY_MASK \
- (1ULL << I40IW_CQPSQ_MLIPA_FREEENTRY_SHIFT)
-
-#define I40IW_CQPSQ_MLIPA_IGNORE_REF_CNT_SHIFT 61
-#define I40IW_CQPSQ_MLIPA_IGNORE_REF_CNT_MASK \
- (1ULL << I40IW_CQPSQ_MLIPA_IGNORE_REF_CNT_SHIFT)
-
-#define I40IW_CQPSQ_MLIPA_MAC0_SHIFT 0
-#define I40IW_CQPSQ_MLIPA_MAC0_MASK (0xffUL << I40IW_CQPSQ_MLIPA_MAC0_SHIFT)
-
-#define I40IW_CQPSQ_MLIPA_MAC1_SHIFT 8
-#define I40IW_CQPSQ_MLIPA_MAC1_MASK (0xffUL << I40IW_CQPSQ_MLIPA_MAC1_SHIFT)
-
-#define I40IW_CQPSQ_MLIPA_MAC2_SHIFT 16
-#define I40IW_CQPSQ_MLIPA_MAC2_MASK (0xffUL << I40IW_CQPSQ_MLIPA_MAC2_SHIFT)
-
-#define I40IW_CQPSQ_MLIPA_MAC3_SHIFT 24
-#define I40IW_CQPSQ_MLIPA_MAC3_MASK (0xffUL << I40IW_CQPSQ_MLIPA_MAC3_SHIFT)
-
-#define I40IW_CQPSQ_MLIPA_MAC4_SHIFT 32
-#define I40IW_CQPSQ_MLIPA_MAC4_MASK (0xffULL << I40IW_CQPSQ_MLIPA_MAC4_SHIFT)
-
-#define I40IW_CQPSQ_MLIPA_MAC5_SHIFT 40
-#define I40IW_CQPSQ_MLIPA_MAC5_MASK (0xffULL << I40IW_CQPSQ_MLIPA_MAC5_SHIFT)
-
-/* Manage ARP Table - MAT */
-#define I40IW_CQPSQ_MAT_REACHMAX_SHIFT 0
-#define I40IW_CQPSQ_MAT_REACHMAX_MASK \
- (0xffffffffUL << I40IW_CQPSQ_MAT_REACHMAX_SHIFT)
-
-#define I40IW_CQPSQ_MAT_MACADDR_SHIFT 0
-#define I40IW_CQPSQ_MAT_MACADDR_MASK \
- (0xffffffffffffULL << I40IW_CQPSQ_MAT_MACADDR_SHIFT)
-
-#define I40IW_CQPSQ_MAT_ARPENTRYIDX_SHIFT 0
-#define I40IW_CQPSQ_MAT_ARPENTRYIDX_MASK \
- (0xfffUL << I40IW_CQPSQ_MAT_ARPENTRYIDX_SHIFT)
-
-#define I40IW_CQPSQ_MAT_ENTRYVALID_SHIFT 42
-#define I40IW_CQPSQ_MAT_ENTRYVALID_MASK \
- (1ULL << I40IW_CQPSQ_MAT_ENTRYVALID_SHIFT)
-
-#define I40IW_CQPSQ_MAT_PERMANENT_SHIFT 43
-#define I40IW_CQPSQ_MAT_PERMANENT_MASK \
- (1ULL << I40IW_CQPSQ_MAT_PERMANENT_SHIFT)
-
-#define I40IW_CQPSQ_MAT_QUERY_SHIFT 44
-#define I40IW_CQPSQ_MAT_QUERY_MASK (1ULL << I40IW_CQPSQ_MAT_QUERY_SHIFT)
-
-/* Manage VF PBLE Backing Pages - MVPBP*/
-#define I40IW_CQPSQ_MVPBP_PD_ENTRY_CNT_SHIFT 0
-#define I40IW_CQPSQ_MVPBP_PD_ENTRY_CNT_MASK \
- (0x3ffULL << I40IW_CQPSQ_MVPBP_PD_ENTRY_CNT_SHIFT)
-
-#define I40IW_CQPSQ_MVPBP_FIRST_PD_INX_SHIFT 16
-#define I40IW_CQPSQ_MVPBP_FIRST_PD_INX_MASK \
- (0x1ffULL << I40IW_CQPSQ_MVPBP_FIRST_PD_INX_SHIFT)
-
-#define I40IW_CQPSQ_MVPBP_SD_INX_SHIFT 32
-#define I40IW_CQPSQ_MVPBP_SD_INX_MASK \
- (0xfffULL << I40IW_CQPSQ_MVPBP_SD_INX_SHIFT)
-
-#define I40IW_CQPSQ_MVPBP_INV_PD_ENT_SHIFT 62
-#define I40IW_CQPSQ_MVPBP_INV_PD_ENT_MASK \
- (0x1ULL << I40IW_CQPSQ_MVPBP_INV_PD_ENT_SHIFT)
-
-#define I40IW_CQPSQ_MVPBP_PD_PLPBA_SHIFT 3
-#define I40IW_CQPSQ_MVPBP_PD_PLPBA_MASK \
- (0x1fffffffffffffffULL << I40IW_CQPSQ_MVPBP_PD_PLPBA_SHIFT)
-
-/* Manage Push Page - MPP */
-#define I40IW_INVALID_PUSH_PAGE_INDEX 0xffff
-
-#define I40IW_CQPSQ_MPP_QS_HANDLE_SHIFT 0
-#define I40IW_CQPSQ_MPP_QS_HANDLE_MASK (0xffffUL << \
- I40IW_CQPSQ_MPP_QS_HANDLE_SHIFT)
-
-#define I40IW_CQPSQ_MPP_PPIDX_SHIFT 0
-#define I40IW_CQPSQ_MPP_PPIDX_MASK (0x3ffUL << I40IW_CQPSQ_MPP_PPIDX_SHIFT)
-
-#define I40IW_CQPSQ_MPP_FREE_PAGE_SHIFT 62
-#define I40IW_CQPSQ_MPP_FREE_PAGE_MASK (1ULL << I40IW_CQPSQ_MPP_FREE_PAGE_SHIFT)
-
-/* Upload Context - UCTX */
-#define I40IW_CQPSQ_UCTX_QPCTXADDR_SHIFT I40IW_CQPHC_QPCTX_SHIFT
-#define I40IW_CQPSQ_UCTX_QPCTXADDR_MASK I40IW_CQPHC_QPCTX_MASK
-
-#define I40IW_CQPSQ_UCTX_QPID_SHIFT 0
-#define I40IW_CQPSQ_UCTX_QPID_MASK (0x3ffffUL << I40IW_CQPSQ_UCTX_QPID_SHIFT)
-
-#define I40IW_CQPSQ_UCTX_QPTYPE_SHIFT 48
-#define I40IW_CQPSQ_UCTX_QPTYPE_MASK (0xfULL << I40IW_CQPSQ_UCTX_QPTYPE_SHIFT)
-
-#define I40IW_CQPSQ_UCTX_RAWFORMAT_SHIFT 61
-#define I40IW_CQPSQ_UCTX_RAWFORMAT_MASK \
- (1ULL << I40IW_CQPSQ_UCTX_RAWFORMAT_SHIFT)
-
-#define I40IW_CQPSQ_UCTX_FREEZEQP_SHIFT 62
-#define I40IW_CQPSQ_UCTX_FREEZEQP_MASK \
- (1ULL << I40IW_CQPSQ_UCTX_FREEZEQP_SHIFT)
-
-/* Manage HMC PM Function Table - MHMC */
-#define I40IW_CQPSQ_MHMC_VFIDX_SHIFT 0
-#define I40IW_CQPSQ_MHMC_VFIDX_MASK (0x7fUL << I40IW_CQPSQ_MHMC_VFIDX_SHIFT)
-
-#define I40IW_CQPSQ_MHMC_FREEPMFN_SHIFT 62
-#define I40IW_CQPSQ_MHMC_FREEPMFN_MASK \
- (1ULL << I40IW_CQPSQ_MHMC_FREEPMFN_SHIFT)
-
-/* Set HMC Resource Profile - SHMCRP */
-#define I40IW_CQPSQ_SHMCRP_HMC_PROFILE_SHIFT 0
-#define I40IW_CQPSQ_SHMCRP_HMC_PROFILE_MASK \
- (0x7ULL << I40IW_CQPSQ_SHMCRP_HMC_PROFILE_SHIFT)
-#define I40IW_CQPSQ_SHMCRP_VFNUM_SHIFT 32
-#define I40IW_CQPSQ_SHMCRP_VFNUM_MASK (0x3fULL << I40IW_CQPSQ_SHMCRP_VFNUM_SHIFT)
-
-/* Create/Destroy CEQ */
-#define I40IW_CQPSQ_CEQ_CEQSIZE_SHIFT 0
-#define I40IW_CQPSQ_CEQ_CEQSIZE_MASK \
- (0x1ffffUL << I40IW_CQPSQ_CEQ_CEQSIZE_SHIFT)
-
-#define I40IW_CQPSQ_CEQ_CEQID_SHIFT 0
-#define I40IW_CQPSQ_CEQ_CEQID_MASK (0x7fUL << I40IW_CQPSQ_CEQ_CEQID_SHIFT)
-
-#define I40IW_CQPSQ_CEQ_LPBLSIZE_SHIFT I40IW_CQPSQ_CQ_LPBLSIZE_SHIFT
-#define I40IW_CQPSQ_CEQ_LPBLSIZE_MASK I40IW_CQPSQ_CQ_LPBLSIZE_MASK
-
-#define I40IW_CQPSQ_CEQ_VMAP_SHIFT 47
-#define I40IW_CQPSQ_CEQ_VMAP_MASK (1ULL << I40IW_CQPSQ_CEQ_VMAP_SHIFT)
-
-#define I40IW_CQPSQ_CEQ_FIRSTPMPBLIDX_SHIFT 0
-#define I40IW_CQPSQ_CEQ_FIRSTPMPBLIDX_MASK \
- (0xfffffffUL << I40IW_CQPSQ_CEQ_FIRSTPMPBLIDX_SHIFT)
-
-/* Create/Destroy AEQ */
-#define I40IW_CQPSQ_AEQ_AEQECNT_SHIFT 0
-#define I40IW_CQPSQ_AEQ_AEQECNT_MASK \
- (0x7ffffUL << I40IW_CQPSQ_AEQ_AEQECNT_SHIFT)
-
-#define I40IW_CQPSQ_AEQ_LPBLSIZE_SHIFT I40IW_CQPSQ_CQ_LPBLSIZE_SHIFT
-#define I40IW_CQPSQ_AEQ_LPBLSIZE_MASK I40IW_CQPSQ_CQ_LPBLSIZE_MASK
-
-#define I40IW_CQPSQ_AEQ_VMAP_SHIFT 47
-#define I40IW_CQPSQ_AEQ_VMAP_MASK (1ULL << I40IW_CQPSQ_AEQ_VMAP_SHIFT)
-
-#define I40IW_CQPSQ_AEQ_FIRSTPMPBLIDX_SHIFT 0
-#define I40IW_CQPSQ_AEQ_FIRSTPMPBLIDX_MASK \
- (0xfffffffUL << I40IW_CQPSQ_AEQ_FIRSTPMPBLIDX_SHIFT)
-
-/* Commit FPM Values - CFPM */
-#define I40IW_CQPSQ_CFPM_HMCFNID_SHIFT 0
-#define I40IW_CQPSQ_CFPM_HMCFNID_MASK (0x3fUL << I40IW_CQPSQ_CFPM_HMCFNID_SHIFT)
-
-/* Flush WQEs - FWQE */
-#define I40IW_CQPSQ_FWQE_AECODE_SHIFT 0
-#define I40IW_CQPSQ_FWQE_AECODE_MASK (0xffffUL << I40IW_CQPSQ_FWQE_AECODE_SHIFT)
-
-#define I40IW_CQPSQ_FWQE_AESOURCE_SHIFT 16
-#define I40IW_CQPSQ_FWQE_AESOURCE_MASK \
- (0xfUL << I40IW_CQPSQ_FWQE_AESOURCE_SHIFT)
-
-#define I40IW_CQPSQ_FWQE_RQMNERR_SHIFT 0
-#define I40IW_CQPSQ_FWQE_RQMNERR_MASK \
- (0xffffUL << I40IW_CQPSQ_FWQE_RQMNERR_SHIFT)
-
-#define I40IW_CQPSQ_FWQE_RQMJERR_SHIFT 16
-#define I40IW_CQPSQ_FWQE_RQMJERR_MASK \
- (0xffffUL << I40IW_CQPSQ_FWQE_RQMJERR_SHIFT)
-
-#define I40IW_CQPSQ_FWQE_SQMNERR_SHIFT 32
-#define I40IW_CQPSQ_FWQE_SQMNERR_MASK \
- (0xffffULL << I40IW_CQPSQ_FWQE_SQMNERR_SHIFT)
-
-#define I40IW_CQPSQ_FWQE_SQMJERR_SHIFT 48
-#define I40IW_CQPSQ_FWQE_SQMJERR_MASK \
- (0xffffULL << I40IW_CQPSQ_FWQE_SQMJERR_SHIFT)
-
-#define I40IW_CQPSQ_FWQE_QPID_SHIFT 0
-#define I40IW_CQPSQ_FWQE_QPID_MASK (0x3ffffULL << I40IW_CQPSQ_FWQE_QPID_SHIFT)
-
-#define I40IW_CQPSQ_FWQE_GENERATE_AE_SHIFT 59
-#define I40IW_CQPSQ_FWQE_GENERATE_AE_MASK (1ULL << \
- I40IW_CQPSQ_FWQE_GENERATE_AE_SHIFT)
-
-#define I40IW_CQPSQ_FWQE_USERFLCODE_SHIFT 60
-#define I40IW_CQPSQ_FWQE_USERFLCODE_MASK \
- (1ULL << I40IW_CQPSQ_FWQE_USERFLCODE_SHIFT)
-
-#define I40IW_CQPSQ_FWQE_FLUSHSQ_SHIFT 61
-#define I40IW_CQPSQ_FWQE_FLUSHSQ_MASK (1ULL << I40IW_CQPSQ_FWQE_FLUSHSQ_SHIFT)
-
-#define I40IW_CQPSQ_FWQE_FLUSHRQ_SHIFT 62
-#define I40IW_CQPSQ_FWQE_FLUSHRQ_MASK (1ULL << I40IW_CQPSQ_FWQE_FLUSHRQ_SHIFT)
-
-/* Manage Accelerated Port Table - MAPT */
-#define I40IW_CQPSQ_MAPT_PORT_SHIFT 0
-#define I40IW_CQPSQ_MAPT_PORT_MASK (0xffffUL << I40IW_CQPSQ_MAPT_PORT_SHIFT)
-
-#define I40IW_CQPSQ_MAPT_ADDPORT_SHIFT 62
-#define I40IW_CQPSQ_MAPT_ADDPORT_MASK (1ULL << I40IW_CQPSQ_MAPT_ADDPORT_SHIFT)
-
-/* Update Protocol Engine SDs */
-#define I40IW_CQPSQ_UPESD_SDCMD_SHIFT 0
-#define I40IW_CQPSQ_UPESD_SDCMD_MASK (0xffffffffUL << I40IW_CQPSQ_UPESD_SDCMD_SHIFT)
-
-#define I40IW_CQPSQ_UPESD_SDDATALOW_SHIFT 0
-#define I40IW_CQPSQ_UPESD_SDDATALOW_MASK \
- (0xffffffffUL << I40IW_CQPSQ_UPESD_SDDATALOW_SHIFT)
-
-#define I40IW_CQPSQ_UPESD_SDDATAHI_SHIFT 32
-#define I40IW_CQPSQ_UPESD_SDDATAHI_MASK \
- (0xffffffffULL << I40IW_CQPSQ_UPESD_SDDATAHI_SHIFT)
-#define I40IW_CQPSQ_UPESD_HMCFNID_SHIFT 0
-#define I40IW_CQPSQ_UPESD_HMCFNID_MASK \
- (0x3fUL << I40IW_CQPSQ_UPESD_HMCFNID_SHIFT)
-
-#define I40IW_CQPSQ_UPESD_ENTRY_VALID_SHIFT 63
-#define I40IW_CQPSQ_UPESD_ENTRY_VALID_MASK \
- ((u64)1 << I40IW_CQPSQ_UPESD_ENTRY_VALID_SHIFT)
-
-#define I40IW_CQPSQ_UPESD_ENTRY_COUNT_SHIFT 0
-#define I40IW_CQPSQ_UPESD_ENTRY_COUNT_MASK \
- (0xfUL << I40IW_CQPSQ_UPESD_ENTRY_COUNT_SHIFT)
-
-#define I40IW_CQPSQ_UPESD_SKIP_ENTRY_SHIFT 7
-#define I40IW_CQPSQ_UPESD_SKIP_ENTRY_MASK \
- (0x1UL << I40IW_CQPSQ_UPESD_SKIP_ENTRY_SHIFT)
-
-/* Suspend QP */
-#define I40IW_CQPSQ_SUSPENDQP_QPID_SHIFT 0
-#define I40IW_CQPSQ_SUSPENDQP_QPID_MASK (0x3FFFFUL)
-/* I40IWCQ_QPID_MASK */
-
-/* Resume QP */
-#define I40IW_CQPSQ_RESUMEQP_QSHANDLE_SHIFT 0
-#define I40IW_CQPSQ_RESUMEQP_QSHANDLE_MASK \
- (0xffffffffUL << I40IW_CQPSQ_RESUMEQP_QSHANDLE_SHIFT)
-
-#define I40IW_CQPSQ_RESUMEQP_QPID_SHIFT 0
-#define I40IW_CQPSQ_RESUMEQP_QPID_MASK (0x3FFFFUL)
-/* I40IWCQ_QPID_MASK */
-
-/* IW QP Context */
-#define I40IWQPC_DDP_VER_SHIFT 0
-#define I40IWQPC_DDP_VER_MASK (3UL << I40IWQPC_DDP_VER_SHIFT)
-
-#define I40IWQPC_SNAP_SHIFT 2
-#define I40IWQPC_SNAP_MASK (1UL << I40IWQPC_SNAP_SHIFT)
-
-#define I40IWQPC_IPV4_SHIFT 3
-#define I40IWQPC_IPV4_MASK (1UL << I40IWQPC_IPV4_SHIFT)
-
-#define I40IWQPC_NONAGLE_SHIFT 4
-#define I40IWQPC_NONAGLE_MASK (1UL << I40IWQPC_NONAGLE_SHIFT)
-
-#define I40IWQPC_INSERTVLANTAG_SHIFT 5
-#define I40IWQPC_INSERTVLANTAG_MASK (1 << I40IWQPC_INSERTVLANTAG_SHIFT)
-
-#define I40IWQPC_USESRQ_SHIFT 6
-#define I40IWQPC_USESRQ_MASK (1UL << I40IWQPC_USESRQ_SHIFT)
-
-#define I40IWQPC_TIMESTAMP_SHIFT 7
-#define I40IWQPC_TIMESTAMP_MASK (1UL << I40IWQPC_TIMESTAMP_SHIFT)
-
-#define I40IWQPC_RQWQESIZE_SHIFT 8
-#define I40IWQPC_RQWQESIZE_MASK (3UL << I40IWQPC_RQWQESIZE_SHIFT)
-
-#define I40IWQPC_INSERTL2TAG2_SHIFT 11
-#define I40IWQPC_INSERTL2TAG2_MASK (1UL << I40IWQPC_INSERTL2TAG2_SHIFT)
-
-#define I40IWQPC_LIMIT_SHIFT 12
-#define I40IWQPC_LIMIT_MASK (3UL << I40IWQPC_LIMIT_SHIFT)
-
-#define I40IWQPC_DROPOOOSEG_SHIFT 15
-#define I40IWQPC_DROPOOOSEG_MASK (1UL << I40IWQPC_DROPOOOSEG_SHIFT)
-
-#define I40IWQPC_DUPACK_THRESH_SHIFT 16
-#define I40IWQPC_DUPACK_THRESH_MASK (7UL << I40IWQPC_DUPACK_THRESH_SHIFT)
-
-#define I40IWQPC_ERR_RQ_IDX_VALID_SHIFT 19
-#define I40IWQPC_ERR_RQ_IDX_VALID_MASK (1UL << I40IWQPC_ERR_RQ_IDX_VALID_SHIFT)
-
-#define I40IWQPC_DIS_VLAN_CHECKS_SHIFT 19
-#define I40IWQPC_DIS_VLAN_CHECKS_MASK (7UL << I40IWQPC_DIS_VLAN_CHECKS_SHIFT)
-
-#define I40IWQPC_RCVTPHEN_SHIFT 28
-#define I40IWQPC_RCVTPHEN_MASK (1UL << I40IWQPC_RCVTPHEN_SHIFT)
-
-#define I40IWQPC_XMITTPHEN_SHIFT 29
-#define I40IWQPC_XMITTPHEN_MASK (1ULL << I40IWQPC_XMITTPHEN_SHIFT)
-
-#define I40IWQPC_RQTPHEN_SHIFT 30
-#define I40IWQPC_RQTPHEN_MASK (1UL << I40IWQPC_RQTPHEN_SHIFT)
-
-#define I40IWQPC_SQTPHEN_SHIFT 31
-#define I40IWQPC_SQTPHEN_MASK (1ULL << I40IWQPC_SQTPHEN_SHIFT)
-
-#define I40IWQPC_PPIDX_SHIFT 32
-#define I40IWQPC_PPIDX_MASK (0x3ffULL << I40IWQPC_PPIDX_SHIFT)
-
-#define I40IWQPC_PMENA_SHIFT 47
-#define I40IWQPC_PMENA_MASK (1ULL << I40IWQPC_PMENA_SHIFT)
-
-#define I40IWQPC_RDMAP_VER_SHIFT 62
-#define I40IWQPC_RDMAP_VER_MASK (3ULL << I40IWQPC_RDMAP_VER_SHIFT)
-
-#define I40IWQPC_SQADDR_SHIFT I40IW_CQPHC_QPCTX_SHIFT
-#define I40IWQPC_SQADDR_MASK I40IW_CQPHC_QPCTX_MASK
-
-#define I40IWQPC_RQADDR_SHIFT I40IW_CQPHC_QPCTX_SHIFT
-#define I40IWQPC_RQADDR_MASK I40IW_CQPHC_QPCTX_MASK
-
-#define I40IWQPC_TTL_SHIFT 0
-#define I40IWQPC_TTL_MASK (0xffUL << I40IWQPC_TTL_SHIFT)
-
-#define I40IWQPC_RQSIZE_SHIFT 8
-#define I40IWQPC_RQSIZE_MASK (0xfUL << I40IWQPC_RQSIZE_SHIFT)
-
-#define I40IWQPC_SQSIZE_SHIFT 12
-#define I40IWQPC_SQSIZE_MASK (0xfUL << I40IWQPC_SQSIZE_SHIFT)
-
-#define I40IWQPC_SRCMACADDRIDX_SHIFT 16
-#define I40IWQPC_SRCMACADDRIDX_MASK (0x3fUL << I40IWQPC_SRCMACADDRIDX_SHIFT)
-
-#define I40IWQPC_AVOIDSTRETCHACK_SHIFT 23
-#define I40IWQPC_AVOIDSTRETCHACK_MASK (1UL << I40IWQPC_AVOIDSTRETCHACK_SHIFT)
-
-#define I40IWQPC_TOS_SHIFT 24
-#define I40IWQPC_TOS_MASK (0xffUL << I40IWQPC_TOS_SHIFT)
-
-#define I40IWQPC_SRCPORTNUM_SHIFT 32
-#define I40IWQPC_SRCPORTNUM_MASK (0xffffULL << I40IWQPC_SRCPORTNUM_SHIFT)
-
-#define I40IWQPC_DESTPORTNUM_SHIFT 48
-#define I40IWQPC_DESTPORTNUM_MASK (0xffffULL << I40IWQPC_DESTPORTNUM_SHIFT)
-
-#define I40IWQPC_DESTIPADDR0_SHIFT 32
-#define I40IWQPC_DESTIPADDR0_MASK \
- (0xffffffffULL << I40IWQPC_DESTIPADDR0_SHIFT)
-
-#define I40IWQPC_DESTIPADDR1_SHIFT 0
-#define I40IWQPC_DESTIPADDR1_MASK \
- (0xffffffffULL << I40IWQPC_DESTIPADDR1_SHIFT)
-
-#define I40IWQPC_DESTIPADDR2_SHIFT 32
-#define I40IWQPC_DESTIPADDR2_MASK \
- (0xffffffffULL << I40IWQPC_DESTIPADDR2_SHIFT)
-
-#define I40IWQPC_DESTIPADDR3_SHIFT 0
-#define I40IWQPC_DESTIPADDR3_MASK \
- (0xffffffffULL << I40IWQPC_DESTIPADDR3_SHIFT)
-
-#define I40IWQPC_SNDMSS_SHIFT 16
-#define I40IWQPC_SNDMSS_MASK (0x3fffUL << I40IWQPC_SNDMSS_SHIFT)
-
-#define I40IWQPC_VLANTAG_SHIFT 32
-#define I40IWQPC_VLANTAG_MASK (0xffffULL << I40IWQPC_VLANTAG_SHIFT)
-
-#define I40IWQPC_ARPIDX_SHIFT 48
-#define I40IWQPC_ARPIDX_MASK (0xfffULL << I40IWQPC_ARPIDX_SHIFT)
-
-#define I40IWQPC_FLOWLABEL_SHIFT 0
-#define I40IWQPC_FLOWLABEL_MASK (0xfffffUL << I40IWQPC_FLOWLABEL_SHIFT)
-
-#define I40IWQPC_WSCALE_SHIFT 20
-#define I40IWQPC_WSCALE_MASK (1UL << I40IWQPC_WSCALE_SHIFT)
-
-#define I40IWQPC_KEEPALIVE_SHIFT 21
-#define I40IWQPC_KEEPALIVE_MASK (1UL << I40IWQPC_KEEPALIVE_SHIFT)
-
-#define I40IWQPC_IGNORE_TCP_OPT_SHIFT 22
-#define I40IWQPC_IGNORE_TCP_OPT_MASK (1UL << I40IWQPC_IGNORE_TCP_OPT_SHIFT)
-
-#define I40IWQPC_IGNORE_TCP_UNS_OPT_SHIFT 23
-#define I40IWQPC_IGNORE_TCP_UNS_OPT_MASK \
- (1UL << I40IWQPC_IGNORE_TCP_UNS_OPT_SHIFT)
-
-#define I40IWQPC_TCPSTATE_SHIFT 28
-#define I40IWQPC_TCPSTATE_MASK (0xfUL << I40IWQPC_TCPSTATE_SHIFT)
-
-#define I40IWQPC_RCVSCALE_SHIFT 32
-#define I40IWQPC_RCVSCALE_MASK (0xfULL << I40IWQPC_RCVSCALE_SHIFT)
-
-#define I40IWQPC_SNDSCALE_SHIFT 40
-#define I40IWQPC_SNDSCALE_MASK (0xfULL << I40IWQPC_SNDSCALE_SHIFT)
-
-#define I40IWQPC_PDIDX_SHIFT 48
-#define I40IWQPC_PDIDX_MASK (0x7fffULL << I40IWQPC_PDIDX_SHIFT)
-
-#define I40IWQPC_KALIVE_TIMER_MAX_PROBES_SHIFT 16
-#define I40IWQPC_KALIVE_TIMER_MAX_PROBES_MASK \
- (0xffUL << I40IWQPC_KALIVE_TIMER_MAX_PROBES_SHIFT)
-
-#define I40IWQPC_KEEPALIVE_INTERVAL_SHIFT 24
-#define I40IWQPC_KEEPALIVE_INTERVAL_MASK \
- (0xffUL << I40IWQPC_KEEPALIVE_INTERVAL_SHIFT)
-
-#define I40IWQPC_TIMESTAMP_RECENT_SHIFT 0
-#define I40IWQPC_TIMESTAMP_RECENT_MASK \
- (0xffffffffUL << I40IWQPC_TIMESTAMP_RECENT_SHIFT)
-
-#define I40IWQPC_TIMESTAMP_AGE_SHIFT 32
-#define I40IWQPC_TIMESTAMP_AGE_MASK \
- (0xffffffffULL << I40IWQPC_TIMESTAMP_AGE_SHIFT)
-
-#define I40IWQPC_SNDNXT_SHIFT 0
-#define I40IWQPC_SNDNXT_MASK (0xffffffffUL << I40IWQPC_SNDNXT_SHIFT)
-
-#define I40IWQPC_SNDWND_SHIFT 32
-#define I40IWQPC_SNDWND_MASK (0xffffffffULL << I40IWQPC_SNDWND_SHIFT)
-
-#define I40IWQPC_RCVNXT_SHIFT 0
-#define I40IWQPC_RCVNXT_MASK (0xffffffffUL << I40IWQPC_RCVNXT_SHIFT)
-
-#define I40IWQPC_RCVWND_SHIFT 32
-#define I40IWQPC_RCVWND_MASK (0xffffffffULL << I40IWQPC_RCVWND_SHIFT)
-
-#define I40IWQPC_SNDMAX_SHIFT 0
-#define I40IWQPC_SNDMAX_MASK (0xffffffffUL << I40IWQPC_SNDMAX_SHIFT)
-
-#define I40IWQPC_SNDUNA_SHIFT 32
-#define I40IWQPC_SNDUNA_MASK (0xffffffffULL << I40IWQPC_SNDUNA_SHIFT)
-
-#define I40IWQPC_SRTT_SHIFT 0
-#define I40IWQPC_SRTT_MASK (0xffffffffUL << I40IWQPC_SRTT_SHIFT)
-
-#define I40IWQPC_RTTVAR_SHIFT 32
-#define I40IWQPC_RTTVAR_MASK (0xffffffffULL << I40IWQPC_RTTVAR_SHIFT)
-
-#define I40IWQPC_SSTHRESH_SHIFT 0
-#define I40IWQPC_SSTHRESH_MASK (0xffffffffUL << I40IWQPC_SSTHRESH_SHIFT)
-
-#define I40IWQPC_CWND_SHIFT 32
-#define I40IWQPC_CWND_MASK (0xffffffffULL << I40IWQPC_CWND_SHIFT)
-
-#define I40IWQPC_SNDWL1_SHIFT 0
-#define I40IWQPC_SNDWL1_MASK (0xffffffffUL << I40IWQPC_SNDWL1_SHIFT)
-
-#define I40IWQPC_SNDWL2_SHIFT 32
-#define I40IWQPC_SNDWL2_MASK (0xffffffffULL << I40IWQPC_SNDWL2_SHIFT)
-
-#define I40IWQPC_ERR_RQ_IDX_SHIFT 32
-#define I40IWQPC_ERR_RQ_IDX_MASK (0x3fffULL << I40IWQPC_ERR_RQ_IDX_SHIFT)
-
-#define I40IWQPC_MAXSNDWND_SHIFT 0
-#define I40IWQPC_MAXSNDWND_MASK (0xffffffffUL << I40IWQPC_MAXSNDWND_SHIFT)
-
-#define I40IWQPC_REXMIT_THRESH_SHIFT 48
-#define I40IWQPC_REXMIT_THRESH_MASK (0x3fULL << I40IWQPC_REXMIT_THRESH_SHIFT)
-
-#define I40IWQPC_TXCQNUM_SHIFT 0
-#define I40IWQPC_TXCQNUM_MASK (0x1ffffUL << I40IWQPC_TXCQNUM_SHIFT)
-
-#define I40IWQPC_RXCQNUM_SHIFT 32
-#define I40IWQPC_RXCQNUM_MASK (0x1ffffULL << I40IWQPC_RXCQNUM_SHIFT)
-
-#define I40IWQPC_STAT_INDEX_SHIFT 0
-#define I40IWQPC_STAT_INDEX_MASK (0x1fULL << I40IWQPC_STAT_INDEX_SHIFT)
-
-#define I40IWQPC_Q2ADDR_SHIFT 0
-#define I40IWQPC_Q2ADDR_MASK (0xffffffffffffff00ULL << I40IWQPC_Q2ADDR_SHIFT)
-
-#define I40IWQPC_LASTBYTESENT_SHIFT 0
-#define I40IWQPC_LASTBYTESENT_MASK (0xffUL << I40IWQPC_LASTBYTESENT_SHIFT)
-
-#define I40IWQPC_SRQID_SHIFT 32
-#define I40IWQPC_SRQID_MASK (0xffULL << I40IWQPC_SRQID_SHIFT)
-
-#define I40IWQPC_ORDSIZE_SHIFT 0
-#define I40IWQPC_ORDSIZE_MASK (0x7fUL << I40IWQPC_ORDSIZE_SHIFT)
-
-#define I40IWQPC_IRDSIZE_SHIFT 16
-#define I40IWQPC_IRDSIZE_MASK (0x3UL << I40IWQPC_IRDSIZE_SHIFT)
-
-#define I40IWQPC_WRRDRSPOK_SHIFT 20
-#define I40IWQPC_WRRDRSPOK_MASK (1UL << I40IWQPC_WRRDRSPOK_SHIFT)
-
-#define I40IWQPC_RDOK_SHIFT 21
-#define I40IWQPC_RDOK_MASK (1UL << I40IWQPC_RDOK_SHIFT)
-
-#define I40IWQPC_SNDMARKERS_SHIFT 22
-#define I40IWQPC_SNDMARKERS_MASK (1UL << I40IWQPC_SNDMARKERS_SHIFT)
-
-#define I40IWQPC_BINDEN_SHIFT 23
-#define I40IWQPC_BINDEN_MASK (1UL << I40IWQPC_BINDEN_SHIFT)
-
-#define I40IWQPC_FASTREGEN_SHIFT 24
-#define I40IWQPC_FASTREGEN_MASK (1UL << I40IWQPC_FASTREGEN_SHIFT)
-
-#define I40IWQPC_PRIVEN_SHIFT 25
-#define I40IWQPC_PRIVEN_MASK (1UL << I40IWQPC_PRIVEN_SHIFT)
-
-#define I40IWQPC_USESTATSINSTANCE_SHIFT 26
-#define I40IWQPC_USESTATSINSTANCE_MASK (1UL << I40IWQPC_USESTATSINSTANCE_SHIFT)
-
-#define I40IWQPC_IWARPMODE_SHIFT 28
-#define I40IWQPC_IWARPMODE_MASK (1UL << I40IWQPC_IWARPMODE_SHIFT)
-
-#define I40IWQPC_RCVMARKERS_SHIFT 29
-#define I40IWQPC_RCVMARKERS_MASK (1UL << I40IWQPC_RCVMARKERS_SHIFT)
-
-#define I40IWQPC_ALIGNHDRS_SHIFT 30
-#define I40IWQPC_ALIGNHDRS_MASK (1UL << I40IWQPC_ALIGNHDRS_SHIFT)
-
-#define I40IWQPC_RCVNOMPACRC_SHIFT 31
-#define I40IWQPC_RCVNOMPACRC_MASK (1UL << I40IWQPC_RCVNOMPACRC_SHIFT)
-
-#define I40IWQPC_RCVMARKOFFSET_SHIFT 33
-#define I40IWQPC_RCVMARKOFFSET_MASK (0x1ffULL << I40IWQPC_RCVMARKOFFSET_SHIFT)
-
-#define I40IWQPC_SNDMARKOFFSET_SHIFT 48
-#define I40IWQPC_SNDMARKOFFSET_MASK (0x1ffULL << I40IWQPC_SNDMARKOFFSET_SHIFT)
-
-#define I40IWQPC_QPCOMPCTX_SHIFT I40IW_CQPHC_QPCTX_SHIFT
-#define I40IWQPC_QPCOMPCTX_MASK I40IW_CQPHC_QPCTX_MASK
-
-#define I40IWQPC_SQTPHVAL_SHIFT 0
-#define I40IWQPC_SQTPHVAL_MASK (0xffUL << I40IWQPC_SQTPHVAL_SHIFT)
-
-#define I40IWQPC_RQTPHVAL_SHIFT 8
-#define I40IWQPC_RQTPHVAL_MASK (0xffUL << I40IWQPC_RQTPHVAL_SHIFT)
-
-#define I40IWQPC_QSHANDLE_SHIFT 16
-#define I40IWQPC_QSHANDLE_MASK (0x3ffUL << I40IWQPC_QSHANDLE_SHIFT)
-
-#define I40IWQPC_EXCEPTION_LAN_QUEUE_SHIFT 32
-#define I40IWQPC_EXCEPTION_LAN_QUEUE_MASK (0xfffULL << \
- I40IWQPC_EXCEPTION_LAN_QUEUE_SHIFT)
-
-#define I40IWQPC_LOCAL_IPADDR3_SHIFT 0
-#define I40IWQPC_LOCAL_IPADDR3_MASK \
- (0xffffffffUL << I40IWQPC_LOCAL_IPADDR3_SHIFT)
-
-#define I40IWQPC_LOCAL_IPADDR2_SHIFT 32
-#define I40IWQPC_LOCAL_IPADDR2_MASK \
- (0xffffffffULL << I40IWQPC_LOCAL_IPADDR2_SHIFT)
-
-#define I40IWQPC_LOCAL_IPADDR1_SHIFT 0
-#define I40IWQPC_LOCAL_IPADDR1_MASK \
- (0xffffffffUL << I40IWQPC_LOCAL_IPADDR1_SHIFT)
-
-#define I40IWQPC_LOCAL_IPADDR0_SHIFT 32
-#define I40IWQPC_LOCAL_IPADDR0_MASK \
- (0xffffffffULL << I40IWQPC_LOCAL_IPADDR0_SHIFT)
-
-/* wqe size considering 32 bytes per wqe*/
-#define I40IWQP_SW_MIN_WQSIZE 4 /* 128 bytes */
-#define I40IWQP_SW_MAX_WQSIZE 2048 /* 2048 bytes */
-
-#define I40IWQP_OP_RDMA_WRITE 0
-#define I40IWQP_OP_RDMA_READ 1
-#define I40IWQP_OP_RDMA_SEND 3
-#define I40IWQP_OP_RDMA_SEND_INV 4
-#define I40IWQP_OP_RDMA_SEND_SOL_EVENT 5
-#define I40IWQP_OP_RDMA_SEND_SOL_EVENT_INV 6
-#define I40IWQP_OP_BIND_MW 8
-#define I40IWQP_OP_FAST_REGISTER 9
-#define I40IWQP_OP_LOCAL_INVALIDATE 10
-#define I40IWQP_OP_RDMA_READ_LOC_INV 11
-#define I40IWQP_OP_NOP 12
-
-#define I40IW_RSVD_SHIFT 41
-#define I40IW_RSVD_MASK (0x7fffULL << I40IW_RSVD_SHIFT)
-
-/* iwarp QP SQ WQE common fields */
-#define I40IWQPSQ_OPCODE_SHIFT 32
-#define I40IWQPSQ_OPCODE_MASK (0x3fULL << I40IWQPSQ_OPCODE_SHIFT)
-
-#define I40IWQPSQ_ADDFRAGCNT_SHIFT 38
-#define I40IWQPSQ_ADDFRAGCNT_MASK (0x7ULL << I40IWQPSQ_ADDFRAGCNT_SHIFT)
-
-#define I40IWQPSQ_PUSHWQE_SHIFT 56
-#define I40IWQPSQ_PUSHWQE_MASK (1ULL << I40IWQPSQ_PUSHWQE_SHIFT)
-
-#define I40IWQPSQ_STREAMMODE_SHIFT 58
-#define I40IWQPSQ_STREAMMODE_MASK (1ULL << I40IWQPSQ_STREAMMODE_SHIFT)
-
-#define I40IWQPSQ_WAITFORRCVPDU_SHIFT 59
-#define I40IWQPSQ_WAITFORRCVPDU_MASK (1ULL << I40IWQPSQ_WAITFORRCVPDU_SHIFT)
-
-#define I40IWQPSQ_READFENCE_SHIFT 60
-#define I40IWQPSQ_READFENCE_MASK (1ULL << I40IWQPSQ_READFENCE_SHIFT)
-
-#define I40IWQPSQ_LOCALFENCE_SHIFT 61
-#define I40IWQPSQ_LOCALFENCE_MASK (1ULL << I40IWQPSQ_LOCALFENCE_SHIFT)
-
-#define I40IWQPSQ_SIGCOMPL_SHIFT 62
-#define I40IWQPSQ_SIGCOMPL_MASK (1ULL << I40IWQPSQ_SIGCOMPL_SHIFT)
-
-#define I40IWQPSQ_VALID_SHIFT 63
-#define I40IWQPSQ_VALID_MASK (1ULL << I40IWQPSQ_VALID_SHIFT)
-
-#define I40IWQPSQ_FRAG_TO_SHIFT I40IW_CQPHC_QPCTX_SHIFT
-#define I40IWQPSQ_FRAG_TO_MASK I40IW_CQPHC_QPCTX_MASK
-
-#define I40IWQPSQ_FRAG_LEN_SHIFT 0
-#define I40IWQPSQ_FRAG_LEN_MASK (0xffffffffUL << I40IWQPSQ_FRAG_LEN_SHIFT)
-
-#define I40IWQPSQ_FRAG_STAG_SHIFT 32
-#define I40IWQPSQ_FRAG_STAG_MASK (0xffffffffULL << I40IWQPSQ_FRAG_STAG_SHIFT)
-
-#define I40IWQPSQ_REMSTAGINV_SHIFT 0
-#define I40IWQPSQ_REMSTAGINV_MASK (0xffffffffUL << I40IWQPSQ_REMSTAGINV_SHIFT)
-
-#define I40IWQPSQ_INLINEDATAFLAG_SHIFT 57
-#define I40IWQPSQ_INLINEDATAFLAG_MASK (1ULL << I40IWQPSQ_INLINEDATAFLAG_SHIFT)
-
-#define I40IWQPSQ_INLINEDATALEN_SHIFT 48
-#define I40IWQPSQ_INLINEDATALEN_MASK \
- (0x7fULL << I40IWQPSQ_INLINEDATALEN_SHIFT)
-
-/* iwarp send with push mode */
-#define I40IWQPSQ_WQDESCIDX_SHIFT 0
-#define I40IWQPSQ_WQDESCIDX_MASK (0x3fffUL << I40IWQPSQ_WQDESCIDX_SHIFT)
-
-/* rdma write */
-#define I40IWQPSQ_REMSTAG_SHIFT 0
-#define I40IWQPSQ_REMSTAG_MASK (0xffffffffUL << I40IWQPSQ_REMSTAG_SHIFT)
-
-#define I40IWQPSQ_REMTO_SHIFT I40IW_CQPHC_QPCTX_SHIFT
-#define I40IWQPSQ_REMTO_MASK I40IW_CQPHC_QPCTX_MASK
-
-/* memory window */
-#define I40IWQPSQ_STAGRIGHTS_SHIFT 48
-#define I40IWQPSQ_STAGRIGHTS_MASK (0x1fULL << I40IWQPSQ_STAGRIGHTS_SHIFT)
-
-#define I40IWQPSQ_VABASEDTO_SHIFT 53
-#define I40IWQPSQ_VABASEDTO_MASK (1ULL << I40IWQPSQ_VABASEDTO_SHIFT)
-
-#define I40IWQPSQ_MWLEN_SHIFT I40IW_CQPHC_QPCTX_SHIFT
-#define I40IWQPSQ_MWLEN_MASK I40IW_CQPHC_QPCTX_MASK
-
-#define I40IWQPSQ_PARENTMRSTAG_SHIFT 0
-#define I40IWQPSQ_PARENTMRSTAG_MASK \
- (0xffffffffUL << I40IWQPSQ_PARENTMRSTAG_SHIFT)
-
-#define I40IWQPSQ_MWSTAG_SHIFT 32
-#define I40IWQPSQ_MWSTAG_MASK (0xffffffffULL << I40IWQPSQ_MWSTAG_SHIFT)
-
-#define I40IWQPSQ_BASEVA_TO_FBO_SHIFT I40IW_CQPHC_QPCTX_SHIFT
-#define I40IWQPSQ_BASEVA_TO_FBO_MASK I40IW_CQPHC_QPCTX_MASK
-
-/* Local Invalidate */
-#define I40IWQPSQ_LOCSTAG_SHIFT 32
-#define I40IWQPSQ_LOCSTAG_MASK (0xffffffffULL << I40IWQPSQ_LOCSTAG_SHIFT)
-
-/* Fast Register */
-#define I40IWQPSQ_STAGKEY_SHIFT 0
-#define I40IWQPSQ_STAGKEY_MASK (0xffUL << I40IWQPSQ_STAGKEY_SHIFT)
-
-#define I40IWQPSQ_STAGINDEX_SHIFT 8
-#define I40IWQPSQ_STAGINDEX_MASK (0xffffffUL << I40IWQPSQ_STAGINDEX_SHIFT)
-
-#define I40IWQPSQ_COPYHOSTPBLS_SHIFT 43
-#define I40IWQPSQ_COPYHOSTPBLS_MASK (1ULL << I40IWQPSQ_COPYHOSTPBLS_SHIFT)
-
-#define I40IWQPSQ_LPBLSIZE_SHIFT 44
-#define I40IWQPSQ_LPBLSIZE_MASK (3ULL << I40IWQPSQ_LPBLSIZE_SHIFT)
-
-#define I40IWQPSQ_HPAGESIZE_SHIFT 46
-#define I40IWQPSQ_HPAGESIZE_MASK (3ULL << I40IWQPSQ_HPAGESIZE_SHIFT)
-
-#define I40IWQPSQ_STAGLEN_SHIFT 0
-#define I40IWQPSQ_STAGLEN_MASK (0x1ffffffffffULL << I40IWQPSQ_STAGLEN_SHIFT)
-
-#define I40IWQPSQ_FIRSTPMPBLIDXLO_SHIFT 48
-#define I40IWQPSQ_FIRSTPMPBLIDXLO_MASK \
- (0xffffULL << I40IWQPSQ_FIRSTPMPBLIDXLO_SHIFT)
-
-#define I40IWQPSQ_FIRSTPMPBLIDXHI_SHIFT 0
-#define I40IWQPSQ_FIRSTPMPBLIDXHI_MASK \
- (0xfffUL << I40IWQPSQ_FIRSTPMPBLIDXHI_SHIFT)
-
-#define I40IWQPSQ_PBLADDR_SHIFT 12
-#define I40IWQPSQ_PBLADDR_MASK (0xfffffffffffffULL << I40IWQPSQ_PBLADDR_SHIFT)
-
-/* iwarp QP RQ WQE common fields */
-#define I40IWQPRQ_ADDFRAGCNT_SHIFT I40IWQPSQ_ADDFRAGCNT_SHIFT
-#define I40IWQPRQ_ADDFRAGCNT_MASK I40IWQPSQ_ADDFRAGCNT_MASK
-
-#define I40IWQPRQ_VALID_SHIFT I40IWQPSQ_VALID_SHIFT
-#define I40IWQPRQ_VALID_MASK I40IWQPSQ_VALID_MASK
-
-#define I40IWQPRQ_COMPLCTX_SHIFT I40IW_CQPHC_QPCTX_SHIFT
-#define I40IWQPRQ_COMPLCTX_MASK I40IW_CQPHC_QPCTX_MASK
-
-#define I40IWQPRQ_FRAG_LEN_SHIFT I40IWQPSQ_FRAG_LEN_SHIFT
-#define I40IWQPRQ_FRAG_LEN_MASK I40IWQPSQ_FRAG_LEN_MASK
-
-#define I40IWQPRQ_STAG_SHIFT I40IWQPSQ_FRAG_STAG_SHIFT
-#define I40IWQPRQ_STAG_MASK I40IWQPSQ_FRAG_STAG_MASK
-
-#define I40IWQPRQ_TO_SHIFT I40IWQPSQ_FRAG_TO_SHIFT
-#define I40IWQPRQ_TO_MASK I40IWQPSQ_FRAG_TO_MASK
-
-/* Query FPM CQP buf */
-#define I40IW_QUERY_FPM_MAX_QPS_SHIFT 0
-#define I40IW_QUERY_FPM_MAX_QPS_MASK \
- (0x7ffffUL << I40IW_QUERY_FPM_MAX_QPS_SHIFT)
-
-#define I40IW_QUERY_FPM_MAX_CQS_SHIFT 0
-#define I40IW_QUERY_FPM_MAX_CQS_MASK \
- (0x3ffffUL << I40IW_QUERY_FPM_MAX_CQS_SHIFT)
-
-#define I40IW_QUERY_FPM_FIRST_PE_SD_INDEX_SHIFT 0
-#define I40IW_QUERY_FPM_FIRST_PE_SD_INDEX_MASK \
- (0x3fffUL << I40IW_QUERY_FPM_FIRST_PE_SD_INDEX_SHIFT)
-
-#define I40IW_QUERY_FPM_MAX_PE_SDS_SHIFT 32
-#define I40IW_QUERY_FPM_MAX_PE_SDS_MASK \
- (0x3fffULL << I40IW_QUERY_FPM_MAX_PE_SDS_SHIFT)
-
-#define I40IW_QUERY_FPM_MAX_QPS_SHIFT 0
-#define I40IW_QUERY_FPM_MAX_QPS_MASK \
- (0x7ffffUL << I40IW_QUERY_FPM_MAX_QPS_SHIFT)
-
-#define I40IW_QUERY_FPM_MAX_CQS_SHIFT 0
-#define I40IW_QUERY_FPM_MAX_CQS_MASK \
- (0x3ffffUL << I40IW_QUERY_FPM_MAX_CQS_SHIFT)
-
-#define I40IW_QUERY_FPM_MAX_CEQS_SHIFT 0
-#define I40IW_QUERY_FPM_MAX_CEQS_MASK \
- (0xffUL << I40IW_QUERY_FPM_MAX_CEQS_SHIFT)
-
-#define I40IW_QUERY_FPM_XFBLOCKSIZE_SHIFT 32
-#define I40IW_QUERY_FPM_XFBLOCKSIZE_MASK \
- (0xffffffffULL << I40IW_QUERY_FPM_XFBLOCKSIZE_SHIFT)
-
-#define I40IW_QUERY_FPM_Q1BLOCKSIZE_SHIFT 32
-#define I40IW_QUERY_FPM_Q1BLOCKSIZE_MASK \
- (0xffffffffULL << I40IW_QUERY_FPM_Q1BLOCKSIZE_SHIFT)
-
-#define I40IW_QUERY_FPM_HTMULTIPLIER_SHIFT 16
-#define I40IW_QUERY_FPM_HTMULTIPLIER_MASK \
- (0xfUL << I40IW_QUERY_FPM_HTMULTIPLIER_SHIFT)
-
-#define I40IW_QUERY_FPM_TIMERBUCKET_SHIFT 32
-#define I40IW_QUERY_FPM_TIMERBUCKET_MASK \
- (0xffFFULL << I40IW_QUERY_FPM_TIMERBUCKET_SHIFT)
-
-/* Static HMC pages allocated buf */
-#define I40IW_SHMC_PAGE_ALLOCATED_HMC_FN_ID_SHIFT 0
-#define I40IW_SHMC_PAGE_ALLOCATED_HMC_FN_ID_MASK \
- (0x3fUL << I40IW_SHMC_PAGE_ALLOCATED_HMC_FN_ID_SHIFT)
-
-#define I40IW_HW_PAGE_SIZE 4096
-#define I40IW_DONE_COUNT 1000
-#define I40IW_SLEEP_COUNT 10
-
-enum {
- I40IW_QUEUES_ALIGNMENT_MASK = (128 - 1),
- I40IW_AEQ_ALIGNMENT_MASK = (256 - 1),
- I40IW_Q2_ALIGNMENT_MASK = (256 - 1),
- I40IW_CEQ_ALIGNMENT_MASK = (256 - 1),
- I40IW_CQ0_ALIGNMENT_MASK = (256 - 1),
- I40IW_HOST_CTX_ALIGNMENT_MASK = (4 - 1),
- I40IW_SHADOWAREA_MASK = (128 - 1),
- I40IW_FPM_QUERY_BUF_ALIGNMENT_MASK = 0,
- I40IW_FPM_COMMIT_BUF_ALIGNMENT_MASK = 0
-};
-
-enum i40iw_alignment {
- I40IW_CQP_ALIGNMENT = 0x200,
- I40IW_AEQ_ALIGNMENT = 0x100,
- I40IW_CEQ_ALIGNMENT = 0x100,
- I40IW_CQ0_ALIGNMENT = 0x100,
- I40IW_SD_BUF_ALIGNMENT = 0x100
-};
-
-#define I40IW_WQE_SIZE_64 64
-
-#define I40IW_QP_WQE_MIN_SIZE 32
-#define I40IW_QP_WQE_MAX_SIZE 128
-
-#define I40IW_CQE_QTYPE_RQ 0
-#define I40IW_CQE_QTYPE_SQ 1
-
-#define I40IW_RING_INIT(_ring, _size) \
- { \
- (_ring).head = 0; \
- (_ring).tail = 0; \
- (_ring).size = (_size); \
- }
-#define I40IW_RING_GETSIZE(_ring) ((_ring).size)
-#define I40IW_RING_GETCURRENT_HEAD(_ring) ((_ring).head)
-#define I40IW_RING_GETCURRENT_TAIL(_ring) ((_ring).tail)
-
-#define I40IW_RING_MOVE_HEAD(_ring, _retcode) \
- { \
- register u32 size; \
- size = (_ring).size; \
- if (!I40IW_RING_FULL_ERR(_ring)) { \
- (_ring).head = ((_ring).head + 1) % size; \
- (_retcode) = 0; \
- } else { \
- (_retcode) = I40IW_ERR_RING_FULL; \
- } \
- }
-
-#define I40IW_RING_MOVE_HEAD_BY_COUNT(_ring, _count, _retcode) \
- { \
- register u32 size; \
- size = (_ring).size; \
- if ((I40IW_RING_WORK_AVAILABLE(_ring) + (_count)) < size) { \
- (_ring).head = ((_ring).head + (_count)) % size; \
- (_retcode) = 0; \
- } else { \
- (_retcode) = I40IW_ERR_RING_FULL; \
- } \
- }
-
-#define I40IW_RING_MOVE_TAIL(_ring) \
- (_ring).tail = ((_ring).tail + 1) % (_ring).size
-
-#define I40IW_RING_MOVE_HEAD_NOCHECK(_ring) \
- (_ring).head = ((_ring).head + 1) % (_ring).size
-
-#define I40IW_RING_MOVE_TAIL_BY_COUNT(_ring, _count) \
- (_ring).tail = ((_ring).tail + (_count)) % (_ring).size
-
-#define I40IW_RING_SET_TAIL(_ring, _pos) \
- (_ring).tail = (_pos) % (_ring).size
-
-#define I40IW_RING_FULL_ERR(_ring) \
- ( \
- (I40IW_RING_WORK_AVAILABLE(_ring) == ((_ring).size - 1)) \
- )
-
-#define I40IW_ERR_RING_FULL2(_ring) \
- ( \
- (I40IW_RING_WORK_AVAILABLE(_ring) == ((_ring).size - 2)) \
- )
-
-#define I40IW_ERR_RING_FULL3(_ring) \
- ( \
- (I40IW_RING_WORK_AVAILABLE(_ring) == ((_ring).size - 3)) \
- )
-
-#define I40IW_RING_MORE_WORK(_ring) \
- ( \
- (I40IW_RING_WORK_AVAILABLE(_ring) != 0) \
- )
-
-#define I40IW_RING_WORK_AVAILABLE(_ring) \
- ( \
- (((_ring).head + (_ring).size - (_ring).tail) % (_ring).size) \
- )
-
-#define I40IW_RING_GET_WQES_AVAILABLE(_ring) \
- ( \
- ((_ring).size - I40IW_RING_WORK_AVAILABLE(_ring) - 1) \
- )
-
-#define I40IW_ATOMIC_RING_MOVE_HEAD(_ring, index, _retcode) \
- { \
- index = I40IW_RING_GETCURRENT_HEAD(_ring); \
- I40IW_RING_MOVE_HEAD(_ring, _retcode); \
- }
-
-/* Async Events codes */
-#define I40IW_AE_AMP_UNALLOCATED_STAG 0x0102
-#define I40IW_AE_AMP_INVALID_STAG 0x0103
-#define I40IW_AE_AMP_BAD_QP 0x0104
-#define I40IW_AE_AMP_BAD_PD 0x0105
-#define I40IW_AE_AMP_BAD_STAG_KEY 0x0106
-#define I40IW_AE_AMP_BAD_STAG_INDEX 0x0107
-#define I40IW_AE_AMP_BOUNDS_VIOLATION 0x0108
-#define I40IW_AE_AMP_RIGHTS_VIOLATION 0x0109
-#define I40IW_AE_AMP_TO_WRAP 0x010a
-#define I40IW_AE_AMP_FASTREG_SHARED 0x010b
-#define I40IW_AE_AMP_FASTREG_VALID_STAG 0x010c
-#define I40IW_AE_AMP_FASTREG_MW_STAG 0x010d
-#define I40IW_AE_AMP_FASTREG_INVALID_RIGHTS 0x010e
-#define I40IW_AE_AMP_FASTREG_PBL_TABLE_OVERFLOW 0x010f
-#define I40IW_AE_AMP_FASTREG_INVALID_LENGTH 0x0110
-#define I40IW_AE_AMP_INVALIDATE_SHARED 0x0111
-#define I40IW_AE_AMP_INVALIDATE_NO_REMOTE_ACCESS_RIGHTS 0x0112
-#define I40IW_AE_AMP_INVALIDATE_MR_WITH_BOUND_WINDOWS 0x0113
-#define I40IW_AE_AMP_MWBIND_VALID_STAG 0x0114
-#define I40IW_AE_AMP_MWBIND_OF_MR_STAG 0x0115
-#define I40IW_AE_AMP_MWBIND_TO_ZERO_BASED_STAG 0x0116
-#define I40IW_AE_AMP_MWBIND_TO_MW_STAG 0x0117
-#define I40IW_AE_AMP_MWBIND_INVALID_RIGHTS 0x0118
-#define I40IW_AE_AMP_MWBIND_INVALID_BOUNDS 0x0119
-#define I40IW_AE_AMP_MWBIND_TO_INVALID_PARENT 0x011a
-#define I40IW_AE_AMP_MWBIND_BIND_DISABLED 0x011b
-#define I40IW_AE_AMP_WQE_INVALID_PARAMETER 0x0130
-#define I40IW_AE_BAD_CLOSE 0x0201
-#define I40IW_AE_RDMAP_ROE_BAD_LLP_CLOSE 0x0202
-#define I40IW_AE_CQ_OPERATION_ERROR 0x0203
-#define I40IW_AE_PRIV_OPERATION_DENIED 0x011c
-#define I40IW_AE_RDMA_READ_WHILE_ORD_ZERO 0x0205
-#define I40IW_AE_STAG_ZERO_INVALID 0x0206
-#define I40IW_AE_IB_RREQ_AND_Q1_FULL 0x0207
-#define I40IW_AE_SRQ_LIMIT 0x0209
-#define I40IW_AE_WQE_UNEXPECTED_OPCODE 0x020a
-#define I40IW_AE_WQE_INVALID_PARAMETER 0x020b
-#define I40IW_AE_WQE_LSMM_TOO_LONG 0x0220
-#define I40IW_AE_DDP_INVALID_MSN_GAP_IN_MSN 0x0301
-#define I40IW_AE_DDP_INVALID_MSN_RANGE_IS_NOT_VALID 0x0302
-#define I40IW_AE_DDP_UBE_DDP_MESSAGE_TOO_LONG_FOR_AVAILABLE_BUFFER 0x0303
-#define I40IW_AE_DDP_UBE_INVALID_DDP_VERSION 0x0304
-#define I40IW_AE_DDP_UBE_INVALID_MO 0x0305
-#define I40IW_AE_DDP_UBE_INVALID_MSN_NO_BUFFER_AVAILABLE 0x0306
-#define I40IW_AE_DDP_UBE_INVALID_QN 0x0307
-#define I40IW_AE_DDP_NO_L_BIT 0x0308
-#define I40IW_AE_RDMAP_ROE_INVALID_RDMAP_VERSION 0x0311
-#define I40IW_AE_RDMAP_ROE_UNEXPECTED_OPCODE 0x0312
-#define I40IW_AE_ROE_INVALID_RDMA_READ_REQUEST 0x0313
-#define I40IW_AE_ROE_INVALID_RDMA_WRITE_OR_READ_RESP 0x0314
-#define I40IW_AE_INVALID_ARP_ENTRY 0x0401
-#define I40IW_AE_INVALID_TCP_OPTION_RCVD 0x0402
-#define I40IW_AE_STALE_ARP_ENTRY 0x0403
-#define I40IW_AE_INVALID_WQE_LENGTH 0x0404
-#define I40IW_AE_INVALID_MAC_ENTRY 0x0405
-#define I40IW_AE_LLP_CLOSE_COMPLETE 0x0501
-#define I40IW_AE_LLP_CONNECTION_RESET 0x0502
-#define I40IW_AE_LLP_FIN_RECEIVED 0x0503
-#define I40IW_AE_LLP_RECEIVED_MARKER_AND_LENGTH_FIELDS_DONT_MATCH 0x0504
-#define I40IW_AE_LLP_RECEIVED_MPA_CRC_ERROR 0x0505
-#define I40IW_AE_LLP_SEGMENT_TOO_LARGE 0x0506
-#define I40IW_AE_LLP_SEGMENT_TOO_SMALL 0x0507
-#define I40IW_AE_LLP_SYN_RECEIVED 0x0508
-#define I40IW_AE_LLP_TERMINATE_RECEIVED 0x0509
-#define I40IW_AE_LLP_TOO_MANY_RETRIES 0x050a
-#define I40IW_AE_LLP_TOO_MANY_KEEPALIVE_RETRIES 0x050b
-#define I40IW_AE_LLP_DOUBT_REACHABILITY 0x050c
-#define I40IW_AE_LLP_RX_VLAN_MISMATCH 0x050d
-#define I40IW_AE_RESOURCE_EXHAUSTION 0x0520
-#define I40IW_AE_RESET_SENT 0x0601
-#define I40IW_AE_TERMINATE_SENT 0x0602
-#define I40IW_AE_RESET_NOT_SENT 0x0603
-#define I40IW_AE_LCE_QP_CATASTROPHIC 0x0700
-#define I40IW_AE_LCE_FUNCTION_CATASTROPHIC 0x0701
-#define I40IW_AE_LCE_CQ_CATASTROPHIC 0x0702
-#define I40IW_AE_UDA_XMIT_FRAG_SEQ 0x0800
-#define I40IW_AE_UDA_XMIT_DGRAM_TOO_LONG 0x0801
-#define I40IW_AE_UDA_XMIT_IPADDR_MISMATCH 0x0802
-#define I40IW_AE_QP_SUSPEND_COMPLETE 0x0900
-
-#define OP_DELETE_LOCAL_MAC_IPADDR_ENTRY 1
-#define OP_CEQ_DESTROY 2
-#define OP_AEQ_DESTROY 3
-#define OP_DELETE_ARP_CACHE_ENTRY 4
-#define OP_MANAGE_APBVT_ENTRY 5
-#define OP_CEQ_CREATE 6
-#define OP_AEQ_CREATE 7
-#define OP_ALLOC_LOCAL_MAC_IPADDR_ENTRY 8
-#define OP_ADD_LOCAL_MAC_IPADDR_ENTRY 9
-#define OP_MANAGE_QHASH_TABLE_ENTRY 10
-#define OP_QP_MODIFY 11
-#define OP_QP_UPLOAD_CONTEXT 12
-#define OP_CQ_CREATE 13
-#define OP_CQ_DESTROY 14
-#define OP_QP_CREATE 15
-#define OP_QP_DESTROY 16
-#define OP_ALLOC_STAG 17
-#define OP_MR_REG_NON_SHARED 18
-#define OP_DEALLOC_STAG 19
-#define OP_MW_ALLOC 20
-#define OP_QP_FLUSH_WQES 21
-#define OP_ADD_ARP_CACHE_ENTRY 22
-#define OP_MANAGE_PUSH_PAGE 23
-#define OP_UPDATE_PE_SDS 24
-#define OP_MANAGE_HMC_PM_FUNC_TABLE 25
-#define OP_SUSPEND 26
-#define OP_RESUME 27
-#define OP_MANAGE_VF_PBLE_BP 28
-#define OP_QUERY_FPM_VALUES 29
-#define OP_COMMIT_FPM_VALUES 30
-#define OP_REQUESTED_COMMANDS 31
-#define OP_COMPLETED_COMMANDS 32
-#define OP_SIZE_CQP_STAT_ARRAY 33
-
-#endif
diff --git a/drivers/infiniband/hw/i40iw/i40iw_hmc.c b/drivers/infiniband/hw/i40iw/i40iw_hmc.c
deleted file mode 100644
index 5484cbf55f0f..000000000000
--- a/drivers/infiniband/hw/i40iw/i40iw_hmc.c
+++ /dev/null
@@ -1,821 +0,0 @@
-/*******************************************************************************
-*
-* Copyright (c) 2015-2016 Intel Corporation. All rights reserved.
-*
-* This software is available to you under a choice of one of two
-* licenses. You may choose to be licensed under the terms of the GNU
-* General Public License (GPL) Version 2, available from the file
-* COPYING in the main directory of this source tree, or the
-* OpenFabrics.org BSD license below:
-*
-* Redistribution and use in source and binary forms, with or
-* without modification, are permitted provided that the following
-* conditions are met:
-*
-* - Redistributions of source code must retain the above
-* copyright notice, this list of conditions and the following
-* disclaimer.
-*
-* - Redistributions in binary form must reproduce the above
-* copyright notice, this list of conditions and the following
-* disclaimer in the documentation and/or other materials
-* provided with the distribution.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-* SOFTWARE.
-*
-*******************************************************************************/
-
-#include "i40iw_osdep.h"
-#include "i40iw_register.h"
-#include "i40iw_status.h"
-#include "i40iw_hmc.h"
-#include "i40iw_d.h"
-#include "i40iw_type.h"
-#include "i40iw_p.h"
-#include "i40iw_vf.h"
-#include "i40iw_virtchnl.h"
-
-/**
- * i40iw_find_sd_index_limit - finds segment descriptor index limit
- * @hmc_info: pointer to the HMC configuration information structure
- * @type: type of HMC resources we're searching
- * @index: starting index for the object
- * @cnt: number of objects we're trying to create
- * @sd_idx: pointer to return index of the segment descriptor in question
- * @sd_limit: pointer to return the maximum number of segment descriptors
- *
- * This function calculates the segment descriptor index and index limit
- * for the resource defined by i40iw_hmc_rsrc_type.
- */
-
-static inline void i40iw_find_sd_index_limit(struct i40iw_hmc_info *hmc_info,
- u32 type,
- u32 idx,
- u32 cnt,
- u32 *sd_idx,
- u32 *sd_limit)
-{
- u64 fpm_addr, fpm_limit;
-
- fpm_addr = hmc_info->hmc_obj[(type)].base +
- hmc_info->hmc_obj[type].size * idx;
- fpm_limit = fpm_addr + hmc_info->hmc_obj[type].size * cnt;
- *sd_idx = (u32)(fpm_addr / I40IW_HMC_DIRECT_BP_SIZE);
- *sd_limit = (u32)((fpm_limit - 1) / I40IW_HMC_DIRECT_BP_SIZE);
- *sd_limit += 1;
-}
-
-/**
- * i40iw_find_pd_index_limit - finds page descriptor index limit
- * @hmc_info: pointer to the HMC configuration information struct
- * @type: HMC resource type we're examining
- * @idx: starting index for the object
- * @cnt: number of objects we're trying to create
- * @pd_index: pointer to return page descriptor index
- * @pd_limit: pointer to return page descriptor index limit
- *
- * Calculates the page descriptor index and index limit for the resource
- * defined by i40iw_hmc_rsrc_type.
- */
-
-static inline void i40iw_find_pd_index_limit(struct i40iw_hmc_info *hmc_info,
- u32 type,
- u32 idx,
- u32 cnt,
- u32 *pd_idx,
- u32 *pd_limit)
-{
- u64 fpm_adr, fpm_limit;
-
- fpm_adr = hmc_info->hmc_obj[type].base +
- hmc_info->hmc_obj[type].size * idx;
- fpm_limit = fpm_adr + (hmc_info)->hmc_obj[(type)].size * (cnt);
- *(pd_idx) = (u32)(fpm_adr / I40IW_HMC_PAGED_BP_SIZE);
- *(pd_limit) = (u32)((fpm_limit - 1) / I40IW_HMC_PAGED_BP_SIZE);
- *(pd_limit) += 1;
-}
-
-/**
- * i40iw_set_sd_entry - setup entry for sd programming
- * @pa: physical addr
- * @idx: sd index
- * @type: paged or direct sd
- * @entry: sd entry ptr
- */
-static inline void i40iw_set_sd_entry(u64 pa,
- u32 idx,
- enum i40iw_sd_entry_type type,
- struct update_sd_entry *entry)
-{
- entry->data = pa | (I40IW_HMC_MAX_BP_COUNT << I40E_PFHMC_SDDATALOW_PMSDBPCOUNT_SHIFT) |
- (((type == I40IW_SD_TYPE_PAGED) ? 0 : 1) <<
- I40E_PFHMC_SDDATALOW_PMSDTYPE_SHIFT) |
- (1 << I40E_PFHMC_SDDATALOW_PMSDVALID_SHIFT);
- entry->cmd = (idx | (1 << I40E_PFHMC_SDCMD_PMSDWR_SHIFT) | (1 << 15));
-}
-
-/**
- * i40iw_clr_sd_entry - setup entry for sd clear
- * @idx: sd index
- * @type: paged or direct sd
- * @entry: sd entry ptr
- */
-static inline void i40iw_clr_sd_entry(u32 idx, enum i40iw_sd_entry_type type,
- struct update_sd_entry *entry)
-{
- entry->data = (I40IW_HMC_MAX_BP_COUNT <<
- I40E_PFHMC_SDDATALOW_PMSDBPCOUNT_SHIFT) |
- (((type == I40IW_SD_TYPE_PAGED) ? 0 : 1) <<
- I40E_PFHMC_SDDATALOW_PMSDTYPE_SHIFT);
- entry->cmd = (idx | (1 << I40E_PFHMC_SDCMD_PMSDWR_SHIFT) | (1 << 15));
-}
-
-/**
- * i40iw_hmc_sd_one - setup 1 sd entry for cqp
- * @dev: pointer to the device structure
- * @hmc_fn_id: hmc's function id
- * @pa: physical addr
- * @sd_idx: sd index
- * @type: paged or direct sd
- * @setsd: flag to set or clear sd
- */
-enum i40iw_status_code i40iw_hmc_sd_one(struct i40iw_sc_dev *dev,
- u8 hmc_fn_id,
- u64 pa, u32 sd_idx,
- enum i40iw_sd_entry_type type,
- bool setsd)
-{
- struct i40iw_update_sds_info sdinfo;
-
- sdinfo.cnt = 1;
- sdinfo.hmc_fn_id = hmc_fn_id;
- if (setsd)
- i40iw_set_sd_entry(pa, sd_idx, type, sdinfo.entry);
- else
- i40iw_clr_sd_entry(sd_idx, type, sdinfo.entry);
-
- return dev->cqp->process_cqp_sds(dev, &sdinfo);
-}
-
-/**
- * i40iw_hmc_sd_grp - setup group od sd entries for cqp
- * @dev: pointer to the device structure
- * @hmc_info: pointer to the HMC configuration information struct
- * @sd_index: sd index
- * @sd_cnt: number of sd entries
- * @setsd: flag to set or clear sd
- */
-static enum i40iw_status_code i40iw_hmc_sd_grp(struct i40iw_sc_dev *dev,
- struct i40iw_hmc_info *hmc_info,
- u32 sd_index,
- u32 sd_cnt,
- bool setsd)
-{
- struct i40iw_hmc_sd_entry *sd_entry;
- struct i40iw_update_sds_info sdinfo;
- u64 pa;
- u32 i;
- enum i40iw_status_code ret_code = 0;
-
- memset(&sdinfo, 0, sizeof(sdinfo));
- sdinfo.hmc_fn_id = hmc_info->hmc_fn_id;
- for (i = sd_index; i < sd_index + sd_cnt; i++) {
- sd_entry = &hmc_info->sd_table.sd_entry[i];
- if (!sd_entry ||
- (!sd_entry->valid && setsd) ||
- (sd_entry->valid && !setsd))
- continue;
- if (setsd) {
- pa = (sd_entry->entry_type == I40IW_SD_TYPE_PAGED) ?
- sd_entry->u.pd_table.pd_page_addr.pa :
- sd_entry->u.bp.addr.pa;
- i40iw_set_sd_entry(pa, i, sd_entry->entry_type,
- &sdinfo.entry[sdinfo.cnt]);
- } else {
- i40iw_clr_sd_entry(i, sd_entry->entry_type,
- &sdinfo.entry[sdinfo.cnt]);
- }
- sdinfo.cnt++;
- if (sdinfo.cnt == I40IW_MAX_SD_ENTRIES) {
- ret_code = dev->cqp->process_cqp_sds(dev, &sdinfo);
- if (ret_code) {
- i40iw_debug(dev, I40IW_DEBUG_HMC,
- "i40iw_hmc_sd_grp: sd_programming failed err=%d\n",
- ret_code);
- return ret_code;
- }
- sdinfo.cnt = 0;
- }
- }
- if (sdinfo.cnt)
- ret_code = dev->cqp->process_cqp_sds(dev, &sdinfo);
-
- return ret_code;
-}
-
-/**
- * i40iw_vfdev_from_fpm - return vf dev ptr for hmc function id
- * @dev: pointer to the device structure
- * @hmc_fn_id: hmc's function id
- */
-struct i40iw_vfdev *i40iw_vfdev_from_fpm(struct i40iw_sc_dev *dev, u8 hmc_fn_id)
-{
- struct i40iw_vfdev *vf_dev = NULL;
- u16 idx;
-
- for (idx = 0; idx < I40IW_MAX_PE_ENABLED_VF_COUNT; idx++) {
- if (dev->vf_dev[idx] &&
- ((u8)dev->vf_dev[idx]->pmf_index == hmc_fn_id)) {
- vf_dev = dev->vf_dev[idx];
- break;
- }
- }
- return vf_dev;
-}
-
-/**
- * i40iw_vf_hmcinfo_from_fpm - get ptr to hmc for func_id
- * @dev: pointer to the device structure
- * @hmc_fn_id: hmc's function id
- */
-struct i40iw_hmc_info *i40iw_vf_hmcinfo_from_fpm(struct i40iw_sc_dev *dev,
- u8 hmc_fn_id)
-{
- struct i40iw_hmc_info *hmc_info = NULL;
- u16 idx;
-
- for (idx = 0; idx < I40IW_MAX_PE_ENABLED_VF_COUNT; idx++) {
- if (dev->vf_dev[idx] &&
- ((u8)dev->vf_dev[idx]->pmf_index == hmc_fn_id)) {
- hmc_info = &dev->vf_dev[idx]->hmc_info;
- break;
- }
- }
- return hmc_info;
-}
-
-/**
- * i40iw_hmc_finish_add_sd_reg - program sd entries for objects
- * @dev: pointer to the device structure
- * @info: create obj info
- */
-static enum i40iw_status_code i40iw_hmc_finish_add_sd_reg(struct i40iw_sc_dev *dev,
- struct i40iw_hmc_create_obj_info *info)
-{
- if (info->start_idx >= info->hmc_info->hmc_obj[info->rsrc_type].cnt)
- return I40IW_ERR_INVALID_HMC_OBJ_INDEX;
-
- if ((info->start_idx + info->count) >
- info->hmc_info->hmc_obj[info->rsrc_type].cnt)
- return I40IW_ERR_INVALID_HMC_OBJ_COUNT;
-
- if (!info->add_sd_cnt)
- return 0;
-
- return i40iw_hmc_sd_grp(dev, info->hmc_info,
- info->hmc_info->sd_indexes[0],
- info->add_sd_cnt, true);
-}
-
-/**
- * i40iw_create_iw_hmc_obj - allocate backing store for hmc objects
- * @dev: pointer to the device structure
- * @info: pointer to i40iw_hmc_iw_create_obj_info struct
- *
- * This will allocate memory for PDs and backing pages and populate
- * the sd and pd entries.
- */
-enum i40iw_status_code i40iw_sc_create_hmc_obj(struct i40iw_sc_dev *dev,
- struct i40iw_hmc_create_obj_info *info)
-{
- struct i40iw_hmc_sd_entry *sd_entry;
- u32 sd_idx, sd_lmt;
- u32 pd_idx = 0, pd_lmt = 0;
- u32 pd_idx1 = 0, pd_lmt1 = 0;
- u32 i, j;
- bool pd_error = false;
- enum i40iw_status_code ret_code = 0;
-
- if (info->start_idx >= info->hmc_info->hmc_obj[info->rsrc_type].cnt)
- return I40IW_ERR_INVALID_HMC_OBJ_INDEX;
-
- if ((info->start_idx + info->count) >
- info->hmc_info->hmc_obj[info->rsrc_type].cnt) {
- i40iw_debug(dev, I40IW_DEBUG_HMC,
- "%s: error type %u, start = %u, req cnt %u, cnt = %u\n",
- __func__, info->rsrc_type, info->start_idx, info->count,
- info->hmc_info->hmc_obj[info->rsrc_type].cnt);
- return I40IW_ERR_INVALID_HMC_OBJ_COUNT;
- }
-
- if (!dev->is_pf)
- return i40iw_vchnl_vf_add_hmc_objs(dev, info->rsrc_type, 0, info->count);
-
- i40iw_find_sd_index_limit(info->hmc_info, info->rsrc_type,
- info->start_idx, info->count,
- &sd_idx, &sd_lmt);
- if (sd_idx >= info->hmc_info->sd_table.sd_cnt ||
- sd_lmt > info->hmc_info->sd_table.sd_cnt) {
- return I40IW_ERR_INVALID_SD_INDEX;
- }
- i40iw_find_pd_index_limit(info->hmc_info, info->rsrc_type,
- info->start_idx, info->count, &pd_idx, &pd_lmt);
-
- for (j = sd_idx; j < sd_lmt; j++) {
- ret_code = i40iw_add_sd_table_entry(dev->hw, info->hmc_info,
- j,
- info->entry_type,
- I40IW_HMC_DIRECT_BP_SIZE);
- if (ret_code)
- goto exit_sd_error;
- sd_entry = &info->hmc_info->sd_table.sd_entry[j];
-
- if ((sd_entry->entry_type == I40IW_SD_TYPE_PAGED) &&
- ((dev->hmc_info == info->hmc_info) &&
- (info->rsrc_type != I40IW_HMC_IW_PBLE))) {
- pd_idx1 = max(pd_idx, (j * I40IW_HMC_MAX_BP_COUNT));
- pd_lmt1 = min(pd_lmt,
- (j + 1) * I40IW_HMC_MAX_BP_COUNT);
- for (i = pd_idx1; i < pd_lmt1; i++) {
- /* update the pd table entry */
- ret_code = i40iw_add_pd_table_entry(dev->hw, info->hmc_info,
- i, NULL);
- if (ret_code) {
- pd_error = true;
- break;
- }
- }
- if (pd_error) {
- while (i && (i > pd_idx1)) {
- i40iw_remove_pd_bp(dev->hw, info->hmc_info, (i - 1),
- info->is_pf);
- i--;
- }
- }
- }
- if (sd_entry->valid)
- continue;
-
- info->hmc_info->sd_indexes[info->add_sd_cnt] = (u16)j;
- info->add_sd_cnt++;
- sd_entry->valid = true;
- }
- return i40iw_hmc_finish_add_sd_reg(dev, info);
-
-exit_sd_error:
- while (j && (j > sd_idx)) {
- sd_entry = &info->hmc_info->sd_table.sd_entry[j - 1];
- switch (sd_entry->entry_type) {
- case I40IW_SD_TYPE_PAGED:
- pd_idx1 = max(pd_idx,
- (j - 1) * I40IW_HMC_MAX_BP_COUNT);
- pd_lmt1 = min(pd_lmt, (j * I40IW_HMC_MAX_BP_COUNT));
- for (i = pd_idx1; i < pd_lmt1; i++)
- i40iw_prep_remove_pd_page(info->hmc_info, i);
- break;
- case I40IW_SD_TYPE_DIRECT:
- i40iw_prep_remove_pd_page(info->hmc_info, (j - 1));
- break;
- default:
- ret_code = I40IW_ERR_INVALID_SD_TYPE;
- break;
- }
- j--;
- }
-
- return ret_code;
-}
-
-/**
- * i40iw_finish_del_sd_reg - delete sd entries for objects
- * @dev: pointer to the device structure
- * @info: dele obj info
- * @reset: true if called before reset
- */
-static enum i40iw_status_code i40iw_finish_del_sd_reg(struct i40iw_sc_dev *dev,
- struct i40iw_hmc_del_obj_info *info,
- bool reset)
-{
- struct i40iw_hmc_sd_entry *sd_entry;
- enum i40iw_status_code ret_code = 0;
- u32 i, sd_idx;
- struct i40iw_dma_mem *mem;
-
- if (dev->is_pf && !reset)
- ret_code = i40iw_hmc_sd_grp(dev, info->hmc_info,
- info->hmc_info->sd_indexes[0],
- info->del_sd_cnt, false);
-
- if (ret_code)
- i40iw_debug(dev, I40IW_DEBUG_HMC, "%s: error cqp sd sd_grp\n", __func__);
-
- for (i = 0; i < info->del_sd_cnt; i++) {
- sd_idx = info->hmc_info->sd_indexes[i];
- sd_entry = &info->hmc_info->sd_table.sd_entry[sd_idx];
- if (!sd_entry)
- continue;
- mem = (sd_entry->entry_type == I40IW_SD_TYPE_PAGED) ?
- &sd_entry->u.pd_table.pd_page_addr :
- &sd_entry->u.bp.addr;
-
- if (!mem || !mem->va)
- i40iw_debug(dev, I40IW_DEBUG_HMC, "%s: error cqp sd mem\n", __func__);
- else
- i40iw_free_dma_mem(dev->hw, mem);
- }
- return ret_code;
-}
-
-/**
- * i40iw_del_iw_hmc_obj - remove pe hmc objects
- * @dev: pointer to the device structure
- * @info: pointer to i40iw_hmc_del_obj_info struct
- * @reset: true if called before reset
- *
- * This will de-populate the SDs and PDs. It frees
- * the memory for PDS and backing storage. After this function is returned,
- * caller should deallocate memory allocated previously for
- * book-keeping information about PDs and backing storage.
- */
-enum i40iw_status_code i40iw_sc_del_hmc_obj(struct i40iw_sc_dev *dev,
- struct i40iw_hmc_del_obj_info *info,
- bool reset)
-{
- struct i40iw_hmc_pd_table *pd_table;
- u32 sd_idx, sd_lmt;
- u32 pd_idx, pd_lmt, rel_pd_idx;
- u32 i, j;
- enum i40iw_status_code ret_code = 0;
-
- if (info->start_idx >= info->hmc_info->hmc_obj[info->rsrc_type].cnt) {
- i40iw_debug(dev, I40IW_DEBUG_HMC,
- "%s: error start_idx[%04d] >= [type %04d].cnt[%04d]\n",
- __func__, info->start_idx, info->rsrc_type,
- info->hmc_info->hmc_obj[info->rsrc_type].cnt);
- return I40IW_ERR_INVALID_HMC_OBJ_INDEX;
- }
-
- if ((info->start_idx + info->count) >
- info->hmc_info->hmc_obj[info->rsrc_type].cnt) {
- i40iw_debug(dev, I40IW_DEBUG_HMC,
- "%s: error start_idx[%04d] + count %04d >= [type %04d].cnt[%04d]\n",
- __func__, info->start_idx, info->count,
- info->rsrc_type,
- info->hmc_info->hmc_obj[info->rsrc_type].cnt);
- return I40IW_ERR_INVALID_HMC_OBJ_COUNT;
- }
- if (!dev->is_pf) {
- ret_code = i40iw_vchnl_vf_del_hmc_obj(dev, info->rsrc_type, 0,
- info->count);
- if (info->rsrc_type != I40IW_HMC_IW_PBLE)
- return ret_code;
- }
-
- i40iw_find_pd_index_limit(info->hmc_info, info->rsrc_type,
- info->start_idx, info->count, &pd_idx, &pd_lmt);
-
- for (j = pd_idx; j < pd_lmt; j++) {
- sd_idx = j / I40IW_HMC_PD_CNT_IN_SD;
-
- if (info->hmc_info->sd_table.sd_entry[sd_idx].entry_type !=
- I40IW_SD_TYPE_PAGED)
- continue;
-
- rel_pd_idx = j % I40IW_HMC_PD_CNT_IN_SD;
- pd_table = &info->hmc_info->sd_table.sd_entry[sd_idx].u.pd_table;
- if (pd_table->pd_entry[rel_pd_idx].valid) {
- ret_code = i40iw_remove_pd_bp(dev->hw, info->hmc_info, j,
- info->is_pf);
- if (ret_code) {
- i40iw_debug(dev, I40IW_DEBUG_HMC, "%s: error\n", __func__);
- return ret_code;
- }
- }
- }
-
- i40iw_find_sd_index_limit(info->hmc_info, info->rsrc_type,
- info->start_idx, info->count, &sd_idx, &sd_lmt);
- if (sd_idx >= info->hmc_info->sd_table.sd_cnt ||
- sd_lmt > info->hmc_info->sd_table.sd_cnt) {
- i40iw_debug(dev, I40IW_DEBUG_HMC, "%s: error invalid sd_idx\n", __func__);
- return I40IW_ERR_INVALID_SD_INDEX;
- }
-
- for (i = sd_idx; i < sd_lmt; i++) {
- if (!info->hmc_info->sd_table.sd_entry[i].valid)
- continue;
- switch (info->hmc_info->sd_table.sd_entry[i].entry_type) {
- case I40IW_SD_TYPE_DIRECT:
- ret_code = i40iw_prep_remove_sd_bp(info->hmc_info, i);
- if (!ret_code) {
- info->hmc_info->sd_indexes[info->del_sd_cnt] = (u16)i;
- info->del_sd_cnt++;
- }
- break;
- case I40IW_SD_TYPE_PAGED:
- ret_code = i40iw_prep_remove_pd_page(info->hmc_info, i);
- if (!ret_code) {
- info->hmc_info->sd_indexes[info->del_sd_cnt] = (u16)i;
- info->del_sd_cnt++;
- }
- break;
- default:
- break;
- }
- }
- return i40iw_finish_del_sd_reg(dev, info, reset);
-}
-
-/**
- * i40iw_add_sd_table_entry - Adds a segment descriptor to the table
- * @hw: pointer to our hw struct
- * @hmc_info: pointer to the HMC configuration information struct
- * @sd_index: segment descriptor index to manipulate
- * @type: what type of segment descriptor we're manipulating
- * @direct_mode_sz: size to alloc in direct mode
- */
-enum i40iw_status_code i40iw_add_sd_table_entry(struct i40iw_hw *hw,
- struct i40iw_hmc_info *hmc_info,
- u32 sd_index,
- enum i40iw_sd_entry_type type,
- u64 direct_mode_sz)
-{
- enum i40iw_status_code ret_code = 0;
- struct i40iw_hmc_sd_entry *sd_entry;
- bool dma_mem_alloc_done = false;
- struct i40iw_dma_mem mem;
- u64 alloc_len;
-
- sd_entry = &hmc_info->sd_table.sd_entry[sd_index];
- if (!sd_entry->valid) {
- if (type == I40IW_SD_TYPE_PAGED)
- alloc_len = I40IW_HMC_PAGED_BP_SIZE;
- else
- alloc_len = direct_mode_sz;
-
- /* allocate a 4K pd page or 2M backing page */
- ret_code = i40iw_allocate_dma_mem(hw, &mem, alloc_len,
- I40IW_HMC_PD_BP_BUF_ALIGNMENT);
- if (ret_code)
- goto exit;
- dma_mem_alloc_done = true;
- if (type == I40IW_SD_TYPE_PAGED) {
- ret_code = i40iw_allocate_virt_mem(hw,
- &sd_entry->u.pd_table.pd_entry_virt_mem,
- sizeof(struct i40iw_hmc_pd_entry) * 512);
- if (ret_code)
- goto exit;
- sd_entry->u.pd_table.pd_entry = (struct i40iw_hmc_pd_entry *)
- sd_entry->u.pd_table.pd_entry_virt_mem.va;
-
- memcpy(&sd_entry->u.pd_table.pd_page_addr, &mem, sizeof(struct i40iw_dma_mem));
- } else {
- memcpy(&sd_entry->u.bp.addr, &mem, sizeof(struct i40iw_dma_mem));
- sd_entry->u.bp.sd_pd_index = sd_index;
- }
-
- hmc_info->sd_table.sd_entry[sd_index].entry_type = type;
-
- I40IW_INC_SD_REFCNT(&hmc_info->sd_table);
- }
- if (sd_entry->entry_type == I40IW_SD_TYPE_DIRECT)
- I40IW_INC_BP_REFCNT(&sd_entry->u.bp);
-exit:
- if (ret_code)
- if (dma_mem_alloc_done)
- i40iw_free_dma_mem(hw, &mem);
-
- return ret_code;
-}
-
-/**
- * i40iw_add_pd_table_entry - Adds page descriptor to the specified table
- * @hw: pointer to our HW structure
- * @hmc_info: pointer to the HMC configuration information structure
- * @pd_index: which page descriptor index to manipulate
- * @rsrc_pg: if not NULL, use preallocated page instead of allocating new one.
- *
- * This function:
- * 1. Initializes the pd entry
- * 2. Adds pd_entry in the pd_table
- * 3. Mark the entry valid in i40iw_hmc_pd_entry structure
- * 4. Initializes the pd_entry's ref count to 1
- * assumptions:
- * 1. The memory for pd should be pinned down, physically contiguous and
- * aligned on 4K boundary and zeroed memory.
- * 2. It should be 4K in size.
- */
-enum i40iw_status_code i40iw_add_pd_table_entry(struct i40iw_hw *hw,
- struct i40iw_hmc_info *hmc_info,
- u32 pd_index,
- struct i40iw_dma_mem *rsrc_pg)
-{
- enum i40iw_status_code ret_code = 0;
- struct i40iw_hmc_pd_table *pd_table;
- struct i40iw_hmc_pd_entry *pd_entry;
- struct i40iw_dma_mem mem;
- struct i40iw_dma_mem *page = &mem;
- u32 sd_idx, rel_pd_idx;
- u64 *pd_addr;
- u64 page_desc;
-
- if (pd_index / I40IW_HMC_PD_CNT_IN_SD >= hmc_info->sd_table.sd_cnt)
- return I40IW_ERR_INVALID_PAGE_DESC_INDEX;
-
- sd_idx = (pd_index / I40IW_HMC_PD_CNT_IN_SD);
- if (hmc_info->sd_table.sd_entry[sd_idx].entry_type != I40IW_SD_TYPE_PAGED)
- return 0;
-
- rel_pd_idx = (pd_index % I40IW_HMC_PD_CNT_IN_SD);
- pd_table = &hmc_info->sd_table.sd_entry[sd_idx].u.pd_table;
- pd_entry = &pd_table->pd_entry[rel_pd_idx];
- if (!pd_entry->valid) {
- if (rsrc_pg) {
- pd_entry->rsrc_pg = true;
- page = rsrc_pg;
- } else {
- ret_code = i40iw_allocate_dma_mem(hw, page,
- I40IW_HMC_PAGED_BP_SIZE,
- I40IW_HMC_PD_BP_BUF_ALIGNMENT);
- if (ret_code)
- return ret_code;
- pd_entry->rsrc_pg = false;
- }
-
- memcpy(&pd_entry->bp.addr, page, sizeof(struct i40iw_dma_mem));
- pd_entry->bp.sd_pd_index = pd_index;
- pd_entry->bp.entry_type = I40IW_SD_TYPE_PAGED;
- page_desc = page->pa | 0x1;
-
- pd_addr = (u64 *)pd_table->pd_page_addr.va;
- pd_addr += rel_pd_idx;
-
- memcpy(pd_addr, &page_desc, sizeof(*pd_addr));
-
- pd_entry->sd_index = sd_idx;
- pd_entry->valid = true;
- I40IW_INC_PD_REFCNT(pd_table);
- if (hmc_info->hmc_fn_id < I40IW_FIRST_VF_FPM_ID)
- I40IW_INVALIDATE_PF_HMC_PD(hw, sd_idx, rel_pd_idx);
- else if (hw->hmc.hmc_fn_id != hmc_info->hmc_fn_id)
- I40IW_INVALIDATE_VF_HMC_PD(hw, sd_idx, rel_pd_idx,
- hmc_info->hmc_fn_id);
- }
- I40IW_INC_BP_REFCNT(&pd_entry->bp);
-
- return 0;
-}
-
-/**
- * i40iw_remove_pd_bp - remove a backing page from a page descriptor
- * @hw: pointer to our HW structure
- * @hmc_info: pointer to the HMC configuration information structure
- * @idx: the page index
- * @is_pf: distinguishes a VF from a PF
- *
- * This function:
- * 1. Marks the entry in pd table (for paged address mode) or in sd table
- * (for direct address mode) invalid.
- * 2. Write to register PMPDINV to invalidate the backing page in FV cache
- * 3. Decrement the ref count for the pd _entry
- * assumptions:
- * 1. Caller can deallocate the memory used by backing storage after this
- * function returns.
- */
-enum i40iw_status_code i40iw_remove_pd_bp(struct i40iw_hw *hw,
- struct i40iw_hmc_info *hmc_info,
- u32 idx,
- bool is_pf)
-{
- struct i40iw_hmc_pd_entry *pd_entry;
- struct i40iw_hmc_pd_table *pd_table;
- struct i40iw_hmc_sd_entry *sd_entry;
- u32 sd_idx, rel_pd_idx;
- struct i40iw_dma_mem *mem;
- u64 *pd_addr;
-
- sd_idx = idx / I40IW_HMC_PD_CNT_IN_SD;
- rel_pd_idx = idx % I40IW_HMC_PD_CNT_IN_SD;
- if (sd_idx >= hmc_info->sd_table.sd_cnt)
- return I40IW_ERR_INVALID_PAGE_DESC_INDEX;
-
- sd_entry = &hmc_info->sd_table.sd_entry[sd_idx];
- if (sd_entry->entry_type != I40IW_SD_TYPE_PAGED)
- return I40IW_ERR_INVALID_SD_TYPE;
-
- pd_table = &hmc_info->sd_table.sd_entry[sd_idx].u.pd_table;
- pd_entry = &pd_table->pd_entry[rel_pd_idx];
- I40IW_DEC_BP_REFCNT(&pd_entry->bp);
- if (pd_entry->bp.ref_cnt)
- return 0;
-
- pd_entry->valid = false;
- I40IW_DEC_PD_REFCNT(pd_table);
- pd_addr = (u64 *)pd_table->pd_page_addr.va;
- pd_addr += rel_pd_idx;
- memset(pd_addr, 0, sizeof(u64));
- if (is_pf)
- I40IW_INVALIDATE_PF_HMC_PD(hw, sd_idx, idx);
- else
- I40IW_INVALIDATE_VF_HMC_PD(hw, sd_idx, idx,
- hmc_info->hmc_fn_id);
-
- if (!pd_entry->rsrc_pg) {
- mem = &pd_entry->bp.addr;
- if (!mem || !mem->va)
- return I40IW_ERR_PARAM;
- i40iw_free_dma_mem(hw, mem);
- }
- if (!pd_table->ref_cnt)
- i40iw_free_virt_mem(hw, &pd_table->pd_entry_virt_mem);
-
- return 0;
-}
-
-/**
- * i40iw_prep_remove_sd_bp - Prepares to remove a backing page from a sd entry
- * @hmc_info: pointer to the HMC configuration information structure
- * @idx: the page index
- */
-enum i40iw_status_code i40iw_prep_remove_sd_bp(struct i40iw_hmc_info *hmc_info, u32 idx)
-{
- struct i40iw_hmc_sd_entry *sd_entry;
-
- sd_entry = &hmc_info->sd_table.sd_entry[idx];
- I40IW_DEC_BP_REFCNT(&sd_entry->u.bp);
- if (sd_entry->u.bp.ref_cnt)
- return I40IW_ERR_NOT_READY;
-
- I40IW_DEC_SD_REFCNT(&hmc_info->sd_table);
- sd_entry->valid = false;
-
- return 0;
-}
-
-/**
- * i40iw_prep_remove_pd_page - Prepares to remove a PD page from sd entry.
- * @hmc_info: pointer to the HMC configuration information structure
- * @idx: segment descriptor index to find the relevant page descriptor
- */
-enum i40iw_status_code i40iw_prep_remove_pd_page(struct i40iw_hmc_info *hmc_info,
- u32 idx)
-{
- struct i40iw_hmc_sd_entry *sd_entry;
-
- sd_entry = &hmc_info->sd_table.sd_entry[idx];
-
- if (sd_entry->u.pd_table.ref_cnt)
- return I40IW_ERR_NOT_READY;
-
- sd_entry->valid = false;
- I40IW_DEC_SD_REFCNT(&hmc_info->sd_table);
-
- return 0;
-}
-
-/**
- * i40iw_pf_init_vfhmc -
- * @vf_cnt_array: array of cnt values of iwarp hmc objects
- * @vf_hmc_fn_id: hmc function id ofr vf driver
- * @dev: pointer to i40iw_dev struct
- *
- * Called by pf driver to initialize hmc_info for vf driver instance.
- */
-enum i40iw_status_code i40iw_pf_init_vfhmc(struct i40iw_sc_dev *dev,
- u8 vf_hmc_fn_id,
- u32 *vf_cnt_array)
-{
- struct i40iw_hmc_info *hmc_info;
- enum i40iw_status_code ret_code = 0;
- u32 i;
-
- if ((vf_hmc_fn_id < I40IW_FIRST_VF_FPM_ID) ||
- (vf_hmc_fn_id >= I40IW_FIRST_VF_FPM_ID +
- I40IW_MAX_PE_ENABLED_VF_COUNT)) {
- i40iw_debug(dev, I40IW_DEBUG_HMC, "%s: invalid vf_hmc_fn_id 0x%x\n",
- __func__, vf_hmc_fn_id);
- return I40IW_ERR_INVALID_HMCFN_ID;
- }
-
- ret_code = i40iw_sc_init_iw_hmc(dev, vf_hmc_fn_id);
- if (ret_code)
- return ret_code;
-
- hmc_info = i40iw_vf_hmcinfo_from_fpm(dev, vf_hmc_fn_id);
-
- for (i = I40IW_HMC_IW_QP; i < I40IW_HMC_IW_MAX; i++)
- if (vf_cnt_array)
- hmc_info->hmc_obj[i].cnt =
- vf_cnt_array[i - I40IW_HMC_IW_QP];
- else
- hmc_info->hmc_obj[i].cnt = hmc_info->hmc_obj[i].max_cnt;
-
- return 0;
-}
diff --git a/drivers/infiniband/hw/i40iw/i40iw_hmc.h b/drivers/infiniband/hw/i40iw/i40iw_hmc.h
deleted file mode 100644
index 4c3fdd875621..000000000000
--- a/drivers/infiniband/hw/i40iw/i40iw_hmc.h
+++ /dev/null
@@ -1,241 +0,0 @@
-/*******************************************************************************
-*
-* Copyright (c) 2015-2016 Intel Corporation. All rights reserved.
-*
-* This software is available to you under a choice of one of two
-* licenses. You may choose to be licensed under the terms of the GNU
-* General Public License (GPL) Version 2, available from the file
-* COPYING in the main directory of this source tree, or the
-* OpenFabrics.org BSD license below:
-*
-* Redistribution and use in source and binary forms, with or
-* without modification, are permitted provided that the following
-* conditions are met:
-*
-* - Redistributions of source code must retain the above
-* copyright notice, this list of conditions and the following
-* disclaimer.
-*
-* - Redistributions in binary form must reproduce the above
-* copyright notice, this list of conditions and the following
-* disclaimer in the documentation and/or other materials
-* provided with the distribution.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-* SOFTWARE.
-*
-*******************************************************************************/
-
-#ifndef I40IW_HMC_H
-#define I40IW_HMC_H
-
-#include "i40iw_d.h"
-
-struct i40iw_hw;
-enum i40iw_status_code;
-
-#define I40IW_HMC_MAX_BP_COUNT 512
-#define I40IW_MAX_SD_ENTRIES 11
-#define I40IW_HW_DBG_HMC_INVALID_BP_MARK 0xCA
-
-#define I40IW_HMC_INFO_SIGNATURE 0x484D5347
-#define I40IW_HMC_PD_CNT_IN_SD 512
-#define I40IW_HMC_DIRECT_BP_SIZE 0x200000
-#define I40IW_HMC_MAX_SD_COUNT 4096
-#define I40IW_HMC_PAGED_BP_SIZE 4096
-#define I40IW_HMC_PD_BP_BUF_ALIGNMENT 4096
-#define I40IW_FIRST_VF_FPM_ID 16
-#define FPM_MULTIPLIER 1024
-
-#define I40IW_INC_SD_REFCNT(sd_table) ((sd_table)->ref_cnt++)
-#define I40IW_INC_PD_REFCNT(pd_table) ((pd_table)->ref_cnt++)
-#define I40IW_INC_BP_REFCNT(bp) ((bp)->ref_cnt++)
-
-#define I40IW_DEC_SD_REFCNT(sd_table) ((sd_table)->ref_cnt--)
-#define I40IW_DEC_PD_REFCNT(pd_table) ((pd_table)->ref_cnt--)
-#define I40IW_DEC_BP_REFCNT(bp) ((bp)->ref_cnt--)
-
-/**
- * I40IW_INVALIDATE_PF_HMC_PD - Invalidates the pd cache in the hardware
- * @hw: pointer to our hw struct
- * @sd_idx: segment descriptor index
- * @pd_idx: page descriptor index
- */
-#define I40IW_INVALIDATE_PF_HMC_PD(hw, sd_idx, pd_idx) \
- i40iw_wr32((hw), I40E_PFHMC_PDINV, \
- (((sd_idx) << I40E_PFHMC_PDINV_PMSDIDX_SHIFT) | \
- (0x1 << I40E_PFHMC_PDINV_PMSDPARTSEL_SHIFT) | \
- ((pd_idx) << I40E_PFHMC_PDINV_PMPDIDX_SHIFT)))
-
-/**
- * I40IW_INVALIDATE_VF_HMC_PD - Invalidates the pd cache in the hardware
- * @hw: pointer to our hw struct
- * @sd_idx: segment descriptor index
- * @pd_idx: page descriptor index
- * @hmc_fn_id: VF's function id
- */
-#define I40IW_INVALIDATE_VF_HMC_PD(hw, sd_idx, pd_idx, hmc_fn_id) \
- i40iw_wr32(hw, I40E_GLHMC_VFPDINV(hmc_fn_id - I40IW_FIRST_VF_FPM_ID), \
- ((sd_idx << I40E_PFHMC_PDINV_PMSDIDX_SHIFT) | \
- (pd_idx << I40E_PFHMC_PDINV_PMPDIDX_SHIFT)))
-
-struct i40iw_hmc_obj_info {
- u64 base;
- u32 max_cnt;
- u32 cnt;
- u64 size;
-};
-
-enum i40iw_sd_entry_type {
- I40IW_SD_TYPE_INVALID = 0,
- I40IW_SD_TYPE_PAGED = 1,
- I40IW_SD_TYPE_DIRECT = 2
-};
-
-struct i40iw_hmc_bp {
- enum i40iw_sd_entry_type entry_type;
- struct i40iw_dma_mem addr;
- u32 sd_pd_index;
- u32 ref_cnt;
-};
-
-struct i40iw_hmc_pd_entry {
- struct i40iw_hmc_bp bp;
- u32 sd_index;
- bool rsrc_pg;
- bool valid;
-};
-
-struct i40iw_hmc_pd_table {
- struct i40iw_dma_mem pd_page_addr;
- struct i40iw_hmc_pd_entry *pd_entry;
- struct i40iw_virt_mem pd_entry_virt_mem;
- u32 ref_cnt;
- u32 sd_index;
-};
-
-struct i40iw_hmc_sd_entry {
- enum i40iw_sd_entry_type entry_type;
- bool valid;
-
- union {
- struct i40iw_hmc_pd_table pd_table;
- struct i40iw_hmc_bp bp;
- } u;
-};
-
-struct i40iw_hmc_sd_table {
- struct i40iw_virt_mem addr;
- u32 sd_cnt;
- u32 ref_cnt;
- struct i40iw_hmc_sd_entry *sd_entry;
-};
-
-struct i40iw_hmc_info {
- u32 signature;
- u8 hmc_fn_id;
- u16 first_sd_index;
-
- struct i40iw_hmc_obj_info *hmc_obj;
- struct i40iw_virt_mem hmc_obj_virt_mem;
- struct i40iw_hmc_sd_table sd_table;
- u16 sd_indexes[I40IW_HMC_MAX_SD_COUNT];
-};
-
-struct update_sd_entry {
- u64 cmd;
- u64 data;
-};
-
-struct i40iw_update_sds_info {
- u32 cnt;
- u8 hmc_fn_id;
- struct update_sd_entry entry[I40IW_MAX_SD_ENTRIES];
-};
-
-struct i40iw_ccq_cqe_info;
-struct i40iw_hmc_fcn_info {
- void (*callback_fcn)(struct i40iw_sc_dev *, void *,
- struct i40iw_ccq_cqe_info *);
- void *cqp_callback_param;
- u32 vf_id;
- u16 iw_vf_idx;
- bool free_fcn;
-};
-
-enum i40iw_hmc_rsrc_type {
- I40IW_HMC_IW_QP = 0,
- I40IW_HMC_IW_CQ = 1,
- I40IW_HMC_IW_SRQ = 2,
- I40IW_HMC_IW_HTE = 3,
- I40IW_HMC_IW_ARP = 4,
- I40IW_HMC_IW_APBVT_ENTRY = 5,
- I40IW_HMC_IW_MR = 6,
- I40IW_HMC_IW_XF = 7,
- I40IW_HMC_IW_XFFL = 8,
- I40IW_HMC_IW_Q1 = 9,
- I40IW_HMC_IW_Q1FL = 10,
- I40IW_HMC_IW_TIMER = 11,
- I40IW_HMC_IW_FSIMC = 12,
- I40IW_HMC_IW_FSIAV = 13,
- I40IW_HMC_IW_PBLE = 14,
- I40IW_HMC_IW_MAX = 15,
-};
-
-struct i40iw_hmc_create_obj_info {
- struct i40iw_hmc_info *hmc_info;
- struct i40iw_virt_mem add_sd_virt_mem;
- u32 rsrc_type;
- u32 start_idx;
- u32 count;
- u32 add_sd_cnt;
- enum i40iw_sd_entry_type entry_type;
- bool is_pf;
-};
-
-struct i40iw_hmc_del_obj_info {
- struct i40iw_hmc_info *hmc_info;
- struct i40iw_virt_mem del_sd_virt_mem;
- u32 rsrc_type;
- u32 start_idx;
- u32 count;
- u32 del_sd_cnt;
- bool is_pf;
-};
-
-enum i40iw_status_code i40iw_copy_dma_mem(struct i40iw_hw *hw, void *dest_buf,
- struct i40iw_dma_mem *src_mem, u64 src_offset, u64 size);
-enum i40iw_status_code i40iw_sc_create_hmc_obj(struct i40iw_sc_dev *dev,
- struct i40iw_hmc_create_obj_info *info);
-enum i40iw_status_code i40iw_sc_del_hmc_obj(struct i40iw_sc_dev *dev,
- struct i40iw_hmc_del_obj_info *info,
- bool reset);
-enum i40iw_status_code i40iw_hmc_sd_one(struct i40iw_sc_dev *dev, u8 hmc_fn_id,
- u64 pa, u32 sd_idx, enum i40iw_sd_entry_type type,
- bool setsd);
-enum i40iw_status_code i40iw_update_sds_noccq(struct i40iw_sc_dev *dev,
- struct i40iw_update_sds_info *info);
-struct i40iw_vfdev *i40iw_vfdev_from_fpm(struct i40iw_sc_dev *dev, u8 hmc_fn_id);
-struct i40iw_hmc_info *i40iw_vf_hmcinfo_from_fpm(struct i40iw_sc_dev *dev,
- u8 hmc_fn_id);
-enum i40iw_status_code i40iw_add_sd_table_entry(struct i40iw_hw *hw,
- struct i40iw_hmc_info *hmc_info, u32 sd_index,
- enum i40iw_sd_entry_type type, u64 direct_mode_sz);
-enum i40iw_status_code i40iw_add_pd_table_entry(struct i40iw_hw *hw,
- struct i40iw_hmc_info *hmc_info, u32 pd_index,
- struct i40iw_dma_mem *rsrc_pg);
-enum i40iw_status_code i40iw_remove_pd_bp(struct i40iw_hw *hw,
- struct i40iw_hmc_info *hmc_info, u32 idx, bool is_pf);
-enum i40iw_status_code i40iw_prep_remove_sd_bp(struct i40iw_hmc_info *hmc_info, u32 idx);
-enum i40iw_status_code i40iw_prep_remove_pd_page(struct i40iw_hmc_info *hmc_info, u32 idx);
-
-#define ENTER_SHARED_FUNCTION()
-#define EXIT_SHARED_FUNCTION()
-
-#endif /* I40IW_HMC_H */
diff --git a/drivers/infiniband/hw/i40iw/i40iw_hw.c b/drivers/infiniband/hw/i40iw/i40iw_hw.c
deleted file mode 100644
index 476867a3f584..000000000000
--- a/drivers/infiniband/hw/i40iw/i40iw_hw.c
+++ /dev/null
@@ -1,768 +0,0 @@
-/*******************************************************************************
-*
-* Copyright (c) 2015-2016 Intel Corporation. All rights reserved.
-*
-* This software is available to you under a choice of one of two
-* licenses. You may choose to be licensed under the terms of the GNU
-* General Public License (GPL) Version 2, available from the file
-* COPYING in the main directory of this source tree, or the
-* OpenFabrics.org BSD license below:
-*
-* Redistribution and use in source and binary forms, with or
-* without modification, are permitted provided that the following
-* conditions are met:
-*
-* - Redistributions of source code must retain the above
-* copyright notice, this list of conditions and the following
-* disclaimer.
-*
-* - Redistributions in binary form must reproduce the above
-* copyright notice, this list of conditions and the following
-* disclaimer in the documentation and/or other materials
-* provided with the distribution.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-* SOFTWARE.
-*
-*******************************************************************************/
-
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/ip.h>
-#include <linux/tcp.h>
-#include <linux/if_vlan.h>
-
-#include "i40iw.h"
-
-/**
- * i40iw_initialize_hw_resources - initialize hw resource during open
- * @iwdev: iwarp device
- */
-u32 i40iw_initialize_hw_resources(struct i40iw_device *iwdev)
-{
- unsigned long num_pds;
- u32 resources_size;
- u32 max_mr;
- u32 max_qp;
- u32 max_cq;
- u32 arp_table_size;
- u32 mrdrvbits;
- void *resource_ptr;
-
- max_qp = iwdev->sc_dev.hmc_info->hmc_obj[I40IW_HMC_IW_QP].cnt;
- max_cq = iwdev->sc_dev.hmc_info->hmc_obj[I40IW_HMC_IW_CQ].cnt;
- max_mr = iwdev->sc_dev.hmc_info->hmc_obj[I40IW_HMC_IW_MR].cnt;
- arp_table_size = iwdev->sc_dev.hmc_info->hmc_obj[I40IW_HMC_IW_ARP].cnt;
- iwdev->max_cqe = 0xFFFFF;
- num_pds = I40IW_MAX_PDS;
- resources_size = sizeof(struct i40iw_arp_entry) * arp_table_size;
- resources_size += sizeof(unsigned long) * BITS_TO_LONGS(max_qp);
- resources_size += sizeof(unsigned long) * BITS_TO_LONGS(max_mr);
- resources_size += sizeof(unsigned long) * BITS_TO_LONGS(max_cq);
- resources_size += sizeof(unsigned long) * BITS_TO_LONGS(num_pds);
- resources_size += sizeof(unsigned long) * BITS_TO_LONGS(arp_table_size);
- resources_size += sizeof(struct i40iw_qp **) * max_qp;
- iwdev->mem_resources = kzalloc(resources_size, GFP_KERNEL);
-
- if (!iwdev->mem_resources)
- return -ENOMEM;
-
- iwdev->max_qp = max_qp;
- iwdev->max_mr = max_mr;
- iwdev->max_cq = max_cq;
- iwdev->max_pd = num_pds;
- iwdev->arp_table_size = arp_table_size;
- iwdev->arp_table = (struct i40iw_arp_entry *)iwdev->mem_resources;
- resource_ptr = iwdev->mem_resources + (sizeof(struct i40iw_arp_entry) * arp_table_size);
-
- iwdev->device_cap_flags = IB_DEVICE_LOCAL_DMA_LKEY |
- IB_DEVICE_MEM_WINDOW | IB_DEVICE_MEM_MGT_EXTENSIONS;
-
- iwdev->allocated_qps = resource_ptr;
- iwdev->allocated_cqs = &iwdev->allocated_qps[BITS_TO_LONGS(max_qp)];
- iwdev->allocated_mrs = &iwdev->allocated_cqs[BITS_TO_LONGS(max_cq)];
- iwdev->allocated_pds = &iwdev->allocated_mrs[BITS_TO_LONGS(max_mr)];
- iwdev->allocated_arps = &iwdev->allocated_pds[BITS_TO_LONGS(num_pds)];
- iwdev->qp_table = (struct i40iw_qp **)(&iwdev->allocated_arps[BITS_TO_LONGS(arp_table_size)]);
- set_bit(0, iwdev->allocated_mrs);
- set_bit(0, iwdev->allocated_qps);
- set_bit(0, iwdev->allocated_cqs);
- set_bit(0, iwdev->allocated_pds);
- set_bit(0, iwdev->allocated_arps);
-
- /* Following for ILQ/IEQ */
- set_bit(1, iwdev->allocated_qps);
- set_bit(1, iwdev->allocated_cqs);
- set_bit(1, iwdev->allocated_pds);
- set_bit(2, iwdev->allocated_cqs);
- set_bit(2, iwdev->allocated_pds);
-
- spin_lock_init(&iwdev->resource_lock);
- spin_lock_init(&iwdev->qptable_lock);
- /* stag index mask has a minimum of 14 bits */
- mrdrvbits = 24 - max(get_count_order(iwdev->max_mr), 14);
- iwdev->mr_stagmask = ~(((1 << mrdrvbits) - 1) << (32 - mrdrvbits));
- return 0;
-}
-
-/**
- * i40iw_cqp_ce_handler - handle cqp completions
- * @iwdev: iwarp device
- * @arm: flag to arm after completions
- * @cq: cq for cqp completions
- */
-static void i40iw_cqp_ce_handler(struct i40iw_device *iwdev, struct i40iw_sc_cq *cq, bool arm)
-{
- struct i40iw_cqp_request *cqp_request;
- struct i40iw_sc_dev *dev = &iwdev->sc_dev;
- u32 cqe_count = 0;
- struct i40iw_ccq_cqe_info info;
- int ret;
-
- do {
- memset(&info, 0, sizeof(info));
- ret = dev->ccq_ops->ccq_get_cqe_info(cq, &info);
- if (ret)
- break;
- cqp_request = (struct i40iw_cqp_request *)(unsigned long)info.scratch;
- if (info.error)
- i40iw_pr_err("opcode = 0x%x maj_err_code = 0x%x min_err_code = 0x%x\n",
- info.op_code, info.maj_err_code, info.min_err_code);
- if (cqp_request) {
- cqp_request->compl_info.maj_err_code = info.maj_err_code;
- cqp_request->compl_info.min_err_code = info.min_err_code;
- cqp_request->compl_info.op_ret_val = info.op_ret_val;
- cqp_request->compl_info.error = info.error;
-
- if (cqp_request->waiting) {
- cqp_request->request_done = true;
- wake_up(&cqp_request->waitq);
- i40iw_put_cqp_request(&iwdev->cqp, cqp_request);
- } else {
- if (cqp_request->callback_fcn)
- cqp_request->callback_fcn(cqp_request, 1);
- i40iw_put_cqp_request(&iwdev->cqp, cqp_request);
- }
- }
-
- cqe_count++;
- } while (1);
-
- if (arm && cqe_count) {
- i40iw_process_bh(dev);
- dev->ccq_ops->ccq_arm(cq);
- }
-}
-
-/**
- * i40iw_iwarp_ce_handler - handle iwarp completions
- * @iwdev: iwarp device
- * @iwcp: iwarp cq receiving event
- */
-static void i40iw_iwarp_ce_handler(struct i40iw_device *iwdev,
- struct i40iw_sc_cq *iwcq)
-{
- struct i40iw_cq *i40iwcq = iwcq->back_cq;
-
- if (i40iwcq->ibcq.comp_handler)
- i40iwcq->ibcq.comp_handler(&i40iwcq->ibcq,
- i40iwcq->ibcq.cq_context);
-}
-
-/**
- * i40iw_puda_ce_handler - handle puda completion events
- * @iwdev: iwarp device
- * @cq: puda completion q for event
- */
-static void i40iw_puda_ce_handler(struct i40iw_device *iwdev,
- struct i40iw_sc_cq *cq)
-{
- struct i40iw_sc_dev *dev = (struct i40iw_sc_dev *)&iwdev->sc_dev;
- enum i40iw_status_code status;
- u32 compl_error;
-
- do {
- status = i40iw_puda_poll_completion(dev, cq, &compl_error);
- if (status == I40IW_ERR_QUEUE_EMPTY)
- break;
- if (status) {
- i40iw_pr_err("puda status = %d\n", status);
- break;
- }
- if (compl_error) {
- i40iw_pr_err("puda compl_err =0x%x\n", compl_error);
- break;
- }
- } while (1);
-
- dev->ccq_ops->ccq_arm(cq);
-}
-
-/**
- * i40iw_process_ceq - handle ceq for completions
- * @iwdev: iwarp device
- * @ceq: ceq having cq for completion
- */
-void i40iw_process_ceq(struct i40iw_device *iwdev, struct i40iw_ceq *ceq)
-{
- struct i40iw_sc_dev *dev = &iwdev->sc_dev;
- struct i40iw_sc_ceq *sc_ceq;
- struct i40iw_sc_cq *cq;
- bool arm = true;
-
- sc_ceq = &ceq->sc_ceq;
- do {
- cq = dev->ceq_ops->process_ceq(dev, sc_ceq);
- if (!cq)
- break;
-
- if (cq->cq_type == I40IW_CQ_TYPE_CQP)
- i40iw_cqp_ce_handler(iwdev, cq, arm);
- else if (cq->cq_type == I40IW_CQ_TYPE_IWARP)
- i40iw_iwarp_ce_handler(iwdev, cq);
- else if ((cq->cq_type == I40IW_CQ_TYPE_ILQ) ||
- (cq->cq_type == I40IW_CQ_TYPE_IEQ))
- i40iw_puda_ce_handler(iwdev, cq);
- } while (1);
-}
-
-/**
- * i40iw_next_iw_state - modify qp state
- * @iwqp: iwarp qp to modify
- * @state: next state for qp
- * @del_hash: del hash
- * @term: term message
- * @termlen: length of term message
- */
-void i40iw_next_iw_state(struct i40iw_qp *iwqp,
- u8 state,
- u8 del_hash,
- u8 term,
- u8 termlen)
-{
- struct i40iw_modify_qp_info info;
-
- memset(&info, 0, sizeof(info));
- info.next_iwarp_state = state;
- info.remove_hash_idx = del_hash;
- info.cq_num_valid = true;
- info.arp_cache_idx_valid = true;
- info.dont_send_term = true;
- info.dont_send_fin = true;
- info.termlen = termlen;
-
- if (term & I40IWQP_TERM_SEND_TERM_ONLY)
- info.dont_send_term = false;
- if (term & I40IWQP_TERM_SEND_FIN_ONLY)
- info.dont_send_fin = false;
- if (iwqp->sc_qp.term_flags && (state == I40IW_QP_STATE_ERROR))
- info.reset_tcp_conn = true;
- iwqp->hw_iwarp_state = state;
- i40iw_hw_modify_qp(iwqp->iwdev, iwqp, &info, 0);
-}
-
-/**
- * i40iw_process_aeq - handle aeq events
- * @iwdev: iwarp device
- */
-void i40iw_process_aeq(struct i40iw_device *iwdev)
-{
- struct i40iw_sc_dev *dev = &iwdev->sc_dev;
- struct i40iw_aeq *aeq = &iwdev->aeq;
- struct i40iw_sc_aeq *sc_aeq = &aeq->sc_aeq;
- struct i40iw_aeqe_info aeinfo;
- struct i40iw_aeqe_info *info = &aeinfo;
- int ret;
- struct i40iw_qp *iwqp = NULL;
- struct i40iw_sc_cq *cq = NULL;
- struct i40iw_cq *iwcq = NULL;
- struct i40iw_sc_qp *qp = NULL;
- struct i40iw_qp_host_ctx_info *ctx_info = NULL;
- unsigned long flags;
-
- u32 aeqcnt = 0;
-
- if (!sc_aeq->size)
- return;
-
- do {
- memset(info, 0, sizeof(*info));
- ret = dev->aeq_ops->get_next_aeqe(sc_aeq, info);
- if (ret)
- break;
-
- aeqcnt++;
- i40iw_debug(dev, I40IW_DEBUG_AEQ,
- "%s ae_id = 0x%x bool qp=%d qp_id = %d\n",
- __func__, info->ae_id, info->qp, info->qp_cq_id);
- if (info->qp) {
- spin_lock_irqsave(&iwdev->qptable_lock, flags);
- iwqp = iwdev->qp_table[info->qp_cq_id];
- if (!iwqp) {
- spin_unlock_irqrestore(&iwdev->qptable_lock, flags);
- i40iw_debug(dev, I40IW_DEBUG_AEQ,
- "%s qp_id %d is already freed\n",
- __func__, info->qp_cq_id);
- continue;
- }
- i40iw_add_ref(&iwqp->ibqp);
- spin_unlock_irqrestore(&iwdev->qptable_lock, flags);
- qp = &iwqp->sc_qp;
- spin_lock_irqsave(&iwqp->lock, flags);
- iwqp->hw_tcp_state = info->tcp_state;
- iwqp->hw_iwarp_state = info->iwarp_state;
- iwqp->last_aeq = info->ae_id;
- spin_unlock_irqrestore(&iwqp->lock, flags);
- ctx_info = &iwqp->ctx_info;
- ctx_info->err_rq_idx_valid = true;
- } else {
- if (info->ae_id != I40IW_AE_CQ_OPERATION_ERROR)
- continue;
- }
-
- switch (info->ae_id) {
- case I40IW_AE_LLP_FIN_RECEIVED:
- if (qp->term_flags)
- continue;
- if (atomic_inc_return(&iwqp->close_timer_started) == 1) {
- iwqp->hw_tcp_state = I40IW_TCP_STATE_CLOSE_WAIT;
- if ((iwqp->hw_tcp_state == I40IW_TCP_STATE_CLOSE_WAIT) &&
- (iwqp->ibqp_state == IB_QPS_RTS)) {
- i40iw_next_iw_state(iwqp,
- I40IW_QP_STATE_CLOSING, 0, 0, 0);
- i40iw_cm_disconn(iwqp);
- }
- iwqp->cm_id->add_ref(iwqp->cm_id);
- i40iw_schedule_cm_timer(iwqp->cm_node,
- (struct i40iw_puda_buf *)iwqp,
- I40IW_TIMER_TYPE_CLOSE, 1, 0);
- }
- break;
- case I40IW_AE_LLP_CLOSE_COMPLETE:
- if (qp->term_flags)
- i40iw_terminate_done(qp, 0);
- else
- i40iw_cm_disconn(iwqp);
- break;
- case I40IW_AE_RESET_SENT:
- i40iw_next_iw_state(iwqp, I40IW_QP_STATE_ERROR, 1, 0, 0);
- i40iw_cm_disconn(iwqp);
- break;
- case I40IW_AE_LLP_CONNECTION_RESET:
- if (atomic_read(&iwqp->close_timer_started))
- continue;
- i40iw_cm_disconn(iwqp);
- break;
- case I40IW_AE_QP_SUSPEND_COMPLETE:
- i40iw_qp_suspend_resume(dev, &iwqp->sc_qp, false);
- break;
- case I40IW_AE_TERMINATE_SENT:
- i40iw_terminate_send_fin(qp);
- break;
- case I40IW_AE_LLP_TERMINATE_RECEIVED:
- i40iw_terminate_received(qp, info);
- break;
- case I40IW_AE_CQ_OPERATION_ERROR:
- i40iw_pr_err("Processing an iWARP related AE for CQ misc = 0x%04X\n",
- info->ae_id);
- cq = (struct i40iw_sc_cq *)(unsigned long)info->compl_ctx;
- iwcq = (struct i40iw_cq *)cq->back_cq;
-
- if (iwcq->ibcq.event_handler) {
- struct ib_event ibevent;
-
- ibevent.device = iwcq->ibcq.device;
- ibevent.event = IB_EVENT_CQ_ERR;
- ibevent.element.cq = &iwcq->ibcq;
- iwcq->ibcq.event_handler(&ibevent, iwcq->ibcq.cq_context);
- }
- break;
- case I40IW_AE_PRIV_OPERATION_DENIED:
- case I40IW_AE_STAG_ZERO_INVALID:
- case I40IW_AE_IB_RREQ_AND_Q1_FULL:
- case I40IW_AE_DDP_UBE_INVALID_DDP_VERSION:
- case I40IW_AE_DDP_UBE_INVALID_MO:
- case I40IW_AE_DDP_UBE_INVALID_QN:
- case I40IW_AE_DDP_NO_L_BIT:
- case I40IW_AE_RDMAP_ROE_INVALID_RDMAP_VERSION:
- case I40IW_AE_RDMAP_ROE_UNEXPECTED_OPCODE:
- case I40IW_AE_ROE_INVALID_RDMA_READ_REQUEST:
- case I40IW_AE_ROE_INVALID_RDMA_WRITE_OR_READ_RESP:
- case I40IW_AE_INVALID_ARP_ENTRY:
- case I40IW_AE_INVALID_TCP_OPTION_RCVD:
- case I40IW_AE_STALE_ARP_ENTRY:
- case I40IW_AE_LLP_RECEIVED_MPA_CRC_ERROR:
- case I40IW_AE_LLP_SEGMENT_TOO_SMALL:
- case I40IW_AE_LLP_SYN_RECEIVED:
- case I40IW_AE_LLP_TOO_MANY_RETRIES:
- case I40IW_AE_LLP_DOUBT_REACHABILITY:
- case I40IW_AE_LCE_QP_CATASTROPHIC:
- case I40IW_AE_LCE_FUNCTION_CATASTROPHIC:
- case I40IW_AE_LCE_CQ_CATASTROPHIC:
- case I40IW_AE_UDA_XMIT_DGRAM_TOO_LONG:
- case I40IW_AE_UDA_XMIT_IPADDR_MISMATCH:
- ctx_info->err_rq_idx_valid = false;
- default:
- if (!info->sq && ctx_info->err_rq_idx_valid) {
- ctx_info->err_rq_idx = info->wqe_idx;
- ctx_info->tcp_info_valid = false;
- ctx_info->iwarp_info_valid = false;
- ret = dev->iw_priv_qp_ops->qp_setctx(&iwqp->sc_qp,
- iwqp->host_ctx.va,
- ctx_info);
- }
- i40iw_terminate_connection(qp, info);
- break;
- }
- if (info->qp)
- i40iw_rem_ref(&iwqp->ibqp);
- } while (1);
-
- if (aeqcnt)
- dev->aeq_ops->repost_aeq_entries(dev, aeqcnt);
-}
-
-/**
- * i40iw_manage_apbvt - add or delete tcp port
- * @iwdev: iwarp device
- * @accel_local_port: port for apbvt
- * @add_port: add or delete port
- */
-int i40iw_manage_apbvt(struct i40iw_device *iwdev, u16 accel_local_port, bool add_port)
-{
- struct i40iw_apbvt_info *info;
- enum i40iw_status_code status;
- struct i40iw_cqp_request *cqp_request;
- struct cqp_commands_info *cqp_info;
-
- cqp_request = i40iw_get_cqp_request(&iwdev->cqp, add_port);
- if (!cqp_request)
- return -ENOMEM;
-
- cqp_info = &cqp_request->info;
- info = &cqp_info->in.u.manage_apbvt_entry.info;
-
- memset(info, 0, sizeof(*info));
- info->add = add_port;
- info->port = cpu_to_le16(accel_local_port);
-
- cqp_info->cqp_cmd = OP_MANAGE_APBVT_ENTRY;
- cqp_info->post_sq = 1;
- cqp_info->in.u.manage_apbvt_entry.cqp = &iwdev->cqp.sc_cqp;
- cqp_info->in.u.manage_apbvt_entry.scratch = (uintptr_t)cqp_request;
- status = i40iw_handle_cqp_op(iwdev, cqp_request);
- if (status)
- i40iw_pr_err("CQP-OP Manage APBVT entry fail");
- return status;
-}
-
-/**
- * i40iw_manage_arp_cache - manage hw arp cache
- * @iwdev: iwarp device
- * @mac_addr: mac address ptr
- * @ip_addr: ip addr for arp cache
- * @action: add, delete or modify
- */
-void i40iw_manage_arp_cache(struct i40iw_device *iwdev,
- unsigned char *mac_addr,
- u32 *ip_addr,
- bool ipv4,
- u32 action)
-{
- struct i40iw_add_arp_cache_entry_info *info;
- struct i40iw_cqp_request *cqp_request;
- struct cqp_commands_info *cqp_info;
- int arp_index;
-
- arp_index = i40iw_arp_table(iwdev, ip_addr, ipv4, mac_addr, action);
- if (arp_index == -1)
- return;
- cqp_request = i40iw_get_cqp_request(&iwdev->cqp, false);
- if (!cqp_request)
- return;
-
- cqp_info = &cqp_request->info;
- if (action == I40IW_ARP_ADD) {
- cqp_info->cqp_cmd = OP_ADD_ARP_CACHE_ENTRY;
- info = &cqp_info->in.u.add_arp_cache_entry.info;
- memset(info, 0, sizeof(*info));
- info->arp_index = cpu_to_le16((u16)arp_index);
- info->permanent = true;
- ether_addr_copy(info->mac_addr, mac_addr);
- cqp_info->in.u.add_arp_cache_entry.scratch = (uintptr_t)cqp_request;
- cqp_info->in.u.add_arp_cache_entry.cqp = &iwdev->cqp.sc_cqp;
- } else {
- cqp_info->cqp_cmd = OP_DELETE_ARP_CACHE_ENTRY;
- cqp_info->in.u.del_arp_cache_entry.scratch = (uintptr_t)cqp_request;
- cqp_info->in.u.del_arp_cache_entry.cqp = &iwdev->cqp.sc_cqp;
- cqp_info->in.u.del_arp_cache_entry.arp_index = arp_index;
- }
-
- cqp_info->in.u.add_arp_cache_entry.cqp = &iwdev->cqp.sc_cqp;
- cqp_info->in.u.add_arp_cache_entry.scratch = (uintptr_t)cqp_request;
- cqp_info->post_sq = 1;
- if (i40iw_handle_cqp_op(iwdev, cqp_request))
- i40iw_pr_err("CQP-OP Add/Del Arp Cache entry fail");
-}
-
-/**
- * i40iw_send_syn_cqp_callback - do syn/ack after qhash
- * @cqp_request: qhash cqp completion
- * @send_ack: flag send ack
- */
-static void i40iw_send_syn_cqp_callback(struct i40iw_cqp_request *cqp_request, u32 send_ack)
-{
- i40iw_send_syn(cqp_request->param, send_ack);
-}
-
-/**
- * i40iw_manage_qhash - add or modify qhash
- * @iwdev: iwarp device
- * @cminfo: cm info for qhash
- * @etype: type (syn or quad)
- * @mtype: type of qhash
- * @cmnode: cmnode associated with connection
- * @wait: wait for completion
- * @user_pri:user pri of the connection
- */
-enum i40iw_status_code i40iw_manage_qhash(struct i40iw_device *iwdev,
- struct i40iw_cm_info *cminfo,
- enum i40iw_quad_entry_type etype,
- enum i40iw_quad_hash_manage_type mtype,
- void *cmnode,
- bool wait)
-{
- struct i40iw_qhash_table_info *info;
- struct i40iw_sc_dev *dev = &iwdev->sc_dev;
- struct i40iw_sc_vsi *vsi = &iwdev->vsi;
- enum i40iw_status_code status;
- struct i40iw_cqp *iwcqp = &iwdev->cqp;
- struct i40iw_cqp_request *cqp_request;
- struct cqp_commands_info *cqp_info;
-
- cqp_request = i40iw_get_cqp_request(iwcqp, wait);
- if (!cqp_request)
- return I40IW_ERR_NO_MEMORY;
- cqp_info = &cqp_request->info;
- info = &cqp_info->in.u.manage_qhash_table_entry.info;
- memset(info, 0, sizeof(*info));
-
- info->vsi = &iwdev->vsi;
- info->manage = mtype;
- info->entry_type = etype;
- if (cminfo->vlan_id != 0xFFFF) {
- info->vlan_valid = true;
- info->vlan_id = cpu_to_le16(cminfo->vlan_id);
- } else {
- info->vlan_valid = false;
- }
-
- info->ipv4_valid = cminfo->ipv4;
- info->user_pri = cminfo->user_pri;
- ether_addr_copy(info->mac_addr, iwdev->netdev->dev_addr);
- info->qp_num = cpu_to_le32(vsi->ilq->qp_id);
- info->dest_port = cpu_to_le16(cminfo->loc_port);
- info->dest_ip[0] = cpu_to_le32(cminfo->loc_addr[0]);
- info->dest_ip[1] = cpu_to_le32(cminfo->loc_addr[1]);
- info->dest_ip[2] = cpu_to_le32(cminfo->loc_addr[2]);
- info->dest_ip[3] = cpu_to_le32(cminfo->loc_addr[3]);
- if (etype == I40IW_QHASH_TYPE_TCP_ESTABLISHED) {
- info->src_port = cpu_to_le16(cminfo->rem_port);
- info->src_ip[0] = cpu_to_le32(cminfo->rem_addr[0]);
- info->src_ip[1] = cpu_to_le32(cminfo->rem_addr[1]);
- info->src_ip[2] = cpu_to_le32(cminfo->rem_addr[2]);
- info->src_ip[3] = cpu_to_le32(cminfo->rem_addr[3]);
- }
- if (cmnode) {
- cqp_request->callback_fcn = i40iw_send_syn_cqp_callback;
- cqp_request->param = (void *)cmnode;
- }
-
- if (info->ipv4_valid)
- i40iw_debug(dev, I40IW_DEBUG_CM,
- "%s:%s IP=%pI4, port=%d, mac=%pM, vlan_id=%d\n",
- __func__, (!mtype) ? "DELETE" : "ADD",
- info->dest_ip,
- info->dest_port, info->mac_addr, cminfo->vlan_id);
- else
- i40iw_debug(dev, I40IW_DEBUG_CM,
- "%s:%s IP=%pI6, port=%d, mac=%pM, vlan_id=%d\n",
- __func__, (!mtype) ? "DELETE" : "ADD",
- info->dest_ip,
- info->dest_port, info->mac_addr, cminfo->vlan_id);
- cqp_info->in.u.manage_qhash_table_entry.cqp = &iwdev->cqp.sc_cqp;
- cqp_info->in.u.manage_qhash_table_entry.scratch = (uintptr_t)cqp_request;
- cqp_info->cqp_cmd = OP_MANAGE_QHASH_TABLE_ENTRY;
- cqp_info->post_sq = 1;
- status = i40iw_handle_cqp_op(iwdev, cqp_request);
- if (status)
- i40iw_pr_err("CQP-OP Manage Qhash Entry fail");
- return status;
-}
-
-/**
- * i40iw_hw_flush_wqes - flush qp's wqe
- * @iwdev: iwarp device
- * @qp: hardware control qp
- * @info: info for flush
- * @wait: flag wait for completion
- */
-enum i40iw_status_code i40iw_hw_flush_wqes(struct i40iw_device *iwdev,
- struct i40iw_sc_qp *qp,
- struct i40iw_qp_flush_info *info,
- bool wait)
-{
- enum i40iw_status_code status;
- struct i40iw_qp_flush_info *hw_info;
- struct i40iw_cqp_request *cqp_request;
- struct cqp_commands_info *cqp_info;
- struct i40iw_qp *iwqp = (struct i40iw_qp *)qp->back_qp;
-
- cqp_request = i40iw_get_cqp_request(&iwdev->cqp, wait);
- if (!cqp_request)
- return I40IW_ERR_NO_MEMORY;
-
- cqp_info = &cqp_request->info;
- hw_info = &cqp_request->info.in.u.qp_flush_wqes.info;
- memcpy(hw_info, info, sizeof(*hw_info));
-
- cqp_info->cqp_cmd = OP_QP_FLUSH_WQES;
- cqp_info->post_sq = 1;
- cqp_info->in.u.qp_flush_wqes.qp = qp;
- cqp_info->in.u.qp_flush_wqes.scratch = (uintptr_t)cqp_request;
- status = i40iw_handle_cqp_op(iwdev, cqp_request);
- if (status) {
- i40iw_pr_err("CQP-OP Flush WQE's fail");
- complete(&iwqp->sq_drained);
- complete(&iwqp->rq_drained);
- return status;
- }
- if (!cqp_request->compl_info.maj_err_code) {
- switch (cqp_request->compl_info.min_err_code) {
- case I40IW_CQP_COMPL_RQ_WQE_FLUSHED:
- complete(&iwqp->sq_drained);
- break;
- case I40IW_CQP_COMPL_SQ_WQE_FLUSHED:
- complete(&iwqp->rq_drained);
- break;
- case I40IW_CQP_COMPL_RQ_SQ_WQE_FLUSHED:
- break;
- default:
- complete(&iwqp->sq_drained);
- complete(&iwqp->rq_drained);
- break;
- }
- }
-
- return 0;
-}
-
-/**
- * i40iw_hw_manage_vf_pble_bp - manage vf pbles
- * @iwdev: iwarp device
- * @info: info for managing pble
- * @wait: flag wait for completion
- */
-enum i40iw_status_code i40iw_hw_manage_vf_pble_bp(struct i40iw_device *iwdev,
- struct i40iw_manage_vf_pble_info *info,
- bool wait)
-{
- enum i40iw_status_code status;
- struct i40iw_manage_vf_pble_info *hw_info;
- struct i40iw_cqp_request *cqp_request;
- struct cqp_commands_info *cqp_info;
-
- if ((iwdev->init_state < CCQ_CREATED) && wait)
- wait = false;
-
- cqp_request = i40iw_get_cqp_request(&iwdev->cqp, wait);
- if (!cqp_request)
- return I40IW_ERR_NO_MEMORY;
-
- cqp_info = &cqp_request->info;
- hw_info = &cqp_request->info.in.u.manage_vf_pble_bp.info;
- memcpy(hw_info, info, sizeof(*hw_info));
-
- cqp_info->cqp_cmd = OP_MANAGE_VF_PBLE_BP;
- cqp_info->post_sq = 1;
- cqp_info->in.u.manage_vf_pble_bp.cqp = &iwdev->cqp.sc_cqp;
- cqp_info->in.u.manage_vf_pble_bp.scratch = (uintptr_t)cqp_request;
- status = i40iw_handle_cqp_op(iwdev, cqp_request);
- if (status)
- i40iw_pr_err("CQP-OP Manage VF pble_bp fail");
- return status;
-}
-
-/**
- * i40iw_get_ib_wc - return change flush code to IB's
- * @opcode: iwarp flush code
- */
-static enum ib_wc_status i40iw_get_ib_wc(enum i40iw_flush_opcode opcode)
-{
- switch (opcode) {
- case FLUSH_PROT_ERR:
- return IB_WC_LOC_PROT_ERR;
- case FLUSH_REM_ACCESS_ERR:
- return IB_WC_REM_ACCESS_ERR;
- case FLUSH_LOC_QP_OP_ERR:
- return IB_WC_LOC_QP_OP_ERR;
- case FLUSH_REM_OP_ERR:
- return IB_WC_REM_OP_ERR;
- case FLUSH_LOC_LEN_ERR:
- return IB_WC_LOC_LEN_ERR;
- case FLUSH_GENERAL_ERR:
- return IB_WC_GENERAL_ERR;
- case FLUSH_FATAL_ERR:
- default:
- return IB_WC_FATAL_ERR;
- }
-}
-
-/**
- * i40iw_set_flush_info - set flush info
- * @pinfo: set flush info
- * @min: minor err
- * @maj: major err
- * @opcode: flush error code
- */
-static void i40iw_set_flush_info(struct i40iw_qp_flush_info *pinfo,
- u16 *min,
- u16 *maj,
- enum i40iw_flush_opcode opcode)
-{
- *min = (u16)i40iw_get_ib_wc(opcode);
- *maj = CQE_MAJOR_DRV;
- pinfo->userflushcode = true;
-}
-
-/**
- * i40iw_flush_wqes - flush wqe for qp
- * @iwdev: iwarp device
- * @iwqp: qp to flush wqes
- */
-void i40iw_flush_wqes(struct i40iw_device *iwdev, struct i40iw_qp *iwqp)
-{
- struct i40iw_qp_flush_info info;
- struct i40iw_qp_flush_info *pinfo = &info;
-
- struct i40iw_sc_qp *qp = &iwqp->sc_qp;
-
- memset(pinfo, 0, sizeof(*pinfo));
- info.sq = true;
- info.rq = true;
- if (qp->term_flags) {
- i40iw_set_flush_info(pinfo, &pinfo->sq_minor_code,
- &pinfo->sq_major_code, qp->flush_code);
- i40iw_set_flush_info(pinfo, &pinfo->rq_minor_code,
- &pinfo->rq_major_code, qp->flush_code);
- }
- (void)i40iw_hw_flush_wqes(iwdev, &iwqp->sc_qp, &info, true);
-}
diff --git a/drivers/infiniband/hw/i40iw/i40iw_main.c b/drivers/infiniband/hw/i40iw/i40iw_main.c
deleted file mode 100644
index 2728af3103ce..000000000000
--- a/drivers/infiniband/hw/i40iw/i40iw_main.c
+++ /dev/null
@@ -1,2031 +0,0 @@
-/*******************************************************************************
-*
-* Copyright (c) 2015-2016 Intel Corporation. All rights reserved.
-*
-* This software is available to you under a choice of one of two
-* licenses. You may choose to be licensed under the terms of the GNU
-* General Public License (GPL) Version 2, available from the file
-* COPYING in the main directory of this source tree, or the
-* OpenFabrics.org BSD license below:
-*
-* Redistribution and use in source and binary forms, with or
-* without modification, are permitted provided that the following
-* conditions are met:
-*
-* - Redistributions of source code must retain the above
-* copyright notice, this list of conditions and the following
-* disclaimer.
-*
-* - Redistributions in binary form must reproduce the above
-* copyright notice, this list of conditions and the following
-* disclaimer in the documentation and/or other materials
-* provided with the distribution.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-* SOFTWARE.
-*
-*******************************************************************************/
-
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/ip.h>
-#include <linux/tcp.h>
-#include <linux/if_vlan.h>
-#include <net/addrconf.h>
-
-#include "i40iw.h"
-#include "i40iw_register.h"
-#include <net/netevent.h>
-#define CLIENT_IW_INTERFACE_VERSION_MAJOR 0
-#define CLIENT_IW_INTERFACE_VERSION_MINOR 01
-#define CLIENT_IW_INTERFACE_VERSION_BUILD 00
-
-#define DRV_VERSION_MAJOR 0
-#define DRV_VERSION_MINOR 5
-#define DRV_VERSION_BUILD 123
-#define DRV_VERSION __stringify(DRV_VERSION_MAJOR) "." \
- __stringify(DRV_VERSION_MINOR) "." __stringify(DRV_VERSION_BUILD)
-
-static int push_mode;
-module_param(push_mode, int, 0644);
-MODULE_PARM_DESC(push_mode, "Low latency mode: 0=disabled (default), 1=enabled)");
-
-static int debug;
-module_param(debug, int, 0644);
-MODULE_PARM_DESC(debug, "debug flags: 0=disabled (default), 0x7fffffff=all");
-
-static int resource_profile;
-module_param(resource_profile, int, 0644);
-MODULE_PARM_DESC(resource_profile,
- "Resource Profile: 0=no VF RDMA support (default), 1=Weighted VF, 2=Even Distribution");
-
-static int max_rdma_vfs = 32;
-module_param(max_rdma_vfs, int, 0644);
-MODULE_PARM_DESC(max_rdma_vfs, "Maximum VF count: 0-32 32=default");
-static int mpa_version = 2;
-module_param(mpa_version, int, 0644);
-MODULE_PARM_DESC(mpa_version, "MPA version to be used in MPA Req/Resp 1 or 2");
-
-MODULE_AUTHOR("Intel Corporation, <e1000-rdma@lists.sourceforge.net>");
-MODULE_DESCRIPTION("Intel(R) Ethernet Connection X722 iWARP RDMA Driver");
-MODULE_LICENSE("Dual BSD/GPL");
-MODULE_VERSION(DRV_VERSION);
-
-static struct i40e_client i40iw_client;
-static char i40iw_client_name[I40E_CLIENT_STR_LENGTH] = "i40iw";
-
-static LIST_HEAD(i40iw_handlers);
-static spinlock_t i40iw_handler_lock;
-
-static enum i40iw_status_code i40iw_virtchnl_send(struct i40iw_sc_dev *dev,
- u32 vf_id, u8 *msg, u16 len);
-
-static struct notifier_block i40iw_inetaddr_notifier = {
- .notifier_call = i40iw_inetaddr_event
-};
-
-static struct notifier_block i40iw_inetaddr6_notifier = {
- .notifier_call = i40iw_inet6addr_event
-};
-
-static struct notifier_block i40iw_net_notifier = {
- .notifier_call = i40iw_net_event
-};
-
-static atomic_t i40iw_notifiers_registered;
-
-/**
- * i40iw_find_i40e_handler - find a handler given a client info
- * @ldev: pointer to a client info
- */
-static struct i40iw_handler *i40iw_find_i40e_handler(struct i40e_info *ldev)
-{
- struct i40iw_handler *hdl;
- unsigned long flags;
-
- spin_lock_irqsave(&i40iw_handler_lock, flags);
- list_for_each_entry(hdl, &i40iw_handlers, list) {
- if (hdl->ldev.netdev == ldev->netdev) {
- spin_unlock_irqrestore(&i40iw_handler_lock, flags);
- return hdl;
- }
- }
- spin_unlock_irqrestore(&i40iw_handler_lock, flags);
- return NULL;
-}
-
-/**
- * i40iw_find_netdev - find a handler given a netdev
- * @netdev: pointer to net_device
- */
-struct i40iw_handler *i40iw_find_netdev(struct net_device *netdev)
-{
- struct i40iw_handler *hdl;
- unsigned long flags;
-
- spin_lock_irqsave(&i40iw_handler_lock, flags);
- list_for_each_entry(hdl, &i40iw_handlers, list) {
- if (hdl->ldev.netdev == netdev) {
- spin_unlock_irqrestore(&i40iw_handler_lock, flags);
- return hdl;
- }
- }
- spin_unlock_irqrestore(&i40iw_handler_lock, flags);
- return NULL;
-}
-
-/**
- * i40iw_add_handler - add a handler to the list
- * @hdl: handler to be added to the handler list
- */
-static void i40iw_add_handler(struct i40iw_handler *hdl)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&i40iw_handler_lock, flags);
- list_add(&hdl->list, &i40iw_handlers);
- spin_unlock_irqrestore(&i40iw_handler_lock, flags);
-}
-
-/**
- * i40iw_del_handler - delete a handler from the list
- * @hdl: handler to be deleted from the handler list
- */
-static int i40iw_del_handler(struct i40iw_handler *hdl)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&i40iw_handler_lock, flags);
- list_del(&hdl->list);
- spin_unlock_irqrestore(&i40iw_handler_lock, flags);
- return 0;
-}
-
-/**
- * i40iw_enable_intr - set up device interrupts
- * @dev: hardware control device structure
- * @msix_id: id of the interrupt to be enabled
- */
-static void i40iw_enable_intr(struct i40iw_sc_dev *dev, u32 msix_id)
-{
- u32 val;
-
- val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
- I40E_PFINT_DYN_CTLN_CLEARPBA_MASK |
- (3 << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT);
- if (dev->is_pf)
- i40iw_wr32(dev->hw, I40E_PFINT_DYN_CTLN(msix_id - 1), val);
- else
- i40iw_wr32(dev->hw, I40E_VFINT_DYN_CTLN1(msix_id - 1), val);
-}
-
-/**
- * i40iw_dpc - tasklet for aeq and ceq 0
- * @data: iwarp device
- */
-static void i40iw_dpc(unsigned long data)
-{
- struct i40iw_device *iwdev = (struct i40iw_device *)data;
-
- if (iwdev->msix_shared)
- i40iw_process_ceq(iwdev, iwdev->ceqlist);
- i40iw_process_aeq(iwdev);
- i40iw_enable_intr(&iwdev->sc_dev, iwdev->iw_msixtbl[0].idx);
-}
-
-/**
- * i40iw_ceq_dpc - dpc handler for CEQ
- * @data: data points to CEQ
- */
-static void i40iw_ceq_dpc(unsigned long data)
-{
- struct i40iw_ceq *iwceq = (struct i40iw_ceq *)data;
- struct i40iw_device *iwdev = iwceq->iwdev;
-
- i40iw_process_ceq(iwdev, iwceq);
- i40iw_enable_intr(&iwdev->sc_dev, iwceq->msix_idx);
-}
-
-/**
- * i40iw_irq_handler - interrupt handler for aeq and ceq0
- * @irq: Interrupt request number
- * @data: iwarp device
- */
-static irqreturn_t i40iw_irq_handler(int irq, void *data)
-{
- struct i40iw_device *iwdev = (struct i40iw_device *)data;
-
- tasklet_schedule(&iwdev->dpc_tasklet);
- return IRQ_HANDLED;
-}
-
-/**
- * i40iw_destroy_cqp - destroy control qp
- * @iwdev: iwarp device
- * @create_done: 1 if cqp create poll was success
- *
- * Issue destroy cqp request and
- * free the resources associated with the cqp
- */
-static void i40iw_destroy_cqp(struct i40iw_device *iwdev, bool free_hwcqp)
-{
- struct i40iw_sc_dev *dev = &iwdev->sc_dev;
- struct i40iw_cqp *cqp = &iwdev->cqp;
-
- if (free_hwcqp)
- dev->cqp_ops->cqp_destroy(dev->cqp);
-
- i40iw_free_dma_mem(dev->hw, &cqp->sq);
- kfree(cqp->scratch_array);
- iwdev->cqp.scratch_array = NULL;
-
- kfree(cqp->cqp_requests);
- cqp->cqp_requests = NULL;
-}
-
-/**
- * i40iw_disable_irqs - disable device interrupts
- * @dev: hardware control device structure
- * @msic_vec: msix vector to disable irq
- * @dev_id: parameter to pass to free_irq (used during irq setup)
- *
- * The function is called when destroying aeq/ceq
- */
-static void i40iw_disable_irq(struct i40iw_sc_dev *dev,
- struct i40iw_msix_vector *msix_vec,
- void *dev_id)
-{
- if (dev->is_pf)
- i40iw_wr32(dev->hw, I40E_PFINT_DYN_CTLN(msix_vec->idx - 1), 0);
- else
- i40iw_wr32(dev->hw, I40E_VFINT_DYN_CTLN1(msix_vec->idx - 1), 0);
- irq_set_affinity_hint(msix_vec->irq, NULL);
- free_irq(msix_vec->irq, dev_id);
-}
-
-/**
- * i40iw_destroy_aeq - destroy aeq
- * @iwdev: iwarp device
- * @reset: true if called before reset
- *
- * Issue a destroy aeq request and
- * free the resources associated with the aeq
- * The function is called during driver unload
- */
-static void i40iw_destroy_aeq(struct i40iw_device *iwdev, bool reset)
-{
- enum i40iw_status_code status = I40IW_ERR_NOT_READY;
- struct i40iw_sc_dev *dev = &iwdev->sc_dev;
- struct i40iw_aeq *aeq = &iwdev->aeq;
-
- if (!iwdev->msix_shared)
- i40iw_disable_irq(dev, iwdev->iw_msixtbl, (void *)iwdev);
- if (reset)
- goto exit;
-
- if (!dev->aeq_ops->aeq_destroy(&aeq->sc_aeq, 0, 1))
- status = dev->aeq_ops->aeq_destroy_done(&aeq->sc_aeq);
- if (status)
- i40iw_pr_err("destroy aeq failed %d\n", status);
-
-exit:
- i40iw_free_dma_mem(dev->hw, &aeq->mem);
-}
-
-/**
- * i40iw_destroy_ceq - destroy ceq
- * @iwdev: iwarp device
- * @iwceq: ceq to be destroyed
- * @reset: true if called before reset
- *
- * Issue a destroy ceq request and
- * free the resources associated with the ceq
- */
-static void i40iw_destroy_ceq(struct i40iw_device *iwdev,
- struct i40iw_ceq *iwceq,
- bool reset)
-{
- enum i40iw_status_code status;
- struct i40iw_sc_dev *dev = &iwdev->sc_dev;
-
- if (reset)
- goto exit;
-
- status = dev->ceq_ops->ceq_destroy(&iwceq->sc_ceq, 0, 1);
- if (status) {
- i40iw_pr_err("ceq destroy command failed %d\n", status);
- goto exit;
- }
-
- status = dev->ceq_ops->cceq_destroy_done(&iwceq->sc_ceq);
- if (status)
- i40iw_pr_err("ceq destroy completion failed %d\n", status);
-exit:
- i40iw_free_dma_mem(dev->hw, &iwceq->mem);
-}
-
-/**
- * i40iw_dele_ceqs - destroy all ceq's
- * @iwdev: iwarp device
- * @reset: true if called before reset
- *
- * Go through all of the device ceq's and for each ceq
- * disable the ceq interrupt and destroy the ceq
- */
-static void i40iw_dele_ceqs(struct i40iw_device *iwdev, bool reset)
-{
- u32 i = 0;
- struct i40iw_sc_dev *dev = &iwdev->sc_dev;
- struct i40iw_ceq *iwceq = iwdev->ceqlist;
- struct i40iw_msix_vector *msix_vec = iwdev->iw_msixtbl;
-
- if (iwdev->msix_shared) {
- i40iw_disable_irq(dev, msix_vec, (void *)iwdev);
- i40iw_destroy_ceq(iwdev, iwceq, reset);
- iwceq++;
- i++;
- }
-
- for (msix_vec++; i < iwdev->ceqs_count; i++, msix_vec++, iwceq++) {
- i40iw_disable_irq(dev, msix_vec, (void *)iwceq);
- i40iw_destroy_ceq(iwdev, iwceq, reset);
- }
-}
-
-/**
- * i40iw_destroy_ccq - destroy control cq
- * @iwdev: iwarp device
- * @reset: true if called before reset
- *
- * Issue destroy ccq request and
- * free the resources associated with the ccq
- */
-static void i40iw_destroy_ccq(struct i40iw_device *iwdev, bool reset)
-{
- struct i40iw_sc_dev *dev = &iwdev->sc_dev;
- struct i40iw_ccq *ccq = &iwdev->ccq;
- enum i40iw_status_code status = 0;
-
- if (!reset)
- status = dev->ccq_ops->ccq_destroy(dev->ccq, 0, true);
- if (status)
- i40iw_pr_err("ccq destroy failed %d\n", status);
- i40iw_free_dma_mem(dev->hw, &ccq->mem_cq);
-}
-
-/* types of hmc objects */
-static enum i40iw_hmc_rsrc_type iw_hmc_obj_types[] = {
- I40IW_HMC_IW_QP,
- I40IW_HMC_IW_CQ,
- I40IW_HMC_IW_HTE,
- I40IW_HMC_IW_ARP,
- I40IW_HMC_IW_APBVT_ENTRY,
- I40IW_HMC_IW_MR,
- I40IW_HMC_IW_XF,
- I40IW_HMC_IW_XFFL,
- I40IW_HMC_IW_Q1,
- I40IW_HMC_IW_Q1FL,
- I40IW_HMC_IW_TIMER,
-};
-
-/**
- * i40iw_close_hmc_objects_type - delete hmc objects of a given type
- * @iwdev: iwarp device
- * @obj_type: the hmc object type to be deleted
- * @is_pf: true if the function is PF otherwise false
- * @reset: true if called before reset
- */
-static void i40iw_close_hmc_objects_type(struct i40iw_sc_dev *dev,
- enum i40iw_hmc_rsrc_type obj_type,
- struct i40iw_hmc_info *hmc_info,
- bool is_pf,
- bool reset)
-{
- struct i40iw_hmc_del_obj_info info;
-
- memset(&info, 0, sizeof(info));
- info.hmc_info = hmc_info;
- info.rsrc_type = obj_type;
- info.count = hmc_info->hmc_obj[obj_type].cnt;
- info.is_pf = is_pf;
- if (dev->hmc_ops->del_hmc_object(dev, &info, reset))
- i40iw_pr_err("del obj of type %d failed\n", obj_type);
-}
-
-/**
- * i40iw_del_hmc_objects - remove all device hmc objects
- * @dev: iwarp device
- * @hmc_info: hmc_info to free
- * @is_pf: true if hmc_info belongs to PF, not vf nor allocated
- * by PF on behalf of VF
- * @reset: true if called before reset
- */
-static void i40iw_del_hmc_objects(struct i40iw_sc_dev *dev,
- struct i40iw_hmc_info *hmc_info,
- bool is_pf,
- bool reset)
-{
- unsigned int i;
-
- for (i = 0; i < IW_HMC_OBJ_TYPE_NUM; i++)
- i40iw_close_hmc_objects_type(dev, iw_hmc_obj_types[i], hmc_info, is_pf, reset);
-}
-
-/**
- * i40iw_ceq_handler - interrupt handler for ceq
- * @data: ceq pointer
- */
-static irqreturn_t i40iw_ceq_handler(int irq, void *data)
-{
- struct i40iw_ceq *iwceq = (struct i40iw_ceq *)data;
-
- if (iwceq->irq != irq)
- i40iw_pr_err("expected irq = %d received irq = %d\n", iwceq->irq, irq);
- tasklet_schedule(&iwceq->dpc_tasklet);
- return IRQ_HANDLED;
-}
-
-/**
- * i40iw_create_hmc_obj_type - create hmc object of a given type
- * @dev: hardware control device structure
- * @info: information for the hmc object to create
- */
-static enum i40iw_status_code i40iw_create_hmc_obj_type(struct i40iw_sc_dev *dev,
- struct i40iw_hmc_create_obj_info *info)
-{
- return dev->hmc_ops->create_hmc_object(dev, info);
-}
-
-/**
- * i40iw_create_hmc_objs - create all hmc objects for the device
- * @iwdev: iwarp device
- * @is_pf: true if the function is PF otherwise false
- *
- * Create the device hmc objects and allocate hmc pages
- * Return 0 if successful, otherwise clean up and return error
- */
-static enum i40iw_status_code i40iw_create_hmc_objs(struct i40iw_device *iwdev,
- bool is_pf)
-{
- struct i40iw_sc_dev *dev = &iwdev->sc_dev;
- struct i40iw_hmc_create_obj_info info;
- enum i40iw_status_code status;
- int i;
-
- memset(&info, 0, sizeof(info));
- info.hmc_info = dev->hmc_info;
- info.is_pf = is_pf;
- info.entry_type = iwdev->sd_type;
- for (i = 0; i < IW_HMC_OBJ_TYPE_NUM; i++) {
- info.rsrc_type = iw_hmc_obj_types[i];
- info.count = dev->hmc_info->hmc_obj[info.rsrc_type].cnt;
- status = i40iw_create_hmc_obj_type(dev, &info);
- if (status) {
- i40iw_pr_err("create obj type %d status = %d\n",
- iw_hmc_obj_types[i], status);
- break;
- }
- }
- if (!status)
- return (dev->cqp_misc_ops->static_hmc_pages_allocated(dev->cqp, 0,
- dev->hmc_fn_id,
- true, true));
-
- while (i) {
- i--;
- /* destroy the hmc objects of a given type */
- i40iw_close_hmc_objects_type(dev,
- iw_hmc_obj_types[i],
- dev->hmc_info,
- is_pf,
- false);
- }
- return status;
-}
-
-/**
- * i40iw_obj_aligned_mem - get aligned memory from device allocated memory
- * @iwdev: iwarp device
- * @memptr: points to the memory addresses
- * @size: size of memory needed
- * @mask: mask for the aligned memory
- *
- * Get aligned memory of the requested size and
- * update the memptr to point to the new aligned memory
- * Return 0 if successful, otherwise return no memory error
- */
-enum i40iw_status_code i40iw_obj_aligned_mem(struct i40iw_device *iwdev,
- struct i40iw_dma_mem *memptr,
- u32 size,
- u32 mask)
-{
- unsigned long va, newva;
- unsigned long extra;
-
- va = (unsigned long)iwdev->obj_next.va;
- newva = va;
- if (mask)
- newva = ALIGN(va, (mask + 1));
- extra = newva - va;
- memptr->va = (u8 *)va + extra;
- memptr->pa = iwdev->obj_next.pa + extra;
- memptr->size = size;
- if ((memptr->va + size) > (iwdev->obj_mem.va + iwdev->obj_mem.size))
- return I40IW_ERR_NO_MEMORY;
-
- iwdev->obj_next.va = memptr->va + size;
- iwdev->obj_next.pa = memptr->pa + size;
- return 0;
-}
-
-/**
- * i40iw_create_cqp - create control qp
- * @iwdev: iwarp device
- *
- * Return 0, if the cqp and all the resources associated with it
- * are successfully created, otherwise return error
- */
-static enum i40iw_status_code i40iw_create_cqp(struct i40iw_device *iwdev)
-{
- enum i40iw_status_code status;
- u32 sqsize = I40IW_CQP_SW_SQSIZE_2048;
- struct i40iw_dma_mem mem;
- struct i40iw_sc_dev *dev = &iwdev->sc_dev;
- struct i40iw_cqp_init_info cqp_init_info;
- struct i40iw_cqp *cqp = &iwdev->cqp;
- u16 maj_err, min_err;
- int i;
-
- cqp->cqp_requests = kcalloc(sqsize, sizeof(*cqp->cqp_requests), GFP_KERNEL);
- if (!cqp->cqp_requests)
- return I40IW_ERR_NO_MEMORY;
- cqp->scratch_array = kcalloc(sqsize, sizeof(*cqp->scratch_array), GFP_KERNEL);
- if (!cqp->scratch_array) {
- kfree(cqp->cqp_requests);
- return I40IW_ERR_NO_MEMORY;
- }
- dev->cqp = &cqp->sc_cqp;
- dev->cqp->dev = dev;
- memset(&cqp_init_info, 0, sizeof(cqp_init_info));
- status = i40iw_allocate_dma_mem(dev->hw, &cqp->sq,
- (sizeof(struct i40iw_cqp_sq_wqe) * sqsize),
- I40IW_CQP_ALIGNMENT);
- if (status)
- goto exit;
- status = i40iw_obj_aligned_mem(iwdev, &mem, sizeof(struct i40iw_cqp_ctx),
- I40IW_HOST_CTX_ALIGNMENT_MASK);
- if (status)
- goto exit;
- dev->cqp->host_ctx_pa = mem.pa;
- dev->cqp->host_ctx = mem.va;
- /* populate the cqp init info */
- cqp_init_info.dev = dev;
- cqp_init_info.sq_size = sqsize;
- cqp_init_info.sq = cqp->sq.va;
- cqp_init_info.sq_pa = cqp->sq.pa;
- cqp_init_info.host_ctx_pa = mem.pa;
- cqp_init_info.host_ctx = mem.va;
- cqp_init_info.hmc_profile = iwdev->resource_profile;
- cqp_init_info.enabled_vf_count = iwdev->max_rdma_vfs;
- cqp_init_info.scratch_array = cqp->scratch_array;
- status = dev->cqp_ops->cqp_init(dev->cqp, &cqp_init_info);
- if (status) {
- i40iw_pr_err("cqp init status %d\n", status);
- goto exit;
- }
- status = dev->cqp_ops->cqp_create(dev->cqp, &maj_err, &min_err);
- if (status) {
- i40iw_pr_err("cqp create status %d maj_err %d min_err %d\n",
- status, maj_err, min_err);
- goto exit;
- }
- spin_lock_init(&cqp->req_lock);
- INIT_LIST_HEAD(&cqp->cqp_avail_reqs);
- INIT_LIST_HEAD(&cqp->cqp_pending_reqs);
- /* init the waitq of the cqp_requests and add them to the list */
- for (i = 0; i < I40IW_CQP_SW_SQSIZE_2048; i++) {
- init_waitqueue_head(&cqp->cqp_requests[i].waitq);
- list_add_tail(&cqp->cqp_requests[i].list, &cqp->cqp_avail_reqs);
- }
- return 0;
-exit:
- /* clean up the created resources */
- i40iw_destroy_cqp(iwdev, false);
- return status;
-}
-
-/**
- * i40iw_create_ccq - create control cq
- * @iwdev: iwarp device
- *
- * Return 0, if the ccq and the resources associated with it
- * are successfully created, otherwise return error
- */
-static enum i40iw_status_code i40iw_create_ccq(struct i40iw_device *iwdev)
-{
- struct i40iw_sc_dev *dev = &iwdev->sc_dev;
- struct i40iw_dma_mem mem;
- enum i40iw_status_code status;
- struct i40iw_ccq_init_info info;
- struct i40iw_ccq *ccq = &iwdev->ccq;
-
- memset(&info, 0, sizeof(info));
- dev->ccq = &ccq->sc_cq;
- dev->ccq->dev = dev;
- info.dev = dev;
- ccq->shadow_area.size = sizeof(struct i40iw_cq_shadow_area);
- ccq->mem_cq.size = sizeof(struct i40iw_cqe) * IW_CCQ_SIZE;
- status = i40iw_allocate_dma_mem(dev->hw, &ccq->mem_cq,
- ccq->mem_cq.size, I40IW_CQ0_ALIGNMENT);
- if (status)
- goto exit;
- status = i40iw_obj_aligned_mem(iwdev, &mem, ccq->shadow_area.size,
- I40IW_SHADOWAREA_MASK);
- if (status)
- goto exit;
- ccq->sc_cq.back_cq = (void *)ccq;
- /* populate the ccq init info */
- info.cq_base = ccq->mem_cq.va;
- info.cq_pa = ccq->mem_cq.pa;
- info.num_elem = IW_CCQ_SIZE;
- info.shadow_area = mem.va;
- info.shadow_area_pa = mem.pa;
- info.ceqe_mask = false;
- info.ceq_id_valid = true;
- info.shadow_read_threshold = 16;
- status = dev->ccq_ops->ccq_init(dev->ccq, &info);
- if (!status)
- status = dev->ccq_ops->ccq_create(dev->ccq, 0, true, true);
-exit:
- if (status)
- i40iw_free_dma_mem(dev->hw, &ccq->mem_cq);
- return status;
-}
-
-/**
- * i40iw_configure_ceq_vector - set up the msix interrupt vector for ceq
- * @iwdev: iwarp device
- * @msix_vec: interrupt vector information
- * @iwceq: ceq associated with the vector
- * @ceq_id: the id number of the iwceq
- *
- * Allocate interrupt resources and enable irq handling
- * Return 0 if successful, otherwise return error
- */
-static enum i40iw_status_code i40iw_configure_ceq_vector(struct i40iw_device *iwdev,
- struct i40iw_ceq *iwceq,
- u32 ceq_id,
- struct i40iw_msix_vector *msix_vec)
-{
- enum i40iw_status_code status;
- cpumask_t mask;
-
- if (iwdev->msix_shared && !ceq_id) {
- tasklet_init(&iwdev->dpc_tasklet, i40iw_dpc, (unsigned long)iwdev);
- status = request_irq(msix_vec->irq, i40iw_irq_handler, 0, "AEQCEQ", iwdev);
- } else {
- tasklet_init(&iwceq->dpc_tasklet, i40iw_ceq_dpc, (unsigned long)iwceq);
- status = request_irq(msix_vec->irq, i40iw_ceq_handler, 0, "CEQ", iwceq);
- }
-
- cpumask_clear(&mask);
- cpumask_set_cpu(msix_vec->cpu_affinity, &mask);
- irq_set_affinity_hint(msix_vec->irq, &mask);
-
- if (status) {
- i40iw_pr_err("ceq irq config fail\n");
- return I40IW_ERR_CONFIG;
- }
- msix_vec->ceq_id = ceq_id;
-
- return 0;
-}
-
-/**
- * i40iw_create_ceq - create completion event queue
- * @iwdev: iwarp device
- * @iwceq: pointer to the ceq resources to be created
- * @ceq_id: the id number of the iwceq
- *
- * Return 0, if the ceq and the resources associated with it
- * are successfully created, otherwise return error
- */
-static enum i40iw_status_code i40iw_create_ceq(struct i40iw_device *iwdev,
- struct i40iw_ceq *iwceq,
- u32 ceq_id)
-{
- enum i40iw_status_code status;
- struct i40iw_ceq_init_info info;
- struct i40iw_sc_dev *dev = &iwdev->sc_dev;
- u64 scratch;
-
- memset(&info, 0, sizeof(info));
- info.ceq_id = ceq_id;
- iwceq->iwdev = iwdev;
- iwceq->mem.size = sizeof(struct i40iw_ceqe) *
- iwdev->sc_dev.hmc_info->hmc_obj[I40IW_HMC_IW_CQ].cnt;
- status = i40iw_allocate_dma_mem(dev->hw, &iwceq->mem, iwceq->mem.size,
- I40IW_CEQ_ALIGNMENT);
- if (status)
- goto exit;
- info.ceq_id = ceq_id;
- info.ceqe_base = iwceq->mem.va;
- info.ceqe_pa = iwceq->mem.pa;
-
- info.elem_cnt = iwdev->sc_dev.hmc_info->hmc_obj[I40IW_HMC_IW_CQ].cnt;
- iwceq->sc_ceq.ceq_id = ceq_id;
- info.dev = dev;
- scratch = (uintptr_t)&iwdev->cqp.sc_cqp;
- status = dev->ceq_ops->ceq_init(&iwceq->sc_ceq, &info);
- if (!status)
- status = dev->ceq_ops->cceq_create(&iwceq->sc_ceq, scratch);
-
-exit:
- if (status)
- i40iw_free_dma_mem(dev->hw, &iwceq->mem);
- return status;
-}
-
-void i40iw_request_reset(struct i40iw_device *iwdev)
-{
- struct i40e_info *ldev = iwdev->ldev;
-
- ldev->ops->request_reset(ldev, iwdev->client, 1);
-}
-
-/**
- * i40iw_setup_ceqs - manage the device ceq's and their interrupt resources
- * @iwdev: iwarp device
- * @ldev: i40e lan device
- *
- * Allocate a list for all device completion event queues
- * Create the ceq's and configure their msix interrupt vectors
- * Return 0, if at least one ceq is successfully set up, otherwise return error
- */
-static enum i40iw_status_code i40iw_setup_ceqs(struct i40iw_device *iwdev,
- struct i40e_info *ldev)
-{
- u32 i;
- u32 ceq_id;
- struct i40iw_ceq *iwceq;
- struct i40iw_msix_vector *msix_vec;
- enum i40iw_status_code status = 0;
- u32 num_ceqs;
-
- if (ldev && ldev->ops && ldev->ops->setup_qvlist) {
- status = ldev->ops->setup_qvlist(ldev, &i40iw_client,
- iwdev->iw_qvlist);
- if (status)
- goto exit;
- } else {
- status = I40IW_ERR_BAD_PTR;
- goto exit;
- }
-
- num_ceqs = min(iwdev->msix_count, iwdev->sc_dev.hmc_fpm_misc.max_ceqs);
- iwdev->ceqlist = kcalloc(num_ceqs, sizeof(*iwdev->ceqlist), GFP_KERNEL);
- if (!iwdev->ceqlist) {
- status = I40IW_ERR_NO_MEMORY;
- goto exit;
- }
- i = (iwdev->msix_shared) ? 0 : 1;
- for (ceq_id = 0; i < num_ceqs; i++, ceq_id++) {
- iwceq = &iwdev->ceqlist[ceq_id];
- status = i40iw_create_ceq(iwdev, iwceq, ceq_id);
- if (status) {
- i40iw_pr_err("create ceq status = %d\n", status);
- break;
- }
-
- msix_vec = &iwdev->iw_msixtbl[i];
- iwceq->irq = msix_vec->irq;
- iwceq->msix_idx = msix_vec->idx;
- status = i40iw_configure_ceq_vector(iwdev, iwceq, ceq_id, msix_vec);
- if (status) {
- i40iw_destroy_ceq(iwdev, iwceq, false);
- break;
- }
- i40iw_enable_intr(&iwdev->sc_dev, msix_vec->idx);
- iwdev->ceqs_count++;
- }
-
-exit:
- if (status) {
- if (!iwdev->ceqs_count) {
- kfree(iwdev->ceqlist);
- iwdev->ceqlist = NULL;
- } else {
- status = 0;
- }
- }
- return status;
-}
-
-/**
- * i40iw_configure_aeq_vector - set up the msix vector for aeq
- * @iwdev: iwarp device
- *
- * Allocate interrupt resources and enable irq handling
- * Return 0 if successful, otherwise return error
- */
-static enum i40iw_status_code i40iw_configure_aeq_vector(struct i40iw_device *iwdev)
-{
- struct i40iw_msix_vector *msix_vec = iwdev->iw_msixtbl;
- u32 ret = 0;
-
- if (!iwdev->msix_shared) {
- tasklet_init(&iwdev->dpc_tasklet, i40iw_dpc, (unsigned long)iwdev);
- ret = request_irq(msix_vec->irq, i40iw_irq_handler, 0, "i40iw", iwdev);
- }
- if (ret) {
- i40iw_pr_err("aeq irq config fail\n");
- return I40IW_ERR_CONFIG;
- }
-
- return 0;
-}
-
-/**
- * i40iw_create_aeq - create async event queue
- * @iwdev: iwarp device
- *
- * Return 0, if the aeq and the resources associated with it
- * are successfully created, otherwise return error
- */
-static enum i40iw_status_code i40iw_create_aeq(struct i40iw_device *iwdev)
-{
- enum i40iw_status_code status;
- struct i40iw_aeq_init_info info;
- struct i40iw_sc_dev *dev = &iwdev->sc_dev;
- struct i40iw_aeq *aeq = &iwdev->aeq;
- u64 scratch = 0;
- u32 aeq_size;
-
- aeq_size = 2 * iwdev->sc_dev.hmc_info->hmc_obj[I40IW_HMC_IW_QP].cnt +
- iwdev->sc_dev.hmc_info->hmc_obj[I40IW_HMC_IW_CQ].cnt;
- memset(&info, 0, sizeof(info));
- aeq->mem.size = sizeof(struct i40iw_sc_aeqe) * aeq_size;
- status = i40iw_allocate_dma_mem(dev->hw, &aeq->mem, aeq->mem.size,
- I40IW_AEQ_ALIGNMENT);
- if (status)
- goto exit;
-
- info.aeqe_base = aeq->mem.va;
- info.aeq_elem_pa = aeq->mem.pa;
- info.elem_cnt = aeq_size;
- info.dev = dev;
- status = dev->aeq_ops->aeq_init(&aeq->sc_aeq, &info);
- if (status)
- goto exit;
- status = dev->aeq_ops->aeq_create(&aeq->sc_aeq, scratch, 1);
- if (!status)
- status = dev->aeq_ops->aeq_create_done(&aeq->sc_aeq);
-exit:
- if (status)
- i40iw_free_dma_mem(dev->hw, &aeq->mem);
- return status;
-}
-
-/**
- * i40iw_setup_aeq - set up the device aeq
- * @iwdev: iwarp device
- *
- * Create the aeq and configure its msix interrupt vector
- * Return 0 if successful, otherwise return error
- */
-static enum i40iw_status_code i40iw_setup_aeq(struct i40iw_device *iwdev)
-{
- struct i40iw_sc_dev *dev = &iwdev->sc_dev;
- enum i40iw_status_code status;
-
- status = i40iw_create_aeq(iwdev);
- if (status)
- return status;
-
- status = i40iw_configure_aeq_vector(iwdev);
- if (status) {
- i40iw_destroy_aeq(iwdev, false);
- return status;
- }
-
- if (!iwdev->msix_shared)
- i40iw_enable_intr(dev, iwdev->iw_msixtbl[0].idx);
- return 0;
-}
-
-/**
- * i40iw_initialize_ilq - create iwarp local queue for cm
- * @iwdev: iwarp device
- *
- * Return 0 if successful, otherwise return error
- */
-static enum i40iw_status_code i40iw_initialize_ilq(struct i40iw_device *iwdev)
-{
- struct i40iw_puda_rsrc_info info;
- enum i40iw_status_code status;
-
- memset(&info, 0, sizeof(info));
- info.type = I40IW_PUDA_RSRC_TYPE_ILQ;
- info.cq_id = 1;
- info.qp_id = 0;
- info.count = 1;
- info.pd_id = 1;
- info.sq_size = 8192;
- info.rq_size = 8192;
- info.buf_size = 1024;
- info.tx_buf_cnt = 16384;
- info.receive = i40iw_receive_ilq;
- info.xmit_complete = i40iw_free_sqbuf;
- status = i40iw_puda_create_rsrc(&iwdev->vsi, &info);
- if (status)
- i40iw_pr_err("ilq create fail\n");
- return status;
-}
-
-/**
- * i40iw_initialize_ieq - create iwarp exception queue
- * @iwdev: iwarp device
- *
- * Return 0 if successful, otherwise return error
- */
-static enum i40iw_status_code i40iw_initialize_ieq(struct i40iw_device *iwdev)
-{
- struct i40iw_puda_rsrc_info info;
- enum i40iw_status_code status;
-
- memset(&info, 0, sizeof(info));
- info.type = I40IW_PUDA_RSRC_TYPE_IEQ;
- info.cq_id = 2;
- info.qp_id = iwdev->sc_dev.exception_lan_queue;
- info.count = 1;
- info.pd_id = 2;
- info.sq_size = 8192;
- info.rq_size = 8192;
- info.buf_size = 2048;
- info.tx_buf_cnt = 16384;
- status = i40iw_puda_create_rsrc(&iwdev->vsi, &info);
- if (status)
- i40iw_pr_err("ieq create fail\n");
- return status;
-}
-
-/**
- * i40iw_hmc_setup - create hmc objects for the device
- * @iwdev: iwarp device
- *
- * Set up the device private memory space for the number and size of
- * the hmc objects and create the objects
- * Return 0 if successful, otherwise return error
- */
-static enum i40iw_status_code i40iw_hmc_setup(struct i40iw_device *iwdev)
-{
- enum i40iw_status_code status;
-
- iwdev->sd_type = I40IW_SD_TYPE_DIRECT;
- status = i40iw_config_fpm_values(&iwdev->sc_dev, IW_CFG_FPM_QP_COUNT);
- if (status)
- goto exit;
- status = i40iw_create_hmc_objs(iwdev, true);
- if (status)
- goto exit;
- iwdev->init_state = HMC_OBJS_CREATED;
-exit:
- return status;
-}
-
-/**
- * i40iw_del_init_mem - deallocate memory resources
- * @iwdev: iwarp device
- */
-static void i40iw_del_init_mem(struct i40iw_device *iwdev)
-{
- struct i40iw_sc_dev *dev = &iwdev->sc_dev;
-
- i40iw_free_dma_mem(&iwdev->hw, &iwdev->obj_mem);
- kfree(dev->hmc_info->sd_table.sd_entry);
- dev->hmc_info->sd_table.sd_entry = NULL;
- kfree(iwdev->mem_resources);
- iwdev->mem_resources = NULL;
- kfree(iwdev->ceqlist);
- iwdev->ceqlist = NULL;
- kfree(iwdev->iw_msixtbl);
- iwdev->iw_msixtbl = NULL;
- kfree(iwdev->hmc_info_mem);
- iwdev->hmc_info_mem = NULL;
-}
-
-/**
- * i40iw_del_macip_entry - remove a mac ip address entry from the hw table
- * @iwdev: iwarp device
- * @idx: the index of the mac ip address to delete
- */
-static void i40iw_del_macip_entry(struct i40iw_device *iwdev, u8 idx)
-{
- struct i40iw_cqp *iwcqp = &iwdev->cqp;
- struct i40iw_cqp_request *cqp_request;
- struct cqp_commands_info *cqp_info;
- enum i40iw_status_code status = 0;
-
- cqp_request = i40iw_get_cqp_request(iwcqp, true);
- if (!cqp_request) {
- i40iw_pr_err("cqp_request memory failed\n");
- return;
- }
- cqp_info = &cqp_request->info;
- cqp_info->cqp_cmd = OP_DELETE_LOCAL_MAC_IPADDR_ENTRY;
- cqp_info->post_sq = 1;
- cqp_info->in.u.del_local_mac_ipaddr_entry.cqp = &iwcqp->sc_cqp;
- cqp_info->in.u.del_local_mac_ipaddr_entry.scratch = (uintptr_t)cqp_request;
- cqp_info->in.u.del_local_mac_ipaddr_entry.entry_idx = idx;
- cqp_info->in.u.del_local_mac_ipaddr_entry.ignore_ref_count = 0;
- status = i40iw_handle_cqp_op(iwdev, cqp_request);
- if (status)
- i40iw_pr_err("CQP-OP Del MAC Ip entry fail");
-}
-
-/**
- * i40iw_add_mac_ipaddr_entry - add a mac ip address entry to the hw table
- * @iwdev: iwarp device
- * @mac_addr: pointer to mac address
- * @idx: the index of the mac ip address to add
- */
-static enum i40iw_status_code i40iw_add_mac_ipaddr_entry(struct i40iw_device *iwdev,
- u8 *mac_addr,
- u8 idx)
-{
- struct i40iw_local_mac_ipaddr_entry_info *info;
- struct i40iw_cqp *iwcqp = &iwdev->cqp;
- struct i40iw_cqp_request *cqp_request;
- struct cqp_commands_info *cqp_info;
- enum i40iw_status_code status = 0;
-
- cqp_request = i40iw_get_cqp_request(iwcqp, true);
- if (!cqp_request) {
- i40iw_pr_err("cqp_request memory failed\n");
- return I40IW_ERR_NO_MEMORY;
- }
-
- cqp_info = &cqp_request->info;
-
- cqp_info->post_sq = 1;
- info = &cqp_info->in.u.add_local_mac_ipaddr_entry.info;
- ether_addr_copy(info->mac_addr, mac_addr);
- info->entry_idx = idx;
- cqp_info->in.u.add_local_mac_ipaddr_entry.scratch = (uintptr_t)cqp_request;
- cqp_info->cqp_cmd = OP_ADD_LOCAL_MAC_IPADDR_ENTRY;
- cqp_info->in.u.add_local_mac_ipaddr_entry.cqp = &iwcqp->sc_cqp;
- cqp_info->in.u.add_local_mac_ipaddr_entry.scratch = (uintptr_t)cqp_request;
- status = i40iw_handle_cqp_op(iwdev, cqp_request);
- if (status)
- i40iw_pr_err("CQP-OP Add MAC Ip entry fail");
- return status;
-}
-
-/**
- * i40iw_alloc_local_mac_ipaddr_entry - allocate a mac ip address entry
- * @iwdev: iwarp device
- * @mac_ip_tbl_idx: the index of the new mac ip address
- *
- * Allocate a mac ip address entry and update the mac_ip_tbl_idx
- * to hold the index of the newly created mac ip address
- * Return 0 if successful, otherwise return error
- */
-static enum i40iw_status_code i40iw_alloc_local_mac_ipaddr_entry(struct i40iw_device *iwdev,
- u16 *mac_ip_tbl_idx)
-{
- struct i40iw_cqp *iwcqp = &iwdev->cqp;
- struct i40iw_cqp_request *cqp_request;
- struct cqp_commands_info *cqp_info;
- enum i40iw_status_code status = 0;
-
- cqp_request = i40iw_get_cqp_request(iwcqp, true);
- if (!cqp_request) {
- i40iw_pr_err("cqp_request memory failed\n");
- return I40IW_ERR_NO_MEMORY;
- }
-
- /* increment refcount, because we need the cqp request ret value */
- atomic_inc(&cqp_request->refcount);
-
- cqp_info = &cqp_request->info;
- cqp_info->cqp_cmd = OP_ALLOC_LOCAL_MAC_IPADDR_ENTRY;
- cqp_info->post_sq = 1;
- cqp_info->in.u.alloc_local_mac_ipaddr_entry.cqp = &iwcqp->sc_cqp;
- cqp_info->in.u.alloc_local_mac_ipaddr_entry.scratch = (uintptr_t)cqp_request;
- status = i40iw_handle_cqp_op(iwdev, cqp_request);
- if (!status)
- *mac_ip_tbl_idx = cqp_request->compl_info.op_ret_val;
- else
- i40iw_pr_err("CQP-OP Alloc MAC Ip entry fail");
- /* decrement refcount and free the cqp request, if no longer used */
- i40iw_put_cqp_request(iwcqp, cqp_request);
- return status;
-}
-
-/**
- * i40iw_alloc_set_mac_ipaddr - set up a mac ip address table entry
- * @iwdev: iwarp device
- * @macaddr: pointer to mac address
- *
- * Allocate a mac ip address entry and add it to the hw table
- * Return 0 if successful, otherwise return error
- */
-static enum i40iw_status_code i40iw_alloc_set_mac_ipaddr(struct i40iw_device *iwdev,
- u8 *macaddr)
-{
- enum i40iw_status_code status;
-
- status = i40iw_alloc_local_mac_ipaddr_entry(iwdev, &iwdev->mac_ip_table_idx);
- if (!status) {
- status = i40iw_add_mac_ipaddr_entry(iwdev, macaddr,
- (u8)iwdev->mac_ip_table_idx);
- if (status)
- i40iw_del_macip_entry(iwdev, (u8)iwdev->mac_ip_table_idx);
- }
- return status;
-}
-
-/**
- * i40iw_add_ipv6_addr - add ipv6 address to the hw arp table
- * @iwdev: iwarp device
- */
-static void i40iw_add_ipv6_addr(struct i40iw_device *iwdev)
-{
- struct net_device *ip_dev;
- struct inet6_dev *idev;
- struct inet6_ifaddr *ifp, *tmp;
- u32 local_ipaddr6[4];
-
- rcu_read_lock();
- for_each_netdev_rcu(&init_net, ip_dev) {
- if ((((rdma_vlan_dev_vlan_id(ip_dev) < 0xFFFF) &&
- (rdma_vlan_dev_real_dev(ip_dev) == iwdev->netdev)) ||
- (ip_dev == iwdev->netdev)) && (ip_dev->flags & IFF_UP)) {
- idev = __in6_dev_get(ip_dev);
- if (!idev) {
- i40iw_pr_err("ipv6 inet device not found\n");
- break;
- }
- list_for_each_entry_safe(ifp, tmp, &idev->addr_list, if_list) {
- i40iw_pr_info("IP=%pI6, vlan_id=%d, MAC=%pM\n", &ifp->addr,
- rdma_vlan_dev_vlan_id(ip_dev), ip_dev->dev_addr);
- i40iw_copy_ip_ntohl(local_ipaddr6,
- ifp->addr.in6_u.u6_addr32);
- i40iw_manage_arp_cache(iwdev,
- ip_dev->dev_addr,
- local_ipaddr6,
- false,
- I40IW_ARP_ADD);
- }
- }
- }
- rcu_read_unlock();
-}
-
-/**
- * i40iw_add_ipv4_addr - add ipv4 address to the hw arp table
- * @iwdev: iwarp device
- */
-static void i40iw_add_ipv4_addr(struct i40iw_device *iwdev)
-{
- struct net_device *dev;
- struct in_device *idev;
- bool got_lock = true;
- u32 ip_addr;
-
- if (!rtnl_trylock())
- got_lock = false;
-
- for_each_netdev(&init_net, dev) {
- if ((((rdma_vlan_dev_vlan_id(dev) < 0xFFFF) &&
- (rdma_vlan_dev_real_dev(dev) == iwdev->netdev)) ||
- (dev == iwdev->netdev)) && (dev->flags & IFF_UP)) {
- idev = in_dev_get(dev);
- for_ifa(idev) {
- i40iw_debug(&iwdev->sc_dev, I40IW_DEBUG_CM,
- "IP=%pI4, vlan_id=%d, MAC=%pM\n", &ifa->ifa_address,
- rdma_vlan_dev_vlan_id(dev), dev->dev_addr);
-
- ip_addr = ntohl(ifa->ifa_address);
- i40iw_manage_arp_cache(iwdev,
- dev->dev_addr,
- &ip_addr,
- true,
- I40IW_ARP_ADD);
- }
- endfor_ifa(idev);
- in_dev_put(idev);
- }
- }
- if (got_lock)
- rtnl_unlock();
-}
-
-/**
- * i40iw_add_mac_ip - add mac and ip addresses
- * @iwdev: iwarp device
- *
- * Create and add a mac ip address entry to the hw table and
- * ipv4/ipv6 addresses to the arp cache
- * Return 0 if successful, otherwise return error
- */
-static enum i40iw_status_code i40iw_add_mac_ip(struct i40iw_device *iwdev)
-{
- struct net_device *netdev = iwdev->netdev;
- enum i40iw_status_code status;
-
- status = i40iw_alloc_set_mac_ipaddr(iwdev, (u8 *)netdev->dev_addr);
- if (status)
- return status;
- i40iw_add_ipv4_addr(iwdev);
- i40iw_add_ipv6_addr(iwdev);
- return 0;
-}
-
-/**
- * i40iw_wait_pe_ready - Check if firmware is ready
- * @hw: provides access to registers
- */
-static void i40iw_wait_pe_ready(struct i40iw_hw *hw)
-{
- u32 statusfw;
- u32 statuscpu0;
- u32 statuscpu1;
- u32 statuscpu2;
- u32 retrycount = 0;
-
- do {
- statusfw = i40iw_rd32(hw, I40E_GLPE_FWLDSTATUS);
- i40iw_pr_info("[%04d] fm load status[x%04X]\n", __LINE__, statusfw);
- statuscpu0 = i40iw_rd32(hw, I40E_GLPE_CPUSTATUS0);
- i40iw_pr_info("[%04d] CSR_CQP status[x%04X]\n", __LINE__, statuscpu0);
- statuscpu1 = i40iw_rd32(hw, I40E_GLPE_CPUSTATUS1);
- i40iw_pr_info("[%04d] I40E_GLPE_CPUSTATUS1 status[x%04X]\n",
- __LINE__, statuscpu1);
- statuscpu2 = i40iw_rd32(hw, I40E_GLPE_CPUSTATUS2);
- i40iw_pr_info("[%04d] I40E_GLPE_CPUSTATUS2 status[x%04X]\n",
- __LINE__, statuscpu2);
- if ((statuscpu0 == 0x80) && (statuscpu1 == 0x80) && (statuscpu2 == 0x80))
- break; /* SUCCESS */
- mdelay(1000);
- retrycount++;
- } while (retrycount < 14);
- i40iw_wr32(hw, 0xb4040, 0x4C104C5);
-}
-
-/**
- * i40iw_initialize_dev - initialize device
- * @iwdev: iwarp device
- * @ldev: lan device information
- *
- * Allocate memory for the hmc objects and initialize iwdev
- * Return 0 if successful, otherwise clean up the resources
- * and return error
- */
-static enum i40iw_status_code i40iw_initialize_dev(struct i40iw_device *iwdev,
- struct i40e_info *ldev)
-{
- enum i40iw_status_code status;
- struct i40iw_sc_dev *dev = &iwdev->sc_dev;
- struct i40iw_device_init_info info;
- struct i40iw_vsi_init_info vsi_info;
- struct i40iw_dma_mem mem;
- struct i40iw_l2params l2params;
- u32 size;
- struct i40iw_vsi_stats_info stats_info;
- u16 last_qset = I40IW_NO_QSET;
- u16 qset;
- u32 i;
-
- memset(&l2params, 0, sizeof(l2params));
- memset(&info, 0, sizeof(info));
- size = sizeof(struct i40iw_hmc_pble_rsrc) + sizeof(struct i40iw_hmc_info) +
- (sizeof(struct i40iw_hmc_obj_info) * I40IW_HMC_IW_MAX);
- iwdev->hmc_info_mem = kzalloc(size, GFP_KERNEL);
- if (!iwdev->hmc_info_mem)
- return I40IW_ERR_NO_MEMORY;
-
- iwdev->pble_rsrc = (struct i40iw_hmc_pble_rsrc *)iwdev->hmc_info_mem;
- dev->hmc_info = &iwdev->hw.hmc;
- dev->hmc_info->hmc_obj = (struct i40iw_hmc_obj_info *)(iwdev->pble_rsrc + 1);
- status = i40iw_obj_aligned_mem(iwdev, &mem, I40IW_QUERY_FPM_BUF_SIZE,
- I40IW_FPM_QUERY_BUF_ALIGNMENT_MASK);
- if (status)
- goto exit;
- info.fpm_query_buf_pa = mem.pa;
- info.fpm_query_buf = mem.va;
- status = i40iw_obj_aligned_mem(iwdev, &mem, I40IW_COMMIT_FPM_BUF_SIZE,
- I40IW_FPM_COMMIT_BUF_ALIGNMENT_MASK);
- if (status)
- goto exit;
- info.fpm_commit_buf_pa = mem.pa;
- info.fpm_commit_buf = mem.va;
- info.hmc_fn_id = ldev->fid;
- info.is_pf = (ldev->ftype) ? false : true;
- info.bar0 = ldev->hw_addr;
- info.hw = &iwdev->hw;
- info.debug_mask = debug;
- l2params.mss =
- (ldev->params.mtu) ? ldev->params.mtu - I40IW_MTU_TO_MSS : I40IW_DEFAULT_MSS;
- for (i = 0; i < I40E_CLIENT_MAX_USER_PRIORITY; i++) {
- qset = ldev->params.qos.prio_qos[i].qs_handle;
- l2params.qs_handle_list[i] = qset;
- if (last_qset == I40IW_NO_QSET)
- last_qset = qset;
- else if ((qset != last_qset) && (qset != I40IW_NO_QSET))
- iwdev->dcb = true;
- }
- i40iw_pr_info("DCB is set/clear = %d\n", iwdev->dcb);
- info.exception_lan_queue = 1;
- info.vchnl_send = i40iw_virtchnl_send;
- status = i40iw_device_init(&iwdev->sc_dev, &info);
-exit:
- if (status) {
- kfree(iwdev->hmc_info_mem);
- iwdev->hmc_info_mem = NULL;
- }
- memset(&vsi_info, 0, sizeof(vsi_info));
- vsi_info.dev = &iwdev->sc_dev;
- vsi_info.back_vsi = (void *)iwdev;
- vsi_info.params = &l2params;
- i40iw_sc_vsi_init(&iwdev->vsi, &vsi_info);
-
- if (dev->is_pf) {
- memset(&stats_info, 0, sizeof(stats_info));
- stats_info.fcn_id = ldev->fid;
- stats_info.pestat = kzalloc(sizeof(*stats_info.pestat), GFP_KERNEL);
- stats_info.stats_initialize = true;
- if (stats_info.pestat)
- i40iw_vsi_stats_init(&iwdev->vsi, &stats_info);
- }
- return status;
-}
-
-/**
- * i40iw_register_notifiers - register tcp ip notifiers
- */
-static void i40iw_register_notifiers(void)
-{
- if (atomic_inc_return(&i40iw_notifiers_registered) == 1) {
- register_inetaddr_notifier(&i40iw_inetaddr_notifier);
- register_inet6addr_notifier(&i40iw_inetaddr6_notifier);
- register_netevent_notifier(&i40iw_net_notifier);
- }
-}
-
-/**
- * i40iw_save_msix_info - copy msix vector information to iwarp device
- * @iwdev: iwarp device
- * @ldev: lan device information
- *
- * Allocate iwdev msix table and copy the ldev msix info to the table
- * Return 0 if successful, otherwise return error
- */
-static enum i40iw_status_code i40iw_save_msix_info(struct i40iw_device *iwdev,
- struct i40e_info *ldev)
-{
- struct i40e_qvlist_info *iw_qvlist;
- struct i40e_qv_info *iw_qvinfo;
- u32 ceq_idx;
- u32 i;
- u32 size;
-
- iwdev->msix_count = ldev->msix_count;
-
- size = sizeof(struct i40iw_msix_vector) * iwdev->msix_count;
- size += sizeof(struct i40e_qvlist_info);
- size += sizeof(struct i40e_qv_info) * iwdev->msix_count - 1;
- iwdev->iw_msixtbl = kzalloc(size, GFP_KERNEL);
-
- if (!iwdev->iw_msixtbl)
- return I40IW_ERR_NO_MEMORY;
- iwdev->iw_qvlist = (struct i40e_qvlist_info *)(&iwdev->iw_msixtbl[iwdev->msix_count]);
- iw_qvlist = iwdev->iw_qvlist;
- iw_qvinfo = iw_qvlist->qv_info;
- iw_qvlist->num_vectors = iwdev->msix_count;
- if (iwdev->msix_count <= num_online_cpus())
- iwdev->msix_shared = true;
- for (i = 0, ceq_idx = 0; i < iwdev->msix_count; i++, iw_qvinfo++) {
- iwdev->iw_msixtbl[i].idx = ldev->msix_entries[i].entry;
- iwdev->iw_msixtbl[i].irq = ldev->msix_entries[i].vector;
- iwdev->iw_msixtbl[i].cpu_affinity = ceq_idx;
- if (i == 0) {
- iw_qvinfo->aeq_idx = 0;
- if (iwdev->msix_shared)
- iw_qvinfo->ceq_idx = ceq_idx++;
- else
- iw_qvinfo->ceq_idx = I40E_QUEUE_INVALID_IDX;
- } else {
- iw_qvinfo->aeq_idx = I40E_QUEUE_INVALID_IDX;
- iw_qvinfo->ceq_idx = ceq_idx++;
- }
- iw_qvinfo->itr_idx = 3;
- iw_qvinfo->v_idx = iwdev->iw_msixtbl[i].idx;
- }
- return 0;
-}
-
-/**
- * i40iw_deinit_device - clean up the device resources
- * @iwdev: iwarp device
- * @reset: true if called before reset
- *
- * Destroy the ib device interface, remove the mac ip entry and ipv4/ipv6 addresses,
- * destroy the device queues and free the pble and the hmc objects
- */
-static void i40iw_deinit_device(struct i40iw_device *iwdev, bool reset)
-{
- struct i40e_info *ldev = iwdev->ldev;
-
- struct i40iw_sc_dev *dev = &iwdev->sc_dev;
-
- i40iw_pr_info("state = %d\n", iwdev->init_state);
- if (iwdev->param_wq)
- destroy_workqueue(iwdev->param_wq);
-
- switch (iwdev->init_state) {
- case RDMA_DEV_REGISTERED:
- iwdev->iw_status = 0;
- i40iw_port_ibevent(iwdev);
- i40iw_destroy_rdma_device(iwdev->iwibdev);
- /* fallthrough */
- case IP_ADDR_REGISTERED:
- if (!reset)
- i40iw_del_macip_entry(iwdev, (u8)iwdev->mac_ip_table_idx);
- /* fallthrough */
- case INET_NOTIFIER:
- if (!atomic_dec_return(&i40iw_notifiers_registered)) {
- unregister_netevent_notifier(&i40iw_net_notifier);
- unregister_inetaddr_notifier(&i40iw_inetaddr_notifier);
- unregister_inet6addr_notifier(&i40iw_inetaddr6_notifier);
- }
- /* fallthrough */
- case CEQ_CREATED:
- i40iw_dele_ceqs(iwdev, reset);
- /* fallthrough */
- case AEQ_CREATED:
- i40iw_destroy_aeq(iwdev, reset);
- /* fallthrough */
- case IEQ_CREATED:
- i40iw_puda_dele_resources(&iwdev->vsi, I40IW_PUDA_RSRC_TYPE_IEQ, reset);
- /* fallthrough */
- case ILQ_CREATED:
- i40iw_puda_dele_resources(&iwdev->vsi, I40IW_PUDA_RSRC_TYPE_ILQ, reset);
- /* fallthrough */
- case CCQ_CREATED:
- i40iw_destroy_ccq(iwdev, reset);
- /* fallthrough */
- case PBLE_CHUNK_MEM:
- i40iw_destroy_pble_pool(dev, iwdev->pble_rsrc);
- /* fallthrough */
- case HMC_OBJS_CREATED:
- i40iw_del_hmc_objects(dev, dev->hmc_info, true, reset);
- /* fallthrough */
- case CQP_CREATED:
- i40iw_destroy_cqp(iwdev, true);
- /* fallthrough */
- case INITIAL_STATE:
- i40iw_cleanup_cm_core(&iwdev->cm_core);
- if (iwdev->vsi.pestat) {
- i40iw_vsi_stats_free(&iwdev->vsi);
- kfree(iwdev->vsi.pestat);
- }
- i40iw_del_init_mem(iwdev);
- break;
- case INVALID_STATE:
- /* fallthrough */
- default:
- i40iw_pr_err("bad init_state = %d\n", iwdev->init_state);
- break;
- }
-
- i40iw_del_handler(i40iw_find_i40e_handler(ldev));
- kfree(iwdev->hdl);
-}
-
-/**
- * i40iw_setup_init_state - set up the initial device struct
- * @hdl: handler for iwarp device - one per instance
- * @ldev: lan device information
- * @client: iwarp client information, provided during registration
- *
- * Initialize the iwarp device and its hdl information
- * using the ldev and client information
- * Return 0 if successful, otherwise return error
- */
-static enum i40iw_status_code i40iw_setup_init_state(struct i40iw_handler *hdl,
- struct i40e_info *ldev,
- struct i40e_client *client)
-{
- struct i40iw_device *iwdev = &hdl->device;
- struct i40iw_sc_dev *dev = &iwdev->sc_dev;
- enum i40iw_status_code status;
-
- memcpy(&hdl->ldev, ldev, sizeof(*ldev));
- if (resource_profile == 1)
- resource_profile = 2;
-
- iwdev->mpa_version = mpa_version;
- iwdev->resource_profile = (resource_profile < I40IW_HMC_PROFILE_EQUAL) ?
- (u8)resource_profile + I40IW_HMC_PROFILE_DEFAULT :
- I40IW_HMC_PROFILE_DEFAULT;
- iwdev->max_rdma_vfs =
- (iwdev->resource_profile != I40IW_HMC_PROFILE_DEFAULT) ? max_rdma_vfs : 0;
- iwdev->max_enabled_vfs = iwdev->max_rdma_vfs;
- iwdev->netdev = ldev->netdev;
- hdl->client = client;
- if (!ldev->ftype)
- iwdev->db_start = pci_resource_start(ldev->pcidev, 0) + I40IW_DB_ADDR_OFFSET;
- else
- iwdev->db_start = pci_resource_start(ldev->pcidev, 0) + I40IW_VF_DB_ADDR_OFFSET;
-
- status = i40iw_save_msix_info(iwdev, ldev);
- if (status)
- goto exit;
- iwdev->hw.dev_context = (void *)ldev->pcidev;
- iwdev->hw.hw_addr = ldev->hw_addr;
- status = i40iw_allocate_dma_mem(&iwdev->hw,
- &iwdev->obj_mem, 8192, 4096);
- if (status)
- goto exit;
- iwdev->obj_next = iwdev->obj_mem;
- iwdev->push_mode = push_mode;
-
- init_waitqueue_head(&iwdev->vchnl_waitq);
- init_waitqueue_head(&dev->vf_reqs);
- init_waitqueue_head(&iwdev->close_wq);
-
- status = i40iw_initialize_dev(iwdev, ldev);
-exit:
- if (status) {
- kfree(iwdev->iw_msixtbl);
- i40iw_free_dma_mem(dev->hw, &iwdev->obj_mem);
- iwdev->iw_msixtbl = NULL;
- }
- return status;
-}
-
-/**
- * i40iw_get_used_rsrc - determine resources used internally
- * @iwdev: iwarp device
- *
- * Called after internal allocations
- */
-static void i40iw_get_used_rsrc(struct i40iw_device *iwdev)
-{
- iwdev->used_pds = find_next_zero_bit(iwdev->allocated_pds, iwdev->max_pd, 0);
- iwdev->used_qps = find_next_zero_bit(iwdev->allocated_qps, iwdev->max_qp, 0);
- iwdev->used_cqs = find_next_zero_bit(iwdev->allocated_cqs, iwdev->max_cq, 0);
- iwdev->used_mrs = find_next_zero_bit(iwdev->allocated_mrs, iwdev->max_mr, 0);
-}
-
-/**
- * i40iw_open - client interface operation open for iwarp/uda device
- * @ldev: lan device information
- * @client: iwarp client information, provided during registration
- *
- * Called by the lan driver during the processing of client register
- * Create device resources, set up queues, pble and hmc objects and
- * register the device with the ib verbs interface
- * Return 0 if successful, otherwise return error
- */
-static int i40iw_open(struct i40e_info *ldev, struct i40e_client *client)
-{
- struct i40iw_device *iwdev;
- struct i40iw_sc_dev *dev;
- enum i40iw_status_code status;
- struct i40iw_handler *hdl;
-
- hdl = i40iw_find_netdev(ldev->netdev);
- if (hdl)
- return 0;
-
- hdl = kzalloc(sizeof(*hdl), GFP_KERNEL);
- if (!hdl)
- return -ENOMEM;
- iwdev = &hdl->device;
- iwdev->hdl = hdl;
- dev = &iwdev->sc_dev;
- i40iw_setup_cm_core(iwdev);
-
- dev->back_dev = (void *)iwdev;
- iwdev->ldev = &hdl->ldev;
- iwdev->client = client;
- mutex_init(&iwdev->pbl_mutex);
- i40iw_add_handler(hdl);
-
- do {
- status = i40iw_setup_init_state(hdl, ldev, client);
- if (status)
- break;
- iwdev->init_state = INITIAL_STATE;
- if (dev->is_pf)
- i40iw_wait_pe_ready(dev->hw);
- status = i40iw_create_cqp(iwdev);
- if (status)
- break;
- iwdev->init_state = CQP_CREATED;
- status = i40iw_hmc_setup(iwdev);
- if (status)
- break;
- status = i40iw_create_ccq(iwdev);
- if (status)
- break;
- iwdev->init_state = CCQ_CREATED;
- status = i40iw_initialize_ilq(iwdev);
- if (status)
- break;
- iwdev->init_state = ILQ_CREATED;
- status = i40iw_initialize_ieq(iwdev);
- if (status)
- break;
- iwdev->init_state = IEQ_CREATED;
- status = i40iw_setup_aeq(iwdev);
- if (status)
- break;
- iwdev->init_state = AEQ_CREATED;
- status = i40iw_setup_ceqs(iwdev, ldev);
- if (status)
- break;
- iwdev->init_state = CEQ_CREATED;
- status = i40iw_initialize_hw_resources(iwdev);
- if (status)
- break;
- i40iw_get_used_rsrc(iwdev);
- dev->ccq_ops->ccq_arm(dev->ccq);
- status = i40iw_hmc_init_pble(&iwdev->sc_dev, iwdev->pble_rsrc);
- if (status)
- break;
- iwdev->virtchnl_wq = alloc_ordered_workqueue("iwvch", WQ_MEM_RECLAIM);
- i40iw_register_notifiers();
- iwdev->init_state = INET_NOTIFIER;
- status = i40iw_add_mac_ip(iwdev);
- if (status)
- break;
- iwdev->init_state = IP_ADDR_REGISTERED;
- if (i40iw_register_rdma_device(iwdev)) {
- i40iw_pr_err("register rdma device fail\n");
- break;
- };
-
- iwdev->init_state = RDMA_DEV_REGISTERED;
- iwdev->iw_status = 1;
- i40iw_port_ibevent(iwdev);
- iwdev->param_wq = alloc_ordered_workqueue("l2params", WQ_MEM_RECLAIM);
- if(iwdev->param_wq == NULL)
- break;
- i40iw_pr_info("i40iw_open completed\n");
- return 0;
- } while (0);
-
- i40iw_pr_err("status = %d last completion = %d\n", status, iwdev->init_state);
- i40iw_deinit_device(iwdev, false);
- return -ERESTART;
-}
-
-/**
- * i40iw_l2params_worker - worker for l2 params change
- * @work: work pointer for l2 params
- */
-static void i40iw_l2params_worker(struct work_struct *work)
-{
- struct l2params_work *dwork =
- container_of(work, struct l2params_work, work);
- struct i40iw_device *iwdev = dwork->iwdev;
-
- i40iw_change_l2params(&iwdev->vsi, &dwork->l2params);
- atomic_dec(&iwdev->params_busy);
- kfree(work);
-}
-
-/**
- * i40iw_l2param_change - handle qs handles for qos and mss change
- * @ldev: lan device information
- * @client: client for paramater change
- * @params: new parameters from L2
- */
-static void i40iw_l2param_change(struct i40e_info *ldev, struct i40e_client *client,
- struct i40e_params *params)
-{
- struct i40iw_handler *hdl;
- struct i40iw_l2params *l2params;
- struct l2params_work *work;
- struct i40iw_device *iwdev;
- int i;
-
- hdl = i40iw_find_i40e_handler(ldev);
- if (!hdl)
- return;
-
- iwdev = &hdl->device;
-
- if (atomic_read(&iwdev->params_busy))
- return;
-
-
- work = kzalloc(sizeof(*work), GFP_ATOMIC);
- if (!work)
- return;
-
- atomic_inc(&iwdev->params_busy);
-
- work->iwdev = iwdev;
- l2params = &work->l2params;
- for (i = 0; i < I40E_CLIENT_MAX_USER_PRIORITY; i++)
- l2params->qs_handle_list[i] = params->qos.prio_qos[i].qs_handle;
-
- l2params->mss = (params->mtu) ? params->mtu - I40IW_MTU_TO_MSS : iwdev->vsi.mss;
-
- INIT_WORK(&work->work, i40iw_l2params_worker);
- queue_work(iwdev->param_wq, &work->work);
-}
-
-/**
- * i40iw_close - client interface operation close for iwarp/uda device
- * @ldev: lan device information
- * @client: client to close
- *
- * Called by the lan driver during the processing of client unregister
- * Destroy and clean up the driver resources
- */
-static void i40iw_close(struct i40e_info *ldev, struct i40e_client *client, bool reset)
-{
- struct i40iw_device *iwdev;
- struct i40iw_handler *hdl;
-
- hdl = i40iw_find_i40e_handler(ldev);
- if (!hdl)
- return;
-
- iwdev = &hdl->device;
- iwdev->closing = true;
-
- i40iw_cm_disconnect_all(iwdev);
- destroy_workqueue(iwdev->virtchnl_wq);
- i40iw_deinit_device(iwdev, reset);
-}
-
-/**
- * i40iw_vf_reset - process VF reset
- * @ldev: lan device information
- * @client: client interface instance
- * @vf_id: virtual function id
- *
- * Called when a VF is reset by the PF
- * Destroy and clean up the VF resources
- */
-static void i40iw_vf_reset(struct i40e_info *ldev, struct i40e_client *client, u32 vf_id)
-{
- struct i40iw_handler *hdl;
- struct i40iw_sc_dev *dev;
- struct i40iw_hmc_fcn_info hmc_fcn_info;
- struct i40iw_virt_mem vf_dev_mem;
- struct i40iw_vfdev *tmp_vfdev;
- unsigned int i;
- unsigned long flags;
- struct i40iw_device *iwdev;
-
- hdl = i40iw_find_i40e_handler(ldev);
- if (!hdl)
- return;
-
- dev = &hdl->device.sc_dev;
- iwdev = (struct i40iw_device *)dev->back_dev;
-
- for (i = 0; i < I40IW_MAX_PE_ENABLED_VF_COUNT; i++) {
- if (!dev->vf_dev[i] || (dev->vf_dev[i]->vf_id != vf_id))
- continue;
- /* free all resources allocated on behalf of vf */
- tmp_vfdev = dev->vf_dev[i];
- spin_lock_irqsave(&iwdev->vsi.pestat->lock, flags);
- dev->vf_dev[i] = NULL;
- spin_unlock_irqrestore(&iwdev->vsi.pestat->lock, flags);
- i40iw_del_hmc_objects(dev, &tmp_vfdev->hmc_info, false, false);
- /* remove vf hmc function */
- memset(&hmc_fcn_info, 0, sizeof(hmc_fcn_info));
- hmc_fcn_info.vf_id = vf_id;
- hmc_fcn_info.iw_vf_idx = tmp_vfdev->iw_vf_idx;
- hmc_fcn_info.free_fcn = true;
- i40iw_cqp_manage_hmc_fcn_cmd(dev, &hmc_fcn_info);
- /* free vf_dev */
- vf_dev_mem.va = tmp_vfdev;
- vf_dev_mem.size = sizeof(struct i40iw_vfdev) +
- sizeof(struct i40iw_hmc_obj_info) * I40IW_HMC_IW_MAX;
- i40iw_free_virt_mem(dev->hw, &vf_dev_mem);
- break;
- }
-}
-
-/**
- * i40iw_vf_enable - enable a number of VFs
- * @ldev: lan device information
- * @client: client interface instance
- * @num_vfs: number of VFs for the PF
- *
- * Called when the number of VFs changes
- */
-static void i40iw_vf_enable(struct i40e_info *ldev,
- struct i40e_client *client,
- u32 num_vfs)
-{
- struct i40iw_handler *hdl;
-
- hdl = i40iw_find_i40e_handler(ldev);
- if (!hdl)
- return;
-
- if (num_vfs > I40IW_MAX_PE_ENABLED_VF_COUNT)
- hdl->device.max_enabled_vfs = I40IW_MAX_PE_ENABLED_VF_COUNT;
- else
- hdl->device.max_enabled_vfs = num_vfs;
-}
-
-/**
- * i40iw_vf_capable - check if VF capable
- * @ldev: lan device information
- * @client: client interface instance
- * @vf_id: virtual function id
- *
- * Return 1 if a VF slot is available or if VF is already RDMA enabled
- * Return 0 otherwise
- */
-static int i40iw_vf_capable(struct i40e_info *ldev,
- struct i40e_client *client,
- u32 vf_id)
-{
- struct i40iw_handler *hdl;
- struct i40iw_sc_dev *dev;
- unsigned int i;
-
- hdl = i40iw_find_i40e_handler(ldev);
- if (!hdl)
- return 0;
-
- dev = &hdl->device.sc_dev;
-
- for (i = 0; i < hdl->device.max_enabled_vfs; i++) {
- if (!dev->vf_dev[i] || (dev->vf_dev[i]->vf_id == vf_id))
- return 1;
- }
-
- return 0;
-}
-
-/**
- * i40iw_virtchnl_receive - receive a message through the virtual channel
- * @ldev: lan device information
- * @client: client interface instance
- * @vf_id: virtual function id associated with the message
- * @msg: message buffer pointer
- * @len: length of the message
- *
- * Invoke virtual channel receive operation for the given msg
- * Return 0 if successful, otherwise return error
- */
-static int i40iw_virtchnl_receive(struct i40e_info *ldev,
- struct i40e_client *client,
- u32 vf_id,
- u8 *msg,
- u16 len)
-{
- struct i40iw_handler *hdl;
- struct i40iw_sc_dev *dev;
- struct i40iw_device *iwdev;
- int ret_code = I40IW_NOT_SUPPORTED;
-
- if (!len || !msg)
- return I40IW_ERR_PARAM;
-
- hdl = i40iw_find_i40e_handler(ldev);
- if (!hdl)
- return I40IW_ERR_PARAM;
-
- dev = &hdl->device.sc_dev;
- iwdev = dev->back_dev;
-
- if (dev->vchnl_if.vchnl_recv) {
- ret_code = dev->vchnl_if.vchnl_recv(dev, vf_id, msg, len);
- if (!dev->is_pf) {
- atomic_dec(&iwdev->vchnl_msgs);
- wake_up(&iwdev->vchnl_waitq);
- }
- }
- return ret_code;
-}
-
-/**
- * i40iw_vf_clear_to_send - wait to send virtual channel message
- * @dev: iwarp device *
- * Wait for until virtual channel is clear
- * before sending the next message
- *
- * Returns false if error
- * Returns true if clear to send
- */
-bool i40iw_vf_clear_to_send(struct i40iw_sc_dev *dev)
-{
- struct i40iw_device *iwdev;
- wait_queue_t wait;
-
- iwdev = dev->back_dev;
-
- if (!wq_has_sleeper(&dev->vf_reqs) &&
- (atomic_read(&iwdev->vchnl_msgs) == 0))
- return true; /* virtual channel is clear */
-
- init_wait(&wait);
- add_wait_queue_exclusive(&dev->vf_reqs, &wait);
-
- if (!wait_event_timeout(dev->vf_reqs,
- (atomic_read(&iwdev->vchnl_msgs) == 0),
- I40IW_VCHNL_EVENT_TIMEOUT))
- dev->vchnl_up = false;
-
- remove_wait_queue(&dev->vf_reqs, &wait);
-
- return dev->vchnl_up;
-}
-
-/**
- * i40iw_virtchnl_send - send a message through the virtual channel
- * @dev: iwarp device
- * @vf_id: virtual function id associated with the message
- * @msg: virtual channel message buffer pointer
- * @len: length of the message
- *
- * Invoke virtual channel send operation for the given msg
- * Return 0 if successful, otherwise return error
- */
-static enum i40iw_status_code i40iw_virtchnl_send(struct i40iw_sc_dev *dev,
- u32 vf_id,
- u8 *msg,
- u16 len)
-{
- struct i40iw_device *iwdev;
- struct i40e_info *ldev;
-
- if (!dev || !dev->back_dev)
- return I40IW_ERR_BAD_PTR;
-
- iwdev = dev->back_dev;
- ldev = iwdev->ldev;
-
- if (ldev && ldev->ops && ldev->ops->virtchnl_send)
- return ldev->ops->virtchnl_send(ldev, &i40iw_client, vf_id, msg, len);
- return I40IW_ERR_BAD_PTR;
-}
-
-/* client interface functions */
-static const struct i40e_client_ops i40e_ops = {
- .open = i40iw_open,
- .close = i40iw_close,
- .l2_param_change = i40iw_l2param_change,
- .virtchnl_receive = i40iw_virtchnl_receive,
- .vf_reset = i40iw_vf_reset,
- .vf_enable = i40iw_vf_enable,
- .vf_capable = i40iw_vf_capable
-};
-
-/**
- * i40iw_init_module - driver initialization function
- *
- * First function to call when the driver is loaded
- * Register the driver as i40e client and port mapper client
- */
-static int __init i40iw_init_module(void)
-{
- int ret;
-
- memset(&i40iw_client, 0, sizeof(i40iw_client));
- i40iw_client.version.major = CLIENT_IW_INTERFACE_VERSION_MAJOR;
- i40iw_client.version.minor = CLIENT_IW_INTERFACE_VERSION_MINOR;
- i40iw_client.version.build = CLIENT_IW_INTERFACE_VERSION_BUILD;
- i40iw_client.ops = &i40e_ops;
- memcpy(i40iw_client.name, i40iw_client_name, I40E_CLIENT_STR_LENGTH);
- i40iw_client.type = I40E_CLIENT_IWARP;
- spin_lock_init(&i40iw_handler_lock);
- ret = i40e_register_client(&i40iw_client);
- return ret;
-}
-
-/**
- * i40iw_exit_module - driver exit clean up function
- *
- * The function is called just before the driver is unloaded
- * Unregister the driver as i40e client and port mapper client
- */
-static void __exit i40iw_exit_module(void)
-{
- i40e_unregister_client(&i40iw_client);
-}
-
-module_init(i40iw_init_module);
-module_exit(i40iw_exit_module);
diff --git a/drivers/infiniband/hw/i40iw/i40iw_osdep.h b/drivers/infiniband/hw/i40iw/i40iw_osdep.h
deleted file mode 100644
index aa66c1c63dfa..000000000000
--- a/drivers/infiniband/hw/i40iw/i40iw_osdep.h
+++ /dev/null
@@ -1,218 +0,0 @@
-/*******************************************************************************
-*
-* Copyright (c) 2015-2016 Intel Corporation. All rights reserved.
-*
-* This software is available to you under a choice of one of two
-* licenses. You may choose to be licensed under the terms of the GNU
-* General Public License (GPL) Version 2, available from the file
-* COPYING in the main directory of this source tree, or the
-* OpenFabrics.org BSD license below:
-*
-* Redistribution and use in source and binary forms, with or
-* without modification, are permitted provided that the following
-* conditions are met:
-*
-* - Redistributions of source code must retain the above
-* copyright notice, this list of conditions and the following
-* disclaimer.
-*
-* - Redistributions in binary form must reproduce the above
-* copyright notice, this list of conditions and the following
-* disclaimer in the documentation and/or other materials
-* provided with the distribution.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-* SOFTWARE.
-*
-*******************************************************************************/
-
-#ifndef I40IW_OSDEP_H
-#define I40IW_OSDEP_H
-
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <linux/bitops.h>
-#include <net/tcp.h>
-#include <crypto/hash.h>
-/* get readq/writeq support for 32 bit kernels, use the low-first version */
-#include <linux/io-64-nonatomic-lo-hi.h>
-
-#define STATS_TIMER_DELAY 1000
-
-static inline void set_64bit_val(u64 *wqe_words, u32 byte_index, u64 value)
-{
- wqe_words[byte_index >> 3] = value;
-}
-
-/**
- * set_32bit_val - set 32 value to hw wqe
- * @wqe_words: wqe addr to write
- * @byte_index: index in wqe
- * @value: value to write
- **/
-static inline void set_32bit_val(u32 *wqe_words, u32 byte_index, u32 value)
-{
- wqe_words[byte_index >> 2] = value;
-}
-
-/**
- * get_64bit_val - read 64 bit value from wqe
- * @wqe_words: wqe addr
- * @byte_index: index to read from
- * @value: read value
- **/
-static inline void get_64bit_val(u64 *wqe_words, u32 byte_index, u64 *value)
-{
- *value = wqe_words[byte_index >> 3];
-}
-
-/**
- * get_32bit_val - read 32 bit value from wqe
- * @wqe_words: wqe addr
- * @byte_index: index to reaad from
- * @value: return 32 bit value
- **/
-static inline void get_32bit_val(u32 *wqe_words, u32 byte_index, u32 *value)
-{
- *value = wqe_words[byte_index >> 2];
-}
-
-struct i40iw_dma_mem {
- void *va;
- dma_addr_t pa;
- u32 size;
-} __packed;
-
-struct i40iw_virt_mem {
- void *va;
- u32 size;
-} __packed;
-
-#define i40iw_debug(h, m, s, ...) \
-do { \
- if (((m) & (h)->debug_mask)) \
- pr_info("i40iw " s, ##__VA_ARGS__); \
-} while (0)
-
-#define i40iw_flush(a) readl((a)->hw_addr + I40E_GLGEN_STAT)
-
-#define I40E_GLHMC_VFSDCMD(_i) (0x000C8000 + ((_i) * 4)) \
- /* _i=0...31 */
-#define I40E_GLHMC_VFSDCMD_MAX_INDEX 31
-#define I40E_GLHMC_VFSDCMD_PMSDIDX_SHIFT 0
-#define I40E_GLHMC_VFSDCMD_PMSDIDX_MASK (0xFFF \
- << I40E_GLHMC_VFSDCMD_PMSDIDX_SHIFT)
-#define I40E_GLHMC_VFSDCMD_PF_SHIFT 16
-#define I40E_GLHMC_VFSDCMD_PF_MASK (0xF << I40E_GLHMC_VFSDCMD_PF_SHIFT)
-#define I40E_GLHMC_VFSDCMD_VF_SHIFT 20
-#define I40E_GLHMC_VFSDCMD_VF_MASK (0x1FF << I40E_GLHMC_VFSDCMD_VF_SHIFT)
-#define I40E_GLHMC_VFSDCMD_PMF_TYPE_SHIFT 29
-#define I40E_GLHMC_VFSDCMD_PMF_TYPE_MASK (0x3 \
- << I40E_GLHMC_VFSDCMD_PMF_TYPE_SHIFT)
-#define I40E_GLHMC_VFSDCMD_PMSDWR_SHIFT 31
-#define I40E_GLHMC_VFSDCMD_PMSDWR_MASK (0x1 << I40E_GLHMC_VFSDCMD_PMSDWR_SHIFT)
-
-#define I40E_GLHMC_VFSDDATAHIGH(_i) (0x000C8200 + ((_i) * 4)) \
- /* _i=0...31 */
-#define I40E_GLHMC_VFSDDATAHIGH_MAX_INDEX 31
-#define I40E_GLHMC_VFSDDATAHIGH_PMSDDATAHIGH_SHIFT 0
-#define I40E_GLHMC_VFSDDATAHIGH_PMSDDATAHIGH_MASK (0xFFFFFFFF \
- << I40E_GLHMC_VFSDDATAHIGH_PMSDDATAHIGH_SHIFT)
-
-#define I40E_GLHMC_VFSDDATALOW(_i) (0x000C8100 + ((_i) * 4)) \
- /* _i=0...31 */
-#define I40E_GLHMC_VFSDDATALOW_MAX_INDEX 31
-#define I40E_GLHMC_VFSDDATALOW_PMSDVALID_SHIFT 0
-#define I40E_GLHMC_VFSDDATALOW_PMSDVALID_MASK (0x1 \
- << I40E_GLHMC_VFSDDATALOW_PMSDVALID_SHIFT)
-#define I40E_GLHMC_VFSDDATALOW_PMSDTYPE_SHIFT 1
-#define I40E_GLHMC_VFSDDATALOW_PMSDTYPE_MASK (0x1 \
- << I40E_GLHMC_VFSDDATALOW_PMSDTYPE_SHIFT)
-#define I40E_GLHMC_VFSDDATALOW_PMSDBPCOUNT_SHIFT 2
-#define I40E_GLHMC_VFSDDATALOW_PMSDBPCOUNT_MASK (0x3FF \
- << I40E_GLHMC_VFSDDATALOW_PMSDBPCOUNT_SHIFT)
-#define I40E_GLHMC_VFSDDATALOW_PMSDDATALOW_SHIFT 12
-#define I40E_GLHMC_VFSDDATALOW_PMSDDATALOW_MASK (0xFFFFF \
- << I40E_GLHMC_VFSDDATALOW_PMSDDATALOW_SHIFT)
-
-#define I40E_GLPE_FWLDSTATUS 0x0000D200
-#define I40E_GLPE_FWLDSTATUS_LOAD_REQUESTED_SHIFT 0
-#define I40E_GLPE_FWLDSTATUS_LOAD_REQUESTED_MASK (0x1 \
- << I40E_GLPE_FWLDSTATUS_LOAD_REQUESTED_SHIFT)
-#define I40E_GLPE_FWLDSTATUS_DONE_SHIFT 1
-#define I40E_GLPE_FWLDSTATUS_DONE_MASK (0x1 << I40E_GLPE_FWLDSTATUS_DONE_SHIFT)
-#define I40E_GLPE_FWLDSTATUS_CQP_FAIL_SHIFT 2
-#define I40E_GLPE_FWLDSTATUS_CQP_FAIL_MASK (0x1 \
- << I40E_GLPE_FWLDSTATUS_CQP_FAIL_SHIFT)
-#define I40E_GLPE_FWLDSTATUS_TEP_FAIL_SHIFT 3
-#define I40E_GLPE_FWLDSTATUS_TEP_FAIL_MASK (0x1 \
- << I40E_GLPE_FWLDSTATUS_TEP_FAIL_SHIFT)
-#define I40E_GLPE_FWLDSTATUS_OOP_FAIL_SHIFT 4
-#define I40E_GLPE_FWLDSTATUS_OOP_FAIL_MASK (0x1 \
- << I40E_GLPE_FWLDSTATUS_OOP_FAIL_SHIFT)
-
-struct i40iw_sc_dev;
-struct i40iw_sc_qp;
-struct i40iw_puda_buf;
-struct i40iw_puda_completion_info;
-struct i40iw_update_sds_info;
-struct i40iw_hmc_fcn_info;
-struct i40iw_virtchnl_work_info;
-struct i40iw_manage_vf_pble_info;
-struct i40iw_device;
-struct i40iw_hmc_info;
-struct i40iw_hw;
-
-u8 __iomem *i40iw_get_hw_addr(void *dev);
-void i40iw_ieq_mpa_crc_ae(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp);
-enum i40iw_status_code i40iw_vf_wait_vchnl_resp(struct i40iw_sc_dev *dev);
-bool i40iw_vf_clear_to_send(struct i40iw_sc_dev *dev);
-enum i40iw_status_code i40iw_ieq_check_mpacrc(struct shash_desc *desc, void *addr,
- u32 length, u32 value);
-struct i40iw_sc_qp *i40iw_ieq_get_qp(struct i40iw_sc_dev *dev, struct i40iw_puda_buf *buf);
-void i40iw_ieq_update_tcpip_info(struct i40iw_puda_buf *buf, u16 length, u32 seqnum);
-void i40iw_free_hash_desc(struct shash_desc *);
-enum i40iw_status_code i40iw_init_hash_desc(struct shash_desc **);
-enum i40iw_status_code i40iw_puda_get_tcpip_info(struct i40iw_puda_completion_info *info,
- struct i40iw_puda_buf *buf);
-enum i40iw_status_code i40iw_cqp_sds_cmd(struct i40iw_sc_dev *dev,
- struct i40iw_update_sds_info *info);
-enum i40iw_status_code i40iw_cqp_manage_hmc_fcn_cmd(struct i40iw_sc_dev *dev,
- struct i40iw_hmc_fcn_info *hmcfcninfo);
-enum i40iw_status_code i40iw_cqp_query_fpm_values_cmd(struct i40iw_sc_dev *dev,
- struct i40iw_dma_mem *values_mem,
- u8 hmc_fn_id);
-enum i40iw_status_code i40iw_cqp_commit_fpm_values_cmd(struct i40iw_sc_dev *dev,
- struct i40iw_dma_mem *values_mem,
- u8 hmc_fn_id);
-enum i40iw_status_code i40iw_alloc_query_fpm_buf(struct i40iw_sc_dev *dev,
- struct i40iw_dma_mem *mem);
-enum i40iw_status_code i40iw_cqp_manage_vf_pble_bp(struct i40iw_sc_dev *dev,
- struct i40iw_manage_vf_pble_info *info);
-void i40iw_cqp_spawn_worker(struct i40iw_sc_dev *dev,
- struct i40iw_virtchnl_work_info *work_info, u32 iw_vf_idx);
-void *i40iw_remove_head(struct list_head *list);
-void i40iw_qp_suspend_resume(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp, bool suspend);
-void i40iw_qp_mss_modify(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp);
-
-void i40iw_term_modify_qp(struct i40iw_sc_qp *qp, u8 next_state, u8 term, u8 term_len);
-void i40iw_terminate_done(struct i40iw_sc_qp *qp, int timeout_occurred);
-void i40iw_terminate_start_timer(struct i40iw_sc_qp *qp);
-void i40iw_terminate_del_timer(struct i40iw_sc_qp *qp);
-
-enum i40iw_status_code i40iw_hw_manage_vf_pble_bp(struct i40iw_device *iwdev,
- struct i40iw_manage_vf_pble_info *info,
- bool wait);
-struct i40iw_sc_vsi;
-void i40iw_hw_stats_start_timer(struct i40iw_sc_vsi *vsi);
-void i40iw_hw_stats_stop_timer(struct i40iw_sc_vsi *vsi);
-#define i40iw_mmiowb() mmiowb()
-void i40iw_wr32(struct i40iw_hw *hw, u32 reg, u32 value);
-u32 i40iw_rd32(struct i40iw_hw *hw, u32 reg);
-#endif /* _I40IW_OSDEP_H_ */
diff --git a/drivers/infiniband/hw/i40iw/i40iw_p.h b/drivers/infiniband/hw/i40iw/i40iw_p.h
deleted file mode 100644
index 28a92fee0822..000000000000
--- a/drivers/infiniband/hw/i40iw/i40iw_p.h
+++ /dev/null
@@ -1,121 +0,0 @@
-/*******************************************************************************
-*
-* Copyright (c) 2015-2016 Intel Corporation. All rights reserved.
-*
-* This software is available to you under a choice of one of two
-* licenses. You may choose to be licensed under the terms of the GNU
-* General Public License (GPL) Version 2, available from the file
-* COPYING in the main directory of this source tree, or the
-* OpenFabrics.org BSD license below:
-*
-* Redistribution and use in source and binary forms, with or
-* without modification, are permitted provided that the following
-* conditions are met:
-*
-* - Redistributions of source code must retain the above
-* copyright notice, this list of conditions and the following
-* disclaimer.
-*
-* - Redistributions in binary form must reproduce the above
-* copyright notice, this list of conditions and the following
-* disclaimer in the documentation and/or other materials
-* provided with the distribution.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-* SOFTWARE.
-*
-*******************************************************************************/
-
-#ifndef I40IW_P_H
-#define I40IW_P_H
-
-#define PAUSE_TIMER_VALUE 0xFFFF
-#define REFRESH_THRESHOLD 0x7FFF
-#define HIGH_THRESHOLD 0x800
-#define LOW_THRESHOLD 0x200
-#define ALL_TC2PFC 0xFF
-
-void i40iw_debug_buf(struct i40iw_sc_dev *dev, enum i40iw_debug_flag mask,
- char *desc, u64 *buf, u32 size);
-/* init operations */
-enum i40iw_status_code i40iw_device_init(struct i40iw_sc_dev *dev,
- struct i40iw_device_init_info *info);
-
-void i40iw_sc_cqp_post_sq(struct i40iw_sc_cqp *cqp);
-
-u64 *i40iw_sc_cqp_get_next_send_wqe(struct i40iw_sc_cqp *cqp, u64 scratch);
-
-enum i40iw_status_code i40iw_sc_mr_fast_register(struct i40iw_sc_qp *qp,
- struct i40iw_fast_reg_stag_info *info,
- bool post_sq);
-
-/* HMC/FPM functions */
-enum i40iw_status_code i40iw_sc_init_iw_hmc(struct i40iw_sc_dev *dev,
- u8 hmc_fn_id);
-
-enum i40iw_status_code i40iw_pf_init_vfhmc(struct i40iw_sc_dev *dev, u8 vf_hmc_fn_id,
- u32 *vf_cnt_array);
-
-/* stats functions */
-void i40iw_hw_stats_refresh_all(struct i40iw_vsi_pestat *stats);
-void i40iw_hw_stats_read_all(struct i40iw_vsi_pestat *stats, struct i40iw_dev_hw_stats *stats_values);
-void i40iw_hw_stats_read_32(struct i40iw_vsi_pestat *stats,
- enum i40iw_hw_stats_index_32b index,
- u64 *value);
-void i40iw_hw_stats_read_64(struct i40iw_vsi_pestat *stats,
- enum i40iw_hw_stats_index_64b index,
- u64 *value);
-void i40iw_hw_stats_init(struct i40iw_vsi_pestat *stats, u8 index, bool is_pf);
-
-/* vsi misc functions */
-enum i40iw_status_code i40iw_vsi_stats_init(struct i40iw_sc_vsi *vsi, struct i40iw_vsi_stats_info *info);
-void i40iw_vsi_stats_free(struct i40iw_sc_vsi *vsi);
-void i40iw_sc_vsi_init(struct i40iw_sc_vsi *vsi, struct i40iw_vsi_init_info *info);
-
-void i40iw_change_l2params(struct i40iw_sc_vsi *vsi, struct i40iw_l2params *l2params);
-void i40iw_qp_add_qos(struct i40iw_sc_qp *qp);
-
-void i40iw_terminate_send_fin(struct i40iw_sc_qp *qp);
-
-void i40iw_terminate_connection(struct i40iw_sc_qp *qp, struct i40iw_aeqe_info *info);
-
-void i40iw_terminate_received(struct i40iw_sc_qp *qp, struct i40iw_aeqe_info *info);
-
-enum i40iw_status_code i40iw_sc_suspend_qp(struct i40iw_sc_cqp *cqp,
- struct i40iw_sc_qp *qp, u64 scratch);
-
-enum i40iw_status_code i40iw_sc_resume_qp(struct i40iw_sc_cqp *cqp,
- struct i40iw_sc_qp *qp, u64 scratch);
-
-enum i40iw_status_code i40iw_sc_static_hmc_pages_allocated(struct i40iw_sc_cqp *cqp,
- u64 scratch, u8 hmc_fn_id,
- bool post_sq,
- bool poll_registers);
-
-enum i40iw_status_code i40iw_config_fpm_values(struct i40iw_sc_dev *dev, u32 qp_count);
-
-void free_sd_mem(struct i40iw_sc_dev *dev);
-
-enum i40iw_status_code i40iw_process_cqp_cmd(struct i40iw_sc_dev *dev,
- struct cqp_commands_info *pcmdinfo);
-
-enum i40iw_status_code i40iw_process_bh(struct i40iw_sc_dev *dev);
-
-/* prototype for functions used for dynamic memory allocation */
-enum i40iw_status_code i40iw_allocate_dma_mem(struct i40iw_hw *hw,
- struct i40iw_dma_mem *mem, u64 size,
- u32 alignment);
-void i40iw_free_dma_mem(struct i40iw_hw *hw, struct i40iw_dma_mem *mem);
-enum i40iw_status_code i40iw_allocate_virt_mem(struct i40iw_hw *hw,
- struct i40iw_virt_mem *mem, u32 size);
-enum i40iw_status_code i40iw_free_virt_mem(struct i40iw_hw *hw,
- struct i40iw_virt_mem *mem);
-u8 i40iw_get_encoded_wqe_size(u32 wqsize, bool cqpsq);
-
-#endif
diff --git a/drivers/infiniband/hw/i40iw/i40iw_pble.c b/drivers/infiniband/hw/i40iw/i40iw_pble.c
deleted file mode 100644
index c87ba1617087..000000000000
--- a/drivers/infiniband/hw/i40iw/i40iw_pble.c
+++ /dev/null
@@ -1,615 +0,0 @@
-/*******************************************************************************
-*
-* Copyright (c) 2015-2016 Intel Corporation. All rights reserved.
-*
-* This software is available to you under a choice of one of two
-* licenses. You may choose to be licensed under the terms of the GNU
-* General Public License (GPL) Version 2, available from the file
-* COPYING in the main directory of this source tree, or the
-* OpenFabrics.org BSD license below:
-*
-* Redistribution and use in source and binary forms, with or
-* without modification, are permitted provided that the following
-* conditions are met:
-*
-* - Redistributions of source code must retain the above
-* copyright notice, this list of conditions and the following
-* disclaimer.
-*
-* - Redistributions in binary form must reproduce the above
-* copyright notice, this list of conditions and the following
-* disclaimer in the documentation and/or other materials
-* provided with the distribution.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-* SOFTWARE.
-*
-*******************************************************************************/
-
-#include "i40iw_status.h"
-#include "i40iw_osdep.h"
-#include "i40iw_register.h"
-#include "i40iw_hmc.h"
-
-#include "i40iw_d.h"
-#include "i40iw_type.h"
-#include "i40iw_p.h"
-
-#include <linux/pci.h>
-#include <linux/genalloc.h>
-#include <linux/vmalloc.h>
-#include "i40iw_pble.h"
-#include "i40iw.h"
-
-struct i40iw_device;
-static enum i40iw_status_code add_pble_pool(struct i40iw_sc_dev *dev,
- struct i40iw_hmc_pble_rsrc *pble_rsrc);
-static void i40iw_free_vmalloc_mem(struct i40iw_hw *hw, struct i40iw_chunk *chunk);
-
-/**
- * i40iw_destroy_pble_pool - destroy pool during module unload
- * @pble_rsrc: pble resources
- */
-void i40iw_destroy_pble_pool(struct i40iw_sc_dev *dev, struct i40iw_hmc_pble_rsrc *pble_rsrc)
-{
- struct list_head *clist;
- struct list_head *tlist;
- struct i40iw_chunk *chunk;
- struct i40iw_pble_pool *pinfo = &pble_rsrc->pinfo;
-
- if (pinfo->pool) {
- list_for_each_safe(clist, tlist, &pinfo->clist) {
- chunk = list_entry(clist, struct i40iw_chunk, list);
- if (chunk->type == I40IW_VMALLOC)
- i40iw_free_vmalloc_mem(dev->hw, chunk);
- kfree(chunk);
- }
- gen_pool_destroy(pinfo->pool);
- }
-}
-
-/**
- * i40iw_hmc_init_pble - Initialize pble resources during module load
- * @dev: i40iw_sc_dev struct
- * @pble_rsrc: pble resources
- */
-enum i40iw_status_code i40iw_hmc_init_pble(struct i40iw_sc_dev *dev,
- struct i40iw_hmc_pble_rsrc *pble_rsrc)
-{
- struct i40iw_hmc_info *hmc_info;
- u32 fpm_idx = 0;
-
- hmc_info = dev->hmc_info;
- pble_rsrc->fpm_base_addr = hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].base;
- /* Now start the pble' on 4k boundary */
- if (pble_rsrc->fpm_base_addr & 0xfff)
- fpm_idx = (PAGE_SIZE - (pble_rsrc->fpm_base_addr & 0xfff)) >> 3;
-
- pble_rsrc->unallocated_pble =
- hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].cnt - fpm_idx;
- pble_rsrc->next_fpm_addr = pble_rsrc->fpm_base_addr + (fpm_idx << 3);
-
- pble_rsrc->pinfo.pool_shift = POOL_SHIFT;
- pble_rsrc->pinfo.pool = gen_pool_create(pble_rsrc->pinfo.pool_shift, -1);
- INIT_LIST_HEAD(&pble_rsrc->pinfo.clist);
- if (!pble_rsrc->pinfo.pool)
- goto error;
-
- if (add_pble_pool(dev, pble_rsrc))
- goto error;
-
- return 0;
-
- error:i40iw_destroy_pble_pool(dev, pble_rsrc);
- return I40IW_ERR_NO_MEMORY;
-}
-
-/**
- * get_sd_pd_idx - Returns sd index, pd index and rel_pd_idx from fpm address
- * @ pble_rsrc: structure containing fpm address
- * @ idx: where to return indexes
- */
-static inline void get_sd_pd_idx(struct i40iw_hmc_pble_rsrc *pble_rsrc,
- struct sd_pd_idx *idx)
-{
- idx->sd_idx = (u32)(pble_rsrc->next_fpm_addr) / I40IW_HMC_DIRECT_BP_SIZE;
- idx->pd_idx = (u32)(pble_rsrc->next_fpm_addr) / I40IW_HMC_PAGED_BP_SIZE;
- idx->rel_pd_idx = (idx->pd_idx % I40IW_HMC_PD_CNT_IN_SD);
-}
-
-/**
- * add_sd_direct - add sd direct for pble
- * @dev: hardware control device structure
- * @pble_rsrc: pble resource ptr
- * @info: page info for sd
- */
-static enum i40iw_status_code add_sd_direct(struct i40iw_sc_dev *dev,
- struct i40iw_hmc_pble_rsrc *pble_rsrc,
- struct i40iw_add_page_info *info)
-{
- enum i40iw_status_code ret_code = 0;
- struct sd_pd_idx *idx = &info->idx;
- struct i40iw_chunk *chunk = info->chunk;
- struct i40iw_hmc_info *hmc_info = info->hmc_info;
- struct i40iw_hmc_sd_entry *sd_entry = info->sd_entry;
- u32 offset = 0;
-
- if (!sd_entry->valid) {
- if (dev->is_pf) {
- ret_code = i40iw_add_sd_table_entry(dev->hw, hmc_info,
- info->idx.sd_idx,
- I40IW_SD_TYPE_DIRECT,
- I40IW_HMC_DIRECT_BP_SIZE);
- if (ret_code)
- return ret_code;
- chunk->type = I40IW_DMA_COHERENT;
- }
- }
- offset = idx->rel_pd_idx << I40IW_HMC_PAGED_BP_SHIFT;
- chunk->size = info->pages << I40IW_HMC_PAGED_BP_SHIFT;
- chunk->vaddr = ((u8 *)sd_entry->u.bp.addr.va + offset);
- chunk->fpm_addr = pble_rsrc->next_fpm_addr;
- i40iw_debug(dev, I40IW_DEBUG_PBLE, "chunk_size[%d] = 0x%x vaddr=%p fpm_addr = %llx\n",
- chunk->size, chunk->size, chunk->vaddr, chunk->fpm_addr);
- return 0;
-}
-
-/**
- * i40iw_free_vmalloc_mem - free vmalloc during close
- * @hw: hw struct
- * @chunk: chunk information for vmalloc
- */
-static void i40iw_free_vmalloc_mem(struct i40iw_hw *hw, struct i40iw_chunk *chunk)
-{
- struct pci_dev *pcidev = (struct pci_dev *)hw->dev_context;
- int i;
-
- if (!chunk->pg_cnt)
- goto done;
- for (i = 0; i < chunk->pg_cnt; i++)
- dma_unmap_page(&pcidev->dev, chunk->dmaaddrs[i], PAGE_SIZE, DMA_BIDIRECTIONAL);
-
- done:
- kfree(chunk->dmaaddrs);
- chunk->dmaaddrs = NULL;
- vfree(chunk->vaddr);
- chunk->vaddr = NULL;
- chunk->type = 0;
-}
-
-/**
- * i40iw_get_vmalloc_mem - get 2M page for sd
- * @hw: hardware address
- * @chunk: chunk to adf
- * @pg_cnt: #of 4 K pages
- */
-static enum i40iw_status_code i40iw_get_vmalloc_mem(struct i40iw_hw *hw,
- struct i40iw_chunk *chunk,
- int pg_cnt)
-{
- struct pci_dev *pcidev = (struct pci_dev *)hw->dev_context;
- struct page *page;
- u8 *addr;
- u32 size;
- int i;
-
- chunk->dmaaddrs = kzalloc(pg_cnt << 3, GFP_KERNEL);
- if (!chunk->dmaaddrs)
- return I40IW_ERR_NO_MEMORY;
- size = PAGE_SIZE * pg_cnt;
- chunk->vaddr = vmalloc(size);
- if (!chunk->vaddr) {
- kfree(chunk->dmaaddrs);
- chunk->dmaaddrs = NULL;
- return I40IW_ERR_NO_MEMORY;
- }
- chunk->size = size;
- addr = (u8 *)chunk->vaddr;
- for (i = 0; i < pg_cnt; i++) {
- page = vmalloc_to_page((void *)addr);
- if (!page)
- break;
- chunk->dmaaddrs[i] = dma_map_page(&pcidev->dev, page, 0,
- PAGE_SIZE, DMA_BIDIRECTIONAL);
- if (dma_mapping_error(&pcidev->dev, chunk->dmaaddrs[i]))
- break;
- addr += PAGE_SIZE;
- }
-
- chunk->pg_cnt = i;
- chunk->type = I40IW_VMALLOC;
- if (i == pg_cnt)
- return 0;
-
- i40iw_free_vmalloc_mem(hw, chunk);
- return I40IW_ERR_NO_MEMORY;
-}
-
-/**
- * fpm_to_idx - given fpm address, get pble index
- * @pble_rsrc: pble resource management
- * @addr: fpm address for index
- */
-static inline u32 fpm_to_idx(struct i40iw_hmc_pble_rsrc *pble_rsrc, u64 addr)
-{
- return (addr - (pble_rsrc->fpm_base_addr)) >> 3;
-}
-
-/**
- * add_bp_pages - add backing pages for sd
- * @dev: hardware control device structure
- * @pble_rsrc: pble resource management
- * @info: page info for sd
- */
-static enum i40iw_status_code add_bp_pages(struct i40iw_sc_dev *dev,
- struct i40iw_hmc_pble_rsrc *pble_rsrc,
- struct i40iw_add_page_info *info)
-{
- u8 *addr;
- struct i40iw_dma_mem mem;
- struct i40iw_hmc_pd_entry *pd_entry;
- struct i40iw_hmc_sd_entry *sd_entry = info->sd_entry;
- struct i40iw_hmc_info *hmc_info = info->hmc_info;
- struct i40iw_chunk *chunk = info->chunk;
- struct i40iw_manage_vf_pble_info vf_pble_info;
- enum i40iw_status_code status = 0;
- u32 rel_pd_idx = info->idx.rel_pd_idx;
- u32 pd_idx = info->idx.pd_idx;
- u32 i;
-
- status = i40iw_get_vmalloc_mem(dev->hw, chunk, info->pages);
- if (status)
- return I40IW_ERR_NO_MEMORY;
- status = i40iw_add_sd_table_entry(dev->hw, hmc_info,
- info->idx.sd_idx, I40IW_SD_TYPE_PAGED,
- I40IW_HMC_DIRECT_BP_SIZE);
- if (status) {
- i40iw_free_vmalloc_mem(dev->hw, chunk);
- return status;
- }
- if (!dev->is_pf) {
- status = i40iw_vchnl_vf_add_hmc_objs(dev, I40IW_HMC_IW_PBLE,
- fpm_to_idx(pble_rsrc,
- pble_rsrc->next_fpm_addr),
- (info->pages << PBLE_512_SHIFT));
- if (status) {
- i40iw_pr_err("allocate PBLEs in the PF. Error %i\n", status);
- i40iw_free_vmalloc_mem(dev->hw, chunk);
- return status;
- }
- }
- addr = chunk->vaddr;
- for (i = 0; i < info->pages; i++) {
- mem.pa = chunk->dmaaddrs[i];
- mem.size = PAGE_SIZE;
- mem.va = (void *)(addr);
- pd_entry = &sd_entry->u.pd_table.pd_entry[rel_pd_idx++];
- if (!pd_entry->valid) {
- status = i40iw_add_pd_table_entry(dev->hw, hmc_info, pd_idx++, &mem);
- if (status)
- goto error;
- addr += PAGE_SIZE;
- } else {
- i40iw_pr_err("pd entry is valid expecting to be invalid\n");
- }
- }
- if (!dev->is_pf) {
- vf_pble_info.first_pd_index = info->idx.rel_pd_idx;
- vf_pble_info.inv_pd_ent = false;
- vf_pble_info.pd_entry_cnt = PBLE_PER_PAGE;
- vf_pble_info.pd_pl_pba = sd_entry->u.pd_table.pd_page_addr.pa;
- vf_pble_info.sd_index = info->idx.sd_idx;
- status = i40iw_hw_manage_vf_pble_bp(dev->back_dev,
- &vf_pble_info, true);
- if (status) {
- i40iw_pr_err("CQP manage VF PBLE BP failed. %i\n", status);
- goto error;
- }
- }
- chunk->fpm_addr = pble_rsrc->next_fpm_addr;
- return 0;
-error:
- i40iw_free_vmalloc_mem(dev->hw, chunk);
- return status;
-}
-
-/**
- * add_pble_pool - add a sd entry for pble resoure
- * @dev: hardware control device structure
- * @pble_rsrc: pble resource management
- */
-static enum i40iw_status_code add_pble_pool(struct i40iw_sc_dev *dev,
- struct i40iw_hmc_pble_rsrc *pble_rsrc)
-{
- struct i40iw_hmc_sd_entry *sd_entry;
- struct i40iw_hmc_info *hmc_info;
- struct i40iw_chunk *chunk;
- struct i40iw_add_page_info info;
- struct sd_pd_idx *idx = &info.idx;
- enum i40iw_status_code ret_code = 0;
- enum i40iw_sd_entry_type sd_entry_type;
- u64 sd_reg_val = 0;
- u32 pages;
-
- if (pble_rsrc->unallocated_pble < PBLE_PER_PAGE)
- return I40IW_ERR_NO_MEMORY;
- if (pble_rsrc->next_fpm_addr & 0xfff) {
- i40iw_pr_err("next fpm_addr %llx\n", pble_rsrc->next_fpm_addr);
- return I40IW_ERR_INVALID_PAGE_DESC_INDEX;
- }
- chunk = kzalloc(sizeof(*chunk), GFP_KERNEL);
- if (!chunk)
- return I40IW_ERR_NO_MEMORY;
- hmc_info = dev->hmc_info;
- chunk->fpm_addr = pble_rsrc->next_fpm_addr;
- get_sd_pd_idx(pble_rsrc, idx);
- sd_entry = &hmc_info->sd_table.sd_entry[idx->sd_idx];
- pages = (idx->rel_pd_idx) ? (I40IW_HMC_PD_CNT_IN_SD -
- idx->rel_pd_idx) : I40IW_HMC_PD_CNT_IN_SD;
- pages = min(pages, pble_rsrc->unallocated_pble >> PBLE_512_SHIFT);
- info.chunk = chunk;
- info.hmc_info = hmc_info;
- info.pages = pages;
- info.sd_entry = sd_entry;
- if (!sd_entry->valid) {
- sd_entry_type = (!idx->rel_pd_idx &&
- (pages == I40IW_HMC_PD_CNT_IN_SD) &&
- dev->is_pf) ? I40IW_SD_TYPE_DIRECT : I40IW_SD_TYPE_PAGED;
- } else {
- sd_entry_type = sd_entry->entry_type;
- }
- i40iw_debug(dev, I40IW_DEBUG_PBLE,
- "pages = %d, unallocated_pble[%u] current_fpm_addr = %llx\n",
- pages, pble_rsrc->unallocated_pble, pble_rsrc->next_fpm_addr);
- i40iw_debug(dev, I40IW_DEBUG_PBLE, "sd_entry_type = %d sd_entry valid = %d\n",
- sd_entry_type, sd_entry->valid);
-
- if (sd_entry_type == I40IW_SD_TYPE_DIRECT)
- ret_code = add_sd_direct(dev, pble_rsrc, &info);
- if (ret_code)
- sd_entry_type = I40IW_SD_TYPE_PAGED;
- else
- pble_rsrc->stats_direct_sds++;
-
- if (sd_entry_type == I40IW_SD_TYPE_PAGED) {
- ret_code = add_bp_pages(dev, pble_rsrc, &info);
- if (ret_code)
- goto error;
- else
- pble_rsrc->stats_paged_sds++;
- }
-
- if (gen_pool_add_virt(pble_rsrc->pinfo.pool, (unsigned long)chunk->vaddr,
- (phys_addr_t)chunk->fpm_addr, chunk->size, -1)) {
- i40iw_pr_err("could not allocate memory by gen_pool_addr_virt()\n");
- ret_code = I40IW_ERR_NO_MEMORY;
- goto error;
- }
- pble_rsrc->next_fpm_addr += chunk->size;
- i40iw_debug(dev, I40IW_DEBUG_PBLE, "next_fpm_addr = %llx chunk_size[%u] = 0x%x\n",
- pble_rsrc->next_fpm_addr, chunk->size, chunk->size);
- pble_rsrc->unallocated_pble -= (chunk->size >> 3);
- list_add(&chunk->list, &pble_rsrc->pinfo.clist);
- sd_reg_val = (sd_entry_type == I40IW_SD_TYPE_PAGED) ?
- sd_entry->u.pd_table.pd_page_addr.pa : sd_entry->u.bp.addr.pa;
- if (sd_entry->valid)
- return 0;
- if (dev->is_pf) {
- ret_code = i40iw_hmc_sd_one(dev, hmc_info->hmc_fn_id,
- sd_reg_val, idx->sd_idx,
- sd_entry->entry_type, true);
- if (ret_code) {
- i40iw_pr_err("cqp cmd failed for sd (pbles)\n");
- goto error;
- }
- }
-
- sd_entry->valid = true;
- return 0;
- error:
- kfree(chunk);
- return ret_code;
-}
-
-/**
- * free_lvl2 - fee level 2 pble
- * @pble_rsrc: pble resource management
- * @palloc: level 2 pble allocation
- */
-static void free_lvl2(struct i40iw_hmc_pble_rsrc *pble_rsrc,
- struct i40iw_pble_alloc *palloc)
-{
- u32 i;
- struct gen_pool *pool;
- struct i40iw_pble_level2 *lvl2 = &palloc->level2;
- struct i40iw_pble_info *root = &lvl2->root;
- struct i40iw_pble_info *leaf = lvl2->leaf;
-
- pool = pble_rsrc->pinfo.pool;
-
- for (i = 0; i < lvl2->leaf_cnt; i++, leaf++) {
- if (leaf->addr)
- gen_pool_free(pool, leaf->addr, (leaf->cnt << 3));
- else
- break;
- }
-
- if (root->addr)
- gen_pool_free(pool, root->addr, (root->cnt << 3));
-
- kfree(lvl2->leaf);
- lvl2->leaf = NULL;
-}
-
-/**
- * get_lvl2_pble - get level 2 pble resource
- * @pble_rsrc: pble resource management
- * @palloc: level 2 pble allocation
- * @pool: pool pointer
- */
-static enum i40iw_status_code get_lvl2_pble(struct i40iw_hmc_pble_rsrc *pble_rsrc,
- struct i40iw_pble_alloc *palloc,
- struct gen_pool *pool)
-{
- u32 lf4k, lflast, total, i;
- u32 pblcnt = PBLE_PER_PAGE;
- u64 *addr;
- struct i40iw_pble_level2 *lvl2 = &palloc->level2;
- struct i40iw_pble_info *root = &lvl2->root;
- struct i40iw_pble_info *leaf;
-
- /* number of full 512 (4K) leafs) */
- lf4k = palloc->total_cnt >> 9;
- lflast = palloc->total_cnt % PBLE_PER_PAGE;
- total = (lflast == 0) ? lf4k : lf4k + 1;
- lvl2->leaf_cnt = total;
-
- leaf = kzalloc((sizeof(*leaf) * total), GFP_ATOMIC);
- if (!leaf)
- return I40IW_ERR_NO_MEMORY;
- lvl2->leaf = leaf;
- /* allocate pbles for the root */
- root->addr = gen_pool_alloc(pool, (total << 3));
- if (!root->addr) {
- kfree(lvl2->leaf);
- lvl2->leaf = NULL;
- return I40IW_ERR_NO_MEMORY;
- }
- root->idx = fpm_to_idx(pble_rsrc,
- (u64)gen_pool_virt_to_phys(pool, root->addr));
- root->cnt = total;
- addr = (u64 *)root->addr;
- for (i = 0; i < total; i++, leaf++) {
- pblcnt = (lflast && ((i + 1) == total)) ? lflast : PBLE_PER_PAGE;
- leaf->addr = gen_pool_alloc(pool, (pblcnt << 3));
- if (!leaf->addr)
- goto error;
- leaf->idx = fpm_to_idx(pble_rsrc, (u64)gen_pool_virt_to_phys(pool, leaf->addr));
-
- leaf->cnt = pblcnt;
- *addr = (u64)leaf->idx;
- addr++;
- }
- palloc->level = I40IW_LEVEL_2;
- pble_rsrc->stats_lvl2++;
- return 0;
- error:
- free_lvl2(pble_rsrc, palloc);
- return I40IW_ERR_NO_MEMORY;
-}
-
-/**
- * get_lvl1_pble - get level 1 pble resource
- * @dev: hardware control device structure
- * @pble_rsrc: pble resource management
- * @palloc: level 1 pble allocation
- */
-static enum i40iw_status_code get_lvl1_pble(struct i40iw_sc_dev *dev,
- struct i40iw_hmc_pble_rsrc *pble_rsrc,
- struct i40iw_pble_alloc *palloc)
-{
- u64 *addr;
- struct gen_pool *pool;
- struct i40iw_pble_info *lvl1 = &palloc->level1;
-
- pool = pble_rsrc->pinfo.pool;
- addr = (u64 *)gen_pool_alloc(pool, (palloc->total_cnt << 3));
-
- if (!addr)
- return I40IW_ERR_NO_MEMORY;
-
- palloc->level = I40IW_LEVEL_1;
- lvl1->addr = (unsigned long)addr;
- lvl1->idx = fpm_to_idx(pble_rsrc, (u64)gen_pool_virt_to_phys(pool,
- (unsigned long)addr));
- lvl1->cnt = palloc->total_cnt;
- pble_rsrc->stats_lvl1++;
- return 0;
-}
-
-/**
- * get_lvl1_lvl2_pble - calls get_lvl1 and get_lvl2 pble routine
- * @dev: i40iw_sc_dev struct
- * @pble_rsrc: pble resources
- * @palloc: contains all inforamtion regarding pble (idx + pble addr)
- * @pool: pointer to general purpose special memory pool descriptor
- */
-static inline enum i40iw_status_code get_lvl1_lvl2_pble(struct i40iw_sc_dev *dev,
- struct i40iw_hmc_pble_rsrc *pble_rsrc,
- struct i40iw_pble_alloc *palloc,
- struct gen_pool *pool)
-{
- enum i40iw_status_code status = 0;
-
- status = get_lvl1_pble(dev, pble_rsrc, palloc);
- if (status && (palloc->total_cnt > PBLE_PER_PAGE))
- status = get_lvl2_pble(pble_rsrc, palloc, pool);
- return status;
-}
-
-/**
- * i40iw_get_pble - allocate pbles from the pool
- * @dev: i40iw_sc_dev struct
- * @pble_rsrc: pble resources
- * @palloc: contains all inforamtion regarding pble (idx + pble addr)
- * @pble_cnt: #of pbles requested
- */
-enum i40iw_status_code i40iw_get_pble(struct i40iw_sc_dev *dev,
- struct i40iw_hmc_pble_rsrc *pble_rsrc,
- struct i40iw_pble_alloc *palloc,
- u32 pble_cnt)
-{
- struct gen_pool *pool;
- enum i40iw_status_code status = 0;
- u32 max_sds = 0;
- int i;
-
- pool = pble_rsrc->pinfo.pool;
- palloc->total_cnt = pble_cnt;
- palloc->level = I40IW_LEVEL_0;
- /*check first to see if we can get pble's without acquiring additional sd's */
- status = get_lvl1_lvl2_pble(dev, pble_rsrc, palloc, pool);
- if (!status)
- goto exit;
- max_sds = (palloc->total_cnt >> 18) + 1;
- for (i = 0; i < max_sds; i++) {
- status = add_pble_pool(dev, pble_rsrc);
- if (status)
- break;
- status = get_lvl1_lvl2_pble(dev, pble_rsrc, palloc, pool);
- if (!status)
- break;
- }
-exit:
- if (!status)
- pble_rsrc->stats_alloc_ok++;
- else
- pble_rsrc->stats_alloc_fail++;
-
- return status;
-}
-
-/**
- * i40iw_free_pble - put pbles back into pool
- * @pble_rsrc: pble resources
- * @palloc: contains all inforamtion regarding pble resource being freed
- */
-void i40iw_free_pble(struct i40iw_hmc_pble_rsrc *pble_rsrc,
- struct i40iw_pble_alloc *palloc)
-{
- struct gen_pool *pool;
-
- pool = pble_rsrc->pinfo.pool;
- if (palloc->level == I40IW_LEVEL_2)
- free_lvl2(pble_rsrc, palloc);
- else
- gen_pool_free(pool, palloc->level1.addr,
- (palloc->level1.cnt << 3));
- pble_rsrc->stats_alloc_freed++;
-}
diff --git a/drivers/infiniband/hw/i40iw/i40iw_pble.h b/drivers/infiniband/hw/i40iw/i40iw_pble.h
deleted file mode 100644
index 7b1851d21cc0..000000000000
--- a/drivers/infiniband/hw/i40iw/i40iw_pble.h
+++ /dev/null
@@ -1,131 +0,0 @@
-/*******************************************************************************
-*
-* Copyright (c) 2015-2016 Intel Corporation. All rights reserved.
-*
-* This software is available to you under a choice of one of two
-* licenses. You may choose to be licensed under the terms of the GNU
-* General Public License (GPL) Version 2, available from the file
-* COPYING in the main directory of this source tree, or the
-* OpenFabrics.org BSD license below:
-*
-* Redistribution and use in source and binary forms, with or
-* without modification, are permitted provided that the following
-* conditions are met:
-*
-* - Redistributions of source code must retain the above
-* copyright notice, this list of conditions and the following
-* disclaimer.
-*
-* - Redistributions in binary form must reproduce the above
-* copyright notice, this list of conditions and the following
-* disclaimer in the documentation and/or other materials
-* provided with the distribution.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-* SOFTWARE.
-*
-*******************************************************************************/
-
-#ifndef I40IW_PBLE_H
-#define I40IW_PBLE_H
-
-#define POOL_SHIFT 6
-#define PBLE_PER_PAGE 512
-#define I40IW_HMC_PAGED_BP_SHIFT 12
-#define PBLE_512_SHIFT 9
-
-enum i40iw_pble_level {
- I40IW_LEVEL_0 = 0,
- I40IW_LEVEL_1 = 1,
- I40IW_LEVEL_2 = 2
-};
-
-enum i40iw_alloc_type {
- I40IW_NO_ALLOC = 0,
- I40IW_DMA_COHERENT = 1,
- I40IW_VMALLOC = 2
-};
-
-struct i40iw_pble_info {
- unsigned long addr;
- u32 idx;
- u32 cnt;
-};
-
-struct i40iw_pble_level2 {
- struct i40iw_pble_info root;
- struct i40iw_pble_info *leaf;
- u32 leaf_cnt;
-};
-
-struct i40iw_pble_alloc {
- u32 total_cnt;
- enum i40iw_pble_level level;
- union {
- struct i40iw_pble_info level1;
- struct i40iw_pble_level2 level2;
- };
-};
-
-struct sd_pd_idx {
- u32 sd_idx;
- u32 pd_idx;
- u32 rel_pd_idx;
-};
-
-struct i40iw_add_page_info {
- struct i40iw_chunk *chunk;
- struct i40iw_hmc_sd_entry *sd_entry;
- struct i40iw_hmc_info *hmc_info;
- struct sd_pd_idx idx;
- u32 pages;
-};
-
-struct i40iw_chunk {
- struct list_head list;
- u32 size;
- void *vaddr;
- u64 fpm_addr;
- u32 pg_cnt;
- dma_addr_t *dmaaddrs;
- enum i40iw_alloc_type type;
-};
-
-struct i40iw_pble_pool {
- struct gen_pool *pool;
- struct list_head clist;
- u32 total_pble_alloc;
- u32 free_pble_cnt;
- u32 pool_shift;
-};
-
-struct i40iw_hmc_pble_rsrc {
- u32 unallocated_pble;
- u64 fpm_base_addr;
- u64 next_fpm_addr;
- struct i40iw_pble_pool pinfo;
-
- u32 stats_direct_sds;
- u32 stats_paged_sds;
- u64 stats_alloc_ok;
- u64 stats_alloc_fail;
- u64 stats_alloc_freed;
- u64 stats_lvl1;
- u64 stats_lvl2;
-};
-
-void i40iw_destroy_pble_pool(struct i40iw_sc_dev *dev, struct i40iw_hmc_pble_rsrc *pble_rsrc);
-enum i40iw_status_code i40iw_hmc_init_pble(struct i40iw_sc_dev *dev,
- struct i40iw_hmc_pble_rsrc *pble_rsrc);
-void i40iw_free_pble(struct i40iw_hmc_pble_rsrc *pble_rsrc, struct i40iw_pble_alloc *palloc);
-enum i40iw_status_code i40iw_get_pble(struct i40iw_sc_dev *dev,
- struct i40iw_hmc_pble_rsrc *pble_rsrc,
- struct i40iw_pble_alloc *palloc,
- u32 pble_cnt);
-#endif
diff --git a/drivers/infiniband/hw/i40iw/i40iw_puda.c b/drivers/infiniband/hw/i40iw/i40iw_puda.c
deleted file mode 100644
index db41ab40da9c..000000000000
--- a/drivers/infiniband/hw/i40iw/i40iw_puda.c
+++ /dev/null
@@ -1,1493 +0,0 @@
-/*******************************************************************************
-*
-* Copyright (c) 2015-2016 Intel Corporation. All rights reserved.
-*
-* This software is available to you under a choice of one of two
-* licenses. You may choose to be licensed under the terms of the GNU
-* General Public License (GPL) Version 2, available from the file
-* COPYING in the main directory of this source tree, or the
-* OpenFabrics.org BSD license below:
-*
-* Redistribution and use in source and binary forms, with or
-* without modification, are permitted provided that the following
-* conditions are met:
-*
-* - Redistributions of source code must retain the above
-* copyright notice, this list of conditions and the following
-* disclaimer.
-*
-* - Redistributions in binary form must reproduce the above
-* copyright notice, this list of conditions and the following
-* disclaimer in the documentation and/or other materials
-* provided with the distribution.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-* SOFTWARE.
-*
-*******************************************************************************/
-
-#include "i40iw_osdep.h"
-#include "i40iw_register.h"
-#include "i40iw_status.h"
-#include "i40iw_hmc.h"
-
-#include "i40iw_d.h"
-#include "i40iw_type.h"
-#include "i40iw_p.h"
-#include "i40iw_puda.h"
-
-static void i40iw_ieq_receive(struct i40iw_sc_vsi *vsi,
- struct i40iw_puda_buf *buf);
-static void i40iw_ieq_tx_compl(struct i40iw_sc_vsi *vsi, void *sqwrid);
-static void i40iw_ilq_putback_rcvbuf(struct i40iw_sc_qp *qp, u32 wqe_idx);
-static enum i40iw_status_code i40iw_puda_replenish_rq(struct i40iw_puda_rsrc
- *rsrc, bool initial);
-static void i40iw_ieq_cleanup_qp(struct i40iw_puda_rsrc *ieq, struct i40iw_sc_qp *qp);
-/**
- * i40iw_puda_get_listbuf - get buffer from puda list
- * @list: list to use for buffers (ILQ or IEQ)
- */
-static struct i40iw_puda_buf *i40iw_puda_get_listbuf(struct list_head *list)
-{
- struct i40iw_puda_buf *buf = NULL;
-
- if (!list_empty(list)) {
- buf = (struct i40iw_puda_buf *)list->next;
- list_del((struct list_head *)&buf->list);
- }
- return buf;
-}
-
-/**
- * i40iw_puda_get_bufpool - return buffer from resource
- * @rsrc: resource to use for buffer
- */
-struct i40iw_puda_buf *i40iw_puda_get_bufpool(struct i40iw_puda_rsrc *rsrc)
-{
- struct i40iw_puda_buf *buf = NULL;
- struct list_head *list = &rsrc->bufpool;
- unsigned long flags;
-
- spin_lock_irqsave(&rsrc->bufpool_lock, flags);
- buf = i40iw_puda_get_listbuf(list);
- if (buf)
- rsrc->avail_buf_count--;
- else
- rsrc->stats_buf_alloc_fail++;
- spin_unlock_irqrestore(&rsrc->bufpool_lock, flags);
- return buf;
-}
-
-/**
- * i40iw_puda_ret_bufpool - return buffer to rsrc list
- * @rsrc: resource to use for buffer
- * @buf: buffe to return to resouce
- */
-void i40iw_puda_ret_bufpool(struct i40iw_puda_rsrc *rsrc,
- struct i40iw_puda_buf *buf)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&rsrc->bufpool_lock, flags);
- list_add(&buf->list, &rsrc->bufpool);
- spin_unlock_irqrestore(&rsrc->bufpool_lock, flags);
- rsrc->avail_buf_count++;
-}
-
-/**
- * i40iw_puda_post_recvbuf - set wqe for rcv buffer
- * @rsrc: resource ptr
- * @wqe_idx: wqe index to use
- * @buf: puda buffer for rcv q
- * @initial: flag if during init time
- */
-static void i40iw_puda_post_recvbuf(struct i40iw_puda_rsrc *rsrc, u32 wqe_idx,
- struct i40iw_puda_buf *buf, bool initial)
-{
- u64 *wqe;
- struct i40iw_sc_qp *qp = &rsrc->qp;
- u64 offset24 = 0;
-
- qp->qp_uk.rq_wrid_array[wqe_idx] = (uintptr_t)buf;
- wqe = qp->qp_uk.rq_base[wqe_idx].elem;
- i40iw_debug(rsrc->dev, I40IW_DEBUG_PUDA,
- "%s: wqe_idx= %d buf = %p wqe = %p\n", __func__,
- wqe_idx, buf, wqe);
- if (!initial)
- get_64bit_val(wqe, 24, &offset24);
-
- offset24 = (offset24) ? 0 : LS_64(1, I40IWQPSQ_VALID);
- set_64bit_val(wqe, 24, offset24);
-
- set_64bit_val(wqe, 0, buf->mem.pa);
- set_64bit_val(wqe, 8,
- LS_64(buf->mem.size, I40IWQPSQ_FRAG_LEN));
- set_64bit_val(wqe, 24, offset24);
-}
-
-/**
- * i40iw_puda_replenish_rq - post rcv buffers
- * @rsrc: resource to use for buffer
- * @initial: flag if during init time
- */
-static enum i40iw_status_code i40iw_puda_replenish_rq(struct i40iw_puda_rsrc *rsrc,
- bool initial)
-{
- u32 i;
- u32 invalid_cnt = rsrc->rxq_invalid_cnt;
- struct i40iw_puda_buf *buf = NULL;
-
- for (i = 0; i < invalid_cnt; i++) {
- buf = i40iw_puda_get_bufpool(rsrc);
- if (!buf)
- return I40IW_ERR_list_empty;
- i40iw_puda_post_recvbuf(rsrc, rsrc->rx_wqe_idx, buf,
- initial);
- rsrc->rx_wqe_idx =
- ((rsrc->rx_wqe_idx + 1) % rsrc->rq_size);
- rsrc->rxq_invalid_cnt--;
- }
- return 0;
-}
-
-/**
- * i40iw_puda_alloc_buf - allocate mem for buffer
- * @dev: iwarp device
- * @length: length of buffer
- */
-static struct i40iw_puda_buf *i40iw_puda_alloc_buf(struct i40iw_sc_dev *dev,
- u32 length)
-{
- struct i40iw_puda_buf *buf = NULL;
- struct i40iw_virt_mem buf_mem;
- enum i40iw_status_code ret;
-
- ret = i40iw_allocate_virt_mem(dev->hw, &buf_mem,
- sizeof(struct i40iw_puda_buf));
- if (ret) {
- i40iw_debug(dev, I40IW_DEBUG_PUDA,
- "%s: error mem for buf\n", __func__);
- return NULL;
- }
- buf = (struct i40iw_puda_buf *)buf_mem.va;
- ret = i40iw_allocate_dma_mem(dev->hw, &buf->mem, length, 1);
- if (ret) {
- i40iw_debug(dev, I40IW_DEBUG_PUDA,
- "%s: error dma mem for buf\n", __func__);
- i40iw_free_virt_mem(dev->hw, &buf_mem);
- return NULL;
- }
- buf->buf_mem.va = buf_mem.va;
- buf->buf_mem.size = buf_mem.size;
- return buf;
-}
-
-/**
- * i40iw_puda_dele_buf - delete buffer back to system
- * @dev: iwarp device
- * @buf: buffer to free
- */
-static void i40iw_puda_dele_buf(struct i40iw_sc_dev *dev,
- struct i40iw_puda_buf *buf)
-{
- i40iw_free_dma_mem(dev->hw, &buf->mem);
- i40iw_free_virt_mem(dev->hw, &buf->buf_mem);
-}
-
-/**
- * i40iw_puda_get_next_send_wqe - return next wqe for processing
- * @qp: puda qp for wqe
- * @wqe_idx: wqe index for caller
- */
-static u64 *i40iw_puda_get_next_send_wqe(struct i40iw_qp_uk *qp, u32 *wqe_idx)
-{
- u64 *wqe = NULL;
- enum i40iw_status_code ret_code = 0;
-
- *wqe_idx = I40IW_RING_GETCURRENT_HEAD(qp->sq_ring);
- if (!*wqe_idx)
- qp->swqe_polarity = !qp->swqe_polarity;
- I40IW_RING_MOVE_HEAD(qp->sq_ring, ret_code);
- if (ret_code)
- return wqe;
- wqe = qp->sq_base[*wqe_idx].elem;
-
- return wqe;
-}
-
-/**
- * i40iw_puda_poll_info - poll cq for completion
- * @cq: cq for poll
- * @info: info return for successful completion
- */
-static enum i40iw_status_code i40iw_puda_poll_info(struct i40iw_sc_cq *cq,
- struct i40iw_puda_completion_info *info)
-{
- u64 qword0, qword2, qword3;
- u64 *cqe;
- u64 comp_ctx;
- bool valid_bit;
- u32 major_err, minor_err;
- bool error;
-
- cqe = (u64 *)I40IW_GET_CURRENT_CQ_ELEMENT(&cq->cq_uk);
- get_64bit_val(cqe, 24, &qword3);
- valid_bit = (bool)RS_64(qword3, I40IW_CQ_VALID);
-
- if (valid_bit != cq->cq_uk.polarity)
- return I40IW_ERR_QUEUE_EMPTY;
-
- i40iw_debug_buf(cq->dev, I40IW_DEBUG_PUDA, "PUDA CQE", cqe, 32);
- error = (bool)RS_64(qword3, I40IW_CQ_ERROR);
- if (error) {
- i40iw_debug(cq->dev, I40IW_DEBUG_PUDA, "%s receive error\n", __func__);
- major_err = (u32)(RS_64(qword3, I40IW_CQ_MAJERR));
- minor_err = (u32)(RS_64(qword3, I40IW_CQ_MINERR));
- info->compl_error = major_err << 16 | minor_err;
- return I40IW_ERR_CQ_COMPL_ERROR;
- }
-
- get_64bit_val(cqe, 0, &qword0);
- get_64bit_val(cqe, 16, &qword2);
-
- info->q_type = (u8)RS_64(qword3, I40IW_CQ_SQ);
- info->qp_id = (u32)RS_64(qword2, I40IWCQ_QPID);
-
- get_64bit_val(cqe, 8, &comp_ctx);
- info->qp = (struct i40iw_qp_uk *)(unsigned long)comp_ctx;
- info->wqe_idx = (u32)RS_64(qword3, I40IW_CQ_WQEIDX);
-
- if (info->q_type == I40IW_CQE_QTYPE_RQ) {
- info->vlan_valid = (bool)RS_64(qword3, I40IW_VLAN_TAG_VALID);
- info->l4proto = (u8)RS_64(qword2, I40IW_UDA_L4PROTO);
- info->l3proto = (u8)RS_64(qword2, I40IW_UDA_L3PROTO);
- info->payload_len = (u16)RS_64(qword0, I40IW_UDA_PAYLOADLEN);
- }
-
- return 0;
-}
-
-/**
- * i40iw_puda_poll_completion - processes completion for cq
- * @dev: iwarp device
- * @cq: cq getting interrupt
- * @compl_err: return any completion err
- */
-enum i40iw_status_code i40iw_puda_poll_completion(struct i40iw_sc_dev *dev,
- struct i40iw_sc_cq *cq, u32 *compl_err)
-{
- struct i40iw_qp_uk *qp;
- struct i40iw_cq_uk *cq_uk = &cq->cq_uk;
- struct i40iw_puda_completion_info info;
- enum i40iw_status_code ret = 0;
- struct i40iw_puda_buf *buf;
- struct i40iw_puda_rsrc *rsrc;
- void *sqwrid;
- u8 cq_type = cq->cq_type;
- unsigned long flags;
-
- if ((cq_type == I40IW_CQ_TYPE_ILQ) || (cq_type == I40IW_CQ_TYPE_IEQ)) {
- rsrc = (cq_type == I40IW_CQ_TYPE_ILQ) ? cq->vsi->ilq : cq->vsi->ieq;
- } else {
- i40iw_debug(dev, I40IW_DEBUG_PUDA, "%s qp_type error\n", __func__);
- return I40IW_ERR_BAD_PTR;
- }
- memset(&info, 0, sizeof(info));
- ret = i40iw_puda_poll_info(cq, &info);
- *compl_err = info.compl_error;
- if (ret == I40IW_ERR_QUEUE_EMPTY)
- return ret;
- if (ret)
- goto done;
-
- qp = info.qp;
- if (!qp || !rsrc) {
- ret = I40IW_ERR_BAD_PTR;
- goto done;
- }
-
- if (qp->qp_id != rsrc->qp_id) {
- ret = I40IW_ERR_BAD_PTR;
- goto done;
- }
-
- if (info.q_type == I40IW_CQE_QTYPE_RQ) {
- buf = (struct i40iw_puda_buf *)(uintptr_t)qp->rq_wrid_array[info.wqe_idx];
- /* Get all the tcpip information in the buf header */
- ret = i40iw_puda_get_tcpip_info(&info, buf);
- if (ret) {
- rsrc->stats_rcvd_pkt_err++;
- if (cq_type == I40IW_CQ_TYPE_ILQ) {
- i40iw_ilq_putback_rcvbuf(&rsrc->qp,
- info.wqe_idx);
- } else {
- i40iw_puda_ret_bufpool(rsrc, buf);
- i40iw_puda_replenish_rq(rsrc, false);
- }
- goto done;
- }
-
- rsrc->stats_pkt_rcvd++;
- rsrc->compl_rxwqe_idx = info.wqe_idx;
- i40iw_debug(dev, I40IW_DEBUG_PUDA, "%s RQ completion\n", __func__);
- rsrc->receive(rsrc->vsi, buf);
- if (cq_type == I40IW_CQ_TYPE_ILQ)
- i40iw_ilq_putback_rcvbuf(&rsrc->qp, info.wqe_idx);
- else
- i40iw_puda_replenish_rq(rsrc, false);
-
- } else {
- i40iw_debug(dev, I40IW_DEBUG_PUDA, "%s SQ completion\n", __func__);
- sqwrid = (void *)(uintptr_t)qp->sq_wrtrk_array[info.wqe_idx].wrid;
- I40IW_RING_SET_TAIL(qp->sq_ring, info.wqe_idx);
- rsrc->xmit_complete(rsrc->vsi, sqwrid);
- spin_lock_irqsave(&rsrc->bufpool_lock, flags);
- rsrc->tx_wqe_avail_cnt++;
- spin_unlock_irqrestore(&rsrc->bufpool_lock, flags);
- if (!list_empty(&rsrc->vsi->ilq->txpend))
- i40iw_puda_send_buf(rsrc->vsi->ilq, NULL);
- }
-
-done:
- I40IW_RING_MOVE_HEAD(cq_uk->cq_ring, ret);
- if (I40IW_RING_GETCURRENT_HEAD(cq_uk->cq_ring) == 0)
- cq_uk->polarity = !cq_uk->polarity;
- /* update cq tail in cq shadow memory also */
- I40IW_RING_MOVE_TAIL(cq_uk->cq_ring);
- set_64bit_val(cq_uk->shadow_area, 0,
- I40IW_RING_GETCURRENT_HEAD(cq_uk->cq_ring));
- return 0;
-}
-
-/**
- * i40iw_puda_send - complete send wqe for transmit
- * @qp: puda qp for send
- * @info: buffer information for transmit
- */
-enum i40iw_status_code i40iw_puda_send(struct i40iw_sc_qp *qp,
- struct i40iw_puda_send_info *info)
-{
- u64 *wqe;
- u32 iplen, l4len;
- u64 header[2];
- u32 wqe_idx;
- u8 iipt;
-
- /* number of 32 bits DWORDS in header */
- l4len = info->tcplen >> 2;
- if (info->ipv4) {
- iipt = 3;
- iplen = 5;
- } else {
- iipt = 1;
- iplen = 10;
- }
-
- wqe = i40iw_puda_get_next_send_wqe(&qp->qp_uk, &wqe_idx);
- if (!wqe)
- return I40IW_ERR_QP_TOOMANY_WRS_POSTED;
- qp->qp_uk.sq_wrtrk_array[wqe_idx].wrid = (uintptr_t)info->scratch;
- /* Third line of WQE descriptor */
- /* maclen is in words */
- header[0] = LS_64((info->maclen >> 1), I40IW_UDA_QPSQ_MACLEN) |
- LS_64(iplen, I40IW_UDA_QPSQ_IPLEN) | LS_64(1, I40IW_UDA_QPSQ_L4T) |
- LS_64(iipt, I40IW_UDA_QPSQ_IIPT) |
- LS_64(l4len, I40IW_UDA_QPSQ_L4LEN);
- /* Forth line of WQE descriptor */
- header[1] = LS_64(I40IW_OP_TYPE_SEND, I40IW_UDA_QPSQ_OPCODE) |
- LS_64(1, I40IW_UDA_QPSQ_SIGCOMPL) |
- LS_64(info->doloopback, I40IW_UDA_QPSQ_DOLOOPBACK) |
- LS_64(qp->qp_uk.swqe_polarity, I40IW_UDA_QPSQ_VALID);
-
- set_64bit_val(wqe, 0, info->paddr);
- set_64bit_val(wqe, 8, LS_64(info->len, I40IWQPSQ_FRAG_LEN));
- set_64bit_val(wqe, 16, header[0]);
- set_64bit_val(wqe, 24, header[1]);
-
- i40iw_debug_buf(qp->dev, I40IW_DEBUG_PUDA, "PUDA SEND WQE", wqe, 32);
- i40iw_qp_post_wr(&qp->qp_uk);
- return 0;
-}
-
-/**
- * i40iw_puda_send_buf - transmit puda buffer
- * @rsrc: resource to use for buffer
- * @buf: puda buffer to transmit
- */
-void i40iw_puda_send_buf(struct i40iw_puda_rsrc *rsrc, struct i40iw_puda_buf *buf)
-{
- struct i40iw_puda_send_info info;
- enum i40iw_status_code ret = 0;
- unsigned long flags;
-
- spin_lock_irqsave(&rsrc->bufpool_lock, flags);
- /* if no wqe available or not from a completion and we have
- * pending buffers, we must queue new buffer
- */
- if (!rsrc->tx_wqe_avail_cnt || (buf && !list_empty(&rsrc->txpend))) {
- list_add_tail(&buf->list, &rsrc->txpend);
- spin_unlock_irqrestore(&rsrc->bufpool_lock, flags);
- rsrc->stats_sent_pkt_q++;
- if (rsrc->type == I40IW_PUDA_RSRC_TYPE_ILQ)
- i40iw_debug(rsrc->dev, I40IW_DEBUG_PUDA,
- "%s: adding to txpend\n", __func__);
- return;
- }
- rsrc->tx_wqe_avail_cnt--;
- /* if we are coming from a completion and have pending buffers
- * then Get one from pending list
- */
- if (!buf) {
- buf = i40iw_puda_get_listbuf(&rsrc->txpend);
- if (!buf)
- goto done;
- }
-
- info.scratch = (void *)buf;
- info.paddr = buf->mem.pa;
- info.len = buf->totallen;
- info.tcplen = buf->tcphlen;
- info.maclen = buf->maclen;
- info.ipv4 = buf->ipv4;
- info.doloopback = (rsrc->type == I40IW_PUDA_RSRC_TYPE_IEQ);
-
- ret = i40iw_puda_send(&rsrc->qp, &info);
- if (ret) {
- rsrc->tx_wqe_avail_cnt++;
- rsrc->stats_sent_pkt_q++;
- list_add(&buf->list, &rsrc->txpend);
- if (rsrc->type == I40IW_PUDA_RSRC_TYPE_ILQ)
- i40iw_debug(rsrc->dev, I40IW_DEBUG_PUDA,
- "%s: adding to puda_send\n", __func__);
- } else {
- rsrc->stats_pkt_sent++;
- }
-done:
- spin_unlock_irqrestore(&rsrc->bufpool_lock, flags);
-}
-
-/**
- * i40iw_puda_qp_setctx - during init, set qp's context
- * @rsrc: qp's resource
- */
-static void i40iw_puda_qp_setctx(struct i40iw_puda_rsrc *rsrc)
-{
- struct i40iw_sc_qp *qp = &rsrc->qp;
- u64 *qp_ctx = qp->hw_host_ctx;
-
- set_64bit_val(qp_ctx, 8, qp->sq_pa);
- set_64bit_val(qp_ctx, 16, qp->rq_pa);
-
- set_64bit_val(qp_ctx, 24,
- LS_64(qp->hw_rq_size, I40IWQPC_RQSIZE) |
- LS_64(qp->hw_sq_size, I40IWQPC_SQSIZE));
-
- set_64bit_val(qp_ctx, 48, LS_64(1514, I40IWQPC_SNDMSS));
- set_64bit_val(qp_ctx, 56, 0);
- set_64bit_val(qp_ctx, 64, 1);
-
- set_64bit_val(qp_ctx, 136,
- LS_64(rsrc->cq_id, I40IWQPC_TXCQNUM) |
- LS_64(rsrc->cq_id, I40IWQPC_RXCQNUM));
-
- set_64bit_val(qp_ctx, 160, LS_64(1, I40IWQPC_PRIVEN));
-
- set_64bit_val(qp_ctx, 168,
- LS_64((uintptr_t)qp, I40IWQPC_QPCOMPCTX));
-
- set_64bit_val(qp_ctx, 176,
- LS_64(qp->sq_tph_val, I40IWQPC_SQTPHVAL) |
- LS_64(qp->rq_tph_val, I40IWQPC_RQTPHVAL) |
- LS_64(qp->qs_handle, I40IWQPC_QSHANDLE));
-
- i40iw_debug_buf(rsrc->dev, I40IW_DEBUG_PUDA, "PUDA QP CONTEXT",
- qp_ctx, I40IW_QP_CTX_SIZE);
-}
-
-/**
- * i40iw_puda_qp_wqe - setup wqe for qp create
- * @rsrc: resource for qp
- */
-static enum i40iw_status_code i40iw_puda_qp_wqe(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp)
-{
- struct i40iw_sc_cqp *cqp;
- u64 *wqe;
- u64 header;
- struct i40iw_ccq_cqe_info compl_info;
- enum i40iw_status_code status = 0;
-
- cqp = dev->cqp;
- wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, 0);
- if (!wqe)
- return I40IW_ERR_RING_FULL;
-
- set_64bit_val(wqe, 16, qp->hw_host_ctx_pa);
- set_64bit_val(wqe, 40, qp->shadow_area_pa);
- header = qp->qp_uk.qp_id |
- LS_64(I40IW_CQP_OP_CREATE_QP, I40IW_CQPSQ_OPCODE) |
- LS_64(I40IW_QP_TYPE_UDA, I40IW_CQPSQ_QP_QPTYPE) |
- LS_64(1, I40IW_CQPSQ_QP_CQNUMVALID) |
- LS_64(2, I40IW_CQPSQ_QP_NEXTIWSTATE) |
- LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
-
- set_64bit_val(wqe, 24, header);
-
- i40iw_debug_buf(cqp->dev, I40IW_DEBUG_PUDA, "PUDA CQE", wqe, 32);
- i40iw_sc_cqp_post_sq(cqp);
- status = dev->cqp_ops->poll_for_cqp_op_done(dev->cqp,
- I40IW_CQP_OP_CREATE_QP,
- &compl_info);
- return status;
-}
-
-/**
- * i40iw_puda_qp_create - create qp for resource
- * @rsrc: resource to use for buffer
- */
-static enum i40iw_status_code i40iw_puda_qp_create(struct i40iw_puda_rsrc *rsrc)
-{
- struct i40iw_sc_qp *qp = &rsrc->qp;
- struct i40iw_qp_uk *ukqp = &qp->qp_uk;
- enum i40iw_status_code ret = 0;
- u32 sq_size, rq_size, t_size;
- struct i40iw_dma_mem *mem;
-
- sq_size = rsrc->sq_size * I40IW_QP_WQE_MIN_SIZE;
- rq_size = rsrc->rq_size * I40IW_QP_WQE_MIN_SIZE;
- t_size = (sq_size + rq_size + (I40IW_SHADOW_AREA_SIZE << 3) +
- I40IW_QP_CTX_SIZE);
- /* Get page aligned memory */
- ret =
- i40iw_allocate_dma_mem(rsrc->dev->hw, &rsrc->qpmem, t_size,
- I40IW_HW_PAGE_SIZE);
- if (ret) {
- i40iw_debug(rsrc->dev, I40IW_DEBUG_PUDA, "%s: error dma mem\n", __func__);
- return ret;
- }
-
- mem = &rsrc->qpmem;
- memset(mem->va, 0, t_size);
- qp->hw_sq_size = i40iw_get_encoded_wqe_size(rsrc->sq_size, false);
- qp->hw_rq_size = i40iw_get_encoded_wqe_size(rsrc->rq_size, false);
- qp->pd = &rsrc->sc_pd;
- qp->qp_type = I40IW_QP_TYPE_UDA;
- qp->dev = rsrc->dev;
- qp->back_qp = (void *)rsrc;
- qp->sq_pa = mem->pa;
- qp->rq_pa = qp->sq_pa + sq_size;
- qp->vsi = rsrc->vsi;
- ukqp->sq_base = mem->va;
- ukqp->rq_base = &ukqp->sq_base[rsrc->sq_size];
- ukqp->shadow_area = ukqp->rq_base[rsrc->rq_size].elem;
- qp->shadow_area_pa = qp->rq_pa + rq_size;
- qp->hw_host_ctx = ukqp->shadow_area + I40IW_SHADOW_AREA_SIZE;
- qp->hw_host_ctx_pa =
- qp->shadow_area_pa + (I40IW_SHADOW_AREA_SIZE << 3);
- ukqp->qp_id = rsrc->qp_id;
- ukqp->sq_wrtrk_array = rsrc->sq_wrtrk_array;
- ukqp->rq_wrid_array = rsrc->rq_wrid_array;
-
- ukqp->qp_id = rsrc->qp_id;
- ukqp->sq_size = rsrc->sq_size;
- ukqp->rq_size = rsrc->rq_size;
-
- I40IW_RING_INIT(ukqp->sq_ring, ukqp->sq_size);
- I40IW_RING_INIT(ukqp->initial_ring, ukqp->sq_size);
- I40IW_RING_INIT(ukqp->rq_ring, ukqp->rq_size);
-
- if (qp->pd->dev->is_pf)
- ukqp->wqe_alloc_reg = (u32 __iomem *)(i40iw_get_hw_addr(qp->pd->dev) +
- I40E_PFPE_WQEALLOC);
- else
- ukqp->wqe_alloc_reg = (u32 __iomem *)(i40iw_get_hw_addr(qp->pd->dev) +
- I40E_VFPE_WQEALLOC1);
-
- qp->user_pri = 0;
- i40iw_qp_add_qos(qp);
- i40iw_puda_qp_setctx(rsrc);
- if (rsrc->ceq_valid)
- ret = i40iw_cqp_qp_create_cmd(rsrc->dev, qp);
- else
- ret = i40iw_puda_qp_wqe(rsrc->dev, qp);
- if (ret)
- i40iw_free_dma_mem(rsrc->dev->hw, &rsrc->qpmem);
- return ret;
-}
-
-/**
- * i40iw_puda_cq_wqe - setup wqe for cq create
- * @rsrc: resource for cq
- */
-static enum i40iw_status_code i40iw_puda_cq_wqe(struct i40iw_sc_dev *dev, struct i40iw_sc_cq *cq)
-{
- u64 *wqe;
- struct i40iw_sc_cqp *cqp;
- u64 header;
- struct i40iw_ccq_cqe_info compl_info;
- enum i40iw_status_code status = 0;
-
- cqp = dev->cqp;
- wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, 0);
- if (!wqe)
- return I40IW_ERR_RING_FULL;
-
- set_64bit_val(wqe, 0, cq->cq_uk.cq_size);
- set_64bit_val(wqe, 8, RS_64_1(cq, 1));
- set_64bit_val(wqe, 16,
- LS_64(cq->shadow_read_threshold,
- I40IW_CQPSQ_CQ_SHADOW_READ_THRESHOLD));
- set_64bit_val(wqe, 32, cq->cq_pa);
-
- set_64bit_val(wqe, 40, cq->shadow_area_pa);
-
- header = cq->cq_uk.cq_id |
- LS_64(I40IW_CQP_OP_CREATE_CQ, I40IW_CQPSQ_OPCODE) |
- LS_64(1, I40IW_CQPSQ_CQ_CHKOVERFLOW) |
- LS_64(1, I40IW_CQPSQ_CQ_ENCEQEMASK) |
- LS_64(1, I40IW_CQPSQ_CQ_CEQIDVALID) |
- LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
- set_64bit_val(wqe, 24, header);
-
- i40iw_debug_buf(dev, I40IW_DEBUG_PUDA, "PUDA CQE",
- wqe, I40IW_CQP_WQE_SIZE * 8);
-
- i40iw_sc_cqp_post_sq(dev->cqp);
- status = dev->cqp_ops->poll_for_cqp_op_done(dev->cqp,
- I40IW_CQP_OP_CREATE_CQ,
- &compl_info);
- return status;
-}
-
-/**
- * i40iw_puda_cq_create - create cq for resource
- * @rsrc: resource for which cq to create
- */
-static enum i40iw_status_code i40iw_puda_cq_create(struct i40iw_puda_rsrc *rsrc)
-{
- struct i40iw_sc_dev *dev = rsrc->dev;
- struct i40iw_sc_cq *cq = &rsrc->cq;
- enum i40iw_status_code ret = 0;
- u32 tsize, cqsize;
- struct i40iw_dma_mem *mem;
- struct i40iw_cq_init_info info;
- struct i40iw_cq_uk_init_info *init_info = &info.cq_uk_init_info;
-
- cq->vsi = rsrc->vsi;
- cqsize = rsrc->cq_size * (sizeof(struct i40iw_cqe));
- tsize = cqsize + sizeof(struct i40iw_cq_shadow_area);
- ret = i40iw_allocate_dma_mem(dev->hw, &rsrc->cqmem, tsize,
- I40IW_CQ0_ALIGNMENT_MASK);
- if (ret)
- return ret;
-
- mem = &rsrc->cqmem;
- memset(&info, 0, sizeof(info));
- info.dev = dev;
- info.type = (rsrc->type == I40IW_PUDA_RSRC_TYPE_ILQ) ?
- I40IW_CQ_TYPE_ILQ : I40IW_CQ_TYPE_IEQ;
- info.shadow_read_threshold = rsrc->cq_size >> 2;
- info.ceq_id_valid = true;
- info.cq_base_pa = mem->pa;
- info.shadow_area_pa = mem->pa + cqsize;
- init_info->cq_base = mem->va;
- init_info->shadow_area = (u64 *)((u8 *)mem->va + cqsize);
- init_info->cq_size = rsrc->cq_size;
- init_info->cq_id = rsrc->cq_id;
- info.ceqe_mask = true;
- info.ceq_id_valid = true;
- ret = dev->iw_priv_cq_ops->cq_init(cq, &info);
- if (ret)
- goto error;
- if (rsrc->ceq_valid)
- ret = i40iw_cqp_cq_create_cmd(dev, cq);
- else
- ret = i40iw_puda_cq_wqe(dev, cq);
-error:
- if (ret)
- i40iw_free_dma_mem(dev->hw, &rsrc->cqmem);
- return ret;
-}
-
-/**
- * i40iw_puda_free_qp - free qp for resource
- * @rsrc: resource for which qp to free
- */
-static void i40iw_puda_free_qp(struct i40iw_puda_rsrc *rsrc)
-{
- enum i40iw_status_code ret;
- struct i40iw_ccq_cqe_info compl_info;
- struct i40iw_sc_dev *dev = rsrc->dev;
-
- if (rsrc->ceq_valid) {
- i40iw_cqp_qp_destroy_cmd(dev, &rsrc->qp);
- return;
- }
-
- ret = dev->iw_priv_qp_ops->qp_destroy(&rsrc->qp,
- 0, false, true, true);
- if (ret)
- i40iw_debug(dev, I40IW_DEBUG_PUDA,
- "%s error puda qp destroy wqe\n",
- __func__);
-
- if (!ret) {
- ret = dev->cqp_ops->poll_for_cqp_op_done(dev->cqp,
- I40IW_CQP_OP_DESTROY_QP,
- &compl_info);
- if (ret)
- i40iw_debug(dev, I40IW_DEBUG_PUDA,
- "%s error puda qp destroy failed\n",
- __func__);
- }
-}
-
-/**
- * i40iw_puda_free_cq - free cq for resource
- * @rsrc: resource for which cq to free
- */
-static void i40iw_puda_free_cq(struct i40iw_puda_rsrc *rsrc)
-{
- enum i40iw_status_code ret;
- struct i40iw_ccq_cqe_info compl_info;
- struct i40iw_sc_dev *dev = rsrc->dev;
-
- if (rsrc->ceq_valid) {
- i40iw_cqp_cq_destroy_cmd(dev, &rsrc->cq);
- return;
- }
- ret = dev->iw_priv_cq_ops->cq_destroy(&rsrc->cq, 0, true);
-
- if (ret)
- i40iw_debug(dev, I40IW_DEBUG_PUDA,
- "%s error ieq cq destroy\n",
- __func__);
-
- if (!ret) {
- ret = dev->cqp_ops->poll_for_cqp_op_done(dev->cqp,
- I40IW_CQP_OP_DESTROY_CQ,
- &compl_info);
- if (ret)
- i40iw_debug(dev, I40IW_DEBUG_PUDA,
- "%s error ieq qp destroy done\n",
- __func__);
- }
-}
-
-/**
- * i40iw_puda_dele_resources - delete all resources during close
- * @dev: iwarp device
- * @type: type of resource to dele
- * @reset: true if reset chip
- */
-void i40iw_puda_dele_resources(struct i40iw_sc_vsi *vsi,
- enum puda_resource_type type,
- bool reset)
-{
- struct i40iw_sc_dev *dev = vsi->dev;
- struct i40iw_puda_rsrc *rsrc;
- struct i40iw_puda_buf *buf = NULL;
- struct i40iw_puda_buf *nextbuf = NULL;
- struct i40iw_virt_mem *vmem;
-
- switch (type) {
- case I40IW_PUDA_RSRC_TYPE_ILQ:
- rsrc = vsi->ilq;
- vmem = &vsi->ilq_mem;
- break;
- case I40IW_PUDA_RSRC_TYPE_IEQ:
- rsrc = vsi->ieq;
- vmem = &vsi->ieq_mem;
- break;
- default:
- i40iw_debug(dev, I40IW_DEBUG_PUDA, "%s: error resource type = 0x%x\n",
- __func__, type);
- return;
- }
-
- switch (rsrc->completion) {
- case PUDA_HASH_CRC_COMPLETE:
- i40iw_free_hash_desc(rsrc->hash_desc);
- case PUDA_QP_CREATED:
- if (!reset)
- i40iw_puda_free_qp(rsrc);
-
- i40iw_free_dma_mem(dev->hw, &rsrc->qpmem);
- /* fallthrough */
- case PUDA_CQ_CREATED:
- if (!reset)
- i40iw_puda_free_cq(rsrc);
-
- i40iw_free_dma_mem(dev->hw, &rsrc->cqmem);
- break;
- default:
- i40iw_debug(rsrc->dev, I40IW_DEBUG_PUDA, "%s error no resources\n", __func__);
- break;
- }
- /* Free all allocated puda buffers for both tx and rx */
- buf = rsrc->alloclist;
- while (buf) {
- nextbuf = buf->next;
- i40iw_puda_dele_buf(dev, buf);
- buf = nextbuf;
- rsrc->alloc_buf_count--;
- }
- i40iw_free_virt_mem(dev->hw, vmem);
-}
-
-/**
- * i40iw_puda_allocbufs - allocate buffers for resource
- * @rsrc: resource for buffer allocation
- * @count: number of buffers to create
- */
-static enum i40iw_status_code i40iw_puda_allocbufs(struct i40iw_puda_rsrc *rsrc,
- u32 count)
-{
- u32 i;
- struct i40iw_puda_buf *buf;
- struct i40iw_puda_buf *nextbuf;
-
- for (i = 0; i < count; i++) {
- buf = i40iw_puda_alloc_buf(rsrc->dev, rsrc->buf_size);
- if (!buf) {
- rsrc->stats_buf_alloc_fail++;
- return I40IW_ERR_NO_MEMORY;
- }
- i40iw_puda_ret_bufpool(rsrc, buf);
- rsrc->alloc_buf_count++;
- if (!rsrc->alloclist) {
- rsrc->alloclist = buf;
- } else {
- nextbuf = rsrc->alloclist;
- rsrc->alloclist = buf;
- buf->next = nextbuf;
- }
- }
- rsrc->avail_buf_count = rsrc->alloc_buf_count;
- return 0;
-}
-
-/**
- * i40iw_puda_create_rsrc - create resouce (ilq or ieq)
- * @dev: iwarp device
- * @info: resource information
- */
-enum i40iw_status_code i40iw_puda_create_rsrc(struct i40iw_sc_vsi *vsi,
- struct i40iw_puda_rsrc_info *info)
-{
- struct i40iw_sc_dev *dev = vsi->dev;
- enum i40iw_status_code ret = 0;
- struct i40iw_puda_rsrc *rsrc;
- u32 pudasize;
- u32 sqwridsize, rqwridsize;
- struct i40iw_virt_mem *vmem;
-
- info->count = 1;
- pudasize = sizeof(struct i40iw_puda_rsrc);
- sqwridsize = info->sq_size * sizeof(struct i40iw_sq_uk_wr_trk_info);
- rqwridsize = info->rq_size * 8;
- switch (info->type) {
- case I40IW_PUDA_RSRC_TYPE_ILQ:
- vmem = &vsi->ilq_mem;
- break;
- case I40IW_PUDA_RSRC_TYPE_IEQ:
- vmem = &vsi->ieq_mem;
- break;
- default:
- return I40IW_NOT_SUPPORTED;
- }
- ret =
- i40iw_allocate_virt_mem(dev->hw, vmem,
- pudasize + sqwridsize + rqwridsize);
- if (ret)
- return ret;
- rsrc = (struct i40iw_puda_rsrc *)vmem->va;
- spin_lock_init(&rsrc->bufpool_lock);
- if (info->type == I40IW_PUDA_RSRC_TYPE_ILQ) {
- vsi->ilq = (struct i40iw_puda_rsrc *)vmem->va;
- vsi->ilq_count = info->count;
- rsrc->receive = info->receive;
- rsrc->xmit_complete = info->xmit_complete;
- } else {
- vmem = &vsi->ieq_mem;
- vsi->ieq_count = info->count;
- vsi->ieq = (struct i40iw_puda_rsrc *)vmem->va;
- rsrc->receive = i40iw_ieq_receive;
- rsrc->xmit_complete = i40iw_ieq_tx_compl;
- }
-
- rsrc->ceq_valid = info->ceq_valid;
- rsrc->type = info->type;
- rsrc->sq_wrtrk_array = (struct i40iw_sq_uk_wr_trk_info *)((u8 *)vmem->va + pudasize);
- rsrc->rq_wrid_array = (u64 *)((u8 *)vmem->va + pudasize + sqwridsize);
- /* Initialize all ieq lists */
- INIT_LIST_HEAD(&rsrc->bufpool);
- INIT_LIST_HEAD(&rsrc->txpend);
-
- rsrc->tx_wqe_avail_cnt = info->sq_size - 1;
- dev->iw_pd_ops->pd_init(dev, &rsrc->sc_pd, info->pd_id, -1);
- rsrc->qp_id = info->qp_id;
- rsrc->cq_id = info->cq_id;
- rsrc->sq_size = info->sq_size;
- rsrc->rq_size = info->rq_size;
- rsrc->cq_size = info->rq_size + info->sq_size;
- rsrc->buf_size = info->buf_size;
- rsrc->dev = dev;
- rsrc->vsi = vsi;
-
- ret = i40iw_puda_cq_create(rsrc);
- if (!ret) {
- rsrc->completion = PUDA_CQ_CREATED;
- ret = i40iw_puda_qp_create(rsrc);
- }
- if (ret) {
- i40iw_debug(dev, I40IW_DEBUG_PUDA, "[%s] error qp_create\n", __func__);
- goto error;
- }
- rsrc->completion = PUDA_QP_CREATED;
-
- ret = i40iw_puda_allocbufs(rsrc, info->tx_buf_cnt + info->rq_size);
- if (ret) {
- i40iw_debug(dev, I40IW_DEBUG_PUDA, "[%s] error allloc_buf\n", __func__);
- goto error;
- }
-
- rsrc->rxq_invalid_cnt = info->rq_size;
- ret = i40iw_puda_replenish_rq(rsrc, true);
- if (ret)
- goto error;
-
- if (info->type == I40IW_PUDA_RSRC_TYPE_IEQ) {
- if (!i40iw_init_hash_desc(&rsrc->hash_desc)) {
- rsrc->check_crc = true;
- rsrc->completion = PUDA_HASH_CRC_COMPLETE;
- ret = 0;
- }
- }
-
- dev->ccq_ops->ccq_arm(&rsrc->cq);
- return ret;
- error:
- i40iw_puda_dele_resources(vsi, info->type, false);
-
- return ret;
-}
-
-/**
- * i40iw_ilq_putback_rcvbuf - ilq buffer to put back on rq
- * @qp: ilq's qp resource
- * @wqe_idx: wqe index of completed rcvbuf
- */
-static void i40iw_ilq_putback_rcvbuf(struct i40iw_sc_qp *qp, u32 wqe_idx)
-{
- u64 *wqe;
- u64 offset24;
-
- wqe = qp->qp_uk.rq_base[wqe_idx].elem;
- get_64bit_val(wqe, 24, &offset24);
- offset24 = (offset24) ? 0 : LS_64(1, I40IWQPSQ_VALID);
- set_64bit_val(wqe, 24, offset24);
-}
-
-/**
- * i40iw_ieq_get_fpdu - given length return fpdu length
- * @length: length if fpdu
- */
-static u16 i40iw_ieq_get_fpdu_length(u16 length)
-{
- u16 fpdu_len;
-
- fpdu_len = length + I40IW_IEQ_MPA_FRAMING;
- fpdu_len = (fpdu_len + 3) & 0xfffffffc;
- return fpdu_len;
-}
-
-/**
- * i40iw_ieq_copy_to_txbuf - copydata from rcv buf to tx buf
- * @buf: rcv buffer with partial
- * @txbuf: tx buffer for sendign back
- * @buf_offset: rcv buffer offset to copy from
- * @txbuf_offset: at offset in tx buf to copy
- * @length: length of data to copy
- */
-static void i40iw_ieq_copy_to_txbuf(struct i40iw_puda_buf *buf,
- struct i40iw_puda_buf *txbuf,
- u16 buf_offset, u32 txbuf_offset,
- u32 length)
-{
- void *mem1 = (u8 *)buf->mem.va + buf_offset;
- void *mem2 = (u8 *)txbuf->mem.va + txbuf_offset;
-
- memcpy(mem2, mem1, length);
-}
-
-/**
- * i40iw_ieq_setup_tx_buf - setup tx buffer for partial handling
- * @buf: reeive buffer with partial
- * @txbuf: buffer to prepare
- */
-static void i40iw_ieq_setup_tx_buf(struct i40iw_puda_buf *buf,
- struct i40iw_puda_buf *txbuf)
-{
- txbuf->maclen = buf->maclen;
- txbuf->tcphlen = buf->tcphlen;
- txbuf->ipv4 = buf->ipv4;
- txbuf->hdrlen = buf->hdrlen;
- i40iw_ieq_copy_to_txbuf(buf, txbuf, 0, 0, buf->hdrlen);
-}
-
-/**
- * i40iw_ieq_check_first_buf - check if rcv buffer's seq is in range
- * @buf: receive exception buffer
- * @fps: first partial sequence number
- */
-static void i40iw_ieq_check_first_buf(struct i40iw_puda_buf *buf, u32 fps)
-{
- u32 offset;
-
- if (buf->seqnum < fps) {
- offset = fps - buf->seqnum;
- if (offset > buf->datalen)
- return;
- buf->data += offset;
- buf->datalen -= (u16)offset;
- buf->seqnum = fps;
- }
-}
-
-/**
- * i40iw_ieq_compl_pfpdu - write txbuf with full fpdu
- * @ieq: ieq resource
- * @rxlist: ieq's received buffer list
- * @pbufl: temporary list for buffers for fpddu
- * @txbuf: tx buffer for fpdu
- * @fpdu_len: total length of fpdu
- */
-static void i40iw_ieq_compl_pfpdu(struct i40iw_puda_rsrc *ieq,
- struct list_head *rxlist,
- struct list_head *pbufl,
- struct i40iw_puda_buf *txbuf,
- u16 fpdu_len)
-{
- struct i40iw_puda_buf *buf;
- u32 nextseqnum;
- u16 txoffset, bufoffset;
-
- buf = i40iw_puda_get_listbuf(pbufl);
- if (!buf)
- return;
- nextseqnum = buf->seqnum + fpdu_len;
- txbuf->totallen = buf->hdrlen + fpdu_len;
- txbuf->data = (u8 *)txbuf->mem.va + buf->hdrlen;
- i40iw_ieq_setup_tx_buf(buf, txbuf);
-
- txoffset = buf->hdrlen;
- bufoffset = (u16)(buf->data - (u8 *)buf->mem.va);
-
- do {
- if (buf->datalen >= fpdu_len) {
- /* copied full fpdu */
- i40iw_ieq_copy_to_txbuf(buf, txbuf, bufoffset, txoffset, fpdu_len);
- buf->datalen -= fpdu_len;
- buf->data += fpdu_len;
- buf->seqnum = nextseqnum;
- break;
- }
- /* copy partial fpdu */
- i40iw_ieq_copy_to_txbuf(buf, txbuf, bufoffset, txoffset, buf->datalen);
- txoffset += buf->datalen;
- fpdu_len -= buf->datalen;
- i40iw_puda_ret_bufpool(ieq, buf);
- buf = i40iw_puda_get_listbuf(pbufl);
- if (!buf)
- return;
- bufoffset = (u16)(buf->data - (u8 *)buf->mem.va);
- } while (1);
-
- /* last buffer on the list*/
- if (buf->datalen)
- list_add(&buf->list, rxlist);
- else
- i40iw_puda_ret_bufpool(ieq, buf);
-}
-
-/**
- * i40iw_ieq_create_pbufl - create buffer list for single fpdu
- * @rxlist: resource list for receive ieq buffes
- * @pbufl: temp. list for buffers for fpddu
- * @buf: first receive buffer
- * @fpdu_len: total length of fpdu
- */
-static enum i40iw_status_code i40iw_ieq_create_pbufl(
- struct i40iw_pfpdu *pfpdu,
- struct list_head *rxlist,
- struct list_head *pbufl,
- struct i40iw_puda_buf *buf,
- u16 fpdu_len)
-{
- enum i40iw_status_code status = 0;
- struct i40iw_puda_buf *nextbuf;
- u32 nextseqnum;
- u16 plen = fpdu_len - buf->datalen;
- bool done = false;
-
- nextseqnum = buf->seqnum + buf->datalen;
- do {
- nextbuf = i40iw_puda_get_listbuf(rxlist);
- if (!nextbuf) {
- status = I40IW_ERR_list_empty;
- break;
- }
- list_add_tail(&nextbuf->list, pbufl);
- if (nextbuf->seqnum != nextseqnum) {
- pfpdu->bad_seq_num++;
- status = I40IW_ERR_SEQ_NUM;
- break;
- }
- if (nextbuf->datalen >= plen) {
- done = true;
- } else {
- plen -= nextbuf->datalen;
- nextseqnum = nextbuf->seqnum + nextbuf->datalen;
- }
-
- } while (!done);
-
- return status;
-}
-
-/**
- * i40iw_ieq_handle_partial - process partial fpdu buffer
- * @ieq: ieq resource
- * @pfpdu: partial management per user qp
- * @buf: receive buffer
- * @fpdu_len: fpdu len in the buffer
- */
-static enum i40iw_status_code i40iw_ieq_handle_partial(struct i40iw_puda_rsrc *ieq,
- struct i40iw_pfpdu *pfpdu,
- struct i40iw_puda_buf *buf,
- u16 fpdu_len)
-{
- enum i40iw_status_code status = 0;
- u8 *crcptr;
- u32 mpacrc;
- u32 seqnum = buf->seqnum;
- struct list_head pbufl; /* partial buffer list */
- struct i40iw_puda_buf *txbuf = NULL;
- struct list_head *rxlist = &pfpdu->rxlist;
-
- INIT_LIST_HEAD(&pbufl);
- list_add(&buf->list, &pbufl);
-
- status = i40iw_ieq_create_pbufl(pfpdu, rxlist, &pbufl, buf, fpdu_len);
- if (status)
- goto error;
-
- txbuf = i40iw_puda_get_bufpool(ieq);
- if (!txbuf) {
- pfpdu->no_tx_bufs++;
- status = I40IW_ERR_NO_TXBUFS;
- goto error;
- }
-
- i40iw_ieq_compl_pfpdu(ieq, rxlist, &pbufl, txbuf, fpdu_len);
- i40iw_ieq_update_tcpip_info(txbuf, fpdu_len, seqnum);
- crcptr = txbuf->data + fpdu_len - 4;
- mpacrc = *(u32 *)crcptr;
- if (ieq->check_crc) {
- status = i40iw_ieq_check_mpacrc(ieq->hash_desc, txbuf->data,
- (fpdu_len - 4), mpacrc);
- if (status) {
- i40iw_debug(ieq->dev, I40IW_DEBUG_IEQ,
- "%s: error bad crc\n", __func__);
- goto error;
- }
- }
-
- i40iw_debug_buf(ieq->dev, I40IW_DEBUG_IEQ, "IEQ TX BUFFER",
- txbuf->mem.va, txbuf->totallen);
- i40iw_puda_send_buf(ieq, txbuf);
- pfpdu->rcv_nxt = seqnum + fpdu_len;
- return status;
- error:
- while (!list_empty(&pbufl)) {
- buf = (struct i40iw_puda_buf *)(pbufl.prev);
- list_del(&buf->list);
- list_add(&buf->list, rxlist);
- }
- if (txbuf)
- i40iw_puda_ret_bufpool(ieq, txbuf);
- return status;
-}
-
-/**
- * i40iw_ieq_process_buf - process buffer rcvd for ieq
- * @ieq: ieq resource
- * @pfpdu: partial management per user qp
- * @buf: receive buffer
- */
-static enum i40iw_status_code i40iw_ieq_process_buf(struct i40iw_puda_rsrc *ieq,
- struct i40iw_pfpdu *pfpdu,
- struct i40iw_puda_buf *buf)
-{
- u16 fpdu_len = 0;
- u16 datalen = buf->datalen;
- u8 *datap = buf->data;
- u8 *crcptr;
- u16 ioffset = 0;
- u32 mpacrc;
- u32 seqnum = buf->seqnum;
- u16 length = 0;
- u16 full = 0;
- bool partial = false;
- struct i40iw_puda_buf *txbuf;
- struct list_head *rxlist = &pfpdu->rxlist;
- enum i40iw_status_code ret = 0;
- enum i40iw_status_code status = 0;
-
- ioffset = (u16)(buf->data - (u8 *)buf->mem.va);
- while (datalen) {
- fpdu_len = i40iw_ieq_get_fpdu_length(ntohs(*(__be16 *)datap));
- if (fpdu_len > pfpdu->max_fpdu_data) {
- i40iw_debug(ieq->dev, I40IW_DEBUG_IEQ,
- "%s: error bad fpdu_len\n", __func__);
- status = I40IW_ERR_MPA_CRC;
- list_add(&buf->list, rxlist);
- return status;
- }
-
- if (datalen < fpdu_len) {
- partial = true;
- break;
- }
- crcptr = datap + fpdu_len - 4;
- mpacrc = *(u32 *)crcptr;
- if (ieq->check_crc)
- ret = i40iw_ieq_check_mpacrc(ieq->hash_desc,
- datap, fpdu_len - 4, mpacrc);
- if (ret) {
- status = I40IW_ERR_MPA_CRC;
- list_add(&buf->list, rxlist);
- return status;
- }
- full++;
- pfpdu->fpdu_processed++;
- datap += fpdu_len;
- length += fpdu_len;
- datalen -= fpdu_len;
- }
- if (full) {
- /* copy full pdu's in the txbuf and send them out */
- txbuf = i40iw_puda_get_bufpool(ieq);
- if (!txbuf) {
- pfpdu->no_tx_bufs++;
- status = I40IW_ERR_NO_TXBUFS;
- list_add(&buf->list, rxlist);
- return status;
- }
- /* modify txbuf's buffer header */
- i40iw_ieq_setup_tx_buf(buf, txbuf);
- /* copy full fpdu's to new buffer */
- i40iw_ieq_copy_to_txbuf(buf, txbuf, ioffset, buf->hdrlen,
- length);
- txbuf->totallen = buf->hdrlen + length;
-
- i40iw_ieq_update_tcpip_info(txbuf, length, buf->seqnum);
- i40iw_puda_send_buf(ieq, txbuf);
-
- if (!datalen) {
- pfpdu->rcv_nxt = buf->seqnum + length;
- i40iw_puda_ret_bufpool(ieq, buf);
- return status;
- }
- buf->data = datap;
- buf->seqnum = seqnum + length;
- buf->datalen = datalen;
- pfpdu->rcv_nxt = buf->seqnum;
- }
- if (partial)
- status = i40iw_ieq_handle_partial(ieq, pfpdu, buf, fpdu_len);
-
- return status;
-}
-
-/**
- * i40iw_ieq_process_fpdus - process fpdu's buffers on its list
- * @qp: qp for which partial fpdus
- * @ieq: ieq resource
- */
-static void i40iw_ieq_process_fpdus(struct i40iw_sc_qp *qp,
- struct i40iw_puda_rsrc *ieq)
-{
- struct i40iw_pfpdu *pfpdu = &qp->pfpdu;
- struct list_head *rxlist = &pfpdu->rxlist;
- struct i40iw_puda_buf *buf;
- enum i40iw_status_code status;
-
- do {
- if (list_empty(rxlist))
- break;
- buf = i40iw_puda_get_listbuf(rxlist);
- if (!buf) {
- i40iw_debug(ieq->dev, I40IW_DEBUG_IEQ,
- "%s: error no buf\n", __func__);
- break;
- }
- if (buf->seqnum != pfpdu->rcv_nxt) {
- /* This could be out of order or missing packet */
- pfpdu->out_of_order++;
- list_add(&buf->list, rxlist);
- break;
- }
- /* keep processing buffers from the head of the list */
- status = i40iw_ieq_process_buf(ieq, pfpdu, buf);
- if (status == I40IW_ERR_MPA_CRC) {
- pfpdu->mpa_crc_err = true;
- while (!list_empty(rxlist)) {
- buf = i40iw_puda_get_listbuf(rxlist);
- i40iw_puda_ret_bufpool(ieq, buf);
- pfpdu->crc_err++;
- }
- /* create CQP for AE */
- i40iw_ieq_mpa_crc_ae(ieq->dev, qp);
- }
- } while (!status);
-}
-
-/**
- * i40iw_ieq_handle_exception - handle qp's exception
- * @ieq: ieq resource
- * @qp: qp receiving excpetion
- * @buf: receive buffer
- */
-static void i40iw_ieq_handle_exception(struct i40iw_puda_rsrc *ieq,
- struct i40iw_sc_qp *qp,
- struct i40iw_puda_buf *buf)
-{
- struct i40iw_puda_buf *tmpbuf = NULL;
- struct i40iw_pfpdu *pfpdu = &qp->pfpdu;
- u32 *hw_host_ctx = (u32 *)qp->hw_host_ctx;
- u32 rcv_wnd = hw_host_ctx[23];
- /* first partial seq # in q2 */
- u32 fps = qp->q2_buf[16];
- struct list_head *rxlist = &pfpdu->rxlist;
- struct list_head *plist;
-
- pfpdu->total_ieq_bufs++;
-
- if (pfpdu->mpa_crc_err) {
- pfpdu->crc_err++;
- goto error;
- }
- if (pfpdu->mode && (fps != pfpdu->fps)) {
- /* clean up qp as it is new partial sequence */
- i40iw_ieq_cleanup_qp(ieq, qp);
- i40iw_debug(ieq->dev, I40IW_DEBUG_IEQ,
- "%s: restarting new partial\n", __func__);
- pfpdu->mode = false;
- }
-
- if (!pfpdu->mode) {
- i40iw_debug_buf(ieq->dev, I40IW_DEBUG_IEQ, "Q2 BUFFER", (u64 *)qp->q2_buf, 128);
- /* First_Partial_Sequence_Number check */
- pfpdu->rcv_nxt = fps;
- pfpdu->fps = fps;
- pfpdu->mode = true;
- pfpdu->max_fpdu_data = ieq->vsi->mss;
- pfpdu->pmode_count++;
- INIT_LIST_HEAD(rxlist);
- i40iw_ieq_check_first_buf(buf, fps);
- }
-
- if (!(rcv_wnd >= (buf->seqnum - pfpdu->rcv_nxt))) {
- pfpdu->bad_seq_num++;
- goto error;
- }
-
- if (!list_empty(rxlist)) {
- tmpbuf = (struct i40iw_puda_buf *)rxlist->next;
- plist = &tmpbuf->list;
- while ((struct list_head *)tmpbuf != rxlist) {
- if ((int)(buf->seqnum - tmpbuf->seqnum) < 0)
- break;
- tmpbuf = (struct i40iw_puda_buf *)plist->next;
- }
- /* Insert buf before tmpbuf */
- list_add_tail(&buf->list, &tmpbuf->list);
- } else {
- list_add_tail(&buf->list, rxlist);
- }
- i40iw_ieq_process_fpdus(qp, ieq);
- return;
- error:
- i40iw_puda_ret_bufpool(ieq, buf);
-}
-
-/**
- * i40iw_ieq_receive - received exception buffer
- * @dev: iwarp device
- * @buf: exception buffer received
- */
-static void i40iw_ieq_receive(struct i40iw_sc_vsi *vsi,
- struct i40iw_puda_buf *buf)
-{
- struct i40iw_puda_rsrc *ieq = vsi->ieq;
- struct i40iw_sc_qp *qp = NULL;
- u32 wqe_idx = ieq->compl_rxwqe_idx;
-
- qp = i40iw_ieq_get_qp(vsi->dev, buf);
- if (!qp) {
- ieq->stats_bad_qp_id++;
- i40iw_puda_ret_bufpool(ieq, buf);
- } else {
- i40iw_ieq_handle_exception(ieq, qp, buf);
- }
- /*
- * ieq->rx_wqe_idx is used by i40iw_puda_replenish_rq()
- * on which wqe_idx to start replenish rq
- */
- if (!ieq->rxq_invalid_cnt)
- ieq->rx_wqe_idx = wqe_idx;
- ieq->rxq_invalid_cnt++;
-}
-
-/**
- * i40iw_ieq_tx_compl - put back after sending completed exception buffer
- * @vsi: pointer to the vsi structure
- * @sqwrid: pointer to puda buffer
- */
-static void i40iw_ieq_tx_compl(struct i40iw_sc_vsi *vsi, void *sqwrid)
-{
- struct i40iw_puda_rsrc *ieq = vsi->ieq;
- struct i40iw_puda_buf *buf = (struct i40iw_puda_buf *)sqwrid;
-
- i40iw_puda_ret_bufpool(ieq, buf);
- if (!list_empty(&ieq->txpend)) {
- buf = i40iw_puda_get_listbuf(&ieq->txpend);
- i40iw_puda_send_buf(ieq, buf);
- }
-}
-
-/**
- * i40iw_ieq_cleanup_qp - qp is being destroyed
- * @ieq: ieq resource
- * @qp: all pending fpdu buffers
- */
-static void i40iw_ieq_cleanup_qp(struct i40iw_puda_rsrc *ieq, struct i40iw_sc_qp *qp)
-{
- struct i40iw_puda_buf *buf;
- struct i40iw_pfpdu *pfpdu = &qp->pfpdu;
- struct list_head *rxlist = &pfpdu->rxlist;
-
- if (!pfpdu->mode)
- return;
- while (!list_empty(rxlist)) {
- buf = i40iw_puda_get_listbuf(rxlist);
- i40iw_puda_ret_bufpool(ieq, buf);
- }
-}
diff --git a/drivers/infiniband/hw/i40iw/i40iw_puda.h b/drivers/infiniband/hw/i40iw/i40iw_puda.h
deleted file mode 100644
index dba05ce7d392..000000000000
--- a/drivers/infiniband/hw/i40iw/i40iw_puda.h
+++ /dev/null
@@ -1,189 +0,0 @@
-/*******************************************************************************
-*
-* Copyright (c) 2015-2016 Intel Corporation. All rights reserved.
-*
-* This software is available to you under a choice of one of two
-* licenses. You may choose to be licensed under the terms of the GNU
-* General Public License (GPL) Version 2, available from the file
-* COPYING in the main directory of this source tree, or the
-* OpenFabrics.org BSD license below:
-*
-* Redistribution and use in source and binary forms, with or
-* without modification, are permitted provided that the following
-* conditions are met:
-*
-* - Redistributions of source code must retain the above
-* copyright notice, this list of conditions and the following
-* disclaimer.
-*
-* - Redistributions in binary form must reproduce the above
-* copyright notice, this list of conditions and the following
-* disclaimer in the documentation and/or other materials
-* provided with the distribution.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-* SOFTWARE.
-*
-*******************************************************************************/
-
-#ifndef I40IW_PUDA_H
-#define I40IW_PUDA_H
-
-#define I40IW_IEQ_MPA_FRAMING 6
-
-struct i40iw_sc_dev;
-struct i40iw_sc_qp;
-struct i40iw_sc_cq;
-
-enum puda_resource_type {
- I40IW_PUDA_RSRC_TYPE_ILQ = 1,
- I40IW_PUDA_RSRC_TYPE_IEQ
-};
-
-enum puda_rsrc_complete {
- PUDA_CQ_CREATED = 1,
- PUDA_QP_CREATED,
- PUDA_TX_COMPLETE,
- PUDA_RX_COMPLETE,
- PUDA_HASH_CRC_COMPLETE
-};
-
-struct i40iw_puda_completion_info {
- struct i40iw_qp_uk *qp;
- u8 q_type;
- u8 vlan_valid;
- u8 l3proto;
- u8 l4proto;
- u16 payload_len;
- u32 compl_error; /* No_err=0, else major and minor err code */
- u32 qp_id;
- u32 wqe_idx;
-};
-
-struct i40iw_puda_send_info {
- u64 paddr; /* Physical address */
- u32 len;
- u8 tcplen;
- u8 maclen;
- bool ipv4;
- bool doloopback;
- void *scratch;
-};
-
-struct i40iw_puda_buf {
- struct list_head list; /* MUST be first entry */
- struct i40iw_dma_mem mem; /* DMA memory for the buffer */
- struct i40iw_puda_buf *next; /* for alloclist in rsrc struct */
- struct i40iw_virt_mem buf_mem; /* Buffer memory for this buffer */
- void *scratch;
- u8 *iph;
- u8 *tcph;
- u8 *data;
- u16 datalen;
- u16 vlan_id;
- u8 tcphlen; /* tcp length in bytes */
- u8 maclen; /* mac length in bytes */
- u32 totallen; /* machlen+iphlen+tcphlen+datalen */
- atomic_t refcount;
- u8 hdrlen;
- bool ipv4;
- u32 seqnum;
-};
-
-struct i40iw_puda_rsrc_info {
- enum puda_resource_type type; /* ILQ or IEQ */
- u32 count;
- u16 pd_id;
- bool ceq_valid;
- u32 cq_id;
- u32 qp_id;
- u32 sq_size;
- u32 rq_size;
- u16 buf_size;
- u16 mss;
- u32 tx_buf_cnt; /* total bufs allocated will be rq_size + tx_buf_cnt */
- void (*receive)(struct i40iw_sc_vsi *, struct i40iw_puda_buf *);
- void (*xmit_complete)(struct i40iw_sc_vsi *, void *);
-};
-
-struct i40iw_puda_rsrc {
- struct i40iw_sc_cq cq;
- struct i40iw_sc_qp qp;
- struct i40iw_sc_pd sc_pd;
- struct i40iw_sc_dev *dev;
- struct i40iw_sc_vsi *vsi;
- struct i40iw_dma_mem cqmem;
- struct i40iw_dma_mem qpmem;
- struct i40iw_virt_mem ilq_mem;
- enum puda_rsrc_complete completion;
- enum puda_resource_type type;
- u16 buf_size; /*buffer must be max datalen + tcpip hdr + mac */
- u16 mss;
- bool ceq_valid;
- u32 cq_id;
- u32 qp_id;
- u32 sq_size;
- u32 rq_size;
- u32 cq_size;
- struct i40iw_sq_uk_wr_trk_info *sq_wrtrk_array;
- u64 *rq_wrid_array;
- u32 compl_rxwqe_idx;
- u32 rx_wqe_idx;
- u32 rxq_invalid_cnt;
- u32 tx_wqe_avail_cnt;
- bool check_crc;
- struct shash_desc *hash_desc;
- struct list_head txpend;
- struct list_head bufpool; /* free buffers pool list for recv and xmit */
- u32 alloc_buf_count;
- u32 avail_buf_count; /* snapshot of currently available buffers */
- spinlock_t bufpool_lock;
- struct i40iw_puda_buf *alloclist;
- void (*receive)(struct i40iw_sc_vsi *, struct i40iw_puda_buf *);
- void (*xmit_complete)(struct i40iw_sc_vsi *, void *);
- /* puda stats */
- u64 stats_buf_alloc_fail;
- u64 stats_pkt_rcvd;
- u64 stats_pkt_sent;
- u64 stats_rcvd_pkt_err;
- u64 stats_sent_pkt_q;
- u64 stats_bad_qp_id;
-};
-
-struct i40iw_puda_buf *i40iw_puda_get_bufpool(struct i40iw_puda_rsrc *rsrc);
-void i40iw_puda_ret_bufpool(struct i40iw_puda_rsrc *rsrc,
- struct i40iw_puda_buf *buf);
-void i40iw_puda_send_buf(struct i40iw_puda_rsrc *rsrc,
- struct i40iw_puda_buf *buf);
-enum i40iw_status_code i40iw_puda_send(struct i40iw_sc_qp *qp,
- struct i40iw_puda_send_info *info);
-enum i40iw_status_code i40iw_puda_create_rsrc(struct i40iw_sc_vsi *vsi,
- struct i40iw_puda_rsrc_info *info);
-void i40iw_puda_dele_resources(struct i40iw_sc_vsi *vsi,
- enum puda_resource_type type,
- bool reset);
-enum i40iw_status_code i40iw_puda_poll_completion(struct i40iw_sc_dev *dev,
- struct i40iw_sc_cq *cq, u32 *compl_err);
-
-struct i40iw_sc_qp *i40iw_ieq_get_qp(struct i40iw_sc_dev *dev,
- struct i40iw_puda_buf *buf);
-enum i40iw_status_code i40iw_puda_get_tcpip_info(struct i40iw_puda_completion_info *info,
- struct i40iw_puda_buf *buf);
-enum i40iw_status_code i40iw_ieq_check_mpacrc(struct shash_desc *desc,
- void *addr, u32 length, u32 value);
-enum i40iw_status_code i40iw_init_hash_desc(struct shash_desc **desc);
-void i40iw_ieq_mpa_crc_ae(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp);
-void i40iw_free_hash_desc(struct shash_desc *desc);
-void i40iw_ieq_update_tcpip_info(struct i40iw_puda_buf *buf, u16 length,
- u32 seqnum);
-enum i40iw_status_code i40iw_cqp_qp_create_cmd(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp);
-enum i40iw_status_code i40iw_cqp_cq_create_cmd(struct i40iw_sc_dev *dev, struct i40iw_sc_cq *cq);
-void i40iw_cqp_qp_destroy_cmd(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp);
-void i40iw_cqp_cq_destroy_cmd(struct i40iw_sc_dev *dev, struct i40iw_sc_cq *cq);
-#endif
diff --git a/drivers/infiniband/hw/i40iw/i40iw_register.h b/drivers/infiniband/hw/i40iw/i40iw_register.h
deleted file mode 100644
index 57768184e251..000000000000
--- a/drivers/infiniband/hw/i40iw/i40iw_register.h
+++ /dev/null
@@ -1,1030 +0,0 @@
-/*******************************************************************************
-*
-* Copyright (c) 2015-2016 Intel Corporation. All rights reserved.
-*
-* This software is available to you under a choice of one of two
-* licenses. You may choose to be licensed under the terms of the GNU
-* General Public License (GPL) Version 2, available from the file
-* COPYING in the main directory of this source tree, or the
-* OpenFabrics.org BSD license below:
-*
-* Redistribution and use in source and binary forms, with or
-* without modification, are permitted provided that the following
-* conditions are met:
-*
-* - Redistributions of source code must retain the above
-* copyright notice, this list of conditions and the following
-* disclaimer.
-*
-* - Redistributions in binary form must reproduce the above
-* copyright notice, this list of conditions and the following
-* disclaimer in the documentation and/or other materials
-* provided with the distribution.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-* SOFTWARE.
-*
-*******************************************************************************/
-
-#ifndef I40IW_REGISTER_H
-#define I40IW_REGISTER_H
-
-#define I40E_GLGEN_STAT 0x000B612C /* Reset: POR */
-
-#define I40E_PFHMC_PDINV 0x000C0300 /* Reset: PFR */
-#define I40E_PFHMC_PDINV_PMSDIDX_SHIFT 0
-#define I40E_PFHMC_PDINV_PMSDIDX_MASK (0xFFF << I40E_PFHMC_PDINV_PMSDIDX_SHIFT)
-#define I40E_PFHMC_PDINV_PMPDIDX_SHIFT 16
-#define I40E_PFHMC_PDINV_PMPDIDX_MASK (0x1FF << I40E_PFHMC_PDINV_PMPDIDX_SHIFT)
-#define I40E_PFHMC_SDCMD_PMSDWR_SHIFT 31
-#define I40E_PFHMC_SDCMD_PMSDWR_MASK (0x1 << I40E_PFHMC_SDCMD_PMSDWR_SHIFT)
-#define I40E_PFHMC_SDDATALOW_PMSDVALID_SHIFT 0
-#define I40E_PFHMC_SDDATALOW_PMSDVALID_MASK (0x1 << I40E_PFHMC_SDDATALOW_PMSDVALID_SHIFT)
-#define I40E_PFHMC_SDDATALOW_PMSDTYPE_SHIFT 1
-#define I40E_PFHMC_SDDATALOW_PMSDTYPE_MASK (0x1 << I40E_PFHMC_SDDATALOW_PMSDTYPE_SHIFT)
-#define I40E_PFHMC_SDDATALOW_PMSDBPCOUNT_SHIFT 2
-#define I40E_PFHMC_SDDATALOW_PMSDBPCOUNT_MASK (0x3FF << I40E_PFHMC_SDDATALOW_PMSDBPCOUNT_SHIFT)
-
-#define I40E_PFINT_DYN_CTLN(_INTPF) (0x00034800 + ((_INTPF) * 4)) /* _i=0...511 */ /* Reset: PFR */
-#define I40E_PFINT_DYN_CTLN_INTENA_SHIFT 0
-#define I40E_PFINT_DYN_CTLN_INTENA_MASK (0x1 << I40E_PFINT_DYN_CTLN_INTENA_SHIFT)
-#define I40E_PFINT_DYN_CTLN_CLEARPBA_SHIFT 1
-#define I40E_PFINT_DYN_CTLN_CLEARPBA_MASK (0x1 << I40E_PFINT_DYN_CTLN_CLEARPBA_SHIFT)
-#define I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT 3
-#define I40E_PFINT_DYN_CTLN_ITR_INDX_MASK (0x3 << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT)
-
-#define I40E_VFINT_DYN_CTLN1(_INTVF) (0x00003800 + ((_INTVF) * 4)) /* _i=0...15 */ /* Reset: VFR */
-#define I40E_GLHMC_VFPDINV(_i) (0x000C8300 + ((_i) * 4)) /* _i=0...31 */ /* Reset: CORER */
-
-#define I40E_PFHMC_PDINV_PMSDPARTSEL_SHIFT 15
-#define I40E_PFHMC_PDINV_PMSDPARTSEL_MASK (0x1 << I40E_PFHMC_PDINV_PMSDPARTSEL_SHIFT)
-#define I40E_GLPCI_LBARCTRL 0x000BE484 /* Reset: POR */
-#define I40E_GLPCI_LBARCTRL_PE_DB_SIZE_SHIFT 4
-#define I40E_GLPCI_LBARCTRL_PE_DB_SIZE_MASK (0x3 << I40E_GLPCI_LBARCTRL_PE_DB_SIZE_SHIFT)
-#define I40E_GLPCI_DREVID 0x0009C480 /* Reset: PCIR */
-#define I40E_GLPCI_DREVID_DEFAULT_REVID_SHIFT 0
-#define I40E_GLPCI_DREVID_DEFAULT_REVID_MASK 0xFF
-
-#define I40E_PFPE_AEQALLOC 0x00131180 /* Reset: PFR */
-#define I40E_PFPE_AEQALLOC_AECOUNT_SHIFT 0
-#define I40E_PFPE_AEQALLOC_AECOUNT_MASK (0xFFFFFFFF << I40E_PFPE_AEQALLOC_AECOUNT_SHIFT)
-#define I40E_PFPE_CCQPHIGH 0x00008200 /* Reset: PFR */
-#define I40E_PFPE_CCQPHIGH_PECCQPHIGH_SHIFT 0
-#define I40E_PFPE_CCQPHIGH_PECCQPHIGH_MASK (0xFFFFFFFF << I40E_PFPE_CCQPHIGH_PECCQPHIGH_SHIFT)
-#define I40E_PFPE_CCQPLOW 0x00008180 /* Reset: PFR */
-#define I40E_PFPE_CCQPLOW_PECCQPLOW_SHIFT 0
-#define I40E_PFPE_CCQPLOW_PECCQPLOW_MASK (0xFFFFFFFF << I40E_PFPE_CCQPLOW_PECCQPLOW_SHIFT)
-#define I40E_PFPE_CCQPSTATUS 0x00008100 /* Reset: PFR */
-#define I40E_PFPE_CCQPSTATUS_CCQP_DONE_SHIFT 0
-#define I40E_PFPE_CCQPSTATUS_CCQP_DONE_MASK (0x1 << I40E_PFPE_CCQPSTATUS_CCQP_DONE_SHIFT)
-#define I40E_PFPE_CCQPSTATUS_HMC_PROFILE_SHIFT 4
-#define I40E_PFPE_CCQPSTATUS_HMC_PROFILE_MASK (0x7 << I40E_PFPE_CCQPSTATUS_HMC_PROFILE_SHIFT)
-#define I40E_PFPE_CCQPSTATUS_RDMA_EN_VFS_SHIFT 16
-#define I40E_PFPE_CCQPSTATUS_RDMA_EN_VFS_MASK (0x3F << I40E_PFPE_CCQPSTATUS_RDMA_EN_VFS_SHIFT)
-#define I40E_PFPE_CCQPSTATUS_CCQP_ERR_SHIFT 31
-#define I40E_PFPE_CCQPSTATUS_CCQP_ERR_MASK (0x1 << I40E_PFPE_CCQPSTATUS_CCQP_ERR_SHIFT)
-#define I40E_PFPE_CQACK 0x00131100 /* Reset: PFR */
-#define I40E_PFPE_CQACK_PECQID_SHIFT 0
-#define I40E_PFPE_CQACK_PECQID_MASK (0x1FFFF << I40E_PFPE_CQACK_PECQID_SHIFT)
-#define I40E_PFPE_CQARM 0x00131080 /* Reset: PFR */
-#define I40E_PFPE_CQARM_PECQID_SHIFT 0
-#define I40E_PFPE_CQARM_PECQID_MASK (0x1FFFF << I40E_PFPE_CQARM_PECQID_SHIFT)
-#define I40E_PFPE_CQPDB 0x00008000 /* Reset: PFR */
-#define I40E_PFPE_CQPDB_WQHEAD_SHIFT 0
-#define I40E_PFPE_CQPDB_WQHEAD_MASK (0x7FF << I40E_PFPE_CQPDB_WQHEAD_SHIFT)
-#define I40E_PFPE_CQPERRCODES 0x00008880 /* Reset: PFR */
-#define I40E_PFPE_CQPERRCODES_CQP_MINOR_CODE_SHIFT 0
-#define I40E_PFPE_CQPERRCODES_CQP_MINOR_CODE_MASK (0xFFFF << I40E_PFPE_CQPERRCODES_CQP_MINOR_CODE_SHIFT)
-#define I40E_PFPE_CQPERRCODES_CQP_MAJOR_CODE_SHIFT 16
-#define I40E_PFPE_CQPERRCODES_CQP_MAJOR_CODE_MASK (0xFFFF << I40E_PFPE_CQPERRCODES_CQP_MAJOR_CODE_SHIFT)
-#define I40E_PFPE_CQPTAIL 0x00008080 /* Reset: PFR */
-#define I40E_PFPE_CQPTAIL_WQTAIL_SHIFT 0
-#define I40E_PFPE_CQPTAIL_WQTAIL_MASK (0x7FF << I40E_PFPE_CQPTAIL_WQTAIL_SHIFT)
-#define I40E_PFPE_CQPTAIL_CQP_OP_ERR_SHIFT 31
-#define I40E_PFPE_CQPTAIL_CQP_OP_ERR_MASK (0x1 << I40E_PFPE_CQPTAIL_CQP_OP_ERR_SHIFT)
-#define I40E_PFPE_FLMQ1ALLOCERR 0x00008980 /* Reset: PFR */
-#define I40E_PFPE_FLMQ1ALLOCERR_ERROR_COUNT_SHIFT 0
-#define I40E_PFPE_FLMQ1ALLOCERR_ERROR_COUNT_MASK (0xFFFF << I40E_PFPE_FLMQ1ALLOCERR_ERROR_COUNT_SHIFT)
-#define I40E_PFPE_FLMXMITALLOCERR 0x00008900 /* Reset: PFR */
-#define I40E_PFPE_FLMXMITALLOCERR_ERROR_COUNT_SHIFT 0
-#define I40E_PFPE_FLMXMITALLOCERR_ERROR_COUNT_MASK (0xFFFF << I40E_PFPE_FLMXMITALLOCERR_ERROR_COUNT_SHIFT)
-#define I40E_PFPE_IPCONFIG0 0x00008280 /* Reset: PFR */
-#define I40E_PFPE_IPCONFIG0_PEIPID_SHIFT 0
-#define I40E_PFPE_IPCONFIG0_PEIPID_MASK (0xFFFF << I40E_PFPE_IPCONFIG0_PEIPID_SHIFT)
-#define I40E_PFPE_IPCONFIG0_USEENTIREIDRANGE_SHIFT 16
-#define I40E_PFPE_IPCONFIG0_USEENTIREIDRANGE_MASK (0x1 << I40E_PFPE_IPCONFIG0_USEENTIREIDRANGE_SHIFT)
-#define I40E_PFPE_MRTEIDXMASK 0x00008600 /* Reset: PFR */
-#define I40E_PFPE_MRTEIDXMASK_MRTEIDXMASKBITS_SHIFT 0
-#define I40E_PFPE_MRTEIDXMASK_MRTEIDXMASKBITS_MASK (0x1F << I40E_PFPE_MRTEIDXMASK_MRTEIDXMASKBITS_SHIFT)
-#define I40E_PFPE_RCVUNEXPECTEDERROR 0x00008680 /* Reset: PFR */
-#define I40E_PFPE_RCVUNEXPECTEDERROR_TCP_RX_UNEXP_ERR_SHIFT 0
-#define I40E_PFPE_RCVUNEXPECTEDERROR_TCP_RX_UNEXP_ERR_MASK (0xFFFFFF << I40E_PFPE_RCVUNEXPECTEDERROR_TCP_RX_UNEXP_ERR_SHIFT)
-#define I40E_PFPE_TCPNOWTIMER 0x00008580 /* Reset: PFR */
-#define I40E_PFPE_TCPNOWTIMER_TCP_NOW_SHIFT 0
-#define I40E_PFPE_TCPNOWTIMER_TCP_NOW_MASK (0xFFFFFFFF << I40E_PFPE_TCPNOWTIMER_TCP_NOW_SHIFT)
-
-#define I40E_PFPE_WQEALLOC 0x00138C00 /* Reset: PFR */
-#define I40E_PFPE_WQEALLOC_PEQPID_SHIFT 0
-#define I40E_PFPE_WQEALLOC_PEQPID_MASK (0x3FFFF << I40E_PFPE_WQEALLOC_PEQPID_SHIFT)
-#define I40E_PFPE_WQEALLOC_WQE_DESC_INDEX_SHIFT 20
-#define I40E_PFPE_WQEALLOC_WQE_DESC_INDEX_MASK (0xFFF << I40E_PFPE_WQEALLOC_WQE_DESC_INDEX_SHIFT)
-
-#define I40E_VFPE_AEQALLOC(_VF) (0x00130C00 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: VFR */
-#define I40E_VFPE_AEQALLOC_MAX_INDEX 127
-#define I40E_VFPE_AEQALLOC_AECOUNT_SHIFT 0
-#define I40E_VFPE_AEQALLOC_AECOUNT_MASK (0xFFFFFFFF << I40E_VFPE_AEQALLOC_AECOUNT_SHIFT)
-#define I40E_VFPE_CCQPHIGH(_VF) (0x00001000 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: VFR */
-#define I40E_VFPE_CCQPHIGH_MAX_INDEX 127
-#define I40E_VFPE_CCQPHIGH_PECCQPHIGH_SHIFT 0
-#define I40E_VFPE_CCQPHIGH_PECCQPHIGH_MASK (0xFFFFFFFF << I40E_VFPE_CCQPHIGH_PECCQPHIGH_SHIFT)
-#define I40E_VFPE_CCQPLOW(_VF) (0x00000C00 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: VFR */
-#define I40E_VFPE_CCQPLOW_MAX_INDEX 127
-#define I40E_VFPE_CCQPLOW_PECCQPLOW_SHIFT 0
-#define I40E_VFPE_CCQPLOW_PECCQPLOW_MASK (0xFFFFFFFF << I40E_VFPE_CCQPLOW_PECCQPLOW_SHIFT)
-#define I40E_VFPE_CCQPSTATUS(_VF) (0x00000800 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: VFR */
-#define I40E_VFPE_CCQPSTATUS_MAX_INDEX 127
-#define I40E_VFPE_CCQPSTATUS_CCQP_DONE_SHIFT 0
-#define I40E_VFPE_CCQPSTATUS_CCQP_DONE_MASK (0x1 << I40E_VFPE_CCQPSTATUS_CCQP_DONE_SHIFT)
-#define I40E_VFPE_CCQPSTATUS_HMC_PROFILE_SHIFT 4
-#define I40E_VFPE_CCQPSTATUS_HMC_PROFILE_MASK (0x7 << I40E_VFPE_CCQPSTATUS_HMC_PROFILE_SHIFT)
-#define I40E_VFPE_CCQPSTATUS_RDMA_EN_VFS_SHIFT 16
-#define I40E_VFPE_CCQPSTATUS_RDMA_EN_VFS_MASK (0x3F << I40E_VFPE_CCQPSTATUS_RDMA_EN_VFS_SHIFT)
-#define I40E_VFPE_CCQPSTATUS_CCQP_ERR_SHIFT 31
-#define I40E_VFPE_CCQPSTATUS_CCQP_ERR_MASK (0x1 << I40E_VFPE_CCQPSTATUS_CCQP_ERR_SHIFT)
-#define I40E_VFPE_CQACK(_VF) (0x00130800 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: VFR */
-#define I40E_VFPE_CQACK_MAX_INDEX 127
-#define I40E_VFPE_CQACK_PECQID_SHIFT 0
-#define I40E_VFPE_CQACK_PECQID_MASK (0x1FFFF << I40E_VFPE_CQACK_PECQID_SHIFT)
-#define I40E_VFPE_CQARM(_VF) (0x00130400 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: VFR */
-#define I40E_VFPE_CQARM_MAX_INDEX 127
-#define I40E_VFPE_CQARM_PECQID_SHIFT 0
-#define I40E_VFPE_CQARM_PECQID_MASK (0x1FFFF << I40E_VFPE_CQARM_PECQID_SHIFT)
-#define I40E_VFPE_CQPDB(_VF) (0x00000000 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: VFR */
-#define I40E_VFPE_CQPDB_MAX_INDEX 127
-#define I40E_VFPE_CQPDB_WQHEAD_SHIFT 0
-#define I40E_VFPE_CQPDB_WQHEAD_MASK (0x7FF << I40E_VFPE_CQPDB_WQHEAD_SHIFT)
-#define I40E_VFPE_CQPERRCODES(_VF) (0x00001800 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: VFR */
-#define I40E_VFPE_CQPERRCODES_MAX_INDEX 127
-#define I40E_VFPE_CQPERRCODES_CQP_MINOR_CODE_SHIFT 0
-#define I40E_VFPE_CQPERRCODES_CQP_MINOR_CODE_MASK (0xFFFF << I40E_VFPE_CQPERRCODES_CQP_MINOR_CODE_SHIFT)
-#define I40E_VFPE_CQPERRCODES_CQP_MAJOR_CODE_SHIFT 16
-#define I40E_VFPE_CQPERRCODES_CQP_MAJOR_CODE_MASK (0xFFFF << I40E_VFPE_CQPERRCODES_CQP_MAJOR_CODE_SHIFT)
-#define I40E_VFPE_CQPTAIL(_VF) (0x00000400 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: VFR */
-#define I40E_VFPE_CQPTAIL_MAX_INDEX 127
-#define I40E_VFPE_CQPTAIL_WQTAIL_SHIFT 0
-#define I40E_VFPE_CQPTAIL_WQTAIL_MASK (0x7FF << I40E_VFPE_CQPTAIL_WQTAIL_SHIFT)
-#define I40E_VFPE_CQPTAIL_CQP_OP_ERR_SHIFT 31
-#define I40E_VFPE_CQPTAIL_CQP_OP_ERR_MASK (0x1 << I40E_VFPE_CQPTAIL_CQP_OP_ERR_SHIFT)
-#define I40E_VFPE_IPCONFIG0(_VF) (0x00001400 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: VFR */
-#define I40E_VFPE_IPCONFIG0_MAX_INDEX 127
-#define I40E_VFPE_IPCONFIG0_PEIPID_SHIFT 0
-#define I40E_VFPE_IPCONFIG0_PEIPID_MASK (0xFFFF << I40E_VFPE_IPCONFIG0_PEIPID_SHIFT)
-#define I40E_VFPE_IPCONFIG0_USEENTIREIDRANGE_SHIFT 16
-#define I40E_VFPE_IPCONFIG0_USEENTIREIDRANGE_MASK (0x1 << I40E_VFPE_IPCONFIG0_USEENTIREIDRANGE_SHIFT)
-#define I40E_VFPE_MRTEIDXMASK(_VF) (0x00003000 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: VFR */
-#define I40E_VFPE_MRTEIDXMASK_MAX_INDEX 127
-#define I40E_VFPE_MRTEIDXMASK_MRTEIDXMASKBITS_SHIFT 0
-#define I40E_VFPE_MRTEIDXMASK_MRTEIDXMASKBITS_MASK (0x1F << I40E_VFPE_MRTEIDXMASK_MRTEIDXMASKBITS_SHIFT)
-#define I40E_VFPE_RCVUNEXPECTEDERROR(_VF) (0x00003400 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: VFR */
-#define I40E_VFPE_RCVUNEXPECTEDERROR_MAX_INDEX 127
-#define I40E_VFPE_RCVUNEXPECTEDERROR_TCP_RX_UNEXP_ERR_SHIFT 0
-#define I40E_VFPE_RCVUNEXPECTEDERROR_TCP_RX_UNEXP_ERR_MASK (0xFFFFFF << I40E_VFPE_RCVUNEXPECTEDERROR_TCP_RX_UNEXP_ERR_SHIFT)
-#define I40E_VFPE_TCPNOWTIMER(_VF) (0x00002C00 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: VFR */
-#define I40E_VFPE_TCPNOWTIMER_MAX_INDEX 127
-#define I40E_VFPE_TCPNOWTIMER_TCP_NOW_SHIFT 0
-#define I40E_VFPE_TCPNOWTIMER_TCP_NOW_MASK (0xFFFFFFFF << I40E_VFPE_TCPNOWTIMER_TCP_NOW_SHIFT)
-#define I40E_VFPE_WQEALLOC(_VF) (0x00138000 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: VFR */
-#define I40E_VFPE_WQEALLOC_MAX_INDEX 127
-#define I40E_VFPE_WQEALLOC_PEQPID_SHIFT 0
-#define I40E_VFPE_WQEALLOC_PEQPID_MASK (0x3FFFF << I40E_VFPE_WQEALLOC_PEQPID_SHIFT)
-#define I40E_VFPE_WQEALLOC_WQE_DESC_INDEX_SHIFT 20
-#define I40E_VFPE_WQEALLOC_WQE_DESC_INDEX_MASK (0xFFF << I40E_VFPE_WQEALLOC_WQE_DESC_INDEX_SHIFT)
-
-#define I40E_GLPE_CPUSTATUS0 0x0000D040 /* Reset: PE_CORER */
-#define I40E_GLPE_CPUSTATUS0_PECPUSTATUS0_SHIFT 0
-#define I40E_GLPE_CPUSTATUS0_PECPUSTATUS0_MASK (0xFFFFFFFF << I40E_GLPE_CPUSTATUS0_PECPUSTATUS0_SHIFT)
-#define I40E_GLPE_CPUSTATUS1 0x0000D044 /* Reset: PE_CORER */
-#define I40E_GLPE_CPUSTATUS1_PECPUSTATUS1_SHIFT 0
-#define I40E_GLPE_CPUSTATUS1_PECPUSTATUS1_MASK (0xFFFFFFFF << I40E_GLPE_CPUSTATUS1_PECPUSTATUS1_SHIFT)
-#define I40E_GLPE_CPUSTATUS2 0x0000D048 /* Reset: PE_CORER */
-#define I40E_GLPE_CPUSTATUS2_PECPUSTATUS2_SHIFT 0
-#define I40E_GLPE_CPUSTATUS2_PECPUSTATUS2_MASK (0xFFFFFFFF << I40E_GLPE_CPUSTATUS2_PECPUSTATUS2_SHIFT)
-#define I40E_GLPE_CPUTRIG0 0x0000D060 /* Reset: PE_CORER */
-#define I40E_GLPE_CPUTRIG0_PECPUTRIG0_SHIFT 0
-#define I40E_GLPE_CPUTRIG0_PECPUTRIG0_MASK (0xFFFF << I40E_GLPE_CPUTRIG0_PECPUTRIG0_SHIFT)
-#define I40E_GLPE_CPUTRIG0_TEPREQUEST0_SHIFT 17
-#define I40E_GLPE_CPUTRIG0_TEPREQUEST0_MASK (0x1 << I40E_GLPE_CPUTRIG0_TEPREQUEST0_SHIFT)
-#define I40E_GLPE_CPUTRIG0_OOPREQUEST0_SHIFT 18
-#define I40E_GLPE_CPUTRIG0_OOPREQUEST0_MASK (0x1 << I40E_GLPE_CPUTRIG0_OOPREQUEST0_SHIFT)
-#define I40E_GLPE_DUAL40_RUPM 0x0000DA04 /* Reset: PE_CORER */
-#define I40E_GLPE_DUAL40_RUPM_DUAL_40G_MODE_SHIFT 0
-#define I40E_GLPE_DUAL40_RUPM_DUAL_40G_MODE_MASK (0x1 << I40E_GLPE_DUAL40_RUPM_DUAL_40G_MODE_SHIFT)
-#define I40E_GLPE_PFAEQEDROPCNT(_i) (0x00131440 + ((_i) * 4)) /* _i=0...15 */ /* Reset: CORER */
-#define I40E_GLPE_PFAEQEDROPCNT_MAX_INDEX 15
-#define I40E_GLPE_PFAEQEDROPCNT_AEQEDROPCNT_SHIFT 0
-#define I40E_GLPE_PFAEQEDROPCNT_AEQEDROPCNT_MASK (0xFFFF << I40E_GLPE_PFAEQEDROPCNT_AEQEDROPCNT_SHIFT)
-#define I40E_GLPE_PFCEQEDROPCNT(_i) (0x001313C0 + ((_i) * 4)) /* _i=0...15 */ /* Reset: CORER */
-#define I40E_GLPE_PFCEQEDROPCNT_MAX_INDEX 15
-#define I40E_GLPE_PFCEQEDROPCNT_CEQEDROPCNT_SHIFT 0
-#define I40E_GLPE_PFCEQEDROPCNT_CEQEDROPCNT_MASK (0xFFFF << I40E_GLPE_PFCEQEDROPCNT_CEQEDROPCNT_SHIFT)
-#define I40E_GLPE_PFCQEDROPCNT(_i) (0x00131340 + ((_i) * 4)) /* _i=0...15 */ /* Reset: CORER */
-#define I40E_GLPE_PFCQEDROPCNT_MAX_INDEX 15
-#define I40E_GLPE_PFCQEDROPCNT_CQEDROPCNT_SHIFT 0
-#define I40E_GLPE_PFCQEDROPCNT_CQEDROPCNT_MASK (0xFFFF << I40E_GLPE_PFCQEDROPCNT_CQEDROPCNT_SHIFT)
-#define I40E_GLPE_RUPM_CQPPOOL 0x0000DACC /* Reset: PE_CORER */
-#define I40E_GLPE_RUPM_CQPPOOL_CQPSPADS_SHIFT 0
-#define I40E_GLPE_RUPM_CQPPOOL_CQPSPADS_MASK (0xFF << I40E_GLPE_RUPM_CQPPOOL_CQPSPADS_SHIFT)
-#define I40E_GLPE_RUPM_FLRPOOL 0x0000DAC4 /* Reset: PE_CORER */
-#define I40E_GLPE_RUPM_FLRPOOL_FLRSPADS_SHIFT 0
-#define I40E_GLPE_RUPM_FLRPOOL_FLRSPADS_MASK (0xFF << I40E_GLPE_RUPM_FLRPOOL_FLRSPADS_SHIFT)
-#define I40E_GLPE_RUPM_GCTL 0x0000DA00 /* Reset: PE_CORER */
-#define I40E_GLPE_RUPM_GCTL_ALLOFFTH_SHIFT 0
-#define I40E_GLPE_RUPM_GCTL_ALLOFFTH_MASK (0xFF << I40E_GLPE_RUPM_GCTL_ALLOFFTH_SHIFT)
-#define I40E_GLPE_RUPM_GCTL_RUPM_P0_DIS_SHIFT 26
-#define I40E_GLPE_RUPM_GCTL_RUPM_P0_DIS_MASK (0x1 << I40E_GLPE_RUPM_GCTL_RUPM_P0_DIS_SHIFT)
-#define I40E_GLPE_RUPM_GCTL_RUPM_P1_DIS_SHIFT 27
-#define I40E_GLPE_RUPM_GCTL_RUPM_P1_DIS_MASK (0x1 << I40E_GLPE_RUPM_GCTL_RUPM_P1_DIS_SHIFT)
-#define I40E_GLPE_RUPM_GCTL_RUPM_P2_DIS_SHIFT 28
-#define I40E_GLPE_RUPM_GCTL_RUPM_P2_DIS_MASK (0x1 << I40E_GLPE_RUPM_GCTL_RUPM_P2_DIS_SHIFT)
-#define I40E_GLPE_RUPM_GCTL_RUPM_P3_DIS_SHIFT 29
-#define I40E_GLPE_RUPM_GCTL_RUPM_P3_DIS_MASK (0x1 << I40E_GLPE_RUPM_GCTL_RUPM_P3_DIS_SHIFT)
-#define I40E_GLPE_RUPM_GCTL_RUPM_DIS_SHIFT 30
-#define I40E_GLPE_RUPM_GCTL_RUPM_DIS_MASK (0x1 << I40E_GLPE_RUPM_GCTL_RUPM_DIS_SHIFT)
-#define I40E_GLPE_RUPM_GCTL_SWLB_MODE_SHIFT 31
-#define I40E_GLPE_RUPM_GCTL_SWLB_MODE_MASK (0x1 << I40E_GLPE_RUPM_GCTL_SWLB_MODE_SHIFT)
-#define I40E_GLPE_RUPM_PTXPOOL 0x0000DAC8 /* Reset: PE_CORER */
-#define I40E_GLPE_RUPM_PTXPOOL_PTXSPADS_SHIFT 0
-#define I40E_GLPE_RUPM_PTXPOOL_PTXSPADS_MASK (0xFF << I40E_GLPE_RUPM_PTXPOOL_PTXSPADS_SHIFT)
-#define I40E_GLPE_RUPM_PUSHPOOL 0x0000DAC0 /* Reset: PE_CORER */
-#define I40E_GLPE_RUPM_PUSHPOOL_PUSHSPADS_SHIFT 0
-#define I40E_GLPE_RUPM_PUSHPOOL_PUSHSPADS_MASK (0xFF << I40E_GLPE_RUPM_PUSHPOOL_PUSHSPADS_SHIFT)
-#define I40E_GLPE_RUPM_TXHOST_EN 0x0000DA08 /* Reset: PE_CORER */
-#define I40E_GLPE_RUPM_TXHOST_EN_TXHOST_EN_SHIFT 0
-#define I40E_GLPE_RUPM_TXHOST_EN_TXHOST_EN_MASK (0x1 << I40E_GLPE_RUPM_TXHOST_EN_TXHOST_EN_SHIFT)
-#define I40E_GLPE_VFAEQEDROPCNT(_i) (0x00132540 + ((_i) * 4)) /* _i=0...31 */ /* Reset: CORER */
-#define I40E_GLPE_VFAEQEDROPCNT_MAX_INDEX 31
-#define I40E_GLPE_VFAEQEDROPCNT_AEQEDROPCNT_SHIFT 0
-#define I40E_GLPE_VFAEQEDROPCNT_AEQEDROPCNT_MASK (0xFFFF << I40E_GLPE_VFAEQEDROPCNT_AEQEDROPCNT_SHIFT)
-#define I40E_GLPE_VFCEQEDROPCNT(_i) (0x00132440 + ((_i) * 4)) /* _i=0...31 */ /* Reset: CORER */
-#define I40E_GLPE_VFCEQEDROPCNT_MAX_INDEX 31
-#define I40E_GLPE_VFCEQEDROPCNT_CEQEDROPCNT_SHIFT 0
-#define I40E_GLPE_VFCEQEDROPCNT_CEQEDROPCNT_MASK (0xFFFF << I40E_GLPE_VFCEQEDROPCNT_CEQEDROPCNT_SHIFT)
-#define I40E_GLPE_VFCQEDROPCNT(_i) (0x00132340 + ((_i) * 4)) /* _i=0...31 */ /* Reset: CORER */
-#define I40E_GLPE_VFCQEDROPCNT_MAX_INDEX 31
-#define I40E_GLPE_VFCQEDROPCNT_CQEDROPCNT_SHIFT 0
-#define I40E_GLPE_VFCQEDROPCNT_CQEDROPCNT_MASK (0xFFFF << I40E_GLPE_VFCQEDROPCNT_CQEDROPCNT_SHIFT)
-#define I40E_GLPE_VFFLMOBJCTRL(_i) (0x0000D400 + ((_i) * 4)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPE_VFFLMOBJCTRL_MAX_INDEX 31
-#define I40E_GLPE_VFFLMOBJCTRL_XMIT_BLOCKSIZE_SHIFT 0
-#define I40E_GLPE_VFFLMOBJCTRL_XMIT_BLOCKSIZE_MASK (0x7 << I40E_GLPE_VFFLMOBJCTRL_XMIT_BLOCKSIZE_SHIFT)
-#define I40E_GLPE_VFFLMOBJCTRL_Q1_BLOCKSIZE_SHIFT 8
-#define I40E_GLPE_VFFLMOBJCTRL_Q1_BLOCKSIZE_MASK (0x7 << I40E_GLPE_VFFLMOBJCTRL_Q1_BLOCKSIZE_SHIFT)
-#define I40E_GLPE_VFFLMQ1ALLOCERR(_i) (0x0000C700 + ((_i) * 4)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPE_VFFLMQ1ALLOCERR_MAX_INDEX 31
-#define I40E_GLPE_VFFLMQ1ALLOCERR_ERROR_COUNT_SHIFT 0
-#define I40E_GLPE_VFFLMQ1ALLOCERR_ERROR_COUNT_MASK (0xFFFF << I40E_GLPE_VFFLMQ1ALLOCERR_ERROR_COUNT_SHIFT)
-#define I40E_GLPE_VFFLMXMITALLOCERR(_i) (0x0000C600 + ((_i) * 4)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPE_VFFLMXMITALLOCERR_MAX_INDEX 31
-#define I40E_GLPE_VFFLMXMITALLOCERR_ERROR_COUNT_SHIFT 0
-#define I40E_GLPE_VFFLMXMITALLOCERR_ERROR_COUNT_MASK (0xFFFF << I40E_GLPE_VFFLMXMITALLOCERR_ERROR_COUNT_SHIFT)
-#define I40E_GLPE_VFUDACTRL(_i) (0x0000C000 + ((_i) * 4)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPE_VFUDACTRL_MAX_INDEX 31
-#define I40E_GLPE_VFUDACTRL_IPV4MCFRAGRESBP_SHIFT 0
-#define I40E_GLPE_VFUDACTRL_IPV4MCFRAGRESBP_MASK (0x1 << I40E_GLPE_VFUDACTRL_IPV4MCFRAGRESBP_SHIFT)
-#define I40E_GLPE_VFUDACTRL_IPV4UCFRAGRESBP_SHIFT 1
-#define I40E_GLPE_VFUDACTRL_IPV4UCFRAGRESBP_MASK (0x1 << I40E_GLPE_VFUDACTRL_IPV4UCFRAGRESBP_SHIFT)
-#define I40E_GLPE_VFUDACTRL_IPV6MCFRAGRESBP_SHIFT 2
-#define I40E_GLPE_VFUDACTRL_IPV6MCFRAGRESBP_MASK (0x1 << I40E_GLPE_VFUDACTRL_IPV6MCFRAGRESBP_SHIFT)
-#define I40E_GLPE_VFUDACTRL_IPV6UCFRAGRESBP_SHIFT 3
-#define I40E_GLPE_VFUDACTRL_IPV6UCFRAGRESBP_MASK (0x1 << I40E_GLPE_VFUDACTRL_IPV6UCFRAGRESBP_SHIFT)
-#define I40E_GLPE_VFUDACTRL_UDPMCFRAGRESFAIL_SHIFT 4
-#define I40E_GLPE_VFUDACTRL_UDPMCFRAGRESFAIL_MASK (0x1 << I40E_GLPE_VFUDACTRL_UDPMCFRAGRESFAIL_SHIFT)
-#define I40E_GLPE_VFUDAUCFBQPN(_i) (0x0000C100 + ((_i) * 4)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPE_VFUDAUCFBQPN_MAX_INDEX 31
-#define I40E_GLPE_VFUDAUCFBQPN_QPN_SHIFT 0
-#define I40E_GLPE_VFUDAUCFBQPN_QPN_MASK (0x3FFFF << I40E_GLPE_VFUDAUCFBQPN_QPN_SHIFT)
-#define I40E_GLPE_VFUDAUCFBQPN_VALID_SHIFT 31
-#define I40E_GLPE_VFUDAUCFBQPN_VALID_MASK (0x1 << I40E_GLPE_VFUDAUCFBQPN_VALID_SHIFT)
-
-#define I40E_GLPES_PFIP4RXDISCARD(_i) (0x00010600 + ((_i) * 4)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFIP4RXDISCARD_MAX_INDEX 15
-#define I40E_GLPES_PFIP4RXDISCARD_IP4RXDISCARD_SHIFT 0
-#define I40E_GLPES_PFIP4RXDISCARD_IP4RXDISCARD_MASK (0xFFFFFFFF << I40E_GLPES_PFIP4RXDISCARD_IP4RXDISCARD_SHIFT)
-#define I40E_GLPES_PFIP4RXFRAGSHI(_i) (0x00010804 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFIP4RXFRAGSHI_MAX_INDEX 15
-#define I40E_GLPES_PFIP4RXFRAGSHI_IP4RXFRAGSHI_SHIFT 0
-#define I40E_GLPES_PFIP4RXFRAGSHI_IP4RXFRAGSHI_MASK (0xFFFF << I40E_GLPES_PFIP4RXFRAGSHI_IP4RXFRAGSHI_SHIFT)
-#define I40E_GLPES_PFIP4RXFRAGSLO(_i) (0x00010800 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFIP4RXFRAGSLO_MAX_INDEX 15
-#define I40E_GLPES_PFIP4RXFRAGSLO_IP4RXFRAGSLO_SHIFT 0
-#define I40E_GLPES_PFIP4RXFRAGSLO_IP4RXFRAGSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFIP4RXFRAGSLO_IP4RXFRAGSLO_SHIFT)
-#define I40E_GLPES_PFIP4RXMCOCTSHI(_i) (0x00010A04 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFIP4RXMCOCTSHI_MAX_INDEX 15
-#define I40E_GLPES_PFIP4RXMCOCTSHI_IP4RXMCOCTSHI_SHIFT 0
-#define I40E_GLPES_PFIP4RXMCOCTSHI_IP4RXMCOCTSHI_MASK (0xFFFF << I40E_GLPES_PFIP4RXMCOCTSHI_IP4RXMCOCTSHI_SHIFT)
-#define I40E_GLPES_PFIP4RXMCOCTSLO(_i) (0x00010A00 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFIP4RXMCOCTSLO_MAX_INDEX 15
-#define I40E_GLPES_PFIP4RXMCOCTSLO_IP4RXMCOCTSLO_SHIFT 0
-#define I40E_GLPES_PFIP4RXMCOCTSLO_IP4RXMCOCTSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFIP4RXMCOCTSLO_IP4RXMCOCTSLO_SHIFT)
-#define I40E_GLPES_PFIP4RXMCPKTSHI(_i) (0x00010C04 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFIP4RXMCPKTSHI_MAX_INDEX 15
-#define I40E_GLPES_PFIP4RXMCPKTSHI_IP4RXMCPKTSHI_SHIFT 0
-#define I40E_GLPES_PFIP4RXMCPKTSHI_IP4RXMCPKTSHI_MASK (0xFFFF << I40E_GLPES_PFIP4RXMCPKTSHI_IP4RXMCPKTSHI_SHIFT)
-#define I40E_GLPES_PFIP4RXMCPKTSLO(_i) (0x00010C00 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFIP4RXMCPKTSLO_MAX_INDEX 15
-#define I40E_GLPES_PFIP4RXMCPKTSLO_IP4RXMCPKTSLO_SHIFT 0
-#define I40E_GLPES_PFIP4RXMCPKTSLO_IP4RXMCPKTSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFIP4RXMCPKTSLO_IP4RXMCPKTSLO_SHIFT)
-#define I40E_GLPES_PFIP4RXOCTSHI(_i) (0x00010204 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFIP4RXOCTSHI_MAX_INDEX 15
-#define I40E_GLPES_PFIP4RXOCTSHI_IP4RXOCTSHI_SHIFT 0
-#define I40E_GLPES_PFIP4RXOCTSHI_IP4RXOCTSHI_MASK (0xFFFF << I40E_GLPES_PFIP4RXOCTSHI_IP4RXOCTSHI_SHIFT)
-#define I40E_GLPES_PFIP4RXOCTSLO(_i) (0x00010200 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFIP4RXOCTSLO_MAX_INDEX 15
-#define I40E_GLPES_PFIP4RXOCTSLO_IP4RXOCTSLO_SHIFT 0
-#define I40E_GLPES_PFIP4RXOCTSLO_IP4RXOCTSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFIP4RXOCTSLO_IP4RXOCTSLO_SHIFT)
-#define I40E_GLPES_PFIP4RXPKTSHI(_i) (0x00010404 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFIP4RXPKTSHI_MAX_INDEX 15
-#define I40E_GLPES_PFIP4RXPKTSHI_IP4RXPKTSHI_SHIFT 0
-#define I40E_GLPES_PFIP4RXPKTSHI_IP4RXPKTSHI_MASK (0xFFFF << I40E_GLPES_PFIP4RXPKTSHI_IP4RXPKTSHI_SHIFT)
-#define I40E_GLPES_PFIP4RXPKTSLO(_i) (0x00010400 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFIP4RXPKTSLO_MAX_INDEX 15
-#define I40E_GLPES_PFIP4RXPKTSLO_IP4RXPKTSLO_SHIFT 0
-#define I40E_GLPES_PFIP4RXPKTSLO_IP4RXPKTSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFIP4RXPKTSLO_IP4RXPKTSLO_SHIFT)
-#define I40E_GLPES_PFIP4RXTRUNC(_i) (0x00010700 + ((_i) * 4)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFIP4RXTRUNC_MAX_INDEX 15
-#define I40E_GLPES_PFIP4RXTRUNC_IP4RXTRUNC_SHIFT 0
-#define I40E_GLPES_PFIP4RXTRUNC_IP4RXTRUNC_MASK (0xFFFFFFFF << I40E_GLPES_PFIP4RXTRUNC_IP4RXTRUNC_SHIFT)
-#define I40E_GLPES_PFIP4TXFRAGSHI(_i) (0x00011E04 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFIP4TXFRAGSHI_MAX_INDEX 15
-#define I40E_GLPES_PFIP4TXFRAGSHI_IP4TXFRAGSHI_SHIFT 0
-#define I40E_GLPES_PFIP4TXFRAGSHI_IP4TXFRAGSHI_MASK (0xFFFF << I40E_GLPES_PFIP4TXFRAGSHI_IP4TXFRAGSHI_SHIFT)
-#define I40E_GLPES_PFIP4TXFRAGSLO(_i) (0x00011E00 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFIP4TXFRAGSLO_MAX_INDEX 15
-#define I40E_GLPES_PFIP4TXFRAGSLO_IP4TXFRAGSLO_SHIFT 0
-#define I40E_GLPES_PFIP4TXFRAGSLO_IP4TXFRAGSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFIP4TXFRAGSLO_IP4TXFRAGSLO_SHIFT)
-#define I40E_GLPES_PFIP4TXMCOCTSHI(_i) (0x00012004 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFIP4TXMCOCTSHI_MAX_INDEX 15
-#define I40E_GLPES_PFIP4TXMCOCTSHI_IP4TXMCOCTSHI_SHIFT 0
-#define I40E_GLPES_PFIP4TXMCOCTSHI_IP4TXMCOCTSHI_MASK (0xFFFF << I40E_GLPES_PFIP4TXMCOCTSHI_IP4TXMCOCTSHI_SHIFT)
-#define I40E_GLPES_PFIP4TXMCOCTSLO(_i) (0x00012000 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFIP4TXMCOCTSLO_MAX_INDEX 15
-#define I40E_GLPES_PFIP4TXMCOCTSLO_IP4TXMCOCTSLO_SHIFT 0
-#define I40E_GLPES_PFIP4TXMCOCTSLO_IP4TXMCOCTSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFIP4TXMCOCTSLO_IP4TXMCOCTSLO_SHIFT)
-#define I40E_GLPES_PFIP4TXMCPKTSHI(_i) (0x00012204 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFIP4TXMCPKTSHI_MAX_INDEX 15
-#define I40E_GLPES_PFIP4TXMCPKTSHI_IP4TXMCPKTSHI_SHIFT 0
-#define I40E_GLPES_PFIP4TXMCPKTSHI_IP4TXMCPKTSHI_MASK (0xFFFF << I40E_GLPES_PFIP4TXMCPKTSHI_IP4TXMCPKTSHI_SHIFT)
-#define I40E_GLPES_PFIP4TXMCPKTSLO(_i) (0x00012200 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFIP4TXMCPKTSLO_MAX_INDEX 15
-#define I40E_GLPES_PFIP4TXMCPKTSLO_IP4TXMCPKTSLO_SHIFT 0
-#define I40E_GLPES_PFIP4TXMCPKTSLO_IP4TXMCPKTSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFIP4TXMCPKTSLO_IP4TXMCPKTSLO_SHIFT)
-#define I40E_GLPES_PFIP4TXNOROUTE(_i) (0x00012E00 + ((_i) * 4)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFIP4TXNOROUTE_MAX_INDEX 15
-#define I40E_GLPES_PFIP4TXNOROUTE_IP4TXNOROUTE_SHIFT 0
-#define I40E_GLPES_PFIP4TXNOROUTE_IP4TXNOROUTE_MASK (0xFFFFFF << I40E_GLPES_PFIP4TXNOROUTE_IP4TXNOROUTE_SHIFT)
-#define I40E_GLPES_PFIP4TXOCTSHI(_i) (0x00011A04 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFIP4TXOCTSHI_MAX_INDEX 15
-#define I40E_GLPES_PFIP4TXOCTSHI_IP4TXOCTSHI_SHIFT 0
-#define I40E_GLPES_PFIP4TXOCTSHI_IP4TXOCTSHI_MASK (0xFFFF << I40E_GLPES_PFIP4TXOCTSHI_IP4TXOCTSHI_SHIFT)
-#define I40E_GLPES_PFIP4TXOCTSLO(_i) (0x00011A00 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFIP4TXOCTSLO_MAX_INDEX 15
-#define I40E_GLPES_PFIP4TXOCTSLO_IP4TXOCTSLO_SHIFT 0
-#define I40E_GLPES_PFIP4TXOCTSLO_IP4TXOCTSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFIP4TXOCTSLO_IP4TXOCTSLO_SHIFT)
-#define I40E_GLPES_PFIP4TXPKTSHI(_i) (0x00011C04 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFIP4TXPKTSHI_MAX_INDEX 15
-#define I40E_GLPES_PFIP4TXPKTSHI_IP4TXPKTSHI_SHIFT 0
-#define I40E_GLPES_PFIP4TXPKTSHI_IP4TXPKTSHI_MASK (0xFFFF << I40E_GLPES_PFIP4TXPKTSHI_IP4TXPKTSHI_SHIFT)
-#define I40E_GLPES_PFIP4TXPKTSLO(_i) (0x00011C00 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFIP4TXPKTSLO_MAX_INDEX 15
-#define I40E_GLPES_PFIP4TXPKTSLO_IP4TXPKTSLO_SHIFT 0
-#define I40E_GLPES_PFIP4TXPKTSLO_IP4TXPKTSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFIP4TXPKTSLO_IP4TXPKTSLO_SHIFT)
-#define I40E_GLPES_PFIP6RXDISCARD(_i) (0x00011200 + ((_i) * 4)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFIP6RXDISCARD_MAX_INDEX 15
-#define I40E_GLPES_PFIP6RXDISCARD_IP6RXDISCARD_SHIFT 0
-#define I40E_GLPES_PFIP6RXDISCARD_IP6RXDISCARD_MASK (0xFFFFFFFF << I40E_GLPES_PFIP6RXDISCARD_IP6RXDISCARD_SHIFT)
-#define I40E_GLPES_PFIP6RXFRAGSHI(_i) (0x00011404 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFIP6RXFRAGSHI_MAX_INDEX 15
-#define I40E_GLPES_PFIP6RXFRAGSHI_IP6RXFRAGSHI_SHIFT 0
-#define I40E_GLPES_PFIP6RXFRAGSHI_IP6RXFRAGSHI_MASK (0xFFFF << I40E_GLPES_PFIP6RXFRAGSHI_IP6RXFRAGSHI_SHIFT)
-#define I40E_GLPES_PFIP6RXFRAGSLO(_i) (0x00011400 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFIP6RXFRAGSLO_MAX_INDEX 15
-#define I40E_GLPES_PFIP6RXFRAGSLO_IP6RXFRAGSLO_SHIFT 0
-#define I40E_GLPES_PFIP6RXFRAGSLO_IP6RXFRAGSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFIP6RXFRAGSLO_IP6RXFRAGSLO_SHIFT)
-#define I40E_GLPES_PFIP6RXMCOCTSHI(_i) (0x00011604 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFIP6RXMCOCTSHI_MAX_INDEX 15
-#define I40E_GLPES_PFIP6RXMCOCTSHI_IP6RXMCOCTSHI_SHIFT 0
-#define I40E_GLPES_PFIP6RXMCOCTSHI_IP6RXMCOCTSHI_MASK (0xFFFF << I40E_GLPES_PFIP6RXMCOCTSHI_IP6RXMCOCTSHI_SHIFT)
-#define I40E_GLPES_PFIP6RXMCOCTSLO(_i) (0x00011600 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFIP6RXMCOCTSLO_MAX_INDEX 15
-#define I40E_GLPES_PFIP6RXMCOCTSLO_IP6RXMCOCTSLO_SHIFT 0
-#define I40E_GLPES_PFIP6RXMCOCTSLO_IP6RXMCOCTSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFIP6RXMCOCTSLO_IP6RXMCOCTSLO_SHIFT)
-#define I40E_GLPES_PFIP6RXMCPKTSHI(_i) (0x00011804 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFIP6RXMCPKTSHI_MAX_INDEX 15
-#define I40E_GLPES_PFIP6RXMCPKTSHI_IP6RXMCPKTSHI_SHIFT 0
-#define I40E_GLPES_PFIP6RXMCPKTSHI_IP6RXMCPKTSHI_MASK (0xFFFF << I40E_GLPES_PFIP6RXMCPKTSHI_IP6RXMCPKTSHI_SHIFT)
-#define I40E_GLPES_PFIP6RXMCPKTSLO(_i) (0x00011800 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFIP6RXMCPKTSLO_MAX_INDEX 15
-#define I40E_GLPES_PFIP6RXMCPKTSLO_IP6RXMCPKTSLO_SHIFT 0
-#define I40E_GLPES_PFIP6RXMCPKTSLO_IP6RXMCPKTSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFIP6RXMCPKTSLO_IP6RXMCPKTSLO_SHIFT)
-#define I40E_GLPES_PFIP6RXOCTSHI(_i) (0x00010E04 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFIP6RXOCTSHI_MAX_INDEX 15
-#define I40E_GLPES_PFIP6RXOCTSHI_IP6RXOCTSHI_SHIFT 0
-#define I40E_GLPES_PFIP6RXOCTSHI_IP6RXOCTSHI_MASK (0xFFFF << I40E_GLPES_PFIP6RXOCTSHI_IP6RXOCTSHI_SHIFT)
-#define I40E_GLPES_PFIP6RXOCTSLO(_i) (0x00010E00 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFIP6RXOCTSLO_MAX_INDEX 15
-#define I40E_GLPES_PFIP6RXOCTSLO_IP6RXOCTSLO_SHIFT 0
-#define I40E_GLPES_PFIP6RXOCTSLO_IP6RXOCTSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFIP6RXOCTSLO_IP6RXOCTSLO_SHIFT)
-#define I40E_GLPES_PFIP6RXPKTSHI(_i) (0x00011004 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFIP6RXPKTSHI_MAX_INDEX 15
-#define I40E_GLPES_PFIP6RXPKTSHI_IP6RXPKTSHI_SHIFT 0
-#define I40E_GLPES_PFIP6RXPKTSHI_IP6RXPKTSHI_MASK (0xFFFF << I40E_GLPES_PFIP6RXPKTSHI_IP6RXPKTSHI_SHIFT)
-#define I40E_GLPES_PFIP6RXPKTSLO(_i) (0x00011000 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFIP6RXPKTSLO_MAX_INDEX 15
-#define I40E_GLPES_PFIP6RXPKTSLO_IP6RXPKTSLO_SHIFT 0
-#define I40E_GLPES_PFIP6RXPKTSLO_IP6RXPKTSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFIP6RXPKTSLO_IP6RXPKTSLO_SHIFT)
-#define I40E_GLPES_PFIP6RXTRUNC(_i) (0x00011300 + ((_i) * 4)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFIP6RXTRUNC_MAX_INDEX 15
-#define I40E_GLPES_PFIP6RXTRUNC_IP6RXTRUNC_SHIFT 0
-#define I40E_GLPES_PFIP6RXTRUNC_IP6RXTRUNC_MASK (0xFFFFFFFF << I40E_GLPES_PFIP6RXTRUNC_IP6RXTRUNC_SHIFT)
-#define I40E_GLPES_PFIP6TXFRAGSHI(_i) (0x00012804 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFIP6TXFRAGSHI_MAX_INDEX 15
-#define I40E_GLPES_PFIP6TXFRAGSHI_IP6TXFRAGSHI_SHIFT 0
-#define I40E_GLPES_PFIP6TXFRAGSHI_IP6TXFRAGSHI_MASK (0xFFFF << I40E_GLPES_PFIP6TXFRAGSHI_IP6TXFRAGSHI_SHIFT)
-#define I40E_GLPES_PFIP6TXFRAGSLO(_i) (0x00012800 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFIP6TXFRAGSLO_MAX_INDEX 15
-#define I40E_GLPES_PFIP6TXFRAGSLO_IP6TXFRAGSLO_SHIFT 0
-#define I40E_GLPES_PFIP6TXFRAGSLO_IP6TXFRAGSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFIP6TXFRAGSLO_IP6TXFRAGSLO_SHIFT)
-#define I40E_GLPES_PFIP6TXMCOCTSHI(_i) (0x00012A04 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFIP6TXMCOCTSHI_MAX_INDEX 15
-#define I40E_GLPES_PFIP6TXMCOCTSHI_IP6TXMCOCTSHI_SHIFT 0
-#define I40E_GLPES_PFIP6TXMCOCTSHI_IP6TXMCOCTSHI_MASK (0xFFFF << I40E_GLPES_PFIP6TXMCOCTSHI_IP6TXMCOCTSHI_SHIFT)
-#define I40E_GLPES_PFIP6TXMCOCTSLO(_i) (0x00012A00 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFIP6TXMCOCTSLO_MAX_INDEX 15
-#define I40E_GLPES_PFIP6TXMCOCTSLO_IP6TXMCOCTSLO_SHIFT 0
-#define I40E_GLPES_PFIP6TXMCOCTSLO_IP6TXMCOCTSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFIP6TXMCOCTSLO_IP6TXMCOCTSLO_SHIFT)
-#define I40E_GLPES_PFIP6TXMCPKTSHI(_i) (0x00012C04 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFIP6TXMCPKTSHI_MAX_INDEX 15
-#define I40E_GLPES_PFIP6TXMCPKTSHI_IP6TXMCPKTSHI_SHIFT 0
-#define I40E_GLPES_PFIP6TXMCPKTSHI_IP6TXMCPKTSHI_MASK (0xFFFF << I40E_GLPES_PFIP6TXMCPKTSHI_IP6TXMCPKTSHI_SHIFT)
-#define I40E_GLPES_PFIP6TXMCPKTSLO(_i) (0x00012C00 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFIP6TXMCPKTSLO_MAX_INDEX 15
-#define I40E_GLPES_PFIP6TXMCPKTSLO_IP6TXMCPKTSLO_SHIFT 0
-#define I40E_GLPES_PFIP6TXMCPKTSLO_IP6TXMCPKTSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFIP6TXMCPKTSLO_IP6TXMCPKTSLO_SHIFT)
-#define I40E_GLPES_PFIP6TXNOROUTE(_i) (0x00012F00 + ((_i) * 4)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFIP6TXNOROUTE_MAX_INDEX 15
-#define I40E_GLPES_PFIP6TXNOROUTE_IP6TXNOROUTE_SHIFT 0
-#define I40E_GLPES_PFIP6TXNOROUTE_IP6TXNOROUTE_MASK (0xFFFFFF << I40E_GLPES_PFIP6TXNOROUTE_IP6TXNOROUTE_SHIFT)
-#define I40E_GLPES_PFIP6TXOCTSHI(_i) (0x00012404 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFIP6TXOCTSHI_MAX_INDEX 15
-#define I40E_GLPES_PFIP6TXOCTSHI_IP6TXOCTSHI_SHIFT 0
-#define I40E_GLPES_PFIP6TXOCTSHI_IP6TXOCTSHI_MASK (0xFFFF << I40E_GLPES_PFIP6TXOCTSHI_IP6TXOCTSHI_SHIFT)
-#define I40E_GLPES_PFIP6TXOCTSLO(_i) (0x00012400 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFIP6TXOCTSLO_MAX_INDEX 15
-#define I40E_GLPES_PFIP6TXOCTSLO_IP6TXOCTSLO_SHIFT 0
-#define I40E_GLPES_PFIP6TXOCTSLO_IP6TXOCTSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFIP6TXOCTSLO_IP6TXOCTSLO_SHIFT)
-#define I40E_GLPES_PFIP6TXPKTSHI(_i) (0x00012604 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFIP6TXPKTSHI_MAX_INDEX 15
-#define I40E_GLPES_PFIP6TXPKTSHI_IP6TXPKTSHI_SHIFT 0
-#define I40E_GLPES_PFIP6TXPKTSHI_IP6TXPKTSHI_MASK (0xFFFF << I40E_GLPES_PFIP6TXPKTSHI_IP6TXPKTSHI_SHIFT)
-#define I40E_GLPES_PFIP6TXPKTSLO(_i) (0x00012600 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFIP6TXPKTSLO_MAX_INDEX 15
-#define I40E_GLPES_PFIP6TXPKTSLO_IP6TXPKTSLO_SHIFT 0
-#define I40E_GLPES_PFIP6TXPKTSLO_IP6TXPKTSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFIP6TXPKTSLO_IP6TXPKTSLO_SHIFT)
-#define I40E_GLPES_PFRDMARXRDSHI(_i) (0x00013E04 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFRDMARXRDSHI_MAX_INDEX 15
-#define I40E_GLPES_PFRDMARXRDSHI_RDMARXRDSHI_SHIFT 0
-#define I40E_GLPES_PFRDMARXRDSHI_RDMARXRDSHI_MASK (0xFFFF << I40E_GLPES_PFRDMARXRDSHI_RDMARXRDSHI_SHIFT)
-#define I40E_GLPES_PFRDMARXRDSLO(_i) (0x00013E00 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFRDMARXRDSLO_MAX_INDEX 15
-#define I40E_GLPES_PFRDMARXRDSLO_RDMARXRDSLO_SHIFT 0
-#define I40E_GLPES_PFRDMARXRDSLO_RDMARXRDSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFRDMARXRDSLO_RDMARXRDSLO_SHIFT)
-#define I40E_GLPES_PFRDMARXSNDSHI(_i) (0x00014004 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFRDMARXSNDSHI_MAX_INDEX 15
-#define I40E_GLPES_PFRDMARXSNDSHI_RDMARXSNDSHI_SHIFT 0
-#define I40E_GLPES_PFRDMARXSNDSHI_RDMARXSNDSHI_MASK (0xFFFF << I40E_GLPES_PFRDMARXSNDSHI_RDMARXSNDSHI_SHIFT)
-#define I40E_GLPES_PFRDMARXSNDSLO(_i) (0x00014000 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFRDMARXSNDSLO_MAX_INDEX 15
-#define I40E_GLPES_PFRDMARXSNDSLO_RDMARXSNDSLO_SHIFT 0
-#define I40E_GLPES_PFRDMARXSNDSLO_RDMARXSNDSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFRDMARXSNDSLO_RDMARXSNDSLO_SHIFT)
-#define I40E_GLPES_PFRDMARXWRSHI(_i) (0x00013C04 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFRDMARXWRSHI_MAX_INDEX 15
-#define I40E_GLPES_PFRDMARXWRSHI_RDMARXWRSHI_SHIFT 0
-#define I40E_GLPES_PFRDMARXWRSHI_RDMARXWRSHI_MASK (0xFFFF << I40E_GLPES_PFRDMARXWRSHI_RDMARXWRSHI_SHIFT)
-#define I40E_GLPES_PFRDMARXWRSLO(_i) (0x00013C00 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFRDMARXWRSLO_MAX_INDEX 15
-#define I40E_GLPES_PFRDMARXWRSLO_RDMARXWRSLO_SHIFT 0
-#define I40E_GLPES_PFRDMARXWRSLO_RDMARXWRSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFRDMARXWRSLO_RDMARXWRSLO_SHIFT)
-#define I40E_GLPES_PFRDMATXRDSHI(_i) (0x00014404 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFRDMATXRDSHI_MAX_INDEX 15
-#define I40E_GLPES_PFRDMATXRDSHI_RDMARXRDSHI_SHIFT 0
-#define I40E_GLPES_PFRDMATXRDSHI_RDMARXRDSHI_MASK (0xFFFF << I40E_GLPES_PFRDMATXRDSHI_RDMARXRDSHI_SHIFT)
-#define I40E_GLPES_PFRDMATXRDSLO(_i) (0x00014400 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFRDMATXRDSLO_MAX_INDEX 15
-#define I40E_GLPES_PFRDMATXRDSLO_RDMARXRDSLO_SHIFT 0
-#define I40E_GLPES_PFRDMATXRDSLO_RDMARXRDSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFRDMATXRDSLO_RDMARXRDSLO_SHIFT)
-#define I40E_GLPES_PFRDMATXSNDSHI(_i) (0x00014604 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFRDMATXSNDSHI_MAX_INDEX 15
-#define I40E_GLPES_PFRDMATXSNDSHI_RDMARXSNDSHI_SHIFT 0
-#define I40E_GLPES_PFRDMATXSNDSHI_RDMARXSNDSHI_MASK (0xFFFF << I40E_GLPES_PFRDMATXSNDSHI_RDMARXSNDSHI_SHIFT)
-#define I40E_GLPES_PFRDMATXSNDSLO(_i) (0x00014600 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFRDMATXSNDSLO_MAX_INDEX 15
-#define I40E_GLPES_PFRDMATXSNDSLO_RDMARXSNDSLO_SHIFT 0
-#define I40E_GLPES_PFRDMATXSNDSLO_RDMARXSNDSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFRDMATXSNDSLO_RDMARXSNDSLO_SHIFT)
-#define I40E_GLPES_PFRDMATXWRSHI(_i) (0x00014204 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFRDMATXWRSHI_MAX_INDEX 15
-#define I40E_GLPES_PFRDMATXWRSHI_RDMARXWRSHI_SHIFT 0
-#define I40E_GLPES_PFRDMATXWRSHI_RDMARXWRSHI_MASK (0xFFFF << I40E_GLPES_PFRDMATXWRSHI_RDMARXWRSHI_SHIFT)
-#define I40E_GLPES_PFRDMATXWRSLO(_i) (0x00014200 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFRDMATXWRSLO_MAX_INDEX 15
-#define I40E_GLPES_PFRDMATXWRSLO_RDMARXWRSLO_SHIFT 0
-#define I40E_GLPES_PFRDMATXWRSLO_RDMARXWRSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFRDMATXWRSLO_RDMARXWRSLO_SHIFT)
-#define I40E_GLPES_PFRDMAVBNDHI(_i) (0x00014804 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFRDMAVBNDHI_MAX_INDEX 15
-#define I40E_GLPES_PFRDMAVBNDHI_RDMAVBNDHI_SHIFT 0
-#define I40E_GLPES_PFRDMAVBNDHI_RDMAVBNDHI_MASK (0xFFFFFFFF << I40E_GLPES_PFRDMAVBNDHI_RDMAVBNDHI_SHIFT)
-#define I40E_GLPES_PFRDMAVBNDLO(_i) (0x00014800 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFRDMAVBNDLO_MAX_INDEX 15
-#define I40E_GLPES_PFRDMAVBNDLO_RDMAVBNDLO_SHIFT 0
-#define I40E_GLPES_PFRDMAVBNDLO_RDMAVBNDLO_MASK (0xFFFFFFFF << I40E_GLPES_PFRDMAVBNDLO_RDMAVBNDLO_SHIFT)
-#define I40E_GLPES_PFRDMAVINVHI(_i) (0x00014A04 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFRDMAVINVHI_MAX_INDEX 15
-#define I40E_GLPES_PFRDMAVINVHI_RDMAVINVHI_SHIFT 0
-#define I40E_GLPES_PFRDMAVINVHI_RDMAVINVHI_MASK (0xFFFFFFFF << I40E_GLPES_PFRDMAVINVHI_RDMAVINVHI_SHIFT)
-#define I40E_GLPES_PFRDMAVINVLO(_i) (0x00014A00 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFRDMAVINVLO_MAX_INDEX 15
-#define I40E_GLPES_PFRDMAVINVLO_RDMAVINVLO_SHIFT 0
-#define I40E_GLPES_PFRDMAVINVLO_RDMAVINVLO_MASK (0xFFFFFFFF << I40E_GLPES_PFRDMAVINVLO_RDMAVINVLO_SHIFT)
-#define I40E_GLPES_PFRXVLANERR(_i) (0x00010000 + ((_i) * 4)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFRXVLANERR_MAX_INDEX 15
-#define I40E_GLPES_PFRXVLANERR_RXVLANERR_SHIFT 0
-#define I40E_GLPES_PFRXVLANERR_RXVLANERR_MASK (0xFFFFFF << I40E_GLPES_PFRXVLANERR_RXVLANERR_SHIFT)
-#define I40E_GLPES_PFTCPRTXSEG(_i) (0x00013600 + ((_i) * 4)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFTCPRTXSEG_MAX_INDEX 15
-#define I40E_GLPES_PFTCPRTXSEG_TCPRTXSEG_SHIFT 0
-#define I40E_GLPES_PFTCPRTXSEG_TCPRTXSEG_MASK (0xFFFFFFFF << I40E_GLPES_PFTCPRTXSEG_TCPRTXSEG_SHIFT)
-#define I40E_GLPES_PFTCPRXOPTERR(_i) (0x00013200 + ((_i) * 4)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFTCPRXOPTERR_MAX_INDEX 15
-#define I40E_GLPES_PFTCPRXOPTERR_TCPRXOPTERR_SHIFT 0
-#define I40E_GLPES_PFTCPRXOPTERR_TCPRXOPTERR_MASK (0xFFFFFF << I40E_GLPES_PFTCPRXOPTERR_TCPRXOPTERR_SHIFT)
-#define I40E_GLPES_PFTCPRXPROTOERR(_i) (0x00013300 + ((_i) * 4)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFTCPRXPROTOERR_MAX_INDEX 15
-#define I40E_GLPES_PFTCPRXPROTOERR_TCPRXPROTOERR_SHIFT 0
-#define I40E_GLPES_PFTCPRXPROTOERR_TCPRXPROTOERR_MASK (0xFFFFFF << I40E_GLPES_PFTCPRXPROTOERR_TCPRXPROTOERR_SHIFT)
-#define I40E_GLPES_PFTCPRXSEGSHI(_i) (0x00013004 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFTCPRXSEGSHI_MAX_INDEX 15
-#define I40E_GLPES_PFTCPRXSEGSHI_TCPRXSEGSHI_SHIFT 0
-#define I40E_GLPES_PFTCPRXSEGSHI_TCPRXSEGSHI_MASK (0xFFFF << I40E_GLPES_PFTCPRXSEGSHI_TCPRXSEGSHI_SHIFT)
-#define I40E_GLPES_PFTCPRXSEGSLO(_i) (0x00013000 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFTCPRXSEGSLO_MAX_INDEX 15
-#define I40E_GLPES_PFTCPRXSEGSLO_TCPRXSEGSLO_SHIFT 0
-#define I40E_GLPES_PFTCPRXSEGSLO_TCPRXSEGSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFTCPRXSEGSLO_TCPRXSEGSLO_SHIFT)
-#define I40E_GLPES_PFTCPTXSEGHI(_i) (0x00013404 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFTCPTXSEGHI_MAX_INDEX 15
-#define I40E_GLPES_PFTCPTXSEGHI_TCPTXSEGHI_SHIFT 0
-#define I40E_GLPES_PFTCPTXSEGHI_TCPTXSEGHI_MASK (0xFFFF << I40E_GLPES_PFTCPTXSEGHI_TCPTXSEGHI_SHIFT)
-#define I40E_GLPES_PFTCPTXSEGLO(_i) (0x00013400 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFTCPTXSEGLO_MAX_INDEX 15
-#define I40E_GLPES_PFTCPTXSEGLO_TCPTXSEGLO_SHIFT 0
-#define I40E_GLPES_PFTCPTXSEGLO_TCPTXSEGLO_MASK (0xFFFFFFFF << I40E_GLPES_PFTCPTXSEGLO_TCPTXSEGLO_SHIFT)
-#define I40E_GLPES_PFUDPRXPKTSHI(_i) (0x00013804 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFUDPRXPKTSHI_MAX_INDEX 15
-#define I40E_GLPES_PFUDPRXPKTSHI_UDPRXPKTSHI_SHIFT 0
-#define I40E_GLPES_PFUDPRXPKTSHI_UDPRXPKTSHI_MASK (0xFFFF << I40E_GLPES_PFUDPRXPKTSHI_UDPRXPKTSHI_SHIFT)
-#define I40E_GLPES_PFUDPRXPKTSLO(_i) (0x00013800 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFUDPRXPKTSLO_MAX_INDEX 15
-#define I40E_GLPES_PFUDPRXPKTSLO_UDPRXPKTSLO_SHIFT 0
-#define I40E_GLPES_PFUDPRXPKTSLO_UDPRXPKTSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFUDPRXPKTSLO_UDPRXPKTSLO_SHIFT)
-#define I40E_GLPES_PFUDPTXPKTSHI(_i) (0x00013A04 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFUDPTXPKTSHI_MAX_INDEX 15
-#define I40E_GLPES_PFUDPTXPKTSHI_UDPTXPKTSHI_SHIFT 0
-#define I40E_GLPES_PFUDPTXPKTSHI_UDPTXPKTSHI_MASK (0xFFFF << I40E_GLPES_PFUDPTXPKTSHI_UDPTXPKTSHI_SHIFT)
-#define I40E_GLPES_PFUDPTXPKTSLO(_i) (0x00013A00 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
-#define I40E_GLPES_PFUDPTXPKTSLO_MAX_INDEX 15
-#define I40E_GLPES_PFUDPTXPKTSLO_UDPTXPKTSLO_SHIFT 0
-#define I40E_GLPES_PFUDPTXPKTSLO_UDPTXPKTSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFUDPTXPKTSLO_UDPTXPKTSLO_SHIFT)
-#define I40E_GLPES_RDMARXMULTFPDUSHI 0x0001E014 /* Reset: PE_CORER */
-#define I40E_GLPES_RDMARXMULTFPDUSHI_RDMARXMULTFPDUSHI_SHIFT 0
-#define I40E_GLPES_RDMARXMULTFPDUSHI_RDMARXMULTFPDUSHI_MASK (0xFFFFFF << I40E_GLPES_RDMARXMULTFPDUSHI_RDMARXMULTFPDUSHI_SHIFT)
-#define I40E_GLPES_RDMARXMULTFPDUSLO 0x0001E010 /* Reset: PE_CORER */
-#define I40E_GLPES_RDMARXMULTFPDUSLO_RDMARXMULTFPDUSLO_SHIFT 0
-#define I40E_GLPES_RDMARXMULTFPDUSLO_RDMARXMULTFPDUSLO_MASK (0xFFFFFFFF << I40E_GLPES_RDMARXMULTFPDUSLO_RDMARXMULTFPDUSLO_SHIFT)
-#define I40E_GLPES_RDMARXOOODDPHI 0x0001E01C /* Reset: PE_CORER */
-#define I40E_GLPES_RDMARXOOODDPHI_RDMARXOOODDPHI_SHIFT 0
-#define I40E_GLPES_RDMARXOOODDPHI_RDMARXOOODDPHI_MASK (0xFFFFFF << I40E_GLPES_RDMARXOOODDPHI_RDMARXOOODDPHI_SHIFT)
-#define I40E_GLPES_RDMARXOOODDPLO 0x0001E018 /* Reset: PE_CORER */
-#define I40E_GLPES_RDMARXOOODDPLO_RDMARXOOODDPLO_SHIFT 0
-#define I40E_GLPES_RDMARXOOODDPLO_RDMARXOOODDPLO_MASK (0xFFFFFFFF << I40E_GLPES_RDMARXOOODDPLO_RDMARXOOODDPLO_SHIFT)
-#define I40E_GLPES_RDMARXOOONOMARK 0x0001E004 /* Reset: PE_CORER */
-#define I40E_GLPES_RDMARXOOONOMARK_RDMAOOONOMARK_SHIFT 0
-#define I40E_GLPES_RDMARXOOONOMARK_RDMAOOONOMARK_MASK (0xFFFFFFFF << I40E_GLPES_RDMARXOOONOMARK_RDMAOOONOMARK_SHIFT)
-#define I40E_GLPES_RDMARXUNALIGN 0x0001E000 /* Reset: PE_CORER */
-#define I40E_GLPES_RDMARXUNALIGN_RDMRXAUNALIGN_SHIFT 0
-#define I40E_GLPES_RDMARXUNALIGN_RDMRXAUNALIGN_MASK (0xFFFFFFFF << I40E_GLPES_RDMARXUNALIGN_RDMRXAUNALIGN_SHIFT)
-#define I40E_GLPES_TCPRXFOURHOLEHI 0x0001E044 /* Reset: PE_CORER */
-#define I40E_GLPES_TCPRXFOURHOLEHI_TCPRXFOURHOLEHI_SHIFT 0
-#define I40E_GLPES_TCPRXFOURHOLEHI_TCPRXFOURHOLEHI_MASK (0xFFFFFF << I40E_GLPES_TCPRXFOURHOLEHI_TCPRXFOURHOLEHI_SHIFT)
-#define I40E_GLPES_TCPRXFOURHOLELO 0x0001E040 /* Reset: PE_CORER */
-#define I40E_GLPES_TCPRXFOURHOLELO_TCPRXFOURHOLELO_SHIFT 0
-#define I40E_GLPES_TCPRXFOURHOLELO_TCPRXFOURHOLELO_MASK (0xFFFFFFFF << I40E_GLPES_TCPRXFOURHOLELO_TCPRXFOURHOLELO_SHIFT)
-#define I40E_GLPES_TCPRXONEHOLEHI 0x0001E02C /* Reset: PE_CORER */
-#define I40E_GLPES_TCPRXONEHOLEHI_TCPRXONEHOLEHI_SHIFT 0
-#define I40E_GLPES_TCPRXONEHOLEHI_TCPRXONEHOLEHI_MASK (0xFFFFFF << I40E_GLPES_TCPRXONEHOLEHI_TCPRXONEHOLEHI_SHIFT)
-#define I40E_GLPES_TCPRXONEHOLELO 0x0001E028 /* Reset: PE_CORER */
-#define I40E_GLPES_TCPRXONEHOLELO_TCPRXONEHOLELO_SHIFT 0
-#define I40E_GLPES_TCPRXONEHOLELO_TCPRXONEHOLELO_MASK (0xFFFFFFFF << I40E_GLPES_TCPRXONEHOLELO_TCPRXONEHOLELO_SHIFT)
-#define I40E_GLPES_TCPRXPUREACKHI 0x0001E024 /* Reset: PE_CORER */
-#define I40E_GLPES_TCPRXPUREACKHI_TCPRXPUREACKSHI_SHIFT 0
-#define I40E_GLPES_TCPRXPUREACKHI_TCPRXPUREACKSHI_MASK (0xFFFFFF << I40E_GLPES_TCPRXPUREACKHI_TCPRXPUREACKSHI_SHIFT)
-#define I40E_GLPES_TCPRXPUREACKSLO 0x0001E020 /* Reset: PE_CORER */
-#define I40E_GLPES_TCPRXPUREACKSLO_TCPRXPUREACKLO_SHIFT 0
-#define I40E_GLPES_TCPRXPUREACKSLO_TCPRXPUREACKLO_MASK (0xFFFFFFFF << I40E_GLPES_TCPRXPUREACKSLO_TCPRXPUREACKLO_SHIFT)
-#define I40E_GLPES_TCPRXTHREEHOLEHI 0x0001E03C /* Reset: PE_CORER */
-#define I40E_GLPES_TCPRXTHREEHOLEHI_TCPRXTHREEHOLEHI_SHIFT 0
-#define I40E_GLPES_TCPRXTHREEHOLEHI_TCPRXTHREEHOLEHI_MASK (0xFFFFFF << I40E_GLPES_TCPRXTHREEHOLEHI_TCPRXTHREEHOLEHI_SHIFT)
-#define I40E_GLPES_TCPRXTHREEHOLELO 0x0001E038 /* Reset: PE_CORER */
-#define I40E_GLPES_TCPRXTHREEHOLELO_TCPRXTHREEHOLELO_SHIFT 0
-#define I40E_GLPES_TCPRXTHREEHOLELO_TCPRXTHREEHOLELO_MASK (0xFFFFFFFF << I40E_GLPES_TCPRXTHREEHOLELO_TCPRXTHREEHOLELO_SHIFT)
-#define I40E_GLPES_TCPRXTWOHOLEHI 0x0001E034 /* Reset: PE_CORER */
-#define I40E_GLPES_TCPRXTWOHOLEHI_TCPRXTWOHOLEHI_SHIFT 0
-#define I40E_GLPES_TCPRXTWOHOLEHI_TCPRXTWOHOLEHI_MASK (0xFFFFFF << I40E_GLPES_TCPRXTWOHOLEHI_TCPRXTWOHOLEHI_SHIFT)
-#define I40E_GLPES_TCPRXTWOHOLELO 0x0001E030 /* Reset: PE_CORER */
-#define I40E_GLPES_TCPRXTWOHOLELO_TCPRXTWOHOLELO_SHIFT 0
-#define I40E_GLPES_TCPRXTWOHOLELO_TCPRXTWOHOLELO_MASK (0xFFFFFFFF << I40E_GLPES_TCPRXTWOHOLELO_TCPRXTWOHOLELO_SHIFT)
-#define I40E_GLPES_TCPTXRETRANSFASTHI 0x0001E04C /* Reset: PE_CORER */
-#define I40E_GLPES_TCPTXRETRANSFASTHI_TCPTXRETRANSFASTHI_SHIFT 0
-#define I40E_GLPES_TCPTXRETRANSFASTHI_TCPTXRETRANSFASTHI_MASK (0xFFFFFF << I40E_GLPES_TCPTXRETRANSFASTHI_TCPTXRETRANSFASTHI_SHIFT)
-#define I40E_GLPES_TCPTXRETRANSFASTLO 0x0001E048 /* Reset: PE_CORER */
-#define I40E_GLPES_TCPTXRETRANSFASTLO_TCPTXRETRANSFASTLO_SHIFT 0
-#define I40E_GLPES_TCPTXRETRANSFASTLO_TCPTXRETRANSFASTLO_MASK (0xFFFFFFFF << I40E_GLPES_TCPTXRETRANSFASTLO_TCPTXRETRANSFASTLO_SHIFT)
-#define I40E_GLPES_TCPTXTOUTSFASTHI 0x0001E054 /* Reset: PE_CORER */
-#define I40E_GLPES_TCPTXTOUTSFASTHI_TCPTXTOUTSFASTHI_SHIFT 0
-#define I40E_GLPES_TCPTXTOUTSFASTHI_TCPTXTOUTSFASTHI_MASK (0xFFFFFF << I40E_GLPES_TCPTXTOUTSFASTHI_TCPTXTOUTSFASTHI_SHIFT)
-#define I40E_GLPES_TCPTXTOUTSFASTLO 0x0001E050 /* Reset: PE_CORER */
-#define I40E_GLPES_TCPTXTOUTSFASTLO_TCPTXTOUTSFASTLO_SHIFT 0
-#define I40E_GLPES_TCPTXTOUTSFASTLO_TCPTXTOUTSFASTLO_MASK (0xFFFFFFFF << I40E_GLPES_TCPTXTOUTSFASTLO_TCPTXTOUTSFASTLO_SHIFT)
-#define I40E_GLPES_TCPTXTOUTSHI 0x0001E05C /* Reset: PE_CORER */
-#define I40E_GLPES_TCPTXTOUTSHI_TCPTXTOUTSHI_SHIFT 0
-#define I40E_GLPES_TCPTXTOUTSHI_TCPTXTOUTSHI_MASK (0xFFFFFF << I40E_GLPES_TCPTXTOUTSHI_TCPTXTOUTSHI_SHIFT)
-#define I40E_GLPES_TCPTXTOUTSLO 0x0001E058 /* Reset: PE_CORER */
-#define I40E_GLPES_TCPTXTOUTSLO_TCPTXTOUTSLO_SHIFT 0
-#define I40E_GLPES_TCPTXTOUTSLO_TCPTXTOUTSLO_MASK (0xFFFFFFFF << I40E_GLPES_TCPTXTOUTSLO_TCPTXTOUTSLO_SHIFT)
-#define I40E_GLPES_VFIP4RXDISCARD(_i) (0x00018600 + ((_i) * 4)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFIP4RXDISCARD_MAX_INDEX 31
-#define I40E_GLPES_VFIP4RXDISCARD_IP4RXDISCARD_SHIFT 0
-#define I40E_GLPES_VFIP4RXDISCARD_IP4RXDISCARD_MASK (0xFFFFFFFF << I40E_GLPES_VFIP4RXDISCARD_IP4RXDISCARD_SHIFT)
-#define I40E_GLPES_VFIP4RXFRAGSHI(_i) (0x00018804 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFIP4RXFRAGSHI_MAX_INDEX 31
-#define I40E_GLPES_VFIP4RXFRAGSHI_IP4RXFRAGSHI_SHIFT 0
-#define I40E_GLPES_VFIP4RXFRAGSHI_IP4RXFRAGSHI_MASK (0xFFFF << I40E_GLPES_VFIP4RXFRAGSHI_IP4RXFRAGSHI_SHIFT)
-#define I40E_GLPES_VFIP4RXFRAGSLO(_i) (0x00018800 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFIP4RXFRAGSLO_MAX_INDEX 31
-#define I40E_GLPES_VFIP4RXFRAGSLO_IP4RXFRAGSLO_SHIFT 0
-#define I40E_GLPES_VFIP4RXFRAGSLO_IP4RXFRAGSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFIP4RXFRAGSLO_IP4RXFRAGSLO_SHIFT)
-#define I40E_GLPES_VFIP4RXMCOCTSHI(_i) (0x00018A04 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFIP4RXMCOCTSHI_MAX_INDEX 31
-#define I40E_GLPES_VFIP4RXMCOCTSHI_IP4RXMCOCTSHI_SHIFT 0
-#define I40E_GLPES_VFIP4RXMCOCTSHI_IP4RXMCOCTSHI_MASK (0xFFFF << I40E_GLPES_VFIP4RXMCOCTSHI_IP4RXMCOCTSHI_SHIFT)
-#define I40E_GLPES_VFIP4RXMCOCTSLO(_i) (0x00018A00 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFIP4RXMCOCTSLO_MAX_INDEX 31
-#define I40E_GLPES_VFIP4RXMCOCTSLO_IP4RXMCOCTSLO_SHIFT 0
-#define I40E_GLPES_VFIP4RXMCOCTSLO_IP4RXMCOCTSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFIP4RXMCOCTSLO_IP4RXMCOCTSLO_SHIFT)
-#define I40E_GLPES_VFIP4RXMCPKTSHI(_i) (0x00018C04 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFIP4RXMCPKTSHI_MAX_INDEX 31
-#define I40E_GLPES_VFIP4RXMCPKTSHI_IP4RXMCPKTSHI_SHIFT 0
-#define I40E_GLPES_VFIP4RXMCPKTSHI_IP4RXMCPKTSHI_MASK (0xFFFF << I40E_GLPES_VFIP4RXMCPKTSHI_IP4RXMCPKTSHI_SHIFT)
-#define I40E_GLPES_VFIP4RXMCPKTSLO(_i) (0x00018C00 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFIP4RXMCPKTSLO_MAX_INDEX 31
-#define I40E_GLPES_VFIP4RXMCPKTSLO_IP4RXMCPKTSLO_SHIFT 0
-#define I40E_GLPES_VFIP4RXMCPKTSLO_IP4RXMCPKTSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFIP4RXMCPKTSLO_IP4RXMCPKTSLO_SHIFT)
-#define I40E_GLPES_VFIP4RXOCTSHI(_i) (0x00018204 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFIP4RXOCTSHI_MAX_INDEX 31
-#define I40E_GLPES_VFIP4RXOCTSHI_IP4RXOCTSHI_SHIFT 0
-#define I40E_GLPES_VFIP4RXOCTSHI_IP4RXOCTSHI_MASK (0xFFFF << I40E_GLPES_VFIP4RXOCTSHI_IP4RXOCTSHI_SHIFT)
-#define I40E_GLPES_VFIP4RXOCTSLO(_i) (0x00018200 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFIP4RXOCTSLO_MAX_INDEX 31
-#define I40E_GLPES_VFIP4RXOCTSLO_IP4RXOCTSLO_SHIFT 0
-#define I40E_GLPES_VFIP4RXOCTSLO_IP4RXOCTSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFIP4RXOCTSLO_IP4RXOCTSLO_SHIFT)
-#define I40E_GLPES_VFIP4RXPKTSHI(_i) (0x00018404 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFIP4RXPKTSHI_MAX_INDEX 31
-#define I40E_GLPES_VFIP4RXPKTSHI_IP4RXPKTSHI_SHIFT 0
-#define I40E_GLPES_VFIP4RXPKTSHI_IP4RXPKTSHI_MASK (0xFFFF << I40E_GLPES_VFIP4RXPKTSHI_IP4RXPKTSHI_SHIFT)
-#define I40E_GLPES_VFIP4RXPKTSLO(_i) (0x00018400 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFIP4RXPKTSLO_MAX_INDEX 31
-#define I40E_GLPES_VFIP4RXPKTSLO_IP4RXPKTSLO_SHIFT 0
-#define I40E_GLPES_VFIP4RXPKTSLO_IP4RXPKTSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFIP4RXPKTSLO_IP4RXPKTSLO_SHIFT)
-#define I40E_GLPES_VFIP4RXTRUNC(_i) (0x00018700 + ((_i) * 4)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFIP4RXTRUNC_MAX_INDEX 31
-#define I40E_GLPES_VFIP4RXTRUNC_IP4RXTRUNC_SHIFT 0
-#define I40E_GLPES_VFIP4RXTRUNC_IP4RXTRUNC_MASK (0xFFFFFFFF << I40E_GLPES_VFIP4RXTRUNC_IP4RXTRUNC_SHIFT)
-#define I40E_GLPES_VFIP4TXFRAGSHI(_i) (0x00019E04 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFIP4TXFRAGSHI_MAX_INDEX 31
-#define I40E_GLPES_VFIP4TXFRAGSHI_IP4TXFRAGSHI_SHIFT 0
-#define I40E_GLPES_VFIP4TXFRAGSHI_IP4TXFRAGSHI_MASK (0xFFFF << I40E_GLPES_VFIP4TXFRAGSHI_IP4TXFRAGSHI_SHIFT)
-#define I40E_GLPES_VFIP4TXFRAGSLO(_i) (0x00019E00 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFIP4TXFRAGSLO_MAX_INDEX 31
-#define I40E_GLPES_VFIP4TXFRAGSLO_IP4TXFRAGSLO_SHIFT 0
-#define I40E_GLPES_VFIP4TXFRAGSLO_IP4TXFRAGSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFIP4TXFRAGSLO_IP4TXFRAGSLO_SHIFT)
-#define I40E_GLPES_VFIP4TXMCOCTSHI(_i) (0x0001A004 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFIP4TXMCOCTSHI_MAX_INDEX 31
-#define I40E_GLPES_VFIP4TXMCOCTSHI_IP4TXMCOCTSHI_SHIFT 0
-#define I40E_GLPES_VFIP4TXMCOCTSHI_IP4TXMCOCTSHI_MASK (0xFFFF << I40E_GLPES_VFIP4TXMCOCTSHI_IP4TXMCOCTSHI_SHIFT)
-#define I40E_GLPES_VFIP4TXMCOCTSLO(_i) (0x0001A000 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFIP4TXMCOCTSLO_MAX_INDEX 31
-#define I40E_GLPES_VFIP4TXMCOCTSLO_IP4TXMCOCTSLO_SHIFT 0
-#define I40E_GLPES_VFIP4TXMCOCTSLO_IP4TXMCOCTSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFIP4TXMCOCTSLO_IP4TXMCOCTSLO_SHIFT)
-#define I40E_GLPES_VFIP4TXMCPKTSHI(_i) (0x0001A204 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFIP4TXMCPKTSHI_MAX_INDEX 31
-#define I40E_GLPES_VFIP4TXMCPKTSHI_IP4TXMCPKTSHI_SHIFT 0
-#define I40E_GLPES_VFIP4TXMCPKTSHI_IP4TXMCPKTSHI_MASK (0xFFFF << I40E_GLPES_VFIP4TXMCPKTSHI_IP4TXMCPKTSHI_SHIFT)
-#define I40E_GLPES_VFIP4TXMCPKTSLO(_i) (0x0001A200 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFIP4TXMCPKTSLO_MAX_INDEX 31
-#define I40E_GLPES_VFIP4TXMCPKTSLO_IP4TXMCPKTSLO_SHIFT 0
-#define I40E_GLPES_VFIP4TXMCPKTSLO_IP4TXMCPKTSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFIP4TXMCPKTSLO_IP4TXMCPKTSLO_SHIFT)
-#define I40E_GLPES_VFIP4TXNOROUTE(_i) (0x0001AE00 + ((_i) * 4)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFIP4TXNOROUTE_MAX_INDEX 31
-#define I40E_GLPES_VFIP4TXNOROUTE_IP4TXNOROUTE_SHIFT 0
-#define I40E_GLPES_VFIP4TXNOROUTE_IP4TXNOROUTE_MASK (0xFFFFFF << I40E_GLPES_VFIP4TXNOROUTE_IP4TXNOROUTE_SHIFT)
-#define I40E_GLPES_VFIP4TXOCTSHI(_i) (0x00019A04 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFIP4TXOCTSHI_MAX_INDEX 31
-#define I40E_GLPES_VFIP4TXOCTSHI_IP4TXOCTSHI_SHIFT 0
-#define I40E_GLPES_VFIP4TXOCTSHI_IP4TXOCTSHI_MASK (0xFFFF << I40E_GLPES_VFIP4TXOCTSHI_IP4TXOCTSHI_SHIFT)
-#define I40E_GLPES_VFIP4TXOCTSLO(_i) (0x00019A00 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFIP4TXOCTSLO_MAX_INDEX 31
-#define I40E_GLPES_VFIP4TXOCTSLO_IP4TXOCTSLO_SHIFT 0
-#define I40E_GLPES_VFIP4TXOCTSLO_IP4TXOCTSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFIP4TXOCTSLO_IP4TXOCTSLO_SHIFT)
-#define I40E_GLPES_VFIP4TXPKTSHI(_i) (0x00019C04 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFIP4TXPKTSHI_MAX_INDEX 31
-#define I40E_GLPES_VFIP4TXPKTSHI_IP4TXPKTSHI_SHIFT 0
-#define I40E_GLPES_VFIP4TXPKTSHI_IP4TXPKTSHI_MASK (0xFFFF << I40E_GLPES_VFIP4TXPKTSHI_IP4TXPKTSHI_SHIFT)
-#define I40E_GLPES_VFIP4TXPKTSLO(_i) (0x00019C00 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFIP4TXPKTSLO_MAX_INDEX 31
-#define I40E_GLPES_VFIP4TXPKTSLO_IP4TXPKTSLO_SHIFT 0
-#define I40E_GLPES_VFIP4TXPKTSLO_IP4TXPKTSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFIP4TXPKTSLO_IP4TXPKTSLO_SHIFT)
-#define I40E_GLPES_VFIP6RXDISCARD(_i) (0x00019200 + ((_i) * 4)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFIP6RXDISCARD_MAX_INDEX 31
-#define I40E_GLPES_VFIP6RXDISCARD_IP6RXDISCARD_SHIFT 0
-#define I40E_GLPES_VFIP6RXDISCARD_IP6RXDISCARD_MASK (0xFFFFFFFF << I40E_GLPES_VFIP6RXDISCARD_IP6RXDISCARD_SHIFT)
-#define I40E_GLPES_VFIP6RXFRAGSHI(_i) (0x00019404 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFIP6RXFRAGSHI_MAX_INDEX 31
-#define I40E_GLPES_VFIP6RXFRAGSHI_IP6RXFRAGSHI_SHIFT 0
-#define I40E_GLPES_VFIP6RXFRAGSHI_IP6RXFRAGSHI_MASK (0xFFFF << I40E_GLPES_VFIP6RXFRAGSHI_IP6RXFRAGSHI_SHIFT)
-#define I40E_GLPES_VFIP6RXFRAGSLO(_i) (0x00019400 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFIP6RXFRAGSLO_MAX_INDEX 31
-#define I40E_GLPES_VFIP6RXFRAGSLO_IP6RXFRAGSLO_SHIFT 0
-#define I40E_GLPES_VFIP6RXFRAGSLO_IP6RXFRAGSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFIP6RXFRAGSLO_IP6RXFRAGSLO_SHIFT)
-#define I40E_GLPES_VFIP6RXMCOCTSHI(_i) (0x00019604 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFIP6RXMCOCTSHI_MAX_INDEX 31
-#define I40E_GLPES_VFIP6RXMCOCTSHI_IP6RXMCOCTSHI_SHIFT 0
-#define I40E_GLPES_VFIP6RXMCOCTSHI_IP6RXMCOCTSHI_MASK (0xFFFF << I40E_GLPES_VFIP6RXMCOCTSHI_IP6RXMCOCTSHI_SHIFT)
-#define I40E_GLPES_VFIP6RXMCOCTSLO(_i) (0x00019600 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFIP6RXMCOCTSLO_MAX_INDEX 31
-#define I40E_GLPES_VFIP6RXMCOCTSLO_IP6RXMCOCTSLO_SHIFT 0
-#define I40E_GLPES_VFIP6RXMCOCTSLO_IP6RXMCOCTSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFIP6RXMCOCTSLO_IP6RXMCOCTSLO_SHIFT)
-#define I40E_GLPES_VFIP6RXMCPKTSHI(_i) (0x00019804 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFIP6RXMCPKTSHI_MAX_INDEX 31
-#define I40E_GLPES_VFIP6RXMCPKTSHI_IP6RXMCPKTSHI_SHIFT 0
-#define I40E_GLPES_VFIP6RXMCPKTSHI_IP6RXMCPKTSHI_MASK (0xFFFF << I40E_GLPES_VFIP6RXMCPKTSHI_IP6RXMCPKTSHI_SHIFT)
-#define I40E_GLPES_VFIP6RXMCPKTSLO(_i) (0x00019800 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFIP6RXMCPKTSLO_MAX_INDEX 31
-#define I40E_GLPES_VFIP6RXMCPKTSLO_IP6RXMCPKTSLO_SHIFT 0
-#define I40E_GLPES_VFIP6RXMCPKTSLO_IP6RXMCPKTSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFIP6RXMCPKTSLO_IP6RXMCPKTSLO_SHIFT)
-#define I40E_GLPES_VFIP6RXOCTSHI(_i) (0x00018E04 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFIP6RXOCTSHI_MAX_INDEX 31
-#define I40E_GLPES_VFIP6RXOCTSHI_IP6RXOCTSHI_SHIFT 0
-#define I40E_GLPES_VFIP6RXOCTSHI_IP6RXOCTSHI_MASK (0xFFFF << I40E_GLPES_VFIP6RXOCTSHI_IP6RXOCTSHI_SHIFT)
-#define I40E_GLPES_VFIP6RXOCTSLO(_i) (0x00018E00 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFIP6RXOCTSLO_MAX_INDEX 31
-#define I40E_GLPES_VFIP6RXOCTSLO_IP6RXOCTSLO_SHIFT 0
-#define I40E_GLPES_VFIP6RXOCTSLO_IP6RXOCTSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFIP6RXOCTSLO_IP6RXOCTSLO_SHIFT)
-#define I40E_GLPES_VFIP6RXPKTSHI(_i) (0x00019004 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFIP6RXPKTSHI_MAX_INDEX 31
-#define I40E_GLPES_VFIP6RXPKTSHI_IP6RXPKTSHI_SHIFT 0
-#define I40E_GLPES_VFIP6RXPKTSHI_IP6RXPKTSHI_MASK (0xFFFF << I40E_GLPES_VFIP6RXPKTSHI_IP6RXPKTSHI_SHIFT)
-#define I40E_GLPES_VFIP6RXPKTSLO(_i) (0x00019000 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFIP6RXPKTSLO_MAX_INDEX 31
-#define I40E_GLPES_VFIP6RXPKTSLO_IP6RXPKTSLO_SHIFT 0
-#define I40E_GLPES_VFIP6RXPKTSLO_IP6RXPKTSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFIP6RXPKTSLO_IP6RXPKTSLO_SHIFT)
-#define I40E_GLPES_VFIP6RXTRUNC(_i) (0x00019300 + ((_i) * 4)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFIP6RXTRUNC_MAX_INDEX 31
-#define I40E_GLPES_VFIP6RXTRUNC_IP6RXTRUNC_SHIFT 0
-#define I40E_GLPES_VFIP6RXTRUNC_IP6RXTRUNC_MASK (0xFFFFFFFF << I40E_GLPES_VFIP6RXTRUNC_IP6RXTRUNC_SHIFT)
-#define I40E_GLPES_VFIP6TXFRAGSHI(_i) (0x0001A804 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFIP6TXFRAGSHI_MAX_INDEX 31
-#define I40E_GLPES_VFIP6TXFRAGSHI_IP6TXFRAGSHI_SHIFT 0
-#define I40E_GLPES_VFIP6TXFRAGSHI_IP6TXFRAGSHI_MASK (0xFFFF << I40E_GLPES_VFIP6TXFRAGSHI_IP6TXFRAGSHI_SHIFT)
-#define I40E_GLPES_VFIP6TXFRAGSLO(_i) (0x0001A800 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFIP6TXFRAGSLO_MAX_INDEX 31
-#define I40E_GLPES_VFIP6TXFRAGSLO_IP6TXFRAGSLO_SHIFT 0
-#define I40E_GLPES_VFIP6TXFRAGSLO_IP6TXFRAGSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFIP6TXFRAGSLO_IP6TXFRAGSLO_SHIFT)
-#define I40E_GLPES_VFIP6TXMCOCTSHI(_i) (0x0001AA04 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFIP6TXMCOCTSHI_MAX_INDEX 31
-#define I40E_GLPES_VFIP6TXMCOCTSHI_IP6TXMCOCTSHI_SHIFT 0
-#define I40E_GLPES_VFIP6TXMCOCTSHI_IP6TXMCOCTSHI_MASK (0xFFFF << I40E_GLPES_VFIP6TXMCOCTSHI_IP6TXMCOCTSHI_SHIFT)
-#define I40E_GLPES_VFIP6TXMCOCTSLO(_i) (0x0001AA00 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFIP6TXMCOCTSLO_MAX_INDEX 31
-#define I40E_GLPES_VFIP6TXMCOCTSLO_IP6TXMCOCTSLO_SHIFT 0
-#define I40E_GLPES_VFIP6TXMCOCTSLO_IP6TXMCOCTSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFIP6TXMCOCTSLO_IP6TXMCOCTSLO_SHIFT)
-#define I40E_GLPES_VFIP6TXMCPKTSHI(_i) (0x0001AC04 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFIP6TXMCPKTSHI_MAX_INDEX 31
-#define I40E_GLPES_VFIP6TXMCPKTSHI_IP6TXMCPKTSHI_SHIFT 0
-#define I40E_GLPES_VFIP6TXMCPKTSHI_IP6TXMCPKTSHI_MASK (0xFFFF << I40E_GLPES_VFIP6TXMCPKTSHI_IP6TXMCPKTSHI_SHIFT)
-#define I40E_GLPES_VFIP6TXMCPKTSLO(_i) (0x0001AC00 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFIP6TXMCPKTSLO_MAX_INDEX 31
-#define I40E_GLPES_VFIP6TXMCPKTSLO_IP6TXMCPKTSLO_SHIFT 0
-#define I40E_GLPES_VFIP6TXMCPKTSLO_IP6TXMCPKTSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFIP6TXMCPKTSLO_IP6TXMCPKTSLO_SHIFT)
-#define I40E_GLPES_VFIP6TXNOROUTE(_i) (0x0001AF00 + ((_i) * 4)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFIP6TXNOROUTE_MAX_INDEX 31
-#define I40E_GLPES_VFIP6TXNOROUTE_IP6TXNOROUTE_SHIFT 0
-#define I40E_GLPES_VFIP6TXNOROUTE_IP6TXNOROUTE_MASK (0xFFFFFF << I40E_GLPES_VFIP6TXNOROUTE_IP6TXNOROUTE_SHIFT)
-#define I40E_GLPES_VFIP6TXOCTSHI(_i) (0x0001A404 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFIP6TXOCTSHI_MAX_INDEX 31
-#define I40E_GLPES_VFIP6TXOCTSHI_IP6TXOCTSHI_SHIFT 0
-#define I40E_GLPES_VFIP6TXOCTSHI_IP6TXOCTSHI_MASK (0xFFFF << I40E_GLPES_VFIP6TXOCTSHI_IP6TXOCTSHI_SHIFT)
-#define I40E_GLPES_VFIP6TXOCTSLO(_i) (0x0001A400 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFIP6TXOCTSLO_MAX_INDEX 31
-#define I40E_GLPES_VFIP6TXOCTSLO_IP6TXOCTSLO_SHIFT 0
-#define I40E_GLPES_VFIP6TXOCTSLO_IP6TXOCTSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFIP6TXOCTSLO_IP6TXOCTSLO_SHIFT)
-#define I40E_GLPES_VFIP6TXPKTSHI(_i) (0x0001A604 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFIP6TXPKTSHI_MAX_INDEX 31
-#define I40E_GLPES_VFIP6TXPKTSHI_IP6TXPKTSHI_SHIFT 0
-#define I40E_GLPES_VFIP6TXPKTSHI_IP6TXPKTSHI_MASK (0xFFFF << I40E_GLPES_VFIP6TXPKTSHI_IP6TXPKTSHI_SHIFT)
-#define I40E_GLPES_VFIP6TXPKTSLO(_i) (0x0001A600 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFIP6TXPKTSLO_MAX_INDEX 31
-#define I40E_GLPES_VFIP6TXPKTSLO_IP6TXPKTSLO_SHIFT 0
-#define I40E_GLPES_VFIP6TXPKTSLO_IP6TXPKTSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFIP6TXPKTSLO_IP6TXPKTSLO_SHIFT)
-#define I40E_GLPES_VFRDMARXRDSHI(_i) (0x0001BE04 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFRDMARXRDSHI_MAX_INDEX 31
-#define I40E_GLPES_VFRDMARXRDSHI_RDMARXRDSHI_SHIFT 0
-#define I40E_GLPES_VFRDMARXRDSHI_RDMARXRDSHI_MASK (0xFFFF << I40E_GLPES_VFRDMARXRDSHI_RDMARXRDSHI_SHIFT)
-#define I40E_GLPES_VFRDMARXRDSLO(_i) (0x0001BE00 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFRDMARXRDSLO_MAX_INDEX 31
-#define I40E_GLPES_VFRDMARXRDSLO_RDMARXRDSLO_SHIFT 0
-#define I40E_GLPES_VFRDMARXRDSLO_RDMARXRDSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFRDMARXRDSLO_RDMARXRDSLO_SHIFT)
-#define I40E_GLPES_VFRDMARXSNDSHI(_i) (0x0001C004 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFRDMARXSNDSHI_MAX_INDEX 31
-#define I40E_GLPES_VFRDMARXSNDSHI_RDMARXSNDSHI_SHIFT 0
-#define I40E_GLPES_VFRDMARXSNDSHI_RDMARXSNDSHI_MASK (0xFFFF << I40E_GLPES_VFRDMARXSNDSHI_RDMARXSNDSHI_SHIFT)
-#define I40E_GLPES_VFRDMARXSNDSLO(_i) (0x0001C000 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFRDMARXSNDSLO_MAX_INDEX 31
-#define I40E_GLPES_VFRDMARXSNDSLO_RDMARXSNDSLO_SHIFT 0
-#define I40E_GLPES_VFRDMARXSNDSLO_RDMARXSNDSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFRDMARXSNDSLO_RDMARXSNDSLO_SHIFT)
-#define I40E_GLPES_VFRDMARXWRSHI(_i) (0x0001BC04 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFRDMARXWRSHI_MAX_INDEX 31
-#define I40E_GLPES_VFRDMARXWRSHI_RDMARXWRSHI_SHIFT 0
-#define I40E_GLPES_VFRDMARXWRSHI_RDMARXWRSHI_MASK (0xFFFF << I40E_GLPES_VFRDMARXWRSHI_RDMARXWRSHI_SHIFT)
-#define I40E_GLPES_VFRDMARXWRSLO(_i) (0x0001BC00 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFRDMARXWRSLO_MAX_INDEX 31
-#define I40E_GLPES_VFRDMARXWRSLO_RDMARXWRSLO_SHIFT 0
-#define I40E_GLPES_VFRDMARXWRSLO_RDMARXWRSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFRDMARXWRSLO_RDMARXWRSLO_SHIFT)
-#define I40E_GLPES_VFRDMATXRDSHI(_i) (0x0001C404 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFRDMATXRDSHI_MAX_INDEX 31
-#define I40E_GLPES_VFRDMATXRDSHI_RDMARXRDSHI_SHIFT 0
-#define I40E_GLPES_VFRDMATXRDSHI_RDMARXRDSHI_MASK (0xFFFF << I40E_GLPES_VFRDMATXRDSHI_RDMARXRDSHI_SHIFT)
-#define I40E_GLPES_VFRDMATXRDSLO(_i) (0x0001C400 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFRDMATXRDSLO_MAX_INDEX 31
-#define I40E_GLPES_VFRDMATXRDSLO_RDMARXRDSLO_SHIFT 0
-#define I40E_GLPES_VFRDMATXRDSLO_RDMARXRDSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFRDMATXRDSLO_RDMARXRDSLO_SHIFT)
-#define I40E_GLPES_VFRDMATXSNDSHI(_i) (0x0001C604 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFRDMATXSNDSHI_MAX_INDEX 31
-#define I40E_GLPES_VFRDMATXSNDSHI_RDMARXSNDSHI_SHIFT 0
-#define I40E_GLPES_VFRDMATXSNDSHI_RDMARXSNDSHI_MASK (0xFFFF << I40E_GLPES_VFRDMATXSNDSHI_RDMARXSNDSHI_SHIFT)
-#define I40E_GLPES_VFRDMATXSNDSLO(_i) (0x0001C600 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFRDMATXSNDSLO_MAX_INDEX 31
-#define I40E_GLPES_VFRDMATXSNDSLO_RDMARXSNDSLO_SHIFT 0
-#define I40E_GLPES_VFRDMATXSNDSLO_RDMARXSNDSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFRDMATXSNDSLO_RDMARXSNDSLO_SHIFT)
-#define I40E_GLPES_VFRDMATXWRSHI(_i) (0x0001C204 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFRDMATXWRSHI_MAX_INDEX 31
-#define I40E_GLPES_VFRDMATXWRSHI_RDMARXWRSHI_SHIFT 0
-#define I40E_GLPES_VFRDMATXWRSHI_RDMARXWRSHI_MASK (0xFFFF << I40E_GLPES_VFRDMATXWRSHI_RDMARXWRSHI_SHIFT)
-#define I40E_GLPES_VFRDMATXWRSLO(_i) (0x0001C200 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFRDMATXWRSLO_MAX_INDEX 31
-#define I40E_GLPES_VFRDMATXWRSLO_RDMARXWRSLO_SHIFT 0
-#define I40E_GLPES_VFRDMATXWRSLO_RDMARXWRSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFRDMATXWRSLO_RDMARXWRSLO_SHIFT)
-#define I40E_GLPES_VFRDMAVBNDHI(_i) (0x0001C804 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFRDMAVBNDHI_MAX_INDEX 31
-#define I40E_GLPES_VFRDMAVBNDHI_RDMAVBNDHI_SHIFT 0
-#define I40E_GLPES_VFRDMAVBNDHI_RDMAVBNDHI_MASK (0xFFFFFFFF << I40E_GLPES_VFRDMAVBNDHI_RDMAVBNDHI_SHIFT)
-#define I40E_GLPES_VFRDMAVBNDLO(_i) (0x0001C800 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFRDMAVBNDLO_MAX_INDEX 31
-#define I40E_GLPES_VFRDMAVBNDLO_RDMAVBNDLO_SHIFT 0
-#define I40E_GLPES_VFRDMAVBNDLO_RDMAVBNDLO_MASK (0xFFFFFFFF << I40E_GLPES_VFRDMAVBNDLO_RDMAVBNDLO_SHIFT)
-#define I40E_GLPES_VFRDMAVINVHI(_i) (0x0001CA04 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFRDMAVINVHI_MAX_INDEX 31
-#define I40E_GLPES_VFRDMAVINVHI_RDMAVINVHI_SHIFT 0
-#define I40E_GLPES_VFRDMAVINVHI_RDMAVINVHI_MASK (0xFFFFFFFF << I40E_GLPES_VFRDMAVINVHI_RDMAVINVHI_SHIFT)
-#define I40E_GLPES_VFRDMAVINVLO(_i) (0x0001CA00 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFRDMAVINVLO_MAX_INDEX 31
-#define I40E_GLPES_VFRDMAVINVLO_RDMAVINVLO_SHIFT 0
-#define I40E_GLPES_VFRDMAVINVLO_RDMAVINVLO_MASK (0xFFFFFFFF << I40E_GLPES_VFRDMAVINVLO_RDMAVINVLO_SHIFT)
-#define I40E_GLPES_VFRXVLANERR(_i) (0x00018000 + ((_i) * 4)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFRXVLANERR_MAX_INDEX 31
-#define I40E_GLPES_VFRXVLANERR_RXVLANERR_SHIFT 0
-#define I40E_GLPES_VFRXVLANERR_RXVLANERR_MASK (0xFFFFFF << I40E_GLPES_VFRXVLANERR_RXVLANERR_SHIFT)
-#define I40E_GLPES_VFTCPRTXSEG(_i) (0x0001B600 + ((_i) * 4)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFTCPRTXSEG_MAX_INDEX 31
-#define I40E_GLPES_VFTCPRTXSEG_TCPRTXSEG_SHIFT 0
-#define I40E_GLPES_VFTCPRTXSEG_TCPRTXSEG_MASK (0xFFFFFFFF << I40E_GLPES_VFTCPRTXSEG_TCPRTXSEG_SHIFT)
-#define I40E_GLPES_VFTCPRXOPTERR(_i) (0x0001B200 + ((_i) * 4)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFTCPRXOPTERR_MAX_INDEX 31
-#define I40E_GLPES_VFTCPRXOPTERR_TCPRXOPTERR_SHIFT 0
-#define I40E_GLPES_VFTCPRXOPTERR_TCPRXOPTERR_MASK (0xFFFFFF << I40E_GLPES_VFTCPRXOPTERR_TCPRXOPTERR_SHIFT)
-#define I40E_GLPES_VFTCPRXPROTOERR(_i) (0x0001B300 + ((_i) * 4)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFTCPRXPROTOERR_MAX_INDEX 31
-#define I40E_GLPES_VFTCPRXPROTOERR_TCPRXPROTOERR_SHIFT 0
-#define I40E_GLPES_VFTCPRXPROTOERR_TCPRXPROTOERR_MASK (0xFFFFFF << I40E_GLPES_VFTCPRXPROTOERR_TCPRXPROTOERR_SHIFT)
-#define I40E_GLPES_VFTCPRXSEGSHI(_i) (0x0001B004 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFTCPRXSEGSHI_MAX_INDEX 31
-#define I40E_GLPES_VFTCPRXSEGSHI_TCPRXSEGSHI_SHIFT 0
-#define I40E_GLPES_VFTCPRXSEGSHI_TCPRXSEGSHI_MASK (0xFFFF << I40E_GLPES_VFTCPRXSEGSHI_TCPRXSEGSHI_SHIFT)
-#define I40E_GLPES_VFTCPRXSEGSLO(_i) (0x0001B000 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFTCPRXSEGSLO_MAX_INDEX 31
-#define I40E_GLPES_VFTCPRXSEGSLO_TCPRXSEGSLO_SHIFT 0
-#define I40E_GLPES_VFTCPRXSEGSLO_TCPRXSEGSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFTCPRXSEGSLO_TCPRXSEGSLO_SHIFT)
-#define I40E_GLPES_VFTCPTXSEGHI(_i) (0x0001B404 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFTCPTXSEGHI_MAX_INDEX 31
-#define I40E_GLPES_VFTCPTXSEGHI_TCPTXSEGHI_SHIFT 0
-#define I40E_GLPES_VFTCPTXSEGHI_TCPTXSEGHI_MASK (0xFFFF << I40E_GLPES_VFTCPTXSEGHI_TCPTXSEGHI_SHIFT)
-#define I40E_GLPES_VFTCPTXSEGLO(_i) (0x0001B400 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFTCPTXSEGLO_MAX_INDEX 31
-#define I40E_GLPES_VFTCPTXSEGLO_TCPTXSEGLO_SHIFT 0
-#define I40E_GLPES_VFTCPTXSEGLO_TCPTXSEGLO_MASK (0xFFFFFFFF << I40E_GLPES_VFTCPTXSEGLO_TCPTXSEGLO_SHIFT)
-#define I40E_GLPES_VFUDPRXPKTSHI(_i) (0x0001B804 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFUDPRXPKTSHI_MAX_INDEX 31
-#define I40E_GLPES_VFUDPRXPKTSHI_UDPRXPKTSHI_SHIFT 0
-#define I40E_GLPES_VFUDPRXPKTSHI_UDPRXPKTSHI_MASK (0xFFFF << I40E_GLPES_VFUDPRXPKTSHI_UDPRXPKTSHI_SHIFT)
-#define I40E_GLPES_VFUDPRXPKTSLO(_i) (0x0001B800 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFUDPRXPKTSLO_MAX_INDEX 31
-#define I40E_GLPES_VFUDPRXPKTSLO_UDPRXPKTSLO_SHIFT 0
-#define I40E_GLPES_VFUDPRXPKTSLO_UDPRXPKTSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFUDPRXPKTSLO_UDPRXPKTSLO_SHIFT)
-#define I40E_GLPES_VFUDPTXPKTSHI(_i) (0x0001BA04 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFUDPTXPKTSHI_MAX_INDEX 31
-#define I40E_GLPES_VFUDPTXPKTSHI_UDPTXPKTSHI_SHIFT 0
-#define I40E_GLPES_VFUDPTXPKTSHI_UDPTXPKTSHI_MASK (0xFFFF << I40E_GLPES_VFUDPTXPKTSHI_UDPTXPKTSHI_SHIFT)
-#define I40E_GLPES_VFUDPTXPKTSLO(_i) (0x0001BA00 + ((_i) * 8)) /* _i=0...31 */ /* Reset: PE_CORER */
-#define I40E_GLPES_VFUDPTXPKTSLO_MAX_INDEX 31
-#define I40E_GLPES_VFUDPTXPKTSLO_UDPTXPKTSLO_SHIFT 0
-#define I40E_GLPES_VFUDPTXPKTSLO_UDPTXPKTSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFUDPTXPKTSLO_UDPTXPKTSLO_SHIFT)
-
-#define I40E_VFPE_AEQALLOC1 0x0000A400 /* Reset: VFR */
-#define I40E_VFPE_AEQALLOC1_AECOUNT_SHIFT 0
-#define I40E_VFPE_AEQALLOC1_AECOUNT_MASK (0xFFFFFFFF << I40E_VFPE_AEQALLOC1_AECOUNT_SHIFT)
-#define I40E_VFPE_CCQPHIGH1 0x00009800 /* Reset: VFR */
-#define I40E_VFPE_CCQPHIGH1_PECCQPHIGH_SHIFT 0
-#define I40E_VFPE_CCQPHIGH1_PECCQPHIGH_MASK (0xFFFFFFFF << I40E_VFPE_CCQPHIGH1_PECCQPHIGH_SHIFT)
-#define I40E_VFPE_CCQPLOW1 0x0000AC00 /* Reset: VFR */
-#define I40E_VFPE_CCQPLOW1_PECCQPLOW_SHIFT 0
-#define I40E_VFPE_CCQPLOW1_PECCQPLOW_MASK (0xFFFFFFFF << I40E_VFPE_CCQPLOW1_PECCQPLOW_SHIFT)
-#define I40E_VFPE_CCQPSTATUS1 0x0000B800 /* Reset: VFR */
-#define I40E_VFPE_CCQPSTATUS1_CCQP_DONE_SHIFT 0
-#define I40E_VFPE_CCQPSTATUS1_CCQP_DONE_MASK (0x1 << I40E_VFPE_CCQPSTATUS1_CCQP_DONE_SHIFT)
-#define I40E_VFPE_CCQPSTATUS1_HMC_PROFILE_SHIFT 4
-#define I40E_VFPE_CCQPSTATUS1_HMC_PROFILE_MASK (0x7 << I40E_VFPE_CCQPSTATUS1_HMC_PROFILE_SHIFT)
-#define I40E_VFPE_CCQPSTATUS1_RDMA_EN_VFS_SHIFT 16
-#define I40E_VFPE_CCQPSTATUS1_RDMA_EN_VFS_MASK (0x3F << I40E_VFPE_CCQPSTATUS1_RDMA_EN_VFS_SHIFT)
-#define I40E_VFPE_CCQPSTATUS1_CCQP_ERR_SHIFT 31
-#define I40E_VFPE_CCQPSTATUS1_CCQP_ERR_MASK (0x1 << I40E_VFPE_CCQPSTATUS1_CCQP_ERR_SHIFT)
-#define I40E_VFPE_CQACK1 0x0000B000 /* Reset: VFR */
-#define I40E_VFPE_CQACK1_PECQID_SHIFT 0
-#define I40E_VFPE_CQACK1_PECQID_MASK (0x1FFFF << I40E_VFPE_CQACK1_PECQID_SHIFT)
-#define I40E_VFPE_CQARM1 0x0000B400 /* Reset: VFR */
-#define I40E_VFPE_CQARM1_PECQID_SHIFT 0
-#define I40E_VFPE_CQARM1_PECQID_MASK (0x1FFFF << I40E_VFPE_CQARM1_PECQID_SHIFT)
-#define I40E_VFPE_CQPDB1 0x0000BC00 /* Reset: VFR */
-#define I40E_VFPE_CQPDB1_WQHEAD_SHIFT 0
-#define I40E_VFPE_CQPDB1_WQHEAD_MASK (0x7FF << I40E_VFPE_CQPDB1_WQHEAD_SHIFT)
-#define I40E_VFPE_CQPERRCODES1 0x00009C00 /* Reset: VFR */
-#define I40E_VFPE_CQPERRCODES1_CQP_MINOR_CODE_SHIFT 0
-#define I40E_VFPE_CQPERRCODES1_CQP_MINOR_CODE_MASK (0xFFFF << I40E_VFPE_CQPERRCODES1_CQP_MINOR_CODE_SHIFT)
-#define I40E_VFPE_CQPERRCODES1_CQP_MAJOR_CODE_SHIFT 16
-#define I40E_VFPE_CQPERRCODES1_CQP_MAJOR_CODE_MASK (0xFFFF << I40E_VFPE_CQPERRCODES1_CQP_MAJOR_CODE_SHIFT)
-#define I40E_VFPE_CQPTAIL1 0x0000A000 /* Reset: VFR */
-#define I40E_VFPE_CQPTAIL1_WQTAIL_SHIFT 0
-#define I40E_VFPE_CQPTAIL1_WQTAIL_MASK (0x7FF << I40E_VFPE_CQPTAIL1_WQTAIL_SHIFT)
-#define I40E_VFPE_CQPTAIL1_CQP_OP_ERR_SHIFT 31
-#define I40E_VFPE_CQPTAIL1_CQP_OP_ERR_MASK (0x1 << I40E_VFPE_CQPTAIL1_CQP_OP_ERR_SHIFT)
-#define I40E_VFPE_IPCONFIG01 0x00008C00 /* Reset: VFR */
-#define I40E_VFPE_IPCONFIG01_PEIPID_SHIFT 0
-#define I40E_VFPE_IPCONFIG01_PEIPID_MASK (0xFFFF << I40E_VFPE_IPCONFIG01_PEIPID_SHIFT)
-#define I40E_VFPE_IPCONFIG01_USEENTIREIDRANGE_SHIFT 16
-#define I40E_VFPE_IPCONFIG01_USEENTIREIDRANGE_MASK (0x1 << I40E_VFPE_IPCONFIG01_USEENTIREIDRANGE_SHIFT)
-#define I40E_VFPE_MRTEIDXMASK1 0x00009000 /* Reset: VFR */
-#define I40E_VFPE_MRTEIDXMASK1_MRTEIDXMASKBITS_SHIFT 0
-#define I40E_VFPE_MRTEIDXMASK1_MRTEIDXMASKBITS_MASK (0x1F << I40E_VFPE_MRTEIDXMASK1_MRTEIDXMASKBITS_SHIFT)
-#define I40E_VFPE_RCVUNEXPECTEDERROR1 0x00009400 /* Reset: VFR */
-#define I40E_VFPE_RCVUNEXPECTEDERROR1_TCP_RX_UNEXP_ERR_SHIFT 0
-#define I40E_VFPE_RCVUNEXPECTEDERROR1_TCP_RX_UNEXP_ERR_MASK (0xFFFFFF << I40E_VFPE_RCVUNEXPECTEDERROR1_TCP_RX_UNEXP_ERR_SHIFT)
-#define I40E_VFPE_TCPNOWTIMER1 0x0000A800 /* Reset: VFR */
-#define I40E_VFPE_TCPNOWTIMER1_TCP_NOW_SHIFT 0
-#define I40E_VFPE_TCPNOWTIMER1_TCP_NOW_MASK (0xFFFFFFFF << I40E_VFPE_TCPNOWTIMER1_TCP_NOW_SHIFT)
-#define I40E_VFPE_WQEALLOC1 0x0000C000 /* Reset: VFR */
-#define I40E_VFPE_WQEALLOC1_PEQPID_SHIFT 0
-#define I40E_VFPE_WQEALLOC1_PEQPID_MASK (0x3FFFF << I40E_VFPE_WQEALLOC1_PEQPID_SHIFT)
-#define I40E_VFPE_WQEALLOC1_WQE_DESC_INDEX_SHIFT 20
-#define I40E_VFPE_WQEALLOC1_WQE_DESC_INDEX_MASK (0xFFF << I40E_VFPE_WQEALLOC1_WQE_DESC_INDEX_SHIFT)
-#endif /* I40IW_REGISTER_H */
diff --git a/drivers/infiniband/hw/i40iw/i40iw_status.h b/drivers/infiniband/hw/i40iw/i40iw_status.h
deleted file mode 100644
index 91c421762f06..000000000000
--- a/drivers/infiniband/hw/i40iw/i40iw_status.h
+++ /dev/null
@@ -1,101 +0,0 @@
-/*******************************************************************************
-*
-* Copyright (c) 2015-2016 Intel Corporation. All rights reserved.
-*
-* This software is available to you under a choice of one of two
-* licenses. You may choose to be licensed under the terms of the GNU
-* General Public License (GPL) Version 2, available from the file
-* COPYING in the main directory of this source tree, or the
-* OpenFabrics.org BSD license below:
-*
-* Redistribution and use in source and binary forms, with or
-* without modification, are permitted provided that the following
-* conditions are met:
-*
-* - Redistributions of source code must retain the above
-* copyright notice, this list of conditions and the following
-* disclaimer.
-*
-* - Redistributions in binary form must reproduce the above
-* copyright notice, this list of conditions and the following
-* disclaimer in the documentation and/or other materials
-* provided with the distribution.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-* SOFTWARE.
-*
-*******************************************************************************/
-
-#ifndef I40IW_STATUS_H
-#define I40IW_STATUS_H
-
-/* Error Codes */
-enum i40iw_status_code {
- I40IW_SUCCESS = 0,
- I40IW_ERR_NVM = -1,
- I40IW_ERR_NVM_CHECKSUM = -2,
- I40IW_ERR_CONFIG = -4,
- I40IW_ERR_PARAM = -5,
- I40IW_ERR_DEVICE_NOT_SUPPORTED = -6,
- I40IW_ERR_RESET_FAILED = -7,
- I40IW_ERR_SWFW_SYNC = -8,
- I40IW_ERR_NO_MEMORY = -9,
- I40IW_ERR_BAD_PTR = -10,
- I40IW_ERR_INVALID_PD_ID = -11,
- I40IW_ERR_INVALID_QP_ID = -12,
- I40IW_ERR_INVALID_CQ_ID = -13,
- I40IW_ERR_INVALID_CEQ_ID = -14,
- I40IW_ERR_INVALID_AEQ_ID = -15,
- I40IW_ERR_INVALID_SIZE = -16,
- I40IW_ERR_INVALID_ARP_INDEX = -17,
- I40IW_ERR_INVALID_FPM_FUNC_ID = -18,
- I40IW_ERR_QP_INVALID_MSG_SIZE = -19,
- I40IW_ERR_QP_TOOMANY_WRS_POSTED = -20,
- I40IW_ERR_INVALID_FRAG_COUNT = -21,
- I40IW_ERR_QUEUE_EMPTY = -22,
- I40IW_ERR_INVALID_ALIGNMENT = -23,
- I40IW_ERR_FLUSHED_QUEUE = -24,
- I40IW_ERR_INVALID_PUSH_PAGE_INDEX = -25,
- I40IW_ERR_INVALID_IMM_DATA_SIZE = -26,
- I40IW_ERR_TIMEOUT = -27,
- I40IW_ERR_OPCODE_MISMATCH = -28,
- I40IW_ERR_CQP_COMPL_ERROR = -29,
- I40IW_ERR_INVALID_VF_ID = -30,
- I40IW_ERR_INVALID_HMCFN_ID = -31,
- I40IW_ERR_BACKING_PAGE_ERROR = -32,
- I40IW_ERR_NO_PBLCHUNKS_AVAILABLE = -33,
- I40IW_ERR_INVALID_PBLE_INDEX = -34,
- I40IW_ERR_INVALID_SD_INDEX = -35,
- I40IW_ERR_INVALID_PAGE_DESC_INDEX = -36,
- I40IW_ERR_INVALID_SD_TYPE = -37,
- I40IW_ERR_MEMCPY_FAILED = -38,
- I40IW_ERR_INVALID_HMC_OBJ_INDEX = -39,
- I40IW_ERR_INVALID_HMC_OBJ_COUNT = -40,
- I40IW_ERR_INVALID_SRQ_ARM_LIMIT = -41,
- I40IW_ERR_SRQ_ENABLED = -42,
- I40IW_ERR_BUF_TOO_SHORT = -43,
- I40IW_ERR_BAD_IWARP_CQE = -44,
- I40IW_ERR_NVM_BLANK_MODE = -45,
- I40IW_ERR_NOT_IMPLEMENTED = -46,
- I40IW_ERR_PE_DOORBELL_NOT_ENABLED = -47,
- I40IW_ERR_NOT_READY = -48,
- I40IW_NOT_SUPPORTED = -49,
- I40IW_ERR_FIRMWARE_API_VERSION = -50,
- I40IW_ERR_RING_FULL = -51,
- I40IW_ERR_MPA_CRC = -61,
- I40IW_ERR_NO_TXBUFS = -62,
- I40IW_ERR_SEQ_NUM = -63,
- I40IW_ERR_list_empty = -64,
- I40IW_ERR_INVALID_MAC_ADDR = -65,
- I40IW_ERR_BAD_STAG = -66,
- I40IW_ERR_CQ_COMPL_ERROR = -67,
- I40IW_ERR_QUEUE_DESTROYED = -68
-
-};
-#endif
diff --git a/drivers/infiniband/hw/i40iw/i40iw_type.h b/drivers/infiniband/hw/i40iw/i40iw_type.h
deleted file mode 100644
index 7b76259752b0..000000000000
--- a/drivers/infiniband/hw/i40iw/i40iw_type.h
+++ /dev/null
@@ -1,1350 +0,0 @@
-/*******************************************************************************
-*
-* Copyright (c) 2015-2016 Intel Corporation. All rights reserved.
-*
-* This software is available to you under a choice of one of two
-* licenses. You may choose to be licensed under the terms of the GNU
-* General Public License (GPL) Version 2, available from the file
-* COPYING in the main directory of this source tree, or the
-* OpenFabrics.org BSD license below:
-*
-* Redistribution and use in source and binary forms, with or
-* without modification, are permitted provided that the following
-* conditions are met:
-*
-* - Redistributions of source code must retain the above
-* copyright notice, this list of conditions and the following
-* disclaimer.
-*
-* - Redistributions in binary form must reproduce the above
-* copyright notice, this list of conditions and the following
-* disclaimer in the documentation and/or other materials
-* provided with the distribution.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-* SOFTWARE.
-*
-*******************************************************************************/
-
-#ifndef I40IW_TYPE_H
-#define I40IW_TYPE_H
-#include "i40iw_user.h"
-#include "i40iw_hmc.h"
-#include "i40iw_vf.h"
-#include "i40iw_virtchnl.h"
-
-struct i40iw_cqp_sq_wqe {
- u64 buf[I40IW_CQP_WQE_SIZE];
-};
-
-struct i40iw_sc_aeqe {
- u64 buf[I40IW_AEQE_SIZE];
-};
-
-struct i40iw_ceqe {
- u64 buf[I40IW_CEQE_SIZE];
-};
-
-struct i40iw_cqp_ctx {
- u64 buf[I40IW_CQP_CTX_SIZE];
-};
-
-struct i40iw_cq_shadow_area {
- u64 buf[I40IW_SHADOW_AREA_SIZE];
-};
-
-struct i40iw_sc_dev;
-struct i40iw_hmc_info;
-struct i40iw_vsi_pestat;
-
-struct i40iw_cqp_ops;
-struct i40iw_ccq_ops;
-struct i40iw_ceq_ops;
-struct i40iw_aeq_ops;
-struct i40iw_mr_ops;
-struct i40iw_cqp_misc_ops;
-struct i40iw_pd_ops;
-struct i40iw_priv_qp_ops;
-struct i40iw_priv_cq_ops;
-struct i40iw_hmc_ops;
-
-enum i40iw_page_size {
- I40IW_PAGE_SIZE_4K,
- I40IW_PAGE_SIZE_2M
-};
-
-enum i40iw_resource_indicator_type {
- I40IW_RSRC_INDICATOR_TYPE_ADAPTER = 0,
- I40IW_RSRC_INDICATOR_TYPE_CQ,
- I40IW_RSRC_INDICATOR_TYPE_QP,
- I40IW_RSRC_INDICATOR_TYPE_SRQ
-};
-
-enum i40iw_hdrct_flags {
- DDP_LEN_FLAG = 0x80,
- DDP_HDR_FLAG = 0x40,
- RDMA_HDR_FLAG = 0x20
-};
-
-enum i40iw_term_layers {
- LAYER_RDMA = 0,
- LAYER_DDP = 1,
- LAYER_MPA = 2
-};
-
-enum i40iw_term_error_types {
- RDMAP_REMOTE_PROT = 1,
- RDMAP_REMOTE_OP = 2,
- DDP_CATASTROPHIC = 0,
- DDP_TAGGED_BUFFER = 1,
- DDP_UNTAGGED_BUFFER = 2,
- DDP_LLP = 3
-};
-
-enum i40iw_term_rdma_errors {
- RDMAP_INV_STAG = 0x00,
- RDMAP_INV_BOUNDS = 0x01,
- RDMAP_ACCESS = 0x02,
- RDMAP_UNASSOC_STAG = 0x03,
- RDMAP_TO_WRAP = 0x04,
- RDMAP_INV_RDMAP_VER = 0x05,
- RDMAP_UNEXPECTED_OP = 0x06,
- RDMAP_CATASTROPHIC_LOCAL = 0x07,
- RDMAP_CATASTROPHIC_GLOBAL = 0x08,
- RDMAP_CANT_INV_STAG = 0x09,
- RDMAP_UNSPECIFIED = 0xff
-};
-
-enum i40iw_term_ddp_errors {
- DDP_CATASTROPHIC_LOCAL = 0x00,
- DDP_TAGGED_INV_STAG = 0x00,
- DDP_TAGGED_BOUNDS = 0x01,
- DDP_TAGGED_UNASSOC_STAG = 0x02,
- DDP_TAGGED_TO_WRAP = 0x03,
- DDP_TAGGED_INV_DDP_VER = 0x04,
- DDP_UNTAGGED_INV_QN = 0x01,
- DDP_UNTAGGED_INV_MSN_NO_BUF = 0x02,
- DDP_UNTAGGED_INV_MSN_RANGE = 0x03,
- DDP_UNTAGGED_INV_MO = 0x04,
- DDP_UNTAGGED_INV_TOO_LONG = 0x05,
- DDP_UNTAGGED_INV_DDP_VER = 0x06
-};
-
-enum i40iw_term_mpa_errors {
- MPA_CLOSED = 0x01,
- MPA_CRC = 0x02,
- MPA_MARKER = 0x03,
- MPA_REQ_RSP = 0x04,
-};
-
-enum i40iw_flush_opcode {
- FLUSH_INVALID = 0,
- FLUSH_PROT_ERR,
- FLUSH_REM_ACCESS_ERR,
- FLUSH_LOC_QP_OP_ERR,
- FLUSH_REM_OP_ERR,
- FLUSH_LOC_LEN_ERR,
- FLUSH_GENERAL_ERR,
- FLUSH_FATAL_ERR
-};
-
-enum i40iw_term_eventtypes {
- TERM_EVENT_QP_FATAL,
- TERM_EVENT_QP_ACCESS_ERR
-};
-
-struct i40iw_terminate_hdr {
- u8 layer_etype;
- u8 error_code;
- u8 hdrct;
- u8 rsvd;
-};
-
-enum i40iw_debug_flag {
- I40IW_DEBUG_NONE = 0x00000000,
- I40IW_DEBUG_ERR = 0x00000001,
- I40IW_DEBUG_INIT = 0x00000002,
- I40IW_DEBUG_DEV = 0x00000004,
- I40IW_DEBUG_CM = 0x00000008,
- I40IW_DEBUG_VERBS = 0x00000010,
- I40IW_DEBUG_PUDA = 0x00000020,
- I40IW_DEBUG_ILQ = 0x00000040,
- I40IW_DEBUG_IEQ = 0x00000080,
- I40IW_DEBUG_QP = 0x00000100,
- I40IW_DEBUG_CQ = 0x00000200,
- I40IW_DEBUG_MR = 0x00000400,
- I40IW_DEBUG_PBLE = 0x00000800,
- I40IW_DEBUG_WQE = 0x00001000,
- I40IW_DEBUG_AEQ = 0x00002000,
- I40IW_DEBUG_CQP = 0x00004000,
- I40IW_DEBUG_HMC = 0x00008000,
- I40IW_DEBUG_USER = 0x00010000,
- I40IW_DEBUG_VIRT = 0x00020000,
- I40IW_DEBUG_DCB = 0x00040000,
- I40IW_DEBUG_CQE = 0x00800000,
- I40IW_DEBUG_ALL = 0xFFFFFFFF
-};
-
-enum i40iw_hw_stats_index_32b {
- I40IW_HW_STAT_INDEX_IP4RXDISCARD = 0,
- I40IW_HW_STAT_INDEX_IP4RXTRUNC,
- I40IW_HW_STAT_INDEX_IP4TXNOROUTE,
- I40IW_HW_STAT_INDEX_IP6RXDISCARD,
- I40IW_HW_STAT_INDEX_IP6RXTRUNC,
- I40IW_HW_STAT_INDEX_IP6TXNOROUTE,
- I40IW_HW_STAT_INDEX_TCPRTXSEG,
- I40IW_HW_STAT_INDEX_TCPRXOPTERR,
- I40IW_HW_STAT_INDEX_TCPRXPROTOERR,
- I40IW_HW_STAT_INDEX_MAX_32
-};
-
-enum i40iw_hw_stats_index_64b {
- I40IW_HW_STAT_INDEX_IP4RXOCTS = 0,
- I40IW_HW_STAT_INDEX_IP4RXPKTS,
- I40IW_HW_STAT_INDEX_IP4RXFRAGS,
- I40IW_HW_STAT_INDEX_IP4RXMCPKTS,
- I40IW_HW_STAT_INDEX_IP4TXOCTS,
- I40IW_HW_STAT_INDEX_IP4TXPKTS,
- I40IW_HW_STAT_INDEX_IP4TXFRAGS,
- I40IW_HW_STAT_INDEX_IP4TXMCPKTS,
- I40IW_HW_STAT_INDEX_IP6RXOCTS,
- I40IW_HW_STAT_INDEX_IP6RXPKTS,
- I40IW_HW_STAT_INDEX_IP6RXFRAGS,
- I40IW_HW_STAT_INDEX_IP6RXMCPKTS,
- I40IW_HW_STAT_INDEX_IP6TXOCTS,
- I40IW_HW_STAT_INDEX_IP6TXPKTS,
- I40IW_HW_STAT_INDEX_IP6TXFRAGS,
- I40IW_HW_STAT_INDEX_IP6TXMCPKTS,
- I40IW_HW_STAT_INDEX_TCPRXSEGS,
- I40IW_HW_STAT_INDEX_TCPTXSEG,
- I40IW_HW_STAT_INDEX_RDMARXRDS,
- I40IW_HW_STAT_INDEX_RDMARXSNDS,
- I40IW_HW_STAT_INDEX_RDMARXWRS,
- I40IW_HW_STAT_INDEX_RDMATXRDS,
- I40IW_HW_STAT_INDEX_RDMATXSNDS,
- I40IW_HW_STAT_INDEX_RDMATXWRS,
- I40IW_HW_STAT_INDEX_RDMAVBND,
- I40IW_HW_STAT_INDEX_RDMAVINV,
- I40IW_HW_STAT_INDEX_MAX_64
-};
-
-struct i40iw_dev_hw_stats_offsets {
- u32 stats_offset_32[I40IW_HW_STAT_INDEX_MAX_32];
- u32 stats_offset_64[I40IW_HW_STAT_INDEX_MAX_64];
-};
-
-struct i40iw_dev_hw_stats {
- u64 stats_value_32[I40IW_HW_STAT_INDEX_MAX_32];
- u64 stats_value_64[I40IW_HW_STAT_INDEX_MAX_64];
-};
-
-struct i40iw_vsi_pestat {
- struct i40iw_hw *hw;
- struct i40iw_dev_hw_stats hw_stats;
- struct i40iw_dev_hw_stats last_read_hw_stats;
- struct i40iw_dev_hw_stats_offsets hw_stats_offsets;
- struct timer_list stats_timer;
- spinlock_t lock; /* rdma stats lock */
-};
-
-struct i40iw_hw {
- u8 __iomem *hw_addr;
- void *dev_context;
- struct i40iw_hmc_info hmc;
-};
-
-struct i40iw_pfpdu {
- struct list_head rxlist;
- u32 rcv_nxt;
- u32 fps;
- u32 max_fpdu_data;
- bool mode;
- bool mpa_crc_err;
- u64 total_ieq_bufs;
- u64 fpdu_processed;
- u64 bad_seq_num;
- u64 crc_err;
- u64 no_tx_bufs;
- u64 tx_err;
- u64 out_of_order;
- u64 pmode_count;
-};
-
-struct i40iw_sc_pd {
- u32 size;
- struct i40iw_sc_dev *dev;
- u16 pd_id;
- int abi_ver;
-};
-
-struct i40iw_cqp_quanta {
- u64 elem[I40IW_CQP_WQE_SIZE];
-};
-
-struct i40iw_sc_cqp {
- u32 size;
- u64 sq_pa;
- u64 host_ctx_pa;
- void *back_cqp;
- struct i40iw_sc_dev *dev;
- enum i40iw_status_code (*process_cqp_sds)(struct i40iw_sc_dev *,
- struct i40iw_update_sds_info *);
- struct i40iw_dma_mem sdbuf;
- struct i40iw_ring sq_ring;
- struct i40iw_cqp_quanta *sq_base;
- u64 *host_ctx;
- u64 *scratch_array;
- u32 cqp_id;
- u32 sq_size;
- u32 hw_sq_size;
- u8 struct_ver;
- u8 polarity;
- bool en_datacenter_tcp;
- u8 hmc_profile;
- u8 enabled_vf_count;
- u8 timeout_count;
-};
-
-struct i40iw_sc_aeq {
- u32 size;
- u64 aeq_elem_pa;
- struct i40iw_sc_dev *dev;
- struct i40iw_sc_aeqe *aeqe_base;
- void *pbl_list;
- u32 elem_cnt;
- struct i40iw_ring aeq_ring;
- bool virtual_map;
- u8 pbl_chunk_size;
- u32 first_pm_pbl_idx;
- u8 polarity;
-};
-
-struct i40iw_sc_ceq {
- u32 size;
- u64 ceq_elem_pa;
- struct i40iw_sc_dev *dev;
- struct i40iw_ceqe *ceqe_base;
- void *pbl_list;
- u32 ceq_id;
- u32 elem_cnt;
- struct i40iw_ring ceq_ring;
- bool virtual_map;
- u8 pbl_chunk_size;
- bool tph_en;
- u8 tph_val;
- u32 first_pm_pbl_idx;
- u8 polarity;
-};
-
-struct i40iw_sc_cq {
- struct i40iw_cq_uk cq_uk;
- u64 cq_pa;
- u64 shadow_area_pa;
- struct i40iw_sc_dev *dev;
- struct i40iw_sc_vsi *vsi;
- void *pbl_list;
- void *back_cq;
- u32 ceq_id;
- u32 shadow_read_threshold;
- bool ceqe_mask;
- bool virtual_map;
- u8 pbl_chunk_size;
- u8 cq_type;
- bool ceq_id_valid;
- bool tph_en;
- u8 tph_val;
- u32 first_pm_pbl_idx;
- bool check_overflow;
-};
-
-struct i40iw_sc_qp {
- struct i40iw_qp_uk qp_uk;
- u64 sq_pa;
- u64 rq_pa;
- u64 hw_host_ctx_pa;
- u64 shadow_area_pa;
- u64 q2_pa;
- struct i40iw_sc_dev *dev;
- struct i40iw_sc_vsi *vsi;
- struct i40iw_sc_pd *pd;
- u64 *hw_host_ctx;
- void *llp_stream_handle;
- void *back_qp;
- struct i40iw_pfpdu pfpdu;
- u8 *q2_buf;
- u64 qp_compl_ctx;
- u16 qs_handle;
- u16 exception_lan_queue;
- u16 push_idx;
- u8 sq_tph_val;
- u8 rq_tph_val;
- u8 qp_state;
- u8 qp_type;
- u8 hw_sq_size;
- u8 hw_rq_size;
- u8 src_mac_addr_idx;
- bool sq_tph_en;
- bool rq_tph_en;
- bool rcv_tph_en;
- bool xmit_tph_en;
- bool virtual_map;
- bool flush_sq;
- bool flush_rq;
- u8 user_pri;
- struct list_head list;
- bool on_qoslist;
- bool sq_flush;
- enum i40iw_flush_opcode flush_code;
- enum i40iw_term_eventtypes eventtype;
- u8 term_flags;
-};
-
-struct i40iw_hmc_fpm_misc {
- u32 max_ceqs;
- u32 max_sds;
- u32 xf_block_size;
- u32 q1_block_size;
- u32 ht_multiplier;
- u32 timer_bucket;
-};
-
-struct i40iw_vchnl_if {
- enum i40iw_status_code (*vchnl_recv)(struct i40iw_sc_dev *, u32, u8 *, u16);
- enum i40iw_status_code (*vchnl_send)(struct i40iw_sc_dev *dev, u32, u8 *, u16);
-};
-
-#define I40IW_VCHNL_MAX_VF_MSG_SIZE 512
-
-struct i40iw_vchnl_vf_msg_buffer {
- struct i40iw_virtchnl_op_buf vchnl_msg;
- char parm_buffer[I40IW_VCHNL_MAX_VF_MSG_SIZE - 1];
-};
-
-struct i40iw_qos {
- struct list_head qplist;
- spinlock_t lock; /* qos list */
- u16 qs_handle;
-};
-
-struct i40iw_vfdev {
- struct i40iw_sc_dev *pf_dev;
- u8 *hmc_info_mem;
- struct i40iw_vsi_pestat pestat;
- struct i40iw_hmc_pble_info *pble_info;
- struct i40iw_hmc_info hmc_info;
- struct i40iw_vchnl_vf_msg_buffer vf_msg_buffer;
- u64 fpm_query_buf_pa;
- u64 *fpm_query_buf;
- u32 vf_id;
- u32 msg_count;
- bool pf_hmc_initialized;
- u16 pmf_index;
- u16 iw_vf_idx; /* VF Device table index */
- bool stats_initialized;
-};
-
-#define I40IW_INVALID_FCN_ID 0xff
-struct i40iw_sc_vsi {
- struct i40iw_sc_dev *dev;
- void *back_vsi; /* Owned by OS */
- u32 ilq_count;
- struct i40iw_virt_mem ilq_mem;
- struct i40iw_puda_rsrc *ilq;
- u32 ieq_count;
- struct i40iw_virt_mem ieq_mem;
- struct i40iw_puda_rsrc *ieq;
- u16 mss;
- u8 fcn_id;
- bool stats_fcn_id_alloc;
- struct i40iw_qos qos[I40IW_MAX_USER_PRIORITY];
- struct i40iw_vsi_pestat *pestat;
-};
-
-struct i40iw_sc_dev {
- struct list_head cqp_cmd_head; /* head of the CQP command list */
- spinlock_t cqp_lock; /* cqp list sync */
- struct i40iw_dev_uk dev_uk;
- bool fcn_id_array[I40IW_MAX_STATS_COUNT];
- struct i40iw_dma_mem vf_fpm_query_buf[I40IW_MAX_PE_ENABLED_VF_COUNT];
- u64 fpm_query_buf_pa;
- u64 fpm_commit_buf_pa;
- u64 *fpm_query_buf;
- u64 *fpm_commit_buf;
- void *back_dev;
- struct i40iw_hw *hw;
- u8 __iomem *db_addr;
- struct i40iw_hmc_info *hmc_info;
- struct i40iw_hmc_pble_info *pble_info;
- struct i40iw_vfdev *vf_dev[I40IW_MAX_PE_ENABLED_VF_COUNT];
- struct i40iw_sc_cqp *cqp;
- struct i40iw_sc_aeq *aeq;
- struct i40iw_sc_ceq *ceq[I40IW_CEQ_MAX_COUNT];
- struct i40iw_sc_cq *ccq;
- struct i40iw_cqp_ops *cqp_ops;
- struct i40iw_ccq_ops *ccq_ops;
- struct i40iw_ceq_ops *ceq_ops;
- struct i40iw_aeq_ops *aeq_ops;
- struct i40iw_pd_ops *iw_pd_ops;
- struct i40iw_priv_qp_ops *iw_priv_qp_ops;
- struct i40iw_priv_cq_ops *iw_priv_cq_ops;
- struct i40iw_mr_ops *mr_ops;
- struct i40iw_cqp_misc_ops *cqp_misc_ops;
- struct i40iw_hmc_ops *hmc_ops;
- struct i40iw_vchnl_if vchnl_if;
- const struct i40iw_vf_cqp_ops *iw_vf_cqp_ops;
-
- struct i40iw_hmc_fpm_misc hmc_fpm_misc;
- u32 debug_mask;
- u16 exception_lan_queue;
- u8 hmc_fn_id;
- bool is_pf;
- bool vchnl_up;
- u8 vf_id;
- wait_queue_head_t vf_reqs;
- u64 cqp_cmd_stats[OP_SIZE_CQP_STAT_ARRAY];
- struct i40iw_vchnl_vf_msg_buffer vchnl_vf_msg_buf;
- u8 hw_rev;
-};
-
-struct i40iw_modify_cq_info {
- u64 cq_pa;
- struct i40iw_cqe *cq_base;
- void *pbl_list;
- u32 ceq_id;
- u32 cq_size;
- u32 shadow_read_threshold;
- bool virtual_map;
- u8 pbl_chunk_size;
- bool check_overflow;
- bool cq_resize;
- bool ceq_change;
- bool check_overflow_change;
- u32 first_pm_pbl_idx;
- bool ceq_valid;
-};
-
-struct i40iw_create_qp_info {
- u8 next_iwarp_state;
- bool ord_valid;
- bool tcp_ctx_valid;
- bool cq_num_valid;
- bool static_rsrc;
- bool arp_cache_idx_valid;
-};
-
-struct i40iw_modify_qp_info {
- u64 rx_win0;
- u64 rx_win1;
- u16 new_mss;
- u8 next_iwarp_state;
- u8 termlen;
- bool ord_valid;
- bool tcp_ctx_valid;
- bool cq_num_valid;
- bool static_rsrc;
- bool arp_cache_idx_valid;
- bool reset_tcp_conn;
- bool remove_hash_idx;
- bool dont_send_term;
- bool dont_send_fin;
- bool cached_var_valid;
- bool mss_change;
- bool force_loopback;
-};
-
-struct i40iw_ccq_cqe_info {
- struct i40iw_sc_cqp *cqp;
- u64 scratch;
- u32 op_ret_val;
- u16 maj_err_code;
- u16 min_err_code;
- u8 op_code;
- bool error;
-};
-
-struct i40iw_l2params {
- u16 qs_handle_list[I40IW_MAX_USER_PRIORITY];
- u16 mss;
-};
-
-struct i40iw_vsi_init_info {
- struct i40iw_sc_dev *dev;
- void *back_vsi;
- struct i40iw_l2params *params;
-};
-
-struct i40iw_vsi_stats_info {
- struct i40iw_vsi_pestat *pestat;
- u8 fcn_id;
- bool alloc_fcn_id;
- bool stats_initialize;
-};
-
-struct i40iw_device_init_info {
- u64 fpm_query_buf_pa;
- u64 fpm_commit_buf_pa;
- u64 *fpm_query_buf;
- u64 *fpm_commit_buf;
- struct i40iw_hw *hw;
- void __iomem *bar0;
- enum i40iw_status_code (*vchnl_send)(struct i40iw_sc_dev *, u32, u8 *, u16);
- u16 exception_lan_queue;
- u8 hmc_fn_id;
- bool is_pf;
- u32 debug_mask;
-};
-
-enum i40iw_cqp_hmc_profile {
- I40IW_HMC_PROFILE_DEFAULT = 1,
- I40IW_HMC_PROFILE_FAVOR_VF = 2,
- I40IW_HMC_PROFILE_EQUAL = 3,
-};
-
-struct i40iw_cqp_init_info {
- u64 cqp_compl_ctx;
- u64 host_ctx_pa;
- u64 sq_pa;
- struct i40iw_sc_dev *dev;
- struct i40iw_cqp_quanta *sq;
- u64 *host_ctx;
- u64 *scratch_array;
- u32 sq_size;
- u8 struct_ver;
- bool en_datacenter_tcp;
- u8 hmc_profile;
- u8 enabled_vf_count;
-};
-
-struct i40iw_ceq_init_info {
- u64 ceqe_pa;
- struct i40iw_sc_dev *dev;
- u64 *ceqe_base;
- void *pbl_list;
- u32 elem_cnt;
- u32 ceq_id;
- bool virtual_map;
- u8 pbl_chunk_size;
- bool tph_en;
- u8 tph_val;
- u32 first_pm_pbl_idx;
-};
-
-struct i40iw_aeq_init_info {
- u64 aeq_elem_pa;
- struct i40iw_sc_dev *dev;
- u32 *aeqe_base;
- void *pbl_list;
- u32 elem_cnt;
- bool virtual_map;
- u8 pbl_chunk_size;
- u32 first_pm_pbl_idx;
-};
-
-struct i40iw_ccq_init_info {
- u64 cq_pa;
- u64 shadow_area_pa;
- struct i40iw_sc_dev *dev;
- struct i40iw_cqe *cq_base;
- u64 *shadow_area;
- void *pbl_list;
- u32 num_elem;
- u32 ceq_id;
- u32 shadow_read_threshold;
- bool ceqe_mask;
- bool ceq_id_valid;
- bool tph_en;
- u8 tph_val;
- bool avoid_mem_cflct;
- bool virtual_map;
- u8 pbl_chunk_size;
- u32 first_pm_pbl_idx;
-};
-
-struct i40iwarp_offload_info {
- u16 rcv_mark_offset;
- u16 snd_mark_offset;
- u16 pd_id;
- u8 ddp_ver;
- u8 rdmap_ver;
- u8 ord_size;
- u8 ird_size;
- bool wr_rdresp_en;
- bool rd_enable;
- bool snd_mark_en;
- bool rcv_mark_en;
- bool bind_en;
- bool fast_reg_en;
- bool priv_mode_en;
- bool lsmm_present;
- u8 iwarp_mode;
- bool align_hdrs;
- bool rcv_no_mpa_crc;
-
- u8 last_byte_sent;
-};
-
-struct i40iw_tcp_offload_info {
- bool ipv4;
- bool no_nagle;
- bool insert_vlan_tag;
- bool time_stamp;
- u8 cwnd_inc_limit;
- bool drop_ooo_seg;
- u8 dup_ack_thresh;
- u8 ttl;
- u8 src_mac_addr_idx;
- bool avoid_stretch_ack;
- u8 tos;
- u16 src_port;
- u16 dst_port;
- u32 dest_ip_addr0;
- u32 dest_ip_addr1;
- u32 dest_ip_addr2;
- u32 dest_ip_addr3;
- u32 snd_mss;
- u16 vlan_tag;
- u16 arp_idx;
- u32 flow_label;
- bool wscale;
- u8 tcp_state;
- u8 snd_wscale;
- u8 rcv_wscale;
- u32 time_stamp_recent;
- u32 time_stamp_age;
- u32 snd_nxt;
- u32 snd_wnd;
- u32 rcv_nxt;
- u32 rcv_wnd;
- u32 snd_max;
- u32 snd_una;
- u32 srtt;
- u32 rtt_var;
- u32 ss_thresh;
- u32 cwnd;
- u32 snd_wl1;
- u32 snd_wl2;
- u32 max_snd_window;
- u8 rexmit_thresh;
- u32 local_ipaddr0;
- u32 local_ipaddr1;
- u32 local_ipaddr2;
- u32 local_ipaddr3;
- bool ignore_tcp_opt;
- bool ignore_tcp_uns_opt;
-};
-
-struct i40iw_qp_host_ctx_info {
- u64 qp_compl_ctx;
- struct i40iw_tcp_offload_info *tcp_info;
- struct i40iwarp_offload_info *iwarp_info;
- u32 send_cq_num;
- u32 rcv_cq_num;
- u16 push_idx;
- bool push_mode_en;
- bool tcp_info_valid;
- bool iwarp_info_valid;
- bool err_rq_idx_valid;
- u16 err_rq_idx;
- bool add_to_qoslist;
- u8 user_pri;
-};
-
-struct i40iw_aeqe_info {
- u64 compl_ctx;
- u32 qp_cq_id;
- u16 ae_id;
- u16 wqe_idx;
- u8 tcp_state;
- u8 iwarp_state;
- bool qp;
- bool cq;
- bool sq;
- bool in_rdrsp_wr;
- bool out_rdrsp;
- u8 q2_data_written;
- bool aeqe_overflow;
-};
-
-struct i40iw_allocate_stag_info {
- u64 total_len;
- u32 chunk_size;
- u32 stag_idx;
- u32 page_size;
- u16 pd_id;
- u16 access_rights;
- bool remote_access;
- bool use_hmc_fcn_index;
- u8 hmc_fcn_index;
- bool use_pf_rid;
-};
-
-struct i40iw_reg_ns_stag_info {
- u64 reg_addr_pa;
- u64 fbo;
- void *va;
- u64 total_len;
- u32 page_size;
- u32 chunk_size;
- u32 first_pm_pbl_index;
- enum i40iw_addressing_type addr_type;
- i40iw_stag_index stag_idx;
- u16 access_rights;
- u16 pd_id;
- i40iw_stag_key stag_key;
- bool use_hmc_fcn_index;
- u8 hmc_fcn_index;
- bool use_pf_rid;
-};
-
-struct i40iw_fast_reg_stag_info {
- u64 wr_id;
- u64 reg_addr_pa;
- u64 fbo;
- void *va;
- u64 total_len;
- u32 page_size;
- u32 chunk_size;
- u32 first_pm_pbl_index;
- enum i40iw_addressing_type addr_type;
- i40iw_stag_index stag_idx;
- u16 access_rights;
- u16 pd_id;
- i40iw_stag_key stag_key;
- bool local_fence;
- bool read_fence;
- bool signaled;
- bool use_hmc_fcn_index;
- u8 hmc_fcn_index;
- bool use_pf_rid;
- bool defer_flag;
-};
-
-struct i40iw_dealloc_stag_info {
- u32 stag_idx;
- u16 pd_id;
- bool mr;
- bool dealloc_pbl;
-};
-
-struct i40iw_register_shared_stag {
- void *va;
- enum i40iw_addressing_type addr_type;
- i40iw_stag_index new_stag_idx;
- i40iw_stag_index parent_stag_idx;
- u32 access_rights;
- u16 pd_id;
- i40iw_stag_key new_stag_key;
-};
-
-struct i40iw_qp_init_info {
- struct i40iw_qp_uk_init_info qp_uk_init_info;
- struct i40iw_sc_pd *pd;
- struct i40iw_sc_vsi *vsi;
- u64 *host_ctx;
- u8 *q2;
- u64 sq_pa;
- u64 rq_pa;
- u64 host_ctx_pa;
- u64 q2_pa;
- u64 shadow_area_pa;
- int abi_ver;
- u8 sq_tph_val;
- u8 rq_tph_val;
- u8 type;
- bool sq_tph_en;
- bool rq_tph_en;
- bool rcv_tph_en;
- bool xmit_tph_en;
- bool virtual_map;
-};
-
-struct i40iw_cq_init_info {
- struct i40iw_sc_dev *dev;
- u64 cq_base_pa;
- u64 shadow_area_pa;
- u32 ceq_id;
- u32 shadow_read_threshold;
- bool virtual_map;
- bool ceqe_mask;
- u8 pbl_chunk_size;
- u32 first_pm_pbl_idx;
- bool ceq_id_valid;
- bool tph_en;
- u8 tph_val;
- u8 type;
- struct i40iw_cq_uk_init_info cq_uk_init_info;
-};
-
-struct i40iw_upload_context_info {
- u64 buf_pa;
- bool freeze_qp;
- bool raw_format;
- u32 qp_id;
- u8 qp_type;
-};
-
-struct i40iw_add_arp_cache_entry_info {
- u8 mac_addr[6];
- u32 reach_max;
- u16 arp_index;
- bool permanent;
-};
-
-struct i40iw_apbvt_info {
- u16 port;
- bool add;
-};
-
-enum i40iw_quad_entry_type {
- I40IW_QHASH_TYPE_TCP_ESTABLISHED = 1,
- I40IW_QHASH_TYPE_TCP_SYN,
-};
-
-enum i40iw_quad_hash_manage_type {
- I40IW_QHASH_MANAGE_TYPE_DELETE = 0,
- I40IW_QHASH_MANAGE_TYPE_ADD,
- I40IW_QHASH_MANAGE_TYPE_MODIFY
-};
-
-struct i40iw_qhash_table_info {
- struct i40iw_sc_vsi *vsi;
- enum i40iw_quad_hash_manage_type manage;
- enum i40iw_quad_entry_type entry_type;
- bool vlan_valid;
- bool ipv4_valid;
- u8 mac_addr[6];
- u16 vlan_id;
- u8 user_pri;
- u32 qp_num;
- u32 dest_ip[4];
- u32 src_ip[4];
- u16 dest_port;
- u16 src_port;
-};
-
-struct i40iw_local_mac_ipaddr_entry_info {
- u8 mac_addr[6];
- u8 entry_idx;
-};
-
-struct i40iw_cqp_manage_push_page_info {
- u32 push_idx;
- u16 qs_handle;
- u8 free_page;
-};
-
-struct i40iw_qp_flush_info {
- u16 sq_minor_code;
- u16 sq_major_code;
- u16 rq_minor_code;
- u16 rq_major_code;
- u16 ae_code;
- u8 ae_source;
- bool sq;
- bool rq;
- bool userflushcode;
- bool generate_ae;
-};
-
-struct i40iw_cqp_commit_fpm_values {
- u64 qp_base;
- u64 cq_base;
- u32 hte_base;
- u32 arp_base;
- u32 apbvt_inuse_base;
- u32 mr_base;
- u32 xf_base;
- u32 xffl_base;
- u32 q1_base;
- u32 q1fl_base;
- u32 fsimc_base;
- u32 fsiav_base;
- u32 pbl_base;
-
- u32 qp_cnt;
- u32 cq_cnt;
- u32 hte_cnt;
- u32 arp_cnt;
- u32 mr_cnt;
- u32 xf_cnt;
- u32 xffl_cnt;
- u32 q1_cnt;
- u32 q1fl_cnt;
- u32 fsimc_cnt;
- u32 fsiav_cnt;
- u32 pbl_cnt;
-};
-
-struct i40iw_cqp_query_fpm_values {
- u16 first_pe_sd_index;
- u32 qp_objsize;
- u32 cq_objsize;
- u32 hte_objsize;
- u32 arp_objsize;
- u32 mr_objsize;
- u32 xf_objsize;
- u32 q1_objsize;
- u32 fsimc_objsize;
- u32 fsiav_objsize;
-
- u32 qp_max;
- u32 cq_max;
- u32 hte_max;
- u32 arp_max;
- u32 mr_max;
- u32 xf_max;
- u32 xffl_max;
- u32 q1_max;
- u32 q1fl_max;
- u32 fsimc_max;
- u32 fsiav_max;
- u32 pbl_max;
-};
-
-struct i40iw_cqp_ops {
- enum i40iw_status_code (*cqp_init)(struct i40iw_sc_cqp *,
- struct i40iw_cqp_init_info *);
- enum i40iw_status_code (*cqp_create)(struct i40iw_sc_cqp *, u16 *, u16 *);
- void (*cqp_post_sq)(struct i40iw_sc_cqp *);
- u64 *(*cqp_get_next_send_wqe)(struct i40iw_sc_cqp *, u64 scratch);
- enum i40iw_status_code (*cqp_destroy)(struct i40iw_sc_cqp *);
- enum i40iw_status_code (*poll_for_cqp_op_done)(struct i40iw_sc_cqp *, u8,
- struct i40iw_ccq_cqe_info *);
-};
-
-struct i40iw_ccq_ops {
- enum i40iw_status_code (*ccq_init)(struct i40iw_sc_cq *,
- struct i40iw_ccq_init_info *);
- enum i40iw_status_code (*ccq_create)(struct i40iw_sc_cq *, u64, bool, bool);
- enum i40iw_status_code (*ccq_destroy)(struct i40iw_sc_cq *, u64, bool);
- enum i40iw_status_code (*ccq_create_done)(struct i40iw_sc_cq *);
- enum i40iw_status_code (*ccq_get_cqe_info)(struct i40iw_sc_cq *,
- struct i40iw_ccq_cqe_info *);
- void (*ccq_arm)(struct i40iw_sc_cq *);
-};
-
-struct i40iw_ceq_ops {
- enum i40iw_status_code (*ceq_init)(struct i40iw_sc_ceq *,
- struct i40iw_ceq_init_info *);
- enum i40iw_status_code (*ceq_create)(struct i40iw_sc_ceq *, u64, bool);
- enum i40iw_status_code (*cceq_create_done)(struct i40iw_sc_ceq *);
- enum i40iw_status_code (*cceq_destroy_done)(struct i40iw_sc_ceq *);
- enum i40iw_status_code (*cceq_create)(struct i40iw_sc_ceq *, u64);
- enum i40iw_status_code (*ceq_destroy)(struct i40iw_sc_ceq *, u64, bool);
- void *(*process_ceq)(struct i40iw_sc_dev *, struct i40iw_sc_ceq *);
-};
-
-struct i40iw_aeq_ops {
- enum i40iw_status_code (*aeq_init)(struct i40iw_sc_aeq *,
- struct i40iw_aeq_init_info *);
- enum i40iw_status_code (*aeq_create)(struct i40iw_sc_aeq *, u64, bool);
- enum i40iw_status_code (*aeq_destroy)(struct i40iw_sc_aeq *, u64, bool);
- enum i40iw_status_code (*get_next_aeqe)(struct i40iw_sc_aeq *,
- struct i40iw_aeqe_info *);
- enum i40iw_status_code (*repost_aeq_entries)(struct i40iw_sc_dev *, u32);
- enum i40iw_status_code (*aeq_create_done)(struct i40iw_sc_aeq *);
- enum i40iw_status_code (*aeq_destroy_done)(struct i40iw_sc_aeq *);
-};
-
-struct i40iw_pd_ops {
- void (*pd_init)(struct i40iw_sc_dev *, struct i40iw_sc_pd *, u16, int);
-};
-
-struct i40iw_priv_qp_ops {
- enum i40iw_status_code (*qp_init)(struct i40iw_sc_qp *, struct i40iw_qp_init_info *);
- enum i40iw_status_code (*qp_create)(struct i40iw_sc_qp *,
- struct i40iw_create_qp_info *, u64, bool);
- enum i40iw_status_code (*qp_modify)(struct i40iw_sc_qp *,
- struct i40iw_modify_qp_info *, u64, bool);
- enum i40iw_status_code (*qp_destroy)(struct i40iw_sc_qp *, u64, bool, bool, bool);
- enum i40iw_status_code (*qp_flush_wqes)(struct i40iw_sc_qp *,
- struct i40iw_qp_flush_info *, u64, bool);
- enum i40iw_status_code (*qp_upload_context)(struct i40iw_sc_dev *,
- struct i40iw_upload_context_info *,
- u64, bool);
- enum i40iw_status_code (*qp_setctx)(struct i40iw_sc_qp *, u64 *,
- struct i40iw_qp_host_ctx_info *);
-
- void (*qp_send_lsmm)(struct i40iw_sc_qp *, void *, u32, i40iw_stag);
- void (*qp_send_lsmm_nostag)(struct i40iw_sc_qp *, void *, u32);
- void (*qp_send_rtt)(struct i40iw_sc_qp *, bool);
- enum i40iw_status_code (*qp_post_wqe0)(struct i40iw_sc_qp *, u8);
- enum i40iw_status_code (*iw_mr_fast_register)(struct i40iw_sc_qp *,
- struct i40iw_fast_reg_stag_info *,
- bool);
-};
-
-struct i40iw_priv_cq_ops {
- enum i40iw_status_code (*cq_init)(struct i40iw_sc_cq *, struct i40iw_cq_init_info *);
- enum i40iw_status_code (*cq_create)(struct i40iw_sc_cq *, u64, bool, bool);
- enum i40iw_status_code (*cq_destroy)(struct i40iw_sc_cq *, u64, bool);
- enum i40iw_status_code (*cq_modify)(struct i40iw_sc_cq *,
- struct i40iw_modify_cq_info *, u64, bool);
-};
-
-struct i40iw_mr_ops {
- enum i40iw_status_code (*alloc_stag)(struct i40iw_sc_dev *,
- struct i40iw_allocate_stag_info *, u64, bool);
- enum i40iw_status_code (*mr_reg_non_shared)(struct i40iw_sc_dev *,
- struct i40iw_reg_ns_stag_info *,
- u64, bool);
- enum i40iw_status_code (*mr_reg_shared)(struct i40iw_sc_dev *,
- struct i40iw_register_shared_stag *,
- u64, bool);
- enum i40iw_status_code (*dealloc_stag)(struct i40iw_sc_dev *,
- struct i40iw_dealloc_stag_info *,
- u64, bool);
- enum i40iw_status_code (*query_stag)(struct i40iw_sc_dev *, u64, u32, bool);
- enum i40iw_status_code (*mw_alloc)(struct i40iw_sc_dev *, u64, u32, u16, bool);
-};
-
-struct i40iw_cqp_misc_ops {
- enum i40iw_status_code (*manage_push_page)(struct i40iw_sc_cqp *,
- struct i40iw_cqp_manage_push_page_info *,
- u64, bool);
- enum i40iw_status_code (*manage_hmc_pm_func_table)(struct i40iw_sc_cqp *,
- u64, u8, bool, bool);
- enum i40iw_status_code (*set_hmc_resource_profile)(struct i40iw_sc_cqp *,
- u64, u8, u8, bool, bool);
- enum i40iw_status_code (*commit_fpm_values)(struct i40iw_sc_cqp *, u64, u8,
- struct i40iw_dma_mem *, bool, u8);
- enum i40iw_status_code (*query_fpm_values)(struct i40iw_sc_cqp *, u64, u8,
- struct i40iw_dma_mem *, bool, u8);
- enum i40iw_status_code (*static_hmc_pages_allocated)(struct i40iw_sc_cqp *,
- u64, u8, bool, bool);
- enum i40iw_status_code (*add_arp_cache_entry)(struct i40iw_sc_cqp *,
- struct i40iw_add_arp_cache_entry_info *,
- u64, bool);
- enum i40iw_status_code (*del_arp_cache_entry)(struct i40iw_sc_cqp *, u64, u16, bool);
- enum i40iw_status_code (*query_arp_cache_entry)(struct i40iw_sc_cqp *, u64, u16, bool);
- enum i40iw_status_code (*manage_apbvt_entry)(struct i40iw_sc_cqp *,
- struct i40iw_apbvt_info *, u64, bool);
- enum i40iw_status_code (*manage_qhash_table_entry)(struct i40iw_sc_cqp *,
- struct i40iw_qhash_table_info *, u64, bool);
- enum i40iw_status_code (*alloc_local_mac_ipaddr_table_entry)(struct i40iw_sc_cqp *, u64, bool);
- enum i40iw_status_code (*add_local_mac_ipaddr_entry)(struct i40iw_sc_cqp *,
- struct i40iw_local_mac_ipaddr_entry_info *,
- u64, bool);
- enum i40iw_status_code (*del_local_mac_ipaddr_entry)(struct i40iw_sc_cqp *, u64, u8, u8, bool);
- enum i40iw_status_code (*cqp_nop)(struct i40iw_sc_cqp *, u64, bool);
- enum i40iw_status_code (*commit_fpm_values_done)(struct i40iw_sc_cqp
- *);
- enum i40iw_status_code (*query_fpm_values_done)(struct i40iw_sc_cqp *);
- enum i40iw_status_code (*manage_hmc_pm_func_table_done)(struct i40iw_sc_cqp *);
- enum i40iw_status_code (*update_suspend_qp)(struct i40iw_sc_cqp *, struct i40iw_sc_qp *, u64);
- enum i40iw_status_code (*update_resume_qp)(struct i40iw_sc_cqp *, struct i40iw_sc_qp *, u64);
-};
-
-struct i40iw_hmc_ops {
- enum i40iw_status_code (*init_iw_hmc)(struct i40iw_sc_dev *, u8);
- enum i40iw_status_code (*parse_fpm_query_buf)(u64 *, struct i40iw_hmc_info *,
- struct i40iw_hmc_fpm_misc *);
- enum i40iw_status_code (*configure_iw_fpm)(struct i40iw_sc_dev *, u8);
- enum i40iw_status_code (*parse_fpm_commit_buf)(u64 *, struct i40iw_hmc_obj_info *, u32 *sd);
- enum i40iw_status_code (*create_hmc_object)(struct i40iw_sc_dev *dev,
- struct i40iw_hmc_create_obj_info *);
- enum i40iw_status_code (*del_hmc_object)(struct i40iw_sc_dev *dev,
- struct i40iw_hmc_del_obj_info *,
- bool reset);
- enum i40iw_status_code (*pf_init_vfhmc)(struct i40iw_sc_dev *, u8, u32 *);
- enum i40iw_status_code (*vf_configure_vffpm)(struct i40iw_sc_dev *, u32 *);
-};
-
-struct cqp_info {
- union {
- struct {
- struct i40iw_sc_qp *qp;
- struct i40iw_create_qp_info info;
- u64 scratch;
- } qp_create;
-
- struct {
- struct i40iw_sc_qp *qp;
- struct i40iw_modify_qp_info info;
- u64 scratch;
- } qp_modify;
-
- struct {
- struct i40iw_sc_qp *qp;
- u64 scratch;
- bool remove_hash_idx;
- bool ignore_mw_bnd;
- } qp_destroy;
-
- struct {
- struct i40iw_sc_cq *cq;
- u64 scratch;
- bool check_overflow;
- } cq_create;
-
- struct {
- struct i40iw_sc_cq *cq;
- u64 scratch;
- } cq_destroy;
-
- struct {
- struct i40iw_sc_dev *dev;
- struct i40iw_allocate_stag_info info;
- u64 scratch;
- } alloc_stag;
-
- struct {
- struct i40iw_sc_dev *dev;
- u64 scratch;
- u32 mw_stag_index;
- u16 pd_id;
- } mw_alloc;
-
- struct {
- struct i40iw_sc_dev *dev;
- struct i40iw_reg_ns_stag_info info;
- u64 scratch;
- } mr_reg_non_shared;
-
- struct {
- struct i40iw_sc_dev *dev;
- struct i40iw_dealloc_stag_info info;
- u64 scratch;
- } dealloc_stag;
-
- struct {
- struct i40iw_sc_cqp *cqp;
- struct i40iw_local_mac_ipaddr_entry_info info;
- u64 scratch;
- } add_local_mac_ipaddr_entry;
-
- struct {
- struct i40iw_sc_cqp *cqp;
- struct i40iw_add_arp_cache_entry_info info;
- u64 scratch;
- } add_arp_cache_entry;
-
- struct {
- struct i40iw_sc_cqp *cqp;
- u64 scratch;
- u8 entry_idx;
- u8 ignore_ref_count;
- } del_local_mac_ipaddr_entry;
-
- struct {
- struct i40iw_sc_cqp *cqp;
- u64 scratch;
- u16 arp_index;
- } del_arp_cache_entry;
-
- struct {
- struct i40iw_sc_cqp *cqp;
- struct i40iw_manage_vf_pble_info info;
- u64 scratch;
- } manage_vf_pble_bp;
-
- struct {
- struct i40iw_sc_cqp *cqp;
- struct i40iw_cqp_manage_push_page_info info;
- u64 scratch;
- } manage_push_page;
-
- struct {
- struct i40iw_sc_dev *dev;
- struct i40iw_upload_context_info info;
- u64 scratch;
- } qp_upload_context;
-
- struct {
- struct i40iw_sc_cqp *cqp;
- u64 scratch;
- } alloc_local_mac_ipaddr_entry;
-
- struct {
- struct i40iw_sc_dev *dev;
- struct i40iw_hmc_fcn_info info;
- u64 scratch;
- } manage_hmc_pm;
-
- struct {
- struct i40iw_sc_ceq *ceq;
- u64 scratch;
- } ceq_create;
-
- struct {
- struct i40iw_sc_ceq *ceq;
- u64 scratch;
- } ceq_destroy;
-
- struct {
- struct i40iw_sc_aeq *aeq;
- u64 scratch;
- } aeq_create;
-
- struct {
- struct i40iw_sc_aeq *aeq;
- u64 scratch;
- } aeq_destroy;
-
- struct {
- struct i40iw_sc_qp *qp;
- struct i40iw_qp_flush_info info;
- u64 scratch;
- } qp_flush_wqes;
-
- struct {
- struct i40iw_sc_cqp *cqp;
- void *fpm_values_va;
- u64 fpm_values_pa;
- u8 hmc_fn_id;
- u64 scratch;
- } query_fpm_values;
-
- struct {
- struct i40iw_sc_cqp *cqp;
- void *fpm_values_va;
- u64 fpm_values_pa;
- u8 hmc_fn_id;
- u64 scratch;
- } commit_fpm_values;
-
- struct {
- struct i40iw_sc_cqp *cqp;
- struct i40iw_apbvt_info info;
- u64 scratch;
- } manage_apbvt_entry;
-
- struct {
- struct i40iw_sc_cqp *cqp;
- struct i40iw_qhash_table_info info;
- u64 scratch;
- } manage_qhash_table_entry;
-
- struct {
- struct i40iw_sc_dev *dev;
- struct i40iw_update_sds_info info;
- u64 scratch;
- } update_pe_sds;
-
- struct {
- struct i40iw_sc_cqp *cqp;
- struct i40iw_sc_qp *qp;
- u64 scratch;
- } suspend_resume;
- } u;
-};
-
-struct cqp_commands_info {
- struct list_head cqp_cmd_entry;
- u8 cqp_cmd;
- u8 post_sq;
- struct cqp_info in;
-};
-
-struct i40iw_virtchnl_work_info {
- void (*callback_fcn)(void *vf_dev);
- void *worker_vf_dev;
-};
-
-#endif
diff --git a/drivers/infiniband/hw/i40iw/i40iw_ucontext.h b/drivers/infiniband/hw/i40iw/i40iw_ucontext.h
deleted file mode 100644
index 57d3f1d11ff1..000000000000
--- a/drivers/infiniband/hw/i40iw/i40iw_ucontext.h
+++ /dev/null
@@ -1,107 +0,0 @@
-/*
- * Copyright (c) 2006 - 2016 Intel Corporation. All rights reserved.
- * Copyright (c) 2005 Topspin Communications. All rights reserved.
- * Copyright (c) 2005 Cisco Systems. All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-
-#ifndef I40IW_USER_CONTEXT_H
-#define I40IW_USER_CONTEXT_H
-
-#include <linux/types.h>
-
-#define I40IW_ABI_VER 5
-
-struct i40iw_alloc_ucontext_req {
- __u32 reserved32;
- __u8 userspace_ver;
- __u8 reserved8[3];
-};
-
-struct i40iw_alloc_ucontext_resp {
- __u32 max_pds; /* maximum pds allowed for this user process */
- __u32 max_qps; /* maximum qps allowed for this user process */
- __u32 wq_size; /* size of the WQs (sq+rq) allocated to the mmaped area */
- __u8 kernel_ver;
- __u8 reserved[3];
-};
-
-struct i40iw_alloc_pd_resp {
- __u32 pd_id;
- __u8 reserved[4];
-};
-
-struct i40iw_create_cq_req {
- __u64 user_cq_buffer;
- __u64 user_shadow_area;
-};
-
-struct i40iw_create_qp_req {
- __u64 user_wqe_buffers;
- __u64 user_compl_ctx;
-
- /* UDA QP PHB */
- __u64 user_sq_phb; /* place for VA of the sq phb buff */
- __u64 user_rq_phb; /* place for VA of the rq phb buff */
-};
-
-enum i40iw_memreg_type {
- IW_MEMREG_TYPE_MEM = 0x0000,
- IW_MEMREG_TYPE_QP = 0x0001,
- IW_MEMREG_TYPE_CQ = 0x0002,
-};
-
-struct i40iw_mem_reg_req {
- __u16 reg_type; /* Memory, QP or CQ */
- __u16 cq_pages;
- __u16 rq_pages;
- __u16 sq_pages;
-};
-
-struct i40iw_create_cq_resp {
- __u32 cq_id;
- __u32 cq_size;
- __u32 mmap_db_index;
- __u32 reserved;
-};
-
-struct i40iw_create_qp_resp {
- __u32 qp_id;
- __u32 actual_sq_size;
- __u32 actual_rq_size;
- __u32 i40iw_drv_opt;
- __u16 push_idx;
- __u8 lsmm;
- __u8 rsvd2;
-};
-
-#endif
diff --git a/drivers/infiniband/hw/i40iw/i40iw_uk.c b/drivers/infiniband/hw/i40iw/i40iw_uk.c
deleted file mode 100644
index b0d3a0e8a9b5..000000000000
--- a/drivers/infiniband/hw/i40iw/i40iw_uk.c
+++ /dev/null
@@ -1,1198 +0,0 @@
-/*******************************************************************************
-*
-* Copyright (c) 2015-2016 Intel Corporation. All rights reserved.
-*
-* This software is available to you under a choice of one of two
-* licenses. You may choose to be licensed under the terms of the GNU
-* General Public License (GPL) Version 2, available from the file
-* COPYING in the main directory of this source tree, or the
-* OpenFabrics.org BSD license below:
-*
-* Redistribution and use in source and binary forms, with or
-* without modification, are permitted provided that the following
-* conditions are met:
-*
-* - Redistributions of source code must retain the above
-* copyright notice, this list of conditions and the following
-* disclaimer.
-*
-* - Redistributions in binary form must reproduce the above
-* copyright notice, this list of conditions and the following
-* disclaimer in the documentation and/or other materials
-* provided with the distribution.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-* SOFTWARE.
-*
-*******************************************************************************/
-
-#include "i40iw_osdep.h"
-#include "i40iw_status.h"
-#include "i40iw_d.h"
-#include "i40iw_user.h"
-#include "i40iw_register.h"
-
-static u32 nop_signature = 0x55550000;
-
-/**
- * i40iw_nop_1 - insert a nop wqe and move head. no post work
- * @qp: hw qp ptr
- */
-static enum i40iw_status_code i40iw_nop_1(struct i40iw_qp_uk *qp)
-{
- u64 header, *wqe;
- u64 *wqe_0 = NULL;
- u32 wqe_idx, peek_head;
- bool signaled = false;
-
- if (!qp->sq_ring.head)
- return I40IW_ERR_PARAM;
-
- wqe_idx = I40IW_RING_GETCURRENT_HEAD(qp->sq_ring);
- wqe = qp->sq_base[wqe_idx].elem;
-
- qp->sq_wrtrk_array[wqe_idx].wqe_size = I40IW_QP_WQE_MIN_SIZE;
-
- peek_head = (qp->sq_ring.head + 1) % qp->sq_ring.size;
- wqe_0 = qp->sq_base[peek_head].elem;
- if (peek_head)
- wqe_0[3] = LS_64(!qp->swqe_polarity, I40IWQPSQ_VALID);
- else
- wqe_0[3] = LS_64(qp->swqe_polarity, I40IWQPSQ_VALID);
-
- set_64bit_val(wqe, 0, 0);
- set_64bit_val(wqe, 8, 0);
- set_64bit_val(wqe, 16, 0);
-
- header = LS_64(I40IWQP_OP_NOP, I40IWQPSQ_OPCODE) |
- LS_64(signaled, I40IWQPSQ_SIGCOMPL) |
- LS_64(qp->swqe_polarity, I40IWQPSQ_VALID) | nop_signature++;
-
- wmb(); /* Memory barrier to ensure data is written before valid bit is set */
-
- set_64bit_val(wqe, 24, header);
- return 0;
-}
-
-/**
- * i40iw_qp_post_wr - post wr to hrdware
- * @qp: hw qp ptr
- */
-void i40iw_qp_post_wr(struct i40iw_qp_uk *qp)
-{
- u64 temp;
- u32 hw_sq_tail;
- u32 sw_sq_head;
-
- mb(); /* valid bit is written and loads completed before reading shadow */
-
- /* read the doorbell shadow area */
- get_64bit_val(qp->shadow_area, 0, &temp);
-
- hw_sq_tail = (u32)RS_64(temp, I40IW_QP_DBSA_HW_SQ_TAIL);
- sw_sq_head = I40IW_RING_GETCURRENT_HEAD(qp->sq_ring);
- if (sw_sq_head != hw_sq_tail) {
- if (sw_sq_head > qp->initial_ring.head) {
- if ((hw_sq_tail >= qp->initial_ring.head) &&
- (hw_sq_tail < sw_sq_head)) {
- writel(qp->qp_id, qp->wqe_alloc_reg);
- }
- } else if (sw_sq_head != qp->initial_ring.head) {
- if ((hw_sq_tail >= qp->initial_ring.head) ||
- (hw_sq_tail < sw_sq_head)) {
- writel(qp->qp_id, qp->wqe_alloc_reg);
- }
- }
- }
-
- qp->initial_ring.head = qp->sq_ring.head;
-}
-
-/**
- * i40iw_qp_ring_push_db - ring qp doorbell
- * @qp: hw qp ptr
- * @wqe_idx: wqe index
- */
-static void i40iw_qp_ring_push_db(struct i40iw_qp_uk *qp, u32 wqe_idx)
-{
- set_32bit_val(qp->push_db, 0, LS_32((wqe_idx >> 2), I40E_PFPE_WQEALLOC_WQE_DESC_INDEX) | qp->qp_id);
- qp->initial_ring.head = I40IW_RING_GETCURRENT_HEAD(qp->sq_ring);
-}
-
-/**
- * i40iw_qp_get_next_send_wqe - return next wqe ptr
- * @qp: hw qp ptr
- * @wqe_idx: return wqe index
- * @wqe_size: size of sq wqe
- */
-u64 *i40iw_qp_get_next_send_wqe(struct i40iw_qp_uk *qp,
- u32 *wqe_idx,
- u8 wqe_size,
- u32 total_size,
- u64 wr_id
- )
-{
- u64 *wqe = NULL;
- u64 wqe_ptr;
- u32 peek_head = 0;
- u16 offset;
- enum i40iw_status_code ret_code = 0;
- u8 nop_wqe_cnt = 0, i;
- u64 *wqe_0 = NULL;
-
- *wqe_idx = I40IW_RING_GETCURRENT_HEAD(qp->sq_ring);
-
- if (!*wqe_idx)
- qp->swqe_polarity = !qp->swqe_polarity;
- wqe_ptr = (uintptr_t)qp->sq_base[*wqe_idx].elem;
- offset = (u16)(wqe_ptr) & 0x7F;
- if ((offset + wqe_size) > I40IW_QP_WQE_MAX_SIZE) {
- nop_wqe_cnt = (u8)(I40IW_QP_WQE_MAX_SIZE - offset) / I40IW_QP_WQE_MIN_SIZE;
- for (i = 0; i < nop_wqe_cnt; i++) {
- i40iw_nop_1(qp);
- I40IW_RING_MOVE_HEAD(qp->sq_ring, ret_code);
- if (ret_code)
- return NULL;
- }
-
- *wqe_idx = I40IW_RING_GETCURRENT_HEAD(qp->sq_ring);
- if (!*wqe_idx)
- qp->swqe_polarity = !qp->swqe_polarity;
- }
-
- if (((*wqe_idx & 3) == 1) && (wqe_size == I40IW_WQE_SIZE_64)) {
- i40iw_nop_1(qp);
- I40IW_RING_MOVE_HEAD(qp->sq_ring, ret_code);
- if (ret_code)
- return NULL;
- *wqe_idx = I40IW_RING_GETCURRENT_HEAD(qp->sq_ring);
- if (!*wqe_idx)
- qp->swqe_polarity = !qp->swqe_polarity;
- }
- I40IW_RING_MOVE_HEAD_BY_COUNT(qp->sq_ring,
- wqe_size / I40IW_QP_WQE_MIN_SIZE, ret_code);
- if (ret_code)
- return NULL;
-
- wqe = qp->sq_base[*wqe_idx].elem;
-
- peek_head = I40IW_RING_GETCURRENT_HEAD(qp->sq_ring);
- wqe_0 = qp->sq_base[peek_head].elem;
-
- if (((peek_head & 3) == 1) || ((peek_head & 3) == 3)) {
- if (RS_64(wqe_0[3], I40IWQPSQ_VALID) != !qp->swqe_polarity)
- wqe_0[3] = LS_64(!qp->swqe_polarity, I40IWQPSQ_VALID);
- }
-
- qp->sq_wrtrk_array[*wqe_idx].wrid = wr_id;
- qp->sq_wrtrk_array[*wqe_idx].wr_len = total_size;
- qp->sq_wrtrk_array[*wqe_idx].wqe_size = wqe_size;
- return wqe;
-}
-
-/**
- * i40iw_set_fragment - set fragment in wqe
- * @wqe: wqe for setting fragment
- * @offset: offset value
- * @sge: sge length and stag
- */
-static void i40iw_set_fragment(u64 *wqe, u32 offset, struct i40iw_sge *sge)
-{
- if (sge) {
- set_64bit_val(wqe, offset, LS_64(sge->tag_off, I40IWQPSQ_FRAG_TO));
- set_64bit_val(wqe, (offset + 8),
- (LS_64(sge->len, I40IWQPSQ_FRAG_LEN) |
- LS_64(sge->stag, I40IWQPSQ_FRAG_STAG)));
- }
-}
-
-/**
- * i40iw_qp_get_next_recv_wqe - get next qp's rcv wqe
- * @qp: hw qp ptr
- * @wqe_idx: return wqe index
- */
-u64 *i40iw_qp_get_next_recv_wqe(struct i40iw_qp_uk *qp, u32 *wqe_idx)
-{
- u64 *wqe = NULL;
- enum i40iw_status_code ret_code;
-
- if (I40IW_RING_FULL_ERR(qp->rq_ring))
- return NULL;
-
- I40IW_ATOMIC_RING_MOVE_HEAD(qp->rq_ring, *wqe_idx, ret_code);
- if (ret_code)
- return NULL;
- if (!*wqe_idx)
- qp->rwqe_polarity = !qp->rwqe_polarity;
- /* rq_wqe_size_multiplier is no of qwords in one rq wqe */
- wqe = qp->rq_base[*wqe_idx * (qp->rq_wqe_size_multiplier >> 2)].elem;
-
- return wqe;
-}
-
-/**
- * i40iw_rdma_write - rdma write operation
- * @qp: hw qp ptr
- * @info: post sq information
- * @post_sq: flag to post sq
- */
-static enum i40iw_status_code i40iw_rdma_write(struct i40iw_qp_uk *qp,
- struct i40iw_post_sq_info *info,
- bool post_sq)
-{
- u64 header;
- u64 *wqe;
- struct i40iw_rdma_write *op_info;
- u32 i, wqe_idx;
- u32 total_size = 0, byte_off;
- enum i40iw_status_code ret_code;
- bool read_fence = false;
- u8 wqe_size;
-
- op_info = &info->op.rdma_write;
- if (op_info->num_lo_sges > qp->max_sq_frag_cnt)
- return I40IW_ERR_INVALID_FRAG_COUNT;
-
- for (i = 0; i < op_info->num_lo_sges; i++)
- total_size += op_info->lo_sg_list[i].len;
-
- if (total_size > I40IW_MAX_OUTBOUND_MESSAGE_SIZE)
- return I40IW_ERR_QP_INVALID_MSG_SIZE;
-
- read_fence |= info->read_fence;
-
- ret_code = i40iw_fragcnt_to_wqesize_sq(op_info->num_lo_sges, &wqe_size);
- if (ret_code)
- return ret_code;
-
- wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, wqe_size, total_size, info->wr_id);
- if (!wqe)
- return I40IW_ERR_QP_TOOMANY_WRS_POSTED;
- set_64bit_val(wqe, 16,
- LS_64(op_info->rem_addr.tag_off, I40IWQPSQ_FRAG_TO));
- if (!op_info->rem_addr.stag)
- return I40IW_ERR_BAD_STAG;
-
- header = LS_64(op_info->rem_addr.stag, I40IWQPSQ_REMSTAG) |
- LS_64(I40IWQP_OP_RDMA_WRITE, I40IWQPSQ_OPCODE) |
- LS_64((op_info->num_lo_sges > 1 ? (op_info->num_lo_sges - 1) : 0), I40IWQPSQ_ADDFRAGCNT) |
- LS_64(read_fence, I40IWQPSQ_READFENCE) |
- LS_64(info->local_fence, I40IWQPSQ_LOCALFENCE) |
- LS_64(info->signaled, I40IWQPSQ_SIGCOMPL) |
- LS_64(qp->swqe_polarity, I40IWQPSQ_VALID);
-
- i40iw_set_fragment(wqe, 0, op_info->lo_sg_list);
-
- for (i = 1, byte_off = 32; i < op_info->num_lo_sges; i++) {
- i40iw_set_fragment(wqe, byte_off, &op_info->lo_sg_list[i]);
- byte_off += 16;
- }
-
- wmb(); /* make sure WQE is populated before valid bit is set */
-
- set_64bit_val(wqe, 24, header);
-
- if (post_sq)
- i40iw_qp_post_wr(qp);
-
- return 0;
-}
-
-/**
- * i40iw_rdma_read - rdma read command
- * @qp: hw qp ptr
- * @info: post sq information
- * @inv_stag: flag for inv_stag
- * @post_sq: flag to post sq
- */
-static enum i40iw_status_code i40iw_rdma_read(struct i40iw_qp_uk *qp,
- struct i40iw_post_sq_info *info,
- bool inv_stag,
- bool post_sq)
-{
- u64 *wqe;
- struct i40iw_rdma_read *op_info;
- u64 header;
- u32 wqe_idx;
- enum i40iw_status_code ret_code;
- u8 wqe_size;
- bool local_fence = false;
-
- op_info = &info->op.rdma_read;
- ret_code = i40iw_fragcnt_to_wqesize_sq(1, &wqe_size);
- if (ret_code)
- return ret_code;
- wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, wqe_size, op_info->lo_addr.len, info->wr_id);
- if (!wqe)
- return I40IW_ERR_QP_TOOMANY_WRS_POSTED;
- local_fence |= info->local_fence;
-
- set_64bit_val(wqe, 16, LS_64(op_info->rem_addr.tag_off, I40IWQPSQ_FRAG_TO));
- header = LS_64(op_info->rem_addr.stag, I40IWQPSQ_REMSTAG) |
- LS_64((inv_stag ? I40IWQP_OP_RDMA_READ_LOC_INV : I40IWQP_OP_RDMA_READ), I40IWQPSQ_OPCODE) |
- LS_64(info->read_fence, I40IWQPSQ_READFENCE) |
- LS_64(local_fence, I40IWQPSQ_LOCALFENCE) |
- LS_64(info->signaled, I40IWQPSQ_SIGCOMPL) |
- LS_64(qp->swqe_polarity, I40IWQPSQ_VALID);
-
- i40iw_set_fragment(wqe, 0, &op_info->lo_addr);
-
- wmb(); /* make sure WQE is populated before valid bit is set */
-
- set_64bit_val(wqe, 24, header);
- if (post_sq)
- i40iw_qp_post_wr(qp);
-
- return 0;
-}
-
-/**
- * i40iw_send - rdma send command
- * @qp: hw qp ptr
- * @info: post sq information
- * @stag_to_inv: stag_to_inv value
- * @post_sq: flag to post sq
- */
-static enum i40iw_status_code i40iw_send(struct i40iw_qp_uk *qp,
- struct i40iw_post_sq_info *info,
- u32 stag_to_inv,
- bool post_sq)
-{
- u64 *wqe;
- struct i40iw_post_send *op_info;
- u64 header;
- u32 i, wqe_idx, total_size = 0, byte_off;
- enum i40iw_status_code ret_code;
- bool read_fence = false;
- u8 wqe_size;
-
- op_info = &info->op.send;
- if (qp->max_sq_frag_cnt < op_info->num_sges)
- return I40IW_ERR_INVALID_FRAG_COUNT;
-
- for (i = 0; i < op_info->num_sges; i++)
- total_size += op_info->sg_list[i].len;
- ret_code = i40iw_fragcnt_to_wqesize_sq(op_info->num_sges, &wqe_size);
- if (ret_code)
- return ret_code;
-
- wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, wqe_size, total_size, info->wr_id);
- if (!wqe)
- return I40IW_ERR_QP_TOOMANY_WRS_POSTED;
-
- read_fence |= info->read_fence;
- set_64bit_val(wqe, 16, 0);
- header = LS_64(stag_to_inv, I40IWQPSQ_REMSTAG) |
- LS_64(info->op_type, I40IWQPSQ_OPCODE) |
- LS_64((op_info->num_sges > 1 ? (op_info->num_sges - 1) : 0),
- I40IWQPSQ_ADDFRAGCNT) |
- LS_64(read_fence, I40IWQPSQ_READFENCE) |
- LS_64(info->local_fence, I40IWQPSQ_LOCALFENCE) |
- LS_64(info->signaled, I40IWQPSQ_SIGCOMPL) |
- LS_64(qp->swqe_polarity, I40IWQPSQ_VALID);
-
- i40iw_set_fragment(wqe, 0, op_info->sg_list);
-
- for (i = 1, byte_off = 32; i < op_info->num_sges; i++) {
- i40iw_set_fragment(wqe, byte_off, &op_info->sg_list[i]);
- byte_off += 16;
- }
-
- wmb(); /* make sure WQE is populated before valid bit is set */
-
- set_64bit_val(wqe, 24, header);
- if (post_sq)
- i40iw_qp_post_wr(qp);
-
- return 0;
-}
-
-/**
- * i40iw_inline_rdma_write - inline rdma write operation
- * @qp: hw qp ptr
- * @info: post sq information
- * @post_sq: flag to post sq
- */
-static enum i40iw_status_code i40iw_inline_rdma_write(struct i40iw_qp_uk *qp,
- struct i40iw_post_sq_info *info,
- bool post_sq)
-{
- u64 *wqe;
- u8 *dest, *src;
- struct i40iw_inline_rdma_write *op_info;
- u64 *push;
- u64 header = 0;
- u32 wqe_idx;
- enum i40iw_status_code ret_code;
- bool read_fence = false;
- u8 wqe_size;
-
- op_info = &info->op.inline_rdma_write;
- if (op_info->len > I40IW_MAX_INLINE_DATA_SIZE)
- return I40IW_ERR_INVALID_IMM_DATA_SIZE;
-
- ret_code = i40iw_inline_data_size_to_wqesize(op_info->len, &wqe_size);
- if (ret_code)
- return ret_code;
-
- wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, wqe_size, op_info->len, info->wr_id);
- if (!wqe)
- return I40IW_ERR_QP_TOOMANY_WRS_POSTED;
-
- read_fence |= info->read_fence;
- set_64bit_val(wqe, 16,
- LS_64(op_info->rem_addr.tag_off, I40IWQPSQ_FRAG_TO));
-
- header = LS_64(op_info->rem_addr.stag, I40IWQPSQ_REMSTAG) |
- LS_64(I40IWQP_OP_RDMA_WRITE, I40IWQPSQ_OPCODE) |
- LS_64(op_info->len, I40IWQPSQ_INLINEDATALEN) |
- LS_64(1, I40IWQPSQ_INLINEDATAFLAG) |
- LS_64((qp->push_db ? 1 : 0), I40IWQPSQ_PUSHWQE) |
- LS_64(read_fence, I40IWQPSQ_READFENCE) |
- LS_64(info->local_fence, I40IWQPSQ_LOCALFENCE) |
- LS_64(info->signaled, I40IWQPSQ_SIGCOMPL) |
- LS_64(qp->swqe_polarity, I40IWQPSQ_VALID);
-
- dest = (u8 *)wqe;
- src = (u8 *)(op_info->data);
-
- if (op_info->len <= 16) {
- memcpy(dest, src, op_info->len);
- } else {
- memcpy(dest, src, 16);
- src += 16;
- dest = (u8 *)wqe + 32;
- memcpy(dest, src, op_info->len - 16);
- }
-
- wmb(); /* make sure WQE is populated before valid bit is set */
-
- set_64bit_val(wqe, 24, header);
-
- if (qp->push_db) {
- push = (u64 *)((uintptr_t)qp->push_wqe + (wqe_idx & 0x3) * 0x20);
- memcpy(push, wqe, (op_info->len > 16) ? op_info->len + 16 : 32);
- i40iw_qp_ring_push_db(qp, wqe_idx);
- } else {
- if (post_sq)
- i40iw_qp_post_wr(qp);
- }
-
- return 0;
-}
-
-/**
- * i40iw_inline_send - inline send operation
- * @qp: hw qp ptr
- * @info: post sq information
- * @stag_to_inv: remote stag
- * @post_sq: flag to post sq
- */
-static enum i40iw_status_code i40iw_inline_send(struct i40iw_qp_uk *qp,
- struct i40iw_post_sq_info *info,
- u32 stag_to_inv,
- bool post_sq)
-{
- u64 *wqe;
- u8 *dest, *src;
- struct i40iw_post_inline_send *op_info;
- u64 header;
- u32 wqe_idx;
- enum i40iw_status_code ret_code;
- bool read_fence = false;
- u8 wqe_size;
- u64 *push;
-
- op_info = &info->op.inline_send;
- if (op_info->len > I40IW_MAX_INLINE_DATA_SIZE)
- return I40IW_ERR_INVALID_IMM_DATA_SIZE;
-
- ret_code = i40iw_inline_data_size_to_wqesize(op_info->len, &wqe_size);
- if (ret_code)
- return ret_code;
-
- wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, wqe_size, op_info->len, info->wr_id);
- if (!wqe)
- return I40IW_ERR_QP_TOOMANY_WRS_POSTED;
-
- read_fence |= info->read_fence;
- header = LS_64(stag_to_inv, I40IWQPSQ_REMSTAG) |
- LS_64(info->op_type, I40IWQPSQ_OPCODE) |
- LS_64(op_info->len, I40IWQPSQ_INLINEDATALEN) |
- LS_64(1, I40IWQPSQ_INLINEDATAFLAG) |
- LS_64((qp->push_db ? 1 : 0), I40IWQPSQ_PUSHWQE) |
- LS_64(read_fence, I40IWQPSQ_READFENCE) |
- LS_64(info->local_fence, I40IWQPSQ_LOCALFENCE) |
- LS_64(info->signaled, I40IWQPSQ_SIGCOMPL) |
- LS_64(qp->swqe_polarity, I40IWQPSQ_VALID);
-
- dest = (u8 *)wqe;
- src = (u8 *)(op_info->data);
-
- if (op_info->len <= 16) {
- memcpy(dest, src, op_info->len);
- } else {
- memcpy(dest, src, 16);
- src += 16;
- dest = (u8 *)wqe + 32;
- memcpy(dest, src, op_info->len - 16);
- }
-
- wmb(); /* make sure WQE is populated before valid bit is set */
-
- set_64bit_val(wqe, 24, header);
-
- if (qp->push_db) {
- push = (u64 *)((uintptr_t)qp->push_wqe + (wqe_idx & 0x3) * 0x20);
- memcpy(push, wqe, (op_info->len > 16) ? op_info->len + 16 : 32);
- i40iw_qp_ring_push_db(qp, wqe_idx);
- } else {
- if (post_sq)
- i40iw_qp_post_wr(qp);
- }
-
- return 0;
-}
-
-/**
- * i40iw_stag_local_invalidate - stag invalidate operation
- * @qp: hw qp ptr
- * @info: post sq information
- * @post_sq: flag to post sq
- */
-static enum i40iw_status_code i40iw_stag_local_invalidate(struct i40iw_qp_uk *qp,
- struct i40iw_post_sq_info *info,
- bool post_sq)
-{
- u64 *wqe;
- struct i40iw_inv_local_stag *op_info;
- u64 header;
- u32 wqe_idx;
- bool local_fence = false;
-
- op_info = &info->op.inv_local_stag;
- local_fence = info->local_fence;
-
- wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, I40IW_QP_WQE_MIN_SIZE, 0, info->wr_id);
- if (!wqe)
- return I40IW_ERR_QP_TOOMANY_WRS_POSTED;
- set_64bit_val(wqe, 0, 0);
- set_64bit_val(wqe, 8,
- LS_64(op_info->target_stag, I40IWQPSQ_LOCSTAG));
- set_64bit_val(wqe, 16, 0);
- header = LS_64(I40IW_OP_TYPE_INV_STAG, I40IWQPSQ_OPCODE) |
- LS_64(info->read_fence, I40IWQPSQ_READFENCE) |
- LS_64(local_fence, I40IWQPSQ_LOCALFENCE) |
- LS_64(info->signaled, I40IWQPSQ_SIGCOMPL) |
- LS_64(qp->swqe_polarity, I40IWQPSQ_VALID);
-
- wmb(); /* make sure WQE is populated before valid bit is set */
-
- set_64bit_val(wqe, 24, header);
-
- if (post_sq)
- i40iw_qp_post_wr(qp);
-
- return 0;
-}
-
-/**
- * i40iw_mw_bind - Memory Window bind operation
- * @qp: hw qp ptr
- * @info: post sq information
- * @post_sq: flag to post sq
- */
-static enum i40iw_status_code i40iw_mw_bind(struct i40iw_qp_uk *qp,
- struct i40iw_post_sq_info *info,
- bool post_sq)
-{
- u64 *wqe;
- struct i40iw_bind_window *op_info;
- u64 header;
- u32 wqe_idx;
- bool local_fence = false;
-
- op_info = &info->op.bind_window;
-
- local_fence |= info->local_fence;
- wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, I40IW_QP_WQE_MIN_SIZE, 0, info->wr_id);
- if (!wqe)
- return I40IW_ERR_QP_TOOMANY_WRS_POSTED;
- set_64bit_val(wqe, 0, (uintptr_t)op_info->va);
- set_64bit_val(wqe, 8,
- LS_64(op_info->mr_stag, I40IWQPSQ_PARENTMRSTAG) |
- LS_64(op_info->mw_stag, I40IWQPSQ_MWSTAG));
- set_64bit_val(wqe, 16, op_info->bind_length);
- header = LS_64(I40IW_OP_TYPE_BIND_MW, I40IWQPSQ_OPCODE) |
- LS_64(((op_info->enable_reads << 2) |
- (op_info->enable_writes << 3)),
- I40IWQPSQ_STAGRIGHTS) |
- LS_64((op_info->addressing_type == I40IW_ADDR_TYPE_VA_BASED ? 1 : 0),
- I40IWQPSQ_VABASEDTO) |
- LS_64(info->read_fence, I40IWQPSQ_READFENCE) |
- LS_64(local_fence, I40IWQPSQ_LOCALFENCE) |
- LS_64(info->signaled, I40IWQPSQ_SIGCOMPL) |
- LS_64(qp->swqe_polarity, I40IWQPSQ_VALID);
-
- wmb(); /* make sure WQE is populated before valid bit is set */
-
- set_64bit_val(wqe, 24, header);
-
- if (post_sq)
- i40iw_qp_post_wr(qp);
-
- return 0;
-}
-
-/**
- * i40iw_post_receive - post receive wqe
- * @qp: hw qp ptr
- * @info: post rq information
- */
-static enum i40iw_status_code i40iw_post_receive(struct i40iw_qp_uk *qp,
- struct i40iw_post_rq_info *info)
-{
- u64 *wqe;
- u64 header;
- u32 total_size = 0, wqe_idx, i, byte_off;
-
- if (qp->max_rq_frag_cnt < info->num_sges)
- return I40IW_ERR_INVALID_FRAG_COUNT;
- for (i = 0; i < info->num_sges; i++)
- total_size += info->sg_list[i].len;
- wqe = i40iw_qp_get_next_recv_wqe(qp, &wqe_idx);
- if (!wqe)
- return I40IW_ERR_QP_TOOMANY_WRS_POSTED;
-
- qp->rq_wrid_array[wqe_idx] = info->wr_id;
- set_64bit_val(wqe, 16, 0);
-
- header = LS_64((info->num_sges > 1 ? (info->num_sges - 1) : 0),
- I40IWQPSQ_ADDFRAGCNT) |
- LS_64(qp->rwqe_polarity, I40IWQPSQ_VALID);
-
- i40iw_set_fragment(wqe, 0, info->sg_list);
-
- for (i = 1, byte_off = 32; i < info->num_sges; i++) {
- i40iw_set_fragment(wqe, byte_off, &info->sg_list[i]);
- byte_off += 16;
- }
-
- wmb(); /* make sure WQE is populated before valid bit is set */
-
- set_64bit_val(wqe, 24, header);
-
- return 0;
-}
-
-/**
- * i40iw_cq_request_notification - cq notification request (door bell)
- * @cq: hw cq
- * @cq_notify: notification type
- */
-static void i40iw_cq_request_notification(struct i40iw_cq_uk *cq,
- enum i40iw_completion_notify cq_notify)
-{
- u64 temp_val;
- u16 sw_cq_sel;
- u8 arm_next_se = 0;
- u8 arm_next = 0;
- u8 arm_seq_num;
-
- get_64bit_val(cq->shadow_area, 32, &temp_val);
- arm_seq_num = (u8)RS_64(temp_val, I40IW_CQ_DBSA_ARM_SEQ_NUM);
- arm_seq_num++;
-
- sw_cq_sel = (u16)RS_64(temp_val, I40IW_CQ_DBSA_SW_CQ_SELECT);
- arm_next_se = (u8)RS_64(temp_val, I40IW_CQ_DBSA_ARM_NEXT_SE);
- arm_next_se |= 1;
- if (cq_notify == IW_CQ_COMPL_EVENT)
- arm_next = 1;
- temp_val = LS_64(arm_seq_num, I40IW_CQ_DBSA_ARM_SEQ_NUM) |
- LS_64(sw_cq_sel, I40IW_CQ_DBSA_SW_CQ_SELECT) |
- LS_64(arm_next_se, I40IW_CQ_DBSA_ARM_NEXT_SE) |
- LS_64(arm_next, I40IW_CQ_DBSA_ARM_NEXT);
-
- set_64bit_val(cq->shadow_area, 32, temp_val);
-
- wmb(); /* make sure WQE is populated before valid bit is set */
-
- writel(cq->cq_id, cq->cqe_alloc_reg);
-}
-
-/**
- * i40iw_cq_post_entries - update tail in shadow memory
- * @cq: hw cq
- * @count: # of entries processed
- */
-static enum i40iw_status_code i40iw_cq_post_entries(struct i40iw_cq_uk *cq,
- u8 count)
-{
- I40IW_RING_MOVE_TAIL_BY_COUNT(cq->cq_ring, count);
- set_64bit_val(cq->shadow_area, 0,
- I40IW_RING_GETCURRENT_HEAD(cq->cq_ring));
- return 0;
-}
-
-/**
- * i40iw_cq_poll_completion - get cq completion info
- * @cq: hw cq
- * @info: cq poll information returned
- * @post_cq: update cq tail
- */
-static enum i40iw_status_code i40iw_cq_poll_completion(struct i40iw_cq_uk *cq,
- struct i40iw_cq_poll_info *info)
-{
- u64 comp_ctx, qword0, qword2, qword3, wqe_qword;
- u64 *cqe, *sw_wqe;
- struct i40iw_qp_uk *qp;
- struct i40iw_ring *pring = NULL;
- u32 wqe_idx, q_type, array_idx = 0;
- enum i40iw_status_code ret_code = 0;
- bool move_cq_head = true;
- u8 polarity;
- u8 addl_wqes = 0;
-
- if (cq->avoid_mem_cflct)
- cqe = (u64 *)I40IW_GET_CURRENT_EXTENDED_CQ_ELEMENT(cq);
- else
- cqe = (u64 *)I40IW_GET_CURRENT_CQ_ELEMENT(cq);
-
- get_64bit_val(cqe, 24, &qword3);
- polarity = (u8)RS_64(qword3, I40IW_CQ_VALID);
-
- if (polarity != cq->polarity)
- return I40IW_ERR_QUEUE_EMPTY;
-
- q_type = (u8)RS_64(qword3, I40IW_CQ_SQ);
- info->error = (bool)RS_64(qword3, I40IW_CQ_ERROR);
- info->push_dropped = (bool)RS_64(qword3, I40IWCQ_PSHDROP);
- if (info->error) {
- info->comp_status = I40IW_COMPL_STATUS_FLUSHED;
- info->major_err = (bool)RS_64(qword3, I40IW_CQ_MAJERR);
- info->minor_err = (bool)RS_64(qword3, I40IW_CQ_MINERR);
- } else {
- info->comp_status = I40IW_COMPL_STATUS_SUCCESS;
- }
-
- get_64bit_val(cqe, 0, &qword0);
- get_64bit_val(cqe, 16, &qword2);
-
- info->tcp_seq_num = (u8)RS_64(qword0, I40IWCQ_TCPSEQNUM);
-
- info->qp_id = (u32)RS_64(qword2, I40IWCQ_QPID);
-
- get_64bit_val(cqe, 8, &comp_ctx);
-
- info->solicited_event = (bool)RS_64(qword3, I40IWCQ_SOEVENT);
- info->is_srq = (bool)RS_64(qword3, I40IWCQ_SRQ);
-
- qp = (struct i40iw_qp_uk *)(unsigned long)comp_ctx;
- if (!qp) {
- ret_code = I40IW_ERR_QUEUE_DESTROYED;
- goto exit;
- }
- wqe_idx = (u32)RS_64(qword3, I40IW_CQ_WQEIDX);
- info->qp_handle = (i40iw_qp_handle)(unsigned long)qp;
-
- if (q_type == I40IW_CQE_QTYPE_RQ) {
- array_idx = (wqe_idx * 4) / qp->rq_wqe_size_multiplier;
- if (info->comp_status == I40IW_COMPL_STATUS_FLUSHED) {
- info->wr_id = qp->rq_wrid_array[qp->rq_ring.tail];
- array_idx = qp->rq_ring.tail;
- } else {
- info->wr_id = qp->rq_wrid_array[array_idx];
- }
-
- info->op_type = I40IW_OP_TYPE_REC;
- if (qword3 & I40IWCQ_STAG_MASK) {
- info->stag_invalid_set = true;
- info->inv_stag = (u32)RS_64(qword2, I40IWCQ_INVSTAG);
- } else {
- info->stag_invalid_set = false;
- }
- info->bytes_xfered = (u32)RS_64(qword0, I40IWCQ_PAYLDLEN);
- I40IW_RING_SET_TAIL(qp->rq_ring, array_idx + 1);
- pring = &qp->rq_ring;
- } else {
- if (info->comp_status != I40IW_COMPL_STATUS_FLUSHED) {
- info->wr_id = qp->sq_wrtrk_array[wqe_idx].wrid;
- info->bytes_xfered = qp->sq_wrtrk_array[wqe_idx].wr_len;
-
- info->op_type = (u8)RS_64(qword3, I40IWCQ_OP);
- sw_wqe = qp->sq_base[wqe_idx].elem;
- get_64bit_val(sw_wqe, 24, &wqe_qword);
-
- addl_wqes = qp->sq_wrtrk_array[wqe_idx].wqe_size / I40IW_QP_WQE_MIN_SIZE;
- I40IW_RING_SET_TAIL(qp->sq_ring, (wqe_idx + addl_wqes));
- } else {
- do {
- u8 op_type;
- u32 tail;
-
- tail = qp->sq_ring.tail;
- sw_wqe = qp->sq_base[tail].elem;
- get_64bit_val(sw_wqe, 24, &wqe_qword);
- op_type = (u8)RS_64(wqe_qword, I40IWQPSQ_OPCODE);
- info->op_type = op_type;
- addl_wqes = qp->sq_wrtrk_array[tail].wqe_size / I40IW_QP_WQE_MIN_SIZE;
- I40IW_RING_SET_TAIL(qp->sq_ring, (tail + addl_wqes));
- if (op_type != I40IWQP_OP_NOP) {
- info->wr_id = qp->sq_wrtrk_array[tail].wrid;
- info->bytes_xfered = qp->sq_wrtrk_array[tail].wr_len;
- break;
- }
- } while (1);
- }
- pring = &qp->sq_ring;
- }
-
- ret_code = 0;
-
-exit:
- if (!ret_code &&
- (info->comp_status == I40IW_COMPL_STATUS_FLUSHED))
- if (pring && (I40IW_RING_MORE_WORK(*pring)))
- move_cq_head = false;
-
- if (move_cq_head) {
- I40IW_RING_MOVE_HEAD_NOCHECK(cq->cq_ring);
-
- if (I40IW_RING_GETCURRENT_HEAD(cq->cq_ring) == 0)
- cq->polarity ^= 1;
-
- I40IW_RING_MOVE_TAIL(cq->cq_ring);
- set_64bit_val(cq->shadow_area, 0,
- I40IW_RING_GETCURRENT_HEAD(cq->cq_ring));
- } else {
- if (info->is_srq)
- return ret_code;
- qword3 &= ~I40IW_CQ_WQEIDX_MASK;
- qword3 |= LS_64(pring->tail, I40IW_CQ_WQEIDX);
- set_64bit_val(cqe, 24, qword3);
- }
-
- return ret_code;
-}
-
-/**
- * i40iw_get_wqe_shift - get shift count for maximum wqe size
- * @wqdepth: depth of wq required.
- * @sge: Maximum Scatter Gather Elements wqe
- * @inline_data: Maximum inline data size
- * @shift: Returns the shift needed based on sge
- *
- * Shift can be used to left shift the wqe size based on number of SGEs and inlind data size.
- * For 1 SGE or inline data <= 16, shift = 0 (wqe size of 32 bytes).
- * For 2 or 3 SGEs or inline data <= 48, shift = 1 (wqe size of 64 bytes).
- * Shift of 2 otherwise (wqe size of 128 bytes).
- */
-enum i40iw_status_code i40iw_get_wqe_shift(u32 wqdepth, u32 sge, u32 inline_data, u8 *shift)
-{
- u32 size;
-
- *shift = 0;
- if (sge > 1 || inline_data > 16)
- *shift = (sge < 4 && inline_data <= 48) ? 1 : 2;
-
- /* check if wqdepth is multiple of 2 or not */
-
- if ((wqdepth < I40IWQP_SW_MIN_WQSIZE) || (wqdepth & (wqdepth - 1)))
- return I40IW_ERR_INVALID_SIZE;
-
- size = wqdepth << *shift; /* multiple of 32 bytes count */
- if (size > I40IWQP_SW_MAX_WQSIZE)
- return I40IW_ERR_INVALID_SIZE;
- return 0;
-}
-
-static struct i40iw_qp_uk_ops iw_qp_uk_ops = {
- .iw_qp_post_wr = i40iw_qp_post_wr,
- .iw_qp_ring_push_db = i40iw_qp_ring_push_db,
- .iw_rdma_write = i40iw_rdma_write,
- .iw_rdma_read = i40iw_rdma_read,
- .iw_send = i40iw_send,
- .iw_inline_rdma_write = i40iw_inline_rdma_write,
- .iw_inline_send = i40iw_inline_send,
- .iw_stag_local_invalidate = i40iw_stag_local_invalidate,
- .iw_mw_bind = i40iw_mw_bind,
- .iw_post_receive = i40iw_post_receive,
- .iw_post_nop = i40iw_nop
-};
-
-static struct i40iw_cq_ops iw_cq_ops = {
- .iw_cq_request_notification = i40iw_cq_request_notification,
- .iw_cq_poll_completion = i40iw_cq_poll_completion,
- .iw_cq_post_entries = i40iw_cq_post_entries,
- .iw_cq_clean = i40iw_clean_cq
-};
-
-static struct i40iw_device_uk_ops iw_device_uk_ops = {
- .iwarp_cq_uk_init = i40iw_cq_uk_init,
- .iwarp_qp_uk_init = i40iw_qp_uk_init,
-};
-
-/**
- * i40iw_qp_uk_init - initialize shared qp
- * @qp: hw qp (user and kernel)
- * @info: qp initialization info
- *
- * initializes the vars used in both user and kernel mode.
- * size of the wqe depends on numbers of max. fragements
- * allowed. Then size of wqe * the number of wqes should be the
- * amount of memory allocated for sq and rq. If srq is used,
- * then rq_base will point to one rq wqe only (not the whole
- * array of wqes)
- */
-enum i40iw_status_code i40iw_qp_uk_init(struct i40iw_qp_uk *qp,
- struct i40iw_qp_uk_init_info *info)
-{
- enum i40iw_status_code ret_code = 0;
- u32 sq_ring_size;
- u8 sqshift, rqshift;
-
- if (info->max_sq_frag_cnt > I40IW_MAX_WQ_FRAGMENT_COUNT)
- return I40IW_ERR_INVALID_FRAG_COUNT;
-
- if (info->max_rq_frag_cnt > I40IW_MAX_WQ_FRAGMENT_COUNT)
- return I40IW_ERR_INVALID_FRAG_COUNT;
- ret_code = i40iw_get_wqe_shift(info->sq_size, info->max_sq_frag_cnt, info->max_inline_data, &sqshift);
- if (ret_code)
- return ret_code;
-
- qp->sq_base = info->sq;
- qp->rq_base = info->rq;
- qp->shadow_area = info->shadow_area;
- qp->sq_wrtrk_array = info->sq_wrtrk_array;
- qp->rq_wrid_array = info->rq_wrid_array;
-
- qp->wqe_alloc_reg = info->wqe_alloc_reg;
- qp->qp_id = info->qp_id;
-
- qp->sq_size = info->sq_size;
- qp->push_db = info->push_db;
- qp->push_wqe = info->push_wqe;
-
- qp->max_sq_frag_cnt = info->max_sq_frag_cnt;
- sq_ring_size = qp->sq_size << sqshift;
-
- I40IW_RING_INIT(qp->sq_ring, sq_ring_size);
- I40IW_RING_INIT(qp->initial_ring, sq_ring_size);
- I40IW_RING_MOVE_HEAD(qp->sq_ring, ret_code);
- I40IW_RING_MOVE_TAIL(qp->sq_ring);
- I40IW_RING_MOVE_HEAD(qp->initial_ring, ret_code);
- qp->swqe_polarity = 1;
- qp->swqe_polarity_deferred = 1;
- qp->rwqe_polarity = 0;
-
- if (!qp->use_srq) {
- qp->rq_size = info->rq_size;
- qp->max_rq_frag_cnt = info->max_rq_frag_cnt;
- I40IW_RING_INIT(qp->rq_ring, qp->rq_size);
- switch (info->abi_ver) {
- case 4:
- ret_code = i40iw_get_wqe_shift(info->rq_size, info->max_rq_frag_cnt, 0, &rqshift);
- if (ret_code)
- return ret_code;
- break;
- case 5: /* fallthrough until next ABI version */
- default:
- rqshift = I40IW_MAX_RQ_WQE_SHIFT;
- break;
- }
- qp->rq_wqe_size = rqshift;
- qp->rq_wqe_size_multiplier = 4 << rqshift;
- }
- qp->ops = iw_qp_uk_ops;
-
- return ret_code;
-}
-
-/**
- * i40iw_cq_uk_init - initialize shared cq (user and kernel)
- * @cq: hw cq
- * @info: hw cq initialization info
- */
-enum i40iw_status_code i40iw_cq_uk_init(struct i40iw_cq_uk *cq,
- struct i40iw_cq_uk_init_info *info)
-{
- if ((info->cq_size < I40IW_MIN_CQ_SIZE) ||
- (info->cq_size > I40IW_MAX_CQ_SIZE))
- return I40IW_ERR_INVALID_SIZE;
- cq->cq_base = (struct i40iw_cqe *)info->cq_base;
- cq->cq_id = info->cq_id;
- cq->cq_size = info->cq_size;
- cq->cqe_alloc_reg = info->cqe_alloc_reg;
- cq->shadow_area = info->shadow_area;
- cq->avoid_mem_cflct = info->avoid_mem_cflct;
-
- I40IW_RING_INIT(cq->cq_ring, cq->cq_size);
- cq->polarity = 1;
- cq->ops = iw_cq_ops;
-
- return 0;
-}
-
-/**
- * i40iw_device_init_uk - setup routines for iwarp shared device
- * @dev: iwarp shared (user and kernel)
- */
-void i40iw_device_init_uk(struct i40iw_dev_uk *dev)
-{
- dev->ops_uk = iw_device_uk_ops;
-}
-
-/**
- * i40iw_clean_cq - clean cq entries
- * @ queue completion context
- * @cq: cq to clean
- */
-void i40iw_clean_cq(void *queue, struct i40iw_cq_uk *cq)
-{
- u64 *cqe;
- u64 qword3, comp_ctx;
- u32 cq_head;
- u8 polarity, temp;
-
- cq_head = cq->cq_ring.head;
- temp = cq->polarity;
- do {
- if (cq->avoid_mem_cflct)
- cqe = (u64 *)&(((struct i40iw_extended_cqe *)cq->cq_base)[cq_head]);
- else
- cqe = (u64 *)&cq->cq_base[cq_head];
- get_64bit_val(cqe, 24, &qword3);
- polarity = (u8)RS_64(qword3, I40IW_CQ_VALID);
-
- if (polarity != temp)
- break;
-
- get_64bit_val(cqe, 8, &comp_ctx);
- if ((void *)(unsigned long)comp_ctx == queue)
- set_64bit_val(cqe, 8, 0);
-
- cq_head = (cq_head + 1) % cq->cq_ring.size;
- if (!cq_head)
- temp ^= 1;
- } while (true);
-}
-
-/**
- * i40iw_nop - send a nop
- * @qp: hw qp ptr
- * @wr_id: work request id
- * @signaled: flag if signaled for completion
- * @post_sq: flag to post sq
- */
-enum i40iw_status_code i40iw_nop(struct i40iw_qp_uk *qp,
- u64 wr_id,
- bool signaled,
- bool post_sq)
-{
- u64 header, *wqe;
- u32 wqe_idx;
-
- wqe = i40iw_qp_get_next_send_wqe(qp, &wqe_idx, I40IW_QP_WQE_MIN_SIZE, 0, wr_id);
- if (!wqe)
- return I40IW_ERR_QP_TOOMANY_WRS_POSTED;
- set_64bit_val(wqe, 0, 0);
- set_64bit_val(wqe, 8, 0);
- set_64bit_val(wqe, 16, 0);
-
- header = LS_64(I40IWQP_OP_NOP, I40IWQPSQ_OPCODE) |
- LS_64(signaled, I40IWQPSQ_SIGCOMPL) |
- LS_64(qp->swqe_polarity, I40IWQPSQ_VALID);
-
- wmb(); /* make sure WQE is populated before valid bit is set */
-
- set_64bit_val(wqe, 24, header);
- if (post_sq)
- i40iw_qp_post_wr(qp);
-
- return 0;
-}
-
-/**
- * i40iw_fragcnt_to_wqesize_sq - calculate wqe size based on fragment count for SQ
- * @frag_cnt: number of fragments
- * @wqe_size: size of sq wqe returned
- */
-enum i40iw_status_code i40iw_fragcnt_to_wqesize_sq(u32 frag_cnt, u8 *wqe_size)
-{
- switch (frag_cnt) {
- case 0:
- case 1:
- *wqe_size = I40IW_QP_WQE_MIN_SIZE;
- break;
- case 2:
- case 3:
- *wqe_size = 64;
- break;
- case 4:
- case 5:
- *wqe_size = 96;
- break;
- case 6:
- case 7:
- *wqe_size = 128;
- break;
- default:
- return I40IW_ERR_INVALID_FRAG_COUNT;
- }
-
- return 0;
-}
-
-/**
- * i40iw_fragcnt_to_wqesize_rq - calculate wqe size based on fragment count for RQ
- * @frag_cnt: number of fragments
- * @wqe_size: size of rq wqe returned
- */
-enum i40iw_status_code i40iw_fragcnt_to_wqesize_rq(u32 frag_cnt, u8 *wqe_size)
-{
- switch (frag_cnt) {
- case 0:
- case 1:
- *wqe_size = 32;
- break;
- case 2:
- case 3:
- *wqe_size = 64;
- break;
- case 4:
- case 5:
- case 6:
- case 7:
- *wqe_size = 128;
- break;
- default:
- return I40IW_ERR_INVALID_FRAG_COUNT;
- }
-
- return 0;
-}
-
-/**
- * i40iw_inline_data_size_to_wqesize - based on inline data, wqe size
- * @data_size: data size for inline
- * @wqe_size: size of sq wqe returned
- */
-enum i40iw_status_code i40iw_inline_data_size_to_wqesize(u32 data_size,
- u8 *wqe_size)
-{
- if (data_size > I40IW_MAX_INLINE_DATA_SIZE)
- return I40IW_ERR_INVALID_IMM_DATA_SIZE;
-
- if (data_size <= 16)
- *wqe_size = I40IW_QP_WQE_MIN_SIZE;
- else
- *wqe_size = 64;
-
- return 0;
-}
diff --git a/drivers/infiniband/hw/i40iw/i40iw_user.h b/drivers/infiniband/hw/i40iw/i40iw_user.h
deleted file mode 100644
index 84be6f13b9c5..000000000000
--- a/drivers/infiniband/hw/i40iw/i40iw_user.h
+++ /dev/null
@@ -1,446 +0,0 @@
-/*******************************************************************************
-*
-* Copyright (c) 2015-2016 Intel Corporation. All rights reserved.
-*
-* This software is available to you under a choice of one of two
-* licenses. You may choose to be licensed under the terms of the GNU
-* General Public License (GPL) Version 2, available from the file
-* COPYING in the main directory of this source tree, or the
-* OpenFabrics.org BSD license below:
-*
-* Redistribution and use in source and binary forms, with or
-* without modification, are permitted provided that the following
-* conditions are met:
-*
-* - Redistributions of source code must retain the above
-* copyright notice, this list of conditions and the following
-* disclaimer.
-*
-* - Redistributions in binary form must reproduce the above
-* copyright notice, this list of conditions and the following
-* disclaimer in the documentation and/or other materials
-* provided with the distribution.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-* SOFTWARE.
-*
-*******************************************************************************/
-
-#ifndef I40IW_USER_H
-#define I40IW_USER_H
-
-enum i40iw_device_capabilities_const {
- I40IW_WQE_SIZE = 4,
- I40IW_CQP_WQE_SIZE = 8,
- I40IW_CQE_SIZE = 4,
- I40IW_EXTENDED_CQE_SIZE = 8,
- I40IW_AEQE_SIZE = 2,
- I40IW_CEQE_SIZE = 1,
- I40IW_CQP_CTX_SIZE = 8,
- I40IW_SHADOW_AREA_SIZE = 8,
- I40IW_CEQ_MAX_COUNT = 256,
- I40IW_QUERY_FPM_BUF_SIZE = 128,
- I40IW_COMMIT_FPM_BUF_SIZE = 128,
- I40IW_MIN_IW_QP_ID = 1,
- I40IW_MAX_IW_QP_ID = 262143,
- I40IW_MIN_CEQID = 0,
- I40IW_MAX_CEQID = 256,
- I40IW_MIN_CQID = 0,
- I40IW_MAX_CQID = 131071,
- I40IW_MIN_AEQ_ENTRIES = 1,
- I40IW_MAX_AEQ_ENTRIES = 524287,
- I40IW_MIN_CEQ_ENTRIES = 1,
- I40IW_MAX_CEQ_ENTRIES = 131071,
- I40IW_MIN_CQ_SIZE = 1,
- I40IW_MAX_CQ_SIZE = 1048575,
- I40IW_MAX_AEQ_ALLOCATE_COUNT = 255,
- I40IW_DB_ID_ZERO = 0,
- I40IW_MAX_WQ_FRAGMENT_COUNT = 3,
- I40IW_MAX_SGE_RD = 1,
- I40IW_MAX_OUTBOUND_MESSAGE_SIZE = 2147483647,
- I40IW_MAX_INBOUND_MESSAGE_SIZE = 2147483647,
- I40IW_MAX_PUSH_PAGE_COUNT = 4096,
- I40IW_MAX_PE_ENABLED_VF_COUNT = 32,
- I40IW_MAX_VF_FPM_ID = 47,
- I40IW_MAX_VF_PER_PF = 127,
- I40IW_MAX_SQ_PAYLOAD_SIZE = 2145386496,
- I40IW_MAX_INLINE_DATA_SIZE = 48,
- I40IW_MAX_PUSHMODE_INLINE_DATA_SIZE = 48,
- I40IW_MAX_IRD_SIZE = 63,
- I40IW_MAX_ORD_SIZE = 127,
- I40IW_MAX_WQ_ENTRIES = 2048,
- I40IW_Q2_BUFFER_SIZE = (248 + 100),
- I40IW_MAX_WQE_SIZE_RQ = 128,
- I40IW_QP_CTX_SIZE = 248,
- I40IW_MAX_PDS = 32768
-};
-
-#define i40iw_handle void *
-#define i40iw_adapter_handle i40iw_handle
-#define i40iw_qp_handle i40iw_handle
-#define i40iw_cq_handle i40iw_handle
-#define i40iw_srq_handle i40iw_handle
-#define i40iw_pd_id i40iw_handle
-#define i40iw_stag_handle i40iw_handle
-#define i40iw_stag_index u32
-#define i40iw_stag u32
-#define i40iw_stag_key u8
-
-#define i40iw_tagged_offset u64
-#define i40iw_access_privileges u32
-#define i40iw_physical_fragment u64
-#define i40iw_address_list u64 *
-
-#define I40IW_MAX_MR_SIZE 0x10000000000L
-#define I40IW_MAX_RQ_WQE_SHIFT 2
-
-struct i40iw_qp_uk;
-struct i40iw_cq_uk;
-struct i40iw_srq_uk;
-struct i40iw_qp_uk_init_info;
-struct i40iw_cq_uk_init_info;
-struct i40iw_srq_uk_init_info;
-
-struct i40iw_sge {
- i40iw_tagged_offset tag_off;
- u32 len;
- i40iw_stag stag;
-};
-
-#define i40iw_sgl struct i40iw_sge *
-
-struct i40iw_ring {
- u32 head;
- u32 tail;
- u32 size;
-};
-
-struct i40iw_cqe {
- u64 buf[I40IW_CQE_SIZE];
-};
-
-struct i40iw_extended_cqe {
- u64 buf[I40IW_EXTENDED_CQE_SIZE];
-};
-
-struct i40iw_wqe {
- u64 buf[I40IW_WQE_SIZE];
-};
-
-struct i40iw_qp_uk_ops;
-
-enum i40iw_addressing_type {
- I40IW_ADDR_TYPE_ZERO_BASED = 0,
- I40IW_ADDR_TYPE_VA_BASED = 1,
-};
-
-#define I40IW_ACCESS_FLAGS_LOCALREAD 0x01
-#define I40IW_ACCESS_FLAGS_LOCALWRITE 0x02
-#define I40IW_ACCESS_FLAGS_REMOTEREAD_ONLY 0x04
-#define I40IW_ACCESS_FLAGS_REMOTEREAD 0x05
-#define I40IW_ACCESS_FLAGS_REMOTEWRITE_ONLY 0x08
-#define I40IW_ACCESS_FLAGS_REMOTEWRITE 0x0a
-#define I40IW_ACCESS_FLAGS_BIND_WINDOW 0x10
-#define I40IW_ACCESS_FLAGS_ALL 0x1F
-
-#define I40IW_OP_TYPE_RDMA_WRITE 0
-#define I40IW_OP_TYPE_RDMA_READ 1
-#define I40IW_OP_TYPE_SEND 3
-#define I40IW_OP_TYPE_SEND_INV 4
-#define I40IW_OP_TYPE_SEND_SOL 5
-#define I40IW_OP_TYPE_SEND_SOL_INV 6
-#define I40IW_OP_TYPE_REC 7
-#define I40IW_OP_TYPE_BIND_MW 8
-#define I40IW_OP_TYPE_FAST_REG_NSMR 9
-#define I40IW_OP_TYPE_INV_STAG 10
-#define I40IW_OP_TYPE_RDMA_READ_INV_STAG 11
-#define I40IW_OP_TYPE_NOP 12
-
-enum i40iw_completion_status {
- I40IW_COMPL_STATUS_SUCCESS = 0,
- I40IW_COMPL_STATUS_FLUSHED,
- I40IW_COMPL_STATUS_INVALID_WQE,
- I40IW_COMPL_STATUS_QP_CATASTROPHIC,
- I40IW_COMPL_STATUS_REMOTE_TERMINATION,
- I40IW_COMPL_STATUS_INVALID_STAG,
- I40IW_COMPL_STATUS_BASE_BOUND_VIOLATION,
- I40IW_COMPL_STATUS_ACCESS_VIOLATION,
- I40IW_COMPL_STATUS_INVALID_PD_ID,
- I40IW_COMPL_STATUS_WRAP_ERROR,
- I40IW_COMPL_STATUS_STAG_INVALID_PDID,
- I40IW_COMPL_STATUS_RDMA_READ_ZERO_ORD,
- I40IW_COMPL_STATUS_QP_NOT_PRIVLEDGED,
- I40IW_COMPL_STATUS_STAG_NOT_INVALID,
- I40IW_COMPL_STATUS_INVALID_PHYS_BUFFER_SIZE,
- I40IW_COMPL_STATUS_INVALID_PHYS_BUFFER_ENTRY,
- I40IW_COMPL_STATUS_INVALID_FBO,
- I40IW_COMPL_STATUS_INVALID_LENGTH,
- I40IW_COMPL_STATUS_INVALID_ACCESS,
- I40IW_COMPL_STATUS_PHYS_BUFFER_LIST_TOO_LONG,
- I40IW_COMPL_STATUS_INVALID_VIRT_ADDRESS,
- I40IW_COMPL_STATUS_INVALID_REGION,
- I40IW_COMPL_STATUS_INVALID_WINDOW,
- I40IW_COMPL_STATUS_INVALID_TOTAL_LENGTH
-};
-
-enum i40iw_completion_notify {
- IW_CQ_COMPL_EVENT = 0,
- IW_CQ_COMPL_SOLICITED = 1
-};
-
-struct i40iw_post_send {
- i40iw_sgl sg_list;
- u32 num_sges;
-};
-
-struct i40iw_post_inline_send {
- void *data;
- u32 len;
-};
-
-struct i40iw_post_send_w_inv {
- i40iw_sgl sg_list;
- u32 num_sges;
- i40iw_stag remote_stag_to_inv;
-};
-
-struct i40iw_post_inline_send_w_inv {
- void *data;
- u32 len;
- i40iw_stag remote_stag_to_inv;
-};
-
-struct i40iw_rdma_write {
- i40iw_sgl lo_sg_list;
- u32 num_lo_sges;
- struct i40iw_sge rem_addr;
-};
-
-struct i40iw_inline_rdma_write {
- void *data;
- u32 len;
- struct i40iw_sge rem_addr;
-};
-
-struct i40iw_rdma_read {
- struct i40iw_sge lo_addr;
- struct i40iw_sge rem_addr;
-};
-
-struct i40iw_bind_window {
- i40iw_stag mr_stag;
- u64 bind_length;
- void *va;
- enum i40iw_addressing_type addressing_type;
- bool enable_reads;
- bool enable_writes;
- i40iw_stag mw_stag;
-};
-
-struct i40iw_inv_local_stag {
- i40iw_stag target_stag;
-};
-
-struct i40iw_post_sq_info {
- u64 wr_id;
- u8 op_type;
- bool signaled;
- bool read_fence;
- bool local_fence;
- bool inline_data;
- bool defer_flag;
- union {
- struct i40iw_post_send send;
- struct i40iw_post_send send_w_sol;
- struct i40iw_post_send_w_inv send_w_inv;
- struct i40iw_post_send_w_inv send_w_sol_inv;
- struct i40iw_rdma_write rdma_write;
- struct i40iw_rdma_read rdma_read;
- struct i40iw_rdma_read rdma_read_inv;
- struct i40iw_bind_window bind_window;
- struct i40iw_inv_local_stag inv_local_stag;
- struct i40iw_inline_rdma_write inline_rdma_write;
- struct i40iw_post_inline_send inline_send;
- struct i40iw_post_inline_send inline_send_w_sol;
- struct i40iw_post_inline_send_w_inv inline_send_w_inv;
- struct i40iw_post_inline_send_w_inv inline_send_w_sol_inv;
- } op;
-};
-
-struct i40iw_post_rq_info {
- u64 wr_id;
- i40iw_sgl sg_list;
- u32 num_sges;
-};
-
-struct i40iw_cq_poll_info {
- u64 wr_id;
- i40iw_qp_handle qp_handle;
- u32 bytes_xfered;
- u32 tcp_seq_num;
- u32 qp_id;
- i40iw_stag inv_stag;
- enum i40iw_completion_status comp_status;
- u16 major_err;
- u16 minor_err;
- u8 op_type;
- bool stag_invalid_set;
- bool push_dropped;
- bool error;
- bool is_srq;
- bool solicited_event;
-};
-
-struct i40iw_qp_uk_ops {
- void (*iw_qp_post_wr)(struct i40iw_qp_uk *);
- void (*iw_qp_ring_push_db)(struct i40iw_qp_uk *, u32);
- enum i40iw_status_code (*iw_rdma_write)(struct i40iw_qp_uk *,
- struct i40iw_post_sq_info *, bool);
- enum i40iw_status_code (*iw_rdma_read)(struct i40iw_qp_uk *,
- struct i40iw_post_sq_info *, bool, bool);
- enum i40iw_status_code (*iw_send)(struct i40iw_qp_uk *,
- struct i40iw_post_sq_info *, u32, bool);
- enum i40iw_status_code (*iw_inline_rdma_write)(struct i40iw_qp_uk *,
- struct i40iw_post_sq_info *, bool);
- enum i40iw_status_code (*iw_inline_send)(struct i40iw_qp_uk *,
- struct i40iw_post_sq_info *, u32, bool);
- enum i40iw_status_code (*iw_stag_local_invalidate)(struct i40iw_qp_uk *,
- struct i40iw_post_sq_info *, bool);
- enum i40iw_status_code (*iw_mw_bind)(struct i40iw_qp_uk *,
- struct i40iw_post_sq_info *, bool);
- enum i40iw_status_code (*iw_post_receive)(struct i40iw_qp_uk *,
- struct i40iw_post_rq_info *);
- enum i40iw_status_code (*iw_post_nop)(struct i40iw_qp_uk *, u64, bool, bool);
-};
-
-struct i40iw_cq_ops {
- void (*iw_cq_request_notification)(struct i40iw_cq_uk *,
- enum i40iw_completion_notify);
- enum i40iw_status_code (*iw_cq_poll_completion)(struct i40iw_cq_uk *,
- struct i40iw_cq_poll_info *);
- enum i40iw_status_code (*iw_cq_post_entries)(struct i40iw_cq_uk *, u8 count);
- void (*iw_cq_clean)(void *, struct i40iw_cq_uk *);
-};
-
-struct i40iw_dev_uk;
-
-struct i40iw_device_uk_ops {
- enum i40iw_status_code (*iwarp_cq_uk_init)(struct i40iw_cq_uk *,
- struct i40iw_cq_uk_init_info *);
- enum i40iw_status_code (*iwarp_qp_uk_init)(struct i40iw_qp_uk *,
- struct i40iw_qp_uk_init_info *);
-};
-
-struct i40iw_dev_uk {
- struct i40iw_device_uk_ops ops_uk;
-};
-
-struct i40iw_sq_uk_wr_trk_info {
- u64 wrid;
- u32 wr_len;
- u8 wqe_size;
- u8 reserved[3];
-};
-
-struct i40iw_qp_quanta {
- u64 elem[I40IW_WQE_SIZE];
-};
-
-struct i40iw_qp_uk {
- struct i40iw_qp_quanta *sq_base;
- struct i40iw_qp_quanta *rq_base;
- u32 __iomem *wqe_alloc_reg;
- struct i40iw_sq_uk_wr_trk_info *sq_wrtrk_array;
- u64 *rq_wrid_array;
- u64 *shadow_area;
- u32 *push_db;
- u64 *push_wqe;
- struct i40iw_ring sq_ring;
- struct i40iw_ring rq_ring;
- struct i40iw_ring initial_ring;
- u32 qp_id;
- u32 sq_size;
- u32 rq_size;
- u32 max_sq_frag_cnt;
- u32 max_rq_frag_cnt;
- struct i40iw_qp_uk_ops ops;
- bool use_srq;
- u8 swqe_polarity;
- u8 swqe_polarity_deferred;
- u8 rwqe_polarity;
- u8 rq_wqe_size;
- u8 rq_wqe_size_multiplier;
- bool deferred_flag;
-};
-
-struct i40iw_cq_uk {
- struct i40iw_cqe *cq_base;
- u32 __iomem *cqe_alloc_reg;
- u64 *shadow_area;
- u32 cq_id;
- u32 cq_size;
- struct i40iw_ring cq_ring;
- u8 polarity;
- bool avoid_mem_cflct;
-
- struct i40iw_cq_ops ops;
-};
-
-struct i40iw_qp_uk_init_info {
- struct i40iw_qp_quanta *sq;
- struct i40iw_qp_quanta *rq;
- u32 __iomem *wqe_alloc_reg;
- u64 *shadow_area;
- struct i40iw_sq_uk_wr_trk_info *sq_wrtrk_array;
- u64 *rq_wrid_array;
- u32 *push_db;
- u64 *push_wqe;
- u32 qp_id;
- u32 sq_size;
- u32 rq_size;
- u32 max_sq_frag_cnt;
- u32 max_rq_frag_cnt;
- u32 max_inline_data;
- int abi_ver;
-};
-
-struct i40iw_cq_uk_init_info {
- u32 __iomem *cqe_alloc_reg;
- struct i40iw_cqe *cq_base;
- u64 *shadow_area;
- u32 cq_size;
- u32 cq_id;
- bool avoid_mem_cflct;
-};
-
-void i40iw_device_init_uk(struct i40iw_dev_uk *dev);
-
-void i40iw_qp_post_wr(struct i40iw_qp_uk *qp);
-u64 *i40iw_qp_get_next_send_wqe(struct i40iw_qp_uk *qp, u32 *wqe_idx,
- u8 wqe_size,
- u32 total_size,
- u64 wr_id
- );
-u64 *i40iw_qp_get_next_recv_wqe(struct i40iw_qp_uk *qp, u32 *wqe_idx);
-u64 *i40iw_qp_get_next_srq_wqe(struct i40iw_srq_uk *srq, u32 *wqe_idx);
-
-enum i40iw_status_code i40iw_cq_uk_init(struct i40iw_cq_uk *cq,
- struct i40iw_cq_uk_init_info *info);
-enum i40iw_status_code i40iw_qp_uk_init(struct i40iw_qp_uk *qp,
- struct i40iw_qp_uk_init_info *info);
-
-void i40iw_clean_cq(void *queue, struct i40iw_cq_uk *cq);
-enum i40iw_status_code i40iw_nop(struct i40iw_qp_uk *qp, u64 wr_id,
- bool signaled, bool post_sq);
-enum i40iw_status_code i40iw_fragcnt_to_wqesize_sq(u32 frag_cnt, u8 *wqe_size);
-enum i40iw_status_code i40iw_fragcnt_to_wqesize_rq(u32 frag_cnt, u8 *wqe_size);
-enum i40iw_status_code i40iw_inline_data_size_to_wqesize(u32 data_size,
- u8 *wqe_size);
-enum i40iw_status_code i40iw_get_wqe_shift(u32 wqdepth, u32 sge, u32 inline_data, u8 *shift);
-#endif
diff --git a/drivers/infiniband/hw/i40iw/i40iw_utils.c b/drivers/infiniband/hw/i40iw/i40iw_utils.c
deleted file mode 100644
index 0f5d43d1f5fc..000000000000
--- a/drivers/infiniband/hw/i40iw/i40iw_utils.c
+++ /dev/null
@@ -1,1447 +0,0 @@
-/*******************************************************************************
-*
-* Copyright (c) 2015-2016 Intel Corporation. All rights reserved.
-*
-* This software is available to you under a choice of one of two
-* licenses. You may choose to be licensed under the terms of the GNU
-* General Public License (GPL) Version 2, available from the file
-* COPYING in the main directory of this source tree, or the
-* OpenFabrics.org BSD license below:
-*
-* Redistribution and use in source and binary forms, with or
-* without modification, are permitted provided that the following
-* conditions are met:
-*
-* - Redistributions of source code must retain the above
-* copyright notice, this list of conditions and the following
-* disclaimer.
-*
-* - Redistributions in binary form must reproduce the above
-* copyright notice, this list of conditions and the following
-* disclaimer in the documentation and/or other materials
-* provided with the distribution.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-* SOFTWARE.
-*
-*******************************************************************************/
-
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/ethtool.h>
-#include <linux/mii.h>
-#include <linux/if_vlan.h>
-#include <linux/crc32.h>
-#include <linux/in.h>
-#include <linux/ip.h>
-#include <linux/tcp.h>
-#include <linux/init.h>
-#include <linux/io.h>
-#include <asm/irq.h>
-#include <asm/byteorder.h>
-#include <net/netevent.h>
-#include <net/neighbour.h>
-#include "i40iw.h"
-
-/**
- * i40iw_arp_table - manage arp table
- * @iwdev: iwarp device
- * @ip_addr: ip address for device
- * @mac_addr: mac address ptr
- * @action: modify, delete or add
- */
-int i40iw_arp_table(struct i40iw_device *iwdev,
- u32 *ip_addr,
- bool ipv4,
- u8 *mac_addr,
- u32 action)
-{
- int arp_index;
- int err;
- u32 ip[4];
-
- if (ipv4) {
- memset(ip, 0, sizeof(ip));
- ip[0] = *ip_addr;
- } else {
- memcpy(ip, ip_addr, sizeof(ip));
- }
-
- for (arp_index = 0; (u32)arp_index < iwdev->arp_table_size; arp_index++)
- if (memcmp(iwdev->arp_table[arp_index].ip_addr, ip, sizeof(ip)) == 0)
- break;
- switch (action) {
- case I40IW_ARP_ADD:
- if (arp_index != iwdev->arp_table_size)
- return -1;
-
- arp_index = 0;
- err = i40iw_alloc_resource(iwdev, iwdev->allocated_arps,
- iwdev->arp_table_size,
- (u32 *)&arp_index,
- &iwdev->next_arp_index);
-
- if (err)
- return err;
-
- memcpy(iwdev->arp_table[arp_index].ip_addr, ip, sizeof(ip));
- ether_addr_copy(iwdev->arp_table[arp_index].mac_addr, mac_addr);
- break;
- case I40IW_ARP_RESOLVE:
- if (arp_index == iwdev->arp_table_size)
- return -1;
- break;
- case I40IW_ARP_DELETE:
- if (arp_index == iwdev->arp_table_size)
- return -1;
- memset(iwdev->arp_table[arp_index].ip_addr, 0,
- sizeof(iwdev->arp_table[arp_index].ip_addr));
- eth_zero_addr(iwdev->arp_table[arp_index].mac_addr);
- i40iw_free_resource(iwdev, iwdev->allocated_arps, arp_index);
- break;
- default:
- return -1;
- }
- return arp_index;
-}
-
-/**
- * i40iw_wr32 - write 32 bits to hw register
- * @hw: hardware information including registers
- * @reg: register offset
- * @value: vvalue to write to register
- */
-inline void i40iw_wr32(struct i40iw_hw *hw, u32 reg, u32 value)
-{
- writel(value, hw->hw_addr + reg);
-}
-
-/**
- * i40iw_rd32 - read a 32 bit hw register
- * @hw: hardware information including registers
- * @reg: register offset
- *
- * Return value of register content
- */
-inline u32 i40iw_rd32(struct i40iw_hw *hw, u32 reg)
-{
- return readl(hw->hw_addr + reg);
-}
-
-/**
- * i40iw_inetaddr_event - system notifier for netdev events
- * @notfier: not used
- * @event: event for notifier
- * @ptr: if address
- */
-int i40iw_inetaddr_event(struct notifier_block *notifier,
- unsigned long event,
- void *ptr)
-{
- struct in_ifaddr *ifa = ptr;
- struct net_device *event_netdev = ifa->ifa_dev->dev;
- struct net_device *netdev;
- struct net_device *upper_dev;
- struct i40iw_device *iwdev;
- struct i40iw_handler *hdl;
- u32 local_ipaddr;
- u32 action = I40IW_ARP_ADD;
-
- hdl = i40iw_find_netdev(event_netdev);
- if (!hdl)
- return NOTIFY_DONE;
-
- iwdev = &hdl->device;
- netdev = iwdev->ldev->netdev;
- upper_dev = netdev_master_upper_dev_get(netdev);
- if (netdev != event_netdev)
- return NOTIFY_DONE;
-
- if (upper_dev)
- local_ipaddr = ntohl(
- ((struct in_device *)upper_dev->ip_ptr)->ifa_list->ifa_address);
- else
- local_ipaddr = ntohl(ifa->ifa_address);
- switch (event) {
- case NETDEV_DOWN:
- action = I40IW_ARP_DELETE;
- /* Fall through */
- case NETDEV_UP:
- /* Fall through */
- case NETDEV_CHANGEADDR:
- i40iw_manage_arp_cache(iwdev,
- netdev->dev_addr,
- &local_ipaddr,
- true,
- action);
- i40iw_if_notify(iwdev, netdev, &local_ipaddr, true,
- (action == I40IW_ARP_ADD) ? true : false);
- break;
- default:
- break;
- }
- return NOTIFY_DONE;
-}
-
-/**
- * i40iw_inet6addr_event - system notifier for ipv6 netdev events
- * @notfier: not used
- * @event: event for notifier
- * @ptr: if address
- */
-int i40iw_inet6addr_event(struct notifier_block *notifier,
- unsigned long event,
- void *ptr)
-{
- struct inet6_ifaddr *ifa = (struct inet6_ifaddr *)ptr;
- struct net_device *event_netdev = ifa->idev->dev;
- struct net_device *netdev;
- struct i40iw_device *iwdev;
- struct i40iw_handler *hdl;
- u32 local_ipaddr6[4];
- u32 action = I40IW_ARP_ADD;
-
- hdl = i40iw_find_netdev(event_netdev);
- if (!hdl)
- return NOTIFY_DONE;
-
- iwdev = &hdl->device;
- netdev = iwdev->ldev->netdev;
- if (netdev != event_netdev)
- return NOTIFY_DONE;
-
- i40iw_copy_ip_ntohl(local_ipaddr6, ifa->addr.in6_u.u6_addr32);
- switch (event) {
- case NETDEV_DOWN:
- action = I40IW_ARP_DELETE;
- /* Fall through */
- case NETDEV_UP:
- /* Fall through */
- case NETDEV_CHANGEADDR:
- i40iw_manage_arp_cache(iwdev,
- netdev->dev_addr,
- local_ipaddr6,
- false,
- action);
- i40iw_if_notify(iwdev, netdev, local_ipaddr6, false,
- (action == I40IW_ARP_ADD) ? true : false);
- break;
- default:
- break;
- }
- return NOTIFY_DONE;
-}
-
-/**
- * i40iw_net_event - system notifier for net events
- * @notfier: not used
- * @event: event for notifier
- * @ptr: neighbor
- */
-int i40iw_net_event(struct notifier_block *notifier, unsigned long event, void *ptr)
-{
- struct neighbour *neigh = ptr;
- struct i40iw_device *iwdev;
- struct i40iw_handler *iwhdl;
- __be32 *p;
- u32 local_ipaddr[4];
-
- switch (event) {
- case NETEVENT_NEIGH_UPDATE:
- iwhdl = i40iw_find_netdev((struct net_device *)neigh->dev);
- if (!iwhdl)
- return NOTIFY_DONE;
- iwdev = &iwhdl->device;
- p = (__be32 *)neigh->primary_key;
- i40iw_copy_ip_ntohl(local_ipaddr, p);
- if (neigh->nud_state & NUD_VALID) {
- i40iw_manage_arp_cache(iwdev,
- neigh->ha,
- local_ipaddr,
- false,
- I40IW_ARP_ADD);
-
- } else {
- i40iw_manage_arp_cache(iwdev,
- neigh->ha,
- local_ipaddr,
- false,
- I40IW_ARP_DELETE);
- }
- break;
- default:
- break;
- }
- return NOTIFY_DONE;
-}
-
-/**
- * i40iw_get_cqp_request - get cqp struct
- * @cqp: device cqp ptr
- * @wait: cqp to be used in wait mode
- */
-struct i40iw_cqp_request *i40iw_get_cqp_request(struct i40iw_cqp *cqp, bool wait)
-{
- struct i40iw_cqp_request *cqp_request = NULL;
- unsigned long flags;
-
- spin_lock_irqsave(&cqp->req_lock, flags);
- if (!list_empty(&cqp->cqp_avail_reqs)) {
- cqp_request = list_entry(cqp->cqp_avail_reqs.next,
- struct i40iw_cqp_request, list);
- list_del_init(&cqp_request->list);
- }
- spin_unlock_irqrestore(&cqp->req_lock, flags);
- if (!cqp_request) {
- cqp_request = kzalloc(sizeof(*cqp_request), GFP_ATOMIC);
- if (cqp_request) {
- cqp_request->dynamic = true;
- INIT_LIST_HEAD(&cqp_request->list);
- init_waitqueue_head(&cqp_request->waitq);
- }
- }
- if (!cqp_request) {
- i40iw_pr_err("CQP Request Fail: No Memory");
- return NULL;
- }
-
- if (wait) {
- atomic_set(&cqp_request->refcount, 2);
- cqp_request->waiting = true;
- } else {
- atomic_set(&cqp_request->refcount, 1);
- }
- return cqp_request;
-}
-
-/**
- * i40iw_free_cqp_request - free cqp request
- * @cqp: cqp ptr
- * @cqp_request: to be put back in cqp list
- */
-void i40iw_free_cqp_request(struct i40iw_cqp *cqp, struct i40iw_cqp_request *cqp_request)
-{
- unsigned long flags;
-
- if (cqp_request->dynamic) {
- kfree(cqp_request);
- } else {
- cqp_request->request_done = false;
- cqp_request->callback_fcn = NULL;
- cqp_request->waiting = false;
-
- spin_lock_irqsave(&cqp->req_lock, flags);
- list_add_tail(&cqp_request->list, &cqp->cqp_avail_reqs);
- spin_unlock_irqrestore(&cqp->req_lock, flags);
- }
-}
-
-/**
- * i40iw_put_cqp_request - dec ref count and free if 0
- * @cqp: cqp ptr
- * @cqp_request: to be put back in cqp list
- */
-void i40iw_put_cqp_request(struct i40iw_cqp *cqp,
- struct i40iw_cqp_request *cqp_request)
-{
- if (atomic_dec_and_test(&cqp_request->refcount))
- i40iw_free_cqp_request(cqp, cqp_request);
-}
-
-/**
- * i40iw_free_qp - callback after destroy cqp completes
- * @cqp_request: cqp request for destroy qp
- * @num: not used
- */
-static void i40iw_free_qp(struct i40iw_cqp_request *cqp_request, u32 num)
-{
- struct i40iw_sc_qp *qp = (struct i40iw_sc_qp *)cqp_request->param;
- struct i40iw_qp *iwqp = (struct i40iw_qp *)qp->back_qp;
- struct i40iw_device *iwdev;
- u32 qp_num = iwqp->ibqp.qp_num;
-
- iwdev = iwqp->iwdev;
-
- i40iw_rem_pdusecount(iwqp->iwpd, iwdev);
- i40iw_free_qp_resources(iwdev, iwqp, qp_num);
- i40iw_rem_devusecount(iwdev);
-}
-
-/**
- * i40iw_wait_event - wait for completion
- * @iwdev: iwarp device
- * @cqp_request: cqp request to wait
- */
-static int i40iw_wait_event(struct i40iw_device *iwdev,
- struct i40iw_cqp_request *cqp_request)
-{
- struct cqp_commands_info *info = &cqp_request->info;
- struct i40iw_cqp *iwcqp = &iwdev->cqp;
- bool cqp_error = false;
- int err_code = 0;
- int timeout_ret = 0;
-
- timeout_ret = wait_event_timeout(cqp_request->waitq,
- cqp_request->request_done,
- I40IW_EVENT_TIMEOUT);
- if (!timeout_ret) {
- i40iw_pr_err("error cqp command 0x%x timed out ret = %d\n",
- info->cqp_cmd, timeout_ret);
- err_code = -ETIME;
- if (!iwdev->reset) {
- iwdev->reset = true;
- i40iw_request_reset(iwdev);
- }
- goto done;
- }
- cqp_error = cqp_request->compl_info.error;
- if (cqp_error) {
- i40iw_pr_err("error cqp command 0x%x completion maj = 0x%x min=0x%x\n",
- info->cqp_cmd, cqp_request->compl_info.maj_err_code,
- cqp_request->compl_info.min_err_code);
- err_code = -EPROTO;
- goto done;
- }
-done:
- i40iw_put_cqp_request(iwcqp, cqp_request);
- return err_code;
-}
-
-/**
- * i40iw_handle_cqp_op - process cqp command
- * @iwdev: iwarp device
- * @cqp_request: cqp request to process
- */
-enum i40iw_status_code i40iw_handle_cqp_op(struct i40iw_device *iwdev,
- struct i40iw_cqp_request
- *cqp_request)
-{
- struct i40iw_sc_dev *dev = &iwdev->sc_dev;
- enum i40iw_status_code status;
- struct cqp_commands_info *info = &cqp_request->info;
- int err_code = 0;
-
- if (iwdev->reset) {
- i40iw_free_cqp_request(&iwdev->cqp, cqp_request);
- return I40IW_ERR_CQP_COMPL_ERROR;
- }
-
- status = i40iw_process_cqp_cmd(dev, info);
- if (status) {
- i40iw_pr_err("error cqp command 0x%x failed\n", info->cqp_cmd);
- i40iw_free_cqp_request(&iwdev->cqp, cqp_request);
- return status;
- }
- if (cqp_request->waiting)
- err_code = i40iw_wait_event(iwdev, cqp_request);
- if (err_code)
- status = I40IW_ERR_CQP_COMPL_ERROR;
- return status;
-}
-
-/**
- * i40iw_add_devusecount - add dev refcount
- * @iwdev: dev for refcount
- */
-void i40iw_add_devusecount(struct i40iw_device *iwdev)
-{
- atomic64_inc(&iwdev->use_count);
-}
-
-/**
- * i40iw_rem_devusecount - decrement refcount for dev
- * @iwdev: device
- */
-void i40iw_rem_devusecount(struct i40iw_device *iwdev)
-{
- if (!atomic64_dec_and_test(&iwdev->use_count))
- return;
- wake_up(&iwdev->close_wq);
-}
-
-/**
- * i40iw_add_pdusecount - add pd refcount
- * @iwpd: pd for refcount
- */
-void i40iw_add_pdusecount(struct i40iw_pd *iwpd)
-{
- atomic_inc(&iwpd->usecount);
-}
-
-/**
- * i40iw_rem_pdusecount - decrement refcount for pd and free if 0
- * @iwpd: pd for refcount
- * @iwdev: iwarp device
- */
-void i40iw_rem_pdusecount(struct i40iw_pd *iwpd, struct i40iw_device *iwdev)
-{
- if (!atomic_dec_and_test(&iwpd->usecount))
- return;
- i40iw_free_resource(iwdev, iwdev->allocated_pds, iwpd->sc_pd.pd_id);
- kfree(iwpd);
-}
-
-/**
- * i40iw_add_ref - add refcount for qp
- * @ibqp: iqarp qp
- */
-void i40iw_add_ref(struct ib_qp *ibqp)
-{
- struct i40iw_qp *iwqp = (struct i40iw_qp *)ibqp;
-
- atomic_inc(&iwqp->refcount);
-}
-
-/**
- * i40iw_rem_ref - rem refcount for qp and free if 0
- * @ibqp: iqarp qp
- */
-void i40iw_rem_ref(struct ib_qp *ibqp)
-{
- struct i40iw_qp *iwqp;
- enum i40iw_status_code status;
- struct i40iw_cqp_request *cqp_request;
- struct cqp_commands_info *cqp_info;
- struct i40iw_device *iwdev;
- u32 qp_num;
- unsigned long flags;
-
- iwqp = to_iwqp(ibqp);
- iwdev = iwqp->iwdev;
- spin_lock_irqsave(&iwdev->qptable_lock, flags);
- if (!atomic_dec_and_test(&iwqp->refcount)) {
- spin_unlock_irqrestore(&iwdev->qptable_lock, flags);
- return;
- }
-
- qp_num = iwqp->ibqp.qp_num;
- iwdev->qp_table[qp_num] = NULL;
- spin_unlock_irqrestore(&iwdev->qptable_lock, flags);
- cqp_request = i40iw_get_cqp_request(&iwdev->cqp, false);
- if (!cqp_request)
- return;
-
- cqp_request->callback_fcn = i40iw_free_qp;
- cqp_request->param = (void *)&iwqp->sc_qp;
- cqp_info = &cqp_request->info;
- cqp_info->cqp_cmd = OP_QP_DESTROY;
- cqp_info->post_sq = 1;
- cqp_info->in.u.qp_destroy.qp = &iwqp->sc_qp;
- cqp_info->in.u.qp_destroy.scratch = (uintptr_t)cqp_request;
- cqp_info->in.u.qp_destroy.remove_hash_idx = true;
- status = i40iw_handle_cqp_op(iwdev, cqp_request);
- if (status)
- i40iw_pr_err("CQP-OP Destroy QP fail");
-}
-
-/**
- * i40iw_get_qp - get qp address
- * @device: iwarp device
- * @qpn: qp number
- */
-struct ib_qp *i40iw_get_qp(struct ib_device *device, int qpn)
-{
- struct i40iw_device *iwdev = to_iwdev(device);
-
- if ((qpn < IW_FIRST_QPN) || (qpn >= iwdev->max_qp))
- return NULL;
-
- return &iwdev->qp_table[qpn]->ibqp;
-}
-
-/**
- * i40iw_debug_buf - print debug msg and buffer is mask set
- * @dev: hardware control device structure
- * @mask: mask to compare if to print debug buffer
- * @buf: points buffer addr
- * @size: saize of buffer to print
- */
-void i40iw_debug_buf(struct i40iw_sc_dev *dev,
- enum i40iw_debug_flag mask,
- char *desc,
- u64 *buf,
- u32 size)
-{
- u32 i;
-
- if (!(dev->debug_mask & mask))
- return;
- i40iw_debug(dev, mask, "%s\n", desc);
- i40iw_debug(dev, mask, "starting address virt=%p phy=%llxh\n", buf,
- (unsigned long long)virt_to_phys(buf));
-
- for (i = 0; i < size; i += 8)
- i40iw_debug(dev, mask, "index %03d val: %016llx\n", i, buf[i / 8]);
-}
-
-/**
- * i40iw_get_hw_addr - return hw addr
- * @par: points to shared dev
- */
-u8 __iomem *i40iw_get_hw_addr(void *par)
-{
- struct i40iw_sc_dev *dev = (struct i40iw_sc_dev *)par;
-
- return dev->hw->hw_addr;
-}
-
-/**
- * i40iw_remove_head - return head entry and remove from list
- * @list: list for entry
- */
-void *i40iw_remove_head(struct list_head *list)
-{
- struct list_head *entry;
-
- if (list_empty(list))
- return NULL;
-
- entry = (void *)list->next;
- list_del(entry);
- return (void *)entry;
-}
-
-/**
- * i40iw_allocate_dma_mem - Memory alloc helper fn
- * @hw: pointer to the HW structure
- * @mem: ptr to mem struct to fill out
- * @size: size of memory requested
- * @alignment: what to align the allocation to
- */
-enum i40iw_status_code i40iw_allocate_dma_mem(struct i40iw_hw *hw,
- struct i40iw_dma_mem *mem,
- u64 size,
- u32 alignment)
-{
- struct pci_dev *pcidev = (struct pci_dev *)hw->dev_context;
-
- if (!mem)
- return I40IW_ERR_PARAM;
- mem->size = ALIGN(size, alignment);
- mem->va = dma_zalloc_coherent(&pcidev->dev, mem->size,
- (dma_addr_t *)&mem->pa, GFP_KERNEL);
- if (!mem->va)
- return I40IW_ERR_NO_MEMORY;
- return 0;
-}
-
-/**
- * i40iw_free_dma_mem - Memory free helper fn
- * @hw: pointer to the HW structure
- * @mem: ptr to mem struct to free
- */
-void i40iw_free_dma_mem(struct i40iw_hw *hw, struct i40iw_dma_mem *mem)
-{
- struct pci_dev *pcidev = (struct pci_dev *)hw->dev_context;
-
- if (!mem || !mem->va)
- return;
-
- dma_free_coherent(&pcidev->dev, mem->size,
- mem->va, (dma_addr_t)mem->pa);
- mem->va = NULL;
-}
-
-/**
- * i40iw_allocate_virt_mem - virtual memory alloc helper fn
- * @hw: pointer to the HW structure
- * @mem: ptr to mem struct to fill out
- * @size: size of memory requested
- */
-enum i40iw_status_code i40iw_allocate_virt_mem(struct i40iw_hw *hw,
- struct i40iw_virt_mem *mem,
- u32 size)
-{
- if (!mem)
- return I40IW_ERR_PARAM;
-
- mem->size = size;
- mem->va = kzalloc(size, GFP_KERNEL);
-
- if (mem->va)
- return 0;
- else
- return I40IW_ERR_NO_MEMORY;
-}
-
-/**
- * i40iw_free_virt_mem - virtual memory free helper fn
- * @hw: pointer to the HW structure
- * @mem: ptr to mem struct to free
- */
-enum i40iw_status_code i40iw_free_virt_mem(struct i40iw_hw *hw,
- struct i40iw_virt_mem *mem)
-{
- if (!mem)
- return I40IW_ERR_PARAM;
- /*
- * mem->va points to the parent of mem, so both mem and mem->va
- * can not be touched once mem->va is freed
- */
- kfree(mem->va);
- return 0;
-}
-
-/**
- * i40iw_cqp_sds_cmd - create cqp command for sd
- * @dev: hardware control device structure
- * @sd_info: information for sd cqp
- *
- */
-enum i40iw_status_code i40iw_cqp_sds_cmd(struct i40iw_sc_dev *dev,
- struct i40iw_update_sds_info *sdinfo)
-{
- enum i40iw_status_code status;
- struct i40iw_cqp_request *cqp_request;
- struct cqp_commands_info *cqp_info;
- struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev;
-
- cqp_request = i40iw_get_cqp_request(&iwdev->cqp, true);
- if (!cqp_request)
- return I40IW_ERR_NO_MEMORY;
- cqp_info = &cqp_request->info;
- memcpy(&cqp_info->in.u.update_pe_sds.info, sdinfo,
- sizeof(cqp_info->in.u.update_pe_sds.info));
- cqp_info->cqp_cmd = OP_UPDATE_PE_SDS;
- cqp_info->post_sq = 1;
- cqp_info->in.u.update_pe_sds.dev = dev;
- cqp_info->in.u.update_pe_sds.scratch = (uintptr_t)cqp_request;
- status = i40iw_handle_cqp_op(iwdev, cqp_request);
- if (status)
- i40iw_pr_err("CQP-OP Update SD's fail");
- return status;
-}
-
-/**
- * i40iw_qp_suspend_resume - cqp command for suspend/resume
- * @dev: hardware control device structure
- * @qp: hardware control qp
- * @suspend: flag if suspend or resume
- */
-void i40iw_qp_suspend_resume(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp, bool suspend)
-{
- struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev;
- struct i40iw_cqp_request *cqp_request;
- struct i40iw_sc_cqp *cqp = dev->cqp;
- struct cqp_commands_info *cqp_info;
- enum i40iw_status_code status;
-
- cqp_request = i40iw_get_cqp_request(&iwdev->cqp, false);
- if (!cqp_request)
- return;
-
- cqp_info = &cqp_request->info;
- cqp_info->cqp_cmd = (suspend) ? OP_SUSPEND : OP_RESUME;
- cqp_info->in.u.suspend_resume.cqp = cqp;
- cqp_info->in.u.suspend_resume.qp = qp;
- cqp_info->in.u.suspend_resume.scratch = (uintptr_t)cqp_request;
- status = i40iw_handle_cqp_op(iwdev, cqp_request);
- if (status)
- i40iw_pr_err("CQP-OP QP Suspend/Resume fail");
-}
-
-/**
- * i40iw_qp_mss_modify - modify mss for qp
- * @dev: hardware control device structure
- * @qp: hardware control qp
- */
-void i40iw_qp_mss_modify(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp)
-{
- struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev;
- struct i40iw_qp *iwqp = (struct i40iw_qp *)qp->back_qp;
- struct i40iw_modify_qp_info info;
-
- memset(&info, 0, sizeof(info));
- info.mss_change = true;
- info.new_mss = qp->vsi->mss;
- i40iw_hw_modify_qp(iwdev, iwqp, &info, false);
-}
-
-/**
- * i40iw_term_modify_qp - modify qp for term message
- * @qp: hardware control qp
- * @next_state: qp's next state
- * @term: terminate code
- * @term_len: length
- */
-void i40iw_term_modify_qp(struct i40iw_sc_qp *qp, u8 next_state, u8 term, u8 term_len)
-{
- struct i40iw_qp *iwqp;
-
- iwqp = (struct i40iw_qp *)qp->back_qp;
- i40iw_next_iw_state(iwqp, next_state, 0, term, term_len);
-};
-
-/**
- * i40iw_terminate_done - after terminate is completed
- * @qp: hardware control qp
- * @timeout_occurred: indicates if terminate timer expired
- */
-void i40iw_terminate_done(struct i40iw_sc_qp *qp, int timeout_occurred)
-{
- struct i40iw_qp *iwqp;
- u32 next_iwarp_state = I40IW_QP_STATE_ERROR;
- u8 hte = 0;
- bool first_time;
- unsigned long flags;
-
- iwqp = (struct i40iw_qp *)qp->back_qp;
- spin_lock_irqsave(&iwqp->lock, flags);
- if (iwqp->hte_added) {
- iwqp->hte_added = 0;
- hte = 1;
- }
- first_time = !(qp->term_flags & I40IW_TERM_DONE);
- qp->term_flags |= I40IW_TERM_DONE;
- spin_unlock_irqrestore(&iwqp->lock, flags);
- if (first_time) {
- if (!timeout_occurred)
- i40iw_terminate_del_timer(qp);
- else
- next_iwarp_state = I40IW_QP_STATE_CLOSING;
-
- i40iw_next_iw_state(iwqp, next_iwarp_state, hte, 0, 0);
- i40iw_cm_disconn(iwqp);
- }
-}
-
-/**
- * i40iw_terminate_imeout - timeout happened
- * @context: points to iwarp qp
- */
-static void i40iw_terminate_timeout(unsigned long context)
-{
- struct i40iw_qp *iwqp = (struct i40iw_qp *)context;
- struct i40iw_sc_qp *qp = (struct i40iw_sc_qp *)&iwqp->sc_qp;
-
- i40iw_terminate_done(qp, 1);
- i40iw_rem_ref(&iwqp->ibqp);
-}
-
-/**
- * i40iw_terminate_start_timer - start terminate timeout
- * @qp: hardware control qp
- */
-void i40iw_terminate_start_timer(struct i40iw_sc_qp *qp)
-{
- struct i40iw_qp *iwqp;
-
- iwqp = (struct i40iw_qp *)qp->back_qp;
- i40iw_add_ref(&iwqp->ibqp);
- init_timer(&iwqp->terminate_timer);
- iwqp->terminate_timer.function = i40iw_terminate_timeout;
- iwqp->terminate_timer.expires = jiffies + HZ;
- iwqp->terminate_timer.data = (unsigned long)iwqp;
- add_timer(&iwqp->terminate_timer);
-}
-
-/**
- * i40iw_terminate_del_timer - delete terminate timeout
- * @qp: hardware control qp
- */
-void i40iw_terminate_del_timer(struct i40iw_sc_qp *qp)
-{
- struct i40iw_qp *iwqp;
-
- iwqp = (struct i40iw_qp *)qp->back_qp;
- if (del_timer(&iwqp->terminate_timer))
- i40iw_rem_ref(&iwqp->ibqp);
-}
-
-/**
- * i40iw_cqp_generic_worker - generic worker for cqp
- * @work: work pointer
- */
-static void i40iw_cqp_generic_worker(struct work_struct *work)
-{
- struct i40iw_virtchnl_work_info *work_info =
- &((struct virtchnl_work *)work)->work_info;
-
- if (work_info->worker_vf_dev)
- work_info->callback_fcn(work_info->worker_vf_dev);
-}
-
-/**
- * i40iw_cqp_spawn_worker - spawn worket thread
- * @iwdev: device struct pointer
- * @work_info: work request info
- * @iw_vf_idx: virtual function index
- */
-void i40iw_cqp_spawn_worker(struct i40iw_sc_dev *dev,
- struct i40iw_virtchnl_work_info *work_info,
- u32 iw_vf_idx)
-{
- struct virtchnl_work *work;
- struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev;
-
- work = &iwdev->virtchnl_w[iw_vf_idx];
- memcpy(&work->work_info, work_info, sizeof(*work_info));
- INIT_WORK(&work->work, i40iw_cqp_generic_worker);
- queue_work(iwdev->virtchnl_wq, &work->work);
-}
-
-/**
- * i40iw_cqp_manage_hmc_fcn_worker -
- * @work: work pointer for hmc info
- */
-static void i40iw_cqp_manage_hmc_fcn_worker(struct work_struct *work)
-{
- struct i40iw_cqp_request *cqp_request =
- ((struct virtchnl_work *)work)->cqp_request;
- struct i40iw_ccq_cqe_info ccq_cqe_info;
- struct i40iw_hmc_fcn_info *hmcfcninfo =
- &cqp_request->info.in.u.manage_hmc_pm.info;
- struct i40iw_device *iwdev =
- (struct i40iw_device *)cqp_request->info.in.u.manage_hmc_pm.dev->back_dev;
-
- ccq_cqe_info.cqp = NULL;
- ccq_cqe_info.maj_err_code = cqp_request->compl_info.maj_err_code;
- ccq_cqe_info.min_err_code = cqp_request->compl_info.min_err_code;
- ccq_cqe_info.op_code = cqp_request->compl_info.op_code;
- ccq_cqe_info.op_ret_val = cqp_request->compl_info.op_ret_val;
- ccq_cqe_info.scratch = 0;
- ccq_cqe_info.error = cqp_request->compl_info.error;
- hmcfcninfo->callback_fcn(cqp_request->info.in.u.manage_hmc_pm.dev,
- hmcfcninfo->cqp_callback_param, &ccq_cqe_info);
- i40iw_put_cqp_request(&iwdev->cqp, cqp_request);
-}
-
-/**
- * i40iw_cqp_manage_hmc_fcn_callback - called function after cqp completion
- * @cqp_request: cqp request info struct for hmc fun
- * @unused: unused param of callback
- */
-static void i40iw_cqp_manage_hmc_fcn_callback(struct i40iw_cqp_request *cqp_request,
- u32 unused)
-{
- struct virtchnl_work *work;
- struct i40iw_hmc_fcn_info *hmcfcninfo =
- &cqp_request->info.in.u.manage_hmc_pm.info;
- struct i40iw_device *iwdev =
- (struct i40iw_device *)cqp_request->info.in.u.manage_hmc_pm.dev->
- back_dev;
-
- if (hmcfcninfo && hmcfcninfo->callback_fcn) {
- i40iw_debug(&iwdev->sc_dev, I40IW_DEBUG_HMC, "%s1\n", __func__);
- atomic_inc(&cqp_request->refcount);
- work = &iwdev->virtchnl_w[hmcfcninfo->iw_vf_idx];
- work->cqp_request = cqp_request;
- INIT_WORK(&work->work, i40iw_cqp_manage_hmc_fcn_worker);
- queue_work(iwdev->virtchnl_wq, &work->work);
- i40iw_debug(&iwdev->sc_dev, I40IW_DEBUG_HMC, "%s2\n", __func__);
- } else {
- i40iw_debug(&iwdev->sc_dev, I40IW_DEBUG_HMC, "%s: Something wrong\n", __func__);
- }
-}
-
-/**
- * i40iw_cqp_manage_hmc_fcn_cmd - issue cqp command to manage hmc
- * @dev: hardware control device structure
- * @hmcfcninfo: info for hmc
- */
-enum i40iw_status_code i40iw_cqp_manage_hmc_fcn_cmd(struct i40iw_sc_dev *dev,
- struct i40iw_hmc_fcn_info *hmcfcninfo)
-{
- enum i40iw_status_code status;
- struct i40iw_cqp_request *cqp_request;
- struct cqp_commands_info *cqp_info;
- struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev;
-
- i40iw_debug(&iwdev->sc_dev, I40IW_DEBUG_HMC, "%s\n", __func__);
- cqp_request = i40iw_get_cqp_request(&iwdev->cqp, false);
- if (!cqp_request)
- return I40IW_ERR_NO_MEMORY;
- cqp_info = &cqp_request->info;
- cqp_request->callback_fcn = i40iw_cqp_manage_hmc_fcn_callback;
- cqp_request->param = hmcfcninfo;
- memcpy(&cqp_info->in.u.manage_hmc_pm.info, hmcfcninfo,
- sizeof(*hmcfcninfo));
- cqp_info->in.u.manage_hmc_pm.dev = dev;
- cqp_info->cqp_cmd = OP_MANAGE_HMC_PM_FUNC_TABLE;
- cqp_info->post_sq = 1;
- cqp_info->in.u.manage_hmc_pm.scratch = (uintptr_t)cqp_request;
- status = i40iw_handle_cqp_op(iwdev, cqp_request);
- if (status)
- i40iw_pr_err("CQP-OP Manage HMC fail");
- return status;
-}
-
-/**
- * i40iw_cqp_query_fpm_values_cmd - send cqp command for fpm
- * @iwdev: function device struct
- * @values_mem: buffer for fpm
- * @hmc_fn_id: function id for fpm
- */
-enum i40iw_status_code i40iw_cqp_query_fpm_values_cmd(struct i40iw_sc_dev *dev,
- struct i40iw_dma_mem *values_mem,
- u8 hmc_fn_id)
-{
- enum i40iw_status_code status;
- struct i40iw_cqp_request *cqp_request;
- struct cqp_commands_info *cqp_info;
- struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev;
-
- cqp_request = i40iw_get_cqp_request(&iwdev->cqp, true);
- if (!cqp_request)
- return I40IW_ERR_NO_MEMORY;
- cqp_info = &cqp_request->info;
- cqp_request->param = NULL;
- cqp_info->in.u.query_fpm_values.cqp = dev->cqp;
- cqp_info->in.u.query_fpm_values.fpm_values_pa = values_mem->pa;
- cqp_info->in.u.query_fpm_values.fpm_values_va = values_mem->va;
- cqp_info->in.u.query_fpm_values.hmc_fn_id = hmc_fn_id;
- cqp_info->cqp_cmd = OP_QUERY_FPM_VALUES;
- cqp_info->post_sq = 1;
- cqp_info->in.u.query_fpm_values.scratch = (uintptr_t)cqp_request;
- status = i40iw_handle_cqp_op(iwdev, cqp_request);
- if (status)
- i40iw_pr_err("CQP-OP Query FPM fail");
- return status;
-}
-
-/**
- * i40iw_cqp_commit_fpm_values_cmd - commit fpm values in hw
- * @dev: hardware control device structure
- * @values_mem: buffer with fpm values
- * @hmc_fn_id: function id for fpm
- */
-enum i40iw_status_code i40iw_cqp_commit_fpm_values_cmd(struct i40iw_sc_dev *dev,
- struct i40iw_dma_mem *values_mem,
- u8 hmc_fn_id)
-{
- enum i40iw_status_code status;
- struct i40iw_cqp_request *cqp_request;
- struct cqp_commands_info *cqp_info;
- struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev;
-
- cqp_request = i40iw_get_cqp_request(&iwdev->cqp, true);
- if (!cqp_request)
- return I40IW_ERR_NO_MEMORY;
- cqp_info = &cqp_request->info;
- cqp_request->param = NULL;
- cqp_info->in.u.commit_fpm_values.cqp = dev->cqp;
- cqp_info->in.u.commit_fpm_values.fpm_values_pa = values_mem->pa;
- cqp_info->in.u.commit_fpm_values.fpm_values_va = values_mem->va;
- cqp_info->in.u.commit_fpm_values.hmc_fn_id = hmc_fn_id;
- cqp_info->cqp_cmd = OP_COMMIT_FPM_VALUES;
- cqp_info->post_sq = 1;
- cqp_info->in.u.commit_fpm_values.scratch = (uintptr_t)cqp_request;
- status = i40iw_handle_cqp_op(iwdev, cqp_request);
- if (status)
- i40iw_pr_err("CQP-OP Commit FPM fail");
- return status;
-}
-
-/**
- * i40iw_vf_wait_vchnl_resp - wait for channel msg
- * @iwdev: function's device struct
- */
-enum i40iw_status_code i40iw_vf_wait_vchnl_resp(struct i40iw_sc_dev *dev)
-{
- struct i40iw_device *iwdev = dev->back_dev;
- int timeout_ret;
-
- i40iw_debug(dev, I40IW_DEBUG_VIRT, "%s[%u] dev %p, iwdev %p\n",
- __func__, __LINE__, dev, iwdev);
-
- atomic_set(&iwdev->vchnl_msgs, 2);
- timeout_ret = wait_event_timeout(iwdev->vchnl_waitq,
- (atomic_read(&iwdev->vchnl_msgs) == 1),
- I40IW_VCHNL_EVENT_TIMEOUT);
- atomic_dec(&iwdev->vchnl_msgs);
- if (!timeout_ret) {
- i40iw_pr_err("virt channel completion timeout = 0x%x\n", timeout_ret);
- atomic_set(&iwdev->vchnl_msgs, 0);
- dev->vchnl_up = false;
- return I40IW_ERR_TIMEOUT;
- }
- wake_up(&dev->vf_reqs);
- return 0;
-}
-
-/**
- * i40iw_cqp_cq_create_cmd - create a cq for the cqp
- * @dev: device pointer
- * @cq: pointer to created cq
- */
-enum i40iw_status_code i40iw_cqp_cq_create_cmd(struct i40iw_sc_dev *dev,
- struct i40iw_sc_cq *cq)
-{
- struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev;
- struct i40iw_cqp *iwcqp = &iwdev->cqp;
- struct i40iw_cqp_request *cqp_request;
- struct cqp_commands_info *cqp_info;
- enum i40iw_status_code status;
-
- cqp_request = i40iw_get_cqp_request(iwcqp, true);
- if (!cqp_request)
- return I40IW_ERR_NO_MEMORY;
-
- cqp_info = &cqp_request->info;
- cqp_info->cqp_cmd = OP_CQ_CREATE;
- cqp_info->post_sq = 1;
- cqp_info->in.u.cq_create.cq = cq;
- cqp_info->in.u.cq_create.scratch = (uintptr_t)cqp_request;
- status = i40iw_handle_cqp_op(iwdev, cqp_request);
- if (status)
- i40iw_pr_err("CQP-OP Create QP fail");
-
- return status;
-}
-
-/**
- * i40iw_cqp_qp_create_cmd - create a qp for the cqp
- * @dev: device pointer
- * @qp: pointer to created qp
- */
-enum i40iw_status_code i40iw_cqp_qp_create_cmd(struct i40iw_sc_dev *dev,
- struct i40iw_sc_qp *qp)
-{
- struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev;
- struct i40iw_cqp *iwcqp = &iwdev->cqp;
- struct i40iw_cqp_request *cqp_request;
- struct cqp_commands_info *cqp_info;
- struct i40iw_create_qp_info *qp_info;
- enum i40iw_status_code status;
-
- cqp_request = i40iw_get_cqp_request(iwcqp, true);
- if (!cqp_request)
- return I40IW_ERR_NO_MEMORY;
-
- cqp_info = &cqp_request->info;
- qp_info = &cqp_request->info.in.u.qp_create.info;
-
- memset(qp_info, 0, sizeof(*qp_info));
-
- qp_info->cq_num_valid = true;
- qp_info->next_iwarp_state = I40IW_QP_STATE_RTS;
-
- cqp_info->cqp_cmd = OP_QP_CREATE;
- cqp_info->post_sq = 1;
- cqp_info->in.u.qp_create.qp = qp;
- cqp_info->in.u.qp_create.scratch = (uintptr_t)cqp_request;
- status = i40iw_handle_cqp_op(iwdev, cqp_request);
- if (status)
- i40iw_pr_err("CQP-OP QP create fail");
- return status;
-}
-
-/**
- * i40iw_cqp_cq_destroy_cmd - destroy the cqp cq
- * @dev: device pointer
- * @cq: pointer to cq
- */
-void i40iw_cqp_cq_destroy_cmd(struct i40iw_sc_dev *dev, struct i40iw_sc_cq *cq)
-{
- struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev;
-
- i40iw_cq_wq_destroy(iwdev, cq);
-}
-
-/**
- * i40iw_cqp_qp_destroy_cmd - destroy the cqp
- * @dev: device pointer
- * @qp: pointer to qp
- */
-void i40iw_cqp_qp_destroy_cmd(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp)
-{
- struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev;
- struct i40iw_cqp *iwcqp = &iwdev->cqp;
- struct i40iw_cqp_request *cqp_request;
- struct cqp_commands_info *cqp_info;
- enum i40iw_status_code status;
-
- cqp_request = i40iw_get_cqp_request(iwcqp, true);
- if (!cqp_request)
- return;
-
- cqp_info = &cqp_request->info;
- memset(cqp_info, 0, sizeof(*cqp_info));
-
- cqp_info->cqp_cmd = OP_QP_DESTROY;
- cqp_info->post_sq = 1;
- cqp_info->in.u.qp_destroy.qp = qp;
- cqp_info->in.u.qp_destroy.scratch = (uintptr_t)cqp_request;
- cqp_info->in.u.qp_destroy.remove_hash_idx = true;
- status = i40iw_handle_cqp_op(iwdev, cqp_request);
- if (status)
- i40iw_pr_err("CQP QP_DESTROY fail");
-}
-
-
-/**
- * i40iw_ieq_mpa_crc_ae - generate AE for crc error
- * @dev: hardware control device structure
- * @qp: hardware control qp
- */
-void i40iw_ieq_mpa_crc_ae(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp)
-{
- struct i40iw_qp_flush_info info;
- struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev;
-
- i40iw_debug(dev, I40IW_DEBUG_AEQ, "%s entered\n", __func__);
- memset(&info, 0, sizeof(info));
- info.ae_code = I40IW_AE_LLP_RECEIVED_MPA_CRC_ERROR;
- info.generate_ae = true;
- info.ae_source = 0x3;
- (void)i40iw_hw_flush_wqes(iwdev, qp, &info, false);
-}
-
-/**
- * i40iw_init_hash_desc - initialize hash for crc calculation
- * @desc: cryption type
- */
-enum i40iw_status_code i40iw_init_hash_desc(struct shash_desc **desc)
-{
- struct crypto_shash *tfm;
- struct shash_desc *tdesc;
-
- tfm = crypto_alloc_shash("crc32c", 0, 0);
- if (IS_ERR(tfm))
- return I40IW_ERR_MPA_CRC;
-
- tdesc = kzalloc(sizeof(*tdesc) + crypto_shash_descsize(tfm),
- GFP_KERNEL);
- if (!tdesc) {
- crypto_free_shash(tfm);
- return I40IW_ERR_MPA_CRC;
- }
- tdesc->tfm = tfm;
- *desc = tdesc;
-
- return 0;
-}
-
-/**
- * i40iw_free_hash_desc - free hash desc
- * @desc: to be freed
- */
-void i40iw_free_hash_desc(struct shash_desc *desc)
-{
- if (desc) {
- crypto_free_shash(desc->tfm);
- kfree(desc);
- }
-}
-
-/**
- * i40iw_alloc_query_fpm_buf - allocate buffer for fpm
- * @dev: hardware control device structure
- * @mem: buffer ptr for fpm to be allocated
- * @return: memory allocation status
- */
-enum i40iw_status_code i40iw_alloc_query_fpm_buf(struct i40iw_sc_dev *dev,
- struct i40iw_dma_mem *mem)
-{
- enum i40iw_status_code status;
- struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev;
-
- status = i40iw_obj_aligned_mem(iwdev, mem, I40IW_QUERY_FPM_BUF_SIZE,
- I40IW_FPM_QUERY_BUF_ALIGNMENT_MASK);
- return status;
-}
-
-/**
- * i40iw_ieq_check_mpacrc - check if mpa crc is OK
- * @desc: desc for hash
- * @addr: address of buffer for crc
- * @length: length of buffer
- * @value: value to be compared
- */
-enum i40iw_status_code i40iw_ieq_check_mpacrc(struct shash_desc *desc,
- void *addr,
- u32 length,
- u32 value)
-{
- u32 crc = 0;
- int ret;
- enum i40iw_status_code ret_code = 0;
-
- crypto_shash_init(desc);
- ret = crypto_shash_update(desc, addr, length);
- if (!ret)
- crypto_shash_final(desc, (u8 *)&crc);
- if (crc != value) {
- i40iw_pr_err("mpa crc check fail\n");
- ret_code = I40IW_ERR_MPA_CRC;
- }
- return ret_code;
-}
-
-/**
- * i40iw_ieq_get_qp - get qp based on quad in puda buffer
- * @dev: hardware control device structure
- * @buf: receive puda buffer on exception q
- */
-struct i40iw_sc_qp *i40iw_ieq_get_qp(struct i40iw_sc_dev *dev,
- struct i40iw_puda_buf *buf)
-{
- struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev;
- struct i40iw_qp *iwqp;
- struct i40iw_cm_node *cm_node;
- u32 loc_addr[4], rem_addr[4];
- u16 loc_port, rem_port;
- struct ipv6hdr *ip6h;
- struct iphdr *iph = (struct iphdr *)buf->iph;
- struct tcphdr *tcph = (struct tcphdr *)buf->tcph;
-
- if (iph->version == 4) {
- memset(loc_addr, 0, sizeof(loc_addr));
- loc_addr[0] = ntohl(iph->daddr);
- memset(rem_addr, 0, sizeof(rem_addr));
- rem_addr[0] = ntohl(iph->saddr);
- } else {
- ip6h = (struct ipv6hdr *)buf->iph;
- i40iw_copy_ip_ntohl(loc_addr, ip6h->daddr.in6_u.u6_addr32);
- i40iw_copy_ip_ntohl(rem_addr, ip6h->saddr.in6_u.u6_addr32);
- }
- loc_port = ntohs(tcph->dest);
- rem_port = ntohs(tcph->source);
-
- cm_node = i40iw_find_node(&iwdev->cm_core, rem_port, rem_addr, loc_port,
- loc_addr, false);
- if (!cm_node)
- return NULL;
- iwqp = cm_node->iwqp;
- return &iwqp->sc_qp;
-}
-
-/**
- * i40iw_ieq_update_tcpip_info - update tcpip in the buffer
- * @buf: puda to update
- * @length: length of buffer
- * @seqnum: seq number for tcp
- */
-void i40iw_ieq_update_tcpip_info(struct i40iw_puda_buf *buf, u16 length, u32 seqnum)
-{
- struct tcphdr *tcph;
- struct iphdr *iph;
- u16 iphlen;
- u16 packetsize;
- u8 *addr = (u8 *)buf->mem.va;
-
- iphlen = (buf->ipv4) ? 20 : 40;
- iph = (struct iphdr *)(addr + buf->maclen);
- tcph = (struct tcphdr *)(addr + buf->maclen + iphlen);
- packetsize = length + buf->tcphlen + iphlen;
-
- iph->tot_len = htons(packetsize);
- tcph->seq = htonl(seqnum);
-}
-
-/**
- * i40iw_puda_get_tcpip_info - get tcpip info from puda buffer
- * @info: to get information
- * @buf: puda buffer
- */
-enum i40iw_status_code i40iw_puda_get_tcpip_info(struct i40iw_puda_completion_info *info,
- struct i40iw_puda_buf *buf)
-{
- struct iphdr *iph;
- struct ipv6hdr *ip6h;
- struct tcphdr *tcph;
- u16 iphlen;
- u16 pkt_len;
- u8 *mem = (u8 *)buf->mem.va;
- struct ethhdr *ethh = (struct ethhdr *)buf->mem.va;
-
- if (ethh->h_proto == htons(0x8100)) {
- info->vlan_valid = true;
- buf->vlan_id = ntohs(((struct vlan_ethhdr *)ethh)->h_vlan_TCI) & VLAN_VID_MASK;
- }
- buf->maclen = (info->vlan_valid) ? 18 : 14;
- iphlen = (info->l3proto) ? 40 : 20;
- buf->ipv4 = (info->l3proto) ? false : true;
- buf->iph = mem + buf->maclen;
- iph = (struct iphdr *)buf->iph;
-
- buf->tcph = buf->iph + iphlen;
- tcph = (struct tcphdr *)buf->tcph;
-
- if (buf->ipv4) {
- pkt_len = ntohs(iph->tot_len);
- } else {
- ip6h = (struct ipv6hdr *)buf->iph;
- pkt_len = ntohs(ip6h->payload_len) + iphlen;
- }
-
- buf->totallen = pkt_len + buf->maclen;
-
- if (info->payload_len < buf->totallen) {
- i40iw_pr_err("payload_len = 0x%x totallen expected0x%x\n",
- info->payload_len, buf->totallen);
- return I40IW_ERR_INVALID_SIZE;
- }
-
- buf->tcphlen = (tcph->doff) << 2;
- buf->datalen = pkt_len - iphlen - buf->tcphlen;
- buf->data = (buf->datalen) ? buf->tcph + buf->tcphlen : NULL;
- buf->hdrlen = buf->maclen + iphlen + buf->tcphlen;
- buf->seqnum = ntohl(tcph->seq);
- return 0;
-}
-
-/**
- * i40iw_hw_stats_timeout - Stats timer-handler which updates all HW stats
- * @vsi: pointer to the vsi structure
- */
-static void i40iw_hw_stats_timeout(unsigned long vsi)
-{
- struct i40iw_sc_vsi *sc_vsi = (struct i40iw_sc_vsi *)vsi;
- struct i40iw_sc_dev *pf_dev = sc_vsi->dev;
- struct i40iw_vsi_pestat *pf_devstat = sc_vsi->pestat;
- struct i40iw_vsi_pestat *vf_devstat = NULL;
- u16 iw_vf_idx;
- unsigned long flags;
-
- /*PF*/
- i40iw_hw_stats_read_all(pf_devstat, &pf_devstat->hw_stats);
-
- for (iw_vf_idx = 0; iw_vf_idx < I40IW_MAX_PE_ENABLED_VF_COUNT; iw_vf_idx++) {
- spin_lock_irqsave(&pf_devstat->lock, flags);
- if (pf_dev->vf_dev[iw_vf_idx]) {
- if (pf_dev->vf_dev[iw_vf_idx]->stats_initialized) {
- vf_devstat = &pf_dev->vf_dev[iw_vf_idx]->pestat;
- i40iw_hw_stats_read_all(vf_devstat, &vf_devstat->hw_stats);
- }
- }
- spin_unlock_irqrestore(&pf_devstat->lock, flags);
- }
-
- mod_timer(&pf_devstat->stats_timer,
- jiffies + msecs_to_jiffies(STATS_TIMER_DELAY));
-}
-
-/**
- * i40iw_hw_stats_start_timer - Start periodic stats timer
- * @vsi: pointer to the vsi structure
- */
-void i40iw_hw_stats_start_timer(struct i40iw_sc_vsi *vsi)
-{
- struct i40iw_vsi_pestat *devstat = vsi->pestat;
-
- init_timer(&devstat->stats_timer);
- devstat->stats_timer.function = i40iw_hw_stats_timeout;
- devstat->stats_timer.data = (unsigned long)vsi;
- mod_timer(&devstat->stats_timer,
- jiffies + msecs_to_jiffies(STATS_TIMER_DELAY));
-}
-
-/**
- * i40iw_hw_stats_stop_timer - Delete periodic stats timer
- * @vsi: pointer to the vsi structure
- */
-void i40iw_hw_stats_stop_timer(struct i40iw_sc_vsi *vsi)
-{
- struct i40iw_vsi_pestat *devstat = vsi->pestat;
-
- del_timer_sync(&devstat->stats_timer);
-}
diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
deleted file mode 100644
index 29e97df9e1a7..000000000000
--- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c
+++ /dev/null
@@ -1,2909 +0,0 @@
-/*******************************************************************************
-*
-* Copyright (c) 2015-2016 Intel Corporation. All rights reserved.
-*
-* This software is available to you under a choice of one of two
-* licenses. You may choose to be licensed under the terms of the GNU
-* General Public License (GPL) Version 2, available from the file
-* COPYING in the main directory of this source tree, or the
-* OpenFabrics.org BSD license below:
-*
-* Redistribution and use in source and binary forms, with or
-* without modification, are permitted provided that the following
-* conditions are met:
-*
-* - Redistributions of source code must retain the above
-* copyright notice, this list of conditions and the following
-* disclaimer.
-*
-* - Redistributions in binary form must reproduce the above
-* copyright notice, this list of conditions and the following
-* disclaimer in the documentation and/or other materials
-* provided with the distribution.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-* SOFTWARE.
-*
-*******************************************************************************/
-
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/random.h>
-#include <linux/highmem.h>
-#include <linux/time.h>
-#include <linux/hugetlb.h>
-#include <asm/byteorder.h>
-#include <net/ip.h>
-#include <rdma/ib_verbs.h>
-#include <rdma/iw_cm.h>
-#include <rdma/ib_user_verbs.h>
-#include <rdma/ib_umem.h>
-#include "i40iw.h"
-
-/**
- * i40iw_query_device - get device attributes
- * @ibdev: device pointer from stack
- * @props: returning device attributes
- * @udata: user data
- */
-static int i40iw_query_device(struct ib_device *ibdev,
- struct ib_device_attr *props,
- struct ib_udata *udata)
-{
- struct i40iw_device *iwdev = to_iwdev(ibdev);
-
- if (udata->inlen || udata->outlen)
- return -EINVAL;
- memset(props, 0, sizeof(*props));
- ether_addr_copy((u8 *)&props->sys_image_guid, iwdev->netdev->dev_addr);
- props->fw_ver = I40IW_FW_VERSION;
- props->device_cap_flags = iwdev->device_cap_flags;
- props->vendor_id = iwdev->ldev->pcidev->vendor;
- props->vendor_part_id = iwdev->ldev->pcidev->device;
- props->hw_ver = (u32)iwdev->sc_dev.hw_rev;
- props->max_mr_size = I40IW_MAX_OUTBOUND_MESSAGE_SIZE;
- props->max_qp = iwdev->max_qp - iwdev->used_qps;
- props->max_qp_wr = (I40IW_MAX_WQ_ENTRIES >> 2) - 1;
- props->max_sge = I40IW_MAX_WQ_FRAGMENT_COUNT;
- props->max_cq = iwdev->max_cq - iwdev->used_cqs;
- props->max_cqe = iwdev->max_cqe;
- props->max_mr = iwdev->max_mr - iwdev->used_mrs;
- props->max_pd = iwdev->max_pd - iwdev->used_pds;
- props->max_sge_rd = I40IW_MAX_SGE_RD;
- props->max_qp_rd_atom = I40IW_MAX_IRD_SIZE;
- props->max_qp_init_rd_atom = props->max_qp_rd_atom;
- props->atomic_cap = IB_ATOMIC_NONE;
- props->max_map_per_fmr = 1;
- props->max_fast_reg_page_list_len = I40IW_MAX_PAGES_PER_FMR;
- return 0;
-}
-
-/**
- * i40iw_query_port - get port attrubutes
- * @ibdev: device pointer from stack
- * @port: port number for query
- * @props: returning device attributes
- */
-static int i40iw_query_port(struct ib_device *ibdev,
- u8 port,
- struct ib_port_attr *props)
-{
- struct i40iw_device *iwdev = to_iwdev(ibdev);
- struct net_device *netdev = iwdev->netdev;
-
- memset(props, 0, sizeof(*props));
-
- props->max_mtu = IB_MTU_4096;
- if (netdev->mtu >= 4096)
- props->active_mtu = IB_MTU_4096;
- else if (netdev->mtu >= 2048)
- props->active_mtu = IB_MTU_2048;
- else if (netdev->mtu >= 1024)
- props->active_mtu = IB_MTU_1024;
- else if (netdev->mtu >= 512)
- props->active_mtu = IB_MTU_512;
- else
- props->active_mtu = IB_MTU_256;
-
- props->lid = 1;
- if (netif_carrier_ok(iwdev->netdev))
- props->state = IB_PORT_ACTIVE;
- else
- props->state = IB_PORT_DOWN;
- props->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_REINIT_SUP |
- IB_PORT_VENDOR_CLASS_SUP | IB_PORT_BOOT_MGMT_SUP;
- props->gid_tbl_len = 1;
- props->pkey_tbl_len = 1;
- props->active_width = IB_WIDTH_4X;
- props->active_speed = 1;
- props->max_msg_sz = I40IW_MAX_OUTBOUND_MESSAGE_SIZE;
- return 0;
-}
-
-/**
- * i40iw_alloc_ucontext - Allocate the user context data structure
- * @ibdev: device pointer from stack
- * @udata: user data
- *
- * This keeps track of all objects associated with a particular
- * user-mode client.
- */
-static struct ib_ucontext *i40iw_alloc_ucontext(struct ib_device *ibdev,
- struct ib_udata *udata)
-{
- struct i40iw_device *iwdev = to_iwdev(ibdev);
- struct i40iw_alloc_ucontext_req req;
- struct i40iw_alloc_ucontext_resp uresp;
- struct i40iw_ucontext *ucontext;
-
- if (ib_copy_from_udata(&req, udata, sizeof(req)))
- return ERR_PTR(-EINVAL);
-
- if (req.userspace_ver < 4 || req.userspace_ver > I40IW_ABI_VER) {
- i40iw_pr_err("Unsupported provider library version %u.\n", req.userspace_ver);
- return ERR_PTR(-EINVAL);
- }
-
- memset(&uresp, 0, sizeof(uresp));
- uresp.max_qps = iwdev->max_qp;
- uresp.max_pds = iwdev->max_pd;
- uresp.wq_size = iwdev->max_qp_wr * 2;
- uresp.kernel_ver = req.userspace_ver;
-
- ucontext = kzalloc(sizeof(*ucontext), GFP_KERNEL);
- if (!ucontext)
- return ERR_PTR(-ENOMEM);
-
- ucontext->iwdev = iwdev;
- ucontext->abi_ver = req.userspace_ver;
-
- if (ib_copy_to_udata(udata, &uresp, sizeof(uresp))) {
- kfree(ucontext);
- return ERR_PTR(-EFAULT);
- }
-
- INIT_LIST_HEAD(&ucontext->cq_reg_mem_list);
- spin_lock_init(&ucontext->cq_reg_mem_list_lock);
- INIT_LIST_HEAD(&ucontext->qp_reg_mem_list);
- spin_lock_init(&ucontext->qp_reg_mem_list_lock);
-
- return &ucontext->ibucontext;
-}
-
-/**
- * i40iw_dealloc_ucontext - deallocate the user context data structure
- * @context: user context created during alloc
- */
-static int i40iw_dealloc_ucontext(struct ib_ucontext *context)
-{
- struct i40iw_ucontext *ucontext = to_ucontext(context);
- unsigned long flags;
-
- spin_lock_irqsave(&ucontext->cq_reg_mem_list_lock, flags);
- if (!list_empty(&ucontext->cq_reg_mem_list)) {
- spin_unlock_irqrestore(&ucontext->cq_reg_mem_list_lock, flags);
- return -EBUSY;
- }
- spin_unlock_irqrestore(&ucontext->cq_reg_mem_list_lock, flags);
- spin_lock_irqsave(&ucontext->qp_reg_mem_list_lock, flags);
- if (!list_empty(&ucontext->qp_reg_mem_list)) {
- spin_unlock_irqrestore(&ucontext->qp_reg_mem_list_lock, flags);
- return -EBUSY;
- }
- spin_unlock_irqrestore(&ucontext->qp_reg_mem_list_lock, flags);
-
- kfree(ucontext);
- return 0;
-}
-
-/**
- * i40iw_mmap - user memory map
- * @context: context created during alloc
- * @vma: kernel info for user memory map
- */
-static int i40iw_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
-{
- struct i40iw_ucontext *ucontext;
- u64 db_addr_offset;
- u64 push_offset;
-
- ucontext = to_ucontext(context);
- if (ucontext->iwdev->sc_dev.is_pf) {
- db_addr_offset = I40IW_DB_ADDR_OFFSET;
- push_offset = I40IW_PUSH_OFFSET;
- if (vma->vm_pgoff)
- vma->vm_pgoff += I40IW_PF_FIRST_PUSH_PAGE_INDEX - 1;
- } else {
- db_addr_offset = I40IW_VF_DB_ADDR_OFFSET;
- push_offset = I40IW_VF_PUSH_OFFSET;
- if (vma->vm_pgoff)
- vma->vm_pgoff += I40IW_VF_FIRST_PUSH_PAGE_INDEX - 1;
- }
-
- vma->vm_pgoff += db_addr_offset >> PAGE_SHIFT;
-
- if (vma->vm_pgoff == (db_addr_offset >> PAGE_SHIFT)) {
- vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
- vma->vm_private_data = ucontext;
- } else {
- if ((vma->vm_pgoff - (push_offset >> PAGE_SHIFT)) % 2)
- vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
- else
- vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
- }
-
- if (io_remap_pfn_range(vma, vma->vm_start,
- vma->vm_pgoff + (pci_resource_start(ucontext->iwdev->ldev->pcidev, 0) >> PAGE_SHIFT),
- PAGE_SIZE, vma->vm_page_prot))
- return -EAGAIN;
-
- return 0;
-}
-
-/**
- * i40iw_alloc_push_page - allocate a push page for qp
- * @iwdev: iwarp device
- * @qp: hardware control qp
- */
-static void i40iw_alloc_push_page(struct i40iw_device *iwdev, struct i40iw_sc_qp *qp)
-{
- struct i40iw_cqp_request *cqp_request;
- struct cqp_commands_info *cqp_info;
- enum i40iw_status_code status;
-
- if (qp->push_idx != I40IW_INVALID_PUSH_PAGE_INDEX)
- return;
-
- cqp_request = i40iw_get_cqp_request(&iwdev->cqp, true);
- if (!cqp_request)
- return;
-
- atomic_inc(&cqp_request->refcount);
-
- cqp_info = &cqp_request->info;
- cqp_info->cqp_cmd = OP_MANAGE_PUSH_PAGE;
- cqp_info->post_sq = 1;
-
- cqp_info->in.u.manage_push_page.info.qs_handle = qp->qs_handle;
- cqp_info->in.u.manage_push_page.info.free_page = 0;
- cqp_info->in.u.manage_push_page.cqp = &iwdev->cqp.sc_cqp;
- cqp_info->in.u.manage_push_page.scratch = (uintptr_t)cqp_request;
-
- status = i40iw_handle_cqp_op(iwdev, cqp_request);
- if (!status)
- qp->push_idx = cqp_request->compl_info.op_ret_val;
- else
- i40iw_pr_err("CQP-OP Push page fail");
- i40iw_put_cqp_request(&iwdev->cqp, cqp_request);
-}
-
-/**
- * i40iw_dealloc_push_page - free a push page for qp
- * @iwdev: iwarp device
- * @qp: hardware control qp
- */
-static void i40iw_dealloc_push_page(struct i40iw_device *iwdev, struct i40iw_sc_qp *qp)
-{
- struct i40iw_cqp_request *cqp_request;
- struct cqp_commands_info *cqp_info;
- enum i40iw_status_code status;
-
- if (qp->push_idx == I40IW_INVALID_PUSH_PAGE_INDEX)
- return;
-
- cqp_request = i40iw_get_cqp_request(&iwdev->cqp, false);
- if (!cqp_request)
- return;
-
- cqp_info = &cqp_request->info;
- cqp_info->cqp_cmd = OP_MANAGE_PUSH_PAGE;
- cqp_info->post_sq = 1;
-
- cqp_info->in.u.manage_push_page.info.push_idx = qp->push_idx;
- cqp_info->in.u.manage_push_page.info.qs_handle = qp->qs_handle;
- cqp_info->in.u.manage_push_page.info.free_page = 1;
- cqp_info->in.u.manage_push_page.cqp = &iwdev->cqp.sc_cqp;
- cqp_info->in.u.manage_push_page.scratch = (uintptr_t)cqp_request;
-
- status = i40iw_handle_cqp_op(iwdev, cqp_request);
- if (!status)
- qp->push_idx = I40IW_INVALID_PUSH_PAGE_INDEX;
- else
- i40iw_pr_err("CQP-OP Push page fail");
-}
-
-/**
- * i40iw_alloc_pd - allocate protection domain
- * @ibdev: device pointer from stack
- * @context: user context created during alloc
- * @udata: user data
- */
-static struct ib_pd *i40iw_alloc_pd(struct ib_device *ibdev,
- struct ib_ucontext *context,
- struct ib_udata *udata)
-{
- struct i40iw_pd *iwpd;
- struct i40iw_device *iwdev = to_iwdev(ibdev);
- struct i40iw_sc_dev *dev = &iwdev->sc_dev;
- struct i40iw_alloc_pd_resp uresp;
- struct i40iw_sc_pd *sc_pd;
- struct i40iw_ucontext *ucontext;
- u32 pd_id = 0;
- int err;
-
- if (iwdev->closing)
- return ERR_PTR(-ENODEV);
-
- err = i40iw_alloc_resource(iwdev, iwdev->allocated_pds,
- iwdev->max_pd, &pd_id, &iwdev->next_pd);
- if (err) {
- i40iw_pr_err("alloc resource failed\n");
- return ERR_PTR(err);
- }
-
- iwpd = kzalloc(sizeof(*iwpd), GFP_KERNEL);
- if (!iwpd) {
- err = -ENOMEM;
- goto free_res;
- }
-
- sc_pd = &iwpd->sc_pd;
-
- if (context) {
- ucontext = to_ucontext(context);
- dev->iw_pd_ops->pd_init(dev, sc_pd, pd_id, ucontext->abi_ver);
- memset(&uresp, 0, sizeof(uresp));
- uresp.pd_id = pd_id;
- if (ib_copy_to_udata(udata, &uresp, sizeof(uresp))) {
- err = -EFAULT;
- goto error;
- }
- } else {
- dev->iw_pd_ops->pd_init(dev, sc_pd, pd_id, -1);
- }
-
- i40iw_add_pdusecount(iwpd);
- return &iwpd->ibpd;
-error:
- kfree(iwpd);
-free_res:
- i40iw_free_resource(iwdev, iwdev->allocated_pds, pd_id);
- return ERR_PTR(err);
-}
-
-/**
- * i40iw_dealloc_pd - deallocate pd
- * @ibpd: ptr of pd to be deallocated
- */
-static int i40iw_dealloc_pd(struct ib_pd *ibpd)
-{
- struct i40iw_pd *iwpd = to_iwpd(ibpd);
- struct i40iw_device *iwdev = to_iwdev(ibpd->device);
-
- i40iw_rem_pdusecount(iwpd, iwdev);
- return 0;
-}
-
-/**
- * i40iw_qp_roundup - return round up qp ring size
- * @wr_ring_size: ring size to round up
- */
-static int i40iw_qp_roundup(u32 wr_ring_size)
-{
- int scount = 1;
-
- if (wr_ring_size < I40IWQP_SW_MIN_WQSIZE)
- wr_ring_size = I40IWQP_SW_MIN_WQSIZE;
-
- for (wr_ring_size--; scount <= 16; scount *= 2)
- wr_ring_size |= wr_ring_size >> scount;
- return ++wr_ring_size;
-}
-
-/**
- * i40iw_get_pbl - Retrieve pbl from a list given a virtual
- * address
- * @va: user virtual address
- * @pbl_list: pbl list to search in (QP's or CQ's)
- */
-static struct i40iw_pbl *i40iw_get_pbl(unsigned long va,
- struct list_head *pbl_list)
-{
- struct i40iw_pbl *iwpbl;
-
- list_for_each_entry(iwpbl, pbl_list, list) {
- if (iwpbl->user_base == va) {
- list_del(&iwpbl->list);
- return iwpbl;
- }
- }
- return NULL;
-}
-
-/**
- * i40iw_free_qp_resources - free up memory resources for qp
- * @iwdev: iwarp device
- * @iwqp: qp ptr (user or kernel)
- * @qp_num: qp number assigned
- */
-void i40iw_free_qp_resources(struct i40iw_device *iwdev,
- struct i40iw_qp *iwqp,
- u32 qp_num)
-{
- i40iw_dealloc_push_page(iwdev, &iwqp->sc_qp);
- if (qp_num)
- i40iw_free_resource(iwdev, iwdev->allocated_qps, qp_num);
- i40iw_free_dma_mem(iwdev->sc_dev.hw, &iwqp->q2_ctx_mem);
- i40iw_free_dma_mem(iwdev->sc_dev.hw, &iwqp->kqp.dma_mem);
- kfree(iwqp->kqp.wrid_mem);
- iwqp->kqp.wrid_mem = NULL;
- kfree(iwqp->allocated_buffer);
-}
-
-/**
- * i40iw_clean_cqes - clean cq entries for qp
- * @iwqp: qp ptr (user or kernel)
- * @iwcq: cq ptr
- */
-static void i40iw_clean_cqes(struct i40iw_qp *iwqp, struct i40iw_cq *iwcq)
-{
- struct i40iw_cq_uk *ukcq = &iwcq->sc_cq.cq_uk;
-
- ukcq->ops.iw_cq_clean(&iwqp->sc_qp.qp_uk, ukcq);
-}
-
-/**
- * i40iw_destroy_qp - destroy qp
- * @ibqp: qp's ib pointer also to get to device's qp address
- */
-static int i40iw_destroy_qp(struct ib_qp *ibqp)
-{
- struct i40iw_qp *iwqp = to_iwqp(ibqp);
-
- iwqp->destroyed = 1;
-
- if (iwqp->ibqp_state >= IB_QPS_INIT && iwqp->ibqp_state < IB_QPS_RTS)
- i40iw_next_iw_state(iwqp, I40IW_QP_STATE_ERROR, 0, 0, 0);
-
- if (!iwqp->user_mode) {
- if (iwqp->iwscq) {
- i40iw_clean_cqes(iwqp, iwqp->iwscq);
- if (iwqp->iwrcq != iwqp->iwscq)
- i40iw_clean_cqes(iwqp, iwqp->iwrcq);
- }
- }
-
- i40iw_rem_ref(&iwqp->ibqp);
- return 0;
-}
-
-/**
- * i40iw_setup_virt_qp - setup for allocation of virtual qp
- * @dev: iwarp device
- * @qp: qp ptr
- * @init_info: initialize info to return
- */
-static int i40iw_setup_virt_qp(struct i40iw_device *iwdev,
- struct i40iw_qp *iwqp,
- struct i40iw_qp_init_info *init_info)
-{
- struct i40iw_pbl *iwpbl = iwqp->iwpbl;
- struct i40iw_qp_mr *qpmr = &iwpbl->qp_mr;
-
- iwqp->page = qpmr->sq_page;
- init_info->shadow_area_pa = cpu_to_le64(qpmr->shadow);
- if (iwpbl->pbl_allocated) {
- init_info->virtual_map = true;
- init_info->sq_pa = qpmr->sq_pbl.idx;
- init_info->rq_pa = qpmr->rq_pbl.idx;
- } else {
- init_info->sq_pa = qpmr->sq_pbl.addr;
- init_info->rq_pa = qpmr->rq_pbl.addr;
- }
- return 0;
-}
-
-/**
- * i40iw_setup_kmode_qp - setup initialization for kernel mode qp
- * @iwdev: iwarp device
- * @iwqp: qp ptr (user or kernel)
- * @info: initialize info to return
- */
-static int i40iw_setup_kmode_qp(struct i40iw_device *iwdev,
- struct i40iw_qp *iwqp,
- struct i40iw_qp_init_info *info)
-{
- struct i40iw_dma_mem *mem = &iwqp->kqp.dma_mem;
- u32 sqdepth, rqdepth;
- u32 sq_size, rq_size;
- u8 sqshift;
- u32 size;
- enum i40iw_status_code status;
- struct i40iw_qp_uk_init_info *ukinfo = &info->qp_uk_init_info;
-
- sq_size = i40iw_qp_roundup(ukinfo->sq_size + 1);
- rq_size = i40iw_qp_roundup(ukinfo->rq_size + 1);
-
- status = i40iw_get_wqe_shift(sq_size, ukinfo->max_sq_frag_cnt, ukinfo->max_inline_data, &sqshift);
- if (status)
- return -ENOMEM;
-
- sqdepth = sq_size << sqshift;
- rqdepth = rq_size << I40IW_MAX_RQ_WQE_SHIFT;
-
- size = sqdepth * sizeof(struct i40iw_sq_uk_wr_trk_info) + (rqdepth << 3);
- iwqp->kqp.wrid_mem = kzalloc(size, GFP_KERNEL);
-
- ukinfo->sq_wrtrk_array = (struct i40iw_sq_uk_wr_trk_info *)iwqp->kqp.wrid_mem;
- if (!ukinfo->sq_wrtrk_array)
- return -ENOMEM;
-
- ukinfo->rq_wrid_array = (u64 *)&ukinfo->sq_wrtrk_array[sqdepth];
-
- size = (sqdepth + rqdepth) * I40IW_QP_WQE_MIN_SIZE;
- size += (I40IW_SHADOW_AREA_SIZE << 3);
-
- status = i40iw_allocate_dma_mem(iwdev->sc_dev.hw, mem, size, 256);
- if (status) {
- kfree(ukinfo->sq_wrtrk_array);
- ukinfo->sq_wrtrk_array = NULL;
- return -ENOMEM;
- }
-
- ukinfo->sq = mem->va;
- info->sq_pa = mem->pa;
-
- ukinfo->rq = &ukinfo->sq[sqdepth];
- info->rq_pa = info->sq_pa + (sqdepth * I40IW_QP_WQE_MIN_SIZE);
-
- ukinfo->shadow_area = ukinfo->rq[rqdepth].elem;
- info->shadow_area_pa = info->rq_pa + (rqdepth * I40IW_QP_WQE_MIN_SIZE);
-
- ukinfo->sq_size = sq_size;
- ukinfo->rq_size = rq_size;
- ukinfo->qp_id = iwqp->ibqp.qp_num;
- return 0;
-}
-
-/**
- * i40iw_create_qp - create qp
- * @ibpd: ptr of pd
- * @init_attr: attributes for qp
- * @udata: user data for create qp
- */
-static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd,
- struct ib_qp_init_attr *init_attr,
- struct ib_udata *udata)
-{
- struct i40iw_pd *iwpd = to_iwpd(ibpd);
- struct i40iw_device *iwdev = to_iwdev(ibpd->device);
- struct i40iw_cqp *iwcqp = &iwdev->cqp;
- struct i40iw_qp *iwqp;
- struct i40iw_ucontext *ucontext;
- struct i40iw_create_qp_req req;
- struct i40iw_create_qp_resp uresp;
- u32 qp_num = 0;
- void *mem;
- enum i40iw_status_code ret;
- int err_code;
- int sq_size;
- int rq_size;
- struct i40iw_sc_qp *qp;
- struct i40iw_sc_dev *dev = &iwdev->sc_dev;
- struct i40iw_qp_init_info init_info;
- struct i40iw_create_qp_info *qp_info;
- struct i40iw_cqp_request *cqp_request;
- struct cqp_commands_info *cqp_info;
-
- struct i40iw_qp_host_ctx_info *ctx_info;
- struct i40iwarp_offload_info *iwarp_info;
- unsigned long flags;
-
- if (iwdev->closing)
- return ERR_PTR(-ENODEV);
-
- if (init_attr->create_flags)
- return ERR_PTR(-EINVAL);
- if (init_attr->cap.max_inline_data > I40IW_MAX_INLINE_DATA_SIZE)
- init_attr->cap.max_inline_data = I40IW_MAX_INLINE_DATA_SIZE;
-
- if (init_attr->cap.max_send_sge > I40IW_MAX_WQ_FRAGMENT_COUNT)
- init_attr->cap.max_send_sge = I40IW_MAX_WQ_FRAGMENT_COUNT;
-
- if (init_attr->cap.max_recv_sge > I40IW_MAX_WQ_FRAGMENT_COUNT)
- init_attr->cap.max_recv_sge = I40IW_MAX_WQ_FRAGMENT_COUNT;
-
- memset(&init_info, 0, sizeof(init_info));
-
- sq_size = init_attr->cap.max_send_wr;
- rq_size = init_attr->cap.max_recv_wr;
-
- init_info.vsi = &iwdev->vsi;
- init_info.qp_uk_init_info.sq_size = sq_size;
- init_info.qp_uk_init_info.rq_size = rq_size;
- init_info.qp_uk_init_info.max_sq_frag_cnt = init_attr->cap.max_send_sge;
- init_info.qp_uk_init_info.max_rq_frag_cnt = init_attr->cap.max_recv_sge;
- init_info.qp_uk_init_info.max_inline_data = init_attr->cap.max_inline_data;
-
- mem = kzalloc(sizeof(*iwqp), GFP_KERNEL);
- if (!mem)
- return ERR_PTR(-ENOMEM);
-
- iwqp = (struct i40iw_qp *)mem;
- qp = &iwqp->sc_qp;
- qp->back_qp = (void *)iwqp;
- qp->push_idx = I40IW_INVALID_PUSH_PAGE_INDEX;
-
- iwqp->ctx_info.iwarp_info = &iwqp->iwarp_info;
-
- if (i40iw_allocate_dma_mem(dev->hw,
- &iwqp->q2_ctx_mem,
- I40IW_Q2_BUFFER_SIZE + I40IW_QP_CTX_SIZE,
- 256)) {
- i40iw_pr_err("dma_mem failed\n");
- err_code = -ENOMEM;
- goto error;
- }
-
- init_info.q2 = iwqp->q2_ctx_mem.va;
- init_info.q2_pa = iwqp->q2_ctx_mem.pa;
-
- init_info.host_ctx = (void *)init_info.q2 + I40IW_Q2_BUFFER_SIZE;
- init_info.host_ctx_pa = init_info.q2_pa + I40IW_Q2_BUFFER_SIZE;
-
- err_code = i40iw_alloc_resource(iwdev, iwdev->allocated_qps, iwdev->max_qp,
- &qp_num, &iwdev->next_qp);
- if (err_code) {
- i40iw_pr_err("qp resource\n");
- goto error;
- }
-
- iwqp->allocated_buffer = mem;
- iwqp->iwdev = iwdev;
- iwqp->iwpd = iwpd;
- iwqp->ibqp.qp_num = qp_num;
- qp = &iwqp->sc_qp;
- iwqp->iwscq = to_iwcq(init_attr->send_cq);
- iwqp->iwrcq = to_iwcq(init_attr->recv_cq);
-
- iwqp->host_ctx.va = init_info.host_ctx;
- iwqp->host_ctx.pa = init_info.host_ctx_pa;
- iwqp->host_ctx.size = I40IW_QP_CTX_SIZE;
-
- init_info.pd = &iwpd->sc_pd;
- init_info.qp_uk_init_info.qp_id = iwqp->ibqp.qp_num;
- iwqp->ctx_info.qp_compl_ctx = (uintptr_t)qp;
-
- if (init_attr->qp_type != IB_QPT_RC) {
- err_code = -EINVAL;
- goto error;
- }
- if (iwdev->push_mode)
- i40iw_alloc_push_page(iwdev, qp);
- if (udata) {
- err_code = ib_copy_from_udata(&req, udata, sizeof(req));
- if (err_code) {
- i40iw_pr_err("ib_copy_from_data\n");
- goto error;
- }
- iwqp->ctx_info.qp_compl_ctx = req.user_compl_ctx;
- if (ibpd->uobject && ibpd->uobject->context) {
- iwqp->user_mode = 1;
- ucontext = to_ucontext(ibpd->uobject->context);
-
- if (req.user_wqe_buffers) {
- spin_lock_irqsave(
- &ucontext->qp_reg_mem_list_lock, flags);
- iwqp->iwpbl = i40iw_get_pbl(
- (unsigned long)req.user_wqe_buffers,
- &ucontext->qp_reg_mem_list);
- spin_unlock_irqrestore(
- &ucontext->qp_reg_mem_list_lock, flags);
-
- if (!iwqp->iwpbl) {
- err_code = -ENODATA;
- i40iw_pr_err("no pbl info\n");
- goto error;
- }
- }
- }
- err_code = i40iw_setup_virt_qp(iwdev, iwqp, &init_info);
- } else {
- err_code = i40iw_setup_kmode_qp(iwdev, iwqp, &init_info);
- }
-
- if (err_code) {
- i40iw_pr_err("setup qp failed\n");
- goto error;
- }
-
- init_info.type = I40IW_QP_TYPE_IWARP;
- ret = dev->iw_priv_qp_ops->qp_init(qp, &init_info);
- if (ret) {
- err_code = -EPROTO;
- i40iw_pr_err("qp_init fail\n");
- goto error;
- }
- ctx_info = &iwqp->ctx_info;
- iwarp_info = &iwqp->iwarp_info;
- iwarp_info->rd_enable = true;
- iwarp_info->wr_rdresp_en = true;
- if (!iwqp->user_mode) {
- iwarp_info->fast_reg_en = true;
- iwarp_info->priv_mode_en = true;
- }
- iwarp_info->ddp_ver = 1;
- iwarp_info->rdmap_ver = 1;
-
- ctx_info->iwarp_info_valid = true;
- ctx_info->send_cq_num = iwqp->iwscq->sc_cq.cq_uk.cq_id;
- ctx_info->rcv_cq_num = iwqp->iwrcq->sc_cq.cq_uk.cq_id;
- if (qp->push_idx == I40IW_INVALID_PUSH_PAGE_INDEX) {
- ctx_info->push_mode_en = false;
- } else {
- ctx_info->push_mode_en = true;
- ctx_info->push_idx = qp->push_idx;
- }
-
- ret = dev->iw_priv_qp_ops->qp_setctx(&iwqp->sc_qp,
- (u64 *)iwqp->host_ctx.va,
- ctx_info);
- ctx_info->iwarp_info_valid = false;
- cqp_request = i40iw_get_cqp_request(iwcqp, true);
- if (!cqp_request) {
- err_code = -ENOMEM;
- goto error;
- }
- cqp_info = &cqp_request->info;
- qp_info = &cqp_request->info.in.u.qp_create.info;
-
- memset(qp_info, 0, sizeof(*qp_info));
-
- qp_info->cq_num_valid = true;
- qp_info->next_iwarp_state = I40IW_QP_STATE_IDLE;
-
- cqp_info->cqp_cmd = OP_QP_CREATE;
- cqp_info->post_sq = 1;
- cqp_info->in.u.qp_create.qp = qp;
- cqp_info->in.u.qp_create.scratch = (uintptr_t)cqp_request;
- ret = i40iw_handle_cqp_op(iwdev, cqp_request);
- if (ret) {
- i40iw_pr_err("CQP-OP QP create fail");
- err_code = -EACCES;
- goto error;
- }
-
- i40iw_add_ref(&iwqp->ibqp);
- spin_lock_init(&iwqp->lock);
- iwqp->sig_all = (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) ? 1 : 0;
- iwdev->qp_table[qp_num] = iwqp;
- i40iw_add_pdusecount(iwqp->iwpd);
- i40iw_add_devusecount(iwdev);
- if (ibpd->uobject && udata) {
- memset(&uresp, 0, sizeof(uresp));
- uresp.actual_sq_size = sq_size;
- uresp.actual_rq_size = rq_size;
- uresp.qp_id = qp_num;
- uresp.push_idx = qp->push_idx;
- err_code = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
- if (err_code) {
- i40iw_pr_err("copy_to_udata failed\n");
- i40iw_destroy_qp(&iwqp->ibqp);
- /* let the completion of the qp destroy free the qp */
- return ERR_PTR(err_code);
- }
- }
- init_completion(&iwqp->sq_drained);
- init_completion(&iwqp->rq_drained);
-
- return &iwqp->ibqp;
-error:
- i40iw_free_qp_resources(iwdev, iwqp, qp_num);
- return ERR_PTR(err_code);
-}
-
-/**
- * i40iw_query - query qp attributes
- * @ibqp: qp pointer
- * @attr: attributes pointer
- * @attr_mask: Not used
- * @init_attr: qp attributes to return
- */
-static int i40iw_query_qp(struct ib_qp *ibqp,
- struct ib_qp_attr *attr,
- int attr_mask,
- struct ib_qp_init_attr *init_attr)
-{
- struct i40iw_qp *iwqp = to_iwqp(ibqp);
- struct i40iw_sc_qp *qp = &iwqp->sc_qp;
-
- attr->qp_access_flags = 0;
- attr->cap.max_send_wr = qp->qp_uk.sq_size;
- attr->cap.max_recv_wr = qp->qp_uk.rq_size;
- attr->cap.max_inline_data = I40IW_MAX_INLINE_DATA_SIZE;
- attr->cap.max_send_sge = I40IW_MAX_WQ_FRAGMENT_COUNT;
- attr->cap.max_recv_sge = I40IW_MAX_WQ_FRAGMENT_COUNT;
- init_attr->event_handler = iwqp->ibqp.event_handler;
- init_attr->qp_context = iwqp->ibqp.qp_context;
- init_attr->send_cq = iwqp->ibqp.send_cq;
- init_attr->recv_cq = iwqp->ibqp.recv_cq;
- init_attr->srq = iwqp->ibqp.srq;
- init_attr->cap = attr->cap;
- return 0;
-}
-
-/**
- * i40iw_hw_modify_qp - setup cqp for modify qp
- * @iwdev: iwarp device
- * @iwqp: qp ptr (user or kernel)
- * @info: info for modify qp
- * @wait: flag to wait or not for modify qp completion
- */
-void i40iw_hw_modify_qp(struct i40iw_device *iwdev, struct i40iw_qp *iwqp,
- struct i40iw_modify_qp_info *info, bool wait)
-{
- enum i40iw_status_code status;
- struct i40iw_cqp_request *cqp_request;
- struct cqp_commands_info *cqp_info;
- struct i40iw_modify_qp_info *m_info;
-
- cqp_request = i40iw_get_cqp_request(&iwdev->cqp, wait);
- if (!cqp_request)
- return;
-
- cqp_info = &cqp_request->info;
- m_info = &cqp_info->in.u.qp_modify.info;
- memcpy(m_info, info, sizeof(*m_info));
- cqp_info->cqp_cmd = OP_QP_MODIFY;
- cqp_info->post_sq = 1;
- cqp_info->in.u.qp_modify.qp = &iwqp->sc_qp;
- cqp_info->in.u.qp_modify.scratch = (uintptr_t)cqp_request;
- status = i40iw_handle_cqp_op(iwdev, cqp_request);
- if (status)
- i40iw_pr_err("CQP-OP Modify QP fail");
-}
-
-/**
- * i40iw_modify_qp - modify qp request
- * @ibqp: qp's pointer for modify
- * @attr: access attributes
- * @attr_mask: state mask
- * @udata: user data
- */
-int i40iw_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
- int attr_mask, struct ib_udata *udata)
-{
- struct i40iw_qp *iwqp = to_iwqp(ibqp);
- struct i40iw_device *iwdev = iwqp->iwdev;
- struct i40iw_qp_host_ctx_info *ctx_info;
- struct i40iwarp_offload_info *iwarp_info;
- struct i40iw_modify_qp_info info;
- u8 issue_modify_qp = 0;
- u8 dont_wait = 0;
- u32 err;
- unsigned long flags;
-
- memset(&info, 0, sizeof(info));
- ctx_info = &iwqp->ctx_info;
- iwarp_info = &iwqp->iwarp_info;
-
- spin_lock_irqsave(&iwqp->lock, flags);
-
- if (attr_mask & IB_QP_STATE) {
- if (iwdev->closing && attr->qp_state != IB_QPS_ERR) {
- err = -EINVAL;
- goto exit;
- }
-
- switch (attr->qp_state) {
- case IB_QPS_INIT:
- case IB_QPS_RTR:
- if (iwqp->iwarp_state > (u32)I40IW_QP_STATE_IDLE) {
- err = -EINVAL;
- goto exit;
- }
- if (iwqp->iwarp_state == I40IW_QP_STATE_INVALID) {
- info.next_iwarp_state = I40IW_QP_STATE_IDLE;
- issue_modify_qp = 1;
- }
- break;
- case IB_QPS_RTS:
- if ((iwqp->iwarp_state > (u32)I40IW_QP_STATE_RTS) ||
- (!iwqp->cm_id)) {
- err = -EINVAL;
- goto exit;
- }
-
- issue_modify_qp = 1;
- iwqp->hw_tcp_state = I40IW_TCP_STATE_ESTABLISHED;
- iwqp->hte_added = 1;
- info.next_iwarp_state = I40IW_QP_STATE_RTS;
- info.tcp_ctx_valid = true;
- info.ord_valid = true;
- info.arp_cache_idx_valid = true;
- info.cq_num_valid = true;
- break;
- case IB_QPS_SQD:
- if (iwqp->hw_iwarp_state > (u32)I40IW_QP_STATE_RTS) {
- err = 0;
- goto exit;
- }
- if ((iwqp->iwarp_state == (u32)I40IW_QP_STATE_CLOSING) ||
- (iwqp->iwarp_state < (u32)I40IW_QP_STATE_RTS)) {
- err = 0;
- goto exit;
- }
- if (iwqp->iwarp_state > (u32)I40IW_QP_STATE_CLOSING) {
- err = -EINVAL;
- goto exit;
- }
- info.next_iwarp_state = I40IW_QP_STATE_CLOSING;
- issue_modify_qp = 1;
- break;
- case IB_QPS_SQE:
- if (iwqp->iwarp_state >= (u32)I40IW_QP_STATE_TERMINATE) {
- err = -EINVAL;
- goto exit;
- }
- info.next_iwarp_state = I40IW_QP_STATE_TERMINATE;
- issue_modify_qp = 1;
- break;
- case IB_QPS_ERR:
- case IB_QPS_RESET:
- if (iwqp->iwarp_state == (u32)I40IW_QP_STATE_ERROR) {
- err = -EINVAL;
- goto exit;
- }
- if (iwqp->sc_qp.term_flags)
- i40iw_terminate_del_timer(&iwqp->sc_qp);
- info.next_iwarp_state = I40IW_QP_STATE_ERROR;
- if ((iwqp->hw_tcp_state > I40IW_TCP_STATE_CLOSED) &&
- iwdev->iw_status &&
- (iwqp->hw_tcp_state != I40IW_TCP_STATE_TIME_WAIT))
- info.reset_tcp_conn = true;
- else
- dont_wait = 1;
- issue_modify_qp = 1;
- info.next_iwarp_state = I40IW_QP_STATE_ERROR;
- break;
- default:
- err = -EINVAL;
- goto exit;
- }
-
- iwqp->ibqp_state = attr->qp_state;
-
- if (issue_modify_qp)
- iwqp->iwarp_state = info.next_iwarp_state;
- else
- info.next_iwarp_state = iwqp->iwarp_state;
- }
- if (attr_mask & IB_QP_ACCESS_FLAGS) {
- ctx_info->iwarp_info_valid = true;
- if (attr->qp_access_flags & IB_ACCESS_LOCAL_WRITE)
- iwarp_info->wr_rdresp_en = true;
- if (attr->qp_access_flags & IB_ACCESS_REMOTE_WRITE)
- iwarp_info->wr_rdresp_en = true;
- if (attr->qp_access_flags & IB_ACCESS_REMOTE_READ)
- iwarp_info->rd_enable = true;
- if (attr->qp_access_flags & IB_ACCESS_MW_BIND)
- iwarp_info->bind_en = true;
-
- if (iwqp->user_mode) {
- iwarp_info->rd_enable = true;
- iwarp_info->wr_rdresp_en = true;
- iwarp_info->priv_mode_en = false;
- }
- }
-
- if (ctx_info->iwarp_info_valid) {
- struct i40iw_sc_dev *dev = &iwdev->sc_dev;
- int ret;
-
- ctx_info->send_cq_num = iwqp->iwscq->sc_cq.cq_uk.cq_id;
- ctx_info->rcv_cq_num = iwqp->iwrcq->sc_cq.cq_uk.cq_id;
- ret = dev->iw_priv_qp_ops->qp_setctx(&iwqp->sc_qp,
- (u64 *)iwqp->host_ctx.va,
- ctx_info);
- if (ret) {
- i40iw_pr_err("setting QP context\n");
- err = -EINVAL;
- goto exit;
- }
- }
-
- spin_unlock_irqrestore(&iwqp->lock, flags);
-
- if (issue_modify_qp)
- i40iw_hw_modify_qp(iwdev, iwqp, &info, true);
-
- if (issue_modify_qp && (iwqp->ibqp_state > IB_QPS_RTS)) {
- if (dont_wait) {
- if (iwqp->cm_id && iwqp->hw_tcp_state) {
- spin_lock_irqsave(&iwqp->lock, flags);
- iwqp->hw_tcp_state = I40IW_TCP_STATE_CLOSED;
- iwqp->last_aeq = I40IW_AE_RESET_SENT;
- spin_unlock_irqrestore(&iwqp->lock, flags);
- }
- }
- }
- return 0;
-exit:
- spin_unlock_irqrestore(&iwqp->lock, flags);
- return err;
-}
-
-/**
- * cq_free_resources - free up recources for cq
- * @iwdev: iwarp device
- * @iwcq: cq ptr
- */
-static void cq_free_resources(struct i40iw_device *iwdev, struct i40iw_cq *iwcq)
-{
- struct i40iw_sc_cq *cq = &iwcq->sc_cq;
-
- if (!iwcq->user_mode)
- i40iw_free_dma_mem(iwdev->sc_dev.hw, &iwcq->kmem);
- i40iw_free_resource(iwdev, iwdev->allocated_cqs, cq->cq_uk.cq_id);
-}
-
-/**
- * i40iw_cq_wq_destroy - send cq destroy cqp
- * @iwdev: iwarp device
- * @cq: hardware control cq
- */
-void i40iw_cq_wq_destroy(struct i40iw_device *iwdev, struct i40iw_sc_cq *cq)
-{
- enum i40iw_status_code status;
- struct i40iw_cqp_request *cqp_request;
- struct cqp_commands_info *cqp_info;
-
- cqp_request = i40iw_get_cqp_request(&iwdev->cqp, true);
- if (!cqp_request)
- return;
-
- cqp_info = &cqp_request->info;
-
- cqp_info->cqp_cmd = OP_CQ_DESTROY;
- cqp_info->post_sq = 1;
- cqp_info->in.u.cq_destroy.cq = cq;
- cqp_info->in.u.cq_destroy.scratch = (uintptr_t)cqp_request;
- status = i40iw_handle_cqp_op(iwdev, cqp_request);
- if (status)
- i40iw_pr_err("CQP-OP Destroy QP fail");
-}
-
-/**
- * i40iw_destroy_cq - destroy cq
- * @ib_cq: cq pointer
- */
-static int i40iw_destroy_cq(struct ib_cq *ib_cq)
-{
- struct i40iw_cq *iwcq;
- struct i40iw_device *iwdev;
- struct i40iw_sc_cq *cq;
-
- if (!ib_cq) {
- i40iw_pr_err("ib_cq == NULL\n");
- return 0;
- }
-
- iwcq = to_iwcq(ib_cq);
- iwdev = to_iwdev(ib_cq->device);
- cq = &iwcq->sc_cq;
- i40iw_cq_wq_destroy(iwdev, cq);
- cq_free_resources(iwdev, iwcq);
- kfree(iwcq);
- i40iw_rem_devusecount(iwdev);
- return 0;
-}
-
-/**
- * i40iw_create_cq - create cq
- * @ibdev: device pointer from stack
- * @attr: attributes for cq
- * @context: user context created during alloc
- * @udata: user data
- */
-static struct ib_cq *i40iw_create_cq(struct ib_device *ibdev,
- const struct ib_cq_init_attr *attr,
- struct ib_ucontext *context,
- struct ib_udata *udata)
-{
- struct i40iw_device *iwdev = to_iwdev(ibdev);
- struct i40iw_cq *iwcq;
- struct i40iw_pbl *iwpbl;
- u32 cq_num = 0;
- struct i40iw_sc_cq *cq;
- struct i40iw_sc_dev *dev = &iwdev->sc_dev;
- struct i40iw_cq_init_info info;
- enum i40iw_status_code status;
- struct i40iw_cqp_request *cqp_request;
- struct cqp_commands_info *cqp_info;
- struct i40iw_cq_uk_init_info *ukinfo = &info.cq_uk_init_info;
- unsigned long flags;
- int err_code;
- int entries = attr->cqe;
-
- if (iwdev->closing)
- return ERR_PTR(-ENODEV);
-
- if (entries > iwdev->max_cqe)
- return ERR_PTR(-EINVAL);
-
- iwcq = kzalloc(sizeof(*iwcq), GFP_KERNEL);
- if (!iwcq)
- return ERR_PTR(-ENOMEM);
-
- memset(&info, 0, sizeof(info));
-
- err_code = i40iw_alloc_resource(iwdev, iwdev->allocated_cqs,
- iwdev->max_cq, &cq_num,
- &iwdev->next_cq);
- if (err_code)
- goto error;
-
- cq = &iwcq->sc_cq;
- cq->back_cq = (void *)iwcq;
- spin_lock_init(&iwcq->lock);
-
- info.dev = dev;
- ukinfo->cq_size = max(entries, 4);
- ukinfo->cq_id = cq_num;
- iwcq->ibcq.cqe = info.cq_uk_init_info.cq_size;
- info.ceqe_mask = 0;
- if (attr->comp_vector < iwdev->ceqs_count)
- info.ceq_id = attr->comp_vector;
- info.ceq_id_valid = true;
- info.ceqe_mask = 1;
- info.type = I40IW_CQ_TYPE_IWARP;
- if (context) {
- struct i40iw_ucontext *ucontext;
- struct i40iw_create_cq_req req;
- struct i40iw_cq_mr *cqmr;
-
- memset(&req, 0, sizeof(req));
- iwcq->user_mode = true;
- ucontext = to_ucontext(context);
- if (ib_copy_from_udata(&req, udata, sizeof(struct i40iw_create_cq_req)))
- goto cq_free_resources;
-
- spin_lock_irqsave(&ucontext->cq_reg_mem_list_lock, flags);
- iwpbl = i40iw_get_pbl((unsigned long)req.user_cq_buffer,
- &ucontext->cq_reg_mem_list);
- spin_unlock_irqrestore(&ucontext->cq_reg_mem_list_lock, flags);
- if (!iwpbl) {
- err_code = -EPROTO;
- goto cq_free_resources;
- }
-
- iwcq->iwpbl = iwpbl;
- iwcq->cq_mem_size = 0;
- cqmr = &iwpbl->cq_mr;
- info.shadow_area_pa = cpu_to_le64(cqmr->shadow);
- if (iwpbl->pbl_allocated) {
- info.virtual_map = true;
- info.pbl_chunk_size = 1;
- info.first_pm_pbl_idx = cqmr->cq_pbl.idx;
- } else {
- info.cq_base_pa = cqmr->cq_pbl.addr;
- }
- } else {
- /* Kmode allocations */
- int rsize;
- int shadow;
-
- rsize = info.cq_uk_init_info.cq_size * sizeof(struct i40iw_cqe);
- rsize = round_up(rsize, 256);
- shadow = I40IW_SHADOW_AREA_SIZE << 3;
- status = i40iw_allocate_dma_mem(dev->hw, &iwcq->kmem,
- rsize + shadow, 256);
- if (status) {
- err_code = -ENOMEM;
- goto cq_free_resources;
- }
- ukinfo->cq_base = iwcq->kmem.va;
- info.cq_base_pa = iwcq->kmem.pa;
- info.shadow_area_pa = info.cq_base_pa + rsize;
- ukinfo->shadow_area = iwcq->kmem.va + rsize;
- }
-
- if (dev->iw_priv_cq_ops->cq_init(cq, &info)) {
- i40iw_pr_err("init cq fail\n");
- err_code = -EPROTO;
- goto cq_free_resources;
- }
-
- cqp_request = i40iw_get_cqp_request(&iwdev->cqp, true);
- if (!cqp_request) {
- err_code = -ENOMEM;
- goto cq_free_resources;
- }
-
- cqp_info = &cqp_request->info;
- cqp_info->cqp_cmd = OP_CQ_CREATE;
- cqp_info->post_sq = 1;
- cqp_info->in.u.cq_create.cq = cq;
- cqp_info->in.u.cq_create.scratch = (uintptr_t)cqp_request;
- status = i40iw_handle_cqp_op(iwdev, cqp_request);
- if (status) {
- i40iw_pr_err("CQP-OP Create QP fail");
- err_code = -EPROTO;
- goto cq_free_resources;
- }
-
- if (context) {
- struct i40iw_create_cq_resp resp;
-
- memset(&resp, 0, sizeof(resp));
- resp.cq_id = info.cq_uk_init_info.cq_id;
- resp.cq_size = info.cq_uk_init_info.cq_size;
- if (ib_copy_to_udata(udata, &resp, sizeof(resp))) {
- i40iw_pr_err("copy to user data\n");
- err_code = -EPROTO;
- goto cq_destroy;
- }
- }
-
- i40iw_add_devusecount(iwdev);
- return (struct ib_cq *)iwcq;
-
-cq_destroy:
- i40iw_cq_wq_destroy(iwdev, cq);
-cq_free_resources:
- cq_free_resources(iwdev, iwcq);
-error:
- kfree(iwcq);
- return ERR_PTR(err_code);
-}
-
-/**
- * i40iw_get_user_access - get hw access from IB access
- * @acc: IB access to return hw access
- */
-static inline u16 i40iw_get_user_access(int acc)
-{
- u16 access = 0;
-
- access |= (acc & IB_ACCESS_LOCAL_WRITE) ? I40IW_ACCESS_FLAGS_LOCALWRITE : 0;
- access |= (acc & IB_ACCESS_REMOTE_WRITE) ? I40IW_ACCESS_FLAGS_REMOTEWRITE : 0;
- access |= (acc & IB_ACCESS_REMOTE_READ) ? I40IW_ACCESS_FLAGS_REMOTEREAD : 0;
- access |= (acc & IB_ACCESS_MW_BIND) ? I40IW_ACCESS_FLAGS_BIND_WINDOW : 0;
- return access;
-}
-
-/**
- * i40iw_free_stag - free stag resource
- * @iwdev: iwarp device
- * @stag: stag to free
- */
-static void i40iw_free_stag(struct i40iw_device *iwdev, u32 stag)
-{
- u32 stag_idx;
-
- stag_idx = (stag & iwdev->mr_stagmask) >> I40IW_CQPSQ_STAG_IDX_SHIFT;
- i40iw_free_resource(iwdev, iwdev->allocated_mrs, stag_idx);
- i40iw_rem_devusecount(iwdev);
-}
-
-/**
- * i40iw_create_stag - create random stag
- * @iwdev: iwarp device
- */
-static u32 i40iw_create_stag(struct i40iw_device *iwdev)
-{
- u32 stag = 0;
- u32 stag_index = 0;
- u32 next_stag_index;
- u32 driver_key;
- u32 random;
- u8 consumer_key;
- int ret;
-
- get_random_bytes(&random, sizeof(random));
- consumer_key = (u8)random;
-
- driver_key = random & ~iwdev->mr_stagmask;
- next_stag_index = (random & iwdev->mr_stagmask) >> 8;
- next_stag_index %= iwdev->max_mr;
-
- ret = i40iw_alloc_resource(iwdev,
- iwdev->allocated_mrs, iwdev->max_mr,
- &stag_index, &next_stag_index);
- if (!ret) {
- stag = stag_index << I40IW_CQPSQ_STAG_IDX_SHIFT;
- stag |= driver_key;
- stag += (u32)consumer_key;
- i40iw_add_devusecount(iwdev);
- }
- return stag;
-}
-
-/**
- * i40iw_next_pbl_addr - Get next pbl address
- * @pbl: pointer to a pble
- * @pinfo: info pointer
- * @idx: index
- */
-static inline u64 *i40iw_next_pbl_addr(u64 *pbl,
- struct i40iw_pble_info **pinfo,
- u32 *idx)
-{
- *idx += 1;
- if ((!(*pinfo)) || (*idx != (*pinfo)->cnt))
- return ++pbl;
- *idx = 0;
- (*pinfo)++;
- return (u64 *)(*pinfo)->addr;
-}
-
-/**
- * i40iw_copy_user_pgaddrs - copy user page address to pble's os locally
- * @iwmr: iwmr for IB's user page addresses
- * @pbl: ple pointer to save 1 level or 0 level pble
- * @level: indicated level 0, 1 or 2
- */
-static void i40iw_copy_user_pgaddrs(struct i40iw_mr *iwmr,
- u64 *pbl,
- enum i40iw_pble_level level)
-{
- struct ib_umem *region = iwmr->region;
- struct i40iw_pbl *iwpbl = &iwmr->iwpbl;
- int chunk_pages, entry, pg_shift, i;
- struct i40iw_pble_alloc *palloc = &iwpbl->pble_alloc;
- struct i40iw_pble_info *pinfo;
- struct scatterlist *sg;
- u64 pg_addr = 0;
- u32 idx = 0;
-
- pinfo = (level == I40IW_LEVEL_1) ? NULL : palloc->level2.leaf;
-
- pg_shift = ffs(region->page_size) - 1;
- for_each_sg(region->sg_head.sgl, sg, region->nmap, entry) {
- chunk_pages = sg_dma_len(sg) >> pg_shift;
- if ((iwmr->type == IW_MEMREG_TYPE_QP) &&
- !iwpbl->qp_mr.sq_page)
- iwpbl->qp_mr.sq_page = sg_page(sg);
- for (i = 0; i < chunk_pages; i++) {
- pg_addr = sg_dma_address(sg) + region->page_size * i;
-
- if ((entry + i) == 0)
- *pbl = cpu_to_le64(pg_addr & iwmr->page_msk);
- else if (!(pg_addr & ~iwmr->page_msk))
- *pbl = cpu_to_le64(pg_addr);
- else
- continue;
- pbl = i40iw_next_pbl_addr(pbl, &pinfo, &idx);
- }
- }
-}
-
-/**
- * i40iw_set_hugetlb_params - set MR pg size and mask to huge pg values.
- * @addr: virtual address
- * @iwmr: mr pointer for this memory registration
- */
-static void i40iw_set_hugetlb_values(u64 addr, struct i40iw_mr *iwmr)
-{
- struct vm_area_struct *vma;
- struct hstate *h;
-
- vma = find_vma(current->mm, addr);
- if (vma && is_vm_hugetlb_page(vma)) {
- h = hstate_vma(vma);
- if (huge_page_size(h) == 0x200000) {
- iwmr->page_size = huge_page_size(h);
- iwmr->page_msk = huge_page_mask(h);
- }
- }
-}
-
-/**
- * i40iw_check_mem_contiguous - check if pbls stored in arr are contiguous
- * @arr: lvl1 pbl array
- * @npages: page count
- * pg_size: page size
- *
- */
-static bool i40iw_check_mem_contiguous(u64 *arr, u32 npages, u32 pg_size)
-{
- u32 pg_idx;
-
- for (pg_idx = 0; pg_idx < npages; pg_idx++) {
- if ((*arr + (pg_size * pg_idx)) != arr[pg_idx])
- return false;
- }
- return true;
-}
-
-/**
- * i40iw_check_mr_contiguous - check if MR is physically contiguous
- * @palloc: pbl allocation struct
- * pg_size: page size
- */
-static bool i40iw_check_mr_contiguous(struct i40iw_pble_alloc *palloc, u32 pg_size)
-{
- struct i40iw_pble_level2 *lvl2 = &palloc->level2;
- struct i40iw_pble_info *leaf = lvl2->leaf;
- u64 *arr = NULL;
- u64 *start_addr = NULL;
- int i;
- bool ret;
-
- if (palloc->level == I40IW_LEVEL_1) {
- arr = (u64 *)palloc->level1.addr;
- ret = i40iw_check_mem_contiguous(arr, palloc->total_cnt, pg_size);
- return ret;
- }
-
- start_addr = (u64 *)leaf->addr;
-
- for (i = 0; i < lvl2->leaf_cnt; i++, leaf++) {
- arr = (u64 *)leaf->addr;
- if ((*start_addr + (i * pg_size * PBLE_PER_PAGE)) != *arr)
- return false;
- ret = i40iw_check_mem_contiguous(arr, leaf->cnt, pg_size);
- if (!ret)
- return false;
- }
-
- return true;
-}
-
-/**
- * i40iw_setup_pbles - copy user pg address to pble's
- * @iwdev: iwarp device
- * @iwmr: mr pointer for this memory registration
- * @use_pbles: flag if to use pble's
- */
-static int i40iw_setup_pbles(struct i40iw_device *iwdev,
- struct i40iw_mr *iwmr,
- bool use_pbles)
-{
- struct i40iw_pbl *iwpbl = &iwmr->iwpbl;
- struct i40iw_pble_alloc *palloc = &iwpbl->pble_alloc;
- struct i40iw_pble_info *pinfo;
- u64 *pbl;
- enum i40iw_status_code status;
- enum i40iw_pble_level level = I40IW_LEVEL_1;
-
- if (use_pbles) {
- mutex_lock(&iwdev->pbl_mutex);
- status = i40iw_get_pble(&iwdev->sc_dev, iwdev->pble_rsrc, palloc, iwmr->page_cnt);
- mutex_unlock(&iwdev->pbl_mutex);
- if (status)
- return -ENOMEM;
-
- iwpbl->pbl_allocated = true;
- level = palloc->level;
- pinfo = (level == I40IW_LEVEL_1) ? &palloc->level1 : palloc->level2.leaf;
- pbl = (u64 *)pinfo->addr;
- } else {
- pbl = iwmr->pgaddrmem;
- }
-
- i40iw_copy_user_pgaddrs(iwmr, pbl, level);
-
- if (use_pbles)
- iwmr->pgaddrmem[0] = *pbl;
-
- return 0;
-}
-
-/**
- * i40iw_handle_q_mem - handle memory for qp and cq
- * @iwdev: iwarp device
- * @req: information for q memory management
- * @iwpbl: pble struct
- * @use_pbles: flag to use pble
- */
-static int i40iw_handle_q_mem(struct i40iw_device *iwdev,
- struct i40iw_mem_reg_req *req,
- struct i40iw_pbl *iwpbl,
- bool use_pbles)
-{
- struct i40iw_pble_alloc *palloc = &iwpbl->pble_alloc;
- struct i40iw_mr *iwmr = iwpbl->iwmr;
- struct i40iw_qp_mr *qpmr = &iwpbl->qp_mr;
- struct i40iw_cq_mr *cqmr = &iwpbl->cq_mr;
- struct i40iw_hmc_pble *hmc_p;
- u64 *arr = iwmr->pgaddrmem;
- u32 pg_size;
- int err;
- int total;
- bool ret = true;
-
- total = req->sq_pages + req->rq_pages + req->cq_pages;
- pg_size = iwmr->page_size;
-
- err = i40iw_setup_pbles(iwdev, iwmr, use_pbles);
- if (err)
- return err;
-
- if (use_pbles && (palloc->level != I40IW_LEVEL_1)) {
- i40iw_free_pble(iwdev->pble_rsrc, palloc);
- iwpbl->pbl_allocated = false;
- return -ENOMEM;
- }
-
- if (use_pbles)
- arr = (u64 *)palloc->level1.addr;
-
- if (iwmr->type == IW_MEMREG_TYPE_QP) {
- hmc_p = &qpmr->sq_pbl;
- qpmr->shadow = (dma_addr_t)arr[total];
-
- if (use_pbles) {
- ret = i40iw_check_mem_contiguous(arr, req->sq_pages, pg_size);
- if (ret)
- ret = i40iw_check_mem_contiguous(&arr[req->sq_pages], req->rq_pages, pg_size);
- }
-
- if (!ret) {
- hmc_p->idx = palloc->level1.idx;
- hmc_p = &qpmr->rq_pbl;
- hmc_p->idx = palloc->level1.idx + req->sq_pages;
- } else {
- hmc_p->addr = arr[0];
- hmc_p = &qpmr->rq_pbl;
- hmc_p->addr = arr[req->sq_pages];
- }
- } else { /* CQ */
- hmc_p = &cqmr->cq_pbl;
- cqmr->shadow = (dma_addr_t)arr[total];
-
- if (use_pbles)
- ret = i40iw_check_mem_contiguous(arr, req->cq_pages, pg_size);
-
- if (!ret)
- hmc_p->idx = palloc->level1.idx;
- else
- hmc_p->addr = arr[0];
- }
-
- if (use_pbles && ret) {
- i40iw_free_pble(iwdev->pble_rsrc, palloc);
- iwpbl->pbl_allocated = false;
- }
-
- return err;
-}
-
-/**
- * i40iw_hw_alloc_stag - cqp command to allocate stag
- * @iwdev: iwarp device
- * @iwmr: iwarp mr pointer
- */
-static int i40iw_hw_alloc_stag(struct i40iw_device *iwdev, struct i40iw_mr *iwmr)
-{
- struct i40iw_allocate_stag_info *info;
- struct i40iw_pd *iwpd = to_iwpd(iwmr->ibmr.pd);
- enum i40iw_status_code status;
- int err = 0;
- struct i40iw_cqp_request *cqp_request;
- struct cqp_commands_info *cqp_info;
-
- cqp_request = i40iw_get_cqp_request(&iwdev->cqp, true);
- if (!cqp_request)
- return -ENOMEM;
-
- cqp_info = &cqp_request->info;
- info = &cqp_info->in.u.alloc_stag.info;
- memset(info, 0, sizeof(*info));
- info->page_size = PAGE_SIZE;
- info->stag_idx = iwmr->stag >> I40IW_CQPSQ_STAG_IDX_SHIFT;
- info->pd_id = iwpd->sc_pd.pd_id;
- info->total_len = iwmr->length;
- info->remote_access = true;
- cqp_info->cqp_cmd = OP_ALLOC_STAG;
- cqp_info->post_sq = 1;
- cqp_info->in.u.alloc_stag.dev = &iwdev->sc_dev;
- cqp_info->in.u.alloc_stag.scratch = (uintptr_t)cqp_request;
-
- status = i40iw_handle_cqp_op(iwdev, cqp_request);
- if (status) {
- err = -ENOMEM;
- i40iw_pr_err("CQP-OP MR Reg fail");
- }
- return err;
-}
-
-/**
- * i40iw_alloc_mr - register stag for fast memory registration
- * @pd: ibpd pointer
- * @mr_type: memory for stag registrion
- * @max_num_sg: man number of pages
- */
-static struct ib_mr *i40iw_alloc_mr(struct ib_pd *pd,
- enum ib_mr_type mr_type,
- u32 max_num_sg)
-{
- struct i40iw_pd *iwpd = to_iwpd(pd);
- struct i40iw_device *iwdev = to_iwdev(pd->device);
- struct i40iw_pble_alloc *palloc;
- struct i40iw_pbl *iwpbl;
- struct i40iw_mr *iwmr;
- enum i40iw_status_code status;
- u32 stag;
- int err_code = -ENOMEM;
-
- iwmr = kzalloc(sizeof(*iwmr), GFP_KERNEL);
- if (!iwmr)
- return ERR_PTR(-ENOMEM);
-
- stag = i40iw_create_stag(iwdev);
- if (!stag) {
- err_code = -EOVERFLOW;
- goto err;
- }
- iwmr->stag = stag;
- iwmr->ibmr.rkey = stag;
- iwmr->ibmr.lkey = stag;
- iwmr->ibmr.pd = pd;
- iwmr->ibmr.device = pd->device;
- iwpbl = &iwmr->iwpbl;
- iwpbl->iwmr = iwmr;
- iwmr->type = IW_MEMREG_TYPE_MEM;
- palloc = &iwpbl->pble_alloc;
- iwmr->page_cnt = max_num_sg;
- mutex_lock(&iwdev->pbl_mutex);
- status = i40iw_get_pble(&iwdev->sc_dev, iwdev->pble_rsrc, palloc, iwmr->page_cnt);
- mutex_unlock(&iwdev->pbl_mutex);
- if (status)
- goto err1;
-
- if (palloc->level != I40IW_LEVEL_1)
- goto err2;
- err_code = i40iw_hw_alloc_stag(iwdev, iwmr);
- if (err_code)
- goto err2;
- iwpbl->pbl_allocated = true;
- i40iw_add_pdusecount(iwpd);
- return &iwmr->ibmr;
-err2:
- i40iw_free_pble(iwdev->pble_rsrc, palloc);
-err1:
- i40iw_free_stag(iwdev, stag);
-err:
- kfree(iwmr);
- return ERR_PTR(err_code);
-}
-
-/**
- * i40iw_set_page - populate pbl list for fmr
- * @ibmr: ib mem to access iwarp mr pointer
- * @addr: page dma address fro pbl list
- */
-static int i40iw_set_page(struct ib_mr *ibmr, u64 addr)
-{
- struct i40iw_mr *iwmr = to_iwmr(ibmr);
- struct i40iw_pbl *iwpbl = &iwmr->iwpbl;
- struct i40iw_pble_alloc *palloc = &iwpbl->pble_alloc;
- u64 *pbl;
-
- if (unlikely(iwmr->npages == iwmr->page_cnt))
- return -ENOMEM;
-
- pbl = (u64 *)palloc->level1.addr;
- pbl[iwmr->npages++] = cpu_to_le64(addr);
- return 0;
-}
-
-/**
- * i40iw_map_mr_sg - map of sg list for fmr
- * @ibmr: ib mem to access iwarp mr pointer
- * @sg: scatter gather list for fmr
- * @sg_nents: number of sg pages
- */
-static int i40iw_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
- int sg_nents, unsigned int *sg_offset)
-{
- struct i40iw_mr *iwmr = to_iwmr(ibmr);
-
- iwmr->npages = 0;
- return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, i40iw_set_page);
-}
-
-/**
- * i40iw_drain_sq - drain the send queue
- * @ibqp: ib qp pointer
- */
-static void i40iw_drain_sq(struct ib_qp *ibqp)
-{
- struct i40iw_qp *iwqp = to_iwqp(ibqp);
- struct i40iw_sc_qp *qp = &iwqp->sc_qp;
-
- if (I40IW_RING_MORE_WORK(qp->qp_uk.sq_ring))
- wait_for_completion(&iwqp->sq_drained);
-}
-
-/**
- * i40iw_drain_rq - drain the receive queue
- * @ibqp: ib qp pointer
- */
-static void i40iw_drain_rq(struct ib_qp *ibqp)
-{
- struct i40iw_qp *iwqp = to_iwqp(ibqp);
- struct i40iw_sc_qp *qp = &iwqp->sc_qp;
-
- if (I40IW_RING_MORE_WORK(qp->qp_uk.rq_ring))
- wait_for_completion(&iwqp->rq_drained);
-}
-
-/**
- * i40iw_hwreg_mr - send cqp command for memory registration
- * @iwdev: iwarp device
- * @iwmr: iwarp mr pointer
- * @access: access for MR
- */
-static int i40iw_hwreg_mr(struct i40iw_device *iwdev,
- struct i40iw_mr *iwmr,
- u16 access)
-{
- struct i40iw_pbl *iwpbl = &iwmr->iwpbl;
- struct i40iw_reg_ns_stag_info *stag_info;
- struct i40iw_pd *iwpd = to_iwpd(iwmr->ibmr.pd);
- struct i40iw_pble_alloc *palloc = &iwpbl->pble_alloc;
- enum i40iw_status_code status;
- int err = 0;
- struct i40iw_cqp_request *cqp_request;
- struct cqp_commands_info *cqp_info;
-
- cqp_request = i40iw_get_cqp_request(&iwdev->cqp, true);
- if (!cqp_request)
- return -ENOMEM;
-
- cqp_info = &cqp_request->info;
- stag_info = &cqp_info->in.u.mr_reg_non_shared.info;
- memset(stag_info, 0, sizeof(*stag_info));
- stag_info->va = (void *)(unsigned long)iwpbl->user_base;
- stag_info->stag_idx = iwmr->stag >> I40IW_CQPSQ_STAG_IDX_SHIFT;
- stag_info->stag_key = (u8)iwmr->stag;
- stag_info->total_len = iwmr->length;
- stag_info->access_rights = access;
- stag_info->pd_id = iwpd->sc_pd.pd_id;
- stag_info->addr_type = I40IW_ADDR_TYPE_VA_BASED;
- stag_info->page_size = iwmr->page_size;
-
- if (iwpbl->pbl_allocated) {
- if (palloc->level == I40IW_LEVEL_1) {
- stag_info->first_pm_pbl_index = palloc->level1.idx;
- stag_info->chunk_size = 1;
- } else {
- stag_info->first_pm_pbl_index = palloc->level2.root.idx;
- stag_info->chunk_size = 3;
- }
- } else {
- stag_info->reg_addr_pa = iwmr->pgaddrmem[0];
- }
-
- cqp_info->cqp_cmd = OP_MR_REG_NON_SHARED;
- cqp_info->post_sq = 1;
- cqp_info->in.u.mr_reg_non_shared.dev = &iwdev->sc_dev;
- cqp_info->in.u.mr_reg_non_shared.scratch = (uintptr_t)cqp_request;
-
- status = i40iw_handle_cqp_op(iwdev, cqp_request);
- if (status) {
- err = -ENOMEM;
- i40iw_pr_err("CQP-OP MR Reg fail");
- }
- return err;
-}
-
-/**
- * i40iw_reg_user_mr - Register a user memory region
- * @pd: ptr of pd
- * @start: virtual start address
- * @length: length of mr
- * @virt: virtual address
- * @acc: access of mr
- * @udata: user data
- */
-static struct ib_mr *i40iw_reg_user_mr(struct ib_pd *pd,
- u64 start,
- u64 length,
- u64 virt,
- int acc,
- struct ib_udata *udata)
-{
- struct i40iw_pd *iwpd = to_iwpd(pd);
- struct i40iw_device *iwdev = to_iwdev(pd->device);
- struct i40iw_ucontext *ucontext;
- struct i40iw_pble_alloc *palloc;
- struct i40iw_pbl *iwpbl;
- struct i40iw_mr *iwmr;
- struct ib_umem *region;
- struct i40iw_mem_reg_req req;
- u64 pbl_depth = 0;
- u32 stag = 0;
- u16 access;
- u64 region_length;
- bool use_pbles = false;
- unsigned long flags;
- int err = -ENOSYS;
- int ret;
- int pg_shift;
-
- if (iwdev->closing)
- return ERR_PTR(-ENODEV);
-
- if (length > I40IW_MAX_MR_SIZE)
- return ERR_PTR(-EINVAL);
- region = ib_umem_get(pd->uobject->context, start, length, acc, 0);
- if (IS_ERR(region))
- return (struct ib_mr *)region;
-
- if (ib_copy_from_udata(&req, udata, sizeof(req))) {
- ib_umem_release(region);
- return ERR_PTR(-EFAULT);
- }
-
- iwmr = kzalloc(sizeof(*iwmr), GFP_KERNEL);
- if (!iwmr) {
- ib_umem_release(region);
- return ERR_PTR(-ENOMEM);
- }
-
- iwpbl = &iwmr->iwpbl;
- iwpbl->iwmr = iwmr;
- iwmr->region = region;
- iwmr->ibmr.pd = pd;
- iwmr->ibmr.device = pd->device;
- ucontext = to_ucontext(pd->uobject->context);
-
- iwmr->page_size = region->page_size;
- iwmr->page_msk = PAGE_MASK;
-
- if (region->hugetlb && (req.reg_type == IW_MEMREG_TYPE_MEM))
- i40iw_set_hugetlb_values(start, iwmr);
-
- region_length = region->length + (start & (iwmr->page_size - 1));
- pg_shift = ffs(iwmr->page_size) - 1;
- pbl_depth = region_length >> pg_shift;
- pbl_depth += (region_length & (iwmr->page_size - 1)) ? 1 : 0;
- iwmr->length = region->length;
-
- iwpbl->user_base = virt;
- palloc = &iwpbl->pble_alloc;
-
- iwmr->type = req.reg_type;
- iwmr->page_cnt = (u32)pbl_depth;
-
- switch (req.reg_type) {
- case IW_MEMREG_TYPE_QP:
- use_pbles = ((req.sq_pages + req.rq_pages) > 2);
- err = i40iw_handle_q_mem(iwdev, &req, iwpbl, use_pbles);
- if (err)
- goto error;
- spin_lock_irqsave(&ucontext->qp_reg_mem_list_lock, flags);
- list_add_tail(&iwpbl->list, &ucontext->qp_reg_mem_list);
- spin_unlock_irqrestore(&ucontext->qp_reg_mem_list_lock, flags);
- break;
- case IW_MEMREG_TYPE_CQ:
- use_pbles = (req.cq_pages > 1);
- err = i40iw_handle_q_mem(iwdev, &req, iwpbl, use_pbles);
- if (err)
- goto error;
-
- spin_lock_irqsave(&ucontext->cq_reg_mem_list_lock, flags);
- list_add_tail(&iwpbl->list, &ucontext->cq_reg_mem_list);
- spin_unlock_irqrestore(&ucontext->cq_reg_mem_list_lock, flags);
- break;
- case IW_MEMREG_TYPE_MEM:
- use_pbles = (iwmr->page_cnt != 1);
- access = I40IW_ACCESS_FLAGS_LOCALREAD;
-
- err = i40iw_setup_pbles(iwdev, iwmr, use_pbles);
- if (err)
- goto error;
-
- if (use_pbles) {
- ret = i40iw_check_mr_contiguous(palloc, iwmr->page_size);
- if (ret) {
- i40iw_free_pble(iwdev->pble_rsrc, palloc);
- iwpbl->pbl_allocated = false;
- }
- }
-
- access |= i40iw_get_user_access(acc);
- stag = i40iw_create_stag(iwdev);
- if (!stag) {
- err = -ENOMEM;
- goto error;
- }
-
- iwmr->stag = stag;
- iwmr->ibmr.rkey = stag;
- iwmr->ibmr.lkey = stag;
-
- err = i40iw_hwreg_mr(iwdev, iwmr, access);
- if (err) {
- i40iw_free_stag(iwdev, stag);
- goto error;
- }
-
- break;
- default:
- goto error;
- }
-
- iwmr->type = req.reg_type;
- if (req.reg_type == IW_MEMREG_TYPE_MEM)
- i40iw_add_pdusecount(iwpd);
- return &iwmr->ibmr;
-
-error:
- if (palloc->level != I40IW_LEVEL_0 && iwpbl->pbl_allocated)
- i40iw_free_pble(iwdev->pble_rsrc, palloc);
- ib_umem_release(region);
- kfree(iwmr);
- return ERR_PTR(err);
-}
-
-/**
- * i40iw_reg_phys_mr - register kernel physical memory
- * @pd: ibpd pointer
- * @addr: physical address of memory to register
- * @size: size of memory to register
- * @acc: Access rights
- * @iova_start: start of virtual address for physical buffers
- */
-struct ib_mr *i40iw_reg_phys_mr(struct ib_pd *pd,
- u64 addr,
- u64 size,
- int acc,
- u64 *iova_start)
-{
- struct i40iw_pd *iwpd = to_iwpd(pd);
- struct i40iw_device *iwdev = to_iwdev(pd->device);
- struct i40iw_pbl *iwpbl;
- struct i40iw_mr *iwmr;
- enum i40iw_status_code status;
- u32 stag;
- u16 access = I40IW_ACCESS_FLAGS_LOCALREAD;
- int ret;
-
- iwmr = kzalloc(sizeof(*iwmr), GFP_KERNEL);
- if (!iwmr)
- return ERR_PTR(-ENOMEM);
- iwmr->ibmr.pd = pd;
- iwmr->ibmr.device = pd->device;
- iwpbl = &iwmr->iwpbl;
- iwpbl->iwmr = iwmr;
- iwmr->type = IW_MEMREG_TYPE_MEM;
- iwpbl->user_base = *iova_start;
- stag = i40iw_create_stag(iwdev);
- if (!stag) {
- ret = -EOVERFLOW;
- goto err;
- }
- access |= i40iw_get_user_access(acc);
- iwmr->stag = stag;
- iwmr->ibmr.rkey = stag;
- iwmr->ibmr.lkey = stag;
- iwmr->page_cnt = 1;
- iwmr->pgaddrmem[0] = addr;
- iwmr->length = size;
- status = i40iw_hwreg_mr(iwdev, iwmr, access);
- if (status) {
- i40iw_free_stag(iwdev, stag);
- ret = -ENOMEM;
- goto err;
- }
-
- i40iw_add_pdusecount(iwpd);
- return &iwmr->ibmr;
- err:
- kfree(iwmr);
- return ERR_PTR(ret);
-}
-
-/**
- * i40iw_get_dma_mr - register physical mem
- * @pd: ptr of pd
- * @acc: access for memory
- */
-static struct ib_mr *i40iw_get_dma_mr(struct ib_pd *pd, int acc)
-{
- u64 kva = 0;
-
- return i40iw_reg_phys_mr(pd, 0, 0, acc, &kva);
-}
-
-/**
- * i40iw_del_mem_list - Deleting pbl list entries for CQ/QP
- * @iwmr: iwmr for IB's user page addresses
- * @ucontext: ptr to user context
- */
-static void i40iw_del_memlist(struct i40iw_mr *iwmr,
- struct i40iw_ucontext *ucontext)
-{
- struct i40iw_pbl *iwpbl = &iwmr->iwpbl;
- unsigned long flags;
-
- switch (iwmr->type) {
- case IW_MEMREG_TYPE_CQ:
- spin_lock_irqsave(&ucontext->cq_reg_mem_list_lock, flags);
- if (!list_empty(&ucontext->cq_reg_mem_list))
- list_del(&iwpbl->list);
- spin_unlock_irqrestore(&ucontext->cq_reg_mem_list_lock, flags);
- break;
- case IW_MEMREG_TYPE_QP:
- spin_lock_irqsave(&ucontext->qp_reg_mem_list_lock, flags);
- if (!list_empty(&ucontext->qp_reg_mem_list))
- list_del(&iwpbl->list);
- spin_unlock_irqrestore(&ucontext->qp_reg_mem_list_lock, flags);
- break;
- default:
- break;
- }
-}
-
-/**
- * i40iw_dereg_mr - deregister mr
- * @ib_mr: mr ptr for dereg
- */
-static int i40iw_dereg_mr(struct ib_mr *ib_mr)
-{
- struct ib_pd *ibpd = ib_mr->pd;
- struct i40iw_pd *iwpd = to_iwpd(ibpd);
- struct i40iw_mr *iwmr = to_iwmr(ib_mr);
- struct i40iw_device *iwdev = to_iwdev(ib_mr->device);
- enum i40iw_status_code status;
- struct i40iw_dealloc_stag_info *info;
- struct i40iw_pbl *iwpbl = &iwmr->iwpbl;
- struct i40iw_pble_alloc *palloc = &iwpbl->pble_alloc;
- struct i40iw_cqp_request *cqp_request;
- struct cqp_commands_info *cqp_info;
- u32 stag_idx;
-
- if (iwmr->region)
- ib_umem_release(iwmr->region);
-
- if (iwmr->type != IW_MEMREG_TYPE_MEM) {
- if (ibpd->uobject) {
- struct i40iw_ucontext *ucontext;
-
- ucontext = to_ucontext(ibpd->uobject->context);
- i40iw_del_memlist(iwmr, ucontext);
- }
- if (iwpbl->pbl_allocated)
- i40iw_free_pble(iwdev->pble_rsrc, palloc);
- kfree(iwmr);
- return 0;
- }
-
- cqp_request = i40iw_get_cqp_request(&iwdev->cqp, true);
- if (!cqp_request)
- return -ENOMEM;
-
- cqp_info = &cqp_request->info;
- info = &cqp_info->in.u.dealloc_stag.info;
- memset(info, 0, sizeof(*info));
-
- info->pd_id = cpu_to_le32(iwpd->sc_pd.pd_id & 0x00007fff);
- info->stag_idx = RS_64_1(ib_mr->rkey, I40IW_CQPSQ_STAG_IDX_SHIFT);
- stag_idx = info->stag_idx;
- info->mr = true;
- if (iwpbl->pbl_allocated)
- info->dealloc_pbl = true;
-
- cqp_info->cqp_cmd = OP_DEALLOC_STAG;
- cqp_info->post_sq = 1;
- cqp_info->in.u.dealloc_stag.dev = &iwdev->sc_dev;
- cqp_info->in.u.dealloc_stag.scratch = (uintptr_t)cqp_request;
- status = i40iw_handle_cqp_op(iwdev, cqp_request);
- if (status)
- i40iw_pr_err("CQP-OP dealloc failed for stag_idx = 0x%x\n", stag_idx);
- i40iw_rem_pdusecount(iwpd, iwdev);
- i40iw_free_stag(iwdev, iwmr->stag);
- if (iwpbl->pbl_allocated)
- i40iw_free_pble(iwdev->pble_rsrc, palloc);
- kfree(iwmr);
- return 0;
-}
-
-/**
- * i40iw_show_rev
- */
-static ssize_t i40iw_show_rev(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct i40iw_ib_device *iwibdev = container_of(dev,
- struct i40iw_ib_device,
- ibdev.dev);
- u32 hw_rev = iwibdev->iwdev->sc_dev.hw_rev;
-
- return sprintf(buf, "%x\n", hw_rev);
-}
-
-/**
- * i40iw_show_hca
- */
-static ssize_t i40iw_show_hca(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- return sprintf(buf, "I40IW\n");
-}
-
-/**
- * i40iw_show_board
- */
-static ssize_t i40iw_show_board(struct device *dev,
- struct device_attribute *attr,
- char *buf)
-{
- return sprintf(buf, "%.*s\n", 32, "I40IW Board ID");
-}
-
-static DEVICE_ATTR(hw_rev, S_IRUGO, i40iw_show_rev, NULL);
-static DEVICE_ATTR(hca_type, S_IRUGO, i40iw_show_hca, NULL);
-static DEVICE_ATTR(board_id, S_IRUGO, i40iw_show_board, NULL);
-
-static struct device_attribute *i40iw_dev_attributes[] = {
- &dev_attr_hw_rev,
- &dev_attr_hca_type,
- &dev_attr_board_id
-};
-
-/**
- * i40iw_copy_sg_list - copy sg list for qp
- * @sg_list: copied into sg_list
- * @sgl: copy from sgl
- * @num_sges: count of sg entries
- */
-static void i40iw_copy_sg_list(struct i40iw_sge *sg_list, struct ib_sge *sgl, int num_sges)
-{
- unsigned int i;
-
- for (i = 0; (i < num_sges) && (i < I40IW_MAX_WQ_FRAGMENT_COUNT); i++) {
- sg_list[i].tag_off = sgl[i].addr;
- sg_list[i].len = sgl[i].length;
- sg_list[i].stag = sgl[i].lkey;
- }
-}
-
-/**
- * i40iw_post_send - kernel application wr
- * @ibqp: qp ptr for wr
- * @ib_wr: work request ptr
- * @bad_wr: return of bad wr if err
- */
-static int i40iw_post_send(struct ib_qp *ibqp,
- struct ib_send_wr *ib_wr,
- struct ib_send_wr **bad_wr)
-{
- struct i40iw_qp *iwqp;
- struct i40iw_qp_uk *ukqp;
- struct i40iw_post_sq_info info;
- enum i40iw_status_code ret;
- int err = 0;
- unsigned long flags;
- bool inv_stag;
-
- iwqp = (struct i40iw_qp *)ibqp;
- ukqp = &iwqp->sc_qp.qp_uk;
-
- spin_lock_irqsave(&iwqp->lock, flags);
- while (ib_wr) {
- inv_stag = false;
- memset(&info, 0, sizeof(info));
- info.wr_id = (u64)(ib_wr->wr_id);
- if ((ib_wr->send_flags & IB_SEND_SIGNALED) || iwqp->sig_all)
- info.signaled = true;
- if (ib_wr->send_flags & IB_SEND_FENCE)
- info.read_fence = true;
-
- switch (ib_wr->opcode) {
- case IB_WR_SEND:
- /* fall-through */
- case IB_WR_SEND_WITH_INV:
- if (ib_wr->opcode == IB_WR_SEND) {
- if (ib_wr->send_flags & IB_SEND_SOLICITED)
- info.op_type = I40IW_OP_TYPE_SEND_SOL;
- else
- info.op_type = I40IW_OP_TYPE_SEND;
- } else {
- if (ib_wr->send_flags & IB_SEND_SOLICITED)
- info.op_type = I40IW_OP_TYPE_SEND_SOL_INV;
- else
- info.op_type = I40IW_OP_TYPE_SEND_INV;
- }
-
- if (ib_wr->send_flags & IB_SEND_INLINE) {
- info.op.inline_send.data = (void *)(unsigned long)ib_wr->sg_list[0].addr;
- info.op.inline_send.len = ib_wr->sg_list[0].length;
- ret = ukqp->ops.iw_inline_send(ukqp, &info, ib_wr->ex.invalidate_rkey, false);
- } else {
- info.op.send.num_sges = ib_wr->num_sge;
- info.op.send.sg_list = (struct i40iw_sge *)ib_wr->sg_list;
- ret = ukqp->ops.iw_send(ukqp, &info, ib_wr->ex.invalidate_rkey, false);
- }
-
- if (ret) {
- if (ret == I40IW_ERR_QP_TOOMANY_WRS_POSTED)
- err = -ENOMEM;
- else
- err = -EINVAL;
- }
- break;
- case IB_WR_RDMA_WRITE:
- info.op_type = I40IW_OP_TYPE_RDMA_WRITE;
-
- if (ib_wr->send_flags & IB_SEND_INLINE) {
- info.op.inline_rdma_write.data = (void *)(unsigned long)ib_wr->sg_list[0].addr;
- info.op.inline_rdma_write.len = ib_wr->sg_list[0].length;
- info.op.inline_rdma_write.rem_addr.tag_off = rdma_wr(ib_wr)->remote_addr;
- info.op.inline_rdma_write.rem_addr.stag = rdma_wr(ib_wr)->rkey;
- info.op.inline_rdma_write.rem_addr.len = ib_wr->sg_list->length;
- ret = ukqp->ops.iw_inline_rdma_write(ukqp, &info, false);
- } else {
- info.op.rdma_write.lo_sg_list = (void *)ib_wr->sg_list;
- info.op.rdma_write.num_lo_sges = ib_wr->num_sge;
- info.op.rdma_write.rem_addr.tag_off = rdma_wr(ib_wr)->remote_addr;
- info.op.rdma_write.rem_addr.stag = rdma_wr(ib_wr)->rkey;
- info.op.rdma_write.rem_addr.len = ib_wr->sg_list->length;
- ret = ukqp->ops.iw_rdma_write(ukqp, &info, false);
- }
-
- if (ret) {
- if (ret == I40IW_ERR_QP_TOOMANY_WRS_POSTED)
- err = -ENOMEM;
- else
- err = -EINVAL;
- }
- break;
- case IB_WR_RDMA_READ_WITH_INV:
- inv_stag = true;
- /* fall-through*/
- case IB_WR_RDMA_READ:
- if (ib_wr->num_sge > I40IW_MAX_SGE_RD) {
- err = -EINVAL;
- break;
- }
- info.op_type = I40IW_OP_TYPE_RDMA_READ;
- info.op.rdma_read.rem_addr.tag_off = rdma_wr(ib_wr)->remote_addr;
- info.op.rdma_read.rem_addr.stag = rdma_wr(ib_wr)->rkey;
- info.op.rdma_read.rem_addr.len = ib_wr->sg_list->length;
- info.op.rdma_read.lo_addr.tag_off = ib_wr->sg_list->addr;
- info.op.rdma_read.lo_addr.stag = ib_wr->sg_list->lkey;
- info.op.rdma_read.lo_addr.len = ib_wr->sg_list->length;
- ret = ukqp->ops.iw_rdma_read(ukqp, &info, inv_stag, false);
- if (ret) {
- if (ret == I40IW_ERR_QP_TOOMANY_WRS_POSTED)
- err = -ENOMEM;
- else
- err = -EINVAL;
- }
- break;
- case IB_WR_LOCAL_INV:
- info.op_type = I40IW_OP_TYPE_INV_STAG;
- info.op.inv_local_stag.target_stag = ib_wr->ex.invalidate_rkey;
- ret = ukqp->ops.iw_stag_local_invalidate(ukqp, &info, true);
- if (ret)
- err = -ENOMEM;
- break;
- case IB_WR_REG_MR:
- {
- struct i40iw_mr *iwmr = to_iwmr(reg_wr(ib_wr)->mr);
- int flags = reg_wr(ib_wr)->access;
- struct i40iw_pble_alloc *palloc = &iwmr->iwpbl.pble_alloc;
- struct i40iw_sc_dev *dev = &iwqp->iwdev->sc_dev;
- struct i40iw_fast_reg_stag_info info;
-
- memset(&info, 0, sizeof(info));
- info.access_rights = I40IW_ACCESS_FLAGS_LOCALREAD;
- info.access_rights |= i40iw_get_user_access(flags);
- info.stag_key = reg_wr(ib_wr)->key & 0xff;
- info.stag_idx = reg_wr(ib_wr)->key >> 8;
- info.page_size = reg_wr(ib_wr)->mr->page_size;
- info.wr_id = ib_wr->wr_id;
-
- info.addr_type = I40IW_ADDR_TYPE_VA_BASED;
- info.va = (void *)(uintptr_t)iwmr->ibmr.iova;
- info.total_len = iwmr->ibmr.length;
- info.reg_addr_pa = *(u64 *)palloc->level1.addr;
- info.first_pm_pbl_index = palloc->level1.idx;
- info.local_fence = ib_wr->send_flags & IB_SEND_FENCE;
- info.signaled = ib_wr->send_flags & IB_SEND_SIGNALED;
-
- if (iwmr->npages > I40IW_MIN_PAGES_PER_FMR)
- info.chunk_size = 1;
-
- ret = dev->iw_priv_qp_ops->iw_mr_fast_register(&iwqp->sc_qp, &info, true);
- if (ret)
- err = -ENOMEM;
- break;
- }
- default:
- err = -EINVAL;
- i40iw_pr_err(" upost_send bad opcode = 0x%x\n",
- ib_wr->opcode);
- break;
- }
-
- if (err)
- break;
- ib_wr = ib_wr->next;
- }
-
- if (err)
- *bad_wr = ib_wr;
- else
- ukqp->ops.iw_qp_post_wr(ukqp);
- spin_unlock_irqrestore(&iwqp->lock, flags);
-
- return err;
-}
-
-/**
- * i40iw_post_recv - post receive wr for kernel application
- * @ibqp: ib qp pointer
- * @ib_wr: work request for receive
- * @bad_wr: bad wr caused an error
- */
-static int i40iw_post_recv(struct ib_qp *ibqp,
- struct ib_recv_wr *ib_wr,
- struct ib_recv_wr **bad_wr)
-{
- struct i40iw_qp *iwqp;
- struct i40iw_qp_uk *ukqp;
- struct i40iw_post_rq_info post_recv;
- struct i40iw_sge sg_list[I40IW_MAX_WQ_FRAGMENT_COUNT];
- enum i40iw_status_code ret = 0;
- unsigned long flags;
- int err = 0;
-
- iwqp = (struct i40iw_qp *)ibqp;
- ukqp = &iwqp->sc_qp.qp_uk;
-
- memset(&post_recv, 0, sizeof(post_recv));
- spin_lock_irqsave(&iwqp->lock, flags);
- while (ib_wr) {
- post_recv.num_sges = ib_wr->num_sge;
- post_recv.wr_id = ib_wr->wr_id;
- i40iw_copy_sg_list(sg_list, ib_wr->sg_list, ib_wr->num_sge);
- post_recv.sg_list = sg_list;
- ret = ukqp->ops.iw_post_receive(ukqp, &post_recv);
- if (ret) {
- i40iw_pr_err(" post_recv err %d\n", ret);
- if (ret == I40IW_ERR_QP_TOOMANY_WRS_POSTED)
- err = -ENOMEM;
- else
- err = -EINVAL;
- *bad_wr = ib_wr;
- goto out;
- }
- ib_wr = ib_wr->next;
- }
- out:
- spin_unlock_irqrestore(&iwqp->lock, flags);
- return err;
-}
-
-/**
- * i40iw_poll_cq - poll cq for completion (kernel apps)
- * @ibcq: cq to poll
- * @num_entries: number of entries to poll
- * @entry: wr of entry completed
- */
-static int i40iw_poll_cq(struct ib_cq *ibcq,
- int num_entries,
- struct ib_wc *entry)
-{
- struct i40iw_cq *iwcq;
- int cqe_count = 0;
- struct i40iw_cq_poll_info cq_poll_info;
- enum i40iw_status_code ret;
- struct i40iw_cq_uk *ukcq;
- struct i40iw_sc_qp *qp;
- struct i40iw_qp *iwqp;
- unsigned long flags;
-
- iwcq = (struct i40iw_cq *)ibcq;
- ukcq = &iwcq->sc_cq.cq_uk;
-
- spin_lock_irqsave(&iwcq->lock, flags);
- while (cqe_count < num_entries) {
- ret = ukcq->ops.iw_cq_poll_completion(ukcq, &cq_poll_info);
- if (ret == I40IW_ERR_QUEUE_EMPTY) {
- break;
- } else if (ret == I40IW_ERR_QUEUE_DESTROYED) {
- continue;
- } else if (ret) {
- if (!cqe_count)
- cqe_count = -1;
- break;
- }
- entry->wc_flags = 0;
- entry->wr_id = cq_poll_info.wr_id;
- if (cq_poll_info.error) {
- entry->status = IB_WC_WR_FLUSH_ERR;
- entry->vendor_err = cq_poll_info.major_err << 16 | cq_poll_info.minor_err;
- } else {
- entry->status = IB_WC_SUCCESS;
- }
-
- switch (cq_poll_info.op_type) {
- case I40IW_OP_TYPE_RDMA_WRITE:
- entry->opcode = IB_WC_RDMA_WRITE;
- break;
- case I40IW_OP_TYPE_RDMA_READ_INV_STAG:
- case I40IW_OP_TYPE_RDMA_READ:
- entry->opcode = IB_WC_RDMA_READ;
- break;
- case I40IW_OP_TYPE_SEND_SOL:
- case I40IW_OP_TYPE_SEND_SOL_INV:
- case I40IW_OP_TYPE_SEND_INV:
- case I40IW_OP_TYPE_SEND:
- entry->opcode = IB_WC_SEND;
- break;
- case I40IW_OP_TYPE_REC:
- entry->opcode = IB_WC_RECV;
- break;
- default:
- entry->opcode = IB_WC_RECV;
- break;
- }
-
- entry->ex.imm_data = 0;
- qp = (struct i40iw_sc_qp *)cq_poll_info.qp_handle;
- entry->qp = (struct ib_qp *)qp->back_qp;
- entry->src_qp = cq_poll_info.qp_id;
- iwqp = (struct i40iw_qp *)qp->back_qp;
- if (iwqp->iwarp_state > I40IW_QP_STATE_RTS) {
- if (!I40IW_RING_MORE_WORK(qp->qp_uk.sq_ring))
- complete(&iwqp->sq_drained);
- if (!I40IW_RING_MORE_WORK(qp->qp_uk.rq_ring))
- complete(&iwqp->rq_drained);
- }
- entry->byte_len = cq_poll_info.bytes_xfered;
- entry++;
- cqe_count++;
- }
- spin_unlock_irqrestore(&iwcq->lock, flags);
- return cqe_count;
-}
-
-/**
- * i40iw_req_notify_cq - arm cq kernel application
- * @ibcq: cq to arm
- * @notify_flags: notofication flags
- */
-static int i40iw_req_notify_cq(struct ib_cq *ibcq,
- enum ib_cq_notify_flags notify_flags)
-{
- struct i40iw_cq *iwcq;
- struct i40iw_cq_uk *ukcq;
- unsigned long flags;
- enum i40iw_completion_notify cq_notify = IW_CQ_COMPL_EVENT;
-
- iwcq = (struct i40iw_cq *)ibcq;
- ukcq = &iwcq->sc_cq.cq_uk;
- if (notify_flags == IB_CQ_SOLICITED)
- cq_notify = IW_CQ_COMPL_SOLICITED;
- spin_lock_irqsave(&iwcq->lock, flags);
- ukcq->ops.iw_cq_request_notification(ukcq, cq_notify);
- spin_unlock_irqrestore(&iwcq->lock, flags);
- return 0;
-}
-
-/**
- * i40iw_port_immutable - return port's immutable data
- * @ibdev: ib dev struct
- * @port_num: port number
- * @immutable: immutable data for the port return
- */
-static int i40iw_port_immutable(struct ib_device *ibdev, u8 port_num,
- struct ib_port_immutable *immutable)
-{
- struct ib_port_attr attr;
- int err;
-
- err = i40iw_query_port(ibdev, port_num, &attr);
-
- if (err)
- return err;
-
- immutable->pkey_tbl_len = attr.pkey_tbl_len;
- immutable->gid_tbl_len = attr.gid_tbl_len;
- immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
-
- return 0;
-}
-
-static const char * const i40iw_hw_stat_names[] = {
- // 32bit names
- [I40IW_HW_STAT_INDEX_IP4RXDISCARD] = "ip4InDiscards",
- [I40IW_HW_STAT_INDEX_IP4RXTRUNC] = "ip4InTruncatedPkts",
- [I40IW_HW_STAT_INDEX_IP4TXNOROUTE] = "ip4OutNoRoutes",
- [I40IW_HW_STAT_INDEX_IP6RXDISCARD] = "ip6InDiscards",
- [I40IW_HW_STAT_INDEX_IP6RXTRUNC] = "ip6InTruncatedPkts",
- [I40IW_HW_STAT_INDEX_IP6TXNOROUTE] = "ip6OutNoRoutes",
- [I40IW_HW_STAT_INDEX_TCPRTXSEG] = "tcpRetransSegs",
- [I40IW_HW_STAT_INDEX_TCPRXOPTERR] = "tcpInOptErrors",
- [I40IW_HW_STAT_INDEX_TCPRXPROTOERR] = "tcpInProtoErrors",
- // 64bit names
- [I40IW_HW_STAT_INDEX_IP4RXOCTS + I40IW_HW_STAT_INDEX_MAX_32] =
- "ip4InOctets",
- [I40IW_HW_STAT_INDEX_IP4RXPKTS + I40IW_HW_STAT_INDEX_MAX_32] =
- "ip4InPkts",
- [I40IW_HW_STAT_INDEX_IP4RXFRAGS + I40IW_HW_STAT_INDEX_MAX_32] =
- "ip4InReasmRqd",
- [I40IW_HW_STAT_INDEX_IP4RXMCPKTS + I40IW_HW_STAT_INDEX_MAX_32] =
- "ip4InMcastPkts",
- [I40IW_HW_STAT_INDEX_IP4TXOCTS + I40IW_HW_STAT_INDEX_MAX_32] =
- "ip4OutOctets",
- [I40IW_HW_STAT_INDEX_IP4TXPKTS + I40IW_HW_STAT_INDEX_MAX_32] =
- "ip4OutPkts",
- [I40IW_HW_STAT_INDEX_IP4TXFRAGS + I40IW_HW_STAT_INDEX_MAX_32] =
- "ip4OutSegRqd",
- [I40IW_HW_STAT_INDEX_IP4TXMCPKTS + I40IW_HW_STAT_INDEX_MAX_32] =
- "ip4OutMcastPkts",
- [I40IW_HW_STAT_INDEX_IP6RXOCTS + I40IW_HW_STAT_INDEX_MAX_32] =
- "ip6InOctets",
- [I40IW_HW_STAT_INDEX_IP6RXPKTS + I40IW_HW_STAT_INDEX_MAX_32] =
- "ip6InPkts",
- [I40IW_HW_STAT_INDEX_IP6RXFRAGS + I40IW_HW_STAT_INDEX_MAX_32] =
- "ip6InReasmRqd",
- [I40IW_HW_STAT_INDEX_IP6RXMCPKTS + I40IW_HW_STAT_INDEX_MAX_32] =
- "ip6InMcastPkts",
- [I40IW_HW_STAT_INDEX_IP6TXOCTS + I40IW_HW_STAT_INDEX_MAX_32] =
- "ip6OutOctets",
- [I40IW_HW_STAT_INDEX_IP6TXPKTS + I40IW_HW_STAT_INDEX_MAX_32] =
- "ip6OutPkts",
- [I40IW_HW_STAT_INDEX_IP6TXFRAGS + I40IW_HW_STAT_INDEX_MAX_32] =
- "ip6OutSegRqd",
- [I40IW_HW_STAT_INDEX_IP6TXMCPKTS + I40IW_HW_STAT_INDEX_MAX_32] =
- "ip6OutMcastPkts",
- [I40IW_HW_STAT_INDEX_TCPRXSEGS + I40IW_HW_STAT_INDEX_MAX_32] =
- "tcpInSegs",
- [I40IW_HW_STAT_INDEX_TCPTXSEG + I40IW_HW_STAT_INDEX_MAX_32] =
- "tcpOutSegs",
- [I40IW_HW_STAT_INDEX_RDMARXRDS + I40IW_HW_STAT_INDEX_MAX_32] =
- "iwInRdmaReads",
- [I40IW_HW_STAT_INDEX_RDMARXSNDS + I40IW_HW_STAT_INDEX_MAX_32] =
- "iwInRdmaSends",
- [I40IW_HW_STAT_INDEX_RDMARXWRS + I40IW_HW_STAT_INDEX_MAX_32] =
- "iwInRdmaWrites",
- [I40IW_HW_STAT_INDEX_RDMATXRDS + I40IW_HW_STAT_INDEX_MAX_32] =
- "iwOutRdmaReads",
- [I40IW_HW_STAT_INDEX_RDMATXSNDS + I40IW_HW_STAT_INDEX_MAX_32] =
- "iwOutRdmaSends",
- [I40IW_HW_STAT_INDEX_RDMATXWRS + I40IW_HW_STAT_INDEX_MAX_32] =
- "iwOutRdmaWrites",
- [I40IW_HW_STAT_INDEX_RDMAVBND + I40IW_HW_STAT_INDEX_MAX_32] =
- "iwRdmaBnd",
- [I40IW_HW_STAT_INDEX_RDMAVINV + I40IW_HW_STAT_INDEX_MAX_32] =
- "iwRdmaInv"
-};
-
-static void i40iw_get_dev_fw_str(struct ib_device *dev, char *str,
- size_t str_len)
-{
- u32 firmware_version = I40IW_FW_VERSION;
-
- snprintf(str, str_len, "%u.%u", firmware_version,
- (firmware_version & 0x000000ff));
-}
-
-/**
- * i40iw_alloc_hw_stats - Allocate a hw stats structure
- * @ibdev: device pointer from stack
- * @port_num: port number
- */
-static struct rdma_hw_stats *i40iw_alloc_hw_stats(struct ib_device *ibdev,
- u8 port_num)
-{
- struct i40iw_device *iwdev = to_iwdev(ibdev);
- struct i40iw_sc_dev *dev = &iwdev->sc_dev;
- int num_counters = I40IW_HW_STAT_INDEX_MAX_32 +
- I40IW_HW_STAT_INDEX_MAX_64;
- unsigned long lifespan = RDMA_HW_STATS_DEFAULT_LIFESPAN;
-
- BUILD_BUG_ON(ARRAY_SIZE(i40iw_hw_stat_names) !=
- (I40IW_HW_STAT_INDEX_MAX_32 +
- I40IW_HW_STAT_INDEX_MAX_64));
-
- /*
- * PFs get the default update lifespan, but VFs only update once
- * per second
- */
- if (!dev->is_pf)
- lifespan = 1000;
- return rdma_alloc_hw_stats_struct(i40iw_hw_stat_names, num_counters,
- lifespan);
-}
-
-/**
- * i40iw_get_hw_stats - Populates the rdma_hw_stats structure
- * @ibdev: device pointer from stack
- * @stats: stats pointer from stack
- * @port_num: port number
- * @index: which hw counter the stack is requesting we update
- */
-static int i40iw_get_hw_stats(struct ib_device *ibdev,
- struct rdma_hw_stats *stats,
- u8 port_num, int index)
-{
- struct i40iw_device *iwdev = to_iwdev(ibdev);
- struct i40iw_sc_dev *dev = &iwdev->sc_dev;
- struct i40iw_vsi_pestat *devstat = iwdev->vsi.pestat;
- struct i40iw_dev_hw_stats *hw_stats = &devstat->hw_stats;
-
- if (dev->is_pf) {
- i40iw_hw_stats_read_all(devstat, &devstat->hw_stats);
- } else {
- if (i40iw_vchnl_vf_get_pe_stats(dev, &devstat->hw_stats))
- return -ENOSYS;
- }
-
- memcpy(&stats->value[0], hw_stats, sizeof(*hw_stats));
-
- return stats->num_counters;
-}
-
-/**
- * i40iw_query_gid - Query port GID
- * @ibdev: device pointer from stack
- * @port: port number
- * @index: Entry index
- * @gid: Global ID
- */
-static int i40iw_query_gid(struct ib_device *ibdev,
- u8 port,
- int index,
- union ib_gid *gid)
-{
- struct i40iw_device *iwdev = to_iwdev(ibdev);
-
- memset(gid->raw, 0, sizeof(gid->raw));
- ether_addr_copy(gid->raw, iwdev->netdev->dev_addr);
- return 0;
-}
-
-/**
- * i40iw_modify_port Modify port properties
- * @ibdev: device pointer from stack
- * @port: port number
- * @port_modify_mask: mask for port modifications
- * @props: port properties
- */
-static int i40iw_modify_port(struct ib_device *ibdev,
- u8 port,
- int port_modify_mask,
- struct ib_port_modify *props)
-{
- return -ENOSYS;
-}
-
-/**
- * i40iw_query_pkey - Query partition key
- * @ibdev: device pointer from stack
- * @port: port number
- * @index: index of pkey
- * @pkey: pointer to store the pkey
- */
-static int i40iw_query_pkey(struct ib_device *ibdev,
- u8 port,
- u16 index,
- u16 *pkey)
-{
- *pkey = 0;
- return 0;
-}
-
-/**
- * i40iw_create_ah - create address handle
- * @ibpd: ptr of pd
- * @ah_attr: address handle attributes
- */
-static struct ib_ah *i40iw_create_ah(struct ib_pd *ibpd,
- struct ib_ah_attr *attr,
- struct ib_udata *udata)
-
-{
- return ERR_PTR(-ENOSYS);
-}
-
-/**
- * i40iw_destroy_ah - Destroy address handle
- * @ah: pointer to address handle
- */
-static int i40iw_destroy_ah(struct ib_ah *ah)
-{
- return -ENOSYS;
-}
-
-/**
- * i40iw_init_rdma_device - initialization of iwarp device
- * @iwdev: iwarp device
- */
-static struct i40iw_ib_device *i40iw_init_rdma_device(struct i40iw_device *iwdev)
-{
- struct i40iw_ib_device *iwibdev;
- struct net_device *netdev = iwdev->netdev;
- struct pci_dev *pcidev = (struct pci_dev *)iwdev->hw.dev_context;
-
- iwibdev = (struct i40iw_ib_device *)ib_alloc_device(sizeof(*iwibdev));
- if (!iwibdev) {
- i40iw_pr_err("iwdev == NULL\n");
- return NULL;
- }
- strlcpy(iwibdev->ibdev.name, "i40iw%d", IB_DEVICE_NAME_MAX);
- iwibdev->ibdev.owner = THIS_MODULE;
- iwdev->iwibdev = iwibdev;
- iwibdev->iwdev = iwdev;
-
- iwibdev->ibdev.node_type = RDMA_NODE_RNIC;
- ether_addr_copy((u8 *)&iwibdev->ibdev.node_guid, netdev->dev_addr);
-
- iwibdev->ibdev.uverbs_cmd_mask =
- (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
- (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
- (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
- (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
- (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
- (1ull << IB_USER_VERBS_CMD_REG_MR) |
- (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
- (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
- (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
- (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) |
- (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
- (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
- (1ull << IB_USER_VERBS_CMD_QUERY_QP) |
- (1ull << IB_USER_VERBS_CMD_POLL_CQ) |
- (1ull << IB_USER_VERBS_CMD_CREATE_AH) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_AH) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
- (1ull << IB_USER_VERBS_CMD_POST_RECV) |
- (1ull << IB_USER_VERBS_CMD_POST_SEND);
- iwibdev->ibdev.phys_port_cnt = 1;
- iwibdev->ibdev.num_comp_vectors = iwdev->ceqs_count;
- iwibdev->ibdev.dma_device = &pcidev->dev;
- iwibdev->ibdev.dev.parent = &pcidev->dev;
- iwibdev->ibdev.query_port = i40iw_query_port;
- iwibdev->ibdev.modify_port = i40iw_modify_port;
- iwibdev->ibdev.query_pkey = i40iw_query_pkey;
- iwibdev->ibdev.query_gid = i40iw_query_gid;
- iwibdev->ibdev.alloc_ucontext = i40iw_alloc_ucontext;
- iwibdev->ibdev.dealloc_ucontext = i40iw_dealloc_ucontext;
- iwibdev->ibdev.mmap = i40iw_mmap;
- iwibdev->ibdev.alloc_pd = i40iw_alloc_pd;
- iwibdev->ibdev.dealloc_pd = i40iw_dealloc_pd;
- iwibdev->ibdev.create_qp = i40iw_create_qp;
- iwibdev->ibdev.modify_qp = i40iw_modify_qp;
- iwibdev->ibdev.query_qp = i40iw_query_qp;
- iwibdev->ibdev.destroy_qp = i40iw_destroy_qp;
- iwibdev->ibdev.create_cq = i40iw_create_cq;
- iwibdev->ibdev.destroy_cq = i40iw_destroy_cq;
- iwibdev->ibdev.get_dma_mr = i40iw_get_dma_mr;
- iwibdev->ibdev.reg_user_mr = i40iw_reg_user_mr;
- iwibdev->ibdev.dereg_mr = i40iw_dereg_mr;
- iwibdev->ibdev.alloc_hw_stats = i40iw_alloc_hw_stats;
- iwibdev->ibdev.get_hw_stats = i40iw_get_hw_stats;
- iwibdev->ibdev.query_device = i40iw_query_device;
- iwibdev->ibdev.create_ah = i40iw_create_ah;
- iwibdev->ibdev.destroy_ah = i40iw_destroy_ah;
- iwibdev->ibdev.drain_sq = i40iw_drain_sq;
- iwibdev->ibdev.drain_rq = i40iw_drain_rq;
- iwibdev->ibdev.alloc_mr = i40iw_alloc_mr;
- iwibdev->ibdev.map_mr_sg = i40iw_map_mr_sg;
- iwibdev->ibdev.iwcm = kzalloc(sizeof(*iwibdev->ibdev.iwcm), GFP_KERNEL);
- if (!iwibdev->ibdev.iwcm) {
- ib_dealloc_device(&iwibdev->ibdev);
- return NULL;
- }
-
- iwibdev->ibdev.iwcm->add_ref = i40iw_add_ref;
- iwibdev->ibdev.iwcm->rem_ref = i40iw_rem_ref;
- iwibdev->ibdev.iwcm->get_qp = i40iw_get_qp;
- iwibdev->ibdev.iwcm->connect = i40iw_connect;
- iwibdev->ibdev.iwcm->accept = i40iw_accept;
- iwibdev->ibdev.iwcm->reject = i40iw_reject;
- iwibdev->ibdev.iwcm->create_listen = i40iw_create_listen;
- iwibdev->ibdev.iwcm->destroy_listen = i40iw_destroy_listen;
- memcpy(iwibdev->ibdev.iwcm->ifname, netdev->name,
- sizeof(iwibdev->ibdev.iwcm->ifname));
- iwibdev->ibdev.get_port_immutable = i40iw_port_immutable;
- iwibdev->ibdev.get_dev_fw_str = i40iw_get_dev_fw_str;
- iwibdev->ibdev.poll_cq = i40iw_poll_cq;
- iwibdev->ibdev.req_notify_cq = i40iw_req_notify_cq;
- iwibdev->ibdev.post_send = i40iw_post_send;
- iwibdev->ibdev.post_recv = i40iw_post_recv;
-
- return iwibdev;
-}
-
-/**
- * i40iw_port_ibevent - indicate port event
- * @iwdev: iwarp device
- */
-void i40iw_port_ibevent(struct i40iw_device *iwdev)
-{
- struct i40iw_ib_device *iwibdev = iwdev->iwibdev;
- struct ib_event event;
-
- event.device = &iwibdev->ibdev;
- event.element.port_num = 1;
- event.event = iwdev->iw_status ? IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR;
- ib_dispatch_event(&event);
-}
-
-/**
- * i40iw_unregister_rdma_device - unregister of iwarp from IB
- * @iwibdev: rdma device ptr
- */
-static void i40iw_unregister_rdma_device(struct i40iw_ib_device *iwibdev)
-{
- int i;
-
- for (i = 0; i < ARRAY_SIZE(i40iw_dev_attributes); ++i)
- device_remove_file(&iwibdev->ibdev.dev,
- i40iw_dev_attributes[i]);
- ib_unregister_device(&iwibdev->ibdev);
-}
-
-/**
- * i40iw_destroy_rdma_device - destroy rdma device and free resources
- * @iwibdev: IB device ptr
- */
-void i40iw_destroy_rdma_device(struct i40iw_ib_device *iwibdev)
-{
- if (!iwibdev)
- return;
-
- i40iw_unregister_rdma_device(iwibdev);
- kfree(iwibdev->ibdev.iwcm);
- iwibdev->ibdev.iwcm = NULL;
- wait_event_timeout(iwibdev->iwdev->close_wq,
- !atomic64_read(&iwibdev->iwdev->use_count),
- I40IW_EVENT_TIMEOUT);
- ib_dealloc_device(&iwibdev->ibdev);
-}
-
-/**
- * i40iw_register_rdma_device - register iwarp device to IB
- * @iwdev: iwarp device
- */
-int i40iw_register_rdma_device(struct i40iw_device *iwdev)
-{
- int i, ret;
- struct i40iw_ib_device *iwibdev;
-
- iwdev->iwibdev = i40iw_init_rdma_device(iwdev);
- if (!iwdev->iwibdev)
- return -ENOMEM;
- iwibdev = iwdev->iwibdev;
-
- ret = ib_register_device(&iwibdev->ibdev, NULL);
- if (ret)
- goto error;
-
- for (i = 0; i < ARRAY_SIZE(i40iw_dev_attributes); ++i) {
- ret =
- device_create_file(&iwibdev->ibdev.dev,
- i40iw_dev_attributes[i]);
- if (ret) {
- while (i > 0) {
- i--;
- device_remove_file(&iwibdev->ibdev.dev, i40iw_dev_attributes[i]);
- }
- ib_unregister_device(&iwibdev->ibdev);
- goto error;
- }
- }
- return 0;
-error:
- kfree(iwdev->iwibdev->ibdev.iwcm);
- iwdev->iwibdev->ibdev.iwcm = NULL;
- ib_dealloc_device(&iwdev->iwibdev->ibdev);
- return ret;
-}
diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.h b/drivers/infiniband/hw/i40iw/i40iw_verbs.h
deleted file mode 100644
index 07c3fec77de6..000000000000
--- a/drivers/infiniband/hw/i40iw/i40iw_verbs.h
+++ /dev/null
@@ -1,179 +0,0 @@
-/*******************************************************************************
-*
-* Copyright (c) 2015-2016 Intel Corporation. All rights reserved.
-*
-* This software is available to you under a choice of one of two
-* licenses. You may choose to be licensed under the terms of the GNU
-* General Public License (GPL) Version 2, available from the file
-* COPYING in the main directory of this source tree, or the
-* OpenFabrics.org BSD license below:
-*
-* Redistribution and use in source and binary forms, with or
-* without modification, are permitted provided that the following
-* conditions are met:
-*
-* - Redistributions of source code must retain the above
-* copyright notice, this list of conditions and the following
-* disclaimer.
-*
-* - Redistributions in binary form must reproduce the above
-* copyright notice, this list of conditions and the following
-* disclaimer in the documentation and/or other materials
-* provided with the distribution.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-* SOFTWARE.
-*
-*******************************************************************************/
-
-#ifndef I40IW_VERBS_H
-#define I40IW_VERBS_H
-
-struct i40iw_ucontext {
- struct ib_ucontext ibucontext;
- struct i40iw_device *iwdev;
- struct list_head cq_reg_mem_list;
- spinlock_t cq_reg_mem_list_lock; /* memory list for cq's */
- struct list_head qp_reg_mem_list;
- spinlock_t qp_reg_mem_list_lock; /* memory list for qp's */
- int abi_ver;
-};
-
-struct i40iw_pd {
- struct ib_pd ibpd;
- struct i40iw_sc_pd sc_pd;
- atomic_t usecount;
-};
-
-struct i40iw_hmc_pble {
- union {
- u32 idx;
- dma_addr_t addr;
- };
-};
-
-struct i40iw_cq_mr {
- struct i40iw_hmc_pble cq_pbl;
- dma_addr_t shadow;
-};
-
-struct i40iw_qp_mr {
- struct i40iw_hmc_pble sq_pbl;
- struct i40iw_hmc_pble rq_pbl;
- dma_addr_t shadow;
- struct page *sq_page;
-};
-
-struct i40iw_pbl {
- struct list_head list;
- union {
- struct i40iw_qp_mr qp_mr;
- struct i40iw_cq_mr cq_mr;
- };
-
- bool pbl_allocated;
- u64 user_base;
- struct i40iw_pble_alloc pble_alloc;
- struct i40iw_mr *iwmr;
-};
-
-#define MAX_SAVE_PAGE_ADDRS 4
-struct i40iw_mr {
- union {
- struct ib_mr ibmr;
- struct ib_mw ibmw;
- struct ib_fmr ibfmr;
- };
- struct ib_umem *region;
- u16 type;
- u32 page_cnt;
- u32 page_size;
- u64 page_msk;
- u32 npages;
- u32 stag;
- u64 length;
- u64 pgaddrmem[MAX_SAVE_PAGE_ADDRS];
- struct i40iw_pbl iwpbl;
-};
-
-struct i40iw_cq {
- struct ib_cq ibcq;
- struct i40iw_sc_cq sc_cq;
- u16 cq_head;
- u16 cq_size;
- u16 cq_number;
- bool user_mode;
- u32 polled_completions;
- u32 cq_mem_size;
- struct i40iw_dma_mem kmem;
- spinlock_t lock; /* for poll cq */
- struct i40iw_pbl *iwpbl;
-};
-
-struct disconn_work {
- struct work_struct work;
- struct i40iw_qp *iwqp;
-};
-
-struct iw_cm_id;
-struct ietf_mpa_frame;
-struct i40iw_ud_file;
-
-struct i40iw_qp_kmode {
- struct i40iw_dma_mem dma_mem;
- u64 *wrid_mem;
-};
-
-struct i40iw_qp {
- struct ib_qp ibqp;
- struct i40iw_sc_qp sc_qp;
- struct i40iw_device *iwdev;
- struct i40iw_cq *iwscq;
- struct i40iw_cq *iwrcq;
- struct i40iw_pd *iwpd;
- struct i40iw_qp_host_ctx_info ctx_info;
- struct i40iwarp_offload_info iwarp_info;
- void *allocated_buffer;
- atomic_t refcount;
- struct iw_cm_id *cm_id;
- void *cm_node;
- struct ib_mr *lsmm_mr;
- struct work_struct work;
- enum ib_qp_state ibqp_state;
- u32 iwarp_state;
- u32 qp_mem_size;
- u32 last_aeq;
- atomic_t close_timer_started;
- spinlock_t lock; /* for post work requests */
- struct i40iw_qp_context *iwqp_context;
- void *pbl_vbase;
- dma_addr_t pbl_pbase;
- struct page *page;
- u8 active_conn:1;
- u8 user_mode:1;
- u8 hte_added:1;
- u8 flush_issued:1;
- u8 destroyed:1;
- u8 sig_all:1;
- u8 pau_mode:1;
- u8 rsvd:1;
- u16 term_sq_flush_code;
- u16 term_rq_flush_code;
- u8 hw_iwarp_state;
- u8 hw_tcp_state;
- struct i40iw_qp_kmode kqp;
- struct i40iw_dma_mem host_ctx;
- struct timer_list terminate_timer;
- struct i40iw_pbl *iwpbl;
- struct i40iw_dma_mem q2_ctx_mem;
- struct i40iw_dma_mem ietf_mem;
- struct completion sq_drained;
- struct completion rq_drained;
-};
-#endif
diff --git a/drivers/infiniband/hw/i40iw/i40iw_vf.c b/drivers/infiniband/hw/i40iw/i40iw_vf.c
deleted file mode 100644
index e33d4810965c..000000000000
--- a/drivers/infiniband/hw/i40iw/i40iw_vf.c
+++ /dev/null
@@ -1,85 +0,0 @@
-/*******************************************************************************
-*
-* Copyright (c) 2015-2016 Intel Corporation. All rights reserved.
-*
-* This software is available to you under a choice of one of two
-* licenses. You may choose to be licensed under the terms of the GNU
-* General Public License (GPL) Version 2, available from the file
-* COPYING in the main directory of this source tree, or the
-* OpenFabrics.org BSD license below:
-*
-* Redistribution and use in source and binary forms, with or
-* without modification, are permitted provided that the following
-* conditions are met:
-*
-* - Redistributions of source code must retain the above
-* copyright notice, this list of conditions and the following
-* disclaimer.
-*
-* - Redistributions in binary form must reproduce the above
-* copyright notice, this list of conditions and the following
-* disclaimer in the documentation and/or other materials
-* provided with the distribution.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-* SOFTWARE.
-*
-*******************************************************************************/
-
-#include "i40iw_osdep.h"
-#include "i40iw_register.h"
-#include "i40iw_status.h"
-#include "i40iw_hmc.h"
-#include "i40iw_d.h"
-#include "i40iw_type.h"
-#include "i40iw_p.h"
-#include "i40iw_vf.h"
-
-/**
- * i40iw_manage_vf_pble_bp - manage vf pble
- * @cqp: cqp for cqp' sq wqe
- * @info: pble info
- * @scratch: pointer for completion
- * @post_sq: to post and ring
- */
-enum i40iw_status_code i40iw_manage_vf_pble_bp(struct i40iw_sc_cqp *cqp,
- struct i40iw_manage_vf_pble_info *info,
- u64 scratch,
- bool post_sq)
-{
- u64 *wqe;
- u64 temp, header, pd_pl_pba = 0;
-
- wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
- if (!wqe)
- return I40IW_ERR_RING_FULL;
-
- temp = LS_64(info->pd_entry_cnt, I40IW_CQPSQ_MVPBP_PD_ENTRY_CNT) |
- LS_64(info->first_pd_index, I40IW_CQPSQ_MVPBP_FIRST_PD_INX) |
- LS_64(info->sd_index, I40IW_CQPSQ_MVPBP_SD_INX);
- set_64bit_val(wqe, 16, temp);
-
- header = LS_64((info->inv_pd_ent ? 1 : 0), I40IW_CQPSQ_MVPBP_INV_PD_ENT) |
- LS_64(I40IW_CQP_OP_MANAGE_VF_PBLE_BP, I40IW_CQPSQ_OPCODE) |
- LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
- set_64bit_val(wqe, 24, header);
-
- pd_pl_pba = LS_64(info->pd_pl_pba >> 3, I40IW_CQPSQ_MVPBP_PD_PLPBA);
- set_64bit_val(wqe, 32, pd_pl_pba);
-
- i40iw_debug_buf(cqp->dev, I40IW_DEBUG_WQE, "MANAGE VF_PBLE_BP WQE", wqe, I40IW_CQP_WQE_SIZE * 8);
-
- if (post_sq)
- i40iw_sc_cqp_post_sq(cqp);
- return 0;
-}
-
-const struct i40iw_vf_cqp_ops iw_vf_cqp_ops = {
- i40iw_manage_vf_pble_bp
-};
diff --git a/drivers/infiniband/hw/i40iw/i40iw_vf.h b/drivers/infiniband/hw/i40iw/i40iw_vf.h
deleted file mode 100644
index 4359559ece9c..000000000000
--- a/drivers/infiniband/hw/i40iw/i40iw_vf.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/*******************************************************************************
-*
-* Copyright (c) 2015-2016 Intel Corporation. All rights reserved.
-*
-* This software is available to you under a choice of one of two
-* licenses. You may choose to be licensed under the terms of the GNU
-* General Public License (GPL) Version 2, available from the file
-* COPYING in the main directory of this source tree, or the
-* OpenFabrics.org BSD license below:
-*
-* Redistribution and use in source and binary forms, with or
-* without modification, are permitted provided that the following
-* conditions are met:
-*
-* - Redistributions of source code must retain the above
-* copyright notice, this list of conditions and the following
-* disclaimer.
-*
-* - Redistributions in binary form must reproduce the above
-* copyright notice, this list of conditions and the following
-* disclaimer in the documentation and/or other materials
-* provided with the distribution.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-* SOFTWARE.
-*
-*******************************************************************************/
-
-#ifndef I40IW_VF_H
-#define I40IW_VF_H
-
-struct i40iw_sc_cqp;
-
-struct i40iw_manage_vf_pble_info {
- u32 sd_index;
- u16 first_pd_index;
- u16 pd_entry_cnt;
- u8 inv_pd_ent;
- u64 pd_pl_pba;
-};
-
-struct i40iw_vf_cqp_ops {
- enum i40iw_status_code (*manage_vf_pble_bp)(struct i40iw_sc_cqp *,
- struct i40iw_manage_vf_pble_info *,
- u64,
- bool);
-};
-
-enum i40iw_status_code i40iw_manage_vf_pble_bp(struct i40iw_sc_cqp *cqp,
- struct i40iw_manage_vf_pble_info *info,
- u64 scratch,
- bool post_sq);
-
-extern const struct i40iw_vf_cqp_ops iw_vf_cqp_ops;
-
-#endif
diff --git a/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c b/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c
deleted file mode 100644
index f4d13683a403..000000000000
--- a/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c
+++ /dev/null
@@ -1,759 +0,0 @@
-/*******************************************************************************
-*
-* Copyright (c) 2015-2016 Intel Corporation. All rights reserved.
-*
-* This software is available to you under a choice of one of two
-* licenses. You may choose to be licensed under the terms of the GNU
-* General Public License (GPL) Version 2, available from the file
-* COPYING in the main directory of this source tree, or the
-* OpenFabrics.org BSD license below:
-*
-* Redistribution and use in source and binary forms, with or
-* without modification, are permitted provided that the following
-* conditions are met:
-*
-* - Redistributions of source code must retain the above
-* copyright notice, this list of conditions and the following
-* disclaimer.
-*
-* - Redistributions in binary form must reproduce the above
-* copyright notice, this list of conditions and the following
-* disclaimer in the documentation and/or other materials
-* provided with the distribution.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-* SOFTWARE.
-*
-*******************************************************************************/
-
-#include "i40iw_osdep.h"
-#include "i40iw_register.h"
-#include "i40iw_status.h"
-#include "i40iw_hmc.h"
-#include "i40iw_d.h"
-#include "i40iw_type.h"
-#include "i40iw_p.h"
-#include "i40iw_virtchnl.h"
-
-/**
- * vchnl_vf_send_get_ver_req - Request Channel version
- * @dev: IWARP device pointer
- * @vchnl_req: Virtual channel message request pointer
- */
-static enum i40iw_status_code vchnl_vf_send_get_ver_req(struct i40iw_sc_dev *dev,
- struct i40iw_virtchnl_req *vchnl_req)
-{
- enum i40iw_status_code ret_code = I40IW_ERR_NOT_READY;
- struct i40iw_virtchnl_op_buf *vchnl_msg = vchnl_req->vchnl_msg;
-
- if (!dev->vchnl_up)
- return ret_code;
-
- memset(vchnl_msg, 0, sizeof(*vchnl_msg));
- vchnl_msg->iw_chnl_op_ctx = (uintptr_t)vchnl_req;
- vchnl_msg->iw_chnl_buf_len = sizeof(*vchnl_msg);
- vchnl_msg->iw_op_code = I40IW_VCHNL_OP_GET_VER;
- vchnl_msg->iw_op_ver = I40IW_VCHNL_OP_GET_VER_V0;
- ret_code = dev->vchnl_if.vchnl_send(dev, 0, (u8 *)vchnl_msg, vchnl_msg->iw_chnl_buf_len);
- if (ret_code)
- i40iw_debug(dev, I40IW_DEBUG_VIRT,
- "%s: virt channel send failed 0x%x\n", __func__, ret_code);
- return ret_code;
-}
-
-/**
- * vchnl_vf_send_get_hmc_fcn_req - Request HMC Function from VF
- * @dev: IWARP device pointer
- * @vchnl_req: Virtual channel message request pointer
- */
-static enum i40iw_status_code vchnl_vf_send_get_hmc_fcn_req(struct i40iw_sc_dev *dev,
- struct i40iw_virtchnl_req *vchnl_req)
-{
- enum i40iw_status_code ret_code = I40IW_ERR_NOT_READY;
- struct i40iw_virtchnl_op_buf *vchnl_msg = vchnl_req->vchnl_msg;
-
- if (!dev->vchnl_up)
- return ret_code;
-
- memset(vchnl_msg, 0, sizeof(*vchnl_msg));
- vchnl_msg->iw_chnl_op_ctx = (uintptr_t)vchnl_req;
- vchnl_msg->iw_chnl_buf_len = sizeof(*vchnl_msg);
- vchnl_msg->iw_op_code = I40IW_VCHNL_OP_GET_HMC_FCN;
- vchnl_msg->iw_op_ver = I40IW_VCHNL_OP_GET_HMC_FCN_V0;
- ret_code = dev->vchnl_if.vchnl_send(dev, 0, (u8 *)vchnl_msg, vchnl_msg->iw_chnl_buf_len);
- if (ret_code)
- i40iw_debug(dev, I40IW_DEBUG_VIRT,
- "%s: virt channel send failed 0x%x\n", __func__, ret_code);
- return ret_code;
-}
-
-/**
- * vchnl_vf_send_get_pe_stats_req - Request PE stats from VF
- * @dev: IWARP device pointer
- * @vchnl_req: Virtual channel message request pointer
- */
-static enum i40iw_status_code vchnl_vf_send_get_pe_stats_req(struct i40iw_sc_dev *dev,
- struct i40iw_virtchnl_req *vchnl_req)
-{
- enum i40iw_status_code ret_code = I40IW_ERR_NOT_READY;
- struct i40iw_virtchnl_op_buf *vchnl_msg = vchnl_req->vchnl_msg;
-
- if (!dev->vchnl_up)
- return ret_code;
-
- memset(vchnl_msg, 0, sizeof(*vchnl_msg));
- vchnl_msg->iw_chnl_op_ctx = (uintptr_t)vchnl_req;
- vchnl_msg->iw_chnl_buf_len = sizeof(*vchnl_msg) + sizeof(struct i40iw_dev_hw_stats) - 1;
- vchnl_msg->iw_op_code = I40IW_VCHNL_OP_GET_STATS;
- vchnl_msg->iw_op_ver = I40IW_VCHNL_OP_GET_STATS_V0;
- ret_code = dev->vchnl_if.vchnl_send(dev, 0, (u8 *)vchnl_msg, vchnl_msg->iw_chnl_buf_len);
- if (ret_code)
- i40iw_debug(dev, I40IW_DEBUG_VIRT,
- "%s: virt channel send failed 0x%x\n", __func__, ret_code);
- return ret_code;
-}
-
-/**
- * vchnl_vf_send_add_hmc_objs_req - Add HMC objects
- * @dev: IWARP device pointer
- * @vchnl_req: Virtual channel message request pointer
- */
-static enum i40iw_status_code vchnl_vf_send_add_hmc_objs_req(struct i40iw_sc_dev *dev,
- struct i40iw_virtchnl_req *vchnl_req,
- enum i40iw_hmc_rsrc_type rsrc_type,
- u32 start_index,
- u32 rsrc_count)
-{
- enum i40iw_status_code ret_code = I40IW_ERR_NOT_READY;
- struct i40iw_virtchnl_op_buf *vchnl_msg = vchnl_req->vchnl_msg;
- struct i40iw_virtchnl_hmc_obj_range *add_hmc_obj;
-
- if (!dev->vchnl_up)
- return ret_code;
-
- add_hmc_obj = (struct i40iw_virtchnl_hmc_obj_range *)vchnl_msg->iw_chnl_buf;
- memset(vchnl_msg, 0, sizeof(*vchnl_msg));
- memset(add_hmc_obj, 0, sizeof(*add_hmc_obj));
- vchnl_msg->iw_chnl_op_ctx = (uintptr_t)vchnl_req;
- vchnl_msg->iw_chnl_buf_len = sizeof(*vchnl_msg) + sizeof(struct i40iw_virtchnl_hmc_obj_range) - 1;
- vchnl_msg->iw_op_code = I40IW_VCHNL_OP_ADD_HMC_OBJ_RANGE;
- vchnl_msg->iw_op_ver = I40IW_VCHNL_OP_ADD_HMC_OBJ_RANGE_V0;
- add_hmc_obj->obj_type = (u16)rsrc_type;
- add_hmc_obj->start_index = start_index;
- add_hmc_obj->obj_count = rsrc_count;
- ret_code = dev->vchnl_if.vchnl_send(dev, 0, (u8 *)vchnl_msg, vchnl_msg->iw_chnl_buf_len);
- if (ret_code)
- i40iw_debug(dev, I40IW_DEBUG_VIRT,
- "%s: virt channel send failed 0x%x\n", __func__, ret_code);
- return ret_code;
-}
-
-/**
- * vchnl_vf_send_del_hmc_objs_req - del HMC objects
- * @dev: IWARP device pointer
- * @vchnl_req: Virtual channel message request pointer
- * @ rsrc_type - resource type to delete
- * @ start_index - starting index for resource
- * @ rsrc_count - number of resource type to delete
- */
-static enum i40iw_status_code vchnl_vf_send_del_hmc_objs_req(struct i40iw_sc_dev *dev,
- struct i40iw_virtchnl_req *vchnl_req,
- enum i40iw_hmc_rsrc_type rsrc_type,
- u32 start_index,
- u32 rsrc_count)
-{
- enum i40iw_status_code ret_code = I40IW_ERR_NOT_READY;
- struct i40iw_virtchnl_op_buf *vchnl_msg = vchnl_req->vchnl_msg;
- struct i40iw_virtchnl_hmc_obj_range *add_hmc_obj;
-
- if (!dev->vchnl_up)
- return ret_code;
-
- add_hmc_obj = (struct i40iw_virtchnl_hmc_obj_range *)vchnl_msg->iw_chnl_buf;
- memset(vchnl_msg, 0, sizeof(*vchnl_msg));
- memset(add_hmc_obj, 0, sizeof(*add_hmc_obj));
- vchnl_msg->iw_chnl_op_ctx = (uintptr_t)vchnl_req;
- vchnl_msg->iw_chnl_buf_len = sizeof(*vchnl_msg) + sizeof(struct i40iw_virtchnl_hmc_obj_range) - 1;
- vchnl_msg->iw_op_code = I40IW_VCHNL_OP_DEL_HMC_OBJ_RANGE;
- vchnl_msg->iw_op_ver = I40IW_VCHNL_OP_DEL_HMC_OBJ_RANGE_V0;
- add_hmc_obj->obj_type = (u16)rsrc_type;
- add_hmc_obj->start_index = start_index;
- add_hmc_obj->obj_count = rsrc_count;
- ret_code = dev->vchnl_if.vchnl_send(dev, 0, (u8 *)vchnl_msg, vchnl_msg->iw_chnl_buf_len);
- if (ret_code)
- i40iw_debug(dev, I40IW_DEBUG_VIRT,
- "%s: virt channel send failed 0x%x\n", __func__, ret_code);
- return ret_code;
-}
-
-/**
- * vchnl_pf_send_get_ver_resp - Send channel version to VF
- * @dev: IWARP device pointer
- * @vf_id: Virtual function ID associated with the message
- * @vchnl_msg: Virtual channel message buffer pointer
- */
-static void vchnl_pf_send_get_ver_resp(struct i40iw_sc_dev *dev,
- u32 vf_id,
- struct i40iw_virtchnl_op_buf *vchnl_msg)
-{
- enum i40iw_status_code ret_code;
- u8 resp_buffer[sizeof(struct i40iw_virtchnl_resp_buf) + sizeof(u32) - 1];
- struct i40iw_virtchnl_resp_buf *vchnl_msg_resp = (struct i40iw_virtchnl_resp_buf *)resp_buffer;
-
- memset(resp_buffer, 0, sizeof(*resp_buffer));
- vchnl_msg_resp->iw_chnl_op_ctx = vchnl_msg->iw_chnl_op_ctx;
- vchnl_msg_resp->iw_chnl_buf_len = sizeof(resp_buffer);
- vchnl_msg_resp->iw_op_ret_code = I40IW_SUCCESS;
- *((u32 *)vchnl_msg_resp->iw_chnl_buf) = I40IW_VCHNL_CHNL_VER_V0;
- ret_code = dev->vchnl_if.vchnl_send(dev, vf_id, resp_buffer, sizeof(resp_buffer));
- if (ret_code)
- i40iw_debug(dev, I40IW_DEBUG_VIRT,
- "%s: virt channel send failed 0x%x\n", __func__, ret_code);
-}
-
-/**
- * vchnl_pf_send_get_hmc_fcn_resp - Send HMC Function to VF
- * @dev: IWARP device pointer
- * @vf_id: Virtual function ID associated with the message
- * @vchnl_msg: Virtual channel message buffer pointer
- */
-static void vchnl_pf_send_get_hmc_fcn_resp(struct i40iw_sc_dev *dev,
- u32 vf_id,
- struct i40iw_virtchnl_op_buf *vchnl_msg,
- u16 hmc_fcn)
-{
- enum i40iw_status_code ret_code;
- u8 resp_buffer[sizeof(struct i40iw_virtchnl_resp_buf) + sizeof(u16) - 1];
- struct i40iw_virtchnl_resp_buf *vchnl_msg_resp = (struct i40iw_virtchnl_resp_buf *)resp_buffer;
-
- memset(resp_buffer, 0, sizeof(*resp_buffer));
- vchnl_msg_resp->iw_chnl_op_ctx = vchnl_msg->iw_chnl_op_ctx;
- vchnl_msg_resp->iw_chnl_buf_len = sizeof(resp_buffer);
- vchnl_msg_resp->iw_op_ret_code = I40IW_SUCCESS;
- *((u16 *)vchnl_msg_resp->iw_chnl_buf) = hmc_fcn;
- ret_code = dev->vchnl_if.vchnl_send(dev, vf_id, resp_buffer, sizeof(resp_buffer));
- if (ret_code)
- i40iw_debug(dev, I40IW_DEBUG_VIRT,
- "%s: virt channel send failed 0x%x\n", __func__, ret_code);
-}
-
-/**
- * vchnl_pf_send_get_pe_stats_resp - Send PE Stats to VF
- * @dev: IWARP device pointer
- * @vf_id: Virtual function ID associated with the message
- * @vchnl_msg: Virtual channel message buffer pointer
- * @hw_stats: HW Stats struct
- */
-
-static void vchnl_pf_send_get_pe_stats_resp(struct i40iw_sc_dev *dev,
- u32 vf_id,
- struct i40iw_virtchnl_op_buf *vchnl_msg,
- struct i40iw_dev_hw_stats *hw_stats)
-{
- enum i40iw_status_code ret_code;
- u8 resp_buffer[sizeof(struct i40iw_virtchnl_resp_buf) + sizeof(struct i40iw_dev_hw_stats) - 1];
- struct i40iw_virtchnl_resp_buf *vchnl_msg_resp = (struct i40iw_virtchnl_resp_buf *)resp_buffer;
-
- memset(resp_buffer, 0, sizeof(*resp_buffer));
- vchnl_msg_resp->iw_chnl_op_ctx = vchnl_msg->iw_chnl_op_ctx;
- vchnl_msg_resp->iw_chnl_buf_len = sizeof(resp_buffer);
- vchnl_msg_resp->iw_op_ret_code = I40IW_SUCCESS;
- *((struct i40iw_dev_hw_stats *)vchnl_msg_resp->iw_chnl_buf) = *hw_stats;
- ret_code = dev->vchnl_if.vchnl_send(dev, vf_id, resp_buffer, sizeof(resp_buffer));
- if (ret_code)
- i40iw_debug(dev, I40IW_DEBUG_VIRT,
- "%s: virt channel send failed 0x%x\n", __func__, ret_code);
-}
-
-/**
- * vchnl_pf_send_error_resp - Send an error response to VF
- * @dev: IWARP device pointer
- * @vf_id: Virtual function ID associated with the message
- * @vchnl_msg: Virtual channel message buffer pointer
- */
-static void vchnl_pf_send_error_resp(struct i40iw_sc_dev *dev, u32 vf_id,
- struct i40iw_virtchnl_op_buf *vchnl_msg,
- u16 op_ret_code)
-{
- enum i40iw_status_code ret_code;
- u8 resp_buffer[sizeof(struct i40iw_virtchnl_resp_buf)];
- struct i40iw_virtchnl_resp_buf *vchnl_msg_resp = (struct i40iw_virtchnl_resp_buf *)resp_buffer;
-
- memset(resp_buffer, 0, sizeof(resp_buffer));
- vchnl_msg_resp->iw_chnl_op_ctx = vchnl_msg->iw_chnl_op_ctx;
- vchnl_msg_resp->iw_chnl_buf_len = sizeof(resp_buffer);
- vchnl_msg_resp->iw_op_ret_code = (u16)op_ret_code;
- ret_code = dev->vchnl_if.vchnl_send(dev, vf_id, resp_buffer, sizeof(resp_buffer));
- if (ret_code)
- i40iw_debug(dev, I40IW_DEBUG_VIRT,
- "%s: virt channel send failed 0x%x\n", __func__, ret_code);
-}
-
-/**
- * pf_cqp_get_hmc_fcn_callback - Callback for Get HMC Fcn
- * @cqp_req_param: CQP Request param value
- * @not_used: unused CQP callback parameter
- */
-static void pf_cqp_get_hmc_fcn_callback(struct i40iw_sc_dev *dev, void *callback_param,
- struct i40iw_ccq_cqe_info *cqe_info)
-{
- struct i40iw_vfdev *vf_dev = callback_param;
- struct i40iw_virt_mem vf_dev_mem;
-
- if (cqe_info->error) {
- i40iw_debug(dev, I40IW_DEBUG_VIRT,
- "CQP Completion Error on Get HMC Function. Maj = 0x%04x, Minor = 0x%04x\n",
- cqe_info->maj_err_code, cqe_info->min_err_code);
- dev->vf_dev[vf_dev->iw_vf_idx] = NULL;
- vchnl_pf_send_error_resp(dev, vf_dev->vf_id, &vf_dev->vf_msg_buffer.vchnl_msg,
- (u16)I40IW_ERR_CQP_COMPL_ERROR);
- vf_dev_mem.va = vf_dev;
- vf_dev_mem.size = sizeof(*vf_dev);
- i40iw_free_virt_mem(dev->hw, &vf_dev_mem);
- } else {
- i40iw_debug(dev, I40IW_DEBUG_VIRT,
- "CQP Completion Operation Return information = 0x%08x\n",
- cqe_info->op_ret_val);
- vf_dev->pmf_index = (u16)cqe_info->op_ret_val;
- vf_dev->msg_count--;
- vchnl_pf_send_get_hmc_fcn_resp(dev,
- vf_dev->vf_id,
- &vf_dev->vf_msg_buffer.vchnl_msg,
- vf_dev->pmf_index);
- }
-}
-
-/**
- * pf_add_hmc_obj - Callback for Add HMC Object
- * @vf_dev: pointer to the VF Device
- */
-static void pf_add_hmc_obj_callback(void *work_vf_dev)
-{
- struct i40iw_vfdev *vf_dev = (struct i40iw_vfdev *)work_vf_dev;
- struct i40iw_hmc_info *hmc_info = &vf_dev->hmc_info;
- struct i40iw_virtchnl_op_buf *vchnl_msg = &vf_dev->vf_msg_buffer.vchnl_msg;
- struct i40iw_hmc_create_obj_info info;
- struct i40iw_virtchnl_hmc_obj_range *add_hmc_obj;
- enum i40iw_status_code ret_code;
-
- if (!vf_dev->pf_hmc_initialized) {
- ret_code = i40iw_pf_init_vfhmc(vf_dev->pf_dev, (u8)vf_dev->pmf_index, NULL);
- if (ret_code)
- goto add_out;
- vf_dev->pf_hmc_initialized = true;
- }
-
- add_hmc_obj = (struct i40iw_virtchnl_hmc_obj_range *)vchnl_msg->iw_chnl_buf;
-
- memset(&info, 0, sizeof(info));
- info.hmc_info = hmc_info;
- info.is_pf = false;
- info.rsrc_type = (u32)add_hmc_obj->obj_type;
- info.entry_type = (info.rsrc_type == I40IW_HMC_IW_PBLE) ? I40IW_SD_TYPE_PAGED : I40IW_SD_TYPE_DIRECT;
- info.start_idx = add_hmc_obj->start_index;
- info.count = add_hmc_obj->obj_count;
- i40iw_debug(vf_dev->pf_dev, I40IW_DEBUG_VIRT,
- "I40IW_VCHNL_OP_ADD_HMC_OBJ_RANGE. Add %u type %u objects\n",
- info.count, info.rsrc_type);
- ret_code = i40iw_sc_create_hmc_obj(vf_dev->pf_dev, &info);
- if (!ret_code)
- vf_dev->hmc_info.hmc_obj[add_hmc_obj->obj_type].cnt = add_hmc_obj->obj_count;
-add_out:
- vf_dev->msg_count--;
- vchnl_pf_send_error_resp(vf_dev->pf_dev, vf_dev->vf_id, vchnl_msg, (u16)ret_code);
-}
-
-/**
- * pf_del_hmc_obj_callback - Callback for delete HMC Object
- * @work_vf_dev: pointer to the VF Device
- */
-static void pf_del_hmc_obj_callback(void *work_vf_dev)
-{
- struct i40iw_vfdev *vf_dev = (struct i40iw_vfdev *)work_vf_dev;
- struct i40iw_hmc_info *hmc_info = &vf_dev->hmc_info;
- struct i40iw_virtchnl_op_buf *vchnl_msg = &vf_dev->vf_msg_buffer.vchnl_msg;
- struct i40iw_hmc_del_obj_info info;
- struct i40iw_virtchnl_hmc_obj_range *del_hmc_obj;
- enum i40iw_status_code ret_code = I40IW_SUCCESS;
-
- if (!vf_dev->pf_hmc_initialized)
- goto del_out;
-
- del_hmc_obj = (struct i40iw_virtchnl_hmc_obj_range *)vchnl_msg->iw_chnl_buf;
-
- memset(&info, 0, sizeof(info));
- info.hmc_info = hmc_info;
- info.is_pf = false;
- info.rsrc_type = (u32)del_hmc_obj->obj_type;
- info.start_idx = del_hmc_obj->start_index;
- info.count = del_hmc_obj->obj_count;
- i40iw_debug(vf_dev->pf_dev, I40IW_DEBUG_VIRT,
- "I40IW_VCHNL_OP_DEL_HMC_OBJ_RANGE. Delete %u type %u objects\n",
- info.count, info.rsrc_type);
- ret_code = i40iw_sc_del_hmc_obj(vf_dev->pf_dev, &info, false);
-del_out:
- vf_dev->msg_count--;
- vchnl_pf_send_error_resp(vf_dev->pf_dev, vf_dev->vf_id, vchnl_msg, (u16)ret_code);
-}
-
-/**
- * i40iw_vf_init_pestat - Initialize stats for VF
- * @devL pointer to the VF Device
- * @stats: Statistics structure pointer
- * @index: Stats index
- */
-static void i40iw_vf_init_pestat(struct i40iw_sc_dev *dev, struct i40iw_vsi_pestat *stats, u16 index)
-{
- stats->hw = dev->hw;
- i40iw_hw_stats_init(stats, (u8)index, false);
- spin_lock_init(&stats->lock);
-}
-
-/**
- * i40iw_vchnl_recv_pf - Receive PF virtual channel messages
- * @dev: IWARP device pointer
- * @vf_id: Virtual function ID associated with the message
- * @msg: Virtual channel message buffer pointer
- * @len: Length of the virtual channels message
- */
-enum i40iw_status_code i40iw_vchnl_recv_pf(struct i40iw_sc_dev *dev,
- u32 vf_id,
- u8 *msg,
- u16 len)
-{
- struct i40iw_virtchnl_op_buf *vchnl_msg = (struct i40iw_virtchnl_op_buf *)msg;
- struct i40iw_vfdev *vf_dev = NULL;
- struct i40iw_hmc_fcn_info hmc_fcn_info;
- u16 iw_vf_idx;
- u16 first_avail_iw_vf = I40IW_MAX_PE_ENABLED_VF_COUNT;
- struct i40iw_virt_mem vf_dev_mem;
- struct i40iw_virtchnl_work_info work_info;
- struct i40iw_vsi_pestat *stats;
- enum i40iw_status_code ret_code;
-
- if (!dev || !msg || !len)
- return I40IW_ERR_PARAM;
-
- if (!dev->vchnl_up)
- return I40IW_ERR_NOT_READY;
- if (vchnl_msg->iw_op_code == I40IW_VCHNL_OP_GET_VER) {
- if (vchnl_msg->iw_op_ver != I40IW_VCHNL_OP_GET_VER_V0)
- vchnl_pf_send_get_ver_resp(dev, vf_id, vchnl_msg);
- else
- vchnl_pf_send_get_ver_resp(dev, vf_id, vchnl_msg);
- return I40IW_SUCCESS;
- }
- for (iw_vf_idx = 0; iw_vf_idx < I40IW_MAX_PE_ENABLED_VF_COUNT; iw_vf_idx++) {
- if (!dev->vf_dev[iw_vf_idx]) {
- if (first_avail_iw_vf == I40IW_MAX_PE_ENABLED_VF_COUNT)
- first_avail_iw_vf = iw_vf_idx;
- continue;
- }
- if (dev->vf_dev[iw_vf_idx]->vf_id == vf_id) {
- vf_dev = dev->vf_dev[iw_vf_idx];
- break;
- }
- }
- if (vf_dev) {
- if (!vf_dev->msg_count) {
- vf_dev->msg_count++;
- } else {
- i40iw_debug(dev, I40IW_DEBUG_VIRT,
- "VF%u already has a channel message in progress.\n",
- vf_id);
- return I40IW_SUCCESS;
- }
- }
- switch (vchnl_msg->iw_op_code) {
- case I40IW_VCHNL_OP_GET_HMC_FCN:
- if (!vf_dev &&
- (first_avail_iw_vf != I40IW_MAX_PE_ENABLED_VF_COUNT)) {
- ret_code = i40iw_allocate_virt_mem(dev->hw, &vf_dev_mem, sizeof(struct i40iw_vfdev) +
- (sizeof(struct i40iw_hmc_obj_info) * I40IW_HMC_IW_MAX));
- if (!ret_code) {
- vf_dev = vf_dev_mem.va;
- vf_dev->stats_initialized = false;
- vf_dev->pf_dev = dev;
- vf_dev->msg_count = 1;
- vf_dev->vf_id = vf_id;
- vf_dev->iw_vf_idx = first_avail_iw_vf;
- vf_dev->pf_hmc_initialized = false;
- vf_dev->hmc_info.hmc_obj = (struct i40iw_hmc_obj_info *)(&vf_dev[1]);
- i40iw_debug(dev, I40IW_DEBUG_VIRT,
- "vf_dev %p, hmc_info %p, hmc_obj %p\n",
- vf_dev, &vf_dev->hmc_info, vf_dev->hmc_info.hmc_obj);
- dev->vf_dev[first_avail_iw_vf] = vf_dev;
- iw_vf_idx = first_avail_iw_vf;
- } else {
- i40iw_debug(dev, I40IW_DEBUG_VIRT,
- "VF%u Unable to allocate a VF device structure.\n",
- vf_id);
- vchnl_pf_send_error_resp(dev, vf_id, vchnl_msg, (u16)I40IW_ERR_NO_MEMORY);
- return I40IW_SUCCESS;
- }
- memcpy(&vf_dev->vf_msg_buffer.vchnl_msg, vchnl_msg, len);
- hmc_fcn_info.callback_fcn = pf_cqp_get_hmc_fcn_callback;
- hmc_fcn_info.vf_id = vf_id;
- hmc_fcn_info.iw_vf_idx = vf_dev->iw_vf_idx;
- hmc_fcn_info.cqp_callback_param = vf_dev;
- hmc_fcn_info.free_fcn = false;
- ret_code = i40iw_cqp_manage_hmc_fcn_cmd(dev, &hmc_fcn_info);
- if (ret_code)
- i40iw_debug(dev, I40IW_DEBUG_VIRT,
- "VF%u error CQP HMC Function operation.\n",
- vf_id);
- i40iw_vf_init_pestat(dev, &vf_dev->pestat, vf_dev->pmf_index);
- vf_dev->stats_initialized = true;
- } else {
- if (vf_dev) {
- vf_dev->msg_count--;
- vchnl_pf_send_get_hmc_fcn_resp(dev, vf_id, vchnl_msg, vf_dev->pmf_index);
- } else {
- vchnl_pf_send_error_resp(dev, vf_id, vchnl_msg,
- (u16)I40IW_ERR_NO_MEMORY);
- }
- }
- break;
- case I40IW_VCHNL_OP_ADD_HMC_OBJ_RANGE:
- if (!vf_dev)
- return I40IW_ERR_BAD_PTR;
- work_info.worker_vf_dev = vf_dev;
- work_info.callback_fcn = pf_add_hmc_obj_callback;
- memcpy(&vf_dev->vf_msg_buffer.vchnl_msg, vchnl_msg, len);
- i40iw_cqp_spawn_worker(dev, &work_info, vf_dev->iw_vf_idx);
- break;
- case I40IW_VCHNL_OP_DEL_HMC_OBJ_RANGE:
- if (!vf_dev)
- return I40IW_ERR_BAD_PTR;
- work_info.worker_vf_dev = vf_dev;
- work_info.callback_fcn = pf_del_hmc_obj_callback;
- memcpy(&vf_dev->vf_msg_buffer.vchnl_msg, vchnl_msg, len);
- i40iw_cqp_spawn_worker(dev, &work_info, vf_dev->iw_vf_idx);
- break;
- case I40IW_VCHNL_OP_GET_STATS:
- if (!vf_dev)
- return I40IW_ERR_BAD_PTR;
- stats = &vf_dev->pestat;
- i40iw_hw_stats_read_all(stats, &stats->hw_stats);
- vf_dev->msg_count--;
- vchnl_pf_send_get_pe_stats_resp(dev, vf_id, vchnl_msg, &stats->hw_stats);
- break;
- default:
- i40iw_debug(dev, I40IW_DEBUG_VIRT,
- "40iw_vchnl_recv_pf: Invalid OpCode 0x%x\n",
- vchnl_msg->iw_op_code);
- vchnl_pf_send_error_resp(dev, vf_id,
- vchnl_msg, (u16)I40IW_ERR_NOT_IMPLEMENTED);
- }
- return I40IW_SUCCESS;
-}
-
-/**
- * i40iw_vchnl_recv_vf - Receive VF virtual channel messages
- * @dev: IWARP device pointer
- * @vf_id: Virtual function ID associated with the message
- * @msg: Virtual channel message buffer pointer
- * @len: Length of the virtual channels message
- */
-enum i40iw_status_code i40iw_vchnl_recv_vf(struct i40iw_sc_dev *dev,
- u32 vf_id,
- u8 *msg,
- u16 len)
-{
- struct i40iw_virtchnl_resp_buf *vchnl_msg_resp = (struct i40iw_virtchnl_resp_buf *)msg;
- struct i40iw_virtchnl_req *vchnl_req;
-
- vchnl_req = (struct i40iw_virtchnl_req *)(uintptr_t)vchnl_msg_resp->iw_chnl_op_ctx;
- vchnl_req->ret_code = (enum i40iw_status_code)vchnl_msg_resp->iw_op_ret_code;
- if (len == (sizeof(*vchnl_msg_resp) + vchnl_req->parm_len - 1)) {
- if (vchnl_req->parm_len && vchnl_req->parm)
- memcpy(vchnl_req->parm, vchnl_msg_resp->iw_chnl_buf, vchnl_req->parm_len);
- i40iw_debug(dev, I40IW_DEBUG_VIRT,
- "%s: Got response, data size %u\n", __func__,
- vchnl_req->parm_len);
- } else {
- i40iw_debug(dev, I40IW_DEBUG_VIRT,
- "%s: error length on response, Got %u, expected %u\n", __func__,
- len, (u32)(sizeof(*vchnl_msg_resp) + vchnl_req->parm_len - 1));
- }
-
- return I40IW_SUCCESS;
-}
-
-/**
- * i40iw_vchnl_vf_get_ver - Request Channel version
- * @dev: IWARP device pointer
- * @vchnl_ver: Virtual channel message version pointer
- */
-enum i40iw_status_code i40iw_vchnl_vf_get_ver(struct i40iw_sc_dev *dev,
- u32 *vchnl_ver)
-{
- struct i40iw_virtchnl_req vchnl_req;
- enum i40iw_status_code ret_code;
-
- if (!i40iw_vf_clear_to_send(dev))
- return I40IW_ERR_TIMEOUT;
- memset(&vchnl_req, 0, sizeof(vchnl_req));
- vchnl_req.dev = dev;
- vchnl_req.parm = vchnl_ver;
- vchnl_req.parm_len = sizeof(*vchnl_ver);
- vchnl_req.vchnl_msg = &dev->vchnl_vf_msg_buf.vchnl_msg;
-
- ret_code = vchnl_vf_send_get_ver_req(dev, &vchnl_req);
- if (ret_code) {
- i40iw_debug(dev, I40IW_DEBUG_VIRT,
- "%s Send message failed 0x%0x\n", __func__, ret_code);
- return ret_code;
- }
- ret_code = i40iw_vf_wait_vchnl_resp(dev);
- if (ret_code)
- return ret_code;
- else
- return vchnl_req.ret_code;
-}
-
-/**
- * i40iw_vchnl_vf_get_hmc_fcn - Request HMC Function
- * @dev: IWARP device pointer
- * @hmc_fcn: HMC function index pointer
- */
-enum i40iw_status_code i40iw_vchnl_vf_get_hmc_fcn(struct i40iw_sc_dev *dev,
- u16 *hmc_fcn)
-{
- struct i40iw_virtchnl_req vchnl_req;
- enum i40iw_status_code ret_code;
-
- if (!i40iw_vf_clear_to_send(dev))
- return I40IW_ERR_TIMEOUT;
- memset(&vchnl_req, 0, sizeof(vchnl_req));
- vchnl_req.dev = dev;
- vchnl_req.parm = hmc_fcn;
- vchnl_req.parm_len = sizeof(*hmc_fcn);
- vchnl_req.vchnl_msg = &dev->vchnl_vf_msg_buf.vchnl_msg;
-
- ret_code = vchnl_vf_send_get_hmc_fcn_req(dev, &vchnl_req);
- if (ret_code) {
- i40iw_debug(dev, I40IW_DEBUG_VIRT,
- "%s Send message failed 0x%0x\n", __func__, ret_code);
- return ret_code;
- }
- ret_code = i40iw_vf_wait_vchnl_resp(dev);
- if (ret_code)
- return ret_code;
- else
- return vchnl_req.ret_code;
-}
-
-/**
- * i40iw_vchnl_vf_add_hmc_objs - Add HMC Object
- * @dev: IWARP device pointer
- * @rsrc_type: HMC Resource type
- * @start_index: Starting index of the objects to be added
- * @rsrc_count: Number of resources to be added
- */
-enum i40iw_status_code i40iw_vchnl_vf_add_hmc_objs(struct i40iw_sc_dev *dev,
- enum i40iw_hmc_rsrc_type rsrc_type,
- u32 start_index,
- u32 rsrc_count)
-{
- struct i40iw_virtchnl_req vchnl_req;
- enum i40iw_status_code ret_code;
-
- if (!i40iw_vf_clear_to_send(dev))
- return I40IW_ERR_TIMEOUT;
- memset(&vchnl_req, 0, sizeof(vchnl_req));
- vchnl_req.dev = dev;
- vchnl_req.vchnl_msg = &dev->vchnl_vf_msg_buf.vchnl_msg;
-
- ret_code = vchnl_vf_send_add_hmc_objs_req(dev,
- &vchnl_req,
- rsrc_type,
- start_index,
- rsrc_count);
- if (ret_code) {
- i40iw_debug(dev, I40IW_DEBUG_VIRT,
- "%s Send message failed 0x%0x\n", __func__, ret_code);
- return ret_code;
- }
- ret_code = i40iw_vf_wait_vchnl_resp(dev);
- if (ret_code)
- return ret_code;
- else
- return vchnl_req.ret_code;
-}
-
-/**
- * i40iw_vchnl_vf_del_hmc_obj - del HMC obj
- * @dev: IWARP device pointer
- * @rsrc_type: HMC Resource type
- * @start_index: Starting index of the object to delete
- * @rsrc_count: Number of resources to be delete
- */
-enum i40iw_status_code i40iw_vchnl_vf_del_hmc_obj(struct i40iw_sc_dev *dev,
- enum i40iw_hmc_rsrc_type rsrc_type,
- u32 start_index,
- u32 rsrc_count)
-{
- struct i40iw_virtchnl_req vchnl_req;
- enum i40iw_status_code ret_code;
-
- if (!i40iw_vf_clear_to_send(dev))
- return I40IW_ERR_TIMEOUT;
- memset(&vchnl_req, 0, sizeof(vchnl_req));
- vchnl_req.dev = dev;
- vchnl_req.vchnl_msg = &dev->vchnl_vf_msg_buf.vchnl_msg;
-
- ret_code = vchnl_vf_send_del_hmc_objs_req(dev,
- &vchnl_req,
- rsrc_type,
- start_index,
- rsrc_count);
- if (ret_code) {
- i40iw_debug(dev, I40IW_DEBUG_VIRT,
- "%s Send message failed 0x%0x\n", __func__, ret_code);
- return ret_code;
- }
- ret_code = i40iw_vf_wait_vchnl_resp(dev);
- if (ret_code)
- return ret_code;
- else
- return vchnl_req.ret_code;
-}
-
-/**
- * i40iw_vchnl_vf_get_pe_stats - Get PE stats
- * @dev: IWARP device pointer
- * @hw_stats: HW stats struct
- */
-enum i40iw_status_code i40iw_vchnl_vf_get_pe_stats(struct i40iw_sc_dev *dev,
- struct i40iw_dev_hw_stats *hw_stats)
-{
- struct i40iw_virtchnl_req vchnl_req;
- enum i40iw_status_code ret_code;
-
- if (!i40iw_vf_clear_to_send(dev))
- return I40IW_ERR_TIMEOUT;
- memset(&vchnl_req, 0, sizeof(vchnl_req));
- vchnl_req.dev = dev;
- vchnl_req.parm = hw_stats;
- vchnl_req.parm_len = sizeof(*hw_stats);
- vchnl_req.vchnl_msg = &dev->vchnl_vf_msg_buf.vchnl_msg;
-
- ret_code = vchnl_vf_send_get_pe_stats_req(dev, &vchnl_req);
- if (ret_code) {
- i40iw_debug(dev, I40IW_DEBUG_VIRT,
- "%s Send message failed 0x%0x\n", __func__, ret_code);
- return ret_code;
- }
- ret_code = i40iw_vf_wait_vchnl_resp(dev);
- if (ret_code)
- return ret_code;
- else
- return vchnl_req.ret_code;
-}
diff --git a/drivers/infiniband/hw/i40iw/i40iw_virtchnl.h b/drivers/infiniband/hw/i40iw/i40iw_virtchnl.h
deleted file mode 100644
index 24886ef08293..000000000000
--- a/drivers/infiniband/hw/i40iw/i40iw_virtchnl.h
+++ /dev/null
@@ -1,124 +0,0 @@
-/*******************************************************************************
-*
-* Copyright (c) 2015-2016 Intel Corporation. All rights reserved.
-*
-* This software is available to you under a choice of one of two
-* licenses. You may choose to be licensed under the terms of the GNU
-* General Public License (GPL) Version 2, available from the file
-* COPYING in the main directory of this source tree, or the
-* OpenFabrics.org BSD license below:
-*
-* Redistribution and use in source and binary forms, with or
-* without modification, are permitted provided that the following
-* conditions are met:
-*
-* - Redistributions of source code must retain the above
-* copyright notice, this list of conditions and the following
-* disclaimer.
-*
-* - Redistributions in binary form must reproduce the above
-* copyright notice, this list of conditions and the following
-* disclaimer in the documentation and/or other materials
-* provided with the distribution.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-* SOFTWARE.
-*
-*******************************************************************************/
-
-#ifndef I40IW_VIRTCHNL_H
-#define I40IW_VIRTCHNL_H
-
-#include "i40iw_hmc.h"
-
-#pragma pack(push, 1)
-
-struct i40iw_virtchnl_op_buf {
- u16 iw_op_code;
- u16 iw_op_ver;
- u16 iw_chnl_buf_len;
- u16 rsvd;
- u64 iw_chnl_op_ctx;
- /* Member alignment MUST be maintained above this location */
- u8 iw_chnl_buf[1];
-};
-
-struct i40iw_virtchnl_resp_buf {
- u64 iw_chnl_op_ctx;
- u16 iw_chnl_buf_len;
- s16 iw_op_ret_code;
- /* Member alignment MUST be maintained above this location */
- u16 rsvd[2];
- u8 iw_chnl_buf[1];
-};
-
-enum i40iw_virtchnl_ops {
- I40IW_VCHNL_OP_GET_VER = 0,
- I40IW_VCHNL_OP_GET_HMC_FCN,
- I40IW_VCHNL_OP_ADD_HMC_OBJ_RANGE,
- I40IW_VCHNL_OP_DEL_HMC_OBJ_RANGE,
- I40IW_VCHNL_OP_GET_STATS
-};
-
-#define I40IW_VCHNL_OP_GET_VER_V0 0
-#define I40IW_VCHNL_OP_GET_HMC_FCN_V0 0
-#define I40IW_VCHNL_OP_ADD_HMC_OBJ_RANGE_V0 0
-#define I40IW_VCHNL_OP_DEL_HMC_OBJ_RANGE_V0 0
-#define I40IW_VCHNL_OP_GET_STATS_V0 0
-#define I40IW_VCHNL_CHNL_VER_V0 0
-
-struct i40iw_dev_hw_stats;
-
-struct i40iw_virtchnl_hmc_obj_range {
- u16 obj_type;
- u16 rsvd;
- u32 start_index;
- u32 obj_count;
-};
-
-enum i40iw_status_code i40iw_vchnl_recv_pf(struct i40iw_sc_dev *dev,
- u32 vf_id,
- u8 *msg,
- u16 len);
-
-enum i40iw_status_code i40iw_vchnl_recv_vf(struct i40iw_sc_dev *dev,
- u32 vf_id,
- u8 *msg,
- u16 len);
-
-struct i40iw_virtchnl_req {
- struct i40iw_sc_dev *dev;
- struct i40iw_virtchnl_op_buf *vchnl_msg;
- void *parm;
- u32 vf_id;
- u16 parm_len;
- s16 ret_code;
-};
-
-#pragma pack(pop)
-
-enum i40iw_status_code i40iw_vchnl_vf_get_ver(struct i40iw_sc_dev *dev,
- u32 *vchnl_ver);
-
-enum i40iw_status_code i40iw_vchnl_vf_get_hmc_fcn(struct i40iw_sc_dev *dev,
- u16 *hmc_fcn);
-
-enum i40iw_status_code i40iw_vchnl_vf_add_hmc_objs(struct i40iw_sc_dev *dev,
- enum i40iw_hmc_rsrc_type rsrc_type,
- u32 start_index,
- u32 rsrc_count);
-
-enum i40iw_status_code i40iw_vchnl_vf_del_hmc_obj(struct i40iw_sc_dev *dev,
- enum i40iw_hmc_rsrc_type rsrc_type,
- u32 start_index,
- u32 rsrc_count);
-
-enum i40iw_status_code i40iw_vchnl_vf_get_pe_stats(struct i40iw_sc_dev *dev,
- struct i40iw_dev_hw_stats *hw_stats);
-#endif
diff --git a/drivers/infiniband/hw/ionic/Kconfig b/drivers/infiniband/hw/ionic/Kconfig
new file mode 100644
index 000000000000..de6f10e9b6e9
--- /dev/null
+++ b/drivers/infiniband/hw/ionic/Kconfig
@@ -0,0 +1,15 @@
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2018-2025, Advanced Micro Devices, Inc.
+
+config INFINIBAND_IONIC
+ tristate "AMD Pensando DSC RDMA/RoCE Support"
+ depends on NETDEVICES && ETHERNET && PCI && INET && IONIC
+ help
+ This enables RDMA/RoCE support for the AMD Pensando family of
+ Distributed Services Cards (DSCs).
+
+ To learn more, visit our website at
+ <https://www.amd.com/en/products/accelerators/pensando.html>.
+
+ To compile this driver as a module, choose M here. The module
+ will be called ionic_rdma.
diff --git a/drivers/infiniband/hw/ionic/Makefile b/drivers/infiniband/hw/ionic/Makefile
new file mode 100644
index 000000000000..957973742820
--- /dev/null
+++ b/drivers/infiniband/hw/ionic/Makefile
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0
+
+ccflags-y := -I $(srctree)/drivers/net/ethernet/pensando/ionic
+
+obj-$(CONFIG_INFINIBAND_IONIC) += ionic_rdma.o
+
+ionic_rdma-y := \
+ ionic_ibdev.o ionic_lif_cfg.o ionic_queue.o ionic_pgtbl.o ionic_admin.o \
+ ionic_controlpath.o ionic_datapath.o ionic_hw_stats.o
diff --git a/drivers/infiniband/hw/ionic/ionic_admin.c b/drivers/infiniband/hw/ionic/ionic_admin.c
new file mode 100644
index 000000000000..2537aa55d12d
--- /dev/null
+++ b/drivers/infiniband/hw/ionic/ionic_admin.c
@@ -0,0 +1,1229 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2018-2025, Advanced Micro Devices, Inc. */
+
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/printk.h>
+
+#include "ionic_fw.h"
+#include "ionic_ibdev.h"
+
+#define IONIC_EQ_COUNT_MIN 4
+#define IONIC_AQ_COUNT_MIN 1
+
+/* not a valid queue position or negative error status */
+#define IONIC_ADMIN_POSTED 0x10000
+
+/* cpu can be held with irq disabled for COUNT * MS (for create/destroy_ah) */
+#define IONIC_ADMIN_BUSY_RETRY_COUNT 2000
+#define IONIC_ADMIN_BUSY_RETRY_MS 1
+
+/* admin queue will be considered failed if a command takes longer */
+#define IONIC_ADMIN_TIMEOUT (HZ * 2)
+#define IONIC_ADMIN_WARN (HZ / 8)
+
+/* will poll for admin cq to tolerate and report from missed event */
+#define IONIC_ADMIN_DELAY (HZ / 8)
+
+/* work queue for polling the event queue and admin cq */
+struct workqueue_struct *ionic_evt_workq;
+
+static void ionic_admin_timedout(struct ionic_aq *aq)
+{
+ struct ionic_ibdev *dev = aq->dev;
+ unsigned long irqflags;
+ u16 pos;
+
+ spin_lock_irqsave(&aq->lock, irqflags);
+ if (ionic_queue_empty(&aq->q))
+ goto out;
+
+ /* Reset ALL adminq if any one times out */
+ if (atomic_read(&aq->admin_state) < IONIC_ADMIN_KILLED)
+ queue_work(ionic_evt_workq, &dev->reset_work);
+
+ ibdev_err(&dev->ibdev, "admin command timed out, aq %d after: %ums\n",
+ aq->aqid, (u32)jiffies_to_msecs(jiffies - aq->stamp));
+
+ pos = (aq->q.prod - 1) & aq->q.mask;
+ if (pos == aq->q.cons)
+ goto out;
+
+ ibdev_warn(&dev->ibdev, "admin pos %u (last posted)\n", pos);
+ print_hex_dump(KERN_WARNING, "cmd ", DUMP_PREFIX_OFFSET, 16, 1,
+ ionic_queue_at(&aq->q, pos),
+ BIT(aq->q.stride_log2), true);
+
+out:
+ spin_unlock_irqrestore(&aq->lock, irqflags);
+}
+
+static void ionic_admin_reset_dwork(struct ionic_ibdev *dev)
+{
+ if (atomic_read(&dev->admin_state) == IONIC_ADMIN_KILLED)
+ return;
+
+ queue_delayed_work(ionic_evt_workq, &dev->admin_dwork,
+ IONIC_ADMIN_DELAY);
+}
+
+static void ionic_admin_reset_wdog(struct ionic_aq *aq)
+{
+ if (atomic_read(&aq->admin_state) == IONIC_ADMIN_KILLED)
+ return;
+
+ aq->stamp = jiffies;
+ ionic_admin_reset_dwork(aq->dev);
+}
+
+static bool ionic_admin_next_cqe(struct ionic_ibdev *dev, struct ionic_cq *cq,
+ struct ionic_v1_cqe **cqe)
+{
+ struct ionic_v1_cqe *qcqe = ionic_queue_at_prod(&cq->q);
+
+ if (unlikely(cq->color != ionic_v1_cqe_color(qcqe)))
+ return false;
+
+ /* Prevent out-of-order reads of the CQE */
+ dma_rmb();
+ *cqe = qcqe;
+
+ return true;
+}
+
+static void ionic_admin_poll_locked(struct ionic_aq *aq)
+{
+ struct ionic_cq *cq = &aq->vcq->cq[0];
+ struct ionic_admin_wr *wr, *wr_next;
+ struct ionic_ibdev *dev = aq->dev;
+ u32 wr_strides, avlbl_strides;
+ struct ionic_v1_cqe *cqe;
+ u32 qtf, qid;
+ u16 old_prod;
+ u8 type;
+
+ lockdep_assert_held(&aq->lock);
+
+ if (atomic_read(&aq->admin_state) == IONIC_ADMIN_KILLED) {
+ list_for_each_entry_safe(wr, wr_next, &aq->wr_prod, aq_ent) {
+ INIT_LIST_HEAD(&wr->aq_ent);
+ aq->q_wr[wr->status].wr = NULL;
+ wr->status = atomic_read(&aq->admin_state);
+ complete_all(&wr->work);
+ }
+ INIT_LIST_HEAD(&aq->wr_prod);
+
+ list_for_each_entry_safe(wr, wr_next, &aq->wr_post, aq_ent) {
+ INIT_LIST_HEAD(&wr->aq_ent);
+ wr->status = atomic_read(&aq->admin_state);
+ complete_all(&wr->work);
+ }
+ INIT_LIST_HEAD(&aq->wr_post);
+
+ return;
+ }
+
+ old_prod = cq->q.prod;
+
+ while (ionic_admin_next_cqe(dev, cq, &cqe)) {
+ qtf = ionic_v1_cqe_qtf(cqe);
+ qid = ionic_v1_cqe_qtf_qid(qtf);
+ type = ionic_v1_cqe_qtf_type(qtf);
+
+ if (unlikely(type != IONIC_V1_CQE_TYPE_ADMIN)) {
+ ibdev_warn_ratelimited(&dev->ibdev,
+ "bad cqe type %u\n", type);
+ goto cq_next;
+ }
+
+ if (unlikely(qid != aq->aqid)) {
+ ibdev_warn_ratelimited(&dev->ibdev,
+ "bad cqe qid %u\n", qid);
+ goto cq_next;
+ }
+
+ if (unlikely(be16_to_cpu(cqe->admin.cmd_idx) != aq->q.cons)) {
+ ibdev_warn_ratelimited(&dev->ibdev,
+ "bad idx %u cons %u qid %u\n",
+ be16_to_cpu(cqe->admin.cmd_idx),
+ aq->q.cons, qid);
+ goto cq_next;
+ }
+
+ if (unlikely(ionic_queue_empty(&aq->q))) {
+ ibdev_warn_ratelimited(&dev->ibdev,
+ "bad cqe for empty adminq\n");
+ goto cq_next;
+ }
+
+ wr = aq->q_wr[aq->q.cons].wr;
+ if (wr) {
+ aq->q_wr[aq->q.cons].wr = NULL;
+ list_del_init(&wr->aq_ent);
+
+ wr->cqe = *cqe;
+ wr->status = atomic_read(&aq->admin_state);
+ complete_all(&wr->work);
+ }
+
+ ionic_queue_consume_entries(&aq->q,
+ aq->q_wr[aq->q.cons].wqe_strides);
+
+cq_next:
+ ionic_queue_produce(&cq->q);
+ cq->color = ionic_color_wrap(cq->q.prod, cq->color);
+ }
+
+ if (old_prod != cq->q.prod) {
+ ionic_admin_reset_wdog(aq);
+ cq->q.cons = cq->q.prod;
+ ionic_dbell_ring(dev->lif_cfg.dbpage, dev->lif_cfg.cq_qtype,
+ ionic_queue_dbell_val(&cq->q));
+ queue_work(ionic_evt_workq, &aq->work);
+ } else if (!aq->armed) {
+ aq->armed = true;
+ cq->arm_any_prod = ionic_queue_next(&cq->q, cq->arm_any_prod);
+ ionic_dbell_ring(dev->lif_cfg.dbpage, dev->lif_cfg.cq_qtype,
+ cq->q.dbell | IONIC_CQ_RING_ARM |
+ cq->arm_any_prod);
+ queue_work(ionic_evt_workq, &aq->work);
+ }
+
+ if (atomic_read(&aq->admin_state) != IONIC_ADMIN_ACTIVE)
+ return;
+
+ old_prod = aq->q.prod;
+
+ if (ionic_queue_empty(&aq->q) && !list_empty(&aq->wr_post))
+ ionic_admin_reset_wdog(aq);
+
+ if (list_empty(&aq->wr_post))
+ return;
+
+ do {
+ u8 *src;
+ int i, src_len;
+ size_t stride_len;
+
+ wr = list_first_entry(&aq->wr_post, struct ionic_admin_wr,
+ aq_ent);
+ wr_strides = (le16_to_cpu(wr->wqe.len) + ADMIN_WQE_HDR_LEN +
+ (ADMIN_WQE_STRIDE - 1)) >> aq->q.stride_log2;
+ avlbl_strides = ionic_queue_length_remaining(&aq->q);
+
+ if (wr_strides > avlbl_strides)
+ break;
+
+ list_move(&wr->aq_ent, &aq->wr_prod);
+ wr->status = aq->q.prod;
+ aq->q_wr[aq->q.prod].wr = wr;
+ aq->q_wr[aq->q.prod].wqe_strides = wr_strides;
+
+ src_len = le16_to_cpu(wr->wqe.len);
+ src = (uint8_t *)&wr->wqe.cmd;
+
+ /* First stride */
+ memcpy(ionic_queue_at_prod(&aq->q), &wr->wqe,
+ ADMIN_WQE_HDR_LEN);
+ stride_len = ADMIN_WQE_STRIDE - ADMIN_WQE_HDR_LEN;
+ if (stride_len > src_len)
+ stride_len = src_len;
+ memcpy(ionic_queue_at_prod(&aq->q) + ADMIN_WQE_HDR_LEN,
+ src, stride_len);
+ ibdev_dbg(&dev->ibdev, "post admin prod %u (%u strides)\n",
+ aq->q.prod, wr_strides);
+ print_hex_dump_debug("wqe ", DUMP_PREFIX_OFFSET, 16, 1,
+ ionic_queue_at_prod(&aq->q),
+ BIT(aq->q.stride_log2), true);
+ ionic_queue_produce(&aq->q);
+
+ /* Remaining strides */
+ for (i = stride_len; i < src_len; i += stride_len) {
+ stride_len = ADMIN_WQE_STRIDE;
+
+ if (i + stride_len > src_len)
+ stride_len = src_len - i;
+
+ memcpy(ionic_queue_at_prod(&aq->q), src + i,
+ stride_len);
+ print_hex_dump_debug("wqe ", DUMP_PREFIX_OFFSET, 16, 1,
+ ionic_queue_at_prod(&aq->q),
+ BIT(aq->q.stride_log2), true);
+ ionic_queue_produce(&aq->q);
+ }
+ } while (!list_empty(&aq->wr_post));
+
+ if (old_prod != aq->q.prod)
+ ionic_dbell_ring(dev->lif_cfg.dbpage, dev->lif_cfg.aq_qtype,
+ ionic_queue_dbell_val(&aq->q));
+}
+
+static void ionic_admin_dwork(struct work_struct *ws)
+{
+ struct ionic_ibdev *dev =
+ container_of(ws, struct ionic_ibdev, admin_dwork.work);
+ struct ionic_aq *aq, *bad_aq = NULL;
+ bool do_reschedule = false;
+ unsigned long irqflags;
+ bool do_reset = false;
+ u16 pos;
+ int i;
+
+ for (i = 0; i < dev->lif_cfg.aq_count; i++) {
+ aq = dev->aq_vec[i];
+
+ spin_lock_irqsave(&aq->lock, irqflags);
+
+ if (ionic_queue_empty(&aq->q))
+ goto next_aq;
+
+ /* Reschedule if any queue has outstanding work */
+ do_reschedule = true;
+
+ if (time_is_after_eq_jiffies(aq->stamp + IONIC_ADMIN_WARN))
+ /* Warning threshold not met, nothing to do */
+ goto next_aq;
+
+ /* See if polling now makes some progress */
+ pos = aq->q.cons;
+ ionic_admin_poll_locked(aq);
+ if (pos != aq->q.cons) {
+ ibdev_dbg(&dev->ibdev,
+ "missed event for acq %d\n", aq->cqid);
+ goto next_aq;
+ }
+
+ if (time_is_after_eq_jiffies(aq->stamp +
+ IONIC_ADMIN_TIMEOUT)) {
+ /* Timeout threshold not met */
+ ibdev_dbg(&dev->ibdev, "no progress after %ums\n",
+ (u32)jiffies_to_msecs(jiffies - aq->stamp));
+ goto next_aq;
+ }
+
+ /* Queue timed out */
+ bad_aq = aq;
+ do_reset = true;
+next_aq:
+ spin_unlock_irqrestore(&aq->lock, irqflags);
+ }
+
+ if (do_reset)
+ /* Reset RDMA lif on a timeout */
+ ionic_admin_timedout(bad_aq);
+ else if (do_reschedule)
+ /* Try to poll again later */
+ ionic_admin_reset_dwork(dev);
+}
+
+static void ionic_admin_work(struct work_struct *ws)
+{
+ struct ionic_aq *aq = container_of(ws, struct ionic_aq, work);
+ unsigned long irqflags;
+
+ spin_lock_irqsave(&aq->lock, irqflags);
+ ionic_admin_poll_locked(aq);
+ spin_unlock_irqrestore(&aq->lock, irqflags);
+}
+
+static void ionic_admin_post_aq(struct ionic_aq *aq, struct ionic_admin_wr *wr)
+{
+ unsigned long irqflags;
+ bool poll;
+
+ wr->status = IONIC_ADMIN_POSTED;
+ wr->aq = aq;
+
+ spin_lock_irqsave(&aq->lock, irqflags);
+ poll = list_empty(&aq->wr_post);
+ list_add(&wr->aq_ent, &aq->wr_post);
+ if (poll)
+ ionic_admin_poll_locked(aq);
+ spin_unlock_irqrestore(&aq->lock, irqflags);
+}
+
+void ionic_admin_post(struct ionic_ibdev *dev, struct ionic_admin_wr *wr)
+{
+ int aq_idx;
+
+ /* Use cpu id for the adminq selection */
+ aq_idx = raw_smp_processor_id() % dev->lif_cfg.aq_count;
+ ionic_admin_post_aq(dev->aq_vec[aq_idx], wr);
+}
+
+static void ionic_admin_cancel(struct ionic_admin_wr *wr)
+{
+ struct ionic_aq *aq = wr->aq;
+ unsigned long irqflags;
+
+ spin_lock_irqsave(&aq->lock, irqflags);
+
+ if (!list_empty(&wr->aq_ent)) {
+ list_del(&wr->aq_ent);
+ if (wr->status != IONIC_ADMIN_POSTED)
+ aq->q_wr[wr->status].wr = NULL;
+ }
+
+ spin_unlock_irqrestore(&aq->lock, irqflags);
+}
+
+static int ionic_admin_busy_wait(struct ionic_admin_wr *wr)
+{
+ struct ionic_aq *aq = wr->aq;
+ unsigned long irqflags;
+ int try_i;
+
+ for (try_i = 0; try_i < IONIC_ADMIN_BUSY_RETRY_COUNT; ++try_i) {
+ if (completion_done(&wr->work))
+ return 0;
+
+ mdelay(IONIC_ADMIN_BUSY_RETRY_MS);
+
+ spin_lock_irqsave(&aq->lock, irqflags);
+ ionic_admin_poll_locked(aq);
+ spin_unlock_irqrestore(&aq->lock, irqflags);
+ }
+
+ /*
+ * we timed out. Initiate RDMA LIF reset and indicate
+ * error to caller.
+ */
+ ionic_admin_timedout(aq);
+ return -ETIMEDOUT;
+}
+
+int ionic_admin_wait(struct ionic_ibdev *dev, struct ionic_admin_wr *wr,
+ enum ionic_admin_flags flags)
+{
+ int rc, timo;
+
+ if (flags & IONIC_ADMIN_F_BUSYWAIT) {
+ /* Spin */
+ rc = ionic_admin_busy_wait(wr);
+ } else if (flags & IONIC_ADMIN_F_INTERRUPT) {
+ /*
+ * Interruptible sleep, 1s timeout
+ * This is used for commands which are safe for the caller
+ * to clean up without killing and resetting the adminq.
+ */
+ timo = wait_for_completion_interruptible_timeout(&wr->work,
+ HZ);
+ if (timo > 0)
+ rc = 0;
+ else if (timo == 0)
+ rc = -ETIMEDOUT;
+ else
+ rc = timo;
+ } else {
+ /*
+ * Uninterruptible sleep
+ * This is used for commands which are NOT safe for the
+ * caller to clean up. Cleanup must be handled by the
+ * adminq kill and reset process so that host memory is
+ * not corrupted by the device.
+ */
+ wait_for_completion(&wr->work);
+ rc = 0;
+ }
+
+ if (rc) {
+ ibdev_warn(&dev->ibdev, "wait status %d\n", rc);
+ ionic_admin_cancel(wr);
+ } else if (wr->status == IONIC_ADMIN_KILLED) {
+ ibdev_dbg(&dev->ibdev, "admin killed\n");
+
+ /* No error if admin already killed during teardown */
+ rc = (flags & IONIC_ADMIN_F_TEARDOWN) ? 0 : -ENODEV;
+ } else if (ionic_v1_cqe_error(&wr->cqe)) {
+ ibdev_warn(&dev->ibdev, "opcode %u error %u\n",
+ wr->wqe.op,
+ be32_to_cpu(wr->cqe.status_length));
+ rc = -EINVAL;
+ }
+ return rc;
+}
+
+static int ionic_rdma_devcmd(struct ionic_ibdev *dev,
+ struct ionic_admin_ctx *admin)
+{
+ int rc;
+
+ rc = ionic_adminq_post_wait(dev->lif_cfg.lif, admin);
+ if (rc)
+ return rc;
+
+ return ionic_error_to_errno(admin->comp.comp.status);
+}
+
+int ionic_rdma_reset_devcmd(struct ionic_ibdev *dev)
+{
+ struct ionic_admin_ctx admin = {
+ .work = COMPLETION_INITIALIZER_ONSTACK(admin.work),
+ .cmd.rdma_reset = {
+ .opcode = IONIC_CMD_RDMA_RESET_LIF,
+ .lif_index = cpu_to_le16(dev->lif_cfg.lif_index),
+ },
+ };
+
+ return ionic_rdma_devcmd(dev, &admin);
+}
+
+static int ionic_rdma_queue_devcmd(struct ionic_ibdev *dev,
+ struct ionic_queue *q,
+ u32 qid, u32 cid, u16 opcode)
+{
+ struct ionic_admin_ctx admin = {
+ .work = COMPLETION_INITIALIZER_ONSTACK(admin.work),
+ .cmd.rdma_queue = {
+ .opcode = opcode,
+ .lif_index = cpu_to_le16(dev->lif_cfg.lif_index),
+ .qid_ver = cpu_to_le32(qid),
+ .cid = cpu_to_le32(cid),
+ .dbid = cpu_to_le16(dev->lif_cfg.dbid),
+ .depth_log2 = q->depth_log2,
+ .stride_log2 = q->stride_log2,
+ .dma_addr = cpu_to_le64(q->dma),
+ },
+ };
+
+ return ionic_rdma_devcmd(dev, &admin);
+}
+
+static void ionic_rdma_admincq_comp(struct ib_cq *ibcq, void *cq_context)
+{
+ struct ionic_aq *aq = cq_context;
+ unsigned long irqflags;
+
+ spin_lock_irqsave(&aq->lock, irqflags);
+ aq->armed = false;
+ if (atomic_read(&aq->admin_state) < IONIC_ADMIN_KILLED)
+ queue_work(ionic_evt_workq, &aq->work);
+ spin_unlock_irqrestore(&aq->lock, irqflags);
+}
+
+static void ionic_rdma_admincq_event(struct ib_event *event, void *cq_context)
+{
+ struct ionic_aq *aq = cq_context;
+
+ ibdev_err(&aq->dev->ibdev, "admincq event %d\n", event->event);
+}
+
+static struct ionic_vcq *ionic_create_rdma_admincq(struct ionic_ibdev *dev,
+ int comp_vector)
+{
+ struct ib_cq_init_attr attr = {
+ .cqe = IONIC_AQ_DEPTH,
+ .comp_vector = comp_vector,
+ };
+ struct ionic_tbl_buf buf = {};
+ struct ionic_vcq *vcq;
+ struct ionic_cq *cq;
+ int rc;
+
+ vcq = kzalloc(sizeof(*vcq), GFP_KERNEL);
+ if (!vcq)
+ return ERR_PTR(-ENOMEM);
+
+ vcq->ibcq.device = &dev->ibdev;
+ vcq->ibcq.comp_handler = ionic_rdma_admincq_comp;
+ vcq->ibcq.event_handler = ionic_rdma_admincq_event;
+ atomic_set(&vcq->ibcq.usecnt, 0);
+
+ vcq->udma_mask = 1;
+ cq = &vcq->cq[0];
+
+ rc = ionic_create_cq_common(vcq, &buf, &attr, NULL, NULL,
+ NULL, NULL, 0);
+ if (rc)
+ goto err_init;
+
+ rc = ionic_rdma_queue_devcmd(dev, &cq->q, cq->cqid, cq->eqid,
+ IONIC_CMD_RDMA_CREATE_CQ);
+ if (rc)
+ goto err_cmd;
+
+ return vcq;
+
+err_cmd:
+ ionic_destroy_cq_common(dev, cq);
+err_init:
+ kfree(vcq);
+
+ return ERR_PTR(rc);
+}
+
+static struct ionic_aq *__ionic_create_rdma_adminq(struct ionic_ibdev *dev,
+ u32 aqid, u32 cqid)
+{
+ struct ionic_aq *aq;
+ int rc;
+
+ aq = kzalloc(sizeof(*aq), GFP_KERNEL);
+ if (!aq)
+ return ERR_PTR(-ENOMEM);
+
+ atomic_set(&aq->admin_state, IONIC_ADMIN_KILLED);
+ aq->dev = dev;
+ aq->aqid = aqid;
+ aq->cqid = cqid;
+ spin_lock_init(&aq->lock);
+
+ rc = ionic_queue_init(&aq->q, dev->lif_cfg.hwdev, IONIC_EQ_DEPTH,
+ ADMIN_WQE_STRIDE);
+ if (rc)
+ goto err_q;
+
+ ionic_queue_dbell_init(&aq->q, aq->aqid);
+
+ aq->q_wr = kcalloc((u32)aq->q.mask + 1, sizeof(*aq->q_wr), GFP_KERNEL);
+ if (!aq->q_wr) {
+ rc = -ENOMEM;
+ goto err_wr;
+ }
+
+ INIT_LIST_HEAD(&aq->wr_prod);
+ INIT_LIST_HEAD(&aq->wr_post);
+
+ INIT_WORK(&aq->work, ionic_admin_work);
+ aq->armed = false;
+
+ return aq;
+
+err_wr:
+ ionic_queue_destroy(&aq->q, dev->lif_cfg.hwdev);
+err_q:
+ kfree(aq);
+
+ return ERR_PTR(rc);
+}
+
+static void __ionic_destroy_rdma_adminq(struct ionic_ibdev *dev,
+ struct ionic_aq *aq)
+{
+ kfree(aq->q_wr);
+ ionic_queue_destroy(&aq->q, dev->lif_cfg.hwdev);
+ kfree(aq);
+}
+
+static struct ionic_aq *ionic_create_rdma_adminq(struct ionic_ibdev *dev,
+ u32 aqid, u32 cqid)
+{
+ struct ionic_aq *aq;
+ int rc;
+
+ aq = __ionic_create_rdma_adminq(dev, aqid, cqid);
+ if (IS_ERR(aq))
+ return aq;
+
+ rc = ionic_rdma_queue_devcmd(dev, &aq->q, aq->aqid, aq->cqid,
+ IONIC_CMD_RDMA_CREATE_ADMINQ);
+ if (rc)
+ goto err_cmd;
+
+ return aq;
+
+err_cmd:
+ __ionic_destroy_rdma_adminq(dev, aq);
+
+ return ERR_PTR(rc);
+}
+
+static void ionic_flush_qs(struct ionic_ibdev *dev)
+{
+ struct ionic_qp *qp, *qp_tmp;
+ struct ionic_cq *cq, *cq_tmp;
+ LIST_HEAD(flush_list);
+ unsigned long index;
+
+ WARN_ON(!irqs_disabled());
+
+ /* Flush qp send and recv */
+ xa_lock(&dev->qp_tbl);
+ xa_for_each(&dev->qp_tbl, index, qp) {
+ kref_get(&qp->qp_kref);
+ list_add_tail(&qp->ibkill_flush_ent, &flush_list);
+ }
+ xa_unlock(&dev->qp_tbl);
+
+ list_for_each_entry_safe(qp, qp_tmp, &flush_list, ibkill_flush_ent) {
+ ionic_flush_qp(dev, qp);
+ kref_put(&qp->qp_kref, ionic_qp_complete);
+ list_del(&qp->ibkill_flush_ent);
+ }
+
+ /* Notify completions */
+ xa_lock(&dev->cq_tbl);
+ xa_for_each(&dev->cq_tbl, index, cq) {
+ kref_get(&cq->cq_kref);
+ list_add_tail(&cq->ibkill_flush_ent, &flush_list);
+ }
+ xa_unlock(&dev->cq_tbl);
+
+ list_for_each_entry_safe(cq, cq_tmp, &flush_list, ibkill_flush_ent) {
+ ionic_notify_flush_cq(cq);
+ kref_put(&cq->cq_kref, ionic_cq_complete);
+ list_del(&cq->ibkill_flush_ent);
+ }
+}
+
+static void ionic_kill_ibdev(struct ionic_ibdev *dev, bool fatal_path)
+{
+ unsigned long irqflags;
+ bool do_flush = false;
+ int i;
+
+ /* Mark AQs for drain and flush the QPs while irq is disabled */
+ local_irq_save(irqflags);
+
+ /* Mark the admin queue, flushing at most once */
+ for (i = 0; i < dev->lif_cfg.aq_count; i++) {
+ struct ionic_aq *aq = dev->aq_vec[i];
+
+ spin_lock(&aq->lock);
+ if (atomic_read(&aq->admin_state) != IONIC_ADMIN_KILLED) {
+ atomic_set(&aq->admin_state, IONIC_ADMIN_KILLED);
+ /* Flush incomplete admin commands */
+ ionic_admin_poll_locked(aq);
+ do_flush = true;
+ }
+ spin_unlock(&aq->lock);
+ }
+
+ if (do_flush)
+ ionic_flush_qs(dev);
+
+ local_irq_restore(irqflags);
+
+ /* Post a fatal event if requested */
+ if (fatal_path) {
+ struct ib_event ev;
+
+ ev.device = &dev->ibdev;
+ ev.element.port_num = 1;
+ ev.event = IB_EVENT_DEVICE_FATAL;
+
+ ib_dispatch_event(&ev);
+ }
+
+ atomic_set(&dev->admin_state, IONIC_ADMIN_KILLED);
+}
+
+void ionic_kill_rdma_admin(struct ionic_ibdev *dev, bool fatal_path)
+{
+ enum ionic_admin_state old_state;
+ unsigned long irqflags = 0;
+ int i, rc;
+
+ if (!dev->aq_vec)
+ return;
+
+ /*
+ * Admin queues are transitioned from active to paused to killed state.
+ * When in paused state, no new commands are issued to the device,
+ * nor are any completed locally. After resetting the lif, it will be
+ * safe to resume the rdma admin queues in the killed state. Commands
+ * will not be issued to the device, but will complete locally with status
+ * IONIC_ADMIN_KILLED. Handling completion will ensure that creating or
+ * modifying resources fails, but destroying resources succeeds.
+ * If there was a failure resetting the lif using this strategy,
+ * then the state of the device is unknown.
+ */
+ old_state = atomic_cmpxchg(&dev->admin_state, IONIC_ADMIN_ACTIVE,
+ IONIC_ADMIN_PAUSED);
+ if (old_state != IONIC_ADMIN_ACTIVE)
+ return;
+
+ /* Pause all the AQs */
+ local_irq_save(irqflags);
+ for (i = 0; i < dev->lif_cfg.aq_count; i++) {
+ struct ionic_aq *aq = dev->aq_vec[i];
+
+ spin_lock(&aq->lock);
+ /* pause rdma admin queues to reset lif */
+ if (atomic_read(&aq->admin_state) == IONIC_ADMIN_ACTIVE)
+ atomic_set(&aq->admin_state, IONIC_ADMIN_PAUSED);
+ spin_unlock(&aq->lock);
+ }
+ local_irq_restore(irqflags);
+
+ rc = ionic_rdma_reset_devcmd(dev);
+ if (unlikely(rc)) {
+ ibdev_err(&dev->ibdev, "failed to reset rdma %d\n", rc);
+ ionic_request_rdma_reset(dev->lif_cfg.lif);
+ }
+
+ ionic_kill_ibdev(dev, fatal_path);
+}
+
+static void ionic_reset_work(struct work_struct *ws)
+{
+ struct ionic_ibdev *dev =
+ container_of(ws, struct ionic_ibdev, reset_work);
+
+ ionic_kill_rdma_admin(dev, true);
+}
+
+static bool ionic_next_eqe(struct ionic_eq *eq, struct ionic_v1_eqe *eqe)
+{
+ struct ionic_v1_eqe *qeqe;
+ bool color;
+
+ qeqe = ionic_queue_at_prod(&eq->q);
+ color = ionic_v1_eqe_color(qeqe);
+
+ /* cons is color for eq */
+ if (eq->q.cons != color)
+ return false;
+
+ /* Prevent out-of-order reads of the EQE */
+ dma_rmb();
+
+ ibdev_dbg(&eq->dev->ibdev, "poll eq prod %u\n", eq->q.prod);
+ print_hex_dump_debug("eqe ", DUMP_PREFIX_OFFSET, 16, 1,
+ qeqe, BIT(eq->q.stride_log2), true);
+ *eqe = *qeqe;
+
+ return true;
+}
+
+static void ionic_cq_event(struct ionic_ibdev *dev, u32 cqid, u8 code)
+{
+ unsigned long irqflags;
+ struct ib_event ibev;
+ struct ionic_cq *cq;
+
+ xa_lock_irqsave(&dev->cq_tbl, irqflags);
+ cq = xa_load(&dev->cq_tbl, cqid);
+ if (cq)
+ kref_get(&cq->cq_kref);
+ xa_unlock_irqrestore(&dev->cq_tbl, irqflags);
+
+ if (!cq) {
+ ibdev_dbg(&dev->ibdev,
+ "missing cqid %#x code %u\n", cqid, code);
+ return;
+ }
+
+ switch (code) {
+ case IONIC_V1_EQE_CQ_NOTIFY:
+ if (cq->vcq->ibcq.comp_handler)
+ cq->vcq->ibcq.comp_handler(&cq->vcq->ibcq,
+ cq->vcq->ibcq.cq_context);
+ break;
+
+ case IONIC_V1_EQE_CQ_ERR:
+ if (cq->vcq->ibcq.event_handler) {
+ ibev.event = IB_EVENT_CQ_ERR;
+ ibev.device = &dev->ibdev;
+ ibev.element.cq = &cq->vcq->ibcq;
+
+ cq->vcq->ibcq.event_handler(&ibev,
+ cq->vcq->ibcq.cq_context);
+ }
+ break;
+
+ default:
+ ibdev_dbg(&dev->ibdev,
+ "unrecognized cqid %#x code %u\n", cqid, code);
+ break;
+ }
+
+ kref_put(&cq->cq_kref, ionic_cq_complete);
+}
+
+static void ionic_qp_event(struct ionic_ibdev *dev, u32 qpid, u8 code)
+{
+ unsigned long irqflags;
+ struct ib_event ibev;
+ struct ionic_qp *qp;
+
+ xa_lock_irqsave(&dev->qp_tbl, irqflags);
+ qp = xa_load(&dev->qp_tbl, qpid);
+ if (qp)
+ kref_get(&qp->qp_kref);
+ xa_unlock_irqrestore(&dev->qp_tbl, irqflags);
+
+ if (!qp) {
+ ibdev_dbg(&dev->ibdev,
+ "missing qpid %#x code %u\n", qpid, code);
+ return;
+ }
+
+ ibev.device = &dev->ibdev;
+ ibev.element.qp = &qp->ibqp;
+
+ switch (code) {
+ case IONIC_V1_EQE_SQ_DRAIN:
+ ibev.event = IB_EVENT_SQ_DRAINED;
+ break;
+
+ case IONIC_V1_EQE_QP_COMM_EST:
+ ibev.event = IB_EVENT_COMM_EST;
+ break;
+
+ case IONIC_V1_EQE_QP_LAST_WQE:
+ ibev.event = IB_EVENT_QP_LAST_WQE_REACHED;
+ break;
+
+ case IONIC_V1_EQE_QP_ERR:
+ ibev.event = IB_EVENT_QP_FATAL;
+ break;
+
+ case IONIC_V1_EQE_QP_ERR_REQUEST:
+ ibev.event = IB_EVENT_QP_REQ_ERR;
+ break;
+
+ case IONIC_V1_EQE_QP_ERR_ACCESS:
+ ibev.event = IB_EVENT_QP_ACCESS_ERR;
+ break;
+
+ default:
+ ibdev_dbg(&dev->ibdev,
+ "unrecognized qpid %#x code %u\n", qpid, code);
+ goto out;
+ }
+
+ if (qp->ibqp.event_handler)
+ qp->ibqp.event_handler(&ibev, qp->ibqp.qp_context);
+
+out:
+ kref_put(&qp->qp_kref, ionic_qp_complete);
+}
+
+static u16 ionic_poll_eq(struct ionic_eq *eq, u16 budget)
+{
+ struct ionic_ibdev *dev = eq->dev;
+ struct ionic_v1_eqe eqe;
+ u16 npolled = 0;
+ u8 type, code;
+ u32 evt, qid;
+
+ while (npolled < budget) {
+ if (!ionic_next_eqe(eq, &eqe))
+ break;
+
+ ionic_queue_produce(&eq->q);
+
+ /* cons is color for eq */
+ eq->q.cons = ionic_color_wrap(eq->q.prod, eq->q.cons);
+
+ ++npolled;
+
+ evt = ionic_v1_eqe_evt(&eqe);
+ type = ionic_v1_eqe_evt_type(evt);
+ code = ionic_v1_eqe_evt_code(evt);
+ qid = ionic_v1_eqe_evt_qid(evt);
+
+ switch (type) {
+ case IONIC_V1_EQE_TYPE_CQ:
+ ionic_cq_event(dev, qid, code);
+ break;
+
+ case IONIC_V1_EQE_TYPE_QP:
+ ionic_qp_event(dev, qid, code);
+ break;
+
+ default:
+ ibdev_dbg(&dev->ibdev,
+ "unknown event %#x type %u\n", evt, type);
+ }
+ }
+
+ return npolled;
+}
+
+static void ionic_poll_eq_work(struct work_struct *work)
+{
+ struct ionic_eq *eq = container_of(work, struct ionic_eq, work);
+ u32 npolled;
+
+ if (unlikely(!eq->enable) || WARN_ON(eq->armed))
+ return;
+
+ npolled = ionic_poll_eq(eq, IONIC_EQ_WORK_BUDGET);
+ if (npolled == IONIC_EQ_WORK_BUDGET) {
+ ionic_intr_credits(eq->dev->lif_cfg.intr_ctrl, eq->intr,
+ npolled, 0);
+ queue_work(ionic_evt_workq, &eq->work);
+ } else {
+ xchg(&eq->armed, 1);
+ ionic_intr_credits(eq->dev->lif_cfg.intr_ctrl, eq->intr,
+ 0, IONIC_INTR_CRED_UNMASK);
+ }
+}
+
+static irqreturn_t ionic_poll_eq_isr(int irq, void *eqptr)
+{
+ struct ionic_eq *eq = eqptr;
+ int was_armed;
+ u32 npolled;
+
+ was_armed = xchg(&eq->armed, 0);
+
+ if (unlikely(!eq->enable) || !was_armed)
+ return IRQ_HANDLED;
+
+ npolled = ionic_poll_eq(eq, IONIC_EQ_ISR_BUDGET);
+ if (npolled == IONIC_EQ_ISR_BUDGET) {
+ ionic_intr_credits(eq->dev->lif_cfg.intr_ctrl, eq->intr,
+ npolled, 0);
+ queue_work(ionic_evt_workq, &eq->work);
+ } else {
+ xchg(&eq->armed, 1);
+ ionic_intr_credits(eq->dev->lif_cfg.intr_ctrl, eq->intr,
+ 0, IONIC_INTR_CRED_UNMASK);
+ }
+
+ return IRQ_HANDLED;
+}
+
+static struct ionic_eq *ionic_create_eq(struct ionic_ibdev *dev, int eqid)
+{
+ struct ionic_intr_info intr_obj = { };
+ struct ionic_eq *eq;
+ int rc;
+
+ eq = kzalloc(sizeof(*eq), GFP_KERNEL);
+ if (!eq)
+ return ERR_PTR(-ENOMEM);
+
+ eq->dev = dev;
+
+ rc = ionic_queue_init(&eq->q, dev->lif_cfg.hwdev, IONIC_EQ_DEPTH,
+ sizeof(struct ionic_v1_eqe));
+ if (rc)
+ goto err_q;
+
+ eq->eqid = eqid;
+
+ eq->armed = true;
+ eq->enable = false;
+ INIT_WORK(&eq->work, ionic_poll_eq_work);
+
+ rc = ionic_intr_alloc(dev->lif_cfg.lif, &intr_obj);
+ if (rc < 0)
+ goto err_intr;
+
+ eq->irq = intr_obj.vector;
+ eq->intr = intr_obj.index;
+
+ ionic_queue_dbell_init(&eq->q, eq->eqid);
+
+ /* cons is color for eq */
+ eq->q.cons = true;
+
+ snprintf(eq->name, sizeof(eq->name), "%s-%d-%d-eq",
+ "ionr", dev->lif_cfg.lif_index, eq->eqid);
+
+ ionic_intr_mask(dev->lif_cfg.intr_ctrl, eq->intr, IONIC_INTR_MASK_SET);
+ ionic_intr_mask_assert(dev->lif_cfg.intr_ctrl, eq->intr, IONIC_INTR_MASK_SET);
+ ionic_intr_coal_init(dev->lif_cfg.intr_ctrl, eq->intr, 0);
+ ionic_intr_clean(dev->lif_cfg.intr_ctrl, eq->intr);
+
+ eq->enable = true;
+
+ rc = request_irq(eq->irq, ionic_poll_eq_isr, 0, eq->name, eq);
+ if (rc)
+ goto err_irq;
+
+ rc = ionic_rdma_queue_devcmd(dev, &eq->q, eq->eqid, eq->intr,
+ IONIC_CMD_RDMA_CREATE_EQ);
+ if (rc)
+ goto err_cmd;
+
+ ionic_intr_mask(dev->lif_cfg.intr_ctrl, eq->intr, IONIC_INTR_MASK_CLEAR);
+
+ return eq;
+
+err_cmd:
+ eq->enable = false;
+ free_irq(eq->irq, eq);
+ flush_work(&eq->work);
+err_irq:
+ ionic_intr_free(dev->lif_cfg.lif, eq->intr);
+err_intr:
+ ionic_queue_destroy(&eq->q, dev->lif_cfg.hwdev);
+err_q:
+ kfree(eq);
+
+ return ERR_PTR(rc);
+}
+
+static void ionic_destroy_eq(struct ionic_eq *eq)
+{
+ struct ionic_ibdev *dev = eq->dev;
+
+ eq->enable = false;
+ free_irq(eq->irq, eq);
+ flush_work(&eq->work);
+
+ ionic_intr_free(dev->lif_cfg.lif, eq->intr);
+ ionic_queue_destroy(&eq->q, dev->lif_cfg.hwdev);
+ kfree(eq);
+}
+
+int ionic_create_rdma_admin(struct ionic_ibdev *dev)
+{
+ int eq_i = 0, aq_i = 0, rc = 0;
+ struct ionic_vcq *vcq;
+ struct ionic_aq *aq;
+ struct ionic_eq *eq;
+
+ dev->eq_vec = NULL;
+ dev->aq_vec = NULL;
+
+ INIT_WORK(&dev->reset_work, ionic_reset_work);
+ INIT_DELAYED_WORK(&dev->admin_dwork, ionic_admin_dwork);
+ atomic_set(&dev->admin_state, IONIC_ADMIN_KILLED);
+
+ if (dev->lif_cfg.aq_count > IONIC_AQ_COUNT) {
+ ibdev_dbg(&dev->ibdev, "limiting adminq count to %d\n",
+ IONIC_AQ_COUNT);
+ dev->lif_cfg.aq_count = IONIC_AQ_COUNT;
+ }
+
+ if (dev->lif_cfg.eq_count > IONIC_EQ_COUNT) {
+ dev_dbg(&dev->ibdev.dev, "limiting eventq count to %d\n",
+ IONIC_EQ_COUNT);
+ dev->lif_cfg.eq_count = IONIC_EQ_COUNT;
+ }
+
+ /* need at least two eq and one aq */
+ if (dev->lif_cfg.eq_count < IONIC_EQ_COUNT_MIN ||
+ dev->lif_cfg.aq_count < IONIC_AQ_COUNT_MIN) {
+ rc = -EINVAL;
+ goto out;
+ }
+
+ dev->eq_vec = kmalloc_array(dev->lif_cfg.eq_count, sizeof(*dev->eq_vec),
+ GFP_KERNEL);
+ if (!dev->eq_vec) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ for (eq_i = 0; eq_i < dev->lif_cfg.eq_count; ++eq_i) {
+ eq = ionic_create_eq(dev, eq_i + dev->lif_cfg.eq_base);
+ if (IS_ERR(eq)) {
+ rc = PTR_ERR(eq);
+
+ if (eq_i < IONIC_EQ_COUNT_MIN) {
+ ibdev_err(&dev->ibdev,
+ "fail create eq %pe\n", eq);
+ goto out;
+ }
+
+ /* ok, just fewer eq than device supports */
+ ibdev_dbg(&dev->ibdev, "eq count %d want %d rc %pe\n",
+ eq_i, dev->lif_cfg.eq_count, eq);
+
+ rc = 0;
+ break;
+ }
+
+ dev->eq_vec[eq_i] = eq;
+ }
+
+ dev->lif_cfg.eq_count = eq_i;
+
+ dev->aq_vec = kmalloc_array(dev->lif_cfg.aq_count, sizeof(*dev->aq_vec),
+ GFP_KERNEL);
+ if (!dev->aq_vec) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ /* Create one CQ per AQ */
+ for (aq_i = 0; aq_i < dev->lif_cfg.aq_count; ++aq_i) {
+ vcq = ionic_create_rdma_admincq(dev, aq_i % eq_i);
+ if (IS_ERR(vcq)) {
+ rc = PTR_ERR(vcq);
+
+ if (!aq_i) {
+ ibdev_err(&dev->ibdev,
+ "failed to create acq %pe\n", vcq);
+ goto out;
+ }
+
+ /* ok, just fewer adminq than device supports */
+ ibdev_dbg(&dev->ibdev, "acq count %d want %d rc %pe\n",
+ aq_i, dev->lif_cfg.aq_count, vcq);
+ break;
+ }
+
+ aq = ionic_create_rdma_adminq(dev, aq_i + dev->lif_cfg.aq_base,
+ vcq->cq[0].cqid);
+ if (IS_ERR(aq)) {
+ /* Clean up the dangling CQ */
+ ionic_destroy_cq_common(dev, &vcq->cq[0]);
+ kfree(vcq);
+
+ rc = PTR_ERR(aq);
+
+ if (!aq_i) {
+ ibdev_err(&dev->ibdev,
+ "failed to create aq %pe\n", aq);
+ goto out;
+ }
+
+ /* ok, just fewer adminq than device supports */
+ ibdev_dbg(&dev->ibdev, "aq count %d want %d rc %pe\n",
+ aq_i, dev->lif_cfg.aq_count, aq);
+ break;
+ }
+
+ vcq->ibcq.cq_context = aq;
+ aq->vcq = vcq;
+
+ atomic_set(&aq->admin_state, IONIC_ADMIN_ACTIVE);
+ dev->aq_vec[aq_i] = aq;
+ }
+
+ atomic_set(&dev->admin_state, IONIC_ADMIN_ACTIVE);
+out:
+ dev->lif_cfg.eq_count = eq_i;
+ dev->lif_cfg.aq_count = aq_i;
+
+ return rc;
+}
+
+void ionic_destroy_rdma_admin(struct ionic_ibdev *dev)
+{
+ struct ionic_vcq *vcq;
+ struct ionic_aq *aq;
+ struct ionic_eq *eq;
+
+ /*
+ * Killing the admin before destroy makes sure all admin and
+ * completions are flushed. admin_state = IONIC_ADMIN_KILLED
+ * stops queueing up further works.
+ */
+ cancel_delayed_work_sync(&dev->admin_dwork);
+ cancel_work_sync(&dev->reset_work);
+
+ if (dev->aq_vec) {
+ while (dev->lif_cfg.aq_count > 0) {
+ aq = dev->aq_vec[--dev->lif_cfg.aq_count];
+ vcq = aq->vcq;
+
+ cancel_work_sync(&aq->work);
+
+ __ionic_destroy_rdma_adminq(dev, aq);
+ if (vcq) {
+ ionic_destroy_cq_common(dev, &vcq->cq[0]);
+ kfree(vcq);
+ }
+ }
+
+ kfree(dev->aq_vec);
+ }
+
+ if (dev->eq_vec) {
+ while (dev->lif_cfg.eq_count > 0) {
+ eq = dev->eq_vec[--dev->lif_cfg.eq_count];
+ ionic_destroy_eq(eq);
+ }
+
+ kfree(dev->eq_vec);
+ }
+}
diff --git a/drivers/infiniband/hw/ionic/ionic_controlpath.c b/drivers/infiniband/hw/ionic/ionic_controlpath.c
new file mode 100644
index 000000000000..ea12d9b8e125
--- /dev/null
+++ b/drivers/infiniband/hw/ionic/ionic_controlpath.c
@@ -0,0 +1,2679 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2018-2025, Advanced Micro Devices, Inc. */
+
+#include <linux/module.h>
+#include <linux/printk.h>
+#include <rdma/ib_addr.h>
+#include <rdma/ib_cache.h>
+#include <rdma/ib_user_verbs.h>
+#include <ionic_api.h>
+
+#include "ionic_fw.h"
+#include "ionic_ibdev.h"
+
+#define ionic_set_ecn(tos) (((tos) | 2u) & ~1u)
+#define ionic_clear_ecn(tos) ((tos) & ~3u)
+
+static int ionic_validate_qdesc(struct ionic_qdesc *q)
+{
+ if (!q->addr || !q->size || !q->mask ||
+ !q->depth_log2 || !q->stride_log2)
+ return -EINVAL;
+
+ if (q->addr & (PAGE_SIZE - 1))
+ return -EINVAL;
+
+ if (q->mask != BIT(q->depth_log2) - 1)
+ return -EINVAL;
+
+ if (q->size < BIT_ULL(q->depth_log2 + q->stride_log2))
+ return -EINVAL;
+
+ return 0;
+}
+
+static u32 ionic_get_eqid(struct ionic_ibdev *dev, u32 comp_vector, u8 udma_idx)
+{
+ /* EQ per vector per udma, and the first eqs reserved for async events.
+ * The rest of the vectors can be requested for completions.
+ */
+ u32 comp_vec_count = dev->lif_cfg.eq_count / dev->lif_cfg.udma_count - 1;
+
+ return (comp_vector % comp_vec_count + 1) * dev->lif_cfg.udma_count + udma_idx;
+}
+
+static int ionic_get_cqid(struct ionic_ibdev *dev, u32 *cqid, u8 udma_idx)
+{
+ unsigned int size, base, bound;
+ int rc;
+
+ size = dev->lif_cfg.cq_count / dev->lif_cfg.udma_count;
+ base = size * udma_idx;
+ bound = base + size;
+
+ rc = ionic_resid_get_shared(&dev->inuse_cqid, base, bound);
+ if (rc >= 0) {
+ /* cq_base is zero or a multiple of two queue groups */
+ *cqid = dev->lif_cfg.cq_base +
+ ionic_bitid_to_qid(rc, dev->lif_cfg.udma_qgrp_shift,
+ dev->half_cqid_udma_shift);
+
+ rc = 0;
+ }
+
+ return rc;
+}
+
+static void ionic_put_cqid(struct ionic_ibdev *dev, u32 cqid)
+{
+ u32 bitid = ionic_qid_to_bitid(cqid - dev->lif_cfg.cq_base,
+ dev->lif_cfg.udma_qgrp_shift,
+ dev->half_cqid_udma_shift);
+
+ ionic_resid_put(&dev->inuse_cqid, bitid);
+}
+
+int ionic_create_cq_common(struct ionic_vcq *vcq,
+ struct ionic_tbl_buf *buf,
+ const struct ib_cq_init_attr *attr,
+ struct ionic_ctx *ctx,
+ struct ib_udata *udata,
+ struct ionic_qdesc *req_cq,
+ __u32 *resp_cqid,
+ int udma_idx)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(vcq->ibcq.device);
+ struct ionic_cq *cq = &vcq->cq[udma_idx];
+ void *entry;
+ int rc;
+
+ cq->vcq = vcq;
+
+ if (attr->cqe < 1 || attr->cqe + IONIC_CQ_GRACE > 0xffff) {
+ rc = -EINVAL;
+ goto err_args;
+ }
+
+ rc = ionic_get_cqid(dev, &cq->cqid, udma_idx);
+ if (rc)
+ goto err_args;
+
+ cq->eqid = ionic_get_eqid(dev, attr->comp_vector, udma_idx);
+
+ spin_lock_init(&cq->lock);
+ INIT_LIST_HEAD(&cq->poll_sq);
+ INIT_LIST_HEAD(&cq->flush_sq);
+ INIT_LIST_HEAD(&cq->flush_rq);
+
+ if (udata) {
+ rc = ionic_validate_qdesc(req_cq);
+ if (rc)
+ goto err_qdesc;
+
+ cq->umem = ib_umem_get(&dev->ibdev, req_cq->addr, req_cq->size,
+ IB_ACCESS_LOCAL_WRITE);
+ if (IS_ERR(cq->umem)) {
+ rc = PTR_ERR(cq->umem);
+ goto err_qdesc;
+ }
+
+ cq->q.ptr = NULL;
+ cq->q.size = req_cq->size;
+ cq->q.mask = req_cq->mask;
+ cq->q.depth_log2 = req_cq->depth_log2;
+ cq->q.stride_log2 = req_cq->stride_log2;
+
+ *resp_cqid = cq->cqid;
+ } else {
+ rc = ionic_queue_init(&cq->q, dev->lif_cfg.hwdev,
+ attr->cqe + IONIC_CQ_GRACE,
+ sizeof(struct ionic_v1_cqe));
+ if (rc)
+ goto err_q_init;
+
+ ionic_queue_dbell_init(&cq->q, cq->cqid);
+ cq->color = true;
+ cq->credit = cq->q.mask;
+ }
+
+ rc = ionic_pgtbl_init(dev, buf, cq->umem, cq->q.dma, 1, PAGE_SIZE);
+ if (rc)
+ goto err_pgtbl_init;
+
+ init_completion(&cq->cq_rel_comp);
+ kref_init(&cq->cq_kref);
+
+ entry = xa_store_irq(&dev->cq_tbl, cq->cqid, cq, GFP_KERNEL);
+ if (entry) {
+ if (!xa_is_err(entry))
+ rc = -EINVAL;
+ else
+ rc = xa_err(entry);
+
+ goto err_xa;
+ }
+
+ return 0;
+
+err_xa:
+ ionic_pgtbl_unbuf(dev, buf);
+err_pgtbl_init:
+ if (!udata)
+ ionic_queue_destroy(&cq->q, dev->lif_cfg.hwdev);
+err_q_init:
+ if (cq->umem)
+ ib_umem_release(cq->umem);
+err_qdesc:
+ ionic_put_cqid(dev, cq->cqid);
+err_args:
+ cq->vcq = NULL;
+
+ return rc;
+}
+
+void ionic_destroy_cq_common(struct ionic_ibdev *dev, struct ionic_cq *cq)
+{
+ if (!cq->vcq)
+ return;
+
+ xa_erase_irq(&dev->cq_tbl, cq->cqid);
+
+ kref_put(&cq->cq_kref, ionic_cq_complete);
+ wait_for_completion(&cq->cq_rel_comp);
+
+ if (cq->umem)
+ ib_umem_release(cq->umem);
+ else
+ ionic_queue_destroy(&cq->q, dev->lif_cfg.hwdev);
+
+ ionic_put_cqid(dev, cq->cqid);
+
+ cq->vcq = NULL;
+}
+
+static int ionic_validate_qdesc_zero(struct ionic_qdesc *q)
+{
+ if (q->addr || q->size || q->mask || q->depth_log2 || q->stride_log2)
+ return -EINVAL;
+
+ return 0;
+}
+
+static int ionic_get_pdid(struct ionic_ibdev *dev, u32 *pdid)
+{
+ int rc;
+
+ rc = ionic_resid_get(&dev->inuse_pdid);
+ if (rc < 0)
+ return rc;
+
+ *pdid = rc;
+ return 0;
+}
+
+static int ionic_get_ahid(struct ionic_ibdev *dev, u32 *ahid)
+{
+ int rc;
+
+ rc = ionic_resid_get(&dev->inuse_ahid);
+ if (rc < 0)
+ return rc;
+
+ *ahid = rc;
+ return 0;
+}
+
+static int ionic_get_mrid(struct ionic_ibdev *dev, u32 *mrid)
+{
+ int rc;
+
+ /* wrap to 1, skip reserved lkey */
+ rc = ionic_resid_get_shared(&dev->inuse_mrid, 1,
+ dev->inuse_mrid.inuse_size);
+ if (rc < 0)
+ return rc;
+
+ *mrid = ionic_mrid(rc, dev->next_mrkey++);
+ return 0;
+}
+
+static int ionic_get_gsi_qpid(struct ionic_ibdev *dev, u32 *qpid)
+{
+ int rc = 0;
+
+ rc = ionic_resid_get_shared(&dev->inuse_qpid, IB_QPT_GSI, IB_QPT_GSI + 1);
+ if (rc < 0)
+ return rc;
+
+ *qpid = IB_QPT_GSI;
+ return 0;
+}
+
+static int ionic_get_qpid(struct ionic_ibdev *dev, u32 *qpid,
+ u8 *udma_idx, u8 udma_mask)
+{
+ unsigned int size, base, bound;
+ int udma_i, udma_x, udma_ix;
+ int rc = -EINVAL;
+
+ udma_x = dev->next_qpid_udma_idx;
+
+ dev->next_qpid_udma_idx ^= dev->lif_cfg.udma_count - 1;
+
+ for (udma_i = 0; udma_i < dev->lif_cfg.udma_count; ++udma_i) {
+ udma_ix = udma_i ^ udma_x;
+
+ if (!(udma_mask & BIT(udma_ix)))
+ continue;
+
+ size = dev->lif_cfg.qp_count / dev->lif_cfg.udma_count;
+ base = size * udma_ix;
+ bound = base + size;
+
+ /* skip reserved SMI and GSI qpids in group zero */
+ if (!base)
+ base = 2;
+
+ rc = ionic_resid_get_shared(&dev->inuse_qpid, base, bound);
+ if (rc >= 0) {
+ *qpid = ionic_bitid_to_qid(rc,
+ dev->lif_cfg.udma_qgrp_shift,
+ dev->half_qpid_udma_shift);
+ *udma_idx = udma_ix;
+
+ rc = 0;
+ break;
+ }
+ }
+
+ return rc;
+}
+
+static int ionic_get_dbid(struct ionic_ibdev *dev, u32 *dbid, phys_addr_t *addr)
+{
+ int rc, dbpage_num;
+
+ /* wrap to 1, skip kernel reserved */
+ rc = ionic_resid_get_shared(&dev->inuse_dbid, 1,
+ dev->inuse_dbid.inuse_size);
+ if (rc < 0)
+ return rc;
+
+ dbpage_num = (dev->lif_cfg.lif_hw_index * dev->lif_cfg.dbid_count) + rc;
+ *addr = dev->lif_cfg.db_phys + ((phys_addr_t)dbpage_num << PAGE_SHIFT);
+
+ *dbid = rc;
+
+ return 0;
+}
+
+static void ionic_put_pdid(struct ionic_ibdev *dev, u32 pdid)
+{
+ ionic_resid_put(&dev->inuse_pdid, pdid);
+}
+
+static void ionic_put_ahid(struct ionic_ibdev *dev, u32 ahid)
+{
+ ionic_resid_put(&dev->inuse_ahid, ahid);
+}
+
+static void ionic_put_mrid(struct ionic_ibdev *dev, u32 mrid)
+{
+ ionic_resid_put(&dev->inuse_mrid, ionic_mrid_index(mrid));
+}
+
+static void ionic_put_qpid(struct ionic_ibdev *dev, u32 qpid)
+{
+ u32 bitid = ionic_qid_to_bitid(qpid,
+ dev->lif_cfg.udma_qgrp_shift,
+ dev->half_qpid_udma_shift);
+
+ ionic_resid_put(&dev->inuse_qpid, bitid);
+}
+
+static void ionic_put_dbid(struct ionic_ibdev *dev, u32 dbid)
+{
+ ionic_resid_put(&dev->inuse_dbid, dbid);
+}
+
+static struct rdma_user_mmap_entry*
+ionic_mmap_entry_insert(struct ionic_ctx *ctx, unsigned long size,
+ unsigned long pfn, u8 mmap_flags, u64 *offset)
+{
+ struct ionic_mmap_entry *entry;
+ int rc;
+
+ entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+ if (!entry)
+ return NULL;
+
+ entry->size = size;
+ entry->pfn = pfn;
+ entry->mmap_flags = mmap_flags;
+
+ rc = rdma_user_mmap_entry_insert(&ctx->ibctx, &entry->rdma_entry,
+ entry->size);
+ if (rc) {
+ kfree(entry);
+ return NULL;
+ }
+
+ if (offset)
+ *offset = rdma_user_mmap_get_offset(&entry->rdma_entry);
+
+ return &entry->rdma_entry;
+}
+
+int ionic_alloc_ucontext(struct ib_ucontext *ibctx, struct ib_udata *udata)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(ibctx->device);
+ struct ionic_ctx *ctx = to_ionic_ctx(ibctx);
+ struct ionic_ctx_resp resp = {};
+ struct ionic_ctx_req req;
+ phys_addr_t db_phys = 0;
+ int rc;
+
+ rc = ib_copy_from_udata(&req, udata, sizeof(req));
+ if (rc)
+ return rc;
+
+ /* try to allocate dbid for user ctx */
+ rc = ionic_get_dbid(dev, &ctx->dbid, &db_phys);
+ if (rc < 0)
+ return rc;
+
+ ibdev_dbg(&dev->ibdev, "user space dbid %u\n", ctx->dbid);
+
+ ctx->mmap_dbell = ionic_mmap_entry_insert(ctx, PAGE_SIZE,
+ PHYS_PFN(db_phys), 0, NULL);
+ if (!ctx->mmap_dbell) {
+ rc = -ENOMEM;
+ goto err_mmap_dbell;
+ }
+
+ resp.page_shift = PAGE_SHIFT;
+
+ resp.dbell_offset = db_phys & ~PAGE_MASK;
+
+ resp.version = dev->lif_cfg.rdma_version;
+ resp.qp_opcodes = dev->lif_cfg.qp_opcodes;
+ resp.admin_opcodes = dev->lif_cfg.admin_opcodes;
+
+ resp.sq_qtype = dev->lif_cfg.sq_qtype;
+ resp.rq_qtype = dev->lif_cfg.rq_qtype;
+ resp.cq_qtype = dev->lif_cfg.cq_qtype;
+ resp.admin_qtype = dev->lif_cfg.aq_qtype;
+ resp.max_stride = dev->lif_cfg.max_stride;
+ resp.max_spec = IONIC_SPEC_HIGH;
+
+ resp.udma_count = dev->lif_cfg.udma_count;
+ resp.expdb_mask = dev->lif_cfg.expdb_mask;
+
+ if (dev->lif_cfg.sq_expdb)
+ resp.expdb_qtypes |= IONIC_EXPDB_SQ;
+ if (dev->lif_cfg.rq_expdb)
+ resp.expdb_qtypes |= IONIC_EXPDB_RQ;
+
+ rc = ib_copy_to_udata(udata, &resp, sizeof(resp));
+ if (rc)
+ goto err_resp;
+
+ return 0;
+
+err_resp:
+ rdma_user_mmap_entry_remove(ctx->mmap_dbell);
+err_mmap_dbell:
+ ionic_put_dbid(dev, ctx->dbid);
+
+ return rc;
+}
+
+void ionic_dealloc_ucontext(struct ib_ucontext *ibctx)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(ibctx->device);
+ struct ionic_ctx *ctx = to_ionic_ctx(ibctx);
+
+ rdma_user_mmap_entry_remove(ctx->mmap_dbell);
+ ionic_put_dbid(dev, ctx->dbid);
+}
+
+int ionic_mmap(struct ib_ucontext *ibctx, struct vm_area_struct *vma)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(ibctx->device);
+ struct ionic_ctx *ctx = to_ionic_ctx(ibctx);
+ struct rdma_user_mmap_entry *rdma_entry;
+ struct ionic_mmap_entry *ionic_entry;
+ int rc = 0;
+
+ rdma_entry = rdma_user_mmap_entry_get(&ctx->ibctx, vma);
+ if (!rdma_entry) {
+ ibdev_dbg(&dev->ibdev, "not found %#lx\n",
+ vma->vm_pgoff << PAGE_SHIFT);
+ return -EINVAL;
+ }
+
+ ionic_entry = container_of(rdma_entry, struct ionic_mmap_entry,
+ rdma_entry);
+
+ ibdev_dbg(&dev->ibdev, "writecombine? %d\n",
+ ionic_entry->mmap_flags & IONIC_MMAP_WC);
+ if (ionic_entry->mmap_flags & IONIC_MMAP_WC)
+ vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
+ else
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+ ibdev_dbg(&dev->ibdev, "remap st %#lx pf %#lx sz %#lx\n",
+ vma->vm_start, ionic_entry->pfn, ionic_entry->size);
+ rc = rdma_user_mmap_io(&ctx->ibctx, vma, ionic_entry->pfn,
+ ionic_entry->size, vma->vm_page_prot,
+ rdma_entry);
+ if (rc)
+ ibdev_dbg(&dev->ibdev, "remap failed %d\n", rc);
+
+ rdma_user_mmap_entry_put(rdma_entry);
+ return rc;
+}
+
+void ionic_mmap_free(struct rdma_user_mmap_entry *rdma_entry)
+{
+ struct ionic_mmap_entry *ionic_entry;
+
+ ionic_entry = container_of(rdma_entry, struct ionic_mmap_entry,
+ rdma_entry);
+ kfree(ionic_entry);
+}
+
+int ionic_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(ibpd->device);
+ struct ionic_pd *pd = to_ionic_pd(ibpd);
+
+ return ionic_get_pdid(dev, &pd->pdid);
+}
+
+int ionic_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(ibpd->device);
+ struct ionic_pd *pd = to_ionic_pd(ibpd);
+
+ ionic_put_pdid(dev, pd->pdid);
+
+ return 0;
+}
+
+static int ionic_build_hdr(struct ionic_ibdev *dev,
+ struct ib_ud_header *hdr,
+ const struct rdma_ah_attr *attr,
+ u16 sport, bool want_ecn)
+{
+ const struct ib_global_route *grh;
+ enum rdma_network_type net;
+ u16 vlan;
+ int rc;
+
+ if (attr->ah_flags != IB_AH_GRH)
+ return -EINVAL;
+ if (attr->type != RDMA_AH_ATTR_TYPE_ROCE)
+ return -EINVAL;
+
+ grh = rdma_ah_read_grh(attr);
+
+ rc = rdma_read_gid_l2_fields(grh->sgid_attr, &vlan, &hdr->eth.smac_h[0]);
+ if (rc)
+ return rc;
+
+ net = rdma_gid_attr_network_type(grh->sgid_attr);
+
+ rc = ib_ud_header_init(0, /* no payload */
+ 0, /* no lrh */
+ 1, /* yes eth */
+ vlan != 0xffff,
+ 0, /* no grh */
+ net == RDMA_NETWORK_IPV4 ? 4 : 6,
+ 1, /* yes udp */
+ 0, /* no imm */
+ hdr);
+ if (rc)
+ return rc;
+
+ ether_addr_copy(hdr->eth.dmac_h, attr->roce.dmac);
+
+ if (net == RDMA_NETWORK_IPV4) {
+ hdr->eth.type = cpu_to_be16(ETH_P_IP);
+ hdr->ip4.frag_off = cpu_to_be16(0x4000); /* don't fragment */
+ hdr->ip4.ttl = grh->hop_limit;
+ hdr->ip4.tot_len = cpu_to_be16(0xffff);
+ hdr->ip4.saddr =
+ *(const __be32 *)(grh->sgid_attr->gid.raw + 12);
+ hdr->ip4.daddr = *(const __be32 *)(grh->dgid.raw + 12);
+
+ if (want_ecn)
+ hdr->ip4.tos = ionic_set_ecn(grh->traffic_class);
+ else
+ hdr->ip4.tos = ionic_clear_ecn(grh->traffic_class);
+ } else {
+ hdr->eth.type = cpu_to_be16(ETH_P_IPV6);
+ hdr->grh.flow_label = cpu_to_be32(grh->flow_label);
+ hdr->grh.hop_limit = grh->hop_limit;
+ hdr->grh.source_gid = grh->sgid_attr->gid;
+ hdr->grh.destination_gid = grh->dgid;
+
+ if (want_ecn)
+ hdr->grh.traffic_class =
+ ionic_set_ecn(grh->traffic_class);
+ else
+ hdr->grh.traffic_class =
+ ionic_clear_ecn(grh->traffic_class);
+ }
+
+ if (vlan != 0xffff) {
+ vlan |= rdma_ah_get_sl(attr) << VLAN_PRIO_SHIFT;
+ hdr->vlan.tag = cpu_to_be16(vlan);
+ hdr->vlan.type = hdr->eth.type;
+ hdr->eth.type = cpu_to_be16(ETH_P_8021Q);
+ }
+
+ hdr->udp.sport = cpu_to_be16(sport);
+ hdr->udp.dport = cpu_to_be16(ROCE_V2_UDP_DPORT);
+
+ return 0;
+}
+
+static void ionic_set_ah_attr(struct ionic_ibdev *dev,
+ struct rdma_ah_attr *ah_attr,
+ struct ib_ud_header *hdr,
+ int sgid_index)
+{
+ u32 flow_label;
+ u16 vlan = 0;
+ u8 tos, ttl;
+
+ if (hdr->vlan_present)
+ vlan = be16_to_cpu(hdr->vlan.tag);
+
+ if (hdr->ipv4_present) {
+ flow_label = 0;
+ ttl = hdr->ip4.ttl;
+ tos = hdr->ip4.tos;
+ *(__be16 *)(hdr->grh.destination_gid.raw + 10) = cpu_to_be16(0xffff);
+ *(__be32 *)(hdr->grh.destination_gid.raw + 12) = hdr->ip4.daddr;
+ } else {
+ flow_label = be32_to_cpu(hdr->grh.flow_label);
+ ttl = hdr->grh.hop_limit;
+ tos = hdr->grh.traffic_class;
+ }
+
+ memset(ah_attr, 0, sizeof(*ah_attr));
+ ah_attr->type = RDMA_AH_ATTR_TYPE_ROCE;
+ if (hdr->eth_present)
+ ether_addr_copy(ah_attr->roce.dmac, hdr->eth.dmac_h);
+ rdma_ah_set_sl(ah_attr, vlan >> VLAN_PRIO_SHIFT);
+ rdma_ah_set_port_num(ah_attr, 1);
+ rdma_ah_set_grh(ah_attr, NULL, flow_label, sgid_index, ttl, tos);
+ rdma_ah_set_dgid_raw(ah_attr, &hdr->grh.destination_gid);
+}
+
+static int ionic_create_ah_cmd(struct ionic_ibdev *dev,
+ struct ionic_ah *ah,
+ struct ionic_pd *pd,
+ struct rdma_ah_attr *attr,
+ u32 flags)
+{
+ struct ionic_admin_wr wr = {
+ .work = COMPLETION_INITIALIZER_ONSTACK(wr.work),
+ .wqe = {
+ .op = IONIC_V1_ADMIN_CREATE_AH,
+ .len = cpu_to_le16(IONIC_ADMIN_CREATE_AH_IN_V1_LEN),
+ .cmd.create_ah = {
+ .pd_id = cpu_to_le32(pd->pdid),
+ .dbid_flags = cpu_to_le16(dev->lif_cfg.dbid),
+ .id_ver = cpu_to_le32(ah->ahid),
+ }
+ }
+ };
+ enum ionic_admin_flags admin_flags = 0;
+ dma_addr_t hdr_dma = 0;
+ void *hdr_buf;
+ gfp_t gfp = GFP_ATOMIC;
+ int rc, hdr_len = 0;
+
+ if (dev->lif_cfg.admin_opcodes <= IONIC_V1_ADMIN_CREATE_AH)
+ return -EBADRQC;
+
+ if (flags & RDMA_CREATE_AH_SLEEPABLE)
+ gfp = GFP_KERNEL;
+ else
+ admin_flags |= IONIC_ADMIN_F_BUSYWAIT;
+
+ rc = ionic_build_hdr(dev, &ah->hdr, attr, IONIC_ROCE_UDP_SPORT, false);
+ if (rc)
+ return rc;
+
+ if (ah->hdr.eth.type == cpu_to_be16(ETH_P_8021Q)) {
+ if (ah->hdr.vlan.type == cpu_to_be16(ETH_P_IP))
+ wr.wqe.cmd.create_ah.csum_profile =
+ IONIC_TFP_CSUM_PROF_ETH_QTAG_IPV4_UDP;
+ else
+ wr.wqe.cmd.create_ah.csum_profile =
+ IONIC_TFP_CSUM_PROF_ETH_QTAG_IPV6_UDP;
+ } else {
+ if (ah->hdr.eth.type == cpu_to_be16(ETH_P_IP))
+ wr.wqe.cmd.create_ah.csum_profile =
+ IONIC_TFP_CSUM_PROF_ETH_IPV4_UDP;
+ else
+ wr.wqe.cmd.create_ah.csum_profile =
+ IONIC_TFP_CSUM_PROF_ETH_IPV6_UDP;
+ }
+
+ ah->sgid_index = rdma_ah_read_grh(attr)->sgid_index;
+
+ hdr_buf = kmalloc(PAGE_SIZE, gfp);
+ if (!hdr_buf)
+ return -ENOMEM;
+
+ hdr_len = ib_ud_header_pack(&ah->hdr, hdr_buf);
+ hdr_len -= IB_BTH_BYTES;
+ hdr_len -= IB_DETH_BYTES;
+ ibdev_dbg(&dev->ibdev, "roce packet header template\n");
+ print_hex_dump_debug("hdr ", DUMP_PREFIX_OFFSET, 16, 1,
+ hdr_buf, hdr_len, true);
+
+ hdr_dma = dma_map_single(dev->lif_cfg.hwdev, hdr_buf, hdr_len,
+ DMA_TO_DEVICE);
+
+ rc = dma_mapping_error(dev->lif_cfg.hwdev, hdr_dma);
+ if (rc)
+ goto err_dma;
+
+ wr.wqe.cmd.create_ah.dma_addr = cpu_to_le64(hdr_dma);
+ wr.wqe.cmd.create_ah.length = cpu_to_le32(hdr_len);
+
+ ionic_admin_post(dev, &wr);
+ rc = ionic_admin_wait(dev, &wr, admin_flags);
+
+ dma_unmap_single(dev->lif_cfg.hwdev, hdr_dma, hdr_len,
+ DMA_TO_DEVICE);
+err_dma:
+ kfree(hdr_buf);
+
+ return rc;
+}
+
+static int ionic_destroy_ah_cmd(struct ionic_ibdev *dev, u32 ahid, u32 flags)
+{
+ struct ionic_admin_wr wr = {
+ .work = COMPLETION_INITIALIZER_ONSTACK(wr.work),
+ .wqe = {
+ .op = IONIC_V1_ADMIN_DESTROY_AH,
+ .len = cpu_to_le16(IONIC_ADMIN_DESTROY_AH_IN_V1_LEN),
+ .cmd.destroy_ah = {
+ .ah_id = cpu_to_le32(ahid),
+ },
+ }
+ };
+ enum ionic_admin_flags admin_flags = IONIC_ADMIN_F_TEARDOWN;
+
+ if (dev->lif_cfg.admin_opcodes <= IONIC_V1_ADMIN_DESTROY_AH)
+ return -EBADRQC;
+
+ if (!(flags & RDMA_CREATE_AH_SLEEPABLE))
+ admin_flags |= IONIC_ADMIN_F_BUSYWAIT;
+
+ ionic_admin_post(dev, &wr);
+ ionic_admin_wait(dev, &wr, admin_flags);
+
+ /* No host-memory resource is associated with ah, so it is ok
+ * to "succeed" and complete this destroy ah on the host.
+ */
+ return 0;
+}
+
+int ionic_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
+ struct ib_udata *udata)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(ibah->device);
+ struct rdma_ah_attr *attr = init_attr->ah_attr;
+ struct ionic_pd *pd = to_ionic_pd(ibah->pd);
+ struct ionic_ah *ah = to_ionic_ah(ibah);
+ struct ionic_ah_resp resp = {};
+ u32 flags = init_attr->flags;
+ int rc;
+
+ rc = ionic_get_ahid(dev, &ah->ahid);
+ if (rc)
+ return rc;
+
+ rc = ionic_create_ah_cmd(dev, ah, pd, attr, flags);
+ if (rc)
+ goto err_cmd;
+
+ if (udata) {
+ resp.ahid = ah->ahid;
+
+ rc = ib_copy_to_udata(udata, &resp, sizeof(resp));
+ if (rc)
+ goto err_resp;
+ }
+
+ return 0;
+
+err_resp:
+ ionic_destroy_ah_cmd(dev, ah->ahid, flags);
+err_cmd:
+ ionic_put_ahid(dev, ah->ahid);
+ return rc;
+}
+
+int ionic_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(ibah->device);
+ struct ionic_ah *ah = to_ionic_ah(ibah);
+
+ ionic_set_ah_attr(dev, ah_attr, &ah->hdr, ah->sgid_index);
+
+ return 0;
+}
+
+int ionic_destroy_ah(struct ib_ah *ibah, u32 flags)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(ibah->device);
+ struct ionic_ah *ah = to_ionic_ah(ibah);
+ int rc;
+
+ rc = ionic_destroy_ah_cmd(dev, ah->ahid, flags);
+ if (rc)
+ return rc;
+
+ ionic_put_ahid(dev, ah->ahid);
+
+ return 0;
+}
+
+static int ionic_create_mr_cmd(struct ionic_ibdev *dev,
+ struct ionic_pd *pd,
+ struct ionic_mr *mr,
+ u64 addr,
+ u64 length)
+{
+ struct ionic_admin_wr wr = {
+ .work = COMPLETION_INITIALIZER_ONSTACK(wr.work),
+ .wqe = {
+ .op = IONIC_V1_ADMIN_CREATE_MR,
+ .len = cpu_to_le16(IONIC_ADMIN_CREATE_MR_IN_V1_LEN),
+ .cmd.create_mr = {
+ .va = cpu_to_le64(addr),
+ .length = cpu_to_le64(length),
+ .pd_id = cpu_to_le32(pd->pdid),
+ .page_size_log2 = mr->buf.page_size_log2,
+ .tbl_index = cpu_to_le32(~0),
+ .map_count = cpu_to_le32(mr->buf.tbl_pages),
+ .dma_addr = ionic_pgtbl_dma(&mr->buf, addr),
+ .dbid_flags = cpu_to_le16(mr->flags),
+ .id_ver = cpu_to_le32(mr->mrid),
+ }
+ }
+ };
+ int rc;
+
+ if (dev->lif_cfg.admin_opcodes <= IONIC_V1_ADMIN_CREATE_MR)
+ return -EBADRQC;
+
+ ionic_admin_post(dev, &wr);
+ rc = ionic_admin_wait(dev, &wr, 0);
+ if (!rc)
+ mr->created = true;
+
+ return rc;
+}
+
+static int ionic_destroy_mr_cmd(struct ionic_ibdev *dev, u32 mrid)
+{
+ struct ionic_admin_wr wr = {
+ .work = COMPLETION_INITIALIZER_ONSTACK(wr.work),
+ .wqe = {
+ .op = IONIC_V1_ADMIN_DESTROY_MR,
+ .len = cpu_to_le16(IONIC_ADMIN_DESTROY_MR_IN_V1_LEN),
+ .cmd.destroy_mr = {
+ .mr_id = cpu_to_le32(mrid),
+ },
+ }
+ };
+
+ if (dev->lif_cfg.admin_opcodes <= IONIC_V1_ADMIN_DESTROY_MR)
+ return -EBADRQC;
+
+ ionic_admin_post(dev, &wr);
+
+ return ionic_admin_wait(dev, &wr, IONIC_ADMIN_F_TEARDOWN);
+}
+
+struct ib_mr *ionic_get_dma_mr(struct ib_pd *ibpd, int access)
+{
+ struct ionic_pd *pd = to_ionic_pd(ibpd);
+ struct ionic_mr *mr;
+
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+ mr->ibmr.lkey = IONIC_DMA_LKEY;
+ mr->ibmr.rkey = IONIC_DMA_RKEY;
+
+ if (pd)
+ pd->flags |= IONIC_QPF_PRIVILEGED;
+
+ return &mr->ibmr;
+}
+
+struct ib_mr *ionic_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 length,
+ u64 addr, int access, struct ib_dmah *dmah,
+ struct ib_udata *udata)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(ibpd->device);
+ struct ionic_pd *pd = to_ionic_pd(ibpd);
+ struct ionic_mr *mr;
+ unsigned long pg_sz;
+ int rc;
+
+ if (dmah)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+ rc = ionic_get_mrid(dev, &mr->mrid);
+ if (rc)
+ goto err_mrid;
+
+ mr->ibmr.lkey = mr->mrid;
+ mr->ibmr.rkey = mr->mrid;
+ mr->ibmr.iova = addr;
+ mr->ibmr.length = length;
+
+ mr->flags = IONIC_MRF_USER_MR | to_ionic_mr_flags(access);
+
+ mr->umem = ib_umem_get(&dev->ibdev, start, length, access);
+ if (IS_ERR(mr->umem)) {
+ rc = PTR_ERR(mr->umem);
+ goto err_umem;
+ }
+
+ pg_sz = ib_umem_find_best_pgsz(mr->umem,
+ dev->lif_cfg.page_size_supported,
+ addr);
+ if (!pg_sz) {
+ rc = -EINVAL;
+ goto err_pgtbl;
+ }
+
+ rc = ionic_pgtbl_init(dev, &mr->buf, mr->umem, 0, 1, pg_sz);
+ if (rc)
+ goto err_pgtbl;
+
+ rc = ionic_create_mr_cmd(dev, pd, mr, addr, length);
+ if (rc)
+ goto err_cmd;
+
+ ionic_pgtbl_unbuf(dev, &mr->buf);
+
+ return &mr->ibmr;
+
+err_cmd:
+ ionic_pgtbl_unbuf(dev, &mr->buf);
+err_pgtbl:
+ ib_umem_release(mr->umem);
+err_umem:
+ ionic_put_mrid(dev, mr->mrid);
+err_mrid:
+ kfree(mr);
+ return ERR_PTR(rc);
+}
+
+struct ib_mr *ionic_reg_user_mr_dmabuf(struct ib_pd *ibpd, u64 offset,
+ u64 length, u64 addr, int fd, int access,
+ struct ib_dmah *dmah,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(ibpd->device);
+ struct ionic_pd *pd = to_ionic_pd(ibpd);
+ struct ib_umem_dmabuf *umem_dmabuf;
+ struct ionic_mr *mr;
+ u64 pg_sz;
+ int rc;
+
+ if (dmah)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+ rc = ionic_get_mrid(dev, &mr->mrid);
+ if (rc)
+ goto err_mrid;
+
+ mr->ibmr.lkey = mr->mrid;
+ mr->ibmr.rkey = mr->mrid;
+ mr->ibmr.iova = addr;
+ mr->ibmr.length = length;
+
+ mr->flags = IONIC_MRF_USER_MR | to_ionic_mr_flags(access);
+
+ umem_dmabuf = ib_umem_dmabuf_get_pinned(&dev->ibdev, offset, length,
+ fd, access);
+ if (IS_ERR(umem_dmabuf)) {
+ rc = PTR_ERR(umem_dmabuf);
+ goto err_umem;
+ }
+
+ mr->umem = &umem_dmabuf->umem;
+
+ pg_sz = ib_umem_find_best_pgsz(mr->umem,
+ dev->lif_cfg.page_size_supported,
+ addr);
+ if (!pg_sz) {
+ rc = -EINVAL;
+ goto err_pgtbl;
+ }
+
+ rc = ionic_pgtbl_init(dev, &mr->buf, mr->umem, 0, 1, pg_sz);
+ if (rc)
+ goto err_pgtbl;
+
+ rc = ionic_create_mr_cmd(dev, pd, mr, addr, length);
+ if (rc)
+ goto err_cmd;
+
+ ionic_pgtbl_unbuf(dev, &mr->buf);
+
+ return &mr->ibmr;
+
+err_cmd:
+ ionic_pgtbl_unbuf(dev, &mr->buf);
+err_pgtbl:
+ ib_umem_release(mr->umem);
+err_umem:
+ ionic_put_mrid(dev, mr->mrid);
+err_mrid:
+ kfree(mr);
+ return ERR_PTR(rc);
+}
+
+int ionic_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(ibmr->device);
+ struct ionic_mr *mr = to_ionic_mr(ibmr);
+ int rc;
+
+ if (!mr->ibmr.lkey)
+ goto out;
+
+ if (mr->created) {
+ rc = ionic_destroy_mr_cmd(dev, mr->mrid);
+ if (rc)
+ return rc;
+ }
+
+ ionic_pgtbl_unbuf(dev, &mr->buf);
+
+ if (mr->umem)
+ ib_umem_release(mr->umem);
+
+ ionic_put_mrid(dev, mr->mrid);
+
+out:
+ kfree(mr);
+
+ return 0;
+}
+
+struct ib_mr *ionic_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type type,
+ u32 max_sg)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(ibpd->device);
+ struct ionic_pd *pd = to_ionic_pd(ibpd);
+ struct ionic_mr *mr;
+ int rc;
+
+ if (type != IB_MR_TYPE_MEM_REG)
+ return ERR_PTR(-EINVAL);
+
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+ rc = ionic_get_mrid(dev, &mr->mrid);
+ if (rc)
+ goto err_mrid;
+
+ mr->ibmr.lkey = mr->mrid;
+ mr->ibmr.rkey = mr->mrid;
+
+ mr->flags = IONIC_MRF_PHYS_MR;
+
+ rc = ionic_pgtbl_init(dev, &mr->buf, mr->umem, 0, max_sg, PAGE_SIZE);
+ if (rc)
+ goto err_pgtbl;
+
+ mr->buf.tbl_pages = 0;
+
+ rc = ionic_create_mr_cmd(dev, pd, mr, 0, 0);
+ if (rc)
+ goto err_cmd;
+
+ return &mr->ibmr;
+
+err_cmd:
+ ionic_pgtbl_unbuf(dev, &mr->buf);
+err_pgtbl:
+ ionic_put_mrid(dev, mr->mrid);
+err_mrid:
+ kfree(mr);
+ return ERR_PTR(rc);
+}
+
+static int ionic_map_mr_page(struct ib_mr *ibmr, u64 dma)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(ibmr->device);
+ struct ionic_mr *mr = to_ionic_mr(ibmr);
+
+ ibdev_dbg(&dev->ibdev, "dma %p\n", (void *)dma);
+ return ionic_pgtbl_page(&mr->buf, dma);
+}
+
+int ionic_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
+ unsigned int *sg_offset)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(ibmr->device);
+ struct ionic_mr *mr = to_ionic_mr(ibmr);
+ int rc;
+
+ /* mr must be allocated using ib_alloc_mr() */
+ if (unlikely(!mr->buf.tbl_limit))
+ return -EINVAL;
+
+ mr->buf.tbl_pages = 0;
+
+ if (mr->buf.tbl_buf)
+ dma_sync_single_for_cpu(dev->lif_cfg.hwdev, mr->buf.tbl_dma,
+ mr->buf.tbl_size, DMA_TO_DEVICE);
+
+ ibdev_dbg(&dev->ibdev, "sg %p nent %d\n", sg, sg_nents);
+ rc = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, ionic_map_mr_page);
+
+ mr->buf.page_size_log2 = order_base_2(ibmr->page_size);
+
+ if (mr->buf.tbl_buf)
+ dma_sync_single_for_device(dev->lif_cfg.hwdev, mr->buf.tbl_dma,
+ mr->buf.tbl_size, DMA_TO_DEVICE);
+
+ return rc;
+}
+
+int ionic_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(ibmw->device);
+ struct ionic_pd *pd = to_ionic_pd(ibmw->pd);
+ struct ionic_mr *mr = to_ionic_mw(ibmw);
+ int rc;
+
+ rc = ionic_get_mrid(dev, &mr->mrid);
+ if (rc)
+ return rc;
+
+ mr->ibmw.rkey = mr->mrid;
+
+ if (mr->ibmw.type == IB_MW_TYPE_1)
+ mr->flags = IONIC_MRF_MW_1;
+ else
+ mr->flags = IONIC_MRF_MW_2;
+
+ rc = ionic_create_mr_cmd(dev, pd, mr, 0, 0);
+ if (rc)
+ goto err_cmd;
+
+ return 0;
+
+err_cmd:
+ ionic_put_mrid(dev, mr->mrid);
+ return rc;
+}
+
+int ionic_dealloc_mw(struct ib_mw *ibmw)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(ibmw->device);
+ struct ionic_mr *mr = to_ionic_mw(ibmw);
+ int rc;
+
+ rc = ionic_destroy_mr_cmd(dev, mr->mrid);
+ if (rc)
+ return rc;
+
+ ionic_put_mrid(dev, mr->mrid);
+
+ return 0;
+}
+
+static int ionic_create_cq_cmd(struct ionic_ibdev *dev,
+ struct ionic_ctx *ctx,
+ struct ionic_cq *cq,
+ struct ionic_tbl_buf *buf)
+{
+ const u16 dbid = ionic_ctx_dbid(dev, ctx);
+ struct ionic_admin_wr wr = {
+ .work = COMPLETION_INITIALIZER_ONSTACK(wr.work),
+ .wqe = {
+ .op = IONIC_V1_ADMIN_CREATE_CQ,
+ .len = cpu_to_le16(IONIC_ADMIN_CREATE_CQ_IN_V1_LEN),
+ .cmd.create_cq = {
+ .eq_id = cpu_to_le32(cq->eqid),
+ .depth_log2 = cq->q.depth_log2,
+ .stride_log2 = cq->q.stride_log2,
+ .page_size_log2 = buf->page_size_log2,
+ .tbl_index = cpu_to_le32(~0),
+ .map_count = cpu_to_le32(buf->tbl_pages),
+ .dma_addr = ionic_pgtbl_dma(buf, 0),
+ .dbid_flags = cpu_to_le16(dbid),
+ .id_ver = cpu_to_le32(cq->cqid),
+ }
+ }
+ };
+
+ if (dev->lif_cfg.admin_opcodes <= IONIC_V1_ADMIN_CREATE_CQ)
+ return -EBADRQC;
+
+ ionic_admin_post(dev, &wr);
+
+ return ionic_admin_wait(dev, &wr, 0);
+}
+
+static int ionic_destroy_cq_cmd(struct ionic_ibdev *dev, u32 cqid)
+{
+ struct ionic_admin_wr wr = {
+ .work = COMPLETION_INITIALIZER_ONSTACK(wr.work),
+ .wqe = {
+ .op = IONIC_V1_ADMIN_DESTROY_CQ,
+ .len = cpu_to_le16(IONIC_ADMIN_DESTROY_CQ_IN_V1_LEN),
+ .cmd.destroy_cq = {
+ .cq_id = cpu_to_le32(cqid),
+ },
+ }
+ };
+
+ if (dev->lif_cfg.admin_opcodes <= IONIC_V1_ADMIN_DESTROY_CQ)
+ return -EBADRQC;
+
+ ionic_admin_post(dev, &wr);
+
+ return ionic_admin_wait(dev, &wr, IONIC_ADMIN_F_TEARDOWN);
+}
+
+int ionic_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(ibcq->device);
+ struct ib_udata *udata = &attrs->driver_udata;
+ struct ionic_ctx *ctx =
+ rdma_udata_to_drv_context(udata, struct ionic_ctx, ibctx);
+ struct ionic_vcq *vcq = to_ionic_vcq(ibcq);
+ struct ionic_tbl_buf buf = {};
+ struct ionic_cq_resp resp;
+ struct ionic_cq_req req;
+ int udma_idx = 0, rc;
+
+ if (udata) {
+ rc = ib_copy_from_udata(&req, udata, sizeof(req));
+ if (rc)
+ return rc;
+ }
+
+ vcq->udma_mask = BIT(dev->lif_cfg.udma_count) - 1;
+
+ if (udata)
+ vcq->udma_mask &= req.udma_mask;
+
+ if (!vcq->udma_mask) {
+ rc = -EINVAL;
+ goto err_init;
+ }
+
+ for (; udma_idx < dev->lif_cfg.udma_count; ++udma_idx) {
+ if (!(vcq->udma_mask & BIT(udma_idx)))
+ continue;
+
+ rc = ionic_create_cq_common(vcq, &buf, attr, ctx, udata,
+ &req.cq[udma_idx],
+ &resp.cqid[udma_idx],
+ udma_idx);
+ if (rc)
+ goto err_init;
+
+ rc = ionic_create_cq_cmd(dev, ctx, &vcq->cq[udma_idx], &buf);
+ if (rc)
+ goto err_cmd;
+
+ ionic_pgtbl_unbuf(dev, &buf);
+ }
+
+ vcq->ibcq.cqe = attr->cqe;
+
+ if (udata) {
+ resp.udma_mask = vcq->udma_mask;
+
+ rc = ib_copy_to_udata(udata, &resp, sizeof(resp));
+ if (rc)
+ goto err_resp;
+ }
+
+ return 0;
+
+err_resp:
+ while (udma_idx) {
+ --udma_idx;
+ if (!(vcq->udma_mask & BIT(udma_idx)))
+ continue;
+ ionic_destroy_cq_cmd(dev, vcq->cq[udma_idx].cqid);
+err_cmd:
+ ionic_pgtbl_unbuf(dev, &buf);
+ ionic_destroy_cq_common(dev, &vcq->cq[udma_idx]);
+err_init:
+ ;
+ }
+
+ return rc;
+}
+
+int ionic_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(ibcq->device);
+ struct ionic_vcq *vcq = to_ionic_vcq(ibcq);
+ int udma_idx, rc_tmp, rc = 0;
+
+ for (udma_idx = dev->lif_cfg.udma_count; udma_idx; ) {
+ --udma_idx;
+
+ if (!(vcq->udma_mask & BIT(udma_idx)))
+ continue;
+
+ rc_tmp = ionic_destroy_cq_cmd(dev, vcq->cq[udma_idx].cqid);
+ if (rc_tmp) {
+ if (!rc)
+ rc = rc_tmp;
+
+ continue;
+ }
+
+ ionic_destroy_cq_common(dev, &vcq->cq[udma_idx]);
+ }
+
+ return rc;
+}
+
+static bool pd_remote_privileged(struct ib_pd *pd)
+{
+ return pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY;
+}
+
+static int ionic_create_qp_cmd(struct ionic_ibdev *dev,
+ struct ionic_pd *pd,
+ struct ionic_cq *send_cq,
+ struct ionic_cq *recv_cq,
+ struct ionic_qp *qp,
+ struct ionic_tbl_buf *sq_buf,
+ struct ionic_tbl_buf *rq_buf,
+ struct ib_qp_init_attr *attr)
+{
+ const u16 dbid = ionic_obj_dbid(dev, pd->ibpd.uobject);
+ const u32 flags = to_ionic_qp_flags(0, 0,
+ qp->sq_cmb & IONIC_CMB_ENABLE,
+ qp->rq_cmb & IONIC_CMB_ENABLE,
+ qp->sq_spec, qp->rq_spec,
+ pd->flags & IONIC_QPF_PRIVILEGED,
+ pd_remote_privileged(&pd->ibpd));
+ struct ionic_admin_wr wr = {
+ .work = COMPLETION_INITIALIZER_ONSTACK(wr.work),
+ .wqe = {
+ .op = IONIC_V1_ADMIN_CREATE_QP,
+ .len = cpu_to_le16(IONIC_ADMIN_CREATE_QP_IN_V1_LEN),
+ .cmd.create_qp = {
+ .pd_id = cpu_to_le32(pd->pdid),
+ .priv_flags = cpu_to_be32(flags),
+ .type_state = to_ionic_qp_type(attr->qp_type),
+ .dbid_flags = cpu_to_le16(dbid),
+ .id_ver = cpu_to_le32(qp->qpid),
+ }
+ }
+ };
+
+ if (dev->lif_cfg.admin_opcodes <= IONIC_V1_ADMIN_CREATE_QP)
+ return -EBADRQC;
+
+ if (qp->has_sq) {
+ wr.wqe.cmd.create_qp.sq_cq_id = cpu_to_le32(send_cq->cqid);
+ wr.wqe.cmd.create_qp.sq_depth_log2 = qp->sq.depth_log2;
+ wr.wqe.cmd.create_qp.sq_stride_log2 = qp->sq.stride_log2;
+ wr.wqe.cmd.create_qp.sq_page_size_log2 = sq_buf->page_size_log2;
+ wr.wqe.cmd.create_qp.sq_tbl_index_xrcd_id = cpu_to_le32(~0);
+ wr.wqe.cmd.create_qp.sq_map_count =
+ cpu_to_le32(sq_buf->tbl_pages);
+ wr.wqe.cmd.create_qp.sq_dma_addr = ionic_pgtbl_dma(sq_buf, 0);
+ }
+
+ if (qp->has_rq) {
+ wr.wqe.cmd.create_qp.rq_cq_id = cpu_to_le32(recv_cq->cqid);
+ wr.wqe.cmd.create_qp.rq_depth_log2 = qp->rq.depth_log2;
+ wr.wqe.cmd.create_qp.rq_stride_log2 = qp->rq.stride_log2;
+ wr.wqe.cmd.create_qp.rq_page_size_log2 = rq_buf->page_size_log2;
+ wr.wqe.cmd.create_qp.rq_tbl_index_srq_id = cpu_to_le32(~0);
+ wr.wqe.cmd.create_qp.rq_map_count =
+ cpu_to_le32(rq_buf->tbl_pages);
+ wr.wqe.cmd.create_qp.rq_dma_addr = ionic_pgtbl_dma(rq_buf, 0);
+ }
+
+ ionic_admin_post(dev, &wr);
+
+ return ionic_admin_wait(dev, &wr, 0);
+}
+
+static int ionic_modify_qp_cmd(struct ionic_ibdev *dev,
+ struct ionic_pd *pd,
+ struct ionic_qp *qp,
+ struct ib_qp_attr *attr,
+ int mask)
+{
+ const u32 flags = to_ionic_qp_flags(attr->qp_access_flags,
+ attr->en_sqd_async_notify,
+ qp->sq_cmb & IONIC_CMB_ENABLE,
+ qp->rq_cmb & IONIC_CMB_ENABLE,
+ qp->sq_spec, qp->rq_spec,
+ pd->flags & IONIC_QPF_PRIVILEGED,
+ pd_remote_privileged(qp->ibqp.pd));
+ const u8 state = to_ionic_qp_modify_state(attr->qp_state,
+ attr->cur_qp_state);
+ struct ionic_admin_wr wr = {
+ .work = COMPLETION_INITIALIZER_ONSTACK(wr.work),
+ .wqe = {
+ .op = IONIC_V1_ADMIN_MODIFY_QP,
+ .len = cpu_to_le16(IONIC_ADMIN_MODIFY_QP_IN_V1_LEN),
+ .cmd.mod_qp = {
+ .attr_mask = cpu_to_be32(mask),
+ .access_flags = cpu_to_be16(flags),
+ .rq_psn = cpu_to_le32(attr->rq_psn),
+ .sq_psn = cpu_to_le32(attr->sq_psn),
+ .rate_limit_kbps =
+ cpu_to_le32(attr->rate_limit),
+ .pmtu = (attr->path_mtu + 7),
+ .retry = (attr->retry_cnt |
+ (attr->rnr_retry << 4)),
+ .rnr_timer = attr->min_rnr_timer,
+ .retry_timeout = attr->timeout,
+ .type_state = state,
+ .id_ver = cpu_to_le32(qp->qpid),
+ }
+ }
+ };
+ const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
+ void *hdr_buf = NULL;
+ dma_addr_t hdr_dma = 0;
+ int rc, hdr_len = 0;
+ u16 sport;
+
+ if (dev->lif_cfg.admin_opcodes <= IONIC_V1_ADMIN_MODIFY_QP)
+ return -EBADRQC;
+
+ if ((mask & IB_QP_MAX_DEST_RD_ATOMIC) && attr->max_dest_rd_atomic) {
+ /* Note, round up/down was already done for allocating
+ * resources on the device. The allocation order is in cache
+ * line size. We can't use the order of the resource
+ * allocation to determine the order wqes here, because for
+ * queue length <= one cache line it is not distinct.
+ *
+ * Therefore, order wqes is computed again here.
+ *
+ * Account for hole and round up to the next order.
+ */
+ wr.wqe.cmd.mod_qp.rsq_depth =
+ order_base_2(attr->max_dest_rd_atomic + 1);
+ wr.wqe.cmd.mod_qp.rsq_index = cpu_to_le32(~0);
+ }
+
+ if ((mask & IB_QP_MAX_QP_RD_ATOMIC) && attr->max_rd_atomic) {
+ /* Account for hole and round down to the next order */
+ wr.wqe.cmd.mod_qp.rrq_depth =
+ order_base_2(attr->max_rd_atomic + 2) - 1;
+ wr.wqe.cmd.mod_qp.rrq_index = cpu_to_le32(~0);
+ }
+
+ if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC)
+ wr.wqe.cmd.mod_qp.qkey_dest_qpn =
+ cpu_to_le32(attr->dest_qp_num);
+ else
+ wr.wqe.cmd.mod_qp.qkey_dest_qpn = cpu_to_le32(attr->qkey);
+
+ if (mask & IB_QP_AV) {
+ if (!qp->hdr)
+ return -ENOMEM;
+
+ sport = rdma_get_udp_sport(grh->flow_label,
+ qp->qpid,
+ attr->dest_qp_num);
+
+ rc = ionic_build_hdr(dev, qp->hdr, &attr->ah_attr, sport, true);
+ if (rc)
+ return rc;
+
+ qp->sgid_index = grh->sgid_index;
+
+ hdr_buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!hdr_buf)
+ return -ENOMEM;
+
+ hdr_len = ib_ud_header_pack(qp->hdr, hdr_buf);
+ hdr_len -= IB_BTH_BYTES;
+ hdr_len -= IB_DETH_BYTES;
+ ibdev_dbg(&dev->ibdev, "roce packet header template\n");
+ print_hex_dump_debug("hdr ", DUMP_PREFIX_OFFSET, 16, 1,
+ hdr_buf, hdr_len, true);
+
+ hdr_dma = dma_map_single(dev->lif_cfg.hwdev, hdr_buf, hdr_len,
+ DMA_TO_DEVICE);
+
+ rc = dma_mapping_error(dev->lif_cfg.hwdev, hdr_dma);
+ if (rc)
+ goto err_dma;
+
+ if (qp->hdr->ipv4_present) {
+ wr.wqe.cmd.mod_qp.tfp_csum_profile =
+ qp->hdr->vlan_present ?
+ IONIC_TFP_CSUM_PROF_ETH_QTAG_IPV4_UDP :
+ IONIC_TFP_CSUM_PROF_ETH_IPV4_UDP;
+ } else {
+ wr.wqe.cmd.mod_qp.tfp_csum_profile =
+ qp->hdr->vlan_present ?
+ IONIC_TFP_CSUM_PROF_ETH_QTAG_IPV6_UDP :
+ IONIC_TFP_CSUM_PROF_ETH_IPV6_UDP;
+ }
+
+ wr.wqe.cmd.mod_qp.ah_id_len =
+ cpu_to_le32(qp->ahid | (hdr_len << 24));
+ wr.wqe.cmd.mod_qp.dma_addr = cpu_to_le64(hdr_dma);
+
+ wr.wqe.cmd.mod_qp.en_pcp = attr->ah_attr.sl;
+ wr.wqe.cmd.mod_qp.ip_dscp = grh->traffic_class >> 2;
+ }
+
+ ionic_admin_post(dev, &wr);
+
+ rc = ionic_admin_wait(dev, &wr, 0);
+
+ if (mask & IB_QP_AV)
+ dma_unmap_single(dev->lif_cfg.hwdev, hdr_dma, hdr_len,
+ DMA_TO_DEVICE);
+err_dma:
+ if (mask & IB_QP_AV)
+ kfree(hdr_buf);
+
+ return rc;
+}
+
+static int ionic_query_qp_cmd(struct ionic_ibdev *dev,
+ struct ionic_qp *qp,
+ struct ib_qp_attr *attr,
+ int mask)
+{
+ struct ionic_admin_wr wr = {
+ .work = COMPLETION_INITIALIZER_ONSTACK(wr.work),
+ .wqe = {
+ .op = IONIC_V1_ADMIN_QUERY_QP,
+ .len = cpu_to_le16(IONIC_ADMIN_QUERY_QP_IN_V1_LEN),
+ .cmd.query_qp = {
+ .id_ver = cpu_to_le32(qp->qpid),
+ },
+ }
+ };
+ struct ionic_v1_admin_query_qp_sq *query_sqbuf;
+ struct ionic_v1_admin_query_qp_rq *query_rqbuf;
+ dma_addr_t query_sqdma;
+ dma_addr_t query_rqdma;
+ dma_addr_t hdr_dma = 0;
+ void *hdr_buf = NULL;
+ int flags, rc;
+
+ if (dev->lif_cfg.admin_opcodes <= IONIC_V1_ADMIN_QUERY_QP)
+ return -EBADRQC;
+
+ if (qp->has_sq) {
+ bool expdb = !!(qp->sq_cmb & IONIC_CMB_EXPDB);
+
+ attr->cap.max_send_sge =
+ ionic_v1_send_wqe_max_sge(qp->sq.stride_log2,
+ qp->sq_spec,
+ expdb);
+ attr->cap.max_inline_data =
+ ionic_v1_send_wqe_max_data(qp->sq.stride_log2, expdb);
+ }
+
+ if (qp->has_rq) {
+ attr->cap.max_recv_sge =
+ ionic_v1_recv_wqe_max_sge(qp->rq.stride_log2,
+ qp->rq_spec,
+ qp->rq_cmb & IONIC_CMB_EXPDB);
+ }
+
+ query_sqbuf = kzalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!query_sqbuf)
+ return -ENOMEM;
+
+ query_rqbuf = kzalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!query_rqbuf) {
+ rc = -ENOMEM;
+ goto err_rqbuf;
+ }
+
+ query_sqdma = dma_map_single(dev->lif_cfg.hwdev, query_sqbuf, PAGE_SIZE,
+ DMA_FROM_DEVICE);
+ rc = dma_mapping_error(dev->lif_cfg.hwdev, query_sqdma);
+ if (rc)
+ goto err_sqdma;
+
+ query_rqdma = dma_map_single(dev->lif_cfg.hwdev, query_rqbuf, PAGE_SIZE,
+ DMA_FROM_DEVICE);
+ rc = dma_mapping_error(dev->lif_cfg.hwdev, query_rqdma);
+ if (rc)
+ goto err_rqdma;
+
+ if (mask & IB_QP_AV) {
+ hdr_buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!hdr_buf) {
+ rc = -ENOMEM;
+ goto err_hdrbuf;
+ }
+
+ hdr_dma = dma_map_single(dev->lif_cfg.hwdev, hdr_buf,
+ PAGE_SIZE, DMA_FROM_DEVICE);
+ rc = dma_mapping_error(dev->lif_cfg.hwdev, hdr_dma);
+ if (rc)
+ goto err_hdrdma;
+ }
+
+ wr.wqe.cmd.query_qp.sq_dma_addr = cpu_to_le64(query_sqdma);
+ wr.wqe.cmd.query_qp.rq_dma_addr = cpu_to_le64(query_rqdma);
+ wr.wqe.cmd.query_qp.hdr_dma_addr = cpu_to_le64(hdr_dma);
+ wr.wqe.cmd.query_qp.ah_id = cpu_to_le32(qp->ahid);
+
+ ionic_admin_post(dev, &wr);
+
+ rc = ionic_admin_wait(dev, &wr, 0);
+
+ if (rc)
+ goto err_hdrdma;
+
+ flags = be16_to_cpu(query_sqbuf->access_perms_flags |
+ query_rqbuf->access_perms_flags);
+
+ print_hex_dump_debug("sqbuf ", DUMP_PREFIX_OFFSET, 16, 1,
+ query_sqbuf, sizeof(*query_sqbuf), true);
+ print_hex_dump_debug("rqbuf ", DUMP_PREFIX_OFFSET, 16, 1,
+ query_rqbuf, sizeof(*query_rqbuf), true);
+ ibdev_dbg(&dev->ibdev, "query qp %u state_pmtu %#x flags %#x",
+ qp->qpid, query_rqbuf->state_pmtu, flags);
+
+ attr->qp_state = from_ionic_qp_state(query_rqbuf->state_pmtu >> 4);
+ attr->cur_qp_state = attr->qp_state;
+ attr->path_mtu = (query_rqbuf->state_pmtu & 0xf) - 7;
+ attr->path_mig_state = IB_MIG_MIGRATED;
+ attr->qkey = be32_to_cpu(query_sqbuf->qkey_dest_qpn);
+ attr->rq_psn = be32_to_cpu(query_sqbuf->rq_psn);
+ attr->sq_psn = be32_to_cpu(query_rqbuf->sq_psn);
+ attr->dest_qp_num = attr->qkey;
+ attr->qp_access_flags = from_ionic_qp_flags(flags);
+ attr->pkey_index = 0;
+ attr->alt_pkey_index = 0;
+ attr->en_sqd_async_notify = !!(flags & IONIC_QPF_SQD_NOTIFY);
+ attr->sq_draining = !!(flags & IONIC_QPF_SQ_DRAINING);
+ attr->max_rd_atomic = BIT(query_rqbuf->rrq_depth) - 1;
+ attr->max_dest_rd_atomic = BIT(query_rqbuf->rsq_depth) - 1;
+ attr->min_rnr_timer = query_sqbuf->rnr_timer;
+ attr->port_num = 0;
+ attr->timeout = query_sqbuf->retry_timeout;
+ attr->retry_cnt = query_rqbuf->retry_rnrtry & 0xf;
+ attr->rnr_retry = query_rqbuf->retry_rnrtry >> 4;
+ attr->alt_port_num = 0;
+ attr->alt_timeout = 0;
+ attr->rate_limit = be32_to_cpu(query_sqbuf->rate_limit_kbps);
+
+ if (mask & IB_QP_AV)
+ ionic_set_ah_attr(dev, &attr->ah_attr,
+ qp->hdr, qp->sgid_index);
+
+err_hdrdma:
+ if (mask & IB_QP_AV) {
+ dma_unmap_single(dev->lif_cfg.hwdev, hdr_dma,
+ PAGE_SIZE, DMA_FROM_DEVICE);
+ kfree(hdr_buf);
+ }
+err_hdrbuf:
+ dma_unmap_single(dev->lif_cfg.hwdev, query_rqdma, sizeof(*query_rqbuf),
+ DMA_FROM_DEVICE);
+err_rqdma:
+ dma_unmap_single(dev->lif_cfg.hwdev, query_sqdma, sizeof(*query_sqbuf),
+ DMA_FROM_DEVICE);
+err_sqdma:
+ kfree(query_rqbuf);
+err_rqbuf:
+ kfree(query_sqbuf);
+
+ return rc;
+}
+
+static int ionic_destroy_qp_cmd(struct ionic_ibdev *dev, u32 qpid)
+{
+ struct ionic_admin_wr wr = {
+ .work = COMPLETION_INITIALIZER_ONSTACK(wr.work),
+ .wqe = {
+ .op = IONIC_V1_ADMIN_DESTROY_QP,
+ .len = cpu_to_le16(IONIC_ADMIN_DESTROY_QP_IN_V1_LEN),
+ .cmd.destroy_qp = {
+ .qp_id = cpu_to_le32(qpid),
+ },
+ }
+ };
+
+ if (dev->lif_cfg.admin_opcodes <= IONIC_V1_ADMIN_DESTROY_QP)
+ return -EBADRQC;
+
+ ionic_admin_post(dev, &wr);
+
+ return ionic_admin_wait(dev, &wr, IONIC_ADMIN_F_TEARDOWN);
+}
+
+static bool ionic_expdb_wqe_size_supported(struct ionic_ibdev *dev,
+ uint32_t wqe_size)
+{
+ switch (wqe_size) {
+ case 64: return dev->lif_cfg.expdb_mask & IONIC_EXPDB_64;
+ case 128: return dev->lif_cfg.expdb_mask & IONIC_EXPDB_128;
+ case 256: return dev->lif_cfg.expdb_mask & IONIC_EXPDB_256;
+ case 512: return dev->lif_cfg.expdb_mask & IONIC_EXPDB_512;
+ }
+
+ return false;
+}
+
+static void ionic_qp_sq_init_cmb(struct ionic_ibdev *dev,
+ struct ionic_qp *qp,
+ struct ib_udata *udata,
+ int max_data)
+{
+ u8 expdb_stride_log2 = 0;
+ bool expdb;
+ int rc;
+
+ if (!(qp->sq_cmb & IONIC_CMB_ENABLE))
+ goto not_in_cmb;
+
+ if (qp->sq_cmb & ~IONIC_CMB_SUPPORTED) {
+ if (qp->sq_cmb & IONIC_CMB_REQUIRE)
+ goto not_in_cmb;
+
+ qp->sq_cmb &= IONIC_CMB_SUPPORTED;
+ }
+
+ if ((qp->sq_cmb & IONIC_CMB_EXPDB) && !dev->lif_cfg.sq_expdb) {
+ if (qp->sq_cmb & IONIC_CMB_REQUIRE)
+ goto not_in_cmb;
+
+ qp->sq_cmb &= ~IONIC_CMB_EXPDB;
+ }
+
+ qp->sq_cmb_order = order_base_2(qp->sq.size / PAGE_SIZE);
+
+ if (qp->sq_cmb_order >= IONIC_SQCMB_ORDER)
+ goto not_in_cmb;
+
+ if (qp->sq_cmb & IONIC_CMB_EXPDB)
+ expdb_stride_log2 = qp->sq.stride_log2;
+
+ rc = ionic_get_cmb(dev->lif_cfg.lif, &qp->sq_cmb_pgid,
+ &qp->sq_cmb_addr, qp->sq_cmb_order,
+ expdb_stride_log2, &expdb);
+ if (rc)
+ goto not_in_cmb;
+
+ if ((qp->sq_cmb & IONIC_CMB_EXPDB) && !expdb) {
+ if (qp->sq_cmb & IONIC_CMB_REQUIRE)
+ goto err_map;
+
+ qp->sq_cmb &= ~IONIC_CMB_EXPDB;
+ }
+
+ return;
+
+err_map:
+ ionic_put_cmb(dev->lif_cfg.lif, qp->sq_cmb_pgid, qp->sq_cmb_order);
+not_in_cmb:
+ if (qp->sq_cmb & IONIC_CMB_REQUIRE)
+ ibdev_dbg(&dev->ibdev, "could not place sq in cmb as required\n");
+
+ qp->sq_cmb = 0;
+ qp->sq_cmb_order = IONIC_RES_INVALID;
+ qp->sq_cmb_pgid = 0;
+ qp->sq_cmb_addr = 0;
+}
+
+static void ionic_qp_sq_destroy_cmb(struct ionic_ibdev *dev,
+ struct ionic_ctx *ctx,
+ struct ionic_qp *qp)
+{
+ if (!(qp->sq_cmb & IONIC_CMB_ENABLE))
+ return;
+
+ if (ctx)
+ rdma_user_mmap_entry_remove(qp->mmap_sq_cmb);
+
+ ionic_put_cmb(dev->lif_cfg.lif, qp->sq_cmb_pgid, qp->sq_cmb_order);
+}
+
+static int ionic_qp_sq_init(struct ionic_ibdev *dev, struct ionic_ctx *ctx,
+ struct ionic_qp *qp, struct ionic_qdesc *sq,
+ struct ionic_tbl_buf *buf, int max_wr, int max_sge,
+ int max_data, int sq_spec, struct ib_udata *udata)
+{
+ u32 wqe_size;
+ int rc = 0;
+
+ qp->sq_msn_prod = 0;
+ qp->sq_msn_cons = 0;
+
+ if (!qp->has_sq) {
+ if (buf) {
+ buf->tbl_buf = NULL;
+ buf->tbl_limit = 0;
+ buf->tbl_pages = 0;
+ }
+ if (udata)
+ rc = ionic_validate_qdesc_zero(sq);
+
+ return rc;
+ }
+
+ rc = -EINVAL;
+
+ if (max_wr < 0 || max_wr > 0xffff)
+ return rc;
+
+ if (max_sge < 1)
+ return rc;
+
+ if (max_sge > min(ionic_v1_send_wqe_max_sge(dev->lif_cfg.max_stride, 0,
+ qp->sq_cmb &
+ IONIC_CMB_EXPDB),
+ IONIC_SPEC_HIGH))
+ return rc;
+
+ if (max_data < 0)
+ return rc;
+
+ if (max_data > ionic_v1_send_wqe_max_data(dev->lif_cfg.max_stride,
+ qp->sq_cmb & IONIC_CMB_EXPDB))
+ return rc;
+
+ if (udata) {
+ rc = ionic_validate_qdesc(sq);
+ if (rc)
+ return rc;
+
+ qp->sq_spec = sq_spec;
+
+ qp->sq.ptr = NULL;
+ qp->sq.size = sq->size;
+ qp->sq.mask = sq->mask;
+ qp->sq.depth_log2 = sq->depth_log2;
+ qp->sq.stride_log2 = sq->stride_log2;
+
+ qp->sq_meta = NULL;
+ qp->sq_msn_idx = NULL;
+
+ qp->sq_umem = ib_umem_get(&dev->ibdev, sq->addr, sq->size, 0);
+ if (IS_ERR(qp->sq_umem))
+ return PTR_ERR(qp->sq_umem);
+ } else {
+ qp->sq_umem = NULL;
+
+ qp->sq_spec = ionic_v1_use_spec_sge(max_sge, sq_spec);
+ if (sq_spec && !qp->sq_spec)
+ ibdev_dbg(&dev->ibdev,
+ "init sq: max_sge %u disables spec\n",
+ max_sge);
+
+ if (qp->sq_cmb & IONIC_CMB_EXPDB) {
+ wqe_size = ionic_v1_send_wqe_min_size(max_sge, max_data,
+ qp->sq_spec,
+ true);
+
+ if (!ionic_expdb_wqe_size_supported(dev, wqe_size))
+ qp->sq_cmb &= ~IONIC_CMB_EXPDB;
+ }
+
+ if (!(qp->sq_cmb & IONIC_CMB_EXPDB))
+ wqe_size = ionic_v1_send_wqe_min_size(max_sge, max_data,
+ qp->sq_spec,
+ false);
+
+ rc = ionic_queue_init(&qp->sq, dev->lif_cfg.hwdev,
+ max_wr, wqe_size);
+ if (rc)
+ return rc;
+
+ ionic_queue_dbell_init(&qp->sq, qp->qpid);
+
+ qp->sq_meta = kmalloc_array((u32)qp->sq.mask + 1,
+ sizeof(*qp->sq_meta),
+ GFP_KERNEL);
+ if (!qp->sq_meta) {
+ rc = -ENOMEM;
+ goto err_sq_meta;
+ }
+
+ qp->sq_msn_idx = kmalloc_array((u32)qp->sq.mask + 1,
+ sizeof(*qp->sq_msn_idx),
+ GFP_KERNEL);
+ if (!qp->sq_msn_idx) {
+ rc = -ENOMEM;
+ goto err_sq_msn;
+ }
+ }
+
+ ionic_qp_sq_init_cmb(dev, qp, udata, max_data);
+
+ if (qp->sq_cmb & IONIC_CMB_ENABLE)
+ rc = ionic_pgtbl_init(dev, buf, NULL,
+ (u64)qp->sq_cmb_pgid << PAGE_SHIFT,
+ 1, PAGE_SIZE);
+ else
+ rc = ionic_pgtbl_init(dev, buf,
+ qp->sq_umem, qp->sq.dma, 1, PAGE_SIZE);
+ if (rc)
+ goto err_sq_tbl;
+
+ return 0;
+
+err_sq_tbl:
+ ionic_qp_sq_destroy_cmb(dev, ctx, qp);
+ kfree(qp->sq_msn_idx);
+err_sq_msn:
+ kfree(qp->sq_meta);
+err_sq_meta:
+ if (qp->sq_umem)
+ ib_umem_release(qp->sq_umem);
+ else
+ ionic_queue_destroy(&qp->sq, dev->lif_cfg.hwdev);
+ return rc;
+}
+
+static void ionic_qp_sq_destroy(struct ionic_ibdev *dev,
+ struct ionic_ctx *ctx,
+ struct ionic_qp *qp)
+{
+ if (!qp->has_sq)
+ return;
+
+ ionic_qp_sq_destroy_cmb(dev, ctx, qp);
+
+ kfree(qp->sq_msn_idx);
+ kfree(qp->sq_meta);
+
+ if (qp->sq_umem)
+ ib_umem_release(qp->sq_umem);
+ else
+ ionic_queue_destroy(&qp->sq, dev->lif_cfg.hwdev);
+}
+
+static void ionic_qp_rq_init_cmb(struct ionic_ibdev *dev,
+ struct ionic_qp *qp,
+ struct ib_udata *udata)
+{
+ u8 expdb_stride_log2 = 0;
+ bool expdb;
+ int rc;
+
+ if (!(qp->rq_cmb & IONIC_CMB_ENABLE))
+ goto not_in_cmb;
+
+ if (qp->rq_cmb & ~IONIC_CMB_SUPPORTED) {
+ if (qp->rq_cmb & IONIC_CMB_REQUIRE)
+ goto not_in_cmb;
+
+ qp->rq_cmb &= IONIC_CMB_SUPPORTED;
+ }
+
+ if ((qp->rq_cmb & IONIC_CMB_EXPDB) && !dev->lif_cfg.rq_expdb) {
+ if (qp->rq_cmb & IONIC_CMB_REQUIRE)
+ goto not_in_cmb;
+
+ qp->rq_cmb &= ~IONIC_CMB_EXPDB;
+ }
+
+ qp->rq_cmb_order = order_base_2(qp->rq.size / PAGE_SIZE);
+
+ if (qp->rq_cmb_order >= IONIC_RQCMB_ORDER)
+ goto not_in_cmb;
+
+ if (qp->rq_cmb & IONIC_CMB_EXPDB)
+ expdb_stride_log2 = qp->rq.stride_log2;
+
+ rc = ionic_get_cmb(dev->lif_cfg.lif, &qp->rq_cmb_pgid,
+ &qp->rq_cmb_addr, qp->rq_cmb_order,
+ expdb_stride_log2, &expdb);
+ if (rc)
+ goto not_in_cmb;
+
+ if ((qp->rq_cmb & IONIC_CMB_EXPDB) && !expdb) {
+ if (qp->rq_cmb & IONIC_CMB_REQUIRE)
+ goto err_map;
+
+ qp->rq_cmb &= ~IONIC_CMB_EXPDB;
+ }
+
+ return;
+
+err_map:
+ ionic_put_cmb(dev->lif_cfg.lif, qp->rq_cmb_pgid, qp->rq_cmb_order);
+not_in_cmb:
+ if (qp->rq_cmb & IONIC_CMB_REQUIRE)
+ ibdev_dbg(&dev->ibdev, "could not place rq in cmb as required\n");
+
+ qp->rq_cmb = 0;
+ qp->rq_cmb_order = IONIC_RES_INVALID;
+ qp->rq_cmb_pgid = 0;
+ qp->rq_cmb_addr = 0;
+}
+
+static void ionic_qp_rq_destroy_cmb(struct ionic_ibdev *dev,
+ struct ionic_ctx *ctx,
+ struct ionic_qp *qp)
+{
+ if (!(qp->rq_cmb & IONIC_CMB_ENABLE))
+ return;
+
+ if (ctx)
+ rdma_user_mmap_entry_remove(qp->mmap_rq_cmb);
+
+ ionic_put_cmb(dev->lif_cfg.lif, qp->rq_cmb_pgid, qp->rq_cmb_order);
+}
+
+static int ionic_qp_rq_init(struct ionic_ibdev *dev, struct ionic_ctx *ctx,
+ struct ionic_qp *qp, struct ionic_qdesc *rq,
+ struct ionic_tbl_buf *buf, int max_wr, int max_sge,
+ int rq_spec, struct ib_udata *udata)
+{
+ int rc = 0, i;
+ u32 wqe_size;
+
+ if (!qp->has_rq) {
+ if (buf) {
+ buf->tbl_buf = NULL;
+ buf->tbl_limit = 0;
+ buf->tbl_pages = 0;
+ }
+ if (udata)
+ rc = ionic_validate_qdesc_zero(rq);
+
+ return rc;
+ }
+
+ rc = -EINVAL;
+
+ if (max_wr < 0 || max_wr > 0xffff)
+ return rc;
+
+ if (max_sge < 1)
+ return rc;
+
+ if (max_sge > min(ionic_v1_recv_wqe_max_sge(dev->lif_cfg.max_stride, 0, false),
+ IONIC_SPEC_HIGH))
+ return rc;
+
+ if (udata) {
+ rc = ionic_validate_qdesc(rq);
+ if (rc)
+ return rc;
+
+ qp->rq_spec = rq_spec;
+
+ qp->rq.ptr = NULL;
+ qp->rq.size = rq->size;
+ qp->rq.mask = rq->mask;
+ qp->rq.depth_log2 = rq->depth_log2;
+ qp->rq.stride_log2 = rq->stride_log2;
+
+ qp->rq_meta = NULL;
+
+ qp->rq_umem = ib_umem_get(&dev->ibdev, rq->addr, rq->size, 0);
+ if (IS_ERR(qp->rq_umem))
+ return PTR_ERR(qp->rq_umem);
+ } else {
+ qp->rq_umem = NULL;
+
+ qp->rq_spec = ionic_v1_use_spec_sge(max_sge, rq_spec);
+ if (rq_spec && !qp->rq_spec)
+ ibdev_dbg(&dev->ibdev,
+ "init rq: max_sge %u disables spec\n",
+ max_sge);
+
+ if (qp->rq_cmb & IONIC_CMB_EXPDB) {
+ wqe_size = ionic_v1_recv_wqe_min_size(max_sge,
+ qp->rq_spec,
+ true);
+
+ if (!ionic_expdb_wqe_size_supported(dev, wqe_size))
+ qp->rq_cmb &= ~IONIC_CMB_EXPDB;
+ }
+
+ if (!(qp->rq_cmb & IONIC_CMB_EXPDB))
+ wqe_size = ionic_v1_recv_wqe_min_size(max_sge,
+ qp->rq_spec,
+ false);
+
+ rc = ionic_queue_init(&qp->rq, dev->lif_cfg.hwdev,
+ max_wr, wqe_size);
+ if (rc)
+ return rc;
+
+ ionic_queue_dbell_init(&qp->rq, qp->qpid);
+
+ qp->rq_meta = kmalloc_array((u32)qp->rq.mask + 1,
+ sizeof(*qp->rq_meta),
+ GFP_KERNEL);
+ if (!qp->rq_meta) {
+ rc = -ENOMEM;
+ goto err_rq_meta;
+ }
+
+ for (i = 0; i < qp->rq.mask; ++i)
+ qp->rq_meta[i].next = &qp->rq_meta[i + 1];
+ qp->rq_meta[i].next = IONIC_META_LAST;
+ qp->rq_meta_head = &qp->rq_meta[0];
+ }
+
+ ionic_qp_rq_init_cmb(dev, qp, udata);
+
+ if (qp->rq_cmb & IONIC_CMB_ENABLE)
+ rc = ionic_pgtbl_init(dev, buf, NULL,
+ (u64)qp->rq_cmb_pgid << PAGE_SHIFT,
+ 1, PAGE_SIZE);
+ else
+ rc = ionic_pgtbl_init(dev, buf,
+ qp->rq_umem, qp->rq.dma, 1, PAGE_SIZE);
+ if (rc)
+ goto err_rq_tbl;
+
+ return 0;
+
+err_rq_tbl:
+ ionic_qp_rq_destroy_cmb(dev, ctx, qp);
+ kfree(qp->rq_meta);
+err_rq_meta:
+ if (qp->rq_umem)
+ ib_umem_release(qp->rq_umem);
+ else
+ ionic_queue_destroy(&qp->rq, dev->lif_cfg.hwdev);
+ return rc;
+}
+
+static void ionic_qp_rq_destroy(struct ionic_ibdev *dev,
+ struct ionic_ctx *ctx,
+ struct ionic_qp *qp)
+{
+ if (!qp->has_rq)
+ return;
+
+ ionic_qp_rq_destroy_cmb(dev, ctx, qp);
+
+ kfree(qp->rq_meta);
+
+ if (qp->rq_umem)
+ ib_umem_release(qp->rq_umem);
+ else
+ ionic_queue_destroy(&qp->rq, dev->lif_cfg.hwdev);
+}
+
+int ionic_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attr,
+ struct ib_udata *udata)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(ibqp->device);
+ struct ionic_tbl_buf sq_buf = {}, rq_buf = {};
+ struct ionic_pd *pd = to_ionic_pd(ibqp->pd);
+ struct ionic_qp *qp = to_ionic_qp(ibqp);
+ struct ionic_ctx *ctx =
+ rdma_udata_to_drv_context(udata, struct ionic_ctx, ibctx);
+ struct ionic_qp_resp resp = {};
+ struct ionic_qp_req req = {};
+ struct ionic_cq *cq;
+ u8 udma_mask;
+ void *entry;
+ int rc;
+
+ if (udata) {
+ rc = ib_copy_from_udata(&req, udata, sizeof(req));
+ if (rc)
+ return rc;
+ } else {
+ req.sq_spec = IONIC_SPEC_HIGH;
+ req.rq_spec = IONIC_SPEC_HIGH;
+ }
+
+ if (attr->qp_type == IB_QPT_SMI || attr->qp_type > IB_QPT_UD)
+ return -EOPNOTSUPP;
+
+ qp->state = IB_QPS_RESET;
+
+ INIT_LIST_HEAD(&qp->cq_poll_sq);
+ INIT_LIST_HEAD(&qp->cq_flush_sq);
+ INIT_LIST_HEAD(&qp->cq_flush_rq);
+
+ spin_lock_init(&qp->sq_lock);
+ spin_lock_init(&qp->rq_lock);
+
+ qp->has_sq = 1;
+ qp->has_rq = 1;
+
+ if (attr->qp_type == IB_QPT_GSI) {
+ rc = ionic_get_gsi_qpid(dev, &qp->qpid);
+ } else {
+ udma_mask = BIT(dev->lif_cfg.udma_count) - 1;
+
+ if (qp->has_sq)
+ udma_mask &= to_ionic_vcq(attr->send_cq)->udma_mask;
+
+ if (qp->has_rq)
+ udma_mask &= to_ionic_vcq(attr->recv_cq)->udma_mask;
+
+ if (udata && req.udma_mask)
+ udma_mask &= req.udma_mask;
+
+ if (!udma_mask)
+ return -EINVAL;
+
+ rc = ionic_get_qpid(dev, &qp->qpid, &qp->udma_idx, udma_mask);
+ }
+ if (rc)
+ return rc;
+
+ qp->sig_all = attr->sq_sig_type == IB_SIGNAL_ALL_WR;
+ qp->has_ah = attr->qp_type == IB_QPT_RC;
+
+ if (qp->has_ah) {
+ qp->hdr = kzalloc(sizeof(*qp->hdr), GFP_KERNEL);
+ if (!qp->hdr) {
+ rc = -ENOMEM;
+ goto err_ah_alloc;
+ }
+
+ rc = ionic_get_ahid(dev, &qp->ahid);
+ if (rc)
+ goto err_ahid;
+ }
+
+ if (udata) {
+ if (req.rq_cmb & IONIC_CMB_ENABLE)
+ qp->rq_cmb = req.rq_cmb;
+
+ if (req.sq_cmb & IONIC_CMB_ENABLE)
+ qp->sq_cmb = req.sq_cmb;
+ }
+
+ rc = ionic_qp_sq_init(dev, ctx, qp, &req.sq, &sq_buf,
+ attr->cap.max_send_wr, attr->cap.max_send_sge,
+ attr->cap.max_inline_data, req.sq_spec, udata);
+ if (rc)
+ goto err_sq;
+
+ rc = ionic_qp_rq_init(dev, ctx, qp, &req.rq, &rq_buf,
+ attr->cap.max_recv_wr, attr->cap.max_recv_sge,
+ req.rq_spec, udata);
+ if (rc)
+ goto err_rq;
+
+ rc = ionic_create_qp_cmd(dev, pd,
+ to_ionic_vcq_cq(attr->send_cq, qp->udma_idx),
+ to_ionic_vcq_cq(attr->recv_cq, qp->udma_idx),
+ qp, &sq_buf, &rq_buf, attr);
+ if (rc)
+ goto err_cmd;
+
+ if (udata) {
+ resp.qpid = qp->qpid;
+ resp.udma_idx = qp->udma_idx;
+
+ if (qp->sq_cmb & IONIC_CMB_ENABLE) {
+ bool wc;
+
+ if ((qp->sq_cmb & (IONIC_CMB_WC | IONIC_CMB_UC)) ==
+ (IONIC_CMB_WC | IONIC_CMB_UC)) {
+ ibdev_dbg(&dev->ibdev,
+ "Both sq_cmb flags IONIC_CMB_WC and IONIC_CMB_UC are set, using default driver mapping\n");
+ qp->sq_cmb &= ~(IONIC_CMB_WC | IONIC_CMB_UC);
+ }
+
+ wc = (qp->sq_cmb & (IONIC_CMB_WC | IONIC_CMB_UC))
+ != IONIC_CMB_UC;
+
+ /* let userspace know the mapping */
+ if (wc)
+ qp->sq_cmb |= IONIC_CMB_WC;
+ else
+ qp->sq_cmb |= IONIC_CMB_UC;
+
+ qp->mmap_sq_cmb =
+ ionic_mmap_entry_insert(ctx,
+ qp->sq.size,
+ PHYS_PFN(qp->sq_cmb_addr),
+ wc ? IONIC_MMAP_WC : 0,
+ &resp.sq_cmb_offset);
+ if (!qp->mmap_sq_cmb) {
+ rc = -ENOMEM;
+ goto err_mmap_sq;
+ }
+
+ resp.sq_cmb = qp->sq_cmb;
+ }
+
+ if (qp->rq_cmb & IONIC_CMB_ENABLE) {
+ bool wc;
+
+ if ((qp->rq_cmb & (IONIC_CMB_WC | IONIC_CMB_UC)) ==
+ (IONIC_CMB_WC | IONIC_CMB_UC)) {
+ ibdev_dbg(&dev->ibdev,
+ "Both rq_cmb flags IONIC_CMB_WC and IONIC_CMB_UC are set, using default driver mapping\n");
+ qp->rq_cmb &= ~(IONIC_CMB_WC | IONIC_CMB_UC);
+ }
+
+ if (qp->rq_cmb & IONIC_CMB_EXPDB)
+ wc = (qp->rq_cmb & (IONIC_CMB_WC | IONIC_CMB_UC))
+ == IONIC_CMB_WC;
+ else
+ wc = (qp->rq_cmb & (IONIC_CMB_WC | IONIC_CMB_UC))
+ != IONIC_CMB_UC;
+
+ /* let userspace know the mapping */
+ if (wc)
+ qp->rq_cmb |= IONIC_CMB_WC;
+ else
+ qp->rq_cmb |= IONIC_CMB_UC;
+
+ qp->mmap_rq_cmb =
+ ionic_mmap_entry_insert(ctx,
+ qp->rq.size,
+ PHYS_PFN(qp->rq_cmb_addr),
+ wc ? IONIC_MMAP_WC : 0,
+ &resp.rq_cmb_offset);
+ if (!qp->mmap_rq_cmb) {
+ rc = -ENOMEM;
+ goto err_mmap_rq;
+ }
+
+ resp.rq_cmb = qp->rq_cmb;
+ }
+
+ rc = ib_copy_to_udata(udata, &resp, sizeof(resp));
+ if (rc)
+ goto err_resp;
+ }
+
+ ionic_pgtbl_unbuf(dev, &rq_buf);
+ ionic_pgtbl_unbuf(dev, &sq_buf);
+
+ qp->ibqp.qp_num = qp->qpid;
+
+ init_completion(&qp->qp_rel_comp);
+ kref_init(&qp->qp_kref);
+
+ entry = xa_store_irq(&dev->qp_tbl, qp->qpid, qp, GFP_KERNEL);
+ if (entry) {
+ if (!xa_is_err(entry))
+ rc = -EINVAL;
+ else
+ rc = xa_err(entry);
+
+ goto err_resp;
+ }
+
+ if (qp->has_sq) {
+ cq = to_ionic_vcq_cq(attr->send_cq, qp->udma_idx);
+
+ attr->cap.max_send_wr = qp->sq.mask;
+ attr->cap.max_send_sge =
+ ionic_v1_send_wqe_max_sge(qp->sq.stride_log2,
+ qp->sq_spec,
+ qp->sq_cmb & IONIC_CMB_EXPDB);
+ attr->cap.max_inline_data =
+ ionic_v1_send_wqe_max_data(qp->sq.stride_log2,
+ qp->sq_cmb &
+ IONIC_CMB_EXPDB);
+ qp->sq_cqid = cq->cqid;
+ }
+
+ if (qp->has_rq) {
+ cq = to_ionic_vcq_cq(attr->recv_cq, qp->udma_idx);
+
+ attr->cap.max_recv_wr = qp->rq.mask;
+ attr->cap.max_recv_sge =
+ ionic_v1_recv_wqe_max_sge(qp->rq.stride_log2,
+ qp->rq_spec,
+ qp->rq_cmb & IONIC_CMB_EXPDB);
+ qp->rq_cqid = cq->cqid;
+ }
+
+ return 0;
+
+err_resp:
+ if (udata && (qp->rq_cmb & IONIC_CMB_ENABLE))
+ rdma_user_mmap_entry_remove(qp->mmap_rq_cmb);
+err_mmap_rq:
+ if (udata && (qp->sq_cmb & IONIC_CMB_ENABLE))
+ rdma_user_mmap_entry_remove(qp->mmap_sq_cmb);
+err_mmap_sq:
+ ionic_destroy_qp_cmd(dev, qp->qpid);
+err_cmd:
+ ionic_pgtbl_unbuf(dev, &rq_buf);
+ ionic_qp_rq_destroy(dev, ctx, qp);
+err_rq:
+ ionic_pgtbl_unbuf(dev, &sq_buf);
+ ionic_qp_sq_destroy(dev, ctx, qp);
+err_sq:
+ if (qp->has_ah)
+ ionic_put_ahid(dev, qp->ahid);
+err_ahid:
+ kfree(qp->hdr);
+err_ah_alloc:
+ ionic_put_qpid(dev, qp->qpid);
+ return rc;
+}
+
+void ionic_notify_flush_cq(struct ionic_cq *cq)
+{
+ if (cq->flush && cq->vcq->ibcq.comp_handler)
+ cq->vcq->ibcq.comp_handler(&cq->vcq->ibcq,
+ cq->vcq->ibcq.cq_context);
+}
+
+static void ionic_notify_qp_cqs(struct ionic_ibdev *dev, struct ionic_qp *qp)
+{
+ if (qp->ibqp.send_cq)
+ ionic_notify_flush_cq(to_ionic_vcq_cq(qp->ibqp.send_cq,
+ qp->udma_idx));
+ if (qp->ibqp.recv_cq && qp->ibqp.recv_cq != qp->ibqp.send_cq)
+ ionic_notify_flush_cq(to_ionic_vcq_cq(qp->ibqp.recv_cq,
+ qp->udma_idx));
+}
+
+void ionic_flush_qp(struct ionic_ibdev *dev, struct ionic_qp *qp)
+{
+ unsigned long irqflags;
+ struct ionic_cq *cq;
+
+ if (qp->ibqp.send_cq) {
+ cq = to_ionic_vcq_cq(qp->ibqp.send_cq, qp->udma_idx);
+
+ /* Hold the CQ lock and QP sq_lock to set up flush */
+ spin_lock_irqsave(&cq->lock, irqflags);
+ spin_lock(&qp->sq_lock);
+ qp->sq_flush = true;
+ if (!ionic_queue_empty(&qp->sq)) {
+ cq->flush = true;
+ list_move_tail(&qp->cq_flush_sq, &cq->flush_sq);
+ }
+ spin_unlock(&qp->sq_lock);
+ spin_unlock_irqrestore(&cq->lock, irqflags);
+ }
+
+ if (qp->ibqp.recv_cq) {
+ cq = to_ionic_vcq_cq(qp->ibqp.recv_cq, qp->udma_idx);
+
+ /* Hold the CQ lock and QP rq_lock to set up flush */
+ spin_lock_irqsave(&cq->lock, irqflags);
+ spin_lock(&qp->rq_lock);
+ qp->rq_flush = true;
+ if (!ionic_queue_empty(&qp->rq)) {
+ cq->flush = true;
+ list_move_tail(&qp->cq_flush_rq, &cq->flush_rq);
+ }
+ spin_unlock(&qp->rq_lock);
+ spin_unlock_irqrestore(&cq->lock, irqflags);
+ }
+}
+
+static void ionic_clean_cq(struct ionic_cq *cq, u32 qpid)
+{
+ struct ionic_v1_cqe *qcqe;
+ int prod, qtf, qid, type;
+ bool color;
+
+ if (!cq->q.ptr)
+ return;
+
+ color = cq->color;
+ prod = cq->q.prod;
+ qcqe = ionic_queue_at(&cq->q, prod);
+
+ while (color == ionic_v1_cqe_color(qcqe)) {
+ qtf = ionic_v1_cqe_qtf(qcqe);
+ qid = ionic_v1_cqe_qtf_qid(qtf);
+ type = ionic_v1_cqe_qtf_type(qtf);
+
+ if (qid == qpid && type != IONIC_V1_CQE_TYPE_ADMIN)
+ ionic_v1_cqe_clean(qcqe);
+
+ prod = ionic_queue_next(&cq->q, prod);
+ qcqe = ionic_queue_at(&cq->q, prod);
+ color = ionic_color_wrap(prod, color);
+ }
+}
+
+static void ionic_reset_qp(struct ionic_ibdev *dev, struct ionic_qp *qp)
+{
+ unsigned long irqflags;
+ struct ionic_cq *cq;
+ int i;
+
+ local_irq_save(irqflags);
+
+ if (qp->ibqp.send_cq) {
+ cq = to_ionic_vcq_cq(qp->ibqp.send_cq, qp->udma_idx);
+ spin_lock(&cq->lock);
+ ionic_clean_cq(cq, qp->qpid);
+ spin_unlock(&cq->lock);
+ }
+
+ if (qp->ibqp.recv_cq) {
+ cq = to_ionic_vcq_cq(qp->ibqp.recv_cq, qp->udma_idx);
+ spin_lock(&cq->lock);
+ ionic_clean_cq(cq, qp->qpid);
+ spin_unlock(&cq->lock);
+ }
+
+ if (qp->has_sq) {
+ spin_lock(&qp->sq_lock);
+ qp->sq_flush = false;
+ qp->sq_flush_rcvd = false;
+ qp->sq_msn_prod = 0;
+ qp->sq_msn_cons = 0;
+ qp->sq.prod = 0;
+ qp->sq.cons = 0;
+ spin_unlock(&qp->sq_lock);
+ }
+
+ if (qp->has_rq) {
+ spin_lock(&qp->rq_lock);
+ qp->rq_flush = false;
+ qp->rq.prod = 0;
+ qp->rq.cons = 0;
+ if (qp->rq_meta) {
+ for (i = 0; i < qp->rq.mask; ++i)
+ qp->rq_meta[i].next = &qp->rq_meta[i + 1];
+ qp->rq_meta[i].next = IONIC_META_LAST;
+ }
+ qp->rq_meta_head = &qp->rq_meta[0];
+ spin_unlock(&qp->rq_lock);
+ }
+
+ local_irq_restore(irqflags);
+}
+
+static bool ionic_qp_cur_state_is_ok(enum ib_qp_state q_state,
+ enum ib_qp_state attr_state)
+{
+ if (q_state == attr_state)
+ return true;
+
+ if (attr_state == IB_QPS_ERR)
+ return true;
+
+ if (attr_state == IB_QPS_SQE)
+ return q_state == IB_QPS_RTS || q_state == IB_QPS_SQD;
+
+ return false;
+}
+
+static int ionic_check_modify_qp(struct ionic_qp *qp, struct ib_qp_attr *attr,
+ int mask)
+{
+ enum ib_qp_state cur_state = (mask & IB_QP_CUR_STATE) ?
+ attr->cur_qp_state : qp->state;
+ enum ib_qp_state next_state = (mask & IB_QP_STATE) ?
+ attr->qp_state : cur_state;
+
+ if ((mask & IB_QP_CUR_STATE) &&
+ !ionic_qp_cur_state_is_ok(qp->state, attr->cur_qp_state))
+ return -EINVAL;
+
+ if (!ib_modify_qp_is_ok(cur_state, next_state, qp->ibqp.qp_type, mask))
+ return -EINVAL;
+
+ /* unprivileged qp not allowed privileged qkey */
+ if ((mask & IB_QP_QKEY) && (attr->qkey & 0x80000000) &&
+ qp->ibqp.uobject)
+ return -EPERM;
+
+ return 0;
+}
+
+int ionic_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int mask,
+ struct ib_udata *udata)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(ibqp->device);
+ struct ionic_pd *pd = to_ionic_pd(ibqp->pd);
+ struct ionic_qp *qp = to_ionic_qp(ibqp);
+ int rc;
+
+ rc = ionic_check_modify_qp(qp, attr, mask);
+ if (rc)
+ return rc;
+
+ if (mask & IB_QP_CAP)
+ return -EINVAL;
+
+ rc = ionic_modify_qp_cmd(dev, pd, qp, attr, mask);
+ if (rc)
+ return rc;
+
+ if (mask & IB_QP_STATE) {
+ qp->state = attr->qp_state;
+
+ if (attr->qp_state == IB_QPS_ERR) {
+ ionic_flush_qp(dev, qp);
+ ionic_notify_qp_cqs(dev, qp);
+ } else if (attr->qp_state == IB_QPS_RESET) {
+ ionic_reset_qp(dev, qp);
+ }
+ }
+
+ return 0;
+}
+
+int ionic_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ int mask, struct ib_qp_init_attr *init_attr)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(ibqp->device);
+ struct ionic_qp *qp = to_ionic_qp(ibqp);
+ int rc;
+
+ memset(attr, 0, sizeof(*attr));
+ memset(init_attr, 0, sizeof(*init_attr));
+
+ rc = ionic_query_qp_cmd(dev, qp, attr, mask);
+ if (rc)
+ return rc;
+
+ if (qp->has_sq)
+ attr->cap.max_send_wr = qp->sq.mask;
+
+ if (qp->has_rq)
+ attr->cap.max_recv_wr = qp->rq.mask;
+
+ init_attr->event_handler = ibqp->event_handler;
+ init_attr->qp_context = ibqp->qp_context;
+ init_attr->send_cq = ibqp->send_cq;
+ init_attr->recv_cq = ibqp->recv_cq;
+ init_attr->srq = ibqp->srq;
+ init_attr->xrcd = ibqp->xrcd;
+ init_attr->cap = attr->cap;
+ init_attr->sq_sig_type = qp->sig_all ?
+ IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR;
+ init_attr->qp_type = ibqp->qp_type;
+ init_attr->create_flags = 0;
+ init_attr->port_num = 0;
+ init_attr->rwq_ind_tbl = ibqp->rwq_ind_tbl;
+ init_attr->source_qpn = 0;
+
+ return rc;
+}
+
+int ionic_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
+{
+ struct ionic_ctx *ctx =
+ rdma_udata_to_drv_context(udata, struct ionic_ctx, ibctx);
+ struct ionic_ibdev *dev = to_ionic_ibdev(ibqp->device);
+ struct ionic_qp *qp = to_ionic_qp(ibqp);
+ unsigned long irqflags;
+ struct ionic_cq *cq;
+ int rc;
+
+ rc = ionic_destroy_qp_cmd(dev, qp->qpid);
+ if (rc)
+ return rc;
+
+ xa_erase_irq(&dev->qp_tbl, qp->qpid);
+
+ kref_put(&qp->qp_kref, ionic_qp_complete);
+ wait_for_completion(&qp->qp_rel_comp);
+
+ if (qp->ibqp.send_cq) {
+ cq = to_ionic_vcq_cq(qp->ibqp.send_cq, qp->udma_idx);
+ spin_lock_irqsave(&cq->lock, irqflags);
+ ionic_clean_cq(cq, qp->qpid);
+ list_del(&qp->cq_poll_sq);
+ list_del(&qp->cq_flush_sq);
+ spin_unlock_irqrestore(&cq->lock, irqflags);
+ }
+
+ if (qp->ibqp.recv_cq) {
+ cq = to_ionic_vcq_cq(qp->ibqp.recv_cq, qp->udma_idx);
+ spin_lock_irqsave(&cq->lock, irqflags);
+ ionic_clean_cq(cq, qp->qpid);
+ list_del(&qp->cq_flush_rq);
+ spin_unlock_irqrestore(&cq->lock, irqflags);
+ }
+
+ ionic_qp_rq_destroy(dev, ctx, qp);
+ ionic_qp_sq_destroy(dev, ctx, qp);
+ if (qp->has_ah) {
+ ionic_put_ahid(dev, qp->ahid);
+ kfree(qp->hdr);
+ }
+ ionic_put_qpid(dev, qp->qpid);
+
+ return 0;
+}
diff --git a/drivers/infiniband/hw/ionic/ionic_datapath.c b/drivers/infiniband/hw/ionic/ionic_datapath.c
new file mode 100644
index 000000000000..aa2944887f23
--- /dev/null
+++ b/drivers/infiniband/hw/ionic/ionic_datapath.c
@@ -0,0 +1,1399 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2018-2025, Advanced Micro Devices, Inc. */
+
+#include <linux/module.h>
+#include <linux/printk.h>
+#include <rdma/ib_addr.h>
+#include <rdma/ib_user_verbs.h>
+
+#include "ionic_fw.h"
+#include "ionic_ibdev.h"
+
+#define IONIC_OP(version, opname) \
+ ((version) < 2 ? IONIC_V1_OP_##opname : IONIC_V2_OP_##opname)
+
+static bool ionic_next_cqe(struct ionic_ibdev *dev, struct ionic_cq *cq,
+ struct ionic_v1_cqe **cqe)
+{
+ struct ionic_v1_cqe *qcqe = ionic_queue_at_prod(&cq->q);
+
+ if (unlikely(cq->color != ionic_v1_cqe_color(qcqe)))
+ return false;
+
+ /* Prevent out-of-order reads of the CQE */
+ dma_rmb();
+
+ *cqe = qcqe;
+
+ return true;
+}
+
+static int ionic_flush_recv(struct ionic_qp *qp, struct ib_wc *wc)
+{
+ struct ionic_rq_meta *meta;
+ struct ionic_v1_wqe *wqe;
+
+ if (!qp->rq_flush)
+ return 0;
+
+ if (ionic_queue_empty(&qp->rq))
+ return 0;
+
+ wqe = ionic_queue_at_cons(&qp->rq);
+
+ /* wqe_id must be a valid queue index */
+ if (unlikely(wqe->base.wqe_id >> qp->rq.depth_log2)) {
+ ibdev_warn(qp->ibqp.device,
+ "flush qp %u recv index %llu invalid\n",
+ qp->qpid, (unsigned long long)wqe->base.wqe_id);
+ return -EIO;
+ }
+
+ /* wqe_id must indicate a request that is outstanding */
+ meta = &qp->rq_meta[wqe->base.wqe_id];
+ if (unlikely(meta->next != IONIC_META_POSTED)) {
+ ibdev_warn(qp->ibqp.device,
+ "flush qp %u recv index %llu not posted\n",
+ qp->qpid, (unsigned long long)wqe->base.wqe_id);
+ return -EIO;
+ }
+
+ ionic_queue_consume(&qp->rq);
+
+ memset(wc, 0, sizeof(*wc));
+
+ wc->status = IB_WC_WR_FLUSH_ERR;
+ wc->wr_id = meta->wrid;
+ wc->qp = &qp->ibqp;
+
+ meta->next = qp->rq_meta_head;
+ qp->rq_meta_head = meta;
+
+ return 1;
+}
+
+static int ionic_flush_recv_many(struct ionic_qp *qp,
+ struct ib_wc *wc, int nwc)
+{
+ int rc = 0, npolled = 0;
+
+ while (npolled < nwc) {
+ rc = ionic_flush_recv(qp, wc + npolled);
+ if (rc <= 0)
+ break;
+
+ npolled += rc;
+ }
+
+ return npolled ?: rc;
+}
+
+static int ionic_flush_send(struct ionic_qp *qp, struct ib_wc *wc)
+{
+ struct ionic_sq_meta *meta;
+
+ if (!qp->sq_flush)
+ return 0;
+
+ if (ionic_queue_empty(&qp->sq))
+ return 0;
+
+ meta = &qp->sq_meta[qp->sq.cons];
+
+ ionic_queue_consume(&qp->sq);
+
+ memset(wc, 0, sizeof(*wc));
+
+ wc->status = IB_WC_WR_FLUSH_ERR;
+ wc->wr_id = meta->wrid;
+ wc->qp = &qp->ibqp;
+
+ return 1;
+}
+
+static int ionic_flush_send_many(struct ionic_qp *qp,
+ struct ib_wc *wc, int nwc)
+{
+ int rc = 0, npolled = 0;
+
+ while (npolled < nwc) {
+ rc = ionic_flush_send(qp, wc + npolled);
+ if (rc <= 0)
+ break;
+
+ npolled += rc;
+ }
+
+ return npolled ?: rc;
+}
+
+static int ionic_poll_recv(struct ionic_ibdev *dev, struct ionic_cq *cq,
+ struct ionic_qp *cqe_qp, struct ionic_v1_cqe *cqe,
+ struct ib_wc *wc)
+{
+ struct ionic_qp *qp = NULL;
+ struct ionic_rq_meta *meta;
+ u32 src_qpn, st_len;
+ u16 vlan_tag;
+ u8 op;
+
+ if (cqe_qp->rq_flush)
+ return 0;
+
+ qp = cqe_qp;
+
+ st_len = be32_to_cpu(cqe->status_length);
+
+ /* ignore wqe_id in case of flush error */
+ if (ionic_v1_cqe_error(cqe) && st_len == IONIC_STS_WQE_FLUSHED_ERR) {
+ cqe_qp->rq_flush = true;
+ cq->flush = true;
+ list_move_tail(&qp->cq_flush_rq, &cq->flush_rq);
+
+ /* posted recvs (if any) flushed by ionic_flush_recv */
+ return 0;
+ }
+
+ /* there had better be something in the recv queue to complete */
+ if (ionic_queue_empty(&qp->rq)) {
+ ibdev_warn(&dev->ibdev, "qp %u is empty\n", qp->qpid);
+ return -EIO;
+ }
+
+ /* wqe_id must be a valid queue index */
+ if (unlikely(cqe->recv.wqe_id >> qp->rq.depth_log2)) {
+ ibdev_warn(&dev->ibdev,
+ "qp %u recv index %llu invalid\n",
+ qp->qpid, (unsigned long long)cqe->recv.wqe_id);
+ return -EIO;
+ }
+
+ /* wqe_id must indicate a request that is outstanding */
+ meta = &qp->rq_meta[cqe->recv.wqe_id];
+ if (unlikely(meta->next != IONIC_META_POSTED)) {
+ ibdev_warn(&dev->ibdev,
+ "qp %u recv index %llu not posted\n",
+ qp->qpid, (unsigned long long)cqe->recv.wqe_id);
+ return -EIO;
+ }
+
+ meta->next = qp->rq_meta_head;
+ qp->rq_meta_head = meta;
+
+ memset(wc, 0, sizeof(*wc));
+
+ wc->wr_id = meta->wrid;
+
+ wc->qp = &cqe_qp->ibqp;
+
+ if (ionic_v1_cqe_error(cqe)) {
+ wc->vendor_err = st_len;
+ wc->status = ionic_to_ib_status(st_len);
+
+ cqe_qp->rq_flush = true;
+ cq->flush = true;
+ list_move_tail(&qp->cq_flush_rq, &cq->flush_rq);
+
+ ibdev_warn(&dev->ibdev,
+ "qp %d recv cqe with error\n", qp->qpid);
+ print_hex_dump(KERN_WARNING, "cqe ", DUMP_PREFIX_OFFSET, 16, 1,
+ cqe, BIT(cq->q.stride_log2), true);
+ goto out;
+ }
+
+ wc->vendor_err = 0;
+ wc->status = IB_WC_SUCCESS;
+
+ src_qpn = be32_to_cpu(cqe->recv.src_qpn_op);
+ op = src_qpn >> IONIC_V1_CQE_RECV_OP_SHIFT;
+
+ src_qpn &= IONIC_V1_CQE_RECV_QPN_MASK;
+ op &= IONIC_V1_CQE_RECV_OP_MASK;
+
+ wc->opcode = IB_WC_RECV;
+ switch (op) {
+ case IONIC_V1_CQE_RECV_OP_RDMA_IMM:
+ wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
+ wc->wc_flags |= IB_WC_WITH_IMM;
+ wc->ex.imm_data = cqe->recv.imm_data_rkey; /* be32 in wc */
+ break;
+ case IONIC_V1_CQE_RECV_OP_SEND_IMM:
+ wc->wc_flags |= IB_WC_WITH_IMM;
+ wc->ex.imm_data = cqe->recv.imm_data_rkey; /* be32 in wc */
+ break;
+ case IONIC_V1_CQE_RECV_OP_SEND_INV:
+ wc->wc_flags |= IB_WC_WITH_INVALIDATE;
+ wc->ex.invalidate_rkey = be32_to_cpu(cqe->recv.imm_data_rkey);
+ break;
+ }
+
+ wc->byte_len = st_len;
+ wc->src_qp = src_qpn;
+
+ if (qp->ibqp.qp_type == IB_QPT_UD ||
+ qp->ibqp.qp_type == IB_QPT_GSI) {
+ wc->wc_flags |= IB_WC_GRH | IB_WC_WITH_SMAC;
+ ether_addr_copy(wc->smac, cqe->recv.src_mac);
+
+ wc->wc_flags |= IB_WC_WITH_NETWORK_HDR_TYPE;
+ if (ionic_v1_cqe_recv_is_ipv4(cqe))
+ wc->network_hdr_type = RDMA_NETWORK_IPV4;
+ else
+ wc->network_hdr_type = RDMA_NETWORK_IPV6;
+
+ if (ionic_v1_cqe_recv_is_vlan(cqe))
+ wc->wc_flags |= IB_WC_WITH_VLAN;
+
+ /* vlan_tag in cqe will be valid from dpath even if no vlan */
+ vlan_tag = be16_to_cpu(cqe->recv.vlan_tag);
+ wc->vlan_id = vlan_tag & 0xfff; /* 802.1q VID */
+ wc->sl = vlan_tag >> VLAN_PRIO_SHIFT; /* 802.1q PCP */
+ }
+
+ wc->pkey_index = 0;
+ wc->port_num = 1;
+
+out:
+ ionic_queue_consume(&qp->rq);
+
+ return 1;
+}
+
+static bool ionic_peek_send(struct ionic_qp *qp)
+{
+ struct ionic_sq_meta *meta;
+
+ if (qp->sq_flush)
+ return false;
+
+ /* completed all send queue requests */
+ if (ionic_queue_empty(&qp->sq))
+ return false;
+
+ meta = &qp->sq_meta[qp->sq.cons];
+
+ /* waiting for remote completion */
+ if (meta->remote && meta->seq == qp->sq_msn_cons)
+ return false;
+
+ /* waiting for local completion */
+ if (!meta->remote && !meta->local_comp)
+ return false;
+
+ return true;
+}
+
+static int ionic_poll_send(struct ionic_ibdev *dev, struct ionic_cq *cq,
+ struct ionic_qp *qp, struct ib_wc *wc)
+{
+ struct ionic_sq_meta *meta;
+
+ if (qp->sq_flush)
+ return 0;
+
+ do {
+ /* completed all send queue requests */
+ if (ionic_queue_empty(&qp->sq))
+ goto out_empty;
+
+ meta = &qp->sq_meta[qp->sq.cons];
+
+ /* waiting for remote completion */
+ if (meta->remote && meta->seq == qp->sq_msn_cons)
+ goto out_empty;
+
+ /* waiting for local completion */
+ if (!meta->remote && !meta->local_comp)
+ goto out_empty;
+
+ ionic_queue_consume(&qp->sq);
+
+ /* produce wc only if signaled or error status */
+ } while (!meta->signal && meta->ibsts == IB_WC_SUCCESS);
+
+ memset(wc, 0, sizeof(*wc));
+
+ wc->status = meta->ibsts;
+ wc->wr_id = meta->wrid;
+ wc->qp = &qp->ibqp;
+
+ if (meta->ibsts == IB_WC_SUCCESS) {
+ wc->byte_len = meta->len;
+ wc->opcode = meta->ibop;
+ } else {
+ wc->vendor_err = meta->len;
+
+ qp->sq_flush = true;
+ cq->flush = true;
+ list_move_tail(&qp->cq_flush_sq, &cq->flush_sq);
+ }
+
+ return 1;
+
+out_empty:
+ if (qp->sq_flush_rcvd) {
+ qp->sq_flush = true;
+ cq->flush = true;
+ list_move_tail(&qp->cq_flush_sq, &cq->flush_sq);
+ }
+ return 0;
+}
+
+static int ionic_poll_send_many(struct ionic_ibdev *dev, struct ionic_cq *cq,
+ struct ionic_qp *qp, struct ib_wc *wc, int nwc)
+{
+ int rc = 0, npolled = 0;
+
+ while (npolled < nwc) {
+ rc = ionic_poll_send(dev, cq, qp, wc + npolled);
+ if (rc <= 0)
+ break;
+
+ npolled += rc;
+ }
+
+ return npolled ?: rc;
+}
+
+static int ionic_validate_cons(u16 prod, u16 cons,
+ u16 comp, u16 mask)
+{
+ if (((prod - cons) & mask) <= ((comp - cons) & mask))
+ return -EIO;
+
+ return 0;
+}
+
+static int ionic_comp_msn(struct ionic_qp *qp, struct ionic_v1_cqe *cqe)
+{
+ struct ionic_sq_meta *meta;
+ u16 cqe_seq, cqe_idx;
+ int rc;
+
+ if (qp->sq_flush)
+ return 0;
+
+ cqe_seq = be32_to_cpu(cqe->send.msg_msn) & qp->sq.mask;
+
+ rc = ionic_validate_cons(qp->sq_msn_prod,
+ qp->sq_msn_cons,
+ cqe_seq - 1,
+ qp->sq.mask);
+ if (rc) {
+ ibdev_warn(qp->ibqp.device,
+ "qp %u bad msn %#x seq %u for prod %u cons %u\n",
+ qp->qpid, be32_to_cpu(cqe->send.msg_msn),
+ cqe_seq, qp->sq_msn_prod, qp->sq_msn_cons);
+ return rc;
+ }
+
+ qp->sq_msn_cons = cqe_seq;
+
+ if (ionic_v1_cqe_error(cqe)) {
+ cqe_idx = qp->sq_msn_idx[(cqe_seq - 1) & qp->sq.mask];
+
+ meta = &qp->sq_meta[cqe_idx];
+ meta->len = be32_to_cpu(cqe->status_length);
+ meta->ibsts = ionic_to_ib_status(meta->len);
+
+ ibdev_warn(qp->ibqp.device,
+ "qp %d msn cqe with error\n", qp->qpid);
+ print_hex_dump(KERN_WARNING, "cqe ", DUMP_PREFIX_OFFSET, 16, 1,
+ cqe, sizeof(*cqe), true);
+ }
+
+ return 0;
+}
+
+static int ionic_comp_npg(struct ionic_qp *qp, struct ionic_v1_cqe *cqe)
+{
+ struct ionic_sq_meta *meta;
+ u16 cqe_idx;
+ u32 st_len;
+
+ if (qp->sq_flush)
+ return 0;
+
+ st_len = be32_to_cpu(cqe->status_length);
+
+ if (ionic_v1_cqe_error(cqe) && st_len == IONIC_STS_WQE_FLUSHED_ERR) {
+ /*
+ * Flush cqe does not consume a wqe on the device, and maybe
+ * no such work request is posted.
+ *
+ * The driver should begin flushing after the last indicated
+ * normal or error completion. Here, only set a hint that the
+ * flush request was indicated. In poll_send, if nothing more
+ * can be polled normally, then begin flushing.
+ */
+ qp->sq_flush_rcvd = true;
+ return 0;
+ }
+
+ cqe_idx = cqe->send.npg_wqe_id & qp->sq.mask;
+ meta = &qp->sq_meta[cqe_idx];
+ meta->local_comp = true;
+
+ if (ionic_v1_cqe_error(cqe)) {
+ meta->len = st_len;
+ meta->ibsts = ionic_to_ib_status(st_len);
+ meta->remote = false;
+ ibdev_warn(qp->ibqp.device,
+ "qp %d npg cqe with error\n", qp->qpid);
+ print_hex_dump(KERN_WARNING, "cqe ", DUMP_PREFIX_OFFSET, 16, 1,
+ cqe, sizeof(*cqe), true);
+ }
+
+ return 0;
+}
+
+static void ionic_reserve_sync_cq(struct ionic_ibdev *dev, struct ionic_cq *cq)
+{
+ if (!ionic_queue_empty(&cq->q)) {
+ cq->credit += ionic_queue_length(&cq->q);
+ cq->q.cons = cq->q.prod;
+
+ ionic_dbell_ring(dev->lif_cfg.dbpage, dev->lif_cfg.cq_qtype,
+ ionic_queue_dbell_val(&cq->q));
+ }
+}
+
+static void ionic_reserve_cq(struct ionic_ibdev *dev, struct ionic_cq *cq,
+ int spend)
+{
+ cq->credit -= spend;
+
+ if (cq->credit <= 0)
+ ionic_reserve_sync_cq(dev, cq);
+}
+
+static int ionic_poll_vcq_cq(struct ionic_ibdev *dev,
+ struct ionic_cq *cq,
+ int nwc, struct ib_wc *wc)
+{
+ struct ionic_qp *qp, *qp_next;
+ struct ionic_v1_cqe *cqe;
+ int rc = 0, npolled = 0;
+ unsigned long irqflags;
+ u32 qtf, qid;
+ bool peek;
+ u8 type;
+
+ if (nwc < 1)
+ return 0;
+
+ spin_lock_irqsave(&cq->lock, irqflags);
+
+ /* poll already indicated work completions for send queue */
+ list_for_each_entry_safe(qp, qp_next, &cq->poll_sq, cq_poll_sq) {
+ if (npolled == nwc)
+ goto out;
+
+ spin_lock(&qp->sq_lock);
+ rc = ionic_poll_send_many(dev, cq, qp, wc + npolled,
+ nwc - npolled);
+ spin_unlock(&qp->sq_lock);
+
+ if (rc > 0)
+ npolled += rc;
+
+ if (npolled < nwc)
+ list_del_init(&qp->cq_poll_sq);
+ }
+
+ /* poll for more work completions */
+ while (likely(ionic_next_cqe(dev, cq, &cqe))) {
+ if (npolled == nwc)
+ goto out;
+
+ qtf = ionic_v1_cqe_qtf(cqe);
+ qid = ionic_v1_cqe_qtf_qid(qtf);
+ type = ionic_v1_cqe_qtf_type(qtf);
+
+ /*
+ * Safe to access QP without additional reference here as,
+ * 1. We hold cq->lock throughout
+ * 2. ionic_destroy_qp() acquires the same cq->lock before cleanup
+ * 3. QP is removed from qp_tbl before any cleanup begins
+ * This ensures no concurrent access between polling and destruction.
+ */
+ qp = xa_load(&dev->qp_tbl, qid);
+ if (unlikely(!qp)) {
+ ibdev_dbg(&dev->ibdev, "missing qp for qid %u\n", qid);
+ goto cq_next;
+ }
+
+ switch (type) {
+ case IONIC_V1_CQE_TYPE_RECV:
+ spin_lock(&qp->rq_lock);
+ rc = ionic_poll_recv(dev, cq, qp, cqe, wc + npolled);
+ spin_unlock(&qp->rq_lock);
+
+ if (rc < 0)
+ goto out;
+
+ npolled += rc;
+
+ break;
+
+ case IONIC_V1_CQE_TYPE_SEND_MSN:
+ spin_lock(&qp->sq_lock);
+ rc = ionic_comp_msn(qp, cqe);
+ if (!rc) {
+ rc = ionic_poll_send_many(dev, cq, qp,
+ wc + npolled,
+ nwc - npolled);
+ peek = ionic_peek_send(qp);
+ }
+ spin_unlock(&qp->sq_lock);
+
+ if (rc < 0)
+ goto out;
+
+ npolled += rc;
+
+ if (peek)
+ list_move_tail(&qp->cq_poll_sq, &cq->poll_sq);
+ break;
+
+ case IONIC_V1_CQE_TYPE_SEND_NPG:
+ spin_lock(&qp->sq_lock);
+ rc = ionic_comp_npg(qp, cqe);
+ if (!rc) {
+ rc = ionic_poll_send_many(dev, cq, qp,
+ wc + npolled,
+ nwc - npolled);
+ peek = ionic_peek_send(qp);
+ }
+ spin_unlock(&qp->sq_lock);
+
+ if (rc < 0)
+ goto out;
+
+ npolled += rc;
+
+ if (peek)
+ list_move_tail(&qp->cq_poll_sq, &cq->poll_sq);
+ break;
+
+ default:
+ ibdev_warn(&dev->ibdev,
+ "unexpected cqe type %u\n", type);
+ rc = -EIO;
+ goto out;
+ }
+
+cq_next:
+ ionic_queue_produce(&cq->q);
+ cq->color = ionic_color_wrap(cq->q.prod, cq->color);
+ }
+
+ /* lastly, flush send and recv queues */
+ if (likely(!cq->flush))
+ goto out;
+
+ cq->flush = false;
+
+ list_for_each_entry_safe(qp, qp_next, &cq->flush_sq, cq_flush_sq) {
+ if (npolled == nwc)
+ goto out;
+
+ spin_lock(&qp->sq_lock);
+ rc = ionic_flush_send_many(qp, wc + npolled, nwc - npolled);
+ spin_unlock(&qp->sq_lock);
+
+ if (rc > 0)
+ npolled += rc;
+
+ if (npolled < nwc)
+ list_del_init(&qp->cq_flush_sq);
+ else
+ cq->flush = true;
+ }
+
+ list_for_each_entry_safe(qp, qp_next, &cq->flush_rq, cq_flush_rq) {
+ if (npolled == nwc)
+ goto out;
+
+ spin_lock(&qp->rq_lock);
+ rc = ionic_flush_recv_many(qp, wc + npolled, nwc - npolled);
+ spin_unlock(&qp->rq_lock);
+
+ if (rc > 0)
+ npolled += rc;
+
+ if (npolled < nwc)
+ list_del_init(&qp->cq_flush_rq);
+ else
+ cq->flush = true;
+ }
+
+out:
+ /* in case credit was depleted (more work posted than cq depth) */
+ if (cq->credit <= 0)
+ ionic_reserve_sync_cq(dev, cq);
+
+ spin_unlock_irqrestore(&cq->lock, irqflags);
+
+ return npolled ?: rc;
+}
+
+int ionic_poll_cq(struct ib_cq *ibcq, int nwc, struct ib_wc *wc)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(ibcq->device);
+ struct ionic_vcq *vcq = to_ionic_vcq(ibcq);
+ int rc_tmp, rc = 0, npolled = 0;
+ int cq_i, cq_x, cq_ix;
+
+ cq_x = vcq->poll_idx;
+ vcq->poll_idx ^= dev->lif_cfg.udma_count - 1;
+
+ for (cq_i = 0; npolled < nwc && cq_i < dev->lif_cfg.udma_count; ++cq_i) {
+ cq_ix = cq_i ^ cq_x;
+
+ if (!(vcq->udma_mask & BIT(cq_ix)))
+ continue;
+
+ rc_tmp = ionic_poll_vcq_cq(dev, &vcq->cq[cq_ix],
+ nwc - npolled,
+ wc + npolled);
+
+ if (rc_tmp >= 0)
+ npolled += rc_tmp;
+ else if (!rc)
+ rc = rc_tmp;
+ }
+
+ return npolled ?: rc;
+}
+
+static int ionic_req_notify_vcq_cq(struct ionic_ibdev *dev, struct ionic_cq *cq,
+ enum ib_cq_notify_flags flags)
+{
+ u64 dbell_val = cq->q.dbell;
+
+ if (flags & IB_CQ_SOLICITED) {
+ cq->arm_sol_prod = ionic_queue_next(&cq->q, cq->arm_sol_prod);
+ dbell_val |= cq->arm_sol_prod | IONIC_CQ_RING_SOL;
+ } else {
+ cq->arm_any_prod = ionic_queue_next(&cq->q, cq->arm_any_prod);
+ dbell_val |= cq->arm_any_prod | IONIC_CQ_RING_ARM;
+ }
+
+ ionic_reserve_sync_cq(dev, cq);
+
+ ionic_dbell_ring(dev->lif_cfg.dbpage, dev->lif_cfg.cq_qtype, dbell_val);
+
+ /*
+ * IB_CQ_REPORT_MISSED_EVENTS:
+ *
+ * The queue index in ring zero guarantees no missed events.
+ *
+ * Here, we check if the color bit in the next cqe is flipped. If it
+ * is flipped, then progress can be made by immediately polling the cq.
+ * Still, the cq will be armed, and an event will be generated. The cq
+ * may be empty when polled after the event, because the next poll
+ * after arming the cq can empty it.
+ */
+ return (flags & IB_CQ_REPORT_MISSED_EVENTS) &&
+ cq->color == ionic_v1_cqe_color(ionic_queue_at_prod(&cq->q));
+}
+
+int ionic_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(ibcq->device);
+ struct ionic_vcq *vcq = to_ionic_vcq(ibcq);
+ int rc = 0, cq_i;
+
+ for (cq_i = 0; cq_i < dev->lif_cfg.udma_count; ++cq_i) {
+ if (!(vcq->udma_mask & BIT(cq_i)))
+ continue;
+
+ if (ionic_req_notify_vcq_cq(dev, &vcq->cq[cq_i], flags))
+ rc = 1;
+ }
+
+ return rc;
+}
+
+static s64 ionic_prep_inline(void *data, u32 max_data,
+ const struct ib_sge *ib_sgl, int num_sge)
+{
+ static const s64 bit_31 = 1u << 31;
+ s64 len = 0, sg_len;
+ int sg_i;
+
+ for (sg_i = 0; sg_i < num_sge; ++sg_i) {
+ sg_len = ib_sgl[sg_i].length;
+
+ /* sge length zero means 2GB */
+ if (unlikely(sg_len == 0))
+ sg_len = bit_31;
+
+ /* greater than max inline data is invalid */
+ if (unlikely(len + sg_len > max_data))
+ return -EINVAL;
+
+ memcpy(data + len, (void *)ib_sgl[sg_i].addr, sg_len);
+
+ len += sg_len;
+ }
+
+ return len;
+}
+
+static s64 ionic_prep_pld(struct ionic_v1_wqe *wqe,
+ union ionic_v1_pld *pld,
+ int spec, u32 max_sge,
+ const struct ib_sge *ib_sgl,
+ int num_sge)
+{
+ static const s64 bit_31 = 1l << 31;
+ struct ionic_sge *sgl;
+ __be32 *spec32 = NULL;
+ __be16 *spec16 = NULL;
+ s64 len = 0, sg_len;
+ int sg_i = 0;
+
+ if (unlikely(num_sge < 0 || (u32)num_sge > max_sge))
+ return -EINVAL;
+
+ if (spec && num_sge > IONIC_V1_SPEC_FIRST_SGE) {
+ sg_i = IONIC_V1_SPEC_FIRST_SGE;
+
+ if (num_sge > 8) {
+ wqe->base.flags |= cpu_to_be16(IONIC_V1_FLAG_SPEC16);
+ spec16 = pld->spec16;
+ } else {
+ wqe->base.flags |= cpu_to_be16(IONIC_V1_FLAG_SPEC32);
+ spec32 = pld->spec32;
+ }
+ }
+
+ sgl = &pld->sgl[sg_i];
+
+ for (sg_i = 0; sg_i < num_sge; ++sg_i) {
+ sg_len = ib_sgl[sg_i].length;
+
+ /* sge length zero means 2GB */
+ if (unlikely(sg_len == 0))
+ sg_len = bit_31;
+
+ /* greater than 2GB data is invalid */
+ if (unlikely(len + sg_len > bit_31))
+ return -EINVAL;
+
+ sgl[sg_i].va = cpu_to_be64(ib_sgl[sg_i].addr);
+ sgl[sg_i].len = cpu_to_be32(sg_len);
+ sgl[sg_i].lkey = cpu_to_be32(ib_sgl[sg_i].lkey);
+
+ if (spec32) {
+ spec32[sg_i] = sgl[sg_i].len;
+ } else if (spec16) {
+ if (unlikely(sg_len > U16_MAX))
+ return -EINVAL;
+ spec16[sg_i] = cpu_to_be16(sg_len);
+ }
+
+ len += sg_len;
+ }
+
+ return len;
+}
+
+static void ionic_prep_base(struct ionic_qp *qp,
+ const struct ib_send_wr *wr,
+ struct ionic_sq_meta *meta,
+ struct ionic_v1_wqe *wqe)
+{
+ meta->wrid = wr->wr_id;
+ meta->ibsts = IB_WC_SUCCESS;
+ meta->signal = false;
+ meta->local_comp = false;
+
+ wqe->base.wqe_id = qp->sq.prod;
+
+ if (wr->send_flags & IB_SEND_FENCE)
+ wqe->base.flags |= cpu_to_be16(IONIC_V1_FLAG_FENCE);
+
+ if (wr->send_flags & IB_SEND_SOLICITED)
+ wqe->base.flags |= cpu_to_be16(IONIC_V1_FLAG_SOL);
+
+ if (qp->sig_all || wr->send_flags & IB_SEND_SIGNALED) {
+ wqe->base.flags |= cpu_to_be16(IONIC_V1_FLAG_SIG);
+ meta->signal = true;
+ }
+
+ meta->seq = qp->sq_msn_prod;
+ meta->remote =
+ qp->ibqp.qp_type != IB_QPT_UD &&
+ qp->ibqp.qp_type != IB_QPT_GSI &&
+ !ionic_ibop_is_local(wr->opcode);
+
+ if (meta->remote) {
+ qp->sq_msn_idx[meta->seq] = qp->sq.prod;
+ qp->sq_msn_prod = ionic_queue_next(&qp->sq, qp->sq_msn_prod);
+ }
+
+ ionic_queue_produce(&qp->sq);
+}
+
+static int ionic_prep_common(struct ionic_qp *qp,
+ const struct ib_send_wr *wr,
+ struct ionic_sq_meta *meta,
+ struct ionic_v1_wqe *wqe)
+{
+ s64 signed_len;
+ u32 mval;
+
+ if (wr->send_flags & IB_SEND_INLINE) {
+ wqe->base.num_sge_key = 0;
+ wqe->base.flags |= cpu_to_be16(IONIC_V1_FLAG_INL);
+ mval = ionic_v1_send_wqe_max_data(qp->sq.stride_log2, false);
+ signed_len = ionic_prep_inline(wqe->common.pld.data, mval,
+ wr->sg_list, wr->num_sge);
+ } else {
+ wqe->base.num_sge_key = wr->num_sge;
+ mval = ionic_v1_send_wqe_max_sge(qp->sq.stride_log2,
+ qp->sq_spec,
+ false);
+ signed_len = ionic_prep_pld(wqe, &wqe->common.pld,
+ qp->sq_spec, mval,
+ wr->sg_list, wr->num_sge);
+ }
+
+ if (unlikely(signed_len < 0))
+ return signed_len;
+
+ meta->len = signed_len;
+ wqe->common.length = cpu_to_be32(signed_len);
+
+ ionic_prep_base(qp, wr, meta, wqe);
+
+ return 0;
+}
+
+static void ionic_prep_sq_wqe(struct ionic_qp *qp, void *wqe)
+{
+ memset(wqe, 0, 1u << qp->sq.stride_log2);
+}
+
+static void ionic_prep_rq_wqe(struct ionic_qp *qp, void *wqe)
+{
+ memset(wqe, 0, 1u << qp->rq.stride_log2);
+}
+
+static int ionic_prep_send(struct ionic_qp *qp,
+ const struct ib_send_wr *wr)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(qp->ibqp.device);
+ struct ionic_sq_meta *meta;
+ struct ionic_v1_wqe *wqe;
+
+ meta = &qp->sq_meta[qp->sq.prod];
+ wqe = ionic_queue_at_prod(&qp->sq);
+
+ ionic_prep_sq_wqe(qp, wqe);
+
+ meta->ibop = IB_WC_SEND;
+
+ switch (wr->opcode) {
+ case IB_WR_SEND:
+ wqe->base.op = IONIC_OP(dev->lif_cfg.rdma_version, SEND);
+ break;
+ case IB_WR_SEND_WITH_IMM:
+ wqe->base.op = IONIC_OP(dev->lif_cfg.rdma_version, SEND_IMM);
+ wqe->base.imm_data_key = wr->ex.imm_data;
+ break;
+ case IB_WR_SEND_WITH_INV:
+ wqe->base.op = IONIC_OP(dev->lif_cfg.rdma_version, SEND_INV);
+ wqe->base.imm_data_key =
+ cpu_to_be32(wr->ex.invalidate_rkey);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return ionic_prep_common(qp, wr, meta, wqe);
+}
+
+static int ionic_prep_send_ud(struct ionic_qp *qp,
+ const struct ib_ud_wr *wr)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(qp->ibqp.device);
+ struct ionic_sq_meta *meta;
+ struct ionic_v1_wqe *wqe;
+ struct ionic_ah *ah;
+
+ if (unlikely(!wr->ah))
+ return -EINVAL;
+
+ ah = to_ionic_ah(wr->ah);
+
+ meta = &qp->sq_meta[qp->sq.prod];
+ wqe = ionic_queue_at_prod(&qp->sq);
+
+ ionic_prep_sq_wqe(qp, wqe);
+
+ wqe->common.send.ah_id = cpu_to_be32(ah->ahid);
+ wqe->common.send.dest_qpn = cpu_to_be32(wr->remote_qpn);
+ wqe->common.send.dest_qkey = cpu_to_be32(wr->remote_qkey);
+
+ meta->ibop = IB_WC_SEND;
+
+ switch (wr->wr.opcode) {
+ case IB_WR_SEND:
+ wqe->base.op = IONIC_OP(dev->lif_cfg.rdma_version, SEND);
+ break;
+ case IB_WR_SEND_WITH_IMM:
+ wqe->base.op = IONIC_OP(dev->lif_cfg.rdma_version, SEND_IMM);
+ wqe->base.imm_data_key = wr->wr.ex.imm_data;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return ionic_prep_common(qp, &wr->wr, meta, wqe);
+}
+
+static int ionic_prep_rdma(struct ionic_qp *qp,
+ const struct ib_rdma_wr *wr)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(qp->ibqp.device);
+ struct ionic_sq_meta *meta;
+ struct ionic_v1_wqe *wqe;
+
+ meta = &qp->sq_meta[qp->sq.prod];
+ wqe = ionic_queue_at_prod(&qp->sq);
+
+ ionic_prep_sq_wqe(qp, wqe);
+
+ meta->ibop = IB_WC_RDMA_WRITE;
+
+ switch (wr->wr.opcode) {
+ case IB_WR_RDMA_READ:
+ if (wr->wr.send_flags & (IB_SEND_SOLICITED | IB_SEND_INLINE))
+ return -EINVAL;
+ meta->ibop = IB_WC_RDMA_READ;
+ wqe->base.op = IONIC_OP(dev->lif_cfg.rdma_version, RDMA_READ);
+ break;
+ case IB_WR_RDMA_WRITE:
+ if (wr->wr.send_flags & IB_SEND_SOLICITED)
+ return -EINVAL;
+ wqe->base.op = IONIC_OP(dev->lif_cfg.rdma_version, RDMA_WRITE);
+ break;
+ case IB_WR_RDMA_WRITE_WITH_IMM:
+ wqe->base.op = IONIC_OP(dev->lif_cfg.rdma_version, RDMA_WRITE_IMM);
+ wqe->base.imm_data_key = wr->wr.ex.imm_data;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ wqe->common.rdma.remote_va_high = cpu_to_be32(wr->remote_addr >> 32);
+ wqe->common.rdma.remote_va_low = cpu_to_be32(wr->remote_addr);
+ wqe->common.rdma.remote_rkey = cpu_to_be32(wr->rkey);
+
+ return ionic_prep_common(qp, &wr->wr, meta, wqe);
+}
+
+static int ionic_prep_atomic(struct ionic_qp *qp,
+ const struct ib_atomic_wr *wr)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(qp->ibqp.device);
+ struct ionic_sq_meta *meta;
+ struct ionic_v1_wqe *wqe;
+
+ if (wr->wr.num_sge != 1 || wr->wr.sg_list[0].length != 8)
+ return -EINVAL;
+
+ if (wr->wr.send_flags & (IB_SEND_SOLICITED | IB_SEND_INLINE))
+ return -EINVAL;
+
+ meta = &qp->sq_meta[qp->sq.prod];
+ wqe = ionic_queue_at_prod(&qp->sq);
+
+ ionic_prep_sq_wqe(qp, wqe);
+
+ meta->ibop = IB_WC_RDMA_WRITE;
+
+ switch (wr->wr.opcode) {
+ case IB_WR_ATOMIC_CMP_AND_SWP:
+ meta->ibop = IB_WC_COMP_SWAP;
+ wqe->base.op = IONIC_OP(dev->lif_cfg.rdma_version, ATOMIC_CS);
+ wqe->atomic.swap_add_high = cpu_to_be32(wr->swap >> 32);
+ wqe->atomic.swap_add_low = cpu_to_be32(wr->swap);
+ wqe->atomic.compare_high = cpu_to_be32(wr->compare_add >> 32);
+ wqe->atomic.compare_low = cpu_to_be32(wr->compare_add);
+ break;
+ case IB_WR_ATOMIC_FETCH_AND_ADD:
+ meta->ibop = IB_WC_FETCH_ADD;
+ wqe->base.op = IONIC_OP(dev->lif_cfg.rdma_version, ATOMIC_FA);
+ wqe->atomic.swap_add_high = cpu_to_be32(wr->compare_add >> 32);
+ wqe->atomic.swap_add_low = cpu_to_be32(wr->compare_add);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ wqe->atomic.remote_va_high = cpu_to_be32(wr->remote_addr >> 32);
+ wqe->atomic.remote_va_low = cpu_to_be32(wr->remote_addr);
+ wqe->atomic.remote_rkey = cpu_to_be32(wr->rkey);
+
+ wqe->base.num_sge_key = 1;
+ wqe->atomic.sge.va = cpu_to_be64(wr->wr.sg_list[0].addr);
+ wqe->atomic.sge.len = cpu_to_be32(8);
+ wqe->atomic.sge.lkey = cpu_to_be32(wr->wr.sg_list[0].lkey);
+
+ return ionic_prep_common(qp, &wr->wr, meta, wqe);
+}
+
+static int ionic_prep_inv(struct ionic_qp *qp,
+ const struct ib_send_wr *wr)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(qp->ibqp.device);
+ struct ionic_sq_meta *meta;
+ struct ionic_v1_wqe *wqe;
+
+ if (wr->send_flags & (IB_SEND_SOLICITED | IB_SEND_INLINE))
+ return -EINVAL;
+
+ meta = &qp->sq_meta[qp->sq.prod];
+ wqe = ionic_queue_at_prod(&qp->sq);
+
+ ionic_prep_sq_wqe(qp, wqe);
+
+ wqe->base.op = IONIC_OP(dev->lif_cfg.rdma_version, LOCAL_INV);
+ wqe->base.imm_data_key = cpu_to_be32(wr->ex.invalidate_rkey);
+
+ meta->len = 0;
+ meta->ibop = IB_WC_LOCAL_INV;
+
+ ionic_prep_base(qp, wr, meta, wqe);
+
+ return 0;
+}
+
+static int ionic_prep_reg(struct ionic_qp *qp,
+ const struct ib_reg_wr *wr)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(qp->ibqp.device);
+ struct ionic_mr *mr = to_ionic_mr(wr->mr);
+ struct ionic_sq_meta *meta;
+ struct ionic_v1_wqe *wqe;
+ __le64 dma_addr;
+ int flags;
+
+ if (wr->wr.send_flags & (IB_SEND_SOLICITED | IB_SEND_INLINE))
+ return -EINVAL;
+
+ /* must call ib_map_mr_sg before posting reg wr */
+ if (!mr->buf.tbl_pages)
+ return -EINVAL;
+
+ meta = &qp->sq_meta[qp->sq.prod];
+ wqe = ionic_queue_at_prod(&qp->sq);
+
+ ionic_prep_sq_wqe(qp, wqe);
+
+ flags = to_ionic_mr_flags(wr->access);
+
+ wqe->base.op = IONIC_OP(dev->lif_cfg.rdma_version, REG_MR);
+ wqe->base.num_sge_key = wr->key;
+ wqe->base.imm_data_key = cpu_to_be32(mr->ibmr.lkey);
+ wqe->reg_mr.va = cpu_to_be64(mr->ibmr.iova);
+ wqe->reg_mr.length = cpu_to_be64(mr->ibmr.length);
+ wqe->reg_mr.offset = ionic_pgtbl_off(&mr->buf, mr->ibmr.iova);
+ dma_addr = ionic_pgtbl_dma(&mr->buf, mr->ibmr.iova);
+ wqe->reg_mr.dma_addr = cpu_to_be64(le64_to_cpu(dma_addr));
+
+ wqe->reg_mr.map_count = cpu_to_be32(mr->buf.tbl_pages);
+ wqe->reg_mr.flags = cpu_to_be16(flags);
+ wqe->reg_mr.dir_size_log2 = 0;
+ wqe->reg_mr.page_size_log2 = order_base_2(mr->ibmr.page_size);
+
+ meta->len = 0;
+ meta->ibop = IB_WC_REG_MR;
+
+ ionic_prep_base(qp, &wr->wr, meta, wqe);
+
+ return 0;
+}
+
+static int ionic_prep_one_rc(struct ionic_qp *qp,
+ const struct ib_send_wr *wr)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(qp->ibqp.device);
+ int rc = 0;
+
+ switch (wr->opcode) {
+ case IB_WR_SEND:
+ case IB_WR_SEND_WITH_IMM:
+ case IB_WR_SEND_WITH_INV:
+ rc = ionic_prep_send(qp, wr);
+ break;
+ case IB_WR_RDMA_READ:
+ case IB_WR_RDMA_WRITE:
+ case IB_WR_RDMA_WRITE_WITH_IMM:
+ rc = ionic_prep_rdma(qp, rdma_wr(wr));
+ break;
+ case IB_WR_ATOMIC_CMP_AND_SWP:
+ case IB_WR_ATOMIC_FETCH_AND_ADD:
+ rc = ionic_prep_atomic(qp, atomic_wr(wr));
+ break;
+ case IB_WR_LOCAL_INV:
+ rc = ionic_prep_inv(qp, wr);
+ break;
+ case IB_WR_REG_MR:
+ rc = ionic_prep_reg(qp, reg_wr(wr));
+ break;
+ default:
+ ibdev_dbg(&dev->ibdev, "invalid opcode %d\n", wr->opcode);
+ rc = -EINVAL;
+ }
+
+ return rc;
+}
+
+static int ionic_prep_one_ud(struct ionic_qp *qp,
+ const struct ib_send_wr *wr)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(qp->ibqp.device);
+ int rc = 0;
+
+ switch (wr->opcode) {
+ case IB_WR_SEND:
+ case IB_WR_SEND_WITH_IMM:
+ rc = ionic_prep_send_ud(qp, ud_wr(wr));
+ break;
+ default:
+ ibdev_dbg(&dev->ibdev, "invalid opcode %d\n", wr->opcode);
+ rc = -EINVAL;
+ }
+
+ return rc;
+}
+
+static int ionic_prep_recv(struct ionic_qp *qp,
+ const struct ib_recv_wr *wr)
+{
+ struct ionic_rq_meta *meta;
+ struct ionic_v1_wqe *wqe;
+ s64 signed_len;
+ u32 mval;
+
+ wqe = ionic_queue_at_prod(&qp->rq);
+
+ /* if wqe is owned by device, caller can try posting again soon */
+ if (wqe->base.flags & cpu_to_be16(IONIC_V1_FLAG_FENCE))
+ return -EAGAIN;
+
+ meta = qp->rq_meta_head;
+ if (unlikely(meta == IONIC_META_LAST) ||
+ unlikely(meta == IONIC_META_POSTED))
+ return -EIO;
+
+ ionic_prep_rq_wqe(qp, wqe);
+
+ mval = ionic_v1_recv_wqe_max_sge(qp->rq.stride_log2, qp->rq_spec,
+ false);
+ signed_len = ionic_prep_pld(wqe, &wqe->recv.pld,
+ qp->rq_spec, mval,
+ wr->sg_list, wr->num_sge);
+ if (signed_len < 0)
+ return signed_len;
+
+ meta->wrid = wr->wr_id;
+
+ wqe->base.wqe_id = meta - qp->rq_meta;
+ wqe->base.num_sge_key = wr->num_sge;
+
+ /* total length for recv goes in base imm_data_key */
+ wqe->base.imm_data_key = cpu_to_be32(signed_len);
+
+ ionic_queue_produce(&qp->rq);
+
+ qp->rq_meta_head = meta->next;
+ meta->next = IONIC_META_POSTED;
+
+ return 0;
+}
+
+static int ionic_post_send_common(struct ionic_ibdev *dev,
+ struct ionic_vcq *vcq,
+ struct ionic_cq *cq,
+ struct ionic_qp *qp,
+ const struct ib_send_wr *wr,
+ const struct ib_send_wr **bad)
+{
+ unsigned long irqflags;
+ bool notify = false;
+ int spend, rc = 0;
+
+ if (!bad)
+ return -EINVAL;
+
+ if (!qp->has_sq) {
+ *bad = wr;
+ return -EINVAL;
+ }
+
+ if (qp->state < IB_QPS_RTS) {
+ *bad = wr;
+ return -EINVAL;
+ }
+
+ spin_lock_irqsave(&qp->sq_lock, irqflags);
+
+ while (wr) {
+ if (ionic_queue_full(&qp->sq)) {
+ ibdev_dbg(&dev->ibdev, "queue full");
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ if (qp->ibqp.qp_type == IB_QPT_UD ||
+ qp->ibqp.qp_type == IB_QPT_GSI)
+ rc = ionic_prep_one_ud(qp, wr);
+ else
+ rc = ionic_prep_one_rc(qp, wr);
+ if (rc)
+ goto out;
+
+ wr = wr->next;
+ }
+
+out:
+ spin_unlock_irqrestore(&qp->sq_lock, irqflags);
+
+ spin_lock_irqsave(&cq->lock, irqflags);
+ spin_lock(&qp->sq_lock);
+
+ if (likely(qp->sq.prod != qp->sq_old_prod)) {
+ /* ring cq doorbell just in time */
+ spend = (qp->sq.prod - qp->sq_old_prod) & qp->sq.mask;
+ ionic_reserve_cq(dev, cq, spend);
+
+ qp->sq_old_prod = qp->sq.prod;
+
+ ionic_dbell_ring(dev->lif_cfg.dbpage, dev->lif_cfg.sq_qtype,
+ ionic_queue_dbell_val(&qp->sq));
+ }
+
+ if (qp->sq_flush) {
+ notify = true;
+ cq->flush = true;
+ list_move_tail(&qp->cq_flush_sq, &cq->flush_sq);
+ }
+
+ spin_unlock(&qp->sq_lock);
+ spin_unlock_irqrestore(&cq->lock, irqflags);
+
+ if (notify && vcq->ibcq.comp_handler)
+ vcq->ibcq.comp_handler(&vcq->ibcq, vcq->ibcq.cq_context);
+
+ *bad = wr;
+ return rc;
+}
+
+static int ionic_post_recv_common(struct ionic_ibdev *dev,
+ struct ionic_vcq *vcq,
+ struct ionic_cq *cq,
+ struct ionic_qp *qp,
+ const struct ib_recv_wr *wr,
+ const struct ib_recv_wr **bad)
+{
+ unsigned long irqflags;
+ bool notify = false;
+ int spend, rc = 0;
+
+ if (!bad)
+ return -EINVAL;
+
+ if (!qp->has_rq) {
+ *bad = wr;
+ return -EINVAL;
+ }
+
+ if (qp->state < IB_QPS_INIT) {
+ *bad = wr;
+ return -EINVAL;
+ }
+
+ spin_lock_irqsave(&qp->rq_lock, irqflags);
+
+ while (wr) {
+ if (ionic_queue_full(&qp->rq)) {
+ ibdev_dbg(&dev->ibdev, "queue full");
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ rc = ionic_prep_recv(qp, wr);
+ if (rc)
+ goto out;
+
+ wr = wr->next;
+ }
+
+out:
+ if (!cq) {
+ spin_unlock_irqrestore(&qp->rq_lock, irqflags);
+ goto out_unlocked;
+ }
+ spin_unlock_irqrestore(&qp->rq_lock, irqflags);
+
+ spin_lock_irqsave(&cq->lock, irqflags);
+ spin_lock(&qp->rq_lock);
+
+ if (likely(qp->rq.prod != qp->rq_old_prod)) {
+ /* ring cq doorbell just in time */
+ spend = (qp->rq.prod - qp->rq_old_prod) & qp->rq.mask;
+ ionic_reserve_cq(dev, cq, spend);
+
+ qp->rq_old_prod = qp->rq.prod;
+
+ ionic_dbell_ring(dev->lif_cfg.dbpage, dev->lif_cfg.rq_qtype,
+ ionic_queue_dbell_val(&qp->rq));
+ }
+
+ if (qp->rq_flush) {
+ notify = true;
+ cq->flush = true;
+ list_move_tail(&qp->cq_flush_rq, &cq->flush_rq);
+ }
+
+ spin_unlock(&qp->rq_lock);
+ spin_unlock_irqrestore(&cq->lock, irqflags);
+
+ if (notify && vcq->ibcq.comp_handler)
+ vcq->ibcq.comp_handler(&vcq->ibcq, vcq->ibcq.cq_context);
+
+out_unlocked:
+ *bad = wr;
+ return rc;
+}
+
+int ionic_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+ const struct ib_send_wr **bad)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(ibqp->device);
+ struct ionic_vcq *vcq = to_ionic_vcq(ibqp->send_cq);
+ struct ionic_qp *qp = to_ionic_qp(ibqp);
+ struct ionic_cq *cq =
+ to_ionic_vcq_cq(ibqp->send_cq, qp->udma_idx);
+
+ return ionic_post_send_common(dev, vcq, cq, qp, wr, bad);
+}
+
+int ionic_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
+ const struct ib_recv_wr **bad)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(ibqp->device);
+ struct ionic_vcq *vcq = to_ionic_vcq(ibqp->recv_cq);
+ struct ionic_qp *qp = to_ionic_qp(ibqp);
+ struct ionic_cq *cq =
+ to_ionic_vcq_cq(ibqp->recv_cq, qp->udma_idx);
+
+ return ionic_post_recv_common(dev, vcq, cq, qp, wr, bad);
+}
diff --git a/drivers/infiniband/hw/ionic/ionic_fw.h b/drivers/infiniband/hw/ionic/ionic_fw.h
new file mode 100644
index 000000000000..adfbb89d856c
--- /dev/null
+++ b/drivers/infiniband/hw/ionic/ionic_fw.h
@@ -0,0 +1,1029 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2018-2025, Advanced Micro Devices, Inc. */
+
+#ifndef _IONIC_FW_H_
+#define _IONIC_FW_H_
+
+#include <linux/kernel.h>
+#include <rdma/ib_verbs.h>
+
+/* common for ib spec */
+
+#define IONIC_EXP_DBELL_SZ 8
+
+enum ionic_mrid_bits {
+ IONIC_MRID_INDEX_SHIFT = 8,
+};
+
+static inline u32 ionic_mrid(u32 index, u8 key)
+{
+ return (index << IONIC_MRID_INDEX_SHIFT) | key;
+}
+
+static inline u32 ionic_mrid_index(u32 lrkey)
+{
+ return lrkey >> IONIC_MRID_INDEX_SHIFT;
+}
+
+/* common to all versions */
+
+/* wqe scatter gather element */
+struct ionic_sge {
+ __be64 va;
+ __be32 len;
+ __be32 lkey;
+};
+
+/* admin queue mr type */
+enum ionic_mr_flags {
+ /* bits that determine mr access */
+ IONIC_MRF_LOCAL_WRITE = BIT(0),
+ IONIC_MRF_REMOTE_WRITE = BIT(1),
+ IONIC_MRF_REMOTE_READ = BIT(2),
+ IONIC_MRF_REMOTE_ATOMIC = BIT(3),
+ IONIC_MRF_MW_BIND = BIT(4),
+ IONIC_MRF_ZERO_BASED = BIT(5),
+ IONIC_MRF_ON_DEMAND = BIT(6),
+ IONIC_MRF_PB = BIT(7),
+ IONIC_MRF_ACCESS_MASK = BIT(12) - 1,
+
+ /* bits that determine mr type */
+ IONIC_MRF_UKEY_EN = BIT(13),
+ IONIC_MRF_IS_MW = BIT(14),
+ IONIC_MRF_INV_EN = BIT(15),
+
+ /* base flags combinations for mr types */
+ IONIC_MRF_USER_MR = 0,
+ IONIC_MRF_PHYS_MR = (IONIC_MRF_UKEY_EN |
+ IONIC_MRF_INV_EN),
+ IONIC_MRF_MW_1 = (IONIC_MRF_UKEY_EN |
+ IONIC_MRF_IS_MW),
+ IONIC_MRF_MW_2 = (IONIC_MRF_UKEY_EN |
+ IONIC_MRF_IS_MW |
+ IONIC_MRF_INV_EN),
+};
+
+static inline int to_ionic_mr_flags(int access)
+{
+ int flags = 0;
+
+ if (access & IB_ACCESS_LOCAL_WRITE)
+ flags |= IONIC_MRF_LOCAL_WRITE;
+
+ if (access & IB_ACCESS_REMOTE_READ)
+ flags |= IONIC_MRF_REMOTE_READ;
+
+ if (access & IB_ACCESS_REMOTE_WRITE)
+ flags |= IONIC_MRF_REMOTE_WRITE;
+
+ if (access & IB_ACCESS_REMOTE_ATOMIC)
+ flags |= IONIC_MRF_REMOTE_ATOMIC;
+
+ if (access & IB_ACCESS_MW_BIND)
+ flags |= IONIC_MRF_MW_BIND;
+
+ if (access & IB_ZERO_BASED)
+ flags |= IONIC_MRF_ZERO_BASED;
+
+ return flags;
+}
+
+enum ionic_qp_flags {
+ /* bits that determine qp access */
+ IONIC_QPF_REMOTE_WRITE = BIT(0),
+ IONIC_QPF_REMOTE_READ = BIT(1),
+ IONIC_QPF_REMOTE_ATOMIC = BIT(2),
+
+ /* bits that determine other qp behavior */
+ IONIC_QPF_SQ_PB = BIT(6),
+ IONIC_QPF_RQ_PB = BIT(7),
+ IONIC_QPF_SQ_SPEC = BIT(8),
+ IONIC_QPF_RQ_SPEC = BIT(9),
+ IONIC_QPF_REMOTE_PRIVILEGED = BIT(10),
+ IONIC_QPF_SQ_DRAINING = BIT(11),
+ IONIC_QPF_SQD_NOTIFY = BIT(12),
+ IONIC_QPF_SQ_CMB = BIT(13),
+ IONIC_QPF_RQ_CMB = BIT(14),
+ IONIC_QPF_PRIVILEGED = BIT(15),
+};
+
+static inline int from_ionic_qp_flags(int flags)
+{
+ int access_flags = 0;
+
+ if (flags & IONIC_QPF_REMOTE_WRITE)
+ access_flags |= IB_ACCESS_REMOTE_WRITE;
+
+ if (flags & IONIC_QPF_REMOTE_READ)
+ access_flags |= IB_ACCESS_REMOTE_READ;
+
+ if (flags & IONIC_QPF_REMOTE_ATOMIC)
+ access_flags |= IB_ACCESS_REMOTE_ATOMIC;
+
+ return access_flags;
+}
+
+static inline int to_ionic_qp_flags(int access, bool sqd_notify,
+ bool sq_is_cmb, bool rq_is_cmb,
+ bool sq_spec, bool rq_spec,
+ bool privileged, bool remote_privileged)
+{
+ int flags = 0;
+
+ if (access & IB_ACCESS_REMOTE_WRITE)
+ flags |= IONIC_QPF_REMOTE_WRITE;
+
+ if (access & IB_ACCESS_REMOTE_READ)
+ flags |= IONIC_QPF_REMOTE_READ;
+
+ if (access & IB_ACCESS_REMOTE_ATOMIC)
+ flags |= IONIC_QPF_REMOTE_ATOMIC;
+
+ if (sqd_notify)
+ flags |= IONIC_QPF_SQD_NOTIFY;
+
+ if (sq_is_cmb)
+ flags |= IONIC_QPF_SQ_CMB;
+
+ if (rq_is_cmb)
+ flags |= IONIC_QPF_RQ_CMB;
+
+ if (sq_spec)
+ flags |= IONIC_QPF_SQ_SPEC;
+
+ if (rq_spec)
+ flags |= IONIC_QPF_RQ_SPEC;
+
+ if (privileged)
+ flags |= IONIC_QPF_PRIVILEGED;
+
+ if (remote_privileged)
+ flags |= IONIC_QPF_REMOTE_PRIVILEGED;
+
+ return flags;
+}
+
+/* cqe non-admin status indicated in status_length field when err bit is set */
+enum ionic_status {
+ IONIC_STS_OK,
+ IONIC_STS_LOCAL_LEN_ERR,
+ IONIC_STS_LOCAL_QP_OPER_ERR,
+ IONIC_STS_LOCAL_PROT_ERR,
+ IONIC_STS_WQE_FLUSHED_ERR,
+ IONIC_STS_MEM_MGMT_OPER_ERR,
+ IONIC_STS_BAD_RESP_ERR,
+ IONIC_STS_LOCAL_ACC_ERR,
+ IONIC_STS_REMOTE_INV_REQ_ERR,
+ IONIC_STS_REMOTE_ACC_ERR,
+ IONIC_STS_REMOTE_OPER_ERR,
+ IONIC_STS_RETRY_EXCEEDED,
+ IONIC_STS_RNR_RETRY_EXCEEDED,
+ IONIC_STS_XRC_VIO_ERR,
+ IONIC_STS_LOCAL_SGL_INV_ERR,
+};
+
+static inline int ionic_to_ib_status(int sts)
+{
+ switch (sts) {
+ case IONIC_STS_OK:
+ return IB_WC_SUCCESS;
+ case IONIC_STS_LOCAL_LEN_ERR:
+ return IB_WC_LOC_LEN_ERR;
+ case IONIC_STS_LOCAL_QP_OPER_ERR:
+ case IONIC_STS_LOCAL_SGL_INV_ERR:
+ return IB_WC_LOC_QP_OP_ERR;
+ case IONIC_STS_LOCAL_PROT_ERR:
+ return IB_WC_LOC_PROT_ERR;
+ case IONIC_STS_WQE_FLUSHED_ERR:
+ return IB_WC_WR_FLUSH_ERR;
+ case IONIC_STS_MEM_MGMT_OPER_ERR:
+ return IB_WC_MW_BIND_ERR;
+ case IONIC_STS_BAD_RESP_ERR:
+ return IB_WC_BAD_RESP_ERR;
+ case IONIC_STS_LOCAL_ACC_ERR:
+ return IB_WC_LOC_ACCESS_ERR;
+ case IONIC_STS_REMOTE_INV_REQ_ERR:
+ return IB_WC_REM_INV_REQ_ERR;
+ case IONIC_STS_REMOTE_ACC_ERR:
+ return IB_WC_REM_ACCESS_ERR;
+ case IONIC_STS_REMOTE_OPER_ERR:
+ return IB_WC_REM_OP_ERR;
+ case IONIC_STS_RETRY_EXCEEDED:
+ return IB_WC_RETRY_EXC_ERR;
+ case IONIC_STS_RNR_RETRY_EXCEEDED:
+ return IB_WC_RNR_RETRY_EXC_ERR;
+ case IONIC_STS_XRC_VIO_ERR:
+ default:
+ return IB_WC_GENERAL_ERR;
+ }
+}
+
+/* admin queue qp type */
+enum ionic_qp_type {
+ IONIC_QPT_RC,
+ IONIC_QPT_UC,
+ IONIC_QPT_RD,
+ IONIC_QPT_UD,
+ IONIC_QPT_SRQ,
+ IONIC_QPT_XRC_INI,
+ IONIC_QPT_XRC_TGT,
+ IONIC_QPT_XRC_SRQ,
+};
+
+static inline int to_ionic_qp_type(enum ib_qp_type type)
+{
+ switch (type) {
+ case IB_QPT_GSI:
+ case IB_QPT_UD:
+ return IONIC_QPT_UD;
+ case IB_QPT_RC:
+ return IONIC_QPT_RC;
+ case IB_QPT_UC:
+ return IONIC_QPT_UC;
+ case IB_QPT_XRC_INI:
+ return IONIC_QPT_XRC_INI;
+ case IB_QPT_XRC_TGT:
+ return IONIC_QPT_XRC_TGT;
+ default:
+ return -EINVAL;
+ }
+}
+
+/* admin queue qp state */
+enum ionic_qp_state {
+ IONIC_QPS_RESET,
+ IONIC_QPS_INIT,
+ IONIC_QPS_RTR,
+ IONIC_QPS_RTS,
+ IONIC_QPS_SQD,
+ IONIC_QPS_SQE,
+ IONIC_QPS_ERR,
+};
+
+static inline int from_ionic_qp_state(enum ionic_qp_state state)
+{
+ switch (state) {
+ case IONIC_QPS_RESET:
+ return IB_QPS_RESET;
+ case IONIC_QPS_INIT:
+ return IB_QPS_INIT;
+ case IONIC_QPS_RTR:
+ return IB_QPS_RTR;
+ case IONIC_QPS_RTS:
+ return IB_QPS_RTS;
+ case IONIC_QPS_SQD:
+ return IB_QPS_SQD;
+ case IONIC_QPS_SQE:
+ return IB_QPS_SQE;
+ case IONIC_QPS_ERR:
+ return IB_QPS_ERR;
+ default:
+ return -EINVAL;
+ }
+}
+
+static inline int to_ionic_qp_state(enum ib_qp_state state)
+{
+ switch (state) {
+ case IB_QPS_RESET:
+ return IONIC_QPS_RESET;
+ case IB_QPS_INIT:
+ return IONIC_QPS_INIT;
+ case IB_QPS_RTR:
+ return IONIC_QPS_RTR;
+ case IB_QPS_RTS:
+ return IONIC_QPS_RTS;
+ case IB_QPS_SQD:
+ return IONIC_QPS_SQD;
+ case IB_QPS_SQE:
+ return IONIC_QPS_SQE;
+ case IB_QPS_ERR:
+ return IONIC_QPS_ERR;
+ default:
+ return 0;
+ }
+}
+
+static inline int to_ionic_qp_modify_state(enum ib_qp_state to_state,
+ enum ib_qp_state from_state)
+{
+ return to_ionic_qp_state(to_state) |
+ (to_ionic_qp_state(from_state) << 4);
+}
+
+/* fw abi v1 */
+
+/* data payload part of v1 wqe */
+union ionic_v1_pld {
+ struct ionic_sge sgl[2];
+ __be32 spec32[8];
+ __be16 spec16[16];
+ __u8 data[32];
+};
+
+/* completion queue v1 cqe */
+struct ionic_v1_cqe {
+ union {
+ struct {
+ __be16 cmd_idx;
+ __u8 cmd_op;
+ __u8 rsvd[17];
+ __le16 old_sq_cindex;
+ __le16 old_rq_cq_cindex;
+ } admin;
+ struct {
+ __u64 wqe_id;
+ __be32 src_qpn_op;
+ __u8 src_mac[6];
+ __be16 vlan_tag;
+ __be32 imm_data_rkey;
+ } recv;
+ struct {
+ __u8 rsvd[4];
+ __be32 msg_msn;
+ __u8 rsvd2[8];
+ __u64 npg_wqe_id;
+ } send;
+ };
+ __be32 status_length;
+ __be32 qid_type_flags;
+};
+
+/* bits for cqe recv */
+enum ionic_v1_cqe_src_qpn_bits {
+ IONIC_V1_CQE_RECV_QPN_MASK = 0xffffff,
+ IONIC_V1_CQE_RECV_OP_SHIFT = 24,
+
+ /* MASK could be 0x3, but need 0x1f for makeshift values:
+ * OP_TYPE_RDMA_OPER_WITH_IMM, OP_TYPE_SEND_RCVD
+ */
+ IONIC_V1_CQE_RECV_OP_MASK = 0x1f,
+ IONIC_V1_CQE_RECV_OP_SEND = 0,
+ IONIC_V1_CQE_RECV_OP_SEND_INV = 1,
+ IONIC_V1_CQE_RECV_OP_SEND_IMM = 2,
+ IONIC_V1_CQE_RECV_OP_RDMA_IMM = 3,
+
+ IONIC_V1_CQE_RECV_IS_IPV4 = BIT(7 + IONIC_V1_CQE_RECV_OP_SHIFT),
+ IONIC_V1_CQE_RECV_IS_VLAN = BIT(6 + IONIC_V1_CQE_RECV_OP_SHIFT),
+};
+
+/* bits for cqe qid_type_flags */
+enum ionic_v1_cqe_qtf_bits {
+ IONIC_V1_CQE_COLOR = BIT(0),
+ IONIC_V1_CQE_ERROR = BIT(1),
+ IONIC_V1_CQE_TYPE_SHIFT = 5,
+ IONIC_V1_CQE_TYPE_MASK = 0x7,
+ IONIC_V1_CQE_QID_SHIFT = 8,
+
+ IONIC_V1_CQE_TYPE_ADMIN = 0,
+ IONIC_V1_CQE_TYPE_RECV = 1,
+ IONIC_V1_CQE_TYPE_SEND_MSN = 2,
+ IONIC_V1_CQE_TYPE_SEND_NPG = 3,
+};
+
+static inline bool ionic_v1_cqe_color(struct ionic_v1_cqe *cqe)
+{
+ return cqe->qid_type_flags & cpu_to_be32(IONIC_V1_CQE_COLOR);
+}
+
+static inline bool ionic_v1_cqe_error(struct ionic_v1_cqe *cqe)
+{
+ return cqe->qid_type_flags & cpu_to_be32(IONIC_V1_CQE_ERROR);
+}
+
+static inline bool ionic_v1_cqe_recv_is_ipv4(struct ionic_v1_cqe *cqe)
+{
+ return cqe->recv.src_qpn_op & cpu_to_be32(IONIC_V1_CQE_RECV_IS_IPV4);
+}
+
+static inline bool ionic_v1_cqe_recv_is_vlan(struct ionic_v1_cqe *cqe)
+{
+ return cqe->recv.src_qpn_op & cpu_to_be32(IONIC_V1_CQE_RECV_IS_VLAN);
+}
+
+static inline void ionic_v1_cqe_clean(struct ionic_v1_cqe *cqe)
+{
+ cqe->qid_type_flags |= cpu_to_be32(~0u << IONIC_V1_CQE_QID_SHIFT);
+}
+
+static inline u32 ionic_v1_cqe_qtf(struct ionic_v1_cqe *cqe)
+{
+ return be32_to_cpu(cqe->qid_type_flags);
+}
+
+static inline u8 ionic_v1_cqe_qtf_type(u32 qtf)
+{
+ return (qtf >> IONIC_V1_CQE_TYPE_SHIFT) & IONIC_V1_CQE_TYPE_MASK;
+}
+
+static inline u32 ionic_v1_cqe_qtf_qid(u32 qtf)
+{
+ return qtf >> IONIC_V1_CQE_QID_SHIFT;
+}
+
+/* v1 base wqe header */
+struct ionic_v1_base_hdr {
+ __u64 wqe_id;
+ __u8 op;
+ __u8 num_sge_key;
+ __be16 flags;
+ __be32 imm_data_key;
+};
+
+/* v1 receive wqe body */
+struct ionic_v1_recv_bdy {
+ __u8 rsvd[16];
+ union ionic_v1_pld pld;
+};
+
+/* v1 send/rdma wqe body (common, has sgl) */
+struct ionic_v1_common_bdy {
+ union {
+ struct {
+ __be32 ah_id;
+ __be32 dest_qpn;
+ __be32 dest_qkey;
+ } send;
+ struct {
+ __be32 remote_va_high;
+ __be32 remote_va_low;
+ __be32 remote_rkey;
+ } rdma;
+ };
+ __be32 length;
+ union ionic_v1_pld pld;
+};
+
+/* v1 atomic wqe body */
+struct ionic_v1_atomic_bdy {
+ __be32 remote_va_high;
+ __be32 remote_va_low;
+ __be32 remote_rkey;
+ __be32 swap_add_high;
+ __be32 swap_add_low;
+ __be32 compare_high;
+ __be32 compare_low;
+ __u8 rsvd[4];
+ struct ionic_sge sge;
+};
+
+/* v1 reg mr wqe body */
+struct ionic_v1_reg_mr_bdy {
+ __be64 va;
+ __be64 length;
+ __be64 offset;
+ __be64 dma_addr;
+ __be32 map_count;
+ __be16 flags;
+ __u8 dir_size_log2;
+ __u8 page_size_log2;
+ __u8 rsvd[8];
+};
+
+/* v1 bind mw wqe body */
+struct ionic_v1_bind_mw_bdy {
+ __be64 va;
+ __be64 length;
+ __be32 lkey;
+ __be16 flags;
+ __u8 rsvd[26];
+};
+
+/* v1 send/recv wqe */
+struct ionic_v1_wqe {
+ struct ionic_v1_base_hdr base;
+ union {
+ struct ionic_v1_recv_bdy recv;
+ struct ionic_v1_common_bdy common;
+ struct ionic_v1_atomic_bdy atomic;
+ struct ionic_v1_reg_mr_bdy reg_mr;
+ struct ionic_v1_bind_mw_bdy bind_mw;
+ };
+};
+
+/* queue pair v1 send opcodes */
+enum ionic_v1_op {
+ IONIC_V1_OP_SEND,
+ IONIC_V1_OP_SEND_INV,
+ IONIC_V1_OP_SEND_IMM,
+ IONIC_V1_OP_RDMA_READ,
+ IONIC_V1_OP_RDMA_WRITE,
+ IONIC_V1_OP_RDMA_WRITE_IMM,
+ IONIC_V1_OP_ATOMIC_CS,
+ IONIC_V1_OP_ATOMIC_FA,
+ IONIC_V1_OP_REG_MR,
+ IONIC_V1_OP_LOCAL_INV,
+ IONIC_V1_OP_BIND_MW,
+
+ /* flags */
+ IONIC_V1_FLAG_FENCE = BIT(0),
+ IONIC_V1_FLAG_SOL = BIT(1),
+ IONIC_V1_FLAG_INL = BIT(2),
+ IONIC_V1_FLAG_SIG = BIT(3),
+
+ /* flags last four bits for sgl spec format */
+ IONIC_V1_FLAG_SPEC32 = (1u << 12),
+ IONIC_V1_FLAG_SPEC16 = (2u << 12),
+ IONIC_V1_SPEC_FIRST_SGE = 2,
+};
+
+/* queue pair v2 send opcodes */
+enum ionic_v2_op {
+ IONIC_V2_OPSL_OUT = 0x20,
+ IONIC_V2_OPSL_IMM = 0x40,
+ IONIC_V2_OPSL_INV = 0x80,
+
+ IONIC_V2_OP_SEND = 0x0 | IONIC_V2_OPSL_OUT,
+ IONIC_V2_OP_SEND_IMM = IONIC_V2_OP_SEND | IONIC_V2_OPSL_IMM,
+ IONIC_V2_OP_SEND_INV = IONIC_V2_OP_SEND | IONIC_V2_OPSL_INV,
+
+ IONIC_V2_OP_RDMA_WRITE = 0x1 | IONIC_V2_OPSL_OUT,
+ IONIC_V2_OP_RDMA_WRITE_IMM = IONIC_V2_OP_RDMA_WRITE | IONIC_V2_OPSL_IMM,
+
+ IONIC_V2_OP_RDMA_READ = 0x2,
+
+ IONIC_V2_OP_ATOMIC_CS = 0x4,
+ IONIC_V2_OP_ATOMIC_FA = 0x5,
+ IONIC_V2_OP_REG_MR = 0x6,
+ IONIC_V2_OP_LOCAL_INV = 0x7,
+ IONIC_V2_OP_BIND_MW = 0x8,
+};
+
+static inline size_t ionic_v1_send_wqe_min_size(int min_sge, int min_data,
+ int spec, bool expdb)
+{
+ size_t sz_wqe, sz_sgl, sz_data;
+
+ if (spec > IONIC_V1_SPEC_FIRST_SGE)
+ min_sge += IONIC_V1_SPEC_FIRST_SGE;
+
+ if (expdb) {
+ min_sge += 1;
+ min_data += IONIC_EXP_DBELL_SZ;
+ }
+
+ sz_wqe = sizeof(struct ionic_v1_wqe);
+ sz_sgl = offsetof(struct ionic_v1_wqe, common.pld.sgl[min_sge]);
+ sz_data = offsetof(struct ionic_v1_wqe, common.pld.data[min_data]);
+
+ if (sz_sgl > sz_wqe)
+ sz_wqe = sz_sgl;
+
+ if (sz_data > sz_wqe)
+ sz_wqe = sz_data;
+
+ return sz_wqe;
+}
+
+static inline int ionic_v1_send_wqe_max_sge(u8 stride_log2, int spec,
+ bool expdb)
+{
+ struct ionic_sge *sge = (void *)(1ull << stride_log2);
+ struct ionic_v1_wqe *wqe = (void *)0;
+ int num_sge = 0;
+
+ if (expdb)
+ sge -= 1;
+
+ if (spec > IONIC_V1_SPEC_FIRST_SGE)
+ num_sge = IONIC_V1_SPEC_FIRST_SGE;
+
+ num_sge = sge - &wqe->common.pld.sgl[num_sge];
+
+ if (spec && num_sge > spec)
+ num_sge = spec;
+
+ return num_sge;
+}
+
+static inline int ionic_v1_send_wqe_max_data(u8 stride_log2, bool expdb)
+{
+ struct ionic_v1_wqe *wqe = (void *)0;
+ __u8 *data = (void *)(1ull << stride_log2);
+
+ if (expdb)
+ data -= IONIC_EXP_DBELL_SZ;
+
+ return data - wqe->common.pld.data;
+}
+
+static inline size_t ionic_v1_recv_wqe_min_size(int min_sge, int spec,
+ bool expdb)
+{
+ size_t sz_wqe, sz_sgl;
+
+ if (spec > IONIC_V1_SPEC_FIRST_SGE)
+ min_sge += IONIC_V1_SPEC_FIRST_SGE;
+
+ if (expdb)
+ min_sge += 1;
+
+ sz_wqe = sizeof(struct ionic_v1_wqe);
+ sz_sgl = offsetof(struct ionic_v1_wqe, recv.pld.sgl[min_sge]);
+
+ if (sz_sgl > sz_wqe)
+ sz_wqe = sz_sgl;
+
+ return sz_wqe;
+}
+
+static inline int ionic_v1_recv_wqe_max_sge(u8 stride_log2, int spec,
+ bool expdb)
+{
+ struct ionic_sge *sge = (void *)(1ull << stride_log2);
+ struct ionic_v1_wqe *wqe = (void *)0;
+ int num_sge = 0;
+
+ if (expdb)
+ sge -= 1;
+
+ if (spec > IONIC_V1_SPEC_FIRST_SGE)
+ num_sge = IONIC_V1_SPEC_FIRST_SGE;
+
+ num_sge = sge - &wqe->recv.pld.sgl[num_sge];
+
+ if (spec && num_sge > spec)
+ num_sge = spec;
+
+ return num_sge;
+}
+
+static inline int ionic_v1_use_spec_sge(int min_sge, int spec)
+{
+ if (!spec || min_sge > spec)
+ return 0;
+
+ if (min_sge <= IONIC_V1_SPEC_FIRST_SGE)
+ return IONIC_V1_SPEC_FIRST_SGE;
+
+ return spec;
+}
+
+struct ionic_admin_stats_hdr {
+ __le64 dma_addr;
+ __le32 length;
+ __le32 id_ver;
+ __u8 type_state;
+} __packed;
+
+#define IONIC_ADMIN_STATS_HDRS_IN_V1_LEN 17
+static_assert(sizeof(struct ionic_admin_stats_hdr) ==
+ IONIC_ADMIN_STATS_HDRS_IN_V1_LEN);
+
+struct ionic_admin_create_ah {
+ __le64 dma_addr;
+ __le32 length;
+ __le32 pd_id;
+ __le32 id_ver;
+ __le16 dbid_flags;
+ __u8 csum_profile;
+ __u8 crypto;
+} __packed;
+
+#define IONIC_ADMIN_CREATE_AH_IN_V1_LEN 24
+static_assert(sizeof(struct ionic_admin_create_ah) ==
+ IONIC_ADMIN_CREATE_AH_IN_V1_LEN);
+
+struct ionic_admin_destroy_ah {
+ __le32 ah_id;
+} __packed;
+
+#define IONIC_ADMIN_DESTROY_AH_IN_V1_LEN 4
+static_assert(sizeof(struct ionic_admin_destroy_ah) ==
+ IONIC_ADMIN_DESTROY_AH_IN_V1_LEN);
+
+struct ionic_admin_query_ah {
+ __le64 dma_addr;
+} __packed;
+
+#define IONIC_ADMIN_QUERY_AH_IN_V1_LEN 8
+static_assert(sizeof(struct ionic_admin_query_ah) ==
+ IONIC_ADMIN_QUERY_AH_IN_V1_LEN);
+
+struct ionic_admin_create_mr {
+ __le64 va;
+ __le64 length;
+ __le32 pd_id;
+ __le32 id_ver;
+ __le32 tbl_index;
+ __le32 map_count;
+ __le64 dma_addr;
+ __le16 dbid_flags;
+ __u8 pt_type;
+ __u8 dir_size_log2;
+ __u8 page_size_log2;
+} __packed;
+
+#define IONIC_ADMIN_CREATE_MR_IN_V1_LEN 45
+static_assert(sizeof(struct ionic_admin_create_mr) ==
+ IONIC_ADMIN_CREATE_MR_IN_V1_LEN);
+
+struct ionic_admin_destroy_mr {
+ __le32 mr_id;
+} __packed;
+
+#define IONIC_ADMIN_DESTROY_MR_IN_V1_LEN 4
+static_assert(sizeof(struct ionic_admin_destroy_mr) ==
+ IONIC_ADMIN_DESTROY_MR_IN_V1_LEN);
+
+struct ionic_admin_create_cq {
+ __le32 eq_id;
+ __u8 depth_log2;
+ __u8 stride_log2;
+ __u8 dir_size_log2_rsvd;
+ __u8 page_size_log2;
+ __le32 cq_flags;
+ __le32 id_ver;
+ __le32 tbl_index;
+ __le32 map_count;
+ __le64 dma_addr;
+ __le16 dbid_flags;
+} __packed;
+
+#define IONIC_ADMIN_CREATE_CQ_IN_V1_LEN 34
+static_assert(sizeof(struct ionic_admin_create_cq) ==
+ IONIC_ADMIN_CREATE_CQ_IN_V1_LEN);
+
+struct ionic_admin_destroy_cq {
+ __le32 cq_id;
+} __packed;
+
+#define IONIC_ADMIN_DESTROY_CQ_IN_V1_LEN 4
+static_assert(sizeof(struct ionic_admin_destroy_cq) ==
+ IONIC_ADMIN_DESTROY_CQ_IN_V1_LEN);
+
+struct ionic_admin_create_qp {
+ __le32 pd_id;
+ __be32 priv_flags;
+ __le32 sq_cq_id;
+ __u8 sq_depth_log2;
+ __u8 sq_stride_log2;
+ __u8 sq_dir_size_log2_rsvd;
+ __u8 sq_page_size_log2;
+ __le32 sq_tbl_index_xrcd_id;
+ __le32 sq_map_count;
+ __le64 sq_dma_addr;
+ __le32 rq_cq_id;
+ __u8 rq_depth_log2;
+ __u8 rq_stride_log2;
+ __u8 rq_dir_size_log2_rsvd;
+ __u8 rq_page_size_log2;
+ __le32 rq_tbl_index_srq_id;
+ __le32 rq_map_count;
+ __le64 rq_dma_addr;
+ __le32 id_ver;
+ __le16 dbid_flags;
+ __u8 type_state;
+ __u8 rsvd;
+} __packed;
+
+#define IONIC_ADMIN_CREATE_QP_IN_V1_LEN 64
+static_assert(sizeof(struct ionic_admin_create_qp) ==
+ IONIC_ADMIN_CREATE_QP_IN_V1_LEN);
+
+struct ionic_admin_destroy_qp {
+ __le32 qp_id;
+} __packed;
+
+#define IONIC_ADMIN_DESTROY_QP_IN_V1_LEN 4
+static_assert(sizeof(struct ionic_admin_destroy_qp) ==
+ IONIC_ADMIN_DESTROY_QP_IN_V1_LEN);
+
+struct ionic_admin_mod_qp {
+ __be32 attr_mask;
+ __u8 dcqcn_profile;
+ __u8 tfp_csum_profile;
+ __be16 access_flags;
+ __le32 rq_psn;
+ __le32 sq_psn;
+ __le32 qkey_dest_qpn;
+ __le32 rate_limit_kbps;
+ __u8 pmtu;
+ __u8 retry;
+ __u8 rnr_timer;
+ __u8 retry_timeout;
+ __u8 rsq_depth;
+ __u8 rrq_depth;
+ __le16 pkey_id;
+ __le32 ah_id_len;
+ __u8 en_pcp;
+ __u8 ip_dscp;
+ __u8 rsvd2;
+ __u8 type_state;
+ union {
+ struct {
+ __le16 rsvd1;
+ };
+ __le32 rrq_index;
+ };
+ __le32 rsq_index;
+ __le64 dma_addr;
+ __le32 id_ver;
+} __packed;
+
+#define IONIC_ADMIN_MODIFY_QP_IN_V1_LEN 60
+static_assert(sizeof(struct ionic_admin_mod_qp) ==
+ IONIC_ADMIN_MODIFY_QP_IN_V1_LEN);
+
+struct ionic_admin_query_qp {
+ __le64 hdr_dma_addr;
+ __le64 sq_dma_addr;
+ __le64 rq_dma_addr;
+ __le32 ah_id;
+ __le32 id_ver;
+ __le16 dbid_flags;
+} __packed;
+
+#define IONIC_ADMIN_QUERY_QP_IN_V1_LEN 34
+static_assert(sizeof(struct ionic_admin_query_qp) ==
+ IONIC_ADMIN_QUERY_QP_IN_V1_LEN);
+
+#define ADMIN_WQE_STRIDE 64
+#define ADMIN_WQE_HDR_LEN 4
+
+/* admin queue v1 wqe */
+struct ionic_v1_admin_wqe {
+ __u8 op;
+ __u8 rsvd;
+ __le16 len;
+
+ union {
+ struct ionic_admin_stats_hdr stats;
+ struct ionic_admin_create_ah create_ah;
+ struct ionic_admin_destroy_ah destroy_ah;
+ struct ionic_admin_query_ah query_ah;
+ struct ionic_admin_create_mr create_mr;
+ struct ionic_admin_destroy_mr destroy_mr;
+ struct ionic_admin_create_cq create_cq;
+ struct ionic_admin_destroy_cq destroy_cq;
+ struct ionic_admin_create_qp create_qp;
+ struct ionic_admin_destroy_qp destroy_qp;
+ struct ionic_admin_mod_qp mod_qp;
+ struct ionic_admin_query_qp query_qp;
+ } cmd;
+};
+
+/* side data for query qp */
+struct ionic_v1_admin_query_qp_sq {
+ __u8 rnr_timer;
+ __u8 retry_timeout;
+ __be16 access_perms_flags;
+ __be16 rsvd;
+ __be16 pkey_id;
+ __be32 qkey_dest_qpn;
+ __be32 rate_limit_kbps;
+ __be32 rq_psn;
+};
+
+struct ionic_v1_admin_query_qp_rq {
+ __u8 state_pmtu;
+ __u8 retry_rnrtry;
+ __u8 rrq_depth;
+ __u8 rsq_depth;
+ __be32 sq_psn;
+ __be16 access_perms_flags;
+ __be16 rsvd;
+};
+
+/* admin queue v1 opcodes */
+enum ionic_v1_admin_op {
+ IONIC_V1_ADMIN_NOOP,
+ IONIC_V1_ADMIN_CREATE_CQ,
+ IONIC_V1_ADMIN_CREATE_QP,
+ IONIC_V1_ADMIN_CREATE_MR,
+ IONIC_V1_ADMIN_STATS_HDRS,
+ IONIC_V1_ADMIN_STATS_VALS,
+ IONIC_V1_ADMIN_DESTROY_MR,
+ IONIC_V1_ADMIN_RSVD_7, /* RESIZE_CQ */
+ IONIC_V1_ADMIN_DESTROY_CQ,
+ IONIC_V1_ADMIN_MODIFY_QP,
+ IONIC_V1_ADMIN_QUERY_QP,
+ IONIC_V1_ADMIN_DESTROY_QP,
+ IONIC_V1_ADMIN_DEBUG,
+ IONIC_V1_ADMIN_CREATE_AH,
+ IONIC_V1_ADMIN_QUERY_AH,
+ IONIC_V1_ADMIN_MODIFY_DCQCN,
+ IONIC_V1_ADMIN_DESTROY_AH,
+ IONIC_V1_ADMIN_QP_STATS_HDRS,
+ IONIC_V1_ADMIN_QP_STATS_VALS,
+ IONIC_V1_ADMIN_OPCODES_MAX,
+};
+
+/* admin queue v1 cqe status */
+enum ionic_v1_admin_status {
+ IONIC_V1_ASTS_OK,
+ IONIC_V1_ASTS_BAD_CMD,
+ IONIC_V1_ASTS_BAD_INDEX,
+ IONIC_V1_ASTS_BAD_STATE,
+ IONIC_V1_ASTS_BAD_TYPE,
+ IONIC_V1_ASTS_BAD_ATTR,
+ IONIC_V1_ASTS_MSG_TOO_BIG,
+};
+
+/* event queue v1 eqe */
+struct ionic_v1_eqe {
+ __be32 evt;
+};
+
+/* bits for cqe queue_type_flags */
+enum ionic_v1_eqe_evt_bits {
+ IONIC_V1_EQE_COLOR = BIT(0),
+ IONIC_V1_EQE_TYPE_SHIFT = 1,
+ IONIC_V1_EQE_TYPE_MASK = 0x7,
+ IONIC_V1_EQE_CODE_SHIFT = 4,
+ IONIC_V1_EQE_CODE_MASK = 0xf,
+ IONIC_V1_EQE_QID_SHIFT = 8,
+
+ /* cq events */
+ IONIC_V1_EQE_TYPE_CQ = 0,
+ /* cq normal events */
+ IONIC_V1_EQE_CQ_NOTIFY = 0,
+ /* cq error events */
+ IONIC_V1_EQE_CQ_ERR = 8,
+
+ /* qp and srq events */
+ IONIC_V1_EQE_TYPE_QP = 1,
+ /* qp normal events */
+ IONIC_V1_EQE_SRQ_LEVEL = 0,
+ IONIC_V1_EQE_SQ_DRAIN = 1,
+ IONIC_V1_EQE_QP_COMM_EST = 2,
+ IONIC_V1_EQE_QP_LAST_WQE = 3,
+ /* qp error events */
+ IONIC_V1_EQE_QP_ERR = 8,
+ IONIC_V1_EQE_QP_ERR_REQUEST = 9,
+ IONIC_V1_EQE_QP_ERR_ACCESS = 10,
+};
+
+enum ionic_tfp_csum_profiles {
+ IONIC_TFP_CSUM_PROF_ETH_IPV4_UDP = 0,
+ IONIC_TFP_CSUM_PROF_ETH_QTAG_IPV4_UDP = 1,
+ IONIC_TFP_CSUM_PROF_ETH_IPV6_UDP = 2,
+ IONIC_TFP_CSUM_PROF_ETH_QTAG_IPV6_UDP = 3,
+ IONIC_TFP_CSUM_PROF_IPV4_UDP_VXLAN_ETH_QTAG_IPV4_UDP = 4,
+ IONIC_TFP_CSUM_PROF_IPV4_UDP_VXLAN_ETH_QTAG_IPV6_UDP = 5,
+ IONIC_TFP_CSUM_PROF_QTAG_IPV4_UDP_VXLAN_ETH_QTAG_IPV4_UDP = 6,
+ IONIC_TFP_CSUM_PROF_QTAG_IPV4_UDP_VXLAN_ETH_QTAG_IPV6_UDP = 7,
+ IONIC_TFP_CSUM_PROF_ETH_QTAG_IPV4_UDP_ESP_IPV4_UDP = 8,
+ IONIC_TFP_CSUM_PROF_ETH_QTAG_IPV4_ESP_UDP = 9,
+ IONIC_TFP_CSUM_PROF_ETH_QTAG_IPV4_UDP_ESP_UDP = 10,
+ IONIC_TFP_CSUM_PROF_ETH_QTAG_IPV6_ESP_UDP = 11,
+ IONIC_TFP_CSUM_PROF_ETH_QTAG_IPV4_UDP_CSUM = 12,
+};
+
+static inline bool ionic_v1_eqe_color(struct ionic_v1_eqe *eqe)
+{
+ return eqe->evt & cpu_to_be32(IONIC_V1_EQE_COLOR);
+}
+
+static inline u32 ionic_v1_eqe_evt(struct ionic_v1_eqe *eqe)
+{
+ return be32_to_cpu(eqe->evt);
+}
+
+static inline u8 ionic_v1_eqe_evt_type(u32 evt)
+{
+ return (evt >> IONIC_V1_EQE_TYPE_SHIFT) & IONIC_V1_EQE_TYPE_MASK;
+}
+
+static inline u8 ionic_v1_eqe_evt_code(u32 evt)
+{
+ return (evt >> IONIC_V1_EQE_CODE_SHIFT) & IONIC_V1_EQE_CODE_MASK;
+}
+
+static inline u32 ionic_v1_eqe_evt_qid(u32 evt)
+{
+ return evt >> IONIC_V1_EQE_QID_SHIFT;
+}
+
+enum ionic_v1_stat_bits {
+ IONIC_V1_STAT_TYPE_SHIFT = 28,
+ IONIC_V1_STAT_TYPE_NONE = 0,
+ IONIC_V1_STAT_TYPE_8 = 1,
+ IONIC_V1_STAT_TYPE_LE16 = 2,
+ IONIC_V1_STAT_TYPE_LE32 = 3,
+ IONIC_V1_STAT_TYPE_LE64 = 4,
+ IONIC_V1_STAT_TYPE_BE16 = 5,
+ IONIC_V1_STAT_TYPE_BE32 = 6,
+ IONIC_V1_STAT_TYPE_BE64 = 7,
+ IONIC_V1_STAT_OFF_MASK = BIT(IONIC_V1_STAT_TYPE_SHIFT) - 1,
+};
+
+struct ionic_v1_stat {
+ union {
+ __be32 be_type_off;
+ u32 type_off;
+ };
+ char name[28];
+};
+
+static inline int ionic_v1_stat_type(struct ionic_v1_stat *hdr)
+{
+ return hdr->type_off >> IONIC_V1_STAT_TYPE_SHIFT;
+}
+
+static inline unsigned int ionic_v1_stat_off(struct ionic_v1_stat *hdr)
+{
+ return hdr->type_off & IONIC_V1_STAT_OFF_MASK;
+}
+
+#endif /* _IONIC_FW_H_ */
diff --git a/drivers/infiniband/hw/ionic/ionic_hw_stats.c b/drivers/infiniband/hw/ionic/ionic_hw_stats.c
new file mode 100644
index 000000000000..244a80dde08f
--- /dev/null
+++ b/drivers/infiniband/hw/ionic/ionic_hw_stats.c
@@ -0,0 +1,484 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2018-2025, Advanced Micro Devices, Inc. */
+
+#include <linux/dma-mapping.h>
+
+#include "ionic_fw.h"
+#include "ionic_ibdev.h"
+
+static int ionic_v1_stat_normalize(struct ionic_v1_stat *hw_stats,
+ int hw_stats_count)
+{
+ int hw_stat_i;
+
+ for (hw_stat_i = 0; hw_stat_i < hw_stats_count; ++hw_stat_i) {
+ struct ionic_v1_stat *stat = &hw_stats[hw_stat_i];
+
+ stat->type_off = be32_to_cpu(stat->be_type_off);
+ stat->name[sizeof(stat->name) - 1] = 0;
+ if (ionic_v1_stat_type(stat) == IONIC_V1_STAT_TYPE_NONE)
+ break;
+ }
+
+ return hw_stat_i;
+}
+
+static void ionic_fill_stats_desc(struct rdma_stat_desc *hw_stats_hdrs,
+ struct ionic_v1_stat *hw_stats,
+ int hw_stats_count)
+{
+ int hw_stat_i;
+
+ for (hw_stat_i = 0; hw_stat_i < hw_stats_count; ++hw_stat_i) {
+ struct ionic_v1_stat *stat = &hw_stats[hw_stat_i];
+
+ hw_stats_hdrs[hw_stat_i].name = stat->name;
+ }
+}
+
+static u64 ionic_v1_stat_val(struct ionic_v1_stat *stat,
+ void *vals_buf, size_t vals_len)
+{
+ unsigned int off = ionic_v1_stat_off(stat);
+ int type = ionic_v1_stat_type(stat);
+
+#define __ionic_v1_stat_validate(__type) \
+ ((off + sizeof(__type) <= vals_len) && \
+ (IS_ALIGNED(off, sizeof(__type))))
+
+ switch (type) {
+ case IONIC_V1_STAT_TYPE_8:
+ if (__ionic_v1_stat_validate(u8))
+ return *(u8 *)(vals_buf + off);
+ break;
+ case IONIC_V1_STAT_TYPE_LE16:
+ if (__ionic_v1_stat_validate(__le16))
+ return le16_to_cpu(*(__le16 *)(vals_buf + off));
+ break;
+ case IONIC_V1_STAT_TYPE_LE32:
+ if (__ionic_v1_stat_validate(__le32))
+ return le32_to_cpu(*(__le32 *)(vals_buf + off));
+ break;
+ case IONIC_V1_STAT_TYPE_LE64:
+ if (__ionic_v1_stat_validate(__le64))
+ return le64_to_cpu(*(__le64 *)(vals_buf + off));
+ break;
+ case IONIC_V1_STAT_TYPE_BE16:
+ if (__ionic_v1_stat_validate(__be16))
+ return be16_to_cpu(*(__be16 *)(vals_buf + off));
+ break;
+ case IONIC_V1_STAT_TYPE_BE32:
+ if (__ionic_v1_stat_validate(__be32))
+ return be32_to_cpu(*(__be32 *)(vals_buf + off));
+ break;
+ case IONIC_V1_STAT_TYPE_BE64:
+ if (__ionic_v1_stat_validate(__be64))
+ return be64_to_cpu(*(__be64 *)(vals_buf + off));
+ break;
+ }
+
+ return ~0ull;
+#undef __ionic_v1_stat_validate
+}
+
+static int ionic_hw_stats_cmd(struct ionic_ibdev *dev,
+ dma_addr_t dma, size_t len, int qid, int op)
+{
+ struct ionic_admin_wr wr = {
+ .work = COMPLETION_INITIALIZER_ONSTACK(wr.work),
+ .wqe = {
+ .op = op,
+ .len = cpu_to_le16(IONIC_ADMIN_STATS_HDRS_IN_V1_LEN),
+ .cmd.stats = {
+ .dma_addr = cpu_to_le64(dma),
+ .length = cpu_to_le32(len),
+ .id_ver = cpu_to_le32(qid),
+ },
+ }
+ };
+
+ if (dev->lif_cfg.admin_opcodes <= op)
+ return -EBADRQC;
+
+ ionic_admin_post(dev, &wr);
+
+ return ionic_admin_wait(dev, &wr, IONIC_ADMIN_F_INTERRUPT);
+}
+
+static int ionic_init_hw_stats(struct ionic_ibdev *dev)
+{
+ dma_addr_t hw_stats_dma;
+ int rc, hw_stats_count;
+
+ if (dev->hw_stats_hdrs)
+ return 0;
+
+ dev->hw_stats_count = 0;
+
+ /* buffer for current values from the device */
+ dev->hw_stats_buf = kzalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!dev->hw_stats_buf) {
+ rc = -ENOMEM;
+ goto err_buf;
+ }
+
+ /* buffer for names, sizes, offsets of values */
+ dev->hw_stats = kzalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!dev->hw_stats) {
+ rc = -ENOMEM;
+ goto err_hw_stats;
+ }
+
+ /* request the names, sizes, offsets */
+ hw_stats_dma = dma_map_single(dev->lif_cfg.hwdev, dev->hw_stats,
+ PAGE_SIZE, DMA_FROM_DEVICE);
+ rc = dma_mapping_error(dev->lif_cfg.hwdev, hw_stats_dma);
+ if (rc)
+ goto err_dma;
+
+ rc = ionic_hw_stats_cmd(dev, hw_stats_dma, PAGE_SIZE, 0,
+ IONIC_V1_ADMIN_STATS_HDRS);
+ if (rc)
+ goto err_cmd;
+
+ dma_unmap_single(dev->lif_cfg.hwdev, hw_stats_dma, PAGE_SIZE, DMA_FROM_DEVICE);
+
+ /* normalize and count the number of hw_stats */
+ hw_stats_count =
+ ionic_v1_stat_normalize(dev->hw_stats,
+ PAGE_SIZE / sizeof(*dev->hw_stats));
+ if (!hw_stats_count) {
+ rc = -ENODATA;
+ goto err_dma;
+ }
+
+ dev->hw_stats_count = hw_stats_count;
+
+ /* alloc and init array of names, for alloc_hw_stats */
+ dev->hw_stats_hdrs = kcalloc(hw_stats_count,
+ sizeof(*dev->hw_stats_hdrs),
+ GFP_KERNEL);
+ if (!dev->hw_stats_hdrs) {
+ rc = -ENOMEM;
+ goto err_dma;
+ }
+
+ ionic_fill_stats_desc(dev->hw_stats_hdrs, dev->hw_stats,
+ hw_stats_count);
+
+ return 0;
+
+err_cmd:
+ dma_unmap_single(dev->lif_cfg.hwdev, hw_stats_dma, PAGE_SIZE, DMA_FROM_DEVICE);
+err_dma:
+ kfree(dev->hw_stats);
+err_hw_stats:
+ kfree(dev->hw_stats_buf);
+err_buf:
+ dev->hw_stats_count = 0;
+ dev->hw_stats = NULL;
+ dev->hw_stats_buf = NULL;
+ dev->hw_stats_hdrs = NULL;
+ return rc;
+}
+
+static struct rdma_hw_stats *ionic_alloc_hw_stats(struct ib_device *ibdev,
+ u32 port)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(ibdev);
+
+ if (port != 1)
+ return NULL;
+
+ return rdma_alloc_hw_stats_struct(dev->hw_stats_hdrs,
+ dev->hw_stats_count,
+ RDMA_HW_STATS_DEFAULT_LIFESPAN);
+}
+
+static int ionic_get_hw_stats(struct ib_device *ibdev,
+ struct rdma_hw_stats *hw_stats,
+ u32 port, int index)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(ibdev);
+ dma_addr_t hw_stats_dma;
+ int rc, hw_stat_i;
+
+ if (port != 1)
+ return -EINVAL;
+
+ hw_stats_dma = dma_map_single(dev->lif_cfg.hwdev, dev->hw_stats_buf,
+ PAGE_SIZE, DMA_FROM_DEVICE);
+ rc = dma_mapping_error(dev->lif_cfg.hwdev, hw_stats_dma);
+ if (rc)
+ goto err_dma;
+
+ rc = ionic_hw_stats_cmd(dev, hw_stats_dma, PAGE_SIZE,
+ 0, IONIC_V1_ADMIN_STATS_VALS);
+ if (rc)
+ goto err_cmd;
+
+ dma_unmap_single(dev->lif_cfg.hwdev, hw_stats_dma,
+ PAGE_SIZE, DMA_FROM_DEVICE);
+
+ for (hw_stat_i = 0; hw_stat_i < dev->hw_stats_count; ++hw_stat_i)
+ hw_stats->value[hw_stat_i] =
+ ionic_v1_stat_val(&dev->hw_stats[hw_stat_i],
+ dev->hw_stats_buf, PAGE_SIZE);
+
+ return hw_stat_i;
+
+err_cmd:
+ dma_unmap_single(dev->lif_cfg.hwdev, hw_stats_dma,
+ PAGE_SIZE, DMA_FROM_DEVICE);
+err_dma:
+ return rc;
+}
+
+static struct rdma_hw_stats *
+ionic_counter_alloc_stats(struct rdma_counter *counter)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(counter->device);
+ struct ionic_counter *cntr;
+ int err;
+
+ cntr = kzalloc(sizeof(*cntr), GFP_KERNEL);
+ if (!cntr)
+ return NULL;
+
+ /* buffer for current values from the device */
+ cntr->vals = kzalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!cntr->vals)
+ goto err_vals;
+
+ err = xa_alloc(&dev->counter_stats->xa_counters, &counter->id,
+ cntr,
+ XA_LIMIT(0, IONIC_MAX_QPID),
+ GFP_KERNEL);
+ if (err)
+ goto err_xa;
+
+ INIT_LIST_HEAD(&cntr->qp_list);
+
+ return rdma_alloc_hw_stats_struct(dev->counter_stats->stats_hdrs,
+ dev->counter_stats->queue_stats_count,
+ RDMA_HW_STATS_DEFAULT_LIFESPAN);
+err_xa:
+ kfree(cntr->vals);
+err_vals:
+ kfree(cntr);
+
+ return NULL;
+}
+
+static int ionic_counter_dealloc(struct rdma_counter *counter)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(counter->device);
+ struct ionic_counter *cntr;
+
+ cntr = xa_erase(&dev->counter_stats->xa_counters, counter->id);
+ if (!cntr)
+ return -EINVAL;
+
+ kfree(cntr->vals);
+ kfree(cntr);
+
+ return 0;
+}
+
+static int ionic_counter_bind_qp(struct rdma_counter *counter,
+ struct ib_qp *ibqp,
+ u32 port)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(counter->device);
+ struct ionic_qp *qp = to_ionic_qp(ibqp);
+ struct ionic_counter *cntr;
+
+ cntr = xa_load(&dev->counter_stats->xa_counters, counter->id);
+ if (!cntr)
+ return -EINVAL;
+
+ list_add_tail(&qp->qp_list_counter, &cntr->qp_list);
+ ibqp->counter = counter;
+
+ return 0;
+}
+
+static int ionic_counter_unbind_qp(struct ib_qp *ibqp, u32 port)
+{
+ struct ionic_qp *qp = to_ionic_qp(ibqp);
+
+ if (ibqp->counter) {
+ list_del(&qp->qp_list_counter);
+ ibqp->counter = NULL;
+ }
+
+ return 0;
+}
+
+static int ionic_get_qp_stats(struct ib_device *ibdev,
+ struct rdma_hw_stats *hw_stats,
+ u32 counter_id)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(ibdev);
+ struct ionic_counter_stats *cs;
+ struct ionic_counter *cntr;
+ dma_addr_t hw_stats_dma;
+ struct ionic_qp *qp;
+ int rc, stat_i = 0;
+
+ cs = dev->counter_stats;
+ cntr = xa_load(&cs->xa_counters, counter_id);
+ if (!cntr)
+ return -EINVAL;
+
+ hw_stats_dma = dma_map_single(dev->lif_cfg.hwdev, cntr->vals,
+ PAGE_SIZE, DMA_FROM_DEVICE);
+ rc = dma_mapping_error(dev->lif_cfg.hwdev, hw_stats_dma);
+ if (rc)
+ return rc;
+
+ memset(hw_stats->value, 0, sizeof(u64) * hw_stats->num_counters);
+
+ list_for_each_entry(qp, &cntr->qp_list, qp_list_counter) {
+ rc = ionic_hw_stats_cmd(dev, hw_stats_dma, PAGE_SIZE,
+ qp->qpid,
+ IONIC_V1_ADMIN_QP_STATS_VALS);
+ if (rc)
+ goto err_cmd;
+
+ for (stat_i = 0; stat_i < cs->queue_stats_count; ++stat_i)
+ hw_stats->value[stat_i] +=
+ ionic_v1_stat_val(&cs->hdr[stat_i],
+ cntr->vals,
+ PAGE_SIZE);
+ }
+
+ dma_unmap_single(dev->lif_cfg.hwdev, hw_stats_dma, PAGE_SIZE, DMA_FROM_DEVICE);
+ return stat_i;
+
+err_cmd:
+ dma_unmap_single(dev->lif_cfg.hwdev, hw_stats_dma, PAGE_SIZE, DMA_FROM_DEVICE);
+
+ return rc;
+}
+
+static int ionic_counter_update_stats(struct rdma_counter *counter)
+{
+ return ionic_get_qp_stats(counter->device, counter->stats, counter->id);
+}
+
+static int ionic_alloc_counters(struct ionic_ibdev *dev)
+{
+ struct ionic_counter_stats *cs = dev->counter_stats;
+ int rc, hw_stats_count;
+ dma_addr_t hdr_dma;
+
+ /* buffer for names, sizes, offsets of values */
+ cs->hdr = kzalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!cs->hdr)
+ return -ENOMEM;
+
+ hdr_dma = dma_map_single(dev->lif_cfg.hwdev, cs->hdr,
+ PAGE_SIZE, DMA_FROM_DEVICE);
+ rc = dma_mapping_error(dev->lif_cfg.hwdev, hdr_dma);
+ if (rc)
+ goto err_dma;
+
+ rc = ionic_hw_stats_cmd(dev, hdr_dma, PAGE_SIZE, 0,
+ IONIC_V1_ADMIN_QP_STATS_HDRS);
+ if (rc)
+ goto err_cmd;
+
+ dma_unmap_single(dev->lif_cfg.hwdev, hdr_dma, PAGE_SIZE, DMA_FROM_DEVICE);
+
+ /* normalize and count the number of hw_stats */
+ hw_stats_count = ionic_v1_stat_normalize(cs->hdr,
+ PAGE_SIZE / sizeof(*cs->hdr));
+ if (!hw_stats_count) {
+ rc = -ENODATA;
+ goto err_dma;
+ }
+
+ cs->queue_stats_count = hw_stats_count;
+
+ /* alloc and init array of names */
+ cs->stats_hdrs = kcalloc(hw_stats_count, sizeof(*cs->stats_hdrs),
+ GFP_KERNEL);
+ if (!cs->stats_hdrs) {
+ rc = -ENOMEM;
+ goto err_dma;
+ }
+
+ ionic_fill_stats_desc(cs->stats_hdrs, cs->hdr, hw_stats_count);
+
+ return 0;
+
+err_cmd:
+ dma_unmap_single(dev->lif_cfg.hwdev, hdr_dma, PAGE_SIZE, DMA_FROM_DEVICE);
+err_dma:
+ kfree(cs->hdr);
+
+ return rc;
+}
+
+static const struct ib_device_ops ionic_hw_stats_ops = {
+ .driver_id = RDMA_DRIVER_IONIC,
+ .alloc_hw_port_stats = ionic_alloc_hw_stats,
+ .get_hw_stats = ionic_get_hw_stats,
+};
+
+static const struct ib_device_ops ionic_counter_stats_ops = {
+ .counter_alloc_stats = ionic_counter_alloc_stats,
+ .counter_dealloc = ionic_counter_dealloc,
+ .counter_bind_qp = ionic_counter_bind_qp,
+ .counter_unbind_qp = ionic_counter_unbind_qp,
+ .counter_update_stats = ionic_counter_update_stats,
+};
+
+void ionic_stats_init(struct ionic_ibdev *dev)
+{
+ u16 stats_type = dev->lif_cfg.stats_type;
+ int rc;
+
+ if (stats_type & IONIC_LIF_RDMA_STAT_GLOBAL) {
+ rc = ionic_init_hw_stats(dev);
+ if (rc)
+ ibdev_dbg(&dev->ibdev, "Failed to init hw stats\n");
+ else
+ ib_set_device_ops(&dev->ibdev, &ionic_hw_stats_ops);
+ }
+
+ if (stats_type & IONIC_LIF_RDMA_STAT_QP) {
+ dev->counter_stats = kzalloc(sizeof(*dev->counter_stats),
+ GFP_KERNEL);
+ if (!dev->counter_stats)
+ return;
+
+ rc = ionic_alloc_counters(dev);
+ if (rc) {
+ ibdev_dbg(&dev->ibdev, "Failed to init counter stats\n");
+ kfree(dev->counter_stats);
+ dev->counter_stats = NULL;
+ return;
+ }
+
+ xa_init_flags(&dev->counter_stats->xa_counters, XA_FLAGS_ALLOC);
+
+ ib_set_device_ops(&dev->ibdev, &ionic_counter_stats_ops);
+ }
+}
+
+void ionic_stats_cleanup(struct ionic_ibdev *dev)
+{
+ if (dev->counter_stats) {
+ xa_destroy(&dev->counter_stats->xa_counters);
+ kfree(dev->counter_stats->hdr);
+ kfree(dev->counter_stats->stats_hdrs);
+ kfree(dev->counter_stats);
+ dev->counter_stats = NULL;
+ }
+
+ kfree(dev->hw_stats);
+ kfree(dev->hw_stats_buf);
+ kfree(dev->hw_stats_hdrs);
+}
diff --git a/drivers/infiniband/hw/ionic/ionic_ibdev.c b/drivers/infiniband/hw/ionic/ionic_ibdev.c
new file mode 100644
index 000000000000..164046d00e5d
--- /dev/null
+++ b/drivers/infiniband/hw/ionic/ionic_ibdev.c
@@ -0,0 +1,440 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2018-2025, Advanced Micro Devices, Inc. */
+
+#include <linux/module.h>
+#include <linux/printk.h>
+#include <linux/pci.h>
+#include <linux/irq.h>
+#include <net/addrconf.h>
+#include <rdma/ib_addr.h>
+#include <rdma/ib_mad.h>
+
+#include "ionic_ibdev.h"
+
+#define DRIVER_DESCRIPTION "AMD Pensando RoCE HCA driver"
+#define DEVICE_DESCRIPTION "AMD Pensando RoCE HCA"
+
+MODULE_AUTHOR("Allen Hubbe <allen.hubbe@amd.com>");
+MODULE_DESCRIPTION(DRIVER_DESCRIPTION);
+MODULE_LICENSE("GPL");
+MODULE_IMPORT_NS("NET_IONIC");
+
+static int ionic_query_device(struct ib_device *ibdev,
+ struct ib_device_attr *attr,
+ struct ib_udata *udata)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(ibdev);
+ struct net_device *ndev;
+
+ ndev = ib_device_get_netdev(ibdev, 1);
+ addrconf_ifid_eui48((u8 *)&attr->sys_image_guid, ndev);
+ dev_put(ndev);
+ attr->max_mr_size = dev->lif_cfg.npts_per_lif * PAGE_SIZE / 2;
+ attr->page_size_cap = dev->lif_cfg.page_size_supported;
+
+ attr->vendor_id = to_pci_dev(dev->lif_cfg.hwdev)->vendor;
+ attr->vendor_part_id = to_pci_dev(dev->lif_cfg.hwdev)->device;
+
+ attr->hw_ver = ionic_lif_asic_rev(dev->lif_cfg.lif);
+ attr->fw_ver = 0;
+ attr->max_qp = dev->lif_cfg.qp_count;
+ attr->max_qp_wr = IONIC_MAX_DEPTH;
+ attr->device_cap_flags =
+ IB_DEVICE_MEM_WINDOW |
+ IB_DEVICE_MEM_MGT_EXTENSIONS |
+ IB_DEVICE_MEM_WINDOW_TYPE_2B |
+ 0;
+ attr->max_send_sge =
+ min(ionic_v1_send_wqe_max_sge(dev->lif_cfg.max_stride, 0, false),
+ IONIC_SPEC_HIGH);
+ attr->max_recv_sge =
+ min(ionic_v1_recv_wqe_max_sge(dev->lif_cfg.max_stride, 0, false),
+ IONIC_SPEC_HIGH);
+ attr->max_sge_rd = attr->max_send_sge;
+ attr->max_cq = dev->lif_cfg.cq_count / dev->lif_cfg.udma_count;
+ attr->max_cqe = IONIC_MAX_CQ_DEPTH - IONIC_CQ_GRACE;
+ attr->max_mr = dev->lif_cfg.nmrs_per_lif;
+ attr->max_pd = IONIC_MAX_PD;
+ attr->max_qp_rd_atom = IONIC_MAX_RD_ATOM;
+ attr->max_ee_rd_atom = 0;
+ attr->max_res_rd_atom = IONIC_MAX_RD_ATOM;
+ attr->max_qp_init_rd_atom = IONIC_MAX_RD_ATOM;
+ attr->max_ee_init_rd_atom = 0;
+ attr->atomic_cap = IB_ATOMIC_GLOB;
+ attr->masked_atomic_cap = IB_ATOMIC_GLOB;
+ attr->max_mw = dev->lif_cfg.nmrs_per_lif;
+ attr->max_mcast_grp = 0;
+ attr->max_mcast_qp_attach = 0;
+ attr->max_ah = dev->lif_cfg.nahs_per_lif;
+ attr->max_fast_reg_page_list_len = dev->lif_cfg.npts_per_lif / 2;
+ attr->max_pkeys = IONIC_PKEY_TBL_LEN;
+
+ return 0;
+}
+
+static int ionic_query_port(struct ib_device *ibdev, u32 port,
+ struct ib_port_attr *attr)
+{
+ struct net_device *ndev;
+
+ if (port != 1)
+ return -EINVAL;
+
+ ndev = ib_device_get_netdev(ibdev, port);
+
+ if (netif_running(ndev) && netif_carrier_ok(ndev)) {
+ attr->state = IB_PORT_ACTIVE;
+ attr->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
+ } else if (netif_running(ndev)) {
+ attr->state = IB_PORT_DOWN;
+ attr->phys_state = IB_PORT_PHYS_STATE_POLLING;
+ } else {
+ attr->state = IB_PORT_DOWN;
+ attr->phys_state = IB_PORT_PHYS_STATE_DISABLED;
+ }
+
+ attr->max_mtu = iboe_get_mtu(ndev->max_mtu);
+ attr->active_mtu = min(attr->max_mtu, iboe_get_mtu(ndev->mtu));
+ attr->gid_tbl_len = IONIC_GID_TBL_LEN;
+ attr->ip_gids = true;
+ attr->port_cap_flags = 0;
+ attr->max_msg_sz = 0x80000000;
+ attr->pkey_tbl_len = IONIC_PKEY_TBL_LEN;
+ attr->max_vl_num = 1;
+ attr->subnet_prefix = 0xfe80000000000000ull;
+
+ dev_put(ndev);
+
+ return ib_get_eth_speed(ibdev, port,
+ &attr->active_speed,
+ &attr->active_width);
+}
+
+static enum rdma_link_layer ionic_get_link_layer(struct ib_device *ibdev,
+ u32 port)
+{
+ return IB_LINK_LAYER_ETHERNET;
+}
+
+static int ionic_query_pkey(struct ib_device *ibdev, u32 port, u16 index,
+ u16 *pkey)
+{
+ if (port != 1)
+ return -EINVAL;
+
+ if (index != 0)
+ return -EINVAL;
+
+ *pkey = IB_DEFAULT_PKEY_FULL;
+
+ return 0;
+}
+
+static int ionic_modify_device(struct ib_device *ibdev, int mask,
+ struct ib_device_modify *attr)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(ibdev);
+
+ if (mask & ~IB_DEVICE_MODIFY_NODE_DESC)
+ return -EOPNOTSUPP;
+
+ if (mask & IB_DEVICE_MODIFY_NODE_DESC)
+ memcpy(dev->ibdev.node_desc, attr->node_desc,
+ IB_DEVICE_NODE_DESC_MAX);
+
+ return 0;
+}
+
+static int ionic_get_port_immutable(struct ib_device *ibdev, u32 port,
+ struct ib_port_immutable *attr)
+{
+ if (port != 1)
+ return -EINVAL;
+
+ attr->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
+
+ attr->pkey_tbl_len = IONIC_PKEY_TBL_LEN;
+ attr->gid_tbl_len = IONIC_GID_TBL_LEN;
+ attr->max_mad_size = IB_MGMT_MAD_SIZE;
+
+ return 0;
+}
+
+static void ionic_get_dev_fw_str(struct ib_device *ibdev, char *str)
+{
+ struct ionic_ibdev *dev = to_ionic_ibdev(ibdev);
+
+ ionic_lif_fw_version(dev->lif_cfg.lif, str, IB_FW_VERSION_NAME_MAX);
+}
+
+static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr,
+ char *buf)
+{
+ struct ionic_ibdev *dev =
+ rdma_device_to_drv_device(device, struct ionic_ibdev, ibdev);
+
+ return sysfs_emit(buf, "0x%x\n", ionic_lif_asic_rev(dev->lif_cfg.lif));
+}
+static DEVICE_ATTR_RO(hw_rev);
+
+static ssize_t hca_type_show(struct device *device,
+ struct device_attribute *attr, char *buf)
+{
+ struct ionic_ibdev *dev =
+ rdma_device_to_drv_device(device, struct ionic_ibdev, ibdev);
+
+ return sysfs_emit(buf, "%s\n", dev->ibdev.node_desc);
+}
+static DEVICE_ATTR_RO(hca_type);
+
+static struct attribute *ionic_rdma_attributes[] = {
+ &dev_attr_hw_rev.attr,
+ &dev_attr_hca_type.attr,
+ NULL
+};
+
+static const struct attribute_group ionic_rdma_attr_group = {
+ .attrs = ionic_rdma_attributes,
+};
+
+static void ionic_disassociate_ucontext(struct ib_ucontext *ibcontext)
+{
+ /*
+ * Dummy define disassociate_ucontext so that it does not
+ * wait for user context before cleaning up hw resources.
+ */
+}
+
+static const struct ib_device_ops ionic_dev_ops = {
+ .owner = THIS_MODULE,
+ .driver_id = RDMA_DRIVER_IONIC,
+ .uverbs_abi_ver = IONIC_ABI_VERSION,
+
+ .alloc_ucontext = ionic_alloc_ucontext,
+ .dealloc_ucontext = ionic_dealloc_ucontext,
+ .mmap = ionic_mmap,
+ .mmap_free = ionic_mmap_free,
+ .alloc_pd = ionic_alloc_pd,
+ .dealloc_pd = ionic_dealloc_pd,
+ .create_ah = ionic_create_ah,
+ .query_ah = ionic_query_ah,
+ .destroy_ah = ionic_destroy_ah,
+ .create_user_ah = ionic_create_ah,
+ .get_dma_mr = ionic_get_dma_mr,
+ .reg_user_mr = ionic_reg_user_mr,
+ .reg_user_mr_dmabuf = ionic_reg_user_mr_dmabuf,
+ .dereg_mr = ionic_dereg_mr,
+ .alloc_mr = ionic_alloc_mr,
+ .map_mr_sg = ionic_map_mr_sg,
+ .alloc_mw = ionic_alloc_mw,
+ .dealloc_mw = ionic_dealloc_mw,
+ .create_cq = ionic_create_cq,
+ .destroy_cq = ionic_destroy_cq,
+ .create_qp = ionic_create_qp,
+ .modify_qp = ionic_modify_qp,
+ .query_qp = ionic_query_qp,
+ .destroy_qp = ionic_destroy_qp,
+
+ .post_send = ionic_post_send,
+ .post_recv = ionic_post_recv,
+ .poll_cq = ionic_poll_cq,
+ .req_notify_cq = ionic_req_notify_cq,
+
+ .query_device = ionic_query_device,
+ .query_port = ionic_query_port,
+ .get_link_layer = ionic_get_link_layer,
+ .query_pkey = ionic_query_pkey,
+ .modify_device = ionic_modify_device,
+ .get_port_immutable = ionic_get_port_immutable,
+ .get_dev_fw_str = ionic_get_dev_fw_str,
+ .device_group = &ionic_rdma_attr_group,
+ .disassociate_ucontext = ionic_disassociate_ucontext,
+
+ INIT_RDMA_OBJ_SIZE(ib_ucontext, ionic_ctx, ibctx),
+ INIT_RDMA_OBJ_SIZE(ib_pd, ionic_pd, ibpd),
+ INIT_RDMA_OBJ_SIZE(ib_ah, ionic_ah, ibah),
+ INIT_RDMA_OBJ_SIZE(ib_cq, ionic_vcq, ibcq),
+ INIT_RDMA_OBJ_SIZE(ib_qp, ionic_qp, ibqp),
+ INIT_RDMA_OBJ_SIZE(ib_mw, ionic_mr, ibmw),
+};
+
+static void ionic_init_resids(struct ionic_ibdev *dev)
+{
+ ionic_resid_init(&dev->inuse_cqid, dev->lif_cfg.cq_count);
+ dev->half_cqid_udma_shift =
+ order_base_2(dev->lif_cfg.cq_count / dev->lif_cfg.udma_count);
+ ionic_resid_init(&dev->inuse_pdid, IONIC_MAX_PD);
+ ionic_resid_init(&dev->inuse_ahid, dev->lif_cfg.nahs_per_lif);
+ ionic_resid_init(&dev->inuse_mrid, dev->lif_cfg.nmrs_per_lif);
+ /* skip reserved lkey */
+ dev->next_mrkey = 1;
+ ionic_resid_init(&dev->inuse_qpid, dev->lif_cfg.qp_count);
+ /* skip reserved SMI and GSI qpids */
+ dev->half_qpid_udma_shift =
+ order_base_2(dev->lif_cfg.qp_count / dev->lif_cfg.udma_count);
+ ionic_resid_init(&dev->inuse_dbid, dev->lif_cfg.dbid_count);
+}
+
+static void ionic_destroy_resids(struct ionic_ibdev *dev)
+{
+ ionic_resid_destroy(&dev->inuse_cqid);
+ ionic_resid_destroy(&dev->inuse_pdid);
+ ionic_resid_destroy(&dev->inuse_ahid);
+ ionic_resid_destroy(&dev->inuse_mrid);
+ ionic_resid_destroy(&dev->inuse_qpid);
+ ionic_resid_destroy(&dev->inuse_dbid);
+}
+
+static void ionic_destroy_ibdev(struct ionic_ibdev *dev)
+{
+ ionic_kill_rdma_admin(dev, false);
+ ib_unregister_device(&dev->ibdev);
+ ionic_stats_cleanup(dev);
+ ionic_destroy_rdma_admin(dev);
+ ionic_destroy_resids(dev);
+ WARN_ON(!xa_empty(&dev->qp_tbl));
+ xa_destroy(&dev->qp_tbl);
+ WARN_ON(!xa_empty(&dev->cq_tbl));
+ xa_destroy(&dev->cq_tbl);
+ ib_dealloc_device(&dev->ibdev);
+}
+
+static struct ionic_ibdev *ionic_create_ibdev(struct ionic_aux_dev *ionic_adev)
+{
+ struct ib_device *ibdev;
+ struct ionic_ibdev *dev;
+ struct net_device *ndev;
+ int rc;
+
+ dev = ib_alloc_device(ionic_ibdev, ibdev);
+ if (!dev)
+ return ERR_PTR(-EINVAL);
+
+ ionic_fill_lif_cfg(ionic_adev->lif, &dev->lif_cfg);
+
+ xa_init_flags(&dev->qp_tbl, GFP_ATOMIC);
+ xa_init_flags(&dev->cq_tbl, GFP_ATOMIC);
+
+ ionic_init_resids(dev);
+
+ rc = ionic_rdma_reset_devcmd(dev);
+ if (rc)
+ goto err_reset;
+
+ rc = ionic_create_rdma_admin(dev);
+ if (rc)
+ goto err_admin;
+
+ ibdev = &dev->ibdev;
+ ibdev->dev.parent = dev->lif_cfg.hwdev;
+
+ strscpy(ibdev->name, "ionic_%d", IB_DEVICE_NAME_MAX);
+ strscpy(ibdev->node_desc, DEVICE_DESCRIPTION, IB_DEVICE_NODE_DESC_MAX);
+
+ ibdev->node_type = RDMA_NODE_IB_CA;
+ ibdev->phys_port_cnt = 1;
+
+ /* the first two eq are reserved for async events */
+ ibdev->num_comp_vectors = dev->lif_cfg.eq_count - 2;
+
+ ndev = ionic_lif_netdev(ionic_adev->lif);
+ addrconf_ifid_eui48((u8 *)&ibdev->node_guid, ndev);
+ rc = ib_device_set_netdev(ibdev, ndev, 1);
+ /* ionic_lif_netdev() returns ndev with refcount held */
+ dev_put(ndev);
+ if (rc)
+ goto err_admin;
+
+ ib_set_device_ops(&dev->ibdev, &ionic_dev_ops);
+
+ ionic_stats_init(dev);
+
+ rc = ib_register_device(ibdev, "ionic_%d", ibdev->dev.parent);
+ if (rc)
+ goto err_register;
+
+ return dev;
+
+err_register:
+ ionic_stats_cleanup(dev);
+err_admin:
+ ionic_kill_rdma_admin(dev, false);
+ ionic_destroy_rdma_admin(dev);
+err_reset:
+ ionic_destroy_resids(dev);
+ xa_destroy(&dev->qp_tbl);
+ xa_destroy(&dev->cq_tbl);
+ ib_dealloc_device(&dev->ibdev);
+
+ return ERR_PTR(rc);
+}
+
+static int ionic_aux_probe(struct auxiliary_device *adev,
+ const struct auxiliary_device_id *id)
+{
+ struct ionic_aux_dev *ionic_adev;
+ struct ionic_ibdev *dev;
+
+ ionic_adev = container_of(adev, struct ionic_aux_dev, adev);
+ dev = ionic_create_ibdev(ionic_adev);
+ if (IS_ERR(dev))
+ return dev_err_probe(&adev->dev, PTR_ERR(dev),
+ "Failed to register ibdev\n");
+
+ auxiliary_set_drvdata(adev, dev);
+ ibdev_dbg(&dev->ibdev, "registered\n");
+
+ return 0;
+}
+
+static void ionic_aux_remove(struct auxiliary_device *adev)
+{
+ struct ionic_ibdev *dev = auxiliary_get_drvdata(adev);
+
+ dev_dbg(&adev->dev, "unregister ibdev\n");
+ ionic_destroy_ibdev(dev);
+ dev_dbg(&adev->dev, "unregistered\n");
+}
+
+static const struct auxiliary_device_id ionic_aux_id_table[] = {
+ { .name = "ionic.rdma", },
+ {},
+};
+
+MODULE_DEVICE_TABLE(auxiliary, ionic_aux_id_table);
+
+static struct auxiliary_driver ionic_aux_r_driver = {
+ .name = "rdma",
+ .probe = ionic_aux_probe,
+ .remove = ionic_aux_remove,
+ .id_table = ionic_aux_id_table,
+};
+
+static int __init ionic_mod_init(void)
+{
+ int rc;
+
+ ionic_evt_workq = create_workqueue(KBUILD_MODNAME "-evt");
+ if (!ionic_evt_workq)
+ return -ENOMEM;
+
+ rc = auxiliary_driver_register(&ionic_aux_r_driver);
+ if (rc)
+ goto err_aux;
+
+ return 0;
+
+err_aux:
+ destroy_workqueue(ionic_evt_workq);
+
+ return rc;
+}
+
+static void __exit ionic_mod_exit(void)
+{
+ auxiliary_driver_unregister(&ionic_aux_r_driver);
+ destroy_workqueue(ionic_evt_workq);
+}
+
+module_init(ionic_mod_init);
+module_exit(ionic_mod_exit);
diff --git a/drivers/infiniband/hw/ionic/ionic_ibdev.h b/drivers/infiniband/hw/ionic/ionic_ibdev.h
new file mode 100644
index 000000000000..82fda1e3cdb6
--- /dev/null
+++ b/drivers/infiniband/hw/ionic/ionic_ibdev.h
@@ -0,0 +1,517 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2018-2025, Advanced Micro Devices, Inc. */
+
+#ifndef _IONIC_IBDEV_H_
+#define _IONIC_IBDEV_H_
+
+#include <rdma/ib_umem.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_pack.h>
+#include <rdma/uverbs_ioctl.h>
+
+#include <rdma/ionic-abi.h>
+#include <ionic_api.h>
+#include <ionic_regs.h>
+
+#include "ionic_fw.h"
+#include "ionic_queue.h"
+#include "ionic_res.h"
+
+#include "ionic_lif_cfg.h"
+
+/* Config knobs */
+#define IONIC_EQ_DEPTH 511
+#define IONIC_EQ_COUNT 32
+#define IONIC_AQ_DEPTH 63
+#define IONIC_AQ_COUNT 4
+#define IONIC_EQ_ISR_BUDGET 10
+#define IONIC_EQ_WORK_BUDGET 1000
+#define IONIC_MAX_RD_ATOM 16
+#define IONIC_PKEY_TBL_LEN 1
+#define IONIC_GID_TBL_LEN 256
+
+#define IONIC_MAX_QPID 0xffffff
+#define IONIC_SPEC_HIGH 8
+#define IONIC_MAX_PD 1024
+#define IONIC_SPEC_HIGH 8
+#define IONIC_SQCMB_ORDER 5
+#define IONIC_RQCMB_ORDER 0
+
+#define IONIC_META_LAST ((void *)1ul)
+#define IONIC_META_POSTED ((void *)2ul)
+
+#define IONIC_CQ_GRACE 100
+
+#define IONIC_ROCE_UDP_SPORT 28272
+#define IONIC_DMA_LKEY 0
+#define IONIC_DMA_RKEY IONIC_DMA_LKEY
+
+#define IONIC_CMB_SUPPORTED \
+ (IONIC_CMB_ENABLE | IONIC_CMB_REQUIRE | IONIC_CMB_EXPDB | \
+ IONIC_CMB_WC | IONIC_CMB_UC)
+
+/* resource is not reserved on the device, indicated in tbl_order */
+#define IONIC_RES_INVALID -1
+
+struct ionic_aq;
+struct ionic_cq;
+struct ionic_eq;
+struct ionic_vcq;
+
+enum ionic_admin_state {
+ IONIC_ADMIN_ACTIVE, /* submitting admin commands to queue */
+ IONIC_ADMIN_PAUSED, /* not submitting, but may complete normally */
+ IONIC_ADMIN_KILLED, /* not submitting, locally completed */
+};
+
+enum ionic_admin_flags {
+ IONIC_ADMIN_F_BUSYWAIT = BIT(0), /* Don't sleep */
+ IONIC_ADMIN_F_TEARDOWN = BIT(1), /* In destroy path */
+ IONIC_ADMIN_F_INTERRUPT = BIT(2), /* Interruptible w/timeout */
+};
+
+enum ionic_mmap_flag {
+ IONIC_MMAP_WC = BIT(0),
+};
+
+struct ionic_mmap_entry {
+ struct rdma_user_mmap_entry rdma_entry;
+ unsigned long size;
+ unsigned long pfn;
+ u8 mmap_flags;
+};
+
+struct ionic_ibdev {
+ struct ib_device ibdev;
+
+ struct ionic_lif_cfg lif_cfg;
+
+ struct xarray qp_tbl;
+ struct xarray cq_tbl;
+
+ struct ionic_resid_bits inuse_dbid;
+ struct ionic_resid_bits inuse_pdid;
+ struct ionic_resid_bits inuse_ahid;
+ struct ionic_resid_bits inuse_mrid;
+ struct ionic_resid_bits inuse_qpid;
+ struct ionic_resid_bits inuse_cqid;
+
+ u8 half_cqid_udma_shift;
+ u8 half_qpid_udma_shift;
+ u8 next_qpid_udma_idx;
+ u8 next_mrkey;
+
+ struct work_struct reset_work;
+ bool reset_posted;
+ u32 reset_cnt;
+
+ struct delayed_work admin_dwork;
+ struct ionic_aq **aq_vec;
+ atomic_t admin_state;
+
+ struct ionic_eq **eq_vec;
+
+ struct ionic_v1_stat *hw_stats;
+ void *hw_stats_buf;
+ struct rdma_stat_desc *hw_stats_hdrs;
+ struct ionic_counter_stats *counter_stats;
+ int hw_stats_count;
+};
+
+struct ionic_eq {
+ struct ionic_ibdev *dev;
+
+ u32 eqid;
+ u32 intr;
+
+ struct ionic_queue q;
+
+ int armed;
+ bool enable;
+
+ struct work_struct work;
+
+ int irq;
+ char name[32];
+};
+
+struct ionic_admin_wr {
+ struct completion work;
+ struct list_head aq_ent;
+ struct ionic_v1_admin_wqe wqe;
+ struct ionic_v1_cqe cqe;
+ struct ionic_aq *aq;
+ int status;
+};
+
+struct ionic_admin_wr_q {
+ struct ionic_admin_wr *wr;
+ int wqe_strides;
+};
+
+struct ionic_aq {
+ struct ionic_ibdev *dev;
+ struct ionic_vcq *vcq;
+
+ struct work_struct work;
+
+ atomic_t admin_state;
+ unsigned long stamp;
+ bool armed;
+
+ u32 aqid;
+ u32 cqid;
+
+ spinlock_t lock; /* for posting */
+ struct ionic_queue q;
+ struct ionic_admin_wr_q *q_wr;
+ struct list_head wr_prod;
+ struct list_head wr_post;
+};
+
+struct ionic_ctx {
+ struct ib_ucontext ibctx;
+ u32 dbid;
+ struct rdma_user_mmap_entry *mmap_dbell;
+};
+
+struct ionic_tbl_buf {
+ u32 tbl_limit;
+ u32 tbl_pages;
+ size_t tbl_size;
+ __le64 *tbl_buf;
+ dma_addr_t tbl_dma;
+ u8 page_size_log2;
+};
+
+struct ionic_pd {
+ struct ib_pd ibpd;
+
+ u32 pdid;
+ u32 flags;
+};
+
+struct ionic_cq {
+ struct ionic_vcq *vcq;
+
+ u32 cqid;
+ u32 eqid;
+
+ spinlock_t lock; /* for polling */
+ struct list_head poll_sq;
+ bool flush;
+ struct list_head flush_sq;
+ struct list_head flush_rq;
+ struct list_head ibkill_flush_ent;
+
+ struct ionic_queue q;
+ bool color;
+ int credit;
+ u16 arm_any_prod;
+ u16 arm_sol_prod;
+
+ struct kref cq_kref;
+ struct completion cq_rel_comp;
+
+ /* infrequently accessed, keep at end */
+ struct ib_umem *umem;
+};
+
+struct ionic_vcq {
+ struct ib_cq ibcq;
+ struct ionic_cq cq[2];
+ u8 udma_mask;
+ u8 poll_idx;
+};
+
+struct ionic_sq_meta {
+ u64 wrid;
+ u32 len;
+ u16 seq;
+ u8 ibop;
+ u8 ibsts;
+ u8 remote:1;
+ u8 signal:1;
+ u8 local_comp:1;
+};
+
+struct ionic_rq_meta {
+ struct ionic_rq_meta *next;
+ u64 wrid;
+};
+
+struct ionic_qp {
+ struct ib_qp ibqp;
+ enum ib_qp_state state;
+
+ u32 qpid;
+ u32 ahid;
+ u32 sq_cqid;
+ u32 rq_cqid;
+ u8 udma_idx;
+ u8 has_ah:1;
+ u8 has_sq:1;
+ u8 has_rq:1;
+ u8 sig_all:1;
+
+ struct list_head qp_list_counter;
+
+ struct list_head cq_poll_sq;
+ struct list_head cq_flush_sq;
+ struct list_head cq_flush_rq;
+ struct list_head ibkill_flush_ent;
+
+ spinlock_t sq_lock; /* for posting and polling */
+ struct ionic_queue sq;
+ struct ionic_sq_meta *sq_meta;
+ u16 *sq_msn_idx;
+ int sq_spec;
+ u16 sq_old_prod;
+ u16 sq_msn_prod;
+ u16 sq_msn_cons;
+ u8 sq_cmb;
+ bool sq_flush;
+ bool sq_flush_rcvd;
+
+ spinlock_t rq_lock; /* for posting and polling */
+ struct ionic_queue rq;
+ struct ionic_rq_meta *rq_meta;
+ struct ionic_rq_meta *rq_meta_head;
+ int rq_spec;
+ u16 rq_old_prod;
+ u8 rq_cmb;
+ bool rq_flush;
+
+ struct kref qp_kref;
+ struct completion qp_rel_comp;
+
+ /* infrequently accessed, keep at end */
+ int sgid_index;
+ int sq_cmb_order;
+ u32 sq_cmb_pgid;
+ phys_addr_t sq_cmb_addr;
+ struct rdma_user_mmap_entry *mmap_sq_cmb;
+
+ struct ib_umem *sq_umem;
+
+ int rq_cmb_order;
+ u32 rq_cmb_pgid;
+ phys_addr_t rq_cmb_addr;
+ struct rdma_user_mmap_entry *mmap_rq_cmb;
+
+ struct ib_umem *rq_umem;
+
+ int dcqcn_profile;
+
+ struct ib_ud_header *hdr;
+};
+
+struct ionic_ah {
+ struct ib_ah ibah;
+ u32 ahid;
+ int sgid_index;
+ struct ib_ud_header hdr;
+};
+
+struct ionic_mr {
+ union {
+ struct ib_mr ibmr;
+ struct ib_mw ibmw;
+ };
+
+ u32 mrid;
+ int flags;
+
+ struct ib_umem *umem;
+ struct ionic_tbl_buf buf;
+ bool created;
+};
+
+struct ionic_counter_stats {
+ int queue_stats_count;
+ struct ionic_v1_stat *hdr;
+ struct rdma_stat_desc *stats_hdrs;
+ struct xarray xa_counters;
+};
+
+struct ionic_counter {
+ void *vals;
+ struct list_head qp_list;
+};
+
+static inline struct ionic_ibdev *to_ionic_ibdev(struct ib_device *ibdev)
+{
+ return container_of(ibdev, struct ionic_ibdev, ibdev);
+}
+
+static inline struct ionic_ctx *to_ionic_ctx(struct ib_ucontext *ibctx)
+{
+ return container_of(ibctx, struct ionic_ctx, ibctx);
+}
+
+static inline struct ionic_ctx *to_ionic_ctx_uobj(struct ib_uobject *uobj)
+{
+ if (!uobj)
+ return NULL;
+
+ if (!uobj->context)
+ return NULL;
+
+ return to_ionic_ctx(uobj->context);
+}
+
+static inline struct ionic_pd *to_ionic_pd(struct ib_pd *ibpd)
+{
+ return container_of(ibpd, struct ionic_pd, ibpd);
+}
+
+static inline struct ionic_mr *to_ionic_mr(struct ib_mr *ibmr)
+{
+ return container_of(ibmr, struct ionic_mr, ibmr);
+}
+
+static inline struct ionic_mr *to_ionic_mw(struct ib_mw *ibmw)
+{
+ return container_of(ibmw, struct ionic_mr, ibmw);
+}
+
+static inline struct ionic_vcq *to_ionic_vcq(struct ib_cq *ibcq)
+{
+ return container_of(ibcq, struct ionic_vcq, ibcq);
+}
+
+static inline struct ionic_cq *to_ionic_vcq_cq(struct ib_cq *ibcq,
+ uint8_t udma_idx)
+{
+ return &to_ionic_vcq(ibcq)->cq[udma_idx];
+}
+
+static inline struct ionic_qp *to_ionic_qp(struct ib_qp *ibqp)
+{
+ return container_of(ibqp, struct ionic_qp, ibqp);
+}
+
+static inline struct ionic_ah *to_ionic_ah(struct ib_ah *ibah)
+{
+ return container_of(ibah, struct ionic_ah, ibah);
+}
+
+static inline u32 ionic_ctx_dbid(struct ionic_ibdev *dev,
+ struct ionic_ctx *ctx)
+{
+ if (!ctx)
+ return dev->lif_cfg.dbid;
+
+ return ctx->dbid;
+}
+
+static inline u32 ionic_obj_dbid(struct ionic_ibdev *dev,
+ struct ib_uobject *uobj)
+{
+ return ionic_ctx_dbid(dev, to_ionic_ctx_uobj(uobj));
+}
+
+static inline bool ionic_ibop_is_local(enum ib_wr_opcode op)
+{
+ return op == IB_WR_LOCAL_INV || op == IB_WR_REG_MR;
+}
+
+static inline void ionic_qp_complete(struct kref *kref)
+{
+ struct ionic_qp *qp = container_of(kref, struct ionic_qp, qp_kref);
+
+ complete(&qp->qp_rel_comp);
+}
+
+static inline void ionic_cq_complete(struct kref *kref)
+{
+ struct ionic_cq *cq = container_of(kref, struct ionic_cq, cq_kref);
+
+ complete(&cq->cq_rel_comp);
+}
+
+/* ionic_admin.c */
+extern struct workqueue_struct *ionic_evt_workq;
+void ionic_admin_post(struct ionic_ibdev *dev, struct ionic_admin_wr *wr);
+int ionic_admin_wait(struct ionic_ibdev *dev, struct ionic_admin_wr *wr,
+ enum ionic_admin_flags);
+
+int ionic_rdma_reset_devcmd(struct ionic_ibdev *dev);
+
+int ionic_create_rdma_admin(struct ionic_ibdev *dev);
+void ionic_destroy_rdma_admin(struct ionic_ibdev *dev);
+void ionic_kill_rdma_admin(struct ionic_ibdev *dev, bool fatal_path);
+
+/* ionic_controlpath.c */
+int ionic_create_cq_common(struct ionic_vcq *vcq,
+ struct ionic_tbl_buf *buf,
+ const struct ib_cq_init_attr *attr,
+ struct ionic_ctx *ctx,
+ struct ib_udata *udata,
+ struct ionic_qdesc *req_cq,
+ __u32 *resp_cqid,
+ int udma_idx);
+void ionic_destroy_cq_common(struct ionic_ibdev *dev, struct ionic_cq *cq);
+void ionic_flush_qp(struct ionic_ibdev *dev, struct ionic_qp *qp);
+void ionic_notify_flush_cq(struct ionic_cq *cq);
+
+int ionic_alloc_ucontext(struct ib_ucontext *ibctx, struct ib_udata *udata);
+void ionic_dealloc_ucontext(struct ib_ucontext *ibctx);
+int ionic_mmap(struct ib_ucontext *ibctx, struct vm_area_struct *vma);
+void ionic_mmap_free(struct rdma_user_mmap_entry *rdma_entry);
+int ionic_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata);
+int ionic_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata);
+int ionic_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
+ struct ib_udata *udata);
+int ionic_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr);
+int ionic_destroy_ah(struct ib_ah *ibah, u32 flags);
+struct ib_mr *ionic_get_dma_mr(struct ib_pd *ibpd, int access);
+struct ib_mr *ionic_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 length,
+ u64 addr, int access, struct ib_dmah *dmah,
+ struct ib_udata *udata);
+struct ib_mr *ionic_reg_user_mr_dmabuf(struct ib_pd *ibpd, u64 offset,
+ u64 length, u64 addr, int fd, int access,
+ struct ib_dmah *dmah,
+ struct uverbs_attr_bundle *attrs);
+int ionic_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata);
+struct ib_mr *ionic_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type type,
+ u32 max_sg);
+int ionic_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
+ unsigned int *sg_offset);
+int ionic_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata);
+int ionic_dealloc_mw(struct ib_mw *ibmw);
+int ionic_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+ struct uverbs_attr_bundle *attrs);
+int ionic_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata);
+int ionic_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attr,
+ struct ib_udata *udata);
+int ionic_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int mask,
+ struct ib_udata *udata);
+int ionic_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int mask,
+ struct ib_qp_init_attr *init_attr);
+int ionic_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata);
+
+/* ionic_datapath.c */
+int ionic_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+ const struct ib_send_wr **bad);
+int ionic_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
+ const struct ib_recv_wr **bad);
+int ionic_poll_cq(struct ib_cq *ibcq, int nwc, struct ib_wc *wc);
+int ionic_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags);
+
+/* ionic_hw_stats.c */
+void ionic_stats_init(struct ionic_ibdev *dev);
+void ionic_stats_cleanup(struct ionic_ibdev *dev);
+
+/* ionic_pgtbl.c */
+__le64 ionic_pgtbl_dma(struct ionic_tbl_buf *buf, u64 va);
+__be64 ionic_pgtbl_off(struct ionic_tbl_buf *buf, u64 va);
+int ionic_pgtbl_page(struct ionic_tbl_buf *buf, u64 dma);
+int ionic_pgtbl_init(struct ionic_ibdev *dev,
+ struct ionic_tbl_buf *buf,
+ struct ib_umem *umem,
+ dma_addr_t dma,
+ int limit,
+ u64 page_size);
+void ionic_pgtbl_unbuf(struct ionic_ibdev *dev, struct ionic_tbl_buf *buf);
+#endif /* _IONIC_IBDEV_H_ */
diff --git a/drivers/infiniband/hw/ionic/ionic_lif_cfg.c b/drivers/infiniband/hw/ionic/ionic_lif_cfg.c
new file mode 100644
index 000000000000..f3cd281c3a2f
--- /dev/null
+++ b/drivers/infiniband/hw/ionic/ionic_lif_cfg.c
@@ -0,0 +1,111 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2018-2025, Advanced Micro Devices, Inc. */
+
+#include <linux/kernel.h>
+
+#include <ionic.h>
+#include <ionic_lif.h>
+
+#include "ionic_lif_cfg.h"
+
+#define IONIC_MIN_RDMA_VERSION 0
+#define IONIC_MAX_RDMA_VERSION 2
+
+static u8 ionic_get_expdb(struct ionic_lif *lif)
+{
+ u8 expdb_support = 0;
+
+ if (lif->ionic->idev.phy_cmb_expdb64_pages)
+ expdb_support |= IONIC_EXPDB_64B_WQE;
+ if (lif->ionic->idev.phy_cmb_expdb128_pages)
+ expdb_support |= IONIC_EXPDB_128B_WQE;
+ if (lif->ionic->idev.phy_cmb_expdb256_pages)
+ expdb_support |= IONIC_EXPDB_256B_WQE;
+ if (lif->ionic->idev.phy_cmb_expdb512_pages)
+ expdb_support |= IONIC_EXPDB_512B_WQE;
+
+ return expdb_support;
+}
+
+void ionic_fill_lif_cfg(struct ionic_lif *lif, struct ionic_lif_cfg *cfg)
+{
+ union ionic_lif_identity *ident = &lif->ionic->ident.lif;
+
+ cfg->lif = lif;
+ cfg->hwdev = &lif->ionic->pdev->dev;
+ cfg->lif_index = lif->index;
+ cfg->lif_hw_index = lif->hw_index;
+
+ cfg->dbid = lif->kern_pid;
+ cfg->dbid_count = le32_to_cpu(lif->ionic->ident.dev.ndbpgs_per_lif);
+ cfg->dbpage = lif->kern_dbpage;
+ cfg->intr_ctrl = lif->ionic->idev.intr_ctrl;
+
+ cfg->db_phys = lif->ionic->bars[IONIC_PCI_BAR_DBELL].bus_addr;
+
+ if (IONIC_VERSION(ident->rdma.version, ident->rdma.minor_version) >=
+ IONIC_VERSION(2, 1))
+ cfg->page_size_supported =
+ le64_to_cpu(ident->rdma.page_size_cap);
+ else
+ cfg->page_size_supported = IONIC_PAGE_SIZE_SUPPORTED;
+
+ cfg->rdma_version = ident->rdma.version;
+ cfg->qp_opcodes = ident->rdma.qp_opcodes;
+ cfg->admin_opcodes = ident->rdma.admin_opcodes;
+
+ cfg->stats_type = le16_to_cpu(ident->rdma.stats_type);
+ cfg->npts_per_lif = le32_to_cpu(ident->rdma.npts_per_lif);
+ cfg->nmrs_per_lif = le32_to_cpu(ident->rdma.nmrs_per_lif);
+ cfg->nahs_per_lif = le32_to_cpu(ident->rdma.nahs_per_lif);
+
+ cfg->aq_base = le32_to_cpu(ident->rdma.aq_qtype.qid_base);
+ cfg->cq_base = le32_to_cpu(ident->rdma.cq_qtype.qid_base);
+ cfg->eq_base = le32_to_cpu(ident->rdma.eq_qtype.qid_base);
+
+ /*
+ * ionic_create_rdma_admin() may reduce aq_count or eq_count if
+ * it is unable to allocate all that were requested.
+ * aq_count is tunable; see ionic_aq_count
+ * eq_count is tunable; see ionic_eq_count
+ */
+ cfg->aq_count = le32_to_cpu(ident->rdma.aq_qtype.qid_count);
+ cfg->eq_count = le32_to_cpu(ident->rdma.eq_qtype.qid_count);
+ cfg->cq_count = le32_to_cpu(ident->rdma.cq_qtype.qid_count);
+ cfg->qp_count = le32_to_cpu(ident->rdma.sq_qtype.qid_count);
+ cfg->dbid_count = le32_to_cpu(lif->ionic->ident.dev.ndbpgs_per_lif);
+
+ cfg->aq_qtype = ident->rdma.aq_qtype.qtype;
+ cfg->sq_qtype = ident->rdma.sq_qtype.qtype;
+ cfg->rq_qtype = ident->rdma.rq_qtype.qtype;
+ cfg->cq_qtype = ident->rdma.cq_qtype.qtype;
+ cfg->eq_qtype = ident->rdma.eq_qtype.qtype;
+ cfg->udma_qgrp_shift = ident->rdma.udma_shift;
+ cfg->udma_count = 2;
+
+ cfg->max_stride = ident->rdma.max_stride;
+ cfg->expdb_mask = ionic_get_expdb(lif);
+
+ cfg->sq_expdb =
+ !!(lif->qtype_info[IONIC_QTYPE_TXQ].features & IONIC_QIDENT_F_EXPDB);
+ cfg->rq_expdb =
+ !!(lif->qtype_info[IONIC_QTYPE_RXQ].features & IONIC_QIDENT_F_EXPDB);
+}
+
+struct net_device *ionic_lif_netdev(struct ionic_lif *lif)
+{
+ struct net_device *netdev = lif->netdev;
+
+ dev_hold(netdev);
+ return netdev;
+}
+
+void ionic_lif_fw_version(struct ionic_lif *lif, char *str, size_t len)
+{
+ strscpy(str, lif->ionic->idev.dev_info.fw_version, len);
+}
+
+u8 ionic_lif_asic_rev(struct ionic_lif *lif)
+{
+ return lif->ionic->idev.dev_info.asic_rev;
+}
diff --git a/drivers/infiniband/hw/ionic/ionic_lif_cfg.h b/drivers/infiniband/hw/ionic/ionic_lif_cfg.h
new file mode 100644
index 000000000000..20853429f623
--- /dev/null
+++ b/drivers/infiniband/hw/ionic/ionic_lif_cfg.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2018-2025, Advanced Micro Devices, Inc. */
+
+#ifndef _IONIC_LIF_CFG_H_
+
+#define IONIC_VERSION(a, b) (((a) << 16) + ((b) << 8))
+#define IONIC_PAGE_SIZE_SUPPORTED 0x40201000 /* 4kb, 2Mb, 1Gb */
+
+#define IONIC_EXPDB_64B_WQE BIT(0)
+#define IONIC_EXPDB_128B_WQE BIT(1)
+#define IONIC_EXPDB_256B_WQE BIT(2)
+#define IONIC_EXPDB_512B_WQE BIT(3)
+
+struct ionic_lif_cfg {
+ struct device *hwdev;
+ struct ionic_lif *lif;
+
+ int lif_index;
+ int lif_hw_index;
+
+ u32 dbid;
+ int dbid_count;
+ u64 __iomem *dbpage;
+ struct ionic_intr __iomem *intr_ctrl;
+ phys_addr_t db_phys;
+
+ u64 page_size_supported;
+ u32 npts_per_lif;
+ u32 nmrs_per_lif;
+ u32 nahs_per_lif;
+
+ u32 aq_base;
+ u32 cq_base;
+ u32 eq_base;
+
+ int aq_count;
+ int eq_count;
+ int cq_count;
+ int qp_count;
+
+ u16 stats_type;
+ u8 aq_qtype;
+ u8 sq_qtype;
+ u8 rq_qtype;
+ u8 cq_qtype;
+ u8 eq_qtype;
+
+ u8 udma_count;
+ u8 udma_qgrp_shift;
+
+ u8 rdma_version;
+ u8 qp_opcodes;
+ u8 admin_opcodes;
+
+ u8 max_stride;
+ bool sq_expdb;
+ bool rq_expdb;
+ u8 expdb_mask;
+};
+
+void ionic_fill_lif_cfg(struct ionic_lif *lif, struct ionic_lif_cfg *cfg);
+struct net_device *ionic_lif_netdev(struct ionic_lif *lif);
+void ionic_lif_fw_version(struct ionic_lif *lif, char *str, size_t len);
+u8 ionic_lif_asic_rev(struct ionic_lif *lif);
+
+#endif /* _IONIC_LIF_CFG_H_ */
diff --git a/drivers/infiniband/hw/ionic/ionic_pgtbl.c b/drivers/infiniband/hw/ionic/ionic_pgtbl.c
new file mode 100644
index 000000000000..e74db73c9246
--- /dev/null
+++ b/drivers/infiniband/hw/ionic/ionic_pgtbl.c
@@ -0,0 +1,143 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2018-2025, Advanced Micro Devices, Inc. */
+
+#include <linux/mman.h>
+#include <linux/dma-mapping.h>
+
+#include "ionic_fw.h"
+#include "ionic_ibdev.h"
+
+__le64 ionic_pgtbl_dma(struct ionic_tbl_buf *buf, u64 va)
+{
+ u64 pg_mask = BIT_ULL(buf->page_size_log2) - 1;
+ u64 dma;
+
+ if (!buf->tbl_pages)
+ return cpu_to_le64(0);
+
+ if (buf->tbl_pages > 1)
+ return cpu_to_le64(buf->tbl_dma);
+
+ if (buf->tbl_buf)
+ dma = le64_to_cpu(buf->tbl_buf[0]);
+ else
+ dma = buf->tbl_dma;
+
+ return cpu_to_le64(dma + (va & pg_mask));
+}
+
+__be64 ionic_pgtbl_off(struct ionic_tbl_buf *buf, u64 va)
+{
+ if (buf->tbl_pages > 1) {
+ u64 pg_mask = BIT_ULL(buf->page_size_log2) - 1;
+
+ return cpu_to_be64(va & pg_mask);
+ }
+
+ return 0;
+}
+
+int ionic_pgtbl_page(struct ionic_tbl_buf *buf, u64 dma)
+{
+ if (unlikely(buf->tbl_pages == buf->tbl_limit))
+ return -ENOMEM;
+
+ if (buf->tbl_buf)
+ buf->tbl_buf[buf->tbl_pages] = cpu_to_le64(dma);
+ else
+ buf->tbl_dma = dma;
+
+ ++buf->tbl_pages;
+
+ return 0;
+}
+
+static int ionic_tbl_buf_alloc(struct ionic_ibdev *dev,
+ struct ionic_tbl_buf *buf)
+{
+ int rc;
+
+ buf->tbl_size = buf->tbl_limit * sizeof(*buf->tbl_buf);
+ buf->tbl_buf = kmalloc(buf->tbl_size, GFP_KERNEL);
+ if (!buf->tbl_buf)
+ return -ENOMEM;
+
+ buf->tbl_dma = dma_map_single(dev->lif_cfg.hwdev, buf->tbl_buf,
+ buf->tbl_size, DMA_TO_DEVICE);
+ rc = dma_mapping_error(dev->lif_cfg.hwdev, buf->tbl_dma);
+ if (rc) {
+ kfree(buf->tbl_buf);
+ return rc;
+ }
+
+ return 0;
+}
+
+static int ionic_pgtbl_umem(struct ionic_tbl_buf *buf, struct ib_umem *umem)
+{
+ struct ib_block_iter biter;
+ u64 page_dma;
+ int rc;
+
+ rdma_umem_for_each_dma_block(umem, &biter, BIT_ULL(buf->page_size_log2)) {
+ page_dma = rdma_block_iter_dma_address(&biter);
+ rc = ionic_pgtbl_page(buf, page_dma);
+ if (rc)
+ return rc;
+ }
+
+ return 0;
+}
+
+void ionic_pgtbl_unbuf(struct ionic_ibdev *dev, struct ionic_tbl_buf *buf)
+{
+ if (buf->tbl_buf)
+ dma_unmap_single(dev->lif_cfg.hwdev, buf->tbl_dma,
+ buf->tbl_size, DMA_TO_DEVICE);
+
+ kfree(buf->tbl_buf);
+ memset(buf, 0, sizeof(*buf));
+}
+
+int ionic_pgtbl_init(struct ionic_ibdev *dev,
+ struct ionic_tbl_buf *buf,
+ struct ib_umem *umem,
+ dma_addr_t dma,
+ int limit,
+ u64 page_size)
+{
+ int rc;
+
+ memset(buf, 0, sizeof(*buf));
+
+ if (umem) {
+ limit = ib_umem_num_dma_blocks(umem, page_size);
+ buf->page_size_log2 = order_base_2(page_size);
+ }
+
+ if (limit < 1)
+ return -EINVAL;
+
+ buf->tbl_limit = limit;
+
+ /* skip pgtbl if contiguous / direct translation */
+ if (limit > 1) {
+ rc = ionic_tbl_buf_alloc(dev, buf);
+ if (rc)
+ return rc;
+ }
+
+ if (umem)
+ rc = ionic_pgtbl_umem(buf, umem);
+ else
+ rc = ionic_pgtbl_page(buf, dma);
+
+ if (rc)
+ goto err_unbuf;
+
+ return 0;
+
+err_unbuf:
+ ionic_pgtbl_unbuf(dev, buf);
+ return rc;
+}
diff --git a/drivers/infiniband/hw/ionic/ionic_queue.c b/drivers/infiniband/hw/ionic/ionic_queue.c
new file mode 100644
index 000000000000..aa897ed2a412
--- /dev/null
+++ b/drivers/infiniband/hw/ionic/ionic_queue.c
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2018-2025, Advanced Micro Devices, Inc. */
+
+#include <linux/dma-mapping.h>
+
+#include "ionic_queue.h"
+
+int ionic_queue_init(struct ionic_queue *q, struct device *dma_dev,
+ int depth, size_t stride)
+{
+ if (depth < 0 || depth > 0xffff)
+ return -EINVAL;
+
+ if (stride == 0 || stride > 0x10000)
+ return -EINVAL;
+
+ if (depth == 0)
+ depth = 1;
+
+ q->depth_log2 = order_base_2(depth + 1);
+ q->stride_log2 = order_base_2(stride);
+
+ if (q->depth_log2 + q->stride_log2 < PAGE_SHIFT)
+ q->depth_log2 = PAGE_SHIFT - q->stride_log2;
+
+ if (q->depth_log2 > 16 || q->stride_log2 > 16)
+ return -EINVAL;
+
+ q->size = BIT_ULL(q->depth_log2 + q->stride_log2);
+ q->mask = BIT(q->depth_log2) - 1;
+
+ q->ptr = dma_alloc_coherent(dma_dev, q->size, &q->dma, GFP_KERNEL);
+ if (!q->ptr)
+ return -ENOMEM;
+
+ /* it will always be page aligned, but just to be sure... */
+ if (!PAGE_ALIGNED(q->ptr)) {
+ dma_free_coherent(dma_dev, q->size, q->ptr, q->dma);
+ return -ENOMEM;
+ }
+
+ q->prod = 0;
+ q->cons = 0;
+ q->dbell = 0;
+
+ return 0;
+}
+
+void ionic_queue_destroy(struct ionic_queue *q, struct device *dma_dev)
+{
+ dma_free_coherent(dma_dev, q->size, q->ptr, q->dma);
+}
diff --git a/drivers/infiniband/hw/ionic/ionic_queue.h b/drivers/infiniband/hw/ionic/ionic_queue.h
new file mode 100644
index 000000000000..d18020d4cad5
--- /dev/null
+++ b/drivers/infiniband/hw/ionic/ionic_queue.h
@@ -0,0 +1,234 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2018-2025, Advanced Micro Devices, Inc. */
+
+#ifndef _IONIC_QUEUE_H_
+#define _IONIC_QUEUE_H_
+
+#include <linux/io.h>
+#include <ionic_regs.h>
+
+#define IONIC_MAX_DEPTH 0xffff
+#define IONIC_MAX_CQ_DEPTH 0xffff
+#define IONIC_CQ_RING_ARM IONIC_DBELL_RING_1
+#define IONIC_CQ_RING_SOL IONIC_DBELL_RING_2
+
+/**
+ * struct ionic_queue - Ring buffer used between device and driver
+ * @size: Size of the buffer, in bytes
+ * @dma: Dma address of the buffer
+ * @ptr: Buffer virtual address
+ * @prod: Driver position in the queue
+ * @cons: Device position in the queue
+ * @mask: Capacity of the queue, subtracting the hole
+ * This value is equal to ((1 << depth_log2) - 1)
+ * @depth_log2: Log base two size depth of the queue
+ * @stride_log2: Log base two size of an element in the queue
+ * @dbell: Doorbell identifying bits
+ */
+struct ionic_queue {
+ size_t size;
+ dma_addr_t dma;
+ void *ptr;
+ u16 prod;
+ u16 cons;
+ u16 mask;
+ u8 depth_log2;
+ u8 stride_log2;
+ u64 dbell;
+};
+
+/**
+ * ionic_queue_init() - Initialize user space queue
+ * @q: Uninitialized queue structure
+ * @dma_dev: DMA device for mapping
+ * @depth: Depth of the queue
+ * @stride: Size of each element of the queue
+ *
+ * Return: status code
+ */
+int ionic_queue_init(struct ionic_queue *q, struct device *dma_dev,
+ int depth, size_t stride);
+
+/**
+ * ionic_queue_destroy() - Destroy user space queue
+ * @q: Queue structure
+ * @dma_dev: DMA device for mapping
+ *
+ * Return: status code
+ */
+void ionic_queue_destroy(struct ionic_queue *q, struct device *dma_dev);
+
+/**
+ * ionic_queue_empty() - Test if queue is empty
+ * @q: Queue structure
+ *
+ * This is only valid for to-device queues.
+ *
+ * Return: is empty
+ */
+static inline bool ionic_queue_empty(struct ionic_queue *q)
+{
+ return q->prod == q->cons;
+}
+
+/**
+ * ionic_queue_length() - Get the current length of the queue
+ * @q: Queue structure
+ *
+ * This is only valid for to-device queues.
+ *
+ * Return: length
+ */
+static inline u16 ionic_queue_length(struct ionic_queue *q)
+{
+ return (q->prod - q->cons) & q->mask;
+}
+
+/**
+ * ionic_queue_length_remaining() - Get the remaining length of the queue
+ * @q: Queue structure
+ *
+ * This is only valid for to-device queues.
+ *
+ * Return: length remaining
+ */
+static inline u16 ionic_queue_length_remaining(struct ionic_queue *q)
+{
+ return q->mask - ionic_queue_length(q);
+}
+
+/**
+ * ionic_queue_full() - Test if queue is full
+ * @q: Queue structure
+ *
+ * This is only valid for to-device queues.
+ *
+ * Return: is full
+ */
+static inline bool ionic_queue_full(struct ionic_queue *q)
+{
+ return q->mask == ionic_queue_length(q);
+}
+
+/**
+ * ionic_color_wrap() - Flip the color if prod is wrapped
+ * @prod: Queue index just after advancing
+ * @color: Queue color just prior to advancing the index
+ *
+ * Return: color after advancing the index
+ */
+static inline bool ionic_color_wrap(u16 prod, bool color)
+{
+ /* logical xor color with (prod == 0) */
+ return color != (prod == 0);
+}
+
+/**
+ * ionic_queue_at() - Get the element at the given index
+ * @q: Queue structure
+ * @idx: Index in the queue
+ *
+ * The index must be within the bounds of the queue. It is not checked here.
+ *
+ * Return: pointer to element at index
+ */
+static inline void *ionic_queue_at(struct ionic_queue *q, u16 idx)
+{
+ return q->ptr + ((unsigned long)idx << q->stride_log2);
+}
+
+/**
+ * ionic_queue_at_prod() - Get the element at the producer index
+ * @q: Queue structure
+ *
+ * Return: pointer to element at producer index
+ */
+static inline void *ionic_queue_at_prod(struct ionic_queue *q)
+{
+ return ionic_queue_at(q, q->prod);
+}
+
+/**
+ * ionic_queue_at_cons() - Get the element at the consumer index
+ * @q: Queue structure
+ *
+ * Return: pointer to element at consumer index
+ */
+static inline void *ionic_queue_at_cons(struct ionic_queue *q)
+{
+ return ionic_queue_at(q, q->cons);
+}
+
+/**
+ * ionic_queue_next() - Compute the next index
+ * @q: Queue structure
+ * @idx: Index
+ *
+ * Return: next index after idx
+ */
+static inline u16 ionic_queue_next(struct ionic_queue *q, u16 idx)
+{
+ return (idx + 1) & q->mask;
+}
+
+/**
+ * ionic_queue_produce() - Increase the producer index
+ * @q: Queue structure
+ *
+ * Caller must ensure that the queue is not full. It is not checked here.
+ */
+static inline void ionic_queue_produce(struct ionic_queue *q)
+{
+ q->prod = ionic_queue_next(q, q->prod);
+}
+
+/**
+ * ionic_queue_consume() - Increase the consumer index
+ * @q: Queue structure
+ *
+ * Caller must ensure that the queue is not empty. It is not checked here.
+ *
+ * This is only valid for to-device queues.
+ */
+static inline void ionic_queue_consume(struct ionic_queue *q)
+{
+ q->cons = ionic_queue_next(q, q->cons);
+}
+
+/**
+ * ionic_queue_consume_entries() - Increase the consumer index by entries
+ * @q: Queue structure
+ * @entries: Number of entries to increment
+ *
+ * Caller must ensure that the queue is not empty. It is not checked here.
+ *
+ * This is only valid for to-device queues.
+ */
+static inline void ionic_queue_consume_entries(struct ionic_queue *q,
+ u16 entries)
+{
+ q->cons = (q->cons + entries) & q->mask;
+}
+
+/**
+ * ionic_queue_dbell_init() - Initialize doorbell bits for queue id
+ * @q: Queue structure
+ * @qid: Queue identifying number
+ */
+static inline void ionic_queue_dbell_init(struct ionic_queue *q, u32 qid)
+{
+ q->dbell = IONIC_DBELL_QID(qid);
+}
+
+/**
+ * ionic_queue_dbell_val() - Get current doorbell update value
+ * @q: Queue structure
+ *
+ * Return: current doorbell update value
+ */
+static inline u64 ionic_queue_dbell_val(struct ionic_queue *q)
+{
+ return q->dbell | q->prod;
+}
+
+#endif /* _IONIC_QUEUE_H_ */
diff --git a/drivers/infiniband/hw/ionic/ionic_res.h b/drivers/infiniband/hw/ionic/ionic_res.h
new file mode 100644
index 000000000000..46c8c584bd9a
--- /dev/null
+++ b/drivers/infiniband/hw/ionic/ionic_res.h
@@ -0,0 +1,154 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2018-2025, Advanced Micro Devices, Inc. */
+
+#ifndef _IONIC_RES_H_
+#define _IONIC_RES_H_
+
+#include <linux/kernel.h>
+#include <linux/idr.h>
+
+/**
+ * struct ionic_resid_bits - Number allocator based on IDA
+ *
+ * @inuse: IDA handle
+ * @inuse_size: Highest ID limit for IDA
+ */
+struct ionic_resid_bits {
+ struct ida inuse;
+ unsigned int inuse_size;
+};
+
+/**
+ * ionic_resid_init() - Initialize a resid allocator
+ * @resid: Uninitialized resid allocator
+ * @size: Capacity of the allocator
+ *
+ * Return: Zero on success, or negative error number
+ */
+static inline void ionic_resid_init(struct ionic_resid_bits *resid,
+ unsigned int size)
+{
+ resid->inuse_size = size;
+ ida_init(&resid->inuse);
+}
+
+/**
+ * ionic_resid_destroy() - Destroy a resid allocator
+ * @resid: Resid allocator
+ */
+static inline void ionic_resid_destroy(struct ionic_resid_bits *resid)
+{
+ ida_destroy(&resid->inuse);
+}
+
+/**
+ * ionic_resid_get_shared() - Allocate an available shared resource id
+ * @resid: Resid allocator
+ * @min: Smallest valid resource id
+ * @size: One after largest valid resource id
+ *
+ * Return: Resource id, or negative error number
+ */
+static inline int ionic_resid_get_shared(struct ionic_resid_bits *resid,
+ unsigned int min,
+ unsigned int size)
+{
+ return ida_alloc_range(&resid->inuse, min, size - 1, GFP_KERNEL);
+}
+
+/**
+ * ionic_resid_get() - Allocate an available resource id
+ * @resid: Resid allocator
+ *
+ * Return: Resource id, or negative error number
+ */
+static inline int ionic_resid_get(struct ionic_resid_bits *resid)
+{
+ return ionic_resid_get_shared(resid, 0, resid->inuse_size);
+}
+
+/**
+ * ionic_resid_put() - Free a resource id
+ * @resid: Resid allocator
+ * @id: Resource id
+ */
+static inline void ionic_resid_put(struct ionic_resid_bits *resid, int id)
+{
+ ida_free(&resid->inuse, id);
+}
+
+/**
+ * ionic_bitid_to_qid() - Transform a resource bit index into a queue id
+ * @bitid: Bit index
+ * @qgrp_shift: Log2 number of queues per queue group
+ * @half_qid_shift: Log2 of half the total number of queues
+ *
+ * Return: Queue id
+ *
+ * Udma-constrained queues (QPs and CQs) are associated with their udma by
+ * queue group. Even queue groups are associated with udma0, and odd queue
+ * groups with udma1.
+ *
+ * For allocating queue ids, we want to arrange the bits into two halves,
+ * with the even queue groups of udma0 in the lower half of the bitset,
+ * and the odd queue groups of udma1 in the upper half of the bitset.
+ * Then, one or two calls of find_next_zero_bit can examine all the bits
+ * for queues of an entire udma.
+ *
+ * For example, assuming eight queue groups with qgrp qids per group:
+ *
+ * bitid 0*qgrp..1*qgrp-1 : qid 0*qgrp..1*qgrp-1
+ * bitid 1*qgrp..2*qgrp-1 : qid 2*qgrp..3*qgrp-1
+ * bitid 2*qgrp..3*qgrp-1 : qid 4*qgrp..5*qgrp-1
+ * bitid 3*qgrp..4*qgrp-1 : qid 6*qgrp..7*qgrp-1
+ * bitid 4*qgrp..5*qgrp-1 : qid 1*qgrp..2*qgrp-1
+ * bitid 5*qgrp..6*qgrp-1 : qid 3*qgrp..4*qgrp-1
+ * bitid 6*qgrp..7*qgrp-1 : qid 5*qgrp..6*qgrp-1
+ * bitid 7*qgrp..8*qgrp-1 : qid 7*qgrp..8*qgrp-1
+ *
+ * There are three important ranges of bits in the qid. There is the udma
+ * bit "U" at qgrp_shift, which is the least significant bit of the group
+ * index, and determines which udma a queue is associated with.
+ * The bits of lesser significance we can call the idx bits "I", which are
+ * the index of the queue within the group. The bits of greater significance
+ * we can call the grp bits "G", which are other bits of the group index that
+ * do not determine the udma. Those bits are just rearranged in the bit index
+ * in the bitset. A bitid has the udma bit in the most significant place,
+ * then the grp bits, then the idx bits.
+ *
+ * bitid: 00000000000000 U GGG IIIIII
+ * qid: 00000000000000 GGG U IIIIII
+ *
+ * Transforming from bit index to qid, or from qid to bit index, can be
+ * accomplished by rearranging the bits by masking and shifting.
+ */
+static inline u32 ionic_bitid_to_qid(u32 bitid, u8 qgrp_shift,
+ u8 half_qid_shift)
+{
+ u32 udma_bit =
+ (bitid & BIT(half_qid_shift)) >> (half_qid_shift - qgrp_shift);
+ u32 grp_bits = (bitid & GENMASK(half_qid_shift - 1, qgrp_shift)) << 1;
+ u32 idx_bits = bitid & (BIT(qgrp_shift) - 1);
+
+ return grp_bits | udma_bit | idx_bits;
+}
+
+/**
+ * ionic_qid_to_bitid() - Transform a queue id into a resource bit index
+ * @qid: queue index
+ * @qgrp_shift: Log2 number of queues per queue group
+ * @half_qid_shift: Log2 of half the total number of queues
+ *
+ * Return: Resource bit index
+ *
+ * This is the inverse of ionic_bitid_to_qid().
+ */
+static inline u32 ionic_qid_to_bitid(u32 qid, u8 qgrp_shift, u8 half_qid_shift)
+{
+ u32 udma_bit = (qid & BIT(qgrp_shift)) << (half_qid_shift - qgrp_shift);
+ u32 grp_bits = (qid & GENMASK(half_qid_shift, qgrp_shift + 1)) >> 1;
+ u32 idx_bits = qid & (BIT(qgrp_shift) - 1);
+
+ return udma_bit | grp_bits | idx_bits;
+}
+#endif /* _IONIC_RES_H_ */
diff --git a/drivers/infiniband/hw/irdma/Kconfig b/drivers/infiniband/hw/irdma/Kconfig
new file mode 100644
index 000000000000..0bd7e3fca1fb
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/Kconfig
@@ -0,0 +1,14 @@
+# SPDX-License-Identifier: GPL-2.0-only
+config INFINIBAND_IRDMA
+ tristate "Intel(R) Ethernet Protocol Driver for RDMA"
+ depends on INET
+ depends on IPV6 || !IPV6
+ depends on PCI
+ depends on IDPF && ICE && I40E
+ select GENERIC_ALLOCATOR
+ select AUXILIARY_BUS
+ select CRC32
+ help
+ This is an Intel(R) Ethernet Protocol Driver for RDMA that
+ supports IPU E2000 (RoCEv2), E810 (iWARP/RoCEv2) and X722 (iWARP)
+ network devices.
diff --git a/drivers/infiniband/hw/irdma/Makefile b/drivers/infiniband/hw/irdma/Makefile
new file mode 100644
index 000000000000..03ceb9e5475f
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/Makefile
@@ -0,0 +1,31 @@
+# SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+# Copyright (c) 2019, Intel Corporation.
+
+#
+# Makefile for the Intel(R) Ethernet Connection RDMA Linux Driver
+#
+
+obj-$(CONFIG_INFINIBAND_IRDMA) += irdma.o
+
+irdma-objs := cm.o \
+ ctrl.o \
+ hmc.o \
+ hw.o \
+ i40iw_hw.o \
+ i40iw_if.o \
+ ig3rdma_if.o\
+ icrdma_if.o \
+ icrdma_hw.o \
+ ig3rdma_hw.o\
+ main.o \
+ pble.o \
+ puda.o \
+ trace.o \
+ uda.o \
+ uk.o \
+ utils.o \
+ verbs.o \
+ virtchnl.o \
+ ws.o \
+
+CFLAGS_trace.o = -I$(src)
diff --git a/drivers/infiniband/hw/irdma/cm.c b/drivers/infiniband/hw/irdma/cm.c
new file mode 100644
index 000000000000..f4f4f92ba63a
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/cm.c
@@ -0,0 +1,4434 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2015 - 2021 Intel Corporation */
+#include "main.h"
+#include "trace.h"
+
+static void irdma_cm_post_event(struct irdma_cm_event *event);
+static void irdma_disconnect_worker(struct work_struct *work);
+
+/**
+ * irdma_free_sqbuf - put back puda buffer if refcount is 0
+ * @vsi: The VSI structure of the device
+ * @bufp: puda buffer to free
+ */
+void irdma_free_sqbuf(struct irdma_sc_vsi *vsi, void *bufp)
+{
+ struct irdma_puda_buf *buf = bufp;
+ struct irdma_puda_rsrc *ilq = vsi->ilq;
+
+ if (refcount_dec_and_test(&buf->refcount))
+ irdma_puda_ret_bufpool(ilq, buf);
+}
+
+/**
+ * irdma_record_ird_ord - Record IRD/ORD passed in
+ * @cm_node: connection's node
+ * @conn_ird: connection IRD
+ * @conn_ord: connection ORD
+ */
+static void irdma_record_ird_ord(struct irdma_cm_node *cm_node, u32 conn_ird,
+ u32 conn_ord)
+{
+ if (conn_ird > cm_node->dev->hw_attrs.max_hw_ird)
+ conn_ird = cm_node->dev->hw_attrs.max_hw_ird;
+
+ if (conn_ord > cm_node->dev->hw_attrs.max_hw_ord)
+ conn_ord = cm_node->dev->hw_attrs.max_hw_ord;
+ else if (!conn_ord && cm_node->send_rdma0_op == SEND_RDMA_READ_ZERO)
+ conn_ord = 1;
+ cm_node->ird_size = conn_ird;
+ cm_node->ord_size = conn_ord;
+}
+
+/**
+ * irdma_copy_ip_ntohl - copy IP address from network to host
+ * @dst: IP address in host order
+ * @src: IP address in network order (big endian)
+ */
+void irdma_copy_ip_ntohl(u32 *dst, __be32 *src)
+{
+ *dst++ = ntohl(*src++);
+ *dst++ = ntohl(*src++);
+ *dst++ = ntohl(*src++);
+ *dst = ntohl(*src);
+}
+
+/**
+ * irdma_copy_ip_htonl - copy IP address from host to network order
+ * @dst: IP address in network order (big endian)
+ * @src: IP address in host order
+ */
+void irdma_copy_ip_htonl(__be32 *dst, u32 *src)
+{
+ *dst++ = htonl(*src++);
+ *dst++ = htonl(*src++);
+ *dst++ = htonl(*src++);
+ *dst = htonl(*src);
+}
+
+/**
+ * irdma_get_addr_info
+ * @cm_node: contains ip/tcp info
+ * @cm_info: to get a copy of the cm_node ip/tcp info
+ */
+static void irdma_get_addr_info(struct irdma_cm_node *cm_node,
+ struct irdma_cm_info *cm_info)
+{
+ memset(cm_info, 0, sizeof(*cm_info));
+ cm_info->ipv4 = cm_node->ipv4;
+ cm_info->vlan_id = cm_node->vlan_id;
+ memcpy(cm_info->loc_addr, cm_node->loc_addr, sizeof(cm_info->loc_addr));
+ memcpy(cm_info->rem_addr, cm_node->rem_addr, sizeof(cm_info->rem_addr));
+ cm_info->loc_port = cm_node->loc_port;
+ cm_info->rem_port = cm_node->rem_port;
+}
+
+/**
+ * irdma_fill_sockaddr4 - fill in addr info for IPv4 connection
+ * @cm_node: connection's node
+ * @event: upper layer's cm event
+ */
+static inline void irdma_fill_sockaddr4(struct irdma_cm_node *cm_node,
+ struct iw_cm_event *event)
+{
+ struct sockaddr_in *laddr = (struct sockaddr_in *)&event->local_addr;
+ struct sockaddr_in *raddr = (struct sockaddr_in *)&event->remote_addr;
+
+ laddr->sin_family = AF_INET;
+ raddr->sin_family = AF_INET;
+
+ laddr->sin_port = htons(cm_node->loc_port);
+ raddr->sin_port = htons(cm_node->rem_port);
+
+ laddr->sin_addr.s_addr = htonl(cm_node->loc_addr[0]);
+ raddr->sin_addr.s_addr = htonl(cm_node->rem_addr[0]);
+}
+
+/**
+ * irdma_fill_sockaddr6 - fill in addr info for IPv6 connection
+ * @cm_node: connection's node
+ * @event: upper layer's cm event
+ */
+static inline void irdma_fill_sockaddr6(struct irdma_cm_node *cm_node,
+ struct iw_cm_event *event)
+{
+ struct sockaddr_in6 *laddr6 = (struct sockaddr_in6 *)&event->local_addr;
+ struct sockaddr_in6 *raddr6 = (struct sockaddr_in6 *)&event->remote_addr;
+
+ laddr6->sin6_family = AF_INET6;
+ raddr6->sin6_family = AF_INET6;
+
+ laddr6->sin6_port = htons(cm_node->loc_port);
+ raddr6->sin6_port = htons(cm_node->rem_port);
+
+ irdma_copy_ip_htonl(laddr6->sin6_addr.in6_u.u6_addr32,
+ cm_node->loc_addr);
+ irdma_copy_ip_htonl(raddr6->sin6_addr.in6_u.u6_addr32,
+ cm_node->rem_addr);
+}
+
+/**
+ * irdma_get_cmevent_info - for cm event upcall
+ * @cm_node: connection's node
+ * @cm_id: upper layers cm struct for the event
+ * @event: upper layer's cm event
+ */
+static inline void irdma_get_cmevent_info(struct irdma_cm_node *cm_node,
+ struct iw_cm_id *cm_id,
+ struct iw_cm_event *event)
+{
+ memcpy(&event->local_addr, &cm_id->m_local_addr,
+ sizeof(event->local_addr));
+ memcpy(&event->remote_addr, &cm_id->m_remote_addr,
+ sizeof(event->remote_addr));
+ if (cm_node) {
+ event->private_data = cm_node->pdata_buf;
+ event->private_data_len = (u8)cm_node->pdata.size;
+ event->ird = cm_node->ird_size;
+ event->ord = cm_node->ord_size;
+ }
+}
+
+/**
+ * irdma_send_cm_event - upcall cm's event handler
+ * @cm_node: connection's node
+ * @cm_id: upper layer's cm info struct
+ * @type: Event type to indicate
+ * @status: status for the event type
+ */
+static int irdma_send_cm_event(struct irdma_cm_node *cm_node,
+ struct iw_cm_id *cm_id,
+ enum iw_cm_event_type type, int status)
+{
+ struct iw_cm_event event = {};
+
+ event.event = type;
+ event.status = status;
+ trace_irdma_send_cm_event(cm_node, cm_id, type, status,
+ __builtin_return_address(0));
+
+ ibdev_dbg(&cm_node->iwdev->ibdev,
+ "CM: cm_node %p cm_id=%p state=%d accel=%d event_type=%d status=%d\n",
+ cm_node, cm_id, cm_node->accelerated, cm_node->state, type,
+ status);
+
+ switch (type) {
+ case IW_CM_EVENT_CONNECT_REQUEST:
+ if (cm_node->ipv4)
+ irdma_fill_sockaddr4(cm_node, &event);
+ else
+ irdma_fill_sockaddr6(cm_node, &event);
+ event.provider_data = cm_node;
+ event.private_data = cm_node->pdata_buf;
+ event.private_data_len = (u8)cm_node->pdata.size;
+ event.ird = cm_node->ird_size;
+ break;
+ case IW_CM_EVENT_CONNECT_REPLY:
+ irdma_get_cmevent_info(cm_node, cm_id, &event);
+ break;
+ case IW_CM_EVENT_ESTABLISHED:
+ event.ird = cm_node->ird_size;
+ event.ord = cm_node->ord_size;
+ break;
+ case IW_CM_EVENT_DISCONNECT:
+ case IW_CM_EVENT_CLOSE:
+ /* Wait if we are in RTS but havent issued the iwcm event upcall */
+ if (!cm_node->accelerated)
+ wait_for_completion(&cm_node->establish_comp);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return cm_id->event_handler(cm_id, &event);
+}
+
+/**
+ * irdma_timer_list_prep - add connection nodes to a list to perform timer tasks
+ * @cm_core: cm's core
+ * @timer_list: a timer list to which cm_node will be selected
+ */
+static void irdma_timer_list_prep(struct irdma_cm_core *cm_core,
+ struct list_head *timer_list)
+{
+ struct irdma_cm_node *cm_node;
+ int bkt;
+
+ hash_for_each_rcu(cm_core->cm_hash_tbl, bkt, cm_node, list) {
+ if ((cm_node->close_entry || cm_node->send_entry) &&
+ refcount_inc_not_zero(&cm_node->refcnt))
+ list_add(&cm_node->timer_entry, timer_list);
+ }
+}
+
+/**
+ * irdma_create_event - create cm event
+ * @cm_node: connection's node
+ * @type: Event type to generate
+ */
+static struct irdma_cm_event *irdma_create_event(struct irdma_cm_node *cm_node,
+ enum irdma_cm_event_type type)
+{
+ struct irdma_cm_event *event;
+
+ if (!cm_node->cm_id)
+ return NULL;
+
+ event = kzalloc(sizeof(*event), GFP_ATOMIC);
+
+ if (!event)
+ return NULL;
+
+ event->type = type;
+ event->cm_node = cm_node;
+ memcpy(event->cm_info.rem_addr, cm_node->rem_addr,
+ sizeof(event->cm_info.rem_addr));
+ memcpy(event->cm_info.loc_addr, cm_node->loc_addr,
+ sizeof(event->cm_info.loc_addr));
+ event->cm_info.rem_port = cm_node->rem_port;
+ event->cm_info.loc_port = cm_node->loc_port;
+ event->cm_info.cm_id = cm_node->cm_id;
+ ibdev_dbg(&cm_node->iwdev->ibdev,
+ "CM: node=%p event=%p type=%u dst=%pI4 src=%pI4\n", cm_node,
+ event, type, event->cm_info.loc_addr,
+ event->cm_info.rem_addr);
+ trace_irdma_create_event(cm_node, type, __builtin_return_address(0));
+ irdma_cm_post_event(event);
+
+ return event;
+}
+
+/**
+ * irdma_free_retrans_entry - free send entry
+ * @cm_node: connection's node
+ */
+static void irdma_free_retrans_entry(struct irdma_cm_node *cm_node)
+{
+ struct irdma_device *iwdev = cm_node->iwdev;
+ struct irdma_timer_entry *send_entry;
+
+ send_entry = cm_node->send_entry;
+ if (!send_entry)
+ return;
+
+ cm_node->send_entry = NULL;
+ irdma_free_sqbuf(&iwdev->vsi, send_entry->sqbuf);
+ kfree(send_entry);
+ refcount_dec(&cm_node->refcnt);
+}
+
+/**
+ * irdma_cleanup_retrans_entry - free send entry with lock
+ * @cm_node: connection's node
+ */
+static void irdma_cleanup_retrans_entry(struct irdma_cm_node *cm_node)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
+ irdma_free_retrans_entry(cm_node);
+ spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
+}
+
+/**
+ * irdma_form_ah_cm_frame - get a free packet and build frame with address handle
+ * @cm_node: connection's node ionfo to use in frame
+ * @options: pointer to options info
+ * @hdr: pointer mpa header
+ * @pdata: pointer to private data
+ * @flags: indicates FIN or ACK
+ */
+static struct irdma_puda_buf *irdma_form_ah_cm_frame(struct irdma_cm_node *cm_node,
+ struct irdma_kmem_info *options,
+ struct irdma_kmem_info *hdr,
+ struct irdma_mpa_priv_info *pdata,
+ u8 flags)
+{
+ struct irdma_puda_buf *sqbuf;
+ struct irdma_sc_vsi *vsi = &cm_node->iwdev->vsi;
+ u8 *buf;
+ struct tcphdr *tcph;
+ u16 pktsize;
+ u32 opts_len = 0;
+ u32 pd_len = 0;
+ u32 hdr_len = 0;
+
+ if (!cm_node->ah || !cm_node->ah->ah_info.ah_valid) {
+ ibdev_dbg(&cm_node->iwdev->ibdev, "CM: AH invalid\n");
+ return NULL;
+ }
+
+ sqbuf = irdma_puda_get_bufpool(vsi->ilq);
+ if (!sqbuf) {
+ ibdev_dbg(&cm_node->iwdev->ibdev, "CM: SQ buf NULL\n");
+ return NULL;
+ }
+
+ sqbuf->ah_id = cm_node->ah->ah_info.ah_idx;
+ buf = sqbuf->mem.va;
+ if (options)
+ opts_len = (u32)options->size;
+
+ if (hdr)
+ hdr_len = hdr->size;
+
+ if (pdata)
+ pd_len = pdata->size;
+
+ pktsize = sizeof(*tcph) + opts_len + hdr_len + pd_len;
+
+ memset(buf, 0, sizeof(*tcph));
+
+ sqbuf->totallen = pktsize;
+ sqbuf->tcphlen = sizeof(*tcph) + opts_len;
+ sqbuf->scratch = cm_node;
+
+ tcph = (struct tcphdr *)buf;
+ buf += sizeof(*tcph);
+
+ tcph->source = htons(cm_node->loc_port);
+ tcph->dest = htons(cm_node->rem_port);
+ tcph->seq = htonl(cm_node->tcp_cntxt.loc_seq_num);
+
+ if (flags & SET_ACK) {
+ cm_node->tcp_cntxt.loc_ack_num = cm_node->tcp_cntxt.rcv_nxt;
+ tcph->ack_seq = htonl(cm_node->tcp_cntxt.loc_ack_num);
+ tcph->ack = 1;
+ } else {
+ tcph->ack_seq = 0;
+ }
+
+ if (flags & SET_SYN) {
+ cm_node->tcp_cntxt.loc_seq_num++;
+ tcph->syn = 1;
+ } else {
+ cm_node->tcp_cntxt.loc_seq_num += hdr_len + pd_len;
+ }
+
+ if (flags & SET_FIN) {
+ cm_node->tcp_cntxt.loc_seq_num++;
+ tcph->fin = 1;
+ }
+
+ if (flags & SET_RST)
+ tcph->rst = 1;
+
+ tcph->doff = (u16)((sizeof(*tcph) + opts_len + 3) >> 2);
+ sqbuf->tcphlen = tcph->doff << 2;
+ tcph->window = htons(cm_node->tcp_cntxt.rcv_wnd);
+ tcph->urg_ptr = 0;
+
+ if (opts_len) {
+ memcpy(buf, options->addr, opts_len);
+ buf += opts_len;
+ }
+
+ if (hdr_len) {
+ memcpy(buf, hdr->addr, hdr_len);
+ buf += hdr_len;
+ }
+
+ if (pdata && pdata->addr)
+ memcpy(buf, pdata->addr, pdata->size);
+
+ refcount_set(&sqbuf->refcount, 1);
+
+ print_hex_dump_debug("ILQ: TRANSMIT ILQ BUFFER", DUMP_PREFIX_OFFSET,
+ 16, 8, sqbuf->mem.va, sqbuf->totallen, false);
+
+ return sqbuf;
+}
+
+/**
+ * irdma_form_uda_cm_frame - get a free packet and build frame full tcpip packet
+ * @cm_node: connection's node ionfo to use in frame
+ * @options: pointer to options info
+ * @hdr: pointer mpa header
+ * @pdata: pointer to private data
+ * @flags: indicates FIN or ACK
+ */
+static struct irdma_puda_buf *irdma_form_uda_cm_frame(struct irdma_cm_node *cm_node,
+ struct irdma_kmem_info *options,
+ struct irdma_kmem_info *hdr,
+ struct irdma_mpa_priv_info *pdata,
+ u8 flags)
+{
+ struct irdma_puda_buf *sqbuf;
+ struct irdma_sc_vsi *vsi = &cm_node->iwdev->vsi;
+ u8 *buf;
+
+ struct tcphdr *tcph;
+ struct iphdr *iph;
+ struct ipv6hdr *ip6h;
+ struct ethhdr *ethh;
+ u16 pktsize;
+ u16 eth_hlen = ETH_HLEN;
+ u32 opts_len = 0;
+ u32 pd_len = 0;
+ u32 hdr_len = 0;
+
+ u16 vtag;
+
+ sqbuf = irdma_puda_get_bufpool(vsi->ilq);
+ if (!sqbuf)
+ return NULL;
+
+ buf = sqbuf->mem.va;
+
+ if (options)
+ opts_len = (u32)options->size;
+
+ if (hdr)
+ hdr_len = hdr->size;
+
+ if (pdata)
+ pd_len = pdata->size;
+
+ if (cm_node->vlan_id < VLAN_N_VID)
+ eth_hlen += 4;
+
+ if (cm_node->ipv4)
+ pktsize = sizeof(*iph) + sizeof(*tcph);
+ else
+ pktsize = sizeof(*ip6h) + sizeof(*tcph);
+ pktsize += opts_len + hdr_len + pd_len;
+
+ memset(buf, 0, eth_hlen + pktsize);
+
+ sqbuf->totallen = pktsize + eth_hlen;
+ sqbuf->maclen = eth_hlen;
+ sqbuf->tcphlen = sizeof(*tcph) + opts_len;
+ sqbuf->scratch = cm_node;
+
+ ethh = (struct ethhdr *)buf;
+ buf += eth_hlen;
+
+ if (cm_node->do_lpb)
+ sqbuf->do_lpb = true;
+
+ if (cm_node->ipv4) {
+ sqbuf->ipv4 = true;
+
+ iph = (struct iphdr *)buf;
+ buf += sizeof(*iph);
+ tcph = (struct tcphdr *)buf;
+ buf += sizeof(*tcph);
+
+ ether_addr_copy(ethh->h_dest, cm_node->rem_mac);
+ ether_addr_copy(ethh->h_source, cm_node->loc_mac);
+ if (cm_node->vlan_id < VLAN_N_VID) {
+ ((struct vlan_ethhdr *)ethh)->h_vlan_proto =
+ htons(ETH_P_8021Q);
+ vtag = (cm_node->user_pri << VLAN_PRIO_SHIFT) |
+ cm_node->vlan_id;
+ ((struct vlan_ethhdr *)ethh)->h_vlan_TCI = htons(vtag);
+
+ ((struct vlan_ethhdr *)ethh)->h_vlan_encapsulated_proto =
+ htons(ETH_P_IP);
+ } else {
+ ethh->h_proto = htons(ETH_P_IP);
+ }
+
+ iph->version = IPVERSION;
+ iph->ihl = 5; /* 5 * 4Byte words, IP headr len */
+ iph->tos = cm_node->tos;
+ iph->tot_len = htons(pktsize);
+ iph->id = htons(++cm_node->tcp_cntxt.loc_id);
+
+ iph->frag_off = htons(0x4000);
+ iph->ttl = 0x40;
+ iph->protocol = IPPROTO_TCP;
+ iph->saddr = htonl(cm_node->loc_addr[0]);
+ iph->daddr = htonl(cm_node->rem_addr[0]);
+ } else {
+ sqbuf->ipv4 = false;
+ ip6h = (struct ipv6hdr *)buf;
+ buf += sizeof(*ip6h);
+ tcph = (struct tcphdr *)buf;
+ buf += sizeof(*tcph);
+
+ ether_addr_copy(ethh->h_dest, cm_node->rem_mac);
+ ether_addr_copy(ethh->h_source, cm_node->loc_mac);
+ if (cm_node->vlan_id < VLAN_N_VID) {
+ ((struct vlan_ethhdr *)ethh)->h_vlan_proto =
+ htons(ETH_P_8021Q);
+ vtag = (cm_node->user_pri << VLAN_PRIO_SHIFT) |
+ cm_node->vlan_id;
+ ((struct vlan_ethhdr *)ethh)->h_vlan_TCI = htons(vtag);
+ ((struct vlan_ethhdr *)ethh)->h_vlan_encapsulated_proto =
+ htons(ETH_P_IPV6);
+ } else {
+ ethh->h_proto = htons(ETH_P_IPV6);
+ }
+ ip6h->version = 6;
+ ip6h->priority = cm_node->tos >> 4;
+ ip6h->flow_lbl[0] = cm_node->tos << 4;
+ ip6h->flow_lbl[1] = 0;
+ ip6h->flow_lbl[2] = 0;
+ ip6h->payload_len = htons(pktsize - sizeof(*ip6h));
+ ip6h->nexthdr = 6;
+ ip6h->hop_limit = 128;
+ irdma_copy_ip_htonl(ip6h->saddr.in6_u.u6_addr32,
+ cm_node->loc_addr);
+ irdma_copy_ip_htonl(ip6h->daddr.in6_u.u6_addr32,
+ cm_node->rem_addr);
+ }
+
+ tcph->source = htons(cm_node->loc_port);
+ tcph->dest = htons(cm_node->rem_port);
+ tcph->seq = htonl(cm_node->tcp_cntxt.loc_seq_num);
+
+ if (flags & SET_ACK) {
+ cm_node->tcp_cntxt.loc_ack_num = cm_node->tcp_cntxt.rcv_nxt;
+ tcph->ack_seq = htonl(cm_node->tcp_cntxt.loc_ack_num);
+ tcph->ack = 1;
+ } else {
+ tcph->ack_seq = 0;
+ }
+
+ if (flags & SET_SYN) {
+ cm_node->tcp_cntxt.loc_seq_num++;
+ tcph->syn = 1;
+ } else {
+ cm_node->tcp_cntxt.loc_seq_num += hdr_len + pd_len;
+ }
+
+ if (flags & SET_FIN) {
+ cm_node->tcp_cntxt.loc_seq_num++;
+ tcph->fin = 1;
+ }
+
+ if (flags & SET_RST)
+ tcph->rst = 1;
+
+ tcph->doff = (u16)((sizeof(*tcph) + opts_len + 3) >> 2);
+ sqbuf->tcphlen = tcph->doff << 2;
+ tcph->window = htons(cm_node->tcp_cntxt.rcv_wnd);
+ tcph->urg_ptr = 0;
+
+ if (opts_len) {
+ memcpy(buf, options->addr, opts_len);
+ buf += opts_len;
+ }
+
+ if (hdr_len) {
+ memcpy(buf, hdr->addr, hdr_len);
+ buf += hdr_len;
+ }
+
+ if (pdata && pdata->addr)
+ memcpy(buf, pdata->addr, pdata->size);
+
+ refcount_set(&sqbuf->refcount, 1);
+
+ print_hex_dump_debug("ILQ: TRANSMIT ILQ BUFFER", DUMP_PREFIX_OFFSET,
+ 16, 8, sqbuf->mem.va, sqbuf->totallen, false);
+ return sqbuf;
+}
+
+/**
+ * irdma_send_reset - Send RST packet
+ * @cm_node: connection's node
+ */
+int irdma_send_reset(struct irdma_cm_node *cm_node)
+{
+ struct irdma_puda_buf *sqbuf;
+ int flags = SET_RST | SET_ACK;
+
+ trace_irdma_send_reset(cm_node, 0, __builtin_return_address(0));
+ sqbuf = cm_node->cm_core->form_cm_frame(cm_node, NULL, NULL, NULL,
+ flags);
+ if (!sqbuf)
+ return -ENOMEM;
+
+ ibdev_dbg(&cm_node->iwdev->ibdev,
+ "CM: caller: %pS cm_node %p cm_id=%p accel=%d state=%d rem_port=0x%04x, loc_port=0x%04x rem_addr=%pI4 loc_addr=%pI4\n",
+ __builtin_return_address(0), cm_node, cm_node->cm_id,
+ cm_node->accelerated, cm_node->state, cm_node->rem_port,
+ cm_node->loc_port, cm_node->rem_addr, cm_node->loc_addr);
+
+ return irdma_schedule_cm_timer(cm_node, sqbuf, IRDMA_TIMER_TYPE_SEND, 0,
+ 1);
+}
+
+/**
+ * irdma_active_open_err - send event for active side cm error
+ * @cm_node: connection's node
+ * @reset: Flag to send reset or not
+ */
+static void irdma_active_open_err(struct irdma_cm_node *cm_node, bool reset)
+{
+ trace_irdma_active_open_err(cm_node, reset,
+ __builtin_return_address(0));
+ irdma_cleanup_retrans_entry(cm_node);
+ cm_node->cm_core->stats_connect_errs++;
+ if (reset) {
+ ibdev_dbg(&cm_node->iwdev->ibdev,
+ "CM: cm_node=%p state=%d\n", cm_node,
+ cm_node->state);
+ refcount_inc(&cm_node->refcnt);
+ irdma_send_reset(cm_node);
+ }
+
+ cm_node->state = IRDMA_CM_STATE_CLOSED;
+ irdma_create_event(cm_node, IRDMA_CM_EVENT_ABORTED);
+}
+
+/**
+ * irdma_passive_open_err - handle passive side cm error
+ * @cm_node: connection's node
+ * @reset: send reset or just free cm_node
+ */
+static void irdma_passive_open_err(struct irdma_cm_node *cm_node, bool reset)
+{
+ irdma_cleanup_retrans_entry(cm_node);
+ cm_node->cm_core->stats_passive_errs++;
+ cm_node->state = IRDMA_CM_STATE_CLOSED;
+ ibdev_dbg(&cm_node->iwdev->ibdev, "CM: cm_node=%p state =%d\n",
+ cm_node, cm_node->state);
+ trace_irdma_passive_open_err(cm_node, reset,
+ __builtin_return_address(0));
+ if (reset)
+ irdma_send_reset(cm_node);
+ else
+ irdma_rem_ref_cm_node(cm_node);
+}
+
+/**
+ * irdma_event_connect_error - to create connect error event
+ * @event: cm information for connect event
+ */
+static void irdma_event_connect_error(struct irdma_cm_event *event)
+{
+ struct irdma_qp *iwqp;
+ struct iw_cm_id *cm_id;
+
+ cm_id = event->cm_node->cm_id;
+ if (!cm_id)
+ return;
+
+ iwqp = cm_id->provider_data;
+
+ if (!iwqp || !iwqp->iwdev)
+ return;
+
+ iwqp->cm_id = NULL;
+ cm_id->provider_data = NULL;
+ irdma_send_cm_event(event->cm_node, cm_id, IW_CM_EVENT_CONNECT_REPLY,
+ -ECONNRESET);
+ irdma_rem_ref_cm_node(event->cm_node);
+}
+
+/**
+ * irdma_process_options - process options from TCP header
+ * @cm_node: connection's node
+ * @optionsloc: point to start of options
+ * @optionsize: size of all options
+ * @syn_pkt: flag if syn packet
+ */
+static int irdma_process_options(struct irdma_cm_node *cm_node, u8 *optionsloc,
+ u32 optionsize, u32 syn_pkt)
+{
+ u32 tmp;
+ u32 offset = 0;
+ union all_known_options *all_options;
+ char got_mss_option = 0;
+
+ while (offset < optionsize) {
+ all_options = (union all_known_options *)(optionsloc + offset);
+ switch (all_options->base.optionnum) {
+ case OPTION_NUM_EOL:
+ offset = optionsize;
+ break;
+ case OPTION_NUM_NONE:
+ offset += 1;
+ continue;
+ case OPTION_NUM_MSS:
+ ibdev_dbg(&cm_node->iwdev->ibdev,
+ "CM: MSS Length: %d Offset: %d Size: %d\n",
+ all_options->mss.len, offset, optionsize);
+ got_mss_option = 1;
+ if (all_options->mss.len != 4)
+ return -EINVAL;
+ tmp = ntohs(all_options->mss.mss);
+ if ((cm_node->ipv4 &&
+ (tmp + IRDMA_MTU_TO_MSS_IPV4) < IRDMA_MIN_MTU_IPV4) ||
+ (!cm_node->ipv4 &&
+ (tmp + IRDMA_MTU_TO_MSS_IPV6) < IRDMA_MIN_MTU_IPV6))
+ return -EINVAL;
+ if (tmp < cm_node->tcp_cntxt.mss)
+ cm_node->tcp_cntxt.mss = tmp;
+ break;
+ case OPTION_NUM_WINDOW_SCALE:
+ cm_node->tcp_cntxt.snd_wscale =
+ all_options->windowscale.shiftcount;
+ break;
+ default:
+ ibdev_dbg(&cm_node->iwdev->ibdev,
+ "CM: Unsupported TCP Option: %x\n",
+ all_options->base.optionnum);
+ break;
+ }
+ offset += all_options->base.len;
+ }
+ if (!got_mss_option && syn_pkt)
+ cm_node->tcp_cntxt.mss = IRDMA_CM_DEFAULT_MSS;
+
+ return 0;
+}
+
+/**
+ * irdma_handle_tcp_options - setup TCP context info after parsing TCP options
+ * @cm_node: connection's node
+ * @tcph: pointer tcp header
+ * @optionsize: size of options rcvd
+ * @passive: active or passive flag
+ */
+static int irdma_handle_tcp_options(struct irdma_cm_node *cm_node,
+ struct tcphdr *tcph, int optionsize,
+ int passive)
+{
+ u8 *optionsloc = (u8 *)&tcph[1];
+ int ret;
+
+ if (optionsize) {
+ ret = irdma_process_options(cm_node, optionsloc, optionsize,
+ (u32)tcph->syn);
+ if (ret) {
+ ibdev_dbg(&cm_node->iwdev->ibdev,
+ "CM: Node %p, Sending Reset\n", cm_node);
+ if (passive)
+ irdma_passive_open_err(cm_node, true);
+ else
+ irdma_active_open_err(cm_node, true);
+ return ret;
+ }
+ }
+
+ cm_node->tcp_cntxt.snd_wnd = ntohs(tcph->window)
+ << cm_node->tcp_cntxt.snd_wscale;
+
+ if (cm_node->tcp_cntxt.snd_wnd > cm_node->tcp_cntxt.max_snd_wnd)
+ cm_node->tcp_cntxt.max_snd_wnd = cm_node->tcp_cntxt.snd_wnd;
+
+ return 0;
+}
+
+/**
+ * irdma_build_mpa_v1 - build a MPA V1 frame
+ * @cm_node: connection's node
+ * @start_addr: address where to build frame
+ * @mpa_key: to do read0 or write0
+ */
+static void irdma_build_mpa_v1(struct irdma_cm_node *cm_node, void *start_addr,
+ u8 mpa_key)
+{
+ struct ietf_mpa_v1 *mpa_frame = start_addr;
+
+ switch (mpa_key) {
+ case MPA_KEY_REQUEST:
+ memcpy(mpa_frame->key, IEFT_MPA_KEY_REQ, IETF_MPA_KEY_SIZE);
+ break;
+ case MPA_KEY_REPLY:
+ memcpy(mpa_frame->key, IEFT_MPA_KEY_REP, IETF_MPA_KEY_SIZE);
+ break;
+ default:
+ break;
+ }
+ mpa_frame->flags = IETF_MPA_FLAGS_CRC;
+ mpa_frame->rev = cm_node->mpa_frame_rev;
+ mpa_frame->priv_data_len = htons(cm_node->pdata.size);
+}
+
+/**
+ * irdma_build_mpa_v2 - build a MPA V2 frame
+ * @cm_node: connection's node
+ * @start_addr: buffer start address
+ * @mpa_key: to do read0 or write0
+ */
+static void irdma_build_mpa_v2(struct irdma_cm_node *cm_node, void *start_addr,
+ u8 mpa_key)
+{
+ struct ietf_mpa_v2 *mpa_frame = start_addr;
+ struct ietf_rtr_msg *rtr_msg = &mpa_frame->rtr_msg;
+ u16 ctrl_ird, ctrl_ord;
+
+ /* initialize the upper 5 bytes of the frame */
+ irdma_build_mpa_v1(cm_node, start_addr, mpa_key);
+ mpa_frame->flags |= IETF_MPA_V2_FLAG;
+ if (cm_node->iwdev->iw_ooo) {
+ mpa_frame->flags |= IETF_MPA_FLAGS_MARKERS;
+ cm_node->rcv_mark_en = true;
+ }
+ mpa_frame->priv_data_len = cpu_to_be16(be16_to_cpu(mpa_frame->priv_data_len) +
+ IETF_RTR_MSG_SIZE);
+
+ /* initialize RTR msg */
+ if (cm_node->mpav2_ird_ord == IETF_NO_IRD_ORD) {
+ ctrl_ird = IETF_NO_IRD_ORD;
+ ctrl_ord = IETF_NO_IRD_ORD;
+ } else {
+ ctrl_ird = (cm_node->ird_size > IETF_NO_IRD_ORD) ?
+ IETF_NO_IRD_ORD :
+ cm_node->ird_size;
+ ctrl_ord = (cm_node->ord_size > IETF_NO_IRD_ORD) ?
+ IETF_NO_IRD_ORD :
+ cm_node->ord_size;
+ }
+ ctrl_ird |= IETF_PEER_TO_PEER;
+
+ switch (mpa_key) {
+ case MPA_KEY_REQUEST:
+ ctrl_ord |= IETF_RDMA0_WRITE;
+ ctrl_ord |= IETF_RDMA0_READ;
+ break;
+ case MPA_KEY_REPLY:
+ switch (cm_node->send_rdma0_op) {
+ case SEND_RDMA_WRITE_ZERO:
+ ctrl_ord |= IETF_RDMA0_WRITE;
+ break;
+ case SEND_RDMA_READ_ZERO:
+ ctrl_ord |= IETF_RDMA0_READ;
+ break;
+ }
+ break;
+ default:
+ break;
+ }
+ rtr_msg->ctrl_ird = htons(ctrl_ird);
+ rtr_msg->ctrl_ord = htons(ctrl_ord);
+}
+
+/**
+ * irdma_cm_build_mpa_frame - build mpa frame for mpa version 1 or version 2
+ * @cm_node: connection's node
+ * @mpa: mpa: data buffer
+ * @mpa_key: to do read0 or write0
+ */
+static int irdma_cm_build_mpa_frame(struct irdma_cm_node *cm_node,
+ struct irdma_kmem_info *mpa, u8 mpa_key)
+{
+ int hdr_len = 0;
+
+ switch (cm_node->mpa_frame_rev) {
+ case IETF_MPA_V1:
+ hdr_len = sizeof(struct ietf_mpa_v1);
+ irdma_build_mpa_v1(cm_node, mpa->addr, mpa_key);
+ break;
+ case IETF_MPA_V2:
+ hdr_len = sizeof(struct ietf_mpa_v2);
+ irdma_build_mpa_v2(cm_node, mpa->addr, mpa_key);
+ break;
+ default:
+ break;
+ }
+
+ return hdr_len;
+}
+
+/**
+ * irdma_send_mpa_request - active node send mpa request to passive node
+ * @cm_node: connection's node
+ */
+static int irdma_send_mpa_request(struct irdma_cm_node *cm_node)
+{
+ struct irdma_puda_buf *sqbuf;
+
+ cm_node->mpa_hdr.addr = &cm_node->mpa_v2_frame;
+ cm_node->mpa_hdr.size = irdma_cm_build_mpa_frame(cm_node,
+ &cm_node->mpa_hdr,
+ MPA_KEY_REQUEST);
+ if (!cm_node->mpa_hdr.size) {
+ ibdev_dbg(&cm_node->iwdev->ibdev,
+ "CM: mpa size = %d\n", cm_node->mpa_hdr.size);
+ return -EINVAL;
+ }
+
+ sqbuf = cm_node->cm_core->form_cm_frame(cm_node, NULL,
+ &cm_node->mpa_hdr,
+ &cm_node->pdata, SET_ACK);
+ if (!sqbuf)
+ return -ENOMEM;
+
+ return irdma_schedule_cm_timer(cm_node, sqbuf, IRDMA_TIMER_TYPE_SEND, 1,
+ 0);
+}
+
+/**
+ * irdma_send_mpa_reject -
+ * @cm_node: connection's node
+ * @pdata: reject data for connection
+ * @plen: length of reject data
+ */
+static int irdma_send_mpa_reject(struct irdma_cm_node *cm_node,
+ const void *pdata, u8 plen)
+{
+ struct irdma_puda_buf *sqbuf;
+ struct irdma_mpa_priv_info priv_info;
+
+ cm_node->mpa_hdr.addr = &cm_node->mpa_v2_frame;
+ cm_node->mpa_hdr.size = irdma_cm_build_mpa_frame(cm_node,
+ &cm_node->mpa_hdr,
+ MPA_KEY_REPLY);
+
+ cm_node->mpa_frame.flags |= IETF_MPA_FLAGS_REJECT;
+ priv_info.addr = pdata;
+ priv_info.size = plen;
+
+ sqbuf = cm_node->cm_core->form_cm_frame(cm_node, NULL,
+ &cm_node->mpa_hdr, &priv_info,
+ SET_ACK | SET_FIN);
+ if (!sqbuf)
+ return -ENOMEM;
+
+ cm_node->state = IRDMA_CM_STATE_FIN_WAIT1;
+
+ return irdma_schedule_cm_timer(cm_node, sqbuf, IRDMA_TIMER_TYPE_SEND, 1,
+ 0);
+}
+
+/**
+ * irdma_negotiate_mpa_v2_ird_ord - negotiate MPAv2 IRD/ORD
+ * @cm_node: connection's node
+ * @buf: Data pointer
+ */
+static int irdma_negotiate_mpa_v2_ird_ord(struct irdma_cm_node *cm_node,
+ u8 *buf)
+{
+ struct ietf_mpa_v2 *mpa_v2_frame;
+ struct ietf_rtr_msg *rtr_msg;
+ u16 ird_size;
+ u16 ord_size;
+ u16 ctrl_ord;
+ u16 ctrl_ird;
+
+ mpa_v2_frame = (struct ietf_mpa_v2 *)buf;
+ rtr_msg = &mpa_v2_frame->rtr_msg;
+
+ /* parse rtr message */
+ ctrl_ord = ntohs(rtr_msg->ctrl_ord);
+ ctrl_ird = ntohs(rtr_msg->ctrl_ird);
+ ird_size = ctrl_ird & IETF_NO_IRD_ORD;
+ ord_size = ctrl_ord & IETF_NO_IRD_ORD;
+
+ if (!(ctrl_ird & IETF_PEER_TO_PEER))
+ return -EOPNOTSUPP;
+
+ if (ird_size == IETF_NO_IRD_ORD || ord_size == IETF_NO_IRD_ORD) {
+ cm_node->mpav2_ird_ord = IETF_NO_IRD_ORD;
+ goto negotiate_done;
+ }
+
+ if (cm_node->state != IRDMA_CM_STATE_MPAREQ_SENT) {
+ /* responder */
+ if (!ord_size && (ctrl_ord & IETF_RDMA0_READ))
+ cm_node->ird_size = 1;
+ if (cm_node->ord_size > ird_size)
+ cm_node->ord_size = ird_size;
+ } else {
+ /* initiator */
+ if (!ird_size && (ctrl_ord & IETF_RDMA0_READ))
+ /* Remote peer doesn't support RDMA0_READ */
+ return -EOPNOTSUPP;
+
+ if (cm_node->ord_size > ird_size)
+ cm_node->ord_size = ird_size;
+
+ if (cm_node->ird_size < ord_size)
+ /* no resources available */
+ return -EINVAL;
+ }
+
+negotiate_done:
+ if (ctrl_ord & IETF_RDMA0_READ)
+ cm_node->send_rdma0_op = SEND_RDMA_READ_ZERO;
+ else if (ctrl_ord & IETF_RDMA0_WRITE)
+ cm_node->send_rdma0_op = SEND_RDMA_WRITE_ZERO;
+ else
+ /* Not supported RDMA0 operation */
+ return -EOPNOTSUPP;
+
+ ibdev_dbg(&cm_node->iwdev->ibdev,
+ "CM: MPAV2 Negotiated ORD: %d, IRD: %d\n",
+ cm_node->ord_size, cm_node->ird_size);
+ trace_irdma_negotiate_mpa_v2(cm_node);
+ return 0;
+}
+
+/**
+ * irdma_parse_mpa - process an IETF MPA frame
+ * @cm_node: connection's node
+ * @buf: Data pointer
+ * @type: to return accept or reject
+ * @len: Len of mpa buffer
+ */
+static int irdma_parse_mpa(struct irdma_cm_node *cm_node, u8 *buf, u32 *type,
+ u32 len)
+{
+ struct ietf_mpa_v1 *mpa_frame;
+ int mpa_hdr_len, priv_data_len, ret;
+
+ *type = IRDMA_MPA_REQUEST_ACCEPT;
+
+ if (len < sizeof(struct ietf_mpa_v1)) {
+ ibdev_dbg(&cm_node->iwdev->ibdev,
+ "CM: ietf buffer small (%x)\n", len);
+ return -EINVAL;
+ }
+
+ mpa_frame = (struct ietf_mpa_v1 *)buf;
+ mpa_hdr_len = sizeof(struct ietf_mpa_v1);
+ priv_data_len = ntohs(mpa_frame->priv_data_len);
+
+ if (priv_data_len > IETF_MAX_PRIV_DATA_LEN) {
+ ibdev_dbg(&cm_node->iwdev->ibdev,
+ "CM: private_data too big %d\n", priv_data_len);
+ return -EOVERFLOW;
+ }
+
+ if (mpa_frame->rev != IETF_MPA_V1 && mpa_frame->rev != IETF_MPA_V2) {
+ ibdev_dbg(&cm_node->iwdev->ibdev,
+ "CM: unsupported mpa rev = %d\n", mpa_frame->rev);
+ return -EINVAL;
+ }
+
+ if (mpa_frame->rev > cm_node->mpa_frame_rev) {
+ ibdev_dbg(&cm_node->iwdev->ibdev, "CM: rev %d\n",
+ mpa_frame->rev);
+ return -EINVAL;
+ }
+
+ cm_node->mpa_frame_rev = mpa_frame->rev;
+ if (cm_node->state != IRDMA_CM_STATE_MPAREQ_SENT) {
+ if (memcmp(mpa_frame->key, IEFT_MPA_KEY_REQ,
+ IETF_MPA_KEY_SIZE)) {
+ ibdev_dbg(&cm_node->iwdev->ibdev,
+ "CM: Unexpected MPA Key received\n");
+ return -EINVAL;
+ }
+ } else {
+ if (memcmp(mpa_frame->key, IEFT_MPA_KEY_REP,
+ IETF_MPA_KEY_SIZE)) {
+ ibdev_dbg(&cm_node->iwdev->ibdev,
+ "CM: Unexpected MPA Key received\n");
+ return -EINVAL;
+ }
+ }
+
+ if (priv_data_len + mpa_hdr_len > len) {
+ ibdev_dbg(&cm_node->iwdev->ibdev,
+ "CM: ietf buffer len(%x + %x != %x)\n",
+ priv_data_len, mpa_hdr_len, len);
+ return -EOVERFLOW;
+ }
+
+ if (len > IRDMA_MAX_CM_BUF) {
+ ibdev_dbg(&cm_node->iwdev->ibdev,
+ "CM: ietf buffer large len = %d\n", len);
+ return -EOVERFLOW;
+ }
+
+ switch (mpa_frame->rev) {
+ case IETF_MPA_V2:
+ mpa_hdr_len += IETF_RTR_MSG_SIZE;
+ ret = irdma_negotiate_mpa_v2_ird_ord(cm_node, buf);
+ if (ret)
+ return ret;
+ break;
+ case IETF_MPA_V1:
+ default:
+ break;
+ }
+
+ memcpy(cm_node->pdata_buf, buf + mpa_hdr_len, priv_data_len);
+ cm_node->pdata.size = priv_data_len;
+
+ if (mpa_frame->flags & IETF_MPA_FLAGS_REJECT)
+ *type = IRDMA_MPA_REQUEST_REJECT;
+
+ if (mpa_frame->flags & IETF_MPA_FLAGS_MARKERS)
+ cm_node->snd_mark_en = true;
+
+ return 0;
+}
+
+/**
+ * irdma_schedule_cm_timer
+ * @cm_node: connection's node
+ * @sqbuf: buffer to send
+ * @type: if it is send or close
+ * @send_retrans: if rexmits to be done
+ * @close_when_complete: is cm_node to be removed
+ *
+ * note - cm_node needs to be protected before calling this. Encase in:
+ * irdma_rem_ref_cm_node(cm_core, cm_node);
+ * irdma_schedule_cm_timer(...)
+ * refcount_inc(&cm_node->refcnt);
+ */
+int irdma_schedule_cm_timer(struct irdma_cm_node *cm_node,
+ struct irdma_puda_buf *sqbuf,
+ enum irdma_timer_type type, int send_retrans,
+ int close_when_complete)
+{
+ struct irdma_sc_vsi *vsi = &cm_node->iwdev->vsi;
+ struct irdma_cm_core *cm_core = cm_node->cm_core;
+ struct irdma_timer_entry *new_send;
+ u32 was_timer_set;
+ unsigned long flags;
+
+ new_send = kzalloc(sizeof(*new_send), GFP_ATOMIC);
+ if (!new_send) {
+ if (type != IRDMA_TIMER_TYPE_CLOSE)
+ irdma_free_sqbuf(vsi, sqbuf);
+ return -ENOMEM;
+ }
+
+ new_send->retrycount = IRDMA_DEFAULT_RETRYS;
+ new_send->retranscount = IRDMA_DEFAULT_RETRANS;
+ new_send->sqbuf = sqbuf;
+ new_send->timetosend = jiffies;
+ new_send->type = type;
+ new_send->send_retrans = send_retrans;
+ new_send->close_when_complete = close_when_complete;
+
+ if (type == IRDMA_TIMER_TYPE_CLOSE) {
+ new_send->timetosend += (HZ / 10);
+ if (cm_node->close_entry) {
+ kfree(new_send);
+ ibdev_dbg(&cm_node->iwdev->ibdev,
+ "CM: already close entry\n");
+ return -EINVAL;
+ }
+
+ cm_node->close_entry = new_send;
+ } else { /* type == IRDMA_TIMER_TYPE_SEND */
+ spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
+ cm_node->send_entry = new_send;
+ refcount_inc(&cm_node->refcnt);
+ spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
+ new_send->timetosend = jiffies + IRDMA_RETRY_TIMEOUT;
+
+ refcount_inc(&sqbuf->refcount);
+ irdma_puda_send_buf(vsi->ilq, sqbuf);
+ if (!send_retrans) {
+ irdma_cleanup_retrans_entry(cm_node);
+ if (close_when_complete)
+ irdma_rem_ref_cm_node(cm_node);
+ return 0;
+ }
+ }
+
+ spin_lock_irqsave(&cm_core->ht_lock, flags);
+ was_timer_set = timer_pending(&cm_core->tcp_timer);
+
+ if (!was_timer_set) {
+ cm_core->tcp_timer.expires = new_send->timetosend;
+ add_timer(&cm_core->tcp_timer);
+ }
+ spin_unlock_irqrestore(&cm_core->ht_lock, flags);
+
+ return 0;
+}
+
+/**
+ * irdma_retrans_expired - Could not rexmit the packet
+ * @cm_node: connection's node
+ */
+static void irdma_retrans_expired(struct irdma_cm_node *cm_node)
+{
+ enum irdma_cm_node_state state = cm_node->state;
+
+ cm_node->state = IRDMA_CM_STATE_CLOSED;
+ switch (state) {
+ case IRDMA_CM_STATE_SYN_RCVD:
+ case IRDMA_CM_STATE_CLOSING:
+ irdma_rem_ref_cm_node(cm_node);
+ break;
+ case IRDMA_CM_STATE_FIN_WAIT1:
+ case IRDMA_CM_STATE_LAST_ACK:
+ irdma_send_reset(cm_node);
+ break;
+ default:
+ refcount_inc(&cm_node->refcnt);
+ irdma_send_reset(cm_node);
+ irdma_create_event(cm_node, IRDMA_CM_EVENT_ABORTED);
+ break;
+ }
+}
+
+/**
+ * irdma_handle_close_entry - for handling retry/timeouts
+ * @cm_node: connection's node
+ * @rem_node: flag for remove cm_node
+ */
+static void irdma_handle_close_entry(struct irdma_cm_node *cm_node,
+ u32 rem_node)
+{
+ struct irdma_timer_entry *close_entry = cm_node->close_entry;
+ struct irdma_qp *iwqp;
+ unsigned long flags;
+
+ if (!close_entry)
+ return;
+ iwqp = (struct irdma_qp *)close_entry->sqbuf;
+ if (iwqp) {
+ spin_lock_irqsave(&iwqp->lock, flags);
+ if (iwqp->cm_id) {
+ iwqp->hw_tcp_state = IRDMA_TCP_STATE_CLOSED;
+ iwqp->hw_iwarp_state = IRDMA_QP_STATE_ERROR;
+ iwqp->last_aeq = IRDMA_AE_RESET_SENT;
+ iwqp->ibqp_state = IB_QPS_ERR;
+ spin_unlock_irqrestore(&iwqp->lock, flags);
+ irdma_cm_disconn(iwqp);
+ } else {
+ spin_unlock_irqrestore(&iwqp->lock, flags);
+ }
+ } else if (rem_node) {
+ /* TIME_WAIT state */
+ irdma_rem_ref_cm_node(cm_node);
+ }
+
+ kfree(close_entry);
+ cm_node->close_entry = NULL;
+}
+
+/**
+ * irdma_cm_timer_tick - system's timer expired callback
+ * @t: Pointer to timer_list
+ */
+static void irdma_cm_timer_tick(struct timer_list *t)
+{
+ unsigned long nexttimeout = jiffies + IRDMA_LONG_TIME;
+ struct irdma_cm_node *cm_node;
+ struct irdma_timer_entry *send_entry, *close_entry;
+ struct list_head *list_core_temp;
+ struct list_head *list_node;
+ struct irdma_cm_core *cm_core = timer_container_of(cm_core, t,
+ tcp_timer);
+ struct irdma_sc_vsi *vsi;
+ u32 settimer = 0;
+ unsigned long timetosend;
+ unsigned long flags;
+ struct list_head timer_list;
+
+ INIT_LIST_HEAD(&timer_list);
+
+ rcu_read_lock();
+ irdma_timer_list_prep(cm_core, &timer_list);
+ rcu_read_unlock();
+
+ list_for_each_safe (list_node, list_core_temp, &timer_list) {
+ cm_node = container_of(list_node, struct irdma_cm_node,
+ timer_entry);
+ close_entry = cm_node->close_entry;
+
+ if (close_entry) {
+ if (time_after(close_entry->timetosend, jiffies)) {
+ if (nexttimeout > close_entry->timetosend ||
+ !settimer) {
+ nexttimeout = close_entry->timetosend;
+ settimer = 1;
+ }
+ } else {
+ irdma_handle_close_entry(cm_node, 1);
+ }
+ }
+
+ spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
+
+ send_entry = cm_node->send_entry;
+ if (!send_entry)
+ goto done;
+ if (time_after(send_entry->timetosend, jiffies)) {
+ if (cm_node->state != IRDMA_CM_STATE_OFFLOADED) {
+ if (nexttimeout > send_entry->timetosend ||
+ !settimer) {
+ nexttimeout = send_entry->timetosend;
+ settimer = 1;
+ }
+ } else {
+ irdma_free_retrans_entry(cm_node);
+ }
+ goto done;
+ }
+
+ if (cm_node->state == IRDMA_CM_STATE_OFFLOADED ||
+ cm_node->state == IRDMA_CM_STATE_CLOSED) {
+ irdma_free_retrans_entry(cm_node);
+ goto done;
+ }
+
+ if (!send_entry->retranscount || !send_entry->retrycount) {
+ irdma_free_retrans_entry(cm_node);
+
+ spin_unlock_irqrestore(&cm_node->retrans_list_lock,
+ flags);
+ irdma_retrans_expired(cm_node);
+ cm_node->state = IRDMA_CM_STATE_CLOSED;
+ spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
+ goto done;
+ }
+ spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
+
+ vsi = &cm_node->iwdev->vsi;
+ if (!cm_node->ack_rcvd) {
+ refcount_inc(&send_entry->sqbuf->refcount);
+ irdma_puda_send_buf(vsi->ilq, send_entry->sqbuf);
+ cm_node->cm_core->stats_pkt_retrans++;
+ }
+
+ spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
+ if (send_entry->send_retrans) {
+ send_entry->retranscount--;
+ timetosend = (IRDMA_RETRY_TIMEOUT <<
+ (IRDMA_DEFAULT_RETRANS -
+ send_entry->retranscount));
+
+ send_entry->timetosend = jiffies +
+ min(timetosend, IRDMA_MAX_TIMEOUT);
+ if (nexttimeout > send_entry->timetosend || !settimer) {
+ nexttimeout = send_entry->timetosend;
+ settimer = 1;
+ }
+ } else {
+ int close_when_complete;
+
+ close_when_complete = send_entry->close_when_complete;
+ irdma_free_retrans_entry(cm_node);
+ if (close_when_complete)
+ irdma_rem_ref_cm_node(cm_node);
+ }
+done:
+ spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
+ irdma_rem_ref_cm_node(cm_node);
+ }
+
+ if (settimer) {
+ spin_lock_irqsave(&cm_core->ht_lock, flags);
+ if (!timer_pending(&cm_core->tcp_timer)) {
+ cm_core->tcp_timer.expires = nexttimeout;
+ add_timer(&cm_core->tcp_timer);
+ }
+ spin_unlock_irqrestore(&cm_core->ht_lock, flags);
+ }
+}
+
+/**
+ * irdma_send_syn - send SYN packet
+ * @cm_node: connection's node
+ * @sendack: flag to set ACK bit or not
+ */
+int irdma_send_syn(struct irdma_cm_node *cm_node, u32 sendack)
+{
+ struct irdma_puda_buf *sqbuf;
+ int flags = SET_SYN;
+ char optionsbuf[sizeof(struct option_mss) +
+ sizeof(struct option_windowscale) +
+ sizeof(struct option_base) + TCP_OPTIONS_PADDING];
+ struct irdma_kmem_info opts;
+ int optionssize = 0;
+ /* Sending MSS option */
+ union all_known_options *options;
+
+ opts.addr = optionsbuf;
+ if (!cm_node)
+ return -EINVAL;
+
+ options = (union all_known_options *)&optionsbuf[optionssize];
+ options->mss.optionnum = OPTION_NUM_MSS;
+ options->mss.len = sizeof(struct option_mss);
+ options->mss.mss = htons(cm_node->tcp_cntxt.mss);
+ optionssize += sizeof(struct option_mss);
+
+ options = (union all_known_options *)&optionsbuf[optionssize];
+ options->windowscale.optionnum = OPTION_NUM_WINDOW_SCALE;
+ options->windowscale.len = sizeof(struct option_windowscale);
+ options->windowscale.shiftcount = cm_node->tcp_cntxt.rcv_wscale;
+ optionssize += sizeof(struct option_windowscale);
+ options = (union all_known_options *)&optionsbuf[optionssize];
+ options->eol = OPTION_NUM_EOL;
+ optionssize += 1;
+
+ if (sendack)
+ flags |= SET_ACK;
+
+ opts.size = optionssize;
+
+ sqbuf = cm_node->cm_core->form_cm_frame(cm_node, &opts, NULL, NULL,
+ flags);
+ if (!sqbuf)
+ return -ENOMEM;
+
+ return irdma_schedule_cm_timer(cm_node, sqbuf, IRDMA_TIMER_TYPE_SEND, 1,
+ 0);
+}
+
+/**
+ * irdma_send_ack - Send ACK packet
+ * @cm_node: connection's node
+ */
+void irdma_send_ack(struct irdma_cm_node *cm_node)
+{
+ struct irdma_puda_buf *sqbuf;
+ struct irdma_sc_vsi *vsi = &cm_node->iwdev->vsi;
+
+ sqbuf = cm_node->cm_core->form_cm_frame(cm_node, NULL, NULL, NULL,
+ SET_ACK);
+ if (sqbuf)
+ irdma_puda_send_buf(vsi->ilq, sqbuf);
+}
+
+/**
+ * irdma_send_fin - Send FIN pkt
+ * @cm_node: connection's node
+ */
+static int irdma_send_fin(struct irdma_cm_node *cm_node)
+{
+ struct irdma_puda_buf *sqbuf;
+
+ sqbuf = cm_node->cm_core->form_cm_frame(cm_node, NULL, NULL, NULL,
+ SET_ACK | SET_FIN);
+ if (!sqbuf)
+ return -ENOMEM;
+
+ return irdma_schedule_cm_timer(cm_node, sqbuf, IRDMA_TIMER_TYPE_SEND, 1,
+ 0);
+}
+
+/**
+ * irdma_find_listener - find a cm node listening on this addr-port pair
+ * @cm_core: cm's core
+ * @dst_addr: listener ip addr
+ * @ipv4: flag indicating IPv4 when true
+ * @dst_port: listener tcp port num
+ * @vlan_id: virtual LAN ID
+ * @listener_state: state to match with listen node's
+ */
+static struct irdma_cm_listener *
+irdma_find_listener(struct irdma_cm_core *cm_core, u32 *dst_addr, bool ipv4,
+ u16 dst_port, u16 vlan_id,
+ enum irdma_cm_listener_state listener_state)
+{
+ struct irdma_cm_listener *listen_node;
+ static const u32 ip_zero[4] = { 0, 0, 0, 0 };
+ u32 listen_addr[4];
+ u16 listen_port;
+ unsigned long flags;
+
+ /* walk list and find cm_node associated with this session ID */
+ spin_lock_irqsave(&cm_core->listen_list_lock, flags);
+ list_for_each_entry (listen_node, &cm_core->listen_list, list) {
+ memcpy(listen_addr, listen_node->loc_addr, sizeof(listen_addr));
+ listen_port = listen_node->loc_port;
+ if (listen_node->ipv4 != ipv4 || listen_port != dst_port ||
+ !(listener_state & listen_node->listener_state))
+ continue;
+ /* compare node pair, return node handle if a match */
+ if (!memcmp(listen_addr, ip_zero, sizeof(listen_addr)) ||
+ (!memcmp(listen_addr, dst_addr, sizeof(listen_addr)) &&
+ vlan_id == listen_node->vlan_id)) {
+ refcount_inc(&listen_node->refcnt);
+ spin_unlock_irqrestore(&cm_core->listen_list_lock,
+ flags);
+ trace_irdma_find_listener(listen_node);
+ return listen_node;
+ }
+ }
+ spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
+
+ return NULL;
+}
+
+/**
+ * irdma_del_multiple_qhash - Remove qhash and child listens
+ * @iwdev: iWarp device
+ * @cm_info: CM info for parent listen node
+ * @cm_parent_listen_node: The parent listen node
+ */
+static int irdma_del_multiple_qhash(struct irdma_device *iwdev,
+ struct irdma_cm_info *cm_info,
+ struct irdma_cm_listener *cm_parent_listen_node)
+{
+ struct irdma_cm_listener *child_listen_node;
+ struct list_head *pos, *tpos;
+ unsigned long flags;
+ int ret = -EINVAL;
+
+ spin_lock_irqsave(&iwdev->cm_core.listen_list_lock, flags);
+ list_for_each_safe (pos, tpos,
+ &cm_parent_listen_node->child_listen_list) {
+ child_listen_node = list_entry(pos, struct irdma_cm_listener,
+ child_listen_list);
+ if (child_listen_node->ipv4)
+ ibdev_dbg(&iwdev->ibdev,
+ "CM: removing child listen for IP=%pI4, port=%d, vlan=%d\n",
+ child_listen_node->loc_addr,
+ child_listen_node->loc_port,
+ child_listen_node->vlan_id);
+ else
+ ibdev_dbg(&iwdev->ibdev,
+ "CM: removing child listen for IP=%pI6, port=%d, vlan=%d\n",
+ child_listen_node->loc_addr,
+ child_listen_node->loc_port,
+ child_listen_node->vlan_id);
+ trace_irdma_del_multiple_qhash(child_listen_node);
+ list_del(pos);
+ memcpy(cm_info->loc_addr, child_listen_node->loc_addr,
+ sizeof(cm_info->loc_addr));
+ cm_info->vlan_id = child_listen_node->vlan_id;
+ if (child_listen_node->qhash_set) {
+ ret = irdma_manage_qhash(iwdev, cm_info,
+ IRDMA_QHASH_TYPE_TCP_SYN,
+ IRDMA_QHASH_MANAGE_TYPE_DELETE,
+ NULL, false);
+ child_listen_node->qhash_set = false;
+ } else {
+ ret = 0;
+ }
+ ibdev_dbg(&iwdev->ibdev,
+ "CM: Child listen node freed = %p\n",
+ child_listen_node);
+ kfree(child_listen_node);
+ cm_parent_listen_node->cm_core->stats_listen_nodes_destroyed++;
+ }
+ spin_unlock_irqrestore(&iwdev->cm_core.listen_list_lock, flags);
+
+ return ret;
+}
+
+static u8 irdma_iw_get_vlan_prio(u32 *loc_addr, u8 prio, bool ipv4)
+{
+ struct net_device *ndev = NULL;
+
+ rcu_read_lock();
+ if (ipv4) {
+ ndev = ip_dev_find(&init_net, htonl(loc_addr[0]));
+ } else if (IS_ENABLED(CONFIG_IPV6)) {
+ struct net_device *ip_dev;
+ struct in6_addr laddr6;
+
+ irdma_copy_ip_htonl(laddr6.in6_u.u6_addr32, loc_addr);
+
+ for_each_netdev_rcu (&init_net, ip_dev) {
+ if (ipv6_chk_addr(&init_net, &laddr6, ip_dev, 1)) {
+ ndev = ip_dev;
+ break;
+ }
+ }
+ }
+
+ if (!ndev)
+ goto done;
+ if (is_vlan_dev(ndev))
+ prio = (vlan_dev_get_egress_qos_mask(ndev, prio) & VLAN_PRIO_MASK)
+ >> VLAN_PRIO_SHIFT;
+ if (ipv4)
+ dev_put(ndev);
+
+done:
+ rcu_read_unlock();
+
+ return prio;
+}
+
+/**
+ * irdma_get_vlan_mac_ipv6 - Gets the vlan and mac
+ * @addr: local IPv6 address
+ * @vlan_id: vlan id for the given IPv6 address
+ * @mac: mac address for the given IPv6 address
+ *
+ * Returns the vlan id and mac for an IPv6 address.
+ */
+void irdma_get_vlan_mac_ipv6(u32 *addr, u16 *vlan_id, u8 *mac)
+{
+ struct net_device *ip_dev = NULL;
+ struct in6_addr laddr6;
+
+ if (!IS_ENABLED(CONFIG_IPV6))
+ return;
+
+ irdma_copy_ip_htonl(laddr6.in6_u.u6_addr32, addr);
+ if (vlan_id)
+ *vlan_id = 0xFFFF; /* Match rdma_vlan_dev_vlan_id() */
+ if (mac)
+ eth_zero_addr(mac);
+
+ rcu_read_lock();
+ for_each_netdev_rcu (&init_net, ip_dev) {
+ if (ipv6_chk_addr(&init_net, &laddr6, ip_dev, 1)) {
+ if (vlan_id)
+ *vlan_id = rdma_vlan_dev_vlan_id(ip_dev);
+ if (ip_dev->dev_addr && mac)
+ ether_addr_copy(mac, ip_dev->dev_addr);
+ break;
+ }
+ }
+ rcu_read_unlock();
+}
+
+/**
+ * irdma_get_vlan_ipv4 - Returns the vlan_id for IPv4 address
+ * @addr: local IPv4 address
+ */
+u16 irdma_get_vlan_ipv4(u32 *addr)
+{
+ struct net_device *netdev;
+ u16 vlan_id = 0xFFFF;
+
+ netdev = ip_dev_find(&init_net, htonl(addr[0]));
+ if (netdev) {
+ vlan_id = rdma_vlan_dev_vlan_id(netdev);
+ dev_put(netdev);
+ }
+
+ return vlan_id;
+}
+
+/**
+ * irdma_add_mqh_6 - Adds multiple qhashes for IPv6
+ * @iwdev: iWarp device
+ * @cm_info: CM info for parent listen node
+ * @cm_parent_listen_node: The parent listen node
+ *
+ * Adds a qhash and a child listen node for every IPv6 address
+ * on the adapter and adds the associated qhash filter
+ */
+static int irdma_add_mqh_6(struct irdma_device *iwdev,
+ struct irdma_cm_info *cm_info,
+ struct irdma_cm_listener *cm_parent_listen_node)
+{
+ struct net_device *ip_dev;
+ struct inet6_dev *idev;
+ struct inet6_ifaddr *ifp, *tmp;
+ struct irdma_cm_listener *child_listen_node;
+ unsigned long flags;
+ int ret = 0;
+
+ rtnl_lock();
+ for_each_netdev(&init_net, ip_dev) {
+ if (!(ip_dev->flags & IFF_UP))
+ continue;
+
+ if (((rdma_vlan_dev_vlan_id(ip_dev) >= VLAN_N_VID) ||
+ (rdma_vlan_dev_real_dev(ip_dev) != iwdev->netdev)) &&
+ ip_dev != iwdev->netdev)
+ continue;
+
+ idev = __in6_dev_get(ip_dev);
+ if (!idev) {
+ ibdev_dbg(&iwdev->ibdev, "CM: idev == NULL\n");
+ break;
+ }
+ list_for_each_entry_safe (ifp, tmp, &idev->addr_list, if_list) {
+ ibdev_dbg(&iwdev->ibdev, "CM: IP=%pI6, vlan_id=%d, MAC=%pM\n",
+ &ifp->addr, rdma_vlan_dev_vlan_id(ip_dev),
+ ip_dev->dev_addr);
+ child_listen_node = kzalloc(sizeof(*child_listen_node), GFP_KERNEL);
+ ibdev_dbg(&iwdev->ibdev, "CM: Allocating child listener %p\n",
+ child_listen_node);
+ if (!child_listen_node) {
+ ibdev_dbg(&iwdev->ibdev, "CM: listener memory allocation\n");
+ ret = -ENOMEM;
+ goto exit;
+ }
+
+ cm_info->vlan_id = rdma_vlan_dev_vlan_id(ip_dev);
+ cm_parent_listen_node->vlan_id = cm_info->vlan_id;
+ memcpy(child_listen_node, cm_parent_listen_node,
+ sizeof(*child_listen_node));
+ irdma_copy_ip_ntohl(child_listen_node->loc_addr,
+ ifp->addr.in6_u.u6_addr32);
+ memcpy(cm_info->loc_addr, child_listen_node->loc_addr,
+ sizeof(cm_info->loc_addr));
+ if (!iwdev->vsi.dscp_mode)
+ cm_info->user_pri =
+ irdma_iw_get_vlan_prio(child_listen_node->loc_addr,
+ cm_info->user_pri,
+ false);
+
+ ret = irdma_manage_qhash(iwdev, cm_info,
+ IRDMA_QHASH_TYPE_TCP_SYN,
+ IRDMA_QHASH_MANAGE_TYPE_ADD,
+ NULL, true);
+ if (ret) {
+ kfree(child_listen_node);
+ continue;
+ }
+
+ trace_irdma_add_mqh_6(iwdev, child_listen_node,
+ ip_dev->dev_addr);
+
+ child_listen_node->qhash_set = true;
+ spin_lock_irqsave(&iwdev->cm_core.listen_list_lock, flags);
+ list_add(&child_listen_node->child_listen_list,
+ &cm_parent_listen_node->child_listen_list);
+ spin_unlock_irqrestore(&iwdev->cm_core.listen_list_lock, flags);
+ cm_parent_listen_node->cm_core->stats_listen_nodes_created++;
+ }
+ }
+exit:
+ rtnl_unlock();
+
+ return ret;
+}
+
+/**
+ * irdma_add_mqh_4 - Adds multiple qhashes for IPv4
+ * @iwdev: iWarp device
+ * @cm_info: CM info for parent listen node
+ * @cm_parent_listen_node: The parent listen node
+ *
+ * Adds a qhash and a child listen node for every IPv4 address
+ * on the adapter and adds the associated qhash filter
+ */
+static int irdma_add_mqh_4(struct irdma_device *iwdev,
+ struct irdma_cm_info *cm_info,
+ struct irdma_cm_listener *cm_parent_listen_node)
+{
+ struct net_device *ip_dev;
+ struct in_device *idev;
+ struct irdma_cm_listener *child_listen_node;
+ unsigned long flags;
+ const struct in_ifaddr *ifa;
+ int ret = 0;
+
+ rtnl_lock();
+ for_each_netdev(&init_net, ip_dev) {
+ if (!(ip_dev->flags & IFF_UP))
+ continue;
+
+ if (((rdma_vlan_dev_vlan_id(ip_dev) >= VLAN_N_VID) ||
+ (rdma_vlan_dev_real_dev(ip_dev) != iwdev->netdev)) &&
+ ip_dev != iwdev->netdev)
+ continue;
+
+ idev = in_dev_get(ip_dev);
+ if (!idev)
+ continue;
+
+ in_dev_for_each_ifa_rtnl(ifa, idev) {
+ ibdev_dbg(&iwdev->ibdev,
+ "CM: Allocating child CM Listener forIP=%pI4, vlan_id=%d, MAC=%pM\n",
+ &ifa->ifa_address, rdma_vlan_dev_vlan_id(ip_dev),
+ ip_dev->dev_addr);
+ child_listen_node = kzalloc(sizeof(*child_listen_node), GFP_KERNEL);
+ cm_parent_listen_node->cm_core->stats_listen_nodes_created++;
+ ibdev_dbg(&iwdev->ibdev, "CM: Allocating child listener %p\n",
+ child_listen_node);
+ if (!child_listen_node) {
+ ibdev_dbg(&iwdev->ibdev, "CM: listener memory allocation\n");
+ in_dev_put(idev);
+ ret = -ENOMEM;
+ goto exit;
+ }
+
+ cm_info->vlan_id = rdma_vlan_dev_vlan_id(ip_dev);
+ cm_parent_listen_node->vlan_id = cm_info->vlan_id;
+ memcpy(child_listen_node, cm_parent_listen_node,
+ sizeof(*child_listen_node));
+ child_listen_node->loc_addr[0] =
+ ntohl(ifa->ifa_address);
+ memcpy(cm_info->loc_addr, child_listen_node->loc_addr,
+ sizeof(cm_info->loc_addr));
+ if (!iwdev->vsi.dscp_mode)
+ cm_info->user_pri =
+ irdma_iw_get_vlan_prio(child_listen_node->loc_addr,
+ cm_info->user_pri,
+ true);
+ ret = irdma_manage_qhash(iwdev, cm_info,
+ IRDMA_QHASH_TYPE_TCP_SYN,
+ IRDMA_QHASH_MANAGE_TYPE_ADD,
+ NULL, true);
+ if (ret) {
+ kfree(child_listen_node);
+ cm_parent_listen_node->cm_core
+ ->stats_listen_nodes_created--;
+ continue;
+ }
+
+ trace_irdma_add_mqh_4(iwdev, child_listen_node,
+ ip_dev->dev_addr);
+
+ child_listen_node->qhash_set = true;
+ spin_lock_irqsave(&iwdev->cm_core.listen_list_lock,
+ flags);
+ list_add(&child_listen_node->child_listen_list,
+ &cm_parent_listen_node->child_listen_list);
+ spin_unlock_irqrestore(&iwdev->cm_core.listen_list_lock, flags);
+ }
+ in_dev_put(idev);
+ }
+exit:
+ rtnl_unlock();
+
+ return ret;
+}
+
+/**
+ * irdma_add_mqh - Adds multiple qhashes
+ * @iwdev: iWarp device
+ * @cm_info: CM info for parent listen node
+ * @cm_listen_node: The parent listen node
+ */
+static int irdma_add_mqh(struct irdma_device *iwdev,
+ struct irdma_cm_info *cm_info,
+ struct irdma_cm_listener *cm_listen_node)
+{
+ if (cm_info->ipv4)
+ return irdma_add_mqh_4(iwdev, cm_info, cm_listen_node);
+ else
+ return irdma_add_mqh_6(iwdev, cm_info, cm_listen_node);
+}
+
+/**
+ * irdma_reset_list_prep - add connection nodes slated for reset to list
+ * @cm_core: cm's core
+ * @listener: pointer to listener node
+ * @reset_list: a list to which cm_node will be selected
+ */
+static void irdma_reset_list_prep(struct irdma_cm_core *cm_core,
+ struct irdma_cm_listener *listener,
+ struct list_head *reset_list)
+{
+ struct irdma_cm_node *cm_node;
+ int bkt;
+
+ hash_for_each_rcu(cm_core->cm_hash_tbl, bkt, cm_node, list) {
+ if (cm_node->listener == listener &&
+ !cm_node->accelerated &&
+ refcount_inc_not_zero(&cm_node->refcnt))
+ list_add(&cm_node->reset_entry, reset_list);
+ }
+}
+
+/**
+ * irdma_dec_refcnt_listen - delete listener and associated cm nodes
+ * @cm_core: cm's core
+ * @listener: pointer to listener node
+ * @free_hanging_nodes: to free associated cm_nodes
+ * @apbvt_del: flag to delete the apbvt
+ */
+static int irdma_dec_refcnt_listen(struct irdma_cm_core *cm_core,
+ struct irdma_cm_listener *listener,
+ int free_hanging_nodes, bool apbvt_del)
+{
+ int err;
+ struct list_head *list_pos;
+ struct list_head *list_temp;
+ struct irdma_cm_node *cm_node;
+ struct list_head reset_list;
+ struct irdma_cm_info nfo;
+ enum irdma_cm_node_state old_state;
+ unsigned long flags;
+
+ trace_irdma_dec_refcnt_listen(listener, __builtin_return_address(0));
+ /* free non-accelerated child nodes for this listener */
+ INIT_LIST_HEAD(&reset_list);
+ if (free_hanging_nodes) {
+ rcu_read_lock();
+ irdma_reset_list_prep(cm_core, listener, &reset_list);
+ rcu_read_unlock();
+ }
+
+ list_for_each_safe (list_pos, list_temp, &reset_list) {
+ cm_node = container_of(list_pos, struct irdma_cm_node,
+ reset_entry);
+ if (cm_node->state >= IRDMA_CM_STATE_FIN_WAIT1) {
+ irdma_rem_ref_cm_node(cm_node);
+ continue;
+ }
+
+ irdma_cleanup_retrans_entry(cm_node);
+ err = irdma_send_reset(cm_node);
+ if (err) {
+ cm_node->state = IRDMA_CM_STATE_CLOSED;
+ ibdev_dbg(&cm_node->iwdev->ibdev,
+ "CM: send reset failed\n");
+ } else {
+ old_state = cm_node->state;
+ cm_node->state = IRDMA_CM_STATE_LISTENER_DESTROYED;
+ if (old_state != IRDMA_CM_STATE_MPAREQ_RCVD)
+ irdma_rem_ref_cm_node(cm_node);
+ }
+ }
+
+ if (refcount_dec_and_test(&listener->refcnt)) {
+ spin_lock_irqsave(&cm_core->listen_list_lock, flags);
+ list_del(&listener->list);
+ spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
+
+ if (apbvt_del)
+ irdma_del_apbvt(listener->iwdev,
+ listener->apbvt_entry);
+ memcpy(nfo.loc_addr, listener->loc_addr, sizeof(nfo.loc_addr));
+ nfo.loc_port = listener->loc_port;
+ nfo.ipv4 = listener->ipv4;
+ nfo.vlan_id = listener->vlan_id;
+ nfo.user_pri = listener->user_pri;
+ nfo.qh_qpid = listener->iwdev->vsi.ilq->qp_id;
+
+ if (!list_empty(&listener->child_listen_list)) {
+ irdma_del_multiple_qhash(listener->iwdev, &nfo,
+ listener);
+ } else {
+ if (listener->qhash_set)
+ irdma_manage_qhash(listener->iwdev,
+ &nfo,
+ IRDMA_QHASH_TYPE_TCP_SYN,
+ IRDMA_QHASH_MANAGE_TYPE_DELETE,
+ NULL, false);
+ }
+
+ cm_core->stats_listen_destroyed++;
+ cm_core->stats_listen_nodes_destroyed++;
+ ibdev_dbg(&listener->iwdev->ibdev,
+ "CM: loc_port=0x%04x loc_addr=%pI4 cm_listen_node=%p cm_id=%p qhash_set=%d vlan_id=%d apbvt_del=%d\n",
+ listener->loc_port, listener->loc_addr, listener,
+ listener->cm_id, listener->qhash_set,
+ listener->vlan_id, apbvt_del);
+ kfree(listener);
+ listener = NULL;
+ return 0;
+ }
+
+ return -EINVAL;
+}
+
+/**
+ * irdma_cm_del_listen - delete a listener
+ * @cm_core: cm's core
+ * @listener: passive connection's listener
+ * @apbvt_del: flag to delete apbvt
+ */
+static int irdma_cm_del_listen(struct irdma_cm_core *cm_core,
+ struct irdma_cm_listener *listener,
+ bool apbvt_del)
+{
+ listener->listener_state = IRDMA_CM_LISTENER_PASSIVE_STATE;
+ listener->cm_id = NULL;
+
+ return irdma_dec_refcnt_listen(cm_core, listener, 1, apbvt_del);
+}
+
+/**
+ * irdma_addr_resolve_neigh - resolve neighbor address
+ * @iwdev: iwarp device structure
+ * @src_ip: local ip address
+ * @dst_ip: remote ip address
+ * @arpindex: if there is an arp entry
+ */
+static int irdma_addr_resolve_neigh(struct irdma_device *iwdev, u32 src_ip,
+ u32 dst_ip, int arpindex)
+{
+ struct rtable *rt;
+ struct neighbour *neigh;
+ int rc = arpindex;
+ __be32 dst_ipaddr = htonl(dst_ip);
+ __be32 src_ipaddr = htonl(src_ip);
+
+ rt = ip_route_output(&init_net, dst_ipaddr, src_ipaddr, 0, 0,
+ RT_SCOPE_UNIVERSE);
+ if (IS_ERR(rt)) {
+ ibdev_dbg(&iwdev->ibdev, "CM: ip_route_output fail\n");
+ return -EINVAL;
+ }
+
+ neigh = dst_neigh_lookup(&rt->dst, &dst_ipaddr);
+ if (!neigh)
+ goto exit;
+
+ if (neigh->nud_state & NUD_VALID)
+ rc = irdma_add_arp(iwdev->rf, &dst_ip, true, neigh->ha);
+ else
+ neigh_event_send(neigh, NULL);
+ if (neigh)
+ neigh_release(neigh);
+exit:
+ ip_rt_put(rt);
+
+ return rc;
+}
+
+/**
+ * irdma_get_dst_ipv6 - get destination cache entry via ipv6 lookup
+ * @src_addr: local ipv6 sock address
+ * @dst_addr: destination ipv6 sock address
+ */
+static struct dst_entry *irdma_get_dst_ipv6(struct sockaddr_in6 *src_addr,
+ struct sockaddr_in6 *dst_addr)
+{
+ struct dst_entry *dst = NULL;
+
+ if ((IS_ENABLED(CONFIG_IPV6))) {
+ struct flowi6 fl6 = {};
+
+ fl6.daddr = dst_addr->sin6_addr;
+ fl6.saddr = src_addr->sin6_addr;
+ if (ipv6_addr_type(&fl6.daddr) & IPV6_ADDR_LINKLOCAL)
+ fl6.flowi6_oif = dst_addr->sin6_scope_id;
+
+ dst = ip6_route_output(&init_net, NULL, &fl6);
+ }
+
+ return dst;
+}
+
+/**
+ * irdma_addr_resolve_neigh_ipv6 - resolve neighbor ipv6 address
+ * @iwdev: iwarp device structure
+ * @src: local ip address
+ * @dest: remote ip address
+ * @arpindex: if there is an arp entry
+ */
+static int irdma_addr_resolve_neigh_ipv6(struct irdma_device *iwdev, u32 *src,
+ u32 *dest, int arpindex)
+{
+ struct neighbour *neigh;
+ int rc = arpindex;
+ struct dst_entry *dst;
+ struct sockaddr_in6 dst_addr = {};
+ struct sockaddr_in6 src_addr = {};
+
+ dst_addr.sin6_family = AF_INET6;
+ irdma_copy_ip_htonl(dst_addr.sin6_addr.in6_u.u6_addr32, dest);
+ src_addr.sin6_family = AF_INET6;
+ irdma_copy_ip_htonl(src_addr.sin6_addr.in6_u.u6_addr32, src);
+ dst = irdma_get_dst_ipv6(&src_addr, &dst_addr);
+ if (!dst || dst->error) {
+ if (dst) {
+ dst_release(dst);
+ ibdev_dbg(&iwdev->ibdev,
+ "CM: ip6_route_output returned dst->error = %d\n",
+ dst->error);
+ }
+ return -EINVAL;
+ }
+
+ neigh = dst_neigh_lookup(dst, dst_addr.sin6_addr.in6_u.u6_addr32);
+ if (!neigh)
+ goto exit;
+
+ ibdev_dbg(&iwdev->ibdev, "CM: dst_neigh_lookup MAC=%pM\n",
+ neigh->ha);
+
+ trace_irdma_addr_resolve(iwdev, neigh->ha);
+
+ if (neigh->nud_state & NUD_VALID)
+ rc = irdma_add_arp(iwdev->rf, dest, false, neigh->ha);
+ else
+ neigh_event_send(neigh, NULL);
+ if (neigh)
+ neigh_release(neigh);
+exit:
+ dst_release(dst);
+
+ return rc;
+}
+
+/**
+ * irdma_find_node - find a cm node that matches the reference cm node
+ * @cm_core: cm's core
+ * @rem_port: remote tcp port num
+ * @rem_addr: remote ip addr
+ * @loc_port: local tcp port num
+ * @loc_addr: local ip addr
+ * @vlan_id: local VLAN ID
+ */
+struct irdma_cm_node *irdma_find_node(struct irdma_cm_core *cm_core,
+ u16 rem_port, u32 *rem_addr, u16 loc_port,
+ u32 *loc_addr, u16 vlan_id)
+{
+ struct irdma_cm_node *cm_node;
+ u32 key = (rem_port << 16) | loc_port;
+
+ rcu_read_lock();
+ hash_for_each_possible_rcu(cm_core->cm_hash_tbl, cm_node, list, key) {
+ if (cm_node->vlan_id == vlan_id &&
+ cm_node->loc_port == loc_port && cm_node->rem_port == rem_port &&
+ !memcmp(cm_node->loc_addr, loc_addr, sizeof(cm_node->loc_addr)) &&
+ !memcmp(cm_node->rem_addr, rem_addr, sizeof(cm_node->rem_addr))) {
+ if (!refcount_inc_not_zero(&cm_node->refcnt))
+ goto exit;
+ rcu_read_unlock();
+ trace_irdma_find_node(cm_node, 0, NULL);
+ return cm_node;
+ }
+ }
+
+exit:
+ rcu_read_unlock();
+
+ /* no owner node */
+ return NULL;
+}
+
+/**
+ * irdma_add_hte_node - add a cm node to the hash table
+ * @cm_core: cm's core
+ * @cm_node: connection's node
+ */
+static void irdma_add_hte_node(struct irdma_cm_core *cm_core,
+ struct irdma_cm_node *cm_node)
+{
+ unsigned long flags;
+ u32 key = (cm_node->rem_port << 16) | cm_node->loc_port;
+
+ spin_lock_irqsave(&cm_core->ht_lock, flags);
+ hash_add_rcu(cm_core->cm_hash_tbl, &cm_node->list, key);
+ spin_unlock_irqrestore(&cm_core->ht_lock, flags);
+}
+
+/**
+ * irdma_ipv4_is_lpb - check if loopback
+ * @loc_addr: local addr to compare
+ * @rem_addr: remote address
+ */
+bool irdma_ipv4_is_lpb(u32 loc_addr, u32 rem_addr)
+{
+ return ipv4_is_loopback(htonl(rem_addr)) || (loc_addr == rem_addr);
+}
+
+/**
+ * irdma_ipv6_is_lpb - check if loopback
+ * @loc_addr: local addr to compare
+ * @rem_addr: remote address
+ */
+bool irdma_ipv6_is_lpb(u32 *loc_addr, u32 *rem_addr)
+{
+ struct in6_addr raddr6;
+
+ irdma_copy_ip_htonl(raddr6.in6_u.u6_addr32, rem_addr);
+
+ return !memcmp(loc_addr, rem_addr, 16) || ipv6_addr_loopback(&raddr6);
+}
+
+/**
+ * irdma_cm_create_ah - create a cm address handle
+ * @cm_node: The connection manager node to create AH for
+ * @wait: Provides option to wait for ah creation or not
+ */
+static int irdma_cm_create_ah(struct irdma_cm_node *cm_node, bool wait)
+{
+ struct irdma_ah_info ah_info = {};
+ struct irdma_device *iwdev = cm_node->iwdev;
+
+ ether_addr_copy(ah_info.mac_addr, iwdev->netdev->dev_addr);
+
+ ah_info.hop_ttl = 0x40;
+ ah_info.tc_tos = cm_node->tos;
+ ah_info.vsi = &iwdev->vsi;
+
+ if (cm_node->ipv4) {
+ ah_info.ipv4_valid = true;
+ ah_info.dest_ip_addr[0] = cm_node->rem_addr[0];
+ ah_info.src_ip_addr[0] = cm_node->loc_addr[0];
+ ah_info.do_lpbk = irdma_ipv4_is_lpb(ah_info.src_ip_addr[0],
+ ah_info.dest_ip_addr[0]);
+ } else {
+ memcpy(ah_info.dest_ip_addr, cm_node->rem_addr,
+ sizeof(ah_info.dest_ip_addr));
+ memcpy(ah_info.src_ip_addr, cm_node->loc_addr,
+ sizeof(ah_info.src_ip_addr));
+ ah_info.do_lpbk = irdma_ipv6_is_lpb(ah_info.src_ip_addr,
+ ah_info.dest_ip_addr);
+ }
+
+ ah_info.vlan_tag = cm_node->vlan_id;
+ if (cm_node->vlan_id < VLAN_N_VID) {
+ ah_info.insert_vlan_tag = 1;
+ ah_info.vlan_tag |= cm_node->user_pri << VLAN_PRIO_SHIFT;
+ }
+
+ ah_info.dst_arpindex =
+ irdma_arp_table(iwdev->rf, ah_info.dest_ip_addr,
+ ah_info.ipv4_valid, NULL, IRDMA_ARP_RESOLVE);
+
+ if (irdma_puda_create_ah(&iwdev->rf->sc_dev, &ah_info, wait,
+ IRDMA_PUDA_RSRC_TYPE_ILQ, cm_node,
+ &cm_node->ah))
+ return -ENOMEM;
+
+ trace_irdma_create_ah(cm_node);
+ return 0;
+}
+
+/**
+ * irdma_cm_free_ah - free a cm address handle
+ * @cm_node: The connection manager node to create AH for
+ */
+static void irdma_cm_free_ah(struct irdma_cm_node *cm_node)
+{
+ struct irdma_device *iwdev = cm_node->iwdev;
+
+ trace_irdma_cm_free_ah(cm_node);
+ irdma_puda_free_ah(&iwdev->rf->sc_dev, cm_node->ah);
+ cm_node->ah = NULL;
+}
+
+/**
+ * irdma_make_cm_node - create a new instance of a cm node
+ * @cm_core: cm's core
+ * @iwdev: iwarp device structure
+ * @cm_info: quad info for connection
+ * @listener: passive connection's listener
+ */
+static struct irdma_cm_node *
+irdma_make_cm_node(struct irdma_cm_core *cm_core, struct irdma_device *iwdev,
+ struct irdma_cm_info *cm_info,
+ struct irdma_cm_listener *listener)
+{
+ struct irdma_cm_node *cm_node;
+ int oldarpindex;
+ int arpindex;
+ struct net_device *netdev = iwdev->netdev;
+
+ /* create an hte and cm_node for this instance */
+ cm_node = kzalloc(sizeof(*cm_node), GFP_ATOMIC);
+ if (!cm_node)
+ return NULL;
+
+ /* set our node specific transport info */
+ cm_node->ipv4 = cm_info->ipv4;
+ cm_node->vlan_id = cm_info->vlan_id;
+ if (cm_node->vlan_id >= VLAN_N_VID && iwdev->dcb_vlan_mode)
+ cm_node->vlan_id = 0;
+ cm_node->tos = cm_info->tos;
+ cm_node->user_pri = cm_info->user_pri;
+ if (listener) {
+ if (listener->tos != cm_info->tos)
+ ibdev_warn(&iwdev->ibdev,
+ "application TOS[%d] and remote client TOS[%d] mismatch\n",
+ listener->tos, cm_info->tos);
+ if (iwdev->vsi.dscp_mode) {
+ cm_node->user_pri = listener->user_pri;
+ } else {
+ cm_node->tos = max(listener->tos, cm_info->tos);
+ cm_node->user_pri = rt_tos2priority(cm_node->tos);
+ cm_node->user_pri =
+ irdma_iw_get_vlan_prio(cm_info->loc_addr,
+ cm_node->user_pri,
+ cm_info->ipv4);
+ }
+ ibdev_dbg(&iwdev->ibdev,
+ "DCB: listener: TOS:[%d] UP:[%d]\n", cm_node->tos,
+ cm_node->user_pri);
+ trace_irdma_listener_tos(iwdev, cm_node->tos,
+ cm_node->user_pri);
+ }
+ memcpy(cm_node->loc_addr, cm_info->loc_addr, sizeof(cm_node->loc_addr));
+ memcpy(cm_node->rem_addr, cm_info->rem_addr, sizeof(cm_node->rem_addr));
+ cm_node->loc_port = cm_info->loc_port;
+ cm_node->rem_port = cm_info->rem_port;
+
+ cm_node->mpa_frame_rev = IRDMA_CM_DEFAULT_MPA_VER;
+ cm_node->send_rdma0_op = SEND_RDMA_READ_ZERO;
+ cm_node->iwdev = iwdev;
+ cm_node->dev = &iwdev->rf->sc_dev;
+
+ cm_node->ird_size = cm_node->dev->hw_attrs.max_hw_ird;
+ cm_node->ord_size = cm_node->dev->hw_attrs.max_hw_ord;
+
+ cm_node->listener = listener;
+ cm_node->cm_id = cm_info->cm_id;
+ ether_addr_copy(cm_node->loc_mac, netdev->dev_addr);
+ spin_lock_init(&cm_node->retrans_list_lock);
+ cm_node->ack_rcvd = false;
+
+ init_completion(&cm_node->establish_comp);
+ refcount_set(&cm_node->refcnt, 1);
+ /* associate our parent CM core */
+ cm_node->cm_core = cm_core;
+ cm_node->tcp_cntxt.loc_id = IRDMA_CM_DEFAULT_LOCAL_ID;
+ cm_node->tcp_cntxt.rcv_wscale = iwdev->rcv_wscale;
+ cm_node->tcp_cntxt.rcv_wnd = iwdev->rcv_wnd >> cm_node->tcp_cntxt.rcv_wscale;
+ if (cm_node->ipv4) {
+ cm_node->tcp_cntxt.loc_seq_num = secure_tcp_seq(htonl(cm_node->loc_addr[0]),
+ htonl(cm_node->rem_addr[0]),
+ htons(cm_node->loc_port),
+ htons(cm_node->rem_port));
+ cm_node->tcp_cntxt.mss = iwdev->vsi.mtu - IRDMA_MTU_TO_MSS_IPV4;
+ } else if (IS_ENABLED(CONFIG_IPV6)) {
+ __be32 loc[4] = {
+ htonl(cm_node->loc_addr[0]), htonl(cm_node->loc_addr[1]),
+ htonl(cm_node->loc_addr[2]), htonl(cm_node->loc_addr[3])
+ };
+ __be32 rem[4] = {
+ htonl(cm_node->rem_addr[0]), htonl(cm_node->rem_addr[1]),
+ htonl(cm_node->rem_addr[2]), htonl(cm_node->rem_addr[3])
+ };
+ cm_node->tcp_cntxt.loc_seq_num = secure_tcpv6_seq(loc, rem,
+ htons(cm_node->loc_port),
+ htons(cm_node->rem_port));
+ cm_node->tcp_cntxt.mss = iwdev->vsi.mtu - IRDMA_MTU_TO_MSS_IPV6;
+ }
+
+ if ((cm_node->ipv4 &&
+ irdma_ipv4_is_lpb(cm_node->loc_addr[0], cm_node->rem_addr[0])) ||
+ (!cm_node->ipv4 &&
+ irdma_ipv6_is_lpb(cm_node->loc_addr, cm_node->rem_addr))) {
+ cm_node->do_lpb = true;
+ arpindex = irdma_arp_table(iwdev->rf, cm_node->rem_addr,
+ cm_node->ipv4, NULL,
+ IRDMA_ARP_RESOLVE);
+ } else {
+ oldarpindex = irdma_arp_table(iwdev->rf, cm_node->rem_addr,
+ cm_node->ipv4, NULL,
+ IRDMA_ARP_RESOLVE);
+ if (cm_node->ipv4)
+ arpindex = irdma_addr_resolve_neigh(iwdev,
+ cm_info->loc_addr[0],
+ cm_info->rem_addr[0],
+ oldarpindex);
+ else if (IS_ENABLED(CONFIG_IPV6))
+ arpindex = irdma_addr_resolve_neigh_ipv6(iwdev,
+ cm_info->loc_addr,
+ cm_info->rem_addr,
+ oldarpindex);
+ else
+ arpindex = -EINVAL;
+ }
+
+ if (arpindex < 0)
+ goto err;
+
+ ether_addr_copy(cm_node->rem_mac,
+ iwdev->rf->arp_table[arpindex].mac_addr);
+ irdma_add_hte_node(cm_core, cm_node);
+ cm_core->stats_nodes_created++;
+ return cm_node;
+
+err:
+ kfree(cm_node);
+
+ return NULL;
+}
+
+static void irdma_destroy_connection(struct irdma_cm_node *cm_node)
+{
+ struct irdma_cm_core *cm_core = cm_node->cm_core;
+ struct irdma_qp *iwqp;
+ struct irdma_cm_info nfo;
+
+ /* if the node is destroyed before connection was accelerated */
+ if (!cm_node->accelerated && cm_node->accept_pend) {
+ ibdev_dbg(&cm_node->iwdev->ibdev,
+ "CM: node destroyed before established\n");
+ atomic_dec(&cm_node->listener->pend_accepts_cnt);
+ }
+ if (cm_node->close_entry)
+ irdma_handle_close_entry(cm_node, 0);
+ if (cm_node->listener) {
+ irdma_dec_refcnt_listen(cm_core, cm_node->listener, 0, true);
+ } else {
+ if (cm_node->apbvt_set) {
+ irdma_del_apbvt(cm_node->iwdev, cm_node->apbvt_entry);
+ cm_node->apbvt_set = 0;
+ }
+ irdma_get_addr_info(cm_node, &nfo);
+ if (cm_node->qhash_set) {
+ nfo.qh_qpid = cm_node->iwdev->vsi.ilq->qp_id;
+ irdma_manage_qhash(cm_node->iwdev, &nfo,
+ IRDMA_QHASH_TYPE_TCP_ESTABLISHED,
+ IRDMA_QHASH_MANAGE_TYPE_DELETE, NULL,
+ false);
+ cm_node->qhash_set = 0;
+ }
+ }
+
+ iwqp = cm_node->iwqp;
+ if (iwqp) {
+ cm_node->cm_id->rem_ref(cm_node->cm_id);
+ cm_node->cm_id = NULL;
+ iwqp->cm_id = NULL;
+ irdma_qp_rem_ref(&iwqp->ibqp);
+ cm_node->iwqp = NULL;
+ } else if (cm_node->qhash_set) {
+ irdma_get_addr_info(cm_node, &nfo);
+ nfo.qh_qpid = cm_node->iwdev->vsi.ilq->qp_id;
+ irdma_manage_qhash(cm_node->iwdev, &nfo,
+ IRDMA_QHASH_TYPE_TCP_ESTABLISHED,
+ IRDMA_QHASH_MANAGE_TYPE_DELETE, NULL, false);
+ cm_node->qhash_set = 0;
+ }
+
+ cm_core->cm_free_ah(cm_node);
+}
+
+/**
+ * irdma_rem_ref_cm_node - destroy an instance of a cm node
+ * @cm_node: connection's node
+ */
+void irdma_rem_ref_cm_node(struct irdma_cm_node *cm_node)
+{
+ struct irdma_cm_core *cm_core = cm_node->cm_core;
+ unsigned long flags;
+
+ trace_irdma_rem_ref_cm_node(cm_node, 0, __builtin_return_address(0));
+ spin_lock_irqsave(&cm_core->ht_lock, flags);
+
+ if (!refcount_dec_and_test(&cm_node->refcnt)) {
+ spin_unlock_irqrestore(&cm_core->ht_lock, flags);
+ return;
+ }
+ if (cm_node->iwqp) {
+ cm_node->iwqp->cm_node = NULL;
+ cm_node->iwqp->cm_id = NULL;
+ }
+ hash_del_rcu(&cm_node->list);
+ cm_node->cm_core->stats_nodes_destroyed++;
+
+ spin_unlock_irqrestore(&cm_core->ht_lock, flags);
+
+ irdma_destroy_connection(cm_node);
+
+ kfree_rcu(cm_node, rcu_head);
+}
+
+/**
+ * irdma_handle_fin_pkt - FIN packet received
+ * @cm_node: connection's node
+ */
+static void irdma_handle_fin_pkt(struct irdma_cm_node *cm_node)
+{
+ switch (cm_node->state) {
+ case IRDMA_CM_STATE_SYN_RCVD:
+ case IRDMA_CM_STATE_SYN_SENT:
+ case IRDMA_CM_STATE_ESTABLISHED:
+ case IRDMA_CM_STATE_MPAREJ_RCVD:
+ cm_node->tcp_cntxt.rcv_nxt++;
+ irdma_cleanup_retrans_entry(cm_node);
+ cm_node->state = IRDMA_CM_STATE_LAST_ACK;
+ irdma_send_fin(cm_node);
+ break;
+ case IRDMA_CM_STATE_MPAREQ_SENT:
+ irdma_create_event(cm_node, IRDMA_CM_EVENT_ABORTED);
+ cm_node->tcp_cntxt.rcv_nxt++;
+ irdma_cleanup_retrans_entry(cm_node);
+ cm_node->state = IRDMA_CM_STATE_CLOSED;
+ refcount_inc(&cm_node->refcnt);
+ irdma_send_reset(cm_node);
+ break;
+ case IRDMA_CM_STATE_FIN_WAIT1:
+ cm_node->tcp_cntxt.rcv_nxt++;
+ irdma_cleanup_retrans_entry(cm_node);
+ cm_node->state = IRDMA_CM_STATE_CLOSING;
+ irdma_send_ack(cm_node);
+ /*
+ * Wait for ACK as this is simultaneous close.
+ * After we receive ACK, do not send anything.
+ * Just rm the node.
+ */
+ break;
+ case IRDMA_CM_STATE_FIN_WAIT2:
+ cm_node->tcp_cntxt.rcv_nxt++;
+ irdma_cleanup_retrans_entry(cm_node);
+ cm_node->state = IRDMA_CM_STATE_TIME_WAIT;
+ irdma_send_ack(cm_node);
+ irdma_schedule_cm_timer(cm_node, NULL, IRDMA_TIMER_TYPE_CLOSE,
+ 1, 0);
+ break;
+ case IRDMA_CM_STATE_TIME_WAIT:
+ cm_node->tcp_cntxt.rcv_nxt++;
+ irdma_cleanup_retrans_entry(cm_node);
+ cm_node->state = IRDMA_CM_STATE_CLOSED;
+ irdma_rem_ref_cm_node(cm_node);
+ break;
+ case IRDMA_CM_STATE_OFFLOADED:
+ default:
+ ibdev_dbg(&cm_node->iwdev->ibdev,
+ "CM: bad state node state = %d\n", cm_node->state);
+ break;
+ }
+}
+
+/**
+ * irdma_handle_rst_pkt - process received RST packet
+ * @cm_node: connection's node
+ * @rbuf: receive buffer
+ */
+static void irdma_handle_rst_pkt(struct irdma_cm_node *cm_node,
+ struct irdma_puda_buf *rbuf)
+{
+ ibdev_dbg(&cm_node->iwdev->ibdev,
+ "CM: caller: %pS cm_node=%p state=%d rem_port=0x%04x loc_port=0x%04x rem_addr=%pI4 loc_addr=%pI4\n",
+ __builtin_return_address(0), cm_node, cm_node->state,
+ cm_node->rem_port, cm_node->loc_port, cm_node->rem_addr,
+ cm_node->loc_addr);
+
+ irdma_cleanup_retrans_entry(cm_node);
+ switch (cm_node->state) {
+ case IRDMA_CM_STATE_SYN_SENT:
+ case IRDMA_CM_STATE_MPAREQ_SENT:
+ switch (cm_node->mpa_frame_rev) {
+ case IETF_MPA_V2:
+ /* Drop down to MPA_V1*/
+ cm_node->mpa_frame_rev = IETF_MPA_V1;
+ /* send a syn and goto syn sent state */
+ cm_node->state = IRDMA_CM_STATE_SYN_SENT;
+ if (irdma_send_syn(cm_node, 0))
+ irdma_active_open_err(cm_node, false);
+ break;
+ case IETF_MPA_V1:
+ default:
+ irdma_active_open_err(cm_node, false);
+ break;
+ }
+ break;
+ case IRDMA_CM_STATE_MPAREQ_RCVD:
+ atomic_inc(&cm_node->passive_state);
+ break;
+ case IRDMA_CM_STATE_ESTABLISHED:
+ case IRDMA_CM_STATE_SYN_RCVD:
+ case IRDMA_CM_STATE_LISTENING:
+ irdma_passive_open_err(cm_node, false);
+ break;
+ case IRDMA_CM_STATE_OFFLOADED:
+ irdma_active_open_err(cm_node, false);
+ break;
+ case IRDMA_CM_STATE_CLOSED:
+ break;
+ case IRDMA_CM_STATE_FIN_WAIT2:
+ case IRDMA_CM_STATE_FIN_WAIT1:
+ case IRDMA_CM_STATE_LAST_ACK:
+ case IRDMA_CM_STATE_TIME_WAIT:
+ cm_node->state = IRDMA_CM_STATE_CLOSED;
+ irdma_rem_ref_cm_node(cm_node);
+ break;
+ default:
+ break;
+ }
+}
+
+/**
+ * irdma_handle_rcv_mpa - Process a recv'd mpa buffer
+ * @cm_node: connection's node
+ * @rbuf: receive buffer
+ */
+static void irdma_handle_rcv_mpa(struct irdma_cm_node *cm_node,
+ struct irdma_puda_buf *rbuf)
+{
+ int err;
+ int datasize = rbuf->datalen;
+ u8 *dataloc = rbuf->data;
+
+ enum irdma_cm_event_type type = IRDMA_CM_EVENT_UNKNOWN;
+ u32 res_type;
+
+ err = irdma_parse_mpa(cm_node, dataloc, &res_type, datasize);
+ if (err) {
+ if (cm_node->state == IRDMA_CM_STATE_MPAREQ_SENT)
+ irdma_active_open_err(cm_node, true);
+ else
+ irdma_passive_open_err(cm_node, true);
+ return;
+ }
+
+ switch (cm_node->state) {
+ case IRDMA_CM_STATE_ESTABLISHED:
+ if (res_type == IRDMA_MPA_REQUEST_REJECT)
+ ibdev_dbg(&cm_node->iwdev->ibdev,
+ "CM: state for reject\n");
+ cm_node->state = IRDMA_CM_STATE_MPAREQ_RCVD;
+ type = IRDMA_CM_EVENT_MPA_REQ;
+ irdma_send_ack(cm_node); /* ACK received MPA request */
+ atomic_set(&cm_node->passive_state,
+ IRDMA_PASSIVE_STATE_INDICATED);
+ break;
+ case IRDMA_CM_STATE_MPAREQ_SENT:
+ irdma_cleanup_retrans_entry(cm_node);
+ if (res_type == IRDMA_MPA_REQUEST_REJECT) {
+ type = IRDMA_CM_EVENT_MPA_REJECT;
+ cm_node->state = IRDMA_CM_STATE_MPAREJ_RCVD;
+ } else {
+ type = IRDMA_CM_EVENT_CONNECTED;
+ cm_node->state = IRDMA_CM_STATE_OFFLOADED;
+ }
+ irdma_send_ack(cm_node);
+ break;
+ default:
+ ibdev_dbg(&cm_node->iwdev->ibdev,
+ "CM: wrong cm_node state =%d\n", cm_node->state);
+ break;
+ }
+ irdma_create_event(cm_node, type);
+}
+
+/**
+ * irdma_check_syn - Check for error on received syn ack
+ * @cm_node: connection's node
+ * @tcph: pointer tcp header
+ */
+static int irdma_check_syn(struct irdma_cm_node *cm_node, struct tcphdr *tcph)
+{
+ if (ntohl(tcph->ack_seq) != cm_node->tcp_cntxt.loc_seq_num) {
+ irdma_active_open_err(cm_node, true);
+ return 1;
+ }
+
+ return 0;
+}
+
+/**
+ * irdma_check_seq - check seq numbers if OK
+ * @cm_node: connection's node
+ * @tcph: pointer tcp header
+ */
+static int irdma_check_seq(struct irdma_cm_node *cm_node, struct tcphdr *tcph)
+{
+ u32 seq;
+ u32 ack_seq;
+ u32 loc_seq_num = cm_node->tcp_cntxt.loc_seq_num;
+ u32 rcv_nxt = cm_node->tcp_cntxt.rcv_nxt;
+ u32 rcv_wnd;
+ int err = 0;
+
+ seq = ntohl(tcph->seq);
+ ack_seq = ntohl(tcph->ack_seq);
+ rcv_wnd = cm_node->tcp_cntxt.rcv_wnd;
+ if (ack_seq != loc_seq_num ||
+ !between(seq, rcv_nxt, (rcv_nxt + rcv_wnd)))
+ err = -1;
+ if (err)
+ ibdev_dbg(&cm_node->iwdev->ibdev,
+ "CM: seq number err\n");
+
+ return err;
+}
+
+void irdma_add_conn_est_qh(struct irdma_cm_node *cm_node)
+{
+ struct irdma_cm_info nfo;
+
+ irdma_get_addr_info(cm_node, &nfo);
+ nfo.qh_qpid = cm_node->iwdev->vsi.ilq->qp_id;
+ irdma_manage_qhash(cm_node->iwdev, &nfo,
+ IRDMA_QHASH_TYPE_TCP_ESTABLISHED,
+ IRDMA_QHASH_MANAGE_TYPE_ADD,
+ cm_node, false);
+ cm_node->qhash_set = true;
+}
+
+/**
+ * irdma_handle_syn_pkt - is for Passive node
+ * @cm_node: connection's node
+ * @rbuf: receive buffer
+ */
+static void irdma_handle_syn_pkt(struct irdma_cm_node *cm_node,
+ struct irdma_puda_buf *rbuf)
+{
+ struct tcphdr *tcph = (struct tcphdr *)rbuf->tcph;
+ int err;
+ u32 inc_sequence;
+ int optionsize;
+
+ optionsize = (tcph->doff << 2) - sizeof(struct tcphdr);
+ inc_sequence = ntohl(tcph->seq);
+
+ switch (cm_node->state) {
+ case IRDMA_CM_STATE_SYN_SENT:
+ case IRDMA_CM_STATE_MPAREQ_SENT:
+ /* Rcvd syn on active open connection */
+ irdma_active_open_err(cm_node, 1);
+ break;
+ case IRDMA_CM_STATE_LISTENING:
+ /* Passive OPEN */
+ if (atomic_read(&cm_node->listener->pend_accepts_cnt) >
+ cm_node->listener->backlog) {
+ cm_node->cm_core->stats_backlog_drops++;
+ irdma_passive_open_err(cm_node, false);
+ break;
+ }
+ err = irdma_handle_tcp_options(cm_node, tcph, optionsize, 1);
+ if (err) {
+ irdma_passive_open_err(cm_node, false);
+ /* drop pkt */
+ break;
+ }
+ err = cm_node->cm_core->cm_create_ah(cm_node, false);
+ if (err) {
+ irdma_passive_open_err(cm_node, false);
+ /* drop pkt */
+ break;
+ }
+ cm_node->tcp_cntxt.rcv_nxt = inc_sequence + 1;
+ cm_node->accept_pend = 1;
+ atomic_inc(&cm_node->listener->pend_accepts_cnt);
+
+ cm_node->state = IRDMA_CM_STATE_SYN_RCVD;
+ break;
+ case IRDMA_CM_STATE_CLOSED:
+ irdma_cleanup_retrans_entry(cm_node);
+ refcount_inc(&cm_node->refcnt);
+ irdma_send_reset(cm_node);
+ break;
+ case IRDMA_CM_STATE_OFFLOADED:
+ case IRDMA_CM_STATE_ESTABLISHED:
+ case IRDMA_CM_STATE_FIN_WAIT1:
+ case IRDMA_CM_STATE_FIN_WAIT2:
+ case IRDMA_CM_STATE_MPAREQ_RCVD:
+ case IRDMA_CM_STATE_LAST_ACK:
+ case IRDMA_CM_STATE_CLOSING:
+ case IRDMA_CM_STATE_UNKNOWN:
+ default:
+ break;
+ }
+}
+
+/**
+ * irdma_handle_synack_pkt - Process SYN+ACK packet (active side)
+ * @cm_node: connection's node
+ * @rbuf: receive buffer
+ */
+static void irdma_handle_synack_pkt(struct irdma_cm_node *cm_node,
+ struct irdma_puda_buf *rbuf)
+{
+ struct tcphdr *tcph = (struct tcphdr *)rbuf->tcph;
+ int err;
+ u32 inc_sequence;
+ int optionsize;
+
+ optionsize = (tcph->doff << 2) - sizeof(struct tcphdr);
+ inc_sequence = ntohl(tcph->seq);
+ switch (cm_node->state) {
+ case IRDMA_CM_STATE_SYN_SENT:
+ irdma_cleanup_retrans_entry(cm_node);
+ /* active open */
+ if (irdma_check_syn(cm_node, tcph)) {
+ ibdev_dbg(&cm_node->iwdev->ibdev,
+ "CM: check syn fail\n");
+ return;
+ }
+ cm_node->tcp_cntxt.rem_ack_num = ntohl(tcph->ack_seq);
+ /* setup options */
+ err = irdma_handle_tcp_options(cm_node, tcph, optionsize, 0);
+ if (err) {
+ ibdev_dbg(&cm_node->iwdev->ibdev,
+ "CM: cm_node=%p tcp_options failed\n",
+ cm_node);
+ break;
+ }
+ irdma_cleanup_retrans_entry(cm_node);
+ cm_node->tcp_cntxt.rcv_nxt = inc_sequence + 1;
+ irdma_send_ack(cm_node); /* ACK for the syn_ack */
+ err = irdma_send_mpa_request(cm_node);
+ if (err) {
+ ibdev_dbg(&cm_node->iwdev->ibdev,
+ "CM: cm_node=%p irdma_send_mpa_request failed\n",
+ cm_node);
+ break;
+ }
+ cm_node->state = IRDMA_CM_STATE_MPAREQ_SENT;
+ break;
+ case IRDMA_CM_STATE_MPAREQ_RCVD:
+ irdma_passive_open_err(cm_node, true);
+ break;
+ case IRDMA_CM_STATE_LISTENING:
+ cm_node->tcp_cntxt.loc_seq_num = ntohl(tcph->ack_seq);
+ irdma_cleanup_retrans_entry(cm_node);
+ cm_node->state = IRDMA_CM_STATE_CLOSED;
+ irdma_send_reset(cm_node);
+ break;
+ case IRDMA_CM_STATE_CLOSED:
+ cm_node->tcp_cntxt.loc_seq_num = ntohl(tcph->ack_seq);
+ irdma_cleanup_retrans_entry(cm_node);
+ refcount_inc(&cm_node->refcnt);
+ irdma_send_reset(cm_node);
+ break;
+ case IRDMA_CM_STATE_ESTABLISHED:
+ case IRDMA_CM_STATE_FIN_WAIT1:
+ case IRDMA_CM_STATE_FIN_WAIT2:
+ case IRDMA_CM_STATE_LAST_ACK:
+ case IRDMA_CM_STATE_OFFLOADED:
+ case IRDMA_CM_STATE_CLOSING:
+ case IRDMA_CM_STATE_UNKNOWN:
+ case IRDMA_CM_STATE_MPAREQ_SENT:
+ default:
+ break;
+ }
+}
+
+/**
+ * irdma_handle_ack_pkt - process packet with ACK
+ * @cm_node: connection's node
+ * @rbuf: receive buffer
+ */
+static int irdma_handle_ack_pkt(struct irdma_cm_node *cm_node,
+ struct irdma_puda_buf *rbuf)
+{
+ struct tcphdr *tcph = (struct tcphdr *)rbuf->tcph;
+ u32 inc_sequence;
+ int ret;
+ int optionsize;
+ u32 datasize = rbuf->datalen;
+
+ optionsize = (tcph->doff << 2) - sizeof(struct tcphdr);
+
+ if (irdma_check_seq(cm_node, tcph))
+ return -EINVAL;
+
+ inc_sequence = ntohl(tcph->seq);
+ switch (cm_node->state) {
+ case IRDMA_CM_STATE_SYN_RCVD:
+ irdma_cleanup_retrans_entry(cm_node);
+ ret = irdma_handle_tcp_options(cm_node, tcph, optionsize, 1);
+ if (ret)
+ return ret;
+ cm_node->tcp_cntxt.rem_ack_num = ntohl(tcph->ack_seq);
+ cm_node->state = IRDMA_CM_STATE_ESTABLISHED;
+ if (datasize) {
+ cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize;
+ irdma_handle_rcv_mpa(cm_node, rbuf);
+ }
+ break;
+ case IRDMA_CM_STATE_ESTABLISHED:
+ irdma_cleanup_retrans_entry(cm_node);
+ if (datasize) {
+ cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize;
+ irdma_handle_rcv_mpa(cm_node, rbuf);
+ }
+ break;
+ case IRDMA_CM_STATE_MPAREQ_SENT:
+ cm_node->tcp_cntxt.rem_ack_num = ntohl(tcph->ack_seq);
+ if (datasize) {
+ cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize;
+ cm_node->ack_rcvd = false;
+ irdma_handle_rcv_mpa(cm_node, rbuf);
+ } else {
+ cm_node->ack_rcvd = true;
+ }
+ break;
+ case IRDMA_CM_STATE_LISTENING:
+ irdma_cleanup_retrans_entry(cm_node);
+ cm_node->state = IRDMA_CM_STATE_CLOSED;
+ irdma_send_reset(cm_node);
+ break;
+ case IRDMA_CM_STATE_CLOSED:
+ irdma_cleanup_retrans_entry(cm_node);
+ refcount_inc(&cm_node->refcnt);
+ irdma_send_reset(cm_node);
+ break;
+ case IRDMA_CM_STATE_LAST_ACK:
+ case IRDMA_CM_STATE_CLOSING:
+ irdma_cleanup_retrans_entry(cm_node);
+ cm_node->state = IRDMA_CM_STATE_CLOSED;
+ irdma_rem_ref_cm_node(cm_node);
+ break;
+ case IRDMA_CM_STATE_FIN_WAIT1:
+ irdma_cleanup_retrans_entry(cm_node);
+ cm_node->state = IRDMA_CM_STATE_FIN_WAIT2;
+ break;
+ case IRDMA_CM_STATE_SYN_SENT:
+ case IRDMA_CM_STATE_FIN_WAIT2:
+ case IRDMA_CM_STATE_OFFLOADED:
+ case IRDMA_CM_STATE_MPAREQ_RCVD:
+ case IRDMA_CM_STATE_UNKNOWN:
+ default:
+ irdma_cleanup_retrans_entry(cm_node);
+ break;
+ }
+
+ return 0;
+}
+
+/**
+ * irdma_process_pkt - process cm packet
+ * @cm_node: connection's node
+ * @rbuf: receive buffer
+ */
+static void irdma_process_pkt(struct irdma_cm_node *cm_node,
+ struct irdma_puda_buf *rbuf)
+{
+ enum irdma_tcpip_pkt_type pkt_type = IRDMA_PKT_TYPE_UNKNOWN;
+ struct tcphdr *tcph = (struct tcphdr *)rbuf->tcph;
+ u32 fin_set = 0;
+ int err;
+
+ if (tcph->rst) {
+ pkt_type = IRDMA_PKT_TYPE_RST;
+ } else if (tcph->syn) {
+ pkt_type = IRDMA_PKT_TYPE_SYN;
+ if (tcph->ack)
+ pkt_type = IRDMA_PKT_TYPE_SYNACK;
+ } else if (tcph->ack) {
+ pkt_type = IRDMA_PKT_TYPE_ACK;
+ }
+ if (tcph->fin)
+ fin_set = 1;
+
+ switch (pkt_type) {
+ case IRDMA_PKT_TYPE_SYN:
+ irdma_handle_syn_pkt(cm_node, rbuf);
+ break;
+ case IRDMA_PKT_TYPE_SYNACK:
+ irdma_handle_synack_pkt(cm_node, rbuf);
+ break;
+ case IRDMA_PKT_TYPE_ACK:
+ err = irdma_handle_ack_pkt(cm_node, rbuf);
+ if (fin_set && !err)
+ irdma_handle_fin_pkt(cm_node);
+ break;
+ case IRDMA_PKT_TYPE_RST:
+ irdma_handle_rst_pkt(cm_node, rbuf);
+ break;
+ default:
+ if (fin_set &&
+ (!irdma_check_seq(cm_node, (struct tcphdr *)rbuf->tcph)))
+ irdma_handle_fin_pkt(cm_node);
+ break;
+ }
+}
+
+/**
+ * irdma_make_listen_node - create a listen node with params
+ * @cm_core: cm's core
+ * @iwdev: iwarp device structure
+ * @cm_info: quad info for connection
+ */
+static struct irdma_cm_listener *
+irdma_make_listen_node(struct irdma_cm_core *cm_core,
+ struct irdma_device *iwdev,
+ struct irdma_cm_info *cm_info)
+{
+ struct irdma_cm_listener *listener;
+ unsigned long flags;
+
+ /* cannot have multiple matching listeners */
+ listener =
+ irdma_find_listener(cm_core, cm_info->loc_addr, cm_info->ipv4,
+ cm_info->loc_port, cm_info->vlan_id,
+ IRDMA_CM_LISTENER_EITHER_STATE);
+ if (listener &&
+ listener->listener_state == IRDMA_CM_LISTENER_ACTIVE_STATE) {
+ refcount_dec(&listener->refcnt);
+ return NULL;
+ }
+
+ if (!listener) {
+ /* create a CM listen node
+ * 1/2 node to compare incoming traffic to
+ */
+ listener = kzalloc(sizeof(*listener), GFP_KERNEL);
+ if (!listener)
+ return NULL;
+ cm_core->stats_listen_nodes_created++;
+ memcpy(listener->loc_addr, cm_info->loc_addr,
+ sizeof(listener->loc_addr));
+ listener->loc_port = cm_info->loc_port;
+
+ INIT_LIST_HEAD(&listener->child_listen_list);
+
+ refcount_set(&listener->refcnt, 1);
+ } else {
+ listener->reused_node = 1;
+ }
+
+ listener->cm_id = cm_info->cm_id;
+ listener->ipv4 = cm_info->ipv4;
+ listener->vlan_id = cm_info->vlan_id;
+ atomic_set(&listener->pend_accepts_cnt, 0);
+ listener->cm_core = cm_core;
+ listener->iwdev = iwdev;
+
+ listener->backlog = cm_info->backlog;
+ listener->listener_state = IRDMA_CM_LISTENER_ACTIVE_STATE;
+
+ if (!listener->reused_node) {
+ spin_lock_irqsave(&cm_core->listen_list_lock, flags);
+ list_add(&listener->list, &cm_core->listen_list);
+ spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
+ }
+
+ return listener;
+}
+
+/**
+ * irdma_create_cm_node - make a connection node with params
+ * @cm_core: cm's core
+ * @iwdev: iwarp device structure
+ * @conn_param: connection parameters
+ * @cm_info: quad info for connection
+ * @caller_cm_node: pointer to cm_node structure to return
+ */
+static int irdma_create_cm_node(struct irdma_cm_core *cm_core,
+ struct irdma_device *iwdev,
+ struct iw_cm_conn_param *conn_param,
+ struct irdma_cm_info *cm_info,
+ struct irdma_cm_node **caller_cm_node)
+{
+ struct irdma_cm_node *cm_node;
+ u16 private_data_len = conn_param->private_data_len;
+ const void *private_data = conn_param->private_data;
+
+ /* create a CM connection node */
+ cm_node = irdma_make_cm_node(cm_core, iwdev, cm_info, NULL);
+ if (!cm_node)
+ return -ENOMEM;
+
+ /* set our node side to client (active) side */
+ cm_node->tcp_cntxt.client = 1;
+ cm_node->tcp_cntxt.rcv_wscale = IRDMA_CM_DEFAULT_RCV_WND_SCALE;
+
+ irdma_record_ird_ord(cm_node, conn_param->ird, conn_param->ord);
+
+ cm_node->pdata.size = private_data_len;
+ cm_node->pdata.addr = cm_node->pdata_buf;
+
+ memcpy(cm_node->pdata_buf, private_data, private_data_len);
+ *caller_cm_node = cm_node;
+
+ return 0;
+}
+
+/**
+ * irdma_cm_reject - reject and teardown a connection
+ * @cm_node: connection's node
+ * @pdata: ptr to private data for reject
+ * @plen: size of private data
+ */
+static int irdma_cm_reject(struct irdma_cm_node *cm_node, const void *pdata,
+ u8 plen)
+{
+ int ret;
+ int passive_state;
+
+ if (cm_node->tcp_cntxt.client)
+ return 0;
+
+ irdma_cleanup_retrans_entry(cm_node);
+
+ passive_state = atomic_add_return(1, &cm_node->passive_state);
+ if (passive_state == IRDMA_SEND_RESET_EVENT) {
+ cm_node->state = IRDMA_CM_STATE_CLOSED;
+ irdma_rem_ref_cm_node(cm_node);
+ return 0;
+ }
+
+ if (cm_node->state == IRDMA_CM_STATE_LISTENER_DESTROYED) {
+ irdma_rem_ref_cm_node(cm_node);
+ return 0;
+ }
+
+ ret = irdma_send_mpa_reject(cm_node, pdata, plen);
+ if (!ret)
+ return 0;
+
+ cm_node->state = IRDMA_CM_STATE_CLOSED;
+ if (irdma_send_reset(cm_node))
+ ibdev_dbg(&cm_node->iwdev->ibdev,
+ "CM: send reset failed\n");
+
+ return ret;
+}
+
+/**
+ * irdma_cm_close - close of cm connection
+ * @cm_node: connection's node
+ */
+static int irdma_cm_close(struct irdma_cm_node *cm_node)
+{
+ switch (cm_node->state) {
+ case IRDMA_CM_STATE_SYN_RCVD:
+ case IRDMA_CM_STATE_SYN_SENT:
+ case IRDMA_CM_STATE_ONE_SIDE_ESTABLISHED:
+ case IRDMA_CM_STATE_ESTABLISHED:
+ case IRDMA_CM_STATE_ACCEPTING:
+ case IRDMA_CM_STATE_MPAREQ_SENT:
+ case IRDMA_CM_STATE_MPAREQ_RCVD:
+ irdma_cleanup_retrans_entry(cm_node);
+ irdma_send_reset(cm_node);
+ break;
+ case IRDMA_CM_STATE_CLOSE_WAIT:
+ cm_node->state = IRDMA_CM_STATE_LAST_ACK;
+ irdma_send_fin(cm_node);
+ break;
+ case IRDMA_CM_STATE_FIN_WAIT1:
+ case IRDMA_CM_STATE_FIN_WAIT2:
+ case IRDMA_CM_STATE_LAST_ACK:
+ case IRDMA_CM_STATE_TIME_WAIT:
+ case IRDMA_CM_STATE_CLOSING:
+ return -EINVAL;
+ case IRDMA_CM_STATE_LISTENING:
+ irdma_cleanup_retrans_entry(cm_node);
+ irdma_send_reset(cm_node);
+ break;
+ case IRDMA_CM_STATE_MPAREJ_RCVD:
+ case IRDMA_CM_STATE_UNKNOWN:
+ case IRDMA_CM_STATE_INITED:
+ case IRDMA_CM_STATE_CLOSED:
+ case IRDMA_CM_STATE_LISTENER_DESTROYED:
+ irdma_rem_ref_cm_node(cm_node);
+ break;
+ case IRDMA_CM_STATE_OFFLOADED:
+ if (cm_node->send_entry)
+ ibdev_dbg(&cm_node->iwdev->ibdev,
+ "CM: CM send_entry in OFFLOADED state\n");
+ irdma_rem_ref_cm_node(cm_node);
+ break;
+ }
+
+ return 0;
+}
+
+/**
+ * irdma_receive_ilq - recv an ETHERNET packet, and process it
+ * through CM
+ * @vsi: VSI structure of dev
+ * @rbuf: receive buffer
+ */
+void irdma_receive_ilq(struct irdma_sc_vsi *vsi, struct irdma_puda_buf *rbuf)
+{
+ struct irdma_cm_node *cm_node;
+ struct irdma_cm_listener *listener;
+ struct iphdr *iph;
+ struct ipv6hdr *ip6h;
+ struct tcphdr *tcph;
+ struct irdma_cm_info cm_info = {};
+ struct irdma_device *iwdev = vsi->back_vsi;
+ struct irdma_cm_core *cm_core = &iwdev->cm_core;
+ struct vlan_ethhdr *ethh;
+ u16 vtag;
+
+ /* if vlan, then maclen = 18 else 14 */
+ iph = (struct iphdr *)rbuf->iph;
+ print_hex_dump_debug("ILQ: RECEIVE ILQ BUFFER", DUMP_PREFIX_OFFSET,
+ 16, 8, rbuf->mem.va, rbuf->totallen, false);
+ if (iwdev->rf->sc_dev.hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) {
+ if (rbuf->vlan_valid) {
+ vtag = rbuf->vlan_id;
+ cm_info.user_pri = (vtag & VLAN_PRIO_MASK) >>
+ VLAN_PRIO_SHIFT;
+ cm_info.vlan_id = vtag & VLAN_VID_MASK;
+ } else {
+ cm_info.vlan_id = 0xFFFF;
+ }
+ } else {
+ ethh = rbuf->mem.va;
+
+ if (ethh->h_vlan_proto == htons(ETH_P_8021Q)) {
+ vtag = ntohs(ethh->h_vlan_TCI);
+ cm_info.user_pri = (vtag & VLAN_PRIO_MASK) >>
+ VLAN_PRIO_SHIFT;
+ cm_info.vlan_id = vtag & VLAN_VID_MASK;
+ ibdev_dbg(&cm_core->iwdev->ibdev,
+ "CM: vlan_id=%d\n", cm_info.vlan_id);
+ } else {
+ cm_info.vlan_id = 0xFFFF;
+ }
+ }
+ tcph = (struct tcphdr *)rbuf->tcph;
+
+ if (rbuf->ipv4) {
+ cm_info.loc_addr[0] = ntohl(iph->daddr);
+ cm_info.rem_addr[0] = ntohl(iph->saddr);
+ cm_info.ipv4 = true;
+ cm_info.tos = iph->tos;
+ } else {
+ ip6h = (struct ipv6hdr *)rbuf->iph;
+ irdma_copy_ip_ntohl(cm_info.loc_addr,
+ ip6h->daddr.in6_u.u6_addr32);
+ irdma_copy_ip_ntohl(cm_info.rem_addr,
+ ip6h->saddr.in6_u.u6_addr32);
+ cm_info.ipv4 = false;
+ cm_info.tos = (ip6h->priority << 4) | (ip6h->flow_lbl[0] >> 4);
+ }
+ cm_info.loc_port = ntohs(tcph->dest);
+ cm_info.rem_port = ntohs(tcph->source);
+ cm_node = irdma_find_node(cm_core, cm_info.rem_port, cm_info.rem_addr,
+ cm_info.loc_port, cm_info.loc_addr, cm_info.vlan_id);
+
+ if (!cm_node) {
+ /* Only type of packet accepted are for the
+ * PASSIVE open (syn only)
+ */
+ if (!tcph->syn || tcph->ack)
+ return;
+
+ listener = irdma_find_listener(cm_core,
+ cm_info.loc_addr,
+ cm_info.ipv4,
+ cm_info.loc_port,
+ cm_info.vlan_id,
+ IRDMA_CM_LISTENER_ACTIVE_STATE);
+ if (!listener) {
+ cm_info.cm_id = NULL;
+ ibdev_dbg(&cm_core->iwdev->ibdev,
+ "CM: no listener found\n");
+ return;
+ }
+
+ cm_info.cm_id = listener->cm_id;
+ cm_node = irdma_make_cm_node(cm_core, iwdev, &cm_info,
+ listener);
+ if (!cm_node) {
+ ibdev_dbg(&cm_core->iwdev->ibdev,
+ "CM: allocate node failed\n");
+ refcount_dec(&listener->refcnt);
+ return;
+ }
+
+ if (!tcph->rst && !tcph->fin) {
+ cm_node->state = IRDMA_CM_STATE_LISTENING;
+ } else {
+ irdma_rem_ref_cm_node(cm_node);
+ return;
+ }
+
+ refcount_inc(&cm_node->refcnt);
+ } else if (cm_node->state == IRDMA_CM_STATE_OFFLOADED) {
+ irdma_rem_ref_cm_node(cm_node);
+ return;
+ }
+
+ irdma_process_pkt(cm_node, rbuf);
+ irdma_rem_ref_cm_node(cm_node);
+}
+
+static int irdma_add_qh(struct irdma_cm_node *cm_node, bool active)
+{
+ if (!active)
+ irdma_add_conn_est_qh(cm_node);
+ return 0;
+}
+
+static void irdma_cm_free_ah_nop(struct irdma_cm_node *cm_node)
+{
+}
+
+/**
+ * irdma_setup_cm_core - setup top level instance of a cm core
+ * @iwdev: iwarp device structure
+ * @rdma_ver: HW version
+ */
+int irdma_setup_cm_core(struct irdma_device *iwdev, u8 rdma_ver)
+{
+ struct irdma_cm_core *cm_core = &iwdev->cm_core;
+
+ cm_core->iwdev = iwdev;
+ cm_core->dev = &iwdev->rf->sc_dev;
+
+ /* Handles CM event work items send to Iwarp core */
+ cm_core->event_wq = alloc_ordered_workqueue("iwarp-event-wq", 0);
+ if (!cm_core->event_wq)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&cm_core->listen_list);
+
+ timer_setup(&cm_core->tcp_timer, irdma_cm_timer_tick, 0);
+
+ spin_lock_init(&cm_core->ht_lock);
+ spin_lock_init(&cm_core->listen_list_lock);
+ spin_lock_init(&cm_core->apbvt_lock);
+ switch (rdma_ver) {
+ case IRDMA_GEN_1:
+ cm_core->form_cm_frame = irdma_form_uda_cm_frame;
+ cm_core->cm_create_ah = irdma_add_qh;
+ cm_core->cm_free_ah = irdma_cm_free_ah_nop;
+ break;
+ case IRDMA_GEN_2:
+ default:
+ cm_core->form_cm_frame = irdma_form_ah_cm_frame;
+ cm_core->cm_create_ah = irdma_cm_create_ah;
+ cm_core->cm_free_ah = irdma_cm_free_ah;
+ }
+
+ return 0;
+}
+
+/**
+ * irdma_cleanup_cm_core - deallocate a top level instance of a
+ * cm core
+ * @cm_core: cm's core
+ */
+void irdma_cleanup_cm_core(struct irdma_cm_core *cm_core)
+{
+ if (!cm_core)
+ return;
+
+ timer_delete_sync(&cm_core->tcp_timer);
+
+ destroy_workqueue(cm_core->event_wq);
+ cm_core->dev->ws_reset(&cm_core->iwdev->vsi);
+}
+
+/**
+ * irdma_init_tcp_ctx - setup qp context
+ * @cm_node: connection's node
+ * @tcp_info: offload info for tcp
+ * @iwqp: associate qp for the connection
+ */
+static void irdma_init_tcp_ctx(struct irdma_cm_node *cm_node,
+ struct irdma_tcp_offload_info *tcp_info,
+ struct irdma_qp *iwqp)
+{
+ tcp_info->ipv4 = cm_node->ipv4;
+ tcp_info->drop_ooo_seg = !iwqp->iwdev->iw_ooo;
+ tcp_info->wscale = true;
+ tcp_info->ignore_tcp_opt = true;
+ tcp_info->ignore_tcp_uns_opt = true;
+ tcp_info->no_nagle = false;
+
+ tcp_info->ttl = IRDMA_DEFAULT_TTL;
+ tcp_info->rtt_var = IRDMA_DEFAULT_RTT_VAR;
+ tcp_info->ss_thresh = IRDMA_DEFAULT_SS_THRESH;
+ tcp_info->rexmit_thresh = IRDMA_DEFAULT_REXMIT_THRESH;
+
+ tcp_info->tcp_state = IRDMA_TCP_STATE_ESTABLISHED;
+ tcp_info->snd_wscale = cm_node->tcp_cntxt.snd_wscale;
+ tcp_info->rcv_wscale = cm_node->tcp_cntxt.rcv_wscale;
+
+ tcp_info->snd_nxt = cm_node->tcp_cntxt.loc_seq_num;
+ tcp_info->snd_wnd = cm_node->tcp_cntxt.snd_wnd;
+ tcp_info->rcv_nxt = cm_node->tcp_cntxt.rcv_nxt;
+ tcp_info->snd_max = cm_node->tcp_cntxt.loc_seq_num;
+
+ tcp_info->snd_una = cm_node->tcp_cntxt.loc_seq_num;
+ tcp_info->cwnd = 2 * cm_node->tcp_cntxt.mss;
+ tcp_info->snd_wl1 = cm_node->tcp_cntxt.rcv_nxt;
+ tcp_info->snd_wl2 = cm_node->tcp_cntxt.loc_seq_num;
+ tcp_info->max_snd_window = cm_node->tcp_cntxt.max_snd_wnd;
+ tcp_info->rcv_wnd = cm_node->tcp_cntxt.rcv_wnd
+ << cm_node->tcp_cntxt.rcv_wscale;
+
+ tcp_info->flow_label = 0;
+ tcp_info->snd_mss = (u32)cm_node->tcp_cntxt.mss;
+ tcp_info->tos = cm_node->tos;
+ if (cm_node->vlan_id < VLAN_N_VID) {
+ tcp_info->insert_vlan_tag = true;
+ tcp_info->vlan_tag = cm_node->vlan_id;
+ tcp_info->vlan_tag |= cm_node->user_pri << VLAN_PRIO_SHIFT;
+ }
+ if (cm_node->ipv4) {
+ tcp_info->src_port = cm_node->loc_port;
+ tcp_info->dst_port = cm_node->rem_port;
+
+ tcp_info->dest_ip_addr[3] = cm_node->rem_addr[0];
+ tcp_info->local_ipaddr[3] = cm_node->loc_addr[0];
+ tcp_info->arp_idx = (u16)irdma_arp_table(iwqp->iwdev->rf,
+ &tcp_info->dest_ip_addr[3],
+ true, NULL,
+ IRDMA_ARP_RESOLVE);
+ } else {
+ tcp_info->src_port = cm_node->loc_port;
+ tcp_info->dst_port = cm_node->rem_port;
+ memcpy(tcp_info->dest_ip_addr, cm_node->rem_addr,
+ sizeof(tcp_info->dest_ip_addr));
+ memcpy(tcp_info->local_ipaddr, cm_node->loc_addr,
+ sizeof(tcp_info->local_ipaddr));
+
+ tcp_info->arp_idx = (u16)irdma_arp_table(iwqp->iwdev->rf,
+ &tcp_info->dest_ip_addr[0],
+ false, NULL,
+ IRDMA_ARP_RESOLVE);
+ }
+}
+
+/**
+ * irdma_cm_init_tsa_conn - setup qp for RTS
+ * @iwqp: associate qp for the connection
+ * @cm_node: connection's node
+ */
+static void irdma_cm_init_tsa_conn(struct irdma_qp *iwqp,
+ struct irdma_cm_node *cm_node)
+{
+ struct irdma_iwarp_offload_info *iwarp_info;
+ struct irdma_qp_host_ctx_info *ctx_info;
+
+ iwarp_info = &iwqp->iwarp_info;
+ ctx_info = &iwqp->ctx_info;
+
+ ctx_info->tcp_info = &iwqp->tcp_info;
+ ctx_info->send_cq_num = iwqp->iwscq->sc_cq.cq_uk.cq_id;
+ ctx_info->rcv_cq_num = iwqp->iwrcq->sc_cq.cq_uk.cq_id;
+
+ iwarp_info->ord_size = cm_node->ord_size;
+ iwarp_info->ird_size = cm_node->ird_size;
+ iwarp_info->rd_en = true;
+ iwarp_info->rdmap_ver = 1;
+ iwarp_info->ddp_ver = 1;
+ iwarp_info->pd_id = iwqp->iwpd->sc_pd.pd_id;
+
+ ctx_info->tcp_info_valid = true;
+ ctx_info->iwarp_info_valid = true;
+ ctx_info->user_pri = cm_node->user_pri;
+
+ irdma_init_tcp_ctx(cm_node, &iwqp->tcp_info, iwqp);
+ if (cm_node->snd_mark_en) {
+ iwarp_info->snd_mark_en = true;
+ iwarp_info->snd_mark_offset = (iwqp->tcp_info.snd_nxt & SNDMARKER_SEQNMASK) +
+ cm_node->lsmm_size;
+ }
+
+ cm_node->state = IRDMA_CM_STATE_OFFLOADED;
+ iwqp->tcp_info.tcp_state = IRDMA_TCP_STATE_ESTABLISHED;
+ iwqp->tcp_info.src_mac_addr_idx = iwqp->iwdev->mac_ip_table_idx;
+
+ if (cm_node->rcv_mark_en) {
+ iwarp_info->rcv_mark_en = true;
+ iwarp_info->align_hdrs = true;
+ }
+
+ irdma_sc_qp_setctx(&iwqp->sc_qp, iwqp->host_ctx.va, ctx_info);
+
+ /* once tcp_info is set, no need to do it again */
+ ctx_info->tcp_info_valid = false;
+ ctx_info->iwarp_info_valid = false;
+}
+
+/**
+ * irdma_cm_disconn - when a connection is being closed
+ * @iwqp: associated qp for the connection
+ */
+void irdma_cm_disconn(struct irdma_qp *iwqp)
+{
+ struct irdma_device *iwdev = iwqp->iwdev;
+ struct disconn_work *work;
+ unsigned long flags;
+
+ work = kzalloc(sizeof(*work), GFP_ATOMIC);
+ if (!work)
+ return;
+
+ spin_lock_irqsave(&iwdev->rf->qptable_lock, flags);
+ if (!iwdev->rf->qp_table[iwqp->ibqp.qp_num]) {
+ spin_unlock_irqrestore(&iwdev->rf->qptable_lock, flags);
+ ibdev_dbg(&iwdev->ibdev,
+ "CM: qp_id %d is already freed\n",
+ iwqp->ibqp.qp_num);
+ kfree(work);
+ return;
+ }
+ irdma_qp_add_ref(&iwqp->ibqp);
+ spin_unlock_irqrestore(&iwdev->rf->qptable_lock, flags);
+
+ work->iwqp = iwqp;
+ INIT_WORK(&work->work, irdma_disconnect_worker);
+ queue_work(iwdev->cleanup_wq, &work->work);
+}
+
+/**
+ * irdma_qp_disconnect - free qp and close cm
+ * @iwqp: associate qp for the connection
+ */
+static void irdma_qp_disconnect(struct irdma_qp *iwqp)
+{
+ struct irdma_device *iwdev = iwqp->iwdev;
+
+ iwqp->active_conn = 0;
+ /* close the CM node down if it is still active */
+ ibdev_dbg(&iwdev->ibdev, "CM: Call close API\n");
+ irdma_cm_close(iwqp->cm_node);
+}
+
+/**
+ * irdma_cm_disconn_true - called by worker thread to disconnect qp
+ * @iwqp: associate qp for the connection
+ */
+static void irdma_cm_disconn_true(struct irdma_qp *iwqp)
+{
+ struct iw_cm_id *cm_id;
+ struct irdma_device *iwdev;
+ struct irdma_sc_qp *qp = &iwqp->sc_qp;
+ u16 last_ae;
+ u8 original_hw_tcp_state;
+ u8 original_ibqp_state;
+ int disconn_status = 0;
+ int issue_disconn = 0;
+ int issue_close = 0;
+ int issue_flush = 0;
+ unsigned long flags;
+ int err;
+
+ iwdev = iwqp->iwdev;
+ spin_lock_irqsave(&iwqp->lock, flags);
+ if (rdma_protocol_roce(&iwdev->ibdev, 1)) {
+ struct ib_qp_attr attr;
+
+ if (iwqp->flush_issued || iwqp->sc_qp.qp_uk.destroy_pending) {
+ spin_unlock_irqrestore(&iwqp->lock, flags);
+ return;
+ }
+
+ spin_unlock_irqrestore(&iwqp->lock, flags);
+
+ attr.qp_state = IB_QPS_ERR;
+ irdma_modify_qp_roce(&iwqp->ibqp, &attr, IB_QP_STATE, NULL);
+ irdma_ib_qp_event(iwqp, qp->event_type);
+ return;
+ }
+
+ cm_id = iwqp->cm_id;
+ original_hw_tcp_state = iwqp->hw_tcp_state;
+ original_ibqp_state = iwqp->ibqp_state;
+ last_ae = iwqp->last_aeq;
+
+ if (qp->term_flags) {
+ issue_disconn = 1;
+ issue_close = 1;
+ iwqp->cm_id = NULL;
+ irdma_terminate_del_timer(qp);
+ if (!iwqp->flush_issued) {
+ iwqp->flush_issued = 1;
+ issue_flush = 1;
+ }
+ } else if ((original_hw_tcp_state == IRDMA_TCP_STATE_CLOSE_WAIT) ||
+ ((original_ibqp_state == IB_QPS_RTS) &&
+ (last_ae == IRDMA_AE_LLP_CONNECTION_RESET))) {
+ issue_disconn = 1;
+ if (last_ae == IRDMA_AE_LLP_CONNECTION_RESET)
+ disconn_status = -ECONNRESET;
+ }
+
+ if (original_hw_tcp_state == IRDMA_TCP_STATE_CLOSED ||
+ original_hw_tcp_state == IRDMA_TCP_STATE_TIME_WAIT ||
+ last_ae == IRDMA_AE_RDMAP_ROE_BAD_LLP_CLOSE ||
+ last_ae == IRDMA_AE_BAD_CLOSE ||
+ last_ae == IRDMA_AE_LLP_CONNECTION_RESET || iwdev->rf->reset || !cm_id) {
+ issue_close = 1;
+ iwqp->cm_id = NULL;
+ qp->term_flags = 0;
+ if (!iwqp->flush_issued) {
+ iwqp->flush_issued = 1;
+ issue_flush = 1;
+ }
+ }
+
+ spin_unlock_irqrestore(&iwqp->lock, flags);
+ if (issue_flush && !iwqp->sc_qp.qp_uk.destroy_pending) {
+ irdma_flush_wqes(iwqp, IRDMA_FLUSH_SQ | IRDMA_FLUSH_RQ |
+ IRDMA_FLUSH_WAIT);
+
+ if (qp->term_flags)
+ irdma_ib_qp_event(iwqp, qp->event_type);
+ }
+
+ if (!cm_id || !cm_id->event_handler)
+ return;
+
+ spin_lock_irqsave(&iwdev->cm_core.ht_lock, flags);
+ if (!iwqp->cm_node) {
+ spin_unlock_irqrestore(&iwdev->cm_core.ht_lock, flags);
+ return;
+ }
+ refcount_inc(&iwqp->cm_node->refcnt);
+
+ spin_unlock_irqrestore(&iwdev->cm_core.ht_lock, flags);
+
+ if (issue_disconn) {
+ err = irdma_send_cm_event(iwqp->cm_node, cm_id,
+ IW_CM_EVENT_DISCONNECT,
+ disconn_status);
+ if (err)
+ ibdev_dbg(&iwdev->ibdev,
+ "CM: disconnect event failed: - cm_id = %p\n",
+ cm_id);
+ }
+ if (issue_close) {
+ cm_id->provider_data = iwqp;
+ err = irdma_send_cm_event(iwqp->cm_node, cm_id,
+ IW_CM_EVENT_CLOSE, 0);
+ if (err)
+ ibdev_dbg(&iwdev->ibdev,
+ "CM: close event failed: - cm_id = %p\n",
+ cm_id);
+ irdma_qp_disconnect(iwqp);
+ }
+ irdma_rem_ref_cm_node(iwqp->cm_node);
+}
+
+/**
+ * irdma_disconnect_worker - worker for connection close
+ * @work: points or disconn structure
+ */
+static void irdma_disconnect_worker(struct work_struct *work)
+{
+ struct disconn_work *dwork = container_of(work, struct disconn_work, work);
+ struct irdma_qp *iwqp = dwork->iwqp;
+
+ kfree(dwork);
+ irdma_cm_disconn_true(iwqp);
+ irdma_qp_rem_ref(&iwqp->ibqp);
+}
+
+/**
+ * irdma_free_lsmm_rsrc - free lsmm memory and deregister
+ * @iwqp: associate qp for the connection
+ */
+void irdma_free_lsmm_rsrc(struct irdma_qp *iwqp)
+{
+ struct irdma_device *iwdev;
+
+ iwdev = iwqp->iwdev;
+
+ if (iwqp->ietf_mem.va) {
+ if (iwqp->lsmm_mr)
+ iwdev->ibdev.ops.dereg_mr(iwqp->lsmm_mr, NULL);
+ dma_free_coherent(iwdev->rf->sc_dev.hw->device,
+ iwqp->ietf_mem.size, iwqp->ietf_mem.va,
+ iwqp->ietf_mem.pa);
+ iwqp->ietf_mem.va = NULL;
+ }
+}
+
+/**
+ * irdma_accept - registered call for connection to be accepted
+ * @cm_id: cm information for passive connection
+ * @conn_param: accept parameters
+ */
+int irdma_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
+{
+ struct ib_qp *ibqp;
+ struct irdma_qp *iwqp;
+ struct irdma_device *iwdev;
+ struct irdma_sc_dev *dev;
+ struct irdma_cm_node *cm_node;
+ struct ib_qp_attr attr = {};
+ int passive_state;
+ struct ib_mr *ibmr;
+ struct irdma_pd *iwpd;
+ u16 buf_len = 0;
+ struct irdma_kmem_info accept;
+ u64 tagged_offset;
+ int wait_ret;
+ int ret = 0;
+
+ ibqp = irdma_get_qp(cm_id->device, conn_param->qpn);
+ if (!ibqp)
+ return -EINVAL;
+
+ iwqp = to_iwqp(ibqp);
+ iwdev = iwqp->iwdev;
+ dev = &iwdev->rf->sc_dev;
+ cm_node = cm_id->provider_data;
+
+ if (((struct sockaddr_in *)&cm_id->local_addr)->sin_family == AF_INET) {
+ cm_node->ipv4 = true;
+ cm_node->vlan_id = irdma_get_vlan_ipv4(cm_node->loc_addr);
+ } else {
+ cm_node->ipv4 = false;
+ irdma_get_vlan_mac_ipv6(cm_node->loc_addr, &cm_node->vlan_id,
+ NULL);
+ }
+ ibdev_dbg(&iwdev->ibdev, "CM: Accept vlan_id=%d\n",
+ cm_node->vlan_id);
+
+ trace_irdma_accept(cm_node, 0, NULL);
+
+ if (cm_node->state == IRDMA_CM_STATE_LISTENER_DESTROYED) {
+ ret = -EINVAL;
+ goto error;
+ }
+
+ passive_state = atomic_add_return(1, &cm_node->passive_state);
+ if (passive_state == IRDMA_SEND_RESET_EVENT) {
+ ret = -ECONNRESET;
+ goto error;
+ }
+
+ buf_len = conn_param->private_data_len + IRDMA_MAX_IETF_SIZE;
+ iwqp->ietf_mem.size = ALIGN(buf_len, 1);
+ iwqp->ietf_mem.va = dma_alloc_coherent(dev->hw->device,
+ iwqp->ietf_mem.size,
+ &iwqp->ietf_mem.pa, GFP_KERNEL);
+ if (!iwqp->ietf_mem.va) {
+ ret = -ENOMEM;
+ goto error;
+ }
+
+ cm_node->pdata.size = conn_param->private_data_len;
+ accept.addr = iwqp->ietf_mem.va;
+ accept.size = irdma_cm_build_mpa_frame(cm_node, &accept, MPA_KEY_REPLY);
+ memcpy((u8 *)accept.addr + accept.size, conn_param->private_data,
+ conn_param->private_data_len);
+
+ if (cm_node->dev->ws_add(iwqp->sc_qp.vsi, cm_node->user_pri)) {
+ ret = -ENOMEM;
+ goto error;
+ }
+ iwqp->sc_qp.user_pri = cm_node->user_pri;
+ irdma_qp_add_qos(&iwqp->sc_qp);
+ /* setup our first outgoing iWarp send WQE (the IETF frame response) */
+ iwpd = iwqp->iwpd;
+ tagged_offset = (uintptr_t)iwqp->ietf_mem.va;
+ ibmr = irdma_reg_phys_mr(&iwpd->ibpd, iwqp->ietf_mem.pa, buf_len,
+ IB_ACCESS_LOCAL_WRITE, &tagged_offset, false);
+ if (IS_ERR(ibmr)) {
+ ret = -ENOMEM;
+ goto error;
+ }
+
+ ibmr->pd = &iwpd->ibpd;
+ ibmr->device = iwpd->ibpd.device;
+ iwqp->lsmm_mr = ibmr;
+ if (iwqp->page)
+ iwqp->sc_qp.qp_uk.sq_base = kmap_local_page(iwqp->page);
+
+ cm_node->lsmm_size = accept.size + conn_param->private_data_len;
+ irdma_sc_send_lsmm(&iwqp->sc_qp, iwqp->ietf_mem.va, cm_node->lsmm_size,
+ ibmr->lkey);
+
+ if (iwqp->page)
+ kunmap_local(iwqp->sc_qp.qp_uk.sq_base);
+
+ iwqp->cm_id = cm_id;
+ cm_node->cm_id = cm_id;
+
+ cm_id->provider_data = iwqp;
+ iwqp->active_conn = 0;
+ iwqp->cm_node = cm_node;
+ cm_node->iwqp = iwqp;
+ irdma_cm_init_tsa_conn(iwqp, cm_node);
+ irdma_qp_add_ref(&iwqp->ibqp);
+ cm_id->add_ref(cm_id);
+
+ attr.qp_state = IB_QPS_RTS;
+ cm_node->qhash_set = false;
+ cm_node->cm_core->cm_free_ah(cm_node);
+
+ irdma_modify_qp(&iwqp->ibqp, &attr, IB_QP_STATE, NULL);
+ if (dev->hw_attrs.uk_attrs.feature_flags & IRDMA_FEATURE_RTS_AE) {
+ wait_ret = wait_event_interruptible_timeout(iwqp->waitq,
+ iwqp->rts_ae_rcvd,
+ IRDMA_MAX_TIMEOUT);
+ if (!wait_ret) {
+ ibdev_dbg(&iwdev->ibdev,
+ "CM: Slow Connection: cm_node=%p, loc_port=%d, rem_port=%d, cm_id=%p\n",
+ cm_node, cm_node->loc_port,
+ cm_node->rem_port, cm_node->cm_id);
+ ret = -ECONNRESET;
+ goto error;
+ }
+ }
+
+ irdma_send_cm_event(cm_node, cm_id, IW_CM_EVENT_ESTABLISHED, 0);
+ cm_node->accelerated = true;
+ complete(&cm_node->establish_comp);
+
+ if (cm_node->accept_pend) {
+ atomic_dec(&cm_node->listener->pend_accepts_cnt);
+ cm_node->accept_pend = 0;
+ }
+
+ ibdev_dbg(&iwdev->ibdev,
+ "CM: rem_port=0x%04x, loc_port=0x%04x rem_addr=%pI4 loc_addr=%pI4 cm_node=%p cm_id=%p qp_id = %d\n\n",
+ cm_node->rem_port, cm_node->loc_port, cm_node->rem_addr,
+ cm_node->loc_addr, cm_node, cm_id, ibqp->qp_num);
+ cm_node->cm_core->stats_accepts++;
+
+ return 0;
+error:
+ irdma_free_lsmm_rsrc(iwqp);
+ irdma_rem_ref_cm_node(cm_node);
+
+ return ret;
+}
+
+/**
+ * irdma_reject - registered call for connection to be rejected
+ * @cm_id: cm information for passive connection
+ * @pdata: private data to be sent
+ * @pdata_len: private data length
+ */
+int irdma_reject(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
+{
+ struct irdma_device *iwdev;
+ struct irdma_cm_node *cm_node;
+
+ cm_node = cm_id->provider_data;
+ cm_node->pdata.size = pdata_len;
+
+ trace_irdma_reject(cm_node, 0, NULL);
+
+ iwdev = to_iwdev(cm_id->device);
+ if (!iwdev)
+ return -EINVAL;
+
+ cm_node->cm_core->stats_rejects++;
+
+ if (pdata_len + sizeof(struct ietf_mpa_v2) > IRDMA_MAX_CM_BUF)
+ return -EINVAL;
+
+ return irdma_cm_reject(cm_node, pdata, pdata_len);
+}
+
+/**
+ * irdma_connect - registered call for connection to be established
+ * @cm_id: cm information for passive connection
+ * @conn_param: Information about the connection
+ */
+int irdma_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
+{
+ struct ib_qp *ibqp;
+ struct irdma_qp *iwqp;
+ struct irdma_device *iwdev;
+ struct irdma_cm_node *cm_node;
+ struct irdma_cm_info cm_info;
+ struct sockaddr_in *laddr;
+ struct sockaddr_in *raddr;
+ struct sockaddr_in6 *laddr6;
+ struct sockaddr_in6 *raddr6;
+ int ret = 0;
+
+ ibqp = irdma_get_qp(cm_id->device, conn_param->qpn);
+ if (!ibqp)
+ return -EINVAL;
+ iwqp = to_iwqp(ibqp);
+ if (!iwqp)
+ return -EINVAL;
+ iwdev = iwqp->iwdev;
+ if (!iwdev)
+ return -EINVAL;
+
+ laddr = (struct sockaddr_in *)&cm_id->m_local_addr;
+ raddr = (struct sockaddr_in *)&cm_id->m_remote_addr;
+ laddr6 = (struct sockaddr_in6 *)&cm_id->m_local_addr;
+ raddr6 = (struct sockaddr_in6 *)&cm_id->m_remote_addr;
+
+ if (!(laddr->sin_port) || !(raddr->sin_port))
+ return -EINVAL;
+
+ iwqp->active_conn = 1;
+ iwqp->cm_id = NULL;
+ cm_id->provider_data = iwqp;
+
+ /* set up the connection params for the node */
+ if (cm_id->remote_addr.ss_family == AF_INET) {
+ if (iwdev->vsi.mtu < IRDMA_MIN_MTU_IPV4)
+ return -EINVAL;
+
+ cm_info.ipv4 = true;
+ memset(cm_info.loc_addr, 0, sizeof(cm_info.loc_addr));
+ memset(cm_info.rem_addr, 0, sizeof(cm_info.rem_addr));
+ cm_info.loc_addr[0] = ntohl(laddr->sin_addr.s_addr);
+ cm_info.rem_addr[0] = ntohl(raddr->sin_addr.s_addr);
+ cm_info.loc_port = ntohs(laddr->sin_port);
+ cm_info.rem_port = ntohs(raddr->sin_port);
+ cm_info.vlan_id = irdma_get_vlan_ipv4(cm_info.loc_addr);
+ } else {
+ if (iwdev->vsi.mtu < IRDMA_MIN_MTU_IPV6)
+ return -EINVAL;
+
+ cm_info.ipv4 = false;
+ irdma_copy_ip_ntohl(cm_info.loc_addr,
+ laddr6->sin6_addr.in6_u.u6_addr32);
+ irdma_copy_ip_ntohl(cm_info.rem_addr,
+ raddr6->sin6_addr.in6_u.u6_addr32);
+ cm_info.loc_port = ntohs(laddr6->sin6_port);
+ cm_info.rem_port = ntohs(raddr6->sin6_port);
+ irdma_get_vlan_mac_ipv6(cm_info.loc_addr, &cm_info.vlan_id,
+ NULL);
+ }
+ cm_info.cm_id = cm_id;
+ cm_info.qh_qpid = iwdev->vsi.ilq->qp_id;
+ cm_info.tos = cm_id->tos;
+ if (iwdev->vsi.dscp_mode) {
+ cm_info.user_pri =
+ iwqp->sc_qp.vsi->dscp_map[irdma_tos2dscp(cm_info.tos)];
+ } else {
+ cm_info.user_pri = rt_tos2priority(cm_id->tos);
+ cm_info.user_pri = irdma_iw_get_vlan_prio(cm_info.loc_addr,
+ cm_info.user_pri,
+ cm_info.ipv4);
+ }
+
+ if (iwqp->sc_qp.dev->ws_add(iwqp->sc_qp.vsi, cm_info.user_pri))
+ return -ENOMEM;
+ iwqp->sc_qp.user_pri = cm_info.user_pri;
+ irdma_qp_add_qos(&iwqp->sc_qp);
+ ibdev_dbg(&iwdev->ibdev, "DCB: TOS:[%d] UP:[%d]\n", cm_id->tos,
+ cm_info.user_pri);
+
+ trace_irdma_dcb_tos(iwdev, cm_id->tos, cm_info.user_pri);
+
+ ret = irdma_create_cm_node(&iwdev->cm_core, iwdev, conn_param, &cm_info,
+ &cm_node);
+ if (ret)
+ return ret;
+ ret = cm_node->cm_core->cm_create_ah(cm_node, true);
+ if (ret)
+ goto err;
+ if (irdma_manage_qhash(iwdev, &cm_info,
+ IRDMA_QHASH_TYPE_TCP_ESTABLISHED,
+ IRDMA_QHASH_MANAGE_TYPE_ADD, NULL, true)) {
+ ret = -EINVAL;
+ goto err;
+ }
+ cm_node->qhash_set = true;
+
+ cm_node->apbvt_entry = irdma_add_apbvt(iwdev, cm_info.loc_port);
+ if (!cm_node->apbvt_entry) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ cm_node->apbvt_set = true;
+ iwqp->cm_node = cm_node;
+ cm_node->iwqp = iwqp;
+ iwqp->cm_id = cm_id;
+ irdma_qp_add_ref(&iwqp->ibqp);
+ cm_id->add_ref(cm_id);
+
+ if (cm_node->state != IRDMA_CM_STATE_OFFLOADED) {
+ cm_node->state = IRDMA_CM_STATE_SYN_SENT;
+ ret = irdma_send_syn(cm_node, 0);
+ if (ret)
+ goto err;
+ }
+
+ ibdev_dbg(&iwdev->ibdev,
+ "CM: rem_port=0x%04x, loc_port=0x%04x rem_addr=%pI4 loc_addr=%pI4 cm_node=%p cm_id=%p qp_id = %d\n\n",
+ cm_node->rem_port, cm_node->loc_port, cm_node->rem_addr,
+ cm_node->loc_addr, cm_node, cm_id, ibqp->qp_num);
+
+ trace_irdma_connect(cm_node, 0, NULL);
+
+ return 0;
+
+err:
+ if (cm_info.ipv4)
+ ibdev_dbg(&iwdev->ibdev,
+ "CM: connect() FAILED: dest addr=%pI4",
+ cm_info.rem_addr);
+ else
+ ibdev_dbg(&iwdev->ibdev,
+ "CM: connect() FAILED: dest addr=%pI6",
+ cm_info.rem_addr);
+ irdma_rem_ref_cm_node(cm_node);
+ iwdev->cm_core.stats_connect_errs++;
+
+ return ret;
+}
+
+/**
+ * irdma_create_listen - registered call creating listener
+ * @cm_id: cm information for passive connection
+ * @backlog: to max accept pending count
+ */
+int irdma_create_listen(struct iw_cm_id *cm_id, int backlog)
+{
+ struct irdma_device *iwdev;
+ struct irdma_cm_listener *cm_listen_node;
+ struct irdma_cm_info cm_info = {};
+ struct sockaddr_in *laddr;
+ struct sockaddr_in6 *laddr6;
+ bool wildcard = false;
+ int err;
+
+ iwdev = to_iwdev(cm_id->device);
+ if (!iwdev)
+ return -EINVAL;
+
+ laddr = (struct sockaddr_in *)&cm_id->m_local_addr;
+ laddr6 = (struct sockaddr_in6 *)&cm_id->m_local_addr;
+ cm_info.qh_qpid = iwdev->vsi.ilq->qp_id;
+
+ if (laddr->sin_family == AF_INET) {
+ if (iwdev->vsi.mtu < IRDMA_MIN_MTU_IPV4)
+ return -EINVAL;
+
+ cm_info.ipv4 = true;
+ cm_info.loc_addr[0] = ntohl(laddr->sin_addr.s_addr);
+ cm_info.loc_port = ntohs(laddr->sin_port);
+
+ if (laddr->sin_addr.s_addr != htonl(INADDR_ANY)) {
+ cm_info.vlan_id = irdma_get_vlan_ipv4(cm_info.loc_addr);
+ } else {
+ cm_info.vlan_id = 0xFFFF;
+ wildcard = true;
+ }
+ } else {
+ if (iwdev->vsi.mtu < IRDMA_MIN_MTU_IPV6)
+ return -EINVAL;
+
+ cm_info.ipv4 = false;
+ irdma_copy_ip_ntohl(cm_info.loc_addr,
+ laddr6->sin6_addr.in6_u.u6_addr32);
+ cm_info.loc_port = ntohs(laddr6->sin6_port);
+ if (ipv6_addr_type(&laddr6->sin6_addr) != IPV6_ADDR_ANY) {
+ irdma_get_vlan_mac_ipv6(cm_info.loc_addr,
+ &cm_info.vlan_id, NULL);
+ } else {
+ cm_info.vlan_id = 0xFFFF;
+ wildcard = true;
+ }
+ }
+
+ if (cm_info.vlan_id >= VLAN_N_VID && iwdev->dcb_vlan_mode)
+ cm_info.vlan_id = 0;
+ cm_info.backlog = backlog;
+ cm_info.cm_id = cm_id;
+
+ trace_irdma_create_listen(iwdev, &cm_info);
+
+ cm_listen_node = irdma_make_listen_node(&iwdev->cm_core, iwdev,
+ &cm_info);
+ if (!cm_listen_node) {
+ ibdev_dbg(&iwdev->ibdev,
+ "CM: cm_listen_node == NULL\n");
+ return -ENOMEM;
+ }
+
+ cm_id->provider_data = cm_listen_node;
+
+ cm_listen_node->tos = cm_id->tos;
+ if (iwdev->vsi.dscp_mode)
+ cm_listen_node->user_pri =
+ iwdev->vsi.dscp_map[irdma_tos2dscp(cm_id->tos)];
+ else
+ cm_listen_node->user_pri = rt_tos2priority(cm_id->tos);
+ cm_info.user_pri = cm_listen_node->user_pri;
+ if (!cm_listen_node->reused_node) {
+ if (wildcard) {
+ err = irdma_add_mqh(iwdev, &cm_info, cm_listen_node);
+ if (err)
+ goto error;
+ } else {
+ if (!iwdev->vsi.dscp_mode)
+ cm_listen_node->user_pri =
+ irdma_iw_get_vlan_prio(cm_info.loc_addr,
+ cm_info.user_pri,
+ cm_info.ipv4);
+ cm_info.user_pri = cm_listen_node->user_pri;
+ err = irdma_manage_qhash(iwdev, &cm_info,
+ IRDMA_QHASH_TYPE_TCP_SYN,
+ IRDMA_QHASH_MANAGE_TYPE_ADD,
+ NULL, true);
+ if (err)
+ goto error;
+
+ cm_listen_node->qhash_set = true;
+ }
+
+ cm_listen_node->apbvt_entry = irdma_add_apbvt(iwdev,
+ cm_info.loc_port);
+ if (!cm_listen_node->apbvt_entry)
+ goto error;
+ }
+ cm_id->add_ref(cm_id);
+ cm_listen_node->cm_core->stats_listen_created++;
+ ibdev_dbg(&iwdev->ibdev,
+ "CM: loc_port=0x%04x loc_addr=%pI4 cm_listen_node=%p cm_id=%p qhash_set=%d vlan_id=%d\n",
+ cm_listen_node->loc_port, cm_listen_node->loc_addr,
+ cm_listen_node, cm_listen_node->cm_id,
+ cm_listen_node->qhash_set, cm_listen_node->vlan_id);
+
+ return 0;
+
+error:
+
+ irdma_cm_del_listen(&iwdev->cm_core, cm_listen_node, false);
+
+ return -EINVAL;
+}
+
+/**
+ * irdma_destroy_listen - registered call to destroy listener
+ * @cm_id: cm information for passive connection
+ */
+int irdma_destroy_listen(struct iw_cm_id *cm_id)
+{
+ struct irdma_device *iwdev;
+
+ iwdev = to_iwdev(cm_id->device);
+ if (cm_id->provider_data)
+ irdma_cm_del_listen(&iwdev->cm_core, cm_id->provider_data,
+ true);
+ else
+ ibdev_dbg(&iwdev->ibdev,
+ "CM: cm_id->provider_data was NULL\n");
+
+ cm_id->rem_ref(cm_id);
+
+ return 0;
+}
+
+/**
+ * irdma_teardown_list_prep - add conn nodes slated for tear down to list
+ * @cm_core: cm's core
+ * @teardown_list: a list to which cm_node will be selected
+ * @ipaddr: pointer to ip address
+ * @nfo: pointer to cm_info structure instance
+ * @disconnect_all: flag indicating disconnect all QPs
+ */
+static void irdma_teardown_list_prep(struct irdma_cm_core *cm_core,
+ struct list_head *teardown_list,
+ u32 *ipaddr,
+ struct irdma_cm_info *nfo,
+ bool disconnect_all)
+{
+ struct irdma_cm_node *cm_node;
+ int bkt;
+
+ hash_for_each_rcu(cm_core->cm_hash_tbl, bkt, cm_node, list) {
+ if ((disconnect_all ||
+ (nfo->vlan_id == cm_node->vlan_id &&
+ !memcmp(cm_node->loc_addr, ipaddr, nfo->ipv4 ? 4 : 16))) &&
+ refcount_inc_not_zero(&cm_node->refcnt))
+ list_add(&cm_node->teardown_entry, teardown_list);
+ }
+}
+
+/**
+ * irdma_cm_event_connected - handle connected active node
+ * @event: the info for cm_node of connection
+ */
+static void irdma_cm_event_connected(struct irdma_cm_event *event)
+{
+ struct irdma_qp *iwqp;
+ struct irdma_device *iwdev;
+ struct irdma_cm_node *cm_node;
+ struct irdma_sc_dev *dev;
+ struct ib_qp_attr attr = {};
+ struct iw_cm_id *cm_id;
+ int status;
+ bool read0;
+ int wait_ret = 0;
+
+ cm_node = event->cm_node;
+ cm_id = cm_node->cm_id;
+ iwqp = cm_id->provider_data;
+ iwdev = iwqp->iwdev;
+ dev = &iwdev->rf->sc_dev;
+ if (iwqp->sc_qp.qp_uk.destroy_pending) {
+ status = -ETIMEDOUT;
+ goto error;
+ }
+
+ irdma_cm_init_tsa_conn(iwqp, cm_node);
+ read0 = (cm_node->send_rdma0_op == SEND_RDMA_READ_ZERO);
+ if (iwqp->page)
+ iwqp->sc_qp.qp_uk.sq_base = kmap_local_page(iwqp->page);
+ irdma_sc_send_rtt(&iwqp->sc_qp, read0);
+ if (iwqp->page)
+ kunmap_local(iwqp->sc_qp.qp_uk.sq_base);
+
+ attr.qp_state = IB_QPS_RTS;
+ cm_node->qhash_set = false;
+ irdma_modify_qp(&iwqp->ibqp, &attr, IB_QP_STATE, NULL);
+ if (dev->hw_attrs.uk_attrs.feature_flags & IRDMA_FEATURE_RTS_AE) {
+ wait_ret = wait_event_interruptible_timeout(iwqp->waitq,
+ iwqp->rts_ae_rcvd,
+ IRDMA_MAX_TIMEOUT);
+ if (!wait_ret)
+ ibdev_dbg(&iwdev->ibdev,
+ "CM: Slow Connection: cm_node=%p, loc_port=%d, rem_port=%d, cm_id=%p\n",
+ cm_node, cm_node->loc_port,
+ cm_node->rem_port, cm_node->cm_id);
+ }
+
+ irdma_send_cm_event(cm_node, cm_id, IW_CM_EVENT_CONNECT_REPLY, 0);
+ cm_node->accelerated = true;
+ complete(&cm_node->establish_comp);
+ cm_node->cm_core->cm_free_ah(cm_node);
+ return;
+
+error:
+ iwqp->cm_id = NULL;
+ cm_id->provider_data = NULL;
+ irdma_send_cm_event(event->cm_node, cm_id, IW_CM_EVENT_CONNECT_REPLY,
+ status);
+ irdma_rem_ref_cm_node(event->cm_node);
+}
+
+/**
+ * irdma_cm_event_reset - handle reset
+ * @event: the info for cm_node of connection
+ */
+static void irdma_cm_event_reset(struct irdma_cm_event *event)
+{
+ struct irdma_cm_node *cm_node = event->cm_node;
+ struct iw_cm_id *cm_id = cm_node->cm_id;
+ struct irdma_qp *iwqp;
+
+ if (!cm_id)
+ return;
+
+ iwqp = cm_id->provider_data;
+ if (!iwqp)
+ return;
+
+ ibdev_dbg(&cm_node->iwdev->ibdev,
+ "CM: reset event %p - cm_id = %p\n", event->cm_node, cm_id);
+ iwqp->cm_id = NULL;
+
+ irdma_send_cm_event(cm_node, cm_node->cm_id, IW_CM_EVENT_DISCONNECT,
+ -ECONNRESET);
+ irdma_send_cm_event(cm_node, cm_node->cm_id, IW_CM_EVENT_CLOSE, 0);
+}
+
+/**
+ * irdma_cm_event_handler - send event to cm upper layer
+ * @work: pointer of cm event info.
+ */
+static void irdma_cm_event_handler(struct work_struct *work)
+{
+ struct irdma_cm_event *event = container_of(work, struct irdma_cm_event, event_work);
+ struct irdma_cm_node *cm_node;
+
+ if (!event || !event->cm_node || !event->cm_node->cm_core)
+ return;
+
+ cm_node = event->cm_node;
+ trace_irdma_cm_event_handler(cm_node, event->type, NULL);
+
+ switch (event->type) {
+ case IRDMA_CM_EVENT_MPA_REQ:
+ irdma_send_cm_event(cm_node, cm_node->cm_id,
+ IW_CM_EVENT_CONNECT_REQUEST, 0);
+ break;
+ case IRDMA_CM_EVENT_RESET:
+ irdma_cm_event_reset(event);
+ break;
+ case IRDMA_CM_EVENT_CONNECTED:
+ if (!event->cm_node->cm_id ||
+ event->cm_node->state != IRDMA_CM_STATE_OFFLOADED)
+ break;
+ irdma_cm_event_connected(event);
+ break;
+ case IRDMA_CM_EVENT_MPA_REJECT:
+ if (!event->cm_node->cm_id ||
+ cm_node->state == IRDMA_CM_STATE_OFFLOADED)
+ break;
+ irdma_send_cm_event(cm_node, cm_node->cm_id,
+ IW_CM_EVENT_CONNECT_REPLY, -ECONNREFUSED);
+ break;
+ case IRDMA_CM_EVENT_ABORTED:
+ if (!event->cm_node->cm_id ||
+ event->cm_node->state == IRDMA_CM_STATE_OFFLOADED)
+ break;
+ irdma_event_connect_error(event);
+ break;
+ default:
+ ibdev_dbg(&cm_node->iwdev->ibdev,
+ "CM: bad event type = %d\n", event->type);
+ break;
+ }
+
+ irdma_rem_ref_cm_node(event->cm_node);
+ kfree(event);
+}
+
+/**
+ * irdma_cm_post_event - queue event request for worker thread
+ * @event: cm node's info for up event call
+ */
+static void irdma_cm_post_event(struct irdma_cm_event *event)
+{
+ refcount_inc(&event->cm_node->refcnt);
+ INIT_WORK(&event->event_work, irdma_cm_event_handler);
+ queue_work(event->cm_node->cm_core->event_wq, &event->event_work);
+}
+
+/**
+ * irdma_cm_teardown_connections - teardown QPs
+ * @iwdev: device pointer
+ * @ipaddr: Pointer to IPv4 or IPv6 address
+ * @nfo: Connection info
+ * @disconnect_all: flag indicating disconnect all QPs
+ *
+ * teardown QPs where source or destination addr matches ip addr
+ */
+void irdma_cm_teardown_connections(struct irdma_device *iwdev, u32 *ipaddr,
+ struct irdma_cm_info *nfo,
+ bool disconnect_all)
+{
+ struct irdma_cm_core *cm_core = &iwdev->cm_core;
+ struct list_head *list_core_temp;
+ struct list_head *list_node;
+ struct irdma_cm_node *cm_node;
+ struct list_head teardown_list;
+ struct ib_qp_attr attr;
+
+ INIT_LIST_HEAD(&teardown_list);
+
+ rcu_read_lock();
+ irdma_teardown_list_prep(cm_core, &teardown_list, ipaddr, nfo, disconnect_all);
+ rcu_read_unlock();
+
+ list_for_each_safe (list_node, list_core_temp, &teardown_list) {
+ cm_node = container_of(list_node, struct irdma_cm_node,
+ teardown_entry);
+ attr.qp_state = IB_QPS_ERR;
+ irdma_modify_qp(&cm_node->iwqp->ibqp, &attr, IB_QP_STATE, NULL);
+ if (iwdev->rf->reset)
+ irdma_cm_disconn(cm_node->iwqp);
+ irdma_rem_ref_cm_node(cm_node);
+ }
+}
+
+/**
+ * irdma_qhash_ctrl - enable/disable qhash for list
+ * @iwdev: device pointer
+ * @parent_listen_node: parent listen node
+ * @nfo: cm info node
+ * @ipaddr: Pointer to IPv4 or IPv6 address
+ * @ipv4: flag indicating IPv4 when true
+ * @ifup: flag indicating interface up when true
+ *
+ * Enables or disables the qhash for the node in the child
+ * listen list that matches ipaddr. If no matching IP was found
+ * it will allocate and add a new child listen node to the
+ * parent listen node. The listen_list_lock is assumed to be
+ * held when called.
+ */
+static void irdma_qhash_ctrl(struct irdma_device *iwdev,
+ struct irdma_cm_listener *parent_listen_node,
+ struct irdma_cm_info *nfo, u32 *ipaddr, bool ipv4,
+ bool ifup)
+{
+ struct list_head *child_listen_list = &parent_listen_node->child_listen_list;
+ struct irdma_cm_listener *child_listen_node;
+ struct list_head *pos, *tpos;
+ bool node_allocated = false;
+ enum irdma_quad_hash_manage_type op = ifup ?
+ IRDMA_QHASH_MANAGE_TYPE_ADD :
+ IRDMA_QHASH_MANAGE_TYPE_DELETE;
+ int err;
+
+ list_for_each_safe (pos, tpos, child_listen_list) {
+ child_listen_node = list_entry(pos, struct irdma_cm_listener,
+ child_listen_list);
+ if (!memcmp(child_listen_node->loc_addr, ipaddr, ipv4 ? 4 : 16))
+ goto set_qhash;
+ }
+
+ /* if not found then add a child listener if interface is going up */
+ if (!ifup)
+ return;
+ child_listen_node = kmemdup(parent_listen_node,
+ sizeof(*child_listen_node), GFP_ATOMIC);
+ if (!child_listen_node)
+ return;
+
+ node_allocated = true;
+ memcpy(child_listen_node->loc_addr, ipaddr, ipv4 ? 4 : 16);
+
+set_qhash:
+ memcpy(nfo->loc_addr, child_listen_node->loc_addr,
+ sizeof(nfo->loc_addr));
+ nfo->vlan_id = child_listen_node->vlan_id;
+ err = irdma_manage_qhash(iwdev, nfo, IRDMA_QHASH_TYPE_TCP_SYN, op, NULL,
+ false);
+ if (!err) {
+ child_listen_node->qhash_set = ifup;
+ if (node_allocated)
+ list_add(&child_listen_node->child_listen_list,
+ &parent_listen_node->child_listen_list);
+ } else if (node_allocated) {
+ kfree(child_listen_node);
+ }
+}
+
+/**
+ * irdma_if_notify - process an ifdown on an interface
+ * @iwdev: device pointer
+ * @netdev: network device structure
+ * @ipaddr: Pointer to IPv4 or IPv6 address
+ * @ipv4: flag indicating IPv4 when true
+ * @ifup: flag indicating interface up when true
+ */
+void irdma_if_notify(struct irdma_device *iwdev, struct net_device *netdev,
+ u32 *ipaddr, bool ipv4, bool ifup)
+{
+ struct irdma_cm_core *cm_core = &iwdev->cm_core;
+ unsigned long flags;
+ struct irdma_cm_listener *listen_node;
+ static const u32 ip_zero[4] = { 0, 0, 0, 0 };
+ struct irdma_cm_info nfo = {};
+ u16 vlan_id = rdma_vlan_dev_vlan_id(netdev);
+ enum irdma_quad_hash_manage_type op = ifup ?
+ IRDMA_QHASH_MANAGE_TYPE_ADD :
+ IRDMA_QHASH_MANAGE_TYPE_DELETE;
+
+ nfo.vlan_id = vlan_id;
+ nfo.ipv4 = ipv4;
+ nfo.qh_qpid = 1;
+
+ /* Disable or enable qhash for listeners */
+ spin_lock_irqsave(&cm_core->listen_list_lock, flags);
+ list_for_each_entry (listen_node, &cm_core->listen_list, list) {
+ if (vlan_id != listen_node->vlan_id ||
+ (memcmp(listen_node->loc_addr, ipaddr, ipv4 ? 4 : 16) &&
+ memcmp(listen_node->loc_addr, ip_zero, ipv4 ? 4 : 16)))
+ continue;
+
+ memcpy(nfo.loc_addr, listen_node->loc_addr,
+ sizeof(nfo.loc_addr));
+ nfo.loc_port = listen_node->loc_port;
+ nfo.user_pri = listen_node->user_pri;
+ if (!list_empty(&listen_node->child_listen_list)) {
+ irdma_qhash_ctrl(iwdev, listen_node, &nfo, ipaddr, ipv4,
+ ifup);
+ } else if (memcmp(listen_node->loc_addr, ip_zero,
+ ipv4 ? 4 : 16)) {
+ if (!irdma_manage_qhash(iwdev, &nfo,
+ IRDMA_QHASH_TYPE_TCP_SYN, op,
+ NULL, false))
+ listen_node->qhash_set = ifup;
+ }
+ }
+ spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
+
+ /* disconnect any connected qp's on ifdown */
+ if (!ifup)
+ irdma_cm_teardown_connections(iwdev, ipaddr, &nfo, false);
+}
diff --git a/drivers/infiniband/hw/irdma/cm.h b/drivers/infiniband/hw/irdma/cm.h
new file mode 100644
index 000000000000..48ee285cf745
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/cm.h
@@ -0,0 +1,416 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2015 - 2021 Intel Corporation */
+#ifndef IRDMA_CM_H
+#define IRDMA_CM_H
+
+#define IRDMA_MPA_REQUEST_ACCEPT 1
+#define IRDMA_MPA_REQUEST_REJECT 2
+
+/* IETF MPA -- defines */
+#define IEFT_MPA_KEY_REQ "MPA ID Req Frame"
+#define IEFT_MPA_KEY_REP "MPA ID Rep Frame"
+#define IETF_MPA_KEY_SIZE 16
+#define IETF_MPA_VER 1
+#define IETF_MAX_PRIV_DATA_LEN 512
+#define IETF_MPA_FRAME_SIZE 20
+#define IETF_RTR_MSG_SIZE 4
+#define IETF_MPA_V2_FLAG 0x10
+#define SNDMARKER_SEQNMASK 0x000001ff
+#define IRDMA_MAX_IETF_SIZE 32
+
+/* IETF RTR MSG Fields */
+#define IETF_PEER_TO_PEER 0x8000
+#define IETF_FLPDU_ZERO_LEN 0x4000
+#define IETF_RDMA0_WRITE 0x8000
+#define IETF_RDMA0_READ 0x4000
+#define IETF_NO_IRD_ORD 0x3fff
+
+#define MAX_PORTS 65536
+
+#define IRDMA_PASSIVE_STATE_INDICATED 0
+#define IRDMA_DO_NOT_SEND_RESET_EVENT 1
+#define IRDMA_SEND_RESET_EVENT 2
+
+#define MAX_IRDMA_IFS 4
+
+#define SET_ACK 1
+#define SET_SYN 2
+#define SET_FIN 4
+#define SET_RST 8
+
+#define TCP_OPTIONS_PADDING 3
+
+#define IRDMA_DEFAULT_RETRYS 64
+#define IRDMA_DEFAULT_RETRANS 32
+#define IRDMA_DEFAULT_TTL 0x40
+#define IRDMA_DEFAULT_RTT_VAR 6
+#define IRDMA_DEFAULT_SS_THRESH 0x3fffffff
+#define IRDMA_DEFAULT_REXMIT_THRESH 8
+
+#define IRDMA_RETRY_TIMEOUT HZ
+#define IRDMA_SHORT_TIME 10
+#define IRDMA_LONG_TIME (2 * HZ)
+#define IRDMA_MAX_TIMEOUT ((unsigned long)(12 * HZ))
+
+#define IRDMA_CM_HASHTABLE_SIZE 1024
+#define IRDMA_CM_TCP_TIMER_INTERVAL 3000
+#define IRDMA_CM_DEFAULT_MTU 1540
+#define IRDMA_CM_DEFAULT_FRAME_CNT 10
+#define IRDMA_CM_THREAD_STACK_SIZE 256
+#define IRDMA_CM_DEFAULT_RCV_WND 64240
+#define IRDMA_CM_DEFAULT_RCV_WND_SCALED 0x3FFFC
+#define IRDMA_CM_DEFAULT_RCV_WND_SCALE 2
+#define IRDMA_CM_DEFAULT_FREE_PKTS 10
+#define IRDMA_CM_FREE_PKT_LO_WATERMARK 2
+#define IRDMA_CM_DEFAULT_MSS 536
+#define IRDMA_CM_DEFAULT_MPA_VER 2
+#define IRDMA_CM_DEFAULT_SEQ 0x159bf75f
+#define IRDMA_CM_DEFAULT_LOCAL_ID 0x3b47
+#define IRDMA_CM_DEFAULT_SEQ2 0x18ed5740
+#define IRDMA_CM_DEFAULT_LOCAL_ID2 0xb807
+#define IRDMA_MAX_CM_BUF (IRDMA_MAX_IETF_SIZE + IETF_MAX_PRIV_DATA_LEN)
+
+enum ietf_mpa_flags {
+ IETF_MPA_FLAGS_REJECT = 0x20,
+ IETF_MPA_FLAGS_CRC = 0x40,
+ IETF_MPA_FLAGS_MARKERS = 0x80,
+};
+
+enum irdma_timer_type {
+ IRDMA_TIMER_TYPE_SEND,
+ IRDMA_TIMER_TYPE_CLOSE,
+};
+
+enum option_nums {
+ OPTION_NUM_EOL,
+ OPTION_NUM_NONE,
+ OPTION_NUM_MSS,
+ OPTION_NUM_WINDOW_SCALE,
+ OPTION_NUM_SACK_PERM,
+ OPTION_NUM_SACK,
+ OPTION_NUM_WRITE0 = 0xbc,
+};
+
+/* cm node transition states */
+enum irdma_cm_node_state {
+ IRDMA_CM_STATE_UNKNOWN,
+ IRDMA_CM_STATE_INITED,
+ IRDMA_CM_STATE_LISTENING,
+ IRDMA_CM_STATE_SYN_RCVD,
+ IRDMA_CM_STATE_SYN_SENT,
+ IRDMA_CM_STATE_ONE_SIDE_ESTABLISHED,
+ IRDMA_CM_STATE_ESTABLISHED,
+ IRDMA_CM_STATE_ACCEPTING,
+ IRDMA_CM_STATE_MPAREQ_SENT,
+ IRDMA_CM_STATE_MPAREQ_RCVD,
+ IRDMA_CM_STATE_MPAREJ_RCVD,
+ IRDMA_CM_STATE_OFFLOADED,
+ IRDMA_CM_STATE_FIN_WAIT1,
+ IRDMA_CM_STATE_FIN_WAIT2,
+ IRDMA_CM_STATE_CLOSE_WAIT,
+ IRDMA_CM_STATE_TIME_WAIT,
+ IRDMA_CM_STATE_LAST_ACK,
+ IRDMA_CM_STATE_CLOSING,
+ IRDMA_CM_STATE_LISTENER_DESTROYED,
+ IRDMA_CM_STATE_CLOSED,
+};
+
+enum mpa_frame_ver {
+ IETF_MPA_V1 = 1,
+ IETF_MPA_V2 = 2,
+};
+
+enum mpa_frame_key {
+ MPA_KEY_REQUEST,
+ MPA_KEY_REPLY,
+};
+
+enum send_rdma0 {
+ SEND_RDMA_READ_ZERO = 1,
+ SEND_RDMA_WRITE_ZERO = 2,
+};
+
+enum irdma_tcpip_pkt_type {
+ IRDMA_PKT_TYPE_UNKNOWN,
+ IRDMA_PKT_TYPE_SYN,
+ IRDMA_PKT_TYPE_SYNACK,
+ IRDMA_PKT_TYPE_ACK,
+ IRDMA_PKT_TYPE_FIN,
+ IRDMA_PKT_TYPE_RST,
+};
+
+enum irdma_cm_listener_state {
+ IRDMA_CM_LISTENER_PASSIVE_STATE = 1,
+ IRDMA_CM_LISTENER_ACTIVE_STATE = 2,
+ IRDMA_CM_LISTENER_EITHER_STATE = 3,
+};
+
+/* CM event codes */
+enum irdma_cm_event_type {
+ IRDMA_CM_EVENT_UNKNOWN,
+ IRDMA_CM_EVENT_ESTABLISHED,
+ IRDMA_CM_EVENT_MPA_REQ,
+ IRDMA_CM_EVENT_MPA_CONNECT,
+ IRDMA_CM_EVENT_MPA_ACCEPT,
+ IRDMA_CM_EVENT_MPA_REJECT,
+ IRDMA_CM_EVENT_MPA_ESTABLISHED,
+ IRDMA_CM_EVENT_CONNECTED,
+ IRDMA_CM_EVENT_RESET,
+ IRDMA_CM_EVENT_ABORTED,
+};
+
+struct ietf_mpa_v1 {
+ u8 key[IETF_MPA_KEY_SIZE];
+ u8 flags;
+ u8 rev;
+ __be16 priv_data_len;
+ u8 priv_data[];
+};
+
+struct ietf_rtr_msg {
+ __be16 ctrl_ird;
+ __be16 ctrl_ord;
+};
+
+struct ietf_mpa_v2 {
+ u8 key[IETF_MPA_KEY_SIZE];
+ u8 flags;
+ u8 rev;
+ __be16 priv_data_len;
+ struct ietf_rtr_msg rtr_msg;
+ u8 priv_data[];
+};
+
+struct option_base {
+ u8 optionnum;
+ u8 len;
+};
+
+struct option_mss {
+ u8 optionnum;
+ u8 len;
+ __be16 mss;
+};
+
+struct option_windowscale {
+ u8 optionnum;
+ u8 len;
+ u8 shiftcount;
+};
+
+union all_known_options {
+ char eol;
+ struct option_base base;
+ struct option_mss mss;
+ struct option_windowscale windowscale;
+};
+
+struct irdma_timer_entry {
+ struct list_head list;
+ unsigned long timetosend; /* jiffies */
+ struct irdma_puda_buf *sqbuf;
+ u32 type;
+ u32 retrycount;
+ u32 retranscount;
+ u32 context;
+ u32 send_retrans;
+ int close_when_complete;
+};
+
+/* CM context params */
+struct irdma_cm_tcp_context {
+ u8 client;
+ u32 loc_seq_num;
+ u32 loc_ack_num;
+ u32 rem_ack_num;
+ u32 rcv_nxt;
+ u32 loc_id;
+ u32 rem_id;
+ u32 snd_wnd;
+ u32 max_snd_wnd;
+ u32 rcv_wnd;
+ u32 mss;
+ u8 snd_wscale;
+ u8 rcv_wscale;
+};
+
+struct irdma_apbvt_entry {
+ struct hlist_node hlist;
+ u32 use_cnt;
+ u16 port;
+};
+
+struct irdma_cm_listener {
+ struct list_head list;
+ struct iw_cm_id *cm_id;
+ struct irdma_cm_core *cm_core;
+ struct irdma_device *iwdev;
+ struct list_head child_listen_list;
+ struct irdma_apbvt_entry *apbvt_entry;
+ enum irdma_cm_listener_state listener_state;
+ refcount_t refcnt;
+ atomic_t pend_accepts_cnt;
+ u32 loc_addr[4];
+ u32 reused_node;
+ int backlog;
+ u16 loc_port;
+ u16 vlan_id;
+ u8 loc_mac[ETH_ALEN];
+ u8 user_pri;
+ u8 tos;
+ bool qhash_set:1;
+ bool ipv4:1;
+};
+
+struct irdma_kmem_info {
+ void *addr;
+ u32 size;
+};
+
+struct irdma_mpa_priv_info {
+ const void *addr;
+ u32 size;
+};
+
+struct irdma_cm_node {
+ struct irdma_qp *iwqp;
+ struct irdma_device *iwdev;
+ struct irdma_sc_dev *dev;
+ struct irdma_cm_tcp_context tcp_cntxt;
+ struct irdma_cm_core *cm_core;
+ struct irdma_timer_entry *send_entry;
+ struct irdma_timer_entry *close_entry;
+ struct irdma_cm_listener *listener;
+ struct list_head timer_entry;
+ struct list_head reset_entry;
+ struct list_head teardown_entry;
+ struct irdma_apbvt_entry *apbvt_entry;
+ struct rcu_head rcu_head;
+ struct irdma_mpa_priv_info pdata;
+ struct irdma_sc_ah *ah;
+ union {
+ struct ietf_mpa_v1 mpa_frame;
+ struct ietf_mpa_v2 mpa_v2_frame;
+ };
+ struct irdma_kmem_info mpa_hdr;
+ struct iw_cm_id *cm_id;
+ struct hlist_node list;
+ struct completion establish_comp;
+ spinlock_t retrans_list_lock; /* protect CM node rexmit updates*/
+ atomic_t passive_state;
+ refcount_t refcnt;
+ enum irdma_cm_node_state state;
+ enum send_rdma0 send_rdma0_op;
+ enum mpa_frame_ver mpa_frame_rev;
+ u32 loc_addr[4], rem_addr[4];
+ u16 loc_port, rem_port;
+ int apbvt_set;
+ int accept_pend;
+ u16 vlan_id;
+ u16 ird_size;
+ u16 ord_size;
+ u16 mpav2_ird_ord;
+ u16 lsmm_size;
+ u8 pdata_buf[IETF_MAX_PRIV_DATA_LEN];
+ u8 loc_mac[ETH_ALEN];
+ u8 rem_mac[ETH_ALEN];
+ u8 user_pri;
+ u8 tos;
+ bool ack_rcvd:1;
+ bool qhash_set:1;
+ bool ipv4:1;
+ bool snd_mark_en:1;
+ bool rcv_mark_en:1;
+ bool do_lpb:1;
+ bool accelerated:1;
+};
+
+/* Used by internal CM APIs to pass CM information*/
+struct irdma_cm_info {
+ struct iw_cm_id *cm_id;
+ u16 loc_port;
+ u16 rem_port;
+ u32 loc_addr[4];
+ u32 rem_addr[4];
+ u32 qh_qpid;
+ u16 vlan_id;
+ int backlog;
+ u8 user_pri;
+ u8 tos;
+ bool ipv4;
+};
+
+struct irdma_cm_event {
+ enum irdma_cm_event_type type;
+ struct irdma_cm_info cm_info;
+ struct work_struct event_work;
+ struct irdma_cm_node *cm_node;
+};
+
+struct irdma_cm_core {
+ struct irdma_device *iwdev;
+ struct irdma_sc_dev *dev;
+ struct list_head listen_list;
+ DECLARE_HASHTABLE(cm_hash_tbl, 8);
+ DECLARE_HASHTABLE(apbvt_hash_tbl, 8);
+ struct timer_list tcp_timer;
+ struct workqueue_struct *event_wq;
+ spinlock_t ht_lock; /* protect CM node (active side) list */
+ spinlock_t listen_list_lock; /* protect listener list */
+ spinlock_t apbvt_lock; /*serialize apbvt add/del entries*/
+ u64 stats_nodes_created;
+ u64 stats_nodes_destroyed;
+ u64 stats_listen_created;
+ u64 stats_listen_destroyed;
+ u64 stats_listen_nodes_created;
+ u64 stats_listen_nodes_destroyed;
+ u64 stats_lpbs;
+ u64 stats_accepts;
+ u64 stats_rejects;
+ u64 stats_connect_errs;
+ u64 stats_passive_errs;
+ u64 stats_pkt_retrans;
+ u64 stats_backlog_drops;
+ struct irdma_puda_buf *(*form_cm_frame)(struct irdma_cm_node *cm_node,
+ struct irdma_kmem_info *options,
+ struct irdma_kmem_info *hdr,
+ struct irdma_mpa_priv_info *pdata,
+ u8 flags);
+ int (*cm_create_ah)(struct irdma_cm_node *cm_node, bool wait);
+ void (*cm_free_ah)(struct irdma_cm_node *cm_node);
+};
+
+int irdma_schedule_cm_timer(struct irdma_cm_node *cm_node,
+ struct irdma_puda_buf *sqbuf,
+ enum irdma_timer_type type, int send_retrans,
+ int close_when_complete);
+
+static inline u8 irdma_tos2dscp(u8 tos)
+{
+#define IRDMA_DSCP_VAL GENMASK(7, 2)
+ return (u8)FIELD_GET(IRDMA_DSCP_VAL, tos);
+}
+
+int irdma_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param);
+int irdma_reject(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len);
+int irdma_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param);
+int irdma_create_listen(struct iw_cm_id *cm_id, int backlog);
+int irdma_destroy_listen(struct iw_cm_id *cm_id);
+int irdma_add_arp(struct irdma_pci_f *rf, u32 *ip, bool ipv4, const u8 *mac);
+void irdma_cm_teardown_connections(struct irdma_device *iwdev, u32 *ipaddr,
+ struct irdma_cm_info *nfo,
+ bool disconnect_all);
+int irdma_cm_start(struct irdma_device *dev);
+int irdma_cm_stop(struct irdma_device *dev);
+bool irdma_ipv4_is_lpb(u32 loc_addr, u32 rem_addr);
+bool irdma_ipv6_is_lpb(u32 *loc_addr, u32 *rem_addr);
+int irdma_arp_table(struct irdma_pci_f *rf, u32 *ip_addr, bool ipv4,
+ const u8 *mac_addr, u32 action);
+void irdma_if_notify(struct irdma_device *iwdev, struct net_device *netdev,
+ u32 *ipaddr, bool ipv4, bool ifup);
+bool irdma_port_in_use(struct irdma_cm_core *cm_core, u16 port);
+void irdma_send_ack(struct irdma_cm_node *cm_node);
+void irdma_lpb_nop(struct irdma_sc_qp *qp);
+void irdma_rem_ref_cm_node(struct irdma_cm_node *cm_node);
+void irdma_add_conn_est_qh(struct irdma_cm_node *cm_node);
+#endif /* IRDMA_CM_H */
diff --git a/drivers/infiniband/hw/irdma/ctrl.c b/drivers/infiniband/hw/irdma/ctrl.c
new file mode 100644
index 000000000000..ce5cf89c463c
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/ctrl.c
@@ -0,0 +1,6602 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2015 - 2021 Intel Corporation */
+#include <linux/etherdevice.h>
+
+#include "osdep.h"
+#include "hmc.h"
+#include "defs.h"
+#include "type.h"
+#include "ws.h"
+#include "protos.h"
+
+/**
+ * irdma_get_qp_from_list - get next qp from a list
+ * @head: Listhead of qp's
+ * @qp: current qp
+ */
+struct irdma_sc_qp *irdma_get_qp_from_list(struct list_head *head,
+ struct irdma_sc_qp *qp)
+{
+ struct list_head *lastentry;
+ struct list_head *entry = NULL;
+
+ if (list_empty(head))
+ return NULL;
+
+ if (!qp) {
+ entry = head->next;
+ } else {
+ lastentry = &qp->list;
+ entry = lastentry->next;
+ if (entry == head)
+ return NULL;
+ }
+
+ return container_of(entry, struct irdma_sc_qp, list);
+}
+
+/**
+ * irdma_sc_suspend_resume_qps - suspend/resume all qp's on VSI
+ * @vsi: the VSI struct pointer
+ * @op: Set to IRDMA_OP_RESUME or IRDMA_OP_SUSPEND
+ */
+void irdma_sc_suspend_resume_qps(struct irdma_sc_vsi *vsi, u8 op)
+{
+ struct irdma_sc_qp *qp = NULL;
+ u8 i;
+
+ for (i = 0; i < IRDMA_MAX_USER_PRIORITY; i++) {
+ mutex_lock(&vsi->qos[i].qos_mutex);
+ qp = irdma_get_qp_from_list(&vsi->qos[i].qplist, qp);
+ while (qp) {
+ if (op == IRDMA_OP_RESUME) {
+ if (!qp->dev->ws_add(vsi, i)) {
+ qp->qs_handle =
+ vsi->qos[qp->user_pri].qs_handle;
+ irdma_cqp_qp_suspend_resume(qp, op);
+ } else {
+ irdma_cqp_qp_suspend_resume(qp, op);
+ irdma_modify_qp_to_err(qp);
+ }
+ } else if (op == IRDMA_OP_SUSPEND) {
+ /* issue cqp suspend command */
+ if (!irdma_cqp_qp_suspend_resume(qp, op))
+ atomic_inc(&vsi->qp_suspend_reqs);
+ }
+ qp = irdma_get_qp_from_list(&vsi->qos[i].qplist, qp);
+ }
+ mutex_unlock(&vsi->qos[i].qos_mutex);
+ }
+}
+
+static void irdma_set_qos_info(struct irdma_sc_vsi *vsi,
+ struct irdma_l2params *l2p)
+{
+ u8 i;
+
+ if (vsi->dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3) {
+ for (i = 0; i < IRDMA_MAX_USER_PRIORITY; i++) {
+ vsi->qos[i].qs_handle = vsi->dev->qos[i].qs_handle;
+ vsi->qos[i].valid = true;
+ }
+
+ return;
+ }
+ vsi->qos_rel_bw = l2p->vsi_rel_bw;
+ vsi->qos_prio_type = l2p->vsi_prio_type;
+ vsi->dscp_mode = l2p->dscp_mode;
+ if (l2p->dscp_mode) {
+ memcpy(vsi->dscp_map, l2p->dscp_map, sizeof(vsi->dscp_map));
+ for (i = 0; i < IRDMA_MAX_USER_PRIORITY; i++)
+ l2p->up2tc[i] = i;
+ }
+ for (i = 0; i < IRDMA_MAX_USER_PRIORITY; i++) {
+ if (vsi->dev->hw_attrs.uk_attrs.hw_rev == IRDMA_GEN_1)
+ vsi->qos[i].qs_handle = l2p->qs_handle_list[i];
+ vsi->qos[i].traffic_class = l2p->up2tc[i];
+ vsi->qos[i].rel_bw =
+ l2p->tc_info[vsi->qos[i].traffic_class].rel_bw;
+ vsi->qos[i].prio_type =
+ l2p->tc_info[vsi->qos[i].traffic_class].prio_type;
+ vsi->qos[i].valid = false;
+ }
+}
+
+/**
+ * irdma_change_l2params - given the new l2 parameters, change all qp
+ * @vsi: RDMA VSI pointer
+ * @l2params: New parameters from l2
+ */
+void irdma_change_l2params(struct irdma_sc_vsi *vsi,
+ struct irdma_l2params *l2params)
+{
+ if (l2params->mtu_changed) {
+ vsi->mtu = l2params->mtu;
+ if (vsi->ieq)
+ irdma_reinitialize_ieq(vsi);
+ }
+
+ if (!l2params->tc_changed)
+ return;
+
+ vsi->tc_change_pending = false;
+ irdma_set_qos_info(vsi, l2params);
+ irdma_sc_suspend_resume_qps(vsi, IRDMA_OP_RESUME);
+}
+
+/**
+ * irdma_qp_rem_qos - remove qp from qos lists during destroy qp
+ * @qp: qp to be removed from qos
+ */
+void irdma_qp_rem_qos(struct irdma_sc_qp *qp)
+{
+ struct irdma_sc_vsi *vsi = qp->vsi;
+
+ ibdev_dbg(to_ibdev(qp->dev),
+ "DCB: DCB: Remove qp[%d] UP[%d] qset[%d] on_qoslist[%d]\n",
+ qp->qp_uk.qp_id, qp->user_pri, qp->qs_handle,
+ qp->on_qoslist);
+ mutex_lock(&vsi->qos[qp->user_pri].qos_mutex);
+ if (qp->on_qoslist) {
+ qp->on_qoslist = false;
+ list_del(&qp->list);
+ }
+ mutex_unlock(&vsi->qos[qp->user_pri].qos_mutex);
+}
+
+/**
+ * irdma_qp_add_qos - called during setctx for qp to be added to qos
+ * @qp: qp to be added to qos
+ */
+void irdma_qp_add_qos(struct irdma_sc_qp *qp)
+{
+ struct irdma_sc_vsi *vsi = qp->vsi;
+
+ ibdev_dbg(to_ibdev(qp->dev),
+ "DCB: DCB: Add qp[%d] UP[%d] qset[%d] on_qoslist[%d]\n",
+ qp->qp_uk.qp_id, qp->user_pri, qp->qs_handle,
+ qp->on_qoslist);
+ mutex_lock(&vsi->qos[qp->user_pri].qos_mutex);
+ if (!qp->on_qoslist) {
+ list_add(&qp->list, &vsi->qos[qp->user_pri].qplist);
+ qp->on_qoslist = true;
+ qp->qs_handle = vsi->qos[qp->user_pri].qs_handle;
+ }
+ mutex_unlock(&vsi->qos[qp->user_pri].qos_mutex);
+}
+
+/**
+ * irdma_sc_pd_init - initialize sc pd struct
+ * @dev: sc device struct
+ * @pd: sc pd ptr
+ * @pd_id: pd_id for allocated pd
+ * @abi_ver: User/Kernel ABI version
+ */
+void irdma_sc_pd_init(struct irdma_sc_dev *dev, struct irdma_sc_pd *pd, u32 pd_id,
+ int abi_ver)
+{
+ pd->pd_id = pd_id;
+ pd->abi_ver = abi_ver;
+ pd->dev = dev;
+}
+
+/**
+ * irdma_sc_add_arp_cache_entry - cqp wqe add arp cache entry
+ * @cqp: struct for cqp hw
+ * @info: arp entry information
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static int irdma_sc_add_arp_cache_entry(struct irdma_sc_cqp *cqp,
+ struct irdma_add_arp_cache_entry_info *info,
+ u64 scratch, bool post_sq)
+{
+ __le64 *wqe;
+ u64 hdr;
+
+ wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return -ENOMEM;
+ set_64bit_val(wqe, 8, info->reach_max);
+ set_64bit_val(wqe, 16, ether_addr_to_u64(info->mac_addr));
+
+ hdr = info->arp_index |
+ FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_MANAGE_ARP) |
+ FIELD_PREP(IRDMA_CQPSQ_MAT_PERMANENT, (info->permanent ? 1 : 0)) |
+ FIELD_PREP(IRDMA_CQPSQ_MAT_ENTRYVALID, 1) |
+ FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity);
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ print_hex_dump_debug("WQE: ARP_CACHE_ENTRY WQE", DUMP_PREFIX_OFFSET,
+ 16, 8, wqe, IRDMA_CQP_WQE_SIZE * 8, false);
+ if (post_sq)
+ irdma_sc_cqp_post_sq(cqp);
+
+ return 0;
+}
+
+/**
+ * irdma_sc_del_arp_cache_entry - dele arp cache entry
+ * @cqp: struct for cqp hw
+ * @scratch: u64 saved to be used during cqp completion
+ * @arp_index: arp index to delete arp entry
+ * @post_sq: flag for cqp db to ring
+ */
+static int irdma_sc_del_arp_cache_entry(struct irdma_sc_cqp *cqp, u64 scratch,
+ u16 arp_index, bool post_sq)
+{
+ __le64 *wqe;
+ u64 hdr;
+
+ wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return -ENOMEM;
+
+ hdr = arp_index |
+ FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_MANAGE_ARP) |
+ FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity);
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ print_hex_dump_debug("WQE: ARP_CACHE_DEL_ENTRY WQE",
+ DUMP_PREFIX_OFFSET, 16, 8, wqe,
+ IRDMA_CQP_WQE_SIZE * 8, false);
+ if (post_sq)
+ irdma_sc_cqp_post_sq(cqp);
+
+ return 0;
+}
+
+/**
+ * irdma_sc_manage_apbvt_entry - for adding and deleting apbvt entries
+ * @cqp: struct for cqp hw
+ * @info: info for apbvt entry to add or delete
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static int irdma_sc_manage_apbvt_entry(struct irdma_sc_cqp *cqp,
+ struct irdma_apbvt_info *info,
+ u64 scratch, bool post_sq)
+{
+ __le64 *wqe;
+ u64 hdr;
+
+ wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return -ENOMEM;
+
+ set_64bit_val(wqe, 16, info->port);
+
+ hdr = FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_MANAGE_APBVT) |
+ FIELD_PREP(IRDMA_CQPSQ_MAPT_ADDPORT, info->add) |
+ FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity);
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ print_hex_dump_debug("WQE: MANAGE_APBVT WQE", DUMP_PREFIX_OFFSET, 16,
+ 8, wqe, IRDMA_CQP_WQE_SIZE * 8, false);
+ if (post_sq)
+ irdma_sc_cqp_post_sq(cqp);
+
+ return 0;
+}
+
+/**
+ * irdma_sc_manage_qhash_table_entry - manage quad hash entries
+ * @cqp: struct for cqp hw
+ * @info: info for quad hash to manage
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ *
+ * This is called before connection establishment is started.
+ * For passive connections, when listener is created, it will
+ * call with entry type of IRDMA_QHASH_TYPE_TCP_SYN with local
+ * ip address and tcp port. When SYN is received (passive
+ * connections) or sent (active connections), this routine is
+ * called with entry type of IRDMA_QHASH_TYPE_TCP_ESTABLISHED
+ * and quad is passed in info.
+ *
+ * When iwarp connection is done and its state moves to RTS, the
+ * quad hash entry in the hardware will point to iwarp's qp
+ * number and requires no calls from the driver.
+ */
+static int
+irdma_sc_manage_qhash_table_entry(struct irdma_sc_cqp *cqp,
+ struct irdma_qhash_table_info *info,
+ u64 scratch, bool post_sq)
+{
+ __le64 *wqe;
+ u64 qw1 = 0;
+ u64 qw2 = 0;
+ u64 temp;
+ struct irdma_sc_vsi *vsi = info->vsi;
+
+ wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return -ENOMEM;
+
+ set_64bit_val(wqe, 0, ether_addr_to_u64(info->mac_addr));
+
+ qw1 = FIELD_PREP(IRDMA_CQPSQ_QHASH_QPN, info->qp_num) |
+ FIELD_PREP(IRDMA_CQPSQ_QHASH_DEST_PORT, info->dest_port);
+ if (info->ipv4_valid) {
+ set_64bit_val(wqe, 48,
+ FIELD_PREP(IRDMA_CQPSQ_QHASH_ADDR3, info->dest_ip[0]));
+ } else {
+ set_64bit_val(wqe, 56,
+ FIELD_PREP(IRDMA_CQPSQ_QHASH_ADDR0, info->dest_ip[0]) |
+ FIELD_PREP(IRDMA_CQPSQ_QHASH_ADDR1, info->dest_ip[1]));
+
+ set_64bit_val(wqe, 48,
+ FIELD_PREP(IRDMA_CQPSQ_QHASH_ADDR2, info->dest_ip[2]) |
+ FIELD_PREP(IRDMA_CQPSQ_QHASH_ADDR3, info->dest_ip[3]));
+ }
+ qw2 = FIELD_PREP(IRDMA_CQPSQ_QHASH_QS_HANDLE,
+ vsi->qos[info->user_pri].qs_handle);
+ if (info->vlan_valid)
+ qw2 |= FIELD_PREP(IRDMA_CQPSQ_QHASH_VLANID, info->vlan_id);
+ set_64bit_val(wqe, 16, qw2);
+ if (info->entry_type == IRDMA_QHASH_TYPE_TCP_ESTABLISHED) {
+ qw1 |= FIELD_PREP(IRDMA_CQPSQ_QHASH_SRC_PORT, info->src_port);
+ if (!info->ipv4_valid) {
+ set_64bit_val(wqe, 40,
+ FIELD_PREP(IRDMA_CQPSQ_QHASH_ADDR0, info->src_ip[0]) |
+ FIELD_PREP(IRDMA_CQPSQ_QHASH_ADDR1, info->src_ip[1]));
+ set_64bit_val(wqe, 32,
+ FIELD_PREP(IRDMA_CQPSQ_QHASH_ADDR2, info->src_ip[2]) |
+ FIELD_PREP(IRDMA_CQPSQ_QHASH_ADDR3, info->src_ip[3]));
+ } else {
+ set_64bit_val(wqe, 32,
+ FIELD_PREP(IRDMA_CQPSQ_QHASH_ADDR3, info->src_ip[0]));
+ }
+ }
+
+ set_64bit_val(wqe, 8, qw1);
+ temp = FIELD_PREP(IRDMA_CQPSQ_QHASH_WQEVALID, cqp->polarity) |
+ FIELD_PREP(IRDMA_CQPSQ_QHASH_OPCODE,
+ IRDMA_CQP_OP_MANAGE_QUAD_HASH_TABLE_ENTRY) |
+ FIELD_PREP(IRDMA_CQPSQ_QHASH_MANAGE, info->manage) |
+ FIELD_PREP(IRDMA_CQPSQ_QHASH_IPV4VALID, info->ipv4_valid) |
+ FIELD_PREP(IRDMA_CQPSQ_QHASH_VLANVALID, info->vlan_valid) |
+ FIELD_PREP(IRDMA_CQPSQ_QHASH_ENTRYTYPE, info->entry_type);
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, temp);
+
+ print_hex_dump_debug("WQE: MANAGE_QHASH WQE", DUMP_PREFIX_OFFSET, 16,
+ 8, wqe, IRDMA_CQP_WQE_SIZE * 8, false);
+ if (post_sq)
+ irdma_sc_cqp_post_sq(cqp);
+
+ return 0;
+}
+
+/**
+ * irdma_sc_qp_init - initialize qp
+ * @qp: sc qp
+ * @info: initialization qp info
+ */
+int irdma_sc_qp_init(struct irdma_sc_qp *qp, struct irdma_qp_init_info *info)
+{
+ int ret_code;
+ u32 pble_obj_cnt;
+ u16 wqe_size;
+
+ if (info->qp_uk_init_info.max_sq_frag_cnt >
+ info->pd->dev->hw_attrs.uk_attrs.max_hw_wq_frags ||
+ info->qp_uk_init_info.max_rq_frag_cnt >
+ info->pd->dev->hw_attrs.uk_attrs.max_hw_wq_frags)
+ return -EINVAL;
+
+ qp->dev = info->pd->dev;
+ qp->vsi = info->vsi;
+ qp->ieq_qp = info->vsi->exception_lan_q;
+ qp->sq_pa = info->sq_pa;
+ qp->rq_pa = info->rq_pa;
+ qp->hw_host_ctx_pa = info->host_ctx_pa;
+ qp->q2_pa = info->q2_pa;
+ qp->shadow_area_pa = info->shadow_area_pa;
+ qp->q2_buf = info->q2;
+ qp->pd = info->pd;
+ qp->hw_host_ctx = info->host_ctx;
+ info->qp_uk_init_info.wqe_alloc_db = qp->pd->dev->wqe_alloc_db;
+ ret_code = irdma_uk_qp_init(&qp->qp_uk, &info->qp_uk_init_info);
+ if (ret_code)
+ return ret_code;
+
+ qp->virtual_map = info->virtual_map;
+ pble_obj_cnt = info->pd->dev->hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].cnt;
+
+ if ((info->virtual_map && info->sq_pa >= pble_obj_cnt) ||
+ (!info->qp_uk_init_info.srq_uk &&
+ info->virtual_map && info->rq_pa >= pble_obj_cnt))
+ return -EINVAL;
+
+ qp->llp_stream_handle = (void *)(-1);
+ qp->hw_sq_size = irdma_get_encoded_wqe_size(qp->qp_uk.sq_ring.size,
+ IRDMA_QUEUE_TYPE_SQ_RQ);
+ ibdev_dbg(to_ibdev(qp->dev),
+ "WQE: hw_sq_size[%04d] sq_ring.size[%04d]\n",
+ qp->hw_sq_size, qp->qp_uk.sq_ring.size);
+ if (qp->qp_uk.uk_attrs->hw_rev == IRDMA_GEN_1 && qp->pd->abi_ver > 4)
+ wqe_size = IRDMA_WQE_SIZE_128;
+ else
+ ret_code = irdma_fragcnt_to_wqesize_rq(qp->qp_uk.max_rq_frag_cnt,
+ &wqe_size);
+ if (ret_code)
+ return ret_code;
+
+ qp->hw_rq_size = irdma_get_encoded_wqe_size(qp->qp_uk.rq_size *
+ (wqe_size / IRDMA_QP_WQE_MIN_SIZE), IRDMA_QUEUE_TYPE_SQ_RQ);
+ ibdev_dbg(to_ibdev(qp->dev),
+ "WQE: hw_rq_size[%04d] qp_uk.rq_size[%04d] wqe_size[%04d]\n",
+ qp->hw_rq_size, qp->qp_uk.rq_size, wqe_size);
+ qp->sq_tph_val = info->sq_tph_val;
+ qp->rq_tph_val = info->rq_tph_val;
+ qp->sq_tph_en = info->sq_tph_en;
+ qp->rq_tph_en = info->rq_tph_en;
+ qp->rcv_tph_en = info->rcv_tph_en;
+ qp->xmit_tph_en = info->xmit_tph_en;
+ qp->qp_uk.first_sq_wq = info->qp_uk_init_info.first_sq_wq;
+ qp->qs_handle = qp->vsi->qos[qp->user_pri].qs_handle;
+
+ return 0;
+}
+
+/**
+ * irdma_sc_srq_init - init sc_srq structure
+ * @srq: srq sc struct
+ * @info: parameters for srq init
+ */
+int irdma_sc_srq_init(struct irdma_sc_srq *srq,
+ struct irdma_srq_init_info *info)
+{
+ u32 srq_size_quanta;
+ int ret_code;
+
+ ret_code = irdma_uk_srq_init(&srq->srq_uk, &info->srq_uk_init_info);
+ if (ret_code)
+ return ret_code;
+
+ srq->dev = info->pd->dev;
+ srq->pd = info->pd;
+ srq->vsi = info->vsi;
+ srq->srq_pa = info->srq_pa;
+ srq->first_pm_pbl_idx = info->first_pm_pbl_idx;
+ srq->pasid = info->pasid;
+ srq->pasid_valid = info->pasid_valid;
+ srq->srq_limit = info->srq_limit;
+ srq->leaf_pbl_size = info->leaf_pbl_size;
+ srq->virtual_map = info->virtual_map;
+ srq->tph_en = info->tph_en;
+ srq->arm_limit_event = info->arm_limit_event;
+ srq->tph_val = info->tph_value;
+ srq->shadow_area_pa = info->shadow_area_pa;
+
+ /* Smallest SRQ size is 256B i.e. 8 quanta */
+ srq_size_quanta = max((u32)IRDMA_SRQ_MIN_QUANTA,
+ srq->srq_uk.srq_size *
+ srq->srq_uk.wqe_size_multiplier);
+ srq->hw_srq_size = irdma_get_encoded_wqe_size(srq_size_quanta,
+ IRDMA_QUEUE_TYPE_SRQ);
+
+ return 0;
+}
+
+/**
+ * irdma_sc_srq_create - send srq create CQP WQE
+ * @srq: srq sc struct
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static int irdma_sc_srq_create(struct irdma_sc_srq *srq, u64 scratch,
+ bool post_sq)
+{
+ struct irdma_sc_cqp *cqp;
+ __le64 *wqe;
+ u64 hdr;
+
+ cqp = srq->pd->dev->cqp;
+ if (srq->srq_uk.srq_id < cqp->dev->hw_attrs.min_hw_srq_id ||
+ srq->srq_uk.srq_id >
+ (cqp->dev->hmc_info->hmc_obj[IRDMA_HMC_IW_SRQ].max_cnt - 1))
+ return -EINVAL;
+
+ wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return -ENOMEM;
+
+ set_64bit_val(wqe, 0,
+ FIELD_PREP(IRDMA_CQPSQ_SRQ_SRQ_LIMIT, srq->srq_limit) |
+ FIELD_PREP(IRDMA_CQPSQ_SRQ_RQSIZE, srq->hw_srq_size) |
+ FIELD_PREP(IRDMA_CQPSQ_SRQ_RQ_WQE_SIZE, srq->srq_uk.wqe_size));
+ set_64bit_val(wqe, 8, (uintptr_t)srq);
+ set_64bit_val(wqe, 16,
+ FIELD_PREP(IRDMA_CQPSQ_SRQ_PD_ID, srq->pd->pd_id));
+ set_64bit_val(wqe, 32,
+ FIELD_PREP(IRDMA_CQPSQ_SRQ_PHYSICAL_BUFFER_ADDR,
+ srq->srq_pa >>
+ IRDMA_CQPSQ_SRQ_PHYSICAL_BUFFER_ADDR_S));
+ set_64bit_val(wqe, 40,
+ FIELD_PREP(IRDMA_CQPSQ_SRQ_DB_SHADOW_ADDR,
+ srq->shadow_area_pa >>
+ IRDMA_CQPSQ_SRQ_DB_SHADOW_ADDR_S));
+ set_64bit_val(wqe, 48,
+ FIELD_PREP(IRDMA_CQPSQ_SRQ_FIRST_PM_PBL_IDX,
+ srq->first_pm_pbl_idx));
+
+ hdr = srq->srq_uk.srq_id |
+ FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_CREATE_SRQ) |
+ FIELD_PREP(IRDMA_CQPSQ_SRQ_LEAF_PBL_SIZE, srq->leaf_pbl_size) |
+ FIELD_PREP(IRDMA_CQPSQ_SRQ_VIRTMAP, srq->virtual_map) |
+ FIELD_PREP(IRDMA_CQPSQ_SRQ_ARM_LIMIT_EVENT,
+ srq->arm_limit_event) |
+ FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity);
+
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ print_hex_dump_debug("WQE: SRQ_CREATE WQE", DUMP_PREFIX_OFFSET, 16, 8,
+ wqe, IRDMA_CQP_WQE_SIZE * 8, false);
+ if (post_sq)
+ irdma_sc_cqp_post_sq(cqp);
+
+ return 0;
+}
+
+/**
+ * irdma_sc_srq_modify - send modify_srq CQP WQE
+ * @srq: srq sc struct
+ * @info: parameters for srq modification
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static int irdma_sc_srq_modify(struct irdma_sc_srq *srq,
+ struct irdma_modify_srq_info *info, u64 scratch,
+ bool post_sq)
+{
+ struct irdma_sc_cqp *cqp;
+ __le64 *wqe;
+ u64 hdr;
+
+ cqp = srq->dev->cqp;
+ if (srq->srq_uk.srq_id < cqp->dev->hw_attrs.min_hw_srq_id ||
+ srq->srq_uk.srq_id >
+ (cqp->dev->hmc_info->hmc_obj[IRDMA_HMC_IW_SRQ].max_cnt - 1))
+ return -EINVAL;
+
+ wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return -ENOMEM;
+
+ set_64bit_val(wqe, 0,
+ FIELD_PREP(IRDMA_CQPSQ_SRQ_SRQ_LIMIT, info->srq_limit) |
+ FIELD_PREP(IRDMA_CQPSQ_SRQ_RQSIZE, srq->hw_srq_size) |
+ FIELD_PREP(IRDMA_CQPSQ_SRQ_RQ_WQE_SIZE, srq->srq_uk.wqe_size));
+ set_64bit_val(wqe, 8,
+ FIELD_PREP(IRDMA_CQPSQ_SRQ_SRQCTX, srq->srq_uk.srq_id));
+ set_64bit_val(wqe, 16,
+ FIELD_PREP(IRDMA_CQPSQ_SRQ_PD_ID, srq->pd->pd_id));
+ set_64bit_val(wqe, 32,
+ FIELD_PREP(IRDMA_CQPSQ_SRQ_PHYSICAL_BUFFER_ADDR,
+ srq->srq_pa >>
+ IRDMA_CQPSQ_SRQ_PHYSICAL_BUFFER_ADDR_S));
+ set_64bit_val(wqe, 40,
+ FIELD_PREP(IRDMA_CQPSQ_SRQ_DB_SHADOW_ADDR,
+ srq->shadow_area_pa >>
+ IRDMA_CQPSQ_SRQ_DB_SHADOW_ADDR_S));
+ set_64bit_val(wqe, 48,
+ FIELD_PREP(IRDMA_CQPSQ_SRQ_FIRST_PM_PBL_IDX,
+ srq->first_pm_pbl_idx));
+
+ hdr = srq->srq_uk.srq_id |
+ FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_MODIFY_SRQ) |
+ FIELD_PREP(IRDMA_CQPSQ_SRQ_LEAF_PBL_SIZE, srq->leaf_pbl_size) |
+ FIELD_PREP(IRDMA_CQPSQ_SRQ_VIRTMAP, srq->virtual_map) |
+ FIELD_PREP(IRDMA_CQPSQ_SRQ_ARM_LIMIT_EVENT,
+ info->arm_limit_event) |
+ FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity);
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ print_hex_dump_debug("WQE: SRQ_MODIFY WQE", DUMP_PREFIX_OFFSET, 16, 8,
+ wqe, IRDMA_CQP_WQE_SIZE * 8, false);
+ if (post_sq)
+ irdma_sc_cqp_post_sq(cqp);
+
+ return 0;
+}
+
+/**
+ * irdma_sc_srq_destroy - send srq_destroy CQP WQE
+ * @srq: srq sc struct
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static int irdma_sc_srq_destroy(struct irdma_sc_srq *srq, u64 scratch,
+ bool post_sq)
+{
+ struct irdma_sc_cqp *cqp;
+ __le64 *wqe;
+ u64 hdr;
+
+ cqp = srq->dev->cqp;
+
+ wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return -ENOMEM;
+
+ set_64bit_val(wqe, 8, (uintptr_t)srq);
+
+ hdr = srq->srq_uk.srq_id |
+ FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_DESTROY_SRQ) |
+ FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity);
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ print_hex_dump_debug("WQE: SRQ_DESTROY WQE", DUMP_PREFIX_OFFSET, 16,
+ 8, wqe, IRDMA_CQP_WQE_SIZE * 8, false);
+ if (post_sq)
+ irdma_sc_cqp_post_sq(cqp);
+
+ return 0;
+}
+
+/**
+ * irdma_sc_qp_create - create qp
+ * @qp: sc qp
+ * @info: qp create info
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+int irdma_sc_qp_create(struct irdma_sc_qp *qp, struct irdma_create_qp_info *info,
+ u64 scratch, bool post_sq)
+{
+ struct irdma_sc_cqp *cqp;
+ __le64 *wqe;
+ u64 hdr;
+
+ cqp = qp->dev->cqp;
+ if (qp->qp_uk.qp_id < cqp->dev->hw_attrs.min_hw_qp_id ||
+ qp->qp_uk.qp_id >= cqp->dev->hmc_info->hmc_obj[IRDMA_HMC_IW_QP].max_cnt)
+ return -EINVAL;
+
+ wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return -ENOMEM;
+
+ set_64bit_val(wqe, 16, qp->hw_host_ctx_pa);
+ set_64bit_val(wqe, 40, qp->shadow_area_pa);
+
+ hdr = qp->qp_uk.qp_id |
+ FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_CREATE_QP) |
+ FIELD_PREP(IRDMA_CQPSQ_QP_ORDVALID, (info->ord_valid ? 1 : 0)) |
+ FIELD_PREP(IRDMA_CQPSQ_QP_TOECTXVALID, info->tcp_ctx_valid) |
+ FIELD_PREP(IRDMA_CQPSQ_QP_MACVALID, info->mac_valid) |
+ FIELD_PREP(IRDMA_CQPSQ_QP_QPTYPE, qp->qp_uk.qp_type) |
+ FIELD_PREP(IRDMA_CQPSQ_QP_VQ, qp->virtual_map) |
+ FIELD_PREP(IRDMA_CQPSQ_QP_FORCELOOPBACK, info->force_lpb) |
+ FIELD_PREP(IRDMA_CQPSQ_QP_CQNUMVALID, info->cq_num_valid) |
+ FIELD_PREP(IRDMA_CQPSQ_QP_ARPTABIDXVALID,
+ info->arp_cache_idx_valid) |
+ FIELD_PREP(IRDMA_CQPSQ_QP_NEXTIWSTATE, info->next_iwarp_state) |
+ FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity);
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ print_hex_dump_debug("WQE: QP_CREATE WQE", DUMP_PREFIX_OFFSET, 16, 8,
+ wqe, IRDMA_CQP_WQE_SIZE * 8, false);
+ if (post_sq)
+ irdma_sc_cqp_post_sq(cqp);
+
+ return 0;
+}
+
+/**
+ * irdma_sc_qp_modify - modify qp cqp wqe
+ * @qp: sc qp
+ * @info: modify qp info
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+int irdma_sc_qp_modify(struct irdma_sc_qp *qp, struct irdma_modify_qp_info *info,
+ u64 scratch, bool post_sq)
+{
+ __le64 *wqe;
+ struct irdma_sc_cqp *cqp;
+ u64 hdr;
+ u8 term_actions = 0;
+ u8 term_len = 0;
+
+ cqp = qp->dev->cqp;
+ wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return -ENOMEM;
+
+ if (info->next_iwarp_state == IRDMA_QP_STATE_TERMINATE) {
+ if (info->dont_send_fin)
+ term_actions += IRDMAQP_TERM_SEND_TERM_ONLY;
+ if (info->dont_send_term)
+ term_actions += IRDMAQP_TERM_SEND_FIN_ONLY;
+ if (term_actions == IRDMAQP_TERM_SEND_TERM_AND_FIN ||
+ term_actions == IRDMAQP_TERM_SEND_TERM_ONLY)
+ term_len = info->termlen;
+ }
+
+ set_64bit_val(wqe, 8,
+ FIELD_PREP(IRDMA_CQPSQ_QP_NEWMSS, info->new_mss) |
+ FIELD_PREP(IRDMA_CQPSQ_QP_TERMLEN, term_len));
+ set_64bit_val(wqe, 16, qp->hw_host_ctx_pa);
+ set_64bit_val(wqe, 40, qp->shadow_area_pa);
+
+ hdr = qp->qp_uk.qp_id |
+ FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_MODIFY_QP) |
+ FIELD_PREP(IRDMA_CQPSQ_QP_ORDVALID, info->ord_valid) |
+ FIELD_PREP(IRDMA_CQPSQ_QP_TOECTXVALID, info->tcp_ctx_valid) |
+ FIELD_PREP(IRDMA_CQPSQ_QP_CACHEDVARVALID,
+ info->cached_var_valid) |
+ FIELD_PREP(IRDMA_CQPSQ_QP_VQ, qp->virtual_map) |
+ FIELD_PREP(IRDMA_CQPSQ_QP_FORCELOOPBACK, info->force_lpb) |
+ FIELD_PREP(IRDMA_CQPSQ_QP_CQNUMVALID, info->cq_num_valid) |
+ FIELD_PREP(IRDMA_CQPSQ_QP_MACVALID, info->mac_valid) |
+ FIELD_PREP(IRDMA_CQPSQ_QP_QPTYPE, qp->qp_uk.qp_type) |
+ FIELD_PREP(IRDMA_CQPSQ_QP_MSSCHANGE, info->mss_change) |
+ FIELD_PREP(IRDMA_CQPSQ_QP_REMOVEHASHENTRY,
+ info->remove_hash_idx) |
+ FIELD_PREP(IRDMA_CQPSQ_QP_TERMACT, term_actions) |
+ FIELD_PREP(IRDMA_CQPSQ_QP_RESETCON, info->reset_tcp_conn) |
+ FIELD_PREP(IRDMA_CQPSQ_QP_ARPTABIDXVALID,
+ info->arp_cache_idx_valid) |
+ FIELD_PREP(IRDMA_CQPSQ_QP_NEXTIWSTATE, info->next_iwarp_state) |
+ FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity);
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ print_hex_dump_debug("WQE: QP_MODIFY WQE", DUMP_PREFIX_OFFSET, 16, 8,
+ wqe, IRDMA_CQP_WQE_SIZE * 8, false);
+ if (post_sq)
+ irdma_sc_cqp_post_sq(cqp);
+
+ return 0;
+}
+
+/**
+ * irdma_sc_qp_destroy - cqp destroy qp
+ * @qp: sc qp
+ * @scratch: u64 saved to be used during cqp completion
+ * @remove_hash_idx: flag if to remove hash idx
+ * @ignore_mw_bnd: memory window bind flag
+ * @post_sq: flag for cqp db to ring
+ */
+int irdma_sc_qp_destroy(struct irdma_sc_qp *qp, u64 scratch,
+ bool remove_hash_idx, bool ignore_mw_bnd, bool post_sq)
+{
+ __le64 *wqe;
+ struct irdma_sc_cqp *cqp;
+ u64 hdr;
+
+ cqp = qp->dev->cqp;
+ wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return -ENOMEM;
+
+ set_64bit_val(wqe, 16, qp->hw_host_ctx_pa);
+ set_64bit_val(wqe, 40, qp->shadow_area_pa);
+
+ hdr = qp->qp_uk.qp_id |
+ FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_DESTROY_QP) |
+ FIELD_PREP(IRDMA_CQPSQ_QP_QPTYPE, qp->qp_uk.qp_type) |
+ FIELD_PREP(IRDMA_CQPSQ_QP_IGNOREMWBOUND, ignore_mw_bnd) |
+ FIELD_PREP(IRDMA_CQPSQ_QP_REMOVEHASHENTRY, remove_hash_idx) |
+ FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity);
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ print_hex_dump_debug("WQE: QP_DESTROY WQE", DUMP_PREFIX_OFFSET, 16, 8,
+ wqe, IRDMA_CQP_WQE_SIZE * 8, false);
+ if (post_sq)
+ irdma_sc_cqp_post_sq(cqp);
+
+ return 0;
+}
+
+/**
+ * irdma_sc_get_encoded_ird_size -
+ * @ird_size: IRD size
+ * The ird from the connection is rounded to a supported HW setting and then encoded
+ * for ird_size field of qp_ctx. Consumers are expected to provide valid ird size based
+ * on hardware attributes. IRD size defaults to a value of 4 in case of invalid input
+ */
+static u8 irdma_sc_get_encoded_ird_size(u16 ird_size)
+{
+ switch (ird_size ?
+ roundup_pow_of_two(2 * ird_size) : 4) {
+ case 256:
+ return IRDMA_IRD_HW_SIZE_256;
+ case 128:
+ return IRDMA_IRD_HW_SIZE_128;
+ case 64:
+ case 32:
+ return IRDMA_IRD_HW_SIZE_64;
+ case 16:
+ case 8:
+ return IRDMA_IRD_HW_SIZE_16;
+ case 4:
+ default:
+ break;
+ }
+
+ return IRDMA_IRD_HW_SIZE_4;
+}
+
+/**
+ * irdma_sc_qp_setctx_roce_gen_2 - set qp's context
+ * @qp: sc qp
+ * @qp_ctx: context ptr
+ * @info: ctx info
+ */
+static void irdma_sc_qp_setctx_roce_gen_2(struct irdma_sc_qp *qp,
+ __le64 *qp_ctx,
+ struct irdma_qp_host_ctx_info *info)
+{
+ struct irdma_roce_offload_info *roce_info;
+ struct irdma_udp_offload_info *udp;
+ u8 push_mode_en;
+ u32 push_idx;
+
+ roce_info = info->roce_info;
+ udp = info->udp_info;
+ qp->user_pri = info->user_pri;
+ if (qp->push_idx == IRDMA_INVALID_PUSH_PAGE_INDEX) {
+ push_mode_en = 0;
+ push_idx = 0;
+ } else {
+ push_mode_en = 1;
+ push_idx = qp->push_idx;
+ }
+ set_64bit_val(qp_ctx, 0,
+ FIELD_PREP(IRDMAQPC_RQWQESIZE, qp->qp_uk.rq_wqe_size) |
+ FIELD_PREP(IRDMAQPC_RCVTPHEN, qp->rcv_tph_en) |
+ FIELD_PREP(IRDMAQPC_XMITTPHEN, qp->xmit_tph_en) |
+ FIELD_PREP(IRDMAQPC_RQTPHEN, qp->rq_tph_en) |
+ FIELD_PREP(IRDMAQPC_SQTPHEN, qp->sq_tph_en) |
+ FIELD_PREP(IRDMAQPC_PPIDX, push_idx) |
+ FIELD_PREP(IRDMAQPC_PMENA, push_mode_en) |
+ FIELD_PREP(IRDMAQPC_PDIDXHI, roce_info->pd_id >> 16) |
+ FIELD_PREP(IRDMAQPC_DC_TCP_EN, roce_info->dctcp_en) |
+ FIELD_PREP(IRDMAQPC_ERR_RQ_IDX_VALID, roce_info->err_rq_idx_valid) |
+ FIELD_PREP(IRDMAQPC_ISQP1, roce_info->is_qp1) |
+ FIELD_PREP(IRDMAQPC_ROCE_TVER, roce_info->roce_tver) |
+ FIELD_PREP(IRDMAQPC_IPV4, udp->ipv4) |
+ FIELD_PREP(IRDMAQPC_INSERTVLANTAG, udp->insert_vlan_tag));
+ set_64bit_val(qp_ctx, 8, qp->sq_pa);
+ set_64bit_val(qp_ctx, 16, qp->rq_pa);
+ if ((roce_info->dcqcn_en || roce_info->dctcp_en) &&
+ !(udp->tos & 0x03))
+ udp->tos |= ECN_CODE_PT_VAL;
+ set_64bit_val(qp_ctx, 24,
+ FIELD_PREP(IRDMAQPC_RQSIZE, qp->hw_rq_size) |
+ FIELD_PREP(IRDMAQPC_SQSIZE, qp->hw_sq_size) |
+ FIELD_PREP(IRDMAQPC_TTL, udp->ttl) | FIELD_PREP(IRDMAQPC_TOS, udp->tos) |
+ FIELD_PREP(IRDMAQPC_SRCPORTNUM, udp->src_port) |
+ FIELD_PREP(IRDMAQPC_DESTPORTNUM, udp->dst_port));
+ set_64bit_val(qp_ctx, 32,
+ FIELD_PREP(IRDMAQPC_DESTIPADDR2, udp->dest_ip_addr[2]) |
+ FIELD_PREP(IRDMAQPC_DESTIPADDR3, udp->dest_ip_addr[3]));
+ set_64bit_val(qp_ctx, 40,
+ FIELD_PREP(IRDMAQPC_DESTIPADDR0, udp->dest_ip_addr[0]) |
+ FIELD_PREP(IRDMAQPC_DESTIPADDR1, udp->dest_ip_addr[1]));
+ set_64bit_val(qp_ctx, 48,
+ FIELD_PREP(IRDMAQPC_SNDMSS, udp->snd_mss) |
+ FIELD_PREP(IRDMAQPC_VLANTAG, udp->vlan_tag) |
+ FIELD_PREP(IRDMAQPC_ARPIDX, udp->arp_idx));
+ set_64bit_val(qp_ctx, 56,
+ FIELD_PREP(IRDMAQPC_PKEY, roce_info->p_key) |
+ FIELD_PREP(IRDMAQPC_PDIDX, roce_info->pd_id) |
+ FIELD_PREP(IRDMAQPC_ACKCREDITS, roce_info->ack_credits) |
+ FIELD_PREP(IRDMAQPC_FLOWLABEL, udp->flow_label));
+ set_64bit_val(qp_ctx, 64,
+ FIELD_PREP(IRDMAQPC_QKEY, roce_info->qkey) |
+ FIELD_PREP(IRDMAQPC_DESTQP, roce_info->dest_qp));
+ set_64bit_val(qp_ctx, 80,
+ FIELD_PREP(IRDMAQPC_PSNNXT, udp->psn_nxt) |
+ FIELD_PREP(IRDMAQPC_LSN, udp->lsn));
+ set_64bit_val(qp_ctx, 88,
+ FIELD_PREP(IRDMAQPC_EPSN, udp->epsn));
+ set_64bit_val(qp_ctx, 96,
+ FIELD_PREP(IRDMAQPC_PSNMAX, udp->psn_max) |
+ FIELD_PREP(IRDMAQPC_PSNUNA, udp->psn_una));
+ set_64bit_val(qp_ctx, 112,
+ FIELD_PREP(IRDMAQPC_CWNDROCE, udp->cwnd));
+ set_64bit_val(qp_ctx, 128,
+ FIELD_PREP(IRDMAQPC_ERR_RQ_IDX, roce_info->err_rq_idx) |
+ FIELD_PREP(IRDMAQPC_RNRNAK_THRESH, udp->rnr_nak_thresh) |
+ FIELD_PREP(IRDMAQPC_REXMIT_THRESH, udp->rexmit_thresh) |
+ FIELD_PREP(IRDMAQPC_RTOMIN, roce_info->rtomin));
+ set_64bit_val(qp_ctx, 136,
+ FIELD_PREP(IRDMAQPC_TXCQNUM, info->send_cq_num) |
+ FIELD_PREP(IRDMAQPC_RXCQNUM, info->rcv_cq_num));
+ set_64bit_val(qp_ctx, 144,
+ FIELD_PREP(IRDMAQPC_STAT_INDEX, info->stats_idx));
+ set_64bit_val(qp_ctx, 152, ether_addr_to_u64(roce_info->mac_addr) << 16);
+ set_64bit_val(qp_ctx, 160,
+ FIELD_PREP(IRDMAQPC_ORDSIZE, roce_info->ord_size) |
+ FIELD_PREP(IRDMAQPC_IRDSIZE, irdma_sc_get_encoded_ird_size(roce_info->ird_size)) |
+ FIELD_PREP(IRDMAQPC_WRRDRSPOK, roce_info->wr_rdresp_en) |
+ FIELD_PREP(IRDMAQPC_RDOK, roce_info->rd_en) |
+ FIELD_PREP(IRDMAQPC_USESTATSINSTANCE, info->stats_idx_valid) |
+ FIELD_PREP(IRDMAQPC_BINDEN, roce_info->bind_en) |
+ FIELD_PREP(IRDMAQPC_FASTREGEN, roce_info->fast_reg_en) |
+ FIELD_PREP(IRDMAQPC_DCQCNENABLE, roce_info->dcqcn_en) |
+ FIELD_PREP(IRDMAQPC_RCVNOICRC, roce_info->rcv_no_icrc) |
+ FIELD_PREP(IRDMAQPC_FW_CC_ENABLE, roce_info->fw_cc_enable) |
+ FIELD_PREP(IRDMAQPC_UDPRIVCQENABLE, roce_info->udprivcq_en) |
+ FIELD_PREP(IRDMAQPC_PRIVEN, roce_info->priv_mode_en) |
+ FIELD_PREP(IRDMAQPC_TIMELYENABLE, roce_info->timely_en));
+ set_64bit_val(qp_ctx, 168,
+ FIELD_PREP(IRDMAQPC_QPCOMPCTX, info->qp_compl_ctx));
+ set_64bit_val(qp_ctx, 176,
+ FIELD_PREP(IRDMAQPC_SQTPHVAL, qp->sq_tph_val) |
+ FIELD_PREP(IRDMAQPC_RQTPHVAL, qp->rq_tph_val) |
+ FIELD_PREP(IRDMAQPC_QSHANDLE, qp->qs_handle));
+ set_64bit_val(qp_ctx, 184,
+ FIELD_PREP(IRDMAQPC_LOCAL_IPADDR3, udp->local_ipaddr[3]) |
+ FIELD_PREP(IRDMAQPC_LOCAL_IPADDR2, udp->local_ipaddr[2]));
+ set_64bit_val(qp_ctx, 192,
+ FIELD_PREP(IRDMAQPC_LOCAL_IPADDR1, udp->local_ipaddr[1]) |
+ FIELD_PREP(IRDMAQPC_LOCAL_IPADDR0, udp->local_ipaddr[0]));
+ set_64bit_val(qp_ctx, 200,
+ FIELD_PREP(IRDMAQPC_THIGH, roce_info->t_high) |
+ FIELD_PREP(IRDMAQPC_TLOW, roce_info->t_low));
+ set_64bit_val(qp_ctx, 208,
+ FIELD_PREP(IRDMAQPC_REMENDPOINTIDX, info->rem_endpoint_idx));
+
+ print_hex_dump_debug("WQE: QP_HOST CTX WQE", DUMP_PREFIX_OFFSET, 16,
+ 8, qp_ctx, IRDMA_QP_CTX_SIZE, false);
+}
+
+/**
+ * irdma_sc_get_encoded_ird_size_gen_3 - get encoded IRD size for GEN 3
+ * @ird_size: IRD size
+ * The ird from the connection is rounded to a supported HW setting and then encoded
+ * for ird_size field of qp_ctx. Consumers are expected to provide valid ird size based
+ * on hardware attributes. IRD size defaults to a value of 4 in case of invalid input.
+ */
+static u8 irdma_sc_get_encoded_ird_size_gen_3(u16 ird_size)
+{
+ switch (ird_size ?
+ roundup_pow_of_two(2 * ird_size) : 4) {
+ case 4096:
+ return IRDMA_IRD_HW_SIZE_4096_GEN3;
+ case 2048:
+ return IRDMA_IRD_HW_SIZE_2048_GEN3;
+ case 1024:
+ return IRDMA_IRD_HW_SIZE_1024_GEN3;
+ case 512:
+ return IRDMA_IRD_HW_SIZE_512_GEN3;
+ case 256:
+ return IRDMA_IRD_HW_SIZE_256_GEN3;
+ case 128:
+ return IRDMA_IRD_HW_SIZE_128_GEN3;
+ case 64:
+ return IRDMA_IRD_HW_SIZE_64_GEN3;
+ case 32:
+ return IRDMA_IRD_HW_SIZE_32_GEN3;
+ case 16:
+ return IRDMA_IRD_HW_SIZE_16_GEN3;
+ case 8:
+ return IRDMA_IRD_HW_SIZE_8_GEN3;
+ case 4:
+ default:
+ break;
+ }
+
+ return IRDMA_IRD_HW_SIZE_4_GEN3;
+}
+
+/**
+ * irdma_sc_qp_setctx_roce_gen_3 - set qp's context
+ * @qp: sc qp
+ * @qp_ctx: context ptr
+ * @info: ctx info
+ */
+static void irdma_sc_qp_setctx_roce_gen_3(struct irdma_sc_qp *qp,
+ __le64 *qp_ctx,
+ struct irdma_qp_host_ctx_info *info)
+{
+ struct irdma_roce_offload_info *roce_info = info->roce_info;
+ struct irdma_udp_offload_info *udp = info->udp_info;
+ u64 qw0, qw3, qw7 = 0, qw8 = 0;
+ u8 push_mode_en;
+ u32 push_idx;
+
+ qp->user_pri = info->user_pri;
+ if (qp->push_idx == IRDMA_INVALID_PUSH_PAGE_INDEX) {
+ push_mode_en = 0;
+ push_idx = 0;
+ } else {
+ push_mode_en = 1;
+ push_idx = qp->push_idx;
+ }
+
+ qw0 = FIELD_PREP(IRDMAQPC_RQWQESIZE, qp->qp_uk.rq_wqe_size) |
+ FIELD_PREP(IRDMAQPC_RCVTPHEN, qp->rcv_tph_en) |
+ FIELD_PREP(IRDMAQPC_XMITTPHEN, qp->xmit_tph_en) |
+ FIELD_PREP(IRDMAQPC_RQTPHEN, qp->rq_tph_en) |
+ FIELD_PREP(IRDMAQPC_SQTPHEN, qp->sq_tph_en) |
+ FIELD_PREP(IRDMAQPC_PPIDX, push_idx) |
+ FIELD_PREP(IRDMAQPC_PMENA, push_mode_en) |
+ FIELD_PREP(IRDMAQPC_DC_TCP_EN, roce_info->dctcp_en) |
+ FIELD_PREP(IRDMAQPC_ISQP1, roce_info->is_qp1) |
+ FIELD_PREP(IRDMAQPC_ROCE_TVER, roce_info->roce_tver) |
+ FIELD_PREP(IRDMAQPC_IPV4, udp->ipv4) |
+ FIELD_PREP(IRDMAQPC_USE_SRQ, !qp->qp_uk.srq_uk ? 0 : 1) |
+ FIELD_PREP(IRDMAQPC_INSERTVLANTAG, udp->insert_vlan_tag);
+ set_64bit_val(qp_ctx, 0, qw0);
+ set_64bit_val(qp_ctx, 8, qp->sq_pa);
+ set_64bit_val(qp_ctx, 16, qp->rq_pa);
+ qw3 = FIELD_PREP(IRDMAQPC_RQSIZE, qp->hw_rq_size) |
+ FIELD_PREP(IRDMAQPC_SQSIZE, qp->hw_sq_size) |
+ FIELD_PREP(IRDMAQPC_TTL, udp->ttl) |
+ FIELD_PREP(IRDMAQPC_TOS, udp->tos) |
+ FIELD_PREP(IRDMAQPC_SRCPORTNUM, udp->src_port) |
+ FIELD_PREP(IRDMAQPC_DESTPORTNUM, udp->dst_port);
+ set_64bit_val(qp_ctx, 24, qw3);
+ set_64bit_val(qp_ctx, 32,
+ FIELD_PREP(IRDMAQPC_DESTIPADDR2, udp->dest_ip_addr[2]) |
+ FIELD_PREP(IRDMAQPC_DESTIPADDR3, udp->dest_ip_addr[3]));
+ set_64bit_val(qp_ctx, 40,
+ FIELD_PREP(IRDMAQPC_DESTIPADDR0, udp->dest_ip_addr[0]) |
+ FIELD_PREP(IRDMAQPC_DESTIPADDR1, udp->dest_ip_addr[1]));
+ set_64bit_val(qp_ctx, 48,
+ FIELD_PREP(IRDMAQPC_SNDMSS, udp->snd_mss) |
+ FIELD_PREP(IRDMAQPC_VLANTAG, udp->vlan_tag) |
+ FIELD_PREP(IRDMAQPC_ARPIDX, udp->arp_idx));
+ qw7 = FIELD_PREP(IRDMAQPC_PKEY, roce_info->p_key) |
+ FIELD_PREP(IRDMAQPC_ACKCREDITS, roce_info->ack_credits) |
+ FIELD_PREP(IRDMAQPC_FLOWLABEL, udp->flow_label);
+ set_64bit_val(qp_ctx, 56, qw7);
+ qw8 = FIELD_PREP(IRDMAQPC_QKEY, roce_info->qkey) |
+ FIELD_PREP(IRDMAQPC_DESTQP, roce_info->dest_qp);
+ set_64bit_val(qp_ctx, 64, qw8);
+ set_64bit_val(qp_ctx, 80,
+ FIELD_PREP(IRDMAQPC_PSNNXT, udp->psn_nxt) |
+ FIELD_PREP(IRDMAQPC_LSN, udp->lsn));
+ set_64bit_val(qp_ctx, 88,
+ FIELD_PREP(IRDMAQPC_EPSN, udp->epsn));
+ set_64bit_val(qp_ctx, 96,
+ FIELD_PREP(IRDMAQPC_PSNMAX, udp->psn_max) |
+ FIELD_PREP(IRDMAQPC_PSNUNA, udp->psn_una));
+ set_64bit_val(qp_ctx, 112,
+ FIELD_PREP(IRDMAQPC_CWNDROCE, udp->cwnd));
+ set_64bit_val(qp_ctx, 128,
+ FIELD_PREP(IRDMAQPC_MINRNR_TIMER, udp->min_rnr_timer) |
+ FIELD_PREP(IRDMAQPC_RNRNAK_THRESH, udp->rnr_nak_thresh) |
+ FIELD_PREP(IRDMAQPC_REXMIT_THRESH, udp->rexmit_thresh) |
+ FIELD_PREP(IRDMAQPC_RNRNAK_TMR, udp->rnr_nak_tmr) |
+ FIELD_PREP(IRDMAQPC_RTOMIN, roce_info->rtomin));
+ set_64bit_val(qp_ctx, 136,
+ FIELD_PREP(IRDMAQPC_TXCQNUM, info->send_cq_num) |
+ FIELD_PREP(IRDMAQPC_RXCQNUM, info->rcv_cq_num));
+ set_64bit_val(qp_ctx, 152,
+ FIELD_PREP(IRDMAQPC_MACADDRESS,
+ ether_addr_to_u64(roce_info->mac_addr)) |
+ FIELD_PREP(IRDMAQPC_LOCALACKTIMEOUT,
+ roce_info->local_ack_timeout));
+ set_64bit_val(qp_ctx, 160,
+ FIELD_PREP(IRDMAQPC_ORDSIZE_GEN3, roce_info->ord_size) |
+ FIELD_PREP(IRDMAQPC_IRDSIZE_GEN3,
+ irdma_sc_get_encoded_ird_size_gen_3(roce_info->ird_size)) |
+ FIELD_PREP(IRDMAQPC_WRRDRSPOK, roce_info->wr_rdresp_en) |
+ FIELD_PREP(IRDMAQPC_RDOK, roce_info->rd_en) |
+ FIELD_PREP(IRDMAQPC_USESTATSINSTANCE,
+ info->stats_idx_valid) |
+ FIELD_PREP(IRDMAQPC_BINDEN, roce_info->bind_en) |
+ FIELD_PREP(IRDMAQPC_FASTREGEN, roce_info->fast_reg_en) |
+ FIELD_PREP(IRDMAQPC_DCQCNENABLE, roce_info->dcqcn_en) |
+ FIELD_PREP(IRDMAQPC_RCVNOICRC, roce_info->rcv_no_icrc) |
+ FIELD_PREP(IRDMAQPC_FW_CC_ENABLE,
+ roce_info->fw_cc_enable) |
+ FIELD_PREP(IRDMAQPC_UDPRIVCQENABLE,
+ roce_info->udprivcq_en) |
+ FIELD_PREP(IRDMAQPC_PRIVEN, roce_info->priv_mode_en) |
+ FIELD_PREP(IRDMAQPC_REMOTE_ATOMIC_EN,
+ info->remote_atomics_en) |
+ FIELD_PREP(IRDMAQPC_TIMELYENABLE, roce_info->timely_en));
+ set_64bit_val(qp_ctx, 168,
+ FIELD_PREP(IRDMAQPC_QPCOMPCTX, info->qp_compl_ctx));
+ set_64bit_val(qp_ctx, 176,
+ FIELD_PREP(IRDMAQPC_SQTPHVAL, qp->sq_tph_val) |
+ FIELD_PREP(IRDMAQPC_RQTPHVAL, qp->rq_tph_val) |
+ FIELD_PREP(IRDMAQPC_QSHANDLE, qp->qs_handle));
+ set_64bit_val(qp_ctx, 184,
+ FIELD_PREP(IRDMAQPC_LOCAL_IPADDR3, udp->local_ipaddr[3]) |
+ FIELD_PREP(IRDMAQPC_LOCAL_IPADDR2, udp->local_ipaddr[2]));
+ set_64bit_val(qp_ctx, 192,
+ FIELD_PREP(IRDMAQPC_LOCAL_IPADDR1, udp->local_ipaddr[1]) |
+ FIELD_PREP(IRDMAQPC_LOCAL_IPADDR0, udp->local_ipaddr[0]));
+ set_64bit_val(qp_ctx, 200,
+ FIELD_PREP(IRDMAQPC_THIGH, roce_info->t_high) |
+ FIELD_PREP(IRDMAQPC_SRQ_ID,
+ !qp->qp_uk.srq_uk ?
+ 0 : qp->qp_uk.srq_uk->srq_id) |
+ FIELD_PREP(IRDMAQPC_TLOW, roce_info->t_low));
+ set_64bit_val(qp_ctx, 208, roce_info->pd_id |
+ FIELD_PREP(IRDMAQPC_STAT_INDEX_GEN3, info->stats_idx) |
+ FIELD_PREP(IRDMAQPC_PKT_LIMIT, qp->pkt_limit));
+
+ print_hex_dump_debug("WQE: QP_HOST ROCE CTX WQE", DUMP_PREFIX_OFFSET,
+ 16, 8, qp_ctx, IRDMA_QP_CTX_SIZE, false);
+}
+
+void irdma_sc_qp_setctx_roce(struct irdma_sc_qp *qp, __le64 *qp_ctx,
+ struct irdma_qp_host_ctx_info *info)
+{
+ if (qp->dev->hw_attrs.uk_attrs.hw_rev == IRDMA_GEN_2)
+ irdma_sc_qp_setctx_roce_gen_2(qp, qp_ctx, info);
+ else
+ irdma_sc_qp_setctx_roce_gen_3(qp, qp_ctx, info);
+}
+
+/* irdma_sc_alloc_local_mac_entry - allocate a mac entry
+ * @cqp: struct for cqp hw
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static int irdma_sc_alloc_local_mac_entry(struct irdma_sc_cqp *cqp, u64 scratch,
+ bool post_sq)
+{
+ __le64 *wqe;
+ u64 hdr;
+
+ wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return -ENOMEM;
+
+ hdr = FIELD_PREP(IRDMA_CQPSQ_OPCODE,
+ IRDMA_CQP_OP_ALLOCATE_LOC_MAC_TABLE_ENTRY) |
+ FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity);
+
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ print_hex_dump_debug("WQE: ALLOCATE_LOCAL_MAC WQE",
+ DUMP_PREFIX_OFFSET, 16, 8, wqe,
+ IRDMA_CQP_WQE_SIZE * 8, false);
+
+ if (post_sq)
+ irdma_sc_cqp_post_sq(cqp);
+ return 0;
+}
+
+/**
+ * irdma_sc_add_local_mac_entry - add mac enry
+ * @cqp: struct for cqp hw
+ * @info:mac addr info
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static int irdma_sc_add_local_mac_entry(struct irdma_sc_cqp *cqp,
+ struct irdma_local_mac_entry_info *info,
+ u64 scratch, bool post_sq)
+{
+ __le64 *wqe;
+ u64 header;
+
+ wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return -ENOMEM;
+
+ set_64bit_val(wqe, 32, ether_addr_to_u64(info->mac_addr));
+
+ header = FIELD_PREP(IRDMA_CQPSQ_MLM_TABLEIDX, info->entry_idx) |
+ FIELD_PREP(IRDMA_CQPSQ_OPCODE,
+ IRDMA_CQP_OP_MANAGE_LOC_MAC_TABLE) |
+ FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity);
+
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, header);
+
+ print_hex_dump_debug("WQE: ADD_LOCAL_MAC WQE", DUMP_PREFIX_OFFSET, 16,
+ 8, wqe, IRDMA_CQP_WQE_SIZE * 8, false);
+
+ if (post_sq)
+ irdma_sc_cqp_post_sq(cqp);
+ return 0;
+}
+
+/**
+ * irdma_sc_del_local_mac_entry - cqp wqe to dele local mac
+ * @cqp: struct for cqp hw
+ * @scratch: u64 saved to be used during cqp completion
+ * @entry_idx: index of mac entry
+ * @ignore_ref_count: to force mac adde delete
+ * @post_sq: flag for cqp db to ring
+ */
+static int irdma_sc_del_local_mac_entry(struct irdma_sc_cqp *cqp, u64 scratch,
+ u16 entry_idx, u8 ignore_ref_count,
+ bool post_sq)
+{
+ __le64 *wqe;
+ u64 header;
+
+ wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return -ENOMEM;
+ header = FIELD_PREP(IRDMA_CQPSQ_MLM_TABLEIDX, entry_idx) |
+ FIELD_PREP(IRDMA_CQPSQ_OPCODE,
+ IRDMA_CQP_OP_MANAGE_LOC_MAC_TABLE) |
+ FIELD_PREP(IRDMA_CQPSQ_MLM_FREEENTRY, 1) |
+ FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity) |
+ FIELD_PREP(IRDMA_CQPSQ_MLM_IGNORE_REF_CNT, ignore_ref_count);
+
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, header);
+
+ print_hex_dump_debug("WQE: DEL_LOCAL_MAC_IPADDR WQE",
+ DUMP_PREFIX_OFFSET, 16, 8, wqe,
+ IRDMA_CQP_WQE_SIZE * 8, false);
+
+ if (post_sq)
+ irdma_sc_cqp_post_sq(cqp);
+ return 0;
+}
+
+/**
+ * irdma_sc_qp_setctx - set qp's context
+ * @qp: sc qp
+ * @qp_ctx: context ptr
+ * @info: ctx info
+ */
+void irdma_sc_qp_setctx(struct irdma_sc_qp *qp, __le64 *qp_ctx,
+ struct irdma_qp_host_ctx_info *info)
+{
+ struct irdma_iwarp_offload_info *iw;
+ struct irdma_tcp_offload_info *tcp;
+ struct irdma_sc_dev *dev;
+ u8 push_mode_en;
+ u32 push_idx;
+ u64 qw0, qw3, qw7 = 0, qw16 = 0;
+ u64 mac = 0;
+
+ iw = info->iwarp_info;
+ tcp = info->tcp_info;
+ dev = qp->dev;
+ if (iw->rcv_mark_en) {
+ qp->pfpdu.marker_len = 4;
+ qp->pfpdu.rcv_start_seq = tcp->rcv_nxt;
+ }
+ qp->user_pri = info->user_pri;
+ if (qp->push_idx == IRDMA_INVALID_PUSH_PAGE_INDEX) {
+ push_mode_en = 0;
+ push_idx = 0;
+ } else {
+ push_mode_en = 1;
+ push_idx = qp->push_idx;
+ }
+ qw0 = FIELD_PREP(IRDMAQPC_RQWQESIZE, qp->qp_uk.rq_wqe_size) |
+ FIELD_PREP(IRDMAQPC_RCVTPHEN, qp->rcv_tph_en) |
+ FIELD_PREP(IRDMAQPC_XMITTPHEN, qp->xmit_tph_en) |
+ FIELD_PREP(IRDMAQPC_RQTPHEN, qp->rq_tph_en) |
+ FIELD_PREP(IRDMAQPC_SQTPHEN, qp->sq_tph_en) |
+ FIELD_PREP(IRDMAQPC_PPIDX, push_idx) |
+ FIELD_PREP(IRDMAQPC_PMENA, push_mode_en);
+
+ set_64bit_val(qp_ctx, 8, qp->sq_pa);
+ set_64bit_val(qp_ctx, 16, qp->rq_pa);
+
+ qw3 = FIELD_PREP(IRDMAQPC_RQSIZE, qp->hw_rq_size) |
+ FIELD_PREP(IRDMAQPC_SQSIZE, qp->hw_sq_size);
+ if (dev->hw_attrs.uk_attrs.hw_rev == IRDMA_GEN_1)
+ qw3 |= FIELD_PREP(IRDMAQPC_GEN1_SRCMACADDRIDX,
+ qp->src_mac_addr_idx);
+ set_64bit_val(qp_ctx, 136,
+ FIELD_PREP(IRDMAQPC_TXCQNUM, info->send_cq_num) |
+ FIELD_PREP(IRDMAQPC_RXCQNUM, info->rcv_cq_num));
+ set_64bit_val(qp_ctx, 168,
+ FIELD_PREP(IRDMAQPC_QPCOMPCTX, info->qp_compl_ctx));
+ set_64bit_val(qp_ctx, 176,
+ FIELD_PREP(IRDMAQPC_SQTPHVAL, qp->sq_tph_val) |
+ FIELD_PREP(IRDMAQPC_RQTPHVAL, qp->rq_tph_val) |
+ FIELD_PREP(IRDMAQPC_QSHANDLE, qp->qs_handle) |
+ FIELD_PREP(IRDMAQPC_EXCEPTION_LAN_QUEUE, qp->ieq_qp));
+ if (info->iwarp_info_valid) {
+ qw0 |= FIELD_PREP(IRDMAQPC_DDP_VER, iw->ddp_ver) |
+ FIELD_PREP(IRDMAQPC_RDMAP_VER, iw->rdmap_ver) |
+ FIELD_PREP(IRDMAQPC_DC_TCP_EN, iw->dctcp_en) |
+ FIELD_PREP(IRDMAQPC_ECN_EN, iw->ecn_en) |
+ FIELD_PREP(IRDMAQPC_IBRDENABLE, iw->ib_rd_en) |
+ FIELD_PREP(IRDMAQPC_PDIDXHI, iw->pd_id >> 16) |
+ FIELD_PREP(IRDMAQPC_ERR_RQ_IDX_VALID,
+ iw->err_rq_idx_valid);
+ qw7 |= FIELD_PREP(IRDMAQPC_PDIDX, iw->pd_id);
+ qw16 |= FIELD_PREP(IRDMAQPC_ERR_RQ_IDX, iw->err_rq_idx) |
+ FIELD_PREP(IRDMAQPC_RTOMIN, iw->rtomin);
+ set_64bit_val(qp_ctx, 144,
+ FIELD_PREP(IRDMAQPC_Q2ADDR, qp->q2_pa >> 8) |
+ FIELD_PREP(IRDMAQPC_STAT_INDEX, info->stats_idx));
+
+ if (dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2)
+ mac = ether_addr_to_u64(iw->mac_addr);
+
+ set_64bit_val(qp_ctx, 152,
+ mac << 16 | FIELD_PREP(IRDMAQPC_LASTBYTESENT, iw->last_byte_sent));
+ set_64bit_val(qp_ctx, 160,
+ FIELD_PREP(IRDMAQPC_ORDSIZE, iw->ord_size) |
+ FIELD_PREP(IRDMAQPC_IRDSIZE, irdma_sc_get_encoded_ird_size(iw->ird_size)) |
+ FIELD_PREP(IRDMAQPC_WRRDRSPOK, iw->wr_rdresp_en) |
+ FIELD_PREP(IRDMAQPC_RDOK, iw->rd_en) |
+ FIELD_PREP(IRDMAQPC_SNDMARKERS, iw->snd_mark_en) |
+ FIELD_PREP(IRDMAQPC_BINDEN, iw->bind_en) |
+ FIELD_PREP(IRDMAQPC_FASTREGEN, iw->fast_reg_en) |
+ FIELD_PREP(IRDMAQPC_PRIVEN, iw->priv_mode_en) |
+ FIELD_PREP(IRDMAQPC_USESTATSINSTANCE, info->stats_idx_valid) |
+ FIELD_PREP(IRDMAQPC_IWARPMODE, 1) |
+ FIELD_PREP(IRDMAQPC_RCVMARKERS, iw->rcv_mark_en) |
+ FIELD_PREP(IRDMAQPC_ALIGNHDRS, iw->align_hdrs) |
+ FIELD_PREP(IRDMAQPC_RCVNOMPACRC, iw->rcv_no_mpa_crc) |
+ FIELD_PREP(IRDMAQPC_RCVMARKOFFSET, iw->rcv_mark_offset || !tcp ? iw->rcv_mark_offset : tcp->rcv_nxt) |
+ FIELD_PREP(IRDMAQPC_SNDMARKOFFSET, iw->snd_mark_offset || !tcp ? iw->snd_mark_offset : tcp->snd_nxt) |
+ FIELD_PREP(IRDMAQPC_TIMELYENABLE, iw->timely_en));
+ }
+ if (info->tcp_info_valid) {
+ qw0 |= FIELD_PREP(IRDMAQPC_IPV4, tcp->ipv4) |
+ FIELD_PREP(IRDMAQPC_NONAGLE, tcp->no_nagle) |
+ FIELD_PREP(IRDMAQPC_INSERTVLANTAG,
+ tcp->insert_vlan_tag) |
+ FIELD_PREP(IRDMAQPC_TIMESTAMP, tcp->time_stamp) |
+ FIELD_PREP(IRDMAQPC_LIMIT, tcp->cwnd_inc_limit) |
+ FIELD_PREP(IRDMAQPC_DROPOOOSEG, tcp->drop_ooo_seg) |
+ FIELD_PREP(IRDMAQPC_DUPACK_THRESH, tcp->dup_ack_thresh);
+
+ if ((iw->ecn_en || iw->dctcp_en) && !(tcp->tos & 0x03))
+ tcp->tos |= ECN_CODE_PT_VAL;
+
+ qw3 |= FIELD_PREP(IRDMAQPC_TTL, tcp->ttl) |
+ FIELD_PREP(IRDMAQPC_AVOIDSTRETCHACK, tcp->avoid_stretch_ack) |
+ FIELD_PREP(IRDMAQPC_TOS, tcp->tos) |
+ FIELD_PREP(IRDMAQPC_SRCPORTNUM, tcp->src_port) |
+ FIELD_PREP(IRDMAQPC_DESTPORTNUM, tcp->dst_port);
+ if (dev->hw_attrs.uk_attrs.hw_rev == IRDMA_GEN_1) {
+ qw3 |= FIELD_PREP(IRDMAQPC_GEN1_SRCMACADDRIDX, tcp->src_mac_addr_idx);
+
+ qp->src_mac_addr_idx = tcp->src_mac_addr_idx;
+ }
+ set_64bit_val(qp_ctx, 32,
+ FIELD_PREP(IRDMAQPC_DESTIPADDR2, tcp->dest_ip_addr[2]) |
+ FIELD_PREP(IRDMAQPC_DESTIPADDR3, tcp->dest_ip_addr[3]));
+ set_64bit_val(qp_ctx, 40,
+ FIELD_PREP(IRDMAQPC_DESTIPADDR0, tcp->dest_ip_addr[0]) |
+ FIELD_PREP(IRDMAQPC_DESTIPADDR1, tcp->dest_ip_addr[1]));
+ set_64bit_val(qp_ctx, 48,
+ FIELD_PREP(IRDMAQPC_SNDMSS, tcp->snd_mss) |
+ FIELD_PREP(IRDMAQPC_SYN_RST_HANDLING, tcp->syn_rst_handling) |
+ FIELD_PREP(IRDMAQPC_VLANTAG, tcp->vlan_tag) |
+ FIELD_PREP(IRDMAQPC_ARPIDX, tcp->arp_idx));
+ qw7 |= FIELD_PREP(IRDMAQPC_FLOWLABEL, tcp->flow_label) |
+ FIELD_PREP(IRDMAQPC_WSCALE, tcp->wscale) |
+ FIELD_PREP(IRDMAQPC_IGNORE_TCP_OPT,
+ tcp->ignore_tcp_opt) |
+ FIELD_PREP(IRDMAQPC_IGNORE_TCP_UNS_OPT,
+ tcp->ignore_tcp_uns_opt) |
+ FIELD_PREP(IRDMAQPC_TCPSTATE, tcp->tcp_state) |
+ FIELD_PREP(IRDMAQPC_RCVSCALE, tcp->rcv_wscale) |
+ FIELD_PREP(IRDMAQPC_SNDSCALE, tcp->snd_wscale);
+ set_64bit_val(qp_ctx, 72,
+ FIELD_PREP(IRDMAQPC_TIMESTAMP_RECENT, tcp->time_stamp_recent) |
+ FIELD_PREP(IRDMAQPC_TIMESTAMP_AGE, tcp->time_stamp_age));
+ set_64bit_val(qp_ctx, 80,
+ FIELD_PREP(IRDMAQPC_SNDNXT, tcp->snd_nxt) |
+ FIELD_PREP(IRDMAQPC_SNDWND, tcp->snd_wnd));
+ set_64bit_val(qp_ctx, 88,
+ FIELD_PREP(IRDMAQPC_RCVNXT, tcp->rcv_nxt) |
+ FIELD_PREP(IRDMAQPC_RCVWND, tcp->rcv_wnd));
+ set_64bit_val(qp_ctx, 96,
+ FIELD_PREP(IRDMAQPC_SNDMAX, tcp->snd_max) |
+ FIELD_PREP(IRDMAQPC_SNDUNA, tcp->snd_una));
+ set_64bit_val(qp_ctx, 104,
+ FIELD_PREP(IRDMAQPC_SRTT, tcp->srtt) |
+ FIELD_PREP(IRDMAQPC_RTTVAR, tcp->rtt_var));
+ set_64bit_val(qp_ctx, 112,
+ FIELD_PREP(IRDMAQPC_SSTHRESH, tcp->ss_thresh) |
+ FIELD_PREP(IRDMAQPC_CWND, tcp->cwnd));
+ set_64bit_val(qp_ctx, 120,
+ FIELD_PREP(IRDMAQPC_SNDWL1, tcp->snd_wl1) |
+ FIELD_PREP(IRDMAQPC_SNDWL2, tcp->snd_wl2));
+ qw16 |= FIELD_PREP(IRDMAQPC_MAXSNDWND, tcp->max_snd_window) |
+ FIELD_PREP(IRDMAQPC_REXMIT_THRESH, tcp->rexmit_thresh);
+ set_64bit_val(qp_ctx, 184,
+ FIELD_PREP(IRDMAQPC_LOCAL_IPADDR3, tcp->local_ipaddr[3]) |
+ FIELD_PREP(IRDMAQPC_LOCAL_IPADDR2, tcp->local_ipaddr[2]));
+ set_64bit_val(qp_ctx, 192,
+ FIELD_PREP(IRDMAQPC_LOCAL_IPADDR1, tcp->local_ipaddr[1]) |
+ FIELD_PREP(IRDMAQPC_LOCAL_IPADDR0, tcp->local_ipaddr[0]));
+ set_64bit_val(qp_ctx, 200,
+ FIELD_PREP(IRDMAQPC_THIGH, iw->t_high) |
+ FIELD_PREP(IRDMAQPC_TLOW, iw->t_low));
+ set_64bit_val(qp_ctx, 208,
+ FIELD_PREP(IRDMAQPC_REMENDPOINTIDX, info->rem_endpoint_idx));
+ }
+
+ set_64bit_val(qp_ctx, 0, qw0);
+ set_64bit_val(qp_ctx, 24, qw3);
+ set_64bit_val(qp_ctx, 56, qw7);
+ set_64bit_val(qp_ctx, 128, qw16);
+
+ print_hex_dump_debug("WQE: QP_HOST CTX", DUMP_PREFIX_OFFSET, 16, 8,
+ qp_ctx, IRDMA_QP_CTX_SIZE, false);
+}
+
+/**
+ * irdma_sc_alloc_stag - mr stag alloc
+ * @dev: sc device struct
+ * @info: stag info
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static int irdma_sc_alloc_stag(struct irdma_sc_dev *dev,
+ struct irdma_allocate_stag_info *info,
+ u64 scratch, bool post_sq)
+{
+ __le64 *wqe;
+ struct irdma_sc_cqp *cqp;
+ u64 hdr;
+ enum irdma_page_size page_size;
+
+ if (!info->total_len && !info->all_memory)
+ return -EINVAL;
+
+ if (info->page_size == 0x40000000)
+ page_size = IRDMA_PAGE_SIZE_1G;
+ else if (info->page_size == 0x200000)
+ page_size = IRDMA_PAGE_SIZE_2M;
+ else
+ page_size = IRDMA_PAGE_SIZE_4K;
+
+ cqp = dev->cqp;
+ wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return -ENOMEM;
+
+ set_64bit_val(wqe, 8,
+ FLD_LS_64(dev, info->pd_id, IRDMA_CQPSQ_STAG_PDID) |
+ FIELD_PREP(IRDMA_CQPSQ_STAG_STAGLEN, info->total_len));
+ set_64bit_val(wqe, 16,
+ FIELD_PREP(IRDMA_CQPSQ_STAG_IDX, info->stag_idx) |
+ FIELD_PREP(IRDMA_CQPSQ_STAG_PDID_HI, info->pd_id >> 18));
+ set_64bit_val(wqe, 40,
+ FIELD_PREP(IRDMA_CQPSQ_STAG_HMCFNIDX, info->hmc_fcn_index));
+
+ if (info->chunk_size)
+ set_64bit_val(wqe, 48,
+ FIELD_PREP(IRDMA_CQPSQ_STAG_FIRSTPMPBLIDX, info->first_pm_pbl_idx));
+
+ hdr = FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_ALLOC_STAG) |
+ FIELD_PREP(IRDMA_CQPSQ_STAG_MR, 1) |
+ FIELD_PREP(IRDMA_CQPSQ_STAG_ARIGHTS, info->access_rights) |
+ FIELD_PREP(IRDMA_CQPSQ_STAG_LPBLSIZE, info->chunk_size) |
+ FIELD_PREP(IRDMA_CQPSQ_STAG_HPAGESIZE, page_size) |
+ FIELD_PREP(IRDMA_CQPSQ_STAG_REMACCENABLED, info->remote_access) |
+ FIELD_PREP(IRDMA_CQPSQ_STAG_USEHMCFNIDX, info->use_hmc_fcn_index) |
+ FIELD_PREP(IRDMA_CQPSQ_STAG_USEPFRID, info->use_pf_rid) |
+ FIELD_PREP(IRDMA_CQPSQ_STAG_REMOTE_ATOMIC_EN,
+ info->remote_atomics_en) |
+ FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity);
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ print_hex_dump_debug("WQE: ALLOC_STAG WQE", DUMP_PREFIX_OFFSET, 16, 8,
+ wqe, IRDMA_CQP_WQE_SIZE * 8, false);
+ if (post_sq)
+ irdma_sc_cqp_post_sq(cqp);
+
+ return 0;
+}
+
+/**
+ * irdma_sc_mr_reg_non_shared - non-shared mr registration
+ * @dev: sc device struct
+ * @info: mr info
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static int irdma_sc_mr_reg_non_shared(struct irdma_sc_dev *dev,
+ struct irdma_reg_ns_stag_info *info,
+ u64 scratch, bool post_sq)
+{
+ __le64 *wqe;
+ u64 fbo;
+ struct irdma_sc_cqp *cqp;
+ u64 hdr;
+ u32 pble_obj_cnt;
+ bool remote_access;
+ u8 addr_type;
+ enum irdma_page_size page_size;
+
+ if (!info->total_len && !info->all_memory)
+ return -EINVAL;
+
+ if (info->page_size == 0x40000000)
+ page_size = IRDMA_PAGE_SIZE_1G;
+ else if (info->page_size == 0x200000)
+ page_size = IRDMA_PAGE_SIZE_2M;
+ else if (info->page_size == 0x1000)
+ page_size = IRDMA_PAGE_SIZE_4K;
+ else
+ return -EINVAL;
+
+ if (info->access_rights & (IRDMA_ACCESS_FLAGS_REMOTEREAD_ONLY |
+ IRDMA_ACCESS_FLAGS_REMOTEWRITE_ONLY))
+ remote_access = true;
+ else
+ remote_access = false;
+
+ pble_obj_cnt = dev->hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].cnt;
+ if (info->chunk_size && info->first_pm_pbl_index >= pble_obj_cnt)
+ return -EINVAL;
+
+ cqp = dev->cqp;
+ wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return -ENOMEM;
+ fbo = info->va & (info->page_size - 1);
+
+ set_64bit_val(wqe, 0,
+ (info->addr_type == IRDMA_ADDR_TYPE_VA_BASED ?
+ info->va : fbo));
+ set_64bit_val(wqe, 8,
+ FIELD_PREP(IRDMA_CQPSQ_STAG_STAGLEN, info->total_len) |
+ FLD_LS_64(dev, info->pd_id, IRDMA_CQPSQ_STAG_PDID));
+ set_64bit_val(wqe, 16,
+ FIELD_PREP(IRDMA_CQPSQ_STAG_KEY, info->stag_key) |
+ FIELD_PREP(IRDMA_CQPSQ_STAG_PDID_HI, info->pd_id >> 18) |
+ FIELD_PREP(IRDMA_CQPSQ_STAG_IDX, info->stag_idx));
+ if (!info->chunk_size) {
+ set_64bit_val(wqe, 32, info->reg_addr_pa);
+ set_64bit_val(wqe, 48, 0);
+ } else {
+ set_64bit_val(wqe, 32, 0);
+ set_64bit_val(wqe, 48,
+ FIELD_PREP(IRDMA_CQPSQ_STAG_FIRSTPMPBLIDX, info->first_pm_pbl_index));
+ }
+ set_64bit_val(wqe, 40, info->hmc_fcn_index);
+ set_64bit_val(wqe, 56, 0);
+
+ addr_type = (info->addr_type == IRDMA_ADDR_TYPE_VA_BASED) ? 1 : 0;
+ hdr = FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_REG_MR) |
+ FIELD_PREP(IRDMA_CQPSQ_STAG_MR, 1) |
+ FIELD_PREP(IRDMA_CQPSQ_STAG_LPBLSIZE, info->chunk_size) |
+ FIELD_PREP(IRDMA_CQPSQ_STAG_HPAGESIZE, page_size) |
+ FIELD_PREP(IRDMA_CQPSQ_STAG_ARIGHTS, info->access_rights) |
+ FIELD_PREP(IRDMA_CQPSQ_STAG_REMACCENABLED, remote_access) |
+ FIELD_PREP(IRDMA_CQPSQ_STAG_VABASEDTO, addr_type) |
+ FIELD_PREP(IRDMA_CQPSQ_STAG_USEHMCFNIDX, info->use_hmc_fcn_index) |
+ FIELD_PREP(IRDMA_CQPSQ_STAG_USEPFRID, info->use_pf_rid) |
+ FIELD_PREP(IRDMA_CQPSQ_STAG_REMOTE_ATOMIC_EN,
+ info->remote_atomics_en) |
+ FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity);
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ print_hex_dump_debug("WQE: MR_REG_NS WQE", DUMP_PREFIX_OFFSET, 16, 8,
+ wqe, IRDMA_CQP_WQE_SIZE * 8, false);
+ if (post_sq)
+ irdma_sc_cqp_post_sq(cqp);
+
+ return 0;
+}
+
+/**
+ * irdma_sc_dealloc_stag - deallocate stag
+ * @dev: sc device struct
+ * @info: dealloc stag info
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static int irdma_sc_dealloc_stag(struct irdma_sc_dev *dev,
+ struct irdma_dealloc_stag_info *info,
+ u64 scratch, bool post_sq)
+{
+ u64 hdr;
+ __le64 *wqe;
+ struct irdma_sc_cqp *cqp;
+
+ cqp = dev->cqp;
+ wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return -ENOMEM;
+
+ set_64bit_val(wqe, 8,
+ FLD_LS_64(dev, info->pd_id, IRDMA_CQPSQ_STAG_PDID));
+ set_64bit_val(wqe, 16,
+ FIELD_PREP(IRDMA_CQPSQ_STAG_IDX, info->stag_idx) |
+ FIELD_PREP(IRDMA_CQPSQ_STAG_PDID_HI, info->pd_id >> 18));
+
+ hdr = FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_DEALLOC_STAG) |
+ FIELD_PREP(IRDMA_CQPSQ_STAG_MR, info->mr) |
+ FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity);
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ print_hex_dump_debug("WQE: DEALLOC_STAG WQE", DUMP_PREFIX_OFFSET, 16,
+ 8, wqe, IRDMA_CQP_WQE_SIZE * 8, false);
+ if (post_sq)
+ irdma_sc_cqp_post_sq(cqp);
+
+ return 0;
+}
+
+/**
+ * irdma_sc_mw_alloc - mw allocate
+ * @dev: sc device struct
+ * @info: memory window allocation information
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static int irdma_sc_mw_alloc(struct irdma_sc_dev *dev,
+ struct irdma_mw_alloc_info *info, u64 scratch,
+ bool post_sq)
+{
+ u64 hdr;
+ struct irdma_sc_cqp *cqp;
+ __le64 *wqe;
+
+ cqp = dev->cqp;
+ wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return -ENOMEM;
+
+ set_64bit_val(wqe, 8,
+ FLD_LS_64(dev, info->pd_id, IRDMA_CQPSQ_STAG_PDID));
+ set_64bit_val(wqe, 16,
+ FIELD_PREP(IRDMA_CQPSQ_STAG_IDX, info->mw_stag_index) |
+ FIELD_PREP(IRDMA_CQPSQ_STAG_PDID_HI, info->pd_id >> 18));
+
+ hdr = FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_ALLOC_STAG) |
+ FIELD_PREP(IRDMA_CQPSQ_STAG_MWTYPE, info->mw_wide) |
+ FIELD_PREP(IRDMA_CQPSQ_STAG_MW1_BIND_DONT_VLDT_KEY,
+ info->mw1_bind_dont_vldt_key) |
+ FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity);
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ print_hex_dump_debug("WQE: MW_ALLOC WQE", DUMP_PREFIX_OFFSET, 16, 8,
+ wqe, IRDMA_CQP_WQE_SIZE * 8, false);
+ if (post_sq)
+ irdma_sc_cqp_post_sq(cqp);
+
+ return 0;
+}
+
+/**
+ * irdma_sc_mr_fast_register - Posts RDMA fast register mr WR to iwarp qp
+ * @qp: sc qp struct
+ * @info: fast mr info
+ * @post_sq: flag for cqp db to ring
+ */
+int irdma_sc_mr_fast_register(struct irdma_sc_qp *qp,
+ struct irdma_fast_reg_stag_info *info,
+ bool post_sq)
+{
+ u64 temp, hdr;
+ __le64 *wqe;
+ u32 wqe_idx;
+ enum irdma_page_size page_size;
+ struct irdma_post_sq_info sq_info = {};
+
+ if (info->page_size == 0x40000000)
+ page_size = IRDMA_PAGE_SIZE_1G;
+ else if (info->page_size == 0x200000)
+ page_size = IRDMA_PAGE_SIZE_2M;
+ else
+ page_size = IRDMA_PAGE_SIZE_4K;
+
+ sq_info.wr_id = info->wr_id;
+ sq_info.signaled = info->signaled;
+
+ wqe = irdma_qp_get_next_send_wqe(&qp->qp_uk, &wqe_idx,
+ IRDMA_QP_WQE_MIN_QUANTA, 0, &sq_info);
+ if (!wqe)
+ return -ENOMEM;
+
+ irdma_clr_wqes(&qp->qp_uk, wqe_idx);
+
+ ibdev_dbg(to_ibdev(qp->dev),
+ "MR: wr_id[%llxh] wqe_idx[%04d] location[%p]\n",
+ info->wr_id, wqe_idx,
+ &qp->qp_uk.sq_wrtrk_array[wqe_idx].wrid);
+
+ temp = (info->addr_type == IRDMA_ADDR_TYPE_VA_BASED) ?
+ (uintptr_t)info->va : info->fbo;
+ set_64bit_val(wqe, 0, temp);
+
+ temp = FIELD_GET(IRDMAQPSQ_FIRSTPMPBLIDXHI,
+ info->first_pm_pbl_index >> 16);
+ set_64bit_val(wqe, 8,
+ FIELD_PREP(IRDMAQPSQ_FIRSTPMPBLIDXHI, temp) |
+ FIELD_PREP(IRDMAQPSQ_PBLADDR >> IRDMA_HW_PAGE_SHIFT, info->reg_addr_pa));
+ set_64bit_val(wqe, 16,
+ info->total_len |
+ FIELD_PREP(IRDMAQPSQ_FIRSTPMPBLIDXLO, info->first_pm_pbl_index));
+
+ hdr = FIELD_PREP(IRDMAQPSQ_STAGKEY, info->stag_key) |
+ FIELD_PREP(IRDMAQPSQ_STAGINDEX, info->stag_idx) |
+ FIELD_PREP(IRDMAQPSQ_OPCODE, IRDMAQP_OP_FAST_REGISTER) |
+ FIELD_PREP(IRDMAQPSQ_LPBLSIZE, info->chunk_size) |
+ FIELD_PREP(IRDMAQPSQ_HPAGESIZE, page_size) |
+ FIELD_PREP(IRDMAQPSQ_STAGRIGHTS, info->access_rights) |
+ FIELD_PREP(IRDMAQPSQ_VABASEDTO, info->addr_type) |
+ FIELD_PREP(IRDMAQPSQ_READFENCE, info->read_fence) |
+ FIELD_PREP(IRDMAQPSQ_LOCALFENCE, info->local_fence) |
+ FIELD_PREP(IRDMAQPSQ_SIGCOMPL, info->signaled) |
+ FIELD_PREP(IRDMAQPSQ_REMOTE_ATOMICS_EN, info->remote_atomics_en) |
+ FIELD_PREP(IRDMAQPSQ_VALID, qp->qp_uk.swqe_polarity);
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ print_hex_dump_debug("WQE: FAST_REG WQE", DUMP_PREFIX_OFFSET, 16, 8,
+ wqe, IRDMA_QP_WQE_MIN_SIZE, false);
+
+ if (post_sq)
+ irdma_uk_qp_post_wr(&qp->qp_uk);
+
+ return 0;
+}
+
+/**
+ * irdma_sc_gen_rts_ae - request AE generated after RTS
+ * @qp: sc qp struct
+ */
+static void irdma_sc_gen_rts_ae(struct irdma_sc_qp *qp)
+{
+ __le64 *wqe;
+ u64 hdr;
+ struct irdma_qp_uk *qp_uk;
+
+ qp_uk = &qp->qp_uk;
+
+ wqe = qp_uk->sq_base[1].elem;
+
+ hdr = FIELD_PREP(IRDMAQPSQ_OPCODE, IRDMAQP_OP_NOP) |
+ FIELD_PREP(IRDMAQPSQ_LOCALFENCE, 1) |
+ FIELD_PREP(IRDMAQPSQ_VALID, qp->qp_uk.swqe_polarity);
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+ print_hex_dump_debug("QP: NOP W/LOCAL FENCE WQE", DUMP_PREFIX_OFFSET,
+ 16, 8, wqe, IRDMA_QP_WQE_MIN_SIZE, false);
+
+ wqe = qp_uk->sq_base[2].elem;
+ hdr = FIELD_PREP(IRDMAQPSQ_OPCODE, IRDMAQP_OP_GEN_RTS_AE) |
+ FIELD_PREP(IRDMAQPSQ_VALID, qp->qp_uk.swqe_polarity);
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+ print_hex_dump_debug("QP: CONN EST WQE", DUMP_PREFIX_OFFSET, 16, 8,
+ wqe, IRDMA_QP_WQE_MIN_SIZE, false);
+}
+
+/**
+ * irdma_sc_send_lsmm - send last streaming mode message
+ * @qp: sc qp struct
+ * @lsmm_buf: buffer with lsmm message
+ * @size: size of lsmm buffer
+ * @stag: stag of lsmm buffer
+ */
+void irdma_sc_send_lsmm(struct irdma_sc_qp *qp, void *lsmm_buf, u32 size,
+ irdma_stag stag)
+{
+ __le64 *wqe;
+ u64 hdr;
+ struct irdma_qp_uk *qp_uk;
+
+ qp_uk = &qp->qp_uk;
+ wqe = qp_uk->sq_base->elem;
+
+ set_64bit_val(wqe, 0, (uintptr_t)lsmm_buf);
+ if (qp->qp_uk.uk_attrs->hw_rev == IRDMA_GEN_1) {
+ set_64bit_val(wqe, 8,
+ FIELD_PREP(IRDMAQPSQ_GEN1_FRAG_LEN, size) |
+ FIELD_PREP(IRDMAQPSQ_GEN1_FRAG_STAG, stag));
+ } else {
+ set_64bit_val(wqe, 8,
+ FIELD_PREP(IRDMAQPSQ_FRAG_LEN, size) |
+ FIELD_PREP(IRDMAQPSQ_FRAG_STAG, stag) |
+ FIELD_PREP(IRDMAQPSQ_VALID, qp->qp_uk.swqe_polarity));
+ }
+ set_64bit_val(wqe, 16, 0);
+
+ hdr = FIELD_PREP(IRDMAQPSQ_OPCODE, IRDMAQP_OP_RDMA_SEND) |
+ FIELD_PREP(IRDMAQPSQ_STREAMMODE, 1) |
+ FIELD_PREP(IRDMAQPSQ_WAITFORRCVPDU, 1) |
+ FIELD_PREP(IRDMAQPSQ_VALID, qp->qp_uk.swqe_polarity);
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ print_hex_dump_debug("WQE: SEND_LSMM WQE", DUMP_PREFIX_OFFSET, 16, 8,
+ wqe, IRDMA_QP_WQE_MIN_SIZE, false);
+
+ if (qp->dev->hw_attrs.uk_attrs.feature_flags & IRDMA_FEATURE_RTS_AE)
+ irdma_sc_gen_rts_ae(qp);
+}
+
+/**
+ * irdma_sc_send_rtt - send last read0 or write0
+ * @qp: sc qp struct
+ * @read: Do read0 or write0
+ */
+void irdma_sc_send_rtt(struct irdma_sc_qp *qp, bool read)
+{
+ __le64 *wqe;
+ u64 hdr;
+ struct irdma_qp_uk *qp_uk;
+
+ qp_uk = &qp->qp_uk;
+ wqe = qp_uk->sq_base->elem;
+
+ set_64bit_val(wqe, 0, 0);
+ set_64bit_val(wqe, 16, 0);
+ if (read) {
+ if (qp->qp_uk.uk_attrs->hw_rev == IRDMA_GEN_1) {
+ set_64bit_val(wqe, 8,
+ FIELD_PREP(IRDMAQPSQ_GEN1_FRAG_STAG, 0xabcd));
+ } else {
+ set_64bit_val(wqe, 8,
+ (u64)0xabcd | FIELD_PREP(IRDMAQPSQ_VALID, qp->qp_uk.swqe_polarity));
+ }
+ hdr = FIELD_PREP(IRDMAQPSQ_REMSTAG, 0x1234) |
+ FIELD_PREP(IRDMAQPSQ_OPCODE, IRDMAQP_OP_RDMA_READ) |
+ FIELD_PREP(IRDMAQPSQ_VALID, qp->qp_uk.swqe_polarity);
+
+ } else {
+ if (qp->qp_uk.uk_attrs->hw_rev == IRDMA_GEN_1) {
+ set_64bit_val(wqe, 8, 0);
+ } else {
+ set_64bit_val(wqe, 8,
+ FIELD_PREP(IRDMAQPSQ_VALID, qp->qp_uk.swqe_polarity));
+ }
+ hdr = FIELD_PREP(IRDMAQPSQ_OPCODE, IRDMAQP_OP_RDMA_WRITE) |
+ FIELD_PREP(IRDMAQPSQ_VALID, qp->qp_uk.swqe_polarity);
+ }
+
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ print_hex_dump_debug("WQE: RTR WQE", DUMP_PREFIX_OFFSET, 16, 8, wqe,
+ IRDMA_QP_WQE_MIN_SIZE, false);
+
+ if (qp->dev->hw_attrs.uk_attrs.feature_flags & IRDMA_FEATURE_RTS_AE)
+ irdma_sc_gen_rts_ae(qp);
+}
+
+/**
+ * irdma_iwarp_opcode - determine if incoming is rdma layer
+ * @info: aeq info for the packet
+ * @pkt: packet for error
+ */
+static u32 irdma_iwarp_opcode(struct irdma_aeqe_info *info, u8 *pkt)
+{
+ __be16 *mpa;
+ u32 opcode = 0xffffffff;
+
+ if (info->q2_data_written) {
+ mpa = (__be16 *)pkt;
+ opcode = ntohs(mpa[1]) & 0xf;
+ }
+
+ return opcode;
+}
+
+/**
+ * irdma_locate_mpa - return pointer to mpa in the pkt
+ * @pkt: packet with data
+ */
+static u8 *irdma_locate_mpa(u8 *pkt)
+{
+ /* skip over ethernet header */
+ pkt += IRDMA_MAC_HLEN;
+
+ /* Skip over IP and TCP headers */
+ pkt += 4 * (pkt[0] & 0x0f);
+ pkt += 4 * ((pkt[12] >> 4) & 0x0f);
+
+ return pkt;
+}
+
+/**
+ * irdma_bld_termhdr_ctrl - setup terminate hdr control fields
+ * @qp: sc qp ptr for pkt
+ * @hdr: term hdr
+ * @opcode: flush opcode for termhdr
+ * @layer_etype: error layer + error type
+ * @err: error cod ein the header
+ */
+static void irdma_bld_termhdr_ctrl(struct irdma_sc_qp *qp,
+ struct irdma_terminate_hdr *hdr,
+ enum irdma_flush_opcode opcode,
+ u8 layer_etype, u8 err)
+{
+ qp->flush_code = opcode;
+ hdr->layer_etype = layer_etype;
+ hdr->error_code = err;
+}
+
+/**
+ * irdma_bld_termhdr_ddp_rdma - setup ddp and rdma hdrs in terminate hdr
+ * @pkt: ptr to mpa in offending pkt
+ * @hdr: term hdr
+ * @copy_len: offending pkt length to be copied to term hdr
+ * @is_tagged: DDP tagged or untagged
+ */
+static void irdma_bld_termhdr_ddp_rdma(u8 *pkt, struct irdma_terminate_hdr *hdr,
+ int *copy_len, u8 *is_tagged)
+{
+ u16 ddp_seg_len;
+
+ ddp_seg_len = ntohs(*(__be16 *)pkt);
+ if (ddp_seg_len) {
+ *copy_len = 2;
+ hdr->hdrct = DDP_LEN_FLAG;
+ if (pkt[2] & 0x80) {
+ *is_tagged = 1;
+ if (ddp_seg_len >= TERM_DDP_LEN_TAGGED) {
+ *copy_len += TERM_DDP_LEN_TAGGED;
+ hdr->hdrct |= DDP_HDR_FLAG;
+ }
+ } else {
+ if (ddp_seg_len >= TERM_DDP_LEN_UNTAGGED) {
+ *copy_len += TERM_DDP_LEN_UNTAGGED;
+ hdr->hdrct |= DDP_HDR_FLAG;
+ }
+ if (ddp_seg_len >= (TERM_DDP_LEN_UNTAGGED + TERM_RDMA_LEN) &&
+ ((pkt[3] & RDMA_OPCODE_M) == RDMA_READ_REQ_OPCODE)) {
+ *copy_len += TERM_RDMA_LEN;
+ hdr->hdrct |= RDMA_HDR_FLAG;
+ }
+ }
+ }
+}
+
+/**
+ * irdma_bld_terminate_hdr - build terminate message header
+ * @qp: qp associated with received terminate AE
+ * @info: the struct contiaing AE information
+ */
+static int irdma_bld_terminate_hdr(struct irdma_sc_qp *qp,
+ struct irdma_aeqe_info *info)
+{
+ u8 *pkt = qp->q2_buf + Q2_BAD_FRAME_OFFSET;
+ int copy_len = 0;
+ u8 is_tagged = 0;
+ u32 opcode;
+ struct irdma_terminate_hdr *termhdr;
+
+ termhdr = (struct irdma_terminate_hdr *)qp->q2_buf;
+ memset(termhdr, 0, Q2_BAD_FRAME_OFFSET);
+
+ if (info->q2_data_written) {
+ pkt = irdma_locate_mpa(pkt);
+ irdma_bld_termhdr_ddp_rdma(pkt, termhdr, &copy_len, &is_tagged);
+ }
+
+ opcode = irdma_iwarp_opcode(info, pkt);
+ qp->event_type = IRDMA_QP_EVENT_CATASTROPHIC;
+ qp->sq_flush_code = info->sq;
+ qp->rq_flush_code = info->rq;
+
+ switch (info->ae_id) {
+ case IRDMA_AE_AMP_UNALLOCATED_STAG:
+ qp->event_type = IRDMA_QP_EVENT_ACCESS_ERR;
+ if (opcode == IRDMA_OP_TYPE_RDMA_WRITE)
+ irdma_bld_termhdr_ctrl(qp, termhdr, FLUSH_PROT_ERR,
+ (LAYER_DDP << 4) | DDP_TAGGED_BUF,
+ DDP_TAGGED_INV_STAG);
+ else
+ irdma_bld_termhdr_ctrl(qp, termhdr, FLUSH_REM_ACCESS_ERR,
+ (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT,
+ RDMAP_INV_STAG);
+ break;
+ case IRDMA_AE_AMP_BOUNDS_VIOLATION:
+ qp->event_type = IRDMA_QP_EVENT_ACCESS_ERR;
+ if (info->q2_data_written)
+ irdma_bld_termhdr_ctrl(qp, termhdr, FLUSH_PROT_ERR,
+ (LAYER_DDP << 4) | DDP_TAGGED_BUF,
+ DDP_TAGGED_BOUNDS);
+ else
+ irdma_bld_termhdr_ctrl(qp, termhdr, FLUSH_REM_ACCESS_ERR,
+ (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT,
+ RDMAP_INV_BOUNDS);
+ break;
+ case IRDMA_AE_AMP_BAD_PD:
+ switch (opcode) {
+ case IRDMA_OP_TYPE_RDMA_WRITE:
+ irdma_bld_termhdr_ctrl(qp, termhdr, FLUSH_PROT_ERR,
+ (LAYER_DDP << 4) | DDP_TAGGED_BUF,
+ DDP_TAGGED_UNASSOC_STAG);
+ break;
+ case IRDMA_OP_TYPE_SEND_INV:
+ case IRDMA_OP_TYPE_SEND_SOL_INV:
+ irdma_bld_termhdr_ctrl(qp, termhdr, FLUSH_REM_ACCESS_ERR,
+ (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT,
+ RDMAP_CANT_INV_STAG);
+ break;
+ default:
+ irdma_bld_termhdr_ctrl(qp, termhdr, FLUSH_REM_ACCESS_ERR,
+ (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT,
+ RDMAP_UNASSOC_STAG);
+ }
+ break;
+ case IRDMA_AE_AMP_INVALID_STAG:
+ qp->event_type = IRDMA_QP_EVENT_ACCESS_ERR;
+ irdma_bld_termhdr_ctrl(qp, termhdr, FLUSH_REM_ACCESS_ERR,
+ (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT,
+ RDMAP_INV_STAG);
+ break;
+ case IRDMA_AE_AMP_BAD_QP:
+ irdma_bld_termhdr_ctrl(qp, termhdr, FLUSH_LOC_QP_OP_ERR,
+ (LAYER_DDP << 4) | DDP_UNTAGGED_BUF,
+ DDP_UNTAGGED_INV_QN);
+ break;
+ case IRDMA_AE_AMP_BAD_STAG_KEY:
+ case IRDMA_AE_AMP_BAD_STAG_INDEX:
+ qp->event_type = IRDMA_QP_EVENT_ACCESS_ERR;
+ switch (opcode) {
+ case IRDMA_OP_TYPE_SEND_INV:
+ case IRDMA_OP_TYPE_SEND_SOL_INV:
+ irdma_bld_termhdr_ctrl(qp, termhdr, FLUSH_REM_OP_ERR,
+ (LAYER_RDMA << 4) | RDMAP_REMOTE_OP,
+ RDMAP_CANT_INV_STAG);
+ break;
+ default:
+ irdma_bld_termhdr_ctrl(qp, termhdr, FLUSH_REM_ACCESS_ERR,
+ (LAYER_RDMA << 4) | RDMAP_REMOTE_OP,
+ RDMAP_INV_STAG);
+ }
+ break;
+ case IRDMA_AE_AMP_RIGHTS_VIOLATION:
+ case IRDMA_AE_AMP_INVALIDATE_NO_REMOTE_ACCESS_RIGHTS:
+ case IRDMA_AE_PRIV_OPERATION_DENIED:
+ qp->event_type = IRDMA_QP_EVENT_ACCESS_ERR;
+ irdma_bld_termhdr_ctrl(qp, termhdr, FLUSH_REM_ACCESS_ERR,
+ (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT,
+ RDMAP_ACCESS);
+ break;
+ case IRDMA_AE_AMP_TO_WRAP:
+ qp->event_type = IRDMA_QP_EVENT_ACCESS_ERR;
+ irdma_bld_termhdr_ctrl(qp, termhdr, FLUSH_REM_ACCESS_ERR,
+ (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT,
+ RDMAP_TO_WRAP);
+ break;
+ case IRDMA_AE_LLP_RECEIVED_MPA_CRC_ERROR:
+ irdma_bld_termhdr_ctrl(qp, termhdr, FLUSH_GENERAL_ERR,
+ (LAYER_MPA << 4) | DDP_LLP, MPA_CRC);
+ break;
+ case IRDMA_AE_LLP_SEGMENT_TOO_SMALL:
+ irdma_bld_termhdr_ctrl(qp, termhdr, FLUSH_LOC_LEN_ERR,
+ (LAYER_DDP << 4) | DDP_CATASTROPHIC,
+ DDP_CATASTROPHIC_LOCAL);
+ break;
+ case IRDMA_AE_LCE_QP_CATASTROPHIC:
+ case IRDMA_AE_DDP_NO_L_BIT:
+ irdma_bld_termhdr_ctrl(qp, termhdr, FLUSH_FATAL_ERR,
+ (LAYER_DDP << 4) | DDP_CATASTROPHIC,
+ DDP_CATASTROPHIC_LOCAL);
+ break;
+ case IRDMA_AE_DDP_INVALID_MSN_GAP_IN_MSN:
+ irdma_bld_termhdr_ctrl(qp, termhdr, FLUSH_GENERAL_ERR,
+ (LAYER_DDP << 4) | DDP_UNTAGGED_BUF,
+ DDP_UNTAGGED_INV_MSN_RANGE);
+ break;
+ case IRDMA_AE_DDP_UBE_DDP_MESSAGE_TOO_LONG_FOR_AVAILABLE_BUFFER:
+ qp->event_type = IRDMA_QP_EVENT_ACCESS_ERR;
+ irdma_bld_termhdr_ctrl(qp, termhdr, FLUSH_LOC_LEN_ERR,
+ (LAYER_DDP << 4) | DDP_UNTAGGED_BUF,
+ DDP_UNTAGGED_INV_TOO_LONG);
+ break;
+ case IRDMA_AE_DDP_UBE_INVALID_DDP_VERSION:
+ if (is_tagged)
+ irdma_bld_termhdr_ctrl(qp, termhdr, FLUSH_GENERAL_ERR,
+ (LAYER_DDP << 4) | DDP_TAGGED_BUF,
+ DDP_TAGGED_INV_DDP_VER);
+ else
+ irdma_bld_termhdr_ctrl(qp, termhdr, FLUSH_GENERAL_ERR,
+ (LAYER_DDP << 4) | DDP_UNTAGGED_BUF,
+ DDP_UNTAGGED_INV_DDP_VER);
+ break;
+ case IRDMA_AE_DDP_UBE_INVALID_MO:
+ irdma_bld_termhdr_ctrl(qp, termhdr, FLUSH_GENERAL_ERR,
+ (LAYER_DDP << 4) | DDP_UNTAGGED_BUF,
+ DDP_UNTAGGED_INV_MO);
+ break;
+ case IRDMA_AE_DDP_UBE_INVALID_MSN_NO_BUFFER_AVAILABLE:
+ irdma_bld_termhdr_ctrl(qp, termhdr, FLUSH_REM_OP_ERR,
+ (LAYER_DDP << 4) | DDP_UNTAGGED_BUF,
+ DDP_UNTAGGED_INV_MSN_NO_BUF);
+ break;
+ case IRDMA_AE_DDP_UBE_INVALID_QN:
+ irdma_bld_termhdr_ctrl(qp, termhdr, FLUSH_GENERAL_ERR,
+ (LAYER_DDP << 4) | DDP_UNTAGGED_BUF,
+ DDP_UNTAGGED_INV_QN);
+ break;
+ case IRDMA_AE_RDMAP_ROE_INVALID_RDMAP_VERSION:
+ irdma_bld_termhdr_ctrl(qp, termhdr, FLUSH_GENERAL_ERR,
+ (LAYER_RDMA << 4) | RDMAP_REMOTE_OP,
+ RDMAP_INV_RDMAP_VER);
+ break;
+ default:
+ irdma_bld_termhdr_ctrl(qp, termhdr, FLUSH_FATAL_ERR,
+ (LAYER_RDMA << 4) | RDMAP_REMOTE_OP,
+ RDMAP_UNSPECIFIED);
+ break;
+ }
+
+ if (copy_len)
+ memcpy(termhdr + 1, pkt, copy_len);
+
+ return sizeof(struct irdma_terminate_hdr) + copy_len;
+}
+
+/**
+ * irdma_terminate_send_fin() - Send fin for terminate message
+ * @qp: qp associated with received terminate AE
+ */
+void irdma_terminate_send_fin(struct irdma_sc_qp *qp)
+{
+ irdma_term_modify_qp(qp, IRDMA_QP_STATE_TERMINATE,
+ IRDMAQP_TERM_SEND_FIN_ONLY, 0);
+}
+
+/**
+ * irdma_terminate_connection() - Bad AE and send terminate to remote QP
+ * @qp: qp associated with received terminate AE
+ * @info: the struct contiaing AE information
+ */
+void irdma_terminate_connection(struct irdma_sc_qp *qp,
+ struct irdma_aeqe_info *info)
+{
+ u8 termlen = 0;
+
+ if (qp->term_flags & IRDMA_TERM_SENT)
+ return;
+
+ termlen = irdma_bld_terminate_hdr(qp, info);
+ irdma_terminate_start_timer(qp);
+ qp->term_flags |= IRDMA_TERM_SENT;
+ irdma_term_modify_qp(qp, IRDMA_QP_STATE_TERMINATE,
+ IRDMAQP_TERM_SEND_TERM_ONLY, termlen);
+}
+
+/**
+ * irdma_terminate_received - handle terminate received AE
+ * @qp: qp associated with received terminate AE
+ * @info: the struct contiaing AE information
+ */
+void irdma_terminate_received(struct irdma_sc_qp *qp,
+ struct irdma_aeqe_info *info)
+{
+ u8 *pkt = qp->q2_buf + Q2_BAD_FRAME_OFFSET;
+ __be32 *mpa;
+ u8 ddp_ctl;
+ u8 rdma_ctl;
+ u16 aeq_id = 0;
+ struct irdma_terminate_hdr *termhdr;
+
+ mpa = (__be32 *)irdma_locate_mpa(pkt);
+ if (info->q2_data_written) {
+ /* did not validate the frame - do it now */
+ ddp_ctl = (ntohl(mpa[0]) >> 8) & 0xff;
+ rdma_ctl = ntohl(mpa[0]) & 0xff;
+ if ((ddp_ctl & 0xc0) != 0x40)
+ aeq_id = IRDMA_AE_LCE_QP_CATASTROPHIC;
+ else if ((ddp_ctl & 0x03) != 1)
+ aeq_id = IRDMA_AE_DDP_UBE_INVALID_DDP_VERSION;
+ else if (ntohl(mpa[2]) != 2)
+ aeq_id = IRDMA_AE_DDP_UBE_INVALID_QN;
+ else if (ntohl(mpa[3]) != 1)
+ aeq_id = IRDMA_AE_DDP_INVALID_MSN_GAP_IN_MSN;
+ else if (ntohl(mpa[4]) != 0)
+ aeq_id = IRDMA_AE_DDP_UBE_INVALID_MO;
+ else if ((rdma_ctl & 0xc0) != 0x40)
+ aeq_id = IRDMA_AE_RDMAP_ROE_INVALID_RDMAP_VERSION;
+
+ info->ae_id = aeq_id;
+ if (info->ae_id) {
+ /* Bad terminate recvd - send back a terminate */
+ irdma_terminate_connection(qp, info);
+ return;
+ }
+ }
+
+ qp->term_flags |= IRDMA_TERM_RCVD;
+ qp->event_type = IRDMA_QP_EVENT_CATASTROPHIC;
+ termhdr = (struct irdma_terminate_hdr *)&mpa[5];
+ if (termhdr->layer_etype == RDMAP_REMOTE_PROT ||
+ termhdr->layer_etype == RDMAP_REMOTE_OP) {
+ irdma_terminate_done(qp, 0);
+ } else {
+ irdma_terminate_start_timer(qp);
+ irdma_terminate_send_fin(qp);
+ }
+}
+
+static int irdma_null_ws_add(struct irdma_sc_vsi *vsi, u8 user_pri)
+{
+ return 0;
+}
+
+static void irdma_null_ws_remove(struct irdma_sc_vsi *vsi, u8 user_pri)
+{
+ /* do nothing */
+}
+
+static void irdma_null_ws_reset(struct irdma_sc_vsi *vsi)
+{
+ /* do nothing */
+}
+
+/**
+ * irdma_sc_vsi_init - Init the vsi structure
+ * @vsi: pointer to vsi structure to initialize
+ * @info: the info used to initialize the vsi struct
+ */
+void irdma_sc_vsi_init(struct irdma_sc_vsi *vsi,
+ struct irdma_vsi_init_info *info)
+{
+ int i;
+
+ vsi->dev = info->dev;
+ vsi->back_vsi = info->back_vsi;
+ vsi->register_qset = info->register_qset;
+ vsi->unregister_qset = info->unregister_qset;
+ vsi->mtu = info->params->mtu;
+ vsi->exception_lan_q = info->exception_lan_q;
+ vsi->vsi_idx = info->pf_data_vsi_num;
+
+ irdma_set_qos_info(vsi, info->params);
+ for (i = 0; i < IRDMA_MAX_USER_PRIORITY; i++) {
+ mutex_init(&vsi->qos[i].qos_mutex);
+ INIT_LIST_HEAD(&vsi->qos[i].qplist);
+ }
+ if (vsi->dev->hw_attrs.uk_attrs.hw_rev == IRDMA_GEN_2) {
+ vsi->dev->ws_add = irdma_ws_add;
+ vsi->dev->ws_remove = irdma_ws_remove;
+ vsi->dev->ws_reset = irdma_ws_reset;
+ } else {
+ vsi->dev->ws_add = irdma_null_ws_add;
+ vsi->dev->ws_remove = irdma_null_ws_remove;
+ vsi->dev->ws_reset = irdma_null_ws_reset;
+ }
+}
+
+/**
+ * irdma_get_stats_idx - Return stats index
+ * @vsi: pointer to the vsi
+ */
+static u16 irdma_get_stats_idx(struct irdma_sc_vsi *vsi)
+{
+ struct irdma_stats_inst_info stats_info = {};
+ struct irdma_sc_dev *dev = vsi->dev;
+ u8 i;
+
+ if (dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) {
+ if (!irdma_cqp_stats_inst_cmd(vsi, IRDMA_OP_STATS_ALLOCATE,
+ &stats_info))
+ return stats_info.stats_idx;
+ }
+
+ for (i = 0; i < IRDMA_MAX_STATS_COUNT_GEN_1; i++) {
+ if (!dev->stats_idx_array[i]) {
+ dev->stats_idx_array[i] = true;
+ return i;
+ }
+ }
+
+ return IRDMA_INVALID_STATS_IDX;
+}
+
+/**
+ * irdma_hw_stats_init_gen1 - Initialize stat reg table used for gen1
+ * @vsi: vsi structure where hw_regs are set
+ *
+ * Populate the HW stats table
+ */
+static void irdma_hw_stats_init_gen1(struct irdma_sc_vsi *vsi)
+{
+ struct irdma_sc_dev *dev = vsi->dev;
+ const struct irdma_hw_stat_map *map;
+ u64 *stat_reg = vsi->hw_stats_regs;
+ u64 *regs = dev->hw_stats_regs;
+ u16 i, stats_reg_set = vsi->stats_idx;
+
+ map = dev->hw_stats_map;
+
+ /* First 4 stat instances are reserved for port level statistics. */
+ stats_reg_set += vsi->stats_inst_alloc ? IRDMA_FIRST_NON_PF_STAT : 0;
+
+ for (i = 0; i < dev->hw_attrs.max_stat_idx; i++) {
+ if (map[i].bitmask <= IRDMA_MAX_STATS_32)
+ stat_reg[i] = regs[i] + stats_reg_set * sizeof(u32);
+ else
+ stat_reg[i] = regs[i] + stats_reg_set * sizeof(u64);
+ }
+}
+
+/**
+ * irdma_vsi_stats_init - Initialize the vsi statistics
+ * @vsi: pointer to the vsi structure
+ * @info: The info structure used for initialization
+ */
+int irdma_vsi_stats_init(struct irdma_sc_vsi *vsi,
+ struct irdma_vsi_stats_info *info)
+{
+ struct irdma_dma_mem *stats_buff_mem;
+
+ vsi->pestat = info->pestat;
+ vsi->pestat->hw = vsi->dev->hw;
+ vsi->pestat->vsi = vsi;
+ stats_buff_mem = &vsi->pestat->gather_info.stats_buff_mem;
+ stats_buff_mem->size = ALIGN(IRDMA_GATHER_STATS_BUF_SIZE * 2, 1);
+ stats_buff_mem->va = dma_alloc_coherent(vsi->pestat->hw->device,
+ stats_buff_mem->size,
+ &stats_buff_mem->pa,
+ GFP_KERNEL);
+ if (!stats_buff_mem->va)
+ return -ENOMEM;
+
+ vsi->pestat->gather_info.gather_stats_va = stats_buff_mem->va;
+ vsi->pestat->gather_info.last_gather_stats_va =
+ (void *)((uintptr_t)stats_buff_mem->va +
+ IRDMA_GATHER_STATS_BUF_SIZE);
+
+ if (vsi->dev->hw_attrs.uk_attrs.hw_rev < IRDMA_GEN_3)
+ irdma_hw_stats_start_timer(vsi);
+
+ /* when stat allocation is not required default to fcn_id. */
+ vsi->stats_idx = info->fcn_id;
+ if (info->alloc_stats_inst) {
+ u16 stats_idx = irdma_get_stats_idx(vsi);
+
+ if (stats_idx != IRDMA_INVALID_STATS_IDX) {
+ vsi->stats_inst_alloc = true;
+ vsi->stats_idx = stats_idx;
+ vsi->pestat->gather_info.use_stats_inst = true;
+ vsi->pestat->gather_info.stats_inst_index = stats_idx;
+ }
+ }
+
+ if (vsi->dev->hw_attrs.uk_attrs.hw_rev == IRDMA_GEN_1)
+ irdma_hw_stats_init_gen1(vsi);
+
+ return 0;
+}
+
+/**
+ * irdma_vsi_stats_free - Free the vsi stats
+ * @vsi: pointer to the vsi structure
+ */
+void irdma_vsi_stats_free(struct irdma_sc_vsi *vsi)
+{
+ struct irdma_stats_inst_info stats_info = {};
+ struct irdma_sc_dev *dev = vsi->dev;
+ u16 stats_idx = vsi->stats_idx;
+
+ if (dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) {
+ if (vsi->stats_inst_alloc) {
+ stats_info.stats_idx = vsi->stats_idx;
+ irdma_cqp_stats_inst_cmd(vsi, IRDMA_OP_STATS_FREE,
+ &stats_info);
+ }
+ } else {
+ if (vsi->stats_inst_alloc &&
+ stats_idx < vsi->dev->hw_attrs.max_stat_inst)
+ vsi->dev->stats_idx_array[stats_idx] = false;
+ }
+
+ if (!vsi->pestat)
+ return;
+
+ if (dev->hw_attrs.uk_attrs.hw_rev < IRDMA_GEN_3)
+ irdma_hw_stats_stop_timer(vsi);
+ dma_free_coherent(vsi->pestat->hw->device,
+ vsi->pestat->gather_info.stats_buff_mem.size,
+ vsi->pestat->gather_info.stats_buff_mem.va,
+ vsi->pestat->gather_info.stats_buff_mem.pa);
+ vsi->pestat->gather_info.stats_buff_mem.va = NULL;
+}
+
+/**
+ * irdma_get_encoded_wqe_size - given wq size, returns hardware encoded size
+ * @wqsize: size of the wq (sq, rq) to encoded_size
+ * @queue_type: queue type selected for the calculation algorithm
+ */
+u8 irdma_get_encoded_wqe_size(u32 wqsize, enum irdma_queue_type queue_type)
+{
+ u8 encoded_size = 0;
+
+ if (queue_type == IRDMA_QUEUE_TYPE_SRQ) {
+ /* Smallest SRQ size is 256B (8 quanta) that gets
+ * encoded to 0.
+ */
+ encoded_size = ilog2(wqsize) - 3;
+
+ return encoded_size;
+ }
+ /* cqp sq's hw coded value starts from 1 for size of 4
+ * while it starts from 0 for qp' wq's.
+ */
+ if (queue_type == IRDMA_QUEUE_TYPE_CQP)
+ encoded_size = 1;
+ wqsize >>= 2;
+ while (wqsize >>= 1)
+ encoded_size++;
+
+ return encoded_size;
+}
+
+/**
+ * irdma_sc_gather_stats - collect the statistics
+ * @cqp: struct for cqp hw
+ * @info: gather stats info structure
+ * @scratch: u64 saved to be used during cqp completion
+ */
+static int irdma_sc_gather_stats(struct irdma_sc_cqp *cqp,
+ struct irdma_stats_gather_info *info,
+ u64 scratch)
+{
+ __le64 *wqe;
+ u64 temp;
+
+ if (info->stats_buff_mem.size < IRDMA_GATHER_STATS_BUF_SIZE)
+ return -ENOMEM;
+
+ wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return -ENOMEM;
+
+ set_64bit_val(wqe, 40,
+ FIELD_PREP(IRDMA_CQPSQ_STATS_HMC_FCN_INDEX, info->hmc_fcn_index));
+ set_64bit_val(wqe, 32, info->stats_buff_mem.pa);
+
+ temp = FIELD_PREP(IRDMA_CQPSQ_STATS_WQEVALID, cqp->polarity) |
+ FIELD_PREP(IRDMA_CQPSQ_STATS_USE_INST, info->use_stats_inst) |
+ FIELD_PREP(IRDMA_CQPSQ_STATS_INST_INDEX,
+ info->stats_inst_index) |
+ FIELD_PREP(IRDMA_CQPSQ_STATS_USE_HMC_FCN_INDEX,
+ info->use_hmc_fcn_index) |
+ FIELD_PREP(IRDMA_CQPSQ_STATS_OP, IRDMA_CQP_OP_GATHER_STATS);
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, temp);
+
+ print_hex_dump_debug("STATS: GATHER_STATS WQE", DUMP_PREFIX_OFFSET,
+ 16, 8, wqe, IRDMA_CQP_WQE_SIZE * 8, false);
+
+ irdma_sc_cqp_post_sq(cqp);
+ ibdev_dbg(to_ibdev(cqp->dev),
+ "STATS: CQP SQ head 0x%x tail 0x%x size 0x%x\n",
+ cqp->sq_ring.head, cqp->sq_ring.tail, cqp->sq_ring.size);
+
+ return 0;
+}
+
+/**
+ * irdma_sc_manage_stats_inst - allocate or free stats instance
+ * @cqp: struct for cqp hw
+ * @info: stats info structure
+ * @alloc: alloc vs. delete flag
+ * @scratch: u64 saved to be used during cqp completion
+ */
+static int irdma_sc_manage_stats_inst(struct irdma_sc_cqp *cqp,
+ struct irdma_stats_inst_info *info,
+ bool alloc, u64 scratch)
+{
+ __le64 *wqe;
+ u64 temp;
+
+ wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return -ENOMEM;
+
+ set_64bit_val(wqe, 40,
+ FIELD_PREP(IRDMA_CQPSQ_STATS_HMC_FCN_INDEX, info->hmc_fn_id));
+ temp = FIELD_PREP(IRDMA_CQPSQ_STATS_WQEVALID, cqp->polarity) |
+ FIELD_PREP(IRDMA_CQPSQ_STATS_ALLOC_INST, alloc) |
+ FIELD_PREP(IRDMA_CQPSQ_STATS_USE_HMC_FCN_INDEX,
+ info->use_hmc_fcn_index) |
+ FIELD_PREP(IRDMA_CQPSQ_STATS_INST_INDEX, info->stats_idx) |
+ FIELD_PREP(IRDMA_CQPSQ_STATS_OP, IRDMA_CQP_OP_MANAGE_STATS);
+
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, temp);
+
+ print_hex_dump_debug("WQE: MANAGE_STATS WQE", DUMP_PREFIX_OFFSET, 16,
+ 8, wqe, IRDMA_CQP_WQE_SIZE * 8, false);
+
+ irdma_sc_cqp_post_sq(cqp);
+ return 0;
+}
+
+/**
+ * irdma_sc_set_up_map - set the up map table
+ * @cqp: struct for cqp hw
+ * @info: User priority map info
+ * @scratch: u64 saved to be used during cqp completion
+ */
+static int irdma_sc_set_up_map(struct irdma_sc_cqp *cqp,
+ struct irdma_up_info *info, u64 scratch)
+{
+ __le64 *wqe;
+ u64 temp = 0;
+ int i;
+
+ wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return -ENOMEM;
+
+ for (i = 0; i < IRDMA_MAX_USER_PRIORITY; i++)
+ temp |= (u64)info->map[i] << (i * 8);
+
+ set_64bit_val(wqe, 0, temp);
+ set_64bit_val(wqe, 40,
+ FIELD_PREP(IRDMA_CQPSQ_UP_CNPOVERRIDE, info->cnp_up_override) |
+ FIELD_PREP(IRDMA_CQPSQ_UP_HMCFCNIDX, info->hmc_fcn_idx));
+
+ temp = FIELD_PREP(IRDMA_CQPSQ_UP_WQEVALID, cqp->polarity) |
+ FIELD_PREP(IRDMA_CQPSQ_UP_USEVLAN, info->use_vlan) |
+ FIELD_PREP(IRDMA_CQPSQ_UP_USEOVERRIDE,
+ info->use_cnp_up_override) |
+ FIELD_PREP(IRDMA_CQPSQ_UP_OP, IRDMA_CQP_OP_UP_MAP);
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, temp);
+
+ print_hex_dump_debug("WQE: UPMAP WQE", DUMP_PREFIX_OFFSET, 16, 8, wqe,
+ IRDMA_CQP_WQE_SIZE * 8, false);
+ irdma_sc_cqp_post_sq(cqp);
+
+ return 0;
+}
+
+/**
+ * irdma_sc_manage_ws_node - create/modify/destroy WS node
+ * @cqp: struct for cqp hw
+ * @info: node info structure
+ * @node_op: 0 for add 1 for modify, 2 for delete
+ * @scratch: u64 saved to be used during cqp completion
+ */
+static int irdma_sc_manage_ws_node(struct irdma_sc_cqp *cqp,
+ struct irdma_ws_node_info *info,
+ enum irdma_ws_node_op node_op, u64 scratch)
+{
+ __le64 *wqe;
+ u64 temp = 0;
+
+ wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return -ENOMEM;
+
+ set_64bit_val(wqe, 32,
+ FIELD_PREP(IRDMA_CQPSQ_WS_VSI, info->vsi) |
+ FIELD_PREP(IRDMA_CQPSQ_WS_WEIGHT, info->weight));
+
+ temp = FIELD_PREP(IRDMA_CQPSQ_WS_WQEVALID, cqp->polarity) |
+ FIELD_PREP(IRDMA_CQPSQ_WS_NODEOP, node_op) |
+ FIELD_PREP(IRDMA_CQPSQ_WS_ENABLENODE, info->enable) |
+ FIELD_PREP(IRDMA_CQPSQ_WS_NODETYPE, info->type_leaf) |
+ FIELD_PREP(IRDMA_CQPSQ_WS_PRIOTYPE, info->prio_type) |
+ FIELD_PREP(IRDMA_CQPSQ_WS_TC, info->tc) |
+ FIELD_PREP(IRDMA_CQPSQ_WS_OP, IRDMA_CQP_OP_WORK_SCHED_NODE) |
+ FIELD_PREP(IRDMA_CQPSQ_WS_PARENTID, info->parent_id) |
+ FIELD_PREP(IRDMA_CQPSQ_WS_NODEID, info->id);
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, temp);
+
+ print_hex_dump_debug("WQE: MANAGE_WS WQE", DUMP_PREFIX_OFFSET, 16, 8,
+ wqe, IRDMA_CQP_WQE_SIZE * 8, false);
+ irdma_sc_cqp_post_sq(cqp);
+
+ return 0;
+}
+
+/**
+ * irdma_sc_qp_flush_wqes - flush qp's wqe
+ * @qp: sc qp
+ * @info: dlush information
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+int irdma_sc_qp_flush_wqes(struct irdma_sc_qp *qp,
+ struct irdma_qp_flush_info *info, u64 scratch,
+ bool post_sq)
+{
+ u64 temp = 0;
+ __le64 *wqe;
+ struct irdma_sc_cqp *cqp;
+ u64 hdr;
+ bool flush_sq = false, flush_rq = false;
+
+ if (info->rq && !qp->flush_rq)
+ flush_rq = true;
+ if (info->sq && !qp->flush_sq)
+ flush_sq = true;
+ qp->flush_sq |= flush_sq;
+ qp->flush_rq |= flush_rq;
+
+ if (!flush_sq && !flush_rq) {
+ ibdev_dbg(to_ibdev(qp->dev),
+ "CQP: Additional flush request ignored for qp %x\n",
+ qp->qp_uk.qp_id);
+ return -EALREADY;
+ }
+
+ cqp = qp->pd->dev->cqp;
+ wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return -ENOMEM;
+
+ if (info->userflushcode) {
+ if (flush_rq)
+ temp |= FIELD_PREP(IRDMA_CQPSQ_FWQE_RQMNERR,
+ info->rq_minor_code) |
+ FIELD_PREP(IRDMA_CQPSQ_FWQE_RQMJERR,
+ info->rq_major_code);
+ if (flush_sq)
+ temp |= FIELD_PREP(IRDMA_CQPSQ_FWQE_SQMNERR,
+ info->sq_minor_code) |
+ FIELD_PREP(IRDMA_CQPSQ_FWQE_SQMJERR,
+ info->sq_major_code);
+ }
+ set_64bit_val(wqe, 16, temp);
+
+ temp = (info->generate_ae) ?
+ info->ae_code | FIELD_PREP(IRDMA_CQPSQ_FWQE_AESOURCE,
+ info->ae_src) : 0;
+ set_64bit_val(wqe, 8, temp);
+ if (cqp->dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3) {
+ set_64bit_val(wqe, 40,
+ FIELD_PREP(IRDMA_CQPSQ_FWQE_ERR_SQ_IDX, info->err_sq_idx));
+ set_64bit_val(wqe, 48,
+ FIELD_PREP(IRDMA_CQPSQ_FWQE_ERR_RQ_IDX, info->err_rq_idx));
+ }
+
+ hdr = qp->qp_uk.qp_id |
+ FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_FLUSH_WQES) |
+ FIELD_PREP(IRDMA_CQPSQ_FWQE_GENERATE_AE, info->generate_ae) |
+ FIELD_PREP(IRDMA_CQPSQ_FWQE_USERFLCODE, info->userflushcode) |
+ FIELD_PREP(IRDMA_CQPSQ_FWQE_FLUSHSQ, flush_sq) |
+ FIELD_PREP(IRDMA_CQPSQ_FWQE_FLUSHRQ, flush_rq) |
+ FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity);
+ if (cqp->dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3)
+ hdr |= FIELD_PREP(IRDMA_CQPSQ_FWQE_ERR_SQ_IDX_VALID, info->err_sq_idx_valid) |
+ FIELD_PREP(IRDMA_CQPSQ_FWQE_ERR_RQ_IDX_VALID, info->err_rq_idx_valid);
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ print_hex_dump_debug("WQE: QP_FLUSH WQE", DUMP_PREFIX_OFFSET, 16, 8,
+ wqe, IRDMA_CQP_WQE_SIZE * 8, false);
+ if (post_sq)
+ irdma_sc_cqp_post_sq(cqp);
+
+ return 0;
+}
+
+/**
+ * irdma_sc_gen_ae - generate AE, uses flush WQE CQP OP
+ * @qp: sc qp
+ * @info: gen ae information
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static int irdma_sc_gen_ae(struct irdma_sc_qp *qp,
+ struct irdma_gen_ae_info *info, u64 scratch,
+ bool post_sq)
+{
+ u64 temp;
+ __le64 *wqe;
+ struct irdma_sc_cqp *cqp;
+ u64 hdr;
+
+ cqp = qp->pd->dev->cqp;
+ wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return -ENOMEM;
+
+ temp = info->ae_code | FIELD_PREP(IRDMA_CQPSQ_FWQE_AESOURCE,
+ info->ae_src);
+ set_64bit_val(wqe, 8, temp);
+
+ hdr = qp->qp_uk.qp_id | FIELD_PREP(IRDMA_CQPSQ_OPCODE,
+ IRDMA_CQP_OP_GEN_AE) |
+ FIELD_PREP(IRDMA_CQPSQ_FWQE_GENERATE_AE, 1) |
+ FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity);
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ print_hex_dump_debug("WQE: GEN_AE WQE", DUMP_PREFIX_OFFSET, 16, 8,
+ wqe, IRDMA_CQP_WQE_SIZE * 8, false);
+ if (post_sq)
+ irdma_sc_cqp_post_sq(cqp);
+
+ return 0;
+}
+
+/*** irdma_sc_qp_upload_context - upload qp's context
+ * @dev: sc device struct
+ * @info: upload context info ptr for return
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static int irdma_sc_qp_upload_context(struct irdma_sc_dev *dev,
+ struct irdma_upload_context_info *info,
+ u64 scratch, bool post_sq)
+{
+ __le64 *wqe;
+ struct irdma_sc_cqp *cqp;
+ u64 hdr;
+
+ cqp = dev->cqp;
+ wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return -ENOMEM;
+
+ set_64bit_val(wqe, 16, info->buf_pa);
+
+ hdr = FIELD_PREP(IRDMA_CQPSQ_UCTX_QPID, info->qp_id) |
+ FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_UPLOAD_CONTEXT) |
+ FIELD_PREP(IRDMA_CQPSQ_UCTX_QPTYPE, info->qp_type) |
+ FIELD_PREP(IRDMA_CQPSQ_UCTX_RAWFORMAT, info->raw_format) |
+ FIELD_PREP(IRDMA_CQPSQ_UCTX_FREEZEQP, info->freeze_qp) |
+ FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity);
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ print_hex_dump_debug("WQE: QP_UPLOAD_CTX WQE", DUMP_PREFIX_OFFSET, 16,
+ 8, wqe, IRDMA_CQP_WQE_SIZE * 8, false);
+ if (post_sq)
+ irdma_sc_cqp_post_sq(cqp);
+
+ return 0;
+}
+
+/**
+ * irdma_sc_manage_push_page - Handle push page
+ * @cqp: struct for cqp hw
+ * @info: push page info
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static int irdma_sc_manage_push_page(struct irdma_sc_cqp *cqp,
+ struct irdma_cqp_manage_push_page_info *info,
+ u64 scratch, bool post_sq)
+{
+ __le64 *wqe;
+ u64 hdr;
+
+ if (info->free_page &&
+ info->push_idx >= cqp->dev->hw_attrs.max_hw_device_pages)
+ return -EINVAL;
+
+ wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return -ENOMEM;
+
+ set_64bit_val(wqe, 16, info->qs_handle);
+ hdr = FIELD_PREP(IRDMA_CQPSQ_MPP_PPIDX, info->push_idx) |
+ FIELD_PREP(IRDMA_CQPSQ_MPP_PPTYPE, info->push_page_type) |
+ FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_MANAGE_PUSH_PAGES) |
+ FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity) |
+ FIELD_PREP(IRDMA_CQPSQ_MPP_FREE_PAGE, info->free_page);
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ print_hex_dump_debug("WQE: MANAGE_PUSH_PAGES WQE", DUMP_PREFIX_OFFSET,
+ 16, 8, wqe, IRDMA_CQP_WQE_SIZE * 8, false);
+ if (post_sq)
+ irdma_sc_cqp_post_sq(cqp);
+
+ return 0;
+}
+
+/**
+ * irdma_sc_suspend_qp - suspend qp for param change
+ * @cqp: struct for cqp hw
+ * @qp: sc qp struct
+ * @scratch: u64 saved to be used during cqp completion
+ */
+static int irdma_sc_suspend_qp(struct irdma_sc_cqp *cqp, struct irdma_sc_qp *qp,
+ u64 scratch)
+{
+ u64 hdr;
+ __le64 *wqe;
+
+ wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return -ENOMEM;
+
+ hdr = FIELD_PREP(IRDMA_CQPSQ_SUSPENDQP_QPID, qp->qp_uk.qp_id) |
+ FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_SUSPEND_QP) |
+ FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity);
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ print_hex_dump_debug("WQE: SUSPEND_QP WQE", DUMP_PREFIX_OFFSET, 16, 8,
+ wqe, IRDMA_CQP_WQE_SIZE * 8, false);
+ irdma_sc_cqp_post_sq(cqp);
+
+ return 0;
+}
+
+/**
+ * irdma_sc_resume_qp - resume qp after suspend
+ * @cqp: struct for cqp hw
+ * @qp: sc qp struct
+ * @scratch: u64 saved to be used during cqp completion
+ */
+static int irdma_sc_resume_qp(struct irdma_sc_cqp *cqp, struct irdma_sc_qp *qp,
+ u64 scratch)
+{
+ u64 hdr;
+ __le64 *wqe;
+
+ wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return -ENOMEM;
+
+ set_64bit_val(wqe, 16,
+ FIELD_PREP(IRDMA_CQPSQ_RESUMEQP_QSHANDLE, qp->qs_handle));
+
+ hdr = FIELD_PREP(IRDMA_CQPSQ_RESUMEQP_QPID, qp->qp_uk.qp_id) |
+ FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_RESUME_QP) |
+ FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity);
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ print_hex_dump_debug("WQE: RESUME_QP WQE", DUMP_PREFIX_OFFSET, 16, 8,
+ wqe, IRDMA_CQP_WQE_SIZE * 8, false);
+ irdma_sc_cqp_post_sq(cqp);
+
+ return 0;
+}
+
+/**
+ * irdma_sc_cq_ack - acknowledge completion q
+ * @cq: cq struct
+ */
+static inline void irdma_sc_cq_ack(struct irdma_sc_cq *cq)
+{
+ writel(cq->cq_uk.cq_id, cq->cq_uk.cq_ack_db);
+}
+
+/**
+ * irdma_sc_cq_init - initialize completion q
+ * @cq: cq struct
+ * @info: cq initialization info
+ */
+int irdma_sc_cq_init(struct irdma_sc_cq *cq, struct irdma_cq_init_info *info)
+{
+ u32 pble_obj_cnt;
+
+ pble_obj_cnt = info->dev->hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].cnt;
+ if (info->virtual_map && info->first_pm_pbl_idx >= pble_obj_cnt)
+ return -EINVAL;
+
+ cq->cq_pa = info->cq_base_pa;
+ cq->dev = info->dev;
+ cq->ceq_id = info->ceq_id;
+ info->cq_uk_init_info.cqe_alloc_db = cq->dev->cq_arm_db;
+ info->cq_uk_init_info.cq_ack_db = cq->dev->cq_ack_db;
+ irdma_uk_cq_init(&cq->cq_uk, &info->cq_uk_init_info);
+
+ cq->virtual_map = info->virtual_map;
+ cq->pbl_chunk_size = info->pbl_chunk_size;
+ cq->ceqe_mask = info->ceqe_mask;
+ cq->cq_type = (info->type) ? info->type : IRDMA_CQ_TYPE_IWARP;
+ cq->shadow_area_pa = info->shadow_area_pa;
+ cq->shadow_read_threshold = info->shadow_read_threshold;
+ cq->ceq_id_valid = info->ceq_id_valid;
+ cq->tph_en = info->tph_en;
+ cq->tph_val = info->tph_val;
+ cq->first_pm_pbl_idx = info->first_pm_pbl_idx;
+ cq->vsi = info->vsi;
+
+ return 0;
+}
+
+/**
+ * irdma_sc_cq_create - create completion q
+ * @cq: cq struct
+ * @scratch: u64 saved to be used during cqp completion
+ * @check_overflow: flag for overflow check
+ * @post_sq: flag for cqp db to ring
+ */
+static int irdma_sc_cq_create(struct irdma_sc_cq *cq, u64 scratch,
+ bool check_overflow, bool post_sq)
+{
+ __le64 *wqe;
+ struct irdma_sc_cqp *cqp;
+ u64 hdr;
+
+ cqp = cq->dev->cqp;
+ if (cq->cq_uk.cq_id >= cqp->dev->hmc_info->hmc_obj[IRDMA_HMC_IW_CQ].max_cnt)
+ return -EINVAL;
+
+ if (cq->ceq_id >= cq->dev->hmc_fpm_misc.max_ceqs)
+ return -EINVAL;
+
+ wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return -ENOMEM;
+
+ set_64bit_val(wqe, 0, cq->cq_uk.cq_size);
+ set_64bit_val(wqe, 8, (uintptr_t)cq >> 1);
+ set_64bit_val(wqe, 16,
+ FIELD_PREP(IRDMA_CQPSQ_CQ_SHADOW_READ_THRESHOLD, cq->shadow_read_threshold));
+ set_64bit_val(wqe, 32, (cq->virtual_map ? 0 : cq->cq_pa));
+ set_64bit_val(wqe, 40, cq->shadow_area_pa);
+ set_64bit_val(wqe, 48,
+ FIELD_PREP(IRDMA_CQPSQ_CQ_FIRSTPMPBLIDX, (cq->virtual_map ? cq->first_pm_pbl_idx : 0)));
+ set_64bit_val(wqe, 56,
+ FIELD_PREP(IRDMA_CQPSQ_TPHVAL, cq->tph_val) |
+ FIELD_PREP(IRDMA_CQPSQ_VSIIDX, cq->vsi->vsi_idx));
+
+ hdr = FLD_LS_64(cq->dev, cq->cq_uk.cq_id, IRDMA_CQPSQ_CQ_CQID) |
+ FLD_LS_64(cq->dev, (cq->ceq_id_valid ? cq->ceq_id : 0),
+ IRDMA_CQPSQ_CQ_CEQID) |
+ FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_CREATE_CQ) |
+ FIELD_PREP(IRDMA_CQPSQ_CQ_LPBLSIZE, cq->pbl_chunk_size) |
+ FIELD_PREP(IRDMA_CQPSQ_CQ_CHKOVERFLOW, check_overflow) |
+ FIELD_PREP(IRDMA_CQPSQ_CQ_VIRTMAP, cq->virtual_map) |
+ FIELD_PREP(IRDMA_CQPSQ_CQ_CQID_HIGH, cq->cq_uk.cq_id >> 22) |
+ FIELD_PREP(IRDMA_CQPSQ_CQ_CEQID_HIGH,
+ (cq->ceq_id_valid ? cq->ceq_id : 0) >> 10) |
+ FIELD_PREP(IRDMA_CQPSQ_CQ_ENCEQEMASK, cq->ceqe_mask) |
+ FIELD_PREP(IRDMA_CQPSQ_CQ_CEQIDVALID, cq->ceq_id_valid) |
+ FIELD_PREP(IRDMA_CQPSQ_TPHEN, cq->tph_en) |
+ FIELD_PREP(IRDMA_CQPSQ_CQ_AVOIDMEMCNFLCT,
+ cq->cq_uk.avoid_mem_cflct) |
+ FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity);
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ print_hex_dump_debug("WQE: CQ_CREATE WQE", DUMP_PREFIX_OFFSET, 16, 8,
+ wqe, IRDMA_CQP_WQE_SIZE * 8, false);
+ if (post_sq)
+ irdma_sc_cqp_post_sq(cqp);
+
+ return 0;
+}
+
+/**
+ * irdma_sc_cq_destroy - destroy completion q
+ * @cq: cq struct
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+int irdma_sc_cq_destroy(struct irdma_sc_cq *cq, u64 scratch, bool post_sq)
+{
+ struct irdma_sc_cqp *cqp;
+ __le64 *wqe;
+ u64 hdr;
+
+ cqp = cq->dev->cqp;
+ wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return -ENOMEM;
+
+ set_64bit_val(wqe, 0, cq->cq_uk.cq_size);
+ set_64bit_val(wqe, 8, (uintptr_t)cq >> 1);
+ set_64bit_val(wqe, 40, cq->shadow_area_pa);
+ set_64bit_val(wqe, 48,
+ (cq->virtual_map ? cq->first_pm_pbl_idx : 0));
+
+ hdr = cq->cq_uk.cq_id |
+ FLD_LS_64(cq->dev, (cq->ceq_id_valid ? cq->ceq_id : 0),
+ IRDMA_CQPSQ_CQ_CEQID) |
+ FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_DESTROY_CQ) |
+ FIELD_PREP(IRDMA_CQPSQ_CQ_LPBLSIZE, cq->pbl_chunk_size) |
+ FIELD_PREP(IRDMA_CQPSQ_CQ_VIRTMAP, cq->virtual_map) |
+ FIELD_PREP(IRDMA_CQPSQ_CQ_ENCEQEMASK, cq->ceqe_mask) |
+ FIELD_PREP(IRDMA_CQPSQ_CQ_CEQIDVALID, cq->ceq_id_valid) |
+ FIELD_PREP(IRDMA_CQPSQ_TPHEN, cq->tph_en) |
+ FIELD_PREP(IRDMA_CQPSQ_CQ_AVOIDMEMCNFLCT, cq->cq_uk.avoid_mem_cflct) |
+ FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity);
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ print_hex_dump_debug("WQE: CQ_DESTROY WQE", DUMP_PREFIX_OFFSET, 16, 8,
+ wqe, IRDMA_CQP_WQE_SIZE * 8, false);
+ if (post_sq)
+ irdma_sc_cqp_post_sq(cqp);
+
+ return 0;
+}
+
+/**
+ * irdma_sc_cq_resize - set resized cq buffer info
+ * @cq: resized cq
+ * @info: resized cq buffer info
+ */
+void irdma_sc_cq_resize(struct irdma_sc_cq *cq, struct irdma_modify_cq_info *info)
+{
+ cq->virtual_map = info->virtual_map;
+ cq->cq_pa = info->cq_pa;
+ cq->first_pm_pbl_idx = info->first_pm_pbl_idx;
+ cq->pbl_chunk_size = info->pbl_chunk_size;
+ irdma_uk_cq_resize(&cq->cq_uk, info->cq_base, info->cq_size);
+}
+
+/**
+ * irdma_sc_cq_modify - modify a Completion Queue
+ * @cq: cq struct
+ * @info: modification info struct
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag to post to sq
+ */
+static int irdma_sc_cq_modify(struct irdma_sc_cq *cq,
+ struct irdma_modify_cq_info *info, u64 scratch,
+ bool post_sq)
+{
+ struct irdma_sc_cqp *cqp;
+ __le64 *wqe;
+ u64 hdr;
+ u32 pble_obj_cnt;
+
+ pble_obj_cnt = cq->dev->hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].cnt;
+ if (info->cq_resize && info->virtual_map &&
+ info->first_pm_pbl_idx >= pble_obj_cnt)
+ return -EINVAL;
+
+ cqp = cq->dev->cqp;
+ wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return -ENOMEM;
+
+ set_64bit_val(wqe, 0, info->cq_size);
+ set_64bit_val(wqe, 8, (uintptr_t)cq >> 1);
+ set_64bit_val(wqe, 16,
+ FIELD_PREP(IRDMA_CQPSQ_CQ_SHADOW_READ_THRESHOLD, info->shadow_read_threshold));
+ set_64bit_val(wqe, 32, info->cq_pa);
+ set_64bit_val(wqe, 40, cq->shadow_area_pa);
+ set_64bit_val(wqe, 48, info->first_pm_pbl_idx);
+ set_64bit_val(wqe, 56,
+ FIELD_PREP(IRDMA_CQPSQ_TPHVAL, cq->tph_val) |
+ FIELD_PREP(IRDMA_CQPSQ_VSIIDX, cq->vsi->vsi_idx));
+
+ hdr = cq->cq_uk.cq_id |
+ FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_MODIFY_CQ) |
+ FIELD_PREP(IRDMA_CQPSQ_CQ_CQRESIZE, info->cq_resize) |
+ FIELD_PREP(IRDMA_CQPSQ_CQ_LPBLSIZE, info->pbl_chunk_size) |
+ FIELD_PREP(IRDMA_CQPSQ_CQ_CHKOVERFLOW, info->check_overflow) |
+ FIELD_PREP(IRDMA_CQPSQ_CQ_VIRTMAP, info->virtual_map) |
+ FIELD_PREP(IRDMA_CQPSQ_CQ_ENCEQEMASK, cq->ceqe_mask) |
+ FIELD_PREP(IRDMA_CQPSQ_TPHEN, cq->tph_en) |
+ FIELD_PREP(IRDMA_CQPSQ_CQ_AVOIDMEMCNFLCT,
+ cq->cq_uk.avoid_mem_cflct) |
+ FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity);
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ print_hex_dump_debug("WQE: CQ_MODIFY WQE", DUMP_PREFIX_OFFSET, 16, 8,
+ wqe, IRDMA_CQP_WQE_SIZE * 8, false);
+ if (post_sq)
+ irdma_sc_cqp_post_sq(cqp);
+
+ return 0;
+}
+
+/**
+ * irdma_sc_get_decoded_ird_size_gen_3 - get decoded IRD size for GEN 3
+ * @ird_enc: IRD encoding
+ * IRD size defaults to a value of 4 in case of invalid input.
+ */
+static u16 irdma_sc_get_decoded_ird_size_gen_3(u8 ird_enc)
+{
+ switch (ird_enc) {
+ case IRDMA_IRD_HW_SIZE_4096_GEN3:
+ return 4096;
+ case IRDMA_IRD_HW_SIZE_2048_GEN3:
+ return 2048;
+ case IRDMA_IRD_HW_SIZE_1024_GEN3:
+ return 1024;
+ case IRDMA_IRD_HW_SIZE_512_GEN3:
+ return 512;
+ case IRDMA_IRD_HW_SIZE_256_GEN3:
+ return 256;
+ case IRDMA_IRD_HW_SIZE_128_GEN3:
+ return 128;
+ case IRDMA_IRD_HW_SIZE_64_GEN3:
+ return 64;
+ case IRDMA_IRD_HW_SIZE_32_GEN3:
+ return 32;
+ case IRDMA_IRD_HW_SIZE_16_GEN3:
+ return 16;
+ case IRDMA_IRD_HW_SIZE_8_GEN3:
+ return 8;
+ case IRDMA_IRD_HW_SIZE_4_GEN3:
+ return 4;
+ default:
+ return 4;
+ }
+}
+
+/**
+ * irdma_check_cqp_progress - check cqp processing progress
+ * @timeout: timeout info struct
+ * @dev: sc device struct
+ */
+void irdma_check_cqp_progress(struct irdma_cqp_timeout *timeout, struct irdma_sc_dev *dev)
+{
+ u64 completed_ops = atomic64_read(&dev->cqp->completed_ops);
+
+ if (timeout->compl_cqp_cmds != completed_ops) {
+ timeout->compl_cqp_cmds = completed_ops;
+ timeout->count = 0;
+ } else if (timeout->compl_cqp_cmds != dev->cqp->requested_ops) {
+ timeout->count++;
+ }
+}
+
+/**
+ * irdma_get_cqp_reg_info - get head and tail for cqp using registers
+ * @cqp: struct for cqp hw
+ * @val: cqp tail register value
+ * @tail: wqtail register value
+ * @error: cqp processing err
+ */
+static inline void irdma_get_cqp_reg_info(struct irdma_sc_cqp *cqp, u32 *val,
+ u32 *tail, u32 *error)
+{
+ *val = readl(cqp->dev->hw_regs[IRDMA_CQPTAIL]);
+ *tail = FIELD_GET(IRDMA_CQPTAIL_WQTAIL, *val);
+ *error = FIELD_GET(IRDMA_CQPTAIL_CQP_OP_ERR, *val);
+}
+
+/**
+ * irdma_sc_cqp_def_cmpl_ae_handler - remove completed requests from pending list
+ * @dev: sc device struct
+ * @info: AE entry info
+ * @first: true if this is the first call to this handler for given AEQE
+ * @scratch: (out) scratch entry pointer
+ * @sw_def_info: (in/out) SW ticket value for this AE
+ *
+ * In case of AE_DEF_CMPL event, this function should be called in a loop
+ * until it returns NULL-ptr via scratch.
+ * For each call, it looks for a matching CQP request on pending list,
+ * removes it from the list and returns the pointer to the associated scratch
+ * entry.
+ * If this is the first call to this function for given AEQE, sw_def_info
+ * value is not used to find matching requests. Instead, it is populated
+ * with the value from the first matching cqp_request on the list.
+ * For subsequent calls, ooo_op->sw_def_info need to match the value passed
+ * by a caller.
+ *
+ * Return: scratch entry pointer for cqp_request to be released or NULL
+ * if no matching request is found.
+ */
+void irdma_sc_cqp_def_cmpl_ae_handler(struct irdma_sc_dev *dev,
+ struct irdma_aeqe_info *info,
+ bool first, u64 *scratch,
+ u32 *sw_def_info)
+{
+ struct irdma_ooo_cqp_op *ooo_op;
+ unsigned long flags;
+
+ *scratch = 0;
+
+ spin_lock_irqsave(&dev->cqp->ooo_list_lock, flags);
+ list_for_each_entry(ooo_op, &dev->cqp->ooo_pnd, list_entry) {
+ if (ooo_op->deferred &&
+ ((first && ooo_op->def_info == info->def_info) ||
+ (!first && ooo_op->sw_def_info == *sw_def_info))) {
+ *sw_def_info = ooo_op->sw_def_info;
+ *scratch = ooo_op->scratch;
+
+ list_move(&ooo_op->list_entry, &dev->cqp->ooo_avail);
+ atomic64_inc(&dev->cqp->completed_ops);
+
+ break;
+ }
+ }
+ spin_unlock_irqrestore(&dev->cqp->ooo_list_lock, flags);
+
+ if (first && !*scratch)
+ ibdev_dbg(to_ibdev(dev),
+ "AEQ: deferred completion with unknown ticket: def_info 0x%x\n",
+ info->def_info);
+}
+
+/**
+ * irdma_sc_cqp_cleanup_handler - remove requests from pending list
+ * @dev: sc device struct
+ *
+ * This function should be called in a loop from irdma_cleanup_pending_cqp_op.
+ * For each call, it returns first CQP request on pending list, removes it
+ * from the list and returns the pointer to the associated scratch entry.
+ *
+ * Return: scratch entry pointer for cqp_request to be released or NULL
+ * if pending list is empty.
+ */
+u64 irdma_sc_cqp_cleanup_handler(struct irdma_sc_dev *dev)
+{
+ struct irdma_ooo_cqp_op *ooo_op;
+ u64 scratch = 0;
+
+ list_for_each_entry(ooo_op, &dev->cqp->ooo_pnd, list_entry) {
+ scratch = ooo_op->scratch;
+
+ list_del(&ooo_op->list_entry);
+ list_add(&ooo_op->list_entry, &dev->cqp->ooo_avail);
+ atomic64_inc(&dev->cqp->completed_ops);
+
+ break;
+ }
+
+ return scratch;
+}
+
+/**
+ * irdma_cqp_poll_registers - poll cqp registers
+ * @cqp: struct for cqp hw
+ * @tail: wqtail register value
+ * @count: how many times to try for completion
+ */
+static int irdma_cqp_poll_registers(struct irdma_sc_cqp *cqp, u32 tail,
+ u32 count)
+{
+ u32 i = 0;
+ u32 newtail, error, val;
+
+ while (i++ < count) {
+ irdma_get_cqp_reg_info(cqp, &val, &newtail, &error);
+ if (error) {
+ error = readl(cqp->dev->hw_regs[IRDMA_CQPERRCODES]);
+ ibdev_dbg(to_ibdev(cqp->dev),
+ "CQP: CQPERRCODES error_code[x%08X]\n",
+ error);
+ return -EIO;
+ }
+ if (newtail != tail) {
+ /* SUCCESS */
+ IRDMA_RING_MOVE_TAIL(cqp->sq_ring);
+ atomic64_inc(&cqp->completed_ops);
+ return 0;
+ }
+ udelay(cqp->dev->hw_attrs.max_sleep_count);
+ }
+
+ return -ETIMEDOUT;
+}
+
+/**
+ * irdma_sc_decode_fpm_commit - decode a 64 bit value into count and base
+ * @dev: sc device struct
+ * @buf: pointer to commit buffer
+ * @buf_idx: buffer index
+ * @obj_info: object info pointer
+ * @rsrc_idx: indexs of memory resource
+ */
+static u64 irdma_sc_decode_fpm_commit(struct irdma_sc_dev *dev, __le64 *buf,
+ u32 buf_idx, struct irdma_hmc_obj_info *obj_info,
+ u32 rsrc_idx)
+{
+ u64 temp;
+
+ get_64bit_val(buf, buf_idx, &temp);
+
+ switch (rsrc_idx) {
+ case IRDMA_HMC_IW_QP:
+ obj_info[rsrc_idx].cnt = (u32)FIELD_GET(IRDMA_COMMIT_FPM_QPCNT, temp);
+ break;
+ case IRDMA_HMC_IW_CQ:
+ obj_info[rsrc_idx].cnt = (u32)FLD_RS_64(dev, temp, IRDMA_COMMIT_FPM_CQCNT);
+ break;
+ case IRDMA_HMC_IW_APBVT_ENTRY:
+ if (dev->hw_attrs.uk_attrs.hw_rev <= IRDMA_GEN_2)
+ obj_info[rsrc_idx].cnt = 1;
+ else
+ obj_info[rsrc_idx].cnt = 0;
+ break;
+ default:
+ obj_info[rsrc_idx].cnt = (u32)temp;
+ break;
+ }
+
+ obj_info[rsrc_idx].base = (temp >> IRDMA_COMMIT_FPM_BASE_S) * 512;
+
+ return temp;
+}
+
+/**
+ * irdma_sc_parse_fpm_commit_buf - parse fpm commit buffer
+ * @dev: pointer to dev struct
+ * @buf: ptr to fpm commit buffer
+ * @info: ptr to irdma_hmc_obj_info struct
+ * @sd: number of SDs for HMC objects
+ *
+ * parses fpm commit info and copy base value
+ * of hmc objects in hmc_info
+ */
+static void
+irdma_sc_parse_fpm_commit_buf(struct irdma_sc_dev *dev, __le64 *buf,
+ struct irdma_hmc_obj_info *info, u32 *sd)
+{
+ u64 size;
+ u32 i;
+ u64 max_base = 0;
+ u32 last_hmc_obj = 0;
+
+ irdma_sc_decode_fpm_commit(dev, buf, 0, info,
+ IRDMA_HMC_IW_QP);
+ irdma_sc_decode_fpm_commit(dev, buf, 8, info,
+ IRDMA_HMC_IW_CQ);
+ irdma_sc_decode_fpm_commit(dev, buf, 16, info,
+ IRDMA_HMC_IW_SRQ);
+ irdma_sc_decode_fpm_commit(dev, buf, 24, info,
+ IRDMA_HMC_IW_HTE);
+ irdma_sc_decode_fpm_commit(dev, buf, 32, info,
+ IRDMA_HMC_IW_ARP);
+ irdma_sc_decode_fpm_commit(dev, buf, 40, info,
+ IRDMA_HMC_IW_APBVT_ENTRY);
+ irdma_sc_decode_fpm_commit(dev, buf, 48, info,
+ IRDMA_HMC_IW_MR);
+ irdma_sc_decode_fpm_commit(dev, buf, 56, info,
+ IRDMA_HMC_IW_XF);
+ irdma_sc_decode_fpm_commit(dev, buf, 64, info,
+ IRDMA_HMC_IW_XFFL);
+ irdma_sc_decode_fpm_commit(dev, buf, 72, info,
+ IRDMA_HMC_IW_Q1);
+ irdma_sc_decode_fpm_commit(dev, buf, 80, info,
+ IRDMA_HMC_IW_Q1FL);
+ irdma_sc_decode_fpm_commit(dev, buf, 88, info,
+ IRDMA_HMC_IW_TIMER);
+ irdma_sc_decode_fpm_commit(dev, buf, 112, info,
+ IRDMA_HMC_IW_PBLE);
+ /* skipping RSVD. */
+ if (dev->hw_attrs.uk_attrs.hw_rev != IRDMA_GEN_1) {
+ irdma_sc_decode_fpm_commit(dev, buf, 96, info,
+ IRDMA_HMC_IW_FSIMC);
+ irdma_sc_decode_fpm_commit(dev, buf, 104, info,
+ IRDMA_HMC_IW_FSIAV);
+ irdma_sc_decode_fpm_commit(dev, buf, 128, info,
+ IRDMA_HMC_IW_RRF);
+ irdma_sc_decode_fpm_commit(dev, buf, 136, info,
+ IRDMA_HMC_IW_RRFFL);
+ irdma_sc_decode_fpm_commit(dev, buf, 144, info,
+ IRDMA_HMC_IW_HDR);
+ irdma_sc_decode_fpm_commit(dev, buf, 152, info,
+ IRDMA_HMC_IW_MD);
+ if (dev->cqp->protocol_used == IRDMA_IWARP_PROTOCOL_ONLY) {
+ irdma_sc_decode_fpm_commit(dev, buf, 160, info,
+ IRDMA_HMC_IW_OOISC);
+ irdma_sc_decode_fpm_commit(dev, buf, 168, info,
+ IRDMA_HMC_IW_OOISCFFL);
+ }
+ }
+
+ /* searching for the last object in HMC to find the size of the HMC area. */
+ for (i = IRDMA_HMC_IW_QP; i < IRDMA_HMC_IW_MAX; i++) {
+ if (info[i].base > max_base && info[i].cnt) {
+ max_base = info[i].base;
+ last_hmc_obj = i;
+ }
+ }
+
+ size = info[last_hmc_obj].cnt * info[last_hmc_obj].size +
+ info[last_hmc_obj].base;
+
+ if (size & 0x1FFFFF)
+ *sd = (u32)((size >> 21) + 1); /* add 1 for remainder */
+ else
+ *sd = (u32)(size >> 21);
+
+}
+
+/**
+ * irdma_sc_decode_fpm_query() - Decode a 64 bit value into max count and size
+ * @buf: ptr to fpm query buffer
+ * @buf_idx: index into buf
+ * @obj_info: ptr to irdma_hmc_obj_info struct
+ * @rsrc_idx: resource index into info
+ *
+ * Decode a 64 bit value from fpm query buffer into max count and size
+ */
+static u64 irdma_sc_decode_fpm_query(__le64 *buf, u32 buf_idx,
+ struct irdma_hmc_obj_info *obj_info,
+ u32 rsrc_idx)
+{
+ u64 temp;
+ u32 size;
+
+ get_64bit_val(buf, buf_idx, &temp);
+ obj_info[rsrc_idx].max_cnt = (u32)temp;
+ size = (u32)(temp >> 32);
+ obj_info[rsrc_idx].size = BIT_ULL(size);
+
+ return temp;
+}
+
+/**
+ * irdma_sc_parse_fpm_query_buf() - parses fpm query buffer
+ * @dev: ptr to shared code device
+ * @buf: ptr to fpm query buffer
+ * @hmc_info: ptr to irdma_hmc_obj_info struct
+ * @hmc_fpm_misc: ptr to fpm data
+ *
+ * parses fpm query buffer and copy max_cnt and
+ * size value of hmc objects in hmc_info
+ */
+static int irdma_sc_parse_fpm_query_buf(struct irdma_sc_dev *dev, __le64 *buf,
+ struct irdma_hmc_info *hmc_info,
+ struct irdma_hmc_fpm_misc *hmc_fpm_misc)
+{
+ struct irdma_hmc_obj_info *obj_info;
+ u8 ird_encoding;
+ u64 temp;
+ u32 size;
+ u16 max_pe_sds;
+
+ obj_info = hmc_info->hmc_obj;
+
+ get_64bit_val(buf, 0, &temp);
+ hmc_info->first_sd_index = (u16)FIELD_GET(IRDMA_QUERY_FPM_FIRST_PE_SD_INDEX, temp);
+
+ if (dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3)
+ max_pe_sds = (u16)FIELD_GET(IRDMA_QUERY_FPM_MAX_PE_SDS_GEN3, temp);
+ else
+ max_pe_sds = (u16)FIELD_GET(IRDMA_QUERY_FPM_MAX_PE_SDS, temp);
+
+ /* Reduce SD count for unprivleged functions by 1 to account for PBLE
+ * backing page rounding
+ */
+ if (dev->hw_attrs.uk_attrs.hw_rev <= IRDMA_GEN_2 &&
+ (hmc_info->hmc_fn_id >= dev->hw_attrs.first_hw_vf_fpm_id ||
+ !dev->privileged))
+ max_pe_sds--;
+
+ hmc_fpm_misc->max_sds = max_pe_sds;
+ hmc_info->sd_table.sd_cnt = max_pe_sds + hmc_info->first_sd_index;
+ get_64bit_val(buf, 8, &temp);
+ obj_info[IRDMA_HMC_IW_QP].max_cnt = (u32)FIELD_GET(IRDMA_QUERY_FPM_MAX_QPS, temp);
+ size = (u32)(temp >> 32);
+ obj_info[IRDMA_HMC_IW_QP].size = BIT_ULL(size);
+
+ get_64bit_val(buf, 16, &temp);
+ obj_info[IRDMA_HMC_IW_CQ].max_cnt = (u32)FIELD_GET(IRDMA_QUERY_FPM_MAX_CQS, temp);
+ size = (u32)(temp >> 32);
+ obj_info[IRDMA_HMC_IW_CQ].size = BIT_ULL(size);
+
+ irdma_sc_decode_fpm_query(buf, 24, obj_info, IRDMA_HMC_IW_SRQ);
+ irdma_sc_decode_fpm_query(buf, 32, obj_info, IRDMA_HMC_IW_HTE);
+ irdma_sc_decode_fpm_query(buf, 40, obj_info, IRDMA_HMC_IW_ARP);
+
+ if (dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3) {
+ obj_info[IRDMA_HMC_IW_APBVT_ENTRY].size = 0;
+ obj_info[IRDMA_HMC_IW_APBVT_ENTRY].max_cnt = 0;
+ } else {
+ obj_info[IRDMA_HMC_IW_APBVT_ENTRY].size = 8192;
+ obj_info[IRDMA_HMC_IW_APBVT_ENTRY].max_cnt = 1;
+ }
+
+ irdma_sc_decode_fpm_query(buf, 48, obj_info, IRDMA_HMC_IW_MR);
+ irdma_sc_decode_fpm_query(buf, 56, obj_info, IRDMA_HMC_IW_XF);
+
+ get_64bit_val(buf, 64, &temp);
+ obj_info[IRDMA_HMC_IW_XFFL].max_cnt = (u32)temp;
+ obj_info[IRDMA_HMC_IW_XFFL].size = 4;
+ hmc_fpm_misc->xf_block_size = FIELD_GET(IRDMA_QUERY_FPM_XFBLOCKSIZE, temp);
+ if (obj_info[IRDMA_HMC_IW_XF].max_cnt && !hmc_fpm_misc->xf_block_size)
+ return -EINVAL;
+
+ irdma_sc_decode_fpm_query(buf, 72, obj_info, IRDMA_HMC_IW_Q1);
+ get_64bit_val(buf, 80, &temp);
+ obj_info[IRDMA_HMC_IW_Q1FL].max_cnt = (u32)temp;
+ obj_info[IRDMA_HMC_IW_Q1FL].size = 4;
+
+ hmc_fpm_misc->q1_block_size = FIELD_GET(IRDMA_QUERY_FPM_Q1BLOCKSIZE, temp);
+ if (!hmc_fpm_misc->q1_block_size)
+ return -EINVAL;
+
+ irdma_sc_decode_fpm_query(buf, 88, obj_info, IRDMA_HMC_IW_TIMER);
+
+ get_64bit_val(buf, 112, &temp);
+ obj_info[IRDMA_HMC_IW_PBLE].max_cnt = (u32)temp;
+ obj_info[IRDMA_HMC_IW_PBLE].size = 8;
+
+ get_64bit_val(buf, 120, &temp);
+ hmc_fpm_misc->max_ceqs = FIELD_GET(IRDMA_QUERY_FPM_MAX_CEQS, temp);
+ hmc_fpm_misc->ht_multiplier = FIELD_GET(IRDMA_QUERY_FPM_HTMULTIPLIER, temp);
+ hmc_fpm_misc->timer_bucket = FIELD_GET(IRDMA_QUERY_FPM_TIMERBUCKET, temp);
+ if (FIELD_GET(IRDMA_MANAGE_RSRC_VER2,
+ dev->feature_info[IRDMA_FTN_FLAGS])) {
+ ird_encoding = (u8)FIELD_GET(IRDMA_QUERY_FPM_MAX_IRD, temp);
+ hmc_fpm_misc->ird =
+ irdma_sc_get_decoded_ird_size_gen_3(ird_encoding) / 2;
+ dev->hw_attrs.max_hw_ird = hmc_fpm_misc->ird;
+ dev->hw_attrs.max_hw_ord = hmc_fpm_misc->ird;
+ }
+ if (dev->hw_attrs.uk_attrs.hw_rev == IRDMA_GEN_1)
+ return 0;
+ irdma_sc_decode_fpm_query(buf, 96, obj_info, IRDMA_HMC_IW_FSIMC);
+ irdma_sc_decode_fpm_query(buf, 104, obj_info, IRDMA_HMC_IW_FSIAV);
+ irdma_sc_decode_fpm_query(buf, 128, obj_info, IRDMA_HMC_IW_RRF);
+
+ get_64bit_val(buf, 136, &temp);
+ obj_info[IRDMA_HMC_IW_RRFFL].max_cnt = (u32)temp;
+ obj_info[IRDMA_HMC_IW_RRFFL].size = 4;
+ hmc_fpm_misc->rrf_block_size = FIELD_GET(IRDMA_QUERY_FPM_RRFBLOCKSIZE, temp);
+ if (!hmc_fpm_misc->rrf_block_size &&
+ obj_info[IRDMA_HMC_IW_RRFFL].max_cnt)
+ return -EINVAL;
+
+ irdma_sc_decode_fpm_query(buf, 144, obj_info, IRDMA_HMC_IW_HDR);
+ irdma_sc_decode_fpm_query(buf, 152, obj_info, IRDMA_HMC_IW_MD);
+
+ if (dev->cqp->protocol_used == IRDMA_IWARP_PROTOCOL_ONLY) {
+ irdma_sc_decode_fpm_query(buf, 160, obj_info, IRDMA_HMC_IW_OOISC);
+
+ get_64bit_val(buf, 168, &temp);
+ obj_info[IRDMA_HMC_IW_OOISCFFL].max_cnt = (u32)temp;
+ obj_info[IRDMA_HMC_IW_OOISCFFL].size = 4;
+ hmc_fpm_misc->ooiscf_block_size = FIELD_GET(IRDMA_QUERY_FPM_OOISCFBLOCKSIZE, temp);
+ if (!hmc_fpm_misc->ooiscf_block_size &&
+ obj_info[IRDMA_HMC_IW_OOISCFFL].max_cnt)
+ return -EINVAL;
+ }
+
+ if (dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3) {
+ get_64bit_val(buf, 176, &temp);
+ hmc_fpm_misc->loc_mem_pages = (u32)FIELD_GET(IRDMA_QUERY_FPM_LOC_MEM_PAGES, temp);
+ if (!hmc_fpm_misc->loc_mem_pages)
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/**
+ * irdma_sc_cqp_init - Initialize buffers for a control Queue Pair
+ * @cqp: IWARP control queue pair pointer
+ * @info: IWARP control queue pair init info pointer
+ *
+ * Initializes the object and context buffers for a control Queue Pair.
+ */
+int irdma_sc_cqp_init(struct irdma_sc_cqp *cqp,
+ struct irdma_cqp_init_info *info)
+{
+ struct irdma_ooo_cqp_op *ooo_op;
+ u32 num_ooo_ops;
+ u8 hw_sq_size;
+
+ if (info->sq_size > IRDMA_CQP_SW_SQSIZE_2048 ||
+ info->sq_size < IRDMA_CQP_SW_SQSIZE_4 ||
+ ((info->sq_size & (info->sq_size - 1))))
+ return -EINVAL;
+
+ hw_sq_size = irdma_get_encoded_wqe_size(info->sq_size,
+ IRDMA_QUEUE_TYPE_CQP);
+ cqp->size = sizeof(*cqp);
+ cqp->sq_size = info->sq_size;
+ cqp->hw_sq_size = hw_sq_size;
+ cqp->sq_base = info->sq;
+ cqp->host_ctx = info->host_ctx;
+ cqp->sq_pa = info->sq_pa;
+ cqp->host_ctx_pa = info->host_ctx_pa;
+ cqp->dev = info->dev;
+ cqp->struct_ver = info->struct_ver;
+ cqp->hw_maj_ver = info->hw_maj_ver;
+ cqp->hw_min_ver = info->hw_min_ver;
+ cqp->scratch_array = info->scratch_array;
+ cqp->polarity = 0;
+ cqp->en_datacenter_tcp = info->en_datacenter_tcp;
+ cqp->ena_vf_count = info->ena_vf_count;
+ cqp->hmc_profile = info->hmc_profile;
+ cqp->ceqs_per_vf = info->ceqs_per_vf;
+ cqp->disable_packed = info->disable_packed;
+ cqp->rocev2_rto_policy = info->rocev2_rto_policy;
+ cqp->protocol_used = info->protocol_used;
+ memcpy(&cqp->dcqcn_params, &info->dcqcn_params, sizeof(cqp->dcqcn_params));
+ if (cqp->dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3) {
+ cqp->ooisc_blksize = info->ooisc_blksize;
+ cqp->rrsp_blksize = info->rrsp_blksize;
+ cqp->q1_blksize = info->q1_blksize;
+ cqp->xmit_blksize = info->xmit_blksize;
+ cqp->blksizes_valid = info->blksizes_valid;
+ cqp->ts_shift = info->ts_shift;
+ cqp->ts_override = info->ts_override;
+ cqp->en_fine_grained_timers = info->en_fine_grained_timers;
+ cqp->pe_en_vf_cnt = info->pe_en_vf_cnt;
+ cqp->ooo_op_array = info->ooo_op_array;
+ /* initialize the OOO lists */
+ INIT_LIST_HEAD(&cqp->ooo_avail);
+ INIT_LIST_HEAD(&cqp->ooo_pnd);
+ if (cqp->ooo_op_array) {
+ /* Populate avail list entries */
+ for (num_ooo_ops = 0, ooo_op = info->ooo_op_array;
+ num_ooo_ops < cqp->sq_size;
+ num_ooo_ops++, ooo_op++)
+ list_add(&ooo_op->list_entry, &cqp->ooo_avail);
+ }
+ }
+ info->dev->cqp = cqp;
+
+ IRDMA_RING_INIT(cqp->sq_ring, cqp->sq_size);
+ cqp->last_def_cmpl_ticket = 0;
+ cqp->sw_def_cmpl_ticket = 0;
+ cqp->requested_ops = 0;
+ atomic64_set(&cqp->completed_ops, 0);
+ /* for the cqp commands backlog. */
+ INIT_LIST_HEAD(&cqp->dev->cqp_cmd_head);
+
+ writel(0, cqp->dev->hw_regs[IRDMA_CQPTAIL]);
+ if (cqp->dev->hw_attrs.uk_attrs.hw_rev <= IRDMA_GEN_2) {
+ writel(0, cqp->dev->hw_regs[IRDMA_CQPDB]);
+ writel(0, cqp->dev->hw_regs[IRDMA_CCQPSTATUS]);
+ }
+
+ ibdev_dbg(to_ibdev(cqp->dev),
+ "WQE: sq_size[%04d] hw_sq_size[%04d] sq_base[%p] sq_pa[%p] cqp[%p] polarity[x%04x]\n",
+ cqp->sq_size, cqp->hw_sq_size, cqp->sq_base,
+ (u64 *)(uintptr_t)cqp->sq_pa, cqp, cqp->polarity);
+ return 0;
+}
+
+/**
+ * irdma_sc_cqp_create - create cqp during bringup
+ * @cqp: struct for cqp hw
+ * @maj_err: If error, major err number
+ * @min_err: If error, minor err number
+ */
+int irdma_sc_cqp_create(struct irdma_sc_cqp *cqp, u16 *maj_err, u16 *min_err)
+{
+ u64 temp;
+ u8 hw_rev;
+ u32 cnt = 0, p1, p2, val = 0, err_code;
+ int ret_code;
+
+ hw_rev = cqp->dev->hw_attrs.uk_attrs.hw_rev;
+ cqp->sdbuf.size = ALIGN(IRDMA_UPDATE_SD_BUFF_SIZE * cqp->sq_size,
+ IRDMA_SD_BUF_ALIGNMENT);
+ cqp->sdbuf.va = dma_alloc_coherent(cqp->dev->hw->device,
+ cqp->sdbuf.size, &cqp->sdbuf.pa,
+ GFP_KERNEL);
+ if (!cqp->sdbuf.va)
+ return -ENOMEM;
+
+ spin_lock_init(&cqp->dev->cqp_lock);
+ spin_lock_init(&cqp->ooo_list_lock);
+
+ temp = FIELD_PREP(IRDMA_CQPHC_SQSIZE, cqp->hw_sq_size) |
+ FIELD_PREP(IRDMA_CQPHC_SVER, cqp->struct_ver) |
+ FIELD_PREP(IRDMA_CQPHC_DISABLE_PFPDUS, cqp->disable_packed) |
+ FIELD_PREP(IRDMA_CQPHC_CEQPERVF, cqp->ceqs_per_vf);
+ if (hw_rev >= IRDMA_GEN_2) {
+ temp |= FIELD_PREP(IRDMA_CQPHC_ROCEV2_RTO_POLICY,
+ cqp->rocev2_rto_policy) |
+ FIELD_PREP(IRDMA_CQPHC_PROTOCOL_USED,
+ cqp->protocol_used);
+ }
+ if (hw_rev >= IRDMA_GEN_3)
+ temp |= FIELD_PREP(IRDMA_CQPHC_EN_FINE_GRAINED_TIMERS,
+ cqp->en_fine_grained_timers);
+
+ set_64bit_val(cqp->host_ctx, 0, temp);
+ set_64bit_val(cqp->host_ctx, 8, cqp->sq_pa);
+
+ temp = FIELD_PREP(IRDMA_CQPHC_ENABLED_VFS, cqp->ena_vf_count) |
+ FIELD_PREP(IRDMA_CQPHC_HMC_PROFILE, cqp->hmc_profile);
+
+ if (hw_rev >= IRDMA_GEN_3)
+ temp |= FIELD_PREP(IRDMA_CQPHC_OOISC_BLKSIZE,
+ cqp->ooisc_blksize) |
+ FIELD_PREP(IRDMA_CQPHC_RRSP_BLKSIZE,
+ cqp->rrsp_blksize) |
+ FIELD_PREP(IRDMA_CQPHC_Q1_BLKSIZE, cqp->q1_blksize) |
+ FIELD_PREP(IRDMA_CQPHC_XMIT_BLKSIZE,
+ cqp->xmit_blksize) |
+ FIELD_PREP(IRDMA_CQPHC_BLKSIZES_VALID,
+ cqp->blksizes_valid) |
+ FIELD_PREP(IRDMA_CQPHC_TIMESTAMP_OVERRIDE,
+ cqp->ts_override) |
+ FIELD_PREP(IRDMA_CQPHC_TS_SHIFT, cqp->ts_shift);
+ set_64bit_val(cqp->host_ctx, 16, temp);
+ set_64bit_val(cqp->host_ctx, 24, (uintptr_t)cqp);
+ temp = FIELD_PREP(IRDMA_CQPHC_HW_MAJVER, cqp->hw_maj_ver) |
+ FIELD_PREP(IRDMA_CQPHC_HW_MINVER, cqp->hw_min_ver);
+ if (hw_rev >= IRDMA_GEN_2) {
+ temp |= FIELD_PREP(IRDMA_CQPHC_MIN_RATE, cqp->dcqcn_params.min_rate) |
+ FIELD_PREP(IRDMA_CQPHC_MIN_DEC_FACTOR, cqp->dcqcn_params.min_dec_factor);
+ }
+ set_64bit_val(cqp->host_ctx, 32, temp);
+ set_64bit_val(cqp->host_ctx, 40, 0);
+ temp = 0;
+ if (hw_rev >= IRDMA_GEN_2) {
+ temp |= FIELD_PREP(IRDMA_CQPHC_DCQCN_T, cqp->dcqcn_params.dcqcn_t) |
+ FIELD_PREP(IRDMA_CQPHC_RAI_FACTOR, cqp->dcqcn_params.rai_factor) |
+ FIELD_PREP(IRDMA_CQPHC_HAI_FACTOR, cqp->dcqcn_params.hai_factor);
+ }
+ set_64bit_val(cqp->host_ctx, 48, temp);
+ temp = 0;
+ if (hw_rev >= IRDMA_GEN_2) {
+ temp |= FIELD_PREP(IRDMA_CQPHC_DCQCN_B, cqp->dcqcn_params.dcqcn_b) |
+ FIELD_PREP(IRDMA_CQPHC_DCQCN_F, cqp->dcqcn_params.dcqcn_f) |
+ FIELD_PREP(IRDMA_CQPHC_CC_CFG_VALID, cqp->dcqcn_params.cc_cfg_valid) |
+ FIELD_PREP(IRDMA_CQPHC_RREDUCE_MPERIOD, cqp->dcqcn_params.rreduce_mperiod);
+ }
+ set_64bit_val(cqp->host_ctx, 56, temp);
+ print_hex_dump_debug("WQE: CQP_HOST_CTX WQE", DUMP_PREFIX_OFFSET, 16,
+ 8, cqp->host_ctx, IRDMA_CQP_CTX_SIZE * 8, false);
+ p1 = cqp->host_ctx_pa >> 32;
+ p2 = (u32)cqp->host_ctx_pa;
+
+ writel(p1, cqp->dev->hw_regs[IRDMA_CCQPHIGH]);
+ writel(p2, cqp->dev->hw_regs[IRDMA_CCQPLOW]);
+
+ do {
+ if (cnt++ > cqp->dev->hw_attrs.max_done_count) {
+ ret_code = -ETIMEDOUT;
+ goto err;
+ }
+ udelay(cqp->dev->hw_attrs.max_sleep_count);
+ val = readl(cqp->dev->hw_regs[IRDMA_CCQPSTATUS]);
+ } while (!val);
+
+ if (FLD_RS_32(cqp->dev, val, IRDMA_CCQPSTATUS_CCQP_ERR)) {
+ ret_code = -EOPNOTSUPP;
+ goto err;
+ }
+
+ cqp->process_cqp_sds = irdma_update_sds_noccq;
+ return 0;
+
+err:
+ dma_free_coherent(cqp->dev->hw->device, cqp->sdbuf.size,
+ cqp->sdbuf.va, cqp->sdbuf.pa);
+ cqp->sdbuf.va = NULL;
+ err_code = readl(cqp->dev->hw_regs[IRDMA_CQPERRCODES]);
+ *min_err = FIELD_GET(IRDMA_CQPERRCODES_CQP_MINOR_CODE, err_code);
+ *maj_err = FIELD_GET(IRDMA_CQPERRCODES_CQP_MAJOR_CODE, err_code);
+ return ret_code;
+}
+
+/**
+ * irdma_sc_cqp_post_sq - post of cqp's sq
+ * @cqp: struct for cqp hw
+ */
+void irdma_sc_cqp_post_sq(struct irdma_sc_cqp *cqp)
+{
+ writel(IRDMA_RING_CURRENT_HEAD(cqp->sq_ring), cqp->dev->cqp_db);
+
+ ibdev_dbg(to_ibdev(cqp->dev),
+ "WQE: CQP SQ head 0x%x tail 0x%x size 0x%x\n",
+ cqp->sq_ring.head, cqp->sq_ring.tail, cqp->sq_ring.size);
+}
+
+/**
+ * irdma_sc_cqp_get_next_send_wqe_idx - get next wqe on cqp sq
+ * and pass back index
+ * @cqp: CQP HW structure
+ * @scratch: private data for CQP WQE
+ * @wqe_idx: WQE index of CQP SQ
+ */
+__le64 *irdma_sc_cqp_get_next_send_wqe_idx(struct irdma_sc_cqp *cqp, u64 scratch,
+ u32 *wqe_idx)
+{
+ __le64 *wqe = NULL;
+ int ret_code;
+
+ if (IRDMA_RING_FULL_ERR(cqp->sq_ring)) {
+ ibdev_dbg(to_ibdev(cqp->dev),
+ "WQE: CQP SQ is full, head 0x%x tail 0x%x size 0x%x\n",
+ cqp->sq_ring.head, cqp->sq_ring.tail,
+ cqp->sq_ring.size);
+ return NULL;
+ }
+ IRDMA_ATOMIC_RING_MOVE_HEAD(cqp->sq_ring, *wqe_idx, ret_code);
+ if (ret_code)
+ return NULL;
+
+ cqp->requested_ops++;
+ if (!*wqe_idx)
+ cqp->polarity = !cqp->polarity;
+ wqe = cqp->sq_base[*wqe_idx].elem;
+ cqp->scratch_array[*wqe_idx] = scratch;
+ IRDMA_CQP_INIT_WQE(wqe);
+
+ return wqe;
+}
+
+/**
+ * irdma_sc_cqp_destroy - destroy cqp during close
+ * @cqp: struct for cqp hw
+ */
+int irdma_sc_cqp_destroy(struct irdma_sc_cqp *cqp)
+{
+ u32 cnt = 0, val;
+ int ret_code = 0;
+
+ writel(0, cqp->dev->hw_regs[IRDMA_CCQPHIGH]);
+ writel(0, cqp->dev->hw_regs[IRDMA_CCQPLOW]);
+ do {
+ if (cnt++ > cqp->dev->hw_attrs.max_done_count) {
+ ret_code = -ETIMEDOUT;
+ break;
+ }
+ udelay(cqp->dev->hw_attrs.max_sleep_count);
+ val = readl(cqp->dev->hw_regs[IRDMA_CCQPSTATUS]);
+ } while (FLD_RS_32(cqp->dev, val, IRDMA_CCQPSTATUS_CCQP_DONE));
+
+ dma_free_coherent(cqp->dev->hw->device, cqp->sdbuf.size,
+ cqp->sdbuf.va, cqp->sdbuf.pa);
+ cqp->sdbuf.va = NULL;
+ return ret_code;
+}
+
+/**
+ * irdma_sc_ccq_arm - enable intr for control cq
+ * @ccq: ccq sc struct
+ */
+void irdma_sc_ccq_arm(struct irdma_sc_cq *ccq)
+{
+ unsigned long flags;
+ u64 temp_val;
+ u16 sw_cq_sel;
+ u8 arm_next_se;
+ u8 arm_seq_num;
+
+ spin_lock_irqsave(&ccq->dev->cqp_lock, flags);
+ get_64bit_val(ccq->cq_uk.shadow_area, 32, &temp_val);
+ sw_cq_sel = (u16)FIELD_GET(IRDMA_CQ_DBSA_SW_CQ_SELECT, temp_val);
+ arm_next_se = (u8)FIELD_GET(IRDMA_CQ_DBSA_ARM_NEXT_SE, temp_val);
+ arm_seq_num = (u8)FIELD_GET(IRDMA_CQ_DBSA_ARM_SEQ_NUM, temp_val);
+ arm_seq_num++;
+ temp_val = FIELD_PREP(IRDMA_CQ_DBSA_ARM_SEQ_NUM, arm_seq_num) |
+ FIELD_PREP(IRDMA_CQ_DBSA_SW_CQ_SELECT, sw_cq_sel) |
+ FIELD_PREP(IRDMA_CQ_DBSA_ARM_NEXT_SE, arm_next_se) |
+ FIELD_PREP(IRDMA_CQ_DBSA_ARM_NEXT, 1);
+ set_64bit_val(ccq->cq_uk.shadow_area, 32, temp_val);
+ spin_unlock_irqrestore(&ccq->dev->cqp_lock, flags);
+
+ dma_wmb(); /* make sure shadow area is updated before arming */
+
+ writel(ccq->cq_uk.cq_id, ccq->dev->cq_arm_db);
+}
+
+/**
+ * irdma_sc_process_def_cmpl - process deferred or pending completion
+ * @cqp: CQP sc struct
+ * @info: CQP CQE info
+ * @wqe_idx: CQP WQE descriptor index
+ * @def_info: deferred op ticket value or out-of-order completion id
+ * @def_cmpl: true for deferred completion, false for pending (RCA)
+ */
+static void irdma_sc_process_def_cmpl(struct irdma_sc_cqp *cqp,
+ struct irdma_ccq_cqe_info *info,
+ u32 wqe_idx, u32 def_info, bool def_cmpl)
+{
+ struct irdma_ooo_cqp_op *ooo_op;
+ unsigned long flags;
+
+ /* Deferred and out-of-order completions share the same list of pending
+ * completions. Since the list can be also accessed from AE handler,
+ * it must be protected by a lock.
+ */
+ spin_lock_irqsave(&cqp->ooo_list_lock, flags);
+
+ /* For deferred completions bump up SW completion ticket value. */
+ if (def_cmpl) {
+ cqp->last_def_cmpl_ticket = def_info;
+ cqp->sw_def_cmpl_ticket++;
+ }
+ if (!list_empty(&cqp->ooo_avail)) {
+ ooo_op = (struct irdma_ooo_cqp_op *)
+ list_entry(cqp->ooo_avail.next,
+ struct irdma_ooo_cqp_op, list_entry);
+
+ list_del(&ooo_op->list_entry);
+ ooo_op->scratch = info->scratch;
+ ooo_op->def_info = def_info;
+ ooo_op->sw_def_info = cqp->sw_def_cmpl_ticket;
+ ooo_op->deferred = def_cmpl;
+ ooo_op->wqe_idx = wqe_idx;
+ /* Pending completions must be chronologically ordered,
+ * so adding at the end of list.
+ */
+ list_add_tail(&ooo_op->list_entry, &cqp->ooo_pnd);
+ }
+ spin_unlock_irqrestore(&cqp->ooo_list_lock, flags);
+
+ info->pending = true;
+}
+
+/**
+ * irdma_sc_process_ooo_cmpl - process out-of-order (final) completion
+ * @cqp: CQP sc struct
+ * @info: CQP CQE info
+ * @def_info: out-of-order completion id
+ */
+static void irdma_sc_process_ooo_cmpl(struct irdma_sc_cqp *cqp,
+ struct irdma_ccq_cqe_info *info,
+ u32 def_info)
+{
+ struct irdma_ooo_cqp_op *ooo_op_tmp;
+ struct irdma_ooo_cqp_op *ooo_op;
+ unsigned long flags;
+
+ info->scratch = 0;
+
+ spin_lock_irqsave(&cqp->ooo_list_lock, flags);
+ list_for_each_entry_safe(ooo_op, ooo_op_tmp, &cqp->ooo_pnd,
+ list_entry) {
+ if (!ooo_op->deferred && ooo_op->def_info == def_info) {
+ list_del(&ooo_op->list_entry);
+ info->scratch = ooo_op->scratch;
+ list_add(&ooo_op->list_entry, &cqp->ooo_avail);
+ break;
+ }
+ }
+ spin_unlock_irqrestore(&cqp->ooo_list_lock, flags);
+
+ if (!info->scratch)
+ ibdev_dbg(to_ibdev(cqp->dev),
+ "CQP: DEBUG_FW_OOO out-of-order completion with unknown def_info = 0x%x\n",
+ def_info);
+}
+
+/**
+ * irdma_sc_ccq_get_cqe_info - get ccq's cq entry
+ * @ccq: ccq sc struct
+ * @info: completion q entry to return
+ */
+int irdma_sc_ccq_get_cqe_info(struct irdma_sc_cq *ccq,
+ struct irdma_ccq_cqe_info *info)
+{
+ u32 def_info;
+ bool def_cmpl = false;
+ bool pend_cmpl = false;
+ bool ooo_final_cmpl = false;
+ u64 qp_ctx, temp, temp1;
+ __le64 *cqe;
+ struct irdma_sc_cqp *cqp;
+ u32 wqe_idx;
+ u32 error;
+ u8 polarity;
+ int ret_code = 0;
+ unsigned long flags;
+
+ if (ccq->cq_uk.avoid_mem_cflct)
+ cqe = IRDMA_GET_CURRENT_EXTENDED_CQ_ELEM(&ccq->cq_uk);
+ else
+ cqe = IRDMA_GET_CURRENT_CQ_ELEM(&ccq->cq_uk);
+
+ get_64bit_val(cqe, 24, &temp);
+ polarity = (u8)FIELD_GET(IRDMA_CQ_VALID, temp);
+ if (polarity != ccq->cq_uk.polarity)
+ return -ENOENT;
+
+ /* Ensure CEQE contents are read after valid bit is checked */
+ dma_rmb();
+
+ get_64bit_val(cqe, 8, &qp_ctx);
+ cqp = (struct irdma_sc_cqp *)(unsigned long)qp_ctx;
+ info->error = (bool)FIELD_GET(IRDMA_CQ_ERROR, temp);
+ info->maj_err_code = IRDMA_CQPSQ_MAJ_NO_ERROR;
+ info->min_err_code = (u16)FIELD_GET(IRDMA_CQ_MINERR, temp);
+ if (info->error) {
+ info->maj_err_code = (u16)FIELD_GET(IRDMA_CQ_MAJERR, temp);
+ error = readl(cqp->dev->hw_regs[IRDMA_CQPERRCODES]);
+ ibdev_dbg(to_ibdev(cqp->dev),
+ "CQP: CQPERRCODES error_code[x%08X]\n", error);
+ }
+
+ wqe_idx = (u32)FIELD_GET(IRDMA_CQ_WQEIDX, temp);
+ info->scratch = cqp->scratch_array[wqe_idx];
+
+ get_64bit_val(cqe, 16, &temp1);
+ info->op_ret_val = (u32)FIELD_GET(IRDMA_CCQ_OPRETVAL, temp1);
+ if (cqp->dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3) {
+ def_cmpl = info->maj_err_code == IRDMA_CQPSQ_MAJ_NO_ERROR &&
+ info->min_err_code == IRDMA_CQPSQ_MIN_DEF_CMPL;
+ def_info = (u32)FIELD_GET(IRDMA_CCQ_DEFINFO, temp1);
+
+ pend_cmpl = info->maj_err_code == IRDMA_CQPSQ_MAJ_NO_ERROR &&
+ info->min_err_code == IRDMA_CQPSQ_MIN_OOO_CMPL;
+
+ ooo_final_cmpl = (bool)FIELD_GET(IRDMA_OOO_CMPL, temp);
+
+ if (def_cmpl || pend_cmpl || ooo_final_cmpl) {
+ if (ooo_final_cmpl)
+ irdma_sc_process_ooo_cmpl(cqp, info, def_info);
+ else
+ irdma_sc_process_def_cmpl(cqp, info, wqe_idx,
+ def_info, def_cmpl);
+ }
+ }
+
+ get_64bit_val(cqp->sq_base[wqe_idx].elem, 24, &temp1);
+ info->op_code = (u8)FIELD_GET(IRDMA_CQPSQ_OPCODE, temp1);
+ info->cqp = cqp;
+
+ /* move the head for cq */
+ IRDMA_RING_MOVE_HEAD(ccq->cq_uk.cq_ring, ret_code);
+ if (!IRDMA_RING_CURRENT_HEAD(ccq->cq_uk.cq_ring))
+ ccq->cq_uk.polarity ^= 1;
+
+ /* update cq tail in cq shadow memory also */
+ IRDMA_RING_MOVE_TAIL(ccq->cq_uk.cq_ring);
+ set_64bit_val(ccq->cq_uk.shadow_area, 0,
+ IRDMA_RING_CURRENT_HEAD(ccq->cq_uk.cq_ring));
+
+ dma_wmb(); /* make sure shadow area is updated before moving tail */
+
+ spin_lock_irqsave(&cqp->dev->cqp_lock, flags);
+ if (!ooo_final_cmpl)
+ IRDMA_RING_MOVE_TAIL(cqp->sq_ring);
+ spin_unlock_irqrestore(&cqp->dev->cqp_lock, flags);
+
+ /* Do not increment completed_ops counter on pending or deferred
+ * completions.
+ */
+ if (pend_cmpl || def_cmpl)
+ return ret_code;
+ atomic64_inc(&cqp->completed_ops);
+
+ return ret_code;
+}
+
+/**
+ * irdma_sc_poll_for_cqp_op_done - Waits for last write to complete in CQP SQ
+ * @cqp: struct for cqp hw
+ * @op_code: cqp opcode for completion
+ * @compl_info: completion q entry to return
+ */
+int irdma_sc_poll_for_cqp_op_done(struct irdma_sc_cqp *cqp, u8 op_code,
+ struct irdma_ccq_cqe_info *compl_info)
+{
+ struct irdma_ccq_cqe_info info = {};
+ struct irdma_sc_cq *ccq;
+ int ret_code = 0;
+ u32 cnt = 0;
+
+ ccq = cqp->dev->ccq;
+ while (1) {
+ if (cnt++ > 100 * cqp->dev->hw_attrs.max_done_count)
+ return -ETIMEDOUT;
+
+ if (irdma_sc_ccq_get_cqe_info(ccq, &info)) {
+ udelay(cqp->dev->hw_attrs.max_sleep_count);
+ continue;
+ }
+ if (info.error && info.op_code != IRDMA_CQP_OP_QUERY_STAG) {
+ ret_code = -EIO;
+ break;
+ }
+ /* make sure op code matches*/
+ if (op_code == info.op_code)
+ break;
+ ibdev_dbg(to_ibdev(cqp->dev),
+ "WQE: opcode mismatch for my op code 0x%x, returned opcode %x\n",
+ op_code, info.op_code);
+ }
+
+ if (compl_info)
+ memcpy(compl_info, &info, sizeof(*compl_info));
+
+ return ret_code;
+}
+
+/**
+ * irdma_sc_manage_hmc_pm_func_table - manage of function table
+ * @cqp: struct for cqp hw
+ * @scratch: u64 saved to be used during cqp completion
+ * @info: info for the manage function table operation
+ * @post_sq: flag for cqp db to ring
+ */
+static int irdma_sc_manage_hmc_pm_func_table(struct irdma_sc_cqp *cqp,
+ struct irdma_hmc_fcn_info *info,
+ u64 scratch, bool post_sq)
+{
+ __le64 *wqe;
+ u64 hdr;
+
+ wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return -ENOMEM;
+
+ set_64bit_val(wqe, 0, 0);
+ set_64bit_val(wqe, 8, 0);
+ set_64bit_val(wqe, 16, 0);
+ set_64bit_val(wqe, 32, 0);
+ set_64bit_val(wqe, 40, 0);
+ set_64bit_val(wqe, 48, 0);
+ set_64bit_val(wqe, 56, 0);
+
+ hdr = FIELD_PREP(IRDMA_CQPSQ_MHMC_VFIDX, info->vf_id) |
+ FIELD_PREP(IRDMA_CQPSQ_OPCODE,
+ IRDMA_CQP_OP_MANAGE_HMC_PM_FUNC_TABLE) |
+ FIELD_PREP(IRDMA_CQPSQ_MHMC_FREEPMFN, info->free_fcn) |
+ FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity);
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ print_hex_dump_debug("WQE: MANAGE_HMC_PM_FUNC_TABLE WQE",
+ DUMP_PREFIX_OFFSET, 16, 8, wqe,
+ IRDMA_CQP_WQE_SIZE * 8, false);
+ if (post_sq)
+ irdma_sc_cqp_post_sq(cqp);
+
+ return 0;
+}
+
+/**
+ * irdma_sc_commit_fpm_val_done - wait for cqp eqe completion
+ * for fpm commit
+ * @cqp: struct for cqp hw
+ */
+static int irdma_sc_commit_fpm_val_done(struct irdma_sc_cqp *cqp)
+{
+ return irdma_sc_poll_for_cqp_op_done(cqp, IRDMA_CQP_OP_COMMIT_FPM_VAL,
+ NULL);
+}
+
+/**
+ * irdma_sc_commit_fpm_val - cqp wqe for commit fpm values
+ * @cqp: struct for cqp hw
+ * @scratch: u64 saved to be used during cqp completion
+ * @hmc_fn_id: hmc function id
+ * @commit_fpm_mem: Memory for fpm values
+ * @post_sq: flag for cqp db to ring
+ * @wait_type: poll ccq or cqp registers for cqp completion
+ */
+static int irdma_sc_commit_fpm_val(struct irdma_sc_cqp *cqp, u64 scratch,
+ u8 hmc_fn_id,
+ struct irdma_dma_mem *commit_fpm_mem,
+ bool post_sq, u8 wait_type)
+{
+ __le64 *wqe;
+ u64 hdr;
+ u32 tail, val, error;
+ int ret_code = 0;
+
+ wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return -ENOMEM;
+
+ set_64bit_val(wqe, 16, hmc_fn_id);
+ set_64bit_val(wqe, 32, commit_fpm_mem->pa);
+
+ hdr = FIELD_PREP(IRDMA_CQPSQ_BUFSIZE, IRDMA_COMMIT_FPM_BUF_SIZE) |
+ FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_COMMIT_FPM_VAL) |
+ FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity);
+
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ print_hex_dump_debug("WQE: COMMIT_FPM_VAL WQE", DUMP_PREFIX_OFFSET,
+ 16, 8, wqe, IRDMA_CQP_WQE_SIZE * 8, false);
+ irdma_get_cqp_reg_info(cqp, &val, &tail, &error);
+
+ if (post_sq) {
+ irdma_sc_cqp_post_sq(cqp);
+ if (wait_type == IRDMA_CQP_WAIT_POLL_REGS)
+ ret_code = irdma_cqp_poll_registers(cqp, tail,
+ cqp->dev->hw_attrs.max_done_count);
+ else if (wait_type == IRDMA_CQP_WAIT_POLL_CQ)
+ ret_code = irdma_sc_commit_fpm_val_done(cqp);
+ }
+
+ return ret_code;
+}
+
+/**
+ * irdma_sc_query_fpm_val_done - poll for cqp wqe completion for
+ * query fpm
+ * @cqp: struct for cqp hw
+ */
+static int irdma_sc_query_fpm_val_done(struct irdma_sc_cqp *cqp)
+{
+ return irdma_sc_poll_for_cqp_op_done(cqp, IRDMA_CQP_OP_QUERY_FPM_VAL,
+ NULL);
+}
+
+/**
+ * irdma_sc_query_fpm_val - cqp wqe query fpm values
+ * @cqp: struct for cqp hw
+ * @scratch: u64 saved to be used during cqp completion
+ * @hmc_fn_id: hmc function id
+ * @query_fpm_mem: memory for return fpm values
+ * @post_sq: flag for cqp db to ring
+ * @wait_type: poll ccq or cqp registers for cqp completion
+ */
+static int irdma_sc_query_fpm_val(struct irdma_sc_cqp *cqp, u64 scratch,
+ u8 hmc_fn_id,
+ struct irdma_dma_mem *query_fpm_mem,
+ bool post_sq, u8 wait_type)
+{
+ __le64 *wqe;
+ u64 hdr;
+ u32 tail, val, error;
+ int ret_code = 0;
+
+ wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return -ENOMEM;
+
+ set_64bit_val(wqe, 16, hmc_fn_id);
+ set_64bit_val(wqe, 32, query_fpm_mem->pa);
+
+ hdr = FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_QUERY_FPM_VAL) |
+ FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity);
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ print_hex_dump_debug("WQE: QUERY_FPM WQE", DUMP_PREFIX_OFFSET, 16, 8,
+ wqe, IRDMA_CQP_WQE_SIZE * 8, false);
+ irdma_get_cqp_reg_info(cqp, &val, &tail, &error);
+
+ if (post_sq) {
+ irdma_sc_cqp_post_sq(cqp);
+ if (wait_type == IRDMA_CQP_WAIT_POLL_REGS)
+ ret_code = irdma_cqp_poll_registers(cqp, tail,
+ cqp->dev->hw_attrs.max_done_count);
+ else if (wait_type == IRDMA_CQP_WAIT_POLL_CQ)
+ ret_code = irdma_sc_query_fpm_val_done(cqp);
+ }
+
+ return ret_code;
+}
+
+/**
+ * irdma_sc_ceq_init - initialize ceq
+ * @ceq: ceq sc structure
+ * @info: ceq initialization info
+ */
+int irdma_sc_ceq_init(struct irdma_sc_ceq *ceq,
+ struct irdma_ceq_init_info *info)
+{
+ u32 pble_obj_cnt;
+
+ if (info->elem_cnt < info->dev->hw_attrs.min_hw_ceq_size ||
+ info->elem_cnt > info->dev->hw_attrs.max_hw_ceq_size)
+ return -EINVAL;
+
+ if (info->ceq_id >= info->dev->hmc_fpm_misc.max_ceqs)
+ return -EINVAL;
+ pble_obj_cnt = info->dev->hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].cnt;
+
+ if (info->virtual_map && info->first_pm_pbl_idx >= pble_obj_cnt)
+ return -EINVAL;
+
+ ceq->size = sizeof(*ceq);
+ ceq->ceqe_base = (struct irdma_ceqe *)info->ceqe_base;
+ ceq->ceq_id = info->ceq_id;
+ ceq->dev = info->dev;
+ ceq->elem_cnt = info->elem_cnt;
+ ceq->ceq_elem_pa = info->ceqe_pa;
+ ceq->virtual_map = info->virtual_map;
+ ceq->itr_no_expire = info->itr_no_expire;
+ ceq->pbl_chunk_size = (ceq->virtual_map ? info->pbl_chunk_size : 0);
+ ceq->first_pm_pbl_idx = (ceq->virtual_map ? info->first_pm_pbl_idx : 0);
+ ceq->pbl_list = (ceq->virtual_map ? info->pbl_list : NULL);
+ ceq->tph_en = info->tph_en;
+ ceq->tph_val = info->tph_val;
+ ceq->vsi_idx = info->vsi_idx;
+ ceq->polarity = 1;
+ IRDMA_RING_INIT(ceq->ceq_ring, ceq->elem_cnt);
+ ceq->dev->ceq[info->ceq_id] = ceq;
+
+ return 0;
+}
+
+/**
+ * irdma_sc_ceq_create - create ceq wqe
+ * @ceq: ceq sc structure
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+
+static int irdma_sc_ceq_create(struct irdma_sc_ceq *ceq, u64 scratch,
+ bool post_sq)
+{
+ struct irdma_sc_cqp *cqp;
+ __le64 *wqe;
+ u64 hdr;
+
+ cqp = ceq->dev->cqp;
+ wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return -ENOMEM;
+ set_64bit_val(wqe, 16, ceq->elem_cnt);
+ set_64bit_val(wqe, 32,
+ (ceq->virtual_map ? 0 : ceq->ceq_elem_pa));
+ set_64bit_val(wqe, 48,
+ (ceq->virtual_map ? ceq->first_pm_pbl_idx : 0));
+ set_64bit_val(wqe, 56,
+ FIELD_PREP(IRDMA_CQPSQ_TPHVAL, ceq->tph_val) |
+ FIELD_PREP(IRDMA_CQPSQ_PASID, ceq->pasid) |
+ FIELD_PREP(IRDMA_CQPSQ_VSIIDX, ceq->vsi_idx));
+ hdr = FIELD_PREP(IRDMA_CQPSQ_CEQ_CEQID, ceq->ceq_id) |
+ FIELD_PREP(IRDMA_CQPSQ_CEQ_CEQID_HIGH, ceq->ceq_id >> 10) |
+ FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_CREATE_CEQ) |
+ FIELD_PREP(IRDMA_CQPSQ_CEQ_LPBLSIZE, ceq->pbl_chunk_size) |
+ FIELD_PREP(IRDMA_CQPSQ_CEQ_VMAP, ceq->virtual_map) |
+ FIELD_PREP(IRDMA_CQPSQ_CEQ_ITRNOEXPIRE, ceq->itr_no_expire) |
+ FIELD_PREP(IRDMA_CQPSQ_TPHEN, ceq->tph_en) |
+ FIELD_PREP(IRDMA_CQPSQ_PASID_VALID, ceq->pasid_valid) |
+ FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity);
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ print_hex_dump_debug("WQE: CEQ_CREATE WQE", DUMP_PREFIX_OFFSET, 16, 8,
+ wqe, IRDMA_CQP_WQE_SIZE * 8, false);
+ if (post_sq)
+ irdma_sc_cqp_post_sq(cqp);
+
+ return 0;
+}
+
+/**
+ * irdma_sc_cceq_create_done - poll for control ceq wqe to complete
+ * @ceq: ceq sc structure
+ */
+static int irdma_sc_cceq_create_done(struct irdma_sc_ceq *ceq)
+{
+ struct irdma_sc_cqp *cqp;
+
+ cqp = ceq->dev->cqp;
+ return irdma_sc_poll_for_cqp_op_done(cqp, IRDMA_CQP_OP_CREATE_CEQ,
+ NULL);
+}
+
+/**
+ * irdma_sc_cceq_destroy_done - poll for destroy cceq to complete
+ * @ceq: ceq sc structure
+ */
+int irdma_sc_cceq_destroy_done(struct irdma_sc_ceq *ceq)
+{
+ struct irdma_sc_cqp *cqp;
+
+ cqp = ceq->dev->cqp;
+ cqp->process_cqp_sds = irdma_update_sds_noccq;
+
+ return irdma_sc_poll_for_cqp_op_done(cqp, IRDMA_CQP_OP_DESTROY_CEQ,
+ NULL);
+}
+
+/**
+ * irdma_sc_cceq_create - create cceq
+ * @ceq: ceq sc structure
+ * @scratch: u64 saved to be used during cqp completion
+ */
+int irdma_sc_cceq_create(struct irdma_sc_ceq *ceq, u64 scratch)
+{
+ int ret_code;
+ struct irdma_sc_dev *dev = ceq->dev;
+
+ dev->ccq->vsi_idx = ceq->vsi_idx;
+
+ ret_code = irdma_sc_ceq_create(ceq, scratch, true);
+ if (!ret_code)
+ return irdma_sc_cceq_create_done(ceq);
+
+ return ret_code;
+}
+
+/**
+ * irdma_sc_ceq_destroy - destroy ceq
+ * @ceq: ceq sc structure
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+int irdma_sc_ceq_destroy(struct irdma_sc_ceq *ceq, u64 scratch, bool post_sq)
+{
+ struct irdma_sc_cqp *cqp;
+ __le64 *wqe;
+ u64 hdr;
+
+ cqp = ceq->dev->cqp;
+ wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return -ENOMEM;
+
+ set_64bit_val(wqe, 16, ceq->elem_cnt);
+ set_64bit_val(wqe, 48, ceq->first_pm_pbl_idx);
+ set_64bit_val(wqe, 56,
+ FIELD_PREP(IRDMA_CQPSQ_PASID, ceq->pasid));
+ hdr = ceq->ceq_id |
+ FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_DESTROY_CEQ) |
+ FIELD_PREP(IRDMA_CQPSQ_CEQ_LPBLSIZE, ceq->pbl_chunk_size) |
+ FIELD_PREP(IRDMA_CQPSQ_CEQ_VMAP, ceq->virtual_map) |
+ FIELD_PREP(IRDMA_CQPSQ_TPHEN, ceq->tph_en) |
+ FIELD_PREP(IRDMA_CQPSQ_PASID_VALID, ceq->pasid_valid) |
+ FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity);
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ print_hex_dump_debug("WQE: CEQ_DESTROY WQE", DUMP_PREFIX_OFFSET, 16,
+ 8, wqe, IRDMA_CQP_WQE_SIZE * 8, false);
+ if (post_sq)
+ irdma_sc_cqp_post_sq(cqp);
+
+ return 0;
+}
+
+/**
+ * irdma_sc_process_ceq - process ceq
+ * @dev: sc device struct
+ * @ceq: ceq sc structure
+ *
+ * It is expected caller serializes this function with cleanup_ceqes()
+ * because these functions manipulate the same ceq
+ */
+void *irdma_sc_process_ceq(struct irdma_sc_dev *dev, struct irdma_sc_ceq *ceq)
+{
+ u64 temp;
+ __le64 *ceqe;
+ struct irdma_sc_cq *cq = NULL;
+ struct irdma_sc_cq *temp_cq;
+ u8 polarity;
+ u32 cq_idx;
+
+ do {
+ cq_idx = 0;
+ ceqe = IRDMA_GET_CURRENT_CEQ_ELEM(ceq);
+ get_64bit_val(ceqe, 0, &temp);
+ polarity = (u8)FIELD_GET(IRDMA_CEQE_VALID, temp);
+ if (polarity != ceq->polarity)
+ return NULL;
+
+ temp_cq = (struct irdma_sc_cq *)(unsigned long)(temp << 1);
+ if (!temp_cq) {
+ cq_idx = IRDMA_INVALID_CQ_IDX;
+ IRDMA_RING_MOVE_TAIL(ceq->ceq_ring);
+
+ if (!IRDMA_RING_CURRENT_TAIL(ceq->ceq_ring))
+ ceq->polarity ^= 1;
+ continue;
+ }
+
+ cq = temp_cq;
+
+ IRDMA_RING_MOVE_TAIL(ceq->ceq_ring);
+ if (!IRDMA_RING_CURRENT_TAIL(ceq->ceq_ring))
+ ceq->polarity ^= 1;
+ } while (cq_idx == IRDMA_INVALID_CQ_IDX);
+
+ if (cq)
+ irdma_sc_cq_ack(cq);
+ return cq;
+}
+
+/**
+ * irdma_sc_cleanup_ceqes - clear the valid ceqes ctx matching the cq
+ * @cq: cq for which the ceqes need to be cleaned up
+ * @ceq: ceq ptr
+ *
+ * The function is called after the cq is destroyed to cleanup
+ * its pending ceqe entries. It is expected caller serializes this
+ * function with process_ceq() in interrupt context.
+ */
+void irdma_sc_cleanup_ceqes(struct irdma_sc_cq *cq, struct irdma_sc_ceq *ceq)
+{
+ struct irdma_sc_cq *next_cq;
+ u8 ceq_polarity = ceq->polarity;
+ __le64 *ceqe;
+ u8 polarity;
+ u64 temp;
+ int next;
+ u32 i;
+
+ next = IRDMA_RING_GET_NEXT_TAIL(ceq->ceq_ring, 0);
+
+ for (i = 1; i <= IRDMA_RING_SIZE(*ceq); i++) {
+ ceqe = IRDMA_GET_CEQ_ELEM_AT_POS(ceq, next);
+
+ get_64bit_val(ceqe, 0, &temp);
+ polarity = (u8)FIELD_GET(IRDMA_CEQE_VALID, temp);
+ if (polarity != ceq_polarity)
+ return;
+
+ next_cq = (struct irdma_sc_cq *)(unsigned long)(temp << 1);
+ if (cq == next_cq)
+ set_64bit_val(ceqe, 0, temp & IRDMA_CEQE_VALID);
+
+ next = IRDMA_RING_GET_NEXT_TAIL(ceq->ceq_ring, i);
+ if (!next)
+ ceq_polarity ^= 1;
+ }
+}
+
+/**
+ * irdma_sc_aeq_init - initialize aeq
+ * @aeq: aeq structure ptr
+ * @info: aeq initialization info
+ */
+int irdma_sc_aeq_init(struct irdma_sc_aeq *aeq,
+ struct irdma_aeq_init_info *info)
+{
+ u32 pble_obj_cnt;
+
+ if (info->elem_cnt < info->dev->hw_attrs.min_hw_aeq_size ||
+ info->elem_cnt > info->dev->hw_attrs.max_hw_aeq_size)
+ return -EINVAL;
+
+ pble_obj_cnt = info->dev->hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].cnt;
+
+ if (info->virtual_map && info->first_pm_pbl_idx >= pble_obj_cnt)
+ return -EINVAL;
+
+ aeq->size = sizeof(*aeq);
+ aeq->polarity = 1;
+ aeq->aeqe_base = (struct irdma_sc_aeqe *)info->aeqe_base;
+ aeq->dev = info->dev;
+ aeq->elem_cnt = info->elem_cnt;
+ aeq->aeq_elem_pa = info->aeq_elem_pa;
+ IRDMA_RING_INIT(aeq->aeq_ring, aeq->elem_cnt);
+ aeq->virtual_map = info->virtual_map;
+ aeq->pbl_list = (aeq->virtual_map ? info->pbl_list : NULL);
+ aeq->pbl_chunk_size = (aeq->virtual_map ? info->pbl_chunk_size : 0);
+ aeq->first_pm_pbl_idx = (aeq->virtual_map ? info->first_pm_pbl_idx : 0);
+ aeq->msix_idx = info->msix_idx;
+ info->dev->aeq = aeq;
+
+ return 0;
+}
+
+/**
+ * irdma_sc_aeq_create - create aeq
+ * @aeq: aeq structure ptr
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static int irdma_sc_aeq_create(struct irdma_sc_aeq *aeq, u64 scratch,
+ bool post_sq)
+{
+ __le64 *wqe;
+ struct irdma_sc_cqp *cqp;
+ u64 hdr;
+
+ cqp = aeq->dev->cqp;
+ wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return -ENOMEM;
+ set_64bit_val(wqe, 16, aeq->elem_cnt);
+ set_64bit_val(wqe, 32,
+ (aeq->virtual_map ? 0 : aeq->aeq_elem_pa));
+ set_64bit_val(wqe, 48,
+ (aeq->virtual_map ? aeq->first_pm_pbl_idx : 0));
+ set_64bit_val(wqe, 56,
+ FIELD_PREP(IRDMA_CQPSQ_PASID, aeq->pasid));
+
+ hdr = FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_CREATE_AEQ) |
+ FIELD_PREP(IRDMA_CQPSQ_AEQ_LPBLSIZE, aeq->pbl_chunk_size) |
+ FIELD_PREP(IRDMA_CQPSQ_AEQ_VMAP, aeq->virtual_map) |
+ FIELD_PREP(IRDMA_CQPSQ_PASID_VALID, aeq->pasid_valid) |
+ FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity);
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ print_hex_dump_debug("WQE: AEQ_CREATE WQE", DUMP_PREFIX_OFFSET, 16, 8,
+ wqe, IRDMA_CQP_WQE_SIZE * 8, false);
+ if (post_sq)
+ irdma_sc_cqp_post_sq(cqp);
+
+ return 0;
+}
+
+/**
+ * irdma_sc_aeq_destroy - destroy aeq during close
+ * @aeq: aeq structure ptr
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static int irdma_sc_aeq_destroy(struct irdma_sc_aeq *aeq, u64 scratch,
+ bool post_sq)
+{
+ __le64 *wqe;
+ struct irdma_sc_cqp *cqp;
+ struct irdma_sc_dev *dev;
+ u64 hdr;
+
+ dev = aeq->dev;
+
+ if (dev->hw_attrs.uk_attrs.hw_rev <= IRDMA_GEN_2)
+ writel(0, dev->hw_regs[IRDMA_PFINT_AEQCTL]);
+
+ cqp = dev->cqp;
+ wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return -ENOMEM;
+ set_64bit_val(wqe, 16, aeq->elem_cnt);
+ set_64bit_val(wqe, 48, aeq->first_pm_pbl_idx);
+ set_64bit_val(wqe, 56,
+ FIELD_PREP(IRDMA_CQPSQ_PASID, aeq->pasid));
+ hdr = FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_DESTROY_AEQ) |
+ FIELD_PREP(IRDMA_CQPSQ_AEQ_LPBLSIZE, aeq->pbl_chunk_size) |
+ FIELD_PREP(IRDMA_CQPSQ_AEQ_VMAP, aeq->virtual_map) |
+ FIELD_PREP(IRDMA_CQPSQ_PASID_VALID, aeq->pasid_valid) |
+ FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity);
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ print_hex_dump_debug("WQE: AEQ_DESTROY WQE", DUMP_PREFIX_OFFSET, 16,
+ 8, wqe, IRDMA_CQP_WQE_SIZE * 8, false);
+ if (post_sq)
+ irdma_sc_cqp_post_sq(cqp);
+ return 0;
+}
+
+/**
+ * irdma_sc_get_next_aeqe - get next aeq entry
+ * @aeq: aeq structure ptr
+ * @info: aeqe info to be returned
+ */
+int irdma_sc_get_next_aeqe(struct irdma_sc_aeq *aeq,
+ struct irdma_aeqe_info *info)
+{
+ u64 temp, compl_ctx;
+ __le64 *aeqe;
+ u8 ae_src;
+ u8 polarity;
+
+ aeqe = IRDMA_GET_CURRENT_AEQ_ELEM(aeq);
+ get_64bit_val(aeqe, 8, &temp);
+ polarity = (u8)FIELD_GET(IRDMA_AEQE_VALID, temp);
+
+ if (aeq->polarity != polarity)
+ return -ENOENT;
+
+ /* Ensure AEQE contents are read after valid bit is checked */
+ dma_rmb();
+
+ get_64bit_val(aeqe, 0, &compl_ctx);
+
+ print_hex_dump_debug("WQE: AEQ_ENTRY WQE", DUMP_PREFIX_OFFSET, 16, 8,
+ aeqe, 16, false);
+
+ if (aeq->dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3) {
+ ae_src = (u8)FIELD_GET(IRDMA_AEQE_AESRC_GEN_3, temp);
+ info->wqe_idx = (u16)FIELD_GET(IRDMA_AEQE_WQDESCIDX_GEN_3,
+ temp);
+ info->qp_cq_id = (u32)FIELD_GET(IRDMA_AEQE_QPCQID_GEN_3, temp);
+ info->ae_id = (u16)FIELD_GET(IRDMA_AEQE_AECODE_GEN_3, temp);
+ info->tcp_state = (u8)FIELD_GET(IRDMA_AEQE_TCPSTATE_GEN_3, compl_ctx);
+ info->iwarp_state = (u8)FIELD_GET(IRDMA_AEQE_IWSTATE_GEN_3, temp);
+ info->q2_data_written = (u8)FIELD_GET(IRDMA_AEQE_Q2DATA_GEN_3, compl_ctx);
+ info->aeqe_overflow = (bool)FIELD_GET(IRDMA_AEQE_OVERFLOW_GEN_3, temp);
+ info->compl_ctx = FIELD_GET(IRDMA_AEQE_CMPL_CTXT, compl_ctx);
+ compl_ctx = FIELD_GET(IRDMA_AEQE_CMPL_CTXT, compl_ctx) << IRDMA_AEQE_CMPL_CTXT_S;
+ } else {
+ ae_src = (u8)FIELD_GET(IRDMA_AEQE_AESRC, temp);
+ info->wqe_idx = (u16)FIELD_GET(IRDMA_AEQE_WQDESCIDX, temp);
+ info->qp_cq_id = (u32)FIELD_GET(IRDMA_AEQE_QPCQID_LOW, temp) |
+ ((u32)FIELD_GET(IRDMA_AEQE_QPCQID_HI, temp) << 18);
+ info->ae_id = (u16)FIELD_GET(IRDMA_AEQE_AECODE, temp);
+ info->tcp_state = (u8)FIELD_GET(IRDMA_AEQE_TCPSTATE, temp);
+ info->iwarp_state = (u8)FIELD_GET(IRDMA_AEQE_IWSTATE, temp);
+ info->q2_data_written = (u8)FIELD_GET(IRDMA_AEQE_Q2DATA, temp);
+ info->aeqe_overflow = (bool)FIELD_GET(IRDMA_AEQE_OVERFLOW,
+ temp);
+ }
+
+ info->ae_src = ae_src;
+ switch (info->ae_id) {
+ case IRDMA_AE_SRQ_LIMIT:
+ info->srq = true;
+ /* [63:6] from CMPL_CTXT, [5:0] from WQDESCIDX. */
+ info->compl_ctx = compl_ctx;
+ ae_src = IRDMA_AE_SOURCE_RSVD;
+ break;
+ case IRDMA_AE_PRIV_OPERATION_DENIED:
+ case IRDMA_AE_AMP_INVALIDATE_TYPE1_MW:
+ case IRDMA_AE_AMP_MWBIND_ZERO_BASED_TYPE1_MW:
+ case IRDMA_AE_AMP_FASTREG_INVALID_PBL_HPS_CFG:
+ case IRDMA_AE_AMP_FASTREG_PBLE_MISMATCH:
+ case IRDMA_AE_UDA_XMIT_DGRAM_TOO_LONG:
+ case IRDMA_AE_UDA_XMIT_BAD_PD:
+ case IRDMA_AE_UDA_XMIT_DGRAM_TOO_SHORT:
+ case IRDMA_AE_BAD_CLOSE:
+ case IRDMA_AE_RDMA_READ_WHILE_ORD_ZERO:
+ case IRDMA_AE_STAG_ZERO_INVALID:
+ case IRDMA_AE_IB_RREQ_AND_Q1_FULL:
+ case IRDMA_AE_IB_INVALID_REQUEST:
+ case IRDMA_AE_WQE_UNEXPECTED_OPCODE:
+ case IRDMA_AE_IB_REMOTE_ACCESS_ERROR:
+ case IRDMA_AE_IB_REMOTE_OP_ERROR:
+ case IRDMA_AE_DDP_UBE_INVALID_DDP_VERSION:
+ case IRDMA_AE_DDP_UBE_INVALID_MO:
+ case IRDMA_AE_DDP_UBE_INVALID_QN:
+ case IRDMA_AE_DDP_NO_L_BIT:
+ case IRDMA_AE_RDMAP_ROE_INVALID_RDMAP_VERSION:
+ case IRDMA_AE_RDMAP_ROE_UNEXPECTED_OPCODE:
+ case IRDMA_AE_ROE_INVALID_RDMA_READ_REQUEST:
+ case IRDMA_AE_ROE_INVALID_RDMA_WRITE_OR_READ_RESP:
+ case IRDMA_AE_ROCE_RSP_LENGTH_ERROR:
+ case IRDMA_AE_INVALID_ARP_ENTRY:
+ case IRDMA_AE_INVALID_TCP_OPTION_RCVD:
+ case IRDMA_AE_STALE_ARP_ENTRY:
+ case IRDMA_AE_INVALID_AH_ENTRY:
+ case IRDMA_AE_LLP_RECEIVED_MPA_CRC_ERROR:
+ case IRDMA_AE_LLP_SEGMENT_TOO_SMALL:
+ case IRDMA_AE_LLP_TOO_MANY_RETRIES:
+ case IRDMA_AE_LLP_TOO_MANY_RNRS:
+ case IRDMA_AE_REMOTE_QP_CATASTROPHIC:
+ case IRDMA_AE_LOCAL_QP_CATASTROPHIC:
+ case IRDMA_AE_RCE_QP_CATASTROPHIC:
+ case IRDMA_AE_LLP_DOUBT_REACHABILITY:
+ case IRDMA_AE_LLP_CONNECTION_ESTABLISHED:
+ case IRDMA_AE_RESET_SENT:
+ case IRDMA_AE_TERMINATE_SENT:
+ case IRDMA_AE_RESET_NOT_SENT:
+ case IRDMA_AE_LCE_QP_CATASTROPHIC:
+ case IRDMA_AE_QP_SUSPEND_COMPLETE:
+ case IRDMA_AE_UDA_L4LEN_INVALID:
+ info->qp = true;
+ info->compl_ctx = compl_ctx;
+ break;
+ case IRDMA_AE_LCE_CQ_CATASTROPHIC:
+ info->cq = true;
+ info->compl_ctx = compl_ctx << 1;
+ ae_src = IRDMA_AE_SOURCE_RSVD;
+ break;
+ case IRDMA_AE_CQP_DEFERRED_COMPLETE:
+ info->def_info = info->wqe_idx;
+ ae_src = IRDMA_AE_SOURCE_RSVD;
+ break;
+ case IRDMA_AE_ROCE_EMPTY_MCG:
+ case IRDMA_AE_ROCE_BAD_MC_IP_ADDR:
+ case IRDMA_AE_ROCE_BAD_MC_QPID:
+ case IRDMA_AE_MCG_QP_PROTOCOL_MISMATCH:
+ fallthrough;
+ case IRDMA_AE_LLP_CONNECTION_RESET:
+ case IRDMA_AE_LLP_SYN_RECEIVED:
+ case IRDMA_AE_LLP_FIN_RECEIVED:
+ case IRDMA_AE_LLP_CLOSE_COMPLETE:
+ case IRDMA_AE_LLP_TERMINATE_RECEIVED:
+ case IRDMA_AE_RDMAP_ROE_BAD_LLP_CLOSE:
+ ae_src = IRDMA_AE_SOURCE_RSVD;
+ info->qp = true;
+ info->compl_ctx = compl_ctx;
+ break;
+ default:
+ break;
+ }
+
+ switch (ae_src) {
+ case IRDMA_AE_SOURCE_RQ:
+ case IRDMA_AE_SOURCE_RQ_0011:
+ info->qp = true;
+ info->rq = true;
+ info->compl_ctx = compl_ctx;
+ info->err_rq_idx_valid = true;
+ break;
+ case IRDMA_AE_SOURCE_CQ:
+ case IRDMA_AE_SOURCE_CQ_0110:
+ case IRDMA_AE_SOURCE_CQ_1010:
+ case IRDMA_AE_SOURCE_CQ_1110:
+ info->cq = true;
+ info->compl_ctx = compl_ctx << 1;
+ break;
+ case IRDMA_AE_SOURCE_SQ:
+ case IRDMA_AE_SOURCE_SQ_0111:
+ info->qp = true;
+ info->sq = true;
+ info->compl_ctx = compl_ctx;
+ break;
+ case IRDMA_AE_SOURCE_IN_RR_WR:
+ info->qp = true;
+ if (aeq->dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3)
+ info->err_rq_idx_valid = true;
+ info->compl_ctx = compl_ctx;
+ info->in_rdrsp_wr = true;
+ break;
+ case IRDMA_AE_SOURCE_IN_RR_WR_1011:
+ info->qp = true;
+ if (aeq->dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3) {
+ info->sq = true;
+ info->err_rq_idx_valid = true;
+ }
+ info->compl_ctx = compl_ctx;
+ info->in_rdrsp_wr = true;
+ break;
+ case IRDMA_AE_SOURCE_OUT_RR:
+ case IRDMA_AE_SOURCE_OUT_RR_1111:
+ info->qp = true;
+ info->compl_ctx = compl_ctx;
+ info->out_rdrsp = true;
+ break;
+ case IRDMA_AE_SOURCE_RSVD:
+ default:
+ break;
+ }
+
+ IRDMA_RING_MOVE_TAIL(aeq->aeq_ring);
+ if (!IRDMA_RING_CURRENT_TAIL(aeq->aeq_ring))
+ aeq->polarity ^= 1;
+
+ return 0;
+}
+
+/**
+ * irdma_sc_repost_aeq_entries - repost completed aeq entries
+ * @dev: sc device struct
+ * @count: allocate count
+ */
+void irdma_sc_repost_aeq_entries(struct irdma_sc_dev *dev, u32 count)
+{
+ writel(count, dev->hw_regs[IRDMA_AEQALLOC]);
+}
+
+/**
+ * irdma_sc_ccq_init - initialize control cq
+ * @cq: sc's cq ctruct
+ * @info: info for control cq initialization
+ */
+int irdma_sc_ccq_init(struct irdma_sc_cq *cq, struct irdma_ccq_init_info *info)
+{
+ u32 pble_obj_cnt;
+
+ if (info->num_elem < info->dev->hw_attrs.uk_attrs.min_hw_cq_size ||
+ info->num_elem > info->dev->hw_attrs.uk_attrs.max_hw_cq_size)
+ return -EINVAL;
+
+ if (info->ceq_id >= info->dev->hmc_fpm_misc.max_ceqs)
+ return -EINVAL;
+
+ pble_obj_cnt = info->dev->hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].cnt;
+
+ if (info->virtual_map && info->first_pm_pbl_idx >= pble_obj_cnt)
+ return -EINVAL;
+
+ cq->cq_pa = info->cq_pa;
+ cq->cq_uk.cq_base = info->cq_base;
+ cq->shadow_area_pa = info->shadow_area_pa;
+ cq->cq_uk.shadow_area = info->shadow_area;
+ cq->shadow_read_threshold = info->shadow_read_threshold;
+ cq->dev = info->dev;
+ cq->ceq_id = info->ceq_id;
+ cq->cq_uk.cq_size = info->num_elem;
+ cq->cq_type = IRDMA_CQ_TYPE_CQP;
+ cq->ceqe_mask = info->ceqe_mask;
+ IRDMA_RING_INIT(cq->cq_uk.cq_ring, info->num_elem);
+ cq->cq_uk.cq_id = 0; /* control cq is id 0 always */
+ cq->ceq_id_valid = info->ceq_id_valid;
+ cq->tph_en = info->tph_en;
+ cq->tph_val = info->tph_val;
+ cq->cq_uk.avoid_mem_cflct = info->avoid_mem_cflct;
+ cq->pbl_list = info->pbl_list;
+ cq->virtual_map = info->virtual_map;
+ cq->pbl_chunk_size = info->pbl_chunk_size;
+ cq->first_pm_pbl_idx = info->first_pm_pbl_idx;
+ cq->cq_uk.polarity = true;
+ cq->vsi = info->vsi;
+ cq->cq_uk.cq_ack_db = cq->dev->cq_ack_db;
+
+ /* Only applicable to CQs other than CCQ so initialize to zero */
+ cq->cq_uk.cqe_alloc_db = NULL;
+
+ info->dev->ccq = cq;
+ return 0;
+}
+
+/**
+ * irdma_sc_ccq_create_done - poll cqp for ccq create
+ * @ccq: ccq sc struct
+ */
+static inline int irdma_sc_ccq_create_done(struct irdma_sc_cq *ccq)
+{
+ struct irdma_sc_cqp *cqp;
+
+ cqp = ccq->dev->cqp;
+
+ return irdma_sc_poll_for_cqp_op_done(cqp, IRDMA_CQP_OP_CREATE_CQ, NULL);
+}
+
+/**
+ * irdma_sc_ccq_create - create control cq
+ * @ccq: ccq sc struct
+ * @scratch: u64 saved to be used during cqp completion
+ * @check_overflow: overlow flag for ccq
+ * @post_sq: flag for cqp db to ring
+ */
+int irdma_sc_ccq_create(struct irdma_sc_cq *ccq, u64 scratch,
+ bool check_overflow, bool post_sq)
+{
+ int ret_code;
+
+ ret_code = irdma_sc_cq_create(ccq, scratch, check_overflow, post_sq);
+ if (ret_code)
+ return ret_code;
+
+ if (post_sq) {
+ ret_code = irdma_sc_ccq_create_done(ccq);
+ if (ret_code)
+ return ret_code;
+ }
+ ccq->dev->cqp->process_cqp_sds = irdma_cqp_sds_cmd;
+
+ return 0;
+}
+
+/**
+ * irdma_sc_ccq_destroy - destroy ccq during close
+ * @ccq: ccq sc struct
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+int irdma_sc_ccq_destroy(struct irdma_sc_cq *ccq, u64 scratch, bool post_sq)
+{
+ struct irdma_sc_cqp *cqp;
+ __le64 *wqe;
+ u64 hdr;
+ int ret_code = 0;
+ u32 tail, val, error;
+
+ cqp = ccq->dev->cqp;
+ wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return -ENOMEM;
+
+ set_64bit_val(wqe, 0, ccq->cq_uk.cq_size);
+ set_64bit_val(wqe, 8, (uintptr_t)ccq >> 1);
+ set_64bit_val(wqe, 40, ccq->shadow_area_pa);
+
+ hdr = ccq->cq_uk.cq_id |
+ FLD_LS_64(ccq->dev, (ccq->ceq_id_valid ? ccq->ceq_id : 0),
+ IRDMA_CQPSQ_CQ_CEQID) |
+ FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_DESTROY_CQ) |
+ FIELD_PREP(IRDMA_CQPSQ_CQ_ENCEQEMASK, ccq->ceqe_mask) |
+ FIELD_PREP(IRDMA_CQPSQ_CQ_CEQIDVALID, ccq->ceq_id_valid) |
+ FIELD_PREP(IRDMA_CQPSQ_TPHEN, ccq->tph_en) |
+ FIELD_PREP(IRDMA_CQPSQ_CQ_AVOIDMEMCNFLCT, ccq->cq_uk.avoid_mem_cflct) |
+ FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity);
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ print_hex_dump_debug("WQE: CCQ_DESTROY WQE", DUMP_PREFIX_OFFSET, 16,
+ 8, wqe, IRDMA_CQP_WQE_SIZE * 8, false);
+ irdma_get_cqp_reg_info(cqp, &val, &tail, &error);
+
+ if (post_sq) {
+ irdma_sc_cqp_post_sq(cqp);
+ ret_code = irdma_cqp_poll_registers(cqp, tail,
+ cqp->dev->hw_attrs.max_done_count);
+ }
+
+ cqp->process_cqp_sds = irdma_update_sds_noccq;
+
+ return ret_code;
+}
+
+/**
+ * irdma_sc_init_iw_hmc() - queries fpm values using cqp and populates hmc_info
+ * @dev : ptr to irdma_dev struct
+ * @hmc_fn_id: hmc function id
+ */
+int irdma_sc_init_iw_hmc(struct irdma_sc_dev *dev, u8 hmc_fn_id)
+{
+ struct irdma_hmc_info *hmc_info;
+ struct irdma_hmc_fpm_misc *hmc_fpm_misc;
+ struct irdma_dma_mem query_fpm_mem;
+ int ret_code = 0;
+ u8 wait_type;
+
+ hmc_info = dev->hmc_info;
+ hmc_fpm_misc = &dev->hmc_fpm_misc;
+ query_fpm_mem.pa = dev->fpm_query_buf_pa;
+ query_fpm_mem.va = dev->fpm_query_buf;
+ hmc_info->hmc_fn_id = hmc_fn_id;
+ wait_type = (u8)IRDMA_CQP_WAIT_POLL_REGS;
+
+ ret_code = irdma_sc_query_fpm_val(dev->cqp, 0, hmc_info->hmc_fn_id,
+ &query_fpm_mem, true, wait_type);
+ if (ret_code)
+ return ret_code;
+
+ /* parse the fpm_query_buf and fill hmc obj info */
+ ret_code = irdma_sc_parse_fpm_query_buf(dev, query_fpm_mem.va, hmc_info,
+ hmc_fpm_misc);
+
+ print_hex_dump_debug("HMC: QUERY FPM BUFFER", DUMP_PREFIX_OFFSET, 16,
+ 8, query_fpm_mem.va, IRDMA_QUERY_FPM_BUF_SIZE,
+ false);
+ return ret_code;
+}
+
+/**
+ * irdma_set_loc_mem() - set a local memory bit field
+ * @buf: ptr to a buffer where local memory gets enabled
+ */
+static void irdma_set_loc_mem(__le64 *buf)
+{
+ u64 loc_mem_en = BIT_ULL(ENABLE_LOC_MEM);
+ u32 offset;
+ u64 temp;
+
+ for (offset = 0; offset < IRDMA_COMMIT_FPM_BUF_SIZE;
+ offset += sizeof(__le64)) {
+ if (offset == IRDMA_PBLE_COMMIT_OFFSET)
+ continue;
+ get_64bit_val(buf, offset, &temp);
+ if (temp)
+ set_64bit_val(buf, offset, temp | loc_mem_en);
+ }
+}
+
+/**
+ * irdma_sc_cfg_iw_fpm() - commits hmc obj cnt values using cqp
+ * command and populates fpm base address in hmc_info
+ * @dev : ptr to irdma_dev struct
+ * @hmc_fn_id: hmc function id
+ */
+static int irdma_sc_cfg_iw_fpm(struct irdma_sc_dev *dev, u8 hmc_fn_id)
+{
+ struct irdma_hmc_info *hmc_info;
+ struct irdma_hmc_obj_info *obj_info;
+ __le64 *buf;
+ struct irdma_dma_mem commit_fpm_mem;
+ int ret_code = 0;
+ u8 wait_type;
+
+ hmc_info = dev->hmc_info;
+ obj_info = hmc_info->hmc_obj;
+ buf = dev->fpm_commit_buf;
+
+ set_64bit_val(buf, 0, (u64)obj_info[IRDMA_HMC_IW_QP].cnt);
+ set_64bit_val(buf, 8, (u64)obj_info[IRDMA_HMC_IW_CQ].cnt);
+ set_64bit_val(buf, 16, (u64)obj_info[IRDMA_HMC_IW_SRQ].cnt);
+ set_64bit_val(buf, 24, (u64)obj_info[IRDMA_HMC_IW_HTE].cnt);
+ set_64bit_val(buf, 32, (u64)obj_info[IRDMA_HMC_IW_ARP].cnt);
+ set_64bit_val(buf, 40, (u64)0); /* RSVD */
+ set_64bit_val(buf, 48, (u64)obj_info[IRDMA_HMC_IW_MR].cnt);
+ set_64bit_val(buf, 56, (u64)obj_info[IRDMA_HMC_IW_XF].cnt);
+ set_64bit_val(buf, 64, (u64)obj_info[IRDMA_HMC_IW_XFFL].cnt);
+ set_64bit_val(buf, 72, (u64)obj_info[IRDMA_HMC_IW_Q1].cnt);
+ set_64bit_val(buf, 80, (u64)obj_info[IRDMA_HMC_IW_Q1FL].cnt);
+ set_64bit_val(buf, 88,
+ (u64)obj_info[IRDMA_HMC_IW_TIMER].cnt);
+ set_64bit_val(buf, 96,
+ (u64)obj_info[IRDMA_HMC_IW_FSIMC].cnt);
+ set_64bit_val(buf, 104,
+ (u64)obj_info[IRDMA_HMC_IW_FSIAV].cnt);
+ set_64bit_val(buf, 112,
+ (u64)obj_info[IRDMA_HMC_IW_PBLE].cnt);
+ set_64bit_val(buf, 120, (u64)0); /* RSVD */
+ set_64bit_val(buf, 128, (u64)obj_info[IRDMA_HMC_IW_RRF].cnt);
+ set_64bit_val(buf, 136,
+ (u64)obj_info[IRDMA_HMC_IW_RRFFL].cnt);
+ set_64bit_val(buf, 144, (u64)obj_info[IRDMA_HMC_IW_HDR].cnt);
+ set_64bit_val(buf, 152, (u64)obj_info[IRDMA_HMC_IW_MD].cnt);
+ set_64bit_val(buf, 160,
+ (u64)obj_info[IRDMA_HMC_IW_OOISC].cnt);
+ set_64bit_val(buf, 168,
+ (u64)obj_info[IRDMA_HMC_IW_OOISCFFL].cnt);
+ if (dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3 &&
+ dev->hmc_fpm_misc.loc_mem_pages)
+ irdma_set_loc_mem(buf);
+ commit_fpm_mem.pa = dev->fpm_commit_buf_pa;
+ commit_fpm_mem.va = dev->fpm_commit_buf;
+
+ wait_type = (u8)IRDMA_CQP_WAIT_POLL_REGS;
+ print_hex_dump_debug("HMC: COMMIT FPM BUFFER", DUMP_PREFIX_OFFSET, 16,
+ 8, commit_fpm_mem.va, IRDMA_COMMIT_FPM_BUF_SIZE,
+ false);
+ ret_code = irdma_sc_commit_fpm_val(dev->cqp, 0, hmc_info->hmc_fn_id,
+ &commit_fpm_mem, true, wait_type);
+ if (!ret_code)
+ irdma_sc_parse_fpm_commit_buf(dev, dev->fpm_commit_buf,
+ hmc_info->hmc_obj,
+ &hmc_info->sd_table.sd_cnt);
+ print_hex_dump_debug("HMC: COMMIT FPM BUFFER", DUMP_PREFIX_OFFSET, 16,
+ 8, commit_fpm_mem.va, IRDMA_COMMIT_FPM_BUF_SIZE,
+ false);
+
+ return ret_code;
+}
+
+/**
+ * cqp_sds_wqe_fill - fill cqp wqe doe sd
+ * @cqp: struct for cqp hw
+ * @info: sd info for wqe
+ * @scratch: u64 saved to be used during cqp completion
+ */
+static int cqp_sds_wqe_fill(struct irdma_sc_cqp *cqp,
+ struct irdma_update_sds_info *info, u64 scratch)
+{
+ u64 data;
+ u64 hdr;
+ __le64 *wqe;
+ int mem_entries, wqe_entries;
+ struct irdma_dma_mem *sdbuf = &cqp->sdbuf;
+ u64 offset = 0;
+ u32 wqe_idx;
+
+ wqe = irdma_sc_cqp_get_next_send_wqe_idx(cqp, scratch, &wqe_idx);
+ if (!wqe)
+ return -ENOMEM;
+
+ wqe_entries = (info->cnt > 3) ? 3 : info->cnt;
+ mem_entries = info->cnt - wqe_entries;
+
+ if (mem_entries) {
+ offset = wqe_idx * IRDMA_UPDATE_SD_BUFF_SIZE;
+ memcpy(((char *)sdbuf->va + offset), &info->entry[3], mem_entries << 4);
+
+ data = (u64)sdbuf->pa + offset;
+ } else {
+ data = 0;
+ }
+ data |= FIELD_PREP(IRDMA_CQPSQ_UPESD_HMCFNID, info->hmc_fn_id);
+ set_64bit_val(wqe, 16, data);
+
+ switch (wqe_entries) {
+ case 3:
+ set_64bit_val(wqe, 48,
+ (FIELD_PREP(IRDMA_CQPSQ_UPESD_SDCMD, info->entry[2].cmd) |
+ FIELD_PREP(IRDMA_CQPSQ_UPESD_ENTRY_VALID, 1)));
+
+ set_64bit_val(wqe, 56, info->entry[2].data);
+ fallthrough;
+ case 2:
+ set_64bit_val(wqe, 32,
+ (FIELD_PREP(IRDMA_CQPSQ_UPESD_SDCMD, info->entry[1].cmd) |
+ FIELD_PREP(IRDMA_CQPSQ_UPESD_ENTRY_VALID, 1)));
+
+ set_64bit_val(wqe, 40, info->entry[1].data);
+ fallthrough;
+ case 1:
+ set_64bit_val(wqe, 0,
+ FIELD_PREP(IRDMA_CQPSQ_UPESD_SDCMD, info->entry[0].cmd));
+
+ set_64bit_val(wqe, 8, info->entry[0].data);
+ break;
+ default:
+ break;
+ }
+
+ hdr = FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_UPDATE_PE_SDS) |
+ FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity) |
+ FIELD_PREP(IRDMA_CQPSQ_UPESD_ENTRY_COUNT, mem_entries);
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ if (mem_entries)
+ print_hex_dump_debug("WQE: UPDATE_PE_SDS WQE Buffer",
+ DUMP_PREFIX_OFFSET, 16, 8,
+ (char *)sdbuf->va + offset,
+ mem_entries << 4, false);
+
+ print_hex_dump_debug("WQE: UPDATE_PE_SDS WQE", DUMP_PREFIX_OFFSET, 16,
+ 8, wqe, IRDMA_CQP_WQE_SIZE * 8, false);
+
+ return 0;
+}
+
+/**
+ * irdma_update_pe_sds - cqp wqe for sd
+ * @dev: ptr to irdma_dev struct
+ * @info: sd info for sd's
+ * @scratch: u64 saved to be used during cqp completion
+ */
+static int irdma_update_pe_sds(struct irdma_sc_dev *dev,
+ struct irdma_update_sds_info *info, u64 scratch)
+{
+ struct irdma_sc_cqp *cqp = dev->cqp;
+ int ret_code;
+
+ ret_code = cqp_sds_wqe_fill(cqp, info, scratch);
+ if (!ret_code)
+ irdma_sc_cqp_post_sq(cqp);
+
+ return ret_code;
+}
+
+/**
+ * irdma_update_sds_noccq - update sd before ccq created
+ * @dev: sc device struct
+ * @info: sd info for sd's
+ */
+int irdma_update_sds_noccq(struct irdma_sc_dev *dev,
+ struct irdma_update_sds_info *info)
+{
+ u32 error, val, tail;
+ struct irdma_sc_cqp *cqp = dev->cqp;
+ int ret_code;
+
+ ret_code = cqp_sds_wqe_fill(cqp, info, 0);
+ if (ret_code)
+ return ret_code;
+
+ irdma_get_cqp_reg_info(cqp, &val, &tail, &error);
+
+ irdma_sc_cqp_post_sq(cqp);
+ return irdma_cqp_poll_registers(cqp, tail,
+ cqp->dev->hw_attrs.max_done_count);
+}
+
+/**
+ * irdma_sc_static_hmc_pages_allocated - cqp wqe to allocate hmc pages
+ * @cqp: struct for cqp hw
+ * @scratch: u64 saved to be used during cqp completion
+ * @hmc_fn_id: hmc function id
+ * @post_sq: flag for cqp db to ring
+ * @poll_registers: flag to poll register for cqp completion
+ */
+int irdma_sc_static_hmc_pages_allocated(struct irdma_sc_cqp *cqp, u64 scratch,
+ u8 hmc_fn_id, bool post_sq,
+ bool poll_registers)
+{
+ u64 hdr;
+ __le64 *wqe;
+ u32 tail, val, error;
+
+ wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return -ENOMEM;
+
+ set_64bit_val(wqe, 16,
+ FIELD_PREP(IRDMA_SHMC_PAGE_ALLOCATED_HMC_FN_ID, hmc_fn_id));
+
+ hdr = FIELD_PREP(IRDMA_CQPSQ_OPCODE,
+ IRDMA_CQP_OP_SHMC_PAGES_ALLOCATED) |
+ FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity);
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ print_hex_dump_debug("WQE: SHMC_PAGES_ALLOCATED WQE",
+ DUMP_PREFIX_OFFSET, 16, 8, wqe,
+ IRDMA_CQP_WQE_SIZE * 8, false);
+ irdma_get_cqp_reg_info(cqp, &val, &tail, &error);
+
+ if (post_sq) {
+ irdma_sc_cqp_post_sq(cqp);
+ if (poll_registers)
+ /* check for cqp sq tail update */
+ return irdma_cqp_poll_registers(cqp, tail,
+ cqp->dev->hw_attrs.max_done_count);
+ else
+ return irdma_sc_poll_for_cqp_op_done(cqp,
+ IRDMA_CQP_OP_SHMC_PAGES_ALLOCATED,
+ NULL);
+ }
+
+ return 0;
+}
+
+/**
+ * irdma_cqp_ring_full - check if cqp ring is full
+ * @cqp: struct for cqp hw
+ */
+static bool irdma_cqp_ring_full(struct irdma_sc_cqp *cqp)
+{
+ return IRDMA_RING_FULL_ERR(cqp->sq_ring);
+}
+
+/**
+ * irdma_est_sd - returns approximate number of SDs for HMC
+ * @dev: sc device struct
+ * @hmc_info: hmc structure, size and count for HMC objects
+ */
+static u32 irdma_est_sd(struct irdma_sc_dev *dev,
+ struct irdma_hmc_info *hmc_info)
+{
+ struct irdma_hmc_obj_info *pble_info;
+ int i;
+ u64 size = 0;
+ u64 sd;
+
+ for (i = IRDMA_HMC_IW_QP; i < IRDMA_HMC_IW_MAX; i++)
+ if (i != IRDMA_HMC_IW_PBLE)
+ size += round_up(hmc_info->hmc_obj[i].cnt *
+ hmc_info->hmc_obj[i].size, 512);
+
+ pble_info = &hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE];
+ if (dev->privileged)
+ size += round_up(pble_info->cnt * pble_info->size, 512);
+ if (size & 0x1FFFFF)
+ sd = (size >> 21) + 1; /* add 1 for remainder */
+ else
+ sd = size >> 21;
+ if (!dev->privileged && !dev->hmc_fpm_misc.loc_mem_pages) {
+ /* 2MB alignment for VF PBLE HMC */
+ size = pble_info->cnt * pble_info->size;
+ if (size & 0x1FFFFF)
+ sd += (size >> 21) + 1; /* add 1 for remainder */
+ else
+ sd += size >> 21;
+ }
+ if (sd > 0xFFFFFFFF) {
+ ibdev_dbg(to_ibdev(dev), "HMC: sd overflow[%lld]\n", sd);
+ sd = 0xFFFFFFFF - 1;
+ }
+
+ return (u32)sd;
+}
+
+/**
+ * irdma_sc_query_rdma_features - query RDMA features and FW ver
+ * @cqp: struct for cqp hw
+ * @buf: buffer to hold query info
+ * @scratch: u64 saved to be used during cqp completion
+ */
+static int irdma_sc_query_rdma_features(struct irdma_sc_cqp *cqp,
+ struct irdma_dma_mem *buf, u64 scratch)
+{
+ u32 tail, val, error;
+ __le64 *wqe;
+ int status;
+ u64 temp;
+
+ wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return -ENOMEM;
+
+ temp = buf->pa;
+ set_64bit_val(wqe, 32, temp);
+
+ temp = FIELD_PREP(IRDMA_CQPSQ_QUERY_RDMA_FEATURES_WQEVALID,
+ cqp->polarity) |
+ FIELD_PREP(IRDMA_CQPSQ_QUERY_RDMA_FEATURES_BUF_LEN, buf->size) |
+ FIELD_PREP(IRDMA_CQPSQ_UP_OP, IRDMA_CQP_OP_QUERY_RDMA_FEATURES);
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, temp);
+
+ print_hex_dump_debug("WQE: QUERY RDMA FEATURES", DUMP_PREFIX_OFFSET,
+ 16, 8, wqe, IRDMA_CQP_WQE_SIZE * 8, false);
+ irdma_get_cqp_reg_info(cqp, &val, &tail, &error);
+
+ irdma_sc_cqp_post_sq(cqp);
+ status = irdma_cqp_poll_registers(cqp, tail,
+ cqp->dev->hw_attrs.max_done_count);
+ if (error || status)
+ status = -EINVAL;
+
+ return status;
+}
+
+/**
+ * irdma_get_rdma_features - get RDMA features
+ * @dev: sc device struct
+ */
+int irdma_get_rdma_features(struct irdma_sc_dev *dev)
+{
+ int ret_code;
+ struct irdma_dma_mem feat_buf;
+ u64 temp;
+ u16 byte_idx, feat_type, feat_cnt, feat_idx;
+
+ feat_buf.size = ALIGN(IRDMA_FEATURE_BUF_SIZE,
+ IRDMA_FEATURE_BUF_ALIGNMENT);
+ feat_buf.va = dma_alloc_coherent(dev->hw->device, feat_buf.size,
+ &feat_buf.pa, GFP_KERNEL);
+ if (!feat_buf.va)
+ return -ENOMEM;
+
+ ret_code = irdma_sc_query_rdma_features(dev->cqp, &feat_buf, 0);
+ if (ret_code)
+ goto exit;
+
+ get_64bit_val(feat_buf.va, 0, &temp);
+ feat_cnt = (u16)FIELD_GET(IRDMA_FEATURE_CNT, temp);
+ if (feat_cnt < 2) {
+ ret_code = -EINVAL;
+ goto exit;
+ } else if (feat_cnt > IRDMA_MAX_FEATURES) {
+ ibdev_dbg(to_ibdev(dev),
+ "DEV: feature buf size insufficient, retrying with larger buffer\n");
+ dma_free_coherent(dev->hw->device, feat_buf.size, feat_buf.va,
+ feat_buf.pa);
+ feat_buf.va = NULL;
+ feat_buf.size = ALIGN(8 * feat_cnt,
+ IRDMA_FEATURE_BUF_ALIGNMENT);
+ feat_buf.va = dma_alloc_coherent(dev->hw->device,
+ feat_buf.size, &feat_buf.pa,
+ GFP_KERNEL);
+ if (!feat_buf.va)
+ return -ENOMEM;
+
+ ret_code = irdma_sc_query_rdma_features(dev->cqp, &feat_buf, 0);
+ if (ret_code)
+ goto exit;
+
+ get_64bit_val(feat_buf.va, 0, &temp);
+ feat_cnt = (u16)FIELD_GET(IRDMA_FEATURE_CNT, temp);
+ if (feat_cnt < 2) {
+ ret_code = -EINVAL;
+ goto exit;
+ }
+ }
+
+ print_hex_dump_debug("WQE: QUERY RDMA FEATURES", DUMP_PREFIX_OFFSET,
+ 16, 8, feat_buf.va, feat_cnt * 8, false);
+
+ for (byte_idx = 0, feat_idx = 0; feat_idx < min(feat_cnt, (u16)IRDMA_MAX_FEATURES);
+ feat_idx++, byte_idx += 8) {
+ get_64bit_val(feat_buf.va, byte_idx, &temp);
+ feat_type = FIELD_GET(IRDMA_FEATURE_TYPE, temp);
+ if (feat_type >= IRDMA_MAX_FEATURES) {
+ ibdev_dbg(to_ibdev(dev),
+ "DEV: found unrecognized feature type %d\n",
+ feat_type);
+ continue;
+ }
+ dev->feature_info[feat_type] = temp;
+ }
+
+ if (dev->feature_info[IRDMA_FTN_FLAGS] & IRDMA_ATOMICS_ALLOWED_BIT)
+ dev->hw_attrs.uk_attrs.feature_flags |= IRDMA_FEATURE_ATOMIC_OPS;
+
+exit:
+ dma_free_coherent(dev->hw->device, feat_buf.size, feat_buf.va,
+ feat_buf.pa);
+ feat_buf.va = NULL;
+ return ret_code;
+}
+
+static u32 irdma_q1_cnt(struct irdma_sc_dev *dev,
+ struct irdma_hmc_info *hmc_info, u32 qpwanted)
+{
+ u32 q1_cnt;
+
+ if (dev->hw_attrs.uk_attrs.hw_rev == IRDMA_GEN_1) {
+ q1_cnt = roundup_pow_of_two(dev->hw_attrs.max_hw_ird * 2 * qpwanted);
+ } else {
+ if (dev->cqp->protocol_used != IRDMA_IWARP_PROTOCOL_ONLY)
+ q1_cnt = roundup_pow_of_two(dev->hw_attrs.max_hw_ird * 2 * qpwanted + 512);
+ else
+ q1_cnt = dev->hw_attrs.max_hw_ird * 2 * qpwanted;
+ }
+
+ return q1_cnt;
+}
+
+static void cfg_fpm_value_gen_1(struct irdma_sc_dev *dev,
+ struct irdma_hmc_info *hmc_info, u32 qpwanted)
+{
+ hmc_info->hmc_obj[IRDMA_HMC_IW_XF].cnt = roundup_pow_of_two(qpwanted * dev->hw_attrs.max_hw_wqes);
+}
+
+static void cfg_fpm_value_gen_2(struct irdma_sc_dev *dev,
+ struct irdma_hmc_info *hmc_info, u32 qpwanted)
+{
+ struct irdma_hmc_fpm_misc *hmc_fpm_misc = &dev->hmc_fpm_misc;
+
+ hmc_info->hmc_obj[IRDMA_HMC_IW_XF].cnt =
+ 4 * hmc_fpm_misc->xf_block_size * qpwanted;
+
+ hmc_info->hmc_obj[IRDMA_HMC_IW_HDR].cnt = qpwanted;
+
+ if (hmc_info->hmc_obj[IRDMA_HMC_IW_RRF].max_cnt)
+ hmc_info->hmc_obj[IRDMA_HMC_IW_RRF].cnt = 32 * qpwanted;
+ if (hmc_info->hmc_obj[IRDMA_HMC_IW_RRFFL].max_cnt)
+ hmc_info->hmc_obj[IRDMA_HMC_IW_RRFFL].cnt =
+ hmc_info->hmc_obj[IRDMA_HMC_IW_RRF].cnt /
+ hmc_fpm_misc->rrf_block_size;
+ if (hmc_info->hmc_obj[IRDMA_HMC_IW_OOISC].max_cnt)
+ hmc_info->hmc_obj[IRDMA_HMC_IW_OOISC].cnt = 32 * qpwanted;
+ if (hmc_info->hmc_obj[IRDMA_HMC_IW_OOISCFFL].max_cnt)
+ hmc_info->hmc_obj[IRDMA_HMC_IW_OOISCFFL].cnt =
+ hmc_info->hmc_obj[IRDMA_HMC_IW_OOISC].cnt /
+ hmc_fpm_misc->ooiscf_block_size;
+}
+
+/**
+ * irdma_get_rsrc_mem_config - configure resources if local memory or host
+ * @dev: sc device struct
+ * @is_mrte_loc_mem: if true, MR's to be in local memory because sd=loc pages
+ *
+ * Only mr can be configured host or local memory if qp's are in local memory.
+ * If qp is in local memory, then all resource object will be in local memory
+ * except mr which can be either host or local memory. The only exception
+ * is pble's which are always in host memory.
+ */
+static void irdma_get_rsrc_mem_config(struct irdma_sc_dev *dev, bool is_mrte_loc_mem)
+{
+ struct irdma_hmc_info *hmc_info = dev->hmc_info;
+ int i;
+
+ for (i = IRDMA_HMC_IW_QP; i < IRDMA_HMC_IW_MAX; i++)
+ hmc_info->hmc_obj[i].mem_loc = IRDMA_LOC_MEM;
+
+ if (dev->feature_info[IRDMA_OBJ_1] && !is_mrte_loc_mem) {
+ u8 mem_type;
+
+ mem_type = (u8)FIELD_GET(IRDMA_MR_MEM_LOC, dev->feature_info[IRDMA_OBJ_1]);
+
+ hmc_info->hmc_obj[IRDMA_HMC_IW_MR].mem_loc =
+ (mem_type & IRDMA_OBJ_LOC_MEM_BIT) ?
+ IRDMA_LOC_MEM : IRDMA_HOST_MEM;
+ } else {
+ hmc_info->hmc_obj[IRDMA_HMC_IW_MR].mem_loc = IRDMA_LOC_MEM;
+ }
+
+ hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].mem_loc = IRDMA_HOST_MEM;
+
+ ibdev_dbg(to_ibdev(dev), "HMC: INFO: mrte_mem_loc = %d pble = %d\n",
+ hmc_info->hmc_obj[IRDMA_HMC_IW_MR].mem_loc,
+ hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].mem_loc);
+}
+
+/**
+ * irdma_cfg_sd_mem - allocate sd memory
+ * @dev: sc device struct
+ * @hmc_info: ptr to irdma_hmc_obj_info struct
+ */
+static int irdma_cfg_sd_mem(struct irdma_sc_dev *dev,
+ struct irdma_hmc_info *hmc_info)
+{
+ struct irdma_virt_mem virt_mem;
+ u32 mem_size;
+
+ mem_size = sizeof(struct irdma_hmc_sd_entry) * hmc_info->sd_table.sd_cnt;
+ virt_mem.size = mem_size;
+ virt_mem.va = kzalloc(virt_mem.size, GFP_KERNEL);
+ if (!virt_mem.va)
+ return -ENOMEM;
+ hmc_info->sd_table.sd_entry = virt_mem.va;
+
+ return 0;
+}
+
+/**
+ * irdma_get_objs_pages - get number of 2M pages needed
+ * @dev: sc device struct
+ * @hmc_info: pointer to the HMC configuration information struct
+ * @mem_loc: pages for local or host memory
+ */
+static u32 irdma_get_objs_pages(struct irdma_sc_dev *dev,
+ struct irdma_hmc_info *hmc_info,
+ enum irdma_hmc_obj_mem mem_loc)
+{
+ u64 size = 0;
+ int i;
+
+ for (i = IRDMA_HMC_IW_QP; i < IRDMA_HMC_IW_MAX; i++) {
+ if (hmc_info->hmc_obj[i].mem_loc == mem_loc) {
+ size += round_up(hmc_info->hmc_obj[i].cnt *
+ hmc_info->hmc_obj[i].size, 512);
+ }
+ }
+
+ return DIV_ROUND_UP(size, IRDMA_HMC_PAGE_SIZE);
+}
+
+/**
+ * irdma_set_host_hmc_rsrc_gen_3 - calculate host hmc resources for gen 3
+ * @dev: sc device struct
+ */
+static void irdma_set_host_hmc_rsrc_gen_3(struct irdma_sc_dev *dev)
+{
+ struct irdma_hmc_fpm_misc *hmc_fpm_misc;
+ struct irdma_hmc_info *hmc_info;
+ enum irdma_hmc_obj_mem mrte_loc;
+ u32 mrwanted, pblewanted;
+ u32 avail_sds, mr_sds;
+
+ hmc_info = dev->hmc_info;
+ hmc_fpm_misc = &dev->hmc_fpm_misc;
+ avail_sds = hmc_fpm_misc->max_sds;
+ mrte_loc = hmc_info->hmc_obj[IRDMA_HMC_IW_MR].mem_loc;
+ mrwanted = hmc_info->hmc_obj[IRDMA_HMC_IW_MR].cnt;
+ pblewanted = hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].max_cnt;
+
+ if (mrte_loc == IRDMA_HOST_MEM && avail_sds > IRDMA_MIN_PBLE_PAGES) {
+ mr_sds = avail_sds - IRDMA_MIN_PBLE_PAGES;
+ mrwanted = min(mrwanted, mr_sds * MAX_MR_PER_SD);
+ hmc_info->hmc_obj[IRDMA_HMC_IW_MR].cnt = mrwanted;
+ avail_sds -= DIV_ROUND_UP(mrwanted, MAX_MR_PER_SD);
+ }
+
+ if (FIELD_GET(IRDMA_MANAGE_RSRC_VER2, dev->feature_info[IRDMA_FTN_FLAGS]) &&
+ pblewanted > avail_sds * MAX_PBLE_PER_SD)
+ ibdev_dbg(to_ibdev(dev),
+ "HMC: Warn: Resource version 2: pble wanted = 0x%x available = 0x%x\n",
+ pblewanted, avail_sds * MAX_PBLE_PER_SD);
+
+ pblewanted = min(pblewanted, avail_sds * MAX_PBLE_PER_SD);
+ hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].cnt = pblewanted;
+}
+
+/**
+ * irdma_verify_commit_fpm_gen_3 - verify query fpm values
+ * @dev: sc device struct
+ * @max_pages: max local memory available
+ * @qpwanted: number of qp's wanted
+ */
+static int irdma_verify_commit_fpm_gen_3(struct irdma_sc_dev *dev,
+ u32 max_pages,
+ u32 qpwanted)
+{
+ struct irdma_hmc_fpm_misc *hmc_fpm_misc;
+ u32 rrf_cnt, xf_cnt, timer_cnt, pages_needed;
+ struct irdma_hmc_info *hmc_info;
+ u32 rrffl_cnt = 0;
+ u32 xffl_cnt = 0;
+ u32 q1fl_cnt;
+
+ hmc_info = dev->hmc_info;
+ hmc_fpm_misc = &dev->hmc_fpm_misc;
+
+ rrf_cnt = roundup_pow_of_two(IRDMA_RRF_MULTIPLIER * qpwanted);
+
+ if (hmc_info->hmc_obj[IRDMA_HMC_IW_RRFFL].max_cnt)
+ rrffl_cnt =
+ hmc_info->hmc_obj[IRDMA_HMC_IW_RRF].cnt /
+ hmc_fpm_misc->rrf_block_size;
+
+ xf_cnt = roundup_pow_of_two(IRDMA_XF_MULTIPLIER * qpwanted);
+
+ if (xf_cnt)
+ xffl_cnt = xf_cnt / hmc_fpm_misc->xf_block_size;
+
+ timer_cnt = (round_up(qpwanted, 512) / 512 + 1) *
+ hmc_fpm_misc->timer_bucket;
+
+ q1fl_cnt = hmc_info->hmc_obj[IRDMA_HMC_IW_Q1].cnt / hmc_fpm_misc->q1_block_size;
+
+ pages_needed = irdma_get_objs_pages(dev, hmc_info, IRDMA_LOC_MEM);
+ if (pages_needed > max_pages) {
+ ibdev_dbg(to_ibdev(dev),
+ "HMC: FAIL: SW counts rrf_cnt = %u rrffl_cnt = %u timer_cnt = %u",
+ rrf_cnt, rrffl_cnt, timer_cnt);
+ ibdev_dbg(to_ibdev(dev),
+ "HMC: FAIL: SW counts xf_cnt = %u xffl_cnt = %u q1fl_cnt = %u",
+ xf_cnt, xffl_cnt, q1fl_cnt);
+
+ return -EINVAL;
+ }
+
+ hmc_fpm_misc->max_sds -= pages_needed;
+ hmc_fpm_misc->loc_mem_pages -= pages_needed;
+
+ return 0;
+}
+
+/**
+ * irdma_set_loc_hmc_rsrc_gen_3 - calculate hmc resources for gen 3
+ * @dev: sc device struct
+ * @max_pages: max local memory available
+ * @qpwanted: number of qp's wanted
+ */
+static int irdma_set_loc_hmc_rsrc_gen_3(struct irdma_sc_dev *dev,
+ u32 max_pages,
+ u32 qpwanted)
+{
+ struct irdma_hmc_fpm_misc *hmc_fpm_misc;
+ u32 rrf_cnt, xf_cnt, timer_cnt, pages_needed;
+ struct irdma_hmc_info *hmc_info;
+ u32 ird, ord;
+
+ if (FIELD_GET(IRDMA_MANAGE_RSRC_VER2, dev->feature_info[IRDMA_FTN_FLAGS]))
+ return irdma_verify_commit_fpm_gen_3(dev, max_pages, qpwanted);
+
+ hmc_info = dev->hmc_info;
+ hmc_fpm_misc = &dev->hmc_fpm_misc;
+ ird = dev->hw_attrs.max_hw_ird;
+ ord = dev->hw_attrs.max_hw_ord;
+
+ hmc_info->hmc_obj[IRDMA_HMC_IW_HDR].cnt = qpwanted;
+ hmc_info->hmc_obj[IRDMA_HMC_IW_QP].cnt = qpwanted;
+
+ hmc_info->hmc_obj[IRDMA_HMC_IW_CQ].cnt =
+ min(hmc_info->hmc_obj[IRDMA_HMC_IW_CQ].cnt, qpwanted * 2);
+
+ hmc_info->hmc_obj[IRDMA_HMC_IW_FSIAV].cnt =
+ min(qpwanted * 8, hmc_info->hmc_obj[IRDMA_HMC_IW_FSIAV].max_cnt);
+
+ rrf_cnt = roundup_pow_of_two(IRDMA_RRF_MULTIPLIER * qpwanted);
+ hmc_info->hmc_obj[IRDMA_HMC_IW_RRF].cnt =
+ min(hmc_info->hmc_obj[IRDMA_HMC_IW_RRF].max_cnt, rrf_cnt);
+
+ if (hmc_info->hmc_obj[IRDMA_HMC_IW_RRFFL].max_cnt)
+ hmc_info->hmc_obj[IRDMA_HMC_IW_RRFFL].cnt =
+ hmc_info->hmc_obj[IRDMA_HMC_IW_RRF].cnt /
+ hmc_fpm_misc->rrf_block_size;
+
+ xf_cnt = roundup_pow_of_two(IRDMA_XF_MULTIPLIER * qpwanted);
+ hmc_info->hmc_obj[IRDMA_HMC_IW_XF].cnt =
+ min(hmc_info->hmc_obj[IRDMA_HMC_IW_XF].max_cnt, xf_cnt);
+ hmc_info->hmc_obj[IRDMA_HMC_IW_XFFL].cnt =
+ xf_cnt / hmc_fpm_misc->xf_block_size;
+
+ timer_cnt = (round_up(qpwanted, 512) / 512 + 1) *
+ hmc_fpm_misc->timer_bucket;
+ hmc_info->hmc_obj[IRDMA_HMC_IW_TIMER].cnt =
+ min(timer_cnt, hmc_info->hmc_obj[IRDMA_HMC_IW_TIMER].cnt);
+
+ do {
+ hmc_info->hmc_obj[IRDMA_HMC_IW_Q1].cnt = roundup_pow_of_two(ird * 2 * qpwanted);
+ hmc_info->hmc_obj[IRDMA_HMC_IW_Q1FL].cnt =
+ hmc_info->hmc_obj[IRDMA_HMC_IW_Q1].cnt / hmc_fpm_misc->q1_block_size;
+
+ pages_needed = irdma_get_objs_pages(dev, hmc_info, IRDMA_LOC_MEM);
+ if (pages_needed <= max_pages)
+ break;
+
+ ird /= 2;
+ ord /= 2;
+ } while (ird >= IRDMA_MIN_IRD);
+
+ if (ird < IRDMA_MIN_IRD) {
+ ibdev_dbg(to_ibdev(dev), "HMC: FAIL: IRD=%u Q1 CNT = %u\n",
+ ird, hmc_info->hmc_obj[IRDMA_HMC_IW_Q1].cnt);
+ return -EINVAL;
+ }
+
+ dev->hw_attrs.max_hw_ird = ird;
+ dev->hw_attrs.max_hw_ord = ord;
+ hmc_fpm_misc->max_sds -= pages_needed;
+
+ return 0;
+}
+
+/**
+ * cfg_fpm_value_gen_3 - configure fpm for gen 3
+ * @dev: sc device struct
+ * @hmc_info: ptr to irdma_hmc_obj_info struct
+ * @hmc_fpm_misc: ptr to fpm data
+ */
+static int cfg_fpm_value_gen_3(struct irdma_sc_dev *dev,
+ struct irdma_hmc_info *hmc_info,
+ struct irdma_hmc_fpm_misc *hmc_fpm_misc)
+{
+ enum irdma_hmc_obj_mem mrte_loc;
+ u32 mrwanted, qpwanted;
+ int i, ret_code = 0;
+ u32 loc_mem_pages;
+ bool is_mrte_loc_mem;
+
+ loc_mem_pages = hmc_fpm_misc->loc_mem_pages;
+ is_mrte_loc_mem = hmc_fpm_misc->loc_mem_pages == hmc_fpm_misc->max_sds ?
+ true : false;
+
+ irdma_get_rsrc_mem_config(dev, is_mrte_loc_mem);
+ mrte_loc = hmc_info->hmc_obj[IRDMA_HMC_IW_MR].mem_loc;
+
+ if (is_mrte_loc_mem)
+ loc_mem_pages -= IRDMA_MIN_PBLE_PAGES;
+
+ ibdev_dbg(to_ibdev(dev),
+ "HMC: mrte_loc %d loc_mem %u fpm max sds %u host_obj %d\n",
+ hmc_info->hmc_obj[IRDMA_HMC_IW_MR].mem_loc,
+ hmc_fpm_misc->loc_mem_pages, hmc_fpm_misc->max_sds,
+ is_mrte_loc_mem);
+
+ mrwanted = hmc_info->hmc_obj[IRDMA_HMC_IW_MR].max_cnt;
+ qpwanted = hmc_info->hmc_obj[IRDMA_HMC_IW_QP].max_cnt;
+ hmc_info->hmc_obj[IRDMA_HMC_IW_HDR].cnt = qpwanted;
+
+ hmc_info->hmc_obj[IRDMA_HMC_IW_OOISC].max_cnt = 0;
+ hmc_info->hmc_obj[IRDMA_HMC_IW_OOISCFFL].max_cnt = 0;
+ hmc_info->hmc_obj[IRDMA_HMC_IW_HTE].max_cnt = 0;
+ hmc_info->hmc_obj[IRDMA_HMC_IW_FSIMC].max_cnt = 0;
+
+ if (!FIELD_GET(IRDMA_MANAGE_RSRC_VER2, dev->feature_info[IRDMA_FTN_FLAGS]))
+ hmc_info->hmc_obj[IRDMA_HMC_IW_FSIAV].max_cnt =
+ min(hmc_info->hmc_obj[IRDMA_HMC_IW_FSIAV].max_cnt,
+ (u32)IRDMA_FSIAV_CNT_MAX);
+
+ for (i = IRDMA_HMC_IW_QP; i < IRDMA_HMC_IW_MAX; i++)
+ hmc_info->hmc_obj[i].cnt = hmc_info->hmc_obj[i].max_cnt;
+
+ while (qpwanted >= IRDMA_MIN_QP_CNT) {
+ if (!irdma_set_loc_hmc_rsrc_gen_3(dev, loc_mem_pages, qpwanted))
+ break;
+
+ if (FIELD_GET(IRDMA_MANAGE_RSRC_VER2, dev->feature_info[IRDMA_FTN_FLAGS]))
+ return -EINVAL;
+
+ qpwanted /= 2;
+ if (mrte_loc == IRDMA_LOC_MEM) {
+ mrwanted = qpwanted * IRDMA_MIN_MR_PER_QP;
+ hmc_info->hmc_obj[IRDMA_HMC_IW_MR].cnt =
+ min(hmc_info->hmc_obj[IRDMA_HMC_IW_MR].max_cnt, mrwanted);
+ }
+ }
+
+ if (qpwanted < IRDMA_MIN_QP_CNT) {
+ ibdev_dbg(to_ibdev(dev),
+ "HMC: ERROR: could not allocate fpm resources\n");
+ return -EINVAL;
+ }
+
+ irdma_set_host_hmc_rsrc_gen_3(dev);
+ ret_code = irdma_sc_cfg_iw_fpm(dev, dev->hmc_fn_id);
+ if (ret_code) {
+ ibdev_dbg(to_ibdev(dev),
+ "HMC: cfg_iw_fpm returned error_code[x%08X]\n",
+ readl(dev->hw_regs[IRDMA_CQPERRCODES]));
+
+ return ret_code;
+ }
+
+ return irdma_cfg_sd_mem(dev, hmc_info);
+}
+
+/**
+ * irdma_cfg_fpm_val - configure HMC objects
+ * @dev: sc device struct
+ * @qp_count: desired qp count
+ */
+int irdma_cfg_fpm_val(struct irdma_sc_dev *dev, u32 qp_count)
+{
+ u32 qpwanted, mrwanted, pblewanted;
+ u32 powerof2, hte, i;
+ u32 sd_needed;
+ u32 sd_diff;
+ u32 loop_count = 0;
+ struct irdma_hmc_info *hmc_info;
+ struct irdma_hmc_fpm_misc *hmc_fpm_misc;
+ int ret_code = 0;
+ u32 max_sds;
+
+ hmc_info = dev->hmc_info;
+ hmc_fpm_misc = &dev->hmc_fpm_misc;
+
+ ret_code = irdma_sc_init_iw_hmc(dev, dev->hmc_fn_id);
+ if (ret_code) {
+ ibdev_dbg(to_ibdev(dev),
+ "HMC: irdma_sc_init_iw_hmc returned error_code = %d\n",
+ ret_code);
+ return ret_code;
+ }
+
+ max_sds = hmc_fpm_misc->max_sds;
+
+ if (dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3)
+ return cfg_fpm_value_gen_3(dev, hmc_info, hmc_fpm_misc);
+
+ for (i = IRDMA_HMC_IW_QP; i < IRDMA_HMC_IW_MAX; i++)
+ hmc_info->hmc_obj[i].cnt = hmc_info->hmc_obj[i].max_cnt;
+ sd_needed = irdma_est_sd(dev, hmc_info);
+ ibdev_dbg(to_ibdev(dev), "HMC: sd count %u where max sd is %u\n",
+ hmc_info->sd_table.sd_cnt, max_sds);
+
+ qpwanted = min(qp_count, hmc_info->hmc_obj[IRDMA_HMC_IW_QP].max_cnt);
+
+ powerof2 = 1;
+ while (powerof2 <= qpwanted)
+ powerof2 *= 2;
+ powerof2 /= 2;
+ qpwanted = powerof2;
+
+ mrwanted = hmc_info->hmc_obj[IRDMA_HMC_IW_MR].max_cnt;
+ pblewanted = hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].max_cnt;
+
+ ibdev_dbg(to_ibdev(dev),
+ "HMC: req_qp=%d max_sd=%u, max_qp = %u, max_cq=%u, max_mr=%u, max_pble=%u, mc=%d, av=%u\n",
+ qp_count, max_sds,
+ hmc_info->hmc_obj[IRDMA_HMC_IW_QP].max_cnt,
+ hmc_info->hmc_obj[IRDMA_HMC_IW_CQ].max_cnt,
+ hmc_info->hmc_obj[IRDMA_HMC_IW_MR].max_cnt,
+ hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].max_cnt,
+ hmc_info->hmc_obj[IRDMA_HMC_IW_FSIMC].max_cnt,
+ hmc_info->hmc_obj[IRDMA_HMC_IW_FSIAV].max_cnt);
+
+ hmc_info->hmc_obj[IRDMA_HMC_IW_FSIMC].cnt =
+ hmc_info->hmc_obj[IRDMA_HMC_IW_FSIMC].max_cnt;
+ hmc_info->hmc_obj[IRDMA_HMC_IW_FSIAV].cnt =
+ hmc_info->hmc_obj[IRDMA_HMC_IW_FSIAV].max_cnt;
+ hmc_info->hmc_obj[IRDMA_HMC_IW_ARP].cnt =
+ hmc_info->hmc_obj[IRDMA_HMC_IW_ARP].max_cnt;
+ hmc_info->hmc_obj[IRDMA_HMC_IW_APBVT_ENTRY].cnt = 1;
+
+ while (irdma_q1_cnt(dev, hmc_info, qpwanted) > hmc_info->hmc_obj[IRDMA_HMC_IW_Q1].max_cnt)
+ qpwanted /= 2;
+
+ do {
+ ++loop_count;
+ hmc_info->hmc_obj[IRDMA_HMC_IW_QP].cnt = qpwanted;
+ hmc_info->hmc_obj[IRDMA_HMC_IW_CQ].cnt =
+ min(2 * qpwanted, hmc_info->hmc_obj[IRDMA_HMC_IW_CQ].cnt);
+ hmc_info->hmc_obj[IRDMA_HMC_IW_SRQ].cnt = 0; /* Reserved */
+ hmc_info->hmc_obj[IRDMA_HMC_IW_MR].cnt = mrwanted;
+
+ hte = round_up(qpwanted + hmc_info->hmc_obj[IRDMA_HMC_IW_FSIMC].cnt, 512);
+ powerof2 = 1;
+ while (powerof2 < hte)
+ powerof2 *= 2;
+ hmc_info->hmc_obj[IRDMA_HMC_IW_HTE].cnt =
+ powerof2 * hmc_fpm_misc->ht_multiplier;
+ if (dev->hw_attrs.uk_attrs.hw_rev == IRDMA_GEN_1)
+ cfg_fpm_value_gen_1(dev, hmc_info, qpwanted);
+ else
+ cfg_fpm_value_gen_2(dev, hmc_info, qpwanted);
+
+ hmc_info->hmc_obj[IRDMA_HMC_IW_Q1].cnt = irdma_q1_cnt(dev, hmc_info, qpwanted);
+ hmc_info->hmc_obj[IRDMA_HMC_IW_XFFL].cnt =
+ hmc_info->hmc_obj[IRDMA_HMC_IW_XF].cnt / hmc_fpm_misc->xf_block_size;
+ hmc_info->hmc_obj[IRDMA_HMC_IW_Q1FL].cnt =
+ hmc_info->hmc_obj[IRDMA_HMC_IW_Q1].cnt / hmc_fpm_misc->q1_block_size;
+ hmc_info->hmc_obj[IRDMA_HMC_IW_TIMER].cnt =
+ (round_up(qpwanted, 512) / 512 + 1) * hmc_fpm_misc->timer_bucket;
+
+ hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].cnt = pblewanted;
+ sd_needed = irdma_est_sd(dev, hmc_info);
+ ibdev_dbg(to_ibdev(dev),
+ "HMC: sd_needed = %d, hmc_fpm_misc->max_sds=%d, mrwanted=%d, pblewanted=%d qpwanted=%d\n",
+ sd_needed, hmc_fpm_misc->max_sds, mrwanted,
+ pblewanted, qpwanted);
+
+ /* Do not reduce resources further. All objects fit with max SDs */
+ if (sd_needed <= hmc_fpm_misc->max_sds)
+ break;
+
+ sd_diff = sd_needed - hmc_fpm_misc->max_sds;
+ if (sd_diff > 128) {
+ if (!(loop_count % 2) && qpwanted > 128) {
+ qpwanted /= 2;
+ } else {
+ pblewanted /= 2;
+ mrwanted /= 2;
+ }
+ continue;
+ }
+
+ if (dev->cqp->hmc_profile != IRDMA_HMC_PROFILE_FAVOR_VF &&
+ pblewanted > (512 * FPM_MULTIPLIER * sd_diff)) {
+ pblewanted -= 256 * FPM_MULTIPLIER * sd_diff;
+ continue;
+ } else if (pblewanted > (100 * FPM_MULTIPLIER)) {
+ pblewanted -= 10 * FPM_MULTIPLIER;
+ } else if (pblewanted > FPM_MULTIPLIER) {
+ pblewanted -= FPM_MULTIPLIER;
+ } else if (qpwanted <= 128) {
+ if (hmc_info->hmc_obj[IRDMA_HMC_IW_FSIMC].cnt > 256)
+ hmc_info->hmc_obj[IRDMA_HMC_IW_FSIMC].cnt /= 2;
+ if (hmc_info->hmc_obj[IRDMA_HMC_IW_FSIAV].cnt > 256)
+ hmc_info->hmc_obj[IRDMA_HMC_IW_FSIAV].cnt /= 2;
+ }
+ if (mrwanted > FPM_MULTIPLIER)
+ mrwanted -= FPM_MULTIPLIER;
+ if (!(loop_count % 10) && qpwanted > 128) {
+ qpwanted /= 2;
+ if (hmc_info->hmc_obj[IRDMA_HMC_IW_FSIAV].cnt > 256)
+ hmc_info->hmc_obj[IRDMA_HMC_IW_FSIAV].cnt /= 2;
+ }
+ } while (loop_count < 2000);
+
+ if (sd_needed > hmc_fpm_misc->max_sds) {
+ ibdev_dbg(to_ibdev(dev),
+ "HMC: cfg_fpm failed loop_cnt=%u, sd_needed=%u, max sd count %u\n",
+ loop_count, sd_needed, hmc_info->sd_table.sd_cnt);
+ return -EINVAL;
+ }
+
+ if (loop_count > 1 && sd_needed < max_sds) {
+ pblewanted += (max_sds - sd_needed) * 256 * FPM_MULTIPLIER;
+ hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].cnt = pblewanted;
+ sd_needed = irdma_est_sd(dev, hmc_info);
+ }
+
+ ibdev_dbg(to_ibdev(dev),
+ "HMC: loop_cnt=%d, sd_needed=%d, qpcnt = %d, cqcnt=%d, mrcnt=%d, pblecnt=%d, mc=%d, ah=%d, max sd count %d, first sd index %d\n",
+ loop_count, sd_needed,
+ hmc_info->hmc_obj[IRDMA_HMC_IW_QP].cnt,
+ hmc_info->hmc_obj[IRDMA_HMC_IW_CQ].cnt,
+ hmc_info->hmc_obj[IRDMA_HMC_IW_MR].cnt,
+ hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].cnt,
+ hmc_info->hmc_obj[IRDMA_HMC_IW_FSIMC].cnt,
+ hmc_info->hmc_obj[IRDMA_HMC_IW_FSIAV].cnt,
+ hmc_info->sd_table.sd_cnt, hmc_info->first_sd_index);
+
+ ret_code = irdma_sc_cfg_iw_fpm(dev, dev->hmc_fn_id);
+ if (ret_code) {
+ ibdev_dbg(to_ibdev(dev),
+ "HMC: cfg_iw_fpm returned error_code[x%08X]\n",
+ readl(dev->hw_regs[IRDMA_CQPERRCODES]));
+ return ret_code;
+ }
+
+ return irdma_cfg_sd_mem(dev, hmc_info);
+}
+
+/**
+ * irdma_exec_cqp_cmd - execute cqp cmd when wqe are available
+ * @dev: rdma device
+ * @pcmdinfo: cqp command info
+ */
+static int irdma_exec_cqp_cmd(struct irdma_sc_dev *dev,
+ struct cqp_cmds_info *pcmdinfo)
+{
+ int status;
+ struct irdma_dma_mem val_mem;
+ bool alloc = false;
+
+ dev->cqp_cmd_stats[pcmdinfo->cqp_cmd]++;
+ switch (pcmdinfo->cqp_cmd) {
+ case IRDMA_OP_CEQ_DESTROY:
+ status = irdma_sc_ceq_destroy(pcmdinfo->in.u.ceq_destroy.ceq,
+ pcmdinfo->in.u.ceq_destroy.scratch,
+ pcmdinfo->post_sq);
+ break;
+ case IRDMA_OP_AEQ_DESTROY:
+ status = irdma_sc_aeq_destroy(pcmdinfo->in.u.aeq_destroy.aeq,
+ pcmdinfo->in.u.aeq_destroy.scratch,
+ pcmdinfo->post_sq);
+
+ break;
+ case IRDMA_OP_CEQ_CREATE:
+ status = irdma_sc_ceq_create(pcmdinfo->in.u.ceq_create.ceq,
+ pcmdinfo->in.u.ceq_create.scratch,
+ pcmdinfo->post_sq);
+ break;
+ case IRDMA_OP_AEQ_CREATE:
+ status = irdma_sc_aeq_create(pcmdinfo->in.u.aeq_create.aeq,
+ pcmdinfo->in.u.aeq_create.scratch,
+ pcmdinfo->post_sq);
+ break;
+ case IRDMA_OP_QP_UPLOAD_CONTEXT:
+ status = irdma_sc_qp_upload_context(pcmdinfo->in.u.qp_upload_context.dev,
+ &pcmdinfo->in.u.qp_upload_context.info,
+ pcmdinfo->in.u.qp_upload_context.scratch,
+ pcmdinfo->post_sq);
+ break;
+ case IRDMA_OP_CQ_CREATE:
+ status = irdma_sc_cq_create(pcmdinfo->in.u.cq_create.cq,
+ pcmdinfo->in.u.cq_create.scratch,
+ pcmdinfo->in.u.cq_create.check_overflow,
+ pcmdinfo->post_sq);
+ break;
+ case IRDMA_OP_CQ_MODIFY:
+ status = irdma_sc_cq_modify(pcmdinfo->in.u.cq_modify.cq,
+ &pcmdinfo->in.u.cq_modify.info,
+ pcmdinfo->in.u.cq_modify.scratch,
+ pcmdinfo->post_sq);
+ break;
+ case IRDMA_OP_CQ_DESTROY:
+ status = irdma_sc_cq_destroy(pcmdinfo->in.u.cq_destroy.cq,
+ pcmdinfo->in.u.cq_destroy.scratch,
+ pcmdinfo->post_sq);
+ break;
+ case IRDMA_OP_QP_FLUSH_WQES:
+ status = irdma_sc_qp_flush_wqes(pcmdinfo->in.u.qp_flush_wqes.qp,
+ &pcmdinfo->in.u.qp_flush_wqes.info,
+ pcmdinfo->in.u.qp_flush_wqes.scratch,
+ pcmdinfo->post_sq);
+ break;
+ case IRDMA_OP_GEN_AE:
+ status = irdma_sc_gen_ae(pcmdinfo->in.u.gen_ae.qp,
+ &pcmdinfo->in.u.gen_ae.info,
+ pcmdinfo->in.u.gen_ae.scratch,
+ pcmdinfo->post_sq);
+ break;
+ case IRDMA_OP_MANAGE_PUSH_PAGE:
+ status = irdma_sc_manage_push_page(pcmdinfo->in.u.manage_push_page.cqp,
+ &pcmdinfo->in.u.manage_push_page.info,
+ pcmdinfo->in.u.manage_push_page.scratch,
+ pcmdinfo->post_sq);
+ break;
+ case IRDMA_OP_UPDATE_PE_SDS:
+ status = irdma_update_pe_sds(pcmdinfo->in.u.update_pe_sds.dev,
+ &pcmdinfo->in.u.update_pe_sds.info,
+ pcmdinfo->in.u.update_pe_sds.scratch);
+ break;
+ case IRDMA_OP_MANAGE_HMC_PM_FUNC_TABLE:
+ /* switch to calling through the call table */
+ status =
+ irdma_sc_manage_hmc_pm_func_table(pcmdinfo->in.u.manage_hmc_pm.dev->cqp,
+ &pcmdinfo->in.u.manage_hmc_pm.info,
+ pcmdinfo->in.u.manage_hmc_pm.scratch,
+ true);
+ break;
+ case IRDMA_OP_SUSPEND:
+ status = irdma_sc_suspend_qp(pcmdinfo->in.u.suspend_resume.cqp,
+ pcmdinfo->in.u.suspend_resume.qp,
+ pcmdinfo->in.u.suspend_resume.scratch);
+ break;
+ case IRDMA_OP_RESUME:
+ status = irdma_sc_resume_qp(pcmdinfo->in.u.suspend_resume.cqp,
+ pcmdinfo->in.u.suspend_resume.qp,
+ pcmdinfo->in.u.suspend_resume.scratch);
+ break;
+ case IRDMA_OP_QUERY_FPM_VAL:
+ val_mem.pa = pcmdinfo->in.u.query_fpm_val.fpm_val_pa;
+ val_mem.va = pcmdinfo->in.u.query_fpm_val.fpm_val_va;
+ status = irdma_sc_query_fpm_val(pcmdinfo->in.u.query_fpm_val.cqp,
+ pcmdinfo->in.u.query_fpm_val.scratch,
+ pcmdinfo->in.u.query_fpm_val.hmc_fn_id,
+ &val_mem, true, IRDMA_CQP_WAIT_EVENT);
+ break;
+ case IRDMA_OP_COMMIT_FPM_VAL:
+ val_mem.pa = pcmdinfo->in.u.commit_fpm_val.fpm_val_pa;
+ val_mem.va = pcmdinfo->in.u.commit_fpm_val.fpm_val_va;
+ status = irdma_sc_commit_fpm_val(pcmdinfo->in.u.commit_fpm_val.cqp,
+ pcmdinfo->in.u.commit_fpm_val.scratch,
+ pcmdinfo->in.u.commit_fpm_val.hmc_fn_id,
+ &val_mem,
+ true,
+ IRDMA_CQP_WAIT_EVENT);
+ break;
+ case IRDMA_OP_STATS_ALLOCATE:
+ alloc = true;
+ fallthrough;
+ case IRDMA_OP_STATS_FREE:
+ status = irdma_sc_manage_stats_inst(pcmdinfo->in.u.stats_manage.cqp,
+ &pcmdinfo->in.u.stats_manage.info,
+ alloc,
+ pcmdinfo->in.u.stats_manage.scratch);
+ break;
+ case IRDMA_OP_STATS_GATHER:
+ status = irdma_sc_gather_stats(pcmdinfo->in.u.stats_gather.cqp,
+ &pcmdinfo->in.u.stats_gather.info,
+ pcmdinfo->in.u.stats_gather.scratch);
+ break;
+ case IRDMA_OP_WS_MODIFY_NODE:
+ status = irdma_sc_manage_ws_node(pcmdinfo->in.u.ws_node.cqp,
+ &pcmdinfo->in.u.ws_node.info,
+ IRDMA_MODIFY_NODE,
+ pcmdinfo->in.u.ws_node.scratch);
+ break;
+ case IRDMA_OP_WS_DELETE_NODE:
+ status = irdma_sc_manage_ws_node(pcmdinfo->in.u.ws_node.cqp,
+ &pcmdinfo->in.u.ws_node.info,
+ IRDMA_DEL_NODE,
+ pcmdinfo->in.u.ws_node.scratch);
+ break;
+ case IRDMA_OP_WS_ADD_NODE:
+ status = irdma_sc_manage_ws_node(pcmdinfo->in.u.ws_node.cqp,
+ &pcmdinfo->in.u.ws_node.info,
+ IRDMA_ADD_NODE,
+ pcmdinfo->in.u.ws_node.scratch);
+ break;
+ case IRDMA_OP_SET_UP_MAP:
+ status = irdma_sc_set_up_map(pcmdinfo->in.u.up_map.cqp,
+ &pcmdinfo->in.u.up_map.info,
+ pcmdinfo->in.u.up_map.scratch);
+ break;
+ case IRDMA_OP_QUERY_RDMA_FEATURES:
+ status = irdma_sc_query_rdma_features(pcmdinfo->in.u.query_rdma.cqp,
+ &pcmdinfo->in.u.query_rdma.query_buff_mem,
+ pcmdinfo->in.u.query_rdma.scratch);
+ break;
+ case IRDMA_OP_DELETE_ARP_CACHE_ENTRY:
+ status = irdma_sc_del_arp_cache_entry(pcmdinfo->in.u.del_arp_cache_entry.cqp,
+ pcmdinfo->in.u.del_arp_cache_entry.scratch,
+ pcmdinfo->in.u.del_arp_cache_entry.arp_index,
+ pcmdinfo->post_sq);
+ break;
+ case IRDMA_OP_MANAGE_APBVT_ENTRY:
+ status = irdma_sc_manage_apbvt_entry(pcmdinfo->in.u.manage_apbvt_entry.cqp,
+ &pcmdinfo->in.u.manage_apbvt_entry.info,
+ pcmdinfo->in.u.manage_apbvt_entry.scratch,
+ pcmdinfo->post_sq);
+ break;
+ case IRDMA_OP_MANAGE_QHASH_TABLE_ENTRY:
+ status = irdma_sc_manage_qhash_table_entry(pcmdinfo->in.u.manage_qhash_table_entry.cqp,
+ &pcmdinfo->in.u.manage_qhash_table_entry.info,
+ pcmdinfo->in.u.manage_qhash_table_entry.scratch,
+ pcmdinfo->post_sq);
+ break;
+ case IRDMA_OP_QP_MODIFY:
+ status = irdma_sc_qp_modify(pcmdinfo->in.u.qp_modify.qp,
+ &pcmdinfo->in.u.qp_modify.info,
+ pcmdinfo->in.u.qp_modify.scratch,
+ pcmdinfo->post_sq);
+ break;
+ case IRDMA_OP_QP_CREATE:
+ status = irdma_sc_qp_create(pcmdinfo->in.u.qp_create.qp,
+ &pcmdinfo->in.u.qp_create.info,
+ pcmdinfo->in.u.qp_create.scratch,
+ pcmdinfo->post_sq);
+ break;
+ case IRDMA_OP_QP_DESTROY:
+ status = irdma_sc_qp_destroy(pcmdinfo->in.u.qp_destroy.qp,
+ pcmdinfo->in.u.qp_destroy.scratch,
+ pcmdinfo->in.u.qp_destroy.remove_hash_idx,
+ pcmdinfo->in.u.qp_destroy.ignore_mw_bnd,
+ pcmdinfo->post_sq);
+ break;
+ case IRDMA_OP_ALLOC_STAG:
+ status = irdma_sc_alloc_stag(pcmdinfo->in.u.alloc_stag.dev,
+ &pcmdinfo->in.u.alloc_stag.info,
+ pcmdinfo->in.u.alloc_stag.scratch,
+ pcmdinfo->post_sq);
+ break;
+ case IRDMA_OP_MR_REG_NON_SHARED:
+ status = irdma_sc_mr_reg_non_shared(pcmdinfo->in.u.mr_reg_non_shared.dev,
+ &pcmdinfo->in.u.mr_reg_non_shared.info,
+ pcmdinfo->in.u.mr_reg_non_shared.scratch,
+ pcmdinfo->post_sq);
+ break;
+ case IRDMA_OP_DEALLOC_STAG:
+ status = irdma_sc_dealloc_stag(pcmdinfo->in.u.dealloc_stag.dev,
+ &pcmdinfo->in.u.dealloc_stag.info,
+ pcmdinfo->in.u.dealloc_stag.scratch,
+ pcmdinfo->post_sq);
+ break;
+ case IRDMA_OP_MW_ALLOC:
+ status = irdma_sc_mw_alloc(pcmdinfo->in.u.mw_alloc.dev,
+ &pcmdinfo->in.u.mw_alloc.info,
+ pcmdinfo->in.u.mw_alloc.scratch,
+ pcmdinfo->post_sq);
+ break;
+ case IRDMA_OP_ADD_ARP_CACHE_ENTRY:
+ status = irdma_sc_add_arp_cache_entry(pcmdinfo->in.u.add_arp_cache_entry.cqp,
+ &pcmdinfo->in.u.add_arp_cache_entry.info,
+ pcmdinfo->in.u.add_arp_cache_entry.scratch,
+ pcmdinfo->post_sq);
+ break;
+ case IRDMA_OP_ALLOC_LOCAL_MAC_ENTRY:
+ status = irdma_sc_alloc_local_mac_entry(pcmdinfo->in.u.alloc_local_mac_entry.cqp,
+ pcmdinfo->in.u.alloc_local_mac_entry.scratch,
+ pcmdinfo->post_sq);
+ break;
+ case IRDMA_OP_ADD_LOCAL_MAC_ENTRY:
+ status = irdma_sc_add_local_mac_entry(pcmdinfo->in.u.add_local_mac_entry.cqp,
+ &pcmdinfo->in.u.add_local_mac_entry.info,
+ pcmdinfo->in.u.add_local_mac_entry.scratch,
+ pcmdinfo->post_sq);
+ break;
+ case IRDMA_OP_DELETE_LOCAL_MAC_ENTRY:
+ status = irdma_sc_del_local_mac_entry(pcmdinfo->in.u.del_local_mac_entry.cqp,
+ pcmdinfo->in.u.del_local_mac_entry.scratch,
+ pcmdinfo->in.u.del_local_mac_entry.entry_idx,
+ pcmdinfo->in.u.del_local_mac_entry.ignore_ref_count,
+ pcmdinfo->post_sq);
+ break;
+ case IRDMA_OP_AH_CREATE:
+ status = irdma_sc_create_ah(pcmdinfo->in.u.ah_create.cqp,
+ &pcmdinfo->in.u.ah_create.info,
+ pcmdinfo->in.u.ah_create.scratch);
+ break;
+ case IRDMA_OP_AH_DESTROY:
+ status = irdma_sc_destroy_ah(pcmdinfo->in.u.ah_destroy.cqp,
+ &pcmdinfo->in.u.ah_destroy.info,
+ pcmdinfo->in.u.ah_destroy.scratch);
+ break;
+ case IRDMA_OP_MC_CREATE:
+ status = irdma_sc_create_mcast_grp(pcmdinfo->in.u.mc_create.cqp,
+ &pcmdinfo->in.u.mc_create.info,
+ pcmdinfo->in.u.mc_create.scratch);
+ break;
+ case IRDMA_OP_MC_DESTROY:
+ status = irdma_sc_destroy_mcast_grp(pcmdinfo->in.u.mc_destroy.cqp,
+ &pcmdinfo->in.u.mc_destroy.info,
+ pcmdinfo->in.u.mc_destroy.scratch);
+ break;
+ case IRDMA_OP_MC_MODIFY:
+ status = irdma_sc_modify_mcast_grp(pcmdinfo->in.u.mc_modify.cqp,
+ &pcmdinfo->in.u.mc_modify.info,
+ pcmdinfo->in.u.mc_modify.scratch);
+ break;
+ case IRDMA_OP_SRQ_CREATE:
+ status = irdma_sc_srq_create(pcmdinfo->in.u.srq_create.srq,
+ pcmdinfo->in.u.srq_create.scratch,
+ pcmdinfo->post_sq);
+ break;
+ case IRDMA_OP_SRQ_MODIFY:
+ status = irdma_sc_srq_modify(pcmdinfo->in.u.srq_modify.srq,
+ &pcmdinfo->in.u.srq_modify.info,
+ pcmdinfo->in.u.srq_modify.scratch,
+ pcmdinfo->post_sq);
+ break;
+ case IRDMA_OP_SRQ_DESTROY:
+ status = irdma_sc_srq_destroy(pcmdinfo->in.u.srq_destroy.srq,
+ pcmdinfo->in.u.srq_destroy.scratch,
+ pcmdinfo->post_sq);
+ break;
+ default:
+ status = -EOPNOTSUPP;
+ break;
+ }
+
+ return status;
+}
+
+/**
+ * irdma_process_cqp_cmd - process all cqp commands
+ * @dev: sc device struct
+ * @pcmdinfo: cqp command info
+ */
+int irdma_process_cqp_cmd(struct irdma_sc_dev *dev,
+ struct cqp_cmds_info *pcmdinfo)
+{
+ int status = 0;
+ unsigned long flags;
+
+ spin_lock_irqsave(&dev->cqp_lock, flags);
+ if (list_empty(&dev->cqp_cmd_head) && !irdma_cqp_ring_full(dev->cqp))
+ status = irdma_exec_cqp_cmd(dev, pcmdinfo);
+ else
+ list_add_tail(&pcmdinfo->cqp_cmd_entry, &dev->cqp_cmd_head);
+ spin_unlock_irqrestore(&dev->cqp_lock, flags);
+ return status;
+}
+
+/**
+ * irdma_process_bh - called from tasklet for cqp list
+ * @dev: sc device struct
+ */
+int irdma_process_bh(struct irdma_sc_dev *dev)
+{
+ int status = 0;
+ struct cqp_cmds_info *pcmdinfo;
+ unsigned long flags;
+
+ spin_lock_irqsave(&dev->cqp_lock, flags);
+ while (!list_empty(&dev->cqp_cmd_head) &&
+ !irdma_cqp_ring_full(dev->cqp)) {
+ pcmdinfo = (struct cqp_cmds_info *)irdma_remove_cqp_head(dev);
+ status = irdma_exec_cqp_cmd(dev, pcmdinfo);
+ if (status)
+ break;
+ }
+ spin_unlock_irqrestore(&dev->cqp_lock, flags);
+ return status;
+}
+
+/**
+ * irdma_cfg_aeq- Configure AEQ interrupt
+ * @dev: pointer to the device structure
+ * @idx: vector index
+ * @enable: True to enable, False disables
+ */
+void irdma_cfg_aeq(struct irdma_sc_dev *dev, u32 idx, bool enable)
+{
+ u32 reg_val;
+
+ reg_val = FIELD_PREP(IRDMA_PFINT_AEQCTL_CAUSE_ENA, enable) |
+ FIELD_PREP(IRDMA_PFINT_AEQCTL_MSIX_INDX, idx) |
+ FIELD_PREP(IRDMA_PFINT_AEQCTL_ITR_INDX, 3);
+ writel(reg_val, dev->hw_regs[IRDMA_PFINT_AEQCTL]);
+}
+
+/**
+ * sc_vsi_update_stats - Update statistics
+ * @vsi: sc_vsi instance to update
+ */
+void sc_vsi_update_stats(struct irdma_sc_vsi *vsi)
+{
+ struct irdma_dev_hw_stats *hw_stats = &vsi->pestat->hw_stats;
+ struct irdma_gather_stats *gather_stats =
+ vsi->pestat->gather_info.gather_stats_va;
+ struct irdma_gather_stats *last_gather_stats =
+ vsi->pestat->gather_info.last_gather_stats_va;
+ const struct irdma_hw_stat_map *map = vsi->dev->hw_stats_map;
+ u16 max_stat_idx = vsi->dev->hw_attrs.max_stat_idx;
+ u16 i;
+
+ if (vsi->dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3) {
+ for (i = 0; i < max_stat_idx; i++) {
+ u16 idx = map[i].byteoff / sizeof(u64);
+
+ hw_stats->stats_val[i] = gather_stats->val[idx];
+ }
+ return;
+ }
+
+ irdma_update_stats(hw_stats, gather_stats, last_gather_stats,
+ map, max_stat_idx);
+}
+
+/**
+ * irdma_wait_pe_ready - Check if firmware is ready
+ * @dev: provides access to registers
+ */
+static int irdma_wait_pe_ready(struct irdma_sc_dev *dev)
+{
+ u32 statuscpu0;
+ u32 statuscpu1;
+ u32 statuscpu2;
+ u32 retrycount = 0;
+
+ do {
+ statuscpu0 = readl(dev->hw_regs[IRDMA_GLPE_CPUSTATUS0]);
+ statuscpu1 = readl(dev->hw_regs[IRDMA_GLPE_CPUSTATUS1]);
+ statuscpu2 = readl(dev->hw_regs[IRDMA_GLPE_CPUSTATUS2]);
+ if (statuscpu0 == 0x80 && statuscpu1 == 0x80 &&
+ statuscpu2 == 0x80)
+ return 0;
+ mdelay(1000);
+ } while (retrycount++ < dev->hw_attrs.max_pe_ready_count);
+ return -1;
+}
+
+static inline void irdma_sc_init_hw(struct irdma_sc_dev *dev)
+{
+ switch (dev->hw_attrs.uk_attrs.hw_rev) {
+ case IRDMA_GEN_1:
+ i40iw_init_hw(dev);
+ break;
+ case IRDMA_GEN_2:
+ icrdma_init_hw(dev);
+ break;
+ case IRDMA_GEN_3:
+ ig3rdma_init_hw(dev);
+ break;
+ }
+}
+
+/**
+ * irdma_sc_dev_init - Initialize control part of device
+ * @ver: version
+ * @dev: Device pointer
+ * @info: Device init info
+ */
+int irdma_sc_dev_init(enum irdma_vers ver, struct irdma_sc_dev *dev,
+ struct irdma_device_init_info *info)
+{
+ u32 val;
+ int ret_code = 0;
+ u8 db_size;
+
+ INIT_LIST_HEAD(&dev->cqp_cmd_head); /* for CQP command backlog */
+ mutex_init(&dev->ws_mutex);
+ dev->hmc_fn_id = info->hmc_fn_id;
+ dev->fpm_query_buf_pa = info->fpm_query_buf_pa;
+ dev->fpm_query_buf = info->fpm_query_buf;
+ dev->fpm_commit_buf_pa = info->fpm_commit_buf_pa;
+ dev->fpm_commit_buf = info->fpm_commit_buf;
+ dev->hw = info->hw;
+ dev->hw->hw_addr = info->bar0;
+ dev->protocol_used = info->protocol_used;
+ /* Setup the hardware limits, hmc may limit further */
+ dev->hw_attrs.min_hw_qp_id = IRDMA_MIN_IW_QP_ID;
+ dev->hw_attrs.min_hw_srq_id = IRDMA_MIN_IW_SRQ_ID;
+ dev->hw_attrs.min_hw_aeq_size = IRDMA_MIN_AEQ_ENTRIES;
+ if (dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3)
+ dev->hw_attrs.max_hw_aeq_size = IRDMA_MAX_AEQ_ENTRIES_GEN_3;
+ else
+ dev->hw_attrs.max_hw_aeq_size = IRDMA_MAX_AEQ_ENTRIES;
+ dev->hw_attrs.min_hw_ceq_size = IRDMA_MIN_CEQ_ENTRIES;
+ dev->hw_attrs.max_hw_ceq_size = IRDMA_MAX_CEQ_ENTRIES;
+ dev->hw_attrs.uk_attrs.min_hw_cq_size = IRDMA_MIN_CQ_SIZE;
+ dev->hw_attrs.uk_attrs.max_hw_cq_size = IRDMA_MAX_CQ_SIZE;
+ dev->hw_attrs.uk_attrs.max_hw_wq_frags = IRDMA_MAX_WQ_FRAGMENT_COUNT;
+ dev->hw_attrs.uk_attrs.max_hw_read_sges = IRDMA_MAX_SGE_RD;
+ dev->hw_attrs.max_hw_outbound_msg_size = IRDMA_MAX_OUTBOUND_MSG_SIZE;
+ dev->hw_attrs.max_mr_size = IRDMA_MAX_MR_SIZE;
+ dev->hw_attrs.max_hw_inbound_msg_size = IRDMA_MAX_INBOUND_MSG_SIZE;
+ dev->hw_attrs.max_hw_device_pages = IRDMA_MAX_PUSH_PAGE_COUNT;
+ dev->hw_attrs.uk_attrs.max_hw_inline = IRDMA_MAX_INLINE_DATA_SIZE;
+ dev->hw_attrs.max_hw_wqes = IRDMA_MAX_WQ_ENTRIES;
+ dev->hw_attrs.max_qp_wr = IRDMA_MAX_QP_WRS(IRDMA_MAX_QUANTA_PER_WR);
+
+ dev->hw_attrs.uk_attrs.max_hw_rq_quanta = IRDMA_QP_SW_MAX_RQ_QUANTA;
+ dev->hw_attrs.uk_attrs.max_hw_wq_quanta = IRDMA_QP_SW_MAX_WQ_QUANTA;
+ dev->hw_attrs.max_hw_pds = IRDMA_MAX_PDS;
+ dev->hw_attrs.max_hw_ena_vf_count = IRDMA_MAX_PE_ENA_VF_COUNT;
+
+ dev->hw_attrs.max_pe_ready_count = 14;
+ dev->hw_attrs.max_done_count = IRDMA_DONE_COUNT;
+ dev->hw_attrs.max_sleep_count = IRDMA_SLEEP_COUNT;
+ dev->hw_attrs.max_cqp_compl_wait_time_ms = CQP_COMPL_WAIT_TIME_MS;
+
+ if (!dev->privileged) {
+ ret_code = irdma_vchnl_req_get_hmc_fcn(dev);
+ if (ret_code) {
+ ibdev_dbg(to_ibdev(dev),
+ "DEV: Get HMC function ret = %d\n",
+ ret_code);
+
+ return ret_code;
+ }
+ }
+
+ irdma_sc_init_hw(dev);
+
+ if (dev->privileged) {
+ if (irdma_wait_pe_ready(dev))
+ return -ETIMEDOUT;
+
+ val = readl(dev->hw_regs[IRDMA_GLPCI_LBARCTRL]);
+ db_size = (u8)FIELD_GET(IRDMA_GLPCI_LBARCTRL_PE_DB_SIZE, val);
+ if (db_size != IRDMA_PE_DB_SIZE_4M &&
+ db_size != IRDMA_PE_DB_SIZE_8M) {
+ ibdev_dbg(to_ibdev(dev),
+ "DEV: RDMA PE doorbell is not enabled in CSR val 0x%x db_size=%d\n",
+ val, db_size);
+ return -ENODEV;
+ }
+ } else {
+ ret_code = irdma_vchnl_req_get_reg_layout(dev);
+ if (ret_code)
+ ibdev_dbg(to_ibdev(dev),
+ "DEV: Get Register layout failed ret = %d\n",
+ ret_code);
+ }
+
+ return ret_code;
+}
+
+/**
+ * irdma_stat_val - Extract HW counter value from statistics buffer
+ * @stats_val: pointer to statistics buffer
+ * @byteoff: byte offset of counter value in the buffer (8B-aligned)
+ * @bitoff: bit offset of counter value within 8B entry
+ * @bitmask: maximum counter value (e.g. 0xffffff for 24-bit counter)
+ */
+static inline u64 irdma_stat_val(const u64 *stats_val, u16 byteoff, u8 bitoff,
+ u64 bitmask)
+{
+ u16 idx = byteoff / sizeof(*stats_val);
+
+ return (stats_val[idx] >> bitoff) & bitmask;
+}
+
+/**
+ * irdma_stat_delta - Calculate counter delta
+ * @new_val: updated counter value
+ * @old_val: last counter value
+ * @max_val: maximum counter value (e.g. 0xffffff for 24-bit counter)
+ */
+static inline u64 irdma_stat_delta(u64 new_val, u64 old_val, u64 max_val)
+{
+ if (new_val >= old_val)
+ return new_val - old_val;
+
+ /* roll-over case */
+ return max_val - old_val + new_val + 1;
+}
+
+/**
+ * irdma_update_stats - Update statistics
+ * @hw_stats: hw_stats instance to update
+ * @gather_stats: updated stat counters
+ * @last_gather_stats: last stat counters
+ * @map: HW stat map (hw_stats => gather_stats)
+ * @max_stat_idx: number of HW stats
+ */
+void irdma_update_stats(struct irdma_dev_hw_stats *hw_stats,
+ struct irdma_gather_stats *gather_stats,
+ struct irdma_gather_stats *last_gather_stats,
+ const struct irdma_hw_stat_map *map, u16 max_stat_idx)
+{
+ u64 *stats_val = hw_stats->stats_val;
+ u16 i;
+
+ for (i = 0; i < max_stat_idx; i++) {
+ u64 new_val = irdma_stat_val(gather_stats->val, map[i].byteoff,
+ map[i].bitoff, map[i].bitmask);
+ u64 last_val = irdma_stat_val(last_gather_stats->val,
+ map[i].byteoff, map[i].bitoff,
+ map[i].bitmask);
+
+ stats_val[i] +=
+ irdma_stat_delta(new_val, last_val, map[i].bitmask);
+ }
+
+ memcpy(last_gather_stats, gather_stats, sizeof(*last_gather_stats));
+}
diff --git a/drivers/infiniband/hw/irdma/defs.h b/drivers/infiniband/hw/irdma/defs.h
new file mode 100644
index 000000000000..983b22d7ae23
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/defs.h
@@ -0,0 +1,1184 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2015 - 2021 Intel Corporation */
+#ifndef IRDMA_DEFS_H
+#define IRDMA_DEFS_H
+
+#define IRDMA_FIRST_USER_QP_ID 3
+
+#define ECN_CODE_PT_VAL 2
+
+#define IRDMA_PUSH_OFFSET (8 * 1024 * 1024)
+#define IRDMA_PF_FIRST_PUSH_PAGE_INDEX 16
+#define IRDMA_PF_BAR_RSVD (60 * 1024)
+
+#define IRDMA_PE_DB_SIZE_4M 1
+#define IRDMA_PE_DB_SIZE_8M 2
+
+#define IRDMA_IRD_HW_SIZE_4_GEN3 0
+#define IRDMA_IRD_HW_SIZE_8_GEN3 1
+#define IRDMA_IRD_HW_SIZE_16_GEN3 2
+#define IRDMA_IRD_HW_SIZE_32_GEN3 3
+#define IRDMA_IRD_HW_SIZE_64_GEN3 4
+#define IRDMA_IRD_HW_SIZE_128_GEN3 5
+#define IRDMA_IRD_HW_SIZE_256_GEN3 6
+#define IRDMA_IRD_HW_SIZE_512_GEN3 7
+#define IRDMA_IRD_HW_SIZE_1024_GEN3 8
+#define IRDMA_IRD_HW_SIZE_2048_GEN3 9
+#define IRDMA_IRD_HW_SIZE_4096_GEN3 10
+
+#define IRDMA_IRD_HW_SIZE_4 0
+#define IRDMA_IRD_HW_SIZE_16 1
+#define IRDMA_IRD_HW_SIZE_64 2
+#define IRDMA_IRD_HW_SIZE_128 3
+#define IRDMA_IRD_HW_SIZE_256 4
+
+enum irdma_protocol_used {
+ IRDMA_ANY_PROTOCOL = 0,
+ IRDMA_IWARP_PROTOCOL_ONLY = 1,
+ IRDMA_ROCE_PROTOCOL_ONLY = 2,
+};
+
+#define IRDMA_QP_STATE_INVALID 0
+#define IRDMA_QP_STATE_IDLE 1
+#define IRDMA_QP_STATE_RTS 2
+#define IRDMA_QP_STATE_CLOSING 3
+#define IRDMA_QP_STATE_SQD 3
+#define IRDMA_QP_STATE_RTR 4
+#define IRDMA_QP_STATE_TERMINATE 5
+#define IRDMA_QP_STATE_ERROR 6
+
+#define IRDMA_MAX_TRAFFIC_CLASS 8
+#define IRDMA_MAX_STATS_COUNT_GEN_1 12
+#define IRDMA_MAX_USER_PRIORITY 8
+#define IRDMA_MAX_APPS 8
+#define IRDMA_MAX_STATS_COUNT 128
+#define IRDMA_FIRST_NON_PF_STAT 4
+
+#define IRDMA_MIN_MTU_IPV4 576
+#define IRDMA_MIN_MTU_IPV6 1280
+#define IRDMA_MTU_TO_MSS_IPV4 40
+#define IRDMA_MTU_TO_MSS_IPV6 60
+#define IRDMA_DEFAULT_MTU 1500
+
+#define Q2_FPSN_OFFSET 64
+#define TERM_DDP_LEN_TAGGED 14
+#define TERM_DDP_LEN_UNTAGGED 18
+#define TERM_RDMA_LEN 28
+#define RDMA_OPCODE_M 0x0f
+#define RDMA_READ_REQ_OPCODE 1
+#define Q2_BAD_FRAME_OFFSET 72
+#define CQE_MAJOR_DRV 0x8000
+
+#define IRDMA_TERM_SENT 1
+#define IRDMA_TERM_RCVD 2
+#define IRDMA_TERM_DONE 4
+#define IRDMA_MAC_HLEN 14
+
+#define IRDMA_CQP_WAIT_POLL_REGS 1
+#define IRDMA_CQP_WAIT_POLL_CQ 2
+#define IRDMA_CQP_WAIT_EVENT 3
+
+#define IRDMA_AE_SOURCE_RSVD 0x0
+#define IRDMA_AE_SOURCE_RQ 0x1
+#define IRDMA_AE_SOURCE_RQ_0011 0x3
+
+#define IRDMA_AE_SOURCE_CQ 0x2
+#define IRDMA_AE_SOURCE_CQ_0110 0x6
+#define IRDMA_AE_SOURCE_CQ_1010 0xa
+#define IRDMA_AE_SOURCE_CQ_1110 0xe
+
+#define IRDMA_AE_SOURCE_SQ 0x5
+#define IRDMA_AE_SOURCE_SQ_0111 0x7
+
+#define IRDMA_AE_SOURCE_IN_RR_WR 0x9
+#define IRDMA_AE_SOURCE_IN_RR_WR_1011 0xb
+#define IRDMA_AE_SOURCE_OUT_RR 0xd
+#define IRDMA_AE_SOURCE_OUT_RR_1111 0xf
+
+#define IRDMA_TCP_STATE_NON_EXISTENT 0
+#define IRDMA_TCP_STATE_CLOSED 1
+#define IRDMA_TCP_STATE_LISTEN 2
+#define IRDMA_STATE_SYN_SEND 3
+#define IRDMA_TCP_STATE_SYN_RECEIVED 4
+#define IRDMA_TCP_STATE_ESTABLISHED 5
+#define IRDMA_TCP_STATE_CLOSE_WAIT 6
+#define IRDMA_TCP_STATE_FIN_WAIT_1 7
+#define IRDMA_TCP_STATE_CLOSING 8
+#define IRDMA_TCP_STATE_LAST_ACK 9
+#define IRDMA_TCP_STATE_FIN_WAIT_2 10
+#define IRDMA_TCP_STATE_TIME_WAIT 11
+#define IRDMA_TCP_STATE_RESERVED_1 12
+#define IRDMA_TCP_STATE_RESERVED_2 13
+#define IRDMA_TCP_STATE_RESERVED_3 14
+#define IRDMA_TCP_STATE_RESERVED_4 15
+
+#define IRDMA_CQP_SW_SQSIZE_4 4
+#define IRDMA_CQP_SW_SQSIZE_2048 2048
+
+#define IRDMA_CQ_TYPE_IWARP 1
+#define IRDMA_CQ_TYPE_ILQ 2
+#define IRDMA_CQ_TYPE_IEQ 3
+#define IRDMA_CQ_TYPE_CQP 4
+
+#define IRDMA_DONE_COUNT 1000
+#define IRDMA_SLEEP_COUNT 10
+
+#define IRDMA_UPDATE_SD_BUFF_SIZE 128
+#define IRDMA_FEATURE_BUF_SIZE (8 * IRDMA_MAX_FEATURES)
+
+#define ENABLE_LOC_MEM 63
+#define IRDMA_ATOMICS_ALLOWED_BIT 1
+#define MAX_PBLE_PER_SD 0x40000
+#define MAX_PBLE_SD_PER_FCN 0x400
+#define MAX_MR_PER_SD 0x8000
+#define MAX_MR_SD_PER_FCN 0x80
+#define IRDMA_PBLE_COMMIT_OFFSET 112
+#define IRDMA_MAX_QUANTA_PER_WR 8
+
+#define IRDMA_QP_SW_MAX_WQ_QUANTA 32768
+#define IRDMA_QP_SW_MAX_SQ_QUANTA 32768
+#define IRDMA_QP_SW_MAX_RQ_QUANTA 32768
+#define IRDMA_MAX_QP_WRS(max_quanta_per_wr) \
+ ((IRDMA_QP_SW_MAX_WQ_QUANTA - IRDMA_SQ_RSVD) / (max_quanta_per_wr))
+#define IRDMA_SRQ_MIN_QUANTA 8
+#define IRDMA_SRQ_MAX_QUANTA 262144
+#define IRDMA_MAX_SRQ_WRS \
+ ((IRDMA_SRQ_MAX_QUANTA - IRDMA_RQ_RSVD) / IRDMA_MAX_QUANTA_PER_WR)
+
+#define IRDMAQP_TERM_SEND_TERM_AND_FIN 0
+#define IRDMAQP_TERM_SEND_TERM_ONLY 1
+#define IRDMAQP_TERM_SEND_FIN_ONLY 2
+#define IRDMAQP_TERM_DONOT_SEND_TERM_OR_FIN 3
+
+#define IRDMA_QP_TYPE_IWARP 1
+#define IRDMA_QP_TYPE_UDA 2
+#define IRDMA_QP_TYPE_ROCE_RC 3
+#define IRDMA_QP_TYPE_ROCE_UD 4
+
+#define IRDMA_HW_PAGE_SIZE 4096
+#define IRDMA_HW_PAGE_SHIFT 12
+#define IRDMA_CQE_QTYPE_RQ 0
+#define IRDMA_CQE_QTYPE_SQ 1
+
+#define IRDMA_QP_SW_MIN_WQSIZE 8u /* in WRs*/
+#define IRDMA_QP_WQE_MIN_SIZE 32
+#define IRDMA_QP_WQE_MAX_SIZE 256
+#define IRDMA_QP_WQE_MIN_QUANTA 1
+#define IRDMA_MAX_RQ_WQE_SHIFT_GEN1 2
+#define IRDMA_MAX_RQ_WQE_SHIFT_GEN2 3
+
+#define IRDMA_SQ_RSVD 258
+#define IRDMA_RQ_RSVD 1
+
+#define IRDMA_FEATURE_RTS_AE BIT_ULL(0)
+#define IRDMA_FEATURE_CQ_RESIZE BIT_ULL(1)
+#define IRDMA_FEATURE_64_BYTE_CQE BIT_ULL(5)
+#define IRDMA_FEATURE_ATOMIC_OPS BIT_ULL(6)
+#define IRDMA_FEATURE_SRQ BIT_ULL(7)
+#define IRDMA_FEATURE_CQE_TIMESTAMPING BIT_ULL(8)
+
+#define IRDMAQP_OP_RDMA_WRITE 0x00
+#define IRDMAQP_OP_RDMA_READ 0x01
+#define IRDMAQP_OP_RDMA_SEND 0x03
+#define IRDMAQP_OP_RDMA_SEND_INV 0x04
+#define IRDMAQP_OP_RDMA_SEND_SOL_EVENT 0x05
+#define IRDMAQP_OP_RDMA_SEND_SOL_EVENT_INV 0x06
+#define IRDMAQP_OP_BIND_MW 0x08
+#define IRDMAQP_OP_FAST_REGISTER 0x09
+#define IRDMAQP_OP_LOCAL_INVALIDATE 0x0a
+#define IRDMAQP_OP_RDMA_READ_LOC_INV 0x0b
+#define IRDMAQP_OP_NOP 0x0c
+#define IRDMAQP_OP_RDMA_WRITE_SOL 0x0d
+#define IRDMAQP_OP_ATOMIC_FETCH_ADD 0x0f
+#define IRDMAQP_OP_ATOMIC_COMPARE_SWAP_ADD 0x11
+#define IRDMAQP_OP_GEN_RTS_AE 0x30
+
+enum irdma_cqp_op_type {
+ IRDMA_OP_CEQ_DESTROY = 1,
+ IRDMA_OP_AEQ_DESTROY = 2,
+ IRDMA_OP_DELETE_ARP_CACHE_ENTRY = 3,
+ IRDMA_OP_MANAGE_APBVT_ENTRY = 4,
+ IRDMA_OP_CEQ_CREATE = 5,
+ IRDMA_OP_AEQ_CREATE = 6,
+ IRDMA_OP_MANAGE_QHASH_TABLE_ENTRY = 7,
+ IRDMA_OP_QP_MODIFY = 8,
+ IRDMA_OP_QP_UPLOAD_CONTEXT = 9,
+ IRDMA_OP_CQ_CREATE = 10,
+ IRDMA_OP_CQ_DESTROY = 11,
+ IRDMA_OP_QP_CREATE = 12,
+ IRDMA_OP_QP_DESTROY = 13,
+ IRDMA_OP_ALLOC_STAG = 14,
+ IRDMA_OP_MR_REG_NON_SHARED = 15,
+ IRDMA_OP_DEALLOC_STAG = 16,
+ IRDMA_OP_MW_ALLOC = 17,
+ IRDMA_OP_QP_FLUSH_WQES = 18,
+ IRDMA_OP_ADD_ARP_CACHE_ENTRY = 19,
+ IRDMA_OP_MANAGE_PUSH_PAGE = 20,
+ IRDMA_OP_UPDATE_PE_SDS = 21,
+ IRDMA_OP_MANAGE_HMC_PM_FUNC_TABLE = 22,
+ IRDMA_OP_SUSPEND = 23,
+ IRDMA_OP_RESUME = 24,
+ IRDMA_OP_MANAGE_VF_PBLE_BP = 25,
+ IRDMA_OP_QUERY_FPM_VAL = 26,
+ IRDMA_OP_COMMIT_FPM_VAL = 27,
+ IRDMA_OP_AH_CREATE = 28,
+ IRDMA_OP_AH_MODIFY = 29,
+ IRDMA_OP_AH_DESTROY = 30,
+ IRDMA_OP_MC_CREATE = 31,
+ IRDMA_OP_MC_DESTROY = 32,
+ IRDMA_OP_MC_MODIFY = 33,
+ IRDMA_OP_STATS_ALLOCATE = 34,
+ IRDMA_OP_STATS_FREE = 35,
+ IRDMA_OP_STATS_GATHER = 36,
+ IRDMA_OP_WS_ADD_NODE = 37,
+ IRDMA_OP_WS_MODIFY_NODE = 38,
+ IRDMA_OP_WS_DELETE_NODE = 39,
+ IRDMA_OP_WS_FAILOVER_START = 40,
+ IRDMA_OP_WS_FAILOVER_COMPLETE = 41,
+ IRDMA_OP_SET_UP_MAP = 42,
+ IRDMA_OP_GEN_AE = 43,
+ IRDMA_OP_QUERY_RDMA_FEATURES = 44,
+ IRDMA_OP_ALLOC_LOCAL_MAC_ENTRY = 45,
+ IRDMA_OP_ADD_LOCAL_MAC_ENTRY = 46,
+ IRDMA_OP_DELETE_LOCAL_MAC_ENTRY = 47,
+ IRDMA_OP_CQ_MODIFY = 48,
+ IRDMA_OP_SRQ_CREATE = 49,
+ IRDMA_OP_SRQ_MODIFY = 50,
+ IRDMA_OP_SRQ_DESTROY = 51,
+
+ /* Must be last entry*/
+ IRDMA_MAX_CQP_OPS = 52,
+};
+
+/* CQP SQ WQES */
+#define IRDMA_CQP_OP_CREATE_QP 0
+#define IRDMA_CQP_OP_MODIFY_QP 0x1
+#define IRDMA_CQP_OP_DESTROY_QP 0x02
+#define IRDMA_CQP_OP_CREATE_CQ 0x03
+#define IRDMA_CQP_OP_MODIFY_CQ 0x04
+#define IRDMA_CQP_OP_DESTROY_CQ 0x05
+#define IRDMA_CQP_OP_CREATE_SRQ 0x06
+#define IRDMA_CQP_OP_MODIFY_SRQ 0x07
+#define IRDMA_CQP_OP_DESTROY_SRQ 0x08
+#define IRDMA_CQP_OP_ALLOC_STAG 0x09
+#define IRDMA_CQP_OP_REG_MR 0x0a
+#define IRDMA_CQP_OP_QUERY_STAG 0x0b
+#define IRDMA_CQP_OP_REG_SMR 0x0c
+#define IRDMA_CQP_OP_DEALLOC_STAG 0x0d
+#define IRDMA_CQP_OP_MANAGE_LOC_MAC_TABLE 0x0e
+#define IRDMA_CQP_OP_MANAGE_ARP 0x0f
+#define IRDMA_CQP_OP_MANAGE_VF_PBLE_BP 0x10
+#define IRDMA_CQP_OP_MANAGE_PUSH_PAGES 0x11
+#define IRDMA_CQP_OP_QUERY_RDMA_FEATURES 0x12
+#define IRDMA_CQP_OP_UPLOAD_CONTEXT 0x13
+#define IRDMA_CQP_OP_ALLOCATE_LOC_MAC_TABLE_ENTRY 0x14
+#define IRDMA_CQP_OP_UPLOAD_CONTEXT 0x13
+#define IRDMA_CQP_OP_MANAGE_HMC_PM_FUNC_TABLE 0x15
+#define IRDMA_CQP_OP_CREATE_CEQ 0x16
+#define IRDMA_CQP_OP_DESTROY_CEQ 0x18
+#define IRDMA_CQP_OP_CREATE_AEQ 0x19
+#define IRDMA_CQP_OP_DESTROY_AEQ 0x1b
+#define IRDMA_CQP_OP_CREATE_ADDR_HANDLE 0x1c
+#define IRDMA_CQP_OP_MODIFY_ADDR_HANDLE 0x1d
+#define IRDMA_CQP_OP_DESTROY_ADDR_HANDLE 0x1e
+#define IRDMA_CQP_OP_UPDATE_PE_SDS 0x1f
+#define IRDMA_CQP_OP_QUERY_FPM_VAL 0x20
+#define IRDMA_CQP_OP_COMMIT_FPM_VAL 0x21
+#define IRDMA_CQP_OP_FLUSH_WQES 0x22
+/* IRDMA_CQP_OP_GEN_AE is the same value as IRDMA_CQP_OP_FLUSH_WQES */
+#define IRDMA_CQP_OP_GEN_AE 0x22
+#define IRDMA_CQP_OP_MANAGE_APBVT 0x23
+#define IRDMA_CQP_OP_NOP 0x24
+#define IRDMA_CQP_OP_MANAGE_QUAD_HASH_TABLE_ENTRY 0x25
+#define IRDMA_CQP_OP_CREATE_MCAST_GRP 0x26
+#define IRDMA_CQP_OP_MODIFY_MCAST_GRP 0x27
+#define IRDMA_CQP_OP_DESTROY_MCAST_GRP 0x28
+#define IRDMA_CQP_OP_SUSPEND_QP 0x29
+#define IRDMA_CQP_OP_RESUME_QP 0x2a
+#define IRDMA_CQP_OP_SHMC_PAGES_ALLOCATED 0x2b
+#define IRDMA_CQP_OP_WORK_SCHED_NODE 0x2c
+#define IRDMA_CQP_OP_MANAGE_STATS 0x2d
+#define IRDMA_CQP_OP_GATHER_STATS 0x2e
+#define IRDMA_CQP_OP_UP_MAP 0x2f
+
+#define FLD_LS_64(dev, val, field) \
+ (((u64)(val) << (dev)->hw_shifts[field ## _S]) & (dev)->hw_masks[field ## _M])
+#define FLD_RS_64(dev, val, field) \
+ ((u64)((val) & (dev)->hw_masks[field ## _M]) >> (dev)->hw_shifts[field ## _S])
+#define FLD_LS_32(dev, val, field) \
+ (((val) << (dev)->hw_shifts[field ## _S]) & (dev)->hw_masks[field ## _M])
+#define FLD_RS_32(dev, val, field) \
+ ((u64)((val) & (dev)->hw_masks[field ## _M]) >> (dev)->hw_shifts[field ## _S])
+
+#define IRDMA_MAX_STATS_24 0xffffffULL
+#define IRDMA_MAX_STATS_32 0xffffffffULL
+#define IRDMA_MAX_STATS_48 0xffffffffffffULL
+#define IRDMA_MAX_STATS_56 0xffffffffffffffULL
+#define IRDMA_MAX_STATS_64 0xffffffffffffffffULL
+
+#define IRDMA_MAX_CQ_READ_THRESH 0x3FFFF
+#define IRDMA_CQPSQ_QHASH_VLANID GENMASK_ULL(43, 32)
+#define IRDMA_CQPSQ_QHASH_QPN GENMASK_ULL(49, 32)
+#define IRDMA_CQPSQ_QHASH_QS_HANDLE GENMASK_ULL(9, 0)
+#define IRDMA_CQPSQ_QHASH_SRC_PORT GENMASK_ULL(31, 16)
+#define IRDMA_CQPSQ_QHASH_DEST_PORT GENMASK_ULL(15, 0)
+#define IRDMA_CQPSQ_QHASH_ADDR0 GENMASK_ULL(63, 32)
+#define IRDMA_CQPSQ_QHASH_ADDR1 GENMASK_ULL(31, 0)
+#define IRDMA_CQPSQ_QHASH_ADDR2 GENMASK_ULL(63, 32)
+#define IRDMA_CQPSQ_QHASH_ADDR3 GENMASK_ULL(31, 0)
+#define IRDMA_CQPSQ_QHASH_WQEVALID BIT_ULL(63)
+#define IRDMA_CQPSQ_QHASH_OPCODE GENMASK_ULL(37, 32)
+#define IRDMA_CQPSQ_QHASH_MANAGE GENMASK_ULL(62, 61)
+#define IRDMA_CQPSQ_QHASH_IPV4VALID BIT_ULL(60)
+#define IRDMA_CQPSQ_QHASH_VLANVALID BIT_ULL(59)
+#define IRDMA_CQPSQ_QHASH_ENTRYTYPE GENMASK_ULL(44, 42)
+#define IRDMA_CQPSQ_STATS_WQEVALID BIT_ULL(63)
+#define IRDMA_CQPSQ_STATS_ALLOC_INST BIT_ULL(62)
+#define IRDMA_CQPSQ_STATS_USE_HMC_FCN_INDEX BIT_ULL(60)
+#define IRDMA_CQPSQ_STATS_USE_INST BIT_ULL(61)
+#define IRDMA_CQPSQ_STATS_OP GENMASK_ULL(37, 32)
+#define IRDMA_CQPSQ_STATS_INST_INDEX GENMASK_ULL(6, 0)
+#define IRDMA_CQPSQ_STATS_HMC_FCN_INDEX GENMASK_ULL(15, 0)
+#define IRDMA_CQPSQ_WS_WQEVALID BIT_ULL(63)
+#define IRDMA_CQPSQ_WS_NODEOP GENMASK_ULL(55, 52)
+#define IRDMA_SD_MAX GENMASK_ULL(15, 0)
+#define IRDMA_MEM_MAX GENMASK_ULL(15, 0)
+#define IRDMA_QP_MEM_LOC GENMASK_ULL(47, 44)
+#define IRDMA_MR_MEM_LOC GENMASK_ULL(27, 24)
+
+#define IRDMA_CQPSQ_WS_ENABLENODE BIT_ULL(62)
+#define IRDMA_CQPSQ_WS_NODETYPE BIT_ULL(61)
+#define IRDMA_CQPSQ_WS_PRIOTYPE GENMASK_ULL(60, 59)
+#define IRDMA_CQPSQ_WS_TC GENMASK_ULL(58, 56)
+#define IRDMA_CQPSQ_WS_VMVFTYPE GENMASK_ULL(55, 54)
+#define IRDMA_CQPSQ_WS_VMVFNUM GENMASK_ULL(51, 42)
+#define IRDMA_CQPSQ_WS_OP GENMASK_ULL(37, 32)
+#define IRDMA_CQPSQ_WS_PARENTID GENMASK_ULL(29, 16)
+#define IRDMA_CQPSQ_WS_NODEID GENMASK_ULL(13, 0)
+#define IRDMA_CQPSQ_WS_VSI GENMASK_ULL(63, 48)
+#define IRDMA_CQPSQ_WS_WEIGHT GENMASK_ULL(38, 32)
+
+#define IRDMA_CQPSQ_UP_WQEVALID BIT_ULL(63)
+#define IRDMA_CQPSQ_UP_USEVLAN BIT_ULL(62)
+#define IRDMA_CQPSQ_UP_USEOVERRIDE BIT_ULL(61)
+#define IRDMA_CQPSQ_UP_OP GENMASK_ULL(37, 32)
+#define IRDMA_CQPSQ_UP_HMCFCNIDX GENMASK_ULL(15, 0)
+#define IRDMA_CQPSQ_UP_CNPOVERRIDE GENMASK_ULL(37, 32)
+#define IRDMA_CQPSQ_QUERY_RDMA_FEATURES_WQEVALID BIT_ULL(63)
+#define IRDMA_CQPSQ_QUERY_RDMA_FEATURES_BUF_LEN GENMASK_ULL(31, 0)
+#define IRDMA_CQPSQ_QUERY_RDMA_FEATURES_OP GENMASK_ULL(37, 32)
+#define IRDMA_CQPSQ_QUERY_RDMA_FEATURES_HW_MODEL_USED GENMASK_ULL(47, 32)
+#define IRDMA_CQPSQ_QUERY_RDMA_FEATURES_HW_MAJOR_VERSION GENMASK_ULL(23, 16)
+#define IRDMA_CQPSQ_QUERY_RDMA_FEATURES_HW_MINOR_VERSION GENMASK_ULL(7, 0)
+#define IRDMA_CQPHC_SQSIZE GENMASK_ULL(11, 8)
+#define IRDMA_CQPHC_DISABLE_PFPDUS BIT_ULL(1)
+#define IRDMA_CQPHC_ROCEV2_RTO_POLICY BIT_ULL(2)
+#define IRDMA_CQPHC_PROTOCOL_USED GENMASK_ULL(4, 3)
+#define IRDMA_CQPHC_MIN_RATE GENMASK_ULL(51, 48)
+#define IRDMA_CQPHC_MIN_DEC_FACTOR GENMASK_ULL(59, 56)
+#define IRDMA_CQPHC_DCQCN_T GENMASK_ULL(15, 0)
+#define IRDMA_CQPHC_HAI_FACTOR GENMASK_ULL(47, 32)
+#define IRDMA_CQPHC_RAI_FACTOR GENMASK_ULL(63, 48)
+#define IRDMA_CQPHC_DCQCN_B GENMASK_ULL(24, 0)
+#define IRDMA_CQPHC_DCQCN_F GENMASK_ULL(27, 25)
+#define IRDMA_CQPHC_CC_CFG_VALID BIT_ULL(31)
+#define IRDMA_CQPHC_RREDUCE_MPERIOD GENMASK_ULL(63, 32)
+#define IRDMA_CQPHC_HW_MINVER GENMASK_ULL(15, 0)
+
+#define IRDMA_CQPHC_HW_MAJVER_GEN_1 0
+#define IRDMA_CQPHC_HW_MAJVER_GEN_2 1
+#define IRDMA_CQPHC_HW_MAJVER_GEN_3 2
+#define IRDMA_CQPHC_HW_MAJVER GENMASK_ULL(31, 16)
+#define IRDMA_CQPHC_CEQPERVF GENMASK_ULL(39, 32)
+
+#define IRDMA_CQPHC_ENABLED_VFS GENMASK_ULL(37, 32)
+
+#define IRDMA_CQPHC_HMC_PROFILE GENMASK_ULL(2, 0)
+#define IRDMA_CQPHC_SVER GENMASK_ULL(31, 24)
+#define IRDMA_CQPHC_SQBASE GENMASK_ULL(63, 9)
+
+#define IRDMA_CQPHC_TIMESTAMP_OVERRIDE BIT_ULL(5)
+#define IRDMA_CQPHC_TS_SHIFT GENMASK_ULL(12, 8)
+#define IRDMA_CQPHC_EN_FINE_GRAINED_TIMERS BIT_ULL(0)
+
+#define IRDMA_CQPHC_OOISC_BLKSIZE GENMASK_ULL(63, 60)
+#define IRDMA_CQPHC_RRSP_BLKSIZE GENMASK_ULL(59, 56)
+#define IRDMA_CQPHC_Q1_BLKSIZE GENMASK_ULL(55, 52)
+#define IRDMA_CQPHC_XMIT_BLKSIZE GENMASK_ULL(51, 48)
+#define IRDMA_CQPHC_BLKSIZES_VALID BIT_ULL(4)
+
+#define IRDMA_CQPHC_QPCTX GENMASK_ULL(63, 0)
+#define IRDMA_QP_DBSA_HW_SQ_TAIL GENMASK_ULL(14, 0)
+#define IRDMA_CQ_DBSA_CQEIDX GENMASK_ULL(19, 0)
+#define IRDMA_CQ_DBSA_SW_CQ_SELECT GENMASK_ULL(13, 0)
+#define IRDMA_CQ_DBSA_ARM_NEXT BIT_ULL(14)
+#define IRDMA_CQ_DBSA_ARM_NEXT_SE BIT_ULL(15)
+#define IRDMA_CQ_DBSA_ARM_SEQ_NUM GENMASK_ULL(17, 16)
+
+/* CQP and iWARP Completion Queue */
+#define IRDMA_CQ_QPCTX IRDMA_CQPHC_QPCTX
+
+#define IRDMA_CCQ_OPRETVAL GENMASK_ULL(31, 0)
+
+#define IRDMA_CCQ_DEFINFO GENMASK_ULL(63, 32)
+
+#define IRDMA_CQ_MINERR GENMASK_ULL(15, 0)
+#define IRDMA_CQ_MAJERR GENMASK_ULL(31, 16)
+#define IRDMA_CQ_WQEIDX GENMASK_ULL(46, 32)
+#define IRDMA_CQ_EXTCQE BIT_ULL(50)
+#define IRDMA_OOO_CMPL BIT_ULL(54)
+#define IRDMA_CQ_ERROR BIT_ULL(55)
+#define IRDMA_CQ_SQ BIT_ULL(62)
+
+#define IRDMA_CQ_SRQ BIT_ULL(52)
+#define IRDMA_CQ_VALID BIT_ULL(63)
+#define IRDMA_CQ_IMMVALID BIT_ULL(62)
+#define IRDMA_CQ_UDSMACVALID BIT_ULL(61)
+#define IRDMA_CQ_UDVLANVALID BIT_ULL(60)
+#define IRDMA_CQ_UDSMAC GENMASK_ULL(47, 0)
+#define IRDMA_CQ_UDVLAN GENMASK_ULL(63, 48)
+
+#define IRDMA_CQ_IMMDATALOW32 GENMASK_ULL(31, 0)
+#define IRDMA_CQ_IMMDATAUP32 GENMASK_ULL(63, 32)
+#define IRDMACQ_PAYLDLEN GENMASK_ULL(31, 0)
+#define IRDMACQ_TCPSEQNUMRTT GENMASK_ULL(63, 32)
+#define IRDMACQ_INVSTAG GENMASK_ULL(31, 0)
+#define IRDMACQ_QPID GENMASK_ULL(55, 32)
+
+#define IRDMACQ_UDSRCQPN GENMASK_ULL(31, 0)
+#define IRDMACQ_PSHDROP BIT_ULL(51)
+#define IRDMACQ_STAG BIT_ULL(53)
+#define IRDMACQ_IPV4 BIT_ULL(53)
+#define IRDMACQ_SOEVENT BIT_ULL(54)
+#define IRDMACQ_OP GENMASK_ULL(61, 56)
+
+#define IRDMA_CEQE_CQCTX GENMASK_ULL(62, 0)
+#define IRDMA_CEQE_VALID BIT_ULL(63)
+
+/* AEQE format */
+#define IRDMA_AEQE_COMPCTX IRDMA_CQPHC_QPCTX
+#define IRDMA_AEQE_QPCQID_LOW GENMASK_ULL(17, 0)
+#define IRDMA_AEQE_QPCQID_HI BIT_ULL(46)
+#define IRDMA_AEQE_WQDESCIDX GENMASK_ULL(32, 18)
+#define IRDMA_AEQE_OVERFLOW BIT_ULL(33)
+#define IRDMA_AEQE_AECODE GENMASK_ULL(45, 34)
+#define IRDMA_AEQE_AESRC GENMASK_ULL(53, 50)
+#define IRDMA_AEQE_IWSTATE GENMASK_ULL(56, 54)
+#define IRDMA_AEQE_TCPSTATE GENMASK_ULL(60, 57)
+#define IRDMA_AEQE_Q2DATA GENMASK_ULL(62, 61)
+#define IRDMA_AEQE_VALID BIT_ULL(63)
+
+#define IRDMA_AEQE_Q2DATA_GEN_3 GENMASK_ULL(5, 4)
+#define IRDMA_AEQE_TCPSTATE_GEN_3 GENMASK_ULL(3, 0)
+#define IRDMA_AEQE_QPCQID_GEN_3 GENMASK_ULL(24, 0)
+#define IRDMA_AEQE_AECODE_GEN_3 GENMASK_ULL(61, 50)
+#define IRDMA_AEQE_OVERFLOW_GEN_3 BIT_ULL(62)
+#define IRDMA_AEQE_WQDESCIDX_GEN_3 GENMASK_ULL(49, 32)
+#define IRDMA_AEQE_IWSTATE_GEN_3 GENMASK_ULL(31, 29)
+#define IRDMA_AEQE_AESRC_GEN_3 GENMASK_ULL(28, 25)
+#define IRDMA_AEQE_CMPL_CTXT_S 6
+#define IRDMA_AEQE_CMPL_CTXT GENMASK_ULL(63, 6)
+
+#define IRDMA_UDA_QPSQ_NEXT_HDR GENMASK_ULL(23, 16)
+#define IRDMA_UDA_QPSQ_OPCODE GENMASK_ULL(37, 32)
+#define IRDMA_UDA_QPSQ_L4LEN GENMASK_ULL(45, 42)
+#define IRDMA_GEN1_UDA_QPSQ_L4LEN GENMASK_ULL(27, 24)
+#define IRDMA_UDA_QPSQ_AHIDX GENMASK_ULL(16, 0)
+#define IRDMA_UDA_QPSQ_VALID BIT_ULL(63)
+#define IRDMA_UDA_QPSQ_SIGCOMPL BIT_ULL(62)
+#define IRDMA_UDA_QPSQ_MACLEN GENMASK_ULL(62, 56)
+#define IRDMA_UDA_QPSQ_IPLEN GENMASK_ULL(54, 48)
+#define IRDMA_UDA_QPSQ_L4T GENMASK_ULL(31, 30)
+#define IRDMA_UDA_QPSQ_IIPT GENMASK_ULL(29, 28)
+#define IRDMA_UDA_PAYLOADLEN GENMASK_ULL(13, 0)
+#define IRDMA_UDA_HDRLEN GENMASK_ULL(24, 16)
+#define IRDMA_VLAN_TAG_VALID BIT_ULL(50)
+#define IRDMA_UDA_L3PROTO GENMASK_ULL(1, 0)
+#define IRDMA_UDA_L4PROTO GENMASK_ULL(17, 16)
+#define IRDMA_UDA_QPSQ_DOLOOPBACK BIT_ULL(44)
+#define IRDMA_CQPSQ_BUFSIZE GENMASK_ULL(31, 0)
+#define IRDMA_CQPSQ_OPCODE GENMASK_ULL(37, 32)
+#define IRDMA_CQPSQ_WQEVALID BIT_ULL(63)
+#define IRDMA_CQPSQ_TPHVAL GENMASK_ULL(7, 0)
+
+#define IRDMA_CQPSQ_VSIIDX GENMASK_ULL(23, 8)
+#define IRDMA_CQPSQ_TPHEN BIT_ULL(60)
+
+#define IRDMA_CQPSQ_PBUFADDR IRDMA_CQPHC_QPCTX
+
+#define IRDMA_CQPSQ_PASID GENMASK_ULL(51, 32)
+#define IRDMA_CQPSQ_PASID_VALID BIT_ULL(62)
+
+/* Create/Modify/Destroy QP */
+
+#define IRDMA_CQPSQ_QP_NEWMSS GENMASK_ULL(45, 32)
+#define IRDMA_CQPSQ_QP_TERMLEN GENMASK_ULL(51, 48)
+
+#define IRDMA_CQPSQ_QP_QPCTX IRDMA_CQPHC_QPCTX
+
+#define IRDMA_CQPSQ_QP_QPID_S 0
+#define IRDMA_CQPSQ_QP_QPID_M (0xFFFFFFUL)
+
+#define IRDMA_CQPSQ_QP_OP_S 32
+#define IRDMA_CQPSQ_QP_OP_M IRDMACQ_OP_M
+#define IRDMA_CQPSQ_QP_ORDVALID BIT_ULL(42)
+#define IRDMA_CQPSQ_QP_TOECTXVALID BIT_ULL(43)
+#define IRDMA_CQPSQ_QP_CACHEDVARVALID BIT_ULL(44)
+#define IRDMA_CQPSQ_QP_VQ BIT_ULL(45)
+#define IRDMA_CQPSQ_QP_FORCELOOPBACK BIT_ULL(46)
+#define IRDMA_CQPSQ_QP_CQNUMVALID BIT_ULL(47)
+#define IRDMA_CQPSQ_QP_QPTYPE GENMASK_ULL(50, 48)
+#define IRDMA_CQPSQ_QP_MACVALID BIT_ULL(51)
+#define IRDMA_CQPSQ_QP_MSSCHANGE BIT_ULL(52)
+
+#define IRDMA_CQPSQ_QP_IGNOREMWBOUND BIT_ULL(54)
+#define IRDMA_CQPSQ_QP_REMOVEHASHENTRY BIT_ULL(55)
+#define IRDMA_CQPSQ_QP_TERMACT GENMASK_ULL(57, 56)
+#define IRDMA_CQPSQ_QP_RESETCON BIT_ULL(58)
+#define IRDMA_CQPSQ_QP_ARPTABIDXVALID BIT_ULL(59)
+#define IRDMA_CQPSQ_QP_NEXTIWSTATE GENMASK_ULL(62, 60)
+
+#define IRDMA_CQPSQ_QP_DBSHADOWADDR IRDMA_CQPHC_QPCTX
+
+#define IRDMA_CQPSQ_SRQ_RQSIZE GENMASK_ULL(3, 0)
+#define IRDMA_CQPSQ_SRQ_RQ_WQE_SIZE GENMASK_ULL(5, 4)
+#define IRDMA_CQPSQ_SRQ_SRQ_LIMIT GENMASK_ULL(43, 32)
+#define IRDMA_CQPSQ_SRQ_SRQCTX GENMASK_ULL(63, 6)
+#define IRDMA_CQPSQ_SRQ_PD_ID GENMASK_ULL(39, 16)
+#define IRDMA_CQPSQ_SRQ_SRQ_ID GENMASK_ULL(15, 0)
+#define IRDMA_CQPSQ_SRQ_OP GENMASK_ULL(37, 32)
+#define IRDMA_CQPSQ_SRQ_LEAF_PBL_SIZE GENMASK_ULL(45, 44)
+#define IRDMA_CQPSQ_SRQ_VIRTMAP BIT_ULL(47)
+#define IRDMA_CQPSQ_SRQ_TPH_EN BIT_ULL(60)
+#define IRDMA_CQPSQ_SRQ_ARM_LIMIT_EVENT BIT_ULL(61)
+#define IRDMA_CQPSQ_SRQ_FIRST_PM_PBL_IDX GENMASK_ULL(27, 0)
+#define IRDMA_CQPSQ_SRQ_TPH_VALUE GENMASK_ULL(7, 0)
+#define IRDMA_CQPSQ_SRQ_PHYSICAL_BUFFER_ADDR_S 8
+#define IRDMA_CQPSQ_SRQ_PHYSICAL_BUFFER_ADDR GENMASK_ULL(63, 8)
+#define IRDMA_CQPSQ_SRQ_DB_SHADOW_ADDR_S 6
+#define IRDMA_CQPSQ_SRQ_DB_SHADOW_ADDR GENMASK_ULL(63, 6)
+
+#define IRDMA_CQPSQ_CQ_CQSIZE GENMASK_ULL(20, 0)
+#define IRDMA_CQPSQ_CQ_CQCTX GENMASK_ULL(62, 0)
+#define IRDMA_CQPSQ_CQ_SHADOW_READ_THRESHOLD GENMASK(17, 0)
+
+#define IRDMA_CQPSQ_CQ_CQID_HIGH GENMASK_ULL(52, 50)
+#define IRDMA_CQPSQ_CQ_CEQID_HIGH GENMASK_ULL(59, 54)
+#define IRDMA_CQPSQ_CQ_OP GENMASK_ULL(37, 32)
+#define IRDMA_CQPSQ_CQ_CQRESIZE BIT_ULL(43)
+#define IRDMA_CQPSQ_CQ_LPBLSIZE GENMASK_ULL(45, 44)
+#define IRDMA_CQPSQ_CQ_CHKOVERFLOW BIT_ULL(46)
+#define IRDMA_CQPSQ_CQ_VIRTMAP BIT_ULL(47)
+#define IRDMA_CQPSQ_CQ_ENCEQEMASK BIT_ULL(48)
+#define IRDMA_CQPSQ_CQ_CEQIDVALID BIT_ULL(49)
+#define IRDMA_CQPSQ_CQ_AVOIDMEMCNFLCT BIT_ULL(61)
+#define IRDMA_CQPSQ_CQ_FIRSTPMPBLIDX GENMASK_ULL(27, 0)
+
+/* Allocate/Register/Register Shared/Deallocate Stag */
+#define IRDMA_CQPSQ_STAG_VA_FBO IRDMA_CQPHC_QPCTX
+#define IRDMA_CQPSQ_STAG_STAGLEN GENMASK_ULL(45, 0)
+#define IRDMA_CQPSQ_STAG_KEY GENMASK_ULL(7, 0)
+#define IRDMA_CQPSQ_STAG_IDX GENMASK_ULL(31, 8)
+#define IRDMA_CQPSQ_STAG_IDX_S 8
+#define IRDMA_CQPSQ_STAG_PARENTSTAGIDX GENMASK_ULL(55, 32)
+#define IRDMA_CQPSQ_STAG_MR BIT_ULL(43)
+#define IRDMA_CQPSQ_STAG_MWTYPE BIT_ULL(42)
+#define IRDMA_CQPSQ_STAG_MW1_BIND_DONT_VLDT_KEY BIT_ULL(58)
+#define IRDMA_CQPSQ_STAG_PDID_HI GENMASK_ULL(59, 54)
+
+#define IRDMA_CQPSQ_STAG_LPBLSIZE IRDMA_CQPSQ_CQ_LPBLSIZE
+#define IRDMA_CQPSQ_STAG_HPAGESIZE GENMASK_ULL(47, 46)
+#define IRDMA_CQPSQ_STAG_ARIGHTS GENMASK_ULL(52, 48)
+#define IRDMA_CQPSQ_STAG_REMACCENABLED BIT_ULL(53)
+#define IRDMA_CQPSQ_STAG_VABASEDTO BIT_ULL(59)
+#define IRDMA_CQPSQ_STAG_USEHMCFNIDX BIT_ULL(60)
+#define IRDMA_CQPSQ_STAG_USEPFRID BIT_ULL(61)
+
+#define IRDMA_CQPSQ_STAG_PBA IRDMA_CQPHC_QPCTX
+#define IRDMA_CQPSQ_STAG_HMCFNIDX GENMASK_ULL(15, 0)
+#define IRDMA_CQPSQ_STAG_REMOTE_ATOMIC_EN BIT_ULL(61)
+
+#define IRDMA_CQPSQ_STAG_FIRSTPMPBLIDX GENMASK_ULL(27, 0)
+#define IRDMA_CQPSQ_QUERYSTAG_IDX IRDMA_CQPSQ_STAG_IDX
+#define IRDMA_CQPSQ_MLM_TABLEIDX GENMASK_ULL(5, 0)
+#define IRDMA_CQPSQ_MLM_FREEENTRY BIT_ULL(62)
+#define IRDMA_CQPSQ_MLM_IGNORE_REF_CNT BIT_ULL(61)
+#define IRDMA_CQPSQ_MLM_MAC0 GENMASK_ULL(7, 0)
+#define IRDMA_CQPSQ_MLM_MAC1 GENMASK_ULL(15, 8)
+#define IRDMA_CQPSQ_MLM_MAC2 GENMASK_ULL(23, 16)
+#define IRDMA_CQPSQ_MLM_MAC3 GENMASK_ULL(31, 24)
+#define IRDMA_CQPSQ_MLM_MAC4 GENMASK_ULL(39, 32)
+#define IRDMA_CQPSQ_MLM_MAC5 GENMASK_ULL(47, 40)
+#define IRDMA_CQPSQ_MAT_REACHMAX GENMASK_ULL(31, 0)
+#define IRDMA_CQPSQ_MAT_MACADDR GENMASK_ULL(47, 0)
+#define IRDMA_CQPSQ_MAT_ARPENTRYIDX GENMASK_ULL(11, 0)
+#define IRDMA_CQPSQ_MAT_ENTRYVALID BIT_ULL(42)
+#define IRDMA_CQPSQ_MAT_PERMANENT BIT_ULL(43)
+#define IRDMA_CQPSQ_MAT_QUERY BIT_ULL(44)
+#define IRDMA_CQPSQ_MVPBP_PD_ENTRY_CNT GENMASK_ULL(9, 0)
+#define IRDMA_CQPSQ_MVPBP_FIRST_PD_INX GENMASK_ULL(24, 16)
+#define IRDMA_CQPSQ_MVPBP_SD_INX GENMASK_ULL(43, 32)
+#define IRDMA_CQPSQ_MVPBP_INV_PD_ENT BIT_ULL(62)
+#define IRDMA_CQPSQ_MVPBP_PD_PLPBA GENMASK_ULL(63, 3)
+
+/* Manage Push Page - MPP */
+#define IRDMA_INVALID_PUSH_PAGE_INDEX_GEN_1 0xffff
+#define IRDMA_INVALID_PUSH_PAGE_INDEX 0xffffffff
+#define IRDMA_CQPSQ_MPP_PPIDX GENMASK_ULL(31, 0)
+#define IRDMA_CQPSQ_MPP_PPTYPE GENMASK_ULL(61, 60)
+#define IRDMA_CQPSQ_MPP_FREE_PAGE BIT_ULL(62)
+
+/* Upload Context - UCTX */
+#define IRDMA_CQPSQ_UCTX_QPCTXADDR IRDMA_CQPHC_QPCTX
+#define IRDMA_CQPSQ_UCTX_QPID GENMASK_ULL(23, 0)
+#define IRDMA_CQPSQ_UCTX_QPTYPE GENMASK_ULL(51, 48)
+
+#define IRDMA_CQPSQ_UCTX_RAWFORMAT BIT_ULL(61)
+#define IRDMA_CQPSQ_UCTX_FREEZEQP BIT_ULL(62)
+
+#define IRDMA_CQPSQ_MHMC_VFIDX GENMASK_ULL(15, 0)
+#define IRDMA_CQPSQ_MHMC_FREEPMFN BIT_ULL(62)
+
+#define IRDMA_CQPSQ_SHMCRP_HMC_PROFILE GENMASK_ULL(2, 0)
+#define IRDMA_CQPSQ_SHMCRP_VFNUM GENMASK_ULL(37, 32)
+#define IRDMA_CQPSQ_CEQ_CEQSIZE GENMASK_ULL(21, 0)
+#define IRDMA_CQPSQ_CEQ_CEQID GENMASK_ULL(9, 0)
+
+#define IRDMA_CQPSQ_CEQ_CEQID_HIGH GENMASK_ULL(15, 10)
+
+#define IRDMA_CQPSQ_CEQ_LPBLSIZE IRDMA_CQPSQ_CQ_LPBLSIZE
+#define IRDMA_CQPSQ_CEQ_VMAP BIT_ULL(47)
+#define IRDMA_CQPSQ_CEQ_ITRNOEXPIRE BIT_ULL(46)
+#define IRDMA_CQPSQ_CEQ_FIRSTPMPBLIDX GENMASK_ULL(27, 0)
+#define IRDMA_CQPSQ_AEQ_AEQECNT GENMASK_ULL(18, 0)
+#define IRDMA_CQPSQ_AEQ_LPBLSIZE IRDMA_CQPSQ_CQ_LPBLSIZE
+#define IRDMA_CQPSQ_AEQ_VMAP BIT_ULL(47)
+#define IRDMA_CQPSQ_AEQ_FIRSTPMPBLIDX GENMASK_ULL(27, 0)
+
+#define IRDMA_COMMIT_FPM_QPCNT GENMASK_ULL(20, 0)
+#define IRDMA_COMMIT_FPM_BASE_S 32
+#define IRDMA_CQPSQ_CFPM_HMCFNID GENMASK_ULL(15, 0)
+
+#define IRDMA_CQPSQ_FWQE_AECODE GENMASK_ULL(15, 0)
+#define IRDMA_CQPSQ_FWQE_AESOURCE GENMASK_ULL(19, 16)
+#define IRDMA_CQPSQ_FWQE_RQMNERR GENMASK_ULL(15, 0)
+#define IRDMA_CQPSQ_FWQE_RQMJERR GENMASK_ULL(31, 16)
+#define IRDMA_CQPSQ_FWQE_SQMNERR GENMASK_ULL(47, 32)
+#define IRDMA_CQPSQ_FWQE_SQMJERR GENMASK_ULL(63, 48)
+#define IRDMA_CQPSQ_FWQE_QPID GENMASK_ULL(23, 0)
+#define IRDMA_CQPSQ_FWQE_GENERATE_AE BIT_ULL(59)
+#define IRDMA_CQPSQ_FWQE_USERFLCODE BIT_ULL(60)
+#define IRDMA_CQPSQ_FWQE_FLUSHSQ BIT_ULL(61)
+#define IRDMA_CQPSQ_FWQE_FLUSHRQ BIT_ULL(62)
+#define IRDMA_CQPSQ_FWQE_ERR_SQ_IDX_VALID BIT_ULL(42)
+#define IRDMA_CQPSQ_FWQE_ERR_SQ_IDX GENMASK_ULL(49, 32)
+#define IRDMA_CQPSQ_FWQE_ERR_RQ_IDX_VALID BIT_ULL(43)
+#define IRDMA_CQPSQ_FWQE_ERR_RQ_IDX GENMASK_ULL(46, 32)
+#define IRDMA_CQPSQ_MAPT_PORT GENMASK_ULL(15, 0)
+#define IRDMA_CQPSQ_MAPT_ADDPORT BIT_ULL(62)
+#define IRDMA_CQPSQ_UPESD_SDCMD GENMASK_ULL(31, 0)
+#define IRDMA_CQPSQ_UPESD_SDDATALOW GENMASK_ULL(31, 0)
+#define IRDMA_CQPSQ_UPESD_SDDATAHI GENMASK_ULL(63, 32)
+#define IRDMA_CQPSQ_UPESD_HMCFNID GENMASK_ULL(5, 0)
+#define IRDMA_CQPSQ_UPESD_ENTRY_VALID BIT_ULL(63)
+
+#define IRDMA_CQPSQ_UPESD_BM_PF 0
+#define IRDMA_CQPSQ_UPESD_BM_CP_LM 1
+#define IRDMA_CQPSQ_UPESD_BM_AXF 2
+#define IRDMA_CQPSQ_UPESD_BM_LM 4
+#define IRDMA_CQPSQ_UPESD_BM GENMASK_ULL(34, 32)
+#define IRDMA_CQPSQ_UPESD_ENTRY_COUNT GENMASK_ULL(3, 0)
+#define IRDMA_CQPSQ_UPESD_SKIP_ENTRY BIT_ULL(7)
+#define IRDMA_CQPSQ_SUSPENDQP_QPID GENMASK_ULL(23, 0)
+#define IRDMA_CQPSQ_RESUMEQP_QSHANDLE GENMASK_ULL(31, 0)
+#define IRDMA_CQPSQ_RESUMEQP_QPID GENMASK(23, 0)
+#define IRDMA_MANAGE_RSRC_VER2 BIT_ULL(2)
+
+#define IRDMA_CQPSQ_MIN_STAG_INVALID 0x0001
+#define IRDMA_CQPSQ_MIN_SUSPEND_PND 0x0005
+#define IRDMA_CQPSQ_MIN_DEF_CMPL 0x0006
+#define IRDMA_CQPSQ_MIN_OOO_CMPL 0x0007
+
+#define IRDMA_CQPSQ_MAJ_NO_ERROR 0x0000
+#define IRDMA_CQPSQ_MAJ_OBJCACHE_ERROR 0xF000
+#define IRDMA_CQPSQ_MAJ_CNTXTCACHE_ERROR 0xF001
+#define IRDMA_CQPSQ_MAJ_ERROR 0xFFFF
+#define IRDMAQPC_DDP_VER GENMASK_ULL(1, 0)
+#define IRDMAQPC_IBRDENABLE BIT_ULL(2)
+#define IRDMAQPC_IPV4 BIT_ULL(3)
+#define IRDMAQPC_NONAGLE BIT_ULL(4)
+#define IRDMAQPC_INSERTVLANTAG BIT_ULL(5)
+#define IRDMAQPC_ISQP1 BIT_ULL(6)
+#define IRDMAQPC_TIMESTAMP BIT_ULL(7)
+#define IRDMAQPC_RQWQESIZE GENMASK_ULL(9, 8)
+#define IRDMAQPC_INSERTL2TAG2 BIT_ULL(11)
+#define IRDMAQPC_LIMIT GENMASK_ULL(13, 12)
+
+#define IRDMAQPC_USE_SRQ BIT_ULL(10)
+#define IRDMAQPC_SRQ_ID GENMASK_ULL(15, 0)
+#define IRDMAQPC_PASID GENMASK_ULL(19, 0)
+#define IRDMAQPC_PASID_VALID BIT_ULL(11)
+
+#define IRDMAQPC_ECN_EN BIT_ULL(14)
+#define IRDMAQPC_DROPOOOSEG BIT_ULL(15)
+#define IRDMAQPC_DUPACK_THRESH GENMASK_ULL(18, 16)
+#define IRDMAQPC_ERR_RQ_IDX_VALID BIT_ULL(19)
+#define IRDMAQPC_DIS_VLAN_CHECKS GENMASK_ULL(21, 19)
+#define IRDMAQPC_DC_TCP_EN BIT_ULL(25)
+#define IRDMAQPC_RCVTPHEN BIT_ULL(28)
+#define IRDMAQPC_XMITTPHEN BIT_ULL(29)
+#define IRDMAQPC_RQTPHEN BIT_ULL(30)
+#define IRDMAQPC_SQTPHEN BIT_ULL(31)
+#define IRDMAQPC_PPIDX GENMASK_ULL(41, 32)
+#define IRDMAQPC_PMENA BIT_ULL(47)
+#define IRDMAQPC_RDMAP_VER GENMASK_ULL(63, 62)
+#define IRDMAQPC_ROCE_TVER GENMASK_ULL(63, 60)
+
+#define IRDMAQPC_SQADDR IRDMA_CQPHC_QPCTX
+#define IRDMAQPC_RQADDR IRDMA_CQPHC_QPCTX
+#define IRDMAQPC_TTL GENMASK_ULL(7, 0)
+#define IRDMAQPC_RQSIZE GENMASK_ULL(11, 8)
+#define IRDMAQPC_SQSIZE GENMASK_ULL(15, 12)
+#define IRDMAQPC_GEN1_SRCMACADDRIDX GENMASK(21, 16)
+#define IRDMAQPC_AVOIDSTRETCHACK BIT_ULL(23)
+#define IRDMAQPC_TOS GENMASK_ULL(31, 24)
+#define IRDMAQPC_SRCPORTNUM GENMASK_ULL(47, 32)
+#define IRDMAQPC_DESTPORTNUM GENMASK_ULL(63, 48)
+#define IRDMAQPC_DESTIPADDR0 GENMASK_ULL(63, 32)
+#define IRDMAQPC_DESTIPADDR1 GENMASK_ULL(31, 0)
+#define IRDMAQPC_DESTIPADDR2 GENMASK_ULL(63, 32)
+#define IRDMAQPC_DESTIPADDR3 GENMASK_ULL(31, 0)
+#define IRDMAQPC_SNDMSS GENMASK_ULL(29, 16)
+#define IRDMAQPC_SYN_RST_HANDLING GENMASK_ULL(31, 30)
+#define IRDMAQPC_VLANTAG GENMASK_ULL(47, 32)
+#define IRDMAQPC_ARPIDX GENMASK_ULL(63, 48)
+#define IRDMAQPC_FLOWLABEL GENMASK_ULL(19, 0)
+#define IRDMAQPC_WSCALE BIT_ULL(20)
+#define IRDMAQPC_KEEPALIVE BIT_ULL(21)
+#define IRDMAQPC_IGNORE_TCP_OPT BIT_ULL(22)
+#define IRDMAQPC_IGNORE_TCP_UNS_OPT BIT_ULL(23)
+#define IRDMAQPC_TCPSTATE GENMASK_ULL(31, 28)
+#define IRDMAQPC_RCVSCALE GENMASK_ULL(35, 32)
+#define IRDMAQPC_SNDSCALE GENMASK_ULL(43, 40)
+#define IRDMAQPC_PDIDX GENMASK_ULL(63, 48)
+#define IRDMAQPC_PDIDXHI GENMASK_ULL(21, 20)
+#define IRDMAQPC_PKEY GENMASK_ULL(47, 32)
+#define IRDMAQPC_ACKCREDITS GENMASK_ULL(24, 20)
+#define IRDMAQPC_QKEY GENMASK_ULL(63, 32)
+#define IRDMAQPC_DESTQP GENMASK_ULL(23, 0)
+#define IRDMAQPC_KALIVE_TIMER_MAX_PROBES GENMASK_ULL(23, 16)
+#define IRDMAQPC_KEEPALIVE_INTERVAL GENMASK_ULL(31, 24)
+#define IRDMAQPC_TIMESTAMP_RECENT GENMASK_ULL(31, 0)
+#define IRDMAQPC_TIMESTAMP_AGE GENMASK_ULL(63, 32)
+#define IRDMAQPC_SNDNXT GENMASK_ULL(31, 0)
+#define IRDMAQPC_ISN GENMASK_ULL(55, 32)
+#define IRDMAQPC_PSNNXT GENMASK_ULL(23, 0)
+#define IRDMAQPC_LSN GENMASK_ULL(55, 32)
+#define IRDMAQPC_SNDWND GENMASK_ULL(63, 32)
+#define IRDMAQPC_RCVNXT GENMASK_ULL(31, 0)
+#define IRDMAQPC_EPSN GENMASK_ULL(23, 0)
+#define IRDMAQPC_RCVWND GENMASK_ULL(63, 32)
+#define IRDMAQPC_SNDMAX GENMASK_ULL(31, 0)
+#define IRDMAQPC_SNDUNA GENMASK_ULL(63, 32)
+#define IRDMAQPC_PSNMAX GENMASK_ULL(23, 0)
+#define IRDMAQPC_PSNUNA GENMASK_ULL(55, 32)
+#define IRDMAQPC_SRTT GENMASK_ULL(31, 0)
+#define IRDMAQPC_RTTVAR GENMASK_ULL(63, 32)
+#define IRDMAQPC_SSTHRESH GENMASK_ULL(31, 0)
+#define IRDMAQPC_CWND GENMASK_ULL(63, 32)
+#define IRDMAQPC_CWNDROCE GENMASK_ULL(55, 32)
+#define IRDMAQPC_SNDWL1 GENMASK_ULL(31, 0)
+#define IRDMAQPC_SNDWL2 GENMASK_ULL(63, 32)
+#define IRDMAQPC_MINRNR_TIMER GENMASK_ULL(4, 0)
+#define IRDMAQPC_ERR_RQ_IDX GENMASK_ULL(46, 32)
+#define IRDMAQPC_RTOMIN GENMASK_ULL(63, 57)
+#define IRDMAQPC_MAXSNDWND GENMASK_ULL(31, 0)
+#define IRDMAQPC_REXMIT_THRESH GENMASK_ULL(53, 48)
+#define IRDMAQPC_RNRNAK_THRESH GENMASK_ULL(56, 54)
+#define IRDMAQPC_TXCQNUM GENMASK_ULL(24, 0)
+#define IRDMAQPC_RXCQNUM GENMASK_ULL(56, 32)
+#define IRDMAQPC_STAT_INDEX GENMASK_ULL(6, 0)
+#define IRDMAQPC_Q2ADDR GENMASK_ULL(63, 8)
+#define IRDMAQPC_LASTBYTESENT GENMASK_ULL(7, 0)
+#define IRDMAQPC_MACADDRESS GENMASK_ULL(63, 16)
+#define IRDMAQPC_ORDSIZE GENMASK_ULL(7, 0)
+
+#define IRDMAQPC_LOCALACKTIMEOUT GENMASK_ULL(12, 8)
+#define IRDMAQPC_RNRNAK_TMR GENMASK_ULL(4, 0)
+#define IRDMAQPC_ORDSIZE_GEN3 GENMASK_ULL(10, 0)
+#define IRDMAQPC_REMOTE_ATOMIC_EN BIT_ULL(18)
+#define IRDMAQPC_STAT_INDEX_GEN3 GENMASK_ULL(47, 32)
+#define IRDMAQPC_PKT_LIMIT GENMASK_ULL(55, 48)
+
+#define IRDMAQPC_IRDSIZE GENMASK_ULL(18, 16)
+
+#define IRDMAQPC_IRDSIZE_GEN3 GENMASK_ULL(17, 14)
+
+#define IRDMAQPC_UDPRIVCQENABLE BIT_ULL(19)
+#define IRDMAQPC_WRRDRSPOK BIT_ULL(20)
+#define IRDMAQPC_RDOK BIT_ULL(21)
+#define IRDMAQPC_SNDMARKERS BIT_ULL(22)
+#define IRDMAQPC_DCQCNENABLE BIT_ULL(22)
+#define IRDMAQPC_FW_CC_ENABLE BIT_ULL(28)
+#define IRDMAQPC_RCVNOICRC BIT_ULL(31)
+#define IRDMAQPC_BINDEN BIT_ULL(23)
+#define IRDMAQPC_FASTREGEN BIT_ULL(24)
+#define IRDMAQPC_PRIVEN BIT_ULL(25)
+#define IRDMAQPC_TIMELYENABLE BIT_ULL(27)
+#define IRDMAQPC_THIGH GENMASK_ULL(63, 52)
+#define IRDMAQPC_TLOW GENMASK_ULL(39, 32)
+#define IRDMAQPC_REMENDPOINTIDX GENMASK_ULL(16, 0)
+#define IRDMAQPC_USESTATSINSTANCE BIT_ULL(26)
+#define IRDMAQPC_IWARPMODE BIT_ULL(28)
+#define IRDMAQPC_RCVMARKERS BIT_ULL(29)
+#define IRDMAQPC_ALIGNHDRS BIT_ULL(30)
+#define IRDMAQPC_RCVNOMPACRC BIT_ULL(31)
+#define IRDMAQPC_RCVMARKOFFSET GENMASK_ULL(40, 32)
+#define IRDMAQPC_SNDMARKOFFSET GENMASK_ULL(56, 48)
+
+#define IRDMAQPC_QPCOMPCTX IRDMA_CQPHC_QPCTX
+#define IRDMAQPC_SQTPHVAL GENMASK_ULL(7, 0)
+#define IRDMAQPC_RQTPHVAL GENMASK_ULL(15, 8)
+#define IRDMAQPC_QSHANDLE GENMASK_ULL(25, 16)
+#define IRDMAQPC_EXCEPTION_LAN_QUEUE GENMASK_ULL(43, 32)
+#define IRDMAQPC_LOCAL_IPADDR3 GENMASK_ULL(31, 0)
+#define IRDMAQPC_LOCAL_IPADDR2 GENMASK_ULL(63, 32)
+#define IRDMAQPC_LOCAL_IPADDR1 GENMASK_ULL(31, 0)
+#define IRDMAQPC_LOCAL_IPADDR0 GENMASK_ULL(63, 32)
+#define IRDMA_FW_VER_MINOR GENMASK_ULL(15, 0)
+#define IRDMA_FW_VER_MAJOR GENMASK_ULL(31, 16)
+#define IRDMA_FEATURE_INFO GENMASK_ULL(47, 0)
+#define IRDMA_FEATURE_CNT GENMASK_ULL(47, 32)
+#define IRDMA_FEATURE_TYPE GENMASK_ULL(63, 48)
+#define IRDMA_FEATURE_RSRC_MAX GENMASK_ULL(31, 0)
+
+#define IRDMAQPSQ_OPCODE GENMASK_ULL(37, 32)
+#define IRDMAQPSQ_COPY_HOST_PBL BIT_ULL(43)
+#define IRDMAQPSQ_ADDFRAGCNT GENMASK_ULL(41, 38)
+#define IRDMAQPSQ_PUSHWQE BIT_ULL(56)
+#define IRDMAQPSQ_STREAMMODE BIT_ULL(58)
+#define IRDMAQPSQ_WAITFORRCVPDU BIT_ULL(59)
+#define IRDMAQPSQ_READFENCE BIT_ULL(60)
+#define IRDMAQPSQ_LOCALFENCE BIT_ULL(61)
+#define IRDMAQPSQ_UDPHEADER BIT_ULL(61)
+#define IRDMAQPSQ_L4LEN GENMASK_ULL(45, 42)
+#define IRDMAQPSQ_SIGCOMPL BIT_ULL(62)
+#define IRDMAQPSQ_VALID BIT_ULL(63)
+
+#define IRDMAQPSQ_FRAG_TO IRDMA_CQPHC_QPCTX
+#define IRDMAQPSQ_FRAG_VALID BIT_ULL(63)
+#define IRDMAQPSQ_FRAG_LEN GENMASK_ULL(62, 32)
+#define IRDMAQPSQ_FRAG_STAG GENMASK_ULL(31, 0)
+#define IRDMAQPSQ_GEN1_FRAG_LEN GENMASK_ULL(31, 0)
+#define IRDMAQPSQ_GEN1_FRAG_STAG GENMASK_ULL(63, 32)
+#define IRDMAQPSQ_REMSTAGINV GENMASK_ULL(31, 0)
+#define IRDMAQPSQ_DESTQKEY GENMASK_ULL(31, 0)
+#define IRDMAQPSQ_DESTQPN GENMASK_ULL(55, 32)
+#define IRDMAQPSQ_AHID GENMASK_ULL(24, 0)
+#define IRDMAQPSQ_INLINEDATAFLAG BIT_ULL(57)
+
+#define IRDMA_INLINE_VALID_S 7
+#define IRDMAQPSQ_INLINEDATALEN GENMASK_ULL(55, 48)
+#define IRDMAQPSQ_IMMDATAFLAG BIT_ULL(47)
+#define IRDMAQPSQ_REPORTRTT BIT_ULL(46)
+
+#define IRDMAQPSQ_IMMDATA GENMASK_ULL(63, 0)
+#define IRDMAQPSQ_REMSTAG GENMASK_ULL(31, 0)
+
+#define IRDMAQPSQ_REMTO IRDMA_CQPHC_QPCTX
+
+#define IRDMAQPSQ_STAG GENMASK_ULL(31, 0)
+#define IRDMAQPSQ_REMOTE_STAG GENMASK_ULL(31, 0)
+
+#define IRDMAQPSQ_STAGRIGHTS GENMASK_ULL(52, 48)
+#define IRDMAQPSQ_VABASEDTO BIT_ULL(53)
+#define IRDMAQPSQ_MEMWINDOWTYPE BIT_ULL(54)
+
+#define IRDMAQPSQ_MWLEN IRDMA_CQPHC_QPCTX
+#define IRDMAQPSQ_PARENTMRSTAG GENMASK_ULL(63, 32)
+#define IRDMAQPSQ_MWSTAG GENMASK_ULL(31, 0)
+
+#define IRDMAQPSQ_BASEVA_TO_FBO IRDMA_CQPHC_QPCTX
+
+#define IRDMAQPSQ_REMOTE_ATOMICS_EN BIT_ULL(55)
+
+#define IRDMAQPSQ_LOCSTAG GENMASK_ULL(31, 0)
+
+#define IRDMAQPSQ_STAGKEY GENMASK_ULL(7, 0)
+#define IRDMAQPSQ_STAGINDEX GENMASK_ULL(31, 8)
+#define IRDMAQPSQ_COPYHOSTPBLS BIT_ULL(43)
+#define IRDMAQPSQ_LPBLSIZE GENMASK_ULL(45, 44)
+#define IRDMAQPSQ_HPAGESIZE GENMASK_ULL(47, 46)
+#define IRDMAQPSQ_STAGLEN GENMASK_ULL(40, 0)
+#define IRDMAQPSQ_FIRSTPMPBLIDXLO GENMASK_ULL(63, 48)
+#define IRDMAQPSQ_FIRSTPMPBLIDXHI GENMASK_ULL(11, 0)
+#define IRDMAQPSQ_PBLADDR GENMASK_ULL(63, 12)
+
+/* iwarp QP RQ WQE common fields */
+#define IRDMAQPRQ_ADDFRAGCNT IRDMAQPSQ_ADDFRAGCNT
+#define IRDMAQPRQ_VALID IRDMAQPSQ_VALID
+#define IRDMAQPRQ_COMPLCTX IRDMA_CQPHC_QPCTX
+#define IRDMAQPRQ_FRAG_LEN IRDMAQPSQ_FRAG_LEN
+#define IRDMAQPRQ_STAG IRDMAQPSQ_FRAG_STAG
+#define IRDMAQPRQ_TO IRDMAQPSQ_FRAG_TO
+
+#define IRDMAPFINT_OICR_HMC_ERR_M BIT(26)
+#define IRDMAPFINT_OICR_PE_PUSH_M BIT(27)
+#define IRDMAPFINT_OICR_PE_CRITERR_M BIT(28)
+
+#define IRDMA_QUERY_FPM_LOC_MEM_PAGES GENMASK_ULL(63, 32)
+#define IRDMA_QUERY_FPM_MAX_QPS GENMASK_ULL(31, 0)
+#define IRDMA_QUERY_FPM_MAX_CQS GENMASK_ULL(31, 0)
+#define IRDMA_QUERY_FPM_FIRST_PE_SD_INDEX GENMASK_ULL(13, 0)
+#define IRDMA_QUERY_FPM_MAX_PE_SDS GENMASK_ULL(44, 32)
+#define IRDMA_QUERY_FPM_MAX_PE_SDS_GEN3 GENMASK_ULL(47, 32)
+#define IRDMA_QUERY_FPM_MAX_CEQS GENMASK_ULL(9, 0)
+#define IRDMA_QUERY_FPM_MAX_IRD GENMASK_ULL(53, 50)
+#define IRDMA_QUERY_FPM_XFBLOCKSIZE GENMASK_ULL(63, 32)
+#define IRDMA_QUERY_FPM_Q1BLOCKSIZE GENMASK_ULL(63, 32)
+#define IRDMA_QUERY_FPM_HTMULTIPLIER GENMASK_ULL(19, 16)
+#define IRDMA_QUERY_FPM_TIMERBUCKET GENMASK_ULL(47, 32)
+#define IRDMA_QUERY_FPM_RRFBLOCKSIZE GENMASK_ULL(63, 32)
+#define IRDMA_QUERY_FPM_RRFFLBLOCKSIZE GENMASK_ULL(63, 32)
+#define IRDMA_QUERY_FPM_OOISCFBLOCKSIZE GENMASK_ULL(63, 32)
+#define IRDMA_SHMC_PAGE_ALLOCATED_HMC_FN_ID GENMASK_ULL(5, 0)
+
+#define IRDMA_GET_CURRENT_AEQ_ELEM(_aeq) \
+ ( \
+ (_aeq)->aeqe_base[IRDMA_RING_CURRENT_TAIL((_aeq)->aeq_ring)].buf \
+ )
+
+#define IRDMA_GET_CURRENT_CEQ_ELEM(_ceq) \
+ ( \
+ (_ceq)->ceqe_base[IRDMA_RING_CURRENT_TAIL((_ceq)->ceq_ring)].buf \
+ )
+
+#define IRDMA_GET_CEQ_ELEM_AT_POS(_ceq, _pos) \
+ ( \
+ (_ceq)->ceqe_base[_pos].buf \
+ )
+
+#define IRDMA_RING_GET_NEXT_TAIL(_ring, _idx) \
+ ( \
+ ((_ring).tail + (_idx)) % (_ring).size \
+ )
+
+#define IRDMA_CQP_INIT_WQE(wqe) memset(wqe, 0, 64)
+
+#define IRDMA_GET_CURRENT_CQ_ELEM(_cq) \
+ ( \
+ (_cq)->cq_base[IRDMA_RING_CURRENT_HEAD((_cq)->cq_ring)].buf \
+ )
+#define IRDMA_GET_CURRENT_EXTENDED_CQ_ELEM(_cq) \
+ ( \
+ ((struct irdma_extended_cqe *) \
+ ((_cq)->cq_base))[IRDMA_RING_CURRENT_HEAD((_cq)->cq_ring)].buf \
+ )
+
+#define IRDMA_RING_INIT(_ring, _size) \
+ { \
+ (_ring).head = 0; \
+ (_ring).tail = 0; \
+ (_ring).size = (_size); \
+ }
+#define IRDMA_RING_SIZE(_ring) ((_ring).size)
+#define IRDMA_RING_CURRENT_HEAD(_ring) ((_ring).head)
+#define IRDMA_RING_CURRENT_TAIL(_ring) ((_ring).tail)
+
+#define IRDMA_RING_MOVE_HEAD(_ring, _retcode) \
+ { \
+ register u32 size; \
+ size = (_ring).size; \
+ if (!IRDMA_RING_FULL_ERR(_ring)) { \
+ (_ring).head = ((_ring).head + 1) % size; \
+ (_retcode) = 0; \
+ } else { \
+ (_retcode) = -ENOMEM; \
+ } \
+ }
+#define IRDMA_RING_MOVE_HEAD_BY_COUNT(_ring, _count, _retcode) \
+ { \
+ register u32 size; \
+ size = (_ring).size; \
+ if ((IRDMA_RING_USED_QUANTA(_ring) + (_count)) < size) { \
+ (_ring).head = ((_ring).head + (_count)) % size; \
+ (_retcode) = 0; \
+ } else { \
+ (_retcode) = -ENOMEM; \
+ } \
+ }
+#define IRDMA_SQ_RING_MOVE_HEAD(_ring, _retcode) \
+ { \
+ register u32 size; \
+ size = (_ring).size; \
+ if (!IRDMA_SQ_RING_FULL_ERR(_ring)) { \
+ (_ring).head = ((_ring).head + 1) % size; \
+ (_retcode) = 0; \
+ } else { \
+ (_retcode) = -ENOMEM; \
+ } \
+ }
+#define IRDMA_SQ_RING_MOVE_HEAD_BY_COUNT(_ring, _count, _retcode) \
+ { \
+ register u32 size; \
+ size = (_ring).size; \
+ if ((IRDMA_RING_USED_QUANTA(_ring) + (_count)) < (size - 256)) { \
+ (_ring).head = ((_ring).head + (_count)) % size; \
+ (_retcode) = 0; \
+ } else { \
+ (_retcode) = -ENOMEM; \
+ } \
+ }
+#define IRDMA_RING_MOVE_HEAD_BY_COUNT_NOCHECK(_ring, _count) \
+ (_ring).head = ((_ring).head + (_count)) % (_ring).size
+
+#define IRDMA_RING_MOVE_TAIL(_ring) \
+ (_ring).tail = ((_ring).tail + 1) % (_ring).size
+
+#define IRDMA_RING_MOVE_HEAD_NOCHECK(_ring) \
+ (_ring).head = ((_ring).head + 1) % (_ring).size
+
+#define IRDMA_RING_MOVE_TAIL_BY_COUNT(_ring, _count) \
+ (_ring).tail = ((_ring).tail + (_count)) % (_ring).size
+
+#define IRDMA_RING_SET_TAIL(_ring, _pos) \
+ (_ring).tail = (_pos) % (_ring).size
+
+#define IRDMA_RING_FULL_ERR(_ring) \
+ ( \
+ (IRDMA_RING_USED_QUANTA(_ring) == ((_ring).size - 1)) \
+ )
+
+#define IRDMA_ERR_RING_FULL2(_ring) \
+ ( \
+ (IRDMA_RING_USED_QUANTA(_ring) == ((_ring).size - 2)) \
+ )
+
+#define IRDMA_ERR_RING_FULL3(_ring) \
+ ( \
+ (IRDMA_RING_USED_QUANTA(_ring) == ((_ring).size - 3)) \
+ )
+
+#define IRDMA_SQ_RING_FULL_ERR(_ring) \
+ ( \
+ (IRDMA_RING_USED_QUANTA(_ring) == ((_ring).size - 257)) \
+ )
+
+#define IRDMA_ERR_SQ_RING_FULL2(_ring) \
+ ( \
+ (IRDMA_RING_USED_QUANTA(_ring) == ((_ring).size - 258)) \
+ )
+#define IRDMA_ERR_SQ_RING_FULL3(_ring) \
+ ( \
+ (IRDMA_RING_USED_QUANTA(_ring) == ((_ring).size - 259)) \
+ )
+#define IRDMA_RING_MORE_WORK(_ring) \
+ ( \
+ (IRDMA_RING_USED_QUANTA(_ring) != 0) \
+ )
+
+#define IRDMA_RING_USED_QUANTA(_ring) \
+ ( \
+ (((_ring).head + (_ring).size - (_ring).tail) % (_ring).size) \
+ )
+
+#define IRDMA_RING_FREE_QUANTA(_ring) \
+ ( \
+ ((_ring).size - IRDMA_RING_USED_QUANTA(_ring) - 1) \
+ )
+
+#define IRDMA_SQ_RING_FREE_QUANTA(_ring) \
+ ( \
+ ((_ring).size - IRDMA_RING_USED_QUANTA(_ring) - 257) \
+ )
+
+#define IRDMA_ATOMIC_RING_MOVE_HEAD(_ring, index, _retcode) \
+ { \
+ index = IRDMA_RING_CURRENT_HEAD(_ring); \
+ IRDMA_RING_MOVE_HEAD(_ring, _retcode); \
+ }
+
+enum irdma_qp_wqe_size {
+ IRDMA_WQE_SIZE_32 = 32,
+ IRDMA_WQE_SIZE_64 = 64,
+ IRDMA_WQE_SIZE_96 = 96,
+ IRDMA_WQE_SIZE_128 = 128,
+ IRDMA_WQE_SIZE_256 = 256,
+};
+
+enum irdma_ws_node_op {
+ IRDMA_ADD_NODE = 0,
+ IRDMA_MODIFY_NODE,
+ IRDMA_DEL_NODE,
+};
+
+enum { IRDMA_Q_ALIGNMENT_M = (128 - 1),
+ IRDMA_AEQ_ALIGNMENT_M = (256 - 1),
+ IRDMA_Q2_ALIGNMENT_M = (256 - 1),
+ IRDMA_CEQ_ALIGNMENT_M = (256 - 1),
+ IRDMA_CQ0_ALIGNMENT_M = (256 - 1),
+ IRDMA_HOST_CTX_ALIGNMENT_M = (4 - 1),
+ IRDMA_SHADOWAREA_M = (128 - 1),
+ IRDMA_FPM_QUERY_BUF_ALIGNMENT_M = (4 - 1),
+ IRDMA_FPM_COMMIT_BUF_ALIGNMENT_M = (4 - 1),
+};
+
+enum irdma_alignment {
+ IRDMA_CQP_ALIGNMENT = 0x200,
+ IRDMA_AEQ_ALIGNMENT = 0x100,
+ IRDMA_CEQ_ALIGNMENT = 0x100,
+ IRDMA_CQ0_ALIGNMENT = 0x100,
+ IRDMA_SD_BUF_ALIGNMENT = 0x80,
+ IRDMA_FEATURE_BUF_ALIGNMENT = 0x10,
+};
+
+enum icrdma_protocol_used {
+ ICRDMA_ANY_PROTOCOL = 0,
+ ICRDMA_IWARP_PROTOCOL_ONLY = 1,
+ ICRDMA_ROCE_PROTOCOL_ONLY = 2,
+};
+
+/**
+ * set_64bit_val - set 64 bit value to hw wqe
+ * @wqe_words: wqe addr to write
+ * @byte_index: index in wqe
+ * @val: value to write
+ **/
+static inline void set_64bit_val(__le64 *wqe_words, u32 byte_index, u64 val)
+{
+ wqe_words[byte_index >> 3] = cpu_to_le64(val);
+}
+
+/**
+ * set_32bit_val - set 32 bit value to hw wqe
+ * @wqe_words: wqe addr to write
+ * @byte_index: index in wqe
+ * @val: value to write
+ **/
+static inline void set_32bit_val(__le32 *wqe_words, u32 byte_index, u32 val)
+{
+ wqe_words[byte_index >> 2] = cpu_to_le32(val);
+}
+
+/**
+ * get_64bit_val - read 64 bit value from wqe
+ * @wqe_words: wqe addr
+ * @byte_index: index to read from
+ * @val: read value
+ **/
+static inline void get_64bit_val(__le64 *wqe_words, u32 byte_index, u64 *val)
+{
+ *val = le64_to_cpu(wqe_words[byte_index >> 3]);
+}
+
+/**
+ * get_32bit_val - read 32 bit value from wqe
+ * @wqe_words: wqe addr
+ * @byte_index: index to reaad from
+ * @val: return 32 bit value
+ **/
+static inline void get_32bit_val(__le32 *wqe_words, u32 byte_index, u32 *val)
+{
+ *val = le32_to_cpu(wqe_words[byte_index >> 2]);
+}
+#endif /* IRDMA_DEFS_H */
diff --git a/drivers/infiniband/hw/irdma/hmc.c b/drivers/infiniband/hw/irdma/hmc.c
new file mode 100644
index 000000000000..da18add141da
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/hmc.c
@@ -0,0 +1,709 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2015 - 2021 Intel Corporation */
+#include "osdep.h"
+#include "hmc.h"
+#include "defs.h"
+#include "type.h"
+#include "protos.h"
+#include "virtchnl.h"
+
+/**
+ * irdma_find_sd_index_limit - finds segment descriptor index limit
+ * @hmc_info: pointer to the HMC configuration information structure
+ * @type: type of HMC resources we're searching
+ * @idx: starting index for the object
+ * @cnt: number of objects we're trying to create
+ * @sd_idx: pointer to return index of the segment descriptor in question
+ * @sd_limit: pointer to return the maximum number of segment descriptors
+ *
+ * This function calculates the segment descriptor index and index limit
+ * for the resource defined by irdma_hmc_rsrc_type.
+ */
+
+static void irdma_find_sd_index_limit(struct irdma_hmc_info *hmc_info, u32 type,
+ u32 idx, u32 cnt, u32 *sd_idx,
+ u32 *sd_limit)
+{
+ u64 fpm_addr, fpm_limit;
+
+ fpm_addr = hmc_info->hmc_obj[(type)].base +
+ hmc_info->hmc_obj[type].size * idx;
+ fpm_limit = fpm_addr + hmc_info->hmc_obj[type].size * cnt;
+ *sd_idx = (u32)(fpm_addr / IRDMA_HMC_DIRECT_BP_SIZE);
+ *sd_limit = (u32)((fpm_limit - 1) / IRDMA_HMC_DIRECT_BP_SIZE);
+ *sd_limit += 1;
+}
+
+/**
+ * irdma_find_pd_index_limit - finds page descriptor index limit
+ * @hmc_info: pointer to the HMC configuration information struct
+ * @type: HMC resource type we're examining
+ * @idx: starting index for the object
+ * @cnt: number of objects we're trying to create
+ * @pd_idx: pointer to return page descriptor index
+ * @pd_limit: pointer to return page descriptor index limit
+ *
+ * Calculates the page descriptor index and index limit for the resource
+ * defined by irdma_hmc_rsrc_type.
+ */
+
+static void irdma_find_pd_index_limit(struct irdma_hmc_info *hmc_info, u32 type,
+ u32 idx, u32 cnt, u32 *pd_idx,
+ u32 *pd_limit)
+{
+ u64 fpm_adr, fpm_limit;
+
+ fpm_adr = hmc_info->hmc_obj[type].base +
+ hmc_info->hmc_obj[type].size * idx;
+ fpm_limit = fpm_adr + (hmc_info)->hmc_obj[(type)].size * (cnt);
+ *pd_idx = (u32)(fpm_adr / IRDMA_HMC_PAGED_BP_SIZE);
+ *pd_limit = (u32)((fpm_limit - 1) / IRDMA_HMC_PAGED_BP_SIZE);
+ *pd_limit += 1;
+}
+
+/**
+ * irdma_set_sd_entry - setup entry for sd programming
+ * @pa: physical addr
+ * @idx: sd index
+ * @type: paged or direct sd
+ * @entry: sd entry ptr
+ */
+static void irdma_set_sd_entry(u64 pa, u32 idx, enum irdma_sd_entry_type type,
+ struct irdma_update_sd_entry *entry)
+{
+ entry->data = pa |
+ FIELD_PREP(IRDMA_PFHMC_SDDATALOW_PMSDBPCOUNT, IRDMA_HMC_MAX_BP_COUNT) |
+ FIELD_PREP(IRDMA_PFHMC_SDDATALOW_PMSDTYPE,
+ type == IRDMA_SD_TYPE_PAGED ? 0 : 1) |
+ FIELD_PREP(IRDMA_PFHMC_SDDATALOW_PMSDVALID, 1);
+
+ entry->cmd = idx | FIELD_PREP(IRDMA_PFHMC_SDCMD_PMSDWR, 1) | BIT(15);
+}
+
+/**
+ * irdma_clr_sd_entry - setup entry for sd clear
+ * @idx: sd index
+ * @type: paged or direct sd
+ * @entry: sd entry ptr
+ */
+static void irdma_clr_sd_entry(u32 idx, enum irdma_sd_entry_type type,
+ struct irdma_update_sd_entry *entry)
+{
+ entry->data = FIELD_PREP(IRDMA_PFHMC_SDDATALOW_PMSDBPCOUNT, IRDMA_HMC_MAX_BP_COUNT) |
+ FIELD_PREP(IRDMA_PFHMC_SDDATALOW_PMSDTYPE,
+ type == IRDMA_SD_TYPE_PAGED ? 0 : 1);
+
+ entry->cmd = idx | FIELD_PREP(IRDMA_PFHMC_SDCMD_PMSDWR, 1) | BIT(15);
+}
+
+/**
+ * irdma_invalidate_pf_hmc_pd - Invalidates the pd cache in the hardware for PF
+ * @dev: pointer to our device struct
+ * @sd_idx: segment descriptor index
+ * @pd_idx: page descriptor index
+ */
+static inline void irdma_invalidate_pf_hmc_pd(struct irdma_sc_dev *dev, u32 sd_idx,
+ u32 pd_idx)
+{
+ u32 val = FIELD_PREP(IRDMA_PFHMC_PDINV_PMSDIDX, sd_idx) |
+ FIELD_PREP(IRDMA_PFHMC_PDINV_PMSDPARTSEL, 1) |
+ FIELD_PREP(IRDMA_PFHMC_PDINV_PMPDIDX, pd_idx);
+
+ writel(val, dev->hw_regs[IRDMA_PFHMC_PDINV]);
+}
+
+/**
+ * irdma_hmc_sd_one - setup 1 sd entry for cqp
+ * @dev: pointer to the device structure
+ * @hmc_fn_id: hmc's function id
+ * @pa: physical addr
+ * @sd_idx: sd index
+ * @type: paged or direct sd
+ * @setsd: flag to set or clear sd
+ */
+int irdma_hmc_sd_one(struct irdma_sc_dev *dev, u8 hmc_fn_id, u64 pa, u32 sd_idx,
+ enum irdma_sd_entry_type type, bool setsd)
+{
+ struct irdma_update_sds_info sdinfo;
+
+ sdinfo.cnt = 1;
+ sdinfo.hmc_fn_id = hmc_fn_id;
+ if (setsd)
+ irdma_set_sd_entry(pa, sd_idx, type, sdinfo.entry);
+ else
+ irdma_clr_sd_entry(sd_idx, type, sdinfo.entry);
+ return dev->cqp->process_cqp_sds(dev, &sdinfo);
+}
+
+/**
+ * irdma_hmc_sd_grp - setup group of sd entries for cqp
+ * @dev: pointer to the device structure
+ * @hmc_info: pointer to the HMC configuration information struct
+ * @sd_index: sd index
+ * @sd_cnt: number of sd entries
+ * @setsd: flag to set or clear sd
+ */
+static int irdma_hmc_sd_grp(struct irdma_sc_dev *dev,
+ struct irdma_hmc_info *hmc_info, u32 sd_index,
+ u32 sd_cnt, bool setsd)
+{
+ struct irdma_hmc_sd_entry *sd_entry;
+ struct irdma_update_sds_info sdinfo = {};
+ u64 pa;
+ u32 i;
+ int ret_code = 0;
+
+ sdinfo.hmc_fn_id = hmc_info->hmc_fn_id;
+ for (i = sd_index; i < sd_index + sd_cnt; i++) {
+ sd_entry = &hmc_info->sd_table.sd_entry[i];
+ if (!sd_entry || (!sd_entry->valid && setsd) ||
+ (sd_entry->valid && !setsd))
+ continue;
+ if (setsd) {
+ pa = (sd_entry->entry_type == IRDMA_SD_TYPE_PAGED) ?
+ sd_entry->u.pd_table.pd_page_addr.pa :
+ sd_entry->u.bp.addr.pa;
+ irdma_set_sd_entry(pa, i, sd_entry->entry_type,
+ &sdinfo.entry[sdinfo.cnt]);
+ } else {
+ irdma_clr_sd_entry(i, sd_entry->entry_type,
+ &sdinfo.entry[sdinfo.cnt]);
+ }
+ sdinfo.cnt++;
+ if (sdinfo.cnt == IRDMA_MAX_SD_ENTRIES) {
+ ret_code = dev->cqp->process_cqp_sds(dev, &sdinfo);
+ if (ret_code) {
+ ibdev_dbg(to_ibdev(dev),
+ "HMC: sd_programming failed err=%d\n",
+ ret_code);
+ return ret_code;
+ }
+
+ sdinfo.cnt = 0;
+ }
+ }
+ if (sdinfo.cnt)
+ ret_code = dev->cqp->process_cqp_sds(dev, &sdinfo);
+
+ return ret_code;
+}
+
+/**
+ * irdma_hmc_finish_add_sd_reg - program sd entries for objects
+ * @dev: pointer to the device structure
+ * @info: create obj info
+ */
+static int irdma_hmc_finish_add_sd_reg(struct irdma_sc_dev *dev,
+ struct irdma_hmc_create_obj_info *info)
+{
+ if (info->start_idx >= info->hmc_info->hmc_obj[info->rsrc_type].cnt)
+ return -EINVAL;
+
+ if ((info->start_idx + info->count) >
+ info->hmc_info->hmc_obj[info->rsrc_type].cnt)
+ return -EINVAL;
+
+ if (!info->add_sd_cnt)
+ return 0;
+ return irdma_hmc_sd_grp(dev, info->hmc_info,
+ info->hmc_info->sd_indexes[0], info->add_sd_cnt,
+ true);
+}
+
+/**
+ * irdma_sc_create_hmc_obj - allocate backing store for hmc objects
+ * @dev: pointer to the device structure
+ * @info: pointer to irdma_hmc_create_obj_info struct
+ *
+ * This will allocate memory for PDs and backing pages and populate
+ * the sd and pd entries.
+ */
+int irdma_sc_create_hmc_obj(struct irdma_sc_dev *dev,
+ struct irdma_hmc_create_obj_info *info)
+{
+ struct irdma_hmc_sd_entry *sd_entry;
+ u32 sd_idx, sd_lmt;
+ u32 pd_idx = 0, pd_lmt = 0;
+ u32 pd_idx1 = 0, pd_lmt1 = 0;
+ u32 i, j;
+ bool pd_error = false;
+ int ret_code = 0;
+
+ if (dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3 &&
+ dev->hmc_info->hmc_obj[info->rsrc_type].mem_loc == IRDMA_LOC_MEM)
+ return 0;
+
+ if (info->start_idx >= info->hmc_info->hmc_obj[info->rsrc_type].cnt)
+ return -EINVAL;
+
+ if ((info->start_idx + info->count) >
+ info->hmc_info->hmc_obj[info->rsrc_type].cnt) {
+ ibdev_dbg(to_ibdev(dev),
+ "HMC: error type %u, start = %u, req cnt %u, cnt = %u\n",
+ info->rsrc_type, info->start_idx, info->count,
+ info->hmc_info->hmc_obj[info->rsrc_type].cnt);
+ return -EINVAL;
+ }
+
+ irdma_find_sd_index_limit(info->hmc_info, info->rsrc_type,
+ info->start_idx, info->count, &sd_idx,
+ &sd_lmt);
+ if (sd_idx >= info->hmc_info->sd_table.sd_cnt ||
+ sd_lmt > info->hmc_info->sd_table.sd_cnt) {
+ return -EINVAL;
+ }
+
+ irdma_find_pd_index_limit(info->hmc_info, info->rsrc_type,
+ info->start_idx, info->count, &pd_idx,
+ &pd_lmt);
+
+ for (j = sd_idx; j < sd_lmt; j++) {
+ ret_code = irdma_add_sd_table_entry(dev->hw, info->hmc_info, j,
+ info->entry_type,
+ IRDMA_HMC_DIRECT_BP_SIZE);
+ if (ret_code)
+ goto exit_sd_error;
+
+ sd_entry = &info->hmc_info->sd_table.sd_entry[j];
+ if (sd_entry->entry_type == IRDMA_SD_TYPE_PAGED &&
+ (dev->hmc_info == info->hmc_info &&
+ info->rsrc_type != IRDMA_HMC_IW_PBLE)) {
+ pd_idx1 = max(pd_idx, (j * IRDMA_HMC_MAX_BP_COUNT));
+ pd_lmt1 = min(pd_lmt, (j + 1) * IRDMA_HMC_MAX_BP_COUNT);
+ for (i = pd_idx1; i < pd_lmt1; i++) {
+ /* update the pd table entry */
+ ret_code = irdma_add_pd_table_entry(dev,
+ info->hmc_info,
+ i, NULL);
+ if (ret_code) {
+ pd_error = true;
+ break;
+ }
+ }
+ if (pd_error) {
+ while (i && (i > pd_idx1)) {
+ irdma_remove_pd_bp(dev, info->hmc_info,
+ i - 1);
+ i--;
+ }
+ }
+ }
+ if (sd_entry->valid)
+ continue;
+
+ info->hmc_info->sd_indexes[info->add_sd_cnt] = (u16)j;
+ info->add_sd_cnt++;
+ sd_entry->valid = true;
+ }
+ return irdma_hmc_finish_add_sd_reg(dev, info);
+
+exit_sd_error:
+ while (j && (j > sd_idx)) {
+ sd_entry = &info->hmc_info->sd_table.sd_entry[j - 1];
+ switch (sd_entry->entry_type) {
+ case IRDMA_SD_TYPE_PAGED:
+ pd_idx1 = max(pd_idx, (j - 1) * IRDMA_HMC_MAX_BP_COUNT);
+ pd_lmt1 = min(pd_lmt, (j * IRDMA_HMC_MAX_BP_COUNT));
+ for (i = pd_idx1; i < pd_lmt1; i++)
+ irdma_prep_remove_pd_page(info->hmc_info, i);
+ break;
+ case IRDMA_SD_TYPE_DIRECT:
+ irdma_prep_remove_pd_page(info->hmc_info, (j - 1));
+ break;
+ default:
+ ret_code = -EINVAL;
+ break;
+ }
+ j--;
+ }
+
+ return ret_code;
+}
+
+/**
+ * irdma_finish_del_sd_reg - delete sd entries for objects
+ * @dev: pointer to the device structure
+ * @info: dele obj info
+ * @reset: true if called before reset
+ */
+static int irdma_finish_del_sd_reg(struct irdma_sc_dev *dev,
+ struct irdma_hmc_del_obj_info *info,
+ bool reset)
+{
+ struct irdma_hmc_sd_entry *sd_entry;
+ int ret_code = 0;
+ u32 i, sd_idx;
+ struct irdma_dma_mem *mem;
+
+ if (dev->privileged && !reset)
+ ret_code = irdma_hmc_sd_grp(dev, info->hmc_info,
+ info->hmc_info->sd_indexes[0],
+ info->del_sd_cnt, false);
+
+ if (ret_code)
+ ibdev_dbg(to_ibdev(dev), "HMC: error cqp sd sd_grp\n");
+ for (i = 0; i < info->del_sd_cnt; i++) {
+ sd_idx = info->hmc_info->sd_indexes[i];
+ sd_entry = &info->hmc_info->sd_table.sd_entry[sd_idx];
+ mem = (sd_entry->entry_type == IRDMA_SD_TYPE_PAGED) ?
+ &sd_entry->u.pd_table.pd_page_addr :
+ &sd_entry->u.bp.addr;
+
+ if (!mem || !mem->va) {
+ ibdev_dbg(to_ibdev(dev), "HMC: error cqp sd mem\n");
+ } else {
+ dma_free_coherent(dev->hw->device, mem->size, mem->va,
+ mem->pa);
+ mem->va = NULL;
+ }
+ }
+
+ return ret_code;
+}
+
+/**
+ * irdma_sc_del_hmc_obj - remove pe hmc objects
+ * @dev: pointer to the device structure
+ * @info: pointer to irdma_hmc_del_obj_info struct
+ * @reset: true if called before reset
+ *
+ * This will de-populate the SDs and PDs. It frees
+ * the memory for PDS and backing storage. After this function is returned,
+ * caller should deallocate memory allocated previously for
+ * book-keeping information about PDs and backing storage.
+ */
+int irdma_sc_del_hmc_obj(struct irdma_sc_dev *dev,
+ struct irdma_hmc_del_obj_info *info, bool reset)
+{
+ struct irdma_hmc_pd_table *pd_table;
+ u32 sd_idx, sd_lmt;
+ u32 pd_idx, pd_lmt, rel_pd_idx;
+ u32 i, j;
+ int ret_code = 0;
+
+ if (dev->hmc_info->hmc_obj[info->rsrc_type].mem_loc == IRDMA_LOC_MEM)
+ return 0;
+
+ if (info->start_idx >= info->hmc_info->hmc_obj[info->rsrc_type].cnt) {
+ ibdev_dbg(to_ibdev(dev),
+ "HMC: error start_idx[%04d] >= [type %04d].cnt[%04d]\n",
+ info->start_idx, info->rsrc_type,
+ info->hmc_info->hmc_obj[info->rsrc_type].cnt);
+ return -EINVAL;
+ }
+
+ if ((info->start_idx + info->count) >
+ info->hmc_info->hmc_obj[info->rsrc_type].cnt) {
+ ibdev_dbg(to_ibdev(dev),
+ "HMC: error start_idx[%04d] + count %04d >= [type %04d].cnt[%04d]\n",
+ info->start_idx, info->count, info->rsrc_type,
+ info->hmc_info->hmc_obj[info->rsrc_type].cnt);
+ return -EINVAL;
+ }
+
+ irdma_find_pd_index_limit(info->hmc_info, info->rsrc_type,
+ info->start_idx, info->count, &pd_idx,
+ &pd_lmt);
+
+ for (j = pd_idx; j < pd_lmt; j++) {
+ sd_idx = j / IRDMA_HMC_PD_CNT_IN_SD;
+
+ if (!info->hmc_info->sd_table.sd_entry[sd_idx].valid)
+ continue;
+
+ if (info->hmc_info->sd_table.sd_entry[sd_idx].entry_type !=
+ IRDMA_SD_TYPE_PAGED)
+ continue;
+
+ rel_pd_idx = j % IRDMA_HMC_PD_CNT_IN_SD;
+ pd_table = &info->hmc_info->sd_table.sd_entry[sd_idx].u.pd_table;
+ if (pd_table->pd_entry &&
+ pd_table->pd_entry[rel_pd_idx].valid) {
+ ret_code = irdma_remove_pd_bp(dev, info->hmc_info, j);
+ if (ret_code) {
+ ibdev_dbg(to_ibdev(dev),
+ "HMC: remove_pd_bp error\n");
+ return ret_code;
+ }
+ }
+ }
+
+ irdma_find_sd_index_limit(info->hmc_info, info->rsrc_type,
+ info->start_idx, info->count, &sd_idx,
+ &sd_lmt);
+ if (sd_idx >= info->hmc_info->sd_table.sd_cnt ||
+ sd_lmt > info->hmc_info->sd_table.sd_cnt) {
+ ibdev_dbg(to_ibdev(dev), "HMC: invalid sd_idx\n");
+ return -EINVAL;
+ }
+
+ for (i = sd_idx; i < sd_lmt; i++) {
+ pd_table = &info->hmc_info->sd_table.sd_entry[i].u.pd_table;
+ if (!info->hmc_info->sd_table.sd_entry[i].valid)
+ continue;
+ switch (info->hmc_info->sd_table.sd_entry[i].entry_type) {
+ case IRDMA_SD_TYPE_DIRECT:
+ ret_code = irdma_prep_remove_sd_bp(info->hmc_info, i);
+ if (!ret_code) {
+ info->hmc_info->sd_indexes[info->del_sd_cnt] =
+ (u16)i;
+ info->del_sd_cnt++;
+ }
+ break;
+ case IRDMA_SD_TYPE_PAGED:
+ ret_code = irdma_prep_remove_pd_page(info->hmc_info, i);
+ if (ret_code)
+ break;
+ if (dev->hmc_info != info->hmc_info &&
+ info->rsrc_type == IRDMA_HMC_IW_PBLE &&
+ pd_table->pd_entry) {
+ kfree(pd_table->pd_entry_virt_mem.va);
+ pd_table->pd_entry = NULL;
+ }
+ info->hmc_info->sd_indexes[info->del_sd_cnt] = (u16)i;
+ info->del_sd_cnt++;
+ break;
+ default:
+ break;
+ }
+ }
+ return irdma_finish_del_sd_reg(dev, info, reset);
+}
+
+/**
+ * irdma_add_sd_table_entry - Adds a segment descriptor to the table
+ * @hw: pointer to our hw struct
+ * @hmc_info: pointer to the HMC configuration information struct
+ * @sd_index: segment descriptor index to manipulate
+ * @type: what type of segment descriptor we're manipulating
+ * @direct_mode_sz: size to alloc in direct mode
+ */
+int irdma_add_sd_table_entry(struct irdma_hw *hw,
+ struct irdma_hmc_info *hmc_info, u32 sd_index,
+ enum irdma_sd_entry_type type, u64 direct_mode_sz)
+{
+ struct irdma_hmc_sd_entry *sd_entry;
+ struct irdma_dma_mem dma_mem;
+ u64 alloc_len;
+
+ sd_entry = &hmc_info->sd_table.sd_entry[sd_index];
+ if (!sd_entry->valid) {
+ if (type == IRDMA_SD_TYPE_PAGED)
+ alloc_len = IRDMA_HMC_PAGED_BP_SIZE;
+ else
+ alloc_len = direct_mode_sz;
+
+ /* allocate a 4K pd page or 2M backing page */
+ dma_mem.size = ALIGN(alloc_len, IRDMA_HMC_PD_BP_BUF_ALIGNMENT);
+ dma_mem.va = dma_alloc_coherent(hw->device, dma_mem.size,
+ &dma_mem.pa, GFP_KERNEL);
+ if (!dma_mem.va)
+ return -ENOMEM;
+ if (type == IRDMA_SD_TYPE_PAGED) {
+ struct irdma_virt_mem *vmem =
+ &sd_entry->u.pd_table.pd_entry_virt_mem;
+
+ vmem->size = sizeof(struct irdma_hmc_pd_entry) * 512;
+ vmem->va = kzalloc(vmem->size, GFP_KERNEL);
+ if (!vmem->va) {
+ dma_free_coherent(hw->device, dma_mem.size,
+ dma_mem.va, dma_mem.pa);
+ dma_mem.va = NULL;
+ return -ENOMEM;
+ }
+ sd_entry->u.pd_table.pd_entry = vmem->va;
+
+ memcpy(&sd_entry->u.pd_table.pd_page_addr, &dma_mem,
+ sizeof(sd_entry->u.pd_table.pd_page_addr));
+ } else {
+ memcpy(&sd_entry->u.bp.addr, &dma_mem,
+ sizeof(sd_entry->u.bp.addr));
+
+ sd_entry->u.bp.sd_pd_index = sd_index;
+ }
+
+ hmc_info->sd_table.sd_entry[sd_index].entry_type = type;
+ hmc_info->sd_table.use_cnt++;
+ }
+ if (sd_entry->entry_type == IRDMA_SD_TYPE_DIRECT)
+ sd_entry->u.bp.use_cnt++;
+
+ return 0;
+}
+
+/**
+ * irdma_add_pd_table_entry - Adds page descriptor to the specified table
+ * @dev: pointer to our device structure
+ * @hmc_info: pointer to the HMC configuration information structure
+ * @pd_index: which page descriptor index to manipulate
+ * @rsrc_pg: if not NULL, use preallocated page instead of allocating new one.
+ *
+ * This function:
+ * 1. Initializes the pd entry
+ * 2. Adds pd_entry in the pd_table
+ * 3. Mark the entry valid in irdma_hmc_pd_entry structure
+ * 4. Initializes the pd_entry's ref count to 1
+ * assumptions:
+ * 1. The memory for pd should be pinned down, physically contiguous and
+ * aligned on 4K boundary and zeroed memory.
+ * 2. It should be 4K in size.
+ */
+int irdma_add_pd_table_entry(struct irdma_sc_dev *dev,
+ struct irdma_hmc_info *hmc_info, u32 pd_index,
+ struct irdma_dma_mem *rsrc_pg)
+{
+ struct irdma_hmc_pd_table *pd_table;
+ struct irdma_hmc_pd_entry *pd_entry;
+ struct irdma_dma_mem mem;
+ struct irdma_dma_mem *page = &mem;
+ u32 sd_idx, rel_pd_idx;
+ u64 *pd_addr;
+ u64 page_desc;
+
+ if (pd_index / IRDMA_HMC_PD_CNT_IN_SD >= hmc_info->sd_table.sd_cnt)
+ return -EINVAL;
+
+ sd_idx = (pd_index / IRDMA_HMC_PD_CNT_IN_SD);
+ if (hmc_info->sd_table.sd_entry[sd_idx].entry_type !=
+ IRDMA_SD_TYPE_PAGED)
+ return 0;
+
+ rel_pd_idx = (pd_index % IRDMA_HMC_PD_CNT_IN_SD);
+ pd_table = &hmc_info->sd_table.sd_entry[sd_idx].u.pd_table;
+ pd_entry = &pd_table->pd_entry[rel_pd_idx];
+ if (!pd_entry->valid) {
+ if (rsrc_pg) {
+ pd_entry->rsrc_pg = true;
+ page = rsrc_pg;
+ } else {
+ page->size = ALIGN(IRDMA_HMC_PAGED_BP_SIZE,
+ IRDMA_HMC_PD_BP_BUF_ALIGNMENT);
+ page->va = dma_alloc_coherent(dev->hw->device,
+ page->size, &page->pa,
+ GFP_KERNEL);
+ if (!page->va)
+ return -ENOMEM;
+
+ pd_entry->rsrc_pg = false;
+ }
+
+ memcpy(&pd_entry->bp.addr, page, sizeof(pd_entry->bp.addr));
+ pd_entry->bp.sd_pd_index = pd_index;
+ pd_entry->bp.entry_type = IRDMA_SD_TYPE_PAGED;
+ page_desc = page->pa | 0x1;
+ pd_addr = pd_table->pd_page_addr.va;
+ pd_addr += rel_pd_idx;
+ memcpy(pd_addr, &page_desc, sizeof(*pd_addr));
+ pd_entry->sd_index = sd_idx;
+ pd_entry->valid = true;
+ pd_table->use_cnt++;
+
+ if (hmc_info->hmc_fn_id < dev->hw_attrs.first_hw_vf_fpm_id &&
+ dev->privileged)
+ irdma_invalidate_pf_hmc_pd(dev, sd_idx, rel_pd_idx);
+ }
+ pd_entry->bp.use_cnt++;
+
+ return 0;
+}
+
+/**
+ * irdma_remove_pd_bp - remove a backing page from a page descriptor
+ * @dev: pointer to our HW structure
+ * @hmc_info: pointer to the HMC configuration information structure
+ * @idx: the page index
+ *
+ * This function:
+ * 1. Marks the entry in pd table (for paged address mode) or in sd table
+ * (for direct address mode) invalid.
+ * 2. Write to register PMPDINV to invalidate the backing page in FV cache
+ * 3. Decrement the ref count for the pd _entry
+ * assumptions:
+ * 1. Caller can deallocate the memory used by backing storage after this
+ * function returns.
+ */
+int irdma_remove_pd_bp(struct irdma_sc_dev *dev,
+ struct irdma_hmc_info *hmc_info, u32 idx)
+{
+ struct irdma_hmc_pd_entry *pd_entry;
+ struct irdma_hmc_pd_table *pd_table;
+ struct irdma_hmc_sd_entry *sd_entry;
+ u32 sd_idx, rel_pd_idx;
+ struct irdma_dma_mem *mem;
+ u64 *pd_addr;
+
+ sd_idx = idx / IRDMA_HMC_PD_CNT_IN_SD;
+ rel_pd_idx = idx % IRDMA_HMC_PD_CNT_IN_SD;
+ if (sd_idx >= hmc_info->sd_table.sd_cnt)
+ return -EINVAL;
+
+ sd_entry = &hmc_info->sd_table.sd_entry[sd_idx];
+ if (sd_entry->entry_type != IRDMA_SD_TYPE_PAGED)
+ return -EINVAL;
+
+ pd_table = &hmc_info->sd_table.sd_entry[sd_idx].u.pd_table;
+ pd_entry = &pd_table->pd_entry[rel_pd_idx];
+ if (--pd_entry->bp.use_cnt)
+ return 0;
+
+ pd_entry->valid = false;
+ pd_table->use_cnt--;
+ pd_addr = pd_table->pd_page_addr.va;
+ pd_addr += rel_pd_idx;
+ memset(pd_addr, 0, sizeof(u64));
+ if (dev->privileged && dev->hmc_fn_id == hmc_info->hmc_fn_id)
+ irdma_invalidate_pf_hmc_pd(dev, sd_idx, idx);
+
+ if (!pd_entry->rsrc_pg) {
+ mem = &pd_entry->bp.addr;
+ if (!mem || !mem->va)
+ return -EINVAL;
+
+ dma_free_coherent(dev->hw->device, mem->size, mem->va,
+ mem->pa);
+ mem->va = NULL;
+ }
+ if (!pd_table->use_cnt)
+ kfree(pd_table->pd_entry_virt_mem.va);
+
+ return 0;
+}
+
+/**
+ * irdma_prep_remove_sd_bp - Prepares to remove a backing page from a sd entry
+ * @hmc_info: pointer to the HMC configuration information structure
+ * @idx: the page index
+ */
+int irdma_prep_remove_sd_bp(struct irdma_hmc_info *hmc_info, u32 idx)
+{
+ struct irdma_hmc_sd_entry *sd_entry;
+
+ sd_entry = &hmc_info->sd_table.sd_entry[idx];
+ if (--sd_entry->u.bp.use_cnt)
+ return -EBUSY;
+
+ hmc_info->sd_table.use_cnt--;
+ sd_entry->valid = false;
+
+ return 0;
+}
+
+/**
+ * irdma_prep_remove_pd_page - Prepares to remove a PD page from sd entry.
+ * @hmc_info: pointer to the HMC configuration information structure
+ * @idx: segment descriptor index to find the relevant page descriptor
+ */
+int irdma_prep_remove_pd_page(struct irdma_hmc_info *hmc_info, u32 idx)
+{
+ struct irdma_hmc_sd_entry *sd_entry;
+
+ sd_entry = &hmc_info->sd_table.sd_entry[idx];
+
+ if (sd_entry->u.pd_table.use_cnt)
+ return -EBUSY;
+
+ sd_entry->valid = false;
+ hmc_info->sd_table.use_cnt--;
+
+ return 0;
+}
diff --git a/drivers/infiniband/hw/irdma/hmc.h b/drivers/infiniband/hw/irdma/hmc.h
new file mode 100644
index 000000000000..257a5d22aa96
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/hmc.h
@@ -0,0 +1,186 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2015 - 2020 Intel Corporation */
+#ifndef IRDMA_HMC_H
+#define IRDMA_HMC_H
+
+#include "defs.h"
+
+#define IRDMA_HMC_MAX_BP_COUNT 512
+#define IRDMA_MAX_SD_ENTRIES 11
+#define IRDMA_HW_DBG_HMC_INVALID_BP_MARK 0xca
+#define IRDMA_HMC_INFO_SIGNATURE 0x484d5347
+#define IRDMA_HMC_PD_CNT_IN_SD 512
+#define IRDMA_HMC_DIRECT_BP_SIZE 0x200000
+#define IRDMA_HMC_MAX_SD_COUNT 8192
+#define IRDMA_HMC_PAGED_BP_SIZE 4096
+#define IRDMA_HMC_PD_BP_BUF_ALIGNMENT 4096
+#define IRDMA_FIRST_VF_FPM_ID 8
+#define FPM_MULTIPLIER 1024
+#define IRDMA_OBJ_LOC_MEM_BIT 0x4
+#define IRDMA_XF_MULTIPLIER 16
+#define IRDMA_RRF_MULTIPLIER 8
+#define IRDMA_MIN_PBLE_PAGES 3
+#define IRDMA_HMC_PAGE_SIZE 2097152
+#define IRDMA_MIN_MR_PER_QP 4
+#define IRDMA_MIN_QP_CNT 64
+#define IRDMA_FSIAV_CNT_MAX 1048576
+#define IRDMA_MIN_IRD 8
+#define IRDMA_HMC_MIN_RRF 16
+
+enum irdma_hmc_rsrc_type {
+ IRDMA_HMC_IW_QP = 0,
+ IRDMA_HMC_IW_CQ = 1,
+ IRDMA_HMC_IW_SRQ = 2,
+ IRDMA_HMC_IW_HTE = 3,
+ IRDMA_HMC_IW_ARP = 4,
+ IRDMA_HMC_IW_APBVT_ENTRY = 5,
+ IRDMA_HMC_IW_MR = 6,
+ IRDMA_HMC_IW_XF = 7,
+ IRDMA_HMC_IW_XFFL = 8,
+ IRDMA_HMC_IW_Q1 = 9,
+ IRDMA_HMC_IW_Q1FL = 10,
+ IRDMA_HMC_IW_TIMER = 11,
+ IRDMA_HMC_IW_FSIMC = 12,
+ IRDMA_HMC_IW_FSIAV = 13,
+ IRDMA_HMC_IW_PBLE = 14,
+ IRDMA_HMC_IW_RRF = 15,
+ IRDMA_HMC_IW_RRFFL = 16,
+ IRDMA_HMC_IW_HDR = 17,
+ IRDMA_HMC_IW_MD = 18,
+ IRDMA_HMC_IW_OOISC = 19,
+ IRDMA_HMC_IW_OOISCFFL = 20,
+ IRDMA_HMC_IW_MAX, /* Must be last entry */
+};
+
+enum irdma_sd_entry_type {
+ IRDMA_SD_TYPE_INVALID = 0,
+ IRDMA_SD_TYPE_PAGED = 1,
+ IRDMA_SD_TYPE_DIRECT = 2,
+};
+
+enum irdma_hmc_obj_mem {
+ IRDMA_HOST_MEM = 0,
+ IRDMA_LOC_MEM = 1,
+};
+
+struct irdma_hmc_obj_info {
+ u64 base;
+ u32 max_cnt;
+ u32 cnt;
+ u64 size;
+ enum irdma_hmc_obj_mem mem_loc;
+};
+
+struct irdma_hmc_bp {
+ enum irdma_sd_entry_type entry_type;
+ struct irdma_dma_mem addr;
+ u32 sd_pd_index;
+ u32 use_cnt;
+};
+
+struct irdma_hmc_pd_entry {
+ struct irdma_hmc_bp bp;
+ u32 sd_index;
+ bool rsrc_pg:1;
+ bool valid:1;
+};
+
+struct irdma_hmc_pd_table {
+ struct irdma_dma_mem pd_page_addr;
+ struct irdma_hmc_pd_entry *pd_entry;
+ struct irdma_virt_mem pd_entry_virt_mem;
+ u32 use_cnt;
+ u32 sd_index;
+};
+
+struct irdma_hmc_sd_entry {
+ enum irdma_sd_entry_type entry_type;
+ bool valid;
+ union {
+ struct irdma_hmc_pd_table pd_table;
+ struct irdma_hmc_bp bp;
+ } u;
+};
+
+struct irdma_hmc_sd_table {
+ struct irdma_virt_mem addr;
+ u32 sd_cnt;
+ u32 use_cnt;
+ struct irdma_hmc_sd_entry *sd_entry;
+};
+
+struct irdma_hmc_info {
+ u32 signature;
+ u8 hmc_fn_id;
+ u16 first_sd_index;
+ struct irdma_hmc_obj_info *hmc_obj;
+ struct irdma_virt_mem hmc_obj_virt_mem;
+ struct irdma_hmc_sd_table sd_table;
+ u16 sd_indexes[IRDMA_HMC_MAX_SD_COUNT];
+};
+
+struct irdma_update_sd_entry {
+ u64 cmd;
+ u64 data;
+};
+
+struct irdma_update_sds_info {
+ u32 cnt;
+ u8 hmc_fn_id;
+ struct irdma_update_sd_entry entry[IRDMA_MAX_SD_ENTRIES];
+};
+
+struct irdma_ccq_cqe_info;
+struct irdma_hmc_fcn_info {
+ u32 vf_id;
+ u8 protocol_used;
+ u8 free_fcn;
+};
+
+struct irdma_hmc_create_obj_info {
+ struct irdma_hmc_info *hmc_info;
+ struct irdma_virt_mem add_sd_virt_mem;
+ u32 rsrc_type;
+ u32 start_idx;
+ u32 count;
+ u32 add_sd_cnt;
+ enum irdma_sd_entry_type entry_type;
+ bool privileged;
+};
+
+struct irdma_hmc_del_obj_info {
+ struct irdma_hmc_info *hmc_info;
+ struct irdma_virt_mem del_sd_virt_mem;
+ u32 rsrc_type;
+ u32 start_idx;
+ u32 count;
+ u32 del_sd_cnt;
+ bool privileged;
+};
+
+int irdma_copy_dma_mem(struct irdma_hw *hw, void *dest_buf,
+ struct irdma_dma_mem *src_mem, u64 src_offset, u64 size);
+int irdma_sc_create_hmc_obj(struct irdma_sc_dev *dev,
+ struct irdma_hmc_create_obj_info *info);
+int irdma_sc_del_hmc_obj(struct irdma_sc_dev *dev,
+ struct irdma_hmc_del_obj_info *info, bool reset);
+int irdma_hmc_sd_one(struct irdma_sc_dev *dev, u8 hmc_fn_id, u64 pa, u32 sd_idx,
+ enum irdma_sd_entry_type type,
+ bool setsd);
+int irdma_update_sds_noccq(struct irdma_sc_dev *dev,
+ struct irdma_update_sds_info *info);
+struct irdma_vfdev *irdma_vfdev_from_fpm(struct irdma_sc_dev *dev,
+ u8 hmc_fn_id);
+struct irdma_hmc_info *irdma_vf_hmcinfo_from_fpm(struct irdma_sc_dev *dev,
+ u8 hmc_fn_id);
+int irdma_add_sd_table_entry(struct irdma_hw *hw,
+ struct irdma_hmc_info *hmc_info, u32 sd_index,
+ enum irdma_sd_entry_type type, u64 direct_mode_sz);
+int irdma_add_pd_table_entry(struct irdma_sc_dev *dev,
+ struct irdma_hmc_info *hmc_info, u32 pd_index,
+ struct irdma_dma_mem *rsrc_pg);
+int irdma_remove_pd_bp(struct irdma_sc_dev *dev,
+ struct irdma_hmc_info *hmc_info, u32 idx);
+int irdma_prep_remove_sd_bp(struct irdma_hmc_info *hmc_info, u32 idx);
+int irdma_prep_remove_pd_page(struct irdma_hmc_info *hmc_info, u32 idx);
+#endif /* IRDMA_HMC_H */
diff --git a/drivers/infiniband/hw/irdma/hw.c b/drivers/infiniband/hw/irdma/hw.c
new file mode 100644
index 000000000000..d1fc5726b979
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/hw.c
@@ -0,0 +1,2823 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2015 - 2021 Intel Corporation */
+#include "main.h"
+
+static struct irdma_rsrc_limits rsrc_limits_table[] = {
+ [0] = {
+ .qplimit = SZ_128,
+ },
+ [1] = {
+ .qplimit = SZ_1K,
+ },
+ [2] = {
+ .qplimit = SZ_2K,
+ },
+ [3] = {
+ .qplimit = SZ_4K,
+ },
+ [4] = {
+ .qplimit = SZ_16K,
+ },
+ [5] = {
+ .qplimit = SZ_64K,
+ },
+ [6] = {
+ .qplimit = SZ_128K,
+ },
+ [7] = {
+ .qplimit = SZ_256K,
+ },
+};
+
+/* types of hmc objects */
+static enum irdma_hmc_rsrc_type iw_hmc_obj_types[] = {
+ IRDMA_HMC_IW_QP,
+ IRDMA_HMC_IW_CQ,
+ IRDMA_HMC_IW_SRQ,
+ IRDMA_HMC_IW_HTE,
+ IRDMA_HMC_IW_ARP,
+ IRDMA_HMC_IW_APBVT_ENTRY,
+ IRDMA_HMC_IW_MR,
+ IRDMA_HMC_IW_XF,
+ IRDMA_HMC_IW_XFFL,
+ IRDMA_HMC_IW_Q1,
+ IRDMA_HMC_IW_Q1FL,
+ IRDMA_HMC_IW_PBLE,
+ IRDMA_HMC_IW_TIMER,
+ IRDMA_HMC_IW_FSIMC,
+ IRDMA_HMC_IW_FSIAV,
+ IRDMA_HMC_IW_RRF,
+ IRDMA_HMC_IW_RRFFL,
+ IRDMA_HMC_IW_HDR,
+ IRDMA_HMC_IW_MD,
+ IRDMA_HMC_IW_OOISC,
+ IRDMA_HMC_IW_OOISCFFL,
+};
+
+/**
+ * irdma_iwarp_ce_handler - handle iwarp completions
+ * @iwcq: iwarp cq receiving event
+ */
+static void irdma_iwarp_ce_handler(struct irdma_sc_cq *iwcq)
+{
+ struct irdma_cq *cq = iwcq->back_cq;
+
+ if (!cq->user_mode)
+ atomic_set(&cq->armed, 0);
+ if (cq->ibcq.comp_handler)
+ cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
+}
+
+/**
+ * irdma_puda_ce_handler - handle puda completion events
+ * @rf: RDMA PCI function
+ * @cq: puda completion q for event
+ */
+static void irdma_puda_ce_handler(struct irdma_pci_f *rf,
+ struct irdma_sc_cq *cq)
+{
+ struct irdma_sc_dev *dev = &rf->sc_dev;
+ u32 compl_error;
+ int status;
+
+ do {
+ status = irdma_puda_poll_cmpl(dev, cq, &compl_error);
+ if (status == -ENOENT)
+ break;
+ if (status) {
+ ibdev_dbg(to_ibdev(dev), "ERR: puda status = %d\n", status);
+ break;
+ }
+ if (compl_error) {
+ ibdev_dbg(to_ibdev(dev), "ERR: puda compl_err =0x%x\n",
+ compl_error);
+ break;
+ }
+ } while (1);
+
+ irdma_sc_ccq_arm(cq);
+}
+
+/**
+ * irdma_process_ceq - handle ceq for completions
+ * @rf: RDMA PCI function
+ * @ceq: ceq having cq for completion
+ */
+static void irdma_process_ceq(struct irdma_pci_f *rf, struct irdma_ceq *ceq)
+{
+ struct irdma_sc_dev *dev = &rf->sc_dev;
+ struct irdma_sc_ceq *sc_ceq;
+ struct irdma_sc_cq *cq;
+ unsigned long flags;
+
+ sc_ceq = &ceq->sc_ceq;
+ do {
+ spin_lock_irqsave(&ceq->ce_lock, flags);
+ cq = irdma_sc_process_ceq(dev, sc_ceq);
+ if (!cq) {
+ spin_unlock_irqrestore(&ceq->ce_lock, flags);
+ break;
+ }
+
+ if (cq->cq_type == IRDMA_CQ_TYPE_IWARP)
+ irdma_iwarp_ce_handler(cq);
+
+ spin_unlock_irqrestore(&ceq->ce_lock, flags);
+
+ if (cq->cq_type == IRDMA_CQ_TYPE_CQP)
+ queue_work(rf->cqp_cmpl_wq, &rf->cqp_cmpl_work);
+ else if (cq->cq_type == IRDMA_CQ_TYPE_ILQ ||
+ cq->cq_type == IRDMA_CQ_TYPE_IEQ)
+ irdma_puda_ce_handler(rf, cq);
+ } while (1);
+}
+
+static void irdma_set_flush_fields(struct irdma_sc_qp *qp,
+ struct irdma_aeqe_info *info)
+{
+ struct qp_err_code qp_err;
+
+ qp->sq_flush_code = info->sq;
+ qp->rq_flush_code = info->rq;
+ if (qp->qp_uk.uk_attrs->hw_rev >= IRDMA_GEN_3) {
+ if (info->sq) {
+ qp->err_sq_idx_valid = true;
+ qp->err_sq_idx = info->wqe_idx;
+ }
+ if (info->rq) {
+ qp->err_rq_idx_valid = true;
+ qp->err_rq_idx = info->wqe_idx;
+ }
+ }
+
+ qp_err = irdma_ae_to_qp_err_code(info->ae_id);
+ qp->flush_code = qp_err.flush_code;
+ qp->event_type = qp_err.event_type;
+}
+
+/**
+ * irdma_complete_cqp_request - perform post-completion cleanup
+ * @cqp: device CQP
+ * @cqp_request: CQP request
+ *
+ * Mark CQP request as done, wake up waiting thread or invoke
+ * callback function and release/free CQP request.
+ */
+static void irdma_complete_cqp_request(struct irdma_cqp *cqp,
+ struct irdma_cqp_request *cqp_request)
+{
+ if (cqp_request->waiting) {
+ WRITE_ONCE(cqp_request->request_done, true);
+ wake_up(&cqp_request->waitq);
+ } else if (cqp_request->callback_fcn) {
+ cqp_request->callback_fcn(cqp_request);
+ }
+ irdma_put_cqp_request(cqp, cqp_request);
+}
+
+/**
+ * irdma_process_ae_def_cmpl - handle IRDMA_AE_CQP_DEFERRED_COMPLETE event
+ * @rf: RDMA PCI function
+ * @info: AEQ entry info
+ */
+static void irdma_process_ae_def_cmpl(struct irdma_pci_f *rf,
+ struct irdma_aeqe_info *info)
+{
+ u32 sw_def_info;
+ u64 scratch;
+
+ irdma_cqp_ce_handler(rf, &rf->ccq.sc_cq);
+
+ irdma_sc_cqp_def_cmpl_ae_handler(&rf->sc_dev, info, true,
+ &scratch, &sw_def_info);
+ while (scratch) {
+ struct irdma_cqp_request *cqp_request =
+ (struct irdma_cqp_request *)(uintptr_t)scratch;
+
+ irdma_complete_cqp_request(&rf->cqp, cqp_request);
+ irdma_sc_cqp_def_cmpl_ae_handler(&rf->sc_dev, info, false,
+ &scratch, &sw_def_info);
+ }
+}
+
+/**
+ * irdma_process_aeq - handle aeq events
+ * @rf: RDMA PCI function
+ */
+static void irdma_process_aeq(struct irdma_pci_f *rf)
+{
+ struct irdma_sc_dev *dev = &rf->sc_dev;
+ struct irdma_aeq *aeq = &rf->aeq;
+ struct irdma_sc_aeq *sc_aeq = &aeq->sc_aeq;
+ struct irdma_aeqe_info aeinfo;
+ struct irdma_aeqe_info *info = &aeinfo;
+ int ret;
+ struct irdma_qp *iwqp = NULL;
+ struct irdma_cq *iwcq = NULL;
+ struct irdma_sc_qp *qp = NULL;
+ struct irdma_qp_host_ctx_info *ctx_info = NULL;
+ struct irdma_device *iwdev = rf->iwdev;
+ struct irdma_sc_srq *srq;
+ unsigned long flags;
+
+ u32 aeqcnt = 0;
+
+ if (!sc_aeq->size)
+ return;
+
+ do {
+ memset(info, 0, sizeof(*info));
+ ret = irdma_sc_get_next_aeqe(sc_aeq, info);
+ if (ret)
+ break;
+
+ if (info->aeqe_overflow) {
+ ibdev_err(&iwdev->ibdev, "AEQ has overflowed\n");
+ rf->reset = true;
+ rf->gen_ops.request_reset(rf);
+ return;
+ }
+
+ aeqcnt++;
+ ibdev_dbg(&iwdev->ibdev,
+ "AEQ: ae_id = 0x%x bool qp=%d qp_id = %d tcp_state=%d iwarp_state=%d ae_src=%d\n",
+ info->ae_id, info->qp, info->qp_cq_id, info->tcp_state,
+ info->iwarp_state, info->ae_src);
+
+ if (info->qp) {
+ spin_lock_irqsave(&rf->qptable_lock, flags);
+ iwqp = rf->qp_table[info->qp_cq_id];
+ if (!iwqp) {
+ spin_unlock_irqrestore(&rf->qptable_lock,
+ flags);
+ if (info->ae_id == IRDMA_AE_QP_SUSPEND_COMPLETE) {
+ atomic_dec(&iwdev->vsi.qp_suspend_reqs);
+ wake_up(&iwdev->suspend_wq);
+ continue;
+ }
+ ibdev_dbg(&iwdev->ibdev, "AEQ: qp_id %d is already freed\n",
+ info->qp_cq_id);
+ continue;
+ }
+ irdma_qp_add_ref(&iwqp->ibqp);
+ spin_unlock_irqrestore(&rf->qptable_lock, flags);
+ qp = &iwqp->sc_qp;
+ spin_lock_irqsave(&iwqp->lock, flags);
+ iwqp->hw_tcp_state = info->tcp_state;
+ iwqp->hw_iwarp_state = info->iwarp_state;
+ if (info->ae_id != IRDMA_AE_QP_SUSPEND_COMPLETE)
+ iwqp->last_aeq = info->ae_id;
+ spin_unlock_irqrestore(&iwqp->lock, flags);
+ } else if (info->srq) {
+ if (info->ae_id != IRDMA_AE_SRQ_LIMIT)
+ continue;
+ } else {
+ if (info->ae_id != IRDMA_AE_CQ_OPERATION_ERROR &&
+ info->ae_id != IRDMA_AE_CQP_DEFERRED_COMPLETE)
+ continue;
+ }
+
+ switch (info->ae_id) {
+ struct irdma_cm_node *cm_node;
+ case IRDMA_AE_LLP_CONNECTION_ESTABLISHED:
+ cm_node = iwqp->cm_node;
+ if (cm_node->accept_pend) {
+ atomic_dec(&cm_node->listener->pend_accepts_cnt);
+ cm_node->accept_pend = 0;
+ }
+ iwqp->rts_ae_rcvd = 1;
+ wake_up_interruptible(&iwqp->waitq);
+ break;
+ case IRDMA_AE_LLP_FIN_RECEIVED:
+ case IRDMA_AE_RDMAP_ROE_BAD_LLP_CLOSE:
+ if (qp->term_flags)
+ break;
+ if (atomic_inc_return(&iwqp->close_timer_started) == 1) {
+ iwqp->hw_tcp_state = IRDMA_TCP_STATE_CLOSE_WAIT;
+ if (iwqp->hw_tcp_state == IRDMA_TCP_STATE_CLOSE_WAIT &&
+ iwqp->ibqp_state == IB_QPS_RTS) {
+ irdma_next_iw_state(iwqp,
+ IRDMA_QP_STATE_CLOSING,
+ 0, 0, 0);
+ irdma_cm_disconn(iwqp);
+ }
+ irdma_schedule_cm_timer(iwqp->cm_node,
+ (struct irdma_puda_buf *)iwqp,
+ IRDMA_TIMER_TYPE_CLOSE,
+ 1, 0);
+ }
+ break;
+ case IRDMA_AE_LLP_CLOSE_COMPLETE:
+ if (qp->term_flags)
+ irdma_terminate_done(qp, 0);
+ else
+ irdma_cm_disconn(iwqp);
+ break;
+ case IRDMA_AE_BAD_CLOSE:
+ case IRDMA_AE_RESET_SENT:
+ irdma_next_iw_state(iwqp, IRDMA_QP_STATE_ERROR, 1, 0,
+ 0);
+ irdma_cm_disconn(iwqp);
+ break;
+ case IRDMA_AE_LLP_CONNECTION_RESET:
+ if (atomic_read(&iwqp->close_timer_started))
+ break;
+ irdma_cm_disconn(iwqp);
+ break;
+ case IRDMA_AE_QP_SUSPEND_COMPLETE:
+ if (iwqp->iwdev->vsi.tc_change_pending) {
+ if (!atomic_dec_return(&qp->vsi->qp_suspend_reqs))
+ wake_up(&iwqp->iwdev->suspend_wq);
+ }
+ if (iwqp->suspend_pending) {
+ iwqp->suspend_pending = false;
+ wake_up(&iwqp->iwdev->suspend_wq);
+ }
+ break;
+ case IRDMA_AE_TERMINATE_SENT:
+ irdma_terminate_send_fin(qp);
+ break;
+ case IRDMA_AE_LLP_TERMINATE_RECEIVED:
+ irdma_terminate_received(qp, info);
+ break;
+ case IRDMA_AE_CQ_OPERATION_ERROR:
+ ibdev_err(&iwdev->ibdev,
+ "Processing an iWARP related AE for CQ misc = 0x%04X\n",
+ info->ae_id);
+
+ spin_lock_irqsave(&rf->cqtable_lock, flags);
+ iwcq = rf->cq_table[info->qp_cq_id];
+ if (!iwcq) {
+ spin_unlock_irqrestore(&rf->cqtable_lock,
+ flags);
+ ibdev_dbg(to_ibdev(dev),
+ "cq_id %d is already freed\n", info->qp_cq_id);
+ continue;
+ }
+ irdma_cq_add_ref(&iwcq->ibcq);
+ spin_unlock_irqrestore(&rf->cqtable_lock, flags);
+
+ if (iwcq->ibcq.event_handler) {
+ struct ib_event ibevent;
+
+ ibevent.device = iwcq->ibcq.device;
+ ibevent.event = IB_EVENT_CQ_ERR;
+ ibevent.element.cq = &iwcq->ibcq;
+ iwcq->ibcq.event_handler(&ibevent,
+ iwcq->ibcq.cq_context);
+ }
+ irdma_cq_rem_ref(&iwcq->ibcq);
+ break;
+ case IRDMA_AE_SRQ_LIMIT:
+ srq = (struct irdma_sc_srq *)(uintptr_t)info->compl_ctx;
+ irdma_srq_event(srq);
+ break;
+ case IRDMA_AE_SRQ_CATASTROPHIC_ERROR:
+ break;
+ case IRDMA_AE_CQP_DEFERRED_COMPLETE:
+ /* Remove completed CQP requests from pending list
+ * and notify about those CQP ops completion.
+ */
+ irdma_process_ae_def_cmpl(rf, info);
+ break;
+ case IRDMA_AE_RESET_NOT_SENT:
+ case IRDMA_AE_LLP_DOUBT_REACHABILITY:
+ case IRDMA_AE_RESOURCE_EXHAUSTION:
+ break;
+ case IRDMA_AE_PRIV_OPERATION_DENIED:
+ case IRDMA_AE_STAG_ZERO_INVALID:
+ case IRDMA_AE_IB_RREQ_AND_Q1_FULL:
+ case IRDMA_AE_DDP_UBE_INVALID_DDP_VERSION:
+ case IRDMA_AE_DDP_UBE_INVALID_MO:
+ case IRDMA_AE_DDP_UBE_INVALID_QN:
+ case IRDMA_AE_DDP_NO_L_BIT:
+ case IRDMA_AE_RDMAP_ROE_INVALID_RDMAP_VERSION:
+ case IRDMA_AE_RDMAP_ROE_UNEXPECTED_OPCODE:
+ case IRDMA_AE_ROE_INVALID_RDMA_READ_REQUEST:
+ case IRDMA_AE_ROE_INVALID_RDMA_WRITE_OR_READ_RESP:
+ case IRDMA_AE_INVALID_ARP_ENTRY:
+ case IRDMA_AE_INVALID_TCP_OPTION_RCVD:
+ case IRDMA_AE_STALE_ARP_ENTRY:
+ case IRDMA_AE_LLP_RECEIVED_MPA_CRC_ERROR:
+ case IRDMA_AE_LLP_SEGMENT_TOO_SMALL:
+ case IRDMA_AE_LLP_SYN_RECEIVED:
+ case IRDMA_AE_LLP_TOO_MANY_RETRIES:
+ case IRDMA_AE_LCE_QP_CATASTROPHIC:
+ case IRDMA_AE_LCE_FUNCTION_CATASTROPHIC:
+ case IRDMA_AE_LLP_TOO_MANY_RNRS:
+ case IRDMA_AE_LCE_CQ_CATASTROPHIC:
+ case IRDMA_AE_REMOTE_QP_CATASTROPHIC:
+ case IRDMA_AE_LOCAL_QP_CATASTROPHIC:
+ case IRDMA_AE_RCE_QP_CATASTROPHIC:
+ case IRDMA_AE_UDA_XMIT_DGRAM_TOO_LONG:
+ default:
+ ibdev_err(&iwdev->ibdev, "abnormal ae_id = 0x%x bool qp=%d qp_id = %d, ae_src=%d\n",
+ info->ae_id, info->qp, info->qp_cq_id, info->ae_src);
+ ctx_info = &iwqp->ctx_info;
+ if (rdma_protocol_roce(&iwqp->iwdev->ibdev, 1)) {
+ ctx_info->roce_info->err_rq_idx_valid =
+ ctx_info->srq_valid ? false : info->err_rq_idx_valid;
+ if (ctx_info->roce_info->err_rq_idx_valid) {
+ ctx_info->roce_info->err_rq_idx = info->wqe_idx;
+ irdma_sc_qp_setctx_roce(&iwqp->sc_qp, iwqp->host_ctx.va,
+ ctx_info);
+ }
+ irdma_set_flush_fields(qp, info);
+ irdma_cm_disconn(iwqp);
+ break;
+ }
+ ctx_info->iwarp_info->err_rq_idx_valid = info->rq;
+ if (info->rq) {
+ ctx_info->iwarp_info->err_rq_idx = info->wqe_idx;
+ ctx_info->tcp_info_valid = false;
+ ctx_info->iwarp_info_valid = true;
+ irdma_sc_qp_setctx(&iwqp->sc_qp, iwqp->host_ctx.va,
+ ctx_info);
+ }
+ if (iwqp->hw_iwarp_state != IRDMA_QP_STATE_RTS &&
+ iwqp->hw_iwarp_state != IRDMA_QP_STATE_TERMINATE) {
+ irdma_next_iw_state(iwqp, IRDMA_QP_STATE_ERROR, 1, 0, 0);
+ irdma_cm_disconn(iwqp);
+ } else {
+ irdma_terminate_connection(qp, info);
+ }
+ break;
+ }
+ if (info->qp)
+ irdma_qp_rem_ref(&iwqp->ibqp);
+ } while (1);
+
+ if (aeqcnt)
+ irdma_sc_repost_aeq_entries(dev, aeqcnt);
+}
+
+/**
+ * irdma_ena_intr - set up device interrupts
+ * @dev: hardware control device structure
+ * @msix_id: id of the interrupt to be enabled
+ */
+static void irdma_ena_intr(struct irdma_sc_dev *dev, u32 msix_id)
+{
+ dev->irq_ops->irdma_en_irq(dev, msix_id);
+}
+
+/**
+ * irdma_dpc - tasklet for aeq and ceq 0
+ * @t: tasklet_struct ptr
+ */
+static void irdma_dpc(struct tasklet_struct *t)
+{
+ struct irdma_pci_f *rf = from_tasklet(rf, t, dpc_tasklet);
+
+ if (rf->msix_shared)
+ irdma_process_ceq(rf, rf->ceqlist);
+ irdma_process_aeq(rf);
+ irdma_ena_intr(&rf->sc_dev, rf->iw_msixtbl[0].idx);
+}
+
+/**
+ * irdma_ceq_dpc - dpc handler for CEQ
+ * @t: tasklet_struct ptr
+ */
+static void irdma_ceq_dpc(struct tasklet_struct *t)
+{
+ struct irdma_ceq *iwceq = from_tasklet(iwceq, t, dpc_tasklet);
+ struct irdma_pci_f *rf = iwceq->rf;
+
+ irdma_process_ceq(rf, iwceq);
+ irdma_ena_intr(&rf->sc_dev, iwceq->msix_idx);
+}
+
+/**
+ * irdma_save_msix_info - copy msix vector information to iwarp device
+ * @rf: RDMA PCI function
+ *
+ * Allocate iwdev msix table and copy the msix info to the table
+ * Return 0 if successful, otherwise return error
+ */
+static int irdma_save_msix_info(struct irdma_pci_f *rf)
+{
+ struct irdma_qvlist_info *iw_qvlist;
+ struct irdma_qv_info *iw_qvinfo;
+ struct msix_entry *pmsix;
+ u32 ceq_idx;
+ u32 i;
+ size_t size;
+
+ if (!rf->msix_count)
+ return -EINVAL;
+
+ size = sizeof(struct irdma_msix_vector) * rf->msix_count;
+ size += struct_size(iw_qvlist, qv_info, rf->msix_count);
+ rf->iw_msixtbl = kzalloc(size, GFP_KERNEL);
+ if (!rf->iw_msixtbl)
+ return -ENOMEM;
+
+ rf->iw_qvlist = (struct irdma_qvlist_info *)
+ (&rf->iw_msixtbl[rf->msix_count]);
+ iw_qvlist = rf->iw_qvlist;
+ iw_qvinfo = iw_qvlist->qv_info;
+ iw_qvlist->num_vectors = rf->msix_count;
+ if (rf->msix_count <= num_online_cpus())
+ rf->msix_shared = true;
+
+ pmsix = rf->msix_entries;
+ for (i = 0, ceq_idx = 0; i < rf->msix_count; i++, iw_qvinfo++) {
+ rf->iw_msixtbl[i].idx = pmsix->entry;
+ rf->iw_msixtbl[i].irq = pmsix->vector;
+ rf->iw_msixtbl[i].cpu_affinity = ceq_idx;
+ if (!i) {
+ iw_qvinfo->aeq_idx = 0;
+ if (rf->msix_shared)
+ iw_qvinfo->ceq_idx = ceq_idx++;
+ else
+ iw_qvinfo->ceq_idx = IRDMA_Q_INVALID_IDX;
+ } else {
+ iw_qvinfo->aeq_idx = IRDMA_Q_INVALID_IDX;
+ iw_qvinfo->ceq_idx = ceq_idx++;
+ }
+ iw_qvinfo->itr_idx = 3;
+ iw_qvinfo->v_idx = rf->iw_msixtbl[i].idx;
+ pmsix++;
+ }
+
+ return 0;
+}
+
+/**
+ * irdma_irq_handler - interrupt handler for aeq and ceq0
+ * @irq: Interrupt request number
+ * @data: RDMA PCI function
+ */
+static irqreturn_t irdma_irq_handler(int irq, void *data)
+{
+ struct irdma_pci_f *rf = data;
+
+ tasklet_schedule(&rf->dpc_tasklet);
+
+ return IRQ_HANDLED;
+}
+
+/**
+ * irdma_ceq_handler - interrupt handler for ceq
+ * @irq: interrupt request number
+ * @data: ceq pointer
+ */
+static irqreturn_t irdma_ceq_handler(int irq, void *data)
+{
+ struct irdma_ceq *iwceq = data;
+
+ if (iwceq->irq != irq)
+ ibdev_err(to_ibdev(&iwceq->rf->sc_dev), "expected irq = %d received irq = %d\n",
+ iwceq->irq, irq);
+ tasklet_schedule(&iwceq->dpc_tasklet);
+
+ return IRQ_HANDLED;
+}
+
+/**
+ * irdma_destroy_irq - destroy device interrupts
+ * @rf: RDMA PCI function
+ * @msix_vec: msix vector to disable irq
+ * @dev_id: parameter to pass to free_irq (used during irq setup)
+ *
+ * The function is called when destroying aeq/ceq
+ */
+static void irdma_destroy_irq(struct irdma_pci_f *rf,
+ struct irdma_msix_vector *msix_vec, void *dev_id)
+{
+ struct irdma_sc_dev *dev = &rf->sc_dev;
+
+ dev->irq_ops->irdma_dis_irq(dev, msix_vec->idx);
+ irq_update_affinity_hint(msix_vec->irq, NULL);
+ free_irq(msix_vec->irq, dev_id);
+ if (rf == dev_id) {
+ tasklet_kill(&rf->dpc_tasklet);
+ } else {
+ struct irdma_ceq *iwceq = (struct irdma_ceq *)dev_id;
+
+ tasklet_kill(&iwceq->dpc_tasklet);
+ }
+}
+
+/**
+ * irdma_destroy_cqp - destroy control qp
+ * @rf: RDMA PCI function
+ *
+ * Issue destroy cqp request and
+ * free the resources associated with the cqp
+ */
+static void irdma_destroy_cqp(struct irdma_pci_f *rf)
+{
+ struct irdma_sc_dev *dev = &rf->sc_dev;
+ struct irdma_cqp *cqp = &rf->cqp;
+ int status = 0;
+
+ status = irdma_sc_cqp_destroy(dev->cqp);
+ if (status)
+ ibdev_dbg(to_ibdev(dev), "ERR: Destroy CQP failed %d\n", status);
+
+ irdma_cleanup_pending_cqp_op(rf);
+ dma_free_coherent(dev->hw->device, cqp->sq.size, cqp->sq.va,
+ cqp->sq.pa);
+ cqp->sq.va = NULL;
+ kfree(cqp->oop_op_array);
+ cqp->oop_op_array = NULL;
+ kfree(cqp->scratch_array);
+ cqp->scratch_array = NULL;
+ kfree(cqp->cqp_requests);
+ cqp->cqp_requests = NULL;
+}
+
+static void irdma_destroy_virt_aeq(struct irdma_pci_f *rf)
+{
+ struct irdma_aeq *aeq = &rf->aeq;
+ u32 pg_cnt = DIV_ROUND_UP(aeq->mem.size, PAGE_SIZE);
+ dma_addr_t *pg_arr = (dma_addr_t *)aeq->palloc.level1.addr;
+
+ irdma_unmap_vm_page_list(&rf->hw, pg_arr, pg_cnt);
+ irdma_free_pble(rf->pble_rsrc, &aeq->palloc);
+ vfree(aeq->mem.va);
+}
+
+/**
+ * irdma_destroy_aeq - destroy aeq
+ * @rf: RDMA PCI function
+ *
+ * Issue a destroy aeq request and
+ * free the resources associated with the aeq
+ * The function is called during driver unload
+ */
+static void irdma_destroy_aeq(struct irdma_pci_f *rf)
+{
+ struct irdma_sc_dev *dev = &rf->sc_dev;
+ struct irdma_aeq *aeq = &rf->aeq;
+ int status = -EBUSY;
+
+ if (!rf->msix_shared) {
+ if (rf->sc_dev.privileged)
+ rf->sc_dev.irq_ops->irdma_cfg_aeq(&rf->sc_dev,
+ rf->iw_msixtbl->idx, false);
+ irdma_destroy_irq(rf, rf->iw_msixtbl, rf);
+ }
+ if (rf->reset)
+ goto exit;
+
+ aeq->sc_aeq.size = 0;
+ status = irdma_cqp_aeq_cmd(dev, &aeq->sc_aeq, IRDMA_OP_AEQ_DESTROY);
+ if (status)
+ ibdev_dbg(to_ibdev(dev), "ERR: Destroy AEQ failed %d\n", status);
+
+exit:
+ if (aeq->virtual_map) {
+ irdma_destroy_virt_aeq(rf);
+ } else {
+ dma_free_coherent(dev->hw->device, aeq->mem.size, aeq->mem.va,
+ aeq->mem.pa);
+ aeq->mem.va = NULL;
+ }
+}
+
+/**
+ * irdma_destroy_ceq - destroy ceq
+ * @rf: RDMA PCI function
+ * @iwceq: ceq to be destroyed
+ *
+ * Issue a destroy ceq request and
+ * free the resources associated with the ceq
+ */
+static void irdma_destroy_ceq(struct irdma_pci_f *rf, struct irdma_ceq *iwceq)
+{
+ struct irdma_sc_dev *dev = &rf->sc_dev;
+ int status;
+
+ if (rf->reset)
+ goto exit;
+
+ status = irdma_sc_ceq_destroy(&iwceq->sc_ceq, 0, 1);
+ if (status) {
+ ibdev_dbg(to_ibdev(dev), "ERR: CEQ destroy command failed %d\n", status);
+ goto exit;
+ }
+
+ status = irdma_sc_cceq_destroy_done(&iwceq->sc_ceq);
+ if (status)
+ ibdev_dbg(to_ibdev(dev), "ERR: CEQ destroy completion failed %d\n",
+ status);
+exit:
+ dma_free_coherent(dev->hw->device, iwceq->mem.size, iwceq->mem.va,
+ iwceq->mem.pa);
+ iwceq->mem.va = NULL;
+}
+
+/**
+ * irdma_del_ceq_0 - destroy ceq 0
+ * @rf: RDMA PCI function
+ *
+ * Disable the ceq 0 interrupt and destroy the ceq 0
+ */
+static void irdma_del_ceq_0(struct irdma_pci_f *rf)
+{
+ struct irdma_ceq *iwceq = rf->ceqlist;
+ struct irdma_msix_vector *msix_vec;
+
+ if (rf->msix_shared) {
+ msix_vec = &rf->iw_msixtbl[0];
+ if (rf->sc_dev.privileged)
+ rf->sc_dev.irq_ops->irdma_cfg_ceq(&rf->sc_dev,
+ msix_vec->ceq_id,
+ msix_vec->idx, false);
+ irdma_destroy_irq(rf, msix_vec, rf);
+ } else {
+ msix_vec = &rf->iw_msixtbl[1];
+ irdma_destroy_irq(rf, msix_vec, iwceq);
+ }
+
+ irdma_destroy_ceq(rf, iwceq);
+ rf->sc_dev.ceq_valid = false;
+ rf->ceqs_count = 0;
+}
+
+/**
+ * irdma_del_ceqs - destroy all ceq's except CEQ 0
+ * @rf: RDMA PCI function
+ *
+ * Go through all of the device ceq's, except 0, and for each
+ * ceq disable the ceq interrupt and destroy the ceq
+ */
+static void irdma_del_ceqs(struct irdma_pci_f *rf)
+{
+ struct irdma_ceq *iwceq = &rf->ceqlist[1];
+ struct irdma_msix_vector *msix_vec;
+ u32 i = 0;
+
+ if (rf->msix_shared)
+ msix_vec = &rf->iw_msixtbl[1];
+ else
+ msix_vec = &rf->iw_msixtbl[2];
+
+ for (i = 1; i < rf->ceqs_count; i++, msix_vec++, iwceq++) {
+ if (rf->sc_dev.privileged)
+ rf->sc_dev.irq_ops->irdma_cfg_ceq(&rf->sc_dev,
+ msix_vec->ceq_id,
+ msix_vec->idx, false);
+ irdma_destroy_irq(rf, msix_vec, iwceq);
+ irdma_cqp_ceq_cmd(&rf->sc_dev, &iwceq->sc_ceq,
+ IRDMA_OP_CEQ_DESTROY);
+ dma_free_coherent(rf->sc_dev.hw->device, iwceq->mem.size,
+ iwceq->mem.va, iwceq->mem.pa);
+ iwceq->mem.va = NULL;
+ }
+ rf->ceqs_count = 1;
+}
+
+/**
+ * irdma_destroy_ccq - destroy control cq
+ * @rf: RDMA PCI function
+ *
+ * Issue destroy ccq request and
+ * free the resources associated with the ccq
+ */
+static void irdma_destroy_ccq(struct irdma_pci_f *rf)
+{
+ struct irdma_sc_dev *dev = &rf->sc_dev;
+ struct irdma_ccq *ccq = &rf->ccq;
+ int status = 0;
+
+ if (rf->cqp_cmpl_wq)
+ destroy_workqueue(rf->cqp_cmpl_wq);
+
+ if (!rf->reset)
+ status = irdma_sc_ccq_destroy(dev->ccq, 0, true);
+ if (status)
+ ibdev_dbg(to_ibdev(dev), "ERR: CCQ destroy failed %d\n", status);
+ dma_free_coherent(dev->hw->device, ccq->mem_cq.size, ccq->mem_cq.va,
+ ccq->mem_cq.pa);
+ ccq->mem_cq.va = NULL;
+}
+
+/**
+ * irdma_close_hmc_objects_type - delete hmc objects of a given type
+ * @dev: iwarp device
+ * @obj_type: the hmc object type to be deleted
+ * @hmc_info: host memory info struct
+ * @privileged: permission to close HMC objects
+ * @reset: true if called before reset
+ */
+static void irdma_close_hmc_objects_type(struct irdma_sc_dev *dev,
+ enum irdma_hmc_rsrc_type obj_type,
+ struct irdma_hmc_info *hmc_info,
+ bool privileged, bool reset)
+{
+ struct irdma_hmc_del_obj_info info = {};
+
+ info.hmc_info = hmc_info;
+ info.rsrc_type = obj_type;
+ info.count = hmc_info->hmc_obj[obj_type].cnt;
+ info.privileged = privileged;
+ if (irdma_sc_del_hmc_obj(dev, &info, reset))
+ ibdev_dbg(to_ibdev(dev), "ERR: del HMC obj of type %d failed\n",
+ obj_type);
+}
+
+/**
+ * irdma_del_hmc_objects - remove all device hmc objects
+ * @dev: iwarp device
+ * @hmc_info: hmc_info to free
+ * @privileged: permission to delete HMC objects
+ * @reset: true if called before reset
+ * @vers: hardware version
+ */
+static void irdma_del_hmc_objects(struct irdma_sc_dev *dev,
+ struct irdma_hmc_info *hmc_info, bool privileged,
+ bool reset, enum irdma_vers vers)
+{
+ unsigned int i;
+
+ for (i = 0; i < IW_HMC_OBJ_TYPE_NUM; i++) {
+ if (dev->hmc_info->hmc_obj[iw_hmc_obj_types[i]].cnt)
+ irdma_close_hmc_objects_type(dev, iw_hmc_obj_types[i],
+ hmc_info, privileged, reset);
+ if (vers == IRDMA_GEN_1 && i == IRDMA_HMC_IW_TIMER)
+ break;
+ }
+}
+
+/**
+ * irdma_create_hmc_obj_type - create hmc object of a given type
+ * @dev: hardware control device structure
+ * @info: information for the hmc object to create
+ */
+static int irdma_create_hmc_obj_type(struct irdma_sc_dev *dev,
+ struct irdma_hmc_create_obj_info *info)
+{
+ return irdma_sc_create_hmc_obj(dev, info);
+}
+
+/**
+ * irdma_create_hmc_objs - create all hmc objects for the device
+ * @rf: RDMA PCI function
+ * @privileged: permission to create HMC objects
+ * @vers: HW version
+ *
+ * Create the device hmc objects and allocate hmc pages
+ * Return 0 if successful, otherwise clean up and return error
+ */
+static int irdma_create_hmc_objs(struct irdma_pci_f *rf, bool privileged,
+ enum irdma_vers vers)
+{
+ struct irdma_sc_dev *dev = &rf->sc_dev;
+ struct irdma_hmc_create_obj_info info = {};
+ int i, status = 0;
+
+ info.hmc_info = dev->hmc_info;
+ info.privileged = privileged;
+ info.entry_type = rf->sd_type;
+
+ for (i = 0; i < IW_HMC_OBJ_TYPE_NUM; i++) {
+ if (iw_hmc_obj_types[i] == IRDMA_HMC_IW_PBLE)
+ continue;
+ if (dev->hmc_info->hmc_obj[iw_hmc_obj_types[i]].cnt) {
+ info.rsrc_type = iw_hmc_obj_types[i];
+ info.count = dev->hmc_info->hmc_obj[info.rsrc_type].cnt;
+ info.add_sd_cnt = 0;
+ status = irdma_create_hmc_obj_type(dev, &info);
+ if (status) {
+ ibdev_dbg(to_ibdev(dev),
+ "ERR: create obj type %d status = %d\n",
+ iw_hmc_obj_types[i], status);
+ break;
+ }
+ }
+ if (vers == IRDMA_GEN_1 && i == IRDMA_HMC_IW_TIMER)
+ break;
+ }
+
+ if (!status)
+ return irdma_sc_static_hmc_pages_allocated(dev->cqp, 0, dev->hmc_fn_id,
+ true, true);
+
+ while (i) {
+ i--;
+ /* destroy the hmc objects of a given type */
+ if (dev->hmc_info->hmc_obj[iw_hmc_obj_types[i]].cnt)
+ irdma_close_hmc_objects_type(dev, iw_hmc_obj_types[i],
+ dev->hmc_info, privileged,
+ false);
+ }
+
+ return status;
+}
+
+/**
+ * irdma_obj_aligned_mem - get aligned memory from device allocated memory
+ * @rf: RDMA PCI function
+ * @memptr: points to the memory addresses
+ * @size: size of memory needed
+ * @mask: mask for the aligned memory
+ *
+ * Get aligned memory of the requested size and
+ * update the memptr to point to the new aligned memory
+ * Return 0 if successful, otherwise return no memory error
+ */
+static int irdma_obj_aligned_mem(struct irdma_pci_f *rf,
+ struct irdma_dma_mem *memptr, u32 size,
+ u32 mask)
+{
+ unsigned long va, newva;
+ unsigned long extra;
+
+ va = (unsigned long)rf->obj_next.va;
+ newva = va;
+ if (mask)
+ newva = ALIGN(va, (unsigned long)mask + 1ULL);
+ extra = newva - va;
+ memptr->va = (u8 *)va + extra;
+ memptr->pa = rf->obj_next.pa + extra;
+ memptr->size = size;
+ if (((u8 *)memptr->va + size) > ((u8 *)rf->obj_mem.va + rf->obj_mem.size))
+ return -ENOMEM;
+
+ rf->obj_next.va = (u8 *)memptr->va + size;
+ rf->obj_next.pa = memptr->pa + size;
+
+ return 0;
+}
+
+/**
+ * irdma_create_cqp - create control qp
+ * @rf: RDMA PCI function
+ *
+ * Return 0, if the cqp and all the resources associated with it
+ * are successfully created, otherwise return error
+ */
+static int irdma_create_cqp(struct irdma_pci_f *rf)
+{
+ u32 sqsize = IRDMA_CQP_SW_SQSIZE_2048;
+ struct irdma_dma_mem mem;
+ struct irdma_sc_dev *dev = &rf->sc_dev;
+ struct irdma_cqp_init_info cqp_init_info = {};
+ struct irdma_cqp *cqp = &rf->cqp;
+ u16 maj_err, min_err;
+ int i, status;
+
+ cqp->cqp_requests = kcalloc(sqsize, sizeof(*cqp->cqp_requests), GFP_KERNEL);
+ if (!cqp->cqp_requests)
+ return -ENOMEM;
+
+ cqp->scratch_array = kcalloc(sqsize, sizeof(*cqp->scratch_array), GFP_KERNEL);
+ if (!cqp->scratch_array) {
+ status = -ENOMEM;
+ goto err_scratch;
+ }
+
+ cqp->oop_op_array = kcalloc(sqsize, sizeof(*cqp->oop_op_array),
+ GFP_KERNEL);
+ if (!cqp->oop_op_array) {
+ status = -ENOMEM;
+ goto err_oop;
+ }
+ cqp_init_info.ooo_op_array = cqp->oop_op_array;
+ dev->cqp = &cqp->sc_cqp;
+ dev->cqp->dev = dev;
+ cqp->sq.size = ALIGN(sizeof(struct irdma_cqp_sq_wqe) * sqsize,
+ IRDMA_CQP_ALIGNMENT);
+ cqp->sq.va = dma_alloc_coherent(dev->hw->device, cqp->sq.size,
+ &cqp->sq.pa, GFP_KERNEL);
+ if (!cqp->sq.va) {
+ status = -ENOMEM;
+ goto err_sq;
+ }
+
+ status = irdma_obj_aligned_mem(rf, &mem, sizeof(struct irdma_cqp_ctx),
+ IRDMA_HOST_CTX_ALIGNMENT_M);
+ if (status)
+ goto err_ctx;
+
+ dev->cqp->host_ctx_pa = mem.pa;
+ dev->cqp->host_ctx = mem.va;
+ /* populate the cqp init info */
+ cqp_init_info.dev = dev;
+ cqp_init_info.sq_size = sqsize;
+ cqp_init_info.sq = cqp->sq.va;
+ cqp_init_info.sq_pa = cqp->sq.pa;
+ cqp_init_info.host_ctx_pa = mem.pa;
+ cqp_init_info.host_ctx = mem.va;
+ cqp_init_info.hmc_profile = rf->rsrc_profile;
+ cqp_init_info.scratch_array = cqp->scratch_array;
+ cqp_init_info.protocol_used = rf->protocol_used;
+
+ switch (rf->rdma_ver) {
+ case IRDMA_GEN_1:
+ cqp_init_info.hw_maj_ver = IRDMA_CQPHC_HW_MAJVER_GEN_1;
+ break;
+ case IRDMA_GEN_2:
+ cqp_init_info.hw_maj_ver = IRDMA_CQPHC_HW_MAJVER_GEN_2;
+ break;
+ case IRDMA_GEN_3:
+ cqp_init_info.hw_maj_ver = IRDMA_CQPHC_HW_MAJVER_GEN_3;
+ cqp_init_info.ts_override = 1;
+ break;
+ }
+ status = irdma_sc_cqp_init(dev->cqp, &cqp_init_info);
+ if (status) {
+ ibdev_dbg(to_ibdev(dev), "ERR: cqp init status %d\n", status);
+ goto err_ctx;
+ }
+
+ spin_lock_init(&cqp->req_lock);
+ spin_lock_init(&cqp->compl_lock);
+
+ status = irdma_sc_cqp_create(dev->cqp, &maj_err, &min_err);
+ if (status) {
+ ibdev_dbg(to_ibdev(dev),
+ "ERR: cqp create failed - status %d maj_err %d min_err %d\n",
+ status, maj_err, min_err);
+ goto err_ctx;
+ }
+
+ INIT_LIST_HEAD(&cqp->cqp_avail_reqs);
+ INIT_LIST_HEAD(&cqp->cqp_pending_reqs);
+
+ /* init the waitqueue of the cqp_requests and add them to the list */
+ for (i = 0; i < sqsize; i++) {
+ init_waitqueue_head(&cqp->cqp_requests[i].waitq);
+ list_add_tail(&cqp->cqp_requests[i].list, &cqp->cqp_avail_reqs);
+ }
+ init_waitqueue_head(&cqp->remove_wq);
+ return 0;
+
+err_ctx:
+ dma_free_coherent(dev->hw->device, cqp->sq.size,
+ cqp->sq.va, cqp->sq.pa);
+ cqp->sq.va = NULL;
+err_sq:
+ kfree(cqp->oop_op_array);
+ cqp->oop_op_array = NULL;
+err_oop:
+ kfree(cqp->scratch_array);
+ cqp->scratch_array = NULL;
+err_scratch:
+ kfree(cqp->cqp_requests);
+ cqp->cqp_requests = NULL;
+
+ return status;
+}
+
+/**
+ * irdma_create_ccq - create control cq
+ * @rf: RDMA PCI function
+ *
+ * Return 0, if the ccq and the resources associated with it
+ * are successfully created, otherwise return error
+ */
+static int irdma_create_ccq(struct irdma_pci_f *rf)
+{
+ struct irdma_sc_dev *dev = &rf->sc_dev;
+ struct irdma_ccq_init_info info = {};
+ struct irdma_ccq *ccq = &rf->ccq;
+ int ccq_size;
+ int status;
+
+ dev->ccq = &ccq->sc_cq;
+ dev->ccq->dev = dev;
+ info.dev = dev;
+ ccq_size = (rf->rdma_ver >= IRDMA_GEN_3) ? IW_GEN_3_CCQ_SIZE : IW_CCQ_SIZE;
+ ccq->shadow_area.size = sizeof(struct irdma_cq_shadow_area);
+ ccq->mem_cq.size = ALIGN(sizeof(struct irdma_cqe) * ccq_size,
+ IRDMA_CQ0_ALIGNMENT);
+ ccq->mem_cq.va = dma_alloc_coherent(dev->hw->device, ccq->mem_cq.size,
+ &ccq->mem_cq.pa, GFP_KERNEL);
+ if (!ccq->mem_cq.va)
+ return -ENOMEM;
+
+ status = irdma_obj_aligned_mem(rf, &ccq->shadow_area,
+ ccq->shadow_area.size,
+ IRDMA_SHADOWAREA_M);
+ if (status)
+ goto exit;
+
+ ccq->sc_cq.back_cq = ccq;
+ /* populate the ccq init info */
+ info.cq_base = ccq->mem_cq.va;
+ info.cq_pa = ccq->mem_cq.pa;
+ info.num_elem = ccq_size;
+ info.shadow_area = ccq->shadow_area.va;
+ info.shadow_area_pa = ccq->shadow_area.pa;
+ info.ceqe_mask = false;
+ info.ceq_id_valid = true;
+ info.shadow_read_threshold = 16;
+ info.vsi = &rf->default_vsi;
+ status = irdma_sc_ccq_init(dev->ccq, &info);
+ if (!status)
+ status = irdma_sc_ccq_create(dev->ccq, 0, true, true);
+exit:
+ if (status) {
+ dma_free_coherent(dev->hw->device, ccq->mem_cq.size,
+ ccq->mem_cq.va, ccq->mem_cq.pa);
+ ccq->mem_cq.va = NULL;
+ }
+
+ return status;
+}
+
+/**
+ * irdma_alloc_set_mac - set up a mac address table entry
+ * @iwdev: irdma device
+ *
+ * Allocate a mac ip entry and add it to the hw table Return 0
+ * if successful, otherwise return error
+ */
+static int irdma_alloc_set_mac(struct irdma_device *iwdev)
+{
+ int status;
+
+ status = irdma_alloc_local_mac_entry(iwdev->rf,
+ &iwdev->mac_ip_table_idx);
+ if (!status) {
+ status = irdma_add_local_mac_entry(iwdev->rf,
+ (const u8 *)iwdev->netdev->dev_addr,
+ (u8)iwdev->mac_ip_table_idx);
+ if (status)
+ irdma_del_local_mac_entry(iwdev->rf,
+ (u8)iwdev->mac_ip_table_idx);
+ }
+ return status;
+}
+
+/**
+ * irdma_cfg_ceq_vector - set up the msix interrupt vector for
+ * ceq
+ * @rf: RDMA PCI function
+ * @iwceq: ceq associated with the vector
+ * @ceq_id: the id number of the iwceq
+ * @msix_vec: interrupt vector information
+ *
+ * Allocate interrupt resources and enable irq handling
+ * Return 0 if successful, otherwise return error
+ */
+static int irdma_cfg_ceq_vector(struct irdma_pci_f *rf, struct irdma_ceq *iwceq,
+ u32 ceq_id, struct irdma_msix_vector *msix_vec)
+{
+ int status;
+
+ if (rf->msix_shared && !ceq_id) {
+ snprintf(msix_vec->name, sizeof(msix_vec->name) - 1,
+ "irdma-%s-AEQCEQ-0", dev_name(&rf->pcidev->dev));
+ tasklet_setup(&rf->dpc_tasklet, irdma_dpc);
+ status = request_irq(msix_vec->irq, irdma_irq_handler, 0,
+ msix_vec->name, rf);
+ } else {
+ snprintf(msix_vec->name, sizeof(msix_vec->name) - 1,
+ "irdma-%s-CEQ-%d",
+ dev_name(&rf->pcidev->dev), ceq_id);
+ tasklet_setup(&iwceq->dpc_tasklet, irdma_ceq_dpc);
+
+ status = request_irq(msix_vec->irq, irdma_ceq_handler, 0,
+ msix_vec->name, iwceq);
+ }
+ cpumask_clear(&msix_vec->mask);
+ cpumask_set_cpu(msix_vec->cpu_affinity, &msix_vec->mask);
+ irq_update_affinity_hint(msix_vec->irq, &msix_vec->mask);
+ if (status) {
+ ibdev_dbg(&rf->iwdev->ibdev, "ERR: ceq irq config fail\n");
+ return status;
+ }
+
+ msix_vec->ceq_id = ceq_id;
+ if (rf->sc_dev.privileged)
+ rf->sc_dev.irq_ops->irdma_cfg_ceq(&rf->sc_dev, ceq_id,
+ msix_vec->idx, true);
+ else
+ status = irdma_vchnl_req_ceq_vec_map(&rf->sc_dev, ceq_id,
+ msix_vec->idx);
+ return status;
+}
+
+/**
+ * irdma_cfg_aeq_vector - set up the msix vector for aeq
+ * @rf: RDMA PCI function
+ *
+ * Allocate interrupt resources and enable irq handling
+ * Return 0 if successful, otherwise return error
+ */
+static int irdma_cfg_aeq_vector(struct irdma_pci_f *rf)
+{
+ struct irdma_msix_vector *msix_vec = rf->iw_msixtbl;
+ int ret = 0;
+
+ if (!rf->msix_shared) {
+ snprintf(msix_vec->name, sizeof(msix_vec->name) - 1,
+ "irdma-%s-AEQ", dev_name(&rf->pcidev->dev));
+ tasklet_setup(&rf->dpc_tasklet, irdma_dpc);
+ ret = request_irq(msix_vec->irq, irdma_irq_handler, 0,
+ msix_vec->name, rf);
+ }
+ if (ret) {
+ ibdev_dbg(&rf->iwdev->ibdev, "ERR: aeq irq config fail\n");
+ return ret;
+ }
+
+ if (rf->sc_dev.privileged)
+ rf->sc_dev.irq_ops->irdma_cfg_aeq(&rf->sc_dev, msix_vec->idx,
+ true);
+ else
+ ret = irdma_vchnl_req_aeq_vec_map(&rf->sc_dev, msix_vec->idx);
+
+ return ret;
+}
+
+/**
+ * irdma_create_ceq - create completion event queue
+ * @rf: RDMA PCI function
+ * @iwceq: pointer to the ceq resources to be created
+ * @ceq_id: the id number of the iwceq
+ * @vsi_idx: vsi idx
+ *
+ * Return 0, if the ceq and the resources associated with it
+ * are successfully created, otherwise return error
+ */
+static int irdma_create_ceq(struct irdma_pci_f *rf, struct irdma_ceq *iwceq,
+ u32 ceq_id, u16 vsi_idx)
+{
+ int status;
+ struct irdma_ceq_init_info info = {};
+ struct irdma_sc_dev *dev = &rf->sc_dev;
+ u32 ceq_size;
+
+ info.ceq_id = ceq_id;
+ iwceq->rf = rf;
+ ceq_size = min(rf->sc_dev.hmc_info->hmc_obj[IRDMA_HMC_IW_CQ].cnt,
+ dev->hw_attrs.max_hw_ceq_size);
+ iwceq->mem.size = ALIGN(sizeof(struct irdma_ceqe) * ceq_size,
+ IRDMA_CEQ_ALIGNMENT);
+ iwceq->mem.va = dma_alloc_coherent(dev->hw->device, iwceq->mem.size,
+ &iwceq->mem.pa, GFP_KERNEL);
+ if (!iwceq->mem.va)
+ return -ENOMEM;
+
+ info.ceq_id = ceq_id;
+ info.ceqe_base = iwceq->mem.va;
+ info.ceqe_pa = iwceq->mem.pa;
+ info.elem_cnt = ceq_size;
+ iwceq->sc_ceq.ceq_id = ceq_id;
+ info.dev = dev;
+ info.vsi_idx = vsi_idx;
+ status = irdma_sc_ceq_init(&iwceq->sc_ceq, &info);
+ if (!status) {
+ if (dev->ceq_valid)
+ status = irdma_cqp_ceq_cmd(&rf->sc_dev, &iwceq->sc_ceq,
+ IRDMA_OP_CEQ_CREATE);
+ else
+ status = irdma_sc_cceq_create(&iwceq->sc_ceq, 0);
+ }
+
+ if (status) {
+ dma_free_coherent(dev->hw->device, iwceq->mem.size,
+ iwceq->mem.va, iwceq->mem.pa);
+ iwceq->mem.va = NULL;
+ }
+
+ return status;
+}
+
+/**
+ * irdma_setup_ceq_0 - create CEQ 0 and it's interrupt resource
+ * @rf: RDMA PCI function
+ *
+ * Allocate a list for all device completion event queues
+ * Create the ceq 0 and configure it's msix interrupt vector
+ * Return 0, if successfully set up, otherwise return error
+ */
+static int irdma_setup_ceq_0(struct irdma_pci_f *rf)
+{
+ struct irdma_ceq *iwceq;
+ struct irdma_msix_vector *msix_vec;
+ u32 i;
+ int status = 0;
+ u32 num_ceqs;
+
+ num_ceqs = min(rf->msix_count, rf->sc_dev.hmc_fpm_misc.max_ceqs);
+ rf->ceqlist = kcalloc(num_ceqs, sizeof(*rf->ceqlist), GFP_KERNEL);
+ if (!rf->ceqlist) {
+ status = -ENOMEM;
+ goto exit;
+ }
+
+ iwceq = &rf->ceqlist[0];
+ status = irdma_create_ceq(rf, iwceq, 0, rf->default_vsi.vsi_idx);
+ if (status) {
+ ibdev_dbg(&rf->iwdev->ibdev, "ERR: create ceq status = %d\n",
+ status);
+ goto exit;
+ }
+
+ spin_lock_init(&iwceq->ce_lock);
+ i = rf->msix_shared ? 0 : 1;
+ msix_vec = &rf->iw_msixtbl[i];
+ iwceq->irq = msix_vec->irq;
+ iwceq->msix_idx = msix_vec->idx;
+ status = irdma_cfg_ceq_vector(rf, iwceq, 0, msix_vec);
+ if (status) {
+ irdma_destroy_ceq(rf, iwceq);
+ goto exit;
+ }
+
+ irdma_ena_intr(&rf->sc_dev, msix_vec->idx);
+ rf->ceqs_count++;
+
+exit:
+ if (status && !rf->ceqs_count) {
+ kfree(rf->ceqlist);
+ rf->ceqlist = NULL;
+ return status;
+ }
+ rf->sc_dev.ceq_valid = true;
+
+ return 0;
+}
+
+/**
+ * irdma_setup_ceqs - manage the device ceq's and their interrupt resources
+ * @rf: RDMA PCI function
+ * @vsi_idx: vsi_idx for this CEQ
+ *
+ * Allocate a list for all device completion event queues
+ * Create the ceq's and configure their msix interrupt vectors
+ * Return 0, if ceqs are successfully set up, otherwise return error
+ */
+static int irdma_setup_ceqs(struct irdma_pci_f *rf, u16 vsi_idx)
+{
+ u32 i;
+ u32 ceq_id;
+ struct irdma_ceq *iwceq;
+ struct irdma_msix_vector *msix_vec;
+ int status;
+ u32 num_ceqs;
+
+ num_ceqs = min(rf->msix_count, rf->sc_dev.hmc_fpm_misc.max_ceqs);
+ i = (rf->msix_shared) ? 1 : 2;
+ for (ceq_id = 1; i < num_ceqs; i++, ceq_id++) {
+ iwceq = &rf->ceqlist[ceq_id];
+ status = irdma_create_ceq(rf, iwceq, ceq_id, vsi_idx);
+ if (status) {
+ ibdev_dbg(&rf->iwdev->ibdev,
+ "ERR: create ceq status = %d\n", status);
+ goto del_ceqs;
+ }
+ spin_lock_init(&iwceq->ce_lock);
+ msix_vec = &rf->iw_msixtbl[i];
+ iwceq->irq = msix_vec->irq;
+ iwceq->msix_idx = msix_vec->idx;
+ status = irdma_cfg_ceq_vector(rf, iwceq, ceq_id, msix_vec);
+ if (status) {
+ irdma_destroy_ceq(rf, iwceq);
+ goto del_ceqs;
+ }
+ irdma_ena_intr(&rf->sc_dev, msix_vec->idx);
+ rf->ceqs_count++;
+ }
+
+ return 0;
+
+del_ceqs:
+ irdma_del_ceqs(rf);
+
+ return status;
+}
+
+static int irdma_create_virt_aeq(struct irdma_pci_f *rf, u32 size)
+{
+ struct irdma_aeq *aeq = &rf->aeq;
+ dma_addr_t *pg_arr;
+ u32 pg_cnt;
+ int status;
+
+ if (rf->rdma_ver < IRDMA_GEN_2)
+ return -EOPNOTSUPP;
+
+ aeq->mem.size = sizeof(struct irdma_sc_aeqe) * size;
+ aeq->mem.va = vzalloc(aeq->mem.size);
+
+ if (!aeq->mem.va)
+ return -ENOMEM;
+
+ pg_cnt = DIV_ROUND_UP(aeq->mem.size, PAGE_SIZE);
+ status = irdma_get_pble(rf->pble_rsrc, &aeq->palloc, pg_cnt, true);
+ if (status) {
+ vfree(aeq->mem.va);
+ return status;
+ }
+
+ pg_arr = (dma_addr_t *)aeq->palloc.level1.addr;
+ status = irdma_map_vm_page_list(&rf->hw, aeq->mem.va, pg_arr, pg_cnt);
+ if (status) {
+ irdma_free_pble(rf->pble_rsrc, &aeq->palloc);
+ vfree(aeq->mem.va);
+ return status;
+ }
+
+ return 0;
+}
+
+/**
+ * irdma_create_aeq - create async event queue
+ * @rf: RDMA PCI function
+ *
+ * Return 0, if the aeq and the resources associated with it
+ * are successfully created, otherwise return error
+ */
+static int irdma_create_aeq(struct irdma_pci_f *rf)
+{
+ struct irdma_aeq_init_info info = {};
+ struct irdma_sc_dev *dev = &rf->sc_dev;
+ struct irdma_aeq *aeq = &rf->aeq;
+ struct irdma_hmc_info *hmc_info = rf->sc_dev.hmc_info;
+ u32 aeq_size;
+ u8 multiplier = (rf->protocol_used == IRDMA_IWARP_PROTOCOL_ONLY) ? 2 : 1;
+ int status;
+
+ aeq_size = multiplier * hmc_info->hmc_obj[IRDMA_HMC_IW_QP].cnt +
+ hmc_info->hmc_obj[IRDMA_HMC_IW_CQ].cnt;
+ aeq_size = min(aeq_size, dev->hw_attrs.max_hw_aeq_size);
+ /* GEN_3 does not support virtual AEQ. Cap at max Kernel alloc size */
+ if (rf->rdma_ver == IRDMA_GEN_3)
+ aeq_size = min(aeq_size, (u32)((PAGE_SIZE << MAX_PAGE_ORDER) /
+ sizeof(struct irdma_sc_aeqe)));
+ aeq->mem.size = ALIGN(sizeof(struct irdma_sc_aeqe) * aeq_size,
+ IRDMA_AEQ_ALIGNMENT);
+ aeq->mem.va = dma_alloc_coherent(dev->hw->device, aeq->mem.size,
+ &aeq->mem.pa,
+ GFP_KERNEL | __GFP_NOWARN);
+ if (aeq->mem.va)
+ goto skip_virt_aeq;
+ else if (rf->rdma_ver == IRDMA_GEN_3)
+ return -ENOMEM;
+
+ /* physically mapped aeq failed. setup virtual aeq */
+ status = irdma_create_virt_aeq(rf, aeq_size);
+ if (status)
+ return status;
+
+ info.virtual_map = true;
+ aeq->virtual_map = info.virtual_map;
+ info.pbl_chunk_size = 1;
+ info.first_pm_pbl_idx = aeq->palloc.level1.idx;
+
+skip_virt_aeq:
+ info.aeqe_base = aeq->mem.va;
+ info.aeq_elem_pa = aeq->mem.pa;
+ info.elem_cnt = aeq_size;
+ info.dev = dev;
+ info.msix_idx = rf->iw_msixtbl->idx;
+ status = irdma_sc_aeq_init(&aeq->sc_aeq, &info);
+ if (status)
+ goto err;
+
+ status = irdma_cqp_aeq_cmd(dev, &aeq->sc_aeq, IRDMA_OP_AEQ_CREATE);
+ if (status)
+ goto err;
+
+ return 0;
+
+err:
+ if (aeq->virtual_map) {
+ irdma_destroy_virt_aeq(rf);
+ } else {
+ dma_free_coherent(dev->hw->device, aeq->mem.size, aeq->mem.va,
+ aeq->mem.pa);
+ aeq->mem.va = NULL;
+ }
+
+ return status;
+}
+
+/**
+ * irdma_setup_aeq - set up the device aeq
+ * @rf: RDMA PCI function
+ *
+ * Create the aeq and configure its msix interrupt vector
+ * Return 0 if successful, otherwise return error
+ */
+static int irdma_setup_aeq(struct irdma_pci_f *rf)
+{
+ struct irdma_sc_dev *dev = &rf->sc_dev;
+ int status;
+
+ status = irdma_create_aeq(rf);
+ if (status)
+ return status;
+
+ status = irdma_cfg_aeq_vector(rf);
+ if (status) {
+ irdma_destroy_aeq(rf);
+ return status;
+ }
+
+ if (!rf->msix_shared)
+ irdma_ena_intr(dev, rf->iw_msixtbl[0].idx);
+
+ return 0;
+}
+
+/**
+ * irdma_initialize_ilq - create iwarp local queue for cm
+ * @iwdev: irdma device
+ *
+ * Return 0 if successful, otherwise return error
+ */
+static int irdma_initialize_ilq(struct irdma_device *iwdev)
+{
+ struct irdma_puda_rsrc_info info = {};
+ int status;
+
+ info.type = IRDMA_PUDA_RSRC_TYPE_ILQ;
+ info.cq_id = 1;
+ info.qp_id = 1;
+ info.count = 1;
+ info.pd_id = 1;
+ info.abi_ver = IRDMA_ABI_VER;
+ info.sq_size = min(iwdev->rf->max_qp / 2, (u32)32768);
+ info.rq_size = info.sq_size;
+ info.buf_size = 1024;
+ info.tx_buf_cnt = 2 * info.sq_size;
+ info.receive = irdma_receive_ilq;
+ info.xmit_complete = irdma_free_sqbuf;
+ status = irdma_puda_create_rsrc(&iwdev->vsi, &info);
+ if (status)
+ ibdev_dbg(&iwdev->ibdev, "ERR: ilq create fail\n");
+
+ return status;
+}
+
+/**
+ * irdma_initialize_ieq - create iwarp exception queue
+ * @iwdev: irdma device
+ *
+ * Return 0 if successful, otherwise return error
+ */
+static int irdma_initialize_ieq(struct irdma_device *iwdev)
+{
+ struct irdma_puda_rsrc_info info = {};
+ int status;
+
+ info.type = IRDMA_PUDA_RSRC_TYPE_IEQ;
+ info.cq_id = 2;
+ info.qp_id = iwdev->vsi.exception_lan_q;
+ info.count = 1;
+ info.pd_id = 2;
+ info.abi_ver = IRDMA_ABI_VER;
+ info.sq_size = min(iwdev->rf->max_qp / 2, (u32)32768);
+ info.rq_size = info.sq_size;
+ info.buf_size = iwdev->vsi.mtu + IRDMA_IPV4_PAD;
+ info.tx_buf_cnt = 4096;
+ status = irdma_puda_create_rsrc(&iwdev->vsi, &info);
+ if (status)
+ ibdev_dbg(&iwdev->ibdev, "ERR: ieq create fail\n");
+
+ return status;
+}
+
+/**
+ * irdma_reinitialize_ieq - destroy and re-create ieq
+ * @vsi: VSI structure
+ */
+void irdma_reinitialize_ieq(struct irdma_sc_vsi *vsi)
+{
+ struct irdma_device *iwdev = vsi->back_vsi;
+ struct irdma_pci_f *rf = iwdev->rf;
+
+ irdma_puda_dele_rsrc(vsi, IRDMA_PUDA_RSRC_TYPE_IEQ, false);
+ if (irdma_initialize_ieq(iwdev)) {
+ iwdev->rf->reset = true;
+ rf->gen_ops.request_reset(rf);
+ }
+}
+
+/**
+ * irdma_hmc_setup - create hmc objects for the device
+ * @rf: RDMA PCI function
+ *
+ * Set up the device private memory space for the number and size of
+ * the hmc objects and create the objects
+ * Return 0 if successful, otherwise return error
+ */
+static int irdma_hmc_setup(struct irdma_pci_f *rf)
+{
+ int status;
+ u32 qpcnt;
+
+ qpcnt = rsrc_limits_table[rf->limits_sel].qplimit;
+
+ rf->sd_type = IRDMA_SD_TYPE_DIRECT;
+ status = irdma_cfg_fpm_val(&rf->sc_dev, qpcnt);
+ if (status)
+ return status;
+
+ status = irdma_create_hmc_objs(rf, true, rf->rdma_ver);
+
+ return status;
+}
+
+/**
+ * irdma_del_init_mem - deallocate memory resources
+ * @rf: RDMA PCI function
+ */
+static void irdma_del_init_mem(struct irdma_pci_f *rf)
+{
+ struct irdma_sc_dev *dev = &rf->sc_dev;
+
+ if (!rf->sc_dev.privileged)
+ irdma_vchnl_req_put_hmc_fcn(&rf->sc_dev);
+ kfree(dev->hmc_info->sd_table.sd_entry);
+ dev->hmc_info->sd_table.sd_entry = NULL;
+ vfree(rf->mem_rsrc);
+ rf->mem_rsrc = NULL;
+ dma_free_coherent(rf->hw.device, rf->obj_mem.size, rf->obj_mem.va,
+ rf->obj_mem.pa);
+ rf->obj_mem.va = NULL;
+ if (rf->rdma_ver != IRDMA_GEN_1) {
+ bitmap_free(rf->allocated_ws_nodes);
+ rf->allocated_ws_nodes = NULL;
+ }
+ kfree(rf->ceqlist);
+ rf->ceqlist = NULL;
+ kfree(rf->iw_msixtbl);
+ rf->iw_msixtbl = NULL;
+ kfree(rf->hmc_info_mem);
+ rf->hmc_info_mem = NULL;
+}
+
+/**
+ * irdma_initialize_dev - initialize device
+ * @rf: RDMA PCI function
+ *
+ * Allocate memory for the hmc objects and initialize iwdev
+ * Return 0 if successful, otherwise clean up the resources
+ * and return error
+ */
+static int irdma_initialize_dev(struct irdma_pci_f *rf)
+{
+ int status;
+ struct irdma_sc_dev *dev = &rf->sc_dev;
+ struct irdma_device_init_info info = {};
+ struct irdma_dma_mem mem;
+ u32 size;
+
+ size = sizeof(struct irdma_hmc_pble_rsrc) +
+ sizeof(struct irdma_hmc_info) +
+ (sizeof(struct irdma_hmc_obj_info) * IRDMA_HMC_IW_MAX);
+
+ rf->hmc_info_mem = kzalloc(size, GFP_KERNEL);
+ if (!rf->hmc_info_mem)
+ return -ENOMEM;
+
+ rf->pble_rsrc = (struct irdma_hmc_pble_rsrc *)rf->hmc_info_mem;
+ dev->hmc_info = &rf->hw.hmc;
+ dev->hmc_info->hmc_obj = (struct irdma_hmc_obj_info *)
+ (rf->pble_rsrc + 1);
+
+ status = irdma_obj_aligned_mem(rf, &mem, IRDMA_QUERY_FPM_BUF_SIZE,
+ IRDMA_FPM_QUERY_BUF_ALIGNMENT_M);
+ if (status)
+ goto error;
+
+ info.fpm_query_buf_pa = mem.pa;
+ info.fpm_query_buf = mem.va;
+
+ status = irdma_obj_aligned_mem(rf, &mem, IRDMA_COMMIT_FPM_BUF_SIZE,
+ IRDMA_FPM_COMMIT_BUF_ALIGNMENT_M);
+ if (status)
+ goto error;
+
+ info.fpm_commit_buf_pa = mem.pa;
+ info.fpm_commit_buf = mem.va;
+
+ info.bar0 = rf->hw.hw_addr;
+ info.hmc_fn_id = rf->pf_id;
+ info.protocol_used = rf->protocol_used;
+ info.hw = &rf->hw;
+ status = irdma_sc_dev_init(rf->rdma_ver, &rf->sc_dev, &info);
+ if (status)
+ goto error;
+
+ return status;
+error:
+ kfree(rf->hmc_info_mem);
+ rf->hmc_info_mem = NULL;
+
+ return status;
+}
+
+/**
+ * irdma_rt_deinit_hw - clean up the irdma device resources
+ * @iwdev: irdma device
+ *
+ * remove the mac ip entry and ipv4/ipv6 addresses, destroy the
+ * device queues and free the pble and the hmc objects
+ */
+void irdma_rt_deinit_hw(struct irdma_device *iwdev)
+{
+ ibdev_dbg(&iwdev->ibdev, "INIT: state = %d\n", iwdev->init_state);
+
+ switch (iwdev->init_state) {
+ case IP_ADDR_REGISTERED:
+ if (iwdev->rf->sc_dev.hw_attrs.uk_attrs.hw_rev == IRDMA_GEN_1)
+ irdma_del_local_mac_entry(iwdev->rf,
+ (u8)iwdev->mac_ip_table_idx);
+ fallthrough;
+ case IEQ_CREATED:
+ if (!iwdev->roce_mode)
+ irdma_puda_dele_rsrc(&iwdev->vsi, IRDMA_PUDA_RSRC_TYPE_IEQ,
+ iwdev->rf->reset);
+ fallthrough;
+ case ILQ_CREATED:
+ if (!iwdev->roce_mode)
+ irdma_puda_dele_rsrc(&iwdev->vsi,
+ IRDMA_PUDA_RSRC_TYPE_ILQ,
+ iwdev->rf->reset);
+ break;
+ default:
+ ibdev_warn(&iwdev->ibdev, "bad init_state = %d\n", iwdev->init_state);
+ break;
+ }
+
+ irdma_cleanup_cm_core(&iwdev->cm_core);
+ if (iwdev->vsi.pestat) {
+ irdma_vsi_stats_free(&iwdev->vsi);
+ kfree(iwdev->vsi.pestat);
+ }
+ if (iwdev->cleanup_wq)
+ destroy_workqueue(iwdev->cleanup_wq);
+}
+
+static int irdma_setup_init_state(struct irdma_pci_f *rf)
+{
+ int status;
+
+ status = irdma_save_msix_info(rf);
+ if (status)
+ return status;
+
+ rf->hw.device = &rf->pcidev->dev;
+ rf->obj_mem.size = ALIGN(8192, IRDMA_HW_PAGE_SIZE);
+ rf->obj_mem.va = dma_alloc_coherent(rf->hw.device, rf->obj_mem.size,
+ &rf->obj_mem.pa, GFP_KERNEL);
+ if (!rf->obj_mem.va) {
+ status = -ENOMEM;
+ goto clean_msixtbl;
+ }
+
+ rf->obj_next = rf->obj_mem;
+ status = irdma_initialize_dev(rf);
+ if (status)
+ goto clean_obj_mem;
+
+ return 0;
+
+clean_obj_mem:
+ dma_free_coherent(rf->hw.device, rf->obj_mem.size, rf->obj_mem.va,
+ rf->obj_mem.pa);
+ rf->obj_mem.va = NULL;
+clean_msixtbl:
+ kfree(rf->iw_msixtbl);
+ rf->iw_msixtbl = NULL;
+ return status;
+}
+
+/**
+ * irdma_get_used_rsrc - determine resources used internally
+ * @iwdev: irdma device
+ *
+ * Called at the end of open to get all internal allocations
+ */
+static void irdma_get_used_rsrc(struct irdma_device *iwdev)
+{
+ iwdev->rf->used_pds = find_first_zero_bit(iwdev->rf->allocated_pds,
+ iwdev->rf->max_pd);
+ iwdev->rf->used_qps = find_first_zero_bit(iwdev->rf->allocated_qps,
+ iwdev->rf->max_qp);
+ iwdev->rf->used_cqs = find_first_zero_bit(iwdev->rf->allocated_cqs,
+ iwdev->rf->max_cq);
+ iwdev->rf->used_srqs = find_first_zero_bit(iwdev->rf->allocated_srqs,
+ iwdev->rf->max_srq);
+ iwdev->rf->used_mrs = find_first_zero_bit(iwdev->rf->allocated_mrs,
+ iwdev->rf->max_mr);
+}
+
+void irdma_ctrl_deinit_hw(struct irdma_pci_f *rf)
+{
+ enum init_completion_state state = rf->init_state;
+
+ rf->init_state = INVALID_STATE;
+
+ switch (state) {
+ case AEQ_CREATED:
+ irdma_destroy_aeq(rf);
+ fallthrough;
+ case PBLE_CHUNK_MEM:
+ irdma_destroy_pble_prm(rf->pble_rsrc);
+ fallthrough;
+ case CEQS_CREATED:
+ irdma_del_ceqs(rf);
+ fallthrough;
+ case CEQ0_CREATED:
+ irdma_del_ceq_0(rf);
+ fallthrough;
+ case CCQ_CREATED:
+ irdma_destroy_ccq(rf);
+ fallthrough;
+ case HW_RSRC_INITIALIZED:
+ case HMC_OBJS_CREATED:
+ irdma_del_hmc_objects(&rf->sc_dev, rf->sc_dev.hmc_info, true,
+ rf->reset, rf->rdma_ver);
+ fallthrough;
+ case CQP_CREATED:
+ irdma_destroy_cqp(rf);
+ fallthrough;
+ case INITIAL_STATE:
+ irdma_del_init_mem(rf);
+ break;
+ case INVALID_STATE:
+ default:
+ ibdev_warn(&rf->iwdev->ibdev, "bad init_state = %d\n", rf->init_state);
+ break;
+ }
+}
+
+/**
+ * irdma_rt_init_hw - Initializes runtime portion of HW
+ * @iwdev: irdma device
+ * @l2params: qos, tc, mtu info from netdev driver
+ *
+ * Create device queues ILQ, IEQ, CEQs and PBLEs. Setup irdma
+ * device resource objects.
+ */
+int irdma_rt_init_hw(struct irdma_device *iwdev,
+ struct irdma_l2params *l2params)
+{
+ struct irdma_pci_f *rf = iwdev->rf;
+ struct irdma_sc_dev *dev = &rf->sc_dev;
+ struct irdma_vsi_init_info vsi_info = {};
+ struct irdma_vsi_stats_info stats_info = {};
+ int status;
+
+ vsi_info.dev = dev;
+ vsi_info.back_vsi = iwdev;
+ vsi_info.params = l2params;
+ vsi_info.pf_data_vsi_num = iwdev->vsi_num;
+ vsi_info.register_qset = rf->gen_ops.register_qset;
+ vsi_info.unregister_qset = rf->gen_ops.unregister_qset;
+ vsi_info.exception_lan_q = 2;
+ irdma_sc_vsi_init(&iwdev->vsi, &vsi_info);
+
+ status = irdma_setup_cm_core(iwdev, rf->rdma_ver);
+ if (status)
+ return status;
+
+ stats_info.pestat = kzalloc(sizeof(*stats_info.pestat), GFP_KERNEL);
+ if (!stats_info.pestat) {
+ irdma_cleanup_cm_core(&iwdev->cm_core);
+ return -ENOMEM;
+ }
+ stats_info.fcn_id = dev->hmc_fn_id;
+ status = irdma_vsi_stats_init(&iwdev->vsi, &stats_info);
+ if (status) {
+ irdma_cleanup_cm_core(&iwdev->cm_core);
+ kfree(stats_info.pestat);
+ return status;
+ }
+
+ do {
+ if (!iwdev->roce_mode) {
+ status = irdma_initialize_ilq(iwdev);
+ if (status)
+ break;
+ iwdev->init_state = ILQ_CREATED;
+ status = irdma_initialize_ieq(iwdev);
+ if (status)
+ break;
+ iwdev->init_state = IEQ_CREATED;
+ }
+ if (iwdev->rf->sc_dev.hw_attrs.uk_attrs.hw_rev == IRDMA_GEN_1)
+ irdma_alloc_set_mac(iwdev);
+ irdma_add_ip(iwdev);
+ iwdev->init_state = IP_ADDR_REGISTERED;
+
+ /* handles asynch cleanup tasks - disconnect CM , free qp,
+ * free cq bufs
+ */
+ iwdev->cleanup_wq = alloc_workqueue("irdma-cleanup-wq",
+ WQ_UNBOUND, WQ_UNBOUND_MAX_ACTIVE);
+ if (!iwdev->cleanup_wq)
+ return -ENOMEM;
+ irdma_get_used_rsrc(iwdev);
+ init_waitqueue_head(&iwdev->suspend_wq);
+
+ return 0;
+ } while (0);
+
+ dev_err(&rf->pcidev->dev, "HW runtime init FAIL status = %d last cmpl = %d\n",
+ status, iwdev->init_state);
+ irdma_rt_deinit_hw(iwdev);
+
+ return status;
+}
+
+/**
+ * irdma_ctrl_init_hw - Initializes control portion of HW
+ * @rf: RDMA PCI function
+ *
+ * Create admin queues, HMC obejcts and RF resource objects
+ */
+int irdma_ctrl_init_hw(struct irdma_pci_f *rf)
+{
+ struct irdma_sc_dev *dev = &rf->sc_dev;
+ int status;
+ do {
+ status = irdma_setup_init_state(rf);
+ if (status)
+ break;
+ rf->init_state = INITIAL_STATE;
+
+ status = irdma_create_cqp(rf);
+ if (status)
+ break;
+ rf->init_state = CQP_CREATED;
+
+ dev->feature_info[IRDMA_FEATURE_FW_INFO] = IRDMA_FW_VER_DEFAULT;
+ if (rf->rdma_ver != IRDMA_GEN_1) {
+ status = irdma_get_rdma_features(dev);
+ if (status)
+ break;
+ }
+
+ status = irdma_hmc_setup(rf);
+ if (status)
+ break;
+ rf->init_state = HMC_OBJS_CREATED;
+
+ status = irdma_initialize_hw_rsrc(rf);
+ if (status)
+ break;
+ rf->init_state = HW_RSRC_INITIALIZED;
+
+ status = irdma_create_ccq(rf);
+ if (status)
+ break;
+ rf->init_state = CCQ_CREATED;
+
+ status = irdma_setup_ceq_0(rf);
+ if (status)
+ break;
+ rf->init_state = CEQ0_CREATED;
+ /* Handles processing of CQP completions */
+ rf->cqp_cmpl_wq =
+ alloc_ordered_workqueue("cqp_cmpl_wq", WQ_HIGHPRI);
+ if (!rf->cqp_cmpl_wq) {
+ status = -ENOMEM;
+ break;
+ }
+ INIT_WORK(&rf->cqp_cmpl_work, cqp_compl_worker);
+ irdma_sc_ccq_arm(dev->ccq);
+
+ status = irdma_setup_ceqs(rf, rf->iwdev ? rf->iwdev->vsi_num : 0);
+ if (status)
+ break;
+
+ rf->init_state = CEQS_CREATED;
+
+ status = irdma_hmc_init_pble(&rf->sc_dev,
+ rf->pble_rsrc);
+ if (status)
+ break;
+
+ rf->init_state = PBLE_CHUNK_MEM;
+
+ status = irdma_setup_aeq(rf);
+ if (status)
+ break;
+ rf->init_state = AEQ_CREATED;
+
+ return 0;
+ } while (0);
+
+ dev_err(&rf->pcidev->dev, "IRDMA hardware initialization FAILED init_state=%d status=%d\n",
+ rf->init_state, status);
+ irdma_ctrl_deinit_hw(rf);
+ return status;
+}
+
+/**
+ * irdma_set_hw_rsrc - set hw memory resources.
+ * @rf: RDMA PCI function
+ */
+static void irdma_set_hw_rsrc(struct irdma_pci_f *rf)
+{
+ rf->allocated_qps = (void *)(rf->mem_rsrc +
+ (sizeof(struct irdma_arp_entry) * rf->arp_table_size));
+ rf->allocated_cqs = &rf->allocated_qps[BITS_TO_LONGS(rf->max_qp)];
+ rf->allocated_srqs = &rf->allocated_cqs[BITS_TO_LONGS(rf->max_cq)];
+ rf->allocated_mrs = &rf->allocated_srqs[BITS_TO_LONGS(rf->max_srq)];
+ rf->allocated_pds = &rf->allocated_mrs[BITS_TO_LONGS(rf->max_mr)];
+ rf->allocated_ahs = &rf->allocated_pds[BITS_TO_LONGS(rf->max_pd)];
+ rf->allocated_mcgs = &rf->allocated_ahs[BITS_TO_LONGS(rf->max_ah)];
+ rf->allocated_arps = &rf->allocated_mcgs[BITS_TO_LONGS(rf->max_mcg)];
+ rf->qp_table = (struct irdma_qp **)
+ (&rf->allocated_arps[BITS_TO_LONGS(rf->arp_table_size)]);
+ rf->cq_table = (struct irdma_cq **)(&rf->qp_table[rf->max_qp]);
+
+ spin_lock_init(&rf->rsrc_lock);
+ spin_lock_init(&rf->arp_lock);
+ spin_lock_init(&rf->qptable_lock);
+ spin_lock_init(&rf->cqtable_lock);
+ spin_lock_init(&rf->qh_list_lock);
+}
+
+/**
+ * irdma_calc_mem_rsrc_size - calculate memory resources size.
+ * @rf: RDMA PCI function
+ */
+static u32 irdma_calc_mem_rsrc_size(struct irdma_pci_f *rf)
+{
+ u32 rsrc_size;
+
+ rsrc_size = sizeof(struct irdma_arp_entry) * rf->arp_table_size;
+ rsrc_size += sizeof(unsigned long) * BITS_TO_LONGS(rf->max_qp);
+ rsrc_size += sizeof(unsigned long) * BITS_TO_LONGS(rf->max_mr);
+ rsrc_size += sizeof(unsigned long) * BITS_TO_LONGS(rf->max_cq);
+ rsrc_size += sizeof(unsigned long) * BITS_TO_LONGS(rf->max_srq);
+ rsrc_size += sizeof(unsigned long) * BITS_TO_LONGS(rf->max_pd);
+ rsrc_size += sizeof(unsigned long) * BITS_TO_LONGS(rf->arp_table_size);
+ rsrc_size += sizeof(unsigned long) * BITS_TO_LONGS(rf->max_ah);
+ rsrc_size += sizeof(unsigned long) * BITS_TO_LONGS(rf->max_mcg);
+ rsrc_size += sizeof(struct irdma_qp **) * rf->max_qp;
+ rsrc_size += sizeof(struct irdma_cq **) * rf->max_cq;
+ rsrc_size += sizeof(struct irdma_srq **) * rf->max_srq;
+
+ return rsrc_size;
+}
+
+/**
+ * irdma_initialize_hw_rsrc - initialize hw resource tracking array
+ * @rf: RDMA PCI function
+ */
+u32 irdma_initialize_hw_rsrc(struct irdma_pci_f *rf)
+{
+ u32 rsrc_size;
+ u32 mrdrvbits;
+ u32 ret;
+
+ if (rf->rdma_ver != IRDMA_GEN_1) {
+ rf->allocated_ws_nodes = bitmap_zalloc(IRDMA_MAX_WS_NODES,
+ GFP_KERNEL);
+ if (!rf->allocated_ws_nodes)
+ return -ENOMEM;
+
+ set_bit(0, rf->allocated_ws_nodes);
+ rf->max_ws_node_id = IRDMA_MAX_WS_NODES;
+ }
+ rf->max_cqe = rf->sc_dev.hw_attrs.uk_attrs.max_hw_cq_size;
+ rf->max_qp = rf->sc_dev.hmc_info->hmc_obj[IRDMA_HMC_IW_QP].cnt;
+ rf->max_mr = rf->sc_dev.hmc_info->hmc_obj[IRDMA_HMC_IW_MR].cnt;
+ rf->max_cq = rf->sc_dev.hmc_info->hmc_obj[IRDMA_HMC_IW_CQ].cnt;
+ rf->max_srq = rf->sc_dev.hmc_info->hmc_obj[IRDMA_HMC_IW_SRQ].cnt;
+ rf->max_pd = rf->sc_dev.hw_attrs.max_hw_pds;
+ rf->arp_table_size = rf->sc_dev.hmc_info->hmc_obj[IRDMA_HMC_IW_ARP].cnt;
+ rf->max_ah = rf->sc_dev.hmc_info->hmc_obj[IRDMA_HMC_IW_FSIAV].cnt;
+ rf->max_mcg = rf->max_qp;
+
+ rsrc_size = irdma_calc_mem_rsrc_size(rf);
+ rf->mem_rsrc = vzalloc(rsrc_size);
+ if (!rf->mem_rsrc) {
+ ret = -ENOMEM;
+ goto mem_rsrc_vzalloc_fail;
+ }
+
+ rf->arp_table = (struct irdma_arp_entry *)rf->mem_rsrc;
+
+ irdma_set_hw_rsrc(rf);
+
+ set_bit(0, rf->allocated_mrs);
+ set_bit(0, rf->allocated_qps);
+ set_bit(0, rf->allocated_cqs);
+ set_bit(0, rf->allocated_srqs);
+ set_bit(0, rf->allocated_pds);
+ set_bit(0, rf->allocated_arps);
+ set_bit(0, rf->allocated_ahs);
+ set_bit(0, rf->allocated_mcgs);
+ set_bit(2, rf->allocated_qps); /* qp 2 IEQ */
+ set_bit(1, rf->allocated_qps); /* qp 1 ILQ */
+ set_bit(1, rf->allocated_cqs);
+ set_bit(1, rf->allocated_pds);
+ set_bit(2, rf->allocated_cqs);
+ set_bit(2, rf->allocated_pds);
+
+ INIT_LIST_HEAD(&rf->mc_qht_list.list);
+ /* stag index mask has a minimum of 14 bits */
+ mrdrvbits = 24 - max(get_count_order(rf->max_mr), 14);
+ rf->mr_stagmask = ~(((1 << mrdrvbits) - 1) << (32 - mrdrvbits));
+
+ return 0;
+
+mem_rsrc_vzalloc_fail:
+ bitmap_free(rf->allocated_ws_nodes);
+ rf->allocated_ws_nodes = NULL;
+
+ return ret;
+}
+
+/**
+ * irdma_cqp_ce_handler - handle cqp completions
+ * @rf: RDMA PCI function
+ * @cq: cq for cqp completions
+ */
+void irdma_cqp_ce_handler(struct irdma_pci_f *rf, struct irdma_sc_cq *cq)
+{
+ struct irdma_cqp_request *cqp_request;
+ struct irdma_sc_dev *dev = &rf->sc_dev;
+ u32 cqe_count = 0;
+ struct irdma_ccq_cqe_info info;
+ unsigned long flags;
+ int ret;
+
+ do {
+ memset(&info, 0, sizeof(info));
+ spin_lock_irqsave(&rf->cqp.compl_lock, flags);
+ ret = irdma_sc_ccq_get_cqe_info(cq, &info);
+ spin_unlock_irqrestore(&rf->cqp.compl_lock, flags);
+ if (ret)
+ break;
+
+ cqp_request = (struct irdma_cqp_request *)
+ (unsigned long)info.scratch;
+ if (info.error && irdma_cqp_crit_err(dev, cqp_request->info.cqp_cmd,
+ info.maj_err_code,
+ info.min_err_code))
+ ibdev_err(&rf->iwdev->ibdev, "cqp opcode = 0x%x maj_err_code = 0x%x min_err_code = 0x%x\n",
+ info.op_code, info.maj_err_code, info.min_err_code);
+ if (cqp_request) {
+ cqp_request->compl_info.maj_err_code = info.maj_err_code;
+ cqp_request->compl_info.min_err_code = info.min_err_code;
+ cqp_request->compl_info.op_ret_val = info.op_ret_val;
+ cqp_request->compl_info.error = info.error;
+
+ /*
+ * If this is deferred or pending completion, then mark
+ * CQP request as pending to not block the CQ, but don't
+ * release CQP request, as it is still on the OOO list.
+ */
+ if (info.pending)
+ cqp_request->pending = true;
+ else
+ irdma_complete_cqp_request(&rf->cqp,
+ cqp_request);
+ }
+
+ cqe_count++;
+ } while (1);
+
+ if (cqe_count) {
+ irdma_process_bh(dev);
+ irdma_sc_ccq_arm(cq);
+ }
+}
+
+/**
+ * cqp_compl_worker - Handle cqp completions
+ * @work: Pointer to work structure
+ */
+void cqp_compl_worker(struct work_struct *work)
+{
+ struct irdma_pci_f *rf = container_of(work, struct irdma_pci_f,
+ cqp_cmpl_work);
+ struct irdma_sc_cq *cq = &rf->ccq.sc_cq;
+
+ irdma_cqp_ce_handler(rf, cq);
+}
+
+/**
+ * irdma_lookup_apbvt_entry - lookup hash table for an existing apbvt entry corresponding to port
+ * @cm_core: cm's core
+ * @port: port to identify apbvt entry
+ */
+static struct irdma_apbvt_entry *irdma_lookup_apbvt_entry(struct irdma_cm_core *cm_core,
+ u16 port)
+{
+ struct irdma_apbvt_entry *entry;
+
+ hash_for_each_possible(cm_core->apbvt_hash_tbl, entry, hlist, port) {
+ if (entry->port == port) {
+ entry->use_cnt++;
+ return entry;
+ }
+ }
+
+ return NULL;
+}
+
+/**
+ * irdma_next_iw_state - modify qp state
+ * @iwqp: iwarp qp to modify
+ * @state: next state for qp
+ * @del_hash: del hash
+ * @term: term message
+ * @termlen: length of term message
+ */
+void irdma_next_iw_state(struct irdma_qp *iwqp, u8 state, u8 del_hash, u8 term,
+ u8 termlen)
+{
+ struct irdma_modify_qp_info info = {};
+
+ info.next_iwarp_state = state;
+ info.remove_hash_idx = del_hash;
+ info.cq_num_valid = true;
+ info.arp_cache_idx_valid = true;
+ info.dont_send_term = true;
+ info.dont_send_fin = true;
+ info.termlen = termlen;
+
+ if (term & IRDMAQP_TERM_SEND_TERM_ONLY)
+ info.dont_send_term = false;
+ if (term & IRDMAQP_TERM_SEND_FIN_ONLY)
+ info.dont_send_fin = false;
+ if (iwqp->sc_qp.term_flags && state == IRDMA_QP_STATE_ERROR)
+ info.reset_tcp_conn = true;
+ iwqp->hw_iwarp_state = state;
+ irdma_hw_modify_qp(iwqp->iwdev, iwqp, &info, 0);
+ iwqp->iwarp_state = info.next_iwarp_state;
+}
+
+/**
+ * irdma_del_local_mac_entry - remove a mac entry from the hw
+ * table
+ * @rf: RDMA PCI function
+ * @idx: the index of the mac ip address to delete
+ */
+void irdma_del_local_mac_entry(struct irdma_pci_f *rf, u16 idx)
+{
+ struct irdma_cqp *iwcqp = &rf->cqp;
+ struct irdma_cqp_request *cqp_request;
+ struct cqp_cmds_info *cqp_info;
+
+ cqp_request = irdma_alloc_and_get_cqp_request(iwcqp, true);
+ if (!cqp_request)
+ return;
+
+ cqp_info = &cqp_request->info;
+ cqp_info->cqp_cmd = IRDMA_OP_DELETE_LOCAL_MAC_ENTRY;
+ cqp_info->post_sq = 1;
+ cqp_info->in.u.del_local_mac_entry.cqp = &iwcqp->sc_cqp;
+ cqp_info->in.u.del_local_mac_entry.scratch = (uintptr_t)cqp_request;
+ cqp_info->in.u.del_local_mac_entry.entry_idx = idx;
+ cqp_info->in.u.del_local_mac_entry.ignore_ref_count = 0;
+
+ irdma_handle_cqp_op(rf, cqp_request);
+ irdma_put_cqp_request(iwcqp, cqp_request);
+}
+
+/**
+ * irdma_add_local_mac_entry - add a mac ip address entry to the
+ * hw table
+ * @rf: RDMA PCI function
+ * @mac_addr: pointer to mac address
+ * @idx: the index of the mac ip address to add
+ */
+int irdma_add_local_mac_entry(struct irdma_pci_f *rf, const u8 *mac_addr, u16 idx)
+{
+ struct irdma_local_mac_entry_info *info;
+ struct irdma_cqp *iwcqp = &rf->cqp;
+ struct irdma_cqp_request *cqp_request;
+ struct cqp_cmds_info *cqp_info;
+ int status;
+
+ cqp_request = irdma_alloc_and_get_cqp_request(iwcqp, true);
+ if (!cqp_request)
+ return -ENOMEM;
+
+ cqp_info = &cqp_request->info;
+ cqp_info->post_sq = 1;
+ info = &cqp_info->in.u.add_local_mac_entry.info;
+ ether_addr_copy(info->mac_addr, mac_addr);
+ info->entry_idx = idx;
+ cqp_info->in.u.add_local_mac_entry.scratch = (uintptr_t)cqp_request;
+ cqp_info->cqp_cmd = IRDMA_OP_ADD_LOCAL_MAC_ENTRY;
+ cqp_info->in.u.add_local_mac_entry.cqp = &iwcqp->sc_cqp;
+ cqp_info->in.u.add_local_mac_entry.scratch = (uintptr_t)cqp_request;
+
+ status = irdma_handle_cqp_op(rf, cqp_request);
+ irdma_put_cqp_request(iwcqp, cqp_request);
+
+ return status;
+}
+
+/**
+ * irdma_alloc_local_mac_entry - allocate a mac entry
+ * @rf: RDMA PCI function
+ * @mac_tbl_idx: the index of the new mac address
+ *
+ * Allocate a mac address entry and update the mac_tbl_idx
+ * to hold the index of the newly created mac address
+ * Return 0 if successful, otherwise return error
+ */
+int irdma_alloc_local_mac_entry(struct irdma_pci_f *rf, u16 *mac_tbl_idx)
+{
+ struct irdma_cqp *iwcqp = &rf->cqp;
+ struct irdma_cqp_request *cqp_request;
+ struct cqp_cmds_info *cqp_info;
+ int status = 0;
+
+ cqp_request = irdma_alloc_and_get_cqp_request(iwcqp, true);
+ if (!cqp_request)
+ return -ENOMEM;
+
+ cqp_info = &cqp_request->info;
+ cqp_info->cqp_cmd = IRDMA_OP_ALLOC_LOCAL_MAC_ENTRY;
+ cqp_info->post_sq = 1;
+ cqp_info->in.u.alloc_local_mac_entry.cqp = &iwcqp->sc_cqp;
+ cqp_info->in.u.alloc_local_mac_entry.scratch = (uintptr_t)cqp_request;
+ status = irdma_handle_cqp_op(rf, cqp_request);
+ if (!status)
+ *mac_tbl_idx = (u16)cqp_request->compl_info.op_ret_val;
+
+ irdma_put_cqp_request(iwcqp, cqp_request);
+
+ return status;
+}
+
+/**
+ * irdma_cqp_manage_apbvt_cmd - send cqp command manage apbvt
+ * @iwdev: irdma device
+ * @accel_local_port: port for apbvt
+ * @add_port: add ordelete port
+ */
+static int irdma_cqp_manage_apbvt_cmd(struct irdma_device *iwdev,
+ u16 accel_local_port, bool add_port)
+{
+ struct irdma_apbvt_info *info;
+ struct irdma_cqp_request *cqp_request;
+ struct cqp_cmds_info *cqp_info;
+ int status;
+
+ cqp_request = irdma_alloc_and_get_cqp_request(&iwdev->rf->cqp, add_port);
+ if (!cqp_request)
+ return -ENOMEM;
+
+ cqp_info = &cqp_request->info;
+ info = &cqp_info->in.u.manage_apbvt_entry.info;
+ info->add = add_port;
+ info->port = accel_local_port;
+ cqp_info->cqp_cmd = IRDMA_OP_MANAGE_APBVT_ENTRY;
+ cqp_info->post_sq = 1;
+ cqp_info->in.u.manage_apbvt_entry.cqp = &iwdev->rf->cqp.sc_cqp;
+ cqp_info->in.u.manage_apbvt_entry.scratch = (uintptr_t)cqp_request;
+ ibdev_dbg(&iwdev->ibdev, "DEV: %s: port=0x%04x\n",
+ (!add_port) ? "DELETE" : "ADD", accel_local_port);
+
+ status = irdma_handle_cqp_op(iwdev->rf, cqp_request);
+ irdma_put_cqp_request(&iwdev->rf->cqp, cqp_request);
+
+ return status;
+}
+
+/**
+ * irdma_add_apbvt - add tcp port to HW apbvt table
+ * @iwdev: irdma device
+ * @port: port for apbvt
+ */
+struct irdma_apbvt_entry *irdma_add_apbvt(struct irdma_device *iwdev, u16 port)
+{
+ struct irdma_cm_core *cm_core = &iwdev->cm_core;
+ struct irdma_apbvt_entry *entry;
+ unsigned long flags;
+
+ spin_lock_irqsave(&cm_core->apbvt_lock, flags);
+ entry = irdma_lookup_apbvt_entry(cm_core, port);
+ if (entry) {
+ spin_unlock_irqrestore(&cm_core->apbvt_lock, flags);
+ return entry;
+ }
+
+ entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
+ if (!entry) {
+ spin_unlock_irqrestore(&cm_core->apbvt_lock, flags);
+ return NULL;
+ }
+
+ entry->port = port;
+ entry->use_cnt = 1;
+ hash_add(cm_core->apbvt_hash_tbl, &entry->hlist, entry->port);
+ spin_unlock_irqrestore(&cm_core->apbvt_lock, flags);
+
+ if (irdma_cqp_manage_apbvt_cmd(iwdev, port, true)) {
+ kfree(entry);
+ return NULL;
+ }
+
+ return entry;
+}
+
+/**
+ * irdma_del_apbvt - delete tcp port from HW apbvt table
+ * @iwdev: irdma device
+ * @entry: apbvt entry object
+ */
+void irdma_del_apbvt(struct irdma_device *iwdev,
+ struct irdma_apbvt_entry *entry)
+{
+ struct irdma_cm_core *cm_core = &iwdev->cm_core;
+ unsigned long flags;
+
+ spin_lock_irqsave(&cm_core->apbvt_lock, flags);
+ if (--entry->use_cnt) {
+ spin_unlock_irqrestore(&cm_core->apbvt_lock, flags);
+ return;
+ }
+
+ hash_del(&entry->hlist);
+ /* apbvt_lock is held across CQP delete APBVT OP (non-waiting) to
+ * protect against race where add APBVT CQP can race ahead of the delete
+ * APBVT for same port.
+ */
+ irdma_cqp_manage_apbvt_cmd(iwdev, entry->port, false);
+ kfree(entry);
+ spin_unlock_irqrestore(&cm_core->apbvt_lock, flags);
+}
+
+/**
+ * irdma_manage_arp_cache - manage hw arp cache
+ * @rf: RDMA PCI function
+ * @mac_addr: mac address ptr
+ * @ip_addr: ip addr for arp cache
+ * @ipv4: flag inicating IPv4
+ * @action: add, delete or modify
+ */
+void irdma_manage_arp_cache(struct irdma_pci_f *rf,
+ const unsigned char *mac_addr,
+ u32 *ip_addr, bool ipv4, u32 action)
+{
+ struct irdma_add_arp_cache_entry_info *info;
+ struct irdma_cqp_request *cqp_request;
+ struct cqp_cmds_info *cqp_info;
+ int arp_index;
+
+ arp_index = irdma_arp_table(rf, ip_addr, ipv4, mac_addr, action);
+ if (arp_index == -1)
+ return;
+
+ cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, false);
+ if (!cqp_request)
+ return;
+
+ cqp_info = &cqp_request->info;
+ if (action == IRDMA_ARP_ADD) {
+ cqp_info->cqp_cmd = IRDMA_OP_ADD_ARP_CACHE_ENTRY;
+ info = &cqp_info->in.u.add_arp_cache_entry.info;
+ info->arp_index = (u16)arp_index;
+ info->permanent = true;
+ ether_addr_copy(info->mac_addr, mac_addr);
+ cqp_info->in.u.add_arp_cache_entry.scratch =
+ (uintptr_t)cqp_request;
+ cqp_info->in.u.add_arp_cache_entry.cqp = &rf->cqp.sc_cqp;
+ } else {
+ cqp_info->cqp_cmd = IRDMA_OP_DELETE_ARP_CACHE_ENTRY;
+ cqp_info->in.u.del_arp_cache_entry.scratch =
+ (uintptr_t)cqp_request;
+ cqp_info->in.u.del_arp_cache_entry.cqp = &rf->cqp.sc_cqp;
+ cqp_info->in.u.del_arp_cache_entry.arp_index = arp_index;
+ }
+
+ cqp_info->post_sq = 1;
+ irdma_handle_cqp_op(rf, cqp_request);
+ irdma_put_cqp_request(&rf->cqp, cqp_request);
+}
+
+/**
+ * irdma_send_syn_cqp_callback - do syn/ack after qhash
+ * @cqp_request: qhash cqp completion
+ */
+static void irdma_send_syn_cqp_callback(struct irdma_cqp_request *cqp_request)
+{
+ struct irdma_cm_node *cm_node = cqp_request->param;
+
+ irdma_send_syn(cm_node, 1);
+ irdma_rem_ref_cm_node(cm_node);
+}
+
+/**
+ * irdma_manage_qhash - add or modify qhash
+ * @iwdev: irdma device
+ * @cminfo: cm info for qhash
+ * @etype: type (syn or quad)
+ * @mtype: type of qhash
+ * @cmnode: cmnode associated with connection
+ * @wait: wait for completion
+ */
+int irdma_manage_qhash(struct irdma_device *iwdev, struct irdma_cm_info *cminfo,
+ enum irdma_quad_entry_type etype,
+ enum irdma_quad_hash_manage_type mtype, void *cmnode,
+ bool wait)
+{
+ struct irdma_qhash_table_info *info;
+ struct irdma_cqp *iwcqp = &iwdev->rf->cqp;
+ struct irdma_cqp_request *cqp_request;
+ struct cqp_cmds_info *cqp_info;
+ struct irdma_cm_node *cm_node = cmnode;
+ int status;
+
+ cqp_request = irdma_alloc_and_get_cqp_request(iwcqp, wait);
+ if (!cqp_request)
+ return -ENOMEM;
+
+ cqp_info = &cqp_request->info;
+ info = &cqp_info->in.u.manage_qhash_table_entry.info;
+ info->vsi = &iwdev->vsi;
+ info->manage = mtype;
+ info->entry_type = etype;
+ if (cminfo->vlan_id < VLAN_N_VID) {
+ info->vlan_valid = true;
+ info->vlan_id = cminfo->vlan_id;
+ } else {
+ info->vlan_valid = false;
+ }
+ info->ipv4_valid = cminfo->ipv4;
+ info->user_pri = cminfo->user_pri;
+ ether_addr_copy(info->mac_addr, iwdev->netdev->dev_addr);
+ info->qp_num = cminfo->qh_qpid;
+ info->dest_port = cminfo->loc_port;
+ info->dest_ip[0] = cminfo->loc_addr[0];
+ info->dest_ip[1] = cminfo->loc_addr[1];
+ info->dest_ip[2] = cminfo->loc_addr[2];
+ info->dest_ip[3] = cminfo->loc_addr[3];
+ if (etype == IRDMA_QHASH_TYPE_TCP_ESTABLISHED ||
+ etype == IRDMA_QHASH_TYPE_UDP_UNICAST ||
+ etype == IRDMA_QHASH_TYPE_UDP_MCAST ||
+ etype == IRDMA_QHASH_TYPE_ROCE_MCAST ||
+ etype == IRDMA_QHASH_TYPE_ROCEV2_HW) {
+ info->src_port = cminfo->rem_port;
+ info->src_ip[0] = cminfo->rem_addr[0];
+ info->src_ip[1] = cminfo->rem_addr[1];
+ info->src_ip[2] = cminfo->rem_addr[2];
+ info->src_ip[3] = cminfo->rem_addr[3];
+ }
+ if (cmnode) {
+ cqp_request->callback_fcn = irdma_send_syn_cqp_callback;
+ cqp_request->param = cmnode;
+ if (!wait)
+ refcount_inc(&cm_node->refcnt);
+ }
+ if (info->ipv4_valid)
+ ibdev_dbg(&iwdev->ibdev,
+ "CM: %s caller: %pS loc_port=0x%04x rem_port=0x%04x loc_addr=%pI4 rem_addr=%pI4 mac=%pM, vlan_id=%d cm_node=%p\n",
+ (!mtype) ? "DELETE" : "ADD",
+ __builtin_return_address(0), info->dest_port,
+ info->src_port, info->dest_ip, info->src_ip,
+ info->mac_addr, cminfo->vlan_id,
+ cmnode ? cmnode : NULL);
+ else
+ ibdev_dbg(&iwdev->ibdev,
+ "CM: %s caller: %pS loc_port=0x%04x rem_port=0x%04x loc_addr=%pI6 rem_addr=%pI6 mac=%pM, vlan_id=%d cm_node=%p\n",
+ (!mtype) ? "DELETE" : "ADD",
+ __builtin_return_address(0), info->dest_port,
+ info->src_port, info->dest_ip, info->src_ip,
+ info->mac_addr, cminfo->vlan_id,
+ cmnode ? cmnode : NULL);
+
+ cqp_info->in.u.manage_qhash_table_entry.cqp = &iwdev->rf->cqp.sc_cqp;
+ cqp_info->in.u.manage_qhash_table_entry.scratch = (uintptr_t)cqp_request;
+ cqp_info->cqp_cmd = IRDMA_OP_MANAGE_QHASH_TABLE_ENTRY;
+ cqp_info->post_sq = 1;
+ status = irdma_handle_cqp_op(iwdev->rf, cqp_request);
+ if (status && cm_node && !wait)
+ irdma_rem_ref_cm_node(cm_node);
+
+ irdma_put_cqp_request(iwcqp, cqp_request);
+
+ return status;
+}
+
+/**
+ * irdma_hw_flush_wqes_callback - Check return code after flush
+ * @cqp_request: qhash cqp completion
+ */
+static void irdma_hw_flush_wqes_callback(struct irdma_cqp_request *cqp_request)
+{
+ struct irdma_qp_flush_info *hw_info;
+ struct irdma_sc_qp *qp;
+ struct irdma_qp *iwqp;
+ struct cqp_cmds_info *cqp_info;
+
+ cqp_info = &cqp_request->info;
+ hw_info = &cqp_info->in.u.qp_flush_wqes.info;
+ qp = cqp_info->in.u.qp_flush_wqes.qp;
+ iwqp = qp->qp_uk.back_qp;
+
+ if (cqp_request->compl_info.maj_err_code)
+ return;
+
+ if (hw_info->rq &&
+ (cqp_request->compl_info.min_err_code == IRDMA_CQP_COMPL_SQ_WQE_FLUSHED ||
+ cqp_request->compl_info.min_err_code == 0)) {
+ /* RQ WQE flush was requested but did not happen */
+ qp->qp_uk.rq_flush_complete = true;
+ }
+ if (hw_info->sq &&
+ (cqp_request->compl_info.min_err_code == IRDMA_CQP_COMPL_RQ_WQE_FLUSHED ||
+ cqp_request->compl_info.min_err_code == 0)) {
+ if (IRDMA_RING_MORE_WORK(qp->qp_uk.sq_ring)) {
+ ibdev_err(&iwqp->iwdev->ibdev, "Flush QP[%d] failed, SQ has more work",
+ qp->qp_uk.qp_id);
+ irdma_ib_qp_event(iwqp, IRDMA_QP_EVENT_CATASTROPHIC);
+ }
+ qp->qp_uk.sq_flush_complete = true;
+ }
+}
+
+/**
+ * irdma_hw_flush_wqes - flush qp's wqe
+ * @rf: RDMA PCI function
+ * @qp: hardware control qp
+ * @info: info for flush
+ * @wait: flag wait for completion
+ */
+int irdma_hw_flush_wqes(struct irdma_pci_f *rf, struct irdma_sc_qp *qp,
+ struct irdma_qp_flush_info *info, bool wait)
+{
+ int status;
+ struct irdma_qp_flush_info *hw_info;
+ struct irdma_cqp_request *cqp_request;
+ struct cqp_cmds_info *cqp_info;
+ struct irdma_qp *iwqp = qp->qp_uk.back_qp;
+
+ cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, wait);
+ if (!cqp_request)
+ return -ENOMEM;
+
+ cqp_info = &cqp_request->info;
+ if (!wait)
+ cqp_request->callback_fcn = irdma_hw_flush_wqes_callback;
+ hw_info = &cqp_request->info.in.u.qp_flush_wqes.info;
+ memcpy(hw_info, info, sizeof(*hw_info));
+ cqp_info->cqp_cmd = IRDMA_OP_QP_FLUSH_WQES;
+ cqp_info->post_sq = 1;
+ cqp_info->in.u.qp_flush_wqes.qp = qp;
+ cqp_info->in.u.qp_flush_wqes.scratch = (uintptr_t)cqp_request;
+ status = irdma_handle_cqp_op(rf, cqp_request);
+ if (status) {
+ qp->qp_uk.sq_flush_complete = true;
+ qp->qp_uk.rq_flush_complete = true;
+ irdma_put_cqp_request(&rf->cqp, cqp_request);
+ return status;
+ }
+
+ if (!wait || cqp_request->compl_info.maj_err_code)
+ goto put_cqp;
+
+ if (info->rq) {
+ if (cqp_request->compl_info.min_err_code == IRDMA_CQP_COMPL_SQ_WQE_FLUSHED ||
+ cqp_request->compl_info.min_err_code == 0) {
+ /* RQ WQE flush was requested but did not happen */
+ qp->qp_uk.rq_flush_complete = true;
+ }
+ }
+ if (info->sq) {
+ if (cqp_request->compl_info.min_err_code == IRDMA_CQP_COMPL_RQ_WQE_FLUSHED ||
+ cqp_request->compl_info.min_err_code == 0) {
+ /*
+ * Handling case where WQE is posted to empty SQ when
+ * flush has not completed
+ */
+ if (IRDMA_RING_MORE_WORK(qp->qp_uk.sq_ring)) {
+ struct irdma_cqp_request *new_req;
+
+ if (!qp->qp_uk.sq_flush_complete)
+ goto put_cqp;
+ qp->qp_uk.sq_flush_complete = false;
+ qp->flush_sq = false;
+
+ info->rq = false;
+ info->sq = true;
+ new_req = irdma_alloc_and_get_cqp_request(&rf->cqp, true);
+ if (!new_req) {
+ status = -ENOMEM;
+ goto put_cqp;
+ }
+ cqp_info = &new_req->info;
+ hw_info = &new_req->info.in.u.qp_flush_wqes.info;
+ memcpy(hw_info, info, sizeof(*hw_info));
+ cqp_info->cqp_cmd = IRDMA_OP_QP_FLUSH_WQES;
+ cqp_info->post_sq = 1;
+ cqp_info->in.u.qp_flush_wqes.qp = qp;
+ cqp_info->in.u.qp_flush_wqes.scratch = (uintptr_t)new_req;
+
+ status = irdma_handle_cqp_op(rf, new_req);
+ if (new_req->compl_info.maj_err_code ||
+ new_req->compl_info.min_err_code != IRDMA_CQP_COMPL_SQ_WQE_FLUSHED ||
+ status) {
+ ibdev_err(&iwqp->iwdev->ibdev, "fatal QP event: SQ in error but not flushed, qp: %d",
+ iwqp->ibqp.qp_num);
+ qp->qp_uk.sq_flush_complete = false;
+ irdma_ib_qp_event(iwqp, IRDMA_QP_EVENT_CATASTROPHIC);
+ }
+ irdma_put_cqp_request(&rf->cqp, new_req);
+ } else {
+ /* SQ WQE flush was requested but did not happen */
+ qp->qp_uk.sq_flush_complete = true;
+ }
+ } else {
+ if (!IRDMA_RING_MORE_WORK(qp->qp_uk.sq_ring))
+ qp->qp_uk.sq_flush_complete = true;
+ }
+ }
+
+ ibdev_dbg(&rf->iwdev->ibdev,
+ "VERBS: qp_id=%d qp_type=%d qpstate=%d ibqpstate=%d last_aeq=%d hw_iw_state=%d maj_err_code=%d min_err_code=%d\n",
+ iwqp->ibqp.qp_num, rf->protocol_used, iwqp->iwarp_state,
+ iwqp->ibqp_state, iwqp->last_aeq, iwqp->hw_iwarp_state,
+ cqp_request->compl_info.maj_err_code,
+ cqp_request->compl_info.min_err_code);
+put_cqp:
+ irdma_put_cqp_request(&rf->cqp, cqp_request);
+
+ return status;
+}
+
+/**
+ * irdma_gen_ae - generate AE
+ * @rf: RDMA PCI function
+ * @qp: qp associated with AE
+ * @info: info for ae
+ * @wait: wait for completion
+ */
+void irdma_gen_ae(struct irdma_pci_f *rf, struct irdma_sc_qp *qp,
+ struct irdma_gen_ae_info *info, bool wait)
+{
+ struct irdma_gen_ae_info *ae_info;
+ struct irdma_cqp_request *cqp_request;
+ struct cqp_cmds_info *cqp_info;
+
+ cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, wait);
+ if (!cqp_request)
+ return;
+
+ cqp_info = &cqp_request->info;
+ ae_info = &cqp_request->info.in.u.gen_ae.info;
+ memcpy(ae_info, info, sizeof(*ae_info));
+ cqp_info->cqp_cmd = IRDMA_OP_GEN_AE;
+ cqp_info->post_sq = 1;
+ cqp_info->in.u.gen_ae.qp = qp;
+ cqp_info->in.u.gen_ae.scratch = (uintptr_t)cqp_request;
+
+ irdma_handle_cqp_op(rf, cqp_request);
+ irdma_put_cqp_request(&rf->cqp, cqp_request);
+}
+
+void irdma_flush_wqes(struct irdma_qp *iwqp, u32 flush_mask)
+{
+ struct irdma_qp_flush_info info = {};
+ struct irdma_pci_f *rf = iwqp->iwdev->rf;
+ u8 flush_code = iwqp->sc_qp.flush_code;
+
+ if ((!(flush_mask & IRDMA_FLUSH_SQ) &&
+ !(flush_mask & IRDMA_FLUSH_RQ)) ||
+ ((flush_mask & IRDMA_REFLUSH) && rf->rdma_ver >= IRDMA_GEN_3))
+ return;
+
+ /* Set flush info fields*/
+ info.sq = flush_mask & IRDMA_FLUSH_SQ;
+ info.rq = flush_mask & IRDMA_FLUSH_RQ;
+
+ /* Generate userflush errors in CQE */
+ info.sq_major_code = IRDMA_FLUSH_MAJOR_ERR;
+ info.sq_minor_code = FLUSH_GENERAL_ERR;
+ info.rq_major_code = IRDMA_FLUSH_MAJOR_ERR;
+ info.rq_minor_code = FLUSH_GENERAL_ERR;
+ info.userflushcode = true;
+ info.err_sq_idx_valid = iwqp->sc_qp.err_sq_idx_valid;
+ info.err_sq_idx = iwqp->sc_qp.err_sq_idx;
+ info.err_rq_idx_valid = iwqp->sc_qp.err_rq_idx_valid;
+ info.err_rq_idx = iwqp->sc_qp.err_rq_idx;
+
+ if (flush_mask & IRDMA_REFLUSH) {
+ if (info.sq)
+ iwqp->sc_qp.flush_sq = false;
+ if (info.rq)
+ iwqp->sc_qp.flush_rq = false;
+ } else {
+ if (flush_code) {
+ if (info.sq && iwqp->sc_qp.sq_flush_code)
+ info.sq_minor_code = flush_code;
+ if (info.rq && iwqp->sc_qp.rq_flush_code)
+ info.rq_minor_code = flush_code;
+ }
+ if (!iwqp->user_mode)
+ queue_delayed_work(iwqp->iwdev->cleanup_wq,
+ &iwqp->dwork_flush,
+ msecs_to_jiffies(IRDMA_FLUSH_DELAY_MS));
+ }
+
+ /* Issue flush */
+ (void)irdma_hw_flush_wqes(rf, &iwqp->sc_qp, &info,
+ flush_mask & IRDMA_FLUSH_WAIT);
+ iwqp->flush_issued = true;
+}
diff --git a/drivers/infiniband/hw/irdma/i40iw_hw.c b/drivers/infiniband/hw/irdma/i40iw_hw.c
new file mode 100644
index 000000000000..60c1f2b1811d
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/i40iw_hw.c
@@ -0,0 +1,261 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2015 - 2021 Intel Corporation */
+#include "osdep.h"
+#include "type.h"
+#include "i40iw_hw.h"
+#include "protos.h"
+
+static u32 i40iw_regs[IRDMA_MAX_REGS] = {
+ I40E_PFPE_CQPTAIL,
+ I40E_PFPE_CQPDB,
+ I40E_PFPE_CCQPSTATUS,
+ I40E_PFPE_CCQPHIGH,
+ I40E_PFPE_CCQPLOW,
+ I40E_PFPE_CQARM,
+ I40E_PFPE_CQACK,
+ I40E_PFPE_AEQALLOC,
+ I40E_PFPE_CQPERRCODES,
+ I40E_PFPE_WQEALLOC,
+ I40E_PFINT_DYN_CTLN(0),
+ I40IW_DB_ADDR_OFFSET,
+
+ I40E_GLPCI_LBARCTRL,
+ I40E_GLPE_CPUSTATUS0,
+ I40E_GLPE_CPUSTATUS1,
+ I40E_GLPE_CPUSTATUS2,
+ I40E_PFINT_AEQCTL,
+ I40E_PFINT_CEQCTL(0),
+ I40E_VSIQF_CTL(0),
+ I40E_PFHMC_PDINV,
+ I40E_GLHMC_VFPDINV(0),
+ I40E_GLPE_CRITERR,
+ 0xffffffff /* PFINT_RATEN not used in FPK */
+};
+
+static u32 i40iw_stat_offsets[] = {
+ I40E_GLPES_PFIP4RXDISCARD(0),
+ I40E_GLPES_PFIP4RXTRUNC(0),
+ I40E_GLPES_PFIP4TXNOROUTE(0),
+ I40E_GLPES_PFIP6RXDISCARD(0),
+ I40E_GLPES_PFIP6RXTRUNC(0),
+ I40E_GLPES_PFIP6TXNOROUTE(0),
+ I40E_GLPES_PFTCPRTXSEG(0),
+ I40E_GLPES_PFTCPRXOPTERR(0),
+ I40E_GLPES_PFTCPRXPROTOERR(0),
+ I40E_GLPES_PFRXVLANERR(0),
+
+ I40E_GLPES_PFIP4RXOCTSLO(0),
+ I40E_GLPES_PFIP4RXPKTSLO(0),
+ I40E_GLPES_PFIP4RXFRAGSLO(0),
+ I40E_GLPES_PFIP4RXMCPKTSLO(0),
+ I40E_GLPES_PFIP4TXOCTSLO(0),
+ I40E_GLPES_PFIP4TXPKTSLO(0),
+ I40E_GLPES_PFIP4TXFRAGSLO(0),
+ I40E_GLPES_PFIP4TXMCPKTSLO(0),
+ I40E_GLPES_PFIP6RXOCTSLO(0),
+ I40E_GLPES_PFIP6RXPKTSLO(0),
+ I40E_GLPES_PFIP6RXFRAGSLO(0),
+ I40E_GLPES_PFIP6RXMCPKTSLO(0),
+ I40E_GLPES_PFIP6TXOCTSLO(0),
+ I40E_GLPES_PFIP6TXPKTSLO(0),
+ I40E_GLPES_PFIP6TXFRAGSLO(0),
+ I40E_GLPES_PFIP6TXMCPKTSLO(0),
+ I40E_GLPES_PFTCPRXSEGSLO(0),
+ I40E_GLPES_PFTCPTXSEGLO(0),
+ I40E_GLPES_PFRDMARXRDSLO(0),
+ I40E_GLPES_PFRDMARXSNDSLO(0),
+ I40E_GLPES_PFRDMARXWRSLO(0),
+ I40E_GLPES_PFRDMATXRDSLO(0),
+ I40E_GLPES_PFRDMATXSNDSLO(0),
+ I40E_GLPES_PFRDMATXWRSLO(0),
+ I40E_GLPES_PFRDMAVBNDLO(0),
+ I40E_GLPES_PFRDMAVINVLO(0),
+ I40E_GLPES_PFIP4RXMCOCTSLO(0),
+ I40E_GLPES_PFIP4TXMCOCTSLO(0),
+ I40E_GLPES_PFIP6RXMCOCTSLO(0),
+ I40E_GLPES_PFIP6TXMCOCTSLO(0),
+ I40E_GLPES_PFUDPRXPKTSLO(0),
+ I40E_GLPES_PFUDPTXPKTSLO(0)
+};
+
+static u64 i40iw_masks[IRDMA_MAX_MASKS] = {
+ I40E_PFPE_CCQPSTATUS_CCQP_DONE,
+ I40E_PFPE_CCQPSTATUS_CCQP_ERR,
+ I40E_CQPSQ_STAG_PDID,
+ I40E_CQPSQ_CQ_CEQID,
+ I40E_CQPSQ_CQ_CQID,
+ I40E_COMMIT_FPM_CQCNT,
+ I40E_CQPSQ_UPESD_HMCFNID,
+};
+
+static u64 i40iw_shifts[IRDMA_MAX_SHIFTS] = {
+ I40E_PFPE_CCQPSTATUS_CCQP_DONE_S,
+ I40E_PFPE_CCQPSTATUS_CCQP_ERR_S,
+ I40E_CQPSQ_STAG_PDID_S,
+ I40E_CQPSQ_CQ_CEQID_S,
+ I40E_CQPSQ_CQ_CQID_S,
+ I40E_COMMIT_FPM_CQCNT_S,
+ I40E_CQPSQ_UPESD_HMCFNID_S,
+};
+
+/**
+ * i40iw_config_ceq- Configure CEQ interrupt
+ * @dev: pointer to the device structure
+ * @ceq_id: Completion Event Queue ID
+ * @idx: vector index
+ * @enable: Enable CEQ interrupt when true
+ */
+static void i40iw_config_ceq(struct irdma_sc_dev *dev, u32 ceq_id, u32 idx,
+ bool enable)
+{
+ u32 reg_val;
+
+ reg_val = FIELD_PREP(I40E_PFINT_LNKLSTN_FIRSTQ_INDX, ceq_id) |
+ FIELD_PREP(I40E_PFINT_LNKLSTN_FIRSTQ_TYPE, QUEUE_TYPE_CEQ);
+ wr32(dev->hw, I40E_PFINT_LNKLSTN(idx - 1), reg_val);
+
+ reg_val = FIELD_PREP(I40E_PFINT_DYN_CTLN_ITR_INDX, 0x3) |
+ FIELD_PREP(I40E_PFINT_DYN_CTLN_INTENA, 0x1);
+ wr32(dev->hw, I40E_PFINT_DYN_CTLN(idx - 1), reg_val);
+
+ reg_val = FIELD_PREP(IRDMA_GLINT_CEQCTL_CAUSE_ENA, enable) |
+ FIELD_PREP(IRDMA_GLINT_CEQCTL_MSIX_INDX, idx) |
+ FIELD_PREP(I40E_PFINT_CEQCTL_NEXTQ_INDX, NULL_QUEUE_INDEX) |
+ FIELD_PREP(IRDMA_GLINT_CEQCTL_ITR_INDX, 0x3);
+
+ wr32(dev->hw, i40iw_regs[IRDMA_GLINT_CEQCTL] + 4 * ceq_id, reg_val);
+}
+
+/**
+ * i40iw_ena_irq - Enable interrupt
+ * @dev: pointer to the device structure
+ * @idx: vector index
+ */
+static void i40iw_ena_irq(struct irdma_sc_dev *dev, u32 idx)
+{
+ u32 val;
+
+ val = FIELD_PREP(IRDMA_GLINT_DYN_CTL_INTENA, 0x1) |
+ FIELD_PREP(IRDMA_GLINT_DYN_CTL_CLEARPBA, 0x1) |
+ FIELD_PREP(IRDMA_GLINT_DYN_CTL_ITR_INDX, 0x3);
+ wr32(dev->hw, i40iw_regs[IRDMA_GLINT_DYN_CTL] + 4 * (idx - 1), val);
+}
+
+/**
+ * i40iw_disable_irq - Disable interrupt
+ * @dev: pointer to the device structure
+ * @idx: vector index
+ */
+static void i40iw_disable_irq(struct irdma_sc_dev *dev, u32 idx)
+{
+ wr32(dev->hw, i40iw_regs[IRDMA_GLINT_DYN_CTL] + 4 * (idx - 1), 0);
+}
+
+static const struct irdma_irq_ops i40iw_irq_ops = {
+ .irdma_cfg_aeq = irdma_cfg_aeq,
+ .irdma_cfg_ceq = i40iw_config_ceq,
+ .irdma_dis_irq = i40iw_disable_irq,
+ .irdma_en_irq = i40iw_ena_irq,
+};
+
+static const struct irdma_hw_stat_map i40iw_hw_stat_map[] = {
+ [IRDMA_HW_STAT_INDEX_RXVLANERR] = { 0, 0, IRDMA_MAX_STATS_24 },
+ [IRDMA_HW_STAT_INDEX_IP4RXOCTS] = { 8, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_IP4RXPKTS] = { 16, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_IP4RXDISCARD] = { 24, 0, IRDMA_MAX_STATS_32 },
+ [IRDMA_HW_STAT_INDEX_IP4RXTRUNC] = { 32, 0, IRDMA_MAX_STATS_32 },
+ [IRDMA_HW_STAT_INDEX_IP4RXFRAGS] = { 40, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_IP4RXMCPKTS] = { 48, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_IP6RXOCTS] = { 56, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_IP6RXPKTS] = { 64, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_IP6RXDISCARD] = { 72, 0, IRDMA_MAX_STATS_32 },
+ [IRDMA_HW_STAT_INDEX_IP6RXTRUNC] = { 80, 0, IRDMA_MAX_STATS_32 },
+ [IRDMA_HW_STAT_INDEX_IP6RXFRAGS] = { 88, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_IP6RXMCPKTS] = { 96, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_IP4TXOCTS] = { 104, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_IP4TXPKTS] = { 112, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_IP4TXFRAGS] = { 120, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_IP4TXMCPKTS] = { 128, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_IP6TXOCTS] = { 136, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_IP6TXPKTS] = { 144, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_IP6TXFRAGS] = { 152, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_IP6TXMCPKTS] = { 160, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_IP4TXNOROUTE] = { 168, 0, IRDMA_MAX_STATS_24 },
+ [IRDMA_HW_STAT_INDEX_IP6TXNOROUTE] = { 176, 0, IRDMA_MAX_STATS_24 },
+ [IRDMA_HW_STAT_INDEX_TCPRXSEGS] = { 184, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_TCPRXOPTERR] = { 192, 0, IRDMA_MAX_STATS_24 },
+ [IRDMA_HW_STAT_INDEX_TCPRXPROTOERR] = { 200, 0, IRDMA_MAX_STATS_24 },
+ [IRDMA_HW_STAT_INDEX_TCPTXSEG] = { 208, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_TCPRTXSEG] = { 216, 0, IRDMA_MAX_STATS_32 },
+ [IRDMA_HW_STAT_INDEX_RDMARXWRS] = { 224, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_RDMARXRDS] = { 232, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_RDMARXSNDS] = { 240, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_RDMATXWRS] = { 248, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_RDMATXRDS] = { 256, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_RDMATXSNDS] = { 264, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_RDMAVBND] = { 272, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_RDMAVINV] = { 280, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_IP4RXMCOCTS] = { 288, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_IP4TXMCOCTS] = { 296, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_IP6RXMCOCTS] = { 304, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_IP6TXMCOCTS] = { 312, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_UDPRXPKTS] = { 320, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_UDPTXPKTS] = { 328, 0, IRDMA_MAX_STATS_48 },
+};
+
+void i40iw_init_hw(struct irdma_sc_dev *dev)
+{
+ int i;
+ u8 __iomem *hw_addr;
+
+ for (i = 0; i < IRDMA_MAX_REGS; ++i) {
+ hw_addr = dev->hw->hw_addr;
+
+ if (i == IRDMA_DB_ADDR_OFFSET)
+ hw_addr = NULL;
+
+ dev->hw_regs[i] = (u32 __iomem *)(i40iw_regs[i] + hw_addr);
+ }
+
+ for (i = 0; i < IRDMA_HW_STAT_INDEX_MAX_GEN_1; ++i)
+ dev->hw_stats_regs[i] = i40iw_stat_offsets[i];
+
+ dev->hw_attrs.first_hw_vf_fpm_id = I40IW_FIRST_VF_FPM_ID;
+ dev->hw_attrs.max_hw_vf_fpm_id = IRDMA_MAX_VF_FPM_ID;
+
+ for (i = 0; i < IRDMA_MAX_SHIFTS; ++i)
+ dev->hw_shifts[i] = i40iw_shifts[i];
+
+ for (i = 0; i < IRDMA_MAX_MASKS; ++i)
+ dev->hw_masks[i] = i40iw_masks[i];
+
+ dev->wqe_alloc_db = dev->hw_regs[IRDMA_WQEALLOC];
+ dev->cq_arm_db = dev->hw_regs[IRDMA_CQARM];
+ dev->aeq_alloc_db = dev->hw_regs[IRDMA_AEQALLOC];
+ dev->cqp_db = dev->hw_regs[IRDMA_CQPDB];
+ dev->cq_ack_db = dev->hw_regs[IRDMA_CQACK];
+ dev->ceq_itr_mask_db = NULL;
+ dev->aeq_itr_mask_db = NULL;
+ dev->irq_ops = &i40iw_irq_ops;
+ dev->hw_stats_map = i40iw_hw_stat_map;
+
+ /* Setup the hardware limits, hmc may limit further */
+ dev->hw_attrs.uk_attrs.max_hw_wq_frags = I40IW_MAX_WQ_FRAGMENT_COUNT;
+ dev->hw_attrs.uk_attrs.max_hw_read_sges = I40IW_MAX_SGE_RD;
+ dev->hw_attrs.max_hw_device_pages = I40IW_MAX_PUSH_PAGE_COUNT;
+ dev->hw_attrs.uk_attrs.max_hw_inline = I40IW_MAX_INLINE_DATA_SIZE;
+ dev->hw_attrs.page_size_cap = SZ_4K | SZ_2M;
+ dev->hw_attrs.max_hw_ird = I40IW_MAX_IRD_SIZE;
+ dev->hw_attrs.max_hw_ord = I40IW_MAX_ORD_SIZE;
+ dev->hw_attrs.max_hw_wqes = I40IW_MAX_WQ_ENTRIES;
+ dev->hw_attrs.uk_attrs.max_hw_rq_quanta = I40IW_QP_SW_MAX_RQ_QUANTA;
+ dev->hw_attrs.uk_attrs.max_hw_wq_quanta = I40IW_QP_SW_MAX_WQ_QUANTA;
+ dev->hw_attrs.uk_attrs.max_hw_sq_chunk = I40IW_MAX_QUANTA_PER_WR;
+ dev->hw_attrs.max_hw_pds = I40IW_MAX_PDS;
+ dev->hw_attrs.max_stat_inst = I40IW_MAX_STATS_COUNT;
+ dev->hw_attrs.max_stat_idx = IRDMA_HW_STAT_INDEX_MAX_GEN_1;
+ dev->hw_attrs.max_hw_outbound_msg_size = I40IW_MAX_OUTBOUND_MSG_SIZE;
+ dev->hw_attrs.max_hw_inbound_msg_size = I40IW_MAX_INBOUND_MSG_SIZE;
+ dev->hw_attrs.uk_attrs.min_hw_wq_size = I40IW_MIN_WQ_SIZE;
+ dev->hw_attrs.max_qp_wr = I40IW_MAX_QP_WRS;
+}
diff --git a/drivers/infiniband/hw/irdma/i40iw_hw.h b/drivers/infiniband/hw/irdma/i40iw_hw.h
new file mode 100644
index 000000000000..0095b327afcc
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/i40iw_hw.h
@@ -0,0 +1,162 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2015 - 2021 Intel Corporation */
+#ifndef I40IW_HW_H
+#define I40IW_HW_H
+#define I40E_VFPE_CQPTAIL1 0x0000A000 /* Reset: VFR */
+#define I40E_VFPE_CQPDB1 0x0000BC00 /* Reset: VFR */
+#define I40E_VFPE_CCQPSTATUS1 0x0000B800 /* Reset: VFR */
+#define I40E_VFPE_CCQPHIGH1 0x00009800 /* Reset: VFR */
+#define I40E_VFPE_CCQPLOW1 0x0000AC00 /* Reset: VFR */
+#define I40E_VFPE_CQARM1 0x0000B400 /* Reset: VFR */
+#define I40E_VFPE_CQACK1 0x0000B000 /* Reset: VFR */
+#define I40E_VFPE_AEQALLOC1 0x0000A400 /* Reset: VFR */
+#define I40E_VFPE_CQPERRCODES1 0x00009C00 /* Reset: VFR */
+#define I40E_VFPE_WQEALLOC1 0x0000C000 /* Reset: VFR */
+#define I40E_VFINT_DYN_CTLN(_INTVF) (0x00024800 + ((_INTVF) * 4)) /* _i=0...511 */ /* Reset: VFR */
+
+#define I40E_PFPE_CQPTAIL 0x00008080 /* Reset: PFR */
+
+#define I40E_PFPE_CQPDB 0x00008000 /* Reset: PFR */
+#define I40E_PFPE_CCQPSTATUS 0x00008100 /* Reset: PFR */
+#define I40E_PFPE_CCQPHIGH 0x00008200 /* Reset: PFR */
+#define I40E_PFPE_CCQPLOW 0x00008180 /* Reset: PFR */
+#define I40E_PFPE_CQARM 0x00131080 /* Reset: PFR */
+#define I40E_PFPE_CQACK 0x00131100 /* Reset: PFR */
+#define I40E_PFPE_AEQALLOC 0x00131180 /* Reset: PFR */
+#define I40E_PFPE_CQPERRCODES 0x00008880 /* Reset: PFR */
+#define I40E_PFPE_WQEALLOC 0x00138C00 /* Reset: PFR */
+#define I40E_GLPCI_LBARCTRL 0x000BE484 /* Reset: POR */
+#define I40E_GLPE_CPUSTATUS0 0x0000D040 /* Reset: PE_CORER */
+#define I40E_GLPE_CPUSTATUS1 0x0000D044 /* Reset: PE_CORER */
+#define I40E_GLPE_CPUSTATUS2 0x0000D048 /* Reset: PE_CORER */
+#define I40E_GLPE_CRITERR 0x000B4000 /* Reset: PE_CORER */
+#define I40E_PFHMC_PDINV 0x000C0300 /* Reset: PFR */
+#define I40E_GLHMC_VFPDINV(_i) (0x000C8300 + ((_i) * 4)) /* _i=0...31 */ /* Reset: CORER */
+#define I40E_PFINT_DYN_CTLN(_INTPF) (0x00034800 + ((_INTPF) * 4)) /* _i=0...511 */ /* Reset: PFR */
+#define I40E_PFINT_AEQCTL 0x00038700 /* Reset: CORER */
+
+#define I40E_GLPES_PFIP4RXDISCARD(_i) (0x00010600 + ((_i) * 4)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4RXTRUNC(_i) (0x00010700 + ((_i) * 4)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4TXNOROUTE(_i) (0x00012E00 + ((_i) * 4)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6RXDISCARD(_i) (0x00011200 + ((_i) * 4)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6RXTRUNC(_i) (0x00011300 + ((_i) * 4)) /* _i=0...15 */ /* Reset: PE_CORER */
+
+#define I40E_GLPES_PFRDMAVBNDLO(_i) (0x00014800 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4TXMCOCTSLO(_i) (0x00012000 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6RXMCOCTSLO(_i) (0x00011600 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6TXMCOCTSLO(_i) (0x00012A00 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFUDPRXPKTSLO(_i) (0x00013800 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFUDPTXPKTSLO(_i) (0x00013A00 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+
+#define I40E_GLPES_PFIP6TXNOROUTE(_i) (0x00012F00 + ((_i) * 4)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFTCPRTXSEG(_i) (0x00013600 + ((_i) * 4)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFTCPRXOPTERR(_i) (0x00013200 + ((_i) * 4)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFTCPRXPROTOERR(_i) (0x00013300 + ((_i) * 4)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFRXVLANERR(_i) (0x00010000 + ((_i) * 4)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4RXOCTSLO(_i) (0x00010200 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4RXPKTSLO(_i) (0x00010400 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4RXFRAGSLO(_i) (0x00010800 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4RXMCPKTSLO(_i) (0x00010C00 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4TXOCTSLO(_i) (0x00011A00 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4TXPKTSLO(_i) (0x00011C00 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4TXFRAGSLO(_i) (0x00011E00 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4TXMCPKTSLO(_i) (0x00012200 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6RXOCTSLO(_i) (0x00010E00 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6RXPKTSLO(_i) (0x00011000 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6RXFRAGSLO(_i) (0x00011400 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6TXOCTSLO(_i) (0x00012400 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6TXPKTSLO(_i) (0x00012600 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6TXFRAGSLO(_i) (0x00012800 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6TXMCPKTSLO(_i) (0x00012C00 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFTCPTXSEGLO(_i) (0x00013400 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFRDMARXRDSLO(_i) (0x00013E00 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFRDMARXSNDSLO(_i) (0x00014000 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFRDMARXWRSLO(_i) (0x00013C00 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFRDMATXRDSLO(_i) (0x00014400 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFRDMATXSNDSLO(_i) (0x00014600 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFRDMATXWRSLO(_i) (0x00014200 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP4RXMCOCTSLO(_i) (0x00010A00 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFIP6RXMCPKTSLO(_i) (0x00011800 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFTCPRXSEGSLO(_i) (0x00013000 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+#define I40E_GLPES_PFRDMAVINVLO(_i) (0x00014A00 + ((_i) * 8)) /* _i=0...15 */ /* Reset: PE_CORER */
+
+#define I40IW_DB_ADDR_OFFSET (4 * 1024 * 1024 - 64 * 1024)
+
+#define I40IW_VF_DB_ADDR_OFFSET (64 * 1024)
+
+#define I40E_PFINT_LNKLSTN(_INTPF) (0x00035000 + ((_INTPF) * 4)) /* _i=0...511 */ /* Reset: PFR */
+#define I40E_PFINT_LNKLSTN_MAX_INDEX 511
+#define I40E_PFINT_LNKLSTN_FIRSTQ_INDX GENMASK(10, 0)
+#define I40E_PFINT_LNKLSTN_FIRSTQ_TYPE GENMASK(12, 11)
+
+#define I40E_PFINT_CEQCTL(_INTPF) (0x00036800 + ((_INTPF) * 4)) /* _i=0...511 */ /* Reset: CORER */
+#define I40E_PFINT_CEQCTL_MAX_INDEX 511
+
+/* shifts/masks for FLD_[LS/RS]_64 macros used in device table */
+#define I40E_PFINT_CEQCTL_MSIX_INDX_S 0
+#define I40E_PFINT_CEQCTL_MSIX_INDX GENMASK(7, 0)
+#define I40E_PFINT_CEQCTL_ITR_INDX_S 11
+#define I40E_PFINT_CEQCTL_ITR_INDX GENMASK(12, 11)
+#define I40E_PFINT_CEQCTL_MSIX0_INDX_S 13
+#define I40E_PFINT_CEQCTL_MSIX0_INDX GENMASK(15, 13)
+#define I40E_PFINT_CEQCTL_NEXTQ_INDX_S 16
+#define I40E_PFINT_CEQCTL_NEXTQ_INDX GENMASK(26, 16)
+#define I40E_PFINT_CEQCTL_NEXTQ_TYPE_S 27
+#define I40E_PFINT_CEQCTL_NEXTQ_TYPE GENMASK(28, 27)
+#define I40E_PFINT_CEQCTL_CAUSE_ENA_S 30
+#define I40E_PFINT_CEQCTL_CAUSE_ENA BIT(30)
+#define I40E_PFINT_CEQCTL_INTEVENT_S 31
+#define I40E_PFINT_CEQCTL_INTEVENT BIT(31)
+#define I40E_CQPSQ_STAG_PDID_S 48
+#define I40E_CQPSQ_STAG_PDID GENMASK_ULL(62, 48)
+#define I40E_PFPE_CCQPSTATUS_CCQP_DONE_S 0
+#define I40E_PFPE_CCQPSTATUS_CCQP_DONE BIT_ULL(0)
+#define I40E_PFPE_CCQPSTATUS_CCQP_ERR_S 31
+#define I40E_PFPE_CCQPSTATUS_CCQP_ERR BIT_ULL(31)
+#define I40E_PFINT_DYN_CTLN_ITR_INDX_S 3
+#define I40E_PFINT_DYN_CTLN_ITR_INDX GENMASK(4, 3)
+#define I40E_PFINT_DYN_CTLN_INTENA_S 0
+#define I40E_PFINT_DYN_CTLN_INTENA BIT(0)
+#define I40E_CQPSQ_CQ_CEQID_S 24
+#define I40E_CQPSQ_CQ_CEQID GENMASK(30, 24)
+#define I40E_CQPSQ_CQ_CQID_S 0
+#define I40E_CQPSQ_CQ_CQID GENMASK_ULL(15, 0)
+#define I40E_COMMIT_FPM_CQCNT_S 0
+#define I40E_COMMIT_FPM_CQCNT GENMASK_ULL(17, 0)
+#define I40E_CQPSQ_UPESD_HMCFNID_S 0
+#define I40E_CQPSQ_UPESD_HMCFNID GENMASK_ULL(5, 0)
+
+#define I40E_VSIQF_CTL(_VSI) (0x0020D800 + ((_VSI) * 4))
+
+enum i40iw_device_caps_const {
+ I40IW_MAX_WQ_FRAGMENT_COUNT = 3,
+ I40IW_MAX_SGE_RD = 1,
+ I40IW_MAX_PUSH_PAGE_COUNT = 0,
+ I40IW_MAX_INLINE_DATA_SIZE = 48,
+ I40IW_MAX_IRD_SIZE = 63,
+ I40IW_MAX_ORD_SIZE = 127,
+ I40IW_MAX_WQ_ENTRIES = 2048,
+ I40IW_MAX_WQE_SIZE_RQ = 128,
+ I40IW_MAX_PDS = 32768,
+ I40IW_MAX_STATS_COUNT = 16,
+ I40IW_MAX_CQ_SIZE = 1048575,
+ I40IW_MAX_OUTBOUND_MSG_SIZE = 2147483647,
+ I40IW_MAX_INBOUND_MSG_SIZE = 2147483647,
+ I40IW_MIN_WQ_SIZE = 4 /* WQEs */,
+};
+
+#define I40IW_QP_WQE_MIN_SIZE 32
+#define I40IW_QP_WQE_MAX_SIZE 128
+#define I40IW_MAX_RQ_WQE_SHIFT 2
+#define I40IW_MAX_QUANTA_PER_WR 2
+
+#define I40IW_QP_SW_MAX_SQ_QUANTA 2048
+#define I40IW_QP_SW_MAX_RQ_QUANTA 16384
+#define I40IW_QP_SW_MAX_WQ_QUANTA 2048
+#define I40IW_MAX_QP_WRS ((I40IW_QP_SW_MAX_SQ_QUANTA - IRDMA_SQ_RSVD) / I40IW_MAX_QUANTA_PER_WR)
+#define I40IW_FIRST_VF_FPM_ID 16
+#define QUEUE_TYPE_CEQ 2
+#define NULL_QUEUE_INDEX 0x7FF
+
+void i40iw_init_hw(struct irdma_sc_dev *dev);
+#endif /* I40IW_HW_H */
diff --git a/drivers/infiniband/hw/irdma/i40iw_if.c b/drivers/infiniband/hw/irdma/i40iw_if.c
new file mode 100644
index 000000000000..15e036ddaffb
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/i40iw_if.c
@@ -0,0 +1,220 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2015 - 2021 Intel Corporation */
+#include "main.h"
+#include "i40iw_hw.h"
+#include <linux/net/intel/i40e_client.h>
+
+static struct i40e_client i40iw_client;
+
+/**
+ * i40iw_l2param_change - handle mss change
+ * @cdev_info: parent lan device information structure with data/ops
+ * @client: client for parameter change
+ * @params: new parameters from L2
+ */
+static void i40iw_l2param_change(struct i40e_info *cdev_info,
+ struct i40e_client *client,
+ struct i40e_params *params)
+{
+ struct irdma_l2params l2params = {};
+ struct irdma_device *iwdev;
+ struct ib_device *ibdev;
+
+ ibdev = ib_device_get_by_netdev(cdev_info->netdev, RDMA_DRIVER_IRDMA);
+ if (!ibdev)
+ return;
+
+ iwdev = to_iwdev(ibdev);
+
+ if (iwdev->vsi.mtu != params->mtu) {
+ l2params.mtu_changed = true;
+ l2params.mtu = params->mtu;
+ }
+ irdma_change_l2params(&iwdev->vsi, &l2params);
+ ib_device_put(ibdev);
+}
+
+/**
+ * i40iw_close - client interface operation close for iwarp/uda device
+ * @cdev_info: parent lan device information structure with data/ops
+ * @client: client to close
+ * @reset: flag to indicate close on reset
+ *
+ * Called by the lan driver during the processing of client unregister
+ * Destroy and clean up the driver resources
+ */
+static void i40iw_close(struct i40e_info *cdev_info, struct i40e_client *client,
+ bool reset)
+{
+ struct irdma_device *iwdev;
+ struct ib_device *ibdev;
+
+ ibdev = ib_device_get_by_netdev(cdev_info->netdev, RDMA_DRIVER_IRDMA);
+ if (WARN_ON(!ibdev))
+ return;
+
+ iwdev = to_iwdev(ibdev);
+ if (reset)
+ iwdev->rf->reset = true;
+
+ iwdev->iw_status = 0;
+ irdma_port_ibevent(iwdev);
+ ib_unregister_device_and_put(ibdev);
+ pr_debug("INIT: Gen1 PF[%d] close complete\n", PCI_FUNC(cdev_info->pcidev->devfn));
+}
+
+static void i40iw_request_reset(struct irdma_pci_f *rf)
+{
+ struct i40e_info *cdev_info = rf->cdev;
+
+ cdev_info->ops->request_reset(cdev_info, &i40iw_client, 1);
+}
+
+static void i40iw_fill_device_info(struct irdma_device *iwdev, struct i40e_info *cdev_info)
+{
+ struct irdma_pci_f *rf = iwdev->rf;
+
+ rf->rdma_ver = IRDMA_GEN_1;
+ rf->sc_dev.hw = &rf->hw;
+ rf->sc_dev.hw_attrs.uk_attrs.hw_rev = IRDMA_GEN_1;
+ rf->sc_dev.privileged = true;
+ rf->gen_ops.request_reset = i40iw_request_reset;
+ rf->pcidev = cdev_info->pcidev;
+ rf->pf_id = cdev_info->fid;
+ rf->hw.hw_addr = cdev_info->hw_addr;
+ rf->cdev = cdev_info;
+ rf->msix_count = cdev_info->msix_count;
+ rf->msix_entries = cdev_info->msix_entries;
+ rf->limits_sel = 5;
+ rf->protocol_used = IRDMA_IWARP_PROTOCOL_ONLY;
+ rf->iwdev = iwdev;
+
+ iwdev->init_state = INITIAL_STATE;
+ iwdev->rcv_wnd = IRDMA_CM_DEFAULT_RCV_WND_SCALED;
+ iwdev->rcv_wscale = IRDMA_CM_DEFAULT_RCV_WND_SCALE;
+ iwdev->netdev = cdev_info->netdev;
+ iwdev->vsi_num = 0;
+}
+
+/**
+ * i40iw_open - client interface operation open for iwarp/uda device
+ * @cdev_info: parent lan device information structure with data/ops
+ * @client: iwarp client information, provided during registration
+ *
+ * Called by the lan driver during the processing of client register
+ * Create device resources, set up queues, pble and hmc objects and
+ * register the device with the ib verbs interface
+ * Return 0 if successful, otherwise return error
+ */
+static int i40iw_open(struct i40e_info *cdev_info, struct i40e_client *client)
+{
+ struct irdma_l2params l2params = {};
+ struct irdma_device *iwdev;
+ struct irdma_pci_f *rf;
+ int err = -EIO;
+ int i;
+ u16 qset;
+ u16 last_qset = IRDMA_NO_QSET;
+
+ iwdev = ib_alloc_device(irdma_device, ibdev);
+ if (!iwdev)
+ return -ENOMEM;
+
+ iwdev->rf = kzalloc(sizeof(*rf), GFP_KERNEL);
+ if (!iwdev->rf) {
+ ib_dealloc_device(&iwdev->ibdev);
+ return -ENOMEM;
+ }
+
+ i40iw_fill_device_info(iwdev, cdev_info);
+ rf = iwdev->rf;
+
+ if (irdma_ctrl_init_hw(rf)) {
+ err = -EIO;
+ goto err_ctrl_init;
+ }
+
+ l2params.mtu = (cdev_info->params.mtu) ? cdev_info->params.mtu : IRDMA_DEFAULT_MTU;
+ for (i = 0; i < I40E_CLIENT_MAX_USER_PRIORITY; i++) {
+ qset = cdev_info->params.qos.prio_qos[i].qs_handle;
+ l2params.up2tc[i] = cdev_info->params.qos.prio_qos[i].tc;
+ l2params.qs_handle_list[i] = qset;
+ if (last_qset == IRDMA_NO_QSET)
+ last_qset = qset;
+ else if ((qset != last_qset) && (qset != IRDMA_NO_QSET))
+ iwdev->dcb_vlan_mode = true;
+ }
+
+ if (irdma_rt_init_hw(iwdev, &l2params)) {
+ err = -EIO;
+ goto err_rt_init;
+ }
+
+ err = irdma_ib_register_device(iwdev);
+ if (err)
+ goto err_ibreg;
+
+ ibdev_dbg(&iwdev->ibdev, "INIT: Gen1 PF[%d] open success\n",
+ PCI_FUNC(rf->pcidev->devfn));
+
+ return 0;
+
+err_ibreg:
+ irdma_rt_deinit_hw(iwdev);
+err_rt_init:
+ irdma_ctrl_deinit_hw(rf);
+err_ctrl_init:
+ kfree(iwdev->rf);
+ ib_dealloc_device(&iwdev->ibdev);
+
+ return err;
+}
+
+/* client interface functions */
+static const struct i40e_client_ops i40e_ops = {
+ .open = i40iw_open,
+ .close = i40iw_close,
+ .l2_param_change = i40iw_l2param_change
+};
+
+static struct i40e_client i40iw_client = {
+ .ops = &i40e_ops,
+ .type = I40E_CLIENT_IWARP,
+};
+
+static int i40iw_probe(struct auxiliary_device *aux_dev, const struct auxiliary_device_id *id)
+{
+ struct i40e_auxiliary_device *i40e_adev = container_of(aux_dev,
+ struct i40e_auxiliary_device,
+ aux_dev);
+ struct i40e_info *cdev_info = i40e_adev->ldev;
+
+ strscpy_pad(i40iw_client.name, "irdma", I40E_CLIENT_STR_LENGTH);
+ i40e_client_device_register(cdev_info, &i40iw_client);
+
+ return 0;
+}
+
+static void i40iw_remove(struct auxiliary_device *aux_dev)
+{
+ struct i40e_auxiliary_device *i40e_adev = container_of(aux_dev,
+ struct i40e_auxiliary_device,
+ aux_dev);
+ struct i40e_info *cdev_info = i40e_adev->ldev;
+
+ i40e_client_device_unregister(cdev_info);
+}
+
+static const struct auxiliary_device_id i40iw_auxiliary_id_table[] = {
+ {.name = "i40e.iwarp", },
+ {},
+};
+
+MODULE_DEVICE_TABLE(auxiliary, i40iw_auxiliary_id_table);
+
+struct auxiliary_driver i40iw_auxiliary_drv = {
+ .name = "gen_1",
+ .id_table = i40iw_auxiliary_id_table,
+ .probe = i40iw_probe,
+ .remove = i40iw_remove,
+};
diff --git a/drivers/infiniband/hw/irdma/icrdma_hw.c b/drivers/infiniband/hw/irdma/icrdma_hw.c
new file mode 100644
index 000000000000..32f26284a788
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/icrdma_hw.c
@@ -0,0 +1,205 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2017 - 2021 Intel Corporation */
+#include "osdep.h"
+#include "type.h"
+#include "icrdma_hw.h"
+
+static u32 icrdma_regs[IRDMA_MAX_REGS] = {
+ PFPE_CQPTAIL,
+ PFPE_CQPDB,
+ PFPE_CCQPSTATUS,
+ PFPE_CCQPHIGH,
+ PFPE_CCQPLOW,
+ PFPE_CQARM,
+ PFPE_CQACK,
+ PFPE_AEQALLOC,
+ PFPE_CQPERRCODES,
+ PFPE_WQEALLOC,
+ GLINT_DYN_CTL(0),
+ ICRDMA_DB_ADDR_OFFSET,
+
+ GLPCI_LBARCTRL,
+ GLPE_CPUSTATUS0,
+ GLPE_CPUSTATUS1,
+ GLPE_CPUSTATUS2,
+ PFINT_AEQCTL,
+ GLINT_CEQCTL(0),
+ VSIQF_PE_CTL1(0),
+ PFHMC_PDINV,
+ GLHMC_VFPDINV(0),
+ GLPE_CRITERR,
+ GLINT_RATE(0),
+};
+
+static u64 icrdma_masks[IRDMA_MAX_MASKS] = {
+ ICRDMA_CCQPSTATUS_CCQP_DONE,
+ ICRDMA_CCQPSTATUS_CCQP_ERR,
+ ICRDMA_CQPSQ_STAG_PDID,
+ ICRDMA_CQPSQ_CQ_CEQID,
+ ICRDMA_CQPSQ_CQ_CQID,
+ ICRDMA_COMMIT_FPM_CQCNT,
+ ICRDMA_CQPSQ_UPESD_HMCFNID,
+};
+
+static u64 icrdma_shifts[IRDMA_MAX_SHIFTS] = {
+ ICRDMA_CCQPSTATUS_CCQP_DONE_S,
+ ICRDMA_CCQPSTATUS_CCQP_ERR_S,
+ ICRDMA_CQPSQ_STAG_PDID_S,
+ ICRDMA_CQPSQ_CQ_CEQID_S,
+ ICRDMA_CQPSQ_CQ_CQID_S,
+ ICRDMA_COMMIT_FPM_CQCNT_S,
+ ICRDMA_CQPSQ_UPESD_HMCFNID_S,
+};
+
+/**
+ * icrdma_ena_irq - Enable interrupt
+ * @dev: pointer to the device structure
+ * @idx: vector index
+ */
+static void icrdma_ena_irq(struct irdma_sc_dev *dev, u32 idx)
+{
+ u32 val;
+ u32 interval = 0;
+
+ if (dev->ceq_itr && dev->aeq->msix_idx != idx)
+ interval = dev->ceq_itr >> 1; /* 2 usec units */
+ val = FIELD_PREP(IRDMA_GLINT_DYN_CTL_ITR_INDX, 0) |
+ FIELD_PREP(IRDMA_GLINT_DYN_CTL_INTERVAL, interval) |
+ FIELD_PREP(IRDMA_GLINT_DYN_CTL_INTENA, 1) |
+ FIELD_PREP(IRDMA_GLINT_DYN_CTL_CLEARPBA, 1);
+
+ if (dev->hw_attrs.uk_attrs.hw_rev != IRDMA_GEN_1)
+ writel(val, dev->hw_regs[IRDMA_GLINT_DYN_CTL] + idx);
+ else
+ writel(val, dev->hw_regs[IRDMA_GLINT_DYN_CTL] + (idx - 1));
+}
+
+/**
+ * icrdma_disable_irq - Disable interrupt
+ * @dev: pointer to the device structure
+ * @idx: vector index
+ */
+static void icrdma_disable_irq(struct irdma_sc_dev *dev, u32 idx)
+{
+ if (dev->hw_attrs.uk_attrs.hw_rev != IRDMA_GEN_1)
+ writel(0, dev->hw_regs[IRDMA_GLINT_DYN_CTL] + idx);
+ else
+ writel(0, dev->hw_regs[IRDMA_GLINT_DYN_CTL] + (idx - 1));
+}
+
+/**
+ * icrdma_cfg_ceq- Configure CEQ interrupt
+ * @dev: pointer to the device structure
+ * @ceq_id: Completion Event Queue ID
+ * @idx: vector index
+ * @enable: True to enable, False disables
+ */
+static void icrdma_cfg_ceq(struct irdma_sc_dev *dev, u32 ceq_id, u32 idx,
+ bool enable)
+{
+ u32 reg_val;
+
+ reg_val = FIELD_PREP(IRDMA_GLINT_CEQCTL_CAUSE_ENA, enable) |
+ FIELD_PREP(IRDMA_GLINT_CEQCTL_MSIX_INDX, idx) |
+ FIELD_PREP(IRDMA_GLINT_CEQCTL_ITR_INDX, 3);
+
+ writel(reg_val, dev->hw_regs[IRDMA_GLINT_CEQCTL] + ceq_id);
+}
+
+static const struct irdma_irq_ops icrdma_irq_ops = {
+ .irdma_cfg_aeq = irdma_cfg_aeq,
+ .irdma_cfg_ceq = icrdma_cfg_ceq,
+ .irdma_dis_irq = icrdma_disable_irq,
+ .irdma_en_irq = icrdma_ena_irq,
+};
+
+static const struct irdma_hw_stat_map icrdma_hw_stat_map[] = {
+ [IRDMA_HW_STAT_INDEX_RXVLANERR] = { 0, 32, IRDMA_MAX_STATS_24 },
+ [IRDMA_HW_STAT_INDEX_IP4RXOCTS] = { 8, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_IP4RXPKTS] = { 16, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_IP4RXDISCARD] = { 24, 32, IRDMA_MAX_STATS_32 },
+ [IRDMA_HW_STAT_INDEX_IP4RXTRUNC] = { 24, 0, IRDMA_MAX_STATS_32 },
+ [IRDMA_HW_STAT_INDEX_IP4RXFRAGS] = { 32, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_IP4RXMCOCTS] = { 40, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_IP4RXMCPKTS] = { 48, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_IP6RXOCTS] = { 56, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_IP6RXPKTS] = { 64, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_IP6RXDISCARD] = { 72, 32, IRDMA_MAX_STATS_32 },
+ [IRDMA_HW_STAT_INDEX_IP6RXTRUNC] = { 72, 0, IRDMA_MAX_STATS_32 },
+ [IRDMA_HW_STAT_INDEX_IP6RXFRAGS] = { 80, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_IP6RXMCOCTS] = { 88, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_IP6RXMCPKTS] = { 96, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_IP4TXOCTS] = { 104, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_IP4TXPKTS] = { 112, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_IP4TXFRAGS] = { 120, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_IP4TXMCOCTS] = { 128, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_IP4TXMCPKTS] = { 136, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_IP6TXOCTS] = { 144, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_IP6TXPKTS] = { 152, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_IP6TXFRAGS] = { 160, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_IP6TXMCOCTS] = { 168, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_IP6TXMCPKTS] = { 176, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_IP4TXNOROUTE] = { 184, 32, IRDMA_MAX_STATS_24 },
+ [IRDMA_HW_STAT_INDEX_IP6TXNOROUTE] = { 184, 0, IRDMA_MAX_STATS_24 },
+ [IRDMA_HW_STAT_INDEX_TCPRXSEGS] = { 192, 32, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_TCPRXOPTERR] = { 200, 32, IRDMA_MAX_STATS_24 },
+ [IRDMA_HW_STAT_INDEX_TCPRXPROTOERR] = { 200, 0, IRDMA_MAX_STATS_24 },
+ [IRDMA_HW_STAT_INDEX_TCPTXSEG] = { 208, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_TCPRTXSEG] = { 216, 32, IRDMA_MAX_STATS_32 },
+ [IRDMA_HW_STAT_INDEX_UDPRXPKTS] = { 224, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_UDPTXPKTS] = { 232, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_RDMARXWRS] = { 240, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_RDMARXRDS] = { 248, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_RDMARXSNDS] = { 256, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_RDMATXWRS] = { 264, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_RDMATXRDS] = { 272, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_RDMATXSNDS] = { 280, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_RDMAVBND] = { 288, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_RDMAVINV] = { 296, 0, IRDMA_MAX_STATS_48 },
+ [IRDMA_HW_STAT_INDEX_RXNPECNMARKEDPKTS] = { 304, 0, IRDMA_MAX_STATS_56 },
+ [IRDMA_HW_STAT_INDEX_RXRPCNPIGNORED] = { 312, 32, IRDMA_MAX_STATS_24 },
+ [IRDMA_HW_STAT_INDEX_RXRPCNPHANDLED] = { 312, 0, IRDMA_MAX_STATS_32 },
+ [IRDMA_HW_STAT_INDEX_TXNPCNPSENT] = { 320, 0, IRDMA_MAX_STATS_32 },
+};
+
+void icrdma_init_hw(struct irdma_sc_dev *dev)
+{
+ int i;
+ u8 __iomem *hw_addr;
+
+ for (i = 0; i < IRDMA_MAX_REGS; ++i) {
+ hw_addr = dev->hw->hw_addr;
+
+ if (i == IRDMA_DB_ADDR_OFFSET)
+ hw_addr = NULL;
+
+ dev->hw_regs[i] = (u32 __iomem *)(hw_addr + icrdma_regs[i]);
+ }
+ dev->hw_attrs.max_hw_vf_fpm_id = IRDMA_MAX_VF_FPM_ID;
+ dev->hw_attrs.first_hw_vf_fpm_id = IRDMA_FIRST_VF_FPM_ID;
+
+ for (i = 0; i < IRDMA_MAX_SHIFTS; ++i)
+ dev->hw_shifts[i] = icrdma_shifts[i];
+
+ for (i = 0; i < IRDMA_MAX_MASKS; ++i)
+ dev->hw_masks[i] = icrdma_masks[i];
+
+ dev->wqe_alloc_db = dev->hw_regs[IRDMA_WQEALLOC];
+ dev->cq_arm_db = dev->hw_regs[IRDMA_CQARM];
+ dev->aeq_alloc_db = dev->hw_regs[IRDMA_AEQALLOC];
+ dev->cqp_db = dev->hw_regs[IRDMA_CQPDB];
+ dev->cq_ack_db = dev->hw_regs[IRDMA_CQACK];
+ dev->irq_ops = &icrdma_irq_ops;
+ dev->hw_attrs.page_size_cap = SZ_4K | SZ_2M | SZ_1G;
+ dev->hw_stats_map = icrdma_hw_stat_map;
+ dev->hw_attrs.max_hw_ird = ICRDMA_MAX_IRD_SIZE;
+ dev->hw_attrs.max_hw_ord = ICRDMA_MAX_ORD_SIZE;
+ dev->hw_attrs.max_stat_inst = ICRDMA_MAX_STATS_COUNT;
+ dev->hw_attrs.max_stat_idx = IRDMA_HW_STAT_INDEX_MAX_GEN_2;
+ dev->hw_attrs.max_hw_device_pages = ICRDMA_MAX_PUSH_PAGE_COUNT;
+
+ dev->hw_attrs.uk_attrs.min_hw_wq_size = ICRDMA_MIN_WQ_SIZE;
+ dev->hw_attrs.uk_attrs.max_hw_sq_chunk = IRDMA_MAX_QUANTA_PER_WR;
+ dev->hw_attrs.uk_attrs.feature_flags |= IRDMA_FEATURE_RTS_AE |
+ IRDMA_FEATURE_CQ_RESIZE;
+}
diff --git a/drivers/infiniband/hw/irdma/icrdma_hw.h b/drivers/infiniband/hw/irdma/icrdma_hw.h
new file mode 100644
index 000000000000..d97944ab45da
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/icrdma_hw.h
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2017 - 2021 Intel Corporation */
+#ifndef ICRDMA_HW_H
+#define ICRDMA_HW_H
+
+#include "irdma.h"
+
+#define VFPE_CQPTAIL1 0x0000a000
+#define VFPE_CQPDB1 0x0000bc00
+#define VFPE_CCQPSTATUS1 0x0000b800
+#define VFPE_CCQPHIGH1 0x00009800
+#define VFPE_CCQPLOW1 0x0000ac00
+#define VFPE_CQARM1 0x0000b400
+#define VFPE_CQARM1 0x0000b400
+#define VFPE_CQACK1 0x0000b000
+#define VFPE_AEQALLOC1 0x0000a400
+#define VFPE_CQPERRCODES1 0x00009c00
+#define VFPE_WQEALLOC1 0x0000c000
+#define VFINT_DYN_CTLN(_i) (0x00003800 + ((_i) * 4)) /* _i=0...63 */
+
+#define PFPE_CQPTAIL 0x00500880
+#define PFPE_CQPDB 0x00500800
+#define PFPE_CCQPSTATUS 0x0050a000
+#define PFPE_CCQPHIGH 0x0050a100
+#define PFPE_CCQPLOW 0x0050a080
+#define PFPE_CQARM 0x00502c00
+#define PFPE_CQACK 0x00502c80
+#define PFPE_AEQALLOC 0x00502d00
+#define GLINT_DYN_CTL(_INT) (0x00160000 + ((_INT) * 4)) /* _i=0...2047 */
+#define GLPCI_LBARCTRL 0x0009de74
+#define GLPE_CPUSTATUS0 0x0050ba5c
+#define GLPE_CPUSTATUS1 0x0050ba60
+#define GLPE_CPUSTATUS2 0x0050ba64
+#define PFINT_AEQCTL 0x0016cb00
+#define PFPE_CQPERRCODES 0x0050a200
+#define PFPE_WQEALLOC 0x00504400
+#define GLINT_CEQCTL(_INT) (0x0015c000 + ((_INT) * 4)) /* _i=0...2047 */
+#define VSIQF_PE_CTL1(_VSI) (0x00414000 + ((_VSI) * 4)) /* _i=0...767 */
+#define PFHMC_PDINV 0x00520300
+#define GLHMC_VFPDINV(_i) (0x00528300 + ((_i) * 4)) /* _i=0...31 */
+#define GLPE_CRITERR 0x00534000
+#define GLINT_RATE(_INT) (0x0015A000 + ((_INT) * 4)) /* _i=0...2047 */ /* Reset Source: CORER */
+
+#define ICRDMA_DB_ADDR_OFFSET (8 * 1024 * 1024 - 64 * 1024)
+
+#define ICRDMA_VF_DB_ADDR_OFFSET (64 * 1024)
+
+/* shifts/masks for FLD_[LS/RS]_64 macros used in device table */
+#define ICRDMA_CCQPSTATUS_CCQP_DONE_S 0
+#define ICRDMA_CCQPSTATUS_CCQP_DONE BIT_ULL(0)
+#define ICRDMA_CCQPSTATUS_CCQP_ERR_S 31
+#define ICRDMA_CCQPSTATUS_CCQP_ERR BIT_ULL(31)
+#define ICRDMA_CQPSQ_STAG_PDID_S 46
+#define ICRDMA_CQPSQ_STAG_PDID GENMASK_ULL(63, 46)
+#define ICRDMA_CQPSQ_CQ_CEQID_S 22
+#define ICRDMA_CQPSQ_CQ_CEQID GENMASK_ULL(31, 22)
+#define ICRDMA_CQPSQ_CQ_CQID_S 0
+#define ICRDMA_CQPSQ_CQ_CQID GENMASK_ULL(18, 0)
+#define ICRDMA_COMMIT_FPM_CQCNT_S 0
+#define ICRDMA_COMMIT_FPM_CQCNT GENMASK_ULL(19, 0)
+#define ICRDMA_CQPSQ_UPESD_HMCFNID_S 0
+#define ICRDMA_CQPSQ_UPESD_HMCFNID GENMASK_ULL(5, 0)
+enum icrdma_device_caps_const {
+ ICRDMA_MAX_STATS_COUNT = 128,
+
+ ICRDMA_MAX_IRD_SIZE = 127,
+ ICRDMA_MAX_ORD_SIZE = 255,
+ ICRDMA_MIN_WQ_SIZE = 8 /* WQEs */,
+ ICRDMA_MAX_PUSH_PAGE_COUNT = 256,
+};
+
+void icrdma_init_hw(struct irdma_sc_dev *dev);
+#endif /* ICRDMA_HW_H*/
diff --git a/drivers/infiniband/hw/irdma/icrdma_if.c b/drivers/infiniband/hw/irdma/icrdma_if.c
new file mode 100644
index 000000000000..b49fd9cf2476
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/icrdma_if.c
@@ -0,0 +1,347 @@
+// SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB
+/* Copyright (c) 2015 - 2024 Intel Corporation */
+
+#include "main.h"
+#include <linux/net/intel/iidc_rdma_ice.h>
+
+static void icrdma_prep_tc_change(struct irdma_device *iwdev)
+{
+ iwdev->vsi.tc_change_pending = true;
+ irdma_sc_suspend_resume_qps(&iwdev->vsi, IRDMA_OP_SUSPEND);
+
+ /* Wait for all qp's to suspend */
+ wait_event_timeout(iwdev->suspend_wq,
+ !atomic_read(&iwdev->vsi.qp_suspend_reqs),
+ msecs_to_jiffies(IRDMA_EVENT_TIMEOUT_MS));
+ irdma_ws_reset(&iwdev->vsi);
+}
+
+static void icrdma_fill_qos_info(struct irdma_l2params *l2params,
+ struct iidc_rdma_qos_params *qos_info)
+{
+ int i;
+
+ l2params->num_tc = qos_info->num_tc;
+ l2params->vsi_prio_type = qos_info->vport_priority_type;
+ l2params->vsi_rel_bw = qos_info->vport_relative_bw;
+ for (i = 0; i < l2params->num_tc; i++) {
+ l2params->tc_info[i].egress_virt_up =
+ qos_info->tc_info[i].egress_virt_up;
+ l2params->tc_info[i].ingress_virt_up =
+ qos_info->tc_info[i].ingress_virt_up;
+ l2params->tc_info[i].prio_type = qos_info->tc_info[i].prio_type;
+ l2params->tc_info[i].rel_bw = qos_info->tc_info[i].rel_bw;
+ l2params->tc_info[i].tc_ctx = qos_info->tc_info[i].tc_ctx;
+ }
+ for (i = 0; i < IIDC_MAX_USER_PRIORITY; i++)
+ l2params->up2tc[i] = qos_info->up2tc[i];
+ if (qos_info->pfc_mode == IIDC_DSCP_PFC_MODE) {
+ l2params->dscp_mode = true;
+ memcpy(l2params->dscp_map, qos_info->dscp_map, sizeof(l2params->dscp_map));
+ }
+}
+
+static void icrdma_iidc_event_handler(struct iidc_rdma_core_dev_info *cdev_info,
+ struct iidc_rdma_event *event)
+{
+ struct irdma_device *iwdev = dev_get_drvdata(&cdev_info->adev->dev);
+ struct irdma_l2params l2params = {};
+
+ if (*event->type & BIT(IIDC_RDMA_EVENT_AFTER_MTU_CHANGE)) {
+ ibdev_dbg(&iwdev->ibdev, "CLNT: new MTU = %d\n", iwdev->netdev->mtu);
+ if (iwdev->vsi.mtu != iwdev->netdev->mtu) {
+ l2params.mtu = iwdev->netdev->mtu;
+ l2params.mtu_changed = true;
+ irdma_log_invalid_mtu(l2params.mtu, &iwdev->rf->sc_dev);
+ irdma_change_l2params(&iwdev->vsi, &l2params);
+ }
+ } else if (*event->type & BIT(IIDC_RDMA_EVENT_BEFORE_TC_CHANGE)) {
+ if (iwdev->vsi.tc_change_pending)
+ return;
+
+ icrdma_prep_tc_change(iwdev);
+ } else if (*event->type & BIT(IIDC_RDMA_EVENT_AFTER_TC_CHANGE)) {
+ struct iidc_rdma_priv_dev_info *idc_priv = cdev_info->iidc_priv;
+
+ if (!iwdev->vsi.tc_change_pending)
+ return;
+
+ l2params.tc_changed = true;
+ ibdev_dbg(&iwdev->ibdev, "CLNT: TC Change\n");
+
+ icrdma_fill_qos_info(&l2params, &idc_priv->qos_info);
+ if (iwdev->rf->protocol_used != IRDMA_IWARP_PROTOCOL_ONLY)
+ iwdev->dcb_vlan_mode =
+ l2params.num_tc > 1 && !l2params.dscp_mode;
+ irdma_change_l2params(&iwdev->vsi, &l2params);
+ } else if (*event->type & BIT(IIDC_RDMA_EVENT_CRIT_ERR)) {
+ ibdev_warn(&iwdev->ibdev, "ICE OICR event notification: oicr = 0x%08x\n",
+ event->reg);
+ if (event->reg & IRDMAPFINT_OICR_PE_CRITERR_M) {
+ u32 pe_criterr;
+
+ pe_criterr = readl(iwdev->rf->sc_dev.hw_regs[IRDMA_GLPE_CRITERR]);
+#define IRDMA_Q1_RESOURCE_ERR 0x0001024d
+ if (pe_criterr != IRDMA_Q1_RESOURCE_ERR) {
+ ibdev_err(&iwdev->ibdev, "critical PE Error, GLPE_CRITERR=0x%08x\n",
+ pe_criterr);
+ iwdev->rf->reset = true;
+ } else {
+ ibdev_warn(&iwdev->ibdev, "Q1 Resource Check\n");
+ }
+ }
+ if (event->reg & IRDMAPFINT_OICR_HMC_ERR_M) {
+ ibdev_err(&iwdev->ibdev, "HMC Error\n");
+ iwdev->rf->reset = true;
+ }
+ if (event->reg & IRDMAPFINT_OICR_PE_PUSH_M) {
+ ibdev_err(&iwdev->ibdev, "PE Push Error\n");
+ iwdev->rf->reset = true;
+ }
+ if (iwdev->rf->reset)
+ iwdev->rf->gen_ops.request_reset(iwdev->rf);
+ }
+}
+
+/**
+ * icrdma_lan_register_qset - Register qset with LAN driver
+ * @vsi: vsi structure
+ * @tc_node: Traffic class node
+ */
+static int icrdma_lan_register_qset(struct irdma_sc_vsi *vsi,
+ struct irdma_ws_node *tc_node)
+{
+ struct irdma_device *iwdev = vsi->back_vsi;
+ struct iidc_rdma_core_dev_info *cdev_info = iwdev->rf->cdev;
+ struct iidc_rdma_qset_params qset = {};
+ int ret;
+
+ qset.qs_handle = tc_node->qs_handle;
+ qset.tc = tc_node->traffic_class;
+ qset.vport_id = vsi->vsi_idx;
+ ret = ice_add_rdma_qset(cdev_info, &qset);
+ if (ret) {
+ ibdev_dbg(&iwdev->ibdev, "WS: LAN alloc_res for rdma qset failed.\n");
+ return ret;
+ }
+
+ tc_node->l2_sched_node_id = qset.teid;
+ vsi->qos[tc_node->user_pri].l2_sched_node_id = qset.teid;
+
+ return 0;
+}
+
+/**
+ * icrdma_lan_unregister_qset - Unregister qset with LAN driver
+ * @vsi: vsi structure
+ * @tc_node: Traffic class node
+ */
+static void icrdma_lan_unregister_qset(struct irdma_sc_vsi *vsi,
+ struct irdma_ws_node *tc_node)
+{
+ struct irdma_device *iwdev = vsi->back_vsi;
+ struct iidc_rdma_core_dev_info *cdev_info = iwdev->rf->cdev;
+ struct iidc_rdma_qset_params qset = {};
+
+ qset.qs_handle = tc_node->qs_handle;
+ qset.tc = tc_node->traffic_class;
+ qset.vport_id = vsi->vsi_idx;
+ qset.teid = tc_node->l2_sched_node_id;
+
+ if (ice_del_rdma_qset(cdev_info, &qset))
+ ibdev_dbg(&iwdev->ibdev, "WS: LAN free_res for rdma qset failed.\n");
+}
+
+/**
+ * icrdma_request_reset - Request a reset
+ * @rf: RDMA PCI function
+ */
+static void icrdma_request_reset(struct irdma_pci_f *rf)
+{
+ ibdev_warn(&rf->iwdev->ibdev, "Requesting a reset\n");
+ ice_rdma_request_reset(rf->cdev, IIDC_FUNC_RESET);
+}
+
+static int icrdma_init_interrupts(struct irdma_pci_f *rf, struct iidc_rdma_core_dev_info *cdev)
+{
+ int i;
+
+ rf->msix_count = num_online_cpus() + IRDMA_NUM_AEQ_MSIX;
+ rf->msix_entries = kcalloc(rf->msix_count, sizeof(*rf->msix_entries),
+ GFP_KERNEL);
+ if (!rf->msix_entries)
+ return -ENOMEM;
+
+ for (i = 0; i < rf->msix_count; i++)
+ if (ice_alloc_rdma_qvector(cdev, &rf->msix_entries[i]))
+ break;
+
+ if (i < IRDMA_MIN_MSIX) {
+ while (--i >= 0)
+ ice_free_rdma_qvector(cdev, &rf->msix_entries[i]);
+
+ kfree(rf->msix_entries);
+ return -ENOMEM;
+ }
+
+ rf->msix_count = i;
+
+ return 0;
+}
+
+static void icrdma_deinit_interrupts(struct irdma_pci_f *rf, struct iidc_rdma_core_dev_info *cdev)
+{
+ int i;
+
+ for (i = 0; i < rf->msix_count; i++)
+ ice_free_rdma_qvector(cdev, &rf->msix_entries[i]);
+
+ kfree(rf->msix_entries);
+}
+
+static void icrdma_fill_device_info(struct irdma_device *iwdev,
+ struct iidc_rdma_core_dev_info *cdev_info)
+{
+ struct iidc_rdma_priv_dev_info *idc_priv = cdev_info->iidc_priv;
+ struct irdma_pci_f *rf = iwdev->rf;
+
+ rf->sc_dev.hw = &rf->hw;
+ rf->iwdev = iwdev;
+ rf->cdev = cdev_info;
+ rf->hw.hw_addr = idc_priv->hw_addr;
+ rf->pcidev = cdev_info->pdev;
+ rf->hw.device = &rf->pcidev->dev;
+ rf->pf_id = idc_priv->pf_id;
+ rf->rdma_ver = IRDMA_GEN_2;
+ rf->sc_dev.hw_attrs.uk_attrs.hw_rev = IRDMA_GEN_2;
+ rf->sc_dev.is_pf = true;
+ rf->sc_dev.privileged = true;
+
+ rf->gen_ops.register_qset = icrdma_lan_register_qset;
+ rf->gen_ops.unregister_qset = icrdma_lan_unregister_qset;
+
+ rf->default_vsi.vsi_idx = idc_priv->vport_id;
+ rf->protocol_used =
+ cdev_info->rdma_protocol == IIDC_RDMA_PROTOCOL_ROCEV2 ?
+ IRDMA_ROCE_PROTOCOL_ONLY : IRDMA_IWARP_PROTOCOL_ONLY;
+ rf->rsrc_profile = IRDMA_HMC_PROFILE_DEFAULT;
+ rf->rst_to = IRDMA_RST_TIMEOUT_HZ;
+ rf->gen_ops.request_reset = icrdma_request_reset;
+ rf->limits_sel = 7;
+ mutex_init(&rf->ah_tbl_lock);
+
+ iwdev->netdev = idc_priv->netdev;
+ iwdev->vsi_num = idc_priv->vport_id;
+ iwdev->init_state = INITIAL_STATE;
+ iwdev->roce_cwnd = IRDMA_ROCE_CWND_DEFAULT;
+ iwdev->roce_ackcreds = IRDMA_ROCE_ACKCREDS_DEFAULT;
+ iwdev->rcv_wnd = IRDMA_CM_DEFAULT_RCV_WND_SCALED;
+ iwdev->rcv_wscale = IRDMA_CM_DEFAULT_RCV_WND_SCALE;
+ if (iwdev->rf->protocol_used != IRDMA_IWARP_PROTOCOL_ONLY)
+ iwdev->roce_mode = true;
+}
+
+static int icrdma_probe(struct auxiliary_device *aux_dev, const struct auxiliary_device_id *id)
+{
+ struct iidc_rdma_core_auxiliary_dev *iidc_adev;
+ struct iidc_rdma_core_dev_info *cdev_info;
+ struct iidc_rdma_priv_dev_info *idc_priv;
+ struct irdma_l2params l2params = {};
+ struct irdma_device *iwdev;
+ struct irdma_pci_f *rf;
+ int err;
+
+ iidc_adev = container_of(aux_dev, struct iidc_rdma_core_auxiliary_dev, adev);
+ cdev_info = iidc_adev->cdev_info;
+ idc_priv = cdev_info->iidc_priv;
+
+ iwdev = ib_alloc_device(irdma_device, ibdev);
+ if (!iwdev)
+ return -ENOMEM;
+ iwdev->rf = kzalloc(sizeof(*rf), GFP_KERNEL);
+ if (!iwdev->rf) {
+ ib_dealloc_device(&iwdev->ibdev);
+ return -ENOMEM;
+ }
+
+ icrdma_fill_device_info(iwdev, cdev_info);
+ rf = iwdev->rf;
+
+ err = icrdma_init_interrupts(rf, cdev_info);
+ if (err)
+ goto err_init_interrupts;
+
+ err = irdma_ctrl_init_hw(rf);
+ if (err)
+ goto err_ctrl_init;
+
+ l2params.mtu = iwdev->netdev->mtu;
+ icrdma_fill_qos_info(&l2params, &idc_priv->qos_info);
+ if (iwdev->rf->protocol_used != IRDMA_IWARP_PROTOCOL_ONLY)
+ iwdev->dcb_vlan_mode = l2params.num_tc > 1 && !l2params.dscp_mode;
+
+ err = irdma_rt_init_hw(iwdev, &l2params);
+ if (err)
+ goto err_rt_init;
+
+ err = irdma_ib_register_device(iwdev);
+ if (err)
+ goto err_ibreg;
+
+ ice_rdma_update_vsi_filter(cdev_info, iwdev->vsi_num, true);
+
+ ibdev_dbg(&iwdev->ibdev, "INIT: Gen2 PF[%d] device probe success\n", PCI_FUNC(rf->pcidev->devfn));
+ auxiliary_set_drvdata(aux_dev, iwdev);
+
+ return 0;
+
+err_ibreg:
+ irdma_rt_deinit_hw(iwdev);
+err_rt_init:
+ irdma_ctrl_deinit_hw(rf);
+err_ctrl_init:
+ icrdma_deinit_interrupts(rf, cdev_info);
+err_init_interrupts:
+ mutex_destroy(&rf->ah_tbl_lock);
+ kfree(rf);
+ ib_dealloc_device(&iwdev->ibdev);
+
+ return err;
+}
+
+static void icrdma_remove(struct auxiliary_device *aux_dev)
+{
+ struct iidc_rdma_core_auxiliary_dev *idc_adev =
+ container_of(aux_dev, struct iidc_rdma_core_auxiliary_dev, adev);
+ struct iidc_rdma_core_dev_info *cdev_info = idc_adev->cdev_info;
+ struct irdma_device *iwdev = auxiliary_get_drvdata(aux_dev);
+ u8 rdma_ver = iwdev->rf->rdma_ver;
+
+ ice_rdma_update_vsi_filter(cdev_info, iwdev->vsi_num, false);
+ irdma_ib_unregister_device(iwdev);
+ icrdma_deinit_interrupts(iwdev->rf, cdev_info);
+ mutex_destroy(&iwdev->rf->ah_tbl_lock);
+
+ kfree(iwdev->rf);
+
+ pr_debug("INIT: Gen[%d] func[%d] device remove success\n",
+ rdma_ver, PCI_FUNC(cdev_info->pdev->devfn));
+}
+
+static const struct auxiliary_device_id icrdma_auxiliary_id_table[] = {
+ {.name = "ice.iwarp", },
+ {.name = "ice.roce", },
+ {},
+};
+
+MODULE_DEVICE_TABLE(auxiliary, icrdma_auxiliary_id_table);
+
+struct iidc_rdma_core_auxiliary_drv icrdma_core_auxiliary_drv = {
+ .adrv = {
+ .name = "gen_2",
+ .id_table = icrdma_auxiliary_id_table,
+ .probe = icrdma_probe,
+ .remove = icrdma_remove,
+ },
+ .event_handler = icrdma_iidc_event_handler,
+};
diff --git a/drivers/infiniband/hw/irdma/ig3rdma_hw.c b/drivers/infiniband/hw/irdma/ig3rdma_hw.c
new file mode 100644
index 000000000000..2e8bb475e22a
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/ig3rdma_hw.c
@@ -0,0 +1,170 @@
+// SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB
+/* Copyright (c) 2018 - 2024 Intel Corporation */
+#include "osdep.h"
+#include "type.h"
+#include "protos.h"
+#include "ig3rdma_hw.h"
+
+/**
+ * ig3rdma_ena_irq - Enable interrupt
+ * @dev: pointer to the device structure
+ * @idx: vector index
+ */
+static void ig3rdma_ena_irq(struct irdma_sc_dev *dev, u32 idx)
+{
+ u32 val;
+ u32 int_stride = 1; /* one u32 per register */
+
+ if (dev->is_pf)
+ int_stride = 0x400;
+ else
+ idx--; /* VFs use DYN_CTL_N */
+
+ val = FIELD_PREP(IRDMA_GLINT_DYN_CTL_INTENA, 1) |
+ FIELD_PREP(IRDMA_GLINT_DYN_CTL_CLEARPBA, 1);
+
+ writel(val, dev->hw_regs[IRDMA_GLINT_DYN_CTL] + (idx * int_stride));
+}
+
+/**
+ * ig3rdma_disable_irq - Disable interrupt
+ * @dev: pointer to the device structure
+ * @idx: vector index
+ */
+static void ig3rdma_disable_irq(struct irdma_sc_dev *dev, u32 idx)
+{
+ u32 int_stride = 1; /* one u32 per register */
+
+ if (dev->is_pf)
+ int_stride = 0x400;
+ else
+ idx--; /* VFs use DYN_CTL_N */
+
+ writel(0, dev->hw_regs[IRDMA_GLINT_DYN_CTL] + (idx * int_stride));
+}
+
+static const struct irdma_irq_ops ig3rdma_irq_ops = {
+ .irdma_dis_irq = ig3rdma_disable_irq,
+ .irdma_en_irq = ig3rdma_ena_irq,
+};
+
+static const struct irdma_hw_stat_map ig3rdma_hw_stat_map[] = {
+ [IRDMA_HW_STAT_INDEX_RXVLANERR] = { 0, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_IP4RXOCTS] = { 8, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_IP4RXPKTS] = { 16, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_IP4RXDISCARD] = { 24, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_IP4RXTRUNC] = { 32, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_IP4RXFRAGS] = { 40, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_IP4RXMCOCTS] = { 48, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_IP4RXMCPKTS] = { 56, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_IP6RXOCTS] = { 64, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_IP6RXPKTS] = { 72, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_IP6RXDISCARD] = { 80, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_IP6RXTRUNC] = { 88, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_IP6RXFRAGS] = { 96, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_IP6RXMCOCTS] = { 104, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_IP6RXMCPKTS] = { 112, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_IP4TXOCTS] = { 120, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_IP4TXPKTS] = { 128, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_IP4TXFRAGS] = { 136, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_IP4TXMCOCTS] = { 144, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_IP4TXMCPKTS] = { 152, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_IP6TXOCTS] = { 160, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_IP6TXPKTS] = { 168, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_IP6TXFRAGS] = { 176, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_IP6TXMCOCTS] = { 184, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_IP6TXMCPKTS] = { 192, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_IP4TXNOROUTE] = { 200, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_IP6TXNOROUTE] = { 208, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_TCPRTXSEG] = { 216, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_TCPRXOPTERR] = { 224, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_TCPRXPROTOERR] = { 232, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_TCPTXSEG] = { 240, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_TCPRXSEGS] = { 248, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_UDPRXPKTS] = { 256, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_UDPTXPKTS] = { 264, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_RDMARXWRS] = { 272, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_RDMARXRDS] = { 280, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_RDMARXSNDS] = { 288, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_RDMATXWRS] = { 296, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_RDMATXRDS] = { 304, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_RDMATXSNDS] = { 312, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_RDMAVBND] = { 320, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_RDMAVINV] = { 328, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_RXNPECNMARKEDPKTS] = { 336, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_RXRPCNPHANDLED] = { 344, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_RXRPCNPIGNORED] = { 352, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_TXNPCNPSENT] = { 360, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_RNR_SENT] = { 368, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_RNR_RCVD] = { 376, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_RDMAORDLMTCNT] = { 384, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_RDMAIRDLMTCNT] = { 392, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_RDMARXATS] = { 408, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_RDMATXATS] = { 416, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_NAKSEQERR] = { 424, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_NAKSEQERR_IMPLIED] = { 432, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_RTO] = { 440, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_RXOOOPKTS] = { 448, 0, 0 },
+ [IRDMA_HW_STAT_INDEX_ICRCERR] = { 456, 0, 0 },
+};
+
+void ig3rdma_init_hw(struct irdma_sc_dev *dev)
+{
+ dev->irq_ops = &ig3rdma_irq_ops;
+ dev->hw_stats_map = ig3rdma_hw_stat_map;
+
+ dev->hw_attrs.uk_attrs.hw_rev = IRDMA_GEN_3;
+ dev->hw_attrs.uk_attrs.max_hw_wq_frags = IG3RDMA_MAX_WQ_FRAGMENT_COUNT;
+ dev->hw_attrs.uk_attrs.max_hw_read_sges = IG3RDMA_MAX_SGE_RD;
+ dev->hw_attrs.uk_attrs.max_hw_sq_chunk = IRDMA_MAX_QUANTA_PER_WR;
+ dev->hw_attrs.first_hw_vf_fpm_id = 0;
+ dev->hw_attrs.max_hw_vf_fpm_id = IG3_MAX_APFS + IG3_MAX_AVFS;
+ dev->hw_attrs.uk_attrs.feature_flags |= IRDMA_FEATURE_64_BYTE_CQE;
+ dev->hw_attrs.uk_attrs.feature_flags |= IRDMA_FEATURE_CQE_TIMESTAMPING;
+
+ dev->hw_attrs.uk_attrs.feature_flags |= IRDMA_FEATURE_SRQ;
+ dev->hw_attrs.uk_attrs.feature_flags |= IRDMA_FEATURE_RTS_AE |
+ IRDMA_FEATURE_CQ_RESIZE;
+ dev->hw_attrs.page_size_cap = SZ_4K | SZ_2M | SZ_1G;
+ dev->hw_attrs.max_hw_ird = IG3RDMA_MAX_IRD_SIZE;
+ dev->hw_attrs.max_hw_ord = IG3RDMA_MAX_ORD_SIZE;
+ dev->hw_attrs.max_stat_inst = IG3RDMA_MAX_STATS_COUNT;
+ dev->hw_attrs.max_stat_idx = IRDMA_HW_STAT_INDEX_MAX_GEN_3;
+ dev->hw_attrs.uk_attrs.min_hw_wq_size = IG3RDMA_MIN_WQ_SIZE;
+ dev->hw_attrs.uk_attrs.max_hw_srq_quanta = IRDMA_SRQ_MAX_QUANTA;
+ dev->hw_attrs.uk_attrs.max_hw_inline = IG3RDMA_MAX_INLINE_DATA_SIZE;
+ dev->hw_attrs.max_hw_device_pages =
+ dev->is_pf ? IG3RDMA_MAX_PF_PUSH_PAGE_COUNT : IG3RDMA_MAX_VF_PUSH_PAGE_COUNT;
+}
+
+static void __iomem *__ig3rdma_get_reg_addr(struct irdma_mmio_region *region, u64 reg_offset)
+{
+ if (reg_offset >= region->offset &&
+ reg_offset < (region->offset + region->len)) {
+ reg_offset -= region->offset;
+
+ return region->addr + reg_offset;
+ }
+
+ return NULL;
+}
+
+void __iomem *ig3rdma_get_reg_addr(struct irdma_hw *hw, u64 reg_offset)
+{
+ u8 __iomem *reg_addr;
+ int i;
+
+ reg_addr = __ig3rdma_get_reg_addr(&hw->rdma_reg, reg_offset);
+ if (reg_addr)
+ return reg_addr;
+
+ for (i = 0; i < hw->num_io_regions; i++) {
+ reg_addr = __ig3rdma_get_reg_addr(&hw->io_regs[i], reg_offset);
+ if (reg_addr)
+ return reg_addr;
+ }
+
+ WARN_ON_ONCE(1);
+
+ return NULL;
+}
diff --git a/drivers/infiniband/hw/irdma/ig3rdma_hw.h b/drivers/infiniband/hw/irdma/ig3rdma_hw.h
new file mode 100644
index 000000000000..03d5f1188789
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/ig3rdma_hw.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB */
+/* Copyright (c) 2021 - 2024 Intel Corporation */
+#ifndef IG3RDMA_HW_H
+#define IG3RDMA_HW_H
+
+#define IG3_MAX_APFS 1
+#define IG3_MAX_AVFS 0
+
+#define IG3_PF_RDMA_REGION_OFFSET 0xBC00000
+#define IG3_PF_RDMA_REGION_LEN 0x401000
+#define IG3_VF_RDMA_REGION_OFFSET 0x8C00
+#define IG3_VF_RDMA_REGION_LEN 0x8400
+
+enum ig3rdma_device_caps_const {
+ IG3RDMA_MAX_WQ_FRAGMENT_COUNT = 14,
+ IG3RDMA_MAX_SGE_RD = 14,
+
+ IG3RDMA_MAX_STATS_COUNT = 128,
+
+ IG3RDMA_MAX_IRD_SIZE = 64,
+ IG3RDMA_MAX_ORD_SIZE = 64,
+ IG3RDMA_MIN_WQ_SIZE = 16 /* WQEs */,
+ IG3RDMA_MAX_INLINE_DATA_SIZE = 216,
+ IG3RDMA_MAX_PF_PUSH_PAGE_COUNT = 8192,
+ IG3RDMA_MAX_VF_PUSH_PAGE_COUNT = 16,
+};
+
+void __iomem *ig3rdma_get_reg_addr(struct irdma_hw *hw, u64 reg_offset);
+int ig3rdma_vchnl_send_sync(struct irdma_sc_dev *dev, u8 *msg, u16 len,
+ u8 *recv_msg, u16 *recv_len);
+
+#endif /* IG3RDMA_HW_H*/
diff --git a/drivers/infiniband/hw/irdma/ig3rdma_if.c b/drivers/infiniband/hw/irdma/ig3rdma_if.c
new file mode 100644
index 000000000000..e1d6670d9396
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/ig3rdma_if.c
@@ -0,0 +1,236 @@
+// SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB
+/* Copyright (c) 2023 - 2024 Intel Corporation */
+
+#include "main.h"
+#include <linux/net/intel/iidc_rdma_idpf.h>
+#include "ig3rdma_hw.h"
+
+static void ig3rdma_idc_core_event_handler(struct iidc_rdma_core_dev_info *cdev_info,
+ struct iidc_rdma_event *event)
+{
+ struct irdma_pci_f *rf = auxiliary_get_drvdata(cdev_info->adev);
+
+ if (*event->type & BIT(IIDC_RDMA_EVENT_WARN_RESET)) {
+ rf->reset = true;
+ rf->sc_dev.vchnl_up = false;
+ }
+}
+
+int ig3rdma_vchnl_send_sync(struct irdma_sc_dev *dev, u8 *msg, u16 len,
+ u8 *recv_msg, u16 *recv_len)
+{
+ struct iidc_rdma_core_dev_info *cdev_info = dev_to_rf(dev)->cdev;
+ int ret;
+
+ ret = idpf_idc_rdma_vc_send_sync(cdev_info, msg, len, recv_msg,
+ recv_len);
+ if (ret == -ETIMEDOUT) {
+ ibdev_err(&(dev_to_rf(dev)->iwdev->ibdev),
+ "Virtual channel Req <-> Resp completion timeout\n");
+ dev->vchnl_up = false;
+ }
+
+ return ret;
+}
+
+static int ig3rdma_vchnl_init(struct irdma_pci_f *rf,
+ struct iidc_rdma_core_dev_info *cdev_info,
+ u8 *rdma_ver)
+{
+ struct iidc_rdma_priv_dev_info *idc_priv = cdev_info->iidc_priv;
+ struct irdma_vchnl_init_info virt_info;
+ u8 gen = rf->rdma_ver;
+ int ret;
+
+ rf->vchnl_wq = alloc_ordered_workqueue("irdma-virtchnl-wq", 0);
+ if (!rf->vchnl_wq)
+ return -ENOMEM;
+
+ mutex_init(&rf->sc_dev.vchnl_mutex);
+
+ virt_info.is_pf = !idc_priv->ftype;
+ virt_info.hw_rev = gen;
+ virt_info.privileged = gen == IRDMA_GEN_2;
+ virt_info.vchnl_wq = rf->vchnl_wq;
+ ret = irdma_sc_vchnl_init(&rf->sc_dev, &virt_info);
+ if (ret) {
+ destroy_workqueue(rf->vchnl_wq);
+ mutex_destroy(&rf->sc_dev.vchnl_mutex);
+ return ret;
+ }
+
+ *rdma_ver = rf->sc_dev.hw_attrs.uk_attrs.hw_rev;
+
+ return 0;
+}
+
+/**
+ * ig3rdma_request_reset - Request a reset
+ * @rf: RDMA PCI function
+ */
+static void ig3rdma_request_reset(struct irdma_pci_f *rf)
+{
+ ibdev_warn(&rf->iwdev->ibdev, "Requesting a reset\n");
+ idpf_idc_request_reset(rf->cdev, IIDC_FUNC_RESET);
+}
+
+static int ig3rdma_cfg_regions(struct irdma_hw *hw,
+ struct iidc_rdma_core_dev_info *cdev_info)
+{
+ struct iidc_rdma_priv_dev_info *idc_priv = cdev_info->iidc_priv;
+ struct pci_dev *pdev = cdev_info->pdev;
+ int i;
+
+ switch (idc_priv->ftype) {
+ case IIDC_FUNCTION_TYPE_PF:
+ hw->rdma_reg.len = IG3_PF_RDMA_REGION_LEN;
+ hw->rdma_reg.offset = IG3_PF_RDMA_REGION_OFFSET;
+ break;
+ case IIDC_FUNCTION_TYPE_VF:
+ hw->rdma_reg.len = IG3_VF_RDMA_REGION_LEN;
+ hw->rdma_reg.offset = IG3_VF_RDMA_REGION_OFFSET;
+ break;
+ default:
+ return -ENODEV;
+ }
+
+ hw->rdma_reg.addr = ioremap(pci_resource_start(pdev, 0) + hw->rdma_reg.offset,
+ hw->rdma_reg.len);
+
+ if (!hw->rdma_reg.addr)
+ return -ENOMEM;
+
+ hw->num_io_regions = le16_to_cpu(idc_priv->num_memory_regions);
+ hw->io_regs = kcalloc(hw->num_io_regions,
+ sizeof(struct irdma_mmio_region), GFP_KERNEL);
+
+ if (!hw->io_regs) {
+ iounmap(hw->rdma_reg.addr);
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < hw->num_io_regions; i++) {
+ hw->io_regs[i].addr =
+ idc_priv->mapped_mem_regions[i].region_addr;
+ hw->io_regs[i].len =
+ le64_to_cpu(idc_priv->mapped_mem_regions[i].size);
+ hw->io_regs[i].offset =
+ le64_to_cpu(idc_priv->mapped_mem_regions[i].start_offset);
+ }
+
+ return 0;
+}
+
+static void ig3rdma_decfg_rf(struct irdma_pci_f *rf)
+{
+ struct irdma_hw *hw = &rf->hw;
+
+ mutex_destroy(&rf->ah_tbl_lock);
+ destroy_workqueue(rf->vchnl_wq);
+ mutex_destroy(&rf->sc_dev.vchnl_mutex);
+ kfree(hw->io_regs);
+ iounmap(hw->rdma_reg.addr);
+}
+
+static int ig3rdma_cfg_rf(struct irdma_pci_f *rf,
+ struct iidc_rdma_core_dev_info *cdev_info)
+{
+ struct iidc_rdma_priv_dev_info *idc_priv = cdev_info->iidc_priv;
+ int err;
+
+ rf->sc_dev.hw = &rf->hw;
+ rf->cdev = cdev_info;
+ rf->pcidev = cdev_info->pdev;
+ rf->hw.device = &rf->pcidev->dev;
+ rf->msix_count = idc_priv->msix_count;
+ rf->msix_entries = idc_priv->msix_entries;
+
+ err = ig3rdma_vchnl_init(rf, cdev_info, &rf->rdma_ver);
+ if (err)
+ return err;
+
+ err = ig3rdma_cfg_regions(&rf->hw, cdev_info);
+ if (err) {
+ destroy_workqueue(rf->vchnl_wq);
+ mutex_destroy(&rf->sc_dev.vchnl_mutex);
+ return err;
+ }
+
+ rf->protocol_used = IRDMA_ROCE_PROTOCOL_ONLY;
+ rf->rsrc_profile = IRDMA_HMC_PROFILE_DEFAULT;
+ rf->rst_to = IRDMA_RST_TIMEOUT_HZ;
+ rf->gen_ops.request_reset = ig3rdma_request_reset;
+ rf->limits_sel = 7;
+ mutex_init(&rf->ah_tbl_lock);
+
+ return 0;
+}
+
+static int ig3rdma_core_probe(struct auxiliary_device *aux_dev,
+ const struct auxiliary_device_id *id)
+{
+ struct iidc_rdma_core_auxiliary_dev *idc_adev =
+ container_of(aux_dev, struct iidc_rdma_core_auxiliary_dev, adev);
+ struct iidc_rdma_core_dev_info *cdev_info = idc_adev->cdev_info;
+ struct irdma_pci_f *rf;
+ int err;
+
+ rf = kzalloc(sizeof(*rf), GFP_KERNEL);
+ if (!rf)
+ return -ENOMEM;
+
+ err = ig3rdma_cfg_rf(rf, cdev_info);
+ if (err)
+ goto err_cfg_rf;
+
+ err = irdma_ctrl_init_hw(rf);
+ if (err)
+ goto err_ctrl_init;
+
+ auxiliary_set_drvdata(aux_dev, rf);
+
+ err = idpf_idc_vport_dev_ctrl(cdev_info, true);
+ if (err)
+ goto err_vport_ctrl;
+
+ return 0;
+
+err_vport_ctrl:
+ irdma_ctrl_deinit_hw(rf);
+err_ctrl_init:
+ ig3rdma_decfg_rf(rf);
+err_cfg_rf:
+ kfree(rf);
+
+ return err;
+}
+
+static void ig3rdma_core_remove(struct auxiliary_device *aux_dev)
+{
+ struct iidc_rdma_core_auxiliary_dev *idc_adev =
+ container_of(aux_dev, struct iidc_rdma_core_auxiliary_dev, adev);
+ struct iidc_rdma_core_dev_info *cdev_info = idc_adev->cdev_info;
+ struct irdma_pci_f *rf = auxiliary_get_drvdata(aux_dev);
+
+ idpf_idc_vport_dev_ctrl(cdev_info, false);
+ irdma_ctrl_deinit_hw(rf);
+ ig3rdma_decfg_rf(rf);
+ kfree(rf);
+}
+
+static const struct auxiliary_device_id ig3rdma_core_auxiliary_id_table[] = {
+ {.name = "idpf.8086.rdma.core", },
+ {},
+};
+
+MODULE_DEVICE_TABLE(auxiliary, ig3rdma_core_auxiliary_id_table);
+
+struct iidc_rdma_core_auxiliary_drv ig3rdma_core_auxiliary_drv = {
+ .adrv = {
+ .name = "core",
+ .id_table = ig3rdma_core_auxiliary_id_table,
+ .probe = ig3rdma_core_probe,
+ .remove = ig3rdma_core_remove,
+ },
+ .event_handler = ig3rdma_idc_core_event_handler,
+};
diff --git a/drivers/infiniband/hw/irdma/irdma.h b/drivers/infiniband/hw/irdma/irdma.h
new file mode 100644
index 000000000000..ff938a01d70c
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/irdma.h
@@ -0,0 +1,174 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2017 - 2021 Intel Corporation */
+#ifndef IRDMA_H
+#define IRDMA_H
+
+#define IRDMA_WQEALLOC_WQE_DESC_INDEX GENMASK(31, 20)
+
+#define IRDMA_CQPTAIL_WQTAIL GENMASK(10, 0)
+#define IRDMA_CQPTAIL_CQP_OP_ERR BIT(31)
+
+#define IRDMA_CQPERRCODES_CQP_MINOR_CODE GENMASK(15, 0)
+#define IRDMA_CQPERRCODES_CQP_MAJOR_CODE GENMASK(31, 16)
+#define IRDMA_GLPCI_LBARCTRL_PE_DB_SIZE GENMASK(5, 4)
+#define IRDMA_GLINT_RATE_INTERVAL GENMASK(5, 0)
+#define IRDMA_GLINT_RATE_INTRL_ENA BIT(6)
+#define IRDMA_GLINT_DYN_CTL_INTENA BIT(0)
+#define IRDMA_GLINT_DYN_CTL_CLEARPBA BIT(1)
+#define IRDMA_GLINT_DYN_CTL_ITR_INDX GENMASK(4, 3)
+#define IRDMA_GLINT_DYN_CTL_INTERVAL GENMASK(16, 5)
+#define IRDMA_GLINT_CEQCTL_ITR_INDX GENMASK(12, 11)
+#define IRDMA_GLINT_CEQCTL_CAUSE_ENA BIT(30)
+#define IRDMA_GLINT_CEQCTL_MSIX_INDX GENMASK(10, 0)
+#define IRDMA_PFINT_AEQCTL_MSIX_INDX GENMASK(10, 0)
+#define IRDMA_PFINT_AEQCTL_ITR_INDX GENMASK(12, 11)
+#define IRDMA_PFINT_AEQCTL_CAUSE_ENA BIT(30)
+#define IRDMA_PFHMC_PDINV_PMSDIDX GENMASK(11, 0)
+#define IRDMA_PFHMC_PDINV_PMSDPARTSEL BIT(15)
+#define IRDMA_PFHMC_PDINV_PMPDIDX GENMASK(24, 16)
+#define IRDMA_PFHMC_SDDATALOW_PMSDVALID BIT(0)
+#define IRDMA_PFHMC_SDDATALOW_PMSDTYPE BIT(1)
+#define IRDMA_PFHMC_SDDATALOW_PMSDBPCOUNT GENMASK(11, 2)
+#define IRDMA_PFHMC_SDDATALOW_PMSDDATALOW GENMASK(31, 12)
+#define IRDMA_PFHMC_SDCMD_PMSDWR BIT(31)
+
+#define IRDMA_INVALID_CQ_IDX 0xffffffff
+#define IRDMA_Q_INVALID_IDX 0xffff
+
+enum irdma_dyn_idx_t {
+ IRDMA_IDX_ITR0 = 0,
+ IRDMA_IDX_ITR1 = 1,
+ IRDMA_IDX_ITR2 = 2,
+ IRDMA_IDX_NOITR = 3,
+};
+
+enum irdma_registers {
+ IRDMA_CQPTAIL,
+ IRDMA_CQPDB,
+ IRDMA_CCQPSTATUS,
+ IRDMA_CCQPHIGH,
+ IRDMA_CCQPLOW,
+ IRDMA_CQARM,
+ IRDMA_CQACK,
+ IRDMA_AEQALLOC,
+ IRDMA_CQPERRCODES,
+ IRDMA_WQEALLOC,
+ IRDMA_GLINT_DYN_CTL,
+ IRDMA_DB_ADDR_OFFSET,
+ IRDMA_GLPCI_LBARCTRL,
+ IRDMA_GLPE_CPUSTATUS0,
+ IRDMA_GLPE_CPUSTATUS1,
+ IRDMA_GLPE_CPUSTATUS2,
+ IRDMA_PFINT_AEQCTL,
+ IRDMA_GLINT_CEQCTL,
+ IRDMA_VSIQF_PE_CTL1,
+ IRDMA_PFHMC_PDINV,
+ IRDMA_GLHMC_VFPDINV,
+ IRDMA_GLPE_CRITERR,
+ IRDMA_GLINT_RATE,
+ IRDMA_MAX_REGS, /* Must be last entry */
+};
+
+enum irdma_shifts {
+ IRDMA_CCQPSTATUS_CCQP_DONE_S,
+ IRDMA_CCQPSTATUS_CCQP_ERR_S,
+ IRDMA_CQPSQ_STAG_PDID_S,
+ IRDMA_CQPSQ_CQ_CEQID_S,
+ IRDMA_CQPSQ_CQ_CQID_S,
+ IRDMA_COMMIT_FPM_CQCNT_S,
+ IRDMA_CQPSQ_UPESD_HMCFNID_S,
+ IRDMA_MAX_SHIFTS,
+};
+
+enum irdma_masks {
+ IRDMA_CCQPSTATUS_CCQP_DONE_M,
+ IRDMA_CCQPSTATUS_CCQP_ERR_M,
+ IRDMA_CQPSQ_STAG_PDID_M,
+ IRDMA_CQPSQ_CQ_CEQID_M,
+ IRDMA_CQPSQ_CQ_CQID_M,
+ IRDMA_COMMIT_FPM_CQCNT_M,
+ IRDMA_CQPSQ_UPESD_HMCFNID_M,
+ IRDMA_MAX_MASKS, /* Must be last entry */
+};
+
+#define IRDMA_MAX_MGS_PER_CTX 8
+
+struct irdma_mcast_grp_ctx_entry_info {
+ u32 qp_id;
+ bool valid_entry;
+ u16 dest_port;
+ u32 use_cnt;
+};
+
+struct irdma_mcast_grp_info {
+ u8 dest_mac_addr[ETH_ALEN];
+ u16 vlan_id;
+ u16 hmc_fcn_id;
+ bool ipv4_valid:1;
+ bool vlan_valid:1;
+ u16 mg_id;
+ u32 no_of_mgs;
+ u32 dest_ip_addr[4];
+ u16 qs_handle;
+ struct irdma_dma_mem dma_mem_mc;
+ struct irdma_mcast_grp_ctx_entry_info mg_ctx_info[IRDMA_MAX_MGS_PER_CTX];
+};
+
+enum irdma_vers {
+ IRDMA_GEN_RSVD,
+ IRDMA_GEN_1,
+ IRDMA_GEN_2,
+ IRDMA_GEN_3,
+ IRDMA_GEN_NEXT,
+ IRDMA_GEN_MAX = IRDMA_GEN_NEXT-1
+};
+
+struct irdma_uk_attrs {
+ u64 feature_flags;
+ u32 max_hw_wq_frags;
+ u32 max_hw_read_sges;
+ u32 max_hw_inline;
+ u32 max_hw_rq_quanta;
+ u32 max_hw_wq_quanta;
+ u32 min_hw_cq_size;
+ u32 max_hw_cq_size;
+ u32 max_hw_srq_quanta;
+ u16 max_hw_sq_chunk;
+ u16 min_hw_wq_size;
+ u8 hw_rev;
+};
+
+struct irdma_hw_attrs {
+ struct irdma_uk_attrs uk_attrs;
+ u64 max_hw_outbound_msg_size;
+ u64 max_hw_inbound_msg_size;
+ u64 max_mr_size;
+ u64 page_size_cap;
+ u32 min_hw_qp_id;
+ u32 min_hw_aeq_size;
+ u32 max_hw_aeq_size;
+ u32 min_hw_ceq_size;
+ u32 max_hw_ceq_size;
+ u32 max_hw_device_pages;
+ u32 max_hw_vf_fpm_id;
+ u32 first_hw_vf_fpm_id;
+ u32 max_hw_ird;
+ u32 max_hw_ord;
+ u32 max_hw_wqes;
+ u32 max_hw_pds;
+ u32 max_hw_ena_vf_count;
+ u32 max_qp_wr;
+ u32 max_pe_ready_count;
+ u32 max_done_count;
+ u32 max_sleep_count;
+ u32 max_cqp_compl_wait_time_ms;
+ u32 min_hw_srq_id;
+ u16 max_stat_inst;
+ u16 max_stat_idx;
+};
+
+void i40iw_init_hw(struct irdma_sc_dev *dev);
+void icrdma_init_hw(struct irdma_sc_dev *dev);
+void ig3rdma_init_hw(struct irdma_sc_dev *dev);
+void __iomem *ig3rdma_get_reg_addr(struct irdma_hw *hw, u64 reg_offset);
+#endif /* IRDMA_H*/
diff --git a/drivers/infiniband/hw/irdma/main.c b/drivers/infiniband/hw/irdma/main.c
new file mode 100644
index 000000000000..95957d52883d
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/main.c
@@ -0,0 +1,211 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2015 - 2021 Intel Corporation */
+#include "main.h"
+#include <linux/net/intel/iidc_rdma_idpf.h>
+
+MODULE_ALIAS("i40iw");
+MODULE_DESCRIPTION("Intel(R) Ethernet Protocol Driver for RDMA");
+MODULE_LICENSE("Dual BSD/GPL");
+
+static struct notifier_block irdma_inetaddr_notifier = {
+ .notifier_call = irdma_inetaddr_event
+};
+
+static struct notifier_block irdma_inetaddr6_notifier = {
+ .notifier_call = irdma_inet6addr_event
+};
+
+static struct notifier_block irdma_net_notifier = {
+ .notifier_call = irdma_net_event
+};
+
+static struct notifier_block irdma_netdevice_notifier = {
+ .notifier_call = irdma_netdevice_event
+};
+
+static void irdma_register_notifiers(void)
+{
+ register_inetaddr_notifier(&irdma_inetaddr_notifier);
+ register_inet6addr_notifier(&irdma_inetaddr6_notifier);
+ register_netevent_notifier(&irdma_net_notifier);
+ register_netdevice_notifier(&irdma_netdevice_notifier);
+}
+
+static void irdma_unregister_notifiers(void)
+{
+ unregister_netevent_notifier(&irdma_net_notifier);
+ unregister_inetaddr_notifier(&irdma_inetaddr_notifier);
+ unregister_inet6addr_notifier(&irdma_inetaddr6_notifier);
+ unregister_netdevice_notifier(&irdma_netdevice_notifier);
+}
+
+void irdma_log_invalid_mtu(u16 mtu, struct irdma_sc_dev *dev)
+{
+ if (mtu < IRDMA_MIN_MTU_IPV4)
+ ibdev_warn(to_ibdev(dev), "MTU setting [%d] too low for RDMA traffic. Minimum MTU is 576 for IPv4\n", mtu);
+ else if (mtu < IRDMA_MIN_MTU_IPV6)
+ ibdev_warn(to_ibdev(dev), "MTU setting [%d] too low for RDMA traffic. Minimum MTU is 1280 for IPv6\\n", mtu);
+}
+
+static void ig3rdma_idc_vport_event_handler(struct iidc_rdma_vport_dev_info *cdev_info,
+ struct iidc_rdma_event *event)
+{
+ struct irdma_device *iwdev = auxiliary_get_drvdata(cdev_info->adev);
+ struct irdma_l2params l2params = {};
+
+ if (*event->type & BIT(IIDC_RDMA_EVENT_AFTER_MTU_CHANGE)) {
+ ibdev_dbg(&iwdev->ibdev, "CLNT: new MTU = %d\n", iwdev->netdev->mtu);
+ if (iwdev->vsi.mtu != iwdev->netdev->mtu) {
+ l2params.mtu = iwdev->netdev->mtu;
+ l2params.mtu_changed = true;
+ irdma_log_invalid_mtu(l2params.mtu, &iwdev->rf->sc_dev);
+ irdma_change_l2params(&iwdev->vsi, &l2params);
+ }
+ }
+}
+
+static int ig3rdma_vport_probe(struct auxiliary_device *aux_dev,
+ const struct auxiliary_device_id *id)
+{
+ struct iidc_rdma_vport_auxiliary_dev *idc_adev =
+ container_of(aux_dev, struct iidc_rdma_vport_auxiliary_dev, adev);
+ struct auxiliary_device *aux_core_dev = idc_adev->vdev_info->core_adev;
+ struct irdma_pci_f *rf = auxiliary_get_drvdata(aux_core_dev);
+ struct irdma_l2params l2params = {};
+ struct irdma_device *iwdev;
+ int err;
+
+ if (!rf) {
+ WARN_ON_ONCE(1);
+ return -ENOMEM;
+ }
+ iwdev = ib_alloc_device(irdma_device, ibdev);
+ /* Fill iwdev info */
+ iwdev->is_vport = true;
+ iwdev->rf = rf;
+ iwdev->vport_id = idc_adev->vdev_info->vport_id;
+ iwdev->netdev = idc_adev->vdev_info->netdev;
+ iwdev->init_state = INITIAL_STATE;
+ iwdev->roce_cwnd = IRDMA_ROCE_CWND_DEFAULT;
+ iwdev->roce_ackcreds = IRDMA_ROCE_ACKCREDS_DEFAULT;
+ iwdev->rcv_wnd = IRDMA_CM_DEFAULT_RCV_WND_SCALED;
+ iwdev->rcv_wscale = IRDMA_CM_DEFAULT_RCV_WND_SCALE;
+ iwdev->roce_mode = true;
+ iwdev->push_mode = false;
+
+ l2params.mtu = iwdev->netdev->mtu;
+
+ err = irdma_rt_init_hw(iwdev, &l2params);
+ if (err)
+ goto err_rt_init;
+
+ err = irdma_ib_register_device(iwdev);
+ if (err)
+ goto err_ibreg;
+
+ auxiliary_set_drvdata(aux_dev, iwdev);
+
+ ibdev_dbg(&iwdev->ibdev,
+ "INIT: Gen[%d] vport[%d] probe success. dev_name = %s, core_dev_name = %s, netdev=%s\n",
+ rf->rdma_ver, idc_adev->vdev_info->vport_id,
+ dev_name(&aux_dev->dev),
+ dev_name(&idc_adev->vdev_info->core_adev->dev),
+ netdev_name(idc_adev->vdev_info->netdev));
+
+ return 0;
+err_ibreg:
+ irdma_rt_deinit_hw(iwdev);
+err_rt_init:
+ ib_dealloc_device(&iwdev->ibdev);
+
+ return err;
+}
+
+static void ig3rdma_vport_remove(struct auxiliary_device *aux_dev)
+{
+ struct iidc_rdma_vport_auxiliary_dev *idc_adev =
+ container_of(aux_dev, struct iidc_rdma_vport_auxiliary_dev, adev);
+ struct irdma_device *iwdev = auxiliary_get_drvdata(aux_dev);
+
+ ibdev_dbg(&iwdev->ibdev,
+ "INIT: Gen[%d] dev_name = %s, core_dev_name = %s, netdev=%s\n",
+ iwdev->rf->rdma_ver, dev_name(&aux_dev->dev),
+ dev_name(&idc_adev->vdev_info->core_adev->dev),
+ netdev_name(idc_adev->vdev_info->netdev));
+
+ irdma_ib_unregister_device(iwdev);
+}
+
+static const struct auxiliary_device_id ig3rdma_vport_auxiliary_id_table[] = {
+ {.name = "idpf.8086.rdma.vdev", },
+ {},
+};
+
+MODULE_DEVICE_TABLE(auxiliary, ig3rdma_vport_auxiliary_id_table);
+
+static struct iidc_rdma_vport_auxiliary_drv ig3rdma_vport_auxiliary_drv = {
+ .adrv = {
+ .name = "vdev",
+ .id_table = ig3rdma_vport_auxiliary_id_table,
+ .probe = ig3rdma_vport_probe,
+ .remove = ig3rdma_vport_remove,
+ },
+ .event_handler = ig3rdma_idc_vport_event_handler,
+};
+
+
+static int __init irdma_init_module(void)
+{
+ int ret;
+
+ ret = auxiliary_driver_register(&i40iw_auxiliary_drv);
+ if (ret) {
+ pr_err("Failed i40iw(gen_1) auxiliary_driver_register() ret=%d\n",
+ ret);
+ return ret;
+ }
+
+ ret = auxiliary_driver_register(&icrdma_core_auxiliary_drv.adrv);
+ if (ret) {
+ auxiliary_driver_unregister(&i40iw_auxiliary_drv);
+ pr_err("Failed icrdma(gen_2) auxiliary_driver_register() ret=%d\n",
+ ret);
+ return ret;
+ }
+
+ ret = auxiliary_driver_register(&ig3rdma_core_auxiliary_drv.adrv);
+ if (ret) {
+ auxiliary_driver_unregister(&icrdma_core_auxiliary_drv.adrv);
+ auxiliary_driver_unregister(&i40iw_auxiliary_drv);
+ pr_err("Failed ig3rdma(gen_3) core auxiliary_driver_register() ret=%d\n",
+ ret);
+
+ return ret;
+ }
+
+ ret = auxiliary_driver_register(&ig3rdma_vport_auxiliary_drv.adrv);
+ if (ret) {
+ auxiliary_driver_unregister(&ig3rdma_core_auxiliary_drv.adrv);
+ auxiliary_driver_unregister(&icrdma_core_auxiliary_drv.adrv);
+ auxiliary_driver_unregister(&i40iw_auxiliary_drv);
+ pr_err("Failed ig3rdma vport auxiliary_driver_register() ret=%d\n",
+ ret);
+
+ return ret;
+ }
+ irdma_register_notifiers();
+
+ return 0;
+}
+
+static void __exit irdma_exit_module(void)
+{
+ irdma_unregister_notifiers();
+ auxiliary_driver_unregister(&icrdma_core_auxiliary_drv.adrv);
+ auxiliary_driver_unregister(&i40iw_auxiliary_drv);
+ auxiliary_driver_unregister(&ig3rdma_core_auxiliary_drv.adrv);
+ auxiliary_driver_unregister(&ig3rdma_vport_auxiliary_drv.adrv);
+}
+
+module_init(irdma_init_module);
+module_exit(irdma_exit_module);
diff --git a/drivers/infiniband/hw/irdma/main.h b/drivers/infiniband/hw/irdma/main.h
new file mode 100644
index 000000000000..baab61e424a2
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/main.h
@@ -0,0 +1,578 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2015 - 2021 Intel Corporation */
+#ifndef IRDMA_MAIN_H
+#define IRDMA_MAIN_H
+
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/if_vlan.h>
+#include <net/addrconf.h>
+#include <net/netevent.h>
+#include <net/tcp.h>
+#include <net/ip6_route.h>
+#include <net/flow.h>
+#include <net/secure_seq.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/inetdevice.h>
+#include <linux/spinlock.h>
+#include <linux/kernel.h>
+#include <linux/delay.h>
+#include <linux/pci.h>
+#include <linux/dma-mapping.h>
+#include <linux/workqueue.h>
+#include <linux/slab.h>
+#include <linux/io.h>
+#include <linux/crc32c.h>
+#include <linux/kthread.h>
+#ifndef CONFIG_64BIT
+#include <linux/io-64-nonatomic-lo-hi.h>
+#endif
+#include <linux/auxiliary_bus.h>
+#include <linux/net/intel/iidc_rdma.h>
+#include <rdma/ib_smi.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_pack.h>
+#include <rdma/rdma_cm.h>
+#include <rdma/iw_cm.h>
+#include <rdma/ib_user_verbs.h>
+#include <rdma/ib_umem.h>
+#include <rdma/ib_cache.h>
+#include <rdma/uverbs_ioctl.h>
+#include "osdep.h"
+#include "defs.h"
+#include "hmc.h"
+#include "type.h"
+#include "ws.h"
+#include "protos.h"
+#include "pble.h"
+#include "cm.h"
+#include <rdma/irdma-abi.h>
+#include "verbs.h"
+#include "user.h"
+#include "puda.h"
+
+extern struct auxiliary_driver i40iw_auxiliary_drv;
+extern struct iidc_rdma_core_auxiliary_drv icrdma_core_auxiliary_drv;
+extern struct iidc_rdma_core_auxiliary_drv ig3rdma_core_auxiliary_drv;
+
+#define IRDMA_FW_VER_DEFAULT 2
+#define IRDMA_HW_VER 2
+
+#define IRDMA_ARP_ADD 1
+#define IRDMA_ARP_DELETE 2
+#define IRDMA_ARP_RESOLVE 3
+
+#define IRDMA_MACIP_ADD 1
+#define IRDMA_MACIP_DELETE 2
+
+#define IW_GEN_3_CCQ_SIZE (2 * IRDMA_CQP_SW_SQSIZE_2048 + 2)
+#define IW_CCQ_SIZE (IRDMA_CQP_SW_SQSIZE_2048 + 2)
+#define IW_CEQ_SIZE 2048
+#define IW_AEQ_SIZE 2048
+
+#define RX_BUF_SIZE (1536 + 8)
+#define IW_REG0_SIZE (4 * 1024)
+#define IW_TX_TIMEOUT (6 * HZ)
+#define IW_FIRST_QPN 1
+
+#define IW_SW_CONTEXT_ALIGN 1024
+
+#define MAX_DPC_ITERATIONS 128
+
+#define IRDMA_EVENT_TIMEOUT_MS 5000
+#define IRDMA_VCHNL_EVENT_TIMEOUT 100000
+#define IRDMA_RST_TIMEOUT_HZ 4
+
+#define IRDMA_NO_QSET 0xffff
+
+#define IW_CFG_FPM_QP_COUNT 32768
+#define IRDMA_MAX_PAGES_PER_FMR 262144
+#define IRDMA_MIN_PAGES_PER_FMR 1
+#define IRDMA_CQP_COMPL_RQ_WQE_FLUSHED 2
+#define IRDMA_CQP_COMPL_SQ_WQE_FLUSHED 3
+
+#define IRDMA_Q_TYPE_PE_AEQ 0x80
+#define IRDMA_Q_INVALID_IDX 0xffff
+#define IRDMA_REM_ENDPOINT_TRK_QPID 3
+
+#define IRDMA_DRV_OPT_ENA_MPA_VER_0 0x00000001
+#define IRDMA_DRV_OPT_DISABLE_MPA_CRC 0x00000002
+#define IRDMA_DRV_OPT_DISABLE_FIRST_WRITE 0x00000004
+#define IRDMA_DRV_OPT_DISABLE_INTF 0x00000008
+#define IRDMA_DRV_OPT_ENA_MSI 0x00000010
+#define IRDMA_DRV_OPT_DUAL_LOGICAL_PORT 0x00000020
+#define IRDMA_DRV_OPT_NO_INLINE_DATA 0x00000080
+#define IRDMA_DRV_OPT_DISABLE_INT_MOD 0x00000100
+#define IRDMA_DRV_OPT_DISABLE_VIRT_WQ 0x00000200
+#define IRDMA_DRV_OPT_ENA_PAU 0x00000400
+#define IRDMA_DRV_OPT_MCAST_LOGPORT_MAP 0x00000800
+
+#define IW_HMC_OBJ_TYPE_NUM ARRAY_SIZE(iw_hmc_obj_types)
+#define IRDMA_ROCE_CWND_DEFAULT 0x400
+#define IRDMA_ROCE_ACKCREDS_DEFAULT 0x1E
+
+#define IRDMA_FLUSH_SQ BIT(0)
+#define IRDMA_FLUSH_RQ BIT(1)
+#define IRDMA_REFLUSH BIT(2)
+#define IRDMA_FLUSH_WAIT BIT(3)
+
+#define IRDMA_IRQ_NAME_STR_LEN (64)
+
+#define IRDMA_NUM_AEQ_MSIX 1
+#define IRDMA_MIN_MSIX 2
+
+enum init_completion_state {
+ INVALID_STATE = 0,
+ INITIAL_STATE,
+ CQP_CREATED,
+ HMC_OBJS_CREATED,
+ HW_RSRC_INITIALIZED,
+ CCQ_CREATED,
+ CEQ0_CREATED,
+ CEQS_CREATED,
+ PBLE_CHUNK_MEM,
+ AEQ_CREATED,
+ ILQ_CREATED,
+ IEQ_CREATED, /* Last state of probe */
+ IP_ADDR_REGISTERED, /* Last state of open */
+};
+
+struct irdma_rsrc_limits {
+ u32 qplimit;
+ u32 mrlimit;
+ u32 cqlimit;
+};
+
+struct irdma_cqp_err_info {
+ u16 maj;
+ u16 min;
+ const char *desc;
+};
+
+struct irdma_cqp_compl_info {
+ u32 op_ret_val;
+ u16 maj_err_code;
+ u16 min_err_code;
+ bool error;
+ u8 op_code;
+};
+
+struct irdma_cqp_request {
+ struct cqp_cmds_info info;
+ wait_queue_head_t waitq;
+ struct list_head list;
+ refcount_t refcnt;
+ void (*callback_fcn)(struct irdma_cqp_request *cqp_request);
+ void *param;
+ struct irdma_cqp_compl_info compl_info;
+ bool request_done; /* READ/WRITE_ONCE macros operate on it */
+ bool waiting:1;
+ bool dynamic:1;
+ bool pending:1;
+};
+
+struct irdma_cqp {
+ struct irdma_sc_cqp sc_cqp;
+ spinlock_t req_lock; /* protect CQP request list */
+ spinlock_t compl_lock; /* protect CQP completion processing */
+ wait_queue_head_t waitq;
+ wait_queue_head_t remove_wq;
+ struct irdma_dma_mem sq;
+ struct irdma_dma_mem host_ctx;
+ u64 *scratch_array;
+ struct irdma_cqp_request *cqp_requests;
+ struct irdma_ooo_cqp_op *oop_op_array;
+ struct list_head cqp_avail_reqs;
+ struct list_head cqp_pending_reqs;
+};
+
+struct irdma_ccq {
+ struct irdma_sc_cq sc_cq;
+ struct irdma_dma_mem mem_cq;
+ struct irdma_dma_mem shadow_area;
+};
+
+struct irdma_ceq {
+ struct irdma_sc_ceq sc_ceq;
+ struct irdma_dma_mem mem;
+ u32 irq;
+ u32 msix_idx;
+ struct irdma_pci_f *rf;
+ struct tasklet_struct dpc_tasklet;
+ spinlock_t ce_lock; /* sync cq destroy with cq completion event notification */
+};
+
+struct irdma_aeq {
+ struct irdma_sc_aeq sc_aeq;
+ struct irdma_dma_mem mem;
+ struct irdma_pble_alloc palloc;
+ bool virtual_map;
+};
+
+struct irdma_arp_entry {
+ u32 ip_addr[4];
+ u8 mac_addr[ETH_ALEN];
+};
+
+struct irdma_msix_vector {
+ u32 idx;
+ u32 irq;
+ u32 cpu_affinity;
+ u32 ceq_id;
+ cpumask_t mask;
+ char name[IRDMA_IRQ_NAME_STR_LEN];
+};
+
+struct irdma_mc_table_info {
+ u32 mgn;
+ u32 dest_ip[4];
+ bool lan_fwd:1;
+ bool ipv4_valid:1;
+};
+
+struct mc_table_list {
+ struct list_head list;
+ struct irdma_mc_table_info mc_info;
+ struct irdma_mcast_grp_info mc_grp_ctx;
+};
+
+struct irdma_qv_info {
+ u32 v_idx; /* msix_vector */
+ u16 ceq_idx;
+ u16 aeq_idx;
+ u8 itr_idx;
+};
+
+struct irdma_qvlist_info {
+ u32 num_vectors;
+ struct irdma_qv_info qv_info[] __counted_by(num_vectors);
+};
+
+struct irdma_gen_ops {
+ void (*request_reset)(struct irdma_pci_f *rf);
+ int (*register_qset)(struct irdma_sc_vsi *vsi,
+ struct irdma_ws_node *tc_node);
+ void (*unregister_qset)(struct irdma_sc_vsi *vsi,
+ struct irdma_ws_node *tc_node);
+};
+
+struct irdma_pci_f {
+ bool reset:1;
+ bool rsrc_created:1;
+ bool msix_shared:1;
+ bool hwqp1_rsvd:1;
+ u8 rsrc_profile;
+ u8 *hmc_info_mem;
+ u8 *mem_rsrc;
+ u8 rdma_ver;
+ u8 rst_to;
+ u8 pf_id;
+ enum irdma_protocol_used protocol_used;
+ u32 sd_type;
+ u32 msix_count;
+ u32 max_mr;
+ u32 max_qp;
+ u32 max_cq;
+ u32 max_srq;
+ u32 next_srq;
+ u32 max_ah;
+ u32 next_ah;
+ u32 max_mcg;
+ u32 next_mcg;
+ u32 max_pd;
+ u32 next_qp;
+ u32 next_cq;
+ u32 next_pd;
+ u32 max_mr_size;
+ u32 max_cqe;
+ u32 mr_stagmask;
+ u32 used_pds;
+ u32 used_cqs;
+ u32 used_srqs;
+ u32 used_mrs;
+ u32 used_qps;
+ u32 arp_table_size;
+ u32 next_arp_index;
+ u32 ceqs_count;
+ u32 next_ws_node_id;
+ u32 max_ws_node_id;
+ u32 limits_sel;
+ unsigned long *allocated_ws_nodes;
+ unsigned long *allocated_qps;
+ unsigned long *allocated_cqs;
+ unsigned long *allocated_srqs;
+ unsigned long *allocated_mrs;
+ unsigned long *allocated_pds;
+ unsigned long *allocated_mcgs;
+ unsigned long *allocated_ahs;
+ unsigned long *allocated_arps;
+ enum init_completion_state init_state;
+ struct irdma_sc_dev sc_dev;
+ struct pci_dev *pcidev;
+ void *cdev;
+ struct irdma_hw hw;
+ struct irdma_cqp cqp;
+ struct irdma_ccq ccq;
+ struct irdma_aeq aeq;
+ struct irdma_ceq *ceqlist;
+ struct irdma_hmc_pble_rsrc *pble_rsrc;
+ struct irdma_arp_entry *arp_table;
+ spinlock_t arp_lock; /*protect ARP table access*/
+ spinlock_t rsrc_lock; /* protect HW resource array access */
+ spinlock_t qptable_lock; /*protect QP table access*/
+ spinlock_t cqtable_lock; /*protect CQ table access*/
+ struct irdma_qp **qp_table;
+ struct irdma_cq **cq_table;
+ spinlock_t qh_list_lock; /* protect mc_qht_list */
+ struct mc_table_list mc_qht_list;
+ struct irdma_msix_vector *iw_msixtbl;
+ struct irdma_qvlist_info *iw_qvlist;
+ struct tasklet_struct dpc_tasklet;
+ struct msix_entry *msix_entries;
+ struct irdma_dma_mem obj_mem;
+ struct irdma_dma_mem obj_next;
+ atomic_t vchnl_msgs;
+ wait_queue_head_t vchnl_waitq;
+ struct workqueue_struct *cqp_cmpl_wq;
+ struct work_struct cqp_cmpl_work;
+ struct workqueue_struct *vchnl_wq;
+ struct irdma_sc_vsi default_vsi;
+ void *back_fcn;
+ struct irdma_gen_ops gen_ops;
+ struct irdma_device *iwdev;
+ DECLARE_HASHTABLE(ah_hash_tbl, 8);
+ struct mutex ah_tbl_lock; /* protect AH hash table access */
+};
+
+struct irdma_device {
+ struct ib_device ibdev;
+ struct irdma_pci_f *rf;
+ struct net_device *netdev;
+ struct workqueue_struct *cleanup_wq;
+ struct irdma_sc_vsi vsi;
+ struct irdma_cm_core cm_core;
+ u32 roce_cwnd;
+ u32 roce_ackcreds;
+ u32 vendor_id;
+ u32 vendor_part_id;
+ u32 push_mode;
+ u32 rcv_wnd;
+ u16 mac_ip_table_idx;
+ u16 vsi_num;
+ u16 vport_id;
+ u8 rcv_wscale;
+ u8 iw_status;
+ bool roce_mode:1;
+ bool roce_dcqcn_en:1;
+ bool dcb_vlan_mode:1;
+ bool iw_ooo:1;
+ bool is_vport:1;
+ enum init_completion_state init_state;
+
+ wait_queue_head_t suspend_wq;
+};
+
+static inline struct irdma_device *to_iwdev(struct ib_device *ibdev)
+{
+ return container_of(ibdev, struct irdma_device, ibdev);
+}
+
+static inline struct irdma_ucontext *to_ucontext(struct ib_ucontext *ibucontext)
+{
+ return container_of(ibucontext, struct irdma_ucontext, ibucontext);
+}
+
+static inline struct irdma_user_mmap_entry *
+to_irdma_mmap_entry(struct rdma_user_mmap_entry *rdma_entry)
+{
+ return container_of(rdma_entry, struct irdma_user_mmap_entry,
+ rdma_entry);
+}
+
+static inline struct irdma_pd *to_iwpd(struct ib_pd *ibpd)
+{
+ return container_of(ibpd, struct irdma_pd, ibpd);
+}
+
+static inline struct irdma_ah *to_iwah(struct ib_ah *ibah)
+{
+ return container_of(ibah, struct irdma_ah, ibah);
+}
+
+static inline struct irdma_mr *to_iwmr(struct ib_mr *ibmr)
+{
+ return container_of(ibmr, struct irdma_mr, ibmr);
+}
+
+static inline struct irdma_mr *to_iwmw(struct ib_mw *ibmw)
+{
+ return container_of(ibmw, struct irdma_mr, ibmw);
+}
+
+static inline struct irdma_cq *to_iwcq(struct ib_cq *ibcq)
+{
+ return container_of(ibcq, struct irdma_cq, ibcq);
+}
+
+static inline struct irdma_qp *to_iwqp(struct ib_qp *ibqp)
+{
+ return container_of(ibqp, struct irdma_qp, ibqp);
+}
+
+static inline struct irdma_pci_f *dev_to_rf(struct irdma_sc_dev *dev)
+{
+ return container_of(dev, struct irdma_pci_f, sc_dev);
+}
+
+static inline struct irdma_srq *to_iwsrq(struct ib_srq *ibsrq)
+{
+ return container_of(ibsrq, struct irdma_srq, ibsrq);
+}
+
+/**
+ * irdma_alloc_resource - allocate a resource
+ * @iwdev: device pointer
+ * @resource_array: resource bit array:
+ * @max_resources: maximum resource number
+ * @req_resources_num: Allocated resource number
+ * @next: next free id
+ **/
+static inline int irdma_alloc_rsrc(struct irdma_pci_f *rf,
+ unsigned long *rsrc_array, u32 max_rsrc,
+ u32 *req_rsrc_num, u32 *next)
+{
+ u32 rsrc_num;
+ unsigned long flags;
+
+ spin_lock_irqsave(&rf->rsrc_lock, flags);
+ rsrc_num = find_next_zero_bit(rsrc_array, max_rsrc, *next);
+ if (rsrc_num >= max_rsrc) {
+ rsrc_num = find_first_zero_bit(rsrc_array, max_rsrc);
+ if (rsrc_num >= max_rsrc) {
+ spin_unlock_irqrestore(&rf->rsrc_lock, flags);
+ ibdev_dbg(&rf->iwdev->ibdev,
+ "ERR: resource [%d] allocation failed\n",
+ rsrc_num);
+ return -EOVERFLOW;
+ }
+ }
+ __set_bit(rsrc_num, rsrc_array);
+ *next = rsrc_num + 1;
+ if (*next == max_rsrc)
+ *next = 0;
+ *req_rsrc_num = rsrc_num;
+ spin_unlock_irqrestore(&rf->rsrc_lock, flags);
+
+ return 0;
+}
+
+/**
+ * irdma_free_resource - free a resource
+ * @iwdev: device pointer
+ * @resource_array: resource array for the resource_num
+ * @resource_num: resource number to free
+ **/
+static inline void irdma_free_rsrc(struct irdma_pci_f *rf,
+ unsigned long *rsrc_array, u32 rsrc_num)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&rf->rsrc_lock, flags);
+ __clear_bit(rsrc_num, rsrc_array);
+ spin_unlock_irqrestore(&rf->rsrc_lock, flags);
+}
+
+int irdma_ctrl_init_hw(struct irdma_pci_f *rf);
+void irdma_ctrl_deinit_hw(struct irdma_pci_f *rf);
+int irdma_rt_init_hw(struct irdma_device *iwdev,
+ struct irdma_l2params *l2params);
+void irdma_rt_deinit_hw(struct irdma_device *iwdev);
+void irdma_qp_add_ref(struct ib_qp *ibqp);
+void irdma_qp_rem_ref(struct ib_qp *ibqp);
+void irdma_free_lsmm_rsrc(struct irdma_qp *iwqp);
+struct ib_qp *irdma_get_qp(struct ib_device *ibdev, int qpn);
+void irdma_flush_wqes(struct irdma_qp *iwqp, u32 flush_mask);
+void irdma_manage_arp_cache(struct irdma_pci_f *rf,
+ const unsigned char *mac_addr,
+ u32 *ip_addr, bool ipv4, u32 action);
+struct irdma_apbvt_entry *irdma_add_apbvt(struct irdma_device *iwdev, u16 port);
+void irdma_del_apbvt(struct irdma_device *iwdev,
+ struct irdma_apbvt_entry *entry);
+struct irdma_cqp_request *irdma_alloc_and_get_cqp_request(struct irdma_cqp *cqp,
+ bool wait);
+void irdma_free_cqp_request(struct irdma_cqp *cqp,
+ struct irdma_cqp_request *cqp_request);
+void irdma_put_cqp_request(struct irdma_cqp *cqp,
+ struct irdma_cqp_request *cqp_request);
+int irdma_alloc_local_mac_entry(struct irdma_pci_f *rf, u16 *mac_tbl_idx);
+int irdma_add_local_mac_entry(struct irdma_pci_f *rf, const u8 *mac_addr, u16 idx);
+void irdma_del_local_mac_entry(struct irdma_pci_f *rf, u16 idx);
+
+u32 irdma_initialize_hw_rsrc(struct irdma_pci_f *rf);
+void irdma_port_ibevent(struct irdma_device *iwdev);
+void irdma_cm_disconn(struct irdma_qp *qp);
+
+bool irdma_cqp_crit_err(struct irdma_sc_dev *dev, u8 cqp_cmd,
+ u16 maj_err_code, u16 min_err_code);
+int irdma_handle_cqp_op(struct irdma_pci_f *rf,
+ struct irdma_cqp_request *cqp_request);
+
+int irdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
+ struct ib_udata *udata);
+int irdma_modify_qp_roce(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_udata *udata);
+void irdma_cq_add_ref(struct ib_cq *ibcq);
+void irdma_cq_rem_ref(struct ib_cq *ibcq);
+void irdma_cq_wq_destroy(struct irdma_pci_f *rf, struct irdma_sc_cq *cq);
+void irdma_srq_event(struct irdma_sc_srq *srq);
+void irdma_srq_wq_destroy(struct irdma_pci_f *rf, struct irdma_sc_srq *srq);
+void irdma_cleanup_pending_cqp_op(struct irdma_pci_f *rf);
+int irdma_hw_modify_qp(struct irdma_device *iwdev, struct irdma_qp *iwqp,
+ struct irdma_modify_qp_info *info, bool wait);
+int irdma_qp_suspend_resume(struct irdma_sc_qp *qp, bool suspend);
+int irdma_manage_qhash(struct irdma_device *iwdev, struct irdma_cm_info *cminfo,
+ enum irdma_quad_entry_type etype,
+ enum irdma_quad_hash_manage_type mtype, void *cmnode,
+ bool wait);
+void irdma_receive_ilq(struct irdma_sc_vsi *vsi, struct irdma_puda_buf *rbuf);
+void irdma_free_sqbuf(struct irdma_sc_vsi *vsi, void *bufp);
+void irdma_free_qp_rsrc(struct irdma_qp *iwqp);
+int irdma_setup_cm_core(struct irdma_device *iwdev, u8 ver);
+void irdma_cleanup_cm_core(struct irdma_cm_core *cm_core);
+void irdma_next_iw_state(struct irdma_qp *iwqp, u8 state, u8 del_hash, u8 term,
+ u8 term_len);
+int irdma_send_syn(struct irdma_cm_node *cm_node, u32 sendack);
+int irdma_send_reset(struct irdma_cm_node *cm_node);
+struct irdma_cm_node *irdma_find_node(struct irdma_cm_core *cm_core,
+ u16 rem_port, u32 *rem_addr, u16 loc_port,
+ u32 *loc_addr, u16 vlan_id);
+int irdma_hw_flush_wqes(struct irdma_pci_f *rf, struct irdma_sc_qp *qp,
+ struct irdma_qp_flush_info *info, bool wait);
+void irdma_gen_ae(struct irdma_pci_f *rf, struct irdma_sc_qp *qp,
+ struct irdma_gen_ae_info *info, bool wait);
+void irdma_copy_ip_ntohl(u32 *dst, __be32 *src);
+void irdma_copy_ip_htonl(__be32 *dst, u32 *src);
+u16 irdma_get_vlan_ipv4(u32 *addr);
+void irdma_get_vlan_mac_ipv6(u32 *addr, u16 *vlan_id, u8 *mac);
+struct ib_mr *irdma_reg_phys_mr(struct ib_pd *ib_pd, u64 addr, u64 size,
+ int acc, u64 *iova_start, bool dma_mr);
+int irdma_upload_qp_context(struct irdma_qp *iwqp, bool freeze, bool raw);
+void irdma_cqp_ce_handler(struct irdma_pci_f *rf, struct irdma_sc_cq *cq);
+int irdma_ah_cqp_op(struct irdma_pci_f *rf, struct irdma_sc_ah *sc_ah, u8 cmd,
+ bool wait,
+ void (*callback_fcn)(struct irdma_cqp_request *cqp_request),
+ void *cb_param);
+void irdma_gsi_ud_qp_ah_cb(struct irdma_cqp_request *cqp_request);
+int irdma_inetaddr_event(struct notifier_block *notifier, unsigned long event,
+ void *ptr);
+int irdma_inet6addr_event(struct notifier_block *notifier, unsigned long event,
+ void *ptr);
+int irdma_net_event(struct notifier_block *notifier, unsigned long event,
+ void *ptr);
+int irdma_netdevice_event(struct notifier_block *notifier, unsigned long event,
+ void *ptr);
+void irdma_add_ip(struct irdma_device *iwdev);
+void cqp_compl_worker(struct work_struct *work);
+void irdma_log_invalid_mtu(u16 mtu, struct irdma_sc_dev *dev);
+#endif /* IRDMA_MAIN_H */
diff --git a/drivers/infiniband/hw/irdma/osdep.h b/drivers/infiniband/hw/irdma/osdep.h
new file mode 100644
index 000000000000..3f73ceacccb6
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/osdep.h
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2015 - 2021 Intel Corporation */
+#ifndef IRDMA_OSDEP_H
+#define IRDMA_OSDEP_H
+
+#include <linux/pci.h>
+#include <linux/bitfield.h>
+#include <rdma/ib_verbs.h>
+#include <net/dscp.h>
+
+#define STATS_TIMER_DELAY 60000
+
+struct irdma_dma_info {
+ dma_addr_t *dmaaddrs;
+};
+
+struct irdma_dma_mem {
+ void *va;
+ dma_addr_t pa;
+ u32 size;
+} __packed;
+
+struct irdma_virt_mem {
+ void *va;
+ u32 size;
+} __packed;
+
+struct irdma_sc_vsi;
+struct irdma_sc_dev;
+struct irdma_sc_qp;
+struct irdma_puda_buf;
+struct irdma_puda_cmpl_info;
+struct irdma_update_sds_info;
+struct irdma_hmc_fcn_info;
+struct irdma_manage_vf_pble_info;
+struct irdma_hw;
+struct irdma_pci_f;
+
+struct ib_device *to_ibdev(struct irdma_sc_dev *dev);
+void irdma_ieq_mpa_crc_ae(struct irdma_sc_dev *dev, struct irdma_sc_qp *qp);
+enum irdma_status_code irdma_vf_wait_vchnl_resp(struct irdma_sc_dev *dev);
+bool irdma_vf_clear_to_send(struct irdma_sc_dev *dev);
+void irdma_add_dev_ref(struct irdma_sc_dev *dev);
+void irdma_put_dev_ref(struct irdma_sc_dev *dev);
+int irdma_ieq_check_mpacrc(const void *addr, u32 len, u32 val);
+struct irdma_sc_qp *irdma_ieq_get_qp(struct irdma_sc_dev *dev,
+ struct irdma_puda_buf *buf);
+void irdma_send_ieq_ack(struct irdma_sc_qp *qp);
+void irdma_ieq_update_tcpip_info(struct irdma_puda_buf *buf, u16 len,
+ u32 seqnum);
+int irdma_puda_get_tcpip_info(struct irdma_puda_cmpl_info *info,
+ struct irdma_puda_buf *buf);
+int irdma_cqp_sds_cmd(struct irdma_sc_dev *dev,
+ struct irdma_update_sds_info *info);
+int irdma_cqp_manage_hmc_fcn_cmd(struct irdma_sc_dev *dev,
+ struct irdma_hmc_fcn_info *hmcfcninfo,
+ u16 *pmf_idx);
+int irdma_alloc_query_fpm_buf(struct irdma_sc_dev *dev,
+ struct irdma_dma_mem *mem);
+void *irdma_remove_cqp_head(struct irdma_sc_dev *dev);
+void irdma_term_modify_qp(struct irdma_sc_qp *qp, u8 next_state, u8 term,
+ u8 term_len);
+void irdma_terminate_done(struct irdma_sc_qp *qp, int timeout_occurred);
+void irdma_terminate_start_timer(struct irdma_sc_qp *qp);
+void irdma_terminate_del_timer(struct irdma_sc_qp *qp);
+void irdma_hw_stats_start_timer(struct irdma_sc_vsi *vsi);
+void irdma_hw_stats_stop_timer(struct irdma_sc_vsi *vsi);
+void wr32(struct irdma_hw *hw, u32 reg, u32 val);
+u32 rd32(struct irdma_hw *hw, u32 reg);
+u64 rd64(struct irdma_hw *hw, u32 reg);
+int irdma_map_vm_page_list(struct irdma_hw *hw, void *va, dma_addr_t *pg_dma,
+ u32 pg_cnt);
+void irdma_unmap_vm_page_list(struct irdma_hw *hw, dma_addr_t *pg_dma, u32 pg_cnt);
+#endif /* IRDMA_OSDEP_H */
diff --git a/drivers/infiniband/hw/irdma/pble.c b/drivers/infiniband/hw/irdma/pble.c
new file mode 100644
index 000000000000..28dfad7f940c
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/pble.c
@@ -0,0 +1,519 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2015 - 2021 Intel Corporation */
+#include "osdep.h"
+#include "hmc.h"
+#include "defs.h"
+#include "type.h"
+#include "protos.h"
+#include "pble.h"
+
+static int add_pble_prm(struct irdma_hmc_pble_rsrc *pble_rsrc);
+
+/**
+ * irdma_destroy_pble_prm - destroy prm during module unload
+ * @pble_rsrc: pble resources
+ */
+void irdma_destroy_pble_prm(struct irdma_hmc_pble_rsrc *pble_rsrc)
+{
+ struct irdma_chunk *chunk;
+ struct irdma_pble_prm *pinfo = &pble_rsrc->pinfo;
+
+ while (!list_empty(&pinfo->clist)) {
+ chunk = (struct irdma_chunk *) pinfo->clist.next;
+ list_del(&chunk->list);
+ if (chunk->type == PBLE_SD_PAGED)
+ irdma_pble_free_paged_mem(chunk);
+ bitmap_free(chunk->bitmapbuf);
+ kfree(chunk->chunkmem.va);
+ }
+}
+
+/**
+ * irdma_hmc_init_pble - Initialize pble resources during module load
+ * @dev: irdma_sc_dev struct
+ * @pble_rsrc: pble resources
+ */
+int irdma_hmc_init_pble(struct irdma_sc_dev *dev,
+ struct irdma_hmc_pble_rsrc *pble_rsrc)
+{
+ struct irdma_hmc_info *hmc_info;
+ u32 fpm_idx = 0;
+ int status = 0;
+
+ hmc_info = dev->hmc_info;
+ pble_rsrc->dev = dev;
+ pble_rsrc->fpm_base_addr = hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].base;
+ /* Start pble' on 4k boundary */
+ if (pble_rsrc->fpm_base_addr & 0xfff)
+ fpm_idx = (4096 - (pble_rsrc->fpm_base_addr & 0xfff)) >> 3;
+ pble_rsrc->unallocated_pble =
+ hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].cnt - fpm_idx;
+ pble_rsrc->next_fpm_addr = pble_rsrc->fpm_base_addr + (fpm_idx << 3);
+ pble_rsrc->pinfo.pble_shift = PBLE_SHIFT;
+
+ mutex_init(&pble_rsrc->pble_mutex_lock);
+
+ spin_lock_init(&pble_rsrc->pinfo.prm_lock);
+ INIT_LIST_HEAD(&pble_rsrc->pinfo.clist);
+ if (add_pble_prm(pble_rsrc)) {
+ irdma_destroy_pble_prm(pble_rsrc);
+ status = -ENOMEM;
+ }
+
+ return status;
+}
+
+/**
+ * get_sd_pd_idx - Returns sd index, pd index and rel_pd_idx from fpm address
+ * @pble_rsrc: structure containing fpm address
+ * @idx: where to return indexes
+ */
+static void get_sd_pd_idx(struct irdma_hmc_pble_rsrc *pble_rsrc,
+ struct sd_pd_idx *idx)
+{
+ idx->sd_idx = pble_rsrc->next_fpm_addr / IRDMA_HMC_DIRECT_BP_SIZE;
+ idx->pd_idx = (u32)(pble_rsrc->next_fpm_addr / IRDMA_HMC_PAGED_BP_SIZE);
+ idx->rel_pd_idx = (idx->pd_idx % IRDMA_HMC_PD_CNT_IN_SD);
+}
+
+/**
+ * add_sd_direct - add sd direct for pble
+ * @pble_rsrc: pble resource ptr
+ * @info: page info for sd
+ */
+static int add_sd_direct(struct irdma_hmc_pble_rsrc *pble_rsrc,
+ struct irdma_add_page_info *info)
+{
+ struct irdma_sc_dev *dev = pble_rsrc->dev;
+ int ret_code = 0;
+ struct sd_pd_idx *idx = &info->idx;
+ struct irdma_chunk *chunk = info->chunk;
+ struct irdma_hmc_info *hmc_info = info->hmc_info;
+ struct irdma_hmc_sd_entry *sd_entry = info->sd_entry;
+ u32 offset = 0;
+
+ if (!sd_entry->valid) {
+ ret_code = irdma_add_sd_table_entry(dev->hw, hmc_info,
+ info->idx.sd_idx,
+ IRDMA_SD_TYPE_DIRECT,
+ IRDMA_HMC_DIRECT_BP_SIZE);
+ if (ret_code)
+ return ret_code;
+
+ chunk->type = PBLE_SD_CONTIGOUS;
+ }
+
+ offset = idx->rel_pd_idx << HMC_PAGED_BP_SHIFT;
+ chunk->size = info->pages << HMC_PAGED_BP_SHIFT;
+ chunk->vaddr = sd_entry->u.bp.addr.va + offset;
+ chunk->fpm_addr = pble_rsrc->next_fpm_addr;
+ ibdev_dbg(to_ibdev(dev),
+ "PBLE: chunk_size[%lld] = 0x%llx vaddr=0x%p fpm_addr = %llx\n",
+ chunk->size, chunk->size, chunk->vaddr, chunk->fpm_addr);
+
+ return 0;
+}
+
+/**
+ * fpm_to_idx - given fpm address, get pble index
+ * @pble_rsrc: pble resource management
+ * @addr: fpm address for index
+ */
+static u32 fpm_to_idx(struct irdma_hmc_pble_rsrc *pble_rsrc, u64 addr)
+{
+ u64 idx;
+
+ idx = (addr - (pble_rsrc->fpm_base_addr)) >> 3;
+
+ return (u32)idx;
+}
+
+/**
+ * add_bp_pages - add backing pages for sd
+ * @pble_rsrc: pble resource management
+ * @info: page info for sd
+ */
+static int add_bp_pages(struct irdma_hmc_pble_rsrc *pble_rsrc,
+ struct irdma_add_page_info *info)
+{
+ struct irdma_sc_dev *dev = pble_rsrc->dev;
+ u8 *addr;
+ struct irdma_dma_mem mem;
+ struct irdma_hmc_pd_entry *pd_entry;
+ struct irdma_hmc_sd_entry *sd_entry = info->sd_entry;
+ struct irdma_hmc_info *hmc_info = info->hmc_info;
+ struct irdma_chunk *chunk = info->chunk;
+ int status = 0;
+ u32 rel_pd_idx = info->idx.rel_pd_idx;
+ u32 pd_idx = info->idx.pd_idx;
+ u32 i;
+
+ if (irdma_pble_get_paged_mem(chunk, info->pages))
+ return -ENOMEM;
+
+ status = irdma_add_sd_table_entry(dev->hw, hmc_info, info->idx.sd_idx,
+ IRDMA_SD_TYPE_PAGED,
+ IRDMA_HMC_DIRECT_BP_SIZE);
+ if (status)
+ goto error;
+
+ addr = chunk->vaddr;
+ for (i = 0; i < info->pages; i++) {
+ mem.pa = (u64)chunk->dmainfo.dmaaddrs[i];
+ mem.size = 4096;
+ mem.va = addr;
+ pd_entry = &sd_entry->u.pd_table.pd_entry[rel_pd_idx++];
+ if (!pd_entry->valid) {
+ status = irdma_add_pd_table_entry(dev, hmc_info,
+ pd_idx++, &mem);
+ if (status)
+ goto error;
+
+ addr += 4096;
+ }
+ }
+
+ chunk->fpm_addr = pble_rsrc->next_fpm_addr;
+ return 0;
+
+error:
+ irdma_pble_free_paged_mem(chunk);
+
+ return status;
+}
+
+/**
+ * irdma_get_type - add a sd entry type for sd
+ * @dev: irdma_sc_dev struct
+ * @idx: index of sd
+ * @pages: pages in the sd
+ */
+static enum irdma_sd_entry_type irdma_get_type(struct irdma_sc_dev *dev,
+ struct sd_pd_idx *idx, u32 pages)
+{
+ enum irdma_sd_entry_type sd_entry_type;
+
+ if (dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3)
+ sd_entry_type = (!idx->rel_pd_idx &&
+ pages == IRDMA_HMC_PD_CNT_IN_SD) ?
+ IRDMA_SD_TYPE_DIRECT : IRDMA_SD_TYPE_PAGED;
+ else
+ sd_entry_type = (!idx->rel_pd_idx &&
+ pages == IRDMA_HMC_PD_CNT_IN_SD &&
+ dev->privileged) ?
+ IRDMA_SD_TYPE_DIRECT : IRDMA_SD_TYPE_PAGED;
+ return sd_entry_type;
+}
+
+/**
+ * add_pble_prm - add a sd entry for pble resoure
+ * @pble_rsrc: pble resource management
+ */
+static int add_pble_prm(struct irdma_hmc_pble_rsrc *pble_rsrc)
+{
+ struct irdma_sc_dev *dev = pble_rsrc->dev;
+ struct irdma_hmc_sd_entry *sd_entry;
+ struct irdma_hmc_info *hmc_info;
+ struct irdma_chunk *chunk;
+ struct irdma_add_page_info info;
+ struct sd_pd_idx *idx = &info.idx;
+ int ret_code = 0;
+ enum irdma_sd_entry_type sd_entry_type;
+ u64 sd_reg_val = 0;
+ struct irdma_virt_mem chunkmem;
+ u32 pages;
+
+ if (pble_rsrc->unallocated_pble < PBLE_PER_PAGE)
+ return -ENOMEM;
+
+ if (pble_rsrc->next_fpm_addr & 0xfff)
+ return -EINVAL;
+
+ chunkmem.size = sizeof(*chunk);
+ chunkmem.va = kzalloc(chunkmem.size, GFP_KERNEL);
+ if (!chunkmem.va)
+ return -ENOMEM;
+
+ chunk = chunkmem.va;
+ chunk->chunkmem = chunkmem;
+ hmc_info = dev->hmc_info;
+ chunk->dev = dev;
+ chunk->fpm_addr = pble_rsrc->next_fpm_addr;
+ get_sd_pd_idx(pble_rsrc, idx);
+ sd_entry = &hmc_info->sd_table.sd_entry[idx->sd_idx];
+ pages = (idx->rel_pd_idx) ? (IRDMA_HMC_PD_CNT_IN_SD - idx->rel_pd_idx) :
+ IRDMA_HMC_PD_CNT_IN_SD;
+ pages = min(pages, pble_rsrc->unallocated_pble >> PBLE_512_SHIFT);
+ info.chunk = chunk;
+ info.hmc_info = hmc_info;
+ info.pages = pages;
+ info.sd_entry = sd_entry;
+ if (!sd_entry->valid)
+ sd_entry_type = irdma_get_type(dev, idx, pages);
+ else
+ sd_entry_type = sd_entry->entry_type;
+
+ ibdev_dbg(to_ibdev(dev),
+ "PBLE: pages = %d, unallocated_pble[%d] current_fpm_addr = %llx\n",
+ pages, pble_rsrc->unallocated_pble,
+ pble_rsrc->next_fpm_addr);
+ ibdev_dbg(to_ibdev(dev), "PBLE: sd_entry_type = %d\n", sd_entry_type);
+ if (sd_entry_type == IRDMA_SD_TYPE_DIRECT)
+ ret_code = add_sd_direct(pble_rsrc, &info);
+
+ if (ret_code)
+ sd_entry_type = IRDMA_SD_TYPE_PAGED;
+ else
+ pble_rsrc->stats_direct_sds++;
+
+ if (sd_entry_type == IRDMA_SD_TYPE_PAGED) {
+ ret_code = add_bp_pages(pble_rsrc, &info);
+ if (ret_code)
+ goto error;
+ else
+ pble_rsrc->stats_paged_sds++;
+ }
+
+ ret_code = irdma_prm_add_pble_mem(&pble_rsrc->pinfo, chunk);
+ if (ret_code)
+ goto error;
+
+ pble_rsrc->next_fpm_addr += chunk->size;
+ ibdev_dbg(to_ibdev(dev),
+ "PBLE: next_fpm_addr = %llx chunk_size[%llu] = 0x%llx\n",
+ pble_rsrc->next_fpm_addr, chunk->size, chunk->size);
+ pble_rsrc->unallocated_pble -= (u32)(chunk->size >> 3);
+ sd_reg_val = (sd_entry_type == IRDMA_SD_TYPE_PAGED) ?
+ sd_entry->u.pd_table.pd_page_addr.pa :
+ sd_entry->u.bp.addr.pa;
+ if ((dev->privileged && !sd_entry->valid) ||
+ dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3) {
+ ret_code = irdma_hmc_sd_one(dev, hmc_info->hmc_fn_id,
+ sd_reg_val, idx->sd_idx,
+ sd_entry->entry_type, true);
+ if (ret_code)
+ goto error;
+ }
+
+ list_add(&chunk->list, &pble_rsrc->pinfo.clist);
+ sd_entry->valid = true;
+ return 0;
+
+error:
+ bitmap_free(chunk->bitmapbuf);
+ kfree(chunk->chunkmem.va);
+
+ return ret_code;
+}
+
+/**
+ * free_lvl2 - fee level 2 pble
+ * @pble_rsrc: pble resource management
+ * @palloc: level 2 pble allocation
+ */
+static void free_lvl2(struct irdma_hmc_pble_rsrc *pble_rsrc,
+ struct irdma_pble_alloc *palloc)
+{
+ u32 i;
+ struct irdma_pble_level2 *lvl2 = &palloc->level2;
+ struct irdma_pble_info *root = &lvl2->root;
+ struct irdma_pble_info *leaf = lvl2->leaf;
+
+ for (i = 0; i < lvl2->leaf_cnt; i++, leaf++) {
+ if (leaf->addr)
+ irdma_prm_return_pbles(&pble_rsrc->pinfo,
+ &leaf->chunkinfo);
+ else
+ break;
+ }
+
+ if (root->addr)
+ irdma_prm_return_pbles(&pble_rsrc->pinfo, &root->chunkinfo);
+
+ kfree(lvl2->leafmem.va);
+ lvl2->leaf = NULL;
+}
+
+/**
+ * get_lvl2_pble - get level 2 pble resource
+ * @pble_rsrc: pble resource management
+ * @palloc: level 2 pble allocation
+ */
+static int get_lvl2_pble(struct irdma_hmc_pble_rsrc *pble_rsrc,
+ struct irdma_pble_alloc *palloc)
+{
+ u32 lf4k, lflast, total, i;
+ u32 pblcnt = PBLE_PER_PAGE;
+ u64 *addr;
+ struct irdma_pble_level2 *lvl2 = &palloc->level2;
+ struct irdma_pble_info *root = &lvl2->root;
+ struct irdma_pble_info *leaf;
+ int ret_code;
+ u64 fpm_addr;
+
+ /* number of full 512 (4K) leafs) */
+ lf4k = palloc->total_cnt >> 9;
+ lflast = palloc->total_cnt % PBLE_PER_PAGE;
+ total = (lflast == 0) ? lf4k : lf4k + 1;
+ lvl2->leaf_cnt = total;
+
+ lvl2->leafmem.size = (sizeof(*leaf) * total);
+ lvl2->leafmem.va = kzalloc(lvl2->leafmem.size, GFP_KERNEL);
+ if (!lvl2->leafmem.va)
+ return -ENOMEM;
+
+ lvl2->leaf = lvl2->leafmem.va;
+ leaf = lvl2->leaf;
+ ret_code = irdma_prm_get_pbles(&pble_rsrc->pinfo, &root->chunkinfo,
+ total << 3, &root->addr, &fpm_addr);
+ if (ret_code) {
+ kfree(lvl2->leafmem.va);
+ lvl2->leaf = NULL;
+ return -ENOMEM;
+ }
+
+ root->idx = fpm_to_idx(pble_rsrc, fpm_addr);
+ root->cnt = total;
+ addr = root->addr;
+ for (i = 0; i < total; i++, leaf++) {
+ pblcnt = (lflast && ((i + 1) == total)) ?
+ lflast : PBLE_PER_PAGE;
+ ret_code = irdma_prm_get_pbles(&pble_rsrc->pinfo,
+ &leaf->chunkinfo, pblcnt << 3,
+ &leaf->addr, &fpm_addr);
+ if (ret_code)
+ goto error;
+
+ leaf->idx = fpm_to_idx(pble_rsrc, fpm_addr);
+
+ leaf->cnt = pblcnt;
+ *addr = (u64)leaf->idx;
+ addr++;
+ }
+
+ palloc->level = PBLE_LEVEL_2;
+ pble_rsrc->stats_lvl2++;
+ return 0;
+
+error:
+ free_lvl2(pble_rsrc, palloc);
+
+ return -ENOMEM;
+}
+
+/**
+ * get_lvl1_pble - get level 1 pble resource
+ * @pble_rsrc: pble resource management
+ * @palloc: level 1 pble allocation
+ */
+static int get_lvl1_pble(struct irdma_hmc_pble_rsrc *pble_rsrc,
+ struct irdma_pble_alloc *palloc)
+{
+ int ret_code;
+ u64 fpm_addr;
+ struct irdma_pble_info *lvl1 = &palloc->level1;
+
+ ret_code = irdma_prm_get_pbles(&pble_rsrc->pinfo, &lvl1->chunkinfo,
+ palloc->total_cnt << 3, &lvl1->addr,
+ &fpm_addr);
+ if (ret_code)
+ return -ENOMEM;
+
+ palloc->level = PBLE_LEVEL_1;
+ lvl1->idx = fpm_to_idx(pble_rsrc, fpm_addr);
+ lvl1->cnt = palloc->total_cnt;
+ pble_rsrc->stats_lvl1++;
+
+ return 0;
+}
+
+/**
+ * get_lvl1_lvl2_pble - calls get_lvl1 and get_lvl2 pble routine
+ * @pble_rsrc: pble resources
+ * @palloc: contains all inforamtion regarding pble (idx + pble addr)
+ * @lvl: Bitmask for requested pble level
+ */
+static int get_lvl1_lvl2_pble(struct irdma_hmc_pble_rsrc *pble_rsrc,
+ struct irdma_pble_alloc *palloc, u8 lvl)
+{
+ int status = 0;
+
+ status = get_lvl1_pble(pble_rsrc, palloc);
+ if (!status || lvl == PBLE_LEVEL_1 || palloc->total_cnt <= PBLE_PER_PAGE)
+ return status;
+
+ status = get_lvl2_pble(pble_rsrc, palloc);
+
+ return status;
+}
+
+/**
+ * irdma_get_pble - allocate pbles from the prm
+ * @pble_rsrc: pble resources
+ * @palloc: contains all inforamtion regarding pble (idx + pble addr)
+ * @pble_cnt: #of pbles requested
+ * @lvl: requested pble level mask
+ */
+int irdma_get_pble(struct irdma_hmc_pble_rsrc *pble_rsrc,
+ struct irdma_pble_alloc *palloc, u32 pble_cnt,
+ u8 lvl)
+{
+ int status = 0;
+ int max_sds = 0;
+ int i;
+
+ palloc->total_cnt = pble_cnt;
+ palloc->level = PBLE_LEVEL_0;
+
+ mutex_lock(&pble_rsrc->pble_mutex_lock);
+
+ /*check first to see if we can get pble's without acquiring
+ * additional sd's
+ */
+ status = get_lvl1_lvl2_pble(pble_rsrc, palloc, lvl);
+ if (!status)
+ goto exit;
+
+ max_sds = (palloc->total_cnt >> 18) + 1;
+ for (i = 0; i < max_sds; i++) {
+ status = add_pble_prm(pble_rsrc);
+ if (status)
+ break;
+
+ status = get_lvl1_lvl2_pble(pble_rsrc, palloc, lvl);
+ /* if level1_only, only go through it once */
+ if (!status || lvl)
+ break;
+ }
+
+exit:
+ if (!status) {
+ pble_rsrc->allocdpbles += pble_cnt;
+ pble_rsrc->stats_alloc_ok++;
+ } else {
+ pble_rsrc->stats_alloc_fail++;
+ }
+ mutex_unlock(&pble_rsrc->pble_mutex_lock);
+
+ return status;
+}
+
+/**
+ * irdma_free_pble - put pbles back into prm
+ * @pble_rsrc: pble resources
+ * @palloc: contains all information regarding pble resource being freed
+ */
+void irdma_free_pble(struct irdma_hmc_pble_rsrc *pble_rsrc,
+ struct irdma_pble_alloc *palloc)
+{
+ if (palloc->level == PBLE_LEVEL_2)
+ free_lvl2(pble_rsrc, palloc);
+ else
+ irdma_prm_return_pbles(&pble_rsrc->pinfo,
+ &palloc->level1.chunkinfo);
+
+ mutex_lock(&pble_rsrc->pble_mutex_lock);
+ pble_rsrc->freedpbles += palloc->total_cnt;
+ pble_rsrc->stats_alloc_freed++;
+ mutex_unlock(&pble_rsrc->pble_mutex_lock);
+}
diff --git a/drivers/infiniband/hw/irdma/pble.h b/drivers/infiniband/hw/irdma/pble.h
new file mode 100644
index 000000000000..160ad728e9fb
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/pble.h
@@ -0,0 +1,132 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2015 - 2019 Intel Corporation */
+#ifndef IRDMA_PBLE_H
+#define IRDMA_PBLE_H
+
+#define PBLE_SHIFT 6
+#define PBLE_PER_PAGE 512
+#define HMC_PAGED_BP_SHIFT 12
+#define PBLE_512_SHIFT 9
+#define PBLE_INVALID_IDX 0xffffffff
+
+enum irdma_pble_level {
+ PBLE_LEVEL_0 = 0,
+ PBLE_LEVEL_1 = 1,
+ PBLE_LEVEL_2 = 2,
+};
+
+enum irdma_alloc_type {
+ PBLE_NO_ALLOC = 0,
+ PBLE_SD_CONTIGOUS = 1,
+ PBLE_SD_PAGED = 2,
+};
+
+struct irdma_chunk;
+
+struct irdma_pble_chunkinfo {
+ struct irdma_chunk *pchunk;
+ u64 bit_idx;
+ u64 bits_used;
+};
+
+struct irdma_pble_info {
+ u64 *addr;
+ u32 idx;
+ u32 cnt;
+ struct irdma_pble_chunkinfo chunkinfo;
+};
+
+struct irdma_pble_level2 {
+ struct irdma_pble_info root;
+ struct irdma_pble_info *leaf;
+ struct irdma_virt_mem leafmem;
+ u32 leaf_cnt;
+};
+
+struct irdma_pble_alloc {
+ u32 total_cnt;
+ enum irdma_pble_level level;
+ union {
+ struct irdma_pble_info level1;
+ struct irdma_pble_level2 level2;
+ };
+};
+
+struct sd_pd_idx {
+ u32 sd_idx;
+ u32 pd_idx;
+ u32 rel_pd_idx;
+};
+
+struct irdma_add_page_info {
+ struct irdma_chunk *chunk;
+ struct irdma_hmc_sd_entry *sd_entry;
+ struct irdma_hmc_info *hmc_info;
+ struct sd_pd_idx idx;
+ u32 pages;
+};
+
+struct irdma_chunk {
+ struct list_head list;
+ struct irdma_dma_info dmainfo;
+ unsigned long *bitmapbuf;
+
+ u32 sizeofbitmap;
+ u64 size;
+ void *vaddr;
+ u64 fpm_addr;
+ u32 pg_cnt;
+ enum irdma_alloc_type type;
+ struct irdma_sc_dev *dev;
+ struct irdma_virt_mem chunkmem;
+};
+
+struct irdma_pble_prm {
+ struct list_head clist;
+ spinlock_t prm_lock; /* protect prm bitmap */
+ u64 total_pble_alloc;
+ u64 free_pble_cnt;
+ u8 pble_shift;
+};
+
+struct irdma_hmc_pble_rsrc {
+ u32 unallocated_pble;
+ struct mutex pble_mutex_lock; /* protect PBLE resource */
+ struct irdma_sc_dev *dev;
+ u64 fpm_base_addr;
+ u64 next_fpm_addr;
+ struct irdma_pble_prm pinfo;
+ u64 allocdpbles;
+ u64 freedpbles;
+ u32 stats_direct_sds;
+ u32 stats_paged_sds;
+ u64 stats_alloc_ok;
+ u64 stats_alloc_fail;
+ u64 stats_alloc_freed;
+ u64 stats_lvl1;
+ u64 stats_lvl2;
+};
+
+void irdma_destroy_pble_prm(struct irdma_hmc_pble_rsrc *pble_rsrc);
+int irdma_hmc_init_pble(struct irdma_sc_dev *dev,
+ struct irdma_hmc_pble_rsrc *pble_rsrc);
+void irdma_free_pble(struct irdma_hmc_pble_rsrc *pble_rsrc,
+ struct irdma_pble_alloc *palloc);
+int irdma_get_pble(struct irdma_hmc_pble_rsrc *pble_rsrc,
+ struct irdma_pble_alloc *palloc, u32 pble_cnt,
+ u8 lvl);
+int irdma_prm_add_pble_mem(struct irdma_pble_prm *pprm,
+ struct irdma_chunk *pchunk);
+int irdma_prm_get_pbles(struct irdma_pble_prm *pprm,
+ struct irdma_pble_chunkinfo *chunkinfo, u64 mem_size,
+ u64 **vaddr, u64 *fpm_addr);
+void irdma_prm_return_pbles(struct irdma_pble_prm *pprm,
+ struct irdma_pble_chunkinfo *chunkinfo);
+void irdma_pble_acquire_lock(struct irdma_hmc_pble_rsrc *pble_rsrc,
+ unsigned long *flags);
+void irdma_pble_release_lock(struct irdma_hmc_pble_rsrc *pble_rsrc,
+ unsigned long *flags);
+void irdma_pble_free_paged_mem(struct irdma_chunk *chunk);
+int irdma_pble_get_paged_mem(struct irdma_chunk *chunk, u32 pg_cnt);
+void irdma_prm_rem_bitmapmem(struct irdma_hw *hw, struct irdma_chunk *chunk);
+#endif /* IRDMA_PBLE_H */
diff --git a/drivers/infiniband/hw/irdma/protos.h b/drivers/infiniband/hw/irdma/protos.h
new file mode 100644
index 000000000000..324cfbf21764
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/protos.h
@@ -0,0 +1,97 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2016 - 2021 Intel Corporation */
+#ifndef IRDMA_PROTOS_H
+#define IRDMA_PROTOS_H
+
+#define PAUSE_TIMER_VAL 0xffff
+#define REFRESH_THRESHOLD 0x7fff
+#define HIGH_THRESHOLD 0x800
+#define LOW_THRESHOLD 0x200
+#define ALL_TC2PFC 0xff
+#define CQP_COMPL_WAIT_TIME_MS 10
+#define CQP_TIMEOUT_THRESHOLD 500
+#define CQP_DEF_CMPL_TIMEOUT_THRESHOLD 2500
+
+/* init operations */
+int irdma_sc_dev_init(enum irdma_vers ver, struct irdma_sc_dev *dev,
+ struct irdma_device_init_info *info);
+void irdma_sc_rt_init(struct irdma_sc_dev *dev);
+void irdma_sc_cqp_post_sq(struct irdma_sc_cqp *cqp);
+__le64 *irdma_sc_cqp_get_next_send_wqe(struct irdma_sc_cqp *cqp, u64 scratch);
+int irdma_sc_mr_fast_register(struct irdma_sc_qp *qp,
+ struct irdma_fast_reg_stag_info *info,
+ bool post_sq);
+/* HMC/FPM functions */
+int irdma_sc_init_iw_hmc(struct irdma_sc_dev *dev, u8 hmc_fn_id);
+/* stats misc */
+int irdma_cqp_gather_stats_cmd(struct irdma_sc_dev *dev,
+ struct irdma_vsi_pestat *pestat, bool wait);
+void irdma_cqp_gather_stats_gen1(struct irdma_sc_dev *dev,
+ struct irdma_vsi_pestat *pestat);
+void irdma_hw_stats_read_all(struct irdma_vsi_pestat *stats,
+ const u64 *hw_stats_regs);
+int irdma_cqp_ws_node_cmd(struct irdma_sc_dev *dev, u8 cmd,
+ struct irdma_ws_node_info *node_info);
+int irdma_cqp_ceq_cmd(struct irdma_sc_dev *dev, struct irdma_sc_ceq *sc_ceq,
+ u8 op);
+int irdma_cqp_aeq_cmd(struct irdma_sc_dev *dev, struct irdma_sc_aeq *sc_aeq,
+ u8 op);
+int irdma_cqp_stats_inst_cmd(struct irdma_sc_vsi *vsi, u8 cmd,
+ struct irdma_stats_inst_info *stats_info);
+u16 irdma_alloc_ws_node_id(struct irdma_sc_dev *dev);
+void irdma_free_ws_node_id(struct irdma_sc_dev *dev, u16 node_id);
+void irdma_update_stats(struct irdma_dev_hw_stats *hw_stats,
+ struct irdma_gather_stats *gather_stats,
+ struct irdma_gather_stats *last_gather_stats,
+ const struct irdma_hw_stat_map *map, u16 max_stat_idx);
+
+/* vsi functions */
+int irdma_vsi_stats_init(struct irdma_sc_vsi *vsi,
+ struct irdma_vsi_stats_info *info);
+void irdma_vsi_stats_free(struct irdma_sc_vsi *vsi);
+void irdma_sc_vsi_init(struct irdma_sc_vsi *vsi,
+ struct irdma_vsi_init_info *info);
+int irdma_sc_add_cq_ctx(struct irdma_sc_ceq *ceq, struct irdma_sc_cq *cq);
+void irdma_sc_remove_cq_ctx(struct irdma_sc_ceq *ceq, struct irdma_sc_cq *cq);
+/* misc L2 param change functions */
+void irdma_change_l2params(struct irdma_sc_vsi *vsi,
+ struct irdma_l2params *l2params);
+void irdma_sc_suspend_resume_qps(struct irdma_sc_vsi *vsi, u8 suspend);
+int irdma_cqp_qp_suspend_resume(struct irdma_sc_qp *qp, u8 cmd);
+void irdma_qp_add_qos(struct irdma_sc_qp *qp);
+void irdma_qp_rem_qos(struct irdma_sc_qp *qp);
+struct irdma_sc_qp *irdma_get_qp_from_list(struct list_head *head,
+ struct irdma_sc_qp *qp);
+void irdma_reinitialize_ieq(struct irdma_sc_vsi *vsi);
+/* terminate functions*/
+void irdma_terminate_send_fin(struct irdma_sc_qp *qp);
+
+void irdma_terminate_connection(struct irdma_sc_qp *qp,
+ struct irdma_aeqe_info *info);
+
+void irdma_terminate_received(struct irdma_sc_qp *qp,
+ struct irdma_aeqe_info *info);
+/* dynamic memory allocation */
+/* misc */
+u8 irdma_get_encoded_wqe_size(u32 wqsize, enum irdma_queue_type queue_type);
+void irdma_modify_qp_to_err(struct irdma_sc_qp *sc_qp);
+int irdma_sc_static_hmc_pages_allocated(struct irdma_sc_cqp *cqp, u64 scratch,
+ u8 hmc_fn_id, bool post_sq,
+ bool poll_registers);
+int irdma_cfg_fpm_val(struct irdma_sc_dev *dev, u32 qp_count);
+int irdma_get_rdma_features(struct irdma_sc_dev *dev);
+void free_sd_mem(struct irdma_sc_dev *dev);
+int irdma_process_cqp_cmd(struct irdma_sc_dev *dev,
+ struct cqp_cmds_info *pcmdinfo);
+int irdma_process_bh(struct irdma_sc_dev *dev);
+int irdma_cqp_sds_cmd(struct irdma_sc_dev *dev,
+ struct irdma_update_sds_info *info);
+int irdma_alloc_query_fpm_buf(struct irdma_sc_dev *dev,
+ struct irdma_dma_mem *mem);
+int irdma_cqp_manage_hmc_fcn_cmd(struct irdma_sc_dev *dev,
+ struct irdma_hmc_fcn_info *hmcfcninfo,
+ u16 *pmf_idx);
+void irdma_add_dev_ref(struct irdma_sc_dev *dev);
+void irdma_put_dev_ref(struct irdma_sc_dev *dev);
+void *irdma_remove_cqp_head(struct irdma_sc_dev *dev);
+#endif /* IRDMA_PROTOS_H */
diff --git a/drivers/infiniband/hw/irdma/puda.c b/drivers/infiniband/hw/irdma/puda.c
new file mode 100644
index 000000000000..cee47ddbd1b5
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/puda.c
@@ -0,0 +1,1718 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2015 - 2021 Intel Corporation */
+#include "osdep.h"
+#include "hmc.h"
+#include "defs.h"
+#include "type.h"
+#include "protos.h"
+#include "puda.h"
+#include "ws.h"
+
+static void irdma_ieq_receive(struct irdma_sc_vsi *vsi,
+ struct irdma_puda_buf *buf);
+static void irdma_ieq_tx_compl(struct irdma_sc_vsi *vsi, void *sqwrid);
+static void irdma_ilq_putback_rcvbuf(struct irdma_sc_qp *qp,
+ struct irdma_puda_buf *buf, u32 wqe_idx);
+/**
+ * irdma_puda_get_listbuf - get buffer from puda list
+ * @list: list to use for buffers (ILQ or IEQ)
+ */
+static struct irdma_puda_buf *irdma_puda_get_listbuf(struct list_head *list)
+{
+ struct irdma_puda_buf *buf = NULL;
+
+ if (!list_empty(list)) {
+ buf = (struct irdma_puda_buf *)list->next;
+ list_del((struct list_head *)&buf->list);
+ }
+
+ return buf;
+}
+
+/**
+ * irdma_puda_get_bufpool - return buffer from resource
+ * @rsrc: resource to use for buffer
+ */
+struct irdma_puda_buf *irdma_puda_get_bufpool(struct irdma_puda_rsrc *rsrc)
+{
+ struct irdma_puda_buf *buf = NULL;
+ struct list_head *list = &rsrc->bufpool;
+ unsigned long flags;
+
+ spin_lock_irqsave(&rsrc->bufpool_lock, flags);
+ buf = irdma_puda_get_listbuf(list);
+ if (buf) {
+ rsrc->avail_buf_count--;
+ buf->vsi = rsrc->vsi;
+ } else {
+ rsrc->stats_buf_alloc_fail++;
+ }
+ spin_unlock_irqrestore(&rsrc->bufpool_lock, flags);
+
+ return buf;
+}
+
+/**
+ * irdma_puda_ret_bufpool - return buffer to rsrc list
+ * @rsrc: resource to use for buffer
+ * @buf: buffer to return to resource
+ */
+void irdma_puda_ret_bufpool(struct irdma_puda_rsrc *rsrc,
+ struct irdma_puda_buf *buf)
+{
+ unsigned long flags;
+
+ buf->do_lpb = false;
+ spin_lock_irqsave(&rsrc->bufpool_lock, flags);
+ list_add(&buf->list, &rsrc->bufpool);
+ spin_unlock_irqrestore(&rsrc->bufpool_lock, flags);
+ rsrc->avail_buf_count++;
+}
+
+/**
+ * irdma_puda_post_recvbuf - set wqe for rcv buffer
+ * @rsrc: resource ptr
+ * @wqe_idx: wqe index to use
+ * @buf: puda buffer for rcv q
+ * @initial: flag if during init time
+ */
+static void irdma_puda_post_recvbuf(struct irdma_puda_rsrc *rsrc, u32 wqe_idx,
+ struct irdma_puda_buf *buf, bool initial)
+{
+ __le64 *wqe;
+ struct irdma_sc_qp *qp = &rsrc->qp;
+ u64 offset24 = 0;
+
+ /* Synch buffer for use by device */
+ dma_sync_single_for_device(rsrc->dev->hw->device, buf->mem.pa,
+ buf->mem.size, DMA_BIDIRECTIONAL);
+ qp->qp_uk.rq_wrid_array[wqe_idx] = (uintptr_t)buf;
+ wqe = qp->qp_uk.rq_base[wqe_idx].elem;
+ if (!initial)
+ get_64bit_val(wqe, 24, &offset24);
+
+ offset24 = (offset24) ? 0 : FIELD_PREP(IRDMAQPSQ_VALID, 1);
+
+ set_64bit_val(wqe, 16, 0);
+ set_64bit_val(wqe, 0, buf->mem.pa);
+ if (qp->qp_uk.uk_attrs->hw_rev == IRDMA_GEN_1) {
+ set_64bit_val(wqe, 8,
+ FIELD_PREP(IRDMAQPSQ_GEN1_FRAG_LEN, buf->mem.size));
+ } else {
+ set_64bit_val(wqe, 8,
+ FIELD_PREP(IRDMAQPSQ_FRAG_LEN, buf->mem.size) |
+ offset24);
+ }
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, offset24);
+}
+
+/**
+ * irdma_puda_replenish_rq - post rcv buffers
+ * @rsrc: resource to use for buffer
+ * @initial: flag if during init time
+ */
+static int irdma_puda_replenish_rq(struct irdma_puda_rsrc *rsrc, bool initial)
+{
+ u32 i;
+ u32 invalid_cnt = rsrc->rxq_invalid_cnt;
+ struct irdma_puda_buf *buf = NULL;
+
+ for (i = 0; i < invalid_cnt; i++) {
+ buf = irdma_puda_get_bufpool(rsrc);
+ if (!buf)
+ return -ENOBUFS;
+ irdma_puda_post_recvbuf(rsrc, rsrc->rx_wqe_idx, buf, initial);
+ rsrc->rx_wqe_idx = ((rsrc->rx_wqe_idx + 1) % rsrc->rq_size);
+ rsrc->rxq_invalid_cnt--;
+ }
+
+ return 0;
+}
+
+/**
+ * irdma_puda_alloc_buf - allocate mem for buffer
+ * @dev: iwarp device
+ * @len: length of buffer
+ */
+static struct irdma_puda_buf *irdma_puda_alloc_buf(struct irdma_sc_dev *dev,
+ u32 len)
+{
+ struct irdma_puda_buf *buf;
+ struct irdma_virt_mem buf_mem;
+
+ buf_mem.size = sizeof(struct irdma_puda_buf);
+ buf_mem.va = kzalloc(buf_mem.size, GFP_KERNEL);
+ if (!buf_mem.va)
+ return NULL;
+
+ buf = buf_mem.va;
+ buf->mem.size = len;
+ buf->mem.va = kzalloc(buf->mem.size, GFP_KERNEL);
+ if (!buf->mem.va)
+ goto free_virt;
+ buf->mem.pa = dma_map_single(dev->hw->device, buf->mem.va,
+ buf->mem.size, DMA_BIDIRECTIONAL);
+ if (dma_mapping_error(dev->hw->device, buf->mem.pa)) {
+ kfree(buf->mem.va);
+ goto free_virt;
+ }
+
+ buf->buf_mem.va = buf_mem.va;
+ buf->buf_mem.size = buf_mem.size;
+
+ return buf;
+
+free_virt:
+ kfree(buf_mem.va);
+ return NULL;
+}
+
+/**
+ * irdma_puda_dele_buf - delete buffer back to system
+ * @dev: iwarp device
+ * @buf: buffer to free
+ */
+static void irdma_puda_dele_buf(struct irdma_sc_dev *dev,
+ struct irdma_puda_buf *buf)
+{
+ dma_unmap_single(dev->hw->device, buf->mem.pa, buf->mem.size,
+ DMA_BIDIRECTIONAL);
+ kfree(buf->mem.va);
+ kfree(buf->buf_mem.va);
+}
+
+/**
+ * irdma_puda_get_next_send_wqe - return next wqe for processing
+ * @qp: puda qp for wqe
+ * @wqe_idx: wqe index for caller
+ */
+static __le64 *irdma_puda_get_next_send_wqe(struct irdma_qp_uk *qp,
+ u32 *wqe_idx)
+{
+ int ret_code = 0;
+
+ *wqe_idx = IRDMA_RING_CURRENT_HEAD(qp->sq_ring);
+ if (!*wqe_idx)
+ qp->swqe_polarity = !qp->swqe_polarity;
+ IRDMA_RING_MOVE_HEAD(qp->sq_ring, ret_code);
+ if (ret_code)
+ return NULL;
+
+ return qp->sq_base[*wqe_idx].elem;
+}
+
+/**
+ * irdma_puda_poll_info - poll cq for completion
+ * @cq: cq for poll
+ * @info: info return for successful completion
+ */
+static int irdma_puda_poll_info(struct irdma_sc_cq *cq,
+ struct irdma_puda_cmpl_info *info)
+{
+ struct irdma_cq_uk *cq_uk = &cq->cq_uk;
+ u64 qword0, qword2, qword3, qword6;
+ __le64 *cqe;
+ __le64 *ext_cqe = NULL;
+ u64 qword7 = 0;
+ u64 comp_ctx;
+ bool valid_bit;
+ bool ext_valid = 0;
+ u32 major_err, minor_err;
+ u32 peek_head;
+ bool error;
+ u8 polarity;
+
+ cqe = IRDMA_GET_CURRENT_CQ_ELEM(&cq->cq_uk);
+ get_64bit_val(cqe, 24, &qword3);
+ valid_bit = (bool)FIELD_GET(IRDMA_CQ_VALID, qword3);
+ if (valid_bit != cq_uk->polarity)
+ return -ENOENT;
+
+ /* Ensure CQE contents are read after valid bit is checked */
+ dma_rmb();
+
+ if (cq->dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2)
+ ext_valid = (bool)FIELD_GET(IRDMA_CQ_EXTCQE, qword3);
+
+ if (ext_valid) {
+ peek_head = (cq_uk->cq_ring.head + 1) % cq_uk->cq_ring.size;
+ ext_cqe = cq_uk->cq_base[peek_head].buf;
+ get_64bit_val(ext_cqe, 24, &qword7);
+ polarity = (u8)FIELD_GET(IRDMA_CQ_VALID, qword7);
+ if (!peek_head)
+ polarity ^= 1;
+ if (polarity != cq_uk->polarity)
+ return -ENOENT;
+
+ /* Ensure ext CQE contents are read after ext valid bit is checked */
+ dma_rmb();
+
+ IRDMA_RING_MOVE_HEAD_NOCHECK(cq_uk->cq_ring);
+ if (!IRDMA_RING_CURRENT_HEAD(cq_uk->cq_ring))
+ cq_uk->polarity = !cq_uk->polarity;
+ /* update cq tail in cq shadow memory also */
+ IRDMA_RING_MOVE_TAIL(cq_uk->cq_ring);
+ }
+
+ print_hex_dump_debug("PUDA: PUDA CQE", DUMP_PREFIX_OFFSET, 16, 8, cqe,
+ 32, false);
+ if (ext_valid)
+ print_hex_dump_debug("PUDA: PUDA EXT-CQE", DUMP_PREFIX_OFFSET,
+ 16, 8, ext_cqe, 32, false);
+
+ error = (bool)FIELD_GET(IRDMA_CQ_ERROR, qword3);
+ if (error) {
+ ibdev_dbg(to_ibdev(cq->dev), "PUDA: receive error\n");
+ major_err = (u32)(FIELD_GET(IRDMA_CQ_MAJERR, qword3));
+ minor_err = (u32)(FIELD_GET(IRDMA_CQ_MINERR, qword3));
+ info->compl_error = major_err << 16 | minor_err;
+ return -EIO;
+ }
+
+ get_64bit_val(cqe, 0, &qword0);
+ get_64bit_val(cqe, 16, &qword2);
+
+ info->q_type = (u8)FIELD_GET(IRDMA_CQ_SQ, qword3);
+ info->qp_id = (u32)FIELD_GET(IRDMACQ_QPID, qword2);
+ if (cq->dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2)
+ info->ipv4 = (bool)FIELD_GET(IRDMACQ_IPV4, qword3);
+
+ get_64bit_val(cqe, 8, &comp_ctx);
+ info->qp = (struct irdma_qp_uk *)(unsigned long)comp_ctx;
+ info->wqe_idx = (u32)FIELD_GET(IRDMA_CQ_WQEIDX, qword3);
+
+ if (info->q_type == IRDMA_CQE_QTYPE_RQ) {
+ if (ext_valid) {
+ info->vlan_valid = (bool)FIELD_GET(IRDMA_CQ_UDVLANVALID, qword7);
+ if (info->vlan_valid) {
+ get_64bit_val(ext_cqe, 16, &qword6);
+ info->vlan = (u16)FIELD_GET(IRDMA_CQ_UDVLAN, qword6);
+ }
+ info->smac_valid = (bool)FIELD_GET(IRDMA_CQ_UDSMACVALID, qword7);
+ if (info->smac_valid) {
+ get_64bit_val(ext_cqe, 16, &qword6);
+ info->smac[0] = (u8)((qword6 >> 40) & 0xFF);
+ info->smac[1] = (u8)((qword6 >> 32) & 0xFF);
+ info->smac[2] = (u8)((qword6 >> 24) & 0xFF);
+ info->smac[3] = (u8)((qword6 >> 16) & 0xFF);
+ info->smac[4] = (u8)((qword6 >> 8) & 0xFF);
+ info->smac[5] = (u8)(qword6 & 0xFF);
+ }
+ }
+
+ if (cq->dev->hw_attrs.uk_attrs.hw_rev == IRDMA_GEN_1) {
+ info->vlan_valid = (bool)FIELD_GET(IRDMA_VLAN_TAG_VALID, qword3);
+ info->l4proto = (u8)FIELD_GET(IRDMA_UDA_L4PROTO, qword2);
+ info->l3proto = (u8)FIELD_GET(IRDMA_UDA_L3PROTO, qword2);
+ }
+
+ info->payload_len = (u32)FIELD_GET(IRDMACQ_PAYLDLEN, qword0);
+ }
+
+ return 0;
+}
+
+/**
+ * irdma_puda_poll_cmpl - processes completion for cq
+ * @dev: iwarp device
+ * @cq: cq getting interrupt
+ * @compl_err: return any completion err
+ */
+int irdma_puda_poll_cmpl(struct irdma_sc_dev *dev, struct irdma_sc_cq *cq,
+ u32 *compl_err)
+{
+ struct irdma_qp_uk *qp;
+ struct irdma_cq_uk *cq_uk = &cq->cq_uk;
+ struct irdma_puda_cmpl_info info = {};
+ int ret = 0;
+ struct irdma_puda_buf *buf;
+ struct irdma_puda_rsrc *rsrc;
+ u8 cq_type = cq->cq_type;
+ unsigned long flags;
+
+ if (cq_type == IRDMA_CQ_TYPE_ILQ || cq_type == IRDMA_CQ_TYPE_IEQ) {
+ rsrc = (cq_type == IRDMA_CQ_TYPE_ILQ) ? cq->vsi->ilq :
+ cq->vsi->ieq;
+ } else {
+ ibdev_dbg(to_ibdev(dev), "PUDA: qp_type error\n");
+ return -EINVAL;
+ }
+
+ ret = irdma_puda_poll_info(cq, &info);
+ *compl_err = info.compl_error;
+ if (ret == -ENOENT)
+ return ret;
+ if (ret)
+ goto done;
+
+ qp = info.qp;
+ if (!qp || !rsrc) {
+ ret = -EFAULT;
+ goto done;
+ }
+
+ if (qp->qp_id != rsrc->qp_id) {
+ ret = -EFAULT;
+ goto done;
+ }
+
+ if (info.q_type == IRDMA_CQE_QTYPE_RQ) {
+ buf = (struct irdma_puda_buf *)(uintptr_t)
+ qp->rq_wrid_array[info.wqe_idx];
+
+ /* reusing so synch the buffer for CPU use */
+ dma_sync_single_for_cpu(dev->hw->device, buf->mem.pa,
+ buf->mem.size, DMA_BIDIRECTIONAL);
+ /* Get all the tcpip information in the buf header */
+ ret = irdma_puda_get_tcpip_info(&info, buf);
+ if (ret) {
+ rsrc->stats_rcvd_pkt_err++;
+ if (cq_type == IRDMA_CQ_TYPE_ILQ) {
+ irdma_ilq_putback_rcvbuf(&rsrc->qp, buf,
+ info.wqe_idx);
+ } else {
+ irdma_puda_ret_bufpool(rsrc, buf);
+ irdma_puda_replenish_rq(rsrc, false);
+ }
+ goto done;
+ }
+
+ rsrc->stats_pkt_rcvd++;
+ rsrc->compl_rxwqe_idx = info.wqe_idx;
+ ibdev_dbg(to_ibdev(dev), "PUDA: RQ completion\n");
+ rsrc->receive(rsrc->vsi, buf);
+ if (cq_type == IRDMA_CQ_TYPE_ILQ)
+ irdma_ilq_putback_rcvbuf(&rsrc->qp, buf, info.wqe_idx);
+ else
+ irdma_puda_replenish_rq(rsrc, false);
+
+ } else {
+ ibdev_dbg(to_ibdev(dev), "PUDA: SQ completion\n");
+ buf = (struct irdma_puda_buf *)(uintptr_t)
+ qp->sq_wrtrk_array[info.wqe_idx].wrid;
+
+ /* reusing so synch the buffer for CPU use */
+ dma_sync_single_for_cpu(dev->hw->device, buf->mem.pa,
+ buf->mem.size, DMA_BIDIRECTIONAL);
+ IRDMA_RING_SET_TAIL(qp->sq_ring, info.wqe_idx);
+ rsrc->xmit_complete(rsrc->vsi, buf);
+ spin_lock_irqsave(&rsrc->bufpool_lock, flags);
+ rsrc->tx_wqe_avail_cnt++;
+ spin_unlock_irqrestore(&rsrc->bufpool_lock, flags);
+ if (!list_empty(&rsrc->txpend))
+ irdma_puda_send_buf(rsrc, NULL);
+ }
+
+done:
+ IRDMA_RING_MOVE_HEAD_NOCHECK(cq_uk->cq_ring);
+ if (!IRDMA_RING_CURRENT_HEAD(cq_uk->cq_ring))
+ cq_uk->polarity = !cq_uk->polarity;
+ /* update cq tail in cq shadow memory also */
+ IRDMA_RING_MOVE_TAIL(cq_uk->cq_ring);
+ set_64bit_val(cq_uk->shadow_area, 0,
+ IRDMA_RING_CURRENT_HEAD(cq_uk->cq_ring));
+
+ return ret;
+}
+
+/**
+ * irdma_puda_send - complete send wqe for transmit
+ * @qp: puda qp for send
+ * @info: buffer information for transmit
+ */
+int irdma_puda_send(struct irdma_sc_qp *qp, struct irdma_puda_send_info *info)
+{
+ __le64 *wqe;
+ u32 iplen, l4len;
+ u64 hdr[2];
+ u32 wqe_idx;
+ u8 iipt;
+
+ /* number of 32 bits DWORDS in header */
+ l4len = info->tcplen >> 2;
+ if (info->ipv4) {
+ iipt = 3;
+ iplen = 5;
+ } else {
+ iipt = 1;
+ iplen = 10;
+ }
+
+ wqe = irdma_puda_get_next_send_wqe(&qp->qp_uk, &wqe_idx);
+ if (!wqe)
+ return -ENOMEM;
+
+ qp->qp_uk.sq_wrtrk_array[wqe_idx].wrid = (uintptr_t)info->scratch;
+ /* Third line of WQE descriptor */
+ /* maclen is in words */
+
+ if (qp->dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) {
+ hdr[0] = 0; /* Dest_QPN and Dest_QKey only for UD */
+ hdr[1] = FIELD_PREP(IRDMA_UDA_QPSQ_OPCODE, IRDMA_OP_TYPE_SEND) |
+ FIELD_PREP(IRDMA_UDA_QPSQ_L4LEN, l4len) |
+ FIELD_PREP(IRDMAQPSQ_AHID, info->ah_id) |
+ FIELD_PREP(IRDMA_UDA_QPSQ_SIGCOMPL, 1) |
+ FIELD_PREP(IRDMA_UDA_QPSQ_VALID,
+ qp->qp_uk.swqe_polarity);
+
+ /* Forth line of WQE descriptor */
+
+ set_64bit_val(wqe, 0, info->paddr);
+ set_64bit_val(wqe, 8,
+ FIELD_PREP(IRDMAQPSQ_FRAG_LEN, info->len) |
+ FIELD_PREP(IRDMA_UDA_QPSQ_VALID, qp->qp_uk.swqe_polarity));
+ } else {
+ hdr[0] = FIELD_PREP(IRDMA_UDA_QPSQ_MACLEN, info->maclen >> 1) |
+ FIELD_PREP(IRDMA_UDA_QPSQ_IPLEN, iplen) |
+ FIELD_PREP(IRDMA_UDA_QPSQ_L4T, 1) |
+ FIELD_PREP(IRDMA_UDA_QPSQ_IIPT, iipt) |
+ FIELD_PREP(IRDMA_GEN1_UDA_QPSQ_L4LEN, l4len);
+
+ hdr[1] = FIELD_PREP(IRDMA_UDA_QPSQ_OPCODE, IRDMA_OP_TYPE_SEND) |
+ FIELD_PREP(IRDMA_UDA_QPSQ_SIGCOMPL, 1) |
+ FIELD_PREP(IRDMA_UDA_QPSQ_DOLOOPBACK, info->do_lpb) |
+ FIELD_PREP(IRDMA_UDA_QPSQ_VALID, qp->qp_uk.swqe_polarity);
+
+ /* Forth line of WQE descriptor */
+
+ set_64bit_val(wqe, 0, info->paddr);
+ set_64bit_val(wqe, 8,
+ FIELD_PREP(IRDMAQPSQ_GEN1_FRAG_LEN, info->len));
+ }
+
+ set_64bit_val(wqe, 16, hdr[0]);
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr[1]);
+
+ print_hex_dump_debug("PUDA: PUDA SEND WQE", DUMP_PREFIX_OFFSET, 16, 8,
+ wqe, 32, false);
+ irdma_uk_qp_post_wr(&qp->qp_uk);
+ return 0;
+}
+
+/**
+ * irdma_puda_send_buf - transmit puda buffer
+ * @rsrc: resource to use for buffer
+ * @buf: puda buffer to transmit
+ */
+void irdma_puda_send_buf(struct irdma_puda_rsrc *rsrc,
+ struct irdma_puda_buf *buf)
+{
+ struct irdma_puda_send_info info;
+ int ret = 0;
+ unsigned long flags;
+
+ spin_lock_irqsave(&rsrc->bufpool_lock, flags);
+ /* if no wqe available or not from a completion and we have
+ * pending buffers, we must queue new buffer
+ */
+ if (!rsrc->tx_wqe_avail_cnt || (buf && !list_empty(&rsrc->txpend))) {
+ list_add_tail(&buf->list, &rsrc->txpend);
+ spin_unlock_irqrestore(&rsrc->bufpool_lock, flags);
+ rsrc->stats_sent_pkt_q++;
+ if (rsrc->type == IRDMA_PUDA_RSRC_TYPE_ILQ)
+ ibdev_dbg(to_ibdev(rsrc->dev),
+ "PUDA: adding to txpend\n");
+ return;
+ }
+ rsrc->tx_wqe_avail_cnt--;
+ /* if we are coming from a completion and have pending buffers
+ * then Get one from pending list
+ */
+ if (!buf) {
+ buf = irdma_puda_get_listbuf(&rsrc->txpend);
+ if (!buf)
+ goto done;
+ }
+
+ info.scratch = buf;
+ info.paddr = buf->mem.pa;
+ info.len = buf->totallen;
+ info.tcplen = buf->tcphlen;
+ info.ipv4 = buf->ipv4;
+
+ if (rsrc->dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) {
+ info.ah_id = buf->ah_id;
+ } else {
+ info.maclen = buf->maclen;
+ info.do_lpb = buf->do_lpb;
+ }
+
+ /* Synch buffer for use by device */
+ dma_sync_single_for_cpu(rsrc->dev->hw->device, buf->mem.pa,
+ buf->mem.size, DMA_BIDIRECTIONAL);
+ ret = irdma_puda_send(&rsrc->qp, &info);
+ if (ret) {
+ rsrc->tx_wqe_avail_cnt++;
+ rsrc->stats_sent_pkt_q++;
+ list_add(&buf->list, &rsrc->txpend);
+ if (rsrc->type == IRDMA_PUDA_RSRC_TYPE_ILQ)
+ ibdev_dbg(to_ibdev(rsrc->dev),
+ "PUDA: adding to puda_send\n");
+ } else {
+ rsrc->stats_pkt_sent++;
+ }
+done:
+ spin_unlock_irqrestore(&rsrc->bufpool_lock, flags);
+}
+
+/**
+ * irdma_puda_qp_setctx - during init, set qp's context
+ * @rsrc: qp's resource
+ */
+static void irdma_puda_qp_setctx(struct irdma_puda_rsrc *rsrc)
+{
+ struct irdma_sc_qp *qp = &rsrc->qp;
+ __le64 *qp_ctx = qp->hw_host_ctx;
+
+ set_64bit_val(qp_ctx, 8, qp->sq_pa);
+ set_64bit_val(qp_ctx, 16, qp->rq_pa);
+ set_64bit_val(qp_ctx, 24,
+ FIELD_PREP(IRDMAQPC_RQSIZE, qp->hw_rq_size) |
+ FIELD_PREP(IRDMAQPC_SQSIZE, qp->hw_sq_size));
+ set_64bit_val(qp_ctx, 48,
+ FIELD_PREP(IRDMAQPC_SNDMSS, rsrc->buf_size));
+ set_64bit_val(qp_ctx, 56, 0);
+ if (qp->dev->hw_attrs.uk_attrs.hw_rev == IRDMA_GEN_1)
+ set_64bit_val(qp_ctx, 64, 1);
+ set_64bit_val(qp_ctx, 136,
+ FIELD_PREP(IRDMAQPC_TXCQNUM, rsrc->cq_id) |
+ FIELD_PREP(IRDMAQPC_RXCQNUM, rsrc->cq_id));
+ set_64bit_val(qp_ctx, 144,
+ FIELD_PREP(IRDMAQPC_STAT_INDEX, rsrc->stats_idx));
+ set_64bit_val(qp_ctx, 160,
+ FIELD_PREP(IRDMAQPC_PRIVEN, 1) |
+ FIELD_PREP(IRDMAQPC_USESTATSINSTANCE, rsrc->stats_idx_valid));
+ set_64bit_val(qp_ctx, 168,
+ FIELD_PREP(IRDMAQPC_QPCOMPCTX, (uintptr_t)qp));
+ set_64bit_val(qp_ctx, 176,
+ FIELD_PREP(IRDMAQPC_SQTPHVAL, qp->sq_tph_val) |
+ FIELD_PREP(IRDMAQPC_RQTPHVAL, qp->rq_tph_val) |
+ FIELD_PREP(IRDMAQPC_QSHANDLE, qp->qs_handle));
+
+ print_hex_dump_debug("PUDA: PUDA QP CONTEXT", DUMP_PREFIX_OFFSET, 16,
+ 8, qp_ctx, IRDMA_QP_CTX_SIZE, false);
+}
+
+/**
+ * irdma_puda_qp_wqe - setup wqe for qp create
+ * @dev: Device
+ * @qp: Resource qp
+ */
+static int irdma_puda_qp_wqe(struct irdma_sc_dev *dev, struct irdma_sc_qp *qp)
+{
+ struct irdma_sc_cqp *cqp;
+ __le64 *wqe;
+ u64 hdr;
+ struct irdma_ccq_cqe_info compl_info;
+ int status = 0;
+
+ cqp = dev->cqp;
+ wqe = irdma_sc_cqp_get_next_send_wqe(cqp, 0);
+ if (!wqe)
+ return -ENOMEM;
+
+ set_64bit_val(wqe, 16, qp->hw_host_ctx_pa);
+ set_64bit_val(wqe, 40, qp->shadow_area_pa);
+
+ hdr = qp->qp_uk.qp_id |
+ FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_CREATE_QP) |
+ FIELD_PREP(IRDMA_CQPSQ_QP_QPTYPE, IRDMA_QP_TYPE_UDA) |
+ FIELD_PREP(IRDMA_CQPSQ_QP_CQNUMVALID, 1) |
+ FIELD_PREP(IRDMA_CQPSQ_QP_NEXTIWSTATE, 2) |
+ FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity);
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ print_hex_dump_debug("PUDA: PUDA QP CREATE", DUMP_PREFIX_OFFSET, 16,
+ 8, wqe, 40, false);
+ irdma_sc_cqp_post_sq(cqp);
+ status = irdma_sc_poll_for_cqp_op_done(dev->cqp, IRDMA_CQP_OP_CREATE_QP,
+ &compl_info);
+
+ return status;
+}
+
+/**
+ * irdma_puda_qp_create - create qp for resource
+ * @rsrc: resource to use for buffer
+ */
+static int irdma_puda_qp_create(struct irdma_puda_rsrc *rsrc)
+{
+ struct irdma_sc_qp *qp = &rsrc->qp;
+ struct irdma_qp_uk *ukqp = &qp->qp_uk;
+ int ret = 0;
+ u32 sq_size, rq_size;
+ struct irdma_dma_mem *mem;
+
+ sq_size = rsrc->sq_size * IRDMA_QP_WQE_MIN_SIZE;
+ rq_size = rsrc->rq_size * IRDMA_QP_WQE_MIN_SIZE;
+ rsrc->qpmem.size = ALIGN((sq_size + rq_size + (IRDMA_SHADOW_AREA_SIZE << 3) + IRDMA_QP_CTX_SIZE),
+ IRDMA_HW_PAGE_SIZE);
+ rsrc->qpmem.va = dma_alloc_coherent(rsrc->dev->hw->device,
+ rsrc->qpmem.size, &rsrc->qpmem.pa,
+ GFP_KERNEL);
+ if (!rsrc->qpmem.va)
+ return -ENOMEM;
+
+ mem = &rsrc->qpmem;
+ memset(mem->va, 0, rsrc->qpmem.size);
+ qp->hw_sq_size = irdma_get_encoded_wqe_size(rsrc->sq_size, IRDMA_QUEUE_TYPE_SQ_RQ);
+ qp->hw_rq_size = irdma_get_encoded_wqe_size(rsrc->rq_size, IRDMA_QUEUE_TYPE_SQ_RQ);
+ qp->pd = &rsrc->sc_pd;
+ qp->qp_uk.qp_type = IRDMA_QP_TYPE_UDA;
+ qp->dev = rsrc->dev;
+ qp->qp_uk.back_qp = rsrc;
+ qp->sq_pa = mem->pa;
+ qp->rq_pa = qp->sq_pa + sq_size;
+ qp->vsi = rsrc->vsi;
+ ukqp->sq_base = mem->va;
+ ukqp->rq_base = &ukqp->sq_base[rsrc->sq_size];
+ ukqp->shadow_area = ukqp->rq_base[rsrc->rq_size].elem;
+ ukqp->uk_attrs = &qp->dev->hw_attrs.uk_attrs;
+ qp->shadow_area_pa = qp->rq_pa + rq_size;
+ qp->hw_host_ctx = ukqp->shadow_area + IRDMA_SHADOW_AREA_SIZE;
+ qp->hw_host_ctx_pa = qp->shadow_area_pa + (IRDMA_SHADOW_AREA_SIZE << 3);
+ qp->push_idx = IRDMA_INVALID_PUSH_PAGE_INDEX;
+ ukqp->qp_id = rsrc->qp_id;
+ ukqp->sq_wrtrk_array = rsrc->sq_wrtrk_array;
+ ukqp->rq_wrid_array = rsrc->rq_wrid_array;
+ ukqp->sq_size = rsrc->sq_size;
+ ukqp->rq_size = rsrc->rq_size;
+
+ IRDMA_RING_INIT(ukqp->sq_ring, ukqp->sq_size);
+ IRDMA_RING_INIT(ukqp->rq_ring, ukqp->rq_size);
+ ukqp->wqe_alloc_db = qp->pd->dev->wqe_alloc_db;
+
+ ret = rsrc->dev->ws_add(qp->vsi, qp->user_pri);
+ if (ret) {
+ dma_free_coherent(rsrc->dev->hw->device, rsrc->qpmem.size,
+ rsrc->qpmem.va, rsrc->qpmem.pa);
+ rsrc->qpmem.va = NULL;
+ return ret;
+ }
+
+ irdma_qp_add_qos(qp);
+ irdma_puda_qp_setctx(rsrc);
+
+ if (rsrc->dev->ceq_valid)
+ ret = irdma_cqp_qp_create_cmd(rsrc->dev, qp);
+ else
+ ret = irdma_puda_qp_wqe(rsrc->dev, qp);
+ if (ret) {
+ irdma_qp_rem_qos(qp);
+ rsrc->dev->ws_remove(qp->vsi, qp->user_pri);
+ dma_free_coherent(rsrc->dev->hw->device, rsrc->qpmem.size,
+ rsrc->qpmem.va, rsrc->qpmem.pa);
+ rsrc->qpmem.va = NULL;
+ }
+
+ return ret;
+}
+
+/**
+ * irdma_puda_cq_wqe - setup wqe for CQ create
+ * @dev: Device
+ * @cq: resource for cq
+ */
+static int irdma_puda_cq_wqe(struct irdma_sc_dev *dev, struct irdma_sc_cq *cq)
+{
+ __le64 *wqe;
+ struct irdma_sc_cqp *cqp;
+ u64 hdr;
+ struct irdma_ccq_cqe_info compl_info;
+
+ cqp = dev->cqp;
+ wqe = irdma_sc_cqp_get_next_send_wqe(cqp, 0);
+ if (!wqe)
+ return -ENOMEM;
+
+ set_64bit_val(wqe, 0, cq->cq_uk.cq_size);
+ set_64bit_val(wqe, 8, (uintptr_t)cq >> 1);
+ set_64bit_val(wqe, 16,
+ FIELD_PREP(IRDMA_CQPSQ_CQ_SHADOW_READ_THRESHOLD, cq->shadow_read_threshold));
+ set_64bit_val(wqe, 32, cq->cq_pa);
+ set_64bit_val(wqe, 40, cq->shadow_area_pa);
+ set_64bit_val(wqe, 56,
+ FIELD_PREP(IRDMA_CQPSQ_TPHVAL, cq->tph_val) |
+ FIELD_PREP(IRDMA_CQPSQ_VSIIDX, cq->vsi->vsi_idx));
+
+ hdr = cq->cq_uk.cq_id |
+ FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_CREATE_CQ) |
+ FIELD_PREP(IRDMA_CQPSQ_CQ_CHKOVERFLOW, 1) |
+ FIELD_PREP(IRDMA_CQPSQ_CQ_ENCEQEMASK, 1) |
+ FIELD_PREP(IRDMA_CQPSQ_CQ_CEQIDVALID, 1) |
+ FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity);
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ print_hex_dump_debug("PUDA: PUDA CREATE CQ", DUMP_PREFIX_OFFSET, 16,
+ 8, wqe, IRDMA_CQP_WQE_SIZE * 8, false);
+ irdma_sc_cqp_post_sq(dev->cqp);
+ return irdma_sc_poll_for_cqp_op_done(dev->cqp, IRDMA_CQP_OP_CREATE_CQ,
+ &compl_info);
+}
+
+/**
+ * irdma_puda_cq_create - create cq for resource
+ * @rsrc: resource for which cq to create
+ */
+static int irdma_puda_cq_create(struct irdma_puda_rsrc *rsrc)
+{
+ struct irdma_sc_dev *dev = rsrc->dev;
+ struct irdma_sc_cq *cq = &rsrc->cq;
+ int ret = 0;
+ u32 cqsize;
+ struct irdma_dma_mem *mem;
+ struct irdma_cq_init_info info = {};
+ struct irdma_cq_uk_init_info *init_info = &info.cq_uk_init_info;
+
+ cq->vsi = rsrc->vsi;
+ cqsize = rsrc->cq_size * (sizeof(struct irdma_cqe));
+ rsrc->cqmem.size = ALIGN(cqsize + sizeof(struct irdma_cq_shadow_area),
+ IRDMA_CQ0_ALIGNMENT);
+ rsrc->cqmem.va = dma_alloc_coherent(dev->hw->device, rsrc->cqmem.size,
+ &rsrc->cqmem.pa, GFP_KERNEL);
+ if (!rsrc->cqmem.va)
+ return -ENOMEM;
+
+ mem = &rsrc->cqmem;
+ info.dev = dev;
+ info.type = (rsrc->type == IRDMA_PUDA_RSRC_TYPE_ILQ) ?
+ IRDMA_CQ_TYPE_ILQ : IRDMA_CQ_TYPE_IEQ;
+ info.shadow_read_threshold = rsrc->cq_size >> 2;
+ info.cq_base_pa = mem->pa;
+ info.shadow_area_pa = mem->pa + cqsize;
+ init_info->cq_base = mem->va;
+ init_info->shadow_area = (__le64 *)((u8 *)mem->va + cqsize);
+ init_info->cq_size = rsrc->cq_size;
+ init_info->cq_id = rsrc->cq_id;
+ info.ceqe_mask = true;
+ info.ceq_id_valid = true;
+ info.vsi = rsrc->vsi;
+
+ ret = irdma_sc_cq_init(cq, &info);
+ if (ret)
+ goto error;
+
+ if (rsrc->dev->ceq_valid)
+ ret = irdma_cqp_cq_create_cmd(dev, cq);
+ else
+ ret = irdma_puda_cq_wqe(dev, cq);
+error:
+ if (ret) {
+ dma_free_coherent(dev->hw->device, rsrc->cqmem.size,
+ rsrc->cqmem.va, rsrc->cqmem.pa);
+ rsrc->cqmem.va = NULL;
+ }
+
+ return ret;
+}
+
+/**
+ * irdma_puda_free_qp - free qp for resource
+ * @rsrc: resource for which qp to free
+ */
+static void irdma_puda_free_qp(struct irdma_puda_rsrc *rsrc)
+{
+ int ret;
+ struct irdma_ccq_cqe_info compl_info;
+ struct irdma_sc_dev *dev = rsrc->dev;
+
+ if (rsrc->dev->ceq_valid) {
+ irdma_cqp_qp_destroy_cmd(dev, &rsrc->qp);
+ rsrc->dev->ws_remove(rsrc->qp.vsi, rsrc->qp.user_pri);
+ return;
+ }
+
+ ret = irdma_sc_qp_destroy(&rsrc->qp, 0, false, true, true);
+ if (ret)
+ ibdev_dbg(to_ibdev(dev),
+ "PUDA: error puda qp destroy wqe, status = %d\n",
+ ret);
+ if (!ret) {
+ ret = irdma_sc_poll_for_cqp_op_done(dev->cqp, IRDMA_CQP_OP_DESTROY_QP,
+ &compl_info);
+ if (ret)
+ ibdev_dbg(to_ibdev(dev),
+ "PUDA: error puda qp destroy failed, status = %d\n",
+ ret);
+ }
+ rsrc->dev->ws_remove(rsrc->qp.vsi, rsrc->qp.user_pri);
+}
+
+/**
+ * irdma_puda_free_cq - free cq for resource
+ * @rsrc: resource for which cq to free
+ */
+static void irdma_puda_free_cq(struct irdma_puda_rsrc *rsrc)
+{
+ int ret;
+ struct irdma_ccq_cqe_info compl_info;
+ struct irdma_sc_dev *dev = rsrc->dev;
+
+ if (rsrc->dev->ceq_valid) {
+ irdma_cqp_cq_destroy_cmd(dev, &rsrc->cq);
+ return;
+ }
+
+ ret = irdma_sc_cq_destroy(&rsrc->cq, 0, true);
+ if (ret)
+ ibdev_dbg(to_ibdev(dev), "PUDA: error ieq cq destroy\n");
+ if (!ret) {
+ ret = irdma_sc_poll_for_cqp_op_done(dev->cqp, IRDMA_CQP_OP_DESTROY_CQ,
+ &compl_info);
+ if (ret)
+ ibdev_dbg(to_ibdev(dev),
+ "PUDA: error ieq qp destroy done\n");
+ }
+}
+
+/**
+ * irdma_puda_dele_rsrc - delete all resources during close
+ * @vsi: VSI structure of device
+ * @type: type of resource to dele
+ * @reset: true if reset chip
+ */
+void irdma_puda_dele_rsrc(struct irdma_sc_vsi *vsi, enum puda_rsrc_type type,
+ bool reset)
+{
+ struct irdma_sc_dev *dev = vsi->dev;
+ struct irdma_puda_rsrc *rsrc;
+ struct irdma_puda_buf *buf = NULL;
+ struct irdma_puda_buf *nextbuf = NULL;
+ struct irdma_virt_mem *vmem;
+
+ switch (type) {
+ case IRDMA_PUDA_RSRC_TYPE_ILQ:
+ rsrc = vsi->ilq;
+ vmem = &vsi->ilq_mem;
+ vsi->ilq = NULL;
+ break;
+ case IRDMA_PUDA_RSRC_TYPE_IEQ:
+ rsrc = vsi->ieq;
+ vmem = &vsi->ieq_mem;
+ vsi->ieq = NULL;
+ break;
+ default:
+ ibdev_dbg(to_ibdev(dev), "PUDA: error resource type = 0x%x\n",
+ type);
+ return;
+ }
+
+ switch (rsrc->cmpl) {
+ case PUDA_HASH_CRC_COMPLETE:
+ case PUDA_QP_CREATED:
+ irdma_qp_rem_qos(&rsrc->qp);
+
+ if (!reset)
+ irdma_puda_free_qp(rsrc);
+
+ dma_free_coherent(dev->hw->device, rsrc->qpmem.size,
+ rsrc->qpmem.va, rsrc->qpmem.pa);
+ rsrc->qpmem.va = NULL;
+ fallthrough;
+ case PUDA_CQ_CREATED:
+ if (!reset)
+ irdma_puda_free_cq(rsrc);
+
+ dma_free_coherent(dev->hw->device, rsrc->cqmem.size,
+ rsrc->cqmem.va, rsrc->cqmem.pa);
+ rsrc->cqmem.va = NULL;
+ break;
+ default:
+ ibdev_dbg(to_ibdev(rsrc->dev), "PUDA: error no resources\n");
+ break;
+ }
+ /* Free all allocated puda buffers for both tx and rx */
+ buf = rsrc->alloclist;
+ while (buf) {
+ nextbuf = buf->next;
+ irdma_puda_dele_buf(dev, buf);
+ buf = nextbuf;
+ rsrc->alloc_buf_count--;
+ }
+
+ kfree(vmem->va);
+}
+
+/**
+ * irdma_puda_allocbufs - allocate buffers for resource
+ * @rsrc: resource for buffer allocation
+ * @count: number of buffers to create
+ */
+static int irdma_puda_allocbufs(struct irdma_puda_rsrc *rsrc, u32 count)
+{
+ u32 i;
+ struct irdma_puda_buf *buf;
+ struct irdma_puda_buf *nextbuf;
+
+ for (i = 0; i < count; i++) {
+ buf = irdma_puda_alloc_buf(rsrc->dev, rsrc->buf_size);
+ if (!buf) {
+ rsrc->stats_buf_alloc_fail++;
+ return -ENOMEM;
+ }
+ irdma_puda_ret_bufpool(rsrc, buf);
+ rsrc->alloc_buf_count++;
+ if (!rsrc->alloclist) {
+ rsrc->alloclist = buf;
+ } else {
+ nextbuf = rsrc->alloclist;
+ rsrc->alloclist = buf;
+ buf->next = nextbuf;
+ }
+ }
+
+ rsrc->avail_buf_count = rsrc->alloc_buf_count;
+
+ return 0;
+}
+
+/**
+ * irdma_puda_create_rsrc - create resource (ilq or ieq)
+ * @vsi: sc VSI struct
+ * @info: resource information
+ */
+int irdma_puda_create_rsrc(struct irdma_sc_vsi *vsi,
+ struct irdma_puda_rsrc_info *info)
+{
+ struct irdma_sc_dev *dev = vsi->dev;
+ int ret = 0;
+ struct irdma_puda_rsrc *rsrc;
+ u32 pudasize;
+ u32 sqwridsize, rqwridsize;
+ struct irdma_virt_mem *vmem;
+
+ info->count = 1;
+ pudasize = sizeof(struct irdma_puda_rsrc);
+ sqwridsize = info->sq_size * sizeof(struct irdma_sq_uk_wr_trk_info);
+ rqwridsize = info->rq_size * 8;
+ switch (info->type) {
+ case IRDMA_PUDA_RSRC_TYPE_ILQ:
+ vmem = &vsi->ilq_mem;
+ break;
+ case IRDMA_PUDA_RSRC_TYPE_IEQ:
+ vmem = &vsi->ieq_mem;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+ vmem->size = pudasize + sqwridsize + rqwridsize;
+ vmem->va = kzalloc(vmem->size, GFP_KERNEL);
+ if (!vmem->va)
+ return -ENOMEM;
+
+ rsrc = vmem->va;
+ spin_lock_init(&rsrc->bufpool_lock);
+ switch (info->type) {
+ case IRDMA_PUDA_RSRC_TYPE_ILQ:
+ vsi->ilq = vmem->va;
+ vsi->ilq_count = info->count;
+ rsrc->receive = info->receive;
+ rsrc->xmit_complete = info->xmit_complete;
+ break;
+ case IRDMA_PUDA_RSRC_TYPE_IEQ:
+ vsi->ieq_count = info->count;
+ vsi->ieq = vmem->va;
+ rsrc->receive = irdma_ieq_receive;
+ rsrc->xmit_complete = irdma_ieq_tx_compl;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ rsrc->type = info->type;
+ rsrc->sq_wrtrk_array = (struct irdma_sq_uk_wr_trk_info *)
+ ((u8 *)vmem->va + pudasize);
+ rsrc->rq_wrid_array = (u64 *)((u8 *)vmem->va + pudasize + sqwridsize);
+ /* Initialize all ieq lists */
+ INIT_LIST_HEAD(&rsrc->bufpool);
+ INIT_LIST_HEAD(&rsrc->txpend);
+
+ rsrc->tx_wqe_avail_cnt = info->sq_size - 1;
+ irdma_sc_pd_init(dev, &rsrc->sc_pd, info->pd_id, info->abi_ver);
+ rsrc->qp_id = info->qp_id;
+ rsrc->cq_id = info->cq_id;
+ rsrc->sq_size = info->sq_size;
+ rsrc->rq_size = info->rq_size;
+ rsrc->cq_size = info->rq_size + info->sq_size;
+ if (dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) {
+ if (rsrc->type == IRDMA_PUDA_RSRC_TYPE_ILQ)
+ rsrc->cq_size += info->rq_size;
+ }
+ rsrc->buf_size = info->buf_size;
+ rsrc->dev = dev;
+ rsrc->vsi = vsi;
+ rsrc->stats_idx = info->stats_idx;
+ rsrc->stats_idx_valid = info->stats_idx_valid;
+
+ ret = irdma_puda_cq_create(rsrc);
+ if (!ret) {
+ rsrc->cmpl = PUDA_CQ_CREATED;
+ ret = irdma_puda_qp_create(rsrc);
+ }
+ if (ret) {
+ ibdev_dbg(to_ibdev(dev),
+ "PUDA: error qp_create type=%d, status=%d\n",
+ rsrc->type, ret);
+ goto error;
+ }
+ rsrc->cmpl = PUDA_QP_CREATED;
+
+ ret = irdma_puda_allocbufs(rsrc, info->tx_buf_cnt + info->rq_size);
+ if (ret) {
+ ibdev_dbg(to_ibdev(dev), "PUDA: error alloc_buf\n");
+ goto error;
+ }
+
+ rsrc->rxq_invalid_cnt = info->rq_size;
+ ret = irdma_puda_replenish_rq(rsrc, true);
+ if (ret)
+ goto error;
+
+ if (info->type == IRDMA_PUDA_RSRC_TYPE_IEQ) {
+ rsrc->check_crc = true;
+ rsrc->cmpl = PUDA_HASH_CRC_COMPLETE;
+ }
+
+ irdma_sc_ccq_arm(&rsrc->cq);
+ return 0;
+
+error:
+ irdma_puda_dele_rsrc(vsi, info->type, false);
+
+ return ret;
+}
+
+/**
+ * irdma_ilq_putback_rcvbuf - ilq buffer to put back on rq
+ * @qp: ilq's qp resource
+ * @buf: puda buffer for rcv q
+ * @wqe_idx: wqe index of completed rcvbuf
+ */
+static void irdma_ilq_putback_rcvbuf(struct irdma_sc_qp *qp,
+ struct irdma_puda_buf *buf, u32 wqe_idx)
+{
+ __le64 *wqe;
+ u64 offset8, offset24;
+
+ /* Synch buffer for use by device */
+ dma_sync_single_for_device(qp->dev->hw->device, buf->mem.pa,
+ buf->mem.size, DMA_BIDIRECTIONAL);
+ wqe = qp->qp_uk.rq_base[wqe_idx].elem;
+ get_64bit_val(wqe, 24, &offset24);
+ if (qp->dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) {
+ get_64bit_val(wqe, 8, &offset8);
+ if (offset24)
+ offset8 &= ~FIELD_PREP(IRDMAQPSQ_VALID, 1);
+ else
+ offset8 |= FIELD_PREP(IRDMAQPSQ_VALID, 1);
+ set_64bit_val(wqe, 8, offset8);
+ dma_wmb(); /* make sure WQE is written before valid bit is set */
+ }
+ if (offset24)
+ offset24 = 0;
+ else
+ offset24 = FIELD_PREP(IRDMAQPSQ_VALID, 1);
+
+ set_64bit_val(wqe, 24, offset24);
+}
+
+/**
+ * irdma_ieq_get_fpdu_len - get length of fpdu with or without marker
+ * @pfpdu: pointer to fpdu
+ * @datap: pointer to data in the buffer
+ * @rcv_seq: seqnum of the data buffer
+ */
+static u16 irdma_ieq_get_fpdu_len(struct irdma_pfpdu *pfpdu, u8 *datap,
+ u32 rcv_seq)
+{
+ u32 marker_seq, end_seq, blk_start;
+ u8 marker_len = pfpdu->marker_len;
+ u16 total_len = 0;
+ u16 fpdu_len;
+
+ blk_start = (pfpdu->rcv_start_seq - rcv_seq) & (IRDMA_MRK_BLK_SZ - 1);
+ if (!blk_start) {
+ total_len = marker_len;
+ marker_seq = rcv_seq + IRDMA_MRK_BLK_SZ;
+ if (marker_len && *(u32 *)datap)
+ return 0;
+ } else {
+ marker_seq = rcv_seq + blk_start;
+ }
+
+ datap += total_len;
+ fpdu_len = ntohs(*(__be16 *)datap);
+ fpdu_len += IRDMA_IEQ_MPA_FRAMING;
+ fpdu_len = (fpdu_len + 3) & 0xfffc;
+
+ if (fpdu_len > pfpdu->max_fpdu_data)
+ return 0;
+
+ total_len += fpdu_len;
+ end_seq = rcv_seq + total_len;
+ while ((int)(marker_seq - end_seq) < 0) {
+ total_len += marker_len;
+ end_seq += marker_len;
+ marker_seq += IRDMA_MRK_BLK_SZ;
+ }
+
+ return total_len;
+}
+
+/**
+ * irdma_ieq_copy_to_txbuf - copydata from rcv buf to tx buf
+ * @buf: rcv buffer with partial
+ * @txbuf: tx buffer for sending back
+ * @buf_offset: rcv buffer offset to copy from
+ * @txbuf_offset: at offset in tx buf to copy
+ * @len: length of data to copy
+ */
+static void irdma_ieq_copy_to_txbuf(struct irdma_puda_buf *buf,
+ struct irdma_puda_buf *txbuf,
+ u16 buf_offset, u32 txbuf_offset, u32 len)
+{
+ void *mem1 = (u8 *)buf->mem.va + buf_offset;
+ void *mem2 = (u8 *)txbuf->mem.va + txbuf_offset;
+
+ memcpy(mem2, mem1, len);
+}
+
+/**
+ * irdma_ieq_setup_tx_buf - setup tx buffer for partial handling
+ * @buf: reeive buffer with partial
+ * @txbuf: buffer to prepare
+ */
+static void irdma_ieq_setup_tx_buf(struct irdma_puda_buf *buf,
+ struct irdma_puda_buf *txbuf)
+{
+ txbuf->tcphlen = buf->tcphlen;
+ txbuf->ipv4 = buf->ipv4;
+
+ if (buf->vsi->dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) {
+ txbuf->hdrlen = txbuf->tcphlen;
+ irdma_ieq_copy_to_txbuf(buf, txbuf, IRDMA_TCP_OFFSET, 0,
+ txbuf->hdrlen);
+ } else {
+ txbuf->maclen = buf->maclen;
+ txbuf->hdrlen = buf->hdrlen;
+ irdma_ieq_copy_to_txbuf(buf, txbuf, 0, 0, buf->hdrlen);
+ }
+}
+
+/**
+ * irdma_ieq_check_first_buf - check if rcv buffer's seq is in range
+ * @buf: receive exception buffer
+ * @fps: first partial sequence number
+ */
+static void irdma_ieq_check_first_buf(struct irdma_puda_buf *buf, u32 fps)
+{
+ u32 offset;
+
+ if (buf->seqnum < fps) {
+ offset = fps - buf->seqnum;
+ if (offset > buf->datalen)
+ return;
+ buf->data += offset;
+ buf->datalen -= (u16)offset;
+ buf->seqnum = fps;
+ }
+}
+
+/**
+ * irdma_ieq_compl_pfpdu - write txbuf with full fpdu
+ * @ieq: ieq resource
+ * @rxlist: ieq's received buffer list
+ * @pbufl: temporary list for buffers for fpddu
+ * @txbuf: tx buffer for fpdu
+ * @fpdu_len: total length of fpdu
+ */
+static void irdma_ieq_compl_pfpdu(struct irdma_puda_rsrc *ieq,
+ struct list_head *rxlist,
+ struct list_head *pbufl,
+ struct irdma_puda_buf *txbuf, u16 fpdu_len)
+{
+ struct irdma_puda_buf *buf;
+ u32 nextseqnum;
+ u16 txoffset, bufoffset;
+
+ buf = irdma_puda_get_listbuf(pbufl);
+ if (!buf)
+ return;
+
+ nextseqnum = buf->seqnum + fpdu_len;
+ irdma_ieq_setup_tx_buf(buf, txbuf);
+ if (buf->vsi->dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) {
+ txoffset = txbuf->hdrlen;
+ txbuf->totallen = txbuf->hdrlen + fpdu_len;
+ txbuf->data = (u8 *)txbuf->mem.va + txoffset;
+ } else {
+ txoffset = buf->hdrlen;
+ txbuf->totallen = buf->hdrlen + fpdu_len;
+ txbuf->data = (u8 *)txbuf->mem.va + buf->hdrlen;
+ }
+ bufoffset = (u16)(buf->data - (u8 *)buf->mem.va);
+
+ do {
+ if (buf->datalen >= fpdu_len) {
+ /* copied full fpdu */
+ irdma_ieq_copy_to_txbuf(buf, txbuf, bufoffset, txoffset,
+ fpdu_len);
+ buf->datalen -= fpdu_len;
+ buf->data += fpdu_len;
+ buf->seqnum = nextseqnum;
+ break;
+ }
+ /* copy partial fpdu */
+ irdma_ieq_copy_to_txbuf(buf, txbuf, bufoffset, txoffset,
+ buf->datalen);
+ txoffset += buf->datalen;
+ fpdu_len -= buf->datalen;
+ irdma_puda_ret_bufpool(ieq, buf);
+ buf = irdma_puda_get_listbuf(pbufl);
+ if (!buf)
+ return;
+
+ bufoffset = (u16)(buf->data - (u8 *)buf->mem.va);
+ } while (1);
+
+ /* last buffer on the list*/
+ if (buf->datalen)
+ list_add(&buf->list, rxlist);
+ else
+ irdma_puda_ret_bufpool(ieq, buf);
+}
+
+/**
+ * irdma_ieq_create_pbufl - create buffer list for single fpdu
+ * @pfpdu: pointer to fpdu
+ * @rxlist: resource list for receive ieq buffes
+ * @pbufl: temp. list for buffers for fpddu
+ * @buf: first receive buffer
+ * @fpdu_len: total length of fpdu
+ */
+static int irdma_ieq_create_pbufl(struct irdma_pfpdu *pfpdu,
+ struct list_head *rxlist,
+ struct list_head *pbufl,
+ struct irdma_puda_buf *buf, u16 fpdu_len)
+{
+ int status = 0;
+ struct irdma_puda_buf *nextbuf;
+ u32 nextseqnum;
+ u16 plen = fpdu_len - buf->datalen;
+ bool done = false;
+
+ nextseqnum = buf->seqnum + buf->datalen;
+ do {
+ nextbuf = irdma_puda_get_listbuf(rxlist);
+ if (!nextbuf) {
+ status = -ENOBUFS;
+ break;
+ }
+ list_add_tail(&nextbuf->list, pbufl);
+ if (nextbuf->seqnum != nextseqnum) {
+ pfpdu->bad_seq_num++;
+ status = -ERANGE;
+ break;
+ }
+ if (nextbuf->datalen >= plen) {
+ done = true;
+ } else {
+ plen -= nextbuf->datalen;
+ nextseqnum = nextbuf->seqnum + nextbuf->datalen;
+ }
+
+ } while (!done);
+
+ return status;
+}
+
+/**
+ * irdma_ieq_handle_partial - process partial fpdu buffer
+ * @ieq: ieq resource
+ * @pfpdu: partial management per user qp
+ * @buf: receive buffer
+ * @fpdu_len: fpdu len in the buffer
+ */
+static int irdma_ieq_handle_partial(struct irdma_puda_rsrc *ieq,
+ struct irdma_pfpdu *pfpdu,
+ struct irdma_puda_buf *buf, u16 fpdu_len)
+{
+ int status = 0;
+ u8 *crcptr;
+ u32 mpacrc;
+ u32 seqnum = buf->seqnum;
+ struct list_head pbufl; /* partial buffer list */
+ struct irdma_puda_buf *txbuf = NULL;
+ struct list_head *rxlist = &pfpdu->rxlist;
+
+ ieq->partials_handled++;
+
+ INIT_LIST_HEAD(&pbufl);
+ list_add(&buf->list, &pbufl);
+
+ status = irdma_ieq_create_pbufl(pfpdu, rxlist, &pbufl, buf, fpdu_len);
+ if (status)
+ goto error;
+
+ txbuf = irdma_puda_get_bufpool(ieq);
+ if (!txbuf) {
+ pfpdu->no_tx_bufs++;
+ status = -ENOBUFS;
+ goto error;
+ }
+
+ irdma_ieq_compl_pfpdu(ieq, rxlist, &pbufl, txbuf, fpdu_len);
+ irdma_ieq_update_tcpip_info(txbuf, fpdu_len, seqnum);
+
+ crcptr = txbuf->data + fpdu_len - 4;
+ mpacrc = *(u32 *)crcptr;
+ if (ieq->check_crc) {
+ status = irdma_ieq_check_mpacrc(txbuf->data, fpdu_len - 4,
+ mpacrc);
+ if (status) {
+ ibdev_dbg(to_ibdev(ieq->dev), "IEQ: error bad crc\n");
+ goto error;
+ }
+ }
+
+ print_hex_dump_debug("IEQ: IEQ TX BUFFER", DUMP_PREFIX_OFFSET, 16, 8,
+ txbuf->mem.va, txbuf->totallen, false);
+ if (ieq->dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2)
+ txbuf->ah_id = pfpdu->ah->ah_info.ah_idx;
+ txbuf->do_lpb = true;
+ irdma_puda_send_buf(ieq, txbuf);
+ pfpdu->rcv_nxt = seqnum + fpdu_len;
+ return status;
+
+error:
+ while (!list_empty(&pbufl)) {
+ buf = list_last_entry(&pbufl, struct irdma_puda_buf, list);
+ list_move(&buf->list, rxlist);
+ }
+ if (txbuf)
+ irdma_puda_ret_bufpool(ieq, txbuf);
+
+ return status;
+}
+
+/**
+ * irdma_ieq_process_buf - process buffer rcvd for ieq
+ * @ieq: ieq resource
+ * @pfpdu: partial management per user qp
+ * @buf: receive buffer
+ */
+static int irdma_ieq_process_buf(struct irdma_puda_rsrc *ieq,
+ struct irdma_pfpdu *pfpdu,
+ struct irdma_puda_buf *buf)
+{
+ u16 fpdu_len = 0;
+ u16 datalen = buf->datalen;
+ u8 *datap = buf->data;
+ u8 *crcptr;
+ u16 ioffset = 0;
+ u32 mpacrc;
+ u32 seqnum = buf->seqnum;
+ u16 len = 0;
+ u16 full = 0;
+ bool partial = false;
+ struct irdma_puda_buf *txbuf;
+ struct list_head *rxlist = &pfpdu->rxlist;
+ int ret = 0;
+
+ ioffset = (u16)(buf->data - (u8 *)buf->mem.va);
+ while (datalen) {
+ fpdu_len = irdma_ieq_get_fpdu_len(pfpdu, datap, buf->seqnum);
+ if (!fpdu_len) {
+ ibdev_dbg(to_ibdev(ieq->dev),
+ "IEQ: error bad fpdu len\n");
+ list_add(&buf->list, rxlist);
+ return -EINVAL;
+ }
+
+ if (datalen < fpdu_len) {
+ partial = true;
+ break;
+ }
+ crcptr = datap + fpdu_len - 4;
+ mpacrc = *(u32 *)crcptr;
+ if (ieq->check_crc)
+ ret = irdma_ieq_check_mpacrc(datap, fpdu_len - 4,
+ mpacrc);
+ if (ret) {
+ list_add(&buf->list, rxlist);
+ ibdev_dbg(to_ibdev(ieq->dev),
+ "ERR: IRDMA_ERR_MPA_CRC\n");
+ return -EINVAL;
+ }
+ full++;
+ pfpdu->fpdu_processed++;
+ ieq->fpdu_processed++;
+ datap += fpdu_len;
+ len += fpdu_len;
+ datalen -= fpdu_len;
+ }
+ if (full) {
+ /* copy full pdu's in the txbuf and send them out */
+ txbuf = irdma_puda_get_bufpool(ieq);
+ if (!txbuf) {
+ pfpdu->no_tx_bufs++;
+ list_add(&buf->list, rxlist);
+ return -ENOBUFS;
+ }
+ /* modify txbuf's buffer header */
+ irdma_ieq_setup_tx_buf(buf, txbuf);
+ /* copy full fpdu's to new buffer */
+ if (ieq->dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) {
+ irdma_ieq_copy_to_txbuf(buf, txbuf, ioffset,
+ txbuf->hdrlen, len);
+ txbuf->totallen = txbuf->hdrlen + len;
+ txbuf->ah_id = pfpdu->ah->ah_info.ah_idx;
+ } else {
+ irdma_ieq_copy_to_txbuf(buf, txbuf, ioffset,
+ buf->hdrlen, len);
+ txbuf->totallen = buf->hdrlen + len;
+ }
+ irdma_ieq_update_tcpip_info(txbuf, len, buf->seqnum);
+ print_hex_dump_debug("IEQ: IEQ TX BUFFER", DUMP_PREFIX_OFFSET,
+ 16, 8, txbuf->mem.va, txbuf->totallen,
+ false);
+ txbuf->do_lpb = true;
+ irdma_puda_send_buf(ieq, txbuf);
+
+ if (!datalen) {
+ pfpdu->rcv_nxt = buf->seqnum + len;
+ irdma_puda_ret_bufpool(ieq, buf);
+ return 0;
+ }
+ buf->data = datap;
+ buf->seqnum = seqnum + len;
+ buf->datalen = datalen;
+ pfpdu->rcv_nxt = buf->seqnum;
+ }
+ if (partial)
+ return irdma_ieq_handle_partial(ieq, pfpdu, buf, fpdu_len);
+
+ return 0;
+}
+
+/**
+ * irdma_ieq_process_fpdus - process fpdu's buffers on its list
+ * @qp: qp for which partial fpdus
+ * @ieq: ieq resource
+ */
+void irdma_ieq_process_fpdus(struct irdma_sc_qp *qp,
+ struct irdma_puda_rsrc *ieq)
+{
+ struct irdma_pfpdu *pfpdu = &qp->pfpdu;
+ struct list_head *rxlist = &pfpdu->rxlist;
+ struct irdma_puda_buf *buf;
+ int status;
+
+ do {
+ if (list_empty(rxlist))
+ break;
+ buf = irdma_puda_get_listbuf(rxlist);
+ if (!buf) {
+ ibdev_dbg(to_ibdev(ieq->dev), "IEQ: error no buf\n");
+ break;
+ }
+ if (buf->seqnum != pfpdu->rcv_nxt) {
+ /* This could be out of order or missing packet */
+ pfpdu->out_of_order++;
+ list_add(&buf->list, rxlist);
+ break;
+ }
+ /* keep processing buffers from the head of the list */
+ status = irdma_ieq_process_buf(ieq, pfpdu, buf);
+ if (status == -EINVAL) {
+ pfpdu->mpa_crc_err = true;
+ while (!list_empty(rxlist)) {
+ buf = irdma_puda_get_listbuf(rxlist);
+ irdma_puda_ret_bufpool(ieq, buf);
+ pfpdu->crc_err++;
+ ieq->crc_err++;
+ }
+ /* create CQP for AE */
+ irdma_ieq_mpa_crc_ae(ieq->dev, qp);
+ }
+ } while (!status);
+}
+
+/**
+ * irdma_ieq_create_ah - create an address handle for IEQ
+ * @qp: qp pointer
+ * @buf: buf received on IEQ used to create AH
+ */
+static int irdma_ieq_create_ah(struct irdma_sc_qp *qp, struct irdma_puda_buf *buf)
+{
+ struct irdma_ah_info ah_info = {};
+
+ qp->pfpdu.ah_buf = buf;
+ irdma_puda_ieq_get_ah_info(qp, &ah_info);
+ return irdma_puda_create_ah(qp->vsi->dev, &ah_info, false,
+ IRDMA_PUDA_RSRC_TYPE_IEQ, qp,
+ &qp->pfpdu.ah);
+}
+
+/**
+ * irdma_ieq_handle_exception - handle qp's exception
+ * @ieq: ieq resource
+ * @qp: qp receiving excpetion
+ * @buf: receive buffer
+ */
+static void irdma_ieq_handle_exception(struct irdma_puda_rsrc *ieq,
+ struct irdma_sc_qp *qp,
+ struct irdma_puda_buf *buf)
+{
+ struct irdma_pfpdu *pfpdu = &qp->pfpdu;
+ u32 *hw_host_ctx = (u32 *)qp->hw_host_ctx;
+ u32 rcv_wnd = hw_host_ctx[23];
+ /* first partial seq # in q2 */
+ u32 fps = *(u32 *)(qp->q2_buf + Q2_FPSN_OFFSET);
+ struct list_head *rxlist = &pfpdu->rxlist;
+ unsigned long flags = 0;
+ u8 hw_rev = qp->dev->hw_attrs.uk_attrs.hw_rev;
+
+ print_hex_dump_debug("IEQ: IEQ RX BUFFER", DUMP_PREFIX_OFFSET, 16, 8,
+ buf->mem.va, buf->totallen, false);
+
+ spin_lock_irqsave(&pfpdu->lock, flags);
+ pfpdu->total_ieq_bufs++;
+ if (pfpdu->mpa_crc_err) {
+ pfpdu->crc_err++;
+ goto error;
+ }
+ if (pfpdu->mode && fps != pfpdu->fps) {
+ /* clean up qp as it is new partial sequence */
+ irdma_ieq_cleanup_qp(ieq, qp);
+ ibdev_dbg(to_ibdev(ieq->dev), "IEQ: restarting new partial\n");
+ pfpdu->mode = false;
+ }
+
+ if (!pfpdu->mode) {
+ print_hex_dump_debug("IEQ: Q2 BUFFER", DUMP_PREFIX_OFFSET, 16,
+ 8, (u64 *)qp->q2_buf, 128, false);
+ /* First_Partial_Sequence_Number check */
+ pfpdu->rcv_nxt = fps;
+ pfpdu->fps = fps;
+ pfpdu->mode = true;
+ pfpdu->max_fpdu_data = (buf->ipv4) ?
+ (ieq->vsi->mtu - IRDMA_MTU_TO_MSS_IPV4) :
+ (ieq->vsi->mtu - IRDMA_MTU_TO_MSS_IPV6);
+ pfpdu->pmode_count++;
+ ieq->pmode_count++;
+ INIT_LIST_HEAD(rxlist);
+ irdma_ieq_check_first_buf(buf, fps);
+ }
+
+ if (!(rcv_wnd >= (buf->seqnum - pfpdu->rcv_nxt))) {
+ pfpdu->bad_seq_num++;
+ ieq->bad_seq_num++;
+ goto error;
+ }
+
+ if (!list_empty(rxlist)) {
+ if (buf->seqnum != pfpdu->nextseqnum) {
+ irdma_send_ieq_ack(qp);
+ /* throw away out-of-order, duplicates*/
+ goto error;
+ }
+ }
+ /* Insert buf before head */
+ list_add_tail(&buf->list, rxlist);
+ pfpdu->nextseqnum = buf->seqnum + buf->datalen;
+ pfpdu->lastrcv_buf = buf;
+ if (hw_rev >= IRDMA_GEN_2 && !pfpdu->ah) {
+ irdma_ieq_create_ah(qp, buf);
+ if (!pfpdu->ah)
+ goto error;
+ goto exit;
+ }
+ if (hw_rev == IRDMA_GEN_1)
+ irdma_ieq_process_fpdus(qp, ieq);
+ else if (pfpdu->ah && pfpdu->ah->ah_info.ah_valid)
+ irdma_ieq_process_fpdus(qp, ieq);
+exit:
+ spin_unlock_irqrestore(&pfpdu->lock, flags);
+
+ return;
+
+error:
+ irdma_puda_ret_bufpool(ieq, buf);
+ spin_unlock_irqrestore(&pfpdu->lock, flags);
+}
+
+/**
+ * irdma_ieq_receive - received exception buffer
+ * @vsi: VSI of device
+ * @buf: exception buffer received
+ */
+static void irdma_ieq_receive(struct irdma_sc_vsi *vsi,
+ struct irdma_puda_buf *buf)
+{
+ struct irdma_puda_rsrc *ieq = vsi->ieq;
+ struct irdma_sc_qp *qp = NULL;
+ u32 wqe_idx = ieq->compl_rxwqe_idx;
+
+ qp = irdma_ieq_get_qp(vsi->dev, buf);
+ if (!qp) {
+ ieq->stats_bad_qp_id++;
+ irdma_puda_ret_bufpool(ieq, buf);
+ } else {
+ irdma_ieq_handle_exception(ieq, qp, buf);
+ }
+ /*
+ * ieq->rx_wqe_idx is used by irdma_puda_replenish_rq()
+ * on which wqe_idx to start replenish rq
+ */
+ if (!ieq->rxq_invalid_cnt)
+ ieq->rx_wqe_idx = wqe_idx;
+ ieq->rxq_invalid_cnt++;
+}
+
+/**
+ * irdma_ieq_tx_compl - put back after sending completed exception buffer
+ * @vsi: sc VSI struct
+ * @sqwrid: pointer to puda buffer
+ */
+static void irdma_ieq_tx_compl(struct irdma_sc_vsi *vsi, void *sqwrid)
+{
+ struct irdma_puda_rsrc *ieq = vsi->ieq;
+ struct irdma_puda_buf *buf = sqwrid;
+
+ irdma_puda_ret_bufpool(ieq, buf);
+}
+
+/**
+ * irdma_ieq_cleanup_qp - qp is being destroyed
+ * @ieq: ieq resource
+ * @qp: all pending fpdu buffers
+ */
+void irdma_ieq_cleanup_qp(struct irdma_puda_rsrc *ieq, struct irdma_sc_qp *qp)
+{
+ struct irdma_puda_buf *buf;
+ struct irdma_pfpdu *pfpdu = &qp->pfpdu;
+ struct list_head *rxlist = &pfpdu->rxlist;
+
+ if (qp->pfpdu.ah) {
+ irdma_puda_free_ah(ieq->dev, qp->pfpdu.ah);
+ qp->pfpdu.ah = NULL;
+ qp->pfpdu.ah_buf = NULL;
+ }
+
+ if (!pfpdu->mode)
+ return;
+
+ while (!list_empty(rxlist)) {
+ buf = irdma_puda_get_listbuf(rxlist);
+ irdma_puda_ret_bufpool(ieq, buf);
+ }
+}
diff --git a/drivers/infiniband/hw/irdma/puda.h b/drivers/infiniband/hw/irdma/puda.h
new file mode 100644
index 000000000000..d65041bee667
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/puda.h
@@ -0,0 +1,182 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2015 - 2020 Intel Corporation */
+#ifndef IRDMA_PUDA_H
+#define IRDMA_PUDA_H
+
+#define IRDMA_IEQ_MPA_FRAMING 6
+#define IRDMA_TCP_OFFSET 40
+#define IRDMA_IPV4_PAD 20
+#define IRDMA_MRK_BLK_SZ 512
+
+enum puda_rsrc_type {
+ IRDMA_PUDA_RSRC_TYPE_ILQ = 1,
+ IRDMA_PUDA_RSRC_TYPE_IEQ,
+ IRDMA_PUDA_RSRC_TYPE_MAX, /* Must be last entry */
+};
+
+enum puda_rsrc_complete {
+ PUDA_CQ_CREATED = 1,
+ PUDA_QP_CREATED,
+ PUDA_TX_COMPLETE,
+ PUDA_RX_COMPLETE,
+ PUDA_HASH_CRC_COMPLETE,
+};
+
+struct irdma_sc_dev;
+struct irdma_sc_qp;
+struct irdma_sc_cq;
+
+struct irdma_puda_cmpl_info {
+ struct irdma_qp_uk *qp;
+ u8 q_type;
+ u8 l3proto;
+ u8 l4proto;
+ u16 vlan;
+ u32 payload_len;
+ u32 compl_error; /* No_err=0, else major and minor err code */
+ u32 qp_id;
+ u32 wqe_idx;
+ bool ipv4:1;
+ bool smac_valid:1;
+ bool vlan_valid:1;
+ u8 smac[ETH_ALEN];
+};
+
+struct irdma_puda_send_info {
+ u64 paddr; /* Physical address */
+ u32 len;
+ u32 ah_id;
+ u8 tcplen;
+ u8 maclen;
+ bool ipv4:1;
+ bool do_lpb:1;
+ void *scratch;
+};
+
+struct irdma_puda_buf {
+ struct list_head list; /* MUST be first entry */
+ struct irdma_dma_mem mem; /* DMA memory for the buffer */
+ struct irdma_puda_buf *next; /* for alloclist in rsrc struct */
+ struct irdma_virt_mem buf_mem; /* Buffer memory for this buffer */
+ void *scratch;
+ u8 *iph;
+ u8 *tcph;
+ u8 *data;
+ u16 datalen;
+ u16 vlan_id;
+ u8 tcphlen; /* tcp length in bytes */
+ u8 maclen; /* mac length in bytes */
+ u32 totallen; /* machlen+iphlen+tcphlen+datalen */
+ refcount_t refcount;
+ u8 hdrlen;
+ bool ipv4:1;
+ bool vlan_valid:1;
+ bool do_lpb:1; /* Loopback buffer */
+ bool smac_valid:1;
+ u32 seqnum;
+ u32 ah_id;
+ u8 smac[ETH_ALEN];
+ struct irdma_sc_vsi *vsi;
+};
+
+struct irdma_puda_rsrc_info {
+ void (*receive)(struct irdma_sc_vsi *vsi, struct irdma_puda_buf *buf);
+ void (*xmit_complete)(struct irdma_sc_vsi *vsi, void *sqwrid);
+ enum puda_rsrc_type type; /* ILQ or IEQ */
+ u32 count;
+ u32 pd_id;
+ u32 cq_id;
+ u32 qp_id;
+ u32 sq_size;
+ u32 rq_size;
+ u32 tx_buf_cnt; /* total bufs allocated will be rq_size + tx_buf_cnt */
+ u16 buf_size;
+ u16 stats_idx;
+ bool stats_idx_valid:1;
+ int abi_ver;
+};
+
+struct irdma_puda_rsrc {
+ struct irdma_sc_cq cq;
+ struct irdma_sc_qp qp;
+ struct irdma_sc_pd sc_pd;
+ struct irdma_sc_dev *dev;
+ struct irdma_sc_vsi *vsi;
+ struct irdma_dma_mem cqmem;
+ struct irdma_dma_mem qpmem;
+ struct irdma_virt_mem ilq_mem;
+ enum puda_rsrc_complete cmpl;
+ enum puda_rsrc_type type;
+ u16 buf_size; /*buf must be max datalen + tcpip hdr + mac */
+ u32 cq_id;
+ u32 qp_id;
+ u32 sq_size;
+ u32 rq_size;
+ u32 cq_size;
+ struct irdma_sq_uk_wr_trk_info *sq_wrtrk_array;
+ u64 *rq_wrid_array;
+ u32 compl_rxwqe_idx;
+ u32 rx_wqe_idx;
+ u32 rxq_invalid_cnt;
+ u32 tx_wqe_avail_cnt;
+ struct list_head txpend;
+ struct list_head bufpool; /* free buffers pool list for recv and xmit */
+ u32 alloc_buf_count;
+ u32 avail_buf_count; /* snapshot of currently available buffers */
+ spinlock_t bufpool_lock;
+ struct irdma_puda_buf *alloclist;
+ void (*receive)(struct irdma_sc_vsi *vsi, struct irdma_puda_buf *buf);
+ void (*xmit_complete)(struct irdma_sc_vsi *vsi, void *sqwrid);
+ /* puda stats */
+ u64 stats_buf_alloc_fail;
+ u64 stats_pkt_rcvd;
+ u64 stats_pkt_sent;
+ u64 stats_rcvd_pkt_err;
+ u64 stats_sent_pkt_q;
+ u64 stats_bad_qp_id;
+ /* IEQ stats */
+ u64 fpdu_processed;
+ u64 bad_seq_num;
+ u64 crc_err;
+ u64 pmode_count;
+ u64 partials_handled;
+ u16 stats_idx;
+ bool check_crc:1;
+ bool stats_idx_valid:1;
+};
+
+struct irdma_puda_buf *irdma_puda_get_bufpool(struct irdma_puda_rsrc *rsrc);
+void irdma_puda_ret_bufpool(struct irdma_puda_rsrc *rsrc,
+ struct irdma_puda_buf *buf);
+void irdma_puda_send_buf(struct irdma_puda_rsrc *rsrc,
+ struct irdma_puda_buf *buf);
+int irdma_puda_send(struct irdma_sc_qp *qp, struct irdma_puda_send_info *info);
+int irdma_puda_create_rsrc(struct irdma_sc_vsi *vsi,
+ struct irdma_puda_rsrc_info *info);
+void irdma_puda_dele_rsrc(struct irdma_sc_vsi *vsi, enum puda_rsrc_type type,
+ bool reset);
+int irdma_puda_poll_cmpl(struct irdma_sc_dev *dev, struct irdma_sc_cq *cq,
+ u32 *compl_err);
+
+struct irdma_sc_qp *irdma_ieq_get_qp(struct irdma_sc_dev *dev,
+ struct irdma_puda_buf *buf);
+int irdma_puda_get_tcpip_info(struct irdma_puda_cmpl_info *info,
+ struct irdma_puda_buf *buf);
+int irdma_ieq_check_mpacrc(const void *addr, u32 len, u32 val);
+void irdma_ieq_mpa_crc_ae(struct irdma_sc_dev *dev, struct irdma_sc_qp *qp);
+void irdma_ieq_update_tcpip_info(struct irdma_puda_buf *buf, u16 len, u32 seqnum);
+int irdma_cqp_qp_create_cmd(struct irdma_sc_dev *dev, struct irdma_sc_qp *qp);
+int irdma_cqp_cq_create_cmd(struct irdma_sc_dev *dev, struct irdma_sc_cq *cq);
+int irdma_cqp_qp_destroy_cmd(struct irdma_sc_dev *dev, struct irdma_sc_qp *qp);
+void irdma_cqp_cq_destroy_cmd(struct irdma_sc_dev *dev, struct irdma_sc_cq *cq);
+void irdma_puda_ieq_get_ah_info(struct irdma_sc_qp *qp,
+ struct irdma_ah_info *ah_info);
+int irdma_puda_create_ah(struct irdma_sc_dev *dev,
+ struct irdma_ah_info *ah_info, bool wait,
+ enum puda_rsrc_type type, void *cb_param,
+ struct irdma_sc_ah **ah);
+void irdma_puda_free_ah(struct irdma_sc_dev *dev, struct irdma_sc_ah *ah);
+void irdma_ieq_process_fpdus(struct irdma_sc_qp *qp,
+ struct irdma_puda_rsrc *ieq);
+void irdma_ieq_cleanup_qp(struct irdma_puda_rsrc *ieq, struct irdma_sc_qp *qp);
+#endif /*IRDMA_PROTOS_H */
diff --git a/drivers/infiniband/hw/irdma/trace.c b/drivers/infiniband/hw/irdma/trace.c
new file mode 100644
index 000000000000..fc2f56697741
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/trace.c
@@ -0,0 +1,112 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Intel Corporation */
+#define CREATE_TRACE_POINTS
+#include "trace.h"
+
+const char *print_ip_addr(struct trace_seq *p, u32 *addr, u16 port, bool ipv4)
+{
+ const char *ret = trace_seq_buffer_ptr(p);
+
+ if (ipv4) {
+ __be32 myaddr = htonl(*addr);
+
+ trace_seq_printf(p, "%pI4:%d", &myaddr, htons(port));
+ } else {
+ trace_seq_printf(p, "%pI6:%d", addr, htons(port));
+ }
+ trace_seq_putc(p, 0);
+
+ return ret;
+}
+
+const char *parse_iw_event_type(enum iw_cm_event_type iw_type)
+{
+ switch (iw_type) {
+ case IW_CM_EVENT_CONNECT_REQUEST:
+ return "IwRequest";
+ case IW_CM_EVENT_CONNECT_REPLY:
+ return "IwReply";
+ case IW_CM_EVENT_ESTABLISHED:
+ return "IwEstablished";
+ case IW_CM_EVENT_DISCONNECT:
+ return "IwDisconnect";
+ case IW_CM_EVENT_CLOSE:
+ return "IwClose";
+ }
+
+ return "Unknown";
+}
+
+const char *parse_cm_event_type(enum irdma_cm_event_type cm_type)
+{
+ switch (cm_type) {
+ case IRDMA_CM_EVENT_ESTABLISHED:
+ return "CmEstablished";
+ case IRDMA_CM_EVENT_MPA_REQ:
+ return "CmMPA_REQ";
+ case IRDMA_CM_EVENT_MPA_CONNECT:
+ return "CmMPA_CONNECT";
+ case IRDMA_CM_EVENT_MPA_ACCEPT:
+ return "CmMPA_ACCEPT";
+ case IRDMA_CM_EVENT_MPA_REJECT:
+ return "CmMPA_REJECT";
+ case IRDMA_CM_EVENT_MPA_ESTABLISHED:
+ return "CmMPA_ESTABLISHED";
+ case IRDMA_CM_EVENT_CONNECTED:
+ return "CmConnected";
+ case IRDMA_CM_EVENT_RESET:
+ return "CmReset";
+ case IRDMA_CM_EVENT_ABORTED:
+ return "CmAborted";
+ case IRDMA_CM_EVENT_UNKNOWN:
+ return "none";
+ }
+ return "Unknown";
+}
+
+const char *parse_cm_state(enum irdma_cm_node_state state)
+{
+ switch (state) {
+ case IRDMA_CM_STATE_UNKNOWN:
+ return "UNKNOWN";
+ case IRDMA_CM_STATE_INITED:
+ return "INITED";
+ case IRDMA_CM_STATE_LISTENING:
+ return "LISTENING";
+ case IRDMA_CM_STATE_SYN_RCVD:
+ return "SYN_RCVD";
+ case IRDMA_CM_STATE_SYN_SENT:
+ return "SYN_SENT";
+ case IRDMA_CM_STATE_ONE_SIDE_ESTABLISHED:
+ return "ONE_SIDE_ESTABLISHED";
+ case IRDMA_CM_STATE_ESTABLISHED:
+ return "ESTABLISHED";
+ case IRDMA_CM_STATE_ACCEPTING:
+ return "ACCEPTING";
+ case IRDMA_CM_STATE_MPAREQ_SENT:
+ return "MPAREQ_SENT";
+ case IRDMA_CM_STATE_MPAREQ_RCVD:
+ return "MPAREQ_RCVD";
+ case IRDMA_CM_STATE_MPAREJ_RCVD:
+ return "MPAREJ_RECVD";
+ case IRDMA_CM_STATE_OFFLOADED:
+ return "OFFLOADED";
+ case IRDMA_CM_STATE_FIN_WAIT1:
+ return "FIN_WAIT1";
+ case IRDMA_CM_STATE_FIN_WAIT2:
+ return "FIN_WAIT2";
+ case IRDMA_CM_STATE_CLOSE_WAIT:
+ return "CLOSE_WAIT";
+ case IRDMA_CM_STATE_TIME_WAIT:
+ return "TIME_WAIT";
+ case IRDMA_CM_STATE_LAST_ACK:
+ return "LAST_ACK";
+ case IRDMA_CM_STATE_CLOSING:
+ return "CLOSING";
+ case IRDMA_CM_STATE_LISTENER_DESTROYED:
+ return "LISTENER_DESTROYED";
+ case IRDMA_CM_STATE_CLOSED:
+ return "CLOSED";
+ }
+ return ("Bad state");
+}
diff --git a/drivers/infiniband/hw/irdma/trace.h b/drivers/infiniband/hw/irdma/trace.h
new file mode 100644
index 000000000000..b8085a66b9f8
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/trace.h
@@ -0,0 +1,3 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Intel Corporation */
+#include "trace_cm.h"
diff --git a/drivers/infiniband/hw/irdma/trace_cm.h b/drivers/infiniband/hw/irdma/trace_cm.h
new file mode 100644
index 000000000000..0d1699b55241
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/trace_cm.h
@@ -0,0 +1,460 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 - 2021 Intel Corporation */
+#if !defined(__TRACE_CM_H) || defined(TRACE_HEADER_MULTI_READ)
+#define __TRACE_CM_H
+
+#include <linux/tracepoint.h>
+#include <linux/trace_seq.h>
+
+#include "main.h"
+
+const char *print_ip_addr(struct trace_seq *p, u32 *addr, u16 port, bool ivp4);
+const char *parse_iw_event_type(enum iw_cm_event_type iw_type);
+const char *parse_cm_event_type(enum irdma_cm_event_type cm_type);
+const char *parse_cm_state(enum irdma_cm_node_state);
+#define __print_ip_addr(addr, port, ipv4) print_ip_addr(p, addr, port, ipv4)
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM irdma_cm
+
+TRACE_EVENT(irdma_create_listen,
+ TP_PROTO(struct irdma_device *iwdev, struct irdma_cm_info *cm_info),
+ TP_ARGS(iwdev, cm_info),
+ TP_STRUCT__entry(__field(struct irdma_device *, iwdev)
+ __dynamic_array(u32, laddr, 4)
+ __field(u16, lport)
+ __field(bool, ipv4)
+ ),
+ TP_fast_assign(__entry->iwdev = iwdev;
+ __entry->lport = cm_info->loc_port;
+ __entry->ipv4 = cm_info->ipv4;
+ memcpy(__get_dynamic_array(laddr),
+ cm_info->loc_addr, 4);
+ ),
+ TP_printk("iwdev=%p loc: %s",
+ __entry->iwdev,
+ __print_ip_addr(__get_dynamic_array(laddr),
+ __entry->lport, __entry->ipv4)
+ )
+);
+
+TRACE_EVENT(irdma_dec_refcnt_listen,
+ TP_PROTO(struct irdma_cm_listener *listener, void *caller),
+ TP_ARGS(listener, caller),
+ TP_STRUCT__entry(__field(struct irdma_device *, iwdev)
+ __field(u32, refcnt)
+ __dynamic_array(u32, laddr, 4)
+ __field(u16, lport)
+ __field(bool, ipv4)
+ __field(void *, caller)
+ ),
+ TP_fast_assign(__entry->iwdev = listener->iwdev;
+ __entry->lport = listener->loc_port;
+ __entry->ipv4 = listener->ipv4;
+ memcpy(__get_dynamic_array(laddr),
+ listener->loc_addr, 4);
+ ),
+ TP_printk("iwdev=%p caller=%pS loc: %s",
+ __entry->iwdev,
+ __entry->caller,
+ __print_ip_addr(__get_dynamic_array(laddr),
+ __entry->lport, __entry->ipv4)
+ )
+);
+
+DECLARE_EVENT_CLASS(listener_template,
+ TP_PROTO(struct irdma_cm_listener *listener),
+ TP_ARGS(listener),
+ TP_STRUCT__entry(__field(struct irdma_device *, iwdev)
+ __field(u16, lport)
+ __field(u16, vlan_id)
+ __field(bool, ipv4)
+ __field(enum irdma_cm_listener_state,
+ state)
+ __dynamic_array(u32, laddr, 4)
+ ),
+ TP_fast_assign(__entry->iwdev = listener->iwdev;
+ __entry->lport = listener->loc_port;
+ __entry->vlan_id = listener->vlan_id;
+ __entry->ipv4 = listener->ipv4;
+ __entry->state = listener->listener_state;
+ memcpy(__get_dynamic_array(laddr),
+ listener->loc_addr, 4);
+ ),
+ TP_printk("iwdev=%p vlan=%d loc: %s",
+ __entry->iwdev,
+ __entry->vlan_id,
+ __print_ip_addr(__get_dynamic_array(laddr),
+ __entry->lport, __entry->ipv4)
+ )
+);
+
+DEFINE_EVENT(listener_template, irdma_find_listener,
+ TP_PROTO(struct irdma_cm_listener *listener),
+ TP_ARGS(listener));
+
+DEFINE_EVENT(listener_template, irdma_del_multiple_qhash,
+ TP_PROTO(struct irdma_cm_listener *listener),
+ TP_ARGS(listener));
+
+TRACE_EVENT(irdma_negotiate_mpa_v2,
+ TP_PROTO(struct irdma_cm_node *cm_node),
+ TP_ARGS(cm_node),
+ TP_STRUCT__entry(__field(struct irdma_cm_node *, cm_node)
+ __field(u16, ord_size)
+ __field(u16, ird_size)
+ ),
+ TP_fast_assign(__entry->cm_node = cm_node;
+ __entry->ord_size = cm_node->ord_size;
+ __entry->ird_size = cm_node->ird_size;
+ ),
+ TP_printk("MPVA2 Negotiated cm_node=%p ORD:[%d], IRD:[%d]",
+ __entry->cm_node,
+ __entry->ord_size,
+ __entry->ird_size
+ )
+);
+
+DECLARE_EVENT_CLASS(tos_template,
+ TP_PROTO(struct irdma_device *iwdev, u8 tos, u8 user_pri),
+ TP_ARGS(iwdev, tos, user_pri),
+ TP_STRUCT__entry(__field(struct irdma_device *, iwdev)
+ __field(u8, tos)
+ __field(u8, user_pri)
+ ),
+ TP_fast_assign(__entry->iwdev = iwdev;
+ __entry->tos = tos;
+ __entry->user_pri = user_pri;
+ ),
+ TP_printk("iwdev=%p TOS:[%d] UP:[%d]",
+ __entry->iwdev,
+ __entry->tos,
+ __entry->user_pri
+ )
+);
+
+DEFINE_EVENT(tos_template, irdma_listener_tos,
+ TP_PROTO(struct irdma_device *iwdev, u8 tos, u8 user_pri),
+ TP_ARGS(iwdev, tos, user_pri));
+
+DEFINE_EVENT(tos_template, irdma_dcb_tos,
+ TP_PROTO(struct irdma_device *iwdev, u8 tos, u8 user_pri),
+ TP_ARGS(iwdev, tos, user_pri));
+
+DECLARE_EVENT_CLASS(qhash_template,
+ TP_PROTO(struct irdma_device *iwdev,
+ struct irdma_cm_listener *listener,
+ const char *dev_addr),
+ TP_ARGS(iwdev, listener, dev_addr),
+ TP_STRUCT__entry(__field(struct irdma_device *, iwdev)
+ __field(u16, lport)
+ __field(u16, vlan_id)
+ __field(bool, ipv4)
+ __dynamic_array(u32, laddr, 4)
+ __dynamic_array(u32, mac, ETH_ALEN)
+ ),
+ TP_fast_assign(__entry->iwdev = iwdev;
+ __entry->lport = listener->loc_port;
+ __entry->vlan_id = listener->vlan_id;
+ __entry->ipv4 = listener->ipv4;
+ memcpy(__get_dynamic_array(laddr),
+ listener->loc_addr, 4);
+ ether_addr_copy(__get_dynamic_array(mac),
+ dev_addr);
+ ),
+ TP_printk("iwdev=%p vlan=%d MAC=%6phC loc: %s",
+ __entry->iwdev,
+ __entry->vlan_id,
+ __get_dynamic_array(mac),
+ __print_ip_addr(__get_dynamic_array(laddr),
+ __entry->lport, __entry->ipv4)
+ )
+);
+
+DEFINE_EVENT(qhash_template, irdma_add_mqh_6,
+ TP_PROTO(struct irdma_device *iwdev,
+ struct irdma_cm_listener *listener,
+ const char *dev_addr),
+ TP_ARGS(iwdev, listener, dev_addr));
+
+DEFINE_EVENT(qhash_template, irdma_add_mqh_4,
+ TP_PROTO(struct irdma_device *iwdev,
+ struct irdma_cm_listener *listener,
+ const char *dev_addr),
+ TP_ARGS(iwdev, listener, dev_addr));
+
+TRACE_EVENT(irdma_addr_resolve,
+ TP_PROTO(struct irdma_device *iwdev, char *dev_addr),
+ TP_ARGS(iwdev, dev_addr),
+ TP_STRUCT__entry(__field(struct irdma_device *, iwdev)
+ __dynamic_array(u8, mac, ETH_ALEN)
+ ),
+ TP_fast_assign(__entry->iwdev = iwdev;
+ ether_addr_copy(__get_dynamic_array(mac), dev_addr);
+ ),
+ TP_printk("iwdev=%p MAC=%6phC", __entry->iwdev,
+ __get_dynamic_array(mac)
+ )
+);
+
+TRACE_EVENT(irdma_send_cm_event,
+ TP_PROTO(struct irdma_cm_node *cm_node, struct iw_cm_id *cm_id,
+ enum iw_cm_event_type type, int status, void *caller),
+ TP_ARGS(cm_node, cm_id, type, status, caller),
+ TP_STRUCT__entry(__field(struct irdma_device *, iwdev)
+ __field(struct irdma_cm_node *, cm_node)
+ __field(struct iw_cm_id *, cm_id)
+ __field(u32, refcount)
+ __field(u16, lport)
+ __field(u16, rport)
+ __field(enum irdma_cm_node_state, state)
+ __field(bool, ipv4)
+ __field(u16, vlan_id)
+ __field(int, accel)
+ __field(enum iw_cm_event_type, type)
+ __field(int, status)
+ __field(void *, caller)
+ __dynamic_array(u32, laddr, 4)
+ __dynamic_array(u32, raddr, 4)
+ ),
+ TP_fast_assign(__entry->iwdev = cm_node->iwdev;
+ __entry->cm_node = cm_node;
+ __entry->cm_id = cm_id;
+ __entry->refcount = refcount_read(&cm_node->refcnt);
+ __entry->state = cm_node->state;
+ __entry->lport = cm_node->loc_port;
+ __entry->rport = cm_node->rem_port;
+ __entry->ipv4 = cm_node->ipv4;
+ __entry->vlan_id = cm_node->vlan_id;
+ __entry->accel = cm_node->accelerated;
+ __entry->type = type;
+ __entry->status = status;
+ __entry->caller = caller;
+ memcpy(__get_dynamic_array(laddr),
+ cm_node->loc_addr, 4);
+ memcpy(__get_dynamic_array(raddr),
+ cm_node->rem_addr, 4);
+ ),
+ TP_printk("iwdev=%p caller=%pS cm_id=%p node=%p refcnt=%d vlan_id=%d accel=%d state=%s event_type=%s status=%d loc: %s rem: %s",
+ __entry->iwdev,
+ __entry->caller,
+ __entry->cm_id,
+ __entry->cm_node,
+ __entry->refcount,
+ __entry->vlan_id,
+ __entry->accel,
+ parse_cm_state(__entry->state),
+ parse_iw_event_type(__entry->type),
+ __entry->status,
+ __print_ip_addr(__get_dynamic_array(laddr),
+ __entry->lport, __entry->ipv4),
+ __print_ip_addr(__get_dynamic_array(raddr),
+ __entry->rport, __entry->ipv4)
+ )
+);
+
+TRACE_EVENT(irdma_send_cm_event_no_node,
+ TP_PROTO(struct iw_cm_id *cm_id, enum iw_cm_event_type type,
+ int status, void *caller),
+ TP_ARGS(cm_id, type, status, caller),
+ TP_STRUCT__entry(__field(struct iw_cm_id *, cm_id)
+ __field(enum iw_cm_event_type, type)
+ __field(int, status)
+ __field(void *, caller)
+ ),
+ TP_fast_assign(__entry->cm_id = cm_id;
+ __entry->type = type;
+ __entry->status = status;
+ __entry->caller = caller;
+ ),
+ TP_printk("cm_id=%p caller=%pS event_type=%s status=%d",
+ __entry->cm_id,
+ __entry->caller,
+ parse_iw_event_type(__entry->type),
+ __entry->status
+ )
+);
+
+DECLARE_EVENT_CLASS(cm_node_template,
+ TP_PROTO(struct irdma_cm_node *cm_node,
+ enum irdma_cm_event_type type, void *caller),
+ TP_ARGS(cm_node, type, caller),
+ TP_STRUCT__entry(__field(struct irdma_device *, iwdev)
+ __field(struct irdma_cm_node *, cm_node)
+ __field(u32, refcount)
+ __field(u16, lport)
+ __field(u16, rport)
+ __field(enum irdma_cm_node_state, state)
+ __field(bool, ipv4)
+ __field(u16, vlan_id)
+ __field(int, accel)
+ __field(enum irdma_cm_event_type, type)
+ __field(void *, caller)
+ __dynamic_array(u32, laddr, 4)
+ __dynamic_array(u32, raddr, 4)
+ ),
+ TP_fast_assign(__entry->iwdev = cm_node->iwdev;
+ __entry->cm_node = cm_node;
+ __entry->refcount = refcount_read(&cm_node->refcnt);
+ __entry->state = cm_node->state;
+ __entry->lport = cm_node->loc_port;
+ __entry->rport = cm_node->rem_port;
+ __entry->ipv4 = cm_node->ipv4;
+ __entry->vlan_id = cm_node->vlan_id;
+ __entry->accel = cm_node->accelerated;
+ __entry->type = type;
+ __entry->caller = caller;
+ memcpy(__get_dynamic_array(laddr),
+ cm_node->loc_addr, 4);
+ memcpy(__get_dynamic_array(raddr),
+ cm_node->rem_addr, 4);
+ ),
+ TP_printk("iwdev=%p caller=%pS node=%p refcnt=%d vlan_id=%d accel=%d state=%s event_type=%s loc: %s rem: %s",
+ __entry->iwdev,
+ __entry->caller,
+ __entry->cm_node,
+ __entry->refcount,
+ __entry->vlan_id,
+ __entry->accel,
+ parse_cm_state(__entry->state),
+ parse_cm_event_type(__entry->type),
+ __print_ip_addr(__get_dynamic_array(laddr),
+ __entry->lport, __entry->ipv4),
+ __print_ip_addr(__get_dynamic_array(raddr),
+ __entry->rport, __entry->ipv4)
+ )
+);
+
+DEFINE_EVENT(cm_node_template, irdma_create_event,
+ TP_PROTO(struct irdma_cm_node *cm_node,
+ enum irdma_cm_event_type type, void *caller),
+ TP_ARGS(cm_node, type, caller));
+
+DEFINE_EVENT(cm_node_template, irdma_accept,
+ TP_PROTO(struct irdma_cm_node *cm_node,
+ enum irdma_cm_event_type type, void *caller),
+ TP_ARGS(cm_node, type, caller));
+
+DEFINE_EVENT(cm_node_template, irdma_connect,
+ TP_PROTO(struct irdma_cm_node *cm_node,
+ enum irdma_cm_event_type type, void *caller),
+ TP_ARGS(cm_node, type, caller));
+
+DEFINE_EVENT(cm_node_template, irdma_reject,
+ TP_PROTO(struct irdma_cm_node *cm_node,
+ enum irdma_cm_event_type type, void *caller),
+ TP_ARGS(cm_node, type, caller));
+
+DEFINE_EVENT(cm_node_template, irdma_find_node,
+ TP_PROTO(struct irdma_cm_node *cm_node,
+ enum irdma_cm_event_type type, void *caller),
+ TP_ARGS(cm_node, type, caller));
+
+DEFINE_EVENT(cm_node_template, irdma_send_reset,
+ TP_PROTO(struct irdma_cm_node *cm_node,
+ enum irdma_cm_event_type type, void *caller),
+ TP_ARGS(cm_node, type, caller));
+
+DEFINE_EVENT(cm_node_template, irdma_rem_ref_cm_node,
+ TP_PROTO(struct irdma_cm_node *cm_node,
+ enum irdma_cm_event_type type, void *caller),
+ TP_ARGS(cm_node, type, caller));
+
+DEFINE_EVENT(cm_node_template, irdma_cm_event_handler,
+ TP_PROTO(struct irdma_cm_node *cm_node,
+ enum irdma_cm_event_type type, void *caller),
+ TP_ARGS(cm_node, type, caller));
+
+TRACE_EVENT(open_err_template,
+ TP_PROTO(struct irdma_cm_node *cm_node, bool reset, void *caller),
+ TP_ARGS(cm_node, reset, caller),
+ TP_STRUCT__entry(__field(struct irdma_device *, iwdev)
+ __field(struct irdma_cm_node *, cm_node)
+ __field(enum irdma_cm_node_state, state)
+ __field(bool, reset)
+ __field(void *, caller)
+ ),
+ TP_fast_assign(__entry->iwdev = cm_node->iwdev;
+ __entry->cm_node = cm_node;
+ __entry->state = cm_node->state;
+ __entry->reset = reset;
+ __entry->caller = caller;
+ ),
+ TP_printk("iwdev=%p caller=%pS node%p reset=%d state=%s",
+ __entry->iwdev,
+ __entry->caller,
+ __entry->cm_node,
+ __entry->reset,
+ parse_cm_state(__entry->state)
+ )
+);
+
+DEFINE_EVENT(open_err_template, irdma_active_open_err,
+ TP_PROTO(struct irdma_cm_node *cm_node, bool reset, void *caller),
+ TP_ARGS(cm_node, reset, caller));
+
+DEFINE_EVENT(open_err_template, irdma_passive_open_err,
+ TP_PROTO(struct irdma_cm_node *cm_node, bool reset, void *caller),
+ TP_ARGS(cm_node, reset, caller));
+
+DECLARE_EVENT_CLASS(cm_node_ah_template,
+ TP_PROTO(struct irdma_cm_node *cm_node),
+ TP_ARGS(cm_node),
+ TP_STRUCT__entry(__field(struct irdma_device *, iwdev)
+ __field(struct irdma_cm_node *, cm_node)
+ __field(struct irdma_sc_ah *, ah)
+ __field(u32, refcount)
+ __field(u16, lport)
+ __field(u16, rport)
+ __field(enum irdma_cm_node_state, state)
+ __field(bool, ipv4)
+ __field(u16, vlan_id)
+ __field(int, accel)
+ __dynamic_array(u32, laddr, 4)
+ __dynamic_array(u32, raddr, 4)
+ ),
+ TP_fast_assign(__entry->iwdev = cm_node->iwdev;
+ __entry->cm_node = cm_node;
+ __entry->ah = cm_node->ah;
+ __entry->refcount = refcount_read(&cm_node->refcnt);
+ __entry->lport = cm_node->loc_port;
+ __entry->rport = cm_node->rem_port;
+ __entry->state = cm_node->state;
+ __entry->ipv4 = cm_node->ipv4;
+ __entry->vlan_id = cm_node->vlan_id;
+ __entry->accel = cm_node->accelerated;
+ memcpy(__get_dynamic_array(laddr),
+ cm_node->loc_addr, 4);
+ memcpy(__get_dynamic_array(raddr),
+ cm_node->rem_addr, 4);
+ ),
+ TP_printk("iwdev=%p node=%p ah=%p refcnt=%d vlan_id=%d accel=%d state=%s loc: %s rem: %s",
+ __entry->iwdev,
+ __entry->cm_node,
+ __entry->ah,
+ __entry->refcount,
+ __entry->vlan_id,
+ __entry->accel,
+ parse_cm_state(__entry->state),
+ __print_ip_addr(__get_dynamic_array(laddr),
+ __entry->lport, __entry->ipv4),
+ __print_ip_addr(__get_dynamic_array(raddr),
+ __entry->rport, __entry->ipv4)
+ )
+);
+
+DEFINE_EVENT(cm_node_ah_template, irdma_cm_free_ah,
+ TP_PROTO(struct irdma_cm_node *cm_node),
+ TP_ARGS(cm_node));
+
+DEFINE_EVENT(cm_node_ah_template, irdma_create_ah,
+ TP_PROTO(struct irdma_cm_node *cm_node),
+ TP_ARGS(cm_node));
+
+#endif /* __TRACE_CM_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace_cm
+#include <trace/define_trace.h>
diff --git a/drivers/infiniband/hw/irdma/type.h b/drivers/infiniband/hw/irdma/type.h
new file mode 100644
index 000000000000..cab4896640a1
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/type.h
@@ -0,0 +1,1674 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2015 - 2021 Intel Corporation */
+#ifndef IRDMA_TYPE_H
+#define IRDMA_TYPE_H
+#include "osdep.h"
+#include "irdma.h"
+#include "user.h"
+#include "hmc.h"
+#include "uda.h"
+#include "ws.h"
+#include "virtchnl.h"
+
+#define IRDMA_DEBUG_ERR "ERR"
+#define IRDMA_DEBUG_INIT "INIT"
+#define IRDMA_DEBUG_DEV "DEV"
+#define IRDMA_DEBUG_CM "CM"
+#define IRDMA_DEBUG_VERBS "VERBS"
+#define IRDMA_DEBUG_PUDA "PUDA"
+#define IRDMA_DEBUG_ILQ "ILQ"
+#define IRDMA_DEBUG_IEQ "IEQ"
+#define IRDMA_DEBUG_QP "QP"
+#define IRDMA_DEBUG_CQ "CQ"
+#define IRDMA_DEBUG_MR "MR"
+#define IRDMA_DEBUG_PBLE "PBLE"
+#define IRDMA_DEBUG_WQE "WQE"
+#define IRDMA_DEBUG_AEQ "AEQ"
+#define IRDMA_DEBUG_CQP "CQP"
+#define IRDMA_DEBUG_HMC "HMC"
+#define IRDMA_DEBUG_USER "USER"
+#define IRDMA_DEBUG_VIRT "VIRT"
+#define IRDMA_DEBUG_DCB "DCB"
+#define IRDMA_DEBUG_CQE "CQE"
+#define IRDMA_DEBUG_CLNT "CLNT"
+#define IRDMA_DEBUG_WS "WS"
+#define IRDMA_DEBUG_STATS "STATS"
+
+enum irdma_page_size {
+ IRDMA_PAGE_SIZE_4K = 0,
+ IRDMA_PAGE_SIZE_2M,
+ IRDMA_PAGE_SIZE_1G,
+};
+
+enum irdma_hdrct_flags {
+ DDP_LEN_FLAG = 0x80,
+ DDP_HDR_FLAG = 0x40,
+ RDMA_HDR_FLAG = 0x20,
+};
+
+enum irdma_term_layers {
+ LAYER_RDMA = 0,
+ LAYER_DDP = 1,
+ LAYER_MPA = 2,
+};
+
+enum irdma_term_error_types {
+ RDMAP_REMOTE_PROT = 1,
+ RDMAP_REMOTE_OP = 2,
+ DDP_CATASTROPHIC = 0,
+ DDP_TAGGED_BUF = 1,
+ DDP_UNTAGGED_BUF = 2,
+ DDP_LLP = 3,
+};
+
+enum irdma_term_rdma_errors {
+ RDMAP_INV_STAG = 0x00,
+ RDMAP_INV_BOUNDS = 0x01,
+ RDMAP_ACCESS = 0x02,
+ RDMAP_UNASSOC_STAG = 0x03,
+ RDMAP_TO_WRAP = 0x04,
+ RDMAP_INV_RDMAP_VER = 0x05,
+ RDMAP_UNEXPECTED_OP = 0x06,
+ RDMAP_CATASTROPHIC_LOCAL = 0x07,
+ RDMAP_CATASTROPHIC_GLOBAL = 0x08,
+ RDMAP_CANT_INV_STAG = 0x09,
+ RDMAP_UNSPECIFIED = 0xff,
+};
+
+enum irdma_term_ddp_errors {
+ DDP_CATASTROPHIC_LOCAL = 0x00,
+ DDP_TAGGED_INV_STAG = 0x00,
+ DDP_TAGGED_BOUNDS = 0x01,
+ DDP_TAGGED_UNASSOC_STAG = 0x02,
+ DDP_TAGGED_TO_WRAP = 0x03,
+ DDP_TAGGED_INV_DDP_VER = 0x04,
+ DDP_UNTAGGED_INV_QN = 0x01,
+ DDP_UNTAGGED_INV_MSN_NO_BUF = 0x02,
+ DDP_UNTAGGED_INV_MSN_RANGE = 0x03,
+ DDP_UNTAGGED_INV_MO = 0x04,
+ DDP_UNTAGGED_INV_TOO_LONG = 0x05,
+ DDP_UNTAGGED_INV_DDP_VER = 0x06,
+};
+
+enum irdma_term_mpa_errors {
+ MPA_CLOSED = 0x01,
+ MPA_CRC = 0x02,
+ MPA_MARKER = 0x03,
+ MPA_REQ_RSP = 0x04,
+};
+
+enum irdma_hw_stats_index {
+ /* gen1 - 32-bit */
+ IRDMA_HW_STAT_INDEX_IP4RXDISCARD = 0,
+ IRDMA_HW_STAT_INDEX_IP4RXTRUNC = 1,
+ IRDMA_HW_STAT_INDEX_IP4TXNOROUTE = 2,
+ IRDMA_HW_STAT_INDEX_IP6RXDISCARD = 3,
+ IRDMA_HW_STAT_INDEX_IP6RXTRUNC = 4,
+ IRDMA_HW_STAT_INDEX_IP6TXNOROUTE = 5,
+ IRDMA_HW_STAT_INDEX_TCPRTXSEG = 6,
+ IRDMA_HW_STAT_INDEX_TCPRXOPTERR = 7,
+ IRDMA_HW_STAT_INDEX_TCPRXPROTOERR = 8,
+ IRDMA_HW_STAT_INDEX_RXVLANERR = 9,
+ /* gen1 - 64-bit */
+ IRDMA_HW_STAT_INDEX_IP4RXOCTS = 10,
+ IRDMA_HW_STAT_INDEX_IP4RXPKTS = 11,
+ IRDMA_HW_STAT_INDEX_IP4RXFRAGS = 12,
+ IRDMA_HW_STAT_INDEX_IP4RXMCPKTS = 13,
+ IRDMA_HW_STAT_INDEX_IP4TXOCTS = 14,
+ IRDMA_HW_STAT_INDEX_IP4TXPKTS = 15,
+ IRDMA_HW_STAT_INDEX_IP4TXFRAGS = 16,
+ IRDMA_HW_STAT_INDEX_IP4TXMCPKTS = 17,
+ IRDMA_HW_STAT_INDEX_IP6RXOCTS = 18,
+ IRDMA_HW_STAT_INDEX_IP6RXPKTS = 19,
+ IRDMA_HW_STAT_INDEX_IP6RXFRAGS = 20,
+ IRDMA_HW_STAT_INDEX_IP6RXMCPKTS = 21,
+ IRDMA_HW_STAT_INDEX_IP6TXOCTS = 22,
+ IRDMA_HW_STAT_INDEX_IP6TXPKTS = 23,
+ IRDMA_HW_STAT_INDEX_IP6TXFRAGS = 24,
+ IRDMA_HW_STAT_INDEX_IP6TXMCPKTS = 25,
+ IRDMA_HW_STAT_INDEX_TCPRXSEGS = 26,
+ IRDMA_HW_STAT_INDEX_TCPTXSEG = 27,
+ IRDMA_HW_STAT_INDEX_RDMARXRDS = 28,
+ IRDMA_HW_STAT_INDEX_RDMARXSNDS = 29,
+ IRDMA_HW_STAT_INDEX_RDMARXWRS = 30,
+ IRDMA_HW_STAT_INDEX_RDMATXRDS = 31,
+ IRDMA_HW_STAT_INDEX_RDMATXSNDS = 32,
+ IRDMA_HW_STAT_INDEX_RDMATXWRS = 33,
+ IRDMA_HW_STAT_INDEX_RDMAVBND = 34,
+ IRDMA_HW_STAT_INDEX_RDMAVINV = 35,
+ IRDMA_HW_STAT_INDEX_IP4RXMCOCTS = 36,
+ IRDMA_HW_STAT_INDEX_IP4TXMCOCTS = 37,
+ IRDMA_HW_STAT_INDEX_IP6RXMCOCTS = 38,
+ IRDMA_HW_STAT_INDEX_IP6TXMCOCTS = 39,
+ IRDMA_HW_STAT_INDEX_UDPRXPKTS = 40,
+ IRDMA_HW_STAT_INDEX_UDPTXPKTS = 41,
+ IRDMA_HW_STAT_INDEX_MAX_GEN_1 = 42, /* Must be same value as next entry */
+ /* gen2 - 64-bit */
+ IRDMA_HW_STAT_INDEX_RXNPECNMARKEDPKTS = 42,
+ /* gen2 - 32-bit */
+ IRDMA_HW_STAT_INDEX_RXRPCNPHANDLED = 43,
+ IRDMA_HW_STAT_INDEX_RXRPCNPIGNORED = 44,
+ IRDMA_HW_STAT_INDEX_TXNPCNPSENT = 45,
+ IRDMA_HW_STAT_INDEX_MAX_GEN_2 = 46,
+
+ /* gen3 */
+ IRDMA_HW_STAT_INDEX_RNR_SENT = 46,
+ IRDMA_HW_STAT_INDEX_RNR_RCVD = 47,
+ IRDMA_HW_STAT_INDEX_RDMAORDLMTCNT = 48,
+ IRDMA_HW_STAT_INDEX_RDMAIRDLMTCNT = 49,
+ IRDMA_HW_STAT_INDEX_RDMARXATS = 50,
+ IRDMA_HW_STAT_INDEX_RDMATXATS = 51,
+ IRDMA_HW_STAT_INDEX_NAKSEQERR = 52,
+ IRDMA_HW_STAT_INDEX_NAKSEQERR_IMPLIED = 53,
+ IRDMA_HW_STAT_INDEX_RTO = 54,
+ IRDMA_HW_STAT_INDEX_RXOOOPKTS = 55,
+ IRDMA_HW_STAT_INDEX_ICRCERR = 56,
+
+ IRDMA_HW_STAT_INDEX_MAX_GEN_3 = 57,
+};
+
+enum irdma_feature_type {
+ IRDMA_FEATURE_FW_INFO = 0,
+ IRDMA_HW_VERSION_INFO = 1,
+ IRDMA_QP_MAX_INCR = 2,
+ IRDMA_CQ_MAX_INCR = 3,
+ IRDMA_CEQ_MAX_INCR = 4,
+ IRDMA_SD_MAX_INCR = 5,
+ IRDMA_MR_MAX_INCR = 6,
+ IRDMA_Q1_MAX_INCR = 7,
+ IRDMA_AH_MAX_INCR = 8,
+ IRDMA_SRQ_MAX_INCR = 9,
+ IRDMA_TIMER_MAX_INCR = 10,
+ IRDMA_XF_MAX_INCR = 11,
+ IRDMA_RRF_MAX_INCR = 12,
+ IRDMA_PBLE_MAX_INCR = 13,
+ IRDMA_OBJ_1 = 22,
+ IRDMA_OBJ_2 = 23,
+ IRDMA_ENDPT_TRK = 24,
+ IRDMA_FTN_INLINE_MAX = 25,
+ IRDMA_QSETS_MAX = 26,
+ IRDMA_ASO = 27,
+ IRDMA_FTN_FLAGS = 32,
+ IRDMA_FTN_NOP = 33,
+ IRDMA_MAX_FEATURES, /* Must be last entry */
+};
+
+enum irdma_sched_prio_type {
+ IRDMA_PRIO_WEIGHTED_RR = 1,
+ IRDMA_PRIO_STRICT = 2,
+ IRDMA_PRIO_WEIGHTED_STRICT = 3,
+};
+
+enum irdma_vm_vf_type {
+ IRDMA_VF_TYPE = 0,
+ IRDMA_VM_TYPE,
+ IRDMA_PF_TYPE,
+};
+
+enum irdma_cqp_hmc_profile {
+ IRDMA_HMC_PROFILE_DEFAULT = 1,
+ IRDMA_HMC_PROFILE_FAVOR_VF = 2,
+ IRDMA_HMC_PROFILE_EQUAL = 3,
+};
+
+enum irdma_quad_entry_type {
+ IRDMA_QHASH_TYPE_TCP_ESTABLISHED = 1,
+ IRDMA_QHASH_TYPE_TCP_SYN,
+ IRDMA_QHASH_TYPE_UDP_UNICAST,
+ IRDMA_QHASH_TYPE_UDP_MCAST,
+ IRDMA_QHASH_TYPE_ROCE_MCAST,
+ IRDMA_QHASH_TYPE_ROCEV2_HW,
+};
+
+enum irdma_quad_hash_manage_type {
+ IRDMA_QHASH_MANAGE_TYPE_DELETE = 0,
+ IRDMA_QHASH_MANAGE_TYPE_ADD,
+ IRDMA_QHASH_MANAGE_TYPE_MODIFY,
+};
+
+enum irdma_syn_rst_handling {
+ IRDMA_SYN_RST_HANDLING_HW_TCP_SECURE = 0,
+ IRDMA_SYN_RST_HANDLING_HW_TCP,
+ IRDMA_SYN_RST_HANDLING_FW_TCP_SECURE,
+ IRDMA_SYN_RST_HANDLING_FW_TCP,
+};
+
+enum irdma_queue_type {
+ IRDMA_QUEUE_TYPE_SQ_RQ = 0,
+ IRDMA_QUEUE_TYPE_CQP,
+ IRDMA_QUEUE_TYPE_SRQ,
+};
+
+struct irdma_sc_dev;
+struct irdma_vsi_pestat;
+
+struct irdma_dcqcn_cc_params {
+ u8 cc_cfg_valid;
+ u8 min_dec_factor;
+ u8 min_rate;
+ u8 dcqcn_f;
+ u16 rai_factor;
+ u16 hai_factor;
+ u16 dcqcn_t;
+ u32 dcqcn_b;
+ u32 rreduce_mperiod;
+};
+
+struct irdma_cqp_init_info {
+ u64 cqp_compl_ctx;
+ u64 host_ctx_pa;
+ u64 sq_pa;
+ struct irdma_sc_dev *dev;
+ struct irdma_cqp_quanta *sq;
+ struct irdma_dcqcn_cc_params dcqcn_params;
+ __le64 *host_ctx;
+ u64 *scratch_array;
+ u32 sq_size;
+ struct irdma_ooo_cqp_op *ooo_op_array;
+ u32 pe_en_vf_cnt;
+ u16 hw_maj_ver;
+ u16 hw_min_ver;
+ u8 struct_ver;
+ u8 hmc_profile;
+ u8 ena_vf_count;
+ u8 ceqs_per_vf;
+ u8 ooisc_blksize;
+ u8 rrsp_blksize;
+ u8 q1_blksize;
+ u8 xmit_blksize;
+ u8 ts_override;
+ u8 ts_shift;
+ u8 en_fine_grained_timers;
+ u8 blksizes_valid;
+ bool en_datacenter_tcp:1;
+ bool disable_packed:1;
+ bool rocev2_rto_policy:1;
+ enum irdma_protocol_used protocol_used;
+};
+
+struct irdma_terminate_hdr {
+ u8 layer_etype;
+ u8 error_code;
+ u8 hdrct;
+ u8 rsvd;
+};
+
+struct irdma_cqp_sq_wqe {
+ __le64 buf[IRDMA_CQP_WQE_SIZE];
+};
+
+struct irdma_sc_aeqe {
+ __le64 buf[IRDMA_AEQE_SIZE];
+};
+
+struct irdma_ceqe {
+ __le64 buf[IRDMA_CEQE_SIZE];
+};
+
+struct irdma_cqp_ctx {
+ __le64 buf[IRDMA_CQP_CTX_SIZE];
+};
+
+struct irdma_cq_shadow_area {
+ __le64 buf[IRDMA_SHADOW_AREA_SIZE];
+};
+
+struct irdma_dev_hw_stats_offsets {
+ u32 stats_offset[IRDMA_HW_STAT_INDEX_MAX_GEN_1];
+};
+
+struct irdma_dev_hw_stats {
+ u64 stats_val[IRDMA_GATHER_STATS_BUF_SIZE / sizeof(u64)];
+};
+
+struct irdma_gather_stats {
+ u64 val[IRDMA_GATHER_STATS_BUF_SIZE / sizeof(u64)];
+};
+
+struct irdma_hw_stat_map {
+ u16 byteoff;
+ u8 bitoff;
+ u64 bitmask;
+};
+
+struct irdma_stats_gather_info {
+ bool use_hmc_fcn_index:1;
+ bool use_stats_inst:1;
+ u8 hmc_fcn_index;
+ u8 stats_inst_index;
+ struct irdma_dma_mem stats_buff_mem;
+ void *gather_stats_va;
+ void *last_gather_stats_va;
+};
+
+struct irdma_vsi_pestat {
+ struct irdma_hw *hw;
+ struct irdma_dev_hw_stats hw_stats;
+ struct irdma_stats_gather_info gather_info;
+ struct timer_list stats_timer;
+ struct irdma_sc_vsi *vsi;
+ struct irdma_dev_hw_stats last_hw_stats;
+ spinlock_t lock; /* rdma stats lock */
+};
+
+struct irdma_mmio_region {
+ u8 __iomem *addr;
+ resource_size_t len;
+ resource_size_t offset;
+};
+
+struct irdma_hw {
+ union {
+ u8 __iomem *hw_addr;
+ struct {
+ struct irdma_mmio_region rdma_reg; /* RDMA region */
+ struct irdma_mmio_region *io_regs; /* Non-RDMA MMIO regions */
+ u16 num_io_regions; /* Number of Non-RDMA MMIO regions */
+ };
+ };
+ struct device *device;
+ struct irdma_hmc_info hmc;
+};
+
+struct irdma_pfpdu {
+ struct list_head rxlist;
+ u32 rcv_nxt;
+ u32 fps;
+ u32 max_fpdu_data;
+ u32 nextseqnum;
+ u32 rcv_start_seq;
+ bool mode:1;
+ bool mpa_crc_err:1;
+ u8 marker_len;
+ u64 total_ieq_bufs;
+ u64 fpdu_processed;
+ u64 bad_seq_num;
+ u64 crc_err;
+ u64 no_tx_bufs;
+ u64 tx_err;
+ u64 out_of_order;
+ u64 pmode_count;
+ struct irdma_sc_ah *ah;
+ struct irdma_puda_buf *ah_buf;
+ spinlock_t lock; /* fpdu processing lock */
+ struct irdma_puda_buf *lastrcv_buf;
+};
+
+struct irdma_sc_pd {
+ struct irdma_sc_dev *dev;
+ u32 pd_id;
+ int abi_ver;
+};
+
+struct irdma_cqp_quanta {
+ __le64 elem[IRDMA_CQP_WQE_SIZE];
+};
+
+struct irdma_ooo_cqp_op {
+ struct list_head list_entry;
+ u64 scratch;
+ u32 def_info;
+ u32 sw_def_info;
+ u32 wqe_idx;
+ bool deferred:1;
+};
+
+struct irdma_sc_cqp {
+ spinlock_t ooo_list_lock; /* protects list of pending completions */
+ struct list_head ooo_avail;
+ struct list_head ooo_pnd;
+ u32 last_def_cmpl_ticket;
+ u32 sw_def_cmpl_ticket;
+ u32 size;
+ u64 sq_pa;
+ u64 host_ctx_pa;
+ void *back_cqp;
+ struct irdma_sc_dev *dev;
+ int (*process_cqp_sds)(struct irdma_sc_dev *dev,
+ struct irdma_update_sds_info *info);
+ struct irdma_dma_mem sdbuf;
+ struct irdma_ring sq_ring;
+ struct irdma_cqp_quanta *sq_base;
+ struct irdma_dcqcn_cc_params dcqcn_params;
+ __le64 *host_ctx;
+ u64 *scratch_array;
+ u64 requested_ops;
+ atomic64_t completed_ops;
+ struct irdma_ooo_cqp_op *ooo_op_array;
+ u32 cqp_id;
+ u32 sq_size;
+ u32 pe_en_vf_cnt;
+ u32 hw_sq_size;
+ u16 hw_maj_ver;
+ u16 hw_min_ver;
+ u8 struct_ver;
+ u8 polarity;
+ u8 hmc_profile;
+ u8 ena_vf_count;
+ u8 timeout_count;
+ u8 ceqs_per_vf;
+ u8 ooisc_blksize;
+ u8 rrsp_blksize;
+ u8 q1_blksize;
+ u8 xmit_blksize;
+ u8 ts_override;
+ u8 ts_shift;
+ u8 en_fine_grained_timers;
+ u8 blksizes_valid;
+ bool en_datacenter_tcp:1;
+ bool disable_packed:1;
+ bool rocev2_rto_policy:1;
+ enum irdma_protocol_used protocol_used;
+};
+
+struct irdma_sc_aeq {
+ u32 size;
+ u64 aeq_elem_pa;
+ struct irdma_sc_dev *dev;
+ struct irdma_sc_aeqe *aeqe_base;
+ void *pbl_list;
+ u32 elem_cnt;
+ struct irdma_ring aeq_ring;
+ u8 pbl_chunk_size;
+ u32 first_pm_pbl_idx;
+ u32 msix_idx;
+ u8 polarity;
+ bool virtual_map:1;
+ bool pasid_valid:1;
+ u32 pasid;
+};
+
+struct irdma_sc_ceq {
+ u32 size;
+ u64 ceq_elem_pa;
+ struct irdma_sc_dev *dev;
+ struct irdma_ceqe *ceqe_base;
+ void *pbl_list;
+ u32 ceq_id;
+ u32 elem_cnt;
+ struct irdma_ring ceq_ring;
+ u8 pbl_chunk_size;
+ u8 tph_val;
+ u32 first_pm_pbl_idx;
+ u8 polarity;
+ u16 vsi_idx;
+ bool virtual_map:1;
+ bool tph_en:1;
+ bool itr_no_expire:1;
+ bool pasid_valid:1;
+ u32 pasid;
+};
+
+struct irdma_sc_cq {
+ struct irdma_cq_uk cq_uk;
+ u64 cq_pa;
+ u64 shadow_area_pa;
+ struct irdma_sc_dev *dev;
+ u16 vsi_idx;
+ struct irdma_sc_vsi *vsi;
+ void *pbl_list;
+ void *back_cq;
+ u32 ceq_id;
+ u32 shadow_read_threshold;
+ u8 pbl_chunk_size;
+ u8 cq_type;
+ u8 tph_val;
+ u32 first_pm_pbl_idx;
+ bool ceqe_mask:1;
+ bool virtual_map:1;
+ bool check_overflow:1;
+ bool ceq_id_valid:1;
+ bool tph_en;
+};
+
+struct irdma_sc_qp {
+ struct irdma_qp_uk qp_uk;
+ u64 sq_pa;
+ u64 rq_pa;
+ u64 hw_host_ctx_pa;
+ u64 shadow_area_pa;
+ u64 q2_pa;
+ struct irdma_sc_dev *dev;
+ struct irdma_sc_vsi *vsi;
+ struct irdma_sc_pd *pd;
+ __le64 *hw_host_ctx;
+ void *llp_stream_handle;
+ struct irdma_pfpdu pfpdu;
+ u32 ieq_qp;
+ u8 *q2_buf;
+ u64 qp_compl_ctx;
+ u32 push_idx;
+ u16 qs_handle;
+ u16 push_offset;
+ u8 flush_wqes_count;
+ u8 sq_tph_val;
+ u8 rq_tph_val;
+ u8 qp_state;
+ u8 hw_sq_size;
+ u8 hw_rq_size;
+ u8 src_mac_addr_idx;
+ bool on_qoslist:1;
+ bool ieq_pass_thru:1;
+ bool sq_tph_en:1;
+ bool rq_tph_en:1;
+ bool rcv_tph_en:1;
+ bool xmit_tph_en:1;
+ bool virtual_map:1;
+ bool flush_sq:1;
+ bool flush_rq:1;
+ bool err_sq_idx_valid:1;
+ bool err_rq_idx_valid:1;
+ u32 err_sq_idx;
+ u32 err_rq_idx;
+ bool sq_flush_code:1;
+ bool rq_flush_code:1;
+ u32 pkt_limit;
+ enum irdma_flush_opcode flush_code;
+ enum irdma_qp_event_type event_type;
+ u8 term_flags;
+ u8 user_pri;
+ struct list_head list;
+};
+
+struct irdma_stats_inst_info {
+ bool use_hmc_fcn_index;
+ u8 hmc_fn_id;
+ u16 stats_idx;
+};
+
+struct irdma_up_info {
+ u8 map[8];
+ u8 cnp_up_override;
+ u16 hmc_fcn_idx;
+ bool use_vlan:1;
+ bool use_cnp_up_override:1;
+};
+
+#define IRDMA_MAX_WS_NODES 0x3FF
+#define IRDMA_WS_NODE_INVALID 0xFFFF
+
+struct irdma_ws_node_info {
+ u16 id;
+ u16 vsi;
+ u16 parent_id;
+ u16 qs_handle;
+ bool type_leaf:1;
+ bool enable:1;
+ u8 prio_type;
+ u8 tc;
+ u8 weight;
+};
+
+struct irdma_hmc_fpm_misc {
+ u32 max_ceqs;
+ u32 max_sds;
+ u32 loc_mem_pages;
+ u8 ird;
+ u32 xf_block_size;
+ u32 q1_block_size;
+ u32 ht_multiplier;
+ u32 timer_bucket;
+ u32 rrf_block_size;
+ u32 ooiscf_block_size;
+};
+
+#define IRDMA_VCHNL_MAX_MSG_SIZE 512
+#define IRDMA_LEAF_DEFAULT_REL_BW 64
+#define IRDMA_PARENT_DEFAULT_REL_BW 1
+
+struct irdma_qos {
+ struct list_head qplist;
+ struct mutex qos_mutex; /* protect QoS attributes per QoS level */
+ u64 lan_qos_handle;
+ u32 l2_sched_node_id;
+ u16 qs_handle;
+ u8 traffic_class;
+ u8 rel_bw;
+ u8 prio_type;
+ bool valid;
+};
+
+#define IRDMA_INVALID_STATS_IDX 0xff
+struct irdma_sc_vsi {
+ u16 vsi_idx;
+ struct irdma_sc_dev *dev;
+ void *back_vsi;
+ u32 ilq_count;
+ struct irdma_virt_mem ilq_mem;
+ struct irdma_puda_rsrc *ilq;
+ u32 ieq_count;
+ struct irdma_virt_mem ieq_mem;
+ struct irdma_puda_rsrc *ieq;
+ u32 exception_lan_q;
+ u16 mtu;
+ u16 vm_id;
+ enum irdma_vm_vf_type vm_vf_type;
+ bool stats_inst_alloc:1;
+ bool tc_change_pending:1;
+ struct irdma_vsi_pestat *pestat;
+ atomic_t qp_suspend_reqs;
+ int (*register_qset)(struct irdma_sc_vsi *vsi,
+ struct irdma_ws_node *tc_node);
+ void (*unregister_qset)(struct irdma_sc_vsi *vsi,
+ struct irdma_ws_node *tc_node);
+ u8 qos_rel_bw;
+ u8 qos_prio_type;
+ u8 stats_idx;
+ u8 dscp_map[DSCP_MAX];
+ struct irdma_qos qos[IRDMA_MAX_USER_PRIORITY];
+ u64 hw_stats_regs[IRDMA_HW_STAT_INDEX_MAX_GEN_1];
+ bool dscp_mode:1;
+};
+
+struct irdma_sc_dev {
+ struct list_head cqp_cmd_head; /* head of the CQP command list */
+ spinlock_t cqp_lock; /* protect CQP list access */
+ bool stats_idx_array[IRDMA_MAX_STATS_COUNT_GEN_1];
+ struct irdma_dma_mem vf_fpm_query_buf[IRDMA_MAX_PE_ENA_VF_COUNT];
+ u64 fpm_query_buf_pa;
+ u64 fpm_commit_buf_pa;
+ __le64 *fpm_query_buf;
+ __le64 *fpm_commit_buf;
+ struct irdma_hw *hw;
+ u8 __iomem *db_addr;
+ u32 __iomem *wqe_alloc_db;
+ u32 __iomem *cq_arm_db;
+ u32 __iomem *aeq_alloc_db;
+ u32 __iomem *cqp_db;
+ u32 __iomem *cq_ack_db;
+ u32 __iomem *ceq_itr_mask_db;
+ u32 __iomem *aeq_itr_mask_db;
+ u32 __iomem *hw_regs[IRDMA_MAX_REGS];
+ u32 ceq_itr; /* Interrupt throttle, usecs between interrupts: 0 disabled. 2 - 8160 */
+ u64 hw_masks[IRDMA_MAX_MASKS];
+ u64 hw_shifts[IRDMA_MAX_SHIFTS];
+ const struct irdma_hw_stat_map *hw_stats_map;
+ u64 hw_stats_regs[IRDMA_HW_STAT_INDEX_MAX_GEN_1];
+ u64 feature_info[IRDMA_MAX_FEATURES];
+ u64 cqp_cmd_stats[IRDMA_MAX_CQP_OPS];
+ struct irdma_hw_attrs hw_attrs;
+ struct irdma_hmc_info *hmc_info;
+ struct irdma_vchnl_rdma_caps vc_caps;
+ u8 vc_recv_buf[IRDMA_VCHNL_MAX_MSG_SIZE];
+ u16 vc_recv_len;
+ struct irdma_sc_cqp *cqp;
+ struct irdma_sc_aeq *aeq;
+ struct irdma_sc_ceq *ceq[IRDMA_CEQ_MAX_COUNT];
+ struct irdma_sc_cq *ccq;
+ const struct irdma_irq_ops *irq_ops;
+ struct irdma_qos qos[IRDMA_MAX_USER_PRIORITY];
+ struct irdma_hmc_fpm_misc hmc_fpm_misc;
+ struct irdma_ws_node *ws_tree_root;
+ struct mutex ws_mutex; /* ws tree mutex */
+ u32 vchnl_ver;
+ u16 num_vfs;
+ u16 hmc_fn_id;
+ u16 vf_id;
+ bool privileged:1;
+ bool vchnl_up:1;
+ bool ceq_valid:1;
+ bool is_pf:1;
+ u8 protocol_used;
+ struct mutex vchnl_mutex; /* mutex to synchronize RDMA virtual channel messages */
+ u8 pci_rev;
+ int (*ws_add)(struct irdma_sc_vsi *vsi, u8 user_pri);
+ void (*ws_remove)(struct irdma_sc_vsi *vsi, u8 user_pri);
+ void (*ws_reset)(struct irdma_sc_vsi *vsi);
+};
+
+struct irdma_modify_cq_info {
+ u64 cq_pa;
+ struct irdma_cqe *cq_base;
+ u32 cq_size;
+ u32 shadow_read_threshold;
+ u8 pbl_chunk_size;
+ u32 first_pm_pbl_idx;
+ bool virtual_map:1;
+ bool check_overflow;
+ bool cq_resize:1;
+};
+
+struct irdma_srq_init_info {
+ struct irdma_sc_pd *pd;
+ struct irdma_sc_vsi *vsi;
+ u64 srq_pa;
+ u64 shadow_area_pa;
+ u32 first_pm_pbl_idx;
+ u32 pasid;
+ u32 srq_size;
+ u16 srq_limit;
+ u8 pasid_valid;
+ u8 wqe_size;
+ u8 leaf_pbl_size;
+ u8 virtual_map;
+ u8 tph_en;
+ u8 arm_limit_event;
+ u8 tph_value;
+ u8 pbl_chunk_size;
+ struct irdma_srq_uk_init_info srq_uk_init_info;
+};
+
+struct irdma_sc_srq {
+ struct irdma_sc_dev *dev;
+ struct irdma_sc_vsi *vsi;
+ struct irdma_sc_pd *pd;
+ struct irdma_srq_uk srq_uk;
+ void *back_srq;
+ u64 srq_pa;
+ u64 shadow_area_pa;
+ u32 first_pm_pbl_idx;
+ u32 pasid;
+ u32 hw_srq_size;
+ u16 srq_limit;
+ u8 pasid_valid;
+ u8 leaf_pbl_size;
+ u8 virtual_map;
+ u8 tph_en;
+ u8 arm_limit_event;
+ u8 tph_val;
+};
+
+struct irdma_modify_srq_info {
+ u16 srq_limit;
+ u8 arm_limit_event;
+};
+
+struct irdma_create_qp_info {
+ bool ord_valid:1;
+ bool tcp_ctx_valid:1;
+ bool cq_num_valid:1;
+ bool arp_cache_idx_valid:1;
+ bool mac_valid:1;
+ bool force_lpb;
+ u8 next_iwarp_state;
+};
+
+struct irdma_modify_qp_info {
+ u64 rx_win0;
+ u64 rx_win1;
+ u16 new_mss;
+ u8 next_iwarp_state;
+ u8 curr_iwarp_state;
+ u8 termlen;
+ bool ord_valid:1;
+ bool tcp_ctx_valid:1;
+ bool udp_ctx_valid:1;
+ bool cq_num_valid:1;
+ bool arp_cache_idx_valid:1;
+ bool reset_tcp_conn:1;
+ bool remove_hash_idx:1;
+ bool dont_send_term:1;
+ bool dont_send_fin:1;
+ bool cached_var_valid:1;
+ bool mss_change:1;
+ bool force_lpb:1;
+ bool mac_valid:1;
+};
+
+struct irdma_ccq_cqe_info {
+ struct irdma_sc_cqp *cqp;
+ u64 scratch;
+ u32 op_ret_val;
+ u16 maj_err_code;
+ u16 min_err_code;
+ u8 op_code;
+ bool error:1;
+ bool pending:1;
+};
+
+struct irdma_dcb_app_info {
+ u8 priority;
+ u8 selector;
+ u16 prot_id;
+};
+
+struct irdma_qos_tc_info {
+ u64 tc_ctx;
+ u8 rel_bw;
+ u8 prio_type;
+ u8 egress_virt_up;
+ u8 ingress_virt_up;
+};
+
+struct irdma_l2params {
+ struct irdma_qos_tc_info tc_info[IRDMA_MAX_USER_PRIORITY];
+ struct irdma_dcb_app_info apps[IRDMA_MAX_APPS];
+ u32 num_apps;
+ u16 qs_handle_list[IRDMA_MAX_USER_PRIORITY];
+ u16 mtu;
+ u8 up2tc[IRDMA_MAX_USER_PRIORITY];
+ u8 dscp_map[DSCP_MAX];
+ u8 num_tc;
+ u8 vsi_rel_bw;
+ u8 vsi_prio_type;
+ bool mtu_changed:1;
+ bool tc_changed:1;
+ bool dscp_mode:1;
+};
+
+struct irdma_vsi_init_info {
+ struct irdma_sc_dev *dev;
+ void *back_vsi;
+ struct irdma_l2params *params;
+ u16 exception_lan_q;
+ u16 pf_data_vsi_num;
+ enum irdma_vm_vf_type vm_vf_type;
+ u16 vm_id;
+ int (*register_qset)(struct irdma_sc_vsi *vsi,
+ struct irdma_ws_node *tc_node);
+ void (*unregister_qset)(struct irdma_sc_vsi *vsi,
+ struct irdma_ws_node *tc_node);
+};
+
+struct irdma_vsi_stats_info {
+ struct irdma_vsi_pestat *pestat;
+ u16 fcn_id;
+ bool alloc_stats_inst;
+};
+
+struct irdma_device_init_info {
+ u64 fpm_query_buf_pa;
+ u64 fpm_commit_buf_pa;
+ __le64 *fpm_query_buf;
+ __le64 *fpm_commit_buf;
+ struct irdma_hw *hw;
+ void __iomem *bar0;
+ enum irdma_protocol_used protocol_used;
+ u16 hmc_fn_id;
+};
+
+struct irdma_ceq_init_info {
+ u64 ceqe_pa;
+ struct irdma_sc_dev *dev;
+ u64 *ceqe_base;
+ void *pbl_list;
+ u32 elem_cnt;
+ u32 ceq_id;
+ bool virtual_map:1;
+ bool tph_en:1;
+ bool itr_no_expire:1;
+ u8 pbl_chunk_size;
+ u8 tph_val;
+ u16 vsi_idx;
+ u32 first_pm_pbl_idx;
+};
+
+struct irdma_aeq_init_info {
+ u64 aeq_elem_pa;
+ struct irdma_sc_dev *dev;
+ u32 *aeqe_base;
+ void *pbl_list;
+ u32 elem_cnt;
+ bool virtual_map;
+ u8 pbl_chunk_size;
+ u32 first_pm_pbl_idx;
+ u32 msix_idx;
+};
+
+struct irdma_ccq_init_info {
+ u64 cq_pa;
+ u64 shadow_area_pa;
+ struct irdma_sc_dev *dev;
+ struct irdma_cqe *cq_base;
+ __le64 *shadow_area;
+ void *pbl_list;
+ u32 num_elem;
+ u32 ceq_id;
+ u32 shadow_read_threshold;
+ bool ceqe_mask:1;
+ bool ceq_id_valid:1;
+ bool avoid_mem_cflct:1;
+ bool virtual_map:1;
+ bool tph_en:1;
+ u8 tph_val;
+ u8 pbl_chunk_size;
+ u32 first_pm_pbl_idx;
+ struct irdma_sc_vsi *vsi;
+};
+
+struct irdma_udp_offload_info {
+ bool ipv4:1;
+ bool insert_vlan_tag:1;
+ u8 ttl;
+ u8 tos;
+ u16 src_port;
+ u16 dst_port;
+ u32 dest_ip_addr[4];
+ u32 snd_mss;
+ u16 vlan_tag;
+ u16 arp_idx;
+ u32 flow_label;
+ u8 udp_state;
+ u32 psn_nxt;
+ u32 lsn;
+ u32 epsn;
+ u32 psn_max;
+ u32 psn_una;
+ u32 local_ipaddr[4];
+ u32 cwnd;
+ u8 rexmit_thresh;
+ u8 rnr_nak_thresh;
+ u8 rnr_nak_tmr;
+ u8 min_rnr_timer;
+};
+
+struct irdma_roce_offload_info {
+ u16 p_key;
+ u16 err_rq_idx;
+ u32 qkey;
+ u32 dest_qp;
+ u8 roce_tver;
+ u8 ack_credits;
+ u8 err_rq_idx_valid;
+ u32 pd_id;
+ u16 ord_size;
+ u16 ird_size;
+ bool is_qp1:1;
+ bool udprivcq_en:1;
+ bool dcqcn_en:1;
+ bool rcv_no_icrc:1;
+ bool wr_rdresp_en:1;
+ bool bind_en:1;
+ bool fast_reg_en:1;
+ bool priv_mode_en:1;
+ bool rd_en:1;
+ bool timely_en:1;
+ bool dctcp_en:1;
+ bool fw_cc_enable:1;
+ bool use_stats_inst:1;
+ u8 local_ack_timeout;
+ u16 t_high;
+ u16 t_low;
+ u8 last_byte_sent;
+ u8 mac_addr[ETH_ALEN];
+ u8 rtomin;
+};
+
+struct irdma_iwarp_offload_info {
+ u16 rcv_mark_offset;
+ u16 snd_mark_offset;
+ u8 ddp_ver;
+ u8 rdmap_ver;
+ u8 iwarp_mode;
+ u16 err_rq_idx;
+ u32 pd_id;
+ u16 ord_size;
+ u16 ird_size;
+ bool ib_rd_en:1;
+ bool align_hdrs:1;
+ bool rcv_no_mpa_crc:1;
+ bool err_rq_idx_valid:1;
+ bool snd_mark_en:1;
+ bool rcv_mark_en:1;
+ bool wr_rdresp_en:1;
+ bool bind_en:1;
+ bool fast_reg_en:1;
+ bool priv_mode_en:1;
+ bool rd_en:1;
+ bool timely_en:1;
+ bool use_stats_inst:1;
+ bool ecn_en:1;
+ bool dctcp_en:1;
+ u16 t_high;
+ u16 t_low;
+ u8 last_byte_sent;
+ u8 mac_addr[ETH_ALEN];
+ u8 rtomin;
+};
+
+struct irdma_tcp_offload_info {
+ bool ipv4:1;
+ bool no_nagle:1;
+ bool insert_vlan_tag:1;
+ bool time_stamp:1;
+ bool drop_ooo_seg:1;
+ bool avoid_stretch_ack:1;
+ bool wscale:1;
+ bool ignore_tcp_opt:1;
+ bool ignore_tcp_uns_opt:1;
+ u8 cwnd_inc_limit;
+ u8 dup_ack_thresh;
+ u8 ttl;
+ u8 src_mac_addr_idx;
+ u8 tos;
+ u16 src_port;
+ u16 dst_port;
+ u32 dest_ip_addr[4];
+ //u32 dest_ip_addr0;
+ //u32 dest_ip_addr1;
+ //u32 dest_ip_addr2;
+ //u32 dest_ip_addr3;
+ u32 snd_mss;
+ u16 syn_rst_handling;
+ u16 vlan_tag;
+ u16 arp_idx;
+ u32 flow_label;
+ u8 tcp_state;
+ u8 snd_wscale;
+ u8 rcv_wscale;
+ u32 time_stamp_recent;
+ u32 time_stamp_age;
+ u32 snd_nxt;
+ u32 snd_wnd;
+ u32 rcv_nxt;
+ u32 rcv_wnd;
+ u32 snd_max;
+ u32 snd_una;
+ u32 srtt;
+ u32 rtt_var;
+ u32 ss_thresh;
+ u32 cwnd;
+ u32 snd_wl1;
+ u32 snd_wl2;
+ u32 max_snd_window;
+ u8 rexmit_thresh;
+ u32 local_ipaddr[4];
+};
+
+struct irdma_qp_host_ctx_info {
+ u64 qp_compl_ctx;
+ union {
+ struct irdma_tcp_offload_info *tcp_info;
+ struct irdma_udp_offload_info *udp_info;
+ };
+ union {
+ struct irdma_iwarp_offload_info *iwarp_info;
+ struct irdma_roce_offload_info *roce_info;
+ };
+ u32 send_cq_num;
+ u32 rcv_cq_num;
+ u32 srq_id;
+ u32 rem_endpoint_idx;
+ u16 stats_idx;
+ bool remote_atomics_en:1;
+ bool srq_valid:1;
+ bool tcp_info_valid:1;
+ bool iwarp_info_valid:1;
+ bool stats_idx_valid:1;
+ u8 user_pri;
+};
+
+struct irdma_aeqe_info {
+ u64 compl_ctx;
+ u32 qp_cq_id;
+ u32 def_info; /* only valid for DEF_CMPL */
+ u16 ae_id;
+ u16 wqe_idx;
+ u8 tcp_state;
+ u8 iwarp_state;
+ bool qp:1;
+ bool cq:1;
+ bool sq:1;
+ bool rq:1;
+ bool srq:1;
+ bool in_rdrsp_wr:1;
+ bool out_rdrsp:1;
+ bool aeqe_overflow:1;
+ bool err_rq_idx_valid:1;
+ u8 q2_data_written;
+ u8 ae_src;
+};
+
+struct irdma_allocate_stag_info {
+ u64 total_len;
+ u64 first_pm_pbl_idx;
+ u32 chunk_size;
+ u32 stag_idx;
+ u32 page_size;
+ u32 pd_id;
+ u16 access_rights;
+ bool remote_access:1;
+ bool use_hmc_fcn_index:1;
+ bool use_pf_rid:1;
+ bool all_memory:1;
+ bool remote_atomics_en:1;
+ u16 hmc_fcn_index;
+};
+
+struct irdma_mw_alloc_info {
+ u32 mw_stag_index;
+ u32 page_size;
+ u32 pd_id;
+ bool remote_access:1;
+ bool mw_wide:1;
+ bool mw1_bind_dont_vldt_key:1;
+};
+
+struct irdma_reg_ns_stag_info {
+ u64 reg_addr_pa;
+ u64 va;
+ u64 total_len;
+ u32 page_size;
+ u32 chunk_size;
+ u32 first_pm_pbl_index;
+ enum irdma_addressing_type addr_type;
+ irdma_stag_index stag_idx;
+ u16 access_rights;
+ u32 pd_id;
+ irdma_stag_key stag_key;
+ bool use_hmc_fcn_index:1;
+ u8 hmc_fcn_index;
+ bool use_pf_rid:1;
+ bool all_memory:1;
+ bool remote_atomics_en:1;
+};
+
+struct irdma_fast_reg_stag_info {
+ u64 wr_id;
+ u64 reg_addr_pa;
+ u64 fbo;
+ void *va;
+ u64 total_len;
+ u32 page_size;
+ u32 chunk_size;
+ u32 first_pm_pbl_index;
+ enum irdma_addressing_type addr_type;
+ irdma_stag_index stag_idx;
+ u16 access_rights;
+ u32 pd_id;
+ irdma_stag_key stag_key;
+ bool local_fence:1;
+ bool read_fence:1;
+ bool signaled:1;
+ bool use_hmc_fcn_index:1;
+ u8 hmc_fcn_index;
+ bool use_pf_rid:1;
+ bool defer_flag:1;
+ bool remote_atomics_en:1;
+};
+
+struct irdma_dealloc_stag_info {
+ u32 stag_idx;
+ u32 pd_id;
+ bool mr:1;
+ bool dealloc_pbl:1;
+};
+
+struct irdma_register_shared_stag {
+ u64 va;
+ enum irdma_addressing_type addr_type;
+ irdma_stag_index new_stag_idx;
+ irdma_stag_index parent_stag_idx;
+ u32 access_rights;
+ u32 pd_id;
+ u32 page_size;
+ irdma_stag_key new_stag_key;
+};
+
+struct irdma_qp_init_info {
+ struct irdma_qp_uk_init_info qp_uk_init_info;
+ struct irdma_sc_pd *pd;
+ struct irdma_sc_vsi *vsi;
+ __le64 *host_ctx;
+ u8 *q2;
+ u64 sq_pa;
+ u64 rq_pa;
+ u64 host_ctx_pa;
+ u64 q2_pa;
+ u64 shadow_area_pa;
+ u8 sq_tph_val;
+ u8 rq_tph_val;
+ bool sq_tph_en:1;
+ bool rq_tph_en:1;
+ bool rcv_tph_en:1;
+ bool xmit_tph_en:1;
+ bool virtual_map:1;
+};
+
+struct irdma_cq_init_info {
+ struct irdma_sc_dev *dev;
+ u64 cq_base_pa;
+ u64 shadow_area_pa;
+ u32 ceq_id;
+ u32 shadow_read_threshold;
+ u8 pbl_chunk_size;
+ u32 first_pm_pbl_idx;
+ bool virtual_map:1;
+ bool ceqe_mask:1;
+ bool ceq_id_valid:1;
+ bool tph_en:1;
+ u8 tph_val;
+ u8 type;
+ struct irdma_cq_uk_init_info cq_uk_init_info;
+ struct irdma_sc_vsi *vsi;
+};
+
+struct irdma_upload_context_info {
+ u64 buf_pa;
+ u32 qp_id;
+ u8 qp_type;
+ bool freeze_qp:1;
+ bool raw_format:1;
+};
+
+struct irdma_local_mac_entry_info {
+ u8 mac_addr[6];
+ u16 entry_idx;
+};
+
+struct irdma_add_arp_cache_entry_info {
+ u8 mac_addr[ETH_ALEN];
+ u32 reach_max;
+ u16 arp_index;
+ bool permanent;
+};
+
+struct irdma_apbvt_info {
+ u16 port;
+ bool add;
+};
+
+struct irdma_qhash_table_info {
+ struct irdma_sc_vsi *vsi;
+ enum irdma_quad_hash_manage_type manage;
+ enum irdma_quad_entry_type entry_type;
+ bool vlan_valid:1;
+ bool ipv4_valid:1;
+ u8 mac_addr[ETH_ALEN];
+ u16 vlan_id;
+ u8 user_pri;
+ u32 qp_num;
+ u32 dest_ip[4];
+ u32 src_ip[4];
+ u16 dest_port;
+ u16 src_port;
+};
+
+struct irdma_cqp_manage_push_page_info {
+ u32 push_idx;
+ u16 qs_handle;
+ u8 free_page;
+ u8 push_page_type;
+};
+
+struct irdma_qp_flush_info {
+ u32 err_sq_idx;
+ u32 err_rq_idx;
+ u16 sq_minor_code;
+ u16 sq_major_code;
+ u16 rq_minor_code;
+ u16 rq_major_code;
+ u16 ae_code;
+ u8 ae_src;
+ bool sq:1;
+ bool rq:1;
+ bool userflushcode:1;
+ bool generate_ae:1;
+ bool err_sq_idx_valid:1;
+ bool err_rq_idx_valid:1;
+};
+
+struct irdma_gen_ae_info {
+ u16 ae_code;
+ u8 ae_src;
+};
+
+struct irdma_cqp_timeout {
+ u64 compl_cqp_cmds;
+ u32 count;
+};
+
+struct irdma_irq_ops {
+ void (*irdma_cfg_aeq)(struct irdma_sc_dev *dev, u32 idx, bool enable);
+ void (*irdma_cfg_ceq)(struct irdma_sc_dev *dev, u32 ceq_id, u32 idx,
+ bool enable);
+ void (*irdma_dis_irq)(struct irdma_sc_dev *dev, u32 idx);
+ void (*irdma_en_irq)(struct irdma_sc_dev *dev, u32 idx);
+};
+
+void irdma_sc_ccq_arm(struct irdma_sc_cq *ccq);
+int irdma_sc_ccq_create(struct irdma_sc_cq *ccq, u64 scratch,
+ bool check_overflow, bool post_sq);
+int irdma_sc_ccq_destroy(struct irdma_sc_cq *ccq, u64 scratch, bool post_sq);
+int irdma_sc_ccq_get_cqe_info(struct irdma_sc_cq *ccq,
+ struct irdma_ccq_cqe_info *info);
+int irdma_sc_ccq_init(struct irdma_sc_cq *ccq,
+ struct irdma_ccq_init_info *info);
+
+int irdma_sc_cceq_create(struct irdma_sc_ceq *ceq, u64 scratch);
+int irdma_sc_cceq_destroy_done(struct irdma_sc_ceq *ceq);
+
+int irdma_sc_ceq_destroy(struct irdma_sc_ceq *ceq, u64 scratch, bool post_sq);
+int irdma_sc_ceq_init(struct irdma_sc_ceq *ceq,
+ struct irdma_ceq_init_info *info);
+void irdma_sc_cleanup_ceqes(struct irdma_sc_cq *cq, struct irdma_sc_ceq *ceq);
+void *irdma_sc_process_ceq(struct irdma_sc_dev *dev, struct irdma_sc_ceq *ceq);
+
+int irdma_sc_aeq_init(struct irdma_sc_aeq *aeq,
+ struct irdma_aeq_init_info *info);
+int irdma_sc_get_next_aeqe(struct irdma_sc_aeq *aeq,
+ struct irdma_aeqe_info *info);
+void irdma_sc_repost_aeq_entries(struct irdma_sc_dev *dev, u32 count);
+
+void irdma_sc_pd_init(struct irdma_sc_dev *dev, struct irdma_sc_pd *pd, u32 pd_id,
+ int abi_ver);
+void irdma_cfg_aeq(struct irdma_sc_dev *dev, u32 idx, bool enable);
+void irdma_check_cqp_progress(struct irdma_cqp_timeout *cqp_timeout,
+ struct irdma_sc_dev *dev);
+void irdma_sc_cqp_def_cmpl_ae_handler(struct irdma_sc_dev *dev,
+ struct irdma_aeqe_info *info,
+ bool first, u64 *scratch,
+ u32 *sw_def_info);
+u64 irdma_sc_cqp_cleanup_handler(struct irdma_sc_dev *dev);
+int irdma_sc_cqp_create(struct irdma_sc_cqp *cqp, u16 *maj_err, u16 *min_err);
+int irdma_sc_cqp_destroy(struct irdma_sc_cqp *cqp);
+int irdma_sc_cqp_init(struct irdma_sc_cqp *cqp,
+ struct irdma_cqp_init_info *info);
+void irdma_sc_cqp_post_sq(struct irdma_sc_cqp *cqp);
+int irdma_sc_poll_for_cqp_op_done(struct irdma_sc_cqp *cqp, u8 opcode,
+ struct irdma_ccq_cqe_info *cmpl_info);
+int irdma_sc_fast_register(struct irdma_sc_qp *qp,
+ struct irdma_fast_reg_stag_info *info, bool post_sq);
+int irdma_sc_qp_create(struct irdma_sc_qp *qp,
+ struct irdma_create_qp_info *info, u64 scratch,
+ bool post_sq);
+int irdma_sc_qp_destroy(struct irdma_sc_qp *qp, u64 scratch,
+ bool remove_hash_idx, bool ignore_mw_bnd, bool post_sq);
+int irdma_sc_qp_flush_wqes(struct irdma_sc_qp *qp,
+ struct irdma_qp_flush_info *info, u64 scratch,
+ bool post_sq);
+int irdma_sc_qp_init(struct irdma_sc_qp *qp, struct irdma_qp_init_info *info);
+int irdma_sc_qp_modify(struct irdma_sc_qp *qp,
+ struct irdma_modify_qp_info *info, u64 scratch,
+ bool post_sq);
+void irdma_sc_send_lsmm(struct irdma_sc_qp *qp, void *lsmm_buf, u32 size,
+ irdma_stag stag);
+
+void irdma_sc_send_rtt(struct irdma_sc_qp *qp, bool read);
+void irdma_sc_qp_setctx(struct irdma_sc_qp *qp, __le64 *qp_ctx,
+ struct irdma_qp_host_ctx_info *info);
+void irdma_sc_qp_setctx_roce(struct irdma_sc_qp *qp, __le64 *qp_ctx,
+ struct irdma_qp_host_ctx_info *info);
+int irdma_sc_cq_destroy(struct irdma_sc_cq *cq, u64 scratch, bool post_sq);
+int irdma_sc_cq_init(struct irdma_sc_cq *cq, struct irdma_cq_init_info *info);
+void irdma_sc_cq_resize(struct irdma_sc_cq *cq, struct irdma_modify_cq_info *info);
+int irdma_sc_static_hmc_pages_allocated(struct irdma_sc_cqp *cqp, u64 scratch,
+ u8 hmc_fn_id, bool post_sq,
+ bool poll_registers);
+int irdma_sc_srq_init(struct irdma_sc_srq *srq,
+ struct irdma_srq_init_info *info);
+
+void sc_vsi_update_stats(struct irdma_sc_vsi *vsi);
+struct cqp_info {
+ union {
+ struct {
+ struct irdma_sc_qp *qp;
+ struct irdma_create_qp_info info;
+ u64 scratch;
+ } qp_create;
+
+ struct {
+ struct irdma_sc_qp *qp;
+ struct irdma_modify_qp_info info;
+ u64 scratch;
+ } qp_modify;
+
+ struct {
+ struct irdma_sc_qp *qp;
+ u64 scratch;
+ bool remove_hash_idx;
+ bool ignore_mw_bnd;
+ } qp_destroy;
+
+ struct {
+ struct irdma_sc_cq *cq;
+ u64 scratch;
+ bool check_overflow;
+ } cq_create;
+
+ struct {
+ struct irdma_sc_cq *cq;
+ struct irdma_modify_cq_info info;
+ u64 scratch;
+ } cq_modify;
+
+ struct {
+ struct irdma_sc_cq *cq;
+ u64 scratch;
+ } cq_destroy;
+
+ struct {
+ struct irdma_sc_dev *dev;
+ struct irdma_allocate_stag_info info;
+ u64 scratch;
+ } alloc_stag;
+
+ struct {
+ struct irdma_sc_dev *dev;
+ struct irdma_mw_alloc_info info;
+ u64 scratch;
+ } mw_alloc;
+
+ struct {
+ struct irdma_sc_dev *dev;
+ struct irdma_reg_ns_stag_info info;
+ u64 scratch;
+ } mr_reg_non_shared;
+
+ struct {
+ struct irdma_sc_dev *dev;
+ struct irdma_dealloc_stag_info info;
+ u64 scratch;
+ } dealloc_stag;
+
+ struct {
+ struct irdma_sc_cqp *cqp;
+ struct irdma_add_arp_cache_entry_info info;
+ u64 scratch;
+ } add_arp_cache_entry;
+
+ struct {
+ struct irdma_sc_cqp *cqp;
+ u64 scratch;
+ u16 arp_index;
+ } del_arp_cache_entry;
+
+ struct {
+ struct irdma_sc_cqp *cqp;
+ struct irdma_local_mac_entry_info info;
+ u64 scratch;
+ } add_local_mac_entry;
+
+ struct {
+ struct irdma_sc_cqp *cqp;
+ u64 scratch;
+ u8 entry_idx;
+ u8 ignore_ref_count;
+ } del_local_mac_entry;
+
+ struct {
+ struct irdma_sc_cqp *cqp;
+ u64 scratch;
+ } alloc_local_mac_entry;
+
+ struct {
+ struct irdma_sc_cqp *cqp;
+ struct irdma_cqp_manage_push_page_info info;
+ u64 scratch;
+ } manage_push_page;
+
+ struct {
+ struct irdma_sc_dev *dev;
+ struct irdma_upload_context_info info;
+ u64 scratch;
+ } qp_upload_context;
+
+ struct {
+ struct irdma_sc_dev *dev;
+ struct irdma_hmc_fcn_info info;
+ u64 scratch;
+ } manage_hmc_pm;
+
+ struct {
+ struct irdma_sc_ceq *ceq;
+ u64 scratch;
+ } ceq_create;
+
+ struct {
+ struct irdma_sc_ceq *ceq;
+ u64 scratch;
+ } ceq_destroy;
+
+ struct {
+ struct irdma_sc_aeq *aeq;
+ u64 scratch;
+ } aeq_create;
+
+ struct {
+ struct irdma_sc_aeq *aeq;
+ u64 scratch;
+ } aeq_destroy;
+
+ struct {
+ struct irdma_sc_qp *qp;
+ struct irdma_qp_flush_info info;
+ u64 scratch;
+ } qp_flush_wqes;
+
+ struct {
+ struct irdma_sc_qp *qp;
+ struct irdma_gen_ae_info info;
+ u64 scratch;
+ } gen_ae;
+
+ struct {
+ struct irdma_sc_cqp *cqp;
+ void *fpm_val_va;
+ u64 fpm_val_pa;
+ u8 hmc_fn_id;
+ u64 scratch;
+ } query_fpm_val;
+
+ struct {
+ struct irdma_sc_cqp *cqp;
+ void *fpm_val_va;
+ u64 fpm_val_pa;
+ u8 hmc_fn_id;
+ u64 scratch;
+ } commit_fpm_val;
+
+ struct {
+ struct irdma_sc_cqp *cqp;
+ struct irdma_apbvt_info info;
+ u64 scratch;
+ } manage_apbvt_entry;
+
+ struct {
+ struct irdma_sc_cqp *cqp;
+ struct irdma_qhash_table_info info;
+ u64 scratch;
+ } manage_qhash_table_entry;
+
+ struct {
+ struct irdma_sc_dev *dev;
+ struct irdma_update_sds_info info;
+ u64 scratch;
+ } update_pe_sds;
+
+ struct {
+ struct irdma_sc_cqp *cqp;
+ struct irdma_sc_qp *qp;
+ u64 scratch;
+ } suspend_resume;
+
+ struct {
+ struct irdma_sc_cqp *cqp;
+ struct irdma_ah_info info;
+ u64 scratch;
+ } ah_create;
+
+ struct {
+ struct irdma_sc_cqp *cqp;
+ struct irdma_ah_info info;
+ u64 scratch;
+ } ah_destroy;
+
+ struct {
+ struct irdma_sc_cqp *cqp;
+ struct irdma_mcast_grp_info info;
+ u64 scratch;
+ } mc_create;
+
+ struct {
+ struct irdma_sc_cqp *cqp;
+ struct irdma_mcast_grp_info info;
+ u64 scratch;
+ } mc_destroy;
+
+ struct {
+ struct irdma_sc_cqp *cqp;
+ struct irdma_mcast_grp_info info;
+ u64 scratch;
+ } mc_modify;
+
+ struct {
+ struct irdma_sc_cqp *cqp;
+ struct irdma_stats_inst_info info;
+ u64 scratch;
+ } stats_manage;
+
+ struct {
+ struct irdma_sc_cqp *cqp;
+ struct irdma_stats_gather_info info;
+ u64 scratch;
+ } stats_gather;
+
+ struct {
+ struct irdma_sc_cqp *cqp;
+ struct irdma_ws_node_info info;
+ u64 scratch;
+ } ws_node;
+
+ struct {
+ struct irdma_sc_cqp *cqp;
+ struct irdma_up_info info;
+ u64 scratch;
+ } up_map;
+
+ struct {
+ struct irdma_sc_cqp *cqp;
+ struct irdma_dma_mem query_buff_mem;
+ u64 scratch;
+ } query_rdma;
+
+ struct {
+ struct irdma_sc_srq *srq;
+ u64 scratch;
+ } srq_create;
+
+ struct {
+ struct irdma_sc_srq *srq;
+ struct irdma_modify_srq_info info;
+ u64 scratch;
+ } srq_modify;
+
+ struct {
+ struct irdma_sc_srq *srq;
+ u64 scratch;
+ } srq_destroy;
+
+ } u;
+};
+
+struct cqp_cmds_info {
+ struct list_head cqp_cmd_entry;
+ u8 cqp_cmd;
+ u8 post_sq;
+ struct cqp_info in;
+};
+
+__le64 *irdma_sc_cqp_get_next_send_wqe_idx(struct irdma_sc_cqp *cqp, u64 scratch,
+ u32 *wqe_idx);
+
+/**
+ * irdma_sc_cqp_get_next_send_wqe - get next wqe on cqp sq
+ * @cqp: struct for cqp hw
+ * @scratch: private data for CQP WQE
+ */
+static inline __le64 *irdma_sc_cqp_get_next_send_wqe(struct irdma_sc_cqp *cqp, u64 scratch)
+{
+ u32 wqe_idx;
+
+ return irdma_sc_cqp_get_next_send_wqe_idx(cqp, scratch, &wqe_idx);
+}
+#endif /* IRDMA_TYPE_H */
diff --git a/drivers/infiniband/hw/irdma/uda.c b/drivers/infiniband/hw/irdma/uda.c
new file mode 100644
index 000000000000..84051266d948
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/uda.c
@@ -0,0 +1,265 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2016 - 2021 Intel Corporation */
+#include <linux/etherdevice.h>
+
+#include "osdep.h"
+#include "hmc.h"
+#include "defs.h"
+#include "type.h"
+#include "protos.h"
+#include "uda.h"
+#include "uda_d.h"
+
+/**
+ * irdma_sc_access_ah() - Create, modify or delete AH
+ * @cqp: struct for cqp hw
+ * @info: ah information
+ * @op: Operation
+ * @scratch: u64 saved to be used during cqp completion
+ */
+int irdma_sc_access_ah(struct irdma_sc_cqp *cqp, struct irdma_ah_info *info,
+ u32 op, u64 scratch)
+{
+ __le64 *wqe;
+ u64 qw1, qw2;
+
+ wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe)
+ return -ENOMEM;
+
+ set_64bit_val(wqe, 0, ether_addr_to_u64(info->mac_addr) << 16);
+ qw1 = FIELD_PREP(IRDMA_UDA_CQPSQ_MAV_PDINDEXLO, info->pd_idx) |
+ FIELD_PREP(IRDMA_UDA_CQPSQ_MAV_TC, info->tc_tos) |
+ FIELD_PREP(IRDMA_UDAQPC_VLANTAG, info->vlan_tag);
+
+ qw2 = FIELD_PREP(IRDMA_UDA_CQPSQ_MAV_ARPINDEX, info->dst_arpindex) |
+ FIELD_PREP(IRDMA_UDA_CQPSQ_MAV_FLOWLABEL, info->flow_label) |
+ FIELD_PREP(IRDMA_UDA_CQPSQ_MAV_HOPLIMIT, info->hop_ttl) |
+ FIELD_PREP(IRDMA_UDA_CQPSQ_MAV_PDINDEXHI, info->pd_idx >> 16);
+
+ if (!info->ipv4_valid) {
+ set_64bit_val(wqe, 40,
+ FIELD_PREP(IRDMA_UDA_CQPSQ_MAV_ADDR0, info->dest_ip_addr[0]) |
+ FIELD_PREP(IRDMA_UDA_CQPSQ_MAV_ADDR1, info->dest_ip_addr[1]));
+ set_64bit_val(wqe, 32,
+ FIELD_PREP(IRDMA_UDA_CQPSQ_MAV_ADDR2, info->dest_ip_addr[2]) |
+ FIELD_PREP(IRDMA_UDA_CQPSQ_MAV_ADDR3, info->dest_ip_addr[3]));
+
+ set_64bit_val(wqe, 56,
+ FIELD_PREP(IRDMA_UDA_CQPSQ_MAV_ADDR0, info->src_ip_addr[0]) |
+ FIELD_PREP(IRDMA_UDA_CQPSQ_MAV_ADDR1, info->src_ip_addr[1]));
+ set_64bit_val(wqe, 48,
+ FIELD_PREP(IRDMA_UDA_CQPSQ_MAV_ADDR2, info->src_ip_addr[2]) |
+ FIELD_PREP(IRDMA_UDA_CQPSQ_MAV_ADDR3, info->src_ip_addr[3]));
+ } else {
+ set_64bit_val(wqe, 32,
+ FIELD_PREP(IRDMA_UDA_CQPSQ_MAV_ADDR3, info->dest_ip_addr[0]));
+
+ set_64bit_val(wqe, 48,
+ FIELD_PREP(IRDMA_UDA_CQPSQ_MAV_ADDR3, info->src_ip_addr[0]));
+ }
+
+ set_64bit_val(wqe, 8, qw1);
+ set_64bit_val(wqe, 16, qw2);
+
+ dma_wmb(); /* need write block before writing WQE header */
+
+ set_64bit_val(
+ wqe, 24,
+ FIELD_PREP(IRDMA_UDA_CQPSQ_MAV_WQEVALID, cqp->polarity) |
+ FIELD_PREP(IRDMA_UDA_CQPSQ_MAV_OPCODE, op) |
+ FIELD_PREP(IRDMA_UDA_CQPSQ_MAV_DOLOOPBACKK, info->do_lpbk) |
+ FIELD_PREP(IRDMA_UDA_CQPSQ_MAV_IPV4VALID, info->ipv4_valid) |
+ FIELD_PREP(IRDMA_UDA_CQPSQ_MAV_AVIDX, info->ah_idx) |
+ FIELD_PREP(IRDMA_UDA_CQPSQ_MAV_INSERTVLANTAG, info->insert_vlan_tag));
+
+ print_hex_dump_debug("WQE: MANAGE_AH WQE", DUMP_PREFIX_OFFSET, 16, 8,
+ wqe, IRDMA_CQP_WQE_SIZE * 8, false);
+ irdma_sc_cqp_post_sq(cqp);
+
+ return 0;
+}
+
+/**
+ * irdma_create_mg_ctx() - create a mcg context
+ * @info: multicast group context info
+ */
+static void irdma_create_mg_ctx(struct irdma_mcast_grp_info *info)
+{
+ struct irdma_mcast_grp_ctx_entry_info *entry_info = NULL;
+ u8 idx = 0; /* index in the array */
+ u8 ctx_idx = 0; /* index in the MG context */
+
+ memset(info->dma_mem_mc.va, 0, IRDMA_MAX_MGS_PER_CTX * sizeof(u64));
+
+ for (idx = 0; idx < IRDMA_MAX_MGS_PER_CTX; idx++) {
+ entry_info = &info->mg_ctx_info[idx];
+ if (entry_info->valid_entry) {
+ set_64bit_val((__le64 *)info->dma_mem_mc.va,
+ ctx_idx * sizeof(u64),
+ FIELD_PREP(IRDMA_UDA_MGCTX_DESTPORT, entry_info->dest_port) |
+ FIELD_PREP(IRDMA_UDA_MGCTX_VALIDENT, entry_info->valid_entry) |
+ FIELD_PREP(IRDMA_UDA_MGCTX_QPID, entry_info->qp_id));
+ ctx_idx++;
+ }
+ }
+}
+
+/**
+ * irdma_access_mcast_grp() - Access mcast group based on op
+ * @cqp: Control QP
+ * @info: multicast group context info
+ * @op: operation to perform
+ * @scratch: u64 saved to be used during cqp completion
+ */
+int irdma_access_mcast_grp(struct irdma_sc_cqp *cqp,
+ struct irdma_mcast_grp_info *info, u32 op,
+ u64 scratch)
+{
+ __le64 *wqe;
+
+ if (info->mg_id >= IRDMA_UDA_MAX_FSI_MGS) {
+ ibdev_dbg(to_ibdev(cqp->dev), "WQE: mg_id out of range\n");
+ return -EINVAL;
+ }
+
+ wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
+ if (!wqe) {
+ ibdev_dbg(to_ibdev(cqp->dev), "WQE: ring full\n");
+ return -ENOMEM;
+ }
+
+ irdma_create_mg_ctx(info);
+
+ set_64bit_val(wqe, 32, info->dma_mem_mc.pa);
+ set_64bit_val(wqe, 16,
+ FIELD_PREP(IRDMA_UDA_CQPSQ_MG_VLANID, info->vlan_id) |
+ FIELD_PREP(IRDMA_UDA_CQPSQ_QS_HANDLE, info->qs_handle));
+ set_64bit_val(wqe, 0, ether_addr_to_u64(info->dest_mac_addr));
+ set_64bit_val(wqe, 8,
+ FIELD_PREP(IRDMA_UDA_CQPSQ_MG_HMC_FCN_ID, info->hmc_fcn_id));
+
+ if (!info->ipv4_valid) {
+ set_64bit_val(wqe, 56,
+ FIELD_PREP(IRDMA_UDA_CQPSQ_MAV_ADDR0, info->dest_ip_addr[0]) |
+ FIELD_PREP(IRDMA_UDA_CQPSQ_MAV_ADDR1, info->dest_ip_addr[1]));
+ set_64bit_val(wqe, 48,
+ FIELD_PREP(IRDMA_UDA_CQPSQ_MAV_ADDR2, info->dest_ip_addr[2]) |
+ FIELD_PREP(IRDMA_UDA_CQPSQ_MAV_ADDR3, info->dest_ip_addr[3]));
+ } else {
+ set_64bit_val(wqe, 48,
+ FIELD_PREP(IRDMA_UDA_CQPSQ_MAV_ADDR3, info->dest_ip_addr[0]));
+ }
+
+ dma_wmb(); /* need write memory block before writing the WQE header. */
+
+ set_64bit_val(wqe, 24,
+ FIELD_PREP(IRDMA_UDA_CQPSQ_MG_WQEVALID, cqp->polarity) |
+ FIELD_PREP(IRDMA_UDA_CQPSQ_MG_OPCODE, op) |
+ FIELD_PREP(IRDMA_UDA_CQPSQ_MG_MGIDX, info->mg_id) |
+ FIELD_PREP(IRDMA_UDA_CQPSQ_MG_VLANVALID, info->vlan_valid) |
+ FIELD_PREP(IRDMA_UDA_CQPSQ_MG_IPV4VALID, info->ipv4_valid));
+
+ print_hex_dump_debug("WQE: MANAGE_MCG WQE", DUMP_PREFIX_OFFSET, 16, 8,
+ wqe, IRDMA_CQP_WQE_SIZE * 8, false);
+ print_hex_dump_debug("WQE: MCG_HOST CTX WQE", DUMP_PREFIX_OFFSET, 16,
+ 8, info->dma_mem_mc.va,
+ IRDMA_MAX_MGS_PER_CTX * 8, false);
+ irdma_sc_cqp_post_sq(cqp);
+
+ return 0;
+}
+
+/**
+ * irdma_compare_mgs - Compares two multicast group structures
+ * @entry1: Multcast group info
+ * @entry2: Multcast group info in context
+ */
+static bool irdma_compare_mgs(struct irdma_mcast_grp_ctx_entry_info *entry1,
+ struct irdma_mcast_grp_ctx_entry_info *entry2)
+{
+ if (entry1->dest_port == entry2->dest_port &&
+ entry1->qp_id == entry2->qp_id)
+ return true;
+
+ return false;
+}
+
+/**
+ * irdma_sc_add_mcast_grp - Allocates mcast group entry in ctx
+ * @ctx: Multcast group context
+ * @mg: Multcast group info
+ */
+int irdma_sc_add_mcast_grp(struct irdma_mcast_grp_info *ctx,
+ struct irdma_mcast_grp_ctx_entry_info *mg)
+{
+ u32 idx;
+ bool free_entry_found = false;
+ u32 free_entry_idx = 0;
+
+ /* find either an identical or a free entry for a multicast group */
+ for (idx = 0; idx < IRDMA_MAX_MGS_PER_CTX; idx++) {
+ if (ctx->mg_ctx_info[idx].valid_entry) {
+ if (irdma_compare_mgs(&ctx->mg_ctx_info[idx], mg)) {
+ ctx->mg_ctx_info[idx].use_cnt++;
+ return 0;
+ }
+ continue;
+ }
+ if (!free_entry_found) {
+ free_entry_found = true;
+ free_entry_idx = idx;
+ }
+ }
+
+ if (free_entry_found) {
+ ctx->mg_ctx_info[free_entry_idx] = *mg;
+ ctx->mg_ctx_info[free_entry_idx].valid_entry = true;
+ ctx->mg_ctx_info[free_entry_idx].use_cnt = 1;
+ ctx->no_of_mgs++;
+ return 0;
+ }
+
+ return -ENOMEM;
+}
+
+/**
+ * irdma_sc_del_mcast_grp - Delete mcast group
+ * @ctx: Multcast group context
+ * @mg: Multcast group info
+ *
+ * Finds and removes a specific mulicast group from context, all
+ * parameters must match to remove a multicast group.
+ */
+int irdma_sc_del_mcast_grp(struct irdma_mcast_grp_info *ctx,
+ struct irdma_mcast_grp_ctx_entry_info *mg)
+{
+ u32 idx;
+
+ /* find an entry in multicast group context */
+ for (idx = 0; idx < IRDMA_MAX_MGS_PER_CTX; idx++) {
+ if (!ctx->mg_ctx_info[idx].valid_entry)
+ continue;
+
+ if (irdma_compare_mgs(mg, &ctx->mg_ctx_info[idx])) {
+ ctx->mg_ctx_info[idx].use_cnt--;
+
+ if (!ctx->mg_ctx_info[idx].use_cnt) {
+ ctx->mg_ctx_info[idx].valid_entry = false;
+ ctx->no_of_mgs--;
+ /* Remove gap if element was not the last */
+ if (idx != ctx->no_of_mgs &&
+ ctx->no_of_mgs > 0) {
+ memcpy(&ctx->mg_ctx_info[idx],
+ &ctx->mg_ctx_info[ctx->no_of_mgs - 1],
+ sizeof(ctx->mg_ctx_info[idx]));
+ ctx->mg_ctx_info[ctx->no_of_mgs - 1].valid_entry = false;
+ }
+ }
+
+ return 0;
+ }
+ }
+
+ return -EINVAL;
+}
diff --git a/drivers/infiniband/hw/irdma/uda.h b/drivers/infiniband/hw/irdma/uda.h
new file mode 100644
index 000000000000..27b8701cf21b
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/uda.h
@@ -0,0 +1,87 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2016 - 2021 Intel Corporation */
+#ifndef IRDMA_UDA_H
+#define IRDMA_UDA_H
+
+#define IRDMA_UDA_MAX_FSI_MGS 4096
+#define IRDMA_UDA_MAX_PFS 16
+#define IRDMA_UDA_MAX_VFS 128
+
+struct irdma_sc_cqp;
+
+struct irdma_ah_info {
+ struct irdma_sc_vsi *vsi;
+ u32 pd_idx;
+ u32 dst_arpindex;
+ u32 dest_ip_addr[4];
+ u32 src_ip_addr[4];
+ u32 flow_label;
+ u32 ah_idx;
+ u16 vlan_tag;
+ u8 insert_vlan_tag;
+ u8 tc_tos;
+ u8 hop_ttl;
+ u8 mac_addr[ETH_ALEN];
+ bool ah_valid:1;
+ bool ipv4_valid:1;
+ bool do_lpbk:1;
+};
+
+struct irdma_sc_ah {
+ struct irdma_sc_dev *dev;
+ struct irdma_ah_info ah_info;
+};
+
+int irdma_sc_add_mcast_grp(struct irdma_mcast_grp_info *ctx,
+ struct irdma_mcast_grp_ctx_entry_info *mg);
+int irdma_sc_del_mcast_grp(struct irdma_mcast_grp_info *ctx,
+ struct irdma_mcast_grp_ctx_entry_info *mg);
+int irdma_sc_access_ah(struct irdma_sc_cqp *cqp, struct irdma_ah_info *info,
+ u32 op, u64 scratch);
+int irdma_access_mcast_grp(struct irdma_sc_cqp *cqp,
+ struct irdma_mcast_grp_info *info, u32 op,
+ u64 scratch);
+
+static inline void irdma_sc_init_ah(struct irdma_sc_dev *dev, struct irdma_sc_ah *ah)
+{
+ ah->dev = dev;
+}
+
+static inline int irdma_sc_create_ah(struct irdma_sc_cqp *cqp,
+ struct irdma_ah_info *info, u64 scratch)
+{
+ return irdma_sc_access_ah(cqp, info, IRDMA_CQP_OP_CREATE_ADDR_HANDLE,
+ scratch);
+}
+
+static inline int irdma_sc_destroy_ah(struct irdma_sc_cqp *cqp,
+ struct irdma_ah_info *info, u64 scratch)
+{
+ return irdma_sc_access_ah(cqp, info, IRDMA_CQP_OP_DESTROY_ADDR_HANDLE,
+ scratch);
+}
+
+static inline int irdma_sc_create_mcast_grp(struct irdma_sc_cqp *cqp,
+ struct irdma_mcast_grp_info *info,
+ u64 scratch)
+{
+ return irdma_access_mcast_grp(cqp, info, IRDMA_CQP_OP_CREATE_MCAST_GRP,
+ scratch);
+}
+
+static inline int irdma_sc_modify_mcast_grp(struct irdma_sc_cqp *cqp,
+ struct irdma_mcast_grp_info *info,
+ u64 scratch)
+{
+ return irdma_access_mcast_grp(cqp, info, IRDMA_CQP_OP_MODIFY_MCAST_GRP,
+ scratch);
+}
+
+static inline int irdma_sc_destroy_mcast_grp(struct irdma_sc_cqp *cqp,
+ struct irdma_mcast_grp_info *info,
+ u64 scratch)
+{
+ return irdma_access_mcast_grp(cqp, info, IRDMA_CQP_OP_DESTROY_MCAST_GRP,
+ scratch);
+}
+#endif /* IRDMA_UDA_H */
diff --git a/drivers/infiniband/hw/irdma/uda_d.h b/drivers/infiniband/hw/irdma/uda_d.h
new file mode 100644
index 000000000000..4fb4daa20722
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/uda_d.h
@@ -0,0 +1,127 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2016 - 2021 Intel Corporation */
+#ifndef IRDMA_UDA_D_H
+#define IRDMA_UDA_D_H
+
+/* L4 packet type */
+#define IRDMA_E_UDA_SQ_L4T_UNKNOWN 0
+#define IRDMA_E_UDA_SQ_L4T_TCP 1
+#define IRDMA_E_UDA_SQ_L4T_SCTP 2
+#define IRDMA_E_UDA_SQ_L4T_UDP 3
+
+/* Inner IP header type */
+#define IRDMA_E_UDA_SQ_IIPT_UNKNOWN 0
+#define IRDMA_E_UDA_SQ_IIPT_IPV6 1
+#define IRDMA_E_UDA_SQ_IIPT_IPV4_NO_CSUM 2
+#define IRDMA_E_UDA_SQ_IIPT_IPV4_CSUM 3
+#define IRDMA_UDA_QPSQ_PUSHWQE BIT_ULL(56)
+#define IRDMA_UDA_QPSQ_INLINEDATAFLAG BIT_ULL(57)
+#define IRDMA_UDA_QPSQ_INLINEDATALEN GENMASK_ULL(55, 48)
+#define IRDMA_UDA_QPSQ_ADDFRAGCNT GENMASK_ULL(41, 38)
+#define IRDMA_UDA_QPSQ_IPFRAGFLAGS GENMASK_ULL(43, 42)
+#define IRDMA_UDA_QPSQ_NOCHECKSUM BIT_ULL(45)
+#define IRDMA_UDA_QPSQ_AHIDXVALID BIT_ULL(46)
+#define IRDMA_UDA_QPSQ_LOCAL_FENCE BIT_ULL(61)
+#define IRDMA_UDA_QPSQ_AHIDX GENMASK_ULL(16, 0)
+#define IRDMA_UDA_QPSQ_PROTOCOL GENMASK_ULL(23, 16)
+#define IRDMA_UDA_QPSQ_EXTHDRLEN GENMASK_ULL(40, 32)
+#define IRDMA_UDA_QPSQ_MULTICAST BIT_ULL(63)
+#define IRDMA_UDA_QPSQ_MACLEN GENMASK_ULL(62, 56)
+#define IRDMA_UDA_QPSQ_MACLEN_LINE 2
+#define IRDMA_UDA_QPSQ_IPLEN GENMASK_ULL(54, 48)
+#define IRDMA_UDA_QPSQ_IPLEN_LINE 2
+#define IRDMA_UDA_QPSQ_L4T GENMASK_ULL(31, 30)
+#define IRDMA_UDA_QPSQ_L4T_LINE 2
+#define IRDMA_UDA_QPSQ_IIPT GENMASK_ULL(29, 28)
+#define IRDMA_UDA_QPSQ_IIPT_LINE 2
+
+#define IRDMA_UDA_QPSQ_DO_LPB_LINE 3
+#define IRDMA_UDA_QPSQ_FWD_PROG_CONFIRM BIT_ULL(45)
+#define IRDMA_UDA_QPSQ_FWD_PROG_CONFIRM_LINE 3
+#define IRDMA_UDA_QPSQ_IMMDATA GENMASK_ULL(63, 0)
+
+/* Byte Offset 0 */
+#define IRDMA_UDAQPC_IPV4_M BIT_ULL(3)
+#define IRDMA_UDAQPC_INSERTVLANTAG BIT_ULL(5)
+#define IRDMA_UDAQPC_ISQP1 BIT_ULL(6)
+
+#define IRDMA_UDAQPC_ECNENABLE BIT_ULL(14)
+#define IRDMA_UDAQPC_PDINDEXHI GENMASK_ULL(21, 20)
+#define IRDMA_UDAQPC_DCTCPENABLE BIT_ULL(25)
+
+#define IRDMA_UDAQPC_RCVTPHEN IRDMAQPC_RCVTPHEN
+#define IRDMA_UDAQPC_XMITTPHEN IRDMAQPC_XMITTPHEN
+#define IRDMA_UDAQPC_RQTPHEN IRDMAQPC_RQTPHEN
+#define IRDMA_UDAQPC_SQTPHEN IRDMAQPC_SQTPHEN
+#define IRDMA_UDAQPC_PPIDX IRDMAQPC_PPIDX
+#define IRDMA_UDAQPC_PMENA IRDMAQPC_PMENA
+#define IRDMA_UDAQPC_INSERTTAG2 BIT_ULL(11)
+#define IRDMA_UDAQPC_INSERTTAG3 BIT_ULL(14)
+
+#define IRDMA_UDAQPC_RQSIZE IRDMAQPC_RQSIZE
+#define IRDMA_UDAQPC_SQSIZE IRDMAQPC_SQSIZE
+#define IRDMA_UDAQPC_TXCQNUM IRDMAQPC_TXCQNUM
+#define IRDMA_UDAQPC_RXCQNUM IRDMAQPC_RXCQNUM
+#define IRDMA_UDAQPC_QPCOMPCTX IRDMAQPC_QPCOMPCTX
+#define IRDMA_UDAQPC_SQTPHVAL IRDMAQPC_SQTPHVAL
+#define IRDMA_UDAQPC_RQTPHVAL IRDMAQPC_RQTPHVAL
+#define IRDMA_UDAQPC_QSHANDLE IRDMAQPC_QSHANDLE
+#define IRDMA_UDAQPC_RQHDRRINGBUFSIZE GENMASK_ULL(49, 48)
+#define IRDMA_UDAQPC_SQHDRRINGBUFSIZE GENMASK_ULL(33, 32)
+#define IRDMA_UDAQPC_PRIVILEGEENABLE BIT_ULL(25)
+#define IRDMA_UDAQPC_USE_STATISTICS_INSTANCE BIT_ULL(26)
+#define IRDMA_UDAQPC_STATISTICS_INSTANCE_INDEX GENMASK_ULL(6, 0)
+#define IRDMA_UDAQPC_PRIVHDRGENENABLE BIT_ULL(0)
+#define IRDMA_UDAQPC_RQHDRSPLITENABLE BIT_ULL(3)
+#define IRDMA_UDAQPC_RQHDRRINGBUFENABLE BIT_ULL(2)
+#define IRDMA_UDAQPC_SQHDRRINGBUFENABLE BIT_ULL(1)
+#define IRDMA_UDAQPC_IPID GENMASK_ULL(47, 32)
+#define IRDMA_UDAQPC_SNDMSS GENMASK_ULL(29, 16)
+#define IRDMA_UDAQPC_VLANTAG GENMASK_ULL(15, 0)
+#define IRDMA_UDA_CQPSQ_MAV_PDINDEXHI GENMASK_ULL(27, 20)
+#define IRDMA_UDA_CQPSQ_MAV_PDINDEXLO GENMASK_ULL(63, 48)
+#define IRDMA_UDA_CQPSQ_MAV_SRCMACADDRINDEX GENMASK_ULL(29, 24)
+#define IRDMA_UDA_CQPSQ_MAV_ARPINDEX GENMASK_ULL(63, 48)
+#define IRDMA_UDA_CQPSQ_MAV_TC GENMASK_ULL(39, 32)
+#define IRDMA_UDA_CQPSQ_MAV_HOPLIMIT GENMASK_ULL(39, 32)
+#define IRDMA_UDA_CQPSQ_MAV_FLOWLABEL GENMASK_ULL(19, 0)
+#define IRDMA_UDA_CQPSQ_MAV_ADDR0 GENMASK_ULL(63, 32)
+#define IRDMA_UDA_CQPSQ_MAV_ADDR1 GENMASK_ULL(31, 0)
+#define IRDMA_UDA_CQPSQ_MAV_ADDR2 GENMASK_ULL(63, 32)
+#define IRDMA_UDA_CQPSQ_MAV_ADDR3 GENMASK_ULL(31, 0)
+#define IRDMA_UDA_CQPSQ_MAV_WQEVALID BIT_ULL(63)
+#define IRDMA_UDA_CQPSQ_MAV_OPCODE GENMASK_ULL(37, 32)
+#define IRDMA_UDA_CQPSQ_MAV_DOLOOPBACKK BIT_ULL(62)
+#define IRDMA_UDA_CQPSQ_MAV_IPV4VALID BIT_ULL(59)
+#define IRDMA_UDA_CQPSQ_MAV_AVIDX GENMASK_ULL(23, 0)
+#define IRDMA_UDA_CQPSQ_MAV_INSERTVLANTAG BIT_ULL(60)
+#define IRDMA_UDA_MGCTX_VFFLAG BIT_ULL(29)
+#define IRDMA_UDA_MGCTX_DESTPORT GENMASK_ULL(47, 32)
+#define IRDMA_UDA_MGCTX_VFID GENMASK_ULL(28, 22)
+#define IRDMA_UDA_MGCTX_VALIDENT BIT_ULL(31)
+#define IRDMA_UDA_MGCTX_PFID GENMASK_ULL(21, 18)
+#define IRDMA_UDA_MGCTX_FLAGIGNOREDPORT BIT_ULL(30)
+#define IRDMA_UDA_MGCTX_QPID GENMASK_ULL(17, 0)
+#define IRDMA_UDA_CQPSQ_MG_WQEVALID BIT_ULL(63)
+#define IRDMA_UDA_CQPSQ_MG_OPCODE GENMASK_ULL(37, 32)
+#define IRDMA_UDA_CQPSQ_MG_MGIDX GENMASK_ULL(12, 0)
+#define IRDMA_UDA_CQPSQ_MG_IPV4VALID BIT_ULL(60)
+#define IRDMA_UDA_CQPSQ_MG_VLANVALID BIT_ULL(59)
+#define IRDMA_UDA_CQPSQ_MG_HMC_FCN_ID GENMASK_ULL(5, 0)
+#define IRDMA_UDA_CQPSQ_MG_VLANID GENMASK_ULL(43, 32)
+#define IRDMA_UDA_CQPSQ_QS_HANDLE GENMASK_ULL(9, 0)
+#define IRDMA_UDA_CQPSQ_QHASH_QPN GENMASK_ULL(49, 32)
+#define IRDMA_UDA_CQPSQ_QHASH_ BIT_ULL(0)
+#define IRDMA_UDA_CQPSQ_QHASH_SRC_PORT GENMASK_ULL(31, 16)
+#define IRDMA_UDA_CQPSQ_QHASH_DEST_PORT GENMASK_ULL(15, 0)
+#define IRDMA_UDA_CQPSQ_QHASH_ADDR0 GENMASK_ULL(63, 32)
+#define IRDMA_UDA_CQPSQ_QHASH_ADDR1 GENMASK_ULL(31, 0)
+#define IRDMA_UDA_CQPSQ_QHASH_ADDR2 GENMASK_ULL(63, 32)
+#define IRDMA_UDA_CQPSQ_QHASH_ADDR3 GENMASK_ULL(31, 0)
+#define IRDMA_UDA_CQPSQ_QHASH_WQEVALID BIT_ULL(63)
+#define IRDMA_UDA_CQPSQ_QHASH_OPCODE GENMASK_ULL(37, 32)
+#define IRDMA_UDA_CQPSQ_QHASH_MANAGE GENMASK_ULL(62, 61)
+#define IRDMA_UDA_CQPSQ_QHASH_IPV4VALID GENMASK_ULL(60, 60)
+#define IRDMA_UDA_CQPSQ_QHASH_LANFWD GENMASK_ULL(59, 59)
+#define IRDMA_UDA_CQPSQ_QHASH_ENTRYTYPE GENMASK_ULL(44, 42)
+#endif /* IRDMA_UDA_D_H */
diff --git a/drivers/infiniband/hw/irdma/uk.c b/drivers/infiniband/hw/irdma/uk.c
new file mode 100644
index 000000000000..f0846b800913
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/uk.c
@@ -0,0 +1,1930 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2015 - 2021 Intel Corporation */
+#include "osdep.h"
+#include "defs.h"
+#include "user.h"
+#include "irdma.h"
+
+/**
+ * irdma_set_fragment - set fragment in wqe
+ * @wqe: wqe for setting fragment
+ * @offset: offset value
+ * @sge: sge length and stag
+ * @valid: The wqe valid
+ */
+static void irdma_set_fragment(__le64 *wqe, u32 offset, struct ib_sge *sge,
+ u8 valid)
+{
+ if (sge) {
+ set_64bit_val(wqe, offset,
+ FIELD_PREP(IRDMAQPSQ_FRAG_TO, sge->addr));
+ set_64bit_val(wqe, offset + 8,
+ FIELD_PREP(IRDMAQPSQ_VALID, valid) |
+ FIELD_PREP(IRDMAQPSQ_FRAG_LEN, sge->length) |
+ FIELD_PREP(IRDMAQPSQ_FRAG_STAG, sge->lkey));
+ } else {
+ set_64bit_val(wqe, offset, 0);
+ set_64bit_val(wqe, offset + 8,
+ FIELD_PREP(IRDMAQPSQ_VALID, valid));
+ }
+}
+
+/**
+ * irdma_set_fragment_gen_1 - set fragment in wqe
+ * @wqe: wqe for setting fragment
+ * @offset: offset value
+ * @sge: sge length and stag
+ * @valid: wqe valid flag
+ */
+static void irdma_set_fragment_gen_1(__le64 *wqe, u32 offset,
+ struct ib_sge *sge, u8 valid)
+{
+ if (sge) {
+ set_64bit_val(wqe, offset,
+ FIELD_PREP(IRDMAQPSQ_FRAG_TO, sge->addr));
+ set_64bit_val(wqe, offset + 8,
+ FIELD_PREP(IRDMAQPSQ_GEN1_FRAG_LEN, sge->length) |
+ FIELD_PREP(IRDMAQPSQ_GEN1_FRAG_STAG, sge->lkey));
+ } else {
+ set_64bit_val(wqe, offset, 0);
+ set_64bit_val(wqe, offset + 8, 0);
+ }
+}
+
+/**
+ * irdma_nop_1 - insert a NOP wqe
+ * @qp: hw qp ptr
+ */
+static int irdma_nop_1(struct irdma_qp_uk *qp)
+{
+ u64 hdr;
+ __le64 *wqe;
+ u32 wqe_idx;
+ bool signaled = false;
+
+ if (!qp->sq_ring.head)
+ return -EINVAL;
+
+ wqe_idx = IRDMA_RING_CURRENT_HEAD(qp->sq_ring);
+ wqe = qp->sq_base[wqe_idx].elem;
+
+ qp->sq_wrtrk_array[wqe_idx].quanta = IRDMA_QP_WQE_MIN_QUANTA;
+
+ set_64bit_val(wqe, 0, 0);
+ set_64bit_val(wqe, 8, 0);
+ set_64bit_val(wqe, 16, 0);
+
+ hdr = FIELD_PREP(IRDMAQPSQ_OPCODE, IRDMAQP_OP_NOP) |
+ FIELD_PREP(IRDMAQPSQ_SIGCOMPL, signaled) |
+ FIELD_PREP(IRDMAQPSQ_VALID, qp->swqe_polarity);
+
+ /* make sure WQE is written before valid bit is set */
+ dma_wmb();
+
+ set_64bit_val(wqe, 24, hdr);
+
+ return 0;
+}
+
+/**
+ * irdma_clr_wqes - clear next 128 sq entries
+ * @qp: hw qp ptr
+ * @qp_wqe_idx: wqe_idx
+ */
+void irdma_clr_wqes(struct irdma_qp_uk *qp, u32 qp_wqe_idx)
+{
+ struct irdma_qp_quanta *sq;
+ u32 wqe_idx;
+
+ if (!(qp_wqe_idx & 0x7F)) {
+ wqe_idx = (qp_wqe_idx + 128) % qp->sq_ring.size;
+ sq = qp->sq_base + wqe_idx;
+ if (wqe_idx)
+ memset(sq, qp->swqe_polarity ? 0 : 0xFF,
+ 128 * sizeof(*sq));
+ else
+ memset(sq, qp->swqe_polarity ? 0xFF : 0,
+ 128 * sizeof(*sq));
+ }
+}
+
+/**
+ * irdma_uk_qp_post_wr - ring doorbell
+ * @qp: hw qp ptr
+ */
+void irdma_uk_qp_post_wr(struct irdma_qp_uk *qp)
+{
+ dma_wmb();
+ writel(qp->qp_id, qp->wqe_alloc_db);
+}
+
+/**
+ * irdma_qp_get_next_send_wqe - pad with NOP if needed, return where next WR should go
+ * @qp: hw qp ptr
+ * @wqe_idx: return wqe index
+ * @quanta: size of WR in quanta
+ * @total_size: size of WR in bytes
+ * @info: info on WR
+ */
+__le64 *irdma_qp_get_next_send_wqe(struct irdma_qp_uk *qp, u32 *wqe_idx,
+ u16 quanta, u32 total_size,
+ struct irdma_post_sq_info *info)
+{
+ __le64 *wqe;
+ __le64 *wqe_0 = NULL;
+ u16 avail_quanta;
+ u16 i;
+
+ avail_quanta = qp->uk_attrs->max_hw_sq_chunk -
+ (IRDMA_RING_CURRENT_HEAD(qp->sq_ring) %
+ qp->uk_attrs->max_hw_sq_chunk);
+ if (quanta <= avail_quanta) {
+ /* WR fits in current chunk */
+ if (quanta > IRDMA_SQ_RING_FREE_QUANTA(qp->sq_ring))
+ return NULL;
+ } else {
+ /* Need to pad with NOP */
+ if (quanta + avail_quanta >
+ IRDMA_SQ_RING_FREE_QUANTA(qp->sq_ring))
+ return NULL;
+
+ for (i = 0; i < avail_quanta; i++) {
+ irdma_nop_1(qp);
+ IRDMA_RING_MOVE_HEAD_NOCHECK(qp->sq_ring);
+ }
+ }
+
+ *wqe_idx = IRDMA_RING_CURRENT_HEAD(qp->sq_ring);
+ if (!*wqe_idx)
+ qp->swqe_polarity = !qp->swqe_polarity;
+
+ IRDMA_RING_MOVE_HEAD_BY_COUNT_NOCHECK(qp->sq_ring, quanta);
+
+ wqe = qp->sq_base[*wqe_idx].elem;
+ if (qp->uk_attrs->hw_rev == IRDMA_GEN_1 && quanta == 1 &&
+ (IRDMA_RING_CURRENT_HEAD(qp->sq_ring) & 1)) {
+ wqe_0 = qp->sq_base[IRDMA_RING_CURRENT_HEAD(qp->sq_ring)].elem;
+ wqe_0[3] = cpu_to_le64(FIELD_PREP(IRDMAQPSQ_VALID, qp->swqe_polarity ? 0 : 1));
+ }
+ qp->sq_wrtrk_array[*wqe_idx].wrid = info->wr_id;
+ qp->sq_wrtrk_array[*wqe_idx].wr_len = total_size;
+ qp->sq_wrtrk_array[*wqe_idx].quanta = quanta;
+ qp->sq_wrtrk_array[*wqe_idx].signaled = info->signaled;
+
+ return wqe;
+}
+
+__le64 *irdma_srq_get_next_recv_wqe(struct irdma_srq_uk *srq, u32 *wqe_idx)
+{
+ int ret_code;
+ __le64 *wqe;
+
+ if (IRDMA_RING_FULL_ERR(srq->srq_ring))
+ return NULL;
+
+ IRDMA_ATOMIC_RING_MOVE_HEAD(srq->srq_ring, *wqe_idx, ret_code);
+ if (ret_code)
+ return NULL;
+
+ if (!*wqe_idx)
+ srq->srwqe_polarity = !srq->srwqe_polarity;
+ /* rq_wqe_size_multiplier is no of 32 byte quanta in one rq wqe */
+ wqe = srq->srq_base[*wqe_idx * (srq->wqe_size_multiplier)].elem;
+
+ return wqe;
+}
+
+/**
+ * irdma_qp_get_next_recv_wqe - get next qp's rcv wqe
+ * @qp: hw qp ptr
+ * @wqe_idx: return wqe index
+ */
+__le64 *irdma_qp_get_next_recv_wqe(struct irdma_qp_uk *qp, u32 *wqe_idx)
+{
+ __le64 *wqe;
+ int ret_code;
+
+ if (IRDMA_RING_FULL_ERR(qp->rq_ring))
+ return NULL;
+
+ IRDMA_ATOMIC_RING_MOVE_HEAD(qp->rq_ring, *wqe_idx, ret_code);
+ if (ret_code)
+ return NULL;
+
+ if (!*wqe_idx)
+ qp->rwqe_polarity = !qp->rwqe_polarity;
+ /* rq_wqe_size_multiplier is no of 32 byte quanta in one rq wqe */
+ wqe = qp->rq_base[*wqe_idx * qp->rq_wqe_size_multiplier].elem;
+
+ return wqe;
+}
+
+/**
+ * irdma_uk_rdma_write - rdma write operation
+ * @qp: hw qp ptr
+ * @info: post sq information
+ * @post_sq: flag to post sq
+ */
+int irdma_uk_rdma_write(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info,
+ bool post_sq)
+{
+ u64 hdr;
+ __le64 *wqe;
+ struct irdma_rdma_write *op_info;
+ u32 i, wqe_idx;
+ u32 total_size = 0, byte_off;
+ int ret_code;
+ u32 frag_cnt, addl_frag_cnt;
+ bool read_fence = false;
+ u16 quanta;
+
+ op_info = &info->op.rdma_write;
+ if (op_info->num_lo_sges > qp->max_sq_frag_cnt)
+ return -EINVAL;
+
+ for (i = 0; i < op_info->num_lo_sges; i++)
+ total_size += op_info->lo_sg_list[i].length;
+
+ read_fence |= info->read_fence;
+
+ if (info->imm_data_valid)
+ frag_cnt = op_info->num_lo_sges + 1;
+ else
+ frag_cnt = op_info->num_lo_sges;
+ addl_frag_cnt = frag_cnt > 1 ? (frag_cnt - 1) : 0;
+ ret_code = irdma_fragcnt_to_quanta_sq(frag_cnt, &quanta);
+ if (ret_code)
+ return ret_code;
+
+ wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, quanta, total_size,
+ info);
+ if (!wqe)
+ return -ENOMEM;
+
+ irdma_clr_wqes(qp, wqe_idx);
+
+ set_64bit_val(wqe, 16,
+ FIELD_PREP(IRDMAQPSQ_FRAG_TO, op_info->rem_addr.addr));
+
+ if (info->imm_data_valid) {
+ set_64bit_val(wqe, 0,
+ FIELD_PREP(IRDMAQPSQ_IMMDATA, info->imm_data));
+ i = 0;
+ } else {
+ qp->wqe_ops.iw_set_fragment(wqe, 0,
+ op_info->lo_sg_list,
+ qp->swqe_polarity);
+ i = 1;
+ }
+
+ for (byte_off = 32; i < op_info->num_lo_sges; i++) {
+ qp->wqe_ops.iw_set_fragment(wqe, byte_off,
+ &op_info->lo_sg_list[i],
+ qp->swqe_polarity);
+ byte_off += 16;
+ }
+
+ /* if not an odd number set valid bit in next fragment */
+ if (qp->uk_attrs->hw_rev >= IRDMA_GEN_2 && !(frag_cnt & 0x01) &&
+ frag_cnt) {
+ qp->wqe_ops.iw_set_fragment(wqe, byte_off, NULL,
+ qp->swqe_polarity);
+ if (qp->uk_attrs->hw_rev == IRDMA_GEN_2)
+ ++addl_frag_cnt;
+ }
+
+ hdr = FIELD_PREP(IRDMAQPSQ_REMSTAG, op_info->rem_addr.lkey) |
+ FIELD_PREP(IRDMAQPSQ_OPCODE, info->op_type) |
+ FIELD_PREP(IRDMAQPSQ_IMMDATAFLAG, info->imm_data_valid) |
+ FIELD_PREP(IRDMAQPSQ_REPORTRTT, info->report_rtt) |
+ FIELD_PREP(IRDMAQPSQ_ADDFRAGCNT, addl_frag_cnt) |
+ FIELD_PREP(IRDMAQPSQ_READFENCE, read_fence) |
+ FIELD_PREP(IRDMAQPSQ_LOCALFENCE, info->local_fence) |
+ FIELD_PREP(IRDMAQPSQ_SIGCOMPL, info->signaled) |
+ FIELD_PREP(IRDMAQPSQ_VALID, qp->swqe_polarity);
+
+ dma_wmb(); /* make sure WQE is populated before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ if (post_sq)
+ irdma_uk_qp_post_wr(qp);
+
+ return 0;
+}
+
+/**
+ * irdma_uk_atomic_fetch_add - atomic fetch and add operation
+ * @qp: hw qp ptr
+ * @info: post sq information
+ * @post_sq: flag to post sq
+ */
+int irdma_uk_atomic_fetch_add(struct irdma_qp_uk *qp,
+ struct irdma_post_sq_info *info, bool post_sq)
+{
+ struct irdma_atomic_fetch_add *op_info;
+ u32 total_size = 0;
+ u16 quanta = 2;
+ u32 wqe_idx;
+ __le64 *wqe;
+ u64 hdr;
+
+ op_info = &info->op.atomic_fetch_add;
+ wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, quanta, total_size,
+ info);
+ if (!wqe)
+ return -ENOMEM;
+
+ set_64bit_val(wqe, 0, op_info->tagged_offset);
+ set_64bit_val(wqe, 8,
+ FIELD_PREP(IRDMAQPSQ_STAG, op_info->stag));
+ set_64bit_val(wqe, 16, op_info->remote_tagged_offset);
+
+ hdr = FIELD_PREP(IRDMAQPSQ_ADDFRAGCNT, 1) |
+ FIELD_PREP(IRDMAQPSQ_REMOTE_STAG, op_info->remote_stag) |
+ FIELD_PREP(IRDMAQPSQ_OPCODE, IRDMAQP_OP_ATOMIC_FETCH_ADD) |
+ FIELD_PREP(IRDMAQPSQ_READFENCE, info->read_fence) |
+ FIELD_PREP(IRDMAQPSQ_LOCALFENCE, info->local_fence) |
+ FIELD_PREP(IRDMAQPSQ_SIGCOMPL, info->signaled) |
+ FIELD_PREP(IRDMAQPSQ_VALID, qp->swqe_polarity);
+
+ set_64bit_val(wqe, 32, op_info->fetch_add_data_bytes);
+ set_64bit_val(wqe, 40, 0);
+ set_64bit_val(wqe, 48, 0);
+ set_64bit_val(wqe, 56,
+ FIELD_PREP(IRDMAQPSQ_VALID, qp->swqe_polarity));
+
+ dma_wmb(); /* make sure WQE is populated before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ if (post_sq)
+ irdma_uk_qp_post_wr(qp);
+
+ return 0;
+}
+
+/**
+ * irdma_uk_atomic_compare_swap - atomic compare and swap operation
+ * @qp: hw qp ptr
+ * @info: post sq information
+ * @post_sq: flag to post sq
+ */
+int irdma_uk_atomic_compare_swap(struct irdma_qp_uk *qp,
+ struct irdma_post_sq_info *info, bool post_sq)
+{
+ struct irdma_atomic_compare_swap *op_info;
+ u32 total_size = 0;
+ u16 quanta = 2;
+ u32 wqe_idx;
+ __le64 *wqe;
+ u64 hdr;
+
+ op_info = &info->op.atomic_compare_swap;
+ wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, quanta, total_size,
+ info);
+ if (!wqe)
+ return -ENOMEM;
+
+ set_64bit_val(wqe, 0, op_info->tagged_offset);
+ set_64bit_val(wqe, 8,
+ FIELD_PREP(IRDMAQPSQ_STAG, op_info->stag));
+ set_64bit_val(wqe, 16, op_info->remote_tagged_offset);
+
+ hdr = FIELD_PREP(IRDMAQPSQ_ADDFRAGCNT, 1) |
+ FIELD_PREP(IRDMAQPSQ_REMOTE_STAG, op_info->remote_stag) |
+ FIELD_PREP(IRDMAQPSQ_OPCODE, IRDMAQP_OP_ATOMIC_COMPARE_SWAP_ADD) |
+ FIELD_PREP(IRDMAQPSQ_READFENCE, info->read_fence) |
+ FIELD_PREP(IRDMAQPSQ_LOCALFENCE, info->local_fence) |
+ FIELD_PREP(IRDMAQPSQ_SIGCOMPL, info->signaled) |
+ FIELD_PREP(IRDMAQPSQ_VALID, qp->swqe_polarity);
+
+ set_64bit_val(wqe, 32, op_info->swap_data_bytes);
+ set_64bit_val(wqe, 40, op_info->compare_data_bytes);
+ set_64bit_val(wqe, 48, 0);
+ set_64bit_val(wqe, 56,
+ FIELD_PREP(IRDMAQPSQ_VALID, qp->swqe_polarity));
+
+ dma_wmb(); /* make sure WQE is populated before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ if (post_sq)
+ irdma_uk_qp_post_wr(qp);
+
+ return 0;
+}
+
+/**
+ * irdma_uk_srq_post_receive - post a receive wqe to a shared rq
+ * @srq: shared rq ptr
+ * @info: post rq information
+ */
+int irdma_uk_srq_post_receive(struct irdma_srq_uk *srq,
+ struct irdma_post_rq_info *info)
+{
+ u32 wqe_idx, i, byte_off;
+ u32 addl_frag_cnt;
+ __le64 *wqe;
+ u64 hdr;
+
+ if (srq->max_srq_frag_cnt < info->num_sges)
+ return -EINVAL;
+
+ wqe = irdma_srq_get_next_recv_wqe(srq, &wqe_idx);
+ if (!wqe)
+ return -ENOMEM;
+
+ addl_frag_cnt = info->num_sges > 1 ? info->num_sges - 1 : 0;
+ srq->wqe_ops.iw_set_fragment(wqe, 0, info->sg_list,
+ srq->srwqe_polarity);
+
+ for (i = 1, byte_off = 32; i < info->num_sges; i++) {
+ srq->wqe_ops.iw_set_fragment(wqe, byte_off, &info->sg_list[i],
+ srq->srwqe_polarity);
+ byte_off += 16;
+ }
+
+ /* if not an odd number set valid bit in next fragment */
+ if (srq->uk_attrs->hw_rev >= IRDMA_GEN_2 && !(info->num_sges & 0x01) &&
+ info->num_sges) {
+ srq->wqe_ops.iw_set_fragment(wqe, byte_off, NULL,
+ srq->srwqe_polarity);
+ if (srq->uk_attrs->hw_rev == IRDMA_GEN_2)
+ ++addl_frag_cnt;
+ }
+
+ set_64bit_val(wqe, 16, (u64)info->wr_id);
+ hdr = FIELD_PREP(IRDMAQPSQ_ADDFRAGCNT, addl_frag_cnt) |
+ FIELD_PREP(IRDMAQPSQ_VALID, srq->srwqe_polarity);
+
+ dma_wmb(); /* make sure WQE is populated before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ set_64bit_val(srq->shadow_area, 0, (wqe_idx + 1) % srq->srq_ring.size);
+
+ return 0;
+}
+
+/**
+ * irdma_uk_rdma_read - rdma read command
+ * @qp: hw qp ptr
+ * @info: post sq information
+ * @inv_stag: flag for inv_stag
+ * @post_sq: flag to post sq
+ */
+int irdma_uk_rdma_read(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info,
+ bool inv_stag, bool post_sq)
+{
+ struct irdma_rdma_read *op_info;
+ int ret_code;
+ u32 i, byte_off, total_size = 0;
+ bool local_fence = false;
+ u32 addl_frag_cnt;
+ __le64 *wqe;
+ u32 wqe_idx;
+ u16 quanta;
+ u64 hdr;
+
+ op_info = &info->op.rdma_read;
+ if (qp->max_sq_frag_cnt < op_info->num_lo_sges)
+ return -EINVAL;
+
+ for (i = 0; i < op_info->num_lo_sges; i++)
+ total_size += op_info->lo_sg_list[i].length;
+
+ ret_code = irdma_fragcnt_to_quanta_sq(op_info->num_lo_sges, &quanta);
+ if (ret_code)
+ return ret_code;
+
+ wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, quanta, total_size,
+ info);
+ if (!wqe)
+ return -ENOMEM;
+
+ irdma_clr_wqes(qp, wqe_idx);
+
+ addl_frag_cnt = op_info->num_lo_sges > 1 ?
+ (op_info->num_lo_sges - 1) : 0;
+ local_fence |= info->local_fence;
+
+ qp->wqe_ops.iw_set_fragment(wqe, 0, op_info->lo_sg_list,
+ qp->swqe_polarity);
+ for (i = 1, byte_off = 32; i < op_info->num_lo_sges; ++i) {
+ qp->wqe_ops.iw_set_fragment(wqe, byte_off,
+ &op_info->lo_sg_list[i],
+ qp->swqe_polarity);
+ byte_off += 16;
+ }
+
+ /* if not an odd number set valid bit in next fragment */
+ if (qp->uk_attrs->hw_rev >= IRDMA_GEN_2 &&
+ !(op_info->num_lo_sges & 0x01) && op_info->num_lo_sges) {
+ qp->wqe_ops.iw_set_fragment(wqe, byte_off, NULL,
+ qp->swqe_polarity);
+ if (qp->uk_attrs->hw_rev == IRDMA_GEN_2)
+ ++addl_frag_cnt;
+ }
+ set_64bit_val(wqe, 16,
+ FIELD_PREP(IRDMAQPSQ_FRAG_TO, op_info->rem_addr.addr));
+ hdr = FIELD_PREP(IRDMAQPSQ_REMSTAG, op_info->rem_addr.lkey) |
+ FIELD_PREP(IRDMAQPSQ_REPORTRTT, (info->report_rtt ? 1 : 0)) |
+ FIELD_PREP(IRDMAQPSQ_ADDFRAGCNT, addl_frag_cnt) |
+ FIELD_PREP(IRDMAQPSQ_OPCODE,
+ (inv_stag ? IRDMAQP_OP_RDMA_READ_LOC_INV : IRDMAQP_OP_RDMA_READ)) |
+ FIELD_PREP(IRDMAQPSQ_READFENCE, info->read_fence) |
+ FIELD_PREP(IRDMAQPSQ_LOCALFENCE, local_fence) |
+ FIELD_PREP(IRDMAQPSQ_SIGCOMPL, info->signaled) |
+ FIELD_PREP(IRDMAQPSQ_VALID, qp->swqe_polarity);
+
+ dma_wmb(); /* make sure WQE is populated before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ if (post_sq)
+ irdma_uk_qp_post_wr(qp);
+
+ return 0;
+}
+
+/**
+ * irdma_uk_send - rdma send command
+ * @qp: hw qp ptr
+ * @info: post sq information
+ * @post_sq: flag to post sq
+ */
+int irdma_uk_send(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info,
+ bool post_sq)
+{
+ __le64 *wqe;
+ struct irdma_post_send *op_info;
+ u64 hdr;
+ u32 i, wqe_idx, total_size = 0, byte_off;
+ int ret_code;
+ u32 frag_cnt, addl_frag_cnt;
+ bool read_fence = false;
+ u16 quanta;
+
+ op_info = &info->op.send;
+ if (qp->max_sq_frag_cnt < op_info->num_sges)
+ return -EINVAL;
+
+ for (i = 0; i < op_info->num_sges; i++)
+ total_size += op_info->sg_list[i].length;
+
+ if (info->imm_data_valid)
+ frag_cnt = op_info->num_sges + 1;
+ else
+ frag_cnt = op_info->num_sges;
+ ret_code = irdma_fragcnt_to_quanta_sq(frag_cnt, &quanta);
+ if (ret_code)
+ return ret_code;
+
+ wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, quanta, total_size,
+ info);
+ if (!wqe)
+ return -ENOMEM;
+
+ irdma_clr_wqes(qp, wqe_idx);
+
+ read_fence |= info->read_fence;
+ addl_frag_cnt = frag_cnt > 1 ? (frag_cnt - 1) : 0;
+ if (info->imm_data_valid) {
+ set_64bit_val(wqe, 0,
+ FIELD_PREP(IRDMAQPSQ_IMMDATA, info->imm_data));
+ i = 0;
+ } else {
+ qp->wqe_ops.iw_set_fragment(wqe, 0,
+ frag_cnt ? op_info->sg_list : NULL,
+ qp->swqe_polarity);
+ i = 1;
+ }
+
+ for (byte_off = 32; i < op_info->num_sges; i++) {
+ qp->wqe_ops.iw_set_fragment(wqe, byte_off, &op_info->sg_list[i],
+ qp->swqe_polarity);
+ byte_off += 16;
+ }
+
+ /* if not an odd number set valid bit in next fragment */
+ if (qp->uk_attrs->hw_rev >= IRDMA_GEN_2 && !(frag_cnt & 0x01) &&
+ frag_cnt) {
+ qp->wqe_ops.iw_set_fragment(wqe, byte_off, NULL,
+ qp->swqe_polarity);
+ if (qp->uk_attrs->hw_rev == IRDMA_GEN_2)
+ ++addl_frag_cnt;
+ }
+
+ set_64bit_val(wqe, 16,
+ FIELD_PREP(IRDMAQPSQ_DESTQKEY, op_info->qkey) |
+ FIELD_PREP(IRDMAQPSQ_DESTQPN, op_info->dest_qp));
+ hdr = FIELD_PREP(IRDMAQPSQ_REMSTAG, info->stag_to_inv) |
+ FIELD_PREP(IRDMAQPSQ_AHID, op_info->ah_id) |
+ FIELD_PREP(IRDMAQPSQ_IMMDATAFLAG,
+ (info->imm_data_valid ? 1 : 0)) |
+ FIELD_PREP(IRDMAQPSQ_REPORTRTT, (info->report_rtt ? 1 : 0)) |
+ FIELD_PREP(IRDMAQPSQ_OPCODE, info->op_type) |
+ FIELD_PREP(IRDMAQPSQ_ADDFRAGCNT, addl_frag_cnt) |
+ FIELD_PREP(IRDMAQPSQ_READFENCE, read_fence) |
+ FIELD_PREP(IRDMAQPSQ_LOCALFENCE, info->local_fence) |
+ FIELD_PREP(IRDMAQPSQ_SIGCOMPL, info->signaled) |
+ FIELD_PREP(IRDMAQPSQ_UDPHEADER, info->udp_hdr) |
+ FIELD_PREP(IRDMAQPSQ_L4LEN, info->l4len) |
+ FIELD_PREP(IRDMAQPSQ_VALID, qp->swqe_polarity);
+
+ dma_wmb(); /* make sure WQE is populated before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ if (post_sq)
+ irdma_uk_qp_post_wr(qp);
+
+ return 0;
+}
+
+/**
+ * irdma_set_mw_bind_wqe_gen_1 - set mw bind wqe
+ * @wqe: wqe for setting fragment
+ * @op_info: info for setting bind wqe values
+ */
+static void irdma_set_mw_bind_wqe_gen_1(__le64 *wqe,
+ struct irdma_bind_window *op_info)
+{
+ set_64bit_val(wqe, 0, (uintptr_t)op_info->va);
+ set_64bit_val(wqe, 8,
+ FIELD_PREP(IRDMAQPSQ_PARENTMRSTAG, op_info->mw_stag) |
+ FIELD_PREP(IRDMAQPSQ_MWSTAG, op_info->mr_stag));
+ set_64bit_val(wqe, 16, op_info->bind_len);
+}
+
+/**
+ * irdma_copy_inline_data_gen_1 - Copy inline data to wqe
+ * @wqe: pointer to wqe
+ * @sge_list: table of pointers to inline data
+ * @num_sges: Total inline data length
+ * @polarity: compatibility parameter
+ */
+static void irdma_copy_inline_data_gen_1(u8 *wqe, struct ib_sge *sge_list,
+ u32 num_sges, u8 polarity)
+{
+ u32 quanta_bytes_remaining = 16;
+ int i;
+
+ for (i = 0; i < num_sges; i++) {
+ u8 *cur_sge = (u8 *)(uintptr_t)sge_list[i].addr;
+ u32 sge_len = sge_list[i].length;
+
+ while (sge_len) {
+ u32 bytes_copied;
+
+ bytes_copied = min(sge_len, quanta_bytes_remaining);
+ memcpy(wqe, cur_sge, bytes_copied);
+ wqe += bytes_copied;
+ cur_sge += bytes_copied;
+ quanta_bytes_remaining -= bytes_copied;
+ sge_len -= bytes_copied;
+
+ if (!quanta_bytes_remaining) {
+ /* Remaining inline bytes reside after hdr */
+ wqe += 16;
+ quanta_bytes_remaining = 32;
+ }
+ }
+ }
+}
+
+/**
+ * irdma_inline_data_size_to_quanta_gen_1 - based on inline data, quanta
+ * @data_size: data size for inline
+ *
+ * Gets the quanta based on inline and immediate data.
+ */
+static inline u16 irdma_inline_data_size_to_quanta_gen_1(u32 data_size)
+{
+ return data_size <= 16 ? IRDMA_QP_WQE_MIN_QUANTA : 2;
+}
+
+/**
+ * irdma_set_mw_bind_wqe - set mw bind in wqe
+ * @wqe: wqe for setting mw bind
+ * @op_info: info for setting wqe values
+ */
+static void irdma_set_mw_bind_wqe(__le64 *wqe,
+ struct irdma_bind_window *op_info)
+{
+ set_64bit_val(wqe, 0, (uintptr_t)op_info->va);
+ set_64bit_val(wqe, 8,
+ FIELD_PREP(IRDMAQPSQ_PARENTMRSTAG, op_info->mr_stag) |
+ FIELD_PREP(IRDMAQPSQ_MWSTAG, op_info->mw_stag));
+ set_64bit_val(wqe, 16, op_info->bind_len);
+}
+
+/**
+ * irdma_copy_inline_data - Copy inline data to wqe
+ * @wqe: pointer to wqe
+ * @sge_list: table of pointers to inline data
+ * @num_sges: number of SGE's
+ * @polarity: polarity of wqe valid bit
+ */
+static void irdma_copy_inline_data(u8 *wqe, struct ib_sge *sge_list,
+ u32 num_sges, u8 polarity)
+{
+ u8 inline_valid = polarity << IRDMA_INLINE_VALID_S;
+ u32 quanta_bytes_remaining = 8;
+ bool first_quanta = true;
+ int i;
+
+ wqe += 8;
+
+ for (i = 0; i < num_sges; i++) {
+ u8 *cur_sge = (u8 *)(uintptr_t)sge_list[i].addr;
+ u32 sge_len = sge_list[i].length;
+
+ while (sge_len) {
+ u32 bytes_copied;
+
+ bytes_copied = min(sge_len, quanta_bytes_remaining);
+ memcpy(wqe, cur_sge, bytes_copied);
+ wqe += bytes_copied;
+ cur_sge += bytes_copied;
+ quanta_bytes_remaining -= bytes_copied;
+ sge_len -= bytes_copied;
+
+ if (!quanta_bytes_remaining) {
+ quanta_bytes_remaining = 31;
+
+ /* Remaining inline bytes reside after hdr */
+ if (first_quanta) {
+ first_quanta = false;
+ wqe += 16;
+ } else {
+ *wqe = inline_valid;
+ wqe++;
+ }
+ }
+ }
+ }
+ if (!first_quanta && quanta_bytes_remaining < 31)
+ *(wqe + quanta_bytes_remaining) = inline_valid;
+}
+
+/**
+ * irdma_inline_data_size_to_quanta - based on inline data, quanta
+ * @data_size: data size for inline
+ *
+ * Gets the quanta based on inline and immediate data.
+ */
+static u16 irdma_inline_data_size_to_quanta(u32 data_size)
+{
+ if (data_size <= 8)
+ return IRDMA_QP_WQE_MIN_QUANTA;
+ else if (data_size <= 39)
+ return 2;
+ else if (data_size <= 70)
+ return 3;
+ else if (data_size <= 101)
+ return 4;
+ else if (data_size <= 132)
+ return 5;
+ else if (data_size <= 163)
+ return 6;
+ else if (data_size <= 194)
+ return 7;
+ else
+ return 8;
+}
+
+/**
+ * irdma_uk_inline_rdma_write - inline rdma write operation
+ * @qp: hw qp ptr
+ * @info: post sq information
+ * @post_sq: flag to post sq
+ */
+int irdma_uk_inline_rdma_write(struct irdma_qp_uk *qp,
+ struct irdma_post_sq_info *info, bool post_sq)
+{
+ __le64 *wqe;
+ struct irdma_rdma_write *op_info;
+ u64 hdr = 0;
+ u32 wqe_idx;
+ bool read_fence = false;
+ u32 i, total_size = 0;
+ u16 quanta;
+
+ op_info = &info->op.rdma_write;
+
+ if (unlikely(qp->max_sq_frag_cnt < op_info->num_lo_sges))
+ return -EINVAL;
+
+ for (i = 0; i < op_info->num_lo_sges; i++)
+ total_size += op_info->lo_sg_list[i].length;
+
+ if (unlikely(total_size > qp->max_inline_data))
+ return -EINVAL;
+
+ quanta = qp->wqe_ops.iw_inline_data_size_to_quanta(total_size);
+ wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, quanta, total_size,
+ info);
+ if (!wqe)
+ return -ENOMEM;
+
+ irdma_clr_wqes(qp, wqe_idx);
+
+ read_fence |= info->read_fence;
+ set_64bit_val(wqe, 16,
+ FIELD_PREP(IRDMAQPSQ_FRAG_TO, op_info->rem_addr.addr));
+
+ hdr = FIELD_PREP(IRDMAQPSQ_REMSTAG, op_info->rem_addr.lkey) |
+ FIELD_PREP(IRDMAQPSQ_OPCODE, info->op_type) |
+ FIELD_PREP(IRDMAQPSQ_INLINEDATALEN, total_size) |
+ FIELD_PREP(IRDMAQPSQ_REPORTRTT, info->report_rtt ? 1 : 0) |
+ FIELD_PREP(IRDMAQPSQ_INLINEDATAFLAG, 1) |
+ FIELD_PREP(IRDMAQPSQ_IMMDATAFLAG, info->imm_data_valid ? 1 : 0) |
+ FIELD_PREP(IRDMAQPSQ_READFENCE, read_fence) |
+ FIELD_PREP(IRDMAQPSQ_LOCALFENCE, info->local_fence) |
+ FIELD_PREP(IRDMAQPSQ_SIGCOMPL, info->signaled) |
+ FIELD_PREP(IRDMAQPSQ_VALID, qp->swqe_polarity);
+
+ if (info->imm_data_valid)
+ set_64bit_val(wqe, 0,
+ FIELD_PREP(IRDMAQPSQ_IMMDATA, info->imm_data));
+
+ qp->wqe_ops.iw_copy_inline_data((u8 *)wqe, op_info->lo_sg_list,
+ op_info->num_lo_sges,
+ qp->swqe_polarity);
+ dma_wmb(); /* make sure WQE is populated before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ if (post_sq)
+ irdma_uk_qp_post_wr(qp);
+
+ return 0;
+}
+
+/**
+ * irdma_uk_inline_send - inline send operation
+ * @qp: hw qp ptr
+ * @info: post sq information
+ * @post_sq: flag to post sq
+ */
+int irdma_uk_inline_send(struct irdma_qp_uk *qp,
+ struct irdma_post_sq_info *info, bool post_sq)
+{
+ __le64 *wqe;
+ struct irdma_post_send *op_info;
+ u64 hdr;
+ u32 wqe_idx;
+ bool read_fence = false;
+ u32 i, total_size = 0;
+ u16 quanta;
+
+ op_info = &info->op.send;
+
+ if (unlikely(qp->max_sq_frag_cnt < op_info->num_sges))
+ return -EINVAL;
+
+ for (i = 0; i < op_info->num_sges; i++)
+ total_size += op_info->sg_list[i].length;
+
+ if (unlikely(total_size > qp->max_inline_data))
+ return -EINVAL;
+
+ quanta = qp->wqe_ops.iw_inline_data_size_to_quanta(total_size);
+ wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, quanta, total_size,
+ info);
+ if (!wqe)
+ return -ENOMEM;
+
+ irdma_clr_wqes(qp, wqe_idx);
+
+ set_64bit_val(wqe, 16,
+ FIELD_PREP(IRDMAQPSQ_DESTQKEY, op_info->qkey) |
+ FIELD_PREP(IRDMAQPSQ_DESTQPN, op_info->dest_qp));
+
+ read_fence |= info->read_fence;
+ hdr = FIELD_PREP(IRDMAQPSQ_REMSTAG, info->stag_to_inv) |
+ FIELD_PREP(IRDMAQPSQ_AHID, op_info->ah_id) |
+ FIELD_PREP(IRDMAQPSQ_OPCODE, info->op_type) |
+ FIELD_PREP(IRDMAQPSQ_INLINEDATALEN, total_size) |
+ FIELD_PREP(IRDMAQPSQ_IMMDATAFLAG,
+ (info->imm_data_valid ? 1 : 0)) |
+ FIELD_PREP(IRDMAQPSQ_REPORTRTT, (info->report_rtt ? 1 : 0)) |
+ FIELD_PREP(IRDMAQPSQ_INLINEDATAFLAG, 1) |
+ FIELD_PREP(IRDMAQPSQ_READFENCE, read_fence) |
+ FIELD_PREP(IRDMAQPSQ_LOCALFENCE, info->local_fence) |
+ FIELD_PREP(IRDMAQPSQ_SIGCOMPL, info->signaled) |
+ FIELD_PREP(IRDMAQPSQ_UDPHEADER, info->udp_hdr) |
+ FIELD_PREP(IRDMAQPSQ_L4LEN, info->l4len) |
+ FIELD_PREP(IRDMAQPSQ_VALID, qp->swqe_polarity);
+
+ if (info->imm_data_valid)
+ set_64bit_val(wqe, 0,
+ FIELD_PREP(IRDMAQPSQ_IMMDATA, info->imm_data));
+ qp->wqe_ops.iw_copy_inline_data((u8 *)wqe, op_info->sg_list,
+ op_info->num_sges, qp->swqe_polarity);
+
+ dma_wmb(); /* make sure WQE is populated before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ if (post_sq)
+ irdma_uk_qp_post_wr(qp);
+
+ return 0;
+}
+
+/**
+ * irdma_uk_stag_local_invalidate - stag invalidate operation
+ * @qp: hw qp ptr
+ * @info: post sq information
+ * @post_sq: flag to post sq
+ */
+int irdma_uk_stag_local_invalidate(struct irdma_qp_uk *qp,
+ struct irdma_post_sq_info *info,
+ bool post_sq)
+{
+ __le64 *wqe;
+ struct irdma_inv_local_stag *op_info;
+ u64 hdr;
+ u32 wqe_idx;
+ bool local_fence = false;
+ struct ib_sge sge = {};
+
+ op_info = &info->op.inv_local_stag;
+ local_fence = info->local_fence;
+
+ wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, IRDMA_QP_WQE_MIN_QUANTA,
+ 0, info);
+ if (!wqe)
+ return -ENOMEM;
+
+ irdma_clr_wqes(qp, wqe_idx);
+
+ sge.lkey = op_info->target_stag;
+ qp->wqe_ops.iw_set_fragment(wqe, 0, &sge, 0);
+
+ set_64bit_val(wqe, 16, 0);
+
+ hdr = FIELD_PREP(IRDMAQPSQ_OPCODE, IRDMA_OP_TYPE_INV_STAG) |
+ FIELD_PREP(IRDMAQPSQ_READFENCE, info->read_fence) |
+ FIELD_PREP(IRDMAQPSQ_LOCALFENCE, local_fence) |
+ FIELD_PREP(IRDMAQPSQ_SIGCOMPL, info->signaled) |
+ FIELD_PREP(IRDMAQPSQ_VALID, qp->swqe_polarity);
+
+ dma_wmb(); /* make sure WQE is populated before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ if (post_sq)
+ irdma_uk_qp_post_wr(qp);
+
+ return 0;
+}
+
+/**
+ * irdma_uk_post_receive - post receive wqe
+ * @qp: hw qp ptr
+ * @info: post rq information
+ */
+int irdma_uk_post_receive(struct irdma_qp_uk *qp,
+ struct irdma_post_rq_info *info)
+{
+ u32 wqe_idx, i, byte_off;
+ u32 addl_frag_cnt;
+ __le64 *wqe;
+ u64 hdr;
+
+ if (qp->max_rq_frag_cnt < info->num_sges)
+ return -EINVAL;
+
+ wqe = irdma_qp_get_next_recv_wqe(qp, &wqe_idx);
+ if (!wqe)
+ return -ENOMEM;
+
+ qp->rq_wrid_array[wqe_idx] = info->wr_id;
+ addl_frag_cnt = info->num_sges > 1 ? (info->num_sges - 1) : 0;
+ qp->wqe_ops.iw_set_fragment(wqe, 0, info->sg_list,
+ qp->rwqe_polarity);
+
+ for (i = 1, byte_off = 32; i < info->num_sges; i++) {
+ qp->wqe_ops.iw_set_fragment(wqe, byte_off, &info->sg_list[i],
+ qp->rwqe_polarity);
+ byte_off += 16;
+ }
+
+ /* if not an odd number set valid bit in next fragment */
+ if (qp->uk_attrs->hw_rev >= IRDMA_GEN_2 && !(info->num_sges & 0x01) &&
+ info->num_sges) {
+ qp->wqe_ops.iw_set_fragment(wqe, byte_off, NULL,
+ qp->rwqe_polarity);
+ if (qp->uk_attrs->hw_rev == IRDMA_GEN_2)
+ ++addl_frag_cnt;
+ }
+
+ set_64bit_val(wqe, 16, 0);
+ hdr = FIELD_PREP(IRDMAQPSQ_ADDFRAGCNT, addl_frag_cnt) |
+ FIELD_PREP(IRDMAQPSQ_VALID, qp->rwqe_polarity);
+
+ dma_wmb(); /* make sure WQE is populated before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+
+ return 0;
+}
+
+/**
+ * irdma_uk_cq_resize - reset the cq buffer info
+ * @cq: cq to resize
+ * @cq_base: new cq buffer addr
+ * @cq_size: number of cqes
+ */
+void irdma_uk_cq_resize(struct irdma_cq_uk *cq, void *cq_base, int cq_size)
+{
+ cq->cq_base = cq_base;
+ cq->cq_size = cq_size;
+ IRDMA_RING_INIT(cq->cq_ring, cq->cq_size);
+ cq->polarity = 1;
+}
+
+/**
+ * irdma_uk_cq_set_resized_cnt - record the count of the resized buffers
+ * @cq: cq to resize
+ * @cq_cnt: the count of the resized cq buffers
+ */
+void irdma_uk_cq_set_resized_cnt(struct irdma_cq_uk *cq, u16 cq_cnt)
+{
+ u64 temp_val;
+ u16 sw_cq_sel;
+ u8 arm_next_se;
+ u8 arm_next;
+ u8 arm_seq_num;
+
+ get_64bit_val(cq->shadow_area, 32, &temp_val);
+
+ sw_cq_sel = (u16)FIELD_GET(IRDMA_CQ_DBSA_SW_CQ_SELECT, temp_val);
+ sw_cq_sel += cq_cnt;
+
+ arm_seq_num = (u8)FIELD_GET(IRDMA_CQ_DBSA_ARM_SEQ_NUM, temp_val);
+ arm_next_se = (u8)FIELD_GET(IRDMA_CQ_DBSA_ARM_NEXT_SE, temp_val);
+ arm_next = (u8)FIELD_GET(IRDMA_CQ_DBSA_ARM_NEXT, temp_val);
+
+ temp_val = FIELD_PREP(IRDMA_CQ_DBSA_ARM_SEQ_NUM, arm_seq_num) |
+ FIELD_PREP(IRDMA_CQ_DBSA_SW_CQ_SELECT, sw_cq_sel) |
+ FIELD_PREP(IRDMA_CQ_DBSA_ARM_NEXT_SE, arm_next_se) |
+ FIELD_PREP(IRDMA_CQ_DBSA_ARM_NEXT, arm_next);
+
+ set_64bit_val(cq->shadow_area, 32, temp_val);
+}
+
+/**
+ * irdma_uk_cq_request_notification - cq notification request (door bell)
+ * @cq: hw cq
+ * @cq_notify: notification type
+ */
+void irdma_uk_cq_request_notification(struct irdma_cq_uk *cq,
+ enum irdma_cmpl_notify cq_notify)
+{
+ u64 temp_val;
+ u16 sw_cq_sel;
+ u8 arm_next_se = 0;
+ u8 arm_next = 0;
+ u8 arm_seq_num;
+
+ get_64bit_val(cq->shadow_area, 32, &temp_val);
+ arm_seq_num = (u8)FIELD_GET(IRDMA_CQ_DBSA_ARM_SEQ_NUM, temp_val);
+ arm_seq_num++;
+ sw_cq_sel = (u16)FIELD_GET(IRDMA_CQ_DBSA_SW_CQ_SELECT, temp_val);
+ arm_next_se = (u8)FIELD_GET(IRDMA_CQ_DBSA_ARM_NEXT_SE, temp_val);
+ arm_next_se |= 1;
+ if (cq_notify == IRDMA_CQ_COMPL_EVENT)
+ arm_next = 1;
+ temp_val = FIELD_PREP(IRDMA_CQ_DBSA_ARM_SEQ_NUM, arm_seq_num) |
+ FIELD_PREP(IRDMA_CQ_DBSA_SW_CQ_SELECT, sw_cq_sel) |
+ FIELD_PREP(IRDMA_CQ_DBSA_ARM_NEXT_SE, arm_next_se) |
+ FIELD_PREP(IRDMA_CQ_DBSA_ARM_NEXT, arm_next);
+
+ set_64bit_val(cq->shadow_area, 32, temp_val);
+
+ dma_wmb(); /* make sure WQE is populated before valid bit is set */
+
+ writel(cq->cq_id, cq->cqe_alloc_db);
+}
+
+/**
+ * irdma_uk_cq_empty - Check if CQ is empty
+ * @cq: hw cq
+ */
+bool irdma_uk_cq_empty(struct irdma_cq_uk *cq)
+{
+ __le64 *cqe;
+ u8 polarity;
+ u64 qword3;
+
+ if (cq->avoid_mem_cflct)
+ cqe = IRDMA_GET_CURRENT_EXTENDED_CQ_ELEM(cq);
+ else
+ cqe = IRDMA_GET_CURRENT_CQ_ELEM(cq);
+
+ get_64bit_val(cqe, 24, &qword3);
+ polarity = (u8)FIELD_GET(IRDMA_CQ_VALID, qword3);
+
+ return polarity != cq->polarity;
+}
+
+/**
+ * irdma_uk_cq_poll_cmpl - get cq completion info
+ * @cq: hw cq
+ * @info: cq poll information returned
+ */
+int irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq,
+ struct irdma_cq_poll_info *info)
+{
+ u64 comp_ctx, qword0, qword2, qword3;
+ __le64 *cqe;
+ struct irdma_qp_uk *qp;
+ struct irdma_srq_uk *srq;
+ struct qp_err_code qp_err;
+ u8 is_srq;
+ struct irdma_ring *pring = NULL;
+ u32 wqe_idx;
+ int ret_code;
+ bool move_cq_head = true;
+ u8 polarity;
+ bool ext_valid;
+ __le64 *ext_cqe;
+
+ if (cq->avoid_mem_cflct)
+ cqe = IRDMA_GET_CURRENT_EXTENDED_CQ_ELEM(cq);
+ else
+ cqe = IRDMA_GET_CURRENT_CQ_ELEM(cq);
+
+ get_64bit_val(cqe, 24, &qword3);
+ polarity = (u8)FIELD_GET(IRDMA_CQ_VALID, qword3);
+ if (polarity != cq->polarity)
+ return -ENOENT;
+
+ /* Ensure CQE contents are read after valid bit is checked */
+ dma_rmb();
+
+ ext_valid = (bool)FIELD_GET(IRDMA_CQ_EXTCQE, qword3);
+ if (ext_valid) {
+ u64 qword6, qword7;
+ u32 peek_head;
+
+ if (cq->avoid_mem_cflct) {
+ ext_cqe = (__le64 *)((u8 *)cqe + 32);
+ get_64bit_val(ext_cqe, 24, &qword7);
+ polarity = (u8)FIELD_GET(IRDMA_CQ_VALID, qword7);
+ } else {
+ peek_head = (cq->cq_ring.head + 1) % cq->cq_ring.size;
+ ext_cqe = cq->cq_base[peek_head].buf;
+ get_64bit_val(ext_cqe, 24, &qword7);
+ polarity = (u8)FIELD_GET(IRDMA_CQ_VALID, qword7);
+ if (!peek_head)
+ polarity ^= 1;
+ }
+ if (polarity != cq->polarity)
+ return -ENOENT;
+
+ /* Ensure ext CQE contents are read after ext valid bit is checked */
+ dma_rmb();
+
+ info->imm_valid = (bool)FIELD_GET(IRDMA_CQ_IMMVALID, qword7);
+ if (info->imm_valid) {
+ u64 qword4;
+
+ get_64bit_val(ext_cqe, 0, &qword4);
+ info->imm_data = (u32)FIELD_GET(IRDMA_CQ_IMMDATALOW32, qword4);
+ }
+ info->ud_smac_valid = (bool)FIELD_GET(IRDMA_CQ_UDSMACVALID, qword7);
+ info->ud_vlan_valid = (bool)FIELD_GET(IRDMA_CQ_UDVLANVALID, qword7);
+ if (info->ud_smac_valid || info->ud_vlan_valid) {
+ get_64bit_val(ext_cqe, 16, &qword6);
+ if (info->ud_vlan_valid)
+ info->ud_vlan = (u16)FIELD_GET(IRDMA_CQ_UDVLAN, qword6);
+ if (info->ud_smac_valid) {
+ info->ud_smac[5] = qword6 & 0xFF;
+ info->ud_smac[4] = (qword6 >> 8) & 0xFF;
+ info->ud_smac[3] = (qword6 >> 16) & 0xFF;
+ info->ud_smac[2] = (qword6 >> 24) & 0xFF;
+ info->ud_smac[1] = (qword6 >> 32) & 0xFF;
+ info->ud_smac[0] = (qword6 >> 40) & 0xFF;
+ }
+ }
+ } else {
+ info->imm_valid = false;
+ info->ud_smac_valid = false;
+ info->ud_vlan_valid = false;
+ }
+
+ info->q_type = (u8)FIELD_GET(IRDMA_CQ_SQ, qword3);
+ is_srq = (u8)FIELD_GET(IRDMA_CQ_SRQ, qword3);
+ info->error = (bool)FIELD_GET(IRDMA_CQ_ERROR, qword3);
+ info->ipv4 = (bool)FIELD_GET(IRDMACQ_IPV4, qword3);
+ get_64bit_val(cqe, 8, &comp_ctx);
+ if (is_srq)
+ get_64bit_val(cqe, 40, (u64 *)&qp);
+ else
+ qp = (struct irdma_qp_uk *)(unsigned long)comp_ctx;
+ if (info->error) {
+ info->major_err = FIELD_GET(IRDMA_CQ_MAJERR, qword3);
+ info->minor_err = FIELD_GET(IRDMA_CQ_MINERR, qword3);
+ switch (info->major_err) {
+ case IRDMA_SRQFLUSH_RSVD_MAJOR_ERR:
+ qp_err = irdma_ae_to_qp_err_code(info->minor_err);
+ info->minor_err = qp_err.flush_code;
+ fallthrough;
+ case IRDMA_FLUSH_MAJOR_ERR:
+ /* Set the min error to standard flush error code for remaining cqes */
+ if (info->minor_err != FLUSH_GENERAL_ERR) {
+ qword3 &= ~IRDMA_CQ_MINERR;
+ qword3 |= FIELD_PREP(IRDMA_CQ_MINERR, FLUSH_GENERAL_ERR);
+ set_64bit_val(cqe, 24, qword3);
+ }
+ info->comp_status = IRDMA_COMPL_STATUS_FLUSHED;
+ break;
+ default:
+#define IRDMA_CIE_SIGNATURE 0xE
+#define IRDMA_CQMAJERR_HIGH_NIBBLE GENMASK(15, 12)
+ if (info->q_type == IRDMA_CQE_QTYPE_SQ &&
+ qp->qp_type == IRDMA_QP_TYPE_ROCE_UD &&
+ FIELD_GET(IRDMA_CQMAJERR_HIGH_NIBBLE, info->major_err)
+ == IRDMA_CIE_SIGNATURE) {
+ info->error = 0;
+ info->major_err = 0;
+ info->minor_err = 0;
+ info->comp_status = IRDMA_COMPL_STATUS_SUCCESS;
+ } else {
+ info->comp_status = IRDMA_COMPL_STATUS_UNKNOWN;
+ }
+ break;
+ }
+ } else {
+ info->comp_status = IRDMA_COMPL_STATUS_SUCCESS;
+ }
+
+ get_64bit_val(cqe, 0, &qword0);
+ get_64bit_val(cqe, 16, &qword2);
+
+ info->qp_id = (u32)FIELD_GET(IRDMACQ_QPID, qword2);
+ info->ud_src_qpn = (u32)FIELD_GET(IRDMACQ_UDSRCQPN, qword2);
+
+ get_64bit_val(cqe, 8, &comp_ctx);
+
+ info->solicited_event = (bool)FIELD_GET(IRDMACQ_SOEVENT, qword3);
+ qp = (struct irdma_qp_uk *)(unsigned long)comp_ctx;
+ if (!qp || qp->destroy_pending) {
+ ret_code = -EFAULT;
+ goto exit;
+ }
+ wqe_idx = (u32)FIELD_GET(IRDMA_CQ_WQEIDX, qword3);
+ info->qp_handle = (irdma_qp_handle)(unsigned long)qp;
+ info->op_type = (u8)FIELD_GET(IRDMACQ_OP, qword3);
+
+ if (info->q_type == IRDMA_CQE_QTYPE_RQ && is_srq) {
+ unsigned long flags;
+
+ srq = qp->srq_uk;
+
+ get_64bit_val(cqe, 8, &info->wr_id);
+ info->bytes_xfered = (u32)FIELD_GET(IRDMACQ_PAYLDLEN, qword0);
+
+ if (qword3 & IRDMACQ_STAG) {
+ info->stag_invalid_set = true;
+ info->inv_stag = (u32)FIELD_GET(IRDMACQ_INVSTAG,
+ qword2);
+ } else {
+ info->stag_invalid_set = false;
+ }
+ spin_lock_irqsave(srq->lock, flags);
+ IRDMA_RING_MOVE_TAIL(srq->srq_ring);
+ spin_unlock_irqrestore(srq->lock, flags);
+ pring = &srq->srq_ring;
+
+ } else if (info->q_type == IRDMA_CQE_QTYPE_RQ && !is_srq) {
+ u32 array_idx;
+
+ array_idx = wqe_idx / qp->rq_wqe_size_multiplier;
+
+ if (info->comp_status == IRDMA_COMPL_STATUS_FLUSHED ||
+ info->comp_status == IRDMA_COMPL_STATUS_UNKNOWN) {
+ if (!IRDMA_RING_MORE_WORK(qp->rq_ring)) {
+ ret_code = -ENOENT;
+ goto exit;
+ }
+
+ info->wr_id = qp->rq_wrid_array[qp->rq_ring.tail];
+ array_idx = qp->rq_ring.tail;
+ } else {
+ info->wr_id = qp->rq_wrid_array[array_idx];
+ }
+
+ info->bytes_xfered = (u32)FIELD_GET(IRDMACQ_PAYLDLEN, qword0);
+
+ if (qword3 & IRDMACQ_STAG) {
+ info->stag_invalid_set = true;
+ info->inv_stag = (u32)FIELD_GET(IRDMACQ_INVSTAG, qword2);
+ } else {
+ info->stag_invalid_set = false;
+ }
+ IRDMA_RING_SET_TAIL(qp->rq_ring, array_idx + 1);
+ if (info->comp_status == IRDMA_COMPL_STATUS_FLUSHED) {
+ qp->rq_flush_seen = true;
+ if (!IRDMA_RING_MORE_WORK(qp->rq_ring))
+ qp->rq_flush_complete = true;
+ else
+ move_cq_head = false;
+ }
+ pring = &qp->rq_ring;
+ } else { /* q_type is IRDMA_CQE_QTYPE_SQ */
+ if (qp->first_sq_wq) {
+ if (wqe_idx + 1 >= qp->conn_wqes)
+ qp->first_sq_wq = false;
+
+ if (wqe_idx < qp->conn_wqes && qp->sq_ring.head == qp->sq_ring.tail) {
+ IRDMA_RING_MOVE_HEAD_NOCHECK(cq->cq_ring);
+ IRDMA_RING_MOVE_TAIL(cq->cq_ring);
+ set_64bit_val(cq->shadow_area, 0,
+ IRDMA_RING_CURRENT_HEAD(cq->cq_ring));
+ memset(info, 0,
+ sizeof(struct irdma_cq_poll_info));
+ return irdma_uk_cq_poll_cmpl(cq, info);
+ }
+ }
+ if (info->comp_status != IRDMA_COMPL_STATUS_FLUSHED) {
+ info->wr_id = qp->sq_wrtrk_array[wqe_idx].wrid;
+ if (!info->comp_status)
+ info->bytes_xfered = qp->sq_wrtrk_array[wqe_idx].wr_len;
+ if (!qp->sq_wrtrk_array[wqe_idx].signaled) {
+ ret_code = -EFAULT;
+ goto exit;
+ }
+ info->op_type = (u8)FIELD_GET(IRDMACQ_OP, qword3);
+ IRDMA_RING_SET_TAIL(qp->sq_ring,
+ wqe_idx + qp->sq_wrtrk_array[wqe_idx].quanta);
+ } else {
+ if (!IRDMA_RING_MORE_WORK(qp->sq_ring)) {
+ ret_code = -ENOENT;
+ goto exit;
+ }
+
+ do {
+ __le64 *sw_wqe;
+ u64 wqe_qword;
+ u32 tail;
+
+ tail = qp->sq_ring.tail;
+ sw_wqe = qp->sq_base[tail].elem;
+ get_64bit_val(sw_wqe, 24,
+ &wqe_qword);
+ info->op_type = (u8)FIELD_GET(IRDMAQPSQ_OPCODE,
+ wqe_qword);
+ IRDMA_RING_SET_TAIL(qp->sq_ring,
+ tail + qp->sq_wrtrk_array[tail].quanta);
+ if (info->op_type != IRDMAQP_OP_NOP) {
+ info->wr_id = qp->sq_wrtrk_array[tail].wrid;
+ info->bytes_xfered = qp->sq_wrtrk_array[tail].wr_len;
+ break;
+ }
+ } while (1);
+ if (info->op_type == IRDMA_OP_TYPE_BIND_MW &&
+ info->minor_err == FLUSH_PROT_ERR)
+ info->minor_err = FLUSH_MW_BIND_ERR;
+ qp->sq_flush_seen = true;
+ if (!IRDMA_RING_MORE_WORK(qp->sq_ring))
+ qp->sq_flush_complete = true;
+ }
+ pring = &qp->sq_ring;
+ }
+
+ ret_code = 0;
+
+exit:
+ if (!ret_code && info->comp_status == IRDMA_COMPL_STATUS_FLUSHED) {
+ if (pring && IRDMA_RING_MORE_WORK(*pring))
+ /* Park CQ head during a flush to generate additional CQEs
+ * from SW for all unprocessed WQEs. For GEN3 and beyond
+ * FW will generate/flush these CQEs so move to the next CQE
+ */
+ move_cq_head = qp->uk_attrs->hw_rev <= IRDMA_GEN_2 ?
+ false : true;
+ }
+
+ if (move_cq_head) {
+ IRDMA_RING_MOVE_HEAD_NOCHECK(cq->cq_ring);
+ if (!IRDMA_RING_CURRENT_HEAD(cq->cq_ring))
+ cq->polarity ^= 1;
+
+ if (ext_valid && !cq->avoid_mem_cflct) {
+ IRDMA_RING_MOVE_HEAD_NOCHECK(cq->cq_ring);
+ if (!IRDMA_RING_CURRENT_HEAD(cq->cq_ring))
+ cq->polarity ^= 1;
+ }
+
+ IRDMA_RING_MOVE_TAIL(cq->cq_ring);
+ if (!cq->avoid_mem_cflct && ext_valid)
+ IRDMA_RING_MOVE_TAIL(cq->cq_ring);
+ if (IRDMA_RING_CURRENT_HEAD(cq->cq_ring) & 0x3F || irdma_uk_cq_empty(cq))
+ set_64bit_val(cq->shadow_area, 0,
+ IRDMA_RING_CURRENT_HEAD(cq->cq_ring));
+ } else {
+ qword3 &= ~IRDMA_CQ_WQEIDX;
+ qword3 |= FIELD_PREP(IRDMA_CQ_WQEIDX, pring->tail);
+ set_64bit_val(cqe, 24, qword3);
+ }
+
+ return ret_code;
+}
+
+/**
+ * irdma_round_up_wq - return round up qp wq depth
+ * @wqdepth: wq depth in quanta to round up
+ */
+static int irdma_round_up_wq(u32 wqdepth)
+{
+ int scount = 1;
+
+ for (wqdepth--; scount <= 16; scount *= 2)
+ wqdepth |= wqdepth >> scount;
+
+ return ++wqdepth;
+}
+
+/**
+ * irdma_get_wqe_shift - get shift count for maximum wqe size
+ * @uk_attrs: qp HW attributes
+ * @sge: Maximum Scatter Gather Elements wqe
+ * @inline_data: Maximum inline data size
+ * @shift: Returns the shift needed based on sge
+ *
+ * Shift can be used to left shift the wqe size based on number of SGEs and inlind data size.
+ * For 1 SGE or inline data <= 8, shift = 0 (wqe size of 32
+ * bytes). For 2 or 3 SGEs or inline data <= 39, shift = 1 (wqe
+ * size of 64 bytes).
+ * For 4-7 SGE's and inline <= 101 Shift of 2 otherwise (wqe
+ * size of 256 bytes).
+ */
+void irdma_get_wqe_shift(struct irdma_uk_attrs *uk_attrs, u32 sge,
+ u32 inline_data, u8 *shift)
+{
+ *shift = 0;
+ if (uk_attrs->hw_rev >= IRDMA_GEN_2) {
+ if (sge > 1 || inline_data > 8) {
+ if (sge < 4 && inline_data <= 39)
+ *shift = 1;
+ else if (sge < 8 && inline_data <= 101)
+ *shift = 2;
+ else
+ *shift = 3;
+ }
+ } else if (sge > 1 || inline_data > 16) {
+ *shift = (sge < 4 && inline_data <= 48) ? 1 : 2;
+ }
+}
+
+/*
+ * irdma_get_sqdepth - get SQ depth (quanta)
+ * @uk_attrs: qp HW attributes
+ * @sq_size: SQ size
+ * @shift: shift which determines size of WQE
+ * @sqdepth: depth of SQ
+ *
+ */
+int irdma_get_sqdepth(struct irdma_uk_attrs *uk_attrs, u32 sq_size, u8 shift,
+ u32 *sqdepth)
+{
+ u32 min_size = (u32)uk_attrs->min_hw_wq_size << shift;
+
+ *sqdepth = irdma_round_up_wq((sq_size << shift) + IRDMA_SQ_RSVD);
+
+ if (*sqdepth < min_size)
+ *sqdepth = min_size;
+ else if (*sqdepth > uk_attrs->max_hw_wq_quanta)
+ return -EINVAL;
+
+ return 0;
+}
+
+/*
+ * irdma_get_rqdepth - get RQ depth (quanta)
+ * @uk_attrs: qp HW attributes
+ * @rq_size: RQ size
+ * @shift: shift which determines size of WQE
+ * @rqdepth: depth of RQ
+ */
+int irdma_get_rqdepth(struct irdma_uk_attrs *uk_attrs, u32 rq_size, u8 shift,
+ u32 *rqdepth)
+{
+ u32 min_size = (u32)uk_attrs->min_hw_wq_size << shift;
+
+ *rqdepth = irdma_round_up_wq((rq_size << shift) + IRDMA_RQ_RSVD);
+
+ if (*rqdepth < min_size)
+ *rqdepth = min_size;
+ else if (*rqdepth > uk_attrs->max_hw_rq_quanta)
+ return -EINVAL;
+
+ return 0;
+}
+
+/*
+ * irdma_get_srqdepth - get SRQ depth (quanta)
+ * @uk_attrs: qp HW attributes
+ * @srq_size: SRQ size
+ * @shift: shift which determines size of WQE
+ * @srqdepth: depth of SRQ
+ */
+int irdma_get_srqdepth(struct irdma_uk_attrs *uk_attrs, u32 srq_size, u8 shift,
+ u32 *srqdepth)
+{
+ *srqdepth = irdma_round_up_wq((srq_size << shift) + IRDMA_RQ_RSVD);
+
+ if (*srqdepth < ((u32)uk_attrs->min_hw_wq_size << shift))
+ *srqdepth = uk_attrs->min_hw_wq_size << shift;
+ else if (*srqdepth > uk_attrs->max_hw_srq_quanta)
+ return -EINVAL;
+
+ return 0;
+}
+
+static const struct irdma_wqe_uk_ops iw_wqe_uk_ops = {
+ .iw_copy_inline_data = irdma_copy_inline_data,
+ .iw_inline_data_size_to_quanta = irdma_inline_data_size_to_quanta,
+ .iw_set_fragment = irdma_set_fragment,
+ .iw_set_mw_bind_wqe = irdma_set_mw_bind_wqe,
+};
+
+static const struct irdma_wqe_uk_ops iw_wqe_uk_ops_gen_1 = {
+ .iw_copy_inline_data = irdma_copy_inline_data_gen_1,
+ .iw_inline_data_size_to_quanta = irdma_inline_data_size_to_quanta_gen_1,
+ .iw_set_fragment = irdma_set_fragment_gen_1,
+ .iw_set_mw_bind_wqe = irdma_set_mw_bind_wqe_gen_1,
+};
+
+/**
+ * irdma_setup_connection_wqes - setup WQEs necessary to complete
+ * connection.
+ * @qp: hw qp (user and kernel)
+ * @info: qp initialization info
+ */
+static void irdma_setup_connection_wqes(struct irdma_qp_uk *qp,
+ struct irdma_qp_uk_init_info *info)
+{
+ u16 move_cnt = 1;
+
+ if (!info->legacy_mode &&
+ (qp->uk_attrs->feature_flags & IRDMA_FEATURE_RTS_AE))
+ move_cnt = 3;
+
+ qp->conn_wqes = move_cnt;
+ IRDMA_RING_MOVE_HEAD_BY_COUNT_NOCHECK(qp->sq_ring, move_cnt);
+ IRDMA_RING_MOVE_TAIL_BY_COUNT(qp->sq_ring, move_cnt);
+}
+
+/**
+ * irdma_uk_srq_init - initialize shared qp
+ * @srq: hw srq (user and kernel)
+ * @info: srq initialization info
+ *
+ * Initializes the vars used in both user and kernel mode.
+ * The size of the wqe depends on number of max fragments
+ * allowed. Then size of wqe * the number of wqes should be the
+ * amount of memory allocated for srq.
+ */
+int irdma_uk_srq_init(struct irdma_srq_uk *srq,
+ struct irdma_srq_uk_init_info *info)
+{
+ u8 rqshift;
+
+ srq->uk_attrs = info->uk_attrs;
+ if (info->max_srq_frag_cnt > srq->uk_attrs->max_hw_wq_frags)
+ return -EINVAL;
+
+ irdma_get_wqe_shift(srq->uk_attrs, info->max_srq_frag_cnt, 0, &rqshift);
+ srq->srq_caps = info->srq_caps;
+ srq->srq_base = info->srq;
+ srq->shadow_area = info->shadow_area;
+ srq->srq_id = info->srq_id;
+ srq->srwqe_polarity = 0;
+ srq->srq_size = info->srq_size;
+ srq->wqe_size = rqshift;
+ srq->max_srq_frag_cnt = min(srq->uk_attrs->max_hw_wq_frags,
+ ((u32)2 << rqshift) - 1);
+ IRDMA_RING_INIT(srq->srq_ring, srq->srq_size);
+ srq->wqe_size_multiplier = 1 << rqshift;
+ srq->wqe_ops = iw_wqe_uk_ops;
+
+ return 0;
+}
+
+/**
+ * irdma_uk_calc_shift_wq - calculate WQE shift for both SQ and RQ
+ * @ukinfo: qp initialization info
+ * @sq_shift: Returns shift of SQ
+ * @rq_shift: Returns shift of RQ
+ */
+void irdma_uk_calc_shift_wq(struct irdma_qp_uk_init_info *ukinfo, u8 *sq_shift,
+ u8 *rq_shift)
+{
+ bool imm_support = ukinfo->uk_attrs->hw_rev >= IRDMA_GEN_2;
+
+ irdma_get_wqe_shift(ukinfo->uk_attrs,
+ imm_support ? ukinfo->max_sq_frag_cnt + 1 :
+ ukinfo->max_sq_frag_cnt,
+ ukinfo->max_inline_data, sq_shift);
+
+ irdma_get_wqe_shift(ukinfo->uk_attrs, ukinfo->max_rq_frag_cnt, 0,
+ rq_shift);
+
+ if (ukinfo->uk_attrs->hw_rev == IRDMA_GEN_1) {
+ if (ukinfo->abi_ver > 4)
+ *rq_shift = IRDMA_MAX_RQ_WQE_SHIFT_GEN1;
+ }
+}
+
+/**
+ * irdma_uk_calc_depth_shift_sq - calculate depth and shift for SQ size.
+ * @ukinfo: qp initialization info
+ * @sq_depth: Returns depth of SQ
+ * @sq_shift: Returns shift of SQ
+ */
+int irdma_uk_calc_depth_shift_sq(struct irdma_qp_uk_init_info *ukinfo,
+ u32 *sq_depth, u8 *sq_shift)
+{
+ bool imm_support = ukinfo->uk_attrs->hw_rev >= IRDMA_GEN_2;
+ int status;
+
+ irdma_get_wqe_shift(ukinfo->uk_attrs,
+ imm_support ? ukinfo->max_sq_frag_cnt + 1 :
+ ukinfo->max_sq_frag_cnt,
+ ukinfo->max_inline_data, sq_shift);
+ status = irdma_get_sqdepth(ukinfo->uk_attrs, ukinfo->sq_size,
+ *sq_shift, sq_depth);
+
+ return status;
+}
+
+/**
+ * irdma_uk_calc_depth_shift_rq - calculate depth and shift for RQ size.
+ * @ukinfo: qp initialization info
+ * @rq_depth: Returns depth of RQ
+ * @rq_shift: Returns shift of RQ
+ */
+int irdma_uk_calc_depth_shift_rq(struct irdma_qp_uk_init_info *ukinfo,
+ u32 *rq_depth, u8 *rq_shift)
+{
+ int status;
+
+ irdma_get_wqe_shift(ukinfo->uk_attrs, ukinfo->max_rq_frag_cnt, 0,
+ rq_shift);
+
+ if (ukinfo->uk_attrs->hw_rev == IRDMA_GEN_1) {
+ if (ukinfo->abi_ver > 4)
+ *rq_shift = IRDMA_MAX_RQ_WQE_SHIFT_GEN1;
+ }
+
+ status = irdma_get_rqdepth(ukinfo->uk_attrs, ukinfo->rq_size,
+ *rq_shift, rq_depth);
+
+ return status;
+}
+
+/**
+ * irdma_uk_qp_init - initialize shared qp
+ * @qp: hw qp (user and kernel)
+ * @info: qp initialization info
+ *
+ * initializes the vars used in both user and kernel mode.
+ * size of the wqe depends on numbers of max. fragements
+ * allowed. Then size of wqe * the number of wqes should be the
+ * amount of memory allocated for sq and rq.
+ */
+int irdma_uk_qp_init(struct irdma_qp_uk *qp, struct irdma_qp_uk_init_info *info)
+{
+ int ret_code = 0;
+ u32 sq_ring_size;
+
+ qp->uk_attrs = info->uk_attrs;
+ if (info->max_sq_frag_cnt > qp->uk_attrs->max_hw_wq_frags ||
+ info->max_rq_frag_cnt > qp->uk_attrs->max_hw_wq_frags)
+ return -EINVAL;
+
+ qp->qp_caps = info->qp_caps;
+ qp->sq_base = info->sq;
+ qp->rq_base = info->rq;
+ qp->qp_type = info->type ? info->type : IRDMA_QP_TYPE_IWARP;
+ qp->shadow_area = info->shadow_area;
+ qp->sq_wrtrk_array = info->sq_wrtrk_array;
+
+ qp->rq_wrid_array = info->rq_wrid_array;
+ qp->wqe_alloc_db = info->wqe_alloc_db;
+ qp->qp_id = info->qp_id;
+ qp->sq_size = info->sq_size;
+ qp->max_sq_frag_cnt = info->max_sq_frag_cnt;
+ sq_ring_size = qp->sq_size << info->sq_shift;
+ IRDMA_RING_INIT(qp->sq_ring, sq_ring_size);
+ if (info->first_sq_wq) {
+ irdma_setup_connection_wqes(qp, info);
+ qp->swqe_polarity = 1;
+ qp->first_sq_wq = true;
+ } else {
+ qp->swqe_polarity = 0;
+ }
+ qp->swqe_polarity_deferred = 1;
+ qp->rwqe_polarity = 0;
+ qp->rq_size = info->rq_size;
+ qp->max_rq_frag_cnt = info->max_rq_frag_cnt;
+ qp->max_inline_data = info->max_inline_data;
+ qp->rq_wqe_size = info->rq_shift;
+ IRDMA_RING_INIT(qp->rq_ring, qp->rq_size);
+ qp->rq_wqe_size_multiplier = 1 << info->rq_shift;
+ if (qp->uk_attrs->hw_rev == IRDMA_GEN_1)
+ qp->wqe_ops = iw_wqe_uk_ops_gen_1;
+ else
+ qp->wqe_ops = iw_wqe_uk_ops;
+ qp->srq_uk = info->srq_uk;
+ return ret_code;
+}
+
+/**
+ * irdma_uk_cq_init - initialize shared cq (user and kernel)
+ * @cq: hw cq
+ * @info: hw cq initialization info
+ */
+void irdma_uk_cq_init(struct irdma_cq_uk *cq,
+ struct irdma_cq_uk_init_info *info)
+{
+ cq->cq_base = info->cq_base;
+ cq->cq_id = info->cq_id;
+ cq->cq_size = info->cq_size;
+ cq->cqe_alloc_db = info->cqe_alloc_db;
+ cq->cq_ack_db = info->cq_ack_db;
+ cq->shadow_area = info->shadow_area;
+ cq->avoid_mem_cflct = info->avoid_mem_cflct;
+ IRDMA_RING_INIT(cq->cq_ring, cq->cq_size);
+ cq->polarity = 1;
+}
+
+/**
+ * irdma_uk_clean_cq - clean cq entries
+ * @q: completion context
+ * @cq: cq to clean
+ */
+void irdma_uk_clean_cq(void *q, struct irdma_cq_uk *cq)
+{
+ __le64 *cqe;
+ u64 qword3, comp_ctx;
+ u32 cq_head;
+ u8 polarity, temp;
+
+ cq_head = cq->cq_ring.head;
+ temp = cq->polarity;
+ do {
+ if (cq->avoid_mem_cflct)
+ cqe = ((struct irdma_extended_cqe *)(cq->cq_base))[cq_head].buf;
+ else
+ cqe = cq->cq_base[cq_head].buf;
+ get_64bit_val(cqe, 24, &qword3);
+ polarity = (u8)FIELD_GET(IRDMA_CQ_VALID, qword3);
+
+ if (polarity != temp)
+ break;
+
+ /* Ensure CQE contents are read after valid bit is checked */
+ dma_rmb();
+
+ get_64bit_val(cqe, 8, &comp_ctx);
+ if ((void *)(unsigned long)comp_ctx == q)
+ set_64bit_val(cqe, 8, 0);
+
+ cq_head = (cq_head + 1) % cq->cq_ring.size;
+ if (!cq_head)
+ temp ^= 1;
+ } while (true);
+}
+
+/**
+ * irdma_nop - post a nop
+ * @qp: hw qp ptr
+ * @wr_id: work request id
+ * @signaled: signaled for completion
+ * @post_sq: ring doorbell
+ */
+int irdma_nop(struct irdma_qp_uk *qp, u64 wr_id, bool signaled, bool post_sq)
+{
+ __le64 *wqe;
+ u64 hdr;
+ u32 wqe_idx;
+ struct irdma_post_sq_info info = {};
+
+ info.wr_id = wr_id;
+ wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, IRDMA_QP_WQE_MIN_QUANTA,
+ 0, &info);
+ if (!wqe)
+ return -ENOMEM;
+
+ irdma_clr_wqes(qp, wqe_idx);
+
+ set_64bit_val(wqe, 0, 0);
+ set_64bit_val(wqe, 8, 0);
+ set_64bit_val(wqe, 16, 0);
+
+ hdr = FIELD_PREP(IRDMAQPSQ_OPCODE, IRDMAQP_OP_NOP) |
+ FIELD_PREP(IRDMAQPSQ_SIGCOMPL, signaled) |
+ FIELD_PREP(IRDMAQPSQ_VALID, qp->swqe_polarity);
+
+ dma_wmb(); /* make sure WQE is populated before valid bit is set */
+
+ set_64bit_val(wqe, 24, hdr);
+ if (post_sq)
+ irdma_uk_qp_post_wr(qp);
+
+ return 0;
+}
+
+/**
+ * irdma_fragcnt_to_quanta_sq - calculate quanta based on fragment count for SQ
+ * @frag_cnt: number of fragments
+ * @quanta: quanta for frag_cnt
+ */
+int irdma_fragcnt_to_quanta_sq(u32 frag_cnt, u16 *quanta)
+{
+ switch (frag_cnt) {
+ case 0:
+ case 1:
+ *quanta = IRDMA_QP_WQE_MIN_QUANTA;
+ break;
+ case 2:
+ case 3:
+ *quanta = 2;
+ break;
+ case 4:
+ case 5:
+ *quanta = 3;
+ break;
+ case 6:
+ case 7:
+ *quanta = 4;
+ break;
+ case 8:
+ case 9:
+ *quanta = 5;
+ break;
+ case 10:
+ case 11:
+ *quanta = 6;
+ break;
+ case 12:
+ case 13:
+ *quanta = 7;
+ break;
+ case 14:
+ case 15: /* when immediate data is present */
+ *quanta = 8;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/**
+ * irdma_fragcnt_to_wqesize_rq - calculate wqe size based on fragment count for RQ
+ * @frag_cnt: number of fragments
+ * @wqe_size: size in bytes given frag_cnt
+ */
+int irdma_fragcnt_to_wqesize_rq(u32 frag_cnt, u16 *wqe_size)
+{
+ switch (frag_cnt) {
+ case 0:
+ case 1:
+ *wqe_size = 32;
+ break;
+ case 2:
+ case 3:
+ *wqe_size = 64;
+ break;
+ case 4:
+ case 5:
+ case 6:
+ case 7:
+ *wqe_size = 128;
+ break;
+ case 8:
+ case 9:
+ case 10:
+ case 11:
+ case 12:
+ case 13:
+ case 14:
+ *wqe_size = 256;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
diff --git a/drivers/infiniband/hw/irdma/user.h b/drivers/infiniband/hw/irdma/user.h
new file mode 100644
index 000000000000..9eb7fd0b1cbf
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/user.h
@@ -0,0 +1,676 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2015 - 2020 Intel Corporation */
+#ifndef IRDMA_USER_H
+#define IRDMA_USER_H
+
+#define irdma_handle void *
+#define irdma_adapter_handle irdma_handle
+#define irdma_qp_handle irdma_handle
+#define irdma_cq_handle irdma_handle
+#define irdma_pd_id irdma_handle
+#define irdma_stag_handle irdma_handle
+#define irdma_stag_index u32
+#define irdma_stag u32
+#define irdma_stag_key u8
+#define irdma_tagged_offset u64
+#define irdma_access_privileges u32
+#define irdma_physical_fragment u64
+#define irdma_address_list u64 *
+
+#define IRDMA_MAX_MR_SIZE 0x200000000000ULL
+
+#define IRDMA_ACCESS_FLAGS_LOCALREAD 0x01
+#define IRDMA_ACCESS_FLAGS_LOCALWRITE 0x02
+#define IRDMA_ACCESS_FLAGS_REMOTEREAD_ONLY 0x04
+#define IRDMA_ACCESS_FLAGS_REMOTEREAD 0x05
+#define IRDMA_ACCESS_FLAGS_REMOTEWRITE_ONLY 0x08
+#define IRDMA_ACCESS_FLAGS_REMOTEWRITE 0x0a
+#define IRDMA_ACCESS_FLAGS_BIND_WINDOW 0x10
+#define IRDMA_ACCESS_FLAGS_ZERO_BASED 0x20
+#define IRDMA_ACCESS_FLAGS_ALL 0x3f
+
+#define IRDMA_OP_TYPE_RDMA_WRITE 0x00
+#define IRDMA_OP_TYPE_RDMA_READ 0x01
+#define IRDMA_OP_TYPE_SEND 0x03
+#define IRDMA_OP_TYPE_SEND_INV 0x04
+#define IRDMA_OP_TYPE_SEND_SOL 0x05
+#define IRDMA_OP_TYPE_SEND_SOL_INV 0x06
+#define IRDMA_OP_TYPE_RDMA_WRITE_SOL 0x0d
+#define IRDMA_OP_TYPE_BIND_MW 0x08
+#define IRDMA_OP_TYPE_FAST_REG_NSMR 0x09
+#define IRDMA_OP_TYPE_INV_STAG 0x0a
+#define IRDMA_OP_TYPE_RDMA_READ_INV_STAG 0x0b
+#define IRDMA_OP_TYPE_NOP 0x0c
+#define IRDMA_OP_TYPE_ATOMIC_FETCH_AND_ADD 0x0f
+#define IRDMA_OP_TYPE_ATOMIC_COMPARE_AND_SWAP 0x11
+#define IRDMA_OP_TYPE_REC 0x3e
+#define IRDMA_OP_TYPE_REC_IMM 0x3f
+
+#define IRDMA_FLUSH_MAJOR_ERR 1
+#define IRDMA_SRQFLUSH_RSVD_MAJOR_ERR 0xfffe
+
+/* Async Events codes */
+#define IRDMA_AE_AMP_UNALLOCATED_STAG 0x0102
+#define IRDMA_AE_AMP_INVALID_STAG 0x0103
+#define IRDMA_AE_AMP_BAD_QP 0x0104
+#define IRDMA_AE_AMP_BAD_PD 0x0105
+#define IRDMA_AE_AMP_BAD_STAG_KEY 0x0106
+#define IRDMA_AE_AMP_BAD_STAG_INDEX 0x0107
+#define IRDMA_AE_AMP_BOUNDS_VIOLATION 0x0108
+#define IRDMA_AE_AMP_RIGHTS_VIOLATION 0x0109
+#define IRDMA_AE_AMP_TO_WRAP 0x010a
+#define IRDMA_AE_AMP_FASTREG_VALID_STAG 0x010c
+#define IRDMA_AE_AMP_FASTREG_MW_STAG 0x010d
+#define IRDMA_AE_AMP_FASTREG_INVALID_RIGHTS 0x010e
+#define IRDMA_AE_AMP_FASTREG_INVALID_LENGTH 0x0110
+#define IRDMA_AE_AMP_INVALIDATE_SHARED 0x0111
+#define IRDMA_AE_AMP_INVALIDATE_NO_REMOTE_ACCESS_RIGHTS 0x0112
+#define IRDMA_AE_AMP_INVALIDATE_MR_WITH_BOUND_WINDOWS 0x0113
+#define IRDMA_AE_AMP_MWBIND_VALID_STAG 0x0114
+#define IRDMA_AE_AMP_MWBIND_OF_MR_STAG 0x0115
+#define IRDMA_AE_AMP_MWBIND_TO_ZERO_BASED_STAG 0x0116
+#define IRDMA_AE_AMP_MWBIND_TO_MW_STAG 0x0117
+#define IRDMA_AE_AMP_MWBIND_INVALID_RIGHTS 0x0118
+#define IRDMA_AE_AMP_MWBIND_INVALID_BOUNDS 0x0119
+#define IRDMA_AE_AMP_MWBIND_TO_INVALID_PARENT 0x011a
+#define IRDMA_AE_AMP_MWBIND_BIND_DISABLED 0x011b
+#define IRDMA_AE_PRIV_OPERATION_DENIED 0x011c
+#define IRDMA_AE_AMP_INVALIDATE_TYPE1_MW 0x011d
+#define IRDMA_AE_AMP_MWBIND_ZERO_BASED_TYPE1_MW 0x011e
+#define IRDMA_AE_AMP_FASTREG_INVALID_PBL_HPS_CFG 0x011f
+#define IRDMA_AE_AMP_MWBIND_WRONG_TYPE 0x0120
+#define IRDMA_AE_AMP_FASTREG_PBLE_MISMATCH 0x0121
+#define IRDMA_AE_UDA_XMIT_DGRAM_TOO_LONG 0x0132
+#define IRDMA_AE_UDA_XMIT_BAD_PD 0x0133
+#define IRDMA_AE_UDA_XMIT_DGRAM_TOO_SHORT 0x0134
+#define IRDMA_AE_UDA_L4LEN_INVALID 0x0135
+#define IRDMA_AE_BAD_CLOSE 0x0201
+#define IRDMA_AE_RDMAP_ROE_BAD_LLP_CLOSE 0x0202
+#define IRDMA_AE_CQ_OPERATION_ERROR 0x0203
+#define IRDMA_AE_RDMA_READ_WHILE_ORD_ZERO 0x0205
+#define IRDMA_AE_STAG_ZERO_INVALID 0x0206
+#define IRDMA_AE_IB_RREQ_AND_Q1_FULL 0x0207
+#define IRDMA_AE_IB_INVALID_REQUEST 0x0208
+#define IRDMA_AE_SRQ_LIMIT 0x0209
+#define IRDMA_AE_WQE_UNEXPECTED_OPCODE 0x020a
+#define IRDMA_AE_WQE_INVALID_PARAMETER 0x020b
+#define IRDMA_AE_WQE_INVALID_FRAG_DATA 0x020c
+#define IRDMA_AE_IB_REMOTE_ACCESS_ERROR 0x020d
+#define IRDMA_AE_IB_REMOTE_OP_ERROR 0x020e
+#define IRDMA_AE_SRQ_CATASTROPHIC_ERROR 0x020f
+#define IRDMA_AE_WQE_LSMM_TOO_LONG 0x0220
+#define IRDMA_AE_ATOMIC_ALIGNMENT 0x0221
+#define IRDMA_AE_ATOMIC_MASK 0x0222
+#define IRDMA_AE_INVALID_REQUEST 0x0223
+#define IRDMA_AE_PCIE_ATOMIC_DISABLE 0x0224
+#define IRDMA_AE_DDP_INVALID_MSN_GAP_IN_MSN 0x0301
+#define IRDMA_AE_DDP_UBE_DDP_MESSAGE_TOO_LONG_FOR_AVAILABLE_BUFFER 0x0303
+#define IRDMA_AE_DDP_UBE_INVALID_DDP_VERSION 0x0304
+#define IRDMA_AE_DDP_UBE_INVALID_MO 0x0305
+#define IRDMA_AE_DDP_UBE_INVALID_MSN_NO_BUFFER_AVAILABLE 0x0306
+#define IRDMA_AE_DDP_UBE_INVALID_QN 0x0307
+#define IRDMA_AE_DDP_NO_L_BIT 0x0308
+#define IRDMA_AE_RDMAP_ROE_INVALID_RDMAP_VERSION 0x0311
+#define IRDMA_AE_RDMAP_ROE_UNEXPECTED_OPCODE 0x0312
+#define IRDMA_AE_ROE_INVALID_RDMA_READ_REQUEST 0x0313
+#define IRDMA_AE_ROE_INVALID_RDMA_WRITE_OR_READ_RESP 0x0314
+#define IRDMA_AE_ROCE_RSP_LENGTH_ERROR 0x0316
+#define IRDMA_AE_ROCE_EMPTY_MCG 0x0380
+#define IRDMA_AE_ROCE_BAD_MC_IP_ADDR 0x0381
+#define IRDMA_AE_ROCE_BAD_MC_QPID 0x0382
+#define IRDMA_AE_MCG_QP_PROTOCOL_MISMATCH 0x0383
+#define IRDMA_AE_INVALID_ARP_ENTRY 0x0401
+#define IRDMA_AE_INVALID_TCP_OPTION_RCVD 0x0402
+#define IRDMA_AE_STALE_ARP_ENTRY 0x0403
+#define IRDMA_AE_INVALID_AH_ENTRY 0x0406
+#define IRDMA_AE_LLP_CLOSE_COMPLETE 0x0501
+#define IRDMA_AE_LLP_CONNECTION_RESET 0x0502
+#define IRDMA_AE_LLP_FIN_RECEIVED 0x0503
+#define IRDMA_AE_LLP_RECEIVED_MARKER_AND_LENGTH_FIELDS_DONT_MATCH 0x0504
+#define IRDMA_AE_LLP_RECEIVED_MPA_CRC_ERROR 0x0505
+#define IRDMA_AE_LLP_SEGMENT_TOO_SMALL 0x0507
+#define IRDMA_AE_LLP_SYN_RECEIVED 0x0508
+#define IRDMA_AE_LLP_TERMINATE_RECEIVED 0x0509
+#define IRDMA_AE_LLP_TOO_MANY_RETRIES 0x050a
+#define IRDMA_AE_LLP_TOO_MANY_KEEPALIVE_RETRIES 0x050b
+#define IRDMA_AE_LLP_DOUBT_REACHABILITY 0x050c
+#define IRDMA_AE_LLP_CONNECTION_ESTABLISHED 0x050e
+#define IRDMA_AE_LLP_TOO_MANY_RNRS 0x050f
+#define IRDMA_AE_RESOURCE_EXHAUSTION 0x0520
+#define IRDMA_AE_RESET_SENT 0x0601
+#define IRDMA_AE_TERMINATE_SENT 0x0602
+#define IRDMA_AE_RESET_NOT_SENT 0x0603
+#define IRDMA_AE_LCE_QP_CATASTROPHIC 0x0700
+#define IRDMA_AE_LCE_FUNCTION_CATASTROPHIC 0x0701
+#define IRDMA_AE_LCE_CQ_CATASTROPHIC 0x0702
+#define IRDMA_AE_REMOTE_QP_CATASTROPHIC 0x0703
+#define IRDMA_AE_LOCAL_QP_CATASTROPHIC 0x0704
+#define IRDMA_AE_RCE_QP_CATASTROPHIC 0x0705
+#define IRDMA_AE_QP_SUSPEND_COMPLETE 0x0900
+#define IRDMA_AE_CQP_DEFERRED_COMPLETE 0x0901
+#define IRDMA_AE_ADAPTER_CATASTROPHIC 0x0B0B
+
+enum irdma_device_caps_const {
+ IRDMA_WQE_SIZE = 4,
+ IRDMA_CQP_WQE_SIZE = 8,
+ IRDMA_CQE_SIZE = 4,
+ IRDMA_EXTENDED_CQE_SIZE = 8,
+ IRDMA_AEQE_SIZE = 2,
+ IRDMA_CEQE_SIZE = 1,
+ IRDMA_CQP_CTX_SIZE = 8,
+ IRDMA_SHADOW_AREA_SIZE = 8,
+ IRDMA_QUERY_FPM_BUF_SIZE = 192,
+ IRDMA_COMMIT_FPM_BUF_SIZE = 192,
+ IRDMA_GATHER_STATS_BUF_SIZE = 1024,
+ IRDMA_MIN_IW_QP_ID = 0,
+ IRDMA_MAX_IW_QP_ID = 262143,
+ IRDMA_MIN_IW_SRQ_ID = 0,
+ IRDMA_MIN_CEQID = 0,
+ IRDMA_MAX_CEQID = 1023,
+ IRDMA_CEQ_MAX_COUNT = IRDMA_MAX_CEQID + 1,
+ IRDMA_MIN_CQID = 0,
+ IRDMA_MAX_CQID = 524287,
+ IRDMA_MIN_AEQ_ENTRIES = 1,
+ IRDMA_MAX_AEQ_ENTRIES = 524287,
+ IRDMA_MAX_AEQ_ENTRIES_GEN_3 = 262144,
+ IRDMA_MIN_CEQ_ENTRIES = 1,
+ IRDMA_MAX_CEQ_ENTRIES = 262143,
+ IRDMA_MIN_CQ_SIZE = 1,
+ IRDMA_MAX_CQ_SIZE = 1048575,
+ IRDMA_DB_ID_ZERO = 0,
+ IRDMA_MAX_WQ_FRAGMENT_COUNT = 13,
+ IRDMA_MAX_SGE_RD = 13,
+ IRDMA_MAX_OUTBOUND_MSG_SIZE = 2147483647,
+ IRDMA_MAX_INBOUND_MSG_SIZE = 2147483647,
+ IRDMA_MAX_PUSH_PAGE_COUNT = 1024,
+ IRDMA_MAX_PE_ENA_VF_COUNT = 32,
+ IRDMA_MAX_VF_FPM_ID = 47,
+ IRDMA_MAX_SQ_PAYLOAD_SIZE = 2145386496,
+ IRDMA_MAX_INLINE_DATA_SIZE = 101,
+ IRDMA_MAX_WQ_ENTRIES = 32768,
+ IRDMA_Q2_BUF_SIZE = 256,
+ IRDMA_QP_CTX_SIZE = 256,
+ IRDMA_MAX_PDS = 262144,
+ IRDMA_MIN_WQ_SIZE_GEN2 = 8,
+};
+
+enum irdma_addressing_type {
+ IRDMA_ADDR_TYPE_ZERO_BASED = 0,
+ IRDMA_ADDR_TYPE_VA_BASED = 1,
+};
+
+enum irdma_flush_opcode {
+ FLUSH_INVALID = 0,
+ FLUSH_GENERAL_ERR,
+ FLUSH_PROT_ERR,
+ FLUSH_REM_ACCESS_ERR,
+ FLUSH_LOC_QP_OP_ERR,
+ FLUSH_REM_OP_ERR,
+ FLUSH_LOC_LEN_ERR,
+ FLUSH_FATAL_ERR,
+ FLUSH_RETRY_EXC_ERR,
+ FLUSH_MW_BIND_ERR,
+ FLUSH_REM_INV_REQ_ERR,
+ FLUSH_RNR_RETRY_EXC_ERR,
+};
+
+enum irdma_qp_event_type {
+ IRDMA_QP_EVENT_CATASTROPHIC,
+ IRDMA_QP_EVENT_ACCESS_ERR,
+ IRDMA_QP_EVENT_REQ_ERR,
+};
+
+enum irdma_cmpl_status {
+ IRDMA_COMPL_STATUS_SUCCESS = 0,
+ IRDMA_COMPL_STATUS_FLUSHED,
+ IRDMA_COMPL_STATUS_INVALID_WQE,
+ IRDMA_COMPL_STATUS_QP_CATASTROPHIC,
+ IRDMA_COMPL_STATUS_REMOTE_TERMINATION,
+ IRDMA_COMPL_STATUS_INVALID_STAG,
+ IRDMA_COMPL_STATUS_BASE_BOUND_VIOLATION,
+ IRDMA_COMPL_STATUS_ACCESS_VIOLATION,
+ IRDMA_COMPL_STATUS_INVALID_PD_ID,
+ IRDMA_COMPL_STATUS_WRAP_ERROR,
+ IRDMA_COMPL_STATUS_STAG_INVALID_PDID,
+ IRDMA_COMPL_STATUS_RDMA_READ_ZERO_ORD,
+ IRDMA_COMPL_STATUS_QP_NOT_PRIVLEDGED,
+ IRDMA_COMPL_STATUS_STAG_NOT_INVALID,
+ IRDMA_COMPL_STATUS_INVALID_PHYS_BUF_SIZE,
+ IRDMA_COMPL_STATUS_INVALID_PHYS_BUF_ENTRY,
+ IRDMA_COMPL_STATUS_INVALID_FBO,
+ IRDMA_COMPL_STATUS_INVALID_LEN,
+ IRDMA_COMPL_STATUS_INVALID_ACCESS,
+ IRDMA_COMPL_STATUS_PHYS_BUF_LIST_TOO_LONG,
+ IRDMA_COMPL_STATUS_INVALID_VIRT_ADDRESS,
+ IRDMA_COMPL_STATUS_INVALID_REGION,
+ IRDMA_COMPL_STATUS_INVALID_WINDOW,
+ IRDMA_COMPL_STATUS_INVALID_TOTAL_LEN,
+ IRDMA_COMPL_STATUS_UNKNOWN,
+};
+
+enum irdma_cmpl_notify {
+ IRDMA_CQ_COMPL_EVENT = 0,
+ IRDMA_CQ_COMPL_SOLICITED = 1,
+};
+
+enum irdma_qp_caps {
+ IRDMA_WRITE_WITH_IMM = 1,
+ IRDMA_SEND_WITH_IMM = 2,
+ IRDMA_ROCE = 4,
+ IRDMA_PUSH_MODE = 8,
+};
+
+struct irdma_srq_uk;
+struct irdma_srq_uk_init_info;
+struct irdma_qp_uk;
+struct irdma_cq_uk;
+struct irdma_qp_uk_init_info;
+struct irdma_cq_uk_init_info;
+
+struct irdma_ring {
+ u32 head;
+ u32 tail;
+ u32 size;
+};
+
+struct irdma_cqe {
+ __le64 buf[IRDMA_CQE_SIZE];
+};
+
+struct irdma_extended_cqe {
+ __le64 buf[IRDMA_EXTENDED_CQE_SIZE];
+};
+
+struct irdma_post_send {
+ struct ib_sge *sg_list;
+ u32 num_sges;
+ u32 qkey;
+ u32 dest_qp;
+ u32 ah_id;
+};
+
+struct irdma_post_rq_info {
+ u64 wr_id;
+ struct ib_sge *sg_list;
+ u32 num_sges;
+};
+
+struct irdma_rdma_write {
+ struct ib_sge *lo_sg_list;
+ u32 num_lo_sges;
+ struct ib_sge rem_addr;
+};
+
+struct irdma_rdma_read {
+ struct ib_sge *lo_sg_list;
+ u32 num_lo_sges;
+ struct ib_sge rem_addr;
+};
+
+struct irdma_bind_window {
+ irdma_stag mr_stag;
+ u64 bind_len;
+ void *va;
+ enum irdma_addressing_type addressing_type;
+ bool ena_reads:1;
+ bool ena_writes:1;
+ irdma_stag mw_stag;
+ bool mem_window_type_1:1;
+ bool remote_atomics_en:1;
+};
+
+struct irdma_atomic_fetch_add {
+ u64 tagged_offset;
+ u64 remote_tagged_offset;
+ u64 fetch_add_data_bytes;
+ u32 stag;
+ u32 remote_stag;
+};
+
+struct irdma_atomic_compare_swap {
+ u64 tagged_offset;
+ u64 remote_tagged_offset;
+ u64 swap_data_bytes;
+ u64 compare_data_bytes;
+ u32 stag;
+ u32 remote_stag;
+};
+
+struct irdma_inv_local_stag {
+ irdma_stag target_stag;
+};
+
+struct irdma_post_sq_info {
+ u64 wr_id;
+ u8 op_type;
+ u8 l4len;
+ bool signaled:1;
+ bool read_fence:1;
+ bool local_fence:1;
+ bool inline_data:1;
+ bool imm_data_valid:1;
+ bool report_rtt:1;
+ bool udp_hdr:1;
+ bool defer_flag:1;
+ bool remote_atomic_en:1;
+ u32 imm_data;
+ u32 stag_to_inv;
+ union {
+ struct irdma_post_send send;
+ struct irdma_rdma_write rdma_write;
+ struct irdma_rdma_read rdma_read;
+ struct irdma_bind_window bind_window;
+ struct irdma_inv_local_stag inv_local_stag;
+ struct irdma_atomic_fetch_add atomic_fetch_add;
+ struct irdma_atomic_compare_swap atomic_compare_swap;
+ } op;
+};
+
+struct irdma_cq_poll_info {
+ u64 wr_id;
+ irdma_qp_handle qp_handle;
+ u32 bytes_xfered;
+ u32 tcp_seq_num_rtt;
+ u32 qp_id;
+ u32 ud_src_qpn;
+ u32 imm_data;
+ irdma_stag inv_stag; /* or L_R_Key */
+ enum irdma_cmpl_status comp_status;
+ u16 major_err;
+ u16 minor_err;
+ u16 ud_vlan;
+ u8 ud_smac[6];
+ u8 op_type;
+ u8 q_type;
+ bool stag_invalid_set:1; /* or L_R_Key set */
+ bool error:1;
+ bool solicited_event:1;
+ bool ipv4:1;
+ bool ud_vlan_valid:1;
+ bool ud_smac_valid:1;
+ bool imm_valid:1;
+};
+
+struct qp_err_code {
+ enum irdma_flush_opcode flush_code;
+ enum irdma_qp_event_type event_type;
+};
+
+int irdma_uk_atomic_compare_swap(struct irdma_qp_uk *qp,
+ struct irdma_post_sq_info *info, bool post_sq);
+int irdma_uk_atomic_fetch_add(struct irdma_qp_uk *qp,
+ struct irdma_post_sq_info *info, bool post_sq);
+int irdma_uk_inline_rdma_write(struct irdma_qp_uk *qp,
+ struct irdma_post_sq_info *info, bool post_sq);
+int irdma_uk_inline_send(struct irdma_qp_uk *qp,
+ struct irdma_post_sq_info *info, bool post_sq);
+int irdma_uk_post_nop(struct irdma_qp_uk *qp, u64 wr_id, bool signaled,
+ bool post_sq);
+int irdma_uk_post_receive(struct irdma_qp_uk *qp,
+ struct irdma_post_rq_info *info);
+void irdma_uk_qp_post_wr(struct irdma_qp_uk *qp);
+int irdma_uk_rdma_read(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info,
+ bool inv_stag, bool post_sq);
+int irdma_uk_rdma_write(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info,
+ bool post_sq);
+int irdma_uk_send(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info,
+ bool post_sq);
+int irdma_uk_stag_local_invalidate(struct irdma_qp_uk *qp,
+ struct irdma_post_sq_info *info,
+ bool post_sq);
+
+struct irdma_wqe_uk_ops {
+ void (*iw_copy_inline_data)(u8 *dest, struct ib_sge *sge_list,
+ u32 num_sges, u8 polarity);
+ u16 (*iw_inline_data_size_to_quanta)(u32 data_size);
+ void (*iw_set_fragment)(__le64 *wqe, u32 offset, struct ib_sge *sge,
+ u8 valid);
+ void (*iw_set_mw_bind_wqe)(__le64 *wqe,
+ struct irdma_bind_window *op_info);
+};
+
+bool irdma_uk_cq_empty(struct irdma_cq_uk *cq);
+int irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq,
+ struct irdma_cq_poll_info *info);
+void irdma_uk_cq_request_notification(struct irdma_cq_uk *cq,
+ enum irdma_cmpl_notify cq_notify);
+void irdma_uk_cq_resize(struct irdma_cq_uk *cq, void *cq_base, int size);
+void irdma_uk_cq_set_resized_cnt(struct irdma_cq_uk *qp, u16 cnt);
+void irdma_uk_cq_init(struct irdma_cq_uk *cq,
+ struct irdma_cq_uk_init_info *info);
+int irdma_uk_qp_init(struct irdma_qp_uk *qp,
+ struct irdma_qp_uk_init_info *info);
+void irdma_uk_calc_shift_wq(struct irdma_qp_uk_init_info *ukinfo, u8 *sq_shift,
+ u8 *rq_shift);
+int irdma_uk_calc_depth_shift_sq(struct irdma_qp_uk_init_info *ukinfo,
+ u32 *sq_depth, u8 *sq_shift);
+int irdma_uk_calc_depth_shift_rq(struct irdma_qp_uk_init_info *ukinfo,
+ u32 *rq_depth, u8 *rq_shift);
+int irdma_uk_srq_init(struct irdma_srq_uk *srq,
+ struct irdma_srq_uk_init_info *info);
+int irdma_uk_srq_post_receive(struct irdma_srq_uk *srq,
+ struct irdma_post_rq_info *info);
+
+struct irdma_srq_uk {
+ u32 srq_caps;
+ struct irdma_qp_quanta *srq_base;
+ struct irdma_uk_attrs *uk_attrs;
+ __le64 *shadow_area;
+ struct irdma_ring srq_ring;
+ u32 srq_id;
+ u32 srq_size;
+ u32 max_srq_frag_cnt;
+ struct irdma_wqe_uk_ops wqe_ops;
+ u8 srwqe_polarity;
+ u8 wqe_size;
+ u8 wqe_size_multiplier;
+ u8 deferred_flag;
+ spinlock_t *lock;
+};
+
+struct irdma_srq_uk_init_info {
+ struct irdma_qp_quanta *srq;
+ struct irdma_uk_attrs *uk_attrs;
+ __le64 *shadow_area;
+ u64 *srq_wrid_array;
+ u32 srq_id;
+ u32 srq_caps;
+ u32 srq_size;
+ u32 max_srq_frag_cnt;
+};
+
+struct irdma_sq_uk_wr_trk_info {
+ u64 wrid;
+ u32 wr_len;
+ u16 quanta;
+ u8 signaled;
+ u8 reserved[1];
+};
+
+struct irdma_qp_quanta {
+ __le64 elem[IRDMA_WQE_SIZE];
+};
+
+struct irdma_qp_uk {
+ struct irdma_qp_quanta *sq_base;
+ struct irdma_qp_quanta *rq_base;
+ struct irdma_uk_attrs *uk_attrs;
+ u32 __iomem *wqe_alloc_db;
+ struct irdma_sq_uk_wr_trk_info *sq_wrtrk_array;
+ u64 *rq_wrid_array;
+ __le64 *shadow_area;
+ struct irdma_ring sq_ring;
+ struct irdma_ring rq_ring;
+ struct irdma_ring initial_ring;
+ u32 qp_id;
+ u32 qp_caps;
+ u32 sq_size;
+ u32 rq_size;
+ u32 max_sq_frag_cnt;
+ u32 max_rq_frag_cnt;
+ u32 max_inline_data;
+ struct irdma_wqe_uk_ops wqe_ops;
+ u16 conn_wqes;
+ u8 qp_type;
+ u8 swqe_polarity;
+ u8 swqe_polarity_deferred;
+ u8 rwqe_polarity;
+ u8 rq_wqe_size;
+ u8 rq_wqe_size_multiplier;
+ bool deferred_flag:1;
+ bool first_sq_wq:1;
+ bool sq_flush_complete:1; /* Indicates flush was seen and SQ was empty after the flush */
+ bool rq_flush_complete:1; /* Indicates flush was seen and RQ was empty after the flush */
+ bool destroy_pending:1; /* Indicates the QP is being destroyed */
+ void *back_qp;
+ u8 dbg_rq_flushed;
+ struct irdma_srq_uk *srq_uk;
+ u8 sq_flush_seen;
+ u8 rq_flush_seen;
+};
+
+struct irdma_cq_uk {
+ struct irdma_cqe *cq_base;
+ u32 __iomem *cqe_alloc_db;
+ u32 __iomem *cq_ack_db;
+ __le64 *shadow_area;
+ u32 cq_id;
+ u32 cq_size;
+ struct irdma_ring cq_ring;
+ u8 polarity;
+ bool avoid_mem_cflct:1;
+};
+
+struct irdma_qp_uk_init_info {
+ struct irdma_qp_quanta *sq;
+ struct irdma_qp_quanta *rq;
+ struct irdma_uk_attrs *uk_attrs;
+ u32 __iomem *wqe_alloc_db;
+ __le64 *shadow_area;
+ struct irdma_sq_uk_wr_trk_info *sq_wrtrk_array;
+ u64 *rq_wrid_array;
+ u32 qp_id;
+ u32 qp_caps;
+ u32 sq_size;
+ u32 rq_size;
+ u32 max_sq_frag_cnt;
+ u32 max_rq_frag_cnt;
+ u32 max_inline_data;
+ u32 sq_depth;
+ u32 rq_depth;
+ u8 first_sq_wq;
+ u8 type;
+ u8 sq_shift;
+ u8 rq_shift;
+ int abi_ver;
+ bool legacy_mode;
+ struct irdma_srq_uk *srq_uk;
+};
+
+struct irdma_cq_uk_init_info {
+ u32 __iomem *cqe_alloc_db;
+ u32 __iomem *cq_ack_db;
+ struct irdma_cqe *cq_base;
+ __le64 *shadow_area;
+ u32 cq_size;
+ u32 cq_id;
+ bool avoid_mem_cflct;
+};
+
+__le64 *irdma_qp_get_next_send_wqe(struct irdma_qp_uk *qp, u32 *wqe_idx,
+ u16 quanta, u32 total_size,
+ struct irdma_post_sq_info *info);
+__le64 *irdma_srq_get_next_recv_wqe(struct irdma_srq_uk *srq, u32 *wqe_idx);
+__le64 *irdma_qp_get_next_recv_wqe(struct irdma_qp_uk *qp, u32 *wqe_idx);
+void irdma_uk_clean_cq(void *q, struct irdma_cq_uk *cq);
+int irdma_nop(struct irdma_qp_uk *qp, u64 wr_id, bool signaled, bool post_sq);
+int irdma_fragcnt_to_quanta_sq(u32 frag_cnt, u16 *quanta);
+int irdma_fragcnt_to_wqesize_rq(u32 frag_cnt, u16 *wqe_size);
+void irdma_get_wqe_shift(struct irdma_uk_attrs *uk_attrs, u32 sge,
+ u32 inline_data, u8 *shift);
+int irdma_get_sqdepth(struct irdma_uk_attrs *uk_attrs, u32 sq_size, u8 shift,
+ u32 *wqdepth);
+int irdma_get_rqdepth(struct irdma_uk_attrs *uk_attrs, u32 rq_size, u8 shift,
+ u32 *wqdepth);
+int irdma_get_srqdepth(struct irdma_uk_attrs *uk_attrs, u32 srq_size, u8 shift,
+ u32 *srqdepth);
+void irdma_clr_wqes(struct irdma_qp_uk *qp, u32 qp_wqe_idx);
+
+static inline struct qp_err_code irdma_ae_to_qp_err_code(u16 ae_id)
+{
+ struct qp_err_code qp_err = {};
+
+ switch (ae_id) {
+ case IRDMA_AE_AMP_BOUNDS_VIOLATION:
+ case IRDMA_AE_AMP_INVALID_STAG:
+ case IRDMA_AE_AMP_RIGHTS_VIOLATION:
+ case IRDMA_AE_AMP_UNALLOCATED_STAG:
+ case IRDMA_AE_AMP_BAD_PD:
+ case IRDMA_AE_AMP_BAD_QP:
+ case IRDMA_AE_AMP_BAD_STAG_KEY:
+ case IRDMA_AE_AMP_BAD_STAG_INDEX:
+ case IRDMA_AE_AMP_TO_WRAP:
+ case IRDMA_AE_PRIV_OPERATION_DENIED:
+ qp_err.flush_code = FLUSH_PROT_ERR;
+ qp_err.event_type = IRDMA_QP_EVENT_ACCESS_ERR;
+ break;
+ case IRDMA_AE_UDA_XMIT_BAD_PD:
+ case IRDMA_AE_WQE_UNEXPECTED_OPCODE:
+ qp_err.flush_code = FLUSH_LOC_QP_OP_ERR;
+ qp_err.event_type = IRDMA_QP_EVENT_CATASTROPHIC;
+ break;
+ case IRDMA_AE_UDA_XMIT_DGRAM_TOO_SHORT:
+ case IRDMA_AE_UDA_XMIT_DGRAM_TOO_LONG:
+ case IRDMA_AE_UDA_L4LEN_INVALID:
+ case IRDMA_AE_DDP_UBE_INVALID_MO:
+ case IRDMA_AE_DDP_UBE_DDP_MESSAGE_TOO_LONG_FOR_AVAILABLE_BUFFER:
+ qp_err.flush_code = FLUSH_LOC_LEN_ERR;
+ qp_err.event_type = IRDMA_QP_EVENT_CATASTROPHIC;
+ break;
+ case IRDMA_AE_AMP_INVALIDATE_NO_REMOTE_ACCESS_RIGHTS:
+ case IRDMA_AE_IB_REMOTE_ACCESS_ERROR:
+ qp_err.flush_code = FLUSH_REM_ACCESS_ERR;
+ qp_err.event_type = IRDMA_QP_EVENT_ACCESS_ERR;
+ break;
+ case IRDMA_AE_AMP_MWBIND_INVALID_RIGHTS:
+ case IRDMA_AE_AMP_MWBIND_BIND_DISABLED:
+ case IRDMA_AE_AMP_MWBIND_INVALID_BOUNDS:
+ case IRDMA_AE_AMP_MWBIND_VALID_STAG:
+ qp_err.flush_code = FLUSH_MW_BIND_ERR;
+ qp_err.event_type = IRDMA_QP_EVENT_ACCESS_ERR;
+ break;
+ case IRDMA_AE_LLP_TOO_MANY_RETRIES:
+ qp_err.flush_code = FLUSH_RETRY_EXC_ERR;
+ qp_err.event_type = IRDMA_QP_EVENT_CATASTROPHIC;
+ break;
+ case IRDMA_AE_IB_INVALID_REQUEST:
+ qp_err.flush_code = FLUSH_REM_INV_REQ_ERR;
+ qp_err.event_type = IRDMA_QP_EVENT_REQ_ERR;
+ break;
+ case IRDMA_AE_LLP_SEGMENT_TOO_SMALL:
+ case IRDMA_AE_LLP_RECEIVED_MPA_CRC_ERROR:
+ case IRDMA_AE_ROCE_RSP_LENGTH_ERROR:
+ case IRDMA_AE_IB_REMOTE_OP_ERROR:
+ qp_err.flush_code = FLUSH_REM_OP_ERR;
+ qp_err.event_type = IRDMA_QP_EVENT_CATASTROPHIC;
+ break;
+ case IRDMA_AE_LLP_TOO_MANY_RNRS:
+ qp_err.flush_code = FLUSH_RNR_RETRY_EXC_ERR;
+ qp_err.event_type = IRDMA_QP_EVENT_CATASTROPHIC;
+ break;
+ case IRDMA_AE_LCE_QP_CATASTROPHIC:
+ case IRDMA_AE_REMOTE_QP_CATASTROPHIC:
+ case IRDMA_AE_LOCAL_QP_CATASTROPHIC:
+ case IRDMA_AE_RCE_QP_CATASTROPHIC:
+ qp_err.flush_code = FLUSH_FATAL_ERR;
+ qp_err.event_type = IRDMA_QP_EVENT_CATASTROPHIC;
+ break;
+ default:
+ qp_err.flush_code = FLUSH_GENERAL_ERR;
+ qp_err.event_type = IRDMA_QP_EVENT_CATASTROPHIC;
+ break;
+ }
+
+ return qp_err;
+}
+#endif /* IRDMA_USER_H */
diff --git a/drivers/infiniband/hw/irdma/utils.c b/drivers/infiniband/hw/irdma/utils.c
new file mode 100644
index 000000000000..cc2a12f735d3
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/utils.c
@@ -0,0 +1,2508 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2015 - 2021 Intel Corporation */
+#include "main.h"
+
+/**
+ * irdma_arp_table -manage arp table
+ * @rf: RDMA PCI function
+ * @ip_addr: ip address for device
+ * @ipv4: IPv4 flag
+ * @mac_addr: mac address ptr
+ * @action: modify, delete or add
+ */
+int irdma_arp_table(struct irdma_pci_f *rf, u32 *ip_addr, bool ipv4,
+ const u8 *mac_addr, u32 action)
+{
+ unsigned long flags;
+ int arp_index;
+ u32 ip[4] = {};
+
+ if (ipv4)
+ ip[0] = *ip_addr;
+ else
+ memcpy(ip, ip_addr, sizeof(ip));
+
+ spin_lock_irqsave(&rf->arp_lock, flags);
+ for (arp_index = 0; (u32)arp_index < rf->arp_table_size; arp_index++) {
+ if (!memcmp(rf->arp_table[arp_index].ip_addr, ip, sizeof(ip)))
+ break;
+ }
+
+ switch (action) {
+ case IRDMA_ARP_ADD:
+ if (arp_index != rf->arp_table_size) {
+ arp_index = -1;
+ break;
+ }
+
+ arp_index = 0;
+ if (irdma_alloc_rsrc(rf, rf->allocated_arps, rf->arp_table_size,
+ (u32 *)&arp_index, &rf->next_arp_index)) {
+ arp_index = -1;
+ break;
+ }
+
+ memcpy(rf->arp_table[arp_index].ip_addr, ip,
+ sizeof(rf->arp_table[arp_index].ip_addr));
+ ether_addr_copy(rf->arp_table[arp_index].mac_addr, mac_addr);
+ break;
+ case IRDMA_ARP_RESOLVE:
+ if (arp_index == rf->arp_table_size)
+ arp_index = -1;
+ break;
+ case IRDMA_ARP_DELETE:
+ if (arp_index == rf->arp_table_size) {
+ arp_index = -1;
+ break;
+ }
+
+ memset(rf->arp_table[arp_index].ip_addr, 0,
+ sizeof(rf->arp_table[arp_index].ip_addr));
+ eth_zero_addr(rf->arp_table[arp_index].mac_addr);
+ irdma_free_rsrc(rf, rf->allocated_arps, arp_index);
+ break;
+ default:
+ arp_index = -1;
+ break;
+ }
+
+ spin_unlock_irqrestore(&rf->arp_lock, flags);
+ return arp_index;
+}
+
+/**
+ * irdma_add_arp - add a new arp entry if needed
+ * @rf: RDMA function
+ * @ip: IP address
+ * @ipv4: IPv4 flag
+ * @mac: MAC address
+ */
+int irdma_add_arp(struct irdma_pci_f *rf, u32 *ip, bool ipv4, const u8 *mac)
+{
+ int arpidx;
+
+ arpidx = irdma_arp_table(rf, &ip[0], ipv4, NULL, IRDMA_ARP_RESOLVE);
+ if (arpidx >= 0) {
+ if (ether_addr_equal(rf->arp_table[arpidx].mac_addr, mac))
+ return arpidx;
+
+ irdma_manage_arp_cache(rf, rf->arp_table[arpidx].mac_addr, ip,
+ ipv4, IRDMA_ARP_DELETE);
+ }
+
+ irdma_manage_arp_cache(rf, mac, ip, ipv4, IRDMA_ARP_ADD);
+
+ return irdma_arp_table(rf, ip, ipv4, NULL, IRDMA_ARP_RESOLVE);
+}
+
+/**
+ * wr32 - write 32 bits to hw register
+ * @hw: hardware information including registers
+ * @reg: register offset
+ * @val: value to write to register
+ */
+inline void wr32(struct irdma_hw *hw, u32 reg, u32 val)
+{
+ writel(val, hw->hw_addr + reg);
+}
+
+/**
+ * rd32 - read a 32 bit hw register
+ * @hw: hardware information including registers
+ * @reg: register offset
+ *
+ * Return value of register content
+ */
+inline u32 rd32(struct irdma_hw *hw, u32 reg)
+{
+ return readl(hw->hw_addr + reg);
+}
+
+/**
+ * rd64 - read a 64 bit hw register
+ * @hw: hardware information including registers
+ * @reg: register offset
+ *
+ * Return value of register content
+ */
+inline u64 rd64(struct irdma_hw *hw, u32 reg)
+{
+ return readq(hw->hw_addr + reg);
+}
+
+static void irdma_gid_change_event(struct ib_device *ibdev)
+{
+ struct ib_event ib_event;
+
+ ib_event.event = IB_EVENT_GID_CHANGE;
+ ib_event.device = ibdev;
+ ib_event.element.port_num = 1;
+ ib_dispatch_event(&ib_event);
+}
+
+/**
+ * irdma_inetaddr_event - system notifier for ipv4 addr events
+ * @notifier: not used
+ * @event: event for notifier
+ * @ptr: if address
+ */
+int irdma_inetaddr_event(struct notifier_block *notifier, unsigned long event,
+ void *ptr)
+{
+ struct in_ifaddr *ifa = ptr;
+ struct net_device *real_dev, *netdev = ifa->ifa_dev->dev;
+ struct irdma_device *iwdev;
+ struct ib_device *ibdev;
+ u32 local_ipaddr;
+
+ real_dev = rdma_vlan_dev_real_dev(netdev);
+ if (!real_dev)
+ real_dev = netdev;
+
+ ibdev = ib_device_get_by_netdev(real_dev, RDMA_DRIVER_IRDMA);
+ if (!ibdev)
+ return NOTIFY_DONE;
+
+ iwdev = to_iwdev(ibdev);
+ local_ipaddr = ntohl(ifa->ifa_address);
+ ibdev_dbg(&iwdev->ibdev,
+ "DEV: netdev %p event %lu local_ip=%pI4 MAC=%pM\n", real_dev,
+ event, &local_ipaddr, real_dev->dev_addr);
+ switch (event) {
+ case NETDEV_DOWN:
+ irdma_manage_arp_cache(iwdev->rf, real_dev->dev_addr,
+ &local_ipaddr, true, IRDMA_ARP_DELETE);
+ irdma_if_notify(iwdev, real_dev, &local_ipaddr, true, false);
+ irdma_gid_change_event(&iwdev->ibdev);
+ break;
+ case NETDEV_UP:
+ case NETDEV_CHANGEADDR:
+ irdma_add_arp(iwdev->rf, &local_ipaddr, true, real_dev->dev_addr);
+ irdma_if_notify(iwdev, real_dev, &local_ipaddr, true, true);
+ irdma_gid_change_event(&iwdev->ibdev);
+ break;
+ default:
+ break;
+ }
+
+ ib_device_put(ibdev);
+
+ return NOTIFY_DONE;
+}
+
+/**
+ * irdma_inet6addr_event - system notifier for ipv6 addr events
+ * @notifier: not used
+ * @event: event for notifier
+ * @ptr: if address
+ */
+int irdma_inet6addr_event(struct notifier_block *notifier, unsigned long event,
+ void *ptr)
+{
+ struct inet6_ifaddr *ifa = ptr;
+ struct net_device *real_dev, *netdev = ifa->idev->dev;
+ struct irdma_device *iwdev;
+ struct ib_device *ibdev;
+ u32 local_ipaddr6[4];
+
+ real_dev = rdma_vlan_dev_real_dev(netdev);
+ if (!real_dev)
+ real_dev = netdev;
+
+ ibdev = ib_device_get_by_netdev(real_dev, RDMA_DRIVER_IRDMA);
+ if (!ibdev)
+ return NOTIFY_DONE;
+
+ iwdev = to_iwdev(ibdev);
+ irdma_copy_ip_ntohl(local_ipaddr6, ifa->addr.in6_u.u6_addr32);
+ ibdev_dbg(&iwdev->ibdev,
+ "DEV: netdev %p event %lu local_ip=%pI6 MAC=%pM\n", real_dev,
+ event, local_ipaddr6, real_dev->dev_addr);
+ switch (event) {
+ case NETDEV_DOWN:
+ irdma_manage_arp_cache(iwdev->rf, real_dev->dev_addr,
+ local_ipaddr6, false, IRDMA_ARP_DELETE);
+ irdma_if_notify(iwdev, real_dev, local_ipaddr6, false, false);
+ irdma_gid_change_event(&iwdev->ibdev);
+ break;
+ case NETDEV_UP:
+ case NETDEV_CHANGEADDR:
+ irdma_add_arp(iwdev->rf, local_ipaddr6, false,
+ real_dev->dev_addr);
+ irdma_if_notify(iwdev, real_dev, local_ipaddr6, false, true);
+ irdma_gid_change_event(&iwdev->ibdev);
+ break;
+ default:
+ break;
+ }
+
+ ib_device_put(ibdev);
+
+ return NOTIFY_DONE;
+}
+
+/**
+ * irdma_net_event - system notifier for net events
+ * @notifier: not used
+ * @event: event for notifier
+ * @ptr: neighbor
+ */
+int irdma_net_event(struct notifier_block *notifier, unsigned long event,
+ void *ptr)
+{
+ struct neighbour *neigh = ptr;
+ struct net_device *real_dev, *netdev = (struct net_device *)neigh->dev;
+ struct irdma_device *iwdev;
+ struct ib_device *ibdev;
+ __be32 *p;
+ u32 local_ipaddr[4] = {};
+ bool ipv4 = true;
+
+ switch (event) {
+ case NETEVENT_NEIGH_UPDATE:
+ real_dev = rdma_vlan_dev_real_dev(netdev);
+ if (!real_dev)
+ real_dev = netdev;
+ ibdev = ib_device_get_by_netdev(real_dev, RDMA_DRIVER_IRDMA);
+ if (!ibdev)
+ return NOTIFY_DONE;
+
+ iwdev = to_iwdev(ibdev);
+ p = (__be32 *)neigh->primary_key;
+ if (neigh->tbl->family == AF_INET6) {
+ ipv4 = false;
+ irdma_copy_ip_ntohl(local_ipaddr, p);
+ } else {
+ local_ipaddr[0] = ntohl(*p);
+ }
+
+ ibdev_dbg(&iwdev->ibdev,
+ "DEV: netdev %p state %d local_ip=%pI4 MAC=%pM\n",
+ iwdev->netdev, neigh->nud_state, local_ipaddr,
+ neigh->ha);
+
+ if (neigh->nud_state & NUD_VALID)
+ irdma_add_arp(iwdev->rf, local_ipaddr, ipv4, neigh->ha);
+
+ else
+ irdma_manage_arp_cache(iwdev->rf, neigh->ha,
+ local_ipaddr, ipv4,
+ IRDMA_ARP_DELETE);
+ ib_device_put(ibdev);
+ break;
+ default:
+ break;
+ }
+
+ return NOTIFY_DONE;
+}
+
+/**
+ * irdma_netdevice_event - system notifier for netdev events
+ * @notifier: not used
+ * @event: event for notifier
+ * @ptr: netdev
+ */
+int irdma_netdevice_event(struct notifier_block *notifier, unsigned long event,
+ void *ptr)
+{
+ struct irdma_device *iwdev;
+ struct ib_device *ibdev;
+ struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
+
+ ibdev = ib_device_get_by_netdev(netdev, RDMA_DRIVER_IRDMA);
+ if (!ibdev)
+ return NOTIFY_DONE;
+
+ iwdev = to_iwdev(ibdev);
+ iwdev->iw_status = 1;
+ switch (event) {
+ case NETDEV_DOWN:
+ iwdev->iw_status = 0;
+ fallthrough;
+ default:
+ break;
+ }
+ ib_device_put(ibdev);
+
+ return NOTIFY_DONE;
+}
+
+/**
+ * irdma_add_ipv6_addr - add ipv6 address to the hw arp table
+ * @iwdev: irdma device
+ */
+static void irdma_add_ipv6_addr(struct irdma_device *iwdev)
+{
+ struct net_device *ip_dev;
+ struct inet6_dev *idev;
+ struct inet6_ifaddr *ifp, *tmp;
+ u32 local_ipaddr6[4];
+
+ rcu_read_lock();
+ for_each_netdev_rcu (&init_net, ip_dev) {
+ if (((rdma_vlan_dev_vlan_id(ip_dev) < 0xFFFF &&
+ rdma_vlan_dev_real_dev(ip_dev) == iwdev->netdev) ||
+ ip_dev == iwdev->netdev) &&
+ (READ_ONCE(ip_dev->flags) & IFF_UP)) {
+ idev = __in6_dev_get(ip_dev);
+ if (!idev) {
+ ibdev_err(&iwdev->ibdev, "ipv6 inet device not found\n");
+ break;
+ }
+ list_for_each_entry_safe (ifp, tmp, &idev->addr_list,
+ if_list) {
+ ibdev_dbg(&iwdev->ibdev,
+ "INIT: IP=%pI6, vlan_id=%d, MAC=%pM\n",
+ &ifp->addr,
+ rdma_vlan_dev_vlan_id(ip_dev),
+ ip_dev->dev_addr);
+
+ irdma_copy_ip_ntohl(local_ipaddr6,
+ ifp->addr.in6_u.u6_addr32);
+ irdma_manage_arp_cache(iwdev->rf,
+ ip_dev->dev_addr,
+ local_ipaddr6, false,
+ IRDMA_ARP_ADD);
+ }
+ }
+ }
+ rcu_read_unlock();
+}
+
+/**
+ * irdma_add_ipv4_addr - add ipv4 address to the hw arp table
+ * @iwdev: irdma device
+ */
+static void irdma_add_ipv4_addr(struct irdma_device *iwdev)
+{
+ struct net_device *dev;
+ struct in_device *idev;
+ u32 ip_addr;
+
+ rcu_read_lock();
+ for_each_netdev_rcu (&init_net, dev) {
+ if (((rdma_vlan_dev_vlan_id(dev) < 0xFFFF &&
+ rdma_vlan_dev_real_dev(dev) == iwdev->netdev) ||
+ dev == iwdev->netdev) && (READ_ONCE(dev->flags) & IFF_UP)) {
+ const struct in_ifaddr *ifa;
+
+ idev = __in_dev_get_rcu(dev);
+ if (!idev)
+ continue;
+
+ in_dev_for_each_ifa_rcu(ifa, idev) {
+ ibdev_dbg(&iwdev->ibdev, "CM: IP=%pI4, vlan_id=%d, MAC=%pM\n",
+ &ifa->ifa_address, rdma_vlan_dev_vlan_id(dev),
+ dev->dev_addr);
+
+ ip_addr = ntohl(ifa->ifa_address);
+ irdma_manage_arp_cache(iwdev->rf, dev->dev_addr,
+ &ip_addr, true,
+ IRDMA_ARP_ADD);
+ }
+ }
+ }
+ rcu_read_unlock();
+}
+
+/**
+ * irdma_add_ip - add ip addresses
+ * @iwdev: irdma device
+ *
+ * Add ipv4/ipv6 addresses to the arp cache
+ */
+void irdma_add_ip(struct irdma_device *iwdev)
+{
+ irdma_add_ipv4_addr(iwdev);
+ irdma_add_ipv6_addr(iwdev);
+}
+
+/**
+ * irdma_alloc_and_get_cqp_request - get cqp struct
+ * @cqp: device cqp ptr
+ * @wait: cqp to be used in wait mode
+ */
+struct irdma_cqp_request *irdma_alloc_and_get_cqp_request(struct irdma_cqp *cqp,
+ bool wait)
+{
+ struct irdma_cqp_request *cqp_request = NULL;
+ unsigned long flags;
+
+ spin_lock_irqsave(&cqp->req_lock, flags);
+ if (!list_empty(&cqp->cqp_avail_reqs)) {
+ cqp_request = list_first_entry(&cqp->cqp_avail_reqs,
+ struct irdma_cqp_request, list);
+ list_del_init(&cqp_request->list);
+ }
+ spin_unlock_irqrestore(&cqp->req_lock, flags);
+ if (!cqp_request) {
+ cqp_request = kzalloc(sizeof(*cqp_request), GFP_ATOMIC);
+ if (cqp_request) {
+ cqp_request->dynamic = true;
+ if (wait)
+ init_waitqueue_head(&cqp_request->waitq);
+ }
+ }
+ if (!cqp_request) {
+ ibdev_dbg(to_ibdev(cqp->sc_cqp.dev), "ERR: CQP Request Fail: No Memory");
+ return NULL;
+ }
+
+ cqp_request->waiting = wait;
+ refcount_set(&cqp_request->refcnt, 1);
+ memset(&cqp_request->compl_info, 0, sizeof(cqp_request->compl_info));
+ memset(&cqp_request->info, 0, sizeof(cqp_request->info));
+
+ return cqp_request;
+}
+
+/**
+ * irdma_get_cqp_request - increase refcount for cqp_request
+ * @cqp_request: pointer to cqp_request instance
+ */
+static inline void irdma_get_cqp_request(struct irdma_cqp_request *cqp_request)
+{
+ refcount_inc(&cqp_request->refcnt);
+}
+
+/**
+ * irdma_free_cqp_request - free cqp request
+ * @cqp: cqp ptr
+ * @cqp_request: to be put back in cqp list
+ */
+void irdma_free_cqp_request(struct irdma_cqp *cqp,
+ struct irdma_cqp_request *cqp_request)
+{
+ unsigned long flags;
+
+ if (cqp_request->dynamic) {
+ kfree(cqp_request);
+ } else {
+ WRITE_ONCE(cqp_request->request_done, false);
+ cqp_request->callback_fcn = NULL;
+ cqp_request->waiting = false;
+ cqp_request->pending = false;
+
+ spin_lock_irqsave(&cqp->req_lock, flags);
+ list_add_tail(&cqp_request->list, &cqp->cqp_avail_reqs);
+ spin_unlock_irqrestore(&cqp->req_lock, flags);
+ }
+ wake_up(&cqp->remove_wq);
+}
+
+/**
+ * irdma_put_cqp_request - dec ref count and free if 0
+ * @cqp: cqp ptr
+ * @cqp_request: to be put back in cqp list
+ */
+void irdma_put_cqp_request(struct irdma_cqp *cqp,
+ struct irdma_cqp_request *cqp_request)
+{
+ if (refcount_dec_and_test(&cqp_request->refcnt))
+ irdma_free_cqp_request(cqp, cqp_request);
+}
+
+/**
+ * irdma_free_pending_cqp_request -free pending cqp request objs
+ * @cqp: cqp ptr
+ * @cqp_request: to be put back in cqp list
+ */
+static void
+irdma_free_pending_cqp_request(struct irdma_cqp *cqp,
+ struct irdma_cqp_request *cqp_request)
+{
+ if (cqp_request->waiting) {
+ cqp_request->compl_info.error = true;
+ WRITE_ONCE(cqp_request->request_done, true);
+ wake_up(&cqp_request->waitq);
+ }
+ wait_event_timeout(cqp->remove_wq,
+ refcount_read(&cqp_request->refcnt) == 1, 1000);
+ irdma_put_cqp_request(cqp, cqp_request);
+}
+
+/**
+ * irdma_cleanup_deferred_cqp_ops - clean-up cqp with no completions
+ * @dev: sc_dev
+ * @cqp: cqp
+ */
+static void irdma_cleanup_deferred_cqp_ops(struct irdma_sc_dev *dev,
+ struct irdma_cqp *cqp)
+{
+ u64 scratch;
+
+ /* process all CQP requests with deferred/pending completions */
+ while ((scratch = irdma_sc_cqp_cleanup_handler(dev)))
+ irdma_free_pending_cqp_request(cqp, (struct irdma_cqp_request *)
+ (uintptr_t)scratch);
+}
+
+/**
+ * irdma_cleanup_pending_cqp_op - clean-up cqp with no
+ * completions
+ * @rf: RDMA PCI function
+ */
+void irdma_cleanup_pending_cqp_op(struct irdma_pci_f *rf)
+{
+ struct irdma_sc_dev *dev = &rf->sc_dev;
+ struct irdma_cqp *cqp = &rf->cqp;
+ struct irdma_cqp_request *cqp_request = NULL;
+ struct cqp_cmds_info *pcmdinfo = NULL;
+ u32 i, pending_work, wqe_idx;
+
+ if (dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3)
+ irdma_cleanup_deferred_cqp_ops(dev, cqp);
+ pending_work = IRDMA_RING_USED_QUANTA(cqp->sc_cqp.sq_ring);
+ wqe_idx = IRDMA_RING_CURRENT_TAIL(cqp->sc_cqp.sq_ring);
+ for (i = 0; i < pending_work; i++) {
+ cqp_request = (struct irdma_cqp_request *)(unsigned long)
+ cqp->scratch_array[wqe_idx];
+ if (cqp_request)
+ irdma_free_pending_cqp_request(cqp, cqp_request);
+ wqe_idx = (wqe_idx + 1) % IRDMA_RING_SIZE(cqp->sc_cqp.sq_ring);
+ }
+
+ while (!list_empty(&dev->cqp_cmd_head)) {
+ pcmdinfo = irdma_remove_cqp_head(dev);
+ cqp_request =
+ container_of(pcmdinfo, struct irdma_cqp_request, info);
+ if (cqp_request)
+ irdma_free_pending_cqp_request(cqp, cqp_request);
+ }
+}
+
+static int irdma_get_timeout_threshold(struct irdma_sc_dev *dev)
+{
+ u16 time_s = dev->vc_caps.cqp_timeout_s;
+
+ if (!time_s)
+ return CQP_TIMEOUT_THRESHOLD;
+
+ return time_s * 1000 / dev->hw_attrs.max_cqp_compl_wait_time_ms;
+}
+
+static int irdma_get_def_timeout_threshold(struct irdma_sc_dev *dev)
+{
+ u16 time_s = dev->vc_caps.cqp_def_timeout_s;
+
+ if (!time_s)
+ return CQP_DEF_CMPL_TIMEOUT_THRESHOLD;
+
+ return time_s * 1000 / dev->hw_attrs.max_cqp_compl_wait_time_ms;
+}
+
+/**
+ * irdma_wait_event - wait for completion
+ * @rf: RDMA PCI function
+ * @cqp_request: cqp request to wait
+ */
+static int irdma_wait_event(struct irdma_pci_f *rf,
+ struct irdma_cqp_request *cqp_request)
+{
+ struct irdma_cqp_timeout cqp_timeout = {};
+ int timeout_threshold = irdma_get_timeout_threshold(&rf->sc_dev);
+ bool cqp_error = false;
+ int err_code = 0;
+
+ cqp_timeout.compl_cqp_cmds = atomic64_read(&rf->sc_dev.cqp->completed_ops);
+ do {
+ irdma_cqp_ce_handler(rf, &rf->ccq.sc_cq);
+ if (wait_event_timeout(cqp_request->waitq,
+ READ_ONCE(cqp_request->request_done),
+ msecs_to_jiffies(CQP_COMPL_WAIT_TIME_MS)))
+ break;
+
+ if (cqp_request->pending)
+ /* There was a deferred or pending completion
+ * received for this CQP request, so we need
+ * to wait longer than usual.
+ */
+ timeout_threshold =
+ irdma_get_def_timeout_threshold(&rf->sc_dev);
+
+ irdma_check_cqp_progress(&cqp_timeout, &rf->sc_dev);
+
+ if (cqp_timeout.count < timeout_threshold)
+ continue;
+
+ if (!rf->reset) {
+ rf->reset = true;
+ rf->gen_ops.request_reset(rf);
+ }
+ return -ETIMEDOUT;
+ } while (1);
+
+ cqp_error = cqp_request->compl_info.error;
+ if (cqp_error) {
+ err_code = -EIO;
+ if (cqp_request->compl_info.maj_err_code == 0xFFFF) {
+ if (cqp_request->compl_info.min_err_code == 0x8002)
+ err_code = -EBUSY;
+ else if (cqp_request->compl_info.min_err_code == 0x8029) {
+ if (!rf->reset) {
+ rf->reset = true;
+ rf->gen_ops.request_reset(rf);
+ }
+ }
+ }
+ }
+
+ return err_code;
+}
+
+static const char *const irdma_cqp_cmd_names[IRDMA_MAX_CQP_OPS] = {
+ [IRDMA_OP_CEQ_DESTROY] = "Destroy CEQ Cmd",
+ [IRDMA_OP_AEQ_DESTROY] = "Destroy AEQ Cmd",
+ [IRDMA_OP_DELETE_ARP_CACHE_ENTRY] = "Delete ARP Cache Cmd",
+ [IRDMA_OP_MANAGE_APBVT_ENTRY] = "Manage APBV Table Entry Cmd",
+ [IRDMA_OP_CEQ_CREATE] = "CEQ Create Cmd",
+ [IRDMA_OP_AEQ_CREATE] = "AEQ Destroy Cmd",
+ [IRDMA_OP_MANAGE_QHASH_TABLE_ENTRY] = "Manage Quad Hash Table Entry Cmd",
+ [IRDMA_OP_QP_MODIFY] = "Modify QP Cmd",
+ [IRDMA_OP_QP_UPLOAD_CONTEXT] = "Upload Context Cmd",
+ [IRDMA_OP_CQ_CREATE] = "Create CQ Cmd",
+ [IRDMA_OP_CQ_DESTROY] = "Destroy CQ Cmd",
+ [IRDMA_OP_QP_CREATE] = "Create QP Cmd",
+ [IRDMA_OP_QP_DESTROY] = "Destroy QP Cmd",
+ [IRDMA_OP_ALLOC_STAG] = "Allocate STag Cmd",
+ [IRDMA_OP_MR_REG_NON_SHARED] = "Register Non-Shared MR Cmd",
+ [IRDMA_OP_DEALLOC_STAG] = "Deallocate STag Cmd",
+ [IRDMA_OP_MW_ALLOC] = "Allocate Memory Window Cmd",
+ [IRDMA_OP_QP_FLUSH_WQES] = "Flush QP Cmd",
+ [IRDMA_OP_ADD_ARP_CACHE_ENTRY] = "Add ARP Cache Cmd",
+ [IRDMA_OP_MANAGE_PUSH_PAGE] = "Manage Push Page Cmd",
+ [IRDMA_OP_UPDATE_PE_SDS] = "Update PE SDs Cmd",
+ [IRDMA_OP_MANAGE_HMC_PM_FUNC_TABLE] = "Manage HMC PM Function Table Cmd",
+ [IRDMA_OP_SUSPEND] = "Suspend QP Cmd",
+ [IRDMA_OP_RESUME] = "Resume QP Cmd",
+ [IRDMA_OP_MANAGE_VF_PBLE_BP] = "Manage VF PBLE Backing Pages Cmd",
+ [IRDMA_OP_QUERY_FPM_VAL] = "Query FPM Values Cmd",
+ [IRDMA_OP_COMMIT_FPM_VAL] = "Commit FPM Values Cmd",
+ [IRDMA_OP_AH_CREATE] = "Create Address Handle Cmd",
+ [IRDMA_OP_AH_MODIFY] = "Modify Address Handle Cmd",
+ [IRDMA_OP_AH_DESTROY] = "Destroy Address Handle Cmd",
+ [IRDMA_OP_MC_CREATE] = "Create Multicast Group Cmd",
+ [IRDMA_OP_MC_DESTROY] = "Destroy Multicast Group Cmd",
+ [IRDMA_OP_MC_MODIFY] = "Modify Multicast Group Cmd",
+ [IRDMA_OP_STATS_ALLOCATE] = "Add Statistics Instance Cmd",
+ [IRDMA_OP_STATS_FREE] = "Free Statistics Instance Cmd",
+ [IRDMA_OP_STATS_GATHER] = "Gather Statistics Cmd",
+ [IRDMA_OP_WS_ADD_NODE] = "Add Work Scheduler Node Cmd",
+ [IRDMA_OP_WS_MODIFY_NODE] = "Modify Work Scheduler Node Cmd",
+ [IRDMA_OP_WS_DELETE_NODE] = "Delete Work Scheduler Node Cmd",
+ [IRDMA_OP_SET_UP_MAP] = "Set UP-UP Mapping Cmd",
+ [IRDMA_OP_GEN_AE] = "Generate AE Cmd",
+ [IRDMA_OP_QUERY_RDMA_FEATURES] = "RDMA Get Features Cmd",
+ [IRDMA_OP_ALLOC_LOCAL_MAC_ENTRY] = "Allocate Local MAC Entry Cmd",
+ [IRDMA_OP_ADD_LOCAL_MAC_ENTRY] = "Add Local MAC Entry Cmd",
+ [IRDMA_OP_DELETE_LOCAL_MAC_ENTRY] = "Delete Local MAC Entry Cmd",
+ [IRDMA_OP_CQ_MODIFY] = "CQ Modify Cmd",
+ [IRDMA_OP_SRQ_CREATE] = "Create SRQ Cmd",
+ [IRDMA_OP_SRQ_MODIFY] = "Modify SRQ Cmd",
+ [IRDMA_OP_SRQ_DESTROY] = "Destroy SRQ Cmd",
+};
+
+static const struct irdma_cqp_err_info irdma_noncrit_err_list[] = {
+ {0xffff, 0x8002, "Invalid State"},
+ {0xffff, 0x8006, "Flush No Wqe Pending"},
+ {0xffff, 0x8007, "Modify QP Bad Close"},
+ {0xffff, 0x8009, "LLP Closed"},
+ {0xffff, 0x800a, "Reset Not Sent"}
+};
+
+/**
+ * irdma_cqp_crit_err - check if CQP error is critical
+ * @dev: pointer to dev structure
+ * @cqp_cmd: code for last CQP operation
+ * @maj_err_code: major error code
+ * @min_err_code: minot error code
+ */
+bool irdma_cqp_crit_err(struct irdma_sc_dev *dev, u8 cqp_cmd,
+ u16 maj_err_code, u16 min_err_code)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(irdma_noncrit_err_list); ++i) {
+ if (maj_err_code == irdma_noncrit_err_list[i].maj &&
+ min_err_code == irdma_noncrit_err_list[i].min) {
+ ibdev_dbg(to_ibdev(dev),
+ "CQP: [%s Error][%s] maj=0x%x min=0x%x\n",
+ irdma_noncrit_err_list[i].desc,
+ irdma_cqp_cmd_names[cqp_cmd], maj_err_code,
+ min_err_code);
+ return false;
+ }
+ }
+ return true;
+}
+
+/**
+ * irdma_handle_cqp_op - process cqp command
+ * @rf: RDMA PCI function
+ * @cqp_request: cqp request to process
+ */
+int irdma_handle_cqp_op(struct irdma_pci_f *rf,
+ struct irdma_cqp_request *cqp_request)
+{
+ struct irdma_sc_dev *dev = &rf->sc_dev;
+ struct cqp_cmds_info *info = &cqp_request->info;
+ int status;
+ bool put_cqp_request = true;
+
+ if (rf->reset)
+ return -EBUSY;
+
+ irdma_get_cqp_request(cqp_request);
+ status = irdma_process_cqp_cmd(dev, info);
+ if (status)
+ goto err;
+
+ if (cqp_request->waiting) {
+ put_cqp_request = false;
+ status = irdma_wait_event(rf, cqp_request);
+ if (status)
+ goto err;
+ }
+
+ return 0;
+
+err:
+ if (irdma_cqp_crit_err(dev, info->cqp_cmd,
+ cqp_request->compl_info.maj_err_code,
+ cqp_request->compl_info.min_err_code))
+ ibdev_err(&rf->iwdev->ibdev,
+ "[%s Error][op_code=%d] status=%d waiting=%d completion_err=%d maj=0x%x min=0x%x\n",
+ irdma_cqp_cmd_names[info->cqp_cmd], info->cqp_cmd, status, cqp_request->waiting,
+ cqp_request->compl_info.error, cqp_request->compl_info.maj_err_code,
+ cqp_request->compl_info.min_err_code);
+
+ if (put_cqp_request)
+ irdma_put_cqp_request(&rf->cqp, cqp_request);
+
+ return status;
+}
+
+void irdma_qp_add_ref(struct ib_qp *ibqp)
+{
+ struct irdma_qp *iwqp = (struct irdma_qp *)ibqp;
+
+ refcount_inc(&iwqp->refcnt);
+}
+
+void irdma_qp_rem_ref(struct ib_qp *ibqp)
+{
+ struct irdma_qp *iwqp = to_iwqp(ibqp);
+ struct irdma_device *iwdev = iwqp->iwdev;
+ u32 qp_num;
+ unsigned long flags;
+
+ spin_lock_irqsave(&iwdev->rf->qptable_lock, flags);
+ if (!refcount_dec_and_test(&iwqp->refcnt)) {
+ spin_unlock_irqrestore(&iwdev->rf->qptable_lock, flags);
+ return;
+ }
+
+ qp_num = iwqp->ibqp.qp_num;
+ iwdev->rf->qp_table[qp_num] = NULL;
+ spin_unlock_irqrestore(&iwdev->rf->qptable_lock, flags);
+ complete(&iwqp->free_qp);
+}
+
+void irdma_cq_add_ref(struct ib_cq *ibcq)
+{
+ struct irdma_cq *iwcq = to_iwcq(ibcq);
+
+ refcount_inc(&iwcq->refcnt);
+}
+
+void irdma_cq_rem_ref(struct ib_cq *ibcq)
+{
+ struct ib_device *ibdev = ibcq->device;
+ struct irdma_device *iwdev = to_iwdev(ibdev);
+ struct irdma_cq *iwcq = to_iwcq(ibcq);
+ unsigned long flags;
+
+ spin_lock_irqsave(&iwdev->rf->cqtable_lock, flags);
+ if (!refcount_dec_and_test(&iwcq->refcnt)) {
+ spin_unlock_irqrestore(&iwdev->rf->cqtable_lock, flags);
+ return;
+ }
+
+ iwdev->rf->cq_table[iwcq->cq_num] = NULL;
+ spin_unlock_irqrestore(&iwdev->rf->cqtable_lock, flags);
+ complete(&iwcq->free_cq);
+}
+
+struct ib_device *to_ibdev(struct irdma_sc_dev *dev)
+{
+ return &(container_of(dev, struct irdma_pci_f, sc_dev))->iwdev->ibdev;
+}
+
+/**
+ * irdma_get_qp - get qp address
+ * @device: iwarp device
+ * @qpn: qp number
+ */
+struct ib_qp *irdma_get_qp(struct ib_device *device, int qpn)
+{
+ struct irdma_device *iwdev = to_iwdev(device);
+
+ if (qpn < IW_FIRST_QPN || qpn >= iwdev->rf->max_qp)
+ return NULL;
+
+ return &iwdev->rf->qp_table[qpn]->ibqp;
+}
+
+/**
+ * irdma_remove_cqp_head - return head entry and remove
+ * @dev: device
+ */
+void *irdma_remove_cqp_head(struct irdma_sc_dev *dev)
+{
+ struct list_head *entry;
+ struct list_head *list = &dev->cqp_cmd_head;
+
+ if (list_empty(list))
+ return NULL;
+
+ entry = list->next;
+ list_del(entry);
+
+ return entry;
+}
+
+/**
+ * irdma_cqp_sds_cmd - create cqp command for sd
+ * @dev: hardware control device structure
+ * @sdinfo: information for sd cqp
+ *
+ */
+int irdma_cqp_sds_cmd(struct irdma_sc_dev *dev,
+ struct irdma_update_sds_info *sdinfo)
+{
+ struct irdma_cqp_request *cqp_request;
+ struct cqp_cmds_info *cqp_info;
+ struct irdma_pci_f *rf = dev_to_rf(dev);
+ int status;
+
+ cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, true);
+ if (!cqp_request)
+ return -ENOMEM;
+
+ cqp_info = &cqp_request->info;
+ memcpy(&cqp_info->in.u.update_pe_sds.info, sdinfo,
+ sizeof(cqp_info->in.u.update_pe_sds.info));
+ cqp_info->cqp_cmd = IRDMA_OP_UPDATE_PE_SDS;
+ cqp_info->post_sq = 1;
+ cqp_info->in.u.update_pe_sds.dev = dev;
+ cqp_info->in.u.update_pe_sds.scratch = (uintptr_t)cqp_request;
+
+ status = irdma_handle_cqp_op(rf, cqp_request);
+ irdma_put_cqp_request(&rf->cqp, cqp_request);
+
+ return status;
+}
+
+/**
+ * irdma_cqp_qp_suspend_resume - cqp command for suspend/resume
+ * @qp: hardware control qp
+ * @op: suspend or resume
+ */
+int irdma_cqp_qp_suspend_resume(struct irdma_sc_qp *qp, u8 op)
+{
+ struct irdma_sc_dev *dev = qp->dev;
+ struct irdma_cqp_request *cqp_request;
+ struct irdma_sc_cqp *cqp = dev->cqp;
+ struct cqp_cmds_info *cqp_info;
+ struct irdma_pci_f *rf = dev_to_rf(dev);
+ int status;
+
+ cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, false);
+ if (!cqp_request)
+ return -ENOMEM;
+
+ cqp_info = &cqp_request->info;
+ cqp_info->cqp_cmd = op;
+ cqp_info->in.u.suspend_resume.cqp = cqp;
+ cqp_info->in.u.suspend_resume.qp = qp;
+ cqp_info->in.u.suspend_resume.scratch = (uintptr_t)cqp_request;
+
+ status = irdma_handle_cqp_op(rf, cqp_request);
+ irdma_put_cqp_request(&rf->cqp, cqp_request);
+
+ return status;
+}
+
+/**
+ * irdma_term_modify_qp - modify qp for term message
+ * @qp: hardware control qp
+ * @next_state: qp's next state
+ * @term: terminate code
+ * @term_len: length
+ */
+void irdma_term_modify_qp(struct irdma_sc_qp *qp, u8 next_state, u8 term,
+ u8 term_len)
+{
+ struct irdma_qp *iwqp;
+
+ iwqp = qp->qp_uk.back_qp;
+ irdma_next_iw_state(iwqp, next_state, 0, term, term_len);
+};
+
+/**
+ * irdma_terminate_done - after terminate is completed
+ * @qp: hardware control qp
+ * @timeout_occurred: indicates if terminate timer expired
+ */
+void irdma_terminate_done(struct irdma_sc_qp *qp, int timeout_occurred)
+{
+ struct irdma_qp *iwqp;
+ u8 hte = 0;
+ bool first_time;
+ unsigned long flags;
+
+ iwqp = qp->qp_uk.back_qp;
+ spin_lock_irqsave(&iwqp->lock, flags);
+ if (iwqp->hte_added) {
+ iwqp->hte_added = 0;
+ hte = 1;
+ }
+ first_time = !(qp->term_flags & IRDMA_TERM_DONE);
+ qp->term_flags |= IRDMA_TERM_DONE;
+ spin_unlock_irqrestore(&iwqp->lock, flags);
+ if (first_time) {
+ if (!timeout_occurred)
+ irdma_terminate_del_timer(qp);
+
+ irdma_next_iw_state(iwqp, IRDMA_QP_STATE_ERROR, hte, 0, 0);
+ irdma_cm_disconn(iwqp);
+ }
+}
+
+static void irdma_terminate_timeout(struct timer_list *t)
+{
+ struct irdma_qp *iwqp = timer_container_of(iwqp, t, terminate_timer);
+ struct irdma_sc_qp *qp = &iwqp->sc_qp;
+
+ irdma_terminate_done(qp, 1);
+ irdma_qp_rem_ref(&iwqp->ibqp);
+}
+
+/**
+ * irdma_terminate_start_timer - start terminate timeout
+ * @qp: hardware control qp
+ */
+void irdma_terminate_start_timer(struct irdma_sc_qp *qp)
+{
+ struct irdma_qp *iwqp;
+
+ iwqp = qp->qp_uk.back_qp;
+ irdma_qp_add_ref(&iwqp->ibqp);
+ timer_setup(&iwqp->terminate_timer, irdma_terminate_timeout, 0);
+ iwqp->terminate_timer.expires = jiffies + HZ;
+
+ add_timer(&iwqp->terminate_timer);
+}
+
+/**
+ * irdma_terminate_del_timer - delete terminate timeout
+ * @qp: hardware control qp
+ */
+void irdma_terminate_del_timer(struct irdma_sc_qp *qp)
+{
+ struct irdma_qp *iwqp;
+ int ret;
+
+ iwqp = qp->qp_uk.back_qp;
+ ret = timer_delete(&iwqp->terminate_timer);
+ if (ret)
+ irdma_qp_rem_ref(&iwqp->ibqp);
+}
+
+/**
+ * irdma_cqp_cq_create_cmd - create a cq for the cqp
+ * @dev: device pointer
+ * @cq: pointer to created cq
+ */
+int irdma_cqp_cq_create_cmd(struct irdma_sc_dev *dev, struct irdma_sc_cq *cq)
+{
+ struct irdma_pci_f *rf = dev_to_rf(dev);
+ struct irdma_cqp *iwcqp = &rf->cqp;
+ struct irdma_cqp_request *cqp_request;
+ struct cqp_cmds_info *cqp_info;
+ int status;
+
+ cqp_request = irdma_alloc_and_get_cqp_request(iwcqp, true);
+ if (!cqp_request)
+ return -ENOMEM;
+
+ cqp_info = &cqp_request->info;
+ cqp_info->cqp_cmd = IRDMA_OP_CQ_CREATE;
+ cqp_info->post_sq = 1;
+ cqp_info->in.u.cq_create.cq = cq;
+ cqp_info->in.u.cq_create.scratch = (uintptr_t)cqp_request;
+
+ status = irdma_handle_cqp_op(rf, cqp_request);
+ irdma_put_cqp_request(iwcqp, cqp_request);
+
+ return status;
+}
+
+/**
+ * irdma_cqp_qp_create_cmd - create a qp for the cqp
+ * @dev: device pointer
+ * @qp: pointer to created qp
+ */
+int irdma_cqp_qp_create_cmd(struct irdma_sc_dev *dev, struct irdma_sc_qp *qp)
+{
+ struct irdma_pci_f *rf = dev_to_rf(dev);
+ struct irdma_cqp *iwcqp = &rf->cqp;
+ struct irdma_cqp_request *cqp_request;
+ struct cqp_cmds_info *cqp_info;
+ struct irdma_create_qp_info *qp_info;
+ int status;
+
+ cqp_request = irdma_alloc_and_get_cqp_request(iwcqp, true);
+ if (!cqp_request)
+ return -ENOMEM;
+
+ cqp_info = &cqp_request->info;
+ qp_info = &cqp_request->info.in.u.qp_create.info;
+ qp_info->cq_num_valid = true;
+ qp_info->next_iwarp_state = IRDMA_QP_STATE_RTS;
+ cqp_info->cqp_cmd = IRDMA_OP_QP_CREATE;
+ cqp_info->post_sq = 1;
+ cqp_info->in.u.qp_create.qp = qp;
+ cqp_info->in.u.qp_create.scratch = (uintptr_t)cqp_request;
+
+ status = irdma_handle_cqp_op(rf, cqp_request);
+ irdma_put_cqp_request(iwcqp, cqp_request);
+
+ return status;
+}
+
+/**
+ * irdma_dealloc_push_page - free a push page for qp
+ * @rf: RDMA PCI function
+ * @qp: hardware control qp
+ */
+static void irdma_dealloc_push_page(struct irdma_pci_f *rf,
+ struct irdma_sc_qp *qp)
+{
+ struct irdma_cqp_request *cqp_request;
+ struct cqp_cmds_info *cqp_info;
+ int status;
+
+ if (qp->push_idx == IRDMA_INVALID_PUSH_PAGE_INDEX)
+ return;
+
+ cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, false);
+ if (!cqp_request)
+ return;
+
+ cqp_info = &cqp_request->info;
+ cqp_info->cqp_cmd = IRDMA_OP_MANAGE_PUSH_PAGE;
+ cqp_info->post_sq = 1;
+ cqp_info->in.u.manage_push_page.info.push_idx = qp->push_idx;
+ cqp_info->in.u.manage_push_page.info.qs_handle = qp->qs_handle;
+ cqp_info->in.u.manage_push_page.info.free_page = 1;
+ cqp_info->in.u.manage_push_page.info.push_page_type = 0;
+ cqp_info->in.u.manage_push_page.cqp = &rf->cqp.sc_cqp;
+ cqp_info->in.u.manage_push_page.scratch = (uintptr_t)cqp_request;
+ status = irdma_handle_cqp_op(rf, cqp_request);
+ if (!status)
+ qp->push_idx = IRDMA_INVALID_PUSH_PAGE_INDEX;
+ irdma_put_cqp_request(&rf->cqp, cqp_request);
+}
+
+static void irdma_free_gsi_qp_rsrc(struct irdma_qp *iwqp, u32 qp_num)
+{
+ struct irdma_device *iwdev = iwqp->iwdev;
+ struct irdma_pci_f *rf = iwdev->rf;
+ unsigned long flags;
+
+ if (rf->sc_dev.hw_attrs.uk_attrs.hw_rev < IRDMA_GEN_3)
+ return;
+
+ irdma_vchnl_req_del_vport(&rf->sc_dev, iwdev->vport_id, qp_num);
+
+ if (qp_num == 1) {
+ spin_lock_irqsave(&rf->rsrc_lock, flags);
+ rf->hwqp1_rsvd = false;
+ spin_unlock_irqrestore(&rf->rsrc_lock, flags);
+ } else if (qp_num > 2) {
+ irdma_free_rsrc(rf, rf->allocated_qps, qp_num);
+ }
+}
+
+/**
+ * irdma_free_qp_rsrc - free up memory resources for qp
+ * @iwqp: qp ptr (user or kernel)
+ */
+void irdma_free_qp_rsrc(struct irdma_qp *iwqp)
+{
+ struct irdma_device *iwdev = iwqp->iwdev;
+ struct irdma_pci_f *rf = iwdev->rf;
+ u32 qp_num = iwqp->sc_qp.qp_uk.qp_id;
+
+ irdma_ieq_cleanup_qp(iwdev->vsi.ieq, &iwqp->sc_qp);
+ irdma_dealloc_push_page(rf, &iwqp->sc_qp);
+ if (iwqp->sc_qp.vsi) {
+ irdma_qp_rem_qos(&iwqp->sc_qp);
+ iwqp->sc_qp.dev->ws_remove(iwqp->sc_qp.vsi,
+ iwqp->sc_qp.user_pri);
+ }
+
+ if (iwqp->ibqp.qp_type == IB_QPT_GSI) {
+ irdma_free_gsi_qp_rsrc(iwqp, qp_num);
+ } else {
+ if (qp_num > 2)
+ irdma_free_rsrc(rf, rf->allocated_qps, qp_num);
+ }
+ dma_free_coherent(rf->sc_dev.hw->device, iwqp->q2_ctx_mem.size,
+ iwqp->q2_ctx_mem.va, iwqp->q2_ctx_mem.pa);
+ iwqp->q2_ctx_mem.va = NULL;
+ dma_free_coherent(rf->sc_dev.hw->device, iwqp->kqp.dma_mem.size,
+ iwqp->kqp.dma_mem.va, iwqp->kqp.dma_mem.pa);
+ iwqp->kqp.dma_mem.va = NULL;
+ kfree(iwqp->kqp.sq_wrid_mem);
+ kfree(iwqp->kqp.rq_wrid_mem);
+}
+
+/**
+ * irdma_srq_wq_destroy - send srq destroy cqp
+ * @rf: RDMA PCI function
+ * @srq: hardware control srq
+ */
+void irdma_srq_wq_destroy(struct irdma_pci_f *rf, struct irdma_sc_srq *srq)
+{
+ struct irdma_cqp_request *cqp_request;
+ struct cqp_cmds_info *cqp_info;
+
+ cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, true);
+ if (!cqp_request)
+ return;
+
+ cqp_info = &cqp_request->info;
+ cqp_info->cqp_cmd = IRDMA_OP_SRQ_DESTROY;
+ cqp_info->post_sq = 1;
+ cqp_info->in.u.srq_destroy.srq = srq;
+ cqp_info->in.u.srq_destroy.scratch = (uintptr_t)cqp_request;
+
+ irdma_handle_cqp_op(rf, cqp_request);
+ irdma_put_cqp_request(&rf->cqp, cqp_request);
+}
+
+/**
+ * irdma_cq_wq_destroy - send cq destroy cqp
+ * @rf: RDMA PCI function
+ * @cq: hardware control cq
+ */
+void irdma_cq_wq_destroy(struct irdma_pci_f *rf, struct irdma_sc_cq *cq)
+{
+ struct irdma_cqp_request *cqp_request;
+ struct cqp_cmds_info *cqp_info;
+
+ cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, true);
+ if (!cqp_request)
+ return;
+
+ cqp_info = &cqp_request->info;
+ cqp_info->cqp_cmd = IRDMA_OP_CQ_DESTROY;
+ cqp_info->post_sq = 1;
+ cqp_info->in.u.cq_destroy.cq = cq;
+ cqp_info->in.u.cq_destroy.scratch = (uintptr_t)cqp_request;
+
+ irdma_handle_cqp_op(rf, cqp_request);
+ irdma_put_cqp_request(&rf->cqp, cqp_request);
+}
+
+/**
+ * irdma_hw_modify_qp_callback - handle state for modifyQPs that don't wait
+ * @cqp_request: modify QP completion
+ */
+static void irdma_hw_modify_qp_callback(struct irdma_cqp_request *cqp_request)
+{
+ struct cqp_cmds_info *cqp_info;
+ struct irdma_qp *iwqp;
+
+ cqp_info = &cqp_request->info;
+ iwqp = cqp_info->in.u.qp_modify.qp->qp_uk.back_qp;
+ atomic_dec(&iwqp->hw_mod_qp_pend);
+ wake_up(&iwqp->mod_qp_waitq);
+}
+
+/**
+ * irdma_hw_modify_qp - setup cqp for modify qp
+ * @iwdev: RDMA device
+ * @iwqp: qp ptr (user or kernel)
+ * @info: info for modify qp
+ * @wait: flag to wait or not for modify qp completion
+ */
+int irdma_hw_modify_qp(struct irdma_device *iwdev, struct irdma_qp *iwqp,
+ struct irdma_modify_qp_info *info, bool wait)
+{
+ int status;
+ struct irdma_pci_f *rf = iwdev->rf;
+ struct irdma_cqp_request *cqp_request;
+ struct cqp_cmds_info *cqp_info;
+ struct irdma_modify_qp_info *m_info;
+
+ cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, wait);
+ if (!cqp_request)
+ return -ENOMEM;
+
+ if (!wait) {
+ cqp_request->callback_fcn = irdma_hw_modify_qp_callback;
+ atomic_inc(&iwqp->hw_mod_qp_pend);
+ }
+ cqp_info = &cqp_request->info;
+ m_info = &cqp_info->in.u.qp_modify.info;
+ memcpy(m_info, info, sizeof(*m_info));
+ cqp_info->cqp_cmd = IRDMA_OP_QP_MODIFY;
+ cqp_info->post_sq = 1;
+ cqp_info->in.u.qp_modify.qp = &iwqp->sc_qp;
+ cqp_info->in.u.qp_modify.scratch = (uintptr_t)cqp_request;
+ status = irdma_handle_cqp_op(rf, cqp_request);
+ irdma_put_cqp_request(&rf->cqp, cqp_request);
+ if (status) {
+ if (rdma_protocol_roce(&iwdev->ibdev, 1))
+ return status;
+
+ switch (m_info->next_iwarp_state) {
+ struct irdma_gen_ae_info ae_info;
+
+ case IRDMA_QP_STATE_RTS:
+ case IRDMA_QP_STATE_IDLE:
+ case IRDMA_QP_STATE_TERMINATE:
+ case IRDMA_QP_STATE_CLOSING:
+ if (info->curr_iwarp_state == IRDMA_QP_STATE_IDLE)
+ irdma_send_reset(iwqp->cm_node);
+ else
+ iwqp->sc_qp.term_flags = IRDMA_TERM_DONE;
+ if (!wait) {
+ ae_info.ae_code = IRDMA_AE_BAD_CLOSE;
+ ae_info.ae_src = 0;
+ irdma_gen_ae(rf, &iwqp->sc_qp, &ae_info, false);
+ } else {
+ cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp,
+ wait);
+ if (!cqp_request)
+ return -ENOMEM;
+
+ cqp_info = &cqp_request->info;
+ m_info = &cqp_info->in.u.qp_modify.info;
+ memcpy(m_info, info, sizeof(*m_info));
+ cqp_info->cqp_cmd = IRDMA_OP_QP_MODIFY;
+ cqp_info->post_sq = 1;
+ cqp_info->in.u.qp_modify.qp = &iwqp->sc_qp;
+ cqp_info->in.u.qp_modify.scratch = (uintptr_t)cqp_request;
+ m_info->next_iwarp_state = IRDMA_QP_STATE_ERROR;
+ m_info->reset_tcp_conn = true;
+ irdma_handle_cqp_op(rf, cqp_request);
+ irdma_put_cqp_request(&rf->cqp, cqp_request);
+ }
+ break;
+ case IRDMA_QP_STATE_ERROR:
+ default:
+ break;
+ }
+ }
+
+ return status;
+}
+
+/**
+ * irdma_cqp_cq_destroy_cmd - destroy the cqp cq
+ * @dev: device pointer
+ * @cq: pointer to cq
+ */
+void irdma_cqp_cq_destroy_cmd(struct irdma_sc_dev *dev, struct irdma_sc_cq *cq)
+{
+ struct irdma_pci_f *rf = dev_to_rf(dev);
+
+ irdma_cq_wq_destroy(rf, cq);
+}
+
+/**
+ * irdma_cqp_qp_destroy_cmd - destroy the cqp
+ * @dev: device pointer
+ * @qp: pointer to qp
+ */
+int irdma_cqp_qp_destroy_cmd(struct irdma_sc_dev *dev, struct irdma_sc_qp *qp)
+{
+ struct irdma_pci_f *rf = dev_to_rf(dev);
+ struct irdma_cqp *iwcqp = &rf->cqp;
+ struct irdma_cqp_request *cqp_request;
+ struct cqp_cmds_info *cqp_info;
+ int status;
+
+ cqp_request = irdma_alloc_and_get_cqp_request(iwcqp, true);
+ if (!cqp_request)
+ return -ENOMEM;
+
+ cqp_info = &cqp_request->info;
+ cqp_info->cqp_cmd = IRDMA_OP_QP_DESTROY;
+ cqp_info->post_sq = 1;
+ cqp_info->in.u.qp_destroy.qp = qp;
+ cqp_info->in.u.qp_destroy.scratch = (uintptr_t)cqp_request;
+ cqp_info->in.u.qp_destroy.remove_hash_idx = true;
+
+ status = irdma_handle_cqp_op(rf, cqp_request);
+ irdma_put_cqp_request(&rf->cqp, cqp_request);
+
+ return status;
+}
+
+/**
+ * irdma_ieq_mpa_crc_ae - generate AE for crc error
+ * @dev: hardware control device structure
+ * @qp: hardware control qp
+ */
+void irdma_ieq_mpa_crc_ae(struct irdma_sc_dev *dev, struct irdma_sc_qp *qp)
+{
+ struct irdma_gen_ae_info info = {};
+ struct irdma_pci_f *rf = dev_to_rf(dev);
+
+ ibdev_dbg(&rf->iwdev->ibdev, "AEQ: Generate MPA CRC AE\n");
+ info.ae_code = IRDMA_AE_LLP_RECEIVED_MPA_CRC_ERROR;
+ info.ae_src = IRDMA_AE_SOURCE_RQ;
+ irdma_gen_ae(rf, qp, &info, false);
+}
+
+/**
+ * irdma_ieq_check_mpacrc - check if mpa crc is OK
+ * @addr: address of buffer for crc
+ * @len: length of buffer
+ * @val: value to be compared
+ */
+int irdma_ieq_check_mpacrc(const void *addr, u32 len, u32 val)
+{
+ if ((__force u32)cpu_to_le32(~crc32c(~0, addr, len)) != val)
+ return -EINVAL;
+
+ return 0;
+}
+
+/**
+ * irdma_ieq_get_qp - get qp based on quad in puda buffer
+ * @dev: hardware control device structure
+ * @buf: receive puda buffer on exception q
+ */
+struct irdma_sc_qp *irdma_ieq_get_qp(struct irdma_sc_dev *dev,
+ struct irdma_puda_buf *buf)
+{
+ struct irdma_qp *iwqp;
+ struct irdma_cm_node *cm_node;
+ struct irdma_device *iwdev = buf->vsi->back_vsi;
+ u32 loc_addr[4] = {};
+ u32 rem_addr[4] = {};
+ u16 loc_port, rem_port;
+ struct ipv6hdr *ip6h;
+ struct iphdr *iph = (struct iphdr *)buf->iph;
+ struct tcphdr *tcph = (struct tcphdr *)buf->tcph;
+
+ if (iph->version == 4) {
+ loc_addr[0] = ntohl(iph->daddr);
+ rem_addr[0] = ntohl(iph->saddr);
+ } else {
+ ip6h = (struct ipv6hdr *)buf->iph;
+ irdma_copy_ip_ntohl(loc_addr, ip6h->daddr.in6_u.u6_addr32);
+ irdma_copy_ip_ntohl(rem_addr, ip6h->saddr.in6_u.u6_addr32);
+ }
+ loc_port = ntohs(tcph->dest);
+ rem_port = ntohs(tcph->source);
+ cm_node = irdma_find_node(&iwdev->cm_core, rem_port, rem_addr, loc_port,
+ loc_addr, buf->vlan_valid ? buf->vlan_id : 0xFFFF);
+ if (!cm_node)
+ return NULL;
+
+ iwqp = cm_node->iwqp;
+ irdma_rem_ref_cm_node(cm_node);
+
+ return &iwqp->sc_qp;
+}
+
+/**
+ * irdma_send_ieq_ack - ACKs for duplicate or OOO partials FPDUs
+ * @qp: qp ptr
+ */
+void irdma_send_ieq_ack(struct irdma_sc_qp *qp)
+{
+ struct irdma_cm_node *cm_node = ((struct irdma_qp *)qp->qp_uk.back_qp)->cm_node;
+ struct irdma_puda_buf *buf = qp->pfpdu.lastrcv_buf;
+ struct tcphdr *tcph = (struct tcphdr *)buf->tcph;
+
+ cm_node->tcp_cntxt.rcv_nxt = qp->pfpdu.nextseqnum;
+ cm_node->tcp_cntxt.loc_seq_num = ntohl(tcph->ack_seq);
+
+ irdma_send_ack(cm_node);
+}
+
+/**
+ * irdma_puda_ieq_get_ah_info - get AH info from IEQ buffer
+ * @qp: qp pointer
+ * @ah_info: AH info pointer
+ */
+void irdma_puda_ieq_get_ah_info(struct irdma_sc_qp *qp,
+ struct irdma_ah_info *ah_info)
+{
+ struct irdma_puda_buf *buf = qp->pfpdu.ah_buf;
+ struct iphdr *iph;
+ struct ipv6hdr *ip6h;
+
+ memset(ah_info, 0, sizeof(*ah_info));
+ ah_info->do_lpbk = true;
+ ah_info->vlan_tag = buf->vlan_id;
+ ah_info->insert_vlan_tag = buf->vlan_valid;
+ ah_info->ipv4_valid = buf->ipv4;
+ ah_info->vsi = qp->vsi;
+
+ if (buf->smac_valid)
+ ether_addr_copy(ah_info->mac_addr, buf->smac);
+
+ if (buf->ipv4) {
+ ah_info->ipv4_valid = true;
+ iph = (struct iphdr *)buf->iph;
+ ah_info->hop_ttl = iph->ttl;
+ ah_info->tc_tos = iph->tos;
+ ah_info->dest_ip_addr[0] = ntohl(iph->daddr);
+ ah_info->src_ip_addr[0] = ntohl(iph->saddr);
+ } else {
+ ip6h = (struct ipv6hdr *)buf->iph;
+ ah_info->hop_ttl = ip6h->hop_limit;
+ ah_info->tc_tos = ip6h->priority;
+ irdma_copy_ip_ntohl(ah_info->dest_ip_addr,
+ ip6h->daddr.in6_u.u6_addr32);
+ irdma_copy_ip_ntohl(ah_info->src_ip_addr,
+ ip6h->saddr.in6_u.u6_addr32);
+ }
+
+ ah_info->dst_arpindex = irdma_arp_table(dev_to_rf(qp->dev),
+ ah_info->dest_ip_addr,
+ ah_info->ipv4_valid,
+ NULL, IRDMA_ARP_RESOLVE);
+}
+
+/**
+ * irdma_gen1_ieq_update_tcpip_info - update tcpip in the buffer
+ * @buf: puda to update
+ * @len: length of buffer
+ * @seqnum: seq number for tcp
+ */
+static void irdma_gen1_ieq_update_tcpip_info(struct irdma_puda_buf *buf,
+ u16 len, u32 seqnum)
+{
+ struct tcphdr *tcph;
+ struct iphdr *iph;
+ u16 iphlen;
+ u16 pktsize;
+ u8 *addr = buf->mem.va;
+
+ iphlen = (buf->ipv4) ? 20 : 40;
+ iph = (struct iphdr *)(addr + buf->maclen);
+ tcph = (struct tcphdr *)(addr + buf->maclen + iphlen);
+ pktsize = len + buf->tcphlen + iphlen;
+ iph->tot_len = htons(pktsize);
+ tcph->seq = htonl(seqnum);
+}
+
+/**
+ * irdma_ieq_update_tcpip_info - update tcpip in the buffer
+ * @buf: puda to update
+ * @len: length of buffer
+ * @seqnum: seq number for tcp
+ */
+void irdma_ieq_update_tcpip_info(struct irdma_puda_buf *buf, u16 len,
+ u32 seqnum)
+{
+ struct tcphdr *tcph;
+ u8 *addr;
+
+ if (buf->vsi->dev->hw_attrs.uk_attrs.hw_rev == IRDMA_GEN_1)
+ return irdma_gen1_ieq_update_tcpip_info(buf, len, seqnum);
+
+ addr = buf->mem.va;
+ tcph = (struct tcphdr *)addr;
+ tcph->seq = htonl(seqnum);
+}
+
+/**
+ * irdma_gen1_puda_get_tcpip_info - get tcpip info from puda
+ * buffer
+ * @info: to get information
+ * @buf: puda buffer
+ */
+static int irdma_gen1_puda_get_tcpip_info(struct irdma_puda_cmpl_info *info,
+ struct irdma_puda_buf *buf)
+{
+ struct iphdr *iph;
+ struct ipv6hdr *ip6h;
+ struct tcphdr *tcph;
+ u16 iphlen;
+ u16 pkt_len;
+ u8 *mem = buf->mem.va;
+ struct ethhdr *ethh = buf->mem.va;
+
+ if (ethh->h_proto == htons(0x8100)) {
+ info->vlan_valid = true;
+ buf->vlan_id = ntohs(((struct vlan_ethhdr *)ethh)->h_vlan_TCI) &
+ VLAN_VID_MASK;
+ }
+
+ buf->maclen = (info->vlan_valid) ? 18 : 14;
+ iphlen = (info->l3proto) ? 40 : 20;
+ buf->ipv4 = (info->l3proto) ? false : true;
+ buf->iph = mem + buf->maclen;
+ iph = (struct iphdr *)buf->iph;
+ buf->tcph = buf->iph + iphlen;
+ tcph = (struct tcphdr *)buf->tcph;
+
+ if (buf->ipv4) {
+ pkt_len = ntohs(iph->tot_len);
+ } else {
+ ip6h = (struct ipv6hdr *)buf->iph;
+ pkt_len = ntohs(ip6h->payload_len) + iphlen;
+ }
+
+ buf->totallen = pkt_len + buf->maclen;
+
+ if (info->payload_len < buf->totallen) {
+ ibdev_dbg(to_ibdev(buf->vsi->dev),
+ "ERR: payload_len = 0x%x totallen expected0x%x\n",
+ info->payload_len, buf->totallen);
+ return -EINVAL;
+ }
+
+ buf->tcphlen = tcph->doff << 2;
+ buf->datalen = pkt_len - iphlen - buf->tcphlen;
+ buf->data = buf->datalen ? buf->tcph + buf->tcphlen : NULL;
+ buf->hdrlen = buf->maclen + iphlen + buf->tcphlen;
+ buf->seqnum = ntohl(tcph->seq);
+
+ return 0;
+}
+
+/**
+ * irdma_puda_get_tcpip_info - get tcpip info from puda buffer
+ * @info: to get information
+ * @buf: puda buffer
+ */
+int irdma_puda_get_tcpip_info(struct irdma_puda_cmpl_info *info,
+ struct irdma_puda_buf *buf)
+{
+ struct tcphdr *tcph;
+ u32 pkt_len;
+ u8 *mem;
+
+ if (buf->vsi->dev->hw_attrs.uk_attrs.hw_rev == IRDMA_GEN_1)
+ return irdma_gen1_puda_get_tcpip_info(info, buf);
+
+ mem = buf->mem.va;
+ buf->vlan_valid = info->vlan_valid;
+ if (info->vlan_valid)
+ buf->vlan_id = info->vlan;
+
+ buf->ipv4 = info->ipv4;
+ if (buf->ipv4)
+ buf->iph = mem + IRDMA_IPV4_PAD;
+ else
+ buf->iph = mem;
+
+ buf->tcph = mem + IRDMA_TCP_OFFSET;
+ tcph = (struct tcphdr *)buf->tcph;
+ pkt_len = info->payload_len;
+ buf->totallen = pkt_len;
+ buf->tcphlen = tcph->doff << 2;
+ buf->datalen = pkt_len - IRDMA_TCP_OFFSET - buf->tcphlen;
+ buf->data = buf->datalen ? buf->tcph + buf->tcphlen : NULL;
+ buf->hdrlen = IRDMA_TCP_OFFSET + buf->tcphlen;
+ buf->seqnum = ntohl(tcph->seq);
+
+ if (info->smac_valid) {
+ ether_addr_copy(buf->smac, info->smac);
+ buf->smac_valid = true;
+ }
+
+ return 0;
+}
+
+/**
+ * irdma_hw_stats_timeout - Stats timer-handler which updates all HW stats
+ * @t: timer_list pointer
+ */
+static void irdma_hw_stats_timeout(struct timer_list *t)
+{
+ struct irdma_vsi_pestat *pf_devstat =
+ timer_container_of(pf_devstat, t, stats_timer);
+ struct irdma_sc_vsi *sc_vsi = pf_devstat->vsi;
+
+ if (sc_vsi->dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2)
+ irdma_cqp_gather_stats_cmd(sc_vsi->dev, sc_vsi->pestat, false);
+ else
+ irdma_cqp_gather_stats_gen1(sc_vsi->dev, sc_vsi->pestat);
+
+ mod_timer(&pf_devstat->stats_timer,
+ jiffies + msecs_to_jiffies(STATS_TIMER_DELAY));
+}
+
+/**
+ * irdma_hw_stats_start_timer - Start periodic stats timer
+ * @vsi: vsi structure pointer
+ */
+void irdma_hw_stats_start_timer(struct irdma_sc_vsi *vsi)
+{
+ struct irdma_vsi_pestat *devstat = vsi->pestat;
+
+ timer_setup(&devstat->stats_timer, irdma_hw_stats_timeout, 0);
+ mod_timer(&devstat->stats_timer,
+ jiffies + msecs_to_jiffies(STATS_TIMER_DELAY));
+}
+
+/**
+ * irdma_hw_stats_stop_timer - Delete periodic stats timer
+ * @vsi: pointer to vsi structure
+ */
+void irdma_hw_stats_stop_timer(struct irdma_sc_vsi *vsi)
+{
+ struct irdma_vsi_pestat *devstat = vsi->pestat;
+
+ timer_delete_sync(&devstat->stats_timer);
+}
+
+/**
+ * irdma_process_stats - Checking for wrap and update stats
+ * @pestat: stats structure pointer
+ */
+static inline void irdma_process_stats(struct irdma_vsi_pestat *pestat)
+{
+ sc_vsi_update_stats(pestat->vsi);
+}
+
+/**
+ * irdma_cqp_gather_stats_gen1 - Gather stats
+ * @dev: pointer to device structure
+ * @pestat: statistics structure
+ */
+void irdma_cqp_gather_stats_gen1(struct irdma_sc_dev *dev,
+ struct irdma_vsi_pestat *pestat)
+{
+ struct irdma_gather_stats *gather_stats =
+ pestat->gather_info.gather_stats_va;
+ const struct irdma_hw_stat_map *map = dev->hw_stats_map;
+ u16 max_stats_idx = dev->hw_attrs.max_stat_idx;
+ u32 stats_inst_offset_32;
+ u32 stats_inst_offset_64;
+ u64 new_val;
+ u16 i;
+
+ stats_inst_offset_32 = (pestat->gather_info.use_stats_inst) ?
+ pestat->gather_info.stats_inst_index :
+ pestat->hw->hmc.hmc_fn_id;
+ stats_inst_offset_32 *= 4;
+ stats_inst_offset_64 = stats_inst_offset_32 * 2;
+
+ for (i = 0; i < max_stats_idx; i++) {
+ if (map[i].bitmask <= IRDMA_MAX_STATS_32)
+ new_val = rd32(dev->hw,
+ dev->hw_stats_regs[i] + stats_inst_offset_32);
+ else
+ new_val = rd64(dev->hw,
+ dev->hw_stats_regs[i] + stats_inst_offset_64);
+ gather_stats->val[map[i].byteoff / sizeof(u64)] = new_val;
+ }
+
+ irdma_process_stats(pestat);
+}
+
+/**
+ * irdma_process_cqp_stats - Checking for wrap and update stats
+ * @cqp_request: cqp_request structure pointer
+ */
+static void irdma_process_cqp_stats(struct irdma_cqp_request *cqp_request)
+{
+ struct irdma_vsi_pestat *pestat = cqp_request->param;
+
+ irdma_process_stats(pestat);
+}
+
+/**
+ * irdma_cqp_gather_stats_cmd - Gather stats
+ * @dev: pointer to device structure
+ * @pestat: pointer to stats info
+ * @wait: flag to wait or not wait for stats
+ */
+int irdma_cqp_gather_stats_cmd(struct irdma_sc_dev *dev,
+ struct irdma_vsi_pestat *pestat, bool wait)
+
+{
+ struct irdma_pci_f *rf = dev_to_rf(dev);
+ struct irdma_cqp *iwcqp = &rf->cqp;
+ struct irdma_cqp_request *cqp_request;
+ struct cqp_cmds_info *cqp_info;
+ int status;
+
+ cqp_request = irdma_alloc_and_get_cqp_request(iwcqp, wait);
+ if (!cqp_request)
+ return -ENOMEM;
+
+ cqp_info = &cqp_request->info;
+ cqp_info->cqp_cmd = IRDMA_OP_STATS_GATHER;
+ cqp_info->post_sq = 1;
+ cqp_info->in.u.stats_gather.info = pestat->gather_info;
+ cqp_info->in.u.stats_gather.scratch = (uintptr_t)cqp_request;
+ cqp_info->in.u.stats_gather.cqp = &rf->cqp.sc_cqp;
+ cqp_request->param = pestat;
+ if (!wait)
+ cqp_request->callback_fcn = irdma_process_cqp_stats;
+ status = irdma_handle_cqp_op(rf, cqp_request);
+ if (wait)
+ irdma_process_stats(pestat);
+ irdma_put_cqp_request(&rf->cqp, cqp_request);
+
+ return status;
+}
+
+/**
+ * irdma_cqp_stats_inst_cmd - Allocate/free stats instance
+ * @vsi: pointer to vsi structure
+ * @cmd: command to allocate or free
+ * @stats_info: pointer to allocate stats info
+ */
+int irdma_cqp_stats_inst_cmd(struct irdma_sc_vsi *vsi, u8 cmd,
+ struct irdma_stats_inst_info *stats_info)
+{
+ struct irdma_pci_f *rf = dev_to_rf(vsi->dev);
+ struct irdma_cqp *iwcqp = &rf->cqp;
+ struct irdma_cqp_request *cqp_request;
+ struct cqp_cmds_info *cqp_info;
+ int status;
+ bool wait = false;
+
+ if (cmd == IRDMA_OP_STATS_ALLOCATE)
+ wait = true;
+ cqp_request = irdma_alloc_and_get_cqp_request(iwcqp, wait);
+ if (!cqp_request)
+ return -ENOMEM;
+
+ cqp_info = &cqp_request->info;
+ cqp_info->cqp_cmd = cmd;
+ cqp_info->post_sq = 1;
+ cqp_info->in.u.stats_manage.info = *stats_info;
+ cqp_info->in.u.stats_manage.scratch = (uintptr_t)cqp_request;
+ cqp_info->in.u.stats_manage.cqp = &rf->cqp.sc_cqp;
+ status = irdma_handle_cqp_op(rf, cqp_request);
+ if (wait)
+ stats_info->stats_idx = cqp_request->compl_info.op_ret_val;
+ irdma_put_cqp_request(iwcqp, cqp_request);
+
+ return status;
+}
+
+/**
+ * irdma_cqp_ceq_cmd - Create/Destroy CEQ's after CEQ 0
+ * @dev: pointer to device info
+ * @sc_ceq: pointer to ceq structure
+ * @op: Create or Destroy
+ */
+int irdma_cqp_ceq_cmd(struct irdma_sc_dev *dev, struct irdma_sc_ceq *sc_ceq,
+ u8 op)
+{
+ struct irdma_cqp_request *cqp_request;
+ struct cqp_cmds_info *cqp_info;
+ struct irdma_pci_f *rf = dev_to_rf(dev);
+ int status;
+
+ cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, true);
+ if (!cqp_request)
+ return -ENOMEM;
+
+ cqp_info = &cqp_request->info;
+ cqp_info->post_sq = 1;
+ cqp_info->cqp_cmd = op;
+ cqp_info->in.u.ceq_create.ceq = sc_ceq;
+ cqp_info->in.u.ceq_create.scratch = (uintptr_t)cqp_request;
+
+ status = irdma_handle_cqp_op(rf, cqp_request);
+ irdma_put_cqp_request(&rf->cqp, cqp_request);
+
+ return status;
+}
+
+/**
+ * irdma_cqp_aeq_cmd - Create/Destroy AEQ
+ * @dev: pointer to device info
+ * @sc_aeq: pointer to aeq structure
+ * @op: Create or Destroy
+ */
+int irdma_cqp_aeq_cmd(struct irdma_sc_dev *dev, struct irdma_sc_aeq *sc_aeq,
+ u8 op)
+{
+ struct irdma_cqp_request *cqp_request;
+ struct cqp_cmds_info *cqp_info;
+ struct irdma_pci_f *rf = dev_to_rf(dev);
+ int status;
+
+ cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, true);
+ if (!cqp_request)
+ return -ENOMEM;
+
+ cqp_info = &cqp_request->info;
+ cqp_info->post_sq = 1;
+ cqp_info->cqp_cmd = op;
+ cqp_info->in.u.aeq_create.aeq = sc_aeq;
+ cqp_info->in.u.aeq_create.scratch = (uintptr_t)cqp_request;
+
+ status = irdma_handle_cqp_op(rf, cqp_request);
+ irdma_put_cqp_request(&rf->cqp, cqp_request);
+
+ return status;
+}
+
+/**
+ * irdma_cqp_ws_node_cmd - Add/modify/delete ws node
+ * @dev: pointer to device structure
+ * @cmd: Add, modify or delete
+ * @node_info: pointer to ws node info
+ */
+int irdma_cqp_ws_node_cmd(struct irdma_sc_dev *dev, u8 cmd,
+ struct irdma_ws_node_info *node_info)
+{
+ struct irdma_pci_f *rf = dev_to_rf(dev);
+ struct irdma_cqp *iwcqp = &rf->cqp;
+ struct irdma_sc_cqp *cqp = &iwcqp->sc_cqp;
+ struct irdma_cqp_request *cqp_request;
+ struct cqp_cmds_info *cqp_info;
+ int status;
+ bool poll;
+
+ if (!rf->sc_dev.ceq_valid)
+ poll = true;
+ else
+ poll = false;
+
+ cqp_request = irdma_alloc_and_get_cqp_request(iwcqp, !poll);
+ if (!cqp_request)
+ return -ENOMEM;
+
+ cqp_info = &cqp_request->info;
+ cqp_info->cqp_cmd = cmd;
+ cqp_info->post_sq = 1;
+ cqp_info->in.u.ws_node.info = *node_info;
+ cqp_info->in.u.ws_node.cqp = cqp;
+ cqp_info->in.u.ws_node.scratch = (uintptr_t)cqp_request;
+ status = irdma_handle_cqp_op(rf, cqp_request);
+ if (status)
+ goto exit;
+
+ if (poll) {
+ struct irdma_ccq_cqe_info compl_info;
+
+ status = irdma_sc_poll_for_cqp_op_done(cqp, IRDMA_CQP_OP_WORK_SCHED_NODE,
+ &compl_info);
+ node_info->qs_handle = compl_info.op_ret_val;
+ ibdev_dbg(&rf->iwdev->ibdev, "DCB: opcode=%d, compl_info.retval=%d\n",
+ compl_info.op_code, compl_info.op_ret_val);
+ } else {
+ node_info->qs_handle = cqp_request->compl_info.op_ret_val;
+ }
+
+exit:
+ irdma_put_cqp_request(&rf->cqp, cqp_request);
+
+ return status;
+}
+
+/**
+ * irdma_ah_cqp_op - perform an AH cqp operation
+ * @rf: RDMA PCI function
+ * @sc_ah: address handle
+ * @cmd: AH operation
+ * @wait: wait if true
+ * @callback_fcn: Callback function on CQP op completion
+ * @cb_param: parameter for callback function
+ *
+ * returns errno
+ */
+int irdma_ah_cqp_op(struct irdma_pci_f *rf, struct irdma_sc_ah *sc_ah, u8 cmd,
+ bool wait,
+ void (*callback_fcn)(struct irdma_cqp_request *),
+ void *cb_param)
+{
+ struct irdma_cqp_request *cqp_request;
+ struct cqp_cmds_info *cqp_info;
+ int status;
+
+ if (cmd != IRDMA_OP_AH_CREATE && cmd != IRDMA_OP_AH_DESTROY)
+ return -EINVAL;
+
+ cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, wait);
+ if (!cqp_request)
+ return -ENOMEM;
+
+ cqp_info = &cqp_request->info;
+ cqp_info->cqp_cmd = cmd;
+ cqp_info->post_sq = 1;
+ if (cmd == IRDMA_OP_AH_CREATE) {
+ cqp_info->in.u.ah_create.info = sc_ah->ah_info;
+ cqp_info->in.u.ah_create.scratch = (uintptr_t)cqp_request;
+ cqp_info->in.u.ah_create.cqp = &rf->cqp.sc_cqp;
+ } else if (cmd == IRDMA_OP_AH_DESTROY) {
+ cqp_info->in.u.ah_destroy.info = sc_ah->ah_info;
+ cqp_info->in.u.ah_destroy.scratch = (uintptr_t)cqp_request;
+ cqp_info->in.u.ah_destroy.cqp = &rf->cqp.sc_cqp;
+ }
+
+ if (!wait) {
+ cqp_request->callback_fcn = callback_fcn;
+ cqp_request->param = cb_param;
+ }
+ status = irdma_handle_cqp_op(rf, cqp_request);
+ irdma_put_cqp_request(&rf->cqp, cqp_request);
+
+ if (status)
+ return -ENOMEM;
+
+ if (wait)
+ sc_ah->ah_info.ah_valid = (cmd == IRDMA_OP_AH_CREATE);
+
+ return 0;
+}
+
+/**
+ * irdma_ieq_ah_cb - callback after creation of AH for IEQ
+ * @cqp_request: pointer to cqp_request of create AH
+ */
+static void irdma_ieq_ah_cb(struct irdma_cqp_request *cqp_request)
+{
+ struct irdma_sc_qp *qp = cqp_request->param;
+ struct irdma_sc_ah *sc_ah = qp->pfpdu.ah;
+ unsigned long flags;
+
+ spin_lock_irqsave(&qp->pfpdu.lock, flags);
+ if (!cqp_request->compl_info.op_ret_val) {
+ sc_ah->ah_info.ah_valid = true;
+ irdma_ieq_process_fpdus(qp, qp->vsi->ieq);
+ } else {
+ sc_ah->ah_info.ah_valid = false;
+ irdma_ieq_cleanup_qp(qp->vsi->ieq, qp);
+ }
+ spin_unlock_irqrestore(&qp->pfpdu.lock, flags);
+}
+
+/**
+ * irdma_ilq_ah_cb - callback after creation of AH for ILQ
+ * @cqp_request: pointer to cqp_request of create AH
+ */
+static void irdma_ilq_ah_cb(struct irdma_cqp_request *cqp_request)
+{
+ struct irdma_cm_node *cm_node = cqp_request->param;
+ struct irdma_sc_ah *sc_ah = cm_node->ah;
+
+ sc_ah->ah_info.ah_valid = !cqp_request->compl_info.op_ret_val;
+ irdma_add_conn_est_qh(cm_node);
+}
+
+/**
+ * irdma_puda_create_ah - create AH for ILQ/IEQ qp's
+ * @dev: device pointer
+ * @ah_info: Address handle info
+ * @wait: When true will wait for operation to complete
+ * @type: ILQ/IEQ
+ * @cb_param: Callback param when not waiting
+ * @ah_ret: Returned pointer to address handle if created
+ *
+ */
+int irdma_puda_create_ah(struct irdma_sc_dev *dev,
+ struct irdma_ah_info *ah_info, bool wait,
+ enum puda_rsrc_type type, void *cb_param,
+ struct irdma_sc_ah **ah_ret)
+{
+ struct irdma_sc_ah *ah;
+ struct irdma_pci_f *rf = dev_to_rf(dev);
+ int err;
+
+ ah = kzalloc(sizeof(*ah), GFP_ATOMIC);
+ *ah_ret = ah;
+ if (!ah)
+ return -ENOMEM;
+
+ err = irdma_alloc_rsrc(rf, rf->allocated_ahs, rf->max_ah,
+ &ah_info->ah_idx, &rf->next_ah);
+ if (err)
+ goto err_free;
+
+ ah->dev = dev;
+ ah->ah_info = *ah_info;
+
+ if (type == IRDMA_PUDA_RSRC_TYPE_ILQ)
+ err = irdma_ah_cqp_op(rf, ah, IRDMA_OP_AH_CREATE, wait,
+ irdma_ilq_ah_cb, cb_param);
+ else
+ err = irdma_ah_cqp_op(rf, ah, IRDMA_OP_AH_CREATE, wait,
+ irdma_ieq_ah_cb, cb_param);
+
+ if (err)
+ goto error;
+ return 0;
+
+error:
+ irdma_free_rsrc(rf, rf->allocated_ahs, ah->ah_info.ah_idx);
+err_free:
+ kfree(ah);
+ *ah_ret = NULL;
+ return -ENOMEM;
+}
+
+/**
+ * irdma_puda_free_ah - free a puda address handle
+ * @dev: device pointer
+ * @ah: The address handle to free
+ */
+void irdma_puda_free_ah(struct irdma_sc_dev *dev, struct irdma_sc_ah *ah)
+{
+ struct irdma_pci_f *rf = dev_to_rf(dev);
+
+ if (!ah)
+ return;
+
+ if (ah->ah_info.ah_valid) {
+ irdma_ah_cqp_op(rf, ah, IRDMA_OP_AH_DESTROY, false, NULL, NULL);
+ irdma_free_rsrc(rf, rf->allocated_ahs, ah->ah_info.ah_idx);
+ }
+
+ kfree(ah);
+}
+
+/**
+ * irdma_gsi_ud_qp_ah_cb - callback after creation of AH for GSI/ID QP
+ * @cqp_request: pointer to cqp_request of create AH
+ */
+void irdma_gsi_ud_qp_ah_cb(struct irdma_cqp_request *cqp_request)
+{
+ struct irdma_sc_ah *sc_ah = cqp_request->param;
+
+ if (!cqp_request->compl_info.op_ret_val)
+ sc_ah->ah_info.ah_valid = true;
+ else
+ sc_ah->ah_info.ah_valid = false;
+}
+
+/**
+ * irdma_prm_add_pble_mem - add moemory to pble resources
+ * @pprm: pble resource manager
+ * @pchunk: chunk of memory to add
+ */
+int irdma_prm_add_pble_mem(struct irdma_pble_prm *pprm,
+ struct irdma_chunk *pchunk)
+{
+ u64 sizeofbitmap;
+
+ if (pchunk->size & 0xfff)
+ return -EINVAL;
+
+ sizeofbitmap = (u64)pchunk->size >> pprm->pble_shift;
+
+ pchunk->bitmapbuf = bitmap_zalloc(sizeofbitmap, GFP_KERNEL);
+ if (!pchunk->bitmapbuf)
+ return -ENOMEM;
+
+ pchunk->sizeofbitmap = sizeofbitmap;
+ /* each pble is 8 bytes hence shift by 3 */
+ pprm->total_pble_alloc += pchunk->size >> 3;
+ pprm->free_pble_cnt += pchunk->size >> 3;
+
+ return 0;
+}
+
+/**
+ * irdma_prm_get_pbles - get pble's from prm
+ * @pprm: pble resource manager
+ * @chunkinfo: nformation about chunk where pble's were acquired
+ * @mem_size: size of pble memory needed
+ * @vaddr: returns virtual address of pble memory
+ * @fpm_addr: returns fpm address of pble memory
+ */
+int irdma_prm_get_pbles(struct irdma_pble_prm *pprm,
+ struct irdma_pble_chunkinfo *chunkinfo, u64 mem_size,
+ u64 **vaddr, u64 *fpm_addr)
+{
+ u64 bits_needed;
+ u64 bit_idx = PBLE_INVALID_IDX;
+ struct irdma_chunk *pchunk = NULL;
+ struct list_head *chunk_entry = pprm->clist.next;
+ u32 offset;
+ unsigned long flags;
+ *vaddr = NULL;
+ *fpm_addr = 0;
+
+ bits_needed = DIV_ROUND_UP_ULL(mem_size, BIT_ULL(pprm->pble_shift));
+
+ spin_lock_irqsave(&pprm->prm_lock, flags);
+ while (chunk_entry != &pprm->clist) {
+ pchunk = (struct irdma_chunk *)chunk_entry;
+ bit_idx = bitmap_find_next_zero_area(pchunk->bitmapbuf,
+ pchunk->sizeofbitmap, 0,
+ bits_needed, 0);
+ if (bit_idx < pchunk->sizeofbitmap)
+ break;
+
+ /* list.next used macro */
+ chunk_entry = pchunk->list.next;
+ }
+
+ if (!pchunk || bit_idx >= pchunk->sizeofbitmap) {
+ spin_unlock_irqrestore(&pprm->prm_lock, flags);
+ return -ENOMEM;
+ }
+
+ bitmap_set(pchunk->bitmapbuf, bit_idx, bits_needed);
+ offset = bit_idx << pprm->pble_shift;
+ *vaddr = pchunk->vaddr + offset;
+ *fpm_addr = pchunk->fpm_addr + offset;
+
+ chunkinfo->pchunk = pchunk;
+ chunkinfo->bit_idx = bit_idx;
+ chunkinfo->bits_used = bits_needed;
+ /* 3 is sizeof pble divide */
+ pprm->free_pble_cnt -= chunkinfo->bits_used << (pprm->pble_shift - 3);
+ spin_unlock_irqrestore(&pprm->prm_lock, flags);
+
+ return 0;
+}
+
+/**
+ * irdma_prm_return_pbles - return pbles back to prm
+ * @pprm: pble resource manager
+ * @chunkinfo: chunk where pble's were acquired and to be freed
+ */
+void irdma_prm_return_pbles(struct irdma_pble_prm *pprm,
+ struct irdma_pble_chunkinfo *chunkinfo)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&pprm->prm_lock, flags);
+ pprm->free_pble_cnt += chunkinfo->bits_used << (pprm->pble_shift - 3);
+ bitmap_clear(chunkinfo->pchunk->bitmapbuf, chunkinfo->bit_idx,
+ chunkinfo->bits_used);
+ spin_unlock_irqrestore(&pprm->prm_lock, flags);
+}
+
+int irdma_map_vm_page_list(struct irdma_hw *hw, void *va, dma_addr_t *pg_dma,
+ u32 pg_cnt)
+{
+ struct page *vm_page;
+ int i;
+ u8 *addr;
+
+ addr = (u8 *)(uintptr_t)va;
+ for (i = 0; i < pg_cnt; i++) {
+ vm_page = vmalloc_to_page(addr);
+ if (!vm_page)
+ goto err;
+
+ pg_dma[i] = dma_map_page(hw->device, vm_page, 0, PAGE_SIZE,
+ DMA_BIDIRECTIONAL);
+ if (dma_mapping_error(hw->device, pg_dma[i]))
+ goto err;
+
+ addr += PAGE_SIZE;
+ }
+
+ return 0;
+
+err:
+ irdma_unmap_vm_page_list(hw, pg_dma, i);
+ return -ENOMEM;
+}
+
+void irdma_unmap_vm_page_list(struct irdma_hw *hw, dma_addr_t *pg_dma, u32 pg_cnt)
+{
+ int i;
+
+ for (i = 0; i < pg_cnt; i++)
+ dma_unmap_page(hw->device, pg_dma[i], PAGE_SIZE, DMA_BIDIRECTIONAL);
+}
+
+/**
+ * irdma_pble_free_paged_mem - free virtual paged memory
+ * @chunk: chunk to free with paged memory
+ */
+void irdma_pble_free_paged_mem(struct irdma_chunk *chunk)
+{
+ if (!chunk->pg_cnt)
+ goto done;
+
+ irdma_unmap_vm_page_list(chunk->dev->hw, chunk->dmainfo.dmaaddrs,
+ chunk->pg_cnt);
+
+done:
+ kfree(chunk->dmainfo.dmaaddrs);
+ chunk->dmainfo.dmaaddrs = NULL;
+ vfree(chunk->vaddr);
+ chunk->vaddr = NULL;
+ chunk->type = 0;
+}
+
+/**
+ * irdma_pble_get_paged_mem -allocate paged memory for pbles
+ * @chunk: chunk to add for paged memory
+ * @pg_cnt: number of pages needed
+ */
+int irdma_pble_get_paged_mem(struct irdma_chunk *chunk, u32 pg_cnt)
+{
+ u32 size;
+ void *va;
+
+ chunk->dmainfo.dmaaddrs = kzalloc(pg_cnt << 3, GFP_KERNEL);
+ if (!chunk->dmainfo.dmaaddrs)
+ return -ENOMEM;
+
+ size = PAGE_SIZE * pg_cnt;
+ va = vmalloc(size);
+ if (!va)
+ goto err;
+
+ if (irdma_map_vm_page_list(chunk->dev->hw, va, chunk->dmainfo.dmaaddrs,
+ pg_cnt)) {
+ vfree(va);
+ goto err;
+ }
+ chunk->vaddr = va;
+ chunk->size = size;
+ chunk->pg_cnt = pg_cnt;
+ chunk->type = PBLE_SD_PAGED;
+
+ return 0;
+err:
+ kfree(chunk->dmainfo.dmaaddrs);
+ chunk->dmainfo.dmaaddrs = NULL;
+
+ return -ENOMEM;
+}
+
+/**
+ * irdma_alloc_ws_node_id - Allocate a tx scheduler node ID
+ * @dev: device pointer
+ */
+u16 irdma_alloc_ws_node_id(struct irdma_sc_dev *dev)
+{
+ struct irdma_pci_f *rf = dev_to_rf(dev);
+ u32 next = 1;
+ u32 node_id;
+
+ if (irdma_alloc_rsrc(rf, rf->allocated_ws_nodes, rf->max_ws_node_id,
+ &node_id, &next))
+ return IRDMA_WS_NODE_INVALID;
+
+ return (u16)node_id;
+}
+
+/**
+ * irdma_free_ws_node_id - Free a tx scheduler node ID
+ * @dev: device pointer
+ * @node_id: Work scheduler node ID
+ */
+void irdma_free_ws_node_id(struct irdma_sc_dev *dev, u16 node_id)
+{
+ struct irdma_pci_f *rf = dev_to_rf(dev);
+
+ irdma_free_rsrc(rf, rf->allocated_ws_nodes, (u32)node_id);
+}
+
+/**
+ * irdma_modify_qp_to_err - Modify a QP to error
+ * @sc_qp: qp structure
+ */
+void irdma_modify_qp_to_err(struct irdma_sc_qp *sc_qp)
+{
+ struct irdma_qp *qp = sc_qp->qp_uk.back_qp;
+ struct ib_qp_attr attr;
+
+ if (qp->iwdev->rf->reset)
+ return;
+ attr.qp_state = IB_QPS_ERR;
+
+ if (rdma_protocol_roce(qp->ibqp.device, 1))
+ irdma_modify_qp_roce(&qp->ibqp, &attr, IB_QP_STATE, NULL);
+ else
+ irdma_modify_qp(&qp->ibqp, &attr, IB_QP_STATE, NULL);
+}
+
+void irdma_ib_qp_event(struct irdma_qp *iwqp, enum irdma_qp_event_type event)
+{
+ struct ib_event ibevent;
+
+ if (!iwqp->ibqp.event_handler)
+ return;
+
+ switch (event) {
+ case IRDMA_QP_EVENT_CATASTROPHIC:
+ ibevent.event = IB_EVENT_QP_FATAL;
+ break;
+ case IRDMA_QP_EVENT_ACCESS_ERR:
+ ibevent.event = IB_EVENT_QP_ACCESS_ERR;
+ break;
+ case IRDMA_QP_EVENT_REQ_ERR:
+ ibevent.event = IB_EVENT_QP_REQ_ERR;
+ break;
+ }
+ ibevent.device = iwqp->ibqp.device;
+ ibevent.element.qp = &iwqp->ibqp;
+ iwqp->ibqp.event_handler(&ibevent, iwqp->ibqp.qp_context);
+}
+
+void irdma_remove_cmpls_list(struct irdma_cq *iwcq)
+{
+ struct irdma_cmpl_gen *cmpl_node;
+ struct list_head *tmp_node, *list_node;
+
+ list_for_each_safe (list_node, tmp_node, &iwcq->cmpl_generated) {
+ cmpl_node = list_entry(list_node, struct irdma_cmpl_gen, list);
+ list_del(&cmpl_node->list);
+ kfree(cmpl_node);
+ }
+}
+
+int irdma_generated_cmpls(struct irdma_cq *iwcq, struct irdma_cq_poll_info *cq_poll_info)
+{
+ struct irdma_cmpl_gen *cmpl;
+
+ if (list_empty(&iwcq->cmpl_generated))
+ return -ENOENT;
+ cmpl = list_first_entry_or_null(&iwcq->cmpl_generated, struct irdma_cmpl_gen, list);
+ list_del(&cmpl->list);
+ memcpy(cq_poll_info, &cmpl->cpi, sizeof(*cq_poll_info));
+ kfree(cmpl);
+
+ ibdev_dbg(iwcq->ibcq.device,
+ "VERBS: %s: Poll artificially generated completion for QP 0x%X, op %u, wr_id=0x%llx\n",
+ __func__, cq_poll_info->qp_id, cq_poll_info->op_type,
+ cq_poll_info->wr_id);
+
+ return 0;
+}
+
+/**
+ * irdma_set_cpi_common_values - fill in values for polling info struct
+ * @cpi: resulting structure of cq_poll_info type
+ * @qp: QPair
+ * @qp_num: id of the QP
+ */
+static void irdma_set_cpi_common_values(struct irdma_cq_poll_info *cpi,
+ struct irdma_qp_uk *qp, u32 qp_num)
+{
+ cpi->comp_status = IRDMA_COMPL_STATUS_FLUSHED;
+ cpi->error = true;
+ cpi->major_err = IRDMA_FLUSH_MAJOR_ERR;
+ cpi->minor_err = FLUSH_GENERAL_ERR;
+ cpi->qp_handle = (irdma_qp_handle)(uintptr_t)qp;
+ cpi->qp_id = qp_num;
+}
+
+static inline void irdma_comp_handler(struct irdma_cq *cq)
+{
+ if (!cq->ibcq.comp_handler)
+ return;
+ if (atomic_cmpxchg(&cq->armed, 1, 0))
+ cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
+}
+
+void irdma_generate_flush_completions(struct irdma_qp *iwqp)
+{
+ struct irdma_qp_uk *qp = &iwqp->sc_qp.qp_uk;
+ struct irdma_ring *sq_ring = &qp->sq_ring;
+ struct irdma_ring *rq_ring = &qp->rq_ring;
+ struct irdma_cq *iwscq = iwqp->iwscq;
+ struct irdma_cq *iwrcq = iwqp->iwrcq;
+ struct irdma_cmpl_gen *cmpl;
+ __le64 *sw_wqe;
+ u64 wqe_qword;
+ u32 wqe_idx;
+ bool compl_generated = false;
+ unsigned long flags1;
+
+ spin_lock_irqsave(&iwscq->lock, flags1);
+ if (irdma_uk_cq_empty(&iwscq->sc_cq.cq_uk)) {
+ unsigned long flags2;
+
+ spin_lock_irqsave(&iwqp->lock, flags2);
+ while (IRDMA_RING_MORE_WORK(*sq_ring)) {
+ cmpl = kzalloc(sizeof(*cmpl), GFP_ATOMIC);
+ if (!cmpl) {
+ spin_unlock_irqrestore(&iwqp->lock, flags2);
+ spin_unlock_irqrestore(&iwscq->lock, flags1);
+ return;
+ }
+
+ wqe_idx = sq_ring->tail;
+ irdma_set_cpi_common_values(&cmpl->cpi, qp, qp->qp_id);
+
+ cmpl->cpi.wr_id = qp->sq_wrtrk_array[wqe_idx].wrid;
+ sw_wqe = qp->sq_base[wqe_idx].elem;
+ get_64bit_val(sw_wqe, 24, &wqe_qword);
+ cmpl->cpi.op_type = (u8)FIELD_GET(IRDMAQPSQ_OPCODE, IRDMAQPSQ_OPCODE);
+ cmpl->cpi.q_type = IRDMA_CQE_QTYPE_SQ;
+ /* remove the SQ WR by moving SQ tail*/
+ IRDMA_RING_SET_TAIL(*sq_ring,
+ sq_ring->tail + qp->sq_wrtrk_array[sq_ring->tail].quanta);
+ if (cmpl->cpi.op_type == IRDMAQP_OP_NOP) {
+ kfree(cmpl);
+ continue;
+ }
+ ibdev_dbg(iwscq->ibcq.device,
+ "DEV: %s: adding wr_id = 0x%llx SQ Completion to list qp_id=%d\n",
+ __func__, cmpl->cpi.wr_id, qp->qp_id);
+ list_add_tail(&cmpl->list, &iwscq->cmpl_generated);
+ compl_generated = true;
+ }
+ spin_unlock_irqrestore(&iwqp->lock, flags2);
+ spin_unlock_irqrestore(&iwscq->lock, flags1);
+ if (compl_generated)
+ irdma_comp_handler(iwscq);
+ } else {
+ spin_unlock_irqrestore(&iwscq->lock, flags1);
+ mod_delayed_work(iwqp->iwdev->cleanup_wq, &iwqp->dwork_flush,
+ msecs_to_jiffies(IRDMA_FLUSH_DELAY_MS));
+ }
+
+ spin_lock_irqsave(&iwrcq->lock, flags1);
+ if (irdma_uk_cq_empty(&iwrcq->sc_cq.cq_uk)) {
+ unsigned long flags2;
+
+ spin_lock_irqsave(&iwqp->lock, flags2);
+ while (IRDMA_RING_MORE_WORK(*rq_ring)) {
+ cmpl = kzalloc(sizeof(*cmpl), GFP_ATOMIC);
+ if (!cmpl) {
+ spin_unlock_irqrestore(&iwqp->lock, flags2);
+ spin_unlock_irqrestore(&iwrcq->lock, flags1);
+ return;
+ }
+
+ wqe_idx = rq_ring->tail;
+ irdma_set_cpi_common_values(&cmpl->cpi, qp, qp->qp_id);
+
+ cmpl->cpi.wr_id = qp->rq_wrid_array[wqe_idx];
+ cmpl->cpi.op_type = IRDMA_OP_TYPE_REC;
+ cmpl->cpi.q_type = IRDMA_CQE_QTYPE_RQ;
+ /* remove the RQ WR by moving RQ tail */
+ IRDMA_RING_SET_TAIL(*rq_ring, rq_ring->tail + 1);
+ ibdev_dbg(iwrcq->ibcq.device,
+ "DEV: %s: adding wr_id = 0x%llx RQ Completion to list qp_id=%d, wqe_idx=%d\n",
+ __func__, cmpl->cpi.wr_id, qp->qp_id,
+ wqe_idx);
+ list_add_tail(&cmpl->list, &iwrcq->cmpl_generated);
+
+ compl_generated = true;
+ }
+ spin_unlock_irqrestore(&iwqp->lock, flags2);
+ spin_unlock_irqrestore(&iwrcq->lock, flags1);
+ if (compl_generated)
+ irdma_comp_handler(iwrcq);
+ } else {
+ spin_unlock_irqrestore(&iwrcq->lock, flags1);
+ mod_delayed_work(iwqp->iwdev->cleanup_wq, &iwqp->dwork_flush,
+ msecs_to_jiffies(IRDMA_FLUSH_DELAY_MS));
+ }
+}
diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c
new file mode 100644
index 000000000000..6d9af41a2884
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/verbs.c
@@ -0,0 +1,5517 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2015 - 2021 Intel Corporation */
+#include "main.h"
+
+/**
+ * irdma_query_device - get device attributes
+ * @ibdev: device pointer from stack
+ * @props: returning device attributes
+ * @udata: user data
+ */
+static int irdma_query_device(struct ib_device *ibdev,
+ struct ib_device_attr *props,
+ struct ib_udata *udata)
+{
+ struct irdma_device *iwdev = to_iwdev(ibdev);
+ struct irdma_pci_f *rf = iwdev->rf;
+ struct pci_dev *pcidev = iwdev->rf->pcidev;
+ struct irdma_hw_attrs *hw_attrs = &rf->sc_dev.hw_attrs;
+
+ if (udata->inlen || udata->outlen)
+ return -EINVAL;
+
+ memset(props, 0, sizeof(*props));
+ addrconf_addr_eui48((u8 *)&props->sys_image_guid,
+ iwdev->netdev->dev_addr);
+ props->fw_ver = (u64)irdma_fw_major_ver(&rf->sc_dev) << 32 |
+ irdma_fw_minor_ver(&rf->sc_dev);
+ props->device_cap_flags = IB_DEVICE_MEM_WINDOW |
+ IB_DEVICE_MEM_MGT_EXTENSIONS;
+ if (hw_attrs->uk_attrs.hw_rev < IRDMA_GEN_3)
+ props->kernel_cap_flags = IBK_LOCAL_DMA_LKEY;
+ props->vendor_id = pcidev->vendor;
+ props->vendor_part_id = pcidev->device;
+
+ props->hw_ver = rf->pcidev->revision;
+ props->page_size_cap = hw_attrs->page_size_cap;
+ props->max_mr_size = hw_attrs->max_mr_size;
+ props->max_qp = rf->max_qp - rf->used_qps;
+ props->max_qp_wr = hw_attrs->max_qp_wr;
+ props->max_send_sge = hw_attrs->uk_attrs.max_hw_wq_frags;
+ props->max_recv_sge = hw_attrs->uk_attrs.max_hw_wq_frags;
+ props->max_cq = rf->max_cq - rf->used_cqs;
+ props->max_cqe = rf->max_cqe - 1;
+ props->max_mr = rf->max_mr - rf->used_mrs;
+ if (hw_attrs->uk_attrs.hw_rev >= IRDMA_GEN_3)
+ props->max_mw = props->max_mr;
+ props->max_pd = rf->max_pd - rf->used_pds;
+ props->max_sge_rd = hw_attrs->uk_attrs.max_hw_read_sges;
+ props->max_qp_rd_atom = hw_attrs->max_hw_ird;
+ props->max_qp_init_rd_atom = hw_attrs->max_hw_ord;
+ if (rdma_protocol_roce(ibdev, 1)) {
+ props->device_cap_flags |= IB_DEVICE_RC_RNR_NAK_GEN;
+ props->max_pkeys = IRDMA_PKEY_TBL_SZ;
+ }
+
+ props->max_ah = rf->max_ah;
+ props->max_mcast_grp = rf->max_mcg;
+ props->max_mcast_qp_attach = IRDMA_MAX_MGS_PER_CTX;
+ props->max_total_mcast_qp_attach = rf->max_qp * IRDMA_MAX_MGS_PER_CTX;
+ props->max_fast_reg_page_list_len = IRDMA_MAX_PAGES_PER_FMR;
+ props->max_srq = rf->max_srq - rf->used_srqs;
+ props->max_srq_wr = IRDMA_MAX_SRQ_WRS;
+ props->max_srq_sge = hw_attrs->uk_attrs.max_hw_wq_frags;
+ if (hw_attrs->uk_attrs.feature_flags & IRDMA_FEATURE_ATOMIC_OPS)
+ props->atomic_cap = IB_ATOMIC_HCA;
+ else
+ props->atomic_cap = IB_ATOMIC_NONE;
+ props->masked_atomic_cap = props->atomic_cap;
+ if (hw_attrs->uk_attrs.hw_rev >= IRDMA_GEN_3) {
+#define HCA_CORE_CLOCK_KHZ 1000000UL
+ props->timestamp_mask = GENMASK(31, 0);
+ props->hca_core_clock = HCA_CORE_CLOCK_KHZ;
+ }
+ if (hw_attrs->uk_attrs.hw_rev >= IRDMA_GEN_3)
+ props->device_cap_flags |= IB_DEVICE_MEM_WINDOW_TYPE_2B;
+
+ return 0;
+}
+
+/**
+ * irdma_query_port - get port attributes
+ * @ibdev: device pointer from stack
+ * @port: port number for query
+ * @props: returning device attributes
+ */
+static int irdma_query_port(struct ib_device *ibdev, u32 port,
+ struct ib_port_attr *props)
+{
+ struct irdma_device *iwdev = to_iwdev(ibdev);
+ struct net_device *netdev = iwdev->netdev;
+
+ /* no need to zero out pros here. done by caller */
+
+ props->max_mtu = IB_MTU_4096;
+ props->active_mtu = ib_mtu_int_to_enum(netdev->mtu);
+ props->lid = 1;
+ props->lmc = 0;
+ props->sm_lid = 0;
+ props->sm_sl = 0;
+ if (netif_carrier_ok(netdev) && netif_running(netdev)) {
+ props->state = IB_PORT_ACTIVE;
+ props->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
+ } else {
+ props->state = IB_PORT_DOWN;
+ props->phys_state = IB_PORT_PHYS_STATE_DISABLED;
+ }
+
+ ib_get_eth_speed(ibdev, port, &props->active_speed,
+ &props->active_width);
+
+ if (rdma_protocol_roce(ibdev, 1)) {
+ props->gid_tbl_len = 32;
+ props->ip_gids = true;
+ props->pkey_tbl_len = IRDMA_PKEY_TBL_SZ;
+ } else {
+ props->gid_tbl_len = 1;
+ }
+ props->qkey_viol_cntr = 0;
+ props->port_cap_flags |= IB_PORT_CM_SUP | IB_PORT_REINIT_SUP;
+ props->max_msg_sz = iwdev->rf->sc_dev.hw_attrs.max_hw_outbound_msg_size;
+
+ return 0;
+}
+
+/**
+ * irdma_disassociate_ucontext - Disassociate user context
+ * @context: ib user context
+ */
+static void irdma_disassociate_ucontext(struct ib_ucontext *context)
+{
+}
+
+static int irdma_mmap_legacy(struct irdma_ucontext *ucontext,
+ struct vm_area_struct *vma)
+{
+ u64 pfn;
+
+ if (vma->vm_pgoff || vma->vm_end - vma->vm_start != PAGE_SIZE)
+ return -EINVAL;
+
+ vma->vm_private_data = ucontext;
+ pfn = ((uintptr_t)ucontext->iwdev->rf->sc_dev.hw_regs[IRDMA_DB_ADDR_OFFSET] +
+ pci_resource_start(ucontext->iwdev->rf->pcidev, 0)) >> PAGE_SHIFT;
+
+ return rdma_user_mmap_io(&ucontext->ibucontext, vma, pfn, PAGE_SIZE,
+ pgprot_noncached(vma->vm_page_prot), NULL);
+}
+
+static void irdma_mmap_free(struct rdma_user_mmap_entry *rdma_entry)
+{
+ struct irdma_user_mmap_entry *entry = to_irdma_mmap_entry(rdma_entry);
+
+ kfree(entry);
+}
+
+static struct rdma_user_mmap_entry*
+irdma_user_mmap_entry_insert(struct irdma_ucontext *ucontext, u64 bar_offset,
+ enum irdma_mmap_flag mmap_flag, u64 *mmap_offset)
+{
+ struct irdma_user_mmap_entry *entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+ int ret;
+
+ if (!entry)
+ return NULL;
+
+ entry->bar_offset = bar_offset;
+ entry->mmap_flag = mmap_flag;
+
+ ret = rdma_user_mmap_entry_insert(&ucontext->ibucontext,
+ &entry->rdma_entry, PAGE_SIZE);
+ if (ret) {
+ kfree(entry);
+ return NULL;
+ }
+ *mmap_offset = rdma_user_mmap_get_offset(&entry->rdma_entry);
+
+ return &entry->rdma_entry;
+}
+
+/**
+ * irdma_mmap - user memory map
+ * @context: context created during alloc
+ * @vma: kernel info for user memory map
+ */
+static int irdma_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
+{
+ struct rdma_user_mmap_entry *rdma_entry;
+ struct irdma_user_mmap_entry *entry;
+ struct irdma_ucontext *ucontext;
+ u64 pfn;
+ int ret;
+
+ ucontext = to_ucontext(context);
+
+ /* Legacy support for libi40iw with hard-coded mmap key */
+ if (ucontext->legacy_mode)
+ return irdma_mmap_legacy(ucontext, vma);
+
+ rdma_entry = rdma_user_mmap_entry_get(&ucontext->ibucontext, vma);
+ if (!rdma_entry) {
+ ibdev_dbg(&ucontext->iwdev->ibdev,
+ "VERBS: pgoff[0x%lx] does not have valid entry\n",
+ vma->vm_pgoff);
+ return -EINVAL;
+ }
+
+ entry = to_irdma_mmap_entry(rdma_entry);
+ ibdev_dbg(&ucontext->iwdev->ibdev,
+ "VERBS: bar_offset [0x%llx] mmap_flag [%d]\n",
+ entry->bar_offset, entry->mmap_flag);
+
+ pfn = (entry->bar_offset +
+ pci_resource_start(ucontext->iwdev->rf->pcidev, 0)) >> PAGE_SHIFT;
+
+ switch (entry->mmap_flag) {
+ case IRDMA_MMAP_IO_NC:
+ ret = rdma_user_mmap_io(context, vma, pfn, PAGE_SIZE,
+ pgprot_noncached(vma->vm_page_prot),
+ rdma_entry);
+ break;
+ case IRDMA_MMAP_IO_WC:
+ ret = rdma_user_mmap_io(context, vma, pfn, PAGE_SIZE,
+ pgprot_writecombine(vma->vm_page_prot),
+ rdma_entry);
+ break;
+ default:
+ ret = -EINVAL;
+ }
+
+ if (ret)
+ ibdev_dbg(&ucontext->iwdev->ibdev,
+ "VERBS: bar_offset [0x%llx] mmap_flag[%d] err[%d]\n",
+ entry->bar_offset, entry->mmap_flag, ret);
+ rdma_user_mmap_entry_put(rdma_entry);
+
+ return ret;
+}
+
+/**
+ * irdma_alloc_push_page - allocate a push page for qp
+ * @iwqp: qp pointer
+ */
+static void irdma_alloc_push_page(struct irdma_qp *iwqp)
+{
+ struct irdma_cqp_request *cqp_request;
+ struct cqp_cmds_info *cqp_info;
+ struct irdma_device *iwdev = iwqp->iwdev;
+ struct irdma_sc_qp *qp = &iwqp->sc_qp;
+ int status;
+
+ cqp_request = irdma_alloc_and_get_cqp_request(&iwdev->rf->cqp, true);
+ if (!cqp_request)
+ return;
+
+ cqp_info = &cqp_request->info;
+ cqp_info->cqp_cmd = IRDMA_OP_MANAGE_PUSH_PAGE;
+ cqp_info->post_sq = 1;
+ cqp_info->in.u.manage_push_page.info.push_idx = 0;
+ cqp_info->in.u.manage_push_page.info.qs_handle =
+ qp->vsi->qos[qp->user_pri].qs_handle;
+ cqp_info->in.u.manage_push_page.info.free_page = 0;
+ cqp_info->in.u.manage_push_page.info.push_page_type = 0;
+ cqp_info->in.u.manage_push_page.cqp = &iwdev->rf->cqp.sc_cqp;
+ cqp_info->in.u.manage_push_page.scratch = (uintptr_t)cqp_request;
+
+ status = irdma_handle_cqp_op(iwdev->rf, cqp_request);
+ if (!status && cqp_request->compl_info.op_ret_val <
+ iwdev->rf->sc_dev.hw_attrs.max_hw_device_pages) {
+ qp->push_idx = cqp_request->compl_info.op_ret_val;
+ qp->push_offset = 0;
+ }
+
+ irdma_put_cqp_request(&iwdev->rf->cqp, cqp_request);
+}
+
+/**
+ * irdma_alloc_ucontext - Allocate the user context data structure
+ * @uctx: uverbs context pointer
+ * @udata: user data
+ *
+ * This keeps track of all objects associated with a particular
+ * user-mode client.
+ */
+static int irdma_alloc_ucontext(struct ib_ucontext *uctx,
+ struct ib_udata *udata)
+{
+#define IRDMA_ALLOC_UCTX_MIN_REQ_LEN offsetofend(struct irdma_alloc_ucontext_req, rsvd8)
+#define IRDMA_ALLOC_UCTX_MIN_RESP_LEN offsetofend(struct irdma_alloc_ucontext_resp, rsvd)
+ struct ib_device *ibdev = uctx->device;
+ struct irdma_device *iwdev = to_iwdev(ibdev);
+ struct irdma_alloc_ucontext_req req = {};
+ struct irdma_alloc_ucontext_resp uresp = {};
+ struct irdma_ucontext *ucontext = to_ucontext(uctx);
+ struct irdma_uk_attrs *uk_attrs = &iwdev->rf->sc_dev.hw_attrs.uk_attrs;
+
+ if (udata->inlen < IRDMA_ALLOC_UCTX_MIN_REQ_LEN ||
+ udata->outlen < IRDMA_ALLOC_UCTX_MIN_RESP_LEN)
+ return -EINVAL;
+
+ if (ib_copy_from_udata(&req, udata, min(sizeof(req), udata->inlen)))
+ return -EINVAL;
+
+ if (req.userspace_ver < 4 || req.userspace_ver > IRDMA_ABI_VER)
+ goto ver_error;
+
+ ucontext->iwdev = iwdev;
+ ucontext->abi_ver = req.userspace_ver;
+
+ if (!(req.comp_mask & IRDMA_SUPPORT_WQE_FORMAT_V2) &&
+ uk_attrs->hw_rev >= IRDMA_GEN_3)
+ return -EOPNOTSUPP;
+
+ if (req.comp_mask & IRDMA_ALLOC_UCTX_USE_RAW_ATTR)
+ ucontext->use_raw_attrs = true;
+
+ /* GEN_1 legacy support with libi40iw */
+ if (udata->outlen == IRDMA_ALLOC_UCTX_MIN_RESP_LEN) {
+ if (uk_attrs->hw_rev != IRDMA_GEN_1)
+ return -EOPNOTSUPP;
+
+ ucontext->legacy_mode = true;
+ uresp.max_qps = iwdev->rf->max_qp;
+ uresp.max_pds = iwdev->rf->sc_dev.hw_attrs.max_hw_pds;
+ uresp.wq_size = iwdev->rf->sc_dev.hw_attrs.max_qp_wr * 2;
+ uresp.kernel_ver = req.userspace_ver;
+ if (ib_copy_to_udata(udata, &uresp,
+ min(sizeof(uresp), udata->outlen)))
+ return -EFAULT;
+ } else {
+ u64 bar_off = (uintptr_t)iwdev->rf->sc_dev.hw_regs[IRDMA_DB_ADDR_OFFSET];
+
+ ucontext->db_mmap_entry =
+ irdma_user_mmap_entry_insert(ucontext, bar_off,
+ IRDMA_MMAP_IO_NC,
+ &uresp.db_mmap_key);
+ if (!ucontext->db_mmap_entry)
+ return -ENOMEM;
+
+ uresp.kernel_ver = IRDMA_ABI_VER;
+ uresp.feature_flags = uk_attrs->feature_flags;
+ uresp.max_hw_wq_frags = uk_attrs->max_hw_wq_frags;
+ uresp.max_hw_read_sges = uk_attrs->max_hw_read_sges;
+ uresp.max_hw_inline = uk_attrs->max_hw_inline;
+ uresp.max_hw_rq_quanta = uk_attrs->max_hw_rq_quanta;
+ uresp.max_hw_wq_quanta = uk_attrs->max_hw_wq_quanta;
+ uresp.max_hw_sq_chunk = uk_attrs->max_hw_sq_chunk;
+ uresp.max_hw_cq_size = uk_attrs->max_hw_cq_size;
+ uresp.min_hw_cq_size = uk_attrs->min_hw_cq_size;
+ uresp.hw_rev = uk_attrs->hw_rev;
+ uresp.comp_mask |= IRDMA_ALLOC_UCTX_USE_RAW_ATTR;
+ uresp.min_hw_wq_size = uk_attrs->min_hw_wq_size;
+ uresp.comp_mask |= IRDMA_ALLOC_UCTX_MIN_HW_WQ_SIZE;
+ uresp.max_hw_srq_quanta = uk_attrs->max_hw_srq_quanta;
+ uresp.comp_mask |= IRDMA_ALLOC_UCTX_MAX_HW_SRQ_QUANTA;
+ if (ib_copy_to_udata(udata, &uresp,
+ min(sizeof(uresp), udata->outlen))) {
+ rdma_user_mmap_entry_remove(ucontext->db_mmap_entry);
+ return -EFAULT;
+ }
+ }
+
+ INIT_LIST_HEAD(&ucontext->cq_reg_mem_list);
+ spin_lock_init(&ucontext->cq_reg_mem_list_lock);
+ INIT_LIST_HEAD(&ucontext->qp_reg_mem_list);
+ spin_lock_init(&ucontext->qp_reg_mem_list_lock);
+ INIT_LIST_HEAD(&ucontext->srq_reg_mem_list);
+ spin_lock_init(&ucontext->srq_reg_mem_list_lock);
+
+ return 0;
+
+ver_error:
+ ibdev_err(&iwdev->ibdev,
+ "Invalid userspace driver version detected. Detected version %d, should be %d\n",
+ req.userspace_ver, IRDMA_ABI_VER);
+ return -EINVAL;
+}
+
+/**
+ * irdma_dealloc_ucontext - deallocate the user context data structure
+ * @context: user context created during alloc
+ */
+static void irdma_dealloc_ucontext(struct ib_ucontext *context)
+{
+ struct irdma_ucontext *ucontext = to_ucontext(context);
+
+ rdma_user_mmap_entry_remove(ucontext->db_mmap_entry);
+}
+
+/**
+ * irdma_alloc_pd - allocate protection domain
+ * @pd: PD pointer
+ * @udata: user data
+ */
+static int irdma_alloc_pd(struct ib_pd *pd, struct ib_udata *udata)
+{
+#define IRDMA_ALLOC_PD_MIN_RESP_LEN offsetofend(struct irdma_alloc_pd_resp, rsvd)
+ struct irdma_pd *iwpd = to_iwpd(pd);
+ struct irdma_device *iwdev = to_iwdev(pd->device);
+ struct irdma_sc_dev *dev = &iwdev->rf->sc_dev;
+ struct irdma_pci_f *rf = iwdev->rf;
+ struct irdma_alloc_pd_resp uresp = {};
+ struct irdma_sc_pd *sc_pd;
+ u32 pd_id = 0;
+ int err;
+
+ if (udata && udata->outlen < IRDMA_ALLOC_PD_MIN_RESP_LEN)
+ return -EINVAL;
+
+ err = irdma_alloc_rsrc(rf, rf->allocated_pds, rf->max_pd, &pd_id,
+ &rf->next_pd);
+ if (err)
+ return err;
+
+ sc_pd = &iwpd->sc_pd;
+ if (udata) {
+ struct irdma_ucontext *ucontext =
+ rdma_udata_to_drv_context(udata, struct irdma_ucontext,
+ ibucontext);
+ irdma_sc_pd_init(dev, sc_pd, pd_id, ucontext->abi_ver);
+ uresp.pd_id = pd_id;
+ if (ib_copy_to_udata(udata, &uresp,
+ min(sizeof(uresp), udata->outlen))) {
+ err = -EFAULT;
+ goto error;
+ }
+ } else {
+ irdma_sc_pd_init(dev, sc_pd, pd_id, IRDMA_ABI_VER);
+ }
+
+ return 0;
+error:
+ irdma_free_rsrc(rf, rf->allocated_pds, pd_id);
+
+ return err;
+}
+
+/**
+ * irdma_dealloc_pd - deallocate pd
+ * @ibpd: ptr of pd to be deallocated
+ * @udata: user data
+ */
+static int irdma_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
+{
+ struct irdma_pd *iwpd = to_iwpd(ibpd);
+ struct irdma_device *iwdev = to_iwdev(ibpd->device);
+
+ irdma_free_rsrc(iwdev->rf, iwdev->rf->allocated_pds, iwpd->sc_pd.pd_id);
+
+ return 0;
+}
+
+/**
+ * irdma_get_pbl - Retrieve pbl from a list given a virtual
+ * address
+ * @va: user virtual address
+ * @pbl_list: pbl list to search in (QP's or CQ's)
+ */
+static struct irdma_pbl *irdma_get_pbl(unsigned long va,
+ struct list_head *pbl_list)
+{
+ struct irdma_pbl *iwpbl;
+
+ list_for_each_entry (iwpbl, pbl_list, list) {
+ if (iwpbl->user_base == va) {
+ list_del(&iwpbl->list);
+ iwpbl->on_list = false;
+ return iwpbl;
+ }
+ }
+
+ return NULL;
+}
+
+/**
+ * irdma_clean_cqes - clean cq entries for qp
+ * @iwqp: qp ptr (user or kernel)
+ * @iwcq: cq ptr
+ */
+static void irdma_clean_cqes(struct irdma_qp *iwqp, struct irdma_cq *iwcq)
+{
+ struct irdma_cq_uk *ukcq = &iwcq->sc_cq.cq_uk;
+ unsigned long flags;
+
+ spin_lock_irqsave(&iwcq->lock, flags);
+ irdma_uk_clean_cq(&iwqp->sc_qp.qp_uk, ukcq);
+ spin_unlock_irqrestore(&iwcq->lock, flags);
+}
+
+static void irdma_remove_push_mmap_entries(struct irdma_qp *iwqp)
+{
+ if (iwqp->push_db_mmap_entry) {
+ rdma_user_mmap_entry_remove(iwqp->push_db_mmap_entry);
+ iwqp->push_db_mmap_entry = NULL;
+ }
+ if (iwqp->push_wqe_mmap_entry) {
+ rdma_user_mmap_entry_remove(iwqp->push_wqe_mmap_entry);
+ iwqp->push_wqe_mmap_entry = NULL;
+ }
+}
+
+static int irdma_setup_push_mmap_entries(struct irdma_ucontext *ucontext,
+ struct irdma_qp *iwqp,
+ u64 *push_wqe_mmap_key,
+ u64 *push_db_mmap_key)
+{
+ struct irdma_device *iwdev = ucontext->iwdev;
+ u64 rsvd, bar_off;
+
+ rsvd = IRDMA_PF_BAR_RSVD;
+ bar_off = (uintptr_t)iwdev->rf->sc_dev.hw_regs[IRDMA_DB_ADDR_OFFSET];
+ /* skip over db page */
+ bar_off += IRDMA_HW_PAGE_SIZE;
+ /* push wqe page */
+ bar_off += rsvd + iwqp->sc_qp.push_idx * IRDMA_HW_PAGE_SIZE;
+ iwqp->push_wqe_mmap_entry = irdma_user_mmap_entry_insert(ucontext,
+ bar_off, IRDMA_MMAP_IO_WC,
+ push_wqe_mmap_key);
+ if (!iwqp->push_wqe_mmap_entry)
+ return -ENOMEM;
+
+ /* push doorbell page */
+ bar_off += IRDMA_HW_PAGE_SIZE;
+ iwqp->push_db_mmap_entry = irdma_user_mmap_entry_insert(ucontext,
+ bar_off, IRDMA_MMAP_IO_NC,
+ push_db_mmap_key);
+ if (!iwqp->push_db_mmap_entry) {
+ rdma_user_mmap_entry_remove(iwqp->push_wqe_mmap_entry);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+/**
+ * irdma_destroy_qp - destroy qp
+ * @ibqp: qp's ib pointer also to get to device's qp address
+ * @udata: user data
+ */
+static int irdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
+{
+ struct irdma_qp *iwqp = to_iwqp(ibqp);
+ struct irdma_device *iwdev = iwqp->iwdev;
+
+ iwqp->sc_qp.qp_uk.destroy_pending = true;
+
+ if (iwqp->iwarp_state >= IRDMA_QP_STATE_IDLE)
+ irdma_modify_qp_to_err(&iwqp->sc_qp);
+
+ if (!iwqp->user_mode)
+ cancel_delayed_work_sync(&iwqp->dwork_flush);
+
+ if (!iwqp->user_mode) {
+ if (iwqp->iwscq) {
+ irdma_clean_cqes(iwqp, iwqp->iwscq);
+ if (iwqp->iwrcq != iwqp->iwscq)
+ irdma_clean_cqes(iwqp, iwqp->iwrcq);
+ }
+ }
+
+ irdma_qp_rem_ref(&iwqp->ibqp);
+ wait_for_completion(&iwqp->free_qp);
+ irdma_free_lsmm_rsrc(iwqp);
+ irdma_cqp_qp_destroy_cmd(&iwdev->rf->sc_dev, &iwqp->sc_qp);
+
+ irdma_remove_push_mmap_entries(iwqp);
+
+ if (iwqp->sc_qp.qp_uk.qp_id == 1)
+ iwdev->rf->hwqp1_rsvd = false;
+ irdma_free_qp_rsrc(iwqp);
+
+ return 0;
+}
+
+/**
+ * irdma_setup_virt_qp - setup for allocation of virtual qp
+ * @iwdev: irdma device
+ * @iwqp: qp ptr
+ * @init_info: initialize info to return
+ */
+static void irdma_setup_virt_qp(struct irdma_device *iwdev,
+ struct irdma_qp *iwqp,
+ struct irdma_qp_init_info *init_info)
+{
+ struct irdma_pbl *iwpbl = iwqp->iwpbl;
+ struct irdma_qp_mr *qpmr = &iwpbl->qp_mr;
+
+ iwqp->page = qpmr->sq_page;
+ init_info->shadow_area_pa = qpmr->shadow;
+ if (iwpbl->pbl_allocated) {
+ init_info->virtual_map = true;
+ init_info->sq_pa = qpmr->sq_pbl.idx;
+ /* Need to use contiguous buffer for RQ of QP
+ * in case it is associated with SRQ.
+ */
+ init_info->rq_pa = init_info->qp_uk_init_info.srq_uk ?
+ qpmr->rq_pa : qpmr->rq_pbl.idx;
+ } else {
+ init_info->sq_pa = qpmr->sq_pbl.addr;
+ init_info->rq_pa = qpmr->rq_pbl.addr;
+ }
+}
+
+/**
+ * irdma_setup_umode_qp - setup sq and rq size in user mode qp
+ * @udata: udata
+ * @iwdev: iwarp device
+ * @iwqp: qp ptr (user or kernel)
+ * @info: initialize info to return
+ * @init_attr: Initial QP create attributes
+ */
+static int irdma_setup_umode_qp(struct ib_udata *udata,
+ struct irdma_device *iwdev,
+ struct irdma_qp *iwqp,
+ struct irdma_qp_init_info *info,
+ struct ib_qp_init_attr *init_attr)
+{
+ struct irdma_ucontext *ucontext = rdma_udata_to_drv_context(udata,
+ struct irdma_ucontext, ibucontext);
+ struct irdma_qp_uk_init_info *ukinfo = &info->qp_uk_init_info;
+ struct irdma_create_qp_req req;
+ unsigned long flags;
+ int ret;
+
+ ret = ib_copy_from_udata(&req, udata,
+ min(sizeof(req), udata->inlen));
+ if (ret) {
+ ibdev_dbg(&iwdev->ibdev, "VERBS: ib_copy_from_data fail\n");
+ return ret;
+ }
+
+ iwqp->ctx_info.qp_compl_ctx = req.user_compl_ctx;
+ iwqp->user_mode = 1;
+ if (req.user_wqe_bufs) {
+ info->qp_uk_init_info.legacy_mode = ucontext->legacy_mode;
+ spin_lock_irqsave(&ucontext->qp_reg_mem_list_lock, flags);
+ iwqp->iwpbl = irdma_get_pbl((unsigned long)req.user_wqe_bufs,
+ &ucontext->qp_reg_mem_list);
+ spin_unlock_irqrestore(&ucontext->qp_reg_mem_list_lock, flags);
+
+ if (!iwqp->iwpbl) {
+ ret = -ENODATA;
+ ibdev_dbg(&iwdev->ibdev, "VERBS: no pbl info\n");
+ return ret;
+ }
+ }
+
+ if (!ucontext->use_raw_attrs) {
+ /**
+ * Maintain backward compat with older ABI which passes sq and
+ * rq depth in quanta in cap.max_send_wr and cap.max_recv_wr.
+ * There is no way to compute the correct value of
+ * iwqp->max_send_wr/max_recv_wr in the kernel.
+ */
+ iwqp->max_send_wr = init_attr->cap.max_send_wr;
+ iwqp->max_recv_wr = init_attr->cap.max_recv_wr;
+ ukinfo->sq_size = init_attr->cap.max_send_wr;
+ ukinfo->rq_size = init_attr->cap.max_recv_wr;
+ irdma_uk_calc_shift_wq(ukinfo, &ukinfo->sq_shift,
+ &ukinfo->rq_shift);
+ } else {
+ ret = irdma_uk_calc_depth_shift_sq(ukinfo, &ukinfo->sq_depth,
+ &ukinfo->sq_shift);
+ if (ret)
+ return ret;
+
+ ret = irdma_uk_calc_depth_shift_rq(ukinfo, &ukinfo->rq_depth,
+ &ukinfo->rq_shift);
+ if (ret)
+ return ret;
+
+ iwqp->max_send_wr =
+ (ukinfo->sq_depth - IRDMA_SQ_RSVD) >> ukinfo->sq_shift;
+ iwqp->max_recv_wr =
+ (ukinfo->rq_depth - IRDMA_RQ_RSVD) >> ukinfo->rq_shift;
+ ukinfo->sq_size = ukinfo->sq_depth >> ukinfo->sq_shift;
+ ukinfo->rq_size = ukinfo->rq_depth >> ukinfo->rq_shift;
+ }
+
+ irdma_setup_virt_qp(iwdev, iwqp, info);
+
+ return 0;
+}
+
+/**
+ * irdma_setup_kmode_qp - setup initialization for kernel mode qp
+ * @iwdev: iwarp device
+ * @iwqp: qp ptr (user or kernel)
+ * @info: initialize info to return
+ * @init_attr: Initial QP create attributes
+ */
+static int irdma_setup_kmode_qp(struct irdma_device *iwdev,
+ struct irdma_qp *iwqp,
+ struct irdma_qp_init_info *info,
+ struct ib_qp_init_attr *init_attr)
+{
+ struct irdma_dma_mem *mem = &iwqp->kqp.dma_mem;
+ u32 size;
+ int status;
+ struct irdma_qp_uk_init_info *ukinfo = &info->qp_uk_init_info;
+
+ status = irdma_uk_calc_depth_shift_sq(ukinfo, &ukinfo->sq_depth,
+ &ukinfo->sq_shift);
+ if (status)
+ return status;
+
+ status = irdma_uk_calc_depth_shift_rq(ukinfo, &ukinfo->rq_depth,
+ &ukinfo->rq_shift);
+ if (status)
+ return status;
+
+ iwqp->kqp.sq_wrid_mem =
+ kcalloc(ukinfo->sq_depth, sizeof(*iwqp->kqp.sq_wrid_mem), GFP_KERNEL);
+ if (!iwqp->kqp.sq_wrid_mem)
+ return -ENOMEM;
+
+ iwqp->kqp.rq_wrid_mem =
+ kcalloc(ukinfo->rq_depth, sizeof(*iwqp->kqp.rq_wrid_mem), GFP_KERNEL);
+
+ if (!iwqp->kqp.rq_wrid_mem) {
+ kfree(iwqp->kqp.sq_wrid_mem);
+ iwqp->kqp.sq_wrid_mem = NULL;
+ return -ENOMEM;
+ }
+
+ ukinfo->sq_wrtrk_array = iwqp->kqp.sq_wrid_mem;
+ ukinfo->rq_wrid_array = iwqp->kqp.rq_wrid_mem;
+
+ size = (ukinfo->sq_depth + ukinfo->rq_depth) * IRDMA_QP_WQE_MIN_SIZE;
+ size += (IRDMA_SHADOW_AREA_SIZE << 3);
+
+ mem->size = ALIGN(size, 256);
+ mem->va = dma_alloc_coherent(iwdev->rf->hw.device, mem->size,
+ &mem->pa, GFP_KERNEL);
+ if (!mem->va) {
+ kfree(iwqp->kqp.sq_wrid_mem);
+ iwqp->kqp.sq_wrid_mem = NULL;
+ kfree(iwqp->kqp.rq_wrid_mem);
+ iwqp->kqp.rq_wrid_mem = NULL;
+ return -ENOMEM;
+ }
+
+ ukinfo->sq = mem->va;
+ info->sq_pa = mem->pa;
+ ukinfo->rq = &ukinfo->sq[ukinfo->sq_depth];
+ info->rq_pa = info->sq_pa + (ukinfo->sq_depth * IRDMA_QP_WQE_MIN_SIZE);
+ ukinfo->shadow_area = ukinfo->rq[ukinfo->rq_depth].elem;
+ info->shadow_area_pa =
+ info->rq_pa + (ukinfo->rq_depth * IRDMA_QP_WQE_MIN_SIZE);
+ ukinfo->sq_size = ukinfo->sq_depth >> ukinfo->sq_shift;
+ ukinfo->rq_size = ukinfo->rq_depth >> ukinfo->rq_shift;
+ ukinfo->qp_id = info->qp_uk_init_info.qp_id;
+
+ iwqp->max_send_wr = (ukinfo->sq_depth - IRDMA_SQ_RSVD) >> ukinfo->sq_shift;
+ iwqp->max_recv_wr = (ukinfo->rq_depth - IRDMA_RQ_RSVD) >> ukinfo->rq_shift;
+ init_attr->cap.max_send_wr = iwqp->max_send_wr;
+ init_attr->cap.max_recv_wr = iwqp->max_recv_wr;
+
+ return 0;
+}
+
+static int irdma_cqp_create_qp_cmd(struct irdma_qp *iwqp)
+{
+ struct irdma_pci_f *rf = iwqp->iwdev->rf;
+ struct irdma_cqp_request *cqp_request;
+ struct cqp_cmds_info *cqp_info;
+ struct irdma_create_qp_info *qp_info;
+ int status;
+
+ cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, true);
+ if (!cqp_request)
+ return -ENOMEM;
+
+ cqp_info = &cqp_request->info;
+ qp_info = &cqp_request->info.in.u.qp_create.info;
+ qp_info->mac_valid = true;
+ qp_info->cq_num_valid = true;
+ qp_info->next_iwarp_state = IRDMA_QP_STATE_IDLE;
+
+ cqp_info->cqp_cmd = IRDMA_OP_QP_CREATE;
+ cqp_info->post_sq = 1;
+ cqp_info->in.u.qp_create.qp = &iwqp->sc_qp;
+ cqp_info->in.u.qp_create.scratch = (uintptr_t)cqp_request;
+ status = irdma_handle_cqp_op(rf, cqp_request);
+ irdma_put_cqp_request(&rf->cqp, cqp_request);
+
+ return status;
+}
+
+static void irdma_roce_fill_and_set_qpctx_info(struct irdma_qp *iwqp,
+ struct irdma_qp_host_ctx_info *ctx_info)
+{
+ struct irdma_device *iwdev = iwqp->iwdev;
+ struct irdma_sc_dev *dev = &iwdev->rf->sc_dev;
+ struct irdma_roce_offload_info *roce_info;
+ struct irdma_udp_offload_info *udp_info;
+
+ udp_info = &iwqp->udp_info;
+ udp_info->snd_mss = ib_mtu_enum_to_int(ib_mtu_int_to_enum(iwdev->vsi.mtu));
+ udp_info->cwnd = iwdev->roce_cwnd;
+ udp_info->rexmit_thresh = 2;
+ udp_info->rnr_nak_thresh = 2;
+ udp_info->src_port = 0xc000;
+ udp_info->dst_port = ROCE_V2_UDP_DPORT;
+ roce_info = &iwqp->roce_info;
+ ether_addr_copy(roce_info->mac_addr, iwdev->netdev->dev_addr);
+
+ if (iwqp->ibqp.qp_type == IB_QPT_GSI && iwqp->ibqp.qp_num != 1)
+ roce_info->is_qp1 = true;
+ roce_info->rd_en = true;
+ roce_info->wr_rdresp_en = true;
+ if (dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3)
+ roce_info->bind_en = true;
+ roce_info->dcqcn_en = false;
+ roce_info->rtomin = 5;
+
+ roce_info->ack_credits = iwdev->roce_ackcreds;
+ roce_info->ird_size = dev->hw_attrs.max_hw_ird;
+ roce_info->ord_size = dev->hw_attrs.max_hw_ord;
+
+ if (!iwqp->user_mode) {
+ roce_info->priv_mode_en = true;
+ roce_info->fast_reg_en = true;
+ roce_info->udprivcq_en = true;
+ }
+ roce_info->roce_tver = 0;
+
+ ctx_info->roce_info = &iwqp->roce_info;
+ ctx_info->udp_info = &iwqp->udp_info;
+ irdma_sc_qp_setctx_roce(&iwqp->sc_qp, iwqp->host_ctx.va, ctx_info);
+}
+
+static void irdma_iw_fill_and_set_qpctx_info(struct irdma_qp *iwqp,
+ struct irdma_qp_host_ctx_info *ctx_info)
+{
+ struct irdma_device *iwdev = iwqp->iwdev;
+ struct irdma_sc_dev *dev = &iwdev->rf->sc_dev;
+ struct irdma_iwarp_offload_info *iwarp_info;
+
+ iwarp_info = &iwqp->iwarp_info;
+ ether_addr_copy(iwarp_info->mac_addr, iwdev->netdev->dev_addr);
+ iwarp_info->rd_en = true;
+ iwarp_info->wr_rdresp_en = true;
+ iwarp_info->ecn_en = true;
+ iwarp_info->rtomin = 5;
+
+ if (dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2)
+ iwarp_info->ib_rd_en = true;
+ if (!iwqp->user_mode) {
+ iwarp_info->priv_mode_en = true;
+ iwarp_info->fast_reg_en = true;
+ }
+ iwarp_info->ddp_ver = 1;
+ iwarp_info->rdmap_ver = 1;
+
+ ctx_info->iwarp_info = &iwqp->iwarp_info;
+ ctx_info->iwarp_info_valid = true;
+ irdma_sc_qp_setctx(&iwqp->sc_qp, iwqp->host_ctx.va, ctx_info);
+ ctx_info->iwarp_info_valid = false;
+}
+
+static int irdma_validate_qp_attrs(struct ib_qp_init_attr *init_attr,
+ struct irdma_device *iwdev)
+{
+ struct irdma_sc_dev *dev = &iwdev->rf->sc_dev;
+ struct irdma_uk_attrs *uk_attrs = &dev->hw_attrs.uk_attrs;
+
+ if (init_attr->create_flags)
+ return -EOPNOTSUPP;
+
+ if (init_attr->cap.max_inline_data > uk_attrs->max_hw_inline ||
+ init_attr->cap.max_send_sge > uk_attrs->max_hw_wq_frags ||
+ init_attr->cap.max_recv_sge > uk_attrs->max_hw_wq_frags ||
+ init_attr->cap.max_send_wr > uk_attrs->max_hw_wq_quanta ||
+ init_attr->cap.max_recv_wr > uk_attrs->max_hw_rq_quanta)
+ return -EINVAL;
+
+ if (rdma_protocol_roce(&iwdev->ibdev, 1)) {
+ if (init_attr->qp_type != IB_QPT_RC &&
+ init_attr->qp_type != IB_QPT_UD &&
+ init_attr->qp_type != IB_QPT_GSI)
+ return -EOPNOTSUPP;
+ } else {
+ if (init_attr->qp_type != IB_QPT_RC)
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static void irdma_flush_worker(struct work_struct *work)
+{
+ struct delayed_work *dwork = to_delayed_work(work);
+ struct irdma_qp *iwqp = container_of(dwork, struct irdma_qp, dwork_flush);
+
+ irdma_generate_flush_completions(iwqp);
+}
+
+static int irdma_setup_gsi_qp_rsrc(struct irdma_qp *iwqp, u32 *qp_num)
+{
+ struct irdma_device *iwdev = iwqp->iwdev;
+ struct irdma_pci_f *rf = iwdev->rf;
+ unsigned long flags;
+ int ret;
+
+ if (rf->rdma_ver <= IRDMA_GEN_2) {
+ *qp_num = 1;
+ return 0;
+ }
+
+ spin_lock_irqsave(&rf->rsrc_lock, flags);
+ if (!rf->hwqp1_rsvd) {
+ *qp_num = 1;
+ rf->hwqp1_rsvd = true;
+ spin_unlock_irqrestore(&rf->rsrc_lock, flags);
+ } else {
+ spin_unlock_irqrestore(&rf->rsrc_lock, flags);
+ ret = irdma_alloc_rsrc(rf, rf->allocated_qps, rf->max_qp,
+ qp_num, &rf->next_qp);
+ if (ret)
+ return ret;
+ }
+
+ ret = irdma_vchnl_req_add_vport(&rf->sc_dev, iwdev->vport_id, *qp_num,
+ (&iwdev->vsi)->qos);
+ if (ret) {
+ if (*qp_num != 1) {
+ irdma_free_rsrc(rf, rf->allocated_qps, *qp_num);
+ } else {
+ spin_lock_irqsave(&rf->rsrc_lock, flags);
+ rf->hwqp1_rsvd = false;
+ spin_unlock_irqrestore(&rf->rsrc_lock, flags);
+ }
+ return ret;
+ }
+
+ return 0;
+}
+
+/**
+ * irdma_create_qp - create qp
+ * @ibqp: ptr of qp
+ * @init_attr: attributes for qp
+ * @udata: user data for create qp
+ */
+static int irdma_create_qp(struct ib_qp *ibqp,
+ struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata)
+{
+#define IRDMA_CREATE_QP_MIN_REQ_LEN offsetofend(struct irdma_create_qp_req, user_compl_ctx)
+#define IRDMA_CREATE_QP_MIN_RESP_LEN offsetofend(struct irdma_create_qp_resp, rsvd)
+ struct ib_pd *ibpd = ibqp->pd;
+ struct irdma_pd *iwpd = to_iwpd(ibpd);
+ struct irdma_device *iwdev = to_iwdev(ibpd->device);
+ struct irdma_pci_f *rf = iwdev->rf;
+ struct irdma_qp *iwqp = to_iwqp(ibqp);
+ struct irdma_create_qp_resp uresp = {};
+ u32 qp_num = 0;
+ int err_code;
+ struct irdma_sc_qp *qp;
+ struct irdma_sc_dev *dev = &rf->sc_dev;
+ struct irdma_uk_attrs *uk_attrs = &dev->hw_attrs.uk_attrs;
+ struct irdma_qp_init_info init_info = {};
+ struct irdma_qp_host_ctx_info *ctx_info;
+ struct irdma_srq *iwsrq;
+ bool srq_valid = false;
+ u32 srq_id = 0;
+
+ if (init_attr->srq) {
+ iwsrq = to_iwsrq(init_attr->srq);
+ srq_valid = true;
+ srq_id = iwsrq->srq_num;
+ init_attr->cap.max_recv_sge = uk_attrs->max_hw_wq_frags;
+ init_attr->cap.max_recv_wr = 4;
+ init_info.qp_uk_init_info.srq_uk = &iwsrq->sc_srq.srq_uk;
+ }
+
+ err_code = irdma_validate_qp_attrs(init_attr, iwdev);
+ if (err_code)
+ return err_code;
+
+ if (udata && (udata->inlen < IRDMA_CREATE_QP_MIN_REQ_LEN ||
+ udata->outlen < IRDMA_CREATE_QP_MIN_RESP_LEN))
+ return -EINVAL;
+
+ init_info.vsi = &iwdev->vsi;
+ init_info.qp_uk_init_info.uk_attrs = uk_attrs;
+ init_info.qp_uk_init_info.sq_size = init_attr->cap.max_send_wr;
+ init_info.qp_uk_init_info.rq_size = init_attr->cap.max_recv_wr;
+ init_info.qp_uk_init_info.max_sq_frag_cnt = init_attr->cap.max_send_sge;
+ init_info.qp_uk_init_info.max_rq_frag_cnt = init_attr->cap.max_recv_sge;
+ init_info.qp_uk_init_info.max_inline_data = init_attr->cap.max_inline_data;
+
+ qp = &iwqp->sc_qp;
+ qp->qp_uk.back_qp = iwqp;
+ qp->push_idx = IRDMA_INVALID_PUSH_PAGE_INDEX;
+
+ iwqp->iwdev = iwdev;
+ iwqp->q2_ctx_mem.size = ALIGN(IRDMA_Q2_BUF_SIZE + IRDMA_QP_CTX_SIZE,
+ 256);
+ iwqp->q2_ctx_mem.va = dma_alloc_coherent(dev->hw->device,
+ iwqp->q2_ctx_mem.size,
+ &iwqp->q2_ctx_mem.pa,
+ GFP_KERNEL);
+ if (!iwqp->q2_ctx_mem.va)
+ return -ENOMEM;
+
+ init_info.q2 = iwqp->q2_ctx_mem.va;
+ init_info.q2_pa = iwqp->q2_ctx_mem.pa;
+ init_info.host_ctx = (__le64 *)(init_info.q2 + IRDMA_Q2_BUF_SIZE);
+ init_info.host_ctx_pa = init_info.q2_pa + IRDMA_Q2_BUF_SIZE;
+
+ if (init_attr->qp_type == IB_QPT_GSI) {
+ err_code = irdma_setup_gsi_qp_rsrc(iwqp, &qp_num);
+ if (err_code)
+ goto error;
+ iwqp->ibqp.qp_num = 1;
+ } else {
+ err_code = irdma_alloc_rsrc(rf, rf->allocated_qps, rf->max_qp,
+ &qp_num, &rf->next_qp);
+ if (err_code)
+ goto error;
+ iwqp->ibqp.qp_num = qp_num;
+ }
+
+ iwqp->iwpd = iwpd;
+ qp = &iwqp->sc_qp;
+ iwqp->iwscq = to_iwcq(init_attr->send_cq);
+ iwqp->iwrcq = to_iwcq(init_attr->recv_cq);
+ iwqp->host_ctx.va = init_info.host_ctx;
+ iwqp->host_ctx.pa = init_info.host_ctx_pa;
+ iwqp->host_ctx.size = IRDMA_QP_CTX_SIZE;
+
+ init_info.pd = &iwpd->sc_pd;
+ init_info.qp_uk_init_info.qp_id = qp_num;
+ if (!rdma_protocol_roce(&iwdev->ibdev, 1))
+ init_info.qp_uk_init_info.first_sq_wq = 1;
+ iwqp->ctx_info.qp_compl_ctx = (uintptr_t)qp;
+ init_waitqueue_head(&iwqp->waitq);
+ init_waitqueue_head(&iwqp->mod_qp_waitq);
+
+ if (udata) {
+ init_info.qp_uk_init_info.abi_ver = iwpd->sc_pd.abi_ver;
+ err_code = irdma_setup_umode_qp(udata, iwdev, iwqp, &init_info,
+ init_attr);
+ } else {
+ INIT_DELAYED_WORK(&iwqp->dwork_flush, irdma_flush_worker);
+ init_info.qp_uk_init_info.abi_ver = IRDMA_ABI_VER;
+ err_code = irdma_setup_kmode_qp(iwdev, iwqp, &init_info, init_attr);
+ }
+
+ if (err_code) {
+ ibdev_dbg(&iwdev->ibdev, "VERBS: setup qp failed\n");
+ goto error;
+ }
+
+ if (rdma_protocol_roce(&iwdev->ibdev, 1)) {
+ if (init_attr->qp_type == IB_QPT_RC) {
+ init_info.qp_uk_init_info.type = IRDMA_QP_TYPE_ROCE_RC;
+ init_info.qp_uk_init_info.qp_caps = IRDMA_SEND_WITH_IMM |
+ IRDMA_WRITE_WITH_IMM |
+ IRDMA_ROCE;
+ } else {
+ init_info.qp_uk_init_info.type = IRDMA_QP_TYPE_ROCE_UD;
+ init_info.qp_uk_init_info.qp_caps = IRDMA_SEND_WITH_IMM |
+ IRDMA_ROCE;
+ }
+ } else {
+ init_info.qp_uk_init_info.type = IRDMA_QP_TYPE_IWARP;
+ init_info.qp_uk_init_info.qp_caps = IRDMA_WRITE_WITH_IMM;
+ }
+
+ if (dev->hw_attrs.uk_attrs.hw_rev > IRDMA_GEN_1)
+ init_info.qp_uk_init_info.qp_caps |= IRDMA_PUSH_MODE;
+
+ err_code = irdma_sc_qp_init(qp, &init_info);
+ if (err_code) {
+ ibdev_dbg(&iwdev->ibdev, "VERBS: qp_init fail\n");
+ goto error;
+ }
+
+ ctx_info = &iwqp->ctx_info;
+ ctx_info->srq_valid = srq_valid;
+ ctx_info->srq_id = srq_id;
+ ctx_info->send_cq_num = iwqp->iwscq->sc_cq.cq_uk.cq_id;
+ ctx_info->rcv_cq_num = iwqp->iwrcq->sc_cq.cq_uk.cq_id;
+
+ if (rdma_protocol_roce(&iwdev->ibdev, 1)) {
+ if (dev->ws_add(&iwdev->vsi, 0)) {
+ irdma_cqp_qp_destroy_cmd(&rf->sc_dev, &iwqp->sc_qp);
+ err_code = -EINVAL;
+ goto error;
+ }
+ irdma_qp_add_qos(&iwqp->sc_qp);
+ irdma_roce_fill_and_set_qpctx_info(iwqp, ctx_info);
+ } else {
+ irdma_iw_fill_and_set_qpctx_info(iwqp, ctx_info);
+ }
+
+ err_code = irdma_cqp_create_qp_cmd(iwqp);
+ if (err_code)
+ goto error;
+
+ refcount_set(&iwqp->refcnt, 1);
+ spin_lock_init(&iwqp->lock);
+ spin_lock_init(&iwqp->sc_qp.pfpdu.lock);
+ iwqp->sig_all = init_attr->sq_sig_type == IB_SIGNAL_ALL_WR;
+ rf->qp_table[qp_num] = iwqp;
+
+ if (udata) {
+ /* GEN_1 legacy support with libi40iw does not have expanded uresp struct */
+ if (udata->outlen < sizeof(uresp)) {
+ uresp.lsmm = 1;
+ uresp.push_idx = IRDMA_INVALID_PUSH_PAGE_INDEX_GEN_1;
+ } else {
+ if (rdma_protocol_iwarp(&iwdev->ibdev, 1))
+ uresp.lsmm = 1;
+ }
+ uresp.actual_sq_size = init_info.qp_uk_init_info.sq_size;
+ uresp.actual_rq_size = init_info.qp_uk_init_info.rq_size;
+ uresp.qp_id = qp_num;
+ uresp.qp_caps = qp->qp_uk.qp_caps;
+
+ err_code = ib_copy_to_udata(udata, &uresp,
+ min(sizeof(uresp), udata->outlen));
+ if (err_code) {
+ ibdev_dbg(&iwdev->ibdev, "VERBS: copy_to_udata failed\n");
+ irdma_destroy_qp(&iwqp->ibqp, udata);
+ return err_code;
+ }
+ }
+
+ init_completion(&iwqp->free_qp);
+ return 0;
+
+error:
+ irdma_free_qp_rsrc(iwqp);
+ return err_code;
+}
+
+static int irdma_get_ib_acc_flags(struct irdma_qp *iwqp)
+{
+ int acc_flags = 0;
+
+ if (rdma_protocol_roce(iwqp->ibqp.device, 1)) {
+ if (iwqp->roce_info.wr_rdresp_en) {
+ acc_flags |= IB_ACCESS_LOCAL_WRITE;
+ acc_flags |= IB_ACCESS_REMOTE_WRITE;
+ }
+ if (iwqp->roce_info.rd_en)
+ acc_flags |= IB_ACCESS_REMOTE_READ;
+ if (iwqp->roce_info.bind_en)
+ acc_flags |= IB_ACCESS_MW_BIND;
+ if (iwqp->ctx_info.remote_atomics_en)
+ acc_flags |= IB_ACCESS_REMOTE_ATOMIC;
+ } else {
+ if (iwqp->iwarp_info.wr_rdresp_en) {
+ acc_flags |= IB_ACCESS_LOCAL_WRITE;
+ acc_flags |= IB_ACCESS_REMOTE_WRITE;
+ }
+ if (iwqp->iwarp_info.rd_en)
+ acc_flags |= IB_ACCESS_REMOTE_READ;
+ if (iwqp->ctx_info.remote_atomics_en)
+ acc_flags |= IB_ACCESS_REMOTE_ATOMIC;
+ }
+ return acc_flags;
+}
+
+/**
+ * irdma_query_qp - query qp attributes
+ * @ibqp: qp pointer
+ * @attr: attributes pointer
+ * @attr_mask: Not used
+ * @init_attr: qp attributes to return
+ */
+static int irdma_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_qp_init_attr *init_attr)
+{
+ struct irdma_qp *iwqp = to_iwqp(ibqp);
+ struct irdma_sc_qp *qp = &iwqp->sc_qp;
+
+ memset(attr, 0, sizeof(*attr));
+ memset(init_attr, 0, sizeof(*init_attr));
+
+ attr->qp_state = iwqp->ibqp_state;
+ attr->cur_qp_state = iwqp->ibqp_state;
+ attr->cap.max_send_wr = iwqp->max_send_wr;
+ attr->cap.max_recv_wr = iwqp->max_recv_wr;
+ attr->cap.max_inline_data = qp->qp_uk.max_inline_data;
+ attr->cap.max_send_sge = qp->qp_uk.max_sq_frag_cnt;
+ attr->cap.max_recv_sge = qp->qp_uk.max_rq_frag_cnt;
+ attr->qp_access_flags = irdma_get_ib_acc_flags(iwqp);
+ attr->port_num = 1;
+ if (rdma_protocol_roce(ibqp->device, 1)) {
+ attr->path_mtu = ib_mtu_int_to_enum(iwqp->udp_info.snd_mss);
+ attr->qkey = iwqp->roce_info.qkey;
+ attr->rq_psn = iwqp->udp_info.epsn;
+ attr->sq_psn = iwqp->udp_info.psn_nxt;
+ attr->dest_qp_num = iwqp->roce_info.dest_qp;
+ attr->pkey_index = iwqp->roce_info.p_key;
+ attr->retry_cnt = iwqp->udp_info.rexmit_thresh;
+ attr->rnr_retry = iwqp->udp_info.rnr_nak_thresh;
+ attr->min_rnr_timer = iwqp->udp_info.min_rnr_timer;
+ attr->max_rd_atomic = iwqp->roce_info.ord_size;
+ attr->max_dest_rd_atomic = iwqp->roce_info.ird_size;
+ }
+
+ init_attr->event_handler = iwqp->ibqp.event_handler;
+ init_attr->qp_context = iwqp->ibqp.qp_context;
+ init_attr->send_cq = iwqp->ibqp.send_cq;
+ init_attr->recv_cq = iwqp->ibqp.recv_cq;
+ init_attr->srq = iwqp->ibqp.srq;
+ init_attr->cap = attr->cap;
+
+ return 0;
+}
+
+/**
+ * irdma_query_pkey - Query partition key
+ * @ibdev: device pointer from stack
+ * @port: port number
+ * @index: index of pkey
+ * @pkey: pointer to store the pkey
+ */
+static int irdma_query_pkey(struct ib_device *ibdev, u32 port, u16 index,
+ u16 *pkey)
+{
+ if (index >= IRDMA_PKEY_TBL_SZ)
+ return -EINVAL;
+
+ *pkey = IRDMA_DEFAULT_PKEY;
+ return 0;
+}
+
+static u8 irdma_roce_get_vlan_prio(const struct ib_gid_attr *attr, u8 prio)
+{
+ struct net_device *ndev;
+
+ rcu_read_lock();
+ ndev = rcu_dereference(attr->ndev);
+ if (!ndev)
+ goto exit;
+ if (is_vlan_dev(ndev)) {
+ u16 vlan_qos = vlan_dev_get_egress_qos_mask(ndev, prio);
+
+ prio = (vlan_qos & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
+ }
+exit:
+ rcu_read_unlock();
+ return prio;
+}
+
+static int irdma_wait_for_suspend(struct irdma_qp *iwqp)
+{
+ if (!wait_event_timeout(iwqp->iwdev->suspend_wq,
+ !iwqp->suspend_pending,
+ msecs_to_jiffies(IRDMA_EVENT_TIMEOUT_MS))) {
+ iwqp->suspend_pending = false;
+ ibdev_warn(&iwqp->iwdev->ibdev,
+ "modify_qp timed out waiting for suspend. qp_id = %d, last_ae = 0x%x\n",
+ iwqp->ibqp.qp_num, iwqp->last_aeq);
+ return -EBUSY;
+ }
+
+ return 0;
+}
+
+/**
+ * irdma_modify_qp_roce - modify qp request
+ * @ibqp: qp's pointer for modify
+ * @attr: access attributes
+ * @attr_mask: state mask
+ * @udata: user data
+ */
+int irdma_modify_qp_roce(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_udata *udata)
+{
+#define IRDMA_MODIFY_QP_MIN_REQ_LEN offsetofend(struct irdma_modify_qp_req, rq_flush)
+#define IRDMA_MODIFY_QP_MIN_RESP_LEN offsetofend(struct irdma_modify_qp_resp, push_valid)
+ struct irdma_pd *iwpd = to_iwpd(ibqp->pd);
+ struct irdma_qp *iwqp = to_iwqp(ibqp);
+ struct irdma_device *iwdev = iwqp->iwdev;
+ struct irdma_sc_dev *dev = &iwdev->rf->sc_dev;
+ struct irdma_qp_host_ctx_info *ctx_info;
+ struct irdma_roce_offload_info *roce_info;
+ struct irdma_udp_offload_info *udp_info;
+ struct irdma_modify_qp_info info = {};
+ struct irdma_modify_qp_resp uresp = {};
+ struct irdma_modify_qp_req ureq = {};
+ unsigned long flags;
+ u8 issue_modify_qp = 0;
+ int ret = 0;
+
+ ctx_info = &iwqp->ctx_info;
+ roce_info = &iwqp->roce_info;
+ udp_info = &iwqp->udp_info;
+
+ if (udata) {
+ /* udata inlen/outlen can be 0 when supporting legacy libi40iw */
+ if ((udata->inlen && udata->inlen < IRDMA_MODIFY_QP_MIN_REQ_LEN) ||
+ (udata->outlen && udata->outlen < IRDMA_MODIFY_QP_MIN_RESP_LEN))
+ return -EINVAL;
+ }
+
+ if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
+ return -EOPNOTSUPP;
+
+ if (attr_mask & IB_QP_DEST_QPN)
+ roce_info->dest_qp = attr->dest_qp_num;
+
+ if (attr_mask & IB_QP_PKEY_INDEX) {
+ ret = irdma_query_pkey(ibqp->device, 0, attr->pkey_index,
+ &roce_info->p_key);
+ if (ret)
+ return ret;
+ }
+
+ if (attr_mask & IB_QP_QKEY)
+ roce_info->qkey = attr->qkey;
+
+ if (attr_mask & IB_QP_PATH_MTU)
+ udp_info->snd_mss = ib_mtu_enum_to_int(attr->path_mtu);
+
+ if (attr_mask & IB_QP_SQ_PSN) {
+ udp_info->psn_nxt = attr->sq_psn;
+ udp_info->lsn = 0xffff;
+ udp_info->psn_una = attr->sq_psn;
+ udp_info->psn_max = attr->sq_psn;
+ }
+
+ if (attr_mask & IB_QP_RQ_PSN)
+ udp_info->epsn = attr->rq_psn;
+
+ if (attr_mask & IB_QP_RNR_RETRY)
+ udp_info->rnr_nak_thresh = attr->rnr_retry;
+
+ if (attr_mask & IB_QP_MIN_RNR_TIMER &&
+ dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3)
+ udp_info->min_rnr_timer = attr->min_rnr_timer;
+
+ if (attr_mask & IB_QP_RETRY_CNT)
+ udp_info->rexmit_thresh = attr->retry_cnt;
+
+ ctx_info->roce_info->pd_id = iwpd->sc_pd.pd_id;
+
+ if (attr_mask & IB_QP_AV) {
+ struct irdma_av *av = &iwqp->roce_ah.av;
+ const struct ib_gid_attr *sgid_attr =
+ attr->ah_attr.grh.sgid_attr;
+ u16 vlan_id = VLAN_N_VID;
+ u32 local_ip[4];
+
+ memset(&iwqp->roce_ah, 0, sizeof(iwqp->roce_ah));
+ if (attr->ah_attr.ah_flags & IB_AH_GRH) {
+ udp_info->ttl = attr->ah_attr.grh.hop_limit;
+ udp_info->flow_label = attr->ah_attr.grh.flow_label;
+ udp_info->tos = attr->ah_attr.grh.traffic_class;
+ udp_info->src_port =
+ rdma_get_udp_sport(udp_info->flow_label,
+ ibqp->qp_num,
+ roce_info->dest_qp);
+ irdma_qp_rem_qos(&iwqp->sc_qp);
+ dev->ws_remove(iwqp->sc_qp.vsi, ctx_info->user_pri);
+ if (iwqp->sc_qp.vsi->dscp_mode)
+ ctx_info->user_pri =
+ iwqp->sc_qp.vsi->dscp_map[irdma_tos2dscp(udp_info->tos)];
+ else
+ ctx_info->user_pri = rt_tos2priority(udp_info->tos);
+ }
+ ret = rdma_read_gid_l2_fields(sgid_attr, &vlan_id,
+ ctx_info->roce_info->mac_addr);
+ if (ret)
+ return ret;
+ ctx_info->user_pri = irdma_roce_get_vlan_prio(sgid_attr,
+ ctx_info->user_pri);
+ if (dev->ws_add(iwqp->sc_qp.vsi, ctx_info->user_pri))
+ return -ENOMEM;
+ iwqp->sc_qp.user_pri = ctx_info->user_pri;
+ irdma_qp_add_qos(&iwqp->sc_qp);
+
+ if (vlan_id >= VLAN_N_VID && iwdev->dcb_vlan_mode)
+ vlan_id = 0;
+ if (vlan_id < VLAN_N_VID) {
+ udp_info->insert_vlan_tag = true;
+ udp_info->vlan_tag = vlan_id |
+ ctx_info->user_pri << VLAN_PRIO_SHIFT;
+ } else {
+ udp_info->insert_vlan_tag = false;
+ }
+
+ av->attrs = attr->ah_attr;
+ rdma_gid2ip((struct sockaddr *)&av->sgid_addr, &sgid_attr->gid);
+ rdma_gid2ip((struct sockaddr *)&av->dgid_addr, &attr->ah_attr.grh.dgid);
+ av->net_type = rdma_gid_attr_network_type(sgid_attr);
+ if (av->net_type == RDMA_NETWORK_IPV6) {
+ __be32 *daddr =
+ av->dgid_addr.saddr_in6.sin6_addr.in6_u.u6_addr32;
+ __be32 *saddr =
+ av->sgid_addr.saddr_in6.sin6_addr.in6_u.u6_addr32;
+
+ irdma_copy_ip_ntohl(&udp_info->dest_ip_addr[0], daddr);
+ irdma_copy_ip_ntohl(&udp_info->local_ipaddr[0], saddr);
+
+ udp_info->ipv4 = false;
+ irdma_copy_ip_ntohl(local_ip, daddr);
+
+ } else if (av->net_type == RDMA_NETWORK_IPV4) {
+ __be32 saddr = av->sgid_addr.saddr_in.sin_addr.s_addr;
+ __be32 daddr = av->dgid_addr.saddr_in.sin_addr.s_addr;
+
+ local_ip[0] = ntohl(daddr);
+
+ udp_info->ipv4 = true;
+ udp_info->dest_ip_addr[0] = 0;
+ udp_info->dest_ip_addr[1] = 0;
+ udp_info->dest_ip_addr[2] = 0;
+ udp_info->dest_ip_addr[3] = local_ip[0];
+
+ udp_info->local_ipaddr[0] = 0;
+ udp_info->local_ipaddr[1] = 0;
+ udp_info->local_ipaddr[2] = 0;
+ udp_info->local_ipaddr[3] = ntohl(saddr);
+ }
+ udp_info->arp_idx =
+ irdma_add_arp(iwdev->rf, local_ip, udp_info->ipv4,
+ attr->ah_attr.roce.dmac);
+ }
+
+ if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
+ if (attr->max_rd_atomic > dev->hw_attrs.max_hw_ord) {
+ ibdev_err(&iwdev->ibdev,
+ "rd_atomic = %d, above max_hw_ord=%d\n",
+ attr->max_rd_atomic,
+ dev->hw_attrs.max_hw_ord);
+ return -EINVAL;
+ }
+ if (attr->max_rd_atomic)
+ roce_info->ord_size = attr->max_rd_atomic;
+ info.ord_valid = true;
+ }
+
+ if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
+ if (attr->max_dest_rd_atomic > dev->hw_attrs.max_hw_ird) {
+ ibdev_err(&iwdev->ibdev,
+ "rd_atomic = %d, above max_hw_ird=%d\n",
+ attr->max_dest_rd_atomic,
+ dev->hw_attrs.max_hw_ird);
+ return -EINVAL;
+ }
+ if (attr->max_dest_rd_atomic)
+ roce_info->ird_size = attr->max_dest_rd_atomic;
+ }
+
+ if (attr_mask & IB_QP_ACCESS_FLAGS) {
+ if (attr->qp_access_flags & IB_ACCESS_LOCAL_WRITE)
+ roce_info->wr_rdresp_en = true;
+ if (attr->qp_access_flags & IB_ACCESS_REMOTE_WRITE)
+ roce_info->wr_rdresp_en = true;
+ if (attr->qp_access_flags & IB_ACCESS_REMOTE_READ)
+ roce_info->rd_en = true;
+ if (dev->hw_attrs.uk_attrs.feature_flags & IRDMA_FEATURE_ATOMIC_OPS)
+ if (attr->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)
+ ctx_info->remote_atomics_en = true;
+ }
+
+ wait_event(iwqp->mod_qp_waitq, !atomic_read(&iwqp->hw_mod_qp_pend));
+
+ ibdev_dbg(&iwdev->ibdev,
+ "VERBS: caller: %pS qp_id=%d to_ibqpstate=%d ibqpstate=%d irdma_qpstate=%d attr_mask=0x%x\n",
+ __builtin_return_address(0), ibqp->qp_num, attr->qp_state,
+ iwqp->ibqp_state, iwqp->iwarp_state, attr_mask);
+
+ spin_lock_irqsave(&iwqp->lock, flags);
+ if (attr_mask & IB_QP_STATE) {
+ if (!ib_modify_qp_is_ok(iwqp->ibqp_state, attr->qp_state,
+ iwqp->ibqp.qp_type, attr_mask)) {
+ ibdev_warn(&iwdev->ibdev, "modify_qp invalid for qp_id=%d, old_state=0x%x, new_state=0x%x\n",
+ iwqp->ibqp.qp_num, iwqp->ibqp_state,
+ attr->qp_state);
+ ret = -EINVAL;
+ goto exit;
+ }
+ info.curr_iwarp_state = iwqp->iwarp_state;
+
+ switch (attr->qp_state) {
+ case IB_QPS_INIT:
+ if (iwqp->iwarp_state > IRDMA_QP_STATE_IDLE) {
+ ret = -EINVAL;
+ goto exit;
+ }
+
+ if (iwqp->iwarp_state == IRDMA_QP_STATE_INVALID) {
+ info.next_iwarp_state = IRDMA_QP_STATE_IDLE;
+ issue_modify_qp = 1;
+ }
+ break;
+ case IB_QPS_RTR:
+ if (iwqp->iwarp_state > IRDMA_QP_STATE_IDLE) {
+ ret = -EINVAL;
+ goto exit;
+ }
+ info.arp_cache_idx_valid = true;
+ info.cq_num_valid = true;
+ info.next_iwarp_state = IRDMA_QP_STATE_RTR;
+ issue_modify_qp = 1;
+ break;
+ case IB_QPS_RTS:
+ if (iwqp->ibqp_state < IB_QPS_RTR ||
+ iwqp->ibqp_state == IB_QPS_ERR) {
+ ret = -EINVAL;
+ goto exit;
+ }
+
+ info.arp_cache_idx_valid = true;
+ info.cq_num_valid = true;
+ info.ord_valid = true;
+ info.next_iwarp_state = IRDMA_QP_STATE_RTS;
+ issue_modify_qp = 1;
+ if (iwdev->push_mode && udata &&
+ iwqp->sc_qp.push_idx == IRDMA_INVALID_PUSH_PAGE_INDEX &&
+ dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) {
+ spin_unlock_irqrestore(&iwqp->lock, flags);
+ irdma_alloc_push_page(iwqp);
+ spin_lock_irqsave(&iwqp->lock, flags);
+ }
+ break;
+ case IB_QPS_SQD:
+ if (iwqp->iwarp_state == IRDMA_QP_STATE_SQD)
+ goto exit;
+
+ if (iwqp->iwarp_state != IRDMA_QP_STATE_RTS) {
+ ret = -EINVAL;
+ goto exit;
+ }
+
+ info.next_iwarp_state = IRDMA_QP_STATE_SQD;
+ issue_modify_qp = 1;
+ iwqp->suspend_pending = true;
+ break;
+ case IB_QPS_SQE:
+ case IB_QPS_ERR:
+ case IB_QPS_RESET:
+ if (iwqp->iwarp_state == IRDMA_QP_STATE_ERROR) {
+ spin_unlock_irqrestore(&iwqp->lock, flags);
+ if (udata && udata->inlen) {
+ if (ib_copy_from_udata(&ureq, udata,
+ min(sizeof(ureq), udata->inlen)))
+ return -EINVAL;
+
+ irdma_flush_wqes(iwqp,
+ (ureq.sq_flush ? IRDMA_FLUSH_SQ : 0) |
+ (ureq.rq_flush ? IRDMA_FLUSH_RQ : 0) |
+ IRDMA_REFLUSH);
+ }
+ return 0;
+ }
+
+ info.next_iwarp_state = IRDMA_QP_STATE_ERROR;
+ issue_modify_qp = 1;
+ break;
+ default:
+ ret = -EINVAL;
+ goto exit;
+ }
+
+ iwqp->ibqp_state = attr->qp_state;
+ }
+
+ ctx_info->send_cq_num = iwqp->iwscq->sc_cq.cq_uk.cq_id;
+ ctx_info->rcv_cq_num = iwqp->iwrcq->sc_cq.cq_uk.cq_id;
+ irdma_sc_qp_setctx_roce(&iwqp->sc_qp, iwqp->host_ctx.va, ctx_info);
+ spin_unlock_irqrestore(&iwqp->lock, flags);
+
+ if (attr_mask & IB_QP_STATE) {
+ if (issue_modify_qp) {
+ ctx_info->rem_endpoint_idx = udp_info->arp_idx;
+ if (irdma_hw_modify_qp(iwdev, iwqp, &info, true))
+ return -EINVAL;
+ if (info.next_iwarp_state == IRDMA_QP_STATE_SQD) {
+ ret = irdma_wait_for_suspend(iwqp);
+ if (ret)
+ return ret;
+ }
+ spin_lock_irqsave(&iwqp->lock, flags);
+ if (iwqp->iwarp_state == info.curr_iwarp_state) {
+ iwqp->iwarp_state = info.next_iwarp_state;
+ iwqp->ibqp_state = attr->qp_state;
+ }
+ if (iwqp->ibqp_state > IB_QPS_RTS &&
+ !iwqp->flush_issued) {
+ spin_unlock_irqrestore(&iwqp->lock, flags);
+ irdma_flush_wqes(iwqp, IRDMA_FLUSH_SQ |
+ IRDMA_FLUSH_RQ |
+ IRDMA_FLUSH_WAIT);
+ iwqp->flush_issued = 1;
+ } else {
+ spin_unlock_irqrestore(&iwqp->lock, flags);
+ }
+ } else {
+ iwqp->ibqp_state = attr->qp_state;
+ }
+ if (udata && udata->outlen && dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) {
+ struct irdma_ucontext *ucontext;
+
+ ucontext = rdma_udata_to_drv_context(udata,
+ struct irdma_ucontext, ibucontext);
+ if (iwqp->sc_qp.push_idx != IRDMA_INVALID_PUSH_PAGE_INDEX &&
+ !iwqp->push_wqe_mmap_entry &&
+ !irdma_setup_push_mmap_entries(ucontext, iwqp,
+ &uresp.push_wqe_mmap_key, &uresp.push_db_mmap_key)) {
+ uresp.push_valid = 1;
+ uresp.push_offset = iwqp->sc_qp.push_offset;
+ }
+ ret = ib_copy_to_udata(udata, &uresp, min(sizeof(uresp),
+ udata->outlen));
+ if (ret) {
+ irdma_remove_push_mmap_entries(iwqp);
+ ibdev_dbg(&iwdev->ibdev,
+ "VERBS: copy_to_udata failed\n");
+ return ret;
+ }
+ }
+ }
+
+ return 0;
+exit:
+ spin_unlock_irqrestore(&iwqp->lock, flags);
+
+ return ret;
+}
+
+/**
+ * irdma_modify_qp - modify qp request
+ * @ibqp: qp's pointer for modify
+ * @attr: access attributes
+ * @attr_mask: state mask
+ * @udata: user data
+ */
+int irdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
+ struct ib_udata *udata)
+{
+#define IRDMA_MODIFY_QP_MIN_REQ_LEN offsetofend(struct irdma_modify_qp_req, rq_flush)
+#define IRDMA_MODIFY_QP_MIN_RESP_LEN offsetofend(struct irdma_modify_qp_resp, push_valid)
+ struct irdma_qp *iwqp = to_iwqp(ibqp);
+ struct irdma_device *iwdev = iwqp->iwdev;
+ struct irdma_sc_dev *dev = &iwdev->rf->sc_dev;
+ struct irdma_qp_host_ctx_info *ctx_info;
+ struct irdma_tcp_offload_info *tcp_info;
+ struct irdma_iwarp_offload_info *offload_info;
+ struct irdma_modify_qp_info info = {};
+ struct irdma_modify_qp_resp uresp = {};
+ struct irdma_modify_qp_req ureq = {};
+ u8 issue_modify_qp = 0;
+ u8 dont_wait = 0;
+ int err;
+ unsigned long flags;
+
+ if (udata) {
+ /* udata inlen/outlen can be 0 when supporting legacy libi40iw */
+ if ((udata->inlen && udata->inlen < IRDMA_MODIFY_QP_MIN_REQ_LEN) ||
+ (udata->outlen && udata->outlen < IRDMA_MODIFY_QP_MIN_RESP_LEN))
+ return -EINVAL;
+ }
+
+ if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
+ return -EOPNOTSUPP;
+
+ ctx_info = &iwqp->ctx_info;
+ offload_info = &iwqp->iwarp_info;
+ tcp_info = &iwqp->tcp_info;
+ wait_event(iwqp->mod_qp_waitq, !atomic_read(&iwqp->hw_mod_qp_pend));
+ ibdev_dbg(&iwdev->ibdev,
+ "VERBS: caller: %pS qp_id=%d to_ibqpstate=%d ibqpstate=%d irdma_qpstate=%d last_aeq=%d hw_tcp_state=%d hw_iwarp_state=%d attr_mask=0x%x\n",
+ __builtin_return_address(0), ibqp->qp_num, attr->qp_state,
+ iwqp->ibqp_state, iwqp->iwarp_state, iwqp->last_aeq,
+ iwqp->hw_tcp_state, iwqp->hw_iwarp_state, attr_mask);
+
+ spin_lock_irqsave(&iwqp->lock, flags);
+ if (attr_mask & IB_QP_STATE) {
+ info.curr_iwarp_state = iwqp->iwarp_state;
+ switch (attr->qp_state) {
+ case IB_QPS_INIT:
+ case IB_QPS_RTR:
+ if (iwqp->iwarp_state > IRDMA_QP_STATE_IDLE) {
+ err = -EINVAL;
+ goto exit;
+ }
+
+ if (iwqp->iwarp_state == IRDMA_QP_STATE_INVALID) {
+ info.next_iwarp_state = IRDMA_QP_STATE_IDLE;
+ issue_modify_qp = 1;
+ }
+ if (iwdev->push_mode && udata &&
+ iwqp->sc_qp.push_idx == IRDMA_INVALID_PUSH_PAGE_INDEX &&
+ dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) {
+ spin_unlock_irqrestore(&iwqp->lock, flags);
+ irdma_alloc_push_page(iwqp);
+ spin_lock_irqsave(&iwqp->lock, flags);
+ }
+ break;
+ case IB_QPS_RTS:
+ if (iwqp->iwarp_state > IRDMA_QP_STATE_RTS ||
+ !iwqp->cm_id) {
+ err = -EINVAL;
+ goto exit;
+ }
+
+ issue_modify_qp = 1;
+ iwqp->hw_tcp_state = IRDMA_TCP_STATE_ESTABLISHED;
+ iwqp->hte_added = 1;
+ info.next_iwarp_state = IRDMA_QP_STATE_RTS;
+ info.tcp_ctx_valid = true;
+ info.ord_valid = true;
+ info.arp_cache_idx_valid = true;
+ info.cq_num_valid = true;
+ break;
+ case IB_QPS_SQD:
+ if (iwqp->hw_iwarp_state > IRDMA_QP_STATE_RTS) {
+ err = 0;
+ goto exit;
+ }
+
+ if (iwqp->iwarp_state == IRDMA_QP_STATE_CLOSING ||
+ iwqp->iwarp_state < IRDMA_QP_STATE_RTS) {
+ err = 0;
+ goto exit;
+ }
+
+ if (iwqp->iwarp_state > IRDMA_QP_STATE_CLOSING) {
+ err = -EINVAL;
+ goto exit;
+ }
+
+ info.next_iwarp_state = IRDMA_QP_STATE_CLOSING;
+ issue_modify_qp = 1;
+ break;
+ case IB_QPS_SQE:
+ if (iwqp->iwarp_state >= IRDMA_QP_STATE_TERMINATE) {
+ err = -EINVAL;
+ goto exit;
+ }
+
+ info.next_iwarp_state = IRDMA_QP_STATE_TERMINATE;
+ issue_modify_qp = 1;
+ break;
+ case IB_QPS_ERR:
+ case IB_QPS_RESET:
+ if (iwqp->iwarp_state == IRDMA_QP_STATE_ERROR) {
+ spin_unlock_irqrestore(&iwqp->lock, flags);
+ if (udata && udata->inlen) {
+ if (ib_copy_from_udata(&ureq, udata,
+ min(sizeof(ureq), udata->inlen)))
+ return -EINVAL;
+
+ irdma_flush_wqes(iwqp,
+ (ureq.sq_flush ? IRDMA_FLUSH_SQ : 0) |
+ (ureq.rq_flush ? IRDMA_FLUSH_RQ : 0) |
+ IRDMA_REFLUSH);
+ }
+ return 0;
+ }
+
+ if (iwqp->sc_qp.term_flags) {
+ spin_unlock_irqrestore(&iwqp->lock, flags);
+ irdma_terminate_del_timer(&iwqp->sc_qp);
+ spin_lock_irqsave(&iwqp->lock, flags);
+ }
+ info.next_iwarp_state = IRDMA_QP_STATE_ERROR;
+ if (iwqp->hw_tcp_state > IRDMA_TCP_STATE_CLOSED &&
+ iwdev->iw_status &&
+ iwqp->hw_tcp_state != IRDMA_TCP_STATE_TIME_WAIT)
+ info.reset_tcp_conn = true;
+ else
+ dont_wait = 1;
+
+ issue_modify_qp = 1;
+ info.next_iwarp_state = IRDMA_QP_STATE_ERROR;
+ break;
+ default:
+ err = -EINVAL;
+ goto exit;
+ }
+
+ iwqp->ibqp_state = attr->qp_state;
+ }
+ if (attr_mask & IB_QP_ACCESS_FLAGS) {
+ ctx_info->iwarp_info_valid = true;
+ if (attr->qp_access_flags & IB_ACCESS_LOCAL_WRITE)
+ offload_info->wr_rdresp_en = true;
+ if (attr->qp_access_flags & IB_ACCESS_REMOTE_WRITE)
+ offload_info->wr_rdresp_en = true;
+ if (attr->qp_access_flags & IB_ACCESS_REMOTE_READ)
+ offload_info->rd_en = true;
+ }
+
+ if (ctx_info->iwarp_info_valid) {
+ ctx_info->send_cq_num = iwqp->iwscq->sc_cq.cq_uk.cq_id;
+ ctx_info->rcv_cq_num = iwqp->iwrcq->sc_cq.cq_uk.cq_id;
+ irdma_sc_qp_setctx(&iwqp->sc_qp, iwqp->host_ctx.va, ctx_info);
+ }
+ spin_unlock_irqrestore(&iwqp->lock, flags);
+
+ if (attr_mask & IB_QP_STATE) {
+ if (issue_modify_qp) {
+ ctx_info->rem_endpoint_idx = tcp_info->arp_idx;
+ if (irdma_hw_modify_qp(iwdev, iwqp, &info, true))
+ return -EINVAL;
+ }
+
+ spin_lock_irqsave(&iwqp->lock, flags);
+ if (iwqp->iwarp_state == info.curr_iwarp_state) {
+ iwqp->iwarp_state = info.next_iwarp_state;
+ iwqp->ibqp_state = attr->qp_state;
+ }
+ spin_unlock_irqrestore(&iwqp->lock, flags);
+ }
+
+ if (issue_modify_qp && iwqp->ibqp_state > IB_QPS_RTS) {
+ if (dont_wait) {
+ if (iwqp->hw_tcp_state) {
+ spin_lock_irqsave(&iwqp->lock, flags);
+ iwqp->hw_tcp_state = IRDMA_TCP_STATE_CLOSED;
+ iwqp->last_aeq = IRDMA_AE_RESET_SENT;
+ spin_unlock_irqrestore(&iwqp->lock, flags);
+ }
+ irdma_cm_disconn(iwqp);
+ } else {
+ int close_timer_started;
+
+ spin_lock_irqsave(&iwdev->cm_core.ht_lock, flags);
+
+ if (iwqp->cm_node) {
+ refcount_inc(&iwqp->cm_node->refcnt);
+ spin_unlock_irqrestore(&iwdev->cm_core.ht_lock, flags);
+ close_timer_started = atomic_inc_return(&iwqp->close_timer_started);
+ if (iwqp->cm_id && close_timer_started == 1)
+ irdma_schedule_cm_timer(iwqp->cm_node,
+ (struct irdma_puda_buf *)iwqp,
+ IRDMA_TIMER_TYPE_CLOSE, 1, 0);
+
+ irdma_rem_ref_cm_node(iwqp->cm_node);
+ } else {
+ spin_unlock_irqrestore(&iwdev->cm_core.ht_lock, flags);
+ }
+ }
+ }
+ if (attr_mask & IB_QP_STATE && udata && udata->outlen &&
+ dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) {
+ struct irdma_ucontext *ucontext;
+
+ ucontext = rdma_udata_to_drv_context(udata,
+ struct irdma_ucontext, ibucontext);
+ if (iwqp->sc_qp.push_idx != IRDMA_INVALID_PUSH_PAGE_INDEX &&
+ !iwqp->push_wqe_mmap_entry &&
+ !irdma_setup_push_mmap_entries(ucontext, iwqp,
+ &uresp.push_wqe_mmap_key, &uresp.push_db_mmap_key)) {
+ uresp.push_valid = 1;
+ uresp.push_offset = iwqp->sc_qp.push_offset;
+ }
+
+ err = ib_copy_to_udata(udata, &uresp, min(sizeof(uresp),
+ udata->outlen));
+ if (err) {
+ irdma_remove_push_mmap_entries(iwqp);
+ ibdev_dbg(&iwdev->ibdev,
+ "VERBS: copy_to_udata failed\n");
+ return err;
+ }
+ }
+
+ return 0;
+exit:
+ spin_unlock_irqrestore(&iwqp->lock, flags);
+
+ return err;
+}
+
+/**
+ * irdma_srq_free_rsrc - free up resources for srq
+ * @rf: RDMA PCI function
+ * @iwsrq: srq ptr
+ */
+static void irdma_srq_free_rsrc(struct irdma_pci_f *rf, struct irdma_srq *iwsrq)
+{
+ struct irdma_sc_srq *srq = &iwsrq->sc_srq;
+
+ if (!iwsrq->user_mode) {
+ dma_free_coherent(rf->sc_dev.hw->device, iwsrq->kmem.size,
+ iwsrq->kmem.va, iwsrq->kmem.pa);
+ iwsrq->kmem.va = NULL;
+ }
+
+ irdma_free_rsrc(rf, rf->allocated_srqs, srq->srq_uk.srq_id);
+}
+
+/**
+ * irdma_cq_free_rsrc - free up resources for cq
+ * @rf: RDMA PCI function
+ * @iwcq: cq ptr
+ */
+static void irdma_cq_free_rsrc(struct irdma_pci_f *rf, struct irdma_cq *iwcq)
+{
+ struct irdma_sc_cq *cq = &iwcq->sc_cq;
+
+ if (!iwcq->user_mode) {
+ dma_free_coherent(rf->sc_dev.hw->device, iwcq->kmem.size,
+ iwcq->kmem.va, iwcq->kmem.pa);
+ iwcq->kmem.va = NULL;
+ dma_free_coherent(rf->sc_dev.hw->device,
+ iwcq->kmem_shadow.size,
+ iwcq->kmem_shadow.va, iwcq->kmem_shadow.pa);
+ iwcq->kmem_shadow.va = NULL;
+ }
+
+ irdma_free_rsrc(rf, rf->allocated_cqs, cq->cq_uk.cq_id);
+}
+
+/**
+ * irdma_free_cqbuf - worker to free a cq buffer
+ * @work: provides access to the cq buffer to free
+ */
+static void irdma_free_cqbuf(struct work_struct *work)
+{
+ struct irdma_cq_buf *cq_buf = container_of(work, struct irdma_cq_buf, work);
+
+ dma_free_coherent(cq_buf->hw->device, cq_buf->kmem_buf.size,
+ cq_buf->kmem_buf.va, cq_buf->kmem_buf.pa);
+ cq_buf->kmem_buf.va = NULL;
+ kfree(cq_buf);
+}
+
+/**
+ * irdma_process_resize_list - remove resized cq buffers from the resize_list
+ * @iwcq: cq which owns the resize_list
+ * @iwdev: irdma device
+ * @lcqe_buf: the buffer where the last cqe is received
+ */
+static int irdma_process_resize_list(struct irdma_cq *iwcq,
+ struct irdma_device *iwdev,
+ struct irdma_cq_buf *lcqe_buf)
+{
+ struct list_head *tmp_node, *list_node;
+ struct irdma_cq_buf *cq_buf;
+ int cnt = 0;
+
+ list_for_each_safe(list_node, tmp_node, &iwcq->resize_list) {
+ cq_buf = list_entry(list_node, struct irdma_cq_buf, list);
+ if (cq_buf == lcqe_buf)
+ return cnt;
+
+ list_del(&cq_buf->list);
+ queue_work(iwdev->cleanup_wq, &cq_buf->work);
+ cnt++;
+ }
+
+ return cnt;
+}
+
+/**
+ * irdma_destroy_srq - destroy srq
+ * @ibsrq: srq pointer
+ * @udata: user data
+ */
+static int irdma_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
+{
+ struct irdma_device *iwdev = to_iwdev(ibsrq->device);
+ struct irdma_srq *iwsrq = to_iwsrq(ibsrq);
+ struct irdma_sc_srq *srq = &iwsrq->sc_srq;
+
+ irdma_srq_wq_destroy(iwdev->rf, srq);
+ irdma_srq_free_rsrc(iwdev->rf, iwsrq);
+ return 0;
+}
+
+/**
+ * irdma_destroy_cq - destroy cq
+ * @ib_cq: cq pointer
+ * @udata: user data
+ */
+static int irdma_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
+{
+ struct irdma_device *iwdev = to_iwdev(ib_cq->device);
+ struct irdma_cq *iwcq = to_iwcq(ib_cq);
+ struct irdma_sc_cq *cq = &iwcq->sc_cq;
+ struct irdma_sc_dev *dev = cq->dev;
+ struct irdma_sc_ceq *ceq = dev->ceq[cq->ceq_id];
+ struct irdma_ceq *iwceq = container_of(ceq, struct irdma_ceq, sc_ceq);
+ unsigned long flags;
+
+ spin_lock_irqsave(&iwcq->lock, flags);
+ if (!list_empty(&iwcq->cmpl_generated))
+ irdma_remove_cmpls_list(iwcq);
+ if (!list_empty(&iwcq->resize_list))
+ irdma_process_resize_list(iwcq, iwdev, NULL);
+ spin_unlock_irqrestore(&iwcq->lock, flags);
+
+ irdma_cq_rem_ref(ib_cq);
+ wait_for_completion(&iwcq->free_cq);
+
+ irdma_cq_wq_destroy(iwdev->rf, cq);
+
+ spin_lock_irqsave(&iwceq->ce_lock, flags);
+ irdma_sc_cleanup_ceqes(cq, ceq);
+ spin_unlock_irqrestore(&iwceq->ce_lock, flags);
+ irdma_cq_free_rsrc(iwdev->rf, iwcq);
+
+ return 0;
+}
+
+/**
+ * irdma_resize_cq - resize cq
+ * @ibcq: cq to be resized
+ * @entries: desired cq size
+ * @udata: user data
+ */
+static int irdma_resize_cq(struct ib_cq *ibcq, int entries,
+ struct ib_udata *udata)
+{
+#define IRDMA_RESIZE_CQ_MIN_REQ_LEN offsetofend(struct irdma_resize_cq_req, user_cq_buffer)
+ struct irdma_cq *iwcq = to_iwcq(ibcq);
+ struct irdma_sc_dev *dev = iwcq->sc_cq.dev;
+ struct irdma_cqp_request *cqp_request;
+ struct cqp_cmds_info *cqp_info;
+ struct irdma_modify_cq_info *m_info;
+ struct irdma_modify_cq_info info = {};
+ struct irdma_dma_mem kmem_buf;
+ struct irdma_cq_mr *cqmr_buf;
+ struct irdma_pbl *iwpbl_buf;
+ struct irdma_device *iwdev;
+ struct irdma_pci_f *rf;
+ struct irdma_cq_buf *cq_buf = NULL;
+ unsigned long flags;
+ u8 cqe_size;
+ int ret;
+
+ iwdev = to_iwdev(ibcq->device);
+ rf = iwdev->rf;
+
+ if (!(rf->sc_dev.hw_attrs.uk_attrs.feature_flags &
+ IRDMA_FEATURE_CQ_RESIZE))
+ return -EOPNOTSUPP;
+
+ if (udata && udata->inlen < IRDMA_RESIZE_CQ_MIN_REQ_LEN)
+ return -EINVAL;
+
+ if (entries > rf->max_cqe)
+ return -EINVAL;
+
+ if (!iwcq->user_mode) {
+ entries += 2;
+
+ if (!iwcq->sc_cq.cq_uk.avoid_mem_cflct &&
+ dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2)
+ entries *= 2;
+
+ if (entries & 1)
+ entries += 1; /* cq size must be an even number */
+
+ cqe_size = iwcq->sc_cq.cq_uk.avoid_mem_cflct ? 64 : 32;
+ if (entries * cqe_size == IRDMA_HW_PAGE_SIZE)
+ entries += 2;
+ }
+
+ info.cq_size = max(entries, 4);
+
+ if (info.cq_size == iwcq->sc_cq.cq_uk.cq_size - 1)
+ return 0;
+
+ if (udata) {
+ struct irdma_resize_cq_req req = {};
+ struct irdma_ucontext *ucontext =
+ rdma_udata_to_drv_context(udata, struct irdma_ucontext,
+ ibucontext);
+
+ /* CQ resize not supported with legacy GEN_1 libi40iw */
+ if (ucontext->legacy_mode)
+ return -EOPNOTSUPP;
+
+ if (ib_copy_from_udata(&req, udata,
+ min(sizeof(req), udata->inlen)))
+ return -EINVAL;
+
+ spin_lock_irqsave(&ucontext->cq_reg_mem_list_lock, flags);
+ iwpbl_buf = irdma_get_pbl((unsigned long)req.user_cq_buffer,
+ &ucontext->cq_reg_mem_list);
+ spin_unlock_irqrestore(&ucontext->cq_reg_mem_list_lock, flags);
+
+ if (!iwpbl_buf)
+ return -ENOMEM;
+
+ cqmr_buf = &iwpbl_buf->cq_mr;
+ if (iwpbl_buf->pbl_allocated) {
+ info.virtual_map = true;
+ info.pbl_chunk_size = 1;
+ info.first_pm_pbl_idx = cqmr_buf->cq_pbl.idx;
+ } else {
+ info.cq_pa = cqmr_buf->cq_pbl.addr;
+ }
+ } else {
+ /* Kmode CQ resize */
+ int rsize;
+
+ rsize = info.cq_size * sizeof(struct irdma_cqe);
+ kmem_buf.size = ALIGN(round_up(rsize, 256), 256);
+ kmem_buf.va = dma_alloc_coherent(dev->hw->device,
+ kmem_buf.size, &kmem_buf.pa,
+ GFP_KERNEL);
+ if (!kmem_buf.va)
+ return -ENOMEM;
+
+ info.cq_base = kmem_buf.va;
+ info.cq_pa = kmem_buf.pa;
+ cq_buf = kzalloc(sizeof(*cq_buf), GFP_KERNEL);
+ if (!cq_buf) {
+ ret = -ENOMEM;
+ goto error;
+ }
+ }
+
+ cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, true);
+ if (!cqp_request) {
+ ret = -ENOMEM;
+ goto error;
+ }
+
+ info.shadow_read_threshold = iwcq->sc_cq.shadow_read_threshold;
+ info.cq_resize = true;
+
+ cqp_info = &cqp_request->info;
+ m_info = &cqp_info->in.u.cq_modify.info;
+ memcpy(m_info, &info, sizeof(*m_info));
+
+ cqp_info->cqp_cmd = IRDMA_OP_CQ_MODIFY;
+ cqp_info->in.u.cq_modify.cq = &iwcq->sc_cq;
+ cqp_info->in.u.cq_modify.scratch = (uintptr_t)cqp_request;
+ cqp_info->post_sq = 1;
+ ret = irdma_handle_cqp_op(rf, cqp_request);
+ irdma_put_cqp_request(&rf->cqp, cqp_request);
+ if (ret)
+ goto error;
+
+ spin_lock_irqsave(&iwcq->lock, flags);
+ if (cq_buf) {
+ cq_buf->kmem_buf = iwcq->kmem;
+ cq_buf->hw = dev->hw;
+ memcpy(&cq_buf->cq_uk, &iwcq->sc_cq.cq_uk, sizeof(cq_buf->cq_uk));
+ INIT_WORK(&cq_buf->work, irdma_free_cqbuf);
+ list_add_tail(&cq_buf->list, &iwcq->resize_list);
+ iwcq->kmem = kmem_buf;
+ }
+
+ irdma_sc_cq_resize(&iwcq->sc_cq, &info);
+ ibcq->cqe = info.cq_size - 1;
+ spin_unlock_irqrestore(&iwcq->lock, flags);
+
+ return 0;
+error:
+ if (!udata) {
+ dma_free_coherent(dev->hw->device, kmem_buf.size, kmem_buf.va,
+ kmem_buf.pa);
+ kmem_buf.va = NULL;
+ }
+ kfree(cq_buf);
+
+ return ret;
+}
+
+/**
+ * irdma_srq_event - event notification for srq limit
+ * @srq: shared srq struct
+ */
+void irdma_srq_event(struct irdma_sc_srq *srq)
+{
+ struct irdma_srq *iwsrq = container_of(srq, struct irdma_srq, sc_srq);
+ struct ib_srq *ibsrq = &iwsrq->ibsrq;
+ struct ib_event event;
+
+ srq->srq_limit = 0;
+
+ if (!ibsrq->event_handler)
+ return;
+
+ event.device = ibsrq->device;
+ event.element.port_num = 1;
+ event.element.srq = ibsrq;
+ event.event = IB_EVENT_SRQ_LIMIT_REACHED;
+ ibsrq->event_handler(&event, ibsrq->srq_context);
+}
+
+/**
+ * irdma_modify_srq - modify srq request
+ * @ibsrq: srq's pointer for modify
+ * @attr: access attributes
+ * @attr_mask: state mask
+ * @udata: user data
+ */
+static int irdma_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
+ enum ib_srq_attr_mask attr_mask,
+ struct ib_udata *udata)
+{
+ struct irdma_device *iwdev = to_iwdev(ibsrq->device);
+ struct irdma_srq *iwsrq = to_iwsrq(ibsrq);
+ struct irdma_cqp_request *cqp_request;
+ struct irdma_pci_f *rf = iwdev->rf;
+ struct irdma_modify_srq_info *info;
+ struct cqp_cmds_info *cqp_info;
+ int status;
+
+ if (attr_mask & IB_SRQ_MAX_WR)
+ return -EINVAL;
+
+ if (!(attr_mask & IB_SRQ_LIMIT))
+ return 0;
+
+ if (attr->srq_limit > iwsrq->sc_srq.srq_uk.srq_size)
+ return -EINVAL;
+
+ /* Execute this cqp op synchronously, so we can update srq_limit
+ * upon successful completion.
+ */
+ cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, true);
+ if (!cqp_request)
+ return -ENOMEM;
+
+ cqp_info = &cqp_request->info;
+ info = &cqp_info->in.u.srq_modify.info;
+ info->srq_limit = attr->srq_limit;
+ if (info->srq_limit > 0xFFF)
+ info->srq_limit = 0xFFF;
+ info->arm_limit_event = 1;
+
+ cqp_info->cqp_cmd = IRDMA_OP_SRQ_MODIFY;
+ cqp_info->post_sq = 1;
+ cqp_info->in.u.srq_modify.srq = &iwsrq->sc_srq;
+ cqp_info->in.u.srq_modify.scratch = (uintptr_t)cqp_request;
+ status = irdma_handle_cqp_op(rf, cqp_request);
+ irdma_put_cqp_request(&rf->cqp, cqp_request);
+ if (status)
+ return status;
+
+ iwsrq->sc_srq.srq_limit = info->srq_limit;
+
+ return 0;
+}
+
+static int irdma_setup_umode_srq(struct irdma_device *iwdev,
+ struct irdma_srq *iwsrq,
+ struct irdma_srq_init_info *info,
+ struct ib_udata *udata)
+{
+#define IRDMA_CREATE_SRQ_MIN_REQ_LEN \
+ offsetofend(struct irdma_create_srq_req, user_shadow_area)
+ struct irdma_create_srq_req req = {};
+ struct irdma_ucontext *ucontext;
+ struct irdma_srq_mr *srqmr;
+ struct irdma_pbl *iwpbl;
+ unsigned long flags;
+
+ iwsrq->user_mode = true;
+ ucontext = rdma_udata_to_drv_context(udata, struct irdma_ucontext,
+ ibucontext);
+
+ if (udata->inlen < IRDMA_CREATE_SRQ_MIN_REQ_LEN)
+ return -EINVAL;
+
+ if (ib_copy_from_udata(&req, udata,
+ min(sizeof(req), udata->inlen)))
+ return -EFAULT;
+
+ spin_lock_irqsave(&ucontext->srq_reg_mem_list_lock, flags);
+ iwpbl = irdma_get_pbl((unsigned long)req.user_srq_buf,
+ &ucontext->srq_reg_mem_list);
+ spin_unlock_irqrestore(&ucontext->srq_reg_mem_list_lock, flags);
+ if (!iwpbl)
+ return -EPROTO;
+
+ iwsrq->iwpbl = iwpbl;
+ srqmr = &iwpbl->srq_mr;
+
+ if (iwpbl->pbl_allocated) {
+ info->virtual_map = true;
+ info->pbl_chunk_size = 1;
+ info->first_pm_pbl_idx = srqmr->srq_pbl.idx;
+ info->leaf_pbl_size = 1;
+ } else {
+ info->srq_pa = srqmr->srq_pbl.addr;
+ }
+ info->shadow_area_pa = srqmr->shadow;
+
+ return 0;
+}
+
+static int irdma_setup_kmode_srq(struct irdma_device *iwdev,
+ struct irdma_srq *iwsrq,
+ struct irdma_srq_init_info *info, u32 depth,
+ u8 shift)
+{
+ struct irdma_srq_uk_init_info *ukinfo = &info->srq_uk_init_info;
+ struct irdma_dma_mem *mem = &iwsrq->kmem;
+ u32 size, ring_size;
+
+ ring_size = depth * IRDMA_QP_WQE_MIN_SIZE;
+ size = ring_size + (IRDMA_SHADOW_AREA_SIZE << 3);
+
+ mem->size = ALIGN(size, 256);
+ mem->va = dma_alloc_coherent(iwdev->rf->hw.device, mem->size,
+ &mem->pa, GFP_KERNEL);
+ if (!mem->va)
+ return -ENOMEM;
+
+ ukinfo->srq = mem->va;
+ ukinfo->srq_size = depth >> shift;
+ ukinfo->shadow_area = mem->va + ring_size;
+
+ info->srq_pa = mem->pa;
+ info->shadow_area_pa = info->srq_pa + ring_size;
+
+ return 0;
+}
+
+/**
+ * irdma_create_srq - create srq
+ * @ibsrq: ib's srq pointer
+ * @initattrs: attributes for srq
+ * @udata: user data for create srq
+ */
+static int irdma_create_srq(struct ib_srq *ibsrq,
+ struct ib_srq_init_attr *initattrs,
+ struct ib_udata *udata)
+{
+ struct irdma_device *iwdev = to_iwdev(ibsrq->device);
+ struct ib_srq_attr *attr = &initattrs->attr;
+ struct irdma_pd *iwpd = to_iwpd(ibsrq->pd);
+ struct irdma_srq *iwsrq = to_iwsrq(ibsrq);
+ struct irdma_srq_uk_init_info *ukinfo;
+ struct irdma_cqp_request *cqp_request;
+ struct irdma_srq_init_info info = {};
+ struct irdma_pci_f *rf = iwdev->rf;
+ struct irdma_uk_attrs *uk_attrs;
+ struct cqp_cmds_info *cqp_info;
+ int err_code = 0;
+ u32 depth;
+ u8 shift;
+
+ uk_attrs = &rf->sc_dev.hw_attrs.uk_attrs;
+ ukinfo = &info.srq_uk_init_info;
+
+ if (initattrs->srq_type != IB_SRQT_BASIC)
+ return -EOPNOTSUPP;
+
+ if (!(uk_attrs->feature_flags & IRDMA_FEATURE_SRQ) ||
+ attr->max_sge > uk_attrs->max_hw_wq_frags)
+ return -EINVAL;
+
+ refcount_set(&iwsrq->refcnt, 1);
+ spin_lock_init(&iwsrq->lock);
+ err_code = irdma_alloc_rsrc(rf, rf->allocated_srqs, rf->max_srq,
+ &iwsrq->srq_num, &rf->next_srq);
+ if (err_code)
+ return err_code;
+
+ ukinfo->max_srq_frag_cnt = attr->max_sge;
+ ukinfo->uk_attrs = uk_attrs;
+ ukinfo->srq_id = iwsrq->srq_num;
+
+ irdma_get_wqe_shift(ukinfo->uk_attrs, ukinfo->max_srq_frag_cnt, 0,
+ &shift);
+
+ err_code = irdma_get_srqdepth(ukinfo->uk_attrs, attr->max_wr,
+ shift, &depth);
+ if (err_code)
+ return err_code;
+
+ /* Actual SRQ size in WRs for ring and HW */
+ ukinfo->srq_size = depth >> shift;
+
+ /* Max postable WRs to SRQ */
+ iwsrq->max_wr = (depth - IRDMA_RQ_RSVD) >> shift;
+ attr->max_wr = iwsrq->max_wr;
+
+ if (udata)
+ err_code = irdma_setup_umode_srq(iwdev, iwsrq, &info, udata);
+ else
+ err_code = irdma_setup_kmode_srq(iwdev, iwsrq, &info, depth,
+ shift);
+
+ if (err_code)
+ goto free_rsrc;
+
+ info.vsi = &iwdev->vsi;
+ info.pd = &iwpd->sc_pd;
+
+ iwsrq->sc_srq.srq_uk.lock = &iwsrq->lock;
+ err_code = irdma_sc_srq_init(&iwsrq->sc_srq, &info);
+ if (err_code)
+ goto free_dmem;
+
+ cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, true);
+ if (!cqp_request) {
+ err_code = -ENOMEM;
+ goto free_dmem;
+ }
+
+ cqp_info = &cqp_request->info;
+ cqp_info->cqp_cmd = IRDMA_OP_SRQ_CREATE;
+ cqp_info->post_sq = 1;
+ cqp_info->in.u.srq_create.srq = &iwsrq->sc_srq;
+ cqp_info->in.u.srq_create.scratch = (uintptr_t)cqp_request;
+ err_code = irdma_handle_cqp_op(rf, cqp_request);
+ irdma_put_cqp_request(&rf->cqp, cqp_request);
+ if (err_code)
+ goto free_dmem;
+
+ if (udata) {
+ struct irdma_create_srq_resp resp = {};
+
+ resp.srq_id = iwsrq->srq_num;
+ resp.srq_size = ukinfo->srq_size;
+ if (ib_copy_to_udata(udata, &resp,
+ min(sizeof(resp), udata->outlen))) {
+ err_code = -EPROTO;
+ goto srq_destroy;
+ }
+ }
+
+ return 0;
+
+srq_destroy:
+ irdma_srq_wq_destroy(rf, &iwsrq->sc_srq);
+
+free_dmem:
+ if (!iwsrq->user_mode)
+ dma_free_coherent(rf->hw.device, iwsrq->kmem.size,
+ iwsrq->kmem.va, iwsrq->kmem.pa);
+free_rsrc:
+ irdma_free_rsrc(rf, rf->allocated_srqs, iwsrq->srq_num);
+ return err_code;
+}
+
+/**
+ * irdma_query_srq - get SRQ attributes
+ * @ibsrq: the SRQ to query
+ * @attr: the attributes of the SRQ
+ */
+static int irdma_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr)
+{
+ struct irdma_srq *iwsrq = to_iwsrq(ibsrq);
+
+ attr->max_wr = iwsrq->max_wr;
+ attr->max_sge = iwsrq->sc_srq.srq_uk.max_srq_frag_cnt;
+ attr->srq_limit = iwsrq->sc_srq.srq_limit;
+
+ return 0;
+}
+
+static inline int cq_validate_flags(u32 flags, u8 hw_rev)
+{
+ /* GEN1/2 does not support CQ create flags */
+ if (hw_rev <= IRDMA_GEN_2)
+ return flags ? -EOPNOTSUPP : 0;
+
+ return flags & ~IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION ? -EOPNOTSUPP : 0;
+}
+
+/**
+ * irdma_create_cq - create cq
+ * @ibcq: CQ allocated
+ * @attr: attributes for cq
+ * @attrs: uverbs attribute bundle
+ */
+static int irdma_create_cq(struct ib_cq *ibcq,
+ const struct ib_cq_init_attr *attr,
+ struct uverbs_attr_bundle *attrs)
+{
+#define IRDMA_CREATE_CQ_MIN_REQ_LEN offsetofend(struct irdma_create_cq_req, user_cq_buf)
+#define IRDMA_CREATE_CQ_MIN_RESP_LEN offsetofend(struct irdma_create_cq_resp, cq_size)
+ struct ib_udata *udata = &attrs->driver_udata;
+ struct ib_device *ibdev = ibcq->device;
+ struct irdma_device *iwdev = to_iwdev(ibdev);
+ struct irdma_pci_f *rf = iwdev->rf;
+ struct irdma_cq *iwcq = to_iwcq(ibcq);
+ u32 cq_num = 0;
+ struct irdma_sc_cq *cq;
+ struct irdma_sc_dev *dev = &rf->sc_dev;
+ struct irdma_cq_init_info info = {};
+ struct irdma_cqp_request *cqp_request;
+ struct cqp_cmds_info *cqp_info;
+ struct irdma_cq_uk_init_info *ukinfo = &info.cq_uk_init_info;
+ unsigned long flags;
+ int err_code;
+ int entries = attr->cqe;
+ bool cqe_64byte_ena;
+ u8 cqe_size;
+
+ err_code = cq_validate_flags(attr->flags, dev->hw_attrs.uk_attrs.hw_rev);
+ if (err_code)
+ return err_code;
+
+ if (udata && (udata->inlen < IRDMA_CREATE_CQ_MIN_REQ_LEN ||
+ udata->outlen < IRDMA_CREATE_CQ_MIN_RESP_LEN))
+ return -EINVAL;
+
+ err_code = irdma_alloc_rsrc(rf, rf->allocated_cqs, rf->max_cq, &cq_num,
+ &rf->next_cq);
+ if (err_code)
+ return err_code;
+
+ cq = &iwcq->sc_cq;
+ cq->back_cq = iwcq;
+ refcount_set(&iwcq->refcnt, 1);
+ spin_lock_init(&iwcq->lock);
+ INIT_LIST_HEAD(&iwcq->resize_list);
+ INIT_LIST_HEAD(&iwcq->cmpl_generated);
+ iwcq->cq_num = cq_num;
+ info.dev = dev;
+ ukinfo->cq_size = max(entries, 4);
+ ukinfo->cq_id = cq_num;
+ cqe_64byte_ena = dev->hw_attrs.uk_attrs.feature_flags & IRDMA_FEATURE_64_BYTE_CQE ?
+ true : false;
+ cqe_size = cqe_64byte_ena ? 64 : 32;
+ ukinfo->avoid_mem_cflct = cqe_64byte_ena;
+ iwcq->ibcq.cqe = info.cq_uk_init_info.cq_size;
+ if (attr->comp_vector < rf->ceqs_count)
+ info.ceq_id = attr->comp_vector;
+ info.ceq_id_valid = true;
+ info.ceqe_mask = 1;
+ info.type = IRDMA_CQ_TYPE_IWARP;
+ info.vsi = &iwdev->vsi;
+
+ if (udata) {
+ struct irdma_ucontext *ucontext;
+ struct irdma_create_cq_req req = {};
+ struct irdma_cq_mr *cqmr;
+ struct irdma_pbl *iwpbl;
+ struct irdma_pbl *iwpbl_shadow;
+ struct irdma_cq_mr *cqmr_shadow;
+
+ iwcq->user_mode = true;
+ ucontext =
+ rdma_udata_to_drv_context(udata, struct irdma_ucontext,
+ ibucontext);
+ if (ib_copy_from_udata(&req, udata,
+ min(sizeof(req), udata->inlen))) {
+ err_code = -EFAULT;
+ goto cq_free_rsrc;
+ }
+
+ spin_lock_irqsave(&ucontext->cq_reg_mem_list_lock, flags);
+ iwpbl = irdma_get_pbl((unsigned long)req.user_cq_buf,
+ &ucontext->cq_reg_mem_list);
+ spin_unlock_irqrestore(&ucontext->cq_reg_mem_list_lock, flags);
+ if (!iwpbl) {
+ err_code = -EPROTO;
+ goto cq_free_rsrc;
+ }
+
+ cqmr = &iwpbl->cq_mr;
+
+ if (rf->sc_dev.hw_attrs.uk_attrs.feature_flags &
+ IRDMA_FEATURE_CQ_RESIZE && !ucontext->legacy_mode) {
+ spin_lock_irqsave(&ucontext->cq_reg_mem_list_lock, flags);
+ iwpbl_shadow = irdma_get_pbl(
+ (unsigned long)req.user_shadow_area,
+ &ucontext->cq_reg_mem_list);
+ spin_unlock_irqrestore(&ucontext->cq_reg_mem_list_lock, flags);
+
+ if (!iwpbl_shadow) {
+ err_code = -EPROTO;
+ goto cq_free_rsrc;
+ }
+ cqmr_shadow = &iwpbl_shadow->cq_mr;
+ info.shadow_area_pa = cqmr_shadow->cq_pbl.addr;
+ cqmr->split = true;
+ } else {
+ info.shadow_area_pa = cqmr->shadow;
+ }
+ if (iwpbl->pbl_allocated) {
+ info.virtual_map = true;
+ info.pbl_chunk_size = 1;
+ info.first_pm_pbl_idx = cqmr->cq_pbl.idx;
+ } else {
+ info.cq_base_pa = cqmr->cq_pbl.addr;
+ }
+ } else {
+ /* Kmode allocations */
+ int rsize;
+
+ if (entries < 1 || entries > rf->max_cqe) {
+ err_code = -EINVAL;
+ goto cq_free_rsrc;
+ }
+
+ entries += 2;
+ if (!cqe_64byte_ena && dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2)
+ entries *= 2;
+
+ if (entries & 1)
+ entries += 1; /* cq size must be an even number */
+
+ if (entries * cqe_size == IRDMA_HW_PAGE_SIZE)
+ entries += 2;
+
+ ukinfo->cq_size = entries;
+
+ if (cqe_64byte_ena)
+ rsize = info.cq_uk_init_info.cq_size * sizeof(struct irdma_extended_cqe);
+ else
+ rsize = info.cq_uk_init_info.cq_size * sizeof(struct irdma_cqe);
+ iwcq->kmem.size = ALIGN(round_up(rsize, 256), 256);
+ iwcq->kmem.va = dma_alloc_coherent(dev->hw->device,
+ iwcq->kmem.size,
+ &iwcq->kmem.pa, GFP_KERNEL);
+ if (!iwcq->kmem.va) {
+ err_code = -ENOMEM;
+ goto cq_free_rsrc;
+ }
+
+ iwcq->kmem_shadow.size = ALIGN(IRDMA_SHADOW_AREA_SIZE << 3,
+ 64);
+ iwcq->kmem_shadow.va = dma_alloc_coherent(dev->hw->device,
+ iwcq->kmem_shadow.size,
+ &iwcq->kmem_shadow.pa,
+ GFP_KERNEL);
+ if (!iwcq->kmem_shadow.va) {
+ err_code = -ENOMEM;
+ goto cq_free_rsrc;
+ }
+ info.shadow_area_pa = iwcq->kmem_shadow.pa;
+ ukinfo->shadow_area = iwcq->kmem_shadow.va;
+ ukinfo->cq_base = iwcq->kmem.va;
+ info.cq_base_pa = iwcq->kmem.pa;
+ }
+
+ info.shadow_read_threshold = min(info.cq_uk_init_info.cq_size / 2,
+ (u32)IRDMA_MAX_CQ_READ_THRESH);
+
+ if (irdma_sc_cq_init(cq, &info)) {
+ ibdev_dbg(&iwdev->ibdev, "VERBS: init cq fail\n");
+ err_code = -EPROTO;
+ goto cq_free_rsrc;
+ }
+
+ cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, true);
+ if (!cqp_request) {
+ err_code = -ENOMEM;
+ goto cq_free_rsrc;
+ }
+
+ cqp_info = &cqp_request->info;
+ cqp_info->cqp_cmd = IRDMA_OP_CQ_CREATE;
+ cqp_info->post_sq = 1;
+ cqp_info->in.u.cq_create.cq = cq;
+ cqp_info->in.u.cq_create.check_overflow = true;
+ cqp_info->in.u.cq_create.scratch = (uintptr_t)cqp_request;
+ err_code = irdma_handle_cqp_op(rf, cqp_request);
+ irdma_put_cqp_request(&rf->cqp, cqp_request);
+ if (err_code)
+ goto cq_free_rsrc;
+
+ if (udata) {
+ struct irdma_create_cq_resp resp = {};
+
+ resp.cq_id = info.cq_uk_init_info.cq_id;
+ resp.cq_size = info.cq_uk_init_info.cq_size;
+ if (ib_copy_to_udata(udata, &resp,
+ min(sizeof(resp), udata->outlen))) {
+ ibdev_dbg(&iwdev->ibdev,
+ "VERBS: copy to user data\n");
+ err_code = -EPROTO;
+ goto cq_destroy;
+ }
+ }
+ rf->cq_table[cq_num] = iwcq;
+ init_completion(&iwcq->free_cq);
+
+ return 0;
+cq_destroy:
+ irdma_cq_wq_destroy(rf, cq);
+cq_free_rsrc:
+ irdma_cq_free_rsrc(rf, iwcq);
+
+ return err_code;
+}
+
+/**
+ * irdma_get_mr_access - get hw MR access permissions from IB access flags
+ * @access: IB access flags
+ * @hw_rev: Hardware version
+ */
+static inline u16 irdma_get_mr_access(int access, u8 hw_rev)
+{
+ u16 hw_access = 0;
+
+ hw_access |= (access & IB_ACCESS_LOCAL_WRITE) ?
+ IRDMA_ACCESS_FLAGS_LOCALWRITE : 0;
+ hw_access |= (access & IB_ACCESS_REMOTE_WRITE) ?
+ IRDMA_ACCESS_FLAGS_REMOTEWRITE : 0;
+ hw_access |= (access & IB_ACCESS_REMOTE_READ) ?
+ IRDMA_ACCESS_FLAGS_REMOTEREAD : 0;
+ if (hw_rev >= IRDMA_GEN_3) {
+ hw_access |= (access & IB_ACCESS_MW_BIND) ?
+ IRDMA_ACCESS_FLAGS_BIND_WINDOW : 0;
+ }
+ hw_access |= (access & IB_ZERO_BASED) ?
+ IRDMA_ACCESS_FLAGS_ZERO_BASED : 0;
+ hw_access |= IRDMA_ACCESS_FLAGS_LOCALREAD;
+
+ return hw_access;
+}
+
+/**
+ * irdma_free_stag - free stag resource
+ * @iwdev: irdma device
+ * @stag: stag to free
+ */
+static void irdma_free_stag(struct irdma_device *iwdev, u32 stag)
+{
+ u32 stag_idx;
+
+ stag_idx = (stag & iwdev->rf->mr_stagmask) >> IRDMA_CQPSQ_STAG_IDX_S;
+ irdma_free_rsrc(iwdev->rf, iwdev->rf->allocated_mrs, stag_idx);
+}
+
+/**
+ * irdma_create_stag - create random stag
+ * @iwdev: irdma device
+ */
+static u32 irdma_create_stag(struct irdma_device *iwdev)
+{
+ u32 stag = 0;
+ u32 stag_index = 0;
+ u32 next_stag_index;
+ u32 driver_key;
+ u32 random;
+ u8 consumer_key;
+ int ret;
+
+ get_random_bytes(&random, sizeof(random));
+ consumer_key = (u8)random;
+
+ driver_key = random & ~iwdev->rf->mr_stagmask;
+ next_stag_index = (random & iwdev->rf->mr_stagmask) >> 8;
+ next_stag_index %= iwdev->rf->max_mr;
+
+ ret = irdma_alloc_rsrc(iwdev->rf, iwdev->rf->allocated_mrs,
+ iwdev->rf->max_mr, &stag_index,
+ &next_stag_index);
+ if (ret)
+ return stag;
+ stag = stag_index << IRDMA_CQPSQ_STAG_IDX_S;
+ stag |= driver_key;
+ stag += (u32)consumer_key;
+
+ return stag;
+}
+
+/**
+ * irdma_next_pbl_addr - Get next pbl address
+ * @pbl: pointer to a pble
+ * @pinfo: info pointer
+ * @idx: index
+ */
+static inline u64 *irdma_next_pbl_addr(u64 *pbl, struct irdma_pble_info **pinfo,
+ u32 *idx)
+{
+ *idx += 1;
+ if (!(*pinfo) || *idx != (*pinfo)->cnt)
+ return ++pbl;
+ *idx = 0;
+ (*pinfo)++;
+
+ return (*pinfo)->addr;
+}
+
+/**
+ * irdma_copy_user_pgaddrs - copy user page address to pble's os locally
+ * @iwmr: iwmr for IB's user page addresses
+ * @pbl: ple pointer to save 1 level or 0 level pble
+ * @level: indicated level 0, 1 or 2
+ */
+static void irdma_copy_user_pgaddrs(struct irdma_mr *iwmr, u64 *pbl,
+ enum irdma_pble_level level)
+{
+ struct ib_umem *region = iwmr->region;
+ struct irdma_pbl *iwpbl = &iwmr->iwpbl;
+ struct irdma_pble_alloc *palloc = &iwpbl->pble_alloc;
+ struct irdma_pble_info *pinfo;
+ struct ib_block_iter biter;
+ u32 idx = 0;
+ u32 pbl_cnt = 0;
+
+ pinfo = (level == PBLE_LEVEL_1) ? NULL : palloc->level2.leaf;
+
+ if (iwmr->type == IRDMA_MEMREG_TYPE_QP)
+ iwpbl->qp_mr.sq_page = sg_page(region->sgt_append.sgt.sgl);
+
+ rdma_umem_for_each_dma_block(region, &biter, iwmr->page_size) {
+ *pbl = rdma_block_iter_dma_address(&biter);
+ if (++pbl_cnt == palloc->total_cnt)
+ break;
+ pbl = irdma_next_pbl_addr(pbl, &pinfo, &idx);
+ }
+}
+
+/**
+ * irdma_check_mem_contiguous - check if pbls stored in arr are contiguous
+ * @arr: lvl1 pbl array
+ * @npages: page count
+ * @pg_size: page size
+ *
+ */
+static bool irdma_check_mem_contiguous(u64 *arr, u32 npages, u32 pg_size)
+{
+ u32 pg_idx;
+
+ for (pg_idx = 0; pg_idx < npages; pg_idx++) {
+ if ((*arr + (pg_size * pg_idx)) != arr[pg_idx])
+ return false;
+ }
+
+ return true;
+}
+
+/**
+ * irdma_check_mr_contiguous - check if MR is physically contiguous
+ * @palloc: pbl allocation struct
+ * @pg_size: page size
+ */
+static bool irdma_check_mr_contiguous(struct irdma_pble_alloc *palloc,
+ u32 pg_size)
+{
+ struct irdma_pble_level2 *lvl2 = &palloc->level2;
+ struct irdma_pble_info *leaf = lvl2->leaf;
+ u64 *arr = NULL;
+ u64 *start_addr = NULL;
+ int i;
+ bool ret;
+
+ if (palloc->level == PBLE_LEVEL_1) {
+ arr = palloc->level1.addr;
+ ret = irdma_check_mem_contiguous(arr, palloc->total_cnt,
+ pg_size);
+ return ret;
+ }
+
+ start_addr = leaf->addr;
+
+ for (i = 0; i < lvl2->leaf_cnt; i++, leaf++) {
+ arr = leaf->addr;
+ if ((*start_addr + (i * pg_size * PBLE_PER_PAGE)) != *arr)
+ return false;
+ ret = irdma_check_mem_contiguous(arr, leaf->cnt, pg_size);
+ if (!ret)
+ return false;
+ }
+
+ return true;
+}
+
+/**
+ * irdma_setup_pbles - copy user pg address to pble's
+ * @rf: RDMA PCI function
+ * @iwmr: mr pointer for this memory registration
+ * @lvl: requested pble levels
+ */
+static int irdma_setup_pbles(struct irdma_pci_f *rf, struct irdma_mr *iwmr,
+ u8 lvl)
+{
+ struct irdma_pbl *iwpbl = &iwmr->iwpbl;
+ struct irdma_pble_alloc *palloc = &iwpbl->pble_alloc;
+ struct irdma_pble_info *pinfo;
+ u64 *pbl;
+ int status;
+ enum irdma_pble_level level = PBLE_LEVEL_1;
+
+ if (lvl) {
+ status = irdma_get_pble(rf->pble_rsrc, palloc, iwmr->page_cnt,
+ lvl);
+ if (status)
+ return status;
+
+ iwpbl->pbl_allocated = true;
+ level = palloc->level;
+ pinfo = (level == PBLE_LEVEL_1) ? &palloc->level1 :
+ palloc->level2.leaf;
+ pbl = pinfo->addr;
+ } else {
+ pbl = iwmr->pgaddrmem;
+ }
+
+ irdma_copy_user_pgaddrs(iwmr, pbl, level);
+
+ if (lvl)
+ iwmr->pgaddrmem[0] = *pbl;
+
+ return 0;
+}
+
+/**
+ * irdma_handle_q_mem - handle memory for qp and cq
+ * @iwdev: irdma device
+ * @req: information for q memory management
+ * @iwpbl: pble struct
+ * @lvl: pble level mask
+ */
+static int irdma_handle_q_mem(struct irdma_device *iwdev,
+ struct irdma_mem_reg_req *req,
+ struct irdma_pbl *iwpbl, u8 lvl)
+{
+ struct irdma_pble_alloc *palloc = &iwpbl->pble_alloc;
+ struct irdma_mr *iwmr = iwpbl->iwmr;
+ struct irdma_qp_mr *qpmr = &iwpbl->qp_mr;
+ struct irdma_cq_mr *cqmr = &iwpbl->cq_mr;
+ struct irdma_srq_mr *srqmr = &iwpbl->srq_mr;
+ struct irdma_hmc_pble *hmc_p;
+ u64 *arr = iwmr->pgaddrmem;
+ u32 pg_size, total;
+ int err = 0;
+ bool ret = true;
+
+ pg_size = iwmr->page_size;
+ err = irdma_setup_pbles(iwdev->rf, iwmr, lvl);
+ if (err)
+ return err;
+
+ if (lvl)
+ arr = palloc->level1.addr;
+
+ switch (iwmr->type) {
+ case IRDMA_MEMREG_TYPE_QP:
+ total = req->sq_pages + req->rq_pages;
+ hmc_p = &qpmr->sq_pbl;
+ qpmr->shadow = (dma_addr_t)arr[total];
+ /* Need to use physical address for RQ of QP
+ * in case it is associated with SRQ.
+ */
+ qpmr->rq_pa = (dma_addr_t)arr[req->sq_pages];
+ if (lvl) {
+ ret = irdma_check_mem_contiguous(arr, req->sq_pages,
+ pg_size);
+ if (ret)
+ ret = irdma_check_mem_contiguous(&arr[req->sq_pages],
+ req->rq_pages,
+ pg_size);
+ }
+
+ if (!ret) {
+ hmc_p->idx = palloc->level1.idx;
+ hmc_p = &qpmr->rq_pbl;
+ hmc_p->idx = palloc->level1.idx + req->sq_pages;
+ } else {
+ hmc_p->addr = arr[0];
+ hmc_p = &qpmr->rq_pbl;
+ hmc_p->addr = arr[req->sq_pages];
+ }
+ break;
+ case IRDMA_MEMREG_TYPE_SRQ:
+ hmc_p = &srqmr->srq_pbl;
+ srqmr->shadow = (dma_addr_t)arr[req->rq_pages];
+ if (lvl)
+ ret = irdma_check_mem_contiguous(arr, req->rq_pages,
+ pg_size);
+
+ if (!ret)
+ hmc_p->idx = palloc->level1.idx;
+ else
+ hmc_p->addr = arr[0];
+ break;
+ case IRDMA_MEMREG_TYPE_CQ:
+ hmc_p = &cqmr->cq_pbl;
+
+ if (!cqmr->split)
+ cqmr->shadow = (dma_addr_t)arr[req->cq_pages];
+
+ if (lvl)
+ ret = irdma_check_mem_contiguous(arr, req->cq_pages,
+ pg_size);
+
+ if (!ret)
+ hmc_p->idx = palloc->level1.idx;
+ else
+ hmc_p->addr = arr[0];
+ break;
+ default:
+ ibdev_dbg(&iwdev->ibdev, "VERBS: MR type error\n");
+ err = -EINVAL;
+ }
+
+ if (lvl && ret) {
+ irdma_free_pble(iwdev->rf->pble_rsrc, palloc);
+ iwpbl->pbl_allocated = false;
+ }
+
+ return err;
+}
+
+/**
+ * irdma_hw_alloc_mw - create the hw memory window
+ * @iwdev: irdma device
+ * @iwmr: pointer to memory window info
+ */
+static int irdma_hw_alloc_mw(struct irdma_device *iwdev, struct irdma_mr *iwmr)
+{
+ struct irdma_mw_alloc_info *info;
+ struct irdma_pd *iwpd = to_iwpd(iwmr->ibmr.pd);
+ struct irdma_cqp_request *cqp_request;
+ struct cqp_cmds_info *cqp_info;
+ int status;
+
+ cqp_request = irdma_alloc_and_get_cqp_request(&iwdev->rf->cqp, true);
+ if (!cqp_request)
+ return -ENOMEM;
+
+ cqp_info = &cqp_request->info;
+ info = &cqp_info->in.u.mw_alloc.info;
+ memset(info, 0, sizeof(*info));
+ if (iwmr->ibmw.type == IB_MW_TYPE_1)
+ info->mw_wide = true;
+
+ info->page_size = PAGE_SIZE;
+ info->mw_stag_index = iwmr->stag >> IRDMA_CQPSQ_STAG_IDX_S;
+ info->pd_id = iwpd->sc_pd.pd_id;
+ info->remote_access = true;
+ cqp_info->cqp_cmd = IRDMA_OP_MW_ALLOC;
+ cqp_info->post_sq = 1;
+ cqp_info->in.u.mw_alloc.dev = &iwdev->rf->sc_dev;
+ cqp_info->in.u.mw_alloc.scratch = (uintptr_t)cqp_request;
+ status = irdma_handle_cqp_op(iwdev->rf, cqp_request);
+ irdma_put_cqp_request(&iwdev->rf->cqp, cqp_request);
+
+ return status;
+}
+
+/**
+ * irdma_alloc_mw - Allocate memory window
+ * @ibmw: Memory Window
+ * @udata: user data pointer
+ */
+static int irdma_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata)
+{
+ struct irdma_device *iwdev = to_iwdev(ibmw->device);
+ struct irdma_mr *iwmr = to_iwmw(ibmw);
+ int err_code;
+ u32 stag;
+
+ stag = irdma_create_stag(iwdev);
+ if (!stag)
+ return -ENOMEM;
+
+ iwmr->stag = stag;
+ ibmw->rkey = stag;
+
+ err_code = irdma_hw_alloc_mw(iwdev, iwmr);
+ if (err_code) {
+ irdma_free_stag(iwdev, stag);
+ return err_code;
+ }
+
+ return 0;
+}
+
+/**
+ * irdma_dealloc_mw - Dealloc memory window
+ * @ibmw: memory window structure.
+ */
+static int irdma_dealloc_mw(struct ib_mw *ibmw)
+{
+ struct ib_pd *ibpd = ibmw->pd;
+ struct irdma_pd *iwpd = to_iwpd(ibpd);
+ struct irdma_mr *iwmr = to_iwmr((struct ib_mr *)ibmw);
+ struct irdma_device *iwdev = to_iwdev(ibmw->device);
+ struct irdma_cqp_request *cqp_request;
+ struct cqp_cmds_info *cqp_info;
+ struct irdma_dealloc_stag_info *info;
+
+ cqp_request = irdma_alloc_and_get_cqp_request(&iwdev->rf->cqp, true);
+ if (!cqp_request)
+ return -ENOMEM;
+
+ cqp_info = &cqp_request->info;
+ info = &cqp_info->in.u.dealloc_stag.info;
+ memset(info, 0, sizeof(*info));
+ info->pd_id = iwpd->sc_pd.pd_id;
+ info->stag_idx = ibmw->rkey >> IRDMA_CQPSQ_STAG_IDX_S;
+ info->mr = false;
+ cqp_info->cqp_cmd = IRDMA_OP_DEALLOC_STAG;
+ cqp_info->post_sq = 1;
+ cqp_info->in.u.dealloc_stag.dev = &iwdev->rf->sc_dev;
+ cqp_info->in.u.dealloc_stag.scratch = (uintptr_t)cqp_request;
+ irdma_handle_cqp_op(iwdev->rf, cqp_request);
+ irdma_put_cqp_request(&iwdev->rf->cqp, cqp_request);
+ irdma_free_stag(iwdev, iwmr->stag);
+
+ return 0;
+}
+
+/**
+ * irdma_hw_alloc_stag - cqp command to allocate stag
+ * @iwdev: irdma device
+ * @iwmr: irdma mr pointer
+ */
+static int irdma_hw_alloc_stag(struct irdma_device *iwdev,
+ struct irdma_mr *iwmr)
+{
+ struct irdma_allocate_stag_info *info;
+ struct ib_pd *pd = iwmr->ibmr.pd;
+ struct irdma_pd *iwpd = to_iwpd(pd);
+ int status;
+ struct irdma_cqp_request *cqp_request;
+ struct cqp_cmds_info *cqp_info;
+
+ cqp_request = irdma_alloc_and_get_cqp_request(&iwdev->rf->cqp, true);
+ if (!cqp_request)
+ return -ENOMEM;
+
+ cqp_info = &cqp_request->info;
+ info = &cqp_info->in.u.alloc_stag.info;
+ info->page_size = PAGE_SIZE;
+ info->stag_idx = iwmr->stag >> IRDMA_CQPSQ_STAG_IDX_S;
+ info->pd_id = iwpd->sc_pd.pd_id;
+ info->total_len = iwmr->len;
+ info->remote_access = true;
+ cqp_info->cqp_cmd = IRDMA_OP_ALLOC_STAG;
+ cqp_info->post_sq = 1;
+ cqp_info->in.u.alloc_stag.dev = &iwdev->rf->sc_dev;
+ cqp_info->in.u.alloc_stag.scratch = (uintptr_t)cqp_request;
+ status = irdma_handle_cqp_op(iwdev->rf, cqp_request);
+ irdma_put_cqp_request(&iwdev->rf->cqp, cqp_request);
+ if (status)
+ return status;
+
+ iwmr->is_hwreg = true;
+ return 0;
+}
+
+/**
+ * irdma_alloc_mr - register stag for fast memory registration
+ * @pd: ibpd pointer
+ * @mr_type: memory for stag registrion
+ * @max_num_sg: man number of pages
+ */
+static struct ib_mr *irdma_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
+ u32 max_num_sg)
+{
+ struct irdma_device *iwdev = to_iwdev(pd->device);
+ struct irdma_pble_alloc *palloc;
+ struct irdma_pbl *iwpbl;
+ struct irdma_mr *iwmr;
+ u32 stag;
+ int err_code;
+
+ iwmr = kzalloc(sizeof(*iwmr), GFP_KERNEL);
+ if (!iwmr)
+ return ERR_PTR(-ENOMEM);
+
+ stag = irdma_create_stag(iwdev);
+ if (!stag) {
+ err_code = -ENOMEM;
+ goto err;
+ }
+
+ iwmr->stag = stag;
+ iwmr->ibmr.rkey = stag;
+ iwmr->ibmr.lkey = stag;
+ iwmr->ibmr.pd = pd;
+ iwmr->ibmr.device = pd->device;
+ iwpbl = &iwmr->iwpbl;
+ iwpbl->iwmr = iwmr;
+ iwmr->type = IRDMA_MEMREG_TYPE_MEM;
+ palloc = &iwpbl->pble_alloc;
+ iwmr->page_cnt = max_num_sg;
+ /* Use system PAGE_SIZE as the sg page sizes are unknown at this point */
+ iwmr->len = max_num_sg * PAGE_SIZE;
+ err_code = irdma_get_pble(iwdev->rf->pble_rsrc, palloc, iwmr->page_cnt,
+ false);
+ if (err_code)
+ goto err_get_pble;
+
+ err_code = irdma_hw_alloc_stag(iwdev, iwmr);
+ if (err_code)
+ goto err_alloc_stag;
+
+ iwpbl->pbl_allocated = true;
+
+ return &iwmr->ibmr;
+err_alloc_stag:
+ irdma_free_pble(iwdev->rf->pble_rsrc, palloc);
+err_get_pble:
+ irdma_free_stag(iwdev, stag);
+err:
+ kfree(iwmr);
+
+ return ERR_PTR(err_code);
+}
+
+/**
+ * irdma_set_page - populate pbl list for fmr
+ * @ibmr: ib mem to access iwarp mr pointer
+ * @addr: page dma address fro pbl list
+ */
+static int irdma_set_page(struct ib_mr *ibmr, u64 addr)
+{
+ struct irdma_mr *iwmr = to_iwmr(ibmr);
+ struct irdma_pbl *iwpbl = &iwmr->iwpbl;
+ struct irdma_pble_alloc *palloc = &iwpbl->pble_alloc;
+ u64 *pbl;
+
+ if (unlikely(iwmr->npages == iwmr->page_cnt))
+ return -ENOMEM;
+
+ if (palloc->level == PBLE_LEVEL_2) {
+ struct irdma_pble_info *palloc_info =
+ palloc->level2.leaf + (iwmr->npages >> PBLE_512_SHIFT);
+
+ palloc_info->addr[iwmr->npages & (PBLE_PER_PAGE - 1)] = addr;
+ } else {
+ pbl = palloc->level1.addr;
+ pbl[iwmr->npages] = addr;
+ }
+ iwmr->npages++;
+
+ return 0;
+}
+
+/**
+ * irdma_map_mr_sg - map of sg list for fmr
+ * @ibmr: ib mem to access iwarp mr pointer
+ * @sg: scatter gather list
+ * @sg_nents: number of sg pages
+ * @sg_offset: scatter gather list for fmr
+ */
+static int irdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
+ int sg_nents, unsigned int *sg_offset)
+{
+ struct irdma_mr *iwmr = to_iwmr(ibmr);
+
+ iwmr->npages = 0;
+
+ return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, irdma_set_page);
+}
+
+/**
+ * irdma_hwreg_mr - send cqp command for memory registration
+ * @iwdev: irdma device
+ * @iwmr: irdma mr pointer
+ * @access: access for MR
+ */
+static int irdma_hwreg_mr(struct irdma_device *iwdev, struct irdma_mr *iwmr,
+ u16 access)
+{
+ struct irdma_pbl *iwpbl = &iwmr->iwpbl;
+ struct irdma_reg_ns_stag_info *stag_info;
+ struct ib_pd *pd = iwmr->ibmr.pd;
+ struct irdma_pd *iwpd = to_iwpd(pd);
+ struct irdma_pble_alloc *palloc = &iwpbl->pble_alloc;
+ struct irdma_cqp_request *cqp_request;
+ struct cqp_cmds_info *cqp_info;
+ int ret;
+
+ cqp_request = irdma_alloc_and_get_cqp_request(&iwdev->rf->cqp, true);
+ if (!cqp_request)
+ return -ENOMEM;
+
+ cqp_info = &cqp_request->info;
+ stag_info = &cqp_info->in.u.mr_reg_non_shared.info;
+ stag_info->va = iwpbl->user_base;
+ stag_info->stag_idx = iwmr->stag >> IRDMA_CQPSQ_STAG_IDX_S;
+ stag_info->stag_key = (u8)iwmr->stag;
+ stag_info->total_len = iwmr->len;
+ stag_info->access_rights = irdma_get_mr_access(access,
+ iwdev->rf->sc_dev.hw_attrs.uk_attrs.hw_rev);
+ if (iwdev->rf->sc_dev.hw_attrs.uk_attrs.feature_flags & IRDMA_FEATURE_ATOMIC_OPS)
+ stag_info->remote_atomics_en = (access & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
+ stag_info->pd_id = iwpd->sc_pd.pd_id;
+ stag_info->all_memory = iwmr->dma_mr;
+ if (stag_info->access_rights & IRDMA_ACCESS_FLAGS_ZERO_BASED)
+ stag_info->addr_type = IRDMA_ADDR_TYPE_ZERO_BASED;
+ else
+ stag_info->addr_type = IRDMA_ADDR_TYPE_VA_BASED;
+ stag_info->page_size = iwmr->page_size;
+
+ if (iwpbl->pbl_allocated) {
+ if (palloc->level == PBLE_LEVEL_1) {
+ stag_info->first_pm_pbl_index = palloc->level1.idx;
+ stag_info->chunk_size = 1;
+ } else {
+ stag_info->first_pm_pbl_index = palloc->level2.root.idx;
+ stag_info->chunk_size = 3;
+ }
+ } else {
+ stag_info->reg_addr_pa = iwmr->pgaddrmem[0];
+ }
+
+ cqp_info->cqp_cmd = IRDMA_OP_MR_REG_NON_SHARED;
+ cqp_info->post_sq = 1;
+ cqp_info->in.u.mr_reg_non_shared.dev = &iwdev->rf->sc_dev;
+ cqp_info->in.u.mr_reg_non_shared.scratch = (uintptr_t)cqp_request;
+ ret = irdma_handle_cqp_op(iwdev->rf, cqp_request);
+ irdma_put_cqp_request(&iwdev->rf->cqp, cqp_request);
+
+ if (!ret)
+ iwmr->is_hwreg = true;
+
+ return ret;
+}
+
+static int irdma_reg_user_mr_type_mem(struct irdma_mr *iwmr, int access,
+ bool create_stag)
+{
+ struct irdma_device *iwdev = to_iwdev(iwmr->ibmr.device);
+ struct irdma_pbl *iwpbl = &iwmr->iwpbl;
+ u32 stag = 0;
+ u8 lvl;
+ int err;
+
+ lvl = iwmr->page_cnt != 1 ? PBLE_LEVEL_1 | PBLE_LEVEL_2 : PBLE_LEVEL_0;
+
+ err = irdma_setup_pbles(iwdev->rf, iwmr, lvl);
+ if (err)
+ return err;
+
+ if (lvl) {
+ err = irdma_check_mr_contiguous(&iwpbl->pble_alloc,
+ iwmr->page_size);
+ if (err) {
+ irdma_free_pble(iwdev->rf->pble_rsrc, &iwpbl->pble_alloc);
+ iwpbl->pbl_allocated = false;
+ }
+ }
+
+ if (create_stag) {
+ stag = irdma_create_stag(iwdev);
+ if (!stag) {
+ err = -ENOMEM;
+ goto free_pble;
+ }
+
+ iwmr->stag = stag;
+ iwmr->ibmr.rkey = stag;
+ iwmr->ibmr.lkey = stag;
+ }
+
+ err = irdma_hwreg_mr(iwdev, iwmr, access);
+ if (err)
+ goto err_hwreg;
+
+ return 0;
+
+err_hwreg:
+ if (stag)
+ irdma_free_stag(iwdev, stag);
+
+free_pble:
+ if (iwpbl->pble_alloc.level != PBLE_LEVEL_0 && iwpbl->pbl_allocated)
+ irdma_free_pble(iwdev->rf->pble_rsrc, &iwpbl->pble_alloc);
+
+ return err;
+}
+
+static struct irdma_mr *irdma_alloc_iwmr(struct ib_umem *region,
+ struct ib_pd *pd, u64 virt,
+ enum irdma_memreg_type reg_type)
+{
+ struct irdma_device *iwdev = to_iwdev(pd->device);
+ struct irdma_pbl *iwpbl;
+ struct irdma_mr *iwmr;
+ unsigned long pgsz_bitmap;
+
+ iwmr = kzalloc(sizeof(*iwmr), GFP_KERNEL);
+ if (!iwmr)
+ return ERR_PTR(-ENOMEM);
+
+ iwpbl = &iwmr->iwpbl;
+ iwpbl->iwmr = iwmr;
+ iwmr->region = region;
+ iwmr->ibmr.pd = pd;
+ iwmr->ibmr.device = pd->device;
+ iwmr->ibmr.iova = virt;
+ iwmr->type = reg_type;
+
+ pgsz_bitmap = (reg_type == IRDMA_MEMREG_TYPE_MEM) ?
+ iwdev->rf->sc_dev.hw_attrs.page_size_cap : SZ_4K;
+
+ iwmr->page_size = ib_umem_find_best_pgsz(region, pgsz_bitmap, virt);
+ if (unlikely(!iwmr->page_size)) {
+ kfree(iwmr);
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+
+ iwmr->len = region->length;
+ iwpbl->user_base = virt;
+ iwmr->page_cnt = ib_umem_num_dma_blocks(region, iwmr->page_size);
+
+ return iwmr;
+}
+
+static void irdma_free_iwmr(struct irdma_mr *iwmr)
+{
+ kfree(iwmr);
+}
+
+static int irdma_reg_user_mr_type_qp(struct irdma_mem_reg_req req,
+ struct ib_udata *udata,
+ struct irdma_mr *iwmr)
+{
+ struct irdma_device *iwdev = to_iwdev(iwmr->ibmr.device);
+ struct irdma_pbl *iwpbl = &iwmr->iwpbl;
+ struct irdma_ucontext *ucontext = NULL;
+ unsigned long flags;
+ u32 total;
+ int err;
+ u8 lvl;
+
+ /* iWarp: Catch page not starting on OS page boundary */
+ if (!rdma_protocol_roce(&iwdev->ibdev, 1) &&
+ ib_umem_offset(iwmr->region))
+ return -EINVAL;
+
+ total = req.sq_pages + req.rq_pages + 1;
+ if (total > iwmr->page_cnt)
+ return -EINVAL;
+
+ total = req.sq_pages + req.rq_pages;
+ lvl = total > 2 ? PBLE_LEVEL_1 : PBLE_LEVEL_0;
+ err = irdma_handle_q_mem(iwdev, &req, iwpbl, lvl);
+ if (err)
+ return err;
+
+ ucontext = rdma_udata_to_drv_context(udata, struct irdma_ucontext,
+ ibucontext);
+ spin_lock_irqsave(&ucontext->qp_reg_mem_list_lock, flags);
+ list_add_tail(&iwpbl->list, &ucontext->qp_reg_mem_list);
+ iwpbl->on_list = true;
+ spin_unlock_irqrestore(&ucontext->qp_reg_mem_list_lock, flags);
+
+ return 0;
+}
+
+static int irdma_reg_user_mr_type_srq(struct irdma_mem_reg_req req,
+ struct ib_udata *udata,
+ struct irdma_mr *iwmr)
+{
+ struct irdma_device *iwdev = to_iwdev(iwmr->ibmr.device);
+ struct irdma_pbl *iwpbl = &iwmr->iwpbl;
+ struct irdma_ucontext *ucontext;
+ unsigned long flags;
+ u32 total;
+ int err;
+ u8 lvl;
+
+ total = req.rq_pages + IRDMA_SHADOW_PGCNT;
+ if (total > iwmr->page_cnt)
+ return -EINVAL;
+
+ lvl = req.rq_pages > 1 ? PBLE_LEVEL_1 : PBLE_LEVEL_0;
+ err = irdma_handle_q_mem(iwdev, &req, iwpbl, lvl);
+ if (err)
+ return err;
+
+ ucontext = rdma_udata_to_drv_context(udata, struct irdma_ucontext,
+ ibucontext);
+ spin_lock_irqsave(&ucontext->srq_reg_mem_list_lock, flags);
+ list_add_tail(&iwpbl->list, &ucontext->srq_reg_mem_list);
+ iwpbl->on_list = true;
+ spin_unlock_irqrestore(&ucontext->srq_reg_mem_list_lock, flags);
+
+ return 0;
+}
+
+static int irdma_reg_user_mr_type_cq(struct irdma_mem_reg_req req,
+ struct ib_udata *udata,
+ struct irdma_mr *iwmr)
+{
+ struct irdma_device *iwdev = to_iwdev(iwmr->ibmr.device);
+ struct irdma_pbl *iwpbl = &iwmr->iwpbl;
+ struct irdma_ucontext *ucontext = NULL;
+ u8 shadow_pgcnt = 1;
+ unsigned long flags;
+ u32 total;
+ int err;
+ u8 lvl;
+
+ if (iwdev->rf->sc_dev.hw_attrs.uk_attrs.feature_flags & IRDMA_FEATURE_CQ_RESIZE)
+ shadow_pgcnt = 0;
+ total = req.cq_pages + shadow_pgcnt;
+ if (total > iwmr->page_cnt)
+ return -EINVAL;
+
+ lvl = req.cq_pages > 1 ? PBLE_LEVEL_1 : PBLE_LEVEL_0;
+ err = irdma_handle_q_mem(iwdev, &req, iwpbl, lvl);
+ if (err)
+ return err;
+
+ ucontext = rdma_udata_to_drv_context(udata, struct irdma_ucontext,
+ ibucontext);
+ spin_lock_irqsave(&ucontext->cq_reg_mem_list_lock, flags);
+ list_add_tail(&iwpbl->list, &ucontext->cq_reg_mem_list);
+ iwpbl->on_list = true;
+ spin_unlock_irqrestore(&ucontext->cq_reg_mem_list_lock, flags);
+
+ return 0;
+}
+
+/**
+ * irdma_reg_user_mr - Register a user memory region
+ * @pd: ptr of pd
+ * @start: virtual start address
+ * @len: length of mr
+ * @virt: virtual address
+ * @access: access of mr
+ * @dmah: dma handle
+ * @udata: user data
+ */
+static struct ib_mr *irdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 len,
+ u64 virt, int access,
+ struct ib_dmah *dmah,
+ struct ib_udata *udata)
+{
+#define IRDMA_MEM_REG_MIN_REQ_LEN offsetofend(struct irdma_mem_reg_req, sq_pages)
+ struct irdma_device *iwdev = to_iwdev(pd->device);
+ struct irdma_mem_reg_req req = {};
+ struct ib_umem *region = NULL;
+ struct irdma_mr *iwmr = NULL;
+ int err;
+
+ if (dmah)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ if (len > iwdev->rf->sc_dev.hw_attrs.max_mr_size)
+ return ERR_PTR(-EINVAL);
+
+ if (udata->inlen < IRDMA_MEM_REG_MIN_REQ_LEN)
+ return ERR_PTR(-EINVAL);
+
+ region = ib_umem_get(pd->device, start, len, access);
+
+ if (IS_ERR(region)) {
+ ibdev_dbg(&iwdev->ibdev,
+ "VERBS: Failed to create ib_umem region\n");
+ return (struct ib_mr *)region;
+ }
+
+ if (ib_copy_from_udata(&req, udata, min(sizeof(req), udata->inlen))) {
+ ib_umem_release(region);
+ return ERR_PTR(-EFAULT);
+ }
+
+ iwmr = irdma_alloc_iwmr(region, pd, virt, req.reg_type);
+ if (IS_ERR(iwmr)) {
+ ib_umem_release(region);
+ return (struct ib_mr *)iwmr;
+ }
+
+ switch (req.reg_type) {
+ case IRDMA_MEMREG_TYPE_QP:
+ err = irdma_reg_user_mr_type_qp(req, udata, iwmr);
+ if (err)
+ goto error;
+
+ break;
+ case IRDMA_MEMREG_TYPE_SRQ:
+ err = irdma_reg_user_mr_type_srq(req, udata, iwmr);
+ if (err)
+ goto error;
+
+ break;
+ case IRDMA_MEMREG_TYPE_CQ:
+ err = irdma_reg_user_mr_type_cq(req, udata, iwmr);
+ if (err)
+ goto error;
+ break;
+ case IRDMA_MEMREG_TYPE_MEM:
+ err = irdma_reg_user_mr_type_mem(iwmr, access, true);
+ if (err)
+ goto error;
+
+ break;
+ default:
+ err = -EINVAL;
+ goto error;
+ }
+
+ return &iwmr->ibmr;
+error:
+ ib_umem_release(region);
+ irdma_free_iwmr(iwmr);
+
+ return ERR_PTR(err);
+}
+
+static struct ib_mr *irdma_reg_user_mr_dmabuf(struct ib_pd *pd, u64 start,
+ u64 len, u64 virt,
+ int fd, int access,
+ struct ib_dmah *dmah,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct irdma_device *iwdev = to_iwdev(pd->device);
+ struct ib_umem_dmabuf *umem_dmabuf;
+ struct irdma_mr *iwmr;
+ int err;
+
+ if (dmah)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ if (len > iwdev->rf->sc_dev.hw_attrs.max_mr_size)
+ return ERR_PTR(-EINVAL);
+
+ umem_dmabuf = ib_umem_dmabuf_get_pinned(pd->device, start, len, fd, access);
+ if (IS_ERR(umem_dmabuf)) {
+ ibdev_dbg(&iwdev->ibdev, "Failed to get dmabuf umem[%pe]\n",
+ umem_dmabuf);
+ return ERR_CAST(umem_dmabuf);
+ }
+
+ iwmr = irdma_alloc_iwmr(&umem_dmabuf->umem, pd, virt, IRDMA_MEMREG_TYPE_MEM);
+ if (IS_ERR(iwmr)) {
+ err = PTR_ERR(iwmr);
+ goto err_release;
+ }
+
+ err = irdma_reg_user_mr_type_mem(iwmr, access, true);
+ if (err)
+ goto err_iwmr;
+
+ return &iwmr->ibmr;
+
+err_iwmr:
+ irdma_free_iwmr(iwmr);
+
+err_release:
+ ib_umem_release(&umem_dmabuf->umem);
+
+ return ERR_PTR(err);
+}
+
+static int irdma_hwdereg_mr(struct ib_mr *ib_mr)
+{
+ struct irdma_device *iwdev = to_iwdev(ib_mr->device);
+ struct irdma_mr *iwmr = to_iwmr(ib_mr);
+ struct irdma_pd *iwpd = to_iwpd(ib_mr->pd);
+ struct irdma_dealloc_stag_info *info;
+ struct irdma_pbl *iwpbl = &iwmr->iwpbl;
+ struct irdma_cqp_request *cqp_request;
+ struct cqp_cmds_info *cqp_info;
+ int status;
+
+ /* Skip HW MR de-register when it is already de-registered
+ * during an MR re-reregister and the re-registration fails
+ */
+ if (!iwmr->is_hwreg)
+ return 0;
+
+ cqp_request = irdma_alloc_and_get_cqp_request(&iwdev->rf->cqp, true);
+ if (!cqp_request)
+ return -ENOMEM;
+
+ cqp_info = &cqp_request->info;
+ info = &cqp_info->in.u.dealloc_stag.info;
+ info->pd_id = iwpd->sc_pd.pd_id;
+ info->stag_idx = ib_mr->rkey >> IRDMA_CQPSQ_STAG_IDX_S;
+ info->mr = true;
+ if (iwpbl->pbl_allocated)
+ info->dealloc_pbl = true;
+
+ cqp_info->cqp_cmd = IRDMA_OP_DEALLOC_STAG;
+ cqp_info->post_sq = 1;
+ cqp_info->in.u.dealloc_stag.dev = &iwdev->rf->sc_dev;
+ cqp_info->in.u.dealloc_stag.scratch = (uintptr_t)cqp_request;
+ status = irdma_handle_cqp_op(iwdev->rf, cqp_request);
+ irdma_put_cqp_request(&iwdev->rf->cqp, cqp_request);
+ if (status)
+ return status;
+
+ iwmr->is_hwreg = false;
+ return 0;
+}
+
+/*
+ * irdma_rereg_mr_trans - Re-register a user MR for a change translation.
+ * @iwmr: ptr of iwmr
+ * @start: virtual start address
+ * @len: length of mr
+ * @virt: virtual address
+ *
+ * Re-register a user memory region when a change translation is requested.
+ * Re-register a new region while reusing the stag from the original registration.
+ */
+static int irdma_rereg_mr_trans(struct irdma_mr *iwmr, u64 start, u64 len,
+ u64 virt)
+{
+ struct irdma_device *iwdev = to_iwdev(iwmr->ibmr.device);
+ struct irdma_pbl *iwpbl = &iwmr->iwpbl;
+ struct ib_pd *pd = iwmr->ibmr.pd;
+ struct ib_umem *region;
+ int err;
+
+ region = ib_umem_get(pd->device, start, len, iwmr->access);
+ if (IS_ERR(region))
+ return PTR_ERR(region);
+
+ iwmr->region = region;
+ iwmr->ibmr.iova = virt;
+ iwmr->ibmr.pd = pd;
+ iwmr->page_size = ib_umem_find_best_pgsz(region,
+ iwdev->rf->sc_dev.hw_attrs.page_size_cap,
+ virt);
+ if (unlikely(!iwmr->page_size)) {
+ err = -EOPNOTSUPP;
+ goto err;
+ }
+
+ iwmr->len = region->length;
+ iwpbl->user_base = virt;
+ iwmr->page_cnt = ib_umem_num_dma_blocks(region, iwmr->page_size);
+
+ err = irdma_reg_user_mr_type_mem(iwmr, iwmr->access, false);
+ if (err)
+ goto err;
+
+ return 0;
+
+err:
+ ib_umem_release(region);
+ return err;
+}
+
+/*
+ * irdma_rereg_user_mr - Re-Register a user memory region(MR)
+ * @ibmr: ib mem to access iwarp mr pointer
+ * @flags: bit mask to indicate which of the attr's of MR modified
+ * @start: virtual start address
+ * @len: length of mr
+ * @virt: virtual address
+ * @new_access: bit mask of access flags
+ * @new_pd: ptr of pd
+ * @udata: user data
+ *
+ * Return:
+ * NULL - Success, existing MR updated
+ * ERR_PTR - error occurred
+ */
+static struct ib_mr *irdma_rereg_user_mr(struct ib_mr *ib_mr, int flags,
+ u64 start, u64 len, u64 virt,
+ int new_access, struct ib_pd *new_pd,
+ struct ib_udata *udata)
+{
+ struct irdma_device *iwdev = to_iwdev(ib_mr->device);
+ struct irdma_mr *iwmr = to_iwmr(ib_mr);
+ struct irdma_pbl *iwpbl = &iwmr->iwpbl;
+ int ret;
+
+ if (len > iwdev->rf->sc_dev.hw_attrs.max_mr_size)
+ return ERR_PTR(-EINVAL);
+
+ if (flags & ~(IB_MR_REREG_TRANS | IB_MR_REREG_PD | IB_MR_REREG_ACCESS))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ ret = irdma_hwdereg_mr(ib_mr);
+ if (ret)
+ return ERR_PTR(ret);
+
+ if (flags & IB_MR_REREG_ACCESS)
+ iwmr->access = new_access;
+
+ if (flags & IB_MR_REREG_PD) {
+ iwmr->ibmr.pd = new_pd;
+ iwmr->ibmr.device = new_pd->device;
+ }
+
+ if (flags & IB_MR_REREG_TRANS) {
+ if (iwpbl->pbl_allocated) {
+ irdma_free_pble(iwdev->rf->pble_rsrc,
+ &iwpbl->pble_alloc);
+ iwpbl->pbl_allocated = false;
+ }
+ if (iwmr->region) {
+ ib_umem_release(iwmr->region);
+ iwmr->region = NULL;
+ }
+
+ ret = irdma_rereg_mr_trans(iwmr, start, len, virt);
+ } else
+ ret = irdma_hwreg_mr(iwdev, iwmr, iwmr->access);
+ if (ret)
+ return ERR_PTR(ret);
+
+ return NULL;
+}
+
+/**
+ * irdma_reg_phys_mr - register kernel physical memory
+ * @pd: ibpd pointer
+ * @addr: physical address of memory to register
+ * @size: size of memory to register
+ * @access: Access rights
+ * @iova_start: start of virtual address for physical buffers
+ * @dma_mr: Flag indicating whether this region is a PD DMA MR
+ */
+struct ib_mr *irdma_reg_phys_mr(struct ib_pd *pd, u64 addr, u64 size, int access,
+ u64 *iova_start, bool dma_mr)
+{
+ struct irdma_device *iwdev = to_iwdev(pd->device);
+ struct irdma_pbl *iwpbl;
+ struct irdma_mr *iwmr;
+ u32 stag;
+ int ret;
+
+ iwmr = kzalloc(sizeof(*iwmr), GFP_KERNEL);
+ if (!iwmr)
+ return ERR_PTR(-ENOMEM);
+
+ iwmr->ibmr.pd = pd;
+ iwmr->ibmr.device = pd->device;
+ iwpbl = &iwmr->iwpbl;
+ iwpbl->iwmr = iwmr;
+ iwmr->type = IRDMA_MEMREG_TYPE_MEM;
+ iwmr->dma_mr = dma_mr;
+ iwpbl->user_base = *iova_start;
+ stag = irdma_create_stag(iwdev);
+ if (!stag) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ iwmr->stag = stag;
+ iwmr->ibmr.iova = *iova_start;
+ iwmr->ibmr.rkey = stag;
+ iwmr->ibmr.lkey = stag;
+ iwmr->page_cnt = 1;
+ iwmr->pgaddrmem[0] = addr;
+ iwmr->len = size;
+ iwmr->page_size = SZ_4K;
+ ret = irdma_hwreg_mr(iwdev, iwmr, access);
+ if (ret) {
+ irdma_free_stag(iwdev, stag);
+ goto err;
+ }
+
+ return &iwmr->ibmr;
+
+err:
+ kfree(iwmr);
+
+ return ERR_PTR(ret);
+}
+
+/**
+ * irdma_get_dma_mr - register physical mem
+ * @pd: ptr of pd
+ * @acc: access for memory
+ */
+static struct ib_mr *irdma_get_dma_mr(struct ib_pd *pd, int acc)
+{
+ u64 kva = 0;
+
+ return irdma_reg_phys_mr(pd, 0, 0, acc, &kva, true);
+}
+
+/**
+ * irdma_del_memlist - Deleting pbl list entries for CQ/QP
+ * @iwmr: iwmr for IB's user page addresses
+ * @ucontext: ptr to user context
+ */
+static void irdma_del_memlist(struct irdma_mr *iwmr,
+ struct irdma_ucontext *ucontext)
+{
+ struct irdma_pbl *iwpbl = &iwmr->iwpbl;
+ unsigned long flags;
+
+ switch (iwmr->type) {
+ case IRDMA_MEMREG_TYPE_CQ:
+ spin_lock_irqsave(&ucontext->cq_reg_mem_list_lock, flags);
+ if (iwpbl->on_list) {
+ iwpbl->on_list = false;
+ list_del(&iwpbl->list);
+ }
+ spin_unlock_irqrestore(&ucontext->cq_reg_mem_list_lock, flags);
+ break;
+ case IRDMA_MEMREG_TYPE_QP:
+ spin_lock_irqsave(&ucontext->qp_reg_mem_list_lock, flags);
+ if (iwpbl->on_list) {
+ iwpbl->on_list = false;
+ list_del(&iwpbl->list);
+ }
+ spin_unlock_irqrestore(&ucontext->qp_reg_mem_list_lock, flags);
+ break;
+ case IRDMA_MEMREG_TYPE_SRQ:
+ spin_lock_irqsave(&ucontext->srq_reg_mem_list_lock, flags);
+ if (iwpbl->on_list) {
+ iwpbl->on_list = false;
+ list_del(&iwpbl->list);
+ }
+ spin_unlock_irqrestore(&ucontext->srq_reg_mem_list_lock, flags);
+ break;
+ default:
+ break;
+ }
+}
+
+/**
+ * irdma_dereg_mr - deregister mr
+ * @ib_mr: mr ptr for dereg
+ * @udata: user data
+ */
+static int irdma_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata)
+{
+ struct irdma_mr *iwmr = to_iwmr(ib_mr);
+ struct irdma_device *iwdev = to_iwdev(ib_mr->device);
+ struct irdma_pbl *iwpbl = &iwmr->iwpbl;
+ int ret;
+
+ if (iwmr->type != IRDMA_MEMREG_TYPE_MEM) {
+ if (iwmr->region) {
+ struct irdma_ucontext *ucontext;
+
+ ucontext = rdma_udata_to_drv_context(udata,
+ struct irdma_ucontext,
+ ibucontext);
+ irdma_del_memlist(iwmr, ucontext);
+ }
+ goto done;
+ }
+
+ ret = irdma_hwdereg_mr(ib_mr);
+ if (ret)
+ return ret;
+
+ irdma_free_stag(iwdev, iwmr->stag);
+done:
+ if (iwpbl->pbl_allocated)
+ irdma_free_pble(iwdev->rf->pble_rsrc, &iwpbl->pble_alloc);
+
+ if (iwmr->region)
+ ib_umem_release(iwmr->region);
+
+ kfree(iwmr);
+
+ return 0;
+}
+
+/**
+ * irdma_post_send - kernel application wr
+ * @ibqp: qp ptr for wr
+ * @ib_wr: work request ptr
+ * @bad_wr: return of bad wr if err
+ */
+static int irdma_post_send(struct ib_qp *ibqp,
+ const struct ib_send_wr *ib_wr,
+ const struct ib_send_wr **bad_wr)
+{
+ struct irdma_qp *iwqp;
+ struct irdma_qp_uk *ukqp;
+ struct irdma_sc_dev *dev;
+ struct irdma_post_sq_info info;
+ int err = 0;
+ unsigned long flags;
+ bool inv_stag;
+ struct irdma_ah *ah;
+
+ iwqp = to_iwqp(ibqp);
+ ukqp = &iwqp->sc_qp.qp_uk;
+ dev = &iwqp->iwdev->rf->sc_dev;
+
+ spin_lock_irqsave(&iwqp->lock, flags);
+ while (ib_wr) {
+ memset(&info, 0, sizeof(info));
+ inv_stag = false;
+ info.wr_id = (ib_wr->wr_id);
+ if ((ib_wr->send_flags & IB_SEND_SIGNALED) || iwqp->sig_all)
+ info.signaled = true;
+ if (ib_wr->send_flags & IB_SEND_FENCE)
+ info.read_fence = true;
+ switch (ib_wr->opcode) {
+ case IB_WR_ATOMIC_CMP_AND_SWP:
+ if (unlikely(!(dev->hw_attrs.uk_attrs.feature_flags &
+ IRDMA_FEATURE_ATOMIC_OPS))) {
+ err = -EINVAL;
+ break;
+ }
+ info.op_type = IRDMA_OP_TYPE_ATOMIC_COMPARE_AND_SWAP;
+ info.op.atomic_compare_swap.tagged_offset = ib_wr->sg_list[0].addr;
+ info.op.atomic_compare_swap.remote_tagged_offset =
+ atomic_wr(ib_wr)->remote_addr;
+ info.op.atomic_compare_swap.swap_data_bytes = atomic_wr(ib_wr)->swap;
+ info.op.atomic_compare_swap.compare_data_bytes =
+ atomic_wr(ib_wr)->compare_add;
+ info.op.atomic_compare_swap.stag = ib_wr->sg_list[0].lkey;
+ info.op.atomic_compare_swap.remote_stag = atomic_wr(ib_wr)->rkey;
+ err = irdma_uk_atomic_compare_swap(ukqp, &info, false);
+ break;
+ case IB_WR_ATOMIC_FETCH_AND_ADD:
+ if (unlikely(!(dev->hw_attrs.uk_attrs.feature_flags &
+ IRDMA_FEATURE_ATOMIC_OPS))) {
+ err = -EINVAL;
+ break;
+ }
+ info.op_type = IRDMA_OP_TYPE_ATOMIC_FETCH_AND_ADD;
+ info.op.atomic_fetch_add.tagged_offset = ib_wr->sg_list[0].addr;
+ info.op.atomic_fetch_add.remote_tagged_offset =
+ atomic_wr(ib_wr)->remote_addr;
+ info.op.atomic_fetch_add.fetch_add_data_bytes =
+ atomic_wr(ib_wr)->compare_add;
+ info.op.atomic_fetch_add.stag = ib_wr->sg_list[0].lkey;
+ info.op.atomic_fetch_add.remote_stag =
+ atomic_wr(ib_wr)->rkey;
+ err = irdma_uk_atomic_fetch_add(ukqp, &info, false);
+ break;
+ case IB_WR_SEND_WITH_IMM:
+ if (ukqp->qp_caps & IRDMA_SEND_WITH_IMM) {
+ info.imm_data_valid = true;
+ info.imm_data = ntohl(ib_wr->ex.imm_data);
+ } else {
+ err = -EINVAL;
+ break;
+ }
+ fallthrough;
+ case IB_WR_SEND:
+ case IB_WR_SEND_WITH_INV:
+ if (ib_wr->opcode == IB_WR_SEND ||
+ ib_wr->opcode == IB_WR_SEND_WITH_IMM) {
+ if (ib_wr->send_flags & IB_SEND_SOLICITED)
+ info.op_type = IRDMA_OP_TYPE_SEND_SOL;
+ else
+ info.op_type = IRDMA_OP_TYPE_SEND;
+ } else {
+ if (ib_wr->send_flags & IB_SEND_SOLICITED)
+ info.op_type = IRDMA_OP_TYPE_SEND_SOL_INV;
+ else
+ info.op_type = IRDMA_OP_TYPE_SEND_INV;
+ info.stag_to_inv = ib_wr->ex.invalidate_rkey;
+ }
+
+ info.op.send.num_sges = ib_wr->num_sge;
+ info.op.send.sg_list = ib_wr->sg_list;
+ if (iwqp->ibqp.qp_type == IB_QPT_UD ||
+ iwqp->ibqp.qp_type == IB_QPT_GSI) {
+ ah = to_iwah(ud_wr(ib_wr)->ah);
+ info.op.send.ah_id = ah->sc_ah.ah_info.ah_idx;
+ info.op.send.qkey = ud_wr(ib_wr)->remote_qkey;
+ info.op.send.dest_qp = ud_wr(ib_wr)->remote_qpn;
+ }
+
+ if (ib_wr->send_flags & IB_SEND_INLINE)
+ err = irdma_uk_inline_send(ukqp, &info, false);
+ else
+ err = irdma_uk_send(ukqp, &info, false);
+ break;
+ case IB_WR_RDMA_WRITE_WITH_IMM:
+ if (ukqp->qp_caps & IRDMA_WRITE_WITH_IMM) {
+ info.imm_data_valid = true;
+ info.imm_data = ntohl(ib_wr->ex.imm_data);
+ } else {
+ err = -EINVAL;
+ break;
+ }
+ fallthrough;
+ case IB_WR_RDMA_WRITE:
+ if (ib_wr->send_flags & IB_SEND_SOLICITED)
+ info.op_type = IRDMA_OP_TYPE_RDMA_WRITE_SOL;
+ else
+ info.op_type = IRDMA_OP_TYPE_RDMA_WRITE;
+
+ info.op.rdma_write.num_lo_sges = ib_wr->num_sge;
+ info.op.rdma_write.lo_sg_list = ib_wr->sg_list;
+ info.op.rdma_write.rem_addr.addr =
+ rdma_wr(ib_wr)->remote_addr;
+ info.op.rdma_write.rem_addr.lkey = rdma_wr(ib_wr)->rkey;
+ if (ib_wr->send_flags & IB_SEND_INLINE)
+ err = irdma_uk_inline_rdma_write(ukqp, &info, false);
+ else
+ err = irdma_uk_rdma_write(ukqp, &info, false);
+ break;
+ case IB_WR_RDMA_READ_WITH_INV:
+ inv_stag = true;
+ fallthrough;
+ case IB_WR_RDMA_READ:
+ if (ib_wr->num_sge >
+ dev->hw_attrs.uk_attrs.max_hw_read_sges) {
+ err = -EINVAL;
+ break;
+ }
+ info.op_type = IRDMA_OP_TYPE_RDMA_READ;
+ info.op.rdma_read.rem_addr.addr = rdma_wr(ib_wr)->remote_addr;
+ info.op.rdma_read.rem_addr.lkey = rdma_wr(ib_wr)->rkey;
+ info.op.rdma_read.lo_sg_list = (void *)ib_wr->sg_list;
+ info.op.rdma_read.num_lo_sges = ib_wr->num_sge;
+ err = irdma_uk_rdma_read(ukqp, &info, inv_stag, false);
+ break;
+ case IB_WR_LOCAL_INV:
+ info.op_type = IRDMA_OP_TYPE_INV_STAG;
+ info.local_fence = true;
+ info.op.inv_local_stag.target_stag = ib_wr->ex.invalidate_rkey;
+ err = irdma_uk_stag_local_invalidate(ukqp, &info, true);
+ break;
+ case IB_WR_REG_MR: {
+ struct irdma_mr *iwmr = to_iwmr(reg_wr(ib_wr)->mr);
+ struct irdma_pble_alloc *palloc = &iwmr->iwpbl.pble_alloc;
+ struct irdma_fast_reg_stag_info stag_info = {};
+
+ stag_info.signaled = info.signaled;
+ stag_info.read_fence = info.read_fence;
+ stag_info.access_rights =
+ irdma_get_mr_access(reg_wr(ib_wr)->access,
+ dev->hw_attrs.uk_attrs.hw_rev);
+ stag_info.stag_key = reg_wr(ib_wr)->key & 0xff;
+ stag_info.stag_idx = reg_wr(ib_wr)->key >> 8;
+ stag_info.page_size = reg_wr(ib_wr)->mr->page_size;
+ stag_info.wr_id = ib_wr->wr_id;
+ stag_info.addr_type = IRDMA_ADDR_TYPE_VA_BASED;
+ stag_info.va = (void *)(uintptr_t)iwmr->ibmr.iova;
+ stag_info.total_len = iwmr->ibmr.length;
+ stag_info.reg_addr_pa = *palloc->level1.addr;
+ stag_info.first_pm_pbl_index = palloc->level1.idx;
+ stag_info.local_fence = ib_wr->send_flags & IB_SEND_FENCE;
+ if (iwmr->npages > IRDMA_MIN_PAGES_PER_FMR)
+ stag_info.chunk_size = 1;
+ err = irdma_sc_mr_fast_register(&iwqp->sc_qp, &stag_info,
+ true);
+ break;
+ }
+ default:
+ err = -EINVAL;
+ ibdev_dbg(&iwqp->iwdev->ibdev,
+ "VERBS: upost_send bad opcode = 0x%x\n",
+ ib_wr->opcode);
+ break;
+ }
+
+ if (err)
+ break;
+ ib_wr = ib_wr->next;
+ }
+
+ if (!iwqp->flush_issued) {
+ if (iwqp->hw_iwarp_state <= IRDMA_QP_STATE_RTS)
+ irdma_uk_qp_post_wr(ukqp);
+ spin_unlock_irqrestore(&iwqp->lock, flags);
+ } else {
+ spin_unlock_irqrestore(&iwqp->lock, flags);
+ mod_delayed_work(iwqp->iwdev->cleanup_wq, &iwqp->dwork_flush,
+ msecs_to_jiffies(IRDMA_FLUSH_DELAY_MS));
+ }
+
+ if (err)
+ *bad_wr = ib_wr;
+
+ return err;
+}
+
+/**
+ * irdma_post_srq_recv - post receive wr for kernel application
+ * @ibsrq: ib srq pointer
+ * @ib_wr: work request for receive
+ * @bad_wr: bad wr caused an error
+ */
+static int irdma_post_srq_recv(struct ib_srq *ibsrq,
+ const struct ib_recv_wr *ib_wr,
+ const struct ib_recv_wr **bad_wr)
+{
+ struct irdma_srq *iwsrq = to_iwsrq(ibsrq);
+ struct irdma_srq_uk *uksrq = &iwsrq->sc_srq.srq_uk;
+ struct irdma_post_rq_info post_recv = {};
+ unsigned long flags;
+ int err = 0;
+
+ spin_lock_irqsave(&iwsrq->lock, flags);
+ while (ib_wr) {
+ if (ib_wr->num_sge > uksrq->max_srq_frag_cnt) {
+ err = -EINVAL;
+ goto out;
+ }
+ post_recv.num_sges = ib_wr->num_sge;
+ post_recv.wr_id = ib_wr->wr_id;
+ post_recv.sg_list = ib_wr->sg_list;
+ err = irdma_uk_srq_post_receive(uksrq, &post_recv);
+ if (err)
+ goto out;
+
+ ib_wr = ib_wr->next;
+ }
+
+out:
+ spin_unlock_irqrestore(&iwsrq->lock, flags);
+
+ if (err)
+ *bad_wr = ib_wr;
+
+ return err;
+}
+
+/**
+ * irdma_post_recv - post receive wr for kernel application
+ * @ibqp: ib qp pointer
+ * @ib_wr: work request for receive
+ * @bad_wr: bad wr caused an error
+ */
+static int irdma_post_recv(struct ib_qp *ibqp,
+ const struct ib_recv_wr *ib_wr,
+ const struct ib_recv_wr **bad_wr)
+{
+ struct irdma_qp *iwqp;
+ struct irdma_qp_uk *ukqp;
+ struct irdma_post_rq_info post_recv = {};
+ unsigned long flags;
+ int err = 0;
+
+ iwqp = to_iwqp(ibqp);
+ ukqp = &iwqp->sc_qp.qp_uk;
+
+ if (ukqp->srq_uk) {
+ *bad_wr = ib_wr;
+ return -EINVAL;
+ }
+
+ spin_lock_irqsave(&iwqp->lock, flags);
+ while (ib_wr) {
+ post_recv.num_sges = ib_wr->num_sge;
+ post_recv.wr_id = ib_wr->wr_id;
+ post_recv.sg_list = ib_wr->sg_list;
+ err = irdma_uk_post_receive(ukqp, &post_recv);
+ if (err) {
+ ibdev_dbg(&iwqp->iwdev->ibdev,
+ "VERBS: post_recv err %d\n", err);
+ goto out;
+ }
+
+ ib_wr = ib_wr->next;
+ }
+
+out:
+ spin_unlock_irqrestore(&iwqp->lock, flags);
+ if (iwqp->flush_issued)
+ mod_delayed_work(iwqp->iwdev->cleanup_wq, &iwqp->dwork_flush,
+ msecs_to_jiffies(IRDMA_FLUSH_DELAY_MS));
+
+ if (err)
+ *bad_wr = ib_wr;
+
+ return err;
+}
+
+/**
+ * irdma_flush_err_to_ib_wc_status - return change flush error code to IB status
+ * @opcode: iwarp flush code
+ */
+static enum ib_wc_status irdma_flush_err_to_ib_wc_status(enum irdma_flush_opcode opcode)
+{
+ switch (opcode) {
+ case FLUSH_PROT_ERR:
+ return IB_WC_LOC_PROT_ERR;
+ case FLUSH_REM_ACCESS_ERR:
+ return IB_WC_REM_ACCESS_ERR;
+ case FLUSH_LOC_QP_OP_ERR:
+ return IB_WC_LOC_QP_OP_ERR;
+ case FLUSH_REM_OP_ERR:
+ return IB_WC_REM_OP_ERR;
+ case FLUSH_LOC_LEN_ERR:
+ return IB_WC_LOC_LEN_ERR;
+ case FLUSH_GENERAL_ERR:
+ return IB_WC_WR_FLUSH_ERR;
+ case FLUSH_RETRY_EXC_ERR:
+ return IB_WC_RETRY_EXC_ERR;
+ case FLUSH_MW_BIND_ERR:
+ return IB_WC_MW_BIND_ERR;
+ case FLUSH_REM_INV_REQ_ERR:
+ return IB_WC_REM_INV_REQ_ERR;
+ case FLUSH_RNR_RETRY_EXC_ERR:
+ return IB_WC_RNR_RETRY_EXC_ERR;
+ case FLUSH_FATAL_ERR:
+ default:
+ return IB_WC_FATAL_ERR;
+ }
+}
+
+/**
+ * irdma_process_cqe - process cqe info
+ * @entry: processed cqe
+ * @cq_poll_info: cqe info
+ */
+static void irdma_process_cqe(struct ib_wc *entry,
+ struct irdma_cq_poll_info *cq_poll_info)
+{
+ struct irdma_sc_qp *qp;
+
+ entry->wc_flags = 0;
+ entry->pkey_index = 0;
+ entry->wr_id = cq_poll_info->wr_id;
+
+ qp = cq_poll_info->qp_handle;
+ entry->qp = qp->qp_uk.back_qp;
+
+ if (cq_poll_info->error) {
+ entry->status = (cq_poll_info->comp_status == IRDMA_COMPL_STATUS_FLUSHED) ?
+ irdma_flush_err_to_ib_wc_status(cq_poll_info->minor_err) : IB_WC_GENERAL_ERR;
+
+ entry->vendor_err = cq_poll_info->major_err << 16 |
+ cq_poll_info->minor_err;
+ } else {
+ entry->status = IB_WC_SUCCESS;
+ if (cq_poll_info->imm_valid) {
+ entry->ex.imm_data = htonl(cq_poll_info->imm_data);
+ entry->wc_flags |= IB_WC_WITH_IMM;
+ }
+ if (cq_poll_info->ud_smac_valid) {
+ ether_addr_copy(entry->smac, cq_poll_info->ud_smac);
+ entry->wc_flags |= IB_WC_WITH_SMAC;
+ }
+
+ if (cq_poll_info->ud_vlan_valid) {
+ u16 vlan = cq_poll_info->ud_vlan & VLAN_VID_MASK;
+
+ entry->sl = cq_poll_info->ud_vlan >> VLAN_PRIO_SHIFT;
+ if (vlan) {
+ entry->vlan_id = vlan;
+ entry->wc_flags |= IB_WC_WITH_VLAN;
+ }
+ } else {
+ entry->sl = 0;
+ }
+ }
+
+ if (cq_poll_info->q_type == IRDMA_CQE_QTYPE_SQ) {
+ set_ib_wc_op_sq(cq_poll_info, entry);
+ } else {
+ if (qp->dev->hw_attrs.uk_attrs.hw_rev <= IRDMA_GEN_2)
+ set_ib_wc_op_rq(cq_poll_info, entry,
+ qp->qp_uk.qp_caps & IRDMA_SEND_WITH_IMM ?
+ true : false);
+ else
+ set_ib_wc_op_rq_gen_3(cq_poll_info, entry);
+ if (qp->qp_uk.qp_type != IRDMA_QP_TYPE_ROCE_UD &&
+ cq_poll_info->stag_invalid_set) {
+ entry->ex.invalidate_rkey = cq_poll_info->inv_stag;
+ entry->wc_flags |= IB_WC_WITH_INVALIDATE;
+ }
+ }
+
+ if (qp->qp_uk.qp_type == IRDMA_QP_TYPE_ROCE_UD) {
+ entry->src_qp = cq_poll_info->ud_src_qpn;
+ entry->slid = 0;
+ entry->wc_flags |=
+ (IB_WC_GRH | IB_WC_WITH_NETWORK_HDR_TYPE);
+ entry->network_hdr_type = cq_poll_info->ipv4 ?
+ RDMA_NETWORK_IPV4 :
+ RDMA_NETWORK_IPV6;
+ } else {
+ entry->src_qp = cq_poll_info->qp_id;
+ }
+
+ entry->byte_len = cq_poll_info->bytes_xfered;
+}
+
+/**
+ * irdma_poll_one - poll one entry of the CQ
+ * @ukcq: ukcq to poll
+ * @cur_cqe: current CQE info to be filled in
+ * @entry: ibv_wc object to be filled for non-extended CQ or NULL for extended CQ
+ *
+ * Returns the internal irdma device error code or 0 on success
+ */
+static inline int irdma_poll_one(struct irdma_cq_uk *ukcq,
+ struct irdma_cq_poll_info *cur_cqe,
+ struct ib_wc *entry)
+{
+ int ret = irdma_uk_cq_poll_cmpl(ukcq, cur_cqe);
+
+ if (ret)
+ return ret;
+
+ irdma_process_cqe(entry, cur_cqe);
+
+ return 0;
+}
+
+/**
+ * __irdma_poll_cq - poll cq for completion (kernel apps)
+ * @iwcq: cq to poll
+ * @num_entries: number of entries to poll
+ * @entry: wr of a completed entry
+ */
+static int __irdma_poll_cq(struct irdma_cq *iwcq, int num_entries, struct ib_wc *entry)
+{
+ struct list_head *tmp_node, *list_node;
+ struct irdma_cq_buf *last_buf = NULL;
+ struct irdma_cq_poll_info *cur_cqe = &iwcq->cur_cqe;
+ struct irdma_cq_buf *cq_buf;
+ int ret;
+ struct irdma_device *iwdev;
+ struct irdma_cq_uk *ukcq;
+ bool cq_new_cqe = false;
+ int resized_bufs = 0;
+ int npolled = 0;
+
+ iwdev = to_iwdev(iwcq->ibcq.device);
+ ukcq = &iwcq->sc_cq.cq_uk;
+
+ /* go through the list of previously resized CQ buffers */
+ list_for_each_safe(list_node, tmp_node, &iwcq->resize_list) {
+ cq_buf = container_of(list_node, struct irdma_cq_buf, list);
+ while (npolled < num_entries) {
+ ret = irdma_poll_one(&cq_buf->cq_uk, cur_cqe, entry + npolled);
+ if (!ret) {
+ ++npolled;
+ cq_new_cqe = true;
+ continue;
+ }
+ if (ret == -ENOENT)
+ break;
+ /* QP using the CQ is destroyed. Skip reporting this CQE */
+ if (ret == -EFAULT) {
+ cq_new_cqe = true;
+ continue;
+ }
+ goto error;
+ }
+
+ /* save the resized CQ buffer which received the last cqe */
+ if (cq_new_cqe)
+ last_buf = cq_buf;
+ cq_new_cqe = false;
+ }
+
+ /* check the current CQ for new cqes */
+ while (npolled < num_entries) {
+ ret = irdma_poll_one(ukcq, cur_cqe, entry + npolled);
+ if (ret == -ENOENT) {
+ ret = irdma_generated_cmpls(iwcq, cur_cqe);
+ if (!ret)
+ irdma_process_cqe(entry + npolled, cur_cqe);
+ }
+ if (!ret) {
+ ++npolled;
+ cq_new_cqe = true;
+ continue;
+ }
+
+ if (ret == -ENOENT)
+ break;
+ /* QP using the CQ is destroyed. Skip reporting this CQE */
+ if (ret == -EFAULT) {
+ cq_new_cqe = true;
+ continue;
+ }
+ goto error;
+ }
+
+ if (cq_new_cqe)
+ /* all previous CQ resizes are complete */
+ resized_bufs = irdma_process_resize_list(iwcq, iwdev, NULL);
+ else if (last_buf)
+ /* only CQ resizes up to the last_buf are complete */
+ resized_bufs = irdma_process_resize_list(iwcq, iwdev, last_buf);
+ if (resized_bufs)
+ /* report to the HW the number of complete CQ resizes */
+ irdma_uk_cq_set_resized_cnt(ukcq, resized_bufs);
+
+ return npolled;
+error:
+ ibdev_dbg(&iwdev->ibdev, "%s: Error polling CQ, irdma_err: %d\n",
+ __func__, ret);
+
+ return ret;
+}
+
+/**
+ * irdma_poll_cq - poll cq for completion (kernel apps)
+ * @ibcq: cq to poll
+ * @num_entries: number of entries to poll
+ * @entry: wr of a completed entry
+ */
+static int irdma_poll_cq(struct ib_cq *ibcq, int num_entries,
+ struct ib_wc *entry)
+{
+ struct irdma_cq *iwcq;
+ unsigned long flags;
+ int ret;
+
+ iwcq = to_iwcq(ibcq);
+
+ spin_lock_irqsave(&iwcq->lock, flags);
+ ret = __irdma_poll_cq(iwcq, num_entries, entry);
+ spin_unlock_irqrestore(&iwcq->lock, flags);
+
+ return ret;
+}
+
+/**
+ * irdma_req_notify_cq - arm cq kernel application
+ * @ibcq: cq to arm
+ * @notify_flags: notofication flags
+ */
+static int irdma_req_notify_cq(struct ib_cq *ibcq,
+ enum ib_cq_notify_flags notify_flags)
+{
+ struct irdma_cq *iwcq;
+ struct irdma_cq_uk *ukcq;
+ unsigned long flags;
+ enum irdma_cmpl_notify cq_notify;
+ bool promo_event = false;
+ int ret = 0;
+
+ cq_notify = notify_flags == IB_CQ_SOLICITED ?
+ IRDMA_CQ_COMPL_SOLICITED : IRDMA_CQ_COMPL_EVENT;
+ iwcq = to_iwcq(ibcq);
+ ukcq = &iwcq->sc_cq.cq_uk;
+
+ spin_lock_irqsave(&iwcq->lock, flags);
+ /* Only promote to arm the CQ for any event if the last arm event was solicited. */
+ if (iwcq->last_notify == IRDMA_CQ_COMPL_SOLICITED && notify_flags != IB_CQ_SOLICITED)
+ promo_event = true;
+
+ if (!atomic_cmpxchg(&iwcq->armed, 0, 1) || promo_event) {
+ iwcq->last_notify = cq_notify;
+ irdma_uk_cq_request_notification(ukcq, cq_notify);
+ }
+
+ if ((notify_flags & IB_CQ_REPORT_MISSED_EVENTS) &&
+ (!irdma_uk_cq_empty(ukcq) || !list_empty(&iwcq->cmpl_generated)))
+ ret = 1;
+ spin_unlock_irqrestore(&iwcq->lock, flags);
+
+ return ret;
+}
+
+static const struct rdma_stat_desc irdma_hw_stat_descs[] = {
+ /* gen1 - 32-bit */
+ [IRDMA_HW_STAT_INDEX_IP4RXDISCARD].name = "ip4InDiscards",
+ [IRDMA_HW_STAT_INDEX_IP4RXTRUNC].name = "ip4InTruncatedPkts",
+ [IRDMA_HW_STAT_INDEX_IP4TXNOROUTE].name = "ip4OutNoRoutes",
+ [IRDMA_HW_STAT_INDEX_IP6RXDISCARD].name = "ip6InDiscards",
+ [IRDMA_HW_STAT_INDEX_IP6RXTRUNC].name = "ip6InTruncatedPkts",
+ [IRDMA_HW_STAT_INDEX_IP6TXNOROUTE].name = "ip6OutNoRoutes",
+ [IRDMA_HW_STAT_INDEX_RXVLANERR].name = "rxVlanErrors",
+ /* gen1 - 64-bit */
+ [IRDMA_HW_STAT_INDEX_IP4RXOCTS].name = "ip4InOctets",
+ [IRDMA_HW_STAT_INDEX_IP4RXPKTS].name = "ip4InPkts",
+ [IRDMA_HW_STAT_INDEX_IP4RXFRAGS].name = "ip4InReasmRqd",
+ [IRDMA_HW_STAT_INDEX_IP4RXMCPKTS].name = "ip4InMcastPkts",
+ [IRDMA_HW_STAT_INDEX_IP4TXOCTS].name = "ip4OutOctets",
+ [IRDMA_HW_STAT_INDEX_IP4TXPKTS].name = "ip4OutPkts",
+ [IRDMA_HW_STAT_INDEX_IP4TXFRAGS].name = "ip4OutSegRqd",
+ [IRDMA_HW_STAT_INDEX_IP4TXMCPKTS].name = "ip4OutMcastPkts",
+ [IRDMA_HW_STAT_INDEX_IP6RXOCTS].name = "ip6InOctets",
+ [IRDMA_HW_STAT_INDEX_IP6RXPKTS].name = "ip6InPkts",
+ [IRDMA_HW_STAT_INDEX_IP6RXFRAGS].name = "ip6InReasmRqd",
+ [IRDMA_HW_STAT_INDEX_IP6RXMCPKTS].name = "ip6InMcastPkts",
+ [IRDMA_HW_STAT_INDEX_IP6TXOCTS].name = "ip6OutOctets",
+ [IRDMA_HW_STAT_INDEX_IP6TXPKTS].name = "ip6OutPkts",
+ [IRDMA_HW_STAT_INDEX_IP6TXFRAGS].name = "ip6OutSegRqd",
+ [IRDMA_HW_STAT_INDEX_IP6TXMCPKTS].name = "ip6OutMcastPkts",
+ [IRDMA_HW_STAT_INDEX_RDMARXRDS].name = "InRdmaReads",
+ [IRDMA_HW_STAT_INDEX_RDMARXSNDS].name = "InRdmaSends",
+ [IRDMA_HW_STAT_INDEX_RDMARXWRS].name = "InRdmaWrites",
+ [IRDMA_HW_STAT_INDEX_RDMATXRDS].name = "OutRdmaReads",
+ [IRDMA_HW_STAT_INDEX_RDMATXSNDS].name = "OutRdmaSends",
+ [IRDMA_HW_STAT_INDEX_RDMATXWRS].name = "OutRdmaWrites",
+ [IRDMA_HW_STAT_INDEX_RDMAVBND].name = "RdmaBnd",
+ [IRDMA_HW_STAT_INDEX_RDMAVINV].name = "RdmaInv",
+
+ /* gen2 - 32-bit */
+ [IRDMA_HW_STAT_INDEX_RXRPCNPHANDLED].name = "cnpHandled",
+ [IRDMA_HW_STAT_INDEX_RXRPCNPIGNORED].name = "cnpIgnored",
+ [IRDMA_HW_STAT_INDEX_TXNPCNPSENT].name = "cnpSent",
+ /* gen2 - 64-bit */
+ [IRDMA_HW_STAT_INDEX_IP4RXMCOCTS].name = "ip4InMcastOctets",
+ [IRDMA_HW_STAT_INDEX_IP4TXMCOCTS].name = "ip4OutMcastOctets",
+ [IRDMA_HW_STAT_INDEX_IP6RXMCOCTS].name = "ip6InMcastOctets",
+ [IRDMA_HW_STAT_INDEX_IP6TXMCOCTS].name = "ip6OutMcastOctets",
+ [IRDMA_HW_STAT_INDEX_UDPRXPKTS].name = "RxUDP",
+ [IRDMA_HW_STAT_INDEX_UDPTXPKTS].name = "TxUDP",
+ [IRDMA_HW_STAT_INDEX_RXNPECNMARKEDPKTS].name = "RxECNMrkd",
+ [IRDMA_HW_STAT_INDEX_TCPRTXSEG].name = "RetransSegs",
+ [IRDMA_HW_STAT_INDEX_TCPRXOPTERR].name = "InOptErrors",
+ [IRDMA_HW_STAT_INDEX_TCPRXPROTOERR].name = "InProtoErrors",
+ [IRDMA_HW_STAT_INDEX_TCPRXSEGS].name = "InSegs",
+ [IRDMA_HW_STAT_INDEX_TCPTXSEG].name = "OutSegs",
+
+ /* gen3 */
+ [IRDMA_HW_STAT_INDEX_RNR_SENT].name = "RNR sent",
+ [IRDMA_HW_STAT_INDEX_RNR_RCVD].name = "RNR received",
+ [IRDMA_HW_STAT_INDEX_RDMAORDLMTCNT].name = "ord limit count",
+ [IRDMA_HW_STAT_INDEX_RDMAIRDLMTCNT].name = "ird limit count",
+ [IRDMA_HW_STAT_INDEX_RDMARXATS].name = "Rx atomics",
+ [IRDMA_HW_STAT_INDEX_RDMATXATS].name = "Tx atomics",
+ [IRDMA_HW_STAT_INDEX_NAKSEQERR].name = "Nak Sequence Error",
+ [IRDMA_HW_STAT_INDEX_NAKSEQERR_IMPLIED].name = "Nak Sequence Error Implied",
+ [IRDMA_HW_STAT_INDEX_RTO].name = "RTO",
+ [IRDMA_HW_STAT_INDEX_RXOOOPKTS].name = "Rcvd Out of order packets",
+ [IRDMA_HW_STAT_INDEX_ICRCERR].name = "CRC errors",
+};
+
+static int irdma_roce_port_immutable(struct ib_device *ibdev, u32 port_num,
+ struct ib_port_immutable *immutable)
+{
+ struct ib_port_attr attr;
+ int err;
+
+ immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
+ err = ib_query_port(ibdev, port_num, &attr);
+ if (err)
+ return err;
+
+ immutable->max_mad_size = IB_MGMT_MAD_SIZE;
+ immutable->pkey_tbl_len = attr.pkey_tbl_len;
+ immutable->gid_tbl_len = attr.gid_tbl_len;
+
+ return 0;
+}
+
+static int irdma_iw_port_immutable(struct ib_device *ibdev, u32 port_num,
+ struct ib_port_immutable *immutable)
+{
+ struct ib_port_attr attr;
+ int err;
+
+ immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
+ err = ib_query_port(ibdev, port_num, &attr);
+ if (err)
+ return err;
+ immutable->gid_tbl_len = attr.gid_tbl_len;
+
+ return 0;
+}
+
+static void irdma_get_dev_fw_str(struct ib_device *dev, char *str)
+{
+ struct irdma_device *iwdev = to_iwdev(dev);
+
+ snprintf(str, IB_FW_VERSION_NAME_MAX, "%u.%u",
+ irdma_fw_major_ver(&iwdev->rf->sc_dev),
+ irdma_fw_minor_ver(&iwdev->rf->sc_dev));
+}
+
+/**
+ * irdma_alloc_hw_port_stats - Allocate a hw stats structure
+ * @ibdev: device pointer from stack
+ * @port_num: port number
+ */
+static struct rdma_hw_stats *irdma_alloc_hw_port_stats(struct ib_device *ibdev,
+ u32 port_num)
+{
+ struct irdma_device *iwdev = to_iwdev(ibdev);
+ struct irdma_sc_dev *dev = &iwdev->rf->sc_dev;
+
+ int num_counters = dev->hw_attrs.max_stat_idx;
+ unsigned long lifespan = RDMA_HW_STATS_DEFAULT_LIFESPAN;
+
+ return rdma_alloc_hw_stats_struct(irdma_hw_stat_descs, num_counters,
+ lifespan);
+}
+
+/**
+ * irdma_get_hw_stats - Populates the rdma_hw_stats structure
+ * @ibdev: device pointer from stack
+ * @stats: stats pointer from stack
+ * @port_num: port number
+ * @index: which hw counter the stack is requesting we update
+ */
+static int irdma_get_hw_stats(struct ib_device *ibdev,
+ struct rdma_hw_stats *stats, u32 port_num,
+ int index)
+{
+ struct irdma_device *iwdev = to_iwdev(ibdev);
+ struct irdma_dev_hw_stats *hw_stats = &iwdev->vsi.pestat->hw_stats;
+
+ if (iwdev->rf->rdma_ver >= IRDMA_GEN_2)
+ irdma_cqp_gather_stats_cmd(&iwdev->rf->sc_dev, iwdev->vsi.pestat, true);
+ else
+ irdma_cqp_gather_stats_gen1(&iwdev->rf->sc_dev, iwdev->vsi.pestat);
+
+ memcpy(&stats->value[0], hw_stats, sizeof(u64) * stats->num_counters);
+
+ return stats->num_counters;
+}
+
+/**
+ * irdma_query_gid - Query port GID
+ * @ibdev: device pointer from stack
+ * @port: port number
+ * @index: Entry index
+ * @gid: Global ID
+ */
+static int irdma_query_gid(struct ib_device *ibdev, u32 port, int index,
+ union ib_gid *gid)
+{
+ struct irdma_device *iwdev = to_iwdev(ibdev);
+
+ memset(gid->raw, 0, sizeof(gid->raw));
+ ether_addr_copy(gid->raw, iwdev->netdev->dev_addr);
+
+ return 0;
+}
+
+/**
+ * mcast_list_add - Add a new mcast item to list
+ * @rf: RDMA PCI function
+ * @new_elem: pointer to element to add
+ */
+static void mcast_list_add(struct irdma_pci_f *rf,
+ struct mc_table_list *new_elem)
+{
+ list_add(&new_elem->list, &rf->mc_qht_list.list);
+}
+
+/**
+ * mcast_list_del - Remove an mcast item from list
+ * @mc_qht_elem: pointer to mcast table list element
+ */
+static void mcast_list_del(struct mc_table_list *mc_qht_elem)
+{
+ if (mc_qht_elem)
+ list_del(&mc_qht_elem->list);
+}
+
+/**
+ * mcast_list_lookup_ip - Search mcast list for address
+ * @rf: RDMA PCI function
+ * @ip_mcast: pointer to mcast IP address
+ */
+static struct mc_table_list *mcast_list_lookup_ip(struct irdma_pci_f *rf,
+ u32 *ip_mcast)
+{
+ struct mc_table_list *mc_qht_el;
+ struct list_head *pos, *q;
+
+ list_for_each_safe (pos, q, &rf->mc_qht_list.list) {
+ mc_qht_el = list_entry(pos, struct mc_table_list, list);
+ if (!memcmp(mc_qht_el->mc_info.dest_ip, ip_mcast,
+ sizeof(mc_qht_el->mc_info.dest_ip)))
+ return mc_qht_el;
+ }
+
+ return NULL;
+}
+
+/**
+ * irdma_mcast_cqp_op - perform a mcast cqp operation
+ * @iwdev: irdma device
+ * @mc_grp_ctx: mcast group info
+ * @op: operation
+ *
+ * returns error status
+ */
+static int irdma_mcast_cqp_op(struct irdma_device *iwdev,
+ struct irdma_mcast_grp_info *mc_grp_ctx, u8 op)
+{
+ struct cqp_cmds_info *cqp_info;
+ struct irdma_cqp_request *cqp_request;
+ int status;
+
+ cqp_request = irdma_alloc_and_get_cqp_request(&iwdev->rf->cqp, true);
+ if (!cqp_request)
+ return -ENOMEM;
+
+ cqp_request->info.in.u.mc_create.info = *mc_grp_ctx;
+ cqp_info = &cqp_request->info;
+ cqp_info->cqp_cmd = op;
+ cqp_info->post_sq = 1;
+ cqp_info->in.u.mc_create.scratch = (uintptr_t)cqp_request;
+ cqp_info->in.u.mc_create.cqp = &iwdev->rf->cqp.sc_cqp;
+ status = irdma_handle_cqp_op(iwdev->rf, cqp_request);
+ irdma_put_cqp_request(&iwdev->rf->cqp, cqp_request);
+
+ return status;
+}
+
+/**
+ * irdma_mcast_mac - Get the multicast MAC for an IP address
+ * @ip_addr: IPv4 or IPv6 address
+ * @mac: pointer to result MAC address
+ * @ipv4: flag indicating IPv4 or IPv6
+ *
+ */
+void irdma_mcast_mac(u32 *ip_addr, u8 *mac, bool ipv4)
+{
+ u8 *ip = (u8 *)ip_addr;
+
+ if (ipv4) {
+ unsigned char mac4[ETH_ALEN] = {0x01, 0x00, 0x5E, 0x00,
+ 0x00, 0x00};
+
+ mac4[3] = ip[2] & 0x7F;
+ mac4[4] = ip[1];
+ mac4[5] = ip[0];
+ ether_addr_copy(mac, mac4);
+ } else {
+ unsigned char mac6[ETH_ALEN] = {0x33, 0x33, 0x00, 0x00,
+ 0x00, 0x00};
+
+ mac6[2] = ip[3];
+ mac6[3] = ip[2];
+ mac6[4] = ip[1];
+ mac6[5] = ip[0];
+ ether_addr_copy(mac, mac6);
+ }
+}
+
+/**
+ * irdma_attach_mcast - attach a qp to a multicast group
+ * @ibqp: ptr to qp
+ * @ibgid: pointer to global ID
+ * @lid: local ID
+ *
+ * returns error status
+ */
+static int irdma_attach_mcast(struct ib_qp *ibqp, union ib_gid *ibgid, u16 lid)
+{
+ struct irdma_qp *iwqp = to_iwqp(ibqp);
+ struct irdma_device *iwdev = iwqp->iwdev;
+ struct irdma_pci_f *rf = iwdev->rf;
+ struct mc_table_list *mc_qht_elem;
+ struct irdma_mcast_grp_ctx_entry_info mcg_info = {};
+ unsigned long flags;
+ u32 ip_addr[4] = {};
+ u32 mgn;
+ u32 no_mgs;
+ int ret = 0;
+ bool ipv4;
+ u16 vlan_id;
+ union irdma_sockaddr sgid_addr;
+ unsigned char dmac[ETH_ALEN];
+
+ rdma_gid2ip((struct sockaddr *)&sgid_addr, ibgid);
+
+ if (!ipv6_addr_v4mapped((struct in6_addr *)ibgid)) {
+ irdma_copy_ip_ntohl(ip_addr,
+ sgid_addr.saddr_in6.sin6_addr.in6_u.u6_addr32);
+ irdma_get_vlan_mac_ipv6(ip_addr, &vlan_id, NULL);
+ ipv4 = false;
+ ibdev_dbg(&iwdev->ibdev,
+ "VERBS: qp_id=%d, IP6address=%pI6\n", ibqp->qp_num,
+ ip_addr);
+ irdma_mcast_mac(ip_addr, dmac, false);
+ } else {
+ ip_addr[0] = ntohl(sgid_addr.saddr_in.sin_addr.s_addr);
+ ipv4 = true;
+ vlan_id = irdma_get_vlan_ipv4(ip_addr);
+ irdma_mcast_mac(ip_addr, dmac, true);
+ ibdev_dbg(&iwdev->ibdev,
+ "VERBS: qp_id=%d, IP4address=%pI4, MAC=%pM\n",
+ ibqp->qp_num, ip_addr, dmac);
+ }
+
+ spin_lock_irqsave(&rf->qh_list_lock, flags);
+ mc_qht_elem = mcast_list_lookup_ip(rf, ip_addr);
+ if (!mc_qht_elem) {
+ struct irdma_dma_mem *dma_mem_mc;
+
+ spin_unlock_irqrestore(&rf->qh_list_lock, flags);
+ mc_qht_elem = kzalloc(sizeof(*mc_qht_elem), GFP_KERNEL);
+ if (!mc_qht_elem)
+ return -ENOMEM;
+
+ mc_qht_elem->mc_info.ipv4_valid = ipv4;
+ memcpy(mc_qht_elem->mc_info.dest_ip, ip_addr,
+ sizeof(mc_qht_elem->mc_info.dest_ip));
+ ret = irdma_alloc_rsrc(rf, rf->allocated_mcgs, rf->max_mcg,
+ &mgn, &rf->next_mcg);
+ if (ret) {
+ kfree(mc_qht_elem);
+ return -ENOMEM;
+ }
+
+ mc_qht_elem->mc_info.mgn = mgn;
+ dma_mem_mc = &mc_qht_elem->mc_grp_ctx.dma_mem_mc;
+ dma_mem_mc->size = ALIGN(sizeof(u64) * IRDMA_MAX_MGS_PER_CTX,
+ IRDMA_HW_PAGE_SIZE);
+ dma_mem_mc->va = dma_alloc_coherent(rf->hw.device,
+ dma_mem_mc->size,
+ &dma_mem_mc->pa,
+ GFP_KERNEL);
+ if (!dma_mem_mc->va) {
+ irdma_free_rsrc(rf, rf->allocated_mcgs, mgn);
+ kfree(mc_qht_elem);
+ return -ENOMEM;
+ }
+
+ mc_qht_elem->mc_grp_ctx.mg_id = (u16)mgn;
+ memcpy(mc_qht_elem->mc_grp_ctx.dest_ip_addr, ip_addr,
+ sizeof(mc_qht_elem->mc_grp_ctx.dest_ip_addr));
+ mc_qht_elem->mc_grp_ctx.ipv4_valid = ipv4;
+ mc_qht_elem->mc_grp_ctx.vlan_id = vlan_id;
+ if (vlan_id < VLAN_N_VID)
+ mc_qht_elem->mc_grp_ctx.vlan_valid = true;
+ mc_qht_elem->mc_grp_ctx.hmc_fcn_id = iwdev->rf->sc_dev.hmc_fn_id;
+ mc_qht_elem->mc_grp_ctx.qs_handle =
+ iwqp->sc_qp.vsi->qos[iwqp->sc_qp.user_pri].qs_handle;
+ ether_addr_copy(mc_qht_elem->mc_grp_ctx.dest_mac_addr, dmac);
+
+ spin_lock_irqsave(&rf->qh_list_lock, flags);
+ mcast_list_add(rf, mc_qht_elem);
+ } else {
+ if (mc_qht_elem->mc_grp_ctx.no_of_mgs ==
+ IRDMA_MAX_MGS_PER_CTX) {
+ spin_unlock_irqrestore(&rf->qh_list_lock, flags);
+ return -ENOMEM;
+ }
+ }
+
+ mcg_info.qp_id = iwqp->ibqp.qp_num;
+ no_mgs = mc_qht_elem->mc_grp_ctx.no_of_mgs;
+ irdma_sc_add_mcast_grp(&mc_qht_elem->mc_grp_ctx, &mcg_info);
+ spin_unlock_irqrestore(&rf->qh_list_lock, flags);
+
+ /* Only if there is a change do we need to modify or create */
+ if (!no_mgs) {
+ ret = irdma_mcast_cqp_op(iwdev, &mc_qht_elem->mc_grp_ctx,
+ IRDMA_OP_MC_CREATE);
+ } else if (no_mgs != mc_qht_elem->mc_grp_ctx.no_of_mgs) {
+ ret = irdma_mcast_cqp_op(iwdev, &mc_qht_elem->mc_grp_ctx,
+ IRDMA_OP_MC_MODIFY);
+ } else {
+ return 0;
+ }
+
+ if (ret)
+ goto error;
+
+ return 0;
+
+error:
+ irdma_sc_del_mcast_grp(&mc_qht_elem->mc_grp_ctx, &mcg_info);
+ if (!mc_qht_elem->mc_grp_ctx.no_of_mgs) {
+ mcast_list_del(mc_qht_elem);
+ dma_free_coherent(rf->hw.device,
+ mc_qht_elem->mc_grp_ctx.dma_mem_mc.size,
+ mc_qht_elem->mc_grp_ctx.dma_mem_mc.va,
+ mc_qht_elem->mc_grp_ctx.dma_mem_mc.pa);
+ mc_qht_elem->mc_grp_ctx.dma_mem_mc.va = NULL;
+ irdma_free_rsrc(rf, rf->allocated_mcgs,
+ mc_qht_elem->mc_grp_ctx.mg_id);
+ kfree(mc_qht_elem);
+ }
+
+ return ret;
+}
+
+/**
+ * irdma_detach_mcast - detach a qp from a multicast group
+ * @ibqp: ptr to qp
+ * @ibgid: pointer to global ID
+ * @lid: local ID
+ *
+ * returns error status
+ */
+static int irdma_detach_mcast(struct ib_qp *ibqp, union ib_gid *ibgid, u16 lid)
+{
+ struct irdma_qp *iwqp = to_iwqp(ibqp);
+ struct irdma_device *iwdev = iwqp->iwdev;
+ struct irdma_pci_f *rf = iwdev->rf;
+ u32 ip_addr[4] = {};
+ struct mc_table_list *mc_qht_elem;
+ struct irdma_mcast_grp_ctx_entry_info mcg_info = {};
+ int ret;
+ unsigned long flags;
+ union irdma_sockaddr sgid_addr;
+
+ rdma_gid2ip((struct sockaddr *)&sgid_addr, ibgid);
+ if (!ipv6_addr_v4mapped((struct in6_addr *)ibgid))
+ irdma_copy_ip_ntohl(ip_addr,
+ sgid_addr.saddr_in6.sin6_addr.in6_u.u6_addr32);
+ else
+ ip_addr[0] = ntohl(sgid_addr.saddr_in.sin_addr.s_addr);
+
+ spin_lock_irqsave(&rf->qh_list_lock, flags);
+ mc_qht_elem = mcast_list_lookup_ip(rf, ip_addr);
+ if (!mc_qht_elem) {
+ spin_unlock_irqrestore(&rf->qh_list_lock, flags);
+ ibdev_dbg(&iwdev->ibdev,
+ "VERBS: address not found MCG\n");
+ return 0;
+ }
+
+ mcg_info.qp_id = iwqp->ibqp.qp_num;
+ irdma_sc_del_mcast_grp(&mc_qht_elem->mc_grp_ctx, &mcg_info);
+ if (!mc_qht_elem->mc_grp_ctx.no_of_mgs) {
+ mcast_list_del(mc_qht_elem);
+ spin_unlock_irqrestore(&rf->qh_list_lock, flags);
+ ret = irdma_mcast_cqp_op(iwdev, &mc_qht_elem->mc_grp_ctx,
+ IRDMA_OP_MC_DESTROY);
+ if (ret) {
+ ibdev_dbg(&iwdev->ibdev,
+ "VERBS: failed MC_DESTROY MCG\n");
+ spin_lock_irqsave(&rf->qh_list_lock, flags);
+ mcast_list_add(rf, mc_qht_elem);
+ spin_unlock_irqrestore(&rf->qh_list_lock, flags);
+ return -EAGAIN;
+ }
+
+ dma_free_coherent(rf->hw.device,
+ mc_qht_elem->mc_grp_ctx.dma_mem_mc.size,
+ mc_qht_elem->mc_grp_ctx.dma_mem_mc.va,
+ mc_qht_elem->mc_grp_ctx.dma_mem_mc.pa);
+ mc_qht_elem->mc_grp_ctx.dma_mem_mc.va = NULL;
+ irdma_free_rsrc(rf, rf->allocated_mcgs,
+ mc_qht_elem->mc_grp_ctx.mg_id);
+ kfree(mc_qht_elem);
+ } else {
+ spin_unlock_irqrestore(&rf->qh_list_lock, flags);
+ ret = irdma_mcast_cqp_op(iwdev, &mc_qht_elem->mc_grp_ctx,
+ IRDMA_OP_MC_MODIFY);
+ if (ret) {
+ ibdev_dbg(&iwdev->ibdev,
+ "VERBS: failed Modify MCG\n");
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+static int irdma_create_hw_ah(struct irdma_device *iwdev, struct irdma_ah *ah, bool sleep)
+{
+ struct irdma_pci_f *rf = iwdev->rf;
+ int err;
+
+ err = irdma_alloc_rsrc(rf, rf->allocated_ahs, rf->max_ah, &ah->sc_ah.ah_info.ah_idx,
+ &rf->next_ah);
+ if (err)
+ return err;
+
+ err = irdma_ah_cqp_op(rf, &ah->sc_ah, IRDMA_OP_AH_CREATE, sleep,
+ irdma_gsi_ud_qp_ah_cb, &ah->sc_ah);
+
+ if (err) {
+ ibdev_dbg(&iwdev->ibdev, "VERBS: CQP-OP Create AH fail");
+ goto err_ah_create;
+ }
+
+ if (!sleep) {
+ int cnt = CQP_COMPL_WAIT_TIME_MS * CQP_TIMEOUT_THRESHOLD;
+
+ do {
+ irdma_cqp_ce_handler(rf, &rf->ccq.sc_cq);
+ mdelay(1);
+ } while (!ah->sc_ah.ah_info.ah_valid && --cnt);
+
+ if (!cnt) {
+ ibdev_dbg(&iwdev->ibdev, "VERBS: CQP create AH timed out");
+ err = -ETIMEDOUT;
+ goto err_ah_create;
+ }
+ }
+ return 0;
+
+err_ah_create:
+ irdma_free_rsrc(iwdev->rf, iwdev->rf->allocated_ahs, ah->sc_ah.ah_info.ah_idx);
+
+ return err;
+}
+
+static int irdma_setup_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *attr)
+{
+ struct irdma_pd *pd = to_iwpd(ibah->pd);
+ struct irdma_ah *ah = container_of(ibah, struct irdma_ah, ibah);
+ struct rdma_ah_attr *ah_attr = attr->ah_attr;
+ const struct ib_gid_attr *sgid_attr;
+ struct irdma_device *iwdev = to_iwdev(ibah->pd->device);
+ struct irdma_pci_f *rf = iwdev->rf;
+ struct irdma_sc_ah *sc_ah;
+ struct irdma_ah_info *ah_info;
+ union irdma_sockaddr sgid_addr, dgid_addr;
+ int err;
+ u8 dmac[ETH_ALEN];
+
+ ah->pd = pd;
+ sc_ah = &ah->sc_ah;
+ sc_ah->ah_info.vsi = &iwdev->vsi;
+ irdma_sc_init_ah(&rf->sc_dev, sc_ah);
+ ah->sgid_index = ah_attr->grh.sgid_index;
+ sgid_attr = ah_attr->grh.sgid_attr;
+ memcpy(&ah->dgid, &ah_attr->grh.dgid, sizeof(ah->dgid));
+ rdma_gid2ip((struct sockaddr *)&sgid_addr, &sgid_attr->gid);
+ rdma_gid2ip((struct sockaddr *)&dgid_addr, &ah_attr->grh.dgid);
+ ah->av.attrs = *ah_attr;
+ ah->av.net_type = rdma_gid_attr_network_type(sgid_attr);
+ ah_info = &sc_ah->ah_info;
+ ah_info->pd_idx = pd->sc_pd.pd_id;
+ if (ah_attr->ah_flags & IB_AH_GRH) {
+ ah_info->flow_label = ah_attr->grh.flow_label;
+ ah_info->hop_ttl = ah_attr->grh.hop_limit;
+ ah_info->tc_tos = ah_attr->grh.traffic_class;
+ }
+
+ ether_addr_copy(dmac, ah_attr->roce.dmac);
+ if (ah->av.net_type == RDMA_NETWORK_IPV4) {
+ ah_info->ipv4_valid = true;
+ ah_info->dest_ip_addr[0] =
+ ntohl(dgid_addr.saddr_in.sin_addr.s_addr);
+ ah_info->src_ip_addr[0] =
+ ntohl(sgid_addr.saddr_in.sin_addr.s_addr);
+ ah_info->do_lpbk = irdma_ipv4_is_lpb(ah_info->src_ip_addr[0],
+ ah_info->dest_ip_addr[0]);
+ if (ipv4_is_multicast(dgid_addr.saddr_in.sin_addr.s_addr)) {
+ ah_info->do_lpbk = true;
+ irdma_mcast_mac(ah_info->dest_ip_addr, dmac, true);
+ }
+ } else {
+ irdma_copy_ip_ntohl(ah_info->dest_ip_addr,
+ dgid_addr.saddr_in6.sin6_addr.in6_u.u6_addr32);
+ irdma_copy_ip_ntohl(ah_info->src_ip_addr,
+ sgid_addr.saddr_in6.sin6_addr.in6_u.u6_addr32);
+ ah_info->do_lpbk = irdma_ipv6_is_lpb(ah_info->src_ip_addr,
+ ah_info->dest_ip_addr);
+ if (rdma_is_multicast_addr(&dgid_addr.saddr_in6.sin6_addr)) {
+ ah_info->do_lpbk = true;
+ irdma_mcast_mac(ah_info->dest_ip_addr, dmac, false);
+ }
+ }
+
+ err = rdma_read_gid_l2_fields(sgid_attr, &ah_info->vlan_tag,
+ ah_info->mac_addr);
+ if (err)
+ return err;
+
+ ah_info->dst_arpindex = irdma_add_arp(iwdev->rf, ah_info->dest_ip_addr,
+ ah_info->ipv4_valid, dmac);
+
+ if (ah_info->dst_arpindex == -1)
+ return -EINVAL;
+
+ if (ah_info->vlan_tag >= VLAN_N_VID && iwdev->dcb_vlan_mode)
+ ah_info->vlan_tag = 0;
+
+ if (ah_info->vlan_tag < VLAN_N_VID) {
+ u8 prio = rt_tos2priority(ah_info->tc_tos);
+
+ prio = irdma_roce_get_vlan_prio(sgid_attr, prio);
+
+ ah_info->vlan_tag |= (u16)prio << VLAN_PRIO_SHIFT;
+ ah_info->insert_vlan_tag = true;
+ }
+
+ return 0;
+}
+
+/**
+ * irdma_ah_exists - Check for existing identical AH
+ * @iwdev: irdma device
+ * @new_ah: AH to check for
+ *
+ * returns true if AH is found, false if not found.
+ */
+static bool irdma_ah_exists(struct irdma_device *iwdev,
+ struct irdma_ah *new_ah)
+{
+ struct irdma_ah *ah;
+ u32 key = new_ah->sc_ah.ah_info.dest_ip_addr[0] ^
+ new_ah->sc_ah.ah_info.dest_ip_addr[1] ^
+ new_ah->sc_ah.ah_info.dest_ip_addr[2] ^
+ new_ah->sc_ah.ah_info.dest_ip_addr[3];
+
+ hash_for_each_possible(iwdev->rf->ah_hash_tbl, ah, list, key) {
+ /* Set ah_valid and ah_id the same so memcmp can work */
+ new_ah->sc_ah.ah_info.ah_idx = ah->sc_ah.ah_info.ah_idx;
+ new_ah->sc_ah.ah_info.ah_valid = ah->sc_ah.ah_info.ah_valid;
+ if (!memcmp(&ah->sc_ah.ah_info, &new_ah->sc_ah.ah_info,
+ sizeof(ah->sc_ah.ah_info))) {
+ refcount_inc(&ah->refcnt);
+ new_ah->parent_ah = ah;
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/**
+ * irdma_destroy_ah - Destroy address handle
+ * @ibah: pointer to address handle
+ * @ah_flags: flags for sleepable
+ */
+static int irdma_destroy_ah(struct ib_ah *ibah, u32 ah_flags)
+{
+ struct irdma_device *iwdev = to_iwdev(ibah->device);
+ struct irdma_ah *ah = to_iwah(ibah);
+
+ if ((ah_flags & RDMA_DESTROY_AH_SLEEPABLE) && ah->parent_ah) {
+ mutex_lock(&iwdev->rf->ah_tbl_lock);
+ if (!refcount_dec_and_test(&ah->parent_ah->refcnt)) {
+ mutex_unlock(&iwdev->rf->ah_tbl_lock);
+ return 0;
+ }
+ hash_del(&ah->parent_ah->list);
+ kfree(ah->parent_ah);
+ mutex_unlock(&iwdev->rf->ah_tbl_lock);
+ }
+
+ irdma_ah_cqp_op(iwdev->rf, &ah->sc_ah, IRDMA_OP_AH_DESTROY,
+ false, NULL, ah);
+
+ irdma_free_rsrc(iwdev->rf, iwdev->rf->allocated_ahs,
+ ah->sc_ah.ah_info.ah_idx);
+
+ return 0;
+}
+
+/**
+ * irdma_create_user_ah - create user address handle
+ * @ibah: address handle
+ * @attr: address handle attributes
+ * @udata: User data
+ *
+ * returns 0 on success, error otherwise
+ */
+static int irdma_create_user_ah(struct ib_ah *ibah,
+ struct rdma_ah_init_attr *attr,
+ struct ib_udata *udata)
+{
+#define IRDMA_CREATE_AH_MIN_RESP_LEN offsetofend(struct irdma_create_ah_resp, rsvd)
+ struct irdma_ah *ah = container_of(ibah, struct irdma_ah, ibah);
+ struct irdma_device *iwdev = to_iwdev(ibah->pd->device);
+ struct irdma_create_ah_resp uresp;
+ struct irdma_ah *parent_ah;
+ int err;
+
+ if (udata->outlen < IRDMA_CREATE_AH_MIN_RESP_LEN)
+ return -EINVAL;
+
+ err = irdma_setup_ah(ibah, attr);
+ if (err)
+ return err;
+ mutex_lock(&iwdev->rf->ah_tbl_lock);
+ if (!irdma_ah_exists(iwdev, ah)) {
+ err = irdma_create_hw_ah(iwdev, ah, true);
+ if (err) {
+ mutex_unlock(&iwdev->rf->ah_tbl_lock);
+ return err;
+ }
+ /* Add new AH to list */
+ parent_ah = kmemdup(ah, sizeof(*ah), GFP_KERNEL);
+ if (parent_ah) {
+ u32 key = parent_ah->sc_ah.ah_info.dest_ip_addr[0] ^
+ parent_ah->sc_ah.ah_info.dest_ip_addr[1] ^
+ parent_ah->sc_ah.ah_info.dest_ip_addr[2] ^
+ parent_ah->sc_ah.ah_info.dest_ip_addr[3];
+
+ ah->parent_ah = parent_ah;
+ hash_add(iwdev->rf->ah_hash_tbl, &parent_ah->list, key);
+ refcount_set(&parent_ah->refcnt, 1);
+ }
+ }
+ mutex_unlock(&iwdev->rf->ah_tbl_lock);
+
+ uresp.ah_id = ah->sc_ah.ah_info.ah_idx;
+ err = ib_copy_to_udata(udata, &uresp, min(sizeof(uresp), udata->outlen));
+ if (err)
+ irdma_destroy_ah(ibah, attr->flags);
+
+ return err;
+}
+
+/**
+ * irdma_create_ah - create address handle
+ * @ibah: address handle
+ * @attr: address handle attributes
+ * @udata: NULL
+ *
+ * returns 0 on success, error otherwise
+ */
+static int irdma_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *attr,
+ struct ib_udata *udata)
+{
+ struct irdma_ah *ah = container_of(ibah, struct irdma_ah, ibah);
+ struct irdma_device *iwdev = to_iwdev(ibah->pd->device);
+ int err;
+
+ err = irdma_setup_ah(ibah, attr);
+ if (err)
+ return err;
+ err = irdma_create_hw_ah(iwdev, ah, attr->flags & RDMA_CREATE_AH_SLEEPABLE);
+
+ return err;
+}
+
+/**
+ * irdma_query_ah - Query address handle
+ * @ibah: pointer to address handle
+ * @ah_attr: address handle attributes
+ */
+static int irdma_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr)
+{
+ struct irdma_ah *ah = to_iwah(ibah);
+
+ memset(ah_attr, 0, sizeof(*ah_attr));
+ if (ah->av.attrs.ah_flags & IB_AH_GRH) {
+ ah_attr->ah_flags = IB_AH_GRH;
+ ah_attr->grh.flow_label = ah->sc_ah.ah_info.flow_label;
+ ah_attr->grh.traffic_class = ah->sc_ah.ah_info.tc_tos;
+ ah_attr->grh.hop_limit = ah->sc_ah.ah_info.hop_ttl;
+ ah_attr->grh.sgid_index = ah->sgid_index;
+ memcpy(&ah_attr->grh.dgid, &ah->dgid,
+ sizeof(ah_attr->grh.dgid));
+ }
+
+ return 0;
+}
+
+static enum rdma_link_layer irdma_get_link_layer(struct ib_device *ibdev,
+ u32 port_num)
+{
+ return IB_LINK_LAYER_ETHERNET;
+}
+
+static const struct ib_device_ops irdma_gen1_dev_ops = {
+ .dealloc_driver = irdma_ib_dealloc_device,
+};
+
+static const struct ib_device_ops irdma_gen3_dev_ops = {
+ .alloc_mw = irdma_alloc_mw,
+ .create_srq = irdma_create_srq,
+ .dealloc_mw = irdma_dealloc_mw,
+ .destroy_srq = irdma_destroy_srq,
+ .modify_srq = irdma_modify_srq,
+ .post_srq_recv = irdma_post_srq_recv,
+ .query_srq = irdma_query_srq,
+};
+
+static const struct ib_device_ops irdma_roce_dev_ops = {
+ .attach_mcast = irdma_attach_mcast,
+ .create_ah = irdma_create_ah,
+ .create_user_ah = irdma_create_user_ah,
+ .destroy_ah = irdma_destroy_ah,
+ .detach_mcast = irdma_detach_mcast,
+ .get_link_layer = irdma_get_link_layer,
+ .get_port_immutable = irdma_roce_port_immutable,
+ .modify_qp = irdma_modify_qp_roce,
+ .query_ah = irdma_query_ah,
+ .query_pkey = irdma_query_pkey,
+};
+
+static const struct ib_device_ops irdma_iw_dev_ops = {
+ .get_port_immutable = irdma_iw_port_immutable,
+ .iw_accept = irdma_accept,
+ .iw_add_ref = irdma_qp_add_ref,
+ .iw_connect = irdma_connect,
+ .iw_create_listen = irdma_create_listen,
+ .iw_destroy_listen = irdma_destroy_listen,
+ .iw_get_qp = irdma_get_qp,
+ .iw_reject = irdma_reject,
+ .iw_rem_ref = irdma_qp_rem_ref,
+ .modify_qp = irdma_modify_qp,
+ .query_gid = irdma_query_gid,
+};
+
+static const struct ib_device_ops irdma_dev_ops = {
+ .owner = THIS_MODULE,
+ .driver_id = RDMA_DRIVER_IRDMA,
+ .uverbs_abi_ver = IRDMA_ABI_VER,
+
+ .alloc_hw_port_stats = irdma_alloc_hw_port_stats,
+ .alloc_mr = irdma_alloc_mr,
+ .alloc_pd = irdma_alloc_pd,
+ .alloc_ucontext = irdma_alloc_ucontext,
+ .create_cq = irdma_create_cq,
+ .create_qp = irdma_create_qp,
+ .dealloc_driver = irdma_ib_dealloc_device,
+ .dealloc_mw = irdma_dealloc_mw,
+ .dealloc_pd = irdma_dealloc_pd,
+ .dealloc_ucontext = irdma_dealloc_ucontext,
+ .dereg_mr = irdma_dereg_mr,
+ .destroy_cq = irdma_destroy_cq,
+ .destroy_qp = irdma_destroy_qp,
+ .disassociate_ucontext = irdma_disassociate_ucontext,
+ .get_dev_fw_str = irdma_get_dev_fw_str,
+ .get_dma_mr = irdma_get_dma_mr,
+ .get_hw_stats = irdma_get_hw_stats,
+ .map_mr_sg = irdma_map_mr_sg,
+ .mmap = irdma_mmap,
+ .mmap_free = irdma_mmap_free,
+ .poll_cq = irdma_poll_cq,
+ .post_recv = irdma_post_recv,
+ .post_send = irdma_post_send,
+ .query_device = irdma_query_device,
+ .query_port = irdma_query_port,
+ .query_qp = irdma_query_qp,
+ .reg_user_mr = irdma_reg_user_mr,
+ .reg_user_mr_dmabuf = irdma_reg_user_mr_dmabuf,
+ .rereg_user_mr = irdma_rereg_user_mr,
+ .req_notify_cq = irdma_req_notify_cq,
+ .resize_cq = irdma_resize_cq,
+ INIT_RDMA_OBJ_SIZE(ib_pd, irdma_pd, ibpd),
+ INIT_RDMA_OBJ_SIZE(ib_ucontext, irdma_ucontext, ibucontext),
+ INIT_RDMA_OBJ_SIZE(ib_ah, irdma_ah, ibah),
+ INIT_RDMA_OBJ_SIZE(ib_cq, irdma_cq, ibcq),
+ INIT_RDMA_OBJ_SIZE(ib_mw, irdma_mr, ibmw),
+ INIT_RDMA_OBJ_SIZE(ib_qp, irdma_qp, ibqp),
+ INIT_RDMA_OBJ_SIZE(ib_srq, irdma_srq, ibsrq),
+};
+
+/**
+ * irdma_init_roce_device - initialization of roce rdma device
+ * @iwdev: irdma device
+ */
+static void irdma_init_roce_device(struct irdma_device *iwdev)
+{
+ iwdev->ibdev.node_type = RDMA_NODE_IB_CA;
+ addrconf_addr_eui48((u8 *)&iwdev->ibdev.node_guid,
+ iwdev->netdev->dev_addr);
+ ib_set_device_ops(&iwdev->ibdev, &irdma_roce_dev_ops);
+}
+
+/**
+ * irdma_init_iw_device - initialization of iwarp rdma device
+ * @iwdev: irdma device
+ */
+static void irdma_init_iw_device(struct irdma_device *iwdev)
+{
+ struct net_device *netdev = iwdev->netdev;
+
+ iwdev->ibdev.node_type = RDMA_NODE_RNIC;
+ addrconf_addr_eui48((u8 *)&iwdev->ibdev.node_guid,
+ netdev->dev_addr);
+ memcpy(iwdev->ibdev.iw_ifname, netdev->name,
+ sizeof(iwdev->ibdev.iw_ifname));
+ ib_set_device_ops(&iwdev->ibdev, &irdma_iw_dev_ops);
+}
+
+/**
+ * irdma_init_rdma_device - initialization of rdma device
+ * @iwdev: irdma device
+ */
+static void irdma_init_rdma_device(struct irdma_device *iwdev)
+{
+ struct pci_dev *pcidev = iwdev->rf->pcidev;
+
+ if (iwdev->roce_mode)
+ irdma_init_roce_device(iwdev);
+ else
+ irdma_init_iw_device(iwdev);
+
+ iwdev->ibdev.phys_port_cnt = 1;
+ iwdev->ibdev.num_comp_vectors = iwdev->rf->ceqs_count;
+ iwdev->ibdev.dev.parent = &pcidev->dev;
+ ib_set_device_ops(&iwdev->ibdev, &irdma_dev_ops);
+ if (iwdev->rf->rdma_ver == IRDMA_GEN_1)
+ ib_set_device_ops(&iwdev->ibdev, &irdma_gen1_dev_ops);
+ if (iwdev->rf->rdma_ver >= IRDMA_GEN_3)
+ ib_set_device_ops(&iwdev->ibdev, &irdma_gen3_dev_ops);
+}
+
+/**
+ * irdma_port_ibevent - indicate port event
+ * @iwdev: irdma device
+ */
+void irdma_port_ibevent(struct irdma_device *iwdev)
+{
+ struct ib_event event;
+
+ event.device = &iwdev->ibdev;
+ event.element.port_num = 1;
+ event.event =
+ iwdev->iw_status ? IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR;
+ ib_dispatch_event(&event);
+}
+
+/**
+ * irdma_ib_unregister_device - unregister rdma device from IB
+ * core
+ * @iwdev: irdma device
+ */
+void irdma_ib_unregister_device(struct irdma_device *iwdev)
+{
+ iwdev->iw_status = 0;
+ irdma_port_ibevent(iwdev);
+ ib_unregister_device(&iwdev->ibdev);
+}
+
+/**
+ * irdma_ib_register_device - register irdma device to IB core
+ * @iwdev: irdma device
+ */
+int irdma_ib_register_device(struct irdma_device *iwdev)
+{
+ int ret;
+
+ irdma_init_rdma_device(iwdev);
+
+ ret = ib_device_set_netdev(&iwdev->ibdev, iwdev->netdev, 1);
+ if (ret)
+ goto error;
+ dma_set_max_seg_size(iwdev->rf->hw.device, UINT_MAX);
+ ret = ib_register_device(&iwdev->ibdev, "irdma%d", iwdev->rf->hw.device);
+ if (ret)
+ goto error;
+
+ iwdev->iw_status = 1;
+ irdma_port_ibevent(iwdev);
+
+ return 0;
+
+error:
+ if (ret)
+ ibdev_dbg(&iwdev->ibdev, "VERBS: Register RDMA device fail\n");
+
+ return ret;
+}
+
+/**
+ * irdma_ib_dealloc_device
+ * @ibdev: ib device
+ *
+ * callback from ibdev dealloc_driver to deallocate resources
+ * unber irdma device
+ */
+void irdma_ib_dealloc_device(struct ib_device *ibdev)
+{
+ struct irdma_device *iwdev = to_iwdev(ibdev);
+
+ irdma_rt_deinit_hw(iwdev);
+ if (!iwdev->is_vport) {
+ irdma_ctrl_deinit_hw(iwdev->rf);
+ if (iwdev->rf->vchnl_wq) {
+ destroy_workqueue(iwdev->rf->vchnl_wq);
+ mutex_destroy(&iwdev->rf->sc_dev.vchnl_mutex);
+ }
+ }
+}
diff --git a/drivers/infiniband/hw/irdma/verbs.h b/drivers/infiniband/hw/irdma/verbs.h
new file mode 100644
index 000000000000..aabbb3442098
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/verbs.h
@@ -0,0 +1,341 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2015 - 2021 Intel Corporation */
+#ifndef IRDMA_VERBS_H
+#define IRDMA_VERBS_H
+
+#define IRDMA_MAX_SAVED_PHY_PGADDR 4
+#define IRDMA_FLUSH_DELAY_MS 20
+
+#define IRDMA_PKEY_TBL_SZ 1
+#define IRDMA_DEFAULT_PKEY 0xFFFF
+#define IRDMA_SHADOW_PGCNT 1
+
+struct irdma_ucontext {
+ struct ib_ucontext ibucontext;
+ struct irdma_device *iwdev;
+ struct rdma_user_mmap_entry *db_mmap_entry;
+ struct list_head cq_reg_mem_list;
+ spinlock_t cq_reg_mem_list_lock; /* protect CQ memory list */
+ struct list_head qp_reg_mem_list;
+ spinlock_t qp_reg_mem_list_lock; /* protect QP memory list */
+ struct list_head srq_reg_mem_list;
+ spinlock_t srq_reg_mem_list_lock; /* protect SRQ memory list */
+ int abi_ver;
+ u8 legacy_mode : 1;
+ u8 use_raw_attrs : 1;
+};
+
+struct irdma_pd {
+ struct ib_pd ibpd;
+ struct irdma_sc_pd sc_pd;
+};
+
+union irdma_sockaddr {
+ struct sockaddr_in saddr_in;
+ struct sockaddr_in6 saddr_in6;
+};
+
+struct irdma_av {
+ u8 macaddr[16];
+ struct rdma_ah_attr attrs;
+ union irdma_sockaddr sgid_addr;
+ union irdma_sockaddr dgid_addr;
+ u8 net_type;
+};
+
+struct irdma_ah {
+ struct ib_ah ibah;
+ struct irdma_sc_ah sc_ah;
+ struct irdma_pd *pd;
+ struct irdma_av av;
+ u8 sgid_index;
+ union ib_gid dgid;
+ struct hlist_node list;
+ refcount_t refcnt;
+ struct irdma_ah *parent_ah; /* AH from cached list */
+};
+
+struct irdma_hmc_pble {
+ union {
+ u32 idx;
+ dma_addr_t addr;
+ };
+};
+
+struct irdma_cq_mr {
+ struct irdma_hmc_pble cq_pbl;
+ dma_addr_t shadow;
+ bool split;
+};
+
+struct irdma_srq_mr {
+ struct irdma_hmc_pble srq_pbl;
+ dma_addr_t shadow;
+};
+
+struct irdma_qp_mr {
+ struct irdma_hmc_pble sq_pbl;
+ struct irdma_hmc_pble rq_pbl;
+ dma_addr_t shadow;
+ dma_addr_t rq_pa;
+ struct page *sq_page;
+};
+
+struct irdma_cq_buf {
+ struct irdma_dma_mem kmem_buf;
+ struct irdma_cq_uk cq_uk;
+ struct irdma_hw *hw;
+ struct list_head list;
+ struct work_struct work;
+};
+
+struct irdma_pbl {
+ struct list_head list;
+ union {
+ struct irdma_qp_mr qp_mr;
+ struct irdma_cq_mr cq_mr;
+ struct irdma_srq_mr srq_mr;
+ };
+
+ bool pbl_allocated:1;
+ bool on_list:1;
+ u64 user_base;
+ struct irdma_pble_alloc pble_alloc;
+ struct irdma_mr *iwmr;
+};
+
+struct irdma_mr {
+ union {
+ struct ib_mr ibmr;
+ struct ib_mw ibmw;
+ };
+ struct ib_umem *region;
+ int access;
+ bool is_hwreg:1;
+ bool dma_mr:1;
+ u16 type;
+ u32 page_cnt;
+ u64 page_size;
+ u32 npages;
+ u32 stag;
+ u64 len;
+ u64 pgaddrmem[IRDMA_MAX_SAVED_PHY_PGADDR];
+ struct irdma_pbl iwpbl;
+};
+
+struct irdma_srq {
+ struct ib_srq ibsrq;
+ struct irdma_sc_srq sc_srq __aligned(64);
+ struct irdma_dma_mem kmem;
+ u64 *srq_wrid_mem;
+ refcount_t refcnt;
+ spinlock_t lock; /* for poll srq */
+ struct irdma_pbl *iwpbl;
+ struct irdma_sge *sg_list;
+ u16 srq_head;
+ u32 srq_num;
+ u32 max_wr;
+ bool user_mode:1;
+};
+
+struct irdma_cq {
+ struct ib_cq ibcq;
+ struct irdma_sc_cq sc_cq;
+ u32 cq_num;
+ bool user_mode;
+ atomic_t armed;
+ enum irdma_cmpl_notify last_notify;
+ struct irdma_dma_mem kmem;
+ struct irdma_dma_mem kmem_shadow;
+ struct completion free_cq;
+ refcount_t refcnt;
+ spinlock_t lock; /* for poll cq */
+ struct list_head resize_list;
+ struct irdma_cq_poll_info cur_cqe;
+ struct list_head cmpl_generated;
+};
+
+struct irdma_cmpl_gen {
+ struct list_head list;
+ struct irdma_cq_poll_info cpi;
+};
+
+struct disconn_work {
+ struct work_struct work;
+ struct irdma_qp *iwqp;
+};
+
+struct iw_cm_id;
+
+struct irdma_qp_kmode {
+ struct irdma_dma_mem dma_mem;
+ struct irdma_sq_uk_wr_trk_info *sq_wrid_mem;
+ u64 *rq_wrid_mem;
+};
+
+struct irdma_qp {
+ struct ib_qp ibqp;
+ struct irdma_sc_qp sc_qp;
+ struct irdma_device *iwdev;
+ struct irdma_cq *iwscq;
+ struct irdma_cq *iwrcq;
+ struct irdma_pd *iwpd;
+ struct rdma_user_mmap_entry *push_wqe_mmap_entry;
+ struct rdma_user_mmap_entry *push_db_mmap_entry;
+ struct irdma_qp_host_ctx_info ctx_info;
+ union {
+ struct irdma_iwarp_offload_info iwarp_info;
+ struct irdma_roce_offload_info roce_info;
+ };
+
+ union {
+ struct irdma_tcp_offload_info tcp_info;
+ struct irdma_udp_offload_info udp_info;
+ };
+
+ struct irdma_ah roce_ah;
+ struct list_head teardown_entry;
+ refcount_t refcnt;
+ struct iw_cm_id *cm_id;
+ struct irdma_cm_node *cm_node;
+ struct delayed_work dwork_flush;
+ struct ib_mr *lsmm_mr;
+ atomic_t hw_mod_qp_pend;
+ enum ib_qp_state ibqp_state;
+ u32 qp_mem_size;
+ u32 last_aeq;
+ int max_send_wr;
+ int max_recv_wr;
+ atomic_t close_timer_started;
+ spinlock_t lock; /* serialize posting WRs to SQ/RQ */
+ struct irdma_qp_context *iwqp_context;
+ void *pbl_vbase;
+ dma_addr_t pbl_pbase;
+ struct page *page;
+ u8 active_conn : 1;
+ u8 user_mode : 1;
+ u8 hte_added : 1;
+ u8 flush_issued : 1;
+ u8 sig_all : 1;
+ u8 pau_mode : 1;
+ u8 suspend_pending : 1;
+ u8 rsvd : 1;
+ u8 iwarp_state;
+ u16 term_sq_flush_code;
+ u16 term_rq_flush_code;
+ u8 hw_iwarp_state;
+ u8 hw_tcp_state;
+ struct irdma_qp_kmode kqp;
+ struct irdma_dma_mem host_ctx;
+ struct timer_list terminate_timer;
+ struct irdma_pbl *iwpbl;
+ struct irdma_dma_mem q2_ctx_mem;
+ struct irdma_dma_mem ietf_mem;
+ struct completion free_qp;
+ wait_queue_head_t waitq;
+ wait_queue_head_t mod_qp_waitq;
+ u8 rts_ae_rcvd;
+};
+
+enum irdma_mmap_flag {
+ IRDMA_MMAP_IO_NC,
+ IRDMA_MMAP_IO_WC,
+};
+
+struct irdma_user_mmap_entry {
+ struct rdma_user_mmap_entry rdma_entry;
+ u64 bar_offset;
+ u8 mmap_flag;
+};
+
+static inline u16 irdma_fw_major_ver(struct irdma_sc_dev *dev)
+{
+ return (u16)FIELD_GET(IRDMA_FW_VER_MAJOR, dev->feature_info[IRDMA_FEATURE_FW_INFO]);
+}
+
+static inline u16 irdma_fw_minor_ver(struct irdma_sc_dev *dev)
+{
+ return (u16)FIELD_GET(IRDMA_FW_VER_MINOR, dev->feature_info[IRDMA_FEATURE_FW_INFO]);
+}
+
+static inline void set_ib_wc_op_sq(struct irdma_cq_poll_info *cq_poll_info,
+ struct ib_wc *entry)
+{
+ switch (cq_poll_info->op_type) {
+ case IRDMA_OP_TYPE_RDMA_WRITE:
+ case IRDMA_OP_TYPE_RDMA_WRITE_SOL:
+ entry->opcode = IB_WC_RDMA_WRITE;
+ break;
+ case IRDMA_OP_TYPE_RDMA_READ_INV_STAG:
+ case IRDMA_OP_TYPE_RDMA_READ:
+ entry->opcode = IB_WC_RDMA_READ;
+ break;
+ case IRDMA_OP_TYPE_SEND_SOL:
+ case IRDMA_OP_TYPE_SEND_SOL_INV:
+ case IRDMA_OP_TYPE_SEND_INV:
+ case IRDMA_OP_TYPE_SEND:
+ entry->opcode = IB_WC_SEND;
+ break;
+ case IRDMA_OP_TYPE_FAST_REG_NSMR:
+ entry->opcode = IB_WC_REG_MR;
+ break;
+ case IRDMA_OP_TYPE_ATOMIC_COMPARE_AND_SWAP:
+ entry->opcode = IB_WC_COMP_SWAP;
+ break;
+ case IRDMA_OP_TYPE_ATOMIC_FETCH_AND_ADD:
+ entry->opcode = IB_WC_FETCH_ADD;
+ break;
+ case IRDMA_OP_TYPE_INV_STAG:
+ entry->opcode = IB_WC_LOCAL_INV;
+ break;
+ default:
+ entry->status = IB_WC_GENERAL_ERR;
+ }
+}
+
+static inline void set_ib_wc_op_rq_gen_3(struct irdma_cq_poll_info *info,
+ struct ib_wc *entry)
+{
+ switch (info->op_type) {
+ case IRDMA_OP_TYPE_RDMA_WRITE:
+ case IRDMA_OP_TYPE_RDMA_WRITE_SOL:
+ entry->opcode = IB_WC_RECV_RDMA_WITH_IMM;
+ break;
+ default:
+ entry->opcode = IB_WC_RECV;
+ }
+}
+
+static inline void set_ib_wc_op_rq(struct irdma_cq_poll_info *cq_poll_info,
+ struct ib_wc *entry, bool send_imm_support)
+{
+ /**
+ * iWARP does not support sendImm, so the presence of Imm data
+ * must be WriteImm.
+ */
+ if (!send_imm_support) {
+ entry->opcode = cq_poll_info->imm_valid ?
+ IB_WC_RECV_RDMA_WITH_IMM :
+ IB_WC_RECV;
+ return;
+ }
+
+ switch (cq_poll_info->op_type) {
+ case IB_OPCODE_RDMA_WRITE_ONLY_WITH_IMMEDIATE:
+ case IB_OPCODE_RDMA_WRITE_LAST_WITH_IMMEDIATE:
+ entry->opcode = IB_WC_RECV_RDMA_WITH_IMM;
+ break;
+ default:
+ entry->opcode = IB_WC_RECV;
+ }
+}
+
+void irdma_mcast_mac(u32 *ip_addr, u8 *mac, bool ipv4);
+int irdma_ib_register_device(struct irdma_device *iwdev);
+void irdma_ib_unregister_device(struct irdma_device *iwdev);
+void irdma_ib_dealloc_device(struct ib_device *ibdev);
+void irdma_ib_qp_event(struct irdma_qp *iwqp, enum irdma_qp_event_type event);
+void irdma_generate_flush_completions(struct irdma_qp *iwqp);
+void irdma_remove_cmpls_list(struct irdma_cq *iwcq);
+int irdma_generated_cmpls(struct irdma_cq *iwcq, struct irdma_cq_poll_info *cq_poll_info);
+#endif /* IRDMA_VERBS_H */
diff --git a/drivers/infiniband/hw/irdma/virtchnl.c b/drivers/infiniband/hw/irdma/virtchnl.c
new file mode 100644
index 000000000000..16ad27247527
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/virtchnl.c
@@ -0,0 +1,618 @@
+// SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB
+/* Copyright (c) 2015 - 2024 Intel Corporation */
+
+#include "osdep.h"
+#include "hmc.h"
+#include "defs.h"
+#include "type.h"
+#include "protos.h"
+#include "virtchnl.h"
+#include "ws.h"
+#include "i40iw_hw.h"
+#include "ig3rdma_hw.h"
+
+struct vchnl_reg_map_elem {
+ u16 reg_id;
+ u16 reg_idx;
+ bool pg_rel;
+};
+
+struct vchnl_regfld_map_elem {
+ u16 regfld_id;
+ u16 regfld_idx;
+};
+
+static struct vchnl_reg_map_elem vchnl_reg_map[] = {
+ {IRDMA_VCHNL_REG_ID_CQPTAIL, IRDMA_CQPTAIL, false},
+ {IRDMA_VCHNL_REG_ID_CQPDB, IRDMA_CQPDB, false},
+ {IRDMA_VCHNL_REG_ID_CCQPSTATUS, IRDMA_CCQPSTATUS, false},
+ {IRDMA_VCHNL_REG_ID_CCQPHIGH, IRDMA_CCQPHIGH, false},
+ {IRDMA_VCHNL_REG_ID_CCQPLOW, IRDMA_CCQPLOW, false},
+ {IRDMA_VCHNL_REG_ID_CQARM, IRDMA_CQARM, false},
+ {IRDMA_VCHNL_REG_ID_CQACK, IRDMA_CQACK, false},
+ {IRDMA_VCHNL_REG_ID_AEQALLOC, IRDMA_AEQALLOC, false},
+ {IRDMA_VCHNL_REG_ID_CQPERRCODES, IRDMA_CQPERRCODES, false},
+ {IRDMA_VCHNL_REG_ID_WQEALLOC, IRDMA_WQEALLOC, false},
+ {IRDMA_VCHNL_REG_ID_DB_ADDR_OFFSET, IRDMA_DB_ADDR_OFFSET, false },
+ {IRDMA_VCHNL_REG_ID_DYN_CTL, IRDMA_GLINT_DYN_CTL, false },
+ {IRDMA_VCHNL_REG_INV_ID, IRDMA_VCHNL_REG_INV_ID, false }
+};
+
+static struct vchnl_regfld_map_elem vchnl_regfld_map[] = {
+ {IRDMA_VCHNL_REGFLD_ID_CCQPSTATUS_CQP_OP_ERR, IRDMA_CCQPSTATUS_CCQP_ERR_M},
+ {IRDMA_VCHNL_REGFLD_ID_CCQPSTATUS_CCQP_DONE, IRDMA_CCQPSTATUS_CCQP_DONE_M},
+ {IRDMA_VCHNL_REGFLD_ID_CQPSQ_STAG_PDID, IRDMA_CQPSQ_STAG_PDID_M},
+ {IRDMA_VCHNL_REGFLD_ID_CQPSQ_CQ_CEQID, IRDMA_CQPSQ_CQ_CEQID_M},
+ {IRDMA_VCHNL_REGFLD_ID_CQPSQ_CQ_CQID, IRDMA_CQPSQ_CQ_CQID_M},
+ {IRDMA_VCHNL_REGFLD_ID_COMMIT_FPM_CQCNT, IRDMA_COMMIT_FPM_CQCNT_M},
+ {IRDMA_VCHNL_REGFLD_ID_UPESD_HMCN_ID, IRDMA_CQPSQ_UPESD_HMCFNID_M},
+ {IRDMA_VCHNL_REGFLD_INV_ID, IRDMA_VCHNL_REGFLD_INV_ID}
+};
+
+#define IRDMA_VCHNL_REG_COUNT ARRAY_SIZE(vchnl_reg_map)
+#define IRDMA_VCHNL_REGFLD_COUNT ARRAY_SIZE(vchnl_regfld_map)
+#define IRDMA_VCHNL_REGFLD_BUF_SIZE \
+ (IRDMA_VCHNL_REG_COUNT * sizeof(struct irdma_vchnl_reg_info) + \
+ IRDMA_VCHNL_REGFLD_COUNT * sizeof(struct irdma_vchnl_reg_field_info))
+#define IRDMA_REGMAP_RESP_BUF_SIZE (IRDMA_VCHNL_RESP_MIN_SIZE + IRDMA_VCHNL_REGFLD_BUF_SIZE)
+
+/**
+ * irdma_sc_vchnl_init - Initialize dev virtchannel and get hw_rev
+ * @dev: dev structure to update
+ * @info: virtchannel info parameters to fill into the dev structure
+ */
+int irdma_sc_vchnl_init(struct irdma_sc_dev *dev,
+ struct irdma_vchnl_init_info *info)
+{
+ dev->vchnl_up = true;
+ dev->privileged = info->privileged;
+ dev->is_pf = info->is_pf;
+ dev->hw_attrs.uk_attrs.hw_rev = info->hw_rev;
+
+ if (!dev->privileged) {
+ int ret = irdma_vchnl_req_get_ver(dev, IRDMA_VCHNL_CHNL_VER_MAX,
+ &dev->vchnl_ver);
+
+ ibdev_dbg(to_ibdev(dev),
+ "DEV: Get Channel version ret = %d, version is %u\n",
+ ret, dev->vchnl_ver);
+
+ if (ret)
+ return ret;
+
+ ret = irdma_vchnl_req_get_caps(dev);
+ if (ret)
+ return ret;
+
+ dev->hw_attrs.uk_attrs.hw_rev = dev->vc_caps.hw_rev;
+ }
+
+ return 0;
+}
+
+/**
+ * irdma_vchnl_req_verify_resp - Verify requested response size
+ * @vchnl_req: vchnl message requested
+ * @resp_len: response length sent from vchnl peer
+ */
+static int irdma_vchnl_req_verify_resp(struct irdma_vchnl_req *vchnl_req,
+ u16 resp_len)
+{
+ switch (vchnl_req->vchnl_msg->op_code) {
+ case IRDMA_VCHNL_OP_GET_VER:
+ case IRDMA_VCHNL_OP_GET_HMC_FCN:
+ case IRDMA_VCHNL_OP_PUT_HMC_FCN:
+ if (resp_len != vchnl_req->parm_len)
+ return -EBADMSG;
+ break;
+ case IRDMA_VCHNL_OP_GET_RDMA_CAPS:
+ if (resp_len < IRDMA_VCHNL_OP_GET_RDMA_CAPS_MIN_SIZE)
+ return -EBADMSG;
+ break;
+ case IRDMA_VCHNL_OP_GET_REG_LAYOUT:
+ case IRDMA_VCHNL_OP_QUEUE_VECTOR_MAP:
+ case IRDMA_VCHNL_OP_QUEUE_VECTOR_UNMAP:
+ case IRDMA_VCHNL_OP_ADD_VPORT:
+ case IRDMA_VCHNL_OP_DEL_VPORT:
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static void irdma_free_vchnl_req_msg(struct irdma_vchnl_req *vchnl_req)
+{
+ kfree(vchnl_req->vchnl_msg);
+}
+
+static int irdma_alloc_vchnl_req_msg(struct irdma_vchnl_req *vchnl_req,
+ struct irdma_vchnl_req_init_info *info)
+{
+ struct irdma_vchnl_op_buf *vchnl_msg;
+
+ vchnl_msg = kzalloc(IRDMA_VCHNL_MAX_MSG_SIZE, GFP_KERNEL);
+
+ if (!vchnl_msg)
+ return -ENOMEM;
+
+ vchnl_msg->op_ctx = (uintptr_t)vchnl_req;
+ vchnl_msg->buf_len = sizeof(*vchnl_msg) + info->req_parm_len;
+ if (info->req_parm_len)
+ memcpy(vchnl_msg->buf, info->req_parm, info->req_parm_len);
+ vchnl_msg->op_code = info->op_code;
+ vchnl_msg->op_ver = info->op_ver;
+
+ vchnl_req->vchnl_msg = vchnl_msg;
+ vchnl_req->parm = info->resp_parm;
+ vchnl_req->parm_len = info->resp_parm_len;
+
+ return 0;
+}
+
+static int irdma_vchnl_req_send_sync(struct irdma_sc_dev *dev,
+ struct irdma_vchnl_req_init_info *info)
+{
+ u16 resp_len = sizeof(dev->vc_recv_buf);
+ struct irdma_vchnl_req vchnl_req = {};
+ u16 msg_len;
+ u8 *msg;
+ int ret;
+
+ ret = irdma_alloc_vchnl_req_msg(&vchnl_req, info);
+ if (ret)
+ return ret;
+
+ msg_len = vchnl_req.vchnl_msg->buf_len;
+ msg = (u8 *)vchnl_req.vchnl_msg;
+
+ mutex_lock(&dev->vchnl_mutex);
+ ret = ig3rdma_vchnl_send_sync(dev, msg, msg_len, dev->vc_recv_buf,
+ &resp_len);
+ dev->vc_recv_len = resp_len;
+ if (ret)
+ goto exit;
+
+ ret = irdma_vchnl_req_get_resp(dev, &vchnl_req);
+exit:
+ mutex_unlock(&dev->vchnl_mutex);
+ ibdev_dbg(to_ibdev(dev),
+ "VIRT: virtual channel send %s caller: %pS ret=%d op=%u op_ver=%u req_len=%u parm_len=%u resp_len=%u\n",
+ !ret ? "SUCCEEDS" : "FAILS", __builtin_return_address(0),
+ ret, vchnl_req.vchnl_msg->op_code,
+ vchnl_req.vchnl_msg->op_ver, vchnl_req.vchnl_msg->buf_len,
+ vchnl_req.parm_len, vchnl_req.resp_len);
+ irdma_free_vchnl_req_msg(&vchnl_req);
+
+ return ret;
+}
+
+/**
+ * irdma_vchnl_req_get_reg_layout - Get Register Layout
+ * @dev: RDMA device pointer
+ */
+int irdma_vchnl_req_get_reg_layout(struct irdma_sc_dev *dev)
+{
+ u16 reg_idx, reg_id, tmp_reg_id, regfld_idx, regfld_id, tmp_regfld_id;
+ struct irdma_vchnl_reg_field_info *regfld_array = NULL;
+ u8 resp_buffer[IRDMA_REGMAP_RESP_BUF_SIZE] = {};
+ struct vchnl_regfld_map_elem *regfld_map_array;
+ struct irdma_vchnl_req_init_info info = {};
+ struct vchnl_reg_map_elem *reg_map_array;
+ struct irdma_vchnl_reg_info *reg_array;
+ u8 num_bits, shift_cnt;
+ u16 buf_len = 0;
+ u64 bitmask;
+ u32 rindex;
+ int ret;
+
+ if (!dev->vchnl_up)
+ return -EBUSY;
+
+ info.op_code = IRDMA_VCHNL_OP_GET_REG_LAYOUT;
+ info.op_ver = IRDMA_VCHNL_OP_GET_REG_LAYOUT_V0;
+ info.resp_parm = resp_buffer;
+ info.resp_parm_len = sizeof(resp_buffer);
+
+ ret = irdma_vchnl_req_send_sync(dev, &info);
+
+ if (ret)
+ return ret;
+
+ /* parse the response buffer and update reg info*/
+ /* Parse registers till invalid */
+ /* Parse register fields till invalid */
+ reg_array = (struct irdma_vchnl_reg_info *)resp_buffer;
+ for (rindex = 0; rindex < IRDMA_VCHNL_REG_COUNT; rindex++) {
+ buf_len += sizeof(struct irdma_vchnl_reg_info);
+ if (buf_len >= sizeof(resp_buffer))
+ return -ENOMEM;
+
+ regfld_array =
+ (struct irdma_vchnl_reg_field_info *)&reg_array[rindex + 1];
+ reg_id = reg_array[rindex].reg_id;
+ if (reg_id == IRDMA_VCHNL_REG_INV_ID)
+ break;
+
+ reg_id &= ~IRDMA_VCHNL_REG_PAGE_REL;
+ if (reg_id >= IRDMA_VCHNL_REG_COUNT)
+ return -EINVAL;
+
+ /* search regmap for register index in hw_regs.*/
+ reg_map_array = vchnl_reg_map;
+ do {
+ tmp_reg_id = reg_map_array->reg_id;
+ if (tmp_reg_id == reg_id)
+ break;
+
+ reg_map_array++;
+ } while (tmp_reg_id != IRDMA_VCHNL_REG_INV_ID);
+ if (tmp_reg_id != reg_id)
+ continue;
+
+ reg_idx = reg_map_array->reg_idx;
+
+ /* Page relative, DB Offset do not need bar offset */
+ if (reg_idx == IRDMA_DB_ADDR_OFFSET ||
+ (reg_array[rindex].reg_id & IRDMA_VCHNL_REG_PAGE_REL)) {
+ dev->hw_regs[reg_idx] =
+ (u32 __iomem *)(uintptr_t)reg_array[rindex].reg_offset;
+ continue;
+ }
+
+ /* Update the local HW struct */
+ dev->hw_regs[reg_idx] = ig3rdma_get_reg_addr(dev->hw,
+ reg_array[rindex].reg_offset);
+ if (!dev->hw_regs[reg_idx])
+ return -EINVAL;
+ }
+
+ if (!regfld_array)
+ return -ENOMEM;
+
+ /* set up doorbell variables using mapped DB page */
+ dev->wqe_alloc_db = dev->hw_regs[IRDMA_WQEALLOC];
+ dev->cq_arm_db = dev->hw_regs[IRDMA_CQARM];
+ dev->aeq_alloc_db = dev->hw_regs[IRDMA_AEQALLOC];
+ dev->cqp_db = dev->hw_regs[IRDMA_CQPDB];
+ dev->cq_ack_db = dev->hw_regs[IRDMA_CQACK];
+
+ for (rindex = 0; rindex < IRDMA_VCHNL_REGFLD_COUNT; rindex++) {
+ buf_len += sizeof(struct irdma_vchnl_reg_field_info);
+ if ((buf_len - 1) > sizeof(resp_buffer))
+ break;
+
+ if (regfld_array[rindex].fld_id == IRDMA_VCHNL_REGFLD_INV_ID)
+ break;
+
+ regfld_id = regfld_array[rindex].fld_id;
+ regfld_map_array = vchnl_regfld_map;
+ do {
+ tmp_regfld_id = regfld_map_array->regfld_id;
+ if (tmp_regfld_id == regfld_id)
+ break;
+
+ regfld_map_array++;
+ } while (tmp_regfld_id != IRDMA_VCHNL_REGFLD_INV_ID);
+
+ if (tmp_regfld_id != regfld_id)
+ continue;
+
+ regfld_idx = regfld_map_array->regfld_idx;
+
+ num_bits = regfld_array[rindex].fld_bits;
+ shift_cnt = regfld_array[rindex].fld_shift;
+ if ((num_bits + shift_cnt > 64) || !num_bits) {
+ ibdev_dbg(to_ibdev(dev),
+ "ERR: Invalid field mask id %d bits %d shift %d",
+ regfld_id, num_bits, shift_cnt);
+
+ continue;
+ }
+
+ bitmask = (1ULL << num_bits) - 1;
+ dev->hw_masks[regfld_idx] = bitmask << shift_cnt;
+ dev->hw_shifts[regfld_idx] = shift_cnt;
+ }
+
+ return 0;
+}
+
+int irdma_vchnl_req_add_vport(struct irdma_sc_dev *dev, u16 vport_id,
+ u32 qp1_id, struct irdma_qos *qos)
+{
+ struct irdma_vchnl_resp_vport_info resp_vport = { 0 };
+ struct irdma_vchnl_req_vport_info req_vport = { 0 };
+ struct irdma_vchnl_req_init_info info = { 0 };
+ int ret, i;
+
+ if (!dev->vchnl_up)
+ return -EBUSY;
+
+ info.op_code = IRDMA_VCHNL_OP_ADD_VPORT;
+ info.op_ver = IRDMA_VCHNL_OP_ADD_VPORT_V0;
+ req_vport.vport_id = vport_id;
+ req_vport.qp1_id = qp1_id;
+ info.req_parm_len = sizeof(req_vport);
+ info.req_parm = &req_vport;
+ info.resp_parm = &resp_vport;
+ info.resp_parm_len = sizeof(resp_vport);
+
+ ret = irdma_vchnl_req_send_sync(dev, &info);
+ if (ret)
+ return ret;
+
+ for (i = 0; i < IRDMA_MAX_USER_PRIORITY; i++) {
+ qos[i].qs_handle = resp_vport.qs_handle[i];
+ qos[i].valid = true;
+ }
+
+ return 0;
+}
+
+int irdma_vchnl_req_del_vport(struct irdma_sc_dev *dev, u16 vport_id, u32 qp1_id)
+{
+ struct irdma_vchnl_req_init_info info = { 0 };
+ struct irdma_vchnl_req_vport_info req_vport = { 0 };
+
+ if (!dev->vchnl_up)
+ return -EBUSY;
+
+ info.op_code = IRDMA_VCHNL_OP_DEL_VPORT;
+ info.op_ver = IRDMA_VCHNL_OP_DEL_VPORT_V0;
+ req_vport.vport_id = vport_id;
+ req_vport.qp1_id = qp1_id;
+ info.req_parm_len = sizeof(req_vport);
+ info.req_parm = &req_vport;
+
+ return irdma_vchnl_req_send_sync(dev, &info);
+}
+
+/**
+ * irdma_vchnl_req_aeq_vec_map - Map AEQ to vector on this function
+ * @dev: RDMA device pointer
+ * @v_idx: vector index
+ */
+int irdma_vchnl_req_aeq_vec_map(struct irdma_sc_dev *dev, u32 v_idx)
+{
+ struct irdma_vchnl_req_init_info info = {};
+ struct irdma_vchnl_qvlist_info *qvl;
+ struct irdma_vchnl_qv_info *qv;
+ u16 qvl_size, num_vectors = 1;
+ int ret;
+
+ if (!dev->vchnl_up)
+ return -EBUSY;
+
+ qvl_size = struct_size(qvl, qv_info, num_vectors);
+
+ qvl = kzalloc(qvl_size, GFP_KERNEL);
+ if (!qvl)
+ return -ENOMEM;
+
+ qvl->num_vectors = 1;
+ qv = qvl->qv_info;
+
+ qv->ceq_idx = IRDMA_Q_INVALID_IDX;
+ qv->v_idx = v_idx;
+ qv->itr_idx = IRDMA_IDX_ITR0;
+
+ info.op_code = IRDMA_VCHNL_OP_QUEUE_VECTOR_MAP;
+ info.op_ver = IRDMA_VCHNL_OP_QUEUE_VECTOR_MAP_V0;
+ info.req_parm = qvl;
+ info.req_parm_len = qvl_size;
+
+ ret = irdma_vchnl_req_send_sync(dev, &info);
+ kfree(qvl);
+
+ return ret;
+}
+
+/**
+ * irdma_vchnl_req_ceq_vec_map - Map CEQ to vector on this function
+ * @dev: RDMA device pointer
+ * @ceq_id: CEQ index
+ * @v_idx: vector index
+ */
+int irdma_vchnl_req_ceq_vec_map(struct irdma_sc_dev *dev, u16 ceq_id, u32 v_idx)
+{
+ struct irdma_vchnl_req_init_info info = {};
+ struct irdma_vchnl_qvlist_info *qvl;
+ struct irdma_vchnl_qv_info *qv;
+ u16 qvl_size, num_vectors = 1;
+ int ret;
+
+ if (!dev->vchnl_up)
+ return -EBUSY;
+
+ qvl_size = struct_size(qvl, qv_info, num_vectors);
+
+ qvl = kzalloc(qvl_size, GFP_KERNEL);
+ if (!qvl)
+ return -ENOMEM;
+
+ qvl->num_vectors = num_vectors;
+ qv = qvl->qv_info;
+
+ qv->aeq_idx = IRDMA_Q_INVALID_IDX;
+ qv->ceq_idx = ceq_id;
+ qv->v_idx = v_idx;
+ qv->itr_idx = IRDMA_IDX_ITR0;
+
+ info.op_code = IRDMA_VCHNL_OP_QUEUE_VECTOR_MAP;
+ info.op_ver = IRDMA_VCHNL_OP_QUEUE_VECTOR_MAP_V0;
+ info.req_parm = qvl;
+ info.req_parm_len = qvl_size;
+
+ ret = irdma_vchnl_req_send_sync(dev, &info);
+ kfree(qvl);
+
+ return ret;
+}
+
+/**
+ * irdma_vchnl_req_get_ver - Request Channel version
+ * @dev: RDMA device pointer
+ * @ver_req: Virtual channel version requested
+ * @ver_res: Virtual channel version response
+ */
+int irdma_vchnl_req_get_ver(struct irdma_sc_dev *dev, u16 ver_req, u32 *ver_res)
+{
+ struct irdma_vchnl_req_init_info info = {};
+ int ret;
+
+ if (!dev->vchnl_up)
+ return -EBUSY;
+
+ info.op_code = IRDMA_VCHNL_OP_GET_VER;
+ info.op_ver = ver_req;
+ info.resp_parm = ver_res;
+ info.resp_parm_len = sizeof(*ver_res);
+
+ ret = irdma_vchnl_req_send_sync(dev, &info);
+ if (ret)
+ return ret;
+
+ if (*ver_res < IRDMA_VCHNL_CHNL_VER_MIN) {
+ ibdev_dbg(to_ibdev(dev),
+ "VIRT: %s unsupported vchnl version 0x%0x\n",
+ __func__, *ver_res);
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+/**
+ * irdma_vchnl_req_get_hmc_fcn - Request VF HMC Function
+ * @dev: RDMA device pointer
+ */
+int irdma_vchnl_req_get_hmc_fcn(struct irdma_sc_dev *dev)
+{
+ struct irdma_vchnl_req_hmc_info req_hmc = {};
+ struct irdma_vchnl_resp_hmc_info resp_hmc = {};
+ struct irdma_vchnl_req_init_info info = {};
+ int ret;
+
+ if (!dev->vchnl_up)
+ return -EBUSY;
+
+ info.op_code = IRDMA_VCHNL_OP_GET_HMC_FCN;
+ if (dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3) {
+ info.op_ver = IRDMA_VCHNL_OP_GET_HMC_FCN_V2;
+ req_hmc.protocol_used = dev->protocol_used;
+ info.req_parm_len = sizeof(req_hmc);
+ info.req_parm = &req_hmc;
+ info.resp_parm = &resp_hmc;
+ info.resp_parm_len = sizeof(resp_hmc);
+ }
+
+ ret = irdma_vchnl_req_send_sync(dev, &info);
+
+ if (ret)
+ return ret;
+
+ if (dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3) {
+ int i;
+
+ dev->hmc_fn_id = resp_hmc.hmc_func;
+
+ for (i = 0; i < IRDMA_MAX_USER_PRIORITY; i++) {
+ dev->qos[i].qs_handle = resp_hmc.qs_handle[i];
+ dev->qos[i].valid = true;
+ }
+ }
+ return 0;
+}
+
+/**
+ * irdma_vchnl_req_put_hmc_fcn - Free VF HMC Function
+ * @dev: RDMA device pointer
+ */
+int irdma_vchnl_req_put_hmc_fcn(struct irdma_sc_dev *dev)
+{
+ struct irdma_vchnl_req_init_info info = {};
+
+ if (!dev->vchnl_up)
+ return -EBUSY;
+
+ info.op_code = IRDMA_VCHNL_OP_PUT_HMC_FCN;
+ info.op_ver = IRDMA_VCHNL_OP_PUT_HMC_FCN_V0;
+
+ return irdma_vchnl_req_send_sync(dev, &info);
+}
+
+/**
+ * irdma_vchnl_req_get_caps - Request RDMA capabilities
+ * @dev: RDMA device pointer
+ */
+int irdma_vchnl_req_get_caps(struct irdma_sc_dev *dev)
+{
+ struct irdma_vchnl_req_init_info info = {};
+ int ret;
+
+ if (!dev->vchnl_up)
+ return -EBUSY;
+
+ info.op_code = IRDMA_VCHNL_OP_GET_RDMA_CAPS;
+ info.op_ver = IRDMA_VCHNL_OP_GET_RDMA_CAPS_V0;
+ info.resp_parm = &dev->vc_caps;
+ info.resp_parm_len = sizeof(dev->vc_caps);
+
+ ret = irdma_vchnl_req_send_sync(dev, &info);
+
+ if (ret)
+ return ret;
+
+ if (dev->vc_caps.hw_rev > IRDMA_GEN_MAX ||
+ dev->vc_caps.hw_rev < IRDMA_GEN_2) {
+ ibdev_dbg(to_ibdev(dev),
+ "ERR: %s unsupported hw_rev version 0x%0x\n",
+ __func__, dev->vc_caps.hw_rev);
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+/**
+ * irdma_vchnl_req_get_resp - Receive the inbound vchnl response.
+ * @dev: Dev pointer
+ * @vchnl_req: Vchannel request
+ */
+int irdma_vchnl_req_get_resp(struct irdma_sc_dev *dev,
+ struct irdma_vchnl_req *vchnl_req)
+{
+ struct irdma_vchnl_resp_buf *vchnl_msg_resp =
+ (struct irdma_vchnl_resp_buf *)dev->vc_recv_buf;
+ u16 resp_len;
+ int ret;
+
+ if ((uintptr_t)vchnl_req != (uintptr_t)vchnl_msg_resp->op_ctx) {
+ ibdev_dbg(to_ibdev(dev),
+ "VIRT: error vchnl context value does not match\n");
+ return -EBADMSG;
+ }
+
+ resp_len = dev->vc_recv_len - sizeof(*vchnl_msg_resp);
+ resp_len = min(resp_len, vchnl_req->parm_len);
+
+ ret = irdma_vchnl_req_verify_resp(vchnl_req, resp_len);
+ if (ret)
+ return ret;
+
+ ret = (int)vchnl_msg_resp->op_ret;
+ if (ret)
+ return ret;
+
+ vchnl_req->resp_len = 0;
+ if (vchnl_req->parm_len && vchnl_req->parm && resp_len) {
+ memcpy(vchnl_req->parm, vchnl_msg_resp->buf, resp_len);
+ vchnl_req->resp_len = resp_len;
+ ibdev_dbg(to_ibdev(dev), "VIRT: Got response, data size %u\n",
+ resp_len);
+ }
+
+ return 0;
+}
diff --git a/drivers/infiniband/hw/irdma/virtchnl.h b/drivers/infiniband/hw/irdma/virtchnl.h
new file mode 100644
index 000000000000..aa955a9125bd
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/virtchnl.h
@@ -0,0 +1,176 @@
+/* SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB */
+/* Copyright (c) 2015 - 2024 Intel Corporation */
+#ifndef IRDMA_VIRTCHNL_H
+#define IRDMA_VIRTCHNL_H
+
+#include "hmc.h"
+#include "irdma.h"
+
+/* IRDMA_VCHNL_CHNL_VER_V0 is for legacy hw, no longer supported. */
+#define IRDMA_VCHNL_CHNL_VER_V2 2
+#define IRDMA_VCHNL_CHNL_VER_MIN IRDMA_VCHNL_CHNL_VER_V2
+#define IRDMA_VCHNL_CHNL_VER_MAX IRDMA_VCHNL_CHNL_VER_V2
+#define IRDMA_VCHNL_OP_GET_HMC_FCN_V0 0
+#define IRDMA_VCHNL_OP_GET_HMC_FCN_V1 1
+#define IRDMA_VCHNL_OP_GET_HMC_FCN_V2 2
+#define IRDMA_VCHNL_OP_PUT_HMC_FCN_V0 0
+#define IRDMA_VCHNL_OP_GET_REG_LAYOUT_V0 0
+#define IRDMA_VCHNL_OP_QUEUE_VECTOR_MAP_V0 0
+#define IRDMA_VCHNL_OP_QUEUE_VECTOR_UNMAP_V0 0
+#define IRDMA_VCHNL_OP_ADD_VPORT_V0 0
+#define IRDMA_VCHNL_OP_DEL_VPORT_V0 0
+#define IRDMA_VCHNL_OP_GET_RDMA_CAPS_V0 0
+#define IRDMA_VCHNL_OP_GET_RDMA_CAPS_MIN_SIZE 1
+
+#define IRDMA_VCHNL_REG_ID_CQPTAIL 0
+#define IRDMA_VCHNL_REG_ID_CQPDB 1
+#define IRDMA_VCHNL_REG_ID_CCQPSTATUS 2
+#define IRDMA_VCHNL_REG_ID_CCQPHIGH 3
+#define IRDMA_VCHNL_REG_ID_CCQPLOW 4
+#define IRDMA_VCHNL_REG_ID_CQARM 5
+#define IRDMA_VCHNL_REG_ID_CQACK 6
+#define IRDMA_VCHNL_REG_ID_AEQALLOC 7
+#define IRDMA_VCHNL_REG_ID_CQPERRCODES 8
+#define IRDMA_VCHNL_REG_ID_WQEALLOC 9
+#define IRDMA_VCHNL_REG_ID_IPCONFIG0 10
+#define IRDMA_VCHNL_REG_ID_DB_ADDR_OFFSET 11
+#define IRDMA_VCHNL_REG_ID_DYN_CTL 12
+#define IRDMA_VCHNL_REG_ID_AEQITRMASK 13
+#define IRDMA_VCHNL_REG_ID_CEQITRMASK 14
+#define IRDMA_VCHNL_REG_INV_ID 0xFFFF
+#define IRDMA_VCHNL_REG_PAGE_REL 0x8000
+
+#define IRDMA_VCHNL_REGFLD_ID_CCQPSTATUS_CQP_OP_ERR 2
+#define IRDMA_VCHNL_REGFLD_ID_CCQPSTATUS_CCQP_DONE 5
+#define IRDMA_VCHNL_REGFLD_ID_CQPSQ_STAG_PDID 6
+#define IRDMA_VCHNL_REGFLD_ID_CQPSQ_CQ_CEQID 7
+#define IRDMA_VCHNL_REGFLD_ID_CQPSQ_CQ_CQID 8
+#define IRDMA_VCHNL_REGFLD_ID_COMMIT_FPM_CQCNT 9
+#define IRDMA_VCHNL_REGFLD_ID_UPESD_HMCN_ID 10
+#define IRDMA_VCHNL_REGFLD_INV_ID 0xFFFF
+
+#define IRDMA_VCHNL_RESP_MIN_SIZE (sizeof(struct irdma_vchnl_resp_buf))
+
+enum irdma_vchnl_ops {
+ IRDMA_VCHNL_OP_GET_VER = 0,
+ IRDMA_VCHNL_OP_GET_HMC_FCN = 1,
+ IRDMA_VCHNL_OP_PUT_HMC_FCN = 2,
+ IRDMA_VCHNL_OP_GET_REG_LAYOUT = 11,
+ IRDMA_VCHNL_OP_GET_RDMA_CAPS = 13,
+ IRDMA_VCHNL_OP_QUEUE_VECTOR_MAP = 14,
+ IRDMA_VCHNL_OP_QUEUE_VECTOR_UNMAP = 15,
+ IRDMA_VCHNL_OP_ADD_VPORT = 16,
+ IRDMA_VCHNL_OP_DEL_VPORT = 17,
+};
+
+struct irdma_vchnl_req_hmc_info {
+ u8 protocol_used;
+ u8 disable_qos;
+} __packed;
+
+struct irdma_vchnl_resp_hmc_info {
+ u16 hmc_func;
+ u16 qs_handle[IRDMA_MAX_USER_PRIORITY];
+} __packed;
+
+struct irdma_vchnl_qv_info {
+ u32 v_idx;
+ u16 ceq_idx;
+ u16 aeq_idx;
+ u8 itr_idx;
+};
+
+struct irdma_vchnl_qvlist_info {
+ u32 num_vectors;
+ struct irdma_vchnl_qv_info qv_info[];
+};
+
+struct irdma_vchnl_req_vport_info {
+ u16 vport_id;
+ u32 qp1_id;
+};
+
+struct irdma_vchnl_resp_vport_info {
+ u16 qs_handle[IRDMA_MAX_USER_PRIORITY];
+};
+
+struct irdma_vchnl_op_buf {
+ u16 op_code;
+ u16 op_ver;
+ u16 buf_len;
+ u16 rsvd;
+ u64 op_ctx;
+ u8 buf[];
+} __packed;
+
+struct irdma_vchnl_resp_buf {
+ u64 op_ctx;
+ u16 buf_len;
+ s16 op_ret;
+ u16 rsvd[2];
+ u8 buf[];
+} __packed;
+
+struct irdma_vchnl_rdma_caps {
+ u8 hw_rev;
+ u16 cqp_timeout_s;
+ u16 cqp_def_timeout_s;
+ u16 max_hw_push_len;
+} __packed;
+
+struct irdma_vchnl_init_info {
+ struct workqueue_struct *vchnl_wq;
+ enum irdma_vers hw_rev;
+ bool privileged;
+ bool is_pf;
+};
+
+struct irdma_vchnl_reg_info {
+ u32 reg_offset;
+ u16 field_cnt;
+ u16 reg_id; /* High bit of reg_id: bar or page relative */
+};
+
+struct irdma_vchnl_reg_field_info {
+ u8 fld_shift;
+ u8 fld_bits;
+ u16 fld_id;
+};
+
+struct irdma_vchnl_req {
+ struct irdma_vchnl_op_buf *vchnl_msg;
+ void *parm;
+ u32 vf_id;
+ u16 parm_len;
+ u16 resp_len;
+};
+
+struct irdma_vchnl_req_init_info {
+ void *req_parm;
+ void *resp_parm;
+ u16 req_parm_len;
+ u16 resp_parm_len;
+ u16 op_code;
+ u16 op_ver;
+} __packed;
+
+struct irdma_qos;
+
+int irdma_sc_vchnl_init(struct irdma_sc_dev *dev,
+ struct irdma_vchnl_init_info *info);
+int irdma_vchnl_req_get_ver(struct irdma_sc_dev *dev, u16 ver_req,
+ u32 *ver_res);
+int irdma_vchnl_req_get_hmc_fcn(struct irdma_sc_dev *dev);
+int irdma_vchnl_req_put_hmc_fcn(struct irdma_sc_dev *dev);
+int irdma_vchnl_req_get_caps(struct irdma_sc_dev *dev);
+int irdma_vchnl_req_get_resp(struct irdma_sc_dev *dev,
+ struct irdma_vchnl_req *vc_req);
+int irdma_vchnl_req_get_reg_layout(struct irdma_sc_dev *dev);
+int irdma_vchnl_req_aeq_vec_map(struct irdma_sc_dev *dev, u32 v_idx);
+int irdma_vchnl_req_ceq_vec_map(struct irdma_sc_dev *dev, u16 ceq_id,
+ u32 v_idx);
+int irdma_vchnl_req_add_vport(struct irdma_sc_dev *dev, u16 vport_id,
+ u32 qp1_id, struct irdma_qos *qos);
+int irdma_vchnl_req_del_vport(struct irdma_sc_dev *dev, u16 vport_id,
+ u32 qp1_id);
+#endif /* IRDMA_VIRTCHNL_H */
diff --git a/drivers/infiniband/hw/irdma/ws.c b/drivers/infiniband/hw/irdma/ws.c
new file mode 100644
index 000000000000..542bc0b1bb03
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/ws.c
@@ -0,0 +1,406 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2017 - 2021 Intel Corporation */
+#include "osdep.h"
+#include "hmc.h"
+#include "defs.h"
+#include "type.h"
+#include "protos.h"
+
+#include "ws.h"
+
+/**
+ * irdma_alloc_node - Allocate a WS node and init
+ * @vsi: vsi pointer
+ * @user_pri: user priority
+ * @node_type: Type of node, leaf or parent
+ * @parent: parent node pointer
+ */
+static struct irdma_ws_node *irdma_alloc_node(struct irdma_sc_vsi *vsi,
+ u8 user_pri,
+ enum irdma_ws_node_type node_type,
+ struct irdma_ws_node *parent)
+{
+ struct irdma_virt_mem ws_mem;
+ struct irdma_ws_node *node;
+ u16 node_index = 0;
+
+ ws_mem.size = sizeof(struct irdma_ws_node);
+ ws_mem.va = kzalloc(ws_mem.size, GFP_KERNEL);
+ if (!ws_mem.va)
+ return NULL;
+
+ if (parent) {
+ node_index = irdma_alloc_ws_node_id(vsi->dev);
+ if (node_index == IRDMA_WS_NODE_INVALID) {
+ kfree(ws_mem.va);
+ return NULL;
+ }
+ }
+
+ node = ws_mem.va;
+ node->index = node_index;
+ node->vsi_index = vsi->vsi_idx;
+ INIT_LIST_HEAD(&node->child_list_head);
+ if (node_type == WS_NODE_TYPE_LEAF) {
+ node->type_leaf = true;
+ node->traffic_class = vsi->qos[user_pri].traffic_class;
+ node->user_pri = user_pri;
+ node->rel_bw = vsi->qos[user_pri].rel_bw;
+ if (!node->rel_bw)
+ node->rel_bw = 1;
+
+ node->lan_qs_handle = vsi->qos[user_pri].lan_qos_handle;
+ node->prio_type = IRDMA_PRIO_WEIGHTED_RR;
+ } else {
+ node->rel_bw = 1;
+ node->prio_type = IRDMA_PRIO_WEIGHTED_RR;
+ node->enable = true;
+ }
+
+ node->parent = parent;
+
+ return node;
+}
+
+/**
+ * irdma_free_node - Free a WS node
+ * @vsi: VSI stricture of device
+ * @node: Pointer to node to free
+ */
+static void irdma_free_node(struct irdma_sc_vsi *vsi,
+ struct irdma_ws_node *node)
+{
+ struct irdma_virt_mem ws_mem;
+
+ if (node->index)
+ irdma_free_ws_node_id(vsi->dev, node->index);
+
+ ws_mem.va = node;
+ ws_mem.size = sizeof(struct irdma_ws_node);
+ kfree(ws_mem.va);
+}
+
+/**
+ * irdma_ws_cqp_cmd - Post CQP work scheduler node cmd
+ * @vsi: vsi pointer
+ * @node: pointer to node
+ * @cmd: add, remove or modify
+ */
+static int irdma_ws_cqp_cmd(struct irdma_sc_vsi *vsi,
+ struct irdma_ws_node *node, u8 cmd)
+{
+ struct irdma_ws_node_info node_info = {};
+
+ node_info.id = node->index;
+ node_info.vsi = node->vsi_index;
+ if (node->parent)
+ node_info.parent_id = node->parent->index;
+ else
+ node_info.parent_id = node_info.id;
+
+ node_info.weight = node->rel_bw;
+ node_info.tc = node->traffic_class;
+ node_info.prio_type = node->prio_type;
+ node_info.type_leaf = node->type_leaf;
+ node_info.enable = node->enable;
+ if (irdma_cqp_ws_node_cmd(vsi->dev, cmd, &node_info)) {
+ ibdev_dbg(to_ibdev(vsi->dev), "WS: CQP WS CMD failed\n");
+ return -ENOMEM;
+ }
+
+ if (node->type_leaf && cmd == IRDMA_OP_WS_ADD_NODE) {
+ node->qs_handle = node_info.qs_handle;
+ vsi->qos[node->user_pri].qs_handle = node_info.qs_handle;
+ }
+
+ return 0;
+}
+
+/**
+ * ws_find_node - Find SC WS node based on VSI id or TC
+ * @parent: parent node of First VSI or TC node
+ * @match_val: value to match
+ * @type: match type VSI/TC
+ */
+static struct irdma_ws_node *ws_find_node(struct irdma_ws_node *parent,
+ u16 match_val,
+ enum irdma_ws_match_type type)
+{
+ struct irdma_ws_node *node;
+
+ switch (type) {
+ case WS_MATCH_TYPE_VSI:
+ list_for_each_entry(node, &parent->child_list_head, siblings) {
+ if (node->vsi_index == match_val)
+ return node;
+ }
+ break;
+ case WS_MATCH_TYPE_TC:
+ list_for_each_entry(node, &parent->child_list_head, siblings) {
+ if (node->traffic_class == match_val)
+ return node;
+ }
+ break;
+ default:
+ break;
+ }
+
+ return NULL;
+}
+
+/**
+ * irdma_tc_in_use - Checks to see if a leaf node is in use
+ * @vsi: vsi pointer
+ * @user_pri: user priority
+ */
+static bool irdma_tc_in_use(struct irdma_sc_vsi *vsi, u8 user_pri)
+{
+ int i;
+
+ mutex_lock(&vsi->qos[user_pri].qos_mutex);
+ if (!list_empty(&vsi->qos[user_pri].qplist)) {
+ mutex_unlock(&vsi->qos[user_pri].qos_mutex);
+ return true;
+ }
+
+ /* Check if the traffic class associated with the given user priority
+ * is in use by any other user priority. If so, nothing left to do
+ */
+ for (i = 0; i < IRDMA_MAX_USER_PRIORITY; i++) {
+ if (vsi->qos[i].traffic_class == vsi->qos[user_pri].traffic_class &&
+ !list_empty(&vsi->qos[i].qplist)) {
+ mutex_unlock(&vsi->qos[user_pri].qos_mutex);
+ return true;
+ }
+ }
+ mutex_unlock(&vsi->qos[user_pri].qos_mutex);
+
+ return false;
+}
+
+/**
+ * irdma_remove_leaf - Remove leaf node unconditionally
+ * @vsi: vsi pointer
+ * @user_pri: user priority
+ */
+static void irdma_remove_leaf(struct irdma_sc_vsi *vsi, u8 user_pri)
+{
+ struct irdma_ws_node *ws_tree_root, *vsi_node, *tc_node;
+ int i;
+ u16 traffic_class;
+
+ traffic_class = vsi->qos[user_pri].traffic_class;
+ for (i = 0; i < IRDMA_MAX_USER_PRIORITY; i++)
+ if (vsi->qos[i].traffic_class == traffic_class)
+ vsi->qos[i].valid = false;
+
+ ws_tree_root = vsi->dev->ws_tree_root;
+ if (!ws_tree_root)
+ return;
+
+ vsi_node = ws_find_node(ws_tree_root, vsi->vsi_idx,
+ WS_MATCH_TYPE_VSI);
+ if (!vsi_node)
+ return;
+
+ tc_node = ws_find_node(vsi_node,
+ vsi->qos[user_pri].traffic_class,
+ WS_MATCH_TYPE_TC);
+ if (!tc_node)
+ return;
+
+ irdma_ws_cqp_cmd(vsi, tc_node, IRDMA_OP_WS_DELETE_NODE);
+ vsi->unregister_qset(vsi, tc_node);
+ list_del(&tc_node->siblings);
+ irdma_free_node(vsi, tc_node);
+ /* Check if VSI node can be freed */
+ if (list_empty(&vsi_node->child_list_head)) {
+ irdma_ws_cqp_cmd(vsi, vsi_node, IRDMA_OP_WS_DELETE_NODE);
+ list_del(&vsi_node->siblings);
+ irdma_free_node(vsi, vsi_node);
+ /* Free head node there are no remaining VSI nodes */
+ if (list_empty(&ws_tree_root->child_list_head)) {
+ irdma_ws_cqp_cmd(vsi, ws_tree_root,
+ IRDMA_OP_WS_DELETE_NODE);
+ irdma_free_node(vsi, ws_tree_root);
+ vsi->dev->ws_tree_root = NULL;
+ }
+ }
+}
+
+/**
+ * irdma_ws_add - Build work scheduler tree, set RDMA qs_handle
+ * @vsi: vsi pointer
+ * @user_pri: user priority
+ */
+int irdma_ws_add(struct irdma_sc_vsi *vsi, u8 user_pri)
+{
+ struct irdma_ws_node *ws_tree_root;
+ struct irdma_ws_node *vsi_node;
+ struct irdma_ws_node *tc_node;
+ u16 traffic_class;
+ int ret = 0;
+ int i;
+
+ mutex_lock(&vsi->dev->ws_mutex);
+ if (vsi->tc_change_pending) {
+ ret = -EBUSY;
+ goto exit;
+ }
+
+ if (vsi->qos[user_pri].valid)
+ goto exit;
+
+ ws_tree_root = vsi->dev->ws_tree_root;
+ if (!ws_tree_root) {
+ ibdev_dbg(to_ibdev(vsi->dev), "WS: Creating root node\n");
+ ws_tree_root = irdma_alloc_node(vsi, user_pri,
+ WS_NODE_TYPE_PARENT, NULL);
+ if (!ws_tree_root) {
+ ret = -ENOMEM;
+ goto exit;
+ }
+
+ ret = irdma_ws_cqp_cmd(vsi, ws_tree_root, IRDMA_OP_WS_ADD_NODE);
+ if (ret) {
+ irdma_free_node(vsi, ws_tree_root);
+ goto exit;
+ }
+
+ vsi->dev->ws_tree_root = ws_tree_root;
+ }
+
+ /* Find a second tier node that matches the VSI */
+ vsi_node = ws_find_node(ws_tree_root, vsi->vsi_idx,
+ WS_MATCH_TYPE_VSI);
+
+ /* If VSI node doesn't exist, add one */
+ if (!vsi_node) {
+ ibdev_dbg(to_ibdev(vsi->dev),
+ "WS: Node not found matching VSI %d\n",
+ vsi->vsi_idx);
+ vsi_node = irdma_alloc_node(vsi, user_pri, WS_NODE_TYPE_PARENT,
+ ws_tree_root);
+ if (!vsi_node) {
+ ret = -ENOMEM;
+ goto vsi_add_err;
+ }
+
+ ret = irdma_ws_cqp_cmd(vsi, vsi_node, IRDMA_OP_WS_ADD_NODE);
+ if (ret) {
+ irdma_free_node(vsi, vsi_node);
+ goto vsi_add_err;
+ }
+
+ list_add(&vsi_node->siblings, &ws_tree_root->child_list_head);
+ }
+
+ ibdev_dbg(to_ibdev(vsi->dev),
+ "WS: Using node %d which represents VSI %d\n",
+ vsi_node->index, vsi->vsi_idx);
+ traffic_class = vsi->qos[user_pri].traffic_class;
+ tc_node = ws_find_node(vsi_node, traffic_class,
+ WS_MATCH_TYPE_TC);
+ if (!tc_node) {
+ /* Add leaf node */
+ ibdev_dbg(to_ibdev(vsi->dev),
+ "WS: Node not found matching VSI %d and TC %d\n",
+ vsi->vsi_idx, traffic_class);
+ tc_node = irdma_alloc_node(vsi, user_pri, WS_NODE_TYPE_LEAF,
+ vsi_node);
+ if (!tc_node) {
+ ret = -ENOMEM;
+ goto leaf_add_err;
+ }
+
+ ret = irdma_ws_cqp_cmd(vsi, tc_node, IRDMA_OP_WS_ADD_NODE);
+ if (ret) {
+ irdma_free_node(vsi, tc_node);
+ goto leaf_add_err;
+ }
+
+ list_add(&tc_node->siblings, &vsi_node->child_list_head);
+ /*
+ * callback to LAN to update the LAN tree with our node
+ */
+ ret = vsi->register_qset(vsi, tc_node);
+ if (ret)
+ goto reg_err;
+
+ tc_node->enable = true;
+ ret = irdma_ws_cqp_cmd(vsi, tc_node, IRDMA_OP_WS_MODIFY_NODE);
+ if (ret) {
+ vsi->unregister_qset(vsi, tc_node);
+ goto reg_err;
+ }
+ }
+ ibdev_dbg(to_ibdev(vsi->dev),
+ "WS: Using node %d which represents VSI %d TC %d\n",
+ tc_node->index, vsi->vsi_idx, traffic_class);
+ /*
+ * Iterate through other UPs and update the QS handle if they have
+ * a matching traffic class.
+ */
+ for (i = 0; i < IRDMA_MAX_USER_PRIORITY; i++) {
+ if (vsi->qos[i].traffic_class == traffic_class) {
+ vsi->qos[i].qs_handle = tc_node->qs_handle;
+ vsi->qos[i].lan_qos_handle = tc_node->lan_qs_handle;
+ vsi->qos[i].l2_sched_node_id = tc_node->l2_sched_node_id;
+ vsi->qos[i].valid = true;
+ }
+ }
+ goto exit;
+
+reg_err:
+ irdma_ws_cqp_cmd(vsi, tc_node, IRDMA_OP_WS_DELETE_NODE);
+ list_del(&tc_node->siblings);
+ irdma_free_node(vsi, tc_node);
+leaf_add_err:
+ if (list_empty(&vsi_node->child_list_head)) {
+ if (irdma_ws_cqp_cmd(vsi, vsi_node, IRDMA_OP_WS_DELETE_NODE))
+ goto exit;
+ list_del(&vsi_node->siblings);
+ irdma_free_node(vsi, vsi_node);
+ }
+
+vsi_add_err:
+ /* Free head node there are no remaining VSI nodes */
+ if (list_empty(&ws_tree_root->child_list_head)) {
+ irdma_ws_cqp_cmd(vsi, ws_tree_root, IRDMA_OP_WS_DELETE_NODE);
+ vsi->dev->ws_tree_root = NULL;
+ irdma_free_node(vsi, ws_tree_root);
+ }
+
+exit:
+ mutex_unlock(&vsi->dev->ws_mutex);
+ return ret;
+}
+
+/**
+ * irdma_ws_remove - Free WS scheduler node, update WS tree
+ * @vsi: vsi pointer
+ * @user_pri: user priority
+ */
+void irdma_ws_remove(struct irdma_sc_vsi *vsi, u8 user_pri)
+{
+ mutex_lock(&vsi->dev->ws_mutex);
+ if (irdma_tc_in_use(vsi, user_pri))
+ goto exit;
+ irdma_remove_leaf(vsi, user_pri);
+exit:
+ mutex_unlock(&vsi->dev->ws_mutex);
+}
+
+/**
+ * irdma_ws_reset - Reset entire WS tree
+ * @vsi: vsi pointer
+ */
+void irdma_ws_reset(struct irdma_sc_vsi *vsi)
+{
+ u8 i;
+
+ mutex_lock(&vsi->dev->ws_mutex);
+ for (i = 0; i < IRDMA_MAX_USER_PRIORITY; ++i)
+ irdma_remove_leaf(vsi, i);
+ mutex_unlock(&vsi->dev->ws_mutex);
+}
diff --git a/drivers/infiniband/hw/irdma/ws.h b/drivers/infiniband/hw/irdma/ws.h
new file mode 100644
index 000000000000..45490031a389
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/ws.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2015 - 2020 Intel Corporation */
+#ifndef IRDMA_WS_H
+#define IRDMA_WS_H
+
+#include "osdep.h"
+
+enum irdma_ws_node_type {
+ WS_NODE_TYPE_PARENT,
+ WS_NODE_TYPE_LEAF,
+};
+
+enum irdma_ws_match_type {
+ WS_MATCH_TYPE_VSI,
+ WS_MATCH_TYPE_TC,
+};
+
+struct irdma_ws_node {
+ struct list_head siblings;
+ struct list_head child_list_head;
+ struct irdma_ws_node *parent;
+ u64 lan_qs_handle; /* opaque handle used by LAN */
+ u32 l2_sched_node_id;
+ u16 index;
+ u16 qs_handle;
+ u16 vsi_index;
+ u8 traffic_class;
+ u8 user_pri;
+ u8 rel_bw;
+ u8 abstraction_layer; /* used for splitting a TC */
+ u8 prio_type;
+ bool type_leaf:1;
+ bool enable:1;
+};
+
+struct irdma_sc_vsi;
+int irdma_ws_add(struct irdma_sc_vsi *vsi, u8 user_pri);
+void irdma_ws_remove(struct irdma_sc_vsi *vsi, u8 user_pri);
+void irdma_ws_reset(struct irdma_sc_vsi *vsi);
+
+#endif /* IRDMA_WS_H */
diff --git a/drivers/infiniband/hw/mana/Kconfig b/drivers/infiniband/hw/mana/Kconfig
new file mode 100644
index 000000000000..546640657bac
--- /dev/null
+++ b/drivers/infiniband/hw/mana/Kconfig
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0-only
+config MANA_INFINIBAND
+ tristate "Microsoft Azure Network Adapter support"
+ depends on NETDEVICES && ETHERNET && PCI && MICROSOFT_MANA
+ help
+ This driver provides low-level RDMA support for Microsoft Azure
+ Network Adapter (MANA). MANA supports RDMA features that can be used
+ for workloads (e.g. DPDK, MPI etc) that uses RDMA verbs to directly
+ access hardware from user-mode processes in Microsoft Azure cloud
+ environment.
diff --git a/drivers/infiniband/hw/mana/Makefile b/drivers/infiniband/hw/mana/Makefile
new file mode 100644
index 000000000000..921c05e08b11
--- /dev/null
+++ b/drivers/infiniband/hw/mana/Makefile
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
+obj-$(CONFIG_MANA_INFINIBAND) += mana_ib.o
+
+mana_ib-y := device.o main.o wq.o qp.o cq.o mr.o ah.o wr.o counters.o
diff --git a/drivers/infiniband/hw/mana/ah.c b/drivers/infiniband/hw/mana/ah.c
new file mode 100644
index 000000000000..f56952eebbaa
--- /dev/null
+++ b/drivers/infiniband/hw/mana/ah.c
@@ -0,0 +1,58 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2024, Microsoft Corporation. All rights reserved.
+ */
+
+#include "mana_ib.h"
+
+int mana_ib_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *attr,
+ struct ib_udata *udata)
+{
+ struct mana_ib_dev *mdev = container_of(ibah->device, struct mana_ib_dev, ib_dev);
+ struct mana_ib_ah *ah = container_of(ibah, struct mana_ib_ah, ibah);
+ struct rdma_ah_attr *ah_attr = attr->ah_attr;
+ const struct ib_global_route *grh;
+ enum rdma_network_type ntype;
+
+ if (ah_attr->type != RDMA_AH_ATTR_TYPE_ROCE ||
+ !(rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH))
+ return -EINVAL;
+
+ if (udata)
+ return -EINVAL;
+
+ ah->av = dma_pool_zalloc(mdev->av_pool, GFP_ATOMIC, &ah->dma_handle);
+ if (!ah->av)
+ return -ENOMEM;
+
+ grh = rdma_ah_read_grh(ah_attr);
+ ntype = rdma_gid_attr_network_type(grh->sgid_attr);
+
+ copy_in_reverse(ah->av->dest_mac, ah_attr->roce.dmac, ETH_ALEN);
+ ah->av->udp_src_port = rdma_flow_label_to_udp_sport(grh->flow_label);
+ ah->av->hop_limit = grh->hop_limit;
+ ah->av->dscp = (grh->traffic_class >> 2) & 0x3f;
+ ah->av->is_ipv6 = (ntype == RDMA_NETWORK_IPV6);
+
+ if (ah->av->is_ipv6) {
+ copy_in_reverse(ah->av->dest_ip, grh->dgid.raw, 16);
+ copy_in_reverse(ah->av->src_ip, grh->sgid_attr->gid.raw, 16);
+ } else {
+ ah->av->dest_ip[10] = 0xFF;
+ ah->av->dest_ip[11] = 0xFF;
+ copy_in_reverse(&ah->av->dest_ip[12], &grh->dgid.raw[12], 4);
+ copy_in_reverse(&ah->av->src_ip[12], &grh->sgid_attr->gid.raw[12], 4);
+ }
+
+ return 0;
+}
+
+int mana_ib_destroy_ah(struct ib_ah *ibah, u32 flags)
+{
+ struct mana_ib_dev *mdev = container_of(ibah->device, struct mana_ib_dev, ib_dev);
+ struct mana_ib_ah *ah = container_of(ibah, struct mana_ib_ah, ibah);
+
+ dma_pool_free(mdev->av_pool, ah->av, ah->dma_handle);
+
+ return 0;
+}
diff --git a/drivers/infiniband/hw/mana/counters.c b/drivers/infiniband/hw/mana/counters.c
new file mode 100644
index 000000000000..e964e74be48d
--- /dev/null
+++ b/drivers/infiniband/hw/mana/counters.c
@@ -0,0 +1,179 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2024, Microsoft Corporation. All rights reserved.
+ */
+
+#include "counters.h"
+
+static const struct rdma_stat_desc mana_ib_port_stats_desc[] = {
+ [MANA_IB_REQUESTER_TIMEOUT].name = "requester_timeout",
+ [MANA_IB_REQUESTER_OOS_NAK].name = "requester_oos_nak",
+ [MANA_IB_REQUESTER_RNR_NAK].name = "requester_rnr_nak",
+ [MANA_IB_RESPONDER_RNR_NAK].name = "responder_rnr_nak",
+ [MANA_IB_RESPONDER_OOS].name = "responder_oos",
+ [MANA_IB_RESPONDER_DUP_REQUEST].name = "responder_dup_request",
+ [MANA_IB_REQUESTER_IMPLICIT_NAK].name = "requester_implicit_nak",
+ [MANA_IB_REQUESTER_READRESP_PSN_MISMATCH].name = "requester_readresp_psn_mismatch",
+ [MANA_IB_NAK_INV_REQ].name = "nak_inv_req",
+ [MANA_IB_NAK_ACCESS_ERR].name = "nak_access_error",
+ [MANA_IB_NAK_OPP_ERR].name = "nak_opp_error",
+ [MANA_IB_NAK_INV_READ].name = "nak_inv_read",
+ [MANA_IB_RESPONDER_LOCAL_LEN_ERR].name = "responder_local_len_error",
+ [MANA_IB_REQUESTOR_LOCAL_PROT_ERR].name = "requestor_local_prot_error",
+ [MANA_IB_RESPONDER_REM_ACCESS_ERR].name = "responder_rem_access_error",
+ [MANA_IB_RESPONDER_LOCAL_QP_ERR].name = "responder_local_qp_error",
+ [MANA_IB_RESPONDER_MALFORMED_WQE].name = "responder_malformed_wqe",
+ [MANA_IB_GENERAL_HW_ERR].name = "general_hw_error",
+ [MANA_IB_REQUESTER_RNR_NAK_RETRIES_EXCEEDED].name = "requester_rnr_nak_retries_exceeded",
+ [MANA_IB_REQUESTER_RETRIES_EXCEEDED].name = "requester_retries_exceeded",
+ [MANA_IB_TOTAL_FATAL_ERR].name = "total_fatal_error",
+ [MANA_IB_RECEIVED_CNPS].name = "received_cnps",
+ [MANA_IB_NUM_QPS_CONGESTED].name = "num_qps_congested",
+ [MANA_IB_RATE_INC_EVENTS].name = "rate_inc_events",
+ [MANA_IB_NUM_QPS_RECOVERED].name = "num_qps_recovered",
+ [MANA_IB_CURRENT_RATE].name = "current_rate",
+ [MANA_IB_DUP_RX_REQ].name = "dup_rx_requests",
+ [MANA_IB_TX_BYTES].name = "tx_bytes",
+ [MANA_IB_RX_BYTES].name = "rx_bytes",
+ [MANA_IB_RX_SEND_REQ].name = "rx_send_requests",
+ [MANA_IB_RX_WRITE_REQ].name = "rx_write_requests",
+ [MANA_IB_RX_READ_REQ].name = "rx_read_requests",
+ [MANA_IB_TX_PKT].name = "tx_packets",
+ [MANA_IB_RX_PKT].name = "rx_packets",
+};
+
+static const struct rdma_stat_desc mana_ib_device_stats_desc[] = {
+ [MANA_IB_SENT_CNPS].name = "sent_cnps",
+ [MANA_IB_RECEIVED_ECNS].name = "received_ecns",
+ [MANA_IB_RECEIVED_CNP_COUNT].name = "received_cnp_count",
+ [MANA_IB_QP_CONGESTED_EVENTS].name = "qp_congested_events",
+ [MANA_IB_QP_RECOVERED_EVENTS].name = "qp_recovered_events",
+ [MANA_IB_DEV_RATE_INC_EVENTS].name = "rate_inc_events",
+};
+
+struct rdma_hw_stats *mana_ib_alloc_hw_device_stats(struct ib_device *ibdev)
+{
+ return rdma_alloc_hw_stats_struct(mana_ib_device_stats_desc,
+ ARRAY_SIZE(mana_ib_device_stats_desc),
+ RDMA_HW_STATS_DEFAULT_LIFESPAN);
+}
+
+struct rdma_hw_stats *mana_ib_alloc_hw_port_stats(struct ib_device *ibdev,
+ u32 port_num)
+{
+ return rdma_alloc_hw_stats_struct(mana_ib_port_stats_desc,
+ ARRAY_SIZE(mana_ib_port_stats_desc),
+ RDMA_HW_STATS_DEFAULT_LIFESPAN);
+}
+
+static int mana_ib_get_hw_device_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats)
+{
+ struct mana_ib_dev *mdev = container_of(ibdev, struct mana_ib_dev,
+ ib_dev);
+ struct mana_rnic_query_device_cntrs_resp resp = {};
+ struct mana_rnic_query_device_cntrs_req req = {};
+ int err;
+
+ mana_gd_init_req_hdr(&req.hdr, MANA_IB_QUERY_DEVICE_COUNTERS,
+ sizeof(req), sizeof(resp));
+ req.hdr.dev_id = mdev->gdma_dev->dev_id;
+ req.adapter = mdev->adapter_handle;
+
+ err = mana_gd_send_request(mdev_to_gc(mdev), sizeof(req), &req,
+ sizeof(resp), &resp);
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to query device counters err %d",
+ err);
+ return err;
+ }
+
+ stats->value[MANA_IB_SENT_CNPS] = resp.sent_cnps;
+ stats->value[MANA_IB_RECEIVED_ECNS] = resp.received_ecns;
+ stats->value[MANA_IB_RECEIVED_CNP_COUNT] = resp.received_cnp_count;
+ stats->value[MANA_IB_QP_CONGESTED_EVENTS] = resp.qp_congested_events;
+ stats->value[MANA_IB_QP_RECOVERED_EVENTS] = resp.qp_recovered_events;
+ stats->value[MANA_IB_DEV_RATE_INC_EVENTS] = resp.rate_inc_events;
+
+ return ARRAY_SIZE(mana_ib_device_stats_desc);
+}
+
+static int mana_ib_get_hw_port_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats,
+ u32 port_num)
+{
+ struct mana_ib_dev *mdev = container_of(ibdev, struct mana_ib_dev,
+ ib_dev);
+ struct mana_rnic_query_vf_cntrs_resp resp = {};
+ struct mana_rnic_query_vf_cntrs_req req = {};
+ int err;
+
+ mana_gd_init_req_hdr(&req.hdr, MANA_IB_QUERY_VF_COUNTERS,
+ sizeof(req), sizeof(resp));
+ req.hdr.resp.msg_version = GDMA_MESSAGE_V2;
+ req.hdr.dev_id = mdev->gdma_dev->dev_id;
+ req.adapter = mdev->adapter_handle;
+
+ err = mana_gd_send_request(mdev_to_gc(mdev), sizeof(req), &req,
+ sizeof(resp), &resp);
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to query vf counters err %d",
+ err);
+ return err;
+ }
+
+ stats->value[MANA_IB_REQUESTER_TIMEOUT] = resp.requester_timeout;
+ stats->value[MANA_IB_REQUESTER_OOS_NAK] = resp.requester_oos_nak;
+ stats->value[MANA_IB_REQUESTER_RNR_NAK] = resp.requester_rnr_nak;
+ stats->value[MANA_IB_RESPONDER_RNR_NAK] = resp.responder_rnr_nak;
+ stats->value[MANA_IB_RESPONDER_OOS] = resp.responder_oos;
+ stats->value[MANA_IB_RESPONDER_DUP_REQUEST] = resp.responder_dup_request;
+ stats->value[MANA_IB_REQUESTER_IMPLICIT_NAK] =
+ resp.requester_implicit_nak;
+ stats->value[MANA_IB_REQUESTER_READRESP_PSN_MISMATCH] =
+ resp.requester_readresp_psn_mismatch;
+ stats->value[MANA_IB_NAK_INV_REQ] = resp.nak_inv_req;
+ stats->value[MANA_IB_NAK_ACCESS_ERR] = resp.nak_access_err;
+ stats->value[MANA_IB_NAK_OPP_ERR] = resp.nak_opp_err;
+ stats->value[MANA_IB_NAK_INV_READ] = resp.nak_inv_read;
+ stats->value[MANA_IB_RESPONDER_LOCAL_LEN_ERR] =
+ resp.responder_local_len_err;
+ stats->value[MANA_IB_REQUESTOR_LOCAL_PROT_ERR] =
+ resp.requestor_local_prot_err;
+ stats->value[MANA_IB_RESPONDER_REM_ACCESS_ERR] =
+ resp.responder_rem_access_err;
+ stats->value[MANA_IB_RESPONDER_LOCAL_QP_ERR] =
+ resp.responder_local_qp_err;
+ stats->value[MANA_IB_RESPONDER_MALFORMED_WQE] =
+ resp.responder_malformed_wqe;
+ stats->value[MANA_IB_GENERAL_HW_ERR] = resp.general_hw_err;
+ stats->value[MANA_IB_REQUESTER_RNR_NAK_RETRIES_EXCEEDED] =
+ resp.requester_rnr_nak_retries_exceeded;
+ stats->value[MANA_IB_REQUESTER_RETRIES_EXCEEDED] =
+ resp.requester_retries_exceeded;
+ stats->value[MANA_IB_TOTAL_FATAL_ERR] = resp.total_fatal_err;
+
+ stats->value[MANA_IB_RECEIVED_CNPS] = resp.received_cnps;
+ stats->value[MANA_IB_NUM_QPS_CONGESTED] = resp.num_qps_congested;
+ stats->value[MANA_IB_RATE_INC_EVENTS] = resp.rate_inc_events;
+ stats->value[MANA_IB_NUM_QPS_RECOVERED] = resp.num_qps_recovered;
+ stats->value[MANA_IB_CURRENT_RATE] = resp.current_rate;
+
+ stats->value[MANA_IB_DUP_RX_REQ] = resp.dup_rx_req;
+ stats->value[MANA_IB_TX_BYTES] = resp.tx_bytes;
+ stats->value[MANA_IB_RX_BYTES] = resp.rx_bytes;
+ stats->value[MANA_IB_RX_SEND_REQ] = resp.rx_send_req;
+ stats->value[MANA_IB_RX_WRITE_REQ] = resp.rx_write_req;
+ stats->value[MANA_IB_RX_READ_REQ] = resp.rx_read_req;
+ stats->value[MANA_IB_TX_PKT] = resp.tx_pkt;
+ stats->value[MANA_IB_RX_PKT] = resp.rx_pkt;
+
+ return ARRAY_SIZE(mana_ib_port_stats_desc);
+}
+
+int mana_ib_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats,
+ u32 port_num, int index)
+{
+ if (!port_num)
+ return mana_ib_get_hw_device_stats(ibdev, stats);
+ else
+ return mana_ib_get_hw_port_stats(ibdev, stats, port_num);
+}
diff --git a/drivers/infiniband/hw/mana/counters.h b/drivers/infiniband/hw/mana/counters.h
new file mode 100644
index 000000000000..f68e776bb41d
--- /dev/null
+++ b/drivers/infiniband/hw/mana/counters.h
@@ -0,0 +1,62 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2024 Microsoft Corporation. All rights reserved.
+ */
+
+#ifndef _COUNTERS_H_
+#define _COUNTERS_H_
+
+#include "mana_ib.h"
+
+enum mana_ib_port_counters {
+ MANA_IB_REQUESTER_TIMEOUT,
+ MANA_IB_REQUESTER_OOS_NAK,
+ MANA_IB_REQUESTER_RNR_NAK,
+ MANA_IB_RESPONDER_RNR_NAK,
+ MANA_IB_RESPONDER_OOS,
+ MANA_IB_RESPONDER_DUP_REQUEST,
+ MANA_IB_REQUESTER_IMPLICIT_NAK,
+ MANA_IB_REQUESTER_READRESP_PSN_MISMATCH,
+ MANA_IB_NAK_INV_REQ,
+ MANA_IB_NAK_ACCESS_ERR,
+ MANA_IB_NAK_OPP_ERR,
+ MANA_IB_NAK_INV_READ,
+ MANA_IB_RESPONDER_LOCAL_LEN_ERR,
+ MANA_IB_REQUESTOR_LOCAL_PROT_ERR,
+ MANA_IB_RESPONDER_REM_ACCESS_ERR,
+ MANA_IB_RESPONDER_LOCAL_QP_ERR,
+ MANA_IB_RESPONDER_MALFORMED_WQE,
+ MANA_IB_GENERAL_HW_ERR,
+ MANA_IB_REQUESTER_RNR_NAK_RETRIES_EXCEEDED,
+ MANA_IB_REQUESTER_RETRIES_EXCEEDED,
+ MANA_IB_TOTAL_FATAL_ERR,
+ MANA_IB_RECEIVED_CNPS,
+ MANA_IB_NUM_QPS_CONGESTED,
+ MANA_IB_RATE_INC_EVENTS,
+ MANA_IB_NUM_QPS_RECOVERED,
+ MANA_IB_CURRENT_RATE,
+ MANA_IB_DUP_RX_REQ,
+ MANA_IB_TX_BYTES,
+ MANA_IB_RX_BYTES,
+ MANA_IB_RX_SEND_REQ,
+ MANA_IB_RX_WRITE_REQ,
+ MANA_IB_RX_READ_REQ,
+ MANA_IB_TX_PKT,
+ MANA_IB_RX_PKT,
+};
+
+enum mana_ib_device_counters {
+ MANA_IB_SENT_CNPS,
+ MANA_IB_RECEIVED_ECNS,
+ MANA_IB_RECEIVED_CNP_COUNT,
+ MANA_IB_QP_CONGESTED_EVENTS,
+ MANA_IB_QP_RECOVERED_EVENTS,
+ MANA_IB_DEV_RATE_INC_EVENTS,
+};
+
+struct rdma_hw_stats *mana_ib_alloc_hw_port_stats(struct ib_device *ibdev,
+ u32 port_num);
+struct rdma_hw_stats *mana_ib_alloc_hw_device_stats(struct ib_device *ibdev);
+int mana_ib_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats,
+ u32 port_num, int index);
+#endif /* _COUNTERS_H_ */
diff --git a/drivers/infiniband/hw/mana/cq.c b/drivers/infiniband/hw/mana/cq.c
new file mode 100644
index 000000000000..1becc8779123
--- /dev/null
+++ b/drivers/infiniband/hw/mana/cq.c
@@ -0,0 +1,342 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2022, Microsoft Corporation. All rights reserved.
+ */
+
+#include "mana_ib.h"
+
+int mana_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_udata *udata = &attrs->driver_udata;
+ struct mana_ib_cq *cq = container_of(ibcq, struct mana_ib_cq, ibcq);
+ struct mana_ib_create_cq_resp resp = {};
+ struct mana_ib_ucontext *mana_ucontext;
+ struct ib_device *ibdev = ibcq->device;
+ struct mana_ib_create_cq ucmd = {};
+ struct mana_ib_dev *mdev;
+ bool is_rnic_cq;
+ u32 doorbell;
+ u32 buf_size;
+ int err;
+
+ mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
+
+ cq->comp_vector = attr->comp_vector % ibdev->num_comp_vectors;
+ cq->cq_handle = INVALID_MANA_HANDLE;
+
+ if (udata) {
+ if (udata->inlen < offsetof(struct mana_ib_create_cq, flags))
+ return -EINVAL;
+
+ err = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen));
+ if (err) {
+ ibdev_dbg(ibdev, "Failed to copy from udata for create cq, %d\n", err);
+ return err;
+ }
+
+ is_rnic_cq = !!(ucmd.flags & MANA_IB_CREATE_RNIC_CQ);
+
+ if ((!is_rnic_cq && attr->cqe > mdev->adapter_caps.max_qp_wr) ||
+ attr->cqe > U32_MAX / COMP_ENTRY_SIZE) {
+ ibdev_dbg(ibdev, "CQE %d exceeding limit\n", attr->cqe);
+ return -EINVAL;
+ }
+
+ cq->cqe = attr->cqe;
+ err = mana_ib_create_queue(mdev, ucmd.buf_addr, cq->cqe * COMP_ENTRY_SIZE,
+ &cq->queue);
+ if (err) {
+ ibdev_dbg(ibdev, "Failed to create queue for create cq, %d\n", err);
+ return err;
+ }
+
+ mana_ucontext = rdma_udata_to_drv_context(udata, struct mana_ib_ucontext,
+ ibucontext);
+ doorbell = mana_ucontext->doorbell;
+ } else {
+ is_rnic_cq = true;
+ buf_size = MANA_PAGE_ALIGN(roundup_pow_of_two(attr->cqe * COMP_ENTRY_SIZE));
+ cq->cqe = buf_size / COMP_ENTRY_SIZE;
+ err = mana_ib_create_kernel_queue(mdev, buf_size, GDMA_CQ, &cq->queue);
+ if (err) {
+ ibdev_dbg(ibdev, "Failed to create kernel queue for create cq, %d\n", err);
+ return err;
+ }
+ doorbell = mdev->gdma_dev->doorbell;
+ }
+
+ if (is_rnic_cq) {
+ err = mana_ib_gd_create_cq(mdev, cq, doorbell);
+ if (err) {
+ ibdev_dbg(ibdev, "Failed to create RNIC cq, %d\n", err);
+ goto err_destroy_queue;
+ }
+
+ err = mana_ib_install_cq_cb(mdev, cq);
+ if (err) {
+ ibdev_dbg(ibdev, "Failed to install cq callback, %d\n", err);
+ goto err_destroy_rnic_cq;
+ }
+ }
+
+ if (udata) {
+ resp.cqid = cq->queue.id;
+ err = ib_copy_to_udata(udata, &resp, min(sizeof(resp), udata->outlen));
+ if (err) {
+ ibdev_dbg(&mdev->ib_dev, "Failed to copy to udata, %d\n", err);
+ goto err_remove_cq_cb;
+ }
+ }
+
+ spin_lock_init(&cq->cq_lock);
+ INIT_LIST_HEAD(&cq->list_send_qp);
+ INIT_LIST_HEAD(&cq->list_recv_qp);
+
+ return 0;
+
+err_remove_cq_cb:
+ mana_ib_remove_cq_cb(mdev, cq);
+err_destroy_rnic_cq:
+ mana_ib_gd_destroy_cq(mdev, cq);
+err_destroy_queue:
+ mana_ib_destroy_queue(mdev, &cq->queue);
+
+ return err;
+}
+
+int mana_ib_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
+{
+ struct mana_ib_cq *cq = container_of(ibcq, struct mana_ib_cq, ibcq);
+ struct ib_device *ibdev = ibcq->device;
+ struct mana_ib_dev *mdev;
+
+ mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
+
+ mana_ib_remove_cq_cb(mdev, cq);
+
+ /* Ignore return code as there is not much we can do about it.
+ * The error message is printed inside.
+ */
+ mana_ib_gd_destroy_cq(mdev, cq);
+
+ mana_ib_destroy_queue(mdev, &cq->queue);
+
+ return 0;
+}
+
+static void mana_ib_cq_handler(void *ctx, struct gdma_queue *gdma_cq)
+{
+ struct mana_ib_cq *cq = ctx;
+
+ if (cq->ibcq.comp_handler)
+ cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
+}
+
+int mana_ib_install_cq_cb(struct mana_ib_dev *mdev, struct mana_ib_cq *cq)
+{
+ struct gdma_context *gc = mdev_to_gc(mdev);
+ struct gdma_queue *gdma_cq;
+
+ if (cq->queue.id >= gc->max_num_cqs)
+ return -EINVAL;
+ /* Create CQ table entry */
+ WARN_ON(gc->cq_table[cq->queue.id]);
+ if (cq->queue.kmem)
+ gdma_cq = cq->queue.kmem;
+ else
+ gdma_cq = kzalloc(sizeof(*gdma_cq), GFP_KERNEL);
+ if (!gdma_cq)
+ return -ENOMEM;
+
+ gdma_cq->cq.context = cq;
+ gdma_cq->type = GDMA_CQ;
+ gdma_cq->cq.callback = mana_ib_cq_handler;
+ gdma_cq->id = cq->queue.id;
+ gc->cq_table[cq->queue.id] = gdma_cq;
+ return 0;
+}
+
+void mana_ib_remove_cq_cb(struct mana_ib_dev *mdev, struct mana_ib_cq *cq)
+{
+ struct gdma_context *gc = mdev_to_gc(mdev);
+
+ if (cq->queue.id >= gc->max_num_cqs || cq->queue.id == INVALID_QUEUE_ID)
+ return;
+
+ if (cq->queue.kmem)
+ /* Then it will be cleaned and removed by the mana */
+ return;
+
+ kfree(gc->cq_table[cq->queue.id]);
+ gc->cq_table[cq->queue.id] = NULL;
+}
+
+int mana_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
+{
+ struct mana_ib_cq *cq = container_of(ibcq, struct mana_ib_cq, ibcq);
+ struct gdma_queue *gdma_cq = cq->queue.kmem;
+
+ if (!gdma_cq)
+ return -EINVAL;
+
+ mana_gd_ring_cq(gdma_cq, SET_ARM_BIT);
+ return 0;
+}
+
+static inline void handle_ud_sq_cqe(struct mana_ib_qp *qp, struct gdma_comp *cqe)
+{
+ struct mana_rdma_cqe *rdma_cqe = (struct mana_rdma_cqe *)cqe->cqe_data;
+ struct gdma_queue *wq = qp->ud_qp.queues[MANA_UD_SEND_QUEUE].kmem;
+ struct ud_sq_shadow_wqe *shadow_wqe;
+
+ shadow_wqe = shadow_queue_get_next_to_complete(&qp->shadow_sq);
+ if (!shadow_wqe)
+ return;
+
+ shadow_wqe->header.error_code = rdma_cqe->ud_send.vendor_error;
+
+ wq->tail += shadow_wqe->header.posted_wqe_size;
+ shadow_queue_advance_next_to_complete(&qp->shadow_sq);
+}
+
+static inline void handle_ud_rq_cqe(struct mana_ib_qp *qp, struct gdma_comp *cqe)
+{
+ struct mana_rdma_cqe *rdma_cqe = (struct mana_rdma_cqe *)cqe->cqe_data;
+ struct gdma_queue *wq = qp->ud_qp.queues[MANA_UD_RECV_QUEUE].kmem;
+ struct ud_rq_shadow_wqe *shadow_wqe;
+
+ shadow_wqe = shadow_queue_get_next_to_complete(&qp->shadow_rq);
+ if (!shadow_wqe)
+ return;
+
+ shadow_wqe->byte_len = rdma_cqe->ud_recv.msg_len;
+ shadow_wqe->src_qpn = rdma_cqe->ud_recv.src_qpn;
+ shadow_wqe->header.error_code = IB_WC_SUCCESS;
+
+ wq->tail += shadow_wqe->header.posted_wqe_size;
+ shadow_queue_advance_next_to_complete(&qp->shadow_rq);
+}
+
+static void mana_handle_cqe(struct mana_ib_dev *mdev, struct gdma_comp *cqe)
+{
+ struct mana_ib_qp *qp = mana_get_qp_ref(mdev, cqe->wq_num, cqe->is_sq);
+
+ if (!qp)
+ return;
+
+ if (qp->ibqp.qp_type == IB_QPT_GSI || qp->ibqp.qp_type == IB_QPT_UD) {
+ if (cqe->is_sq)
+ handle_ud_sq_cqe(qp, cqe);
+ else
+ handle_ud_rq_cqe(qp, cqe);
+ }
+
+ mana_put_qp_ref(qp);
+}
+
+static void fill_verbs_from_shadow_wqe(struct mana_ib_qp *qp, struct ib_wc *wc,
+ const struct shadow_wqe_header *shadow_wqe)
+{
+ const struct ud_rq_shadow_wqe *ud_wqe = (const struct ud_rq_shadow_wqe *)shadow_wqe;
+
+ wc->wr_id = shadow_wqe->wr_id;
+ wc->status = shadow_wqe->error_code;
+ wc->opcode = shadow_wqe->opcode;
+ wc->vendor_err = shadow_wqe->error_code;
+ wc->wc_flags = 0;
+ wc->qp = &qp->ibqp;
+ wc->pkey_index = 0;
+
+ if (shadow_wqe->opcode == IB_WC_RECV) {
+ wc->byte_len = ud_wqe->byte_len;
+ wc->src_qp = ud_wqe->src_qpn;
+ wc->wc_flags |= IB_WC_GRH;
+ }
+}
+
+static int mana_process_completions(struct mana_ib_cq *cq, int nwc, struct ib_wc *wc)
+{
+ struct shadow_wqe_header *shadow_wqe;
+ struct mana_ib_qp *qp;
+ int wc_index = 0;
+
+ /* process send shadow queue completions */
+ list_for_each_entry(qp, &cq->list_send_qp, cq_send_list) {
+ while ((shadow_wqe = shadow_queue_get_next_to_consume(&qp->shadow_sq))
+ != NULL) {
+ if (wc_index >= nwc)
+ goto out;
+
+ fill_verbs_from_shadow_wqe(qp, &wc[wc_index], shadow_wqe);
+ shadow_queue_advance_consumer(&qp->shadow_sq);
+ wc_index++;
+ }
+ }
+
+ /* process recv shadow queue completions */
+ list_for_each_entry(qp, &cq->list_recv_qp, cq_recv_list) {
+ while ((shadow_wqe = shadow_queue_get_next_to_consume(&qp->shadow_rq))
+ != NULL) {
+ if (wc_index >= nwc)
+ goto out;
+
+ fill_verbs_from_shadow_wqe(qp, &wc[wc_index], shadow_wqe);
+ shadow_queue_advance_consumer(&qp->shadow_rq);
+ wc_index++;
+ }
+ }
+
+out:
+ return wc_index;
+}
+
+void mana_drain_gsi_sqs(struct mana_ib_dev *mdev)
+{
+ struct mana_ib_qp *qp = mana_get_qp_ref(mdev, MANA_GSI_QPN, false);
+ struct ud_sq_shadow_wqe *shadow_wqe;
+ struct mana_ib_cq *cq;
+ unsigned long flags;
+
+ if (!qp)
+ return;
+
+ cq = container_of(qp->ibqp.send_cq, struct mana_ib_cq, ibcq);
+
+ spin_lock_irqsave(&cq->cq_lock, flags);
+ while ((shadow_wqe = shadow_queue_get_next_to_complete(&qp->shadow_sq))
+ != NULL) {
+ shadow_wqe->header.error_code = IB_WC_GENERAL_ERR;
+ shadow_queue_advance_next_to_complete(&qp->shadow_sq);
+ }
+ spin_unlock_irqrestore(&cq->cq_lock, flags);
+
+ if (cq->ibcq.comp_handler)
+ cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
+
+ mana_put_qp_ref(qp);
+}
+
+int mana_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
+{
+ struct mana_ib_cq *cq = container_of(ibcq, struct mana_ib_cq, ibcq);
+ struct mana_ib_dev *mdev = container_of(ibcq->device, struct mana_ib_dev, ib_dev);
+ struct gdma_queue *queue = cq->queue.kmem;
+ struct gdma_comp gdma_cqe;
+ unsigned long flags;
+ int num_polled = 0;
+ int comp_read, i;
+
+ spin_lock_irqsave(&cq->cq_lock, flags);
+ for (i = 0; i < num_entries; i++) {
+ comp_read = mana_gd_poll_cq(queue, &gdma_cqe, 1);
+ if (comp_read < 1)
+ break;
+ mana_handle_cqe(mdev, &gdma_cqe);
+ }
+
+ num_polled = mana_process_completions(cq, num_entries, wc);
+ spin_unlock_irqrestore(&cq->cq_lock, flags);
+
+ return num_polled;
+}
diff --git a/drivers/infiniband/hw/mana/device.c b/drivers/infiniband/hw/mana/device.c
new file mode 100644
index 000000000000..bdeddb642b87
--- /dev/null
+++ b/drivers/infiniband/hw/mana/device.c
@@ -0,0 +1,261 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2022, Microsoft Corporation. All rights reserved.
+ */
+
+#include "mana_ib.h"
+#include <net/mana/mana_auxiliary.h>
+#include <net/addrconf.h>
+
+MODULE_DESCRIPTION("Microsoft Azure Network Adapter IB driver");
+MODULE_LICENSE("GPL");
+MODULE_IMPORT_NS("NET_MANA");
+
+static const struct ib_device_ops mana_ib_dev_ops = {
+ .owner = THIS_MODULE,
+ .driver_id = RDMA_DRIVER_MANA,
+ .uverbs_abi_ver = MANA_IB_UVERBS_ABI_VERSION,
+
+ .add_gid = mana_ib_gd_add_gid,
+ .alloc_pd = mana_ib_alloc_pd,
+ .alloc_ucontext = mana_ib_alloc_ucontext,
+ .create_ah = mana_ib_create_ah,
+ .create_cq = mana_ib_create_cq,
+ .create_qp = mana_ib_create_qp,
+ .create_rwq_ind_table = mana_ib_create_rwq_ind_table,
+ .create_wq = mana_ib_create_wq,
+ .dealloc_pd = mana_ib_dealloc_pd,
+ .dealloc_ucontext = mana_ib_dealloc_ucontext,
+ .del_gid = mana_ib_gd_del_gid,
+ .dereg_mr = mana_ib_dereg_mr,
+ .destroy_ah = mana_ib_destroy_ah,
+ .destroy_cq = mana_ib_destroy_cq,
+ .destroy_qp = mana_ib_destroy_qp,
+ .destroy_rwq_ind_table = mana_ib_destroy_rwq_ind_table,
+ .destroy_wq = mana_ib_destroy_wq,
+ .disassociate_ucontext = mana_ib_disassociate_ucontext,
+ .get_dma_mr = mana_ib_get_dma_mr,
+ .get_link_layer = mana_ib_get_link_layer,
+ .get_port_immutable = mana_ib_get_port_immutable,
+ .mmap = mana_ib_mmap,
+ .modify_qp = mana_ib_modify_qp,
+ .modify_wq = mana_ib_modify_wq,
+ .poll_cq = mana_ib_poll_cq,
+ .post_recv = mana_ib_post_recv,
+ .post_send = mana_ib_post_send,
+ .query_device = mana_ib_query_device,
+ .query_gid = mana_ib_query_gid,
+ .query_pkey = mana_ib_query_pkey,
+ .query_port = mana_ib_query_port,
+ .reg_user_mr = mana_ib_reg_user_mr,
+ .reg_user_mr_dmabuf = mana_ib_reg_user_mr_dmabuf,
+ .req_notify_cq = mana_ib_arm_cq,
+
+ INIT_RDMA_OBJ_SIZE(ib_ah, mana_ib_ah, ibah),
+ INIT_RDMA_OBJ_SIZE(ib_cq, mana_ib_cq, ibcq),
+ INIT_RDMA_OBJ_SIZE(ib_pd, mana_ib_pd, ibpd),
+ INIT_RDMA_OBJ_SIZE(ib_qp, mana_ib_qp, ibqp),
+ INIT_RDMA_OBJ_SIZE(ib_ucontext, mana_ib_ucontext, ibucontext),
+ INIT_RDMA_OBJ_SIZE(ib_rwq_ind_table, mana_ib_rwq_ind_table,
+ ib_ind_table),
+};
+
+static const struct ib_device_ops mana_ib_stats_ops = {
+ .alloc_hw_port_stats = mana_ib_alloc_hw_port_stats,
+ .get_hw_stats = mana_ib_get_hw_stats,
+};
+
+static const struct ib_device_ops mana_ib_device_stats_ops = {
+ .alloc_hw_device_stats = mana_ib_alloc_hw_device_stats,
+};
+
+static int mana_ib_netdev_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ struct mana_ib_dev *dev = container_of(this, struct mana_ib_dev, nb);
+ struct net_device *event_dev = netdev_notifier_info_to_dev(ptr);
+ struct gdma_context *gc = dev->gdma_dev->gdma_context;
+ struct mana_context *mc = gc->mana.driver_data;
+ struct net_device *ndev;
+ int i;
+
+ /* Only process events from our parent device */
+ for (i = 0; i < dev->ib_dev.phys_port_cnt; i++)
+ if (event_dev == mc->ports[i]) {
+ switch (event) {
+ case NETDEV_CHANGEUPPER:
+ ndev = mana_get_primary_netdev(mc, i, &dev->dev_tracker);
+ /*
+ * RDMA core will setup GID based on updated netdev.
+ * It's not possible to race with the core as rtnl lock is being
+ * held.
+ */
+ ib_device_set_netdev(&dev->ib_dev, ndev, i + 1);
+
+ /* mana_get_primary_netdev() returns ndev with refcount held */
+ if (ndev)
+ netdev_put(ndev, &dev->dev_tracker);
+
+ return NOTIFY_OK;
+ default:
+ return NOTIFY_DONE;
+ }
+ }
+ return NOTIFY_DONE;
+}
+
+static int mana_ib_probe(struct auxiliary_device *adev,
+ const struct auxiliary_device_id *id)
+{
+ struct mana_adev *madev = container_of(adev, struct mana_adev, adev);
+ struct gdma_context *gc = madev->mdev->gdma_context;
+ struct mana_context *mc = gc->mana.driver_data;
+ struct gdma_dev *mdev = madev->mdev;
+ struct net_device *ndev;
+ struct mana_ib_dev *dev;
+ u8 mac_addr[ETH_ALEN];
+ int ret, i;
+
+ dev = ib_alloc_device(mana_ib_dev, ib_dev);
+ if (!dev)
+ return -ENOMEM;
+
+ ib_set_device_ops(&dev->ib_dev, &mana_ib_dev_ops);
+ dev->ib_dev.node_type = RDMA_NODE_IB_CA;
+ dev->ib_dev.num_comp_vectors = gc->max_num_queues;
+ dev->ib_dev.dev.parent = gc->dev;
+ dev->gdma_dev = mdev;
+ xa_init_flags(&dev->qp_table_wq, XA_FLAGS_LOCK_IRQ);
+
+ if (mana_ib_is_rnic(dev)) {
+ dev->ib_dev.phys_port_cnt = 1;
+ addrconf_addr_eui48((u8 *)&dev->ib_dev.node_guid, mc->ports[0]->dev_addr);
+ ret = mana_ib_gd_query_adapter_caps(dev);
+ if (ret) {
+ ibdev_err(&dev->ib_dev, "Failed to query device caps, ret %d", ret);
+ goto free_ib_device;
+ }
+
+ ib_set_device_ops(&dev->ib_dev, &mana_ib_stats_ops);
+ if (dev->adapter_caps.feature_flags & MANA_IB_FEATURE_DEV_COUNTERS_SUPPORT)
+ ib_set_device_ops(&dev->ib_dev, &mana_ib_device_stats_ops);
+
+ ret = mana_ib_create_eqs(dev);
+ if (ret) {
+ ibdev_err(&dev->ib_dev, "Failed to create EQs, ret %d", ret);
+ goto free_ib_device;
+ }
+
+ ret = mana_ib_gd_create_rnic_adapter(dev);
+ if (ret)
+ goto destroy_eqs;
+
+ if (dev->adapter_caps.feature_flags & MANA_IB_FEATURE_MULTI_PORTS_SUPPORT)
+ dev->ib_dev.phys_port_cnt = mc->num_ports;
+
+ for (i = 0; i < dev->ib_dev.phys_port_cnt; i++) {
+ ndev = mana_get_primary_netdev(mc, i, &dev->dev_tracker);
+ if (!ndev) {
+ ret = -ENODEV;
+ ibdev_err(&dev->ib_dev,
+ "Failed to get netdev for IB port %d", i + 1);
+ goto destroy_rnic;
+ }
+ ether_addr_copy(mac_addr, ndev->dev_addr);
+ ret = ib_device_set_netdev(&dev->ib_dev, ndev, i + 1);
+ /* mana_get_primary_netdev() returns ndev with refcount held */
+ netdev_put(ndev, &dev->dev_tracker);
+ if (ret) {
+ ibdev_err(&dev->ib_dev, "Failed to set ib netdev, ret %d", ret);
+ goto destroy_rnic;
+ }
+ ret = mana_ib_gd_config_mac(dev, ADDR_OP_ADD, mac_addr);
+ if (ret) {
+ ibdev_err(&dev->ib_dev, "Failed to add Mac address, ret %d", ret);
+ goto destroy_rnic;
+ }
+ }
+ dev->nb.notifier_call = mana_ib_netdev_event;
+ ret = register_netdevice_notifier(&dev->nb);
+ if (ret) {
+ ibdev_err(&dev->ib_dev, "Failed to register net notifier, %d", ret);
+ goto destroy_rnic;
+ }
+ } else {
+ dev->ib_dev.phys_port_cnt = mc->num_ports;
+ ret = mana_eth_query_adapter_caps(dev);
+ if (ret) {
+ ibdev_err(&dev->ib_dev, "Failed to query ETH device caps, ret %d", ret);
+ goto free_ib_device;
+ }
+ }
+
+ dev->av_pool = dma_pool_create("mana_ib_av", gc->dev, MANA_AV_BUFFER_SIZE,
+ MANA_AV_BUFFER_SIZE, 0);
+ if (!dev->av_pool) {
+ ret = -ENOMEM;
+ goto deregister_net_notifier;
+ }
+
+ ibdev_dbg(&dev->ib_dev, "mdev=%p id=%d num_ports=%d\n", mdev,
+ mdev->dev_id.as_uint32, dev->ib_dev.phys_port_cnt);
+
+ ret = ib_register_device(&dev->ib_dev, mana_ib_is_rnic(dev) ? "mana_%d" : "manae_%d",
+ gc->dev);
+ if (ret)
+ goto deallocate_pool;
+
+ dev_set_drvdata(&adev->dev, dev);
+
+ return 0;
+
+deallocate_pool:
+ dma_pool_destroy(dev->av_pool);
+deregister_net_notifier:
+ if (mana_ib_is_rnic(dev))
+ unregister_netdevice_notifier(&dev->nb);
+destroy_rnic:
+ if (mana_ib_is_rnic(dev))
+ mana_ib_gd_destroy_rnic_adapter(dev);
+destroy_eqs:
+ if (mana_ib_is_rnic(dev))
+ mana_ib_destroy_eqs(dev);
+free_ib_device:
+ xa_destroy(&dev->qp_table_wq);
+ ib_dealloc_device(&dev->ib_dev);
+ return ret;
+}
+
+static void mana_ib_remove(struct auxiliary_device *adev)
+{
+ struct mana_ib_dev *dev = dev_get_drvdata(&adev->dev);
+
+ if (mana_ib_is_rnic(dev))
+ mana_drain_gsi_sqs(dev);
+
+ ib_unregister_device(&dev->ib_dev);
+ dma_pool_destroy(dev->av_pool);
+ if (mana_ib_is_rnic(dev)) {
+ unregister_netdevice_notifier(&dev->nb);
+ mana_ib_gd_destroy_rnic_adapter(dev);
+ mana_ib_destroy_eqs(dev);
+ }
+ xa_destroy(&dev->qp_table_wq);
+ ib_dealloc_device(&dev->ib_dev);
+}
+
+static const struct auxiliary_device_id mana_id_table[] = {
+ { .name = "mana.rdma", },
+ { .name = "mana.eth", },
+ {},
+};
+
+MODULE_DEVICE_TABLE(auxiliary, mana_id_table);
+
+static struct auxiliary_driver mana_driver = {
+ .probe = mana_ib_probe,
+ .remove = mana_ib_remove,
+ .id_table = mana_id_table,
+};
+
+module_auxiliary_driver(mana_driver);
diff --git a/drivers/infiniband/hw/mana/main.c b/drivers/infiniband/hw/mana/main.c
new file mode 100644
index 000000000000..fac159f7128d
--- /dev/null
+++ b/drivers/infiniband/hw/mana/main.c
@@ -0,0 +1,1134 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2022, Microsoft Corporation. All rights reserved.
+ */
+
+#include "mana_ib.h"
+#include "linux/pci.h"
+
+void mana_ib_uncfg_vport(struct mana_ib_dev *dev, struct mana_ib_pd *pd,
+ u32 port)
+{
+ struct mana_port_context *mpc;
+ struct net_device *ndev;
+
+ ndev = mana_ib_get_netdev(&dev->ib_dev, port);
+ mpc = netdev_priv(ndev);
+
+ mutex_lock(&pd->vport_mutex);
+
+ pd->vport_use_count--;
+ WARN_ON(pd->vport_use_count < 0);
+
+ if (!pd->vport_use_count)
+ mana_uncfg_vport(mpc);
+
+ mutex_unlock(&pd->vport_mutex);
+}
+
+int mana_ib_cfg_vport(struct mana_ib_dev *dev, u32 port, struct mana_ib_pd *pd,
+ u32 doorbell_id)
+{
+ struct mana_port_context *mpc;
+ struct net_device *ndev;
+ int err;
+
+ ndev = mana_ib_get_netdev(&dev->ib_dev, port);
+ mpc = netdev_priv(ndev);
+
+ mutex_lock(&pd->vport_mutex);
+
+ pd->vport_use_count++;
+ if (pd->vport_use_count > 1) {
+ ibdev_dbg(&dev->ib_dev,
+ "Skip as this PD is already configured vport\n");
+ mutex_unlock(&pd->vport_mutex);
+ return 0;
+ }
+
+ err = mana_cfg_vport(mpc, pd->pdn, doorbell_id);
+ if (err) {
+ pd->vport_use_count--;
+ mutex_unlock(&pd->vport_mutex);
+
+ ibdev_dbg(&dev->ib_dev, "Failed to configure vPort %d\n", err);
+ return err;
+ }
+
+ mutex_unlock(&pd->vport_mutex);
+
+ pd->tx_shortform_allowed = mpc->tx_shortform_allowed;
+ pd->tx_vp_offset = mpc->tx_vp_offset;
+
+ ibdev_dbg(&dev->ib_dev, "vport handle %llx pdid %x doorbell_id %x\n",
+ mpc->port_handle, pd->pdn, doorbell_id);
+
+ return 0;
+}
+
+int mana_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
+{
+ struct mana_ib_pd *pd = container_of(ibpd, struct mana_ib_pd, ibpd);
+ struct ib_device *ibdev = ibpd->device;
+ struct gdma_create_pd_resp resp = {};
+ struct gdma_create_pd_req req = {};
+ enum gdma_pd_flags flags = 0;
+ struct mana_ib_dev *dev;
+ struct gdma_context *gc;
+ int err;
+
+ dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
+ gc = mdev_to_gc(dev);
+
+ mana_gd_init_req_hdr(&req.hdr, GDMA_CREATE_PD, sizeof(req),
+ sizeof(resp));
+
+ if (!udata)
+ flags |= GDMA_PD_FLAG_ALLOW_GPA_MR;
+
+ req.flags = flags;
+ err = mana_gd_send_request(gc, sizeof(req), &req,
+ sizeof(resp), &resp);
+
+ if (err || resp.hdr.status) {
+ ibdev_dbg(&dev->ib_dev,
+ "Failed to get pd_id err %d status %u\n", err,
+ resp.hdr.status);
+ if (!err)
+ err = -EPROTO;
+
+ return err;
+ }
+
+ pd->pd_handle = resp.pd_handle;
+ pd->pdn = resp.pd_id;
+ ibdev_dbg(&dev->ib_dev, "pd_handle 0x%llx pd_id %d\n",
+ pd->pd_handle, pd->pdn);
+
+ mutex_init(&pd->vport_mutex);
+ pd->vport_use_count = 0;
+ return 0;
+}
+
+int mana_ib_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
+{
+ struct mana_ib_pd *pd = container_of(ibpd, struct mana_ib_pd, ibpd);
+ struct ib_device *ibdev = ibpd->device;
+ struct gdma_destory_pd_resp resp = {};
+ struct gdma_destroy_pd_req req = {};
+ struct mana_ib_dev *dev;
+ struct gdma_context *gc;
+ int err;
+
+ dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
+ gc = mdev_to_gc(dev);
+
+ mana_gd_init_req_hdr(&req.hdr, GDMA_DESTROY_PD, sizeof(req),
+ sizeof(resp));
+
+ req.pd_handle = pd->pd_handle;
+ err = mana_gd_send_request(gc, sizeof(req), &req,
+ sizeof(resp), &resp);
+
+ if (err || resp.hdr.status) {
+ ibdev_dbg(&dev->ib_dev,
+ "Failed to destroy pd_handle 0x%llx err %d status %u",
+ pd->pd_handle, err, resp.hdr.status);
+ if (!err)
+ err = -EPROTO;
+ }
+
+ return err;
+}
+
+static int mana_gd_destroy_doorbell_page(struct gdma_context *gc,
+ int doorbell_page)
+{
+ struct gdma_destroy_resource_range_req req = {};
+ struct gdma_resp_hdr resp = {};
+ int err;
+
+ mana_gd_init_req_hdr(&req.hdr, GDMA_DESTROY_RESOURCE_RANGE,
+ sizeof(req), sizeof(resp));
+
+ req.resource_type = GDMA_RESOURCE_DOORBELL_PAGE;
+ req.num_resources = 1;
+ req.allocated_resources = doorbell_page;
+
+ err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+ if (err || resp.status) {
+ dev_err(gc->dev,
+ "Failed to destroy doorbell page: ret %d, 0x%x\n",
+ err, resp.status);
+ return err ?: -EPROTO;
+ }
+
+ return 0;
+}
+
+static int mana_gd_allocate_doorbell_page(struct gdma_context *gc,
+ int *doorbell_page)
+{
+ struct gdma_allocate_resource_range_req req = {};
+ struct gdma_allocate_resource_range_resp resp = {};
+ int err;
+
+ mana_gd_init_req_hdr(&req.hdr, GDMA_ALLOCATE_RESOURCE_RANGE,
+ sizeof(req), sizeof(resp));
+
+ req.resource_type = GDMA_RESOURCE_DOORBELL_PAGE;
+ req.num_resources = 1;
+ req.alignment = PAGE_SIZE / MANA_PAGE_SIZE;
+
+ /* Have GDMA start searching from 0 */
+ req.allocated_resources = 0;
+
+ err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+ if (err || resp.hdr.status) {
+ dev_err(gc->dev,
+ "Failed to allocate doorbell page: ret %d, 0x%x\n",
+ err, resp.hdr.status);
+ return err ?: -EPROTO;
+ }
+
+ *doorbell_page = resp.allocated_resources;
+
+ return 0;
+}
+
+int mana_ib_alloc_ucontext(struct ib_ucontext *ibcontext,
+ struct ib_udata *udata)
+{
+ struct mana_ib_ucontext *ucontext =
+ container_of(ibcontext, struct mana_ib_ucontext, ibucontext);
+ struct ib_device *ibdev = ibcontext->device;
+ struct mana_ib_dev *mdev;
+ struct gdma_context *gc;
+ int doorbell_page;
+ int ret;
+
+ mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
+ gc = mdev_to_gc(mdev);
+
+ /* Allocate a doorbell page index */
+ ret = mana_gd_allocate_doorbell_page(gc, &doorbell_page);
+ if (ret) {
+ ibdev_dbg(ibdev, "Failed to allocate doorbell page %d\n", ret);
+ return ret;
+ }
+
+ ibdev_dbg(ibdev, "Doorbell page allocated %d\n", doorbell_page);
+
+ ucontext->doorbell = doorbell_page;
+
+ return 0;
+}
+
+void mana_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
+{
+ struct mana_ib_ucontext *mana_ucontext =
+ container_of(ibcontext, struct mana_ib_ucontext, ibucontext);
+ struct ib_device *ibdev = ibcontext->device;
+ struct mana_ib_dev *mdev;
+ struct gdma_context *gc;
+ int ret;
+
+ mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
+ gc = mdev_to_gc(mdev);
+
+ ret = mana_gd_destroy_doorbell_page(gc, mana_ucontext->doorbell);
+ if (ret)
+ ibdev_dbg(ibdev, "Failed to destroy doorbell page %d\n", ret);
+}
+
+int mana_ib_create_kernel_queue(struct mana_ib_dev *mdev, u32 size, enum gdma_queue_type type,
+ struct mana_ib_queue *queue)
+{
+ struct gdma_queue_spec spec = {};
+ int err;
+
+ queue->id = INVALID_QUEUE_ID;
+ queue->gdma_region = GDMA_INVALID_DMA_REGION;
+ spec.type = type;
+ spec.monitor_avl_buf = false;
+ spec.queue_size = size;
+ err = mana_gd_create_mana_wq_cq(mdev->gdma_dev, &spec, &queue->kmem);
+ if (err)
+ return err;
+ /* take ownership into mana_ib from mana */
+ queue->gdma_region = queue->kmem->mem_info.dma_region_handle;
+ queue->kmem->mem_info.dma_region_handle = GDMA_INVALID_DMA_REGION;
+ return 0;
+}
+
+int mana_ib_create_queue(struct mana_ib_dev *mdev, u64 addr, u32 size,
+ struct mana_ib_queue *queue)
+{
+ struct ib_umem *umem;
+ int err;
+
+ queue->umem = NULL;
+ queue->id = INVALID_QUEUE_ID;
+ queue->gdma_region = GDMA_INVALID_DMA_REGION;
+
+ umem = ib_umem_get(&mdev->ib_dev, addr, size, IB_ACCESS_LOCAL_WRITE);
+ if (IS_ERR(umem)) {
+ ibdev_dbg(&mdev->ib_dev, "Failed to get umem, %pe\n", umem);
+ return PTR_ERR(umem);
+ }
+
+ err = mana_ib_create_zero_offset_dma_region(mdev, umem, &queue->gdma_region);
+ if (err) {
+ ibdev_dbg(&mdev->ib_dev, "Failed to create dma region, %d\n", err);
+ goto free_umem;
+ }
+ queue->umem = umem;
+
+ ibdev_dbg(&mdev->ib_dev, "created dma region 0x%llx\n", queue->gdma_region);
+
+ return 0;
+free_umem:
+ ib_umem_release(umem);
+ return err;
+}
+
+void mana_ib_destroy_queue(struct mana_ib_dev *mdev, struct mana_ib_queue *queue)
+{
+ /* Ignore return code as there is not much we can do about it.
+ * The error message is printed inside.
+ */
+ mana_ib_gd_destroy_dma_region(mdev, queue->gdma_region);
+ ib_umem_release(queue->umem);
+ if (queue->kmem)
+ mana_gd_destroy_queue(mdev_to_gc(mdev), queue->kmem);
+}
+
+static int
+mana_ib_gd_first_dma_region(struct mana_ib_dev *dev,
+ struct gdma_context *gc,
+ struct gdma_create_dma_region_req *create_req,
+ size_t num_pages, mana_handle_t *gdma_region,
+ u32 expected_status)
+{
+ struct gdma_create_dma_region_resp create_resp = {};
+ unsigned int create_req_msg_size;
+ int err;
+
+ create_req_msg_size =
+ struct_size(create_req, page_addr_list, num_pages);
+ create_req->page_addr_list_len = num_pages;
+
+ err = mana_gd_send_request(gc, create_req_msg_size, create_req,
+ sizeof(create_resp), &create_resp);
+ if (err || create_resp.hdr.status != expected_status) {
+ ibdev_dbg(&dev->ib_dev,
+ "Failed to create DMA region: %d, 0x%x\n",
+ err, create_resp.hdr.status);
+ if (!err)
+ err = -EPROTO;
+
+ return err;
+ }
+
+ *gdma_region = create_resp.dma_region_handle;
+ ibdev_dbg(&dev->ib_dev, "Created DMA region handle 0x%llx\n",
+ *gdma_region);
+
+ return 0;
+}
+
+static int
+mana_ib_gd_add_dma_region(struct mana_ib_dev *dev, struct gdma_context *gc,
+ struct gdma_dma_region_add_pages_req *add_req,
+ unsigned int num_pages, u32 expected_status)
+{
+ unsigned int add_req_msg_size =
+ struct_size(add_req, page_addr_list, num_pages);
+ struct gdma_general_resp add_resp = {};
+ int err;
+
+ mana_gd_init_req_hdr(&add_req->hdr, GDMA_DMA_REGION_ADD_PAGES,
+ add_req_msg_size, sizeof(add_resp));
+ add_req->page_addr_list_len = num_pages;
+
+ err = mana_gd_send_request(gc, add_req_msg_size, add_req,
+ sizeof(add_resp), &add_resp);
+ if (err || add_resp.hdr.status != expected_status) {
+ ibdev_dbg(&dev->ib_dev,
+ "Failed to create DMA region: %d, 0x%x\n",
+ err, add_resp.hdr.status);
+
+ if (!err)
+ err = -EPROTO;
+
+ return err;
+ }
+
+ return 0;
+}
+
+static int mana_ib_gd_create_dma_region(struct mana_ib_dev *dev, struct ib_umem *umem,
+ mana_handle_t *gdma_region, unsigned long page_sz)
+{
+ struct gdma_dma_region_add_pages_req *add_req = NULL;
+ size_t num_pages_processed = 0, num_pages_to_handle;
+ struct gdma_create_dma_region_req *create_req;
+ unsigned int create_req_msg_size;
+ struct hw_channel_context *hwc;
+ struct ib_block_iter biter;
+ size_t max_pgs_add_cmd = 0;
+ size_t max_pgs_create_cmd;
+ struct gdma_context *gc;
+ size_t num_pages_total;
+ unsigned int tail = 0;
+ u64 *page_addr_list;
+ void *request_buf;
+ int err = 0;
+
+ gc = mdev_to_gc(dev);
+ hwc = gc->hwc.driver_data;
+
+ num_pages_total = ib_umem_num_dma_blocks(umem, page_sz);
+
+ max_pgs_create_cmd =
+ (hwc->max_req_msg_size - sizeof(*create_req)) / sizeof(u64);
+ num_pages_to_handle =
+ min_t(size_t, num_pages_total, max_pgs_create_cmd);
+ create_req_msg_size =
+ struct_size(create_req, page_addr_list, num_pages_to_handle);
+
+ request_buf = kzalloc(hwc->max_req_msg_size, GFP_KERNEL);
+ if (!request_buf)
+ return -ENOMEM;
+
+ create_req = request_buf;
+ mana_gd_init_req_hdr(&create_req->hdr, GDMA_CREATE_DMA_REGION,
+ create_req_msg_size,
+ sizeof(struct gdma_create_dma_region_resp));
+
+ create_req->length = umem->length;
+ create_req->offset_in_page = ib_umem_dma_offset(umem, page_sz);
+ create_req->gdma_page_type = order_base_2(page_sz) - MANA_PAGE_SHIFT;
+ create_req->page_count = num_pages_total;
+
+ ibdev_dbg(&dev->ib_dev, "size_dma_region %lu num_pages_total %lu\n",
+ umem->length, num_pages_total);
+
+ ibdev_dbg(&dev->ib_dev, "page_sz %lu offset_in_page %u\n",
+ page_sz, create_req->offset_in_page);
+
+ ibdev_dbg(&dev->ib_dev, "num_pages_to_handle %lu, gdma_page_type %u",
+ num_pages_to_handle, create_req->gdma_page_type);
+
+ page_addr_list = create_req->page_addr_list;
+ rdma_umem_for_each_dma_block(umem, &biter, page_sz) {
+ u32 expected_status = 0;
+
+ page_addr_list[tail++] = rdma_block_iter_dma_address(&biter);
+ if (tail < num_pages_to_handle)
+ continue;
+
+ if (num_pages_processed + num_pages_to_handle <
+ num_pages_total)
+ expected_status = GDMA_STATUS_MORE_ENTRIES;
+
+ if (!num_pages_processed) {
+ /* First create message */
+ err = mana_ib_gd_first_dma_region(dev, gc, create_req,
+ tail, gdma_region,
+ expected_status);
+ if (err)
+ goto out;
+
+ max_pgs_add_cmd = (hwc->max_req_msg_size -
+ sizeof(*add_req)) / sizeof(u64);
+
+ add_req = request_buf;
+ add_req->dma_region_handle = *gdma_region;
+ add_req->reserved3 = 0;
+ page_addr_list = add_req->page_addr_list;
+ } else {
+ /* Subsequent create messages */
+ err = mana_ib_gd_add_dma_region(dev, gc, add_req, tail,
+ expected_status);
+ if (err)
+ break;
+ }
+
+ num_pages_processed += tail;
+ tail = 0;
+
+ /* The remaining pages to create */
+ num_pages_to_handle =
+ min_t(size_t,
+ num_pages_total - num_pages_processed,
+ max_pgs_add_cmd);
+ }
+
+ if (err)
+ mana_ib_gd_destroy_dma_region(dev, *gdma_region);
+
+out:
+ kfree(request_buf);
+ return err;
+}
+
+int mana_ib_create_dma_region(struct mana_ib_dev *dev, struct ib_umem *umem,
+ mana_handle_t *gdma_region, u64 virt)
+{
+ unsigned long page_sz;
+
+ page_sz = ib_umem_find_best_pgsz(umem, dev->adapter_caps.page_size_cap, virt);
+ if (!page_sz) {
+ ibdev_dbg(&dev->ib_dev, "Failed to find page size.\n");
+ return -EINVAL;
+ }
+
+ return mana_ib_gd_create_dma_region(dev, umem, gdma_region, page_sz);
+}
+
+int mana_ib_create_zero_offset_dma_region(struct mana_ib_dev *dev, struct ib_umem *umem,
+ mana_handle_t *gdma_region)
+{
+ unsigned long page_sz;
+
+ /* Hardware requires dma region to align to chosen page size */
+ page_sz = ib_umem_find_best_pgoff(umem, dev->adapter_caps.page_size_cap, 0);
+ if (!page_sz) {
+ ibdev_dbg(&dev->ib_dev, "Failed to find page size.\n");
+ return -EINVAL;
+ }
+
+ return mana_ib_gd_create_dma_region(dev, umem, gdma_region, page_sz);
+}
+
+int mana_ib_gd_destroy_dma_region(struct mana_ib_dev *dev, u64 gdma_region)
+{
+ struct gdma_context *gc = mdev_to_gc(dev);
+
+ ibdev_dbg(&dev->ib_dev, "destroy dma region 0x%llx\n", gdma_region);
+
+ return mana_gd_destroy_dma_region(gc, gdma_region);
+}
+
+int mana_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma)
+{
+ struct mana_ib_ucontext *mana_ucontext =
+ container_of(ibcontext, struct mana_ib_ucontext, ibucontext);
+ struct ib_device *ibdev = ibcontext->device;
+ struct mana_ib_dev *mdev;
+ struct gdma_context *gc;
+ phys_addr_t pfn;
+ pgprot_t prot;
+ int ret;
+
+ mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
+ gc = mdev_to_gc(mdev);
+
+ if (vma->vm_pgoff != 0) {
+ ibdev_dbg(ibdev, "Unexpected vm_pgoff %lu\n", vma->vm_pgoff);
+ return -EINVAL;
+ }
+
+ /* Map to the page indexed by ucontext->doorbell */
+ pfn = (gc->phys_db_page_base +
+ gc->db_page_size * mana_ucontext->doorbell) >>
+ PAGE_SHIFT;
+ prot = pgprot_writecombine(vma->vm_page_prot);
+
+ ret = rdma_user_mmap_io(ibcontext, vma, pfn, PAGE_SIZE, prot,
+ NULL);
+ if (ret)
+ ibdev_dbg(ibdev, "can't rdma_user_mmap_io ret %d\n", ret);
+ else
+ ibdev_dbg(ibdev, "mapped I/O pfn 0x%llx page_size %lu, ret %d\n",
+ pfn, PAGE_SIZE, ret);
+
+ return ret;
+}
+
+int mana_ib_get_port_immutable(struct ib_device *ibdev, u32 port_num,
+ struct ib_port_immutable *immutable)
+{
+ struct mana_ib_dev *dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
+ struct ib_port_attr attr;
+ int err;
+
+ err = ib_query_port(ibdev, port_num, &attr);
+ if (err)
+ return err;
+
+ immutable->pkey_tbl_len = attr.pkey_tbl_len;
+ immutable->gid_tbl_len = attr.gid_tbl_len;
+
+ if (mana_ib_is_rnic(dev)) {
+ if (port_num == 1) {
+ immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
+ immutable->max_mad_size = IB_MGMT_MAD_SIZE;
+ } else {
+ immutable->core_cap_flags = RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP
+ | RDMA_CORE_CAP_ETH_AH;
+ immutable->max_mad_size = 0;
+ }
+ } else {
+ immutable->core_cap_flags = RDMA_CORE_PORT_RAW_PACKET;
+ }
+
+ return 0;
+}
+
+int mana_ib_query_device(struct ib_device *ibdev, struct ib_device_attr *props,
+ struct ib_udata *uhw)
+{
+ struct mana_ib_dev *dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
+ struct pci_dev *pdev = to_pci_dev(mdev_to_gc(dev)->dev);
+
+ memset(props, 0, sizeof(*props));
+ props->vendor_id = pdev->vendor;
+ props->vendor_part_id = dev->gdma_dev->dev_id.type;
+ props->max_mr_size = MANA_IB_MAX_MR_SIZE;
+ props->page_size_cap = dev->adapter_caps.page_size_cap;
+ props->max_qp = dev->adapter_caps.max_qp_count;
+ props->max_qp_wr = dev->adapter_caps.max_qp_wr;
+ props->device_cap_flags = IB_DEVICE_RC_RNR_NAK_GEN;
+ props->max_send_sge = dev->adapter_caps.max_send_sge_count;
+ props->max_recv_sge = dev->adapter_caps.max_recv_sge_count;
+ props->max_sge_rd = dev->adapter_caps.max_recv_sge_count;
+ props->max_cq = dev->adapter_caps.max_cq_count;
+ props->max_cqe = dev->adapter_caps.max_qp_wr;
+ props->max_mr = dev->adapter_caps.max_mr_count;
+ props->max_pd = dev->adapter_caps.max_pd_count;
+ props->max_qp_rd_atom = dev->adapter_caps.max_inbound_read_limit;
+ props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp;
+ props->max_qp_init_rd_atom = dev->adapter_caps.max_outbound_read_limit;
+ props->atomic_cap = IB_ATOMIC_NONE;
+ props->masked_atomic_cap = IB_ATOMIC_NONE;
+ props->max_ah = INT_MAX;
+ props->max_pkeys = 1;
+ props->local_ca_ack_delay = MANA_CA_ACK_DELAY;
+ if (!mana_ib_is_rnic(dev))
+ props->raw_packet_caps = IB_RAW_PACKET_CAP_IP_CSUM;
+
+ return 0;
+}
+
+int mana_ib_query_port(struct ib_device *ibdev, u32 port,
+ struct ib_port_attr *props)
+{
+ struct mana_ib_dev *dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
+ struct net_device *ndev = mana_ib_get_netdev(ibdev, port);
+
+ if (!ndev)
+ return -EINVAL;
+
+ memset(props, 0, sizeof(*props));
+ props->max_mtu = IB_MTU_4096;
+ props->active_mtu = ib_mtu_int_to_enum(ndev->mtu);
+
+ if (netif_carrier_ok(ndev) && netif_running(ndev)) {
+ props->state = IB_PORT_ACTIVE;
+ props->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
+ } else {
+ props->state = IB_PORT_DOWN;
+ props->phys_state = IB_PORT_PHYS_STATE_DISABLED;
+ }
+
+ props->active_width = IB_WIDTH_4X;
+ props->active_speed = IB_SPEED_EDR;
+ props->pkey_tbl_len = 1;
+ if (mana_ib_is_rnic(dev)) {
+ props->gid_tbl_len = 16;
+ props->ip_gids = true;
+ if (port == 1)
+ props->port_cap_flags = IB_PORT_CM_SUP;
+ }
+
+ return 0;
+}
+
+enum rdma_link_layer mana_ib_get_link_layer(struct ib_device *device, u32 port_num)
+{
+ return IB_LINK_LAYER_ETHERNET;
+}
+
+int mana_ib_query_pkey(struct ib_device *ibdev, u32 port, u16 index, u16 *pkey)
+{
+ if (index != 0)
+ return -EINVAL;
+ *pkey = IB_DEFAULT_PKEY_FULL;
+ return 0;
+}
+
+int mana_ib_query_gid(struct ib_device *ibdev, u32 port, int index,
+ union ib_gid *gid)
+{
+ /* This version doesn't return GID properties */
+ return 0;
+}
+
+void mana_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
+{
+}
+
+int mana_ib_gd_query_adapter_caps(struct mana_ib_dev *dev)
+{
+ struct mana_ib_adapter_caps *caps = &dev->adapter_caps;
+ struct mana_ib_query_adapter_caps_resp resp = {};
+ struct mana_ib_query_adapter_caps_req req = {};
+ int err;
+
+ mana_gd_init_req_hdr(&req.hdr, MANA_IB_GET_ADAPTER_CAP, sizeof(req),
+ sizeof(resp));
+ req.hdr.resp.msg_version = GDMA_MESSAGE_V4;
+ req.hdr.dev_id = dev->gdma_dev->dev_id;
+
+ err = mana_gd_send_request(mdev_to_gc(dev), sizeof(req),
+ &req, sizeof(resp), &resp);
+
+ if (err) {
+ ibdev_err(&dev->ib_dev,
+ "Failed to query adapter caps err %d", err);
+ return err;
+ }
+
+ caps->max_sq_id = resp.max_sq_id;
+ caps->max_rq_id = resp.max_rq_id;
+ caps->max_cq_id = resp.max_cq_id;
+ caps->max_qp_count = resp.max_qp_count;
+ caps->max_cq_count = resp.max_cq_count;
+ caps->max_mr_count = resp.max_mr_count;
+ caps->max_pd_count = resp.max_pd_count;
+ caps->max_inbound_read_limit = resp.max_inbound_read_limit;
+ caps->max_outbound_read_limit = resp.max_outbound_read_limit;
+ caps->mw_count = resp.mw_count;
+ caps->max_srq_count = resp.max_srq_count;
+ caps->max_qp_wr = min_t(u32,
+ resp.max_requester_sq_size / GDMA_MAX_SQE_SIZE,
+ resp.max_requester_rq_size / GDMA_MAX_RQE_SIZE);
+ caps->max_inline_data_size = resp.max_inline_data_size;
+ caps->max_send_sge_count = resp.max_send_sge_count;
+ caps->max_recv_sge_count = resp.max_recv_sge_count;
+ caps->feature_flags = resp.feature_flags;
+
+ caps->page_size_cap = PAGE_SZ_BM;
+ if (mdev_to_gc(dev)->pf_cap_flags1 & GDMA_DRV_CAP_FLAG_1_GDMA_PAGES_4MB_1GB_2GB)
+ caps->page_size_cap |= (SZ_4M | SZ_1G | SZ_2G);
+
+ return 0;
+}
+
+int mana_eth_query_adapter_caps(struct mana_ib_dev *dev)
+{
+ struct mana_ib_adapter_caps *caps = &dev->adapter_caps;
+ struct gdma_query_max_resources_resp resp = {};
+ struct gdma_general_req req = {};
+ int err;
+
+ mana_gd_init_req_hdr(&req.hdr, GDMA_QUERY_MAX_RESOURCES,
+ sizeof(req), sizeof(resp));
+
+ err = mana_gd_send_request(mdev_to_gc(dev), sizeof(req), &req, sizeof(resp), &resp);
+ if (err) {
+ ibdev_err(&dev->ib_dev,
+ "Failed to query adapter caps err %d", err);
+ return err;
+ }
+
+ caps->max_qp_count = min_t(u32, resp.max_sq, resp.max_rq);
+ caps->max_cq_count = resp.max_cq;
+ caps->max_mr_count = resp.max_mst;
+ caps->max_pd_count = 0x6000;
+ caps->max_qp_wr = min_t(u32,
+ 0x100000 / GDMA_MAX_SQE_SIZE,
+ 0x100000 / GDMA_MAX_RQE_SIZE);
+ caps->max_send_sge_count = 30;
+ caps->max_recv_sge_count = 15;
+ caps->page_size_cap = PAGE_SZ_BM;
+
+ return 0;
+}
+
+static void
+mana_ib_event_handler(void *ctx, struct gdma_queue *q, struct gdma_event *event)
+{
+ struct mana_ib_dev *mdev = (struct mana_ib_dev *)ctx;
+ struct mana_ib_qp *qp;
+ struct ib_event ev;
+ u32 qpn;
+
+ switch (event->type) {
+ case GDMA_EQE_RNIC_QP_FATAL:
+ qpn = event->details[0];
+ qp = mana_get_qp_ref(mdev, qpn, false);
+ if (!qp)
+ break;
+ if (qp->ibqp.event_handler) {
+ ev.device = qp->ibqp.device;
+ ev.element.qp = &qp->ibqp;
+ ev.event = IB_EVENT_QP_FATAL;
+ qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
+ }
+ mana_put_qp_ref(qp);
+ break;
+ default:
+ break;
+ }
+}
+
+int mana_ib_create_eqs(struct mana_ib_dev *mdev)
+{
+ struct gdma_context *gc = mdev_to_gc(mdev);
+ struct gdma_queue_spec spec = {};
+ int err, i;
+
+ spec.type = GDMA_EQ;
+ spec.monitor_avl_buf = false;
+ spec.queue_size = EQ_SIZE;
+ spec.eq.callback = mana_ib_event_handler;
+ spec.eq.context = mdev;
+ spec.eq.log2_throttle_limit = LOG2_EQ_THROTTLE;
+ spec.eq.msix_index = 0;
+
+ err = mana_gd_create_mana_eq(mdev->gdma_dev, &spec, &mdev->fatal_err_eq);
+ if (err)
+ return err;
+
+ mdev->eqs = kcalloc(mdev->ib_dev.num_comp_vectors, sizeof(struct gdma_queue *),
+ GFP_KERNEL);
+ if (!mdev->eqs) {
+ err = -ENOMEM;
+ goto destroy_fatal_eq;
+ }
+ spec.eq.callback = NULL;
+ for (i = 0; i < mdev->ib_dev.num_comp_vectors; i++) {
+ spec.eq.msix_index = (i + 1) % gc->num_msix_usable;
+ err = mana_gd_create_mana_eq(mdev->gdma_dev, &spec, &mdev->eqs[i]);
+ if (err)
+ goto destroy_eqs;
+ }
+
+ return 0;
+
+destroy_eqs:
+ while (i-- > 0)
+ mana_gd_destroy_queue(gc, mdev->eqs[i]);
+ kfree(mdev->eqs);
+destroy_fatal_eq:
+ mana_gd_destroy_queue(gc, mdev->fatal_err_eq);
+ return err;
+}
+
+void mana_ib_destroy_eqs(struct mana_ib_dev *mdev)
+{
+ struct gdma_context *gc = mdev_to_gc(mdev);
+ int i;
+
+ mana_gd_destroy_queue(gc, mdev->fatal_err_eq);
+
+ for (i = 0; i < mdev->ib_dev.num_comp_vectors; i++)
+ mana_gd_destroy_queue(gc, mdev->eqs[i]);
+
+ kfree(mdev->eqs);
+}
+
+int mana_ib_gd_create_rnic_adapter(struct mana_ib_dev *mdev)
+{
+ struct mana_rnic_create_adapter_resp resp = {};
+ struct mana_rnic_create_adapter_req req = {};
+ struct gdma_context *gc = mdev_to_gc(mdev);
+ int err;
+
+ mana_gd_init_req_hdr(&req.hdr, MANA_IB_CREATE_ADAPTER, sizeof(req), sizeof(resp));
+ req.hdr.req.msg_version = GDMA_MESSAGE_V2;
+ req.hdr.dev_id = mdev->gdma_dev->dev_id;
+ req.notify_eq_id = mdev->fatal_err_eq->id;
+
+ if (mdev->adapter_caps.feature_flags & MANA_IB_FEATURE_CLIENT_ERROR_CQE_SUPPORT)
+ req.feature_flags |= MANA_IB_FEATURE_CLIENT_ERROR_CQE_REQUEST;
+
+ err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to create RNIC adapter err %d", err);
+ return err;
+ }
+ mdev->adapter_handle = resp.adapter;
+
+ return 0;
+}
+
+int mana_ib_gd_destroy_rnic_adapter(struct mana_ib_dev *mdev)
+{
+ struct mana_rnic_destroy_adapter_resp resp = {};
+ struct mana_rnic_destroy_adapter_req req = {};
+ struct gdma_context *gc;
+ int err;
+
+ gc = mdev_to_gc(mdev);
+ mana_gd_init_req_hdr(&req.hdr, MANA_IB_DESTROY_ADAPTER, sizeof(req), sizeof(resp));
+ req.hdr.dev_id = mdev->gdma_dev->dev_id;
+ req.adapter = mdev->adapter_handle;
+
+ err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to destroy RNIC adapter err %d", err);
+ return err;
+ }
+
+ return 0;
+}
+
+int mana_ib_gd_add_gid(const struct ib_gid_attr *attr, void **context)
+{
+ struct mana_ib_dev *mdev = container_of(attr->device, struct mana_ib_dev, ib_dev);
+ enum rdma_network_type ntype = rdma_gid_attr_network_type(attr);
+ struct mana_rnic_config_addr_resp resp = {};
+ struct gdma_context *gc = mdev_to_gc(mdev);
+ struct mana_rnic_config_addr_req req = {};
+ int err;
+
+ if (ntype != RDMA_NETWORK_IPV4 && ntype != RDMA_NETWORK_IPV6) {
+ ibdev_dbg(&mdev->ib_dev, "Unsupported rdma network type %d", ntype);
+ return -EINVAL;
+ }
+
+ mana_gd_init_req_hdr(&req.hdr, MANA_IB_CONFIG_IP_ADDR, sizeof(req), sizeof(resp));
+ req.hdr.dev_id = mdev->gdma_dev->dev_id;
+ req.adapter = mdev->adapter_handle;
+ req.op = ADDR_OP_ADD;
+ req.sgid_type = (ntype == RDMA_NETWORK_IPV6) ? SGID_TYPE_IPV6 : SGID_TYPE_IPV4;
+ copy_in_reverse(req.ip_addr, attr->gid.raw, sizeof(union ib_gid));
+
+ err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to config IP addr err %d\n", err);
+ return err;
+ }
+
+ return 0;
+}
+
+int mana_ib_gd_del_gid(const struct ib_gid_attr *attr, void **context)
+{
+ struct mana_ib_dev *mdev = container_of(attr->device, struct mana_ib_dev, ib_dev);
+ enum rdma_network_type ntype = rdma_gid_attr_network_type(attr);
+ struct mana_rnic_config_addr_resp resp = {};
+ struct gdma_context *gc = mdev_to_gc(mdev);
+ struct mana_rnic_config_addr_req req = {};
+ int err;
+
+ if (ntype != RDMA_NETWORK_IPV4 && ntype != RDMA_NETWORK_IPV6) {
+ ibdev_dbg(&mdev->ib_dev, "Unsupported rdma network type %d", ntype);
+ return -EINVAL;
+ }
+
+ mana_gd_init_req_hdr(&req.hdr, MANA_IB_CONFIG_IP_ADDR, sizeof(req), sizeof(resp));
+ req.hdr.dev_id = mdev->gdma_dev->dev_id;
+ req.adapter = mdev->adapter_handle;
+ req.op = ADDR_OP_REMOVE;
+ req.sgid_type = (ntype == RDMA_NETWORK_IPV6) ? SGID_TYPE_IPV6 : SGID_TYPE_IPV4;
+ copy_in_reverse(req.ip_addr, attr->gid.raw, sizeof(union ib_gid));
+
+ err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to config IP addr err %d\n", err);
+ return err;
+ }
+
+ return 0;
+}
+
+int mana_ib_gd_config_mac(struct mana_ib_dev *mdev, enum mana_ib_addr_op op, u8 *mac)
+{
+ struct mana_rnic_config_mac_addr_resp resp = {};
+ struct mana_rnic_config_mac_addr_req req = {};
+ struct gdma_context *gc = mdev_to_gc(mdev);
+ int err;
+
+ mana_gd_init_req_hdr(&req.hdr, MANA_IB_CONFIG_MAC_ADDR, sizeof(req), sizeof(resp));
+ req.hdr.dev_id = mdev->gdma_dev->dev_id;
+ req.adapter = mdev->adapter_handle;
+ req.op = op;
+ copy_in_reverse(req.mac_addr, mac, ETH_ALEN);
+
+ err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to config Mac addr err %d", err);
+ return err;
+ }
+
+ return 0;
+}
+
+int mana_ib_gd_create_cq(struct mana_ib_dev *mdev, struct mana_ib_cq *cq, u32 doorbell)
+{
+ struct gdma_context *gc = mdev_to_gc(mdev);
+ struct mana_rnic_create_cq_resp resp = {};
+ struct mana_rnic_create_cq_req req = {};
+ int err;
+
+ if (!mdev->eqs)
+ return -EINVAL;
+
+ mana_gd_init_req_hdr(&req.hdr, MANA_IB_CREATE_CQ, sizeof(req), sizeof(resp));
+ req.hdr.dev_id = mdev->gdma_dev->dev_id;
+ req.adapter = mdev->adapter_handle;
+ req.gdma_region = cq->queue.gdma_region;
+ req.eq_id = mdev->eqs[cq->comp_vector]->id;
+ req.doorbell_page = doorbell;
+
+ err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to create cq err %d", err);
+ return err;
+ }
+
+ cq->queue.id = resp.cq_id;
+ cq->cq_handle = resp.cq_handle;
+ /* The GDMA region is now owned by the CQ handle */
+ cq->queue.gdma_region = GDMA_INVALID_DMA_REGION;
+
+ return 0;
+}
+
+int mana_ib_gd_destroy_cq(struct mana_ib_dev *mdev, struct mana_ib_cq *cq)
+{
+ struct gdma_context *gc = mdev_to_gc(mdev);
+ struct mana_rnic_destroy_cq_resp resp = {};
+ struct mana_rnic_destroy_cq_req req = {};
+ int err;
+
+ if (cq->cq_handle == INVALID_MANA_HANDLE)
+ return 0;
+
+ mana_gd_init_req_hdr(&req.hdr, MANA_IB_DESTROY_CQ, sizeof(req), sizeof(resp));
+ req.hdr.dev_id = mdev->gdma_dev->dev_id;
+ req.adapter = mdev->adapter_handle;
+ req.cq_handle = cq->cq_handle;
+
+ err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to destroy cq err %d", err);
+ return err;
+ }
+
+ return 0;
+}
+
+int mana_ib_gd_create_rc_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp,
+ struct ib_qp_init_attr *attr, u32 doorbell, u64 flags)
+{
+ struct mana_ib_cq *send_cq = container_of(qp->ibqp.send_cq, struct mana_ib_cq, ibcq);
+ struct mana_ib_cq *recv_cq = container_of(qp->ibqp.recv_cq, struct mana_ib_cq, ibcq);
+ struct mana_ib_pd *pd = container_of(qp->ibqp.pd, struct mana_ib_pd, ibpd);
+ struct gdma_context *gc = mdev_to_gc(mdev);
+ struct mana_rnic_create_qp_resp resp = {};
+ struct mana_rnic_create_qp_req req = {};
+ int err, i;
+
+ mana_gd_init_req_hdr(&req.hdr, MANA_IB_CREATE_RC_QP, sizeof(req), sizeof(resp));
+ req.hdr.dev_id = mdev->gdma_dev->dev_id;
+ req.adapter = mdev->adapter_handle;
+ req.pd_handle = pd->pd_handle;
+ req.send_cq_handle = send_cq->cq_handle;
+ req.recv_cq_handle = recv_cq->cq_handle;
+ for (i = 0; i < MANA_RC_QUEUE_TYPE_MAX; i++)
+ req.dma_region[i] = qp->rc_qp.queues[i].gdma_region;
+ req.doorbell_page = doorbell;
+ req.max_send_wr = attr->cap.max_send_wr;
+ req.max_recv_wr = attr->cap.max_recv_wr;
+ req.max_send_sge = attr->cap.max_send_sge;
+ req.max_recv_sge = attr->cap.max_recv_sge;
+ req.flags = flags;
+
+ err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to create rc qp err %d", err);
+ return err;
+ }
+ qp->qp_handle = resp.rc_qp_handle;
+ for (i = 0; i < MANA_RC_QUEUE_TYPE_MAX; i++) {
+ qp->rc_qp.queues[i].id = resp.queue_ids[i];
+ /* The GDMA regions are now owned by the RNIC QP handle */
+ qp->rc_qp.queues[i].gdma_region = GDMA_INVALID_DMA_REGION;
+ }
+ return 0;
+}
+
+int mana_ib_gd_destroy_rc_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp)
+{
+ struct mana_rnic_destroy_rc_qp_resp resp = {0};
+ struct mana_rnic_destroy_rc_qp_req req = {0};
+ struct gdma_context *gc = mdev_to_gc(mdev);
+ int err;
+
+ mana_gd_init_req_hdr(&req.hdr, MANA_IB_DESTROY_RC_QP, sizeof(req), sizeof(resp));
+ req.hdr.dev_id = mdev->gdma_dev->dev_id;
+ req.adapter = mdev->adapter_handle;
+ req.rc_qp_handle = qp->qp_handle;
+ err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to destroy rc qp err %d", err);
+ return err;
+ }
+ return 0;
+}
+
+int mana_ib_gd_create_ud_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp,
+ struct ib_qp_init_attr *attr, u32 doorbell, u32 type)
+{
+ struct mana_ib_cq *send_cq = container_of(qp->ibqp.send_cq, struct mana_ib_cq, ibcq);
+ struct mana_ib_cq *recv_cq = container_of(qp->ibqp.recv_cq, struct mana_ib_cq, ibcq);
+ struct mana_ib_pd *pd = container_of(qp->ibqp.pd, struct mana_ib_pd, ibpd);
+ struct gdma_context *gc = mdev_to_gc(mdev);
+ struct mana_rnic_create_udqp_resp resp = {};
+ struct mana_rnic_create_udqp_req req = {};
+ int err, i;
+
+ mana_gd_init_req_hdr(&req.hdr, MANA_IB_CREATE_UD_QP, sizeof(req), sizeof(resp));
+ req.hdr.dev_id = mdev->gdma_dev->dev_id;
+ req.adapter = mdev->adapter_handle;
+ req.pd_handle = pd->pd_handle;
+ req.send_cq_handle = send_cq->cq_handle;
+ req.recv_cq_handle = recv_cq->cq_handle;
+ for (i = 0; i < MANA_UD_QUEUE_TYPE_MAX; i++)
+ req.dma_region[i] = qp->ud_qp.queues[i].gdma_region;
+ req.doorbell_page = doorbell;
+ req.max_send_wr = attr->cap.max_send_wr;
+ req.max_recv_wr = attr->cap.max_recv_wr;
+ req.max_send_sge = attr->cap.max_send_sge;
+ req.max_recv_sge = attr->cap.max_recv_sge;
+ req.qp_type = type;
+ err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to create ud qp err %d", err);
+ return err;
+ }
+ qp->qp_handle = resp.qp_handle;
+ for (i = 0; i < MANA_UD_QUEUE_TYPE_MAX; i++) {
+ qp->ud_qp.queues[i].id = resp.queue_ids[i];
+ /* The GDMA regions are now owned by the RNIC QP handle */
+ qp->ud_qp.queues[i].gdma_region = GDMA_INVALID_DMA_REGION;
+ }
+ return 0;
+}
+
+int mana_ib_gd_destroy_ud_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp)
+{
+ struct mana_rnic_destroy_udqp_resp resp = {0};
+ struct mana_rnic_destroy_udqp_req req = {0};
+ struct gdma_context *gc = mdev_to_gc(mdev);
+ int err;
+
+ mana_gd_init_req_hdr(&req.hdr, MANA_IB_DESTROY_UD_QP, sizeof(req), sizeof(resp));
+ req.hdr.dev_id = mdev->gdma_dev->dev_id;
+ req.adapter = mdev->adapter_handle;
+ req.qp_handle = qp->qp_handle;
+ err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to destroy ud qp err %d", err);
+ return err;
+ }
+ return 0;
+}
diff --git a/drivers/infiniband/hw/mana/mana_ib.h b/drivers/infiniband/hw/mana/mana_ib.h
new file mode 100644
index 000000000000..9d36232ed880
--- /dev/null
+++ b/drivers/infiniband/hw/mana/mana_ib.h
@@ -0,0 +1,738 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2022 Microsoft Corporation. All rights reserved.
+ */
+
+#ifndef _MANA_IB_H_
+#define _MANA_IB_H_
+
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_mad.h>
+#include <rdma/ib_umem.h>
+#include <rdma/mana-abi.h>
+#include <rdma/uverbs_ioctl.h>
+#include <linux/dmapool.h>
+
+#include <net/mana/mana.h>
+#include "shadow_queue.h"
+#include "counters.h"
+
+#define PAGE_SZ_BM \
+ (SZ_4K | SZ_8K | SZ_16K | SZ_32K | SZ_64K | SZ_128K | SZ_256K | \
+ SZ_512K | SZ_1M | SZ_2M)
+
+/* MANA doesn't have any limit for MR size */
+#define MANA_IB_MAX_MR_SIZE U64_MAX
+
+/* Send queue ID mask */
+#define MANA_SENDQ_MASK BIT(31)
+
+/*
+ * The hardware limit of number of MRs is greater than maximum number of MRs
+ * that can possibly represent in 24 bits
+ */
+#define MANA_IB_MAX_MR 0xFFFFFFu
+
+/*
+ * The CA timeout is approx. 260ms (4us * 2^(DELAY))
+ */
+#define MANA_CA_ACK_DELAY 16
+
+/*
+ * The buffer used for writing AV
+ */
+#define MANA_AV_BUFFER_SIZE 64
+
+#define MANA_GSI_QPN (1)
+
+struct mana_ib_adapter_caps {
+ u32 max_sq_id;
+ u32 max_rq_id;
+ u32 max_cq_id;
+ u32 max_qp_count;
+ u32 max_cq_count;
+ u32 max_mr_count;
+ u32 max_pd_count;
+ u32 max_inbound_read_limit;
+ u32 max_outbound_read_limit;
+ u32 mw_count;
+ u32 max_srq_count;
+ u32 max_qp_wr;
+ u32 max_send_sge_count;
+ u32 max_recv_sge_count;
+ u32 max_inline_data_size;
+ u64 feature_flags;
+ u64 page_size_cap;
+};
+
+struct mana_ib_queue {
+ struct ib_umem *umem;
+ struct gdma_queue *kmem;
+ u64 gdma_region;
+ u64 id;
+};
+
+struct mana_ib_dev {
+ struct ib_device ib_dev;
+ struct gdma_dev *gdma_dev;
+ mana_handle_t adapter_handle;
+ struct gdma_queue *fatal_err_eq;
+ struct gdma_queue **eqs;
+ struct xarray qp_table_wq;
+ struct mana_ib_adapter_caps adapter_caps;
+ struct dma_pool *av_pool;
+ netdevice_tracker dev_tracker;
+ struct notifier_block nb;
+};
+
+struct mana_ib_wq {
+ struct ib_wq ibwq;
+ struct mana_ib_queue queue;
+ int wqe;
+ u32 wq_buf_size;
+ mana_handle_t rx_object;
+};
+
+struct mana_ib_pd {
+ struct ib_pd ibpd;
+ u32 pdn;
+ mana_handle_t pd_handle;
+
+ /* Mutex for sharing access to vport_use_count */
+ struct mutex vport_mutex;
+ int vport_use_count;
+
+ bool tx_shortform_allowed;
+ u32 tx_vp_offset;
+};
+
+struct mana_ib_av {
+ u8 dest_ip[16];
+ u8 dest_mac[ETH_ALEN];
+ u16 udp_src_port;
+ u8 src_ip[16];
+ u32 hop_limit : 8;
+ u32 reserved1 : 12;
+ u32 dscp : 6;
+ u32 reserved2 : 5;
+ u32 is_ipv6 : 1;
+ u32 reserved3 : 32;
+};
+
+struct mana_ib_ah {
+ struct ib_ah ibah;
+ struct mana_ib_av *av;
+ dma_addr_t dma_handle;
+};
+
+struct mana_ib_mr {
+ struct ib_mr ibmr;
+ struct ib_umem *umem;
+ mana_handle_t mr_handle;
+};
+
+struct mana_ib_cq {
+ struct ib_cq ibcq;
+ struct mana_ib_queue queue;
+ /* protects CQ polling */
+ spinlock_t cq_lock;
+ struct list_head list_send_qp;
+ struct list_head list_recv_qp;
+ int cqe;
+ u32 comp_vector;
+ mana_handle_t cq_handle;
+};
+
+enum mana_rc_queue_type {
+ MANA_RC_SEND_QUEUE_REQUESTER = 0,
+ MANA_RC_SEND_QUEUE_RESPONDER,
+ MANA_RC_SEND_QUEUE_FMR,
+ MANA_RC_RECV_QUEUE_REQUESTER,
+ MANA_RC_RECV_QUEUE_RESPONDER,
+ MANA_RC_QUEUE_TYPE_MAX,
+};
+
+struct mana_ib_rc_qp {
+ struct mana_ib_queue queues[MANA_RC_QUEUE_TYPE_MAX];
+};
+
+enum mana_ud_queue_type {
+ MANA_UD_SEND_QUEUE = 0,
+ MANA_UD_RECV_QUEUE,
+ MANA_UD_QUEUE_TYPE_MAX,
+};
+
+struct mana_ib_ud_qp {
+ struct mana_ib_queue queues[MANA_UD_QUEUE_TYPE_MAX];
+ u32 sq_psn;
+};
+
+struct mana_ib_qp {
+ struct ib_qp ibqp;
+
+ mana_handle_t qp_handle;
+ union {
+ struct mana_ib_queue raw_sq;
+ struct mana_ib_rc_qp rc_qp;
+ struct mana_ib_ud_qp ud_qp;
+ };
+
+ /* The port on the IB device, starting with 1 */
+ u32 port;
+
+ struct list_head cq_send_list;
+ struct list_head cq_recv_list;
+ struct shadow_queue shadow_rq;
+ struct shadow_queue shadow_sq;
+
+ refcount_t refcount;
+ struct completion free;
+};
+
+struct mana_ib_ucontext {
+ struct ib_ucontext ibucontext;
+ u32 doorbell;
+};
+
+struct mana_ib_rwq_ind_table {
+ struct ib_rwq_ind_table ib_ind_table;
+};
+
+enum mana_ib_command_code {
+ MANA_IB_GET_ADAPTER_CAP = 0x30001,
+ MANA_IB_CREATE_ADAPTER = 0x30002,
+ MANA_IB_DESTROY_ADAPTER = 0x30003,
+ MANA_IB_CONFIG_IP_ADDR = 0x30004,
+ MANA_IB_CONFIG_MAC_ADDR = 0x30005,
+ MANA_IB_CREATE_UD_QP = 0x30006,
+ MANA_IB_DESTROY_UD_QP = 0x30007,
+ MANA_IB_CREATE_CQ = 0x30008,
+ MANA_IB_DESTROY_CQ = 0x30009,
+ MANA_IB_CREATE_RC_QP = 0x3000a,
+ MANA_IB_DESTROY_RC_QP = 0x3000b,
+ MANA_IB_SET_QP_STATE = 0x3000d,
+ MANA_IB_QUERY_VF_COUNTERS = 0x30022,
+ MANA_IB_QUERY_DEVICE_COUNTERS = 0x30023,
+};
+
+struct mana_ib_query_adapter_caps_req {
+ struct gdma_req_hdr hdr;
+}; /*HW Data */
+
+enum mana_ib_adapter_features {
+ MANA_IB_FEATURE_CLIENT_ERROR_CQE_SUPPORT = BIT(4),
+ MANA_IB_FEATURE_DEV_COUNTERS_SUPPORT = BIT(5),
+ MANA_IB_FEATURE_MULTI_PORTS_SUPPORT = BIT(6),
+};
+
+struct mana_ib_query_adapter_caps_resp {
+ struct gdma_resp_hdr hdr;
+ u32 max_sq_id;
+ u32 max_rq_id;
+ u32 max_cq_id;
+ u32 max_qp_count;
+ u32 max_cq_count;
+ u32 max_mr_count;
+ u32 max_pd_count;
+ u32 max_inbound_read_limit;
+ u32 max_outbound_read_limit;
+ u32 mw_count;
+ u32 max_srq_count;
+ u32 max_requester_sq_size;
+ u32 max_responder_sq_size;
+ u32 max_requester_rq_size;
+ u32 max_responder_rq_size;
+ u32 max_send_sge_count;
+ u32 max_recv_sge_count;
+ u32 max_inline_data_size;
+ u64 feature_flags;
+}; /* HW Data */
+
+enum mana_ib_adapter_features_request {
+ MANA_IB_FEATURE_CLIENT_ERROR_CQE_REQUEST = BIT(1),
+}; /*HW Data */
+
+struct mana_rnic_create_adapter_req {
+ struct gdma_req_hdr hdr;
+ u32 notify_eq_id;
+ u32 reserved;
+ u64 feature_flags;
+}; /*HW Data */
+
+struct mana_rnic_create_adapter_resp {
+ struct gdma_resp_hdr hdr;
+ mana_handle_t adapter;
+}; /* HW Data */
+
+struct mana_rnic_destroy_adapter_req {
+ struct gdma_req_hdr hdr;
+ mana_handle_t adapter;
+}; /*HW Data */
+
+struct mana_rnic_destroy_adapter_resp {
+ struct gdma_resp_hdr hdr;
+}; /* HW Data */
+
+enum mana_ib_addr_op {
+ ADDR_OP_ADD = 1,
+ ADDR_OP_REMOVE = 2,
+};
+
+enum sgid_entry_type {
+ SGID_TYPE_IPV4 = 1,
+ SGID_TYPE_IPV6 = 2,
+};
+
+struct mana_rnic_config_addr_req {
+ struct gdma_req_hdr hdr;
+ mana_handle_t adapter;
+ enum mana_ib_addr_op op;
+ enum sgid_entry_type sgid_type;
+ u8 ip_addr[16];
+}; /* HW Data */
+
+struct mana_rnic_config_addr_resp {
+ struct gdma_resp_hdr hdr;
+}; /* HW Data */
+
+struct mana_rnic_config_mac_addr_req {
+ struct gdma_req_hdr hdr;
+ mana_handle_t adapter;
+ enum mana_ib_addr_op op;
+ u8 mac_addr[ETH_ALEN];
+ u8 reserved[6];
+}; /* HW Data */
+
+struct mana_rnic_config_mac_addr_resp {
+ struct gdma_resp_hdr hdr;
+}; /* HW Data */
+
+struct mana_rnic_create_cq_req {
+ struct gdma_req_hdr hdr;
+ mana_handle_t adapter;
+ u64 gdma_region;
+ u32 eq_id;
+ u32 doorbell_page;
+}; /* HW Data */
+
+struct mana_rnic_create_cq_resp {
+ struct gdma_resp_hdr hdr;
+ mana_handle_t cq_handle;
+ u32 cq_id;
+ u32 reserved;
+}; /* HW Data */
+
+struct mana_rnic_destroy_cq_req {
+ struct gdma_req_hdr hdr;
+ mana_handle_t adapter;
+ mana_handle_t cq_handle;
+}; /* HW Data */
+
+struct mana_rnic_destroy_cq_resp {
+ struct gdma_resp_hdr hdr;
+}; /* HW Data */
+
+enum mana_rnic_create_rc_flags {
+ MANA_RC_FLAG_NO_FMR = 2,
+};
+
+struct mana_rnic_create_qp_req {
+ struct gdma_req_hdr hdr;
+ mana_handle_t adapter;
+ mana_handle_t pd_handle;
+ mana_handle_t send_cq_handle;
+ mana_handle_t recv_cq_handle;
+ u64 dma_region[MANA_RC_QUEUE_TYPE_MAX];
+ u64 deprecated[2];
+ u64 flags;
+ u32 doorbell_page;
+ u32 max_send_wr;
+ u32 max_recv_wr;
+ u32 max_send_sge;
+ u32 max_recv_sge;
+ u32 reserved;
+}; /* HW Data */
+
+struct mana_rnic_create_qp_resp {
+ struct gdma_resp_hdr hdr;
+ mana_handle_t rc_qp_handle;
+ u32 queue_ids[MANA_RC_QUEUE_TYPE_MAX];
+ u32 reserved;
+}; /* HW Data*/
+
+struct mana_rnic_destroy_rc_qp_req {
+ struct gdma_req_hdr hdr;
+ mana_handle_t adapter;
+ mana_handle_t rc_qp_handle;
+}; /* HW Data */
+
+struct mana_rnic_destroy_rc_qp_resp {
+ struct gdma_resp_hdr hdr;
+}; /* HW Data */
+
+struct mana_rnic_create_udqp_req {
+ struct gdma_req_hdr hdr;
+ mana_handle_t adapter;
+ mana_handle_t pd_handle;
+ mana_handle_t send_cq_handle;
+ mana_handle_t recv_cq_handle;
+ u64 dma_region[MANA_UD_QUEUE_TYPE_MAX];
+ u32 qp_type;
+ u32 doorbell_page;
+ u32 max_send_wr;
+ u32 max_recv_wr;
+ u32 max_send_sge;
+ u32 max_recv_sge;
+}; /* HW Data */
+
+struct mana_rnic_create_udqp_resp {
+ struct gdma_resp_hdr hdr;
+ mana_handle_t qp_handle;
+ u32 queue_ids[MANA_UD_QUEUE_TYPE_MAX];
+}; /* HW Data*/
+
+struct mana_rnic_destroy_udqp_req {
+ struct gdma_req_hdr hdr;
+ mana_handle_t adapter;
+ mana_handle_t qp_handle;
+}; /* HW Data */
+
+struct mana_rnic_destroy_udqp_resp {
+ struct gdma_resp_hdr hdr;
+}; /* HW Data */
+
+struct mana_ib_ah_attr {
+ u8 src_addr[16];
+ u8 dest_addr[16];
+ u8 src_mac[ETH_ALEN];
+ u8 dest_mac[ETH_ALEN];
+ u8 src_addr_type;
+ u8 dest_addr_type;
+ u8 hop_limit;
+ u8 traffic_class;
+ u16 src_port;
+ u16 dest_port;
+ u32 flow_label;
+};
+
+struct mana_rnic_set_qp_state_req {
+ struct gdma_req_hdr hdr;
+ mana_handle_t adapter;
+ mana_handle_t qp_handle;
+ u64 attr_mask;
+ u32 qp_state;
+ u32 path_mtu;
+ u32 rq_psn;
+ u32 sq_psn;
+ u32 dest_qpn;
+ u32 max_dest_rd_atomic;
+ u32 retry_cnt;
+ u32 rnr_retry;
+ u32 min_rnr_timer;
+ u32 rate_limit;
+ struct mana_ib_ah_attr ah_attr;
+ u64 reserved1;
+ u32 qkey;
+ u32 qp_access_flags;
+ u8 local_ack_timeout;
+ u8 max_rd_atomic;
+ u16 reserved2;
+ u32 reserved3;
+}; /* HW Data */
+
+struct mana_rnic_set_qp_state_resp {
+ struct gdma_resp_hdr hdr;
+}; /* HW Data */
+
+enum WQE_OPCODE_TYPES {
+ WQE_TYPE_UD_SEND = 0,
+ WQE_TYPE_UD_RECV = 8,
+}; /* HW DATA */
+
+struct rdma_send_oob {
+ u32 wqe_type : 5;
+ u32 fence : 1;
+ u32 signaled : 1;
+ u32 solicited : 1;
+ u32 psn : 24;
+
+ u32 ssn_or_rqpn : 24;
+ u32 reserved1 : 8;
+ union {
+ struct {
+ u32 remote_qkey;
+ u32 immediate;
+ u32 reserved1;
+ u32 reserved2;
+ } ud_send;
+ };
+}; /* HW DATA */
+
+struct mana_rdma_cqe {
+ union {
+ struct {
+ u8 cqe_type;
+ u8 data[GDMA_COMP_DATA_SIZE - 1];
+ };
+ struct {
+ u32 cqe_type : 8;
+ u32 vendor_error : 9;
+ u32 reserved1 : 15;
+ u32 sge_offset : 5;
+ u32 tx_wqe_offset : 27;
+ } ud_send;
+ struct {
+ u32 cqe_type : 8;
+ u32 reserved1 : 24;
+ u32 msg_len;
+ u32 src_qpn : 24;
+ u32 reserved2 : 8;
+ u32 imm_data;
+ u32 rx_wqe_offset;
+ } ud_recv;
+ };
+}; /* HW DATA */
+
+struct mana_rnic_query_vf_cntrs_req {
+ struct gdma_req_hdr hdr;
+ mana_handle_t adapter;
+}; /* HW Data */
+
+struct mana_rnic_query_vf_cntrs_resp {
+ struct gdma_resp_hdr hdr;
+ u64 requester_timeout;
+ u64 requester_oos_nak;
+ u64 requester_rnr_nak;
+ u64 responder_rnr_nak;
+ u64 responder_oos;
+ u64 responder_dup_request;
+ u64 requester_implicit_nak;
+ u64 requester_readresp_psn_mismatch;
+ u64 nak_inv_req;
+ u64 nak_access_err;
+ u64 nak_opp_err;
+ u64 nak_inv_read;
+ u64 responder_local_len_err;
+ u64 requestor_local_prot_err;
+ u64 responder_rem_access_err;
+ u64 responder_local_qp_err;
+ u64 responder_malformed_wqe;
+ u64 general_hw_err;
+ u64 requester_rnr_nak_retries_exceeded;
+ u64 requester_retries_exceeded;
+ u64 total_fatal_err;
+ u64 received_cnps;
+ u64 num_qps_congested;
+ u64 rate_inc_events;
+ u64 num_qps_recovered;
+ u64 current_rate;
+ u64 dup_rx_req;
+ u64 tx_bytes;
+ u64 rx_bytes;
+ u64 rx_send_req;
+ u64 rx_write_req;
+ u64 rx_read_req;
+ u64 tx_pkt;
+ u64 rx_pkt;
+}; /* HW Data */
+
+struct mana_rnic_query_device_cntrs_req {
+ struct gdma_req_hdr hdr;
+ mana_handle_t adapter;
+}; /* HW Data */
+
+struct mana_rnic_query_device_cntrs_resp {
+ struct gdma_resp_hdr hdr;
+ u32 sent_cnps;
+ u32 received_ecns;
+ u32 reserved1;
+ u32 received_cnp_count;
+ u32 qp_congested_events;
+ u32 qp_recovered_events;
+ u32 rate_inc_events;
+ u32 reserved2;
+}; /* HW Data */
+
+static inline struct gdma_context *mdev_to_gc(struct mana_ib_dev *mdev)
+{
+ return mdev->gdma_dev->gdma_context;
+}
+
+static inline struct mana_ib_qp *mana_get_qp_ref(struct mana_ib_dev *mdev,
+ u32 qid, bool is_sq)
+{
+ struct mana_ib_qp *qp;
+ unsigned long flag;
+
+ if (is_sq)
+ qid |= MANA_SENDQ_MASK;
+
+ xa_lock_irqsave(&mdev->qp_table_wq, flag);
+ qp = xa_load(&mdev->qp_table_wq, qid);
+ if (qp)
+ refcount_inc(&qp->refcount);
+ xa_unlock_irqrestore(&mdev->qp_table_wq, flag);
+ return qp;
+}
+
+static inline void mana_put_qp_ref(struct mana_ib_qp *qp)
+{
+ if (refcount_dec_and_test(&qp->refcount))
+ complete(&qp->free);
+}
+
+static inline bool mana_ib_is_rnic(struct mana_ib_dev *mdev)
+{
+ return mdev->gdma_dev->dev_id.type == GDMA_DEVICE_MANA_IB;
+}
+
+static inline struct net_device *mana_ib_get_netdev(struct ib_device *ibdev, u32 port)
+{
+ struct mana_ib_dev *mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
+ struct gdma_context *gc = mdev_to_gc(mdev);
+ struct mana_context *mc = gc->mana.driver_data;
+
+ if (port < 1 || port > mc->num_ports)
+ return NULL;
+ return mc->ports[port - 1];
+}
+
+static inline void copy_in_reverse(u8 *dst, const u8 *src, u32 size)
+{
+ u32 i;
+
+ for (i = 0; i < size; i++)
+ dst[size - 1 - i] = src[i];
+}
+
+int mana_ib_install_cq_cb(struct mana_ib_dev *mdev, struct mana_ib_cq *cq);
+void mana_ib_remove_cq_cb(struct mana_ib_dev *mdev, struct mana_ib_cq *cq);
+
+int mana_ib_create_zero_offset_dma_region(struct mana_ib_dev *dev, struct ib_umem *umem,
+ mana_handle_t *gdma_region);
+
+int mana_ib_create_dma_region(struct mana_ib_dev *dev, struct ib_umem *umem,
+ mana_handle_t *gdma_region, u64 virt);
+
+int mana_ib_gd_destroy_dma_region(struct mana_ib_dev *dev,
+ mana_handle_t gdma_region);
+
+int mana_ib_create_kernel_queue(struct mana_ib_dev *mdev, u32 size, enum gdma_queue_type type,
+ struct mana_ib_queue *queue);
+int mana_ib_create_queue(struct mana_ib_dev *mdev, u64 addr, u32 size,
+ struct mana_ib_queue *queue);
+void mana_ib_destroy_queue(struct mana_ib_dev *mdev, struct mana_ib_queue *queue);
+
+struct ib_wq *mana_ib_create_wq(struct ib_pd *pd,
+ struct ib_wq_init_attr *init_attr,
+ struct ib_udata *udata);
+
+int mana_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
+ u32 wq_attr_mask, struct ib_udata *udata);
+
+int mana_ib_destroy_wq(struct ib_wq *ibwq, struct ib_udata *udata);
+
+int mana_ib_create_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_table,
+ struct ib_rwq_ind_table_init_attr *init_attr,
+ struct ib_udata *udata);
+
+int mana_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_tbl);
+
+struct ib_mr *mana_ib_get_dma_mr(struct ib_pd *ibpd, int access_flags);
+
+struct ib_mr *mana_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+ u64 iova, int access_flags,
+ struct ib_dmah *dmah,
+ struct ib_udata *udata);
+
+int mana_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata);
+
+int mana_ib_create_qp(struct ib_qp *qp, struct ib_qp_init_attr *qp_init_attr,
+ struct ib_udata *udata);
+
+int mana_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_udata *udata);
+
+int mana_ib_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata);
+
+int mana_ib_cfg_vport(struct mana_ib_dev *dev, u32 port_id,
+ struct mana_ib_pd *pd, u32 doorbell_id);
+void mana_ib_uncfg_vport(struct mana_ib_dev *dev, struct mana_ib_pd *pd,
+ u32 port);
+
+int mana_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+ struct uverbs_attr_bundle *attrs);
+
+int mana_ib_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata);
+
+int mana_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata);
+int mana_ib_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata);
+
+int mana_ib_alloc_ucontext(struct ib_ucontext *ibcontext,
+ struct ib_udata *udata);
+void mana_ib_dealloc_ucontext(struct ib_ucontext *ibcontext);
+
+int mana_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma);
+
+int mana_ib_get_port_immutable(struct ib_device *ibdev, u32 port_num,
+ struct ib_port_immutable *immutable);
+int mana_ib_query_device(struct ib_device *ibdev, struct ib_device_attr *props,
+ struct ib_udata *uhw);
+int mana_ib_query_port(struct ib_device *ibdev, u32 port,
+ struct ib_port_attr *props);
+int mana_ib_query_gid(struct ib_device *ibdev, u32 port, int index,
+ union ib_gid *gid);
+
+void mana_ib_disassociate_ucontext(struct ib_ucontext *ibcontext);
+
+int mana_ib_gd_query_adapter_caps(struct mana_ib_dev *mdev);
+int mana_eth_query_adapter_caps(struct mana_ib_dev *mdev);
+
+int mana_ib_create_eqs(struct mana_ib_dev *mdev);
+
+void mana_ib_destroy_eqs(struct mana_ib_dev *mdev);
+
+int mana_ib_gd_create_rnic_adapter(struct mana_ib_dev *mdev);
+
+int mana_ib_gd_destroy_rnic_adapter(struct mana_ib_dev *mdev);
+
+int mana_ib_query_pkey(struct ib_device *ibdev, u32 port, u16 index, u16 *pkey);
+
+enum rdma_link_layer mana_ib_get_link_layer(struct ib_device *device, u32 port_num);
+
+int mana_ib_gd_add_gid(const struct ib_gid_attr *attr, void **context);
+
+int mana_ib_gd_del_gid(const struct ib_gid_attr *attr, void **context);
+
+int mana_ib_gd_config_mac(struct mana_ib_dev *mdev, enum mana_ib_addr_op op, u8 *mac);
+
+int mana_ib_gd_create_cq(struct mana_ib_dev *mdev, struct mana_ib_cq *cq, u32 doorbell);
+
+int mana_ib_gd_destroy_cq(struct mana_ib_dev *mdev, struct mana_ib_cq *cq);
+
+int mana_ib_gd_create_rc_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp,
+ struct ib_qp_init_attr *attr, u32 doorbell, u64 flags);
+int mana_ib_gd_destroy_rc_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp);
+
+int mana_ib_gd_create_ud_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp,
+ struct ib_qp_init_attr *attr, u32 doorbell, u32 type);
+int mana_ib_gd_destroy_ud_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp);
+
+int mana_ib_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
+ struct ib_udata *udata);
+int mana_ib_destroy_ah(struct ib_ah *ah, u32 flags);
+
+int mana_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
+ const struct ib_recv_wr **bad_wr);
+int mana_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+ const struct ib_send_wr **bad_wr);
+
+void mana_drain_gsi_sqs(struct mana_ib_dev *mdev);
+int mana_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
+int mana_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags);
+
+struct ib_mr *mana_ib_reg_user_mr_dmabuf(struct ib_pd *ibpd, u64 start, u64 length,
+ u64 iova, int fd, int mr_access_flags,
+ struct ib_dmah *dmah,
+ struct uverbs_attr_bundle *attrs);
+#endif
diff --git a/drivers/infiniband/hw/mana/mr.c b/drivers/infiniband/hw/mana/mr.c
new file mode 100644
index 000000000000..3d0245a4c1ed
--- /dev/null
+++ b/drivers/infiniband/hw/mana/mr.c
@@ -0,0 +1,319 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2022, Microsoft Corporation. All rights reserved.
+ */
+
+#include "mana_ib.h"
+
+#define VALID_MR_FLAGS (IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ |\
+ IB_ACCESS_REMOTE_ATOMIC | IB_ZERO_BASED)
+
+#define VALID_DMA_MR_FLAGS (IB_ACCESS_LOCAL_WRITE)
+
+static enum gdma_mr_access_flags
+mana_ib_verbs_to_gdma_access_flags(int access_flags)
+{
+ enum gdma_mr_access_flags flags = GDMA_ACCESS_FLAG_LOCAL_READ;
+
+ if (access_flags & IB_ACCESS_LOCAL_WRITE)
+ flags |= GDMA_ACCESS_FLAG_LOCAL_WRITE;
+
+ if (access_flags & IB_ACCESS_REMOTE_WRITE)
+ flags |= GDMA_ACCESS_FLAG_REMOTE_WRITE;
+
+ if (access_flags & IB_ACCESS_REMOTE_READ)
+ flags |= GDMA_ACCESS_FLAG_REMOTE_READ;
+
+ if (access_flags & IB_ACCESS_REMOTE_ATOMIC)
+ flags |= GDMA_ACCESS_FLAG_REMOTE_ATOMIC;
+
+ return flags;
+}
+
+static int mana_ib_gd_create_mr(struct mana_ib_dev *dev, struct mana_ib_mr *mr,
+ struct gdma_create_mr_params *mr_params)
+{
+ struct gdma_create_mr_response resp = {};
+ struct gdma_create_mr_request req = {};
+ struct gdma_context *gc = mdev_to_gc(dev);
+ int err;
+
+ mana_gd_init_req_hdr(&req.hdr, GDMA_CREATE_MR, sizeof(req),
+ sizeof(resp));
+ req.pd_handle = mr_params->pd_handle;
+ req.mr_type = mr_params->mr_type;
+
+ switch (mr_params->mr_type) {
+ case GDMA_MR_TYPE_GPA:
+ break;
+ case GDMA_MR_TYPE_GVA:
+ req.gva.dma_region_handle = mr_params->gva.dma_region_handle;
+ req.gva.virtual_address = mr_params->gva.virtual_address;
+ req.gva.access_flags = mr_params->gva.access_flags;
+ break;
+ case GDMA_MR_TYPE_ZBVA:
+ req.zbva.dma_region_handle = mr_params->zbva.dma_region_handle;
+ req.zbva.access_flags = mr_params->zbva.access_flags;
+ break;
+ default:
+ ibdev_dbg(&dev->ib_dev,
+ "invalid param (GDMA_MR_TYPE) passed, type %d\n",
+ req.mr_type);
+ return -EINVAL;
+ }
+
+ err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+
+ if (err || resp.hdr.status) {
+ ibdev_dbg(&dev->ib_dev, "Failed to create mr %d, %u", err,
+ resp.hdr.status);
+ if (!err)
+ err = -EPROTO;
+
+ return err;
+ }
+
+ mr->ibmr.lkey = resp.lkey;
+ mr->ibmr.rkey = resp.rkey;
+ mr->mr_handle = resp.mr_handle;
+
+ return 0;
+}
+
+static int mana_ib_gd_destroy_mr(struct mana_ib_dev *dev, u64 mr_handle)
+{
+ struct gdma_destroy_mr_response resp = {};
+ struct gdma_destroy_mr_request req = {};
+ struct gdma_context *gc = mdev_to_gc(dev);
+ int err;
+
+ mana_gd_init_req_hdr(&req.hdr, GDMA_DESTROY_MR, sizeof(req),
+ sizeof(resp));
+
+ req.mr_handle = mr_handle;
+
+ err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+ if (err || resp.hdr.status) {
+ dev_err(gc->dev, "Failed to destroy MR: %d, 0x%x\n", err,
+ resp.hdr.status);
+ if (!err)
+ err = -EPROTO;
+ return err;
+ }
+
+ return 0;
+}
+
+struct ib_mr *mana_ib_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 length,
+ u64 iova, int access_flags,
+ struct ib_dmah *dmah,
+ struct ib_udata *udata)
+{
+ struct mana_ib_pd *pd = container_of(ibpd, struct mana_ib_pd, ibpd);
+ struct gdma_create_mr_params mr_params = {};
+ struct ib_device *ibdev = ibpd->device;
+ struct mana_ib_dev *dev;
+ struct mana_ib_mr *mr;
+ u64 dma_region_handle;
+ int err;
+
+ if (dmah)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
+
+ ibdev_dbg(ibdev,
+ "start 0x%llx, iova 0x%llx length 0x%llx access_flags 0x%x",
+ start, iova, length, access_flags);
+
+ access_flags &= ~IB_ACCESS_OPTIONAL;
+ if (access_flags & ~VALID_MR_FLAGS)
+ return ERR_PTR(-EINVAL);
+
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+ mr->umem = ib_umem_get(ibdev, start, length, access_flags);
+ if (IS_ERR(mr->umem)) {
+ err = PTR_ERR(mr->umem);
+ ibdev_dbg(ibdev,
+ "Failed to get umem for register user-mr, %pe\n",
+ mr->umem);
+ goto err_free;
+ }
+
+ err = mana_ib_create_dma_region(dev, mr->umem, &dma_region_handle, iova);
+ if (err) {
+ ibdev_dbg(ibdev, "Failed create dma region for user-mr, %d\n",
+ err);
+ goto err_umem;
+ }
+
+ ibdev_dbg(ibdev,
+ "created dma region for user-mr 0x%llx\n",
+ dma_region_handle);
+
+ mr_params.pd_handle = pd->pd_handle;
+ if (access_flags & IB_ZERO_BASED) {
+ mr_params.mr_type = GDMA_MR_TYPE_ZBVA;
+ mr_params.zbva.dma_region_handle = dma_region_handle;
+ mr_params.zbva.access_flags =
+ mana_ib_verbs_to_gdma_access_flags(access_flags);
+ } else {
+ mr_params.mr_type = GDMA_MR_TYPE_GVA;
+ mr_params.gva.dma_region_handle = dma_region_handle;
+ mr_params.gva.virtual_address = iova;
+ mr_params.gva.access_flags =
+ mana_ib_verbs_to_gdma_access_flags(access_flags);
+ }
+
+ err = mana_ib_gd_create_mr(dev, mr, &mr_params);
+ if (err)
+ goto err_dma_region;
+
+ /*
+ * There is no need to keep track of dma_region_handle after MR is
+ * successfully created. The dma_region_handle is tracked in the PF
+ * as part of the lifecycle of this MR.
+ */
+
+ return &mr->ibmr;
+
+err_dma_region:
+ mana_gd_destroy_dma_region(mdev_to_gc(dev), dma_region_handle);
+
+err_umem:
+ ib_umem_release(mr->umem);
+
+err_free:
+ kfree(mr);
+ return ERR_PTR(err);
+}
+
+struct ib_mr *mana_ib_reg_user_mr_dmabuf(struct ib_pd *ibpd, u64 start, u64 length,
+ u64 iova, int fd, int access_flags,
+ struct ib_dmah *dmah,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct mana_ib_pd *pd = container_of(ibpd, struct mana_ib_pd, ibpd);
+ struct gdma_create_mr_params mr_params = {};
+ struct ib_device *ibdev = ibpd->device;
+ struct ib_umem_dmabuf *umem_dmabuf;
+ struct mana_ib_dev *dev;
+ struct mana_ib_mr *mr;
+ u64 dma_region_handle;
+ int err;
+
+ if (dmah)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
+
+ access_flags &= ~IB_ACCESS_OPTIONAL;
+ if (access_flags & ~VALID_MR_FLAGS)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+ umem_dmabuf = ib_umem_dmabuf_get_pinned(ibdev, start, length, fd, access_flags);
+ if (IS_ERR(umem_dmabuf)) {
+ err = PTR_ERR(umem_dmabuf);
+ ibdev_dbg(ibdev, "Failed to get dmabuf umem, %pe\n",
+ umem_dmabuf);
+ goto err_free;
+ }
+
+ mr->umem = &umem_dmabuf->umem;
+
+ err = mana_ib_create_dma_region(dev, mr->umem, &dma_region_handle, iova);
+ if (err) {
+ ibdev_dbg(ibdev, "Failed create dma region for user-mr, %d\n",
+ err);
+ goto err_umem;
+ }
+
+ mr_params.pd_handle = pd->pd_handle;
+ mr_params.mr_type = GDMA_MR_TYPE_GVA;
+ mr_params.gva.dma_region_handle = dma_region_handle;
+ mr_params.gva.virtual_address = iova;
+ mr_params.gva.access_flags =
+ mana_ib_verbs_to_gdma_access_flags(access_flags);
+
+ err = mana_ib_gd_create_mr(dev, mr, &mr_params);
+ if (err)
+ goto err_dma_region;
+
+ /*
+ * There is no need to keep track of dma_region_handle after MR is
+ * successfully created. The dma_region_handle is tracked in the PF
+ * as part of the lifecycle of this MR.
+ */
+
+ return &mr->ibmr;
+
+err_dma_region:
+ mana_gd_destroy_dma_region(mdev_to_gc(dev), dma_region_handle);
+
+err_umem:
+ ib_umem_release(mr->umem);
+
+err_free:
+ kfree(mr);
+ return ERR_PTR(err);
+}
+
+struct ib_mr *mana_ib_get_dma_mr(struct ib_pd *ibpd, int access_flags)
+{
+ struct mana_ib_pd *pd = container_of(ibpd, struct mana_ib_pd, ibpd);
+ struct gdma_create_mr_params mr_params = {};
+ struct ib_device *ibdev = ibpd->device;
+ struct mana_ib_dev *dev;
+ struct mana_ib_mr *mr;
+ int err;
+
+ dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
+
+ if (access_flags & ~VALID_DMA_MR_FLAGS)
+ return ERR_PTR(-EINVAL);
+
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+ mr_params.pd_handle = pd->pd_handle;
+ mr_params.mr_type = GDMA_MR_TYPE_GPA;
+
+ err = mana_ib_gd_create_mr(dev, mr, &mr_params);
+ if (err)
+ goto err_free;
+
+ return &mr->ibmr;
+
+err_free:
+ kfree(mr);
+ return ERR_PTR(err);
+}
+
+int mana_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
+{
+ struct mana_ib_mr *mr = container_of(ibmr, struct mana_ib_mr, ibmr);
+ struct ib_device *ibdev = ibmr->device;
+ struct mana_ib_dev *dev;
+ int err;
+
+ dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
+
+ err = mana_ib_gd_destroy_mr(dev, mr->mr_handle);
+ if (err)
+ return err;
+
+ if (mr->umem)
+ ib_umem_release(mr->umem);
+
+ kfree(mr);
+
+ return 0;
+}
diff --git a/drivers/infiniband/hw/mana/qp.c b/drivers/infiniband/hw/mana/qp.c
new file mode 100644
index 000000000000..48c1f4977f21
--- /dev/null
+++ b/drivers/infiniband/hw/mana/qp.c
@@ -0,0 +1,921 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2022, Microsoft Corporation. All rights reserved.
+ */
+
+#include "mana_ib.h"
+
+static int mana_ib_cfg_vport_steering(struct mana_ib_dev *dev,
+ struct net_device *ndev,
+ mana_handle_t default_rxobj,
+ mana_handle_t ind_table[],
+ u32 log_ind_tbl_size, u32 rx_hash_key_len,
+ u8 *rx_hash_key)
+{
+ struct mana_port_context *mpc = netdev_priv(ndev);
+ struct mana_cfg_rx_steer_req_v2 *req;
+ struct mana_cfg_rx_steer_resp resp = {};
+ struct gdma_context *gc;
+ u32 req_buf_size;
+ int i, err;
+
+ gc = mdev_to_gc(dev);
+
+ req_buf_size = struct_size(req, indir_tab, MANA_INDIRECT_TABLE_DEF_SIZE);
+ req = kzalloc(req_buf_size, GFP_KERNEL);
+ if (!req)
+ return -ENOMEM;
+
+ mana_gd_init_req_hdr(&req->hdr, MANA_CONFIG_VPORT_RX, req_buf_size,
+ sizeof(resp));
+
+ req->hdr.req.msg_version = GDMA_MESSAGE_V2;
+
+ req->vport = mpc->port_handle;
+ req->rx_enable = 1;
+ req->update_default_rxobj = 1;
+ req->default_rxobj = default_rxobj;
+ req->hdr.dev_id = gc->mana.dev_id;
+
+ /* If there are more than 1 entries in indirection table, enable RSS */
+ if (log_ind_tbl_size)
+ req->rss_enable = true;
+
+ req->num_indir_entries = MANA_INDIRECT_TABLE_DEF_SIZE;
+ req->indir_tab_offset = offsetof(struct mana_cfg_rx_steer_req_v2,
+ indir_tab);
+ req->update_indir_tab = true;
+ req->cqe_coalescing_enable = 1;
+
+ /* The ind table passed to the hardware must have
+ * MANA_INDIRECT_TABLE_DEF_SIZE entries. Adjust the verb
+ * ind_table to MANA_INDIRECT_TABLE_SIZE if required
+ */
+ ibdev_dbg(&dev->ib_dev, "ind table size %u\n", 1 << log_ind_tbl_size);
+ for (i = 0; i < MANA_INDIRECT_TABLE_DEF_SIZE; i++) {
+ req->indir_tab[i] = ind_table[i % (1 << log_ind_tbl_size)];
+ ibdev_dbg(&dev->ib_dev, "index %u handle 0x%llx\n", i,
+ req->indir_tab[i]);
+ }
+
+ req->update_hashkey = true;
+ if (rx_hash_key_len)
+ memcpy(req->hashkey, rx_hash_key, rx_hash_key_len);
+ else
+ netdev_rss_key_fill(req->hashkey, MANA_HASH_KEY_SIZE);
+
+ ibdev_dbg(&dev->ib_dev, "vport handle %llu default_rxobj 0x%llx\n",
+ req->vport, default_rxobj);
+
+ err = mana_gd_send_request(gc, req_buf_size, req, sizeof(resp), &resp);
+ if (err) {
+ netdev_err(ndev, "Failed to configure vPort RX: %d\n", err);
+ goto out;
+ }
+
+ if (resp.hdr.status) {
+ netdev_err(ndev, "vPort RX configuration failed: 0x%x\n",
+ resp.hdr.status);
+ err = -EPROTO;
+ goto out;
+ }
+
+ netdev_info(ndev, "Configured steering vPort %llu log_entries %u\n",
+ mpc->port_handle, log_ind_tbl_size);
+
+out:
+ kfree(req);
+ return err;
+}
+
+static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd,
+ struct ib_qp_init_attr *attr,
+ struct ib_udata *udata)
+{
+ struct mana_ib_qp *qp = container_of(ibqp, struct mana_ib_qp, ibqp);
+ struct mana_ib_dev *mdev =
+ container_of(pd->device, struct mana_ib_dev, ib_dev);
+ struct ib_rwq_ind_table *ind_tbl = attr->rwq_ind_tbl;
+ struct mana_ib_create_qp_rss_resp resp = {};
+ struct mana_ib_create_qp_rss ucmd = {};
+ mana_handle_t *mana_ind_table;
+ struct mana_port_context *mpc;
+ unsigned int ind_tbl_size;
+ struct net_device *ndev;
+ struct mana_ib_cq *cq;
+ struct mana_ib_wq *wq;
+ struct mana_eq *eq;
+ struct ib_cq *ibcq;
+ struct ib_wq *ibwq;
+ int i = 0;
+ u32 port;
+ int ret;
+
+ if (!udata || udata->inlen < sizeof(ucmd))
+ return -EINVAL;
+
+ ret = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen));
+ if (ret) {
+ ibdev_dbg(&mdev->ib_dev,
+ "Failed copy from udata for create rss-qp, err %d\n",
+ ret);
+ return ret;
+ }
+
+ if (attr->cap.max_recv_wr > mdev->adapter_caps.max_qp_wr) {
+ ibdev_dbg(&mdev->ib_dev,
+ "Requested max_recv_wr %d exceeding limit\n",
+ attr->cap.max_recv_wr);
+ return -EINVAL;
+ }
+
+ if (attr->cap.max_recv_sge > MAX_RX_WQE_SGL_ENTRIES) {
+ ibdev_dbg(&mdev->ib_dev,
+ "Requested max_recv_sge %d exceeding limit\n",
+ attr->cap.max_recv_sge);
+ return -EINVAL;
+ }
+
+ ind_tbl_size = 1 << ind_tbl->log_ind_tbl_size;
+ if (ind_tbl_size > MANA_INDIRECT_TABLE_DEF_SIZE) {
+ ibdev_dbg(&mdev->ib_dev,
+ "Indirect table size %d exceeding limit\n",
+ ind_tbl_size);
+ return -EINVAL;
+ }
+
+ if (ucmd.rx_hash_function != MANA_IB_RX_HASH_FUNC_TOEPLITZ) {
+ ibdev_dbg(&mdev->ib_dev,
+ "RX Hash function is not supported, %d\n",
+ ucmd.rx_hash_function);
+ return -EINVAL;
+ }
+
+ /* IB ports start with 1, MANA start with 0 */
+ port = ucmd.port;
+ ndev = mana_ib_get_netdev(pd->device, port);
+ if (!ndev) {
+ ibdev_dbg(&mdev->ib_dev, "Invalid port %u in creating qp\n",
+ port);
+ return -EINVAL;
+ }
+ mpc = netdev_priv(ndev);
+
+ ibdev_dbg(&mdev->ib_dev, "rx_hash_function %d port %d\n",
+ ucmd.rx_hash_function, port);
+
+ mana_ind_table = kcalloc(ind_tbl_size, sizeof(mana_handle_t),
+ GFP_KERNEL);
+ if (!mana_ind_table) {
+ ret = -ENOMEM;
+ goto fail;
+ }
+
+ qp->port = port;
+
+ for (i = 0; i < ind_tbl_size; i++) {
+ struct mana_obj_spec wq_spec = {};
+ struct mana_obj_spec cq_spec = {};
+
+ ibwq = ind_tbl->ind_tbl[i];
+ wq = container_of(ibwq, struct mana_ib_wq, ibwq);
+
+ ibcq = ibwq->cq;
+ cq = container_of(ibcq, struct mana_ib_cq, ibcq);
+
+ wq_spec.gdma_region = wq->queue.gdma_region;
+ wq_spec.queue_size = wq->wq_buf_size;
+
+ cq_spec.gdma_region = cq->queue.gdma_region;
+ cq_spec.queue_size = cq->cqe * COMP_ENTRY_SIZE;
+ cq_spec.modr_ctx_id = 0;
+ eq = &mpc->ac->eqs[cq->comp_vector];
+ cq_spec.attached_eq = eq->eq->id;
+
+ ret = mana_create_wq_obj(mpc, mpc->port_handle, GDMA_RQ,
+ &wq_spec, &cq_spec, &wq->rx_object);
+ if (ret) {
+ /* Do cleanup starting with index i-1 */
+ i--;
+ goto fail;
+ }
+
+ /* The GDMA regions are now owned by the WQ object */
+ wq->queue.gdma_region = GDMA_INVALID_DMA_REGION;
+ cq->queue.gdma_region = GDMA_INVALID_DMA_REGION;
+
+ wq->queue.id = wq_spec.queue_index;
+ cq->queue.id = cq_spec.queue_index;
+
+ ibdev_dbg(&mdev->ib_dev,
+ "rx_object 0x%llx wq id %llu cq id %llu\n",
+ wq->rx_object, wq->queue.id, cq->queue.id);
+
+ resp.entries[i].cqid = cq->queue.id;
+ resp.entries[i].wqid = wq->queue.id;
+
+ mana_ind_table[i] = wq->rx_object;
+
+ /* Create CQ table entry */
+ ret = mana_ib_install_cq_cb(mdev, cq);
+ if (ret)
+ goto fail;
+ }
+ resp.num_entries = i;
+
+ ret = mana_ib_cfg_vport_steering(mdev, ndev, wq->rx_object,
+ mana_ind_table,
+ ind_tbl->log_ind_tbl_size,
+ ucmd.rx_hash_key_len,
+ ucmd.rx_hash_key);
+ if (ret)
+ goto fail;
+
+ ret = ib_copy_to_udata(udata, &resp, sizeof(resp));
+ if (ret) {
+ ibdev_dbg(&mdev->ib_dev,
+ "Failed to copy to udata create rss-qp, %d\n",
+ ret);
+ goto fail;
+ }
+
+ kfree(mana_ind_table);
+
+ return 0;
+
+fail:
+ while (i-- > 0) {
+ ibwq = ind_tbl->ind_tbl[i];
+ ibcq = ibwq->cq;
+ wq = container_of(ibwq, struct mana_ib_wq, ibwq);
+ cq = container_of(ibcq, struct mana_ib_cq, ibcq);
+
+ mana_ib_remove_cq_cb(mdev, cq);
+ mana_destroy_wq_obj(mpc, GDMA_RQ, wq->rx_object);
+ }
+
+ kfree(mana_ind_table);
+
+ return ret;
+}
+
+static int mana_ib_create_qp_raw(struct ib_qp *ibqp, struct ib_pd *ibpd,
+ struct ib_qp_init_attr *attr,
+ struct ib_udata *udata)
+{
+ struct mana_ib_pd *pd = container_of(ibpd, struct mana_ib_pd, ibpd);
+ struct mana_ib_qp *qp = container_of(ibqp, struct mana_ib_qp, ibqp);
+ struct mana_ib_dev *mdev =
+ container_of(ibpd->device, struct mana_ib_dev, ib_dev);
+ struct mana_ib_cq *send_cq =
+ container_of(attr->send_cq, struct mana_ib_cq, ibcq);
+ struct mana_ib_ucontext *mana_ucontext =
+ rdma_udata_to_drv_context(udata, struct mana_ib_ucontext,
+ ibucontext);
+ struct mana_ib_create_qp_resp resp = {};
+ struct mana_ib_create_qp ucmd = {};
+ struct mana_obj_spec wq_spec = {};
+ struct mana_obj_spec cq_spec = {};
+ struct mana_port_context *mpc;
+ struct net_device *ndev;
+ struct mana_eq *eq;
+ int eq_vec;
+ u32 port;
+ int err;
+
+ if (!mana_ucontext || udata->inlen < sizeof(ucmd))
+ return -EINVAL;
+
+ err = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen));
+ if (err) {
+ ibdev_dbg(&mdev->ib_dev,
+ "Failed to copy from udata create qp-raw, %d\n", err);
+ return err;
+ }
+
+ if (attr->cap.max_send_wr > mdev->adapter_caps.max_qp_wr) {
+ ibdev_dbg(&mdev->ib_dev,
+ "Requested max_send_wr %d exceeding limit\n",
+ attr->cap.max_send_wr);
+ return -EINVAL;
+ }
+
+ if (attr->cap.max_send_sge > MAX_TX_WQE_SGL_ENTRIES) {
+ ibdev_dbg(&mdev->ib_dev,
+ "Requested max_send_sge %d exceeding limit\n",
+ attr->cap.max_send_sge);
+ return -EINVAL;
+ }
+
+ port = ucmd.port;
+ ndev = mana_ib_get_netdev(ibpd->device, port);
+ if (!ndev) {
+ ibdev_dbg(&mdev->ib_dev, "Invalid port %u in creating qp\n",
+ port);
+ return -EINVAL;
+ }
+ mpc = netdev_priv(ndev);
+ ibdev_dbg(&mdev->ib_dev, "port %u ndev %p mpc %p\n", port, ndev, mpc);
+
+ err = mana_ib_cfg_vport(mdev, port, pd, mana_ucontext->doorbell);
+ if (err)
+ return -ENODEV;
+
+ qp->port = port;
+
+ ibdev_dbg(&mdev->ib_dev, "ucmd sq_buf_addr 0x%llx port %u\n",
+ ucmd.sq_buf_addr, ucmd.port);
+
+ err = mana_ib_create_queue(mdev, ucmd.sq_buf_addr, ucmd.sq_buf_size, &qp->raw_sq);
+ if (err) {
+ ibdev_dbg(&mdev->ib_dev,
+ "Failed to create queue for create qp-raw, err %d\n", err);
+ goto err_free_vport;
+ }
+
+ /* Create a WQ on the same port handle used by the Ethernet */
+ wq_spec.gdma_region = qp->raw_sq.gdma_region;
+ wq_spec.queue_size = ucmd.sq_buf_size;
+
+ cq_spec.gdma_region = send_cq->queue.gdma_region;
+ cq_spec.queue_size = send_cq->cqe * COMP_ENTRY_SIZE;
+ cq_spec.modr_ctx_id = 0;
+ eq_vec = send_cq->comp_vector;
+ eq = &mpc->ac->eqs[eq_vec];
+ cq_spec.attached_eq = eq->eq->id;
+
+ err = mana_create_wq_obj(mpc, mpc->port_handle, GDMA_SQ, &wq_spec,
+ &cq_spec, &qp->qp_handle);
+ if (err) {
+ ibdev_dbg(&mdev->ib_dev,
+ "Failed to create wq for create raw-qp, err %d\n",
+ err);
+ goto err_destroy_queue;
+ }
+
+ /* The GDMA regions are now owned by the WQ object */
+ qp->raw_sq.gdma_region = GDMA_INVALID_DMA_REGION;
+ send_cq->queue.gdma_region = GDMA_INVALID_DMA_REGION;
+
+ qp->raw_sq.id = wq_spec.queue_index;
+ send_cq->queue.id = cq_spec.queue_index;
+
+ /* Create CQ table entry */
+ err = mana_ib_install_cq_cb(mdev, send_cq);
+ if (err)
+ goto err_destroy_wq_obj;
+
+ ibdev_dbg(&mdev->ib_dev,
+ "qp->qp_handle 0x%llx sq id %llu cq id %llu\n",
+ qp->qp_handle, qp->raw_sq.id, send_cq->queue.id);
+
+ resp.sqid = qp->raw_sq.id;
+ resp.cqid = send_cq->queue.id;
+ resp.tx_vp_offset = pd->tx_vp_offset;
+
+ err = ib_copy_to_udata(udata, &resp, sizeof(resp));
+ if (err) {
+ ibdev_dbg(&mdev->ib_dev,
+ "Failed copy udata for create qp-raw, %d\n",
+ err);
+ goto err_remove_cq_cb;
+ }
+
+ return 0;
+
+err_remove_cq_cb:
+ mana_ib_remove_cq_cb(mdev, send_cq);
+
+err_destroy_wq_obj:
+ mana_destroy_wq_obj(mpc, GDMA_SQ, qp->qp_handle);
+
+err_destroy_queue:
+ mana_ib_destroy_queue(mdev, &qp->raw_sq);
+
+err_free_vport:
+ mana_ib_uncfg_vport(mdev, pd, port);
+
+ return err;
+}
+
+static u32 mana_ib_wqe_size(u32 sge, u32 oob_size)
+{
+ u32 wqe_size = sge * sizeof(struct gdma_sge) + sizeof(struct gdma_wqe) + oob_size;
+
+ return ALIGN(wqe_size, GDMA_WQE_BU_SIZE);
+}
+
+static u32 mana_ib_queue_size(struct ib_qp_init_attr *attr, u32 queue_type)
+{
+ u32 queue_size;
+
+ switch (attr->qp_type) {
+ case IB_QPT_UD:
+ case IB_QPT_GSI:
+ if (queue_type == MANA_UD_SEND_QUEUE)
+ queue_size = attr->cap.max_send_wr *
+ mana_ib_wqe_size(attr->cap.max_send_sge, INLINE_OOB_LARGE_SIZE);
+ else
+ queue_size = attr->cap.max_recv_wr *
+ mana_ib_wqe_size(attr->cap.max_recv_sge, INLINE_OOB_SMALL_SIZE);
+ break;
+ default:
+ return 0;
+ }
+
+ return MANA_PAGE_ALIGN(roundup_pow_of_two(queue_size));
+}
+
+static enum gdma_queue_type mana_ib_queue_type(struct ib_qp_init_attr *attr, u32 queue_type)
+{
+ enum gdma_queue_type type;
+
+ switch (attr->qp_type) {
+ case IB_QPT_UD:
+ case IB_QPT_GSI:
+ if (queue_type == MANA_UD_SEND_QUEUE)
+ type = GDMA_SQ;
+ else
+ type = GDMA_RQ;
+ break;
+ default:
+ type = GDMA_INVALID_QUEUE;
+ }
+ return type;
+}
+
+static int mana_table_store_rc_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp)
+{
+ return xa_insert_irq(&mdev->qp_table_wq, qp->ibqp.qp_num, qp,
+ GFP_KERNEL);
+}
+
+static void mana_table_remove_rc_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp)
+{
+ xa_erase_irq(&mdev->qp_table_wq, qp->ibqp.qp_num);
+}
+
+static int mana_table_store_ud_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp)
+{
+ u32 qids = qp->ud_qp.queues[MANA_UD_SEND_QUEUE].id | MANA_SENDQ_MASK;
+ u32 qidr = qp->ud_qp.queues[MANA_UD_RECV_QUEUE].id;
+ int err;
+
+ err = xa_insert_irq(&mdev->qp_table_wq, qids, qp, GFP_KERNEL);
+ if (err)
+ return err;
+
+ err = xa_insert_irq(&mdev->qp_table_wq, qidr, qp, GFP_KERNEL);
+ if (err)
+ goto remove_sq;
+
+ return 0;
+
+remove_sq:
+ xa_erase_irq(&mdev->qp_table_wq, qids);
+ return err;
+}
+
+static void mana_table_remove_ud_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp)
+{
+ u32 qids = qp->ud_qp.queues[MANA_UD_SEND_QUEUE].id | MANA_SENDQ_MASK;
+ u32 qidr = qp->ud_qp.queues[MANA_UD_RECV_QUEUE].id;
+
+ xa_erase_irq(&mdev->qp_table_wq, qids);
+ xa_erase_irq(&mdev->qp_table_wq, qidr);
+}
+
+static int mana_table_store_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp)
+{
+ refcount_set(&qp->refcount, 1);
+ init_completion(&qp->free);
+
+ switch (qp->ibqp.qp_type) {
+ case IB_QPT_RC:
+ return mana_table_store_rc_qp(mdev, qp);
+ case IB_QPT_UD:
+ case IB_QPT_GSI:
+ return mana_table_store_ud_qp(mdev, qp);
+ default:
+ ibdev_dbg(&mdev->ib_dev, "Unknown QP type for storing in mana table, %d\n",
+ qp->ibqp.qp_type);
+ }
+
+ return -EINVAL;
+}
+
+static void mana_table_remove_qp(struct mana_ib_dev *mdev,
+ struct mana_ib_qp *qp)
+{
+ switch (qp->ibqp.qp_type) {
+ case IB_QPT_RC:
+ mana_table_remove_rc_qp(mdev, qp);
+ break;
+ case IB_QPT_UD:
+ case IB_QPT_GSI:
+ mana_table_remove_ud_qp(mdev, qp);
+ break;
+ default:
+ ibdev_dbg(&mdev->ib_dev, "Unknown QP type for removing from mana table, %d\n",
+ qp->ibqp.qp_type);
+ return;
+ }
+ mana_put_qp_ref(qp);
+ wait_for_completion(&qp->free);
+}
+
+static int mana_ib_create_rc_qp(struct ib_qp *ibqp, struct ib_pd *ibpd,
+ struct ib_qp_init_attr *attr, struct ib_udata *udata)
+{
+ struct mana_ib_dev *mdev = container_of(ibpd->device, struct mana_ib_dev, ib_dev);
+ struct mana_ib_qp *qp = container_of(ibqp, struct mana_ib_qp, ibqp);
+ struct mana_ib_create_rc_qp_resp resp = {};
+ struct mana_ib_ucontext *mana_ucontext;
+ struct mana_ib_create_rc_qp ucmd = {};
+ int i, err, j;
+ u64 flags = 0;
+ u32 doorbell;
+
+ if (!udata || udata->inlen < sizeof(ucmd))
+ return -EINVAL;
+
+ mana_ucontext = rdma_udata_to_drv_context(udata, struct mana_ib_ucontext, ibucontext);
+ doorbell = mana_ucontext->doorbell;
+ flags = MANA_RC_FLAG_NO_FMR;
+ err = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen));
+ if (err) {
+ ibdev_dbg(&mdev->ib_dev, "Failed to copy from udata, %d\n", err);
+ return err;
+ }
+
+ for (i = 0, j = 0; i < MANA_RC_QUEUE_TYPE_MAX; ++i) {
+ /* skip FMR for user-level RC QPs */
+ if (i == MANA_RC_SEND_QUEUE_FMR) {
+ qp->rc_qp.queues[i].id = INVALID_QUEUE_ID;
+ qp->rc_qp.queues[i].gdma_region = GDMA_INVALID_DMA_REGION;
+ continue;
+ }
+ err = mana_ib_create_queue(mdev, ucmd.queue_buf[j], ucmd.queue_size[j],
+ &qp->rc_qp.queues[i]);
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to create queue %d, err %d\n", i, err);
+ goto destroy_queues;
+ }
+ j++;
+ }
+
+ err = mana_ib_gd_create_rc_qp(mdev, qp, attr, doorbell, flags);
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to create rc qp %d\n", err);
+ goto destroy_queues;
+ }
+ qp->ibqp.qp_num = qp->rc_qp.queues[MANA_RC_RECV_QUEUE_RESPONDER].id;
+ qp->port = attr->port_num;
+
+ if (udata) {
+ for (i = 0, j = 0; i < MANA_RC_QUEUE_TYPE_MAX; ++i) {
+ if (i == MANA_RC_SEND_QUEUE_FMR)
+ continue;
+ resp.queue_id[j] = qp->rc_qp.queues[i].id;
+ j++;
+ }
+ err = ib_copy_to_udata(udata, &resp, min(sizeof(resp), udata->outlen));
+ if (err) {
+ ibdev_dbg(&mdev->ib_dev, "Failed to copy to udata, %d\n", err);
+ goto destroy_qp;
+ }
+ }
+
+ err = mana_table_store_qp(mdev, qp);
+ if (err)
+ goto destroy_qp;
+
+ return 0;
+
+destroy_qp:
+ mana_ib_gd_destroy_rc_qp(mdev, qp);
+destroy_queues:
+ while (i-- > 0)
+ mana_ib_destroy_queue(mdev, &qp->rc_qp.queues[i]);
+ return err;
+}
+
+static void mana_add_qp_to_cqs(struct mana_ib_qp *qp)
+{
+ struct mana_ib_cq *send_cq = container_of(qp->ibqp.send_cq, struct mana_ib_cq, ibcq);
+ struct mana_ib_cq *recv_cq = container_of(qp->ibqp.recv_cq, struct mana_ib_cq, ibcq);
+ unsigned long flags;
+
+ spin_lock_irqsave(&send_cq->cq_lock, flags);
+ list_add_tail(&qp->cq_send_list, &send_cq->list_send_qp);
+ spin_unlock_irqrestore(&send_cq->cq_lock, flags);
+
+ spin_lock_irqsave(&recv_cq->cq_lock, flags);
+ list_add_tail(&qp->cq_recv_list, &recv_cq->list_recv_qp);
+ spin_unlock_irqrestore(&recv_cq->cq_lock, flags);
+}
+
+static void mana_remove_qp_from_cqs(struct mana_ib_qp *qp)
+{
+ struct mana_ib_cq *send_cq = container_of(qp->ibqp.send_cq, struct mana_ib_cq, ibcq);
+ struct mana_ib_cq *recv_cq = container_of(qp->ibqp.recv_cq, struct mana_ib_cq, ibcq);
+ unsigned long flags;
+
+ spin_lock_irqsave(&send_cq->cq_lock, flags);
+ list_del(&qp->cq_send_list);
+ spin_unlock_irqrestore(&send_cq->cq_lock, flags);
+
+ spin_lock_irqsave(&recv_cq->cq_lock, flags);
+ list_del(&qp->cq_recv_list);
+ spin_unlock_irqrestore(&recv_cq->cq_lock, flags);
+}
+
+static int mana_ib_create_ud_qp(struct ib_qp *ibqp, struct ib_pd *ibpd,
+ struct ib_qp_init_attr *attr, struct ib_udata *udata)
+{
+ struct mana_ib_dev *mdev = container_of(ibpd->device, struct mana_ib_dev, ib_dev);
+ struct mana_ib_qp *qp = container_of(ibqp, struct mana_ib_qp, ibqp);
+ u32 doorbell, queue_size;
+ int i, err;
+
+ if (udata) {
+ ibdev_dbg(&mdev->ib_dev, "User-level UD QPs are not supported\n");
+ return -EOPNOTSUPP;
+ }
+
+ for (i = 0; i < MANA_UD_QUEUE_TYPE_MAX; ++i) {
+ queue_size = mana_ib_queue_size(attr, i);
+ err = mana_ib_create_kernel_queue(mdev, queue_size, mana_ib_queue_type(attr, i),
+ &qp->ud_qp.queues[i]);
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to create queue %d, err %d\n",
+ i, err);
+ goto destroy_queues;
+ }
+ }
+ doorbell = mdev->gdma_dev->doorbell;
+
+ err = create_shadow_queue(&qp->shadow_rq, attr->cap.max_recv_wr,
+ sizeof(struct ud_rq_shadow_wqe));
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to create shadow rq err %d\n", err);
+ goto destroy_queues;
+ }
+ err = create_shadow_queue(&qp->shadow_sq, attr->cap.max_send_wr,
+ sizeof(struct ud_sq_shadow_wqe));
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to create shadow sq err %d\n", err);
+ goto destroy_shadow_queues;
+ }
+
+ err = mana_ib_gd_create_ud_qp(mdev, qp, attr, doorbell, attr->qp_type);
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to create ud qp %d\n", err);
+ goto destroy_shadow_queues;
+ }
+ qp->ibqp.qp_num = qp->ud_qp.queues[MANA_UD_RECV_QUEUE].id;
+ qp->port = attr->port_num;
+
+ for (i = 0; i < MANA_UD_QUEUE_TYPE_MAX; ++i)
+ qp->ud_qp.queues[i].kmem->id = qp->ud_qp.queues[i].id;
+
+ err = mana_table_store_qp(mdev, qp);
+ if (err)
+ goto destroy_qp;
+
+ mana_add_qp_to_cqs(qp);
+
+ return 0;
+
+destroy_qp:
+ mana_ib_gd_destroy_ud_qp(mdev, qp);
+destroy_shadow_queues:
+ destroy_shadow_queue(&qp->shadow_rq);
+ destroy_shadow_queue(&qp->shadow_sq);
+destroy_queues:
+ while (i-- > 0)
+ mana_ib_destroy_queue(mdev, &qp->ud_qp.queues[i]);
+ return err;
+}
+
+int mana_ib_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attr,
+ struct ib_udata *udata)
+{
+ switch (attr->qp_type) {
+ case IB_QPT_RAW_PACKET:
+ /* When rwq_ind_tbl is used, it's for creating WQs for RSS */
+ if (attr->rwq_ind_tbl)
+ return mana_ib_create_qp_rss(ibqp, ibqp->pd, attr,
+ udata);
+
+ return mana_ib_create_qp_raw(ibqp, ibqp->pd, attr, udata);
+ case IB_QPT_RC:
+ return mana_ib_create_rc_qp(ibqp, ibqp->pd, attr, udata);
+ case IB_QPT_UD:
+ case IB_QPT_GSI:
+ return mana_ib_create_ud_qp(ibqp, ibqp->pd, attr, udata);
+ default:
+ ibdev_dbg(ibqp->device, "Creating QP type %u not supported\n",
+ attr->qp_type);
+ }
+
+ return -EINVAL;
+}
+
+static int mana_ib_gd_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_udata *udata)
+{
+ struct mana_ib_dev *mdev = container_of(ibqp->device, struct mana_ib_dev, ib_dev);
+ struct mana_ib_qp *qp = container_of(ibqp, struct mana_ib_qp, ibqp);
+ struct mana_rnic_set_qp_state_resp resp = {};
+ struct mana_rnic_set_qp_state_req req = {};
+ struct gdma_context *gc = mdev_to_gc(mdev);
+ struct mana_port_context *mpc;
+ struct net_device *ndev;
+ int err;
+
+ mana_gd_init_req_hdr(&req.hdr, MANA_IB_SET_QP_STATE, sizeof(req), sizeof(resp));
+
+ req.hdr.req.msg_version = GDMA_MESSAGE_V3;
+ req.hdr.dev_id = mdev->gdma_dev->dev_id;
+ req.adapter = mdev->adapter_handle;
+ req.qp_handle = qp->qp_handle;
+ req.qp_state = attr->qp_state;
+ req.attr_mask = attr_mask;
+ req.path_mtu = attr->path_mtu;
+ req.rq_psn = attr->rq_psn;
+ req.sq_psn = attr->sq_psn;
+ req.dest_qpn = attr->dest_qp_num;
+ req.max_dest_rd_atomic = attr->max_dest_rd_atomic;
+ req.retry_cnt = attr->retry_cnt;
+ req.rnr_retry = attr->rnr_retry;
+ req.min_rnr_timer = attr->min_rnr_timer;
+ req.rate_limit = attr->rate_limit;
+ req.qkey = attr->qkey;
+ req.local_ack_timeout = attr->timeout;
+ req.qp_access_flags = attr->qp_access_flags;
+ req.max_rd_atomic = attr->max_rd_atomic;
+
+ if (attr_mask & IB_QP_AV) {
+ ndev = mana_ib_get_netdev(&mdev->ib_dev, ibqp->port);
+ if (!ndev) {
+ ibdev_dbg(&mdev->ib_dev, "Invalid port %u in QP %u\n",
+ ibqp->port, ibqp->qp_num);
+ return -EINVAL;
+ }
+ mpc = netdev_priv(ndev);
+ copy_in_reverse(req.ah_attr.src_mac, mpc->mac_addr, ETH_ALEN);
+ copy_in_reverse(req.ah_attr.dest_mac, attr->ah_attr.roce.dmac, ETH_ALEN);
+ copy_in_reverse(req.ah_attr.src_addr, attr->ah_attr.grh.sgid_attr->gid.raw,
+ sizeof(union ib_gid));
+ copy_in_reverse(req.ah_attr.dest_addr, attr->ah_attr.grh.dgid.raw,
+ sizeof(union ib_gid));
+ if (rdma_gid_attr_network_type(attr->ah_attr.grh.sgid_attr) == RDMA_NETWORK_IPV4) {
+ req.ah_attr.src_addr_type = SGID_TYPE_IPV4;
+ req.ah_attr.dest_addr_type = SGID_TYPE_IPV4;
+ } else {
+ req.ah_attr.src_addr_type = SGID_TYPE_IPV6;
+ req.ah_attr.dest_addr_type = SGID_TYPE_IPV6;
+ }
+ req.ah_attr.dest_port = ROCE_V2_UDP_DPORT;
+ req.ah_attr.src_port = rdma_get_udp_sport(attr->ah_attr.grh.flow_label,
+ ibqp->qp_num, attr->dest_qp_num);
+ req.ah_attr.traffic_class = attr->ah_attr.grh.traffic_class >> 2;
+ req.ah_attr.hop_limit = attr->ah_attr.grh.hop_limit;
+ req.ah_attr.flow_label = attr->ah_attr.grh.flow_label;
+ }
+
+ err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed modify qp err %d", err);
+ return err;
+ }
+
+ return 0;
+}
+
+int mana_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_udata *udata)
+{
+ switch (ibqp->qp_type) {
+ case IB_QPT_RC:
+ case IB_QPT_UD:
+ case IB_QPT_GSI:
+ return mana_ib_gd_modify_qp(ibqp, attr, attr_mask, udata);
+ default:
+ ibdev_dbg(ibqp->device, "Modify QP type %u not supported", ibqp->qp_type);
+ return -EOPNOTSUPP;
+ }
+}
+
+static int mana_ib_destroy_qp_rss(struct mana_ib_qp *qp,
+ struct ib_rwq_ind_table *ind_tbl,
+ struct ib_udata *udata)
+{
+ struct mana_ib_dev *mdev =
+ container_of(qp->ibqp.device, struct mana_ib_dev, ib_dev);
+ struct mana_port_context *mpc;
+ struct net_device *ndev;
+ struct mana_ib_wq *wq;
+ struct ib_wq *ibwq;
+ int i;
+
+ ndev = mana_ib_get_netdev(qp->ibqp.device, qp->port);
+ mpc = netdev_priv(ndev);
+
+ for (i = 0; i < (1 << ind_tbl->log_ind_tbl_size); i++) {
+ ibwq = ind_tbl->ind_tbl[i];
+ wq = container_of(ibwq, struct mana_ib_wq, ibwq);
+ ibdev_dbg(&mdev->ib_dev, "destroying wq->rx_object %llu\n",
+ wq->rx_object);
+ mana_destroy_wq_obj(mpc, GDMA_RQ, wq->rx_object);
+ }
+
+ return 0;
+}
+
+static int mana_ib_destroy_qp_raw(struct mana_ib_qp *qp, struct ib_udata *udata)
+{
+ struct mana_ib_dev *mdev =
+ container_of(qp->ibqp.device, struct mana_ib_dev, ib_dev);
+ struct ib_pd *ibpd = qp->ibqp.pd;
+ struct mana_port_context *mpc;
+ struct net_device *ndev;
+ struct mana_ib_pd *pd;
+
+ ndev = mana_ib_get_netdev(qp->ibqp.device, qp->port);
+ mpc = netdev_priv(ndev);
+ pd = container_of(ibpd, struct mana_ib_pd, ibpd);
+
+ mana_destroy_wq_obj(mpc, GDMA_SQ, qp->qp_handle);
+
+ mana_ib_destroy_queue(mdev, &qp->raw_sq);
+
+ mana_ib_uncfg_vport(mdev, pd, qp->port);
+
+ return 0;
+}
+
+static int mana_ib_destroy_rc_qp(struct mana_ib_qp *qp, struct ib_udata *udata)
+{
+ struct mana_ib_dev *mdev =
+ container_of(qp->ibqp.device, struct mana_ib_dev, ib_dev);
+ int i;
+
+ mana_table_remove_qp(mdev, qp);
+
+ /* Ignore return code as there is not much we can do about it.
+ * The error message is printed inside.
+ */
+ mana_ib_gd_destroy_rc_qp(mdev, qp);
+ for (i = 0; i < MANA_RC_QUEUE_TYPE_MAX; ++i)
+ mana_ib_destroy_queue(mdev, &qp->rc_qp.queues[i]);
+
+ return 0;
+}
+
+static int mana_ib_destroy_ud_qp(struct mana_ib_qp *qp, struct ib_udata *udata)
+{
+ struct mana_ib_dev *mdev =
+ container_of(qp->ibqp.device, struct mana_ib_dev, ib_dev);
+ int i;
+
+ mana_remove_qp_from_cqs(qp);
+ mana_table_remove_qp(mdev, qp);
+
+ destroy_shadow_queue(&qp->shadow_rq);
+ destroy_shadow_queue(&qp->shadow_sq);
+
+ /* Ignore return code as there is not much we can do about it.
+ * The error message is printed inside.
+ */
+ mana_ib_gd_destroy_ud_qp(mdev, qp);
+ for (i = 0; i < MANA_UD_QUEUE_TYPE_MAX; ++i)
+ mana_ib_destroy_queue(mdev, &qp->ud_qp.queues[i]);
+
+ return 0;
+}
+
+int mana_ib_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
+{
+ struct mana_ib_qp *qp = container_of(ibqp, struct mana_ib_qp, ibqp);
+
+ switch (ibqp->qp_type) {
+ case IB_QPT_RAW_PACKET:
+ if (ibqp->rwq_ind_tbl)
+ return mana_ib_destroy_qp_rss(qp, ibqp->rwq_ind_tbl,
+ udata);
+
+ return mana_ib_destroy_qp_raw(qp, udata);
+ case IB_QPT_RC:
+ return mana_ib_destroy_rc_qp(qp, udata);
+ case IB_QPT_UD:
+ case IB_QPT_GSI:
+ return mana_ib_destroy_ud_qp(qp, udata);
+ default:
+ ibdev_dbg(ibqp->device, "Unexpected QP type %u\n",
+ ibqp->qp_type);
+ }
+
+ return -ENOENT;
+}
diff --git a/drivers/infiniband/hw/mana/shadow_queue.h b/drivers/infiniband/hw/mana/shadow_queue.h
new file mode 100644
index 000000000000..a4b3818f9c39
--- /dev/null
+++ b/drivers/infiniband/hw/mana/shadow_queue.h
@@ -0,0 +1,115 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright (c) 2024, Microsoft Corporation. All rights reserved.
+ */
+
+#ifndef _MANA_SHADOW_QUEUE_H_
+#define _MANA_SHADOW_QUEUE_H_
+
+struct shadow_wqe_header {
+ u16 opcode;
+ u16 error_code;
+ u32 posted_wqe_size;
+ u64 wr_id;
+};
+
+struct ud_rq_shadow_wqe {
+ struct shadow_wqe_header header;
+ u32 byte_len;
+ u32 src_qpn;
+};
+
+struct ud_sq_shadow_wqe {
+ struct shadow_wqe_header header;
+};
+
+struct shadow_queue {
+ /* Unmasked producer index, Incremented on wqe posting */
+ u64 prod_idx;
+ /* Unmasked consumer index, Incremented on cq polling */
+ u64 cons_idx;
+ /* Unmasked index of next-to-complete (from HW) shadow WQE */
+ u64 next_to_complete_idx;
+ /* queue size in wqes */
+ u32 length;
+ /* distance between elements in bytes */
+ u32 stride;
+ /* ring buffer holding wqes */
+ void *buffer;
+};
+
+static inline int create_shadow_queue(struct shadow_queue *queue, uint32_t length, uint32_t stride)
+{
+ queue->buffer = kvmalloc_array(length, stride, GFP_KERNEL);
+ if (!queue->buffer)
+ return -ENOMEM;
+
+ queue->length = length;
+ queue->stride = stride;
+
+ return 0;
+}
+
+static inline void destroy_shadow_queue(struct shadow_queue *queue)
+{
+ kvfree(queue->buffer);
+}
+
+static inline bool shadow_queue_full(struct shadow_queue *queue)
+{
+ return (queue->prod_idx - queue->cons_idx) >= queue->length;
+}
+
+static inline bool shadow_queue_empty(struct shadow_queue *queue)
+{
+ return queue->prod_idx == queue->cons_idx;
+}
+
+static inline void *
+shadow_queue_get_element(const struct shadow_queue *queue, u64 unmasked_index)
+{
+ u32 index = unmasked_index % queue->length;
+
+ return ((u8 *)queue->buffer + index * queue->stride);
+}
+
+static inline void *
+shadow_queue_producer_entry(struct shadow_queue *queue)
+{
+ return shadow_queue_get_element(queue, queue->prod_idx);
+}
+
+static inline void *
+shadow_queue_get_next_to_consume(const struct shadow_queue *queue)
+{
+ if (queue->cons_idx == queue->next_to_complete_idx)
+ return NULL;
+
+ return shadow_queue_get_element(queue, queue->cons_idx);
+}
+
+static inline void *
+shadow_queue_get_next_to_complete(struct shadow_queue *queue)
+{
+ if (queue->next_to_complete_idx == queue->prod_idx)
+ return NULL;
+
+ return shadow_queue_get_element(queue, queue->next_to_complete_idx);
+}
+
+static inline void shadow_queue_advance_producer(struct shadow_queue *queue)
+{
+ queue->prod_idx++;
+}
+
+static inline void shadow_queue_advance_consumer(struct shadow_queue *queue)
+{
+ queue->cons_idx++;
+}
+
+static inline void shadow_queue_advance_next_to_complete(struct shadow_queue *queue)
+{
+ queue->next_to_complete_idx++;
+}
+
+#endif
diff --git a/drivers/infiniband/hw/mana/wq.c b/drivers/infiniband/hw/mana/wq.c
new file mode 100644
index 000000000000..f959f4b9244f
--- /dev/null
+++ b/drivers/infiniband/hw/mana/wq.c
@@ -0,0 +1,92 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2022, Microsoft Corporation. All rights reserved.
+ */
+
+#include "mana_ib.h"
+
+struct ib_wq *mana_ib_create_wq(struct ib_pd *pd,
+ struct ib_wq_init_attr *init_attr,
+ struct ib_udata *udata)
+{
+ struct mana_ib_dev *mdev =
+ container_of(pd->device, struct mana_ib_dev, ib_dev);
+ struct mana_ib_create_wq ucmd = {};
+ struct mana_ib_wq *wq;
+ int err;
+
+ if (udata->inlen < sizeof(ucmd))
+ return ERR_PTR(-EINVAL);
+
+ err = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen));
+ if (err) {
+ ibdev_dbg(&mdev->ib_dev,
+ "Failed to copy from udata for create wq, %d\n", err);
+ return ERR_PTR(err);
+ }
+
+ wq = kzalloc(sizeof(*wq), GFP_KERNEL);
+ if (!wq)
+ return ERR_PTR(-ENOMEM);
+
+ ibdev_dbg(&mdev->ib_dev, "ucmd wq_buf_addr 0x%llx\n", ucmd.wq_buf_addr);
+
+ err = mana_ib_create_queue(mdev, ucmd.wq_buf_addr, ucmd.wq_buf_size, &wq->queue);
+ if (err) {
+ ibdev_dbg(&mdev->ib_dev,
+ "Failed to create queue for create wq, %d\n", err);
+ goto err_free_wq;
+ }
+
+ wq->wqe = init_attr->max_wr;
+ wq->wq_buf_size = ucmd.wq_buf_size;
+ wq->rx_object = INVALID_MANA_HANDLE;
+ return &wq->ibwq;
+
+err_free_wq:
+ kfree(wq);
+
+ return ERR_PTR(err);
+}
+
+int mana_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
+ u32 wq_attr_mask, struct ib_udata *udata)
+{
+ /* modify_wq is not supported by this version of the driver */
+ return -EOPNOTSUPP;
+}
+
+int mana_ib_destroy_wq(struct ib_wq *ibwq, struct ib_udata *udata)
+{
+ struct mana_ib_wq *wq = container_of(ibwq, struct mana_ib_wq, ibwq);
+ struct ib_device *ib_dev = ibwq->device;
+ struct mana_ib_dev *mdev;
+
+ mdev = container_of(ib_dev, struct mana_ib_dev, ib_dev);
+
+ mana_ib_destroy_queue(mdev, &wq->queue);
+
+ kfree(wq);
+
+ return 0;
+}
+
+int mana_ib_create_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_table,
+ struct ib_rwq_ind_table_init_attr *init_attr,
+ struct ib_udata *udata)
+{
+ /*
+ * There is no additional data in ind_table to be maintained by this
+ * driver, do nothing
+ */
+ return 0;
+}
+
+int mana_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_tbl)
+{
+ /*
+ * There is no additional data in ind_table to be maintained by this
+ * driver, do nothing
+ */
+ return 0;
+}
diff --git a/drivers/infiniband/hw/mana/wr.c b/drivers/infiniband/hw/mana/wr.c
new file mode 100644
index 000000000000..1813567d3b16
--- /dev/null
+++ b/drivers/infiniband/hw/mana/wr.c
@@ -0,0 +1,168 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2024, Microsoft Corporation. All rights reserved.
+ */
+
+#include "mana_ib.h"
+
+#define MAX_WR_SGL_NUM (2)
+
+static int mana_ib_post_recv_ud(struct mana_ib_qp *qp, const struct ib_recv_wr *wr)
+{
+ struct mana_ib_dev *mdev = container_of(qp->ibqp.device, struct mana_ib_dev, ib_dev);
+ struct gdma_queue *queue = qp->ud_qp.queues[MANA_UD_RECV_QUEUE].kmem;
+ struct gdma_posted_wqe_info wqe_info = {0};
+ struct gdma_sge gdma_sgl[MAX_WR_SGL_NUM];
+ struct gdma_wqe_request wqe_req = {0};
+ struct ud_rq_shadow_wqe *shadow_wqe;
+ int err, i;
+
+ if (shadow_queue_full(&qp->shadow_rq))
+ return -EINVAL;
+
+ if (wr->num_sge > MAX_WR_SGL_NUM)
+ return -EINVAL;
+
+ for (i = 0; i < wr->num_sge; ++i) {
+ gdma_sgl[i].address = wr->sg_list[i].addr;
+ gdma_sgl[i].mem_key = wr->sg_list[i].lkey;
+ gdma_sgl[i].size = wr->sg_list[i].length;
+ }
+ wqe_req.num_sge = wr->num_sge;
+ wqe_req.sgl = gdma_sgl;
+
+ err = mana_gd_post_work_request(queue, &wqe_req, &wqe_info);
+ if (err)
+ return err;
+
+ shadow_wqe = shadow_queue_producer_entry(&qp->shadow_rq);
+ memset(shadow_wqe, 0, sizeof(*shadow_wqe));
+ shadow_wqe->header.opcode = IB_WC_RECV;
+ shadow_wqe->header.wr_id = wr->wr_id;
+ shadow_wqe->header.posted_wqe_size = wqe_info.wqe_size_in_bu;
+ shadow_queue_advance_producer(&qp->shadow_rq);
+
+ mana_gd_wq_ring_doorbell(mdev_to_gc(mdev), queue);
+ return 0;
+}
+
+int mana_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
+ const struct ib_recv_wr **bad_wr)
+{
+ struct mana_ib_qp *qp = container_of(ibqp, struct mana_ib_qp, ibqp);
+ int err = 0;
+
+ for (; wr; wr = wr->next) {
+ switch (ibqp->qp_type) {
+ case IB_QPT_UD:
+ case IB_QPT_GSI:
+ err = mana_ib_post_recv_ud(qp, wr);
+ if (unlikely(err)) {
+ *bad_wr = wr;
+ return err;
+ }
+ break;
+ default:
+ ibdev_dbg(ibqp->device, "Posting recv wr on qp type %u is not supported\n",
+ ibqp->qp_type);
+ return -EINVAL;
+ }
+ }
+
+ return err;
+}
+
+static int mana_ib_post_send_ud(struct mana_ib_qp *qp, const struct ib_ud_wr *wr)
+{
+ struct mana_ib_dev *mdev = container_of(qp->ibqp.device, struct mana_ib_dev, ib_dev);
+ struct mana_ib_ah *ah = container_of(wr->ah, struct mana_ib_ah, ibah);
+ struct net_device *ndev = mana_ib_get_netdev(&mdev->ib_dev, qp->port);
+ struct gdma_queue *queue = qp->ud_qp.queues[MANA_UD_SEND_QUEUE].kmem;
+ struct gdma_sge gdma_sgl[MAX_WR_SGL_NUM + 1];
+ struct gdma_posted_wqe_info wqe_info = {0};
+ struct gdma_wqe_request wqe_req = {0};
+ struct rdma_send_oob send_oob = {0};
+ struct ud_sq_shadow_wqe *shadow_wqe;
+ int err, i;
+
+ if (!ndev) {
+ ibdev_dbg(&mdev->ib_dev, "Invalid port %u in QP %u\n",
+ qp->port, qp->ibqp.qp_num);
+ return -EINVAL;
+ }
+
+ if (wr->wr.opcode != IB_WR_SEND)
+ return -EINVAL;
+
+ if (shadow_queue_full(&qp->shadow_sq))
+ return -EINVAL;
+
+ if (wr->wr.num_sge > MAX_WR_SGL_NUM)
+ return -EINVAL;
+
+ gdma_sgl[0].address = ah->dma_handle;
+ gdma_sgl[0].mem_key = qp->ibqp.pd->local_dma_lkey;
+ gdma_sgl[0].size = sizeof(struct mana_ib_av);
+ for (i = 0; i < wr->wr.num_sge; ++i) {
+ gdma_sgl[i + 1].address = wr->wr.sg_list[i].addr;
+ gdma_sgl[i + 1].mem_key = wr->wr.sg_list[i].lkey;
+ gdma_sgl[i + 1].size = wr->wr.sg_list[i].length;
+ }
+
+ wqe_req.num_sge = wr->wr.num_sge + 1;
+ wqe_req.sgl = gdma_sgl;
+ wqe_req.inline_oob_size = sizeof(struct rdma_send_oob);
+ wqe_req.inline_oob_data = &send_oob;
+ wqe_req.flags = GDMA_WR_OOB_IN_SGL;
+ wqe_req.client_data_unit = ib_mtu_enum_to_int(ib_mtu_int_to_enum(ndev->mtu));
+
+ send_oob.wqe_type = WQE_TYPE_UD_SEND;
+ send_oob.fence = !!(wr->wr.send_flags & IB_SEND_FENCE);
+ send_oob.signaled = !!(wr->wr.send_flags & IB_SEND_SIGNALED);
+ send_oob.solicited = !!(wr->wr.send_flags & IB_SEND_SOLICITED);
+ send_oob.psn = qp->ud_qp.sq_psn;
+ send_oob.ssn_or_rqpn = wr->remote_qpn;
+ send_oob.ud_send.remote_qkey =
+ qp->ibqp.qp_type == IB_QPT_GSI ? IB_QP1_QKEY : wr->remote_qkey;
+
+ err = mana_gd_post_work_request(queue, &wqe_req, &wqe_info);
+ if (err)
+ return err;
+
+ qp->ud_qp.sq_psn++;
+ shadow_wqe = shadow_queue_producer_entry(&qp->shadow_sq);
+ memset(shadow_wqe, 0, sizeof(*shadow_wqe));
+ shadow_wqe->header.opcode = IB_WC_SEND;
+ shadow_wqe->header.wr_id = wr->wr.wr_id;
+ shadow_wqe->header.posted_wqe_size = wqe_info.wqe_size_in_bu;
+ shadow_queue_advance_producer(&qp->shadow_sq);
+
+ mana_gd_wq_ring_doorbell(mdev_to_gc(mdev), queue);
+ return 0;
+}
+
+int mana_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+ const struct ib_send_wr **bad_wr)
+{
+ int err;
+ struct mana_ib_qp *qp = container_of(ibqp, struct mana_ib_qp, ibqp);
+
+ for (; wr; wr = wr->next) {
+ switch (ibqp->qp_type) {
+ case IB_QPT_UD:
+ case IB_QPT_GSI:
+ err = mana_ib_post_send_ud(qp, ud_wr(wr));
+ if (unlikely(err)) {
+ *bad_wr = wr;
+ return err;
+ }
+ break;
+ default:
+ ibdev_dbg(ibqp->device, "Posting send wr on qp type %u is not supported\n",
+ ibqp->qp_type);
+ return -EINVAL;
+ }
+ }
+
+ return err;
+}
diff --git a/drivers/infiniband/hw/mlx4/Kconfig b/drivers/infiniband/hw/mlx4/Kconfig
index db4aa13ebae0..f30ce9dd080a 100644
--- a/drivers/infiniband/hw/mlx4/Kconfig
+++ b/drivers/infiniband/hw/mlx4/Kconfig
@@ -1,10 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0-only
config MLX4_INFINIBAND
tristate "Mellanox ConnectX HCA support"
depends on NETDEVICES && ETHERNET && PCI && INET
- depends on MAY_USE_DEVLINK
select NET_VENDOR_MELLANOX
select MLX4_CORE
- ---help---
+ help
This driver provides low-level InfiniBand support for
Mellanox ConnectX PCI Express host channel adapters (HCAs).
This is required to use InfiniBand protocols such as
diff --git a/drivers/infiniband/hw/mlx4/Makefile b/drivers/infiniband/hw/mlx4/Makefile
index f4213b3a8fe1..7b6757b02857 100644
--- a/drivers/infiniband/hw/mlx4/Makefile
+++ b/drivers/infiniband/hw/mlx4/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
obj-$(CONFIG_MLX4_INFINIBAND) += mlx4_ib.o
mlx4_ib-y := ah.o cq.o doorbell.o mad.o main.o mr.o qp.o srq.o mcg.o cm.o alias_GUID.o sysfs.o
diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c
index 077c33d2dc75..7321d6ab5fe1 100644
--- a/drivers/infiniband/hw/mlx4/ah.c
+++ b/drivers/infiniband/hw/mlx4/ah.c
@@ -40,164 +40,195 @@
#include "mlx4_ib.h"
-static struct ib_ah *create_ib_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
- struct mlx4_ib_ah *ah)
+static void create_ib_ah(struct ib_ah *ib_ah, struct rdma_ah_attr *ah_attr)
{
- struct mlx4_dev *dev = to_mdev(pd->device)->dev;
+ struct mlx4_ib_ah *ah = to_mah(ib_ah);
+ struct mlx4_dev *dev = to_mdev(ib_ah->device)->dev;
+
+ ah->av.ib.port_pd = cpu_to_be32(to_mpd(ib_ah->pd)->pdn |
+ (rdma_ah_get_port_num(ah_attr) << 24));
+ ah->av.ib.g_slid = rdma_ah_get_path_bits(ah_attr);
+ ah->av.ib.sl_tclass_flowlabel =
+ cpu_to_be32(rdma_ah_get_sl(ah_attr) << 28);
+ if (rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH) {
+ const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr);
- ah->av.ib.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24));
- ah->av.ib.g_slid = ah_attr->src_path_bits;
- ah->av.ib.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28);
- if (ah_attr->ah_flags & IB_AH_GRH) {
ah->av.ib.g_slid |= 0x80;
- ah->av.ib.gid_index = ah_attr->grh.sgid_index;
- ah->av.ib.hop_limit = ah_attr->grh.hop_limit;
+ ah->av.ib.gid_index = grh->sgid_index;
+ ah->av.ib.hop_limit = grh->hop_limit;
ah->av.ib.sl_tclass_flowlabel |=
- cpu_to_be32((ah_attr->grh.traffic_class << 20) |
- ah_attr->grh.flow_label);
- memcpy(ah->av.ib.dgid, ah_attr->grh.dgid.raw, 16);
+ cpu_to_be32((grh->traffic_class << 20) |
+ grh->flow_label);
+ memcpy(ah->av.ib.dgid, grh->dgid.raw, 16);
}
- ah->av.ib.dlid = cpu_to_be16(ah_attr->dlid);
- if (ah_attr->static_rate) {
- ah->av.ib.stat_rate = ah_attr->static_rate + MLX4_STAT_RATE_OFFSET;
- while (ah->av.ib.stat_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET &&
- !(1 << ah->av.ib.stat_rate & dev->caps.stat_rate_support))
- --ah->av.ib.stat_rate;
- }
+ ah->av.ib.dlid = cpu_to_be16(rdma_ah_get_dlid(ah_attr));
+ if (rdma_ah_get_static_rate(ah_attr)) {
+ u8 static_rate = rdma_ah_get_static_rate(ah_attr) +
+ MLX4_STAT_RATE_OFFSET;
- return &ah->ibah;
+ while (static_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET &&
+ !(1 << static_rate & dev->caps.stat_rate_support))
+ --static_rate;
+ ah->av.ib.stat_rate = static_rate;
+ }
}
-static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
- struct mlx4_ib_ah *ah)
+static int create_iboe_ah(struct ib_ah *ib_ah, struct rdma_ah_attr *ah_attr)
{
- struct mlx4_ib_dev *ibdev = to_mdev(pd->device);
+ struct mlx4_ib_dev *ibdev = to_mdev(ib_ah->device);
+ struct mlx4_ib_ah *ah = to_mah(ib_ah);
+ const struct ib_gid_attr *gid_attr;
struct mlx4_dev *dev = ibdev->dev;
int is_mcast = 0;
struct in6_addr in6;
u16 vlan_tag = 0xffff;
- union ib_gid sgid;
- struct ib_gid_attr gid_attr;
+ const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr);
int ret;
- memcpy(&in6, ah_attr->grh.dgid.raw, sizeof(in6));
- if (rdma_is_multicast_addr(&in6)) {
+ memcpy(&in6, grh->dgid.raw, sizeof(in6));
+ if (rdma_is_multicast_addr(&in6))
is_mcast = 1;
- rdma_get_mcast_mac(&in6, ah->av.eth.mac);
- } else {
- memcpy(ah->av.eth.mac, ah_attr->dmac, ETH_ALEN);
- }
- ret = ib_get_cached_gid(pd->device, ah_attr->port_num,
- ah_attr->grh.sgid_index, &sgid, &gid_attr);
- if (ret)
- return ERR_PTR(ret);
+
+ memcpy(ah->av.eth.mac, ah_attr->roce.dmac, ETH_ALEN);
eth_zero_addr(ah->av.eth.s_mac);
- if (gid_attr.ndev) {
- if (is_vlan_dev(gid_attr.ndev))
- vlan_tag = vlan_dev_vlan_id(gid_attr.ndev);
- memcpy(ah->av.eth.s_mac, gid_attr.ndev->dev_addr, ETH_ALEN);
- dev_put(gid_attr.ndev);
+
+ /*
+ * If sgid_attr is NULL we are being called by mlx4_ib_create_ah_slave
+ * and we are directly creating an AV for a slave's gid_index.
+ */
+ gid_attr = ah_attr->grh.sgid_attr;
+ if (gid_attr) {
+ ret = rdma_read_gid_l2_fields(gid_attr, &vlan_tag,
+ &ah->av.eth.s_mac[0]);
+ if (ret)
+ return ret;
+
+ ret = mlx4_ib_gid_index_to_real_index(ibdev, gid_attr);
+ if (ret < 0)
+ return ret;
+ ah->av.eth.gid_index = ret;
+ } else {
+ /* mlx4_ib_create_ah_slave fills in the s_mac and the vlan */
+ ah->av.eth.gid_index = ah_attr->grh.sgid_index;
}
+
if (vlan_tag < 0x1000)
- vlan_tag |= (ah_attr->sl & 7) << 13;
- ah->av.eth.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24));
- ret = mlx4_ib_gid_index_to_real_index(ibdev, ah_attr->port_num, ah_attr->grh.sgid_index);
- if (ret < 0)
- return ERR_PTR(ret);
- ah->av.eth.gid_index = ret;
+ vlan_tag |= (rdma_ah_get_sl(ah_attr) & 7) << 13;
+ ah->av.eth.port_pd = cpu_to_be32(to_mpd(ib_ah->pd)->pdn |
+ (rdma_ah_get_port_num(ah_attr) << 24));
ah->av.eth.vlan = cpu_to_be16(vlan_tag);
- ah->av.eth.hop_limit = ah_attr->grh.hop_limit;
- if (ah_attr->static_rate) {
- ah->av.eth.stat_rate = ah_attr->static_rate + MLX4_STAT_RATE_OFFSET;
+ ah->av.eth.hop_limit = grh->hop_limit;
+ if (rdma_ah_get_static_rate(ah_attr)) {
+ ah->av.eth.stat_rate = rdma_ah_get_static_rate(ah_attr) +
+ MLX4_STAT_RATE_OFFSET;
while (ah->av.eth.stat_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET &&
!(1 << ah->av.eth.stat_rate & dev->caps.stat_rate_support))
--ah->av.eth.stat_rate;
}
ah->av.eth.sl_tclass_flowlabel |=
- cpu_to_be32((ah_attr->grh.traffic_class << 20) |
- ah_attr->grh.flow_label);
+ cpu_to_be32((grh->traffic_class << 20) |
+ grh->flow_label);
/*
* HW requires multicast LID so we just choose one.
*/
if (is_mcast)
ah->av.ib.dlid = cpu_to_be16(0xc000);
- memcpy(ah->av.eth.dgid, ah_attr->grh.dgid.raw, 16);
- ah->av.eth.sl_tclass_flowlabel |= cpu_to_be32(ah_attr->sl << 29);
-
- return &ah->ibah;
+ memcpy(ah->av.eth.dgid, grh->dgid.raw, 16);
+ ah->av.eth.sl_tclass_flowlabel |= cpu_to_be32(rdma_ah_get_sl(ah_attr)
+ << 29);
+ return 0;
}
-struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
- struct ib_udata *udata)
+int mlx4_ib_create_ah(struct ib_ah *ib_ah, struct rdma_ah_init_attr *init_attr,
+ struct ib_udata *udata)
+{
+ struct rdma_ah_attr *ah_attr = init_attr->ah_attr;
+
+ if (ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) {
+ if (!(rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH))
+ return -EINVAL;
+ /*
+ * TBD: need to handle the case when we get
+ * called in an atomic context and there we
+ * might sleep. We don't expect this
+ * currently since we're working with link
+ * local addresses which we can translate
+ * without going to sleep.
+ */
+ return create_iboe_ah(ib_ah, ah_attr);
+ }
+
+ create_ib_ah(ib_ah, ah_attr);
+ return 0;
+}
+int mlx4_ib_create_ah_slave(struct ib_ah *ah, struct rdma_ah_attr *ah_attr,
+ int slave_sgid_index, u8 *s_mac, u16 vlan_tag)
{
- struct mlx4_ib_ah *ah;
- struct ib_ah *ret;
-
- ah = kzalloc(sizeof *ah, GFP_ATOMIC);
- if (!ah)
- return ERR_PTR(-ENOMEM);
-
- if (rdma_port_get_link_layer(pd->device, ah_attr->port_num) == IB_LINK_LAYER_ETHERNET) {
- if (!(ah_attr->ah_flags & IB_AH_GRH)) {
- ret = ERR_PTR(-EINVAL);
- } else {
- /*
- * TBD: need to handle the case when we get
- * called in an atomic context and there we
- * might sleep. We don't expect this
- * currently since we're working with link
- * local addresses which we can translate
- * without going to sleep.
- */
- ret = create_iboe_ah(pd, ah_attr, ah);
- }
-
- if (IS_ERR(ret))
- kfree(ah);
+ struct rdma_ah_attr slave_attr = *ah_attr;
+ struct rdma_ah_init_attr init_attr = {};
+ struct mlx4_ib_ah *mah = to_mah(ah);
+ int ret;
+ slave_attr.grh.sgid_attr = NULL;
+ slave_attr.grh.sgid_index = slave_sgid_index;
+ init_attr.ah_attr = &slave_attr;
+ ret = mlx4_ib_create_ah(ah, &init_attr, NULL);
+ if (ret)
return ret;
- } else
- return create_ib_ah(pd, ah_attr, ah); /* never fails */
+
+ ah->type = ah_attr->type;
+
+ /* get rid of force-loopback bit */
+ mah->av.ib.port_pd &= cpu_to_be32(0x7FFFFFFF);
+
+ if (ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE)
+ memcpy(mah->av.eth.s_mac, s_mac, 6);
+
+ if (vlan_tag < 0x1000)
+ vlan_tag |= (rdma_ah_get_sl(ah_attr) & 7) << 13;
+ mah->av.eth.vlan = cpu_to_be16(vlan_tag);
+
+ return 0;
}
-int mlx4_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
+int mlx4_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr)
{
struct mlx4_ib_ah *ah = to_mah(ibah);
- enum rdma_link_layer ll;
+ int port_num = be32_to_cpu(ah->av.ib.port_pd) >> 24;
memset(ah_attr, 0, sizeof *ah_attr);
- ah_attr->port_num = be32_to_cpu(ah->av.ib.port_pd) >> 24;
- ll = rdma_port_get_link_layer(ibah->device, ah_attr->port_num);
- if (ll == IB_LINK_LAYER_ETHERNET)
- ah_attr->sl = be32_to_cpu(ah->av.eth.sl_tclass_flowlabel) >> 29;
- else
- ah_attr->sl = be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 28;
-
- ah_attr->dlid = ll == IB_LINK_LAYER_INFINIBAND ? be16_to_cpu(ah->av.ib.dlid) : 0;
- if (ah->av.ib.stat_rate)
- ah_attr->static_rate = ah->av.ib.stat_rate - MLX4_STAT_RATE_OFFSET;
- ah_attr->src_path_bits = ah->av.ib.g_slid & 0x7F;
+ ah_attr->type = ibah->type;
- if (mlx4_ib_ah_grh_present(ah)) {
- ah_attr->ah_flags = IB_AH_GRH;
-
- ah_attr->grh.traffic_class =
- be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 20;
- ah_attr->grh.flow_label =
- be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) & 0xfffff;
- ah_attr->grh.hop_limit = ah->av.ib.hop_limit;
- ah_attr->grh.sgid_index = ah->av.ib.gid_index;
- memcpy(ah_attr->grh.dgid.raw, ah->av.ib.dgid, 16);
+ if (ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) {
+ rdma_ah_set_dlid(ah_attr, 0);
+ rdma_ah_set_sl(ah_attr,
+ be32_to_cpu(ah->av.eth.sl_tclass_flowlabel)
+ >> 29);
+ } else {
+ rdma_ah_set_dlid(ah_attr, be16_to_cpu(ah->av.ib.dlid));
+ rdma_ah_set_sl(ah_attr,
+ be32_to_cpu(ah->av.ib.sl_tclass_flowlabel)
+ >> 28);
}
- return 0;
-}
+ rdma_ah_set_port_num(ah_attr, port_num);
+ if (ah->av.ib.stat_rate)
+ rdma_ah_set_static_rate(ah_attr,
+ ah->av.ib.stat_rate -
+ MLX4_STAT_RATE_OFFSET);
+ rdma_ah_set_path_bits(ah_attr, ah->av.ib.g_slid & 0x7F);
+ if (mlx4_ib_ah_grh_present(ah)) {
+ u32 tc_fl = be32_to_cpu(ah->av.ib.sl_tclass_flowlabel);
+
+ rdma_ah_set_grh(ah_attr, NULL,
+ tc_fl & 0xfffff, ah->av.ib.gid_index,
+ ah->av.ib.hop_limit,
+ tc_fl >> 20);
+ rdma_ah_set_dgid_raw(ah_attr, ah->av.ib.dgid);
+ }
-int mlx4_ib_destroy_ah(struct ib_ah *ah)
-{
- kfree(to_mah(ah));
return 0;
}
diff --git a/drivers/infiniband/hw/mlx4/alias_GUID.c b/drivers/infiniband/hw/mlx4/alias_GUID.c
index 06020c54db20..d7327735b8d0 100644
--- a/drivers/infiniband/hw/mlx4/alias_GUID.c
+++ b/drivers/infiniband/hw/mlx4/alias_GUID.c
@@ -38,7 +38,6 @@
#include <rdma/ib_sa.h>
#include <rdma/ib_pack.h>
#include <linux/mlx4/cmd.h>
-#include <linux/module.h>
#include <linux/init.h>
#include <linux/errno.h>
#include <rdma/ib_user_verbs.h>
@@ -73,12 +72,12 @@ static int get_low_record_time_index(struct mlx4_ib_dev *dev, u8 port,
int *resched_delay_sec);
void mlx4_ib_update_cache_on_guid_change(struct mlx4_ib_dev *dev, int block_num,
- u8 port_num, u8 *p_data)
+ u32 port_num, u8 *p_data)
{
int i;
u64 guid_indexes;
int slave_id;
- int port_index = port_num - 1;
+ u32 port_index = port_num - 1;
if (!mlx4_is_master(dev->dev))
return;
@@ -86,7 +85,7 @@ void mlx4_ib_update_cache_on_guid_change(struct mlx4_ib_dev *dev, int block_num,
guid_indexes = be64_to_cpu((__force __be64) dev->sriov.alias_guid.
ports_guid[port_num - 1].
all_rec_per_port[block_num].guid_indexes);
- pr_debug("port: %d, guid_indexes: 0x%llx\n", port_num, guid_indexes);
+ pr_debug("port: %u, guid_indexes: 0x%llx\n", port_num, guid_indexes);
for (i = 0; i < NUM_ALIAS_GUID_IN_REC; i++) {
/* The location of the specific index starts from bit number 4
@@ -184,7 +183,7 @@ unlock:
* port_number - 1 or 2
*/
void mlx4_ib_notify_slaves_on_guid_change(struct mlx4_ib_dev *dev,
- int block_num, u8 port_num,
+ int block_num, u32 port_num,
u8 *p_data)
{
int i;
@@ -206,7 +205,7 @@ void mlx4_ib_notify_slaves_on_guid_change(struct mlx4_ib_dev *dev,
guid_indexes = be64_to_cpu((__force __be64) dev->sriov.alias_guid.
ports_guid[port_num - 1].
all_rec_per_port[block_num].guid_indexes);
- pr_debug("port: %d, guid_indexes: 0x%llx\n", port_num, guid_indexes);
+ pr_debug("port: %u, guid_indexes: 0x%llx\n", port_num, guid_indexes);
/*calculate the slaves and notify them*/
for (i = 0; i < NUM_ALIAS_GUID_IN_REC; i++) {
@@ -260,11 +259,11 @@ void mlx4_ib_notify_slaves_on_guid_change(struct mlx4_ib_dev *dev,
new_state = set_and_calc_slave_port_state(dev->dev, slave_id, port_num,
MLX4_PORT_STATE_IB_PORT_STATE_EVENT_GID_VALID,
&gen_event);
- pr_debug("slave: %d, port: %d prev_port_state: %d,"
+ pr_debug("slave: %d, port: %u prev_port_state: %d,"
" new_port_state: %d, gen_event: %d\n",
slave_id, port_num, prev_state, new_state, gen_event);
if (gen_event == SLAVE_PORT_GEN_EVENT_UP) {
- pr_debug("sending PORT_UP event to slave: %d, port: %d\n",
+ pr_debug("sending PORT_UP event to slave: %d, port: %u\n",
slave_id, port_num);
mlx4_gen_port_state_change_eqe(dev->dev, slave_id,
port_num, MLX4_PORT_CHANGE_SUBTYPE_ACTIVE);
@@ -274,7 +273,7 @@ void mlx4_ib_notify_slaves_on_guid_change(struct mlx4_ib_dev *dev,
MLX4_PORT_STATE_IB_EVENT_GID_INVALID,
&gen_event);
if (gen_event == SLAVE_PORT_GEN_EVENT_DOWN) {
- pr_debug("sending PORT DOWN event to slave: %d, port: %d\n",
+ pr_debug("sending PORT DOWN event to slave: %d, port: %u\n",
slave_id, port_num);
mlx4_gen_port_state_change_eqe(dev->dev,
slave_id,
@@ -310,7 +309,7 @@ static void aliasguid_query_handler(int status,
if (status) {
pr_debug("(port: %d) failed: status = %d\n",
cb_ctx->port, status);
- rec->time_to_run = ktime_get_boot_ns() + 1 * NSEC_PER_SEC;
+ rec->time_to_run = ktime_get_boottime_ns() + 1 * NSEC_PER_SEC;
goto out;
}
@@ -416,7 +415,7 @@ next_entry:
be64_to_cpu((__force __be64)rec->guid_indexes),
be64_to_cpu((__force __be64)applied_guid_indexes),
be64_to_cpu((__force __be64)declined_guid_indexes));
- rec->time_to_run = ktime_get_boot_ns() +
+ rec->time_to_run = ktime_get_boottime_ns() +
resched_delay_sec * NSEC_PER_SEC;
} else {
rec->status = MLX4_GUID_INFO_STATUS_SET;
@@ -499,6 +498,7 @@ static int set_guid_rec(struct ib_device *ibdev,
struct list_head *head =
&dev->sriov.alias_guid.ports_guid[port - 1].cb_list;
+ memset(&attr, 0, sizeof(attr));
err = __mlx4_ib_query_port(ibdev, port, &attr, 1);
if (err) {
pr_debug("mlx4_ib_query_port failed (err: %d), port: %d\n",
@@ -527,7 +527,7 @@ static int set_guid_rec(struct ib_device *ibdev,
memset(&guid_info_rec, 0, sizeof (struct ib_sa_guidinfo_rec));
- guid_info_rec.lid = cpu_to_be16(attr.lid);
+ guid_info_rec.lid = ib_lid_be16(attr.lid);
guid_info_rec.block_num = index;
memcpy(guid_info_rec.guid_info_list, rec_det->all_recs,
@@ -708,7 +708,7 @@ static int get_low_record_time_index(struct mlx4_ib_dev *dev, u8 port,
}
}
if (resched_delay_sec) {
- u64 curr_time = ktime_get_boot_ns();
+ u64 curr_time = ktime_get_boottime_ns();
*resched_delay_sec = (low_record_time < curr_time) ? 0 :
div_u64((low_record_time - curr_time), NSEC_PER_SEC);
@@ -780,7 +780,7 @@ void mlx4_ib_init_alias_guid_work(struct mlx4_ib_dev *dev, int port)
spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1);
if (!dev->sriov.is_going_down) {
- /* If there is pending one should cancell then run, otherwise
+ /* If there is pending one should cancel then run, otherwise
* won't run till previous one is ended as same work
* struct is used.
*/
@@ -803,8 +803,8 @@ void mlx4_ib_destroy_alias_guid_service(struct mlx4_ib_dev *dev)
unsigned long flags;
for (i = 0 ; i < dev->num_ports; i++) {
- cancel_delayed_work(&dev->sriov.alias_guid.ports_guid[i].alias_guid_work);
det = &sriov->alias_guid.ports_guid[i];
+ cancel_delayed_work_sync(&det->alias_guid_work);
spin_lock_irqsave(&sriov->alias_guid.ag_work_lock, flags);
while (!list_empty(&det->cb_list)) {
cb_ctx = list_entry(det->cb_list.next,
@@ -821,17 +821,14 @@ void mlx4_ib_destroy_alias_guid_service(struct mlx4_ib_dev *dev)
}
spin_unlock_irqrestore(&sriov->alias_guid.ag_work_lock, flags);
}
- for (i = 0 ; i < dev->num_ports; i++) {
- flush_workqueue(dev->sriov.alias_guid.ports_guid[i].wq);
+ for (i = 0 ; i < dev->num_ports; i++)
destroy_workqueue(dev->sriov.alias_guid.ports_guid[i].wq);
- }
ib_sa_unregister_client(dev->sriov.alias_guid.sa_client);
kfree(dev->sriov.alias_guid.sa_client);
}
int mlx4_ib_init_alias_guid_service(struct mlx4_ib_dev *dev)
{
- char alias_wq_name[15];
int ret = 0;
int i, j;
union ib_gid gid;
@@ -848,7 +845,7 @@ int mlx4_ib_init_alias_guid_service(struct mlx4_ib_dev *dev)
spin_lock_init(&dev->sriov.alias_guid.ag_work_lock);
for (i = 1; i <= dev->num_ports; ++i) {
- if (dev->ib_dev.query_gid(&dev->ib_dev , i, 0, &gid)) {
+ if (dev->ib_dev.ops.query_gid(&dev->ib_dev, i, 0, &gid)) {
ret = -EFAULT;
goto err_unregister;
}
@@ -877,9 +874,8 @@ int mlx4_ib_init_alias_guid_service(struct mlx4_ib_dev *dev)
dev->sriov.alias_guid.ports_guid[i].parent = &dev->sriov.alias_guid;
dev->sriov.alias_guid.ports_guid[i].port = i;
- snprintf(alias_wq_name, sizeof alias_wq_name, "alias_guid%d", i);
dev->sriov.alias_guid.ports_guid[i].wq =
- alloc_ordered_workqueue(alias_wq_name, WQ_MEM_RECLAIM);
+ alloc_ordered_workqueue("alias_guid%d", WQ_MEM_RECLAIM, i);
if (!dev->sriov.alias_guid.ports_guid[i].wq) {
ret = -ENOMEM;
goto err_thread;
diff --git a/drivers/infiniband/hw/mlx4/cm.c b/drivers/infiniband/hw/mlx4/cm.c
index d64845335e87..03aacd526860 100644
--- a/drivers/infiniband/hw/mlx4/cm.c
+++ b/drivers/infiniband/hw/mlx4/cm.c
@@ -39,7 +39,7 @@
#include "mlx4_ib.h"
-#define CM_CLEANUP_CACHE_TIMEOUT (5 * HZ)
+#define CM_CLEANUP_CACHE_TIMEOUT (30 * HZ)
struct id_map_entry {
struct rb_node node;
@@ -54,11 +54,20 @@ struct id_map_entry {
struct delayed_work timeout;
};
+struct rej_tmout_entry {
+ int slave;
+ u32 rem_pv_cm_id;
+ struct delayed_work timeout;
+ struct xarray *xa_rej_tmout;
+};
+
struct cm_generic_msg {
struct ib_mad_hdr hdr;
__be32 local_comm_id;
__be32 remote_comm_id;
+ unsigned char unused[2];
+ __be16 rej_reason;
};
struct cm_sidr_generic_msg {
@@ -71,6 +80,7 @@ struct cm_req_msg {
union ib_gid primary_path_sgid;
};
+static struct workqueue_struct *cm_wq;
static void set_local_comm_id(struct ib_mad *mad, u32 cm_id)
{
@@ -168,20 +178,17 @@ static void id_map_ent_timeout(struct work_struct *work)
{
struct delayed_work *delay = to_delayed_work(work);
struct id_map_entry *ent = container_of(delay, struct id_map_entry, timeout);
- struct id_map_entry *db_ent, *found_ent;
+ struct id_map_entry *found_ent;
struct mlx4_ib_dev *dev = ent->dev;
struct mlx4_ib_sriov *sriov = &dev->sriov;
struct rb_root *sl_id_map = &sriov->sl_id_map;
- int pv_id = (int) ent->pv_cm_id;
spin_lock(&sriov->id_map_lock);
- db_ent = (struct id_map_entry *)idr_find(&sriov->pv_id_table, pv_id);
- if (!db_ent)
+ if (!xa_erase(&sriov->pv_id_table, ent->pv_cm_id))
goto out;
found_ent = id_map_find_by_sl_id(&dev->ib_dev, ent->slave_id, ent->sl_cm_id);
if (found_ent && found_ent == ent)
rb_erase(&found_ent->node, sl_id_map);
- idr_remove(&sriov->pv_id_table, pv_id);
out:
list_del(&ent->list);
@@ -189,24 +196,6 @@ out:
kfree(ent);
}
-static void id_map_find_del(struct ib_device *ibdev, int pv_cm_id)
-{
- struct mlx4_ib_sriov *sriov = &to_mdev(ibdev)->sriov;
- struct rb_root *sl_id_map = &sriov->sl_id_map;
- struct id_map_entry *ent, *found_ent;
-
- spin_lock(&sriov->id_map_lock);
- ent = (struct id_map_entry *)idr_find(&sriov->pv_id_table, pv_cm_id);
- if (!ent)
- goto out;
- found_ent = id_map_find_by_sl_id(ibdev, ent->slave_id, ent->sl_cm_id);
- if (found_ent && found_ent == ent)
- rb_erase(&found_ent->node, sl_id_map);
- idr_remove(&sriov->pv_id_table, pv_cm_id);
-out:
- spin_unlock(&sriov->id_map_lock);
-}
-
static void sl_id_map_add(struct ib_device *ibdev, struct id_map_entry *new)
{
struct rb_root *sl_id_map = &to_mdev(ibdev)->sriov.sl_id_map;
@@ -256,41 +245,35 @@ id_map_alloc(struct ib_device *ibdev, int slave_id, u32 sl_cm_id)
ent->dev = to_mdev(ibdev);
INIT_DELAYED_WORK(&ent->timeout, id_map_ent_timeout);
- idr_preload(GFP_KERNEL);
- spin_lock(&to_mdev(ibdev)->sriov.id_map_lock);
-
- ret = idr_alloc_cyclic(&sriov->pv_id_table, ent, 0, 0, GFP_NOWAIT);
+ ret = xa_alloc_cyclic(&sriov->pv_id_table, &ent->pv_cm_id, ent,
+ xa_limit_32b, &sriov->pv_id_next, GFP_KERNEL);
if (ret >= 0) {
- ent->pv_cm_id = (u32)ret;
+ spin_lock(&sriov->id_map_lock);
sl_id_map_add(ibdev, ent);
list_add_tail(&ent->list, &sriov->cm_list);
- }
-
- spin_unlock(&sriov->id_map_lock);
- idr_preload_end();
-
- if (ret >= 0)
+ spin_unlock(&sriov->id_map_lock);
return ent;
+ }
/*error flow*/
kfree(ent);
- mlx4_ib_warn(ibdev, "No more space in the idr (err:0x%x)\n", ret);
+ mlx4_ib_warn(ibdev, "Allocation failed (err:0x%x)\n", ret);
return ERR_PTR(-ENOMEM);
}
static struct id_map_entry *
-id_map_get(struct ib_device *ibdev, int *pv_cm_id, int sl_cm_id, int slave_id)
+id_map_get(struct ib_device *ibdev, int *pv_cm_id, int slave_id, int sl_cm_id)
{
struct id_map_entry *ent;
struct mlx4_ib_sriov *sriov = &to_mdev(ibdev)->sriov;
spin_lock(&sriov->id_map_lock);
if (*pv_cm_id == -1) {
- ent = id_map_find_by_sl_id(ibdev, sl_cm_id, slave_id);
+ ent = id_map_find_by_sl_id(ibdev, slave_id, sl_cm_id);
if (ent)
*pv_cm_id = (int) ent->pv_cm_id;
} else
- ent = (struct id_map_entry *)idr_find(&sriov->pv_id_table, *pv_cm_id);
+ ent = xa_load(&sriov->pv_id_table, *pv_cm_id);
spin_unlock(&sriov->id_map_lock);
return ent;
@@ -304,14 +287,18 @@ static void schedule_delayed(struct ib_device *ibdev, struct id_map_entry *id)
spin_lock(&sriov->id_map_lock);
spin_lock_irqsave(&sriov->going_down_lock, flags);
/*make sure that there is no schedule inside the scheduled work.*/
- if (!sriov->is_going_down) {
+ if (!sriov->is_going_down && !id->scheduled_delete) {
id->scheduled_delete = 1;
- schedule_delayed_work(&id->timeout, CM_CLEANUP_CACHE_TIMEOUT);
+ queue_delayed_work(cm_wq, &id->timeout, CM_CLEANUP_CACHE_TIMEOUT);
+ } else if (id->scheduled_delete) {
+ /* Adjust timeout if already scheduled */
+ mod_delayed_work(cm_wq, &id->timeout, CM_CLEANUP_CACHE_TIMEOUT);
}
spin_unlock_irqrestore(&sriov->going_down_lock, flags);
spin_unlock(&sriov->id_map_lock);
}
+#define REJ_REASON(m) be16_to_cpu(((struct cm_generic_msg *)(m))->rej_reason)
int mlx4_ib_multiplex_cm_handler(struct ib_device *ibdev, int port, int slave_id,
struct ib_mad *mad)
{
@@ -320,9 +307,14 @@ int mlx4_ib_multiplex_cm_handler(struct ib_device *ibdev, int port, int slave_id
int pv_cm_id = -1;
if (mad->mad_hdr.attr_id == CM_REQ_ATTR_ID ||
- mad->mad_hdr.attr_id == CM_REP_ATTR_ID ||
- mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID) {
+ mad->mad_hdr.attr_id == CM_REP_ATTR_ID ||
+ mad->mad_hdr.attr_id == CM_MRA_ATTR_ID ||
+ mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID ||
+ (mad->mad_hdr.attr_id == CM_REJ_ATTR_ID && REJ_REASON(mad) == IB_CM_REJ_TIMEOUT)) {
sl_cm_id = get_local_comm_id(mad);
+ id = id_map_get(ibdev, &pv_cm_id, slave_id, sl_cm_id);
+ if (id)
+ goto cont;
id = id_map_alloc(ibdev, slave_id, sl_cm_id);
if (IS_ERR(id)) {
mlx4_ib_warn(ibdev, "%s: id{slave: %d, sl_cm_id: 0x%x} Failed to id_map_alloc\n",
@@ -338,26 +330,107 @@ int mlx4_ib_multiplex_cm_handler(struct ib_device *ibdev, int port, int slave_id
}
if (!id) {
- pr_debug("id{slave: %d, sl_cm_id: 0x%x} is NULL!\n",
- slave_id, sl_cm_id);
+ pr_debug("id{slave: %d, sl_cm_id: 0x%x} is NULL! attr_id: 0x%x\n",
+ slave_id, sl_cm_id, be16_to_cpu(mad->mad_hdr.attr_id));
return -EINVAL;
}
+cont:
set_local_comm_id(mad, id->pv_cm_id);
if (mad->mad_hdr.attr_id == CM_DREQ_ATTR_ID)
schedule_delayed(ibdev, id);
- else if (mad->mad_hdr.attr_id == CM_DREP_ATTR_ID)
- id_map_find_del(ibdev, pv_cm_id);
+ return 0;
+}
+
+static void rej_tmout_timeout(struct work_struct *work)
+{
+ struct delayed_work *delay = to_delayed_work(work);
+ struct rej_tmout_entry *item = container_of(delay, struct rej_tmout_entry, timeout);
+ struct rej_tmout_entry *deleted;
+
+ deleted = xa_cmpxchg(item->xa_rej_tmout, item->rem_pv_cm_id, item, NULL, 0);
+
+ if (deleted != item)
+ pr_debug("deleted(%p) != item(%p)\n", deleted, item);
+
+ kfree(item);
+}
+
+static int alloc_rej_tmout(struct mlx4_ib_sriov *sriov, u32 rem_pv_cm_id, int slave)
+{
+ struct rej_tmout_entry *item;
+ struct rej_tmout_entry *old;
+ int ret = 0;
+
+ xa_lock(&sriov->xa_rej_tmout);
+ item = xa_load(&sriov->xa_rej_tmout, (unsigned long)rem_pv_cm_id);
+
+ if (item) {
+ if (xa_err(item))
+ ret = xa_err(item);
+ else
+ /* If a retry, adjust delayed work */
+ mod_delayed_work(cm_wq, &item->timeout, CM_CLEANUP_CACHE_TIMEOUT);
+ goto err_or_exists;
+ }
+ xa_unlock(&sriov->xa_rej_tmout);
+
+ item = kmalloc(sizeof(*item), GFP_KERNEL);
+ if (!item)
+ return -ENOMEM;
+
+ INIT_DELAYED_WORK(&item->timeout, rej_tmout_timeout);
+ item->slave = slave;
+ item->rem_pv_cm_id = rem_pv_cm_id;
+ item->xa_rej_tmout = &sriov->xa_rej_tmout;
+
+ old = xa_cmpxchg(&sriov->xa_rej_tmout, (unsigned long)rem_pv_cm_id, NULL, item, GFP_KERNEL);
+ if (old) {
+ pr_debug(
+ "Non-null old entry (%p) or error (%d) when inserting\n",
+ old, xa_err(old));
+ kfree(item);
+ return xa_err(old);
+ }
+
+ queue_delayed_work(cm_wq, &item->timeout, CM_CLEANUP_CACHE_TIMEOUT);
return 0;
+
+err_or_exists:
+ xa_unlock(&sriov->xa_rej_tmout);
+ return ret;
+}
+
+static int lookup_rej_tmout_slave(struct mlx4_ib_sriov *sriov, u32 rem_pv_cm_id)
+{
+ struct rej_tmout_entry *item;
+ int slave;
+
+ xa_lock(&sriov->xa_rej_tmout);
+ item = xa_load(&sriov->xa_rej_tmout, (unsigned long)rem_pv_cm_id);
+
+ if (!item || xa_err(item)) {
+ pr_debug("Could not find slave. rem_pv_cm_id 0x%x error: %d\n",
+ rem_pv_cm_id, xa_err(item));
+ slave = !item ? -ENOENT : xa_err(item);
+ } else {
+ slave = item->slave;
+ }
+ xa_unlock(&sriov->xa_rej_tmout);
+
+ return slave;
}
int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave,
struct ib_mad *mad)
{
+ struct mlx4_ib_sriov *sriov = &to_mdev(ibdev)->sriov;
+ u32 rem_pv_cm_id = get_local_comm_id(mad);
u32 pv_cm_id;
struct id_map_entry *id;
+ int sts;
if (mad->mad_hdr.attr_id == CM_REQ_ATTR_ID ||
mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID) {
@@ -373,6 +446,13 @@ int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave,
be64_to_cpu(gid.global.interface_id));
return -ENOENT;
}
+
+ sts = alloc_rej_tmout(sriov, rem_pv_cm_id, *slave);
+ if (sts)
+ /* Even if this fails, we pass on the REQ to the slave */
+ pr_debug("Could not allocate rej_tmout entry. rem_pv_cm_id 0x%x slave %d status %d\n",
+ rem_pv_cm_id, *slave, sts);
+
return 0;
}
@@ -380,7 +460,14 @@ int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave,
id = id_map_get(ibdev, (int *)&pv_cm_id, -1, -1);
if (!id) {
- pr_debug("Couldn't find an entry for pv_cm_id 0x%x\n", pv_cm_id);
+ if (mad->mad_hdr.attr_id == CM_REJ_ATTR_ID &&
+ REJ_REASON(mad) == IB_CM_REJ_TIMEOUT && slave) {
+ *slave = lookup_rej_tmout_slave(sriov, rem_pv_cm_id);
+
+ return (*slave < 0) ? *slave : 0;
+ }
+ pr_debug("Couldn't find an entry for pv_cm_id 0x%x, attr_id 0x%x\n",
+ pv_cm_id, be16_to_cpu(mad->mad_hdr.attr_id));
return -ENOENT;
}
@@ -388,12 +475,9 @@ int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave,
*slave = id->slave_id;
set_remote_comm_id(mad, id->sl_cm_id);
- if (mad->mad_hdr.attr_id == CM_DREQ_ATTR_ID)
+ if (mad->mad_hdr.attr_id == CM_DREQ_ATTR_ID ||
+ mad->mad_hdr.attr_id == CM_REJ_ATTR_ID)
schedule_delayed(ibdev, id);
- else if (mad->mad_hdr.attr_id == CM_REJ_ATTR_ID ||
- mad->mad_hdr.attr_id == CM_DREP_ATTR_ID) {
- id_map_find_del(ibdev, (int) pv_cm_id);
- }
return 0;
}
@@ -403,7 +487,35 @@ void mlx4_ib_cm_paravirt_init(struct mlx4_ib_dev *dev)
spin_lock_init(&dev->sriov.id_map_lock);
INIT_LIST_HEAD(&dev->sriov.cm_list);
dev->sriov.sl_id_map = RB_ROOT;
- idr_init(&dev->sriov.pv_id_table);
+ xa_init_flags(&dev->sriov.pv_id_table, XA_FLAGS_ALLOC);
+ xa_init(&dev->sriov.xa_rej_tmout);
+}
+
+static void rej_tmout_xa_cleanup(struct mlx4_ib_sriov *sriov, int slave)
+{
+ struct rej_tmout_entry *item;
+ bool flush_needed = false;
+ unsigned long id;
+ int cnt = 0;
+
+ xa_lock(&sriov->xa_rej_tmout);
+ xa_for_each(&sriov->xa_rej_tmout, id, item) {
+ if (slave < 0 || slave == item->slave) {
+ mod_delayed_work(cm_wq, &item->timeout, 0);
+ flush_needed = true;
+ ++cnt;
+ }
+ }
+ xa_unlock(&sriov->xa_rej_tmout);
+
+ if (flush_needed) {
+ flush_workqueue(cm_wq);
+ pr_debug("Deleted %d entries in xarray for slave %d during cleanup\n",
+ cnt, slave);
+ }
+
+ if (slave < 0)
+ WARN_ON(!xa_empty(&sriov->xa_rej_tmout));
}
/* slave = -1 ==> all slaves */
@@ -414,7 +526,7 @@ void mlx4_ib_cm_paravirt_clean(struct mlx4_ib_dev *dev, int slave)
struct rb_root *sl_id_map = &sriov->sl_id_map;
struct list_head lh;
struct rb_node *nd;
- int need_flush = 1;
+ int need_flush = 0;
struct id_map_entry *map, *tmp_map;
/* cancel all delayed work queue entries */
INIT_LIST_HEAD(&lh);
@@ -422,14 +534,14 @@ void mlx4_ib_cm_paravirt_clean(struct mlx4_ib_dev *dev, int slave)
list_for_each_entry_safe(map, tmp_map, &dev->sriov.cm_list, list) {
if (slave < 0 || slave == map->slave_id) {
if (map->scheduled_delete)
- need_flush &= !!cancel_delayed_work(&map->timeout);
+ need_flush |= !cancel_delayed_work(&map->timeout);
}
}
spin_unlock(&sriov->id_map_lock);
- if (!need_flush)
- flush_scheduled_work(); /* make sure all timers were flushed */
+ if (need_flush)
+ flush_workqueue(cm_wq); /* make sure all timers were flushed */
/* now, remove all leftover entries from databases*/
spin_lock(&sriov->id_map_lock);
@@ -440,7 +552,7 @@ void mlx4_ib_cm_paravirt_clean(struct mlx4_ib_dev *dev, int slave)
struct id_map_entry, node);
rb_erase(&ent->node, sl_id_map);
- idr_remove(&sriov->pv_id_table, (int) ent->pv_cm_id);
+ xa_erase(&sriov->pv_id_table, ent->pv_cm_id);
}
list_splice_init(&dev->sriov.cm_list, &lh);
} else {
@@ -456,7 +568,7 @@ void mlx4_ib_cm_paravirt_clean(struct mlx4_ib_dev *dev, int slave)
/* remove those nodes from databases */
list_for_each_entry_safe(map, tmp_map, &lh, list) {
rb_erase(&map->node, sl_id_map);
- idr_remove(&sriov->pv_id_table, (int) map->pv_cm_id);
+ xa_erase(&sriov->pv_id_table, map->pv_cm_id);
}
/* add remaining nodes from cm_list */
@@ -473,4 +585,20 @@ void mlx4_ib_cm_paravirt_clean(struct mlx4_ib_dev *dev, int slave)
list_del(&map->list);
kfree(map);
}
+
+ rej_tmout_xa_cleanup(sriov, slave);
+}
+
+int mlx4_ib_cm_init(void)
+{
+ cm_wq = alloc_workqueue("mlx4_ib_cm", WQ_PERCPU, 0);
+ if (!cm_wq)
+ return -ENOMEM;
+
+ return 0;
+}
+
+void mlx4_ib_cm_destroy(void)
+{
+ destroy_workqueue(cm_wq);
}
diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index 6a0fec357dae..c592374f4a58 100644
--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -38,6 +38,7 @@
#include "mlx4_ib.h"
#include <rdma/mlx4-abi.h>
+#include <rdma/uverbs_ioctl.h>
static void mlx4_ib_cq_comp(struct mlx4_cq *cq)
{
@@ -102,7 +103,7 @@ static int mlx4_ib_alloc_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf *
int err;
err = mlx4_buf_alloc(dev->dev, nent * dev->dev->caps.cqe_size,
- PAGE_SIZE * 2, &buf->buf, GFP_KERNEL);
+ PAGE_SIZE * 2, &buf->buf);
if (err)
goto out;
@@ -113,7 +114,7 @@ static int mlx4_ib_alloc_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf *
if (err)
goto err_buf;
- err = mlx4_buf_write_mtt(dev->dev, &buf->mtt, &buf->buf, GFP_KERNEL);
+ err = mlx4_buf_write_mtt(dev->dev, &buf->mtt, &buf->buf);
if (err)
goto err_mtt;
@@ -134,20 +135,27 @@ static void mlx4_ib_free_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf *
mlx4_buf_free(dev->dev, (cqe + 1) * buf->entry_size, &buf->buf);
}
-static int mlx4_ib_get_cq_umem(struct mlx4_ib_dev *dev, struct ib_ucontext *context,
- struct mlx4_ib_cq_buf *buf, struct ib_umem **umem,
- u64 buf_addr, int cqe)
+static int mlx4_ib_get_cq_umem(struct mlx4_ib_dev *dev,
+ struct mlx4_ib_cq_buf *buf,
+ struct ib_umem **umem, u64 buf_addr, int cqe)
{
int err;
int cqe_size = dev->dev->caps.cqe_size;
+ int shift;
+ int n;
- *umem = ib_umem_get(context, buf_addr, cqe * cqe_size,
- IB_ACCESS_LOCAL_WRITE, 1);
+ *umem = ib_umem_get(&dev->ib_dev, buf_addr, cqe * cqe_size,
+ IB_ACCESS_LOCAL_WRITE);
if (IS_ERR(*umem))
return PTR_ERR(*umem);
- err = mlx4_mtt_init(dev->dev, ib_umem_page_count(*umem),
- ilog2((*umem)->page_size), &buf->mtt);
+ shift = mlx4_ib_umem_calc_optimal_mtt_size(*umem, 0, &n);
+ if (shift < 0) {
+ err = shift;
+ goto err_buf;
+ }
+
+ err = mlx4_mtt_init(dev->dev, n, shift, &buf->mtt);
if (err)
goto err_buf;
@@ -166,28 +174,27 @@ err_buf:
return err;
}
-#define CQ_CREATE_FLAGS_SUPPORTED IB_CQ_FLAGS_TIMESTAMP_COMPLETION
-struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev,
- const struct ib_cq_init_attr *attr,
- struct ib_ucontext *context,
- struct ib_udata *udata)
+#define CQ_CREATE_FLAGS_SUPPORTED IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION
+int mlx4_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+ struct uverbs_attr_bundle *attrs)
{
+ struct ib_udata *udata = &attrs->driver_udata;
+ struct ib_device *ibdev = ibcq->device;
int entries = attr->cqe;
int vector = attr->comp_vector;
struct mlx4_ib_dev *dev = to_mdev(ibdev);
- struct mlx4_ib_cq *cq;
+ struct mlx4_ib_cq *cq = to_mcq(ibcq);
struct mlx4_uar *uar;
+ void *buf_addr;
int err;
+ struct mlx4_ib_ucontext *context = rdma_udata_to_drv_context(
+ udata, struct mlx4_ib_ucontext, ibucontext);
if (entries < 1 || entries > dev->dev->caps.max_cqes)
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
if (attr->flags & ~CQ_CREATE_FLAGS_SUPPORTED)
- return ERR_PTR(-EINVAL);
-
- cq = kmalloc(sizeof *cq, GFP_KERNEL);
- if (!cq)
- return ERR_PTR(-ENOMEM);
+ return -EINVAL;
entries = roundup_pow_of_two(entries + 1);
cq->ibcq.cqe = entries - 1;
@@ -199,7 +206,7 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev,
INIT_LIST_HEAD(&cq->send_qp_list);
INIT_LIST_HEAD(&cq->recv_qp_list);
- if (context) {
+ if (udata) {
struct mlx4_ib_create_cq ucmd;
if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) {
@@ -207,19 +214,20 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev,
goto err_cq;
}
- err = mlx4_ib_get_cq_umem(dev, context, &cq->buf, &cq->umem,
+ buf_addr = (void *)(unsigned long)ucmd.buf_addr;
+ err = mlx4_ib_get_cq_umem(dev, &cq->buf, &cq->umem,
ucmd.buf_addr, entries);
if (err)
goto err_cq;
- err = mlx4_ib_db_map_user(to_mucontext(context), ucmd.db_addr,
- &cq->db);
+ err = mlx4_ib_db_map_user(udata, ucmd.db_addr, &cq->db);
if (err)
goto err_mtt;
- uar = &to_mucontext(context)->uar;
+ uar = &context->uar;
+ cq->mcq.usage = MLX4_RES_USAGE_USER_VERBS;
} else {
- err = mlx4_db_alloc(dev->dev, &cq->db, 1, GFP_KERNEL);
+ err = mlx4_db_alloc(dev->dev, &cq->db, 1);
if (err)
goto err_cq;
@@ -232,55 +240,56 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev,
if (err)
goto err_db;
+ buf_addr = &cq->buf.buf;
+
uar = &dev->priv_uar;
+ cq->mcq.usage = MLX4_RES_USAGE_DRIVER;
}
if (dev->eq_table)
vector = dev->eq_table[vector % ibdev->num_comp_vectors];
- err = mlx4_cq_alloc(dev->dev, entries, &cq->buf.mtt, uar,
- cq->db.dma, &cq->mcq, vector, 0,
- !!(cq->create_flags & IB_CQ_FLAGS_TIMESTAMP_COMPLETION));
+ err = mlx4_cq_alloc(dev->dev, entries, &cq->buf.mtt, uar, cq->db.dma,
+ &cq->mcq, vector, 0,
+ !!(cq->create_flags &
+ IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION),
+ buf_addr, !!udata);
if (err)
goto err_dbmap;
- if (context)
+ if (udata)
cq->mcq.tasklet_ctx.comp = mlx4_ib_cq_comp;
else
cq->mcq.comp = mlx4_ib_cq_comp;
cq->mcq.event = mlx4_ib_cq_event;
- if (context)
+ if (udata)
if (ib_copy_to_udata(udata, &cq->mcq.cqn, sizeof (__u32))) {
err = -EFAULT;
goto err_cq_free;
}
- return &cq->ibcq;
+ return 0;
err_cq_free:
mlx4_cq_free(dev->dev, &cq->mcq);
err_dbmap:
- if (context)
- mlx4_ib_db_unmap_user(to_mucontext(context), &cq->db);
+ if (udata)
+ mlx4_ib_db_unmap_user(context, &cq->db);
err_mtt:
mlx4_mtt_cleanup(dev->dev, &cq->buf.mtt);
- if (context)
- ib_umem_release(cq->umem);
- else
+ ib_umem_release(cq->umem);
+ if (!udata)
mlx4_ib_free_cq_buf(dev, &cq->buf, cq->ibcq.cqe);
err_db:
- if (!context)
+ if (!udata)
mlx4_db_free(dev->dev, &cq->db);
-
err_cq:
- kfree(cq);
-
- return ERR_PTR(err);
+ return err;
}
static int mlx4_alloc_resize_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq *cq,
@@ -323,8 +332,8 @@ static int mlx4_alloc_resize_umem(struct mlx4_ib_dev *dev, struct mlx4_ib_cq *cq
if (!cq->resize_buf)
return -ENOMEM;
- err = mlx4_ib_get_cq_umem(dev, cq->umem->context, &cq->resize_buf->buf,
- &cq->resize_umem, ucmd.buf_addr, entries);
+ err = mlx4_ib_get_cq_umem(dev, &cq->resize_buf->buf, &cq->resize_umem,
+ ucmd.buf_addr, entries);
if (err) {
kfree(cq->resize_buf);
cq->resize_buf = NULL;
@@ -462,18 +471,15 @@ err_buf:
kfree(cq->resize_buf);
cq->resize_buf = NULL;
- if (cq->resize_umem) {
- ib_umem_release(cq->resize_umem);
- cq->resize_umem = NULL;
- }
-
+ ib_umem_release(cq->resize_umem);
+ cq->resize_umem = NULL;
out:
mutex_unlock(&cq->resize_mutex);
return err;
}
-int mlx4_ib_destroy_cq(struct ib_cq *cq)
+int mlx4_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata)
{
struct mlx4_ib_dev *dev = to_mdev(cq->device);
struct mlx4_ib_cq *mcq = to_mcq(cq);
@@ -481,16 +487,18 @@ int mlx4_ib_destroy_cq(struct ib_cq *cq)
mlx4_cq_free(dev->dev, &mcq->mcq);
mlx4_mtt_cleanup(dev->dev, &mcq->buf.mtt);
- if (cq->uobject) {
- mlx4_ib_db_unmap_user(to_mucontext(cq->uobject->context), &mcq->db);
- ib_umem_release(mcq->umem);
+ if (udata) {
+ mlx4_ib_db_unmap_user(
+ rdma_udata_to_drv_context(
+ udata,
+ struct mlx4_ib_ucontext,
+ ibucontext),
+ &mcq->db);
} else {
mlx4_ib_free_cq_buf(dev, &mcq->buf, cq->cqe);
mlx4_db_free(dev->dev, &mcq->db);
}
-
- kfree(mcq);
-
+ ib_umem_release(mcq->umem);
return 0;
}
@@ -565,18 +573,13 @@ static void mlx4_ib_handle_error_cqe(struct mlx4_err_cqe *cqe,
wc->vendor_err = cqe->vendor_err_syndrome;
}
-static int mlx4_ib_ipoib_csum_ok(__be16 status, __be16 checksum)
+static int mlx4_ib_ipoib_csum_ok(__be16 status, u8 badfcs_enc, __be16 checksum)
{
- return ((status & cpu_to_be16(MLX4_CQE_STATUS_IPV4 |
- MLX4_CQE_STATUS_IPV4F |
- MLX4_CQE_STATUS_IPV4OPT |
- MLX4_CQE_STATUS_IPV6 |
- MLX4_CQE_STATUS_IPOK)) ==
- cpu_to_be16(MLX4_CQE_STATUS_IPV4 |
- MLX4_CQE_STATUS_IPOK)) &&
- (status & cpu_to_be16(MLX4_CQE_STATUS_UDP |
- MLX4_CQE_STATUS_TCP)) &&
- checksum == cpu_to_be16(0xffff);
+ return ((badfcs_enc & MLX4_CQE_STATUS_L4_CSUM) ||
+ ((status & cpu_to_be16(MLX4_CQE_STATUS_IPOK)) &&
+ (status & cpu_to_be16(MLX4_CQE_STATUS_TCP |
+ MLX4_CQE_STATUS_UDP)) &&
+ (checksum == cpu_to_be16(0xffff))));
}
static void use_tunnel_data(struct mlx4_ib_qp *qp, struct mlx4_ib_cq *cq, struct ib_wc *wc,
@@ -595,6 +598,7 @@ static void use_tunnel_data(struct mlx4_ib_qp *qp, struct mlx4_ib_cq *cq, struct
wc->dlid_path_bits = 0;
if (is_eth) {
+ wc->slid = 0;
wc->vlan_id = be16_to_cpu(hdr->tun.sl_vid);
memcpy(&(wc->smac[0]), (char *)&hdr->tun.mac_31_0, 4);
memcpy(&(wc->smac[4]), (char *)&hdr->tun.slid_mac_47_32, 2);
@@ -635,7 +639,7 @@ static void mlx4_ib_poll_sw_comp(struct mlx4_ib_cq *cq, int num_entries,
struct mlx4_ib_qp *qp;
*npolled = 0;
- /* Find uncompleted WQEs belonging to that cq and retrun
+ /* Find uncompleted WQEs belonging to that cq and return
* simulated FLUSH_ERR completions
*/
list_for_each_entry(qp, &cq->send_qp_list, cq_send_list) {
@@ -766,11 +770,13 @@ repoll:
switch (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) {
case MLX4_OPCODE_RDMA_WRITE_IMM:
wc->wc_flags |= IB_WC_WITH_IMM;
+ fallthrough;
case MLX4_OPCODE_RDMA_WRITE:
wc->opcode = IB_WC_RDMA_WRITE;
break;
case MLX4_OPCODE_SEND_IMM:
wc->wc_flags |= IB_WC_WITH_IMM;
+ fallthrough;
case MLX4_OPCODE_SEND:
case MLX4_OPCODE_SEND_INVAL:
wc->opcode = IB_WC_SEND;
@@ -843,15 +849,16 @@ repoll:
}
}
- wc->slid = be16_to_cpu(cqe->rlid);
g_mlpath_rqpn = be32_to_cpu(cqe->g_mlpath_rqpn);
wc->src_qp = g_mlpath_rqpn & 0xffffff;
wc->dlid_path_bits = (g_mlpath_rqpn >> 24) & 0x7f;
wc->wc_flags |= g_mlpath_rqpn & 0x80000000 ? IB_WC_GRH : 0;
wc->pkey_index = be32_to_cpu(cqe->immed_rss_invalid) & 0x7f;
wc->wc_flags |= mlx4_ib_ipoib_csum_ok(cqe->status,
+ cqe->badfcs_enc,
cqe->checksum) ? IB_WC_IP_CSUM_OK : 0;
if (is_eth) {
+ wc->slid = 0;
wc->sl = be16_to_cpu(cqe->sl_vid) >> 13;
if (be32_to_cpu(cqe->vlan_my_qpn) &
MLX4_CQE_CVLAN_PRESENT_MASK) {
@@ -863,6 +870,7 @@ repoll:
memcpy(wc->smac, cqe->smac, ETH_ALEN);
wc->wc_flags |= (IB_WC_WITH_VLAN | IB_WC_WITH_SMAC);
} else {
+ wc->slid = be16_to_cpu(cqe->rlid);
wc->sl = be16_to_cpu(cqe->sl_vid) >> 12;
wc->vlan_id = 0xffff;
}
diff --git a/drivers/infiniband/hw/mlx4/doorbell.c b/drivers/infiniband/hw/mlx4/doorbell.c
index c51740986367..9bbd695a9fd5 100644
--- a/drivers/infiniband/hw/mlx4/doorbell.c
+++ b/drivers/infiniband/hw/mlx4/doorbell.c
@@ -31,6 +31,7 @@
*/
#include <linux/slab.h>
+#include <rdma/uverbs_ioctl.h>
#include "mlx4_ib.h"
@@ -41,11 +42,13 @@ struct mlx4_ib_user_db_page {
int refcnt;
};
-int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context, unsigned long virt,
+int mlx4_ib_db_map_user(struct ib_udata *udata, unsigned long virt,
struct mlx4_db *db)
{
struct mlx4_ib_user_db_page *page;
int err = 0;
+ struct mlx4_ib_ucontext *context = rdma_udata_to_drv_context(
+ udata, struct mlx4_ib_ucontext, ibucontext);
mutex_lock(&context->db_page_mutex);
@@ -61,8 +64,8 @@ int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context, unsigned long virt,
page->user_virt = (virt & PAGE_MASK);
page->refcnt = 0;
- page->umem = ib_umem_get(&context->ibucontext, virt & PAGE_MASK,
- PAGE_SIZE, 0, 0);
+ page->umem = ib_umem_get(context->ibucontext.device, virt & PAGE_MASK,
+ PAGE_SIZE, 0);
if (IS_ERR(page->umem)) {
err = PTR_ERR(page->umem);
kfree(page);
@@ -72,7 +75,8 @@ int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context, unsigned long virt,
list_add(&page->list, &context->db_page_list);
found:
- db->dma = sg_dma_address(page->umem->sg_head.sgl) + (virt & ~PAGE_MASK);
+ db->dma = sg_dma_address(page->umem->sgt_append.sgt.sgl) +
+ (virt & ~PAGE_MASK);
db->u.user_page = page;
++page->refcnt;
diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c
index db564ccc0f92..91c714f72099 100644
--- a/drivers/infiniband/hw/mlx4/mad.c
+++ b/drivers/infiniband/hw/mlx4/mad.c
@@ -88,15 +88,15 @@ struct mlx4_rcv_tunnel_mad {
struct ib_mad mad;
} __packed;
-static void handle_client_rereg_event(struct mlx4_ib_dev *dev, u8 port_num);
-static void handle_lid_change_event(struct mlx4_ib_dev *dev, u8 port_num);
+static void handle_client_rereg_event(struct mlx4_ib_dev *dev, u32 port_num);
+static void handle_lid_change_event(struct mlx4_ib_dev *dev, u32 port_num);
static void __propagate_pkey_ev(struct mlx4_ib_dev *dev, int port_num,
int block, u32 change_bitmap);
__be64 mlx4_ib_gen_node_guid(void)
{
#define NODE_GUID_HI ((u64) (((u64)IB_OPENIB_OUI) << 40))
- return cpu_to_be64(NODE_GUID_HI | prandom_u32());
+ return cpu_to_be64(NODE_GUID_HI | get_random_u32());
}
__be64 mlx4_ib_get_new_demux_tid(struct mlx4_ib_demux_ctx *ctx)
@@ -169,7 +169,7 @@ int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int mad_ifc_flags,
op_modifier |= 0x4;
- in_modifier |= in_wc->slid << 16;
+ in_modifier |= ib_lid_cpu16(in_wc->slid) << 16;
}
err = mlx4_cmd_box(dev->dev, inmailbox->dma, outmailbox->dma, in_modifier,
@@ -186,28 +186,29 @@ int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int mad_ifc_flags,
return err;
}
-static void update_sm_ah(struct mlx4_ib_dev *dev, u8 port_num, u16 lid, u8 sl)
+static void update_sm_ah(struct mlx4_ib_dev *dev, u32 port_num, u16 lid, u8 sl)
{
struct ib_ah *new_ah;
- struct ib_ah_attr ah_attr;
+ struct rdma_ah_attr ah_attr;
unsigned long flags;
if (!dev->send_agent[port_num - 1][0])
return;
memset(&ah_attr, 0, sizeof ah_attr);
- ah_attr.dlid = lid;
- ah_attr.sl = sl;
- ah_attr.port_num = port_num;
+ ah_attr.type = rdma_ah_find_type(&dev->ib_dev, port_num);
+ rdma_ah_set_dlid(&ah_attr, lid);
+ rdma_ah_set_sl(&ah_attr, sl);
+ rdma_ah_set_port_num(&ah_attr, port_num);
- new_ah = ib_create_ah(dev->send_agent[port_num - 1][0]->qp->pd,
- &ah_attr);
+ new_ah = rdma_create_ah(dev->send_agent[port_num - 1][0]->qp->pd,
+ &ah_attr, 0);
if (IS_ERR(new_ah))
return;
spin_lock_irqsave(&dev->sm_lock, flags);
if (dev->sm_ah[port_num - 1])
- ib_destroy_ah(dev->sm_ah[port_num - 1]);
+ rdma_destroy_ah(dev->sm_ah[port_num - 1], 0);
dev->sm_ah[port_num - 1] = new_ah;
spin_unlock_irqrestore(&dev->sm_lock, flags);
}
@@ -216,8 +217,8 @@ static void update_sm_ah(struct mlx4_ib_dev *dev, u8 port_num, u16 lid, u8 sl)
* Snoop SM MADs for port info, GUID info, and P_Key table sets, so we can
* synthesize LID change, Client-Rereg, GID change, and P_Key change events.
*/
-static void smp_snoop(struct ib_device *ibdev, u8 port_num, const struct ib_mad *mad,
- u16 prev_lid)
+static void smp_snoop(struct ib_device *ibdev, u32 port_num,
+ const struct ib_mad *mad, u16 prev_lid)
{
struct ib_port_info *pinfo;
u16 lid;
@@ -273,7 +274,7 @@ static void smp_snoop(struct ib_device *ibdev, u8 port_num, const struct ib_mad
be16_to_cpu(base[i]);
}
}
- pr_debug("PKEY Change event: port=%d, "
+ pr_debug("PKEY Change event: port=%u, "
"block=0x%x, change_bitmap=0x%x\n",
port_num, bn, pkey_change_bitmap);
@@ -379,7 +380,8 @@ static void node_desc_override(struct ib_device *dev,
}
}
-static void forward_trap(struct mlx4_ib_dev *dev, u8 port_num, const struct ib_mad *mad)
+static void forward_trap(struct mlx4_ib_dev *dev, u32 port_num,
+ const struct ib_mad *mad)
{
int qpn = mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_SUBN_LID_ROUTED;
struct ib_mad_send_buf *send_buf;
@@ -428,7 +430,7 @@ static int mlx4_ib_demux_sa_handler(struct ib_device *ibdev, int port, int slave
return ret;
}
-int mlx4_ib_find_real_gid(struct ib_device *ibdev, u8 port, __be64 guid)
+int mlx4_ib_find_real_gid(struct ib_device *ibdev, u32 port, __be64 guid)
{
struct mlx4_ib_dev *dev = to_mdev(ibdev);
int i;
@@ -442,7 +444,7 @@ int mlx4_ib_find_real_gid(struct ib_device *ibdev, u8 port, __be64 guid)
static int find_slave_port_pkey_ix(struct mlx4_ib_dev *dev, int slave,
- u8 port, u16 pkey, u16 *ix)
+ u32 port, u16 pkey, u16 *ix)
{
int i, ret;
u8 unassigned_pkey_ix, pkey_ix, partial_ix = 0xFF;
@@ -499,17 +501,24 @@ static int get_gids_from_l3_hdr(struct ib_grh *grh, union ib_gid *sgid,
sgid, dgid);
}
-int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
+static int is_proxy_qp0(struct mlx4_ib_dev *dev, int qpn, int slave)
+{
+ int proxy_start = dev->dev->phys_caps.base_proxy_sqpn + 8 * slave;
+
+ return (qpn >= proxy_start && qpn <= proxy_start + 1);
+}
+
+int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u32 port,
enum ib_qp_type dest_qpt, struct ib_wc *wc,
struct ib_grh *grh, struct ib_mad *mad)
{
struct ib_sge list;
struct ib_ud_wr wr;
- struct ib_send_wr *bad_wr;
+ const struct ib_send_wr *bad_wr;
struct mlx4_ib_demux_pv_ctx *tun_ctx;
struct mlx4_ib_demux_pv_qp *tun_qp;
struct mlx4_rcv_tunnel_mad *tun_mad;
- struct ib_ah_attr attr;
+ struct rdma_ah_attr attr;
struct ib_ah *ah;
struct ib_qp *src_qp = NULL;
unsigned tun_tx_ix = 0;
@@ -519,8 +528,10 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
u16 cached_pkey;
u8 is_eth = dev->dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH;
- if (dest_qpt > IB_QPT_GSI)
+ if (dest_qpt > IB_QPT_GSI) {
+ pr_debug("dest_qpt (%d) > IB_QPT_GSI\n", dest_qpt);
return -EINVAL;
+ }
tun_ctx = dev->sriov.demux[port-1].tun[slave];
@@ -537,12 +548,20 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
if (dest_qpt) {
u16 pkey_ix;
ret = ib_get_cached_pkey(&dev->ib_dev, port, wc->pkey_index, &cached_pkey);
- if (ret)
+ if (ret) {
+ pr_debug("unable to get %s cached pkey for index %d, ret %d\n",
+ is_proxy_qp0(dev, wc->src_qp, slave) ? "SMI" : "GSI",
+ wc->pkey_index, ret);
return -EINVAL;
+ }
ret = find_slave_port_pkey_ix(dev, slave, port, cached_pkey, &pkey_ix);
- if (ret)
+ if (ret) {
+ pr_debug("unable to get %s pkey ix for pkey 0x%x, ret %d\n",
+ is_proxy_qp0(dev, wc->src_qp, slave) ? "SMI" : "GSI",
+ cached_pkey, ret);
return -EINVAL;
+ }
tun_pkey_ix = pkey_ix;
} else
tun_pkey_ix = dev->pkeys.virt2phys_pkey[slave][port - 1][0];
@@ -555,15 +574,18 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
/* create ah. Just need an empty one with the port num for the post send.
* The driver will set the force loopback bit in post_send */
memset(&attr, 0, sizeof attr);
- attr.port_num = port;
+ attr.type = rdma_ah_find_type(&dev->ib_dev, port);
+
+ rdma_ah_set_port_num(&attr, port);
if (is_eth) {
union ib_gid sgid;
+ union ib_gid dgid;
- if (get_gids_from_l3_hdr(grh, &sgid, &attr.grh.dgid))
+ if (get_gids_from_l3_hdr(grh, &sgid, &dgid))
return -EINVAL;
- attr.ah_flags = IB_AH_GRH;
+ rdma_ah_set_grh(&attr, &dgid, 0, 0, 0, 0);
}
- ah = ib_create_ah(tun_ctx->pd, &attr);
+ ah = rdma_create_ah(tun_ctx->pd, &attr, 0);
if (IS_ERR(ah))
return -ENOMEM;
@@ -580,7 +602,7 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
tun_mad = (struct mlx4_rcv_tunnel_mad *) (tun_qp->tx_ring[tun_tx_ix].buf.addr);
if (tun_qp->tx_ring[tun_tx_ix].ah)
- ib_destroy_ah(tun_qp->tx_ring[tun_tx_ix].ah);
+ rdma_destroy_ah(tun_qp->tx_ring[tun_tx_ix].ah, 0);
tun_qp->tx_ring[tun_tx_ix].ah = ah;
ib_dma_sync_single_for_cpu(&dev->ib_dev,
tun_qp->tx_ring[tun_tx_ix].buf.map,
@@ -621,7 +643,7 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
memcpy((char *)&tun_mad->hdr.slid_mac_47_32, &(wc->smac[4]), 2);
} else {
tun_mad->hdr.sl_vid = cpu_to_be16(((u16)(wc->sl)) << 12);
- tun_mad->hdr.slid_mac_47_32 = cpu_to_be16(wc->slid);
+ tun_mad->hdr.slid_mac_47_32 = ib_lid_be16(wc->slid);
}
ib_dma_sync_single_for_device(&dev->ib_dev,
@@ -653,11 +675,11 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
spin_unlock(&tun_qp->tx_lock);
tun_qp->tx_ring[tun_tx_ix].ah = NULL;
end:
- ib_destroy_ah(ah);
+ rdma_destroy_ah(ah, 0);
return ret;
}
-static int mlx4_ib_demux_mad(struct ib_device *ibdev, u8 port,
+static int mlx4_ib_demux_mad(struct ib_device *ibdev, u32 port,
struct ib_wc *wc, struct ib_grh *grh,
struct ib_mad *mad)
{
@@ -711,7 +733,8 @@ static int mlx4_ib_demux_mad(struct ib_device *ibdev, u8 port,
err = mlx4_ib_send_to_slave(dev, slave, port, wc->qp->qp_type, wc, grh, mad);
if (err)
- pr_debug("failed sending to slave %d via tunnel qp (%d)\n",
+ pr_debug("failed sending %s to slave %d via tunnel qp (%d)\n",
+ is_proxy_qp0(dev, wc->src_qp, slave) ? "SMI" : "GSI",
slave, err);
return 0;
}
@@ -790,12 +813,13 @@ static int mlx4_ib_demux_mad(struct ib_device *ibdev, u8 port,
err = mlx4_ib_send_to_slave(dev, slave, port, wc->qp->qp_type, wc, grh, mad);
if (err)
- pr_debug("failed sending to slave %d via tunnel qp (%d)\n",
+ pr_debug("failed sending %s to slave %d via tunnel qp (%d)\n",
+ is_proxy_qp0(dev, wc->src_qp, slave) ? "SMI" : "GSI",
slave, err);
return 0;
}
-static int ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
+static int ib_process_mad(struct ib_device *ibdev, int mad_flags, u32 port_num,
const struct ib_wc *in_wc, const struct ib_grh *in_grh,
const struct ib_mad *in_mad, struct ib_mad *out_mad)
{
@@ -803,26 +827,7 @@ static int ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
int err;
struct ib_port_attr pattr;
- if (in_wc && in_wc->qp->qp_num) {
- pr_debug("received MAD: slid:%d sqpn:%d "
- "dlid_bits:%d dqpn:%d wc_flags:0x%x, cls %x, mtd %x, atr %x\n",
- in_wc->slid, in_wc->src_qp,
- in_wc->dlid_path_bits,
- in_wc->qp->qp_num,
- in_wc->wc_flags,
- in_mad->mad_hdr.mgmt_class, in_mad->mad_hdr.method,
- be16_to_cpu(in_mad->mad_hdr.attr_id));
- if (in_wc->wc_flags & IB_WC_GRH) {
- pr_debug("sgid_hi:0x%016llx sgid_lo:0x%016llx\n",
- be64_to_cpu(in_grh->sgid.global.subnet_prefix),
- be64_to_cpu(in_grh->sgid.global.interface_id));
- pr_debug("dgid_hi:0x%016llx dgid_lo:0x%016llx\n",
- be64_to_cpu(in_grh->dgid.global.subnet_prefix),
- be64_to_cpu(in_grh->dgid.global.interface_id));
- }
- }
-
- slid = in_wc ? in_wc->slid : be16_to_cpu(IB_LID_PERMISSIVE);
+ slid = in_wc ? ib_lid_cpu16(in_wc->slid) : be16_to_cpu(IB_LID_PERMISSIVE);
if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP && slid == 0) {
forward_trap(to_mdev(ibdev), port_num, in_mad);
@@ -856,7 +861,7 @@ static int ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
in_mad->mad_hdr.method == IB_MGMT_METHOD_SET &&
in_mad->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO &&
!ib_query_port(ibdev, port_num, &pattr))
- prev_lid = pattr.lid;
+ prev_lid = ib_lid_cpu16(pattr.lid);
err = mlx4_MAD_IFC(to_mdev(ibdev),
(mad_flags & IB_MAD_IGNORE_MKEY ? MLX4_MAD_IFC_IGNORE_MKEY : 0) |
@@ -928,9 +933,10 @@ static int iboe_process_mad_port_info(void *out_mad)
return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
}
-static int iboe_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
- const struct ib_wc *in_wc, const struct ib_grh *in_grh,
- const struct ib_mad *in_mad, struct ib_mad *out_mad)
+static int iboe_process_mad(struct ib_device *ibdev, int mad_flags,
+ u32 port_num, const struct ib_wc *in_wc,
+ const struct ib_grh *in_grh,
+ const struct ib_mad *in_mad, struct ib_mad *out_mad)
{
struct mlx4_counter counter_stats;
struct mlx4_ib_dev *dev = to_mdev(ibdev);
@@ -960,7 +966,6 @@ static int iboe_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
}
mutex_unlock(&dev->counters_table[port_num - 1].mutex);
if (stats_avail) {
- memset(out_mad->data, 0, sizeof out_mad->data);
switch (counter_stats.counter_mode & 0xf) {
case 0:
edit_counter(&counter_stats,
@@ -976,40 +981,33 @@ static int iboe_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
return err;
}
-int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
+int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u32 port_num,
const struct ib_wc *in_wc, const struct ib_grh *in_grh,
- const struct ib_mad_hdr *in, size_t in_mad_size,
- struct ib_mad_hdr *out, size_t *out_mad_size,
- u16 *out_mad_pkey_index)
+ const struct ib_mad *in, struct ib_mad *out,
+ size_t *out_mad_size, u16 *out_mad_pkey_index)
{
struct mlx4_ib_dev *dev = to_mdev(ibdev);
- const struct ib_mad *in_mad = (const struct ib_mad *)in;
- struct ib_mad *out_mad = (struct ib_mad *)out;
enum rdma_link_layer link = rdma_port_get_link_layer(ibdev, port_num);
- if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) ||
- *out_mad_size != sizeof(*out_mad)))
- return IB_MAD_RESULT_FAILURE;
-
/* iboe_process_mad() which uses the HCA flow-counters to implement IB PMA
* queries, should be called only by VFs and for that specific purpose
*/
if (link == IB_LINK_LAYER_INFINIBAND) {
if (mlx4_is_slave(dev->dev) &&
- (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT &&
- (in_mad->mad_hdr.attr_id == IB_PMA_PORT_COUNTERS ||
- in_mad->mad_hdr.attr_id == IB_PMA_PORT_COUNTERS_EXT ||
- in_mad->mad_hdr.attr_id == IB_PMA_CLASS_PORT_INFO)))
- return iboe_process_mad(ibdev, mad_flags, port_num, in_wc,
- in_grh, in_mad, out_mad);
+ (in->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT &&
+ (in->mad_hdr.attr_id == IB_PMA_PORT_COUNTERS ||
+ in->mad_hdr.attr_id == IB_PMA_PORT_COUNTERS_EXT ||
+ in->mad_hdr.attr_id == IB_PMA_CLASS_PORT_INFO)))
+ return iboe_process_mad(ibdev, mad_flags, port_num,
+ in_wc, in_grh, in, out);
- return ib_process_mad(ibdev, mad_flags, port_num, in_wc,
- in_grh, in_mad, out_mad);
+ return ib_process_mad(ibdev, mad_flags, port_num, in_wc, in_grh,
+ in, out);
}
if (link == IB_LINK_LAYER_ETHERNET)
return iboe_process_mad(ibdev, mad_flags, port_num, in_wc,
- in_grh, in_mad, out_mad);
+ in_grh, in, out);
return -EINVAL;
}
@@ -1018,7 +1016,7 @@ static void send_handler(struct ib_mad_agent *agent,
struct ib_mad_send_wc *mad_send_wc)
{
if (mad_send_wc->send_buf->context[0])
- ib_destroy_ah(mad_send_wc->send_buf->context[0]);
+ rdma_destroy_ah(mad_send_wc->send_buf->context[0], 0);
ib_free_send_mad(mad_send_wc->send_buf);
}
@@ -1073,11 +1071,11 @@ void mlx4_ib_mad_cleanup(struct mlx4_ib_dev *dev)
}
if (dev->sm_ah[p])
- ib_destroy_ah(dev->sm_ah[p]);
+ rdma_destroy_ah(dev->sm_ah[p], 0);
}
}
-static void handle_lid_change_event(struct mlx4_ib_dev *dev, u8 port_num)
+static void handle_lid_change_event(struct mlx4_ib_dev *dev, u32 port_num)
{
mlx4_ib_dispatch_event(dev, port_num, IB_EVENT_LID_CHANGE);
@@ -1086,7 +1084,7 @@ static void handle_lid_change_event(struct mlx4_ib_dev *dev, u8 port_num)
MLX4_EQ_PORT_INFO_LID_CHANGE_MASK);
}
-static void handle_client_rereg_event(struct mlx4_ib_dev *dev, u8 port_num)
+static void handle_client_rereg_event(struct mlx4_ib_dev *dev, u32 port_num)
{
/* re-configure the alias-guid and mcg's */
if (mlx4_is_master(dev->dev)) {
@@ -1125,7 +1123,7 @@ static void propagate_pkey_ev(struct mlx4_ib_dev *dev, int port_num,
GET_MASK_FROM_EQE(eqe));
}
-static void handle_slaves_guid_change(struct mlx4_ib_dev *dev, u8 port_num,
+static void handle_slaves_guid_change(struct mlx4_ib_dev *dev, u32 port_num,
u32 guid_tbl_blk_num, u32 change_bitmap)
{
struct ib_smp *in_mad = NULL;
@@ -1181,7 +1179,7 @@ void handle_port_mgmt_change_event(struct work_struct *work)
struct ib_event_work *ew = container_of(work, struct ib_event_work, work);
struct mlx4_ib_dev *dev = ew->ib_dev;
struct mlx4_eqe *eqe = &(ew->ib_eqe);
- u8 port = eqe->event.port_mgmt_change.port;
+ u32 port = eqe->event.port_mgmt_change.port;
u32 changed_attr;
u32 tbl_block;
u32 change_bitmap;
@@ -1278,7 +1276,7 @@ void handle_port_mgmt_change_event(struct work_struct *work)
kfree(ew);
}
-void mlx4_ib_dispatch_event(struct mlx4_ib_dev *dev, u8 port_num,
+void mlx4_ib_dispatch_event(struct mlx4_ib_dev *dev, u32 port_num,
enum ib_event_type type)
{
struct ib_event event;
@@ -1301,12 +1299,25 @@ static void mlx4_ib_tunnel_comp_handler(struct ib_cq *cq, void *arg)
spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags);
}
+static void mlx4_ib_wire_comp_handler(struct ib_cq *cq, void *arg)
+{
+ unsigned long flags;
+ struct mlx4_ib_demux_pv_ctx *ctx = cq->cq_context;
+ struct mlx4_ib_dev *dev = to_mdev(ctx->ib_dev);
+
+ spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
+ if (!dev->sriov.is_going_down && ctx->state == DEMUX_PV_STATE_ACTIVE)
+ queue_work(ctx->wi_wq, &ctx->work);
+ spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags);
+}
+
static int mlx4_ib_post_pv_qp_buf(struct mlx4_ib_demux_pv_ctx *ctx,
struct mlx4_ib_demux_pv_qp *tun_qp,
int index)
{
struct ib_sge sg_list;
- struct ib_recv_wr recv_wr, *bad_recv_wr;
+ struct ib_recv_wr recv_wr;
+ const struct ib_recv_wr *bad_recv_wr;
int size;
size = (tun_qp->qp->qp_type == IB_QPT_UD) ?
@@ -1342,33 +1353,23 @@ static int mlx4_ib_multiplex_sa_handler(struct ib_device *ibdev, int port,
return ret;
}
-static int is_proxy_qp0(struct mlx4_ib_dev *dev, int qpn, int slave)
-{
- int proxy_start = dev->dev->phys_caps.base_proxy_sqpn + 8 * slave;
-
- return (qpn >= proxy_start && qpn <= proxy_start + 1);
-}
-
-
-int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
+int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u32 port,
enum ib_qp_type dest_qpt, u16 pkey_index,
- u32 remote_qpn, u32 qkey, struct ib_ah_attr *attr,
+ u32 remote_qpn, u32 qkey, struct rdma_ah_attr *attr,
u8 *s_mac, u16 vlan_id, struct ib_mad *mad)
{
struct ib_sge list;
struct ib_ud_wr wr;
- struct ib_send_wr *bad_wr;
+ const struct ib_send_wr *bad_wr;
struct mlx4_ib_demux_pv_ctx *sqp_ctx;
struct mlx4_ib_demux_pv_qp *sqp;
struct mlx4_mad_snd_buf *sqp_mad;
struct ib_ah *ah;
struct ib_qp *send_qp = NULL;
unsigned wire_tx_ix = 0;
- int ret = 0;
u16 wire_pkey_ix;
int src_qpnum;
- u8 sgid_index;
-
+ int ret;
sqp_ctx = dev->sriov.sqps[port-1];
@@ -1388,29 +1389,32 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
send_qp = sqp->qp;
- /* create ah */
- sgid_index = attr->grh.sgid_index;
- attr->grh.sgid_index = 0;
- ah = ib_create_ah(sqp_ctx->pd, attr);
- if (IS_ERR(ah))
+ ah = rdma_zalloc_drv_obj(sqp_ctx->pd->device, ib_ah);
+ if (!ah)
return -ENOMEM;
- attr->grh.sgid_index = sgid_index;
- to_mah(ah)->av.ib.gid_index = sgid_index;
- /* get rid of force-loopback bit */
- to_mah(ah)->av.ib.port_pd &= cpu_to_be32(0x7FFFFFFF);
+
+ ah->device = sqp_ctx->pd->device;
+ ah->pd = sqp_ctx->pd;
+
+ /* create ah */
+ ret = mlx4_ib_create_ah_slave(ah, attr,
+ rdma_ah_retrieve_grh(attr)->sgid_index,
+ s_mac, vlan_id);
+ if (ret)
+ goto out;
+
spin_lock(&sqp->tx_lock);
if (sqp->tx_ix_head - sqp->tx_ix_tail >=
- (MLX4_NUM_TUNNEL_BUFS - 1))
+ (MLX4_NUM_WIRE_BUFS - 1))
ret = -EAGAIN;
else
- wire_tx_ix = (++sqp->tx_ix_head) & (MLX4_NUM_TUNNEL_BUFS - 1);
+ wire_tx_ix = (++sqp->tx_ix_head) & (MLX4_NUM_WIRE_BUFS - 1);
spin_unlock(&sqp->tx_lock);
if (ret)
goto out;
sqp_mad = (struct mlx4_mad_snd_buf *) (sqp->tx_ring[wire_tx_ix].buf.addr);
- if (sqp->tx_ring[wire_tx_ix].ah)
- ib_destroy_ah(sqp->tx_ring[wire_tx_ix].ah);
+ kfree(sqp->tx_ring[wire_tx_ix].ah);
sqp->tx_ring[wire_tx_ix].ah = ah;
ib_dma_sync_single_for_cpu(&dev->ib_dev,
sqp->tx_ring[wire_tx_ix].buf.map,
@@ -1439,12 +1443,6 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
wr.wr.num_sge = 1;
wr.wr.opcode = IB_WR_SEND;
wr.wr.send_flags = IB_SEND_SIGNALED;
- if (s_mac)
- memcpy(to_mah(ah)->av.eth.s_mac, s_mac, 6);
- if (vlan_id < 0x1000)
- vlan_id |= (attr->sl & 7) << 13;
- to_mah(ah)->av.eth.vlan = cpu_to_be16(vlan_id);
-
ret = ib_post_send(send_qp, &wr.wr, &bad_wr);
if (!ret)
@@ -1455,7 +1453,7 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
spin_unlock(&sqp->tx_lock);
sqp->tx_ring[wire_tx_ix].ah = NULL;
out:
- ib_destroy_ah(ah);
+ kfree(ah);
return ret;
}
@@ -1467,12 +1465,13 @@ static int get_slave_base_gid_ix(struct mlx4_ib_dev *dev, int slave, int port)
}
static void fill_in_real_sgid_index(struct mlx4_ib_dev *dev, int slave, int port,
- struct ib_ah_attr *ah_attr)
+ struct rdma_ah_attr *ah_attr)
{
+ struct ib_global_route *grh = rdma_ah_retrieve_grh(ah_attr);
if (rdma_port_get_link_layer(&dev->ib_dev, port) == IB_LINK_LAYER_INFINIBAND)
- ah_attr->grh.sgid_index = slave;
+ grh->sgid_index = slave;
else
- ah_attr->grh.sgid_index += get_slave_base_gid_ix(dev, slave, port);
+ grh->sgid_index += get_slave_base_gid_ix(dev, slave, port);
}
static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc *wc)
@@ -1482,11 +1481,14 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc
int wr_ix = wc->wr_id & (MLX4_NUM_TUNNEL_BUFS - 1);
struct mlx4_tunnel_mad *tunnel = tun_qp->ring[wr_ix].addr;
struct mlx4_ib_ah ah;
- struct ib_ah_attr ah_attr;
+ struct rdma_ah_attr ah_attr;
u8 *slave_id;
int slave;
int port;
u16 vlan_id;
+ u8 qos;
+ u8 *dmac;
+ int sts;
/* Get slave that sent this packet */
if (wc->src_qp < dev->dev->phys_caps.base_proxy_sqpn ||
@@ -1523,6 +1525,7 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc
return;
} else
*slave_id = slave;
+ break;
default:
/* nothing */;
}
@@ -1569,24 +1572,31 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc
if (port < 0)
return;
ah.av.ib.port_pd = cpu_to_be32(port << 24 | (be32_to_cpu(ah.av.ib.port_pd) & 0xffffff));
+ ah.ibah.type = rdma_ah_find_type(&dev->ib_dev, port);
mlx4_ib_query_ah(&ah.ibah, &ah_attr);
- if (ah_attr.ah_flags & IB_AH_GRH)
+ if (rdma_ah_get_ah_flags(&ah_attr) & IB_AH_GRH)
fill_in_real_sgid_index(dev, slave, ctx->port, &ah_attr);
-
- memcpy(ah_attr.dmac, tunnel->hdr.mac, 6);
+ dmac = rdma_ah_retrieve_dmac(&ah_attr);
+ if (dmac)
+ memcpy(dmac, tunnel->hdr.mac, ETH_ALEN);
vlan_id = be16_to_cpu(tunnel->hdr.vlan);
/* if slave have default vlan use it */
- mlx4_get_slave_default_vlan(dev->dev, ctx->port, slave,
- &vlan_id, &ah_attr.sl);
-
- mlx4_ib_send_to_wire(dev, slave, ctx->port,
- is_proxy_qp0(dev, wc->src_qp, slave) ?
- IB_QPT_SMI : IB_QPT_GSI,
- be16_to_cpu(tunnel->hdr.pkey_index),
- be32_to_cpu(tunnel->hdr.remote_qpn),
- be32_to_cpu(tunnel->hdr.qkey),
- &ah_attr, wc->smac, vlan_id, &tunnel->mad);
+ if (mlx4_get_slave_default_vlan(dev->dev, ctx->port, slave,
+ &vlan_id, &qos))
+ rdma_ah_set_sl(&ah_attr, qos);
+
+ sts = mlx4_ib_send_to_wire(dev, slave, ctx->port,
+ is_proxy_qp0(dev, wc->src_qp, slave) ?
+ IB_QPT_SMI : IB_QPT_GSI,
+ be16_to_cpu(tunnel->hdr.pkey_index),
+ be32_to_cpu(tunnel->hdr.remote_qpn),
+ be32_to_cpu(tunnel->hdr.qkey),
+ &ah_attr, wc->smac, vlan_id, &tunnel->mad);
+ if (sts)
+ pr_debug("failed sending %s to wire on behalf of slave %d (%d)\n",
+ is_proxy_qp0(dev, wc->src_qp, slave) ? "SMI" : "GSI",
+ slave, sts);
}
static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
@@ -1595,18 +1605,20 @@ static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
int i;
struct mlx4_ib_demux_pv_qp *tun_qp;
int rx_buf_size, tx_buf_size;
+ const int nmbr_bufs = is_tun ? MLX4_NUM_TUNNEL_BUFS : MLX4_NUM_WIRE_BUFS;
if (qp_type > IB_QPT_GSI)
return -EINVAL;
tun_qp = &ctx->qp[qp_type];
- tun_qp->ring = kzalloc(sizeof (struct mlx4_ib_buf) * MLX4_NUM_TUNNEL_BUFS,
+ tun_qp->ring = kcalloc(nmbr_bufs,
+ sizeof(struct mlx4_ib_buf),
GFP_KERNEL);
if (!tun_qp->ring)
return -ENOMEM;
- tun_qp->tx_ring = kcalloc(MLX4_NUM_TUNNEL_BUFS,
+ tun_qp->tx_ring = kcalloc(nmbr_bufs,
sizeof (struct mlx4_ib_tun_tx_buf),
GFP_KERNEL);
if (!tun_qp->tx_ring) {
@@ -1623,7 +1635,7 @@ static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
tx_buf_size = sizeof (struct mlx4_mad_snd_buf);
}
- for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
+ for (i = 0; i < nmbr_bufs; i++) {
tun_qp->ring[i].addr = kmalloc(rx_buf_size, GFP_KERNEL);
if (!tun_qp->ring[i].addr)
goto err;
@@ -1637,7 +1649,7 @@ static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
}
}
- for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
+ for (i = 0; i < nmbr_bufs; i++) {
tun_qp->tx_ring[i].buf.addr =
kmalloc(tx_buf_size, GFP_KERNEL);
if (!tun_qp->tx_ring[i].buf.addr)
@@ -1668,9 +1680,7 @@ tx_err:
tx_buf_size, DMA_TO_DEVICE);
kfree(tun_qp->tx_ring[i].buf.addr);
}
- kfree(tun_qp->tx_ring);
- tun_qp->tx_ring = NULL;
- i = MLX4_NUM_TUNNEL_BUFS;
+ i = nmbr_bufs;
err:
while (i > 0) {
--i;
@@ -1678,6 +1688,8 @@ err:
rx_buf_size, DMA_FROM_DEVICE);
kfree(tun_qp->ring[i].addr);
}
+ kfree(tun_qp->tx_ring);
+ tun_qp->tx_ring = NULL;
kfree(tun_qp->ring);
tun_qp->ring = NULL;
return -ENOMEM;
@@ -1689,6 +1701,7 @@ static void mlx4_ib_free_pv_qp_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
int i;
struct mlx4_ib_demux_pv_qp *tun_qp;
int rx_buf_size, tx_buf_size;
+ const int nmbr_bufs = is_tun ? MLX4_NUM_TUNNEL_BUFS : MLX4_NUM_WIRE_BUFS;
if (qp_type > IB_QPT_GSI)
return;
@@ -1703,18 +1716,18 @@ static void mlx4_ib_free_pv_qp_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
}
- for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
+ for (i = 0; i < nmbr_bufs; i++) {
ib_dma_unmap_single(ctx->ib_dev, tun_qp->ring[i].map,
rx_buf_size, DMA_FROM_DEVICE);
kfree(tun_qp->ring[i].addr);
}
- for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
+ for (i = 0; i < nmbr_bufs; i++) {
ib_dma_unmap_single(ctx->ib_dev, tun_qp->tx_ring[i].buf.map,
tx_buf_size, DMA_TO_DEVICE);
kfree(tun_qp->tx_ring[i].buf.addr);
if (tun_qp->tx_ring[i].ah)
- ib_destroy_ah(tun_qp->tx_ring[i].ah);
+ rdma_destroy_ah(tun_qp->tx_ring[i].ah, 0);
}
kfree(tun_qp->tx_ring);
kfree(tun_qp->ring);
@@ -1743,11 +1756,8 @@ static void mlx4_ib_tunnel_comp_worker(struct work_struct *work)
"buf:%lld\n", wc.wr_id);
break;
case IB_WC_SEND:
- pr_debug("received tunnel send completion:"
- "wrid=0x%llx, status=0x%x\n",
- wc.wr_id, wc.status);
- ib_destroy_ah(tun_qp->tx_ring[wc.wr_id &
- (MLX4_NUM_TUNNEL_BUFS - 1)].ah);
+ rdma_destroy_ah(tun_qp->tx_ring[wc.wr_id &
+ (MLX4_NUM_TUNNEL_BUFS - 1)].ah, 0);
tun_qp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah
= NULL;
spin_lock(&tun_qp->tx_lock);
@@ -1763,8 +1773,8 @@ static void mlx4_ib_tunnel_comp_worker(struct work_struct *work)
" status = %d, wrid = 0x%llx\n",
ctx->slave, wc.status, wc.wr_id);
if (!MLX4_TUN_IS_RECV(wc.wr_id)) {
- ib_destroy_ah(tun_qp->tx_ring[wc.wr_id &
- (MLX4_NUM_TUNNEL_BUFS - 1)].ah);
+ rdma_destroy_ah(tun_qp->tx_ring[wc.wr_id &
+ (MLX4_NUM_TUNNEL_BUFS - 1)].ah, 0);
tun_qp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah
= NULL;
spin_lock(&tun_qp->tx_lock);
@@ -1792,6 +1802,7 @@ static int create_pv_sqp(struct mlx4_ib_demux_pv_ctx *ctx,
struct mlx4_ib_qp_tunnel_init_attr qp_init_attr;
struct ib_qp_attr attr;
int qp_attr_mask_INIT;
+ const int nmbr_bufs = create_tun ? MLX4_NUM_TUNNEL_BUFS : MLX4_NUM_WIRE_BUFS;
if (qp_type > IB_QPT_GSI)
return -EINVAL;
@@ -1802,8 +1813,8 @@ static int create_pv_sqp(struct mlx4_ib_demux_pv_ctx *ctx,
qp_init_attr.init_attr.send_cq = ctx->cq;
qp_init_attr.init_attr.recv_cq = ctx->cq;
qp_init_attr.init_attr.sq_sig_type = IB_SIGNAL_ALL_WR;
- qp_init_attr.init_attr.cap.max_send_wr = MLX4_NUM_TUNNEL_BUFS;
- qp_init_attr.init_attr.cap.max_recv_wr = MLX4_NUM_TUNNEL_BUFS;
+ qp_init_attr.init_attr.cap.max_send_wr = nmbr_bufs;
+ qp_init_attr.init_attr.cap.max_recv_wr = nmbr_bufs;
qp_init_attr.init_attr.cap.max_send_sge = 1;
qp_init_attr.init_attr.cap.max_recv_sge = 1;
if (create_tun) {
@@ -1825,9 +1836,9 @@ static int create_pv_sqp(struct mlx4_ib_demux_pv_ctx *ctx,
tun_qp->qp = ib_create_qp(ctx->pd, &qp_init_attr.init_attr);
if (IS_ERR(tun_qp->qp)) {
ret = PTR_ERR(tun_qp->qp);
+ pr_err("Couldn't create %s QP (%pe)\n",
+ create_tun ? "tunnel" : "special", tun_qp->qp);
tun_qp->qp = NULL;
- pr_err("Couldn't create %s QP (%d)\n",
- create_tun ? "tunnel" : "special", ret);
return ret;
}
@@ -1865,7 +1876,7 @@ static int create_pv_sqp(struct mlx4_ib_demux_pv_ctx *ctx,
goto err_qp;
}
- for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
+ for (i = 0; i < nmbr_bufs; i++) {
ret = mlx4_ib_post_pv_qp_buf(ctx, tun_qp, i);
if (ret) {
pr_err(" mlx4_ib_post_pv_buf error"
@@ -1900,9 +1911,9 @@ static void mlx4_ib_sqp_comp_worker(struct work_struct *work)
if (wc.status == IB_WC_SUCCESS) {
switch (wc.opcode) {
case IB_WC_SEND:
- ib_destroy_ah(sqp->tx_ring[wc.wr_id &
- (MLX4_NUM_TUNNEL_BUFS - 1)].ah);
- sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah
+ kfree(sqp->tx_ring[wc.wr_id &
+ (MLX4_NUM_WIRE_BUFS - 1)].ah);
+ sqp->tx_ring[wc.wr_id & (MLX4_NUM_WIRE_BUFS - 1)].ah
= NULL;
spin_lock(&sqp->tx_lock);
sqp->tx_ix_tail++;
@@ -1911,18 +1922,17 @@ static void mlx4_ib_sqp_comp_worker(struct work_struct *work)
case IB_WC_RECV:
mad = (struct ib_mad *) &(((struct mlx4_mad_rcv_buf *)
(sqp->ring[wc.wr_id &
- (MLX4_NUM_TUNNEL_BUFS - 1)].addr))->payload);
+ (MLX4_NUM_WIRE_BUFS - 1)].addr))->payload);
grh = &(((struct mlx4_mad_rcv_buf *)
(sqp->ring[wc.wr_id &
- (MLX4_NUM_TUNNEL_BUFS - 1)].addr))->grh);
+ (MLX4_NUM_WIRE_BUFS - 1)].addr))->grh);
mlx4_ib_demux_mad(ctx->ib_dev, ctx->port, &wc, grh, mad);
if (mlx4_ib_post_pv_qp_buf(ctx, sqp, wc.wr_id &
- (MLX4_NUM_TUNNEL_BUFS - 1)))
+ (MLX4_NUM_WIRE_BUFS - 1)))
pr_err("Failed reposting SQP "
"buf:%lld\n", wc.wr_id);
break;
default:
- BUG_ON(1);
break;
}
} else {
@@ -1930,9 +1940,9 @@ static void mlx4_ib_sqp_comp_worker(struct work_struct *work)
" status = %d, wrid = 0x%llx\n",
ctx->slave, wc.status, wc.wr_id);
if (!MLX4_TUN_IS_RECV(wc.wr_id)) {
- ib_destroy_ah(sqp->tx_ring[wc.wr_id &
- (MLX4_NUM_TUNNEL_BUFS - 1)].ah);
- sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah
+ kfree(sqp->tx_ring[wc.wr_id &
+ (MLX4_NUM_WIRE_BUFS - 1)].ah);
+ sqp->tx_ring[wc.wr_id & (MLX4_NUM_WIRE_BUFS - 1)].ah
= NULL;
spin_lock(&sqp->tx_lock);
sqp->tx_ix_tail++;
@@ -1972,6 +1982,7 @@ static int create_pv_resources(struct ib_device *ibdev, int slave, int port,
{
int ret, cq_size;
struct ib_cq_init_attr cq_attr = {};
+ const int nmbr_bufs = create_tun ? MLX4_NUM_TUNNEL_BUFS : MLX4_NUM_WIRE_BUFS;
if (ctx->state != DEMUX_PV_STATE_DOWN)
return -EEXIST;
@@ -1996,23 +2007,24 @@ static int create_pv_resources(struct ib_device *ibdev, int slave, int port,
goto err_out_qp0;
}
- cq_size = 2 * MLX4_NUM_TUNNEL_BUFS;
+ cq_size = 2 * nmbr_bufs;
if (ctx->has_smi)
cq_size *= 2;
cq_attr.cqe = cq_size;
- ctx->cq = ib_create_cq(ctx->ib_dev, mlx4_ib_tunnel_comp_handler,
+ ctx->cq = ib_create_cq(ctx->ib_dev,
+ create_tun ? mlx4_ib_tunnel_comp_handler : mlx4_ib_wire_comp_handler,
NULL, ctx, &cq_attr);
if (IS_ERR(ctx->cq)) {
ret = PTR_ERR(ctx->cq);
- pr_err("Couldn't create tunnel CQ (%d)\n", ret);
+ pr_err("Couldn't create tunnel CQ (%pe)\n", ctx->cq);
goto err_buf;
}
ctx->pd = ib_alloc_pd(ctx->ib_dev, 0);
if (IS_ERR(ctx->pd)) {
ret = PTR_ERR(ctx->pd);
- pr_err("Couldn't create tunnel PD (%d)\n", ret);
+ pr_err("Couldn't create tunnel PD (%pe)\n", ctx->pd);
goto err_cq;
}
@@ -2038,6 +2050,7 @@ static int create_pv_resources(struct ib_device *ibdev, int slave, int port,
INIT_WORK(&ctx->work, mlx4_ib_sqp_comp_worker);
ctx->wq = to_mdev(ibdev)->sriov.demux[port - 1].wq;
+ ctx->wi_wq = to_mdev(ibdev)->sriov.demux[port - 1].wi_wq;
ret = ib_req_notify_cq(ctx->cq, IB_CQ_NEXT_COMP);
if (ret) {
@@ -2145,7 +2158,6 @@ static int mlx4_ib_alloc_demux_ctx(struct mlx4_ib_dev *dev,
struct mlx4_ib_demux_ctx *ctx,
int port)
{
- char name[12];
int ret = 0;
int i;
@@ -2181,16 +2193,21 @@ static int mlx4_ib_alloc_demux_ctx(struct mlx4_ib_dev *dev,
goto err_mcg;
}
- snprintf(name, sizeof name, "mlx4_ibt%d", port);
- ctx->wq = alloc_ordered_workqueue(name, WQ_MEM_RECLAIM);
+ ctx->wq = alloc_ordered_workqueue("mlx4_ibt%d", WQ_MEM_RECLAIM, port);
if (!ctx->wq) {
pr_err("Failed to create tunnelling WQ for port %d\n", port);
ret = -ENOMEM;
goto err_wq;
}
- snprintf(name, sizeof name, "mlx4_ibud%d", port);
- ctx->ud_wq = alloc_ordered_workqueue(name, WQ_MEM_RECLAIM);
+ ctx->wi_wq = alloc_ordered_workqueue("mlx4_ibwi%d", WQ_MEM_RECLAIM, port);
+ if (!ctx->wi_wq) {
+ pr_err("Failed to create wire WQ for port %d\n", port);
+ ret = -ENOMEM;
+ goto err_wiwq;
+ }
+
+ ctx->ud_wq = alloc_ordered_workqueue("mlx4_ibud%d", WQ_MEM_RECLAIM, port);
if (!ctx->ud_wq) {
pr_err("Failed to create up/down WQ for port %d\n", port);
ret = -ENOMEM;
@@ -2200,6 +2217,10 @@ static int mlx4_ib_alloc_demux_ctx(struct mlx4_ib_dev *dev,
return 0;
err_udwq:
+ destroy_workqueue(ctx->wi_wq);
+ ctx->wi_wq = NULL;
+
+err_wiwq:
destroy_workqueue(ctx->wq);
ctx->wq = NULL;
@@ -2247,12 +2268,14 @@ static void mlx4_ib_free_demux_ctx(struct mlx4_ib_demux_ctx *ctx)
ctx->tun[i]->state = DEMUX_PV_STATE_DOWNING;
}
flush_workqueue(ctx->wq);
+ flush_workqueue(ctx->wi_wq);
for (i = 0; i < dev->dev->caps.sqp_demux; i++) {
destroy_pv_resources(dev, i, ctx->port, ctx->tun[i], 0);
free_pv_object(dev, i, ctx->port);
}
kfree(ctx->tun);
destroy_workqueue(ctx->ud_wq);
+ destroy_workqueue(ctx->wi_wq);
destroy_workqueue(ctx->wq);
}
}
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 7031a8dd4d14..dd35e03402ab 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -39,6 +39,9 @@
#include <linux/inetdevice.h>
#include <linux/rtnetlink.h>
#include <linux/if_vlan.h>
+#include <linux/sched/mm.h>
+#include <linux/sched/task.h>
+
#include <net/ipv6.h>
#include <net/addrconf.h>
#include <net/devlink.h>
@@ -58,8 +61,7 @@
#include <rdma/mlx4-abi.h>
#define DRV_NAME MLX4_IB_DRV_NAME
-#define DRV_VERSION "2.2-1"
-#define DRV_RELDATE "Feb 2014"
+#define DRV_VERSION "4.0-0"
#define MLX4_IB_FLOW_MAX_PRIO 0xFFF
#define MLX4_IB_FLOW_QPN_MASK 0xFFFFFF
@@ -68,7 +70,6 @@
MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("Mellanox ConnectX HCA InfiniBand driver");
MODULE_LICENSE("Dual BSD/GPL");
-MODULE_VERSION(DRV_VERSION);
int mlx4_ib_sm_guid_assign = 0;
module_param_named(sm_guid_assign, mlx4_ib_sm_guid_assign, int, 0444);
@@ -76,20 +77,16 @@ MODULE_PARM_DESC(sm_guid_assign, "Enable SM alias_GUID assignment if sm_guid_ass
static const char mlx4_ib_version[] =
DRV_NAME ": Mellanox ConnectX InfiniBand driver v"
- DRV_VERSION " (" DRV_RELDATE ")\n";
+ DRV_VERSION "\n";
static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init);
+static enum rdma_link_layer mlx4_ib_port_link_layer(struct ib_device *device,
+ u32 port_num);
+static int mlx4_ib_event(struct notifier_block *this, unsigned long event,
+ void *param);
static struct workqueue_struct *wq;
-static void init_query_mad(struct ib_smp *mad)
-{
- mad->base_version = 1;
- mad->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED;
- mad->class_version = 1;
- mad->method = IB_MGMT_METHOD_GET;
-}
-
static int check_flow_steering_support(struct mlx4_dev *dev)
{
int eth_num_ports = 0;
@@ -126,17 +123,20 @@ static int num_ib_ports(struct mlx4_dev *dev)
return ib_ports;
}
-static struct net_device *mlx4_ib_get_netdev(struct ib_device *device, u8 port_num)
+static struct net_device *mlx4_ib_get_netdev(struct ib_device *device,
+ u32 port_num)
{
struct mlx4_ib_dev *ibdev = to_mdev(device);
- struct net_device *dev;
+ struct net_device *dev, *ret = NULL;
rcu_read_lock();
- dev = mlx4_get_protocol_dev(ibdev->dev, MLX4_PROT_ETH, port_num);
+ for_each_netdev_rcu(&init_net, dev) {
+ if (dev->dev.parent != ibdev->ib_dev.dev.parent ||
+ dev->dev_port + 1 != port_num)
+ continue;
- if (dev) {
if (mlx4_is_bonded(ibdev->dev)) {
- struct net_device *upper = NULL;
+ struct net_device *upper;
upper = netdev_master_upper_dev_get_rcu(dev);
if (upper) {
@@ -147,17 +147,19 @@ static struct net_device *mlx4_ib_get_netdev(struct ib_device *device, u8 port_n
dev = active;
}
}
- }
- if (dev)
+
dev_hold(dev);
+ ret = dev;
+ break;
+ }
rcu_read_unlock();
- return dev;
+ return ret;
}
static int mlx4_ib_update_gids_v1(struct gid_entry *gids,
struct mlx4_ib_dev *ibdev,
- u8 port_num)
+ u32 port_num)
{
struct mlx4_cmd_mailbox *mailbox;
int err;
@@ -190,7 +192,7 @@ static int mlx4_ib_update_gids_v1(struct gid_entry *gids,
static int mlx4_ib_update_gids_v1_v2(struct gid_entry *gids,
struct mlx4_ib_dev *ibdev,
- u8 port_num)
+ u32 port_num)
{
struct mlx4_cmd_mailbox *mailbox;
int err;
@@ -216,8 +218,6 @@ static int mlx4_ib_update_gids_v1_v2(struct gid_entry *gids,
gid_tbl[i].version = 2;
if (!ipv6_addr_v4mapped((struct in6_addr *)&gids[i].gid))
gid_tbl[i].type = 1;
- else
- memset(&gid_tbl[i].gid, 0, 12);
}
}
@@ -237,7 +237,7 @@ static int mlx4_ib_update_gids_v1_v2(struct gid_entry *gids,
static int mlx4_ib_update_gids(struct gid_entry *gids,
struct mlx4_ib_dev *ibdev,
- u8 port_num)
+ u32 port_num)
{
if (ibdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2)
return mlx4_ib_update_gids_v1_v2(gids, ibdev, port_num);
@@ -245,40 +245,49 @@ static int mlx4_ib_update_gids(struct gid_entry *gids,
return mlx4_ib_update_gids_v1(gids, ibdev, port_num);
}
-static int mlx4_ib_add_gid(struct ib_device *device,
- u8 port_num,
- unsigned int index,
- const union ib_gid *gid,
- const struct ib_gid_attr *attr,
- void **context)
+static void free_gid_entry(struct gid_entry *entry)
{
- struct mlx4_ib_dev *ibdev = to_mdev(device);
+ memset(&entry->gid, 0, sizeof(entry->gid));
+ kfree(entry->ctx);
+ entry->ctx = NULL;
+}
+
+static int mlx4_ib_add_gid(const struct ib_gid_attr *attr, void **context)
+{
+ struct mlx4_ib_dev *ibdev = to_mdev(attr->device);
struct mlx4_ib_iboe *iboe = &ibdev->iboe;
struct mlx4_port_gid_table *port_gid_table;
int free = -1, found = -1;
int ret = 0;
int hw_update = 0;
int i;
- struct gid_entry *gids = NULL;
+ struct gid_entry *gids;
+ u16 vlan_id = 0xffff;
+ u8 mac[ETH_ALEN];
- if (!rdma_cap_roce_gid_table(device, port_num))
+ if (!rdma_cap_roce_gid_table(attr->device, attr->port_num))
return -EINVAL;
- if (port_num > MLX4_MAX_PORTS)
+ if (attr->port_num > MLX4_MAX_PORTS)
return -EINVAL;
if (!context)
return -EINVAL;
- port_gid_table = &iboe->gids[port_num - 1];
+ ret = rdma_read_gid_l2_fields(attr, &vlan_id, &mac[0]);
+ if (ret)
+ return ret;
+ port_gid_table = &iboe->gids[attr->port_num - 1];
spin_lock_bh(&iboe->lock);
for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i) {
- if (!memcmp(&port_gid_table->gids[i].gid, gid, sizeof(*gid)) &&
- (port_gid_table->gids[i].gid_type == attr->gid_type)) {
+ if (!memcmp(&port_gid_table->gids[i].gid,
+ &attr->gid, sizeof(attr->gid)) &&
+ port_gid_table->gids[i].gid_type == attr->gid_type &&
+ port_gid_table->gids[i].vlan_id == vlan_id) {
found = i;
break;
}
- if (free < 0 && !memcmp(&port_gid_table->gids[i].gid, &zgid, sizeof(*gid)))
+ if (free < 0 && rdma_is_zero_gid(&port_gid_table->gids[i].gid))
free = i; /* HW has space */
}
@@ -291,8 +300,9 @@ static int mlx4_ib_add_gid(struct ib_device *device,
ret = -ENOMEM;
} else {
*context = port_gid_table->gids[free].ctx;
- memcpy(&port_gid_table->gids[free].gid, gid, sizeof(*gid));
+ port_gid_table->gids[free].gid = attr->gid;
port_gid_table->gids[free].gid_type = attr->gid_type;
+ port_gid_table->gids[free].vlan_id = vlan_id;
port_gid_table->gids[free].ctx->real_index = free;
port_gid_table->gids[free].ctx->refcount = 1;
hw_update = 1;
@@ -304,9 +314,12 @@ static int mlx4_ib_add_gid(struct ib_device *device,
ctx->refcount++;
}
if (!ret && hw_update) {
- gids = kmalloc(sizeof(*gids) * MLX4_MAX_PORT_GIDS, GFP_ATOMIC);
+ gids = kmalloc_array(MLX4_MAX_PORT_GIDS, sizeof(*gids),
+ GFP_ATOMIC);
if (!gids) {
ret = -ENOMEM;
+ *context = NULL;
+ free_gid_entry(&port_gid_table->gids[free]);
} else {
for (i = 0; i < MLX4_MAX_PORT_GIDS; i++) {
memcpy(&gids[i].gid, &port_gid_table->gids[i].gid, sizeof(union ib_gid));
@@ -317,77 +330,82 @@ static int mlx4_ib_add_gid(struct ib_device *device,
spin_unlock_bh(&iboe->lock);
if (!ret && hw_update) {
- ret = mlx4_ib_update_gids(gids, ibdev, port_num);
+ ret = mlx4_ib_update_gids(gids, ibdev, attr->port_num);
+ if (ret) {
+ spin_lock_bh(&iboe->lock);
+ *context = NULL;
+ free_gid_entry(&port_gid_table->gids[free]);
+ spin_unlock_bh(&iboe->lock);
+ }
kfree(gids);
}
return ret;
}
-static int mlx4_ib_del_gid(struct ib_device *device,
- u8 port_num,
- unsigned int index,
- void **context)
+static int mlx4_ib_del_gid(const struct ib_gid_attr *attr, void **context)
{
struct gid_cache_context *ctx = *context;
- struct mlx4_ib_dev *ibdev = to_mdev(device);
+ struct mlx4_ib_dev *ibdev = to_mdev(attr->device);
struct mlx4_ib_iboe *iboe = &ibdev->iboe;
struct mlx4_port_gid_table *port_gid_table;
int ret = 0;
int hw_update = 0;
struct gid_entry *gids = NULL;
- if (!rdma_cap_roce_gid_table(device, port_num))
+ if (!rdma_cap_roce_gid_table(attr->device, attr->port_num))
return -EINVAL;
- if (port_num > MLX4_MAX_PORTS)
+ if (attr->port_num > MLX4_MAX_PORTS)
return -EINVAL;
- port_gid_table = &iboe->gids[port_num - 1];
+ port_gid_table = &iboe->gids[attr->port_num - 1];
spin_lock_bh(&iboe->lock);
if (ctx) {
ctx->refcount--;
if (!ctx->refcount) {
unsigned int real_index = ctx->real_index;
- memcpy(&port_gid_table->gids[real_index].gid, &zgid, sizeof(zgid));
- kfree(port_gid_table->gids[real_index].ctx);
- port_gid_table->gids[real_index].ctx = NULL;
+ free_gid_entry(&port_gid_table->gids[real_index]);
hw_update = 1;
}
}
if (!ret && hw_update) {
int i;
- gids = kmalloc(sizeof(*gids) * MLX4_MAX_PORT_GIDS, GFP_ATOMIC);
+ gids = kmalloc_array(MLX4_MAX_PORT_GIDS, sizeof(*gids),
+ GFP_ATOMIC);
if (!gids) {
ret = -ENOMEM;
} else {
- for (i = 0; i < MLX4_MAX_PORT_GIDS; i++)
- memcpy(&gids[i].gid, &port_gid_table->gids[i].gid, sizeof(union ib_gid));
+ for (i = 0; i < MLX4_MAX_PORT_GIDS; i++) {
+ memcpy(&gids[i].gid,
+ &port_gid_table->gids[i].gid,
+ sizeof(union ib_gid));
+ gids[i].gid_type =
+ port_gid_table->gids[i].gid_type;
+ }
}
}
spin_unlock_bh(&iboe->lock);
- if (!ret && hw_update) {
- ret = mlx4_ib_update_gids(gids, ibdev, port_num);
- kfree(gids);
- }
+ if (gids)
+ ret = mlx4_ib_update_gids(gids, ibdev, attr->port_num);
+
+ kfree(gids);
return ret;
}
int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev,
- u8 port_num, int index)
+ const struct ib_gid_attr *attr)
{
struct mlx4_ib_iboe *iboe = &ibdev->iboe;
struct gid_cache_context *ctx = NULL;
- union ib_gid gid;
struct mlx4_port_gid_table *port_gid_table;
int real_index = -EINVAL;
int i;
- int ret;
unsigned long flags;
- struct ib_gid_attr attr;
+ u32 port_num = attr->port_num;
if (port_num > MLX4_MAX_PORTS)
return -EINVAL;
@@ -396,24 +414,15 @@ int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev,
port_num = 1;
if (!rdma_cap_roce_gid_table(&ibdev->ib_dev, port_num))
- return index;
-
- ret = ib_get_cached_gid(&ibdev->ib_dev, port_num, index, &gid, &attr);
- if (ret)
- return ret;
-
- if (attr.ndev)
- dev_put(attr.ndev);
-
- if (!memcmp(&gid, &zgid, sizeof(gid)))
- return -EINVAL;
+ return attr->index;
spin_lock_irqsave(&iboe->lock, flags);
port_gid_table = &iboe->gids[port_num - 1];
for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i)
- if (!memcmp(&port_gid_table->gids[i].gid, &gid, sizeof(gid)) &&
- attr.gid_type == port_gid_table->gids[i].gid_type) {
+ if (!memcmp(&port_gid_table->gids[i].gid,
+ &attr->gid, sizeof(attr->gid)) &&
+ attr->gid_type == port_gid_table->gids[i].gid_type) {
ctx = port_gid_table->gids[i].ctx;
break;
}
@@ -428,12 +437,12 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
struct ib_udata *uhw)
{
struct mlx4_ib_dev *dev = to_mdev(ibdev);
- struct ib_smp *in_mad = NULL;
- struct ib_smp *out_mad = NULL;
+ struct ib_smp *in_mad;
+ struct ib_smp *out_mad;
int err;
int have_ib_ports;
struct mlx4_uverbs_ex_query_device cmd;
- struct mlx4_uverbs_ex_query_device_resp resp = {.comp_mask = 0};
+ struct mlx4_uverbs_ex_query_device_resp resp = {};
struct mlx4_clock_params clock_params;
if (uhw->inlen) {
@@ -459,7 +468,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
if (!in_mad || !out_mad)
goto out;
- init_query_mad(in_mad);
+ ib_init_query_mad(in_mad);
in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;
err = mlx4_MAD_IFC(to_mdev(ibdev), MLX4_MAD_IFC_IGNORE_KEYS,
@@ -475,8 +484,8 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
props->device_cap_flags = IB_DEVICE_CHANGE_PHY_PORT |
IB_DEVICE_PORT_ACTIVE_EVENT |
IB_DEVICE_SYS_IMAGE_GUID |
- IB_DEVICE_RC_RNR_NAK_GEN |
- IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;
+ IB_DEVICE_RC_RNR_NAK_GEN;
+ props->kernel_cap_flags = IBK_BLOCK_MULTICAST_LOOPBACK;
if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BAD_PKEY_CNTR)
props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR;
if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BAD_QKEY_CNTR)
@@ -490,9 +499,9 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
if (dev->dev->caps.max_gso_sz &&
(dev->dev->rev_id != MLX4_IB_CARD_REV_A0) &&
(dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BLH))
- props->device_cap_flags |= IB_DEVICE_UD_TSO;
+ props->kernel_cap_flags |= IBK_UD_TSO;
if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_RESERVED_LKEY)
- props->device_cap_flags |= IB_DEVICE_LOCAL_DMA_LKEY;
+ props->kernel_cap_flags |= IBK_LOCAL_DMA_LKEY;
if ((dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_LOCAL_INV) &&
(dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_REMOTE_INV) &&
(dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_FAST_REG_WR))
@@ -522,9 +531,11 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
props->page_size_cap = dev->dev->caps.page_size_cap;
props->max_qp = dev->dev->quotas.qp;
props->max_qp_wr = dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE;
- props->max_sge = min(dev->dev->caps.max_sq_sg,
- dev->dev->caps.max_rq_sg);
- props->max_sge_rd = MLX4_MAX_SGE_RD;
+ props->max_send_sge =
+ min(dev->dev->caps.max_sq_sg, dev->dev->caps.max_rq_sg);
+ props->max_recv_sge =
+ min(dev->dev->caps.max_sq_sg, dev->dev->caps.max_rq_sg);
+ props->max_sge_rd = MLX4_MAX_SGE_RD;
props->max_cq = dev->dev->quotas.cq;
props->max_cqe = dev->dev->caps.max_cqes;
props->max_mr = dev->dev->quotas.mpt;
@@ -545,22 +556,81 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
props->max_mcast_qp_attach = dev->dev->caps.num_qp_per_mgm;
props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
props->max_mcast_grp;
- props->max_map_per_fmr = dev->dev->caps.max_fmr_maps;
props->hca_core_clock = dev->dev->caps.hca_core_clock * 1000UL;
props->timestamp_mask = 0xFFFFFFFFFFFFULL;
props->max_ah = INT_MAX;
- if (!mlx4_is_slave(dev->dev))
- err = mlx4_get_internal_clock_params(dev->dev, &clock_params);
+ if (mlx4_ib_port_link_layer(ibdev, 1) == IB_LINK_LAYER_ETHERNET ||
+ mlx4_ib_port_link_layer(ibdev, 2) == IB_LINK_LAYER_ETHERNET) {
+ if (dev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS) {
+ props->rss_caps.max_rwq_indirection_tables =
+ props->max_qp;
+ props->rss_caps.max_rwq_indirection_table_size =
+ dev->dev->caps.max_rss_tbl_sz;
+ props->rss_caps.supported_qpts = 1 << IB_QPT_RAW_PACKET;
+ props->max_wq_type_rq = props->max_qp;
+ }
+
+ if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_FCS_KEEP)
+ props->raw_packet_caps |= IB_RAW_PACKET_CAP_SCATTER_FCS;
+ }
+
+ props->cq_caps.max_cq_moderation_count = MLX4_MAX_CQ_COUNT;
+ props->cq_caps.max_cq_moderation_period = MLX4_MAX_CQ_PERIOD;
if (uhw->outlen >= resp.response_length + sizeof(resp.hca_core_clock_offset)) {
resp.response_length += sizeof(resp.hca_core_clock_offset);
- if (!err && !mlx4_is_slave(dev->dev)) {
- resp.comp_mask |= QUERY_DEVICE_RESP_MASK_TIMESTAMP;
+ if (!mlx4_get_internal_clock_params(dev->dev, &clock_params)) {
+ resp.comp_mask |= MLX4_IB_QUERY_DEV_RESP_MASK_CORE_CLOCK_OFFSET;
resp.hca_core_clock_offset = clock_params.offset % PAGE_SIZE;
}
}
+ if (uhw->outlen >= resp.response_length +
+ sizeof(resp.max_inl_recv_sz)) {
+ resp.response_length += sizeof(resp.max_inl_recv_sz);
+ resp.max_inl_recv_sz = dev->dev->caps.max_rq_sg *
+ sizeof(struct mlx4_wqe_data_seg);
+ }
+
+ if (offsetofend(typeof(resp), rss_caps) <= uhw->outlen) {
+ if (props->rss_caps.supported_qpts) {
+ resp.rss_caps.rx_hash_function =
+ MLX4_IB_RX_HASH_FUNC_TOEPLITZ;
+
+ resp.rss_caps.rx_hash_fields_mask =
+ MLX4_IB_RX_HASH_SRC_IPV4 |
+ MLX4_IB_RX_HASH_DST_IPV4 |
+ MLX4_IB_RX_HASH_SRC_IPV6 |
+ MLX4_IB_RX_HASH_DST_IPV6 |
+ MLX4_IB_RX_HASH_SRC_PORT_TCP |
+ MLX4_IB_RX_HASH_DST_PORT_TCP |
+ MLX4_IB_RX_HASH_SRC_PORT_UDP |
+ MLX4_IB_RX_HASH_DST_PORT_UDP;
+
+ if (dev->dev->caps.tunnel_offload_mode ==
+ MLX4_TUNNEL_OFFLOAD_MODE_VXLAN)
+ resp.rss_caps.rx_hash_fields_mask |=
+ MLX4_IB_RX_HASH_INNER;
+ }
+ resp.response_length = offsetof(typeof(resp), rss_caps) +
+ sizeof(resp.rss_caps);
+ }
+
+ if (offsetofend(typeof(resp), tso_caps) <= uhw->outlen) {
+ if (dev->dev->caps.max_gso_sz &&
+ ((mlx4_ib_port_link_layer(ibdev, 1) ==
+ IB_LINK_LAYER_ETHERNET) ||
+ (mlx4_ib_port_link_layer(ibdev, 2) ==
+ IB_LINK_LAYER_ETHERNET))) {
+ resp.tso_caps.max_tso = dev->dev->caps.max_gso_sz;
+ resp.tso_caps.supported_qpts |=
+ 1 << IB_QPT_RAW_PACKET;
+ }
+ resp.response_length = offsetof(typeof(resp), tso_caps) +
+ sizeof(resp.tso_caps);
+ }
+
if (uhw->outlen) {
err = ib_copy_to_udata(uhw, &resp, resp.response_length);
if (err)
@@ -574,7 +644,7 @@ out:
}
static enum rdma_link_layer
-mlx4_ib_port_link_layer(struct ib_device *device, u8 port_num)
+mlx4_ib_port_link_layer(struct ib_device *device, u32 port_num)
{
struct mlx4_dev *dev = to_mdev(device)->dev;
@@ -582,11 +652,11 @@ mlx4_ib_port_link_layer(struct ib_device *device, u8 port_num)
IB_LINK_LAYER_INFINIBAND : IB_LINK_LAYER_ETHERNET;
}
-static int ib_link_query_port(struct ib_device *ibdev, u8 port,
+static int ib_link_query_port(struct ib_device *ibdev, u32 port,
struct ib_port_attr *props, int netw_view)
{
- struct ib_smp *in_mad = NULL;
- struct ib_smp *out_mad = NULL;
+ struct ib_smp *in_mad;
+ struct ib_smp *out_mad;
int ext_active_speed;
int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
int err = -ENOMEM;
@@ -596,7 +666,7 @@ static int ib_link_query_port(struct ib_device *ibdev, u8 port,
if (!in_mad || !out_mad)
goto out;
- init_query_mad(in_mad);
+ ib_init_query_mad(in_mad);
in_mad->attr_id = IB_SMP_ATTR_PORT_INFO;
in_mad->attr_mod = cpu_to_be32(port);
@@ -648,7 +718,7 @@ static int ib_link_query_port(struct ib_device *ibdev, u8 port,
/* If reported active speed is QDR, check if is FDR-10 */
if (props->active_speed == IB_SPEED_QDR) {
- init_query_mad(in_mad);
+ ib_init_query_mad(in_mad);
in_mad->attr_id = MLX4_ATTR_EXTENDED_PORT_INFO;
in_mad->attr_mod = cpu_to_be32(port);
@@ -674,11 +744,12 @@ out:
static u8 state_to_phys_state(enum ib_port_state state)
{
- return state == IB_PORT_ACTIVE ? 5 : 3;
+ return state == IB_PORT_ACTIVE ?
+ IB_PORT_PHYS_STATE_LINK_UP : IB_PORT_PHYS_STATE_DISABLED;
}
-static int eth_link_query_port(struct ib_device *ibdev, u8 port,
- struct ib_port_attr *props, int netw_view)
+static int eth_link_query_port(struct ib_device *ibdev, u32 port,
+ struct ib_port_attr *props)
{
struct mlx4_ib_dev *mdev = to_mdev(ibdev);
@@ -704,10 +775,12 @@ static int eth_link_query_port(struct ib_device *ibdev, u8 port,
IB_WIDTH_4X : IB_WIDTH_1X;
props->active_speed = (((u8 *)mailbox->buf)[5] == 0x20 /*56Gb*/) ?
IB_SPEED_FDR : IB_SPEED_QDR;
- props->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_IP_BASED_GIDS;
+ props->port_cap_flags = IB_PORT_CM_SUP;
+ props->ip_gids = true;
props->gid_tbl_len = mdev->dev->caps.gid_table_len[port];
props->max_msg_sz = mdev->dev->caps.max_msg_sz;
- props->pkey_tbl_len = 1;
+ if (mdev->dev->caps.pkey_table_len[port])
+ props->pkey_tbl_len = 1;
props->max_mtu = IB_MTU_4096;
props->max_vl_num = 2;
props->state = IB_PORT_DOWN;
@@ -736,32 +809,32 @@ out:
return err;
}
-int __mlx4_ib_query_port(struct ib_device *ibdev, u8 port,
+int __mlx4_ib_query_port(struct ib_device *ibdev, u32 port,
struct ib_port_attr *props, int netw_view)
{
int err;
- memset(props, 0, sizeof *props);
+ /* props being zeroed by the caller, avoid zeroing it here */
err = mlx4_ib_port_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND ?
ib_link_query_port(ibdev, port, props, netw_view) :
- eth_link_query_port(ibdev, port, props, netw_view);
+ eth_link_query_port(ibdev, port, props);
return err;
}
-static int mlx4_ib_query_port(struct ib_device *ibdev, u8 port,
+static int mlx4_ib_query_port(struct ib_device *ibdev, u32 port,
struct ib_port_attr *props)
{
/* returns host view */
return __mlx4_ib_query_port(ibdev, port, props, 0);
}
-int __mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
+int __mlx4_ib_query_gid(struct ib_device *ibdev, u32 port, int index,
union ib_gid *gid, int netw_view)
{
- struct ib_smp *in_mad = NULL;
- struct ib_smp *out_mad = NULL;
+ struct ib_smp *in_mad;
+ struct ib_smp *out_mad;
int err = -ENOMEM;
struct mlx4_ib_dev *dev = to_mdev(ibdev);
int clear = 0;
@@ -772,7 +845,7 @@ int __mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
if (!in_mad || !out_mad)
goto out;
- init_query_mad(in_mad);
+ ib_init_query_mad(in_mad);
in_mad->attr_id = IB_SMP_ATTR_PORT_INFO;
in_mad->attr_mod = cpu_to_be32(port);
@@ -794,7 +867,7 @@ int __mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
}
}
- init_query_mad(in_mad);
+ ib_init_query_mad(in_mad);
in_mad->attr_id = IB_SMP_ATTR_GUID_INFO;
in_mad->attr_mod = cpu_to_be32(index / 8);
@@ -813,34 +886,20 @@ out:
return err;
}
-static int mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
+static int mlx4_ib_query_gid(struct ib_device *ibdev, u32 port, int index,
union ib_gid *gid)
{
- int ret;
-
if (rdma_protocol_ib(ibdev, port))
return __mlx4_ib_query_gid(ibdev, port, index, gid, 0);
-
- if (!rdma_protocol_roce(ibdev, port))
- return -ENODEV;
-
- if (!rdma_cap_roce_gid_table(ibdev, port))
- return -ENODEV;
-
- ret = ib_get_cached_gid(ibdev, port, index, gid, NULL);
- if (ret == -EAGAIN) {
- memcpy(gid, &zgid, sizeof(*gid));
- return 0;
- }
-
- return ret;
+ return 0;
}
-static int mlx4_ib_query_sl2vl(struct ib_device *ibdev, u8 port, u64 *sl2vl_tbl)
+static int mlx4_ib_query_sl2vl(struct ib_device *ibdev, u32 port,
+ u64 *sl2vl_tbl)
{
union sl2vl_tbl_to_u64 sl2vl64;
- struct ib_smp *in_mad = NULL;
- struct ib_smp *out_mad = NULL;
+ struct ib_smp *in_mad;
+ struct ib_smp *out_mad;
int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
int err = -ENOMEM;
int jj;
@@ -855,7 +914,7 @@ static int mlx4_ib_query_sl2vl(struct ib_device *ibdev, u8 port, u64 *sl2vl_tbl)
if (!in_mad || !out_mad)
goto out;
- init_query_mad(in_mad);
+ ib_init_query_mad(in_mad);
in_mad->attr_id = IB_SMP_ATTR_SL_TO_VL_TABLE;
in_mad->attr_mod = 0;
@@ -896,11 +955,11 @@ static void mlx4_init_sl2vl_tbl(struct mlx4_ib_dev *mdev)
}
}
-int __mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
+int __mlx4_ib_query_pkey(struct ib_device *ibdev, u32 port, u16 index,
u16 *pkey, int netw_view)
{
- struct ib_smp *in_mad = NULL;
- struct ib_smp *out_mad = NULL;
+ struct ib_smp *in_mad;
+ struct ib_smp *out_mad;
int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
int err = -ENOMEM;
@@ -909,7 +968,7 @@ int __mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
if (!in_mad || !out_mad)
goto out;
- init_query_mad(in_mad);
+ ib_init_query_mad(in_mad);
in_mad->attr_id = IB_SMP_ATTR_PKEY_TABLE;
in_mad->attr_mod = cpu_to_be32(index / 32);
@@ -929,7 +988,8 @@ out:
return err;
}
-static int mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey)
+static int mlx4_ib_query_pkey(struct ib_device *ibdev, u32 port, u16 index,
+ u16 *pkey)
{
return __mlx4_ib_query_pkey(ibdev, port, index, pkey, 0);
}
@@ -970,8 +1030,8 @@ static int mlx4_ib_modify_device(struct ib_device *ibdev, int mask,
return 0;
}
-static int mlx4_ib_SET_PORT(struct mlx4_ib_dev *dev, u8 port, int reset_qkey_viols,
- u32 cap_mask)
+static int mlx4_ib_SET_PORT(struct mlx4_ib_dev *dev, u32 port,
+ int reset_qkey_viols, u32 cap_mask)
{
struct mlx4_cmd_mailbox *mailbox;
int err;
@@ -996,7 +1056,7 @@ static int mlx4_ib_SET_PORT(struct mlx4_ib_dev *dev, u8 port, int reset_qkey_vio
return err;
}
-static int mlx4_ib_modify_port(struct ib_device *ibdev, u8 port, int mask,
+static int mlx4_ib_modify_port(struct ib_device *ibdev, u32 port, int mask,
struct ib_port_modify *props)
{
struct mlx4_ib_dev *mdev = to_mdev(ibdev);
@@ -1014,7 +1074,7 @@ static int mlx4_ib_modify_port(struct ib_device *ibdev, u8 port, int mask,
mutex_lock(&mdev->cap_mask_mutex);
- err = mlx4_ib_query_port(ibdev, port, &attr);
+ err = ib_query_port(ibdev, port, &attr);
if (err)
goto out;
@@ -1030,19 +1090,21 @@ out:
return err;
}
-static struct ib_ucontext *mlx4_ib_alloc_ucontext(struct ib_device *ibdev,
- struct ib_udata *udata)
+static int mlx4_ib_alloc_ucontext(struct ib_ucontext *uctx,
+ struct ib_udata *udata)
{
+ struct ib_device *ibdev = uctx->device;
struct mlx4_ib_dev *dev = to_mdev(ibdev);
- struct mlx4_ib_ucontext *context;
+ struct mlx4_ib_ucontext *context = to_mucontext(uctx);
struct mlx4_ib_alloc_ucontext_resp_v3 resp_v3;
struct mlx4_ib_alloc_ucontext_resp resp;
int err;
if (!dev->ib_active)
- return ERR_PTR(-EAGAIN);
+ return -EAGAIN;
- if (ibdev->uverbs_abi_ver == MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION) {
+ if (ibdev->ops.uverbs_abi_ver ==
+ MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION) {
resp_v3.qp_tab_size = dev->dev->caps.num_qps;
resp_v3.bf_reg_size = dev->dev->caps.bf_reg_size;
resp_v3.bf_regs_per_page = dev->dev->caps.bf_regs_per_page;
@@ -1054,306 +1116,149 @@ static struct ib_ucontext *mlx4_ib_alloc_ucontext(struct ib_device *ibdev,
resp.cqe_size = dev->dev->caps.cqe_size;
}
- context = kzalloc(sizeof(*context), GFP_KERNEL);
- if (!context)
- return ERR_PTR(-ENOMEM);
-
err = mlx4_uar_alloc(to_mdev(ibdev)->dev, &context->uar);
- if (err) {
- kfree(context);
- return ERR_PTR(err);
- }
+ if (err)
+ return err;
INIT_LIST_HEAD(&context->db_page_list);
mutex_init(&context->db_page_mutex);
- if (ibdev->uverbs_abi_ver == MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION)
+ INIT_LIST_HEAD(&context->wqn_ranges_list);
+ mutex_init(&context->wqn_ranges_mutex);
+
+ if (ibdev->ops.uverbs_abi_ver == MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION)
err = ib_copy_to_udata(udata, &resp_v3, sizeof(resp_v3));
else
err = ib_copy_to_udata(udata, &resp, sizeof(resp));
if (err) {
mlx4_uar_free(to_mdev(ibdev)->dev, &context->uar);
- kfree(context);
- return ERR_PTR(-EFAULT);
+ return -EFAULT;
}
- return &context->ibucontext;
+ return err;
}
-static int mlx4_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
+static void mlx4_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
{
struct mlx4_ib_ucontext *context = to_mucontext(ibcontext);
mlx4_uar_free(to_mdev(ibcontext->device)->dev, &context->uar);
- kfree(context);
-
- return 0;
}
-static void mlx4_ib_vma_open(struct vm_area_struct *area)
-{
- /* vma_open is called when a new VMA is created on top of our VMA.
- * This is done through either mremap flow or split_vma (usually due
- * to mlock, madvise, munmap, etc.). We do not support a clone of the
- * vma, as this VMA is strongly hardware related. Therefore we set the
- * vm_ops of the newly created/cloned VMA to NULL, to prevent it from
- * calling us again and trying to do incorrect actions. We assume that
- * the original vma size is exactly a single page that there will be no
- * "splitting" operations on.
- */
- area->vm_ops = NULL;
-}
-
-static void mlx4_ib_vma_close(struct vm_area_struct *area)
-{
- struct mlx4_ib_vma_private_data *mlx4_ib_vma_priv_data;
-
- /* It's guaranteed that all VMAs opened on a FD are closed before the
- * file itself is closed, therefore no sync is needed with the regular
- * closing flow. (e.g. mlx4_ib_dealloc_ucontext) However need a sync
- * with accessing the vma as part of mlx4_ib_disassociate_ucontext.
- * The close operation is usually called under mm->mmap_sem except when
- * process is exiting. The exiting case is handled explicitly as part
- * of mlx4_ib_disassociate_ucontext.
- */
- mlx4_ib_vma_priv_data = (struct mlx4_ib_vma_private_data *)
- area->vm_private_data;
-
- /* set the vma context pointer to null in the mlx4_ib driver's private
- * data to protect against a race condition in mlx4_ib_dissassociate_ucontext().
- */
- mlx4_ib_vma_priv_data->vma = NULL;
-}
-
-static const struct vm_operations_struct mlx4_ib_vm_ops = {
- .open = mlx4_ib_vma_open,
- .close = mlx4_ib_vma_close
-};
-
static void mlx4_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
{
- int i;
- int ret = 0;
- struct vm_area_struct *vma;
- struct mlx4_ib_ucontext *context = to_mucontext(ibcontext);
- struct task_struct *owning_process = NULL;
- struct mm_struct *owning_mm = NULL;
-
- owning_process = get_pid_task(ibcontext->tgid, PIDTYPE_PID);
- if (!owning_process)
- return;
-
- owning_mm = get_task_mm(owning_process);
- if (!owning_mm) {
- pr_info("no mm, disassociate ucontext is pending task termination\n");
- while (1) {
- /* make sure that task is dead before returning, it may
- * prevent a rare case of module down in parallel to a
- * call to mlx4_ib_vma_close.
- */
- put_task_struct(owning_process);
- msleep(1);
- owning_process = get_pid_task(ibcontext->tgid,
- PIDTYPE_PID);
- if (!owning_process ||
- owning_process->state == TASK_DEAD) {
- pr_info("disassociate ucontext done, task was terminated\n");
- /* in case task was dead need to release the task struct */
- if (owning_process)
- put_task_struct(owning_process);
- return;
- }
- }
- }
-
- /* need to protect from a race on closing the vma as part of
- * mlx4_ib_vma_close().
- */
- down_read(&owning_mm->mmap_sem);
- for (i = 0; i < HW_BAR_COUNT; i++) {
- vma = context->hw_bar_info[i].vma;
- if (!vma)
- continue;
-
- ret = zap_vma_ptes(context->hw_bar_info[i].vma,
- context->hw_bar_info[i].vma->vm_start,
- PAGE_SIZE);
- if (ret) {
- pr_err("Error: zap_vma_ptes failed for index=%d, ret=%d\n", i, ret);
- BUG_ON(1);
- }
-
- /* context going to be destroyed, should not access ops any more */
- context->hw_bar_info[i].vma->vm_ops = NULL;
- }
-
- up_read(&owning_mm->mmap_sem);
- mmput(owning_mm);
- put_task_struct(owning_process);
-}
-
-static void mlx4_ib_set_vma_data(struct vm_area_struct *vma,
- struct mlx4_ib_vma_private_data *vma_private_data)
-{
- vma_private_data->vma = vma;
- vma->vm_private_data = vma_private_data;
- vma->vm_ops = &mlx4_ib_vm_ops;
}
static int mlx4_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
{
struct mlx4_ib_dev *dev = to_mdev(context->device);
- struct mlx4_ib_ucontext *mucontext = to_mucontext(context);
- if (vma->vm_end - vma->vm_start != PAGE_SIZE)
- return -EINVAL;
+ switch (vma->vm_pgoff) {
+ case 0:
+ return rdma_user_mmap_io(context, vma,
+ to_mucontext(context)->uar.pfn,
+ PAGE_SIZE,
+ pgprot_noncached(vma->vm_page_prot),
+ NULL);
- if (vma->vm_pgoff == 0) {
- /* We prevent double mmaping on same context */
- if (mucontext->hw_bar_info[HW_BAR_DB].vma)
+ case 1:
+ if (dev->dev->caps.bf_reg_size == 0)
return -EINVAL;
-
- vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
-
- if (io_remap_pfn_range(vma, vma->vm_start,
- to_mucontext(context)->uar.pfn,
- PAGE_SIZE, vma->vm_page_prot))
- return -EAGAIN;
-
- mlx4_ib_set_vma_data(vma, &mucontext->hw_bar_info[HW_BAR_DB]);
-
- } else if (vma->vm_pgoff == 1 && dev->dev->caps.bf_reg_size != 0) {
- /* We prevent double mmaping on same context */
- if (mucontext->hw_bar_info[HW_BAR_BF].vma)
- return -EINVAL;
-
- vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
-
- if (io_remap_pfn_range(vma, vma->vm_start,
- to_mucontext(context)->uar.pfn +
- dev->dev->caps.num_uars,
- PAGE_SIZE, vma->vm_page_prot))
- return -EAGAIN;
-
- mlx4_ib_set_vma_data(vma, &mucontext->hw_bar_info[HW_BAR_BF]);
-
- } else if (vma->vm_pgoff == 3) {
+ return rdma_user_mmap_io(
+ context, vma,
+ to_mucontext(context)->uar.pfn +
+ dev->dev->caps.num_uars,
+ PAGE_SIZE, pgprot_writecombine(vma->vm_page_prot),
+ NULL);
+
+ case 3: {
struct mlx4_clock_params params;
int ret;
- /* We prevent double mmaping on same context */
- if (mucontext->hw_bar_info[HW_BAR_CLOCK].vma)
- return -EINVAL;
-
ret = mlx4_get_internal_clock_params(dev->dev, &params);
-
if (ret)
return ret;
- vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
- if (io_remap_pfn_range(vma, vma->vm_start,
- (pci_resource_start(dev->dev->persist->pdev,
- params.bar) +
- params.offset)
- >> PAGE_SHIFT,
- PAGE_SIZE, vma->vm_page_prot))
- return -EAGAIN;
-
- mlx4_ib_set_vma_data(vma,
- &mucontext->hw_bar_info[HW_BAR_CLOCK]);
- } else {
- return -EINVAL;
+ return rdma_user_mmap_io(
+ context, vma,
+ (pci_resource_start(dev->dev->persist->pdev,
+ params.bar) +
+ params.offset) >>
+ PAGE_SHIFT,
+ PAGE_SIZE, pgprot_noncached(vma->vm_page_prot),
+ NULL);
}
- return 0;
+ default:
+ return -EINVAL;
+ }
}
-static struct ib_pd *mlx4_ib_alloc_pd(struct ib_device *ibdev,
- struct ib_ucontext *context,
- struct ib_udata *udata)
+static int mlx4_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
{
- struct mlx4_ib_pd *pd;
+ struct mlx4_ib_pd *pd = to_mpd(ibpd);
+ struct ib_device *ibdev = ibpd->device;
int err;
- pd = kmalloc(sizeof *pd, GFP_KERNEL);
- if (!pd)
- return ERR_PTR(-ENOMEM);
-
err = mlx4_pd_alloc(to_mdev(ibdev)->dev, &pd->pdn);
- if (err) {
- kfree(pd);
- return ERR_PTR(err);
- }
-
- if (context)
- if (ib_copy_to_udata(udata, &pd->pdn, sizeof (__u32))) {
- mlx4_pd_free(to_mdev(ibdev)->dev, pd->pdn);
- kfree(pd);
- return ERR_PTR(-EFAULT);
- }
+ if (err)
+ return err;
- return &pd->ibpd;
+ if (udata && ib_copy_to_udata(udata, &pd->pdn, sizeof(__u32))) {
+ mlx4_pd_free(to_mdev(ibdev)->dev, pd->pdn);
+ return -EFAULT;
+ }
+ return 0;
}
-static int mlx4_ib_dealloc_pd(struct ib_pd *pd)
+static int mlx4_ib_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata)
{
mlx4_pd_free(to_mdev(pd->device)->dev, to_mpd(pd)->pdn);
- kfree(pd);
-
return 0;
}
-static struct ib_xrcd *mlx4_ib_alloc_xrcd(struct ib_device *ibdev,
- struct ib_ucontext *context,
- struct ib_udata *udata)
+static int mlx4_ib_alloc_xrcd(struct ib_xrcd *ibxrcd, struct ib_udata *udata)
{
- struct mlx4_ib_xrcd *xrcd;
+ struct mlx4_ib_dev *dev = to_mdev(ibxrcd->device);
+ struct mlx4_ib_xrcd *xrcd = to_mxrcd(ibxrcd);
struct ib_cq_init_attr cq_attr = {};
int err;
- if (!(to_mdev(ibdev)->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC))
- return ERR_PTR(-ENOSYS);
-
- xrcd = kmalloc(sizeof *xrcd, GFP_KERNEL);
- if (!xrcd)
- return ERR_PTR(-ENOMEM);
+ if (!(dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC))
+ return -EOPNOTSUPP;
- err = mlx4_xrcd_alloc(to_mdev(ibdev)->dev, &xrcd->xrcdn);
+ err = mlx4_xrcd_alloc(dev->dev, &xrcd->xrcdn);
if (err)
- goto err1;
+ return err;
- xrcd->pd = ib_alloc_pd(ibdev, 0);
+ xrcd->pd = ib_alloc_pd(ibxrcd->device, 0);
if (IS_ERR(xrcd->pd)) {
err = PTR_ERR(xrcd->pd);
goto err2;
}
cq_attr.cqe = 1;
- xrcd->cq = ib_create_cq(ibdev, NULL, NULL, xrcd, &cq_attr);
+ xrcd->cq = ib_create_cq(ibxrcd->device, NULL, NULL, xrcd, &cq_attr);
if (IS_ERR(xrcd->cq)) {
err = PTR_ERR(xrcd->cq);
goto err3;
}
- return &xrcd->ibxrcd;
+ return 0;
err3:
ib_dealloc_pd(xrcd->pd);
err2:
- mlx4_xrcd_free(to_mdev(ibdev)->dev, xrcd->xrcdn);
-err1:
- kfree(xrcd);
- return ERR_PTR(err);
+ mlx4_xrcd_free(dev->dev, xrcd->xrcdn);
+ return err;
}
-static int mlx4_ib_dealloc_xrcd(struct ib_xrcd *xrcd)
+static int mlx4_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata)
{
ib_destroy_cq(to_mxrcd(xrcd)->cq);
ib_dealloc_pd(to_mxrcd(xrcd)->pd);
mlx4_xrcd_free(to_mdev(xrcd->device)->dev, to_mxrcd(xrcd)->xrcdn);
- kfree(xrcd);
-
return 0;
}
@@ -1407,8 +1312,7 @@ int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
spin_lock_bh(&mdev->iboe.lock);
ndev = mdev->iboe.netdevs[mqp->port - 1];
- if (ndev)
- dev_hold(ndev);
+ dev_hold(ndev);
spin_unlock_bh(&mdev->iboe.lock);
if (ndev) {
@@ -1584,8 +1488,9 @@ static int __mlx4_ib_create_default_rules(
int i;
for (i = 0; i < ARRAY_SIZE(pdefault_rules->rules_create_list); i++) {
+ union ib_flow_spec ib_spec = {};
int ret;
- union ib_flow_spec ib_spec;
+
switch (pdefault_rules->rules_create_list[i]) {
case 0:
/* no rule */
@@ -1626,23 +1531,11 @@ static int __mlx4_ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_att
struct mlx4_net_trans_rule_hw_ctrl *ctrl;
int default_flow;
- static const u16 __mlx4_domain[] = {
- [IB_FLOW_DOMAIN_USER] = MLX4_DOMAIN_UVERBS,
- [IB_FLOW_DOMAIN_ETHTOOL] = MLX4_DOMAIN_ETHTOOL,
- [IB_FLOW_DOMAIN_RFS] = MLX4_DOMAIN_RFS,
- [IB_FLOW_DOMAIN_NIC] = MLX4_DOMAIN_NIC,
- };
-
if (flow_attr->priority > MLX4_IB_FLOW_MAX_PRIO) {
pr_err("Invalid priority value %d\n", flow_attr->priority);
return -EINVAL;
}
- if (domain >= IB_FLOW_DOMAIN_NUM) {
- pr_err("Invalid domain value %d\n", domain);
- return -EINVAL;
- }
-
if (mlx4_map_sw_to_hw_steering_mode(mdev->dev, flow_type) < 0)
return -EINVAL;
@@ -1651,8 +1544,7 @@ static int __mlx4_ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_att
return PTR_ERR(mailbox);
ctrl = mailbox->buf;
- ctrl->prio = cpu_to_be16(__mlx4_domain[domain] |
- flow_attr->priority);
+ ctrl->prio = cpu_to_be16(domain | flow_attr->priority);
ctrl->type = mlx4_map_sw_to_hw_steering_mode(mdev->dev, flow_type);
ctrl->port = flow_attr->port;
ctrl->qpn = cpu_to_be32(qp->qp_num);
@@ -1794,8 +1686,8 @@ static int mlx4_ib_add_dont_trap_rule(struct mlx4_dev *dev,
}
static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp,
- struct ib_flow_attr *flow_attr,
- int domain)
+ struct ib_flow_attr *flow_attr,
+ struct ib_udata *udata)
{
int err = 0, i = 0, j = 0;
struct mlx4_ib_flow *mflow;
@@ -1803,13 +1695,17 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp,
struct mlx4_dev *dev = (to_mdev(qp->device))->dev;
int is_bonded = mlx4_is_bonded(dev);
- if (flow_attr->port < 1 || flow_attr->port > qp->device->phys_port_cnt)
- return ERR_PTR(-EINVAL);
+ if (flow_attr->flags & ~IB_FLOW_ATTR_FLAGS_DONT_TRAP)
+ return ERR_PTR(-EOPNOTSUPP);
if ((flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP) &&
(flow_attr->type != IB_FLOW_ATTR_NORMAL))
return ERR_PTR(-EOPNOTSUPP);
+ if (udata &&
+ udata->inlen && !ib_is_udata_cleared(udata, 0, udata->inlen))
+ return ERR_PTR(-EOPNOTSUPP);
+
memset(type, 0, sizeof(type));
mflow = kzalloc(sizeof(*mflow), GFP_KERNEL);
@@ -1854,8 +1750,8 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp,
}
while (i < ARRAY_SIZE(type) && type[i]) {
- err = __mlx4_ib_create_flow(qp, flow_attr, domain, type[i],
- &mflow->reg_id[i].id);
+ err = __mlx4_ib_create_flow(qp, flow_attr, MLX4_DOMAIN_UVERBS,
+ type[i], &mflow->reg_id[i].id);
if (err)
goto err_create_flow;
if (is_bonded) {
@@ -1864,7 +1760,7 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp,
*/
flow_attr->port = 2;
err = __mlx4_ib_create_flow(qp, flow_attr,
- domain, type[j],
+ MLX4_DOMAIN_UVERBS, type[j],
&mflow->reg_id[j].mirror);
flow_attr->port = 1;
if (err)
@@ -2063,11 +1959,9 @@ static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
if (ge) {
spin_lock_bh(&mdev->iboe.lock);
ndev = ge->added ? mdev->iboe.netdevs[ge->port - 1] : NULL;
- if (ndev)
- dev_hold(ndev);
+ dev_hold(ndev);
spin_unlock_bh(&mdev->iboe.lock);
- if (ndev)
- dev_put(ndev);
+ dev_put(ndev);
list_del(&ge->list);
kfree(ge);
} else
@@ -2080,8 +1974,8 @@ static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
static int init_node_data(struct mlx4_ib_dev *dev)
{
- struct ib_smp *in_mad = NULL;
- struct ib_smp *out_mad = NULL;
+ struct ib_smp *in_mad;
+ struct ib_smp *out_mad;
int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
int err = -ENOMEM;
@@ -2090,7 +1984,7 @@ static int init_node_data(struct mlx4_ib_dev *dev)
if (!in_mad || !out_mad)
goto out;
- init_query_mad(in_mad);
+ ib_init_query_mad(in_mad);
in_mad->attr_id = IB_SMP_ATTR_NODE_DESC;
if (mlx4_is_master(dev->dev))
mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;
@@ -2116,39 +2010,45 @@ out:
return err;
}
-static ssize_t show_hca(struct device *device, struct device_attribute *attr,
- char *buf)
+static ssize_t hca_type_show(struct device *device,
+ struct device_attribute *attr, char *buf)
{
struct mlx4_ib_dev *dev =
- container_of(device, struct mlx4_ib_dev, ib_dev.dev);
- return sprintf(buf, "MT%d\n", dev->dev->persist->pdev->device);
+ rdma_device_to_drv_device(device, struct mlx4_ib_dev, ib_dev);
+
+ return sysfs_emit(buf, "MT%d\n", dev->dev->persist->pdev->device);
}
+static DEVICE_ATTR_RO(hca_type);
-static ssize_t show_rev(struct device *device, struct device_attribute *attr,
- char *buf)
+static ssize_t hw_rev_show(struct device *device,
+ struct device_attribute *attr, char *buf)
{
struct mlx4_ib_dev *dev =
- container_of(device, struct mlx4_ib_dev, ib_dev.dev);
- return sprintf(buf, "%x\n", dev->dev->rev_id);
+ rdma_device_to_drv_device(device, struct mlx4_ib_dev, ib_dev);
+
+ return sysfs_emit(buf, "%x\n", dev->dev->rev_id);
}
+static DEVICE_ATTR_RO(hw_rev);
-static ssize_t show_board(struct device *device, struct device_attribute *attr,
- char *buf)
+static ssize_t board_id_show(struct device *device,
+ struct device_attribute *attr, char *buf)
{
struct mlx4_ib_dev *dev =
- container_of(device, struct mlx4_ib_dev, ib_dev.dev);
- return sprintf(buf, "%.*s\n", MLX4_BOARD_ID_LEN,
- dev->dev->board_id);
+ rdma_device_to_drv_device(device, struct mlx4_ib_dev, ib_dev);
+
+ return sysfs_emit(buf, "%.*s\n", MLX4_BOARD_ID_LEN, dev->dev->board_id);
}
+static DEVICE_ATTR_RO(board_id);
-static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
-static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
-static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
+static struct attribute *mlx4_class_attributes[] = {
+ &dev_attr_hw_rev.attr,
+ &dev_attr_hca_type.attr,
+ &dev_attr_board_id.attr,
+ NULL
+};
-static struct device_attribute *mlx4_class_attributes[] = {
- &dev_attr_hw_rev,
- &dev_attr_hca_type,
- &dev_attr_board_id
+static const struct attribute_group mlx4_attr_group = {
+ .attrs = mlx4_class_attributes,
};
struct diag_counter {
@@ -2193,23 +2093,35 @@ static const struct diag_counter diag_device_only[] = {
DIAG_COUNTER(rq_num_udsdprd, 0x118),
};
-static struct rdma_hw_stats *mlx4_ib_alloc_hw_stats(struct ib_device *ibdev,
- u8 port_num)
+static struct rdma_hw_stats *
+mlx4_ib_alloc_hw_device_stats(struct ib_device *ibdev)
{
struct mlx4_ib_dev *dev = to_mdev(ibdev);
struct mlx4_ib_diag_counters *diag = dev->diag_counters;
- if (!diag[!!port_num].name)
+ if (!diag[0].descs)
return NULL;
- return rdma_alloc_hw_stats_struct(diag[!!port_num].name,
- diag[!!port_num].num_counters,
+ return rdma_alloc_hw_stats_struct(diag[0].descs, diag[0].num_counters,
+ RDMA_HW_STATS_DEFAULT_LIFESPAN);
+}
+
+static struct rdma_hw_stats *
+mlx4_ib_alloc_hw_port_stats(struct ib_device *ibdev, u32 port_num)
+{
+ struct mlx4_ib_dev *dev = to_mdev(ibdev);
+ struct mlx4_ib_diag_counters *diag = dev->diag_counters;
+
+ if (!diag[1].descs)
+ return NULL;
+
+ return rdma_alloc_hw_stats_struct(diag[1].descs, diag[1].num_counters,
RDMA_HW_STATS_DEFAULT_LIFESPAN);
}
static int mlx4_ib_get_hw_stats(struct ib_device *ibdev,
struct rdma_hw_stats *stats,
- u8 port, int index)
+ u32 port, int index)
{
struct mlx4_ib_dev *dev = to_mdev(ibdev);
struct mlx4_ib_diag_counters *diag = dev->diag_counters;
@@ -2233,10 +2145,8 @@ static int mlx4_ib_get_hw_stats(struct ib_device *ibdev,
}
static int __mlx4_ib_alloc_diag_counters(struct mlx4_ib_dev *ibdev,
- const char ***name,
- u32 **offset,
- u32 *num,
- bool port)
+ struct rdma_stat_desc **pdescs,
+ u32 **offset, u32 *num, bool port)
{
u32 num_counters;
@@ -2248,51 +2158,62 @@ static int __mlx4_ib_alloc_diag_counters(struct mlx4_ib_dev *ibdev,
if (!port)
num_counters += ARRAY_SIZE(diag_device_only);
- *name = kcalloc(num_counters, sizeof(**name), GFP_KERNEL);
- if (!*name)
+ *pdescs = kcalloc(num_counters, sizeof(struct rdma_stat_desc),
+ GFP_KERNEL);
+ if (!*pdescs)
return -ENOMEM;
*offset = kcalloc(num_counters, sizeof(**offset), GFP_KERNEL);
if (!*offset)
- goto err_name;
+ goto err;
*num = num_counters;
return 0;
-err_name:
- kfree(*name);
+err:
+ kfree(*pdescs);
return -ENOMEM;
}
static void mlx4_ib_fill_diag_counters(struct mlx4_ib_dev *ibdev,
- const char **name,
- u32 *offset,
- bool port)
+ struct rdma_stat_desc *descs,
+ u32 *offset, bool port)
{
int i;
int j;
for (i = 0, j = 0; i < ARRAY_SIZE(diag_basic); i++, j++) {
- name[i] = diag_basic[i].name;
+ descs[i].name = diag_basic[i].name;
offset[i] = diag_basic[i].offset;
}
if (ibdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT) {
for (i = 0; i < ARRAY_SIZE(diag_ext); i++, j++) {
- name[j] = diag_ext[i].name;
+ descs[j].name = diag_ext[i].name;
offset[j] = diag_ext[i].offset;
}
}
if (!port) {
for (i = 0; i < ARRAY_SIZE(diag_device_only); i++, j++) {
- name[j] = diag_device_only[i].name;
+ descs[j].name = diag_device_only[i].name;
offset[j] = diag_device_only[i].offset;
}
}
}
+static const struct ib_device_ops mlx4_ib_hw_stats_ops = {
+ .alloc_hw_device_stats = mlx4_ib_alloc_hw_device_stats,
+ .alloc_hw_port_stats = mlx4_ib_alloc_hw_port_stats,
+ .get_hw_stats = mlx4_ib_get_hw_stats,
+};
+
+static const struct ib_device_ops mlx4_ib_hw_stats_ops1 = {
+ .alloc_hw_device_stats = mlx4_ib_alloc_hw_device_stats,
+ .get_hw_stats = mlx4_ib_get_hw_stats,
+};
+
static int mlx4_ib_alloc_diag_counters(struct mlx4_ib_dev *ibdev)
{
struct mlx4_ib_diag_counters *diag = ibdev->diag_counters;
@@ -2305,28 +2226,34 @@ static int mlx4_ib_alloc_diag_counters(struct mlx4_ib_dev *ibdev)
return 0;
for (i = 0; i < MLX4_DIAG_COUNTERS_TYPES; i++) {
- /* i == 1 means we are building port counters */
- if (i && !per_port)
- continue;
+ /*
+ * i == 1 means we are building port counters, set a different
+ * stats ops without port stats callback.
+ */
+ if (i && !per_port) {
+ ib_set_device_ops(&ibdev->ib_dev,
+ &mlx4_ib_hw_stats_ops1);
- ret = __mlx4_ib_alloc_diag_counters(ibdev, &diag[i].name,
+ return 0;
+ }
+
+ ret = __mlx4_ib_alloc_diag_counters(ibdev, &diag[i].descs,
&diag[i].offset,
&diag[i].num_counters, i);
if (ret)
goto err_alloc;
- mlx4_ib_fill_diag_counters(ibdev, diag[i].name,
+ mlx4_ib_fill_diag_counters(ibdev, diag[i].descs,
diag[i].offset, i);
}
- ibdev->ib_dev.get_hw_stats = mlx4_ib_get_hw_stats;
- ibdev->ib_dev.alloc_hw_stats = mlx4_ib_alloc_hw_stats;
+ ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_hw_stats_ops);
return 0;
err_alloc:
if (i) {
- kfree(diag[i - 1].name);
+ kfree(diag[i - 1].descs);
kfree(diag[i - 1].offset);
}
@@ -2339,7 +2266,7 @@ static void mlx4_ib_diag_cleanup(struct mlx4_ib_dev *ibdev)
for (i = 0; i < MLX4_DIAG_COUNTERS_TYPES; i++) {
kfree(ibdev->diag_counters[i].offset);
- kfree(ibdev->diag_counters[i].name);
+ kfree(ibdev->diag_counters[i].descs);
}
}
@@ -2352,10 +2279,7 @@ static void mlx4_ib_update_qps(struct mlx4_ib_dev *ibdev,
u64 release_mac = MLX4_IB_INVALID_MAC;
struct mlx4_ib_qp *qp;
- read_lock(&dev_base_lock);
- new_smac = mlx4_mac_to_u64(dev->dev_addr);
- read_unlock(&dev_base_lock);
-
+ new_smac = ether_addr_to_u64(dev->dev_addr);
atomic64_set(&ibdev->iboe.mac[port - 1], new_smac);
/* no need for update QP1 and mac registration in non-SRIOV */
@@ -2401,35 +2325,54 @@ unlock:
mutex_unlock(&ibdev->qp1_proxy_lock[port - 1]);
}
-static void mlx4_ib_scan_netdevs(struct mlx4_ib_dev *ibdev,
- struct net_device *dev,
- unsigned long event)
+static void mlx4_ib_scan_netdev(struct mlx4_ib_dev *ibdev,
+ struct net_device *dev,
+ unsigned long event)
{
- struct mlx4_ib_iboe *iboe;
- int update_qps_port = -1;
- int port;
+ struct mlx4_ib_iboe *iboe = &ibdev->iboe;
ASSERT_RTNL();
- iboe = &ibdev->iboe;
+ if (dev->dev.parent != ibdev->ib_dev.dev.parent)
+ return;
spin_lock_bh(&iboe->lock);
- mlx4_foreach_ib_transport_port(port, ibdev->dev) {
- iboe->netdevs[port - 1] =
- mlx4_get_protocol_dev(ibdev->dev, MLX4_PROT_ETH, port);
+ iboe->netdevs[dev->dev_port] = event != NETDEV_UNREGISTER ? dev : NULL;
- if (dev == iboe->netdevs[port - 1] &&
- (event == NETDEV_CHANGEADDR || event == NETDEV_REGISTER ||
- event == NETDEV_UP || event == NETDEV_CHANGE))
- update_qps_port = port;
+ spin_unlock_bh(&iboe->lock);
+
+ if (event == NETDEV_CHANGEADDR || event == NETDEV_REGISTER)
+ mlx4_ib_update_qps(ibdev, dev, dev->dev_port + 1);
+}
+
+static void mlx4_ib_port_event(struct ib_device *ibdev, struct net_device *ndev,
+ unsigned long event)
+{
+ struct mlx4_ib_dev *mlx4_ibdev =
+ container_of(ibdev, struct mlx4_ib_dev, ib_dev);
+ struct mlx4_ib_iboe *iboe = &mlx4_ibdev->iboe;
+
+ if (!net_eq(dev_net(ndev), &init_net))
+ return;
+
+ ASSERT_RTNL();
+
+ if (ndev->dev.parent != mlx4_ibdev->ib_dev.dev.parent)
+ return;
+
+ spin_lock_bh(&iboe->lock);
+
+ iboe->netdevs[ndev->dev_port] = event != NETDEV_UNREGISTER ? ndev : NULL;
+
+ if (event == NETDEV_UP || event == NETDEV_DOWN)
+ ib_dispatch_port_state_event(&mlx4_ibdev->ib_dev, ndev);
- }
spin_unlock_bh(&iboe->lock);
- if (update_qps_port > 0)
- mlx4_ib_update_qps(ibdev, dev, update_qps_port);
+ if (event == NETDEV_UP || event == NETDEV_CHANGE)
+ mlx4_ib_update_qps(mlx4_ibdev, ndev, ndev->dev_port + 1);
}
static int mlx4_ib_netdev_event(struct notifier_block *this,
@@ -2442,7 +2385,7 @@ static int mlx4_ib_netdev_event(struct notifier_block *this,
return NOTIFY_DONE;
ibdev = container_of(this, struct mlx4_ib_dev, iboe.nb);
- mlx4_ib_scan_netdevs(ibdev, dev, event);
+ mlx4_ib_scan_netdev(ibdev, dev, event);
return NOTIFY_DONE;
}
@@ -2530,48 +2473,148 @@ static void mlx4_ib_free_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev)
ibdev->eq_table = NULL;
}
-static int mlx4_port_immutable(struct ib_device *ibdev, u8 port_num,
+static int mlx4_port_immutable(struct ib_device *ibdev, u32 port_num,
struct ib_port_immutable *immutable)
{
struct ib_port_attr attr;
struct mlx4_ib_dev *mdev = to_mdev(ibdev);
int err;
- err = mlx4_ib_query_port(ibdev, port_num, &attr);
- if (err)
- return err;
-
- immutable->pkey_tbl_len = attr.pkey_tbl_len;
- immutable->gid_tbl_len = attr.gid_tbl_len;
-
if (mlx4_ib_port_link_layer(ibdev, port_num) == IB_LINK_LAYER_INFINIBAND) {
immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB;
+ immutable->max_mad_size = IB_MGMT_MAD_SIZE;
} else {
if (mdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE)
immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2)
immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE |
RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
+ immutable->core_cap_flags |= RDMA_CORE_PORT_RAW_PACKET;
+ if (immutable->core_cap_flags & (RDMA_CORE_PORT_IBA_ROCE |
+ RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP))
+ immutable->max_mad_size = IB_MGMT_MAD_SIZE;
}
- immutable->max_mad_size = IB_MGMT_MAD_SIZE;
+ err = ib_query_port(ibdev, port_num, &attr);
+ if (err)
+ return err;
+
+ immutable->pkey_tbl_len = attr.pkey_tbl_len;
+ immutable->gid_tbl_len = attr.gid_tbl_len;
return 0;
}
-static void get_fw_ver_str(struct ib_device *device, char *str,
- size_t str_len)
+static void get_fw_ver_str(struct ib_device *device, char *str)
{
struct mlx4_ib_dev *dev =
container_of(device, struct mlx4_ib_dev, ib_dev);
- snprintf(str, str_len, "%d.%d.%d",
+ snprintf(str, IB_FW_VERSION_NAME_MAX, "%d.%d.%d",
(int) (dev->dev->caps.fw_ver >> 32),
(int) (dev->dev->caps.fw_ver >> 16) & 0xffff,
(int) dev->dev->caps.fw_ver & 0xffff);
}
-static void *mlx4_ib_add(struct mlx4_dev *dev)
+static const struct ib_device_ops mlx4_ib_dev_ops = {
+ .owner = THIS_MODULE,
+ .driver_id = RDMA_DRIVER_MLX4,
+ .uverbs_abi_ver = MLX4_IB_UVERBS_ABI_VERSION,
+
+ .add_gid = mlx4_ib_add_gid,
+ .alloc_mr = mlx4_ib_alloc_mr,
+ .alloc_pd = mlx4_ib_alloc_pd,
+ .alloc_ucontext = mlx4_ib_alloc_ucontext,
+ .attach_mcast = mlx4_ib_mcg_attach,
+ .create_ah = mlx4_ib_create_ah,
+ .create_cq = mlx4_ib_create_cq,
+ .create_qp = mlx4_ib_create_qp,
+ .create_srq = mlx4_ib_create_srq,
+ .dealloc_pd = mlx4_ib_dealloc_pd,
+ .dealloc_ucontext = mlx4_ib_dealloc_ucontext,
+ .del_gid = mlx4_ib_del_gid,
+ .dereg_mr = mlx4_ib_dereg_mr,
+ .destroy_ah = mlx4_ib_destroy_ah,
+ .destroy_cq = mlx4_ib_destroy_cq,
+ .destroy_qp = mlx4_ib_destroy_qp,
+ .destroy_srq = mlx4_ib_destroy_srq,
+ .detach_mcast = mlx4_ib_mcg_detach,
+ .device_group = &mlx4_attr_group,
+ .disassociate_ucontext = mlx4_ib_disassociate_ucontext,
+ .drain_rq = mlx4_ib_drain_rq,
+ .drain_sq = mlx4_ib_drain_sq,
+ .get_dev_fw_str = get_fw_ver_str,
+ .get_dma_mr = mlx4_ib_get_dma_mr,
+ .get_link_layer = mlx4_ib_port_link_layer,
+ .get_netdev = mlx4_ib_get_netdev,
+ .get_port_immutable = mlx4_port_immutable,
+ .map_mr_sg = mlx4_ib_map_mr_sg,
+ .mmap = mlx4_ib_mmap,
+ .modify_cq = mlx4_ib_modify_cq,
+ .modify_device = mlx4_ib_modify_device,
+ .modify_port = mlx4_ib_modify_port,
+ .modify_qp = mlx4_ib_modify_qp,
+ .modify_srq = mlx4_ib_modify_srq,
+ .poll_cq = mlx4_ib_poll_cq,
+ .post_recv = mlx4_ib_post_recv,
+ .post_send = mlx4_ib_post_send,
+ .post_srq_recv = mlx4_ib_post_srq_recv,
+ .process_mad = mlx4_ib_process_mad,
+ .query_ah = mlx4_ib_query_ah,
+ .query_device = mlx4_ib_query_device,
+ .query_gid = mlx4_ib_query_gid,
+ .query_pkey = mlx4_ib_query_pkey,
+ .query_port = mlx4_ib_query_port,
+ .query_qp = mlx4_ib_query_qp,
+ .query_srq = mlx4_ib_query_srq,
+ .reg_user_mr = mlx4_ib_reg_user_mr,
+ .req_notify_cq = mlx4_ib_arm_cq,
+ .rereg_user_mr = mlx4_ib_rereg_user_mr,
+ .resize_cq = mlx4_ib_resize_cq,
+ .report_port_event = mlx4_ib_port_event,
+
+ INIT_RDMA_OBJ_SIZE(ib_ah, mlx4_ib_ah, ibah),
+ INIT_RDMA_OBJ_SIZE(ib_cq, mlx4_ib_cq, ibcq),
+ INIT_RDMA_OBJ_SIZE(ib_pd, mlx4_ib_pd, ibpd),
+ INIT_RDMA_OBJ_SIZE(ib_qp, mlx4_ib_qp, ibqp),
+ INIT_RDMA_OBJ_SIZE(ib_srq, mlx4_ib_srq, ibsrq),
+ INIT_RDMA_OBJ_SIZE(ib_ucontext, mlx4_ib_ucontext, ibucontext),
+};
+
+static const struct ib_device_ops mlx4_ib_dev_wq_ops = {
+ .create_rwq_ind_table = mlx4_ib_create_rwq_ind_table,
+ .create_wq = mlx4_ib_create_wq,
+ .destroy_rwq_ind_table = mlx4_ib_destroy_rwq_ind_table,
+ .destroy_wq = mlx4_ib_destroy_wq,
+ .modify_wq = mlx4_ib_modify_wq,
+
+ INIT_RDMA_OBJ_SIZE(ib_rwq_ind_table, mlx4_ib_rwq_ind_table,
+ ib_rwq_ind_tbl),
+};
+
+static const struct ib_device_ops mlx4_ib_dev_mw_ops = {
+ .alloc_mw = mlx4_ib_alloc_mw,
+ .dealloc_mw = mlx4_ib_dealloc_mw,
+
+ INIT_RDMA_OBJ_SIZE(ib_mw, mlx4_ib_mw, ibmw),
+};
+
+static const struct ib_device_ops mlx4_ib_dev_xrc_ops = {
+ .alloc_xrcd = mlx4_ib_alloc_xrcd,
+ .dealloc_xrcd = mlx4_ib_dealloc_xrcd,
+
+ INIT_RDMA_OBJ_SIZE(ib_xrcd, mlx4_ib_xrcd, ibxrcd),
+};
+
+static const struct ib_device_ops mlx4_ib_dev_fs_ops = {
+ .create_flow = mlx4_ib_create_flow,
+ .destroy_flow = mlx4_ib_destroy_flow,
+};
+
+static int mlx4_ib_probe(struct auxiliary_device *adev,
+ const struct auxiliary_device_id *id)
{
+ struct mlx4_adev *madev = container_of(adev, struct mlx4_adev, adev);
+ struct mlx4_dev *dev = madev->mdev;
struct mlx4_ib_dev *ibdev;
int num_ports = 0;
int i, j;
@@ -2581,7 +2624,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
int num_req_counters;
int allocated;
u32 counter_index;
- struct counter_index *new_counter_index = NULL;
+ struct counter_index *new_counter_index;
pr_info_once("%s", mlx4_ib_version);
@@ -2591,172 +2634,83 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
/* No point in registering a device with no ports... */
if (num_ports == 0)
- return NULL;
+ return -ENODEV;
- ibdev = (struct mlx4_ib_dev *) ib_alloc_device(sizeof *ibdev);
+ ibdev = ib_alloc_device(mlx4_ib_dev, ib_dev);
if (!ibdev) {
dev_err(&dev->persist->pdev->dev,
"Device struct alloc failed\n");
- return NULL;
+ return -ENOMEM;
}
iboe = &ibdev->iboe;
- if (mlx4_pd_alloc(dev, &ibdev->priv_pdn))
+ err = mlx4_pd_alloc(dev, &ibdev->priv_pdn);
+ if (err)
goto err_dealloc;
- if (mlx4_uar_alloc(dev, &ibdev->priv_uar))
+ err = mlx4_uar_alloc(dev, &ibdev->priv_uar);
+ if (err)
goto err_pd;
ibdev->uar_map = ioremap((phys_addr_t) ibdev->priv_uar.pfn << PAGE_SHIFT,
PAGE_SIZE);
- if (!ibdev->uar_map)
+ if (!ibdev->uar_map) {
+ err = -ENOMEM;
goto err_uar;
+ }
MLX4_INIT_DOORBELL_LOCK(&ibdev->uar_lock);
ibdev->dev = dev;
ibdev->bond_next_port = 0;
- strlcpy(ibdev->ib_dev.name, "mlx4_%d", IB_DEVICE_NAME_MAX);
- ibdev->ib_dev.owner = THIS_MODULE;
ibdev->ib_dev.node_type = RDMA_NODE_IB_CA;
ibdev->ib_dev.local_dma_lkey = dev->caps.reserved_lkey;
ibdev->num_ports = num_ports;
ibdev->ib_dev.phys_port_cnt = mlx4_is_bonded(dev) ?
1 : ibdev->num_ports;
ibdev->ib_dev.num_comp_vectors = dev->caps.num_comp_vectors;
- ibdev->ib_dev.dma_device = &dev->persist->pdev->dev;
- ibdev->ib_dev.get_netdev = mlx4_ib_get_netdev;
- ibdev->ib_dev.add_gid = mlx4_ib_add_gid;
- ibdev->ib_dev.del_gid = mlx4_ib_del_gid;
+ ibdev->ib_dev.dev.parent = &dev->persist->pdev->dev;
- if (dev->caps.userspace_caps)
- ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_ABI_VERSION;
- else
- ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION;
-
- ibdev->ib_dev.uverbs_cmd_mask =
- (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
- (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
- (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
- (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
- (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
- (1ull << IB_USER_VERBS_CMD_REG_MR) |
- (1ull << IB_USER_VERBS_CMD_REREG_MR) |
- (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
- (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
- (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
- (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
- (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
- (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
- (1ull << IB_USER_VERBS_CMD_QUERY_QP) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
- (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) |
- (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) |
- (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
- (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
- (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) |
- (1ull << IB_USER_VERBS_CMD_CREATE_XSRQ) |
- (1ull << IB_USER_VERBS_CMD_OPEN_QP);
-
- ibdev->ib_dev.query_device = mlx4_ib_query_device;
- ibdev->ib_dev.query_port = mlx4_ib_query_port;
- ibdev->ib_dev.get_link_layer = mlx4_ib_port_link_layer;
- ibdev->ib_dev.query_gid = mlx4_ib_query_gid;
- ibdev->ib_dev.query_pkey = mlx4_ib_query_pkey;
- ibdev->ib_dev.modify_device = mlx4_ib_modify_device;
- ibdev->ib_dev.modify_port = mlx4_ib_modify_port;
- ibdev->ib_dev.alloc_ucontext = mlx4_ib_alloc_ucontext;
- ibdev->ib_dev.dealloc_ucontext = mlx4_ib_dealloc_ucontext;
- ibdev->ib_dev.mmap = mlx4_ib_mmap;
- ibdev->ib_dev.alloc_pd = mlx4_ib_alloc_pd;
- ibdev->ib_dev.dealloc_pd = mlx4_ib_dealloc_pd;
- ibdev->ib_dev.create_ah = mlx4_ib_create_ah;
- ibdev->ib_dev.query_ah = mlx4_ib_query_ah;
- ibdev->ib_dev.destroy_ah = mlx4_ib_destroy_ah;
- ibdev->ib_dev.create_srq = mlx4_ib_create_srq;
- ibdev->ib_dev.modify_srq = mlx4_ib_modify_srq;
- ibdev->ib_dev.query_srq = mlx4_ib_query_srq;
- ibdev->ib_dev.destroy_srq = mlx4_ib_destroy_srq;
- ibdev->ib_dev.post_srq_recv = mlx4_ib_post_srq_recv;
- ibdev->ib_dev.create_qp = mlx4_ib_create_qp;
- ibdev->ib_dev.modify_qp = mlx4_ib_modify_qp;
- ibdev->ib_dev.query_qp = mlx4_ib_query_qp;
- ibdev->ib_dev.destroy_qp = mlx4_ib_destroy_qp;
- ibdev->ib_dev.post_send = mlx4_ib_post_send;
- ibdev->ib_dev.post_recv = mlx4_ib_post_recv;
- ibdev->ib_dev.create_cq = mlx4_ib_create_cq;
- ibdev->ib_dev.modify_cq = mlx4_ib_modify_cq;
- ibdev->ib_dev.resize_cq = mlx4_ib_resize_cq;
- ibdev->ib_dev.destroy_cq = mlx4_ib_destroy_cq;
- ibdev->ib_dev.poll_cq = mlx4_ib_poll_cq;
- ibdev->ib_dev.req_notify_cq = mlx4_ib_arm_cq;
- ibdev->ib_dev.get_dma_mr = mlx4_ib_get_dma_mr;
- ibdev->ib_dev.reg_user_mr = mlx4_ib_reg_user_mr;
- ibdev->ib_dev.rereg_user_mr = mlx4_ib_rereg_user_mr;
- ibdev->ib_dev.dereg_mr = mlx4_ib_dereg_mr;
- ibdev->ib_dev.alloc_mr = mlx4_ib_alloc_mr;
- ibdev->ib_dev.map_mr_sg = mlx4_ib_map_mr_sg;
- ibdev->ib_dev.attach_mcast = mlx4_ib_mcg_attach;
- ibdev->ib_dev.detach_mcast = mlx4_ib_mcg_detach;
- ibdev->ib_dev.process_mad = mlx4_ib_process_mad;
- ibdev->ib_dev.get_port_immutable = mlx4_port_immutable;
- ibdev->ib_dev.get_dev_fw_str = get_fw_ver_str;
- ibdev->ib_dev.disassociate_ucontext = mlx4_ib_disassociate_ucontext;
-
- if (!mlx4_is_slave(ibdev->dev)) {
- ibdev->ib_dev.alloc_fmr = mlx4_ib_fmr_alloc;
- ibdev->ib_dev.map_phys_fmr = mlx4_ib_map_phys_fmr;
- ibdev->ib_dev.unmap_fmr = mlx4_ib_unmap_fmr;
- ibdev->ib_dev.dealloc_fmr = mlx4_ib_fmr_dealloc;
- }
+ ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_ops);
- if (dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW ||
- dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN) {
- ibdev->ib_dev.alloc_mw = mlx4_ib_alloc_mw;
- ibdev->ib_dev.dealloc_mw = mlx4_ib_dealloc_mw;
+ if ((dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS) &&
+ ((mlx4_ib_port_link_layer(&ibdev->ib_dev, 1) ==
+ IB_LINK_LAYER_ETHERNET) ||
+ (mlx4_ib_port_link_layer(&ibdev->ib_dev, 2) ==
+ IB_LINK_LAYER_ETHERNET)))
+ ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_wq_ops);
- ibdev->ib_dev.uverbs_cmd_mask |=
- (1ull << IB_USER_VERBS_CMD_ALLOC_MW) |
- (1ull << IB_USER_VERBS_CMD_DEALLOC_MW);
- }
+ if (dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW ||
+ dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN)
+ ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_mw_ops);
if (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) {
- ibdev->ib_dev.alloc_xrcd = mlx4_ib_alloc_xrcd;
- ibdev->ib_dev.dealloc_xrcd = mlx4_ib_dealloc_xrcd;
- ibdev->ib_dev.uverbs_cmd_mask |=
- (1ull << IB_USER_VERBS_CMD_OPEN_XRCD) |
- (1ull << IB_USER_VERBS_CMD_CLOSE_XRCD);
+ ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_xrc_ops);
}
if (check_flow_steering_support(dev)) {
ibdev->steering_support = MLX4_STEERING_MODE_DEVICE_MANAGED;
- ibdev->ib_dev.create_flow = mlx4_ib_create_flow;
- ibdev->ib_dev.destroy_flow = mlx4_ib_destroy_flow;
-
- ibdev->ib_dev.uverbs_ex_cmd_mask |=
- (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) |
- (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW);
+ ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_fs_ops);
}
- ibdev->ib_dev.uverbs_ex_cmd_mask |=
- (1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE) |
- (1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ) |
- (1ull << IB_USER_VERBS_EX_CMD_CREATE_QP);
+ if (!dev->caps.userspace_caps)
+ ibdev->ib_dev.ops.uverbs_abi_ver =
+ MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION;
mlx4_ib_alloc_eqs(dev, ibdev);
spin_lock_init(&iboe->lock);
- if (init_node_data(ibdev))
+ err = init_node_data(ibdev);
+ if (err)
goto err_map;
mlx4_init_sl2vl_tbl(ibdev);
for (i = 0; i < ibdev->num_ports; ++i) {
mutex_init(&ibdev->counters_table[i].mutex);
INIT_LIST_HEAD(&ibdev->counters_table[i].counters_list);
+ iboe->last_port_state[i] = IB_PORT_DOWN;
}
num_req_counters = mlx4_is_bonded(dev) ? 1 : ibdev->num_ports;
@@ -2765,7 +2719,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
allocated = 0;
if (mlx4_ib_port_link_layer(&ibdev->ib_dev, i + 1) ==
IB_LINK_LAYER_ETHERNET) {
- err = mlx4_counter_alloc(ibdev->dev, &counter_index);
+ err = mlx4_counter_alloc(ibdev->dev, &counter_index,
+ MLX4_RES_USAGE_DRIVER);
/* if failed to allocate a new counter, use default */
if (err)
counter_index =
@@ -2780,6 +2735,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
new_counter_index = kmalloc(sizeof(*new_counter_index),
GFP_KERNEL);
if (!new_counter_index) {
+ err = -ENOMEM;
if (allocated)
mlx4_counter_free(ibdev->dev, counter_index);
goto err_counter;
@@ -2797,8 +2753,10 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
new_counter_index =
kmalloc(sizeof(struct counter_index),
GFP_KERNEL);
- if (!new_counter_index)
+ if (!new_counter_index) {
+ err = -ENOMEM;
goto err_counter;
+ }
new_counter_index->index = counter_index;
new_counter_index->allocated = 0;
list_add_tail(&new_counter_index->list,
@@ -2820,16 +2778,17 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
ibdev->steer_qpn_count = MLX4_IB_UC_MAX_NUM_QPS;
err = mlx4_qp_reserve_range(dev, ibdev->steer_qpn_count,
MLX4_IB_UC_STEER_QPN_ALIGN,
- &ibdev->steer_qpn_base, 0);
+ &ibdev->steer_qpn_base, 0,
+ MLX4_RES_USAGE_DRIVER);
if (err)
goto err_counter;
- ibdev->ib_uc_qpns_bitmap =
- kmalloc(BITS_TO_LONGS(ibdev->steer_qpn_count) *
- sizeof(long),
- GFP_KERNEL);
- if (!ibdev->ib_uc_qpns_bitmap)
+ ibdev->ib_uc_qpns_bitmap = bitmap_alloc(ibdev->steer_qpn_count,
+ GFP_KERNEL);
+ if (!ibdev->ib_uc_qpns_bitmap) {
+ err = -ENOMEM;
goto err_steer_qp_release;
+ }
if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DMFS_IPOIB) {
bitmap_zero(ibdev->ib_uc_qpns_bitmap,
@@ -2849,39 +2808,34 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
for (j = 1; j <= ibdev->dev->caps.num_ports; j++)
atomic64_set(&iboe->mac[j - 1], ibdev->dev->caps.def_mac[j]);
- if (mlx4_ib_alloc_diag_counters(ibdev))
+ err = mlx4_ib_alloc_diag_counters(ibdev);
+ if (err)
goto err_steer_free_bitmap;
- if (ib_register_device(&ibdev->ib_dev, NULL))
+ err = ib_register_device(&ibdev->ib_dev, "mlx4_%d",
+ &dev->persist->pdev->dev);
+ if (err)
goto err_diag_counters;
- if (mlx4_ib_mad_init(ibdev))
+ err = mlx4_ib_mad_init(ibdev);
+ if (err)
goto err_reg;
- if (mlx4_ib_init_sriov(ibdev))
+ err = mlx4_ib_init_sriov(ibdev);
+ if (err)
goto err_mad;
- if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE ||
- dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2) {
- if (!iboe->nb.notifier_call) {
- iboe->nb.notifier_call = mlx4_ib_netdev_event;
- err = register_netdevice_notifier(&iboe->nb);
- if (err) {
- iboe->nb.notifier_call = NULL;
- goto err_notif;
- }
- }
- if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2) {
- err = mlx4_config_roce_v2_port(dev, ROCE_V2_UDP_DPORT);
- if (err) {
- goto err_notif;
- }
+ if (!iboe->nb.notifier_call) {
+ iboe->nb.notifier_call = mlx4_ib_netdev_event;
+ err = register_netdevice_notifier(&iboe->nb);
+ if (err) {
+ iboe->nb.notifier_call = NULL;
+ goto err_notif;
}
}
-
- for (j = 0; j < ARRAY_SIZE(mlx4_class_attributes); ++j) {
- if (device_create_file(&ibdev->ib_dev.dev,
- mlx4_class_attributes[j]))
+ if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2) {
+ err = mlx4_config_roce_v2_port(dev, ROCE_V2_UDP_DPORT);
+ if (err)
goto err_notif;
}
@@ -2902,7 +2856,14 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
do_slave_init(ibdev, j, 1);
}
}
- return ibdev;
+
+ /* register mlx4 core notifier */
+ ibdev->mlx_nb.notifier_call = mlx4_ib_event;
+ err = mlx4_register_event_notifier(dev, &ibdev->mlx_nb);
+ WARN(err, "failed to register mlx4 event notifier (%d)", err);
+
+ auxiliary_set_drvdata(adev, ibdev);
+ return 0;
err_notif:
if (ibdev->iboe.nb.notifier_call) {
@@ -2924,17 +2885,17 @@ err_diag_counters:
mlx4_ib_diag_cleanup(ibdev);
err_steer_free_bitmap:
- kfree(ibdev->ib_uc_qpns_bitmap);
+ bitmap_free(ibdev->ib_uc_qpns_bitmap);
err_steer_qp_release:
- if (ibdev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED)
- mlx4_qp_release_range(dev, ibdev->steer_qpn_base,
- ibdev->steer_qpn_count);
+ mlx4_qp_release_range(dev, ibdev->steer_qpn_base,
+ ibdev->steer_qpn_count);
err_counter:
for (i = 0; i < ibdev->num_ports; ++i)
mlx4_ib_delete_counters_table(ibdev, &ibdev->counters_table[i]);
err_map:
+ mlx4_ib_free_eqs(dev, ibdev);
iounmap(ibdev->uar_map);
err_uar:
@@ -2946,7 +2907,7 @@ err_pd:
err_dealloc:
ib_dealloc_device(&ibdev->ib_dev);
- return NULL;
+ return err;
}
int mlx4_ib_steer_qp_alloc(struct mlx4_ib_dev *dev, int count, int *qpn)
@@ -2971,7 +2932,10 @@ void mlx4_ib_steer_qp_free(struct mlx4_ib_dev *dev, u32 qpn, int count)
dev->steering_support != MLX4_STEERING_MODE_DEVICE_MANAGED)
return;
- BUG_ON(qpn < dev->steer_qpn_base);
+ if (WARN(qpn < dev->steer_qpn_base, "qpn = %u, steer_qpn_base = %u\n",
+ qpn, dev->steer_qpn_base))
+ /* not supposed to be here */
+ return;
bitmap_release_region(dev->ib_uc_qpns_bitmap,
qpn - dev->steer_qpn_base,
@@ -2983,7 +2947,7 @@ int mlx4_ib_steer_qp_reg(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
{
int err;
size_t flow_size;
- struct ib_flow_attr *flow = NULL;
+ struct ib_flow_attr *flow;
struct ib_flow_spec_ib *ib_spec;
if (is_attach) {
@@ -3001,43 +2965,44 @@ int mlx4_ib_steer_qp_reg(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
/* Add an empty rule for IB L2 */
memset(&ib_spec->mask, 0, sizeof(ib_spec->mask));
- err = __mlx4_ib_create_flow(&mqp->ibqp, flow,
- IB_FLOW_DOMAIN_NIC,
- MLX4_FS_REGULAR,
- &mqp->reg_id);
- } else {
- err = __mlx4_ib_destroy_flow(mdev->dev, mqp->reg_id);
+ err = __mlx4_ib_create_flow(&mqp->ibqp, flow, MLX4_DOMAIN_NIC,
+ MLX4_FS_REGULAR, &mqp->reg_id);
+ kfree(flow);
+ return err;
}
- kfree(flow);
- return err;
+
+ return __mlx4_ib_destroy_flow(mdev->dev, mqp->reg_id);
}
-static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr)
+static void mlx4_ib_remove(struct auxiliary_device *adev)
{
- struct mlx4_ib_dev *ibdev = ibdev_ptr;
+ struct mlx4_adev *madev = container_of(adev, struct mlx4_adev, adev);
+ struct mlx4_dev *dev = madev->mdev;
+ struct mlx4_ib_dev *ibdev = auxiliary_get_drvdata(adev);
int p;
int i;
+ mlx4_unregister_event_notifier(dev, &ibdev->mlx_nb);
+
mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
devlink_port_type_clear(mlx4_get_devlink_port(dev, i));
ibdev->ib_active = false;
flush_workqueue(wq);
- mlx4_ib_close_sriov(ibdev);
- mlx4_ib_mad_cleanup(ibdev);
- ib_unregister_device(&ibdev->ib_dev);
- mlx4_ib_diag_cleanup(ibdev);
if (ibdev->iboe.nb.notifier_call) {
if (unregister_netdevice_notifier(&ibdev->iboe.nb))
pr_warn("failure unregistering notifier\n");
ibdev->iboe.nb.notifier_call = NULL;
}
- if (ibdev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED) {
- mlx4_qp_release_range(dev, ibdev->steer_qpn_base,
- ibdev->steer_qpn_count);
- kfree(ibdev->ib_uc_qpns_bitmap);
- }
+ mlx4_ib_close_sriov(ibdev);
+ mlx4_ib_mad_cleanup(ibdev);
+ ib_unregister_device(&ibdev->ib_dev);
+ mlx4_ib_diag_cleanup(ibdev);
+
+ mlx4_qp_release_range(dev, ibdev->steer_qpn_base,
+ ibdev->steer_qpn_count);
+ bitmap_free(ibdev->ib_uc_qpns_bitmap);
iounmap(ibdev->uar_map);
for (p = 0; p < ibdev->num_ports; ++p)
@@ -3055,7 +3020,7 @@ static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr)
static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init)
{
- struct mlx4_ib_demux_work **dm = NULL;
+ struct mlx4_ib_demux_work **dm;
struct mlx4_dev *dev = ibdev->dev;
int i;
unsigned long flags;
@@ -3239,11 +3204,13 @@ void mlx4_sched_ib_sl2vl_update_work(struct mlx4_ib_dev *ibdev,
}
}
-static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
- enum mlx4_dev_event event, unsigned long param)
+static int mlx4_ib_event(struct notifier_block *this, unsigned long event,
+ void *param)
{
+ struct mlx4_ib_dev *ibdev =
+ container_of(this, struct mlx4_ib_dev, mlx_nb);
+ struct mlx4_dev *dev = ibdev->dev;
struct ib_event ibev;
- struct mlx4_ib_dev *ibdev = to_mdev((struct ib_device *) ibdev_ptr);
struct mlx4_eqe *eqe = NULL;
struct ib_event_work *ew;
int p = 0;
@@ -3253,22 +3220,28 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
(event == MLX4_DEV_EVENT_PORT_DOWN))) {
ew = kmalloc(sizeof(*ew), GFP_ATOMIC);
if (!ew)
- return;
+ return NOTIFY_DONE;
INIT_WORK(&ew->work, handle_bonded_port_state_event);
ew->ib_dev = ibdev;
queue_work(wq, &ew->work);
- return;
+ return NOTIFY_DONE;
}
- if (event == MLX4_DEV_EVENT_PORT_MGMT_CHANGE)
+ switch (event) {
+ case MLX4_DEV_EVENT_CATASTROPHIC_ERROR:
+ break;
+ case MLX4_DEV_EVENT_PORT_MGMT_CHANGE:
eqe = (struct mlx4_eqe *)param;
- else
- p = (int) param;
+ break;
+ default:
+ p = *(int *)param;
+ break;
+ }
switch (event) {
case MLX4_DEV_EVENT_PORT_UP:
if (p > ibdev->num_ports)
- return;
+ return NOTIFY_DONE;
if (!mlx4_is_slave(dev) &&
rdma_port_get_link_layer(&ibdev->ib_dev, p) ==
IB_LINK_LAYER_INFINIBAND) {
@@ -3283,7 +3256,7 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
case MLX4_DEV_EVENT_PORT_DOWN:
if (p > ibdev->num_ports)
- return;
+ return NOTIFY_DONE;
ibev.event = IB_EVENT_PORT_ERR;
break;
@@ -3296,7 +3269,7 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
case MLX4_DEV_EVENT_PORT_MGMT_CHANGE:
ew = kmalloc(sizeof *ew, GFP_ATOMIC);
if (!ew)
- break;
+ return NOTIFY_DONE;
INIT_WORK(&ew->work, handle_port_mgmt_change_event);
memcpy(&ew->ib_eqe, eqe, sizeof *eqe);
@@ -3306,7 +3279,7 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
queue_work(wq, &ew->work);
else
handle_port_mgmt_change_event(&ew->work);
- return;
+ return NOTIFY_DONE;
case MLX4_DEV_EVENT_SLAVE_INIT:
/* here, p is the slave id */
@@ -3322,7 +3295,7 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
1);
}
}
- return;
+ return NOTIFY_DONE;
case MLX4_DEV_EVENT_SLAVE_SHUTDOWN:
if (mlx4_is_master(dev)) {
@@ -3338,22 +3311,33 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
}
/* here, p is the slave id */
do_slave_init(ibdev, p, 0);
- return;
+ return NOTIFY_DONE;
default:
- return;
+ return NOTIFY_DONE;
}
- ibev.device = ibdev_ptr;
+ ibev.device = &ibdev->ib_dev;
ibev.element.port_num = mlx4_is_bonded(ibdev->dev) ? 1 : (u8)p;
ib_dispatch_event(&ibev);
+ return NOTIFY_DONE;
}
-static struct mlx4_interface mlx4_ib_interface = {
- .add = mlx4_ib_add,
- .remove = mlx4_ib_remove,
- .event = mlx4_ib_event,
+static const struct auxiliary_device_id mlx4_ib_id_table[] = {
+ { .name = MLX4_ADEV_NAME ".ib" },
+ {},
+};
+
+MODULE_DEVICE_TABLE(auxiliary, mlx4_ib_id_table);
+
+static struct mlx4_adrv mlx4_ib_adrv = {
+ .adrv = {
+ .name = "ib",
+ .probe = mlx4_ib_probe,
+ .remove = mlx4_ib_remove,
+ .id_table = mlx4_ib_id_table,
+ },
.protocol = MLX4_PROT_IB_IPV6,
.flags = MLX4_INTFF_BONDING
};
@@ -3366,11 +3350,19 @@ static int __init mlx4_ib_init(void)
if (!wq)
return -ENOMEM;
- err = mlx4_ib_mcg_init();
+ err = mlx4_ib_qp_event_init();
+ if (err)
+ goto clean_qp_event;
+
+ err = mlx4_ib_cm_init();
if (err)
goto clean_wq;
- err = mlx4_register_interface(&mlx4_ib_interface);
+ err = mlx4_ib_mcg_init();
+ if (err)
+ goto clean_cm;
+
+ err = mlx4_register_auxiliary_driver(&mlx4_ib_adrv);
if (err)
goto clean_mcg;
@@ -3379,15 +3371,23 @@ static int __init mlx4_ib_init(void)
clean_mcg:
mlx4_ib_mcg_destroy();
+clean_cm:
+ mlx4_ib_cm_destroy();
+
clean_wq:
+ mlx4_ib_qp_event_cleanup();
+
+clean_qp_event:
destroy_workqueue(wq);
return err;
}
static void __exit mlx4_ib_cleanup(void)
{
- mlx4_unregister_interface(&mlx4_ib_interface);
+ mlx4_unregister_auxiliary_driver(&mlx4_ib_adrv);
mlx4_ib_mcg_destroy();
+ mlx4_ib_cm_destroy();
+ mlx4_ib_qp_event_cleanup();
destroy_workqueue(wq);
}
diff --git a/drivers/infiniband/hw/mlx4/mcg.c b/drivers/infiniband/hw/mlx4/mcg.c
index e010fe459e67..e279e69b9a51 100644
--- a/drivers/infiniband/hw/mlx4/mcg.c
+++ b/drivers/infiniband/hw/mlx4/mcg.c
@@ -43,7 +43,7 @@
#define MAX_VFS 80
#define MAX_PEND_REQS_PER_FUNC 4
-#define MAD_TIMEOUT_MS 2000
+#define MAD_TIMEOUT_SEC 2
#define mcg_warn(fmt, arg...) pr_warn("MCG WARNING: " fmt, ##arg)
#define mcg_error(fmt, arg...) pr_err(fmt, ##arg)
@@ -209,7 +209,7 @@ static struct mcast_group *mcast_insert(struct mlx4_ib_demux_ctx *ctx,
static int send_mad_to_wire(struct mlx4_ib_demux_ctx *ctx, struct ib_mad *mad)
{
struct mlx4_ib_dev *dev = ctx->dev;
- struct ib_ah_attr ah_attr;
+ struct rdma_ah_attr ah_attr;
unsigned long flags;
spin_lock_irqsave(&dev->sm_lock, flags);
@@ -231,20 +231,20 @@ static int send_mad_to_slave(int slave, struct mlx4_ib_demux_ctx *ctx,
struct mlx4_ib_dev *dev = ctx->dev;
struct ib_mad_agent *agent = dev->send_agent[ctx->port - 1][1];
struct ib_wc wc;
- struct ib_ah_attr ah_attr;
+ struct rdma_ah_attr ah_attr;
/* Our agent might not yet be registered when mads start to arrive */
if (!agent)
return -EAGAIN;
- ib_query_ah(dev->sm_ah[ctx->port - 1], &ah_attr);
+ rdma_query_ah(dev->sm_ah[ctx->port - 1], &ah_attr);
if (ib_find_cached_pkey(&dev->ib_dev, ctx->port, IB_DEFAULT_PKEY_FULL, &wc.pkey_index))
return -EINVAL;
wc.sl = 0;
wc.dlid_path_bits = 0;
wc.port_num = ctx->port;
- wc.slid = ah_attr.dlid; /* opensm lid */
+ wc.slid = rdma_ah_get_dlid(&ah_attr); /* opensm lid */
wc.src_qp = 1;
return mlx4_ib_send_to_slave(dev, slave, ctx->port, IB_QPT_GSI, &wc, NULL, mad);
}
@@ -270,7 +270,7 @@ static int send_join_to_wire(struct mcast_group *group, struct ib_sa_mad *sa_mad
if (!ret) {
/* calls mlx4_ib_mcg_timeout_handler */
queue_delayed_work(group->demux->mcg_wq, &group->timeout_work,
- msecs_to_jiffies(MAD_TIMEOUT_MS));
+ secs_to_jiffies(MAD_TIMEOUT_SEC));
}
return ret;
@@ -309,7 +309,7 @@ static int send_leave_to_wire(struct mcast_group *group, u8 join_state)
if (!ret) {
/* calls mlx4_ib_mcg_timeout_handler */
queue_delayed_work(group->demux->mcg_wq, &group->timeout_work,
- msecs_to_jiffies(MAD_TIMEOUT_MS));
+ secs_to_jiffies(MAD_TIMEOUT_SEC));
}
return ret;
@@ -673,7 +673,7 @@ static void mlx4_ib_mcg_work_handler(struct work_struct *work)
if (!list_empty(&group->pending_list))
req = list_first_entry(&group->pending_list,
struct mcast_req, group_list);
- if ((method == IB_MGMT_METHOD_GET_RESP)) {
+ if (method == IB_MGMT_METHOD_GET_RESP) {
if (req) {
send_reply_to_slave(req->func, group, &req->sa_mad, status);
--group->func[req->func].num_pend_reqs;
@@ -808,8 +808,7 @@ static ssize_t sysfs_show_group(struct device *dev,
struct device_attribute *attr, char *buf);
static struct mcast_group *acquire_group(struct mlx4_ib_demux_ctx *ctx,
- union ib_gid *mgid, int create,
- gfp_t gfp_mask)
+ union ib_gid *mgid, int create)
{
struct mcast_group *group, *cur_group;
int is_mgid0;
@@ -825,7 +824,7 @@ static struct mcast_group *acquire_group(struct mlx4_ib_demux_ctx *ctx,
if (!create)
return ERR_PTR(-ENOENT);
- group = kzalloc(sizeof *group, gfp_mask);
+ group = kzalloc(sizeof(*group), GFP_KERNEL);
if (!group)
return ERR_PTR(-ENOMEM);
@@ -892,7 +891,7 @@ int mlx4_ib_mcg_demux_handler(struct ib_device *ibdev, int port, int slave,
case IB_MGMT_METHOD_GET_RESP:
case IB_SA_METHOD_DELETE_RESP:
mutex_lock(&ctx->mcg_table_lock);
- group = acquire_group(ctx, &rec->mgid, 0, GFP_KERNEL);
+ group = acquire_group(ctx, &rec->mgid, 0);
mutex_unlock(&ctx->mcg_table_lock);
if (IS_ERR(group)) {
if (mad->mad_hdr.method == IB_MGMT_METHOD_GET_RESP) {
@@ -945,6 +944,7 @@ int mlx4_ib_mcg_multiplex_handler(struct ib_device *ibdev, int port,
switch (sa_mad->mad_hdr.method) {
case IB_MGMT_METHOD_SET:
may_create = 1;
+ fallthrough;
case IB_SA_METHOD_DELETE:
req = kzalloc(sizeof *req, GFP_KERNEL);
if (!req)
@@ -954,7 +954,7 @@ int mlx4_ib_mcg_multiplex_handler(struct ib_device *ibdev, int port,
req->sa_mad = *sa_mad;
mutex_lock(&ctx->mcg_table_lock);
- group = acquire_group(ctx, &rec->mgid, may_create, GFP_KERNEL);
+ group = acquire_group(ctx, &rec->mgid, may_create);
mutex_unlock(&ctx->mcg_table_lock);
if (IS_ERR(group)) {
kfree(req);
@@ -988,53 +988,63 @@ int mlx4_ib_mcg_multiplex_handler(struct ib_device *ibdev, int port,
}
static ssize_t sysfs_show_group(struct device *dev,
- struct device_attribute *attr, char *buf)
+ struct device_attribute *attr, char *buf)
{
struct mcast_group *group =
container_of(attr, struct mcast_group, dentry);
struct mcast_req *req = NULL;
- char pending_str[40];
char state_str[40];
- ssize_t len = 0;
- int f;
+ char pending_str[40];
+ int len;
+ int i;
+ u32 hoplimit;
if (group->state == MCAST_IDLE)
- sprintf(state_str, "%s", get_state_string(group->state));
+ scnprintf(state_str, sizeof(state_str), "%s",
+ get_state_string(group->state));
else
- sprintf(state_str, "%s(TID=0x%llx)",
- get_state_string(group->state),
- be64_to_cpu(group->last_req_tid));
+ scnprintf(state_str, sizeof(state_str), "%s(TID=0x%llx)",
+ get_state_string(group->state),
+ be64_to_cpu(group->last_req_tid));
+
if (list_empty(&group->pending_list)) {
- sprintf(pending_str, "No");
+ scnprintf(pending_str, sizeof(pending_str), "No");
} else {
- req = list_first_entry(&group->pending_list, struct mcast_req, group_list);
- sprintf(pending_str, "Yes(TID=0x%llx)",
- be64_to_cpu(req->sa_mad.mad_hdr.tid));
+ req = list_first_entry(&group->pending_list, struct mcast_req,
+ group_list);
+ scnprintf(pending_str, sizeof(pending_str), "Yes(TID=0x%llx)",
+ be64_to_cpu(req->sa_mad.mad_hdr.tid));
}
- len += sprintf(buf + len, "%1d [%02d,%02d,%02d] %4d %4s %5s ",
- group->rec.scope_join_state & 0xf,
- group->members[2], group->members[1], group->members[0],
- atomic_read(&group->refcount),
- pending_str,
- state_str);
- for (f = 0; f < MAX_VFS; ++f)
- if (group->func[f].state == MCAST_MEMBER)
- len += sprintf(buf + len, "%d[%1x] ",
- f, group->func[f].join_state);
-
- len += sprintf(buf + len, "\t\t(%4hx %4x %2x %2x %2x %2x %2x "
- "%4x %4x %2x %2x)\n",
- be16_to_cpu(group->rec.pkey),
- be32_to_cpu(group->rec.qkey),
- (group->rec.mtusel_mtu & 0xc0) >> 6,
- group->rec.mtusel_mtu & 0x3f,
- group->rec.tclass,
- (group->rec.ratesel_rate & 0xc0) >> 6,
- group->rec.ratesel_rate & 0x3f,
- (be32_to_cpu(group->rec.sl_flowlabel_hoplimit) & 0xf0000000) >> 28,
- (be32_to_cpu(group->rec.sl_flowlabel_hoplimit) & 0x0fffff00) >> 8,
- be32_to_cpu(group->rec.sl_flowlabel_hoplimit) & 0x000000ff,
- group->rec.proxy_join);
+
+ len = sysfs_emit(buf, "%1d [%02d,%02d,%02d] %4d %4s %5s ",
+ group->rec.scope_join_state & 0xf,
+ group->members[2],
+ group->members[1],
+ group->members[0],
+ atomic_read(&group->refcount),
+ pending_str,
+ state_str);
+
+ for (i = 0; i < MAX_VFS; i++) {
+ if (group->func[i].state == MCAST_MEMBER)
+ len += sysfs_emit_at(buf, len, "%d[%1x] ", i,
+ group->func[i].join_state);
+ }
+
+ hoplimit = be32_to_cpu(group->rec.sl_flowlabel_hoplimit);
+ len += sysfs_emit_at(buf, len,
+ "\t\t(%4hx %4x %2x %2x %2x %2x %2x %4x %4x %2x %2x)\n",
+ be16_to_cpu(group->rec.pkey),
+ be32_to_cpu(group->rec.qkey),
+ (group->rec.mtusel_mtu & 0xc0) >> 6,
+ (group->rec.mtusel_mtu & 0x3f),
+ group->rec.tclass,
+ (group->rec.ratesel_rate & 0xc0) >> 6,
+ (group->rec.ratesel_rate & 0x3f),
+ (hoplimit & 0xf0000000) >> 28,
+ (hoplimit & 0x0fffff00) >> 8,
+ (hoplimit & 0x000000ff),
+ group->rec.proxy_join);
return len;
}
@@ -1081,7 +1091,7 @@ static void _mlx4_ib_mcg_port_cleanup(struct mlx4_ib_demux_ctx *ctx, int destroy
for (i = 0; i < MAX_VFS; ++i)
clean_vf_mcast(ctx, i);
- end = jiffies + msecs_to_jiffies(MAD_TIMEOUT_MS + 3000);
+ end = jiffies + secs_to_jiffies(MAD_TIMEOUT_SEC + 3);
do {
count = 0;
mutex_lock(&ctx->mcg_table_lock);
@@ -1091,7 +1101,7 @@ static void _mlx4_ib_mcg_port_cleanup(struct mlx4_ib_demux_ctx *ctx, int destroy
if (!count)
break;
- msleep(1);
+ usleep_range(1000, 2000);
} while (time_after(end, jiffies));
flush_workqueue(ctx->mcg_wq);
@@ -1102,7 +1112,8 @@ static void _mlx4_ib_mcg_port_cleanup(struct mlx4_ib_demux_ctx *ctx, int destroy
while ((p = rb_first(&ctx->mcg_table)) != NULL) {
group = rb_entry(p, struct mcast_group, node);
if (atomic_read(&group->refcount))
- mcg_warn_group(group, "group refcount %d!!! (pointer %p)\n", atomic_read(&group->refcount), group);
+ mcg_debug_group(group, "group refcount %d!!! (pointer %p)\n",
+ atomic_read(&group->refcount), group);
force_clean_group(group);
}
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index 7f3d976d81ed..5df5b955114e 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -38,6 +38,7 @@
#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/idr.h>
+#include <linux/notifier.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_umem.h>
@@ -46,6 +47,8 @@
#include <linux/mlx4/device.h>
#include <linux/mlx4/doorbell.h>
+#include <linux/mlx4/qp.h>
+#include <linux/mlx4/cq.h>
#define MLX4_IB_DRV_NAME "mlx4_ib"
@@ -55,7 +58,7 @@
#define pr_fmt(fmt) "<" MLX4_IB_DRV_NAME "> %s: " fmt, __func__
#define mlx4_ib_warn(ibdev, format, arg...) \
- dev_warn((ibdev)->dma_device, MLX4_IB_DRV_NAME ": " format, ## arg)
+ dev_warn((ibdev)->dev.parent, MLX4_IB_DRV_NAME ": " format, ## arg)
enum {
MLX4_IB_SQ_MIN_WQE_SHIFT = 6,
@@ -78,16 +81,13 @@ enum hw_bar_type {
HW_BAR_COUNT
};
-struct mlx4_ib_vma_private_data {
- struct vm_area_struct *vma;
-};
-
struct mlx4_ib_ucontext {
struct ib_ucontext ibucontext;
struct mlx4_uar uar;
struct list_head db_page_list;
struct mutex db_page_mutex;
- struct mlx4_ib_vma_private_data hw_bar_info[HW_BAR_COUNT];
+ struct list_head wqn_ranges_list;
+ struct mutex wqn_ranges_mutex; /* protect wqn_ranges_list */
};
struct mlx4_ib_pd {
@@ -147,11 +147,6 @@ struct mlx4_ib_mw {
struct mlx4_mw mmw;
};
-struct mlx4_ib_fmr {
- struct ib_fmr ibfmr;
- struct mlx4_fmr mfmr;
-};
-
#define MAX_REGS_PER_FLOW 2
struct mlx4_flow_reg_id {
@@ -185,7 +180,7 @@ enum mlx4_ib_qp_flags {
MLX4_IB_QP_LSO = IB_QP_CREATE_IPOIB_UD_LSO,
MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK = IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK,
MLX4_IB_QP_NETIF = IB_QP_CREATE_NETIF_QP,
- MLX4_IB_QP_CREATE_USE_GFP_NOIO = IB_QP_CREATE_USE_GFP_NOIO,
+ MLX4_IB_QP_SCATTER_FCS = IB_QP_CREATE_SCATTER_FCS,
/* Mellanox specific flags start from IB_QP_CREATE_RESERVED_START */
MLX4_IB_ROCE_V2_GSI_QP = MLX4_IB_QP_CREATE_ROCE_V2_GSI,
@@ -239,7 +234,8 @@ enum mlx4_ib_mad_ifc_flags {
};
enum {
- MLX4_NUM_TUNNEL_BUFS = 256,
+ MLX4_NUM_TUNNEL_BUFS = 512,
+ MLX4_NUM_WIRE_BUFS = 2048,
};
struct mlx4_ib_tunnel_header {
@@ -290,8 +286,45 @@ struct mlx4_roce_smac_vlan_info {
int update_vid;
};
+struct mlx4_wqn_range {
+ int base_wqn;
+ int size;
+ int refcount;
+ bool dirty;
+ struct list_head list;
+};
+
+struct mlx4_ib_rss {
+ unsigned int base_qpn_tbl_sz;
+ u8 flags;
+ u8 rss_key[MLX4_EN_RSS_KEY_SIZE];
+};
+
+enum {
+ /*
+ * Largest possible UD header: send with GRH and immediate
+ * data plus 18 bytes for an Ethernet header with VLAN/802.1Q
+ * tag. (LRH would only use 8 bytes, so Ethernet is the
+ * biggest case)
+ */
+ MLX4_IB_UD_HEADER_SIZE = 82,
+ MLX4_IB_LSO_HEADER_SPARE = 128,
+};
+
+struct mlx4_ib_sqp {
+ int pkey_index;
+ u32 qkey;
+ u32 send_psn;
+ struct ib_ud_header ud_header;
+ u8 header_buf[MLX4_IB_UD_HEADER_SIZE];
+ struct ib_qp *roce_v2_gsi;
+};
+
struct mlx4_ib_qp {
- struct ib_qp ibqp;
+ union {
+ struct ib_qp ibqp;
+ struct ib_wq ibwq;
+ };
struct mlx4_qp mqp;
struct mlx4_buf buf;
@@ -301,7 +334,6 @@ struct mlx4_ib_qp {
u32 doorbell_qpn;
__be32 sq_signal_bits;
unsigned sq_next_wqe;
- int sq_max_wqes_per_wr;
int sq_spare_wqes;
struct mlx4_ib_wq sq;
@@ -319,6 +351,7 @@ struct mlx4_ib_qp {
u8 sq_no_prefetch;
u8 state;
int mlx_type;
+ u32 inl_recv_sz;
struct list_head gid_list;
struct list_head steering_rules;
struct mlx4_ib_buf *sqp_proxy_rcv;
@@ -329,6 +362,13 @@ struct mlx4_ib_qp {
struct list_head cq_recv_list;
struct list_head cq_send_list;
struct counter_index *counter_index;
+ struct mlx4_wqn_range *wqn_range;
+ /* Number of RSS QP parents that uses this WQ */
+ u32 rss_usecnt;
+ union {
+ struct mlx4_ib_rss *rss_ctx;
+ struct mlx4_ib_sqp *sqp;
+ };
};
struct mlx4_ib_srq {
@@ -351,6 +391,10 @@ struct mlx4_ib_ah {
union mlx4_ext_av av;
};
+struct mlx4_ib_rwq_ind_table {
+ struct ib_rwq_ind_table ib_rwq_ind_tbl;
+};
+
/****************************************/
/* alias guid support */
/****************************************/
@@ -386,7 +430,7 @@ struct mlx4_sriov_alias_guid_port_rec_det {
struct mlx4_sriov_alias_guid_info_rec_det all_rec_per_port[NUM_ALIAS_GUID_REC_IN_PORT];
struct workqueue_struct *wq;
struct delayed_work alias_guid_work;
- u8 port;
+ u32 port;
u32 state_flags;
struct mlx4_sriov_alias_guid *parent;
struct list_head cb_list;
@@ -439,6 +483,7 @@ struct mlx4_ib_demux_pv_ctx {
struct ib_pd *pd;
struct work_struct work;
struct workqueue_struct *wq;
+ struct workqueue_struct *wi_wq;
struct mlx4_ib_demux_pv_qp qp[2];
};
@@ -446,6 +491,7 @@ struct mlx4_ib_demux_ctx {
struct ib_device *ib_dev;
int port;
struct workqueue_struct *wq;
+ struct workqueue_struct *wi_wq;
struct workqueue_struct *ud_wq;
spinlock_t ud_lock;
atomic64_t subnet_prefix;
@@ -472,10 +518,12 @@ struct mlx4_ib_sriov {
struct mlx4_sriov_alias_guid alias_guid;
/* CM paravirtualization fields */
- struct list_head cm_list;
+ struct xarray pv_id_table;
+ u32 pv_id_next;
spinlock_t id_map_lock;
struct rb_root sl_id_map;
- struct idr pv_id_table;
+ struct list_head cm_list;
+ struct xarray xa_rej_tmout;
};
struct gid_cache_context {
@@ -487,6 +535,7 @@ struct gid_entry {
union ib_gid gid;
enum ib_gid_type gid_type;
struct gid_cache_context *ctx;
+ u16 vlan_id;
};
struct mlx4_port_gid_table {
@@ -499,6 +548,7 @@ struct mlx4_ib_iboe {
atomic64_t mac[MLX4_MAX_PORTS];
struct notifier_block nb;
struct mlx4_port_gid_table gids[MLX4_MAX_PORTS];
+ enum ib_port_state last_port_state[MLX4_MAX_PORTS];
};
struct pkey_mgt {
@@ -552,7 +602,7 @@ struct mlx4_ib_counters {
#define MLX4_DIAG_COUNTERS_TYPES 2
struct mlx4_ib_diag_counters {
- const char **name;
+ struct rdma_stat_desc *descs;
u32 *offset;
u32 num_counters;
};
@@ -595,6 +645,7 @@ struct mlx4_ib_dev {
spinlock_t reset_flow_resource_lock;
struct list_head qp_list;
struct mlx4_ib_diag_counters diag_counters[MLX4_DIAG_COUNTERS_TYPES];
+ struct notifier_block mlx_nb;
};
struct ib_event_work {
@@ -608,7 +659,7 @@ struct mlx4_ib_qp_tunnel_init_attr {
struct ib_qp_init_attr init_attr;
int slave;
enum ib_qp_type proxy_qp_type;
- u8 port;
+ u32 port;
};
struct mlx4_uverbs_ex_query_device {
@@ -616,15 +667,8 @@ struct mlx4_uverbs_ex_query_device {
__u32 reserved;
};
-enum query_device_resp_mask {
- QUERY_DEVICE_RESP_MASK_TIMESTAMP = 1UL << 0,
-};
-
-struct mlx4_uverbs_ex_query_device_resp {
- __u32 comp_mask;
- __u32 response_length;
- __u64 hca_core_clock_offset;
-};
+/* 4k - 4G */
+#define MLX4_PAGE_SIZE_SUPPORTED ((unsigned long)GENMASK_ULL(31, 12))
static inline struct mlx4_ib_dev *to_mdev(struct ib_device *ibdev)
{
@@ -666,11 +710,6 @@ static inline struct mlx4_ib_mw *to_mmw(struct ib_mw *ibmw)
return container_of(ibmw, struct mlx4_ib_mw, ibmw);
}
-static inline struct mlx4_ib_fmr *to_mfmr(struct ib_fmr *ibfmr)
-{
- return container_of(ibfmr, struct mlx4_ib_fmr, ibfmr);
-}
-
static inline struct mlx4_ib_flow *to_mflow(struct ib_flow *ibflow)
{
return container_of(ibflow, struct mlx4_ib_flow, ibflow);
@@ -711,7 +750,7 @@ static inline u8 mlx4_ib_bond_next_port(struct mlx4_ib_dev *dev)
int mlx4_ib_init_sriov(struct mlx4_ib_dev *dev);
void mlx4_ib_close_sriov(struct mlx4_ib_dev *dev);
-int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context, unsigned long virt,
+int mlx4_ib_db_map_user(struct ib_udata *udata, unsigned long virt,
struct mlx4_db *db);
void mlx4_ib_db_unmap_user(struct mlx4_ib_ucontext *context, struct mlx4_db *db);
@@ -720,85 +759,80 @@ int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt,
struct ib_umem *umem);
struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt_addr, int access_flags,
+ struct ib_dmah *dmah,
struct ib_udata *udata);
-int mlx4_ib_dereg_mr(struct ib_mr *mr);
-struct ib_mw *mlx4_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
- struct ib_udata *udata);
+int mlx4_ib_dereg_mr(struct ib_mr *mr, struct ib_udata *udata);
+int mlx4_ib_alloc_mw(struct ib_mw *mw, struct ib_udata *udata);
int mlx4_ib_dealloc_mw(struct ib_mw *mw);
-struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd,
- enum ib_mr_type mr_type,
+struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
u32 max_num_sg);
int mlx4_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
unsigned int *sg_offset);
int mlx4_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period);
int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata);
-struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev,
- const struct ib_cq_init_attr *attr,
- struct ib_ucontext *context,
- struct ib_udata *udata);
-int mlx4_ib_destroy_cq(struct ib_cq *cq);
+int mlx4_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+ struct uverbs_attr_bundle *attrs);
+int mlx4_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata);
int mlx4_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
int mlx4_ib_arm_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags);
void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq);
void mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq);
-struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
- struct ib_udata *udata);
-int mlx4_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr);
-int mlx4_ib_destroy_ah(struct ib_ah *ah);
+int mlx4_ib_create_ah(struct ib_ah *ah, struct rdma_ah_init_attr *init_attr,
+ struct ib_udata *udata);
+int mlx4_ib_create_ah_slave(struct ib_ah *ah, struct rdma_ah_attr *ah_attr,
+ int slave_sgid_index, u8 *s_mac, u16 vlan_tag);
+int mlx4_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr);
+static inline int mlx4_ib_destroy_ah(struct ib_ah *ah, u32 flags)
+{
+ return 0;
+}
-struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
- struct ib_srq_init_attr *init_attr,
- struct ib_udata *udata);
+int mlx4_ib_create_srq(struct ib_srq *srq, struct ib_srq_init_attr *init_attr,
+ struct ib_udata *udata);
int mlx4_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
enum ib_srq_attr_mask attr_mask, struct ib_udata *udata);
int mlx4_ib_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr);
-int mlx4_ib_destroy_srq(struct ib_srq *srq);
+int mlx4_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata);
void mlx4_ib_free_srq_wqe(struct mlx4_ib_srq *srq, int wqe_index);
-int mlx4_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
- struct ib_recv_wr **bad_wr);
-
-struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
- struct ib_qp_init_attr *init_attr,
- struct ib_udata *udata);
-int mlx4_ib_destroy_qp(struct ib_qp *qp);
+int mlx4_ib_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
+ const struct ib_recv_wr **bad_wr);
+
+int mlx4_ib_create_qp(struct ib_qp *qp, struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata);
+int mlx4_ib_destroy_qp(struct ib_qp *qp, struct ib_udata *udata);
+void mlx4_ib_drain_sq(struct ib_qp *qp);
+void mlx4_ib_drain_rq(struct ib_qp *qp);
int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
int attr_mask, struct ib_udata *udata);
int mlx4_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask,
struct ib_qp_init_attr *qp_init_attr);
-int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
- struct ib_send_wr **bad_wr);
-int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
- struct ib_recv_wr **bad_wr);
+int mlx4_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+ const struct ib_send_wr **bad_wr);
+int mlx4_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
+ const struct ib_recv_wr **bad_wr);
int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int mad_ifc_flags,
int port, const struct ib_wc *in_wc, const struct ib_grh *in_grh,
const void *in_mad, void *response_mad);
-int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
+int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u32 port_num,
const struct ib_wc *in_wc, const struct ib_grh *in_grh,
- const struct ib_mad_hdr *in, size_t in_mad_size,
- struct ib_mad_hdr *out, size_t *out_mad_size,
- u16 *out_mad_pkey_index);
+ const struct ib_mad *in, struct ib_mad *out,
+ size_t *out_mad_size, u16 *out_mad_pkey_index);
int mlx4_ib_mad_init(struct mlx4_ib_dev *dev);
void mlx4_ib_mad_cleanup(struct mlx4_ib_dev *dev);
-struct ib_fmr *mlx4_ib_fmr_alloc(struct ib_pd *pd, int mr_access_flags,
- struct ib_fmr_attr *fmr_attr);
-int mlx4_ib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, int npages,
- u64 iova);
-int mlx4_ib_unmap_fmr(struct list_head *fmr_list);
-int mlx4_ib_fmr_dealloc(struct ib_fmr *fmr);
-int __mlx4_ib_query_port(struct ib_device *ibdev, u8 port,
+int __mlx4_ib_query_port(struct ib_device *ibdev, u32 port,
struct ib_port_attr *props, int netw_view);
-int __mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
+int __mlx4_ib_query_pkey(struct ib_device *ibdev, u32 port, u16 index,
u16 *pkey, int netw_view);
-int __mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
+int __mlx4_ib_query_gid(struct ib_device *ibdev, u32 port, int index,
union ib_gid *gid, int netw_view);
static inline bool mlx4_ib_ah_grh_present(struct mlx4_ib_ah *ah)
{
- u8 port = be32_to_cpu(ah->av.ib.port_pd) >> 24 & 3;
+ u32 port = be32_to_cpu(ah->av.ib.port_pd) >> 24 & 3;
if (rdma_port_get_link_layer(ah->ibah.device, port) == IB_LINK_LAYER_ETHERNET)
return true;
@@ -812,7 +846,7 @@ void clean_vf_mcast(struct mlx4_ib_demux_ctx *ctx, int slave);
int mlx4_ib_mcg_init(void);
void mlx4_ib_mcg_destroy(void);
-int mlx4_ib_find_real_gid(struct ib_device *ibdev, u8 port, __be64 guid);
+int mlx4_ib_find_real_gid(struct ib_device *ibdev, u32 port, __be64 guid);
int mlx4_ib_mcg_multiplex_handler(struct ib_device *ibdev, int port, int slave,
struct ib_sa_mad *sa_mad);
@@ -822,18 +856,18 @@ int mlx4_ib_mcg_demux_handler(struct ib_device *ibdev, int port, int slave,
int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
union ib_gid *gid);
-void mlx4_ib_dispatch_event(struct mlx4_ib_dev *dev, u8 port_num,
+void mlx4_ib_dispatch_event(struct mlx4_ib_dev *dev, u32 port_num,
enum ib_event_type type);
void mlx4_ib_tunnels_update_work(struct work_struct *work);
-int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
+int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u32 port,
enum ib_qp_type qpt, struct ib_wc *wc,
struct ib_grh *grh, struct ib_mad *mad);
-int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
+int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u32 port,
enum ib_qp_type dest_qpt, u16 pkey_index, u32 remote_qpn,
- u32 qkey, struct ib_ah_attr *attr, u8 *s_mac,
+ u32 qkey, struct rdma_ah_attr *attr, u8 *s_mac,
u16 vlan_id, struct ib_mad *mad);
__be64 mlx4_ib_get_new_demux_tid(struct mlx4_ib_demux_ctx *ctx);
@@ -855,10 +889,10 @@ void mlx4_ib_invalidate_all_guid_record(struct mlx4_ib_dev *dev, int port);
void mlx4_ib_notify_slaves_on_guid_change(struct mlx4_ib_dev *dev,
int block_num,
- u8 port_num, u8 *p_data);
+ u32 port_num, u8 *p_data);
void mlx4_ib_update_cache_on_guid_change(struct mlx4_ib_dev *dev,
- int block_num, u8 port_num,
+ int block_num, u32 port_num,
u8 *p_data);
int add_sysfs_port_mcg_attr(struct mlx4_ib_dev *device, int port_num,
@@ -879,16 +913,51 @@ int mlx4_ib_steer_qp_alloc(struct mlx4_ib_dev *dev, int count, int *qpn);
void mlx4_ib_steer_qp_free(struct mlx4_ib_dev *dev, u32 qpn, int count);
int mlx4_ib_steer_qp_reg(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
int is_attach);
-int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags,
- u64 start, u64 length, u64 virt_addr,
- int mr_access_flags, struct ib_pd *pd,
- struct ib_udata *udata);
+struct ib_mr *mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags, u64 start,
+ u64 length, u64 virt_addr,
+ int mr_access_flags, struct ib_pd *pd,
+ struct ib_udata *udata);
int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev,
- u8 port_num, int index);
+ const struct ib_gid_attr *attr);
void mlx4_sched_ib_sl2vl_update_work(struct mlx4_ib_dev *ibdev,
int port);
void mlx4_ib_sl2vl_update(struct mlx4_ib_dev *mdev, int port);
+struct ib_wq *mlx4_ib_create_wq(struct ib_pd *pd,
+ struct ib_wq_init_attr *init_attr,
+ struct ib_udata *udata);
+int mlx4_ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata);
+int mlx4_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
+ u32 wq_attr_mask, struct ib_udata *udata);
+
+int mlx4_ib_create_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_tbl,
+ struct ib_rwq_ind_table_init_attr *init_attr,
+ struct ib_udata *udata);
+static inline int
+mlx4_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *wq_ind_table)
+{
+ return 0;
+}
+static inline int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem,
+ u64 start,
+ int *num_of_mtts)
+{
+ unsigned long pg_sz;
+
+ pg_sz = ib_umem_find_best_pgsz(umem, MLX4_PAGE_SIZE_SUPPORTED, start);
+ if (!pg_sz)
+ return -EOPNOTSUPP;
+
+ *num_of_mtts = ib_umem_num_dma_blocks(umem, pg_sz);
+ return order_base_2(pg_sz);
+}
+
+int mlx4_ib_cm_init(void);
+void mlx4_ib_cm_destroy(void);
+
+int mlx4_ib_qp_event_init(void);
+void mlx4_ib_qp_event_cleanup(void);
+
#endif /* MLX4_IB_H */
diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c
index 5d73989d9771..94464f1694d9 100644
--- a/drivers/infiniband/hw/mlx4/mr.c
+++ b/drivers/infiniband/hw/mlx4/mr.c
@@ -90,49 +90,56 @@ err_free:
int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt,
struct ib_umem *umem)
{
- u64 *pages;
- int i, k, entry;
- int n;
- int len;
- int err = 0;
- struct scatterlist *sg;
+ struct ib_block_iter biter;
+ int err, i = 0;
+ u64 addr;
- pages = (u64 *) __get_free_page(GFP_KERNEL);
- if (!pages)
- return -ENOMEM;
+ rdma_umem_for_each_dma_block(umem, &biter, BIT(mtt->page_shift)) {
+ addr = rdma_block_iter_dma_address(&biter);
+ err = mlx4_write_mtt(dev->dev, mtt, i++, 1, &addr);
+ if (err)
+ return err;
+ }
+ return 0;
+}
- i = n = 0;
-
- for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
- len = sg_dma_len(sg) >> mtt->page_shift;
- for (k = 0; k < len; ++k) {
- pages[i++] = sg_dma_address(sg) +
- umem->page_size * k;
- /*
- * Be friendly to mlx4_write_mtt() and
- * pass it chunks of appropriate size.
- */
- if (i == PAGE_SIZE / sizeof (u64)) {
- err = mlx4_write_mtt(dev->dev, mtt, n,
- i, pages);
- if (err)
- goto out;
- n += i;
- i = 0;
- }
+static struct ib_umem *mlx4_get_umem_mr(struct ib_device *device, u64 start,
+ u64 length, int access_flags)
+{
+ /*
+ * Force registering the memory as writable if the underlying pages
+ * are writable. This is so rereg can change the access permissions
+ * from readable to writable without having to run through ib_umem_get
+ * again
+ */
+ if (!ib_access_writable(access_flags)) {
+ unsigned long untagged_start = untagged_addr(start);
+ struct vm_area_struct *vma;
+
+ mmap_read_lock(current->mm);
+ /*
+ * FIXME: Ideally this would iterate over all the vmas that
+ * cover the memory, but for now it requires a single vma to
+ * entirely cover the MR to support RO mappings.
+ */
+ vma = find_vma(current->mm, untagged_start);
+ if (vma && vma->vm_end >= untagged_start + length &&
+ vma->vm_start <= untagged_start) {
+ if (vma->vm_flags & VM_WRITE)
+ access_flags |= IB_ACCESS_LOCAL_WRITE;
+ } else {
+ access_flags |= IB_ACCESS_LOCAL_WRITE;
}
- }
- if (i)
- err = mlx4_write_mtt(dev->dev, mtt, n, i, pages);
+ mmap_read_unlock(current->mm);
+ }
-out:
- free_page((unsigned long) pages);
- return err;
+ return ib_umem_get(device, start, length, access_flags);
}
struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt_addr, int access_flags,
+ struct ib_dmah *dmah,
struct ib_udata *udata)
{
struct mlx4_ib_dev *dev = to_mdev(pd->device);
@@ -141,21 +148,24 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
int err;
int n;
+ if (dmah)
+ return ERR_PTR(-EOPNOTSUPP);
+
mr = kzalloc(sizeof(*mr), GFP_KERNEL);
if (!mr)
return ERR_PTR(-ENOMEM);
- /* Force registering the memory as writable. */
- /* Used for memory re-registeration. HCA protects the access */
- mr->umem = ib_umem_get(pd->uobject->context, start, length,
- access_flags | IB_ACCESS_LOCAL_WRITE, 0);
+ mr->umem = mlx4_get_umem_mr(pd->device, start, length, access_flags);
if (IS_ERR(mr->umem)) {
err = PTR_ERR(mr->umem);
goto err_free;
}
- n = ib_umem_page_count(mr->umem);
- shift = ilog2(mr->umem->page_size);
+ shift = mlx4_ib_umem_calc_optimal_mtt_size(mr->umem, start, &n);
+ if (shift < 0) {
+ err = shift;
+ goto err_umem;
+ }
err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, virt_addr, length,
convert_access(access_flags), n, shift, &mr->mmr);
@@ -171,6 +181,7 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
goto err_mr;
mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key;
+ mr->ibmr.page_size = 1U << shift;
return &mr->ibmr;
@@ -186,10 +197,10 @@ err_free:
return ERR_PTR(err);
}
-int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags,
- u64 start, u64 length, u64 virt_addr,
- int mr_access_flags, struct ib_pd *pd,
- struct ib_udata *udata)
+struct ib_mr *mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags, u64 start,
+ u64 length, u64 virt_addr,
+ int mr_access_flags, struct ib_pd *pd,
+ struct ib_udata *udata)
{
struct mlx4_ib_dev *dev = to_mdev(mr->device);
struct mlx4_ib_mr *mmr = to_mmr(mr);
@@ -202,9 +213,8 @@ int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags,
* race exists.
*/
err = mlx4_mr_hw_get_mpt(dev->dev, &mmr->mmr, &pmpt_entry);
-
if (err)
- return err;
+ return ERR_PTR(err);
if (flags & IB_MR_REREG_PD) {
err = mlx4_mr_hw_change_pd(dev->dev, *pmpt_entry,
@@ -215,6 +225,12 @@ int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags,
}
if (flags & IB_MR_REREG_ACCESS) {
+ if (ib_access_writable(mr_access_flags) &&
+ !mmr->umem->writable) {
+ err = -EPERM;
+ goto release_mpt_entry;
+ }
+
err = mlx4_mr_hw_change_access(dev->dev, *pmpt_entry,
convert_access(mr_access_flags));
@@ -228,18 +244,16 @@ int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags,
mlx4_mr_rereg_mem_cleanup(dev->dev, &mmr->mmr);
ib_umem_release(mmr->umem);
- mmr->umem = ib_umem_get(mr->uobject->context, start, length,
- mr_access_flags |
- IB_ACCESS_LOCAL_WRITE,
- 0);
+ mmr->umem = mlx4_get_umem_mr(mr->device, start, length,
+ mr_access_flags);
if (IS_ERR(mmr->umem)) {
err = PTR_ERR(mmr->umem);
/* Prevent mlx4_ib_dereg_mr from free'ing invalid pointer */
mmr->umem = NULL;
goto release_mpt_entry;
}
- n = ib_umem_page_count(mmr->umem);
- shift = ilog2(mmr->umem->page_size);
+ n = ib_umem_num_dma_blocks(mmr->umem, PAGE_SIZE);
+ shift = PAGE_SHIFT;
err = mlx4_mr_rereg_mem_write(dev->dev, &mmr->mmr,
virt_addr, length, n, shift,
@@ -268,8 +282,9 @@ int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags,
release_mpt_entry:
mlx4_mr_hw_put_mpt(dev->dev, pmpt_entry);
-
- return err;
+ if (err)
+ return ERR_PTR(err);
+ return NULL;
}
static int
@@ -292,10 +307,10 @@ mlx4_alloc_priv_pages(struct ib_device *device,
if (!mr->pages)
return -ENOMEM;
- mr->page_map = dma_map_single(device->dma_device, mr->pages,
+ mr->page_map = dma_map_single(device->dev.parent, mr->pages,
mr->page_map_size, DMA_TO_DEVICE);
- if (dma_mapping_error(device->dma_device, mr->page_map)) {
+ if (dma_mapping_error(device->dev.parent, mr->page_map)) {
ret = -ENOMEM;
goto err;
}
@@ -313,14 +328,14 @@ mlx4_free_priv_pages(struct mlx4_ib_mr *mr)
if (mr->pages) {
struct ib_device *device = mr->ibmr.device;
- dma_unmap_single(device->dma_device, mr->page_map,
+ dma_unmap_single(device->dev.parent, mr->page_map,
mr->page_map_size, DMA_TO_DEVICE);
free_page((unsigned long)mr->pages);
mr->pages = NULL;
}
}
-int mlx4_ib_dereg_mr(struct ib_mr *ibmr)
+int mlx4_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
{
struct mlx4_ib_mr *mr = to_mmr(ibmr);
int ret;
@@ -337,37 +352,27 @@ int mlx4_ib_dereg_mr(struct ib_mr *ibmr)
return 0;
}
-struct ib_mw *mlx4_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
- struct ib_udata *udata)
+int mlx4_ib_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata)
{
- struct mlx4_ib_dev *dev = to_mdev(pd->device);
- struct mlx4_ib_mw *mw;
+ struct mlx4_ib_dev *dev = to_mdev(ibmw->device);
+ struct mlx4_ib_mw *mw = to_mmw(ibmw);
int err;
- mw = kmalloc(sizeof(*mw), GFP_KERNEL);
- if (!mw)
- return ERR_PTR(-ENOMEM);
-
- err = mlx4_mw_alloc(dev->dev, to_mpd(pd)->pdn,
- to_mlx4_type(type), &mw->mmw);
+ err = mlx4_mw_alloc(dev->dev, to_mpd(ibmw->pd)->pdn,
+ to_mlx4_type(ibmw->type), &mw->mmw);
if (err)
- goto err_free;
+ return err;
err = mlx4_mw_enable(dev->dev, &mw->mmw);
if (err)
goto err_mw;
- mw->ibmw.rkey = mw->mmw.key;
-
- return &mw->ibmw;
+ ibmw->rkey = mw->mmw.key;
+ return 0;
err_mw:
mlx4_mw_free(dev->dev, &mw->mmw);
-
-err_free:
- kfree(mw);
-
- return ERR_PTR(err);
+ return err;
}
int mlx4_ib_dealloc_mw(struct ib_mw *ibmw)
@@ -375,13 +380,10 @@ int mlx4_ib_dealloc_mw(struct ib_mw *ibmw)
struct mlx4_ib_mw *mw = to_mmw(ibmw);
mlx4_mw_free(to_mdev(ibmw->device)->dev, &mw->mmw);
- kfree(mw);
-
return 0;
}
-struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd,
- enum ib_mr_type mr_type,
+struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
u32 max_num_sg)
{
struct mlx4_ib_dev *dev = to_mdev(pd->device);
@@ -406,7 +408,6 @@ struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd,
goto err_free_mr;
mr->max_pages = max_num_sg;
-
err = mlx4_mr_enable(dev->dev, &mr->mmr);
if (err)
goto err_free_pl;
@@ -417,6 +418,7 @@ struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd,
return &mr->ibmr;
err_free_pl:
+ mr->ibmr.device = pd->device;
mlx4_free_priv_pages(mr);
err_free_mr:
(void) mlx4_mr_free(dev->dev, &mr->mmr);
@@ -425,99 +427,6 @@ err_free:
return ERR_PTR(err);
}
-struct ib_fmr *mlx4_ib_fmr_alloc(struct ib_pd *pd, int acc,
- struct ib_fmr_attr *fmr_attr)
-{
- struct mlx4_ib_dev *dev = to_mdev(pd->device);
- struct mlx4_ib_fmr *fmr;
- int err = -ENOMEM;
-
- fmr = kmalloc(sizeof *fmr, GFP_KERNEL);
- if (!fmr)
- return ERR_PTR(-ENOMEM);
-
- err = mlx4_fmr_alloc(dev->dev, to_mpd(pd)->pdn, convert_access(acc),
- fmr_attr->max_pages, fmr_attr->max_maps,
- fmr_attr->page_shift, &fmr->mfmr);
- if (err)
- goto err_free;
-
- err = mlx4_fmr_enable(to_mdev(pd->device)->dev, &fmr->mfmr);
- if (err)
- goto err_mr;
-
- fmr->ibfmr.rkey = fmr->ibfmr.lkey = fmr->mfmr.mr.key;
-
- return &fmr->ibfmr;
-
-err_mr:
- (void) mlx4_mr_free(to_mdev(pd->device)->dev, &fmr->mfmr.mr);
-
-err_free:
- kfree(fmr);
-
- return ERR_PTR(err);
-}
-
-int mlx4_ib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
- int npages, u64 iova)
-{
- struct mlx4_ib_fmr *ifmr = to_mfmr(ibfmr);
- struct mlx4_ib_dev *dev = to_mdev(ifmr->ibfmr.device);
-
- return mlx4_map_phys_fmr(dev->dev, &ifmr->mfmr, page_list, npages, iova,
- &ifmr->ibfmr.lkey, &ifmr->ibfmr.rkey);
-}
-
-int mlx4_ib_unmap_fmr(struct list_head *fmr_list)
-{
- struct ib_fmr *ibfmr;
- int err;
- struct mlx4_dev *mdev = NULL;
-
- list_for_each_entry(ibfmr, fmr_list, list) {
- if (mdev && to_mdev(ibfmr->device)->dev != mdev)
- return -EINVAL;
- mdev = to_mdev(ibfmr->device)->dev;
- }
-
- if (!mdev)
- return 0;
-
- list_for_each_entry(ibfmr, fmr_list, list) {
- struct mlx4_ib_fmr *ifmr = to_mfmr(ibfmr);
-
- mlx4_fmr_unmap(mdev, &ifmr->mfmr, &ifmr->ibfmr.lkey, &ifmr->ibfmr.rkey);
- }
-
- /*
- * Make sure all MPT status updates are visible before issuing
- * SYNC_TPT firmware command.
- */
- wmb();
-
- err = mlx4_SYNC_TPT(mdev);
- if (err)
- pr_warn("SYNC_TPT error %d when "
- "unmapping FMRs\n", err);
-
- return 0;
-}
-
-int mlx4_ib_fmr_dealloc(struct ib_fmr *ibfmr)
-{
- struct mlx4_ib_fmr *ifmr = to_mfmr(ibfmr);
- struct mlx4_ib_dev *dev = to_mdev(ibfmr->device);
- int err;
-
- err = mlx4_fmr_free(dev->dev, &ifmr->mfmr);
-
- if (!err)
- kfree(ifmr);
-
- return err;
-}
-
static int mlx4_set_page(struct ib_mr *ibmr, u64 addr)
{
struct mlx4_ib_mr *mr = to_mmr(ibmr);
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 7d76f769233c..f2887ae6390e 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -36,12 +36,12 @@
#include <net/ip.h>
#include <linux/slab.h>
#include <linux/netdevice.h>
-#include <linux/vmalloc.h>
#include <rdma/ib_cache.h>
#include <rdma/ib_pack.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_mad.h>
+#include <rdma/uverbs_ioctl.h>
#include <linux/mlx4/driver.h>
#include <linux/mlx4/qp.h>
@@ -53,6 +53,8 @@ static void mlx4_ib_lock_cqs(struct mlx4_ib_cq *send_cq,
struct mlx4_ib_cq *recv_cq);
static void mlx4_ib_unlock_cqs(struct mlx4_ib_cq *send_cq,
struct mlx4_ib_cq *recv_cq);
+static int _mlx4_ib_modify_wq(struct ib_wq *ibwq, enum ib_wq_state new_state,
+ struct ib_udata *udata);
enum {
MLX4_IB_ACK_REQ_FREQ = 8,
@@ -66,27 +68,6 @@ enum {
};
enum {
- /*
- * Largest possible UD header: send with GRH and immediate
- * data plus 18 bytes for an Ethernet header with VLAN/802.1Q
- * tag. (LRH would only use 8 bytes, so Ethernet is the
- * biggest case)
- */
- MLX4_IB_UD_HEADER_SIZE = 82,
- MLX4_IB_LSO_HEADER_SPARE = 128,
-};
-
-struct mlx4_ib_sqp {
- struct mlx4_ib_qp qp;
- int pkey_index;
- u32 qkey;
- u32 send_psn;
- struct ib_ud_header ud_header;
- u8 header_buf[MLX4_IB_UD_HEADER_SIZE];
- struct ib_qp *roce_v2_gsi;
-};
-
-enum {
MLX4_IB_MIN_SQ_STRIDE = 6,
MLX4_IB_CACHE_LINE_SIZE = 64,
};
@@ -116,10 +97,18 @@ static const __be32 mlx4_ib_opcode[] = {
[IB_WR_MASKED_ATOMIC_FETCH_AND_ADD] = cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_FA),
};
-static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp)
-{
- return container_of(mqp, struct mlx4_ib_sqp, qp);
-}
+enum mlx4_ib_source_type {
+ MLX4_IB_QP_SRC = 0,
+ MLX4_IB_RWQ_SRC = 1,
+};
+
+struct mlx4_ib_qp_event_work {
+ struct work_struct work;
+ struct mlx4_qp *qp;
+ enum mlx4_event type;
+};
+
+static struct workqueue_struct *mlx4_ib_qp_event_wq;
static int is_tunnel_qp(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
{
@@ -145,8 +134,8 @@ static int is_sqp(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
/* VF or PF -- proxy SQP */
if (mlx4_is_mfunc(dev->dev)) {
for (i = 0; i < dev->dev->caps.num_ports; i++) {
- if (qp->mqp.qpn == dev->dev->caps.qp0_proxy[i] ||
- qp->mqp.qpn == dev->dev->caps.qp1_proxy[i]) {
+ if (qp->mqp.qpn == dev->dev->caps.spec_qps[i].qp0_proxy ||
+ qp->mqp.qpn == dev->dev->caps.spec_qps[i].qp1_proxy) {
proxy_sqp = 1;
break;
}
@@ -173,7 +162,7 @@ static int is_qp0(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
/* VF or PF -- proxy QP0 */
if (mlx4_is_mfunc(dev->dev)) {
for (i = 0; i < dev->dev->caps.num_ports; i++) {
- if (qp->mqp.qpn == dev->dev->caps.qp0_proxy[i]) {
+ if (qp->mqp.qpn == dev->dev->caps.spec_qps[i].qp0_proxy) {
proxy_qp0 = 1;
break;
}
@@ -199,135 +188,103 @@ static void *get_send_wqe(struct mlx4_ib_qp *qp, int n)
/*
* Stamp a SQ WQE so that it is invalid if prefetched by marking the
- * first four bytes of every 64 byte chunk with
- * 0x7FFFFFF | (invalid_ownership_value << 31).
- *
- * When the max work request size is less than or equal to the WQE
- * basic block size, as an optimization, we can stamp all WQEs with
- * 0xffffffff, and skip the very first chunk of each WQE.
+ * first four bytes of every 64 byte chunk with 0xffffffff, except for
+ * the very first chunk of the WQE.
*/
-static void stamp_send_wqe(struct mlx4_ib_qp *qp, int n, int size)
+static void stamp_send_wqe(struct mlx4_ib_qp *qp, int n)
{
__be32 *wqe;
int i;
int s;
- int ind;
void *buf;
- __be32 stamp;
struct mlx4_wqe_ctrl_seg *ctrl;
- if (qp->sq_max_wqes_per_wr > 1) {
- s = roundup(size, 1U << qp->sq.wqe_shift);
- for (i = 0; i < s; i += 64) {
- ind = (i >> qp->sq.wqe_shift) + n;
- stamp = ind & qp->sq.wqe_cnt ? cpu_to_be32(0x7fffffff) :
- cpu_to_be32(0xffffffff);
- buf = get_send_wqe(qp, ind & (qp->sq.wqe_cnt - 1));
- wqe = buf + (i & ((1 << qp->sq.wqe_shift) - 1));
- *wqe = stamp;
- }
- } else {
- ctrl = buf = get_send_wqe(qp, n & (qp->sq.wqe_cnt - 1));
- s = (ctrl->qpn_vlan.fence_size & 0x3f) << 4;
- for (i = 64; i < s; i += 64) {
- wqe = buf + i;
- *wqe = cpu_to_be32(0xffffffff);
- }
+ buf = get_send_wqe(qp, n & (qp->sq.wqe_cnt - 1));
+ ctrl = (struct mlx4_wqe_ctrl_seg *)buf;
+ s = (ctrl->qpn_vlan.fence_size & 0x3f) << 4;
+ for (i = 64; i < s; i += 64) {
+ wqe = buf + i;
+ *wqe = cpu_to_be32(0xffffffff);
}
}
-static void post_nop_wqe(struct mlx4_ib_qp *qp, int n, int size)
+static void mlx4_ib_handle_qp_event(struct work_struct *_work)
{
- struct mlx4_wqe_ctrl_seg *ctrl;
- struct mlx4_wqe_inline_seg *inl;
- void *wqe;
- int s;
+ struct mlx4_ib_qp_event_work *qpe_work =
+ container_of(_work, struct mlx4_ib_qp_event_work, work);
+ struct ib_qp *ibqp = &to_mibqp(qpe_work->qp)->ibqp;
+ struct ib_event event = {};
- ctrl = wqe = get_send_wqe(qp, n & (qp->sq.wqe_cnt - 1));
- s = sizeof(struct mlx4_wqe_ctrl_seg);
+ event.device = ibqp->device;
+ event.element.qp = ibqp;
- if (qp->ibqp.qp_type == IB_QPT_UD) {
- struct mlx4_wqe_datagram_seg *dgram = wqe + sizeof *ctrl;
- struct mlx4_av *av = (struct mlx4_av *)dgram->av;
- memset(dgram, 0, sizeof *dgram);
- av->port_pd = cpu_to_be32((qp->port << 24) | to_mpd(qp->ibqp.pd)->pdn);
- s += sizeof(struct mlx4_wqe_datagram_seg);
- }
-
- /* Pad the remainder of the WQE with an inline data segment. */
- if (size > s) {
- inl = wqe + s;
- inl->byte_count = cpu_to_be32(1 << 31 | (size - s - sizeof *inl));
+ switch (qpe_work->type) {
+ case MLX4_EVENT_TYPE_PATH_MIG:
+ event.event = IB_EVENT_PATH_MIG;
+ break;
+ case MLX4_EVENT_TYPE_COMM_EST:
+ event.event = IB_EVENT_COMM_EST;
+ break;
+ case MLX4_EVENT_TYPE_SQ_DRAINED:
+ event.event = IB_EVENT_SQ_DRAINED;
+ break;
+ case MLX4_EVENT_TYPE_SRQ_QP_LAST_WQE:
+ event.event = IB_EVENT_QP_LAST_WQE_REACHED;
+ break;
+ case MLX4_EVENT_TYPE_WQ_CATAS_ERROR:
+ event.event = IB_EVENT_QP_FATAL;
+ break;
+ case MLX4_EVENT_TYPE_PATH_MIG_FAILED:
+ event.event = IB_EVENT_PATH_MIG_ERR;
+ break;
+ case MLX4_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
+ event.event = IB_EVENT_QP_REQ_ERR;
+ break;
+ case MLX4_EVENT_TYPE_WQ_ACCESS_ERROR:
+ event.event = IB_EVENT_QP_ACCESS_ERR;
+ break;
+ default:
+ pr_warn("Unexpected event type %d on QP %06x\n",
+ qpe_work->type, qpe_work->qp->qpn);
+ goto out;
}
- ctrl->srcrb_flags = 0;
- ctrl->qpn_vlan.fence_size = size / 16;
- /*
- * Make sure descriptor is fully written before setting ownership bit
- * (because HW can start executing as soon as we do).
- */
- wmb();
-
- ctrl->owner_opcode = cpu_to_be32(MLX4_OPCODE_NOP | MLX4_WQE_CTRL_NEC) |
- (n & qp->sq.wqe_cnt ? cpu_to_be32(1 << 31) : 0);
- stamp_send_wqe(qp, n + qp->sq_spare_wqes, size);
-}
+ ibqp->event_handler(&event, ibqp->qp_context);
-/* Post NOP WQE to prevent wrap-around in the middle of WR */
-static inline unsigned pad_wraparound(struct mlx4_ib_qp *qp, int ind)
-{
- unsigned s = qp->sq.wqe_cnt - (ind & (qp->sq.wqe_cnt - 1));
- if (unlikely(s < qp->sq_max_wqes_per_wr)) {
- post_nop_wqe(qp, ind, s << qp->sq.wqe_shift);
- ind += s;
- }
- return ind;
+out:
+ mlx4_put_qp(qpe_work->qp);
+ kfree(qpe_work);
}
static void mlx4_ib_qp_event(struct mlx4_qp *qp, enum mlx4_event type)
{
- struct ib_event event;
struct ib_qp *ibqp = &to_mibqp(qp)->ibqp;
+ struct mlx4_ib_qp_event_work *qpe_work;
if (type == MLX4_EVENT_TYPE_PATH_MIG)
to_mibqp(qp)->port = to_mibqp(qp)->alt_port;
- if (ibqp->event_handler) {
- event.device = ibqp->device;
- event.element.qp = ibqp;
- switch (type) {
- case MLX4_EVENT_TYPE_PATH_MIG:
- event.event = IB_EVENT_PATH_MIG;
- break;
- case MLX4_EVENT_TYPE_COMM_EST:
- event.event = IB_EVENT_COMM_EST;
- break;
- case MLX4_EVENT_TYPE_SQ_DRAINED:
- event.event = IB_EVENT_SQ_DRAINED;
- break;
- case MLX4_EVENT_TYPE_SRQ_QP_LAST_WQE:
- event.event = IB_EVENT_QP_LAST_WQE_REACHED;
- break;
- case MLX4_EVENT_TYPE_WQ_CATAS_ERROR:
- event.event = IB_EVENT_QP_FATAL;
- break;
- case MLX4_EVENT_TYPE_PATH_MIG_FAILED:
- event.event = IB_EVENT_PATH_MIG_ERR;
- break;
- case MLX4_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
- event.event = IB_EVENT_QP_REQ_ERR;
- break;
- case MLX4_EVENT_TYPE_WQ_ACCESS_ERROR:
- event.event = IB_EVENT_QP_ACCESS_ERR;
- break;
- default:
- pr_warn("Unexpected event type %d "
- "on QP %06x\n", type, qp->qpn);
- return;
- }
+ if (!ibqp->event_handler)
+ goto out_no_handler;
- ibqp->event_handler(&event, ibqp->qp_context);
- }
+ qpe_work = kzalloc(sizeof(*qpe_work), GFP_ATOMIC);
+ if (!qpe_work)
+ goto out_no_handler;
+
+ qpe_work->qp = qp;
+ qpe_work->type = type;
+ INIT_WORK(&qpe_work->work, mlx4_ib_handle_qp_event);
+ queue_work(mlx4_ib_qp_event_wq, &qpe_work->work);
+ return;
+
+out_no_handler:
+ mlx4_put_qp(qp);
+}
+
+static void mlx4_ib_wq_event(struct mlx4_qp *qp, enum mlx4_event type)
+{
+ pr_warn_ratelimited("Unexpected event type %d on WQ 0x%06x. Events are not supported for WQs\n",
+ type, qp->qpn);
}
static int send_wqe_overhead(enum mlx4_ib_qp_type type, u32 flags)
@@ -377,7 +334,8 @@ static int send_wqe_overhead(enum mlx4_ib_qp_type type, u32 flags)
}
static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
- int is_user, int has_rq, struct mlx4_ib_qp *qp)
+ bool is_user, bool has_rq, struct mlx4_ib_qp *qp,
+ u32 inl_recv_sz)
{
/* Sanity check RQ size before proceeding */
if (cap->max_recv_wr > dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE ||
@@ -385,18 +343,24 @@ static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
return -EINVAL;
if (!has_rq) {
- if (cap->max_recv_wr)
+ if (cap->max_recv_wr || inl_recv_sz)
return -EINVAL;
qp->rq.wqe_cnt = qp->rq.max_gs = 0;
} else {
+ u32 max_inl_recv_sz = dev->dev->caps.max_rq_sg *
+ sizeof(struct mlx4_wqe_data_seg);
+ u32 wqe_size;
+
/* HW requires >= 1 RQ entry with >= 1 gather entry */
- if (is_user && (!cap->max_recv_wr || !cap->max_recv_sge))
+ if (is_user && (!cap->max_recv_wr || !cap->max_recv_sge ||
+ inl_recv_sz > max_inl_recv_sz))
return -EINVAL;
qp->rq.wqe_cnt = roundup_pow_of_two(max(1U, cap->max_recv_wr));
qp->rq.max_gs = roundup_pow_of_two(max(1U, cap->max_recv_sge));
- qp->rq.wqe_shift = ilog2(qp->rq.max_gs * sizeof (struct mlx4_wqe_data_seg));
+ wqe_size = qp->rq.max_gs * sizeof(struct mlx4_wqe_data_seg);
+ qp->rq.wqe_shift = ilog2(max_t(u32, wqe_size, inl_recv_sz));
}
/* leave userspace return values as they were, so as not to break ABI */
@@ -415,8 +379,7 @@ static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
}
static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
- enum mlx4_ib_qp_type type, struct mlx4_ib_qp *qp,
- bool shrink_wqe)
+ enum mlx4_ib_qp_type type, struct mlx4_ib_qp *qp)
{
int s;
@@ -443,70 +406,20 @@ static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
if (s > dev->dev->caps.max_sq_desc_sz)
return -EINVAL;
+ qp->sq.wqe_shift = ilog2(roundup_pow_of_two(s));
+
/*
- * Hermon supports shrinking WQEs, such that a single work
- * request can include multiple units of 1 << wqe_shift. This
- * way, work requests can differ in size, and do not have to
- * be a power of 2 in size, saving memory and speeding up send
- * WR posting. Unfortunately, if we do this then the
- * wqe_index field in CQEs can't be used to look up the WR ID
- * anymore, so we do this only if selective signaling is off.
- *
- * Further, on 32-bit platforms, we can't use vmap() to make
- * the QP buffer virtually contiguous. Thus we have to use
- * constant-sized WRs to make sure a WR is always fully within
- * a single page-sized chunk.
- *
- * Finally, we use NOP work requests to pad the end of the
- * work queue, to avoid wrap-around in the middle of WR. We
- * set NEC bit to avoid getting completions with error for
- * these NOP WRs, but since NEC is only supported starting
- * with firmware 2.2.232, we use constant-sized WRs for older
- * firmware.
- *
- * And, since MLX QPs only support SEND, we use constant-sized
- * WRs in this case.
- *
- * We look for the smallest value of wqe_shift such that the
- * resulting number of wqes does not exceed device
- * capabilities.
- *
- * We set WQE size to at least 64 bytes, this way stamping
- * invalidates each WQE.
+ * We need to leave 2 KB + 1 WR of headroom in the SQ to
+ * allow HW to prefetch.
*/
- if (shrink_wqe && dev->dev->caps.fw_ver >= MLX4_FW_VER_WQE_CTRL_NEC &&
- qp->sq_signal_bits && BITS_PER_LONG == 64 &&
- type != MLX4_IB_QPT_SMI && type != MLX4_IB_QPT_GSI &&
- !(type & (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_PROXY_SMI |
- MLX4_IB_QPT_PROXY_GSI | MLX4_IB_QPT_TUN_SMI_OWNER)))
- qp->sq.wqe_shift = ilog2(64);
- else
- qp->sq.wqe_shift = ilog2(roundup_pow_of_two(s));
-
- for (;;) {
- qp->sq_max_wqes_per_wr = DIV_ROUND_UP(s, 1U << qp->sq.wqe_shift);
-
- /*
- * We need to leave 2 KB + 1 WR of headroom in the SQ to
- * allow HW to prefetch.
- */
- qp->sq_spare_wqes = (2048 >> qp->sq.wqe_shift) + qp->sq_max_wqes_per_wr;
- qp->sq.wqe_cnt = roundup_pow_of_two(cap->max_send_wr *
- qp->sq_max_wqes_per_wr +
- qp->sq_spare_wqes);
-
- if (qp->sq.wqe_cnt <= dev->dev->caps.max_wqes)
- break;
-
- if (qp->sq_max_wqes_per_wr <= 1)
- return -EINVAL;
-
- ++qp->sq.wqe_shift;
- }
-
- qp->sq.max_gs = (min(dev->dev->caps.max_sq_desc_sz,
- (qp->sq_max_wqes_per_wr << qp->sq.wqe_shift)) -
- send_wqe_overhead(type, qp->flags)) /
+ qp->sq_spare_wqes = MLX4_IB_SQ_HEADROOM(qp->sq.wqe_shift);
+ qp->sq.wqe_cnt = roundup_pow_of_two(cap->max_send_wr +
+ qp->sq_spare_wqes);
+
+ qp->sq.max_gs =
+ (min(dev->dev->caps.max_sq_desc_sz,
+ (1 << qp->sq.wqe_shift)) -
+ send_wqe_overhead(type, qp->flags)) /
sizeof (struct mlx4_wqe_data_seg);
qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) +
@@ -520,7 +433,7 @@ static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
}
cap->max_send_wr = qp->sq.max_post =
- (qp->sq.wqe_cnt - qp->sq_spare_wqes) / qp->sq_max_wqes_per_wr;
+ qp->sq.wqe_cnt - qp->sq_spare_wqes;
cap->max_send_sge = min(qp->sq.max_gs,
min(dev->dev->caps.max_sq_sg,
dev->dev->caps.max_rq_sg));
@@ -534,9 +447,13 @@ static int set_user_sq_size(struct mlx4_ib_dev *dev,
struct mlx4_ib_qp *qp,
struct mlx4_ib_create_qp *ucmd)
{
+ u32 cnt;
+
/* Sanity check SQ size before proceeding */
- if ((1 << ucmd->log_sq_bb_count) > dev->dev->caps.max_wqes ||
- ucmd->log_sq_stride >
+ if (check_shl_overflow(1, ucmd->log_sq_bb_count, &cnt) ||
+ cnt > dev->dev->caps.max_wqes)
+ return -EINVAL;
+ if (ucmd->log_sq_stride >
ilog2(roundup_pow_of_two(dev->dev->caps.max_sq_desc_sz)) ||
ucmd->log_sq_stride < MLX4_IB_MIN_SQ_STRIDE)
return -EINVAL;
@@ -555,8 +472,8 @@ static int alloc_proxy_bufs(struct ib_device *dev, struct mlx4_ib_qp *qp)
int i;
qp->sqp_proxy_rcv =
- kmalloc(sizeof (struct mlx4_ib_buf) * qp->rq.wqe_cnt,
- GFP_KERNEL);
+ kmalloc_array(qp->rq.wqe_cnt, sizeof(struct mlx4_ib_buf),
+ GFP_KERNEL);
if (!qp->sqp_proxy_rcv)
return -ENOMEM;
for (i = 0; i < qp->rq.wqe_cnt; i++) {
@@ -602,10 +519,10 @@ static void free_proxy_bufs(struct ib_device *dev, struct mlx4_ib_qp *qp)
kfree(qp->sqp_proxy_rcv);
}
-static int qp_has_rq(struct ib_qp_init_attr *attr)
+static bool qp_has_rq(struct ib_qp_init_attr *attr)
{
if (attr->qp_type == IB_QPT_XRC_INI || attr->qp_type == IB_QPT_XRC_TGT)
- return 0;
+ return false;
return !attr->srq;
}
@@ -614,8 +531,8 @@ static int qp0_enabled_vf(struct mlx4_dev *dev, int qpn)
{
int i;
for (i = 0; i < dev->caps.num_ports; i++) {
- if (qpn == dev->caps.qp0_proxy[i])
- return !!dev->caps.qp0_qkey[i];
+ if (qpn == dev->caps.spec_qps[i].qp0_proxy)
+ return !!dev->caps.spec_qps[i].qp0_qkey;
}
return 0;
}
@@ -632,16 +549,458 @@ static void mlx4_ib_free_qp_counter(struct mlx4_ib_dev *dev,
qp->counter_index = NULL;
}
-static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
- struct ib_qp_init_attr *init_attr,
- struct ib_udata *udata, int sqpn, struct mlx4_ib_qp **caller_qp,
- gfp_t gfp)
+static int set_qp_rss(struct mlx4_ib_dev *dev, struct mlx4_ib_rss *rss_ctx,
+ struct ib_qp_init_attr *init_attr,
+ struct mlx4_ib_create_qp_rss *ucmd)
+{
+ rss_ctx->base_qpn_tbl_sz = init_attr->rwq_ind_tbl->ind_tbl[0]->wq_num |
+ (init_attr->rwq_ind_tbl->log_ind_tbl_size << 24);
+
+ if ((ucmd->rx_hash_function == MLX4_IB_RX_HASH_FUNC_TOEPLITZ) &&
+ (dev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS_TOP)) {
+ memcpy(rss_ctx->rss_key, ucmd->rx_hash_key,
+ MLX4_EN_RSS_KEY_SIZE);
+ } else {
+ pr_debug("RX Hash function is not supported\n");
+ return (-EOPNOTSUPP);
+ }
+
+ if (ucmd->rx_hash_fields_mask & ~(u64)(MLX4_IB_RX_HASH_SRC_IPV4 |
+ MLX4_IB_RX_HASH_DST_IPV4 |
+ MLX4_IB_RX_HASH_SRC_IPV6 |
+ MLX4_IB_RX_HASH_DST_IPV6 |
+ MLX4_IB_RX_HASH_SRC_PORT_TCP |
+ MLX4_IB_RX_HASH_DST_PORT_TCP |
+ MLX4_IB_RX_HASH_SRC_PORT_UDP |
+ MLX4_IB_RX_HASH_DST_PORT_UDP |
+ MLX4_IB_RX_HASH_INNER)) {
+ pr_debug("RX Hash fields_mask has unsupported mask (0x%llx)\n",
+ ucmd->rx_hash_fields_mask);
+ return (-EOPNOTSUPP);
+ }
+
+ if ((ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_SRC_IPV4) &&
+ (ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_DST_IPV4)) {
+ rss_ctx->flags = MLX4_RSS_IPV4;
+ } else if ((ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_SRC_IPV4) ||
+ (ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_DST_IPV4)) {
+ pr_debug("RX Hash fields_mask is not supported - both IPv4 SRC and DST must be set\n");
+ return (-EOPNOTSUPP);
+ }
+
+ if ((ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_SRC_IPV6) &&
+ (ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_DST_IPV6)) {
+ rss_ctx->flags |= MLX4_RSS_IPV6;
+ } else if ((ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_SRC_IPV6) ||
+ (ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_DST_IPV6)) {
+ pr_debug("RX Hash fields_mask is not supported - both IPv6 SRC and DST must be set\n");
+ return (-EOPNOTSUPP);
+ }
+
+ if ((ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_SRC_PORT_UDP) &&
+ (ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_DST_PORT_UDP)) {
+ if (!(dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_UDP_RSS)) {
+ pr_debug("RX Hash fields_mask for UDP is not supported\n");
+ return (-EOPNOTSUPP);
+ }
+
+ if (rss_ctx->flags & MLX4_RSS_IPV4)
+ rss_ctx->flags |= MLX4_RSS_UDP_IPV4;
+ if (rss_ctx->flags & MLX4_RSS_IPV6)
+ rss_ctx->flags |= MLX4_RSS_UDP_IPV6;
+ if (!(rss_ctx->flags & (MLX4_RSS_IPV6 | MLX4_RSS_IPV4))) {
+ pr_debug("RX Hash fields_mask is not supported - UDP must be set with IPv4 or IPv6\n");
+ return (-EOPNOTSUPP);
+ }
+ } else if ((ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_SRC_PORT_UDP) ||
+ (ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_DST_PORT_UDP)) {
+ pr_debug("RX Hash fields_mask is not supported - both UDP SRC and DST must be set\n");
+ return (-EOPNOTSUPP);
+ }
+
+ if ((ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_SRC_PORT_TCP) &&
+ (ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_DST_PORT_TCP)) {
+ if (rss_ctx->flags & MLX4_RSS_IPV4)
+ rss_ctx->flags |= MLX4_RSS_TCP_IPV4;
+ if (rss_ctx->flags & MLX4_RSS_IPV6)
+ rss_ctx->flags |= MLX4_RSS_TCP_IPV6;
+ if (!(rss_ctx->flags & (MLX4_RSS_IPV6 | MLX4_RSS_IPV4))) {
+ pr_debug("RX Hash fields_mask is not supported - TCP must be set with IPv4 or IPv6\n");
+ return (-EOPNOTSUPP);
+ }
+ } else if ((ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_SRC_PORT_TCP) ||
+ (ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_DST_PORT_TCP)) {
+ pr_debug("RX Hash fields_mask is not supported - both TCP SRC and DST must be set\n");
+ return (-EOPNOTSUPP);
+ }
+
+ if (ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_INNER) {
+ if (dev->dev->caps.tunnel_offload_mode ==
+ MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) {
+ /*
+ * Hash according to inner headers if exist, otherwise
+ * according to outer headers.
+ */
+ rss_ctx->flags |= MLX4_RSS_BY_INNER_HEADERS_IPONLY;
+ } else {
+ pr_debug("RSS Hash for inner headers isn't supported\n");
+ return (-EOPNOTSUPP);
+ }
+ }
+
+ return 0;
+}
+
+static int create_qp_rss(struct mlx4_ib_dev *dev,
+ struct ib_qp_init_attr *init_attr,
+ struct mlx4_ib_create_qp_rss *ucmd,
+ struct mlx4_ib_qp *qp)
{
int qpn;
int err;
- struct ib_qp_cap backup_cap;
- struct mlx4_ib_sqp *sqp = NULL;
- struct mlx4_ib_qp *qp;
+
+ qp->mqp.usage = MLX4_RES_USAGE_USER_VERBS;
+
+ err = mlx4_qp_reserve_range(dev->dev, 1, 1, &qpn, 0, qp->mqp.usage);
+ if (err)
+ return err;
+
+ err = mlx4_qp_alloc(dev->dev, qpn, &qp->mqp);
+ if (err)
+ goto err_qpn;
+
+ INIT_LIST_HEAD(&qp->gid_list);
+ INIT_LIST_HEAD(&qp->steering_rules);
+
+ qp->mlx4_ib_qp_type = MLX4_IB_QPT_RAW_PACKET;
+ qp->state = IB_QPS_RESET;
+
+ /* Set dummy send resources to be compatible with HV and PRM */
+ qp->sq_no_prefetch = 1;
+ qp->sq.wqe_cnt = 1;
+ qp->sq.wqe_shift = MLX4_IB_MIN_SQ_STRIDE;
+ qp->buf_size = qp->sq.wqe_cnt << MLX4_IB_MIN_SQ_STRIDE;
+ qp->mtt = (to_mqp(
+ (struct ib_qp *)init_attr->rwq_ind_tbl->ind_tbl[0]))->mtt;
+
+ qp->rss_ctx = kzalloc(sizeof(*qp->rss_ctx), GFP_KERNEL);
+ if (!qp->rss_ctx) {
+ err = -ENOMEM;
+ goto err_qp_alloc;
+ }
+
+ err = set_qp_rss(dev, qp->rss_ctx, init_attr, ucmd);
+ if (err)
+ goto err;
+
+ return 0;
+
+err:
+ kfree(qp->rss_ctx);
+
+err_qp_alloc:
+ mlx4_qp_remove(dev->dev, &qp->mqp);
+ mlx4_qp_free(dev->dev, &qp->mqp);
+
+err_qpn:
+ mlx4_qp_release_range(dev->dev, qpn, 1);
+ return err;
+}
+
+static int _mlx4_ib_create_qp_rss(struct ib_pd *pd, struct mlx4_ib_qp *qp,
+ struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata)
+{
+ struct mlx4_ib_create_qp_rss ucmd = {};
+ size_t required_cmd_sz;
+ int err;
+
+ if (!udata) {
+ pr_debug("RSS QP with NULL udata\n");
+ return -EINVAL;
+ }
+
+ if (udata->outlen)
+ return -EOPNOTSUPP;
+
+ required_cmd_sz = offsetof(typeof(ucmd), reserved1) +
+ sizeof(ucmd.reserved1);
+ if (udata->inlen < required_cmd_sz) {
+ pr_debug("invalid inlen\n");
+ return -EINVAL;
+ }
+
+ if (ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen))) {
+ pr_debug("copy failed\n");
+ return -EFAULT;
+ }
+
+ if (memchr_inv(ucmd.reserved, 0, sizeof(ucmd.reserved)))
+ return -EOPNOTSUPP;
+
+ if (ucmd.comp_mask || ucmd.reserved1)
+ return -EOPNOTSUPP;
+
+ if (udata->inlen > sizeof(ucmd) &&
+ !ib_is_udata_cleared(udata, sizeof(ucmd),
+ udata->inlen - sizeof(ucmd))) {
+ pr_debug("inlen is not supported\n");
+ return -EOPNOTSUPP;
+ }
+
+ if (init_attr->qp_type != IB_QPT_RAW_PACKET) {
+ pr_debug("RSS QP with unsupported QP type %d\n",
+ init_attr->qp_type);
+ return -EOPNOTSUPP;
+ }
+
+ if (init_attr->create_flags) {
+ pr_debug("RSS QP doesn't support create flags\n");
+ return -EOPNOTSUPP;
+ }
+
+ if (init_attr->send_cq || init_attr->cap.max_send_wr) {
+ pr_debug("RSS QP with unsupported send attributes\n");
+ return -EOPNOTSUPP;
+ }
+
+ qp->pri.vid = 0xFFFF;
+ qp->alt.vid = 0xFFFF;
+
+ err = create_qp_rss(to_mdev(pd->device), init_attr, &ucmd, qp);
+ if (err)
+ return err;
+
+ qp->ibqp.qp_num = qp->mqp.qpn;
+ return 0;
+}
+
+/*
+ * This function allocates a WQN from a range which is consecutive and aligned
+ * to its size. In case the range is full, then it creates a new range and
+ * allocates WQN from it. The new range will be used for following allocations.
+ */
+static int mlx4_ib_alloc_wqn(struct mlx4_ib_ucontext *context,
+ struct mlx4_ib_qp *qp, int range_size, int *wqn)
+{
+ struct mlx4_ib_dev *dev = to_mdev(context->ibucontext.device);
+ struct mlx4_wqn_range *range;
+ int err = 0;
+
+ mutex_lock(&context->wqn_ranges_mutex);
+
+ range = list_first_entry_or_null(&context->wqn_ranges_list,
+ struct mlx4_wqn_range, list);
+
+ if (!range || (range->refcount == range->size) || range->dirty) {
+ range = kzalloc(sizeof(*range), GFP_KERNEL);
+ if (!range) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ err = mlx4_qp_reserve_range(dev->dev, range_size,
+ range_size, &range->base_wqn, 0,
+ qp->mqp.usage);
+ if (err) {
+ kfree(range);
+ goto out;
+ }
+
+ range->size = range_size;
+ list_add(&range->list, &context->wqn_ranges_list);
+ } else if (range_size != 1) {
+ /*
+ * Requesting a new range (>1) when last range is still open, is
+ * not valid.
+ */
+ err = -EINVAL;
+ goto out;
+ }
+
+ qp->wqn_range = range;
+
+ *wqn = range->base_wqn + range->refcount;
+
+ range->refcount++;
+
+out:
+ mutex_unlock(&context->wqn_ranges_mutex);
+
+ return err;
+}
+
+static void mlx4_ib_release_wqn(struct mlx4_ib_ucontext *context,
+ struct mlx4_ib_qp *qp, bool dirty_release)
+{
+ struct mlx4_ib_dev *dev = to_mdev(context->ibucontext.device);
+ struct mlx4_wqn_range *range;
+
+ mutex_lock(&context->wqn_ranges_mutex);
+
+ range = qp->wqn_range;
+
+ range->refcount--;
+ if (!range->refcount) {
+ mlx4_qp_release_range(dev->dev, range->base_wqn,
+ range->size);
+ list_del(&range->list);
+ kfree(range);
+ } else if (dirty_release) {
+ /*
+ * A range which one of its WQNs is destroyed, won't be able to be
+ * reused for further WQN allocations.
+ * The next created WQ will allocate a new range.
+ */
+ range->dirty = true;
+ }
+
+ mutex_unlock(&context->wqn_ranges_mutex);
+}
+
+static int create_rq(struct ib_pd *pd, struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata, struct mlx4_ib_qp *qp)
+{
+ struct mlx4_ib_dev *dev = to_mdev(pd->device);
+ int qpn;
+ int err;
+ struct mlx4_ib_ucontext *context = rdma_udata_to_drv_context(
+ udata, struct mlx4_ib_ucontext, ibucontext);
+ struct mlx4_ib_cq *mcq;
+ unsigned long flags;
+ int range_size;
+ struct mlx4_ib_create_wq wq;
+ size_t copy_len;
+ int shift;
+ int n;
+
+ qp->mlx4_ib_qp_type = MLX4_IB_QPT_RAW_PACKET;
+
+ spin_lock_init(&qp->sq.lock);
+ spin_lock_init(&qp->rq.lock);
+ INIT_LIST_HEAD(&qp->gid_list);
+ INIT_LIST_HEAD(&qp->steering_rules);
+
+ qp->state = IB_QPS_RESET;
+
+ copy_len = min(sizeof(struct mlx4_ib_create_wq), udata->inlen);
+
+ if (ib_copy_from_udata(&wq, udata, copy_len)) {
+ err = -EFAULT;
+ goto err;
+ }
+
+ if (wq.comp_mask || wq.reserved[0] || wq.reserved[1] ||
+ wq.reserved[2]) {
+ pr_debug("user command isn't supported\n");
+ err = -EOPNOTSUPP;
+ goto err;
+ }
+
+ if (wq.log_range_size > ilog2(dev->dev->caps.max_rss_tbl_sz)) {
+ pr_debug("WQN range size must be equal or smaller than %d\n",
+ dev->dev->caps.max_rss_tbl_sz);
+ err = -EOPNOTSUPP;
+ goto err;
+ }
+ range_size = 1 << wq.log_range_size;
+
+ if (init_attr->create_flags & IB_QP_CREATE_SCATTER_FCS)
+ qp->flags |= MLX4_IB_QP_SCATTER_FCS;
+
+ err = set_rq_size(dev, &init_attr->cap, true, true, qp, qp->inl_recv_sz);
+ if (err)
+ goto err;
+
+ qp->sq_no_prefetch = 1;
+ qp->sq.wqe_cnt = 1;
+ qp->sq.wqe_shift = MLX4_IB_MIN_SQ_STRIDE;
+ qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) +
+ (qp->sq.wqe_cnt << qp->sq.wqe_shift);
+
+ qp->umem = ib_umem_get(pd->device, wq.buf_addr, qp->buf_size, 0);
+ if (IS_ERR(qp->umem)) {
+ err = PTR_ERR(qp->umem);
+ goto err;
+ }
+
+ shift = mlx4_ib_umem_calc_optimal_mtt_size(qp->umem, 0, &n);
+ if (shift < 0) {
+ err = shift;
+ goto err_buf;
+ }
+
+ err = mlx4_mtt_init(dev->dev, n, shift, &qp->mtt);
+ if (err)
+ goto err_buf;
+
+ err = mlx4_ib_umem_write_mtt(dev, &qp->mtt, qp->umem);
+ if (err)
+ goto err_mtt;
+
+ err = mlx4_ib_db_map_user(udata, wq.db_addr, &qp->db);
+ if (err)
+ goto err_mtt;
+ qp->mqp.usage = MLX4_RES_USAGE_USER_VERBS;
+
+ err = mlx4_ib_alloc_wqn(context, qp, range_size, &qpn);
+ if (err)
+ goto err_wrid;
+
+ err = mlx4_qp_alloc(dev->dev, qpn, &qp->mqp);
+ if (err)
+ goto err_qpn;
+
+ /*
+ * Hardware wants QPN written in big-endian order (after
+ * shifting) for send doorbell. Precompute this value to save
+ * a little bit when posting sends.
+ */
+ qp->doorbell_qpn = swab32(qp->mqp.qpn << 8);
+
+ qp->mqp.event = mlx4_ib_wq_event;
+
+ spin_lock_irqsave(&dev->reset_flow_resource_lock, flags);
+ mlx4_ib_lock_cqs(to_mcq(init_attr->send_cq),
+ to_mcq(init_attr->recv_cq));
+ /* Maintain device to QPs access, needed for further handling
+ * via reset flow
+ */
+ list_add_tail(&qp->qps_list, &dev->qp_list);
+ /* Maintain CQ to QPs access, needed for further handling
+ * via reset flow
+ */
+ mcq = to_mcq(init_attr->send_cq);
+ list_add_tail(&qp->cq_send_list, &mcq->send_qp_list);
+ mcq = to_mcq(init_attr->recv_cq);
+ list_add_tail(&qp->cq_recv_list, &mcq->recv_qp_list);
+ mlx4_ib_unlock_cqs(to_mcq(init_attr->send_cq),
+ to_mcq(init_attr->recv_cq));
+ spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags);
+ return 0;
+
+err_qpn:
+ mlx4_ib_release_wqn(context, qp, 0);
+err_wrid:
+ mlx4_ib_db_unmap_user(context, &qp->db);
+
+err_mtt:
+ mlx4_mtt_cleanup(dev->dev, &qp->mtt);
+err_buf:
+ ib_umem_release(qp->umem);
+err:
+ return err;
+}
+
+static int create_qp_common(struct ib_pd *pd, struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata, int sqpn,
+ struct mlx4_ib_qp *qp)
+{
+ struct mlx4_ib_dev *dev = to_mdev(pd->device);
+ int qpn;
+ int err;
+ struct mlx4_ib_ucontext *context = rdma_udata_to_drv_context(
+ udata, struct mlx4_ib_ucontext, ibucontext);
enum mlx4_ib_qp_type qp_type = (enum mlx4_ib_qp_type) init_attr->qp_type;
struct mlx4_ib_cq *mcq;
unsigned long flags;
@@ -687,65 +1046,78 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
sqpn = qpn;
}
- if (!*caller_qp) {
- if (qp_type == MLX4_IB_QPT_SMI || qp_type == MLX4_IB_QPT_GSI ||
- (qp_type & (MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_SMI_OWNER |
- MLX4_IB_QPT_PROXY_GSI | MLX4_IB_QPT_TUN_SMI_OWNER))) {
- sqp = kzalloc(sizeof (struct mlx4_ib_sqp), gfp);
- if (!sqp)
- return -ENOMEM;
- qp = &sqp->qp;
- qp->pri.vid = 0xFFFF;
- qp->alt.vid = 0xFFFF;
- } else {
- qp = kzalloc(sizeof (struct mlx4_ib_qp), gfp);
- if (!qp)
- return -ENOMEM;
- qp->pri.vid = 0xFFFF;
- qp->alt.vid = 0xFFFF;
- }
- } else
- qp = *caller_qp;
+ if (init_attr->qp_type == IB_QPT_SMI ||
+ init_attr->qp_type == IB_QPT_GSI || qp_type == MLX4_IB_QPT_SMI ||
+ qp_type == MLX4_IB_QPT_GSI ||
+ (qp_type & (MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_SMI_OWNER |
+ MLX4_IB_QPT_PROXY_GSI | MLX4_IB_QPT_TUN_SMI_OWNER))) {
+ qp->sqp = kzalloc(sizeof(struct mlx4_ib_sqp), GFP_KERNEL);
+ if (!qp->sqp)
+ return -ENOMEM;
+ }
qp->mlx4_ib_qp_type = qp_type;
- mutex_init(&qp->mutex);
spin_lock_init(&qp->sq.lock);
spin_lock_init(&qp->rq.lock);
INIT_LIST_HEAD(&qp->gid_list);
INIT_LIST_HEAD(&qp->steering_rules);
- qp->state = IB_QPS_RESET;
+ qp->state = IB_QPS_RESET;
if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
qp->sq_signal_bits = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE);
- err = set_rq_size(dev, &init_attr->cap, !!pd->uobject, qp_has_rq(init_attr), qp);
- if (err)
- goto err;
-
- if (pd->uobject) {
+ if (udata) {
struct mlx4_ib_create_qp ucmd;
+ size_t copy_len;
+ int shift;
+ int n;
+
+ copy_len = sizeof(struct mlx4_ib_create_qp);
- if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) {
+ if (ib_copy_from_udata(&ucmd, udata, copy_len)) {
err = -EFAULT;
goto err;
}
+ qp->inl_recv_sz = ucmd.inl_recv_sz;
+
+ if (init_attr->create_flags & IB_QP_CREATE_SCATTER_FCS) {
+ if (!(dev->dev->caps.flags &
+ MLX4_DEV_CAP_FLAG_FCS_KEEP)) {
+ pr_debug("scatter FCS is unsupported\n");
+ err = -EOPNOTSUPP;
+ goto err;
+ }
+
+ qp->flags |= MLX4_IB_QP_SCATTER_FCS;
+ }
+
+ err = set_rq_size(dev, &init_attr->cap, udata,
+ qp_has_rq(init_attr), qp, qp->inl_recv_sz);
+ if (err)
+ goto err;
+
qp->sq_no_prefetch = ucmd.sq_no_prefetch;
err = set_user_sq_size(dev, qp, &ucmd);
if (err)
goto err;
- qp->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr,
- qp->buf_size, 0, 0);
+ qp->umem =
+ ib_umem_get(pd->device, ucmd.buf_addr, qp->buf_size, 0);
if (IS_ERR(qp->umem)) {
err = PTR_ERR(qp->umem);
goto err;
}
- err = mlx4_mtt_init(dev->dev, ib_umem_page_count(qp->umem),
- ilog2(qp->umem->page_size), &qp->mtt);
+ shift = mlx4_ib_umem_calc_optimal_mtt_size(qp->umem, 0, &n);
+ if (shift < 0) {
+ err = shift;
+ goto err_buf;
+ }
+
+ err = mlx4_mtt_init(dev->dev, n, shift, &qp->mtt);
if (err)
goto err_buf;
@@ -754,12 +1126,17 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
goto err_mtt;
if (qp_has_rq(init_attr)) {
- err = mlx4_ib_db_map_user(to_mucontext(pd->uobject->context),
- ucmd.db_addr, &qp->db);
+ err = mlx4_ib_db_map_user(udata, ucmd.db_addr, &qp->db);
if (err)
goto err_mtt;
}
+ qp->mqp.usage = MLX4_RES_USAGE_USER_VERBS;
} else {
+ err = set_rq_size(dev, &init_attr->cap, udata,
+ qp_has_rq(init_attr), qp, 0);
+ if (err)
+ goto err;
+
qp->sq_no_prefetch = 0;
if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO)
@@ -769,38 +1146,28 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
if (dev->steering_support ==
MLX4_STEERING_MODE_DEVICE_MANAGED)
qp->flags |= MLX4_IB_QP_NETIF;
- else
+ else {
+ err = -EINVAL;
goto err;
+ }
}
- memcpy(&backup_cap, &init_attr->cap, sizeof(backup_cap));
- err = set_kernel_sq_size(dev, &init_attr->cap,
- qp_type, qp, true);
+ err = set_kernel_sq_size(dev, &init_attr->cap, qp_type, qp);
if (err)
goto err;
if (qp_has_rq(init_attr)) {
- err = mlx4_db_alloc(dev->dev, &qp->db, 0, gfp);
+ err = mlx4_db_alloc(dev->dev, &qp->db, 0);
if (err)
goto err;
*qp->db.db = 0;
}
- if (mlx4_buf_alloc(dev->dev, qp->buf_size, qp->buf_size,
- &qp->buf, gfp)) {
- memcpy(&init_attr->cap, &backup_cap,
- sizeof(backup_cap));
- err = set_kernel_sq_size(dev, &init_attr->cap, qp_type,
- qp, false);
- if (err)
- goto err_db;
-
- if (mlx4_buf_alloc(dev->dev, qp->buf_size,
- PAGE_SIZE * 2, &qp->buf, gfp)) {
- err = -ENOMEM;
- goto err_db;
- }
+ if (mlx4_buf_alloc(dev->dev, qp->buf_size, PAGE_SIZE * 2,
+ &qp->buf)) {
+ err = -ENOMEM;
+ goto err_db;
}
err = mlx4_mtt_init(dev->dev, qp->buf.npages, qp->buf.page_shift,
@@ -808,24 +1175,19 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
if (err)
goto err_buf;
- err = mlx4_buf_write_mtt(dev->dev, &qp->mtt, &qp->buf, gfp);
+ err = mlx4_buf_write_mtt(dev->dev, &qp->mtt, &qp->buf);
if (err)
goto err_mtt;
- qp->sq.wrid = kmalloc_array(qp->sq.wqe_cnt, sizeof(u64),
- gfp | __GFP_NOWARN);
- if (!qp->sq.wrid)
- qp->sq.wrid = __vmalloc(qp->sq.wqe_cnt * sizeof(u64),
- gfp, PAGE_KERNEL);
- qp->rq.wrid = kmalloc_array(qp->rq.wqe_cnt, sizeof(u64),
- gfp | __GFP_NOWARN);
- if (!qp->rq.wrid)
- qp->rq.wrid = __vmalloc(qp->rq.wqe_cnt * sizeof(u64),
- gfp, PAGE_KERNEL);
+ qp->sq.wrid = kvmalloc_array(qp->sq.wqe_cnt,
+ sizeof(u64), GFP_KERNEL);
+ qp->rq.wrid = kvmalloc_array(qp->rq.wqe_cnt,
+ sizeof(u64), GFP_KERNEL);
if (!qp->sq.wrid || !qp->rq.wrid) {
err = -ENOMEM;
goto err_wrid;
}
+ qp->mqp.usage = MLX4_RES_USAGE_DRIVER;
}
if (sqpn) {
@@ -845,13 +1207,14 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
(init_attr->cap.max_send_wr ?
MLX4_RESERVE_ETH_BF_QP : 0) |
(init_attr->cap.max_recv_wr ?
- MLX4_RESERVE_A0_QP : 0));
+ MLX4_RESERVE_A0_QP : 0),
+ qp->mqp.usage);
else
if (qp->flags & MLX4_IB_QP_NETIF)
err = mlx4_ib_steer_qp_alloc(dev, 1, &qpn);
else
err = mlx4_qp_reserve_range(dev->dev, 1, 1,
- &qpn, 0);
+ &qpn, 0, qp->mqp.usage);
if (err)
goto err_proxy;
}
@@ -859,7 +1222,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK)
qp->flags |= MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK;
- err = mlx4_qp_alloc(dev->dev, qpn, &qp->mqp, gfp);
+ err = mlx4_qp_alloc(dev->dev, qpn, &qp->mqp);
if (err)
goto err_qpn;
@@ -874,8 +1237,6 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
qp->doorbell_qpn = swab32(qp->mqp.qpn << 8);
qp->mqp.event = mlx4_ib_qp_event;
- if (!*caller_qp)
- *caller_qp = qp;
spin_lock_irqsave(&dev->reset_flow_resource_lock, flags);
mlx4_ib_lock_cqs(to_mcq(init_attr->send_cq),
@@ -907,9 +1268,9 @@ err_proxy:
if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_GSI)
free_proxy_bufs(pd->device, qp);
err_wrid:
- if (pd->uobject) {
+ if (udata) {
if (qp_has_rq(init_attr))
- mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), &qp->db);
+ mlx4_ib_db_unmap_user(context, &qp->db);
} else {
kvfree(qp->sq.wrid);
kvfree(qp->rq.wrid);
@@ -919,20 +1280,16 @@ err_mtt:
mlx4_mtt_cleanup(dev->dev, &qp->mtt);
err_buf:
- if (pd->uobject)
- ib_umem_release(qp->umem);
- else
+ if (!qp->umem)
mlx4_buf_free(dev->dev, qp->buf_size, &qp->buf);
+ ib_umem_release(qp->umem);
err_db:
- if (!pd->uobject && qp_has_rq(init_attr))
+ if (!udata && qp_has_rq(init_attr))
mlx4_db_free(dev->dev, &qp->db);
err:
- if (sqp)
- kfree(sqp);
- else if (!*caller_qp)
- kfree(qp);
+ kfree(qp->sqp);
return err;
}
@@ -998,7 +1355,7 @@ static struct mlx4_ib_pd *get_pd(struct mlx4_ib_qp *qp)
return to_mpd(qp->ibqp.pd);
}
-static void get_cqs(struct mlx4_ib_qp *qp,
+static void get_cqs(struct mlx4_ib_qp *qp, enum mlx4_ib_source_type src,
struct mlx4_ib_cq **send_cq, struct mlx4_ib_cq **recv_cq)
{
switch (qp->ibqp.qp_type) {
@@ -1011,14 +1368,46 @@ static void get_cqs(struct mlx4_ib_qp *qp,
*recv_cq = *send_cq;
break;
default:
- *send_cq = to_mcq(qp->ibqp.send_cq);
- *recv_cq = to_mcq(qp->ibqp.recv_cq);
+ *recv_cq = (src == MLX4_IB_QP_SRC) ? to_mcq(qp->ibqp.recv_cq) :
+ to_mcq(qp->ibwq.cq);
+ *send_cq = (src == MLX4_IB_QP_SRC) ? to_mcq(qp->ibqp.send_cq) :
+ *recv_cq;
break;
}
}
+static void destroy_qp_rss(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
+{
+ if (qp->state != IB_QPS_RESET) {
+ int i;
+
+ for (i = 0; i < (1 << qp->ibqp.rwq_ind_tbl->log_ind_tbl_size);
+ i++) {
+ struct ib_wq *ibwq = qp->ibqp.rwq_ind_tbl->ind_tbl[i];
+ struct mlx4_ib_qp *wq = to_mqp((struct ib_qp *)ibwq);
+
+ mutex_lock(&wq->mutex);
+
+ wq->rss_usecnt--;
+
+ mutex_unlock(&wq->mutex);
+ }
+
+ if (mlx4_qp_modify(dev->dev, NULL, to_mlx4_state(qp->state),
+ MLX4_QP_STATE_RST, NULL, 0, 0, &qp->mqp))
+ pr_warn("modify QP %06x to RESET failed.\n",
+ qp->mqp.qpn);
+ }
+
+ mlx4_qp_remove(dev->dev, &qp->mqp);
+ mlx4_qp_free(dev->dev, &qp->mqp);
+ mlx4_qp_release_range(dev->dev, qp->mqp.qpn, 1);
+ del_gid_entries(qp);
+}
+
static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
- int is_user)
+ enum mlx4_ib_source_type src,
+ struct ib_udata *udata)
{
struct mlx4_ib_cq *send_cq, *recv_cq;
unsigned long flags;
@@ -1051,7 +1440,7 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
}
}
- get_cqs(qp, &send_cq, &recv_cq);
+ get_cqs(qp, src, &send_cq, &recv_cq);
spin_lock_irqsave(&dev->reset_flow_resource_lock, flags);
mlx4_ib_lock_cqs(send_cq, recv_cq);
@@ -1060,7 +1449,7 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
list_del(&qp->qps_list);
list_del(&qp->cq_send_list);
list_del(&qp->cq_recv_list);
- if (!is_user) {
+ if (!udata) {
__mlx4_ib_cq_clean(recv_cq, qp->mqp.qpn,
qp->ibqp.srq ? to_msrq(qp->ibqp.srq): NULL);
if (send_cq != recv_cq)
@@ -1077,17 +1466,29 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
if (!is_sqp(dev, qp) && !is_tunnel_qp(dev, qp)) {
if (qp->flags & MLX4_IB_QP_NETIF)
mlx4_ib_steer_qp_free(dev, qp->mqp.qpn, 1);
+ else if (src == MLX4_IB_RWQ_SRC)
+ mlx4_ib_release_wqn(
+ rdma_udata_to_drv_context(
+ udata,
+ struct mlx4_ib_ucontext,
+ ibucontext),
+ qp, 1);
else
mlx4_qp_release_range(dev->dev, qp->mqp.qpn, 1);
}
mlx4_mtt_cleanup(dev->dev, &qp->mtt);
- if (is_user) {
- if (qp->rq.wqe_cnt)
- mlx4_ib_db_unmap_user(to_mucontext(qp->ibqp.uobject->context),
- &qp->db);
- ib_umem_release(qp->umem);
+ if (udata) {
+ if (qp->rq.wqe_cnt) {
+ struct mlx4_ib_ucontext *mcontext =
+ rdma_udata_to_drv_context(
+ udata,
+ struct mlx4_ib_ucontext,
+ ibucontext);
+
+ mlx4_ib_db_unmap_user(mcontext, &qp->db);
+ }
} else {
kvfree(qp->sq.wrid);
kvfree(qp->rq.wrid);
@@ -1098,6 +1499,7 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
if (qp->rq.wqe_cnt)
mlx4_db_free(dev->dev, &qp->db);
}
+ ib_umem_release(qp->umem);
del_gid_entries(qp);
}
@@ -1114,23 +1516,22 @@ static u32 get_sqp_num(struct mlx4_ib_dev *dev, struct ib_qp_init_attr *attr)
}
/* PF or VF -- creating proxies */
if (attr->qp_type == IB_QPT_SMI)
- return dev->dev->caps.qp0_proxy[attr->port_num - 1];
+ return dev->dev->caps.spec_qps[attr->port_num - 1].qp0_proxy;
else
- return dev->dev->caps.qp1_proxy[attr->port_num - 1];
+ return dev->dev->caps.spec_qps[attr->port_num - 1].qp1_proxy;
}
-static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd,
- struct ib_qp_init_attr *init_attr,
- struct ib_udata *udata)
+static int _mlx4_ib_create_qp(struct ib_pd *pd, struct mlx4_ib_qp *qp,
+ struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata)
{
- struct mlx4_ib_qp *qp = NULL;
int err;
int sup_u_create_flags = MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK;
u16 xrcdn = 0;
- gfp_t gfp;
- gfp = (init_attr->create_flags & MLX4_IB_QP_CREATE_USE_GFP_NOIO) ?
- GFP_NOIO : GFP_KERNEL;
+ if (init_attr->rwq_ind_tbl)
+ return _mlx4_ib_create_qp_rss(pd, qp, init_attr, udata);
+
/*
* We only support LSO, vendor flag1, and multicast loopback blocking,
* and only for kernel UD QPs.
@@ -1140,21 +1541,19 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd,
MLX4_IB_SRIOV_TUNNEL_QP |
MLX4_IB_SRIOV_SQP |
MLX4_IB_QP_NETIF |
- MLX4_IB_QP_CREATE_ROCE_V2_GSI |
- MLX4_IB_QP_CREATE_USE_GFP_NOIO))
- return ERR_PTR(-EINVAL);
+ MLX4_IB_QP_CREATE_ROCE_V2_GSI))
+ return -EOPNOTSUPP;
if (init_attr->create_flags & IB_QP_CREATE_NETIF_QP) {
if (init_attr->qp_type != IB_QPT_UD)
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
}
if (init_attr->create_flags) {
if (udata && init_attr->create_flags & ~(sup_u_create_flags))
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
if ((init_attr->create_flags & ~(MLX4_IB_SRIOV_SQP |
- MLX4_IB_QP_CREATE_USE_GFP_NOIO |
MLX4_IB_QP_CREATE_ROCE_V2_GSI |
MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK) &&
init_attr->qp_type != IB_QPT_UD) ||
@@ -1162,7 +1561,7 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd,
init_attr->qp_type > IB_QPT_GSI) ||
(init_attr->create_flags & MLX4_IB_QP_CREATE_ROCE_V2_GSI &&
init_attr->qp_type != IB_QPT_GSI))
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
}
switch (init_attr->qp_type) {
@@ -1170,57 +1569,51 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd,
pd = to_mxrcd(init_attr->xrcd)->pd;
xrcdn = to_mxrcd(init_attr->xrcd)->xrcdn;
init_attr->send_cq = to_mxrcd(init_attr->xrcd)->cq;
- /* fall through */
+ fallthrough;
case IB_QPT_XRC_INI:
if (!(to_mdev(pd->device)->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC))
- return ERR_PTR(-ENOSYS);
+ return -ENOSYS;
init_attr->recv_cq = init_attr->send_cq;
- /* fall through */
+ fallthrough;
case IB_QPT_RC:
case IB_QPT_UC:
case IB_QPT_RAW_PACKET:
- qp = kzalloc(sizeof *qp, gfp);
- if (!qp)
- return ERR_PTR(-ENOMEM);
+ case IB_QPT_UD:
qp->pri.vid = 0xFFFF;
qp->alt.vid = 0xFFFF;
- /* fall through */
- case IB_QPT_UD:
- {
- err = create_qp_common(to_mdev(pd->device), pd, init_attr,
- udata, 0, &qp, gfp);
- if (err) {
- kfree(qp);
- return ERR_PTR(err);
- }
+ err = create_qp_common(pd, init_attr, udata, 0, qp);
+ if (err)
+ return err;
qp->ibqp.qp_num = qp->mqp.qpn;
qp->xrcdn = xrcdn;
-
break;
- }
case IB_QPT_SMI:
case IB_QPT_GSI:
{
int sqpn;
- /* Userspace is not allowed to create special QPs: */
- if (udata)
- return ERR_PTR(-EINVAL);
if (init_attr->create_flags & MLX4_IB_QP_CREATE_ROCE_V2_GSI) {
- int res = mlx4_qp_reserve_range(to_mdev(pd->device)->dev, 1, 1, &sqpn, 0);
+ int res = mlx4_qp_reserve_range(to_mdev(pd->device)->dev,
+ 1, 1, &sqpn, 0,
+ MLX4_RES_USAGE_DRIVER);
if (res)
- return ERR_PTR(res);
+ return res;
} else {
sqpn = get_sqp_num(to_mdev(pd->device), init_attr);
}
- err = create_qp_common(to_mdev(pd->device), pd, init_attr, udata,
- sqpn,
- &qp, gfp);
+ qp->pri.vid = 0xFFFF;
+ qp->alt.vid = 0xFFFF;
+ err = create_qp_common(pd, init_attr, udata, sqpn, qp);
if (err)
- return ERR_PTR(err);
+ return err;
+
+ if (init_attr->create_flags &
+ (MLX4_IB_SRIOV_SQP | MLX4_IB_SRIOV_TUNNEL_QP))
+ /* Internal QP created with ib_create_qp */
+ rdma_restrack_no_track(&qp->ibqp.res);
qp->port = init_attr->port_num;
qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 :
@@ -1229,25 +1622,28 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd,
}
default:
/* Don't support raw QPs */
- return ERR_PTR(-EINVAL);
+ return -EOPNOTSUPP;
}
-
- return &qp->ibqp;
+ return 0;
}
-struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
- struct ib_qp_init_attr *init_attr,
- struct ib_udata *udata) {
- struct ib_device *device = pd ? pd->device : init_attr->xrcd->device;
- struct ib_qp *ibqp;
+int mlx4_ib_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata)
+{
+ struct ib_device *device = ibqp->device;
struct mlx4_ib_dev *dev = to_mdev(device);
+ struct mlx4_ib_qp *qp = to_mqp(ibqp);
+ struct ib_pd *pd = ibqp->pd;
+ int ret;
- ibqp = _mlx4_ib_create_qp(pd, init_attr, udata);
+ mutex_init(&qp->mutex);
+ ret = _mlx4_ib_create_qp(pd, qp, init_attr, udata);
+ if (ret)
+ return ret;
- if (!IS_ERR(ibqp) &&
- (init_attr->qp_type == IB_QPT_GSI) &&
+ if (init_attr->qp_type == IB_QPT_GSI &&
!(init_attr->create_flags & MLX4_IB_QP_CREATE_ROCE_V2_GSI)) {
- struct mlx4_ib_sqp *sqp = to_msqp((to_mqp(ibqp)));
+ struct mlx4_ib_sqp *sqp = qp->sqp;
int is_eth = rdma_cap_eth_ah(&dev->ib_dev, init_attr->port_num);
if (is_eth &&
@@ -1256,24 +1652,24 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
sqp->roce_v2_gsi = ib_create_qp(pd, init_attr);
if (IS_ERR(sqp->roce_v2_gsi)) {
- pr_err("Failed to create GSI QP for RoCEv2 (%ld)\n", PTR_ERR(sqp->roce_v2_gsi));
+ pr_err("Failed to create GSI QP for RoCEv2 (%pe)\n",
+ sqp->roce_v2_gsi);
sqp->roce_v2_gsi = NULL;
} else {
- sqp = to_msqp(to_mqp(sqp->roce_v2_gsi));
- sqp->qp.flags |= MLX4_IB_ROCE_V2_GSI_QP;
+ to_mqp(sqp->roce_v2_gsi)->flags |=
+ MLX4_IB_ROCE_V2_GSI_QP;
}
init_attr->create_flags &= ~MLX4_IB_QP_CREATE_ROCE_V2_GSI;
}
}
- return ibqp;
+ return 0;
}
-static int _mlx4_ib_destroy_qp(struct ib_qp *qp)
+static int _mlx4_ib_destroy_qp(struct ib_qp *qp, struct ib_udata *udata)
{
struct mlx4_ib_dev *dev = to_mdev(qp->device);
struct mlx4_ib_qp *mqp = to_mqp(qp);
- struct mlx4_ib_pd *pd;
if (is_qp0(dev, mqp))
mlx4_CLOSE_PORT(dev->dev, mqp->port);
@@ -1288,29 +1684,28 @@ static int _mlx4_ib_destroy_qp(struct ib_qp *qp)
if (mqp->counter_index)
mlx4_ib_free_qp_counter(dev, mqp);
- pd = get_pd(mqp);
- destroy_qp_common(dev, mqp, !!pd->ibpd.uobject);
-
- if (is_sqp(dev, mqp))
- kfree(to_msqp(mqp));
- else
- kfree(mqp);
+ if (qp->rwq_ind_tbl) {
+ destroy_qp_rss(dev, mqp);
+ } else {
+ destroy_qp_common(dev, mqp, MLX4_IB_QP_SRC, udata);
+ }
+ kfree(mqp->sqp);
return 0;
}
-int mlx4_ib_destroy_qp(struct ib_qp *qp)
+int mlx4_ib_destroy_qp(struct ib_qp *qp, struct ib_udata *udata)
{
struct mlx4_ib_qp *mqp = to_mqp(qp);
if (mqp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI) {
- struct mlx4_ib_sqp *sqp = to_msqp(mqp);
+ struct mlx4_ib_sqp *sqp = mqp->sqp;
if (sqp->roce_v2_gsi)
ib_destroy_qp(sqp->roce_v2_gsi);
}
- return _mlx4_ib_destroy_qp(qp);
+ return _mlx4_ib_destroy_qp(qp, udata);
}
static int to_mlx4_st(struct mlx4_ib_dev *dev, enum mlx4_ib_qp_type type)
@@ -1383,32 +1778,33 @@ static void mlx4_set_sched(struct mlx4_qp_path *path, u8 port)
path->sched_queue = (path->sched_queue & 0xbf) | ((port - 1) << 6);
}
-static int _mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah,
+static int _mlx4_set_path(struct mlx4_ib_dev *dev,
+ const struct rdma_ah_attr *ah,
u64 smac, u16 vlan_tag, struct mlx4_qp_path *path,
struct mlx4_roce_smac_vlan_info *smac_info, u8 port)
{
- int is_eth = rdma_port_get_link_layer(&dev->ib_dev, port) ==
- IB_LINK_LAYER_ETHERNET;
int vidx;
int smac_index;
int err;
-
- path->grh_mylmc = ah->src_path_bits & 0x7f;
- path->rlid = cpu_to_be16(ah->dlid);
- if (ah->static_rate) {
- path->static_rate = ah->static_rate + MLX4_STAT_RATE_OFFSET;
+ path->grh_mylmc = rdma_ah_get_path_bits(ah) & 0x7f;
+ path->rlid = cpu_to_be16(rdma_ah_get_dlid(ah));
+ if (rdma_ah_get_static_rate(ah)) {
+ path->static_rate = rdma_ah_get_static_rate(ah) +
+ MLX4_STAT_RATE_OFFSET;
while (path->static_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET &&
!(1 << path->static_rate & dev->dev->caps.stat_rate_support))
--path->static_rate;
} else
path->static_rate = 0;
- if (ah->ah_flags & IB_AH_GRH) {
- int real_sgid_index = mlx4_ib_gid_index_to_real_index(dev,
- port,
- ah->grh.sgid_index);
+ if (rdma_ah_get_ah_flags(ah) & IB_AH_GRH) {
+ const struct ib_global_route *grh = rdma_ah_read_grh(ah);
+ int real_sgid_index =
+ mlx4_ib_gid_index_to_real_index(dev, grh->sgid_attr);
+ if (real_sgid_index < 0)
+ return real_sgid_index;
if (real_sgid_index >= dev->dev->caps.gid_table_len[port]) {
pr_err("sgid_index (%u) too large. max is %d\n",
real_sgid_index, dev->dev->caps.gid_table_len[port] - 1);
@@ -1417,19 +1813,19 @@ static int _mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah,
path->grh_mylmc |= 1 << 7;
path->mgid_index = real_sgid_index;
- path->hop_limit = ah->grh.hop_limit;
+ path->hop_limit = grh->hop_limit;
path->tclass_flowlabel =
- cpu_to_be32((ah->grh.traffic_class << 20) |
- (ah->grh.flow_label));
- memcpy(path->rgid, ah->grh.dgid.raw, 16);
+ cpu_to_be32((grh->traffic_class << 20) |
+ (grh->flow_label));
+ memcpy(path->rgid, grh->dgid.raw, 16);
}
- if (is_eth) {
- if (!(ah->ah_flags & IB_AH_GRH))
+ if (ah->type == RDMA_AH_ATTR_TYPE_ROCE) {
+ if (!(rdma_ah_get_ah_flags(ah) & IB_AH_GRH))
return -1;
path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE |
- ((port - 1) << 6) | ((ah->sl & 7) << 3);
+ ((port - 1) << 6) | ((rdma_ah_get_sl(ah) & 7) << 3);
path->feup |= MLX4_FEUP_FORCE_ETH_UP;
if (vlan_tag < 0x1000) {
@@ -1488,14 +1884,13 @@ static int _mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah,
} else {
smac_index = smac_info->smac_index;
}
-
- memcpy(path->dmac, ah->dmac, 6);
+ memcpy(path->dmac, ah->roce.dmac, 6);
path->ackto = MLX4_IB_LINK_TYPE_ETH;
/* put MAC table smac index for IBoE */
path->grh_mylmc = (u8) (smac_index) | 0x80;
} else {
path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE |
- ((port - 1) << 6) | ((ah->sl & 0xf) << 2);
+ ((port - 1) << 6) | ((rdma_ah_get_sl(ah) & 0xf) << 2);
}
return 0;
@@ -1508,7 +1903,7 @@ static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_qp_attr *qp,
u16 vlan_id, u8 *smac)
{
return _mlx4_set_path(dev, &qp->ah_attr,
- mlx4_mac_to_u64(smac),
+ ether_addr_to_u64(smac),
vlan_id,
path, &mqp->pri, port);
}
@@ -1573,7 +1968,7 @@ static int create_qp_lb_counter(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
!(dev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_LB_SRC_CHK))
return 0;
- err = mlx4_counter_alloc(dev->dev, &tmp_idx);
+ err = mlx4_counter_alloc(dev->dev, &tmp_idx, MLX4_RES_USAGE_DRIVER);
if (err)
return err;
@@ -1613,14 +2008,128 @@ static u8 gid_type_to_qpc(enum ib_gid_type gid_type)
}
}
-static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
+/*
+ * Go over all RSS QP's childes (WQs) and apply their HW state according to
+ * their logic state if the RSS QP is the first RSS QP associated for the WQ.
+ */
+static int bringup_rss_rwqs(struct ib_rwq_ind_table *ind_tbl, u8 port_num,
+ struct ib_udata *udata)
+{
+ int err = 0;
+ int i;
+
+ for (i = 0; i < (1 << ind_tbl->log_ind_tbl_size); i++) {
+ struct ib_wq *ibwq = ind_tbl->ind_tbl[i];
+ struct mlx4_ib_qp *wq = to_mqp((struct ib_qp *)ibwq);
+
+ mutex_lock(&wq->mutex);
+
+ /* Mlx4_ib restrictions:
+ * WQ's is associated to a port according to the RSS QP it is
+ * associates to.
+ * In case the WQ is associated to a different port by another
+ * RSS QP, return a failure.
+ */
+ if ((wq->rss_usecnt > 0) && (wq->port != port_num)) {
+ err = -EINVAL;
+ mutex_unlock(&wq->mutex);
+ break;
+ }
+ wq->port = port_num;
+ if ((wq->rss_usecnt == 0) && (ibwq->state == IB_WQS_RDY)) {
+ err = _mlx4_ib_modify_wq(ibwq, IB_WQS_RDY, udata);
+ if (err) {
+ mutex_unlock(&wq->mutex);
+ break;
+ }
+ }
+ wq->rss_usecnt++;
+
+ mutex_unlock(&wq->mutex);
+ }
+
+ if (i && err) {
+ int j;
+
+ for (j = (i - 1); j >= 0; j--) {
+ struct ib_wq *ibwq = ind_tbl->ind_tbl[j];
+ struct mlx4_ib_qp *wq = to_mqp((struct ib_qp *)ibwq);
+
+ mutex_lock(&wq->mutex);
+
+ if ((wq->rss_usecnt == 1) &&
+ (ibwq->state == IB_WQS_RDY))
+ if (_mlx4_ib_modify_wq(ibwq, IB_WQS_RESET,
+ udata))
+ pr_warn("failed to reverse WQN=0x%06x\n",
+ ibwq->wq_num);
+ wq->rss_usecnt--;
+
+ mutex_unlock(&wq->mutex);
+ }
+ }
+
+ return err;
+}
+
+static void bring_down_rss_rwqs(struct ib_rwq_ind_table *ind_tbl,
+ struct ib_udata *udata)
+{
+ int i;
+
+ for (i = 0; i < (1 << ind_tbl->log_ind_tbl_size); i++) {
+ struct ib_wq *ibwq = ind_tbl->ind_tbl[i];
+ struct mlx4_ib_qp *wq = to_mqp((struct ib_qp *)ibwq);
+
+ mutex_lock(&wq->mutex);
+
+ if ((wq->rss_usecnt == 1) && (ibwq->state == IB_WQS_RDY))
+ if (_mlx4_ib_modify_wq(ibwq, IB_WQS_RESET, udata))
+ pr_warn("failed to reverse WQN=%x\n",
+ ibwq->wq_num);
+ wq->rss_usecnt--;
+
+ mutex_unlock(&wq->mutex);
+ }
+}
+
+static void fill_qp_rss_context(struct mlx4_qp_context *context,
+ struct mlx4_ib_qp *qp)
+{
+ struct mlx4_rss_context *rss_context;
+
+ rss_context = (void *)context + offsetof(struct mlx4_qp_context,
+ pri_path) + MLX4_RSS_OFFSET_IN_QPC_PRI_PATH;
+
+ rss_context->base_qpn = cpu_to_be32(qp->rss_ctx->base_qpn_tbl_sz);
+ rss_context->default_qpn =
+ cpu_to_be32(qp->rss_ctx->base_qpn_tbl_sz & 0xffffff);
+ if (qp->rss_ctx->flags & (MLX4_RSS_UDP_IPV4 | MLX4_RSS_UDP_IPV6))
+ rss_context->base_qpn_udp = rss_context->default_qpn;
+ rss_context->flags = qp->rss_ctx->flags;
+ /* Currently support just toeplitz */
+ rss_context->hash_fn = MLX4_RSS_HASH_TOP;
+
+ memcpy(rss_context->rss_key, qp->rss_ctx->rss_key,
+ MLX4_EN_RSS_KEY_SIZE);
+}
+
+static int __mlx4_ib_modify_qp(void *src, enum mlx4_ib_source_type src_type,
const struct ib_qp_attr *attr, int attr_mask,
- enum ib_qp_state cur_state, enum ib_qp_state new_state)
+ enum ib_qp_state cur_state,
+ enum ib_qp_state new_state,
+ struct ib_udata *udata)
{
- struct mlx4_ib_dev *dev = to_mdev(ibqp->device);
- struct mlx4_ib_qp *qp = to_mqp(ibqp);
+ struct ib_srq *ibsrq;
+ const struct ib_gid_attr *gid_attr = NULL;
+ struct ib_rwq_ind_table *rwq_ind_tbl;
+ enum ib_qp_type qp_type;
+ struct mlx4_ib_dev *dev;
+ struct mlx4_ib_qp *qp;
struct mlx4_ib_pd *pd;
struct mlx4_ib_cq *send_cq, *recv_cq;
+ struct mlx4_ib_ucontext *ucontext = rdma_udata_to_drv_context(
+ udata, struct mlx4_ib_ucontext, ibucontext);
struct mlx4_qp_context *context;
enum mlx4_qp_optpar optpar = 0;
int sqd_event;
@@ -1628,6 +2137,28 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
int err = -EINVAL;
int counter_index;
+ if (src_type == MLX4_IB_RWQ_SRC) {
+ struct ib_wq *ibwq;
+
+ ibwq = (struct ib_wq *)src;
+ ibsrq = NULL;
+ rwq_ind_tbl = NULL;
+ qp_type = IB_QPT_RAW_PACKET;
+ qp = to_mqp((struct ib_qp *)ibwq);
+ dev = to_mdev(ibwq->device);
+ pd = to_mpd(ibwq->pd);
+ } else {
+ struct ib_qp *ibqp;
+
+ ibqp = (struct ib_qp *)src;
+ ibsrq = ibqp->srq;
+ rwq_ind_tbl = ibqp->rwq_ind_tbl;
+ qp_type = ibqp->qp_type;
+ qp = to_mqp(ibqp);
+ dev = to_mdev(ibqp->device);
+ pd = get_pd(qp);
+ }
+
/* APM is not supported under RoCE */
if (attr_mask & IB_QP_ALT_PATH &&
rdma_port_get_link_layer(&dev->ib_dev, qp->port) ==
@@ -1658,16 +2189,22 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
}
}
- if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI)
+ if (qp->inl_recv_sz)
+ context->param3 |= cpu_to_be32(1 << 25);
+
+ if (qp->flags & MLX4_IB_QP_SCATTER_FCS)
+ context->param3 |= cpu_to_be32(1 << 29);
+
+ if (qp_type == IB_QPT_GSI || qp_type == IB_QPT_SMI)
context->mtu_msgmax = (IB_MTU_4096 << 5) | 11;
- else if (ibqp->qp_type == IB_QPT_RAW_PACKET)
+ else if (qp_type == IB_QPT_RAW_PACKET)
context->mtu_msgmax = (MLX4_RAW_QP_MTU << 5) | MLX4_RAW_QP_MSGMAX;
- else if (ibqp->qp_type == IB_QPT_UD) {
+ else if (qp_type == IB_QPT_UD) {
if (qp->flags & MLX4_IB_QP_LSO)
context->mtu_msgmax = (IB_MTU_4096 << 5) |
ilog2(dev->dev->caps.max_gso_sz);
else
- context->mtu_msgmax = (IB_MTU_4096 << 5) | 12;
+ context->mtu_msgmax = (IB_MTU_4096 << 5) | 13;
} else if (attr_mask & IB_QP_PATH_MTU) {
if (attr->path_mtu < IB_MTU_256 || attr->path_mtu > IB_MTU_4096) {
pr_err("path MTU (%u) is invalid\n",
@@ -1678,9 +2215,11 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
ilog2(dev->dev->caps.max_msg_sz);
}
- if (qp->rq.wqe_cnt)
- context->rq_size_stride = ilog2(qp->rq.wqe_cnt) << 3;
- context->rq_size_stride |= qp->rq.wqe_shift - 4;
+ if (!rwq_ind_tbl) { /* PRM RSS receive side should be left zeros */
+ if (qp->rq.wqe_cnt)
+ context->rq_size_stride = ilog2(qp->rq.wqe_cnt) << 3;
+ context->rq_size_stride |= qp->rq.wqe_shift - 4;
+ }
if (qp->sq.wqe_cnt)
context->sq_size_stride = ilog2(qp->sq.wqe_cnt) << 3;
@@ -1692,14 +2231,13 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
context->sq_size_stride |= !!qp->sq_no_prefetch << 7;
context->xrcd = cpu_to_be32((u32) qp->xrcdn);
- if (ibqp->qp_type == IB_QPT_RAW_PACKET)
+ if (qp_type == IB_QPT_RAW_PACKET)
context->param3 |= cpu_to_be32(1 << 30);
}
- if (qp->ibqp.uobject)
+ if (ucontext)
context->usr_page = cpu_to_be32(
- mlx4_to_hw_uar_index(dev->dev,
- to_mucontext(ibqp->uobject->context)->uar.index));
+ mlx4_to_hw_uar_index(dev->dev, ucontext->uar.index));
else
context->usr_page = cpu_to_be32(
mlx4_to_hw_uar_index(dev->dev, dev->priv_uar.index));
@@ -1743,7 +2281,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
steer_qp = 1;
}
- if (ibqp->qp_type == IB_QPT_GSI) {
+ if (qp_type == IB_QPT_GSI) {
enum ib_gid_type gid_type = qp->flags & MLX4_IB_ROCE_V2_GSI_QP ?
IB_GID_TYPE_ROCE_UDP_ENCAP : IB_GID_TYPE_ROCE;
u8 qpc_roce_mode = gid_type_to_qpc(gid_type);
@@ -1760,31 +2298,21 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
}
if (attr_mask & IB_QP_AV) {
- u8 port_num = mlx4_is_bonded(to_mdev(ibqp->device)->dev) ? 1 :
+ u8 port_num = mlx4_is_bonded(dev->dev) ? 1 :
attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
- union ib_gid gid;
- struct ib_gid_attr gid_attr = {.gid_type = IB_GID_TYPE_IB};
u16 vlan = 0xffff;
u8 smac[ETH_ALEN];
- int status = 0;
- int is_eth = rdma_cap_eth_ah(&dev->ib_dev, port_num) &&
- attr->ah_attr.ah_flags & IB_AH_GRH;
-
- if (is_eth && attr->ah_attr.ah_flags & IB_AH_GRH) {
- int index = attr->ah_attr.grh.sgid_index;
-
- status = ib_get_cached_gid(ibqp->device, port_num,
- index, &gid, &gid_attr);
- if (!status && !memcmp(&gid, &zgid, sizeof(gid)))
- status = -ENOENT;
- if (!status && gid_attr.ndev) {
- vlan = rdma_vlan_dev_vlan_id(gid_attr.ndev);
- memcpy(smac, gid_attr.ndev->dev_addr, ETH_ALEN);
- dev_put(gid_attr.ndev);
- }
+ int is_eth =
+ rdma_cap_eth_ah(&dev->ib_dev, port_num) &&
+ rdma_ah_get_ah_flags(&attr->ah_attr) & IB_AH_GRH;
+
+ if (is_eth) {
+ gid_attr = attr->ah_attr.grh.sgid_attr;
+ err = rdma_read_gid_l2_fields(gid_attr, &vlan,
+ &smac[0]);
+ if (err)
+ goto out;
}
- if (status)
- goto out;
if (mlx4_set_path(dev, attr, attr_mask, qp, &context->pri_path,
port_num, vlan, smac))
@@ -1795,7 +2323,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
if (is_eth &&
(cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR)) {
- u8 qpc_roce_mode = gid_type_to_qpc(gid_attr.gid_type);
+ u8 qpc_roce_mode = gid_type_to_qpc(gid_attr->gid_type);
if (qpc_roce_mode == MLX4_QPC_ROCE_MODE_UNDEFINED) {
err = -EINVAL;
@@ -1830,15 +2358,20 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
optpar |= MLX4_QP_OPTPAR_ALT_ADDR_PATH;
}
- pd = get_pd(qp);
- get_cqs(qp, &send_cq, &recv_cq);
- context->pd = cpu_to_be32(pd->pdn);
+ context->pd = cpu_to_be32(pd->pdn);
+
+ if (!rwq_ind_tbl) {
+ context->params1 = cpu_to_be32(MLX4_IB_ACK_REQ_FREQ << 28);
+ get_cqs(qp, src_type, &send_cq, &recv_cq);
+ } else { /* Set dummy CQs to be compatible with HV and PRM */
+ send_cq = to_mcq(rwq_ind_tbl->ind_tbl[0]->cq);
+ recv_cq = send_cq;
+ }
context->cqn_send = cpu_to_be32(send_cq->mcq.cqn);
context->cqn_recv = cpu_to_be32(recv_cq->mcq.cqn);
- context->params1 = cpu_to_be32(MLX4_IB_ACK_REQ_FREQ << 28);
/* Set "fast registration enabled" for all kernel QPs */
- if (!qp->ibqp.uobject)
+ if (!ucontext)
context->params1 |= cpu_to_be32(1 << 11);
if (attr_mask & IB_QP_RNR_RETRY) {
@@ -1873,7 +2406,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
optpar |= MLX4_QP_OPTPAR_RWE | MLX4_QP_OPTPAR_RRE | MLX4_QP_OPTPAR_RAE;
}
- if (ibqp->srq)
+ if (ibsrq)
context->params2 |= cpu_to_be32(MLX4_QP_BIT_RIC);
if (attr_mask & IB_QP_MIN_RNR_TIMER) {
@@ -1904,17 +2437,19 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
optpar |= MLX4_QP_OPTPAR_Q_KEY;
}
- if (ibqp->srq)
- context->srqn = cpu_to_be32(1 << 24 | to_msrq(ibqp->srq)->msrq.srqn);
+ if (ibsrq)
+ context->srqn = cpu_to_be32(1 << 24 |
+ to_msrq(ibsrq)->msrq.srqn);
- if (qp->rq.wqe_cnt && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
+ if (qp->rq.wqe_cnt &&
+ cur_state == IB_QPS_RESET &&
+ new_state == IB_QPS_INIT)
context->db_rec_addr = cpu_to_be64(qp->db.dma);
if (cur_state == IB_QPS_INIT &&
new_state == IB_QPS_RTR &&
- (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI ||
- ibqp->qp_type == IB_QPT_UD ||
- ibqp->qp_type == IB_QPT_RAW_PACKET)) {
+ (qp_type == IB_QPT_GSI || qp_type == IB_QPT_SMI ||
+ qp_type == IB_QPT_UD || qp_type == IB_QPT_RAW_PACKET)) {
context->pri_path.sched_queue = (qp->port - 1) << 6;
if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_SMI ||
qp->mlx4_ib_qp_type &
@@ -1947,17 +2482,17 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
}
}
- if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET) {
+ if (qp_type == IB_QPT_RAW_PACKET) {
context->pri_path.ackto = (context->pri_path.ackto & 0xf8) |
MLX4_IB_LINK_TYPE_ETH;
if (dev->dev->caps.tunnel_offload_mode == MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) {
/* set QP to receive both tunneled & non-tunneled packets */
- if (!(context->flags & cpu_to_be32(1 << MLX4_RSS_QPC_FLAG_OFFSET)))
+ if (!rwq_ind_tbl)
context->srqn = cpu_to_be32(7 << 28);
}
}
- if (ibqp->qp_type == IB_QPT_UD && (new_state == IB_QPS_RTR)) {
+ if (qp_type == IB_QPT_UD && (new_state == IB_QPS_RTR)) {
int is_eth = rdma_port_get_link_layer(
&dev->ib_dev, qp->port) ==
IB_LINK_LAYER_ETHERNET;
@@ -1967,14 +2502,15 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
}
}
-
if (cur_state == IB_QPS_RTS && new_state == IB_QPS_SQD &&
attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY && attr->en_sqd_async_notify)
sqd_event = 1;
else
sqd_event = 0;
- if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
+ if (!ucontext &&
+ cur_state == IB_QPS_RESET &&
+ new_state == IB_QPS_INIT)
context->rlkey_roce_mode |= (1 << 4);
/*
@@ -1983,21 +2519,28 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
* headroom is stamped so that the hardware doesn't start
* processing stale work requests.
*/
- if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
+ if (!ucontext &&
+ cur_state == IB_QPS_RESET &&
+ new_state == IB_QPS_INIT) {
struct mlx4_wqe_ctrl_seg *ctrl;
int i;
for (i = 0; i < qp->sq.wqe_cnt; ++i) {
ctrl = get_send_wqe(qp, i);
ctrl->owner_opcode = cpu_to_be32(1 << 31);
- if (qp->sq_max_wqes_per_wr == 1)
- ctrl->qpn_vlan.fence_size =
- 1 << (qp->sq.wqe_shift - 4);
-
- stamp_send_wqe(qp, i, 1 << qp->sq.wqe_shift);
+ ctrl->qpn_vlan.fence_size =
+ 1 << (qp->sq.wqe_shift - 4);
+ stamp_send_wqe(qp, i);
}
}
+ if (rwq_ind_tbl &&
+ cur_state == IB_QPS_RESET &&
+ new_state == IB_QPS_INIT) {
+ fill_qp_rss_context(context, qp);
+ context->flags |= cpu_to_be32(1 << MLX4_RSS_QPC_FLAG_OFFSET);
+ }
+
err = mlx4_qp_modify(dev->dev, &qp->mtt, to_mlx4_state(cur_state),
to_mlx4_state(new_state), context, optpar,
sqd_event, &qp->mqp);
@@ -2018,7 +2561,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
qp->alt_port = attr->alt_port_num;
if (is_sqp(dev, qp))
- store_sqp_attrs(to_msqp(qp), attr, attr_mask);
+ store_sqp_attrs(qp->sqp, attr, attr_mask);
/*
* If we moved QP0 to RTR, bring the IB link up; if we moved
@@ -2040,9 +2583,9 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
* entries and reinitialize the QP.
*/
if (new_state == IB_QPS_RESET) {
- if (!ibqp->uobject) {
+ if (!ucontext) {
mlx4_ib_cq_clean(recv_cq, qp->mqp.qpn,
- ibqp->srq ? to_msrq(ibqp->srq) : NULL);
+ ibsrq ? to_msrq(ibsrq) : NULL);
if (send_cq != recv_cq)
mlx4_ib_cq_clean(send_cq, qp->mqp.qpn, NULL);
@@ -2153,6 +2696,11 @@ out:
return err;
}
+enum {
+ MLX4_IB_MODIFY_QP_RSS_SUP_ATTR_MSK = (IB_QP_STATE |
+ IB_QP_PORT),
+};
+
static int _mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
int attr_mask, struct ib_udata *udata)
{
@@ -2160,21 +2708,13 @@ static int _mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
struct mlx4_ib_qp *qp = to_mqp(ibqp);
enum ib_qp_state cur_state, new_state;
int err = -EINVAL;
- int ll;
mutex_lock(&qp->mutex);
cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state;
new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
- if (cur_state == new_state && cur_state == IB_QPS_RESET) {
- ll = IB_LINK_LAYER_UNSPECIFIED;
- } else {
- int port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
- ll = rdma_port_get_link_layer(&dev->ib_dev, port);
- }
-
if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
- attr_mask, ll)) {
+ attr_mask)) {
pr_debug("qpn 0x%x: invalid attribute mask specified "
"for transition %d to %d. qp_type %d,"
" attr_mask 0x%x\n",
@@ -2183,6 +2723,27 @@ static int _mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
goto out;
}
+ if (ibqp->rwq_ind_tbl) {
+ if (!(((cur_state == IB_QPS_RESET) &&
+ (new_state == IB_QPS_INIT)) ||
+ ((cur_state == IB_QPS_INIT) &&
+ (new_state == IB_QPS_RTR)))) {
+ pr_debug("qpn 0x%x: RSS QP unsupported transition %d to %d\n",
+ ibqp->qp_num, cur_state, new_state);
+
+ err = -EOPNOTSUPP;
+ goto out;
+ }
+
+ if (attr_mask & ~MLX4_IB_MODIFY_QP_RSS_SUP_ATTR_MSK) {
+ pr_debug("qpn 0x%x: RSS QP unsupported attribute mask 0x%x for transition %d to %d\n",
+ ibqp->qp_num, attr_mask, cur_state, new_state);
+
+ err = -EOPNOTSUPP;
+ goto out;
+ }
+ }
+
if (mlx4_is_bonded(dev->dev) && (attr_mask & IB_QP_PORT)) {
if ((cur_state == IB_QPS_RESET) && (new_state == IB_QPS_INIT)) {
if ((ibqp->qp_type == IB_QPT_RC) ||
@@ -2247,7 +2808,18 @@ static int _mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
goto out;
}
- err = __mlx4_ib_modify_qp(ibqp, attr, attr_mask, cur_state, new_state);
+ if (ibqp->rwq_ind_tbl && (new_state == IB_QPS_INIT)) {
+ err = bringup_rss_rwqs(ibqp->rwq_ind_tbl, attr->port_num,
+ udata);
+ if (err)
+ goto out;
+ }
+
+ err = __mlx4_ib_modify_qp(ibqp, MLX4_IB_QP_SRC, attr, attr_mask,
+ cur_state, new_state, udata);
+
+ if (ibqp->rwq_ind_tbl && err)
+ bring_down_rss_rwqs(ibqp->rwq_ind_tbl, udata);
if (mlx4_is_bonded(dev->dev) && (attr_mask & IB_QP_PORT))
attr->port_num = 1;
@@ -2263,10 +2835,13 @@ int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
struct mlx4_ib_qp *mqp = to_mqp(ibqp);
int ret;
+ if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
+ return -EOPNOTSUPP;
+
ret = _mlx4_ib_modify_qp(ibqp, attr, attr_mask, udata);
if (mqp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI) {
- struct mlx4_ib_sqp *sqp = to_msqp(mqp);
+ struct mlx4_ib_sqp *sqp = mqp->sqp;
int err = 0;
if (sqp->roce_v2_gsi)
@@ -2282,21 +2857,22 @@ static int vf_get_qp0_qkey(struct mlx4_dev *dev, int qpn, u32 *qkey)
{
int i;
for (i = 0; i < dev->caps.num_ports; i++) {
- if (qpn == dev->caps.qp0_proxy[i] ||
- qpn == dev->caps.qp0_tunnel[i]) {
- *qkey = dev->caps.qp0_qkey[i];
+ if (qpn == dev->caps.spec_qps[i].qp0_proxy ||
+ qpn == dev->caps.spec_qps[i].qp0_tunnel) {
+ *qkey = dev->caps.spec_qps[i].qp0_qkey;
return 0;
}
}
return -EINVAL;
}
-static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp,
- struct ib_ud_wr *wr,
+static int build_sriov_qp0_header(struct mlx4_ib_qp *qp,
+ const struct ib_ud_wr *wr,
void *wqe, unsigned *mlx_seg_len)
{
- struct mlx4_ib_dev *mdev = to_mdev(sqp->qp.ibqp.device);
- struct ib_device *ib_dev = &mdev->ib_dev;
+ struct mlx4_ib_dev *mdev = to_mdev(qp->ibqp.device);
+ struct mlx4_ib_sqp *sqp = qp->sqp;
+ struct ib_device *ib_dev = qp->ibqp.device;
struct mlx4_wqe_mlx_seg *mlx = wqe;
struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx;
struct mlx4_ib_ah *ah = to_mah(wr->ah);
@@ -2305,6 +2881,7 @@ static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp,
int send_size;
int header_size;
int spc;
+ int err;
int i;
if (wr->wr.opcode != IB_WR_SEND)
@@ -2317,12 +2894,12 @@ static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp,
/* for proxy-qp0 sends, need to add in size of tunnel header */
/* for tunnel-qp0 sends, tunnel header is already in s/g list */
- if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_SMI_OWNER)
+ if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_SMI_OWNER)
send_size += sizeof (struct mlx4_ib_tunnel_header);
ib_ud_header_init(send_size, 1, 0, 0, 0, 0, 0, 0, &sqp->ud_header);
- if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_SMI_OWNER) {
+ if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_SMI_OWNER) {
sqp->ud_header.lrh.service_level =
be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 28;
sqp->ud_header.lrh.destination_lid =
@@ -2339,24 +2916,26 @@ static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp,
sqp->ud_header.lrh.virtual_lane = 0;
sqp->ud_header.bth.solicited_event = !!(wr->wr.send_flags & IB_SEND_SOLICITED);
- ib_get_cached_pkey(ib_dev, sqp->qp.port, 0, &pkey);
+ err = ib_get_cached_pkey(ib_dev, qp->port, 0, &pkey);
+ if (err)
+ return err;
sqp->ud_header.bth.pkey = cpu_to_be16(pkey);
- if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_TUN_SMI_OWNER)
+ if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_TUN_SMI_OWNER)
sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->remote_qpn);
else
sqp->ud_header.bth.destination_qpn =
- cpu_to_be32(mdev->dev->caps.qp0_tunnel[sqp->qp.port - 1]);
+ cpu_to_be32(mdev->dev->caps.spec_qps[qp->port - 1].qp0_tunnel);
sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1));
if (mlx4_is_master(mdev->dev)) {
- if (mlx4_get_parav_qkey(mdev->dev, sqp->qp.mqp.qpn, &qkey))
+ if (mlx4_get_parav_qkey(mdev->dev, qp->mqp.qpn, &qkey))
return -EINVAL;
} else {
- if (vf_get_qp0_qkey(mdev->dev, sqp->qp.mqp.qpn, &qkey))
+ if (vf_get_qp0_qkey(mdev->dev, qp->mqp.qpn, &qkey))
return -EINVAL;
}
sqp->ud_header.deth.qkey = cpu_to_be32(qkey);
- sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.mqp.qpn);
+ sqp->ud_header.deth.source_qpn = cpu_to_be32(qp->mqp.qpn);
sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY;
sqp->ud_header.immediate_present = 0;
@@ -2420,11 +2999,32 @@ static u8 sl_to_vl(struct mlx4_ib_dev *dev, u8 sl, int port_num)
return vl;
}
+static int fill_gid_by_hw_index(struct mlx4_ib_dev *ibdev, u8 port_num,
+ int index, union ib_gid *gid,
+ enum ib_gid_type *gid_type)
+{
+ struct mlx4_ib_iboe *iboe = &ibdev->iboe;
+ struct mlx4_port_gid_table *port_gid_table;
+ unsigned long flags;
+
+ port_gid_table = &iboe->gids[port_num - 1];
+ spin_lock_irqsave(&iboe->lock, flags);
+ memcpy(gid, &port_gid_table->gids[index].gid, sizeof(*gid));
+ *gid_type = port_gid_table->gids[index].gid_type;
+ spin_unlock_irqrestore(&iboe->lock, flags);
+ if (rdma_is_zero_gid(gid))
+ return -ENOENT;
+
+ return 0;
+}
+
#define MLX4_ROCEV2_QP1_SPORT 0xC000
-static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
+static int build_mlx_header(struct mlx4_ib_qp *qp, const struct ib_ud_wr *wr,
void *wqe, unsigned *mlx_seg_len)
{
- struct ib_device *ib_dev = sqp->qp.ibqp.device;
+ struct mlx4_ib_sqp *sqp = qp->sqp;
+ struct ib_device *ib_dev = qp->ibqp.device;
+ struct mlx4_ib_dev *ibdev = to_mdev(ib_dev);
struct mlx4_wqe_mlx_seg *mlx = wqe;
struct mlx4_wqe_ctrl_seg *ctrl = wqe;
struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx;
@@ -2447,11 +3047,10 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
for (i = 0; i < wr->wr.num_sge; ++i)
send_size += wr->wr.sg_list[i].length;
- is_eth = rdma_port_get_link_layer(sqp->qp.ibqp.device, sqp->qp.port) == IB_LINK_LAYER_ETHERNET;
+ is_eth = rdma_port_get_link_layer(qp->ibqp.device, qp->port) == IB_LINK_LAYER_ETHERNET;
is_grh = mlx4_ib_ah_grh_present(ah);
if (is_eth) {
- struct ib_gid_attr gid_attr;
-
+ enum ib_gid_type gid_type;
if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) {
/* When multi-function is enabled, the ib_core gid
* indexes don't necessarily match the hw ones, so
@@ -2462,18 +3061,11 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
if (err)
return err;
} else {
- err = ib_get_cached_gid(ib_dev,
- be32_to_cpu(ah->av.ib.port_pd) >> 24,
- ah->av.ib.gid_index, &sgid,
- &gid_attr);
+ err = fill_gid_by_hw_index(ibdev, qp->port,
+ ah->av.ib.gid_index, &sgid,
+ &gid_type);
if (!err) {
- if (gid_attr.ndev)
- dev_put(gid_attr.ndev);
- if (!memcmp(&sgid, &zgid, sizeof(sgid)))
- err = -ENOENT;
- }
- if (!err) {
- is_udp = gid_attr.gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP;
+ is_udp = gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP;
if (is_udp) {
if (ipv6_addr_v4mapped((struct in6_addr *)&sgid))
ip_version = 4;
@@ -2487,7 +3079,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
}
if (ah->av.eth.vlan != cpu_to_be16(0xffff)) {
vlan = be16_to_cpu(ah->av.eth.vlan) & 0x0fff;
- is_vlan = 1;
+ is_vlan = true;
}
}
err = ib_ud_header_init(send_size, !is_eth, is_eth, is_vlan, is_grh,
@@ -2516,18 +3108,21 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
* indexes don't necessarily match the hw ones, so
* we must use our own cache
*/
- sqp->ud_header.grh.source_gid.global.subnet_prefix =
- cpu_to_be64(atomic64_read(&(to_mdev(ib_dev)->sriov.
- demux[sqp->qp.port - 1].
- subnet_prefix)));
- sqp->ud_header.grh.source_gid.global.interface_id =
- to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1].
- guid_cache[ah->av.ib.gid_index];
+ sqp->ud_header.grh.source_gid.global
+ .subnet_prefix =
+ cpu_to_be64(atomic64_read(
+ &(to_mdev(ib_dev)
+ ->sriov
+ .demux[qp->port - 1]
+ .subnet_prefix)));
+ sqp->ud_header.grh.source_gid.global
+ .interface_id =
+ to_mdev(ib_dev)
+ ->sriov.demux[qp->port - 1]
+ .guid_cache[ah->av.ib.gid_index];
} else {
- ib_get_cached_gid(ib_dev,
- be32_to_cpu(ah->av.ib.port_pd) >> 24,
- ah->av.ib.gid_index,
- &sqp->ud_header.grh.source_gid, NULL);
+ sqp->ud_header.grh.source_gid =
+ ah->ibah.sgid_attr->gid;
}
}
memcpy(sqp->ud_header.grh.destination_gid.raw,
@@ -2556,10 +3151,13 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
mlx->flags &= cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE);
if (!is_eth) {
- mlx->flags |= cpu_to_be32((!sqp->qp.ibqp.qp_num ? MLX4_WQE_MLX_VL15 : 0) |
- (sqp->ud_header.lrh.destination_lid ==
- IB_LID_PERMISSIVE ? MLX4_WQE_MLX_SLR : 0) |
- (sqp->ud_header.lrh.service_level << 8));
+ mlx->flags |=
+ cpu_to_be32((!qp->ibqp.qp_num ? MLX4_WQE_MLX_VL15 : 0) |
+ (sqp->ud_header.lrh.destination_lid ==
+ IB_LID_PERMISSIVE ?
+ MLX4_WQE_MLX_SLR :
+ 0) |
+ (sqp->ud_header.lrh.service_level << 8));
if (ah->av.ib.port_pd & cpu_to_be32(0x80000000))
mlx->flags |= cpu_to_be32(0x1); /* force loopback */
mlx->rlid = sqp->ud_header.lrh.destination_lid;
@@ -2580,7 +3178,6 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
}
if (is_eth) {
- struct in6_addr in6;
u16 ether_type;
u16 pcp = (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 29) << 13;
@@ -2590,11 +3187,9 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
mlx->sched_prio = cpu_to_be16(pcp);
ether_addr_copy(sqp->ud_header.eth.smac_h, ah->av.eth.s_mac);
- memcpy(sqp->ud_header.eth.dmac_h, ah->av.eth.mac, 6);
+ ether_addr_copy(sqp->ud_header.eth.dmac_h, ah->av.eth.mac);
memcpy(&ctrl->srcrb_flags16[0], ah->av.eth.mac, 2);
memcpy(&ctrl->imm, ah->av.eth.mac + 2, 4);
- memcpy(&in6, sgid.raw, sizeof(in6));
-
if (!memcmp(sqp->ud_header.eth.smac_h, sqp->ud_header.eth.dmac_h, 6))
mlx->flags |= cpu_to_be32(MLX4_WQE_CTRL_FORCE_LOOPBACK);
@@ -2605,26 +3200,33 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
sqp->ud_header.vlan.tag = cpu_to_be16(vlan | pcp);
}
} else {
- sqp->ud_header.lrh.virtual_lane = !sqp->qp.ibqp.qp_num ? 15 :
- sl_to_vl(to_mdev(ib_dev),
- sqp->ud_header.lrh.service_level,
- sqp->qp.port);
- if (sqp->qp.ibqp.qp_num && sqp->ud_header.lrh.virtual_lane == 15)
+ sqp->ud_header.lrh.virtual_lane =
+ !qp->ibqp.qp_num ?
+ 15 :
+ sl_to_vl(to_mdev(ib_dev),
+ sqp->ud_header.lrh.service_level,
+ qp->port);
+ if (qp->ibqp.qp_num && sqp->ud_header.lrh.virtual_lane == 15)
return -EINVAL;
if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE)
sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE;
}
sqp->ud_header.bth.solicited_event = !!(wr->wr.send_flags & IB_SEND_SOLICITED);
- if (!sqp->qp.ibqp.qp_num)
- ib_get_cached_pkey(ib_dev, sqp->qp.port, sqp->pkey_index, &pkey);
+ if (!qp->ibqp.qp_num)
+ err = ib_get_cached_pkey(ib_dev, qp->port, sqp->pkey_index,
+ &pkey);
else
- ib_get_cached_pkey(ib_dev, sqp->qp.port, wr->pkey_index, &pkey);
+ err = ib_get_cached_pkey(ib_dev, qp->port, wr->pkey_index,
+ &pkey);
+ if (err)
+ return err;
+
sqp->ud_header.bth.pkey = cpu_to_be16(pkey);
sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->remote_qpn);
sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1));
sqp->ud_header.deth.qkey = cpu_to_be32(wr->remote_qkey & 0x80000000 ?
sqp->qkey : wr->remote_qkey);
- sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.ibqp.qp_num);
+ sqp->ud_header.deth.source_qpn = cpu_to_be32(qp->ibqp.qp_num);
header_size = ib_ud_header_pack(&sqp->ud_header, sqp->header_buf);
@@ -2712,7 +3314,7 @@ static __be32 convert_access(int acc)
}
static void set_reg_seg(struct mlx4_wqe_fmr_seg *fseg,
- struct ib_reg_wr *wr)
+ const struct ib_reg_wr *wr)
{
struct mlx4_ib_mr *mr = to_mmr(wr->mr);
@@ -2742,7 +3344,7 @@ static __always_inline void set_raddr_seg(struct mlx4_wqe_raddr_seg *rseg,
}
static void set_atomic_seg(struct mlx4_wqe_atomic_seg *aseg,
- struct ib_atomic_wr *wr)
+ const struct ib_atomic_wr *wr)
{
if (wr->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
aseg->swap_add = cpu_to_be64(wr->swap);
@@ -2758,7 +3360,7 @@ static void set_atomic_seg(struct mlx4_wqe_atomic_seg *aseg,
}
static void set_masked_atomic_seg(struct mlx4_wqe_masked_atomic_seg *aseg,
- struct ib_atomic_wr *wr)
+ const struct ib_atomic_wr *wr)
{
aseg->swap_add = cpu_to_be64(wr->swap);
aseg->swap_add_mask = cpu_to_be64(wr->swap_mask);
@@ -2767,7 +3369,7 @@ static void set_masked_atomic_seg(struct mlx4_wqe_masked_atomic_seg *aseg,
}
static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg,
- struct ib_ud_wr *wr)
+ const struct ib_ud_wr *wr)
{
memcpy(dseg->av, &to_mah(wr->ah)->av, sizeof (struct mlx4_av));
dseg->dqpn = cpu_to_be32(wr->remote_qpn);
@@ -2778,7 +3380,7 @@ static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg,
static void set_tunnel_datagram_seg(struct mlx4_ib_dev *dev,
struct mlx4_wqe_datagram_seg *dseg,
- struct ib_ud_wr *wr,
+ const struct ib_ud_wr *wr,
enum mlx4_ib_qp_type qpt)
{
union mlx4_ext_av *av = &to_mah(wr->ah)->av;
@@ -2793,14 +3395,15 @@ static void set_tunnel_datagram_seg(struct mlx4_ib_dev *dev,
memcpy(dseg->av, &sqp_av, sizeof (struct mlx4_av));
if (qpt == MLX4_IB_QPT_PROXY_GSI)
- dseg->dqpn = cpu_to_be32(dev->dev->caps.qp1_tunnel[port - 1]);
+ dseg->dqpn = cpu_to_be32(dev->dev->caps.spec_qps[port - 1].qp1_tunnel);
else
- dseg->dqpn = cpu_to_be32(dev->dev->caps.qp0_tunnel[port - 1]);
+ dseg->dqpn = cpu_to_be32(dev->dev->caps.spec_qps[port - 1].qp0_tunnel);
/* Use QKEY from the QP context, which is set by master */
dseg->qkey = cpu_to_be32(IB_QP_SET_QKEY);
}
-static void build_tunnel_header(struct ib_ud_wr *wr, void *wqe, unsigned *mlx_seg_len)
+static void build_tunnel_header(const struct ib_ud_wr *wr, void *wqe,
+ unsigned *mlx_seg_len)
{
struct mlx4_wqe_inline_seg *inl = wqe;
struct mlx4_ib_tunnel_header hdr;
@@ -2883,9 +3486,9 @@ static void __set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ib_sge *sg)
dseg->addr = cpu_to_be64(sg->addr);
}
-static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_ud_wr *wr,
- struct mlx4_ib_qp *qp, unsigned *lso_seg_len,
- __be32 *lso_hdr_sz, __be32 *blh)
+static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe,
+ const struct ib_ud_wr *wr, struct mlx4_ib_qp *qp,
+ unsigned *lso_seg_len, __be32 *lso_hdr_sz, __be32 *blh)
{
unsigned halign = ALIGN(sizeof *wqe + wr->hlen, 16);
@@ -2903,7 +3506,7 @@ static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_ud_wr *wr,
return 0;
}
-static __be32 send_ieth(struct ib_send_wr *wr)
+static __be32 send_ieth(const struct ib_send_wr *wr)
{
switch (wr->opcode) {
case IB_WR_SEND_WITH_IMM:
@@ -2925,8 +3528,8 @@ static void add_zero_len_inline(void *wqe)
inl->byte_count = cpu_to_be32(1 << 31);
}
-int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
- struct ib_send_wr **bad_wr)
+static int _mlx4_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+ const struct ib_send_wr **bad_wr, bool drain)
{
struct mlx4_ib_qp *qp = to_mqp(ibqp);
void *wqe;
@@ -2936,41 +3539,37 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
int nreq;
int err = 0;
unsigned ind;
- int uninitialized_var(stamp);
- int uninitialized_var(size);
- unsigned uninitialized_var(seglen);
+ int size;
+ unsigned seglen;
__be32 dummy;
__be32 *lso_wqe;
- __be32 uninitialized_var(lso_hdr_sz);
+ __be32 lso_hdr_sz;
__be32 blh;
int i;
struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI) {
- struct mlx4_ib_sqp *sqp = to_msqp(qp);
+ struct mlx4_ib_sqp *sqp = qp->sqp;
if (sqp->roce_v2_gsi) {
struct mlx4_ib_ah *ah = to_mah(ud_wr(wr)->ah);
- struct ib_gid_attr gid_attr;
+ enum ib_gid_type gid_type;
union ib_gid gid;
- if (!ib_get_cached_gid(ibqp->device,
- be32_to_cpu(ah->av.ib.port_pd) >> 24,
- ah->av.ib.gid_index, &gid,
- &gid_attr)) {
- if (gid_attr.ndev)
- dev_put(gid_attr.ndev);
- qp = (gid_attr.gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) ?
- to_mqp(sqp->roce_v2_gsi) : qp;
- } else {
+ if (!fill_gid_by_hw_index(mdev, qp->port,
+ ah->av.ib.gid_index,
+ &gid, &gid_type))
+ qp = (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) ?
+ to_mqp(sqp->roce_v2_gsi) : qp;
+ else
pr_err("Failed to get gid at index %d. RoCEv2 will not work properly\n",
ah->av.ib.gid_index);
- }
}
}
spin_lock_irqsave(&qp->sq.lock, flags);
- if (mdev->dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) {
+ if (mdev->dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR &&
+ !drain) {
err = -EIO;
*bad_wr = wr;
nreq = 0;
@@ -3077,8 +3676,8 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
break;
case MLX4_IB_QPT_TUN_SMI_OWNER:
- err = build_sriov_qp0_header(to_msqp(qp), ud_wr(wr),
- ctrl, &seglen);
+ err = build_sriov_qp0_header(qp, ud_wr(wr), ctrl,
+ &seglen);
if (unlikely(err)) {
*bad_wr = wr;
goto out;
@@ -3114,8 +3713,8 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
break;
case MLX4_IB_QPT_PROXY_SMI_OWNER:
- err = build_sriov_qp0_header(to_msqp(qp), ud_wr(wr),
- ctrl, &seglen);
+ err = build_sriov_qp0_header(qp, ud_wr(wr), ctrl,
+ &seglen);
if (unlikely(err)) {
*bad_wr = wr;
goto out;
@@ -3148,8 +3747,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
case MLX4_IB_QPT_SMI:
case MLX4_IB_QPT_GSI:
- err = build_mlx_header(to_msqp(qp), ud_wr(wr), ctrl,
- &seglen);
+ err = build_mlx_header(qp, ud_wr(wr), ctrl, &seglen);
if (unlikely(err)) {
*bad_wr = wr;
goto out;
@@ -3212,22 +3810,14 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
ctrl->owner_opcode = mlx4_ib_opcode[wr->opcode] |
(ind & qp->sq.wqe_cnt ? cpu_to_be32(1 << 31) : 0) | blh;
- stamp = ind + qp->sq_spare_wqes;
- ind += DIV_ROUND_UP(size * 16, 1U << qp->sq.wqe_shift);
-
/*
* We can improve latency by not stamping the last
* send queue WQE until after ringing the doorbell, so
* only stamp here if there are still more WQEs to post.
- *
- * Same optimization applies to padding with NOP wqe
- * in case of WQE shrinking (used to prevent wrap-around
- * in the middle of WR).
*/
- if (wr->next) {
- stamp_send_wqe(qp, stamp, size * 16);
- ind = pad_wraparound(qp, ind);
- }
+ if (wr->next)
+ stamp_send_wqe(qp, ind + qp->sq_spare_wqes);
+ ind++;
}
out:
@@ -3240,18 +3830,11 @@ out:
*/
wmb();
- writel(qp->doorbell_qpn,
- to_mdev(ibqp->device)->uar_map + MLX4_SEND_DOORBELL);
-
- /*
- * Make sure doorbells don't leak out of SQ spinlock
- * and reach the HCA out of order.
- */
- mmiowb();
+ writel_relaxed(qp->doorbell_qpn,
+ to_mdev(ibqp->device)->uar_map + MLX4_SEND_DOORBELL);
- stamp_send_wqe(qp, stamp, size * 16);
+ stamp_send_wqe(qp, ind + qp->sq_spare_wqes - 1);
- ind = pad_wraparound(qp, ind);
qp->sq_next_wqe = ind;
}
@@ -3260,8 +3843,14 @@ out:
return err;
}
-int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
- struct ib_recv_wr **bad_wr)
+int mlx4_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+ const struct ib_send_wr **bad_wr)
+{
+ return _mlx4_ib_post_send(ibqp, wr, bad_wr, false);
+}
+
+static int _mlx4_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
+ const struct ib_recv_wr **bad_wr, bool drain)
{
struct mlx4_ib_qp *qp = to_mqp(ibqp);
struct mlx4_wqe_data_seg *scat;
@@ -3276,7 +3865,8 @@ int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
max_gs = qp->rq.max_gs;
spin_lock_irqsave(&qp->rq.lock, flags);
- if (mdev->dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) {
+ if (mdev->dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR &&
+ !drain) {
err = -EIO;
*bad_wr = wr;
nreq = 0;
@@ -3347,6 +3937,12 @@ out:
return err;
}
+int mlx4_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
+ const struct ib_recv_wr **bad_wr)
+{
+ return _mlx4_ib_post_recv(ibqp, wr, bad_wr, false);
+}
+
static inline enum ib_qp_state to_ib_qp_state(enum mlx4_qp_state mlx4_state)
{
switch (mlx4_state) {
@@ -3386,39 +3982,37 @@ static int to_ib_qp_access_flags(int mlx4_flags)
return ib_flags;
}
-static void to_ib_ah_attr(struct mlx4_ib_dev *ibdev, struct ib_ah_attr *ib_ah_attr,
- struct mlx4_qp_path *path)
+static void to_rdma_ah_attr(struct mlx4_ib_dev *ibdev,
+ struct rdma_ah_attr *ah_attr,
+ struct mlx4_qp_path *path)
{
struct mlx4_dev *dev = ibdev->dev;
- int is_eth;
+ u8 port_num = path->sched_queue & 0x40 ? 2 : 1;
- memset(ib_ah_attr, 0, sizeof *ib_ah_attr);
- ib_ah_attr->port_num = path->sched_queue & 0x40 ? 2 : 1;
-
- if (ib_ah_attr->port_num == 0 || ib_ah_attr->port_num > dev->caps.num_ports)
+ memset(ah_attr, 0, sizeof(*ah_attr));
+ if (port_num == 0 || port_num > dev->caps.num_ports)
return;
+ ah_attr->type = rdma_ah_find_type(&ibdev->ib_dev, port_num);
- is_eth = rdma_port_get_link_layer(&ibdev->ib_dev, ib_ah_attr->port_num) ==
- IB_LINK_LAYER_ETHERNET;
- if (is_eth)
- ib_ah_attr->sl = ((path->sched_queue >> 3) & 0x7) |
- ((path->sched_queue & 4) << 1);
+ if (ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE)
+ rdma_ah_set_sl(ah_attr, ((path->sched_queue >> 3) & 0x7) |
+ ((path->sched_queue & 4) << 1));
else
- ib_ah_attr->sl = (path->sched_queue >> 2) & 0xf;
-
- ib_ah_attr->dlid = be16_to_cpu(path->rlid);
- ib_ah_attr->src_path_bits = path->grh_mylmc & 0x7f;
- ib_ah_attr->static_rate = path->static_rate ? path->static_rate - 5 : 0;
- ib_ah_attr->ah_flags = (path->grh_mylmc & (1 << 7)) ? IB_AH_GRH : 0;
- if (ib_ah_attr->ah_flags) {
- ib_ah_attr->grh.sgid_index = path->mgid_index;
- ib_ah_attr->grh.hop_limit = path->hop_limit;
- ib_ah_attr->grh.traffic_class =
- (be32_to_cpu(path->tclass_flowlabel) >> 20) & 0xff;
- ib_ah_attr->grh.flow_label =
- be32_to_cpu(path->tclass_flowlabel) & 0xfffff;
- memcpy(ib_ah_attr->grh.dgid.raw,
- path->rgid, sizeof ib_ah_attr->grh.dgid.raw);
+ rdma_ah_set_sl(ah_attr, (path->sched_queue >> 2) & 0xf);
+ rdma_ah_set_port_num(ah_attr, port_num);
+
+ rdma_ah_set_dlid(ah_attr, be16_to_cpu(path->rlid));
+ rdma_ah_set_path_bits(ah_attr, path->grh_mylmc & 0x7f);
+ rdma_ah_set_static_rate(ah_attr,
+ path->static_rate ? path->static_rate - 5 : 0);
+ if (path->grh_mylmc & (1 << 7)) {
+ rdma_ah_set_grh(ah_attr, NULL,
+ be32_to_cpu(path->tclass_flowlabel) & 0xfffff,
+ path->mgid_index,
+ path->hop_limit,
+ (be32_to_cpu(path->tclass_flowlabel)
+ >> 20) & 0xff);
+ rdma_ah_set_dgid_raw(ah_attr, path->rgid);
}
}
@@ -3431,6 +4025,9 @@ int mlx4_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr
int mlx4_state;
int err = 0;
+ if (ibqp->rwq_ind_tbl)
+ return -EOPNOTSUPP;
+
mutex_lock(&qp->mutex);
if (qp->state == IB_QPS_RESET) {
@@ -3458,11 +4055,14 @@ int mlx4_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr
qp_attr->qp_access_flags =
to_ib_qp_access_flags(be32_to_cpu(context.params2));
- if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) {
- to_ib_ah_attr(dev, &qp_attr->ah_attr, &context.pri_path);
- to_ib_ah_attr(dev, &qp_attr->alt_ah_attr, &context.alt_path);
+ if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC ||
+ qp->ibqp.qp_type == IB_QPT_XRC_INI ||
+ qp->ibqp.qp_type == IB_QPT_XRC_TGT) {
+ to_rdma_ah_attr(dev, &qp_attr->ah_attr, &context.pri_path);
+ to_rdma_ah_attr(dev, &qp_attr->alt_ah_attr, &context.alt_path);
qp_attr->alt_pkey_index = context.alt_path.pkey_index & 0x7f;
- qp_attr->alt_port_num = qp_attr->alt_ah_attr.port_num;
+ qp_attr->alt_port_num =
+ rdma_ah_get_port_num(&qp_attr->alt_ah_attr);
}
qp_attr->pkey_index = context.pri_path.pkey_index & 0x7f;
@@ -3525,3 +4125,408 @@ out:
return err;
}
+struct ib_wq *mlx4_ib_create_wq(struct ib_pd *pd,
+ struct ib_wq_init_attr *init_attr,
+ struct ib_udata *udata)
+{
+ struct mlx4_dev *dev = to_mdev(pd->device)->dev;
+ struct ib_qp_init_attr ib_qp_init_attr = {};
+ struct mlx4_ib_qp *qp;
+ struct mlx4_ib_create_wq ucmd;
+ int err, required_cmd_sz;
+
+ if (!udata)
+ return ERR_PTR(-EINVAL);
+
+ required_cmd_sz = offsetof(typeof(ucmd), comp_mask) +
+ sizeof(ucmd.comp_mask);
+ if (udata->inlen < required_cmd_sz) {
+ pr_debug("invalid inlen\n");
+ return ERR_PTR(-EINVAL);
+ }
+
+ if (udata->inlen > sizeof(ucmd) &&
+ !ib_is_udata_cleared(udata, sizeof(ucmd),
+ udata->inlen - sizeof(ucmd))) {
+ pr_debug("inlen is not supported\n");
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+
+ if (udata->outlen)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ if (init_attr->wq_type != IB_WQT_RQ) {
+ pr_debug("unsupported wq type %d\n", init_attr->wq_type);
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+
+ if (init_attr->create_flags & ~IB_WQ_FLAGS_SCATTER_FCS ||
+ !(dev->caps.flags & MLX4_DEV_CAP_FLAG_FCS_KEEP)) {
+ pr_debug("unsupported create_flags %u\n",
+ init_attr->create_flags);
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+
+ qp = kzalloc(sizeof(*qp), GFP_KERNEL);
+ if (!qp)
+ return ERR_PTR(-ENOMEM);
+
+ mutex_init(&qp->mutex);
+ qp->pri.vid = 0xFFFF;
+ qp->alt.vid = 0xFFFF;
+
+ ib_qp_init_attr.qp_context = init_attr->wq_context;
+ ib_qp_init_attr.qp_type = IB_QPT_RAW_PACKET;
+ ib_qp_init_attr.cap.max_recv_wr = init_attr->max_wr;
+ ib_qp_init_attr.cap.max_recv_sge = init_attr->max_sge;
+ ib_qp_init_attr.recv_cq = init_attr->cq;
+ ib_qp_init_attr.send_cq = ib_qp_init_attr.recv_cq; /* Dummy CQ */
+
+ if (init_attr->create_flags & IB_WQ_FLAGS_SCATTER_FCS)
+ ib_qp_init_attr.create_flags |= IB_QP_CREATE_SCATTER_FCS;
+
+ err = create_rq(pd, &ib_qp_init_attr, udata, qp);
+ if (err) {
+ kfree(qp);
+ return ERR_PTR(err);
+ }
+
+ qp->ibwq.event_handler = init_attr->event_handler;
+ qp->ibwq.wq_num = qp->mqp.qpn;
+ qp->ibwq.state = IB_WQS_RESET;
+
+ return &qp->ibwq;
+}
+
+static int ib_wq2qp_state(enum ib_wq_state state)
+{
+ switch (state) {
+ case IB_WQS_RESET:
+ return IB_QPS_RESET;
+ case IB_WQS_RDY:
+ return IB_QPS_RTR;
+ default:
+ return IB_QPS_ERR;
+ }
+}
+
+static int _mlx4_ib_modify_wq(struct ib_wq *ibwq, enum ib_wq_state new_state,
+ struct ib_udata *udata)
+{
+ struct mlx4_ib_qp *qp = to_mqp((struct ib_qp *)ibwq);
+ enum ib_qp_state qp_cur_state;
+ enum ib_qp_state qp_new_state;
+ int attr_mask;
+ int err;
+
+ /* ib_qp.state represents the WQ HW state while ib_wq.state represents
+ * the WQ logic state.
+ */
+ qp_cur_state = qp->state;
+ qp_new_state = ib_wq2qp_state(new_state);
+
+ if (ib_wq2qp_state(new_state) == qp_cur_state)
+ return 0;
+
+ if (new_state == IB_WQS_RDY) {
+ struct ib_qp_attr attr = {};
+
+ attr.port_num = qp->port;
+ attr_mask = IB_QP_PORT;
+
+ err = __mlx4_ib_modify_qp(ibwq, MLX4_IB_RWQ_SRC, &attr,
+ attr_mask, IB_QPS_RESET, IB_QPS_INIT,
+ udata);
+ if (err) {
+ pr_debug("WQN=0x%06x failed to apply RST->INIT on the HW QP\n",
+ ibwq->wq_num);
+ return err;
+ }
+
+ qp_cur_state = IB_QPS_INIT;
+ }
+
+ attr_mask = 0;
+ err = __mlx4_ib_modify_qp(ibwq, MLX4_IB_RWQ_SRC, NULL, attr_mask,
+ qp_cur_state, qp_new_state, udata);
+
+ if (err && (qp_cur_state == IB_QPS_INIT)) {
+ qp_new_state = IB_QPS_RESET;
+ if (__mlx4_ib_modify_qp(ibwq, MLX4_IB_RWQ_SRC, NULL,
+ attr_mask, IB_QPS_INIT, IB_QPS_RESET,
+ udata)) {
+ pr_warn("WQN=0x%06x failed with reverting HW's resources failure\n",
+ ibwq->wq_num);
+ qp_new_state = IB_QPS_INIT;
+ }
+ }
+
+ qp->state = qp_new_state;
+
+ return err;
+}
+
+int mlx4_ib_modify_wq(struct ib_wq *ibwq, struct ib_wq_attr *wq_attr,
+ u32 wq_attr_mask, struct ib_udata *udata)
+{
+ struct mlx4_ib_qp *qp = to_mqp((struct ib_qp *)ibwq);
+ struct mlx4_ib_modify_wq ucmd = {};
+ size_t required_cmd_sz;
+ enum ib_wq_state cur_state, new_state;
+ int err = 0;
+
+ required_cmd_sz = offsetof(typeof(ucmd), reserved) +
+ sizeof(ucmd.reserved);
+ if (udata->inlen < required_cmd_sz)
+ return -EINVAL;
+
+ if (udata->inlen > sizeof(ucmd) &&
+ !ib_is_udata_cleared(udata, sizeof(ucmd),
+ udata->inlen - sizeof(ucmd)))
+ return -EOPNOTSUPP;
+
+ if (ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen)))
+ return -EFAULT;
+
+ if (ucmd.comp_mask || ucmd.reserved)
+ return -EOPNOTSUPP;
+
+ if (wq_attr_mask & IB_WQ_FLAGS)
+ return -EOPNOTSUPP;
+
+ cur_state = wq_attr->curr_wq_state;
+ new_state = wq_attr->wq_state;
+
+ if ((new_state == IB_WQS_RDY) && (cur_state == IB_WQS_ERR))
+ return -EINVAL;
+
+ if ((new_state == IB_WQS_ERR) && (cur_state == IB_WQS_RESET))
+ return -EINVAL;
+
+ /* Need to protect against the parent RSS which also may modify WQ
+ * state.
+ */
+ mutex_lock(&qp->mutex);
+
+ /* Can update HW state only if a RSS QP has already associated to this
+ * WQ, so we can apply its port on the WQ.
+ */
+ if (qp->rss_usecnt)
+ err = _mlx4_ib_modify_wq(ibwq, new_state, udata);
+
+ if (!err)
+ ibwq->state = new_state;
+
+ mutex_unlock(&qp->mutex);
+
+ return err;
+}
+
+int mlx4_ib_destroy_wq(struct ib_wq *ibwq, struct ib_udata *udata)
+{
+ struct mlx4_ib_dev *dev = to_mdev(ibwq->device);
+ struct mlx4_ib_qp *qp = to_mqp((struct ib_qp *)ibwq);
+
+ if (qp->counter_index)
+ mlx4_ib_free_qp_counter(dev, qp);
+
+ destroy_qp_common(dev, qp, MLX4_IB_RWQ_SRC, udata);
+
+ kfree(qp);
+ return 0;
+}
+
+int mlx4_ib_create_rwq_ind_table(struct ib_rwq_ind_table *rwq_ind_table,
+ struct ib_rwq_ind_table_init_attr *init_attr,
+ struct ib_udata *udata)
+{
+ struct mlx4_ib_create_rwq_ind_tbl_resp resp = {};
+ unsigned int ind_tbl_size = 1 << init_attr->log_ind_tbl_size;
+ struct ib_device *device = rwq_ind_table->device;
+ unsigned int base_wqn;
+ size_t min_resp_len;
+ int i, err = 0;
+
+ if (udata->inlen > 0 &&
+ !ib_is_udata_cleared(udata, 0,
+ udata->inlen))
+ return -EOPNOTSUPP;
+
+ min_resp_len = offsetof(typeof(resp), reserved) + sizeof(resp.reserved);
+ if (udata->outlen && udata->outlen < min_resp_len)
+ return -EINVAL;
+
+ if (ind_tbl_size >
+ device->attrs.rss_caps.max_rwq_indirection_table_size) {
+ pr_debug("log_ind_tbl_size = %d is bigger than supported = %d\n",
+ ind_tbl_size,
+ device->attrs.rss_caps.max_rwq_indirection_table_size);
+ return -EINVAL;
+ }
+
+ base_wqn = init_attr->ind_tbl[0]->wq_num;
+
+ if (base_wqn % ind_tbl_size) {
+ pr_debug("WQN=0x%x isn't aligned with indirection table size\n",
+ base_wqn);
+ return -EINVAL;
+ }
+
+ for (i = 1; i < ind_tbl_size; i++) {
+ if (++base_wqn != init_attr->ind_tbl[i]->wq_num) {
+ pr_debug("indirection table's WQNs aren't consecutive\n");
+ return -EINVAL;
+ }
+ }
+
+ if (udata->outlen) {
+ resp.response_length = offsetof(typeof(resp), response_length) +
+ sizeof(resp.response_length);
+ err = ib_copy_to_udata(udata, &resp, resp.response_length);
+ }
+
+ return err;
+}
+
+struct mlx4_ib_drain_cqe {
+ struct ib_cqe cqe;
+ struct completion done;
+};
+
+static void mlx4_ib_drain_qp_done(struct ib_cq *cq, struct ib_wc *wc)
+{
+ struct mlx4_ib_drain_cqe *cqe = container_of(wc->wr_cqe,
+ struct mlx4_ib_drain_cqe,
+ cqe);
+
+ complete(&cqe->done);
+}
+
+/* This function returns only once the drained WR was completed */
+static void handle_drain_completion(struct ib_cq *cq,
+ struct mlx4_ib_drain_cqe *sdrain,
+ struct mlx4_ib_dev *dev)
+{
+ struct mlx4_dev *mdev = dev->dev;
+
+ if (cq->poll_ctx == IB_POLL_DIRECT) {
+ while (wait_for_completion_timeout(&sdrain->done, HZ / 10) <= 0)
+ ib_process_cq_direct(cq, -1);
+ return;
+ }
+
+ if (mdev->persist->state == MLX4_DEVICE_STATE_INTERNAL_ERROR) {
+ struct mlx4_ib_cq *mcq = to_mcq(cq);
+ bool triggered = false;
+ unsigned long flags;
+
+ spin_lock_irqsave(&dev->reset_flow_resource_lock, flags);
+ /* Make sure that the CQ handler won't run if wasn't run yet */
+ if (!mcq->mcq.reset_notify_added)
+ mcq->mcq.reset_notify_added = 1;
+ else
+ triggered = true;
+ spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags);
+
+ if (triggered) {
+ /* Wait for any scheduled/running task to be ended */
+ switch (cq->poll_ctx) {
+ case IB_POLL_SOFTIRQ:
+ irq_poll_disable(&cq->iop);
+ irq_poll_enable(&cq->iop);
+ break;
+ case IB_POLL_WORKQUEUE:
+ cancel_work_sync(&cq->work);
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ }
+ }
+
+ /* Run the CQ handler - this makes sure that the drain WR will
+ * be processed if wasn't processed yet.
+ */
+ mcq->mcq.comp(&mcq->mcq);
+ }
+
+ wait_for_completion(&sdrain->done);
+}
+
+void mlx4_ib_drain_sq(struct ib_qp *qp)
+{
+ struct ib_cq *cq = qp->send_cq;
+ struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
+ struct mlx4_ib_drain_cqe sdrain;
+ const struct ib_send_wr *bad_swr;
+ struct ib_rdma_wr swr = {
+ .wr = {
+ .next = NULL,
+ { .wr_cqe = &sdrain.cqe, },
+ .opcode = IB_WR_RDMA_WRITE,
+ },
+ };
+ int ret;
+ struct mlx4_ib_dev *dev = to_mdev(qp->device);
+ struct mlx4_dev *mdev = dev->dev;
+
+ ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
+ if (ret && mdev->persist->state != MLX4_DEVICE_STATE_INTERNAL_ERROR) {
+ WARN_ONCE(ret, "failed to drain send queue: %d\n", ret);
+ return;
+ }
+
+ sdrain.cqe.done = mlx4_ib_drain_qp_done;
+ init_completion(&sdrain.done);
+
+ ret = _mlx4_ib_post_send(qp, &swr.wr, &bad_swr, true);
+ if (ret) {
+ WARN_ONCE(ret, "failed to drain send queue: %d\n", ret);
+ return;
+ }
+
+ handle_drain_completion(cq, &sdrain, dev);
+}
+
+void mlx4_ib_drain_rq(struct ib_qp *qp)
+{
+ struct ib_cq *cq = qp->recv_cq;
+ struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
+ struct mlx4_ib_drain_cqe rdrain;
+ struct ib_recv_wr rwr = {};
+ const struct ib_recv_wr *bad_rwr;
+ int ret;
+ struct mlx4_ib_dev *dev = to_mdev(qp->device);
+ struct mlx4_dev *mdev = dev->dev;
+
+ ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
+ if (ret && mdev->persist->state != MLX4_DEVICE_STATE_INTERNAL_ERROR) {
+ WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret);
+ return;
+ }
+
+ rwr.wr_cqe = &rdrain.cqe;
+ rdrain.cqe.done = mlx4_ib_drain_qp_done;
+ init_completion(&rdrain.done);
+
+ ret = _mlx4_ib_post_recv(qp, &rwr, &bad_rwr, true);
+ if (ret) {
+ WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret);
+ return;
+ }
+
+ handle_drain_completion(cq, &rdrain, dev);
+}
+
+int mlx4_ib_qp_event_init(void)
+{
+ mlx4_ib_qp_event_wq = alloc_ordered_workqueue("mlx4_ib_qp_event_wq", 0);
+ if (!mlx4_ib_qp_event_wq)
+ return -ENOMEM;
+
+ return 0;
+}
+
+void mlx4_ib_qp_event_cleanup(void)
+{
+ destroy_workqueue(mlx4_ib_qp_event_wq);
+}
diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c
index 7dd3f267f06b..c4cf91235eee 100644
--- a/drivers/infiniband/hw/mlx4/srq.c
+++ b/drivers/infiniband/hw/mlx4/srq.c
@@ -34,10 +34,10 @@
#include <linux/mlx4/qp.h>
#include <linux/mlx4/srq.h>
#include <linux/slab.h>
-#include <linux/vmalloc.h>
#include "mlx4_ib.h"
#include <rdma/mlx4-abi.h>
+#include <rdma/uverbs_ioctl.h>
static void *get_wqe(struct mlx4_ib_srq *srq, int n)
{
@@ -69,12 +69,14 @@ static void mlx4_ib_srq_event(struct mlx4_srq *srq, enum mlx4_event type)
}
}
-struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
- struct ib_srq_init_attr *init_attr,
- struct ib_udata *udata)
+int mlx4_ib_create_srq(struct ib_srq *ib_srq,
+ struct ib_srq_init_attr *init_attr,
+ struct ib_udata *udata)
{
- struct mlx4_ib_dev *dev = to_mdev(pd->device);
- struct mlx4_ib_srq *srq;
+ struct mlx4_ib_dev *dev = to_mdev(ib_srq->device);
+ struct mlx4_ib_ucontext *ucontext = rdma_udata_to_drv_context(
+ udata, struct mlx4_ib_ucontext, ibucontext);
+ struct mlx4_ib_srq *srq = to_msrq(ib_srq);
struct mlx4_wqe_srq_next_seg *next;
struct mlx4_wqe_data_seg *scatter;
u32 cqn;
@@ -84,14 +86,14 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
int err;
int i;
+ if (init_attr->srq_type != IB_SRQT_BASIC &&
+ init_attr->srq_type != IB_SRQT_XRC)
+ return -EOPNOTSUPP;
+
/* Sanity check SRQ size before proceeding */
if (init_attr->attr.max_wr >= dev->dev->caps.max_srq_wqes ||
init_attr->attr.max_sge > dev->dev->caps.max_srq_sge)
- return ERR_PTR(-EINVAL);
-
- srq = kmalloc(sizeof *srq, GFP_KERNEL);
- if (!srq)
- return ERR_PTR(-ENOMEM);
+ return -EINVAL;
mutex_init(&srq->mutex);
spin_lock_init(&srq->lock);
@@ -106,23 +108,20 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
buf_size = srq->msrq.max * desc_size;
- if (pd->uobject) {
+ if (udata) {
struct mlx4_ib_create_srq ucmd;
- if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) {
- err = -EFAULT;
- goto err_srq;
- }
+ if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)))
+ return -EFAULT;
- srq->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr,
- buf_size, 0, 0);
- if (IS_ERR(srq->umem)) {
- err = PTR_ERR(srq->umem);
- goto err_srq;
- }
+ srq->umem =
+ ib_umem_get(ib_srq->device, ucmd.buf_addr, buf_size, 0);
+ if (IS_ERR(srq->umem))
+ return PTR_ERR(srq->umem);
- err = mlx4_mtt_init(dev->dev, ib_umem_page_count(srq->umem),
- ilog2(srq->umem->page_size), &srq->mtt);
+ err = mlx4_mtt_init(
+ dev->dev, ib_umem_num_dma_blocks(srq->umem, PAGE_SIZE),
+ PAGE_SHIFT, &srq->mtt);
if (err)
goto err_buf;
@@ -130,19 +129,18 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
if (err)
goto err_mtt;
- err = mlx4_ib_db_map_user(to_mucontext(pd->uobject->context),
- ucmd.db_addr, &srq->db);
+ err = mlx4_ib_db_map_user(udata, ucmd.db_addr, &srq->db);
if (err)
goto err_mtt;
} else {
- err = mlx4_db_alloc(dev->dev, &srq->db, 0, GFP_KERNEL);
+ err = mlx4_db_alloc(dev->dev, &srq->db, 0);
if (err)
- goto err_srq;
+ return err;
*srq->db.db = 0;
- if (mlx4_buf_alloc(dev->dev, buf_size, PAGE_SIZE * 2, &srq->buf,
- GFP_KERNEL)) {
+ if (mlx4_buf_alloc(dev->dev, buf_size, PAGE_SIZE * 2,
+ &srq->buf)) {
err = -ENOMEM;
goto err_db;
}
@@ -167,36 +165,32 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
if (err)
goto err_buf;
- err = mlx4_buf_write_mtt(dev->dev, &srq->mtt, &srq->buf, GFP_KERNEL);
+ err = mlx4_buf_write_mtt(dev->dev, &srq->mtt, &srq->buf);
if (err)
goto err_mtt;
- srq->wrid = kmalloc_array(srq->msrq.max, sizeof(u64),
- GFP_KERNEL | __GFP_NOWARN);
+ srq->wrid = kvmalloc_array(srq->msrq.max,
+ sizeof(u64), GFP_KERNEL);
if (!srq->wrid) {
- srq->wrid = __vmalloc(srq->msrq.max * sizeof(u64),
- GFP_KERNEL, PAGE_KERNEL);
- if (!srq->wrid) {
- err = -ENOMEM;
- goto err_mtt;
- }
+ err = -ENOMEM;
+ goto err_mtt;
}
}
- cqn = (init_attr->srq_type == IB_SRQT_XRC) ?
- to_mcq(init_attr->ext.xrc.cq)->mcq.cqn : 0;
+ cqn = ib_srq_has_cq(init_attr->srq_type) ?
+ to_mcq(init_attr->ext.cq)->mcq.cqn : 0;
xrcdn = (init_attr->srq_type == IB_SRQT_XRC) ?
to_mxrcd(init_attr->ext.xrc.xrcd)->xrcdn :
(u16) dev->dev->caps.reserved_xrcds;
- err = mlx4_srq_alloc(dev->dev, to_mpd(pd)->pdn, cqn, xrcdn, &srq->mtt,
- srq->db.dma, &srq->msrq);
+ err = mlx4_srq_alloc(dev->dev, to_mpd(ib_srq->pd)->pdn, cqn, xrcdn,
+ &srq->mtt, srq->db.dma, &srq->msrq);
if (err)
goto err_wrid;
srq->msrq.event = mlx4_ib_srq_event;
srq->ibsrq.ext.xrc.srq_num = srq->msrq.srqn;
- if (pd->uobject)
+ if (udata)
if (ib_copy_to_udata(udata, &srq->msrq.srqn, sizeof (__u32))) {
err = -EFAULT;
goto err_wrid;
@@ -204,11 +198,11 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
init_attr->attr.max_wr = srq->msrq.max - 1;
- return &srq->ibsrq;
+ return 0;
err_wrid:
- if (pd->uobject)
- mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), &srq->db);
+ if (udata)
+ mlx4_ib_db_unmap_user(ucontext, &srq->db);
else
kvfree(srq->wrid);
@@ -216,19 +210,15 @@ err_mtt:
mlx4_mtt_cleanup(dev->dev, &srq->mtt);
err_buf:
- if (pd->uobject)
- ib_umem_release(srq->umem);
- else
+ if (!srq->umem)
mlx4_buf_free(dev->dev, buf_size, &srq->buf);
+ ib_umem_release(srq->umem);
err_db:
- if (!pd->uobject)
+ if (!udata)
mlx4_db_free(dev->dev, &srq->db);
-err_srq:
- kfree(srq);
-
- return ERR_PTR(err);
+ return err;
}
int mlx4_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
@@ -275,7 +265,7 @@ int mlx4_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr)
return 0;
}
-int mlx4_ib_destroy_srq(struct ib_srq *srq)
+int mlx4_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata)
{
struct mlx4_ib_dev *dev = to_mdev(srq->device);
struct mlx4_ib_srq *msrq = to_msrq(srq);
@@ -283,18 +273,20 @@ int mlx4_ib_destroy_srq(struct ib_srq *srq)
mlx4_srq_free(dev->dev, &msrq->msrq);
mlx4_mtt_cleanup(dev->dev, &msrq->mtt);
- if (srq->uobject) {
- mlx4_ib_db_unmap_user(to_mucontext(srq->uobject->context), &msrq->db);
- ib_umem_release(msrq->umem);
+ if (udata) {
+ mlx4_ib_db_unmap_user(
+ rdma_udata_to_drv_context(
+ udata,
+ struct mlx4_ib_ucontext,
+ ibucontext),
+ &msrq->db);
} else {
kvfree(msrq->wrid);
mlx4_buf_free(dev->dev, msrq->msrq.max << msrq->msrq.wqe_shift,
&msrq->buf);
mlx4_db_free(dev->dev, &msrq->db);
}
-
- kfree(msrq);
-
+ ib_umem_release(msrq->umem);
return 0;
}
@@ -312,8 +304,8 @@ void mlx4_ib_free_srq_wqe(struct mlx4_ib_srq *srq, int wqe_index)
spin_unlock(&srq->lock);
}
-int mlx4_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
- struct ib_recv_wr **bad_wr)
+int mlx4_ib_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
+ const struct ib_recv_wr **bad_wr)
{
struct mlx4_ib_srq *srq = to_msrq(ibsrq);
struct mlx4_wqe_srq_next_seg *next;
@@ -328,7 +320,6 @@ int mlx4_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
if (mdev->dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) {
err = -EIO;
*bad_wr = wr;
- nreq = 0;
goto out;
}
diff --git a/drivers/infiniband/hw/mlx4/sysfs.c b/drivers/infiniband/hw/mlx4/sysfs.c
index 69fb5ba94d0f..88f534cf690e 100644
--- a/drivers/infiniband/hw/mlx4/sysfs.c
+++ b/drivers/infiniband/hw/mlx4/sysfs.c
@@ -56,7 +56,7 @@ static ssize_t show_admin_alias_guid(struct device *dev,
mlx4_ib_iov_dentry->entry_num,
port->num);
- return sprintf(buf, "%llx\n", be64_to_cpu(sysadmin_ag_val));
+ return sysfs_emit(buf, "%llx\n", be64_to_cpu(sysadmin_ag_val));
}
/* store_admin_alias_guid stores the (new) administratively assigned value of that GUID.
@@ -117,22 +117,24 @@ static ssize_t show_port_gid(struct device *dev,
struct mlx4_ib_iov_port *port = mlx4_ib_iov_dentry->ctx;
struct mlx4_ib_dev *mdev = port->dev;
union ib_gid gid;
- ssize_t ret;
+ int ret;
+ __be16 *raw;
ret = __mlx4_ib_query_gid(&mdev->ib_dev, port->num,
mlx4_ib_iov_dentry->entry_num, &gid, 1);
if (ret)
return ret;
- ret = sprintf(buf, "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n",
- be16_to_cpu(((__be16 *) gid.raw)[0]),
- be16_to_cpu(((__be16 *) gid.raw)[1]),
- be16_to_cpu(((__be16 *) gid.raw)[2]),
- be16_to_cpu(((__be16 *) gid.raw)[3]),
- be16_to_cpu(((__be16 *) gid.raw)[4]),
- be16_to_cpu(((__be16 *) gid.raw)[5]),
- be16_to_cpu(((__be16 *) gid.raw)[6]),
- be16_to_cpu(((__be16 *) gid.raw)[7]));
- return ret;
+
+ raw = (__be16 *)gid.raw;
+ return sysfs_emit(buf, "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n",
+ be16_to_cpu(raw[0]),
+ be16_to_cpu(raw[1]),
+ be16_to_cpu(raw[2]),
+ be16_to_cpu(raw[3]),
+ be16_to_cpu(raw[4]),
+ be16_to_cpu(raw[5]),
+ be16_to_cpu(raw[6]),
+ be16_to_cpu(raw[7]));
}
static ssize_t show_phys_port_pkey(struct device *dev,
@@ -151,7 +153,7 @@ static ssize_t show_phys_port_pkey(struct device *dev,
if (ret)
return ret;
- return sprintf(buf, "0x%04x\n", pkey);
+ return sysfs_emit(buf, "0x%04x\n", pkey);
}
#define DENTRY_REMOVE(_dentry) \
@@ -221,11 +223,12 @@ void del_sysfs_port_mcg_attr(struct mlx4_ib_dev *device, int port_num,
static int add_port_entries(struct mlx4_ib_dev *device, int port_num)
{
int i;
- char buff[10];
+ char buff[12];
struct mlx4_ib_iov_port *port = NULL;
int ret = 0 ;
struct ib_port_attr attr;
+ memset(&attr, 0, sizeof(attr));
/* get the physical gid and pkey table sizes.*/
ret = __mlx4_ib_query_port(&device->ib_dev, port_num, &attr, 1);
if (ret)
@@ -352,16 +355,12 @@ err:
static void get_name(struct mlx4_ib_dev *dev, char *name, int i, int max)
{
- char base_name[9];
-
- /* pci_name format is: bus:dev:func -> xxxx:yy:zz.n */
- strlcpy(name, pci_name(dev->dev->persist->pdev), max);
- strncpy(base_name, name, 8); /*till xxxx:yy:*/
- base_name[8] = '\0';
- /* with no ARI only 3 last bits are used so when the fn is higher than 8
+ /* pci_name format is: bus:dev:func -> xxxx:yy:zz.n
+ * with no ARI only 3 last bits are used so when the fn is higher than 8
* need to add it to the dev num, so count in the last number will be
* modulo 8 */
- sprintf(name, "%s%.2d.%d", base_name, (i/8), (i%8));
+ snprintf(name, max, "%.8s%.2d.%d", pci_name(dev->dev->persist->pdev),
+ i / 8, i % 8);
}
struct mlx4_port {
@@ -444,16 +443,12 @@ static ssize_t show_port_pkey(struct mlx4_port *p, struct port_attribute *attr,
{
struct port_table_attribute *tab_attr =
container_of(attr, struct port_table_attribute, attr);
- ssize_t ret = -ENODEV;
-
- if (p->dev->pkeys.virt2phys_pkey[p->slave][p->port_num - 1][tab_attr->index] >=
- (p->dev->dev->caps.pkey_table_len[p->port_num]))
- ret = sprintf(buf, "none\n");
- else
- ret = sprintf(buf, "%d\n",
- p->dev->pkeys.virt2phys_pkey[p->slave]
- [p->port_num - 1][tab_attr->index]);
- return ret;
+ struct pkey_mgt *m = &p->dev->pkeys;
+ u8 key = m->virt2phys_pkey[p->slave][p->port_num - 1][tab_attr->index];
+
+ if (key >= p->dev->dev->caps.pkey_table_len[p->port_num])
+ return sysfs_emit(buf, "none\n");
+ return sysfs_emit(buf, "%d\n", key);
}
static ssize_t store_port_pkey(struct mlx4_port *p, struct port_attribute *attr,
@@ -491,7 +486,7 @@ static ssize_t store_port_pkey(struct mlx4_port *p, struct port_attribute *attr,
static ssize_t show_port_gid_idx(struct mlx4_port *p,
struct port_attribute *attr, char *buf)
{
- return sprintf(buf, "%d\n", p->slave);
+ return sysfs_emit(buf, "%d\n", p->slave);
}
static struct attribute **
@@ -545,14 +540,10 @@ static ssize_t sysfs_show_smi_enabled(struct device *dev,
{
struct mlx4_port *p =
container_of(attr, struct mlx4_port, smi_enabled);
- ssize_t len = 0;
-
- if (mlx4_vf_smi_enabled(p->dev->dev, p->slave, p->port_num))
- len = sprintf(buf, "%d\n", 1);
- else
- len = sprintf(buf, "%d\n", 0);
- return len;
+ return sysfs_emit(buf, "%d\n",
+ !!mlx4_vf_smi_enabled(p->dev->dev, p->slave,
+ p->port_num));
}
static ssize_t sysfs_show_enable_smi_admin(struct device *dev,
@@ -561,14 +552,10 @@ static ssize_t sysfs_show_enable_smi_admin(struct device *dev,
{
struct mlx4_port *p =
container_of(attr, struct mlx4_port, enable_smi_admin);
- ssize_t len = 0;
- if (mlx4_vf_get_enable_smi_admin(p->dev->dev, p->slave, p->port_num))
- len = sprintf(buf, "%d\n", 1);
- else
- len = sprintf(buf, "%d\n", 0);
-
- return len;
+ return sysfs_emit(buf, "%d\n",
+ !!mlx4_vf_get_enable_smi_admin(p->dev->dev, p->slave,
+ p->port_num));
}
static ssize_t sysfs_store_enable_smi_admin(struct device *dev,
@@ -811,15 +798,13 @@ static void unregister_pkey_tree(struct mlx4_ib_dev *device)
int mlx4_ib_device_register_sysfs(struct mlx4_ib_dev *dev)
{
- int i;
+ unsigned int i;
int ret = 0;
if (!mlx4_is_master(dev->dev))
return 0;
- dev->iov_parent =
- kobject_create_and_add("iov",
- kobject_get(dev->ib_dev.ports_parent->parent));
+ dev->iov_parent = kobject_create_and_add("iov", &dev->ib_dev.dev.kobj);
if (!dev->iov_parent) {
ret = -ENOMEM;
goto err;
@@ -832,7 +817,7 @@ int mlx4_ib_device_register_sysfs(struct mlx4_ib_dev *dev)
goto err_ports;
}
- for (i = 1; i <= dev->ib_dev.phys_port_cnt; ++i) {
+ rdma_for_each_port(&dev->ib_dev, i) {
ret = add_port_entries(dev, i);
if (ret)
goto err_add_entries;
@@ -849,7 +834,6 @@ err_add_entries:
err_ports:
kobject_put(dev->iov_parent);
err:
- kobject_put(dev->ib_dev.ports_parent->parent);
pr_err("mlx4_ib_device_register_sysfs error (%d)\n", ret);
return ret;
}
@@ -885,5 +869,4 @@ void mlx4_ib_device_unregister_sysfs(struct mlx4_ib_dev *device)
kobject_put(device->ports_parent);
kobject_put(device->iov_parent);
kobject_put(device->iov_parent);
- kobject_put(device->ib_dev.ports_parent->parent);
}
diff --git a/drivers/infiniband/hw/mlx5/Kconfig b/drivers/infiniband/hw/mlx5/Kconfig
index bce263b92821..ef1ff42eaec5 100644
--- a/drivers/infiniband/hw/mlx5/Kconfig
+++ b/drivers/infiniband/hw/mlx5/Kconfig
@@ -1,7 +1,8 @@
+# SPDX-License-Identifier: GPL-2.0-only
config MLX5_INFINIBAND
- tristate "Mellanox Connect-IB HCA support"
+ tristate "Mellanox 5th generation network adapters (ConnectX series) support"
depends on NETDEVICES && ETHERNET && PCI && MLX5_CORE
- ---help---
+ help
This driver provides low-level InfiniBand support for
Mellanox Connect-IB PCI Express host channel adapters (HCAs).
This is required to use InfiniBand protocols such as
diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile
index 7493a83acd28..dd7bb377f491 100644
--- a/drivers/infiniband/hw/mlx5/Makefile
+++ b/drivers/infiniband/hw/mlx5/Makefile
@@ -1,4 +1,33 @@
-obj-$(CONFIG_MLX5_INFINIBAND) += mlx5_ib.o
+# SPDX-License-Identifier: GPL-2.0-only
+obj-$(CONFIG_MLX5_INFINIBAND) += mlx5_ib.o
+
+mlx5_ib-y := ah.o \
+ cmd.o \
+ cong.o \
+ counters.o \
+ cq.o \
+ data_direct.o \
+ dm.o \
+ dmah.o \
+ doorbell.o \
+ fs.o \
+ gsi.o \
+ ib_virt.o \
+ mad.o \
+ main.o \
+ mem.o \
+ mr.o \
+ qp.o \
+ qpc.o \
+ restrack.o \
+ srq.o \
+ srq_cmd.o \
+ umr.o \
+ wr.o
-mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o gsi.o ib_virt.o
mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o
+mlx5_ib-$(CONFIG_MLX5_ESWITCH) += ib_rep.o
+mlx5_ib-$(CONFIG_INFINIBAND_USER_ACCESS) += devx.o \
+ qos.o \
+ std_types.o
+mlx5_ib-$(CONFIG_MLX5_MACSEC) += macsec.o
diff --git a/drivers/infiniband/hw/mlx5/ah.c b/drivers/infiniband/hw/mlx5/ah.c
index d090e96f6f01..531a57f9ee7e 100644
--- a/drivers/infiniband/hw/mlx5/ah.c
+++ b/drivers/infiniband/hw/mlx5/ah.c
@@ -32,104 +32,123 @@
#include "mlx5_ib.h"
-static struct ib_ah *create_ib_ah(struct mlx5_ib_dev *dev,
- struct mlx5_ib_ah *ah,
- struct ib_ah_attr *ah_attr,
- enum rdma_link_layer ll)
+static __be16 mlx5_ah_get_udp_sport(const struct mlx5_ib_dev *dev,
+ const struct rdma_ah_attr *ah_attr)
{
- if (ah_attr->ah_flags & IB_AH_GRH) {
- memcpy(ah->av.rgid, &ah_attr->grh.dgid, 16);
- ah->av.grh_gid_fl = cpu_to_be32(ah_attr->grh.flow_label |
+ enum ib_gid_type gid_type = ah_attr->grh.sgid_attr->gid_type;
+ __be16 sport;
+
+ if ((gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) &&
+ (rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH) &&
+ (ah_attr->grh.flow_label & IB_GRH_FLOWLABEL_MASK))
+ sport = cpu_to_be16(
+ rdma_flow_label_to_udp_sport(ah_attr->grh.flow_label));
+ else
+ sport = mlx5_get_roce_udp_sport_min(dev,
+ ah_attr->grh.sgid_attr);
+
+ return sport;
+}
+
+static int create_ib_ah(struct mlx5_ib_dev *dev, struct mlx5_ib_ah *ah,
+ struct rdma_ah_init_attr *init_attr)
+{
+ struct rdma_ah_attr *ah_attr = init_attr->ah_attr;
+ enum ib_gid_type gid_type;
+ int rate_val;
+
+ if (rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH) {
+ const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr);
+
+ memcpy(ah->av.rgid, &grh->dgid, 16);
+ ah->av.grh_gid_fl = cpu_to_be32(grh->flow_label |
(1 << 30) |
- ah_attr->grh.sgid_index << 20);
- ah->av.hop_limit = ah_attr->grh.hop_limit;
- ah->av.tclass = ah_attr->grh.traffic_class;
+ grh->sgid_index << 20);
+ ah->av.hop_limit = grh->hop_limit;
+ ah->av.tclass = grh->traffic_class;
}
- ah->av.stat_rate_sl = (ah_attr->static_rate << 4);
-
- if (ll == IB_LINK_LAYER_ETHERNET) {
- memcpy(ah->av.rmac, ah_attr->dmac, sizeof(ah_attr->dmac));
- ah->av.udp_sport =
- mlx5_get_roce_udp_sport(dev,
- ah_attr->port_num,
- ah_attr->grh.sgid_index);
- ah->av.stat_rate_sl |= (ah_attr->sl & 0x7) << 1;
+ rate_val = mlx5r_ib_rate(dev, rdma_ah_get_static_rate(ah_attr));
+ if (rate_val < 0)
+ return rate_val;
+ ah->av.stat_rate_sl = rate_val << 4;
+
+ if (ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) {
+ if (init_attr->xmit_slave)
+ ah->xmit_port =
+ mlx5_lag_get_slave_port(dev->mdev,
+ init_attr->xmit_slave);
+ gid_type = ah_attr->grh.sgid_attr->gid_type;
+
+ memcpy(ah->av.rmac, ah_attr->roce.dmac,
+ sizeof(ah_attr->roce.dmac));
+ ah->av.udp_sport = mlx5_ah_get_udp_sport(dev, ah_attr);
+ ah->av.stat_rate_sl |= (rdma_ah_get_sl(ah_attr) & 0x7) << 1;
+ if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
+#define MLX5_ECN_ENABLED BIT(1)
+ ah->av.tclass |= MLX5_ECN_ENABLED;
} else {
- ah->av.rlid = cpu_to_be16(ah_attr->dlid);
- ah->av.fl_mlid = ah_attr->src_path_bits & 0x7f;
- ah->av.stat_rate_sl |= (ah_attr->sl & 0xf);
+ ah->av.rlid = cpu_to_be16(rdma_ah_get_dlid(ah_attr));
+ ah->av.fl_mlid = rdma_ah_get_path_bits(ah_attr) & 0x7f;
+ ah->av.stat_rate_sl |= (rdma_ah_get_sl(ah_attr) & 0xf);
}
- return &ah->ibah;
+ return 0;
}
-struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
- struct ib_udata *udata)
+int mlx5_ib_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
+ struct ib_udata *udata)
{
- struct mlx5_ib_ah *ah;
- struct mlx5_ib_dev *dev = to_mdev(pd->device);
- enum rdma_link_layer ll;
-
- ll = pd->device->get_link_layer(pd->device, ah_attr->port_num);
+ struct rdma_ah_attr *ah_attr = init_attr->ah_attr;
+ struct mlx5_ib_ah *ah = to_mah(ibah);
+ struct mlx5_ib_dev *dev = to_mdev(ibah->device);
+ enum rdma_ah_attr_type ah_type = ah_attr->type;
- if (ll == IB_LINK_LAYER_ETHERNET && !(ah_attr->ah_flags & IB_AH_GRH))
- return ERR_PTR(-EINVAL);
+ if ((ah_type == RDMA_AH_ATTR_TYPE_ROCE) &&
+ !(rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH))
+ return -EINVAL;
- if (ll == IB_LINK_LAYER_ETHERNET && udata) {
+ if (ah_type == RDMA_AH_ATTR_TYPE_ROCE && udata) {
int err;
struct mlx5_ib_create_ah_resp resp = {};
- u32 min_resp_len = offsetof(typeof(resp), dmac) +
- sizeof(resp.dmac);
+ u32 min_resp_len =
+ offsetofend(struct mlx5_ib_create_ah_resp, dmac);
if (udata->outlen < min_resp_len)
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
resp.response_length = min_resp_len;
- err = ib_resolve_eth_dmac(pd->device, ah_attr);
- if (err)
- return ERR_PTR(err);
-
- memcpy(resp.dmac, ah_attr->dmac, ETH_ALEN);
+ memcpy(resp.dmac, ah_attr->roce.dmac, ETH_ALEN);
err = ib_copy_to_udata(udata, &resp, resp.response_length);
if (err)
- return ERR_PTR(err);
+ return err;
}
- ah = kzalloc(sizeof(*ah), GFP_ATOMIC);
- if (!ah)
- return ERR_PTR(-ENOMEM);
-
- return create_ib_ah(dev, ah, ah_attr, ll); /* never fails */
+ return create_ib_ah(dev, ah, init_attr);
}
-int mlx5_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
+int mlx5_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr)
{
struct mlx5_ib_ah *ah = to_mah(ibah);
u32 tmp;
memset(ah_attr, 0, sizeof(*ah_attr));
+ ah_attr->type = ibah->type;
tmp = be32_to_cpu(ah->av.grh_gid_fl);
if (tmp & (1 << 30)) {
- ah_attr->ah_flags = IB_AH_GRH;
- ah_attr->grh.sgid_index = (tmp >> 20) & 0xff;
- ah_attr->grh.flow_label = tmp & 0xfffff;
- memcpy(&ah_attr->grh.dgid, ah->av.rgid, 16);
- ah_attr->grh.hop_limit = ah->av.hop_limit;
- ah_attr->grh.traffic_class = ah->av.tclass;
+ rdma_ah_set_grh(ah_attr, NULL,
+ tmp & 0xfffff,
+ (tmp >> 20) & 0xff,
+ ah->av.hop_limit,
+ ah->av.tclass);
+ rdma_ah_set_dgid_raw(ah_attr, ah->av.rgid);
}
- ah_attr->dlid = be16_to_cpu(ah->av.rlid);
- ah_attr->static_rate = ah->av.stat_rate_sl >> 4;
- ah_attr->sl = ah->av.stat_rate_sl & 0xf;
+ rdma_ah_set_dlid(ah_attr, be16_to_cpu(ah->av.rlid));
+ rdma_ah_set_static_rate(ah_attr, ah->av.stat_rate_sl >> 4);
+ rdma_ah_set_sl(ah_attr, ah->av.stat_rate_sl & 0xf);
return 0;
}
-
-int mlx5_ib_destroy_ah(struct ib_ah *ah)
-{
- kfree(to_mah(ah));
- return 0;
-}
diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c
new file mode 100644
index 000000000000..7c08e3008927
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/cmd.c
@@ -0,0 +1,268 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2017-2020, Mellanox Technologies inc. All rights reserved.
+ */
+
+#include "cmd.h"
+
+int mlx5r_cmd_query_special_mkeys(struct mlx5_ib_dev *dev)
+{
+ u32 out[MLX5_ST_SZ_DW(query_special_contexts_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(query_special_contexts_in)] = {};
+ bool is_terminate, is_dump, is_null;
+ int err;
+
+ is_terminate = MLX5_CAP_GEN(dev->mdev, terminate_scatter_list_mkey);
+ is_dump = MLX5_CAP_GEN(dev->mdev, dump_fill_mkey);
+ is_null = MLX5_CAP_GEN(dev->mdev, null_mkey);
+
+ dev->mkeys.terminate_scatter_list_mkey = MLX5_TERMINATE_SCATTER_LIST_LKEY;
+ if (!is_terminate && !is_dump && !is_null)
+ return 0;
+
+ MLX5_SET(query_special_contexts_in, in, opcode,
+ MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS);
+ err = mlx5_cmd_exec_inout(dev->mdev, query_special_contexts, in, out);
+ if (err)
+ return err;
+
+ if (is_dump)
+ dev->mkeys.dump_fill_mkey = MLX5_GET(query_special_contexts_out,
+ out, dump_fill_mkey);
+
+ if (is_null)
+ dev->mkeys.null_mkey = cpu_to_be32(
+ MLX5_GET(query_special_contexts_out, out, null_mkey));
+
+ if (is_terminate)
+ dev->mkeys.terminate_scatter_list_mkey =
+ cpu_to_be32(MLX5_GET(query_special_contexts_out, out,
+ terminate_scatter_list_mkey));
+
+ return 0;
+}
+
+int mlx5_cmd_query_cong_params(struct mlx5_core_dev *dev, int cong_point,
+ void *out)
+{
+ u32 in[MLX5_ST_SZ_DW(query_cong_params_in)] = {};
+
+ MLX5_SET(query_cong_params_in, in, opcode,
+ MLX5_CMD_OP_QUERY_CONG_PARAMS);
+ MLX5_SET(query_cong_params_in, in, cong_protocol, cong_point);
+
+ return mlx5_cmd_exec_inout(dev, query_cong_params, in, out);
+}
+
+void mlx5_cmd_destroy_tir(struct mlx5_core_dev *dev, u32 tirn, u16 uid)
+{
+ u32 in[MLX5_ST_SZ_DW(destroy_tir_in)] = {};
+
+ MLX5_SET(destroy_tir_in, in, opcode, MLX5_CMD_OP_DESTROY_TIR);
+ MLX5_SET(destroy_tir_in, in, tirn, tirn);
+ MLX5_SET(destroy_tir_in, in, uid, uid);
+ mlx5_cmd_exec_in(dev, destroy_tir, in);
+}
+
+void mlx5_cmd_destroy_tis(struct mlx5_core_dev *dev, u32 tisn, u16 uid)
+{
+ u32 in[MLX5_ST_SZ_DW(destroy_tis_in)] = {};
+
+ MLX5_SET(destroy_tis_in, in, opcode, MLX5_CMD_OP_DESTROY_TIS);
+ MLX5_SET(destroy_tis_in, in, tisn, tisn);
+ MLX5_SET(destroy_tis_in, in, uid, uid);
+ mlx5_cmd_exec_in(dev, destroy_tis, in);
+}
+
+int mlx5_cmd_destroy_rqt(struct mlx5_core_dev *dev, u32 rqtn, u16 uid)
+{
+ u32 in[MLX5_ST_SZ_DW(destroy_rqt_in)] = {};
+
+ MLX5_SET(destroy_rqt_in, in, opcode, MLX5_CMD_OP_DESTROY_RQT);
+ MLX5_SET(destroy_rqt_in, in, rqtn, rqtn);
+ MLX5_SET(destroy_rqt_in, in, uid, uid);
+ return mlx5_cmd_exec_in(dev, destroy_rqt, in);
+}
+
+int mlx5_cmd_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn,
+ u16 uid)
+{
+ u32 in[MLX5_ST_SZ_DW(alloc_transport_domain_in)] = {};
+ u32 out[MLX5_ST_SZ_DW(alloc_transport_domain_out)] = {};
+ int err;
+
+ MLX5_SET(alloc_transport_domain_in, in, opcode,
+ MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN);
+ MLX5_SET(alloc_transport_domain_in, in, uid, uid);
+
+ err = mlx5_cmd_exec_inout(dev, alloc_transport_domain, in, out);
+ if (!err)
+ *tdn = MLX5_GET(alloc_transport_domain_out, out,
+ transport_domain);
+
+ return err;
+}
+
+void mlx5_cmd_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn,
+ u16 uid)
+{
+ u32 in[MLX5_ST_SZ_DW(dealloc_transport_domain_in)] = {};
+
+ MLX5_SET(dealloc_transport_domain_in, in, opcode,
+ MLX5_CMD_OP_DEALLOC_TRANSPORT_DOMAIN);
+ MLX5_SET(dealloc_transport_domain_in, in, uid, uid);
+ MLX5_SET(dealloc_transport_domain_in, in, transport_domain, tdn);
+ mlx5_cmd_exec_in(dev, dealloc_transport_domain, in);
+}
+
+int mlx5_cmd_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn, u16 uid)
+{
+ u32 in[MLX5_ST_SZ_DW(dealloc_pd_in)] = {};
+
+ MLX5_SET(dealloc_pd_in, in, opcode, MLX5_CMD_OP_DEALLOC_PD);
+ MLX5_SET(dealloc_pd_in, in, pd, pdn);
+ MLX5_SET(dealloc_pd_in, in, uid, uid);
+ return mlx5_cmd_exec_in(dev, dealloc_pd, in);
+}
+
+int mlx5_cmd_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid,
+ u32 qpn, u16 uid)
+{
+ u32 in[MLX5_ST_SZ_DW(attach_to_mcg_in)] = {};
+ void *gid;
+
+ MLX5_SET(attach_to_mcg_in, in, opcode, MLX5_CMD_OP_ATTACH_TO_MCG);
+ MLX5_SET(attach_to_mcg_in, in, qpn, qpn);
+ MLX5_SET(attach_to_mcg_in, in, uid, uid);
+ gid = MLX5_ADDR_OF(attach_to_mcg_in, in, multicast_gid);
+ memcpy(gid, mgid, sizeof(*mgid));
+ return mlx5_cmd_exec_in(dev, attach_to_mcg, in);
+}
+
+int mlx5_cmd_detach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid,
+ u32 qpn, u16 uid)
+{
+ u32 in[MLX5_ST_SZ_DW(detach_from_mcg_in)] = {};
+ void *gid;
+
+ MLX5_SET(detach_from_mcg_in, in, opcode, MLX5_CMD_OP_DETACH_FROM_MCG);
+ MLX5_SET(detach_from_mcg_in, in, qpn, qpn);
+ MLX5_SET(detach_from_mcg_in, in, uid, uid);
+ gid = MLX5_ADDR_OF(detach_from_mcg_in, in, multicast_gid);
+ memcpy(gid, mgid, sizeof(*mgid));
+ return mlx5_cmd_exec_in(dev, detach_from_mcg, in);
+}
+
+int mlx5_cmd_xrcd_alloc(struct mlx5_core_dev *dev, u32 *xrcdn, u16 uid)
+{
+ u32 out[MLX5_ST_SZ_DW(alloc_xrcd_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(alloc_xrcd_in)] = {};
+ int err;
+
+ MLX5_SET(alloc_xrcd_in, in, opcode, MLX5_CMD_OP_ALLOC_XRCD);
+ MLX5_SET(alloc_xrcd_in, in, uid, uid);
+ err = mlx5_cmd_exec_inout(dev, alloc_xrcd, in, out);
+ if (!err)
+ *xrcdn = MLX5_GET(alloc_xrcd_out, out, xrcd);
+ return err;
+}
+
+int mlx5_cmd_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn, u16 uid)
+{
+ u32 in[MLX5_ST_SZ_DW(dealloc_xrcd_in)] = {};
+
+ MLX5_SET(dealloc_xrcd_in, in, opcode, MLX5_CMD_OP_DEALLOC_XRCD);
+ MLX5_SET(dealloc_xrcd_in, in, xrcd, xrcdn);
+ MLX5_SET(dealloc_xrcd_in, in, uid, uid);
+ return mlx5_cmd_exec_in(dev, dealloc_xrcd, in);
+}
+
+int mlx5_cmd_mad_ifc(struct mlx5_ib_dev *dev, const void *inb, void *outb,
+ u16 opmod, u8 port)
+{
+ int outlen = MLX5_ST_SZ_BYTES(mad_ifc_out);
+ int inlen = MLX5_ST_SZ_BYTES(mad_ifc_in);
+ int err = -ENOMEM;
+ void *data;
+ void *resp;
+ u32 *out;
+ u32 *in;
+
+ in = kzalloc(inlen, GFP_KERNEL);
+ out = kzalloc(outlen, GFP_KERNEL);
+ if (!in || !out)
+ goto out;
+
+ MLX5_SET(mad_ifc_in, in, opcode, MLX5_CMD_OP_MAD_IFC);
+ MLX5_SET(mad_ifc_in, in, op_mod, opmod);
+ if (dev->ib_dev.type == RDMA_DEVICE_TYPE_SMI) {
+ MLX5_SET(mad_ifc_in, in, plane_index, port);
+ MLX5_SET(mad_ifc_in, in, port,
+ smi_to_native_portnum(dev, port));
+ } else {
+ MLX5_SET(mad_ifc_in, in, port, port);
+ }
+
+ data = MLX5_ADDR_OF(mad_ifc_in, in, mad);
+ memcpy(data, inb, MLX5_FLD_SZ_BYTES(mad_ifc_in, mad));
+
+ err = mlx5_cmd_exec_inout(dev->mdev, mad_ifc, in, out);
+ if (err)
+ goto out;
+
+ resp = MLX5_ADDR_OF(mad_ifc_out, out, response_mad_packet);
+ memcpy(outb, resp,
+ MLX5_FLD_SZ_BYTES(mad_ifc_out, response_mad_packet));
+
+out:
+ kfree(out);
+ kfree(in);
+ return err;
+}
+
+int mlx5_cmd_uar_alloc(struct mlx5_core_dev *dev, u32 *uarn, u16 uid)
+{
+ u32 out[MLX5_ST_SZ_DW(alloc_uar_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(alloc_uar_in)] = {};
+ int err;
+
+ MLX5_SET(alloc_uar_in, in, opcode, MLX5_CMD_OP_ALLOC_UAR);
+ MLX5_SET(alloc_uar_in, in, uid, uid);
+ err = mlx5_cmd_exec_inout(dev, alloc_uar, in, out);
+ if (err)
+ return err;
+
+ *uarn = MLX5_GET(alloc_uar_out, out, uar);
+ return 0;
+}
+
+int mlx5_cmd_uar_dealloc(struct mlx5_core_dev *dev, u32 uarn, u16 uid)
+{
+ u32 in[MLX5_ST_SZ_DW(dealloc_uar_in)] = {};
+
+ MLX5_SET(dealloc_uar_in, in, opcode, MLX5_CMD_OP_DEALLOC_UAR);
+ MLX5_SET(dealloc_uar_in, in, uar, uarn);
+ MLX5_SET(dealloc_uar_in, in, uid, uid);
+ return mlx5_cmd_exec_in(dev, dealloc_uar, in);
+}
+
+int mlx5_cmd_query_vuid(struct mlx5_core_dev *dev, bool data_direct,
+ char *out_vuid)
+{
+ u8 out[MLX5_ST_SZ_BYTES(query_vuid_out) +
+ MLX5_ST_SZ_BYTES(array1024_auto)] = {};
+ u8 in[MLX5_ST_SZ_BYTES(query_vuid_in)] = {};
+ char *vuid;
+ int err;
+
+ MLX5_SET(query_vuid_in, in, opcode, MLX5_CMD_OPCODE_QUERY_VUID);
+ MLX5_SET(query_vuid_in, in, vhca_id, MLX5_CAP_GEN(dev, vhca_id));
+ MLX5_SET(query_vuid_in, in, data_direct, data_direct);
+ err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ if (err)
+ return err;
+
+ vuid = MLX5_ADDR_OF(query_vuid_out, out, vuid);
+ memcpy(out_vuid, vuid, MLX5_ST_SZ_BYTES(array1024_auto));
+ return 0;
+}
diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h
new file mode 100644
index 000000000000..e6c88b6ebd0d
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/cmd.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef MLX5_IB_CMD_H
+#define MLX5_IB_CMD_H
+
+#include "mlx5_ib.h"
+#include <linux/kernel.h>
+#include <linux/mlx5/driver.h>
+
+int mlx5r_cmd_query_special_mkeys(struct mlx5_ib_dev *dev);
+int mlx5_cmd_query_cong_params(struct mlx5_core_dev *dev, int cong_point,
+ void *out);
+int mlx5_cmd_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn, u16 uid);
+void mlx5_cmd_destroy_tir(struct mlx5_core_dev *dev, u32 tirn, u16 uid);
+void mlx5_cmd_destroy_tis(struct mlx5_core_dev *dev, u32 tisn, u16 uid);
+int mlx5_cmd_destroy_rqt(struct mlx5_core_dev *dev, u32 rqtn, u16 uid);
+int mlx5_cmd_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn,
+ u16 uid);
+void mlx5_cmd_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn,
+ u16 uid);
+int mlx5_cmd_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid,
+ u32 qpn, u16 uid);
+int mlx5_cmd_detach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid,
+ u32 qpn, u16 uid);
+int mlx5_cmd_xrcd_alloc(struct mlx5_core_dev *dev, u32 *xrcdn, u16 uid);
+int mlx5_cmd_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn, u16 uid);
+int mlx5_cmd_mad_ifc(struct mlx5_ib_dev *dev, const void *inb, void *outb,
+ u16 opmod, u8 port);
+int mlx5_cmd_uar_alloc(struct mlx5_core_dev *dev, u32 *uarn, u16 uid);
+int mlx5_cmd_uar_dealloc(struct mlx5_core_dev *dev, u32 uarn, u16 uid);
+int mlx5_cmd_query_vuid(struct mlx5_core_dev *dev, bool data_direct,
+ char *out_vuid);
+#endif /* MLX5_IB_CMD_H */
diff --git a/drivers/infiniband/hw/mlx5/cong.c b/drivers/infiniband/hw/mlx5/cong.c
new file mode 100644
index 000000000000..a78a067e3ce7
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/cong.c
@@ -0,0 +1,491 @@
+/*
+ * Copyright (c) 2013-2017, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/debugfs.h>
+
+#include "mlx5_ib.h"
+#include "cmd.h"
+
+enum mlx5_ib_cong_node_type {
+ MLX5_IB_RROCE_ECN_RP = 1,
+ MLX5_IB_RROCE_ECN_NP = 2,
+ MLX5_IB_RROCE_GENERAL = 3,
+};
+
+static const char * const mlx5_ib_dbg_cc_name[] = {
+ "rp_clamp_tgt_rate",
+ "rp_clamp_tgt_rate_ati",
+ "rp_time_reset",
+ "rp_byte_reset",
+ "rp_threshold",
+ "rp_ai_rate",
+ "rp_max_rate",
+ "rp_hai_rate",
+ "rp_min_dec_fac",
+ "rp_min_rate",
+ "rp_rate_to_set_on_first_cnp",
+ "rp_dce_tcp_g",
+ "rp_dce_tcp_rtt",
+ "rp_rate_reduce_monitor_period",
+ "rp_initial_alpha_value",
+ "rp_gd",
+ "np_min_time_between_cnps",
+ "np_cnp_dscp",
+ "np_cnp_prio_mode",
+ "np_cnp_prio",
+ "rtt_resp_dscp_valid",
+ "rtt_resp_dscp",
+};
+
+#define MLX5_IB_RP_CLAMP_TGT_RATE_ATTR BIT(1)
+#define MLX5_IB_RP_CLAMP_TGT_RATE_ATI_ATTR BIT(2)
+#define MLX5_IB_RP_TIME_RESET_ATTR BIT(3)
+#define MLX5_IB_RP_BYTE_RESET_ATTR BIT(4)
+#define MLX5_IB_RP_THRESHOLD_ATTR BIT(5)
+#define MLX5_IB_RP_MAX_RATE_ATTR BIT(6)
+#define MLX5_IB_RP_AI_RATE_ATTR BIT(7)
+#define MLX5_IB_RP_HAI_RATE_ATTR BIT(8)
+#define MLX5_IB_RP_MIN_DEC_FAC_ATTR BIT(9)
+#define MLX5_IB_RP_MIN_RATE_ATTR BIT(10)
+#define MLX5_IB_RP_RATE_TO_SET_ON_FIRST_CNP_ATTR BIT(11)
+#define MLX5_IB_RP_DCE_TCP_G_ATTR BIT(12)
+#define MLX5_IB_RP_DCE_TCP_RTT_ATTR BIT(13)
+#define MLX5_IB_RP_RATE_REDUCE_MONITOR_PERIOD_ATTR BIT(14)
+#define MLX5_IB_RP_INITIAL_ALPHA_VALUE_ATTR BIT(15)
+#define MLX5_IB_RP_GD_ATTR BIT(16)
+
+#define MLX5_IB_NP_MIN_TIME_BETWEEN_CNPS_ATTR BIT(2)
+#define MLX5_IB_NP_CNP_DSCP_ATTR BIT(3)
+#define MLX5_IB_NP_CNP_PRIO_MODE_ATTR BIT(4)
+
+#define MLX5_IB_GENERAL_RTT_RESP_DSCP_ATTR BIT(0)
+
+static enum mlx5_ib_cong_node_type
+mlx5_ib_param_to_node(enum mlx5_ib_dbg_cc_types param_offset)
+{
+ if (param_offset <= MLX5_IB_DBG_CC_RP_GD)
+ return MLX5_IB_RROCE_ECN_RP;
+
+ if (param_offset <= MLX5_IB_DBG_CC_NP_CNP_PRIO)
+ return MLX5_IB_RROCE_ECN_NP;
+
+ return MLX5_IB_RROCE_GENERAL;
+}
+
+static u32 mlx5_get_cc_param_val(void *field, int offset)
+{
+ switch (offset) {
+ case MLX5_IB_DBG_CC_RP_CLAMP_TGT_RATE:
+ return MLX5_GET(cong_control_r_roce_ecn_rp, field,
+ clamp_tgt_rate);
+ case MLX5_IB_DBG_CC_RP_CLAMP_TGT_RATE_ATI:
+ return MLX5_GET(cong_control_r_roce_ecn_rp, field,
+ clamp_tgt_rate_after_time_inc);
+ case MLX5_IB_DBG_CC_RP_TIME_RESET:
+ return MLX5_GET(cong_control_r_roce_ecn_rp, field,
+ rpg_time_reset);
+ case MLX5_IB_DBG_CC_RP_BYTE_RESET:
+ return MLX5_GET(cong_control_r_roce_ecn_rp, field,
+ rpg_byte_reset);
+ case MLX5_IB_DBG_CC_RP_THRESHOLD:
+ return MLX5_GET(cong_control_r_roce_ecn_rp, field,
+ rpg_threshold);
+ case MLX5_IB_DBG_CC_RP_AI_RATE:
+ return MLX5_GET(cong_control_r_roce_ecn_rp, field,
+ rpg_ai_rate);
+ case MLX5_IB_DBG_CC_RP_MAX_RATE:
+ return MLX5_GET(cong_control_r_roce_ecn_rp, field,
+ rpg_max_rate);
+ case MLX5_IB_DBG_CC_RP_HAI_RATE:
+ return MLX5_GET(cong_control_r_roce_ecn_rp, field,
+ rpg_hai_rate);
+ case MLX5_IB_DBG_CC_RP_MIN_DEC_FAC:
+ return MLX5_GET(cong_control_r_roce_ecn_rp, field,
+ rpg_min_dec_fac);
+ case MLX5_IB_DBG_CC_RP_MIN_RATE:
+ return MLX5_GET(cong_control_r_roce_ecn_rp, field,
+ rpg_min_rate);
+ case MLX5_IB_DBG_CC_RP_RATE_TO_SET_ON_FIRST_CNP:
+ return MLX5_GET(cong_control_r_roce_ecn_rp, field,
+ rate_to_set_on_first_cnp);
+ case MLX5_IB_DBG_CC_RP_DCE_TCP_G:
+ return MLX5_GET(cong_control_r_roce_ecn_rp, field,
+ dce_tcp_g);
+ case MLX5_IB_DBG_CC_RP_DCE_TCP_RTT:
+ return MLX5_GET(cong_control_r_roce_ecn_rp, field,
+ dce_tcp_rtt);
+ case MLX5_IB_DBG_CC_RP_RATE_REDUCE_MONITOR_PERIOD:
+ return MLX5_GET(cong_control_r_roce_ecn_rp, field,
+ rate_reduce_monitor_period);
+ case MLX5_IB_DBG_CC_RP_INITIAL_ALPHA_VALUE:
+ return MLX5_GET(cong_control_r_roce_ecn_rp, field,
+ initial_alpha_value);
+ case MLX5_IB_DBG_CC_RP_GD:
+ return MLX5_GET(cong_control_r_roce_ecn_rp, field,
+ rpg_gd);
+ case MLX5_IB_DBG_CC_NP_MIN_TIME_BETWEEN_CNPS:
+ return MLX5_GET(cong_control_r_roce_ecn_np, field,
+ min_time_between_cnps);
+ case MLX5_IB_DBG_CC_NP_CNP_DSCP:
+ return MLX5_GET(cong_control_r_roce_ecn_np, field,
+ cnp_dscp);
+ case MLX5_IB_DBG_CC_NP_CNP_PRIO_MODE:
+ return MLX5_GET(cong_control_r_roce_ecn_np, field,
+ cnp_prio_mode);
+ case MLX5_IB_DBG_CC_NP_CNP_PRIO:
+ return MLX5_GET(cong_control_r_roce_ecn_np, field,
+ cnp_802p_prio);
+ case MLX5_IB_DBG_CC_GENERAL_RTT_RESP_DSCP_VALID:
+ return MLX5_GET(cong_control_r_roce_general, field,
+ rtt_resp_dscp_valid);
+ case MLX5_IB_DBG_CC_GENERAL_RTT_RESP_DSCP:
+ return MLX5_GET(cong_control_r_roce_general, field,
+ rtt_resp_dscp);
+ default:
+ return 0;
+ }
+}
+
+static void mlx5_ib_set_cc_param_mask_val(void *field, int offset,
+ u32 var, u32 *attr_mask)
+{
+ switch (offset) {
+ case MLX5_IB_DBG_CC_RP_CLAMP_TGT_RATE:
+ *attr_mask |= MLX5_IB_RP_CLAMP_TGT_RATE_ATTR;
+ MLX5_SET(cong_control_r_roce_ecn_rp, field,
+ clamp_tgt_rate, var);
+ break;
+ case MLX5_IB_DBG_CC_RP_CLAMP_TGT_RATE_ATI:
+ *attr_mask |= MLX5_IB_RP_CLAMP_TGT_RATE_ATI_ATTR;
+ MLX5_SET(cong_control_r_roce_ecn_rp, field,
+ clamp_tgt_rate_after_time_inc, var);
+ break;
+ case MLX5_IB_DBG_CC_RP_TIME_RESET:
+ *attr_mask |= MLX5_IB_RP_TIME_RESET_ATTR;
+ MLX5_SET(cong_control_r_roce_ecn_rp, field,
+ rpg_time_reset, var);
+ break;
+ case MLX5_IB_DBG_CC_RP_BYTE_RESET:
+ *attr_mask |= MLX5_IB_RP_BYTE_RESET_ATTR;
+ MLX5_SET(cong_control_r_roce_ecn_rp, field,
+ rpg_byte_reset, var);
+ break;
+ case MLX5_IB_DBG_CC_RP_THRESHOLD:
+ *attr_mask |= MLX5_IB_RP_THRESHOLD_ATTR;
+ MLX5_SET(cong_control_r_roce_ecn_rp, field,
+ rpg_threshold, var);
+ break;
+ case MLX5_IB_DBG_CC_RP_AI_RATE:
+ *attr_mask |= MLX5_IB_RP_AI_RATE_ATTR;
+ MLX5_SET(cong_control_r_roce_ecn_rp, field,
+ rpg_ai_rate, var);
+ break;
+ case MLX5_IB_DBG_CC_RP_MAX_RATE:
+ *attr_mask |= MLX5_IB_RP_MAX_RATE_ATTR;
+ MLX5_SET(cong_control_r_roce_ecn_rp, field,
+ rpg_max_rate, var);
+ break;
+ case MLX5_IB_DBG_CC_RP_HAI_RATE:
+ *attr_mask |= MLX5_IB_RP_HAI_RATE_ATTR;
+ MLX5_SET(cong_control_r_roce_ecn_rp, field,
+ rpg_hai_rate, var);
+ break;
+ case MLX5_IB_DBG_CC_RP_MIN_DEC_FAC:
+ *attr_mask |= MLX5_IB_RP_MIN_DEC_FAC_ATTR;
+ MLX5_SET(cong_control_r_roce_ecn_rp, field,
+ rpg_min_dec_fac, var);
+ break;
+ case MLX5_IB_DBG_CC_RP_MIN_RATE:
+ *attr_mask |= MLX5_IB_RP_MIN_RATE_ATTR;
+ MLX5_SET(cong_control_r_roce_ecn_rp, field,
+ rpg_min_rate, var);
+ break;
+ case MLX5_IB_DBG_CC_RP_RATE_TO_SET_ON_FIRST_CNP:
+ *attr_mask |= MLX5_IB_RP_RATE_TO_SET_ON_FIRST_CNP_ATTR;
+ MLX5_SET(cong_control_r_roce_ecn_rp, field,
+ rate_to_set_on_first_cnp, var);
+ break;
+ case MLX5_IB_DBG_CC_RP_DCE_TCP_G:
+ *attr_mask |= MLX5_IB_RP_DCE_TCP_G_ATTR;
+ MLX5_SET(cong_control_r_roce_ecn_rp, field,
+ dce_tcp_g, var);
+ break;
+ case MLX5_IB_DBG_CC_RP_DCE_TCP_RTT:
+ *attr_mask |= MLX5_IB_RP_DCE_TCP_RTT_ATTR;
+ MLX5_SET(cong_control_r_roce_ecn_rp, field,
+ dce_tcp_rtt, var);
+ break;
+ case MLX5_IB_DBG_CC_RP_RATE_REDUCE_MONITOR_PERIOD:
+ *attr_mask |= MLX5_IB_RP_RATE_REDUCE_MONITOR_PERIOD_ATTR;
+ MLX5_SET(cong_control_r_roce_ecn_rp, field,
+ rate_reduce_monitor_period, var);
+ break;
+ case MLX5_IB_DBG_CC_RP_INITIAL_ALPHA_VALUE:
+ *attr_mask |= MLX5_IB_RP_INITIAL_ALPHA_VALUE_ATTR;
+ MLX5_SET(cong_control_r_roce_ecn_rp, field,
+ initial_alpha_value, var);
+ break;
+ case MLX5_IB_DBG_CC_RP_GD:
+ *attr_mask |= MLX5_IB_RP_GD_ATTR;
+ MLX5_SET(cong_control_r_roce_ecn_rp, field,
+ rpg_gd, var);
+ break;
+ case MLX5_IB_DBG_CC_NP_MIN_TIME_BETWEEN_CNPS:
+ *attr_mask |= MLX5_IB_NP_MIN_TIME_BETWEEN_CNPS_ATTR;
+ MLX5_SET(cong_control_r_roce_ecn_np, field,
+ min_time_between_cnps, var);
+ break;
+ case MLX5_IB_DBG_CC_NP_CNP_DSCP:
+ *attr_mask |= MLX5_IB_NP_CNP_DSCP_ATTR;
+ MLX5_SET(cong_control_r_roce_ecn_np, field, cnp_dscp, var);
+ break;
+ case MLX5_IB_DBG_CC_NP_CNP_PRIO_MODE:
+ *attr_mask |= MLX5_IB_NP_CNP_PRIO_MODE_ATTR;
+ MLX5_SET(cong_control_r_roce_ecn_np, field, cnp_prio_mode, var);
+ break;
+ case MLX5_IB_DBG_CC_NP_CNP_PRIO:
+ *attr_mask |= MLX5_IB_NP_CNP_PRIO_MODE_ATTR;
+ MLX5_SET(cong_control_r_roce_ecn_np, field, cnp_prio_mode, 0);
+ MLX5_SET(cong_control_r_roce_ecn_np, field, cnp_802p_prio, var);
+ break;
+ case MLX5_IB_DBG_CC_GENERAL_RTT_RESP_DSCP_VALID:
+ *attr_mask |= MLX5_IB_GENERAL_RTT_RESP_DSCP_ATTR;
+ MLX5_SET(cong_control_r_roce_general, field, rtt_resp_dscp_valid, var);
+ break;
+ case MLX5_IB_DBG_CC_GENERAL_RTT_RESP_DSCP:
+ *attr_mask |= MLX5_IB_GENERAL_RTT_RESP_DSCP_ATTR;
+ MLX5_SET(cong_control_r_roce_general, field, rtt_resp_dscp_valid, 1);
+ MLX5_SET(cong_control_r_roce_general, field, rtt_resp_dscp, var);
+ break;
+ }
+}
+
+static int mlx5_ib_get_cc_params(struct mlx5_ib_dev *dev, u32 port_num,
+ int offset, u32 *var)
+{
+ int outlen = MLX5_ST_SZ_BYTES(query_cong_params_out);
+ void *out;
+ void *field;
+ int err;
+ enum mlx5_ib_cong_node_type node;
+ struct mlx5_core_dev *mdev;
+
+ /* Takes a 1-based port number */
+ mdev = mlx5_ib_get_native_port_mdev(dev, port_num + 1, NULL);
+ if (!mdev)
+ return -ENODEV;
+
+ out = kvzalloc(outlen, GFP_KERNEL);
+ if (!out) {
+ err = -ENOMEM;
+ goto alloc_err;
+ }
+
+ node = mlx5_ib_param_to_node(offset);
+
+ err = mlx5_cmd_query_cong_params(mdev, node, out);
+ if (err)
+ goto free;
+
+ field = MLX5_ADDR_OF(query_cong_params_out, out, congestion_parameters);
+ *var = mlx5_get_cc_param_val(field, offset);
+
+free:
+ kvfree(out);
+alloc_err:
+ mlx5_ib_put_native_port_mdev(dev, port_num + 1);
+ return err;
+}
+
+static int mlx5_ib_set_cc_params(struct mlx5_ib_dev *dev, u32 port_num,
+ int offset, u32 var)
+{
+ int inlen = MLX5_ST_SZ_BYTES(modify_cong_params_in);
+ void *in;
+ void *field;
+ enum mlx5_ib_cong_node_type node;
+ struct mlx5_core_dev *mdev;
+ u32 attr_mask = 0;
+ int err;
+
+ /* Takes a 1-based port number */
+ mdev = mlx5_ib_get_native_port_mdev(dev, port_num + 1, NULL);
+ if (!mdev)
+ return -ENODEV;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in) {
+ err = -ENOMEM;
+ goto alloc_err;
+ }
+
+ MLX5_SET(modify_cong_params_in, in, opcode,
+ MLX5_CMD_OP_MODIFY_CONG_PARAMS);
+
+ node = mlx5_ib_param_to_node(offset);
+ MLX5_SET(modify_cong_params_in, in, cong_protocol, node);
+
+ field = MLX5_ADDR_OF(modify_cong_params_in, in, congestion_parameters);
+ mlx5_ib_set_cc_param_mask_val(field, offset, var, &attr_mask);
+
+ field = MLX5_ADDR_OF(modify_cong_params_in, in, field_select);
+ MLX5_SET(field_select_r_roce_rp, field, field_select_r_roce_rp,
+ attr_mask);
+
+ err = mlx5_cmd_exec_in(dev->mdev, modify_cong_params, in);
+ kvfree(in);
+alloc_err:
+ mlx5_ib_put_native_port_mdev(dev, port_num + 1);
+ return err;
+}
+
+static ssize_t set_param(struct file *filp, const char __user *buf,
+ size_t count, loff_t *pos)
+{
+ struct mlx5_ib_dbg_param *param = filp->private_data;
+ int offset = param->offset;
+ char lbuf[11] = { };
+ u32 var;
+ int ret;
+
+ if (count > sizeof(lbuf))
+ return -EINVAL;
+
+ if (copy_from_user(lbuf, buf, count))
+ return -EFAULT;
+
+ lbuf[sizeof(lbuf) - 1] = '\0';
+
+ if (kstrtou32(lbuf, 0, &var))
+ return -EINVAL;
+
+ ret = mlx5_ib_set_cc_params(param->dev, param->port_num, offset, var);
+ return ret ? ret : count;
+}
+
+static ssize_t get_param(struct file *filp, char __user *buf, size_t count,
+ loff_t *pos)
+{
+ struct mlx5_ib_dbg_param *param = filp->private_data;
+ int offset = param->offset;
+ u32 var = 0;
+ int ret;
+ char lbuf[11];
+
+ ret = mlx5_ib_get_cc_params(param->dev, param->port_num, offset, &var);
+ if (ret)
+ return ret;
+
+ ret = snprintf(lbuf, sizeof(lbuf), "%d\n", var);
+ if (ret < 0)
+ return ret;
+
+ return simple_read_from_buffer(buf, count, pos, lbuf, ret);
+}
+
+static const struct file_operations dbg_cc_fops = {
+ .owner = THIS_MODULE,
+ .open = simple_open,
+ .write = set_param,
+ .read = get_param,
+};
+
+void mlx5_ib_cleanup_cong_debugfs(struct mlx5_ib_dev *dev, u32 port_num)
+{
+ if (!mlx5_debugfs_root ||
+ !dev->port[port_num].dbg_cc_params ||
+ !dev->port[port_num].dbg_cc_params->root)
+ return;
+
+ debugfs_remove_recursive(dev->port[port_num].dbg_cc_params->root);
+ kfree(dev->port[port_num].dbg_cc_params);
+ dev->port[port_num].dbg_cc_params = NULL;
+}
+
+void mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev, u32 port_num)
+{
+ struct mlx5_ib_dbg_cc_params *dbg_cc_params;
+ struct mlx5_core_dev *mdev;
+ int i;
+
+ if (!mlx5_debugfs_root)
+ return;
+
+ /* Takes a 1-based port number */
+ mdev = mlx5_ib_get_native_port_mdev(dev, port_num + 1, NULL);
+ if (!mdev)
+ return;
+
+ if (!MLX5_CAP_GEN(mdev, cc_query_allowed) ||
+ !MLX5_CAP_GEN(mdev, cc_modify_allowed))
+ goto put_mdev;
+
+ dbg_cc_params = kzalloc(sizeof(*dbg_cc_params), GFP_KERNEL);
+ if (!dbg_cc_params)
+ goto err;
+
+ dev->port[port_num].dbg_cc_params = dbg_cc_params;
+
+ dbg_cc_params->root = debugfs_create_dir("cc_params", mlx5_debugfs_get_dev_root(mdev));
+
+ for (i = 0; i < MLX5_IB_DBG_CC_MAX; i++) {
+ if ((i == MLX5_IB_DBG_CC_GENERAL_RTT_RESP_DSCP_VALID ||
+ i == MLX5_IB_DBG_CC_GENERAL_RTT_RESP_DSCP))
+ if (!MLX5_CAP_GEN(mdev, roce) ||
+ !MLX5_CAP_ROCE(mdev, roce_cc_general))
+ continue;
+
+ dbg_cc_params->params[i].offset = i;
+ dbg_cc_params->params[i].dev = dev;
+ dbg_cc_params->params[i].port_num = port_num;
+ dbg_cc_params->params[i].dentry =
+ debugfs_create_file(mlx5_ib_dbg_cc_name[i],
+ 0600, dbg_cc_params->root,
+ &dbg_cc_params->params[i],
+ &dbg_cc_fops);
+ }
+
+put_mdev:
+ mlx5_ib_put_native_port_mdev(dev, port_num + 1);
+ return;
+
+err:
+ mlx5_ib_warn(dev, "cong debugfs failure\n");
+ mlx5_ib_cleanup_cong_debugfs(dev, port_num);
+ mlx5_ib_put_native_port_mdev(dev, port_num + 1);
+
+ /*
+ * We don't want to fail driver if debugfs failed to initialize,
+ * so we are not forwarding error to the user.
+ */
+ return;
+}
diff --git a/drivers/infiniband/hw/mlx5/counters.c b/drivers/infiniband/hw/mlx5/counters.c
new file mode 100644
index 000000000000..e042e0719ead
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/counters.c
@@ -0,0 +1,1279 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2013-2020, Mellanox Technologies inc. All rights reserved.
+ */
+
+#include "mlx5_ib.h"
+#include <linux/mlx5/eswitch.h>
+#include <linux/mlx5/vport.h>
+#include "counters.h"
+#include "ib_rep.h"
+#include "qp.h"
+
+struct mlx5_ib_counter {
+ const char *name;
+ size_t offset;
+ u32 type;
+};
+
+struct mlx5_rdma_counter {
+ struct rdma_counter rdma_counter;
+
+ struct mlx5_fc *fc[MLX5_IB_OPCOUNTER_MAX];
+ struct xarray qpn_opfc_xa;
+};
+
+static struct mlx5_rdma_counter *to_mcounter(struct rdma_counter *counter)
+{
+ return container_of(counter, struct mlx5_rdma_counter, rdma_counter);
+}
+
+#define INIT_Q_COUNTER(_name) \
+ { .name = #_name, .offset = MLX5_BYTE_OFF(query_q_counter_out, _name)}
+
+#define INIT_VPORT_Q_COUNTER(_name) \
+ { .name = "vport_" #_name, .offset = \
+ MLX5_BYTE_OFF(query_q_counter_out, _name)}
+
+static const struct mlx5_ib_counter basic_q_cnts[] = {
+ INIT_Q_COUNTER(rx_write_requests),
+ INIT_Q_COUNTER(rx_read_requests),
+ INIT_Q_COUNTER(rx_atomic_requests),
+ INIT_Q_COUNTER(rx_dct_connect),
+ INIT_Q_COUNTER(out_of_buffer),
+};
+
+static const struct mlx5_ib_counter out_of_seq_q_cnts[] = {
+ INIT_Q_COUNTER(out_of_sequence),
+};
+
+static const struct mlx5_ib_counter retrans_q_cnts[] = {
+ INIT_Q_COUNTER(duplicate_request),
+ INIT_Q_COUNTER(rnr_nak_retry_err),
+ INIT_Q_COUNTER(packet_seq_err),
+ INIT_Q_COUNTER(implied_nak_seq_err),
+ INIT_Q_COUNTER(local_ack_timeout_err),
+};
+
+static const struct mlx5_ib_counter vport_basic_q_cnts[] = {
+ INIT_VPORT_Q_COUNTER(rx_write_requests),
+ INIT_VPORT_Q_COUNTER(rx_read_requests),
+ INIT_VPORT_Q_COUNTER(rx_atomic_requests),
+ INIT_VPORT_Q_COUNTER(rx_dct_connect),
+ INIT_VPORT_Q_COUNTER(out_of_buffer),
+};
+
+static const struct mlx5_ib_counter vport_out_of_seq_q_cnts[] = {
+ INIT_VPORT_Q_COUNTER(out_of_sequence),
+};
+
+static const struct mlx5_ib_counter vport_retrans_q_cnts[] = {
+ INIT_VPORT_Q_COUNTER(duplicate_request),
+ INIT_VPORT_Q_COUNTER(rnr_nak_retry_err),
+ INIT_VPORT_Q_COUNTER(packet_seq_err),
+ INIT_VPORT_Q_COUNTER(implied_nak_seq_err),
+ INIT_VPORT_Q_COUNTER(local_ack_timeout_err),
+};
+
+#define INIT_CONG_COUNTER(_name) \
+ { .name = #_name, .offset = \
+ MLX5_BYTE_OFF(query_cong_statistics_out, _name ## _high)}
+
+static const struct mlx5_ib_counter cong_cnts[] = {
+ INIT_CONG_COUNTER(rp_cnp_ignored),
+ INIT_CONG_COUNTER(rp_cnp_handled),
+ INIT_CONG_COUNTER(np_ecn_marked_roce_packets),
+ INIT_CONG_COUNTER(np_cnp_sent),
+};
+
+static const struct mlx5_ib_counter extended_err_cnts[] = {
+ INIT_Q_COUNTER(resp_local_length_error),
+ INIT_Q_COUNTER(resp_cqe_error),
+ INIT_Q_COUNTER(req_cqe_error),
+ INIT_Q_COUNTER(req_remote_invalid_request),
+ INIT_Q_COUNTER(req_remote_access_errors),
+ INIT_Q_COUNTER(resp_remote_access_errors),
+ INIT_Q_COUNTER(resp_cqe_flush_error),
+ INIT_Q_COUNTER(req_cqe_flush_error),
+ INIT_Q_COUNTER(req_transport_retries_exceeded),
+ INIT_Q_COUNTER(req_rnr_retries_exceeded),
+};
+
+static const struct mlx5_ib_counter roce_accl_cnts[] = {
+ INIT_Q_COUNTER(roce_adp_retrans),
+ INIT_Q_COUNTER(roce_adp_retrans_to),
+ INIT_Q_COUNTER(roce_slow_restart),
+ INIT_Q_COUNTER(roce_slow_restart_cnps),
+ INIT_Q_COUNTER(roce_slow_restart_trans),
+};
+
+static const struct mlx5_ib_counter vport_extended_err_cnts[] = {
+ INIT_VPORT_Q_COUNTER(resp_local_length_error),
+ INIT_VPORT_Q_COUNTER(resp_cqe_error),
+ INIT_VPORT_Q_COUNTER(req_cqe_error),
+ INIT_VPORT_Q_COUNTER(req_remote_invalid_request),
+ INIT_VPORT_Q_COUNTER(req_remote_access_errors),
+ INIT_VPORT_Q_COUNTER(resp_remote_access_errors),
+ INIT_VPORT_Q_COUNTER(resp_cqe_flush_error),
+ INIT_VPORT_Q_COUNTER(req_cqe_flush_error),
+ INIT_VPORT_Q_COUNTER(req_transport_retries_exceeded),
+ INIT_VPORT_Q_COUNTER(req_rnr_retries_exceeded),
+};
+
+static const struct mlx5_ib_counter vport_roce_accl_cnts[] = {
+ INIT_VPORT_Q_COUNTER(roce_adp_retrans),
+ INIT_VPORT_Q_COUNTER(roce_adp_retrans_to),
+ INIT_VPORT_Q_COUNTER(roce_slow_restart),
+ INIT_VPORT_Q_COUNTER(roce_slow_restart_cnps),
+ INIT_VPORT_Q_COUNTER(roce_slow_restart_trans),
+};
+
+#define INIT_EXT_PPCNT_COUNTER(_name) \
+ { .name = #_name, .offset = \
+ MLX5_BYTE_OFF(ppcnt_reg, \
+ counter_set.eth_extended_cntrs_grp_data_layout._name##_high)}
+
+static const struct mlx5_ib_counter ext_ppcnt_cnts[] = {
+ INIT_EXT_PPCNT_COUNTER(rx_icrc_encapsulated),
+};
+
+#define INIT_OP_COUNTER(_name, _type) \
+ { .name = #_name, .type = MLX5_IB_OPCOUNTER_##_type}
+
+static const struct mlx5_ib_counter basic_op_cnts[] = {
+ INIT_OP_COUNTER(cc_rx_ce_pkts, CC_RX_CE_PKTS),
+};
+
+static const struct mlx5_ib_counter rdmarx_cnp_op_cnts[] = {
+ INIT_OP_COUNTER(cc_rx_cnp_pkts, CC_RX_CNP_PKTS),
+};
+
+static const struct mlx5_ib_counter rdmatx_cnp_op_cnts[] = {
+ INIT_OP_COUNTER(cc_tx_cnp_pkts, CC_TX_CNP_PKTS),
+};
+
+static const struct mlx5_ib_counter packets_op_cnts[] = {
+ INIT_OP_COUNTER(rdma_tx_packets, RDMA_TX_PACKETS),
+ INIT_OP_COUNTER(rdma_tx_bytes, RDMA_TX_BYTES),
+ INIT_OP_COUNTER(rdma_rx_packets, RDMA_RX_PACKETS),
+ INIT_OP_COUNTER(rdma_rx_bytes, RDMA_RX_BYTES),
+};
+
+static int mlx5_ib_read_counters(struct ib_counters *counters,
+ struct ib_counters_read_attr *read_attr,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
+ struct mlx5_read_counters_attr mread_attr = {};
+ struct mlx5_ib_flow_counters_desc *desc;
+ int ret, i;
+
+ mutex_lock(&mcounters->mcntrs_mutex);
+ if (mcounters->cntrs_max_index > read_attr->ncounters) {
+ ret = -EINVAL;
+ goto err_bound;
+ }
+
+ mread_attr.out = kcalloc(mcounters->counters_num, sizeof(u64),
+ GFP_KERNEL);
+ if (!mread_attr.out) {
+ ret = -ENOMEM;
+ goto err_bound;
+ }
+
+ mread_attr.hw_cntrs_hndl = mcounters->hw_cntrs_hndl;
+ mread_attr.flags = read_attr->flags;
+ ret = mcounters->read_counters(counters->device, &mread_attr);
+ if (ret)
+ goto err_read;
+
+ /* do the pass over the counters data array to assign according to the
+ * descriptions and indexing pairs
+ */
+ desc = mcounters->counters_data;
+ for (i = 0; i < mcounters->ncounters; i++)
+ read_attr->counters_buff[desc[i].index] += mread_attr.out[desc[i].description];
+
+err_read:
+ kfree(mread_attr.out);
+err_bound:
+ mutex_unlock(&mcounters->mcntrs_mutex);
+ return ret;
+}
+
+static int mlx5_ib_destroy_counters(struct ib_counters *counters)
+{
+ struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
+
+ mlx5_ib_counters_clear_description(counters);
+ if (mcounters->hw_cntrs_hndl)
+ mlx5_fc_destroy(to_mdev(counters->device)->mdev,
+ mcounters->hw_cntrs_hndl);
+ return 0;
+}
+
+static int mlx5_ib_create_counters(struct ib_counters *counters,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
+
+ mutex_init(&mcounters->mcntrs_mutex);
+ return 0;
+}
+
+static bool vport_qcounters_supported(struct mlx5_ib_dev *dev)
+{
+ return MLX5_CAP_GEN(dev->mdev, q_counter_other_vport) &&
+ MLX5_CAP_GEN(dev->mdev, q_counter_aggregation);
+}
+
+static const struct mlx5_ib_counters *get_counters(struct mlx5_ib_dev *dev,
+ u32 port_num)
+{
+ if ((is_mdev_switchdev_mode(dev->mdev) &&
+ !vport_qcounters_supported(dev)) || !port_num)
+ return &dev->port[0].cnts;
+
+ return is_mdev_switchdev_mode(dev->mdev) ?
+ &dev->port[1].cnts : &dev->port[port_num - 1].cnts;
+}
+
+/**
+ * mlx5_ib_get_counters_id - Returns counters id to use for device+port
+ * @dev: Pointer to mlx5 IB device
+ * @port_num: Zero based port number
+ *
+ * mlx5_ib_get_counters_id() Returns counters set id to use for given
+ * device port combination in switchdev and non switchdev mode of the
+ * parent device.
+ */
+u16 mlx5_ib_get_counters_id(struct mlx5_ib_dev *dev, u32 port_num)
+{
+ const struct mlx5_ib_counters *cnts = get_counters(dev, port_num + 1);
+
+ return cnts->set_id;
+}
+
+static struct rdma_hw_stats *do_alloc_stats(const struct mlx5_ib_counters *cnts)
+{
+ struct rdma_hw_stats *stats;
+ u32 num_hw_counters;
+ int i;
+
+ num_hw_counters = cnts->num_q_counters + cnts->num_cong_counters +
+ cnts->num_ext_ppcnt_counters;
+ stats = rdma_alloc_hw_stats_struct(cnts->descs,
+ num_hw_counters +
+ cnts->num_op_counters,
+ RDMA_HW_STATS_DEFAULT_LIFESPAN);
+ if (!stats)
+ return NULL;
+
+ for (i = 0; i < cnts->num_op_counters; i++)
+ set_bit(num_hw_counters + i, stats->is_disabled);
+
+ return stats;
+}
+
+static struct rdma_hw_stats *
+mlx5_ib_alloc_hw_device_stats(struct ib_device *ibdev)
+{
+ struct mlx5_ib_dev *dev = to_mdev(ibdev);
+ const struct mlx5_ib_counters *cnts = &dev->port[0].cnts;
+
+ return do_alloc_stats(cnts);
+}
+
+static struct rdma_hw_stats *
+mlx5_ib_alloc_hw_port_stats(struct ib_device *ibdev, u32 port_num)
+{
+ struct mlx5_ib_dev *dev = to_mdev(ibdev);
+ const struct mlx5_ib_counters *cnts = get_counters(dev, port_num);
+
+ return do_alloc_stats(cnts);
+}
+
+static int mlx5_ib_query_q_counters(struct mlx5_core_dev *mdev,
+ const struct mlx5_ib_counters *cnts,
+ struct rdma_hw_stats *stats,
+ u16 set_id)
+{
+ u32 out[MLX5_ST_SZ_DW(query_q_counter_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(query_q_counter_in)] = {};
+ __be32 val;
+ int ret, i;
+
+ MLX5_SET(query_q_counter_in, in, opcode, MLX5_CMD_OP_QUERY_Q_COUNTER);
+ MLX5_SET(query_q_counter_in, in, counter_set_id, set_id);
+ ret = mlx5_cmd_exec_inout(mdev, query_q_counter, in, out);
+ if (ret)
+ return ret;
+
+ for (i = 0; i < cnts->num_q_counters; i++) {
+ val = *(__be32 *)((void *)out + cnts->offsets[i]);
+ stats->value[i] = (u64)be32_to_cpu(val);
+ }
+
+ return 0;
+}
+
+static int mlx5_ib_query_ext_ppcnt_counters(struct mlx5_ib_dev *dev,
+ const struct mlx5_ib_counters *cnts,
+ struct rdma_hw_stats *stats)
+{
+ int offset = cnts->num_q_counters + cnts->num_cong_counters;
+ u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {};
+ int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
+ int ret, i;
+ void *out;
+
+ out = kvzalloc(sz, GFP_KERNEL);
+ if (!out)
+ return -ENOMEM;
+
+ MLX5_SET(ppcnt_reg, in, local_port, 1);
+ MLX5_SET(ppcnt_reg, in, grp, MLX5_ETHERNET_EXTENDED_COUNTERS_GROUP);
+ ret = mlx5_core_access_reg(dev->mdev, in, sz, out, sz, MLX5_REG_PPCNT,
+ 0, 0);
+ if (ret)
+ goto free;
+
+ for (i = 0; i < cnts->num_ext_ppcnt_counters; i++)
+ stats->value[i + offset] =
+ be64_to_cpup((__be64 *)(out +
+ cnts->offsets[i + offset]));
+free:
+ kvfree(out);
+ return ret;
+}
+
+static int mlx5_ib_query_q_counters_vport(struct mlx5_ib_dev *dev,
+ u32 port_num,
+ const struct mlx5_ib_counters *cnts,
+ struct rdma_hw_stats *stats)
+
+{
+ u32 out[MLX5_ST_SZ_DW(query_q_counter_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(query_q_counter_in)] = {};
+ struct mlx5_core_dev *mdev;
+ __be32 val;
+ int ret, i;
+
+ if (!dev->port[port_num].rep ||
+ dev->port[port_num].rep->vport == MLX5_VPORT_UPLINK)
+ return 0;
+
+ mdev = mlx5_eswitch_get_core_dev(dev->port[port_num].rep->esw);
+ if (!mdev)
+ return -EOPNOTSUPP;
+
+ MLX5_SET(query_q_counter_in, in, opcode, MLX5_CMD_OP_QUERY_Q_COUNTER);
+ MLX5_SET(query_q_counter_in, in, other_vport, 1);
+ MLX5_SET(query_q_counter_in, in, vport_number,
+ dev->port[port_num].rep->vport);
+ MLX5_SET(query_q_counter_in, in, aggregate, 1);
+ ret = mlx5_cmd_exec_inout(mdev, query_q_counter, in, out);
+ if (ret)
+ return ret;
+
+ for (i = 0; i < cnts->num_q_counters; i++) {
+ val = *(__be32 *)((void *)out + cnts->offsets[i]);
+ stats->value[i] = (u64)be32_to_cpu(val);
+ }
+
+ return 0;
+}
+
+static int do_get_hw_stats(struct ib_device *ibdev,
+ struct rdma_hw_stats *stats,
+ u32 port_num, int index)
+{
+ struct mlx5_ib_dev *dev = to_mdev(ibdev);
+ const struct mlx5_ib_counters *cnts = get_counters(dev, port_num);
+ struct mlx5_core_dev *mdev;
+ int ret, num_counters;
+
+ if (!stats)
+ return -EINVAL;
+
+ num_counters = cnts->num_q_counters +
+ cnts->num_cong_counters +
+ cnts->num_ext_ppcnt_counters;
+
+ if (is_mdev_switchdev_mode(dev->mdev) && dev->is_rep && port_num != 0)
+ ret = mlx5_ib_query_q_counters_vport(dev, port_num - 1, cnts,
+ stats);
+ else
+ ret = mlx5_ib_query_q_counters(dev->mdev, cnts, stats,
+ cnts->set_id);
+ if (ret)
+ return ret;
+
+ /* We don't expose device counters over Vports */
+ if (is_mdev_switchdev_mode(dev->mdev) && dev->is_rep && port_num != 0)
+ goto done;
+
+ if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
+ ret = mlx5_ib_query_ext_ppcnt_counters(dev, cnts, stats);
+ if (ret)
+ return ret;
+ }
+
+ if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
+ if (!port_num)
+ port_num = 1;
+ mdev = mlx5_ib_get_native_port_mdev(dev, port_num, NULL);
+ if (!mdev) {
+ /* If port is not affiliated yet, its in down state
+ * which doesn't have any counters yet, so it would be
+ * zero. So no need to read from the HCA.
+ */
+ goto done;
+ }
+ ret = mlx5_lag_query_cong_counters(mdev,
+ stats->value +
+ cnts->num_q_counters,
+ cnts->num_cong_counters,
+ cnts->offsets +
+ cnts->num_q_counters);
+
+ mlx5_ib_put_native_port_mdev(dev, port_num);
+ if (ret)
+ return ret;
+ }
+
+done:
+ return num_counters;
+}
+
+static bool is_rdma_bytes_counter(u32 type)
+{
+ if (type == MLX5_IB_OPCOUNTER_RDMA_TX_BYTES ||
+ type == MLX5_IB_OPCOUNTER_RDMA_RX_BYTES ||
+ type == MLX5_IB_OPCOUNTER_RDMA_TX_BYTES_PER_QP ||
+ type == MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP)
+ return true;
+
+ return false;
+}
+
+static int do_per_qp_get_op_stat(struct rdma_counter *counter)
+{
+ struct mlx5_ib_dev *dev = to_mdev(counter->device);
+ const struct mlx5_ib_counters *cnts = get_counters(dev, counter->port);
+ struct mlx5_rdma_counter *mcounter = to_mcounter(counter);
+ int i, ret, index, num_hw_counters;
+ u64 packets = 0, bytes = 0;
+
+ for (i = MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS_PER_QP;
+ i <= MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP; i++) {
+ if (!mcounter->fc[i])
+ continue;
+
+ ret = mlx5_fc_query(dev->mdev, mcounter->fc[i],
+ &packets, &bytes);
+ if (ret)
+ return ret;
+
+ num_hw_counters = cnts->num_q_counters +
+ cnts->num_cong_counters +
+ cnts->num_ext_ppcnt_counters;
+
+ index = i - MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS_PER_QP +
+ num_hw_counters;
+
+ if (is_rdma_bytes_counter(i))
+ counter->stats->value[index] = bytes;
+ else
+ counter->stats->value[index] = packets;
+
+ clear_bit(index, counter->stats->is_disabled);
+ }
+ return 0;
+}
+
+static int do_get_op_stat(struct ib_device *ibdev,
+ struct rdma_hw_stats *stats,
+ u32 port_num, int index)
+{
+ struct mlx5_ib_dev *dev = to_mdev(ibdev);
+ const struct mlx5_ib_counters *cnts;
+ const struct mlx5_ib_op_fc *opfcs;
+ u64 packets, bytes;
+ u32 type;
+ int ret;
+
+ cnts = get_counters(dev, port_num);
+
+ opfcs = cnts->opfcs;
+ type = *(u32 *)cnts->descs[index].priv;
+ if (type >= MLX5_IB_OPCOUNTER_MAX)
+ return -EINVAL;
+
+ if (!opfcs[type].fc)
+ goto out;
+
+ ret = mlx5_fc_query(dev->mdev, opfcs[type].fc,
+ &packets, &bytes);
+ if (ret)
+ return ret;
+
+ if (is_rdma_bytes_counter(type))
+ stats->value[index] = bytes;
+ else
+ stats->value[index] = packets;
+out:
+ return index;
+}
+
+static int do_get_op_stats(struct ib_device *ibdev,
+ struct rdma_hw_stats *stats,
+ u32 port_num)
+{
+ struct mlx5_ib_dev *dev = to_mdev(ibdev);
+ const struct mlx5_ib_counters *cnts;
+ int index, ret, num_hw_counters;
+
+ cnts = get_counters(dev, port_num);
+ num_hw_counters = cnts->num_q_counters + cnts->num_cong_counters +
+ cnts->num_ext_ppcnt_counters;
+ for (index = num_hw_counters;
+ index < (num_hw_counters + cnts->num_op_counters); index++) {
+ ret = do_get_op_stat(ibdev, stats, port_num, index);
+ if (ret != index)
+ return ret;
+ }
+
+ return cnts->num_op_counters;
+}
+
+static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
+ struct rdma_hw_stats *stats,
+ u32 port_num, int index)
+{
+ int num_counters, num_hw_counters, num_op_counters;
+ struct mlx5_ib_dev *dev = to_mdev(ibdev);
+ const struct mlx5_ib_counters *cnts;
+
+ cnts = get_counters(dev, port_num);
+ num_hw_counters = cnts->num_q_counters + cnts->num_cong_counters +
+ cnts->num_ext_ppcnt_counters;
+ num_counters = num_hw_counters + cnts->num_op_counters;
+
+ if (index < 0 || index > num_counters)
+ return -EINVAL;
+ else if (index > 0 && index < num_hw_counters)
+ return do_get_hw_stats(ibdev, stats, port_num, index);
+ else if (index >= num_hw_counters && index < num_counters)
+ return do_get_op_stat(ibdev, stats, port_num, index);
+
+ num_hw_counters = do_get_hw_stats(ibdev, stats, port_num, index);
+ if (num_hw_counters < 0)
+ return num_hw_counters;
+
+ num_op_counters = do_get_op_stats(ibdev, stats, port_num);
+ if (num_op_counters < 0)
+ return num_op_counters;
+
+ return num_hw_counters + num_op_counters;
+}
+
+static struct rdma_hw_stats *
+mlx5_ib_counter_alloc_stats(struct rdma_counter *counter)
+{
+ struct mlx5_ib_dev *dev = to_mdev(counter->device);
+ const struct mlx5_ib_counters *cnts = get_counters(dev, counter->port);
+
+ return do_alloc_stats(cnts);
+}
+
+static int mlx5_ib_counter_update_stats(struct rdma_counter *counter)
+{
+ struct mlx5_ib_dev *dev = to_mdev(counter->device);
+ const struct mlx5_ib_counters *cnts = get_counters(dev, counter->port);
+ int ret;
+
+ ret = mlx5_ib_query_q_counters(dev->mdev, cnts, counter->stats,
+ counter->id);
+ if (ret)
+ return ret;
+
+ if (!counter->mode.bind_opcnt)
+ return 0;
+
+ return do_per_qp_get_op_stat(counter);
+}
+
+static int mlx5_ib_counter_dealloc(struct rdma_counter *counter)
+{
+ struct mlx5_rdma_counter *mcounter = to_mcounter(counter);
+ struct mlx5_ib_dev *dev = to_mdev(counter->device);
+ u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {};
+
+ if (!counter->id)
+ return 0;
+
+ WARN_ON(!xa_empty(&mcounter->qpn_opfc_xa));
+ mlx5r_fs_destroy_fcs(dev, mcounter->fc);
+ MLX5_SET(dealloc_q_counter_in, in, opcode,
+ MLX5_CMD_OP_DEALLOC_Q_COUNTER);
+ MLX5_SET(dealloc_q_counter_in, in, counter_set_id, counter->id);
+ return mlx5_cmd_exec_in(dev->mdev, dealloc_q_counter, in);
+}
+
+static int mlx5_ib_counter_bind_qp(struct rdma_counter *counter,
+ struct ib_qp *qp, u32 port)
+{
+ struct mlx5_rdma_counter *mcounter = to_mcounter(counter);
+ struct mlx5_ib_dev *dev = to_mdev(qp->device);
+ bool new = false;
+ int err;
+
+ if (!counter->id) {
+ u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {};
+
+ MLX5_SET(alloc_q_counter_in, in, opcode,
+ MLX5_CMD_OP_ALLOC_Q_COUNTER);
+ MLX5_SET(alloc_q_counter_in, in, uid, MLX5_SHARED_RESOURCE_UID);
+ err = mlx5_cmd_exec_inout(dev->mdev, alloc_q_counter, in, out);
+ if (err)
+ return err;
+ counter->id =
+ MLX5_GET(alloc_q_counter_out, out, counter_set_id);
+ new = true;
+ }
+
+ err = mlx5_ib_qp_set_counter(qp, counter);
+ if (err)
+ goto fail_set_counter;
+
+ if (!counter->mode.bind_opcnt)
+ return 0;
+
+ err = mlx5r_fs_bind_op_fc(qp, mcounter->fc, &mcounter->qpn_opfc_xa,
+ port);
+ if (err)
+ goto fail_bind_op_fc;
+
+ return 0;
+
+fail_bind_op_fc:
+ mlx5_ib_qp_set_counter(qp, NULL);
+fail_set_counter:
+ if (new) {
+ mlx5_ib_counter_dealloc(counter);
+ counter->id = 0;
+ }
+
+ return err;
+}
+
+static int mlx5_ib_counter_unbind_qp(struct ib_qp *qp, u32 port)
+{
+ struct rdma_counter *counter = qp->counter;
+ struct mlx5_rdma_counter *mcounter;
+ int err;
+
+ mcounter = to_mcounter(counter);
+
+ mlx5r_fs_unbind_op_fc(qp, &mcounter->qpn_opfc_xa);
+
+ err = mlx5_ib_qp_set_counter(qp, NULL);
+ if (err)
+ goto fail_set_counter;
+
+ return 0;
+
+fail_set_counter:
+ if (counter->mode.bind_opcnt)
+ mlx5r_fs_bind_op_fc(qp, mcounter->fc,
+ &mcounter->qpn_opfc_xa, port);
+ return err;
+}
+
+static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev,
+ struct rdma_stat_desc *descs, size_t *offsets,
+ u32 port_num)
+{
+ bool is_vport = is_mdev_switchdev_mode(dev->mdev) &&
+ port_num != MLX5_VPORT_PF;
+ const struct mlx5_ib_counter *names;
+ int j = 0, i, size;
+
+ names = is_vport ? vport_basic_q_cnts : basic_q_cnts;
+ size = is_vport ? ARRAY_SIZE(vport_basic_q_cnts) :
+ ARRAY_SIZE(basic_q_cnts);
+ for (i = 0; i < size; i++, j++) {
+ descs[j].name = names[i].name;
+ offsets[j] = names[i].offset;
+ }
+
+ names = is_vport ? vport_out_of_seq_q_cnts : out_of_seq_q_cnts;
+ size = is_vport ? ARRAY_SIZE(vport_out_of_seq_q_cnts) :
+ ARRAY_SIZE(out_of_seq_q_cnts);
+ if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt)) {
+ for (i = 0; i < size; i++, j++) {
+ descs[j].name = names[i].name;
+ offsets[j] = names[i].offset;
+ }
+ }
+
+ names = is_vport ? vport_retrans_q_cnts : retrans_q_cnts;
+ size = is_vport ? ARRAY_SIZE(vport_retrans_q_cnts) :
+ ARRAY_SIZE(retrans_q_cnts);
+ if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) {
+ for (i = 0; i < size; i++, j++) {
+ descs[j].name = names[i].name;
+ offsets[j] = names[i].offset;
+ }
+ }
+
+ names = is_vport ? vport_extended_err_cnts : extended_err_cnts;
+ size = is_vport ? ARRAY_SIZE(vport_extended_err_cnts) :
+ ARRAY_SIZE(extended_err_cnts);
+ if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters)) {
+ for (i = 0; i < size; i++, j++) {
+ descs[j].name = names[i].name;
+ offsets[j] = names[i].offset;
+ }
+ }
+
+ names = is_vport ? vport_roce_accl_cnts : roce_accl_cnts;
+ size = is_vport ? ARRAY_SIZE(vport_roce_accl_cnts) :
+ ARRAY_SIZE(roce_accl_cnts);
+ if (MLX5_CAP_GEN(dev->mdev, roce_accl)) {
+ for (i = 0; i < size; i++, j++) {
+ descs[j].name = names[i].name;
+ offsets[j] = names[i].offset;
+ }
+ }
+
+ if (is_vport)
+ return;
+
+ if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
+ for (i = 0; i < ARRAY_SIZE(cong_cnts); i++, j++) {
+ descs[j].name = cong_cnts[i].name;
+ offsets[j] = cong_cnts[i].offset;
+ }
+ }
+
+ if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
+ for (i = 0; i < ARRAY_SIZE(ext_ppcnt_cnts); i++, j++) {
+ descs[j].name = ext_ppcnt_cnts[i].name;
+ offsets[j] = ext_ppcnt_cnts[i].offset;
+ }
+ }
+
+ for (i = 0; i < ARRAY_SIZE(basic_op_cnts); i++, j++) {
+ descs[j].name = basic_op_cnts[i].name;
+ descs[j].flags |= IB_STAT_FLAG_OPTIONAL;
+ descs[j].priv = &basic_op_cnts[i].type;
+ }
+
+ if (MLX5_CAP_FLOWTABLE(dev->mdev,
+ ft_field_support_2_nic_receive_rdma.bth_opcode)) {
+ for (i = 0; i < ARRAY_SIZE(rdmarx_cnp_op_cnts); i++, j++) {
+ descs[j].name = rdmarx_cnp_op_cnts[i].name;
+ descs[j].flags |= IB_STAT_FLAG_OPTIONAL;
+ descs[j].priv = &rdmarx_cnp_op_cnts[i].type;
+ }
+ }
+
+ if (MLX5_CAP_FLOWTABLE(dev->mdev,
+ ft_field_support_2_nic_transmit_rdma.bth_opcode)) {
+ for (i = 0; i < ARRAY_SIZE(rdmatx_cnp_op_cnts); i++, j++) {
+ descs[j].name = rdmatx_cnp_op_cnts[i].name;
+ descs[j].flags |= IB_STAT_FLAG_OPTIONAL;
+ descs[j].priv = &rdmatx_cnp_op_cnts[i].type;
+ }
+ }
+
+ for (i = 0; i < ARRAY_SIZE(packets_op_cnts); i++, j++) {
+ descs[j].name = packets_op_cnts[i].name;
+ descs[j].flags |= IB_STAT_FLAG_OPTIONAL;
+ descs[j].priv = &packets_op_cnts[i].type;
+ }
+}
+
+
+static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_counters *cnts, u32 port_num)
+{
+ bool is_vport = is_mdev_switchdev_mode(dev->mdev) &&
+ port_num != MLX5_VPORT_PF;
+ u32 num_counters, num_op_counters = 0, size;
+
+ size = is_vport ? ARRAY_SIZE(vport_basic_q_cnts) :
+ ARRAY_SIZE(basic_q_cnts);
+ num_counters = size;
+
+ size = is_vport ? ARRAY_SIZE(vport_out_of_seq_q_cnts) :
+ ARRAY_SIZE(out_of_seq_q_cnts);
+ if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt))
+ num_counters += size;
+
+ size = is_vport ? ARRAY_SIZE(vport_retrans_q_cnts) :
+ ARRAY_SIZE(retrans_q_cnts);
+ if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters))
+ num_counters += size;
+
+ size = is_vport ? ARRAY_SIZE(vport_extended_err_cnts) :
+ ARRAY_SIZE(extended_err_cnts);
+ if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters))
+ num_counters += size;
+
+ size = is_vport ? ARRAY_SIZE(vport_roce_accl_cnts) :
+ ARRAY_SIZE(roce_accl_cnts);
+ if (MLX5_CAP_GEN(dev->mdev, roce_accl))
+ num_counters += size;
+
+ cnts->num_q_counters = num_counters;
+
+ if (is_vport)
+ goto skip_non_qcounters;
+
+ if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
+ cnts->num_cong_counters = ARRAY_SIZE(cong_cnts);
+ num_counters += ARRAY_SIZE(cong_cnts);
+ }
+ if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
+ cnts->num_ext_ppcnt_counters = ARRAY_SIZE(ext_ppcnt_cnts);
+ num_counters += ARRAY_SIZE(ext_ppcnt_cnts);
+ }
+
+ num_op_counters = ARRAY_SIZE(basic_op_cnts);
+
+ num_op_counters += ARRAY_SIZE(packets_op_cnts);
+
+ if (MLX5_CAP_FLOWTABLE(dev->mdev,
+ ft_field_support_2_nic_receive_rdma.bth_opcode))
+ num_op_counters += ARRAY_SIZE(rdmarx_cnp_op_cnts);
+
+ if (MLX5_CAP_FLOWTABLE(dev->mdev,
+ ft_field_support_2_nic_transmit_rdma.bth_opcode))
+ num_op_counters += ARRAY_SIZE(rdmatx_cnp_op_cnts);
+
+skip_non_qcounters:
+ cnts->num_op_counters = num_op_counters;
+ num_counters += num_op_counters;
+ cnts->descs = kcalloc(num_counters,
+ sizeof(struct rdma_stat_desc), GFP_KERNEL);
+ if (!cnts->descs)
+ return -ENOMEM;
+
+ cnts->offsets = kcalloc(num_counters,
+ sizeof(*cnts->offsets), GFP_KERNEL);
+ if (!cnts->offsets)
+ goto err;
+
+ return 0;
+
+err:
+ kfree(cnts->descs);
+ cnts->descs = NULL;
+ return -ENOMEM;
+}
+
+/*
+ * Checks if the given flow counter type should be sharing the same flow counter
+ * with another type and if it should, checks if that other type flow counter
+ * was already created, if both conditions are met return true and the counter
+ * else return false.
+ */
+bool mlx5r_is_opfc_shared_and_in_use(struct mlx5_ib_op_fc *opfcs, u32 type,
+ struct mlx5_ib_op_fc **opfc)
+{
+ u32 shared_fc_type;
+
+ switch (type) {
+ case MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS:
+ shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_TX_BYTES;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_TX_BYTES:
+ shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS:
+ shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_RX_BYTES;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_RX_BYTES:
+ shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS_PER_QP:
+ shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_TX_BYTES_PER_QP;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_TX_BYTES_PER_QP:
+ shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS_PER_QP;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS_PER_QP:
+ shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP:
+ shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS_PER_QP;
+ break;
+ default:
+ return false;
+ }
+
+ *opfc = &opfcs[shared_fc_type];
+ if (!(*opfc)->fc)
+ return false;
+
+ return true;
+}
+
+static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev)
+{
+ u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {};
+ int num_cnt_ports = dev->num_ports;
+ struct mlx5_ib_op_fc *in_use_opfc;
+ int i, j;
+
+ if (is_mdev_switchdev_mode(dev->mdev))
+ num_cnt_ports = min(2, num_cnt_ports);
+
+ MLX5_SET(dealloc_q_counter_in, in, opcode,
+ MLX5_CMD_OP_DEALLOC_Q_COUNTER);
+
+ for (i = 0; i < num_cnt_ports; i++) {
+ if (dev->port[i].cnts.set_id) {
+ MLX5_SET(dealloc_q_counter_in, in, counter_set_id,
+ dev->port[i].cnts.set_id);
+ mlx5_cmd_exec_in(dev->mdev, dealloc_q_counter, in);
+ }
+ kfree(dev->port[i].cnts.descs);
+ kfree(dev->port[i].cnts.offsets);
+
+ for (j = 0; j < MLX5_IB_OPCOUNTER_MAX; j++) {
+ if (!dev->port[i].cnts.opfcs[j].fc)
+ continue;
+
+ if (mlx5r_is_opfc_shared_and_in_use(
+ dev->port[i].cnts.opfcs, j, &in_use_opfc))
+ goto skip;
+
+ mlx5_ib_fs_remove_op_fc(dev,
+ &dev->port[i].cnts.opfcs[j], j);
+ mlx5_fc_destroy(dev->mdev,
+ dev->port[i].cnts.opfcs[j].fc);
+skip:
+ dev->port[i].cnts.opfcs[j].fc = NULL;
+ }
+ }
+}
+
+static int mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev)
+{
+ u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {};
+ int num_cnt_ports = dev->num_ports;
+ int err = 0;
+ int i;
+ bool is_shared;
+
+ MLX5_SET(alloc_q_counter_in, in, opcode, MLX5_CMD_OP_ALLOC_Q_COUNTER);
+ is_shared = MLX5_CAP_GEN(dev->mdev, log_max_uctx) != 0;
+
+ /*
+ * In switchdev we need to allocate two ports, one that is used for
+ * the device Q_counters and it is essentially the real Q_counters of
+ * this device, while the other is used as a helper for PF to be able to
+ * query all other vports.
+ */
+ if (is_mdev_switchdev_mode(dev->mdev))
+ num_cnt_ports = min(2, num_cnt_ports);
+
+ for (i = 0; i < num_cnt_ports; i++) {
+ err = __mlx5_ib_alloc_counters(dev, &dev->port[i].cnts, i);
+ if (err)
+ goto err_alloc;
+
+ mlx5_ib_fill_counters(dev, dev->port[i].cnts.descs,
+ dev->port[i].cnts.offsets, i);
+
+ MLX5_SET(alloc_q_counter_in, in, uid,
+ is_shared ? MLX5_SHARED_RESOURCE_UID : 0);
+
+ err = mlx5_cmd_exec_inout(dev->mdev, alloc_q_counter, in, out);
+ if (err) {
+ mlx5_ib_warn(dev,
+ "couldn't allocate queue counter for port %d, err %d\n",
+ i + 1, err);
+ goto err_alloc;
+ }
+
+ dev->port[i].cnts.set_id =
+ MLX5_GET(alloc_q_counter_out, out, counter_set_id);
+ }
+ return 0;
+
+err_alloc:
+ mlx5_ib_dealloc_counters(dev);
+ return err;
+}
+
+static int read_flow_counters(struct ib_device *ibdev,
+ struct mlx5_read_counters_attr *read_attr)
+{
+ struct mlx5_fc *fc = read_attr->hw_cntrs_hndl;
+ struct mlx5_ib_dev *dev = to_mdev(ibdev);
+
+ return mlx5_fc_query(dev->mdev, fc,
+ &read_attr->out[IB_COUNTER_PACKETS],
+ &read_attr->out[IB_COUNTER_BYTES]);
+}
+
+/* flow counters currently expose two counters packets and bytes */
+#define FLOW_COUNTERS_NUM 2
+static int counters_set_description(
+ struct ib_counters *counters, enum mlx5_ib_counters_type counters_type,
+ struct mlx5_ib_flow_counters_desc *desc_data, u32 ncounters)
+{
+ struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
+ u32 cntrs_max_index = 0;
+ int i;
+
+ if (counters_type != MLX5_IB_COUNTERS_FLOW)
+ return -EINVAL;
+
+ /* init the fields for the object */
+ mcounters->type = counters_type;
+ mcounters->read_counters = read_flow_counters;
+ mcounters->counters_num = FLOW_COUNTERS_NUM;
+ mcounters->ncounters = ncounters;
+ /* each counter entry have both description and index pair */
+ for (i = 0; i < ncounters; i++) {
+ if (desc_data[i].description > IB_COUNTER_BYTES)
+ return -EINVAL;
+
+ if (cntrs_max_index <= desc_data[i].index)
+ cntrs_max_index = desc_data[i].index + 1;
+ }
+
+ mutex_lock(&mcounters->mcntrs_mutex);
+ mcounters->counters_data = desc_data;
+ mcounters->cntrs_max_index = cntrs_max_index;
+ mutex_unlock(&mcounters->mcntrs_mutex);
+
+ return 0;
+}
+
+#define MAX_COUNTERS_NUM (USHRT_MAX / (sizeof(u32) * 2))
+int mlx5_ib_flow_counters_set_data(struct ib_counters *ibcounters,
+ struct mlx5_ib_create_flow *ucmd)
+{
+ struct mlx5_ib_mcounters *mcounters = to_mcounters(ibcounters);
+ struct mlx5_ib_flow_counters_data *cntrs_data = NULL;
+ struct mlx5_ib_flow_counters_desc *desc_data = NULL;
+ bool hw_hndl = false;
+ int ret = 0;
+
+ if (ucmd && ucmd->ncounters_data != 0) {
+ cntrs_data = ucmd->data;
+ if (cntrs_data->ncounters > MAX_COUNTERS_NUM)
+ return -EINVAL;
+
+ desc_data = kcalloc(cntrs_data->ncounters,
+ sizeof(*desc_data),
+ GFP_KERNEL);
+ if (!desc_data)
+ return -ENOMEM;
+
+ if (copy_from_user(desc_data,
+ u64_to_user_ptr(cntrs_data->counters_data),
+ sizeof(*desc_data) * cntrs_data->ncounters)) {
+ ret = -EFAULT;
+ goto free;
+ }
+ }
+
+ if (!mcounters->hw_cntrs_hndl) {
+ mcounters->hw_cntrs_hndl = mlx5_fc_create(
+ to_mdev(ibcounters->device)->mdev, false);
+ if (IS_ERR(mcounters->hw_cntrs_hndl)) {
+ ret = PTR_ERR(mcounters->hw_cntrs_hndl);
+ goto free;
+ }
+ hw_hndl = true;
+ }
+
+ if (desc_data) {
+ /* counters already bound to at least one flow */
+ if (mcounters->cntrs_max_index) {
+ ret = -EINVAL;
+ goto free_hndl;
+ }
+
+ ret = counters_set_description(ibcounters,
+ MLX5_IB_COUNTERS_FLOW,
+ desc_data,
+ cntrs_data->ncounters);
+ if (ret)
+ goto free_hndl;
+
+ } else if (!mcounters->cntrs_max_index) {
+ /* counters not bound yet, must have udata passed */
+ ret = -EINVAL;
+ goto free_hndl;
+ }
+
+ return 0;
+
+free_hndl:
+ if (hw_hndl) {
+ mlx5_fc_destroy(to_mdev(ibcounters->device)->mdev,
+ mcounters->hw_cntrs_hndl);
+ mcounters->hw_cntrs_hndl = NULL;
+ }
+free:
+ kfree(desc_data);
+ return ret;
+}
+
+void mlx5_ib_counters_clear_description(struct ib_counters *counters)
+{
+ struct mlx5_ib_mcounters *mcounters;
+
+ if (!counters || atomic_read(&counters->usecnt) != 1)
+ return;
+
+ mcounters = to_mcounters(counters);
+
+ mutex_lock(&mcounters->mcntrs_mutex);
+ kfree(mcounters->counters_data);
+ mcounters->counters_data = NULL;
+ mcounters->cntrs_max_index = 0;
+ mutex_unlock(&mcounters->mcntrs_mutex);
+}
+
+static int mlx5_ib_modify_stat(struct ib_device *device, u32 port,
+ unsigned int index, bool enable)
+{
+ struct mlx5_ib_dev *dev = to_mdev(device);
+ struct mlx5_ib_op_fc *opfc, *in_use_opfc;
+ struct mlx5_ib_counters *cnts;
+ u32 num_hw_counters, type;
+ int ret;
+
+ cnts = &dev->port[port - 1].cnts;
+ num_hw_counters = cnts->num_q_counters + cnts->num_cong_counters +
+ cnts->num_ext_ppcnt_counters;
+ if (index < num_hw_counters ||
+ index >= (num_hw_counters + cnts->num_op_counters))
+ return -EINVAL;
+
+ if (!(cnts->descs[index].flags & IB_STAT_FLAG_OPTIONAL))
+ return -EINVAL;
+
+ type = *(u32 *)cnts->descs[index].priv;
+ if (type >= MLX5_IB_OPCOUNTER_MAX)
+ return -EINVAL;
+
+ opfc = &cnts->opfcs[type];
+
+ if (enable) {
+ if (opfc->fc)
+ return -EEXIST;
+
+ if (mlx5r_is_opfc_shared_and_in_use(cnts->opfcs, type,
+ &in_use_opfc)) {
+ opfc->fc = in_use_opfc->fc;
+ opfc->rule[0] = in_use_opfc->rule[0];
+ return 0;
+ }
+
+ opfc->fc = mlx5_fc_create(dev->mdev, false);
+ if (IS_ERR(opfc->fc))
+ return PTR_ERR(opfc->fc);
+
+ ret = mlx5_ib_fs_add_op_fc(dev, port, opfc, type);
+ if (ret) {
+ mlx5_fc_destroy(dev->mdev, opfc->fc);
+ opfc->fc = NULL;
+ }
+ return ret;
+ }
+
+ if (!opfc->fc)
+ return -EINVAL;
+
+ if (mlx5r_is_opfc_shared_and_in_use(cnts->opfcs, type, &in_use_opfc))
+ goto out;
+
+ mlx5_ib_fs_remove_op_fc(dev, opfc, type);
+ mlx5_fc_destroy(dev->mdev, opfc->fc);
+out:
+ opfc->fc = NULL;
+ return 0;
+}
+
+static void mlx5_ib_counter_init(struct rdma_counter *counter)
+{
+ struct mlx5_rdma_counter *mcounter = to_mcounter(counter);
+
+ xa_init(&mcounter->qpn_opfc_xa);
+}
+
+static const struct ib_device_ops hw_stats_ops = {
+ .alloc_hw_port_stats = mlx5_ib_alloc_hw_port_stats,
+ .get_hw_stats = mlx5_ib_get_hw_stats,
+ .counter_bind_qp = mlx5_ib_counter_bind_qp,
+ .counter_unbind_qp = mlx5_ib_counter_unbind_qp,
+ .counter_dealloc = mlx5_ib_counter_dealloc,
+ .counter_alloc_stats = mlx5_ib_counter_alloc_stats,
+ .counter_update_stats = mlx5_ib_counter_update_stats,
+ .modify_hw_stat = mlx5_ib_modify_stat,
+ .counter_init = mlx5_ib_counter_init,
+
+ INIT_RDMA_OBJ_SIZE(rdma_counter, mlx5_rdma_counter, rdma_counter),
+};
+
+static const struct ib_device_ops hw_switchdev_vport_op = {
+ .alloc_hw_port_stats = mlx5_ib_alloc_hw_port_stats,
+};
+
+static const struct ib_device_ops hw_switchdev_stats_ops = {
+ .alloc_hw_device_stats = mlx5_ib_alloc_hw_device_stats,
+ .get_hw_stats = mlx5_ib_get_hw_stats,
+ .counter_bind_qp = mlx5_ib_counter_bind_qp,
+ .counter_unbind_qp = mlx5_ib_counter_unbind_qp,
+ .counter_dealloc = mlx5_ib_counter_dealloc,
+ .counter_alloc_stats = mlx5_ib_counter_alloc_stats,
+ .counter_update_stats = mlx5_ib_counter_update_stats,
+ .counter_init = mlx5_ib_counter_init,
+
+ INIT_RDMA_OBJ_SIZE(rdma_counter, mlx5_rdma_counter, rdma_counter),
+};
+
+static const struct ib_device_ops counters_ops = {
+ .create_counters = mlx5_ib_create_counters,
+ .destroy_counters = mlx5_ib_destroy_counters,
+ .read_counters = mlx5_ib_read_counters,
+
+ INIT_RDMA_OBJ_SIZE(ib_counters, mlx5_ib_mcounters, ibcntrs),
+};
+
+int mlx5_ib_counters_init(struct mlx5_ib_dev *dev)
+{
+ ib_set_device_ops(&dev->ib_dev, &counters_ops);
+
+ if (!MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
+ return 0;
+
+ if (is_mdev_switchdev_mode(dev->mdev)) {
+ ib_set_device_ops(&dev->ib_dev, &hw_switchdev_stats_ops);
+ if (vport_qcounters_supported(dev))
+ ib_set_device_ops(&dev->ib_dev, &hw_switchdev_vport_op);
+ } else
+ ib_set_device_ops(&dev->ib_dev, &hw_stats_ops);
+ return mlx5_ib_alloc_counters(dev);
+}
+
+void mlx5_ib_counters_cleanup(struct mlx5_ib_dev *dev)
+{
+ if (!MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
+ return;
+
+ mlx5_ib_dealloc_counters(dev);
+}
diff --git a/drivers/infiniband/hw/mlx5/counters.h b/drivers/infiniband/hw/mlx5/counters.h
new file mode 100644
index 000000000000..a04e7dd59455
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/counters.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright (c) 2013-2020, Mellanox Technologies inc. All rights reserved.
+ */
+
+#ifndef _MLX5_IB_COUNTERS_H
+#define _MLX5_IB_COUNTERS_H
+
+#include "mlx5_ib.h"
+
+int mlx5_ib_counters_init(struct mlx5_ib_dev *dev);
+void mlx5_ib_counters_cleanup(struct mlx5_ib_dev *dev);
+void mlx5_ib_counters_clear_description(struct ib_counters *counters);
+int mlx5_ib_flow_counters_set_data(struct ib_counters *ibcounters,
+ struct mlx5_ib_create_flow *ucmd);
+u16 mlx5_ib_get_counters_id(struct mlx5_ib_dev *dev, u32 port_num);
+bool mlx5r_is_opfc_shared_and_in_use(struct mlx5_ib_op_fc *opfcs, u32 type,
+ struct mlx5_ib_op_fc **opfc);
+#endif /* _MLX5_IB_COUNTERS_H */
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index b3ef47c3ab73..651d76bca114 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -35,8 +35,13 @@
#include <rdma/ib_user_verbs.h>
#include <rdma/ib_cache.h>
#include "mlx5_ib.h"
+#include "srq.h"
+#include "qp.h"
-static void mlx5_ib_cq_comp(struct mlx5_core_cq *cq)
+#define UVERBS_MODULE_NAME mlx5_ib
+#include <rdma/uverbs_named_ioctl.h>
+
+static void mlx5_ib_cq_comp(struct mlx5_core_cq *cq, struct mlx5_eqe *eqe)
{
struct ib_cq *ibcq = &to_mibcq(cq)->ibcq;
@@ -64,14 +69,9 @@ static void mlx5_ib_cq_event(struct mlx5_core_cq *mcq, enum mlx5_event type)
}
}
-static void *get_cqe_from_buf(struct mlx5_ib_cq_buf *buf, int n, int size)
-{
- return mlx5_buf_offset(&buf->buf, n * size);
-}
-
static void *get_cqe(struct mlx5_ib_cq *cq, int n)
{
- return get_cqe_from_buf(&cq->buf, n, cq->mcq.cqe_sz);
+ return mlx5_frag_buf_get_wqe(&cq->buf.fbc, n);
}
static u8 sw_ownership_bit(int n, int nent)
@@ -86,7 +86,7 @@ static void *get_sw_cqe(struct mlx5_ib_cq *cq, int n)
cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64;
- if (likely((cqe64->op_own) >> 4 != MLX5_CQE_INVALID) &&
+ if (likely(get_cqe_opcode(cqe64) != MLX5_CQE_INVALID) &&
!((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & (cq->ibcq.cqe + 1)))) {
return cqe;
} else {
@@ -124,11 +124,13 @@ static void handle_good_req(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
switch (be32_to_cpu(cqe->sop_drop_qpn) >> 24) {
case MLX5_OPCODE_RDMA_WRITE_IMM:
wc->wc_flags |= IB_WC_WITH_IMM;
+ fallthrough;
case MLX5_OPCODE_RDMA_WRITE:
wc->opcode = IB_WC_RDMA_WRITE;
break;
case MLX5_OPCODE_SEND_IMM:
wc->wc_flags |= IB_WC_WITH_IMM;
+ fallthrough;
case MLX5_OPCODE_SEND:
case MLX5_OPCODE_SEND_INVAL:
wc->opcode = IB_WC_SEND;
@@ -169,18 +171,20 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
{
enum rdma_link_layer ll = rdma_port_get_link_layer(qp->ibqp.device, 1);
struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.device);
- struct mlx5_ib_srq *srq;
+ struct mlx5_ib_srq *srq = NULL;
struct mlx5_ib_wq *wq;
u16 wqe_ctr;
+ u8 roce_packet_type;
+ bool vlan_present;
u8 g;
if (qp->ibqp.srq || qp->ibqp.xrcd) {
struct mlx5_core_srq *msrq = NULL;
if (qp->ibqp.xrcd) {
- msrq = mlx5_core_get_srq(dev->mdev,
- be32_to_cpu(cqe->srqn));
- srq = to_mibsrq(msrq);
+ msrq = mlx5_cmd_get_srq(dev, be32_to_cpu(cqe->srqn));
+ if (msrq)
+ srq = to_mibsrq(msrq);
} else {
srq = to_msrq(qp->ibqp.srq);
}
@@ -188,8 +192,8 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
wqe_ctr = be16_to_cpu(cqe->wqe_counter);
wc->wr_id = srq->wrid[wqe_ctr];
mlx5_ib_free_srq_wqe(srq, wqe_ctr);
- if (msrq && atomic_dec_and_test(&msrq->refcount))
- complete(&msrq->free);
+ if (msrq)
+ mlx5_core_res_put(&msrq->common);
}
} else {
wq = &qp->rq;
@@ -198,11 +202,11 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
}
wc->byte_len = be32_to_cpu(cqe->byte_cnt);
- switch (cqe->op_own >> 4) {
+ switch (get_cqe_opcode(cqe)) {
case MLX5_CQE_RESP_WR_IMM:
wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
wc->wc_flags = IB_WC_WITH_IMM;
- wc->ex.imm_data = cqe->imm_inval_pkey;
+ wc->ex.imm_data = cqe->immediate;
break;
case MLX5_CQE_RESP_SEND:
wc->opcode = IB_WC_RECV;
@@ -214,22 +218,20 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
case MLX5_CQE_RESP_SEND_IMM:
wc->opcode = IB_WC_RECV;
wc->wc_flags = IB_WC_WITH_IMM;
- wc->ex.imm_data = cqe->imm_inval_pkey;
+ wc->ex.imm_data = cqe->immediate;
break;
case MLX5_CQE_RESP_SEND_INV:
wc->opcode = IB_WC_RECV;
wc->wc_flags = IB_WC_WITH_INVALIDATE;
- wc->ex.invalidate_rkey = be32_to_cpu(cqe->imm_inval_pkey);
+ wc->ex.invalidate_rkey = be32_to_cpu(cqe->inval_rkey);
break;
}
- wc->slid = be16_to_cpu(cqe->slid);
- wc->sl = (be32_to_cpu(cqe->flags_rqpn) >> 24) & 0xf;
wc->src_qp = be32_to_cpu(cqe->flags_rqpn) & 0xffffff;
wc->dlid_path_bits = cqe->ml_path;
g = (be32_to_cpu(cqe->flags_rqpn) >> 28) & 3;
wc->wc_flags |= g ? IB_WC_GRH : 0;
- if (unlikely(is_qp1(qp->ibqp.qp_type))) {
- u16 pkey = be32_to_cpu(cqe->imm_inval_pkey) & 0xffff;
+ if (is_qp1(qp->type)) {
+ u16 pkey = be32_to_cpu(cqe->pkey) & 0xffff;
ib_find_cached_pkey(&dev->ib_dev, qp->port, pkey,
&wc->pkey_index);
@@ -237,12 +239,26 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
wc->pkey_index = 0;
}
- if (ll != IB_LINK_LAYER_ETHERNET)
+ if (ll != IB_LINK_LAYER_ETHERNET) {
+ wc->slid = be16_to_cpu(cqe->slid);
+ wc->sl = (be32_to_cpu(cqe->flags_rqpn) >> 24) & 0xf;
return;
+ }
- switch (wc->sl & 0x3) {
+ wc->slid = 0;
+ vlan_present = cqe->l4_l3_hdr_type & 0x1;
+ roce_packet_type = (be32_to_cpu(cqe->flags_rqpn) >> 24) & 0x3;
+ if (vlan_present) {
+ wc->vlan_id = (be16_to_cpu(cqe->vlan_info)) & 0xfff;
+ wc->sl = (be16_to_cpu(cqe->vlan_info) >> 13) & 0x7;
+ wc->wc_flags |= IB_WC_WITH_VLAN;
+ } else {
+ wc->sl = 0;
+ }
+
+ switch (roce_packet_type) {
case MLX5_CQE_ROCE_L3_HEADER_TYPE_GRH:
- wc->network_hdr_type = RDMA_NETWORK_IB;
+ wc->network_hdr_type = RDMA_NETWORK_ROCE_V1;
break;
case MLX5_CQE_ROCE_L3_HEADER_TYPE_IPV6:
wc->network_hdr_type = RDMA_NETWORK_IPV6;
@@ -254,23 +270,20 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
wc->wc_flags |= IB_WC_WITH_NETWORK_HDR_TYPE;
}
-static void dump_cqe(struct mlx5_ib_dev *dev, struct mlx5_err_cqe *cqe)
+static void dump_cqe(struct mlx5_ib_dev *dev, struct mlx5_err_cqe *cqe,
+ struct ib_wc *wc, const char *level)
{
- __be32 *p = (__be32 *)cqe;
- int i;
-
- mlx5_ib_warn(dev, "dump error cqe\n");
- for (i = 0; i < sizeof(*cqe) / 16; i++, p += 4)
- pr_info("%08x %08x %08x %08x\n", be32_to_cpu(p[0]),
- be32_to_cpu(p[1]), be32_to_cpu(p[2]),
- be32_to_cpu(p[3]));
+ mlx5_ib_log(level, dev, "WC error: %d, Message: %s\n", wc->status,
+ ib_wc_status_msg(wc->status));
+ print_hex_dump(level, "cqe_dump: ", DUMP_PREFIX_OFFSET, 16, 1,
+ cqe, sizeof(*cqe), false);
}
static void mlx5_handle_error_cqe(struct mlx5_ib_dev *dev,
struct mlx5_err_cqe *cqe,
struct ib_wc *wc)
{
- int dump = 1;
+ const char *dump = KERN_WARNING;
switch (cqe->syndrome) {
case MLX5_CQE_SYNDROME_LOCAL_LENGTH_ERR:
@@ -280,10 +293,11 @@ static void mlx5_handle_error_cqe(struct mlx5_ib_dev *dev,
wc->status = IB_WC_LOC_QP_OP_ERR;
break;
case MLX5_CQE_SYNDROME_LOCAL_PROT_ERR:
+ dump = KERN_DEBUG;
wc->status = IB_WC_LOC_PROT_ERR;
break;
case MLX5_CQE_SYNDROME_WR_FLUSH_ERR:
- dump = 0;
+ dump = NULL;
wc->status = IB_WC_WR_FLUSH_ERR;
break;
case MLX5_CQE_SYNDROME_MW_BIND_ERR:
@@ -299,18 +313,20 @@ static void mlx5_handle_error_cqe(struct mlx5_ib_dev *dev,
wc->status = IB_WC_REM_INV_REQ_ERR;
break;
case MLX5_CQE_SYNDROME_REMOTE_ACCESS_ERR:
+ dump = KERN_DEBUG;
wc->status = IB_WC_REM_ACCESS_ERR;
break;
case MLX5_CQE_SYNDROME_REMOTE_OP_ERR:
+ dump = KERN_DEBUG;
wc->status = IB_WC_REM_OP_ERR;
break;
case MLX5_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR:
+ dump = NULL;
wc->status = IB_WC_RETRY_EXC_ERR;
- dump = 0;
break;
case MLX5_CQE_SYNDROME_RNR_RETRY_EXC_ERR:
+ dump = NULL;
wc->status = IB_WC_RNR_RETRY_EXC_ERR;
- dump = 0;
break;
case MLX5_CQE_SYNDROME_REMOTE_ABORTED_ERR:
wc->status = IB_WC_REM_ABORT_ERR;
@@ -322,51 +338,7 @@ static void mlx5_handle_error_cqe(struct mlx5_ib_dev *dev,
wc->vendor_err = cqe->vendor_err_synd;
if (dump)
- dump_cqe(dev, cqe);
-}
-
-static int is_atomic_response(struct mlx5_ib_qp *qp, uint16_t idx)
-{
- /* TBD: waiting decision
- */
- return 0;
-}
-
-static void *mlx5_get_atomic_laddr(struct mlx5_ib_qp *qp, uint16_t idx)
-{
- struct mlx5_wqe_data_seg *dpseg;
- void *addr;
-
- dpseg = mlx5_get_send_wqe(qp, idx) + sizeof(struct mlx5_wqe_ctrl_seg) +
- sizeof(struct mlx5_wqe_raddr_seg) +
- sizeof(struct mlx5_wqe_atomic_seg);
- addr = (void *)(unsigned long)be64_to_cpu(dpseg->addr);
- return addr;
-}
-
-static void handle_atomic(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64,
- uint16_t idx)
-{
- void *addr;
- int byte_count;
- int i;
-
- if (!is_atomic_response(qp, idx))
- return;
-
- byte_count = be32_to_cpu(cqe64->byte_cnt);
- addr = mlx5_get_atomic_laddr(qp, idx);
-
- if (byte_count == 4) {
- *(uint32_t *)addr = be32_to_cpu(*((__be32 *)addr));
- } else {
- for (i = 0; i < byte_count; i += 8) {
- *(uint64_t *)addr = be64_to_cpu(*((__be64 *)addr));
- addr += 8;
- }
- }
-
- return;
+ dump_cqe(dev, cqe, wc, dump);
}
static void handle_atomics(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64,
@@ -376,7 +348,6 @@ static void handle_atomics(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64,
do {
idx = tail & (qp->sq.wqe_cnt - 1);
- handle_atomic(qp, cqe64, idx);
if (idx == head)
break;
@@ -388,7 +359,7 @@ static void handle_atomics(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64,
static void free_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf)
{
- mlx5_buf_free(dev->mdev, &buf->buf);
+ mlx5_frag_buf_free(dev->mdev, &buf->frag_buf);
}
static void get_sig_err_item(struct mlx5_sig_err_cqe *cqe,
@@ -423,16 +394,15 @@ static void get_sig_err_item(struct mlx5_sig_err_cqe *cqe,
item->key = be32_to_cpu(cqe->mkey);
}
-static void sw_send_comp(struct mlx5_ib_qp *qp, int num_entries,
- struct ib_wc *wc, int *npolled)
+static void sw_comp(struct mlx5_ib_qp *qp, int num_entries, struct ib_wc *wc,
+ int *npolled, bool is_send)
{
struct mlx5_ib_wq *wq;
unsigned int cur;
- unsigned int idx;
int np;
int i;
- wq = &qp->sq;
+ wq = (is_send) ? &qp->sq : &qp->rq;
cur = wq->head - wq->tail;
np = *npolled;
@@ -440,39 +410,16 @@ static void sw_send_comp(struct mlx5_ib_qp *qp, int num_entries,
return;
for (i = 0; i < cur && np < num_entries; i++) {
- idx = wq->last_poll & (wq->wqe_cnt - 1);
- wc->wr_id = wq->wrid[idx];
- wc->status = IB_WC_WR_FLUSH_ERR;
- wc->vendor_err = MLX5_CQE_SYNDROME_WR_FLUSH_ERR;
- wq->tail++;
- np++;
- wc->qp = &qp->ibqp;
- wc++;
- wq->last_poll = wq->w_list[idx].next;
- }
- *npolled = np;
-}
-
-static void sw_recv_comp(struct mlx5_ib_qp *qp, int num_entries,
- struct ib_wc *wc, int *npolled)
-{
- struct mlx5_ib_wq *wq;
- unsigned int cur;
- int np;
- int i;
-
- wq = &qp->rq;
- cur = wq->head - wq->tail;
- np = *npolled;
+ unsigned int idx;
- if (cur == 0)
- return;
-
- for (i = 0; i < cur && np < num_entries; i++) {
- wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
+ idx = (is_send) ? wq->last_poll : wq->tail;
+ idx &= (wq->wqe_cnt - 1);
+ wc->wr_id = wq->wrid[idx];
wc->status = IB_WC_WR_FLUSH_ERR;
wc->vendor_err = MLX5_CQE_SYNDROME_WR_FLUSH_ERR;
wq->tail++;
+ if (is_send)
+ wq->last_poll = wq->w_list[idx].next;
np++;
wc->qp = &qp->ibqp;
wc++;
@@ -486,15 +433,15 @@ static void mlx5_ib_poll_sw_comp(struct mlx5_ib_cq *cq, int num_entries,
struct mlx5_ib_qp *qp;
*npolled = 0;
- /* Find uncompleted WQEs belonging to that cq and retrun mmics ones */
+ /* Find uncompleted WQEs belonging to that cq and return mmics ones */
list_for_each_entry(qp, &cq->list_send_qp, cq_send_list) {
- sw_send_comp(qp, num_entries, wc + *npolled, npolled);
+ sw_comp(qp, num_entries, wc + *npolled, npolled, true);
if (*npolled >= num_entries)
return;
}
list_for_each_entry(qp, &cq->list_recv_qp, cq_recv_list) {
- sw_recv_comp(qp, num_entries, wc + *npolled, npolled);
+ sw_comp(qp, num_entries, wc + *npolled, npolled, false);
if (*npolled >= num_entries)
return;
}
@@ -509,9 +456,6 @@ static int mlx5_poll_one(struct mlx5_ib_cq *cq,
struct mlx5_cqe64 *cqe64;
struct mlx5_core_qp *mqp;
struct mlx5_ib_wq *wq;
- struct mlx5_sig_err_cqe *sig_err_cqe;
- struct mlx5_core_mkey *mmkey;
- struct mlx5_ib_mr *mr;
uint8_t opcode;
uint32_t qpn;
u16 wqe_ctr;
@@ -532,7 +476,7 @@ repoll:
*/
rmb();
- opcode = cqe64->op_own >> 4;
+ opcode = get_cqe_opcode(cqe64);
if (unlikely(opcode == MLX5_CQE_RESIZE_CQ)) {
if (likely(cq->resize_buf)) {
free_cq_buf(dev, &cq->buf);
@@ -546,12 +490,12 @@ repoll:
}
qpn = ntohl(cqe64->sop_drop_qpn) & 0xffffff;
- if (!*cur_qp || (qpn != (*cur_qp)->ibqp.qp_num)) {
+ if (!*cur_qp || (qpn != (*cur_qp)->trans_qp.base.mqp.qpn)) {
/* We do not have to take the QP table lock here,
* because CQs will be locked while QPs are removed
* from the table.
*/
- mqp = __mlx5_qp_lookup(dev->mdev, qpn);
+ mqp = radix_tree_lookup(&dev->qp_table.tree, qpn);
*cur_qp = to_mibqp(mqp);
}
@@ -585,6 +529,10 @@ repoll:
"Requestor" : "Responder", cq->mcq.cqn);
mlx5_ib_dbg(dev, "syndrome 0x%x, vendor syndrome 0x%x\n",
err_cqe->syndrome, err_cqe->vendor_err_synd);
+ if (wc->status != IB_WC_WR_FLUSH_ERR &&
+ (*cur_qp)->type == MLX5_IB_QPT_REG_UMR)
+ dev->umrc.state = MLX5_UMR_STATE_RECOVER;
+
if (opcode == MLX5_CQE_REQ_ERR) {
wq = &(*cur_qp)->sq;
wqe_ctr = be16_to_cpu(cqe64->wqe_counter);
@@ -606,33 +554,35 @@ repoll:
}
}
break;
- case MLX5_CQE_SIG_ERR:
- sig_err_cqe = (struct mlx5_sig_err_cqe *)cqe64;
-
- read_lock(&dev->mdev->priv.mkey_table.lock);
- mmkey = __mlx5_mr_lookup(dev->mdev,
- mlx5_base_mkey(be32_to_cpu(sig_err_cqe->mkey)));
- mr = to_mibmr(mmkey);
- get_sig_err_item(sig_err_cqe, &mr->sig->err_item);
- mr->sig->sig_err_exists = true;
- mr->sig->sigerr_count++;
+ case MLX5_CQE_SIG_ERR: {
+ struct mlx5_sig_err_cqe *sig_err_cqe =
+ (struct mlx5_sig_err_cqe *)cqe64;
+ struct mlx5_core_sig_ctx *sig;
+
+ xa_lock(&dev->sig_mrs);
+ sig = xa_load(&dev->sig_mrs,
+ mlx5_base_mkey(be32_to_cpu(sig_err_cqe->mkey)));
+ get_sig_err_item(sig_err_cqe, &sig->err_item);
+ sig->sig_err_exists = true;
+ sig->sigerr_count++;
mlx5_ib_warn(dev, "CQN: 0x%x Got SIGERR on key: 0x%x err_type %x err_offset %llx expected %x actual %x\n",
- cq->mcq.cqn, mr->sig->err_item.key,
- mr->sig->err_item.err_type,
- mr->sig->err_item.sig_err_offset,
- mr->sig->err_item.expected,
- mr->sig->err_item.actual);
+ cq->mcq.cqn, sig->err_item.key,
+ sig->err_item.err_type,
+ sig->err_item.sig_err_offset,
+ sig->err_item.expected,
+ sig->err_item.actual);
- read_unlock(&dev->mdev->priv.mkey_table.lock);
+ xa_unlock(&dev->sig_mrs);
goto repoll;
}
+ }
return 0;
}
static int poll_soft_wc(struct mlx5_ib_cq *cq, int num_entries,
- struct ib_wc *wc)
+ struct ib_wc *wc, bool is_fatal_err)
{
struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device);
struct mlx5_ib_wc *soft_wc, *next;
@@ -645,6 +595,10 @@ static int poll_soft_wc(struct mlx5_ib_cq *cq, int num_entries,
mlx5_ib_dbg(dev, "polled software generated completion on CQ 0x%x\n",
cq->mcq.cqn);
+ if (unlikely(is_fatal_err)) {
+ soft_wc->wc.status = IB_WC_WR_FLUSH_ERR;
+ soft_wc->wc.vendor_err = MLX5_CQE_SYNDROME_WR_FLUSH_ERR;
+ }
wc[npolled++] = soft_wc->wc;
list_del(&soft_wc->list);
kfree(soft_wc);
@@ -665,12 +619,17 @@ int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
spin_lock_irqsave(&cq->lock, flags);
if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
- mlx5_ib_poll_sw_comp(cq, num_entries, wc, &npolled);
+ /* make sure no soft wqe's are waiting */
+ if (unlikely(!list_empty(&cq->wc_list)))
+ soft_polled = poll_soft_wc(cq, num_entries, wc, true);
+
+ mlx5_ib_poll_sw_comp(cq, num_entries - soft_polled,
+ wc + soft_polled, &npolled);
goto out;
}
if (unlikely(!list_empty(&cq->wc_list)))
- soft_polled = poll_soft_wc(cq, num_entries, wc);
+ soft_polled = poll_soft_wc(cq, num_entries, wc, false);
for (npolled = 0; npolled < num_entries - soft_polled; npolled++) {
if (mlx5_poll_one(cq, &cur_qp, wc + soft_polled + npolled))
@@ -689,7 +648,7 @@ int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
{
struct mlx5_core_dev *mdev = to_mdev(ibcq->device)->mdev;
struct mlx5_ib_cq *cq = to_mcq(ibcq);
- void __iomem *uar_page = mdev->priv.uuari.uars[0].map;
+ void __iomem *uar_page = mdev->priv.bfreg.up->map;
unsigned long irq_flags;
int ret = 0;
@@ -704,146 +663,222 @@ int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
mlx5_cq_arm(&cq->mcq,
(flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ?
MLX5_CQ_DB_REQ_NOT_SOL : MLX5_CQ_DB_REQ_NOT,
- uar_page,
- MLX5_GET_DOORBELL_LOCK(&mdev->priv.cq_uar_lock),
- to_mcq(ibcq)->mcq.cons_index);
+ uar_page, to_mcq(ibcq)->mcq.cons_index);
return ret;
}
-static int alloc_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf,
- int nent, int cqe_size)
+static int alloc_cq_frag_buf(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_cq_buf *buf,
+ int nent,
+ int cqe_size)
{
+ struct mlx5_frag_buf *frag_buf = &buf->frag_buf;
+ u8 log_wq_stride = 6 + (cqe_size == 128 ? 1 : 0);
+ u8 log_wq_sz = ilog2(cqe_size);
int err;
- err = mlx5_buf_alloc(dev->mdev, nent * cqe_size, &buf->buf);
+ err = mlx5_frag_buf_alloc_node(dev->mdev,
+ nent * cqe_size,
+ frag_buf,
+ dev->mdev->priv.numa_node);
if (err)
return err;
+ mlx5_init_fbc(frag_buf->frags, log_wq_stride, log_wq_sz, &buf->fbc);
+
buf->cqe_size = cqe_size;
buf->nent = nent;
return 0;
}
+enum {
+ MLX5_CQE_RES_FORMAT_HASH = 0,
+ MLX5_CQE_RES_FORMAT_CSUM = 1,
+ MLX5_CQE_RES_FORMAT_CSUM_STRIDX = 3,
+};
+
+static int mini_cqe_res_format_to_hw(struct mlx5_ib_dev *dev, u8 format)
+{
+ switch (format) {
+ case MLX5_IB_CQE_RES_FORMAT_HASH:
+ return MLX5_CQE_RES_FORMAT_HASH;
+ case MLX5_IB_CQE_RES_FORMAT_CSUM:
+ return MLX5_CQE_RES_FORMAT_CSUM;
+ case MLX5_IB_CQE_RES_FORMAT_CSUM_STRIDX:
+ if (MLX5_CAP_GEN(dev->mdev, mini_cqe_resp_stride_index))
+ return MLX5_CQE_RES_FORMAT_CSUM_STRIDX;
+ return -EOPNOTSUPP;
+ default:
+ return -EINVAL;
+ }
+}
+
static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata,
- struct ib_ucontext *context, struct mlx5_ib_cq *cq,
- int entries, u32 **cqb,
- int *cqe_size, int *index, int *inlen)
+ struct mlx5_ib_cq *cq, int entries, u32 **cqb,
+ int *cqe_size, int *index, int *inlen,
+ struct uverbs_attr_bundle *attrs)
{
struct mlx5_ib_create_cq ucmd = {};
+ unsigned long page_size;
+ unsigned int page_offset_quantized;
size_t ucmdlen;
- int page_shift;
__be64 *pas;
- int npages;
int ncont;
void *cqc;
int err;
+ struct mlx5_ib_ucontext *context = rdma_udata_to_drv_context(
+ udata, struct mlx5_ib_ucontext, ibucontext);
- ucmdlen =
- (udata->inlen - sizeof(struct ib_uverbs_cmd_hdr) <
- sizeof(ucmd)) ? (sizeof(ucmd) -
- sizeof(ucmd.reserved)) : sizeof(ucmd);
+ ucmdlen = min(udata->inlen, sizeof(ucmd));
+ if (ucmdlen < offsetof(struct mlx5_ib_create_cq, flags))
+ return -EINVAL;
if (ib_copy_from_udata(&ucmd, udata, ucmdlen))
return -EFAULT;
- if (ucmdlen == sizeof(ucmd) &&
- ucmd.reserved != 0)
+ if ((ucmd.flags & ~(MLX5_IB_CREATE_CQ_FLAGS_CQE_128B_PAD |
+ MLX5_IB_CREATE_CQ_FLAGS_UAR_PAGE_INDEX |
+ MLX5_IB_CREATE_CQ_FLAGS_REAL_TIME_TS)))
return -EINVAL;
- if (ucmd.cqe_size != 64 && ucmd.cqe_size != 128)
+ if ((ucmd.cqe_size != 64 && ucmd.cqe_size != 128) ||
+ ucmd.reserved0 || ucmd.reserved1)
return -EINVAL;
*cqe_size = ucmd.cqe_size;
- cq->buf.umem = ib_umem_get(context, ucmd.buf_addr,
- entries * ucmd.cqe_size,
- IB_ACCESS_LOCAL_WRITE, 1);
+ cq->buf.umem =
+ ib_umem_get(&dev->ib_dev, ucmd.buf_addr,
+ entries * ucmd.cqe_size, IB_ACCESS_LOCAL_WRITE);
if (IS_ERR(cq->buf.umem)) {
err = PTR_ERR(cq->buf.umem);
return err;
}
- err = mlx5_ib_db_map_user(to_mucontext(context), ucmd.db_addr,
- &cq->db);
+ page_size = mlx5_umem_find_best_cq_quantized_pgoff(
+ cq->buf.umem, cqc, log_page_size, MLX5_ADAPTER_PAGE_SHIFT,
+ page_offset, 64, &page_offset_quantized);
+ if (!page_size) {
+ err = -EINVAL;
+ goto err_umem;
+ }
+
+ err = mlx5_ib_db_map_user(context, ucmd.db_addr, &cq->db);
if (err)
goto err_umem;
- mlx5_ib_cont_pages(cq->buf.umem, ucmd.buf_addr, 0, &npages, &page_shift,
- &ncont, NULL);
- mlx5_ib_dbg(dev, "addr 0x%llx, size %u, npages %d, page_shift %d, ncont %d\n",
- ucmd.buf_addr, entries * ucmd.cqe_size, npages, page_shift, ncont);
+ ncont = ib_umem_num_dma_blocks(cq->buf.umem, page_size);
+ mlx5_ib_dbg(
+ dev,
+ "addr 0x%llx, size %u, npages %zu, page_size %lu, ncont %d\n",
+ ucmd.buf_addr, entries * ucmd.cqe_size,
+ ib_umem_num_pages(cq->buf.umem), page_size, ncont);
*inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * ncont;
- *cqb = mlx5_vzalloc(*inlen);
+ *cqb = kvzalloc(*inlen, GFP_KERNEL);
if (!*cqb) {
err = -ENOMEM;
goto err_db;
}
pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, *cqb, pas);
- mlx5_ib_populate_pas(dev, cq->buf.umem, page_shift, pas, 0);
+ mlx5_ib_populate_pas(cq->buf.umem, page_size, pas, 0);
cqc = MLX5_ADDR_OF(create_cq_in, *cqb, cq_context);
MLX5_SET(cqc, cqc, log_page_size,
- page_shift - MLX5_ADAPTER_PAGE_SHIFT);
+ order_base_2(page_size) - MLX5_ADAPTER_PAGE_SHIFT);
+ MLX5_SET(cqc, cqc, page_offset, page_offset_quantized);
- *index = to_mucontext(context)->uuari.uars[0].index;
+ if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_CREATE_CQ_UAR_INDEX)) {
+ err = uverbs_copy_from(index, attrs, MLX5_IB_ATTR_CREATE_CQ_UAR_INDEX);
+ if (err)
+ goto err_cqb;
+ } else if (ucmd.flags & MLX5_IB_CREATE_CQ_FLAGS_UAR_PAGE_INDEX) {
+ *index = ucmd.uar_page_index;
+ } else if (context->bfregi.lib_uar_dyn) {
+ err = -EINVAL;
+ goto err_cqb;
+ } else {
+ *index = context->bfregi.sys_pages[0];
+ }
if (ucmd.cqe_comp_en == 1) {
- if (unlikely((*cqe_size != 64) ||
- !MLX5_CAP_GEN(dev->mdev, cqe_compression))) {
+ int mini_cqe_format;
+
+ if (!((*cqe_size == 128 &&
+ MLX5_CAP_GEN(dev->mdev, cqe_compression_128)) ||
+ (*cqe_size == 64 &&
+ MLX5_CAP_GEN(dev->mdev, cqe_compression)))) {
err = -EOPNOTSUPP;
mlx5_ib_warn(dev, "CQE compression is not supported for size %d!\n",
*cqe_size);
goto err_cqb;
}
- if (unlikely(!ucmd.cqe_comp_res_format ||
- !(ucmd.cqe_comp_res_format <
- MLX5_IB_CQE_RES_RESERVED) ||
- (ucmd.cqe_comp_res_format &
- (ucmd.cqe_comp_res_format - 1)))) {
- err = -EOPNOTSUPP;
- mlx5_ib_warn(dev, "CQE compression res format %d is not supported!\n",
- ucmd.cqe_comp_res_format);
+ mini_cqe_format =
+ mini_cqe_res_format_to_hw(dev,
+ ucmd.cqe_comp_res_format);
+ if (mini_cqe_format < 0) {
+ err = mini_cqe_format;
+ mlx5_ib_dbg(dev, "CQE compression res format %d error: %d\n",
+ ucmd.cqe_comp_res_format, err);
goto err_cqb;
}
MLX5_SET(cqc, cqc, cqe_comp_en, 1);
- MLX5_SET(cqc, cqc, mini_cqe_res_format,
- ilog2(ucmd.cqe_comp_res_format));
+ MLX5_SET(cqc, cqc, mini_cqe_res_format, mini_cqe_format);
}
+ if (ucmd.flags & MLX5_IB_CREATE_CQ_FLAGS_CQE_128B_PAD) {
+ if (*cqe_size != 128 ||
+ !MLX5_CAP_GEN(dev->mdev, cqe_128_always)) {
+ err = -EOPNOTSUPP;
+ mlx5_ib_warn(dev,
+ "CQE padding is not supported for CQE size of %dB!\n",
+ *cqe_size);
+ goto err_cqb;
+ }
+
+ cq->private_flags |= MLX5_IB_CQ_PR_FLAGS_CQE_128_PAD;
+ }
+
+ if (ucmd.flags & MLX5_IB_CREATE_CQ_FLAGS_REAL_TIME_TS)
+ cq->private_flags |= MLX5_IB_CQ_PR_FLAGS_REAL_TIME_TS;
+
+ MLX5_SET(create_cq_in, *cqb, uid, context->devx_uid);
return 0;
err_cqb:
- kfree(cqb);
+ kvfree(*cqb);
err_db:
- mlx5_ib_db_unmap_user(to_mucontext(context), &cq->db);
+ mlx5_ib_db_unmap_user(context, &cq->db);
err_umem:
ib_umem_release(cq->buf.umem);
return err;
}
-static void destroy_cq_user(struct mlx5_ib_cq *cq, struct ib_ucontext *context)
+static void destroy_cq_user(struct mlx5_ib_cq *cq, struct ib_udata *udata)
{
- mlx5_ib_db_unmap_user(to_mucontext(context), &cq->db);
+ struct mlx5_ib_ucontext *context = rdma_udata_to_drv_context(
+ udata, struct mlx5_ib_ucontext, ibucontext);
+
+ mlx5_ib_db_unmap_user(context, &cq->db);
ib_umem_release(cq->buf.umem);
}
-static void init_cq_buf(struct mlx5_ib_cq *cq, struct mlx5_ib_cq_buf *buf)
+static void init_cq_frag_buf(struct mlx5_ib_cq_buf *buf)
{
int i;
void *cqe;
struct mlx5_cqe64 *cqe64;
for (i = 0; i < buf->nent; i++) {
- cqe = get_cqe_from_buf(buf, i, buf->cqe_size);
+ cqe = mlx5_frag_buf_get_wqe(&buf->fbc, i);
cqe64 = buf->cqe_size == 64 ? cqe : cqe + 64;
cqe64->op_own = MLX5_CQE_INVALID << 4;
}
@@ -865,28 +900,30 @@ static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
cq->mcq.arm_db = cq->db.db + 1;
cq->mcq.cqe_sz = cqe_size;
- err = alloc_cq_buf(dev, &cq->buf, entries, cqe_size);
+ err = alloc_cq_frag_buf(dev, &cq->buf, entries, cqe_size);
if (err)
goto err_db;
- init_cq_buf(cq, &cq->buf);
+ init_cq_frag_buf(&cq->buf);
*inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
- MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * cq->buf.buf.npages;
- *cqb = mlx5_vzalloc(*inlen);
+ MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) *
+ cq->buf.frag_buf.npages;
+ *cqb = kvzalloc(*inlen, GFP_KERNEL);
if (!*cqb) {
err = -ENOMEM;
goto err_buf;
}
pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, *cqb, pas);
- mlx5_fill_page_array(&cq->buf.buf, pas);
+ mlx5_fill_page_frag_array(&cq->buf.frag_buf, pas);
cqc = MLX5_ADDR_OF(create_cq_in, *cqb, cq_context);
MLX5_SET(cqc, cqc, log_page_size,
- cq->buf.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
+ cq->buf.frag_buf.page_shift -
+ MLX5_ADAPTER_PAGE_SHIFT);
- *index = dev->mdev->priv.uuari.uars[0].index;
+ *index = dev->mdev->priv.bfreg.up->index;
return 0;
@@ -912,38 +949,34 @@ static void notify_soft_wc_handler(struct work_struct *work)
cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
}
-struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
- const struct ib_cq_init_attr *attr,
- struct ib_ucontext *context,
- struct ib_udata *udata)
+int mlx5_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+ struct uverbs_attr_bundle *attrs)
{
+ struct ib_udata *udata = &attrs->driver_udata;
+ struct ib_device *ibdev = ibcq->device;
int entries = attr->cqe;
int vector = attr->comp_vector;
struct mlx5_ib_dev *dev = to_mdev(ibdev);
- struct mlx5_ib_cq *cq;
- int uninitialized_var(index);
- int uninitialized_var(inlen);
+ struct mlx5_ib_cq *cq = to_mcq(ibcq);
+ u32 out[MLX5_ST_SZ_DW(create_cq_out)];
+ int index;
+ int inlen;
u32 *cqb = NULL;
void *cqc;
int cqe_size;
- unsigned int irqn;
int eqn;
int err;
if (entries < 0 ||
(entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz))))
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
if (check_cq_create_flags(attr->flags))
- return ERR_PTR(-EOPNOTSUPP);
+ return -EOPNOTSUPP;
entries = roundup_pow_of_two(entries + 1);
if (entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz)))
- return ERR_PTR(-EINVAL);
-
- cq = kzalloc(sizeof(*cq), GFP_KERNEL);
- if (!cq)
- return ERR_PTR(-ENOMEM);
+ return -EINVAL;
cq->ibcq.cqe = entries - 1;
mutex_init(&cq->resize_mutex);
@@ -954,51 +987,56 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
INIT_LIST_HEAD(&cq->list_send_qp);
INIT_LIST_HEAD(&cq->list_recv_qp);
- if (context) {
- err = create_cq_user(dev, udata, context, cq, entries,
- &cqb, &cqe_size, &index, &inlen);
+ if (udata) {
+ err = create_cq_user(dev, udata, cq, entries, &cqb, &cqe_size,
+ &index, &inlen, attrs);
if (err)
- goto err_create;
+ return err;
} else {
cqe_size = cache_line_size() == 128 ? 128 : 64;
err = create_cq_kernel(dev, cq, entries, cqe_size, &cqb,
&index, &inlen);
if (err)
- goto err_create;
+ return err;
INIT_WORK(&cq->notify_work, notify_soft_wc_handler);
}
- err = mlx5_vector2eqn(dev->mdev, vector, &eqn, &irqn);
+ err = mlx5_comp_eqn_get(dev->mdev, vector, &eqn);
if (err)
goto err_cqb;
cq->cqe_size = cqe_size;
cqc = MLX5_ADDR_OF(create_cq_in, cqb, cq_context);
- MLX5_SET(cqc, cqc, cqe_sz, cqe_sz_to_mlx_sz(cqe_size));
+ MLX5_SET(cqc, cqc, cqe_sz,
+ cqe_sz_to_mlx_sz(cqe_size,
+ cq->private_flags &
+ MLX5_IB_CQ_PR_FLAGS_CQE_128_PAD));
MLX5_SET(cqc, cqc, log_cq_size, ilog2(entries));
MLX5_SET(cqc, cqc, uar_page, index);
- MLX5_SET(cqc, cqc, c_eqn, eqn);
+ MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn);
MLX5_SET64(cqc, cqc, dbr_addr, cq->db.dma);
- if (cq->create_flags & IB_CQ_FLAGS_IGNORE_OVERRUN)
+ if (cq->create_flags & IB_UVERBS_CQ_FLAGS_IGNORE_OVERRUN)
MLX5_SET(cqc, cqc, oi, 1);
- err = mlx5_core_create_cq(dev->mdev, &cq->mcq, cqb, inlen);
+ if (udata) {
+ cq->mcq.comp = mlx5_add_cq_to_tasklet;
+ cq->mcq.tasklet_ctx.comp = mlx5_ib_cq_comp;
+ } else {
+ cq->mcq.comp = mlx5_ib_cq_comp;
+ }
+
+ err = mlx5_core_create_cq(dev->mdev, &cq->mcq, cqb, inlen, out, sizeof(out));
if (err)
goto err_cqb;
mlx5_ib_dbg(dev, "cqn 0x%x\n", cq->mcq.cqn);
- cq->mcq.irqn = irqn;
- if (context)
- cq->mcq.tasklet_ctx.comp = mlx5_ib_cq_comp;
- else
- cq->mcq.comp = mlx5_ib_cq_comp;
cq->mcq.event = mlx5_ib_cq_event;
INIT_LIST_HEAD(&cq->wc_list);
- if (context)
+ if (udata)
if (ib_copy_to_udata(udata, &cq->mcq.cqn, sizeof(__u32))) {
err = -EFAULT;
goto err_cmd;
@@ -1006,42 +1044,45 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
kvfree(cqb);
- return &cq->ibcq;
+ return 0;
err_cmd:
mlx5_core_destroy_cq(dev->mdev, &cq->mcq);
err_cqb:
kvfree(cqb);
- if (context)
- destroy_cq_user(cq, context);
+ if (udata)
+ destroy_cq_user(cq, udata);
else
destroy_cq_kernel(dev, cq);
-
-err_create:
- kfree(cq);
-
- return ERR_PTR(err);
+ return err;
}
-
-int mlx5_ib_destroy_cq(struct ib_cq *cq)
+int mlx5_ib_pre_destroy_cq(struct ib_cq *cq)
{
struct mlx5_ib_dev *dev = to_mdev(cq->device);
struct mlx5_ib_cq *mcq = to_mcq(cq);
- struct ib_ucontext *context = NULL;
- if (cq->uobject)
- context = cq->uobject->context;
+ return mlx5_core_destroy_cq(dev->mdev, &mcq->mcq);
+}
- mlx5_core_destroy_cq(dev->mdev, &mcq->mcq);
- if (context)
- destroy_cq_user(mcq, context);
- else
- destroy_cq_kernel(dev, mcq);
+void mlx5_ib_post_destroy_cq(struct ib_cq *cq)
+{
+ destroy_cq_kernel(to_mdev(cq->device), to_mcq(cq));
+}
+
+int mlx5_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata)
+{
+ int ret;
- kfree(mcq);
+ ret = mlx5_ib_pre_destroy_cq(cq);
+ if (ret)
+ return ret;
+ if (udata)
+ destroy_cq_user(to_mcq(cq), udata);
+ else
+ mlx5_ib_post_destroy_cq(cq);
return 0;
}
@@ -1118,7 +1159,10 @@ int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
int err;
if (!MLX5_CAP_GEN(dev->mdev, cq_moderation))
- return -ENOSYS;
+ return -EOPNOTSUPP;
+
+ if (cq_period > MLX5_MAX_CQ_PERIOD)
+ return -EINVAL;
err = mlx5_core_modify_cq_moderation(dev->mdev, &mcq->mcq,
cq_period, cq_count);
@@ -1129,14 +1173,12 @@ int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
}
static int resize_user(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
- int entries, struct ib_udata *udata, int *npas,
- int *page_shift, int *cqe_size)
+ int entries, struct ib_udata *udata,
+ int *cqe_size)
{
struct mlx5_ib_resize_cq ucmd;
struct ib_umem *umem;
int err;
- int npages;
- struct ib_ucontext *context = cq->buf.umem->context;
err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd));
if (err)
@@ -1145,27 +1187,24 @@ static int resize_user(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
if (ucmd.reserved0 || ucmd.reserved1)
return -EINVAL;
- umem = ib_umem_get(context, ucmd.buf_addr, entries * ucmd.cqe_size,
- IB_ACCESS_LOCAL_WRITE, 1);
+ /* check multiplication overflow */
+ if (ucmd.cqe_size && SIZE_MAX / ucmd.cqe_size <= entries - 1)
+ return -EINVAL;
+
+ umem = ib_umem_get(&dev->ib_dev, ucmd.buf_addr,
+ (size_t)ucmd.cqe_size * entries,
+ IB_ACCESS_LOCAL_WRITE);
if (IS_ERR(umem)) {
err = PTR_ERR(umem);
return err;
}
- mlx5_ib_cont_pages(umem, ucmd.buf_addr, 0, &npages, page_shift,
- npas, NULL);
-
cq->resize_umem = umem;
*cqe_size = ucmd.cqe_size;
return 0;
}
-static void un_resize_user(struct mlx5_ib_cq *cq)
-{
- ib_umem_release(cq->resize_umem);
-}
-
static int resize_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
int entries, int cqe_size)
{
@@ -1175,11 +1214,11 @@ static int resize_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
if (!cq->resize_buf)
return -ENOMEM;
- err = alloc_cq_buf(dev, cq->resize_buf, entries, cqe_size);
+ err = alloc_cq_frag_buf(dev, cq->resize_buf, entries, cqe_size);
if (err)
goto ex;
- init_cq_buf(cq, cq->resize_buf);
+ init_cq_frag_buf(cq->resize_buf);
return 0;
@@ -1188,12 +1227,6 @@ ex:
return err;
}
-static void un_resize_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq)
-{
- free_cq_buf(dev, cq->resize_buf);
- cq->resize_buf = NULL;
-}
-
static int copy_resize_cqes(struct mlx5_ib_cq *cq)
{
struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device);
@@ -1223,10 +1256,9 @@ static int copy_resize_cqes(struct mlx5_ib_cq *cq)
return -EINVAL;
}
- while ((scqe64->op_own >> 4) != MLX5_CQE_RESIZE_CQ) {
- dcqe = get_cqe_from_buf(cq->resize_buf,
- (i + 1) & (cq->resize_buf->nent),
- dsize);
+ while (get_cqe_opcode(scqe64) != MLX5_CQE_RESIZE_CQ) {
+ dcqe = mlx5_frag_buf_get_wqe(&cq->resize_buf->fbc,
+ (i + 1) & cq->resize_buf->nent);
dcqe64 = dsize == 64 ? dcqe : dcqe + 64;
sw_own = sw_ownership_bit(i + 1, cq->resize_buf->nent);
memcpy(dcqe, scqe, dsize);
@@ -1259,9 +1291,10 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
int err;
int npas;
__be64 *pas;
- int page_shift;
+ unsigned int page_offset_quantized = 0;
+ unsigned int page_shift;
int inlen;
- int uninitialized_var(cqe_size);
+ int cqe_size;
unsigned long flags;
if (!MLX5_CAP_GEN(dev->mdev, cq_resize)) {
@@ -1286,24 +1319,38 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
mutex_lock(&cq->resize_mutex);
if (udata) {
- err = resize_user(dev, cq, entries, udata, &npas, &page_shift,
- &cqe_size);
+ unsigned long page_size;
+
+ err = resize_user(dev, cq, entries, udata, &cqe_size);
+ if (err)
+ goto ex;
+
+ page_size = mlx5_umem_find_best_cq_quantized_pgoff(
+ cq->resize_umem, cqc, log_page_size,
+ MLX5_ADAPTER_PAGE_SHIFT, page_offset, 64,
+ &page_offset_quantized);
+ if (!page_size) {
+ err = -EINVAL;
+ goto ex_resize;
+ }
+ npas = ib_umem_num_dma_blocks(cq->resize_umem, page_size);
+ page_shift = order_base_2(page_size);
} else {
+ struct mlx5_frag_buf *frag_buf;
+
cqe_size = 64;
err = resize_kernel(dev, cq, entries, cqe_size);
- if (!err) {
- npas = cq->resize_buf->buf.npages;
- page_shift = cq->resize_buf->buf.page_shift;
- }
+ if (err)
+ goto ex;
+ frag_buf = &cq->resize_buf->frag_buf;
+ npas = frag_buf->npages;
+ page_shift = frag_buf->page_shift;
}
- if (err)
- goto ex;
-
inlen = MLX5_ST_SZ_BYTES(modify_cq_in) +
MLX5_FLD_SZ_BYTES(modify_cq_in, pas[0]) * npas;
- in = mlx5_vzalloc(inlen);
+ in = kvzalloc(inlen, GFP_KERNEL);
if (!in) {
err = -ENOMEM;
goto ex_resize;
@@ -1311,10 +1358,10 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
pas = (__be64 *)MLX5_ADDR_OF(modify_cq_in, in, pas);
if (udata)
- mlx5_ib_populate_pas(dev, cq->resize_umem, page_shift,
- pas, 0);
+ mlx5_ib_populate_pas(cq->resize_umem, 1UL << page_shift, pas,
+ 0);
else
- mlx5_fill_page_array(&cq->resize_buf->buf, pas);
+ mlx5_fill_page_frag_array(&cq->resize_buf->frag_buf, pas);
MLX5_SET(modify_cq_in, in,
modify_field_select_resize_field_select.resize_field_select.resize_field_select,
@@ -1326,7 +1373,11 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
MLX5_SET(cqc, cqc, log_page_size,
page_shift - MLX5_ADAPTER_PAGE_SHIFT);
- MLX5_SET(cqc, cqc, cqe_sz, cqe_sz_to_mlx_sz(cqe_size));
+ MLX5_SET(cqc, cqc, page_offset, page_offset_quantized);
+ MLX5_SET(cqc, cqc, cqe_sz,
+ cqe_sz_to_mlx_sz(cqe_size,
+ cq->private_flags &
+ MLX5_IB_CQ_PR_FLAGS_CQE_128_PAD));
MLX5_SET(cqc, cqc, log_cq_size, ilog2(entries));
MLX5_SET(modify_cq_in, in, op_mod, MLX5_CQ_OPMOD_RESIZE);
@@ -1370,16 +1421,17 @@ ex_alloc:
kvfree(in);
ex_resize:
- if (udata)
- un_resize_user(cq);
- else
- un_resize_kernel(dev, cq);
+ ib_umem_release(cq->resize_umem);
+ if (!udata) {
+ free_cq_buf(dev, cq->resize_buf);
+ cq->resize_buf = NULL;
+ }
ex:
mutex_unlock(&cq->resize_mutex);
return err;
}
-int mlx5_ib_get_cqe_size(struct mlx5_ib_dev *dev, struct ib_cq *ibcq)
+int mlx5_ib_get_cqe_size(struct ib_cq *ibcq)
{
struct mlx5_ib_cq *cq;
@@ -1413,3 +1465,17 @@ int mlx5_ib_generate_wc(struct ib_cq *ibcq, struct ib_wc *wc)
return 0;
}
+
+ADD_UVERBS_ATTRIBUTES_SIMPLE(
+ mlx5_ib_cq_create,
+ UVERBS_OBJECT_CQ,
+ UVERBS_METHOD_CQ_CREATE,
+ UVERBS_ATTR_PTR_IN(
+ MLX5_IB_ATTR_CREATE_CQ_UAR_INDEX,
+ UVERBS_ATTR_TYPE(u32),
+ UA_OPTIONAL));
+
+const struct uapi_definition mlx5_ib_create_cq_defs[] = {
+ UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_CQ, &mlx5_ib_cq_create),
+ {},
+};
diff --git a/drivers/infiniband/hw/mlx5/data_direct.c b/drivers/infiniband/hw/mlx5/data_direct.c
new file mode 100644
index 000000000000..b81ac5709b56
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/data_direct.c
@@ -0,0 +1,227 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved
+ */
+
+#include "mlx5_ib.h"
+#include "data_direct.h"
+
+static LIST_HEAD(mlx5_data_direct_dev_list);
+static LIST_HEAD(mlx5_data_direct_reg_list);
+
+/*
+ * This mutex should be held when accessing either of the above lists
+ */
+static DEFINE_MUTEX(mlx5_data_direct_mutex);
+
+struct mlx5_data_direct_registration {
+ struct mlx5_ib_dev *ibdev;
+ char vuid[MLX5_ST_SZ_BYTES(array1024_auto) + 1];
+ struct list_head list;
+};
+
+static const struct pci_device_id mlx5_data_direct_pci_table[] = {
+ { PCI_VDEVICE(MELLANOX, 0x2100) }, /* ConnectX-8 Data Direct */
+ { 0, }
+};
+
+static int mlx5_data_direct_vpd_get_vuid(struct mlx5_data_direct_dev *dev)
+{
+ struct pci_dev *pdev = dev->pdev;
+ unsigned int vpd_size, kw_len;
+ u8 *vpd_data;
+ int start;
+ int ret;
+
+ vpd_data = pci_vpd_alloc(pdev, &vpd_size);
+ if (IS_ERR(vpd_data)) {
+ pci_err(pdev, "Unable to read VPD, err=%pe\n", vpd_data);
+ return PTR_ERR(vpd_data);
+ }
+
+ start = pci_vpd_find_ro_info_keyword(vpd_data, vpd_size, "VU", &kw_len);
+ if (start < 0) {
+ ret = start;
+ pci_err(pdev, "VU keyword not found, err=%d\n", ret);
+ goto end;
+ }
+
+ dev->vuid = kmemdup_nul(vpd_data + start, kw_len, GFP_KERNEL);
+ ret = dev->vuid ? 0 : -ENOMEM;
+
+end:
+ kfree(vpd_data);
+ return ret;
+}
+
+static void mlx5_data_direct_shutdown(struct pci_dev *pdev)
+{
+ pci_disable_device(pdev);
+}
+
+static int mlx5_data_direct_set_dma_caps(struct pci_dev *pdev)
+{
+ int err;
+
+ err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
+ if (err) {
+ dev_warn(&pdev->dev,
+ "Warning: couldn't set 64-bit PCI DMA mask, err=%d\n", err);
+ err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
+ if (err) {
+ dev_err(&pdev->dev, "Can't set PCI DMA mask, err=%d\n", err);
+ return err;
+ }
+ }
+
+ dma_set_max_seg_size(&pdev->dev, SZ_2G);
+ return 0;
+}
+
+int mlx5_data_direct_ib_reg(struct mlx5_ib_dev *ibdev, char *vuid)
+{
+ struct mlx5_data_direct_registration *reg;
+ struct mlx5_data_direct_dev *dev;
+
+ reg = kzalloc(sizeof(*reg), GFP_KERNEL);
+ if (!reg)
+ return -ENOMEM;
+
+ reg->ibdev = ibdev;
+ strcpy(reg->vuid, vuid);
+
+ mutex_lock(&mlx5_data_direct_mutex);
+ list_for_each_entry(dev, &mlx5_data_direct_dev_list, list) {
+ if (strcmp(dev->vuid, vuid) == 0) {
+ mlx5_ib_data_direct_bind(ibdev, dev);
+ break;
+ }
+ }
+
+ /* Add the registration to its global list, to be used upon bind/unbind
+ * of its affiliated data direct device
+ */
+ list_add_tail(&reg->list, &mlx5_data_direct_reg_list);
+ mutex_unlock(&mlx5_data_direct_mutex);
+ return 0;
+}
+
+void mlx5_data_direct_ib_unreg(struct mlx5_ib_dev *ibdev)
+{
+ struct mlx5_data_direct_registration *reg;
+
+ mutex_lock(&mlx5_data_direct_mutex);
+ list_for_each_entry(reg, &mlx5_data_direct_reg_list, list) {
+ if (reg->ibdev == ibdev) {
+ list_del(&reg->list);
+ kfree(reg);
+ goto end;
+ }
+ }
+
+ WARN_ON(true);
+end:
+ mutex_unlock(&mlx5_data_direct_mutex);
+}
+
+static void mlx5_data_direct_dev_reg(struct mlx5_data_direct_dev *dev)
+{
+ struct mlx5_data_direct_registration *reg;
+
+ mutex_lock(&mlx5_data_direct_mutex);
+ list_for_each_entry(reg, &mlx5_data_direct_reg_list, list) {
+ if (strcmp(dev->vuid, reg->vuid) == 0)
+ mlx5_ib_data_direct_bind(reg->ibdev, dev);
+ }
+
+ /* Add the data direct device to the global list, further IB devices may
+ * use it later as well
+ */
+ list_add_tail(&dev->list, &mlx5_data_direct_dev_list);
+ mutex_unlock(&mlx5_data_direct_mutex);
+}
+
+static void mlx5_data_direct_dev_unreg(struct mlx5_data_direct_dev *dev)
+{
+ struct mlx5_data_direct_registration *reg;
+
+ mutex_lock(&mlx5_data_direct_mutex);
+ /* Prevent any further affiliations */
+ list_del(&dev->list);
+ list_for_each_entry(reg, &mlx5_data_direct_reg_list, list) {
+ if (strcmp(dev->vuid, reg->vuid) == 0)
+ mlx5_ib_data_direct_unbind(reg->ibdev);
+ }
+ mutex_unlock(&mlx5_data_direct_mutex);
+}
+
+static int mlx5_data_direct_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+ struct mlx5_data_direct_dev *dev;
+ int err;
+
+ dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+ if (!dev)
+ return -ENOMEM;
+
+ dev->device = &pdev->dev;
+ dev->pdev = pdev;
+
+ pci_set_drvdata(dev->pdev, dev);
+ err = pci_enable_device(pdev);
+ if (err) {
+ dev_err(dev->device, "Cannot enable PCI device, err=%d\n", err);
+ goto err;
+ }
+
+ pci_set_master(pdev);
+ err = mlx5_data_direct_set_dma_caps(pdev);
+ if (err)
+ goto err_disable;
+
+ if (pci_enable_atomic_ops_to_root(pdev, PCI_EXP_DEVCAP2_ATOMIC_COMP32) &&
+ pci_enable_atomic_ops_to_root(pdev, PCI_EXP_DEVCAP2_ATOMIC_COMP64) &&
+ pci_enable_atomic_ops_to_root(pdev, PCI_EXP_DEVCAP2_ATOMIC_COMP128))
+ dev_dbg(dev->device, "Enabling pci atomics failed\n");
+
+ err = mlx5_data_direct_vpd_get_vuid(dev);
+ if (err)
+ goto err_disable;
+
+ mlx5_data_direct_dev_reg(dev);
+ return 0;
+
+err_disable:
+ pci_disable_device(pdev);
+err:
+ kfree(dev);
+ return err;
+}
+
+static void mlx5_data_direct_remove(struct pci_dev *pdev)
+{
+ struct mlx5_data_direct_dev *dev = pci_get_drvdata(pdev);
+
+ mlx5_data_direct_dev_unreg(dev);
+ pci_disable_device(pdev);
+ kfree(dev->vuid);
+ kfree(dev);
+}
+
+static struct pci_driver mlx5_data_direct_driver = {
+ .name = KBUILD_MODNAME,
+ .id_table = mlx5_data_direct_pci_table,
+ .probe = mlx5_data_direct_probe,
+ .remove = mlx5_data_direct_remove,
+ .shutdown = mlx5_data_direct_shutdown,
+};
+
+int mlx5_data_direct_driver_register(void)
+{
+ return pci_register_driver(&mlx5_data_direct_driver);
+}
+
+void mlx5_data_direct_driver_unregister(void)
+{
+ pci_unregister_driver(&mlx5_data_direct_driver);
+}
diff --git a/drivers/infiniband/hw/mlx5/data_direct.h b/drivers/infiniband/hw/mlx5/data_direct.h
new file mode 100644
index 000000000000..2fd2bdbe8f69
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/data_direct.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved
+ */
+
+#ifndef _MLX5_IB_DATA_DIRECT_H
+#define _MLX5_IB_DATA_DIRECT_H
+
+struct mlx5_ib_dev;
+
+struct mlx5_data_direct_dev {
+ struct device *device;
+ struct pci_dev *pdev;
+ char *vuid;
+ struct list_head list;
+};
+
+int mlx5_data_direct_ib_reg(struct mlx5_ib_dev *ibdev, char *vuid);
+void mlx5_data_direct_ib_unreg(struct mlx5_ib_dev *ibdev);
+int mlx5_data_direct_driver_register(void);
+void mlx5_data_direct_driver_unregister(void);
+
+#endif
diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
new file mode 100644
index 000000000000..d31d7f3005c6
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/devx.c
@@ -0,0 +1,3242 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved.
+ */
+
+#include <rdma/ib_user_verbs.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/uverbs_types.h>
+#include <rdma/uverbs_ioctl.h>
+#include <rdma/mlx5_user_ioctl_cmds.h>
+#include <rdma/mlx5_user_ioctl_verbs.h>
+#include <rdma/ib_umem.h>
+#include <rdma/uverbs_std_types.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/fs.h>
+#include <rdma/ib_ucaps.h>
+#include "mlx5_ib.h"
+#include "devx.h"
+#include "qp.h"
+#include <linux/xarray.h>
+
+#define UVERBS_MODULE_NAME mlx5_ib
+#include <rdma/uverbs_named_ioctl.h>
+
+static void dispatch_event_fd(struct list_head *fd_list, const void *data);
+
+enum devx_obj_flags {
+ DEVX_OBJ_FLAGS_INDIRECT_MKEY = 1 << 0,
+ DEVX_OBJ_FLAGS_DCT = 1 << 1,
+ DEVX_OBJ_FLAGS_CQ = 1 << 2,
+ DEVX_OBJ_FLAGS_HW_FREED = 1 << 3,
+};
+
+#define MAX_ASYNC_CMDS 8
+
+struct mlx5_async_cmd {
+ struct ib_uobject *uobject;
+ void *in;
+ int in_size;
+ u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
+ int err;
+ struct mlx5_async_work cb_work;
+ struct completion comp;
+};
+
+struct devx_async_data {
+ struct mlx5_ib_dev *mdev;
+ struct list_head list;
+ struct devx_async_cmd_event_file *ev_file;
+ struct mlx5_async_work cb_work;
+ u16 cmd_out_len;
+ /* must be last field in this structure */
+ struct mlx5_ib_uapi_devx_async_cmd_hdr hdr;
+};
+
+struct devx_async_event_data {
+ struct list_head list; /* headed in ev_file->event_list */
+ struct mlx5_ib_uapi_devx_async_event_hdr hdr;
+};
+
+/* first level XA value data structure */
+struct devx_event {
+ struct xarray object_ids; /* second XA level, Key = object id */
+ struct list_head unaffiliated_list;
+};
+
+/* second level XA value data structure */
+struct devx_obj_event {
+ struct rcu_head rcu;
+ struct list_head obj_sub_list;
+};
+
+struct devx_event_subscription {
+ struct list_head file_list; /* headed in ev_file->
+ * subscribed_events_list
+ */
+ struct list_head xa_list; /* headed in devx_event->unaffiliated_list or
+ * devx_obj_event->obj_sub_list
+ */
+ struct list_head obj_list; /* headed in devx_object */
+ struct list_head event_list; /* headed in ev_file->event_list or in
+ * temp list via subscription
+ */
+
+ u8 is_cleaned:1;
+ u32 xa_key_level1;
+ u32 xa_key_level2;
+ struct rcu_head rcu;
+ u64 cookie;
+ struct devx_async_event_file *ev_file;
+ struct eventfd_ctx *eventfd;
+};
+
+struct devx_async_event_file {
+ struct ib_uobject uobj;
+ /* Head of events that are subscribed to this FD */
+ struct list_head subscribed_events_list;
+ spinlock_t lock;
+ wait_queue_head_t poll_wait;
+ struct list_head event_list;
+ struct mlx5_ib_dev *dev;
+ u8 omit_data:1;
+ u8 is_overflow_err:1;
+ u8 is_destroyed:1;
+};
+
+struct devx_umem {
+ struct mlx5_core_dev *mdev;
+ struct ib_umem *umem;
+ u32 dinlen;
+ u32 dinbox[MLX5_ST_SZ_DW(destroy_umem_in)];
+};
+
+struct devx_umem_reg_cmd {
+ void *in;
+ u32 inlen;
+ u32 out[MLX5_ST_SZ_DW(create_umem_out)];
+};
+
+static struct mlx5_ib_ucontext *
+devx_ufile2uctx(const struct uverbs_attr_bundle *attrs)
+{
+ return to_mucontext(ib_uverbs_get_ucontext(attrs));
+}
+
+static int set_uctx_ucaps(struct mlx5_ib_dev *dev, u64 req_ucaps, u32 *cap)
+{
+ if (UCAP_ENABLED(req_ucaps, RDMA_UCAP_MLX5_CTRL_LOCAL)) {
+ if (MLX5_CAP_GEN(dev->mdev, uctx_cap) & MLX5_UCTX_CAP_RDMA_CTRL)
+ *cap |= MLX5_UCTX_CAP_RDMA_CTRL;
+ else
+ return -EOPNOTSUPP;
+ }
+
+ if (UCAP_ENABLED(req_ucaps, RDMA_UCAP_MLX5_CTRL_OTHER_VHCA)) {
+ if (MLX5_CAP_GEN(dev->mdev, uctx_cap) &
+ MLX5_UCTX_CAP_RDMA_CTRL_OTHER_VHCA)
+ *cap |= MLX5_UCTX_CAP_RDMA_CTRL_OTHER_VHCA;
+ else
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user, u64 req_ucaps)
+{
+ u32 in[MLX5_ST_SZ_DW(create_uctx_in)] = {};
+ u32 out[MLX5_ST_SZ_DW(create_uctx_out)] = {};
+ void *uctx;
+ int err;
+ u16 uid;
+ u32 cap = 0;
+
+ /* 0 means not supported */
+ if (!MLX5_CAP_GEN(dev->mdev, log_max_uctx))
+ return -EINVAL;
+
+ uctx = MLX5_ADDR_OF(create_uctx_in, in, uctx);
+ if (is_user &&
+ (MLX5_CAP_GEN(dev->mdev, uctx_cap) & MLX5_UCTX_CAP_RAW_TX) &&
+ rdma_dev_has_raw_cap(&dev->ib_dev))
+ cap |= MLX5_UCTX_CAP_RAW_TX;
+ if (is_user &&
+ (MLX5_CAP_GEN(dev->mdev, uctx_cap) &
+ MLX5_UCTX_CAP_INTERNAL_DEV_RES) &&
+ capable(CAP_SYS_RAWIO))
+ cap |= MLX5_UCTX_CAP_INTERNAL_DEV_RES;
+
+ if (req_ucaps) {
+ err = set_uctx_ucaps(dev, req_ucaps, &cap);
+ if (err)
+ return err;
+ }
+
+ MLX5_SET(create_uctx_in, in, opcode, MLX5_CMD_OP_CREATE_UCTX);
+ MLX5_SET(uctx, uctx, cap, cap);
+
+ err = mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out));
+ if (err)
+ return err;
+
+ uid = MLX5_GET(create_uctx_out, out, uid);
+ return uid;
+}
+
+void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid)
+{
+ u32 in[MLX5_ST_SZ_DW(destroy_uctx_in)] = {};
+ u32 out[MLX5_ST_SZ_DW(destroy_uctx_out)] = {};
+
+ MLX5_SET(destroy_uctx_in, in, opcode, MLX5_CMD_OP_DESTROY_UCTX);
+ MLX5_SET(destroy_uctx_in, in, uid, uid);
+
+ mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out));
+}
+
+static bool is_legacy_unaffiliated_event_num(u16 event_num)
+{
+ switch (event_num) {
+ case MLX5_EVENT_TYPE_PORT_CHANGE:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static bool is_legacy_obj_event_num(u16 event_num)
+{
+ switch (event_num) {
+ case MLX5_EVENT_TYPE_PATH_MIG:
+ case MLX5_EVENT_TYPE_COMM_EST:
+ case MLX5_EVENT_TYPE_SQ_DRAINED:
+ case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
+ case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT:
+ case MLX5_EVENT_TYPE_CQ_ERROR:
+ case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
+ case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
+ case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
+ case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
+ case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
+ case MLX5_EVENT_TYPE_DCT_DRAINED:
+ case MLX5_EVENT_TYPE_COMP:
+ case MLX5_EVENT_TYPE_DCT_KEY_VIOLATION:
+ case MLX5_EVENT_TYPE_XRQ_ERROR:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static u16 get_legacy_obj_type(u16 opcode)
+{
+ switch (opcode) {
+ case MLX5_CMD_OP_CREATE_RQ:
+ case MLX5_CMD_OP_CREATE_RMP:
+ return MLX5_EVENT_QUEUE_TYPE_RQ;
+ case MLX5_CMD_OP_CREATE_QP:
+ return MLX5_EVENT_QUEUE_TYPE_QP;
+ case MLX5_CMD_OP_CREATE_SQ:
+ return MLX5_EVENT_QUEUE_TYPE_SQ;
+ case MLX5_CMD_OP_CREATE_DCT:
+ return MLX5_EVENT_QUEUE_TYPE_DCT;
+ default:
+ return 0;
+ }
+}
+
+static u16 get_dec_obj_type(struct devx_obj *obj, u16 event_num)
+{
+ u16 opcode;
+
+ opcode = (obj->obj_id >> 32) & 0xffff;
+
+ if (is_legacy_obj_event_num(event_num))
+ return get_legacy_obj_type(opcode);
+
+ switch (opcode) {
+ case MLX5_CMD_OP_CREATE_GENERAL_OBJECT:
+ return (obj->obj_id >> 48);
+ case MLX5_CMD_OP_CREATE_RQ:
+ return MLX5_OBJ_TYPE_RQ;
+ case MLX5_CMD_OP_CREATE_QP:
+ return MLX5_OBJ_TYPE_QP;
+ case MLX5_CMD_OP_CREATE_SQ:
+ return MLX5_OBJ_TYPE_SQ;
+ case MLX5_CMD_OP_CREATE_DCT:
+ return MLX5_OBJ_TYPE_DCT;
+ case MLX5_CMD_OP_CREATE_TIR:
+ return MLX5_OBJ_TYPE_TIR;
+ case MLX5_CMD_OP_CREATE_TIS:
+ return MLX5_OBJ_TYPE_TIS;
+ case MLX5_CMD_OP_CREATE_PSV:
+ return MLX5_OBJ_TYPE_PSV;
+ case MLX5_OBJ_TYPE_MKEY:
+ return MLX5_OBJ_TYPE_MKEY;
+ case MLX5_CMD_OP_CREATE_RMP:
+ return MLX5_OBJ_TYPE_RMP;
+ case MLX5_CMD_OP_CREATE_XRC_SRQ:
+ return MLX5_OBJ_TYPE_XRC_SRQ;
+ case MLX5_CMD_OP_CREATE_XRQ:
+ return MLX5_OBJ_TYPE_XRQ;
+ case MLX5_CMD_OP_CREATE_RQT:
+ return MLX5_OBJ_TYPE_RQT;
+ case MLX5_CMD_OP_ALLOC_FLOW_COUNTER:
+ return MLX5_OBJ_TYPE_FLOW_COUNTER;
+ case MLX5_CMD_OP_CREATE_CQ:
+ return MLX5_OBJ_TYPE_CQ;
+ default:
+ return 0;
+ }
+}
+
+static u16 get_event_obj_type(unsigned long event_type, struct mlx5_eqe *eqe)
+{
+ switch (event_type) {
+ case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
+ case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
+ case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
+ case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
+ case MLX5_EVENT_TYPE_PATH_MIG:
+ case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
+ case MLX5_EVENT_TYPE_COMM_EST:
+ case MLX5_EVENT_TYPE_SQ_DRAINED:
+ case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT:
+ case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
+ return eqe->data.qp_srq.type;
+ case MLX5_EVENT_TYPE_CQ_ERROR:
+ case MLX5_EVENT_TYPE_XRQ_ERROR:
+ return 0;
+ case MLX5_EVENT_TYPE_DCT_DRAINED:
+ case MLX5_EVENT_TYPE_DCT_KEY_VIOLATION:
+ return MLX5_EVENT_QUEUE_TYPE_DCT;
+ default:
+ return MLX5_GET(affiliated_event_header, &eqe->data, obj_type);
+ }
+}
+
+static u32 get_dec_obj_id(u64 obj_id)
+{
+ return (obj_id & 0xffffffff);
+}
+
+/*
+ * As the obj_id in the firmware is not globally unique the object type
+ * must be considered upon checking for a valid object id.
+ * For that the opcode of the creator command is encoded as part of the obj_id.
+ */
+static u64 get_enc_obj_id(u32 opcode, u32 obj_id)
+{
+ return ((u64)opcode << 32) | obj_id;
+}
+
+static u32 devx_get_created_obj_id(const void *in, const void *out, u16 opcode)
+{
+ switch (opcode) {
+ case MLX5_CMD_OP_CREATE_GENERAL_OBJECT:
+ return MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
+ case MLX5_CMD_OP_CREATE_UMEM:
+ return MLX5_GET(create_umem_out, out, umem_id);
+ case MLX5_CMD_OP_CREATE_MKEY:
+ return MLX5_GET(create_mkey_out, out, mkey_index);
+ case MLX5_CMD_OP_CREATE_CQ:
+ return MLX5_GET(create_cq_out, out, cqn);
+ case MLX5_CMD_OP_ALLOC_PD:
+ return MLX5_GET(alloc_pd_out, out, pd);
+ case MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN:
+ return MLX5_GET(alloc_transport_domain_out, out,
+ transport_domain);
+ case MLX5_CMD_OP_CREATE_RMP:
+ return MLX5_GET(create_rmp_out, out, rmpn);
+ case MLX5_CMD_OP_CREATE_SQ:
+ return MLX5_GET(create_sq_out, out, sqn);
+ case MLX5_CMD_OP_CREATE_RQ:
+ return MLX5_GET(create_rq_out, out, rqn);
+ case MLX5_CMD_OP_CREATE_RQT:
+ return MLX5_GET(create_rqt_out, out, rqtn);
+ case MLX5_CMD_OP_CREATE_TIR:
+ return MLX5_GET(create_tir_out, out, tirn);
+ case MLX5_CMD_OP_CREATE_TIS:
+ return MLX5_GET(create_tis_out, out, tisn);
+ case MLX5_CMD_OP_ALLOC_Q_COUNTER:
+ return MLX5_GET(alloc_q_counter_out, out, counter_set_id);
+ case MLX5_CMD_OP_CREATE_FLOW_TABLE:
+ return MLX5_GET(create_flow_table_out, out, table_id);
+ case MLX5_CMD_OP_CREATE_FLOW_GROUP:
+ return MLX5_GET(create_flow_group_out, out, group_id);
+ case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
+ return MLX5_GET(set_fte_in, in, flow_index);
+ case MLX5_CMD_OP_ALLOC_FLOW_COUNTER:
+ return MLX5_GET(alloc_flow_counter_out, out, flow_counter_id);
+ case MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT:
+ return MLX5_GET(alloc_packet_reformat_context_out, out,
+ packet_reformat_id);
+ case MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT:
+ return MLX5_GET(alloc_modify_header_context_out, out,
+ modify_header_id);
+ case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT:
+ return MLX5_GET(create_scheduling_element_out, out,
+ scheduling_element_id);
+ case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT:
+ return MLX5_GET(add_vxlan_udp_dport_in, in, vxlan_udp_port);
+ case MLX5_CMD_OP_SET_L2_TABLE_ENTRY:
+ return MLX5_GET(set_l2_table_entry_in, in, table_index);
+ case MLX5_CMD_OP_CREATE_QP:
+ return MLX5_GET(create_qp_out, out, qpn);
+ case MLX5_CMD_OP_CREATE_SRQ:
+ return MLX5_GET(create_srq_out, out, srqn);
+ case MLX5_CMD_OP_CREATE_XRC_SRQ:
+ return MLX5_GET(create_xrc_srq_out, out, xrc_srqn);
+ case MLX5_CMD_OP_CREATE_DCT:
+ return MLX5_GET(create_dct_out, out, dctn);
+ case MLX5_CMD_OP_CREATE_XRQ:
+ return MLX5_GET(create_xrq_out, out, xrqn);
+ case MLX5_CMD_OP_ATTACH_TO_MCG:
+ return MLX5_GET(attach_to_mcg_in, in, qpn);
+ case MLX5_CMD_OP_ALLOC_XRCD:
+ return MLX5_GET(alloc_xrcd_out, out, xrcd);
+ case MLX5_CMD_OP_CREATE_PSV:
+ return MLX5_GET(create_psv_out, out, psv0_index);
+ default:
+ /* The entry must match to one of the devx_is_obj_create_cmd */
+ WARN_ON(true);
+ return 0;
+ }
+}
+
+static u64 devx_get_obj_id(const void *in)
+{
+ u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
+ u64 obj_id;
+
+ switch (opcode) {
+ case MLX5_CMD_OP_MODIFY_GENERAL_OBJECT:
+ case MLX5_CMD_OP_QUERY_GENERAL_OBJECT:
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_GENERAL_OBJECT |
+ MLX5_GET(general_obj_in_cmd_hdr, in,
+ obj_type) << 16,
+ MLX5_GET(general_obj_in_cmd_hdr, in,
+ obj_id));
+ break;
+ case MLX5_CMD_OP_QUERY_MKEY:
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_MKEY,
+ MLX5_GET(query_mkey_in, in,
+ mkey_index));
+ break;
+ case MLX5_CMD_OP_QUERY_CQ:
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_CQ,
+ MLX5_GET(query_cq_in, in, cqn));
+ break;
+ case MLX5_CMD_OP_MODIFY_CQ:
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_CQ,
+ MLX5_GET(modify_cq_in, in, cqn));
+ break;
+ case MLX5_CMD_OP_QUERY_SQ:
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_SQ,
+ MLX5_GET(query_sq_in, in, sqn));
+ break;
+ case MLX5_CMD_OP_MODIFY_SQ:
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_SQ,
+ MLX5_GET(modify_sq_in, in, sqn));
+ break;
+ case MLX5_CMD_OP_QUERY_RQ:
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RQ,
+ MLX5_GET(query_rq_in, in, rqn));
+ break;
+ case MLX5_CMD_OP_MODIFY_RQ:
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RQ,
+ MLX5_GET(modify_rq_in, in, rqn));
+ break;
+ case MLX5_CMD_OP_QUERY_RMP:
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RMP,
+ MLX5_GET(query_rmp_in, in, rmpn));
+ break;
+ case MLX5_CMD_OP_MODIFY_RMP:
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RMP,
+ MLX5_GET(modify_rmp_in, in, rmpn));
+ break;
+ case MLX5_CMD_OP_QUERY_RQT:
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RQT,
+ MLX5_GET(query_rqt_in, in, rqtn));
+ break;
+ case MLX5_CMD_OP_MODIFY_RQT:
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RQT,
+ MLX5_GET(modify_rqt_in, in, rqtn));
+ break;
+ case MLX5_CMD_OP_QUERY_TIR:
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_TIR,
+ MLX5_GET(query_tir_in, in, tirn));
+ break;
+ case MLX5_CMD_OP_MODIFY_TIR:
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_TIR,
+ MLX5_GET(modify_tir_in, in, tirn));
+ break;
+ case MLX5_CMD_OP_QUERY_TIS:
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_TIS,
+ MLX5_GET(query_tis_in, in, tisn));
+ break;
+ case MLX5_CMD_OP_MODIFY_TIS:
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_TIS,
+ MLX5_GET(modify_tis_in, in, tisn));
+ break;
+ case MLX5_CMD_OP_QUERY_FLOW_TABLE:
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_FLOW_TABLE,
+ MLX5_GET(query_flow_table_in, in,
+ table_id));
+ break;
+ case MLX5_CMD_OP_MODIFY_FLOW_TABLE:
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_FLOW_TABLE,
+ MLX5_GET(modify_flow_table_in, in,
+ table_id));
+ break;
+ case MLX5_CMD_OP_QUERY_FLOW_GROUP:
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_FLOW_GROUP,
+ MLX5_GET(query_flow_group_in, in,
+ group_id));
+ break;
+ case MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY:
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY,
+ MLX5_GET(query_fte_in, in,
+ flow_index));
+ break;
+ case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY,
+ MLX5_GET(set_fte_in, in, flow_index));
+ break;
+ case MLX5_CMD_OP_QUERY_Q_COUNTER:
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_ALLOC_Q_COUNTER,
+ MLX5_GET(query_q_counter_in, in,
+ counter_set_id));
+ break;
+ case MLX5_CMD_OP_QUERY_FLOW_COUNTER:
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_ALLOC_FLOW_COUNTER,
+ MLX5_GET(query_flow_counter_in, in,
+ flow_counter_id));
+ break;
+ case MLX5_CMD_OP_QUERY_MODIFY_HEADER_CONTEXT:
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT,
+ MLX5_GET(query_modify_header_context_in,
+ in, modify_header_id));
+ break;
+ case MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT:
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT,
+ MLX5_GET(query_scheduling_element_in,
+ in, scheduling_element_id));
+ break;
+ case MLX5_CMD_OP_MODIFY_SCHEDULING_ELEMENT:
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT,
+ MLX5_GET(modify_scheduling_element_in,
+ in, scheduling_element_id));
+ break;
+ case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT:
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT,
+ MLX5_GET(add_vxlan_udp_dport_in, in,
+ vxlan_udp_port));
+ break;
+ case MLX5_CMD_OP_QUERY_L2_TABLE_ENTRY:
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_SET_L2_TABLE_ENTRY,
+ MLX5_GET(query_l2_table_entry_in, in,
+ table_index));
+ break;
+ case MLX5_CMD_OP_SET_L2_TABLE_ENTRY:
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_SET_L2_TABLE_ENTRY,
+ MLX5_GET(set_l2_table_entry_in, in,
+ table_index));
+ break;
+ case MLX5_CMD_OP_QUERY_QP:
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
+ MLX5_GET(query_qp_in, in, qpn));
+ break;
+ case MLX5_CMD_OP_RST2INIT_QP:
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
+ MLX5_GET(rst2init_qp_in, in, qpn));
+ break;
+ case MLX5_CMD_OP_INIT2INIT_QP:
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
+ MLX5_GET(init2init_qp_in, in, qpn));
+ break;
+ case MLX5_CMD_OP_INIT2RTR_QP:
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
+ MLX5_GET(init2rtr_qp_in, in, qpn));
+ break;
+ case MLX5_CMD_OP_RTR2RTS_QP:
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
+ MLX5_GET(rtr2rts_qp_in, in, qpn));
+ break;
+ case MLX5_CMD_OP_RTS2RTS_QP:
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
+ MLX5_GET(rts2rts_qp_in, in, qpn));
+ break;
+ case MLX5_CMD_OP_SQERR2RTS_QP:
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
+ MLX5_GET(sqerr2rts_qp_in, in, qpn));
+ break;
+ case MLX5_CMD_OP_2ERR_QP:
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
+ MLX5_GET(qp_2err_in, in, qpn));
+ break;
+ case MLX5_CMD_OP_2RST_QP:
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
+ MLX5_GET(qp_2rst_in, in, qpn));
+ break;
+ case MLX5_CMD_OP_QUERY_DCT:
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_DCT,
+ MLX5_GET(query_dct_in, in, dctn));
+ break;
+ case MLX5_CMD_OP_QUERY_XRQ:
+ case MLX5_CMD_OP_QUERY_XRQ_DC_PARAMS_ENTRY:
+ case MLX5_CMD_OP_QUERY_XRQ_ERROR_PARAMS:
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_XRQ,
+ MLX5_GET(query_xrq_in, in, xrqn));
+ break;
+ case MLX5_CMD_OP_QUERY_XRC_SRQ:
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_XRC_SRQ,
+ MLX5_GET(query_xrc_srq_in, in,
+ xrc_srqn));
+ break;
+ case MLX5_CMD_OP_ARM_XRC_SRQ:
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_XRC_SRQ,
+ MLX5_GET(arm_xrc_srq_in, in, xrc_srqn));
+ break;
+ case MLX5_CMD_OP_QUERY_SRQ:
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_SRQ,
+ MLX5_GET(query_srq_in, in, srqn));
+ break;
+ case MLX5_CMD_OP_ARM_RQ:
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RQ,
+ MLX5_GET(arm_rq_in, in, srq_number));
+ break;
+ case MLX5_CMD_OP_ARM_DCT_FOR_KEY_VIOLATION:
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_DCT,
+ MLX5_GET(drain_dct_in, in, dctn));
+ break;
+ case MLX5_CMD_OP_ARM_XRQ:
+ case MLX5_CMD_OP_SET_XRQ_DC_PARAMS_ENTRY:
+ case MLX5_CMD_OP_RELEASE_XRQ_ERROR:
+ case MLX5_CMD_OP_MODIFY_XRQ:
+ obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_XRQ,
+ MLX5_GET(arm_xrq_in, in, xrqn));
+ break;
+ case MLX5_CMD_OP_QUERY_PACKET_REFORMAT_CONTEXT:
+ obj_id = get_enc_obj_id
+ (MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT,
+ MLX5_GET(query_packet_reformat_context_in,
+ in, packet_reformat_id));
+ break;
+ default:
+ obj_id = 0;
+ }
+
+ return obj_id;
+}
+
+static bool devx_is_valid_obj_id(struct uverbs_attr_bundle *attrs,
+ struct ib_uobject *uobj, const void *in)
+{
+ struct mlx5_ib_dev *dev = mlx5_udata_to_mdev(&attrs->driver_udata);
+ u64 obj_id = devx_get_obj_id(in);
+
+ if (!obj_id)
+ return false;
+
+ switch (uobj_get_object_id(uobj)) {
+ case UVERBS_OBJECT_CQ:
+ return get_enc_obj_id(MLX5_CMD_OP_CREATE_CQ,
+ to_mcq(uobj->object)->mcq.cqn) ==
+ obj_id;
+
+ case UVERBS_OBJECT_SRQ:
+ {
+ struct mlx5_core_srq *srq = &(to_msrq(uobj->object)->msrq);
+ u16 opcode;
+
+ switch (srq->common.res) {
+ case MLX5_RES_XSRQ:
+ opcode = MLX5_CMD_OP_CREATE_XRC_SRQ;
+ break;
+ case MLX5_RES_XRQ:
+ opcode = MLX5_CMD_OP_CREATE_XRQ;
+ break;
+ default:
+ if (!dev->mdev->issi)
+ opcode = MLX5_CMD_OP_CREATE_SRQ;
+ else
+ opcode = MLX5_CMD_OP_CREATE_RMP;
+ }
+
+ return get_enc_obj_id(opcode,
+ to_msrq(uobj->object)->msrq.srqn) ==
+ obj_id;
+ }
+
+ case UVERBS_OBJECT_QP:
+ {
+ struct mlx5_ib_qp *qp = to_mqp(uobj->object);
+
+ if (qp->type == IB_QPT_RAW_PACKET ||
+ (qp->flags & IB_QP_CREATE_SOURCE_QPN)) {
+ struct mlx5_ib_raw_packet_qp *raw_packet_qp =
+ &qp->raw_packet_qp;
+ struct mlx5_ib_rq *rq = &raw_packet_qp->rq;
+ struct mlx5_ib_sq *sq = &raw_packet_qp->sq;
+
+ return (get_enc_obj_id(MLX5_CMD_OP_CREATE_RQ,
+ rq->base.mqp.qpn) == obj_id ||
+ get_enc_obj_id(MLX5_CMD_OP_CREATE_SQ,
+ sq->base.mqp.qpn) == obj_id ||
+ get_enc_obj_id(MLX5_CMD_OP_CREATE_TIR,
+ rq->tirn) == obj_id ||
+ get_enc_obj_id(MLX5_CMD_OP_CREATE_TIS,
+ sq->tisn) == obj_id);
+ }
+
+ if (qp->type == MLX5_IB_QPT_DCT)
+ return get_enc_obj_id(MLX5_CMD_OP_CREATE_DCT,
+ qp->dct.mdct.mqp.qpn) == obj_id;
+ return get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
+ qp->ibqp.qp_num) == obj_id;
+ }
+
+ case UVERBS_OBJECT_WQ:
+ return get_enc_obj_id(MLX5_CMD_OP_CREATE_RQ,
+ to_mrwq(uobj->object)->core_qp.qpn) ==
+ obj_id;
+
+ case UVERBS_OBJECT_RWQ_IND_TBL:
+ return get_enc_obj_id(MLX5_CMD_OP_CREATE_RQT,
+ to_mrwq_ind_table(uobj->object)->rqtn) ==
+ obj_id;
+
+ case MLX5_IB_OBJECT_DEVX_OBJ:
+ {
+ u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
+ struct devx_obj *devx_uobj = uobj->object;
+
+ if (opcode == MLX5_CMD_OP_QUERY_FLOW_COUNTER &&
+ devx_uobj->flow_counter_bulk_size) {
+ u64 end;
+
+ end = devx_uobj->obj_id +
+ devx_uobj->flow_counter_bulk_size;
+ return devx_uobj->obj_id <= obj_id && end > obj_id;
+ }
+
+ return devx_uobj->obj_id == obj_id;
+ }
+
+ default:
+ return false;
+ }
+}
+
+static void devx_set_umem_valid(const void *in)
+{
+ u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
+
+ switch (opcode) {
+ case MLX5_CMD_OP_CREATE_MKEY:
+ MLX5_SET(create_mkey_in, in, mkey_umem_valid, 1);
+ break;
+ case MLX5_CMD_OP_CREATE_CQ:
+ {
+ void *cqc;
+
+ MLX5_SET(create_cq_in, in, cq_umem_valid, 1);
+ cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
+ MLX5_SET(cqc, cqc, dbr_umem_valid, 1);
+ break;
+ }
+ case MLX5_CMD_OP_CREATE_QP:
+ {
+ void *qpc;
+
+ qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
+ MLX5_SET(qpc, qpc, dbr_umem_valid, 1);
+ MLX5_SET(create_qp_in, in, wq_umem_valid, 1);
+ break;
+ }
+
+ case MLX5_CMD_OP_CREATE_RQ:
+ {
+ void *rqc, *wq;
+
+ rqc = MLX5_ADDR_OF(create_rq_in, in, ctx);
+ wq = MLX5_ADDR_OF(rqc, rqc, wq);
+ MLX5_SET(wq, wq, dbr_umem_valid, 1);
+ MLX5_SET(wq, wq, wq_umem_valid, 1);
+ break;
+ }
+
+ case MLX5_CMD_OP_CREATE_SQ:
+ {
+ void *sqc, *wq;
+
+ sqc = MLX5_ADDR_OF(create_sq_in, in, ctx);
+ wq = MLX5_ADDR_OF(sqc, sqc, wq);
+ MLX5_SET(wq, wq, dbr_umem_valid, 1);
+ MLX5_SET(wq, wq, wq_umem_valid, 1);
+ break;
+ }
+
+ case MLX5_CMD_OP_MODIFY_CQ:
+ MLX5_SET(modify_cq_in, in, cq_umem_valid, 1);
+ break;
+
+ case MLX5_CMD_OP_CREATE_RMP:
+ {
+ void *rmpc, *wq;
+
+ rmpc = MLX5_ADDR_OF(create_rmp_in, in, ctx);
+ wq = MLX5_ADDR_OF(rmpc, rmpc, wq);
+ MLX5_SET(wq, wq, dbr_umem_valid, 1);
+ MLX5_SET(wq, wq, wq_umem_valid, 1);
+ break;
+ }
+
+ case MLX5_CMD_OP_CREATE_XRQ:
+ {
+ void *xrqc, *wq;
+
+ xrqc = MLX5_ADDR_OF(create_xrq_in, in, xrq_context);
+ wq = MLX5_ADDR_OF(xrqc, xrqc, wq);
+ MLX5_SET(wq, wq, dbr_umem_valid, 1);
+ MLX5_SET(wq, wq, wq_umem_valid, 1);
+ break;
+ }
+
+ case MLX5_CMD_OP_CREATE_XRC_SRQ:
+ {
+ void *xrc_srqc;
+
+ MLX5_SET(create_xrc_srq_in, in, xrc_srq_umem_valid, 1);
+ xrc_srqc = MLX5_ADDR_OF(create_xrc_srq_in, in,
+ xrc_srq_context_entry);
+ MLX5_SET(xrc_srqc, xrc_srqc, dbr_umem_valid, 1);
+ break;
+ }
+
+ default:
+ return;
+ }
+}
+
+static bool devx_is_obj_create_cmd(const void *in, u16 *opcode)
+{
+ *opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
+
+ switch (*opcode) {
+ case MLX5_CMD_OP_CREATE_GENERAL_OBJECT:
+ case MLX5_CMD_OP_CREATE_MKEY:
+ case MLX5_CMD_OP_CREATE_CQ:
+ case MLX5_CMD_OP_ALLOC_PD:
+ case MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN:
+ case MLX5_CMD_OP_CREATE_RMP:
+ case MLX5_CMD_OP_CREATE_SQ:
+ case MLX5_CMD_OP_CREATE_RQ:
+ case MLX5_CMD_OP_CREATE_RQT:
+ case MLX5_CMD_OP_CREATE_TIR:
+ case MLX5_CMD_OP_CREATE_TIS:
+ case MLX5_CMD_OP_ALLOC_Q_COUNTER:
+ case MLX5_CMD_OP_CREATE_FLOW_TABLE:
+ case MLX5_CMD_OP_CREATE_FLOW_GROUP:
+ case MLX5_CMD_OP_ALLOC_FLOW_COUNTER:
+ case MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT:
+ case MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT:
+ case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT:
+ case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT:
+ case MLX5_CMD_OP_SET_L2_TABLE_ENTRY:
+ case MLX5_CMD_OP_CREATE_QP:
+ case MLX5_CMD_OP_CREATE_SRQ:
+ case MLX5_CMD_OP_CREATE_XRC_SRQ:
+ case MLX5_CMD_OP_CREATE_DCT:
+ case MLX5_CMD_OP_CREATE_XRQ:
+ case MLX5_CMD_OP_ATTACH_TO_MCG:
+ case MLX5_CMD_OP_ALLOC_XRCD:
+ return true;
+ case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
+ {
+ u16 op_mod = MLX5_GET(set_fte_in, in, op_mod);
+ if (op_mod == 0)
+ return true;
+ return false;
+ }
+ case MLX5_CMD_OP_CREATE_PSV:
+ {
+ u8 num_psv = MLX5_GET(create_psv_in, in, num_psv);
+
+ if (num_psv == 1)
+ return true;
+ return false;
+ }
+ default:
+ return false;
+ }
+}
+
+static bool devx_is_obj_modify_cmd(const void *in)
+{
+ u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
+
+ switch (opcode) {
+ case MLX5_CMD_OP_MODIFY_GENERAL_OBJECT:
+ case MLX5_CMD_OP_MODIFY_CQ:
+ case MLX5_CMD_OP_MODIFY_RMP:
+ case MLX5_CMD_OP_MODIFY_SQ:
+ case MLX5_CMD_OP_MODIFY_RQ:
+ case MLX5_CMD_OP_MODIFY_RQT:
+ case MLX5_CMD_OP_MODIFY_TIR:
+ case MLX5_CMD_OP_MODIFY_TIS:
+ case MLX5_CMD_OP_MODIFY_FLOW_TABLE:
+ case MLX5_CMD_OP_MODIFY_SCHEDULING_ELEMENT:
+ case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT:
+ case MLX5_CMD_OP_SET_L2_TABLE_ENTRY:
+ case MLX5_CMD_OP_RST2INIT_QP:
+ case MLX5_CMD_OP_INIT2RTR_QP:
+ case MLX5_CMD_OP_INIT2INIT_QP:
+ case MLX5_CMD_OP_RTR2RTS_QP:
+ case MLX5_CMD_OP_RTS2RTS_QP:
+ case MLX5_CMD_OP_SQERR2RTS_QP:
+ case MLX5_CMD_OP_2ERR_QP:
+ case MLX5_CMD_OP_2RST_QP:
+ case MLX5_CMD_OP_ARM_XRC_SRQ:
+ case MLX5_CMD_OP_ARM_RQ:
+ case MLX5_CMD_OP_ARM_DCT_FOR_KEY_VIOLATION:
+ case MLX5_CMD_OP_ARM_XRQ:
+ case MLX5_CMD_OP_SET_XRQ_DC_PARAMS_ENTRY:
+ case MLX5_CMD_OP_RELEASE_XRQ_ERROR:
+ case MLX5_CMD_OP_MODIFY_XRQ:
+ return true;
+ case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
+ {
+ u16 op_mod = MLX5_GET(set_fte_in, in, op_mod);
+
+ if (op_mod == 1)
+ return true;
+ return false;
+ }
+ default:
+ return false;
+ }
+}
+
+static bool devx_is_obj_query_cmd(const void *in)
+{
+ u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
+
+ switch (opcode) {
+ case MLX5_CMD_OP_QUERY_GENERAL_OBJECT:
+ case MLX5_CMD_OP_QUERY_MKEY:
+ case MLX5_CMD_OP_QUERY_CQ:
+ case MLX5_CMD_OP_QUERY_RMP:
+ case MLX5_CMD_OP_QUERY_SQ:
+ case MLX5_CMD_OP_QUERY_RQ:
+ case MLX5_CMD_OP_QUERY_RQT:
+ case MLX5_CMD_OP_QUERY_TIR:
+ case MLX5_CMD_OP_QUERY_TIS:
+ case MLX5_CMD_OP_QUERY_Q_COUNTER:
+ case MLX5_CMD_OP_QUERY_FLOW_TABLE:
+ case MLX5_CMD_OP_QUERY_FLOW_GROUP:
+ case MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY:
+ case MLX5_CMD_OP_QUERY_FLOW_COUNTER:
+ case MLX5_CMD_OP_QUERY_MODIFY_HEADER_CONTEXT:
+ case MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT:
+ case MLX5_CMD_OP_QUERY_L2_TABLE_ENTRY:
+ case MLX5_CMD_OP_QUERY_QP:
+ case MLX5_CMD_OP_QUERY_SRQ:
+ case MLX5_CMD_OP_QUERY_XRC_SRQ:
+ case MLX5_CMD_OP_QUERY_DCT:
+ case MLX5_CMD_OP_QUERY_XRQ:
+ case MLX5_CMD_OP_QUERY_XRQ_DC_PARAMS_ENTRY:
+ case MLX5_CMD_OP_QUERY_XRQ_ERROR_PARAMS:
+ case MLX5_CMD_OP_QUERY_PACKET_REFORMAT_CONTEXT:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static bool devx_is_whitelist_cmd(void *in)
+{
+ u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
+
+ switch (opcode) {
+ case MLX5_CMD_OP_QUERY_HCA_CAP:
+ case MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT:
+ case MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT:
+ case MLX5_CMD_OP_QUERY_ESW_FUNCTIONS:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static int devx_get_uid(struct mlx5_ib_ucontext *c, void *cmd_in)
+{
+ if (devx_is_whitelist_cmd(cmd_in)) {
+ struct mlx5_ib_dev *dev;
+
+ if (c->devx_uid)
+ return c->devx_uid;
+
+ dev = to_mdev(c->ibucontext.device);
+ if (dev->devx_whitelist_uid)
+ return dev->devx_whitelist_uid;
+
+ return -EOPNOTSUPP;
+ }
+
+ if (!c->devx_uid)
+ return -EINVAL;
+
+ return c->devx_uid;
+}
+
+static bool devx_is_general_cmd(void *in, struct mlx5_ib_dev *dev)
+{
+ u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
+
+ /* Pass all cmds for vhca_tunnel as general, tracking is done in FW */
+ if ((MLX5_CAP_GEN_64(dev->mdev, vhca_tunnel_commands) &&
+ MLX5_GET(general_obj_in_cmd_hdr, in, vhca_tunnel_id)) ||
+ (opcode >= MLX5_CMD_OP_GENERAL_START &&
+ opcode < MLX5_CMD_OP_GENERAL_END))
+ return true;
+
+ switch (opcode) {
+ case MLX5_CMD_OP_QUERY_HCA_CAP:
+ case MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT:
+ case MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT:
+ case MLX5_CMD_OP_QUERY_VPORT_STATE:
+ case MLX5_CMD_OP_QUERY_ADAPTER:
+ case MLX5_CMD_OP_QUERY_ISSI:
+ case MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT:
+ case MLX5_CMD_OP_QUERY_ROCE_ADDRESS:
+ case MLX5_CMD_OP_QUERY_VNIC_ENV:
+ case MLX5_CMD_OP_QUERY_VPORT_COUNTER:
+ case MLX5_CMD_OP_GET_DROPPED_PACKET_LOG:
+ case MLX5_CMD_OP_NOP:
+ case MLX5_CMD_OP_QUERY_CONG_STATUS:
+ case MLX5_CMD_OP_QUERY_CONG_PARAMS:
+ case MLX5_CMD_OP_QUERY_CONG_STATISTICS:
+ case MLX5_CMD_OP_QUERY_LAG:
+ case MLX5_CMD_OP_QUERY_ESW_FUNCTIONS:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_EQN)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct mlx5_ib_ucontext *c;
+ struct mlx5_ib_dev *dev;
+ int user_vector;
+ int dev_eqn;
+ int err;
+
+ if (uverbs_copy_from(&user_vector, attrs,
+ MLX5_IB_ATTR_DEVX_QUERY_EQN_USER_VEC))
+ return -EFAULT;
+
+ c = devx_ufile2uctx(attrs);
+ if (IS_ERR(c))
+ return PTR_ERR(c);
+ dev = to_mdev(c->ibucontext.device);
+
+ err = mlx5_comp_eqn_get(dev->mdev, user_vector, &dev_eqn);
+ if (err < 0)
+ return err;
+
+ if (uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_QUERY_EQN_DEV_EQN,
+ &dev_eqn, sizeof(dev_eqn)))
+ return -EFAULT;
+
+ return 0;
+}
+
+/*
+ *Security note:
+ * The hardware protection mechanism works like this: Each device object that
+ * is subject to UAR doorbells (QP/SQ/CQ) gets a UAR ID (called uar_page in
+ * the device specification manual) upon its creation. Then upon doorbell,
+ * hardware fetches the object context for which the doorbell was rang, and
+ * validates that the UAR through which the DB was rang matches the UAR ID
+ * of the object.
+ * If no match the doorbell is silently ignored by the hardware. Of course,
+ * the user cannot ring a doorbell on a UAR that was not mapped to it.
+ * Now in devx, as the devx kernel does not manipulate the QP/SQ/CQ command
+ * mailboxes (except tagging them with UID), we expose to the user its UAR
+ * ID, so it can embed it in these objects in the expected specification
+ * format. So the only thing the user can do is hurt itself by creating a
+ * QP/SQ/CQ with a UAR ID other than his, and then in this case other users
+ * may ring a doorbell on its objects.
+ * The consequence of that will be that another user can schedule a QP/SQ
+ * of the buggy user for execution (just insert it to the hardware schedule
+ * queue or arm its CQ for event generation), no further harm is expected.
+ */
+static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_UAR)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct mlx5_ib_ucontext *c;
+ struct mlx5_ib_dev *dev;
+ u32 user_idx;
+ s32 dev_idx;
+
+ c = devx_ufile2uctx(attrs);
+ if (IS_ERR(c))
+ return PTR_ERR(c);
+ dev = to_mdev(c->ibucontext.device);
+
+ if (uverbs_copy_from(&user_idx, attrs,
+ MLX5_IB_ATTR_DEVX_QUERY_UAR_USER_IDX))
+ return -EFAULT;
+
+ dev_idx = bfregn_to_uar_index(dev, &c->bfregi, user_idx, true);
+ if (dev_idx < 0)
+ return dev_idx;
+
+ if (uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_QUERY_UAR_DEV_IDX,
+ &dev_idx, sizeof(dev_idx)))
+ return -EFAULT;
+
+ return 0;
+}
+
+static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OTHER)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct mlx5_ib_ucontext *c;
+ struct mlx5_ib_dev *dev;
+ void *cmd_in = uverbs_attr_get_alloced_ptr(
+ attrs, MLX5_IB_ATTR_DEVX_OTHER_CMD_IN);
+ int cmd_out_len = uverbs_attr_get_len(attrs,
+ MLX5_IB_ATTR_DEVX_OTHER_CMD_OUT);
+ void *cmd_out;
+ int err, err2;
+ int uid;
+
+ c = devx_ufile2uctx(attrs);
+ if (IS_ERR(c))
+ return PTR_ERR(c);
+ dev = to_mdev(c->ibucontext.device);
+
+ uid = devx_get_uid(c, cmd_in);
+ if (uid < 0)
+ return uid;
+
+ /* Only white list of some general HCA commands are allowed for this method. */
+ if (!devx_is_general_cmd(cmd_in, dev))
+ return -EINVAL;
+
+ cmd_out = uverbs_zalloc(attrs, cmd_out_len);
+ if (IS_ERR(cmd_out))
+ return PTR_ERR(cmd_out);
+
+ MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid);
+ err = mlx5_cmd_do(dev->mdev, cmd_in,
+ uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OTHER_CMD_IN),
+ cmd_out, cmd_out_len);
+ if (err && err != -EREMOTEIO)
+ return err;
+
+ err2 = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OTHER_CMD_OUT, cmd_out,
+ cmd_out_len);
+
+ return err2 ?: err;
+}
+
+static void devx_obj_build_destroy_cmd(void *in, void *out, void *din,
+ u32 *dinlen,
+ u32 *obj_id)
+{
+ u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
+ u16 uid = MLX5_GET(general_obj_in_cmd_hdr, in, uid);
+
+ *obj_id = devx_get_created_obj_id(in, out, opcode);
+ *dinlen = MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr);
+ MLX5_SET(general_obj_in_cmd_hdr, din, uid, uid);
+
+ switch (opcode) {
+ case MLX5_CMD_OP_CREATE_GENERAL_OBJECT:
+ MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
+ MLX5_SET(general_obj_in_cmd_hdr, din, obj_id, *obj_id);
+ MLX5_SET(general_obj_in_cmd_hdr, din, obj_type,
+ MLX5_GET(general_obj_in_cmd_hdr, in, obj_type));
+ break;
+
+ case MLX5_CMD_OP_CREATE_UMEM:
+ MLX5_SET(destroy_umem_in, din, opcode,
+ MLX5_CMD_OP_DESTROY_UMEM);
+ MLX5_SET(destroy_umem_in, din, umem_id, *obj_id);
+ break;
+ case MLX5_CMD_OP_CREATE_MKEY:
+ MLX5_SET(destroy_mkey_in, din, opcode,
+ MLX5_CMD_OP_DESTROY_MKEY);
+ MLX5_SET(destroy_mkey_in, din, mkey_index, *obj_id);
+ break;
+ case MLX5_CMD_OP_CREATE_CQ:
+ MLX5_SET(destroy_cq_in, din, opcode, MLX5_CMD_OP_DESTROY_CQ);
+ MLX5_SET(destroy_cq_in, din, cqn, *obj_id);
+ break;
+ case MLX5_CMD_OP_ALLOC_PD:
+ MLX5_SET(dealloc_pd_in, din, opcode, MLX5_CMD_OP_DEALLOC_PD);
+ MLX5_SET(dealloc_pd_in, din, pd, *obj_id);
+ break;
+ case MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN:
+ MLX5_SET(dealloc_transport_domain_in, din, opcode,
+ MLX5_CMD_OP_DEALLOC_TRANSPORT_DOMAIN);
+ MLX5_SET(dealloc_transport_domain_in, din, transport_domain,
+ *obj_id);
+ break;
+ case MLX5_CMD_OP_CREATE_RMP:
+ MLX5_SET(destroy_rmp_in, din, opcode, MLX5_CMD_OP_DESTROY_RMP);
+ MLX5_SET(destroy_rmp_in, din, rmpn, *obj_id);
+ break;
+ case MLX5_CMD_OP_CREATE_SQ:
+ MLX5_SET(destroy_sq_in, din, opcode, MLX5_CMD_OP_DESTROY_SQ);
+ MLX5_SET(destroy_sq_in, din, sqn, *obj_id);
+ break;
+ case MLX5_CMD_OP_CREATE_RQ:
+ MLX5_SET(destroy_rq_in, din, opcode, MLX5_CMD_OP_DESTROY_RQ);
+ MLX5_SET(destroy_rq_in, din, rqn, *obj_id);
+ break;
+ case MLX5_CMD_OP_CREATE_RQT:
+ MLX5_SET(destroy_rqt_in, din, opcode, MLX5_CMD_OP_DESTROY_RQT);
+ MLX5_SET(destroy_rqt_in, din, rqtn, *obj_id);
+ break;
+ case MLX5_CMD_OP_CREATE_TIR:
+ MLX5_SET(destroy_tir_in, din, opcode, MLX5_CMD_OP_DESTROY_TIR);
+ MLX5_SET(destroy_tir_in, din, tirn, *obj_id);
+ break;
+ case MLX5_CMD_OP_CREATE_TIS:
+ MLX5_SET(destroy_tis_in, din, opcode, MLX5_CMD_OP_DESTROY_TIS);
+ MLX5_SET(destroy_tis_in, din, tisn, *obj_id);
+ break;
+ case MLX5_CMD_OP_ALLOC_Q_COUNTER:
+ MLX5_SET(dealloc_q_counter_in, din, opcode,
+ MLX5_CMD_OP_DEALLOC_Q_COUNTER);
+ MLX5_SET(dealloc_q_counter_in, din, counter_set_id, *obj_id);
+ break;
+ case MLX5_CMD_OP_CREATE_FLOW_TABLE:
+ *dinlen = MLX5_ST_SZ_BYTES(destroy_flow_table_in);
+ MLX5_SET(destroy_flow_table_in, din, other_vport,
+ MLX5_GET(create_flow_table_in, in, other_vport));
+ MLX5_SET(destroy_flow_table_in, din, vport_number,
+ MLX5_GET(create_flow_table_in, in, vport_number));
+ MLX5_SET(destroy_flow_table_in, din, other_eswitch,
+ MLX5_GET(create_flow_table_in, in, other_eswitch));
+ MLX5_SET(destroy_flow_table_in, din, eswitch_owner_vhca_id,
+ MLX5_GET(create_flow_table_in, in,
+ eswitch_owner_vhca_id));
+ MLX5_SET(destroy_flow_table_in, din, table_type,
+ MLX5_GET(create_flow_table_in, in, table_type));
+ MLX5_SET(destroy_flow_table_in, din, table_id, *obj_id);
+ MLX5_SET(destroy_flow_table_in, din, opcode,
+ MLX5_CMD_OP_DESTROY_FLOW_TABLE);
+ break;
+ case MLX5_CMD_OP_CREATE_FLOW_GROUP:
+ *dinlen = MLX5_ST_SZ_BYTES(destroy_flow_group_in);
+ MLX5_SET(destroy_flow_group_in, din, other_vport,
+ MLX5_GET(create_flow_group_in, in, other_vport));
+ MLX5_SET(destroy_flow_group_in, din, vport_number,
+ MLX5_GET(create_flow_group_in, in, vport_number));
+ MLX5_SET(destroy_flow_group_in, din, other_eswitch,
+ MLX5_GET(create_flow_group_in, in, other_eswitch));
+ MLX5_SET(destroy_flow_group_in, din, eswitch_owner_vhca_id,
+ MLX5_GET(create_flow_group_in, in,
+ eswitch_owner_vhca_id));
+ MLX5_SET(destroy_flow_group_in, din, table_type,
+ MLX5_GET(create_flow_group_in, in, table_type));
+ MLX5_SET(destroy_flow_group_in, din, table_id,
+ MLX5_GET(create_flow_group_in, in, table_id));
+ MLX5_SET(destroy_flow_group_in, din, group_id, *obj_id);
+ MLX5_SET(destroy_flow_group_in, din, opcode,
+ MLX5_CMD_OP_DESTROY_FLOW_GROUP);
+ break;
+ case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
+ *dinlen = MLX5_ST_SZ_BYTES(delete_fte_in);
+ MLX5_SET(delete_fte_in, din, other_vport,
+ MLX5_GET(set_fte_in, in, other_vport));
+ MLX5_SET(delete_fte_in, din, vport_number,
+ MLX5_GET(set_fte_in, in, vport_number));
+ MLX5_SET(delete_fte_in, din, other_eswitch,
+ MLX5_GET(set_fte_in, in, other_eswitch));
+ MLX5_SET(delete_fte_in, din, eswitch_owner_vhca_id,
+ MLX5_GET(set_fte_in, in, eswitch_owner_vhca_id));
+ MLX5_SET(delete_fte_in, din, table_type,
+ MLX5_GET(set_fte_in, in, table_type));
+ MLX5_SET(delete_fte_in, din, table_id,
+ MLX5_GET(set_fte_in, in, table_id));
+ MLX5_SET(delete_fte_in, din, flow_index, *obj_id);
+ MLX5_SET(delete_fte_in, din, opcode,
+ MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY);
+ break;
+ case MLX5_CMD_OP_ALLOC_FLOW_COUNTER:
+ MLX5_SET(dealloc_flow_counter_in, din, opcode,
+ MLX5_CMD_OP_DEALLOC_FLOW_COUNTER);
+ MLX5_SET(dealloc_flow_counter_in, din, flow_counter_id,
+ *obj_id);
+ break;
+ case MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT:
+ MLX5_SET(dealloc_packet_reformat_context_in, din, opcode,
+ MLX5_CMD_OP_DEALLOC_PACKET_REFORMAT_CONTEXT);
+ MLX5_SET(dealloc_packet_reformat_context_in, din,
+ packet_reformat_id, *obj_id);
+ break;
+ case MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT:
+ MLX5_SET(dealloc_modify_header_context_in, din, opcode,
+ MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT);
+ MLX5_SET(dealloc_modify_header_context_in, din,
+ modify_header_id, *obj_id);
+ break;
+ case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT:
+ *dinlen = MLX5_ST_SZ_BYTES(destroy_scheduling_element_in);
+ MLX5_SET(destroy_scheduling_element_in, din,
+ scheduling_hierarchy,
+ MLX5_GET(create_scheduling_element_in, in,
+ scheduling_hierarchy));
+ MLX5_SET(destroy_scheduling_element_in, din,
+ scheduling_element_id, *obj_id);
+ MLX5_SET(destroy_scheduling_element_in, din, opcode,
+ MLX5_CMD_OP_DESTROY_SCHEDULING_ELEMENT);
+ break;
+ case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT:
+ *dinlen = MLX5_ST_SZ_BYTES(delete_vxlan_udp_dport_in);
+ MLX5_SET(delete_vxlan_udp_dport_in, din, vxlan_udp_port, *obj_id);
+ MLX5_SET(delete_vxlan_udp_dport_in, din, opcode,
+ MLX5_CMD_OP_DELETE_VXLAN_UDP_DPORT);
+ break;
+ case MLX5_CMD_OP_SET_L2_TABLE_ENTRY:
+ *dinlen = MLX5_ST_SZ_BYTES(delete_l2_table_entry_in);
+ MLX5_SET(delete_l2_table_entry_in, din, table_index, *obj_id);
+ MLX5_SET(delete_l2_table_entry_in, din, opcode,
+ MLX5_CMD_OP_DELETE_L2_TABLE_ENTRY);
+ break;
+ case MLX5_CMD_OP_CREATE_QP:
+ MLX5_SET(destroy_qp_in, din, opcode, MLX5_CMD_OP_DESTROY_QP);
+ MLX5_SET(destroy_qp_in, din, qpn, *obj_id);
+ break;
+ case MLX5_CMD_OP_CREATE_SRQ:
+ MLX5_SET(destroy_srq_in, din, opcode, MLX5_CMD_OP_DESTROY_SRQ);
+ MLX5_SET(destroy_srq_in, din, srqn, *obj_id);
+ break;
+ case MLX5_CMD_OP_CREATE_XRC_SRQ:
+ MLX5_SET(destroy_xrc_srq_in, din, opcode,
+ MLX5_CMD_OP_DESTROY_XRC_SRQ);
+ MLX5_SET(destroy_xrc_srq_in, din, xrc_srqn, *obj_id);
+ break;
+ case MLX5_CMD_OP_CREATE_DCT:
+ MLX5_SET(destroy_dct_in, din, opcode, MLX5_CMD_OP_DESTROY_DCT);
+ MLX5_SET(destroy_dct_in, din, dctn, *obj_id);
+ break;
+ case MLX5_CMD_OP_CREATE_XRQ:
+ MLX5_SET(destroy_xrq_in, din, opcode, MLX5_CMD_OP_DESTROY_XRQ);
+ MLX5_SET(destroy_xrq_in, din, xrqn, *obj_id);
+ break;
+ case MLX5_CMD_OP_ATTACH_TO_MCG:
+ *dinlen = MLX5_ST_SZ_BYTES(detach_from_mcg_in);
+ MLX5_SET(detach_from_mcg_in, din, qpn,
+ MLX5_GET(attach_to_mcg_in, in, qpn));
+ memcpy(MLX5_ADDR_OF(detach_from_mcg_in, din, multicast_gid),
+ MLX5_ADDR_OF(attach_to_mcg_in, in, multicast_gid),
+ MLX5_FLD_SZ_BYTES(attach_to_mcg_in, multicast_gid));
+ MLX5_SET(detach_from_mcg_in, din, opcode,
+ MLX5_CMD_OP_DETACH_FROM_MCG);
+ MLX5_SET(detach_from_mcg_in, din, qpn, *obj_id);
+ break;
+ case MLX5_CMD_OP_ALLOC_XRCD:
+ MLX5_SET(dealloc_xrcd_in, din, opcode,
+ MLX5_CMD_OP_DEALLOC_XRCD);
+ MLX5_SET(dealloc_xrcd_in, din, xrcd, *obj_id);
+ break;
+ case MLX5_CMD_OP_CREATE_PSV:
+ MLX5_SET(destroy_psv_in, din, opcode,
+ MLX5_CMD_OP_DESTROY_PSV);
+ MLX5_SET(destroy_psv_in, din, psvn, *obj_id);
+ break;
+ default:
+ /* The entry must match to one of the devx_is_obj_create_cmd */
+ WARN_ON(true);
+ break;
+ }
+}
+
+static int devx_handle_mkey_indirect(struct devx_obj *obj,
+ struct mlx5_ib_dev *dev,
+ void *in, void *out)
+{
+ struct mlx5_ib_mkey *mkey = &obj->mkey;
+ void *mkc;
+ u8 key;
+
+ mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
+ key = MLX5_GET(mkc, mkc, mkey_7_0);
+ mkey->key = mlx5_idx_to_mkey(
+ MLX5_GET(create_mkey_out, out, mkey_index)) | key;
+ mkey->type = MLX5_MKEY_INDIRECT_DEVX;
+ mkey->ndescs = MLX5_GET(mkc, mkc, translations_octword_size);
+ init_waitqueue_head(&mkey->wait);
+
+ return mlx5r_store_odp_mkey(dev, mkey);
+}
+
+static int devx_handle_mkey_create(struct mlx5_ib_dev *dev,
+ struct devx_obj *obj,
+ void *in, int in_len)
+{
+ int min_len = MLX5_BYTE_OFF(create_mkey_in, memory_key_mkey_entry) +
+ MLX5_FLD_SZ_BYTES(create_mkey_in,
+ memory_key_mkey_entry);
+ void *mkc;
+ u8 access_mode;
+
+ if (in_len < min_len)
+ return -EINVAL;
+
+ mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
+
+ access_mode = MLX5_GET(mkc, mkc, access_mode_1_0);
+ access_mode |= MLX5_GET(mkc, mkc, access_mode_4_2) << 2;
+
+ if (access_mode == MLX5_MKC_ACCESS_MODE_KLMS ||
+ access_mode == MLX5_MKC_ACCESS_MODE_KSM) {
+ if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING))
+ obj->flags |= DEVX_OBJ_FLAGS_INDIRECT_MKEY;
+ return 0;
+ }
+
+ MLX5_SET(create_mkey_in, in, mkey_umem_valid, 1);
+ /* TPH is not allowed to bypass the regular kernel's verbs flow */
+ MLX5_SET(mkc, mkc, pcie_tph_en, 0);
+ MLX5_SET(mkc, mkc, pcie_tph_steering_tag_index,
+ MLX5_MKC_PCIE_TPH_NO_STEERING_TAG_INDEX);
+ return 0;
+}
+
+static void devx_cleanup_subscription(struct mlx5_ib_dev *dev,
+ struct devx_event_subscription *sub)
+{
+ struct devx_event *event;
+ struct devx_obj_event *xa_val_level2;
+
+ if (sub->is_cleaned)
+ return;
+
+ sub->is_cleaned = 1;
+ list_del_rcu(&sub->xa_list);
+
+ if (list_empty(&sub->obj_list))
+ return;
+
+ list_del_rcu(&sub->obj_list);
+ /* check whether key level 1 for this obj_sub_list is empty */
+ event = xa_load(&dev->devx_event_table.event_xa,
+ sub->xa_key_level1);
+ WARN_ON(!event);
+
+ xa_val_level2 = xa_load(&event->object_ids, sub->xa_key_level2);
+ if (list_empty(&xa_val_level2->obj_sub_list)) {
+ xa_erase(&event->object_ids,
+ sub->xa_key_level2);
+ kfree_rcu(xa_val_level2, rcu);
+ }
+}
+
+static int devx_obj_cleanup(struct ib_uobject *uobject,
+ enum rdma_remove_reason why,
+ struct uverbs_attr_bundle *attrs)
+{
+ u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
+ struct mlx5_devx_event_table *devx_event_table;
+ struct devx_obj *obj = uobject->object;
+ struct devx_event_subscription *sub_entry, *tmp;
+ struct mlx5_ib_dev *dev;
+ int ret;
+
+ dev = mlx5_udata_to_mdev(&attrs->driver_udata);
+ if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY &&
+ xa_erase(&obj->ib_dev->odp_mkeys,
+ mlx5_base_mkey(obj->mkey.key)))
+ /*
+ * The pagefault_single_data_segment() does commands against
+ * the mmkey, we must wait for that to stop before freeing the
+ * mkey, as another allocation could get the same mkey #.
+ */
+ mlx5r_deref_wait_odp_mkey(&obj->mkey);
+
+ if (obj->flags & DEVX_OBJ_FLAGS_HW_FREED)
+ ret = 0;
+ else if (obj->flags & DEVX_OBJ_FLAGS_DCT)
+ ret = mlx5_core_destroy_dct(obj->ib_dev, &obj->core_dct);
+ else if (obj->flags & DEVX_OBJ_FLAGS_CQ)
+ ret = mlx5_core_destroy_cq(obj->ib_dev->mdev, &obj->core_cq);
+ else
+ ret = mlx5_cmd_exec(obj->ib_dev->mdev, obj->dinbox,
+ obj->dinlen, out, sizeof(out));
+ if (ret)
+ return ret;
+
+ devx_event_table = &dev->devx_event_table;
+
+ mutex_lock(&devx_event_table->event_xa_lock);
+ list_for_each_entry_safe(sub_entry, tmp, &obj->event_sub, obj_list)
+ devx_cleanup_subscription(dev, sub_entry);
+ mutex_unlock(&devx_event_table->event_xa_lock);
+
+ kfree(obj);
+ return ret;
+}
+
+static void devx_cq_comp(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe)
+{
+ struct devx_obj *obj = container_of(mcq, struct devx_obj, core_cq);
+ struct mlx5_devx_event_table *table;
+ struct devx_event *event;
+ struct devx_obj_event *obj_event;
+ u32 obj_id = mcq->cqn;
+
+ table = &obj->ib_dev->devx_event_table;
+ rcu_read_lock();
+ event = xa_load(&table->event_xa, MLX5_EVENT_TYPE_COMP);
+ if (!event)
+ goto out;
+
+ obj_event = xa_load(&event->object_ids, obj_id);
+ if (!obj_event)
+ goto out;
+
+ dispatch_event_fd(&obj_event->obj_sub_list, eqe);
+out:
+ rcu_read_unlock();
+}
+
+static bool is_apu_cq(struct mlx5_ib_dev *dev, const void *in)
+{
+ if (!MLX5_CAP_GEN(dev->mdev, apu) ||
+ !MLX5_GET(cqc, MLX5_ADDR_OF(create_cq_in, in, cq_context), apu_cq))
+ return false;
+
+ return true;
+}
+
+static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)(
+ struct uverbs_attr_bundle *attrs)
+{
+ void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN);
+ int cmd_out_len = uverbs_attr_get_len(attrs,
+ MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT);
+ int cmd_in_len = uverbs_attr_get_len(attrs,
+ MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN);
+ void *cmd_out;
+ struct ib_uobject *uobj = uverbs_attr_get_uobject(
+ attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_HANDLE);
+ struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context(
+ &attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
+ struct mlx5_ib_dev *dev = to_mdev(c->ibucontext.device);
+ u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
+ struct devx_obj *obj;
+ u16 obj_type = 0;
+ int err, err2 = 0;
+ int uid;
+ u32 obj_id;
+ u16 opcode;
+
+ if (MLX5_GET(general_obj_in_cmd_hdr, cmd_in, vhca_tunnel_id))
+ return -EINVAL;
+
+ uid = devx_get_uid(c, cmd_in);
+ if (uid < 0)
+ return uid;
+
+ if (!devx_is_obj_create_cmd(cmd_in, &opcode))
+ return -EINVAL;
+
+ cmd_out = uverbs_zalloc(attrs, cmd_out_len);
+ if (IS_ERR(cmd_out))
+ return PTR_ERR(cmd_out);
+
+ obj = kzalloc(sizeof(struct devx_obj), GFP_KERNEL);
+ if (!obj)
+ return -ENOMEM;
+
+ MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid);
+ if (opcode == MLX5_CMD_OP_CREATE_MKEY) {
+ err = devx_handle_mkey_create(dev, obj, cmd_in, cmd_in_len);
+ if (err)
+ goto obj_free;
+ } else {
+ devx_set_umem_valid(cmd_in);
+ }
+
+ if (opcode == MLX5_CMD_OP_CREATE_DCT) {
+ obj->flags |= DEVX_OBJ_FLAGS_DCT;
+ err = mlx5_core_create_dct(dev, &obj->core_dct, cmd_in,
+ cmd_in_len, cmd_out, cmd_out_len);
+ } else if (opcode == MLX5_CMD_OP_CREATE_CQ &&
+ !is_apu_cq(dev, cmd_in)) {
+ obj->flags |= DEVX_OBJ_FLAGS_CQ;
+ obj->core_cq.comp = devx_cq_comp;
+ err = mlx5_create_cq(dev->mdev, &obj->core_cq,
+ cmd_in, cmd_in_len, cmd_out,
+ cmd_out_len);
+ } else {
+ err = mlx5_cmd_do(dev->mdev, cmd_in, cmd_in_len,
+ cmd_out, cmd_out_len);
+ }
+
+ if (err == -EREMOTEIO)
+ err2 = uverbs_copy_to(attrs,
+ MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT,
+ cmd_out, cmd_out_len);
+ if (err)
+ goto obj_free;
+
+ if (opcode == MLX5_CMD_OP_ALLOC_FLOW_COUNTER) {
+ u32 bulk = MLX5_GET(alloc_flow_counter_in,
+ cmd_in,
+ flow_counter_bulk_log_size);
+
+ if (bulk)
+ bulk = 1 << bulk;
+ else
+ bulk = 128UL * MLX5_GET(alloc_flow_counter_in,
+ cmd_in,
+ flow_counter_bulk);
+ obj->flow_counter_bulk_size = bulk;
+ }
+
+ uobj->object = obj;
+ INIT_LIST_HEAD(&obj->event_sub);
+ obj->ib_dev = dev;
+ devx_obj_build_destroy_cmd(cmd_in, cmd_out, obj->dinbox, &obj->dinlen,
+ &obj_id);
+ WARN_ON(obj->dinlen > MLX5_MAX_DESTROY_INBOX_SIZE_DW * sizeof(u32));
+
+ err = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT, cmd_out, cmd_out_len);
+ if (err)
+ goto obj_destroy;
+
+ if (opcode == MLX5_CMD_OP_CREATE_GENERAL_OBJECT)
+ obj_type = MLX5_GET(general_obj_in_cmd_hdr, cmd_in, obj_type);
+ obj->obj_id = get_enc_obj_id(opcode | obj_type << 16, obj_id);
+
+ if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) {
+ err = devx_handle_mkey_indirect(obj, dev, cmd_in, cmd_out);
+ if (err)
+ goto obj_destroy;
+ }
+ return 0;
+
+obj_destroy:
+ if (obj->flags & DEVX_OBJ_FLAGS_DCT)
+ mlx5_core_destroy_dct(obj->ib_dev, &obj->core_dct);
+ else if (obj->flags & DEVX_OBJ_FLAGS_CQ)
+ mlx5_core_destroy_cq(obj->ib_dev->mdev, &obj->core_cq);
+ else
+ mlx5_cmd_exec(obj->ib_dev->mdev, obj->dinbox, obj->dinlen, out,
+ sizeof(out));
+obj_free:
+ kfree(obj);
+ return err2 ?: err;
+}
+
+static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_MODIFY)(
+ struct uverbs_attr_bundle *attrs)
+{
+ void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN);
+ int cmd_out_len = uverbs_attr_get_len(attrs,
+ MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_OUT);
+ struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs,
+ MLX5_IB_ATTR_DEVX_OBJ_MODIFY_HANDLE);
+ struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context(
+ &attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
+ struct mlx5_ib_dev *mdev = to_mdev(c->ibucontext.device);
+ void *cmd_out;
+ int err, err2;
+ int uid;
+
+ if (MLX5_GET(general_obj_in_cmd_hdr, cmd_in, vhca_tunnel_id))
+ return -EINVAL;
+
+ uid = devx_get_uid(c, cmd_in);
+ if (uid < 0)
+ return uid;
+
+ if (!devx_is_obj_modify_cmd(cmd_in))
+ return -EINVAL;
+
+ if (!devx_is_valid_obj_id(attrs, uobj, cmd_in))
+ return -EINVAL;
+
+ cmd_out = uverbs_zalloc(attrs, cmd_out_len);
+ if (IS_ERR(cmd_out))
+ return PTR_ERR(cmd_out);
+
+ MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid);
+ devx_set_umem_valid(cmd_in);
+
+ err = mlx5_cmd_do(mdev->mdev, cmd_in,
+ uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN),
+ cmd_out, cmd_out_len);
+ if (err && err != -EREMOTEIO)
+ return err;
+
+ err2 = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_OUT,
+ cmd_out, cmd_out_len);
+
+ return err2 ?: err;
+}
+
+static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_QUERY)(
+ struct uverbs_attr_bundle *attrs)
+{
+ void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN);
+ int cmd_out_len = uverbs_attr_get_len(attrs,
+ MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT);
+ struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs,
+ MLX5_IB_ATTR_DEVX_OBJ_QUERY_HANDLE);
+ struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context(
+ &attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
+ void *cmd_out;
+ int err, err2;
+ int uid;
+ struct mlx5_ib_dev *mdev = to_mdev(c->ibucontext.device);
+
+ if (MLX5_GET(general_obj_in_cmd_hdr, cmd_in, vhca_tunnel_id))
+ return -EINVAL;
+
+ uid = devx_get_uid(c, cmd_in);
+ if (uid < 0)
+ return uid;
+
+ if (!devx_is_obj_query_cmd(cmd_in))
+ return -EINVAL;
+
+ if (!devx_is_valid_obj_id(attrs, uobj, cmd_in))
+ return -EINVAL;
+
+ cmd_out = uverbs_zalloc(attrs, cmd_out_len);
+ if (IS_ERR(cmd_out))
+ return PTR_ERR(cmd_out);
+
+ MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid);
+ err = mlx5_cmd_do(mdev->mdev, cmd_in,
+ uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN),
+ cmd_out, cmd_out_len);
+ if (err && err != -EREMOTEIO)
+ return err;
+
+ err2 = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT,
+ cmd_out, cmd_out_len);
+
+ return err2 ?: err;
+}
+
+struct devx_async_event_queue {
+ spinlock_t lock;
+ wait_queue_head_t poll_wait;
+ struct list_head event_list;
+ atomic_t bytes_in_use;
+ u8 is_destroyed:1;
+};
+
+struct devx_async_cmd_event_file {
+ struct ib_uobject uobj;
+ struct devx_async_event_queue ev_queue;
+ struct mlx5_async_ctx async_ctx;
+};
+
+static void devx_init_event_queue(struct devx_async_event_queue *ev_queue)
+{
+ spin_lock_init(&ev_queue->lock);
+ INIT_LIST_HEAD(&ev_queue->event_list);
+ init_waitqueue_head(&ev_queue->poll_wait);
+ atomic_set(&ev_queue->bytes_in_use, 0);
+ ev_queue->is_destroyed = 0;
+}
+
+static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_ASYNC_CMD_FD_ALLOC)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct devx_async_cmd_event_file *ev_file;
+
+ struct ib_uobject *uobj = uverbs_attr_get_uobject(
+ attrs, MLX5_IB_ATTR_DEVX_ASYNC_CMD_FD_ALLOC_HANDLE);
+ struct mlx5_ib_dev *mdev = mlx5_udata_to_mdev(&attrs->driver_udata);
+
+ ev_file = container_of(uobj, struct devx_async_cmd_event_file,
+ uobj);
+ devx_init_event_queue(&ev_file->ev_queue);
+ mlx5_cmd_init_async_ctx(mdev->mdev, &ev_file->async_ctx);
+ return 0;
+}
+
+static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_ASYNC_EVENT_FD_ALLOC)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uobject *uobj = uverbs_attr_get_uobject(
+ attrs, MLX5_IB_ATTR_DEVX_ASYNC_EVENT_FD_ALLOC_HANDLE);
+ struct devx_async_event_file *ev_file;
+ struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context(
+ &attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
+ struct mlx5_ib_dev *dev = to_mdev(c->ibucontext.device);
+ u32 flags;
+ int err;
+
+ err = uverbs_get_flags32(&flags, attrs,
+ MLX5_IB_ATTR_DEVX_ASYNC_EVENT_FD_ALLOC_FLAGS,
+ MLX5_IB_UAPI_DEVX_CR_EV_CH_FLAGS_OMIT_DATA);
+
+ if (err)
+ return err;
+
+ ev_file = container_of(uobj, struct devx_async_event_file,
+ uobj);
+ spin_lock_init(&ev_file->lock);
+ INIT_LIST_HEAD(&ev_file->event_list);
+ init_waitqueue_head(&ev_file->poll_wait);
+ if (flags & MLX5_IB_UAPI_DEVX_CR_EV_CH_FLAGS_OMIT_DATA)
+ ev_file->omit_data = 1;
+ INIT_LIST_HEAD(&ev_file->subscribed_events_list);
+ ev_file->dev = dev;
+ get_device(&dev->ib_dev.dev);
+ return 0;
+}
+
+static void devx_query_callback(int status, struct mlx5_async_work *context)
+{
+ struct devx_async_data *async_data =
+ container_of(context, struct devx_async_data, cb_work);
+ struct devx_async_cmd_event_file *ev_file = async_data->ev_file;
+ struct devx_async_event_queue *ev_queue = &ev_file->ev_queue;
+ unsigned long flags;
+
+ /*
+ * Note that if the struct devx_async_cmd_event_file uobj begins to be
+ * destroyed it will block at mlx5_cmd_cleanup_async_ctx() until this
+ * routine returns, ensuring that it always remains valid here.
+ */
+ spin_lock_irqsave(&ev_queue->lock, flags);
+ list_add_tail(&async_data->list, &ev_queue->event_list);
+ spin_unlock_irqrestore(&ev_queue->lock, flags);
+
+ wake_up_interruptible(&ev_queue->poll_wait);
+}
+
+#define MAX_ASYNC_BYTES_IN_USE (1024 * 1024) /* 1MB */
+
+static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_ASYNC_QUERY)(
+ struct uverbs_attr_bundle *attrs)
+{
+ void *cmd_in = uverbs_attr_get_alloced_ptr(attrs,
+ MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_CMD_IN);
+ struct ib_uobject *uobj = uverbs_attr_get_uobject(
+ attrs,
+ MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_HANDLE);
+ u16 cmd_out_len;
+ struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context(
+ &attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
+ struct ib_uobject *fd_uobj;
+ int err;
+ int uid;
+ struct mlx5_ib_dev *mdev = to_mdev(c->ibucontext.device);
+ struct devx_async_cmd_event_file *ev_file;
+ struct devx_async_data *async_data;
+
+ if (MLX5_GET(general_obj_in_cmd_hdr, cmd_in, vhca_tunnel_id))
+ return -EINVAL;
+
+ uid = devx_get_uid(c, cmd_in);
+ if (uid < 0)
+ return uid;
+
+ if (!devx_is_obj_query_cmd(cmd_in))
+ return -EINVAL;
+
+ err = uverbs_get_const(&cmd_out_len, attrs,
+ MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_OUT_LEN);
+ if (err)
+ return err;
+
+ if (!devx_is_valid_obj_id(attrs, uobj, cmd_in))
+ return -EINVAL;
+
+ fd_uobj = uverbs_attr_get_uobject(attrs,
+ MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_FD);
+ if (IS_ERR(fd_uobj))
+ return PTR_ERR(fd_uobj);
+
+ ev_file = container_of(fd_uobj, struct devx_async_cmd_event_file,
+ uobj);
+
+ if (atomic_add_return(cmd_out_len, &ev_file->ev_queue.bytes_in_use) >
+ MAX_ASYNC_BYTES_IN_USE) {
+ atomic_sub(cmd_out_len, &ev_file->ev_queue.bytes_in_use);
+ return -EAGAIN;
+ }
+
+ async_data = kvzalloc(struct_size(async_data, hdr.out_data,
+ cmd_out_len), GFP_KERNEL);
+ if (!async_data) {
+ err = -ENOMEM;
+ goto sub_bytes;
+ }
+
+ err = uverbs_copy_from(&async_data->hdr.wr_id, attrs,
+ MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_WR_ID);
+ if (err)
+ goto free_async;
+
+ async_data->cmd_out_len = cmd_out_len;
+ async_data->mdev = mdev;
+ async_data->ev_file = ev_file;
+
+ MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid);
+ err = mlx5_cmd_exec_cb(&ev_file->async_ctx, cmd_in,
+ uverbs_attr_get_len(attrs,
+ MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_CMD_IN),
+ async_data->hdr.out_data,
+ async_data->cmd_out_len,
+ devx_query_callback, &async_data->cb_work);
+
+ if (err)
+ goto free_async;
+
+ return 0;
+
+free_async:
+ kvfree(async_data);
+sub_bytes:
+ atomic_sub(cmd_out_len, &ev_file->ev_queue.bytes_in_use);
+ return err;
+}
+
+static void
+subscribe_event_xa_dealloc(struct mlx5_devx_event_table *devx_event_table,
+ u32 key_level1,
+ bool is_level2,
+ u32 key_level2)
+{
+ struct devx_event *event;
+ struct devx_obj_event *xa_val_level2;
+
+ /* Level 1 is valid for future use, no need to free */
+ if (!is_level2)
+ return;
+
+ event = xa_load(&devx_event_table->event_xa, key_level1);
+ WARN_ON(!event);
+
+ xa_val_level2 = xa_load(&event->object_ids,
+ key_level2);
+ if (list_empty(&xa_val_level2->obj_sub_list)) {
+ xa_erase(&event->object_ids,
+ key_level2);
+ kfree_rcu(xa_val_level2, rcu);
+ }
+}
+
+static int
+subscribe_event_xa_alloc(struct mlx5_devx_event_table *devx_event_table,
+ u32 key_level1,
+ bool is_level2,
+ u32 key_level2)
+{
+ struct devx_obj_event *obj_event;
+ struct devx_event *event;
+ int err;
+
+ event = xa_load(&devx_event_table->event_xa, key_level1);
+ if (!event) {
+ event = kzalloc(sizeof(*event), GFP_KERNEL);
+ if (!event)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&event->unaffiliated_list);
+ xa_init(&event->object_ids);
+
+ err = xa_insert(&devx_event_table->event_xa,
+ key_level1,
+ event,
+ GFP_KERNEL);
+ if (err) {
+ kfree(event);
+ return err;
+ }
+ }
+
+ if (!is_level2)
+ return 0;
+
+ obj_event = xa_load(&event->object_ids, key_level2);
+ if (!obj_event) {
+ obj_event = kzalloc(sizeof(*obj_event), GFP_KERNEL);
+ if (!obj_event)
+ /* Level1 is valid for future use, no need to free */
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&obj_event->obj_sub_list);
+ err = xa_insert(&event->object_ids,
+ key_level2,
+ obj_event,
+ GFP_KERNEL);
+ if (err) {
+ kfree(obj_event);
+ return err;
+ }
+ }
+
+ return 0;
+}
+
+static bool is_valid_events_legacy(int num_events, u16 *event_type_num_list,
+ struct devx_obj *obj)
+{
+ int i;
+
+ for (i = 0; i < num_events; i++) {
+ if (obj) {
+ if (!is_legacy_obj_event_num(event_type_num_list[i]))
+ return false;
+ } else if (!is_legacy_unaffiliated_event_num(
+ event_type_num_list[i])) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+#define MAX_SUPP_EVENT_NUM 255
+static bool is_valid_events(struct mlx5_core_dev *dev,
+ int num_events, u16 *event_type_num_list,
+ struct devx_obj *obj)
+{
+ __be64 *aff_events;
+ __be64 *unaff_events;
+ int mask_entry;
+ int mask_bit;
+ int i;
+
+ if (MLX5_CAP_GEN(dev, event_cap)) {
+ aff_events = MLX5_CAP_DEV_EVENT(dev,
+ user_affiliated_events);
+ unaff_events = MLX5_CAP_DEV_EVENT(dev,
+ user_unaffiliated_events);
+ } else {
+ return is_valid_events_legacy(num_events, event_type_num_list,
+ obj);
+ }
+
+ for (i = 0; i < num_events; i++) {
+ if (event_type_num_list[i] > MAX_SUPP_EVENT_NUM)
+ return false;
+
+ mask_entry = event_type_num_list[i] / 64;
+ mask_bit = event_type_num_list[i] % 64;
+
+ if (obj) {
+ /* CQ completion */
+ if (event_type_num_list[i] == 0)
+ continue;
+
+ if (!(be64_to_cpu(aff_events[mask_entry]) &
+ (1ull << mask_bit)))
+ return false;
+
+ continue;
+ }
+
+ if (!(be64_to_cpu(unaff_events[mask_entry]) &
+ (1ull << mask_bit)))
+ return false;
+ }
+
+ return true;
+}
+
+#define MAX_NUM_EVENTS 16
+static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_SUBSCRIBE_EVENT)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uobject *devx_uobj = uverbs_attr_get_uobject(
+ attrs,
+ MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_OBJ_HANDLE);
+ struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context(
+ &attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
+ struct mlx5_ib_dev *dev = to_mdev(c->ibucontext.device);
+ struct ib_uobject *fd_uobj;
+ struct devx_obj *obj = NULL;
+ struct devx_async_event_file *ev_file;
+ struct mlx5_devx_event_table *devx_event_table = &dev->devx_event_table;
+ u16 *event_type_num_list;
+ struct devx_event_subscription *event_sub, *tmp_sub;
+ struct list_head sub_list;
+ int redirect_fd;
+ bool use_eventfd = false;
+ int num_events;
+ u16 obj_type = 0;
+ u64 cookie = 0;
+ u32 obj_id = 0;
+ int err;
+ int i;
+
+ if (!c->devx_uid)
+ return -EINVAL;
+
+ if (!IS_ERR(devx_uobj)) {
+ obj = (struct devx_obj *)devx_uobj->object;
+ if (obj)
+ obj_id = get_dec_obj_id(obj->obj_id);
+ }
+
+ fd_uobj = uverbs_attr_get_uobject(attrs,
+ MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_FD_HANDLE);
+ if (IS_ERR(fd_uobj))
+ return PTR_ERR(fd_uobj);
+
+ ev_file = container_of(fd_uobj, struct devx_async_event_file,
+ uobj);
+
+ if (uverbs_attr_is_valid(attrs,
+ MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_FD_NUM)) {
+ err = uverbs_copy_from(&redirect_fd, attrs,
+ MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_FD_NUM);
+ if (err)
+ return err;
+
+ use_eventfd = true;
+ }
+
+ if (uverbs_attr_is_valid(attrs,
+ MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_COOKIE)) {
+ if (use_eventfd)
+ return -EINVAL;
+
+ err = uverbs_copy_from(&cookie, attrs,
+ MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_COOKIE);
+ if (err)
+ return err;
+ }
+
+ num_events = uverbs_attr_ptr_get_array_size(
+ attrs, MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_TYPE_NUM_LIST,
+ sizeof(u16));
+
+ if (num_events < 0)
+ return num_events;
+
+ if (num_events > MAX_NUM_EVENTS)
+ return -EINVAL;
+
+ event_type_num_list = uverbs_attr_get_alloced_ptr(attrs,
+ MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_TYPE_NUM_LIST);
+
+ if (!is_valid_events(dev->mdev, num_events, event_type_num_list, obj))
+ return -EINVAL;
+
+ INIT_LIST_HEAD(&sub_list);
+
+ /* Protect from concurrent subscriptions to same XA entries to allow
+ * both to succeed
+ */
+ mutex_lock(&devx_event_table->event_xa_lock);
+ for (i = 0; i < num_events; i++) {
+ u32 key_level1;
+
+ if (obj)
+ obj_type = get_dec_obj_type(obj,
+ event_type_num_list[i]);
+ key_level1 = event_type_num_list[i] | obj_type << 16;
+
+ err = subscribe_event_xa_alloc(devx_event_table,
+ key_level1,
+ obj,
+ obj_id);
+ if (err)
+ goto err;
+
+ event_sub = kzalloc(sizeof(*event_sub), GFP_KERNEL);
+ if (!event_sub) {
+ err = -ENOMEM;
+ goto err;
+ }
+
+ list_add_tail(&event_sub->event_list, &sub_list);
+ uverbs_uobject_get(&ev_file->uobj);
+ if (use_eventfd) {
+ event_sub->eventfd =
+ eventfd_ctx_fdget(redirect_fd);
+
+ if (IS_ERR(event_sub->eventfd)) {
+ err = PTR_ERR(event_sub->eventfd);
+ event_sub->eventfd = NULL;
+ goto err;
+ }
+ }
+
+ event_sub->cookie = cookie;
+ event_sub->ev_file = ev_file;
+ /* May be needed upon cleanup the devx object/subscription */
+ event_sub->xa_key_level1 = key_level1;
+ event_sub->xa_key_level2 = obj_id;
+ INIT_LIST_HEAD(&event_sub->obj_list);
+ }
+
+ /* Once all the allocations and the XA data insertions were done we
+ * can go ahead and add all the subscriptions to the relevant lists
+ * without concern of a failure.
+ */
+ list_for_each_entry_safe(event_sub, tmp_sub, &sub_list, event_list) {
+ struct devx_event *event;
+ struct devx_obj_event *obj_event;
+
+ list_del_init(&event_sub->event_list);
+
+ spin_lock_irq(&ev_file->lock);
+ list_add_tail_rcu(&event_sub->file_list,
+ &ev_file->subscribed_events_list);
+ spin_unlock_irq(&ev_file->lock);
+
+ event = xa_load(&devx_event_table->event_xa,
+ event_sub->xa_key_level1);
+ WARN_ON(!event);
+
+ if (!obj) {
+ list_add_tail_rcu(&event_sub->xa_list,
+ &event->unaffiliated_list);
+ continue;
+ }
+
+ obj_event = xa_load(&event->object_ids, obj_id);
+ WARN_ON(!obj_event);
+ list_add_tail_rcu(&event_sub->xa_list,
+ &obj_event->obj_sub_list);
+ list_add_tail_rcu(&event_sub->obj_list,
+ &obj->event_sub);
+ }
+
+ mutex_unlock(&devx_event_table->event_xa_lock);
+ return 0;
+
+err:
+ list_for_each_entry_safe(event_sub, tmp_sub, &sub_list, event_list) {
+ list_del(&event_sub->event_list);
+
+ subscribe_event_xa_dealloc(devx_event_table,
+ event_sub->xa_key_level1,
+ obj,
+ obj_id);
+
+ if (event_sub->eventfd)
+ eventfd_ctx_put(event_sub->eventfd);
+ uverbs_uobject_put(&event_sub->ev_file->uobj);
+ kfree(event_sub);
+ }
+
+ mutex_unlock(&devx_event_table->event_xa_lock);
+ return err;
+}
+
+static int devx_umem_get(struct mlx5_ib_dev *dev, struct ib_ucontext *ucontext,
+ struct uverbs_attr_bundle *attrs,
+ struct devx_umem *obj, u32 access_flags)
+{
+ u64 addr;
+ size_t size;
+ int err;
+
+ if (uverbs_copy_from(&addr, attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_ADDR) ||
+ uverbs_copy_from(&size, attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_LEN))
+ return -EFAULT;
+
+ err = ib_check_mr_access(&dev->ib_dev, access_flags);
+ if (err)
+ return err;
+
+ if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_DMABUF_FD)) {
+ struct ib_umem_dmabuf *umem_dmabuf;
+ int dmabuf_fd;
+
+ err = uverbs_get_raw_fd(&dmabuf_fd, attrs,
+ MLX5_IB_ATTR_DEVX_UMEM_REG_DMABUF_FD);
+ if (err)
+ return -EFAULT;
+
+ umem_dmabuf = ib_umem_dmabuf_get_pinned(
+ &dev->ib_dev, addr, size, dmabuf_fd, access_flags);
+ if (IS_ERR(umem_dmabuf))
+ return PTR_ERR(umem_dmabuf);
+ obj->umem = &umem_dmabuf->umem;
+ } else {
+ obj->umem = ib_umem_get(&dev->ib_dev, addr, size, access_flags);
+ if (IS_ERR(obj->umem))
+ return PTR_ERR(obj->umem);
+ }
+ return 0;
+}
+
+static unsigned int devx_umem_find_best_pgsize(struct ib_umem *umem,
+ unsigned long pgsz_bitmap)
+{
+ unsigned long page_size;
+
+ /* Don't bother checking larger page sizes as offset must be zero and
+ * total DEVX umem length must be equal to total umem length.
+ */
+ pgsz_bitmap &= GENMASK_ULL(max_t(u64, order_base_2(umem->length),
+ PAGE_SHIFT),
+ MLX5_ADAPTER_PAGE_SHIFT);
+ if (!pgsz_bitmap)
+ return 0;
+
+ page_size = ib_umem_find_best_pgoff(umem, pgsz_bitmap, U64_MAX);
+ if (!page_size)
+ return 0;
+
+ /* If the page_size is less than the CPU page size then we can use the
+ * offset and create a umem which is a subset of the page list.
+ * For larger page sizes we can't be sure the DMA list reflects the
+ * VA so we must ensure that the umem extent is exactly equal to the
+ * page list. Reduce the page size until one of these cases is true.
+ */
+ while ((ib_umem_dma_offset(umem, page_size) != 0 ||
+ (umem->length % page_size) != 0) &&
+ page_size > PAGE_SIZE)
+ page_size /= 2;
+
+ return page_size;
+}
+
+static int devx_umem_reg_cmd_alloc(struct mlx5_ib_dev *dev,
+ struct uverbs_attr_bundle *attrs,
+ struct devx_umem *obj,
+ struct devx_umem_reg_cmd *cmd,
+ int access)
+{
+ unsigned long pgsz_bitmap;
+ unsigned int page_size;
+ __be64 *mtt;
+ void *umem;
+ int ret;
+
+ /*
+ * If the user does not pass in pgsz_bitmap then the user promises not
+ * to use umem_offset!=0 in any commands that allocate on top of the
+ * umem.
+ *
+ * If the user wants to use a umem_offset then it must pass in
+ * pgsz_bitmap which guides the maximum page size and thus maximum
+ * object alignment inside the umem. See the PRM.
+ *
+ * Users are not allowed to use IOVA here, mkeys are not supported on
+ * umem.
+ */
+ ret = uverbs_get_const_default(&pgsz_bitmap, attrs,
+ MLX5_IB_ATTR_DEVX_UMEM_REG_PGSZ_BITMAP,
+ GENMASK_ULL(63,
+ min(PAGE_SHIFT, MLX5_ADAPTER_PAGE_SHIFT)));
+ if (ret)
+ return ret;
+
+ page_size = devx_umem_find_best_pgsize(obj->umem, pgsz_bitmap);
+ if (!page_size)
+ return -EINVAL;
+
+ cmd->inlen = MLX5_ST_SZ_BYTES(create_umem_in) +
+ (MLX5_ST_SZ_BYTES(mtt) *
+ ib_umem_num_dma_blocks(obj->umem, page_size));
+ cmd->in = uverbs_zalloc(attrs, cmd->inlen);
+ if (IS_ERR(cmd->in))
+ return PTR_ERR(cmd->in);
+
+ umem = MLX5_ADDR_OF(create_umem_in, cmd->in, umem);
+ mtt = (__be64 *)MLX5_ADDR_OF(umem, umem, mtt);
+
+ MLX5_SET(create_umem_in, cmd->in, opcode, MLX5_CMD_OP_CREATE_UMEM);
+ MLX5_SET64(umem, umem, num_of_mtt,
+ ib_umem_num_dma_blocks(obj->umem, page_size));
+ MLX5_SET(umem, umem, log_page_size,
+ order_base_2(page_size) - MLX5_ADAPTER_PAGE_SHIFT);
+ MLX5_SET(umem, umem, page_offset,
+ ib_umem_dma_offset(obj->umem, page_size));
+
+ if (mlx5_umem_needs_ats(dev, obj->umem, access))
+ MLX5_SET(umem, umem, ats, 1);
+
+ mlx5_ib_populate_pas(obj->umem, page_size, mtt,
+ (obj->umem->writable ? MLX5_IB_MTT_WRITE : 0) |
+ MLX5_IB_MTT_READ);
+ return 0;
+}
+
+static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_UMEM_REG)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct devx_umem_reg_cmd cmd;
+ struct devx_umem *obj;
+ struct ib_uobject *uobj = uverbs_attr_get_uobject(
+ attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_HANDLE);
+ u32 obj_id;
+ struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context(
+ &attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
+ struct mlx5_ib_dev *dev = to_mdev(c->ibucontext.device);
+ int access_flags;
+ int err;
+
+ if (!c->devx_uid)
+ return -EINVAL;
+
+ err = uverbs_get_flags32(&access_flags, attrs,
+ MLX5_IB_ATTR_DEVX_UMEM_REG_ACCESS,
+ IB_ACCESS_LOCAL_WRITE |
+ IB_ACCESS_REMOTE_WRITE |
+ IB_ACCESS_REMOTE_READ |
+ IB_ACCESS_RELAXED_ORDERING);
+ if (err)
+ return err;
+
+ obj = kzalloc(sizeof(struct devx_umem), GFP_KERNEL);
+ if (!obj)
+ return -ENOMEM;
+
+ err = devx_umem_get(dev, &c->ibucontext, attrs, obj, access_flags);
+ if (err)
+ goto err_obj_free;
+
+ err = devx_umem_reg_cmd_alloc(dev, attrs, obj, &cmd, access_flags);
+ if (err)
+ goto err_umem_release;
+
+ MLX5_SET(create_umem_in, cmd.in, uid, c->devx_uid);
+ err = mlx5_cmd_exec(dev->mdev, cmd.in, cmd.inlen, cmd.out,
+ sizeof(cmd.out));
+ if (err)
+ goto err_umem_release;
+
+ obj->mdev = dev->mdev;
+ uobj->object = obj;
+ devx_obj_build_destroy_cmd(cmd.in, cmd.out, obj->dinbox, &obj->dinlen, &obj_id);
+ uverbs_finalize_uobj_create(attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_HANDLE);
+
+ err = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_OUT_ID, &obj_id,
+ sizeof(obj_id));
+ return err;
+
+err_umem_release:
+ ib_umem_release(obj->umem);
+err_obj_free:
+ kfree(obj);
+ return err;
+}
+
+static int devx_umem_cleanup(struct ib_uobject *uobject,
+ enum rdma_remove_reason why,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct devx_umem *obj = uobject->object;
+ u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
+ int err;
+
+ err = mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, out, sizeof(out));
+ if (err)
+ return err;
+
+ ib_umem_release(obj->umem);
+ kfree(obj);
+ return 0;
+}
+
+static bool is_unaffiliated_event(struct mlx5_core_dev *dev,
+ unsigned long event_type)
+{
+ __be64 *unaff_events;
+ int mask_entry;
+ int mask_bit;
+
+ if (!MLX5_CAP_GEN(dev, event_cap))
+ return is_legacy_unaffiliated_event_num(event_type);
+
+ unaff_events = MLX5_CAP_DEV_EVENT(dev,
+ user_unaffiliated_events);
+ WARN_ON(event_type > MAX_SUPP_EVENT_NUM);
+
+ mask_entry = event_type / 64;
+ mask_bit = event_type % 64;
+
+ if (!(be64_to_cpu(unaff_events[mask_entry]) & (1ull << mask_bit)))
+ return false;
+
+ return true;
+}
+
+static u32 devx_get_obj_id_from_event(unsigned long event_type, void *data)
+{
+ struct mlx5_eqe *eqe = data;
+ u32 obj_id = 0;
+
+ switch (event_type) {
+ case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
+ case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT:
+ case MLX5_EVENT_TYPE_PATH_MIG:
+ case MLX5_EVENT_TYPE_COMM_EST:
+ case MLX5_EVENT_TYPE_SQ_DRAINED:
+ case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
+ case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
+ case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
+ case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
+ case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
+ obj_id = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff;
+ break;
+ case MLX5_EVENT_TYPE_XRQ_ERROR:
+ obj_id = be32_to_cpu(eqe->data.xrq_err.type_xrqn) & 0xffffff;
+ break;
+ case MLX5_EVENT_TYPE_DCT_DRAINED:
+ case MLX5_EVENT_TYPE_DCT_KEY_VIOLATION:
+ obj_id = be32_to_cpu(eqe->data.dct.dctn) & 0xffffff;
+ break;
+ case MLX5_EVENT_TYPE_CQ_ERROR:
+ obj_id = be32_to_cpu(eqe->data.cq_err.cqn) & 0xffffff;
+ break;
+ default:
+ obj_id = MLX5_GET(affiliated_event_header, &eqe->data, obj_id);
+ break;
+ }
+
+ return obj_id;
+}
+
+static int deliver_event(struct devx_event_subscription *event_sub,
+ const void *data)
+{
+ struct devx_async_event_file *ev_file;
+ struct devx_async_event_data *event_data;
+ unsigned long flags;
+
+ ev_file = event_sub->ev_file;
+
+ if (ev_file->omit_data) {
+ spin_lock_irqsave(&ev_file->lock, flags);
+ if (!list_empty(&event_sub->event_list) ||
+ ev_file->is_destroyed) {
+ spin_unlock_irqrestore(&ev_file->lock, flags);
+ return 0;
+ }
+
+ list_add_tail(&event_sub->event_list, &ev_file->event_list);
+ spin_unlock_irqrestore(&ev_file->lock, flags);
+ wake_up_interruptible(&ev_file->poll_wait);
+ return 0;
+ }
+
+ event_data = kzalloc(sizeof(*event_data) + sizeof(struct mlx5_eqe),
+ GFP_ATOMIC);
+ if (!event_data) {
+ spin_lock_irqsave(&ev_file->lock, flags);
+ ev_file->is_overflow_err = 1;
+ spin_unlock_irqrestore(&ev_file->lock, flags);
+ return -ENOMEM;
+ }
+
+ event_data->hdr.cookie = event_sub->cookie;
+ memcpy(event_data->hdr.out_data, data, sizeof(struct mlx5_eqe));
+
+ spin_lock_irqsave(&ev_file->lock, flags);
+ if (!ev_file->is_destroyed)
+ list_add_tail(&event_data->list, &ev_file->event_list);
+ else
+ kfree(event_data);
+ spin_unlock_irqrestore(&ev_file->lock, flags);
+ wake_up_interruptible(&ev_file->poll_wait);
+
+ return 0;
+}
+
+static void dispatch_event_fd(struct list_head *fd_list,
+ const void *data)
+{
+ struct devx_event_subscription *item;
+
+ list_for_each_entry_rcu(item, fd_list, xa_list) {
+ if (item->eventfd)
+ eventfd_signal(item->eventfd);
+ else
+ deliver_event(item, data);
+ }
+}
+
+static int devx_event_notifier(struct notifier_block *nb,
+ unsigned long event_type, void *data)
+{
+ struct mlx5_devx_event_table *table;
+ struct mlx5_ib_dev *dev;
+ struct devx_event *event;
+ struct devx_obj_event *obj_event;
+ u16 obj_type = 0;
+ bool is_unaffiliated;
+ u32 obj_id;
+
+ /* Explicit filtering to kernel events which may occur frequently */
+ if (event_type == MLX5_EVENT_TYPE_CMD ||
+ event_type == MLX5_EVENT_TYPE_PAGE_REQUEST)
+ return NOTIFY_OK;
+
+ table = container_of(nb, struct mlx5_devx_event_table, devx_nb.nb);
+ dev = container_of(table, struct mlx5_ib_dev, devx_event_table);
+ is_unaffiliated = is_unaffiliated_event(dev->mdev, event_type);
+
+ if (!is_unaffiliated)
+ obj_type = get_event_obj_type(event_type, data);
+
+ rcu_read_lock();
+ event = xa_load(&table->event_xa, event_type | (obj_type << 16));
+ if (!event) {
+ rcu_read_unlock();
+ return NOTIFY_DONE;
+ }
+
+ if (is_unaffiliated) {
+ dispatch_event_fd(&event->unaffiliated_list, data);
+ rcu_read_unlock();
+ return NOTIFY_OK;
+ }
+
+ obj_id = devx_get_obj_id_from_event(event_type, data);
+ obj_event = xa_load(&event->object_ids, obj_id);
+ if (!obj_event) {
+ rcu_read_unlock();
+ return NOTIFY_DONE;
+ }
+
+ dispatch_event_fd(&obj_event->obj_sub_list, data);
+
+ rcu_read_unlock();
+ return NOTIFY_OK;
+}
+
+int mlx5_ib_devx_init(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_devx_event_table *table = &dev->devx_event_table;
+ int uid;
+
+ uid = mlx5_ib_devx_create(dev, false, 0);
+ if (uid > 0) {
+ dev->devx_whitelist_uid = uid;
+ xa_init(&table->event_xa);
+ mutex_init(&table->event_xa_lock);
+ MLX5_NB_INIT(&table->devx_nb, devx_event_notifier, NOTIFY_ANY);
+ mlx5_eq_notifier_register(dev->mdev, &table->devx_nb);
+ }
+
+ return 0;
+}
+
+void mlx5_ib_devx_cleanup(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_devx_event_table *table = &dev->devx_event_table;
+ struct devx_event_subscription *sub, *tmp;
+ struct devx_event *event;
+ void *entry;
+ unsigned long id;
+
+ if (dev->devx_whitelist_uid) {
+ mlx5_eq_notifier_unregister(dev->mdev, &table->devx_nb);
+ mutex_lock(&dev->devx_event_table.event_xa_lock);
+ xa_for_each(&table->event_xa, id, entry) {
+ event = entry;
+ list_for_each_entry_safe(
+ sub, tmp, &event->unaffiliated_list, xa_list)
+ devx_cleanup_subscription(dev, sub);
+ kfree(entry);
+ }
+ mutex_unlock(&dev->devx_event_table.event_xa_lock);
+ xa_destroy(&table->event_xa);
+
+ mlx5_ib_devx_destroy(dev, dev->devx_whitelist_uid);
+ }
+}
+
+static void devx_async_destroy_cb(int status, struct mlx5_async_work *context)
+{
+ struct mlx5_async_cmd *devx_out = container_of(context,
+ struct mlx5_async_cmd, cb_work);
+ struct devx_obj *obj = devx_out->uobject->object;
+
+ if (!status)
+ obj->flags |= DEVX_OBJ_FLAGS_HW_FREED;
+
+ complete(&devx_out->comp);
+}
+
+static void devx_async_destroy(struct mlx5_ib_dev *dev,
+ struct mlx5_async_cmd *cmd)
+{
+ init_completion(&cmd->comp);
+ cmd->err = mlx5_cmd_exec_cb(&dev->async_ctx, cmd->in, cmd->in_size,
+ &cmd->out, sizeof(cmd->out),
+ devx_async_destroy_cb, &cmd->cb_work);
+}
+
+static void devx_wait_async_destroy(struct mlx5_async_cmd *cmd)
+{
+ if (!cmd->err)
+ wait_for_completion(&cmd->comp);
+ atomic_set(&cmd->uobject->usecnt, 0);
+}
+
+void mlx5_ib_ufile_hw_cleanup(struct ib_uverbs_file *ufile)
+{
+ struct mlx5_async_cmd *async_cmd;
+ struct ib_ucontext *ucontext = ufile->ucontext;
+ struct ib_device *device = ucontext->device;
+ struct mlx5_ib_dev *dev = to_mdev(device);
+ struct ib_uobject *uobject;
+ struct devx_obj *obj;
+ int head = 0;
+ int tail = 0;
+
+ async_cmd = kcalloc(MAX_ASYNC_CMDS, sizeof(*async_cmd), GFP_KERNEL);
+ if (!async_cmd)
+ return;
+
+ list_for_each_entry(uobject, &ufile->uobjects, list) {
+ WARN_ON(uverbs_try_lock_object(uobject, UVERBS_LOOKUP_WRITE));
+
+ /*
+ * Currently we only support QP destruction, if other objects
+ * are to be destroyed need to add type synchronization to the
+ * cleanup algorithm and handle pre/post FW cleanup for the
+ * new types if needed.
+ */
+ if (uobj_get_object_id(uobject) != MLX5_IB_OBJECT_DEVX_OBJ ||
+ (get_dec_obj_type(uobject->object, MLX5_EVENT_TYPE_MAX) !=
+ MLX5_OBJ_TYPE_QP)) {
+ atomic_set(&uobject->usecnt, 0);
+ continue;
+ }
+
+ obj = uobject->object;
+
+ async_cmd[tail % MAX_ASYNC_CMDS].in = obj->dinbox;
+ async_cmd[tail % MAX_ASYNC_CMDS].in_size = obj->dinlen;
+ async_cmd[tail % MAX_ASYNC_CMDS].uobject = uobject;
+
+ devx_async_destroy(dev, &async_cmd[tail % MAX_ASYNC_CMDS]);
+ tail++;
+
+ if (tail - head == MAX_ASYNC_CMDS) {
+ devx_wait_async_destroy(&async_cmd[head % MAX_ASYNC_CMDS]);
+ head++;
+ }
+ }
+
+ while (head != tail) {
+ devx_wait_async_destroy(&async_cmd[head % MAX_ASYNC_CMDS]);
+ head++;
+ }
+
+ kfree(async_cmd);
+}
+
+static ssize_t devx_async_cmd_event_read(struct file *filp, char __user *buf,
+ size_t count, loff_t *pos)
+{
+ struct devx_async_cmd_event_file *comp_ev_file = filp->private_data;
+ struct devx_async_event_queue *ev_queue = &comp_ev_file->ev_queue;
+ struct devx_async_data *event;
+ int ret = 0;
+ size_t eventsz;
+
+ spin_lock_irq(&ev_queue->lock);
+
+ while (list_empty(&ev_queue->event_list)) {
+ spin_unlock_irq(&ev_queue->lock);
+
+ if (filp->f_flags & O_NONBLOCK)
+ return -EAGAIN;
+
+ if (wait_event_interruptible(
+ ev_queue->poll_wait,
+ (!list_empty(&ev_queue->event_list) ||
+ ev_queue->is_destroyed))) {
+ return -ERESTARTSYS;
+ }
+
+ spin_lock_irq(&ev_queue->lock);
+ if (ev_queue->is_destroyed) {
+ spin_unlock_irq(&ev_queue->lock);
+ return -EIO;
+ }
+ }
+
+ event = list_entry(ev_queue->event_list.next,
+ struct devx_async_data, list);
+ eventsz = event->cmd_out_len +
+ sizeof(struct mlx5_ib_uapi_devx_async_cmd_hdr);
+
+ if (eventsz > count) {
+ spin_unlock_irq(&ev_queue->lock);
+ return -ENOSPC;
+ }
+
+ list_del(ev_queue->event_list.next);
+ spin_unlock_irq(&ev_queue->lock);
+
+ if (copy_to_user(buf, &event->hdr, eventsz))
+ ret = -EFAULT;
+ else
+ ret = eventsz;
+
+ atomic_sub(event->cmd_out_len, &ev_queue->bytes_in_use);
+ kvfree(event);
+ return ret;
+}
+
+static __poll_t devx_async_cmd_event_poll(struct file *filp,
+ struct poll_table_struct *wait)
+{
+ struct devx_async_cmd_event_file *comp_ev_file = filp->private_data;
+ struct devx_async_event_queue *ev_queue = &comp_ev_file->ev_queue;
+ __poll_t pollflags = 0;
+
+ poll_wait(filp, &ev_queue->poll_wait, wait);
+
+ spin_lock_irq(&ev_queue->lock);
+ if (ev_queue->is_destroyed)
+ pollflags = EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
+ else if (!list_empty(&ev_queue->event_list))
+ pollflags = EPOLLIN | EPOLLRDNORM;
+ spin_unlock_irq(&ev_queue->lock);
+
+ return pollflags;
+}
+
+static const struct file_operations devx_async_cmd_event_fops = {
+ .owner = THIS_MODULE,
+ .read = devx_async_cmd_event_read,
+ .poll = devx_async_cmd_event_poll,
+ .release = uverbs_uobject_fd_release,
+};
+
+static ssize_t devx_async_event_read(struct file *filp, char __user *buf,
+ size_t count, loff_t *pos)
+{
+ struct devx_async_event_file *ev_file = filp->private_data;
+ struct devx_event_subscription *event_sub;
+ struct devx_async_event_data *event;
+ int ret = 0;
+ size_t eventsz;
+ bool omit_data;
+ void *event_data;
+
+ omit_data = ev_file->omit_data;
+
+ spin_lock_irq(&ev_file->lock);
+
+ if (ev_file->is_overflow_err) {
+ ev_file->is_overflow_err = 0;
+ spin_unlock_irq(&ev_file->lock);
+ return -EOVERFLOW;
+ }
+
+
+ while (list_empty(&ev_file->event_list)) {
+ spin_unlock_irq(&ev_file->lock);
+
+ if (filp->f_flags & O_NONBLOCK)
+ return -EAGAIN;
+
+ if (wait_event_interruptible(ev_file->poll_wait,
+ (!list_empty(&ev_file->event_list) ||
+ ev_file->is_destroyed))) {
+ return -ERESTARTSYS;
+ }
+
+ spin_lock_irq(&ev_file->lock);
+ if (ev_file->is_destroyed) {
+ spin_unlock_irq(&ev_file->lock);
+ return -EIO;
+ }
+ }
+
+ if (omit_data) {
+ event_sub = list_first_entry(&ev_file->event_list,
+ struct devx_event_subscription,
+ event_list);
+ eventsz = sizeof(event_sub->cookie);
+ event_data = &event_sub->cookie;
+ } else {
+ event = list_first_entry(&ev_file->event_list,
+ struct devx_async_event_data, list);
+ eventsz = sizeof(struct mlx5_eqe) +
+ sizeof(struct mlx5_ib_uapi_devx_async_event_hdr);
+ event_data = &event->hdr;
+ }
+
+ if (eventsz > count) {
+ spin_unlock_irq(&ev_file->lock);
+ return -EINVAL;
+ }
+
+ if (omit_data)
+ list_del_init(&event_sub->event_list);
+ else
+ list_del(&event->list);
+
+ spin_unlock_irq(&ev_file->lock);
+
+ if (copy_to_user(buf, event_data, eventsz))
+ /* This points to an application issue, not a kernel concern */
+ ret = -EFAULT;
+ else
+ ret = eventsz;
+
+ if (!omit_data)
+ kfree(event);
+ return ret;
+}
+
+static __poll_t devx_async_event_poll(struct file *filp,
+ struct poll_table_struct *wait)
+{
+ struct devx_async_event_file *ev_file = filp->private_data;
+ __poll_t pollflags = 0;
+
+ poll_wait(filp, &ev_file->poll_wait, wait);
+
+ spin_lock_irq(&ev_file->lock);
+ if (ev_file->is_destroyed)
+ pollflags = EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
+ else if (!list_empty(&ev_file->event_list))
+ pollflags = EPOLLIN | EPOLLRDNORM;
+ spin_unlock_irq(&ev_file->lock);
+
+ return pollflags;
+}
+
+static void devx_free_subscription(struct rcu_head *rcu)
+{
+ struct devx_event_subscription *event_sub =
+ container_of(rcu, struct devx_event_subscription, rcu);
+
+ if (event_sub->eventfd)
+ eventfd_ctx_put(event_sub->eventfd);
+ uverbs_uobject_put(&event_sub->ev_file->uobj);
+ kfree(event_sub);
+}
+
+static const struct file_operations devx_async_event_fops = {
+ .owner = THIS_MODULE,
+ .read = devx_async_event_read,
+ .poll = devx_async_event_poll,
+ .release = uverbs_uobject_fd_release,
+};
+
+static void devx_async_cmd_event_destroy_uobj(struct ib_uobject *uobj,
+ enum rdma_remove_reason why)
+{
+ struct devx_async_cmd_event_file *comp_ev_file =
+ container_of(uobj, struct devx_async_cmd_event_file,
+ uobj);
+ struct devx_async_event_queue *ev_queue = &comp_ev_file->ev_queue;
+ struct devx_async_data *entry, *tmp;
+
+ spin_lock_irq(&ev_queue->lock);
+ ev_queue->is_destroyed = 1;
+ spin_unlock_irq(&ev_queue->lock);
+ wake_up_interruptible(&ev_queue->poll_wait);
+
+ mlx5_cmd_cleanup_async_ctx(&comp_ev_file->async_ctx);
+
+ spin_lock_irq(&comp_ev_file->ev_queue.lock);
+ list_for_each_entry_safe(entry, tmp,
+ &comp_ev_file->ev_queue.event_list, list) {
+ list_del(&entry->list);
+ kvfree(entry);
+ }
+ spin_unlock_irq(&comp_ev_file->ev_queue.lock);
+};
+
+static void devx_async_event_destroy_uobj(struct ib_uobject *uobj,
+ enum rdma_remove_reason why)
+{
+ struct devx_async_event_file *ev_file =
+ container_of(uobj, struct devx_async_event_file,
+ uobj);
+ struct devx_event_subscription *event_sub, *event_sub_tmp;
+ struct mlx5_ib_dev *dev = ev_file->dev;
+
+ spin_lock_irq(&ev_file->lock);
+ ev_file->is_destroyed = 1;
+
+ /* free the pending events allocation */
+ if (ev_file->omit_data) {
+ struct devx_event_subscription *event_sub, *tmp;
+
+ list_for_each_entry_safe(event_sub, tmp, &ev_file->event_list,
+ event_list)
+ list_del_init(&event_sub->event_list);
+
+ } else {
+ struct devx_async_event_data *entry, *tmp;
+
+ list_for_each_entry_safe(entry, tmp, &ev_file->event_list,
+ list) {
+ list_del(&entry->list);
+ kfree(entry);
+ }
+ }
+
+ spin_unlock_irq(&ev_file->lock);
+ wake_up_interruptible(&ev_file->poll_wait);
+
+ mutex_lock(&dev->devx_event_table.event_xa_lock);
+ /* delete the subscriptions which are related to this FD */
+ list_for_each_entry_safe(event_sub, event_sub_tmp,
+ &ev_file->subscribed_events_list, file_list) {
+ devx_cleanup_subscription(dev, event_sub);
+ list_del_rcu(&event_sub->file_list);
+ /* subscription may not be used by the read API any more */
+ call_rcu(&event_sub->rcu, devx_free_subscription);
+ }
+ mutex_unlock(&dev->devx_event_table.event_xa_lock);
+
+ put_device(&dev->ib_dev.dev);
+};
+
+DECLARE_UVERBS_NAMED_METHOD(
+ MLX5_IB_METHOD_DEVX_UMEM_REG,
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_UMEM_REG_HANDLE,
+ MLX5_IB_OBJECT_DEVX_UMEM,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_ADDR,
+ UVERBS_ATTR_TYPE(u64),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_LEN,
+ UVERBS_ATTR_TYPE(u64),
+ UA_MANDATORY),
+ UVERBS_ATTR_RAW_FD(MLX5_IB_ATTR_DEVX_UMEM_REG_DMABUF_FD,
+ UA_OPTIONAL),
+ UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_ACCESS,
+ enum ib_access_flags),
+ UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_PGSZ_BITMAP,
+ u64),
+ UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_UMEM_REG_OUT_ID,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(
+ MLX5_IB_METHOD_DEVX_UMEM_DEREG,
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_UMEM_DEREG_HANDLE,
+ MLX5_IB_OBJECT_DEVX_UMEM,
+ UVERBS_ACCESS_DESTROY,
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD(
+ MLX5_IB_METHOD_DEVX_QUERY_EQN,
+ UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_QUERY_EQN_USER_VEC,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_QUERY_EQN_DEV_EQN,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD(
+ MLX5_IB_METHOD_DEVX_QUERY_UAR,
+ UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_QUERY_UAR_USER_IDX,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_QUERY_UAR_DEV_IDX,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD(
+ MLX5_IB_METHOD_DEVX_OTHER,
+ UVERBS_ATTR_PTR_IN(
+ MLX5_IB_ATTR_DEVX_OTHER_CMD_IN,
+ UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)),
+ UA_MANDATORY,
+ UA_ALLOC_AND_COPY),
+ UVERBS_ATTR_PTR_OUT(
+ MLX5_IB_ATTR_DEVX_OTHER_CMD_OUT,
+ UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)),
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD(
+ MLX5_IB_METHOD_DEVX_OBJ_CREATE,
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_CREATE_HANDLE,
+ MLX5_IB_OBJECT_DEVX_OBJ,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(
+ MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN,
+ UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)),
+ UA_MANDATORY,
+ UA_ALLOC_AND_COPY),
+ UVERBS_ATTR_PTR_OUT(
+ MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT,
+ UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)),
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(
+ MLX5_IB_METHOD_DEVX_OBJ_DESTROY,
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_DESTROY_HANDLE,
+ MLX5_IB_OBJECT_DEVX_OBJ,
+ UVERBS_ACCESS_DESTROY,
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD(
+ MLX5_IB_METHOD_DEVX_OBJ_MODIFY,
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_MODIFY_HANDLE,
+ UVERBS_IDR_ANY_OBJECT,
+ UVERBS_ACCESS_READ,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(
+ MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN,
+ UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)),
+ UA_MANDATORY,
+ UA_ALLOC_AND_COPY),
+ UVERBS_ATTR_PTR_OUT(
+ MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_OUT,
+ UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)),
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD(
+ MLX5_IB_METHOD_DEVX_OBJ_QUERY,
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_QUERY_HANDLE,
+ UVERBS_IDR_ANY_OBJECT,
+ UVERBS_ACCESS_READ,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(
+ MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN,
+ UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)),
+ UA_MANDATORY,
+ UA_ALLOC_AND_COPY),
+ UVERBS_ATTR_PTR_OUT(
+ MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT,
+ UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)),
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD(
+ MLX5_IB_METHOD_DEVX_OBJ_ASYNC_QUERY,
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_QUERY_HANDLE,
+ UVERBS_IDR_ANY_OBJECT,
+ UVERBS_ACCESS_READ,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(
+ MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN,
+ UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)),
+ UA_MANDATORY,
+ UA_ALLOC_AND_COPY),
+ UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_OUT_LEN,
+ u16, UA_MANDATORY),
+ UVERBS_ATTR_FD(MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_FD,
+ MLX5_IB_OBJECT_DEVX_ASYNC_CMD_FD,
+ UVERBS_ACCESS_READ,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_WR_ID,
+ UVERBS_ATTR_TYPE(u64),
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD(
+ MLX5_IB_METHOD_DEVX_SUBSCRIBE_EVENT,
+ UVERBS_ATTR_FD(MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_FD_HANDLE,
+ MLX5_IB_OBJECT_DEVX_ASYNC_EVENT_FD,
+ UVERBS_ACCESS_READ,
+ UA_MANDATORY),
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_OBJ_HANDLE,
+ MLX5_IB_OBJECT_DEVX_OBJ,
+ UVERBS_ACCESS_READ,
+ UA_OPTIONAL),
+ UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_TYPE_NUM_LIST,
+ UVERBS_ATTR_MIN_SIZE(sizeof(u16)),
+ UA_MANDATORY,
+ UA_ALLOC_AND_COPY),
+ UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_COOKIE,
+ UVERBS_ATTR_TYPE(u64),
+ UA_OPTIONAL),
+ UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_FD_NUM,
+ UVERBS_ATTR_TYPE(u32),
+ UA_OPTIONAL));
+
+DECLARE_UVERBS_GLOBAL_METHODS(MLX5_IB_OBJECT_DEVX,
+ &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OTHER),
+ &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_QUERY_UAR),
+ &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_QUERY_EQN),
+ &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_SUBSCRIBE_EVENT));
+
+DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_DEVX_OBJ,
+ UVERBS_TYPE_ALLOC_IDR(devx_obj_cleanup),
+ &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_CREATE),
+ &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_DESTROY),
+ &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_MODIFY),
+ &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_QUERY),
+ &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_ASYNC_QUERY));
+
+DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_DEVX_UMEM,
+ UVERBS_TYPE_ALLOC_IDR(devx_umem_cleanup),
+ &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_UMEM_REG),
+ &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_UMEM_DEREG));
+
+
+DECLARE_UVERBS_NAMED_METHOD(
+ MLX5_IB_METHOD_DEVX_ASYNC_CMD_FD_ALLOC,
+ UVERBS_ATTR_FD(MLX5_IB_ATTR_DEVX_ASYNC_CMD_FD_ALLOC_HANDLE,
+ MLX5_IB_OBJECT_DEVX_ASYNC_CMD_FD,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_OBJECT(
+ MLX5_IB_OBJECT_DEVX_ASYNC_CMD_FD,
+ UVERBS_TYPE_ALLOC_FD(sizeof(struct devx_async_cmd_event_file),
+ devx_async_cmd_event_destroy_uobj,
+ &devx_async_cmd_event_fops, "[devx_async_cmd]",
+ O_RDONLY),
+ &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_ASYNC_CMD_FD_ALLOC));
+
+DECLARE_UVERBS_NAMED_METHOD(
+ MLX5_IB_METHOD_DEVX_ASYNC_EVENT_FD_ALLOC,
+ UVERBS_ATTR_FD(MLX5_IB_ATTR_DEVX_ASYNC_EVENT_FD_ALLOC_HANDLE,
+ MLX5_IB_OBJECT_DEVX_ASYNC_EVENT_FD,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY),
+ UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_DEVX_ASYNC_EVENT_FD_ALLOC_FLAGS,
+ enum mlx5_ib_uapi_devx_create_event_channel_flags,
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_OBJECT(
+ MLX5_IB_OBJECT_DEVX_ASYNC_EVENT_FD,
+ UVERBS_TYPE_ALLOC_FD(sizeof(struct devx_async_event_file),
+ devx_async_event_destroy_uobj,
+ &devx_async_event_fops, "[devx_async_event]",
+ O_RDONLY),
+ &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_ASYNC_EVENT_FD_ALLOC));
+
+static bool devx_is_supported(struct ib_device *device)
+{
+ struct mlx5_ib_dev *dev = to_mdev(device);
+
+ return MLX5_CAP_GEN(dev->mdev, log_max_uctx);
+}
+
+const struct uapi_definition mlx5_ib_devx_defs[] = {
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
+ MLX5_IB_OBJECT_DEVX,
+ UAPI_DEF_IS_OBJ_SUPPORTED(devx_is_supported)),
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
+ MLX5_IB_OBJECT_DEVX_OBJ,
+ UAPI_DEF_IS_OBJ_SUPPORTED(devx_is_supported)),
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
+ MLX5_IB_OBJECT_DEVX_UMEM,
+ UAPI_DEF_IS_OBJ_SUPPORTED(devx_is_supported)),
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
+ MLX5_IB_OBJECT_DEVX_ASYNC_CMD_FD,
+ UAPI_DEF_IS_OBJ_SUPPORTED(devx_is_supported)),
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
+ MLX5_IB_OBJECT_DEVX_ASYNC_EVENT_FD,
+ UAPI_DEF_IS_OBJ_SUPPORTED(devx_is_supported)),
+ {},
+};
diff --git a/drivers/infiniband/hw/mlx5/devx.h b/drivers/infiniband/hw/mlx5/devx.h
new file mode 100644
index 000000000000..ee9e7d3af93f
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/devx.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright (c) 2019-2020, Mellanox Technologies inc. All rights reserved.
+ */
+
+#ifndef _MLX5_IB_DEVX_H
+#define _MLX5_IB_DEVX_H
+
+#include "mlx5_ib.h"
+
+#define MLX5_MAX_DESTROY_INBOX_SIZE_DW MLX5_ST_SZ_DW(delete_fte_in)
+struct devx_obj {
+ struct mlx5_ib_dev *ib_dev;
+ u64 obj_id;
+ u32 dinlen; /* destroy inbox length */
+ u32 dinbox[MLX5_MAX_DESTROY_INBOX_SIZE_DW];
+ u32 flags;
+ union {
+ struct mlx5_ib_mkey mkey;
+ struct mlx5_core_dct core_dct;
+ struct mlx5_core_cq core_cq;
+ u32 flow_counter_bulk_size;
+ };
+ struct list_head event_sub; /* holds devx_event_subscription entries */
+};
+#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
+int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user, u64 req_ucaps);
+void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid);
+int mlx5_ib_devx_init(struct mlx5_ib_dev *dev);
+void mlx5_ib_devx_cleanup(struct mlx5_ib_dev *dev);
+void mlx5_ib_ufile_hw_cleanup(struct ib_uverbs_file *ufile);
+#else
+static inline int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user,
+ u64 req_ucaps)
+{
+ return -EOPNOTSUPP;
+}
+static inline void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid) {}
+static inline int mlx5_ib_devx_init(struct mlx5_ib_dev *dev)
+{
+ return 0;
+}
+static inline void mlx5_ib_devx_cleanup(struct mlx5_ib_dev *dev)
+{
+}
+static inline void mlx5_ib_ufile_hw_cleanup(struct ib_uverbs_file *ufile)
+{
+}
+#endif
+#endif /* _MLX5_IB_DEVX_H */
diff --git a/drivers/infiniband/hw/mlx5/dm.c b/drivers/infiniband/hw/mlx5/dm.c
new file mode 100644
index 000000000000..9ded2b7c1e31
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/dm.c
@@ -0,0 +1,612 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2021, Mellanox Technologies inc. All rights reserved.
+ */
+
+#include <rdma/uverbs_std_types.h>
+#include "dm.h"
+
+#define UVERBS_MODULE_NAME mlx5_ib
+#include <rdma/uverbs_named_ioctl.h>
+
+static int mlx5_cmd_alloc_memic(struct mlx5_dm *dm, phys_addr_t *addr,
+ u64 length, u32 alignment)
+{
+ struct mlx5_core_dev *dev = dm->dev;
+ u64 num_memic_hw_pages = MLX5_CAP_DEV_MEM(dev, memic_bar_size)
+ >> PAGE_SHIFT;
+ u64 hw_start_addr = MLX5_CAP64_DEV_MEM(dev, memic_bar_start_addr);
+ u32 max_alignment = MLX5_CAP_DEV_MEM(dev, log_max_memic_addr_alignment);
+ u32 num_pages = DIV_ROUND_UP(length, PAGE_SIZE);
+ u32 out[MLX5_ST_SZ_DW(alloc_memic_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(alloc_memic_in)] = {};
+ u32 mlx5_alignment;
+ u64 page_idx = 0;
+ int ret = 0;
+
+ if (!length || (length & MLX5_MEMIC_ALLOC_SIZE_MASK))
+ return -EINVAL;
+
+ /* mlx5 device sets alignment as 64*2^driver_value
+ * so normalizing is needed.
+ */
+ mlx5_alignment = (alignment < MLX5_MEMIC_BASE_ALIGN) ? 0 :
+ alignment - MLX5_MEMIC_BASE_ALIGN;
+ if (mlx5_alignment > max_alignment)
+ return -EINVAL;
+
+ MLX5_SET(alloc_memic_in, in, opcode, MLX5_CMD_OP_ALLOC_MEMIC);
+ MLX5_SET(alloc_memic_in, in, range_size, num_pages * PAGE_SIZE);
+ MLX5_SET(alloc_memic_in, in, memic_size, length);
+ MLX5_SET(alloc_memic_in, in, log_memic_addr_alignment,
+ mlx5_alignment);
+
+ while (page_idx < num_memic_hw_pages) {
+ spin_lock(&dm->lock);
+ page_idx = bitmap_find_next_zero_area(dm->memic_alloc_pages,
+ num_memic_hw_pages,
+ page_idx,
+ num_pages, 0);
+
+ if (page_idx < num_memic_hw_pages)
+ bitmap_set(dm->memic_alloc_pages,
+ page_idx, num_pages);
+
+ spin_unlock(&dm->lock);
+
+ if (page_idx >= num_memic_hw_pages)
+ break;
+
+ MLX5_SET64(alloc_memic_in, in, range_start_addr,
+ hw_start_addr + (page_idx * PAGE_SIZE));
+
+ ret = mlx5_cmd_exec_inout(dev, alloc_memic, in, out);
+ if (ret) {
+ spin_lock(&dm->lock);
+ bitmap_clear(dm->memic_alloc_pages,
+ page_idx, num_pages);
+ spin_unlock(&dm->lock);
+
+ if (ret == -EAGAIN) {
+ page_idx++;
+ continue;
+ }
+
+ return ret;
+ }
+
+ *addr = dev->bar_addr +
+ MLX5_GET64(alloc_memic_out, out, memic_start_addr);
+
+ return 0;
+ }
+
+ return -ENOMEM;
+}
+
+void mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, phys_addr_t addr,
+ u64 length)
+{
+ struct mlx5_core_dev *dev = dm->dev;
+ u64 hw_start_addr = MLX5_CAP64_DEV_MEM(dev, memic_bar_start_addr);
+ u32 num_pages = DIV_ROUND_UP(length, PAGE_SIZE);
+ u32 in[MLX5_ST_SZ_DW(dealloc_memic_in)] = {};
+ u64 start_page_idx;
+ int err;
+
+ addr -= dev->bar_addr;
+ start_page_idx = (addr - hw_start_addr) >> PAGE_SHIFT;
+
+ MLX5_SET(dealloc_memic_in, in, opcode, MLX5_CMD_OP_DEALLOC_MEMIC);
+ MLX5_SET64(dealloc_memic_in, in, memic_start_addr, addr);
+ MLX5_SET(dealloc_memic_in, in, memic_size, length);
+
+ err = mlx5_cmd_exec_in(dev, dealloc_memic, in);
+ if (err)
+ return;
+
+ spin_lock(&dm->lock);
+ bitmap_clear(dm->memic_alloc_pages,
+ start_page_idx, num_pages);
+ spin_unlock(&dm->lock);
+}
+
+void mlx5_cmd_dealloc_memic_op(struct mlx5_dm *dm, phys_addr_t addr,
+ u8 operation)
+{
+ u32 in[MLX5_ST_SZ_DW(modify_memic_in)] = {};
+ struct mlx5_core_dev *dev = dm->dev;
+
+ MLX5_SET(modify_memic_in, in, opcode, MLX5_CMD_OP_MODIFY_MEMIC);
+ MLX5_SET(modify_memic_in, in, op_mod, MLX5_MODIFY_MEMIC_OP_MOD_DEALLOC);
+ MLX5_SET(modify_memic_in, in, memic_operation_type, operation);
+ MLX5_SET64(modify_memic_in, in, memic_start_addr, addr - dev->bar_addr);
+
+ mlx5_cmd_exec_in(dev, modify_memic, in);
+}
+
+static int mlx5_cmd_alloc_memic_op(struct mlx5_dm *dm, phys_addr_t addr,
+ u8 operation, phys_addr_t *op_addr)
+{
+ u32 out[MLX5_ST_SZ_DW(modify_memic_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(modify_memic_in)] = {};
+ struct mlx5_core_dev *dev = dm->dev;
+ int err;
+
+ MLX5_SET(modify_memic_in, in, opcode, MLX5_CMD_OP_MODIFY_MEMIC);
+ MLX5_SET(modify_memic_in, in, op_mod, MLX5_MODIFY_MEMIC_OP_MOD_ALLOC);
+ MLX5_SET(modify_memic_in, in, memic_operation_type, operation);
+ MLX5_SET64(modify_memic_in, in, memic_start_addr, addr - dev->bar_addr);
+
+ err = mlx5_cmd_exec_inout(dev, modify_memic, in, out);
+ if (err)
+ return err;
+
+ *op_addr = dev->bar_addr +
+ MLX5_GET64(modify_memic_out, out, memic_operation_addr);
+ return 0;
+}
+
+static int add_dm_mmap_entry(struct ib_ucontext *context,
+ struct mlx5_user_mmap_entry *mentry, u8 mmap_flag,
+ size_t size, u64 address)
+{
+ mentry->mmap_flag = mmap_flag;
+ mentry->address = address;
+
+ return rdma_user_mmap_entry_insert_range(
+ context, &mentry->rdma_entry, size,
+ MLX5_IB_MMAP_DEVICE_MEM << 16,
+ (MLX5_IB_MMAP_DEVICE_MEM << 16) + (1UL << 16) - 1);
+}
+
+static void mlx5_ib_dm_memic_free(struct kref *kref)
+{
+ struct mlx5_ib_dm_memic *dm =
+ container_of(kref, struct mlx5_ib_dm_memic, ref);
+ struct mlx5_ib_dev *dev = to_mdev(dm->base.ibdm.device);
+
+ mlx5_cmd_dealloc_memic(&dev->dm, dm->base.dev_addr, dm->base.size);
+ kfree(dm);
+}
+
+static int copy_op_to_user(struct mlx5_ib_dm_op_entry *op_entry,
+ struct uverbs_attr_bundle *attrs)
+{
+ u64 start_offset;
+ u16 page_idx;
+ int err;
+
+ page_idx = op_entry->mentry.rdma_entry.start_pgoff & 0xFFFF;
+ start_offset = op_entry->op_addr & ~PAGE_MASK;
+ err = uverbs_copy_to(attrs, MLX5_IB_ATTR_DM_MAP_OP_ADDR_RESP_PAGE_INDEX,
+ &page_idx, sizeof(page_idx));
+ if (err)
+ return err;
+
+ return uverbs_copy_to(attrs,
+ MLX5_IB_ATTR_DM_MAP_OP_ADDR_RESP_START_OFFSET,
+ &start_offset, sizeof(start_offset));
+}
+
+static int map_existing_op(struct mlx5_ib_dm_memic *dm, u8 op,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct mlx5_ib_dm_op_entry *op_entry;
+
+ op_entry = xa_load(&dm->ops, op);
+ if (!op_entry)
+ return -ENOENT;
+
+ return copy_op_to_user(op_entry, attrs);
+}
+
+static int UVERBS_HANDLER(MLX5_IB_METHOD_DM_MAP_OP_ADDR)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uobject *uobj = uverbs_attr_get_uobject(
+ attrs, MLX5_IB_ATTR_DM_MAP_OP_ADDR_REQ_HANDLE);
+ struct mlx5_ib_dev *dev = to_mdev(uobj->context->device);
+ struct ib_dm *ibdm = uobj->object;
+ struct mlx5_ib_dm_memic *dm = to_memic(ibdm);
+ struct mlx5_ib_dm_op_entry *op_entry;
+ int err;
+ u8 op;
+
+ err = uverbs_copy_from(&op, attrs, MLX5_IB_ATTR_DM_MAP_OP_ADDR_REQ_OP);
+ if (err)
+ return err;
+
+ if (op >= BITS_PER_TYPE(u32))
+ return -EOPNOTSUPP;
+
+ if (!(MLX5_CAP_DEV_MEM(dev->mdev, memic_operations) & BIT(op)))
+ return -EOPNOTSUPP;
+
+ mutex_lock(&dm->ops_xa_lock);
+ err = map_existing_op(dm, op, attrs);
+ if (!err || err != -ENOENT)
+ goto err_unlock;
+
+ op_entry = kzalloc(sizeof(*op_entry), GFP_KERNEL);
+ if (!op_entry)
+ goto err_unlock;
+
+ err = mlx5_cmd_alloc_memic_op(&dev->dm, dm->base.dev_addr, op,
+ &op_entry->op_addr);
+ if (err) {
+ kfree(op_entry);
+ goto err_unlock;
+ }
+ op_entry->op = op;
+ op_entry->dm = dm;
+
+ err = add_dm_mmap_entry(uobj->context, &op_entry->mentry,
+ MLX5_IB_MMAP_TYPE_MEMIC_OP, dm->base.size,
+ op_entry->op_addr & PAGE_MASK);
+ if (err) {
+ mlx5_cmd_dealloc_memic_op(&dev->dm, dm->base.dev_addr, op);
+ kfree(op_entry);
+ goto err_unlock;
+ }
+ /* From this point, entry will be freed by mmap_free */
+ kref_get(&dm->ref);
+
+ err = copy_op_to_user(op_entry, attrs);
+ if (err)
+ goto err_remove;
+
+ err = xa_insert(&dm->ops, op, op_entry, GFP_KERNEL);
+ if (err)
+ goto err_remove;
+ mutex_unlock(&dm->ops_xa_lock);
+
+ return 0;
+
+err_remove:
+ rdma_user_mmap_entry_remove(&op_entry->mentry.rdma_entry);
+err_unlock:
+ mutex_unlock(&dm->ops_xa_lock);
+
+ return err;
+}
+
+static struct ib_dm *handle_alloc_dm_memic(struct ib_ucontext *ctx,
+ struct ib_dm_alloc_attr *attr,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct mlx5_dm *dm_db = &to_mdev(ctx->device)->dm;
+ struct mlx5_ib_dm_memic *dm;
+ u64 start_offset;
+ u16 page_idx;
+ int err;
+ u64 address;
+
+ if (!dm_db || !MLX5_CAP_DEV_MEM(dm_db->dev, memic))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ dm = kzalloc(sizeof(*dm), GFP_KERNEL);
+ if (!dm)
+ return ERR_PTR(-ENOMEM);
+
+ dm->base.type = MLX5_IB_UAPI_DM_TYPE_MEMIC;
+ dm->base.size = roundup(attr->length, MLX5_MEMIC_BASE_SIZE);
+ dm->base.ibdm.device = ctx->device;
+
+ kref_init(&dm->ref);
+ xa_init(&dm->ops);
+ mutex_init(&dm->ops_xa_lock);
+ dm->req_length = attr->length;
+
+ err = mlx5_cmd_alloc_memic(dm_db, &dm->base.dev_addr,
+ dm->base.size, attr->alignment);
+ if (err) {
+ kfree(dm);
+ return ERR_PTR(err);
+ }
+
+ address = dm->base.dev_addr & PAGE_MASK;
+ err = add_dm_mmap_entry(ctx, &dm->mentry, MLX5_IB_MMAP_TYPE_MEMIC,
+ dm->base.size, address);
+ if (err) {
+ mlx5_cmd_dealloc_memic(dm_db, dm->base.dev_addr, dm->base.size);
+ kfree(dm);
+ return ERR_PTR(err);
+ }
+
+ page_idx = dm->mentry.rdma_entry.start_pgoff & 0xFFFF;
+ err = uverbs_copy_to(attrs, MLX5_IB_ATTR_ALLOC_DM_RESP_PAGE_INDEX,
+ &page_idx, sizeof(page_idx));
+ if (err)
+ goto err_copy;
+
+ start_offset = dm->base.dev_addr & ~PAGE_MASK;
+ err = uverbs_copy_to(attrs,
+ MLX5_IB_ATTR_ALLOC_DM_RESP_START_OFFSET,
+ &start_offset, sizeof(start_offset));
+ if (err)
+ goto err_copy;
+
+ return &dm->base.ibdm;
+
+err_copy:
+ rdma_user_mmap_entry_remove(&dm->mentry.rdma_entry);
+ return ERR_PTR(err);
+}
+
+static enum mlx5_sw_icm_type get_icm_type(int uapi_type)
+{
+ switch (uapi_type) {
+ case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM:
+ return MLX5_SW_ICM_TYPE_HEADER_MODIFY;
+ case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_PATTERN_SW_ICM:
+ return MLX5_SW_ICM_TYPE_HEADER_MODIFY_PATTERN;
+ case MLX5_IB_UAPI_DM_TYPE_ENCAP_SW_ICM:
+ return MLX5_SW_ICM_TYPE_SW_ENCAP;
+ case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
+ default:
+ return MLX5_SW_ICM_TYPE_STEERING;
+ }
+}
+
+static struct ib_dm *handle_alloc_dm_sw_icm(struct ib_ucontext *ctx,
+ struct ib_dm_alloc_attr *attr,
+ struct uverbs_attr_bundle *attrs,
+ int type)
+{
+ struct mlx5_core_dev *dev = to_mdev(ctx->device)->mdev;
+ enum mlx5_sw_icm_type icm_type;
+ struct mlx5_ib_dm_icm *dm;
+ u64 act_size;
+ int err;
+
+ if (!capable(CAP_SYS_RAWIO) || !capable(CAP_NET_RAW))
+ return ERR_PTR(-EPERM);
+
+ switch (type) {
+ case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
+ case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM:
+ case MLX5_IB_UAPI_DM_TYPE_ENCAP_SW_ICM:
+ if (!(MLX5_CAP_FLOWTABLE_NIC_RX(dev, sw_owner) ||
+ MLX5_CAP_FLOWTABLE_NIC_TX(dev, sw_owner) ||
+ MLX5_CAP_FLOWTABLE_NIC_RX(dev, sw_owner_v2) ||
+ MLX5_CAP_FLOWTABLE_NIC_TX(dev, sw_owner_v2)))
+ return ERR_PTR(-EOPNOTSUPP);
+ break;
+ case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_PATTERN_SW_ICM:
+ if (!MLX5_CAP_FLOWTABLE_NIC_RX(dev, sw_owner_v2) ||
+ !MLX5_CAP_FLOWTABLE_NIC_TX(dev, sw_owner_v2))
+ return ERR_PTR(-EOPNOTSUPP);
+ break;
+ default:
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+
+ dm = kzalloc(sizeof(*dm), GFP_KERNEL);
+ if (!dm)
+ return ERR_PTR(-ENOMEM);
+
+ dm->base.type = type;
+ dm->base.ibdm.device = ctx->device;
+
+ /* Allocation size must a multiple of the basic block size
+ * and a power of 2.
+ */
+ act_size = round_up(attr->length, MLX5_SW_ICM_BLOCK_SIZE(dev));
+ act_size = roundup_pow_of_two(act_size);
+
+ dm->base.size = act_size;
+ icm_type = get_icm_type(type);
+
+ err = mlx5_dm_sw_icm_alloc(dev, icm_type, act_size, attr->alignment,
+ to_mucontext(ctx)->devx_uid,
+ &dm->base.dev_addr, &dm->obj_id);
+ if (err)
+ goto free;
+
+ err = uverbs_copy_to(attrs, MLX5_IB_ATTR_ALLOC_DM_RESP_START_OFFSET,
+ &dm->base.dev_addr, sizeof(dm->base.dev_addr));
+ if (err) {
+ mlx5_dm_sw_icm_dealloc(dev, icm_type, dm->base.size,
+ to_mucontext(ctx)->devx_uid,
+ dm->base.dev_addr, dm->obj_id);
+ goto free;
+ }
+ return &dm->base.ibdm;
+free:
+ kfree(dm);
+ return ERR_PTR(err);
+}
+
+struct ib_dm *mlx5_ib_alloc_dm(struct ib_device *ibdev,
+ struct ib_ucontext *context,
+ struct ib_dm_alloc_attr *attr,
+ struct uverbs_attr_bundle *attrs)
+{
+ enum mlx5_ib_uapi_dm_type type;
+ int err;
+
+ err = uverbs_get_const_default(&type, attrs,
+ MLX5_IB_ATTR_ALLOC_DM_REQ_TYPE,
+ MLX5_IB_UAPI_DM_TYPE_MEMIC);
+ if (err)
+ return ERR_PTR(err);
+
+ mlx5_ib_dbg(to_mdev(ibdev), "alloc_dm req: dm_type=%d user_length=0x%llx log_alignment=%d\n",
+ type, attr->length, attr->alignment);
+
+ switch (type) {
+ case MLX5_IB_UAPI_DM_TYPE_MEMIC:
+ return handle_alloc_dm_memic(context, attr, attrs);
+ case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
+ case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM:
+ case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_PATTERN_SW_ICM:
+ case MLX5_IB_UAPI_DM_TYPE_ENCAP_SW_ICM:
+ return handle_alloc_dm_sw_icm(context, attr, attrs, type);
+ default:
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+}
+
+static void dm_memic_remove_ops(struct mlx5_ib_dm_memic *dm)
+{
+ struct mlx5_ib_dm_op_entry *entry;
+ unsigned long idx;
+
+ mutex_lock(&dm->ops_xa_lock);
+ xa_for_each(&dm->ops, idx, entry) {
+ xa_erase(&dm->ops, idx);
+ rdma_user_mmap_entry_remove(&entry->mentry.rdma_entry);
+ }
+ mutex_unlock(&dm->ops_xa_lock);
+}
+
+static void mlx5_dm_memic_dealloc(struct mlx5_ib_dm_memic *dm)
+{
+ dm_memic_remove_ops(dm);
+ rdma_user_mmap_entry_remove(&dm->mentry.rdma_entry);
+}
+
+static int mlx5_dm_icm_dealloc(struct mlx5_ib_ucontext *ctx,
+ struct mlx5_ib_dm_icm *dm)
+{
+ enum mlx5_sw_icm_type type = get_icm_type(dm->base.type);
+ struct mlx5_core_dev *dev = to_mdev(dm->base.ibdm.device)->mdev;
+ int err;
+
+ err = mlx5_dm_sw_icm_dealloc(dev, type, dm->base.size, ctx->devx_uid,
+ dm->base.dev_addr, dm->obj_id);
+ if (!err)
+ kfree(dm);
+ return 0;
+}
+
+static int mlx5_ib_dealloc_dm(struct ib_dm *ibdm,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct mlx5_ib_ucontext *ctx = rdma_udata_to_drv_context(
+ &attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
+ struct mlx5_ib_dm *dm = to_mdm(ibdm);
+
+ switch (dm->type) {
+ case MLX5_IB_UAPI_DM_TYPE_MEMIC:
+ mlx5_dm_memic_dealloc(to_memic(ibdm));
+ return 0;
+ case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
+ case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM:
+ case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_PATTERN_SW_ICM:
+ case MLX5_IB_UAPI_DM_TYPE_ENCAP_SW_ICM:
+ return mlx5_dm_icm_dealloc(ctx, to_icm(ibdm));
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int UVERBS_HANDLER(MLX5_IB_METHOD_DM_QUERY)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_dm *ibdm =
+ uverbs_attr_get_obj(attrs, MLX5_IB_ATTR_QUERY_DM_REQ_HANDLE);
+ struct mlx5_ib_dm *dm = to_mdm(ibdm);
+ struct mlx5_ib_dm_memic *memic;
+ u64 start_offset;
+ u16 page_idx;
+ int err;
+
+ if (dm->type != MLX5_IB_UAPI_DM_TYPE_MEMIC)
+ return -EOPNOTSUPP;
+
+ memic = to_memic(ibdm);
+ page_idx = memic->mentry.rdma_entry.start_pgoff & 0xFFFF;
+ err = uverbs_copy_to(attrs, MLX5_IB_ATTR_QUERY_DM_RESP_PAGE_INDEX,
+ &page_idx, sizeof(page_idx));
+ if (err)
+ return err;
+
+ start_offset = memic->base.dev_addr & ~PAGE_MASK;
+ err = uverbs_copy_to(attrs, MLX5_IB_ATTR_QUERY_DM_RESP_START_OFFSET,
+ &start_offset, sizeof(start_offset));
+ if (err)
+ return err;
+
+ return uverbs_copy_to(attrs, MLX5_IB_ATTR_QUERY_DM_RESP_LENGTH,
+ &memic->req_length,
+ sizeof(memic->req_length));
+}
+
+void mlx5_ib_dm_mmap_free(struct mlx5_ib_dev *dev,
+ struct mlx5_user_mmap_entry *mentry)
+{
+ struct mlx5_ib_dm_op_entry *op_entry;
+ struct mlx5_ib_dm_memic *mdm;
+
+ switch (mentry->mmap_flag) {
+ case MLX5_IB_MMAP_TYPE_MEMIC:
+ mdm = container_of(mentry, struct mlx5_ib_dm_memic, mentry);
+ kref_put(&mdm->ref, mlx5_ib_dm_memic_free);
+ break;
+ case MLX5_IB_MMAP_TYPE_MEMIC_OP:
+ op_entry = container_of(mentry, struct mlx5_ib_dm_op_entry,
+ mentry);
+ mdm = op_entry->dm;
+ mlx5_cmd_dealloc_memic_op(&dev->dm, mdm->base.dev_addr,
+ op_entry->op);
+ kfree(op_entry);
+ kref_put(&mdm->ref, mlx5_ib_dm_memic_free);
+ break;
+ default:
+ WARN_ON(true);
+ }
+}
+
+DECLARE_UVERBS_NAMED_METHOD(
+ MLX5_IB_METHOD_DM_QUERY,
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_QUERY_DM_REQ_HANDLE, UVERBS_OBJECT_DM,
+ UVERBS_ACCESS_READ, UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_QUERY_DM_RESP_START_OFFSET,
+ UVERBS_ATTR_TYPE(u64), UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_QUERY_DM_RESP_PAGE_INDEX,
+ UVERBS_ATTR_TYPE(u16), UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_QUERY_DM_RESP_LENGTH,
+ UVERBS_ATTR_TYPE(u64), UA_MANDATORY));
+
+ADD_UVERBS_ATTRIBUTES_SIMPLE(
+ mlx5_ib_dm, UVERBS_OBJECT_DM, UVERBS_METHOD_DM_ALLOC,
+ UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_ALLOC_DM_RESP_START_OFFSET,
+ UVERBS_ATTR_TYPE(u64), UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_ALLOC_DM_RESP_PAGE_INDEX,
+ UVERBS_ATTR_TYPE(u16), UA_OPTIONAL),
+ UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_ALLOC_DM_REQ_TYPE,
+ enum mlx5_ib_uapi_dm_type, UA_OPTIONAL));
+
+DECLARE_UVERBS_NAMED_METHOD(
+ MLX5_IB_METHOD_DM_MAP_OP_ADDR,
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_DM_MAP_OP_ADDR_REQ_HANDLE,
+ UVERBS_OBJECT_DM,
+ UVERBS_ACCESS_READ,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DM_MAP_OP_ADDR_REQ_OP,
+ UVERBS_ATTR_TYPE(u8),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DM_MAP_OP_ADDR_RESP_START_OFFSET,
+ UVERBS_ATTR_TYPE(u64),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DM_MAP_OP_ADDR_RESP_PAGE_INDEX,
+ UVERBS_ATTR_TYPE(u16),
+ UA_OPTIONAL));
+
+DECLARE_UVERBS_GLOBAL_METHODS(UVERBS_OBJECT_DM,
+ &UVERBS_METHOD(MLX5_IB_METHOD_DM_MAP_OP_ADDR),
+ &UVERBS_METHOD(MLX5_IB_METHOD_DM_QUERY));
+
+const struct uapi_definition mlx5_ib_dm_defs[] = {
+ UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_DM, &mlx5_ib_dm),
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_DM),
+ {},
+};
+
+const struct ib_device_ops mlx5_ib_dev_dm_ops = {
+ .alloc_dm = mlx5_ib_alloc_dm,
+ .dealloc_dm = mlx5_ib_dealloc_dm,
+ .reg_dm_mr = mlx5_ib_reg_dm_mr,
+};
diff --git a/drivers/infiniband/hw/mlx5/dm.h b/drivers/infiniband/hw/mlx5/dm.h
new file mode 100644
index 000000000000..9674a80d8d70
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/dm.h
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright (c) 2021, Mellanox Technologies inc. All rights reserved.
+ */
+
+#ifndef _MLX5_IB_DM_H
+#define _MLX5_IB_DM_H
+
+#include "mlx5_ib.h"
+
+extern const struct ib_device_ops mlx5_ib_dev_dm_ops;
+extern const struct uapi_definition mlx5_ib_dm_defs[];
+
+struct mlx5_ib_dm {
+ struct ib_dm ibdm;
+ u32 type;
+ phys_addr_t dev_addr;
+ size_t size;
+};
+
+struct mlx5_ib_dm_op_entry {
+ struct mlx5_user_mmap_entry mentry;
+ phys_addr_t op_addr;
+ struct mlx5_ib_dm_memic *dm;
+ u8 op;
+};
+
+struct mlx5_ib_dm_memic {
+ struct mlx5_ib_dm base;
+ struct mlx5_user_mmap_entry mentry;
+ struct xarray ops;
+ struct mutex ops_xa_lock;
+ struct kref ref;
+ size_t req_length;
+};
+
+struct mlx5_ib_dm_icm {
+ struct mlx5_ib_dm base;
+ u32 obj_id;
+};
+
+static inline struct mlx5_ib_dm *to_mdm(struct ib_dm *ibdm)
+{
+ return container_of(ibdm, struct mlx5_ib_dm, ibdm);
+}
+
+static inline struct mlx5_ib_dm_memic *to_memic(struct ib_dm *ibdm)
+{
+ return container_of(ibdm, struct mlx5_ib_dm_memic, base.ibdm);
+}
+
+static inline struct mlx5_ib_dm_icm *to_icm(struct ib_dm *ibdm)
+{
+ return container_of(ibdm, struct mlx5_ib_dm_icm, base.ibdm);
+}
+
+struct ib_dm *mlx5_ib_alloc_dm(struct ib_device *ibdev,
+ struct ib_ucontext *context,
+ struct ib_dm_alloc_attr *attr,
+ struct uverbs_attr_bundle *attrs);
+void mlx5_ib_dm_mmap_free(struct mlx5_ib_dev *dev,
+ struct mlx5_user_mmap_entry *mentry);
+void mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, phys_addr_t addr,
+ u64 length);
+void mlx5_cmd_dealloc_memic_op(struct mlx5_dm *dm, phys_addr_t addr,
+ u8 operation);
+
+#endif /* _MLX5_IB_DM_H */
diff --git a/drivers/infiniband/hw/mlx5/dmah.c b/drivers/infiniband/hw/mlx5/dmah.c
new file mode 100644
index 000000000000..362a88992ffa
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/dmah.c
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved
+ */
+
+#include <rdma/uverbs_std_types.h>
+#include <linux/pci-tph.h>
+#include "dmah.h"
+
+#define UVERBS_MODULE_NAME mlx5_ib
+#include <rdma/uverbs_named_ioctl.h>
+
+static int mlx5_ib_alloc_dmah(struct ib_dmah *ibdmah,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct mlx5_core_dev *mdev = to_mdev(ibdmah->device)->mdev;
+ struct mlx5_ib_dmah *dmah = to_mdmah(ibdmah);
+ u16 st_bits = BIT(IB_DMAH_CPU_ID_EXISTS) |
+ BIT(IB_DMAH_MEM_TYPE_EXISTS);
+ int err;
+
+ /* PH is a must for TPH following PCIe spec 6.2-1.0 */
+ if (!(ibdmah->valid_fields & BIT(IB_DMAH_PH_EXISTS)))
+ return -EINVAL;
+
+ /* ST is optional; however, partial data for it is not allowed */
+ if (ibdmah->valid_fields & st_bits) {
+ if ((ibdmah->valid_fields & st_bits) != st_bits)
+ return -EINVAL;
+ err = mlx5_st_alloc_index(mdev, ibdmah->mem_type,
+ ibdmah->cpu_id, &dmah->st_index);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static int mlx5_ib_dealloc_dmah(struct ib_dmah *ibdmah,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct mlx5_ib_dmah *dmah = to_mdmah(ibdmah);
+ struct mlx5_core_dev *mdev = to_mdev(ibdmah->device)->mdev;
+
+ if (ibdmah->valid_fields & BIT(IB_DMAH_CPU_ID_EXISTS))
+ return mlx5_st_dealloc_index(mdev, dmah->st_index);
+
+ return 0;
+}
+
+const struct ib_device_ops mlx5_ib_dev_dmah_ops = {
+ .alloc_dmah = mlx5_ib_alloc_dmah,
+ .dealloc_dmah = mlx5_ib_dealloc_dmah,
+};
diff --git a/drivers/infiniband/hw/mlx5/dmah.h b/drivers/infiniband/hw/mlx5/dmah.h
new file mode 100644
index 000000000000..68de72b4744a
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/dmah.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved
+ */
+
+#ifndef _MLX5_IB_DMAH_H
+#define _MLX5_IB_DMAH_H
+
+#include "mlx5_ib.h"
+
+extern const struct ib_device_ops mlx5_ib_dev_dmah_ops;
+
+struct mlx5_ib_dmah {
+ struct ib_dmah ibdmah;
+ u16 st_index;
+};
+
+static inline struct mlx5_ib_dmah *to_mdmah(struct ib_dmah *ibdmah)
+{
+ return container_of(ibdmah, struct mlx5_ib_dmah, ibdmah);
+}
+
+#endif /* _MLX5_IB_DMAH_H */
diff --git a/drivers/infiniband/hw/mlx5/doorbell.c b/drivers/infiniband/hw/mlx5/doorbell.c
index a0e4e6ddb71a..e32111117a5e 100644
--- a/drivers/infiniband/hw/mlx5/doorbell.c
+++ b/drivers/infiniband/hw/mlx5/doorbell.c
@@ -32,6 +32,7 @@
#include <linux/kref.h>
#include <linux/slab.h>
+#include <linux/sched/mm.h>
#include <rdma/ib_umem.h>
#include "mlx5_ib.h"
@@ -41,6 +42,7 @@ struct mlx5_ib_user_db_page {
struct ib_umem *umem;
unsigned long user_virt;
int refcnt;
+ struct mm_struct *mm;
};
int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, unsigned long virt,
@@ -52,7 +54,8 @@ int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, unsigned long virt,
mutex_lock(&context->db_page_mutex);
list_for_each_entry(page, &context->db_page_list, list)
- if (page->user_virt == (virt & PAGE_MASK))
+ if ((current->mm == page->mm) &&
+ (page->user_virt == (virt & PAGE_MASK)))
goto found;
page = kmalloc(sizeof(*page), GFP_KERNEL);
@@ -63,18 +66,21 @@ int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, unsigned long virt,
page->user_virt = (virt & PAGE_MASK);
page->refcnt = 0;
- page->umem = ib_umem_get(&context->ibucontext, virt & PAGE_MASK,
- PAGE_SIZE, 0, 0);
+ page->umem = ib_umem_get(context->ibucontext.device, virt & PAGE_MASK,
+ PAGE_SIZE, 0);
if (IS_ERR(page->umem)) {
err = PTR_ERR(page->umem);
kfree(page);
goto out;
}
+ mmgrab(current->mm);
+ page->mm = current->mm;
list_add(&page->list, &context->db_page_list);
found:
- db->dma = sg_dma_address(page->umem->sg_head.sgl) + (virt & ~PAGE_MASK);
+ db->dma = sg_dma_address(page->umem->sgt_append.sgt.sgl) +
+ (virt & ~PAGE_MASK);
db->u.user_page = page;
++page->refcnt;
@@ -90,6 +96,7 @@ void mlx5_ib_db_unmap_user(struct mlx5_ib_ucontext *context, struct mlx5_db *db)
if (!--db->u.user_page->refcnt) {
list_del(&db->u.user_page->list);
+ mmdrop(db->u.user_page->mm);
ib_umem_release(db->u.user_page->umem);
kfree(db->u.user_page);
}
diff --git a/drivers/infiniband/hw/mlx5/fs.c b/drivers/infiniband/hw/mlx5/fs.c
new file mode 100644
index 000000000000..d17823ce7f38
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/fs.c
@@ -0,0 +1,3516 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved.
+ */
+
+#include <rdma/ib_user_verbs.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/uverbs_types.h>
+#include <rdma/uverbs_ioctl.h>
+#include <rdma/uverbs_std_types.h>
+#include <rdma/mlx5_user_ioctl_cmds.h>
+#include <rdma/mlx5_user_ioctl_verbs.h>
+#include <rdma/ib_hdrs.h>
+#include <rdma/ib_umem.h>
+#include <rdma/ib_ucaps.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/fs.h>
+#include <linux/mlx5/fs_helpers.h>
+#include <linux/mlx5/eswitch.h>
+#include <net/inet_ecn.h>
+#include "mlx5_ib.h"
+#include "counters.h"
+#include "devx.h"
+#include "fs.h"
+
+#define UVERBS_MODULE_NAME mlx5_ib
+#include <rdma/uverbs_named_ioctl.h>
+
+enum {
+ MATCH_CRITERIA_ENABLE_OUTER_BIT,
+ MATCH_CRITERIA_ENABLE_MISC_BIT,
+ MATCH_CRITERIA_ENABLE_INNER_BIT,
+ MATCH_CRITERIA_ENABLE_MISC2_BIT
+};
+
+
+struct mlx5_per_qp_opfc {
+ struct mlx5_ib_op_fc opfcs[MLX5_IB_OPCOUNTER_MAX];
+};
+
+#define HEADER_IS_ZERO(match_criteria, headers) \
+ !(memchr_inv(MLX5_ADDR_OF(fte_match_param, match_criteria, headers), \
+ 0, MLX5_FLD_SZ_BYTES(fte_match_param, headers))) \
+
+static u8 get_match_criteria_enable(u32 *match_criteria)
+{
+ u8 match_criteria_enable;
+
+ match_criteria_enable =
+ (!HEADER_IS_ZERO(match_criteria, outer_headers)) <<
+ MATCH_CRITERIA_ENABLE_OUTER_BIT;
+ match_criteria_enable |=
+ (!HEADER_IS_ZERO(match_criteria, misc_parameters)) <<
+ MATCH_CRITERIA_ENABLE_MISC_BIT;
+ match_criteria_enable |=
+ (!HEADER_IS_ZERO(match_criteria, inner_headers)) <<
+ MATCH_CRITERIA_ENABLE_INNER_BIT;
+ match_criteria_enable |=
+ (!HEADER_IS_ZERO(match_criteria, misc_parameters_2)) <<
+ MATCH_CRITERIA_ENABLE_MISC2_BIT;
+
+ return match_criteria_enable;
+}
+
+static int set_proto(void *outer_c, void *outer_v, u8 mask, u8 val)
+{
+ u8 entry_mask;
+ u8 entry_val;
+ int err = 0;
+
+ if (!mask)
+ goto out;
+
+ entry_mask = MLX5_GET(fte_match_set_lyr_2_4, outer_c,
+ ip_protocol);
+ entry_val = MLX5_GET(fte_match_set_lyr_2_4, outer_v,
+ ip_protocol);
+ if (!entry_mask) {
+ MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_protocol, mask);
+ MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_protocol, val);
+ goto out;
+ }
+ /* Don't override existing ip protocol */
+ if (mask != entry_mask || val != entry_val)
+ err = -EINVAL;
+out:
+ return err;
+}
+
+static void set_flow_label(void *misc_c, void *misc_v, u32 mask, u32 val,
+ bool inner)
+{
+ if (inner) {
+ MLX5_SET(fte_match_set_misc,
+ misc_c, inner_ipv6_flow_label, mask);
+ MLX5_SET(fte_match_set_misc,
+ misc_v, inner_ipv6_flow_label, val);
+ } else {
+ MLX5_SET(fte_match_set_misc,
+ misc_c, outer_ipv6_flow_label, mask);
+ MLX5_SET(fte_match_set_misc,
+ misc_v, outer_ipv6_flow_label, val);
+ }
+}
+
+static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val)
+{
+ MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_ecn, mask);
+ MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_ecn, val);
+ MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_dscp, mask >> 2);
+ MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_dscp, val >> 2);
+}
+
+static int check_mpls_supp_fields(u32 field_support, const __be32 *set_mask)
+{
+ if (MLX5_GET(fte_match_mpls, set_mask, mpls_label) &&
+ !(field_support & MLX5_FIELD_SUPPORT_MPLS_LABEL))
+ return -EOPNOTSUPP;
+
+ if (MLX5_GET(fte_match_mpls, set_mask, mpls_exp) &&
+ !(field_support & MLX5_FIELD_SUPPORT_MPLS_EXP))
+ return -EOPNOTSUPP;
+
+ if (MLX5_GET(fte_match_mpls, set_mask, mpls_s_bos) &&
+ !(field_support & MLX5_FIELD_SUPPORT_MPLS_S_BOS))
+ return -EOPNOTSUPP;
+
+ if (MLX5_GET(fte_match_mpls, set_mask, mpls_ttl) &&
+ !(field_support & MLX5_FIELD_SUPPORT_MPLS_TTL))
+ return -EOPNOTSUPP;
+
+ return 0;
+}
+
+#define LAST_ETH_FIELD vlan_tag
+#define LAST_IPV4_FIELD tos
+#define LAST_IPV6_FIELD traffic_class
+#define LAST_TCP_UDP_FIELD src_port
+#define LAST_TUNNEL_FIELD tunnel_id
+#define LAST_FLOW_TAG_FIELD tag_id
+#define LAST_DROP_FIELD size
+#define LAST_COUNTERS_FIELD counters
+
+/* Field is the last supported field */
+#define FIELDS_NOT_SUPPORTED(filter, field) \
+ memchr_inv((void *)&filter.field + sizeof(filter.field), 0, \
+ sizeof(filter) - offsetofend(typeof(filter), field))
+
+int parse_flow_flow_action(struct mlx5_ib_flow_action *maction,
+ bool is_egress,
+ struct mlx5_flow_act *action)
+{
+
+ switch (maction->ib_action.type) {
+ case IB_FLOW_ACTION_UNSPECIFIED:
+ if (maction->flow_action_raw.sub_type ==
+ MLX5_IB_FLOW_ACTION_MODIFY_HEADER) {
+ if (action->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
+ return -EINVAL;
+ action->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+ action->modify_hdr =
+ maction->flow_action_raw.modify_hdr;
+ return 0;
+ }
+ if (maction->flow_action_raw.sub_type ==
+ MLX5_IB_FLOW_ACTION_DECAP) {
+ if (action->action & MLX5_FLOW_CONTEXT_ACTION_DECAP)
+ return -EINVAL;
+ action->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
+ return 0;
+ }
+ if (maction->flow_action_raw.sub_type ==
+ MLX5_IB_FLOW_ACTION_PACKET_REFORMAT) {
+ if (action->action &
+ MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT)
+ return -EINVAL;
+ action->action |=
+ MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
+ action->pkt_reformat =
+ maction->flow_action_raw.pkt_reformat;
+ return 0;
+ }
+ fallthrough;
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int parse_flow_attr(struct mlx5_core_dev *mdev,
+ struct mlx5_flow_spec *spec,
+ const union ib_flow_spec *ib_spec,
+ const struct ib_flow_attr *flow_attr,
+ struct mlx5_flow_act *action, u32 prev_type)
+{
+ struct mlx5_flow_context *flow_context = &spec->flow_context;
+ u32 *match_c = spec->match_criteria;
+ u32 *match_v = spec->match_value;
+ void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c,
+ misc_parameters);
+ void *misc_params_v = MLX5_ADDR_OF(fte_match_param, match_v,
+ misc_parameters);
+ void *misc_params2_c = MLX5_ADDR_OF(fte_match_param, match_c,
+ misc_parameters_2);
+ void *misc_params2_v = MLX5_ADDR_OF(fte_match_param, match_v,
+ misc_parameters_2);
+ void *headers_c;
+ void *headers_v;
+ int match_ipv;
+ int ret;
+
+ if (ib_spec->type & IB_FLOW_SPEC_INNER) {
+ headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
+ inner_headers);
+ headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
+ inner_headers);
+ match_ipv = MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+ ft_field_support.inner_ip_version);
+ } else {
+ headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
+ outer_headers);
+ headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
+ outer_headers);
+ match_ipv = MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+ ft_field_support.outer_ip_version);
+ }
+
+ switch (ib_spec->type & ~IB_FLOW_SPEC_INNER) {
+ case IB_FLOW_SPEC_ETH:
+ if (FIELDS_NOT_SUPPORTED(ib_spec->eth.mask, LAST_ETH_FIELD))
+ return -EOPNOTSUPP;
+
+ ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+ dmac_47_16),
+ ib_spec->eth.mask.dst_mac);
+ ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+ dmac_47_16),
+ ib_spec->eth.val.dst_mac);
+
+ ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+ smac_47_16),
+ ib_spec->eth.mask.src_mac);
+ ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+ smac_47_16),
+ ib_spec->eth.val.src_mac);
+
+ if (ib_spec->eth.mask.vlan_tag) {
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ cvlan_tag, 1);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ cvlan_tag, 1);
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ first_vid, ntohs(ib_spec->eth.mask.vlan_tag));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ first_vid, ntohs(ib_spec->eth.val.vlan_tag));
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ first_cfi,
+ ntohs(ib_spec->eth.mask.vlan_tag) >> 12);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ first_cfi,
+ ntohs(ib_spec->eth.val.vlan_tag) >> 12);
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ first_prio,
+ ntohs(ib_spec->eth.mask.vlan_tag) >> 13);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ first_prio,
+ ntohs(ib_spec->eth.val.vlan_tag) >> 13);
+ }
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ ethertype, ntohs(ib_spec->eth.mask.ether_type));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ ethertype, ntohs(ib_spec->eth.val.ether_type));
+ break;
+ case IB_FLOW_SPEC_IPV4:
+ if (FIELDS_NOT_SUPPORTED(ib_spec->ipv4.mask, LAST_IPV4_FIELD))
+ return -EOPNOTSUPP;
+
+ if (match_ipv) {
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ ip_version, 0xf);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ ip_version, MLX5_FS_IPV4_VERSION);
+ } else {
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ ethertype, 0xffff);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ ethertype, ETH_P_IP);
+ }
+
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+ src_ipv4_src_ipv6.ipv4_layout.ipv4),
+ &ib_spec->ipv4.mask.src_ip,
+ sizeof(ib_spec->ipv4.mask.src_ip));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+ src_ipv4_src_ipv6.ipv4_layout.ipv4),
+ &ib_spec->ipv4.val.src_ip,
+ sizeof(ib_spec->ipv4.val.src_ip));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+ dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
+ &ib_spec->ipv4.mask.dst_ip,
+ sizeof(ib_spec->ipv4.mask.dst_ip));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+ dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
+ &ib_spec->ipv4.val.dst_ip,
+ sizeof(ib_spec->ipv4.val.dst_ip));
+
+ set_tos(headers_c, headers_v,
+ ib_spec->ipv4.mask.tos, ib_spec->ipv4.val.tos);
+
+ if (set_proto(headers_c, headers_v,
+ ib_spec->ipv4.mask.proto,
+ ib_spec->ipv4.val.proto))
+ return -EINVAL;
+ break;
+ case IB_FLOW_SPEC_IPV6:
+ if (FIELDS_NOT_SUPPORTED(ib_spec->ipv6.mask, LAST_IPV6_FIELD))
+ return -EOPNOTSUPP;
+
+ if (match_ipv) {
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ ip_version, 0xf);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ ip_version, MLX5_FS_IPV6_VERSION);
+ } else {
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ ethertype, 0xffff);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ ethertype, ETH_P_IPV6);
+ }
+
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+ src_ipv4_src_ipv6.ipv6_layout.ipv6),
+ &ib_spec->ipv6.mask.src_ip,
+ sizeof(ib_spec->ipv6.mask.src_ip));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+ src_ipv4_src_ipv6.ipv6_layout.ipv6),
+ &ib_spec->ipv6.val.src_ip,
+ sizeof(ib_spec->ipv6.val.src_ip));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ &ib_spec->ipv6.mask.dst_ip,
+ sizeof(ib_spec->ipv6.mask.dst_ip));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ &ib_spec->ipv6.val.dst_ip,
+ sizeof(ib_spec->ipv6.val.dst_ip));
+
+ set_tos(headers_c, headers_v,
+ ib_spec->ipv6.mask.traffic_class,
+ ib_spec->ipv6.val.traffic_class);
+
+ if (set_proto(headers_c, headers_v,
+ ib_spec->ipv6.mask.next_hdr,
+ ib_spec->ipv6.val.next_hdr))
+ return -EINVAL;
+
+ set_flow_label(misc_params_c, misc_params_v,
+ ntohl(ib_spec->ipv6.mask.flow_label),
+ ntohl(ib_spec->ipv6.val.flow_label),
+ ib_spec->type & IB_FLOW_SPEC_INNER);
+ break;
+ case IB_FLOW_SPEC_ESP:
+ return -EOPNOTSUPP;
+ case IB_FLOW_SPEC_TCP:
+ if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask,
+ LAST_TCP_UDP_FIELD))
+ return -EOPNOTSUPP;
+
+ if (set_proto(headers_c, headers_v, 0xff, IPPROTO_TCP))
+ return -EINVAL;
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_sport,
+ ntohs(ib_spec->tcp_udp.mask.src_port));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_sport,
+ ntohs(ib_spec->tcp_udp.val.src_port));
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_dport,
+ ntohs(ib_spec->tcp_udp.mask.dst_port));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_dport,
+ ntohs(ib_spec->tcp_udp.val.dst_port));
+ break;
+ case IB_FLOW_SPEC_UDP:
+ if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask,
+ LAST_TCP_UDP_FIELD))
+ return -EOPNOTSUPP;
+
+ if (set_proto(headers_c, headers_v, 0xff, IPPROTO_UDP))
+ return -EINVAL;
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_sport,
+ ntohs(ib_spec->tcp_udp.mask.src_port));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_sport,
+ ntohs(ib_spec->tcp_udp.val.src_port));
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_dport,
+ ntohs(ib_spec->tcp_udp.mask.dst_port));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport,
+ ntohs(ib_spec->tcp_udp.val.dst_port));
+ break;
+ case IB_FLOW_SPEC_GRE:
+ if (ib_spec->gre.mask.c_ks_res0_ver)
+ return -EOPNOTSUPP;
+
+ if (set_proto(headers_c, headers_v, 0xff, IPPROTO_GRE))
+ return -EINVAL;
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
+ 0xff);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
+ IPPROTO_GRE);
+
+ MLX5_SET(fte_match_set_misc, misc_params_c, gre_protocol,
+ ntohs(ib_spec->gre.mask.protocol));
+ MLX5_SET(fte_match_set_misc, misc_params_v, gre_protocol,
+ ntohs(ib_spec->gre.val.protocol));
+
+ memcpy(MLX5_ADDR_OF(fte_match_set_misc, misc_params_c,
+ gre_key.nvgre.hi),
+ &ib_spec->gre.mask.key,
+ sizeof(ib_spec->gre.mask.key));
+ memcpy(MLX5_ADDR_OF(fte_match_set_misc, misc_params_v,
+ gre_key.nvgre.hi),
+ &ib_spec->gre.val.key,
+ sizeof(ib_spec->gre.val.key));
+ break;
+ case IB_FLOW_SPEC_MPLS:
+ switch (prev_type) {
+ case IB_FLOW_SPEC_UDP:
+ if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+ ft_field_support.outer_first_mpls_over_udp),
+ &ib_spec->mpls.mask.tag))
+ return -EOPNOTSUPP;
+
+ memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
+ outer_first_mpls_over_udp),
+ &ib_spec->mpls.val.tag,
+ sizeof(ib_spec->mpls.val.tag));
+ memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
+ outer_first_mpls_over_udp),
+ &ib_spec->mpls.mask.tag,
+ sizeof(ib_spec->mpls.mask.tag));
+ break;
+ case IB_FLOW_SPEC_GRE:
+ if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+ ft_field_support.outer_first_mpls_over_gre),
+ &ib_spec->mpls.mask.tag))
+ return -EOPNOTSUPP;
+
+ memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
+ outer_first_mpls_over_gre),
+ &ib_spec->mpls.val.tag,
+ sizeof(ib_spec->mpls.val.tag));
+ memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
+ outer_first_mpls_over_gre),
+ &ib_spec->mpls.mask.tag,
+ sizeof(ib_spec->mpls.mask.tag));
+ break;
+ default:
+ if (ib_spec->type & IB_FLOW_SPEC_INNER) {
+ if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+ ft_field_support.inner_first_mpls),
+ &ib_spec->mpls.mask.tag))
+ return -EOPNOTSUPP;
+
+ memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
+ inner_first_mpls),
+ &ib_spec->mpls.val.tag,
+ sizeof(ib_spec->mpls.val.tag));
+ memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
+ inner_first_mpls),
+ &ib_spec->mpls.mask.tag,
+ sizeof(ib_spec->mpls.mask.tag));
+ } else {
+ if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+ ft_field_support.outer_first_mpls),
+ &ib_spec->mpls.mask.tag))
+ return -EOPNOTSUPP;
+
+ memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
+ outer_first_mpls),
+ &ib_spec->mpls.val.tag,
+ sizeof(ib_spec->mpls.val.tag));
+ memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
+ outer_first_mpls),
+ &ib_spec->mpls.mask.tag,
+ sizeof(ib_spec->mpls.mask.tag));
+ }
+ }
+ break;
+ case IB_FLOW_SPEC_VXLAN_TUNNEL:
+ if (FIELDS_NOT_SUPPORTED(ib_spec->tunnel.mask,
+ LAST_TUNNEL_FIELD))
+ return -EOPNOTSUPP;
+
+ MLX5_SET(fte_match_set_misc, misc_params_c, vxlan_vni,
+ ntohl(ib_spec->tunnel.mask.tunnel_id));
+ MLX5_SET(fte_match_set_misc, misc_params_v, vxlan_vni,
+ ntohl(ib_spec->tunnel.val.tunnel_id));
+ break;
+ case IB_FLOW_SPEC_ACTION_TAG:
+ if (FIELDS_NOT_SUPPORTED(ib_spec->flow_tag,
+ LAST_FLOW_TAG_FIELD))
+ return -EOPNOTSUPP;
+ if (ib_spec->flow_tag.tag_id >= BIT(24))
+ return -EINVAL;
+
+ flow_context->flow_tag = ib_spec->flow_tag.tag_id;
+ flow_context->flags |= FLOW_CONTEXT_HAS_TAG;
+ break;
+ case IB_FLOW_SPEC_ACTION_DROP:
+ if (FIELDS_NOT_SUPPORTED(ib_spec->drop,
+ LAST_DROP_FIELD))
+ return -EOPNOTSUPP;
+ action->action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
+ break;
+ case IB_FLOW_SPEC_ACTION_HANDLE:
+ ret = parse_flow_flow_action(to_mflow_act(ib_spec->action.act),
+ flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS, action);
+ if (ret)
+ return ret;
+ break;
+ case IB_FLOW_SPEC_ACTION_COUNT:
+ if (FIELDS_NOT_SUPPORTED(ib_spec->flow_count,
+ LAST_COUNTERS_FIELD))
+ return -EOPNOTSUPP;
+
+ /* for now support only one counters spec per flow */
+ if (action->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
+ return -EINVAL;
+
+ action->counters = ib_spec->flow_count.counters;
+ action->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/* If a flow could catch both multicast and unicast packets,
+ * it won't fall into the multicast flow steering table and this rule
+ * could steal other multicast packets.
+ */
+static bool flow_is_multicast_only(const struct ib_flow_attr *ib_attr)
+{
+ union ib_flow_spec *flow_spec;
+
+ if (ib_attr->type != IB_FLOW_ATTR_NORMAL ||
+ ib_attr->num_of_specs < 1)
+ return false;
+
+ flow_spec = (union ib_flow_spec *)(ib_attr + 1);
+ if (flow_spec->type == IB_FLOW_SPEC_IPV4) {
+ struct ib_flow_spec_ipv4 *ipv4_spec;
+
+ ipv4_spec = (struct ib_flow_spec_ipv4 *)flow_spec;
+ if (ipv4_is_multicast(ipv4_spec->val.dst_ip))
+ return true;
+
+ return false;
+ }
+
+ if (flow_spec->type == IB_FLOW_SPEC_ETH) {
+ struct ib_flow_spec_eth *eth_spec;
+
+ eth_spec = (struct ib_flow_spec_eth *)flow_spec;
+ return is_multicast_ether_addr(eth_spec->mask.dst_mac) &&
+ is_multicast_ether_addr(eth_spec->val.dst_mac);
+ }
+
+ return false;
+}
+
+static bool is_valid_ethertype(struct mlx5_core_dev *mdev,
+ const struct ib_flow_attr *flow_attr,
+ bool check_inner)
+{
+ union ib_flow_spec *ib_spec = (union ib_flow_spec *)(flow_attr + 1);
+ int match_ipv = check_inner ?
+ MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+ ft_field_support.inner_ip_version) :
+ MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+ ft_field_support.outer_ip_version);
+ int inner_bit = check_inner ? IB_FLOW_SPEC_INNER : 0;
+ bool ipv4_spec_valid, ipv6_spec_valid;
+ unsigned int ip_spec_type = 0;
+ bool has_ethertype = false;
+ unsigned int spec_index;
+ bool mask_valid = true;
+ u16 eth_type = 0;
+ bool type_valid;
+
+ /* Validate that ethertype is correct */
+ for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
+ if ((ib_spec->type == (IB_FLOW_SPEC_ETH | inner_bit)) &&
+ ib_spec->eth.mask.ether_type) {
+ mask_valid = (ib_spec->eth.mask.ether_type ==
+ htons(0xffff));
+ has_ethertype = true;
+ eth_type = ntohs(ib_spec->eth.val.ether_type);
+ } else if ((ib_spec->type == (IB_FLOW_SPEC_IPV4 | inner_bit)) ||
+ (ib_spec->type == (IB_FLOW_SPEC_IPV6 | inner_bit))) {
+ ip_spec_type = ib_spec->type;
+ }
+ ib_spec = (void *)ib_spec + ib_spec->size;
+ }
+
+ type_valid = (!has_ethertype) || (!ip_spec_type);
+ if (!type_valid && mask_valid) {
+ ipv4_spec_valid = (eth_type == ETH_P_IP) &&
+ (ip_spec_type == (IB_FLOW_SPEC_IPV4 | inner_bit));
+ ipv6_spec_valid = (eth_type == ETH_P_IPV6) &&
+ (ip_spec_type == (IB_FLOW_SPEC_IPV6 | inner_bit));
+
+ type_valid = (ipv4_spec_valid) || (ipv6_spec_valid) ||
+ (((eth_type == ETH_P_MPLS_UC) ||
+ (eth_type == ETH_P_MPLS_MC)) && match_ipv);
+ }
+
+ return type_valid;
+}
+
+static bool is_valid_attr(struct mlx5_core_dev *mdev,
+ const struct ib_flow_attr *flow_attr)
+{
+ return is_valid_ethertype(mdev, flow_attr, false) &&
+ is_valid_ethertype(mdev, flow_attr, true);
+}
+
+static void put_flow_table(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_flow_prio *prio, bool ft_added)
+{
+ prio->refcount -= !!ft_added;
+ if (!prio->refcount) {
+ mlx5_destroy_flow_table(prio->flow_table);
+ prio->flow_table = NULL;
+ }
+}
+
+static int mlx5_ib_destroy_flow(struct ib_flow *flow_id)
+{
+ struct mlx5_ib_flow_handler *handler = container_of(flow_id,
+ struct mlx5_ib_flow_handler,
+ ibflow);
+ struct mlx5_ib_flow_handler *iter, *tmp;
+ struct mlx5_ib_dev *dev = handler->dev;
+
+ mutex_lock(&dev->flow_db->lock);
+
+ list_for_each_entry_safe(iter, tmp, &handler->list, list) {
+ mlx5_del_flow_rules(iter->rule);
+ put_flow_table(dev, iter->prio, true);
+ list_del(&iter->list);
+ kfree(iter);
+ }
+
+ mlx5_del_flow_rules(handler->rule);
+ put_flow_table(dev, handler->prio, true);
+ mlx5_ib_counters_clear_description(handler->ibcounters);
+ mutex_unlock(&dev->flow_db->lock);
+ if (handler->flow_matcher)
+ atomic_dec(&handler->flow_matcher->usecnt);
+ kfree(handler);
+
+ return 0;
+}
+
+static int ib_prio_to_core_prio(unsigned int priority, bool dont_trap)
+{
+ priority *= 2;
+ if (!dont_trap)
+ priority++;
+ return priority;
+}
+
+enum flow_table_type {
+ MLX5_IB_FT_RX,
+ MLX5_IB_FT_TX
+};
+
+#define MLX5_FS_MAX_TYPES 6
+#define MLX5_FS_MAX_ENTRIES BIT(16)
+
+static bool __maybe_unused mlx5_ib_shared_ft_allowed(struct ib_device *device)
+{
+ struct mlx5_ib_dev *dev = to_mdev(device);
+
+ return MLX5_CAP_GEN(dev->mdev, shared_object_to_user_object_allowed);
+}
+
+static struct mlx5_ib_flow_prio *_get_prio(struct mlx5_flow_namespace *ns,
+ struct mlx5_ib_flow_prio *prio,
+ struct mlx5_flow_table_attr *ft_attr)
+{
+ struct mlx5_flow_table *ft;
+
+ ft = mlx5_create_auto_grouped_flow_table(ns, ft_attr);
+ if (IS_ERR(ft))
+ return ERR_CAST(ft);
+
+ prio->flow_table = ft;
+ prio->refcount = 0;
+ return prio;
+}
+
+static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
+ struct ib_flow_attr *flow_attr,
+ enum flow_table_type ft_type)
+{
+ bool dont_trap = flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP;
+ struct mlx5_flow_table_attr ft_attr = {};
+ struct mlx5_flow_namespace *ns = NULL;
+ enum mlx5_flow_namespace_type fn_type;
+ struct mlx5_ib_flow_prio *prio;
+ struct mlx5_flow_table *ft;
+ int max_table_size;
+ int num_entries;
+ int num_groups;
+ bool esw_encap;
+ u32 flags = 0;
+ int priority;
+
+ max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
+ log_max_ft_size));
+ esw_encap = mlx5_eswitch_get_encap_mode(dev->mdev) !=
+ DEVLINK_ESWITCH_ENCAP_MODE_NONE;
+ switch (flow_attr->type) {
+ case IB_FLOW_ATTR_NORMAL:
+ if (flow_is_multicast_only(flow_attr) && !dont_trap)
+ priority = MLX5_IB_FLOW_MCAST_PRIO;
+ else
+ priority = ib_prio_to_core_prio(flow_attr->priority,
+ dont_trap);
+ if (ft_type == MLX5_IB_FT_RX) {
+ fn_type = MLX5_FLOW_NAMESPACE_BYPASS;
+ prio = &dev->flow_db->prios[priority];
+ if (!dev->is_rep && !esw_encap &&
+ MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap))
+ flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP;
+ if (!dev->is_rep && !esw_encap &&
+ MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
+ reformat_l3_tunnel_to_l2))
+ flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
+ } else {
+ max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_TX(
+ dev->mdev, log_max_ft_size));
+ fn_type = MLX5_FLOW_NAMESPACE_EGRESS;
+ prio = &dev->flow_db->egress_prios[priority];
+ if (!dev->is_rep && !esw_encap &&
+ MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat))
+ flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
+ }
+ ns = mlx5_get_flow_namespace(dev->mdev, fn_type);
+ num_entries = MLX5_FS_MAX_ENTRIES;
+ num_groups = MLX5_FS_MAX_TYPES;
+ break;
+ case IB_FLOW_ATTR_ALL_DEFAULT:
+ case IB_FLOW_ATTR_MC_DEFAULT:
+ ns = mlx5_get_flow_namespace(dev->mdev,
+ MLX5_FLOW_NAMESPACE_LEFTOVERS);
+ build_leftovers_ft_param(&priority, &num_entries, &num_groups);
+ prio = &dev->flow_db->prios[MLX5_IB_FLOW_LEFTOVERS_PRIO];
+ break;
+ case IB_FLOW_ATTR_SNIFFER:
+ if (!MLX5_CAP_FLOWTABLE(dev->mdev,
+ allow_sniffer_and_nic_rx_shared_tir))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ ns = mlx5_get_flow_namespace(
+ dev->mdev, ft_type == MLX5_IB_FT_RX ?
+ MLX5_FLOW_NAMESPACE_SNIFFER_RX :
+ MLX5_FLOW_NAMESPACE_SNIFFER_TX);
+
+ prio = &dev->flow_db->sniffer[ft_type];
+ priority = 0;
+ num_entries = 1;
+ num_groups = 1;
+ break;
+ default:
+ break;
+ }
+
+ if (!ns)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ max_table_size = min_t(int, num_entries, max_table_size);
+
+ ft = prio->flow_table;
+ if (ft)
+ return prio;
+
+ ft_attr.prio = priority;
+ ft_attr.max_fte = max_table_size;
+ ft_attr.flags = flags;
+ ft_attr.autogroup.max_num_groups = num_groups;
+ return _get_prio(ns, prio, &ft_attr);
+}
+
+enum {
+ RDMA_RX_ECN_OPCOUNTER_PER_QP_PRIO,
+ RDMA_RX_CNP_OPCOUNTER_PER_QP_PRIO,
+ RDMA_RX_PKTS_BYTES_OPCOUNTER_PER_QP_PRIO,
+ RDMA_RX_ECN_OPCOUNTER_PRIO,
+ RDMA_RX_CNP_OPCOUNTER_PRIO,
+ RDMA_RX_PKTS_BYTES_OPCOUNTER_PRIO,
+};
+
+enum {
+ RDMA_TX_CNP_OPCOUNTER_PER_QP_PRIO,
+ RDMA_TX_PKTS_BYTES_OPCOUNTER_PER_QP_PRIO,
+ RDMA_TX_CNP_OPCOUNTER_PRIO,
+ RDMA_TX_PKTS_BYTES_OPCOUNTER_PRIO,
+};
+
+static int set_vhca_port_spec(struct mlx5_ib_dev *dev, u32 port_num,
+ struct mlx5_flow_spec *spec)
+{
+ if (!MLX5_CAP_FLOWTABLE_RDMA_RX(dev->mdev,
+ ft_field_support.source_vhca_port) ||
+ !MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev,
+ ft_field_support.source_vhca_port))
+ return -EOPNOTSUPP;
+
+ MLX5_SET_TO_ONES(fte_match_param, &spec->match_criteria,
+ misc_parameters.source_vhca_port);
+ MLX5_SET(fte_match_param, &spec->match_value,
+ misc_parameters.source_vhca_port, port_num);
+
+ return 0;
+}
+
+static int set_ecn_ce_spec(struct mlx5_ib_dev *dev, u32 port_num,
+ struct mlx5_flow_spec *spec, int ipv)
+{
+ if (!MLX5_CAP_FLOWTABLE_RDMA_RX(dev->mdev,
+ ft_field_support.outer_ip_version))
+ return -EOPNOTSUPP;
+
+ if (mlx5_core_mp_enabled(dev->mdev) &&
+ set_vhca_port_spec(dev, port_num, spec))
+ return -EOPNOTSUPP;
+
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ outer_headers.ip_ecn);
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_ecn,
+ INET_ECN_CE);
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ outer_headers.ip_version);
+ MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_version,
+ ipv);
+
+ spec->match_criteria_enable =
+ get_match_criteria_enable(spec->match_criteria);
+
+ return 0;
+}
+
+static int set_cnp_spec(struct mlx5_ib_dev *dev, u32 port_num,
+ struct mlx5_flow_spec *spec)
+{
+ if (mlx5_core_mp_enabled(dev->mdev) &&
+ set_vhca_port_spec(dev, port_num, spec))
+ return -EOPNOTSUPP;
+
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ misc_parameters.bth_opcode);
+ MLX5_SET(fte_match_param, spec->match_value, misc_parameters.bth_opcode,
+ IB_BTH_OPCODE_CNP);
+
+ spec->match_criteria_enable =
+ get_match_criteria_enable(spec->match_criteria);
+
+ return 0;
+}
+
+/* Returns the prio we should use for the given optional counter type,
+ * whereas for bytes type we use the packet type, since they share the same
+ * resources.
+ */
+static struct mlx5_ib_flow_prio *get_opfc_prio(struct mlx5_ib_dev *dev,
+ u32 type)
+{
+ u32 prio_type;
+
+ switch (type) {
+ case MLX5_IB_OPCOUNTER_RDMA_TX_BYTES:
+ prio_type = MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_RX_BYTES:
+ prio_type = MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_TX_BYTES_PER_QP:
+ prio_type = MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS_PER_QP;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP:
+ prio_type = MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS_PER_QP;
+ break;
+ default:
+ prio_type = type;
+ }
+
+ return &dev->flow_db->opfcs[prio_type];
+}
+
+static void put_per_qp_prio(struct mlx5_ib_dev *dev,
+ enum mlx5_ib_optional_counter_type type)
+{
+ enum mlx5_ib_optional_counter_type per_qp_type;
+ struct mlx5_ib_flow_prio *prio;
+
+ switch (type) {
+ case MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS:
+ per_qp_type = MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS_PER_QP;
+ break;
+ case MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS:
+ per_qp_type = MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS_PER_QP;
+ break;
+ case MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS:
+ per_qp_type = MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS_PER_QP;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS:
+ per_qp_type = MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS_PER_QP;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_TX_BYTES:
+ per_qp_type = MLX5_IB_OPCOUNTER_RDMA_TX_BYTES_PER_QP;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS:
+ per_qp_type = MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS_PER_QP;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_RX_BYTES:
+ per_qp_type = MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP;
+ break;
+ default:
+ return;
+ }
+
+ prio = get_opfc_prio(dev, per_qp_type);
+ put_flow_table(dev, prio, true);
+}
+
+static int get_per_qp_prio(struct mlx5_ib_dev *dev,
+ enum mlx5_ib_optional_counter_type type)
+{
+ enum mlx5_ib_optional_counter_type per_qp_type;
+ struct mlx5_flow_table_attr ft_attr = {};
+ enum mlx5_flow_namespace_type fn_type;
+ struct mlx5_flow_namespace *ns;
+ struct mlx5_ib_flow_prio *prio;
+ int priority;
+
+ switch (type) {
+ case MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS:
+ fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS;
+ priority = RDMA_RX_ECN_OPCOUNTER_PER_QP_PRIO;
+ per_qp_type = MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS_PER_QP;
+ break;
+ case MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS:
+ fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS;
+ priority = RDMA_RX_CNP_OPCOUNTER_PER_QP_PRIO;
+ per_qp_type = MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS_PER_QP;
+ break;
+ case MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS:
+ fn_type = MLX5_FLOW_NAMESPACE_RDMA_TX_COUNTERS;
+ priority = RDMA_TX_CNP_OPCOUNTER_PER_QP_PRIO;
+ per_qp_type = MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS_PER_QP;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS:
+ fn_type = MLX5_FLOW_NAMESPACE_RDMA_TX_COUNTERS;
+ priority = RDMA_TX_PKTS_BYTES_OPCOUNTER_PER_QP_PRIO;
+ per_qp_type = MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS_PER_QP;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_TX_BYTES:
+ fn_type = MLX5_FLOW_NAMESPACE_RDMA_TX_COUNTERS;
+ priority = RDMA_TX_PKTS_BYTES_OPCOUNTER_PER_QP_PRIO;
+ per_qp_type = MLX5_IB_OPCOUNTER_RDMA_TX_BYTES_PER_QP;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS:
+ fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS;
+ priority = RDMA_RX_PKTS_BYTES_OPCOUNTER_PER_QP_PRIO;
+ per_qp_type = MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS_PER_QP;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_RX_BYTES:
+ fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS;
+ priority = RDMA_RX_PKTS_BYTES_OPCOUNTER_PER_QP_PRIO;
+ per_qp_type = MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ ns = mlx5_get_flow_namespace(dev->mdev, fn_type);
+ if (!ns)
+ return -EOPNOTSUPP;
+
+ prio = get_opfc_prio(dev, per_qp_type);
+ if (prio->flow_table)
+ return 0;
+
+ ft_attr.prio = priority;
+ ft_attr.max_fte = MLX5_FS_MAX_POOL_SIZE;
+ ft_attr.autogroup.max_num_groups = 1;
+ prio = _get_prio(ns, prio, &ft_attr);
+ if (IS_ERR(prio))
+ return PTR_ERR(prio);
+
+ prio->refcount = 1;
+
+ return 0;
+}
+
+static struct mlx5_per_qp_opfc *get_per_qp_opfc(struct xarray *qpn_opfc_xa,
+ u32 qp_num, bool *new)
+{
+ struct mlx5_per_qp_opfc *per_qp_opfc;
+
+ *new = false;
+
+ per_qp_opfc = xa_load(qpn_opfc_xa, qp_num);
+ if (per_qp_opfc)
+ return per_qp_opfc;
+ per_qp_opfc = kzalloc(sizeof(*per_qp_opfc), GFP_KERNEL);
+
+ if (!per_qp_opfc)
+ return NULL;
+
+ *new = true;
+ return per_qp_opfc;
+}
+
+static int add_op_fc_rules(struct mlx5_ib_dev *dev,
+ struct mlx5_fc *fc_arr[MLX5_IB_OPCOUNTER_MAX],
+ struct xarray *qpn_opfc_xa,
+ struct mlx5_per_qp_opfc *per_qp_opfc,
+ struct mlx5_ib_flow_prio *prio,
+ enum mlx5_ib_optional_counter_type type,
+ u32 qp_num, u32 port_num)
+{
+ struct mlx5_ib_op_fc *opfc = &per_qp_opfc->opfcs[type], *in_use_opfc;
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5_flow_destination dst;
+ struct mlx5_flow_spec *spec;
+ int i, err, spec_num;
+ bool is_tx;
+
+ if (opfc->fc)
+ return -EEXIST;
+
+ if (mlx5r_is_opfc_shared_and_in_use(per_qp_opfc->opfcs, type,
+ &in_use_opfc)) {
+ opfc->fc = in_use_opfc->fc;
+ opfc->rule[0] = in_use_opfc->rule[0];
+ return 0;
+ }
+
+ opfc->fc = fc_arr[type];
+
+ spec = kcalloc(MAX_OPFC_RULES, sizeof(*spec), GFP_KERNEL);
+ if (!spec) {
+ err = -ENOMEM;
+ goto null_fc;
+ }
+
+ switch (type) {
+ case MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS_PER_QP:
+ if (set_ecn_ce_spec(dev, port_num, &spec[0],
+ MLX5_FS_IPV4_VERSION) ||
+ set_ecn_ce_spec(dev, port_num, &spec[1],
+ MLX5_FS_IPV6_VERSION)) {
+ err = -EOPNOTSUPP;
+ goto free_spec;
+ }
+ spec_num = 2;
+ is_tx = false;
+
+ MLX5_SET_TO_ONES(fte_match_param, spec[1].match_criteria,
+ misc_parameters.bth_dst_qp);
+ MLX5_SET(fte_match_param, spec[1].match_value,
+ misc_parameters.bth_dst_qp, qp_num);
+ spec[1].match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS;
+ break;
+ case MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS_PER_QP:
+ if (!MLX5_CAP_FLOWTABLE(
+ dev->mdev,
+ ft_field_support_2_nic_receive_rdma.bth_opcode) ||
+ set_cnp_spec(dev, port_num, &spec[0])) {
+ err = -EOPNOTSUPP;
+ goto free_spec;
+ }
+ spec_num = 1;
+ is_tx = false;
+ break;
+ case MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS_PER_QP:
+ if (!MLX5_CAP_FLOWTABLE(
+ dev->mdev,
+ ft_field_support_2_nic_transmit_rdma.bth_opcode) ||
+ set_cnp_spec(dev, port_num, &spec[0])) {
+ err = -EOPNOTSUPP;
+ goto free_spec;
+ }
+ spec_num = 1;
+ is_tx = true;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS_PER_QP:
+ case MLX5_IB_OPCOUNTER_RDMA_TX_BYTES_PER_QP:
+ spec_num = 1;
+ is_tx = true;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS_PER_QP:
+ case MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP:
+ spec_num = 1;
+ is_tx = false;
+ break;
+ default:
+ err = -EINVAL;
+ goto free_spec;
+ }
+
+ if (is_tx) {
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ misc_parameters.source_sqn);
+ MLX5_SET(fte_match_param, spec->match_value,
+ misc_parameters.source_sqn, qp_num);
+ } else {
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ misc_parameters.bth_dst_qp);
+ MLX5_SET(fte_match_param, spec->match_value,
+ misc_parameters.bth_dst_qp, qp_num);
+ }
+
+ spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS;
+
+ dst.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+ dst.counter = opfc->fc;
+
+ flow_act.action =
+ MLX5_FLOW_CONTEXT_ACTION_COUNT | MLX5_FLOW_CONTEXT_ACTION_ALLOW;
+
+ for (i = 0; i < spec_num; i++) {
+ opfc->rule[i] = mlx5_add_flow_rules(prio->flow_table, &spec[i],
+ &flow_act, &dst, 1);
+ if (IS_ERR(opfc->rule[i])) {
+ err = PTR_ERR(opfc->rule[i]);
+ goto del_rules;
+ }
+ }
+ prio->refcount += spec_num;
+
+ err = xa_err(xa_store(qpn_opfc_xa, qp_num, per_qp_opfc, GFP_KERNEL));
+ if (err)
+ goto del_rules;
+
+ kfree(spec);
+
+ return 0;
+
+del_rules:
+ while (i--)
+ mlx5_del_flow_rules(opfc->rule[i]);
+ put_flow_table(dev, prio, false);
+free_spec:
+ kfree(spec);
+null_fc:
+ opfc->fc = NULL;
+ return err;
+}
+
+static bool
+is_fc_shared_and_in_use(struct mlx5_fc *fc_arr[MLX5_IB_OPCOUNTER_MAX], u32 type,
+ struct mlx5_fc **fc)
+{
+ u32 shared_fc_type;
+
+ switch (type) {
+ case MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS_PER_QP:
+ shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_TX_BYTES_PER_QP;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_TX_BYTES_PER_QP:
+ shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS_PER_QP;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS_PER_QP:
+ shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP:
+ shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS_PER_QP;
+ break;
+ default:
+ return false;
+ }
+
+ *fc = fc_arr[shared_fc_type];
+ if (!(*fc))
+ return false;
+
+ return true;
+}
+
+void mlx5r_fs_destroy_fcs(struct mlx5_ib_dev *dev,
+ struct mlx5_fc *fc_arr[MLX5_IB_OPCOUNTER_MAX])
+{
+ struct mlx5_fc *in_use_fc;
+ int i;
+
+ for (i = MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS_PER_QP;
+ i <= MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP; i++) {
+ if (!fc_arr[i])
+ continue;
+
+ if (is_fc_shared_and_in_use(fc_arr, i, &in_use_fc)) {
+ fc_arr[i] = NULL;
+ continue;
+ }
+
+ mlx5_fc_destroy(dev->mdev, fc_arr[i]);
+ fc_arr[i] = NULL;
+ }
+}
+
+int mlx5_ib_fs_add_op_fc(struct mlx5_ib_dev *dev, u32 port_num,
+ struct mlx5_ib_op_fc *opfc,
+ enum mlx5_ib_optional_counter_type type)
+{
+ struct mlx5_flow_table_attr ft_attr = {};
+ enum mlx5_flow_namespace_type fn_type;
+ int priority, i, err, spec_num;
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5_flow_destination dst;
+ struct mlx5_flow_namespace *ns;
+ struct mlx5_ib_flow_prio *prio;
+ struct mlx5_flow_spec *spec;
+
+ spec = kcalloc(MAX_OPFC_RULES, sizeof(*spec), GFP_KERNEL);
+ if (!spec)
+ return -ENOMEM;
+
+ switch (type) {
+ case MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS:
+ if (set_ecn_ce_spec(dev, port_num, &spec[0],
+ MLX5_FS_IPV4_VERSION) ||
+ set_ecn_ce_spec(dev, port_num, &spec[1],
+ MLX5_FS_IPV6_VERSION)) {
+ err = -EOPNOTSUPP;
+ goto free;
+ }
+ spec_num = 2;
+ fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS;
+ priority = RDMA_RX_ECN_OPCOUNTER_PRIO;
+ break;
+
+ case MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS:
+ if (!MLX5_CAP_FLOWTABLE(dev->mdev,
+ ft_field_support_2_nic_receive_rdma.bth_opcode) ||
+ set_cnp_spec(dev, port_num, &spec[0])) {
+ err = -EOPNOTSUPP;
+ goto free;
+ }
+ spec_num = 1;
+ fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS;
+ priority = RDMA_RX_CNP_OPCOUNTER_PRIO;
+ break;
+
+ case MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS:
+ if (!MLX5_CAP_FLOWTABLE(dev->mdev,
+ ft_field_support_2_nic_transmit_rdma.bth_opcode) ||
+ set_cnp_spec(dev, port_num, &spec[0])) {
+ err = -EOPNOTSUPP;
+ goto free;
+ }
+ spec_num = 1;
+ fn_type = MLX5_FLOW_NAMESPACE_RDMA_TX_COUNTERS;
+ priority = RDMA_TX_CNP_OPCOUNTER_PRIO;
+ break;
+
+ case MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS:
+ case MLX5_IB_OPCOUNTER_RDMA_TX_BYTES:
+ spec_num = 1;
+ fn_type = MLX5_FLOW_NAMESPACE_RDMA_TX_COUNTERS;
+ priority = RDMA_TX_PKTS_BYTES_OPCOUNTER_PRIO;
+ break;
+
+ case MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS:
+ case MLX5_IB_OPCOUNTER_RDMA_RX_BYTES:
+ spec_num = 1;
+ fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS;
+ priority = RDMA_RX_PKTS_BYTES_OPCOUNTER_PRIO;
+ break;
+
+ default:
+ err = -EOPNOTSUPP;
+ goto free;
+ }
+
+ ns = mlx5_get_flow_namespace(dev->mdev, fn_type);
+ if (!ns) {
+ err = -EOPNOTSUPP;
+ goto free;
+ }
+
+ prio = get_opfc_prio(dev, type);
+ if (!prio->flow_table) {
+ err = get_per_qp_prio(dev, type);
+ if (err)
+ goto free;
+
+ ft_attr.prio = priority;
+ ft_attr.max_fte = dev->num_ports * MAX_OPFC_RULES;
+ ft_attr.autogroup.max_num_groups = 1;
+ prio = _get_prio(ns, prio, &ft_attr);
+ if (IS_ERR(prio)) {
+ err = PTR_ERR(prio);
+ goto put_prio;
+ }
+ }
+
+ dst.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+ dst.counter = opfc->fc;
+
+ flow_act.action =
+ MLX5_FLOW_CONTEXT_ACTION_COUNT | MLX5_FLOW_CONTEXT_ACTION_ALLOW;
+
+ for (i = 0; i < spec_num; i++) {
+ opfc->rule[i] = mlx5_add_flow_rules(prio->flow_table, &spec[i],
+ &flow_act, &dst, 1);
+ if (IS_ERR(opfc->rule[i])) {
+ err = PTR_ERR(opfc->rule[i]);
+ goto del_rules;
+ }
+ }
+ prio->refcount += spec_num;
+ kfree(spec);
+
+ return 0;
+
+del_rules:
+ for (i -= 1; i >= 0; i--)
+ mlx5_del_flow_rules(opfc->rule[i]);
+ put_flow_table(dev, prio, false);
+put_prio:
+ put_per_qp_prio(dev, type);
+free:
+ kfree(spec);
+ return err;
+}
+
+void mlx5_ib_fs_remove_op_fc(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_op_fc *opfc,
+ enum mlx5_ib_optional_counter_type type)
+{
+ struct mlx5_ib_flow_prio *prio;
+ int i;
+
+ prio = get_opfc_prio(dev, type);
+
+ for (i = 0; i < MAX_OPFC_RULES && opfc->rule[i]; i++) {
+ mlx5_del_flow_rules(opfc->rule[i]);
+ put_flow_table(dev, prio, true);
+ }
+
+ put_per_qp_prio(dev, type);
+}
+
+void mlx5r_fs_unbind_op_fc(struct ib_qp *qp, struct xarray *qpn_opfc_xa)
+{
+ struct mlx5_ib_dev *dev = to_mdev(qp->device);
+ struct mlx5_per_qp_opfc *per_qp_opfc;
+ struct mlx5_ib_op_fc *in_use_opfc;
+ struct mlx5_ib_flow_prio *prio;
+ int i, j;
+
+ per_qp_opfc = xa_load(qpn_opfc_xa, qp->qp_num);
+ if (!per_qp_opfc)
+ return;
+
+ for (i = MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS_PER_QP;
+ i <= MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP; i++) {
+ if (!per_qp_opfc->opfcs[i].fc)
+ continue;
+
+ if (mlx5r_is_opfc_shared_and_in_use(per_qp_opfc->opfcs, i,
+ &in_use_opfc)) {
+ per_qp_opfc->opfcs[i].fc = NULL;
+ continue;
+ }
+
+ for (j = 0; j < MAX_OPFC_RULES; j++) {
+ if (!per_qp_opfc->opfcs[i].rule[j])
+ continue;
+ mlx5_del_flow_rules(per_qp_opfc->opfcs[i].rule[j]);
+ prio = get_opfc_prio(dev, i);
+ put_flow_table(dev, prio, true);
+ }
+ per_qp_opfc->opfcs[i].fc = NULL;
+ }
+
+ kfree(per_qp_opfc);
+ xa_erase(qpn_opfc_xa, qp->qp_num);
+}
+
+int mlx5r_fs_bind_op_fc(struct ib_qp *qp,
+ struct mlx5_fc *fc_arr[MLX5_IB_OPCOUNTER_MAX],
+ struct xarray *qpn_opfc_xa, u32 port)
+{
+ struct mlx5_ib_dev *dev = to_mdev(qp->device);
+ struct mlx5_per_qp_opfc *per_qp_opfc;
+ struct mlx5_ib_flow_prio *prio;
+ struct mlx5_ib_counters *cnts;
+ struct mlx5_ib_op_fc *opfc;
+ struct mlx5_fc *in_use_fc;
+ int i, err, per_qp_type;
+ bool new;
+
+ cnts = &dev->port[port - 1].cnts;
+
+ for (i = 0; i <= MLX5_IB_OPCOUNTER_RDMA_RX_BYTES; i++) {
+ opfc = &cnts->opfcs[i];
+ if (!opfc->fc)
+ continue;
+
+ per_qp_type = i + MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS_PER_QP;
+ prio = get_opfc_prio(dev, per_qp_type);
+ WARN_ON(!prio->flow_table);
+
+ if (is_fc_shared_and_in_use(fc_arr, per_qp_type, &in_use_fc))
+ fc_arr[per_qp_type] = in_use_fc;
+
+ if (!fc_arr[per_qp_type]) {
+ fc_arr[per_qp_type] = mlx5_fc_create(dev->mdev, false);
+ if (IS_ERR(fc_arr[per_qp_type]))
+ return PTR_ERR(fc_arr[per_qp_type]);
+ }
+
+ per_qp_opfc = get_per_qp_opfc(qpn_opfc_xa, qp->qp_num, &new);
+ if (!per_qp_opfc) {
+ err = -ENOMEM;
+ goto free_fc;
+ }
+ err = add_op_fc_rules(dev, fc_arr, qpn_opfc_xa, per_qp_opfc,
+ prio, per_qp_type, qp->qp_num, port);
+ if (err)
+ goto del_rules;
+ }
+
+ return 0;
+
+del_rules:
+ mlx5r_fs_unbind_op_fc(qp, qpn_opfc_xa);
+ if (new)
+ kfree(per_qp_opfc);
+free_fc:
+ if (xa_empty(qpn_opfc_xa))
+ mlx5r_fs_destroy_fcs(dev, fc_arr);
+ return err;
+}
+
+static void set_underlay_qp(struct mlx5_ib_dev *dev,
+ struct mlx5_flow_spec *spec,
+ u32 underlay_qpn)
+{
+ void *misc_params_c = MLX5_ADDR_OF(fte_match_param,
+ spec->match_criteria,
+ misc_parameters);
+ void *misc_params_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ misc_parameters);
+
+ if (underlay_qpn &&
+ MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
+ ft_field_support.bth_dst_qp)) {
+ MLX5_SET(fte_match_set_misc,
+ misc_params_v, bth_dst_qp, underlay_qpn);
+ MLX5_SET(fte_match_set_misc,
+ misc_params_c, bth_dst_qp, 0xffffff);
+ }
+}
+
+static void mlx5_ib_set_rule_source_port(struct mlx5_ib_dev *dev,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_eswitch_rep *rep)
+{
+ struct mlx5_eswitch *esw = dev->mdev->priv.eswitch;
+ void *misc;
+
+ if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ misc_parameters_2);
+
+ MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
+ mlx5_eswitch_get_vport_metadata_for_match(rep->esw,
+ rep->vport));
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ misc_parameters_2);
+
+ MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
+ mlx5_eswitch_get_vport_metadata_mask());
+ } else {
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+ misc_parameters);
+
+ MLX5_SET(fte_match_set_misc, misc, source_port, rep->vport);
+
+ misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+ misc_parameters);
+
+ MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+ }
+}
+
+static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_flow_prio *ft_prio,
+ const struct ib_flow_attr *flow_attr,
+ struct mlx5_flow_destination *dst,
+ u32 underlay_qpn,
+ struct mlx5_ib_create_flow *ucmd)
+{
+ struct mlx5_flow_table *ft = ft_prio->flow_table;
+ struct mlx5_ib_flow_handler *handler;
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5_flow_spec *spec;
+ struct mlx5_flow_destination dest_arr[2] = {};
+ struct mlx5_flow_destination *rule_dst = dest_arr;
+ const void *ib_flow = (const void *)flow_attr + sizeof(*flow_attr);
+ unsigned int spec_index;
+ u32 prev_type = 0;
+ int err = 0;
+ int dest_num = 0;
+ bool is_egress = flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS;
+
+ if (!is_valid_attr(dev->mdev, flow_attr))
+ return ERR_PTR(-EINVAL);
+
+ if (dev->is_rep && is_egress)
+ return ERR_PTR(-EINVAL);
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ handler = kzalloc(sizeof(*handler), GFP_KERNEL);
+ if (!handler || !spec) {
+ err = -ENOMEM;
+ goto free;
+ }
+
+ INIT_LIST_HEAD(&handler->list);
+
+ for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
+ err = parse_flow_attr(dev->mdev, spec,
+ ib_flow, flow_attr, &flow_act,
+ prev_type);
+ if (err < 0)
+ goto free;
+
+ prev_type = ((union ib_flow_spec *)ib_flow)->type;
+ ib_flow += ((union ib_flow_spec *)ib_flow)->size;
+ }
+
+ if (dst && !(flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DROP)) {
+ memcpy(&dest_arr[0], dst, sizeof(*dst));
+ dest_num++;
+ }
+
+ if (!flow_is_multicast_only(flow_attr))
+ set_underlay_qp(dev, spec, underlay_qpn);
+
+ if (dev->is_rep && flow_attr->type != IB_FLOW_ATTR_SNIFFER) {
+ struct mlx5_eswitch_rep *rep;
+
+ rep = dev->port[flow_attr->port - 1].rep;
+ if (!rep) {
+ err = -EINVAL;
+ goto free;
+ }
+
+ mlx5_ib_set_rule_source_port(dev, spec, rep);
+ }
+
+ spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria);
+
+ if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
+ struct mlx5_ib_mcounters *mcounters;
+
+ err = mlx5_ib_flow_counters_set_data(flow_act.counters, ucmd);
+ if (err)
+ goto free;
+
+ mcounters = to_mcounters(flow_act.counters);
+ handler->ibcounters = flow_act.counters;
+ dest_arr[dest_num].type =
+ MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+ dest_arr[dest_num].counter =
+ mcounters->hw_cntrs_hndl;
+ dest_num++;
+ }
+
+ if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DROP) {
+ if (!dest_num)
+ rule_dst = NULL;
+ } else {
+ if (flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP)
+ flow_act.action |=
+ MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
+ if (is_egress)
+ flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW;
+ else if (dest_num)
+ flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ }
+
+ if ((spec->flow_context.flags & FLOW_CONTEXT_HAS_TAG) &&
+ (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
+ flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) {
+ mlx5_ib_warn(dev, "Flow tag %u and attribute type %x isn't allowed in leftovers\n",
+ spec->flow_context.flow_tag, flow_attr->type);
+ err = -EINVAL;
+ goto free;
+ }
+ handler->rule = mlx5_add_flow_rules(ft, spec,
+ &flow_act,
+ rule_dst, dest_num);
+
+ if (IS_ERR(handler->rule)) {
+ err = PTR_ERR(handler->rule);
+ goto free;
+ }
+
+ ft_prio->refcount++;
+ handler->prio = ft_prio;
+ handler->dev = dev;
+
+ ft_prio->flow_table = ft;
+free:
+ if (err && handler) {
+ mlx5_ib_counters_clear_description(handler->ibcounters);
+ kfree(handler);
+ }
+ kvfree(spec);
+ return err ? ERR_PTR(err) : handler;
+}
+
+static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_flow_prio *ft_prio,
+ const struct ib_flow_attr *flow_attr,
+ struct mlx5_flow_destination *dst)
+{
+ return _create_flow_rule(dev, ft_prio, flow_attr, dst, 0, NULL);
+}
+
+static struct mlx5_ib_flow_handler *create_leftovers_rule(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_flow_prio *ft_prio,
+ struct ib_flow_attr *flow_attr,
+ struct mlx5_flow_destination *dst)
+{
+ struct mlx5_ib_flow_handler *handler_ucast = NULL;
+ struct mlx5_ib_flow_handler *handler = NULL;
+
+ static struct {
+ struct ib_flow_spec_eth eth_flow;
+ struct ib_flow_attr flow_attr;
+ } leftovers_wc = { .flow_attr = { .num_of_specs = 1,
+ .size = sizeof(leftovers_wc) },
+ .eth_flow = {
+ .type = IB_FLOW_SPEC_ETH,
+ .size = sizeof(struct ib_flow_spec_eth),
+ .mask = { .dst_mac = { 0x1 } },
+ .val = { .dst_mac = { 0x1 } } } };
+
+ static struct {
+ struct ib_flow_spec_eth eth_flow;
+ struct ib_flow_attr flow_attr;
+ } leftovers_uc = { .flow_attr = { .num_of_specs = 1,
+ .size = sizeof(leftovers_uc) },
+ .eth_flow = {
+ .type = IB_FLOW_SPEC_ETH,
+ .size = sizeof(struct ib_flow_spec_eth),
+ .mask = { .dst_mac = { 0x1 } },
+ .val = { .dst_mac = {} } } };
+
+ handler = create_flow_rule(dev, ft_prio, &leftovers_wc.flow_attr, dst);
+ if (!IS_ERR(handler) &&
+ flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT) {
+ handler_ucast = create_flow_rule(dev, ft_prio,
+ &leftovers_uc.flow_attr, dst);
+ if (IS_ERR(handler_ucast)) {
+ mlx5_del_flow_rules(handler->rule);
+ ft_prio->refcount--;
+ kfree(handler);
+ handler = handler_ucast;
+ } else {
+ list_add(&handler_ucast->list, &handler->list);
+ }
+ }
+
+ return handler;
+}
+
+static struct mlx5_ib_flow_handler *create_sniffer_rule(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_flow_prio *ft_rx,
+ struct mlx5_ib_flow_prio *ft_tx,
+ struct mlx5_flow_destination *dst)
+{
+ struct mlx5_ib_flow_handler *handler_rx;
+ struct mlx5_ib_flow_handler *handler_tx;
+ int err;
+ static const struct ib_flow_attr flow_attr = {
+ .num_of_specs = 0,
+ .type = IB_FLOW_ATTR_SNIFFER,
+ .size = sizeof(flow_attr)
+ };
+
+ handler_rx = create_flow_rule(dev, ft_rx, &flow_attr, dst);
+ if (IS_ERR(handler_rx)) {
+ err = PTR_ERR(handler_rx);
+ goto err;
+ }
+
+ handler_tx = create_flow_rule(dev, ft_tx, &flow_attr, dst);
+ if (IS_ERR(handler_tx)) {
+ err = PTR_ERR(handler_tx);
+ goto err_tx;
+ }
+
+ list_add(&handler_tx->list, &handler_rx->list);
+
+ return handler_rx;
+
+err_tx:
+ mlx5_del_flow_rules(handler_rx->rule);
+ ft_rx->refcount--;
+ kfree(handler_rx);
+err:
+ return ERR_PTR(err);
+}
+
+static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
+ struct ib_flow_attr *flow_attr,
+ struct ib_udata *udata)
+{
+ struct mlx5_ib_dev *dev = to_mdev(qp->device);
+ struct mlx5_ib_qp *mqp = to_mqp(qp);
+ struct mlx5_ib_flow_handler *handler = NULL;
+ struct mlx5_flow_destination *dst = NULL;
+ struct mlx5_ib_flow_prio *ft_prio_tx = NULL;
+ struct mlx5_ib_flow_prio *ft_prio;
+ bool is_egress = flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS;
+ struct mlx5_ib_create_flow *ucmd = NULL, ucmd_hdr;
+ size_t min_ucmd_sz, required_ucmd_sz;
+ int err;
+ int underlay_qpn;
+
+ if (udata && udata->inlen) {
+ min_ucmd_sz = offsetofend(struct mlx5_ib_create_flow, reserved);
+ if (udata->inlen < min_ucmd_sz)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ err = ib_copy_from_udata(&ucmd_hdr, udata, min_ucmd_sz);
+ if (err)
+ return ERR_PTR(err);
+
+ /* currently supports only one counters data */
+ if (ucmd_hdr.ncounters_data > 1)
+ return ERR_PTR(-EINVAL);
+
+ required_ucmd_sz = min_ucmd_sz +
+ sizeof(struct mlx5_ib_flow_counters_data) *
+ ucmd_hdr.ncounters_data;
+ if (udata->inlen > required_ucmd_sz &&
+ !ib_is_udata_cleared(udata, required_ucmd_sz,
+ udata->inlen - required_ucmd_sz))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ ucmd = kzalloc(required_ucmd_sz, GFP_KERNEL);
+ if (!ucmd)
+ return ERR_PTR(-ENOMEM);
+
+ err = ib_copy_from_udata(ucmd, udata, required_ucmd_sz);
+ if (err)
+ goto free_ucmd;
+ }
+
+ if (flow_attr->priority > MLX5_IB_FLOW_LAST_PRIO) {
+ err = -ENOMEM;
+ goto free_ucmd;
+ }
+
+ if (flow_attr->flags &
+ ~(IB_FLOW_ATTR_FLAGS_DONT_TRAP | IB_FLOW_ATTR_FLAGS_EGRESS)) {
+ err = -EINVAL;
+ goto free_ucmd;
+ }
+
+ if (is_egress &&
+ (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
+ flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) {
+ err = -EINVAL;
+ goto free_ucmd;
+ }
+
+ dst = kzalloc(sizeof(*dst), GFP_KERNEL);
+ if (!dst) {
+ err = -ENOMEM;
+ goto free_ucmd;
+ }
+
+ mutex_lock(&dev->flow_db->lock);
+
+ ft_prio = get_flow_table(dev, flow_attr,
+ is_egress ? MLX5_IB_FT_TX : MLX5_IB_FT_RX);
+ if (IS_ERR(ft_prio)) {
+ err = PTR_ERR(ft_prio);
+ goto unlock;
+ }
+ if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) {
+ ft_prio_tx = get_flow_table(dev, flow_attr, MLX5_IB_FT_TX);
+ if (IS_ERR(ft_prio_tx)) {
+ err = PTR_ERR(ft_prio_tx);
+ ft_prio_tx = NULL;
+ goto destroy_ft;
+ }
+ }
+
+ if (is_egress) {
+ dst->type = MLX5_FLOW_DESTINATION_TYPE_PORT;
+ } else {
+ dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+ if (mqp->is_rss)
+ dst->tir_num = mqp->rss_qp.tirn;
+ else
+ dst->tir_num = mqp->raw_packet_qp.rq.tirn;
+ }
+
+ switch (flow_attr->type) {
+ case IB_FLOW_ATTR_NORMAL:
+ underlay_qpn = (mqp->flags & IB_QP_CREATE_SOURCE_QPN) ?
+ mqp->underlay_qpn :
+ 0;
+ handler = _create_flow_rule(dev, ft_prio, flow_attr, dst,
+ underlay_qpn, ucmd);
+ break;
+ case IB_FLOW_ATTR_ALL_DEFAULT:
+ case IB_FLOW_ATTR_MC_DEFAULT:
+ handler = create_leftovers_rule(dev, ft_prio, flow_attr, dst);
+ break;
+ case IB_FLOW_ATTR_SNIFFER:
+ handler = create_sniffer_rule(dev, ft_prio, ft_prio_tx, dst);
+ break;
+ default:
+ err = -EINVAL;
+ goto destroy_ft;
+ }
+
+ if (IS_ERR(handler)) {
+ err = PTR_ERR(handler);
+ handler = NULL;
+ goto destroy_ft;
+ }
+
+ mutex_unlock(&dev->flow_db->lock);
+ kfree(dst);
+ kfree(ucmd);
+
+ return &handler->ibflow;
+
+destroy_ft:
+ put_flow_table(dev, ft_prio, false);
+ if (ft_prio_tx)
+ put_flow_table(dev, ft_prio_tx, false);
+unlock:
+ mutex_unlock(&dev->flow_db->lock);
+ kfree(dst);
+free_ucmd:
+ kfree(ucmd);
+ return ERR_PTR(err);
+}
+
+static int mlx5_ib_fill_transport_ns_info(struct mlx5_ib_dev *dev,
+ enum mlx5_flow_namespace_type type,
+ u32 *flags, u16 *vport_idx,
+ u16 *vport,
+ struct mlx5_core_dev **ft_mdev,
+ u32 ib_port, u16 *esw_owner_vhca_id)
+{
+ struct mlx5_core_dev *esw_mdev;
+
+ if (!is_mdev_switchdev_mode(dev->mdev))
+ return 0;
+
+ if (!MLX5_CAP_ADV_RDMA(dev->mdev, rdma_transport_manager))
+ return -EOPNOTSUPP;
+
+ if (!dev->port[ib_port - 1].rep)
+ return -EINVAL;
+
+ esw_mdev = mlx5_eswitch_get_core_dev(dev->port[ib_port - 1].rep->esw);
+ if (esw_mdev != dev->mdev) {
+ if (!MLX5_CAP_ADV_RDMA(dev->mdev,
+ rdma_transport_manager_other_eswitch))
+ return -EOPNOTSUPP;
+ *flags |= MLX5_FLOW_TABLE_OTHER_ESWITCH;
+ *esw_owner_vhca_id = MLX5_CAP_GEN(esw_mdev, vhca_id);
+ }
+
+ *flags |= MLX5_FLOW_TABLE_OTHER_VPORT;
+ *ft_mdev = esw_mdev;
+ *vport = dev->port[ib_port - 1].rep->vport;
+ *vport_idx = dev->port[ib_port - 1].rep->vport_index;
+
+ return 0;
+}
+
+static struct mlx5_ib_flow_prio *
+_get_flow_table(struct mlx5_ib_dev *dev, u16 user_priority,
+ enum mlx5_flow_namespace_type ns_type,
+ bool mcast, u32 ib_port)
+{
+ struct mlx5_core_dev *ft_mdev = dev->mdev;
+ struct mlx5_flow_table_attr ft_attr = {};
+ struct mlx5_flow_namespace *ns = NULL;
+ struct mlx5_ib_flow_prio *prio = NULL;
+ u16 esw_owner_vhca_id = 0;
+ int max_table_size = 0;
+ u16 vport_idx = 0;
+ bool esw_encap;
+ u32 flags = 0;
+ u16 vport = 0;
+ int priority;
+ int ret;
+
+ if (mcast)
+ priority = MLX5_IB_FLOW_MCAST_PRIO;
+ else
+ priority = ib_prio_to_core_prio(user_priority, false);
+
+ esw_encap = mlx5_eswitch_get_encap_mode(dev->mdev) !=
+ DEVLINK_ESWITCH_ENCAP_MODE_NONE;
+ switch (ns_type) {
+ case MLX5_FLOW_NAMESPACE_BYPASS:
+ max_table_size = BIT(
+ MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, log_max_ft_size));
+ if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap) && !esw_encap)
+ flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP;
+ if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
+ reformat_l3_tunnel_to_l2) &&
+ !esw_encap)
+ flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
+ break;
+ case MLX5_FLOW_NAMESPACE_EGRESS:
+ max_table_size = BIT(
+ MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, log_max_ft_size));
+ if (MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat) &&
+ !esw_encap)
+ flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
+ break;
+ case MLX5_FLOW_NAMESPACE_FDB_BYPASS:
+ max_table_size = BIT(
+ MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, log_max_ft_size));
+ if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, decap) && esw_encap)
+ flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP;
+ if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev,
+ reformat_l3_tunnel_to_l2) &&
+ esw_encap)
+ flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
+ priority = user_priority;
+ break;
+ case MLX5_FLOW_NAMESPACE_RDMA_RX:
+ max_table_size = BIT(
+ MLX5_CAP_FLOWTABLE_RDMA_RX(dev->mdev, log_max_ft_size));
+ priority = user_priority;
+ break;
+ case MLX5_FLOW_NAMESPACE_RDMA_TX:
+ max_table_size = BIT(
+ MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev, log_max_ft_size));
+ priority = user_priority;
+ break;
+ case MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX:
+ case MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX:
+ if (ib_port == 0 ||
+ user_priority >= MLX5_RDMA_TRANSPORT_BYPASS_PRIO)
+ return ERR_PTR(-EINVAL);
+ ret = mlx5_ib_fill_transport_ns_info(dev, ns_type, &flags,
+ &vport_idx, &vport,
+ &ft_mdev, ib_port,
+ &esw_owner_vhca_id);
+ if (ret)
+ return ERR_PTR(ret);
+
+ if (ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX)
+ max_table_size =
+ BIT(MLX5_CAP_FLOWTABLE_RDMA_TRANSPORT_RX(
+ ft_mdev, log_max_ft_size));
+ else
+ max_table_size =
+ BIT(MLX5_CAP_FLOWTABLE_RDMA_TRANSPORT_TX(
+ ft_mdev, log_max_ft_size));
+ priority = user_priority;
+ break;
+ default:
+ break;
+ }
+
+ max_table_size = min_t(int, max_table_size, MLX5_FS_MAX_ENTRIES);
+
+ if (ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX ||
+ ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX)
+ ns = mlx5_get_flow_vport_namespace(ft_mdev, ns_type, vport_idx);
+ else
+ ns = mlx5_get_flow_namespace(ft_mdev, ns_type);
+
+ if (!ns)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ switch (ns_type) {
+ case MLX5_FLOW_NAMESPACE_BYPASS:
+ prio = &dev->flow_db->prios[priority];
+ break;
+ case MLX5_FLOW_NAMESPACE_EGRESS:
+ prio = &dev->flow_db->egress_prios[priority];
+ break;
+ case MLX5_FLOW_NAMESPACE_FDB_BYPASS:
+ prio = &dev->flow_db->fdb[priority];
+ break;
+ case MLX5_FLOW_NAMESPACE_RDMA_RX:
+ prio = &dev->flow_db->rdma_rx[priority];
+ break;
+ case MLX5_FLOW_NAMESPACE_RDMA_TX:
+ prio = &dev->flow_db->rdma_tx[priority];
+ break;
+ case MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX:
+ prio = &dev->flow_db->rdma_transport_rx[priority][ib_port - 1];
+ break;
+ case MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX:
+ prio = &dev->flow_db->rdma_transport_tx[priority][ib_port - 1];
+ break;
+ default: return ERR_PTR(-EINVAL);
+ }
+
+ if (!prio)
+ return ERR_PTR(-EINVAL);
+
+ if (prio->flow_table)
+ return prio;
+
+ ft_attr.prio = priority;
+ ft_attr.max_fte = max_table_size;
+ ft_attr.flags = flags;
+ ft_attr.vport = vport;
+ ft_attr.esw_owner_vhca_id = esw_owner_vhca_id;
+ ft_attr.autogroup.max_num_groups = MLX5_FS_MAX_TYPES;
+ return _get_prio(ns, prio, &ft_attr);
+}
+
+static struct mlx5_ib_flow_handler *
+_create_raw_flow_rule(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_flow_prio *ft_prio,
+ struct mlx5_flow_destination *dst,
+ struct mlx5_ib_flow_matcher *fs_matcher,
+ struct mlx5_flow_context *flow_context,
+ struct mlx5_flow_act *flow_act,
+ void *cmd_in, int inlen,
+ int dst_num)
+{
+ struct mlx5_ib_flow_handler *handler;
+ struct mlx5_flow_spec *spec;
+ struct mlx5_flow_table *ft = ft_prio->flow_table;
+ int err = 0;
+
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ handler = kzalloc(sizeof(*handler), GFP_KERNEL);
+ if (!handler || !spec) {
+ err = -ENOMEM;
+ goto free;
+ }
+
+ INIT_LIST_HEAD(&handler->list);
+
+ memcpy(spec->match_value, cmd_in, inlen);
+ memcpy(spec->match_criteria, fs_matcher->matcher_mask.match_params,
+ fs_matcher->mask_len);
+ spec->match_criteria_enable = fs_matcher->match_criteria_enable;
+ spec->flow_context = *flow_context;
+
+ handler->rule = mlx5_add_flow_rules(ft, spec,
+ flow_act, dst, dst_num);
+
+ if (IS_ERR(handler->rule)) {
+ err = PTR_ERR(handler->rule);
+ goto free;
+ }
+
+ ft_prio->refcount++;
+ handler->prio = ft_prio;
+ handler->dev = dev;
+ ft_prio->flow_table = ft;
+
+free:
+ if (err)
+ kfree(handler);
+ kvfree(spec);
+ return err ? ERR_PTR(err) : handler;
+}
+
+static bool raw_fs_is_multicast(struct mlx5_ib_flow_matcher *fs_matcher,
+ void *match_v)
+{
+ void *match_c;
+ void *match_v_set_lyr_2_4, *match_c_set_lyr_2_4;
+ void *dmac, *dmac_mask;
+ void *ipv4, *ipv4_mask;
+
+ if (!(fs_matcher->match_criteria_enable &
+ (1 << MATCH_CRITERIA_ENABLE_OUTER_BIT)))
+ return false;
+
+ match_c = fs_matcher->matcher_mask.match_params;
+ match_v_set_lyr_2_4 = MLX5_ADDR_OF(fte_match_param, match_v,
+ outer_headers);
+ match_c_set_lyr_2_4 = MLX5_ADDR_OF(fte_match_param, match_c,
+ outer_headers);
+
+ dmac = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_v_set_lyr_2_4,
+ dmac_47_16);
+ dmac_mask = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_c_set_lyr_2_4,
+ dmac_47_16);
+
+ if (is_multicast_ether_addr(dmac) &&
+ is_multicast_ether_addr(dmac_mask))
+ return true;
+
+ ipv4 = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_v_set_lyr_2_4,
+ dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
+
+ ipv4_mask = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_c_set_lyr_2_4,
+ dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
+
+ if (ipv4_is_multicast(*(__be32 *)(ipv4)) &&
+ ipv4_is_multicast(*(__be32 *)(ipv4_mask)))
+ return true;
+
+ return false;
+}
+
+static struct mlx5_ib_flow_handler *raw_fs_rule_add(
+ struct mlx5_ib_dev *dev, struct mlx5_ib_flow_matcher *fs_matcher,
+ struct mlx5_flow_context *flow_context, struct mlx5_flow_act *flow_act,
+ struct mlx5_fc *counter, void *cmd_in, int inlen, int dest_id, int dest_type)
+{
+ struct mlx5_flow_destination *dst;
+ struct mlx5_ib_flow_prio *ft_prio;
+ struct mlx5_ib_flow_handler *handler;
+ int dst_num = 0;
+ bool mcast;
+ int err;
+
+ if (fs_matcher->flow_type != MLX5_IB_FLOW_TYPE_NORMAL)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ if (fs_matcher->priority > MLX5_IB_FLOW_LAST_PRIO)
+ return ERR_PTR(-ENOMEM);
+
+ dst = kcalloc(2, sizeof(*dst), GFP_KERNEL);
+ if (!dst)
+ return ERR_PTR(-ENOMEM);
+
+ mcast = raw_fs_is_multicast(fs_matcher, cmd_in);
+ mutex_lock(&dev->flow_db->lock);
+
+ ft_prio = _get_flow_table(dev, fs_matcher->priority,
+ fs_matcher->ns_type, mcast,
+ fs_matcher->ib_port);
+ if (IS_ERR(ft_prio)) {
+ err = PTR_ERR(ft_prio);
+ goto unlock;
+ }
+
+ switch (dest_type) {
+ case MLX5_FLOW_DESTINATION_TYPE_TIR:
+ dst[dst_num].type = dest_type;
+ dst[dst_num++].tir_num = dest_id;
+ flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ break;
+ case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE:
+ dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM;
+ dst[dst_num++].ft_num = dest_id;
+ flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ break;
+ case MLX5_FLOW_DESTINATION_TYPE_PORT:
+ dst[dst_num++].type = MLX5_FLOW_DESTINATION_TYPE_PORT;
+ flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW;
+ break;
+ default:
+ break;
+ }
+
+ if (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
+ if (WARN_ON(!counter)) {
+ err = -EINVAL;
+ goto unlock;
+ }
+ dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+ dst[dst_num].counter = counter;
+ dst_num++;
+ }
+
+ handler = _create_raw_flow_rule(dev, ft_prio, dst_num ? dst : NULL,
+ fs_matcher, flow_context, flow_act,
+ cmd_in, inlen, dst_num);
+
+ if (IS_ERR(handler)) {
+ err = PTR_ERR(handler);
+ goto destroy_ft;
+ }
+
+ mutex_unlock(&dev->flow_db->lock);
+ atomic_inc(&fs_matcher->usecnt);
+ handler->flow_matcher = fs_matcher;
+
+ kfree(dst);
+
+ return handler;
+
+destroy_ft:
+ put_flow_table(dev, ft_prio, false);
+unlock:
+ mutex_unlock(&dev->flow_db->lock);
+ kfree(dst);
+
+ return ERR_PTR(err);
+}
+
+static void destroy_flow_action_raw(struct mlx5_ib_flow_action *maction)
+{
+ switch (maction->flow_action_raw.sub_type) {
+ case MLX5_IB_FLOW_ACTION_MODIFY_HEADER:
+ mlx5_modify_header_dealloc(maction->flow_action_raw.dev->mdev,
+ maction->flow_action_raw.modify_hdr);
+ break;
+ case MLX5_IB_FLOW_ACTION_PACKET_REFORMAT:
+ mlx5_packet_reformat_dealloc(maction->flow_action_raw.dev->mdev,
+ maction->flow_action_raw.pkt_reformat);
+ break;
+ case MLX5_IB_FLOW_ACTION_DECAP:
+ break;
+ default:
+ break;
+ }
+}
+
+static int mlx5_ib_destroy_flow_action(struct ib_flow_action *action)
+{
+ struct mlx5_ib_flow_action *maction = to_mflow_act(action);
+
+ switch (action->type) {
+ case IB_FLOW_ACTION_UNSPECIFIED:
+ destroy_flow_action_raw(maction);
+ break;
+ default:
+ WARN_ON(true);
+ break;
+ }
+
+ kfree(maction);
+ return 0;
+}
+
+static int
+mlx5_ib_ft_type_to_namespace(enum mlx5_ib_uapi_flow_table_type table_type,
+ enum mlx5_flow_namespace_type *namespace)
+{
+ switch (table_type) {
+ case MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX:
+ *namespace = MLX5_FLOW_NAMESPACE_BYPASS;
+ break;
+ case MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX:
+ *namespace = MLX5_FLOW_NAMESPACE_EGRESS;
+ break;
+ case MLX5_IB_UAPI_FLOW_TABLE_TYPE_FDB:
+ *namespace = MLX5_FLOW_NAMESPACE_FDB_BYPASS;
+ break;
+ case MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_RX:
+ *namespace = MLX5_FLOW_NAMESPACE_RDMA_RX;
+ break;
+ case MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_TX:
+ *namespace = MLX5_FLOW_NAMESPACE_RDMA_TX;
+ break;
+ case MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_TRANSPORT_RX:
+ *namespace = MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX;
+ break;
+ case MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_TRANSPORT_TX:
+ *namespace = MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static const struct uverbs_attr_spec mlx5_ib_flow_type[] = {
+ [MLX5_IB_FLOW_TYPE_NORMAL] = {
+ .type = UVERBS_ATTR_TYPE_PTR_IN,
+ .u.ptr = {
+ .len = sizeof(u16), /* data is priority */
+ .min_len = sizeof(u16),
+ }
+ },
+ [MLX5_IB_FLOW_TYPE_SNIFFER] = {
+ .type = UVERBS_ATTR_TYPE_PTR_IN,
+ UVERBS_ATTR_NO_DATA(),
+ },
+ [MLX5_IB_FLOW_TYPE_ALL_DEFAULT] = {
+ .type = UVERBS_ATTR_TYPE_PTR_IN,
+ UVERBS_ATTR_NO_DATA(),
+ },
+ [MLX5_IB_FLOW_TYPE_MC_DEFAULT] = {
+ .type = UVERBS_ATTR_TYPE_PTR_IN,
+ UVERBS_ATTR_NO_DATA(),
+ },
+};
+
+static bool is_flow_dest(void *obj, int *dest_id, int *dest_type)
+{
+ struct devx_obj *devx_obj = obj;
+ u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox, opcode);
+
+ switch (opcode) {
+ case MLX5_CMD_OP_DESTROY_TIR:
+ *dest_type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+ *dest_id = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox,
+ obj_id);
+ return true;
+
+ case MLX5_CMD_OP_DESTROY_FLOW_TABLE:
+ *dest_type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ *dest_id = MLX5_GET(destroy_flow_table_in, devx_obj->dinbox,
+ table_id);
+ return true;
+ default:
+ return false;
+ }
+}
+
+static int get_dests(struct uverbs_attr_bundle *attrs,
+ struct mlx5_ib_flow_matcher *fs_matcher, int *dest_id,
+ int *dest_type, struct ib_qp **qp, u32 *flags)
+{
+ bool dest_devx, dest_qp;
+ void *devx_obj;
+ int err;
+
+ dest_devx = uverbs_attr_is_valid(attrs,
+ MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX);
+ dest_qp = uverbs_attr_is_valid(attrs,
+ MLX5_IB_ATTR_CREATE_FLOW_DEST_QP);
+
+ *flags = 0;
+ err = uverbs_get_flags32(flags, attrs, MLX5_IB_ATTR_CREATE_FLOW_FLAGS,
+ MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DEFAULT_MISS |
+ MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP);
+ if (err)
+ return err;
+
+ /* Both flags are not allowed */
+ if (*flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DEFAULT_MISS &&
+ *flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP)
+ return -EINVAL;
+
+ if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS) {
+ if (dest_devx && (dest_qp || *flags))
+ return -EINVAL;
+ else if (dest_qp && *flags)
+ return -EINVAL;
+ }
+
+ /* Allow only DEVX object, drop as dest for FDB */
+ if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB_BYPASS &&
+ !(dest_devx || (*flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP)))
+ return -EINVAL;
+
+ /* Allow only DEVX object or QP as dest when inserting to RDMA_RX */
+ if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX ||
+ fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX) &&
+ ((!dest_devx && !dest_qp) || (dest_devx && dest_qp)))
+ return -EINVAL;
+
+ *qp = NULL;
+ if (dest_devx) {
+ devx_obj =
+ uverbs_attr_get_obj(attrs,
+ MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX);
+
+ /* Verify that the given DEVX object is a flow
+ * steering destination.
+ */
+ if (!is_flow_dest(devx_obj, dest_id, dest_type))
+ return -EINVAL;
+ /* Allow only flow table as dest when inserting to FDB or RDMA_RX */
+ if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB_BYPASS ||
+ fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX ||
+ fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX) &&
+ *dest_type != MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE)
+ return -EINVAL;
+ } else if (dest_qp) {
+ struct mlx5_ib_qp *mqp;
+
+ *qp = uverbs_attr_get_obj(attrs,
+ MLX5_IB_ATTR_CREATE_FLOW_DEST_QP);
+ if (IS_ERR(*qp))
+ return PTR_ERR(*qp);
+
+ if ((*qp)->qp_type != IB_QPT_RAW_PACKET)
+ return -EINVAL;
+
+ mqp = to_mqp(*qp);
+ if (mqp->is_rss)
+ *dest_id = mqp->rss_qp.tirn;
+ else
+ *dest_id = mqp->raw_packet_qp.rq.tirn;
+ *dest_type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+ } else if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS ||
+ fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX ||
+ fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX) &&
+ !(*flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP)) {
+ *dest_type = MLX5_FLOW_DESTINATION_TYPE_PORT;
+ }
+
+ if (*dest_type == MLX5_FLOW_DESTINATION_TYPE_TIR &&
+ (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS ||
+ fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX ||
+ fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX))
+ return -EINVAL;
+
+ return 0;
+}
+
+static bool
+is_flow_counter(void *obj, u32 offset, u32 *counter_id, u32 *fc_bulk_size)
+{
+ struct devx_obj *devx_obj = obj;
+ u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox, opcode);
+
+ if (opcode == MLX5_CMD_OP_DEALLOC_FLOW_COUNTER) {
+
+ if (offset && offset >= devx_obj->flow_counter_bulk_size)
+ return false;
+
+ *fc_bulk_size = devx_obj->flow_counter_bulk_size;
+ *counter_id = MLX5_GET(dealloc_flow_counter_in,
+ devx_obj->dinbox,
+ flow_counter_id);
+ *counter_id += offset;
+ return true;
+ }
+
+ return false;
+}
+
+#define MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS 2
+static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct mlx5_flow_context flow_context = {.flow_tag =
+ MLX5_FS_DEFAULT_FLOW_TAG};
+ int dest_id, dest_type = -1, inlen, len, ret, i;
+ struct mlx5_ib_flow_handler *flow_handler;
+ struct mlx5_ib_flow_matcher *fs_matcher;
+ struct ib_uobject **arr_flow_actions;
+ struct ib_uflow_resources *uflow_res;
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5_fc *counter = NULL;
+ struct ib_qp *qp = NULL;
+ void *devx_obj, *cmd_in;
+ struct ib_uobject *uobj;
+ struct mlx5_ib_dev *dev;
+ u32 flags;
+
+ if (!rdma_uattrs_has_raw_cap(attrs))
+ return -EPERM;
+
+ fs_matcher = uverbs_attr_get_obj(attrs,
+ MLX5_IB_ATTR_CREATE_FLOW_MATCHER);
+ uobj = uverbs_attr_get_uobject(attrs, MLX5_IB_ATTR_CREATE_FLOW_HANDLE);
+ dev = mlx5_udata_to_mdev(&attrs->driver_udata);
+
+ if (get_dests(attrs, fs_matcher, &dest_id, &dest_type, &qp, &flags))
+ return -EINVAL;
+
+ if (flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DEFAULT_MISS)
+ flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_NS;
+
+ if (flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP)
+ flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
+
+ len = uverbs_attr_get_uobjs_arr(attrs,
+ MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX, &arr_flow_actions);
+ if (len) {
+ u32 *offset_attr, fc_bulk_size, offset = 0, counter_id = 0;
+ devx_obj = arr_flow_actions[0]->object;
+
+ if (uverbs_attr_is_valid(attrs,
+ MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET)) {
+
+ int num_offsets = uverbs_attr_ptr_get_array_size(
+ attrs,
+ MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET,
+ sizeof(u32));
+
+ if (num_offsets != 1)
+ return -EINVAL;
+
+ offset_attr = uverbs_attr_get_alloced_ptr(
+ attrs,
+ MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET);
+ offset = *offset_attr;
+ }
+
+ if (!is_flow_counter(devx_obj, offset, &counter_id, &fc_bulk_size))
+ return -EINVAL;
+ counter = mlx5_fc_local_create(counter_id, offset, fc_bulk_size);
+ if (IS_ERR(counter))
+ return PTR_ERR(counter);
+
+ flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
+ }
+
+ cmd_in = uverbs_attr_get_alloced_ptr(
+ attrs, MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE);
+ inlen = uverbs_attr_get_len(attrs,
+ MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE);
+
+ uflow_res = flow_resources_alloc(MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS);
+ if (!uflow_res) {
+ ret = -ENOMEM;
+ goto destroy_counter;
+ }
+
+ len = uverbs_attr_get_uobjs_arr(attrs,
+ MLX5_IB_ATTR_CREATE_FLOW_ARR_FLOW_ACTIONS, &arr_flow_actions);
+ for (i = 0; i < len; i++) {
+ struct mlx5_ib_flow_action *maction =
+ to_mflow_act(arr_flow_actions[i]->object);
+
+ ret = parse_flow_flow_action(maction, false, &flow_act);
+ if (ret)
+ goto err_out;
+ flow_resources_add(uflow_res, IB_FLOW_SPEC_ACTION_HANDLE,
+ arr_flow_actions[i]->object);
+ }
+
+ ret = uverbs_copy_from(&flow_context.flow_tag, attrs,
+ MLX5_IB_ATTR_CREATE_FLOW_TAG);
+ if (!ret) {
+ if (flow_context.flow_tag >= BIT(24)) {
+ ret = -EINVAL;
+ goto err_out;
+ }
+ flow_context.flags |= FLOW_CONTEXT_HAS_TAG;
+ }
+
+ flow_handler =
+ raw_fs_rule_add(dev, fs_matcher, &flow_context, &flow_act,
+ counter, cmd_in, inlen, dest_id, dest_type);
+ if (IS_ERR(flow_handler)) {
+ ret = PTR_ERR(flow_handler);
+ goto err_out;
+ }
+
+ ib_set_flow(uobj, &flow_handler->ibflow, qp, &dev->ib_dev, uflow_res);
+
+ return 0;
+err_out:
+ ib_uverbs_flow_resources_free(uflow_res);
+destroy_counter:
+ if (counter)
+ mlx5_fc_local_destroy(counter);
+ return ret;
+}
+
+static int flow_matcher_cleanup(struct ib_uobject *uobject,
+ enum rdma_remove_reason why,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct mlx5_ib_flow_matcher *obj = uobject->object;
+
+ if (atomic_read(&obj->usecnt))
+ return -EBUSY;
+
+ kfree(obj);
+ return 0;
+}
+
+static int steering_anchor_create_ft(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_flow_prio *ft_prio,
+ enum mlx5_flow_namespace_type ns_type)
+{
+ struct mlx5_flow_table_attr ft_attr = {};
+ struct mlx5_flow_namespace *ns;
+ struct mlx5_flow_table *ft;
+
+ if (ft_prio->anchor.ft)
+ return 0;
+
+ ns = mlx5_get_flow_namespace(dev->mdev, ns_type);
+ if (!ns)
+ return -EOPNOTSUPP;
+
+ ft_attr.flags = MLX5_FLOW_TABLE_UNMANAGED;
+ ft_attr.uid = MLX5_SHARED_RESOURCE_UID;
+ ft_attr.prio = 0;
+ ft_attr.max_fte = 2;
+ ft_attr.level = 1;
+
+ ft = mlx5_create_flow_table(ns, &ft_attr);
+ if (IS_ERR(ft))
+ return PTR_ERR(ft);
+
+ ft_prio->anchor.ft = ft;
+
+ return 0;
+}
+
+static void steering_anchor_destroy_ft(struct mlx5_ib_flow_prio *ft_prio)
+{
+ if (ft_prio->anchor.ft) {
+ mlx5_destroy_flow_table(ft_prio->anchor.ft);
+ ft_prio->anchor.ft = NULL;
+ }
+}
+
+static int
+steering_anchor_create_fg_drop(struct mlx5_ib_flow_prio *ft_prio)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_flow_group *fg;
+ void *flow_group_in;
+ int err = 0;
+
+ if (ft_prio->anchor.fg_drop)
+ return 0;
+
+ flow_group_in = kvzalloc(inlen, GFP_KERNEL);
+ if (!flow_group_in)
+ return -ENOMEM;
+
+ MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1);
+ MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1);
+
+ fg = mlx5_create_flow_group(ft_prio->anchor.ft, flow_group_in);
+ if (IS_ERR(fg)) {
+ err = PTR_ERR(fg);
+ goto out;
+ }
+
+ ft_prio->anchor.fg_drop = fg;
+
+out:
+ kvfree(flow_group_in);
+
+ return err;
+}
+
+static void
+steering_anchor_destroy_fg_drop(struct mlx5_ib_flow_prio *ft_prio)
+{
+ if (ft_prio->anchor.fg_drop) {
+ mlx5_destroy_flow_group(ft_prio->anchor.fg_drop);
+ ft_prio->anchor.fg_drop = NULL;
+ }
+}
+
+static int
+steering_anchor_create_fg_goto_table(struct mlx5_ib_flow_prio *ft_prio)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ struct mlx5_flow_group *fg;
+ void *flow_group_in;
+ int err = 0;
+
+ if (ft_prio->anchor.fg_goto_table)
+ return 0;
+
+ flow_group_in = kvzalloc(inlen, GFP_KERNEL);
+ if (!flow_group_in)
+ return -ENOMEM;
+
+ fg = mlx5_create_flow_group(ft_prio->anchor.ft, flow_group_in);
+ if (IS_ERR(fg)) {
+ err = PTR_ERR(fg);
+ goto out;
+ }
+ ft_prio->anchor.fg_goto_table = fg;
+
+out:
+ kvfree(flow_group_in);
+
+ return err;
+}
+
+static void
+steering_anchor_destroy_fg_goto_table(struct mlx5_ib_flow_prio *ft_prio)
+{
+ if (ft_prio->anchor.fg_goto_table) {
+ mlx5_destroy_flow_group(ft_prio->anchor.fg_goto_table);
+ ft_prio->anchor.fg_goto_table = NULL;
+ }
+}
+
+static int
+steering_anchor_create_rule_drop(struct mlx5_ib_flow_prio *ft_prio)
+{
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5_flow_handle *handle;
+
+ if (ft_prio->anchor.rule_drop)
+ return 0;
+
+ flow_act.fg = ft_prio->anchor.fg_drop;
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP;
+
+ handle = mlx5_add_flow_rules(ft_prio->anchor.ft, NULL, &flow_act,
+ NULL, 0);
+ if (IS_ERR(handle))
+ return PTR_ERR(handle);
+
+ ft_prio->anchor.rule_drop = handle;
+
+ return 0;
+}
+
+static void steering_anchor_destroy_rule_drop(struct mlx5_ib_flow_prio *ft_prio)
+{
+ if (ft_prio->anchor.rule_drop) {
+ mlx5_del_flow_rules(ft_prio->anchor.rule_drop);
+ ft_prio->anchor.rule_drop = NULL;
+ }
+}
+
+static int
+steering_anchor_create_rule_goto_table(struct mlx5_ib_flow_prio *ft_prio)
+{
+ struct mlx5_flow_destination dest = {};
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5_flow_handle *handle;
+
+ if (ft_prio->anchor.rule_goto_table)
+ return 0;
+
+ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
+ flow_act.fg = ft_prio->anchor.fg_goto_table;
+
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ dest.ft = ft_prio->flow_table;
+
+ handle = mlx5_add_flow_rules(ft_prio->anchor.ft, NULL, &flow_act,
+ &dest, 1);
+ if (IS_ERR(handle))
+ return PTR_ERR(handle);
+
+ ft_prio->anchor.rule_goto_table = handle;
+
+ return 0;
+}
+
+static void
+steering_anchor_destroy_rule_goto_table(struct mlx5_ib_flow_prio *ft_prio)
+{
+ if (ft_prio->anchor.rule_goto_table) {
+ mlx5_del_flow_rules(ft_prio->anchor.rule_goto_table);
+ ft_prio->anchor.rule_goto_table = NULL;
+ }
+}
+
+static int steering_anchor_create_res(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_flow_prio *ft_prio,
+ enum mlx5_flow_namespace_type ns_type)
+{
+ int err;
+
+ err = steering_anchor_create_ft(dev, ft_prio, ns_type);
+ if (err)
+ return err;
+
+ err = steering_anchor_create_fg_drop(ft_prio);
+ if (err)
+ goto destroy_ft;
+
+ err = steering_anchor_create_fg_goto_table(ft_prio);
+ if (err)
+ goto destroy_fg_drop;
+
+ err = steering_anchor_create_rule_drop(ft_prio);
+ if (err)
+ goto destroy_fg_goto_table;
+
+ err = steering_anchor_create_rule_goto_table(ft_prio);
+ if (err)
+ goto destroy_rule_drop;
+
+ return 0;
+
+destroy_rule_drop:
+ steering_anchor_destroy_rule_drop(ft_prio);
+destroy_fg_goto_table:
+ steering_anchor_destroy_fg_goto_table(ft_prio);
+destroy_fg_drop:
+ steering_anchor_destroy_fg_drop(ft_prio);
+destroy_ft:
+ steering_anchor_destroy_ft(ft_prio);
+
+ return err;
+}
+
+static void mlx5_steering_anchor_destroy_res(struct mlx5_ib_flow_prio *ft_prio)
+{
+ steering_anchor_destroy_rule_goto_table(ft_prio);
+ steering_anchor_destroy_rule_drop(ft_prio);
+ steering_anchor_destroy_fg_goto_table(ft_prio);
+ steering_anchor_destroy_fg_drop(ft_prio);
+ steering_anchor_destroy_ft(ft_prio);
+}
+
+static int steering_anchor_cleanup(struct ib_uobject *uobject,
+ enum rdma_remove_reason why,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct mlx5_ib_steering_anchor *obj = uobject->object;
+
+ if (atomic_read(&obj->usecnt))
+ return -EBUSY;
+
+ mutex_lock(&obj->dev->flow_db->lock);
+ if (!--obj->ft_prio->anchor.rule_goto_table_ref)
+ steering_anchor_destroy_rule_goto_table(obj->ft_prio);
+
+ put_flow_table(obj->dev, obj->ft_prio, true);
+ mutex_unlock(&obj->dev->flow_db->lock);
+
+ kfree(obj);
+ return 0;
+}
+
+static void fs_cleanup_anchor(struct mlx5_ib_flow_prio *prio,
+ int count)
+{
+ while (count--)
+ mlx5_steering_anchor_destroy_res(&prio[count]);
+}
+
+void mlx5_ib_fs_cleanup_anchor(struct mlx5_ib_dev *dev)
+{
+ fs_cleanup_anchor(dev->flow_db->prios, MLX5_IB_NUM_FLOW_FT);
+ fs_cleanup_anchor(dev->flow_db->egress_prios, MLX5_IB_NUM_FLOW_FT);
+ fs_cleanup_anchor(dev->flow_db->sniffer, MLX5_IB_NUM_SNIFFER_FTS);
+ fs_cleanup_anchor(dev->flow_db->egress, MLX5_IB_NUM_EGRESS_FTS);
+ fs_cleanup_anchor(dev->flow_db->fdb, MLX5_IB_NUM_FDB_FTS);
+ fs_cleanup_anchor(dev->flow_db->rdma_rx, MLX5_IB_NUM_FLOW_FT);
+ fs_cleanup_anchor(dev->flow_db->rdma_tx, MLX5_IB_NUM_FLOW_FT);
+}
+
+static int mlx5_ib_matcher_ns(struct uverbs_attr_bundle *attrs,
+ struct mlx5_ib_flow_matcher *obj)
+{
+ enum mlx5_ib_uapi_flow_table_type ft_type =
+ MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX;
+ u32 flags;
+ int err;
+
+ /* New users should use MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE and older
+ * users should switch to it. We leave this to not break userspace
+ */
+ if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE) &&
+ uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS))
+ return -EINVAL;
+
+ if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE)) {
+ err = uverbs_get_const(&ft_type, attrs,
+ MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE);
+ if (err)
+ return err;
+
+ err = mlx5_ib_ft_type_to_namespace(ft_type, &obj->ns_type);
+ if (err)
+ return err;
+
+ return 0;
+ }
+
+ if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS)) {
+ err = uverbs_get_flags32(&flags, attrs,
+ MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS,
+ IB_FLOW_ATTR_FLAGS_EGRESS);
+ if (err)
+ return err;
+
+ if (flags)
+ return mlx5_ib_ft_type_to_namespace(
+ MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX,
+ &obj->ns_type);
+ }
+
+ obj->ns_type = MLX5_FLOW_NAMESPACE_BYPASS;
+
+ return 0;
+}
+
+static bool verify_context_caps(struct mlx5_ib_dev *dev, u64 enabled_caps)
+{
+ if (is_mdev_switchdev_mode(dev->mdev))
+ return UCAP_ENABLED(enabled_caps,
+ RDMA_UCAP_MLX5_CTRL_OTHER_VHCA);
+
+ return UCAP_ENABLED(enabled_caps, RDMA_UCAP_MLX5_CTRL_LOCAL);
+}
+
+static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_MATCHER_CREATE)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uobject *uobj = uverbs_attr_get_uobject(
+ attrs, MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE);
+ struct mlx5_ib_dev *dev = mlx5_udata_to_mdev(&attrs->driver_udata);
+ struct mlx5_ib_flow_matcher *obj;
+ int err;
+
+ obj = kzalloc(sizeof(struct mlx5_ib_flow_matcher), GFP_KERNEL);
+ if (!obj)
+ return -ENOMEM;
+
+ obj->mask_len = uverbs_attr_get_len(
+ attrs, MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK);
+ err = uverbs_copy_from(&obj->matcher_mask,
+ attrs,
+ MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK);
+ if (err)
+ goto end;
+
+ obj->flow_type = uverbs_attr_get_enum_id(
+ attrs, MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE);
+
+ if (obj->flow_type == MLX5_IB_FLOW_TYPE_NORMAL) {
+ err = uverbs_copy_from(&obj->priority,
+ attrs,
+ MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE);
+ if (err)
+ goto end;
+ }
+
+ err = uverbs_copy_from(&obj->match_criteria_enable,
+ attrs,
+ MLX5_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA);
+ if (err)
+ goto end;
+
+ err = mlx5_ib_matcher_ns(attrs, obj);
+ if (err)
+ goto end;
+
+ if (obj->ns_type == MLX5_FLOW_NAMESPACE_FDB_BYPASS &&
+ mlx5_eswitch_mode(dev->mdev) != MLX5_ESWITCH_OFFLOADS) {
+ err = -EINVAL;
+ goto end;
+ }
+
+ if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_IB_PORT)) {
+ err = uverbs_copy_from(&obj->ib_port, attrs,
+ MLX5_IB_ATTR_FLOW_MATCHER_IB_PORT);
+ if (err)
+ goto end;
+ if (!rdma_is_port_valid(&dev->ib_dev, obj->ib_port)) {
+ err = -EINVAL;
+ goto end;
+ }
+ if (obj->ns_type != MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX &&
+ obj->ns_type != MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX) {
+ err = -EINVAL;
+ goto end;
+ }
+ if (!verify_context_caps(dev, uobj->context->enabled_caps)) {
+ err = -EOPNOTSUPP;
+ goto end;
+ }
+ }
+
+ uobj->object = obj;
+ obj->mdev = dev->mdev;
+ atomic_set(&obj->usecnt, 0);
+ return 0;
+
+end:
+ kfree(obj);
+ return err;
+}
+
+static int UVERBS_HANDLER(MLX5_IB_METHOD_STEERING_ANCHOR_CREATE)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uobject *uobj = uverbs_attr_get_uobject(
+ attrs, MLX5_IB_ATTR_STEERING_ANCHOR_CREATE_HANDLE);
+ struct mlx5_ib_dev *dev = mlx5_udata_to_mdev(&attrs->driver_udata);
+ enum mlx5_ib_uapi_flow_table_type ib_uapi_ft_type;
+ enum mlx5_flow_namespace_type ns_type;
+ struct mlx5_ib_steering_anchor *obj;
+ struct mlx5_ib_flow_prio *ft_prio;
+ u16 priority;
+ u32 ft_id;
+ int err;
+
+ if (!rdma_dev_has_raw_cap(&dev->ib_dev))
+ return -EPERM;
+
+ err = uverbs_get_const(&ib_uapi_ft_type, attrs,
+ MLX5_IB_ATTR_STEERING_ANCHOR_FT_TYPE);
+ if (err)
+ return err;
+
+ err = mlx5_ib_ft_type_to_namespace(ib_uapi_ft_type, &ns_type);
+ if (err)
+ return err;
+
+ err = uverbs_copy_from(&priority, attrs,
+ MLX5_IB_ATTR_STEERING_ANCHOR_PRIORITY);
+ if (err)
+ return err;
+
+ obj = kzalloc(sizeof(*obj), GFP_KERNEL);
+ if (!obj)
+ return -ENOMEM;
+
+ mutex_lock(&dev->flow_db->lock);
+
+ ft_prio = _get_flow_table(dev, priority, ns_type, 0, 0);
+ if (IS_ERR(ft_prio)) {
+ err = PTR_ERR(ft_prio);
+ goto free_obj;
+ }
+
+ ft_prio->refcount++;
+
+ if (!ft_prio->anchor.rule_goto_table_ref) {
+ err = steering_anchor_create_res(dev, ft_prio, ns_type);
+ if (err)
+ goto put_flow_table;
+ }
+
+ ft_prio->anchor.rule_goto_table_ref++;
+
+ ft_id = mlx5_flow_table_id(ft_prio->anchor.ft);
+
+ err = uverbs_copy_to(attrs, MLX5_IB_ATTR_STEERING_ANCHOR_FT_ID,
+ &ft_id, sizeof(ft_id));
+ if (err)
+ goto destroy_res;
+
+ mutex_unlock(&dev->flow_db->lock);
+
+ uobj->object = obj;
+ obj->dev = dev;
+ obj->ft_prio = ft_prio;
+ atomic_set(&obj->usecnt, 0);
+
+ return 0;
+
+destroy_res:
+ --ft_prio->anchor.rule_goto_table_ref;
+ mlx5_steering_anchor_destroy_res(ft_prio);
+put_flow_table:
+ put_flow_table(dev, ft_prio, true);
+free_obj:
+ mutex_unlock(&dev->flow_db->lock);
+ kfree(obj);
+
+ return err;
+}
+
+static struct ib_flow_action *
+mlx5_ib_create_modify_header(struct mlx5_ib_dev *dev,
+ enum mlx5_ib_uapi_flow_table_type ft_type,
+ u8 num_actions, void *in)
+{
+ enum mlx5_flow_namespace_type namespace;
+ struct mlx5_ib_flow_action *maction;
+ int ret;
+
+ ret = mlx5_ib_ft_type_to_namespace(ft_type, &namespace);
+ if (ret)
+ return ERR_PTR(-EINVAL);
+
+ maction = kzalloc(sizeof(*maction), GFP_KERNEL);
+ if (!maction)
+ return ERR_PTR(-ENOMEM);
+
+ maction->flow_action_raw.modify_hdr =
+ mlx5_modify_header_alloc(dev->mdev, namespace, num_actions, in);
+
+ if (IS_ERR(maction->flow_action_raw.modify_hdr)) {
+ ret = PTR_ERR(maction->flow_action_raw.modify_hdr);
+ kfree(maction);
+ return ERR_PTR(ret);
+ }
+ maction->flow_action_raw.sub_type =
+ MLX5_IB_FLOW_ACTION_MODIFY_HEADER;
+ maction->flow_action_raw.dev = dev;
+
+ return &maction->ib_action;
+}
+
+static bool mlx5_ib_modify_header_supported(struct mlx5_ib_dev *dev)
+{
+ return MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
+ max_modify_header_actions) ||
+ MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev,
+ max_modify_header_actions) ||
+ MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev,
+ max_modify_header_actions);
+}
+
+static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uobject *uobj = uverbs_attr_get_uobject(
+ attrs, MLX5_IB_ATTR_CREATE_MODIFY_HEADER_HANDLE);
+ struct mlx5_ib_dev *mdev = mlx5_udata_to_mdev(&attrs->driver_udata);
+ enum mlx5_ib_uapi_flow_table_type ft_type;
+ struct ib_flow_action *action;
+ int num_actions;
+ void *in;
+ int ret;
+
+ if (!mlx5_ib_modify_header_supported(mdev))
+ return -EOPNOTSUPP;
+
+ in = uverbs_attr_get_alloced_ptr(attrs,
+ MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM);
+
+ num_actions = uverbs_attr_ptr_get_array_size(
+ attrs, MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM,
+ MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto));
+ if (num_actions < 0)
+ return num_actions;
+
+ ret = uverbs_get_const(&ft_type, attrs,
+ MLX5_IB_ATTR_CREATE_MODIFY_HEADER_FT_TYPE);
+ if (ret)
+ return ret;
+ action = mlx5_ib_create_modify_header(mdev, ft_type, num_actions, in);
+ if (IS_ERR(action))
+ return PTR_ERR(action);
+
+ uverbs_flow_action_fill_action(action, uobj, &mdev->ib_dev,
+ IB_FLOW_ACTION_UNSPECIFIED);
+
+ return 0;
+}
+
+static bool mlx5_ib_flow_action_packet_reformat_valid(struct mlx5_ib_dev *ibdev,
+ u8 packet_reformat_type,
+ u8 ft_type)
+{
+ switch (packet_reformat_type) {
+ case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L2_TUNNEL:
+ if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX)
+ return MLX5_CAP_FLOWTABLE(ibdev->mdev,
+ encap_general_header);
+ break;
+ case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL:
+ if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX)
+ return MLX5_CAP_FLOWTABLE_NIC_TX(ibdev->mdev,
+ reformat_l2_to_l3_tunnel);
+ break;
+ case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L3_TUNNEL_TO_L2:
+ if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX)
+ return MLX5_CAP_FLOWTABLE_NIC_RX(ibdev->mdev,
+ reformat_l3_tunnel_to_l2);
+ break;
+ case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TUNNEL_TO_L2:
+ if (ft_type == MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX)
+ return MLX5_CAP_FLOWTABLE_NIC_RX(ibdev->mdev, decap);
+ break;
+ default:
+ break;
+ }
+
+ return false;
+}
+
+static int mlx5_ib_dv_to_prm_packet_reforamt_type(u8 dv_prt, u8 *prm_prt)
+{
+ switch (dv_prt) {
+ case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L2_TUNNEL:
+ *prm_prt = MLX5_REFORMAT_TYPE_L2_TO_L2_TUNNEL;
+ break;
+ case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L3_TUNNEL_TO_L2:
+ *prm_prt = MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2;
+ break;
+ case MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL:
+ *prm_prt = MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int mlx5_ib_flow_action_create_packet_reformat_ctx(
+ struct mlx5_ib_dev *dev,
+ struct mlx5_ib_flow_action *maction,
+ u8 ft_type, u8 dv_prt,
+ void *in, size_t len)
+{
+ struct mlx5_pkt_reformat_params reformat_params;
+ enum mlx5_flow_namespace_type namespace;
+ u8 prm_prt;
+ int ret;
+
+ ret = mlx5_ib_ft_type_to_namespace(ft_type, &namespace);
+ if (ret)
+ return ret;
+
+ ret = mlx5_ib_dv_to_prm_packet_reforamt_type(dv_prt, &prm_prt);
+ if (ret)
+ return ret;
+
+ memset(&reformat_params, 0, sizeof(reformat_params));
+ reformat_params.type = prm_prt;
+ reformat_params.size = len;
+ reformat_params.data = in;
+ maction->flow_action_raw.pkt_reformat =
+ mlx5_packet_reformat_alloc(dev->mdev, &reformat_params,
+ namespace);
+ if (IS_ERR(maction->flow_action_raw.pkt_reformat)) {
+ ret = PTR_ERR(maction->flow_action_raw.pkt_reformat);
+ return ret;
+ }
+
+ maction->flow_action_raw.sub_type =
+ MLX5_IB_FLOW_ACTION_PACKET_REFORMAT;
+ maction->flow_action_raw.dev = dev;
+
+ return 0;
+}
+
+static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs,
+ MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_HANDLE);
+ struct mlx5_ib_dev *mdev = mlx5_udata_to_mdev(&attrs->driver_udata);
+ enum mlx5_ib_uapi_flow_action_packet_reformat_type dv_prt;
+ enum mlx5_ib_uapi_flow_table_type ft_type;
+ struct mlx5_ib_flow_action *maction;
+ int ret;
+
+ ret = uverbs_get_const(&ft_type, attrs,
+ MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_FT_TYPE);
+ if (ret)
+ return ret;
+
+ ret = uverbs_get_const(&dv_prt, attrs,
+ MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_TYPE);
+ if (ret)
+ return ret;
+
+ if (!mlx5_ib_flow_action_packet_reformat_valid(mdev, dv_prt, ft_type))
+ return -EOPNOTSUPP;
+
+ maction = kzalloc(sizeof(*maction), GFP_KERNEL);
+ if (!maction)
+ return -ENOMEM;
+
+ if (dv_prt ==
+ MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TUNNEL_TO_L2) {
+ maction->flow_action_raw.sub_type =
+ MLX5_IB_FLOW_ACTION_DECAP;
+ maction->flow_action_raw.dev = mdev;
+ } else {
+ void *in;
+ int len;
+
+ in = uverbs_attr_get_alloced_ptr(attrs,
+ MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF);
+ if (IS_ERR(in)) {
+ ret = PTR_ERR(in);
+ goto free_maction;
+ }
+
+ len = uverbs_attr_get_len(attrs,
+ MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF);
+
+ ret = mlx5_ib_flow_action_create_packet_reformat_ctx(mdev,
+ maction, ft_type, dv_prt, in, len);
+ if (ret)
+ goto free_maction;
+ }
+
+ uverbs_flow_action_fill_action(&maction->ib_action, uobj, &mdev->ib_dev,
+ IB_FLOW_ACTION_UNSPECIFIED);
+ return 0;
+
+free_maction:
+ kfree(maction);
+ return ret;
+}
+
+DECLARE_UVERBS_NAMED_METHOD(
+ MLX5_IB_METHOD_CREATE_FLOW,
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_HANDLE,
+ UVERBS_OBJECT_FLOW,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(
+ MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE,
+ UVERBS_ATTR_SIZE(1, sizeof(struct mlx5_ib_match_params)),
+ UA_MANDATORY,
+ UA_ALLOC_AND_COPY),
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_MATCHER,
+ MLX5_IB_OBJECT_FLOW_MATCHER,
+ UVERBS_ACCESS_READ,
+ UA_MANDATORY),
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_DEST_QP,
+ UVERBS_OBJECT_QP,
+ UVERBS_ACCESS_READ),
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX,
+ MLX5_IB_OBJECT_DEVX_OBJ,
+ UVERBS_ACCESS_READ),
+ UVERBS_ATTR_IDRS_ARR(MLX5_IB_ATTR_CREATE_FLOW_ARR_FLOW_ACTIONS,
+ UVERBS_OBJECT_FLOW_ACTION,
+ UVERBS_ACCESS_READ, 1,
+ MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS,
+ UA_OPTIONAL),
+ UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_FLOW_TAG,
+ UVERBS_ATTR_TYPE(u32),
+ UA_OPTIONAL),
+ UVERBS_ATTR_IDRS_ARR(MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX,
+ MLX5_IB_OBJECT_DEVX_OBJ,
+ UVERBS_ACCESS_READ, 1, 1,
+ UA_OPTIONAL),
+ UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX_OFFSET,
+ UVERBS_ATTR_MIN_SIZE(sizeof(u32)),
+ UA_OPTIONAL,
+ UA_ALLOC_AND_COPY),
+ UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_CREATE_FLOW_FLAGS,
+ enum mlx5_ib_create_flow_flags,
+ UA_OPTIONAL));
+
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(
+ MLX5_IB_METHOD_DESTROY_FLOW,
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_HANDLE,
+ UVERBS_OBJECT_FLOW,
+ UVERBS_ACCESS_DESTROY,
+ UA_MANDATORY));
+
+ADD_UVERBS_METHODS(mlx5_ib_fs,
+ UVERBS_OBJECT_FLOW,
+ &UVERBS_METHOD(MLX5_IB_METHOD_CREATE_FLOW),
+ &UVERBS_METHOD(MLX5_IB_METHOD_DESTROY_FLOW));
+
+DECLARE_UVERBS_NAMED_METHOD(
+ MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER,
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_HANDLE,
+ UVERBS_OBJECT_FLOW_ACTION,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM,
+ UVERBS_ATTR_MIN_SIZE(MLX5_UN_SZ_BYTES(
+ set_add_copy_action_in_auto)),
+ UA_MANDATORY,
+ UA_ALLOC_AND_COPY),
+ UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_FT_TYPE,
+ enum mlx5_ib_uapi_flow_table_type,
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD(
+ MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT,
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_HANDLE,
+ UVERBS_OBJECT_FLOW_ACTION,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_DATA_BUF,
+ UVERBS_ATTR_MIN_SIZE(1),
+ UA_ALLOC_AND_COPY,
+ UA_OPTIONAL),
+ UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_TYPE,
+ enum mlx5_ib_uapi_flow_action_packet_reformat_type,
+ UA_MANDATORY),
+ UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_PACKET_REFORMAT_FT_TYPE,
+ enum mlx5_ib_uapi_flow_table_type,
+ UA_MANDATORY));
+
+ADD_UVERBS_METHODS(
+ mlx5_ib_flow_actions,
+ UVERBS_OBJECT_FLOW_ACTION,
+ &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER),
+ &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT));
+
+DECLARE_UVERBS_NAMED_METHOD(
+ MLX5_IB_METHOD_FLOW_MATCHER_CREATE,
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE,
+ MLX5_IB_OBJECT_FLOW_MATCHER,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(
+ MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK,
+ UVERBS_ATTR_SIZE(1, sizeof(struct mlx5_ib_match_params)),
+ UA_MANDATORY),
+ UVERBS_ATTR_ENUM_IN(MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE,
+ mlx5_ib_flow_type,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA,
+ UVERBS_ATTR_TYPE(u8),
+ UA_MANDATORY),
+ UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS,
+ enum ib_flow_flags,
+ UA_OPTIONAL),
+ UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE,
+ enum mlx5_ib_uapi_flow_table_type,
+ UA_OPTIONAL),
+ UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_FLOW_MATCHER_IB_PORT,
+ UVERBS_ATTR_TYPE(u32),
+ UA_OPTIONAL));
+
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(
+ MLX5_IB_METHOD_FLOW_MATCHER_DESTROY,
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_FLOW_MATCHER_DESTROY_HANDLE,
+ MLX5_IB_OBJECT_FLOW_MATCHER,
+ UVERBS_ACCESS_DESTROY,
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_FLOW_MATCHER,
+ UVERBS_TYPE_ALLOC_IDR(flow_matcher_cleanup),
+ &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_MATCHER_CREATE),
+ &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_MATCHER_DESTROY));
+
+DECLARE_UVERBS_NAMED_METHOD(
+ MLX5_IB_METHOD_STEERING_ANCHOR_CREATE,
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_STEERING_ANCHOR_CREATE_HANDLE,
+ MLX5_IB_OBJECT_STEERING_ANCHOR,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY),
+ UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_STEERING_ANCHOR_FT_TYPE,
+ enum mlx5_ib_uapi_flow_table_type,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_STEERING_ANCHOR_PRIORITY,
+ UVERBS_ATTR_TYPE(u16),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_STEERING_ANCHOR_FT_ID,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(
+ MLX5_IB_METHOD_STEERING_ANCHOR_DESTROY,
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_STEERING_ANCHOR_DESTROY_HANDLE,
+ MLX5_IB_OBJECT_STEERING_ANCHOR,
+ UVERBS_ACCESS_DESTROY,
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_OBJECT(
+ MLX5_IB_OBJECT_STEERING_ANCHOR,
+ UVERBS_TYPE_ALLOC_IDR(steering_anchor_cleanup),
+ &UVERBS_METHOD(MLX5_IB_METHOD_STEERING_ANCHOR_CREATE),
+ &UVERBS_METHOD(MLX5_IB_METHOD_STEERING_ANCHOR_DESTROY));
+
+const struct uapi_definition mlx5_ib_flow_defs[] = {
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
+ MLX5_IB_OBJECT_FLOW_MATCHER),
+ UAPI_DEF_CHAIN_OBJ_TREE(
+ UVERBS_OBJECT_FLOW,
+ &mlx5_ib_fs),
+ UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_FLOW_ACTION,
+ &mlx5_ib_flow_actions),
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
+ MLX5_IB_OBJECT_STEERING_ANCHOR,
+ UAPI_DEF_IS_OBJ_SUPPORTED(mlx5_ib_shared_ft_allowed)),
+ {},
+};
+
+static const struct ib_device_ops flow_ops = {
+ .create_flow = mlx5_ib_create_flow,
+ .destroy_flow = mlx5_ib_destroy_flow,
+ .destroy_flow_action = mlx5_ib_destroy_flow_action,
+};
+
+int mlx5_ib_fs_init(struct mlx5_ib_dev *dev)
+{
+ int i, j;
+
+ dev->flow_db = kzalloc(sizeof(*dev->flow_db), GFP_KERNEL);
+
+ if (!dev->flow_db)
+ return -ENOMEM;
+
+ for (i = 0; i < MLX5_RDMA_TRANSPORT_BYPASS_PRIO; i++) {
+ dev->flow_db->rdma_transport_rx[i] =
+ kcalloc(dev->num_ports,
+ sizeof(struct mlx5_ib_flow_prio), GFP_KERNEL);
+ if (!dev->flow_db->rdma_transport_rx[i])
+ goto free_rdma_transport_rx;
+ }
+
+ for (j = 0; j < MLX5_RDMA_TRANSPORT_BYPASS_PRIO; j++) {
+ dev->flow_db->rdma_transport_tx[j] =
+ kcalloc(dev->num_ports,
+ sizeof(struct mlx5_ib_flow_prio), GFP_KERNEL);
+ if (!dev->flow_db->rdma_transport_tx[j])
+ goto free_rdma_transport_tx;
+ }
+
+ mutex_init(&dev->flow_db->lock);
+
+ ib_set_device_ops(&dev->ib_dev, &flow_ops);
+ return 0;
+
+free_rdma_transport_tx:
+ while (j--)
+ kfree(dev->flow_db->rdma_transport_tx[j]);
+free_rdma_transport_rx:
+ while (i--)
+ kfree(dev->flow_db->rdma_transport_rx[i]);
+ kfree(dev->flow_db);
+ return -ENOMEM;
+}
diff --git a/drivers/infiniband/hw/mlx5/fs.h b/drivers/infiniband/hw/mlx5/fs.h
new file mode 100644
index 000000000000..7abba0e2837c
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/fs.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright (c) 2013-2020, Mellanox Technologies inc. All rights reserved.
+ */
+
+#ifndef _MLX5_IB_FS_H
+#define _MLX5_IB_FS_H
+
+#include "mlx5_ib.h"
+
+int mlx5_ib_fs_init(struct mlx5_ib_dev *dev);
+void mlx5_ib_fs_cleanup_anchor(struct mlx5_ib_dev *dev);
+
+static inline void mlx5_ib_fs_cleanup(struct mlx5_ib_dev *dev)
+{
+ int i;
+
+ /* When a steering anchor is created, a special flow table is also
+ * created for the user to reference. Since the user can reference it,
+ * the kernel cannot trust that when the user destroys the steering
+ * anchor, they no longer reference the flow table.
+ *
+ * To address this issue, when a user destroys a steering anchor, only
+ * the flow steering rule in the table is destroyed, but the table
+ * itself is kept to deal with the above scenario. The remaining
+ * resources are only removed when the RDMA device is destroyed, which
+ * is a safe assumption that all references are gone.
+ */
+ mlx5_ib_fs_cleanup_anchor(dev);
+ for (i = 0; i < MLX5_RDMA_TRANSPORT_BYPASS_PRIO; i++)
+ kfree(dev->flow_db->rdma_transport_tx[i]);
+ for (i = 0; i < MLX5_RDMA_TRANSPORT_BYPASS_PRIO; i++)
+ kfree(dev->flow_db->rdma_transport_rx[i]);
+ kfree(dev->flow_db);
+}
+#endif /* _MLX5_IB_FS_H */
diff --git a/drivers/infiniband/hw/mlx5/gsi.c b/drivers/infiniband/hw/mlx5/gsi.c
index 79e6309460dc..d5487834ed25 100644
--- a/drivers/infiniband/hw/mlx5/gsi.c
+++ b/drivers/infiniband/hw/mlx5/gsi.c
@@ -35,44 +35,19 @@
struct mlx5_ib_gsi_wr {
struct ib_cqe cqe;
struct ib_wc wc;
- int send_flags;
bool completed:1;
};
-struct mlx5_ib_gsi_qp {
- struct ib_qp ibqp;
- struct ib_qp *rx_qp;
- u8 port_num;
- struct ib_qp_cap cap;
- enum ib_sig_type sq_sig_type;
- /* Serialize qp state modifications */
- struct mutex mutex;
- struct ib_cq *cq;
- struct mlx5_ib_gsi_wr *outstanding_wrs;
- u32 outstanding_pi, outstanding_ci;
- int num_qps;
- /* Protects access to the tx_qps. Post send operations synchronize
- * with tx_qp creation in setup_qp(). Also protects the
- * outstanding_wrs array and indices.
- */
- spinlock_t lock;
- struct ib_qp **tx_qps;
-};
-
-static struct mlx5_ib_gsi_qp *gsi_qp(struct ib_qp *qp)
-{
- return container_of(qp, struct mlx5_ib_gsi_qp, ibqp);
-}
-
static bool mlx5_ib_deth_sqpn_cap(struct mlx5_ib_dev *dev)
{
return MLX5_CAP_GEN(dev->mdev, set_deth_sqpn);
}
/* Call with gsi->lock locked */
-static void generate_completions(struct mlx5_ib_gsi_qp *gsi)
+static void generate_completions(struct mlx5_ib_qp *mqp)
{
- struct ib_cq *gsi_cq = gsi->ibqp.send_cq;
+ struct mlx5_ib_gsi_qp *gsi = &mqp->gsi;
+ struct ib_cq *gsi_cq = mqp->ibqp.send_cq;
struct mlx5_ib_gsi_wr *wr;
u32 index;
@@ -83,10 +58,7 @@ static void generate_completions(struct mlx5_ib_gsi_qp *gsi)
if (!wr->completed)
break;
- if (gsi->sq_sig_type == IB_SIGNAL_ALL_WR ||
- wr->send_flags & IB_SEND_SIGNALED)
- WARN_ON_ONCE(mlx5_ib_generate_wc(gsi_cq, &wr->wc));
-
+ WARN_ON_ONCE(mlx5_ib_generate_wc(gsi_cq, &wr->wc));
wr->completed = false;
}
@@ -98,6 +70,7 @@ static void handle_single_completion(struct ib_cq *cq, struct ib_wc *wc)
struct mlx5_ib_gsi_qp *gsi = cq->cq_context;
struct mlx5_ib_gsi_wr *wr =
container_of(wc->wr_cqe, struct mlx5_ib_gsi_wr, cqe);
+ struct mlx5_ib_qp *mqp = container_of(gsi, struct mlx5_ib_qp, gsi);
u64 wr_id;
unsigned long flags;
@@ -106,54 +79,43 @@ static void handle_single_completion(struct ib_cq *cq, struct ib_wc *wc)
wr_id = wr->wc.wr_id;
wr->wc = *wc;
wr->wc.wr_id = wr_id;
- wr->wc.qp = &gsi->ibqp;
+ wr->wc.qp = &mqp->ibqp;
- generate_completions(gsi);
+ generate_completions(mqp);
spin_unlock_irqrestore(&gsi->lock, flags);
}
-struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd,
- struct ib_qp_init_attr *init_attr)
+int mlx5_ib_create_gsi(struct ib_pd *pd, struct mlx5_ib_qp *mqp,
+ struct ib_qp_init_attr *attr)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
struct mlx5_ib_gsi_qp *gsi;
- struct ib_qp_init_attr hw_init_attr = *init_attr;
- const u8 port_num = init_attr->port_num;
- const int num_pkeys = pd->device->attrs.max_pkeys;
- const int num_qps = mlx5_ib_deth_sqpn_cap(dev) ? num_pkeys : 0;
+ struct ib_qp_init_attr hw_init_attr = *attr;
+ const u8 port_num = attr->port_num;
+ int num_qps = 0;
int ret;
- mlx5_ib_dbg(dev, "creating GSI QP\n");
-
- if (port_num > ARRAY_SIZE(dev->devr.ports) || port_num < 1) {
- mlx5_ib_warn(dev,
- "invalid port number %d during GSI QP creation\n",
- port_num);
- return ERR_PTR(-EINVAL);
+ if (mlx5_ib_deth_sqpn_cap(dev)) {
+ if (MLX5_CAP_GEN(dev->mdev,
+ port_type) == MLX5_CAP_PORT_TYPE_IB)
+ num_qps = pd->device->attrs.max_pkeys;
+ else if (dev->lag_active)
+ num_qps = dev->lag_ports;
}
- gsi = kzalloc(sizeof(*gsi), GFP_KERNEL);
- if (!gsi)
- return ERR_PTR(-ENOMEM);
-
+ gsi = &mqp->gsi;
gsi->tx_qps = kcalloc(num_qps, sizeof(*gsi->tx_qps), GFP_KERNEL);
- if (!gsi->tx_qps) {
- ret = -ENOMEM;
- goto err_free;
- }
+ if (!gsi->tx_qps)
+ return -ENOMEM;
- gsi->outstanding_wrs = kcalloc(init_attr->cap.max_send_wr,
- sizeof(*gsi->outstanding_wrs),
- GFP_KERNEL);
+ gsi->outstanding_wrs =
+ kcalloc(attr->cap.max_send_wr, sizeof(*gsi->outstanding_wrs),
+ GFP_KERNEL);
if (!gsi->outstanding_wrs) {
ret = -ENOMEM;
goto err_free_tx;
}
- mutex_init(&gsi->mutex);
-
- mutex_lock(&dev->devr.mutex);
-
if (dev->devr.ports[port_num - 1].gsi) {
mlx5_ib_warn(dev, "GSI QP already exists on port %d\n",
port_num);
@@ -163,16 +125,15 @@ struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd,
gsi->num_qps = num_qps;
spin_lock_init(&gsi->lock);
- gsi->cap = init_attr->cap;
- gsi->sq_sig_type = init_attr->sq_sig_type;
- gsi->ibqp.qp_num = 1;
+ gsi->cap = attr->cap;
gsi->port_num = port_num;
- gsi->cq = ib_alloc_cq(pd->device, gsi, init_attr->cap.max_send_wr, 0,
+ gsi->cq = ib_alloc_cq(pd->device, gsi, attr->cap.max_send_wr, 0,
IB_POLL_SOFTIRQ);
if (IS_ERR(gsi->cq)) {
- mlx5_ib_warn(dev, "unable to create send CQ for GSI QP. error %ld\n",
- PTR_ERR(gsi->cq));
+ mlx5_ib_warn(dev,
+ "unable to create send CQ for GSI QP. error %pe\n",
+ gsi->cq);
ret = PTR_ERR(gsi->cq);
goto err_free_wrs;
}
@@ -184,52 +145,43 @@ struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd,
hw_init_attr.cap.max_send_sge = 0;
hw_init_attr.cap.max_inline_data = 0;
}
+
gsi->rx_qp = ib_create_qp(pd, &hw_init_attr);
if (IS_ERR(gsi->rx_qp)) {
- mlx5_ib_warn(dev, "unable to create hardware GSI QP. error %ld\n",
- PTR_ERR(gsi->rx_qp));
+ mlx5_ib_warn(dev,
+ "unable to create hardware GSI QP. error %pe\n",
+ gsi->rx_qp);
ret = PTR_ERR(gsi->rx_qp);
goto err_destroy_cq;
}
- dev->devr.ports[init_attr->port_num - 1].gsi = gsi;
-
- mutex_unlock(&dev->devr.mutex);
-
- return &gsi->ibqp;
+ dev->devr.ports[attr->port_num - 1].gsi = gsi;
+ return 0;
err_destroy_cq:
ib_free_cq(gsi->cq);
err_free_wrs:
- mutex_unlock(&dev->devr.mutex);
kfree(gsi->outstanding_wrs);
err_free_tx:
kfree(gsi->tx_qps);
-err_free:
- kfree(gsi);
- return ERR_PTR(ret);
+ return ret;
}
-int mlx5_ib_gsi_destroy_qp(struct ib_qp *qp)
+int mlx5_ib_destroy_gsi(struct mlx5_ib_qp *mqp)
{
- struct mlx5_ib_dev *dev = to_mdev(qp->device);
- struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp);
+ struct mlx5_ib_dev *dev = to_mdev(mqp->ibqp.device);
+ struct mlx5_ib_gsi_qp *gsi = &mqp->gsi;
const int port_num = gsi->port_num;
int qp_index;
int ret;
- mlx5_ib_dbg(dev, "destroying GSI QP\n");
-
- mutex_lock(&dev->devr.mutex);
ret = ib_destroy_qp(gsi->rx_qp);
if (ret) {
mlx5_ib_warn(dev, "unable to destroy hardware GSI QP. error %d\n",
ret);
- mutex_unlock(&dev->devr.mutex);
return ret;
}
dev->devr.ports[port_num - 1].gsi = NULL;
- mutex_unlock(&dev->devr.mutex);
gsi->rx_qp = NULL;
for (qp_index = 0; qp_index < gsi->num_qps; ++qp_index) {
@@ -243,8 +195,6 @@ int mlx5_ib_gsi_destroy_qp(struct ib_qp *qp)
kfree(gsi->outstanding_wrs);
kfree(gsi->tx_qps);
- kfree(gsi);
-
return 0;
}
@@ -261,16 +211,15 @@ static struct ib_qp *create_gsi_ud_qp(struct mlx5_ib_gsi_qp *gsi)
.max_send_sge = gsi->cap.max_send_sge,
.max_inline_data = gsi->cap.max_inline_data,
},
- .sq_sig_type = gsi->sq_sig_type,
.qp_type = IB_QPT_UD,
- .create_flags = mlx5_ib_create_qp_sqpn_qp1(),
+ .create_flags = MLX5_IB_QP_CREATE_SQPN_QP1,
};
return ib_create_qp(pd, &init_attr);
}
static int modify_to_rts(struct mlx5_ib_gsi_qp *gsi, struct ib_qp *qp,
- u16 qp_index)
+ u16 pkey_index)
{
struct mlx5_ib_dev *dev = to_mdev(qp->device);
struct ib_qp_attr attr;
@@ -279,7 +228,7 @@ static int modify_to_rts(struct mlx5_ib_gsi_qp *gsi, struct ib_qp *qp,
mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_QKEY | IB_QP_PORT;
attr.qp_state = IB_QPS_INIT;
- attr.pkey_index = qp_index;
+ attr.pkey_index = pkey_index;
attr.qkey = IB_QP1_QKEY;
attr.port_num = gsi->port_num;
ret = ib_modify_qp(qp, &attr, mask);
@@ -313,12 +262,17 @@ static void setup_qp(struct mlx5_ib_gsi_qp *gsi, u16 qp_index)
{
struct ib_device *device = gsi->rx_qp->device;
struct mlx5_ib_dev *dev = to_mdev(device);
+ int pkey_index = qp_index;
+ struct mlx5_ib_qp *mqp;
struct ib_qp *qp;
unsigned long flags;
u16 pkey;
int ret;
- ret = ib_query_pkey(device, gsi->port_num, qp_index, &pkey);
+ if (MLX5_CAP_GEN(dev->mdev, port_type) != MLX5_CAP_PORT_TYPE_IB)
+ pkey_index = 0;
+
+ ret = ib_query_pkey(device, gsi->port_num, pkey_index, &pkey);
if (ret) {
mlx5_ib_warn(dev, "unable to read P_Key at port %d, index %d\n",
gsi->port_num, qp_index);
@@ -342,12 +296,16 @@ static void setup_qp(struct mlx5_ib_gsi_qp *gsi, u16 qp_index)
qp = create_gsi_ud_qp(gsi);
if (IS_ERR(qp)) {
- mlx5_ib_warn(dev, "unable to create hardware UD QP for GSI: %ld\n",
- PTR_ERR(qp));
+ mlx5_ib_warn(dev,
+ "unable to create hardware UD QP for GSI: %pe\n",
+ qp);
return;
}
- ret = modify_to_rts(gsi, qp, qp_index);
+ mqp = to_mqp(qp);
+ if (dev->lag_active)
+ mqp->gsi_lag_port = qp_index + 1;
+ ret = modify_to_rts(gsi, qp, pkey_index);
if (ret)
goto err_destroy_qp;
@@ -362,58 +320,49 @@ err_destroy_qp:
WARN_ON_ONCE(qp);
}
-static void setup_qps(struct mlx5_ib_gsi_qp *gsi)
-{
- u16 qp_index;
-
- for (qp_index = 0; qp_index < gsi->num_qps; ++qp_index)
- setup_qp(gsi, qp_index);
-}
-
int mlx5_ib_gsi_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr,
int attr_mask)
{
struct mlx5_ib_dev *dev = to_mdev(qp->device);
- struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp);
+ struct mlx5_ib_qp *mqp = to_mqp(qp);
+ struct mlx5_ib_gsi_qp *gsi = &mqp->gsi;
+ u16 qp_index;
int ret;
mlx5_ib_dbg(dev, "modifying GSI QP to state %d\n", attr->qp_state);
- mutex_lock(&gsi->mutex);
ret = ib_modify_qp(gsi->rx_qp, attr, attr_mask);
if (ret) {
mlx5_ib_warn(dev, "unable to modify GSI rx QP: %d\n", ret);
- goto unlock;
+ return ret;
}
- if (to_mqp(gsi->rx_qp)->state == IB_QPS_RTS)
- setup_qps(gsi);
+ if (to_mqp(gsi->rx_qp)->state != IB_QPS_RTS)
+ return 0;
-unlock:
- mutex_unlock(&gsi->mutex);
-
- return ret;
+ for (qp_index = 0; qp_index < gsi->num_qps; ++qp_index)
+ setup_qp(gsi, qp_index);
+ return 0;
}
int mlx5_ib_gsi_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
int qp_attr_mask,
struct ib_qp_init_attr *qp_init_attr)
{
- struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp);
+ struct mlx5_ib_qp *mqp = to_mqp(qp);
+ struct mlx5_ib_gsi_qp *gsi = &mqp->gsi;
int ret;
- mutex_lock(&gsi->mutex);
ret = ib_query_qp(gsi->rx_qp, qp_attr, qp_attr_mask, qp_init_attr);
qp_init_attr->cap = gsi->cap;
- mutex_unlock(&gsi->mutex);
-
return ret;
}
/* Call with gsi->lock locked */
-static int mlx5_ib_add_outstanding_wr(struct mlx5_ib_gsi_qp *gsi,
+static int mlx5_ib_add_outstanding_wr(struct mlx5_ib_qp *mqp,
struct ib_ud_wr *wr, struct ib_wc *wc)
{
+ struct mlx5_ib_gsi_qp *gsi = &mqp->gsi;
struct mlx5_ib_dev *dev = to_mdev(gsi->rx_qp->device);
struct mlx5_ib_gsi_wr *gsi_wr;
@@ -442,22 +391,21 @@ static int mlx5_ib_add_outstanding_wr(struct mlx5_ib_gsi_qp *gsi,
}
/* Call with gsi->lock locked */
-static int mlx5_ib_gsi_silent_drop(struct mlx5_ib_gsi_qp *gsi,
- struct ib_ud_wr *wr)
+static int mlx5_ib_gsi_silent_drop(struct mlx5_ib_qp *mqp, struct ib_ud_wr *wr)
{
struct ib_wc wc = {
{ .wr_id = wr->wr.wr_id },
.status = IB_WC_SUCCESS,
.opcode = IB_WC_SEND,
- .qp = &gsi->ibqp,
+ .qp = &mqp->ibqp,
};
int ret;
- ret = mlx5_ib_add_outstanding_wr(gsi, wr, &wc);
+ ret = mlx5_ib_add_outstanding_wr(mqp, wr, &wc);
if (ret)
return ret;
- generate_completions(gsi);
+ generate_completions(mqp);
return 0;
}
@@ -466,21 +414,26 @@ static int mlx5_ib_gsi_silent_drop(struct mlx5_ib_gsi_qp *gsi,
static struct ib_qp *get_tx_qp(struct mlx5_ib_gsi_qp *gsi, struct ib_ud_wr *wr)
{
struct mlx5_ib_dev *dev = to_mdev(gsi->rx_qp->device);
+ struct mlx5_ib_ah *ah = to_mah(wr->ah);
int qp_index = wr->pkey_index;
- if (!mlx5_ib_deth_sqpn_cap(dev))
+ if (!gsi->num_qps)
return gsi->rx_qp;
+ if (dev->lag_active && ah->xmit_port)
+ qp_index = ah->xmit_port - 1;
+
if (qp_index >= gsi->num_qps)
return NULL;
return gsi->tx_qps[qp_index];
}
-int mlx5_ib_gsi_post_send(struct ib_qp *qp, struct ib_send_wr *wr,
- struct ib_send_wr **bad_wr)
+int mlx5_ib_gsi_post_send(struct ib_qp *qp, const struct ib_send_wr *wr,
+ const struct ib_send_wr **bad_wr)
{
- struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp);
+ struct mlx5_ib_qp *mqp = to_mqp(qp);
+ struct mlx5_ib_gsi_qp *gsi = &mqp->gsi;
struct ib_qp *tx_qp;
unsigned long flags;
int ret;
@@ -493,22 +446,21 @@ int mlx5_ib_gsi_post_send(struct ib_qp *qp, struct ib_send_wr *wr,
spin_lock_irqsave(&gsi->lock, flags);
tx_qp = get_tx_qp(gsi, &cur_wr);
if (!tx_qp) {
- ret = mlx5_ib_gsi_silent_drop(gsi, &cur_wr);
+ ret = mlx5_ib_gsi_silent_drop(mqp, &cur_wr);
if (ret)
goto err;
spin_unlock_irqrestore(&gsi->lock, flags);
continue;
}
- ret = mlx5_ib_add_outstanding_wr(gsi, &cur_wr, NULL);
+ ret = mlx5_ib_add_outstanding_wr(mqp, &cur_wr, NULL);
if (ret)
goto err;
ret = ib_post_send(tx_qp, &cur_wr.wr, bad_wr);
if (ret) {
/* Undo the effect of adding the outstanding wr */
- gsi->outstanding_pi = (gsi->outstanding_pi - 1) %
- gsi->cap.max_send_wr;
+ gsi->outstanding_pi--;
goto err;
}
spin_unlock_irqrestore(&gsi->lock, flags);
@@ -522,20 +474,19 @@ err:
return ret;
}
-int mlx5_ib_gsi_post_recv(struct ib_qp *qp, struct ib_recv_wr *wr,
- struct ib_recv_wr **bad_wr)
+int mlx5_ib_gsi_post_recv(struct ib_qp *qp, const struct ib_recv_wr *wr,
+ const struct ib_recv_wr **bad_wr)
{
- struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp);
+ struct mlx5_ib_qp *mqp = to_mqp(qp);
+ struct mlx5_ib_gsi_qp *gsi = &mqp->gsi;
return ib_post_recv(gsi->rx_qp, wr, bad_wr);
}
void mlx5_ib_gsi_pkey_change(struct mlx5_ib_gsi_qp *gsi)
{
- if (!gsi)
- return;
+ u16 qp_index;
- mutex_lock(&gsi->mutex);
- setup_qps(gsi);
- mutex_unlock(&gsi->mutex);
+ for (qp_index = 0; qp_index < gsi->num_qps; ++qp_index)
+ setup_qp(gsi, qp_index);
}
diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c
new file mode 100644
index 000000000000..bbecca405171
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/ib_rep.c
@@ -0,0 +1,346 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ */
+
+#include <linux/mlx5/vport.h>
+#include "ib_rep.h"
+#include "srq.h"
+
+static int
+mlx5_ib_set_vport_rep(struct mlx5_core_dev *dev,
+ struct mlx5_eswitch_rep *rep,
+ int vport_index)
+{
+ struct mlx5_ib_dev *ibdev;
+ struct net_device *ndev;
+
+ ibdev = mlx5_eswitch_uplink_get_proto_dev(dev->priv.eswitch, REP_IB);
+ if (!ibdev)
+ return -EINVAL;
+
+ ibdev->port[vport_index].rep = rep;
+ rep->rep_data[REP_IB].priv = ibdev;
+ ndev = mlx5_ib_get_rep_netdev(rep->esw, rep->vport);
+
+ return ib_device_set_netdev(&ibdev->ib_dev, ndev, vport_index + 1);
+}
+
+static void mlx5_ib_register_peer_vport_reps(struct mlx5_core_dev *mdev);
+
+static void mlx5_ib_num_ports_update(struct mlx5_core_dev *dev, u32 *num_ports)
+{
+ struct mlx5_core_dev *peer_dev;
+ int i;
+
+ mlx5_lag_for_each_peer_mdev(dev, peer_dev, i) {
+ u32 peer_num_ports = mlx5_eswitch_get_total_vports(peer_dev);
+
+ if (mlx5_lag_is_mpesw(peer_dev))
+ *num_ports += peer_num_ports;
+ else
+ /* Only 1 ib port is the representor for all uplinks */
+ *num_ports += peer_num_ports - 1;
+ }
+}
+
+static int mlx5_ib_set_owner_transport(struct mlx5_core_dev *cur_owner,
+ struct mlx5_core_dev *new_owner)
+{
+ int ret;
+
+ if (!MLX5_CAP_FLOWTABLE_RDMA_TRANSPORT_TX(cur_owner, ft_support) ||
+ !MLX5_CAP_FLOWTABLE_RDMA_TRANSPORT_RX(cur_owner, ft_support))
+ return 0;
+
+ if (!MLX5_CAP_ADV_RDMA(new_owner, rdma_transport_manager) ||
+ !MLX5_CAP_ADV_RDMA(new_owner, rdma_transport_manager_other_eswitch))
+ return 0;
+
+ ret = mlx5_fs_set_root_dev(cur_owner, new_owner,
+ FS_FT_RDMA_TRANSPORT_TX);
+ if (ret)
+ return ret;
+
+ ret = mlx5_fs_set_root_dev(cur_owner, new_owner,
+ FS_FT_RDMA_TRANSPORT_RX);
+ if (ret) {
+ mlx5_fs_set_root_dev(cur_owner, cur_owner,
+ FS_FT_RDMA_TRANSPORT_TX);
+ return ret;
+ }
+
+ return 0;
+}
+
+static void mlx5_ib_release_transport(struct mlx5_core_dev *dev)
+{
+ struct mlx5_core_dev *peer_dev;
+ int i, ret;
+
+ mlx5_lag_for_each_peer_mdev(dev, peer_dev, i) {
+ ret = mlx5_ib_set_owner_transport(peer_dev, peer_dev);
+ WARN_ON_ONCE(ret);
+ }
+}
+
+static int mlx5_ib_take_transport(struct mlx5_core_dev *dev)
+{
+ struct mlx5_core_dev *peer_dev;
+ int ret;
+ int i;
+
+ mlx5_lag_for_each_peer_mdev(dev, peer_dev, i) {
+ ret = mlx5_ib_set_owner_transport(peer_dev, dev);
+ if (ret) {
+ mlx5_ib_release_transport(dev);
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+static int
+mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
+{
+ u32 num_ports = mlx5_eswitch_get_total_vports(dev);
+ struct mlx5_core_dev *lag_master = dev;
+ const struct mlx5_ib_profile *profile;
+ struct mlx5_core_dev *peer_dev;
+ struct mlx5_ib_dev *ibdev;
+ int new_uplink = false;
+ int vport_index;
+ int ret;
+ int i;
+
+ vport_index = rep->vport_index;
+
+ if (mlx5_lag_is_shared_fdb(dev)) {
+ if (mlx5_lag_is_master(dev)) {
+ mlx5_ib_num_ports_update(dev, &num_ports);
+ } else {
+ if (rep->vport == MLX5_VPORT_UPLINK) {
+ if (!mlx5_lag_is_mpesw(dev))
+ return 0;
+ new_uplink = true;
+ }
+ mlx5_lag_for_each_peer_mdev(dev, peer_dev, i) {
+ u32 peer_n_ports = mlx5_eswitch_get_total_vports(peer_dev);
+
+ if (mlx5_lag_is_master(peer_dev))
+ lag_master = peer_dev;
+ else if (!mlx5_lag_is_mpesw(dev))
+ /* Only 1 ib port is the representor for all uplinks */
+ peer_n_ports--;
+
+ if (mlx5_get_dev_index(peer_dev) < mlx5_get_dev_index(dev))
+ vport_index += peer_n_ports;
+ }
+ }
+ }
+
+ if (rep->vport == MLX5_VPORT_UPLINK && !new_uplink)
+ profile = &raw_eth_profile;
+ else
+ return mlx5_ib_set_vport_rep(lag_master, rep, vport_index);
+
+ if (mlx5_lag_is_shared_fdb(dev)) {
+ ret = mlx5_ib_take_transport(lag_master);
+ if (ret)
+ return ret;
+ }
+
+ ibdev = ib_alloc_device_with_net(mlx5_ib_dev, ib_dev,
+ mlx5_core_net(lag_master));
+ if (!ibdev) {
+ ret = -ENOMEM;
+ goto release_transport;
+ }
+
+ ibdev->port = kcalloc(num_ports, sizeof(*ibdev->port),
+ GFP_KERNEL);
+ if (!ibdev->port) {
+ ret = -ENOMEM;
+ goto fail_port;
+ }
+
+ ibdev->is_rep = true;
+ vport_index = rep->vport_index;
+ ibdev->port[vport_index].rep = rep;
+ ibdev->mdev = lag_master;
+ ibdev->num_ports = num_ports;
+ ibdev->ib_dev.phys_port_cnt = num_ports;
+ ret = ib_device_set_netdev(&ibdev->ib_dev,
+ mlx5_ib_get_rep_netdev(lag_master->priv.eswitch,
+ rep->vport),
+ vport_index + 1);
+ if (ret)
+ goto fail_add;
+
+ ret = __mlx5_ib_add(ibdev, profile);
+ if (ret)
+ goto fail_add;
+
+ rep->rep_data[REP_IB].priv = ibdev;
+ if (mlx5_lag_is_shared_fdb(lag_master))
+ mlx5_ib_register_peer_vport_reps(lag_master);
+
+ return 0;
+
+fail_add:
+ kfree(ibdev->port);
+fail_port:
+ ib_dealloc_device(&ibdev->ib_dev);
+release_transport:
+ if (mlx5_lag_is_shared_fdb(lag_master))
+ mlx5_ib_release_transport(lag_master);
+
+ return ret;
+}
+
+static void *mlx5_ib_rep_to_dev(struct mlx5_eswitch_rep *rep)
+{
+ return rep->rep_data[REP_IB].priv;
+}
+
+static void
+mlx5_ib_vport_rep_unload(struct mlx5_eswitch_rep *rep)
+{
+ struct mlx5_core_dev *mdev = mlx5_eswitch_get_core_dev(rep->esw);
+ struct mlx5_ib_dev *dev = mlx5_ib_rep_to_dev(rep);
+ int vport_index = rep->vport_index;
+ struct mlx5_ib_port *port;
+ int i;
+
+ if (WARN_ON(!mdev))
+ return;
+
+ if (!dev)
+ return;
+
+ if (mlx5_lag_is_shared_fdb(mdev) &&
+ !mlx5_lag_is_master(mdev)) {
+ if (rep->vport == MLX5_VPORT_UPLINK && !mlx5_lag_is_mpesw(mdev))
+ return;
+ for (i = 0; i < dev->num_ports; i++) {
+ if (dev->port[i].rep == rep)
+ break;
+ }
+ if (WARN_ON(i == dev->num_ports))
+ return;
+ vport_index = i;
+ }
+
+ port = &dev->port[vport_index];
+
+ ib_device_set_netdev(&dev->ib_dev, NULL, vport_index + 1);
+ rep->rep_data[REP_IB].priv = NULL;
+ port->rep = NULL;
+
+ if (rep->vport == MLX5_VPORT_UPLINK) {
+
+ if (mlx5_lag_is_shared_fdb(mdev) && !mlx5_lag_is_master(mdev))
+ return;
+
+ if (mlx5_lag_is_shared_fdb(mdev)) {
+ struct mlx5_core_dev *peer_mdev;
+ struct mlx5_eswitch *esw;
+
+ mlx5_lag_for_each_peer_mdev(mdev, peer_mdev, i) {
+ esw = peer_mdev->priv.eswitch;
+ mlx5_eswitch_unregister_vport_reps(esw, REP_IB);
+ }
+ mlx5_ib_release_transport(mdev);
+ }
+ __mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX);
+ }
+}
+
+static const struct mlx5_eswitch_rep_ops rep_ops = {
+ .load = mlx5_ib_vport_rep_load,
+ .unload = mlx5_ib_vport_rep_unload,
+ .get_proto_dev = mlx5_ib_rep_to_dev,
+};
+
+static void mlx5_ib_register_peer_vport_reps(struct mlx5_core_dev *mdev)
+{
+ struct mlx5_core_dev *peer_mdev;
+ struct mlx5_eswitch *esw;
+ int i;
+
+ mlx5_lag_for_each_peer_mdev(mdev, peer_mdev, i) {
+ esw = peer_mdev->priv.eswitch;
+ mlx5_eswitch_register_vport_reps(esw, &rep_ops, REP_IB);
+ }
+}
+
+struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw,
+ u16 vport_num)
+{
+ return mlx5_eswitch_get_proto_dev(esw, vport_num, REP_ETH);
+}
+
+struct mlx5_flow_handle *create_flow_rule_vport_sq(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_sq *sq,
+ u32 port)
+{
+ struct mlx5_eswitch *esw = dev->mdev->priv.eswitch;
+ struct mlx5_eswitch_rep *rep;
+
+ if (!dev->is_rep || !port)
+ return NULL;
+
+ if (!dev->port[port - 1].rep)
+ return ERR_PTR(-EINVAL);
+
+ rep = dev->port[port - 1].rep;
+
+ return mlx5_eswitch_add_send_to_vport_rule(esw, esw, rep, sq->base.mqp.qpn);
+}
+
+static int mlx5r_rep_probe(struct auxiliary_device *adev,
+ const struct auxiliary_device_id *id)
+{
+ struct mlx5_adev *idev = container_of(adev, struct mlx5_adev, adev);
+ struct mlx5_core_dev *mdev = idev->mdev;
+ struct mlx5_eswitch *esw;
+
+ esw = mdev->priv.eswitch;
+ mlx5_eswitch_register_vport_reps(esw, &rep_ops, REP_IB);
+ return 0;
+}
+
+static void mlx5r_rep_remove(struct auxiliary_device *adev)
+{
+ struct mlx5_adev *idev = container_of(adev, struct mlx5_adev, adev);
+ struct mlx5_core_dev *mdev = idev->mdev;
+ struct mlx5_eswitch *esw;
+
+ esw = mdev->priv.eswitch;
+ mlx5_eswitch_unregister_vport_reps(esw, REP_IB);
+}
+
+static const struct auxiliary_device_id mlx5r_rep_id_table[] = {
+ { .name = MLX5_ADEV_NAME ".rdma-rep", },
+ {},
+};
+
+MODULE_DEVICE_TABLE(auxiliary, mlx5r_rep_id_table);
+
+static struct auxiliary_driver mlx5r_rep_driver = {
+ .name = "rep",
+ .probe = mlx5r_rep_probe,
+ .remove = mlx5r_rep_remove,
+ .id_table = mlx5r_rep_id_table,
+};
+
+int mlx5r_rep_init(void)
+{
+ return auxiliary_driver_register(&mlx5r_rep_driver);
+}
+
+void mlx5r_rep_cleanup(void)
+{
+ auxiliary_driver_unregister(&mlx5r_rep_driver);
+}
diff --git a/drivers/infiniband/hw/mlx5/ib_rep.h b/drivers/infiniband/hw/mlx5/ib_rep.h
new file mode 100644
index 000000000000..9c55e5c528b4
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/ib_rep.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ */
+
+#ifndef __MLX5_IB_REP_H__
+#define __MLX5_IB_REP_H__
+
+#include <linux/mlx5/eswitch.h>
+#include "mlx5_ib.h"
+
+extern const struct mlx5_ib_profile raw_eth_profile;
+
+#ifdef CONFIG_MLX5_ESWITCH
+int mlx5r_rep_init(void);
+void mlx5r_rep_cleanup(void);
+struct mlx5_flow_handle *create_flow_rule_vport_sq(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_sq *sq,
+ u32 port);
+struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw,
+ u16 vport_num);
+#else /* CONFIG_MLX5_ESWITCH */
+static inline int mlx5r_rep_init(void) { return 0; }
+static inline void mlx5r_rep_cleanup(void) {}
+static inline
+struct mlx5_flow_handle *create_flow_rule_vport_sq(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_sq *sq,
+ u32 port)
+{
+ return NULL;
+}
+
+static inline
+struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw,
+ u16 vport_num)
+{
+ return NULL;
+}
+#endif
+#endif /* __MLX5_IB_REP_H__ */
diff --git a/drivers/infiniband/hw/mlx5/ib_virt.c b/drivers/infiniband/hw/mlx5/ib_virt.c
index c1b9de800fe5..afeb5e53254f 100644
--- a/drivers/infiniband/hw/mlx5/ib_virt.c
+++ b/drivers/infiniband/hw/mlx5/ib_virt.c
@@ -30,7 +30,6 @@
* SOFTWARE.
*/
-#include <linux/module.h>
#include <linux/mlx5/vport.h>
#include "mlx5_ib.h"
@@ -48,7 +47,7 @@ static inline u32 mlx_to_net_policy(enum port_state_policy mlx_policy)
}
}
-int mlx5_ib_get_vf_config(struct ib_device *device, int vf, u8 port,
+int mlx5_ib_get_vf_config(struct ib_device *device, int vf, u32 port,
struct ifla_vf_info *info)
{
struct mlx5_ib_dev *dev = to_mdev(device);
@@ -91,11 +90,12 @@ static inline enum port_state_policy net_to_mlx_policy(int policy)
}
int mlx5_ib_set_vf_link_state(struct ib_device *device, int vf,
- u8 port, int state)
+ u32 port, int state)
{
struct mlx5_ib_dev *dev = to_mdev(device);
struct mlx5_core_dev *mdev = dev->mdev;
struct mlx5_hca_vport_context *in;
+ struct mlx5_vf_context *vfs_ctx = mdev->priv.sriov.vfs_ctx;
int err;
in = kzalloc(sizeof(*in), GFP_KERNEL);
@@ -109,6 +109,8 @@ int mlx5_ib_set_vf_link_state(struct ib_device *device, int vf,
}
in->field_select = MLX5_HCA_VPORT_SEL_STATE_POLICY;
err = mlx5_core_modify_hca_vport_context(mdev, 1, 1, vf + 1, in);
+ if (!err)
+ vfs_ctx[vf].policy = in->policy;
out:
kfree(in);
@@ -116,7 +118,7 @@ out:
}
int mlx5_ib_get_vf_stats(struct ib_device *device, int vf,
- u8 port, struct ifla_vf_stats *stats)
+ u32 port, struct ifla_vf_stats *stats)
{
int out_sz = MLX5_ST_SZ_BYTES(query_vport_counter_out);
struct mlx5_core_dev *mdev;
@@ -131,7 +133,7 @@ int mlx5_ib_get_vf_stats(struct ib_device *device, int vf,
if (!out)
return -ENOMEM;
- err = mlx5_core_query_vport_counter(mdev, true, vf, port, out, out_sz);
+ err = mlx5_core_query_vport_counter(mdev, true, vf, port, out);
if (err)
goto ex;
@@ -146,11 +148,13 @@ ex:
return err;
}
-static int set_vf_node_guid(struct ib_device *device, int vf, u8 port, u64 guid)
+static int set_vf_node_guid(struct ib_device *device, int vf, u32 port,
+ u64 guid)
{
struct mlx5_ib_dev *dev = to_mdev(device);
struct mlx5_core_dev *mdev = dev->mdev;
struct mlx5_hca_vport_context *in;
+ struct mlx5_vf_context *vfs_ctx = mdev->priv.sriov.vfs_ctx;
int err;
in = kzalloc(sizeof(*in), GFP_KERNEL);
@@ -160,15 +164,21 @@ static int set_vf_node_guid(struct ib_device *device, int vf, u8 port, u64 guid)
in->field_select = MLX5_HCA_VPORT_SEL_NODE_GUID;
in->node_guid = guid;
err = mlx5_core_modify_hca_vport_context(mdev, 1, 1, vf + 1, in);
+ if (!err) {
+ vfs_ctx[vf].node_guid = guid;
+ vfs_ctx[vf].node_guid_valid = 1;
+ }
kfree(in);
return err;
}
-static int set_vf_port_guid(struct ib_device *device, int vf, u8 port, u64 guid)
+static int set_vf_port_guid(struct ib_device *device, int vf, u32 port,
+ u64 guid)
{
struct mlx5_ib_dev *dev = to_mdev(device);
struct mlx5_core_dev *mdev = dev->mdev;
struct mlx5_hca_vport_context *in;
+ struct mlx5_vf_context *vfs_ctx = mdev->priv.sriov.vfs_ctx;
int err;
in = kzalloc(sizeof(*in), GFP_KERNEL);
@@ -178,11 +188,15 @@ static int set_vf_port_guid(struct ib_device *device, int vf, u8 port, u64 guid)
in->field_select = MLX5_HCA_VPORT_SEL_PORT_GUID;
in->port_guid = guid;
err = mlx5_core_modify_hca_vport_context(mdev, 1, 1, vf + 1, in);
+ if (!err) {
+ vfs_ctx[vf].port_guid = guid;
+ vfs_ctx[vf].port_guid_valid = 1;
+ }
kfree(in);
return err;
}
-int mlx5_ib_set_vf_guid(struct ib_device *device, int vf, u8 port,
+int mlx5_ib_set_vf_guid(struct ib_device *device, int vf, u32 port,
u64 guid, int type)
{
if (type == IFLA_VF_IB_NODE_GUID)
@@ -192,3 +206,19 @@ int mlx5_ib_set_vf_guid(struct ib_device *device, int vf, u8 port,
return -EINVAL;
}
+
+int mlx5_ib_get_vf_guid(struct ib_device *device, int vf, u32 port,
+ struct ifla_vf_guid *node_guid,
+ struct ifla_vf_guid *port_guid)
+{
+ struct mlx5_ib_dev *dev = to_mdev(device);
+ struct mlx5_core_dev *mdev = dev->mdev;
+ struct mlx5_vf_context *vfs_ctx = mdev->priv.sriov.vfs_ctx;
+
+ node_guid->guid =
+ vfs_ctx[vf].node_guid_valid ? vfs_ctx[vf].node_guid : 0;
+ port_guid->guid =
+ vfs_ctx[vf].port_guid_valid ? vfs_ctx[vf].port_guid : 0;
+
+ return 0;
+}
diff --git a/drivers/infiniband/hw/mlx5/macsec.c b/drivers/infiniband/hw/mlx5/macsec.c
new file mode 100644
index 000000000000..3c56eb5eddf3
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/macsec.c
@@ -0,0 +1,364 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. */
+
+#include "macsec.h"
+#include <linux/mlx5/macsec.h>
+
+struct mlx5_reserved_gids {
+ int macsec_index;
+ const struct ib_gid_attr *physical_gid;
+};
+
+struct mlx5_roce_gids {
+ struct list_head roce_gid_list_entry;
+ u16 gid_idx;
+ union {
+ struct sockaddr_in sockaddr_in;
+ struct sockaddr_in6 sockaddr_in6;
+ } addr;
+};
+
+struct mlx5_macsec_device {
+ struct list_head macsec_devices_list_entry;
+ void *macdev;
+ struct list_head macsec_roce_gids;
+ struct list_head tx_rules_list;
+ struct list_head rx_rules_list;
+};
+
+static void cleanup_macsec_device(struct mlx5_macsec_device *macsec_device)
+{
+ if (!list_empty(&macsec_device->tx_rules_list) ||
+ !list_empty(&macsec_device->rx_rules_list) ||
+ !list_empty(&macsec_device->macsec_roce_gids))
+ return;
+
+ list_del(&macsec_device->macsec_devices_list_entry);
+ kfree(macsec_device);
+}
+
+static struct mlx5_macsec_device *get_macsec_device(void *macdev,
+ struct list_head *macsec_devices_list)
+{
+ struct mlx5_macsec_device *iter, *macsec_device = NULL;
+
+ list_for_each_entry(iter, macsec_devices_list, macsec_devices_list_entry) {
+ if (iter->macdev == macdev) {
+ macsec_device = iter;
+ break;
+ }
+ }
+
+ if (macsec_device)
+ return macsec_device;
+
+ macsec_device = kzalloc(sizeof(*macsec_device), GFP_KERNEL);
+ if (!macsec_device)
+ return NULL;
+
+ macsec_device->macdev = macdev;
+ INIT_LIST_HEAD(&macsec_device->tx_rules_list);
+ INIT_LIST_HEAD(&macsec_device->rx_rules_list);
+ INIT_LIST_HEAD(&macsec_device->macsec_roce_gids);
+ list_add(&macsec_device->macsec_devices_list_entry, macsec_devices_list);
+
+ return macsec_device;
+}
+
+static void mlx5_macsec_del_roce_gid(struct mlx5_macsec_device *macsec_device, u16 gid_idx)
+{
+ struct mlx5_roce_gids *current_gid, *next_gid;
+
+ list_for_each_entry_safe(current_gid, next_gid, &macsec_device->macsec_roce_gids,
+ roce_gid_list_entry)
+ if (current_gid->gid_idx == gid_idx) {
+ list_del(&current_gid->roce_gid_list_entry);
+ kfree(current_gid);
+ }
+}
+
+static void mlx5_macsec_save_roce_gid(struct mlx5_macsec_device *macsec_device,
+ const struct sockaddr *addr, u16 gid_idx)
+{
+ struct mlx5_roce_gids *roce_gids;
+
+ roce_gids = kzalloc(sizeof(*roce_gids), GFP_KERNEL);
+ if (!roce_gids)
+ return;
+
+ roce_gids->gid_idx = gid_idx;
+ if (addr->sa_family == AF_INET)
+ memcpy(&roce_gids->addr.sockaddr_in, addr, sizeof(roce_gids->addr.sockaddr_in));
+ else
+ memcpy(&roce_gids->addr.sockaddr_in6, addr, sizeof(roce_gids->addr.sockaddr_in6));
+
+ list_add_tail(&roce_gids->roce_gid_list_entry, &macsec_device->macsec_roce_gids);
+}
+
+static void handle_macsec_gids(struct list_head *macsec_devices_list,
+ struct mlx5_macsec_event_data *data)
+{
+ struct mlx5_macsec_device *macsec_device;
+ struct mlx5_roce_gids *gid;
+
+ macsec_device = get_macsec_device(data->macdev, macsec_devices_list);
+ if (!macsec_device)
+ return;
+
+ list_for_each_entry(gid, &macsec_device->macsec_roce_gids, roce_gid_list_entry) {
+ mlx5_macsec_add_roce_sa_rules(data->fs_id, (struct sockaddr *)&gid->addr,
+ gid->gid_idx, &macsec_device->tx_rules_list,
+ &macsec_device->rx_rules_list, data->macsec_fs,
+ data->is_tx);
+ }
+}
+
+static void del_sa_roce_rule(struct list_head *macsec_devices_list,
+ struct mlx5_macsec_event_data *data)
+{
+ struct mlx5_macsec_device *macsec_device;
+
+ macsec_device = get_macsec_device(data->macdev, macsec_devices_list);
+ WARN_ON(!macsec_device);
+
+ mlx5_macsec_del_roce_sa_rules(data->fs_id, data->macsec_fs,
+ &macsec_device->tx_rules_list,
+ &macsec_device->rx_rules_list, data->is_tx);
+}
+
+static int macsec_event(struct notifier_block *nb, unsigned long event, void *data)
+{
+ struct mlx5_macsec *macsec = container_of(nb, struct mlx5_macsec, blocking_events_nb);
+
+ mutex_lock(&macsec->lock);
+ switch (event) {
+ case MLX5_DRIVER_EVENT_MACSEC_SA_ADDED:
+ handle_macsec_gids(&macsec->macsec_devices_list, data);
+ break;
+ case MLX5_DRIVER_EVENT_MACSEC_SA_DELETED:
+ del_sa_roce_rule(&macsec->macsec_devices_list, data);
+ break;
+ default:
+ mutex_unlock(&macsec->lock);
+ return NOTIFY_DONE;
+ }
+ mutex_unlock(&macsec->lock);
+ return NOTIFY_OK;
+}
+
+void mlx5r_macsec_event_register(struct mlx5_ib_dev *dev)
+{
+ if (!mlx5_is_macsec_roce_supported(dev->mdev)) {
+ mlx5_ib_dbg(dev, "RoCE MACsec not supported due to capabilities\n");
+ return;
+ }
+
+ dev->macsec.blocking_events_nb.notifier_call = macsec_event;
+ blocking_notifier_chain_register(&dev->mdev->macsec_nh,
+ &dev->macsec.blocking_events_nb);
+}
+
+void mlx5r_macsec_event_unregister(struct mlx5_ib_dev *dev)
+{
+ if (!mlx5_is_macsec_roce_supported(dev->mdev)) {
+ mlx5_ib_dbg(dev, "RoCE MACsec not supported due to capabilities\n");
+ return;
+ }
+
+ blocking_notifier_chain_unregister(&dev->mdev->macsec_nh,
+ &dev->macsec.blocking_events_nb);
+}
+
+int mlx5r_macsec_init_gids_and_devlist(struct mlx5_ib_dev *dev)
+{
+ int i, j, max_gids;
+
+ if (!mlx5_is_macsec_roce_supported(dev->mdev)) {
+ mlx5_ib_dbg(dev, "RoCE MACsec not supported due to capabilities\n");
+ return 0;
+ }
+
+ max_gids = MLX5_CAP_ROCE(dev->mdev, roce_address_table_size);
+ for (i = 0; i < dev->num_ports; i++) {
+ dev->port[i].reserved_gids = kcalloc(max_gids,
+ sizeof(*dev->port[i].reserved_gids),
+ GFP_KERNEL);
+ if (!dev->port[i].reserved_gids)
+ goto err;
+
+ for (j = 0; j < max_gids; j++)
+ dev->port[i].reserved_gids[j].macsec_index = -1;
+ }
+
+ INIT_LIST_HEAD(&dev->macsec.macsec_devices_list);
+ mutex_init(&dev->macsec.lock);
+
+ return 0;
+err:
+ while (i >= 0) {
+ kfree(dev->port[i].reserved_gids);
+ i--;
+ }
+ return -ENOMEM;
+}
+
+void mlx5r_macsec_dealloc_gids(struct mlx5_ib_dev *dev)
+{
+ int i;
+
+ if (!mlx5_is_macsec_roce_supported(dev->mdev))
+ mlx5_ib_dbg(dev, "RoCE MACsec not supported due to capabilities\n");
+
+ for (i = 0; i < dev->num_ports; i++)
+ kfree(dev->port[i].reserved_gids);
+
+ mutex_destroy(&dev->macsec.lock);
+}
+
+int mlx5r_add_gid_macsec_operations(const struct ib_gid_attr *attr)
+{
+ struct mlx5_ib_dev *dev = to_mdev(attr->device);
+ struct mlx5_macsec_device *macsec_device;
+ const struct ib_gid_attr *physical_gid;
+ struct mlx5_reserved_gids *mgids;
+ struct net_device *ndev;
+ int ret = 0;
+ union {
+ struct sockaddr_in sockaddr_in;
+ struct sockaddr_in6 sockaddr_in6;
+ } addr;
+
+ if (attr->gid_type != IB_GID_TYPE_ROCE_UDP_ENCAP)
+ return 0;
+
+ if (!mlx5_is_macsec_roce_supported(dev->mdev)) {
+ mlx5_ib_dbg(dev, "RoCE MACsec not supported due to capabilities\n");
+ return 0;
+ }
+
+ rcu_read_lock();
+ ndev = rcu_dereference(attr->ndev);
+ if (!ndev) {
+ rcu_read_unlock();
+ return -ENODEV;
+ }
+
+ if (!netif_is_macsec(ndev) || !macsec_netdev_is_offloaded(ndev)) {
+ rcu_read_unlock();
+ return 0;
+ }
+ dev_hold(ndev);
+ rcu_read_unlock();
+
+ mutex_lock(&dev->macsec.lock);
+ macsec_device = get_macsec_device(ndev, &dev->macsec.macsec_devices_list);
+ if (!macsec_device) {
+ ret = -ENOMEM;
+ goto dev_err;
+ }
+
+ physical_gid = rdma_find_gid(attr->device, &attr->gid,
+ attr->gid_type, NULL);
+ if (!IS_ERR(physical_gid)) {
+ ret = set_roce_addr(to_mdev(physical_gid->device),
+ physical_gid->port_num,
+ physical_gid->index, NULL,
+ physical_gid);
+ if (ret)
+ goto gid_err;
+
+ mgids = &dev->port[attr->port_num - 1].reserved_gids[physical_gid->index];
+ mgids->macsec_index = attr->index;
+ mgids->physical_gid = physical_gid;
+ }
+
+ /* Proceed with adding steering rules, regardless if there was gid ambiguity or not.*/
+ rdma_gid2ip((struct sockaddr *)&addr, &attr->gid);
+ ret = mlx5_macsec_add_roce_rule(ndev, (struct sockaddr *)&addr, attr->index,
+ &macsec_device->tx_rules_list,
+ &macsec_device->rx_rules_list, dev->mdev->macsec_fs);
+ if (ret && !IS_ERR(physical_gid))
+ goto rule_err;
+
+ mlx5_macsec_save_roce_gid(macsec_device, (struct sockaddr *)&addr, attr->index);
+
+ dev_put(ndev);
+ mutex_unlock(&dev->macsec.lock);
+ return ret;
+
+rule_err:
+ set_roce_addr(to_mdev(physical_gid->device), physical_gid->port_num,
+ physical_gid->index, &physical_gid->gid, physical_gid);
+ mgids->macsec_index = -1;
+gid_err:
+ rdma_put_gid_attr(physical_gid);
+ cleanup_macsec_device(macsec_device);
+dev_err:
+ dev_put(ndev);
+ mutex_unlock(&dev->macsec.lock);
+ return ret;
+}
+
+void mlx5r_del_gid_macsec_operations(const struct ib_gid_attr *attr)
+{
+ struct mlx5_ib_dev *dev = to_mdev(attr->device);
+ struct mlx5_macsec_device *macsec_device;
+ struct mlx5_reserved_gids *mgids;
+ struct net_device *ndev;
+ int i, max_gids;
+
+ if (attr->gid_type != IB_GID_TYPE_ROCE_UDP_ENCAP)
+ return;
+
+ if (!mlx5_is_macsec_roce_supported(dev->mdev)) {
+ mlx5_ib_dbg(dev, "RoCE MACsec not supported due to capabilities\n");
+ return;
+ }
+
+ mgids = &dev->port[attr->port_num - 1].reserved_gids[attr->index];
+ if (mgids->macsec_index != -1) { /* Checking if physical gid has ambiguous IP */
+ rdma_put_gid_attr(mgids->physical_gid);
+ mgids->macsec_index = -1;
+ return;
+ }
+
+ rcu_read_lock();
+ ndev = rcu_dereference(attr->ndev);
+ if (!ndev) {
+ rcu_read_unlock();
+ return;
+ }
+
+ if (!netif_is_macsec(ndev) || !macsec_netdev_is_offloaded(ndev)) {
+ rcu_read_unlock();
+ return;
+ }
+ dev_hold(ndev);
+ rcu_read_unlock();
+
+ mutex_lock(&dev->macsec.lock);
+ max_gids = MLX5_CAP_ROCE(dev->mdev, roce_address_table_size);
+ for (i = 0; i < max_gids; i++) { /* Checking if macsec gid has ambiguous IP */
+ mgids = &dev->port[attr->port_num - 1].reserved_gids[i];
+ if (mgids->macsec_index == attr->index) {
+ const struct ib_gid_attr *physical_gid = mgids->physical_gid;
+
+ set_roce_addr(to_mdev(physical_gid->device),
+ physical_gid->port_num,
+ physical_gid->index,
+ &physical_gid->gid, physical_gid);
+
+ rdma_put_gid_attr(physical_gid);
+ mgids->macsec_index = -1;
+ break;
+ }
+ }
+ macsec_device = get_macsec_device(ndev, &dev->macsec.macsec_devices_list);
+ mlx5_macsec_del_roce_rule(attr->index, dev->mdev->macsec_fs,
+ &macsec_device->tx_rules_list, &macsec_device->rx_rules_list);
+ mlx5_macsec_del_roce_gid(macsec_device, attr->index);
+ cleanup_macsec_device(macsec_device);
+
+ dev_put(ndev);
+ mutex_unlock(&dev->macsec.lock);
+}
diff --git a/drivers/infiniband/hw/mlx5/macsec.h b/drivers/infiniband/hw/mlx5/macsec.h
new file mode 100644
index 000000000000..9b77ba90f0f4
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/macsec.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. */
+
+#ifndef __MLX5_MACSEC_H__
+#define __MLX5_MACSEC_H__
+
+#include <net/macsec.h>
+#include <rdma/ib_cache.h>
+#include <rdma/ib_addr.h>
+#include "mlx5_ib.h"
+
+#ifdef CONFIG_MLX5_MACSEC
+struct mlx5_reserved_gids;
+
+int mlx5r_add_gid_macsec_operations(const struct ib_gid_attr *attr);
+void mlx5r_del_gid_macsec_operations(const struct ib_gid_attr *attr);
+int mlx5r_macsec_init_gids_and_devlist(struct mlx5_ib_dev *dev);
+void mlx5r_macsec_dealloc_gids(struct mlx5_ib_dev *dev);
+void mlx5r_macsec_event_register(struct mlx5_ib_dev *dev);
+void mlx5r_macsec_event_unregister(struct mlx5_ib_dev *dev);
+#else
+static inline int mlx5r_add_gid_macsec_operations(const struct ib_gid_attr *attr) { return 0; }
+static inline void mlx5r_del_gid_macsec_operations(const struct ib_gid_attr *attr) {}
+static inline int mlx5r_macsec_init_gids_and_devlist(struct mlx5_ib_dev *dev) { return 0; }
+static inline void mlx5r_macsec_dealloc_gids(struct mlx5_ib_dev *dev) {}
+static inline void mlx5r_macsec_event_register(struct mlx5_ib_dev *dev) {}
+static inline void mlx5r_macsec_event_unregister(struct mlx5_ib_dev *dev) {}
+#endif
+#endif /* __MLX5_MACSEC_H__ */
diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c
index 39e58489dcc2..2453ae4384a7 100644
--- a/drivers/infiniband/hw/mlx5/mad.c
+++ b/drivers/infiniband/hw/mlx5/mad.c
@@ -30,24 +30,37 @@
* SOFTWARE.
*/
-#include <linux/mlx5/cmd.h>
#include <linux/mlx5/vport.h>
#include <rdma/ib_mad.h>
#include <rdma/ib_smi.h>
#include <rdma/ib_pma.h>
#include "mlx5_ib.h"
+#include "cmd.h"
enum {
MLX5_IB_VENDOR_CLASS1 = 0x9,
MLX5_IB_VENDOR_CLASS2 = 0xa
};
-int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey,
- u8 port, const struct ib_wc *in_wc, const struct ib_grh *in_grh,
- const void *in_mad, void *response_mad)
+static bool can_do_mad_ifc(struct mlx5_ib_dev *dev, u32 port_num,
+ struct ib_mad *in_mad)
+{
+ if (in_mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_SUBN_LID_ROUTED &&
+ in_mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
+ return true;
+ return dev->port_caps[port_num - 1].has_smi;
+}
+
+static int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey,
+ int ignore_bkey, u32 port, const struct ib_wc *in_wc,
+ const struct ib_grh *in_grh, const void *in_mad,
+ void *response_mad)
{
u8 op_modifier = 0;
+ if (!can_do_mad_ifc(dev, port, (struct ib_mad *)in_mad))
+ return -EPERM;
+
/* Key check traps can't be generated unless we have in_wc to
* tell us where to send the trap.
*/
@@ -56,59 +69,8 @@ int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey,
if (ignore_bkey || !in_wc)
op_modifier |= 0x2;
- return mlx5_core_mad_ifc(dev->mdev, in_mad, response_mad, op_modifier, port);
-}
-
-static int process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
- const struct ib_wc *in_wc, const struct ib_grh *in_grh,
- const struct ib_mad *in_mad, struct ib_mad *out_mad)
-{
- u16 slid;
- int err;
-
- slid = in_wc ? in_wc->slid : be16_to_cpu(IB_LID_PERMISSIVE);
-
- if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP && slid == 0)
- return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
-
- if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED ||
- in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
- if (in_mad->mad_hdr.method != IB_MGMT_METHOD_GET &&
- in_mad->mad_hdr.method != IB_MGMT_METHOD_SET &&
- in_mad->mad_hdr.method != IB_MGMT_METHOD_TRAP_REPRESS)
- return IB_MAD_RESULT_SUCCESS;
-
- /* Don't process SMInfo queries -- the SMA can't handle them.
- */
- if (in_mad->mad_hdr.attr_id == IB_SMP_ATTR_SM_INFO)
- return IB_MAD_RESULT_SUCCESS;
- } else if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT ||
- in_mad->mad_hdr.mgmt_class == MLX5_IB_VENDOR_CLASS1 ||
- in_mad->mad_hdr.mgmt_class == MLX5_IB_VENDOR_CLASS2 ||
- in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_CONG_MGMT) {
- if (in_mad->mad_hdr.method != IB_MGMT_METHOD_GET &&
- in_mad->mad_hdr.method != IB_MGMT_METHOD_SET)
- return IB_MAD_RESULT_SUCCESS;
- } else {
- return IB_MAD_RESULT_SUCCESS;
- }
-
- err = mlx5_MAD_IFC(to_mdev(ibdev),
- mad_flags & IB_MAD_IGNORE_MKEY,
- mad_flags & IB_MAD_IGNORE_BKEY,
- port_num, in_wc, in_grh, in_mad, out_mad);
- if (err)
- return IB_MAD_RESULT_FAILURE;
-
- /* set return bit in status of directed route responses */
- if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
- out_mad->mad_hdr.status |= cpu_to_be16(1 << 15);
-
- if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP_REPRESS)
- /* no response for trap repress */
- return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
-
- return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
+ return mlx5_cmd_mad_ifc(dev, in_mad, response_mad, op_modifier,
+ port);
}
static void pma_cnt_ext_assign(struct ib_pma_portcounters_ext *pma_cnt_ext,
@@ -175,6 +137,8 @@ static void pma_cnt_assign(struct ib_pma_portcounters *pma_cnt,
port_xmit_discards);
MLX5_ASSIGN_PMA_CNTR(pma_cnt->port_xmit_constraint_errors,
port_xmit_constraint_errors);
+ MLX5_ASSIGN_PMA_CNTR(pma_cnt->port_xmit_wait,
+ port_xmit_wait);
MLX5_ASSIGN_PMA_CNTR(pma_cnt->port_rcv_constraint_errors,
port_rcv_constraint_errors);
MLX5_ASSIGN_PMA_CNTR(pma_cnt->link_overrun_errors,
@@ -183,88 +147,223 @@ static void pma_cnt_assign(struct ib_pma_portcounters *pma_cnt,
vl_15_dropped);
}
-static int process_pma_cmd(struct ib_device *ibdev, u8 port_num,
- const struct ib_mad *in_mad, struct ib_mad *out_mad)
+static void pma_cnt_ext_assign_ppcnt(struct ib_pma_portcounters_ext *cnt_ext,
+ void *out)
{
- struct mlx5_ib_dev *dev = to_mdev(ibdev);
+ void *out_pma = MLX5_ADDR_OF(ppcnt_reg, out,
+ counter_set);
+
+#define MLX5_GET_EXT_CNTR(counter_name) \
+ MLX5_GET64(ib_ext_port_cntrs_grp_data_layout, \
+ out_pma, counter_name##_high)
+
+ cnt_ext->port_xmit_data =
+ cpu_to_be64(MLX5_GET_EXT_CNTR(port_xmit_data) >> 2);
+ cnt_ext->port_rcv_data =
+ cpu_to_be64(MLX5_GET_EXT_CNTR(port_rcv_data) >> 2);
+
+ cnt_ext->port_xmit_packets =
+ cpu_to_be64(MLX5_GET_EXT_CNTR(port_xmit_pkts));
+ cnt_ext->port_rcv_packets =
+ cpu_to_be64(MLX5_GET_EXT_CNTR(port_rcv_pkts));
+
+ cnt_ext->port_unicast_xmit_packets =
+ cpu_to_be64(MLX5_GET_EXT_CNTR(port_unicast_xmit_pkts));
+ cnt_ext->port_unicast_rcv_packets =
+ cpu_to_be64(MLX5_GET_EXT_CNTR(port_unicast_rcv_pkts));
+
+ cnt_ext->port_multicast_xmit_packets =
+ cpu_to_be64(MLX5_GET_EXT_CNTR(port_multicast_xmit_pkts));
+ cnt_ext->port_multicast_rcv_packets =
+ cpu_to_be64(MLX5_GET_EXT_CNTR(port_multicast_rcv_pkts));
+}
+
+static int query_ib_ppcnt(struct mlx5_core_dev *dev, u8 port_num, u8 plane_num,
+ void *out, size_t sz, bool ext)
+{
+ u32 *in;
int err;
+
+ in = kvzalloc(sz, GFP_KERNEL);
+ if (!in) {
+ err = -ENOMEM;
+ return err;
+ }
+
+ MLX5_SET(ppcnt_reg, in, local_port, port_num);
+ MLX5_SET(ppcnt_reg, in, plane_ind, plane_num);
+
+ if (ext)
+ MLX5_SET(ppcnt_reg, in, grp,
+ MLX5_INFINIBAND_EXTENDED_PORT_COUNTERS_GROUP);
+ else
+ MLX5_SET(ppcnt_reg, in, grp,
+ MLX5_INFINIBAND_PORT_COUNTERS_GROUP);
+ err = mlx5_core_access_reg(dev, in, sz, out,
+ sz, MLX5_REG_PPCNT, 0, 0);
+
+ kvfree(in);
+ return err;
+}
+
+static int process_pma_cmd(struct mlx5_ib_dev *dev, u32 port_num,
+ const struct ib_mad *in_mad, struct ib_mad *out_mad)
+{
+ struct mlx5_core_dev *mdev;
+ bool native_port = true;
+ u32 mdev_port_num;
void *out_cnt;
+ int err;
- /* Decalring support of extended counters */
+ mdev = mlx5_ib_get_native_port_mdev(dev, port_num, &mdev_port_num);
+ if (!mdev) {
+ /* Fail to get the native port, likely due to 2nd port is still
+ * unaffiliated. In such case default to 1st port and attached
+ * PF device.
+ */
+ native_port = false;
+ mdev = dev->mdev;
+ mdev_port_num = 1;
+ }
+ if (MLX5_CAP_GEN(dev->mdev, num_ports) == 1 &&
+ !mlx5_core_mp_enabled(mdev) &&
+ dev->ib_dev.type != RDMA_DEVICE_TYPE_SMI) {
+ /* set local port to one for Function-Per-Port HCA. */
+ mdev = dev->mdev;
+ mdev_port_num = 1;
+ }
+
+ /* Declaring support of extended counters */
if (in_mad->mad_hdr.attr_id == IB_PMA_CLASS_PORT_INFO) {
struct ib_class_port_info cpi = {};
cpi.capability_mask = IB_PMA_CLASS_CAP_EXT_WIDTH;
memcpy((out_mad->data + 40), &cpi, sizeof(cpi));
- return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
+ err = IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
+ goto done;
}
if (in_mad->mad_hdr.attr_id == IB_PMA_PORT_COUNTERS_EXT) {
struct ib_pma_portcounters_ext *pma_cnt_ext =
(struct ib_pma_portcounters_ext *)(out_mad->data + 40);
- int sz = MLX5_ST_SZ_BYTES(query_vport_counter_out);
+ int sz = max(MLX5_ST_SZ_BYTES(query_vport_counter_out),
+ MLX5_ST_SZ_BYTES(ppcnt_reg));
- out_cnt = mlx5_vzalloc(sz);
- if (!out_cnt)
- return IB_MAD_RESULT_FAILURE;
+ out_cnt = kvzalloc(sz, GFP_KERNEL);
+ if (!out_cnt) {
+ err = IB_MAD_RESULT_FAILURE;
+ goto done;
+ }
- err = mlx5_core_query_vport_counter(dev->mdev, 0, 0,
- port_num, out_cnt, sz);
- if (!err)
- pma_cnt_ext_assign(pma_cnt_ext, out_cnt);
+ if (dev->ib_dev.type == RDMA_DEVICE_TYPE_SMI) {
+ err = query_ib_ppcnt(mdev, mdev_port_num,
+ port_num, out_cnt, sz, 1);
+ if (!err)
+ pma_cnt_ext_assign_ppcnt(pma_cnt_ext, out_cnt);
+ } else {
+ err = mlx5_core_query_vport_counter(mdev, 0, 0,
+ mdev_port_num,
+ out_cnt);
+ if (!err)
+ pma_cnt_ext_assign(pma_cnt_ext, out_cnt);
+ }
} else {
struct ib_pma_portcounters *pma_cnt =
(struct ib_pma_portcounters *)(out_mad->data + 40);
int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
- out_cnt = mlx5_vzalloc(sz);
- if (!out_cnt)
- return IB_MAD_RESULT_FAILURE;
+ out_cnt = kvzalloc(sz, GFP_KERNEL);
+ if (!out_cnt) {
+ err = IB_MAD_RESULT_FAILURE;
+ goto done;
+ }
+
+ if (dev->ib_dev.type == RDMA_DEVICE_TYPE_SMI)
+ err = query_ib_ppcnt(mdev, mdev_port_num, port_num,
+ out_cnt, sz, 0);
+ else
+ err = query_ib_ppcnt(mdev, mdev_port_num, 0,
+ out_cnt, sz, 0);
- err = mlx5_core_query_ib_ppcnt(dev->mdev, port_num,
- out_cnt, sz);
if (!err)
pma_cnt_assign(pma_cnt, out_cnt);
- }
-
+ }
kvfree(out_cnt);
- if (err)
- return IB_MAD_RESULT_FAILURE;
-
- return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
+ err = err ? IB_MAD_RESULT_FAILURE :
+ IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
+done:
+ if (native_port)
+ mlx5_ib_put_native_port_mdev(dev, port_num);
+ return err;
}
-int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
+int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u32 port_num,
const struct ib_wc *in_wc, const struct ib_grh *in_grh,
- const struct ib_mad_hdr *in, size_t in_mad_size,
- struct ib_mad_hdr *out, size_t *out_mad_size,
- u16 *out_mad_pkey_index)
+ const struct ib_mad *in, struct ib_mad *out,
+ size_t *out_mad_size, u16 *out_mad_pkey_index)
{
struct mlx5_ib_dev *dev = to_mdev(ibdev);
- struct mlx5_core_dev *mdev = dev->mdev;
- const struct ib_mad *in_mad = (const struct ib_mad *)in;
- struct ib_mad *out_mad = (struct ib_mad *)out;
+ u8 mgmt_class = in->mad_hdr.mgmt_class;
+ u8 method = in->mad_hdr.method;
+ u16 slid;
+ int err;
- if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) ||
- *out_mad_size != sizeof(*out_mad)))
- return IB_MAD_RESULT_FAILURE;
+ slid = in_wc ? ib_lid_cpu16(in_wc->slid) :
+ be16_to_cpu(IB_LID_PERMISSIVE);
- memset(out_mad->data, 0, sizeof(out_mad->data));
+ if (method == IB_MGMT_METHOD_TRAP && !slid)
+ return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
- if (MLX5_CAP_GEN(mdev, vport_counters) &&
- in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT &&
- in_mad->mad_hdr.method == IB_MGMT_METHOD_GET) {
- return process_pma_cmd(ibdev, port_num, in_mad, out_mad);
- } else {
- return process_mad(ibdev, mad_flags, port_num, in_wc, in_grh,
- in_mad, out_mad);
+ switch (mgmt_class) {
+ case IB_MGMT_CLASS_SUBN_LID_ROUTED:
+ case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE: {
+ if (method != IB_MGMT_METHOD_GET &&
+ method != IB_MGMT_METHOD_SET &&
+ method != IB_MGMT_METHOD_TRAP_REPRESS)
+ return IB_MAD_RESULT_SUCCESS;
+
+ /* Don't process SMInfo queries -- the SMA can't handle them.
+ */
+ if (in->mad_hdr.attr_id == IB_SMP_ATTR_SM_INFO)
+ return IB_MAD_RESULT_SUCCESS;
+ } break;
+ case IB_MGMT_CLASS_PERF_MGMT:
+ if (MLX5_CAP_GEN(dev->mdev, vport_counters) &&
+ method == IB_MGMT_METHOD_GET)
+ return process_pma_cmd(dev, port_num, in, out);
+ fallthrough;
+ case MLX5_IB_VENDOR_CLASS1:
+ case MLX5_IB_VENDOR_CLASS2:
+ case IB_MGMT_CLASS_CONG_MGMT: {
+ if (method != IB_MGMT_METHOD_GET &&
+ method != IB_MGMT_METHOD_SET)
+ return IB_MAD_RESULT_SUCCESS;
+ } break;
+ default:
+ return IB_MAD_RESULT_SUCCESS;
}
+
+ err = mlx5_MAD_IFC(to_mdev(ibdev), mad_flags & IB_MAD_IGNORE_MKEY,
+ mad_flags & IB_MAD_IGNORE_BKEY, port_num, in_wc,
+ in_grh, in, out);
+ if (err)
+ return IB_MAD_RESULT_FAILURE;
+
+ /* set return bit in status of directed route responses */
+ if (mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
+ out->mad_hdr.status |= cpu_to_be16(1 << 15);
+
+ if (method == IB_MGMT_METHOD_TRAP_REPRESS)
+ /* no response for trap repress */
+ return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
+
+ return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
}
-int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port)
+int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, unsigned int port)
{
- struct ib_smp *in_mad = NULL;
- struct ib_smp *out_mad = NULL;
+ struct ib_smp *in_mad;
+ struct ib_smp *out_mad;
int err = -ENOMEM;
u16 packet_error;
@@ -273,7 +372,7 @@ int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port)
if (!in_mad || !out_mad)
goto out;
- init_query_mad(in_mad);
+ ib_init_query_mad(in_mad);
in_mad->attr_id = MLX5_ATTR_EXTENDED_PORT_INFO;
in_mad->attr_mod = cpu_to_be32(port);
@@ -281,7 +380,7 @@ int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port)
packet_error = be16_to_cpu(out_mad->status);
- dev->mdev->port_caps[port - 1].ext_port_cap = (!err && !packet_error) ?
+ dev->port_caps[port - 1].ext_port_cap = (!err && !packet_error) ?
MLX_EXT_PORT_CAP_FLAG_EXTENDED_PORT_INFO : 0;
out:
@@ -290,17 +389,17 @@ out:
return err;
}
-int mlx5_query_mad_ifc_smp_attr_node_info(struct ib_device *ibdev,
- struct ib_smp *out_mad)
+static int mlx5_query_mad_ifc_smp_attr_node_info(struct ib_device *ibdev,
+ struct ib_smp *out_mad)
{
- struct ib_smp *in_mad = NULL;
- int err = -ENOMEM;
+ struct ib_smp *in_mad;
+ int err;
in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL);
if (!in_mad)
return -ENOMEM;
- init_query_mad(in_mad);
+ ib_init_query_mad(in_mad);
in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;
err = mlx5_MAD_IFC(to_mdev(ibdev), 1, 1, 1, NULL, NULL, in_mad,
@@ -313,8 +412,8 @@ int mlx5_query_mad_ifc_smp_attr_node_info(struct ib_device *ibdev,
int mlx5_query_mad_ifc_system_image_guid(struct ib_device *ibdev,
__be64 *sys_image_guid)
{
- struct ib_smp *out_mad = NULL;
- int err = -ENOMEM;
+ struct ib_smp *out_mad;
+ int err;
out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL);
if (!out_mad)
@@ -335,8 +434,8 @@ out:
int mlx5_query_mad_ifc_max_pkeys(struct ib_device *ibdev,
u16 *max_pkeys)
{
- struct ib_smp *out_mad = NULL;
- int err = -ENOMEM;
+ struct ib_smp *out_mad;
+ int err;
out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL);
if (!out_mad)
@@ -357,8 +456,8 @@ out:
int mlx5_query_mad_ifc_vendor_id(struct ib_device *ibdev,
u32 *vendor_id)
{
- struct ib_smp *out_mad = NULL;
- int err = -ENOMEM;
+ struct ib_smp *out_mad;
+ int err;
out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL);
if (!out_mad)
@@ -378,8 +477,8 @@ out:
int mlx5_query_mad_ifc_node_desc(struct mlx5_ib_dev *dev, char *node_desc)
{
- struct ib_smp *in_mad = NULL;
- struct ib_smp *out_mad = NULL;
+ struct ib_smp *in_mad;
+ struct ib_smp *out_mad;
int err = -ENOMEM;
in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL);
@@ -387,7 +486,7 @@ int mlx5_query_mad_ifc_node_desc(struct mlx5_ib_dev *dev, char *node_desc)
if (!in_mad || !out_mad)
goto out;
- init_query_mad(in_mad);
+ ib_init_query_mad(in_mad);
in_mad->attr_id = IB_SMP_ATTR_NODE_DESC;
err = mlx5_MAD_IFC(dev, 1, 1, 1, NULL, NULL, in_mad, out_mad);
@@ -403,8 +502,8 @@ out:
int mlx5_query_mad_ifc_node_guid(struct mlx5_ib_dev *dev, __be64 *node_guid)
{
- struct ib_smp *in_mad = NULL;
- struct ib_smp *out_mad = NULL;
+ struct ib_smp *in_mad;
+ struct ib_smp *out_mad;
int err = -ENOMEM;
in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL);
@@ -412,7 +511,7 @@ int mlx5_query_mad_ifc_node_guid(struct mlx5_ib_dev *dev, __be64 *node_guid)
if (!in_mad || !out_mad)
goto out;
- init_query_mad(in_mad);
+ ib_init_query_mad(in_mad);
in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;
err = mlx5_MAD_IFC(dev, 1, 1, 1, NULL, NULL, in_mad, out_mad);
@@ -426,11 +525,11 @@ out:
return err;
}
-int mlx5_query_mad_ifc_pkey(struct ib_device *ibdev, u8 port, u16 index,
+int mlx5_query_mad_ifc_pkey(struct ib_device *ibdev, u32 port, u16 index,
u16 *pkey)
{
- struct ib_smp *in_mad = NULL;
- struct ib_smp *out_mad = NULL;
+ struct ib_smp *in_mad;
+ struct ib_smp *out_mad;
int err = -ENOMEM;
in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL);
@@ -438,7 +537,7 @@ int mlx5_query_mad_ifc_pkey(struct ib_device *ibdev, u8 port, u16 index,
if (!in_mad || !out_mad)
goto out;
- init_query_mad(in_mad);
+ ib_init_query_mad(in_mad);
in_mad->attr_id = IB_SMP_ATTR_PKEY_TABLE;
in_mad->attr_mod = cpu_to_be32(index / 32);
@@ -455,11 +554,11 @@ out:
return err;
}
-int mlx5_query_mad_ifc_gids(struct ib_device *ibdev, u8 port, int index,
+int mlx5_query_mad_ifc_gids(struct ib_device *ibdev, u32 port, int index,
union ib_gid *gid)
{
- struct ib_smp *in_mad = NULL;
- struct ib_smp *out_mad = NULL;
+ struct ib_smp *in_mad;
+ struct ib_smp *out_mad;
int err = -ENOMEM;
in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL);
@@ -467,7 +566,7 @@ int mlx5_query_mad_ifc_gids(struct ib_device *ibdev, u8 port, int index,
if (!in_mad || !out_mad)
goto out;
- init_query_mad(in_mad);
+ ib_init_query_mad(in_mad);
in_mad->attr_id = IB_SMP_ATTR_PORT_INFO;
in_mad->attr_mod = cpu_to_be32(port);
@@ -478,7 +577,7 @@ int mlx5_query_mad_ifc_gids(struct ib_device *ibdev, u8 port, int index,
memcpy(gid->raw, out_mad->data + 8, 8);
- init_query_mad(in_mad);
+ ib_init_query_mad(in_mad);
in_mad->attr_id = IB_SMP_ATTR_GUID_INFO;
in_mad->attr_mod = cpu_to_be32(index / 8);
@@ -495,29 +594,24 @@ out:
return err;
}
-int mlx5_query_mad_ifc_port(struct ib_device *ibdev, u8 port,
+int mlx5_query_mad_ifc_port(struct ib_device *ibdev, u32 port,
struct ib_port_attr *props)
{
struct mlx5_ib_dev *dev = to_mdev(ibdev);
struct mlx5_core_dev *mdev = dev->mdev;
- struct ib_smp *in_mad = NULL;
- struct ib_smp *out_mad = NULL;
+ struct ib_smp *in_mad;
+ struct ib_smp *out_mad;
int ext_active_speed;
int err = -ENOMEM;
- if (port < 1 || port > MLX5_CAP_GEN(mdev, num_ports)) {
- mlx5_ib_warn(dev, "invalid port number %d\n", port);
- return -EINVAL;
- }
-
in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL);
out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL);
if (!in_mad || !out_mad)
goto out;
- memset(props, 0, sizeof(*props));
+ /* props being zeroed by the caller, avoid zeroing it here */
- init_query_mad(in_mad);
+ ib_init_query_mad(in_mad);
in_mad->attr_id = IB_SMP_ATTR_PORT_INFO;
in_mad->attr_mod = cpu_to_be32(port);
@@ -536,7 +630,7 @@ int mlx5_query_mad_ifc_port(struct ib_device *ibdev, u8 port,
props->port_cap_flags = be32_to_cpup((__be32 *)(out_mad->data + 20));
props->gid_tbl_len = out_mad->data[50];
props->max_msg_sz = 1 << MLX5_CAP_GEN(mdev, log_max_msg);
- props->pkey_tbl_len = mdev->port_caps[port - 1].pkey_table_len;
+ props->pkey_tbl_len = dev->pkey_table_len;
props->bad_pkey_cntr = be16_to_cpup((__be16 *)(out_mad->data + 46));
props->qkey_viol_cntr = be16_to_cpup((__be16 *)(out_mad->data + 48));
props->active_width = out_mad->data[31] & 0xf;
@@ -547,6 +641,14 @@ int mlx5_query_mad_ifc_port(struct ib_device *ibdev, u8 port,
props->max_vl_num = out_mad->data[37] >> 4;
props->init_type_reply = out_mad->data[41] >> 4;
+ if (props->port_cap_flags & IB_PORT_CAP_MASK2_SUP) {
+ props->port_cap_flags2 =
+ be16_to_cpup((__be16 *)(out_mad->data + 60));
+
+ if (props->port_cap_flags2 & IB_PORT_LINK_WIDTH_2X_SUP)
+ props->active_width = out_mad->data[31] & 0x1f;
+ }
+
/* Check if extended speeds (EDR/FDR/...) are supported */
if (props->port_cap_flags & IB_PORT_EXTENDED_SPEEDS_SUP) {
ext_active_speed = out_mad->data[62] >> 4;
@@ -558,14 +660,37 @@ int mlx5_query_mad_ifc_port(struct ib_device *ibdev, u8 port,
case 2:
props->active_speed = 32; /* EDR */
break;
+ case 4:
+ if (props->port_cap_flags & IB_PORT_CAP_MASK2_SUP &&
+ props->port_cap_flags2 & IB_PORT_LINK_SPEED_HDR_SUP)
+ props->active_speed = IB_SPEED_HDR;
+ break;
+ case 8:
+ if (props->port_cap_flags & IB_PORT_CAP_MASK2_SUP &&
+ props->port_cap_flags2 & IB_PORT_LINK_SPEED_NDR_SUP)
+ props->active_speed = IB_SPEED_NDR;
+ break;
+ }
+ }
+
+ /* Check if extended speeds 2 (XDR/...) are supported */
+ if (props->port_cap_flags & IB_PORT_CAP_MASK2_SUP &&
+ props->port_cap_flags2 & IB_PORT_EXTENDED_SPEEDS2_SUP) {
+ ext_active_speed = (out_mad->data[56] >> 4) & 0x6;
+
+ switch (ext_active_speed) {
+ case 2:
+ if (props->port_cap_flags2 & IB_PORT_LINK_SPEED_XDR_SUP)
+ props->active_speed = IB_SPEED_XDR;
+ break;
}
}
/* If reported active speed is QDR, check if is FDR-10 */
if (props->active_speed == 4) {
- if (mdev->port_caps[port - 1].ext_port_cap &
+ if (dev->port_caps[port - 1].ext_port_cap &
MLX_EXT_PORT_CAP_FLAG_EXTENDED_PORT_INFO) {
- init_query_mad(in_mad);
+ ib_init_query_mad(in_mad);
in_mad->attr_id = MLX5_ATTR_EXTENDED_PORT_INFO;
in_mad->attr_mod = cpu_to_be32(port);
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index d566f6738833..40284bbb45d6 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -1,35 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
- * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
+ * Copyright (c) 2013-2020, Mellanox Technologies inc. All rights reserved.
+ * Copyright (c) 2020, Intel Corporation. All rights reserved.
*/
+#include <linux/debugfs.h>
#include <linux/highmem.h>
#include <linux/module.h>
#include <linux/init.h>
@@ -37,45 +12,87 @@
#include <linux/pci.h>
#include <linux/dma-mapping.h>
#include <linux/slab.h>
-#if defined(CONFIG_X86)
-#include <asm/pat.h>
-#endif
+#include <linux/bitmap.h>
+#include <linux/log2.h>
#include <linux/sched.h>
+#include <linux/sched/mm.h>
+#include <linux/sched/task.h>
#include <linux/delay.h>
#include <rdma/ib_user_verbs.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_cache.h>
#include <linux/mlx5/port.h>
#include <linux/mlx5/vport.h>
+#include <linux/mlx5/fs.h>
+#include <linux/mlx5/eswitch.h>
+#include <linux/mlx5/driver.h>
#include <linux/list.h>
#include <rdma/ib_smi.h>
-#include <rdma/ib_umem.h>
+#include <rdma/ib_umem_odp.h>
+#include <rdma/lag.h>
#include <linux/in.h>
#include <linux/etherdevice.h>
-#include <linux/mlx5/fs.h>
#include "mlx5_ib.h"
-
-#define DRIVER_NAME "mlx5_ib"
-#define DRIVER_VERSION "2.2-1"
-#define DRIVER_RELDATE "Feb 2014"
+#include "ib_rep.h"
+#include "cmd.h"
+#include "devx.h"
+#include "dm.h"
+#include "fs.h"
+#include "srq.h"
+#include "qp.h"
+#include "wr.h"
+#include "restrack.h"
+#include "counters.h"
+#include "umr.h"
+#include <rdma/uverbs_std_types.h>
+#include <rdma/uverbs_ioctl.h>
+#include <rdma/mlx5_user_ioctl_verbs.h>
+#include <rdma/mlx5_user_ioctl_cmds.h>
+#include <rdma/ib_ucaps.h>
+#include "macsec.h"
+#include "data_direct.h"
+#include "dmah.h"
+
+#define UVERBS_MODULE_NAME mlx5_ib
+#include <rdma/uverbs_named_ioctl.h>
MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
-MODULE_DESCRIPTION("Mellanox Connect-IB HCA IB driver");
+MODULE_DESCRIPTION("Mellanox 5th generation network adapters (ConnectX series) IB driver");
MODULE_LICENSE("Dual BSD/GPL");
-MODULE_VERSION(DRIVER_VERSION);
-
-static int deprecated_prof_sel = 2;
-module_param_named(prof_sel, deprecated_prof_sel, int, 0444);
-MODULE_PARM_DESC(prof_sel, "profile selector. Deprecated here. Moved to module mlx5_core");
-static char mlx5_version[] =
- DRIVER_NAME ": Mellanox Connect-IB Infiniband driver v"
- DRIVER_VERSION " (" DRIVER_RELDATE ")\n";
+struct mlx5_ib_event_work {
+ struct work_struct work;
+ union {
+ struct mlx5_ib_dev *dev;
+ struct mlx5_ib_multiport_info *mpi;
+ };
+ bool is_slave;
+ unsigned int event;
+ void *param;
+};
enum {
MLX5_ATOMIC_SIZE_QP_8BYTES = 1 << 3,
};
+static struct workqueue_struct *mlx5_ib_event_wq;
+static LIST_HEAD(mlx5_ib_unaffiliated_port_list);
+static LIST_HEAD(mlx5_ib_dev_list);
+/*
+ * This mutex should be held when accessing either of the above lists
+ */
+static DEFINE_MUTEX(mlx5_ib_multiport_mutex);
+
+struct mlx5_ib_dev *mlx5_ib_get_ibdev_from_mpi(struct mlx5_ib_multiport_info *mpi)
+{
+ struct mlx5_ib_dev *dev;
+
+ mutex_lock(&mlx5_ib_multiport_mutex);
+ dev = mpi->ibdev;
+ mutex_unlock(&mlx5_ib_multiport_mutex);
+ return dev;
+}
+
static enum rdma_link_layer
mlx5_port_type_cap_to_rdma_ll(int port_type_cap)
{
@@ -90,7 +107,7 @@ mlx5_port_type_cap_to_rdma_ll(int port_type_cap)
}
static enum rdma_link_layer
-mlx5_ib_port_link_layer(struct ib_device *device, u8 port_num)
+mlx5_ib_port_link_layer(struct ib_device *device, u32 port_num)
{
struct mlx5_ib_dev *dev = to_mdev(device);
int port_type_cap = MLX5_CAP_GEN(dev->mdev, port_type);
@@ -98,41 +115,184 @@ mlx5_ib_port_link_layer(struct ib_device *device, u8 port_num)
return mlx5_port_type_cap_to_rdma_ll(port_type_cap);
}
+static int get_port_state(struct ib_device *ibdev,
+ u32 port_num,
+ enum ib_port_state *state)
+{
+ struct ib_port_attr attr;
+ int ret;
+
+ memset(&attr, 0, sizeof(attr));
+ ret = ibdev->ops.query_port(ibdev, port_num, &attr);
+ if (!ret)
+ *state = attr.state;
+ return ret;
+}
+
+static struct mlx5_roce *mlx5_get_rep_roce(struct mlx5_ib_dev *dev,
+ struct net_device *ndev,
+ struct net_device *upper,
+ u32 *port_num)
+{
+ struct net_device *rep_ndev;
+ struct mlx5_ib_port *port;
+ int i;
+
+ for (i = 0; i < dev->num_ports; i++) {
+ port = &dev->port[i];
+ if (!port->rep)
+ continue;
+
+ if (upper == ndev && port->rep->vport == MLX5_VPORT_UPLINK) {
+ *port_num = i + 1;
+ return &port->roce;
+ }
+
+ if (upper && port->rep->vport == MLX5_VPORT_UPLINK)
+ continue;
+ rep_ndev = ib_device_get_netdev(&dev->ib_dev, i + 1);
+ if (rep_ndev && rep_ndev == ndev) {
+ dev_put(rep_ndev);
+ *port_num = i + 1;
+ return &port->roce;
+ }
+
+ dev_put(rep_ndev);
+ }
+
+ return NULL;
+}
+
+static bool mlx5_netdev_send_event(struct mlx5_ib_dev *dev,
+ struct net_device *ndev,
+ struct net_device *upper,
+ struct net_device *ib_ndev)
+{
+ if (!dev->ib_active)
+ return false;
+
+ /* Event is about our upper device */
+ if (upper == ndev)
+ return true;
+
+ /* RDMA device is not in lag and not in switchdev */
+ if (!dev->is_rep && !upper && ndev == ib_ndev)
+ return true;
+
+ /* RDMA devie is in switchdev */
+ if (dev->is_rep && ndev == ib_ndev)
+ return true;
+
+ return false;
+}
+
+static struct net_device *mlx5_ib_get_rep_uplink_netdev(struct mlx5_ib_dev *ibdev)
+{
+ struct mlx5_ib_port *port;
+ int i;
+
+ for (i = 0; i < ibdev->num_ports; i++) {
+ port = &ibdev->port[i];
+ if (port->rep && port->rep->vport == MLX5_VPORT_UPLINK) {
+ return ib_device_get_netdev(&ibdev->ib_dev, i + 1);
+ }
+ }
+
+ return NULL;
+}
+
static int mlx5_netdev_event(struct notifier_block *this,
unsigned long event, void *ptr)
{
+ struct mlx5_roce *roce = container_of(this, struct mlx5_roce, nb);
struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
- struct mlx5_ib_dev *ibdev = container_of(this, struct mlx5_ib_dev,
- roce.nb);
+ u32 port_num = roce->native_port_num;
+ struct net_device *ib_ndev = NULL;
+ struct mlx5_core_dev *mdev;
+ struct mlx5_ib_dev *ibdev;
+
+ ibdev = roce->dev;
+ mdev = mlx5_ib_get_native_port_mdev(ibdev, port_num, NULL);
+ if (!mdev)
+ return NOTIFY_DONE;
switch (event) {
case NETDEV_REGISTER:
- case NETDEV_UNREGISTER:
- write_lock(&ibdev->roce.netdev_lock);
- if (ndev->dev.parent == &ibdev->mdev->pdev->dev)
- ibdev->roce.netdev = (event == NETDEV_UNREGISTER) ?
- NULL : ndev;
- write_unlock(&ibdev->roce.netdev_lock);
+ /* Should already be registered during the load */
+ if (ibdev->is_rep)
+ break;
+
+ ib_ndev = ib_device_get_netdev(&ibdev->ib_dev, port_num);
+ /* Exit if already registered */
+ if (ib_ndev)
+ goto put_ndev;
+
+ if (ndev->dev.parent == mdev->device)
+ ib_device_set_netdev(&ibdev->ib_dev, ndev, port_num);
break;
+ case NETDEV_UNREGISTER:
+ /* In case of reps, ib device goes away before the netdevs */
+ if (ibdev->is_rep)
+ break;
+ ib_ndev = ib_device_get_netdev(&ibdev->ib_dev, port_num);
+ if (ib_ndev == ndev)
+ ib_device_set_netdev(&ibdev->ib_dev, NULL, port_num);
+ goto put_ndev;
+
+ case NETDEV_CHANGE:
case NETDEV_UP:
case NETDEV_DOWN: {
- struct net_device *lag_ndev = mlx5_lag_get_roce_netdev(ibdev->mdev);
struct net_device *upper = NULL;
- if (lag_ndev) {
- upper = netdev_master_upper_dev_get(lag_ndev);
- dev_put(lag_ndev);
+ if (!netif_is_lag_master(ndev) && !netif_is_lag_port(ndev) &&
+ !mlx5_core_mp_enabled(mdev))
+ return NOTIFY_DONE;
+
+ if (mlx5_lag_is_roce(mdev) || mlx5_lag_is_sriov(mdev)) {
+ struct net_device *lag_ndev;
+
+ if(mlx5_lag_is_roce(mdev))
+ lag_ndev = ib_device_get_netdev(&ibdev->ib_dev, 1);
+ else /* sriov lag */
+ lag_ndev = mlx5_ib_get_rep_uplink_netdev(ibdev);
+
+ if (lag_ndev) {
+ upper = netdev_master_upper_dev_get(lag_ndev);
+ dev_put(lag_ndev);
+ } else {
+ goto done;
+ }
}
- if ((upper == ndev || (!upper && ndev == ibdev->roce.netdev))
- && ibdev->ib_active) {
+ if (ibdev->is_rep)
+ roce = mlx5_get_rep_roce(ibdev, ndev, upper, &port_num);
+ if (!roce)
+ return NOTIFY_DONE;
+
+ ib_ndev = ib_device_get_netdev(&ibdev->ib_dev, port_num);
+
+ if (mlx5_netdev_send_event(ibdev, ndev, upper, ib_ndev)) {
struct ib_event ibev = { };
+ enum ib_port_state port_state;
+
+ if (get_port_state(&ibdev->ib_dev, port_num,
+ &port_state))
+ goto put_ndev;
+
+ if (roce->last_port_state == port_state)
+ goto put_ndev;
+ roce->last_port_state = port_state;
ibev.device = &ibdev->ib_dev;
- ibev.event = (event == NETDEV_UP) ?
- IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR;
- ibev.element.port_num = 1;
+ if (port_state == IB_PORT_DOWN)
+ ibev.event = IB_EVENT_PORT_ERR;
+ else if (port_state == IB_PORT_ACTIVE)
+ ibev.event = IB_EVENT_PORT_ACTIVE;
+ else
+ goto put_ndev;
+
+ ibev.element.port_num = port_num;
ib_dispatch_event(&ibev);
}
break;
@@ -141,60 +301,308 @@ static int mlx5_netdev_event(struct notifier_block *this,
default:
break;
}
-
+put_ndev:
+ dev_put(ib_ndev);
+done:
+ mlx5_ib_put_native_port_mdev(ibdev, port_num);
return NOTIFY_DONE;
}
-static struct net_device *mlx5_ib_get_netdev(struct ib_device *device,
- u8 port_num)
+struct mlx5_core_dev *mlx5_ib_get_native_port_mdev(struct mlx5_ib_dev *ibdev,
+ u32 ib_port_num,
+ u32 *native_port_num)
{
- struct mlx5_ib_dev *ibdev = to_mdev(device);
- struct net_device *ndev;
+ enum rdma_link_layer ll = mlx5_ib_port_link_layer(&ibdev->ib_dev,
+ ib_port_num);
+ struct mlx5_core_dev *mdev = NULL;
+ struct mlx5_ib_multiport_info *mpi;
+ struct mlx5_ib_port *port;
+
+ if (ibdev->ib_dev.type == RDMA_DEVICE_TYPE_SMI) {
+ if (native_port_num)
+ *native_port_num = smi_to_native_portnum(ibdev,
+ ib_port_num);
+ return ibdev->mdev;
+
+ }
- ndev = mlx5_lag_get_roce_netdev(ibdev->mdev);
- if (ndev)
- return ndev;
+ if (!mlx5_core_mp_enabled(ibdev->mdev) ||
+ ll != IB_LINK_LAYER_ETHERNET) {
+ if (native_port_num)
+ *native_port_num = ib_port_num;
+ return ibdev->mdev;
+ }
- /* Ensure ndev does not disappear before we invoke dev_hold()
- */
- read_lock(&ibdev->roce.netdev_lock);
- ndev = ibdev->roce.netdev;
- if (ndev)
- dev_hold(ndev);
- read_unlock(&ibdev->roce.netdev_lock);
+ if (native_port_num)
+ *native_port_num = 1;
- return ndev;
+ port = &ibdev->port[ib_port_num - 1];
+ spin_lock(&port->mp.mpi_lock);
+ mpi = ibdev->port[ib_port_num - 1].mp.mpi;
+ if (mpi && !mpi->unaffiliate) {
+ mdev = mpi->mdev;
+ /* If it's the master no need to refcount, it'll exist
+ * as long as the ib_dev exists.
+ */
+ if (!mpi->is_master)
+ mpi->mdev_refcnt++;
+ }
+ spin_unlock(&port->mp.mpi_lock);
+
+ return mdev;
+}
+
+void mlx5_ib_put_native_port_mdev(struct mlx5_ib_dev *ibdev, u32 port_num)
+{
+ enum rdma_link_layer ll = mlx5_ib_port_link_layer(&ibdev->ib_dev,
+ port_num);
+ struct mlx5_ib_multiport_info *mpi;
+ struct mlx5_ib_port *port;
+
+ if (!mlx5_core_mp_enabled(ibdev->mdev) || ll != IB_LINK_LAYER_ETHERNET)
+ return;
+
+ port = &ibdev->port[port_num - 1];
+
+ spin_lock(&port->mp.mpi_lock);
+ mpi = ibdev->port[port_num - 1].mp.mpi;
+ if (mpi->is_master)
+ goto out;
+
+ mpi->mdev_refcnt--;
+ if (mpi->unaffiliate)
+ complete(&mpi->unref_comp);
+out:
+ spin_unlock(&port->mp.mpi_lock);
}
-static int mlx5_query_port_roce(struct ib_device *device, u8 port_num,
+static int translate_eth_legacy_proto_oper(u32 eth_proto_oper,
+ u16 *active_speed, u8 *active_width)
+{
+ switch (eth_proto_oper) {
+ case MLX5E_PROT_MASK(MLX5E_1000BASE_CX_SGMII):
+ case MLX5E_PROT_MASK(MLX5E_1000BASE_KX):
+ case MLX5E_PROT_MASK(MLX5E_100BASE_TX):
+ case MLX5E_PROT_MASK(MLX5E_1000BASE_T):
+ *active_width = IB_WIDTH_1X;
+ *active_speed = IB_SPEED_SDR;
+ break;
+ case MLX5E_PROT_MASK(MLX5E_10GBASE_T):
+ case MLX5E_PROT_MASK(MLX5E_10GBASE_CX4):
+ case MLX5E_PROT_MASK(MLX5E_10GBASE_KX4):
+ case MLX5E_PROT_MASK(MLX5E_10GBASE_KR):
+ case MLX5E_PROT_MASK(MLX5E_10GBASE_CR):
+ case MLX5E_PROT_MASK(MLX5E_10GBASE_SR):
+ case MLX5E_PROT_MASK(MLX5E_10GBASE_ER):
+ *active_width = IB_WIDTH_1X;
+ *active_speed = IB_SPEED_QDR;
+ break;
+ case MLX5E_PROT_MASK(MLX5E_25GBASE_CR):
+ case MLX5E_PROT_MASK(MLX5E_25GBASE_KR):
+ case MLX5E_PROT_MASK(MLX5E_25GBASE_SR):
+ *active_width = IB_WIDTH_1X;
+ *active_speed = IB_SPEED_EDR;
+ break;
+ case MLX5E_PROT_MASK(MLX5E_40GBASE_CR4):
+ case MLX5E_PROT_MASK(MLX5E_40GBASE_KR4):
+ case MLX5E_PROT_MASK(MLX5E_40GBASE_SR4):
+ case MLX5E_PROT_MASK(MLX5E_40GBASE_LR4):
+ *active_width = IB_WIDTH_4X;
+ *active_speed = IB_SPEED_QDR;
+ break;
+ case MLX5E_PROT_MASK(MLX5E_50GBASE_CR2):
+ case MLX5E_PROT_MASK(MLX5E_50GBASE_KR2):
+ case MLX5E_PROT_MASK(MLX5E_50GBASE_SR2):
+ *active_width = IB_WIDTH_1X;
+ *active_speed = IB_SPEED_HDR;
+ break;
+ case MLX5E_PROT_MASK(MLX5E_56GBASE_R4):
+ *active_width = IB_WIDTH_4X;
+ *active_speed = IB_SPEED_FDR;
+ break;
+ case MLX5E_PROT_MASK(MLX5E_100GBASE_CR4):
+ case MLX5E_PROT_MASK(MLX5E_100GBASE_SR4):
+ case MLX5E_PROT_MASK(MLX5E_100GBASE_KR4):
+ case MLX5E_PROT_MASK(MLX5E_100GBASE_LR4):
+ *active_width = IB_WIDTH_4X;
+ *active_speed = IB_SPEED_EDR;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int translate_eth_ext_proto_oper(u32 eth_proto_oper, u16 *active_speed,
+ u8 *active_width)
+{
+ switch (eth_proto_oper) {
+ case MLX5E_PROT_MASK(MLX5E_SGMII_100M):
+ case MLX5E_PROT_MASK(MLX5E_1000BASE_X_SGMII):
+ *active_width = IB_WIDTH_1X;
+ *active_speed = IB_SPEED_SDR;
+ break;
+ case MLX5E_PROT_MASK(MLX5E_5GBASE_R):
+ *active_width = IB_WIDTH_1X;
+ *active_speed = IB_SPEED_DDR;
+ break;
+ case MLX5E_PROT_MASK(MLX5E_10GBASE_XFI_XAUI_1):
+ *active_width = IB_WIDTH_1X;
+ *active_speed = IB_SPEED_QDR;
+ break;
+ case MLX5E_PROT_MASK(MLX5E_40GBASE_XLAUI_4_XLPPI_4):
+ *active_width = IB_WIDTH_4X;
+ *active_speed = IB_SPEED_QDR;
+ break;
+ case MLX5E_PROT_MASK(MLX5E_25GAUI_1_25GBASE_CR_KR):
+ *active_width = IB_WIDTH_1X;
+ *active_speed = IB_SPEED_EDR;
+ break;
+ case MLX5E_PROT_MASK(MLX5E_50GAUI_2_LAUI_2_50GBASE_CR2_KR2):
+ *active_width = IB_WIDTH_2X;
+ *active_speed = IB_SPEED_EDR;
+ break;
+ case MLX5E_PROT_MASK(MLX5E_50GAUI_1_LAUI_1_50GBASE_CR_KR):
+ *active_width = IB_WIDTH_1X;
+ *active_speed = IB_SPEED_HDR;
+ break;
+ case MLX5E_PROT_MASK(MLX5E_CAUI_4_100GBASE_CR4_KR4):
+ *active_width = IB_WIDTH_4X;
+ *active_speed = IB_SPEED_EDR;
+ break;
+ case MLX5E_PROT_MASK(MLX5E_100GAUI_2_100GBASE_CR2_KR2):
+ *active_width = IB_WIDTH_2X;
+ *active_speed = IB_SPEED_HDR;
+ break;
+ case MLX5E_PROT_MASK(MLX5E_100GAUI_1_100GBASE_CR_KR):
+ *active_width = IB_WIDTH_1X;
+ *active_speed = IB_SPEED_NDR;
+ break;
+ case MLX5E_PROT_MASK(MLX5E_200GAUI_4_200GBASE_CR4_KR4):
+ *active_width = IB_WIDTH_4X;
+ *active_speed = IB_SPEED_HDR;
+ break;
+ case MLX5E_PROT_MASK(MLX5E_200GAUI_2_200GBASE_CR2_KR2):
+ *active_width = IB_WIDTH_2X;
+ *active_speed = IB_SPEED_NDR;
+ break;
+ case MLX5E_PROT_MASK(MLX5E_200GAUI_1_200GBASE_CR1_KR1):
+ *active_width = IB_WIDTH_1X;
+ *active_speed = IB_SPEED_XDR;
+ break;
+ case MLX5E_PROT_MASK(MLX5E_400GAUI_8_400GBASE_CR8):
+ *active_width = IB_WIDTH_8X;
+ *active_speed = IB_SPEED_HDR;
+ break;
+ case MLX5E_PROT_MASK(MLX5E_400GAUI_4_400GBASE_CR4_KR4):
+ *active_width = IB_WIDTH_4X;
+ *active_speed = IB_SPEED_NDR;
+ break;
+ case MLX5E_PROT_MASK(MLX5E_400GAUI_2_400GBASE_CR2_KR2):
+ *active_width = IB_WIDTH_2X;
+ *active_speed = IB_SPEED_XDR;
+ break;
+ case MLX5E_PROT_MASK(MLX5E_800GAUI_8_800GBASE_CR8_KR8):
+ *active_width = IB_WIDTH_8X;
+ *active_speed = IB_SPEED_NDR;
+ break;
+ case MLX5E_PROT_MASK(MLX5E_800GAUI_4_800GBASE_CR4_KR4):
+ *active_width = IB_WIDTH_4X;
+ *active_speed = IB_SPEED_XDR;
+ break;
+ case MLX5E_PROT_MASK(MLX5E_1600TAUI_8_1600TBASE_CR8_KR8):
+ *active_width = IB_WIDTH_8X;
+ *active_speed = IB_SPEED_XDR;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int translate_eth_proto_oper(u32 eth_proto_oper, u16 *active_speed,
+ u8 *active_width, bool ext)
+{
+ return ext ?
+ translate_eth_ext_proto_oper(eth_proto_oper, active_speed,
+ active_width) :
+ translate_eth_legacy_proto_oper(eth_proto_oper, active_speed,
+ active_width);
+}
+
+static int mlx5_query_port_roce(struct ib_device *device, u32 port_num,
struct ib_port_attr *props)
{
struct mlx5_ib_dev *dev = to_mdev(device);
+ u32 out[MLX5_ST_SZ_DW(ptys_reg)] = {0};
+ struct mlx5_core_dev *mdev;
struct net_device *ndev, *upper;
enum ib_mtu ndev_ib_mtu;
- u16 qkey_viol_cntr;
+ bool put_mdev = true;
+ u32 eth_prot_oper;
+ u32 mdev_port_num;
+ bool ext;
+ int err;
- memset(props, 0, sizeof(*props));
+ mdev = mlx5_ib_get_native_port_mdev(dev, port_num, &mdev_port_num);
+ if (!mdev) {
+ /* This means the port isn't affiliated yet. Get the
+ * info for the master port instead.
+ */
+ put_mdev = false;
+ mdev = dev->mdev;
+ mdev_port_num = 1;
+ port_num = 1;
+ }
- props->port_cap_flags |= IB_PORT_CM_SUP;
- props->port_cap_flags |= IB_PORT_IP_BASED_GIDS;
+ /* Possible bad flows are checked before filling out props so in case
+ * of an error it will still be zeroed out.
+ * Use native port in case of reps
+ */
+ if (dev->is_rep)
+ err = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN,
+ 1, 0);
+ else
+ err = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN,
+ mdev_port_num, 0);
+ if (err)
+ goto out;
+ ext = !!MLX5_GET_ETH_PROTO(ptys_reg, out, true, eth_proto_capability);
+ eth_prot_oper = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, eth_proto_oper);
+
+ props->active_width = IB_WIDTH_4X;
+ props->active_speed = IB_SPEED_QDR;
- props->gid_tbl_len = MLX5_CAP_ROCE(dev->mdev,
- roce_address_table_size);
+ translate_eth_proto_oper(eth_prot_oper, &props->active_speed,
+ &props->active_width, ext);
+
+ if (!dev->is_rep && dev->mdev->roce.roce_en) {
+ u16 qkey_viol_cntr;
+
+ props->port_cap_flags |= IB_PORT_CM_SUP;
+ props->ip_gids = true;
+ props->gid_tbl_len = MLX5_CAP_ROCE(dev->mdev,
+ roce_address_table_size);
+ mlx5_query_nic_vport_qkey_viol_cntr(mdev, &qkey_viol_cntr);
+ props->qkey_viol_cntr = qkey_viol_cntr;
+ }
props->max_mtu = IB_MTU_4096;
props->max_msg_sz = 1 << MLX5_CAP_GEN(dev->mdev, log_max_msg);
props->pkey_tbl_len = 1;
props->state = IB_PORT_DOWN;
- props->phys_state = 3;
+ props->phys_state = IB_PORT_PHYS_STATE_DISABLED;
- mlx5_query_nic_vport_qkey_viol_cntr(dev->mdev, &qkey_viol_cntr);
- props->qkey_viol_cntr = qkey_viol_cntr;
+ /* If this is a stub query for an unaffiliated port stop here */
+ if (!put_mdev)
+ goto out;
- ndev = mlx5_ib_get_netdev(device, port_num);
+ ndev = ib_device_get_netdev(device, port_num);
if (!ndev)
- return 0;
+ goto out;
- if (mlx5_lag_is_active(dev->mdev)) {
+ if (mlx5_lag_is_roce(mdev) || mlx5_lag_is_sriov(mdev)) {
rcu_read_lock();
upper = netdev_master_upper_dev_get_rcu(ndev);
if (upper) {
@@ -207,7 +615,7 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num,
if (netif_running(ndev) && netif_carrier_ok(ndev)) {
props->state = IB_PORT_ACTIVE;
- props->phys_state = 5;
+ props->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
}
ndev_ib_mtu = iboe_get_mtu(ndev->mtu);
@@ -215,111 +623,83 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num,
dev_put(ndev);
props->active_mtu = min(props->max_mtu, ndev_ib_mtu);
-
- props->active_width = IB_WIDTH_4X; /* TODO */
- props->active_speed = IB_SPEED_QDR; /* TODO */
-
- return 0;
+out:
+ if (put_mdev)
+ mlx5_ib_put_native_port_mdev(dev, port_num);
+ return err;
}
-static void ib_gid_to_mlx5_roce_addr(const union ib_gid *gid,
- const struct ib_gid_attr *attr,
- void *mlx5_addr)
+int set_roce_addr(struct mlx5_ib_dev *dev, u32 port_num,
+ unsigned int index, const union ib_gid *gid,
+ const struct ib_gid_attr *attr)
{
-#define MLX5_SET_RA(p, f, v) MLX5_SET(roce_addr_layout, p, f, v)
- char *mlx5_addr_l3_addr = MLX5_ADDR_OF(roce_addr_layout, mlx5_addr,
- source_l3_address);
- void *mlx5_addr_mac = MLX5_ADDR_OF(roce_addr_layout, mlx5_addr,
- source_mac_47_32);
-
- if (!gid)
- return;
-
- ether_addr_copy(mlx5_addr_mac, attr->ndev->dev_addr);
+ enum ib_gid_type gid_type;
+ u16 vlan_id = 0xffff;
+ u8 roce_version = 0;
+ u8 roce_l3_type = 0;
+ u8 mac[ETH_ALEN];
+ int ret;
- if (is_vlan_dev(attr->ndev)) {
- MLX5_SET_RA(mlx5_addr, vlan_valid, 1);
- MLX5_SET_RA(mlx5_addr, vlan_id, vlan_dev_vlan_id(attr->ndev));
+ gid_type = attr->gid_type;
+ if (gid) {
+ ret = rdma_read_gid_l2_fields(attr, &vlan_id, &mac[0]);
+ if (ret)
+ return ret;
}
- switch (attr->gid_type) {
- case IB_GID_TYPE_IB:
- MLX5_SET_RA(mlx5_addr, roce_version, MLX5_ROCE_VERSION_1);
+ switch (gid_type) {
+ case IB_GID_TYPE_ROCE:
+ roce_version = MLX5_ROCE_VERSION_1;
break;
case IB_GID_TYPE_ROCE_UDP_ENCAP:
- MLX5_SET_RA(mlx5_addr, roce_version, MLX5_ROCE_VERSION_2);
+ roce_version = MLX5_ROCE_VERSION_2;
+ if (gid && ipv6_addr_v4mapped((void *)gid))
+ roce_l3_type = MLX5_ROCE_L3_TYPE_IPV4;
+ else
+ roce_l3_type = MLX5_ROCE_L3_TYPE_IPV6;
break;
default:
- WARN_ON(true);
- }
-
- if (attr->gid_type != IB_GID_TYPE_IB) {
- if (ipv6_addr_v4mapped((void *)gid))
- MLX5_SET_RA(mlx5_addr, roce_l3_type,
- MLX5_ROCE_L3_TYPE_IPV4);
- else
- MLX5_SET_RA(mlx5_addr, roce_l3_type,
- MLX5_ROCE_L3_TYPE_IPV6);
+ mlx5_ib_warn(dev, "Unexpected GID type %u\n", gid_type);
}
- if ((attr->gid_type == IB_GID_TYPE_IB) ||
- !ipv6_addr_v4mapped((void *)gid))
- memcpy(mlx5_addr_l3_addr, gid, sizeof(*gid));
- else
- memcpy(&mlx5_addr_l3_addr[12], &gid->raw[12], 4);
+ return mlx5_core_roce_gid_set(dev->mdev, index, roce_version,
+ roce_l3_type, gid->raw, mac,
+ vlan_id < VLAN_CFI_MASK, vlan_id,
+ port_num);
}
-static int set_roce_addr(struct ib_device *device, u8 port_num,
- unsigned int index,
- const union ib_gid *gid,
- const struct ib_gid_attr *attr)
+static int mlx5_ib_add_gid(const struct ib_gid_attr *attr,
+ __always_unused void **context)
{
- struct mlx5_ib_dev *dev = to_mdev(device);
- u32 in[MLX5_ST_SZ_DW(set_roce_address_in)] = {0};
- u32 out[MLX5_ST_SZ_DW(set_roce_address_out)] = {0};
- void *in_addr = MLX5_ADDR_OF(set_roce_address_in, in, roce_address);
- enum rdma_link_layer ll = mlx5_ib_port_link_layer(device, port_num);
-
- if (ll != IB_LINK_LAYER_ETHERNET)
- return -EINVAL;
+ int ret;
- ib_gid_to_mlx5_roce_addr(gid, attr, in_addr);
+ ret = mlx5r_add_gid_macsec_operations(attr);
+ if (ret)
+ return ret;
- MLX5_SET(set_roce_address_in, in, roce_address_index, index);
- MLX5_SET(set_roce_address_in, in, opcode, MLX5_CMD_OP_SET_ROCE_ADDRESS);
- return mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out));
+ return set_roce_addr(to_mdev(attr->device), attr->port_num,
+ attr->index, &attr->gid, attr);
}
-static int mlx5_ib_add_gid(struct ib_device *device, u8 port_num,
- unsigned int index, const union ib_gid *gid,
- const struct ib_gid_attr *attr,
+static int mlx5_ib_del_gid(const struct ib_gid_attr *attr,
__always_unused void **context)
{
- return set_roce_addr(device, port_num, index, gid, attr);
-}
+ int ret;
-static int mlx5_ib_del_gid(struct ib_device *device, u8 port_num,
- unsigned int index, __always_unused void **context)
-{
- return set_roce_addr(device, port_num, index, NULL, NULL);
+ ret = set_roce_addr(to_mdev(attr->device), attr->port_num,
+ attr->index, NULL, attr);
+ if (ret)
+ return ret;
+
+ mlx5r_del_gid_macsec_operations(attr);
+ return 0;
}
-__be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num,
- int index)
+__be16 mlx5_get_roce_udp_sport_min(const struct mlx5_ib_dev *dev,
+ const struct ib_gid_attr *attr)
{
- struct ib_gid_attr attr;
- union ib_gid gid;
-
- if (ib_get_cached_gid(&dev->ib_dev, port_num, index, &gid, &attr))
- return 0;
-
- if (!attr.ndev)
- return 0;
-
- dev_put(attr.ndev);
-
- if (attr.gid_type != IB_GID_TYPE_ROCE_UDP_ENCAP)
+ if (attr->gid_type != IB_GID_TYPE_ROCE_UDP_ENCAP)
return 0;
return cpu_to_be16(MLX5_CAP_ROCE(dev->mdev, r_roce_min_src_udp_port));
@@ -351,13 +731,13 @@ static int mlx5_get_vport_access_method(struct ib_device *ibdev)
}
static void get_atomic_caps(struct mlx5_ib_dev *dev,
+ u8 atomic_size_qp,
struct ib_device_attr *props)
{
u8 tmp;
u8 atomic_operations = MLX5_CAP_ATOMIC(dev->mdev, atomic_operations);
- u8 atomic_size_qp = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_qp);
u8 atomic_req_8B_endianness_mode =
- MLX5_CAP_ATOMIC(dev->mdev, atomic_req_8B_endianess_mode);
+ MLX5_CAP_ATOMIC(dev->mdev, atomic_req_8B_endianness_mode);
/* Check if HW supports 8 bytes standard atomic operations and capable
* of host endianness respond
@@ -372,6 +752,14 @@ static void get_atomic_caps(struct mlx5_ib_dev *dev,
}
}
+static void get_atomic_caps_qp(struct mlx5_ib_dev *dev,
+ struct ib_device_attr *props)
+{
+ u8 atomic_size_qp = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_qp);
+
+ get_atomic_caps(dev, atomic_size_qp, props);
+}
+
static int mlx5_query_system_image_guid(struct ib_device *ibdev,
__be64 *sys_image_guid)
{
@@ -458,7 +846,7 @@ static int mlx5_query_node_guid(struct mlx5_ib_dev *dev,
break;
case MLX5_VPORT_ACCESS_METHOD_NIC:
- err = mlx5_query_nic_vport_node_guid(dev->mdev, &tmp);
+ err = mlx5_query_nic_vport_node_guid(dev->mdev, 0, false, &tmp);
break;
default:
@@ -489,10 +877,67 @@ static int mlx5_query_node_desc(struct mlx5_ib_dev *dev, char *node_desc)
MLX5_REG_NODE_DESC, 0, 0);
}
+static void fill_esw_mgr_reg_c0(struct mlx5_core_dev *mdev,
+ struct mlx5_ib_query_device_resp *resp)
+{
+ struct mlx5_eswitch *esw = mdev->priv.eswitch;
+ u16 vport = mlx5_eswitch_manager_vport(mdev);
+
+ resp->reg_c0.value = mlx5_eswitch_get_vport_metadata_for_match(esw,
+ vport);
+ resp->reg_c0.mask = mlx5_eswitch_get_vport_metadata_mask();
+}
+
+/*
+ * Calculate maximum SQ overhead across all QP types.
+ * Other QP types (REG_UMR, UC, RC, UD/SMI/GSI, XRC_TGT)
+ * have smaller overhead than the types calculated below,
+ * so they are implicitly included.
+ */
+static u32 mlx5_ib_calc_max_sq_overhead(void)
+{
+ u32 max_overhead_xrc, overhead_ud_lso, a, b;
+
+ /* XRC_INI */
+ max_overhead_xrc = sizeof(struct mlx5_wqe_xrc_seg);
+ max_overhead_xrc += sizeof(struct mlx5_wqe_ctrl_seg);
+ a = sizeof(struct mlx5_wqe_atomic_seg) +
+ sizeof(struct mlx5_wqe_raddr_seg);
+ b = sizeof(struct mlx5_wqe_umr_ctrl_seg) +
+ sizeof(struct mlx5_mkey_seg) +
+ MLX5_IB_SQ_UMR_INLINE_THRESHOLD / MLX5_IB_UMR_OCTOWORD;
+ max_overhead_xrc += max(a, b);
+
+ /* UD with LSO */
+ overhead_ud_lso = sizeof(struct mlx5_wqe_ctrl_seg);
+ overhead_ud_lso += sizeof(struct mlx5_wqe_eth_pad);
+ overhead_ud_lso += sizeof(struct mlx5_wqe_eth_seg);
+ overhead_ud_lso += sizeof(struct mlx5_wqe_datagram_seg);
+
+ return max(max_overhead_xrc, overhead_ud_lso);
+}
+
+static u32 mlx5_ib_calc_max_qp_wr(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_core_dev *mdev = dev->mdev;
+ u32 max_wqe_bb_units = 1 << MLX5_CAP_GEN(mdev, log_max_qp_sz);
+ u32 max_wqe_size;
+ /* max QP overhead + 1 SGE, no inline, no special features */
+ max_wqe_size = mlx5_ib_calc_max_sq_overhead() +
+ sizeof(struct mlx5_wqe_data_seg);
+
+ max_wqe_size = roundup_pow_of_two(max_wqe_size);
+
+ max_wqe_size = ALIGN(max_wqe_size, MLX5_SEND_WQE_BB);
+
+ return (max_wqe_bb_units * MLX5_SEND_WQE_BB) / max_wqe_size;
+}
+
static int mlx5_ib_query_device(struct ib_device *ibdev,
struct ib_device_attr *props,
struct ib_udata *uhw)
{
+ size_t uhw_outlen = (uhw) ? uhw->outlen : 0;
struct mlx5_ib_dev *dev = to_mdev(ibdev);
struct mlx5_core_dev *mdev = dev->mdev;
int err = -ENOMEM;
@@ -500,17 +945,18 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
int max_rq_sg;
int max_sq_sg;
u64 min_page_size = 1ull << MLX5_CAP_GEN(mdev, log_pg_sz);
+ bool raw_support = !mlx5_core_mp_enabled(mdev);
struct mlx5_ib_query_device_resp resp = {};
size_t resp_len;
u64 max_tso;
resp_len = sizeof(resp.comp_mask) + sizeof(resp.response_length);
- if (uhw->outlen && uhw->outlen < resp_len)
+ if (uhw_outlen && uhw_outlen < resp_len)
return -EINVAL;
- else
- resp.response_length = resp_len;
- if (uhw->inlen && !ib_is_udata_cleared(uhw, 0, uhw->inlen))
+ resp.response_length = resp_len;
+
+ if (uhw && uhw->inlen && !ib_is_udata_cleared(uhw, 0, uhw->inlen))
return -EINVAL;
memset(props, 0, sizeof(*props));
@@ -519,9 +965,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
if (err)
return err;
- err = mlx5_query_max_pkeys(ibdev, &props->max_pkeys);
- if (err)
- return err;
+ props->max_pkeys = dev->pkey_table_len;
err = mlx5_query_vendor_id(ibdev, &props->vendor_id);
if (err)
@@ -548,11 +992,13 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
IB_DEVICE_MEM_WINDOW_TYPE_2B;
props->max_mw = 1 << MLX5_CAP_GEN(mdev, log_max_mkey);
/* We support 'Gappy' memory registration too */
- props->device_cap_flags |= IB_DEVICE_SG_GAPS_REG;
+ props->kernel_cap_flags |= IBK_SG_GAPS_REG;
}
- props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
+ /* IB_WR_REG_MR always requires changing the entity size with UMR */
+ if (!MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled))
+ props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
if (MLX5_CAP_GEN(mdev, sho)) {
- props->device_cap_flags |= IB_DEVICE_SIGNATURE_HANDOVER;
+ props->kernel_cap_flags |= IBK_INTEGRITY_HANDOVER;
/* At this stage no support for signature handover */
props->sig_prot_cap = IB_PROT_T10DIF_TYPE_1 |
IB_PROT_T10DIF_TYPE_2 |
@@ -561,13 +1007,20 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
IB_GUARD_T10DIF_CSUM;
}
if (MLX5_CAP_GEN(mdev, block_lb_mc))
- props->device_cap_flags |= IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;
+ props->kernel_cap_flags |= IBK_BLOCK_MULTICAST_LOOPBACK;
- if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads)) {
- if (MLX5_CAP_ETH(mdev, csum_cap))
+ if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads) && raw_support) {
+ if (MLX5_CAP_ETH(mdev, csum_cap)) {
+ /* Legacy bit to support old userspace libraries */
props->device_cap_flags |= IB_DEVICE_RAW_IP_CSUM;
+ props->raw_packet_caps |= IB_RAW_PACKET_CAP_IP_CSUM;
+ }
- if (field_avail(typeof(resp), tso_caps, uhw->outlen)) {
+ if (MLX5_CAP_ETH(dev->mdev, vlan_cap))
+ props->raw_packet_caps |=
+ IB_RAW_PACKET_CAP_CVLAN_STRIPPING;
+
+ if (offsetofend(typeof(resp), tso_caps) <= uhw_outlen) {
max_tso = MLX5_CAP_ETH(mdev, max_lso_cap);
if (max_tso) {
resp.tso_caps.max_tso = 1 << max_tso;
@@ -577,7 +1030,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
}
}
- if (field_avail(typeof(resp), rss_caps, uhw->outlen)) {
+ if (offsetofend(typeof(resp), rss_caps) <= uhw_outlen) {
resp.rss_caps.rx_hash_function =
MLX5_RX_HASH_FUNC_TOEPLITZ;
resp.rss_caps.rx_hash_fields_mask =
@@ -588,42 +1041,65 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
MLX5_RX_HASH_SRC_PORT_TCP |
MLX5_RX_HASH_DST_PORT_TCP |
MLX5_RX_HASH_SRC_PORT_UDP |
- MLX5_RX_HASH_DST_PORT_UDP;
+ MLX5_RX_HASH_DST_PORT_UDP |
+ MLX5_RX_HASH_INNER;
resp.response_length += sizeof(resp.rss_caps);
}
} else {
- if (field_avail(typeof(resp), tso_caps, uhw->outlen))
+ if (offsetofend(typeof(resp), tso_caps) <= uhw_outlen)
resp.response_length += sizeof(resp.tso_caps);
- if (field_avail(typeof(resp), rss_caps, uhw->outlen))
+ if (offsetofend(typeof(resp), rss_caps) <= uhw_outlen)
resp.response_length += sizeof(resp.rss_caps);
}
if (MLX5_CAP_GEN(mdev, ipoib_basic_offloads)) {
props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM;
- props->device_cap_flags |= IB_DEVICE_UD_TSO;
+ props->kernel_cap_flags |= IBK_UD_TSO;
}
+ if (MLX5_CAP_GEN(dev->mdev, rq_delay_drop) &&
+ MLX5_CAP_GEN(dev->mdev, general_notification_event) &&
+ raw_support)
+ props->raw_packet_caps |= IB_RAW_PACKET_CAP_DELAY_DROP;
+
+ if (MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads) &&
+ MLX5_CAP_IPOIB_ENHANCED(mdev, csum_cap))
+ props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM;
+
if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads) &&
- MLX5_CAP_ETH(dev->mdev, scatter_fcs))
+ MLX5_CAP_ETH(dev->mdev, scatter_fcs) &&
+ raw_support) {
+ /* Legacy bit to support old userspace libraries */
props->device_cap_flags |= IB_DEVICE_RAW_SCATTER_FCS;
+ props->raw_packet_caps |= IB_RAW_PACKET_CAP_SCATTER_FCS;
+ }
+
+ if (MLX5_CAP_DEV_MEM(mdev, memic)) {
+ props->max_dm_size =
+ MLX5_CAP_DEV_MEM(mdev, max_memic_size);
+ }
if (mlx5_get_flow_namespace(dev->mdev, MLX5_FLOW_NAMESPACE_BYPASS))
props->device_cap_flags |= IB_DEVICE_MANAGED_FLOW_STEERING;
+ if (MLX5_CAP_GEN(mdev, end_pad))
+ props->device_cap_flags |= IB_DEVICE_PCI_WRITE_END_PADDING;
+
props->vendor_part_id = mdev->pdev->device;
props->hw_ver = mdev->pdev->revision;
props->max_mr_size = ~0ull;
props->page_size_cap = ~(min_page_size - 1);
props->max_qp = 1 << MLX5_CAP_GEN(mdev, log_max_qp);
- props->max_qp_wr = 1 << MLX5_CAP_GEN(mdev, log_max_qp_sz);
+ props->max_qp_wr = mlx5_ib_calc_max_qp_wr(dev);
max_rq_sg = MLX5_CAP_GEN(mdev, max_wqe_sz_rq) /
sizeof(struct mlx5_wqe_data_seg);
max_sq_desc = min_t(int, MLX5_CAP_GEN(mdev, max_wqe_sz_sq), 512);
max_sq_sg = (max_sq_desc - sizeof(struct mlx5_wqe_ctrl_seg) -
sizeof(struct mlx5_wqe_raddr_seg)) /
sizeof(struct mlx5_wqe_data_seg);
- props->max_sge = min(max_rq_sg, max_sq_sg);
+ props->max_send_sge = max_sq_sg;
+ props->max_recv_sge = max_rq_sg;
props->max_sge_rd = MLX5_MAX_SGE_RD;
props->max_cq = 1 << MLX5_CAP_GEN(mdev, log_max_cq);
props->max_cqe = (1 << MLX5_CAP_GEN(mdev, log_max_cq_sz)) - 1;
@@ -638,31 +1114,48 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
props->max_srq_sge = max_rq_sg - 1;
props->max_fast_reg_page_list_len =
1 << MLX5_CAP_GEN(mdev, log_max_klm_list_size);
- get_atomic_caps(dev, props);
+ props->max_pi_fast_reg_page_list_len =
+ props->max_fast_reg_page_list_len / 2;
+ props->max_sgl_rd =
+ MLX5_CAP_GEN(mdev, max_sgl_for_optimized_performance);
+ get_atomic_caps_qp(dev, props);
props->masked_atomic_cap = IB_ATOMIC_NONE;
props->max_mcast_grp = 1 << MLX5_CAP_GEN(mdev, log_max_mcg);
props->max_mcast_qp_attach = MLX5_CAP_GEN(mdev, max_qp_mcg);
props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
props->max_mcast_grp;
- props->max_map_per_fmr = INT_MAX; /* no limit in ConnectIB */
props->max_ah = INT_MAX;
props->hca_core_clock = MLX5_CAP_GEN(mdev, device_frequency_khz);
props->timestamp_mask = 0x7FFFFFFFFFFFFFFFULL;
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- if (MLX5_CAP_GEN(mdev, pg))
- props->device_cap_flags |= IB_DEVICE_ON_DEMAND_PAGING;
- props->odp_caps = dev->odp_caps;
-#endif
-
- if (MLX5_CAP_GEN(mdev, cd))
- props->device_cap_flags |= IB_DEVICE_CROSS_CHANNEL;
+ if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) {
+ if (dev->odp_caps.general_caps & IB_ODP_SUPPORT)
+ props->kernel_cap_flags |= IBK_ON_DEMAND_PAGING;
+ props->odp_caps = dev->odp_caps;
+ if (!uhw) {
+ /* ODP for kernel QPs is not implemented for receive
+ * WQEs and SRQ WQEs
+ */
+ props->odp_caps.per_transport_caps.rc_odp_caps &=
+ ~(IB_ODP_SUPPORT_READ |
+ IB_ODP_SUPPORT_SRQ_RECV);
+ props->odp_caps.per_transport_caps.uc_odp_caps &=
+ ~(IB_ODP_SUPPORT_READ |
+ IB_ODP_SUPPORT_SRQ_RECV);
+ props->odp_caps.per_transport_caps.ud_odp_caps &=
+ ~(IB_ODP_SUPPORT_READ |
+ IB_ODP_SUPPORT_SRQ_RECV);
+ props->odp_caps.per_transport_caps.xrc_odp_caps &=
+ ~(IB_ODP_SUPPORT_READ |
+ IB_ODP_SUPPORT_SRQ_RECV);
+ }
+ }
- if (!mlx5_core_is_pf(mdev))
- props->device_cap_flags |= IB_DEVICE_VIRTUAL_FUNCTION;
+ if (mlx5_core_is_vf(mdev))
+ props->kernel_cap_flags |= IBK_VIRTUAL_FUNCTION;
if (mlx5_ib_port_link_layer(ibdev, 1) ==
- IB_LINK_LAYER_ETHERNET) {
+ IB_LINK_LAYER_ETHERNET && raw_support) {
props->rss_caps.max_rwq_indirection_tables =
1 << MLX5_CAP_GEN(dev->mdev, log_max_rqt);
props->rss_caps.max_rwq_indirection_table_size =
@@ -672,28 +1165,47 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
1 << MLX5_CAP_GEN(dev->mdev, log_max_rq);
}
- if (field_avail(typeof(resp), mlx5_ib_support_multi_pkt_send_wqes,
- uhw->outlen)) {
- resp.mlx5_ib_support_multi_pkt_send_wqes =
- MLX5_CAP_ETH(mdev, multi_pkt_send_wqe);
- resp.response_length +=
- sizeof(resp.mlx5_ib_support_multi_pkt_send_wqes);
+ if (MLX5_CAP_GEN(mdev, tag_matching)) {
+ props->tm_caps.max_num_tags =
+ (1 << MLX5_CAP_GEN(mdev, log_tag_matching_list_sz)) - 1;
+ props->tm_caps.max_ops =
+ 1 << MLX5_CAP_GEN(mdev, log_max_qp_sz);
+ props->tm_caps.max_sge = MLX5_TM_MAX_SGE;
}
- if (field_avail(typeof(resp), reserved, uhw->outlen))
- resp.response_length += sizeof(resp.reserved);
+ if (MLX5_CAP_GEN(mdev, tag_matching) &&
+ MLX5_CAP_GEN(mdev, rndv_offload_rc)) {
+ props->tm_caps.flags = IB_TM_CAP_RNDV_RC;
+ props->tm_caps.max_rndv_hdr_size = MLX5_TM_MAX_RNDV_MSG_SIZE;
+ }
+
+ if (MLX5_CAP_GEN(dev->mdev, cq_moderation)) {
+ props->cq_caps.max_cq_moderation_count =
+ MLX5_MAX_CQ_COUNT;
+ props->cq_caps.max_cq_moderation_period =
+ MLX5_MAX_CQ_PERIOD;
+ }
- if (field_avail(typeof(resp), cqe_comp_caps, uhw->outlen)) {
- resp.cqe_comp_caps.max_num =
- MLX5_CAP_GEN(dev->mdev, cqe_compression) ?
- MLX5_CAP_GEN(dev->mdev, cqe_compression_max_num) : 0;
- resp.cqe_comp_caps.supported_format =
- MLX5_IB_CQE_RES_FORMAT_HASH |
- MLX5_IB_CQE_RES_FORMAT_CSUM;
+ if (offsetofend(typeof(resp), cqe_comp_caps) <= uhw_outlen) {
resp.response_length += sizeof(resp.cqe_comp_caps);
+
+ if (MLX5_CAP_GEN(dev->mdev, cqe_compression)) {
+ resp.cqe_comp_caps.max_num =
+ MLX5_CAP_GEN(dev->mdev,
+ cqe_compression_max_num);
+
+ resp.cqe_comp_caps.supported_format =
+ MLX5_IB_CQE_RES_FORMAT_HASH |
+ MLX5_IB_CQE_RES_FORMAT_CSUM;
+
+ if (MLX5_CAP_GEN(dev->mdev, mini_cqe_resp_stride_index))
+ resp.cqe_comp_caps.supported_format |=
+ MLX5_IB_CQE_RES_FORMAT_CSUM_STRIDX;
+ }
}
- if (field_avail(typeof(resp), packet_pacing_caps, uhw->outlen)) {
+ if (offsetofend(typeof(resp), packet_pacing_caps) <= uhw_outlen &&
+ raw_support) {
if (MLX5_CAP_QOS(mdev, packet_pacing) &&
MLX5_CAP_GEN(mdev, qos)) {
resp.packet_pacing_caps.qp_rate_limit_max =
@@ -702,11 +1214,138 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
MLX5_CAP_QOS(mdev, packet_pacing_min_rate);
resp.packet_pacing_caps.supported_qpts |=
1 << IB_QPT_RAW_PACKET;
+ if (MLX5_CAP_QOS(mdev, packet_pacing_burst_bound) &&
+ MLX5_CAP_QOS(mdev, packet_pacing_typical_size))
+ resp.packet_pacing_caps.cap_flags |=
+ MLX5_IB_PP_SUPPORT_BURST;
}
resp.response_length += sizeof(resp.packet_pacing_caps);
}
- if (uhw->outlen) {
+ if (offsetofend(typeof(resp), mlx5_ib_support_multi_pkt_send_wqes) <=
+ uhw_outlen) {
+ if (MLX5_CAP_ETH(mdev, multi_pkt_send_wqe))
+ resp.mlx5_ib_support_multi_pkt_send_wqes =
+ MLX5_IB_ALLOW_MPW;
+
+ if (MLX5_CAP_ETH(mdev, enhanced_multi_pkt_send_wqe))
+ resp.mlx5_ib_support_multi_pkt_send_wqes |=
+ MLX5_IB_SUPPORT_EMPW;
+
+ resp.response_length +=
+ sizeof(resp.mlx5_ib_support_multi_pkt_send_wqes);
+ }
+
+ if (offsetofend(typeof(resp), flags) <= uhw_outlen) {
+ resp.response_length += sizeof(resp.flags);
+
+ if (MLX5_CAP_GEN(mdev, cqe_compression_128))
+ resp.flags |=
+ MLX5_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_COMP;
+
+ if (MLX5_CAP_GEN(mdev, cqe_128_always))
+ resp.flags |= MLX5_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_PAD;
+ if (MLX5_CAP_GEN(mdev, qp_packet_based))
+ resp.flags |=
+ MLX5_IB_QUERY_DEV_RESP_PACKET_BASED_CREDIT_MODE;
+
+ resp.flags |= MLX5_IB_QUERY_DEV_RESP_FLAGS_SCAT2CQE_DCT;
+
+ if (MLX5_CAP_GEN_2(mdev, dp_ordering_force) &&
+ (MLX5_CAP_GEN(mdev, dp_ordering_ooo_all_xrc) ||
+ MLX5_CAP_GEN(mdev, dp_ordering_ooo_all_dc) ||
+ MLX5_CAP_GEN(mdev, dp_ordering_ooo_all_rc) ||
+ MLX5_CAP_GEN(mdev, dp_ordering_ooo_all_ud) ||
+ MLX5_CAP_GEN(mdev, dp_ordering_ooo_all_uc)))
+ resp.flags |= MLX5_IB_QUERY_DEV_RESP_FLAGS_OOO_DP;
+ }
+
+ if (offsetofend(typeof(resp), sw_parsing_caps) <= uhw_outlen) {
+ resp.response_length += sizeof(resp.sw_parsing_caps);
+ if (MLX5_CAP_ETH(mdev, swp)) {
+ resp.sw_parsing_caps.sw_parsing_offloads |=
+ MLX5_IB_SW_PARSING;
+
+ if (MLX5_CAP_ETH(mdev, swp_csum))
+ resp.sw_parsing_caps.sw_parsing_offloads |=
+ MLX5_IB_SW_PARSING_CSUM;
+
+ if (MLX5_CAP_ETH(mdev, swp_lso))
+ resp.sw_parsing_caps.sw_parsing_offloads |=
+ MLX5_IB_SW_PARSING_LSO;
+
+ if (resp.sw_parsing_caps.sw_parsing_offloads)
+ resp.sw_parsing_caps.supported_qpts =
+ BIT(IB_QPT_RAW_PACKET);
+ }
+ }
+
+ if (offsetofend(typeof(resp), striding_rq_caps) <= uhw_outlen &&
+ raw_support) {
+ resp.response_length += sizeof(resp.striding_rq_caps);
+ if (MLX5_CAP_GEN(mdev, striding_rq)) {
+ resp.striding_rq_caps.min_single_stride_log_num_of_bytes =
+ MLX5_MIN_SINGLE_STRIDE_LOG_NUM_BYTES;
+ resp.striding_rq_caps.max_single_stride_log_num_of_bytes =
+ MLX5_MAX_SINGLE_STRIDE_LOG_NUM_BYTES;
+ if (MLX5_CAP_GEN(dev->mdev, ext_stride_num_range))
+ resp.striding_rq_caps
+ .min_single_wqe_log_num_of_strides =
+ MLX5_EXT_MIN_SINGLE_WQE_LOG_NUM_STRIDES;
+ else
+ resp.striding_rq_caps
+ .min_single_wqe_log_num_of_strides =
+ MLX5_MIN_SINGLE_WQE_LOG_NUM_STRIDES;
+ resp.striding_rq_caps.max_single_wqe_log_num_of_strides =
+ MLX5_MAX_SINGLE_WQE_LOG_NUM_STRIDES;
+ resp.striding_rq_caps.supported_qpts =
+ BIT(IB_QPT_RAW_PACKET);
+ }
+ }
+
+ if (offsetofend(typeof(resp), tunnel_offloads_caps) <= uhw_outlen) {
+ resp.response_length += sizeof(resp.tunnel_offloads_caps);
+ if (MLX5_CAP_ETH(mdev, tunnel_stateless_vxlan))
+ resp.tunnel_offloads_caps |=
+ MLX5_IB_TUNNELED_OFFLOADS_VXLAN;
+ if (MLX5_CAP_ETH(mdev, tunnel_stateless_geneve_rx))
+ resp.tunnel_offloads_caps |=
+ MLX5_IB_TUNNELED_OFFLOADS_GENEVE;
+ if (MLX5_CAP_ETH(mdev, tunnel_stateless_gre))
+ resp.tunnel_offloads_caps |=
+ MLX5_IB_TUNNELED_OFFLOADS_GRE;
+ if (MLX5_CAP_ETH(mdev, tunnel_stateless_mpls_over_gre))
+ resp.tunnel_offloads_caps |=
+ MLX5_IB_TUNNELED_OFFLOADS_MPLS_GRE;
+ if (MLX5_CAP_ETH(mdev, tunnel_stateless_mpls_over_udp))
+ resp.tunnel_offloads_caps |=
+ MLX5_IB_TUNNELED_OFFLOADS_MPLS_UDP;
+ }
+
+ if (offsetofend(typeof(resp), dci_streams_caps) <= uhw_outlen) {
+ resp.response_length += sizeof(resp.dci_streams_caps);
+
+ resp.dci_streams_caps.max_log_num_concurent =
+ MLX5_CAP_GEN(mdev, log_max_dci_stream_channels);
+
+ resp.dci_streams_caps.max_log_num_errored =
+ MLX5_CAP_GEN(mdev, log_max_dci_errored_streams);
+ }
+
+ if (offsetofend(typeof(resp), reserved) <= uhw_outlen)
+ resp.response_length += sizeof(resp.reserved);
+
+ if (offsetofend(typeof(resp), reg_c0) <= uhw_outlen) {
+ struct mlx5_eswitch *esw = mdev->priv.eswitch;
+
+ resp.response_length += sizeof(resp.reg_c0);
+
+ if (mlx5_eswitch_mode(mdev) == MLX5_ESWITCH_OFFLOADS &&
+ mlx5_eswitch_vport_match_metadata_enabled(esw))
+ fill_esw_mgr_reg_c0(mdev, &resp);
+ }
+
+ if (uhw_outlen) {
err = ib_copy_to_udata(uhw, &resp, resp.response_length);
if (err)
@@ -716,39 +1355,28 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
return 0;
}
-enum mlx5_ib_width {
- MLX5_IB_WIDTH_1X = 1 << 0,
- MLX5_IB_WIDTH_2X = 1 << 1,
- MLX5_IB_WIDTH_4X = 1 << 2,
- MLX5_IB_WIDTH_8X = 1 << 3,
- MLX5_IB_WIDTH_12X = 1 << 4
-};
-
-static int translate_active_width(struct ib_device *ibdev, u8 active_width,
- u8 *ib_width)
+static void translate_active_width(struct ib_device *ibdev, u16 active_width,
+ u8 *ib_width)
{
struct mlx5_ib_dev *dev = to_mdev(ibdev);
- int err = 0;
- if (active_width & MLX5_IB_WIDTH_1X) {
+ if (active_width & MLX5_PTYS_WIDTH_1X)
*ib_width = IB_WIDTH_1X;
- } else if (active_width & MLX5_IB_WIDTH_2X) {
- mlx5_ib_dbg(dev, "active_width %d is not supported by IB spec\n",
- (int)active_width);
- err = -EINVAL;
- } else if (active_width & MLX5_IB_WIDTH_4X) {
+ else if (active_width & MLX5_PTYS_WIDTH_2X)
+ *ib_width = IB_WIDTH_2X;
+ else if (active_width & MLX5_PTYS_WIDTH_4X)
*ib_width = IB_WIDTH_4X;
- } else if (active_width & MLX5_IB_WIDTH_8X) {
+ else if (active_width & MLX5_PTYS_WIDTH_8X)
*ib_width = IB_WIDTH_8X;
- } else if (active_width & MLX5_IB_WIDTH_12X) {
+ else if (active_width & MLX5_PTYS_WIDTH_12X)
*ib_width = IB_WIDTH_12X;
- } else {
- mlx5_ib_dbg(dev, "Invalid active_width %d\n",
- (int)active_width);
- err = -EINVAL;
+ else {
+ mlx5_ib_dbg(dev, "Invalid active_width %d, setting width to default value: 4x\n",
+ active_width);
+ *ib_width = IB_WIDTH_4X;
}
- return err;
+ return;
}
static int mlx5_mtu_to_ib_mtu(int mtu)
@@ -812,17 +1440,17 @@ static int translate_max_vl_num(struct ib_device *ibdev, u8 vl_hw_cap,
return 0;
}
-static int mlx5_query_hca_port(struct ib_device *ibdev, u8 port,
+static int mlx5_query_hca_port(struct ib_device *ibdev, u32 port,
struct ib_port_attr *props)
{
struct mlx5_ib_dev *dev = to_mdev(ibdev);
struct mlx5_core_dev *mdev = dev->mdev;
struct mlx5_hca_vport_context *rep;
+ u8 vl_hw_cap, plane_index = 0;
u16 max_mtu;
u16 oper_mtu;
int err;
- u8 ib_link_width_oper;
- u8 vl_hw_cap;
+ u16 ib_link_width_oper;
rep = kzalloc(sizeof(*rep), GFP_KERNEL);
if (!rep) {
@@ -830,7 +1458,12 @@ static int mlx5_query_hca_port(struct ib_device *ibdev, u8 port,
goto out;
}
- memset(props, 0, sizeof(*props));
+ /* props being zeroed by the caller, avoid zeroing it here */
+
+ if (ibdev->type == RDMA_DEVICE_TYPE_SMI) {
+ plane_index = port;
+ port = smi_to_native_portnum(dev, port);
+ }
err = mlx5_query_hca_vport_context(mdev, 0, port, 0, rep);
if (err)
@@ -842,7 +1475,14 @@ static int mlx5_query_hca_port(struct ib_device *ibdev, u8 port,
props->sm_sl = rep->sm_sl;
props->state = rep->vport_state;
props->phys_state = rep->port_physical_state;
- props->port_cap_flags = rep->cap_mask1;
+
+ props->port_cap_flags = rep->cap_mask1;
+ if (dev->num_plane) {
+ props->port_cap_flags |= IB_PORT_SM_DISABLED;
+ props->port_cap_flags &= ~IB_PORT_SM;
+ } else if (ibdev->type == RDMA_DEVICE_TYPE_SMI)
+ props->port_cap_flags &= ~IB_PORT_CM_SUP;
+
props->gid_tbl_len = mlx5_get_gid_table_len(MLX5_CAP_GEN(mdev, gid_table_size));
props->max_msg_sz = 1 << MLX5_CAP_GEN(mdev, log_max_msg);
props->pkey_tbl_len = mlx5_to_sw_pkey_sz(MLX5_CAP_GEN(mdev, pkey_table_size));
@@ -850,20 +1490,17 @@ static int mlx5_query_hca_port(struct ib_device *ibdev, u8 port,
props->qkey_viol_cntr = rep->qkey_violation_counter;
props->subnet_timeout = rep->subnet_timeout;
props->init_type_reply = rep->init_type_reply;
- props->grh_required = rep->grh_required;
- err = mlx5_query_port_link_width_oper(mdev, &ib_link_width_oper, port);
- if (err)
- goto out;
+ if (props->port_cap_flags & IB_PORT_CAP_MASK2_SUP)
+ props->port_cap_flags2 = rep->cap_mask2;
- err = translate_active_width(ibdev, ib_link_width_oper,
- &props->active_width);
- if (err)
- goto out;
- err = mlx5_query_port_ib_proto_oper(mdev, &props->active_speed, port);
+ err = mlx5_query_ib_port_oper(mdev, &ib_link_width_oper,
+ &props->active_speed, port, plane_index);
if (err)
goto out;
+ translate_active_width(ibdev, ib_link_width_oper, &props->active_width);
+
mlx5_query_port_max_mtu(mdev, &max_mtu, port);
props->max_mtu = mlx5_mtu_to_ib_mtu(max_mtu);
@@ -883,25 +1520,68 @@ out:
return err;
}
-int mlx5_ib_query_port(struct ib_device *ibdev, u8 port,
+int mlx5_ib_query_port(struct ib_device *ibdev, u32 port,
struct ib_port_attr *props)
{
+ unsigned int count;
+ int ret;
+
switch (mlx5_get_vport_access_method(ibdev)) {
case MLX5_VPORT_ACCESS_METHOD_MAD:
- return mlx5_query_mad_ifc_port(ibdev, port, props);
+ ret = mlx5_query_mad_ifc_port(ibdev, port, props);
+ break;
case MLX5_VPORT_ACCESS_METHOD_HCA:
- return mlx5_query_hca_port(ibdev, port, props);
+ ret = mlx5_query_hca_port(ibdev, port, props);
+ break;
case MLX5_VPORT_ACCESS_METHOD_NIC:
- return mlx5_query_port_roce(ibdev, port, props);
+ ret = mlx5_query_port_roce(ibdev, port, props);
+ break;
default:
- return -EINVAL;
+ ret = -EINVAL;
+ }
+
+ if (!ret && props) {
+ struct mlx5_ib_dev *dev = to_mdev(ibdev);
+ struct mlx5_core_dev *mdev;
+ bool put_mdev = true;
+
+ mdev = mlx5_ib_get_native_port_mdev(dev, port, NULL);
+ if (!mdev) {
+ /* If the port isn't affiliated yet query the master.
+ * The master and slave will have the same values.
+ */
+ mdev = dev->mdev;
+ port = 1;
+ put_mdev = false;
+ }
+ count = mlx5_core_reserved_gids_count(mdev);
+ if (put_mdev)
+ mlx5_ib_put_native_port_mdev(dev, port);
+ props->gid_tbl_len -= count;
}
+ return ret;
}
-static int mlx5_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
+static int mlx5_ib_rep_query_port(struct ib_device *ibdev, u32 port,
+ struct ib_port_attr *props)
+{
+ return mlx5_query_port_roce(ibdev, port, props);
+}
+
+static int mlx5_ib_rep_query_pkey(struct ib_device *ibdev, u32 port, u16 index,
+ u16 *pkey)
+{
+ /* Default special Pkey for representor device port as per the
+ * IB specification 1.3 section 10.9.1.2.
+ */
+ *pkey = 0xffff;
+ return 0;
+}
+
+static int mlx5_ib_query_gid(struct ib_device *ibdev, u32 port, int index,
union ib_gid *gid)
{
struct mlx5_ib_dev *dev = to_mdev(ibdev);
@@ -920,20 +1600,43 @@ static int mlx5_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
}
-static int mlx5_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
- u16 *pkey)
+static int mlx5_query_hca_nic_pkey(struct ib_device *ibdev, u32 port,
+ u16 index, u16 *pkey)
{
struct mlx5_ib_dev *dev = to_mdev(ibdev);
- struct mlx5_core_dev *mdev = dev->mdev;
+ struct mlx5_core_dev *mdev;
+ bool put_mdev = true;
+ u32 mdev_port_num;
+ int err;
+
+ mdev = mlx5_ib_get_native_port_mdev(dev, port, &mdev_port_num);
+ if (!mdev) {
+ /* The port isn't affiliated yet, get the PKey from the master
+ * port. For RoCE the PKey tables will be the same.
+ */
+ put_mdev = false;
+ mdev = dev->mdev;
+ mdev_port_num = 1;
+ }
+ err = mlx5_query_hca_vport_pkey(mdev, 0, mdev_port_num, 0,
+ index, pkey);
+ if (put_mdev)
+ mlx5_ib_put_native_port_mdev(dev, port);
+
+ return err;
+}
+
+static int mlx5_ib_query_pkey(struct ib_device *ibdev, u32 port, u16 index,
+ u16 *pkey)
+{
switch (mlx5_get_vport_access_method(ibdev)) {
case MLX5_VPORT_ACCESS_METHOD_MAD:
return mlx5_query_mad_ifc_pkey(ibdev, port, index, pkey);
case MLX5_VPORT_ACCESS_METHOD_HCA:
case MLX5_VPORT_ACCESS_METHOD_NIC:
- return mlx5_query_hca_vport_pkey(mdev, 0, port, 0, index,
- pkey);
+ return mlx5_query_hca_nic_pkey(ibdev, port, index, pkey);
default:
return -EINVAL;
}
@@ -968,17 +1671,67 @@ static int mlx5_ib_modify_device(struct ib_device *ibdev, int mask,
return err;
}
-static int mlx5_ib_modify_port(struct ib_device *ibdev, u8 port, int mask,
+static int set_port_caps_atomic(struct mlx5_ib_dev *dev, u32 port_num, u32 mask,
+ u32 value)
+{
+ struct mlx5_hca_vport_context ctx = {};
+ struct mlx5_core_dev *mdev;
+ u32 mdev_port_num;
+ int err;
+
+ mdev = mlx5_ib_get_native_port_mdev(dev, port_num, &mdev_port_num);
+ if (!mdev)
+ return -ENODEV;
+
+ err = mlx5_query_hca_vport_context(mdev, 0, mdev_port_num, 0, &ctx);
+ if (err)
+ goto out;
+
+ if (~ctx.cap_mask1_perm & mask) {
+ mlx5_ib_warn(dev, "trying to change bitmask 0x%X but change supported 0x%X\n",
+ mask, ctx.cap_mask1_perm);
+ err = -EINVAL;
+ goto out;
+ }
+
+ ctx.cap_mask1 = value;
+ ctx.cap_mask1_perm = mask;
+ err = mlx5_core_modify_hca_vport_context(mdev, 0, mdev_port_num,
+ 0, &ctx);
+
+out:
+ mlx5_ib_put_native_port_mdev(dev, port_num);
+
+ return err;
+}
+
+static int mlx5_ib_modify_port(struct ib_device *ibdev, u32 port, int mask,
struct ib_port_modify *props)
{
struct mlx5_ib_dev *dev = to_mdev(ibdev);
struct ib_port_attr attr;
u32 tmp;
int err;
+ u32 change_mask;
+ u32 value;
+ bool is_ib = (mlx5_ib_port_link_layer(ibdev, port) ==
+ IB_LINK_LAYER_INFINIBAND);
+
+ /* CM layer calls ib_modify_port() regardless of the link layer. For
+ * Ethernet ports, qkey violation and Port capabilities are meaningless.
+ */
+ if (!is_ib)
+ return 0;
+
+ if (MLX5_CAP_GEN(dev->mdev, ib_virt) && is_ib) {
+ change_mask = props->clr_port_cap_mask | props->set_port_cap_mask;
+ value = ~props->clr_port_cap_mask | props->set_port_cap_mask;
+ return set_port_caps_atomic(dev, port, change_mask, value);
+ }
mutex_lock(&dev->cap_mask_mutex);
- err = mlx5_ib_query_port(ibdev, port, &attr);
+ err = ib_query_port(ibdev, port, &attr);
if (err)
goto out;
@@ -992,225 +1745,518 @@ out:
return err;
}
-static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
- struct ib_udata *udata)
+static void print_lib_caps(struct mlx5_ib_dev *dev, u64 caps)
{
- struct mlx5_ib_dev *dev = to_mdev(ibdev);
- struct mlx5_ib_alloc_ucontext_req_v2 req = {};
- struct mlx5_ib_alloc_ucontext_resp resp = {};
- struct mlx5_ib_ucontext *context;
- struct mlx5_uuar_info *uuari;
- struct mlx5_uar *uars;
- int gross_uuars;
- int num_uars;
- int ver;
- int uuarn;
+ mlx5_ib_dbg(dev, "MLX5_LIB_CAP_4K_UAR = %s\n",
+ caps & MLX5_LIB_CAP_4K_UAR ? "y" : "n");
+}
+
+static u16 calc_dynamic_bfregs(int uars_per_sys_page)
+{
+ /* Large page with non 4k uar support might limit the dynamic size */
+ if (uars_per_sys_page == 1 && PAGE_SIZE > 4096)
+ return MLX5_MIN_DYN_BFREGS;
+
+ return MLX5_MAX_DYN_BFREGS;
+}
+
+static int calc_total_bfregs(struct mlx5_ib_dev *dev, bool lib_uar_4k,
+ struct mlx5_ib_alloc_ucontext_req_v2 *req,
+ struct mlx5_bfreg_info *bfregi)
+{
+ int uars_per_sys_page;
+ int bfregs_per_sys_page;
+ int ref_bfregs = req->total_num_bfregs;
+
+ if (req->total_num_bfregs == 0)
+ return -EINVAL;
+
+ BUILD_BUG_ON(MLX5_MAX_BFREGS % MLX5_NON_FP_BFREGS_IN_PAGE);
+ BUILD_BUG_ON(MLX5_MAX_BFREGS < MLX5_NON_FP_BFREGS_IN_PAGE);
+
+ if (req->total_num_bfregs > MLX5_MAX_BFREGS)
+ return -ENOMEM;
+
+ uars_per_sys_page = get_uars_per_sys_page(dev, lib_uar_4k);
+ bfregs_per_sys_page = uars_per_sys_page * MLX5_NON_FP_BFREGS_PER_UAR;
+ /* This holds the required static allocation asked by the user */
+ req->total_num_bfregs = ALIGN(req->total_num_bfregs, bfregs_per_sys_page);
+ if (req->num_low_latency_bfregs > req->total_num_bfregs - 1)
+ return -EINVAL;
+
+ bfregi->num_static_sys_pages = req->total_num_bfregs / bfregs_per_sys_page;
+ bfregi->num_dyn_bfregs = ALIGN(calc_dynamic_bfregs(uars_per_sys_page), bfregs_per_sys_page);
+ bfregi->total_num_bfregs = req->total_num_bfregs + bfregi->num_dyn_bfregs;
+ bfregi->num_sys_pages = bfregi->total_num_bfregs / bfregs_per_sys_page;
+
+ mlx5_ib_dbg(dev, "uar_4k: fw support %s, lib support %s, user requested %d bfregs, allocated %d, total bfregs %d, using %d sys pages\n",
+ MLX5_CAP_GEN(dev->mdev, uar_4k) ? "yes" : "no",
+ lib_uar_4k ? "yes" : "no", ref_bfregs,
+ req->total_num_bfregs, bfregi->total_num_bfregs,
+ bfregi->num_sys_pages);
+
+ return 0;
+}
+
+static int allocate_uars(struct mlx5_ib_dev *dev, struct mlx5_ib_ucontext *context)
+{
+ struct mlx5_bfreg_info *bfregi;
int err;
int i;
- size_t reqlen;
- size_t min_req_v2 = offsetof(struct mlx5_ib_alloc_ucontext_req_v2,
- max_cqe_version);
- if (!dev->ib_active)
- return ERR_PTR(-EAGAIN);
+ bfregi = &context->bfregi;
+ for (i = 0; i < bfregi->num_static_sys_pages; i++) {
+ err = mlx5_cmd_uar_alloc(dev->mdev, &bfregi->sys_pages[i],
+ context->devx_uid);
+ if (err)
+ goto error;
- if (udata->inlen < sizeof(struct ib_uverbs_cmd_hdr))
- return ERR_PTR(-EINVAL);
+ mlx5_ib_dbg(dev, "allocated uar %d\n", bfregi->sys_pages[i]);
+ }
- reqlen = udata->inlen - sizeof(struct ib_uverbs_cmd_hdr);
- if (reqlen == sizeof(struct mlx5_ib_alloc_ucontext_req))
- ver = 0;
- else if (reqlen >= min_req_v2)
- ver = 2;
- else
- return ERR_PTR(-EINVAL);
+ for (i = bfregi->num_static_sys_pages; i < bfregi->num_sys_pages; i++)
+ bfregi->sys_pages[i] = MLX5_IB_INVALID_UAR_INDEX;
+
+ return 0;
+
+error:
+ for (--i; i >= 0; i--)
+ if (mlx5_cmd_uar_dealloc(dev->mdev, bfregi->sys_pages[i],
+ context->devx_uid))
+ mlx5_ib_warn(dev, "failed to free uar %d\n", i);
+
+ return err;
+}
+
+static void deallocate_uars(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_ucontext *context)
+{
+ struct mlx5_bfreg_info *bfregi;
+ int i;
+
+ bfregi = &context->bfregi;
+ for (i = 0; i < bfregi->num_sys_pages; i++)
+ if (i < bfregi->num_static_sys_pages ||
+ bfregi->sys_pages[i] != MLX5_IB_INVALID_UAR_INDEX)
+ mlx5_cmd_uar_dealloc(dev->mdev, bfregi->sys_pages[i],
+ context->devx_uid);
+}
+
+static int mlx5_ib_enable_lb_mp(struct mlx5_core_dev *master,
+ struct mlx5_core_dev *slave,
+ struct mlx5_ib_lb_state *lb_state)
+{
+ int err;
- err = ib_copy_from_udata(&req, udata, min(reqlen, sizeof(req)));
+ err = mlx5_nic_vport_update_local_lb(master, true);
if (err)
- return ERR_PTR(err);
+ return err;
- if (req.flags)
- return ERR_PTR(-EINVAL);
+ err = mlx5_nic_vport_update_local_lb(slave, true);
+ if (err)
+ goto out;
- if (req.total_num_uuars > MLX5_MAX_UUARS)
- return ERR_PTR(-ENOMEM);
+ lb_state->force_enable = true;
+ return 0;
- if (req.total_num_uuars == 0)
- return ERR_PTR(-EINVAL);
+out:
+ mlx5_nic_vport_update_local_lb(master, false);
+ return err;
+}
- if (req.comp_mask || req.reserved0 || req.reserved1 || req.reserved2)
- return ERR_PTR(-EOPNOTSUPP);
+static void mlx5_ib_disable_lb_mp(struct mlx5_core_dev *master,
+ struct mlx5_core_dev *slave,
+ struct mlx5_ib_lb_state *lb_state)
+{
+ mlx5_nic_vport_update_local_lb(slave, false);
+ mlx5_nic_vport_update_local_lb(master, false);
- if (reqlen > sizeof(req) &&
- !ib_is_udata_cleared(udata, sizeof(req),
- reqlen - sizeof(req)))
- return ERR_PTR(-EOPNOTSUPP);
+ lb_state->force_enable = false;
+}
- req.total_num_uuars = ALIGN(req.total_num_uuars,
- MLX5_NON_FP_BF_REGS_PER_PAGE);
- if (req.num_low_latency_uuars > req.total_num_uuars - 1)
- return ERR_PTR(-EINVAL);
-
- num_uars = req.total_num_uuars / MLX5_NON_FP_BF_REGS_PER_PAGE;
- gross_uuars = num_uars * MLX5_BF_REGS_PER_PAGE;
- resp.qp_tab_size = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp);
- if (mlx5_core_is_pf(dev->mdev) && MLX5_CAP_GEN(dev->mdev, bf))
- resp.bf_reg_size = 1 << MLX5_CAP_GEN(dev->mdev, log_bf_reg_size);
- resp.cache_line_size = cache_line_size();
- resp.max_sq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq);
- resp.max_rq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_rq);
- resp.max_send_wqebb = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
- resp.max_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
- resp.max_srq_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz);
- resp.cqe_version = min_t(__u8,
- (__u8)MLX5_CAP_GEN(dev->mdev, cqe_version),
- req.max_cqe_version);
- resp.response_length = min(offsetof(typeof(resp), response_length) +
- sizeof(resp.response_length), udata->outlen);
+int mlx5_ib_enable_lb(struct mlx5_ib_dev *dev, bool td, bool qp)
+{
+ int err = 0;
- context = kzalloc(sizeof(*context), GFP_KERNEL);
- if (!context)
- return ERR_PTR(-ENOMEM);
+ if (dev->lb.force_enable)
+ return 0;
- uuari = &context->uuari;
- mutex_init(&uuari->lock);
- uars = kcalloc(num_uars, sizeof(*uars), GFP_KERNEL);
- if (!uars) {
- err = -ENOMEM;
- goto out_ctx;
+ mutex_lock(&dev->lb.mutex);
+ if (td)
+ dev->lb.user_td++;
+ if (qp)
+ dev->lb.qps++;
+
+ if (dev->lb.user_td == 2 ||
+ dev->lb.qps == 1) {
+ if (!dev->lb.enabled) {
+ err = mlx5_nic_vport_update_local_lb(dev->mdev, true);
+ dev->lb.enabled = true;
+ }
}
- uuari->bitmap = kcalloc(BITS_TO_LONGS(gross_uuars),
- sizeof(*uuari->bitmap),
- GFP_KERNEL);
- if (!uuari->bitmap) {
- err = -ENOMEM;
- goto out_uar_ctx;
- }
- /*
- * clear all fast path uuars
- */
- for (i = 0; i < gross_uuars; i++) {
- uuarn = i & 3;
- if (uuarn == 2 || uuarn == 3)
- set_bit(i, uuari->bitmap);
- }
+ mutex_unlock(&dev->lb.mutex);
- uuari->count = kcalloc(gross_uuars, sizeof(*uuari->count), GFP_KERNEL);
- if (!uuari->count) {
- err = -ENOMEM;
- goto out_bitmap;
- }
+ return err;
+}
- for (i = 0; i < num_uars; i++) {
- err = mlx5_cmd_alloc_uar(dev->mdev, &uars[i].index);
- if (err)
- goto out_count;
+void mlx5_ib_disable_lb(struct mlx5_ib_dev *dev, bool td, bool qp)
+{
+ if (dev->lb.force_enable)
+ return;
+
+ mutex_lock(&dev->lb.mutex);
+ if (td)
+ dev->lb.user_td--;
+ if (qp)
+ dev->lb.qps--;
+
+ if (dev->lb.user_td == 1 &&
+ dev->lb.qps == 0) {
+ if (dev->lb.enabled) {
+ mlx5_nic_vport_update_local_lb(dev->mdev, false);
+ dev->lb.enabled = false;
+ }
}
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- context->ibucontext.invalidate_range = &mlx5_ib_invalidate_range;
-#endif
+ mutex_unlock(&dev->lb.mutex);
+}
+
+static int mlx5_ib_alloc_transport_domain(struct mlx5_ib_dev *dev, u32 *tdn,
+ u16 uid)
+{
+ int err;
- if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) {
- err = mlx5_core_alloc_transport_domain(dev->mdev,
- &context->tdn);
- if (err)
- goto out_uars;
- }
+ if (!MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
+ return 0;
- INIT_LIST_HEAD(&context->vma_private_list);
- INIT_LIST_HEAD(&context->db_page_list);
- mutex_init(&context->db_page_mutex);
+ err = mlx5_cmd_alloc_transport_domain(dev->mdev, tdn, uid);
+ if (err)
+ return err;
- resp.tot_uuars = req.total_num_uuars;
- resp.num_ports = MLX5_CAP_GEN(dev->mdev, num_ports);
+ if ((MLX5_CAP_GEN(dev->mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) ||
+ (!MLX5_CAP_GEN(dev->mdev, disable_local_lb_uc) &&
+ !MLX5_CAP_GEN(dev->mdev, disable_local_lb_mc)))
+ return err;
- if (field_avail(typeof(resp), cqe_version, udata->outlen))
- resp.response_length += sizeof(resp.cqe_version);
+ return mlx5_ib_enable_lb(dev, true, false);
+}
- if (field_avail(typeof(resp), cmds_supp_uhw, udata->outlen)) {
- resp.cmds_supp_uhw |= MLX5_USER_CMDS_SUPP_UHW_QUERY_DEVICE |
+static void mlx5_ib_dealloc_transport_domain(struct mlx5_ib_dev *dev, u32 tdn,
+ u16 uid)
+{
+ if (!MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
+ return;
+
+ mlx5_cmd_dealloc_transport_domain(dev->mdev, tdn, uid);
+
+ if ((MLX5_CAP_GEN(dev->mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) ||
+ (!MLX5_CAP_GEN(dev->mdev, disable_local_lb_uc) &&
+ !MLX5_CAP_GEN(dev->mdev, disable_local_lb_mc)))
+ return;
+
+ mlx5_ib_disable_lb(dev, true, false);
+}
+
+static int set_ucontext_resp(struct ib_ucontext *uctx,
+ struct mlx5_ib_alloc_ucontext_resp *resp)
+{
+ struct ib_device *ibdev = uctx->device;
+ struct mlx5_ib_dev *dev = to_mdev(ibdev);
+ struct mlx5_ib_ucontext *context = to_mucontext(uctx);
+ struct mlx5_bfreg_info *bfregi = &context->bfregi;
+
+ if (MLX5_CAP_GEN(dev->mdev, dump_fill_mkey)) {
+ resp->dump_fill_mkey = dev->mkeys.dump_fill_mkey;
+ resp->comp_mask |=
+ MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_DUMP_FILL_MKEY;
+ }
+
+ resp->qp_tab_size = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp);
+ if (mlx5_wc_support_get(dev->mdev))
+ resp->bf_reg_size = 1 << MLX5_CAP_GEN(dev->mdev,
+ log_bf_reg_size);
+ resp->cache_line_size = cache_line_size();
+ resp->max_sq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq);
+ resp->max_rq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_rq);
+ resp->max_send_wqebb = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
+ resp->max_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
+ resp->max_srq_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz);
+ resp->cqe_version = context->cqe_version;
+ resp->log_uar_size = MLX5_CAP_GEN(dev->mdev, uar_4k) ?
+ MLX5_ADAPTER_PAGE_SHIFT : PAGE_SHIFT;
+ resp->num_uars_per_page = MLX5_CAP_GEN(dev->mdev, uar_4k) ?
+ MLX5_CAP_GEN(dev->mdev,
+ num_of_uars_per_page) : 1;
+ resp->tot_bfregs = bfregi->lib_uar_dyn ? 0 :
+ bfregi->total_num_bfregs - bfregi->num_dyn_bfregs;
+ resp->num_ports = dev->num_ports;
+ resp->cmds_supp_uhw |= MLX5_USER_CMDS_SUPP_UHW_QUERY_DEVICE |
MLX5_USER_CMDS_SUPP_UHW_CREATE_AH;
- resp.response_length += sizeof(resp.cmds_supp_uhw);
+
+ if (mlx5_ib_port_link_layer(ibdev, 1) == IB_LINK_LAYER_ETHERNET) {
+ mlx5_query_min_inline(dev->mdev, &resp->eth_min_inline);
+ resp->eth_min_inline++;
}
+ if (dev->mdev->clock_info)
+ resp->clock_info_versions = BIT(MLX5_IB_CLOCK_INFO_V1);
+
/*
* We don't want to expose information from the PCI bar that is located
* after 4096 bytes, so if the arch only supports larger pages, let's
* pretend we don't support reading the HCA's core clock. This is also
* forced by mmap function.
*/
- if (PAGE_SIZE <= 4096 &&
- field_avail(typeof(resp), hca_core_clock_offset, udata->outlen)) {
- resp.comp_mask |=
+ if (PAGE_SIZE <= 4096) {
+ resp->comp_mask |=
MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET;
- resp.hca_core_clock_offset =
- offsetof(struct mlx5_init_seg, internal_timer_h) %
- PAGE_SIZE;
- resp.response_length += sizeof(resp.hca_core_clock_offset) +
- sizeof(resp.reserved2);
+ resp->hca_core_clock_offset =
+ offsetof(struct mlx5_init_seg,
+ internal_timer_h) % PAGE_SIZE;
}
+ if (MLX5_CAP_GEN(dev->mdev, ece_support))
+ resp->comp_mask |= MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_ECE;
+
+ if (rt_supported(MLX5_CAP_GEN(dev->mdev, sq_ts_format)) &&
+ rt_supported(MLX5_CAP_GEN(dev->mdev, rq_ts_format)) &&
+ rt_supported(MLX5_CAP_ROCE(dev->mdev, qp_ts_format)))
+ resp->comp_mask |=
+ MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_REAL_TIME_TS;
+
+ resp->num_dyn_bfregs = bfregi->num_dyn_bfregs;
+
+ if (MLX5_CAP_GEN(dev->mdev, drain_sigerr))
+ resp->comp_mask |= MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_SQD2RTS;
+
+ resp->comp_mask |=
+ MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_MKEY_UPDATE_TAG;
+
+ return 0;
+}
+
+static bool uctx_rdma_ctrl_is_enabled(u64 enabled_caps)
+{
+ return UCAP_ENABLED(enabled_caps, RDMA_UCAP_MLX5_CTRL_LOCAL) ||
+ UCAP_ENABLED(enabled_caps, RDMA_UCAP_MLX5_CTRL_OTHER_VHCA);
+}
+
+static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx,
+ struct ib_udata *udata)
+{
+ struct ib_device *ibdev = uctx->device;
+ struct mlx5_ib_dev *dev = to_mdev(ibdev);
+ struct mlx5_ib_alloc_ucontext_req_v2 req = {};
+ struct mlx5_ib_alloc_ucontext_resp resp = {};
+ struct mlx5_ib_ucontext *context = to_mucontext(uctx);
+ struct mlx5_bfreg_info *bfregi;
+ int ver;
+ int err;
+ size_t min_req_v2 = offsetof(struct mlx5_ib_alloc_ucontext_req_v2,
+ max_cqe_version);
+ bool lib_uar_4k;
+ bool lib_uar_dyn;
+
+ if (!dev->ib_active)
+ return -EAGAIN;
+
+ if (udata->inlen == sizeof(struct mlx5_ib_alloc_ucontext_req))
+ ver = 0;
+ else if (udata->inlen >= min_req_v2)
+ ver = 2;
+ else
+ return -EINVAL;
+
+ err = ib_copy_from_udata(&req, udata, min(udata->inlen, sizeof(req)));
+ if (err)
+ return err;
+
+ if (req.flags & ~MLX5_IB_ALLOC_UCTX_DEVX)
+ return -EOPNOTSUPP;
+
+ if (req.comp_mask || req.reserved0 || req.reserved1 || req.reserved2)
+ return -EOPNOTSUPP;
+
+ req.total_num_bfregs = ALIGN(req.total_num_bfregs,
+ MLX5_NON_FP_BFREGS_PER_UAR);
+ if (req.num_low_latency_bfregs > req.total_num_bfregs - 1)
+ return -EINVAL;
+
+ if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX) {
+ err = mlx5_ib_devx_create(dev, true, uctx->enabled_caps);
+ if (err < 0)
+ goto out_ctx;
+ context->devx_uid = err;
+
+ if (uctx_rdma_ctrl_is_enabled(uctx->enabled_caps)) {
+ err = mlx5_cmd_add_privileged_uid(dev->mdev,
+ context->devx_uid);
+ if (err)
+ goto out_devx;
+ }
+ }
+
+ lib_uar_4k = req.lib_caps & MLX5_LIB_CAP_4K_UAR;
+ lib_uar_dyn = req.lib_caps & MLX5_LIB_CAP_DYN_UAR;
+ bfregi = &context->bfregi;
+
+ if (lib_uar_dyn) {
+ bfregi->lib_uar_dyn = lib_uar_dyn;
+ goto uar_done;
+ }
+
+ /* updates req->total_num_bfregs */
+ err = calc_total_bfregs(dev, lib_uar_4k, &req, bfregi);
+ if (err)
+ goto out_ucap;
+
+ mutex_init(&bfregi->lock);
+ bfregi->lib_uar_4k = lib_uar_4k;
+ bfregi->count = kcalloc(bfregi->total_num_bfregs, sizeof(*bfregi->count),
+ GFP_KERNEL);
+ if (!bfregi->count) {
+ err = -ENOMEM;
+ goto out_ucap;
+ }
+
+ bfregi->sys_pages = kcalloc(bfregi->num_sys_pages,
+ sizeof(*bfregi->sys_pages),
+ GFP_KERNEL);
+ if (!bfregi->sys_pages) {
+ err = -ENOMEM;
+ goto out_count;
+ }
+
+ err = allocate_uars(dev, context);
+ if (err)
+ goto out_sys_pages;
+
+uar_done:
+ err = mlx5_ib_alloc_transport_domain(dev, &context->tdn,
+ context->devx_uid);
+ if (err)
+ goto out_uars;
+
+ INIT_LIST_HEAD(&context->db_page_list);
+ mutex_init(&context->db_page_mutex);
+
+ context->cqe_version = min_t(__u8,
+ (__u8)MLX5_CAP_GEN(dev->mdev, cqe_version),
+ req.max_cqe_version);
+
+ err = set_ucontext_resp(uctx, &resp);
+ if (err)
+ goto out_mdev;
+
+ resp.response_length = min(udata->outlen, sizeof(resp));
err = ib_copy_to_udata(udata, &resp, resp.response_length);
if (err)
- goto out_td;
+ goto out_mdev;
- uuari->ver = ver;
- uuari->num_low_latency_uuars = req.num_low_latency_uuars;
- uuari->uars = uars;
- uuari->num_uars = num_uars;
- context->cqe_version = resp.cqe_version;
+ bfregi->ver = ver;
+ bfregi->num_low_latency_bfregs = req.num_low_latency_bfregs;
+ context->lib_caps = req.lib_caps;
+ print_lib_caps(dev, context->lib_caps);
- return &context->ibucontext;
+ if (mlx5_ib_lag_should_assign_affinity(dev)) {
+ u32 port = mlx5_core_native_port_num(dev->mdev) - 1;
-out_td:
- if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
- mlx5_core_dealloc_transport_domain(dev->mdev, context->tdn);
+ atomic_set(&context->tx_port_affinity,
+ atomic_add_return(
+ 1, &dev->port[port].roce.tx_port_affinity));
+ }
+
+ return 0;
+
+out_mdev:
+ mlx5_ib_dealloc_transport_domain(dev, context->tdn, context->devx_uid);
out_uars:
- for (i--; i >= 0; i--)
- mlx5_cmd_free_uar(dev->mdev, uars[i].index);
+ deallocate_uars(dev, context);
+
+out_sys_pages:
+ kfree(bfregi->sys_pages);
+
out_count:
- kfree(uuari->count);
+ kfree(bfregi->count);
-out_bitmap:
- kfree(uuari->bitmap);
+out_ucap:
+ if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX &&
+ uctx_rdma_ctrl_is_enabled(uctx->enabled_caps))
+ mlx5_cmd_remove_privileged_uid(dev->mdev, context->devx_uid);
-out_uar_ctx:
- kfree(uars);
+out_devx:
+ if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX)
+ mlx5_ib_devx_destroy(dev, context->devx_uid);
out_ctx:
- kfree(context);
- return ERR_PTR(err);
+ return err;
}
-static int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
+static int mlx5_ib_query_ucontext(struct ib_ucontext *ibcontext,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct mlx5_ib_alloc_ucontext_resp uctx_resp = {};
+ int ret;
+
+ ret = set_ucontext_resp(ibcontext, &uctx_resp);
+ if (ret)
+ return ret;
+
+ uctx_resp.response_length =
+ min_t(size_t,
+ uverbs_attr_get_len(attrs,
+ MLX5_IB_ATTR_QUERY_CONTEXT_RESP_UCTX),
+ sizeof(uctx_resp));
+
+ ret = uverbs_copy_to_struct_or_zero(attrs,
+ MLX5_IB_ATTR_QUERY_CONTEXT_RESP_UCTX,
+ &uctx_resp,
+ sizeof(uctx_resp));
+ return ret;
+}
+
+static void mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
{
struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
struct mlx5_ib_dev *dev = to_mdev(ibcontext->device);
- struct mlx5_uuar_info *uuari = &context->uuari;
- int i;
+ struct mlx5_bfreg_info *bfregi;
+
+ bfregi = &context->bfregi;
+ mlx5_ib_dealloc_transport_domain(dev, context->tdn, context->devx_uid);
- if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
- mlx5_core_dealloc_transport_domain(dev->mdev, context->tdn);
+ deallocate_uars(dev, context);
+ kfree(bfregi->sys_pages);
+ kfree(bfregi->count);
- for (i = 0; i < uuari->num_uars; i++) {
- if (mlx5_cmd_free_uar(dev->mdev, uuari->uars[i].index))
- mlx5_ib_warn(dev, "failed to free UAR 0x%x\n", uuari->uars[i].index);
+ if (context->devx_uid) {
+ if (uctx_rdma_ctrl_is_enabled(ibcontext->enabled_caps))
+ mlx5_cmd_remove_privileged_uid(dev->mdev,
+ context->devx_uid);
+ mlx5_ib_devx_destroy(dev, context->devx_uid);
}
+}
- kfree(uuari->count);
- kfree(uuari->bitmap);
- kfree(uuari->uars);
- kfree(context);
+static phys_addr_t uar_index2pfn(struct mlx5_ib_dev *dev,
+ int uar_idx)
+{
+ int fw_uars_per_page;
- return 0;
+ fw_uars_per_page = MLX5_CAP_GEN(dev->mdev, uar_4k) ? MLX5_UARS_IN_PAGE : 1;
+
+ return (dev->mdev->bar_addr >> PAGE_SHIFT) + uar_idx / fw_uars_per_page;
}
-static phys_addr_t uar_index2pfn(struct mlx5_ib_dev *dev, int index)
+static u64 uar_index2paddress(struct mlx5_ib_dev *dev,
+ int uar_idx)
{
- return (pci_resource_start(dev->mdev->pdev, 0) >> PAGE_SHIFT) + index;
+ unsigned int fw_uars_per_page;
+
+ fw_uars_per_page = MLX5_CAP_GEN(dev->mdev, uar_4k) ?
+ MLX5_UARS_IN_PAGE : 1;
+
+ return (dev->mdev->bar_addr + (uar_idx / fw_uars_per_page) * PAGE_SIZE);
}
static int get_command(unsigned long offset)
@@ -1228,123 +2274,15 @@ static int get_index(unsigned long offset)
return get_arg(offset);
}
-static void mlx5_ib_vma_open(struct vm_area_struct *area)
+/* Index resides in an extra byte to enable larger values than 255 */
+static int get_extended_index(unsigned long offset)
{
- /* vma_open is called when a new VMA is created on top of our VMA. This
- * is done through either mremap flow or split_vma (usually due to
- * mlock, madvise, munmap, etc.) We do not support a clone of the VMA,
- * as this VMA is strongly hardware related. Therefore we set the
- * vm_ops of the newly created/cloned VMA to NULL, to prevent it from
- * calling us again and trying to do incorrect actions. We assume that
- * the original VMA size is exactly a single page, and therefore all
- * "splitting" operation will not happen to it.
- */
- area->vm_ops = NULL;
+ return get_arg(offset) | ((offset >> 16) & 0xff) << 8;
}
-static void mlx5_ib_vma_close(struct vm_area_struct *area)
-{
- struct mlx5_ib_vma_private_data *mlx5_ib_vma_priv_data;
-
- /* It's guaranteed that all VMAs opened on a FD are closed before the
- * file itself is closed, therefore no sync is needed with the regular
- * closing flow. (e.g. mlx5 ib_dealloc_ucontext)
- * However need a sync with accessing the vma as part of
- * mlx5_ib_disassociate_ucontext.
- * The close operation is usually called under mm->mmap_sem except when
- * process is exiting.
- * The exiting case is handled explicitly as part of
- * mlx5_ib_disassociate_ucontext.
- */
- mlx5_ib_vma_priv_data = (struct mlx5_ib_vma_private_data *)area->vm_private_data;
-
- /* setting the vma context pointer to null in the mlx5_ib driver's
- * private data, to protect a race condition in
- * mlx5_ib_disassociate_ucontext().
- */
- mlx5_ib_vma_priv_data->vma = NULL;
- list_del(&mlx5_ib_vma_priv_data->list);
- kfree(mlx5_ib_vma_priv_data);
-}
-
-static const struct vm_operations_struct mlx5_ib_vm_ops = {
- .open = mlx5_ib_vma_open,
- .close = mlx5_ib_vma_close
-};
-
-static int mlx5_ib_set_vma_data(struct vm_area_struct *vma,
- struct mlx5_ib_ucontext *ctx)
-{
- struct mlx5_ib_vma_private_data *vma_prv;
- struct list_head *vma_head = &ctx->vma_private_list;
-
- vma_prv = kzalloc(sizeof(*vma_prv), GFP_KERNEL);
- if (!vma_prv)
- return -ENOMEM;
-
- vma_prv->vma = vma;
- vma->vm_private_data = vma_prv;
- vma->vm_ops = &mlx5_ib_vm_ops;
-
- list_add(&vma_prv->list, vma_head);
-
- return 0;
-}
static void mlx5_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
{
- int ret;
- struct vm_area_struct *vma;
- struct mlx5_ib_vma_private_data *vma_private, *n;
- struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
- struct task_struct *owning_process = NULL;
- struct mm_struct *owning_mm = NULL;
-
- owning_process = get_pid_task(ibcontext->tgid, PIDTYPE_PID);
- if (!owning_process)
- return;
-
- owning_mm = get_task_mm(owning_process);
- if (!owning_mm) {
- pr_info("no mm, disassociate ucontext is pending task termination\n");
- while (1) {
- put_task_struct(owning_process);
- usleep_range(1000, 2000);
- owning_process = get_pid_task(ibcontext->tgid,
- PIDTYPE_PID);
- if (!owning_process ||
- owning_process->state == TASK_DEAD) {
- pr_info("disassociate ucontext done, task was terminated\n");
- /* in case task was dead need to release the
- * task struct.
- */
- if (owning_process)
- put_task_struct(owning_process);
- return;
- }
- }
- }
-
- /* need to protect from a race on closing the vma as part of
- * mlx5_ib_vma_close.
- */
- down_read(&owning_mm->mmap_sem);
- list_for_each_entry_safe(vma_private, n, &context->vma_private_list,
- list) {
- vma = vma_private->vma;
- ret = zap_vma_ptes(vma, vma->vm_start,
- PAGE_SIZE);
- WARN_ONCE(ret, "%s: zap_vma_ptes failed", __func__);
- /* context going to be destroyed, should
- * not access ops any more.
- */
- vma->vm_ops = NULL;
- list_del(&vma_private->list);
- kfree(vma_private);
- }
- up_read(&owning_mm->mmap_sem);
- mmput(owning_mm);
- put_task_struct(owning_process);
}
static inline char *mmap_cmd2str(enum mlx5_ib_mmap_cmd cmd)
@@ -1356,8 +2294,61 @@ static inline char *mmap_cmd2str(enum mlx5_ib_mmap_cmd cmd)
return "best effort WC";
case MLX5_IB_MMAP_NC_PAGE:
return "NC";
+ case MLX5_IB_MMAP_DEVICE_MEM:
+ return "Device Memory";
+ default:
+ return "Unknown";
+ }
+}
+
+static int mlx5_ib_mmap_clock_info_page(struct mlx5_ib_dev *dev,
+ struct vm_area_struct *vma,
+ struct mlx5_ib_ucontext *context)
+{
+ if ((vma->vm_end - vma->vm_start != PAGE_SIZE) ||
+ !(vma->vm_flags & VM_SHARED))
+ return -EINVAL;
+
+ if (get_index(vma->vm_pgoff) != MLX5_IB_CLOCK_INFO_V1)
+ return -EOPNOTSUPP;
+
+ if (vma->vm_flags & (VM_WRITE | VM_EXEC))
+ return -EPERM;
+ vm_flags_clear(vma, VM_MAYWRITE);
+
+ if (!dev->mdev->clock_info)
+ return -EOPNOTSUPP;
+
+ return vm_insert_page(vma, vma->vm_start,
+ virt_to_page(dev->mdev->clock_info));
+}
+
+static void mlx5_ib_mmap_free(struct rdma_user_mmap_entry *entry)
+{
+ struct mlx5_user_mmap_entry *mentry = to_mmmap(entry);
+ struct mlx5_ib_dev *dev = to_mdev(entry->ucontext->device);
+ struct mlx5_var_table *var_table = &dev->var_table;
+ struct mlx5_ib_ucontext *context = to_mucontext(entry->ucontext);
+
+ switch (mentry->mmap_flag) {
+ case MLX5_IB_MMAP_TYPE_MEMIC:
+ case MLX5_IB_MMAP_TYPE_MEMIC_OP:
+ mlx5_ib_dm_mmap_free(dev, mentry);
+ break;
+ case MLX5_IB_MMAP_TYPE_VAR:
+ mutex_lock(&var_table->bitmap_lock);
+ clear_bit(mentry->page_idx, var_table->bitmap);
+ mutex_unlock(&var_table->bitmap_lock);
+ kfree(mentry);
+ break;
+ case MLX5_IB_MMAP_TYPE_UAR_WC:
+ case MLX5_IB_MMAP_TYPE_UAR_NC:
+ mlx5_cmd_uar_dealloc(dev->mdev, mentry->page_idx,
+ context->devx_uid);
+ kfree(mentry);
+ break;
default:
- return NULL;
+ WARN_ON(true);
}
}
@@ -1365,22 +2356,37 @@ static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd,
struct vm_area_struct *vma,
struct mlx5_ib_ucontext *context)
{
- struct mlx5_uuar_info *uuari = &context->uuari;
+ struct mlx5_bfreg_info *bfregi = &context->bfregi;
int err;
unsigned long idx;
- phys_addr_t pfn, pa;
+ phys_addr_t pfn;
pgprot_t prot;
+ u32 bfreg_dyn_idx = 0;
+ u32 uar_index;
+ int dyn_uar = (cmd == MLX5_IB_MMAP_ALLOC_WC);
+ int max_valid_idx = dyn_uar ? bfregi->num_sys_pages :
+ bfregi->num_static_sys_pages;
+
+ if (bfregi->lib_uar_dyn)
+ return -EINVAL;
+
+ if (vma->vm_end - vma->vm_start != PAGE_SIZE)
+ return -EINVAL;
+
+ if (dyn_uar)
+ idx = get_extended_index(vma->vm_pgoff) + bfregi->num_static_sys_pages;
+ else
+ idx = get_index(vma->vm_pgoff);
+
+ if (idx >= max_valid_idx) {
+ mlx5_ib_warn(dev, "invalid uar index %lu, max=%d\n",
+ idx, max_valid_idx);
+ return -EINVAL;
+ }
switch (cmd) {
case MLX5_IB_MMAP_WC_PAGE:
-/* Some architectures don't support WC memory */
-#if defined(CONFIG_X86)
- if (!pat_enabled())
- return -EPERM;
-#elif !(defined(CONFIG_PPC) || (defined(CONFIG_ARM) && defined(CONFIG_MMU)))
- return -EPERM;
-#endif
- /* fall through */
+ case MLX5_IB_MMAP_ALLOC_WC:
case MLX5_IB_MMAP_REGULAR_PAGE:
/* For MLX5_IB_MMAP_REGULAR_PAGE do the best effort to get WC */
prot = pgprot_writecombine(vma->vm_page_prot);
@@ -1392,30 +2398,117 @@ static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd,
return -EINVAL;
}
- if (vma->vm_end - vma->vm_start != PAGE_SIZE)
- return -EINVAL;
+ if (dyn_uar) {
+ int uars_per_page;
- idx = get_index(vma->vm_pgoff);
- if (idx >= uuari->num_uars)
- return -EINVAL;
+ uars_per_page = get_uars_per_sys_page(dev, bfregi->lib_uar_4k);
+ bfreg_dyn_idx = idx * (uars_per_page * MLX5_NON_FP_BFREGS_PER_UAR);
+ if (bfreg_dyn_idx >= bfregi->total_num_bfregs) {
+ mlx5_ib_warn(dev, "invalid bfreg_dyn_idx %u, max=%u\n",
+ bfreg_dyn_idx, bfregi->total_num_bfregs);
+ return -EINVAL;
+ }
+
+ mutex_lock(&bfregi->lock);
+ /* Fail if uar already allocated, first bfreg index of each
+ * page holds its count.
+ */
+ if (bfregi->count[bfreg_dyn_idx]) {
+ mlx5_ib_warn(dev, "wrong offset, idx %lu is busy, bfregn=%u\n", idx, bfreg_dyn_idx);
+ mutex_unlock(&bfregi->lock);
+ return -EINVAL;
+ }
+
+ bfregi->count[bfreg_dyn_idx]++;
+ mutex_unlock(&bfregi->lock);
+
+ err = mlx5_cmd_uar_alloc(dev->mdev, &uar_index,
+ context->devx_uid);
+ if (err) {
+ mlx5_ib_warn(dev, "UAR alloc failed\n");
+ goto free_bfreg;
+ }
+ } else {
+ uar_index = bfregi->sys_pages[idx];
+ }
- pfn = uar_index2pfn(dev, uuari->uars[idx].index);
+ pfn = uar_index2pfn(dev, uar_index);
mlx5_ib_dbg(dev, "uar idx 0x%lx, pfn %pa\n", idx, &pfn);
- vma->vm_page_prot = prot;
- err = io_remap_pfn_range(vma, vma->vm_start, pfn,
- PAGE_SIZE, vma->vm_page_prot);
+ err = rdma_user_mmap_io(&context->ibucontext, vma, pfn, PAGE_SIZE,
+ prot, NULL);
if (err) {
- mlx5_ib_err(dev, "io_remap_pfn_range failed with error=%d, vm_start=0x%lx, pfn=%pa, mmap_cmd=%s\n",
- err, vma->vm_start, &pfn, mmap_cmd2str(cmd));
- return -EAGAIN;
+ mlx5_ib_err(dev,
+ "rdma_user_mmap_io failed with error=%d, mmap_cmd=%s\n",
+ err, mmap_cmd2str(cmd));
+ goto err;
}
- pa = pfn << PAGE_SHIFT;
- mlx5_ib_dbg(dev, "mapped %s at 0x%lx, PA %pa\n", mmap_cmd2str(cmd),
- vma->vm_start, &pa);
+ if (dyn_uar)
+ bfregi->sys_pages[idx] = uar_index;
+ return 0;
+
+err:
+ if (!dyn_uar)
+ return err;
+
+ mlx5_cmd_uar_dealloc(dev->mdev, idx, context->devx_uid);
+
+free_bfreg:
+ mlx5_ib_free_bfreg(dev, bfregi, bfreg_dyn_idx);
+
+ return err;
+}
+
+static unsigned long mlx5_vma_to_pgoff(struct vm_area_struct *vma)
+{
+ unsigned long idx;
+ u8 command;
+
+ command = get_command(vma->vm_pgoff);
+ idx = get_extended_index(vma->vm_pgoff);
- return mlx5_ib_set_vma_data(vma, context);
+ return (command << 16 | idx);
+}
+
+static int mlx5_ib_mmap_offset(struct mlx5_ib_dev *dev,
+ struct vm_area_struct *vma,
+ struct ib_ucontext *ucontext)
+{
+ struct mlx5_user_mmap_entry *mentry;
+ struct rdma_user_mmap_entry *entry;
+ unsigned long pgoff;
+ pgprot_t prot;
+ phys_addr_t pfn;
+ int ret;
+
+ pgoff = mlx5_vma_to_pgoff(vma);
+ entry = rdma_user_mmap_entry_get_pgoff(ucontext, pgoff);
+ if (!entry)
+ return -EINVAL;
+
+ mentry = to_mmmap(entry);
+ pfn = (mentry->address >> PAGE_SHIFT);
+ if (mentry->mmap_flag == MLX5_IB_MMAP_TYPE_VAR ||
+ mentry->mmap_flag == MLX5_IB_MMAP_TYPE_UAR_NC)
+ prot = pgprot_noncached(vma->vm_page_prot);
+ else
+ prot = pgprot_writecombine(vma->vm_page_prot);
+ ret = rdma_user_mmap_io(ucontext, vma, pfn,
+ entry->npages * PAGE_SIZE,
+ prot,
+ entry);
+ rdma_user_mmap_entry_put(&mentry->rdma_entry);
+ return ret;
+}
+
+static u64 mlx5_entry_to_mmap_offset(struct mlx5_user_mmap_entry *entry)
+{
+ u64 cmd = (entry->rdma_entry.start_pgoff >> 16) & 0xFFFF;
+ u64 index = entry->rdma_entry.start_pgoff & 0xFFFF;
+
+ return (((index >> 8) << 16) | (cmd << MLX5_IB_MMAP_CMD_SHIFT) |
+ (index & 0xFF)) << PAGE_SHIFT;
}
static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma)
@@ -1428,6 +2521,10 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm
command = get_command(vma->vm_pgoff);
switch (command) {
case MLX5_IB_MMAP_WC_PAGE:
+ case MLX5_IB_MMAP_ALLOC_WC:
+ if (!mlx5_wc_support_get(dev->mdev))
+ return -EPERM;
+ fallthrough;
case MLX5_IB_MMAP_NC_PAGE:
case MLX5_IB_MMAP_REGULAR_PAGE:
return uar_mmap(dev, command, vma, context);
@@ -1441,785 +2538,85 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm
if (vma->vm_flags & VM_WRITE)
return -EPERM;
+ vm_flags_clear(vma, VM_MAYWRITE);
/* Don't expose to user-space information it shouldn't have */
if (PAGE_SIZE > 4096)
return -EOPNOTSUPP;
- vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
pfn = (dev->mdev->iseg_base +
offsetof(struct mlx5_init_seg, internal_timer_h)) >>
PAGE_SHIFT;
- if (io_remap_pfn_range(vma, vma->vm_start, pfn,
- PAGE_SIZE, vma->vm_page_prot))
- return -EAGAIN;
-
- mlx5_ib_dbg(dev, "mapped internal timer at 0x%lx, PA 0x%llx\n",
- vma->vm_start,
- (unsigned long long)pfn << PAGE_SHIFT);
- break;
+ return rdma_user_mmap_io(&context->ibucontext, vma, pfn,
+ PAGE_SIZE,
+ pgprot_noncached(vma->vm_page_prot),
+ NULL);
+ case MLX5_IB_MMAP_CLOCK_INFO:
+ return mlx5_ib_mmap_clock_info_page(dev, vma, context);
default:
- return -EINVAL;
+ return mlx5_ib_mmap_offset(dev, vma, ibcontext);
}
return 0;
}
-static struct ib_pd *mlx5_ib_alloc_pd(struct ib_device *ibdev,
- struct ib_ucontext *context,
- struct ib_udata *udata)
+static int mlx5_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
{
+ struct mlx5_ib_pd *pd = to_mpd(ibpd);
+ struct ib_device *ibdev = ibpd->device;
struct mlx5_ib_alloc_pd_resp resp;
- struct mlx5_ib_pd *pd;
int err;
+ u32 out[MLX5_ST_SZ_DW(alloc_pd_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(alloc_pd_in)] = {};
+ u16 uid = 0;
+ struct mlx5_ib_ucontext *context = rdma_udata_to_drv_context(
+ udata, struct mlx5_ib_ucontext, ibucontext);
+
+ uid = context ? context->devx_uid : 0;
+ MLX5_SET(alloc_pd_in, in, opcode, MLX5_CMD_OP_ALLOC_PD);
+ MLX5_SET(alloc_pd_in, in, uid, uid);
+ err = mlx5_cmd_exec_inout(to_mdev(ibdev)->mdev, alloc_pd, in, out);
+ if (err)
+ return err;
- pd = kmalloc(sizeof(*pd), GFP_KERNEL);
- if (!pd)
- return ERR_PTR(-ENOMEM);
-
- err = mlx5_core_alloc_pd(to_mdev(ibdev)->mdev, &pd->pdn);
- if (err) {
- kfree(pd);
- return ERR_PTR(err);
- }
-
- if (context) {
+ pd->pdn = MLX5_GET(alloc_pd_out, out, pd);
+ pd->uid = uid;
+ if (udata) {
resp.pdn = pd->pdn;
if (ib_copy_to_udata(udata, &resp, sizeof(resp))) {
- mlx5_core_dealloc_pd(to_mdev(ibdev)->mdev, pd->pdn);
- kfree(pd);
- return ERR_PTR(-EFAULT);
+ mlx5_cmd_dealloc_pd(to_mdev(ibdev)->mdev, pd->pdn, uid);
+ return -EFAULT;
}
}
- return &pd->ibpd;
+ return 0;
}
-static int mlx5_ib_dealloc_pd(struct ib_pd *pd)
+static int mlx5_ib_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata)
{
struct mlx5_ib_dev *mdev = to_mdev(pd->device);
struct mlx5_ib_pd *mpd = to_mpd(pd);
- mlx5_core_dealloc_pd(mdev->mdev, mpd->pdn);
- kfree(mpd);
-
- return 0;
-}
-
-enum {
- MATCH_CRITERIA_ENABLE_OUTER_BIT,
- MATCH_CRITERIA_ENABLE_MISC_BIT,
- MATCH_CRITERIA_ENABLE_INNER_BIT
-};
-
-#define HEADER_IS_ZERO(match_criteria, headers) \
- !(memchr_inv(MLX5_ADDR_OF(fte_match_param, match_criteria, headers), \
- 0, MLX5_FLD_SZ_BYTES(fte_match_param, headers))) \
-
-static u8 get_match_criteria_enable(u32 *match_criteria)
-{
- u8 match_criteria_enable;
-
- match_criteria_enable =
- (!HEADER_IS_ZERO(match_criteria, outer_headers)) <<
- MATCH_CRITERIA_ENABLE_OUTER_BIT;
- match_criteria_enable |=
- (!HEADER_IS_ZERO(match_criteria, misc_parameters)) <<
- MATCH_CRITERIA_ENABLE_MISC_BIT;
- match_criteria_enable |=
- (!HEADER_IS_ZERO(match_criteria, inner_headers)) <<
- MATCH_CRITERIA_ENABLE_INNER_BIT;
-
- return match_criteria_enable;
-}
-
-static void set_proto(void *outer_c, void *outer_v, u8 mask, u8 val)
-{
- MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_protocol, mask);
- MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_protocol, val);
-}
-
-static void set_flow_label(void *misc_c, void *misc_v, u8 mask, u8 val,
- bool inner)
-{
- if (inner) {
- MLX5_SET(fte_match_set_misc,
- misc_c, inner_ipv6_flow_label, mask);
- MLX5_SET(fte_match_set_misc,
- misc_v, inner_ipv6_flow_label, val);
- } else {
- MLX5_SET(fte_match_set_misc,
- misc_c, outer_ipv6_flow_label, mask);
- MLX5_SET(fte_match_set_misc,
- misc_v, outer_ipv6_flow_label, val);
- }
-}
-
-static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val)
-{
- MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_ecn, mask);
- MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_ecn, val);
- MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_dscp, mask >> 2);
- MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_dscp, val >> 2);
-}
-
-#define LAST_ETH_FIELD vlan_tag
-#define LAST_IB_FIELD sl
-#define LAST_IPV4_FIELD tos
-#define LAST_IPV6_FIELD traffic_class
-#define LAST_TCP_UDP_FIELD src_port
-#define LAST_TUNNEL_FIELD tunnel_id
-
-/* Field is the last supported field */
-#define FIELDS_NOT_SUPPORTED(filter, field)\
- memchr_inv((void *)&filter.field +\
- sizeof(filter.field), 0,\
- sizeof(filter) -\
- offsetof(typeof(filter), field) -\
- sizeof(filter.field))
-
-static int parse_flow_attr(u32 *match_c, u32 *match_v,
- const union ib_flow_spec *ib_spec)
-{
- void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c,
- misc_parameters);
- void *misc_params_v = MLX5_ADDR_OF(fte_match_param, match_v,
- misc_parameters);
- void *headers_c;
- void *headers_v;
-
- if (ib_spec->type & IB_FLOW_SPEC_INNER) {
- headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
- inner_headers);
- headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
- inner_headers);
- } else {
- headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
- outer_headers);
- headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
- outer_headers);
- }
-
- switch (ib_spec->type & ~IB_FLOW_SPEC_INNER) {
- case IB_FLOW_SPEC_ETH:
- if (FIELDS_NOT_SUPPORTED(ib_spec->eth.mask, LAST_ETH_FIELD))
- return -ENOTSUPP;
-
- ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
- dmac_47_16),
- ib_spec->eth.mask.dst_mac);
- ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
- dmac_47_16),
- ib_spec->eth.val.dst_mac);
-
- ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
- smac_47_16),
- ib_spec->eth.mask.src_mac);
- ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
- smac_47_16),
- ib_spec->eth.val.src_mac);
-
- if (ib_spec->eth.mask.vlan_tag) {
- MLX5_SET(fte_match_set_lyr_2_4, headers_c,
- vlan_tag, 1);
- MLX5_SET(fte_match_set_lyr_2_4, headers_v,
- vlan_tag, 1);
-
- MLX5_SET(fte_match_set_lyr_2_4, headers_c,
- first_vid, ntohs(ib_spec->eth.mask.vlan_tag));
- MLX5_SET(fte_match_set_lyr_2_4, headers_v,
- first_vid, ntohs(ib_spec->eth.val.vlan_tag));
-
- MLX5_SET(fte_match_set_lyr_2_4, headers_c,
- first_cfi,
- ntohs(ib_spec->eth.mask.vlan_tag) >> 12);
- MLX5_SET(fte_match_set_lyr_2_4, headers_v,
- first_cfi,
- ntohs(ib_spec->eth.val.vlan_tag) >> 12);
-
- MLX5_SET(fte_match_set_lyr_2_4, headers_c,
- first_prio,
- ntohs(ib_spec->eth.mask.vlan_tag) >> 13);
- MLX5_SET(fte_match_set_lyr_2_4, headers_v,
- first_prio,
- ntohs(ib_spec->eth.val.vlan_tag) >> 13);
- }
- MLX5_SET(fte_match_set_lyr_2_4, headers_c,
- ethertype, ntohs(ib_spec->eth.mask.ether_type));
- MLX5_SET(fte_match_set_lyr_2_4, headers_v,
- ethertype, ntohs(ib_spec->eth.val.ether_type));
- break;
- case IB_FLOW_SPEC_IPV4:
- if (FIELDS_NOT_SUPPORTED(ib_spec->ipv4.mask, LAST_IPV4_FIELD))
- return -ENOTSUPP;
-
- MLX5_SET(fte_match_set_lyr_2_4, headers_c,
- ethertype, 0xffff);
- MLX5_SET(fte_match_set_lyr_2_4, headers_v,
- ethertype, ETH_P_IP);
-
- memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
- src_ipv4_src_ipv6.ipv4_layout.ipv4),
- &ib_spec->ipv4.mask.src_ip,
- sizeof(ib_spec->ipv4.mask.src_ip));
- memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
- src_ipv4_src_ipv6.ipv4_layout.ipv4),
- &ib_spec->ipv4.val.src_ip,
- sizeof(ib_spec->ipv4.val.src_ip));
- memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
- dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
- &ib_spec->ipv4.mask.dst_ip,
- sizeof(ib_spec->ipv4.mask.dst_ip));
- memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
- dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
- &ib_spec->ipv4.val.dst_ip,
- sizeof(ib_spec->ipv4.val.dst_ip));
-
- set_tos(headers_c, headers_v,
- ib_spec->ipv4.mask.tos, ib_spec->ipv4.val.tos);
-
- set_proto(headers_c, headers_v,
- ib_spec->ipv4.mask.proto, ib_spec->ipv4.val.proto);
- break;
- case IB_FLOW_SPEC_IPV6:
- if (FIELDS_NOT_SUPPORTED(ib_spec->ipv6.mask, LAST_IPV6_FIELD))
- return -ENOTSUPP;
-
- MLX5_SET(fte_match_set_lyr_2_4, headers_c,
- ethertype, 0xffff);
- MLX5_SET(fte_match_set_lyr_2_4, headers_v,
- ethertype, ETH_P_IPV6);
-
- memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
- src_ipv4_src_ipv6.ipv6_layout.ipv6),
- &ib_spec->ipv6.mask.src_ip,
- sizeof(ib_spec->ipv6.mask.src_ip));
- memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
- src_ipv4_src_ipv6.ipv6_layout.ipv6),
- &ib_spec->ipv6.val.src_ip,
- sizeof(ib_spec->ipv6.val.src_ip));
- memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
- dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
- &ib_spec->ipv6.mask.dst_ip,
- sizeof(ib_spec->ipv6.mask.dst_ip));
- memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
- dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
- &ib_spec->ipv6.val.dst_ip,
- sizeof(ib_spec->ipv6.val.dst_ip));
-
- set_tos(headers_c, headers_v,
- ib_spec->ipv6.mask.traffic_class,
- ib_spec->ipv6.val.traffic_class);
-
- set_proto(headers_c, headers_v,
- ib_spec->ipv6.mask.next_hdr,
- ib_spec->ipv6.val.next_hdr);
-
- set_flow_label(misc_params_c, misc_params_v,
- ntohl(ib_spec->ipv6.mask.flow_label),
- ntohl(ib_spec->ipv6.val.flow_label),
- ib_spec->type & IB_FLOW_SPEC_INNER);
-
- break;
- case IB_FLOW_SPEC_TCP:
- if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask,
- LAST_TCP_UDP_FIELD))
- return -ENOTSUPP;
-
- MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
- 0xff);
- MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
- IPPROTO_TCP);
-
- MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_sport,
- ntohs(ib_spec->tcp_udp.mask.src_port));
- MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_sport,
- ntohs(ib_spec->tcp_udp.val.src_port));
-
- MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_dport,
- ntohs(ib_spec->tcp_udp.mask.dst_port));
- MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_dport,
- ntohs(ib_spec->tcp_udp.val.dst_port));
- break;
- case IB_FLOW_SPEC_UDP:
- if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask,
- LAST_TCP_UDP_FIELD))
- return -ENOTSUPP;
-
- MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
- 0xff);
- MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
- IPPROTO_UDP);
-
- MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_sport,
- ntohs(ib_spec->tcp_udp.mask.src_port));
- MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_sport,
- ntohs(ib_spec->tcp_udp.val.src_port));
-
- MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_dport,
- ntohs(ib_spec->tcp_udp.mask.dst_port));
- MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport,
- ntohs(ib_spec->tcp_udp.val.dst_port));
- break;
- case IB_FLOW_SPEC_VXLAN_TUNNEL:
- if (FIELDS_NOT_SUPPORTED(ib_spec->tunnel.mask,
- LAST_TUNNEL_FIELD))
- return -ENOTSUPP;
-
- MLX5_SET(fte_match_set_misc, misc_params_c, vxlan_vni,
- ntohl(ib_spec->tunnel.mask.tunnel_id));
- MLX5_SET(fte_match_set_misc, misc_params_v, vxlan_vni,
- ntohl(ib_spec->tunnel.val.tunnel_id));
- break;
- default:
- return -EINVAL;
- }
-
- return 0;
-}
-
-/* If a flow could catch both multicast and unicast packets,
- * it won't fall into the multicast flow steering table and this rule
- * could steal other multicast packets.
- */
-static bool flow_is_multicast_only(struct ib_flow_attr *ib_attr)
-{
- struct ib_flow_spec_eth *eth_spec;
-
- if (ib_attr->type != IB_FLOW_ATTR_NORMAL ||
- ib_attr->size < sizeof(struct ib_flow_attr) +
- sizeof(struct ib_flow_spec_eth) ||
- ib_attr->num_of_specs < 1)
- return false;
-
- eth_spec = (struct ib_flow_spec_eth *)(ib_attr + 1);
- if (eth_spec->type != IB_FLOW_SPEC_ETH ||
- eth_spec->size != sizeof(*eth_spec))
- return false;
-
- return is_multicast_ether_addr(eth_spec->mask.dst_mac) &&
- is_multicast_ether_addr(eth_spec->val.dst_mac);
-}
-
-static bool is_valid_attr(const struct ib_flow_attr *flow_attr)
-{
- union ib_flow_spec *ib_spec = (union ib_flow_spec *)(flow_attr + 1);
- bool has_ipv4_spec = false;
- bool eth_type_ipv4 = true;
- unsigned int spec_index;
-
- /* Validate that ethertype is correct */
- for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
- if (ib_spec->type == IB_FLOW_SPEC_ETH &&
- ib_spec->eth.mask.ether_type) {
- if (!((ib_spec->eth.mask.ether_type == htons(0xffff)) &&
- ib_spec->eth.val.ether_type == htons(ETH_P_IP)))
- eth_type_ipv4 = false;
- } else if (ib_spec->type == IB_FLOW_SPEC_IPV4) {
- has_ipv4_spec = true;
- }
- ib_spec = (void *)ib_spec + ib_spec->size;
- }
- return !has_ipv4_spec || eth_type_ipv4;
-}
-
-static void put_flow_table(struct mlx5_ib_dev *dev,
- struct mlx5_ib_flow_prio *prio, bool ft_added)
-{
- prio->refcount -= !!ft_added;
- if (!prio->refcount) {
- mlx5_destroy_flow_table(prio->flow_table);
- prio->flow_table = NULL;
- }
-}
-
-static int mlx5_ib_destroy_flow(struct ib_flow *flow_id)
-{
- struct mlx5_ib_dev *dev = to_mdev(flow_id->qp->device);
- struct mlx5_ib_flow_handler *handler = container_of(flow_id,
- struct mlx5_ib_flow_handler,
- ibflow);
- struct mlx5_ib_flow_handler *iter, *tmp;
-
- mutex_lock(&dev->flow_db.lock);
-
- list_for_each_entry_safe(iter, tmp, &handler->list, list) {
- mlx5_del_flow_rules(iter->rule);
- put_flow_table(dev, iter->prio, true);
- list_del(&iter->list);
- kfree(iter);
- }
-
- mlx5_del_flow_rules(handler->rule);
- put_flow_table(dev, handler->prio, true);
- mutex_unlock(&dev->flow_db.lock);
-
- kfree(handler);
-
- return 0;
-}
-
-static int ib_prio_to_core_prio(unsigned int priority, bool dont_trap)
-{
- priority *= 2;
- if (!dont_trap)
- priority++;
- return priority;
-}
-
-enum flow_table_type {
- MLX5_IB_FT_RX,
- MLX5_IB_FT_TX
-};
-
-#define MLX5_FS_MAX_TYPES 10
-#define MLX5_FS_MAX_ENTRIES 32000UL
-static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
- struct ib_flow_attr *flow_attr,
- enum flow_table_type ft_type)
-{
- bool dont_trap = flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP;
- struct mlx5_flow_namespace *ns = NULL;
- struct mlx5_ib_flow_prio *prio;
- struct mlx5_flow_table *ft;
- int num_entries;
- int num_groups;
- int priority;
- int err = 0;
-
- if (flow_attr->type == IB_FLOW_ATTR_NORMAL) {
- if (flow_is_multicast_only(flow_attr) &&
- !dont_trap)
- priority = MLX5_IB_FLOW_MCAST_PRIO;
- else
- priority = ib_prio_to_core_prio(flow_attr->priority,
- dont_trap);
- ns = mlx5_get_flow_namespace(dev->mdev,
- MLX5_FLOW_NAMESPACE_BYPASS);
- num_entries = MLX5_FS_MAX_ENTRIES;
- num_groups = MLX5_FS_MAX_TYPES;
- prio = &dev->flow_db.prios[priority];
- } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
- flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) {
- ns = mlx5_get_flow_namespace(dev->mdev,
- MLX5_FLOW_NAMESPACE_LEFTOVERS);
- build_leftovers_ft_param(&priority,
- &num_entries,
- &num_groups);
- prio = &dev->flow_db.prios[MLX5_IB_FLOW_LEFTOVERS_PRIO];
- } else if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) {
- if (!MLX5_CAP_FLOWTABLE(dev->mdev,
- allow_sniffer_and_nic_rx_shared_tir))
- return ERR_PTR(-ENOTSUPP);
-
- ns = mlx5_get_flow_namespace(dev->mdev, ft_type == MLX5_IB_FT_RX ?
- MLX5_FLOW_NAMESPACE_SNIFFER_RX :
- MLX5_FLOW_NAMESPACE_SNIFFER_TX);
-
- prio = &dev->flow_db.sniffer[ft_type];
- priority = 0;
- num_entries = 1;
- num_groups = 1;
- }
-
- if (!ns)
- return ERR_PTR(-ENOTSUPP);
-
- ft = prio->flow_table;
- if (!ft) {
- ft = mlx5_create_auto_grouped_flow_table(ns, priority,
- num_entries,
- num_groups,
- 0, 0);
-
- if (!IS_ERR(ft)) {
- prio->refcount = 0;
- prio->flow_table = ft;
- } else {
- err = PTR_ERR(ft);
- }
- }
-
- return err ? ERR_PTR(err) : prio;
+ return mlx5_cmd_dealloc_pd(mdev->mdev, mpd->pdn, mpd->uid);
}
-static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
- struct mlx5_ib_flow_prio *ft_prio,
- const struct ib_flow_attr *flow_attr,
- struct mlx5_flow_destination *dst)
-{
- struct mlx5_flow_table *ft = ft_prio->flow_table;
- struct mlx5_ib_flow_handler *handler;
- struct mlx5_flow_act flow_act = {0};
- struct mlx5_flow_spec *spec;
- const void *ib_flow = (const void *)flow_attr + sizeof(*flow_attr);
- unsigned int spec_index;
- int err = 0;
-
- if (!is_valid_attr(flow_attr))
- return ERR_PTR(-EINVAL);
-
- spec = mlx5_vzalloc(sizeof(*spec));
- handler = kzalloc(sizeof(*handler), GFP_KERNEL);
- if (!handler || !spec) {
- err = -ENOMEM;
- goto free;
- }
-
- INIT_LIST_HEAD(&handler->list);
-
- for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
- err = parse_flow_attr(spec->match_criteria,
- spec->match_value, ib_flow);
- if (err < 0)
- goto free;
-
- ib_flow += ((union ib_flow_spec *)ib_flow)->size;
- }
-
- spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria);
- flow_act.action = dst ? MLX5_FLOW_CONTEXT_ACTION_FWD_DEST :
- MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
- flow_act.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
- handler->rule = mlx5_add_flow_rules(ft, spec,
- &flow_act,
- dst, 1);
-
- if (IS_ERR(handler->rule)) {
- err = PTR_ERR(handler->rule);
- goto free;
- }
-
- ft_prio->refcount++;
- handler->prio = ft_prio;
-
- ft_prio->flow_table = ft;
-free:
- if (err)
- kfree(handler);
- kvfree(spec);
- return err ? ERR_PTR(err) : handler;
-}
-
-static struct mlx5_ib_flow_handler *create_dont_trap_rule(struct mlx5_ib_dev *dev,
- struct mlx5_ib_flow_prio *ft_prio,
- struct ib_flow_attr *flow_attr,
- struct mlx5_flow_destination *dst)
-{
- struct mlx5_ib_flow_handler *handler_dst = NULL;
- struct mlx5_ib_flow_handler *handler = NULL;
-
- handler = create_flow_rule(dev, ft_prio, flow_attr, NULL);
- if (!IS_ERR(handler)) {
- handler_dst = create_flow_rule(dev, ft_prio,
- flow_attr, dst);
- if (IS_ERR(handler_dst)) {
- mlx5_del_flow_rules(handler->rule);
- ft_prio->refcount--;
- kfree(handler);
- handler = handler_dst;
- } else {
- list_add(&handler_dst->list, &handler->list);
- }
- }
-
- return handler;
-}
-enum {
- LEFTOVERS_MC,
- LEFTOVERS_UC,
-};
-
-static struct mlx5_ib_flow_handler *create_leftovers_rule(struct mlx5_ib_dev *dev,
- struct mlx5_ib_flow_prio *ft_prio,
- struct ib_flow_attr *flow_attr,
- struct mlx5_flow_destination *dst)
-{
- struct mlx5_ib_flow_handler *handler_ucast = NULL;
- struct mlx5_ib_flow_handler *handler = NULL;
-
- static struct {
- struct ib_flow_attr flow_attr;
- struct ib_flow_spec_eth eth_flow;
- } leftovers_specs[] = {
- [LEFTOVERS_MC] = {
- .flow_attr = {
- .num_of_specs = 1,
- .size = sizeof(leftovers_specs[0])
- },
- .eth_flow = {
- .type = IB_FLOW_SPEC_ETH,
- .size = sizeof(struct ib_flow_spec_eth),
- .mask = {.dst_mac = {0x1} },
- .val = {.dst_mac = {0x1} }
- }
- },
- [LEFTOVERS_UC] = {
- .flow_attr = {
- .num_of_specs = 1,
- .size = sizeof(leftovers_specs[0])
- },
- .eth_flow = {
- .type = IB_FLOW_SPEC_ETH,
- .size = sizeof(struct ib_flow_spec_eth),
- .mask = {.dst_mac = {0x1} },
- .val = {.dst_mac = {} }
- }
- }
- };
-
- handler = create_flow_rule(dev, ft_prio,
- &leftovers_specs[LEFTOVERS_MC].flow_attr,
- dst);
- if (!IS_ERR(handler) &&
- flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT) {
- handler_ucast = create_flow_rule(dev, ft_prio,
- &leftovers_specs[LEFTOVERS_UC].flow_attr,
- dst);
- if (IS_ERR(handler_ucast)) {
- mlx5_del_flow_rules(handler->rule);
- ft_prio->refcount--;
- kfree(handler);
- handler = handler_ucast;
- } else {
- list_add(&handler_ucast->list, &handler->list);
- }
- }
-
- return handler;
-}
-
-static struct mlx5_ib_flow_handler *create_sniffer_rule(struct mlx5_ib_dev *dev,
- struct mlx5_ib_flow_prio *ft_rx,
- struct mlx5_ib_flow_prio *ft_tx,
- struct mlx5_flow_destination *dst)
-{
- struct mlx5_ib_flow_handler *handler_rx;
- struct mlx5_ib_flow_handler *handler_tx;
- int err;
- static const struct ib_flow_attr flow_attr = {
- .num_of_specs = 0,
- .size = sizeof(flow_attr)
- };
-
- handler_rx = create_flow_rule(dev, ft_rx, &flow_attr, dst);
- if (IS_ERR(handler_rx)) {
- err = PTR_ERR(handler_rx);
- goto err;
- }
-
- handler_tx = create_flow_rule(dev, ft_tx, &flow_attr, dst);
- if (IS_ERR(handler_tx)) {
- err = PTR_ERR(handler_tx);
- goto err_tx;
- }
-
- list_add(&handler_tx->list, &handler_rx->list);
-
- return handler_rx;
-
-err_tx:
- mlx5_del_flow_rules(handler_rx->rule);
- ft_rx->refcount--;
- kfree(handler_rx);
-err:
- return ERR_PTR(err);
-}
-
-static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
- struct ib_flow_attr *flow_attr,
- int domain)
+static int mlx5_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
{
- struct mlx5_ib_dev *dev = to_mdev(qp->device);
- struct mlx5_ib_qp *mqp = to_mqp(qp);
- struct mlx5_ib_flow_handler *handler = NULL;
- struct mlx5_flow_destination *dst = NULL;
- struct mlx5_ib_flow_prio *ft_prio_tx = NULL;
- struct mlx5_ib_flow_prio *ft_prio;
+ struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
+ struct mlx5_ib_qp *mqp = to_mqp(ibqp);
int err;
+ u16 uid;
- if (flow_attr->priority > MLX5_IB_FLOW_LAST_PRIO)
- return ERR_PTR(-ENOSPC);
-
- if (domain != IB_FLOW_DOMAIN_USER ||
- flow_attr->port > MLX5_CAP_GEN(dev->mdev, num_ports) ||
- (flow_attr->flags & ~IB_FLOW_ATTR_FLAGS_DONT_TRAP))
- return ERR_PTR(-EINVAL);
+ uid = ibqp->pd ?
+ to_mpd(ibqp->pd)->uid : 0;
- dst = kzalloc(sizeof(*dst), GFP_KERNEL);
- if (!dst)
- return ERR_PTR(-ENOMEM);
-
- mutex_lock(&dev->flow_db.lock);
-
- ft_prio = get_flow_table(dev, flow_attr, MLX5_IB_FT_RX);
- if (IS_ERR(ft_prio)) {
- err = PTR_ERR(ft_prio);
- goto unlock;
- }
- if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) {
- ft_prio_tx = get_flow_table(dev, flow_attr, MLX5_IB_FT_TX);
- if (IS_ERR(ft_prio_tx)) {
- err = PTR_ERR(ft_prio_tx);
- ft_prio_tx = NULL;
- goto destroy_ft;
- }
- }
-
- dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR;
- if (mqp->flags & MLX5_IB_QP_RSS)
- dst->tir_num = mqp->rss_qp.tirn;
- else
- dst->tir_num = mqp->raw_packet_qp.rq.tirn;
-
- if (flow_attr->type == IB_FLOW_ATTR_NORMAL) {
- if (flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP) {
- handler = create_dont_trap_rule(dev, ft_prio,
- flow_attr, dst);
- } else {
- handler = create_flow_rule(dev, ft_prio, flow_attr,
- dst);
- }
- } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
- flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) {
- handler = create_leftovers_rule(dev, ft_prio, flow_attr,
- dst);
- } else if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) {
- handler = create_sniffer_rule(dev, ft_prio, ft_prio_tx, dst);
- } else {
- err = -EINVAL;
- goto destroy_ft;
- }
-
- if (IS_ERR(handler)) {
- err = PTR_ERR(handler);
- handler = NULL;
- goto destroy_ft;
+ if (mqp->flags & IB_QP_CREATE_SOURCE_QPN) {
+ mlx5_ib_dbg(dev, "Attaching a multi cast group to underlay QP is not supported\n");
+ return -EOPNOTSUPP;
}
- mutex_unlock(&dev->flow_db.lock);
- kfree(dst);
-
- return &handler->ibflow;
-
-destroy_ft:
- put_flow_table(dev, ft_prio, false);
- if (ft_prio_tx)
- put_flow_table(dev, ft_prio_tx, false);
-unlock:
- mutex_unlock(&dev->flow_db.lock);
- kfree(dst);
- kfree(handler);
- return ERR_PTR(err);
-}
-
-static int mlx5_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
-{
- struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
- int err;
-
- err = mlx5_core_attach_mcg(dev->mdev, gid, ibqp->qp_num);
+ err = mlx5_cmd_attach_mcg(dev->mdev, gid, ibqp->qp_num, uid);
if (err)
mlx5_ib_warn(dev, "failed attaching QPN 0x%x, MGID %pI6\n",
ibqp->qp_num, gid->raw);
@@ -2231,8 +2628,11 @@ static int mlx5_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
{
struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
int err;
+ u16 uid;
- err = mlx5_core_detach_mcg(dev->mdev, gid, ibqp->qp_num);
+ uid = ibqp->pd ?
+ to_mpd(ibqp->pd)->uid : 0;
+ err = mlx5_cmd_detach_mcg(dev->mdev, gid, ibqp->qp_num, uid);
if (err)
mlx5_ib_warn(dev, "failed detaching QPN 0x%x, MGID %pI6\n",
ibqp->qp_num, gid->raw);
@@ -2253,61 +2653,68 @@ static int init_node_data(struct mlx5_ib_dev *dev)
return mlx5_query_node_guid(dev, &dev->ib_dev.node_guid);
}
-static ssize_t show_fw_pages(struct device *device, struct device_attribute *attr,
- char *buf)
+static ssize_t fw_pages_show(struct device *device,
+ struct device_attribute *attr, char *buf)
{
struct mlx5_ib_dev *dev =
- container_of(device, struct mlx5_ib_dev, ib_dev.dev);
+ rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev);
- return sprintf(buf, "%d\n", dev->mdev->priv.fw_pages);
+ return sysfs_emit(buf, "%d\n", dev->mdev->priv.fw_pages);
}
+static DEVICE_ATTR_RO(fw_pages);
-static ssize_t show_reg_pages(struct device *device,
+static ssize_t reg_pages_show(struct device *device,
struct device_attribute *attr, char *buf)
{
struct mlx5_ib_dev *dev =
- container_of(device, struct mlx5_ib_dev, ib_dev.dev);
+ rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev);
- return sprintf(buf, "%d\n", atomic_read(&dev->mdev->priv.reg_pages));
+ return sysfs_emit(buf, "%d\n", atomic_read(&dev->mdev->priv.reg_pages));
}
+static DEVICE_ATTR_RO(reg_pages);
-static ssize_t show_hca(struct device *device, struct device_attribute *attr,
- char *buf)
+static ssize_t hca_type_show(struct device *device,
+ struct device_attribute *attr, char *buf)
{
struct mlx5_ib_dev *dev =
- container_of(device, struct mlx5_ib_dev, ib_dev.dev);
- return sprintf(buf, "MT%d\n", dev->mdev->pdev->device);
+ rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev);
+
+ return sysfs_emit(buf, "MT%d\n", dev->mdev->pdev->device);
}
+static DEVICE_ATTR_RO(hca_type);
-static ssize_t show_rev(struct device *device, struct device_attribute *attr,
- char *buf)
+static ssize_t hw_rev_show(struct device *device,
+ struct device_attribute *attr, char *buf)
{
struct mlx5_ib_dev *dev =
- container_of(device, struct mlx5_ib_dev, ib_dev.dev);
- return sprintf(buf, "%x\n", dev->mdev->rev_id);
+ rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev);
+
+ return sysfs_emit(buf, "%x\n", dev->mdev->rev_id);
}
+static DEVICE_ATTR_RO(hw_rev);
-static ssize_t show_board(struct device *device, struct device_attribute *attr,
- char *buf)
+static ssize_t board_id_show(struct device *device,
+ struct device_attribute *attr, char *buf)
{
struct mlx5_ib_dev *dev =
- container_of(device, struct mlx5_ib_dev, ib_dev.dev);
- return sprintf(buf, "%.*s\n", MLX5_BOARD_ID_LEN,
- dev->mdev->board_id);
-}
-
-static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
-static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
-static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
-static DEVICE_ATTR(fw_pages, S_IRUGO, show_fw_pages, NULL);
-static DEVICE_ATTR(reg_pages, S_IRUGO, show_reg_pages, NULL);
-
-static struct device_attribute *mlx5_class_attributes[] = {
- &dev_attr_hw_rev,
- &dev_attr_hca_type,
- &dev_attr_board_id,
- &dev_attr_fw_pages,
- &dev_attr_reg_pages,
+ rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev);
+
+ return sysfs_emit(buf, "%.*s\n", MLX5_BOARD_ID_LEN,
+ dev->mdev->board_id);
+}
+static DEVICE_ATTR_RO(board_id);
+
+static struct attribute *mlx5_class_attributes[] = {
+ &dev_attr_hw_rev.attr,
+ &dev_attr_hca_type.attr,
+ &dev_attr_board_id.attr,
+ &dev_attr_fw_pages.attr,
+ &dev_attr_reg_pages.attr,
+ NULL,
+};
+
+static const struct attribute_group mlx5_attr_group = {
+ .attrs = mlx5_class_attributes,
};
static void pkey_change_handler(struct work_struct *work)
@@ -2316,9 +2723,14 @@ static void pkey_change_handler(struct work_struct *work)
container_of(work, struct mlx5_ib_port_resources,
pkey_change_work);
- mutex_lock(&ports->devr->mutex);
+ if (!ports->gsi)
+ /*
+ * We got this event before device was fully configured
+ * and MAD registration code wasn't called/finished yet.
+ */
+ return;
+
mlx5_ib_gsi_pkey_change(ports->gsi);
- mutex_unlock(&ports->devr->mutex);
}
static void mlx5_ib_handle_internal_error(struct mlx5_ib_dev *ibdev)
@@ -2375,73 +2787,130 @@ static void mlx5_ib_handle_internal_error(struct mlx5_ib_dev *ibdev)
* lock/unlock above locks Now need to arm all involved CQs.
*/
list_for_each_entry(mcq, &cq_armed_list, reset_notify) {
- mcq->comp(mcq);
+ mcq->comp(mcq, NULL);
}
spin_unlock_irqrestore(&ibdev->reset_flow_resource_lock, flags);
}
-static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
- enum mlx5_dev_event event, unsigned long param)
+static void delay_drop_handler(struct work_struct *work)
{
- struct mlx5_ib_dev *ibdev = (struct mlx5_ib_dev *)context;
- struct ib_event ibev;
- bool fatal = false;
- u8 port = 0;
+ int err;
+ struct mlx5_ib_delay_drop *delay_drop =
+ container_of(work, struct mlx5_ib_delay_drop,
+ delay_drop_work);
- switch (event) {
- case MLX5_DEV_EVENT_SYS_ERROR:
- ibev.event = IB_EVENT_DEVICE_FATAL;
- mlx5_ib_handle_internal_error(ibdev);
- fatal = true;
+ atomic_inc(&delay_drop->events_cnt);
+
+ mutex_lock(&delay_drop->lock);
+ err = mlx5_core_set_delay_drop(delay_drop->dev, delay_drop->timeout);
+ if (err) {
+ mlx5_ib_warn(delay_drop->dev, "Failed to set delay drop, timeout=%u\n",
+ delay_drop->timeout);
+ delay_drop->activate = false;
+ }
+ mutex_unlock(&delay_drop->lock);
+}
+
+static void handle_general_event(struct mlx5_ib_dev *ibdev, struct mlx5_eqe *eqe,
+ struct ib_event *ibev)
+{
+ u32 port = (eqe->data.port.port >> 4) & 0xf;
+
+ switch (eqe->sub_type) {
+ case MLX5_GENERAL_SUBTYPE_DELAY_DROP_TIMEOUT:
+ if (mlx5_ib_port_link_layer(&ibdev->ib_dev, port) ==
+ IB_LINK_LAYER_ETHERNET)
+ schedule_work(&ibdev->delay_drop.delay_drop_work);
break;
+ default: /* do nothing */
+ return;
+ }
+}
- case MLX5_DEV_EVENT_PORT_UP:
- case MLX5_DEV_EVENT_PORT_DOWN:
- case MLX5_DEV_EVENT_PORT_INITIALIZED:
- port = (u8)param;
+static int handle_port_change(struct mlx5_ib_dev *ibdev, struct mlx5_eqe *eqe,
+ struct ib_event *ibev)
+{
+ u32 port = (eqe->data.port.port >> 4) & 0xf;
+ ibev->element.port_num = port;
+
+ switch (eqe->sub_type) {
+ case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
+ case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
+ case MLX5_PORT_CHANGE_SUBTYPE_INITIALIZED:
/* In RoCE, port up/down events are handled in
* mlx5_netdev_event().
*/
if (mlx5_ib_port_link_layer(&ibdev->ib_dev, port) ==
- IB_LINK_LAYER_ETHERNET)
- return;
+ IB_LINK_LAYER_ETHERNET)
+ return -EINVAL;
- ibev.event = (event == MLX5_DEV_EVENT_PORT_UP) ?
- IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR;
+ ibev->event = (eqe->sub_type == MLX5_PORT_CHANGE_SUBTYPE_ACTIVE) ?
+ IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR;
break;
- case MLX5_DEV_EVENT_LID_CHANGE:
- ibev.event = IB_EVENT_LID_CHANGE;
- port = (u8)param;
+ case MLX5_PORT_CHANGE_SUBTYPE_LID:
+ ibev->event = IB_EVENT_LID_CHANGE;
break;
- case MLX5_DEV_EVENT_PKEY_CHANGE:
- ibev.event = IB_EVENT_PKEY_CHANGE;
- port = (u8)param;
-
+ case MLX5_PORT_CHANGE_SUBTYPE_PKEY:
+ ibev->event = IB_EVENT_PKEY_CHANGE;
schedule_work(&ibdev->devr.ports[port - 1].pkey_change_work);
break;
- case MLX5_DEV_EVENT_GUID_CHANGE:
- ibev.event = IB_EVENT_GID_CHANGE;
- port = (u8)param;
+ case MLX5_PORT_CHANGE_SUBTYPE_GUID:
+ ibev->event = IB_EVENT_GID_CHANGE;
break;
- case MLX5_DEV_EVENT_CLIENT_REREG:
- ibev.event = IB_EVENT_CLIENT_REREGISTER;
- port = (u8)param;
+ case MLX5_PORT_CHANGE_SUBTYPE_CLIENT_REREG:
+ ibev->event = IB_EVENT_CLIENT_REREGISTER;
break;
default:
- return;
+ return -EINVAL;
}
- ibev.device = &ibdev->ib_dev;
- ibev.element.port_num = port;
+ return 0;
+}
- if (port < 1 || port > ibdev->num_ports) {
- mlx5_ib_warn(ibdev, "warning: event on port %d\n", port);
- return;
+static void mlx5_ib_handle_event(struct work_struct *_work)
+{
+ struct mlx5_ib_event_work *work =
+ container_of(_work, struct mlx5_ib_event_work, work);
+ struct mlx5_ib_dev *ibdev;
+ struct ib_event ibev;
+ bool fatal = false;
+
+ if (work->is_slave) {
+ ibdev = mlx5_ib_get_ibdev_from_mpi(work->mpi);
+ if (!ibdev)
+ goto out;
+ } else {
+ ibdev = work->dev;
+ }
+
+ switch (work->event) {
+ case MLX5_DEV_EVENT_SYS_ERROR:
+ ibev.event = IB_EVENT_DEVICE_FATAL;
+ mlx5_ib_handle_internal_error(ibdev);
+ ibev.element.port_num = (u8)(unsigned long)work->param;
+ fatal = true;
+ break;
+ case MLX5_EVENT_TYPE_PORT_CHANGE:
+ if (handle_port_change(ibdev, work->param, &ibev))
+ goto out;
+ break;
+ case MLX5_EVENT_TYPE_GENERAL_EVENT:
+ handle_general_event(ibdev, work->param, &ibev);
+ fallthrough;
+ default:
+ goto out;
+ }
+
+ ibev.device = &ibdev->ib_dev;
+
+ if (!rdma_is_port_valid(&ibdev->ib_dev, ibev.element.port_num)) {
+ mlx5_ib_warn(ibdev, "warning: event on port %d\n", ibev.element.port_num);
+ goto out;
}
if (ibdev->ib_active)
@@ -2449,338 +2918,380 @@ static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
if (fatal)
ibdev->ib_active = false;
+out:
+ kfree(work);
}
-static void get_ext_port_caps(struct mlx5_ib_dev *dev)
+static int mlx5_ib_event(struct notifier_block *nb,
+ unsigned long event, void *param)
{
- int port;
+ struct mlx5_ib_event_work *work;
- for (port = 1; port <= MLX5_CAP_GEN(dev->mdev, num_ports); port++)
- mlx5_query_ext_port_caps(dev, port);
-}
+ work = kmalloc(sizeof(*work), GFP_ATOMIC);
+ if (!work)
+ return NOTIFY_DONE;
-static int get_port_caps(struct mlx5_ib_dev *dev)
-{
- struct ib_device_attr *dprops = NULL;
- struct ib_port_attr *pprops = NULL;
- int err = -ENOMEM;
- int port;
- struct ib_udata uhw = {.inlen = 0, .outlen = 0};
+ INIT_WORK(&work->work, mlx5_ib_handle_event);
+ work->dev = container_of(nb, struct mlx5_ib_dev, mdev_events);
+ work->is_slave = false;
+ work->param = param;
+ work->event = event;
- pprops = kmalloc(sizeof(*pprops), GFP_KERNEL);
- if (!pprops)
- goto out;
+ queue_work(mlx5_ib_event_wq, &work->work);
- dprops = kmalloc(sizeof(*dprops), GFP_KERNEL);
- if (!dprops)
- goto out;
+ return NOTIFY_OK;
+}
- err = mlx5_ib_query_device(&dev->ib_dev, dprops, &uhw);
- if (err) {
- mlx5_ib_warn(dev, "query_device failed %d\n", err);
- goto out;
- }
+static int mlx5_ib_event_slave_port(struct notifier_block *nb,
+ unsigned long event, void *param)
+{
+ struct mlx5_ib_event_work *work;
- for (port = 1; port <= MLX5_CAP_GEN(dev->mdev, num_ports); port++) {
- err = mlx5_ib_query_port(&dev->ib_dev, port, pprops);
- if (err) {
- mlx5_ib_warn(dev, "query_port %d failed %d\n",
- port, err);
- break;
- }
- dev->mdev->port_caps[port - 1].pkey_table_len =
- dprops->max_pkeys;
- dev->mdev->port_caps[port - 1].gid_table_len =
- pprops->gid_tbl_len;
- mlx5_ib_dbg(dev, "pkey_table_len %d, gid_table_len %d\n",
- dprops->max_pkeys, pprops->gid_tbl_len);
- }
+ work = kmalloc(sizeof(*work), GFP_ATOMIC);
+ if (!work)
+ return NOTIFY_DONE;
-out:
- kfree(pprops);
- kfree(dprops);
+ INIT_WORK(&work->work, mlx5_ib_handle_event);
+ work->mpi = container_of(nb, struct mlx5_ib_multiport_info, mdev_events);
+ work->is_slave = true;
+ work->param = param;
+ work->event = event;
+ queue_work(mlx5_ib_event_wq, &work->work);
- return err;
+ return NOTIFY_OK;
}
-static void destroy_umrc_res(struct mlx5_ib_dev *dev)
+static int mlx5_ib_get_plane_num(struct mlx5_core_dev *mdev, u8 *num_plane)
{
+ struct mlx5_hca_vport_context vport_ctx;
int err;
- err = mlx5_mr_cache_cleanup(dev);
+ *num_plane = 0;
+ if (!MLX5_CAP_GEN(mdev, ib_virt) || !MLX5_CAP_GEN_2(mdev, multiplane))
+ return 0;
+
+ err = mlx5_query_hca_vport_context(mdev, 0, 1, 0, &vport_ctx);
if (err)
- mlx5_ib_warn(dev, "mr cache cleanup failed\n");
+ return err;
- mlx5_ib_destroy_qp(dev->umrc.qp);
- ib_free_cq(dev->umrc.cq);
- ib_dealloc_pd(dev->umrc.pd);
+ *num_plane = vport_ctx.num_plane;
+ return 0;
}
-enum {
- MAX_UMR_WR = 128,
-};
-
-static int create_umr_res(struct mlx5_ib_dev *dev)
+static int set_has_smi_cap(struct mlx5_ib_dev *dev)
{
- struct ib_qp_init_attr *init_attr = NULL;
- struct ib_qp_attr *attr = NULL;
- struct ib_pd *pd;
- struct ib_cq *cq;
- struct ib_qp *qp;
- int ret;
+ struct mlx5_hca_vport_context vport_ctx;
+ int err;
+ int port;
- attr = kzalloc(sizeof(*attr), GFP_KERNEL);
- init_attr = kzalloc(sizeof(*init_attr), GFP_KERNEL);
- if (!attr || !init_attr) {
- ret = -ENOMEM;
- goto error_0;
- }
+ if (MLX5_CAP_GEN(dev->mdev, port_type) != MLX5_CAP_PORT_TYPE_IB)
+ return 0;
- pd = ib_alloc_pd(&dev->ib_dev, 0);
- if (IS_ERR(pd)) {
- mlx5_ib_dbg(dev, "Couldn't create PD for sync UMR QP\n");
- ret = PTR_ERR(pd);
- goto error_0;
- }
+ for (port = 1; port <= dev->num_ports; port++) {
+ if (dev->num_plane) {
+ dev->port_caps[port - 1].has_smi = false;
+ continue;
+ } else if (!MLX5_CAP_GEN(dev->mdev, ib_virt) ||
+ dev->ib_dev.type == RDMA_DEVICE_TYPE_SMI) {
+ dev->port_caps[port - 1].has_smi = true;
+ continue;
+ }
- cq = ib_alloc_cq(&dev->ib_dev, NULL, 128, 0, IB_POLL_SOFTIRQ);
- if (IS_ERR(cq)) {
- mlx5_ib_dbg(dev, "Couldn't create CQ for sync UMR QP\n");
- ret = PTR_ERR(cq);
- goto error_2;
- }
-
- init_attr->send_cq = cq;
- init_attr->recv_cq = cq;
- init_attr->sq_sig_type = IB_SIGNAL_ALL_WR;
- init_attr->cap.max_send_wr = MAX_UMR_WR;
- init_attr->cap.max_send_sge = 1;
- init_attr->qp_type = MLX5_IB_QPT_REG_UMR;
- init_attr->port_num = 1;
- qp = mlx5_ib_create_qp(pd, init_attr, NULL);
- if (IS_ERR(qp)) {
- mlx5_ib_dbg(dev, "Couldn't create sync UMR QP\n");
- ret = PTR_ERR(qp);
- goto error_3;
- }
- qp->device = &dev->ib_dev;
- qp->real_qp = qp;
- qp->uobject = NULL;
- qp->qp_type = MLX5_IB_QPT_REG_UMR;
-
- attr->qp_state = IB_QPS_INIT;
- attr->port_num = 1;
- ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE | IB_QP_PKEY_INDEX |
- IB_QP_PORT, NULL);
- if (ret) {
- mlx5_ib_dbg(dev, "Couldn't modify UMR QP\n");
- goto error_4;
+ err = mlx5_query_hca_vport_context(dev->mdev, 0, port, 0,
+ &vport_ctx);
+ if (err) {
+ mlx5_ib_err(dev, "query_hca_vport_context for port=%d failed %d\n",
+ port, err);
+ return err;
+ }
+ dev->port_caps[port - 1].has_smi = vport_ctx.has_smi;
}
- memset(attr, 0, sizeof(*attr));
- attr->qp_state = IB_QPS_RTR;
- attr->path_mtu = IB_MTU_256;
+ return 0;
+}
- ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE, NULL);
- if (ret) {
- mlx5_ib_dbg(dev, "Couldn't modify umr QP to rtr\n");
- goto error_4;
- }
+static void get_ext_port_caps(struct mlx5_ib_dev *dev)
+{
+ unsigned int port;
- memset(attr, 0, sizeof(*attr));
- attr->qp_state = IB_QPS_RTS;
- ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE, NULL);
- if (ret) {
- mlx5_ib_dbg(dev, "Couldn't modify umr QP to rts\n");
- goto error_4;
+ rdma_for_each_port (&dev->ib_dev, port)
+ mlx5_query_ext_port_caps(dev, port);
+}
+
+static u8 mlx5_get_umr_fence(u8 umr_fence_cap)
+{
+ switch (umr_fence_cap) {
+ case MLX5_CAP_UMR_FENCE_NONE:
+ return MLX5_FENCE_MODE_NONE;
+ case MLX5_CAP_UMR_FENCE_SMALL:
+ return MLX5_FENCE_MODE_INITIATOR_SMALL;
+ default:
+ return MLX5_FENCE_MODE_STRONG_ORDERING;
}
+}
- dev->umrc.qp = qp;
- dev->umrc.cq = cq;
- dev->umrc.pd = pd;
+int mlx5_ib_dev_res_cq_init(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_ib_resources *devr = &dev->devr;
+ struct ib_cq_init_attr cq_attr = {.cqe = 1};
+ struct ib_device *ibdev;
+ struct ib_pd *pd;
+ struct ib_cq *cq;
+ int ret = 0;
- sema_init(&dev->umrc.sem, MAX_UMR_WR);
- ret = mlx5_mr_cache_init(dev);
- if (ret) {
- mlx5_ib_warn(dev, "mr cache init failed %d\n", ret);
- goto error_4;
- }
- kfree(attr);
- kfree(init_attr);
+ /*
+ * devr->c0 is set once, never changed until device unload.
+ * Avoid taking the mutex if initialization is already done.
+ */
+ if (devr->c0)
+ return 0;
- return 0;
+ mutex_lock(&devr->cq_lock);
+ if (devr->c0)
+ goto unlock;
-error_4:
- mlx5_ib_destroy_qp(qp);
+ ibdev = &dev->ib_dev;
+ pd = ib_alloc_pd(ibdev, 0);
+ if (IS_ERR(pd)) {
+ ret = PTR_ERR(pd);
+ mlx5_ib_err(dev, "Couldn't allocate PD for res init, err=%pe\n",
+ pd);
+ goto unlock;
+ }
-error_3:
- ib_free_cq(cq);
+ cq = ib_create_cq(ibdev, NULL, NULL, NULL, &cq_attr);
+ if (IS_ERR(cq)) {
+ ret = PTR_ERR(cq);
+ mlx5_ib_err(dev, "Couldn't create CQ for res init, err=%pe\n",
+ cq);
+ ib_dealloc_pd(pd);
+ goto unlock;
+ }
-error_2:
- ib_dealloc_pd(pd);
+ devr->p0 = pd;
+ devr->c0 = cq;
-error_0:
- kfree(attr);
- kfree(init_attr);
+unlock:
+ mutex_unlock(&devr->cq_lock);
return ret;
}
-static int create_dev_resources(struct mlx5_ib_resources *devr)
+int mlx5_ib_dev_res_srq_init(struct mlx5_ib_dev *dev)
{
+ struct mlx5_ib_resources *devr = &dev->devr;
struct ib_srq_init_attr attr;
- struct mlx5_ib_dev *dev;
- struct ib_cq_init_attr cq_attr = {.cqe = 1};
- int port;
+ struct ib_srq *s0, *s1;
int ret = 0;
- dev = container_of(devr, struct mlx5_ib_dev, devr);
-
- mutex_init(&devr->mutex);
-
- devr->p0 = mlx5_ib_alloc_pd(&dev->ib_dev, NULL, NULL);
- if (IS_ERR(devr->p0)) {
- ret = PTR_ERR(devr->p0);
- goto error0;
- }
- devr->p0->device = &dev->ib_dev;
- devr->p0->uobject = NULL;
- atomic_set(&devr->p0->usecnt, 0);
-
- devr->c0 = mlx5_ib_create_cq(&dev->ib_dev, &cq_attr, NULL, NULL);
- if (IS_ERR(devr->c0)) {
- ret = PTR_ERR(devr->c0);
- goto error1;
- }
- devr->c0->device = &dev->ib_dev;
- devr->c0->uobject = NULL;
- devr->c0->comp_handler = NULL;
- devr->c0->event_handler = NULL;
- devr->c0->cq_context = NULL;
- atomic_set(&devr->c0->usecnt, 0);
-
- devr->x0 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL, NULL);
- if (IS_ERR(devr->x0)) {
- ret = PTR_ERR(devr->x0);
- goto error2;
- }
- devr->x0->device = &dev->ib_dev;
- devr->x0->inode = NULL;
- atomic_set(&devr->x0->usecnt, 0);
- mutex_init(&devr->x0->tgt_qp_mutex);
- INIT_LIST_HEAD(&devr->x0->tgt_qp_list);
-
- devr->x1 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL, NULL);
- if (IS_ERR(devr->x1)) {
- ret = PTR_ERR(devr->x1);
- goto error3;
- }
- devr->x1->device = &dev->ib_dev;
- devr->x1->inode = NULL;
- atomic_set(&devr->x1->usecnt, 0);
- mutex_init(&devr->x1->tgt_qp_mutex);
- INIT_LIST_HEAD(&devr->x1->tgt_qp_list);
+ /*
+ * devr->s1 is set once, never changed until device unload.
+ * Avoid taking the mutex if initialization is already done.
+ */
+ if (devr->s1)
+ return 0;
+
+ mutex_lock(&devr->srq_lock);
+ if (devr->s1)
+ goto unlock;
+
+ ret = mlx5_ib_dev_res_cq_init(dev);
+ if (ret)
+ goto unlock;
memset(&attr, 0, sizeof(attr));
attr.attr.max_sge = 1;
attr.attr.max_wr = 1;
attr.srq_type = IB_SRQT_XRC;
- attr.ext.xrc.cq = devr->c0;
- attr.ext.xrc.xrcd = devr->x0;
-
- devr->s0 = mlx5_ib_create_srq(devr->p0, &attr, NULL);
- if (IS_ERR(devr->s0)) {
- ret = PTR_ERR(devr->s0);
- goto error4;
- }
- devr->s0->device = &dev->ib_dev;
- devr->s0->pd = devr->p0;
- devr->s0->uobject = NULL;
- devr->s0->event_handler = NULL;
- devr->s0->srq_context = NULL;
- devr->s0->srq_type = IB_SRQT_XRC;
- devr->s0->ext.xrc.xrcd = devr->x0;
- devr->s0->ext.xrc.cq = devr->c0;
- atomic_inc(&devr->s0->ext.xrc.xrcd->usecnt);
- atomic_inc(&devr->s0->ext.xrc.cq->usecnt);
- atomic_inc(&devr->p0->usecnt);
- atomic_set(&devr->s0->usecnt, 0);
+ attr.ext.cq = devr->c0;
+
+ s0 = ib_create_srq(devr->p0, &attr);
+ if (IS_ERR(s0)) {
+ ret = PTR_ERR(s0);
+ mlx5_ib_err(dev,
+ "Couldn't create SRQ 0 for res init, err=%pe\n",
+ s0);
+ goto unlock;
+ }
memset(&attr, 0, sizeof(attr));
attr.attr.max_sge = 1;
attr.attr.max_wr = 1;
attr.srq_type = IB_SRQT_BASIC;
- devr->s1 = mlx5_ib_create_srq(devr->p0, &attr, NULL);
- if (IS_ERR(devr->s1)) {
- ret = PTR_ERR(devr->s1);
- goto error5;
- }
- devr->s1->device = &dev->ib_dev;
- devr->s1->pd = devr->p0;
- devr->s1->uobject = NULL;
- devr->s1->event_handler = NULL;
- devr->s1->srq_context = NULL;
- devr->s1->srq_type = IB_SRQT_BASIC;
- devr->s1->ext.xrc.cq = devr->c0;
- atomic_inc(&devr->p0->usecnt);
- atomic_set(&devr->s0->usecnt, 0);
-
- for (port = 0; port < ARRAY_SIZE(devr->ports); ++port) {
- INIT_WORK(&devr->ports[port].pkey_change_work,
- pkey_change_handler);
- devr->ports[port].devr = devr;
+
+ s1 = ib_create_srq(devr->p0, &attr);
+ if (IS_ERR(s1)) {
+ ret = PTR_ERR(s1);
+ mlx5_ib_err(dev,
+ "Couldn't create SRQ 1 for res init, err=%pe\n",
+ s1);
+ ib_destroy_srq(s0);
}
- return 0;
+ devr->s0 = s0;
+ devr->s1 = s1;
-error5:
- mlx5_ib_destroy_srq(devr->s0);
-error4:
- mlx5_ib_dealloc_xrcd(devr->x1);
-error3:
- mlx5_ib_dealloc_xrcd(devr->x0);
-error2:
- mlx5_ib_destroy_cq(devr->c0);
-error1:
- mlx5_ib_dealloc_pd(devr->p0);
-error0:
+unlock:
+ mutex_unlock(&devr->srq_lock);
return ret;
}
-static void destroy_dev_resources(struct mlx5_ib_resources *devr)
+static int mlx5_ib_dev_res_init(struct mlx5_ib_dev *dev)
{
- struct mlx5_ib_dev *dev =
- container_of(devr, struct mlx5_ib_dev, devr);
- int port;
+ struct mlx5_ib_resources *devr = &dev->devr;
+ int ret;
- mlx5_ib_destroy_srq(devr->s1);
- mlx5_ib_destroy_srq(devr->s0);
- mlx5_ib_dealloc_xrcd(devr->x0);
- mlx5_ib_dealloc_xrcd(devr->x1);
- mlx5_ib_destroy_cq(devr->c0);
- mlx5_ib_dealloc_pd(devr->p0);
+ if (!MLX5_CAP_GEN(dev->mdev, xrc))
+ return -EOPNOTSUPP;
- /* Make sure no change P_Key work items are still executing */
- for (port = 0; port < dev->num_ports; ++port)
- cancel_work_sync(&devr->ports[port].pkey_change_work);
+ ret = mlx5_cmd_xrcd_alloc(dev->mdev, &devr->xrcdn0, 0);
+ if (ret)
+ return ret;
+
+ ret = mlx5_cmd_xrcd_alloc(dev->mdev, &devr->xrcdn1, 0);
+ if (ret) {
+ mlx5_cmd_xrcd_dealloc(dev->mdev, devr->xrcdn0, 0);
+ return ret;
+ }
+
+ mutex_init(&devr->cq_lock);
+ mutex_init(&devr->srq_lock);
+
+ return 0;
+}
+
+static void mlx5_ib_dev_res_cleanup(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_ib_resources *devr = &dev->devr;
+
+ /* After s0/s1 init, they are not unset during the device lifetime. */
+ if (devr->s1) {
+ ib_destroy_srq(devr->s1);
+ ib_destroy_srq(devr->s0);
+ }
+ mlx5_cmd_xrcd_dealloc(dev->mdev, devr->xrcdn1, 0);
+ mlx5_cmd_xrcd_dealloc(dev->mdev, devr->xrcdn0, 0);
+ /* After p0/c0 init, they are not unset during the device lifetime. */
+ if (devr->c0) {
+ ib_destroy_cq(devr->c0);
+ ib_dealloc_pd(devr->p0);
+ }
+ mutex_destroy(&devr->cq_lock);
+ mutex_destroy(&devr->srq_lock);
+}
+
+static int
+mlx5_ib_create_data_direct_resources(struct mlx5_ib_dev *dev)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
+ struct mlx5_core_dev *mdev = dev->mdev;
+ bool ro_supp = false;
+ void *mkc;
+ u32 mkey;
+ u32 pdn;
+ u32 *in;
+ int err;
+
+ err = mlx5_core_alloc_pd(mdev, &pdn);
+ if (err)
+ return err;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in) {
+ err = -ENOMEM;
+ goto err;
+ }
+
+ MLX5_SET(create_mkey_in, in, data_direct, 1);
+ mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
+ MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA);
+ MLX5_SET(mkc, mkc, lw, 1);
+ MLX5_SET(mkc, mkc, lr, 1);
+ MLX5_SET(mkc, mkc, rw, 1);
+ MLX5_SET(mkc, mkc, rr, 1);
+ MLX5_SET(mkc, mkc, a, 1);
+ MLX5_SET(mkc, mkc, pd, pdn);
+ MLX5_SET(mkc, mkc, length64, 1);
+ MLX5_SET(mkc, mkc, qpn, 0xffffff);
+ err = mlx5_core_create_mkey(mdev, &mkey, in, inlen);
+ if (err)
+ goto err_mkey;
+
+ dev->ddr.mkey = mkey;
+ dev->ddr.pdn = pdn;
+
+ /* create another mkey with RO support */
+ if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write)) {
+ MLX5_SET(mkc, mkc, relaxed_ordering_write, 1);
+ ro_supp = true;
+ }
+
+ if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read)) {
+ MLX5_SET(mkc, mkc, relaxed_ordering_read, 1);
+ ro_supp = true;
+ }
+
+ if (ro_supp) {
+ err = mlx5_core_create_mkey(mdev, &mkey, in, inlen);
+ /* RO is defined as best effort */
+ if (!err) {
+ dev->ddr.mkey_ro = mkey;
+ dev->ddr.mkey_ro_valid = true;
+ }
+ }
+
+ kvfree(in);
+ return 0;
+
+err_mkey:
+ kvfree(in);
+err:
+ mlx5_core_dealloc_pd(mdev, pdn);
+ return err;
+}
+
+static void
+mlx5_ib_free_data_direct_resources(struct mlx5_ib_dev *dev)
+{
+
+ if (dev->ddr.mkey_ro_valid)
+ mlx5_core_destroy_mkey(dev->mdev, dev->ddr.mkey_ro);
+
+ mlx5_core_destroy_mkey(dev->mdev, dev->ddr.mkey);
+ mlx5_core_dealloc_pd(dev->mdev, dev->ddr.pdn);
}
-static u32 get_core_cap_flags(struct ib_device *ibdev)
+static u32 get_core_cap_flags(struct ib_device *ibdev,
+ struct mlx5_hca_vport_context *rep)
{
struct mlx5_ib_dev *dev = to_mdev(ibdev);
enum rdma_link_layer ll = mlx5_ib_port_link_layer(ibdev, 1);
u8 l3_type_cap = MLX5_CAP_ROCE(dev->mdev, l3_type);
u8 roce_version_cap = MLX5_CAP_ROCE(dev->mdev, roce_version);
+ bool raw_support = !mlx5_core_mp_enabled(dev->mdev);
u32 ret = 0;
+ if (rep->grh_required)
+ ret |= RDMA_CORE_CAP_IB_GRH_REQUIRED;
+
+ if (dev->num_plane)
+ return ret | RDMA_CORE_CAP_PROT_IB | RDMA_CORE_CAP_IB_MAD |
+ RDMA_CORE_CAP_IB_CM | RDMA_CORE_CAP_IB_SA |
+ RDMA_CORE_CAP_AF_IB;
+ else if (ibdev->type == RDMA_DEVICE_TYPE_SMI)
+ return ret | RDMA_CORE_CAP_IB_MAD | RDMA_CORE_CAP_IB_SMI;
+
if (ll == IB_LINK_LAYER_INFINIBAND)
- return RDMA_CORE_PORT_IBA_IB;
+ return ret | RDMA_CORE_PORT_IBA_IB;
+
+ if (raw_support)
+ ret |= RDMA_CORE_PORT_RAW_PACKET;
if (!(l3_type_cap & MLX5_ROCE_L3_TYPE_IPV4_CAP))
- return 0;
+ return ret;
if (!(l3_type_cap & MLX5_ROCE_L3_TYPE_IPV6_CAP))
- return 0;
+ return ret;
if (roce_version_cap & MLX5_ROCE_VERSION_1_CAP)
ret |= RDMA_CORE_PORT_IBA_ROCE;
@@ -2791,34 +3302,124 @@ static u32 get_core_cap_flags(struct ib_device *ibdev)
return ret;
}
-static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num,
+static int mlx5_port_immutable(struct ib_device *ibdev, u32 port_num,
struct ib_port_immutable *immutable)
{
struct ib_port_attr attr;
struct mlx5_ib_dev *dev = to_mdev(ibdev);
enum rdma_link_layer ll = mlx5_ib_port_link_layer(ibdev, port_num);
+ struct mlx5_hca_vport_context rep = {0};
int err;
- err = mlx5_ib_query_port(ibdev, port_num, &attr);
+ err = ib_query_port(ibdev, port_num, &attr);
if (err)
return err;
+ if (ll == IB_LINK_LAYER_INFINIBAND) {
+ if (ibdev->type == RDMA_DEVICE_TYPE_SMI)
+ port_num = smi_to_native_portnum(dev, port_num);
+
+ err = mlx5_query_hca_vport_context(dev->mdev, 0, port_num, 0,
+ &rep);
+ if (err)
+ return err;
+ }
+
immutable->pkey_tbl_len = attr.pkey_tbl_len;
immutable->gid_tbl_len = attr.gid_tbl_len;
- immutable->core_cap_flags = get_core_cap_flags(ibdev);
- if ((ll == IB_LINK_LAYER_INFINIBAND) || MLX5_CAP_GEN(dev->mdev, roce))
- immutable->max_mad_size = IB_MGMT_MAD_SIZE;
+ immutable->core_cap_flags = get_core_cap_flags(ibdev, &rep);
+ immutable->max_mad_size = IB_MGMT_MAD_SIZE;
return 0;
}
-static void get_dev_fw_str(struct ib_device *ibdev, char *str,
- size_t str_len)
+static int mlx5_port_rep_immutable(struct ib_device *ibdev, u32 port_num,
+ struct ib_port_immutable *immutable)
+{
+ struct ib_port_attr attr;
+ int err;
+
+ immutable->core_cap_flags = RDMA_CORE_PORT_RAW_PACKET;
+
+ err = ib_query_port(ibdev, port_num, &attr);
+ if (err)
+ return err;
+
+ immutable->pkey_tbl_len = attr.pkey_tbl_len;
+ immutable->gid_tbl_len = attr.gid_tbl_len;
+ immutable->core_cap_flags = RDMA_CORE_PORT_RAW_PACKET;
+
+ return 0;
+}
+
+static void get_dev_fw_str(struct ib_device *ibdev, char *str)
{
struct mlx5_ib_dev *dev =
container_of(ibdev, struct mlx5_ib_dev, ib_dev);
- snprintf(str, str_len, "%d.%d.%04d", fw_rev_maj(dev->mdev),
- fw_rev_min(dev->mdev), fw_rev_sub(dev->mdev));
+ snprintf(str, IB_FW_VERSION_NAME_MAX, "%d.%d.%04d",
+ fw_rev_maj(dev->mdev), fw_rev_min(dev->mdev),
+ fw_rev_sub(dev->mdev));
+}
+
+static int lag_event(struct notifier_block *nb, unsigned long event, void *data)
+{
+ struct mlx5_ib_dev *dev = container_of(nb, struct mlx5_ib_dev,
+ lag_events);
+ struct mlx5_core_dev *mdev = dev->mdev;
+ struct ib_device *ibdev = &dev->ib_dev;
+ struct net_device *old_ndev = NULL;
+ struct mlx5_ib_port *port;
+ struct net_device *ndev;
+ u32 portnum = 0;
+ int ret = 0;
+ int i;
+
+ switch (event) {
+ case MLX5_DRIVER_EVENT_ACTIVE_BACKUP_LAG_CHANGE_LOWERSTATE:
+ ndev = data;
+ if (ndev) {
+ if (!mlx5_lag_is_roce(mdev)) {
+ // sriov lag
+ for (i = 0; i < dev->num_ports; i++) {
+ port = &dev->port[i];
+ if (port->rep && port->rep->vport ==
+ MLX5_VPORT_UPLINK) {
+ portnum = i;
+ break;
+ }
+ }
+ }
+ old_ndev = ib_device_get_netdev(ibdev, portnum + 1);
+ ret = ib_device_set_netdev(ibdev, ndev, portnum + 1);
+ if (ret)
+ goto out;
+
+ if (old_ndev)
+ roce_del_all_netdev_gids(ibdev, portnum + 1,
+ old_ndev);
+ rdma_roce_rescan_port(ibdev, portnum + 1);
+ }
+ break;
+ default:
+ return NOTIFY_DONE;
+ }
+
+out:
+ dev_put(old_ndev);
+ return notifier_from_errno(ret);
+}
+
+static void mlx5e_lag_event_register(struct mlx5_ib_dev *dev)
+{
+ dev->lag_events.notifier_call = lag_event;
+ blocking_notifier_chain_register(&dev->mdev->priv.lag_nh,
+ &dev->lag_events);
+}
+
+static void mlx5e_lag_event_unregister(struct mlx5_ib_dev *dev)
+{
+ blocking_notifier_chain_unregister(&dev->mdev->priv.lag_nh,
+ &dev->lag_events);
}
static int mlx5_eth_lag_init(struct mlx5_ib_dev *dev)
@@ -2842,7 +3443,10 @@ static int mlx5_eth_lag_init(struct mlx5_ib_dev *dev)
goto err_destroy_vport_lag;
}
- dev->flow_db.lag_demux_ft = ft;
+ mlx5e_lag_event_register(dev);
+ dev->flow_db->lag_demux_ft = ft;
+ dev->lag_ports = mlx5_lag_get_num_ports(mdev);
+ dev->lag_active = true;
return 0;
err_destroy_vport_lag:
@@ -2854,48 +3458,84 @@ static void mlx5_eth_lag_cleanup(struct mlx5_ib_dev *dev)
{
struct mlx5_core_dev *mdev = dev->mdev;
- if (dev->flow_db.lag_demux_ft) {
- mlx5_destroy_flow_table(dev->flow_db.lag_demux_ft);
- dev->flow_db.lag_demux_ft = NULL;
+ if (dev->lag_active) {
+ dev->lag_active = false;
+
+ mlx5e_lag_event_unregister(dev);
+ mlx5_destroy_flow_table(dev->flow_db->lag_demux_ft);
+ dev->flow_db->lag_demux_ft = NULL;
mlx5_cmd_destroy_vport_lag(mdev);
}
}
-static int mlx5_add_netdev_notifier(struct mlx5_ib_dev *dev)
+static void mlx5_netdev_notifier_register(struct mlx5_roce *roce,
+ struct net_device *netdev)
{
int err;
- dev->roce.nb.notifier_call = mlx5_netdev_event;
- err = register_netdevice_notifier(&dev->roce.nb);
- if (err) {
- dev->roce.nb.notifier_call = NULL;
- return err;
- }
+ if (roce->tracking_netdev)
+ return;
+ roce->tracking_netdev = netdev;
+ roce->nb.notifier_call = mlx5_netdev_event;
+ err = register_netdevice_notifier_dev_net(netdev, &roce->nb, &roce->nn);
+ WARN_ON(err);
+}
- return 0;
+static void mlx5_netdev_notifier_unregister(struct mlx5_roce *roce)
+{
+ if (!roce->tracking_netdev)
+ return;
+ unregister_netdevice_notifier_dev_net(roce->tracking_netdev, &roce->nb,
+ &roce->nn);
+ roce->tracking_netdev = NULL;
}
-static void mlx5_remove_netdev_notifier(struct mlx5_ib_dev *dev)
+static int mlx5e_mdev_notifier_event(struct notifier_block *nb,
+ unsigned long event, void *data)
{
- if (dev->roce.nb.notifier_call) {
- unregister_netdevice_notifier(&dev->roce.nb);
- dev->roce.nb.notifier_call = NULL;
+ struct mlx5_roce *roce = container_of(nb, struct mlx5_roce, mdev_nb);
+ struct net_device *netdev = data;
+
+ switch (event) {
+ case MLX5_DRIVER_EVENT_UPLINK_NETDEV:
+ if (netdev)
+ mlx5_netdev_notifier_register(roce, netdev);
+ else
+ mlx5_netdev_notifier_unregister(roce);
+ break;
+ default:
+ return NOTIFY_DONE;
}
+
+ return NOTIFY_OK;
+}
+
+static void mlx5_mdev_netdev_track(struct mlx5_ib_dev *dev, u32 port_num)
+{
+ struct mlx5_roce *roce = &dev->port[port_num].roce;
+
+ roce->mdev_nb.notifier_call = mlx5e_mdev_notifier_event;
+ mlx5_blocking_notifier_register(dev->mdev, &roce->mdev_nb);
+ mlx5_core_uplink_netdev_event_replay(dev->mdev);
+}
+
+static void mlx5_mdev_netdev_untrack(struct mlx5_ib_dev *dev, u32 port_num)
+{
+ struct mlx5_roce *roce = &dev->port[port_num].roce;
+
+ mlx5_blocking_notifier_unregister(dev->mdev, &roce->mdev_nb);
+ mlx5_netdev_notifier_unregister(roce);
}
static int mlx5_enable_eth(struct mlx5_ib_dev *dev)
{
int err;
- err = mlx5_add_netdev_notifier(dev);
- if (err)
- return err;
-
- if (MLX5_CAP_GEN(dev->mdev, roce)) {
+ if (!dev->is_rep && dev->profile != &raw_eth_profile) {
err = mlx5_nic_vport_enable_roce(dev->mdev);
if (err)
- goto err_unregister_netdevice_notifier;
+ return err;
}
err = mlx5_eth_lag_init(dev);
@@ -2905,437 +3545,1636 @@ static int mlx5_enable_eth(struct mlx5_ib_dev *dev)
return 0;
err_disable_roce:
- if (MLX5_CAP_GEN(dev->mdev, roce))
+ if (!dev->is_rep && dev->profile != &raw_eth_profile)
mlx5_nic_vport_disable_roce(dev->mdev);
-err_unregister_netdevice_notifier:
- mlx5_remove_netdev_notifier(dev);
return err;
}
static void mlx5_disable_eth(struct mlx5_ib_dev *dev)
{
mlx5_eth_lag_cleanup(dev);
- if (MLX5_CAP_GEN(dev->mdev, roce))
+ if (!dev->is_rep && dev->profile != &raw_eth_profile)
mlx5_nic_vport_disable_roce(dev->mdev);
}
-static void mlx5_ib_dealloc_q_counters(struct mlx5_ib_dev *dev)
+static int mlx5_ib_rn_get_params(struct ib_device *device, u32 port_num,
+ enum rdma_netdev_t type,
+ struct rdma_netdev_alloc_params *params)
{
- unsigned int i;
+ if (type != RDMA_NETDEV_IPOIB)
+ return -EOPNOTSUPP;
- for (i = 0; i < dev->num_ports; i++)
- mlx5_core_dealloc_q_counter(dev->mdev,
- dev->port[i].q_cnt_id);
+ return mlx5_rdma_rn_get_params(to_mdev(device)->mdev, device, params);
}
-static int mlx5_ib_alloc_q_counters(struct mlx5_ib_dev *dev)
+static ssize_t delay_drop_timeout_read(struct file *filp, char __user *buf,
+ size_t count, loff_t *pos)
{
+ struct mlx5_ib_delay_drop *delay_drop = filp->private_data;
+ char lbuf[20];
+ int len;
+
+ len = snprintf(lbuf, sizeof(lbuf), "%u\n", delay_drop->timeout);
+ return simple_read_from_buffer(buf, count, pos, lbuf, len);
+}
+
+static ssize_t delay_drop_timeout_write(struct file *filp, const char __user *buf,
+ size_t count, loff_t *pos)
+{
+ struct mlx5_ib_delay_drop *delay_drop = filp->private_data;
+ u32 timeout;
+ u32 var;
+
+ if (kstrtouint_from_user(buf, count, 0, &var))
+ return -EFAULT;
+
+ timeout = min_t(u32, roundup(var, 100), MLX5_MAX_DELAY_DROP_TIMEOUT_MS *
+ 1000);
+ if (timeout != var)
+ mlx5_ib_dbg(delay_drop->dev, "Round delay drop timeout to %u usec\n",
+ timeout);
+
+ delay_drop->timeout = timeout;
+
+ return count;
+}
+
+static const struct file_operations fops_delay_drop_timeout = {
+ .owner = THIS_MODULE,
+ .open = simple_open,
+ .write = delay_drop_timeout_write,
+ .read = delay_drop_timeout_read,
+};
+
+static void mlx5_ib_unbind_slave_port(struct mlx5_ib_dev *ibdev,
+ struct mlx5_ib_multiport_info *mpi)
+{
+ u32 port_num = mlx5_core_native_port_num(mpi->mdev) - 1;
+ struct mlx5_ib_port *port = &ibdev->port[port_num];
+ int comps;
+ int err;
int i;
+
+ lockdep_assert_held(&mlx5_ib_multiport_mutex);
+
+ mlx5_ib_disable_lb_mp(ibdev->mdev, mpi->mdev, &ibdev->lb);
+
+ mlx5_core_mp_event_replay(ibdev->mdev,
+ MLX5_DRIVER_EVENT_AFFILIATION_REMOVED,
+ NULL);
+ mlx5_core_mp_event_replay(mpi->mdev,
+ MLX5_DRIVER_EVENT_AFFILIATION_REMOVED,
+ NULL);
+
+ mlx5_ib_cleanup_cong_debugfs(ibdev, port_num);
+
+ spin_lock(&port->mp.mpi_lock);
+ if (!mpi->ibdev) {
+ spin_unlock(&port->mp.mpi_lock);
+ return;
+ }
+
+ mpi->ibdev = NULL;
+
+ spin_unlock(&port->mp.mpi_lock);
+ if (mpi->mdev_events.notifier_call)
+ mlx5_notifier_unregister(mpi->mdev, &mpi->mdev_events);
+ mpi->mdev_events.notifier_call = NULL;
+ mlx5_mdev_netdev_untrack(ibdev, port_num);
+ spin_lock(&port->mp.mpi_lock);
+
+ comps = mpi->mdev_refcnt;
+ if (comps) {
+ mpi->unaffiliate = true;
+ init_completion(&mpi->unref_comp);
+ spin_unlock(&port->mp.mpi_lock);
+
+ for (i = 0; i < comps; i++)
+ wait_for_completion(&mpi->unref_comp);
+
+ spin_lock(&port->mp.mpi_lock);
+ mpi->unaffiliate = false;
+ }
+
+ port->mp.mpi = NULL;
+
+ spin_unlock(&port->mp.mpi_lock);
+
+ err = mlx5_nic_vport_unaffiliate_multiport(mpi->mdev);
+
+ mlx5_ib_dbg(ibdev, "unaffiliated port %u\n", port_num + 1);
+ /* Log an error, still needed to cleanup the pointers and add
+ * it back to the list.
+ */
+ if (err)
+ mlx5_ib_err(ibdev, "Failed to unaffiliate port %u\n",
+ port_num + 1);
+
+ ibdev->port[port_num].roce.last_port_state = IB_PORT_DOWN;
+}
+
+static bool mlx5_ib_bind_slave_port(struct mlx5_ib_dev *ibdev,
+ struct mlx5_ib_multiport_info *mpi)
+{
+ u32 port_num = mlx5_core_native_port_num(mpi->mdev) - 1;
+ u64 key;
+ int err;
+
+ lockdep_assert_held(&mlx5_ib_multiport_mutex);
+
+ spin_lock(&ibdev->port[port_num].mp.mpi_lock);
+ if (ibdev->port[port_num].mp.mpi) {
+ mlx5_ib_dbg(ibdev, "port %u already affiliated.\n",
+ port_num + 1);
+ spin_unlock(&ibdev->port[port_num].mp.mpi_lock);
+ return false;
+ }
+
+ ibdev->port[port_num].mp.mpi = mpi;
+ mpi->ibdev = ibdev;
+ mpi->mdev_events.notifier_call = NULL;
+ spin_unlock(&ibdev->port[port_num].mp.mpi_lock);
+
+ err = mlx5_nic_vport_affiliate_multiport(ibdev->mdev, mpi->mdev);
+ if (err)
+ goto unbind;
+
+ mlx5_mdev_netdev_track(ibdev, port_num);
+
+ mpi->mdev_events.notifier_call = mlx5_ib_event_slave_port;
+ mlx5_notifier_register(mpi->mdev, &mpi->mdev_events);
+
+ mlx5_ib_init_cong_debugfs(ibdev, port_num);
+
+ key = mpi->mdev->priv.adev_idx;
+ mlx5_core_mp_event_replay(mpi->mdev,
+ MLX5_DRIVER_EVENT_AFFILIATION_DONE,
+ &key);
+ mlx5_core_mp_event_replay(ibdev->mdev,
+ MLX5_DRIVER_EVENT_AFFILIATION_DONE,
+ &key);
+
+ err = mlx5_ib_enable_lb_mp(ibdev->mdev, mpi->mdev, &ibdev->lb);
+ if (err)
+ goto unbind;
+
+ return true;
+
+unbind:
+ mlx5_ib_unbind_slave_port(ibdev, mpi);
+ return false;
+}
+
+static int mlx5_ib_data_direct_init(struct mlx5_ib_dev *dev)
+{
+ char vuid[MLX5_ST_SZ_BYTES(array1024_auto) + 1] = {};
int ret;
+ if (!MLX5_CAP_GEN(dev->mdev, data_direct) ||
+ !MLX5_CAP_GEN_2(dev->mdev, query_vuid))
+ return 0;
+
+ ret = mlx5_cmd_query_vuid(dev->mdev, true, vuid);
+ if (ret)
+ return ret;
+
+ ret = mlx5_ib_create_data_direct_resources(dev);
+ if (ret)
+ return ret;
+
+ INIT_LIST_HEAD(&dev->data_direct_mr_list);
+ ret = mlx5_data_direct_ib_reg(dev, vuid);
+ if (ret)
+ mlx5_ib_free_data_direct_resources(dev);
+
+ return ret;
+}
+
+static void mlx5_ib_data_direct_cleanup(struct mlx5_ib_dev *dev)
+{
+ if (!MLX5_CAP_GEN(dev->mdev, data_direct) ||
+ !MLX5_CAP_GEN_2(dev->mdev, query_vuid))
+ return;
+
+ mlx5_data_direct_ib_unreg(dev);
+ mlx5_ib_free_data_direct_resources(dev);
+}
+
+static int mlx5_ib_init_multiport_master(struct mlx5_ib_dev *dev)
+{
+ u32 port_num = mlx5_core_native_port_num(dev->mdev) - 1;
+ enum rdma_link_layer ll = mlx5_ib_port_link_layer(&dev->ib_dev,
+ port_num + 1);
+ struct mlx5_ib_multiport_info *mpi;
+ int err;
+ u32 i;
+
+ if (!mlx5_core_is_mp_master(dev->mdev) || ll != IB_LINK_LAYER_ETHERNET)
+ return 0;
+
+ err = mlx5_query_nic_vport_system_image_guid(dev->mdev,
+ &dev->sys_image_guid);
+ if (err)
+ return err;
+
+ err = mlx5_nic_vport_enable_roce(dev->mdev);
+ if (err)
+ return err;
+
+ mutex_lock(&mlx5_ib_multiport_mutex);
+ for (i = 0; i < dev->num_ports; i++) {
+ bool bound = false;
+
+ /* build a stub multiport info struct for the native port. */
+ if (i == port_num) {
+ mpi = kzalloc(sizeof(*mpi), GFP_KERNEL);
+ if (!mpi) {
+ mutex_unlock(&mlx5_ib_multiport_mutex);
+ mlx5_nic_vport_disable_roce(dev->mdev);
+ return -ENOMEM;
+ }
+
+ mpi->is_master = true;
+ mpi->mdev = dev->mdev;
+ mpi->sys_image_guid = dev->sys_image_guid;
+ dev->port[i].mp.mpi = mpi;
+ mpi->ibdev = dev;
+ mpi = NULL;
+ continue;
+ }
+
+ list_for_each_entry(mpi, &mlx5_ib_unaffiliated_port_list,
+ list) {
+ if (dev->sys_image_guid == mpi->sys_image_guid &&
+ (mlx5_core_native_port_num(mpi->mdev) - 1) == i &&
+ mlx5_core_same_coredev_type(dev->mdev, mpi->mdev)) {
+ bound = mlx5_ib_bind_slave_port(dev, mpi);
+ }
+
+ if (bound) {
+ dev_dbg(mpi->mdev->device,
+ "removing port from unaffiliated list.\n");
+ mlx5_ib_dbg(dev, "port %d bound\n", i + 1);
+ list_del(&mpi->list);
+ break;
+ }
+ }
+ if (!bound)
+ mlx5_ib_dbg(dev, "no free port found for port %d\n",
+ i + 1);
+ }
+
+ list_add_tail(&dev->ib_dev_list, &mlx5_ib_dev_list);
+ mutex_unlock(&mlx5_ib_multiport_mutex);
+ return err;
+}
+
+static void mlx5_ib_cleanup_multiport_master(struct mlx5_ib_dev *dev)
+{
+ u32 port_num = mlx5_core_native_port_num(dev->mdev) - 1;
+ enum rdma_link_layer ll = mlx5_ib_port_link_layer(&dev->ib_dev,
+ port_num + 1);
+ u32 i;
+
+ if (!mlx5_core_is_mp_master(dev->mdev) || ll != IB_LINK_LAYER_ETHERNET)
+ return;
+
+ mutex_lock(&mlx5_ib_multiport_mutex);
for (i = 0; i < dev->num_ports; i++) {
- ret = mlx5_core_alloc_q_counter(dev->mdev,
- &dev->port[i].q_cnt_id);
- if (ret) {
- mlx5_ib_warn(dev,
- "couldn't allocate queue counter for port %d, err %d\n",
- i + 1, ret);
- goto dealloc_counters;
+ if (dev->port[i].mp.mpi) {
+ /* Destroy the native port stub */
+ if (i == port_num) {
+ kfree(dev->port[i].mp.mpi);
+ dev->port[i].mp.mpi = NULL;
+ } else {
+ mlx5_ib_dbg(dev, "unbinding port_num: %u\n",
+ i + 1);
+ list_add_tail(&dev->port[i].mp.mpi->list,
+ &mlx5_ib_unaffiliated_port_list);
+ mlx5_ib_unbind_slave_port(dev,
+ dev->port[i].mp.mpi);
+ }
}
}
+ mlx5_ib_dbg(dev, "removing from devlist\n");
+ list_del(&dev->ib_dev_list);
+ mutex_unlock(&mlx5_ib_multiport_mutex);
+
+ mlx5_nic_vport_disable_roce(dev->mdev);
+}
+
+static int mmap_obj_cleanup(struct ib_uobject *uobject,
+ enum rdma_remove_reason why,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct mlx5_user_mmap_entry *obj = uobject->object;
+
+ rdma_user_mmap_entry_remove(&obj->rdma_entry);
return 0;
+}
-dealloc_counters:
- while (--i >= 0)
- mlx5_core_dealloc_q_counter(dev->mdev,
- dev->port[i].q_cnt_id);
+static int mlx5_rdma_user_mmap_entry_insert(struct mlx5_ib_ucontext *c,
+ struct mlx5_user_mmap_entry *entry,
+ size_t length)
+{
+ return rdma_user_mmap_entry_insert_range(
+ &c->ibucontext, &entry->rdma_entry, length,
+ (MLX5_IB_MMAP_OFFSET_START << 16),
+ ((MLX5_IB_MMAP_OFFSET_END << 16) + (1UL << 16) - 1));
+}
- return ret;
+static struct mlx5_user_mmap_entry *
+alloc_var_entry(struct mlx5_ib_ucontext *c)
+{
+ struct mlx5_user_mmap_entry *entry;
+ struct mlx5_var_table *var_table;
+ u32 page_idx;
+ int err;
+
+ var_table = &to_mdev(c->ibucontext.device)->var_table;
+ entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+ if (!entry)
+ return ERR_PTR(-ENOMEM);
+
+ mutex_lock(&var_table->bitmap_lock);
+ page_idx = find_first_zero_bit(var_table->bitmap,
+ var_table->num_var_hw_entries);
+ if (page_idx >= var_table->num_var_hw_entries) {
+ err = -ENOSPC;
+ mutex_unlock(&var_table->bitmap_lock);
+ goto end;
+ }
+
+ set_bit(page_idx, var_table->bitmap);
+ mutex_unlock(&var_table->bitmap_lock);
+
+ entry->address = var_table->hw_start_addr +
+ (page_idx * var_table->stride_size);
+ entry->page_idx = page_idx;
+ entry->mmap_flag = MLX5_IB_MMAP_TYPE_VAR;
+
+ err = mlx5_rdma_user_mmap_entry_insert(c, entry,
+ var_table->stride_size);
+ if (err)
+ goto err_insert;
+
+ return entry;
+
+err_insert:
+ mutex_lock(&var_table->bitmap_lock);
+ clear_bit(page_idx, var_table->bitmap);
+ mutex_unlock(&var_table->bitmap_lock);
+end:
+ kfree(entry);
+ return ERR_PTR(err);
+}
+
+static int UVERBS_HANDLER(MLX5_IB_METHOD_VAR_OBJ_ALLOC)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uobject *uobj = uverbs_attr_get_uobject(
+ attrs, MLX5_IB_ATTR_VAR_OBJ_ALLOC_HANDLE);
+ struct mlx5_ib_ucontext *c;
+ struct mlx5_user_mmap_entry *entry;
+ u64 mmap_offset;
+ u32 length;
+ int err;
+
+ c = to_mucontext(ib_uverbs_get_ucontext(attrs));
+ if (IS_ERR(c))
+ return PTR_ERR(c);
+
+ entry = alloc_var_entry(c);
+ if (IS_ERR(entry))
+ return PTR_ERR(entry);
+
+ mmap_offset = mlx5_entry_to_mmap_offset(entry);
+ length = entry->rdma_entry.npages * PAGE_SIZE;
+ uobj->object = entry;
+ uverbs_finalize_uobj_create(attrs, MLX5_IB_ATTR_VAR_OBJ_ALLOC_HANDLE);
+
+ err = uverbs_copy_to(attrs, MLX5_IB_ATTR_VAR_OBJ_ALLOC_MMAP_OFFSET,
+ &mmap_offset, sizeof(mmap_offset));
+ if (err)
+ return err;
+
+ err = uverbs_copy_to(attrs, MLX5_IB_ATTR_VAR_OBJ_ALLOC_PAGE_ID,
+ &entry->page_idx, sizeof(entry->page_idx));
+ if (err)
+ return err;
+
+ err = uverbs_copy_to(attrs, MLX5_IB_ATTR_VAR_OBJ_ALLOC_MMAP_LENGTH,
+ &length, sizeof(length));
+ return err;
}
-static const char * const names[] = {
- "rx_write_requests",
- "rx_read_requests",
- "rx_atomic_requests",
- "out_of_buffer",
- "out_of_sequence",
- "duplicate_request",
- "rnr_nak_retry_err",
- "packet_seq_err",
- "implied_nak_seq_err",
- "local_ack_timeout_err",
+DECLARE_UVERBS_NAMED_METHOD(
+ MLX5_IB_METHOD_VAR_OBJ_ALLOC,
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_VAR_OBJ_ALLOC_HANDLE,
+ MLX5_IB_OBJECT_VAR,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_VAR_OBJ_ALLOC_PAGE_ID,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_VAR_OBJ_ALLOC_MMAP_LENGTH,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_VAR_OBJ_ALLOC_MMAP_OFFSET,
+ UVERBS_ATTR_TYPE(u64),
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(
+ MLX5_IB_METHOD_VAR_OBJ_DESTROY,
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_VAR_OBJ_DESTROY_HANDLE,
+ MLX5_IB_OBJECT_VAR,
+ UVERBS_ACCESS_DESTROY,
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_VAR,
+ UVERBS_TYPE_ALLOC_IDR(mmap_obj_cleanup),
+ &UVERBS_METHOD(MLX5_IB_METHOD_VAR_OBJ_ALLOC),
+ &UVERBS_METHOD(MLX5_IB_METHOD_VAR_OBJ_DESTROY));
+
+static bool var_is_supported(struct ib_device *device)
+{
+ struct mlx5_ib_dev *dev = to_mdev(device);
+
+ return (MLX5_CAP_GEN_64(dev->mdev, general_obj_types) &
+ MLX5_GENERAL_OBJ_TYPES_CAP_VIRTIO_NET_Q);
+}
+
+static struct mlx5_user_mmap_entry *
+alloc_uar_entry(struct mlx5_ib_ucontext *c,
+ enum mlx5_ib_uapi_uar_alloc_type alloc_type)
+{
+ struct mlx5_user_mmap_entry *entry;
+ struct mlx5_ib_dev *dev;
+ u32 uar_index;
+ int err;
+
+ entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+ if (!entry)
+ return ERR_PTR(-ENOMEM);
+
+ dev = to_mdev(c->ibucontext.device);
+ err = mlx5_cmd_uar_alloc(dev->mdev, &uar_index, c->devx_uid);
+ if (err)
+ goto end;
+
+ entry->page_idx = uar_index;
+ entry->address = uar_index2paddress(dev, uar_index);
+ if (alloc_type == MLX5_IB_UAPI_UAR_ALLOC_TYPE_BF)
+ entry->mmap_flag = MLX5_IB_MMAP_TYPE_UAR_WC;
+ else
+ entry->mmap_flag = MLX5_IB_MMAP_TYPE_UAR_NC;
+
+ err = mlx5_rdma_user_mmap_entry_insert(c, entry, PAGE_SIZE);
+ if (err)
+ goto err_insert;
+
+ return entry;
+
+err_insert:
+ mlx5_cmd_uar_dealloc(dev->mdev, uar_index, c->devx_uid);
+end:
+ kfree(entry);
+ return ERR_PTR(err);
+}
+
+static int UVERBS_HANDLER(MLX5_IB_METHOD_UAR_OBJ_ALLOC)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uobject *uobj = uverbs_attr_get_uobject(
+ attrs, MLX5_IB_ATTR_UAR_OBJ_ALLOC_HANDLE);
+ enum mlx5_ib_uapi_uar_alloc_type alloc_type;
+ struct mlx5_ib_ucontext *c;
+ struct mlx5_user_mmap_entry *entry;
+ u64 mmap_offset;
+ u32 length;
+ int err;
+
+ c = to_mucontext(ib_uverbs_get_ucontext(attrs));
+ if (IS_ERR(c))
+ return PTR_ERR(c);
+
+ err = uverbs_get_const(&alloc_type, attrs,
+ MLX5_IB_ATTR_UAR_OBJ_ALLOC_TYPE);
+ if (err)
+ return err;
+
+ if (alloc_type != MLX5_IB_UAPI_UAR_ALLOC_TYPE_BF &&
+ alloc_type != MLX5_IB_UAPI_UAR_ALLOC_TYPE_NC)
+ return -EOPNOTSUPP;
+
+ if (!mlx5_wc_support_get(to_mdev(c->ibucontext.device)->mdev) &&
+ alloc_type == MLX5_IB_UAPI_UAR_ALLOC_TYPE_BF)
+ return -EOPNOTSUPP;
+
+ entry = alloc_uar_entry(c, alloc_type);
+ if (IS_ERR(entry))
+ return PTR_ERR(entry);
+
+ mmap_offset = mlx5_entry_to_mmap_offset(entry);
+ length = entry->rdma_entry.npages * PAGE_SIZE;
+ uobj->object = entry;
+ uverbs_finalize_uobj_create(attrs, MLX5_IB_ATTR_UAR_OBJ_ALLOC_HANDLE);
+
+ err = uverbs_copy_to(attrs, MLX5_IB_ATTR_UAR_OBJ_ALLOC_MMAP_OFFSET,
+ &mmap_offset, sizeof(mmap_offset));
+ if (err)
+ return err;
+
+ err = uverbs_copy_to(attrs, MLX5_IB_ATTR_UAR_OBJ_ALLOC_PAGE_ID,
+ &entry->page_idx, sizeof(entry->page_idx));
+ if (err)
+ return err;
+
+ err = uverbs_copy_to(attrs, MLX5_IB_ATTR_UAR_OBJ_ALLOC_MMAP_LENGTH,
+ &length, sizeof(length));
+ return err;
+}
+
+DECLARE_UVERBS_NAMED_METHOD(
+ MLX5_IB_METHOD_UAR_OBJ_ALLOC,
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_UAR_OBJ_ALLOC_HANDLE,
+ MLX5_IB_OBJECT_UAR,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY),
+ UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_UAR_OBJ_ALLOC_TYPE,
+ enum mlx5_ib_uapi_uar_alloc_type,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_UAR_OBJ_ALLOC_PAGE_ID,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_UAR_OBJ_ALLOC_MMAP_LENGTH,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_UAR_OBJ_ALLOC_MMAP_OFFSET,
+ UVERBS_ATTR_TYPE(u64),
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(
+ MLX5_IB_METHOD_UAR_OBJ_DESTROY,
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_UAR_OBJ_DESTROY_HANDLE,
+ MLX5_IB_OBJECT_UAR,
+ UVERBS_ACCESS_DESTROY,
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_UAR,
+ UVERBS_TYPE_ALLOC_IDR(mmap_obj_cleanup),
+ &UVERBS_METHOD(MLX5_IB_METHOD_UAR_OBJ_ALLOC),
+ &UVERBS_METHOD(MLX5_IB_METHOD_UAR_OBJ_DESTROY));
+
+ADD_UVERBS_ATTRIBUTES_SIMPLE(
+ mlx5_ib_query_context,
+ UVERBS_OBJECT_DEVICE,
+ UVERBS_METHOD_QUERY_CONTEXT,
+ UVERBS_ATTR_PTR_OUT(
+ MLX5_IB_ATTR_QUERY_CONTEXT_RESP_UCTX,
+ UVERBS_ATTR_STRUCT(struct mlx5_ib_alloc_ucontext_resp,
+ dump_fill_mkey),
+ UA_MANDATORY));
+
+ADD_UVERBS_ATTRIBUTES_SIMPLE(
+ mlx5_ib_reg_dmabuf_mr,
+ UVERBS_OBJECT_MR,
+ UVERBS_METHOD_REG_DMABUF_MR,
+ UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_REG_DMABUF_MR_ACCESS_FLAGS,
+ enum mlx5_ib_uapi_reg_dmabuf_flags,
+ UA_OPTIONAL));
+
+static const struct uapi_definition mlx5_ib_defs[] = {
+ UAPI_DEF_CHAIN(mlx5_ib_devx_defs),
+ UAPI_DEF_CHAIN(mlx5_ib_flow_defs),
+ UAPI_DEF_CHAIN(mlx5_ib_qos_defs),
+ UAPI_DEF_CHAIN(mlx5_ib_std_types_defs),
+ UAPI_DEF_CHAIN(mlx5_ib_dm_defs),
+ UAPI_DEF_CHAIN(mlx5_ib_create_cq_defs),
+
+ UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_DEVICE, &mlx5_ib_query_context),
+ UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_MR, &mlx5_ib_reg_dmabuf_mr),
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(MLX5_IB_OBJECT_VAR,
+ UAPI_DEF_IS_OBJ_SUPPORTED(var_is_supported)),
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(MLX5_IB_OBJECT_UAR),
+ {}
+};
+
+static void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev)
+{
+ mlx5_ib_data_direct_cleanup(dev);
+ mlx5_ib_cleanup_multiport_master(dev);
+ WARN_ON(!xa_empty(&dev->odp_mkeys));
+ mutex_destroy(&dev->cap_mask_mutex);
+ WARN_ON(!xa_empty(&dev->sig_mrs));
+ WARN_ON(!bitmap_empty(dev->dm.memic_alloc_pages, MLX5_MAX_MEMIC_PAGES));
+ mlx5r_macsec_dealloc_gids(dev);
+}
+
+static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_core_dev *mdev = dev->mdev;
+ int err, i;
+
+ dev->ib_dev.node_type = RDMA_NODE_IB_CA;
+ dev->ib_dev.local_dma_lkey = 0 /* not supported for now */;
+ dev->ib_dev.dev.parent = mdev->device;
+ dev->ib_dev.lag_flags = RDMA_LAG_FLAGS_HASH_ALL_SLAVES;
+
+ for (i = 0; i < dev->num_ports; i++) {
+ spin_lock_init(&dev->port[i].mp.mpi_lock);
+ dev->port[i].roce.dev = dev;
+ dev->port[i].roce.native_port_num = i + 1;
+ dev->port[i].roce.last_port_state = IB_PORT_DOWN;
+ }
+
+ err = mlx5r_cmd_query_special_mkeys(dev);
+ if (err)
+ return err;
+
+ err = mlx5r_macsec_init_gids_and_devlist(dev);
+ if (err)
+ return err;
+
+ err = mlx5_ib_init_multiport_master(dev);
+ if (err)
+ goto err;
+
+ err = set_has_smi_cap(dev);
+ if (err)
+ goto err_mp;
+
+ err = mlx5_query_max_pkeys(&dev->ib_dev, &dev->pkey_table_len);
+ if (err)
+ goto err_mp;
+
+ if (mlx5_use_mad_ifc(dev))
+ get_ext_port_caps(dev);
+
+ dev->ib_dev.num_comp_vectors = mlx5_comp_vectors_max(mdev);
+
+ mutex_init(&dev->cap_mask_mutex);
+ mutex_init(&dev->data_direct_lock);
+ INIT_LIST_HEAD(&dev->qp_list);
+ spin_lock_init(&dev->reset_flow_resource_lock);
+ xa_init(&dev->odp_mkeys);
+ xa_init(&dev->sig_mrs);
+ atomic_set(&dev->mkey_var, 0);
+
+ spin_lock_init(&dev->dm.lock);
+ dev->dm.dev = mdev;
+ err = mlx5_ib_data_direct_init(dev);
+ if (err)
+ goto err_mp;
+
+ return 0;
+err_mp:
+ mlx5_ib_cleanup_multiport_master(dev);
+err:
+ mlx5r_macsec_dealloc_gids(dev);
+ return err;
+}
+
+static struct ib_device *mlx5_ib_add_sub_dev(struct ib_device *parent,
+ enum rdma_nl_dev_type type,
+ const char *name);
+static void mlx5_ib_del_sub_dev(struct ib_device *sub_dev);
+
+static const struct ib_device_ops mlx5_ib_dev_ops = {
+ .owner = THIS_MODULE,
+ .driver_id = RDMA_DRIVER_MLX5,
+ .uverbs_abi_ver = MLX5_IB_UVERBS_ABI_VERSION,
+
+ .add_gid = mlx5_ib_add_gid,
+ .add_sub_dev = mlx5_ib_add_sub_dev,
+ .alloc_mr = mlx5_ib_alloc_mr,
+ .alloc_mr_integrity = mlx5_ib_alloc_mr_integrity,
+ .alloc_pd = mlx5_ib_alloc_pd,
+ .alloc_ucontext = mlx5_ib_alloc_ucontext,
+ .attach_mcast = mlx5_ib_mcg_attach,
+ .check_mr_status = mlx5_ib_check_mr_status,
+ .create_ah = mlx5_ib_create_ah,
+ .create_cq = mlx5_ib_create_cq,
+ .create_qp = mlx5_ib_create_qp,
+ .create_srq = mlx5_ib_create_srq,
+ .create_user_ah = mlx5_ib_create_ah,
+ .dealloc_pd = mlx5_ib_dealloc_pd,
+ .dealloc_ucontext = mlx5_ib_dealloc_ucontext,
+ .del_gid = mlx5_ib_del_gid,
+ .del_sub_dev = mlx5_ib_del_sub_dev,
+ .dereg_mr = mlx5_ib_dereg_mr,
+ .destroy_ah = mlx5_ib_destroy_ah,
+ .destroy_cq = mlx5_ib_destroy_cq,
+ .destroy_qp = mlx5_ib_destroy_qp,
+ .destroy_srq = mlx5_ib_destroy_srq,
+ .detach_mcast = mlx5_ib_mcg_detach,
+ .disassociate_ucontext = mlx5_ib_disassociate_ucontext,
+ .drain_rq = mlx5_ib_drain_rq,
+ .drain_sq = mlx5_ib_drain_sq,
+ .device_group = &mlx5_attr_group,
+ .get_dev_fw_str = get_dev_fw_str,
+ .get_dma_mr = mlx5_ib_get_dma_mr,
+ .get_link_layer = mlx5_ib_port_link_layer,
+ .map_mr_sg = mlx5_ib_map_mr_sg,
+ .map_mr_sg_pi = mlx5_ib_map_mr_sg_pi,
+ .mmap = mlx5_ib_mmap,
+ .mmap_free = mlx5_ib_mmap_free,
+ .modify_cq = mlx5_ib_modify_cq,
+ .modify_device = mlx5_ib_modify_device,
+ .modify_port = mlx5_ib_modify_port,
+ .modify_qp = mlx5_ib_modify_qp,
+ .modify_srq = mlx5_ib_modify_srq,
+ .pre_destroy_cq = mlx5_ib_pre_destroy_cq,
+ .poll_cq = mlx5_ib_poll_cq,
+ .post_destroy_cq = mlx5_ib_post_destroy_cq,
+ .post_recv = mlx5_ib_post_recv_nodrain,
+ .post_send = mlx5_ib_post_send_nodrain,
+ .post_srq_recv = mlx5_ib_post_srq_recv,
+ .process_mad = mlx5_ib_process_mad,
+ .query_ah = mlx5_ib_query_ah,
+ .query_device = mlx5_ib_query_device,
+ .query_gid = mlx5_ib_query_gid,
+ .query_pkey = mlx5_ib_query_pkey,
+ .query_qp = mlx5_ib_query_qp,
+ .query_srq = mlx5_ib_query_srq,
+ .query_ucontext = mlx5_ib_query_ucontext,
+ .reg_user_mr = mlx5_ib_reg_user_mr,
+ .reg_user_mr_dmabuf = mlx5_ib_reg_user_mr_dmabuf,
+ .req_notify_cq = mlx5_ib_arm_cq,
+ .rereg_user_mr = mlx5_ib_rereg_user_mr,
+ .resize_cq = mlx5_ib_resize_cq,
+ .ufile_hw_cleanup = mlx5_ib_ufile_hw_cleanup,
+
+ INIT_RDMA_OBJ_SIZE(ib_ah, mlx5_ib_ah, ibah),
+ INIT_RDMA_OBJ_SIZE(ib_counters, mlx5_ib_mcounters, ibcntrs),
+ INIT_RDMA_OBJ_SIZE(ib_cq, mlx5_ib_cq, ibcq),
+ INIT_RDMA_OBJ_SIZE(ib_dmah, mlx5_ib_dmah, ibdmah),
+ INIT_RDMA_OBJ_SIZE(ib_pd, mlx5_ib_pd, ibpd),
+ INIT_RDMA_OBJ_SIZE(ib_qp, mlx5_ib_qp, ibqp),
+ INIT_RDMA_OBJ_SIZE(ib_srq, mlx5_ib_srq, ibsrq),
+ INIT_RDMA_OBJ_SIZE(ib_ucontext, mlx5_ib_ucontext, ibucontext),
+};
+
+static const struct ib_device_ops mlx5_ib_dev_ipoib_enhanced_ops = {
+ .rdma_netdev_get_params = mlx5_ib_rn_get_params,
+};
+
+static const struct ib_device_ops mlx5_ib_dev_sriov_ops = {
+ .get_vf_config = mlx5_ib_get_vf_config,
+ .get_vf_guid = mlx5_ib_get_vf_guid,
+ .get_vf_stats = mlx5_ib_get_vf_stats,
+ .set_vf_guid = mlx5_ib_set_vf_guid,
+ .set_vf_link_state = mlx5_ib_set_vf_link_state,
};
-static const size_t stats_offsets[] = {
- MLX5_BYTE_OFF(query_q_counter_out, rx_write_requests),
- MLX5_BYTE_OFF(query_q_counter_out, rx_read_requests),
- MLX5_BYTE_OFF(query_q_counter_out, rx_atomic_requests),
- MLX5_BYTE_OFF(query_q_counter_out, out_of_buffer),
- MLX5_BYTE_OFF(query_q_counter_out, out_of_sequence),
- MLX5_BYTE_OFF(query_q_counter_out, duplicate_request),
- MLX5_BYTE_OFF(query_q_counter_out, rnr_nak_retry_err),
- MLX5_BYTE_OFF(query_q_counter_out, packet_seq_err),
- MLX5_BYTE_OFF(query_q_counter_out, implied_nak_seq_err),
- MLX5_BYTE_OFF(query_q_counter_out, local_ack_timeout_err),
+static const struct ib_device_ops mlx5_ib_dev_mw_ops = {
+ .alloc_mw = mlx5_ib_alloc_mw,
+ .dealloc_mw = mlx5_ib_dealloc_mw,
+
+ INIT_RDMA_OBJ_SIZE(ib_mw, mlx5_ib_mw, ibmw),
+};
+
+static const struct ib_device_ops mlx5_ib_dev_xrc_ops = {
+ .alloc_xrcd = mlx5_ib_alloc_xrcd,
+ .dealloc_xrcd = mlx5_ib_dealloc_xrcd,
+
+ INIT_RDMA_OBJ_SIZE(ib_xrcd, mlx5_ib_xrcd, ibxrcd),
};
-static struct rdma_hw_stats *mlx5_ib_alloc_hw_stats(struct ib_device *ibdev,
- u8 port_num)
+static int mlx5_ib_init_var_table(struct mlx5_ib_dev *dev)
{
- BUILD_BUG_ON(ARRAY_SIZE(names) != ARRAY_SIZE(stats_offsets));
+ struct mlx5_core_dev *mdev = dev->mdev;
+ struct mlx5_var_table *var_table = &dev->var_table;
+ u8 log_doorbell_bar_size;
+ u8 log_doorbell_stride;
+ u64 bar_size;
+
+ log_doorbell_bar_size = MLX5_CAP_DEV_VDPA_EMULATION(mdev,
+ log_doorbell_bar_size);
+ log_doorbell_stride = MLX5_CAP_DEV_VDPA_EMULATION(mdev,
+ log_doorbell_stride);
+ var_table->hw_start_addr = dev->mdev->bar_addr +
+ MLX5_CAP64_DEV_VDPA_EMULATION(mdev,
+ doorbell_bar_offset);
+ bar_size = (1ULL << log_doorbell_bar_size) * 4096;
+ var_table->stride_size = 1ULL << log_doorbell_stride;
+ var_table->num_var_hw_entries = div_u64(bar_size,
+ var_table->stride_size);
+ mutex_init(&var_table->bitmap_lock);
+ var_table->bitmap = bitmap_zalloc(var_table->num_var_hw_entries,
+ GFP_KERNEL);
+ return (var_table->bitmap) ? 0 : -ENOMEM;
+}
- /* We support only per port stats */
- if (port_num == 0)
- return NULL;
+static void mlx5_ib_cleanup_ucaps(struct mlx5_ib_dev *dev)
+{
+ if (MLX5_CAP_GEN(dev->mdev, uctx_cap) & MLX5_UCTX_CAP_RDMA_CTRL)
+ ib_remove_ucap(RDMA_UCAP_MLX5_CTRL_LOCAL);
- return rdma_alloc_hw_stats_struct(names, ARRAY_SIZE(names),
- RDMA_HW_STATS_DEFAULT_LIFESPAN);
+ if (MLX5_CAP_GEN(dev->mdev, uctx_cap) &
+ MLX5_UCTX_CAP_RDMA_CTRL_OTHER_VHCA)
+ ib_remove_ucap(RDMA_UCAP_MLX5_CTRL_OTHER_VHCA);
}
-static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
- struct rdma_hw_stats *stats,
- u8 port, int index)
+static int mlx5_ib_init_ucaps(struct mlx5_ib_dev *dev)
{
- struct mlx5_ib_dev *dev = to_mdev(ibdev);
- int outlen = MLX5_ST_SZ_BYTES(query_q_counter_out);
- void *out;
- __be32 val;
int ret;
- int i;
- if (!port || !stats)
- return -ENOSYS;
+ if (MLX5_CAP_GEN(dev->mdev, uctx_cap) & MLX5_UCTX_CAP_RDMA_CTRL) {
+ ret = ib_create_ucap(RDMA_UCAP_MLX5_CTRL_LOCAL);
+ if (ret)
+ return ret;
+ }
- out = mlx5_vzalloc(outlen);
- if (!out)
- return -ENOMEM;
+ if (MLX5_CAP_GEN(dev->mdev, uctx_cap) &
+ MLX5_UCTX_CAP_RDMA_CTRL_OTHER_VHCA) {
+ ret = ib_create_ucap(RDMA_UCAP_MLX5_CTRL_OTHER_VHCA);
+ if (ret)
+ goto remove_local;
+ }
- ret = mlx5_core_query_q_counter(dev->mdev,
- dev->port[port - 1].q_cnt_id, 0,
- out, outlen);
- if (ret)
- goto free;
+ return 0;
- for (i = 0; i < ARRAY_SIZE(names); i++) {
- val = *(__be32 *)(out + stats_offsets[i]);
- stats->value[i] = (u64)be32_to_cpu(val);
+remove_local:
+ if (MLX5_CAP_GEN(dev->mdev, uctx_cap) & MLX5_UCTX_CAP_RDMA_CTRL)
+ ib_remove_ucap(RDMA_UCAP_MLX5_CTRL_LOCAL);
+ return ret;
+}
+
+static void mlx5_ib_stage_caps_cleanup(struct mlx5_ib_dev *dev)
+{
+ if (MLX5_CAP_GEN_2_64(dev->mdev, general_obj_types_127_64) &
+ MLX5_HCA_CAP_2_GENERAL_OBJECT_TYPES_RDMA_CTRL)
+ mlx5_ib_cleanup_ucaps(dev);
+
+ bitmap_free(dev->var_table.bitmap);
+}
+
+static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_core_dev *mdev = dev->mdev;
+ int err;
+
+ if (MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads) &&
+ IS_ENABLED(CONFIG_MLX5_CORE_IPOIB))
+ ib_set_device_ops(&dev->ib_dev,
+ &mlx5_ib_dev_ipoib_enhanced_ops);
+
+ if (mlx5_core_is_pf(mdev))
+ ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_sriov_ops);
+
+ dev->umr_fence = mlx5_get_umr_fence(MLX5_CAP_GEN(mdev, umr_fence));
+
+ if (MLX5_CAP_GEN(mdev, imaicl))
+ ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_mw_ops);
+
+ if (MLX5_CAP_GEN(mdev, xrc))
+ ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_xrc_ops);
+
+ if (MLX5_CAP_DEV_MEM(mdev, memic) ||
+ MLX5_CAP_GEN_64(dev->mdev, general_obj_types) &
+ MLX5_GENERAL_OBJ_TYPES_CAP_SW_ICM)
+ ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_dm_ops);
+
+ if (mdev->st)
+ ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_dmah_ops);
+
+ ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_ops);
+
+ if (IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS))
+ dev->ib_dev.driver_def = mlx5_ib_defs;
+
+ err = init_node_data(dev);
+ if (err)
+ return err;
+
+ if ((MLX5_CAP_GEN(dev->mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH) &&
+ (MLX5_CAP_GEN(dev->mdev, disable_local_lb_uc) ||
+ MLX5_CAP_GEN(dev->mdev, disable_local_lb_mc)))
+ mutex_init(&dev->lb.mutex);
+
+ if (MLX5_CAP_GEN_64(dev->mdev, general_obj_types) &
+ MLX5_GENERAL_OBJ_TYPES_CAP_VIRTIO_NET_Q) {
+ err = mlx5_ib_init_var_table(dev);
+ if (err)
+ return err;
+ }
+
+ if (MLX5_CAP_GEN_2_64(dev->mdev, general_obj_types_127_64) &
+ MLX5_HCA_CAP_2_GENERAL_OBJECT_TYPES_RDMA_CTRL) {
+ err = mlx5_ib_init_ucaps(dev);
+ if (err)
+ return err;
}
-free:
- kvfree(out);
- return ARRAY_SIZE(names);
+
+ dev->ib_dev.use_cq_dim = true;
+
+ return 0;
}
-static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
+static const struct ib_device_ops mlx5_ib_dev_port_ops = {
+ .get_port_immutable = mlx5_port_immutable,
+ .query_port = mlx5_ib_query_port,
+};
+
+static int mlx5_ib_stage_non_default_cb(struct mlx5_ib_dev *dev)
{
- struct mlx5_ib_dev *dev;
+ ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_port_ops);
+ return 0;
+}
+
+static const struct ib_device_ops mlx5_ib_dev_port_rep_ops = {
+ .get_port_immutable = mlx5_port_rep_immutable,
+ .query_port = mlx5_ib_rep_query_port,
+ .query_pkey = mlx5_ib_rep_query_pkey,
+};
+
+static int mlx5_ib_stage_raw_eth_non_default_cb(struct mlx5_ib_dev *dev)
+{
+ ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_port_rep_ops);
+ return 0;
+}
+
+static const struct ib_device_ops mlx5_ib_dev_common_roce_ops = {
+ .create_rwq_ind_table = mlx5_ib_create_rwq_ind_table,
+ .create_wq = mlx5_ib_create_wq,
+ .destroy_rwq_ind_table = mlx5_ib_destroy_rwq_ind_table,
+ .destroy_wq = mlx5_ib_destroy_wq,
+ .modify_wq = mlx5_ib_modify_wq,
+
+ INIT_RDMA_OBJ_SIZE(ib_rwq_ind_table, mlx5_ib_rwq_ind_table,
+ ib_rwq_ind_tbl),
+};
+
+static int mlx5_ib_roce_init(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_core_dev *mdev = dev->mdev;
enum rdma_link_layer ll;
int port_type_cap;
- const char *name;
+ u32 port_num = 0;
int err;
- int i;
port_type_cap = MLX5_CAP_GEN(mdev, port_type);
ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
- printk_once(KERN_INFO "%s", mlx5_version);
+ if (ll == IB_LINK_LAYER_ETHERNET) {
+ ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_common_roce_ops);
+
+ port_num = mlx5_core_native_port_num(dev->mdev) - 1;
- dev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*dev));
- if (!dev)
- return NULL;
+ /* Register only for native ports */
+ mlx5_mdev_netdev_track(dev, port_num);
- dev->mdev = mdev;
+ err = mlx5_enable_eth(dev);
+ if (err)
+ goto cleanup;
+ }
+
+ return 0;
+cleanup:
+ mlx5_mdev_netdev_untrack(dev, port_num);
+ return err;
+}
+
+static void mlx5_ib_roce_cleanup(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_core_dev *mdev = dev->mdev;
+ enum rdma_link_layer ll;
+ int port_type_cap;
+ u32 port_num;
+
+ port_type_cap = MLX5_CAP_GEN(mdev, port_type);
+ ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
+
+ if (ll == IB_LINK_LAYER_ETHERNET) {
+ mlx5_disable_eth(dev);
+
+ port_num = mlx5_core_native_port_num(dev->mdev) - 1;
+ mlx5_mdev_netdev_untrack(dev, port_num);
+ }
+}
+
+static int mlx5_ib_stage_cong_debugfs_init(struct mlx5_ib_dev *dev)
+{
+ mlx5_ib_init_cong_debugfs(dev,
+ mlx5_core_native_port_num(dev->mdev) - 1);
+ return 0;
+}
- dev->port = kcalloc(MLX5_CAP_GEN(mdev, num_ports), sizeof(*dev->port),
- GFP_KERNEL);
- if (!dev->port)
- goto err_dealloc;
+static void mlx5_ib_stage_cong_debugfs_cleanup(struct mlx5_ib_dev *dev)
+{
+ mlx5_ib_cleanup_cong_debugfs(dev,
+ mlx5_core_native_port_num(dev->mdev) - 1);
+}
- rwlock_init(&dev->roce.netdev_lock);
- err = get_port_caps(dev);
+static int mlx5_ib_stage_bfrag_init(struct mlx5_ib_dev *dev)
+{
+ int err;
+
+ err = mlx5_alloc_bfreg(dev->mdev, &dev->bfreg, false, false);
if (err)
- goto err_free_port;
+ return err;
- if (mlx5_use_mad_ifc(dev))
- get_ext_port_caps(dev);
+ err = mlx5_alloc_bfreg(dev->mdev, &dev->fp_bfreg, false, true);
+ if (err)
+ mlx5_free_bfreg(dev->mdev, &dev->bfreg);
+
+ return err;
+}
+
+static void mlx5_ib_stage_bfrag_cleanup(struct mlx5_ib_dev *dev)
+{
+ mlx5_free_bfreg(dev->mdev, &dev->fp_bfreg);
+ mlx5_free_bfreg(dev->mdev, &dev->bfreg);
+}
- MLX5_INIT_DOORBELL_LOCK(&dev->uar_lock);
+static int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev)
+{
+ const char *name;
- if (!mlx5_lag_is_active(mdev))
+ if (dev->sub_dev_name) {
+ name = dev->sub_dev_name;
+ ib_mark_name_assigned_by_user(&dev->ib_dev);
+ } else if (!mlx5_lag_is_active(dev->mdev))
name = "mlx5_%d";
else
name = "mlx5_bond_%d";
+ return ib_register_device(&dev->ib_dev, name, &dev->mdev->pdev->dev);
+}
- strlcpy(dev->ib_dev.name, name, IB_DEVICE_NAME_MAX);
- dev->ib_dev.owner = THIS_MODULE;
- dev->ib_dev.node_type = RDMA_NODE_IB_CA;
- dev->ib_dev.local_dma_lkey = 0 /* not supported for now */;
- dev->num_ports = MLX5_CAP_GEN(mdev, num_ports);
- dev->ib_dev.phys_port_cnt = dev->num_ports;
- dev->ib_dev.num_comp_vectors =
- dev->mdev->priv.eq_table.num_comp_vectors;
- dev->ib_dev.dma_device = &mdev->pdev->dev;
-
- dev->ib_dev.uverbs_abi_ver = MLX5_IB_UVERBS_ABI_VERSION;
- dev->ib_dev.uverbs_cmd_mask =
- (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
- (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
- (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
- (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
- (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
- (1ull << IB_USER_VERBS_CMD_CREATE_AH) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_AH) |
- (1ull << IB_USER_VERBS_CMD_REG_MR) |
- (1ull << IB_USER_VERBS_CMD_REREG_MR) |
- (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
- (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
- (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
- (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
- (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
- (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
- (1ull << IB_USER_VERBS_CMD_QUERY_QP) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
- (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) |
- (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) |
- (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
- (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
- (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) |
- (1ull << IB_USER_VERBS_CMD_CREATE_XSRQ) |
- (1ull << IB_USER_VERBS_CMD_OPEN_QP);
- dev->ib_dev.uverbs_ex_cmd_mask =
- (1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE) |
- (1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ) |
- (1ull << IB_USER_VERBS_EX_CMD_CREATE_QP) |
- (1ull << IB_USER_VERBS_EX_CMD_MODIFY_QP);
-
- dev->ib_dev.query_device = mlx5_ib_query_device;
- dev->ib_dev.query_port = mlx5_ib_query_port;
- dev->ib_dev.get_link_layer = mlx5_ib_port_link_layer;
- if (ll == IB_LINK_LAYER_ETHERNET)
- dev->ib_dev.get_netdev = mlx5_ib_get_netdev;
- dev->ib_dev.query_gid = mlx5_ib_query_gid;
- dev->ib_dev.add_gid = mlx5_ib_add_gid;
- dev->ib_dev.del_gid = mlx5_ib_del_gid;
- dev->ib_dev.query_pkey = mlx5_ib_query_pkey;
- dev->ib_dev.modify_device = mlx5_ib_modify_device;
- dev->ib_dev.modify_port = mlx5_ib_modify_port;
- dev->ib_dev.alloc_ucontext = mlx5_ib_alloc_ucontext;
- dev->ib_dev.dealloc_ucontext = mlx5_ib_dealloc_ucontext;
- dev->ib_dev.mmap = mlx5_ib_mmap;
- dev->ib_dev.alloc_pd = mlx5_ib_alloc_pd;
- dev->ib_dev.dealloc_pd = mlx5_ib_dealloc_pd;
- dev->ib_dev.create_ah = mlx5_ib_create_ah;
- dev->ib_dev.query_ah = mlx5_ib_query_ah;
- dev->ib_dev.destroy_ah = mlx5_ib_destroy_ah;
- dev->ib_dev.create_srq = mlx5_ib_create_srq;
- dev->ib_dev.modify_srq = mlx5_ib_modify_srq;
- dev->ib_dev.query_srq = mlx5_ib_query_srq;
- dev->ib_dev.destroy_srq = mlx5_ib_destroy_srq;
- dev->ib_dev.post_srq_recv = mlx5_ib_post_srq_recv;
- dev->ib_dev.create_qp = mlx5_ib_create_qp;
- dev->ib_dev.modify_qp = mlx5_ib_modify_qp;
- dev->ib_dev.query_qp = mlx5_ib_query_qp;
- dev->ib_dev.destroy_qp = mlx5_ib_destroy_qp;
- dev->ib_dev.post_send = mlx5_ib_post_send;
- dev->ib_dev.post_recv = mlx5_ib_post_recv;
- dev->ib_dev.create_cq = mlx5_ib_create_cq;
- dev->ib_dev.modify_cq = mlx5_ib_modify_cq;
- dev->ib_dev.resize_cq = mlx5_ib_resize_cq;
- dev->ib_dev.destroy_cq = mlx5_ib_destroy_cq;
- dev->ib_dev.poll_cq = mlx5_ib_poll_cq;
- dev->ib_dev.req_notify_cq = mlx5_ib_arm_cq;
- dev->ib_dev.get_dma_mr = mlx5_ib_get_dma_mr;
- dev->ib_dev.reg_user_mr = mlx5_ib_reg_user_mr;
- dev->ib_dev.rereg_user_mr = mlx5_ib_rereg_user_mr;
- dev->ib_dev.dereg_mr = mlx5_ib_dereg_mr;
- dev->ib_dev.attach_mcast = mlx5_ib_mcg_attach;
- dev->ib_dev.detach_mcast = mlx5_ib_mcg_detach;
- dev->ib_dev.process_mad = mlx5_ib_process_mad;
- dev->ib_dev.alloc_mr = mlx5_ib_alloc_mr;
- dev->ib_dev.map_mr_sg = mlx5_ib_map_mr_sg;
- dev->ib_dev.check_mr_status = mlx5_ib_check_mr_status;
- dev->ib_dev.get_port_immutable = mlx5_port_immutable;
- dev->ib_dev.get_dev_fw_str = get_dev_fw_str;
- if (mlx5_core_is_pf(mdev)) {
- dev->ib_dev.get_vf_config = mlx5_ib_get_vf_config;
- dev->ib_dev.set_vf_link_state = mlx5_ib_set_vf_link_state;
- dev->ib_dev.get_vf_stats = mlx5_ib_get_vf_stats;
- dev->ib_dev.set_vf_guid = mlx5_ib_set_vf_guid;
- }
-
- dev->ib_dev.disassociate_ucontext = mlx5_ib_disassociate_ucontext;
-
- mlx5_ib_internal_fill_odp_caps(dev);
+static void mlx5_ib_stage_pre_ib_reg_umr_cleanup(struct mlx5_ib_dev *dev)
+{
+ mlx5_mkey_cache_cleanup(dev);
+ mlx5r_umr_resource_cleanup(dev);
+ mlx5r_umr_cleanup(dev);
+}
- if (MLX5_CAP_GEN(mdev, imaicl)) {
- dev->ib_dev.alloc_mw = mlx5_ib_alloc_mw;
- dev->ib_dev.dealloc_mw = mlx5_ib_dealloc_mw;
- dev->ib_dev.uverbs_cmd_mask |=
- (1ull << IB_USER_VERBS_CMD_ALLOC_MW) |
- (1ull << IB_USER_VERBS_CMD_DEALLOC_MW);
- }
-
- if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt) &&
- MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) {
- dev->ib_dev.get_hw_stats = mlx5_ib_get_hw_stats;
- dev->ib_dev.alloc_hw_stats = mlx5_ib_alloc_hw_stats;
- }
-
- if (MLX5_CAP_GEN(mdev, xrc)) {
- dev->ib_dev.alloc_xrcd = mlx5_ib_alloc_xrcd;
- dev->ib_dev.dealloc_xrcd = mlx5_ib_dealloc_xrcd;
- dev->ib_dev.uverbs_cmd_mask |=
- (1ull << IB_USER_VERBS_CMD_OPEN_XRCD) |
- (1ull << IB_USER_VERBS_CMD_CLOSE_XRCD);
- }
-
- if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) ==
- IB_LINK_LAYER_ETHERNET) {
- dev->ib_dev.create_flow = mlx5_ib_create_flow;
- dev->ib_dev.destroy_flow = mlx5_ib_destroy_flow;
- dev->ib_dev.create_wq = mlx5_ib_create_wq;
- dev->ib_dev.modify_wq = mlx5_ib_modify_wq;
- dev->ib_dev.destroy_wq = mlx5_ib_destroy_wq;
- dev->ib_dev.create_rwq_ind_table = mlx5_ib_create_rwq_ind_table;
- dev->ib_dev.destroy_rwq_ind_table = mlx5_ib_destroy_rwq_ind_table;
- dev->ib_dev.uverbs_ex_cmd_mask |=
- (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) |
- (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW) |
- (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) |
- (1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) |
- (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) |
- (1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) |
- (1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL);
+static void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev)
+{
+ ib_unregister_device(&dev->ib_dev);
+}
+
+static int mlx5_ib_stage_post_ib_reg_umr_init(struct mlx5_ib_dev *dev)
+{
+ int ret;
+
+ ret = mlx5r_umr_init(dev);
+ if (ret)
+ return ret;
+
+ ret = mlx5_mkey_cache_init(dev);
+ if (ret)
+ mlx5_ib_warn(dev, "mr cache init failed %d\n", ret);
+ return ret;
+}
+
+static int mlx5_ib_stage_delay_drop_init(struct mlx5_ib_dev *dev)
+{
+ struct dentry *root;
+
+ if (!(dev->ib_dev.attrs.raw_packet_caps & IB_RAW_PACKET_CAP_DELAY_DROP))
+ return 0;
+
+ mutex_init(&dev->delay_drop.lock);
+ dev->delay_drop.dev = dev;
+ dev->delay_drop.activate = false;
+ dev->delay_drop.timeout = MLX5_MAX_DELAY_DROP_TIMEOUT_MS * 1000;
+ INIT_WORK(&dev->delay_drop.delay_drop_work, delay_drop_handler);
+ atomic_set(&dev->delay_drop.rqs_cnt, 0);
+ atomic_set(&dev->delay_drop.events_cnt, 0);
+
+ if (!mlx5_debugfs_root)
+ return 0;
+
+ root = debugfs_create_dir("delay_drop", mlx5_debugfs_get_dev_root(dev->mdev));
+ dev->delay_drop.dir_debugfs = root;
+
+ debugfs_create_atomic_t("num_timeout_events", 0400, root,
+ &dev->delay_drop.events_cnt);
+ debugfs_create_atomic_t("num_rqs", 0400, root,
+ &dev->delay_drop.rqs_cnt);
+ debugfs_create_file("timeout", 0600, root, &dev->delay_drop,
+ &fops_delay_drop_timeout);
+ return 0;
+}
+
+static void mlx5_ib_stage_delay_drop_cleanup(struct mlx5_ib_dev *dev)
+{
+ if (!(dev->ib_dev.attrs.raw_packet_caps & IB_RAW_PACKET_CAP_DELAY_DROP))
+ return;
+
+ cancel_work_sync(&dev->delay_drop.delay_drop_work);
+ if (!dev->delay_drop.dir_debugfs)
+ return;
+
+ debugfs_remove_recursive(dev->delay_drop.dir_debugfs);
+ dev->delay_drop.dir_debugfs = NULL;
+}
+
+static int mlx5_ib_stage_dev_notifier_init(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_ib_resources *devr = &dev->devr;
+ int port;
+
+ for (port = 0; port < ARRAY_SIZE(devr->ports); ++port)
+ INIT_WORK(&devr->ports[port].pkey_change_work,
+ pkey_change_handler);
+
+ dev->mdev_events.notifier_call = mlx5_ib_event;
+ mlx5_notifier_register(dev->mdev, &dev->mdev_events);
+
+ mlx5r_macsec_event_register(dev);
+
+ return 0;
+}
+
+static void mlx5_ib_stage_dev_notifier_cleanup(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_ib_resources *devr = &dev->devr;
+ int port;
+
+ mlx5r_macsec_event_unregister(dev);
+ mlx5_notifier_unregister(dev->mdev, &dev->mdev_events);
+
+ for (port = 0; port < ARRAY_SIZE(devr->ports); ++port)
+ cancel_work_sync(&devr->ports[port].pkey_change_work);
+}
+
+void mlx5_ib_data_direct_bind(struct mlx5_ib_dev *ibdev,
+ struct mlx5_data_direct_dev *dev)
+{
+ mutex_lock(&ibdev->data_direct_lock);
+ ibdev->data_direct_dev = dev;
+ mutex_unlock(&ibdev->data_direct_lock);
+}
+
+void mlx5_ib_data_direct_unbind(struct mlx5_ib_dev *ibdev)
+{
+ mutex_lock(&ibdev->data_direct_lock);
+ mlx5_ib_revoke_data_direct_mrs(ibdev);
+ ibdev->data_direct_dev = NULL;
+ mutex_unlock(&ibdev->data_direct_lock);
+}
+
+void __mlx5_ib_remove(struct mlx5_ib_dev *dev,
+ const struct mlx5_ib_profile *profile,
+ int stage)
+{
+ dev->ib_active = false;
+
+ /* Number of stages to cleanup */
+ while (stage) {
+ stage--;
+ if (profile->stage[stage].cleanup)
+ profile->stage[stage].cleanup(dev);
}
- err = init_node_data(dev);
- if (err)
- goto err_free_port;
- mutex_init(&dev->flow_db.lock);
- mutex_init(&dev->cap_mask_mutex);
- INIT_LIST_HEAD(&dev->qp_list);
- spin_lock_init(&dev->reset_flow_resource_lock);
+ kfree(dev->port);
+ ib_dealloc_device(&dev->ib_dev);
+}
- if (ll == IB_LINK_LAYER_ETHERNET) {
- err = mlx5_enable_eth(dev);
- if (err)
- goto err_free_port;
+int __mlx5_ib_add(struct mlx5_ib_dev *dev,
+ const struct mlx5_ib_profile *profile)
+{
+ int err;
+ int i;
+
+ dev->profile = profile;
+
+ for (i = 0; i < MLX5_IB_STAGE_MAX; i++) {
+ if (profile->stage[i].init) {
+ err = profile->stage[i].init(dev);
+ if (err)
+ goto err_out;
+ }
}
- err = create_dev_resources(&dev->devr);
- if (err)
- goto err_disable_eth;
+ dev->ib_active = true;
+ return 0;
- err = mlx5_ib_odp_init_one(dev);
- if (err)
- goto err_rsrc;
+err_out:
+ /* Clean up stages which were initialized */
+ while (i) {
+ i--;
+ if (profile->stage[i].cleanup)
+ profile->stage[i].cleanup(dev);
+ }
+ return -ENOMEM;
+}
- err = mlx5_ib_alloc_q_counters(dev);
- if (err)
- goto err_odp;
+static const struct mlx5_ib_profile pf_profile = {
+ STAGE_CREATE(MLX5_IB_STAGE_INIT,
+ mlx5_ib_stage_init_init,
+ mlx5_ib_stage_init_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_FS,
+ mlx5_ib_fs_init,
+ mlx5_ib_fs_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_CAPS,
+ mlx5_ib_stage_caps_init,
+ mlx5_ib_stage_caps_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_NON_DEFAULT_CB,
+ mlx5_ib_stage_non_default_cb,
+ NULL),
+ STAGE_CREATE(MLX5_IB_STAGE_ROCE,
+ mlx5_ib_roce_init,
+ mlx5_ib_roce_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_QP,
+ mlx5_init_qp_table,
+ mlx5_cleanup_qp_table),
+ STAGE_CREATE(MLX5_IB_STAGE_SRQ,
+ mlx5_init_srq_table,
+ mlx5_cleanup_srq_table),
+ STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES,
+ mlx5_ib_dev_res_init,
+ mlx5_ib_dev_res_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_ODP,
+ mlx5_ib_odp_init_one,
+ mlx5_ib_odp_cleanup_one),
+ STAGE_CREATE(MLX5_IB_STAGE_COUNTERS,
+ mlx5_ib_counters_init,
+ mlx5_ib_counters_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_CONG_DEBUGFS,
+ mlx5_ib_stage_cong_debugfs_init,
+ mlx5_ib_stage_cong_debugfs_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_BFREG,
+ mlx5_ib_stage_bfrag_init,
+ mlx5_ib_stage_bfrag_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_PRE_IB_REG_UMR,
+ NULL,
+ mlx5_ib_stage_pre_ib_reg_umr_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_WHITELIST_UID,
+ mlx5_ib_devx_init,
+ mlx5_ib_devx_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_IB_REG,
+ mlx5_ib_stage_ib_reg_init,
+ mlx5_ib_stage_ib_reg_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_DEVICE_NOTIFIER,
+ mlx5_ib_stage_dev_notifier_init,
+ mlx5_ib_stage_dev_notifier_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_POST_IB_REG_UMR,
+ mlx5_ib_stage_post_ib_reg_umr_init,
+ NULL),
+ STAGE_CREATE(MLX5_IB_STAGE_DELAY_DROP,
+ mlx5_ib_stage_delay_drop_init,
+ mlx5_ib_stage_delay_drop_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_RESTRACK,
+ mlx5_ib_restrack_init,
+ NULL),
+};
- err = ib_register_device(&dev->ib_dev, NULL);
- if (err)
- goto err_q_cnt;
+const struct mlx5_ib_profile raw_eth_profile = {
+ STAGE_CREATE(MLX5_IB_STAGE_INIT,
+ mlx5_ib_stage_init_init,
+ mlx5_ib_stage_init_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_FS,
+ mlx5_ib_fs_init,
+ mlx5_ib_fs_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_CAPS,
+ mlx5_ib_stage_caps_init,
+ mlx5_ib_stage_caps_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_NON_DEFAULT_CB,
+ mlx5_ib_stage_raw_eth_non_default_cb,
+ NULL),
+ STAGE_CREATE(MLX5_IB_STAGE_ROCE,
+ mlx5_ib_roce_init,
+ mlx5_ib_roce_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_QP,
+ mlx5_init_qp_table,
+ mlx5_cleanup_qp_table),
+ STAGE_CREATE(MLX5_IB_STAGE_SRQ,
+ mlx5_init_srq_table,
+ mlx5_cleanup_srq_table),
+ STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES,
+ mlx5_ib_dev_res_init,
+ mlx5_ib_dev_res_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_COUNTERS,
+ mlx5_ib_counters_init,
+ mlx5_ib_counters_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_CONG_DEBUGFS,
+ mlx5_ib_stage_cong_debugfs_init,
+ mlx5_ib_stage_cong_debugfs_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_BFREG,
+ mlx5_ib_stage_bfrag_init,
+ mlx5_ib_stage_bfrag_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_PRE_IB_REG_UMR,
+ NULL,
+ mlx5_ib_stage_pre_ib_reg_umr_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_WHITELIST_UID,
+ mlx5_ib_devx_init,
+ mlx5_ib_devx_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_IB_REG,
+ mlx5_ib_stage_ib_reg_init,
+ mlx5_ib_stage_ib_reg_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_DEVICE_NOTIFIER,
+ mlx5_ib_stage_dev_notifier_init,
+ mlx5_ib_stage_dev_notifier_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_POST_IB_REG_UMR,
+ mlx5_ib_stage_post_ib_reg_umr_init,
+ NULL),
+ STAGE_CREATE(MLX5_IB_STAGE_DELAY_DROP,
+ mlx5_ib_stage_delay_drop_init,
+ mlx5_ib_stage_delay_drop_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_RESTRACK,
+ mlx5_ib_restrack_init,
+ NULL),
+};
- err = create_umr_res(dev);
- if (err)
- goto err_dev;
+static const struct mlx5_ib_profile plane_profile = {
+ STAGE_CREATE(MLX5_IB_STAGE_INIT,
+ mlx5_ib_stage_init_init,
+ mlx5_ib_stage_init_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_CAPS,
+ mlx5_ib_stage_caps_init,
+ mlx5_ib_stage_caps_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_NON_DEFAULT_CB,
+ mlx5_ib_stage_non_default_cb,
+ NULL),
+ STAGE_CREATE(MLX5_IB_STAGE_QP,
+ mlx5_init_qp_table,
+ mlx5_cleanup_qp_table),
+ STAGE_CREATE(MLX5_IB_STAGE_SRQ,
+ mlx5_init_srq_table,
+ mlx5_cleanup_srq_table),
+ STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES,
+ mlx5_ib_dev_res_init,
+ mlx5_ib_dev_res_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_BFREG,
+ mlx5_ib_stage_bfrag_init,
+ mlx5_ib_stage_bfrag_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_IB_REG,
+ mlx5_ib_stage_ib_reg_init,
+ mlx5_ib_stage_ib_reg_cleanup),
+};
- for (i = 0; i < ARRAY_SIZE(mlx5_class_attributes); i++) {
- err = device_create_file(&dev->ib_dev.dev,
- mlx5_class_attributes[i]);
- if (err)
- goto err_umrc;
+static struct ib_device *mlx5_ib_add_sub_dev(struct ib_device *parent,
+ enum rdma_nl_dev_type type,
+ const char *name)
+{
+ struct mlx5_ib_dev *mparent = to_mdev(parent), *mplane;
+ enum rdma_link_layer ll;
+ int ret;
+
+ if (mparent->smi_dev)
+ return ERR_PTR(-EEXIST);
+
+ ll = mlx5_port_type_cap_to_rdma_ll(MLX5_CAP_GEN(mparent->mdev,
+ port_type));
+ if (type != RDMA_DEVICE_TYPE_SMI || !mparent->num_plane ||
+ ll != IB_LINK_LAYER_INFINIBAND ||
+ !MLX5_CAP_GEN_2(mparent->mdev, multiplane_qp_ud))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ mplane = ib_alloc_device_with_net(mlx5_ib_dev, ib_dev,
+ mlx5_core_net(mparent->mdev));
+ if (!mplane)
+ return ERR_PTR(-ENOMEM);
+
+ mplane->port = kcalloc(mparent->num_plane * mparent->num_ports,
+ sizeof(*mplane->port), GFP_KERNEL);
+ if (!mplane->port) {
+ ret = -ENOMEM;
+ goto fail_kcalloc;
}
- dev->ib_active = true;
+ mplane->ib_dev.type = type;
+ mplane->mdev = mparent->mdev;
+ mplane->num_ports = mparent->num_plane;
+ mplane->sub_dev_name = name;
+ mplane->ib_dev.phys_port_cnt = mplane->num_ports;
- return dev;
+ ret = __mlx5_ib_add(mplane, &plane_profile);
+ if (ret)
+ goto fail_ib_add;
-err_umrc:
- destroy_umrc_res(dev);
+ mparent->smi_dev = mplane;
+ return &mplane->ib_dev;
-err_dev:
- ib_unregister_device(&dev->ib_dev);
+fail_ib_add:
+ kfree(mplane->port);
+fail_kcalloc:
+ ib_dealloc_device(&mplane->ib_dev);
+ return ERR_PTR(ret);
+}
-err_q_cnt:
- mlx5_ib_dealloc_q_counters(dev);
+static void mlx5_ib_del_sub_dev(struct ib_device *sub_dev)
+{
+ struct mlx5_ib_dev *mdev = to_mdev(sub_dev);
-err_odp:
- mlx5_ib_odp_remove_one(dev);
+ to_mdev(sub_dev->parent)->smi_dev = NULL;
+ __mlx5_ib_remove(mdev, mdev->profile, MLX5_IB_STAGE_MAX);
+}
-err_rsrc:
- destroy_dev_resources(&dev->devr);
+static int mlx5r_mp_probe(struct auxiliary_device *adev,
+ const struct auxiliary_device_id *id)
+{
+ struct mlx5_adev *idev = container_of(adev, struct mlx5_adev, adev);
+ struct mlx5_core_dev *mdev = idev->mdev;
+ struct mlx5_ib_multiport_info *mpi;
+ struct mlx5_ib_dev *dev;
+ bool bound = false;
+ int err;
-err_disable_eth:
- if (ll == IB_LINK_LAYER_ETHERNET) {
- mlx5_disable_eth(dev);
- mlx5_remove_netdev_notifier(dev);
+ mpi = kzalloc(sizeof(*mpi), GFP_KERNEL);
+ if (!mpi)
+ return -ENOMEM;
+
+ mpi->mdev = mdev;
+ err = mlx5_query_nic_vport_system_image_guid(mdev,
+ &mpi->sys_image_guid);
+ if (err) {
+ kfree(mpi);
+ return err;
}
-err_free_port:
- kfree(dev->port);
+ mutex_lock(&mlx5_ib_multiport_mutex);
+ list_for_each_entry(dev, &mlx5_ib_dev_list, ib_dev_list) {
+ if (dev->sys_image_guid == mpi->sys_image_guid &&
+ mlx5_core_same_coredev_type(dev->mdev, mpi->mdev))
+ bound = mlx5_ib_bind_slave_port(dev, mpi);
-err_dealloc:
- ib_dealloc_device((struct ib_device *)dev);
+ if (bound) {
+ rdma_roce_rescan_device(&dev->ib_dev);
+ mpi->ibdev->ib_active = true;
+ break;
+ }
+ }
- return NULL;
+ if (!bound) {
+ list_add_tail(&mpi->list, &mlx5_ib_unaffiliated_port_list);
+ dev_dbg(mdev->device,
+ "no suitable IB device found to bind to, added to unaffiliated list.\n");
+ }
+ mutex_unlock(&mlx5_ib_multiport_mutex);
+
+ auxiliary_set_drvdata(adev, mpi);
+ return 0;
}
-static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context)
+static void mlx5r_mp_remove(struct auxiliary_device *adev)
{
- struct mlx5_ib_dev *dev = context;
- enum rdma_link_layer ll = mlx5_ib_port_link_layer(&dev->ib_dev, 1);
+ struct mlx5_ib_multiport_info *mpi;
- mlx5_remove_netdev_notifier(dev);
- ib_unregister_device(&dev->ib_dev);
- mlx5_ib_dealloc_q_counters(dev);
- destroy_umrc_res(dev);
- mlx5_ib_odp_remove_one(dev);
- destroy_dev_resources(&dev->devr);
- if (ll == IB_LINK_LAYER_ETHERNET)
- mlx5_disable_eth(dev);
+ mpi = auxiliary_get_drvdata(adev);
+ mutex_lock(&mlx5_ib_multiport_mutex);
+ if (mpi->ibdev)
+ mlx5_ib_unbind_slave_port(mpi->ibdev, mpi);
+ else
+ list_del(&mpi->list);
+ mutex_unlock(&mlx5_ib_multiport_mutex);
+ kfree(mpi);
+}
+
+static int mlx5r_probe(struct auxiliary_device *adev,
+ const struct auxiliary_device_id *id)
+{
+ struct mlx5_adev *idev = container_of(adev, struct mlx5_adev, adev);
+ struct mlx5_core_dev *mdev = idev->mdev;
+ const struct mlx5_ib_profile *profile;
+ int port_type_cap, num_ports, ret;
+ enum rdma_link_layer ll;
+ struct mlx5_ib_dev *dev;
+
+ port_type_cap = MLX5_CAP_GEN(mdev, port_type);
+ ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
+
+ num_ports = max(MLX5_CAP_GEN(mdev, num_ports),
+ MLX5_CAP_GEN(mdev, num_vhca_ports));
+ dev = ib_alloc_device_with_net(mlx5_ib_dev, ib_dev,
+ mlx5_core_net(mdev));
+ if (!dev)
+ return -ENOMEM;
+
+ if (ll == IB_LINK_LAYER_INFINIBAND) {
+ ret = mlx5_ib_get_plane_num(mdev, &dev->num_plane);
+ if (ret)
+ goto fail;
+ }
+
+ dev->port = kcalloc(num_ports, sizeof(*dev->port),
+ GFP_KERNEL);
+ if (!dev->port) {
+ ret = -ENOMEM;
+ goto fail;
+ }
+
+ dev->mdev = mdev;
+ dev->num_ports = num_ports;
+ dev->ib_dev.phys_port_cnt = num_ports;
+
+ if (ll == IB_LINK_LAYER_ETHERNET && !mlx5_get_roce_state(mdev))
+ profile = &raw_eth_profile;
+ else
+ profile = &pf_profile;
+
+ ret = __mlx5_ib_add(dev, profile);
+ if (ret)
+ goto fail_ib_add;
+
+ auxiliary_set_drvdata(adev, dev);
+ return 0;
+
+fail_ib_add:
kfree(dev->port);
+fail:
ib_dealloc_device(&dev->ib_dev);
+ return ret;
+}
+
+static void mlx5r_remove(struct auxiliary_device *adev)
+{
+ struct mlx5_ib_dev *dev;
+
+ dev = auxiliary_get_drvdata(adev);
+ __mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX);
}
-static struct mlx5_interface mlx5_ib_interface = {
- .add = mlx5_ib_add,
- .remove = mlx5_ib_remove,
- .event = mlx5_ib_event,
- .protocol = MLX5_INTERFACE_PROTOCOL_IB,
+static const struct auxiliary_device_id mlx5r_mp_id_table[] = {
+ { .name = MLX5_ADEV_NAME ".multiport", },
+ {},
+};
+
+static const struct auxiliary_device_id mlx5r_id_table[] = {
+ { .name = MLX5_ADEV_NAME ".rdma", },
+ {},
+};
+
+MODULE_DEVICE_TABLE(auxiliary, mlx5r_mp_id_table);
+MODULE_DEVICE_TABLE(auxiliary, mlx5r_id_table);
+
+static struct auxiliary_driver mlx5r_mp_driver = {
+ .name = "multiport",
+ .probe = mlx5r_mp_probe,
+ .remove = mlx5r_mp_remove,
+ .id_table = mlx5r_mp_id_table,
+};
+
+static struct auxiliary_driver mlx5r_driver = {
+ .name = "rdma",
+ .probe = mlx5r_probe,
+ .remove = mlx5r_remove,
+ .id_table = mlx5r_id_table,
};
static int __init mlx5_ib_init(void)
{
- int err;
+ int ret;
- if (deprecated_prof_sel != 2)
- pr_warn("prof_sel is deprecated for mlx5_ib, set it for mlx5_core\n");
+ xlt_emergency_page = (void *)__get_free_page(GFP_KERNEL);
+ if (!xlt_emergency_page)
+ return -ENOMEM;
- err = mlx5_ib_odp_init();
- if (err)
- return err;
+ mlx5_ib_event_wq = alloc_ordered_workqueue("mlx5_ib_event_wq", 0);
+ if (!mlx5_ib_event_wq) {
+ free_page((unsigned long)xlt_emergency_page);
+ return -ENOMEM;
+ }
- err = mlx5_register_interface(&mlx5_ib_interface);
- if (err)
- goto clean_odp;
+ ret = mlx5_ib_qp_event_init();
+ if (ret)
+ goto qp_event_err;
- return err;
+ mlx5_ib_odp_init();
+ ret = mlx5r_rep_init();
+ if (ret)
+ goto rep_err;
+ ret = mlx5_data_direct_driver_register();
+ if (ret)
+ goto dd_err;
+ ret = auxiliary_driver_register(&mlx5r_mp_driver);
+ if (ret)
+ goto mp_err;
+ ret = auxiliary_driver_register(&mlx5r_driver);
+ if (ret)
+ goto drv_err;
-clean_odp:
- mlx5_ib_odp_cleanup();
- return err;
+ return 0;
+
+drv_err:
+ auxiliary_driver_unregister(&mlx5r_mp_driver);
+mp_err:
+ mlx5_data_direct_driver_unregister();
+dd_err:
+ mlx5r_rep_cleanup();
+rep_err:
+ mlx5_ib_qp_event_cleanup();
+qp_event_err:
+ destroy_workqueue(mlx5_ib_event_wq);
+ free_page((unsigned long)xlt_emergency_page);
+ return ret;
}
static void __exit mlx5_ib_cleanup(void)
{
- mlx5_unregister_interface(&mlx5_ib_interface);
- mlx5_ib_odp_cleanup();
+ mlx5_data_direct_driver_unregister();
+ auxiliary_driver_unregister(&mlx5r_driver);
+ auxiliary_driver_unregister(&mlx5r_mp_driver);
+ mlx5r_rep_cleanup();
+
+ mlx5_ib_qp_event_cleanup();
+ destroy_workqueue(mlx5_ib_event_wq);
+ free_page((unsigned long)xlt_emergency_page);
}
module_init(mlx5_ib_init);
diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c
index 6851357c16f4..af321f6ef7f5 100644
--- a/drivers/infiniband/hw/mlx5/mem.c
+++ b/drivers/infiniband/hw/mlx5/mem.c
@@ -30,201 +30,66 @@
* SOFTWARE.
*/
-#include <linux/module.h>
-#include <rdma/ib_umem.h>
#include <rdma/ib_umem_odp.h>
#include "mlx5_ib.h"
-/* @umem: umem object to scan
- * @addr: ib virtual address requested by the user
- * @max_page_shift: high limit for page_shift - 0 means no limit
- * @count: number of PAGE_SIZE pages covered by umem
- * @shift: page shift for the compound pages found in the region
- * @ncont: number of compund pages
- * @order: log2 of the number of compound pages
+/*
+ * Fill in a physical address list. ib_umem_num_dma_blocks() entries will be
+ * filled in the pas array.
*/
-void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr,
- unsigned long max_page_shift,
- int *count, int *shift,
- int *ncont, int *order)
+void mlx5_ib_populate_pas(struct ib_umem *umem, size_t page_size, __be64 *pas,
+ u64 access_flags)
{
- unsigned long tmp;
- unsigned long m;
- int i, k;
- u64 base = 0;
- int p = 0;
- int skip;
- int mask;
- u64 len;
- u64 pfn;
- struct scatterlist *sg;
- int entry;
- unsigned long page_shift = ilog2(umem->page_size);
-
- /* With ODP we must always match OS page size. */
- if (umem->odp_data) {
- *count = ib_umem_page_count(umem);
- *shift = PAGE_SHIFT;
- *ncont = *count;
- if (order)
- *order = ilog2(roundup_pow_of_two(*count));
-
- return;
- }
+ struct ib_block_iter biter;
- addr = addr >> page_shift;
- tmp = (unsigned long)addr;
- m = find_first_bit(&tmp, BITS_PER_LONG);
- if (max_page_shift)
- m = min_t(unsigned long, max_page_shift - page_shift, m);
- skip = 1 << m;
- mask = skip - 1;
- i = 0;
- for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
- len = sg_dma_len(sg) >> page_shift;
- pfn = sg_dma_address(sg) >> page_shift;
- for (k = 0; k < len; k++) {
- if (!(i & mask)) {
- tmp = (unsigned long)pfn;
- m = min_t(unsigned long, m, find_first_bit(&tmp, BITS_PER_LONG));
- skip = 1 << m;
- mask = skip - 1;
- base = pfn;
- p = 0;
- } else {
- if (base + p != pfn) {
- tmp = (unsigned long)p;
- m = find_first_bit(&tmp, BITS_PER_LONG);
- skip = 1 << m;
- mask = skip - 1;
- base = pfn;
- p = 0;
- }
- }
- p++;
- i++;
- }
+ rdma_umem_for_each_dma_block (umem, &biter, page_size) {
+ *pas = cpu_to_be64(rdma_block_iter_dma_address(&biter) |
+ access_flags);
+ pas++;
}
-
- if (i) {
- m = min_t(unsigned long, ilog2(roundup_pow_of_two(i)), m);
-
- if (order)
- *order = ilog2(roundup_pow_of_two(i) >> m);
-
- *ncont = DIV_ROUND_UP(i, (1 << m));
- } else {
- m = 0;
-
- if (order)
- *order = 0;
-
- *ncont = 0;
- }
- *shift = page_shift + m;
- *count = i;
-}
-
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-static u64 umem_dma_to_mtt(dma_addr_t umem_dma)
-{
- u64 mtt_entry = umem_dma & ODP_DMA_ADDR_MASK;
-
- if (umem_dma & ODP_READ_ALLOWED_BIT)
- mtt_entry |= MLX5_IB_MTT_READ;
- if (umem_dma & ODP_WRITE_ALLOWED_BIT)
- mtt_entry |= MLX5_IB_MTT_WRITE;
-
- return mtt_entry;
}
-#endif
/*
- * Populate the given array with bus addresses from the umem.
- *
- * dev - mlx5_ib device
- * umem - umem to use to fill the pages
- * page_shift - determines the page size used in the resulting array
- * offset - offset into the umem to start from,
- * only implemented for ODP umems
- * num_pages - total number of pages to fill
- * pas - bus addresses array to fill
- * access_flags - access flags to set on all present pages.
- use enum mlx5_ib_mtt_access_flags for this.
+ * Compute the page shift and page_offset for mailboxes that use a quantized
+ * page_offset. The granulatity of the page offset scales according to page
+ * size.
*/
-void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
- int page_shift, size_t offset, size_t num_pages,
- __be64 *pas, int access_flags)
+unsigned long __mlx5_umem_find_best_quantized_pgoff(
+ struct ib_umem *umem, unsigned long pgsz_bitmap,
+ unsigned int page_offset_bits, u64 pgoff_bitmask, unsigned int scale,
+ unsigned int *page_offset_quantized)
{
- unsigned long umem_page_shift = ilog2(umem->page_size);
- int shift = page_shift - umem_page_shift;
- int mask = (1 << shift) - 1;
- int i, k;
- u64 cur = 0;
- u64 base;
- int len;
- struct scatterlist *sg;
- int entry;
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- const bool odp = umem->odp_data != NULL;
-
- if (odp) {
- WARN_ON(shift != 0);
- WARN_ON(access_flags != (MLX5_IB_MTT_READ | MLX5_IB_MTT_WRITE));
-
- for (i = 0; i < num_pages; ++i) {
- dma_addr_t pa = umem->odp_data->dma_list[offset + i];
-
- pas[i] = cpu_to_be64(umem_dma_to_mtt(pa));
- }
- return;
- }
-#endif
-
- i = 0;
- for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
- len = sg_dma_len(sg) >> umem_page_shift;
- base = sg_dma_address(sg);
- for (k = 0; k < len; k++) {
- if (!(i & mask)) {
- cur = base + (k << umem_page_shift);
- cur |= access_flags;
-
- pas[i >> shift] = cpu_to_be64(cur);
- mlx5_ib_dbg(dev, "pas[%d] 0x%llx\n",
- i >> shift, be64_to_cpu(pas[i >> shift]));
- } else
- mlx5_ib_dbg(dev, "=====> 0x%llx\n",
- base + (k << umem_page_shift));
- i++;
- }
+ const u64 page_offset_mask = (1UL << page_offset_bits) - 1;
+ unsigned long page_size;
+ u64 page_offset;
+
+ page_size = ib_umem_find_best_pgoff(umem, pgsz_bitmap, pgoff_bitmask);
+ if (!page_size)
+ return 0;
+
+ /*
+ * page size is the largest possible page size.
+ *
+ * Reduce the page_size, and thus the page_offset and quanta, until the
+ * page_offset fits into the mailbox field. Once page_size < scale this
+ * loop is guaranteed to terminate.
+ */
+ page_offset = ib_umem_dma_offset(umem, page_size);
+ while (page_offset & ~(u64)(page_offset_mask * (page_size / scale))) {
+ page_size /= 2;
+ page_offset = ib_umem_dma_offset(umem, page_size);
}
-}
-
-void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
- int page_shift, __be64 *pas, int access_flags)
-{
- return __mlx5_ib_populate_pas(dev, umem, page_shift, 0,
- ib_umem_num_pages(umem), pas,
- access_flags);
-}
-int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset)
-{
- u64 page_size;
- u64 page_mask;
- u64 off_size;
- u64 off_mask;
- u64 buf_off;
-
- page_size = (u64)1 << page_shift;
- page_mask = page_size - 1;
- buf_off = addr & page_mask;
- off_size = page_size >> 6;
- off_mask = off_size - 1;
-
- if (buf_off & off_mask)
- return -EINVAL;
- *offset = buf_off >> ilog2(off_size);
- return 0;
+ /*
+ * The address is not aligned, or otherwise cannot be represented by the
+ * page_offset.
+ */
+ if (!(pgsz_bitmap & page_size))
+ return 0;
+
+ *page_offset_quantized =
+ (unsigned long)page_offset / (page_size / scale);
+ if (WARN_ON(*page_offset_quantized > page_offset_mask))
+ return 0;
+ return page_size;
}
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 6c6057eb60ea..09d82d5f95e3 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -1,33 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/*
- * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
+ * Copyright (c) 2013-2020, Mellanox Technologies inc. All rights reserved.
+ * Copyright (c) 2020, Intel Corporation. All rights reserved.
*/
#ifndef MLX5_IB_H
@@ -36,47 +10,108 @@
#include <linux/kernel.h>
#include <linux/sched.h>
#include <rdma/ib_verbs.h>
+#include <rdma/ib_umem.h>
#include <rdma/ib_smi.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/cq.h>
+#include <linux/mlx5/fs.h>
#include <linux/mlx5/qp.h>
-#include <linux/mlx5/srq.h>
#include <linux/types.h>
#include <linux/mlx5/transobj.h>
#include <rdma/ib_user_verbs.h>
#include <rdma/mlx5-abi.h>
+#include <rdma/uverbs_ioctl.h>
+#include <rdma/mlx5_user_ioctl_cmds.h>
+#include <rdma/mlx5_user_ioctl_verbs.h>
+
+#include "srq.h"
+#include "qp.h"
+#include "macsec.h"
-#define mlx5_ib_dbg(dev, format, arg...) \
-pr_debug("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__, \
- __LINE__, current->pid, ##arg)
+#define mlx5_ib_dbg(_dev, format, arg...) \
+ dev_dbg(&(_dev)->ib_dev.dev, "%s:%d:(pid %d): " format, __func__, \
+ __LINE__, current->pid, ##arg)
-#define mlx5_ib_err(dev, format, arg...) \
-pr_err("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__, \
- __LINE__, current->pid, ##arg)
+#define mlx5_ib_err(_dev, format, arg...) \
+ dev_err(&(_dev)->ib_dev.dev, "%s:%d:(pid %d): " format, __func__, \
+ __LINE__, current->pid, ##arg)
-#define mlx5_ib_warn(dev, format, arg...) \
-pr_warn("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__, \
- __LINE__, current->pid, ##arg)
+#define mlx5_ib_warn(_dev, format, arg...) \
+ dev_warn(&(_dev)->ib_dev.dev, "%s:%d:(pid %d): " format, __func__, \
+ __LINE__, current->pid, ##arg)
+
+#define mlx5_ib_log(lvl, _dev, format, arg...) \
+ dev_printk(lvl, &(_dev)->ib_dev.dev, "%s:%d:(pid %d): " format, \
+ __func__, __LINE__, current->pid, ##arg)
-#define field_avail(type, fld, sz) (offsetof(type, fld) + \
- sizeof(((type *)0)->fld) <= (sz))
#define MLX5_IB_DEFAULT_UIDX 0xffffff
#define MLX5_USER_ASSIGNED_UIDX_MASK __mlx5_mask(qpc, user_index)
-#define MLX5_MKEY_PAGE_SHIFT_MASK __mlx5_mask(mkc, log_page_size)
+static __always_inline unsigned long
+__mlx5_log_page_size_to_bitmap(unsigned int log_pgsz_bits,
+ unsigned int pgsz_shift)
+{
+ unsigned int largest_pg_shift =
+ min_t(unsigned long, (1ULL << log_pgsz_bits) - 1 + pgsz_shift,
+ BITS_PER_LONG - 1);
+
+ /*
+ * Despite a command allowing it, the device does not support lower than
+ * 4k page size.
+ */
+ pgsz_shift = max_t(unsigned int, MLX5_ADAPTER_PAGE_SHIFT, pgsz_shift);
+ return GENMASK(largest_pg_shift, pgsz_shift);
+}
+
+static __always_inline unsigned long
+__mlx5_page_offset_to_bitmask(unsigned int page_offset_bits,
+ unsigned int offset_shift)
+{
+ unsigned int largest_offset_shift =
+ min_t(unsigned long, page_offset_bits - 1 + offset_shift,
+ BITS_PER_LONG - 1);
+
+ return GENMASK(largest_offset_shift, offset_shift);
+}
+
+/*
+ * QP/CQ/WQ/etc type commands take a page offset that satisifies:
+ * page_offset_quantized * (page_size/scale) = page_offset
+ * Which restricts allowed page sizes to ones that satisify the above.
+ */
+unsigned long __mlx5_umem_find_best_quantized_pgoff(
+ struct ib_umem *umem, unsigned long pgsz_bitmap,
+ unsigned int page_offset_bits, u64 pgoff_bitmask, unsigned int scale,
+ unsigned int *page_offset_quantized);
+#define mlx5_umem_find_best_quantized_pgoff(umem, typ, log_pgsz_fld, \
+ pgsz_shift, page_offset_fld, \
+ scale, page_offset_quantized) \
+ __mlx5_umem_find_best_quantized_pgoff( \
+ umem, \
+ __mlx5_log_page_size_to_bitmap( \
+ __mlx5_bit_sz(typ, log_pgsz_fld), pgsz_shift), \
+ __mlx5_bit_sz(typ, page_offset_fld), \
+ GENMASK(31, order_base_2(scale)), scale, \
+ page_offset_quantized)
+
+#define mlx5_umem_find_best_cq_quantized_pgoff(umem, typ, log_pgsz_fld, \
+ pgsz_shift, page_offset_fld, \
+ scale, page_offset_quantized) \
+ __mlx5_umem_find_best_quantized_pgoff( \
+ umem, \
+ __mlx5_log_page_size_to_bitmap( \
+ __mlx5_bit_sz(typ, log_pgsz_fld), pgsz_shift), \
+ __mlx5_bit_sz(typ, page_offset_fld), 0, scale, \
+ page_offset_quantized)
enum {
- MLX5_IB_MMAP_CMD_SHIFT = 8,
- MLX5_IB_MMAP_CMD_MASK = 0xff,
+ MLX5_IB_MMAP_OFFSET_START = 9,
+ MLX5_IB_MMAP_OFFSET_END = 255,
};
-enum mlx5_ib_mmap_cmd {
- MLX5_IB_MMAP_REGULAR_PAGE = 0,
- MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES = 1,
- MLX5_IB_MMAP_WC_PAGE = 2,
- MLX5_IB_MMAP_NC_PAGE = 3,
- /* 5 is chosen in order to be compatible with old versions of libmlx5 */
- MLX5_IB_MMAP_CORE_CLOCK = 5,
+enum {
+ MLX5_IB_MMAP_CMD_SHIFT = 8,
+ MLX5_IB_MMAP_CMD_MASK = 0xff,
};
enum {
@@ -86,13 +121,6 @@ enum {
MLX5_REQ_SCAT_DATA64_CQE = 0x22,
};
-enum mlx5_ib_latency_class {
- MLX5_IB_LATENCY_CLASS_LOW,
- MLX5_IB_LATENCY_CLASS_MEDIUM,
- MLX5_IB_LATENCY_CLASS_HIGH,
- MLX5_IB_LATENCY_CLASS_FAST_PATH
-};
-
enum mlx5_ib_mad_ifc_flags {
MLX5_MAD_IFC_IGNORE_MKEY = 1,
MLX5_MAD_IFC_IGNORE_BKEY = 2,
@@ -100,7 +128,7 @@ enum mlx5_ib_mad_ifc_flags {
};
enum {
- MLX5_CROSS_CHANNEL_UUAR = 0,
+ MLX5_CROSS_CHANNEL_BFREG = 0,
};
enum {
@@ -108,9 +136,50 @@ enum {
MLX5_CQE_VERSION_V1,
};
-struct mlx5_ib_vma_private_data {
- struct list_head list;
- struct vm_area_struct *vma;
+enum {
+ MLX5_TM_MAX_RNDV_MSG_SIZE = 64,
+ MLX5_TM_MAX_SGE = 1,
+};
+
+enum {
+ MLX5_IB_INVALID_UAR_INDEX = BIT(31),
+ MLX5_IB_INVALID_BFREG = BIT(31),
+};
+
+enum {
+ MLX5_MAX_MEMIC_PAGES = 0x100,
+ MLX5_MEMIC_ALLOC_SIZE_MASK = 0x3f,
+};
+
+enum {
+ MLX5_MEMIC_BASE_ALIGN = 6,
+ MLX5_MEMIC_BASE_SIZE = 1 << MLX5_MEMIC_BASE_ALIGN,
+};
+
+enum mlx5_ib_mmap_type {
+ MLX5_IB_MMAP_TYPE_MEMIC = 1,
+ MLX5_IB_MMAP_TYPE_VAR = 2,
+ MLX5_IB_MMAP_TYPE_UAR_WC = 3,
+ MLX5_IB_MMAP_TYPE_UAR_NC = 4,
+ MLX5_IB_MMAP_TYPE_MEMIC_OP = 5,
+};
+
+struct mlx5_bfreg_info {
+ u32 *sys_pages;
+ int num_low_latency_bfregs;
+ unsigned int *count;
+
+ /*
+ * protect bfreg allocation data structs
+ */
+ struct mutex lock;
+ u32 ver;
+ u8 lib_uar_4k : 1;
+ u8 lib_uar_dyn : 1;
+ u32 num_sys_pages;
+ u32 num_static_sys_pages;
+ u32 total_num_bfregs;
+ u32 num_dyn_bfregs;
};
struct mlx5_ib_ucontext {
@@ -120,11 +189,15 @@ struct mlx5_ib_ucontext {
/* protect doorbell record alloc/free
*/
struct mutex db_page_mutex;
- struct mlx5_uuar_info uuari;
+ struct mlx5_bfreg_info bfregi;
u8 cqe_version;
/* Transport Domain number */
u32 tdn;
- struct list_head vma_private_list;
+
+ u64 lib_caps;
+ u16 devx_uid;
+ /* For RoCE LAG TX affinity */
+ atomic_t tx_port_affinity;
};
static inline struct mlx5_ib_ucontext *to_mucontext(struct ib_ucontext *ibucontext)
@@ -135,6 +208,13 @@ static inline struct mlx5_ib_ucontext *to_mucontext(struct ib_ucontext *ibuconte
struct mlx5_ib_pd {
struct ib_pd ibpd;
u32 pdn;
+ u16 uid;
+};
+
+enum {
+ MLX5_IB_FLOW_ACTION_MODIFY_HEADER,
+ MLX5_IB_FLOW_ACTION_PACKET_REFORMAT,
+ MLX5_IB_FLOW_ACTION_DECAP,
};
#define MLX5_IB_FLOW_MCAST_PRIO (MLX5_BY_PASS_NUM_PRIOS - 1)
@@ -146,8 +226,21 @@ struct mlx5_ib_pd {
#define MLX5_IB_NUM_FLOW_FT (MLX5_IB_FLOW_LEFTOVERS_PRIO + 1)
#define MLX5_IB_NUM_SNIFFER_FTS 2
+#define MLX5_IB_NUM_EGRESS_FTS 1
+#define MLX5_IB_NUM_FDB_FTS MLX5_BY_PASS_NUM_REGULAR_PRIOS
+
+struct mlx5_ib_anchor {
+ struct mlx5_flow_table *ft;
+ struct mlx5_flow_group *fg_goto_table;
+ struct mlx5_flow_group *fg_drop;
+ struct mlx5_flow_handle *rule_goto_table;
+ struct mlx5_flow_handle *rule_drop;
+ unsigned int rule_goto_table_ref;
+};
+
struct mlx5_ib_flow_prio {
struct mlx5_flow_table *flow_table;
+ struct mlx5_ib_anchor anchor;
unsigned int refcount;
};
@@ -156,12 +249,66 @@ struct mlx5_ib_flow_handler {
struct ib_flow ibflow;
struct mlx5_ib_flow_prio *prio;
struct mlx5_flow_handle *rule;
+ struct ib_counters *ibcounters;
+ struct mlx5_ib_dev *dev;
+ struct mlx5_ib_flow_matcher *flow_matcher;
+};
+
+struct mlx5_ib_flow_matcher {
+ struct mlx5_ib_match_params matcher_mask;
+ int mask_len;
+ enum mlx5_ib_flow_type flow_type;
+ enum mlx5_flow_namespace_type ns_type;
+ u16 priority;
+ struct mlx5_core_dev *mdev;
+ atomic_t usecnt;
+ u8 match_criteria_enable;
+ u32 ib_port;
+};
+
+struct mlx5_ib_steering_anchor {
+ struct mlx5_ib_flow_prio *ft_prio;
+ struct mlx5_ib_dev *dev;
+ atomic_t usecnt;
+};
+
+struct mlx5_ib_pp {
+ u16 index;
+ struct mlx5_core_dev *mdev;
+};
+
+enum mlx5_ib_optional_counter_type {
+ MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS,
+ MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS,
+ MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS,
+ MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS,
+ MLX5_IB_OPCOUNTER_RDMA_TX_BYTES,
+ MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS,
+ MLX5_IB_OPCOUNTER_RDMA_RX_BYTES,
+
+ MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS_PER_QP,
+ MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS_PER_QP,
+ MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS_PER_QP,
+ MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS_PER_QP,
+ MLX5_IB_OPCOUNTER_RDMA_TX_BYTES_PER_QP,
+ MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS_PER_QP,
+ MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP,
+
+ MLX5_IB_OPCOUNTER_MAX,
};
struct mlx5_ib_flow_db {
struct mlx5_ib_flow_prio prios[MLX5_IB_NUM_FLOW_FT];
+ struct mlx5_ib_flow_prio egress_prios[MLX5_IB_NUM_FLOW_FT];
struct mlx5_ib_flow_prio sniffer[MLX5_IB_NUM_SNIFFER_FTS];
+ struct mlx5_ib_flow_prio egress[MLX5_IB_NUM_EGRESS_FTS];
+ struct mlx5_ib_flow_prio fdb[MLX5_IB_NUM_FDB_FTS];
+ struct mlx5_ib_flow_prio rdma_rx[MLX5_IB_NUM_FLOW_FT];
+ struct mlx5_ib_flow_prio rdma_tx[MLX5_IB_NUM_FLOW_FT];
+ struct mlx5_ib_flow_prio opfcs[MLX5_IB_OPCOUNTER_MAX];
struct mlx5_flow_table *lag_demux_ft;
+ struct mlx5_ib_flow_prio *rdma_transport_rx[MLX5_RDMA_TRANSPORT_BYPASS_PRIO];
+ struct mlx5_ib_flow_prio *rdma_transport_tx[MLX5_RDMA_TRANSPORT_BYPASS_PRIO];
/* Protect flow steering bypass flow tables
* when add/del flow rules.
* only single add/removal of flow steering rule could be done
@@ -171,43 +318,48 @@ struct mlx5_ib_flow_db {
};
/* Use macros here so that don't have to duplicate
- * enum ib_send_flags and enum ib_qp_type for low-level driver
+ * enum ib_qp_type for low-level driver
*/
-#define MLX5_IB_SEND_UMR_UNREG IB_SEND_RESERVED_START
-#define MLX5_IB_SEND_UMR_FAIL_IF_FREE (IB_SEND_RESERVED_START << 1)
-#define MLX5_IB_SEND_UMR_UPDATE_MTT (IB_SEND_RESERVED_START << 2)
-
-#define MLX5_IB_SEND_UMR_UPDATE_TRANSLATION (IB_SEND_RESERVED_START << 3)
-#define MLX5_IB_SEND_UMR_UPDATE_PD (IB_SEND_RESERVED_START << 4)
-#define MLX5_IB_SEND_UMR_UPDATE_ACCESS IB_SEND_RESERVED_END
-
#define MLX5_IB_QPT_REG_UMR IB_QPT_RESERVED1
/*
* IB_QPT_GSI creates the software wrapper around GSI, and MLX5_IB_QPT_HW_GSI
* creates the actual hardware QP.
*/
#define MLX5_IB_QPT_HW_GSI IB_QPT_RESERVED2
+#define MLX5_IB_QPT_DCI IB_QPT_RESERVED3
+#define MLX5_IB_QPT_DCT IB_QPT_RESERVED4
#define MLX5_IB_WR_UMR IB_WR_RESERVED1
+#define MLX5_IB_UPD_XLT_ZAP BIT(0)
+#define MLX5_IB_UPD_XLT_ENABLE BIT(1)
+#define MLX5_IB_UPD_XLT_ATOMIC BIT(2)
+#define MLX5_IB_UPD_XLT_ADDR BIT(3)
+#define MLX5_IB_UPD_XLT_PD BIT(4)
+#define MLX5_IB_UPD_XLT_ACCESS BIT(5)
+#define MLX5_IB_UPD_XLT_INDIRECT BIT(6)
+#define MLX5_IB_UPD_XLT_DOWNGRADE BIT(7)
+#define MLX5_IB_UPD_XLT_KEEP_PGSZ BIT(8)
+
/* Private QP creation flags to be passed in ib_qp_init_attr.create_flags.
*
* These flags are intended for internal use by the mlx5_ib driver, and they
* rely on the range reserved for that use in the ib_qp_create_flags enum.
*/
-
-/* Create a UD QP whose source QP number is 1 */
-static inline enum ib_qp_create_flags mlx5_ib_create_qp_sqpn_qp1(void)
-{
- return IB_QP_CREATE_RESERVED_START;
-}
+#define MLX5_IB_QP_CREATE_SQPN_QP1 IB_QP_CREATE_RESERVED_START
struct wr_list {
u16 opcode;
u16 next;
};
+enum mlx5_ib_rq_flags {
+ MLX5_IB_RQ_CVLAN_STRIPPING = 1 << 0,
+ MLX5_IB_RQ_PCI_WRITE_END_PADDING = 1 << 1,
+};
+
struct mlx5_ib_wq {
+ struct mlx5_frag_buf_ctrl fbc;
u64 *wrid;
u32 *wr_data;
struct wr_list *w_list;
@@ -226,9 +378,20 @@ struct mlx5_ib_wq {
unsigned tail;
u16 cur_post;
u16 last_poll;
- void *qend;
+ void *cur_edge;
+};
+
+enum mlx5_ib_wq_flags {
+ MLX5_IB_WQ_FLAGS_DELAY_DROP = 0x1,
+ MLX5_IB_WQ_FLAGS_STRIDING_RQ = 0x2,
};
+#define MLX5_MIN_SINGLE_WQE_LOG_NUM_STRIDES 9
+#define MLX5_MAX_SINGLE_WQE_LOG_NUM_STRIDES 16
+#define MLX5_MIN_SINGLE_STRIDE_LOG_NUM_BYTES 6
+#define MLX5_MAX_SINGLE_STRIDE_LOG_NUM_BYTES 13
+#define MLX5_EXT_MIN_SINGLE_WQE_LOG_NUM_STRIDES 3
+
struct mlx5_ib_rwq {
struct ib_wq ibwq;
struct mlx5_core_qp core_qp;
@@ -237,54 +400,24 @@ struct mlx5_ib_rwq {
u32 log_rq_size;
u32 rq_page_offset;
u32 log_page_size;
+ u32 log_num_strides;
+ u32 two_byte_shift_en;
+ u32 single_stride_log_num_of_bytes;
struct ib_umem *umem;
size_t buf_size;
unsigned int page_shift;
- int create_type;
struct mlx5_db db;
u32 user_index;
u32 wqe_count;
u32 wqe_shift;
int wq_sig;
-};
-
-enum {
- MLX5_QP_USER,
- MLX5_QP_KERNEL,
- MLX5_QP_EMPTY
-};
-
-enum {
- MLX5_WQ_USER,
- MLX5_WQ_KERNEL
+ u32 create_flags; /* Use enum mlx5_ib_wq_flags */
};
struct mlx5_ib_rwq_ind_table {
struct ib_rwq_ind_table ib_rwq_ind_tbl;
u32 rqtn;
-};
-
-/*
- * Connect-IB can trigger up to four concurrent pagefaults
- * per-QP.
- */
-enum mlx5_ib_pagefault_context {
- MLX5_IB_PAGEFAULT_RESPONDER_READ,
- MLX5_IB_PAGEFAULT_REQUESTOR_READ,
- MLX5_IB_PAGEFAULT_RESPONDER_WRITE,
- MLX5_IB_PAGEFAULT_REQUESTOR_WRITE,
- MLX5_IB_PAGEFAULT_CONTEXTS
-};
-
-static inline enum mlx5_ib_pagefault_context
- mlx5_ib_get_pagefault_context(struct mlx5_pagefault *pagefault)
-{
- return pagefault->flags & (MLX5_PFAULT_REQUESTOR | MLX5_PFAULT_WRITE);
-}
-
-struct mlx5_ib_pfault {
- struct work_struct work;
- struct mlx5_pagefault mpfault;
+ u16 uid;
};
struct mlx5_ib_ubuffer {
@@ -302,7 +435,7 @@ struct mlx5_ib_qp_base {
struct mlx5_ib_qp_trans {
struct mlx5_ib_qp_base base;
u16 xrcdn;
- u8 alt_port;
+ u32 alt_port;
u8 atomic_rd_en;
u8 resp_depth;
};
@@ -318,6 +451,7 @@ struct mlx5_ib_rq {
struct mlx5_db *doorbell;
u32 tirn;
u8 state;
+ u32 flags;
};
struct mlx5_ib_sq {
@@ -325,6 +459,7 @@ struct mlx5_ib_sq {
struct mlx5_ib_wq *sq;
struct mlx5_ib_ubuffer ubuffer;
struct mlx5_db *doorbell;
+ struct mlx5_flow_handle *flow_rule;
u32 tisn;
u8 state;
};
@@ -334,106 +469,98 @@ struct mlx5_ib_raw_packet_qp {
struct mlx5_ib_rq rq;
};
+struct mlx5_bf {
+ int buf_size;
+ unsigned long offset;
+ struct mlx5_sq_bfreg *bfreg;
+};
+
+struct mlx5_ib_dct {
+ struct mlx5_core_dct mdct;
+ u32 *in;
+};
+
+struct mlx5_ib_gsi_qp {
+ struct ib_qp *rx_qp;
+ u32 port_num;
+ struct ib_qp_cap cap;
+ struct ib_cq *cq;
+ struct mlx5_ib_gsi_wr *outstanding_wrs;
+ u32 outstanding_pi, outstanding_ci;
+ int num_qps;
+ /* Protects access to the tx_qps. Post send operations synchronize
+ * with tx_qp creation in setup_qp(). Also protects the
+ * outstanding_wrs array and indices.
+ */
+ spinlock_t lock;
+ struct ib_qp **tx_qps;
+};
+
struct mlx5_ib_qp {
struct ib_qp ibqp;
union {
struct mlx5_ib_qp_trans trans_qp;
struct mlx5_ib_raw_packet_qp raw_packet_qp;
struct mlx5_ib_rss_qp rss_qp;
+ struct mlx5_ib_dct dct;
+ struct mlx5_ib_gsi_qp gsi;
};
- struct mlx5_buf buf;
+ struct mlx5_frag_buf buf;
struct mlx5_db db;
struct mlx5_ib_wq rq;
u8 sq_signal_bits;
- u8 fm_cache;
+ u8 next_fence;
struct mlx5_ib_wq sq;
/* serialize qp state modifications
*/
struct mutex mutex;
+ /* cached variant of create_flags from struct ib_qp_init_attr */
u32 flags;
- u8 port;
+ u32 port;
u8 state;
- int wq_sig;
- int scat_cqe;
int max_inline_data;
- struct mlx5_bf *bf;
- int has_rq;
+ struct mlx5_bf bf;
+ u8 has_rq:1;
+ u8 is_rss:1;
+ u8 is_ooo_rq:1;
/* only for user space QPs. For kernel
* we have it from the bf object
*/
- int uuarn;
-
- int create_type;
+ int bfregn;
- /* Store signature errors */
- bool signature_en;
-
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- /*
- * A flag that is true for QP's that are in a state that doesn't
- * allow page faults, and shouldn't schedule any more faults.
- */
- int disable_page_faults;
- /*
- * The disable_page_faults_lock protects a QP's disable_page_faults
- * field, allowing for a thread to atomically check whether the QP
- * allows page faults, and if so schedule a page fault.
- */
- spinlock_t disable_page_faults_lock;
- struct mlx5_ib_pfault pagefaults[MLX5_IB_PAGEFAULT_CONTEXTS];
-#endif
struct list_head qps_list;
struct list_head cq_recv_list;
struct list_head cq_send_list;
- u32 rate_limit;
+ struct mlx5_rate_limit rl;
+ u32 underlay_qpn;
+ u32 flags_en;
+ /*
+ * IB/core doesn't store low-level QP types, so
+ * store both MLX and IBTA types in the field below.
+ */
+ enum ib_qp_type type;
+ /* A flag to indicate if there's a new counter is configured
+ * but not take effective
+ */
+ u32 counter_pending;
+ u16 gsi_lag_port;
};
struct mlx5_ib_cq_buf {
- struct mlx5_buf buf;
+ struct mlx5_frag_buf_ctrl fbc;
+ struct mlx5_frag_buf frag_buf;
struct ib_umem *umem;
int cqe_size;
int nent;
};
-enum mlx5_ib_qp_flags {
- MLX5_IB_QP_LSO = IB_QP_CREATE_IPOIB_UD_LSO,
- MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK = IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK,
- MLX5_IB_QP_CROSS_CHANNEL = IB_QP_CREATE_CROSS_CHANNEL,
- MLX5_IB_QP_MANAGED_SEND = IB_QP_CREATE_MANAGED_SEND,
- MLX5_IB_QP_MANAGED_RECV = IB_QP_CREATE_MANAGED_RECV,
- MLX5_IB_QP_SIGNATURE_HANDLING = 1 << 5,
- /* QP uses 1 as its source QP number */
- MLX5_IB_QP_SQPN_QP1 = 1 << 6,
- MLX5_IB_QP_CAP_SCATTER_FCS = 1 << 7,
- MLX5_IB_QP_RSS = 1 << 8,
-};
-
-struct mlx5_umr_wr {
- struct ib_send_wr wr;
- union {
- u64 virt_addr;
- u64 offset;
- } target;
- struct ib_pd *pd;
- unsigned int page_shift;
- unsigned int npages;
- u64 length;
- int access_flags;
- u32 mkey;
-};
-
-static inline struct mlx5_umr_wr *umr_wr(struct ib_send_wr *wr)
-{
- return container_of(wr, struct mlx5_umr_wr, wr);
-}
-
-struct mlx5_shared_mr_info {
- int mr_id;
- struct ib_umem *umem;
+enum mlx5_ib_cq_pr_flags {
+ MLX5_IB_CQ_PR_FLAGS_CQE_128_PAD = 1 << 0,
+ MLX5_IB_CQ_PR_FLAGS_REAL_TIME_TS = 1 << 1,
};
struct mlx5_ib_cq {
@@ -458,6 +585,7 @@ struct mlx5_ib_cq {
struct list_head wc_list;
enum ib_cq_notify_flags notify_flags;
struct work_struct notify_work;
+ u16 private_flags; /* Use mlx5_ib_cq_pr_flags */
};
struct mlx5_ib_wc {
@@ -468,8 +596,9 @@ struct mlx5_ib_wc {
struct mlx5_ib_srq {
struct ib_srq ibsrq;
struct mlx5_core_srq msrq;
- struct mlx5_buf buf;
+ struct mlx5_frag_buf buf;
struct mlx5_db db;
+ struct mlx5_frag_buf_ctrl fbc;
u64 *wrid;
/* protect SRQ hanlding
*/
@@ -494,34 +623,137 @@ enum mlx5_ib_mtt_access_flags {
MLX5_IB_MTT_WRITE = (1 << 1),
};
+struct mlx5_user_mmap_entry {
+ struct rdma_user_mmap_entry rdma_entry;
+ u8 mmap_flag;
+ u64 address;
+ u32 page_idx;
+};
+
+enum mlx5_mkey_type {
+ MLX5_MKEY_MR = 1,
+ MLX5_MKEY_MW,
+ MLX5_MKEY_INDIRECT_DEVX,
+ MLX5_MKEY_NULL,
+ MLX5_MKEY_IMPLICIT_CHILD,
+};
+
+/* Used for non-existent ph value */
+#define MLX5_IB_NO_PH 0xff
+
+struct mlx5r_cache_rb_key {
+ u8 ats:1;
+ u8 ph;
+ u16 st_index;
+ unsigned int access_mode;
+ unsigned int access_flags;
+ unsigned int ndescs;
+};
+
+struct mlx5_ib_mkey {
+ u32 key;
+ enum mlx5_mkey_type type;
+ unsigned int ndescs;
+ struct wait_queue_head wait;
+ refcount_t usecount;
+ /* Cacheable user Mkey must hold either a rb_key or a cache_ent. */
+ struct mlx5r_cache_rb_key rb_key;
+ struct mlx5_cache_ent *cache_ent;
+ u8 cacheable : 1;
+};
+
#define MLX5_IB_MTT_PRESENT (MLX5_IB_MTT_READ | MLX5_IB_MTT_WRITE)
+#define MLX5_IB_DM_MEMIC_ALLOWED_ACCESS (IB_ACCESS_LOCAL_WRITE |\
+ IB_ACCESS_REMOTE_WRITE |\
+ IB_ACCESS_REMOTE_READ |\
+ IB_ACCESS_REMOTE_ATOMIC |\
+ IB_ZERO_BASED)
+
+#define MLX5_IB_DM_SW_ICM_ALLOWED_ACCESS (IB_ACCESS_LOCAL_WRITE |\
+ IB_ACCESS_REMOTE_WRITE |\
+ IB_ACCESS_REMOTE_READ |\
+ IB_ZERO_BASED)
+
+#define mlx5_update_odp_stats(mr, counter_name, value) \
+ atomic64_add(value, &((mr)->odp_stats.counter_name))
+
+#define mlx5_update_odp_stats_with_handled(mr, counter_name, value) \
+ do { \
+ mlx5_update_odp_stats(mr, counter_name, value); \
+ atomic64_add(1, &((mr)->odp_stats.counter_name##_handled)); \
+ } while (0)
+
struct mlx5_ib_mr {
- struct ib_mr ibmr;
- void *descs;
- dma_addr_t desc_map;
- int ndescs;
- int max_descs;
- int desc_size;
- int access_mode;
- struct mlx5_core_mkey mmkey;
- struct ib_umem *umem;
- struct mlx5_shared_mr_info *smr_info;
- struct list_head list;
- int order;
- int umred;
- int npages;
- struct mlx5_ib_dev *dev;
- u32 out[MLX5_ST_SZ_DW(create_mkey_out)];
- struct mlx5_core_sig_ctx *sig;
- int live;
- void *descs_alloc;
- int access_flags; /* Needed for rereg MR */
+ struct ib_mr ibmr;
+ struct mlx5_ib_mkey mmkey;
+
+ struct ib_umem *umem;
+ /* The mr is data direct related */
+ u8 data_direct :1;
+
+ union {
+ /* Used only by kernel MRs (umem == NULL) */
+ struct {
+ void *descs;
+ void *descs_alloc;
+ dma_addr_t desc_map;
+ int max_descs;
+ int desc_size;
+ int access_mode;
+
+ /* For Kernel IB_MR_TYPE_INTEGRITY */
+ struct mlx5_core_sig_ctx *sig;
+ struct mlx5_ib_mr *pi_mr;
+ struct mlx5_ib_mr *klm_mr;
+ struct mlx5_ib_mr *mtt_mr;
+ u64 data_iova;
+ u64 pi_iova;
+ int meta_ndescs;
+ int meta_length;
+ int data_length;
+ };
+
+ /* Used only by User MRs (umem != NULL) */
+ struct {
+ unsigned int page_shift;
+ /* Current access_flags */
+ int access_flags;
+
+ /* For User ODP */
+ struct mlx5_ib_mr *parent;
+ struct xarray implicit_children;
+ union {
+ struct work_struct work;
+ } odp_destroy;
+ struct ib_odp_counters odp_stats;
+ bool is_odp_implicit;
+ /* The affilated data direct crossed mr */
+ struct mlx5_ib_mr *dd_crossed_mr;
+ struct list_head dd_node;
+ u8 revoked :1;
+ /* Indicates previous dmabuf page fault occurred */
+ u8 dmabuf_faulted:1;
+ struct mlx5_ib_mkey null_mmkey;
+ };
+ };
};
+static inline bool is_odp_mr(struct mlx5_ib_mr *mr)
+{
+ return IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) && mr->umem &&
+ mr->umem->is_odp;
+}
+
+static inline bool is_dmabuf_mr(struct mlx5_ib_mr *mr)
+{
+ return IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) && mr->umem &&
+ mr->umem->is_dmabuf;
+}
+
struct mlx5_ib_mw {
struct ib_mw ibmw;
- struct mlx5_core_mkey mmkey;
+ struct mlx5_ib_mkey mmkey;
};
struct mlx5_ib_umr_context {
@@ -530,122 +762,473 @@ struct mlx5_ib_umr_context {
struct completion done;
};
+enum {
+ MLX5_UMR_STATE_UNINIT,
+ MLX5_UMR_STATE_ACTIVE,
+ MLX5_UMR_STATE_RECOVER,
+ MLX5_UMR_STATE_ERR,
+};
+
struct umr_common {
struct ib_pd *pd;
struct ib_cq *cq;
struct ib_qp *qp;
- /* control access to UMR QP
+ /* Protects from UMR QP overflow
*/
struct semaphore sem;
+ /* Protects from using UMR while the UMR is not active
+ */
+ struct mutex lock;
+ unsigned int state;
+ /* Protects from repeat UMR QP creation */
+ struct mutex init_lock;
};
-enum {
- MLX5_FMR_INVALID,
- MLX5_FMR_VALID,
- MLX5_FMR_BUSY,
+#define NUM_MKEYS_PER_PAGE \
+ ((PAGE_SIZE - sizeof(struct list_head)) / sizeof(u32))
+
+struct mlx5_mkeys_page {
+ u32 mkeys[NUM_MKEYS_PER_PAGE];
+ struct list_head list;
};
+static_assert(sizeof(struct mlx5_mkeys_page) == PAGE_SIZE);
-struct mlx5_cache_ent {
- struct list_head head;
- /* sync access to the cahce entry
- */
- spinlock_t lock;
+struct mlx5_mkeys_queue {
+ struct list_head pages_list;
+ u32 num_pages;
+ unsigned long ci;
+ spinlock_t lock; /* sync list ops */
+};
+struct mlx5_cache_ent {
+ struct mlx5_mkeys_queue mkeys_queue;
+ u32 pending;
- struct dentry *dir;
char name[4];
- u32 order;
- u32 size;
- u32 cur;
- u32 miss;
- u32 limit;
- struct dentry *fsize;
- struct dentry *fcur;
- struct dentry *fmiss;
- struct dentry *flimit;
+ struct rb_node node;
+ struct mlx5r_cache_rb_key rb_key;
+
+ u8 is_tmp:1;
+ u8 disabled:1;
+ u8 fill_to_high_water:1;
+ u8 tmp_cleanup_scheduled:1;
+
+ /*
+ * - limit is the low water mark for stored mkeys, 2* limit is the
+ * upper water mark.
+ */
+ u32 in_use;
+ u32 limit;
+
+ /* Statistics */
+ u32 miss;
struct mlx5_ib_dev *dev;
- struct work_struct work;
struct delayed_work dwork;
- int pending;
};
-struct mlx5_mr_cache {
+struct mlx5r_async_create_mkey {
+ union {
+ u32 in[MLX5_ST_SZ_BYTES(create_mkey_in)];
+ u32 out[MLX5_ST_SZ_DW(create_mkey_out)];
+ };
+ struct mlx5_async_work cb_work;
+ struct mlx5_cache_ent *ent;
+ u32 mkey;
+};
+
+struct mlx5_mkey_cache {
struct workqueue_struct *wq;
- struct mlx5_cache_ent ent[MAX_MR_CACHE_ENTRIES];
- int stopped;
- struct dentry *root;
+ struct rb_root rb_root;
+ struct mutex rb_lock;
+ struct dentry *fs_root;
unsigned long last_add;
};
-struct mlx5_ib_gsi_qp;
-
struct mlx5_ib_port_resources {
- struct mlx5_ib_resources *devr;
struct mlx5_ib_gsi_qp *gsi;
struct work_struct pkey_change_work;
};
+struct mlx5_data_direct_resources {
+ u32 pdn;
+ u32 mkey;
+ u32 mkey_ro;
+ u8 mkey_ro_valid :1;
+};
+
struct mlx5_ib_resources {
struct ib_cq *c0;
- struct ib_xrcd *x0;
- struct ib_xrcd *x1;
+ struct mutex cq_lock;
+ u32 xrcdn0;
+ u32 xrcdn1;
struct ib_pd *p0;
struct ib_srq *s0;
struct ib_srq *s1;
+ struct mutex srq_lock;
struct mlx5_ib_port_resources ports[2];
- /* Protects changes to the port resources */
- struct mutex mutex;
};
-struct mlx5_ib_port {
- u16 q_cnt_id;
+#define MAX_OPFC_RULES 2
+
+struct mlx5_ib_op_fc {
+ struct mlx5_fc *fc;
+ struct mlx5_flow_handle *rule[MAX_OPFC_RULES];
+};
+
+struct mlx5_ib_counters {
+ struct rdma_stat_desc *descs;
+ size_t *offsets;
+ u32 num_q_counters;
+ u32 num_cong_counters;
+ u32 num_ext_ppcnt_counters;
+ u32 num_op_counters;
+ u16 set_id;
+ struct mlx5_ib_op_fc opfcs[MLX5_IB_OPCOUNTER_MAX];
+};
+
+int mlx5_ib_fs_add_op_fc(struct mlx5_ib_dev *dev, u32 port_num,
+ struct mlx5_ib_op_fc *opfc,
+ enum mlx5_ib_optional_counter_type type);
+
+void mlx5_ib_fs_remove_op_fc(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_op_fc *opfc,
+ enum mlx5_ib_optional_counter_type type);
+
+int mlx5r_fs_bind_op_fc(struct ib_qp *qp,
+ struct mlx5_fc *fc_arr[MLX5_IB_OPCOUNTER_MAX],
+ struct xarray *qpn_opfc_xa, u32 port);
+
+void mlx5r_fs_unbind_op_fc(struct ib_qp *qp, struct xarray *qpn_opfc_xa);
+
+void mlx5r_fs_destroy_fcs(struct mlx5_ib_dev *dev,
+ struct mlx5_fc *fc_arr[MLX5_IB_OPCOUNTER_MAX]);
+
+struct mlx5_ib_multiport_info;
+
+struct mlx5_ib_multiport {
+ struct mlx5_ib_multiport_info *mpi;
+ /* To be held when accessing the multiport info */
+ spinlock_t mpi_lock;
};
struct mlx5_roce {
/* Protect mlx5_ib_get_netdev from invoking dev_hold() with a NULL
* netdev pointer
*/
- rwlock_t netdev_lock;
- struct net_device *netdev;
struct notifier_block nb;
- atomic_t next_port;
+ struct netdev_net_notifier nn;
+ struct notifier_block mdev_nb;
+ struct net_device *tracking_netdev;
+ atomic_t tx_port_affinity;
+ enum ib_port_state last_port_state;
+ struct mlx5_ib_dev *dev;
+ u32 native_port_num;
+};
+
+struct mlx5_ib_port {
+ struct mlx5_ib_counters cnts;
+ struct mlx5_ib_multiport mp;
+ struct mlx5_ib_dbg_cc_params *dbg_cc_params;
+ struct mlx5_roce roce;
+ struct mlx5_eswitch_rep *rep;
+#ifdef CONFIG_MLX5_MACSEC
+ struct mlx5_reserved_gids *reserved_gids;
+#endif
+};
+
+struct mlx5_ib_dbg_param {
+ int offset;
+ struct mlx5_ib_dev *dev;
+ struct dentry *dentry;
+ u32 port_num;
+};
+
+enum mlx5_ib_dbg_cc_types {
+ MLX5_IB_DBG_CC_RP_CLAMP_TGT_RATE,
+ MLX5_IB_DBG_CC_RP_CLAMP_TGT_RATE_ATI,
+ MLX5_IB_DBG_CC_RP_TIME_RESET,
+ MLX5_IB_DBG_CC_RP_BYTE_RESET,
+ MLX5_IB_DBG_CC_RP_THRESHOLD,
+ MLX5_IB_DBG_CC_RP_AI_RATE,
+ MLX5_IB_DBG_CC_RP_MAX_RATE,
+ MLX5_IB_DBG_CC_RP_HAI_RATE,
+ MLX5_IB_DBG_CC_RP_MIN_DEC_FAC,
+ MLX5_IB_DBG_CC_RP_MIN_RATE,
+ MLX5_IB_DBG_CC_RP_RATE_TO_SET_ON_FIRST_CNP,
+ MLX5_IB_DBG_CC_RP_DCE_TCP_G,
+ MLX5_IB_DBG_CC_RP_DCE_TCP_RTT,
+ MLX5_IB_DBG_CC_RP_RATE_REDUCE_MONITOR_PERIOD,
+ MLX5_IB_DBG_CC_RP_INITIAL_ALPHA_VALUE,
+ MLX5_IB_DBG_CC_RP_GD,
+ MLX5_IB_DBG_CC_NP_MIN_TIME_BETWEEN_CNPS,
+ MLX5_IB_DBG_CC_NP_CNP_DSCP,
+ MLX5_IB_DBG_CC_NP_CNP_PRIO_MODE,
+ MLX5_IB_DBG_CC_NP_CNP_PRIO,
+ MLX5_IB_DBG_CC_GENERAL_RTT_RESP_DSCP_VALID,
+ MLX5_IB_DBG_CC_GENERAL_RTT_RESP_DSCP,
+ MLX5_IB_DBG_CC_MAX,
+};
+
+struct mlx5_ib_dbg_cc_params {
+ struct dentry *root;
+ struct mlx5_ib_dbg_param params[MLX5_IB_DBG_CC_MAX];
+};
+
+enum {
+ MLX5_MAX_DELAY_DROP_TIMEOUT_MS = 100,
+};
+
+struct mlx5_ib_delay_drop {
+ struct mlx5_ib_dev *dev;
+ struct work_struct delay_drop_work;
+ /* serialize setting of delay drop */
+ struct mutex lock;
+ u32 timeout;
+ bool activate;
+ atomic_t events_cnt;
+ atomic_t rqs_cnt;
+ struct dentry *dir_debugfs;
+};
+
+enum mlx5_ib_stages {
+ MLX5_IB_STAGE_INIT,
+ MLX5_IB_STAGE_FS,
+ MLX5_IB_STAGE_CAPS,
+ MLX5_IB_STAGE_NON_DEFAULT_CB,
+ MLX5_IB_STAGE_ROCE,
+ MLX5_IB_STAGE_QP,
+ MLX5_IB_STAGE_SRQ,
+ MLX5_IB_STAGE_DEVICE_RESOURCES,
+ MLX5_IB_STAGE_ODP,
+ MLX5_IB_STAGE_COUNTERS,
+ MLX5_IB_STAGE_CONG_DEBUGFS,
+ MLX5_IB_STAGE_BFREG,
+ MLX5_IB_STAGE_PRE_IB_REG_UMR,
+ MLX5_IB_STAGE_WHITELIST_UID,
+ MLX5_IB_STAGE_IB_REG,
+ MLX5_IB_STAGE_DEVICE_NOTIFIER,
+ MLX5_IB_STAGE_POST_IB_REG_UMR,
+ MLX5_IB_STAGE_DELAY_DROP,
+ MLX5_IB_STAGE_RESTRACK,
+ MLX5_IB_STAGE_MAX,
+};
+
+struct mlx5_ib_stage {
+ int (*init)(struct mlx5_ib_dev *dev);
+ void (*cleanup)(struct mlx5_ib_dev *dev);
+};
+
+#define STAGE_CREATE(_stage, _init, _cleanup) \
+ .stage[_stage] = {.init = _init, .cleanup = _cleanup}
+
+struct mlx5_ib_profile {
+ struct mlx5_ib_stage stage[MLX5_IB_STAGE_MAX];
+};
+
+struct mlx5_ib_multiport_info {
+ struct list_head list;
+ struct mlx5_ib_dev *ibdev;
+ struct mlx5_core_dev *mdev;
+ struct notifier_block mdev_events;
+ struct completion unref_comp;
+ u64 sys_image_guid;
+ u32 mdev_refcnt;
+ bool is_master;
+ bool unaffiliate;
+};
+
+struct mlx5_ib_flow_action {
+ struct ib_flow_action ib_action;
+ union {
+ struct {
+ u64 ib_flags;
+ struct mlx5_accel_esp_xfrm *ctx;
+ } esp_aes_gcm;
+ struct {
+ struct mlx5_ib_dev *dev;
+ u32 sub_type;
+ union {
+ struct mlx5_modify_hdr *modify_hdr;
+ struct mlx5_pkt_reformat *pkt_reformat;
+ };
+ } flow_action_raw;
+ };
+};
+
+struct mlx5_dm {
+ struct mlx5_core_dev *dev;
+ /* This lock is used to protect the access to the shared
+ * allocation map when concurrent requests by different
+ * processes are handled.
+ */
+ spinlock_t lock;
+ DECLARE_BITMAP(memic_alloc_pages, MLX5_MAX_MEMIC_PAGES);
+};
+
+struct mlx5_read_counters_attr {
+ struct mlx5_fc *hw_cntrs_hndl;
+ u64 *out;
+ u32 flags;
+};
+
+enum mlx5_ib_counters_type {
+ MLX5_IB_COUNTERS_FLOW,
+};
+
+struct mlx5_ib_mcounters {
+ struct ib_counters ibcntrs;
+ enum mlx5_ib_counters_type type;
+ /* number of counters supported for this counters type */
+ u32 counters_num;
+ struct mlx5_fc *hw_cntrs_hndl;
+ /* read function for this counters type */
+ int (*read_counters)(struct ib_device *ibdev,
+ struct mlx5_read_counters_attr *read_attr);
+ /* max index set as part of create_flow */
+ u32 cntrs_max_index;
+ /* number of counters data entries (<description,index> pair) */
+ u32 ncounters;
+ /* counters data array for descriptions and indexes */
+ struct mlx5_ib_flow_counters_desc *counters_data;
+ /* protects access to mcounters internal data */
+ struct mutex mcntrs_mutex;
+};
+
+static inline struct mlx5_ib_mcounters *
+to_mcounters(struct ib_counters *ibcntrs)
+{
+ return container_of(ibcntrs, struct mlx5_ib_mcounters, ibcntrs);
+}
+
+int parse_flow_flow_action(struct mlx5_ib_flow_action *maction,
+ bool is_egress,
+ struct mlx5_flow_act *action);
+struct mlx5_ib_lb_state {
+ /* protect the user_td */
+ struct mutex mutex;
+ u32 user_td;
+ int qps;
+ bool enabled;
+ bool force_enable;
+};
+
+struct mlx5_ib_pf_eq {
+ struct notifier_block irq_nb;
+ struct mlx5_ib_dev *dev;
+ struct mlx5_eq *core;
+ struct work_struct work;
+ spinlock_t lock; /* Pagefaults spinlock */
+ struct workqueue_struct *wq;
+ mempool_t *pool;
+};
+
+struct mlx5_devx_event_table {
+ struct mlx5_nb devx_nb;
+ /* serialize updating the event_xa */
+ struct mutex event_xa_lock;
+ struct xarray event_xa;
+};
+
+struct mlx5_var_table {
+ /* serialize updating the bitmap */
+ struct mutex bitmap_lock;
+ unsigned long *bitmap;
+ u64 hw_start_addr;
+ u32 stride_size;
+ u64 num_var_hw_entries;
+};
+
+struct mlx5_port_caps {
+ bool has_smi;
+ u8 ext_port_cap;
+};
+
+
+struct mlx5_special_mkeys {
+ u32 dump_fill_mkey;
+ __be32 null_mkey;
+ __be32 terminate_scatter_list_mkey;
+};
+
+struct mlx5_macsec {
+ struct mutex lock; /* Protects mlx5_macsec internal contexts */
+ struct list_head macsec_devices_list;
+ struct notifier_block blocking_events_nb;
};
struct mlx5_ib_dev {
struct ib_device ib_dev;
struct mlx5_core_dev *mdev;
- struct mlx5_roce roce;
- MLX5_DECLARE_DOORBELL_LOCK(uar_lock);
+ struct mlx5_data_direct_dev *data_direct_dev;
+ /* protect accessing data_direct_dev */
+ struct mutex data_direct_lock;
+ struct notifier_block mdev_events;
+ struct notifier_block lag_events;
int num_ports;
/* serialize update of capability mask
*/
struct mutex cap_mask_mutex;
- bool ib_active;
+ u8 ib_active:1;
+ u8 is_rep:1;
+ u8 lag_active:1;
+ u8 fill_delay;
struct umr_common umrc;
/* sync used page count stats
*/
struct mlx5_ib_resources devr;
- struct mlx5_mr_cache cache;
+
+ atomic_t mkey_var;
+ struct mlx5_mkey_cache cache;
struct timer_list delay_timer;
/* Prevents soft lock on massive reg MRs */
struct mutex slow_path_mutex;
- int fill_delay;
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
struct ib_odp_caps odp_caps;
- /*
- * Sleepable RCU that prevents destruction of MRs while they are still
- * being used by a page fault handler.
- */
- struct srcu_struct mr_srcu;
-#endif
- struct mlx5_ib_flow_db flow_db;
+ u64 odp_max_size;
+ struct mutex odp_eq_mutex;
+ struct mlx5_ib_pf_eq odp_pf_eq;
+
+ struct xarray odp_mkeys;
+
+ struct mlx5_ib_flow_db *flow_db;
/* protect resources needed as part of reset flow */
spinlock_t reset_flow_resource_lock;
struct list_head qp_list;
+ struct list_head data_direct_mr_list;
/* Array with num_ports elements */
struct mlx5_ib_port *port;
+ struct mlx5_sq_bfreg bfreg;
+ struct mlx5_sq_bfreg fp_bfreg;
+ struct mlx5_ib_delay_drop delay_drop;
+ const struct mlx5_ib_profile *profile;
+
+ struct mlx5_ib_lb_state lb;
+ u8 umr_fence;
+ struct list_head ib_dev_list;
+ u64 sys_image_guid;
+ struct mlx5_dm dm;
+ u16 devx_whitelist_uid;
+ struct mlx5_srq_table srq_table;
+ struct mlx5_qp_table qp_table;
+ struct mlx5_async_ctx async_ctx;
+ struct mlx5_devx_event_table devx_event_table;
+ struct mlx5_var_table var_table;
+
+ struct xarray sig_mrs;
+ struct mlx5_port_caps port_caps[MLX5_MAX_PORTS];
+ u16 pkey_table_len;
+ u8 lag_ports;
+ struct mlx5_special_mkeys mkeys;
+ struct mlx5_data_direct_resources ddr;
+
+#ifdef CONFIG_MLX5_MACSEC
+ struct mlx5_macsec macsec;
+#endif
+
+ u8 num_plane;
+ struct mlx5_ib_dev *smi_dev;
+ const char *sub_dev_name;
};
static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq)
@@ -663,6 +1246,19 @@ static inline struct mlx5_ib_dev *to_mdev(struct ib_device *ibdev)
return container_of(ibdev, struct mlx5_ib_dev, ib_dev);
}
+static inline struct mlx5_ib_dev *mr_to_mdev(struct mlx5_ib_mr *mr)
+{
+ return to_mdev(mr->ibmr.device);
+}
+
+static inline struct mlx5_ib_dev *mlx5_udata_to_mdev(struct ib_udata *udata)
+{
+ struct mlx5_ib_ucontext *context = rdma_udata_to_drv_context(
+ udata, struct mlx5_ib_ucontext, ibucontext);
+
+ return to_mdev(context->ibucontext.device);
+}
+
static inline struct mlx5_ib_cq *to_mcq(struct ib_cq *ibcq)
{
return container_of(ibcq, struct mlx5_ib_cq, ibcq);
@@ -678,11 +1274,6 @@ static inline struct mlx5_ib_rwq *to_mibrwq(struct mlx5_core_qp *core_qp)
return container_of(core_qp, struct mlx5_ib_rwq, core_qp);
}
-static inline struct mlx5_ib_mr *to_mibmr(struct mlx5_core_mkey *mmkey)
-{
- return container_of(mmkey, struct mlx5_ib_mr, mmkey);
-}
-
static inline struct mlx5_ib_pd *to_mpd(struct ib_pd *ibpd)
{
return container_of(ibpd, struct mlx5_ib_pd, ibpd);
@@ -723,94 +1314,111 @@ static inline struct mlx5_ib_mw *to_mmw(struct ib_mw *ibmw)
return container_of(ibmw, struct mlx5_ib_mw, ibmw);
}
-struct mlx5_ib_ah {
- struct ib_ah ibah;
- struct mlx5_av av;
-};
+static inline struct mlx5_ib_flow_action *
+to_mflow_act(struct ib_flow_action *ibact)
+{
+ return container_of(ibact, struct mlx5_ib_flow_action, ib_action);
+}
-static inline struct mlx5_ib_ah *to_mah(struct ib_ah *ibah)
+static inline struct mlx5_user_mmap_entry *
+to_mmmap(struct rdma_user_mmap_entry *rdma_entry)
{
- return container_of(ibah, struct mlx5_ib_ah, ibah);
+ return container_of(rdma_entry,
+ struct mlx5_user_mmap_entry, rdma_entry);
}
+int mlx5_ib_dev_res_cq_init(struct mlx5_ib_dev *dev);
+int mlx5_ib_dev_res_srq_init(struct mlx5_ib_dev *dev);
int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, unsigned long virt,
struct mlx5_db *db);
void mlx5_ib_db_unmap_user(struct mlx5_ib_ucontext *context, struct mlx5_db *db);
void __mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq);
void mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq);
void mlx5_ib_free_srq_wqe(struct mlx5_ib_srq *srq, int wqe_index);
-int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey,
- u8 port, const struct ib_wc *in_wc, const struct ib_grh *in_grh,
- const void *in_mad, void *response_mad);
-struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
- struct ib_udata *udata);
-int mlx5_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr);
-int mlx5_ib_destroy_ah(struct ib_ah *ah);
-struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd,
- struct ib_srq_init_attr *init_attr,
- struct ib_udata *udata);
+int mlx5_ib_create_ah(struct ib_ah *ah, struct rdma_ah_init_attr *init_attr,
+ struct ib_udata *udata);
+int mlx5_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr);
+static inline int mlx5_ib_destroy_ah(struct ib_ah *ah, u32 flags)
+{
+ return 0;
+}
+int mlx5_ib_create_srq(struct ib_srq *srq, struct ib_srq_init_attr *init_attr,
+ struct ib_udata *udata);
int mlx5_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
enum ib_srq_attr_mask attr_mask, struct ib_udata *udata);
int mlx5_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr);
-int mlx5_ib_destroy_srq(struct ib_srq *srq);
-int mlx5_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
- struct ib_recv_wr **bad_wr);
-struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd,
- struct ib_qp_init_attr *init_attr,
- struct ib_udata *udata);
+int mlx5_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata);
+int mlx5_ib_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
+ const struct ib_recv_wr **bad_wr);
+int mlx5_ib_enable_lb(struct mlx5_ib_dev *dev, bool td, bool qp);
+void mlx5_ib_disable_lb(struct mlx5_ib_dev *dev, bool td, bool qp);
+int mlx5_ib_create_qp(struct ib_qp *qp, struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata);
int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
int attr_mask, struct ib_udata *udata);
int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask,
struct ib_qp_init_attr *qp_init_attr);
-int mlx5_ib_destroy_qp(struct ib_qp *qp);
-int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
- struct ib_send_wr **bad_wr);
-int mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
- struct ib_recv_wr **bad_wr);
-void *mlx5_get_send_wqe(struct mlx5_ib_qp *qp, int n);
-int mlx5_ib_read_user_wqe(struct mlx5_ib_qp *qp, int send, int wqe_index,
- void *buffer, u32 length,
- struct mlx5_ib_qp_base *base);
-struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
- const struct ib_cq_init_attr *attr,
- struct ib_ucontext *context,
- struct ib_udata *udata);
-int mlx5_ib_destroy_cq(struct ib_cq *cq);
+int mlx5_ib_destroy_qp(struct ib_qp *qp, struct ib_udata *udata);
+void mlx5_ib_drain_sq(struct ib_qp *qp);
+void mlx5_ib_drain_rq(struct ib_qp *qp);
+int mlx5_ib_read_wqe_sq(struct mlx5_ib_qp *qp, int wqe_index, void *buffer,
+ size_t buflen, size_t *bc);
+int mlx5_ib_read_wqe_rq(struct mlx5_ib_qp *qp, int wqe_index, void *buffer,
+ size_t buflen, size_t *bc);
+int mlx5_ib_read_wqe_srq(struct mlx5_ib_srq *srq, int wqe_index, void *buffer,
+ size_t buflen, size_t *bc);
+int mlx5_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+ struct uverbs_attr_bundle *attrs);
+int mlx5_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata);
int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
+int mlx5_ib_pre_destroy_cq(struct ib_cq *cq);
+void mlx5_ib_post_destroy_cq(struct ib_cq *cq);
int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags);
int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period);
int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata);
struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc);
struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt_addr, int access_flags,
+ struct ib_dmah *dmah,
struct ib_udata *udata);
-struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
- struct ib_udata *udata);
+struct ib_mr *mlx5_ib_reg_user_mr_dmabuf(struct ib_pd *pd, u64 start,
+ u64 length, u64 virt_addr,
+ int fd, int access_flags,
+ struct ib_dmah *dmah,
+ struct uverbs_attr_bundle *attrs);
+int mlx5_ib_advise_mr(struct ib_pd *pd,
+ enum ib_uverbs_advise_mr_advice advice,
+ u32 flags,
+ struct ib_sge *sg_list,
+ u32 num_sge,
+ struct uverbs_attr_bundle *attrs);
+int mlx5_ib_alloc_mw(struct ib_mw *mw, struct ib_udata *udata);
int mlx5_ib_dealloc_mw(struct ib_mw *mw);
-int mlx5_ib_update_mtt(struct mlx5_ib_mr *mr, u64 start_page_index,
- int npages, int zap);
-int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
- u64 length, u64 virt_addr, int access_flags,
- struct ib_pd *pd, struct ib_udata *udata);
-int mlx5_ib_dereg_mr(struct ib_mr *ibmr);
-struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
- enum ib_mr_type mr_type,
+struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
+ int access_flags);
+void mlx5_ib_free_odp_mr(struct mlx5_ib_mr *mr);
+struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
+ u64 length, u64 virt_addr, int access_flags,
+ struct ib_pd *pd, struct ib_udata *udata);
+int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata);
+struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
u32 max_num_sg);
+struct ib_mr *mlx5_ib_alloc_mr_integrity(struct ib_pd *pd,
+ u32 max_num_sg,
+ u32 max_num_meta_sg);
int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
unsigned int *sg_offset);
-int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
+int mlx5_ib_map_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg,
+ int data_sg_nents, unsigned int *data_sg_offset,
+ struct scatterlist *meta_sg, int meta_sg_nents,
+ unsigned int *meta_sg_offset);
+int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u32 port_num,
const struct ib_wc *in_wc, const struct ib_grh *in_grh,
- const struct ib_mad_hdr *in, size_t in_mad_size,
- struct ib_mad_hdr *out, size_t *out_mad_size,
- u16 *out_mad_pkey_index);
-struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev,
- struct ib_ucontext *context,
- struct ib_udata *udata);
-int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd);
-int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset);
-int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port);
-int mlx5_query_mad_ifc_smp_attr_node_info(struct ib_device *ibdev,
- struct ib_smp *out_mad);
+ const struct ib_mad *in, struct ib_mad *out,
+ size_t *out_mad_size, u16 *out_mad_pkey_index);
+int mlx5_ib_alloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata);
+int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata);
+int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, unsigned int port);
int mlx5_query_mad_ifc_system_image_guid(struct ib_device *ibdev,
__be64 *sys_image_guid);
int mlx5_query_mad_ifc_max_pkeys(struct ib_device *ibdev,
@@ -819,136 +1427,172 @@ int mlx5_query_mad_ifc_vendor_id(struct ib_device *ibdev,
u32 *vendor_id);
int mlx5_query_mad_ifc_node_desc(struct mlx5_ib_dev *dev, char *node_desc);
int mlx5_query_mad_ifc_node_guid(struct mlx5_ib_dev *dev, __be64 *node_guid);
-int mlx5_query_mad_ifc_pkey(struct ib_device *ibdev, u8 port, u16 index,
+int mlx5_query_mad_ifc_pkey(struct ib_device *ibdev, u32 port, u16 index,
u16 *pkey);
-int mlx5_query_mad_ifc_gids(struct ib_device *ibdev, u8 port, int index,
+int mlx5_query_mad_ifc_gids(struct ib_device *ibdev, u32 port, int index,
union ib_gid *gid);
-int mlx5_query_mad_ifc_port(struct ib_device *ibdev, u8 port,
+int mlx5_query_mad_ifc_port(struct ib_device *ibdev, u32 port,
struct ib_port_attr *props);
-int mlx5_ib_query_port(struct ib_device *ibdev, u8 port,
+int mlx5_ib_query_port(struct ib_device *ibdev, u32 port,
struct ib_port_attr *props);
-int mlx5_ib_init_fmr(struct mlx5_ib_dev *dev);
-void mlx5_ib_cleanup_fmr(struct mlx5_ib_dev *dev);
-void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr,
- unsigned long max_page_shift,
- int *count, int *shift,
- int *ncont, int *order);
-void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
- int page_shift, size_t offset, size_t num_pages,
- __be64 *pas, int access_flags);
-void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
- int page_shift, __be64 *pas, int access_flags);
-void mlx5_ib_copy_pas(u64 *old, u64 *new, int step, int num);
-int mlx5_ib_get_cqe_size(struct mlx5_ib_dev *dev, struct ib_cq *ibcq);
-int mlx5_mr_cache_init(struct mlx5_ib_dev *dev);
-int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev);
-int mlx5_mr_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift);
+void mlx5_ib_populate_pas(struct ib_umem *umem, size_t page_size, __be64 *pas,
+ u64 access_flags);
+int mlx5_ib_get_cqe_size(struct ib_cq *ibcq);
+int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev);
+void mlx5_mkey_cache_cleanup(struct mlx5_ib_dev *dev);
+struct mlx5_cache_ent *
+mlx5r_cache_create_ent_locked(struct mlx5_ib_dev *dev,
+ struct mlx5r_cache_rb_key rb_key,
+ bool persistent_entry);
+
+struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev,
+ int access_flags, int access_mode,
+ int ndescs);
+
int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
struct ib_mr_status *mr_status);
struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd,
struct ib_wq_init_attr *init_attr,
struct ib_udata *udata);
-int mlx5_ib_destroy_wq(struct ib_wq *wq);
+int mlx5_ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata);
int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
u32 wq_attr_mask, struct ib_udata *udata);
-struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device,
- struct ib_rwq_ind_table_init_attr *init_attr,
- struct ib_udata *udata);
+int mlx5_ib_create_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_table,
+ struct ib_rwq_ind_table_init_attr *init_attr,
+ struct ib_udata *udata);
int mlx5_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *wq_ind_table);
+struct ib_mr *mlx5_ib_reg_dm_mr(struct ib_pd *pd, struct ib_dm *dm,
+ struct ib_dm_mr_attr *attr,
+ struct uverbs_attr_bundle *attrs);
+void mlx5_ib_data_direct_bind(struct mlx5_ib_dev *ibdev,
+ struct mlx5_data_direct_dev *dev);
+void mlx5_ib_data_direct_unbind(struct mlx5_ib_dev *ibdev);
+void mlx5_ib_revoke_data_direct_mrs(struct mlx5_ib_dev *dev);
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-extern struct workqueue_struct *mlx5_ib_page_fault_wq;
-
-void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev);
-void mlx5_ib_mr_pfault_handler(struct mlx5_ib_qp *qp,
- struct mlx5_ib_pfault *pfault);
-void mlx5_ib_odp_create_qp(struct mlx5_ib_qp *qp);
int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev);
-void mlx5_ib_odp_remove_one(struct mlx5_ib_dev *ibdev);
+int mlx5r_odp_create_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq);
+void mlx5_ib_odp_cleanup_one(struct mlx5_ib_dev *ibdev);
int __init mlx5_ib_odp_init(void);
void mlx5_ib_odp_cleanup(void);
-void mlx5_ib_qp_disable_pagefaults(struct mlx5_ib_qp *qp);
-void mlx5_ib_qp_enable_pagefaults(struct mlx5_ib_qp *qp);
-void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start,
- unsigned long end);
+int mlx5_odp_init_mkey_cache(struct mlx5_ib_dev *dev);
+int mlx5_odp_populate_xlt(void *xlt, size_t idx, size_t nentries,
+ struct mlx5_ib_mr *mr, int flags);
+
+int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd,
+ enum ib_uverbs_advise_mr_advice advice,
+ u32 flags, struct ib_sge *sg_list, u32 num_sge);
+int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr);
+int mlx5_ib_init_dmabuf_mr(struct mlx5_ib_mr *mr);
#else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
-static inline void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
+static inline int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev) { return 0; }
+static inline int mlx5r_odp_create_eq(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_pf_eq *eq)
{
- return;
+ return 0;
}
-
-static inline void mlx5_ib_odp_create_qp(struct mlx5_ib_qp *qp) {}
-static inline int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev) { return 0; }
-static inline void mlx5_ib_odp_remove_one(struct mlx5_ib_dev *ibdev) {}
+static inline void mlx5_ib_odp_cleanup_one(struct mlx5_ib_dev *ibdev) {}
static inline int mlx5_ib_odp_init(void) { return 0; }
-static inline void mlx5_ib_odp_cleanup(void) {}
-static inline void mlx5_ib_qp_disable_pagefaults(struct mlx5_ib_qp *qp) {}
-static inline void mlx5_ib_qp_enable_pagefaults(struct mlx5_ib_qp *qp) {}
+static inline void mlx5_ib_odp_cleanup(void) {}
+static inline int mlx5_odp_init_mkey_cache(struct mlx5_ib_dev *dev)
+{
+ return 0;
+}
+static inline int mlx5_odp_populate_xlt(void *xlt, size_t idx, size_t nentries,
+ struct mlx5_ib_mr *mr, int flags)
+{
+ return -EOPNOTSUPP;
+}
+static inline int
+mlx5_ib_advise_mr_prefetch(struct ib_pd *pd,
+ enum ib_uverbs_advise_mr_advice advice, u32 flags,
+ struct ib_sge *sg_list, u32 num_sge)
+{
+ return -EOPNOTSUPP;
+}
+static inline int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr)
+{
+ return -EOPNOTSUPP;
+}
+static inline int mlx5_ib_init_dmabuf_mr(struct mlx5_ib_mr *mr)
+{
+ return -EOPNOTSUPP;
+}
#endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
+extern const struct mmu_interval_notifier_ops mlx5_mn_ops;
+
+/* Needed for rep profile */
+void __mlx5_ib_remove(struct mlx5_ib_dev *dev,
+ const struct mlx5_ib_profile *profile,
+ int stage);
+int __mlx5_ib_add(struct mlx5_ib_dev *dev,
+ const struct mlx5_ib_profile *profile);
+
int mlx5_ib_get_vf_config(struct ib_device *device, int vf,
- u8 port, struct ifla_vf_info *info);
+ u32 port, struct ifla_vf_info *info);
int mlx5_ib_set_vf_link_state(struct ib_device *device, int vf,
- u8 port, int state);
+ u32 port, int state);
int mlx5_ib_get_vf_stats(struct ib_device *device, int vf,
- u8 port, struct ifla_vf_stats *stats);
-int mlx5_ib_set_vf_guid(struct ib_device *device, int vf, u8 port,
+ u32 port, struct ifla_vf_stats *stats);
+int mlx5_ib_get_vf_guid(struct ib_device *device, int vf, u32 port,
+ struct ifla_vf_guid *node_guid,
+ struct ifla_vf_guid *port_guid);
+int mlx5_ib_set_vf_guid(struct ib_device *device, int vf, u32 port,
u64 guid, int type);
-__be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num,
- int index);
+__be16 mlx5_get_roce_udp_sport_min(const struct mlx5_ib_dev *dev,
+ const struct ib_gid_attr *attr);
+
+void mlx5_ib_cleanup_cong_debugfs(struct mlx5_ib_dev *dev, u32 port_num);
+void mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev, u32 port_num);
/* GSI QP helper functions */
-struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd,
- struct ib_qp_init_attr *init_attr);
-int mlx5_ib_gsi_destroy_qp(struct ib_qp *qp);
+int mlx5_ib_create_gsi(struct ib_pd *pd, struct mlx5_ib_qp *mqp,
+ struct ib_qp_init_attr *attr);
+int mlx5_ib_destroy_gsi(struct mlx5_ib_qp *mqp);
int mlx5_ib_gsi_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr,
int attr_mask);
int mlx5_ib_gsi_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
int qp_attr_mask,
struct ib_qp_init_attr *qp_init_attr);
-int mlx5_ib_gsi_post_send(struct ib_qp *qp, struct ib_send_wr *wr,
- struct ib_send_wr **bad_wr);
-int mlx5_ib_gsi_post_recv(struct ib_qp *qp, struct ib_recv_wr *wr,
- struct ib_recv_wr **bad_wr);
+int mlx5_ib_gsi_post_send(struct ib_qp *qp, const struct ib_send_wr *wr,
+ const struct ib_send_wr **bad_wr);
+int mlx5_ib_gsi_post_recv(struct ib_qp *qp, const struct ib_recv_wr *wr,
+ const struct ib_recv_wr **bad_wr);
void mlx5_ib_gsi_pkey_change(struct mlx5_ib_gsi_qp *gsi);
int mlx5_ib_generate_wc(struct ib_cq *ibcq, struct ib_wc *wc);
-static inline void init_query_mad(struct ib_smp *mad)
-{
- mad->base_version = 1;
- mad->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED;
- mad->class_version = 1;
- mad->method = IB_MGMT_METHOD_GET;
-}
-
-static inline u8 convert_access(int acc)
-{
- return (acc & IB_ACCESS_REMOTE_ATOMIC ? MLX5_PERM_ATOMIC : 0) |
- (acc & IB_ACCESS_REMOTE_WRITE ? MLX5_PERM_REMOTE_WRITE : 0) |
- (acc & IB_ACCESS_REMOTE_READ ? MLX5_PERM_REMOTE_READ : 0) |
- (acc & IB_ACCESS_LOCAL_WRITE ? MLX5_PERM_LOCAL_WRITE : 0) |
- MLX5_PERM_LOCAL_READ;
-}
+void mlx5_ib_free_bfreg(struct mlx5_ib_dev *dev, struct mlx5_bfreg_info *bfregi,
+ int bfregn);
+struct mlx5_ib_dev *mlx5_ib_get_ibdev_from_mpi(struct mlx5_ib_multiport_info *mpi);
+struct mlx5_core_dev *mlx5_ib_get_native_port_mdev(struct mlx5_ib_dev *dev,
+ u32 ib_port_num,
+ u32 *native_port_num);
+void mlx5_ib_put_native_port_mdev(struct mlx5_ib_dev *dev,
+ u32 port_num);
+
+extern const struct uapi_definition mlx5_ib_devx_defs[];
+extern const struct uapi_definition mlx5_ib_flow_defs[];
+extern const struct uapi_definition mlx5_ib_qos_defs[];
+extern const struct uapi_definition mlx5_ib_std_types_defs[];
+extern const struct uapi_definition mlx5_ib_create_cq_defs[];
static inline int is_qp1(enum ib_qp_type qp_type)
{
- return qp_type == MLX5_IB_QPT_HW_GSI;
+ return qp_type == MLX5_IB_QPT_HW_GSI || qp_type == IB_QPT_GSI;
}
-#define MLX5_MAX_UMR_SHIFT 16
-#define MLX5_MAX_UMR_PAGES (1 << MLX5_MAX_UMR_SHIFT)
-
static inline u32 check_cq_create_flags(u32 flags)
{
/*
* It returns non-zero value for unsupported CQ
* create flags, otherwise it returns zero.
*/
- return (flags & ~(IB_CQ_FLAGS_IGNORE_OVERRUN |
- IB_CQ_FLAGS_TIMESTAMP_COMPLETION));
+ return (flags & ~(IB_UVERBS_CQ_FLAGS_IGNORE_OVERRUN |
+ IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION));
}
static inline int verify_assign_uidx(u8 cqe_version, u32 cmd_uidx,
@@ -973,12 +1617,11 @@ static inline int get_qp_user_index(struct mlx5_ib_ucontext *ucontext,
{
u8 cqe_version = ucontext->cqe_version;
- if (field_avail(struct mlx5_ib_create_qp, uidx, inlen) &&
- !cqe_version && (ucmd->uidx == MLX5_IB_DEFAULT_UIDX))
+ if ((offsetofend(typeof(*ucmd), uidx) <= inlen) && !cqe_version &&
+ (ucmd->uidx == MLX5_IB_DEFAULT_UIDX))
return 0;
- if (!!(field_avail(struct mlx5_ib_create_qp, uidx, inlen) !=
- !!cqe_version))
+ if ((offsetofend(typeof(*ucmd), uidx) <= inlen) != !!cqe_version)
return -EINVAL;
return verify_assign_uidx(cqe_version, ucmd->uidx, user_index);
@@ -991,14 +1634,192 @@ static inline int get_srq_user_index(struct mlx5_ib_ucontext *ucontext,
{
u8 cqe_version = ucontext->cqe_version;
- if (field_avail(struct mlx5_ib_create_srq, uidx, inlen) &&
- !cqe_version && (ucmd->uidx == MLX5_IB_DEFAULT_UIDX))
+ if ((offsetofend(typeof(*ucmd), uidx) <= inlen) && !cqe_version &&
+ (ucmd->uidx == MLX5_IB_DEFAULT_UIDX))
return 0;
- if (!!(field_avail(struct mlx5_ib_create_srq, uidx, inlen) !=
- !!cqe_version))
+ if ((offsetofend(typeof(*ucmd), uidx) <= inlen) != !!cqe_version)
return -EINVAL;
return verify_assign_uidx(cqe_version, ucmd->uidx, user_index);
}
+
+static inline int get_uars_per_sys_page(struct mlx5_ib_dev *dev, bool lib_support)
+{
+ return lib_support && MLX5_CAP_GEN(dev->mdev, uar_4k) ?
+ MLX5_UARS_IN_PAGE : 1;
+}
+
+extern void *xlt_emergency_page;
+
+int bfregn_to_uar_index(struct mlx5_ib_dev *dev,
+ struct mlx5_bfreg_info *bfregi, u32 bfregn,
+ bool dyn_bfreg);
+
+static inline int mlx5r_store_odp_mkey(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_mkey *mmkey)
+{
+ refcount_set(&mmkey->usecount, 1);
+
+ return xa_err(xa_store(&dev->odp_mkeys, mlx5_base_mkey(mmkey->key),
+ mmkey, GFP_KERNEL));
+}
+
+/* deref an mkey that can participate in ODP flow */
+static inline void mlx5r_deref_odp_mkey(struct mlx5_ib_mkey *mmkey)
+{
+ if (refcount_dec_and_test(&mmkey->usecount))
+ wake_up(&mmkey->wait);
+}
+
+/* deref an mkey that can participate in ODP flow and wait for relese */
+static inline void mlx5r_deref_wait_odp_mkey(struct mlx5_ib_mkey *mmkey)
+{
+ mlx5r_deref_odp_mkey(mmkey);
+ wait_event(mmkey->wait, refcount_read(&mmkey->usecount) == 0);
+}
+
+static inline bool mlx5_ib_lag_should_assign_affinity(struct mlx5_ib_dev *dev)
+{
+ /*
+ * If the driver is in hash mode and the port_select_flow_table_bypass cap
+ * is supported, it means that the driver no longer needs to assign the port
+ * affinity by default. If a user wants to set the port affinity explicitly,
+ * the user has a dedicated API to do that, so there is no need to assign
+ * the port affinity by default.
+ */
+ if (dev->lag_active &&
+ mlx5_lag_mode_is_hash(dev->mdev) &&
+ MLX5_CAP_PORT_SELECTION(dev->mdev, port_select_flow_table_bypass))
+ return 0;
+
+ if (mlx5_lag_is_lacp_owner(dev->mdev) && !dev->lag_active)
+ return 0;
+
+ return dev->lag_active ||
+ (MLX5_CAP_GEN(dev->mdev, num_lag_ports) > 1 &&
+ MLX5_CAP_GEN(dev->mdev, lag_tx_port_affinity));
+}
+
+static inline bool rt_supported(int ts_cap)
+{
+ return ts_cap == MLX5_TIMESTAMP_FORMAT_CAP_REAL_TIME ||
+ ts_cap == MLX5_TIMESTAMP_FORMAT_CAP_FREE_RUNNING_AND_REAL_TIME;
+}
+
+/*
+ * PCI Peer to Peer is a trainwreck. If no switch is present then things
+ * sometimes work, depending on the pci_distance_p2p logic for excluding broken
+ * root complexes. However if a switch is present in the path, then things get
+ * really ugly depending on how the switch is setup. This table assumes that the
+ * root complex is strict and is validating that all req/reps are matches
+ * perfectly - so any scenario where it sees only half the transaction is a
+ * failure.
+ *
+ * CR/RR/DT ATS RO P2P
+ * 00X X X OK
+ * 010 X X fails (request is routed to root but root never sees comp)
+ * 011 0 X fails (request is routed to root but root never sees comp)
+ * 011 1 X OK
+ * 10X X 1 OK
+ * 101 X 0 fails (completion is routed to root but root didn't see req)
+ * 110 X 0 SLOW
+ * 111 0 0 SLOW
+ * 111 1 0 fails (completion is routed to root but root didn't see req)
+ * 111 1 1 OK
+ *
+ * Unfortunately we cannot reliably know if a switch is present or what the
+ * CR/RR/DT ACS settings are, as in a VM that is all hidden. Assume that
+ * CR/RR/DT is 111 if the ATS cap is enabled and follow the last three rows.
+ *
+ * For now assume if the umem is a dma_buf then it is P2P.
+ */
+static inline bool mlx5_umem_needs_ats(struct mlx5_ib_dev *dev,
+ struct ib_umem *umem, int access_flags)
+{
+ if (!MLX5_CAP_GEN(dev->mdev, ats) || !umem->is_dmabuf)
+ return false;
+ return access_flags & IB_ACCESS_RELAXED_ORDERING;
+}
+
+int set_roce_addr(struct mlx5_ib_dev *dev, u32 port_num,
+ unsigned int index, const union ib_gid *gid,
+ const struct ib_gid_attr *attr);
+
+static inline u32 smi_to_native_portnum(struct mlx5_ib_dev *dev, u32 port)
+{
+ return (port - 1) / dev->num_ports + 1;
+}
+
+static inline unsigned int get_max_log_entity_size_cap(struct mlx5_ib_dev *dev,
+ int access_mode)
+{
+ int max_log_size = 0;
+
+ if (access_mode == MLX5_MKC_ACCESS_MODE_MTT)
+ max_log_size =
+ MLX5_CAP_GEN_2(dev->mdev, max_mkey_log_entity_size_mtt);
+ else if (access_mode == MLX5_MKC_ACCESS_MODE_KSM)
+ max_log_size = MLX5_CAP_GEN_2(
+ dev->mdev, max_mkey_log_entity_size_fixed_buffer);
+
+ if (!max_log_size ||
+ (max_log_size > 31 &&
+ !MLX5_CAP_GEN_2(dev->mdev, umr_log_entity_size_5)))
+ max_log_size = 31;
+
+ return max_log_size;
+}
+
+static inline unsigned int get_min_log_entity_size_cap(struct mlx5_ib_dev *dev,
+ int access_mode)
+{
+ int min_log_size = 0;
+
+ if (access_mode == MLX5_MKC_ACCESS_MODE_KSM &&
+ MLX5_CAP_GEN_2(dev->mdev,
+ min_mkey_log_entity_size_fixed_buffer_valid))
+ min_log_size = MLX5_CAP_GEN_2(
+ dev->mdev, min_mkey_log_entity_size_fixed_buffer);
+ else
+ min_log_size =
+ MLX5_CAP_GEN_2(dev->mdev, log_min_mkey_entity_size);
+
+ min_log_size = max(min_log_size, MLX5_ADAPTER_PAGE_SHIFT);
+ return min_log_size;
+}
+
+/*
+ * For mkc users, instead of a page_offset the command has a start_iova which
+ * specifies both the page_offset and the on-the-wire IOVA
+ */
+static __always_inline unsigned long
+mlx5_umem_mkc_find_best_pgsz(struct mlx5_ib_dev *dev, struct ib_umem *umem,
+ u64 iova, int access_mode)
+{
+ unsigned int max_log_entity_size_cap, min_log_entity_size_cap;
+ unsigned long bitmap;
+
+ max_log_entity_size_cap = get_max_log_entity_size_cap(dev, access_mode);
+ min_log_entity_size_cap = get_min_log_entity_size_cap(dev, access_mode);
+
+ bitmap = GENMASK_ULL(max_log_entity_size_cap, min_log_entity_size_cap);
+
+ /* In KSM mode HW requires IOVA and mkey's page size to be aligned */
+ if (access_mode == MLX5_MKC_ACCESS_MODE_KSM && iova)
+ bitmap &= GENMASK_ULL(__ffs64(iova), 0);
+
+ return ib_umem_find_best_pgsz(umem, bitmap, iova);
+}
+
+static inline unsigned long
+mlx5_umem_dmabuf_find_best_pgsz(struct ib_umem_dmabuf *umem_dmabuf,
+ int access_mode)
+{
+ return mlx5_umem_mkc_find_best_pgsz(to_mdev(umem_dmabuf->umem.ibdev),
+ &umem_dmabuf->umem,
+ umem_dmabuf->umem.iova,
+ access_mode);
+}
+
#endif /* MLX5_IB_H */
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 8f608debe141..325fa04cbe8a 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2020, Intel Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -36,210 +37,346 @@
#include <linux/debugfs.h>
#include <linux/export.h>
#include <linux/delay.h>
-#include <rdma/ib_umem.h>
+#include <linux/dma-buf.h>
+#include <linux/dma-resv.h>
#include <rdma/ib_umem_odp.h>
-#include <rdma/ib_verbs.h>
+#include "dm.h"
#include "mlx5_ib.h"
+#include "umr.h"
+#include "data_direct.h"
+#include "dmah.h"
enum {
MAX_PENDING_REG_MR = 8,
};
+#define MLX5_MR_CACHE_PERSISTENT_ENTRY_MIN_DESCS 4
#define MLX5_UMR_ALIGN 2048
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-static __be64 mlx5_ib_update_mtt_emergency_buffer[
- MLX5_UMR_MTT_MIN_CHUNK_SIZE/sizeof(__be64)]
- __aligned(MLX5_UMR_ALIGN);
-static DEFINE_MUTEX(mlx5_ib_update_mtt_emergency_buffer_mutex);
-#endif
-static int clean_mr(struct mlx5_ib_mr *mr);
+static void
+create_mkey_callback(int status, struct mlx5_async_work *context);
+static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem,
+ u64 iova, int access_flags,
+ unsigned long page_size, bool populate,
+ int access_mode, u16 st_index, u8 ph);
+static int __mlx5_ib_dereg_mr(struct ib_mr *ibmr);
+
+static void set_mkc_access_pd_addr_fields(void *mkc, int acc, u64 start_addr,
+ struct ib_pd *pd)
+{
+ struct mlx5_ib_dev *dev = to_mdev(pd->device);
-static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
+ MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC));
+ MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE));
+ MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ));
+ MLX5_SET(mkc, mkc, lw, !!(acc & IB_ACCESS_LOCAL_WRITE));
+ MLX5_SET(mkc, mkc, lr, 1);
+
+ if (acc & IB_ACCESS_RELAXED_ORDERING) {
+ if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write))
+ MLX5_SET(mkc, mkc, relaxed_ordering_write, 1);
+
+ if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read) ||
+ (MLX5_CAP_GEN(dev->mdev,
+ relaxed_ordering_read_pci_enabled) &&
+ pcie_relaxed_ordering_enabled(dev->mdev->pdev)))
+ MLX5_SET(mkc, mkc, relaxed_ordering_read, 1);
+ }
+
+ MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
+ MLX5_SET(mkc, mkc, qpn, 0xffffff);
+ MLX5_SET64(mkc, mkc, start_addr, start_addr);
+}
+
+static void assign_mkey_variant(struct mlx5_ib_dev *dev, u32 *mkey, u32 *in)
{
- int err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey);
+ u8 key = atomic_inc_return(&dev->mkey_var);
+ void *mkc;
+
+ mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
+ MLX5_SET(mkc, mkc, mkey_7_0, key);
+ *mkey = key;
+}
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- /* Wait until all page fault handlers using the mr complete. */
- synchronize_srcu(&dev->mr_srcu);
-#endif
+static int mlx5_ib_create_mkey(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_mkey *mkey, u32 *in, int inlen)
+{
+ int ret;
- return err;
+ assign_mkey_variant(dev, &mkey->key, in);
+ ret = mlx5_core_create_mkey(dev->mdev, &mkey->key, in, inlen);
+ if (!ret)
+ init_waitqueue_head(&mkey->wait);
+
+ return ret;
}
-static int order2idx(struct mlx5_ib_dev *dev, int order)
+static int mlx5_ib_create_mkey_cb(struct mlx5r_async_create_mkey *async_create)
{
- struct mlx5_mr_cache *cache = &dev->cache;
+ struct mlx5_ib_dev *dev = async_create->ent->dev;
+ size_t inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
+ size_t outlen = MLX5_ST_SZ_BYTES(create_mkey_out);
+
+ MLX5_SET(create_mkey_in, async_create->in, opcode,
+ MLX5_CMD_OP_CREATE_MKEY);
+ assign_mkey_variant(dev, &async_create->mkey, async_create->in);
+ return mlx5_cmd_exec_cb(&dev->async_ctx, async_create->in, inlen,
+ async_create->out, outlen, create_mkey_callback,
+ &async_create->cb_work);
+}
- if (order < cache->ent[0].order)
- return 0;
- else
- return order - cache->ent[0].order;
+static int mkey_cache_max_order(struct mlx5_ib_dev *dev);
+static void queue_adjust_cache_locked(struct mlx5_cache_ent *ent);
+
+static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
+{
+ WARN_ON(xa_load(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key)));
+
+ return mlx5_core_destroy_mkey(dev->mdev, mr->mmkey.key);
}
-static bool use_umr_mtt_update(struct mlx5_ib_mr *mr, u64 start, u64 length)
+static void create_mkey_warn(struct mlx5_ib_dev *dev, int status, void *out)
{
- return ((u64)1 << mr->order) * MLX5_ADAPTER_PAGE_SIZE >=
- length + (start & (MLX5_ADAPTER_PAGE_SIZE - 1));
+ if (status == -ENXIO) /* core driver is not available */
+ return;
+
+ mlx5_ib_warn(dev, "async reg mr failed. status %d\n", status);
+ if (status != -EREMOTEIO) /* driver specific failure */
+ return;
+
+ /* Failed in FW, print cmd out failure details */
+ mlx5_cmd_out_err(dev->mdev, MLX5_CMD_OP_CREATE_MKEY, 0, out);
}
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-static void update_odp_mr(struct mlx5_ib_mr *mr)
+static int push_mkey_locked(struct mlx5_cache_ent *ent, u32 mkey)
{
- if (mr->umem->odp_data) {
- /*
- * This barrier prevents the compiler from moving the
- * setting of umem->odp_data->private to point to our
- * MR, before reg_umr finished, to ensure that the MR
- * initialization have finished before starting to
- * handle invalidations.
- */
- smp_wmb();
- mr->umem->odp_data->private = mr;
- /*
- * Make sure we will see the new
- * umem->odp_data->private value in the invalidation
- * routines, before we can get page faults on the
- * MR. Page faults can happen once we put the MR in
- * the tree, below this line. Without the barrier,
- * there can be a fault handling and an invalidation
- * before umem->odp_data->private == mr is visible to
- * the invalidation handler.
- */
- smp_wmb();
+ unsigned long tmp = ent->mkeys_queue.ci % NUM_MKEYS_PER_PAGE;
+ struct mlx5_mkeys_page *page;
+
+ lockdep_assert_held(&ent->mkeys_queue.lock);
+ if (ent->mkeys_queue.ci >=
+ ent->mkeys_queue.num_pages * NUM_MKEYS_PER_PAGE) {
+ page = kzalloc(sizeof(*page), GFP_ATOMIC);
+ if (!page)
+ return -ENOMEM;
+ ent->mkeys_queue.num_pages++;
+ list_add_tail(&page->list, &ent->mkeys_queue.pages_list);
+ } else {
+ page = list_last_entry(&ent->mkeys_queue.pages_list,
+ struct mlx5_mkeys_page, list);
}
+
+ page->mkeys[tmp] = mkey;
+ ent->mkeys_queue.ci++;
+ return 0;
}
-#endif
-static void reg_mr_callback(int status, void *context)
+static int pop_mkey_locked(struct mlx5_cache_ent *ent)
{
- struct mlx5_ib_mr *mr = context;
- struct mlx5_ib_dev *dev = mr->dev;
- struct mlx5_mr_cache *cache = &dev->cache;
- int c = order2idx(dev, mr->order);
- struct mlx5_cache_ent *ent = &cache->ent[c];
- u8 key;
+ unsigned long tmp = (ent->mkeys_queue.ci - 1) % NUM_MKEYS_PER_PAGE;
+ struct mlx5_mkeys_page *last_page;
+ u32 mkey;
+
+ lockdep_assert_held(&ent->mkeys_queue.lock);
+ last_page = list_last_entry(&ent->mkeys_queue.pages_list,
+ struct mlx5_mkeys_page, list);
+ mkey = last_page->mkeys[tmp];
+ last_page->mkeys[tmp] = 0;
+ ent->mkeys_queue.ci--;
+ if (ent->mkeys_queue.num_pages > 1 && !tmp) {
+ list_del(&last_page->list);
+ ent->mkeys_queue.num_pages--;
+ kfree(last_page);
+ }
+ return mkey;
+}
+
+static void create_mkey_callback(int status, struct mlx5_async_work *context)
+{
+ struct mlx5r_async_create_mkey *mkey_out =
+ container_of(context, struct mlx5r_async_create_mkey, cb_work);
+ struct mlx5_cache_ent *ent = mkey_out->ent;
+ struct mlx5_ib_dev *dev = ent->dev;
unsigned long flags;
- struct mlx5_mkey_table *table = &dev->mdev->priv.mkey_table;
- int err;
- spin_lock_irqsave(&ent->lock, flags);
- ent->pending--;
- spin_unlock_irqrestore(&ent->lock, flags);
if (status) {
- mlx5_ib_warn(dev, "async reg mr failed. status %d\n", status);
- kfree(mr);
- dev->fill_delay = 1;
+ create_mkey_warn(dev, status, mkey_out->out);
+ kfree(mkey_out);
+ spin_lock_irqsave(&ent->mkeys_queue.lock, flags);
+ ent->pending--;
+ WRITE_ONCE(dev->fill_delay, 1);
+ spin_unlock_irqrestore(&ent->mkeys_queue.lock, flags);
mod_timer(&dev->delay_timer, jiffies + HZ);
return;
}
- spin_lock_irqsave(&dev->mdev->priv.mkey_lock, flags);
- key = dev->mdev->priv.mkey_key++;
- spin_unlock_irqrestore(&dev->mdev->priv.mkey_lock, flags);
- mr->mmkey.key = mlx5_idx_to_mkey(MLX5_GET(create_mkey_out, mr->out, mkey_index)) | key;
+ mkey_out->mkey |= mlx5_idx_to_mkey(
+ MLX5_GET(create_mkey_out, mkey_out->out, mkey_index));
+ WRITE_ONCE(dev->cache.last_add, jiffies);
- cache->last_add = jiffies;
+ spin_lock_irqsave(&ent->mkeys_queue.lock, flags);
+ push_mkey_locked(ent, mkey_out->mkey);
+ ent->pending--;
+ /* If we are doing fill_to_high_water then keep going. */
+ queue_adjust_cache_locked(ent);
+ spin_unlock_irqrestore(&ent->mkeys_queue.lock, flags);
+ kfree(mkey_out);
+}
- spin_lock_irqsave(&ent->lock, flags);
- list_add_tail(&mr->list, &ent->head);
- ent->cur++;
- ent->size++;
- spin_unlock_irqrestore(&ent->lock, flags);
+static int get_mkc_octo_size(unsigned int access_mode, unsigned int ndescs)
+{
+ int ret = 0;
- write_lock_irqsave(&table->lock, flags);
- err = radix_tree_insert(&table->tree, mlx5_base_mkey(mr->mmkey.key),
- &mr->mmkey);
- if (err)
- pr_err("Error inserting to mkey tree. 0x%x\n", -err);
- write_unlock_irqrestore(&table->lock, flags);
+ switch (access_mode) {
+ case MLX5_MKC_ACCESS_MODE_MTT:
+ ret = DIV_ROUND_UP(ndescs, MLX5_IB_UMR_OCTOWORD /
+ sizeof(struct mlx5_mtt));
+ break;
+ case MLX5_MKC_ACCESS_MODE_KSM:
+ ret = DIV_ROUND_UP(ndescs, MLX5_IB_UMR_OCTOWORD /
+ sizeof(struct mlx5_klm));
+ break;
+ default:
+ WARN_ON(1);
+ }
+ return ret;
}
-static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
+static void set_cache_mkc(struct mlx5_cache_ent *ent, void *mkc)
{
- struct mlx5_mr_cache *cache = &dev->cache;
- struct mlx5_cache_ent *ent = &cache->ent[c];
- int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
- struct mlx5_ib_mr *mr;
- int npages = 1 << ent->order;
+ set_mkc_access_pd_addr_fields(mkc, ent->rb_key.access_flags, 0,
+ ent->dev->umrc.pd);
+ MLX5_SET(mkc, mkc, free, 1);
+ MLX5_SET(mkc, mkc, umr_en, 1);
+ MLX5_SET(mkc, mkc, access_mode_1_0, ent->rb_key.access_mode & 0x3);
+ MLX5_SET(mkc, mkc, access_mode_4_2,
+ (ent->rb_key.access_mode >> 2) & 0x7);
+ MLX5_SET(mkc, mkc, ma_translation_mode, !!ent->rb_key.ats);
+
+ MLX5_SET(mkc, mkc, translations_octword_size,
+ get_mkc_octo_size(ent->rb_key.access_mode,
+ ent->rb_key.ndescs));
+ MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT);
+
+ if (ent->rb_key.ph != MLX5_IB_NO_PH) {
+ MLX5_SET(mkc, mkc, pcie_tph_en, 1);
+ MLX5_SET(mkc, mkc, pcie_tph_ph, ent->rb_key.ph);
+ if (ent->rb_key.st_index != MLX5_MKC_PCIE_TPH_NO_STEERING_TAG_INDEX)
+ MLX5_SET(mkc, mkc, pcie_tph_steering_tag_index,
+ ent->rb_key.st_index);
+ }
+}
+
+/* Asynchronously schedule new MRs to be populated in the cache. */
+static int add_keys(struct mlx5_cache_ent *ent, unsigned int num)
+{
+ struct mlx5r_async_create_mkey *async_create;
void *mkc;
- u32 *in;
int err = 0;
int i;
- in = kzalloc(inlen, GFP_KERNEL);
- if (!in)
- return -ENOMEM;
-
- mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
for (i = 0; i < num; i++) {
+ async_create = kzalloc(sizeof(struct mlx5r_async_create_mkey),
+ GFP_KERNEL);
+ if (!async_create)
+ return -ENOMEM;
+ mkc = MLX5_ADDR_OF(create_mkey_in, async_create->in,
+ memory_key_mkey_entry);
+ set_cache_mkc(ent, mkc);
+ async_create->ent = ent;
+
+ spin_lock_irq(&ent->mkeys_queue.lock);
if (ent->pending >= MAX_PENDING_REG_MR) {
err = -EAGAIN;
- break;
+ goto free_async_create;
}
+ ent->pending++;
+ spin_unlock_irq(&ent->mkeys_queue.lock);
- mr = kzalloc(sizeof(*mr), GFP_KERNEL);
- if (!mr) {
- err = -ENOMEM;
- break;
+ err = mlx5_ib_create_mkey_cb(async_create);
+ if (err) {
+ mlx5_ib_warn(ent->dev, "create mkey failed %d\n", err);
+ goto err_create_mkey;
}
- mr->order = ent->order;
- mr->umred = 1;
- mr->dev = dev;
+ }
- MLX5_SET(mkc, mkc, free, 1);
- MLX5_SET(mkc, mkc, umr_en, 1);
- MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_MTT);
+ return 0;
- MLX5_SET(mkc, mkc, qpn, 0xffffff);
- MLX5_SET(mkc, mkc, translations_octword_size, (npages + 1) / 2);
- MLX5_SET(mkc, mkc, log_page_size, 12);
+err_create_mkey:
+ spin_lock_irq(&ent->mkeys_queue.lock);
+ ent->pending--;
+free_async_create:
+ spin_unlock_irq(&ent->mkeys_queue.lock);
+ kfree(async_create);
+ return err;
+}
- spin_lock_irq(&ent->lock);
- ent->pending++;
- spin_unlock_irq(&ent->lock);
- err = mlx5_core_create_mkey_cb(dev->mdev, &mr->mmkey,
- in, inlen,
- mr->out, sizeof(mr->out),
- reg_mr_callback, mr);
- if (err) {
- spin_lock_irq(&ent->lock);
- ent->pending--;
- spin_unlock_irq(&ent->lock);
- mlx5_ib_warn(dev, "create mkey failed %d\n", err);
- kfree(mr);
- break;
- }
- }
+/* Synchronously create a MR in the cache */
+static int create_cache_mkey(struct mlx5_cache_ent *ent, u32 *mkey)
+{
+ size_t inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
+ void *mkc;
+ u32 *in;
+ int err;
+
+ in = kzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+ mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
+ set_cache_mkc(ent, mkc);
+ err = mlx5_core_create_mkey(ent->dev->mdev, mkey, in, inlen);
+ if (err)
+ goto free_in;
+
+ WRITE_ONCE(ent->dev->cache.last_add, jiffies);
+free_in:
kfree(in);
return err;
}
-static void remove_keys(struct mlx5_ib_dev *dev, int c, int num)
+static void remove_cache_mr_locked(struct mlx5_cache_ent *ent)
+{
+ u32 mkey;
+
+ lockdep_assert_held(&ent->mkeys_queue.lock);
+ if (!ent->mkeys_queue.ci)
+ return;
+ mkey = pop_mkey_locked(ent);
+ spin_unlock_irq(&ent->mkeys_queue.lock);
+ mlx5_core_destroy_mkey(ent->dev->mdev, mkey);
+ spin_lock_irq(&ent->mkeys_queue.lock);
+}
+
+static int resize_available_mrs(struct mlx5_cache_ent *ent, unsigned int target,
+ bool limit_fill)
+ __acquires(&ent->mkeys_queue.lock) __releases(&ent->mkeys_queue.lock)
{
- struct mlx5_mr_cache *cache = &dev->cache;
- struct mlx5_cache_ent *ent = &cache->ent[c];
- struct mlx5_ib_mr *mr;
int err;
- int i;
- for (i = 0; i < num; i++) {
- spin_lock_irq(&ent->lock);
- if (list_empty(&ent->head)) {
- spin_unlock_irq(&ent->lock);
- return;
+ lockdep_assert_held(&ent->mkeys_queue.lock);
+
+ while (true) {
+ if (limit_fill)
+ target = ent->limit * 2;
+ if (target == ent->pending + ent->mkeys_queue.ci)
+ return 0;
+ if (target > ent->pending + ent->mkeys_queue.ci) {
+ u32 todo = target - (ent->pending + ent->mkeys_queue.ci);
+
+ spin_unlock_irq(&ent->mkeys_queue.lock);
+ err = add_keys(ent, todo);
+ if (err == -EAGAIN)
+ usleep_range(3000, 5000);
+ spin_lock_irq(&ent->mkeys_queue.lock);
+ if (err) {
+ if (err != -EAGAIN)
+ return err;
+ } else
+ return 0;
+ } else {
+ remove_cache_mr_locked(ent);
}
- mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
- list_del(&mr->list);
- ent->cur--;
- ent->size--;
- spin_unlock_irq(&ent->lock);
- err = destroy_mkey(dev, mr);
- if (err)
- mlx5_ib_warn(dev, "failed destroy mkey\n");
- else
- kfree(mr);
}
}
@@ -247,37 +384,38 @@ static ssize_t size_write(struct file *filp, const char __user *buf,
size_t count, loff_t *pos)
{
struct mlx5_cache_ent *ent = filp->private_data;
- struct mlx5_ib_dev *dev = ent->dev;
- char lbuf[20];
- u32 var;
+ u32 target;
int err;
- int c;
-
- if (copy_from_user(lbuf, buf, sizeof(lbuf)))
- return -EFAULT;
-
- c = order2idx(dev, ent->order);
- lbuf[sizeof(lbuf) - 1] = 0;
- if (sscanf(lbuf, "%u", &var) != 1)
- return -EINVAL;
-
- if (var < ent->limit)
- return -EINVAL;
-
- if (var > ent->size) {
- do {
- err = add_keys(dev, c, var - ent->size);
- if (err && err != -EAGAIN)
- return err;
+ err = kstrtou32_from_user(buf, count, 0, &target);
+ if (err)
+ return err;
- usleep_range(3000, 5000);
- } while (err);
- } else if (var < ent->size) {
- remove_keys(dev, c, ent->size - var);
+ /*
+ * Target is the new value of total_mrs the user requests, however we
+ * cannot free MRs that are in use. Compute the target value for stored
+ * mkeys.
+ */
+ spin_lock_irq(&ent->mkeys_queue.lock);
+ if (target < ent->in_use) {
+ err = -EINVAL;
+ goto err_unlock;
}
+ target = target - ent->in_use;
+ if (target < ent->limit || target > ent->limit*2) {
+ err = -EINVAL;
+ goto err_unlock;
+ }
+ err = resize_available_mrs(ent, target, false);
+ if (err)
+ goto err_unlock;
+ spin_unlock_irq(&ent->mkeys_queue.lock);
return count;
+
+err_unlock:
+ spin_unlock_irq(&ent->mkeys_queue.lock);
+ return err;
}
static ssize_t size_read(struct file *filp, char __user *buf, size_t count,
@@ -287,19 +425,12 @@ static ssize_t size_read(struct file *filp, char __user *buf, size_t count,
char lbuf[20];
int err;
- if (*pos)
- return 0;
-
- err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->size);
+ err = snprintf(lbuf, sizeof(lbuf), "%ld\n",
+ ent->mkeys_queue.ci + ent->in_use);
if (err < 0)
return err;
- if (copy_to_user(buf, lbuf, err))
- return -EFAULT;
-
- *pos += err;
-
- return err;
+ return simple_read_from_buffer(buf, count, pos, lbuf, err);
}
static const struct file_operations size_fops = {
@@ -313,32 +444,23 @@ static ssize_t limit_write(struct file *filp, const char __user *buf,
size_t count, loff_t *pos)
{
struct mlx5_cache_ent *ent = filp->private_data;
- struct mlx5_ib_dev *dev = ent->dev;
- char lbuf[20];
u32 var;
int err;
- int c;
- if (copy_from_user(lbuf, buf, sizeof(lbuf)))
- return -EFAULT;
-
- c = order2idx(dev, ent->order);
- lbuf[sizeof(lbuf) - 1] = 0;
-
- if (sscanf(lbuf, "%u", &var) != 1)
- return -EINVAL;
-
- if (var > ent->size)
- return -EINVAL;
+ err = kstrtou32_from_user(buf, count, 0, &var);
+ if (err)
+ return err;
+ /*
+ * Upon set we immediately fill the cache to high water mark implied by
+ * the limit.
+ */
+ spin_lock_irq(&ent->mkeys_queue.lock);
ent->limit = var;
-
- if (ent->cur < ent->limit) {
- err = add_keys(dev, c, 2 * ent->limit - ent->cur);
- if (err)
- return err;
- }
-
+ err = resize_available_mrs(ent, 0, true);
+ spin_unlock_irq(&ent->mkeys_queue.lock);
+ if (err)
+ return err;
return count;
}
@@ -349,19 +471,11 @@ static ssize_t limit_read(struct file *filp, char __user *buf, size_t count,
char lbuf[20];
int err;
- if (*pos)
- return 0;
-
err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->limit);
if (err < 0)
return err;
- if (copy_to_user(buf, lbuf, err))
- return -EFAULT;
-
- *pos += err;
-
- return err;
+ return simple_read_from_buffer(buf, count, pos, lbuf, err);
}
static const struct file_operations limit_fops = {
@@ -371,68 +485,140 @@ static const struct file_operations limit_fops = {
.read = limit_read,
};
-static int someone_adding(struct mlx5_mr_cache *cache)
+static bool someone_adding(struct mlx5_mkey_cache *cache)
{
- int i;
+ struct mlx5_cache_ent *ent;
+ struct rb_node *node;
+ bool ret;
+
+ mutex_lock(&cache->rb_lock);
+ for (node = rb_first(&cache->rb_root); node; node = rb_next(node)) {
+ ent = rb_entry(node, struct mlx5_cache_ent, node);
+ spin_lock_irq(&ent->mkeys_queue.lock);
+ ret = ent->mkeys_queue.ci < ent->limit;
+ spin_unlock_irq(&ent->mkeys_queue.lock);
+ if (ret) {
+ mutex_unlock(&cache->rb_lock);
+ return true;
+ }
+ }
+ mutex_unlock(&cache->rb_lock);
+ return false;
+}
- for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
- if (cache->ent[i].cur < cache->ent[i].limit)
- return 1;
+/*
+ * Check if the bucket is outside the high/low water mark and schedule an async
+ * update. The cache refill has hysteresis, once the low water mark is hit it is
+ * refilled up to the high mark.
+ */
+static void queue_adjust_cache_locked(struct mlx5_cache_ent *ent)
+{
+ lockdep_assert_held(&ent->mkeys_queue.lock);
+
+ if (ent->disabled || READ_ONCE(ent->dev->fill_delay) || ent->is_tmp)
+ return;
+ if (ent->mkeys_queue.ci < ent->limit) {
+ ent->fill_to_high_water = true;
+ mod_delayed_work(ent->dev->cache.wq, &ent->dwork, 0);
+ } else if (ent->fill_to_high_water &&
+ ent->mkeys_queue.ci + ent->pending < 2 * ent->limit) {
+ /*
+ * Once we start populating due to hitting a low water mark
+ * continue until we pass the high water mark.
+ */
+ mod_delayed_work(ent->dev->cache.wq, &ent->dwork, 0);
+ } else if (ent->mkeys_queue.ci == 2 * ent->limit) {
+ ent->fill_to_high_water = false;
+ } else if (ent->mkeys_queue.ci > 2 * ent->limit) {
+ /* Queue deletion of excess entries */
+ ent->fill_to_high_water = false;
+ if (ent->pending)
+ queue_delayed_work(ent->dev->cache.wq, &ent->dwork,
+ secs_to_jiffies(1));
+ else
+ mod_delayed_work(ent->dev->cache.wq, &ent->dwork, 0);
}
+}
- return 0;
+static void clean_keys(struct mlx5_ib_dev *dev, struct mlx5_cache_ent *ent)
+{
+ u32 mkey;
+
+ spin_lock_irq(&ent->mkeys_queue.lock);
+ while (ent->mkeys_queue.ci) {
+ mkey = pop_mkey_locked(ent);
+ spin_unlock_irq(&ent->mkeys_queue.lock);
+ mlx5_core_destroy_mkey(dev->mdev, mkey);
+ spin_lock_irq(&ent->mkeys_queue.lock);
+ }
+ ent->tmp_cleanup_scheduled = false;
+ spin_unlock_irq(&ent->mkeys_queue.lock);
}
static void __cache_work_func(struct mlx5_cache_ent *ent)
{
struct mlx5_ib_dev *dev = ent->dev;
- struct mlx5_mr_cache *cache = &dev->cache;
- int i = order2idx(dev, ent->order);
+ struct mlx5_mkey_cache *cache = &dev->cache;
int err;
- if (cache->stopped)
- return;
-
- ent = &dev->cache.ent[i];
- if (ent->cur < 2 * ent->limit && !dev->fill_delay) {
- err = add_keys(dev, i, 1);
- if (ent->cur < 2 * ent->limit) {
- if (err == -EAGAIN) {
- mlx5_ib_dbg(dev, "returned eagain, order %d\n",
- i + 2);
- queue_delayed_work(cache->wq, &ent->dwork,
- msecs_to_jiffies(3));
- } else if (err) {
- mlx5_ib_warn(dev, "command failed order %d, err %d\n",
- i + 2, err);
+ spin_lock_irq(&ent->mkeys_queue.lock);
+ if (ent->disabled)
+ goto out;
+
+ if (ent->fill_to_high_water &&
+ ent->mkeys_queue.ci + ent->pending < 2 * ent->limit &&
+ !READ_ONCE(dev->fill_delay)) {
+ spin_unlock_irq(&ent->mkeys_queue.lock);
+ err = add_keys(ent, 1);
+ spin_lock_irq(&ent->mkeys_queue.lock);
+ if (ent->disabled)
+ goto out;
+ if (err) {
+ /*
+ * EAGAIN only happens if there are pending MRs, so we
+ * will be rescheduled when storing them. The only
+ * failure path here is ENOMEM.
+ */
+ if (err != -EAGAIN) {
+ mlx5_ib_warn(
+ dev,
+ "add keys command failed, err %d\n",
+ err);
queue_delayed_work(cache->wq, &ent->dwork,
- msecs_to_jiffies(1000));
- } else {
- queue_work(cache->wq, &ent->work);
+ secs_to_jiffies(1));
}
}
- } else if (ent->cur > 2 * ent->limit) {
+ } else if (ent->mkeys_queue.ci > 2 * ent->limit) {
+ bool need_delay;
+
/*
- * The remove_keys() logic is performed as garbage collection
- * task. Such task is intended to be run when no other active
- * processes are running.
+ * The remove_cache_mr() logic is performed as garbage
+ * collection task. Such task is intended to be run when no
+ * other active processes are running.
*
* The need_resched() will return TRUE if there are user tasks
* to be activated in near future.
*
- * In such case, we don't execute remove_keys() and postpone
- * the garbage collection work to try to run in next cycle,
- * in order to free CPU resources to other tasks.
+ * In such case, we don't execute remove_cache_mr() and postpone
+ * the garbage collection work to try to run in next cycle, in
+ * order to free CPU resources to other tasks.
*/
- if (!need_resched() && !someone_adding(cache) &&
- time_after(jiffies, cache->last_add + 300 * HZ)) {
- remove_keys(dev, i, 1);
- if (ent->cur > ent->limit)
- queue_work(cache->wq, &ent->work);
- } else {
+ spin_unlock_irq(&ent->mkeys_queue.lock);
+ need_delay = need_resched() || someone_adding(cache) ||
+ !time_after(jiffies,
+ READ_ONCE(cache->last_add) + 300 * HZ);
+ spin_lock_irq(&ent->mkeys_queue.lock);
+ if (ent->disabled)
+ goto out;
+ if (need_delay) {
queue_delayed_work(cache->wq, &ent->dwork, 300 * HZ);
+ goto out;
}
+ remove_cache_mr_locked(ent);
+ queue_adjust_cache_locked(ent);
}
+out:
+ spin_unlock_irq(&ent->mkeys_queue.lock);
}
static void delayed_cache_work_func(struct work_struct *work)
@@ -440,264 +626,434 @@ static void delayed_cache_work_func(struct work_struct *work)
struct mlx5_cache_ent *ent;
ent = container_of(work, struct mlx5_cache_ent, dwork.work);
- __cache_work_func(ent);
+ /* temp entries are never filled, only cleaned */
+ if (ent->is_tmp)
+ clean_keys(ent->dev, ent);
+ else
+ __cache_work_func(ent);
}
-static void cache_work_func(struct work_struct *work)
+static int cache_ent_key_cmp(struct mlx5r_cache_rb_key key1,
+ struct mlx5r_cache_rb_key key2)
{
- struct mlx5_cache_ent *ent;
+ int res;
- ent = container_of(work, struct mlx5_cache_ent, work);
- __cache_work_func(ent);
-}
+ res = key1.ats - key2.ats;
+ if (res)
+ return res;
-static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order)
-{
- struct mlx5_mr_cache *cache = &dev->cache;
- struct mlx5_ib_mr *mr = NULL;
- struct mlx5_cache_ent *ent;
- int c;
- int i;
+ res = key1.access_mode - key2.access_mode;
+ if (res)
+ return res;
- c = order2idx(dev, order);
- if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) {
- mlx5_ib_warn(dev, "order %d, cache index %d\n", order, c);
- return NULL;
- }
+ res = key1.access_flags - key2.access_flags;
+ if (res)
+ return res;
- for (i = c; i < MAX_MR_CACHE_ENTRIES; i++) {
- ent = &cache->ent[i];
+ res = key1.st_index - key2.st_index;
+ if (res)
+ return res;
- mlx5_ib_dbg(dev, "order %d, cache index %d\n", ent->order, i);
+ res = key1.ph - key2.ph;
+ if (res)
+ return res;
- spin_lock_irq(&ent->lock);
- if (!list_empty(&ent->head)) {
- mr = list_first_entry(&ent->head, struct mlx5_ib_mr,
- list);
- list_del(&mr->list);
- ent->cur--;
- spin_unlock_irq(&ent->lock);
- if (ent->cur < ent->limit)
- queue_work(cache->wq, &ent->work);
- break;
- }
- spin_unlock_irq(&ent->lock);
+ /*
+ * keep ndescs the last in the compare table since the find function
+ * searches for an exact match on all properties and only closest
+ * match in size.
+ */
+ return key1.ndescs - key2.ndescs;
+}
- queue_work(cache->wq, &ent->work);
+static int mlx5_cache_ent_insert(struct mlx5_mkey_cache *cache,
+ struct mlx5_cache_ent *ent)
+{
+ struct rb_node **new = &cache->rb_root.rb_node, *parent = NULL;
+ struct mlx5_cache_ent *cur;
+ int cmp;
+
+ /* Figure out where to put new node */
+ while (*new) {
+ cur = rb_entry(*new, struct mlx5_cache_ent, node);
+ parent = *new;
+ cmp = cache_ent_key_cmp(cur->rb_key, ent->rb_key);
+ if (cmp > 0)
+ new = &((*new)->rb_left);
+ if (cmp < 0)
+ new = &((*new)->rb_right);
+ if (cmp == 0)
+ return -EEXIST;
}
- if (!mr)
- cache->ent[c].miss++;
+ /* Add new node and rebalance tree. */
+ rb_link_node(&ent->node, parent, new);
+ rb_insert_color(&ent->node, &cache->rb_root);
- return mr;
+ return 0;
}
-static void free_cached_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
+static struct mlx5_cache_ent *
+mkey_cache_ent_from_rb_key(struct mlx5_ib_dev *dev,
+ struct mlx5r_cache_rb_key rb_key)
{
- struct mlx5_mr_cache *cache = &dev->cache;
- struct mlx5_cache_ent *ent;
- int shrink = 0;
- int c;
+ struct rb_node *node = dev->cache.rb_root.rb_node;
+ struct mlx5_cache_ent *cur, *smallest = NULL;
+ u64 ndescs_limit;
+ int cmp;
- c = order2idx(dev, mr->order);
- if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) {
- mlx5_ib_warn(dev, "order %d, cache index %d\n", mr->order, c);
- return;
+ /*
+ * Find the smallest ent with order >= requested_order.
+ */
+ while (node) {
+ cur = rb_entry(node, struct mlx5_cache_ent, node);
+ cmp = cache_ent_key_cmp(cur->rb_key, rb_key);
+ if (cmp > 0) {
+ smallest = cur;
+ node = node->rb_left;
+ }
+ if (cmp < 0)
+ node = node->rb_right;
+ if (cmp == 0)
+ return cur;
}
- ent = &cache->ent[c];
- spin_lock_irq(&ent->lock);
- list_add_tail(&mr->list, &ent->head);
- ent->cur++;
- if (ent->cur > 2 * ent->limit)
- shrink = 1;
- spin_unlock_irq(&ent->lock);
- if (shrink)
- queue_work(cache->wq, &ent->work);
+ /*
+ * Limit the usage of mkeys larger than twice the required size while
+ * also allowing the usage of smallest cache entry for small MRs.
+ */
+ ndescs_limit = max_t(u64, rb_key.ndescs * 2,
+ MLX5_MR_CACHE_PERSISTENT_ENTRY_MIN_DESCS);
+
+ return (smallest &&
+ smallest->rb_key.access_mode == rb_key.access_mode &&
+ smallest->rb_key.access_flags == rb_key.access_flags &&
+ smallest->rb_key.ats == rb_key.ats &&
+ smallest->rb_key.st_index == rb_key.st_index &&
+ smallest->rb_key.ph == rb_key.ph &&
+ smallest->rb_key.ndescs <= ndescs_limit) ?
+ smallest :
+ NULL;
}
-static void clean_keys(struct mlx5_ib_dev *dev, int c)
+static struct mlx5_ib_mr *_mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev,
+ struct mlx5_cache_ent *ent)
{
- struct mlx5_mr_cache *cache = &dev->cache;
- struct mlx5_cache_ent *ent = &cache->ent[c];
struct mlx5_ib_mr *mr;
int err;
- cancel_delayed_work(&ent->dwork);
- while (1) {
- spin_lock_irq(&ent->lock);
- if (list_empty(&ent->head)) {
- spin_unlock_irq(&ent->lock);
- return;
- }
- mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
- list_del(&mr->list);
- ent->cur--;
- ent->size--;
- spin_unlock_irq(&ent->lock);
- err = destroy_mkey(dev, mr);
- if (err)
- mlx5_ib_warn(dev, "failed destroy mkey\n");
- else
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+ spin_lock_irq(&ent->mkeys_queue.lock);
+ ent->in_use++;
+
+ if (!ent->mkeys_queue.ci) {
+ queue_adjust_cache_locked(ent);
+ ent->miss++;
+ spin_unlock_irq(&ent->mkeys_queue.lock);
+ err = create_cache_mkey(ent, &mr->mmkey.key);
+ if (err) {
+ spin_lock_irq(&ent->mkeys_queue.lock);
+ ent->in_use--;
+ spin_unlock_irq(&ent->mkeys_queue.lock);
kfree(mr);
+ return ERR_PTR(err);
+ }
+ } else {
+ mr->mmkey.key = pop_mkey_locked(ent);
+ queue_adjust_cache_locked(ent);
+ spin_unlock_irq(&ent->mkeys_queue.lock);
}
+ mr->mmkey.cache_ent = ent;
+ mr->mmkey.type = MLX5_MKEY_MR;
+ mr->mmkey.rb_key = ent->rb_key;
+ mr->mmkey.cacheable = true;
+ init_waitqueue_head(&mr->mmkey.wait);
+ return mr;
}
-static int mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev)
+static int get_unchangeable_access_flags(struct mlx5_ib_dev *dev,
+ int access_flags)
{
- struct mlx5_mr_cache *cache = &dev->cache;
- struct mlx5_cache_ent *ent;
- int i;
+ int ret = 0;
- if (!mlx5_debugfs_root)
- return 0;
+ if ((access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
+ MLX5_CAP_GEN(dev->mdev, atomic) &&
+ MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled))
+ ret |= IB_ACCESS_REMOTE_ATOMIC;
- cache->root = debugfs_create_dir("mr_cache", dev->mdev->priv.dbg_root);
- if (!cache->root)
- return -ENOMEM;
+ if ((access_flags & IB_ACCESS_RELAXED_ORDERING) &&
+ MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write) &&
+ !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr))
+ ret |= IB_ACCESS_RELAXED_ORDERING;
- for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
- ent = &cache->ent[i];
- sprintf(ent->name, "%d", ent->order);
- ent->dir = debugfs_create_dir(ent->name, cache->root);
- if (!ent->dir)
- return -ENOMEM;
+ if ((access_flags & IB_ACCESS_RELAXED_ORDERING) &&
+ (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read) ||
+ MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_pci_enabled)) &&
+ !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr))
+ ret |= IB_ACCESS_RELAXED_ORDERING;
- ent->fsize = debugfs_create_file("size", 0600, ent->dir, ent,
- &size_fops);
- if (!ent->fsize)
- return -ENOMEM;
+ return ret;
+}
- ent->flimit = debugfs_create_file("limit", 0600, ent->dir, ent,
- &limit_fops);
- if (!ent->flimit)
- return -ENOMEM;
+struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev,
+ int access_flags, int access_mode,
+ int ndescs)
+{
+ struct mlx5r_cache_rb_key rb_key = {
+ .ndescs = ndescs,
+ .access_mode = access_mode,
+ .access_flags = get_unchangeable_access_flags(dev, access_flags),
+ .ph = MLX5_IB_NO_PH,
+ };
+ struct mlx5_cache_ent *ent = mkey_cache_ent_from_rb_key(dev, rb_key);
+
+ if (!ent)
+ return ERR_PTR(-EOPNOTSUPP);
- ent->fcur = debugfs_create_u32("cur", 0400, ent->dir,
- &ent->cur);
- if (!ent->fcur)
- return -ENOMEM;
+ return _mlx5_mr_cache_alloc(dev, ent);
+}
- ent->fmiss = debugfs_create_u32("miss", 0600, ent->dir,
- &ent->miss);
- if (!ent->fmiss)
- return -ENOMEM;
- }
+static void mlx5_mkey_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
+{
+ if (!mlx5_debugfs_root || dev->is_rep)
+ return;
- return 0;
+ debugfs_remove_recursive(dev->cache.fs_root);
+ dev->cache.fs_root = NULL;
}
-static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
+static void mlx5_mkey_cache_debugfs_add_ent(struct mlx5_ib_dev *dev,
+ struct mlx5_cache_ent *ent)
{
- if (!mlx5_debugfs_root)
+ int order = order_base_2(ent->rb_key.ndescs);
+ struct dentry *dir;
+
+ if (!mlx5_debugfs_root || dev->is_rep)
return;
- debugfs_remove_recursive(dev->cache.root);
+ if (ent->rb_key.access_mode == MLX5_MKC_ACCESS_MODE_KSM)
+ order = MLX5_IMR_KSM_CACHE_ENTRY + 2;
+
+ sprintf(ent->name, "%d", order);
+ dir = debugfs_create_dir(ent->name, dev->cache.fs_root);
+ debugfs_create_file("size", 0600, dir, ent, &size_fops);
+ debugfs_create_file("limit", 0600, dir, ent, &limit_fops);
+ debugfs_create_ulong("cur", 0400, dir, &ent->mkeys_queue.ci);
+ debugfs_create_u32("miss", 0600, dir, &ent->miss);
}
-static void delay_time_func(unsigned long ctx)
+static void mlx5_mkey_cache_debugfs_init(struct mlx5_ib_dev *dev)
{
- struct mlx5_ib_dev *dev = (struct mlx5_ib_dev *)ctx;
+ struct dentry *dbg_root = mlx5_debugfs_get_dev_root(dev->mdev);
+ struct mlx5_mkey_cache *cache = &dev->cache;
+
+ if (!mlx5_debugfs_root || dev->is_rep)
+ return;
- dev->fill_delay = 0;
+ cache->fs_root = debugfs_create_dir("mr_cache", dbg_root);
}
-int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
+static void delay_time_func(struct timer_list *t)
{
- struct mlx5_mr_cache *cache = &dev->cache;
- struct mlx5_cache_ent *ent;
- int limit;
- int err;
- int i;
+ struct mlx5_ib_dev *dev = timer_container_of(dev, t, delay_timer);
- mutex_init(&dev->slow_path_mutex);
- cache->wq = alloc_ordered_workqueue("mkey_cache", WQ_MEM_RECLAIM);
- if (!cache->wq) {
- mlx5_ib_warn(dev, "failed to create work queue\n");
+ WRITE_ONCE(dev->fill_delay, 0);
+}
+
+static int mlx5r_mkeys_init(struct mlx5_cache_ent *ent)
+{
+ struct mlx5_mkeys_page *page;
+
+ page = kzalloc(sizeof(*page), GFP_KERNEL);
+ if (!page)
return -ENOMEM;
- }
+ INIT_LIST_HEAD(&ent->mkeys_queue.pages_list);
+ spin_lock_init(&ent->mkeys_queue.lock);
+ list_add_tail(&page->list, &ent->mkeys_queue.pages_list);
+ ent->mkeys_queue.num_pages++;
+ return 0;
+}
+
+static void mlx5r_mkeys_uninit(struct mlx5_cache_ent *ent)
+{
+ struct mlx5_mkeys_page *page;
+
+ WARN_ON(ent->mkeys_queue.ci || ent->mkeys_queue.num_pages > 1);
+ page = list_last_entry(&ent->mkeys_queue.pages_list,
+ struct mlx5_mkeys_page, list);
+ list_del(&page->list);
+ kfree(page);
+}
- setup_timer(&dev->delay_timer, delay_time_func, (unsigned long)dev);
- for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
- INIT_LIST_HEAD(&cache->ent[i].head);
- spin_lock_init(&cache->ent[i].lock);
+struct mlx5_cache_ent *
+mlx5r_cache_create_ent_locked(struct mlx5_ib_dev *dev,
+ struct mlx5r_cache_rb_key rb_key,
+ bool persistent_entry)
+{
+ struct mlx5_cache_ent *ent;
+ int order;
+ int ret;
- ent = &cache->ent[i];
- INIT_LIST_HEAD(&ent->head);
- spin_lock_init(&ent->lock);
- ent->order = i + 2;
- ent->dev = dev;
+ ent = kzalloc(sizeof(*ent), GFP_KERNEL);
+ if (!ent)
+ return ERR_PTR(-ENOMEM);
- if ((dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE) &&
- (mlx5_core_is_pf(dev->mdev)))
- limit = dev->mdev->profile->mr_cache[i].limit;
+ ret = mlx5r_mkeys_init(ent);
+ if (ret)
+ goto mkeys_err;
+ ent->rb_key = rb_key;
+ ent->dev = dev;
+ ent->is_tmp = !persistent_entry;
+
+ INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
+
+ ret = mlx5_cache_ent_insert(&dev->cache, ent);
+ if (ret)
+ goto ent_insert_err;
+
+ if (persistent_entry) {
+ if (rb_key.access_mode == MLX5_MKC_ACCESS_MODE_KSM)
+ order = MLX5_IMR_KSM_CACHE_ENTRY;
+ else
+ order = order_base_2(rb_key.ndescs) - 2;
+
+ if ((dev->mdev->profile.mask & MLX5_PROF_MASK_MR_CACHE) &&
+ !dev->is_rep && mlx5_core_is_pf(dev->mdev) &&
+ mlx5r_umr_can_load_pas(dev, 0))
+ ent->limit = dev->mdev->profile.mr_cache[order].limit;
else
- limit = 0;
+ ent->limit = 0;
- INIT_WORK(&ent->work, cache_work_func);
- INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
- ent->limit = limit;
- queue_work(cache->wq, &ent->work);
+ mlx5_mkey_cache_debugfs_add_ent(dev, ent);
}
- err = mlx5_mr_cache_debugfs_init(dev);
- if (err)
- mlx5_ib_warn(dev, "cache debugfs failure\n");
+ return ent;
+ent_insert_err:
+ mlx5r_mkeys_uninit(ent);
+mkeys_err:
+ kfree(ent);
+ return ERR_PTR(ret);
+}
- return 0;
+static void mlx5r_destroy_cache_entries(struct mlx5_ib_dev *dev)
+{
+ struct rb_root *root = &dev->cache.rb_root;
+ struct mlx5_cache_ent *ent;
+ struct rb_node *node;
+
+ mutex_lock(&dev->cache.rb_lock);
+ node = rb_first(root);
+ while (node) {
+ ent = rb_entry(node, struct mlx5_cache_ent, node);
+ node = rb_next(node);
+ clean_keys(dev, ent);
+ rb_erase(&ent->node, root);
+ mlx5r_mkeys_uninit(ent);
+ kfree(ent);
+ }
+ mutex_unlock(&dev->cache.rb_lock);
}
-static void wait_for_async_commands(struct mlx5_ib_dev *dev)
+int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev)
{
- struct mlx5_mr_cache *cache = &dev->cache;
+ struct mlx5_mkey_cache *cache = &dev->cache;
+ struct rb_root *root = &dev->cache.rb_root;
+ struct mlx5r_cache_rb_key rb_key = {
+ .access_mode = MLX5_MKC_ACCESS_MODE_MTT,
+ .ph = MLX5_IB_NO_PH,
+ };
struct mlx5_cache_ent *ent;
- int total = 0;
+ struct rb_node *node;
+ int ret;
int i;
- int j;
- for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
- ent = &cache->ent[i];
- for (j = 0 ; j < 1000; j++) {
- if (!ent->pending)
- break;
- msleep(50);
+ mutex_init(&dev->slow_path_mutex);
+ mutex_init(&dev->cache.rb_lock);
+ dev->cache.rb_root = RB_ROOT;
+ cache->wq = alloc_ordered_workqueue("mkey_cache", WQ_MEM_RECLAIM);
+ if (!cache->wq) {
+ mlx5_ib_warn(dev, "failed to create work queue\n");
+ return -ENOMEM;
+ }
+
+ mlx5_cmd_init_async_ctx(dev->mdev, &dev->async_ctx);
+ timer_setup(&dev->delay_timer, delay_time_func, 0);
+ mlx5_mkey_cache_debugfs_init(dev);
+ mutex_lock(&cache->rb_lock);
+ for (i = 0; i <= mkey_cache_max_order(dev); i++) {
+ rb_key.ndescs = MLX5_MR_CACHE_PERSISTENT_ENTRY_MIN_DESCS << i;
+ ent = mlx5r_cache_create_ent_locked(dev, rb_key, true);
+ if (IS_ERR(ent)) {
+ ret = PTR_ERR(ent);
+ goto err;
}
}
- for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
- ent = &cache->ent[i];
- total += ent->pending;
+
+ ret = mlx5_odp_init_mkey_cache(dev);
+ if (ret)
+ goto err;
+
+ mutex_unlock(&cache->rb_lock);
+ for (node = rb_first(root); node; node = rb_next(node)) {
+ ent = rb_entry(node, struct mlx5_cache_ent, node);
+ spin_lock_irq(&ent->mkeys_queue.lock);
+ queue_adjust_cache_locked(ent);
+ spin_unlock_irq(&ent->mkeys_queue.lock);
}
- if (total)
- mlx5_ib_warn(dev, "aborted while there are %d pending mr requests\n", total);
- else
- mlx5_ib_warn(dev, "done with all pending requests\n");
+ return 0;
+
+err:
+ mutex_unlock(&cache->rb_lock);
+ mlx5_mkey_cache_debugfs_cleanup(dev);
+ mlx5r_destroy_cache_entries(dev);
+ destroy_workqueue(cache->wq);
+ mlx5_ib_warn(dev, "failed to create mkey cache entry\n");
+ return ret;
}
-int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev)
+void mlx5_mkey_cache_cleanup(struct mlx5_ib_dev *dev)
{
- int i;
+ struct rb_root *root = &dev->cache.rb_root;
+ struct mlx5_cache_ent *ent;
+ struct rb_node *node;
- dev->cache.stopped = 1;
+ if (!dev->cache.wq)
+ return;
+
+ mutex_lock(&dev->cache.rb_lock);
+ for (node = rb_first(root); node; node = rb_next(node)) {
+ ent = rb_entry(node, struct mlx5_cache_ent, node);
+ spin_lock_irq(&ent->mkeys_queue.lock);
+ ent->disabled = true;
+ spin_unlock_irq(&ent->mkeys_queue.lock);
+ cancel_delayed_work(&ent->dwork);
+ }
+ mutex_unlock(&dev->cache.rb_lock);
+
+ /*
+ * After all entries are disabled and will not reschedule on WQ,
+ * flush it and all async commands.
+ */
flush_workqueue(dev->cache.wq);
- mlx5_mr_cache_debugfs_cleanup(dev);
+ mlx5_mkey_cache_debugfs_cleanup(dev);
+ mlx5_cmd_cleanup_async_ctx(&dev->async_ctx);
- for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++)
- clean_keys(dev, i);
+ /* At this point all entries are disabled and have no concurrent work. */
+ mlx5r_destroy_cache_entries(dev);
destroy_workqueue(dev->cache.wq);
- wait_for_async_commands(dev);
- del_timer_sync(&dev->delay_timer);
-
- return 0;
+ timer_delete_sync(&dev->delay_timer);
}
struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
- struct mlx5_core_dev *mdev = dev->mdev;
struct mlx5_ib_mr *mr;
void *mkc;
u32 *in;
@@ -715,23 +1071,18 @@ struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc)
mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
- MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_PA);
- MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC));
- MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE));
- MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ));
- MLX5_SET(mkc, mkc, lw, !!(acc & IB_ACCESS_LOCAL_WRITE));
- MLX5_SET(mkc, mkc, lr, 1);
-
+ MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA);
MLX5_SET(mkc, mkc, length64, 1);
- MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
- MLX5_SET(mkc, mkc, qpn, 0xffffff);
- MLX5_SET64(mkc, mkc, start_addr, 0);
+ set_mkc_access_pd_addr_fields(mkc, acc | IB_ACCESS_RELAXED_ORDERING, 0,
+ pd);
+ MLX5_SET(mkc, mkc, ma_translation_mode, MLX5_CAP_GEN(dev->mdev, ats));
- err = mlx5_core_create_mkey(mdev, &mr->mmkey, in, inlen);
+ err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen);
if (err)
goto err_in;
kfree(in);
+ mr->mmkey.type = MLX5_MKEY_MR;
mr->ibmr.lkey = mr->mmkey.key;
mr->ibmr.rkey = mr->mmkey.key;
mr->umem = NULL;
@@ -747,700 +1098,877 @@ err_free:
return ERR_PTR(err);
}
-static int get_octo_len(u64 addr, u64 len, int page_size)
+static int get_octo_len(u64 addr, u64 len, int page_shift)
{
+ u64 page_size = 1ULL << page_shift;
u64 offset;
int npages;
offset = addr & (page_size - 1);
- npages = ALIGN(len + offset, page_size) >> ilog2(page_size);
+ npages = ALIGN(len + offset, page_size) >> page_shift;
return (npages + 1) / 2;
}
-static int use_umr(int order)
+static int mkey_cache_max_order(struct mlx5_ib_dev *dev)
{
- return order <= MLX5_MAX_UMR_SHIFT;
+ if (MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset))
+ return MKEY_CACHE_LAST_STD_ENTRY;
+ return MLX5_MAX_UMR_SHIFT;
}
-static int dma_map_mr_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
- int npages, int page_shift, int *size,
- __be64 **mr_pas, dma_addr_t *dma)
+static void set_mr_fields(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
+ u64 length, int access_flags, u64 iova)
{
- __be64 *pas;
- struct device *ddev = dev->ib_dev.dma_device;
+ mr->ibmr.lkey = mr->mmkey.key;
+ mr->ibmr.rkey = mr->mmkey.key;
+ mr->ibmr.length = length;
+ mr->ibmr.device = &dev->ib_dev;
+ mr->ibmr.iova = iova;
+ mr->access_flags = access_flags;
+}
+static unsigned int mlx5_umem_dmabuf_default_pgsz(struct ib_umem *umem,
+ u64 iova)
+{
/*
- * UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes.
- * To avoid copying garbage after the pas array, we allocate
- * a little more.
+ * The alignment of iova has already been checked upon entering
+ * UVERBS_METHOD_REG_DMABUF_MR
*/
- *size = ALIGN(sizeof(u64) * npages, MLX5_UMR_MTT_ALIGNMENT);
- *mr_pas = kmalloc(*size + MLX5_UMR_ALIGN - 1, GFP_KERNEL);
- if (!(*mr_pas))
- return -ENOMEM;
-
- pas = PTR_ALIGN(*mr_pas, MLX5_UMR_ALIGN);
- mlx5_ib_populate_pas(dev, umem, page_shift, pas, MLX5_IB_MTT_PRESENT);
- /* Clear padding after the actual pages. */
- memset(pas + npages, 0, *size - npages * sizeof(u64));
-
- *dma = dma_map_single(ddev, pas, *size, DMA_TO_DEVICE);
- if (dma_mapping_error(ddev, *dma)) {
- kfree(*mr_pas);
- return -ENOMEM;
- }
-
- return 0;
+ umem->iova = iova;
+ return PAGE_SIZE;
}
-static void prep_umr_wqe_common(struct ib_pd *pd, struct ib_send_wr *wr,
- struct ib_sge *sg, u64 dma, int n, u32 key,
- int page_shift)
+static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd,
+ struct ib_umem *umem, u64 iova,
+ int access_flags, int access_mode,
+ u16 st_index, u8 ph)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
- struct mlx5_umr_wr *umrwr = umr_wr(wr);
-
- sg->addr = dma;
- sg->length = ALIGN(sizeof(u64) * n, 64);
- sg->lkey = dev->umrc.pd->local_dma_lkey;
+ struct mlx5r_cache_rb_key rb_key = {};
+ struct mlx5_cache_ent *ent;
+ struct mlx5_ib_mr *mr;
+ unsigned long page_size;
- wr->next = NULL;
- wr->sg_list = sg;
- if (n)
- wr->num_sge = 1;
+ if (umem->is_dmabuf)
+ page_size = mlx5_umem_dmabuf_default_pgsz(umem, iova);
else
- wr->num_sge = 0;
+ page_size = mlx5_umem_mkc_find_best_pgsz(dev, umem, iova,
+ access_mode);
+ if (WARN_ON(!page_size))
+ return ERR_PTR(-EINVAL);
+
+ rb_key.access_mode = access_mode;
+ rb_key.ndescs = ib_umem_num_dma_blocks(umem, page_size);
+ rb_key.ats = mlx5_umem_needs_ats(dev, umem, access_flags);
+ rb_key.access_flags = get_unchangeable_access_flags(dev, access_flags);
+ rb_key.st_index = st_index;
+ rb_key.ph = ph;
+ ent = mkey_cache_ent_from_rb_key(dev, rb_key);
+ /*
+ * If the MR can't come from the cache then synchronously create an uncached
+ * one.
+ */
+ if (!ent) {
+ mutex_lock(&dev->slow_path_mutex);
+ mr = reg_create(pd, umem, iova, access_flags, page_size, false, access_mode,
+ st_index, ph);
+ mutex_unlock(&dev->slow_path_mutex);
+ if (IS_ERR(mr))
+ return mr;
+ mr->mmkey.rb_key = rb_key;
+ mr->mmkey.cacheable = true;
+ return mr;
+ }
- wr->opcode = MLX5_IB_WR_UMR;
+ mr = _mlx5_mr_cache_alloc(dev, ent);
+ if (IS_ERR(mr))
+ return mr;
- umrwr->npages = n;
- umrwr->page_shift = page_shift;
- umrwr->mkey = key;
+ mr->ibmr.pd = pd;
+ mr->umem = umem;
+ mr->page_shift = order_base_2(page_size);
+ set_mr_fields(dev, mr, umem->length, access_flags, iova);
+
+ return mr;
}
-static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr,
- struct ib_sge *sg, u64 dma, int n, u32 key,
- int page_shift, u64 virt_addr, u64 len,
- int access_flags)
+static struct ib_mr *
+reg_create_crossing_vhca_mr(struct ib_pd *pd, u64 iova, u64 length, int access_flags,
+ u32 crossed_lkey)
{
- struct mlx5_umr_wr *umrwr = umr_wr(wr);
+ struct mlx5_ib_dev *dev = to_mdev(pd->device);
+ int access_mode = MLX5_MKC_ACCESS_MODE_CROSSING;
+ struct mlx5_ib_mr *mr;
+ void *mkc;
+ int inlen;
+ u32 *in;
+ int err;
- prep_umr_wqe_common(pd, wr, sg, dma, n, key, page_shift);
+ if (!MLX5_CAP_GEN(dev->mdev, crossing_vhca_mkey))
+ return ERR_PTR(-EOPNOTSUPP);
- wr->send_flags = 0;
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
- umrwr->target.virt_addr = virt_addr;
- umrwr->length = len;
- umrwr->access_flags = access_flags;
- umrwr->pd = pd;
-}
+ inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in) {
+ err = -ENOMEM;
+ goto err_1;
+ }
-static void prep_umr_unreg_wqe(struct mlx5_ib_dev *dev,
- struct ib_send_wr *wr, u32 key)
-{
- struct mlx5_umr_wr *umrwr = umr_wr(wr);
+ mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
+ MLX5_SET(mkc, mkc, crossing_target_vhca_id,
+ MLX5_CAP_GEN(dev->mdev, vhca_id));
+ MLX5_SET(mkc, mkc, translations_octword_size, crossed_lkey);
+ MLX5_SET(mkc, mkc, access_mode_1_0, access_mode & 0x3);
+ MLX5_SET(mkc, mkc, access_mode_4_2, (access_mode >> 2) & 0x7);
+
+ /* for this crossing mkey IOVA should be 0 and len should be IOVA + len */
+ set_mkc_access_pd_addr_fields(mkc, access_flags, 0, pd);
+ MLX5_SET64(mkc, mkc, len, iova + length);
+
+ MLX5_SET(mkc, mkc, free, 0);
+ MLX5_SET(mkc, mkc, umr_en, 0);
+ err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen);
+ if (err)
+ goto err_2;
+
+ mr->mmkey.type = MLX5_MKEY_MR;
+ set_mr_fields(dev, mr, length, access_flags, iova);
+ mr->ibmr.pd = pd;
+ kvfree(in);
+ mlx5_ib_dbg(dev, "crossing mkey = 0x%x\n", mr->mmkey.key);
- wr->send_flags = MLX5_IB_SEND_UMR_UNREG | MLX5_IB_SEND_UMR_FAIL_IF_FREE;
- wr->opcode = MLX5_IB_WR_UMR;
- umrwr->mkey = key;
+ return &mr->ibmr;
+err_2:
+ kvfree(in);
+err_1:
+ kfree(mr);
+ return ERR_PTR(err);
}
-static int mr_umem_get(struct ib_pd *pd, u64 start, u64 length,
- int access_flags, struct ib_umem **umem,
- int *npages, int *page_shift, int *ncont,
- int *order)
+/*
+ * If ibmr is NULL it will be allocated by reg_create.
+ * Else, the given ibmr will be used.
+ */
+static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem,
+ u64 iova, int access_flags,
+ unsigned long page_size, bool populate,
+ int access_mode, u16 st_index, u8 ph)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
+ struct mlx5_ib_mr *mr;
+ __be64 *pas;
+ void *mkc;
+ int inlen;
+ u32 *in;
int err;
+ bool pg_cap = !!(MLX5_CAP_GEN(dev->mdev, pg)) &&
+ (access_mode == MLX5_MKC_ACCESS_MODE_MTT) &&
+ (ph == MLX5_IB_NO_PH);
+ bool ksm_mode = (access_mode == MLX5_MKC_ACCESS_MODE_KSM);
- *umem = ib_umem_get(pd->uobject->context, start, length,
- access_flags, 0);
- err = PTR_ERR_OR_ZERO(*umem);
- if (err < 0) {
- mlx5_ib_err(dev, "umem get failed (%ld)\n", PTR_ERR(umem));
- return err;
+ if (!page_size)
+ return ERR_PTR(-EINVAL);
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+ mr->ibmr.pd = pd;
+ mr->access_flags = access_flags;
+ mr->page_shift = order_base_2(page_size);
+
+ inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
+ if (populate)
+ inlen += sizeof(*pas) *
+ roundup(ib_umem_num_dma_blocks(umem, page_size), 2);
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in) {
+ err = -ENOMEM;
+ goto err_1;
+ }
+ pas = (__be64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
+ if (populate) {
+ if (WARN_ON(access_flags & IB_ACCESS_ON_DEMAND || ksm_mode)) {
+ err = -EINVAL;
+ goto err_2;
+ }
+ mlx5_ib_populate_pas(umem, 1UL << mr->page_shift, pas,
+ pg_cap ? MLX5_IB_MTT_PRESENT : 0);
}
- mlx5_ib_cont_pages(*umem, start, MLX5_MKEY_PAGE_SHIFT_MASK, npages,
- page_shift, ncont, order);
- if (!*npages) {
- mlx5_ib_warn(dev, "avoid zero region\n");
- ib_umem_release(*umem);
- return -EINVAL;
+ /* The pg_access bit allows setting the access flags
+ * in the page list submitted with the command.
+ */
+ MLX5_SET(create_mkey_in, in, pg_access, !!(pg_cap));
+
+ mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
+ set_mkc_access_pd_addr_fields(mkc, access_flags, iova,
+ populate ? pd : dev->umrc.pd);
+ /* In case a data direct flow, overwrite the pdn field by its internal kernel PD */
+ if (umem->is_dmabuf && ksm_mode)
+ MLX5_SET(mkc, mkc, pd, dev->ddr.pdn);
+
+ MLX5_SET(mkc, mkc, free, !populate);
+ MLX5_SET(mkc, mkc, access_mode_1_0, access_mode);
+ MLX5_SET(mkc, mkc, umr_en, 1);
+
+ MLX5_SET64(mkc, mkc, len, umem->length);
+ MLX5_SET(mkc, mkc, bsf_octword_size, 0);
+ if (ksm_mode)
+ MLX5_SET(mkc, mkc, translations_octword_size,
+ get_octo_len(iova, umem->length, mr->page_shift) * 2);
+ else
+ MLX5_SET(mkc, mkc, translations_octword_size,
+ get_octo_len(iova, umem->length, mr->page_shift));
+ MLX5_SET(mkc, mkc, log_page_size, mr->page_shift);
+ if (mlx5_umem_needs_ats(dev, umem, access_flags))
+ MLX5_SET(mkc, mkc, ma_translation_mode, 1);
+ if (populate) {
+ MLX5_SET(create_mkey_in, in, translations_octword_actual_size,
+ get_octo_len(iova, umem->length, mr->page_shift));
}
- mlx5_ib_dbg(dev, "npages %d, ncont %d, order %d, page_shift %d\n",
- *npages, *ncont, *order, *page_shift);
+ if (ph != MLX5_IB_NO_PH) {
+ MLX5_SET(mkc, mkc, pcie_tph_en, 1);
+ MLX5_SET(mkc, mkc, pcie_tph_ph, ph);
+ if (st_index != MLX5_MKC_PCIE_TPH_NO_STEERING_TAG_INDEX)
+ MLX5_SET(mkc, mkc, pcie_tph_steering_tag_index, st_index);
+ }
- return 0;
-}
+ err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen);
+ if (err) {
+ mlx5_ib_warn(dev, "create mkey failed\n");
+ goto err_2;
+ }
+ mr->mmkey.type = MLX5_MKEY_MR;
+ mr->mmkey.ndescs = get_octo_len(iova, umem->length, mr->page_shift);
+ mr->umem = umem;
+ set_mr_fields(dev, mr, umem->length, access_flags, iova);
+ kvfree(in);
-static void mlx5_ib_umr_done(struct ib_cq *cq, struct ib_wc *wc)
-{
- struct mlx5_ib_umr_context *context =
- container_of(wc->wr_cqe, struct mlx5_ib_umr_context, cqe);
+ mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmkey.key);
- context->status = wc->status;
- complete(&context->done);
-}
+ return mr;
-static inline void mlx5_ib_init_umr_context(struct mlx5_ib_umr_context *context)
-{
- context->cqe.done = mlx5_ib_umr_done;
- context->status = -1;
- init_completion(&context->done);
+err_2:
+ kvfree(in);
+err_1:
+ kfree(mr);
+ return ERR_PTR(err);
}
-static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
- u64 virt_addr, u64 len, int npages,
- int page_shift, int order, int access_flags)
+static struct ib_mr *mlx5_ib_get_dm_mr(struct ib_pd *pd, u64 start_addr,
+ u64 length, int acc, int mode)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
- struct device *ddev = dev->ib_dev.dma_device;
- struct umr_common *umrc = &dev->umrc;
- struct mlx5_ib_umr_context umr_context;
- struct mlx5_umr_wr umrwr = {};
- struct ib_send_wr *bad;
+ int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
struct mlx5_ib_mr *mr;
- struct ib_sge sg;
- int size;
- __be64 *mr_pas;
- dma_addr_t dma;
- int err = 0;
- int i;
+ void *mkc;
+ u32 *in;
+ int err;
- for (i = 0; i < 1; i++) {
- mr = alloc_cached_mr(dev, order);
- if (mr)
- break;
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
- err = add_keys(dev, order2idx(dev, order), 1);
- if (err && err != -EAGAIN) {
- mlx5_ib_warn(dev, "add_keys failed, err %d\n", err);
- break;
- }
+ in = kzalloc(inlen, GFP_KERNEL);
+ if (!in) {
+ err = -ENOMEM;
+ goto err_free;
}
- if (!mr)
- return ERR_PTR(-EAGAIN);
+ mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
- err = dma_map_mr_pas(dev, umem, npages, page_shift, &size, &mr_pas,
- &dma);
- if (err)
- goto free_mr;
+ MLX5_SET(mkc, mkc, access_mode_1_0, mode & 0x3);
+ MLX5_SET(mkc, mkc, access_mode_4_2, (mode >> 2) & 0x7);
+ MLX5_SET64(mkc, mkc, len, length);
+ set_mkc_access_pd_addr_fields(mkc, acc, start_addr, pd);
- mlx5_ib_init_umr_context(&umr_context);
+ err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen);
+ if (err)
+ goto err_in;
- umrwr.wr.wr_cqe = &umr_context.cqe;
- prep_umr_reg_wqe(pd, &umrwr.wr, &sg, dma, npages, mr->mmkey.key,
- page_shift, virt_addr, len, access_flags);
+ kfree(in);
- down(&umrc->sem);
- err = ib_post_send(umrc->qp, &umrwr.wr, &bad);
- if (err) {
- mlx5_ib_warn(dev, "post send failed, err %d\n", err);
- goto unmap_dma;
- } else {
- wait_for_completion(&umr_context.done);
- if (umr_context.status != IB_WC_SUCCESS) {
- mlx5_ib_warn(dev, "reg umr failed\n");
- err = -EFAULT;
- }
- }
+ set_mr_fields(dev, mr, length, acc, start_addr);
- mr->mmkey.iova = virt_addr;
- mr->mmkey.size = len;
- mr->mmkey.pd = to_mpd(pd)->pdn;
+ return &mr->ibmr;
- mr->live = 1;
+err_in:
+ kfree(in);
-unmap_dma:
- up(&umrc->sem);
- dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE);
+err_free:
+ kfree(mr);
- kfree(mr_pas);
+ return ERR_PTR(err);
+}
-free_mr:
- if (err) {
- free_cached_mr(dev, mr);
- return ERR_PTR(err);
- }
+int mlx5_ib_advise_mr(struct ib_pd *pd,
+ enum ib_uverbs_advise_mr_advice advice,
+ u32 flags,
+ struct ib_sge *sg_list,
+ u32 num_sge,
+ struct uverbs_attr_bundle *attrs)
+{
+ if (advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH &&
+ advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_WRITE &&
+ advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_NO_FAULT)
+ return -EOPNOTSUPP;
- return mr;
+ return mlx5_ib_advise_mr_prefetch(pd, advice, flags,
+ sg_list, num_sge);
}
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-int mlx5_ib_update_mtt(struct mlx5_ib_mr *mr, u64 start_page_index, int npages,
- int zap)
+struct ib_mr *mlx5_ib_reg_dm_mr(struct ib_pd *pd, struct ib_dm *dm,
+ struct ib_dm_mr_attr *attr,
+ struct uverbs_attr_bundle *attrs)
{
- struct mlx5_ib_dev *dev = mr->dev;
- struct device *ddev = dev->ib_dev.dma_device;
- struct umr_common *umrc = &dev->umrc;
- struct mlx5_ib_umr_context umr_context;
- struct ib_umem *umem = mr->umem;
- int size;
- __be64 *pas;
- dma_addr_t dma;
- struct ib_send_wr *bad;
- struct mlx5_umr_wr wr;
- struct ib_sge sg;
- int err = 0;
- const int page_index_alignment = MLX5_UMR_MTT_ALIGNMENT / sizeof(u64);
- const int page_index_mask = page_index_alignment - 1;
- size_t pages_mapped = 0;
- size_t pages_to_map = 0;
- size_t pages_iter = 0;
- int use_emergency_buf = 0;
-
- /* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes,
- * so we need to align the offset and length accordingly */
- if (start_page_index & page_index_mask) {
- npages += start_page_index & page_index_mask;
- start_page_index &= ~page_index_mask;
- }
-
- pages_to_map = ALIGN(npages, page_index_alignment);
-
- if (start_page_index + pages_to_map > MLX5_MAX_UMR_PAGES)
- return -EINVAL;
-
- size = sizeof(u64) * pages_to_map;
- size = min_t(int, PAGE_SIZE, size);
- /* We allocate with GFP_ATOMIC to avoid recursion into page-reclaim
- * code, when we are called from an invalidation. The pas buffer must
- * be 2k-aligned for Connect-IB. */
- pas = (__be64 *)get_zeroed_page(GFP_ATOMIC);
- if (!pas) {
- mlx5_ib_warn(dev, "unable to allocate memory during MTT update, falling back to slower chunked mechanism.\n");
- pas = mlx5_ib_update_mtt_emergency_buffer;
- size = MLX5_UMR_MTT_MIN_CHUNK_SIZE;
- use_emergency_buf = 1;
- mutex_lock(&mlx5_ib_update_mtt_emergency_buffer_mutex);
- memset(pas, 0, size);
- }
- pages_iter = size / sizeof(u64);
- dma = dma_map_single(ddev, pas, size, DMA_TO_DEVICE);
- if (dma_mapping_error(ddev, dma)) {
- mlx5_ib_err(dev, "unable to map DMA during MTT update.\n");
- err = -ENOMEM;
- goto free_pas;
+ struct mlx5_ib_dm *mdm = to_mdm(dm);
+ struct mlx5_core_dev *dev = to_mdev(dm->device)->mdev;
+ u64 start_addr = mdm->dev_addr + attr->offset;
+ int mode;
+
+ switch (mdm->type) {
+ case MLX5_IB_UAPI_DM_TYPE_MEMIC:
+ if (attr->access_flags & ~MLX5_IB_DM_MEMIC_ALLOWED_ACCESS)
+ return ERR_PTR(-EINVAL);
+
+ mode = MLX5_MKC_ACCESS_MODE_MEMIC;
+ start_addr -= pci_resource_start(dev->pdev, 0);
+ break;
+ case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
+ case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM:
+ case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_PATTERN_SW_ICM:
+ case MLX5_IB_UAPI_DM_TYPE_ENCAP_SW_ICM:
+ if (attr->access_flags & ~MLX5_IB_DM_SW_ICM_ALLOWED_ACCESS)
+ return ERR_PTR(-EINVAL);
+
+ mode = MLX5_MKC_ACCESS_MODE_SW_ICM;
+ break;
+ default:
+ return ERR_PTR(-EINVAL);
}
- for (pages_mapped = 0;
- pages_mapped < pages_to_map && !err;
- pages_mapped += pages_iter, start_page_index += pages_iter) {
- dma_sync_single_for_cpu(ddev, dma, size, DMA_TO_DEVICE);
+ return mlx5_ib_get_dm_mr(pd, start_addr, attr->length,
+ attr->access_flags, mode);
+}
- npages = min_t(size_t,
- pages_iter,
- ib_umem_num_pages(umem) - start_page_index);
+static struct ib_mr *create_real_mr(struct ib_pd *pd, struct ib_umem *umem,
+ u64 iova, int access_flags,
+ struct ib_dmah *dmah)
+{
+ struct mlx5_ib_dev *dev = to_mdev(pd->device);
+ struct mlx5_ib_mr *mr = NULL;
+ bool xlt_with_umr;
+ u16 st_index = MLX5_MKC_PCIE_TPH_NO_STEERING_TAG_INDEX;
+ u8 ph = MLX5_IB_NO_PH;
+ int err;
- if (!zap) {
- __mlx5_ib_populate_pas(dev, umem, PAGE_SHIFT,
- start_page_index, npages, pas,
- MLX5_IB_MTT_PRESENT);
- /* Clear padding after the pages brought from the
- * umem. */
- memset(pas + npages, 0, size - npages * sizeof(u64));
- }
+ if (dmah) {
+ struct mlx5_ib_dmah *mdmah = to_mdmah(dmah);
- dma_sync_single_for_device(ddev, dma, size, DMA_TO_DEVICE);
+ ph = dmah->ph;
+ if (dmah->valid_fields & BIT(IB_DMAH_CPU_ID_EXISTS))
+ st_index = mdmah->st_index;
+ }
- mlx5_ib_init_umr_context(&umr_context);
+ xlt_with_umr = mlx5r_umr_can_load_pas(dev, umem->length);
+ if (xlt_with_umr) {
+ mr = alloc_cacheable_mr(pd, umem, iova, access_flags,
+ MLX5_MKC_ACCESS_MODE_MTT,
+ st_index, ph);
+ } else {
+ unsigned long page_size = mlx5_umem_mkc_find_best_pgsz(
+ dev, umem, iova, MLX5_MKC_ACCESS_MODE_MTT);
- memset(&wr, 0, sizeof(wr));
- wr.wr.wr_cqe = &umr_context.cqe;
+ mutex_lock(&dev->slow_path_mutex);
+ mr = reg_create(pd, umem, iova, access_flags, page_size,
+ true, MLX5_MKC_ACCESS_MODE_MTT,
+ st_index, ph);
+ mutex_unlock(&dev->slow_path_mutex);
+ }
+ if (IS_ERR(mr)) {
+ ib_umem_release(umem);
+ return ERR_CAST(mr);
+ }
- sg.addr = dma;
- sg.length = ALIGN(npages * sizeof(u64),
- MLX5_UMR_MTT_ALIGNMENT);
- sg.lkey = dev->umrc.pd->local_dma_lkey;
+ mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmkey.key);
- wr.wr.send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE |
- MLX5_IB_SEND_UMR_UPDATE_MTT;
- wr.wr.sg_list = &sg;
- wr.wr.num_sge = 1;
- wr.wr.opcode = MLX5_IB_WR_UMR;
- wr.npages = sg.length / sizeof(u64);
- wr.page_shift = PAGE_SHIFT;
- wr.mkey = mr->mmkey.key;
- wr.target.offset = start_page_index;
+ atomic_add(ib_umem_num_pages(umem), &dev->mdev->priv.reg_pages);
- down(&umrc->sem);
- err = ib_post_send(umrc->qp, &wr.wr, &bad);
+ if (xlt_with_umr) {
+ /*
+ * If the MR was created with reg_create then it will be
+ * configured properly but left disabled. It is safe to go ahead
+ * and configure it again via UMR while enabling it.
+ */
+ err = mlx5r_umr_update_mr_pas(mr, MLX5_IB_UPD_XLT_ENABLE);
if (err) {
- mlx5_ib_err(dev, "UMR post send failed, err %d\n", err);
- } else {
- wait_for_completion(&umr_context.done);
- if (umr_context.status != IB_WC_SUCCESS) {
- mlx5_ib_err(dev, "UMR completion failed, code %d\n",
- umr_context.status);
- err = -EFAULT;
- }
+ mlx5_ib_dereg_mr(&mr->ibmr, NULL);
+ return ERR_PTR(err);
}
- up(&umrc->sem);
}
- dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE);
-
-free_pas:
- if (!use_emergency_buf)
- free_page((unsigned long)pas);
- else
- mutex_unlock(&mlx5_ib_update_mtt_emergency_buffer_mutex);
-
- return err;
+ return &mr->ibmr;
}
-#endif
-/*
- * If ibmr is NULL it will be allocated by reg_create.
- * Else, the given ibmr will be used.
- */
-static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd,
- u64 virt_addr, u64 length,
- struct ib_umem *umem, int npages,
- int page_shift, int access_flags)
+static struct ib_mr *create_user_odp_mr(struct ib_pd *pd, u64 start, u64 length,
+ u64 iova, int access_flags,
+ struct ib_udata *udata)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
+ struct ib_umem_odp *odp;
struct mlx5_ib_mr *mr;
- __be64 *pas;
- void *mkc;
- int inlen;
- u32 *in;
int err;
- bool pg_cap = !!(MLX5_CAP_GEN(dev->mdev, pg));
- mr = ibmr ? to_mmr(ibmr) : kzalloc(sizeof(*mr), GFP_KERNEL);
- if (!mr)
- return ERR_PTR(-ENOMEM);
+ if (!IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING))
+ return ERR_PTR(-EOPNOTSUPP);
- inlen = MLX5_ST_SZ_BYTES(create_mkey_in) +
- sizeof(*pas) * ((npages + 1) / 2) * 2;
- in = mlx5_vzalloc(inlen);
- if (!in) {
- err = -ENOMEM;
- goto err_1;
- }
- pas = (__be64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
- mlx5_ib_populate_pas(dev, umem, page_shift, pas,
- pg_cap ? MLX5_IB_MTT_PRESENT : 0);
+ err = mlx5r_odp_create_eq(dev, &dev->odp_pf_eq);
+ if (err)
+ return ERR_PTR(err);
+ if (!start && length == U64_MAX) {
+ if (iova != 0)
+ return ERR_PTR(-EINVAL);
+ if (!(dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT))
+ return ERR_PTR(-EINVAL);
- /* The pg_access bit allows setting the access flags
- * in the page list submitted with the command. */
- MLX5_SET(create_mkey_in, in, pg_access, !!(pg_cap));
+ mr = mlx5_ib_alloc_implicit_mr(to_mpd(pd), access_flags);
+ if (IS_ERR(mr))
+ return ERR_CAST(mr);
+ return &mr->ibmr;
+ }
- mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
- MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_MTT);
- MLX5_SET(mkc, mkc, a, !!(access_flags & IB_ACCESS_REMOTE_ATOMIC));
- MLX5_SET(mkc, mkc, rw, !!(access_flags & IB_ACCESS_REMOTE_WRITE));
- MLX5_SET(mkc, mkc, rr, !!(access_flags & IB_ACCESS_REMOTE_READ));
- MLX5_SET(mkc, mkc, lw, !!(access_flags & IB_ACCESS_LOCAL_WRITE));
- MLX5_SET(mkc, mkc, lr, 1);
+ /* ODP requires xlt update via umr to work. */
+ if (!mlx5r_umr_can_load_pas(dev, length))
+ return ERR_PTR(-EINVAL);
- MLX5_SET64(mkc, mkc, start_addr, virt_addr);
- MLX5_SET64(mkc, mkc, len, length);
- MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
- MLX5_SET(mkc, mkc, bsf_octword_size, 0);
- MLX5_SET(mkc, mkc, translations_octword_size,
- get_octo_len(virt_addr, length, 1 << page_shift));
- MLX5_SET(mkc, mkc, log_page_size, page_shift);
- MLX5_SET(mkc, mkc, qpn, 0xffffff);
- MLX5_SET(create_mkey_in, in, translations_octword_actual_size,
- get_octo_len(virt_addr, length, 1 << page_shift));
+ odp = ib_umem_odp_get(&dev->ib_dev, start, length, access_flags,
+ &mlx5_mn_ops);
+ if (IS_ERR(odp))
+ return ERR_CAST(odp);
- err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen);
- if (err) {
- mlx5_ib_warn(dev, "create mkey failed\n");
- goto err_2;
+ mr = alloc_cacheable_mr(pd, &odp->umem, iova, access_flags,
+ MLX5_MKC_ACCESS_MODE_MTT,
+ MLX5_MKC_PCIE_TPH_NO_STEERING_TAG_INDEX,
+ MLX5_IB_NO_PH);
+ if (IS_ERR(mr)) {
+ ib_umem_release(&odp->umem);
+ return ERR_CAST(mr);
}
- mr->umem = umem;
- mr->dev = dev;
- mr->live = 1;
- kvfree(in);
+ xa_init(&mr->implicit_children);
- mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmkey.key);
+ odp->private = mr;
+ err = mlx5r_store_odp_mkey(dev, &mr->mmkey);
+ if (err)
+ goto err_dereg_mr;
- return mr;
+ err = mlx5_ib_init_odp_mr(mr);
+ if (err)
+ goto err_dereg_mr;
+ return &mr->ibmr;
-err_2:
- kvfree(in);
+err_dereg_mr:
+ mlx5_ib_dereg_mr(&mr->ibmr, NULL);
+ return ERR_PTR(err);
+}
-err_1:
- if (!ibmr)
- kfree(mr);
+struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+ u64 iova, int access_flags,
+ struct ib_dmah *dmah,
+ struct ib_udata *udata)
+{
+ struct mlx5_ib_dev *dev = to_mdev(pd->device);
+ struct ib_umem *umem;
+ int err;
- return ERR_PTR(err);
+ if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM) ||
+ ((access_flags & IB_ACCESS_ON_DEMAND) && dmah))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ mlx5_ib_dbg(dev, "start 0x%llx, iova 0x%llx, length 0x%llx, access_flags 0x%x\n",
+ start, iova, length, access_flags);
+
+ err = mlx5r_umr_resource_init(dev);
+ if (err)
+ return ERR_PTR(err);
+
+ if (access_flags & IB_ACCESS_ON_DEMAND)
+ return create_user_odp_mr(pd, start, length, iova, access_flags,
+ udata);
+ umem = ib_umem_get(&dev->ib_dev, start, length, access_flags);
+ if (IS_ERR(umem))
+ return ERR_CAST(umem);
+ return create_real_mr(pd, umem, iova, access_flags, dmah);
}
-static void set_mr_fileds(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
- int npages, u64 length, int access_flags)
+static void mlx5_ib_dmabuf_invalidate_cb(struct dma_buf_attachment *attach)
{
- mr->npages = npages;
- atomic_add(npages, &dev->mdev->priv.reg_pages);
- mr->ibmr.lkey = mr->mmkey.key;
- mr->ibmr.rkey = mr->mmkey.key;
- mr->ibmr.length = length;
- mr->access_flags = access_flags;
+ struct ib_umem_dmabuf *umem_dmabuf = attach->importer_priv;
+ struct mlx5_ib_mr *mr = umem_dmabuf->private;
+
+ dma_resv_assert_held(umem_dmabuf->attach->dmabuf->resv);
+
+ if (!umem_dmabuf->sgt || !mr)
+ return;
+
+ mlx5r_umr_update_mr_pas(mr, MLX5_IB_UPD_XLT_ZAP);
+ ib_umem_dmabuf_unmap_pages(umem_dmabuf);
}
-struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
- u64 virt_addr, int access_flags,
- struct ib_udata *udata)
+static struct dma_buf_attach_ops mlx5_ib_dmabuf_attach_ops = {
+ .allow_peer2peer = 1,
+ .move_notify = mlx5_ib_dmabuf_invalidate_cb,
+};
+
+static struct ib_mr *
+reg_user_mr_dmabuf(struct ib_pd *pd, struct device *dma_device,
+ u64 offset, u64 length, u64 virt_addr,
+ int fd, int access_flags, int access_mode,
+ struct ib_dmah *dmah)
{
+ bool pinned_mode = (access_mode == MLX5_MKC_ACCESS_MODE_KSM);
struct mlx5_ib_dev *dev = to_mdev(pd->device);
struct mlx5_ib_mr *mr = NULL;
- struct ib_umem *umem;
- int page_shift;
- int npages;
- int ncont;
- int order;
+ struct ib_umem_dmabuf *umem_dmabuf;
+ u16 st_index = MLX5_MKC_PCIE_TPH_NO_STEERING_TAG_INDEX;
+ u8 ph = MLX5_IB_NO_PH;
int err;
- mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
- start, virt_addr, length, access_flags);
- err = mr_umem_get(pd, start, length, access_flags, &umem, &npages,
- &page_shift, &ncont, &order);
-
- if (err < 0)
+ err = mlx5r_umr_resource_init(dev);
+ if (err)
return ERR_PTR(err);
- if (use_umr(order)) {
- mr = reg_umr(pd, umem, virt_addr, length, ncont, page_shift,
- order, access_flags);
- if (PTR_ERR(mr) == -EAGAIN) {
- mlx5_ib_dbg(dev, "cache empty for order %d", order);
- mr = NULL;
- }
- } else if (access_flags & IB_ACCESS_ON_DEMAND) {
- err = -EINVAL;
- pr_err("Got MR registration for ODP MR > 512MB, not supported for Connect-IB");
- goto error;
+ if (!pinned_mode)
+ umem_dmabuf = ib_umem_dmabuf_get(&dev->ib_dev,
+ offset, length, fd,
+ access_flags,
+ &mlx5_ib_dmabuf_attach_ops);
+ else
+ umem_dmabuf = ib_umem_dmabuf_get_pinned_with_dma_device(&dev->ib_dev,
+ dma_device, offset, length,
+ fd, access_flags);
+
+ if (IS_ERR(umem_dmabuf)) {
+ mlx5_ib_dbg(dev, "umem_dmabuf get failed (%pe)\n", umem_dmabuf);
+ return ERR_CAST(umem_dmabuf);
}
- if (!mr) {
- mutex_lock(&dev->slow_path_mutex);
- mr = reg_create(NULL, pd, virt_addr, length, umem, ncont,
- page_shift, access_flags);
- mutex_unlock(&dev->slow_path_mutex);
+ if (dmah) {
+ struct mlx5_ib_dmah *mdmah = to_mdmah(dmah);
+
+ ph = dmah->ph;
+ if (dmah->valid_fields & BIT(IB_DMAH_CPU_ID_EXISTS))
+ st_index = mdmah->st_index;
}
+ mr = alloc_cacheable_mr(pd, &umem_dmabuf->umem, virt_addr,
+ access_flags, access_mode,
+ st_index, ph);
if (IS_ERR(mr)) {
- err = PTR_ERR(mr);
- goto error;
+ ib_umem_release(&umem_dmabuf->umem);
+ return ERR_CAST(mr);
}
mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmkey.key);
- mr->umem = umem;
- set_mr_fileds(dev, mr, npages, length, access_flags);
-
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- update_odp_mr(mr);
-#endif
+ atomic_add(ib_umem_num_pages(mr->umem), &dev->mdev->priv.reg_pages);
+ umem_dmabuf->private = mr;
+ if (!pinned_mode) {
+ err = mlx5r_store_odp_mkey(dev, &mr->mmkey);
+ if (err)
+ goto err_dereg_mr;
+ } else {
+ mr->data_direct = true;
+ }
+ err = mlx5_ib_init_dmabuf_mr(mr);
+ if (err)
+ goto err_dereg_mr;
return &mr->ibmr;
-error:
- ib_umem_release(umem);
+err_dereg_mr:
+ __mlx5_ib_dereg_mr(&mr->ibmr);
return ERR_PTR(err);
}
-static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
+static struct ib_mr *
+reg_user_mr_dmabuf_by_data_direct(struct ib_pd *pd, u64 offset,
+ u64 length, u64 virt_addr,
+ int fd, int access_flags)
{
- struct mlx5_core_dev *mdev = dev->mdev;
- struct umr_common *umrc = &dev->umrc;
- struct mlx5_ib_umr_context umr_context;
- struct mlx5_umr_wr umrwr = {};
- struct ib_send_wr *bad;
- int err;
-
- if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
- return 0;
+ struct mlx5_ib_dev *dev = to_mdev(pd->device);
+ struct mlx5_data_direct_dev *data_direct_dev;
+ struct ib_mr *crossing_mr;
+ struct ib_mr *crossed_mr;
+ int ret = 0;
- mlx5_ib_init_umr_context(&umr_context);
+ /* As of HW behaviour the IOVA must be page aligned in KSM mode */
+ if (!PAGE_ALIGNED(virt_addr) || (access_flags & IB_ACCESS_ON_DEMAND))
+ return ERR_PTR(-EOPNOTSUPP);
- umrwr.wr.wr_cqe = &umr_context.cqe;
- prep_umr_unreg_wqe(dev, &umrwr.wr, mr->mmkey.key);
+ mutex_lock(&dev->data_direct_lock);
+ data_direct_dev = dev->data_direct_dev;
+ if (!data_direct_dev) {
+ ret = -EINVAL;
+ goto end;
+ }
- down(&umrc->sem);
- err = ib_post_send(umrc->qp, &umrwr.wr, &bad);
- if (err) {
- up(&umrc->sem);
- mlx5_ib_dbg(dev, "err %d\n", err);
- goto error;
- } else {
- wait_for_completion(&umr_context.done);
- up(&umrc->sem);
+ /* If no device's 'data direct mkey' with RO flags exists
+ * mask it out accordingly.
+ */
+ if (!dev->ddr.mkey_ro_valid)
+ access_flags &= ~IB_ACCESS_RELAXED_ORDERING;
+ crossed_mr = reg_user_mr_dmabuf(pd, &data_direct_dev->pdev->dev,
+ offset, length, virt_addr, fd,
+ access_flags, MLX5_MKC_ACCESS_MODE_KSM,
+ NULL);
+ if (IS_ERR(crossed_mr)) {
+ ret = PTR_ERR(crossed_mr);
+ goto end;
}
- if (umr_context.status != IB_WC_SUCCESS) {
- mlx5_ib_warn(dev, "unreg umr failed\n");
- err = -EFAULT;
- goto error;
+
+ mutex_lock(&dev->slow_path_mutex);
+ crossing_mr = reg_create_crossing_vhca_mr(pd, virt_addr, length, access_flags,
+ crossed_mr->lkey);
+ mutex_unlock(&dev->slow_path_mutex);
+ if (IS_ERR(crossing_mr)) {
+ __mlx5_ib_dereg_mr(crossed_mr);
+ ret = PTR_ERR(crossing_mr);
+ goto end;
}
- return 0;
-error:
- return err;
+ list_add_tail(&to_mmr(crossed_mr)->dd_node, &dev->data_direct_mr_list);
+ to_mmr(crossing_mr)->dd_crossed_mr = to_mmr(crossed_mr);
+ to_mmr(crossing_mr)->data_direct = true;
+end:
+ mutex_unlock(&dev->data_direct_lock);
+ return ret ? ERR_PTR(ret) : crossing_mr;
}
-static int rereg_umr(struct ib_pd *pd, struct mlx5_ib_mr *mr, u64 virt_addr,
- u64 length, int npages, int page_shift, int order,
- int access_flags, int flags)
+struct ib_mr *mlx5_ib_reg_user_mr_dmabuf(struct ib_pd *pd, u64 offset,
+ u64 length, u64 virt_addr,
+ int fd, int access_flags,
+ struct ib_dmah *dmah,
+ struct uverbs_attr_bundle *attrs)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
- struct device *ddev = dev->ib_dev.dma_device;
- struct mlx5_ib_umr_context umr_context;
- struct ib_send_wr *bad;
- struct mlx5_umr_wr umrwr = {};
- struct ib_sge sg;
- struct umr_common *umrc = &dev->umrc;
- dma_addr_t dma = 0;
- __be64 *mr_pas = NULL;
- int size;
+ int mlx5_access_flags = 0;
int err;
- mlx5_ib_init_umr_context(&umr_context);
-
- umrwr.wr.wr_cqe = &umr_context.cqe;
- umrwr.wr.send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE;
+ if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM) ||
+ !IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING))
+ return ERR_PTR(-EOPNOTSUPP);
- if (flags & IB_MR_REREG_TRANS) {
- err = dma_map_mr_pas(dev, mr->umem, npages, page_shift, &size,
- &mr_pas, &dma);
+ if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_REG_DMABUF_MR_ACCESS_FLAGS)) {
+ err = uverbs_get_flags32(&mlx5_access_flags, attrs,
+ MLX5_IB_ATTR_REG_DMABUF_MR_ACCESS_FLAGS,
+ MLX5_IB_UAPI_REG_DMABUF_ACCESS_DATA_DIRECT);
if (err)
- return err;
-
- umrwr.target.virt_addr = virt_addr;
- umrwr.length = length;
- umrwr.wr.send_flags |= MLX5_IB_SEND_UMR_UPDATE_TRANSLATION;
+ return ERR_PTR(err);
}
- prep_umr_wqe_common(pd, &umrwr.wr, &sg, dma, npages, mr->mmkey.key,
- page_shift);
+ mlx5_ib_dbg(dev,
+ "offset 0x%llx, virt_addr 0x%llx, length 0x%llx, fd %d, access_flags 0x%x, mlx5_access_flags 0x%x\n",
+ offset, virt_addr, length, fd, access_flags, mlx5_access_flags);
+
+ /* dmabuf requires xlt update via umr to work. */
+ if (!mlx5r_umr_can_load_pas(dev, length))
+ return ERR_PTR(-EINVAL);
+
+ if (mlx5_access_flags & MLX5_IB_UAPI_REG_DMABUF_ACCESS_DATA_DIRECT)
+ return reg_user_mr_dmabuf_by_data_direct(pd, offset, length, virt_addr,
+ fd, access_flags);
+
+ return reg_user_mr_dmabuf(pd, pd->device->dma_device,
+ offset, length, virt_addr,
+ fd, access_flags, MLX5_MKC_ACCESS_MODE_MTT,
+ dmah);
+}
+
+/*
+ * True if the change in access flags can be done via UMR, only some access
+ * flags can be updated.
+ */
+static bool can_use_umr_rereg_access(struct mlx5_ib_dev *dev,
+ unsigned int current_access_flags,
+ unsigned int target_access_flags)
+{
+ unsigned int diffs = current_access_flags ^ target_access_flags;
+
+ if (diffs & ~(IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE |
+ IB_ACCESS_REMOTE_READ | IB_ACCESS_RELAXED_ORDERING |
+ IB_ACCESS_REMOTE_ATOMIC))
+ return false;
+ return mlx5r_umr_can_reconfig(dev, current_access_flags,
+ target_access_flags);
+}
+
+static bool can_use_umr_rereg_pas(struct mlx5_ib_mr *mr,
+ struct ib_umem *new_umem,
+ int new_access_flags, u64 iova,
+ unsigned long *page_size)
+{
+ struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
+
+ /* We only track the allocated sizes of MRs from the cache */
+ if (!mr->mmkey.cache_ent)
+ return false;
+ if (!mlx5r_umr_can_load_pas(dev, new_umem->length))
+ return false;
+
+ *page_size = mlx5_umem_mkc_find_best_pgsz(
+ dev, new_umem, iova, mr->mmkey.cache_ent->rb_key.access_mode);
+ if (WARN_ON(!*page_size))
+ return false;
+ return (mr->mmkey.cache_ent->rb_key.ndescs) >=
+ ib_umem_num_dma_blocks(new_umem, *page_size);
+}
+
+static int umr_rereg_pas(struct mlx5_ib_mr *mr, struct ib_pd *pd,
+ int access_flags, int flags, struct ib_umem *new_umem,
+ u64 iova, unsigned long page_size)
+{
+ struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
+ int upd_flags = MLX5_IB_UPD_XLT_ADDR | MLX5_IB_UPD_XLT_ENABLE;
+ struct ib_umem *old_umem = mr->umem;
+ int err;
+
+ /*
+ * To keep everything simple the MR is revoked before we start to mess
+ * with it. This ensure the change is atomic relative to any use of the
+ * MR.
+ */
+ err = mlx5r_umr_revoke_mr(mr);
+ if (err)
+ return err;
if (flags & IB_MR_REREG_PD) {
- umrwr.pd = pd;
- umrwr.wr.send_flags |= MLX5_IB_SEND_UMR_UPDATE_PD;
+ mr->ibmr.pd = pd;
+ upd_flags |= MLX5_IB_UPD_XLT_PD;
}
-
if (flags & IB_MR_REREG_ACCESS) {
- umrwr.access_flags = access_flags;
- umrwr.wr.send_flags |= MLX5_IB_SEND_UMR_UPDATE_ACCESS;
+ mr->access_flags = access_flags;
+ upd_flags |= MLX5_IB_UPD_XLT_ACCESS;
}
- /* post send request to UMR QP */
- down(&umrc->sem);
- err = ib_post_send(umrc->qp, &umrwr.wr, &bad);
-
+ mr->ibmr.iova = iova;
+ mr->ibmr.length = new_umem->length;
+ mr->page_shift = order_base_2(page_size);
+ mr->umem = new_umem;
+ err = mlx5r_umr_update_mr_pas(mr, upd_flags);
if (err) {
- mlx5_ib_warn(dev, "post send failed, err %d\n", err);
- } else {
- wait_for_completion(&umr_context.done);
- if (umr_context.status != IB_WC_SUCCESS) {
- mlx5_ib_warn(dev, "reg umr failed (%u)\n",
- umr_context.status);
- err = -EFAULT;
- }
+ /*
+ * The MR is revoked at this point so there is no issue to free
+ * new_umem.
+ */
+ mr->umem = old_umem;
+ return err;
}
- up(&umrc->sem);
- if (flags & IB_MR_REREG_TRANS) {
- dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE);
- kfree(mr_pas);
- }
- return err;
+ atomic_sub(ib_umem_num_pages(old_umem), &dev->mdev->priv.reg_pages);
+ ib_umem_release(old_umem);
+ atomic_add(ib_umem_num_pages(new_umem), &dev->mdev->priv.reg_pages);
+ return 0;
}
-int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
- u64 length, u64 virt_addr, int new_access_flags,
- struct ib_pd *new_pd, struct ib_udata *udata)
+struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
+ u64 length, u64 iova, int new_access_flags,
+ struct ib_pd *new_pd,
+ struct ib_udata *udata)
{
struct mlx5_ib_dev *dev = to_mdev(ib_mr->device);
struct mlx5_ib_mr *mr = to_mmr(ib_mr);
- struct ib_pd *pd = (flags & IB_MR_REREG_PD) ? new_pd : ib_mr->pd;
- int access_flags = flags & IB_MR_REREG_ACCESS ?
- new_access_flags :
- mr->access_flags;
- u64 addr = (flags & IB_MR_REREG_TRANS) ? virt_addr : mr->umem->address;
- u64 len = (flags & IB_MR_REREG_TRANS) ? length : mr->umem->length;
- int page_shift = 0;
- int npages = 0;
- int ncont = 0;
- int order = 0;
int err;
- mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
- start, virt_addr, length, access_flags);
+ if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM) || mr->data_direct ||
+ mr->mmkey.rb_key.ph != MLX5_IB_NO_PH)
+ return ERR_PTR(-EOPNOTSUPP);
- if (flags != IB_MR_REREG_PD) {
- /*
- * Replace umem. This needs to be done whether or not UMR is
- * used.
- */
- flags |= IB_MR_REREG_TRANS;
- ib_umem_release(mr->umem);
- err = mr_umem_get(pd, addr, len, access_flags, &mr->umem,
- &npages, &page_shift, &ncont, &order);
- if (err < 0) {
- mr->umem = NULL;
- return err;
- }
- }
+ mlx5_ib_dbg(
+ dev,
+ "start 0x%llx, iova 0x%llx, length 0x%llx, access_flags 0x%x\n",
+ start, iova, length, new_access_flags);
- if (flags & IB_MR_REREG_TRANS && !use_umr_mtt_update(mr, addr, len)) {
- /*
- * UMR can't be used - MKey needs to be replaced.
- */
- if (mr->umred) {
- err = unreg_umr(dev, mr);
- if (err)
- mlx5_ib_warn(dev, "Failed to unregister MR\n");
- } else {
- err = destroy_mkey(dev, mr);
- if (err)
- mlx5_ib_warn(dev, "Failed to destroy MKey\n");
- }
- if (err)
- return err;
+ if (flags & ~(IB_MR_REREG_TRANS | IB_MR_REREG_PD | IB_MR_REREG_ACCESS))
+ return ERR_PTR(-EOPNOTSUPP);
- mr = reg_create(ib_mr, pd, addr, len, mr->umem, ncont,
- page_shift, access_flags);
+ if (!(flags & IB_MR_REREG_ACCESS))
+ new_access_flags = mr->access_flags;
+ if (!(flags & IB_MR_REREG_PD))
+ new_pd = ib_mr->pd;
- if (IS_ERR(mr))
- return PTR_ERR(mr);
+ if (!(flags & IB_MR_REREG_TRANS)) {
+ struct ib_umem *umem;
+
+ /* Fast path for PD/access change */
+ if (can_use_umr_rereg_access(dev, mr->access_flags,
+ new_access_flags)) {
+ err = mlx5r_umr_rereg_pd_access(mr, new_pd,
+ new_access_flags);
+ if (err)
+ return ERR_PTR(err);
+ return NULL;
+ }
+ /* DM or ODP MR's don't have a normal umem so we can't re-use it */
+ if (!mr->umem || is_odp_mr(mr) || is_dmabuf_mr(mr))
+ goto recreate;
- mr->umred = 0;
- } else {
/*
- * Send a UMR WQE
+ * Only one active MR can refer to a umem at one time, revoke
+ * the old MR before assigning the umem to the new one.
*/
- err = rereg_umr(pd, mr, addr, len, npages, page_shift,
- order, access_flags, flags);
- if (err) {
- mlx5_ib_warn(dev, "Failed to rereg UMR\n");
- return err;
- }
- }
+ err = mlx5r_umr_revoke_mr(mr);
+ if (err)
+ return ERR_PTR(err);
+ umem = mr->umem;
+ mr->umem = NULL;
+ atomic_sub(ib_umem_num_pages(umem), &dev->mdev->priv.reg_pages);
- if (flags & IB_MR_REREG_PD) {
- ib_mr->pd = pd;
- mr->mmkey.pd = to_mpd(pd)->pdn;
+ return create_real_mr(new_pd, umem, mr->ibmr.iova,
+ new_access_flags, NULL);
}
- if (flags & IB_MR_REREG_ACCESS)
- mr->access_flags = access_flags;
-
- if (flags & IB_MR_REREG_TRANS) {
- atomic_sub(mr->npages, &dev->mdev->priv.reg_pages);
- set_mr_fileds(dev, mr, npages, len, access_flags);
- mr->mmkey.iova = addr;
- mr->mmkey.size = len;
+ /*
+ * DM doesn't have a PAS list so we can't re-use it, odp/dmabuf does
+ * but the logic around releasing the umem is different
+ */
+ if (!mr->umem || is_odp_mr(mr) || is_dmabuf_mr(mr))
+ goto recreate;
+
+ if (!(new_access_flags & IB_ACCESS_ON_DEMAND) &&
+ can_use_umr_rereg_access(dev, mr->access_flags, new_access_flags)) {
+ struct ib_umem *new_umem;
+ unsigned long page_size;
+
+ new_umem = ib_umem_get(&dev->ib_dev, start, length,
+ new_access_flags);
+ if (IS_ERR(new_umem))
+ return ERR_CAST(new_umem);
+
+ /* Fast path for PAS change */
+ if (can_use_umr_rereg_pas(mr, new_umem, new_access_flags, iova,
+ &page_size)) {
+ err = umr_rereg_pas(mr, new_pd, new_access_flags, flags,
+ new_umem, iova, page_size);
+ if (err) {
+ ib_umem_release(new_umem);
+ return ERR_PTR(err);
+ }
+ return NULL;
+ }
+ return create_real_mr(new_pd, new_umem, iova, new_access_flags, NULL);
}
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- update_odp_mr(mr);
-#endif
- return 0;
+ /*
+ * Everything else has no state we can preserve, just create a new MR
+ * from scratch
+ */
+recreate:
+ return mlx5_ib_reg_user_mr(new_pd, start, length, iova,
+ new_access_flags, NULL, udata);
}
static int
@@ -1449,11 +1977,18 @@ mlx5_alloc_priv_descs(struct ib_device *device,
int ndescs,
int desc_size)
{
+ struct mlx5_ib_dev *dev = to_mdev(device);
+ struct device *ddev = &dev->mdev->pdev->dev;
int size = ndescs * desc_size;
int add_size;
int ret;
add_size = max_t(int, MLX5_UMR_ALIGN - ARCH_KMALLOC_MINALIGN, 0);
+ if (is_power_of_2(MLX5_UMR_ALIGN) && add_size) {
+ int end = max_t(int, MLX5_UMR_ALIGN, roundup_pow_of_two(size));
+
+ add_size = min_t(int, end - size, add_size);
+ }
mr->descs_alloc = kzalloc(size + add_size, GFP_KERNEL);
if (!mr->descs_alloc)
@@ -1461,9 +1996,8 @@ mlx5_alloc_priv_descs(struct ib_device *device,
mr->descs = PTR_ALIGN(mr->descs_alloc, MLX5_UMR_ALIGN);
- mr->desc_map = dma_map_single(device->dma_device, mr->descs,
- size, DMA_TO_DEVICE);
- if (dma_mapping_error(device->dma_device, mr->desc_map)) {
+ mr->desc_map = dma_map_single(ddev, mr->descs, size, DMA_TO_DEVICE);
+ if (dma_mapping_error(ddev, mr->desc_map)) {
ret = -ENOMEM;
goto err;
}
@@ -1478,108 +2012,333 @@ err:
static void
mlx5_free_priv_descs(struct mlx5_ib_mr *mr)
{
- if (mr->descs) {
+ if (!mr->umem && !mr->data_direct &&
+ mr->ibmr.type != IB_MR_TYPE_DM && mr->descs) {
struct ib_device *device = mr->ibmr.device;
int size = mr->max_descs * mr->desc_size;
+ struct mlx5_ib_dev *dev = to_mdev(device);
- dma_unmap_single(device->dma_device, mr->desc_map,
- size, DMA_TO_DEVICE);
+ dma_unmap_single(&dev->mdev->pdev->dev, mr->desc_map, size,
+ DMA_TO_DEVICE);
kfree(mr->descs_alloc);
mr->descs = NULL;
}
}
-static int clean_mr(struct mlx5_ib_mr *mr)
+static int cache_ent_find_and_store(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_mr *mr)
+{
+ struct mlx5_mkey_cache *cache = &dev->cache;
+ struct mlx5_cache_ent *ent;
+ int ret;
+
+ if (mr->mmkey.cache_ent) {
+ spin_lock_irq(&mr->mmkey.cache_ent->mkeys_queue.lock);
+ goto end;
+ }
+
+ mutex_lock(&cache->rb_lock);
+ ent = mkey_cache_ent_from_rb_key(dev, mr->mmkey.rb_key);
+ if (ent) {
+ if (ent->rb_key.ndescs == mr->mmkey.rb_key.ndescs) {
+ if (ent->disabled) {
+ mutex_unlock(&cache->rb_lock);
+ return -EOPNOTSUPP;
+ }
+ mr->mmkey.cache_ent = ent;
+ spin_lock_irq(&mr->mmkey.cache_ent->mkeys_queue.lock);
+ mutex_unlock(&cache->rb_lock);
+ goto end;
+ }
+ }
+
+ ent = mlx5r_cache_create_ent_locked(dev, mr->mmkey.rb_key, false);
+ mutex_unlock(&cache->rb_lock);
+ if (IS_ERR(ent))
+ return PTR_ERR(ent);
+
+ mr->mmkey.cache_ent = ent;
+ spin_lock_irq(&mr->mmkey.cache_ent->mkeys_queue.lock);
+
+end:
+ ret = push_mkey_locked(mr->mmkey.cache_ent, mr->mmkey.key);
+ spin_unlock_irq(&mr->mmkey.cache_ent->mkeys_queue.lock);
+ return ret;
+}
+
+static int mlx5_ib_revoke_data_direct_mr(struct mlx5_ib_mr *mr)
{
struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
- int umred = mr->umred;
+ struct ib_umem_dmabuf *umem_dmabuf = to_ib_umem_dmabuf(mr->umem);
int err;
- if (mr->sig) {
+ lockdep_assert_held(&dev->data_direct_lock);
+ mr->revoked = true;
+ err = mlx5r_umr_revoke_mr(mr);
+ if (WARN_ON(err))
+ return err;
+
+ ib_umem_dmabuf_revoke(umem_dmabuf);
+ return 0;
+}
+
+void mlx5_ib_revoke_data_direct_mrs(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_ib_mr *mr, *next;
+
+ lockdep_assert_held(&dev->data_direct_lock);
+
+ list_for_each_entry_safe(mr, next, &dev->data_direct_mr_list, dd_node) {
+ list_del(&mr->dd_node);
+ mlx5_ib_revoke_data_direct_mr(mr);
+ }
+}
+
+static int mlx5_umr_revoke_mr_with_lock(struct mlx5_ib_mr *mr)
+{
+ bool is_odp_dma_buf = is_dmabuf_mr(mr) &&
+ !to_ib_umem_dmabuf(mr->umem)->pinned;
+ bool is_odp = is_odp_mr(mr);
+ int ret;
+
+ if (is_odp)
+ mutex_lock(&to_ib_umem_odp(mr->umem)->umem_mutex);
+
+ if (is_odp_dma_buf)
+ dma_resv_lock(to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv,
+ NULL);
+
+ ret = mlx5r_umr_revoke_mr(mr);
+
+ if (is_odp) {
+ if (!ret)
+ to_ib_umem_odp(mr->umem)->private = NULL;
+ mutex_unlock(&to_ib_umem_odp(mr->umem)->umem_mutex);
+ }
+
+ if (is_odp_dma_buf) {
+ if (!ret)
+ to_ib_umem_dmabuf(mr->umem)->private = NULL;
+ dma_resv_unlock(
+ to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv);
+ }
+
+ return ret;
+}
+
+static int mlx5r_handle_mkey_cleanup(struct mlx5_ib_mr *mr)
+{
+ bool is_odp_dma_buf = is_dmabuf_mr(mr) &&
+ !to_ib_umem_dmabuf(mr->umem)->pinned;
+ struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
+ struct mlx5_cache_ent *ent = mr->mmkey.cache_ent;
+ bool is_odp = is_odp_mr(mr);
+ bool from_cache = !!ent;
+ int ret;
+
+ if (mr->mmkey.cacheable && !mlx5_umr_revoke_mr_with_lock(mr) &&
+ !cache_ent_find_and_store(dev, mr)) {
+ ent = mr->mmkey.cache_ent;
+ /* upon storing to a clean temp entry - schedule its cleanup */
+ spin_lock_irq(&ent->mkeys_queue.lock);
+ if (from_cache)
+ ent->in_use--;
+ if (ent->is_tmp && !ent->tmp_cleanup_scheduled) {
+ mod_delayed_work(ent->dev->cache.wq, &ent->dwork,
+ secs_to_jiffies(30));
+ ent->tmp_cleanup_scheduled = true;
+ }
+ spin_unlock_irq(&ent->mkeys_queue.lock);
+ return 0;
+ }
+
+ if (ent) {
+ spin_lock_irq(&ent->mkeys_queue.lock);
+ ent->in_use--;
+ mr->mmkey.cache_ent = NULL;
+ spin_unlock_irq(&ent->mkeys_queue.lock);
+ }
+
+ if (is_odp)
+ mutex_lock(&to_ib_umem_odp(mr->umem)->umem_mutex);
+
+ if (is_odp_dma_buf)
+ dma_resv_lock(to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv,
+ NULL);
+ ret = destroy_mkey(dev, mr);
+ if (is_odp) {
+ if (!ret)
+ to_ib_umem_odp(mr->umem)->private = NULL;
+ mutex_unlock(&to_ib_umem_odp(mr->umem)->umem_mutex);
+ }
+
+ if (is_odp_dma_buf) {
+ if (!ret)
+ to_ib_umem_dmabuf(mr->umem)->private = NULL;
+ dma_resv_unlock(
+ to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv);
+ }
+ return ret;
+}
+
+static int __mlx5_ib_dereg_mr(struct ib_mr *ibmr)
+{
+ struct mlx5_ib_mr *mr = to_mmr(ibmr);
+ struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
+ int rc;
+
+ /*
+ * Any async use of the mr must hold the refcount, once the refcount
+ * goes to zero no other thread, such as ODP page faults, prefetch, any
+ * UMR activity, etc can touch the mkey. Thus it is safe to destroy it.
+ */
+ if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) &&
+ refcount_read(&mr->mmkey.usecount) != 0 &&
+ xa_erase(&mr_to_mdev(mr)->odp_mkeys, mlx5_base_mkey(mr->mmkey.key)))
+ mlx5r_deref_wait_odp_mkey(&mr->mmkey);
+
+ if (ibmr->type == IB_MR_TYPE_INTEGRITY) {
+ xa_cmpxchg(&dev->sig_mrs, mlx5_base_mkey(mr->mmkey.key),
+ mr->sig, NULL, GFP_KERNEL);
+
+ if (mr->mtt_mr) {
+ rc = mlx5_ib_dereg_mr(&mr->mtt_mr->ibmr, NULL);
+ if (rc)
+ return rc;
+ mr->mtt_mr = NULL;
+ }
+ if (mr->klm_mr) {
+ rc = mlx5_ib_dereg_mr(&mr->klm_mr->ibmr, NULL);
+ if (rc)
+ return rc;
+ mr->klm_mr = NULL;
+ }
+
if (mlx5_core_destroy_psv(dev->mdev,
mr->sig->psv_memory.psv_idx))
mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
mr->sig->psv_memory.psv_idx);
- if (mlx5_core_destroy_psv(dev->mdev,
- mr->sig->psv_wire.psv_idx))
+ if (mlx5_core_destroy_psv(dev->mdev, mr->sig->psv_wire.psv_idx))
mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
mr->sig->psv_wire.psv_idx);
kfree(mr->sig);
mr->sig = NULL;
}
- mlx5_free_priv_descs(mr);
+ /* Stop DMA */
+ rc = mlx5r_handle_mkey_cleanup(mr);
+ if (rc)
+ return rc;
- if (!umred) {
- err = destroy_mkey(dev, mr);
- if (err) {
- mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n",
- mr->mmkey.key, err);
- return err;
- }
- } else {
- err = unreg_umr(dev, mr);
- if (err) {
- mlx5_ib_warn(dev, "failed unregister\n");
- return err;
- }
- free_cached_mr(dev, mr);
+ if (mr->umem) {
+ bool is_odp = is_odp_mr(mr);
+
+ if (!is_odp)
+ atomic_sub(ib_umem_num_pages(mr->umem),
+ &dev->mdev->priv.reg_pages);
+ ib_umem_release(mr->umem);
+ if (is_odp)
+ mlx5_ib_free_odp_mr(mr);
}
- if (!umred)
- kfree(mr);
+ if (!mr->mmkey.cache_ent)
+ mlx5_free_priv_descs(mr);
+ kfree(mr);
return 0;
}
-int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
+static int dereg_crossing_data_direct_mr(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_mr *mr)
+{
+ struct mlx5_ib_mr *dd_crossed_mr = mr->dd_crossed_mr;
+ int ret;
+
+ ret = __mlx5_ib_dereg_mr(&mr->ibmr);
+ if (ret)
+ return ret;
+
+ mutex_lock(&dev->data_direct_lock);
+ if (!dd_crossed_mr->revoked)
+ list_del(&dd_crossed_mr->dd_node);
+
+ ret = __mlx5_ib_dereg_mr(&dd_crossed_mr->ibmr);
+ mutex_unlock(&dev->data_direct_lock);
+ return ret;
+}
+
+int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
{
- struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
struct mlx5_ib_mr *mr = to_mmr(ibmr);
- int npages = mr->npages;
- struct ib_umem *umem = mr->umem;
-
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- if (umem && umem->odp_data) {
- /* Prevent new page faults from succeeding */
- mr->live = 0;
- /* Wait for all running page-fault handlers to finish. */
- synchronize_srcu(&dev->mr_srcu);
- /* Destroy all page mappings */
- mlx5_ib_invalidate_range(umem, ib_umem_start(umem),
- ib_umem_end(umem));
- /*
- * We kill the umem before the MR for ODP,
- * so that there will not be any invalidations in
- * flight, looking at the *mr struct.
- */
- ib_umem_release(umem);
- atomic_sub(npages, &dev->mdev->priv.reg_pages);
+ struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
- /* Avoid double-freeing the umem. */
- umem = NULL;
- }
-#endif
+ if (mr->data_direct)
+ return dereg_crossing_data_direct_mr(dev, mr);
- clean_mr(mr);
+ return __mlx5_ib_dereg_mr(ibmr);
+}
- if (umem) {
- ib_umem_release(umem);
- atomic_sub(npages, &dev->mdev->priv.reg_pages);
- }
+static void mlx5_set_umr_free_mkey(struct ib_pd *pd, u32 *in, int ndescs,
+ int access_mode, int page_shift)
+{
+ struct mlx5_ib_dev *dev = to_mdev(pd->device);
+ void *mkc;
- return 0;
+ mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
+
+ /* This is only used from the kernel, so setting the PD is OK. */
+ set_mkc_access_pd_addr_fields(mkc, IB_ACCESS_RELAXED_ORDERING, 0, pd);
+ MLX5_SET(mkc, mkc, free, 1);
+ MLX5_SET(mkc, mkc, translations_octword_size, ndescs);
+ MLX5_SET(mkc, mkc, access_mode_1_0, access_mode & 0x3);
+ MLX5_SET(mkc, mkc, access_mode_4_2, (access_mode >> 2) & 0x7);
+ MLX5_SET(mkc, mkc, umr_en, 1);
+ MLX5_SET(mkc, mkc, log_page_size, page_shift);
+ if (access_mode == MLX5_MKC_ACCESS_MODE_PA ||
+ access_mode == MLX5_MKC_ACCESS_MODE_MTT)
+ MLX5_SET(mkc, mkc, ma_translation_mode, MLX5_CAP_GEN(dev->mdev, ats));
}
-struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
- enum ib_mr_type mr_type,
- u32 max_num_sg)
+static int _mlx5_alloc_mkey_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr,
+ int ndescs, int desc_size, int page_shift,
+ int access_mode, u32 *in, int inlen)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
+ int err;
+
+ mr->access_mode = access_mode;
+ mr->desc_size = desc_size;
+ mr->max_descs = ndescs;
+
+ err = mlx5_alloc_priv_descs(pd->device, mr, ndescs, desc_size);
+ if (err)
+ return err;
+
+ mlx5_set_umr_free_mkey(pd, in, ndescs, access_mode, page_shift);
+
+ err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen);
+ if (err)
+ goto err_free_descs;
+
+ mr->mmkey.type = MLX5_MKEY_MR;
+ mr->ibmr.lkey = mr->mmkey.key;
+ mr->ibmr.rkey = mr->mmkey.key;
+
+ return 0;
+
+err_free_descs:
+ mlx5_free_priv_descs(mr);
+ return err;
+}
+
+static struct mlx5_ib_mr *mlx5_ib_alloc_pi_mr(struct ib_pd *pd,
+ u32 max_num_sg, u32 max_num_meta_sg,
+ int desc_size, int access_mode)
+{
int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
- int ndescs = ALIGN(max_num_sg, 4);
+ int ndescs = ALIGN(max_num_sg + max_num_meta_sg, 4);
+ int page_shift = 0;
struct mlx5_ib_mr *mr;
- void *mkc;
u32 *in;
int err;
@@ -1587,96 +2346,175 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
if (!mr)
return ERR_PTR(-ENOMEM);
+ mr->ibmr.pd = pd;
+ mr->ibmr.device = pd->device;
+
in = kzalloc(inlen, GFP_KERNEL);
if (!in) {
err = -ENOMEM;
goto err_free;
}
+ if (access_mode == MLX5_MKC_ACCESS_MODE_MTT)
+ page_shift = PAGE_SHIFT;
+
+ err = _mlx5_alloc_mkey_descs(pd, mr, ndescs, desc_size, page_shift,
+ access_mode, in, inlen);
+ if (err)
+ goto err_free_in;
+
+ mr->umem = NULL;
+ kfree(in);
+
+ return mr;
+
+err_free_in:
+ kfree(in);
+err_free:
+ kfree(mr);
+ return ERR_PTR(err);
+}
+
+static int mlx5_alloc_mem_reg_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr,
+ int ndescs, u32 *in, int inlen)
+{
+ return _mlx5_alloc_mkey_descs(pd, mr, ndescs, sizeof(struct mlx5_mtt),
+ PAGE_SHIFT, MLX5_MKC_ACCESS_MODE_MTT, in,
+ inlen);
+}
+
+static int mlx5_alloc_sg_gaps_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr,
+ int ndescs, u32 *in, int inlen)
+{
+ return _mlx5_alloc_mkey_descs(pd, mr, ndescs, sizeof(struct mlx5_klm),
+ 0, MLX5_MKC_ACCESS_MODE_KLMS, in, inlen);
+}
+
+static int mlx5_alloc_integrity_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr,
+ int max_num_sg, int max_num_meta_sg,
+ u32 *in, int inlen)
+{
+ struct mlx5_ib_dev *dev = to_mdev(pd->device);
+ u32 psv_index[2];
+ void *mkc;
+ int err;
+
+ mr->sig = kzalloc(sizeof(*mr->sig), GFP_KERNEL);
+ if (!mr->sig)
+ return -ENOMEM;
+
+ /* create mem & wire PSVs */
+ err = mlx5_core_create_psv(dev->mdev, to_mpd(pd)->pdn, 2, psv_index);
+ if (err)
+ goto err_free_sig;
+
+ mr->sig->psv_memory.psv_idx = psv_index[0];
+ mr->sig->psv_wire.psv_idx = psv_index[1];
+
+ mr->sig->sig_status_checked = true;
+ mr->sig->sig_err_exists = false;
+ /* Next UMR, Arm SIGERR */
+ ++mr->sig->sigerr_count;
+ mr->klm_mr = mlx5_ib_alloc_pi_mr(pd, max_num_sg, max_num_meta_sg,
+ sizeof(struct mlx5_klm),
+ MLX5_MKC_ACCESS_MODE_KLMS);
+ if (IS_ERR(mr->klm_mr)) {
+ err = PTR_ERR(mr->klm_mr);
+ goto err_destroy_psv;
+ }
+ mr->mtt_mr = mlx5_ib_alloc_pi_mr(pd, max_num_sg, max_num_meta_sg,
+ sizeof(struct mlx5_mtt),
+ MLX5_MKC_ACCESS_MODE_MTT);
+ if (IS_ERR(mr->mtt_mr)) {
+ err = PTR_ERR(mr->mtt_mr);
+ goto err_free_klm_mr;
+ }
+
+ /* Set bsf descriptors for mkey */
mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
- MLX5_SET(mkc, mkc, free, 1);
- MLX5_SET(mkc, mkc, translations_octword_size, ndescs);
- MLX5_SET(mkc, mkc, qpn, 0xffffff);
- MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
+ MLX5_SET(mkc, mkc, bsf_en, 1);
+ MLX5_SET(mkc, mkc, bsf_octword_size, MLX5_MKEY_BSF_OCTO_SIZE);
- if (mr_type == IB_MR_TYPE_MEM_REG) {
- mr->access_mode = MLX5_MKC_ACCESS_MODE_MTT;
- MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT);
- err = mlx5_alloc_priv_descs(pd->device, mr,
- ndescs, sizeof(u64));
- if (err)
- goto err_free_in;
+ err = _mlx5_alloc_mkey_descs(pd, mr, 4, sizeof(struct mlx5_klm), 0,
+ MLX5_MKC_ACCESS_MODE_KLMS, in, inlen);
+ if (err)
+ goto err_free_mtt_mr;
- mr->desc_size = sizeof(u64);
- mr->max_descs = ndescs;
- } else if (mr_type == IB_MR_TYPE_SG_GAPS) {
- mr->access_mode = MLX5_MKC_ACCESS_MODE_KLMS;
+ err = xa_err(xa_store(&dev->sig_mrs, mlx5_base_mkey(mr->mmkey.key),
+ mr->sig, GFP_KERNEL));
+ if (err)
+ goto err_free_descs;
+ return 0;
- err = mlx5_alloc_priv_descs(pd->device, mr,
- ndescs, sizeof(struct mlx5_klm));
- if (err)
- goto err_free_in;
- mr->desc_size = sizeof(struct mlx5_klm);
- mr->max_descs = ndescs;
- } else if (mr_type == IB_MR_TYPE_SIGNATURE) {
- u32 psv_index[2];
-
- MLX5_SET(mkc, mkc, bsf_en, 1);
- MLX5_SET(mkc, mkc, bsf_octword_size, MLX5_MKEY_BSF_OCTO_SIZE);
- mr->sig = kzalloc(sizeof(*mr->sig), GFP_KERNEL);
- if (!mr->sig) {
- err = -ENOMEM;
- goto err_free_in;
- }
+err_free_descs:
+ destroy_mkey(dev, mr);
+ mlx5_free_priv_descs(mr);
+err_free_mtt_mr:
+ mlx5_ib_dereg_mr(&mr->mtt_mr->ibmr, NULL);
+ mr->mtt_mr = NULL;
+err_free_klm_mr:
+ mlx5_ib_dereg_mr(&mr->klm_mr->ibmr, NULL);
+ mr->klm_mr = NULL;
+err_destroy_psv:
+ if (mlx5_core_destroy_psv(dev->mdev, mr->sig->psv_memory.psv_idx))
+ mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
+ mr->sig->psv_memory.psv_idx);
+ if (mlx5_core_destroy_psv(dev->mdev, mr->sig->psv_wire.psv_idx))
+ mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
+ mr->sig->psv_wire.psv_idx);
+err_free_sig:
+ kfree(mr->sig);
- /* create mem & wire PSVs */
- err = mlx5_core_create_psv(dev->mdev, to_mpd(pd)->pdn,
- 2, psv_index);
- if (err)
- goto err_free_sig;
+ return err;
+}
- mr->access_mode = MLX5_MKC_ACCESS_MODE_KLMS;
- mr->sig->psv_memory.psv_idx = psv_index[0];
- mr->sig->psv_wire.psv_idx = psv_index[1];
+static struct ib_mr *__mlx5_ib_alloc_mr(struct ib_pd *pd,
+ enum ib_mr_type mr_type, u32 max_num_sg,
+ u32 max_num_meta_sg)
+{
+ struct mlx5_ib_dev *dev = to_mdev(pd->device);
+ int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
+ int ndescs = ALIGN(max_num_sg, 4);
+ struct mlx5_ib_mr *mr;
+ u32 *in;
+ int err;
- mr->sig->sig_status_checked = true;
- mr->sig->sig_err_exists = false;
- /* Next UMR, Arm SIGERR */
- ++mr->sig->sigerr_count;
- } else {
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+ in = kzalloc(inlen, GFP_KERNEL);
+ if (!in) {
+ err = -ENOMEM;
+ goto err_free;
+ }
+
+ mr->ibmr.device = pd->device;
+ mr->umem = NULL;
+
+ switch (mr_type) {
+ case IB_MR_TYPE_MEM_REG:
+ err = mlx5_alloc_mem_reg_descs(pd, mr, ndescs, in, inlen);
+ break;
+ case IB_MR_TYPE_SG_GAPS:
+ err = mlx5_alloc_sg_gaps_descs(pd, mr, ndescs, in, inlen);
+ break;
+ case IB_MR_TYPE_INTEGRITY:
+ err = mlx5_alloc_integrity_descs(pd, mr, max_num_sg,
+ max_num_meta_sg, in, inlen);
+ break;
+ default:
mlx5_ib_warn(dev, "Invalid mr type %d\n", mr_type);
err = -EINVAL;
- goto err_free_in;
}
- MLX5_SET(mkc, mkc, access_mode, mr->access_mode);
- MLX5_SET(mkc, mkc, umr_en, 1);
-
- err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen);
if (err)
- goto err_destroy_psv;
+ goto err_free_in;
- mr->ibmr.lkey = mr->mmkey.key;
- mr->ibmr.rkey = mr->mmkey.key;
- mr->umem = NULL;
kfree(in);
return &mr->ibmr;
-err_destroy_psv:
- if (mr->sig) {
- if (mlx5_core_destroy_psv(dev->mdev,
- mr->sig->psv_memory.psv_idx))
- mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
- mr->sig->psv_memory.psv_idx);
- if (mlx5_core_destroy_psv(dev->mdev,
- mr->sig->psv_wire.psv_idx))
- mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
- mr->sig->psv_wire.psv_idx);
- }
- mlx5_free_priv_descs(mr);
-err_free_sig:
- kfree(mr->sig);
err_free_in:
kfree(in);
err_free:
@@ -1684,15 +2522,27 @@ err_free:
return ERR_PTR(err);
}
-struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
- struct ib_udata *udata)
+struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
+ u32 max_num_sg)
{
- struct mlx5_ib_dev *dev = to_mdev(pd->device);
+ return __mlx5_ib_alloc_mr(pd, mr_type, max_num_sg, 0);
+}
+
+struct ib_mr *mlx5_ib_alloc_mr_integrity(struct ib_pd *pd,
+ u32 max_num_sg, u32 max_num_meta_sg)
+{
+ return __mlx5_ib_alloc_mr(pd, IB_MR_TYPE_INTEGRITY, max_num_sg,
+ max_num_meta_sg);
+}
+
+int mlx5_ib_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata)
+{
+ struct mlx5_ib_dev *dev = to_mdev(ibmw->device);
int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
- struct mlx5_ib_mw *mw = NULL;
+ struct mlx5_ib_mw *mw = to_mmw(ibmw);
+ unsigned int ndescs;
u32 *in = NULL;
void *mkc;
- int ndescs;
int err;
struct mlx5_ib_alloc_mw req = {};
struct {
@@ -1702,71 +2552,79 @@ struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
err = ib_copy_from_udata(&req, udata, min(udata->inlen, sizeof(req)));
if (err)
- return ERR_PTR(err);
+ return err;
if (req.comp_mask || req.reserved1 || req.reserved2)
- return ERR_PTR(-EOPNOTSUPP);
+ return -EOPNOTSUPP;
if (udata->inlen > sizeof(req) &&
!ib_is_udata_cleared(udata, sizeof(req),
udata->inlen - sizeof(req)))
- return ERR_PTR(-EOPNOTSUPP);
+ return -EOPNOTSUPP;
ndescs = req.num_klms ? roundup(req.num_klms, 4) : roundup(1, 4);
- mw = kzalloc(sizeof(*mw), GFP_KERNEL);
in = kzalloc(inlen, GFP_KERNEL);
- if (!mw || !in) {
- err = -ENOMEM;
- goto free;
- }
+ if (!in)
+ return -ENOMEM;
mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
MLX5_SET(mkc, mkc, free, 1);
MLX5_SET(mkc, mkc, translations_octword_size, ndescs);
- MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
+ MLX5_SET(mkc, mkc, pd, to_mpd(ibmw->pd)->pdn);
MLX5_SET(mkc, mkc, umr_en, 1);
MLX5_SET(mkc, mkc, lr, 1);
- MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_KLMS);
- MLX5_SET(mkc, mkc, en_rinval, !!((type == IB_MW_TYPE_2)));
+ MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_KLMS);
+ MLX5_SET(mkc, mkc, en_rinval, !!((ibmw->type == IB_MW_TYPE_2)));
MLX5_SET(mkc, mkc, qpn, 0xffffff);
- err = mlx5_core_create_mkey(dev->mdev, &mw->mmkey, in, inlen);
+ err = mlx5_ib_create_mkey(dev, &mw->mmkey, in, inlen);
if (err)
goto free;
- mw->ibmw.rkey = mw->mmkey.key;
+ mw->mmkey.type = MLX5_MKEY_MW;
+ ibmw->rkey = mw->mmkey.key;
+ mw->mmkey.ndescs = ndescs;
- resp.response_length = min(offsetof(typeof(resp), response_length) +
- sizeof(resp.response_length), udata->outlen);
+ resp.response_length =
+ min(offsetofend(typeof(resp), response_length), udata->outlen);
if (resp.response_length) {
err = ib_copy_to_udata(udata, &resp, resp.response_length);
- if (err) {
- mlx5_core_destroy_mkey(dev->mdev, &mw->mmkey);
- goto free;
- }
+ if (err)
+ goto free_mkey;
+ }
+
+ if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) {
+ err = mlx5r_store_odp_mkey(dev, &mw->mmkey);
+ if (err)
+ goto free_mkey;
}
kfree(in);
- return &mw->ibmw;
+ return 0;
+free_mkey:
+ mlx5_core_destroy_mkey(dev->mdev, mw->mmkey.key);
free:
- kfree(mw);
kfree(in);
- return ERR_PTR(err);
+ return err;
}
int mlx5_ib_dealloc_mw(struct ib_mw *mw)
{
+ struct mlx5_ib_dev *dev = to_mdev(mw->device);
struct mlx5_ib_mw *mmw = to_mmw(mw);
- int err;
- err = mlx5_core_destroy_mkey((to_mdev(mw->device))->mdev,
- &mmw->mmkey);
- if (!err)
- kfree(mmw);
- return err;
+ if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) &&
+ xa_erase(&dev->odp_mkeys, mlx5_base_mkey(mmw->mmkey.key)))
+ /*
+ * pagefault_single_data_segment() may be accessing mmw
+ * if the user bound an ODP MR to this MW.
+ */
+ mlx5r_deref_wait_odp_mkey(&mmw->mmkey);
+
+ return mlx5_core_destroy_mkey(dev->mdev, mmw->mmkey.key);
}
int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
@@ -1811,28 +2669,64 @@ done:
}
static int
+mlx5_ib_map_pa_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg,
+ int data_sg_nents, unsigned int *data_sg_offset,
+ struct scatterlist *meta_sg, int meta_sg_nents,
+ unsigned int *meta_sg_offset)
+{
+ struct mlx5_ib_mr *mr = to_mmr(ibmr);
+ unsigned int sg_offset = 0;
+ int n = 0;
+
+ mr->meta_length = 0;
+ if (data_sg_nents == 1) {
+ n++;
+ mr->mmkey.ndescs = 1;
+ if (data_sg_offset)
+ sg_offset = *data_sg_offset;
+ mr->data_length = sg_dma_len(data_sg) - sg_offset;
+ mr->data_iova = sg_dma_address(data_sg) + sg_offset;
+ if (meta_sg_nents == 1) {
+ n++;
+ mr->meta_ndescs = 1;
+ if (meta_sg_offset)
+ sg_offset = *meta_sg_offset;
+ else
+ sg_offset = 0;
+ mr->meta_length = sg_dma_len(meta_sg) - sg_offset;
+ mr->pi_iova = sg_dma_address(meta_sg) + sg_offset;
+ }
+ ibmr->length = mr->data_length + mr->meta_length;
+ }
+
+ return n;
+}
+
+static int
mlx5_ib_sg_to_klms(struct mlx5_ib_mr *mr,
struct scatterlist *sgl,
unsigned short sg_nents,
- unsigned int *sg_offset_p)
+ unsigned int *sg_offset_p,
+ struct scatterlist *meta_sgl,
+ unsigned short meta_sg_nents,
+ unsigned int *meta_sg_offset_p)
{
struct scatterlist *sg = sgl;
struct mlx5_klm *klms = mr->descs;
unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0;
u32 lkey = mr->ibmr.pd->local_dma_lkey;
- int i;
+ int i, j = 0;
mr->ibmr.iova = sg_dma_address(sg) + sg_offset;
mr->ibmr.length = 0;
- mr->ndescs = sg_nents;
for_each_sg(sgl, sg, sg_nents, i) {
- if (unlikely(i > mr->max_descs))
+ if (unlikely(i >= mr->max_descs))
break;
klms[i].va = cpu_to_be64(sg_dma_address(sg) + sg_offset);
klms[i].bcount = cpu_to_be32(sg_dma_len(sg) - sg_offset);
klms[i].key = cpu_to_be32(lkey);
- mr->ibmr.length += sg_dma_len(sg);
+ mr->ibmr.length += sg_dma_len(sg) - sg_offset;
sg_offset = 0;
}
@@ -1840,7 +2734,32 @@ mlx5_ib_sg_to_klms(struct mlx5_ib_mr *mr,
if (sg_offset_p)
*sg_offset_p = sg_offset;
- return i;
+ mr->mmkey.ndescs = i;
+ mr->data_length = mr->ibmr.length;
+
+ if (meta_sg_nents) {
+ sg = meta_sgl;
+ sg_offset = meta_sg_offset_p ? *meta_sg_offset_p : 0;
+ for_each_sg(meta_sgl, sg, meta_sg_nents, j) {
+ if (unlikely(i + j >= mr->max_descs))
+ break;
+ klms[i + j].va = cpu_to_be64(sg_dma_address(sg) +
+ sg_offset);
+ klms[i + j].bcount = cpu_to_be32(sg_dma_len(sg) -
+ sg_offset);
+ klms[i + j].key = cpu_to_be32(lkey);
+ mr->ibmr.length += sg_dma_len(sg) - sg_offset;
+
+ sg_offset = 0;
+ }
+ if (meta_sg_offset_p)
+ *meta_sg_offset_p = sg_offset;
+
+ mr->meta_ndescs = j;
+ mr->meta_length = mr->ibmr.length - mr->data_length;
+ }
+
+ return i + j;
}
static int mlx5_set_page(struct ib_mr *ibmr, u64 addr)
@@ -1848,11 +2767,186 @@ static int mlx5_set_page(struct ib_mr *ibmr, u64 addr)
struct mlx5_ib_mr *mr = to_mmr(ibmr);
__be64 *descs;
- if (unlikely(mr->ndescs == mr->max_descs))
+ if (unlikely(mr->mmkey.ndescs == mr->max_descs))
+ return -ENOMEM;
+
+ descs = mr->descs;
+ descs[mr->mmkey.ndescs++] = cpu_to_be64(addr | MLX5_EN_RD | MLX5_EN_WR);
+
+ return 0;
+}
+
+static int mlx5_set_page_pi(struct ib_mr *ibmr, u64 addr)
+{
+ struct mlx5_ib_mr *mr = to_mmr(ibmr);
+ __be64 *descs;
+
+ if (unlikely(mr->mmkey.ndescs + mr->meta_ndescs == mr->max_descs))
return -ENOMEM;
descs = mr->descs;
- descs[mr->ndescs++] = cpu_to_be64(addr | MLX5_EN_RD | MLX5_EN_WR);
+ descs[mr->mmkey.ndescs + mr->meta_ndescs++] =
+ cpu_to_be64(addr | MLX5_EN_RD | MLX5_EN_WR);
+
+ return 0;
+}
+
+static int
+mlx5_ib_map_mtt_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg,
+ int data_sg_nents, unsigned int *data_sg_offset,
+ struct scatterlist *meta_sg, int meta_sg_nents,
+ unsigned int *meta_sg_offset)
+{
+ struct mlx5_ib_mr *mr = to_mmr(ibmr);
+ struct mlx5_ib_mr *pi_mr = mr->mtt_mr;
+ int n;
+
+ pi_mr->mmkey.ndescs = 0;
+ pi_mr->meta_ndescs = 0;
+ pi_mr->meta_length = 0;
+
+ ib_dma_sync_single_for_cpu(ibmr->device, pi_mr->desc_map,
+ pi_mr->desc_size * pi_mr->max_descs,
+ DMA_TO_DEVICE);
+
+ pi_mr->ibmr.page_size = ibmr->page_size;
+ n = ib_sg_to_pages(&pi_mr->ibmr, data_sg, data_sg_nents, data_sg_offset,
+ mlx5_set_page);
+ if (n != data_sg_nents)
+ return n;
+
+ pi_mr->data_iova = pi_mr->ibmr.iova;
+ pi_mr->data_length = pi_mr->ibmr.length;
+ pi_mr->ibmr.length = pi_mr->data_length;
+ ibmr->length = pi_mr->data_length;
+
+ if (meta_sg_nents) {
+ u64 page_mask = ~((u64)ibmr->page_size - 1);
+ u64 iova = pi_mr->data_iova;
+
+ n += ib_sg_to_pages(&pi_mr->ibmr, meta_sg, meta_sg_nents,
+ meta_sg_offset, mlx5_set_page_pi);
+
+ pi_mr->meta_length = pi_mr->ibmr.length;
+ /*
+ * PI address for the HW is the offset of the metadata address
+ * relative to the first data page address.
+ * It equals to first data page address + size of data pages +
+ * metadata offset at the first metadata page
+ */
+ pi_mr->pi_iova = (iova & page_mask) +
+ pi_mr->mmkey.ndescs * ibmr->page_size +
+ (pi_mr->ibmr.iova & ~page_mask);
+ /*
+ * In order to use one MTT MR for data and metadata, we register
+ * also the gaps between the end of the data and the start of
+ * the metadata (the sig MR will verify that the HW will access
+ * to right addresses). This mapping is safe because we use
+ * internal mkey for the registration.
+ */
+ pi_mr->ibmr.length = pi_mr->pi_iova + pi_mr->meta_length - iova;
+ pi_mr->ibmr.iova = iova;
+ ibmr->length += pi_mr->meta_length;
+ }
+
+ ib_dma_sync_single_for_device(ibmr->device, pi_mr->desc_map,
+ pi_mr->desc_size * pi_mr->max_descs,
+ DMA_TO_DEVICE);
+
+ return n;
+}
+
+static int
+mlx5_ib_map_klm_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg,
+ int data_sg_nents, unsigned int *data_sg_offset,
+ struct scatterlist *meta_sg, int meta_sg_nents,
+ unsigned int *meta_sg_offset)
+{
+ struct mlx5_ib_mr *mr = to_mmr(ibmr);
+ struct mlx5_ib_mr *pi_mr = mr->klm_mr;
+ int n;
+
+ pi_mr->mmkey.ndescs = 0;
+ pi_mr->meta_ndescs = 0;
+ pi_mr->meta_length = 0;
+
+ ib_dma_sync_single_for_cpu(ibmr->device, pi_mr->desc_map,
+ pi_mr->desc_size * pi_mr->max_descs,
+ DMA_TO_DEVICE);
+
+ n = mlx5_ib_sg_to_klms(pi_mr, data_sg, data_sg_nents, data_sg_offset,
+ meta_sg, meta_sg_nents, meta_sg_offset);
+
+ ib_dma_sync_single_for_device(ibmr->device, pi_mr->desc_map,
+ pi_mr->desc_size * pi_mr->max_descs,
+ DMA_TO_DEVICE);
+
+ /* This is zero-based memory region */
+ pi_mr->data_iova = 0;
+ pi_mr->ibmr.iova = 0;
+ pi_mr->pi_iova = pi_mr->data_length;
+ ibmr->length = pi_mr->ibmr.length;
+
+ return n;
+}
+
+int mlx5_ib_map_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg,
+ int data_sg_nents, unsigned int *data_sg_offset,
+ struct scatterlist *meta_sg, int meta_sg_nents,
+ unsigned int *meta_sg_offset)
+{
+ struct mlx5_ib_mr *mr = to_mmr(ibmr);
+ struct mlx5_ib_mr *pi_mr = NULL;
+ int n;
+
+ WARN_ON(ibmr->type != IB_MR_TYPE_INTEGRITY);
+
+ mr->mmkey.ndescs = 0;
+ mr->data_length = 0;
+ mr->data_iova = 0;
+ mr->meta_ndescs = 0;
+ mr->pi_iova = 0;
+ /*
+ * As a performance optimization, if possible, there is no need to
+ * perform UMR operation to register the data/metadata buffers.
+ * First try to map the sg lists to PA descriptors with local_dma_lkey.
+ * Fallback to UMR only in case of a failure.
+ */
+ n = mlx5_ib_map_pa_mr_sg_pi(ibmr, data_sg, data_sg_nents,
+ data_sg_offset, meta_sg, meta_sg_nents,
+ meta_sg_offset);
+ if (n == data_sg_nents + meta_sg_nents)
+ goto out;
+ /*
+ * As a performance optimization, if possible, there is no need to map
+ * the sg lists to KLM descriptors. First try to map the sg lists to MTT
+ * descriptors and fallback to KLM only in case of a failure.
+ * It's more efficient for the HW to work with MTT descriptors
+ * (especially in high load).
+ * Use KLM (indirect access) only if it's mandatory.
+ */
+ pi_mr = mr->mtt_mr;
+ n = mlx5_ib_map_mtt_mr_sg_pi(ibmr, data_sg, data_sg_nents,
+ data_sg_offset, meta_sg, meta_sg_nents,
+ meta_sg_offset);
+ if (n == data_sg_nents + meta_sg_nents)
+ goto out;
+
+ pi_mr = mr->klm_mr;
+ n = mlx5_ib_map_klm_mr_sg_pi(ibmr, data_sg, data_sg_nents,
+ data_sg_offset, meta_sg, meta_sg_nents,
+ meta_sg_offset);
+ if (unlikely(n != data_sg_nents + meta_sg_nents))
+ return -ENOMEM;
+
+out:
+ /* This is zero-based memory region */
+ ibmr->iova = 0;
+ mr->pi_mr = pi_mr;
+ if (pi_mr)
+ ibmr->sig_attrs->meta_length = pi_mr->meta_length;
+ else
+ ibmr->sig_attrs->meta_length = mr->meta_length;
return 0;
}
@@ -1863,14 +2957,15 @@ int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
struct mlx5_ib_mr *mr = to_mmr(ibmr);
int n;
- mr->ndescs = 0;
+ mr->mmkey.ndescs = 0;
ib_dma_sync_single_for_cpu(ibmr->device, mr->desc_map,
mr->desc_size * mr->max_descs,
DMA_TO_DEVICE);
if (mr->access_mode == MLX5_MKC_ACCESS_MODE_KLMS)
- n = mlx5_ib_sg_to_klms(mr, sg, sg_nents, sg_offset);
+ n = mlx5_ib_sg_to_klms(mr, sg, sg_nents, sg_offset, NULL, 0,
+ NULL);
else
n = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset,
mlx5_set_page);
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index cacb631a7b0a..e71ee3d52eb0 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -30,10 +30,66 @@
* SOFTWARE.
*/
-#include <rdma/ib_umem.h>
#include <rdma/ib_umem_odp.h>
+#include <linux/kernel.h>
+#include <linux/dma-buf.h>
+#include <linux/dma-resv.h>
+#include <linux/hmm.h>
+#include <linux/hmm-dma.h>
+#include <linux/pci-p2pdma.h>
#include "mlx5_ib.h"
+#include "cmd.h"
+#include "umr.h"
+#include "qp.h"
+
+#include <linux/mlx5/eq.h>
+
+/* Contains the details of a pagefault. */
+struct mlx5_pagefault {
+ u32 bytes_committed;
+ u64 token;
+ u8 event_subtype;
+ u8 type;
+ union {
+ /* Initiator or send message responder pagefault details. */
+ struct {
+ /* Received packet size, only valid for responders. */
+ u32 packet_size;
+ /*
+ * Number of resource holding WQE, depends on type.
+ */
+ u32 wq_num;
+ /*
+ * WQE index. Refers to either the send queue or
+ * receive queue, according to event_subtype.
+ */
+ u16 wqe_index;
+ } wqe;
+ /* RDMA responder pagefault details */
+ struct {
+ u32 r_key;
+ /*
+ * Received packet size, minimal size page fault
+ * resolution required for forward progress.
+ */
+ u32 packet_size;
+ u32 rdma_op_len;
+ u64 rdma_va;
+ } rdma;
+ struct {
+ u64 va;
+ u32 mkey;
+ u32 fault_byte_count;
+ u32 prefetch_before_byte_count;
+ u32 prefetch_after_byte_count;
+ u8 flags;
+ } memory;
+ };
+
+ struct mlx5_ib_pf_eq *eq;
+ struct work_struct work;
+};
#define MAX_PREFETCH_LEN (4*1024*1024U)
@@ -41,29 +97,203 @@
* a pagefault. */
#define MMU_NOTIFIER_TIMEOUT 1000
-struct workqueue_struct *mlx5_ib_page_fault_wq;
+static u64 mlx5_imr_ksm_entries;
+static u64 mlx5_imr_mtt_entries;
+static u64 mlx5_imr_mtt_size;
+static u8 mlx5_imr_mtt_shift;
+static u8 mlx5_imr_ksm_page_shift;
+
+static void populate_ksm(struct mlx5_ksm *pksm, size_t idx, size_t nentries,
+ struct mlx5_ib_mr *imr, int flags)
+{
+ struct mlx5_core_dev *dev = mr_to_mdev(imr)->mdev;
+ struct mlx5_ksm *end = pksm + nentries;
+ u64 step = MLX5_CAP_ODP(dev, mem_page_fault) ? mlx5_imr_mtt_size : 0;
+ __be32 key = MLX5_CAP_ODP(dev, mem_page_fault) ?
+ cpu_to_be32(imr->null_mmkey.key) :
+ mr_to_mdev(imr)->mkeys.null_mkey;
+ u64 va =
+ MLX5_CAP_ODP(dev, mem_page_fault) ? idx * mlx5_imr_mtt_size : 0;
+
+ if (flags & MLX5_IB_UPD_XLT_ZAP) {
+ for (; pksm != end; pksm++, idx++, va += step) {
+ pksm->key = key;
+ pksm->va = cpu_to_be64(va);
+ }
+ return;
+ }
+
+ /*
+ * The locking here is pretty subtle. Ideally the implicit_children
+ * xarray would be protected by the umem_mutex, however that is not
+ * possible. Instead this uses a weaker update-then-lock pattern:
+ *
+ * xa_store()
+ * mutex_lock(umem_mutex)
+ * mlx5r_umr_update_xlt()
+ * mutex_unlock(umem_mutex)
+ * destroy lkey
+ *
+ * ie any change the xarray must be followed by the locked update_xlt
+ * before destroying.
+ *
+ * The umem_mutex provides the acquire/release semantic needed to make
+ * the xa_store() visible to a racing thread.
+ */
+ lockdep_assert_held(&to_ib_umem_odp(imr->umem)->umem_mutex);
+
+ for (; pksm != end; pksm++, idx++, va += step) {
+ struct mlx5_ib_mr *mtt = xa_load(&imr->implicit_children, idx);
+
+ if (mtt) {
+ pksm->key = cpu_to_be32(mtt->ibmr.lkey);
+ pksm->va = cpu_to_be64(idx * mlx5_imr_mtt_size);
+ } else {
+ pksm->key = key;
+ pksm->va = cpu_to_be64(va);
+ }
+ }
+}
+
+static int populate_mtt(__be64 *pas, size_t start, size_t nentries,
+ struct mlx5_ib_mr *mr, int flags)
+{
+ struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem);
+ bool downgrade = flags & MLX5_IB_UPD_XLT_DOWNGRADE;
+ struct pci_p2pdma_map_state p2pdma_state = {};
+ struct ib_device *dev = odp->umem.ibdev;
+ size_t i;
+
+ if (flags & MLX5_IB_UPD_XLT_ZAP)
+ return 0;
+
+ for (i = 0; i < nentries; i++) {
+ unsigned long pfn = odp->map.pfn_list[start + i];
+ dma_addr_t dma_addr;
+
+ pfn = odp->map.pfn_list[start + i];
+ if (!(pfn & HMM_PFN_VALID))
+ /* ODP initialization */
+ continue;
+
+ dma_addr = hmm_dma_map_pfn(dev->dma_device, &odp->map,
+ start + i, &p2pdma_state);
+ if (ib_dma_mapping_error(dev, dma_addr))
+ return -EFAULT;
+
+ dma_addr |= MLX5_IB_MTT_READ;
+ if ((pfn & HMM_PFN_WRITE) && !downgrade)
+ dma_addr |= MLX5_IB_MTT_WRITE;
+
+ pas[i] = cpu_to_be64(dma_addr);
+ odp->npages++;
+ }
+ return 0;
+}
-void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start,
- unsigned long end)
+int mlx5_odp_populate_xlt(void *xlt, size_t idx, size_t nentries,
+ struct mlx5_ib_mr *mr, int flags)
{
+ if (flags & MLX5_IB_UPD_XLT_INDIRECT) {
+ populate_ksm(xlt, idx, nentries, mr, flags);
+ return 0;
+ } else {
+ return populate_mtt(xlt, idx, nentries, mr, flags);
+ }
+}
+
+/*
+ * This must be called after the mr has been removed from implicit_children.
+ * NOTE: The MR does not necessarily have to be
+ * empty here, parallel page faults could have raced with the free process and
+ * added pages to it.
+ */
+static void free_implicit_child_mr_work(struct work_struct *work)
+{
+ struct mlx5_ib_mr *mr =
+ container_of(work, struct mlx5_ib_mr, odp_destroy.work);
+ struct mlx5_ib_mr *imr = mr->parent;
+ struct ib_umem_odp *odp_imr = to_ib_umem_odp(imr->umem);
+ struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem);
+
+ mlx5r_deref_wait_odp_mkey(&mr->mmkey);
+
+ mutex_lock(&odp_imr->umem_mutex);
+ mlx5r_umr_update_xlt(mr->parent,
+ ib_umem_start(odp) >> mlx5_imr_mtt_shift, 1, 0,
+ MLX5_IB_UPD_XLT_INDIRECT | MLX5_IB_UPD_XLT_ATOMIC);
+ mutex_unlock(&odp_imr->umem_mutex);
+ mlx5_ib_dereg_mr(&mr->ibmr, NULL);
+
+ mlx5r_deref_odp_mkey(&imr->mmkey);
+}
+
+static void destroy_unused_implicit_child_mr(struct mlx5_ib_mr *mr)
+{
+ struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem);
+ unsigned long idx = ib_umem_start(odp) >> mlx5_imr_mtt_shift;
+ struct mlx5_ib_mr *imr = mr->parent;
+
+ /*
+ * If userspace is racing freeing the parent implicit ODP MR then we can
+ * loose the race with parent destruction. In this case
+ * mlx5_ib_free_odp_mr() will free everything in the implicit_children
+ * xarray so NOP is fine. This child MR cannot be destroyed here because
+ * we are under its umem_mutex.
+ */
+ if (!refcount_inc_not_zero(&imr->mmkey.usecount))
+ return;
+
+ xa_lock(&imr->implicit_children);
+ if (__xa_cmpxchg(&imr->implicit_children, idx, mr, NULL, GFP_KERNEL) !=
+ mr) {
+ xa_unlock(&imr->implicit_children);
+ mlx5r_deref_odp_mkey(&imr->mmkey);
+ return;
+ }
+
+ if (MLX5_CAP_ODP(mr_to_mdev(mr)->mdev, mem_page_fault))
+ xa_erase(&mr_to_mdev(mr)->odp_mkeys,
+ mlx5_base_mkey(mr->mmkey.key));
+ xa_unlock(&imr->implicit_children);
+
+ /* Freeing a MR is a sleeping operation, so bounce to a work queue */
+ INIT_WORK(&mr->odp_destroy.work, free_implicit_child_mr_work);
+ queue_work(system_dfl_wq, &mr->odp_destroy.work);
+}
+
+static bool mlx5_ib_invalidate_range(struct mmu_interval_notifier *mni,
+ const struct mmu_notifier_range *range,
+ unsigned long cur_seq)
+{
+ struct ib_umem_odp *umem_odp =
+ container_of(mni, struct ib_umem_odp, notifier);
struct mlx5_ib_mr *mr;
- const u64 umr_block_mask = (MLX5_UMR_MTT_ALIGNMENT / sizeof(u64)) - 1;
+ const u64 umr_block_mask = MLX5_UMR_MTT_NUM_ENTRIES_ALIGNMENT - 1;
u64 idx = 0, blk_start_idx = 0;
+ u64 invalidations = 0;
+ unsigned long start;
+ unsigned long end;
int in_block = 0;
u64 addr;
- if (!umem || !umem->odp_data) {
- pr_err("invalidation called on NULL umem or non-ODP umem\n");
- return;
- }
+ if (!mmu_notifier_range_blockable(range))
+ return false;
- mr = umem->odp_data->private;
-
- if (!mr || !mr->ibmr.pd)
- return;
+ mutex_lock(&umem_odp->umem_mutex);
+ mmu_interval_set_seq(mni, cur_seq);
+ /*
+ * If npages is zero then umem_odp->private may not be setup yet. This
+ * does not complete until after the first page is mapped for DMA.
+ */
+ if (!umem_odp->npages)
+ goto out;
+ mr = umem_odp->private;
+ if (!mr)
+ goto out;
- start = max_t(u64, ib_umem_start(umem), start);
- end = min_t(u64, ib_umem_end(umem), end);
+ start = max_t(u64, ib_umem_start(umem_odp), range->start);
+ end = min_t(u64, ib_umem_end(umem_odp), range->end);
/*
* Iteration one - zap the HW's MTTs. The notifiers_count ensures that
@@ -71,17 +301,15 @@ void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start,
* overwrite the same MTTs. Concurent invalidations might race us,
* but they will write 0s as well, so no difference in the end result.
*/
-
- for (addr = start; addr < end; addr += (u64)umem->page_size) {
- idx = (addr - ib_umem_start(umem)) / PAGE_SIZE;
+ for (addr = start; addr < end; addr += BIT(umem_odp->page_shift)) {
+ idx = (addr - ib_umem_start(umem_odp)) >> umem_odp->page_shift;
/*
* Strive to write the MTTs in chunks, but avoid overwriting
* non-existing MTTs. The huristic here can be improved to
* estimate the cost of another UMR vs. the cost of bigger
* UMR.
*/
- if (umem->odp_data->dma_list[idx] &
- (ODP_READ_ALLOWED_BIT | ODP_WRITE_ALLOWED_BIT)) {
+ if (umem_odp->map.pfn_list[idx] & HMM_PFN_VALID) {
if (!in_block) {
blk_start_idx = idx;
in_block = 1;
@@ -90,15 +318,26 @@ void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start,
u64 umr_offset = idx & umr_block_mask;
if (in_block && umr_offset == 0) {
- mlx5_ib_update_mtt(mr, blk_start_idx,
- idx - blk_start_idx, 1);
+ mlx5r_umr_update_xlt(mr, blk_start_idx,
+ idx - blk_start_idx, 0,
+ MLX5_IB_UPD_XLT_ZAP |
+ MLX5_IB_UPD_XLT_ATOMIC);
in_block = 0;
+ /* Count page invalidations */
+ invalidations += idx - blk_start_idx + 1;
}
}
}
- if (in_block)
- mlx5_ib_update_mtt(mr, blk_start_idx, idx - blk_start_idx + 1,
- 1);
+ if (in_block) {
+ mlx5r_umr_update_xlt(mr, blk_start_idx,
+ idx - blk_start_idx + 1, 0,
+ MLX5_IB_UPD_XLT_ZAP |
+ MLX5_IB_UPD_XLT_ATOMIC);
+ /* Count page invalidations */
+ invalidations += idx - blk_start_idx + 1;
+ }
+
+ mlx5_update_odp_stats_with_handled(mr, invalidations, invalidations);
/*
* We are now sure that the device will not access the
@@ -106,225 +345,837 @@ void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start,
* needed.
*/
- ib_umem_odp_unmap_dma_pages(umem, start, end);
+ ib_umem_odp_unmap_dma_pages(umem_odp, start, end);
+
+ if (unlikely(!umem_odp->npages && mr->parent))
+ destroy_unused_implicit_child_mr(mr);
+out:
+ mutex_unlock(&umem_odp->umem_mutex);
+ return true;
}
-void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
+const struct mmu_interval_notifier_ops mlx5_mn_ops = {
+ .invalidate = mlx5_ib_invalidate_range,
+};
+
+static void internal_fill_odp_caps(struct mlx5_ib_dev *dev)
{
struct ib_odp_caps *caps = &dev->odp_caps;
memset(caps, 0, sizeof(*caps));
- if (!MLX5_CAP_GEN(dev->mdev, pg))
+ if (!MLX5_CAP_GEN(dev->mdev, pg) || !mlx5r_umr_can_load_pas(dev, 0))
return;
caps->general_caps = IB_ODP_SUPPORT;
- if (MLX5_CAP_ODP(dev->mdev, ud_odp_caps.send))
+ if (MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset))
+ dev->odp_max_size = U64_MAX;
+ else
+ dev->odp_max_size = BIT_ULL(MLX5_MAX_UMR_SHIFT + PAGE_SHIFT);
+
+ if (MLX5_CAP_ODP_SCHEME(dev->mdev, ud_odp_caps.send))
caps->per_transport_caps.ud_odp_caps |= IB_ODP_SUPPORT_SEND;
- if (MLX5_CAP_ODP(dev->mdev, rc_odp_caps.send))
+ if (MLX5_CAP_ODP_SCHEME(dev->mdev, ud_odp_caps.srq_receive))
+ caps->per_transport_caps.ud_odp_caps |= IB_ODP_SUPPORT_SRQ_RECV;
+
+ if (MLX5_CAP_ODP_SCHEME(dev->mdev, rc_odp_caps.send))
caps->per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_SEND;
- if (MLX5_CAP_ODP(dev->mdev, rc_odp_caps.receive))
+ if (MLX5_CAP_ODP_SCHEME(dev->mdev, rc_odp_caps.receive))
caps->per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_RECV;
- if (MLX5_CAP_ODP(dev->mdev, rc_odp_caps.write))
+ if (MLX5_CAP_ODP_SCHEME(dev->mdev, rc_odp_caps.write))
caps->per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_WRITE;
- if (MLX5_CAP_ODP(dev->mdev, rc_odp_caps.read))
+ if (MLX5_CAP_ODP_SCHEME(dev->mdev, rc_odp_caps.read))
caps->per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_READ;
- return;
-}
+ if (MLX5_CAP_ODP_SCHEME(dev->mdev, rc_odp_caps.atomic))
+ caps->per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_ATOMIC;
-static struct mlx5_ib_mr *mlx5_ib_odp_find_mr_lkey(struct mlx5_ib_dev *dev,
- u32 key)
-{
- u32 base_key = mlx5_base_mkey(key);
- struct mlx5_core_mkey *mmkey = __mlx5_mr_lookup(dev->mdev, base_key);
- struct mlx5_ib_mr *mr = container_of(mmkey, struct mlx5_ib_mr, mmkey);
+ if (MLX5_CAP_ODP_SCHEME(dev->mdev, rc_odp_caps.srq_receive))
+ caps->per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_SRQ_RECV;
+
+ if (MLX5_CAP_ODP_SCHEME(dev->mdev, xrc_odp_caps.send))
+ caps->per_transport_caps.xrc_odp_caps |= IB_ODP_SUPPORT_SEND;
+
+ if (MLX5_CAP_ODP_SCHEME(dev->mdev, xrc_odp_caps.receive))
+ caps->per_transport_caps.xrc_odp_caps |= IB_ODP_SUPPORT_RECV;
+
+ if (MLX5_CAP_ODP_SCHEME(dev->mdev, xrc_odp_caps.write))
+ caps->per_transport_caps.xrc_odp_caps |= IB_ODP_SUPPORT_WRITE;
+
+ if (MLX5_CAP_ODP_SCHEME(dev->mdev, xrc_odp_caps.read))
+ caps->per_transport_caps.xrc_odp_caps |= IB_ODP_SUPPORT_READ;
+
+ if (MLX5_CAP_ODP_SCHEME(dev->mdev, xrc_odp_caps.atomic))
+ caps->per_transport_caps.xrc_odp_caps |= IB_ODP_SUPPORT_ATOMIC;
- if (!mmkey || mmkey->key != key || !mr->live)
- return NULL;
+ if (MLX5_CAP_ODP_SCHEME(dev->mdev, xrc_odp_caps.srq_receive))
+ caps->per_transport_caps.xrc_odp_caps |= IB_ODP_SUPPORT_SRQ_RECV;
- return container_of(mmkey, struct mlx5_ib_mr, mmkey);
+ if (MLX5_CAP_GEN(dev->mdev, fixed_buffer_size) &&
+ MLX5_CAP_GEN(dev->mdev, null_mkey) &&
+ MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset) &&
+ !MLX5_CAP_GEN(dev->mdev, umr_indirect_mkey_disabled) &&
+ mlx5_imr_ksm_entries != 0 &&
+ !(mlx5_imr_ksm_page_shift >
+ get_max_log_entity_size_cap(dev, MLX5_MKC_ACCESS_MODE_KSM)))
+ caps->general_caps |= IB_ODP_SUPPORT_IMPLICIT;
}
-static void mlx5_ib_page_fault_resume(struct mlx5_ib_qp *qp,
- struct mlx5_ib_pfault *pfault,
+static void mlx5_ib_page_fault_resume(struct mlx5_ib_dev *dev,
+ struct mlx5_pagefault *pfault,
int error)
{
- struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.pd->device);
- u32 qpn = qp->trans_qp.base.mqp.qpn;
- int ret = mlx5_core_page_fault_resume(dev->mdev,
- qpn,
- pfault->mpfault.flags,
- error);
- if (ret)
- pr_err("Failed to resolve the page fault on QP 0x%x\n", qpn);
+ int wq_num = pfault->event_subtype == MLX5_PFAULT_SUBTYPE_WQE ?
+ pfault->wqe.wq_num : pfault->token;
+ u32 in[MLX5_ST_SZ_DW(page_fault_resume_in)] = {};
+ void *info;
+ int err;
+
+ MLX5_SET(page_fault_resume_in, in, opcode, MLX5_CMD_OP_PAGE_FAULT_RESUME);
+
+ if (pfault->event_subtype == MLX5_PFAULT_SUBTYPE_MEMORY) {
+ info = MLX5_ADDR_OF(page_fault_resume_in, in,
+ page_fault_info.mem_page_fault_info);
+ MLX5_SET(mem_page_fault_info, info, fault_token_31_0,
+ pfault->token & 0xffffffff);
+ MLX5_SET(mem_page_fault_info, info, fault_token_47_32,
+ (pfault->token >> 32) & 0xffff);
+ MLX5_SET(mem_page_fault_info, info, error, !!error);
+ } else {
+ info = MLX5_ADDR_OF(page_fault_resume_in, in,
+ page_fault_info.trans_page_fault_info);
+ MLX5_SET(trans_page_fault_info, info, page_fault_type,
+ pfault->type);
+ MLX5_SET(trans_page_fault_info, info, fault_token,
+ pfault->token);
+ MLX5_SET(trans_page_fault_info, info, wq_number, wq_num);
+ MLX5_SET(trans_page_fault_info, info, error, !!error);
+ }
+
+ err = mlx5_cmd_exec_in(dev->mdev, page_fault_resume, in);
+ if (err)
+ mlx5_ib_err(dev, "Failed to resolve the page fault on WQ 0x%x err %d\n",
+ wq_num, err);
+}
+
+static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr,
+ unsigned long idx)
+{
+ struct mlx5_ib_dev *dev = mr_to_mdev(imr);
+ struct ib_umem_odp *odp;
+ struct mlx5_ib_mr *mr;
+ struct mlx5_ib_mr *ret;
+ int err;
+
+ odp = ib_umem_odp_alloc_child(to_ib_umem_odp(imr->umem),
+ idx * mlx5_imr_mtt_size,
+ mlx5_imr_mtt_size, &mlx5_mn_ops);
+ if (IS_ERR(odp))
+ return ERR_CAST(odp);
+
+ mr = mlx5_mr_cache_alloc(dev, imr->access_flags,
+ MLX5_MKC_ACCESS_MODE_MTT,
+ mlx5_imr_mtt_entries);
+ if (IS_ERR(mr)) {
+ ib_umem_odp_release(odp);
+ return mr;
+ }
+
+ mr->access_flags = imr->access_flags;
+ mr->ibmr.pd = imr->ibmr.pd;
+ mr->ibmr.device = &mr_to_mdev(imr)->ib_dev;
+ mr->umem = &odp->umem;
+ mr->ibmr.lkey = mr->mmkey.key;
+ mr->ibmr.rkey = mr->mmkey.key;
+ mr->ibmr.iova = idx * mlx5_imr_mtt_size;
+ mr->parent = imr;
+ odp->private = mr;
+
+ /*
+ * First refcount is owned by the xarray and second refconut
+ * is returned to the caller.
+ */
+ refcount_set(&mr->mmkey.usecount, 2);
+
+ err = mlx5r_umr_update_xlt(mr, 0,
+ mlx5_imr_mtt_entries,
+ PAGE_SHIFT,
+ MLX5_IB_UPD_XLT_ZAP |
+ MLX5_IB_UPD_XLT_ENABLE);
+ if (err) {
+ ret = ERR_PTR(err);
+ goto out_mr;
+ }
+
+ xa_lock(&imr->implicit_children);
+ ret = __xa_cmpxchg(&imr->implicit_children, idx, NULL, mr,
+ GFP_KERNEL);
+ if (unlikely(ret)) {
+ if (xa_is_err(ret)) {
+ ret = ERR_PTR(xa_err(ret));
+ goto out_lock;
+ }
+ /*
+ * Another thread beat us to creating the child mr, use
+ * theirs.
+ */
+ refcount_inc(&ret->mmkey.usecount);
+ goto out_lock;
+ }
+
+ if (MLX5_CAP_ODP(dev->mdev, mem_page_fault)) {
+ ret = xa_store(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key),
+ &mr->mmkey, GFP_KERNEL);
+ if (xa_is_err(ret)) {
+ ret = ERR_PTR(xa_err(ret));
+ __xa_erase(&imr->implicit_children, idx);
+ goto out_lock;
+ }
+ mr->mmkey.type = MLX5_MKEY_IMPLICIT_CHILD;
+ }
+ xa_unlock(&imr->implicit_children);
+ mlx5_ib_dbg(mr_to_mdev(imr), "key %x mr %p\n", mr->mmkey.key, mr);
+ return mr;
+
+out_lock:
+ xa_unlock(&imr->implicit_children);
+out_mr:
+ mlx5_ib_dereg_mr(&mr->ibmr, NULL);
+ return ret;
}
/*
- * Handle a single data segment in a page-fault WQE.
- *
- * Returns number of pages retrieved on success. The caller will continue to
- * the next data segment.
- * Can return the following error codes:
- * -EAGAIN to designate a temporary error. The caller will abort handling the
- * page fault and resolve it.
- * -EFAULT when there's an error mapping the requested pages. The caller will
- * abort the page fault handling and possibly move the QP to an error state.
- * On other errors the QP should also be closed with an error.
+ * When using memory scheme ODP, implicit MRs can't use the reserved null mkey
+ * and each implicit MR needs to assign a private null mkey to get the page
+ * faults on.
+ * The null mkey is created with the properties to enable getting the page
+ * fault for every time it is accessed and having all relevant access flags.
*/
-static int pagefault_single_data_segment(struct mlx5_ib_qp *qp,
- struct mlx5_ib_pfault *pfault,
- u32 key, u64 io_virt, size_t bcnt,
- u32 *bytes_mapped)
+static int alloc_implicit_mr_null_mkey(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_mr *imr,
+ struct mlx5_ib_pd *pd)
{
- struct mlx5_ib_dev *mib_dev = to_mdev(qp->ibqp.pd->device);
- int srcu_key;
- unsigned int current_seq;
+ size_t inlen = MLX5_ST_SZ_BYTES(create_mkey_in) + 64;
+ void *mkc;
+ u32 *in;
+ int err;
+
+ in = kzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ MLX5_SET(create_mkey_in, in, translations_octword_actual_size, 4);
+ MLX5_SET(create_mkey_in, in, pg_access, 1);
+
+ mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
+ MLX5_SET(mkc, mkc, a, 1);
+ MLX5_SET(mkc, mkc, rw, 1);
+ MLX5_SET(mkc, mkc, rr, 1);
+ MLX5_SET(mkc, mkc, lw, 1);
+ MLX5_SET(mkc, mkc, lr, 1);
+ MLX5_SET(mkc, mkc, free, 0);
+ MLX5_SET(mkc, mkc, umr_en, 0);
+ MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MTT);
+
+ MLX5_SET(mkc, mkc, translations_octword_size, 4);
+ MLX5_SET(mkc, mkc, log_page_size, 61);
+ MLX5_SET(mkc, mkc, length64, 1);
+ MLX5_SET(mkc, mkc, pd, pd->pdn);
+ MLX5_SET64(mkc, mkc, start_addr, 0);
+ MLX5_SET(mkc, mkc, qpn, 0xffffff);
+
+ err = mlx5_core_create_mkey(dev->mdev, &imr->null_mmkey.key, in, inlen);
+ if (err)
+ goto free_in;
+
+ imr->null_mmkey.type = MLX5_MKEY_NULL;
+
+free_in:
+ kfree(in);
+ return err;
+}
+
+struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
+ int access_flags)
+{
+ struct mlx5_ib_dev *dev = to_mdev(pd->ibpd.device);
+ struct ib_umem_odp *umem_odp;
+ struct mlx5_ib_mr *imr;
+ int err;
+
+ if (!mlx5r_umr_can_load_pas(dev, mlx5_imr_mtt_entries * PAGE_SIZE))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ umem_odp = ib_umem_odp_alloc_implicit(&dev->ib_dev, access_flags);
+ if (IS_ERR(umem_odp))
+ return ERR_CAST(umem_odp);
+
+ imr = mlx5_mr_cache_alloc(dev, access_flags, MLX5_MKC_ACCESS_MODE_KSM,
+ mlx5_imr_ksm_entries);
+ if (IS_ERR(imr)) {
+ ib_umem_odp_release(umem_odp);
+ return imr;
+ }
+
+ imr->access_flags = access_flags;
+ imr->ibmr.pd = &pd->ibpd;
+ imr->ibmr.iova = 0;
+ imr->umem = &umem_odp->umem;
+ imr->ibmr.lkey = imr->mmkey.key;
+ imr->ibmr.rkey = imr->mmkey.key;
+ imr->ibmr.device = &dev->ib_dev;
+ imr->is_odp_implicit = true;
+ xa_init(&imr->implicit_children);
+
+ if (MLX5_CAP_ODP(dev->mdev, mem_page_fault)) {
+ err = alloc_implicit_mr_null_mkey(dev, imr, pd);
+ if (err)
+ goto out_mr;
+
+ err = mlx5r_store_odp_mkey(dev, &imr->null_mmkey);
+ if (err)
+ goto out_mr;
+ }
+
+ err = mlx5r_umr_update_xlt(imr, 0,
+ mlx5_imr_ksm_entries,
+ mlx5_imr_ksm_page_shift,
+ MLX5_IB_UPD_XLT_INDIRECT |
+ MLX5_IB_UPD_XLT_ZAP |
+ MLX5_IB_UPD_XLT_ENABLE);
+ if (err)
+ goto out_mr;
+
+ err = mlx5r_store_odp_mkey(dev, &imr->mmkey);
+ if (err)
+ goto out_mr;
+
+ mlx5_ib_dbg(dev, "key %x mr %p\n", imr->mmkey.key, imr);
+ return imr;
+out_mr:
+ mlx5_ib_err(dev, "Failed to register MKEY %d\n", err);
+ mlx5_ib_dereg_mr(&imr->ibmr, NULL);
+ return ERR_PTR(err);
+}
+
+void mlx5_ib_free_odp_mr(struct mlx5_ib_mr *mr)
+{
+ struct mlx5_ib_mr *mtt;
+ unsigned long idx;
+
+ /*
+ * If this is an implicit MR it is already invalidated so we can just
+ * delete the children mkeys.
+ */
+ xa_for_each(&mr->implicit_children, idx, mtt) {
+ xa_erase(&mr->implicit_children, idx);
+ mlx5_ib_dereg_mr(&mtt->ibmr, NULL);
+ }
+
+ if (mr->null_mmkey.key) {
+ xa_erase(&mr_to_mdev(mr)->odp_mkeys,
+ mlx5_base_mkey(mr->null_mmkey.key));
+
+ mlx5_core_destroy_mkey(mr_to_mdev(mr)->mdev,
+ mr->null_mmkey.key);
+ }
+}
+
+#define MLX5_PF_FLAGS_DOWNGRADE BIT(1)
+#define MLX5_PF_FLAGS_SNAPSHOT BIT(2)
+#define MLX5_PF_FLAGS_ENABLE BIT(3)
+static int pagefault_real_mr(struct mlx5_ib_mr *mr, struct ib_umem_odp *odp,
+ u64 user_va, size_t bcnt, u32 *bytes_mapped,
+ u32 flags)
+{
+ int page_shift, ret, np;
+ bool downgrade = flags & MLX5_PF_FLAGS_DOWNGRADE;
+ u64 access_mask = 0;
u64 start_idx;
- int npages = 0, ret = 0;
- struct mlx5_ib_mr *mr;
- u64 access_mask = ODP_READ_ALLOWED_BIT;
+ bool fault = !(flags & MLX5_PF_FLAGS_SNAPSHOT);
+ u32 xlt_flags = MLX5_IB_UPD_XLT_ATOMIC;
+
+ if (flags & MLX5_PF_FLAGS_ENABLE)
+ xlt_flags |= MLX5_IB_UPD_XLT_ENABLE;
+
+ if (flags & MLX5_PF_FLAGS_DOWNGRADE)
+ xlt_flags |= MLX5_IB_UPD_XLT_DOWNGRADE;
+
+ page_shift = odp->page_shift;
+ start_idx = (user_va - ib_umem_start(odp)) >> page_shift;
+
+ if (odp->umem.writable && !downgrade)
+ access_mask |= HMM_PFN_WRITE;
+
+ np = ib_umem_odp_map_dma_and_lock(odp, user_va, bcnt, access_mask, fault);
+ if (np < 0)
+ return np;
- srcu_key = srcu_read_lock(&mib_dev->mr_srcu);
- mr = mlx5_ib_odp_find_mr_lkey(mib_dev, key);
/*
- * If we didn't find the MR, it means the MR was closed while we were
- * handling the ODP event. In this case we return -EFAULT so that the
- * QP will be closed.
+ * No need to check whether the MTTs really belong to this MR, since
+ * ib_umem_odp_map_dma_and_lock already checks this.
*/
- if (!mr || !mr->ibmr.pd) {
- pr_err("Failed to find relevant mr for lkey=0x%06x, probably the MR was destroyed\n",
- key);
- ret = -EFAULT;
- goto srcu_unlock;
- }
- if (!mr->umem->odp_data) {
- pr_debug("skipping non ODP MR (lkey=0x%06x) in page fault handler.\n",
- key);
- if (bytes_mapped)
- *bytes_mapped +=
- (bcnt - pfault->mpfault.bytes_committed);
- goto srcu_unlock;
- }
- if (mr->ibmr.pd != qp->ibqp.pd) {
- pr_err("Page-fault with different PDs for QP and MR.\n");
- ret = -EFAULT;
- goto srcu_unlock;
+ ret = mlx5r_umr_update_xlt(mr, start_idx, np, page_shift, xlt_flags);
+ mutex_unlock(&odp->umem_mutex);
+
+ if (ret < 0) {
+ if (ret != -EAGAIN)
+ mlx5_ib_err(mr_to_mdev(mr),
+ "Failed to update mkey page tables\n");
+ goto out;
+ }
+
+ if (bytes_mapped) {
+ u32 new_mappings = (np << page_shift) -
+ (user_va - round_down(user_va, 1 << page_shift));
+
+ *bytes_mapped += min_t(u32, new_mappings, bcnt);
}
- current_seq = ACCESS_ONCE(mr->umem->odp_data->notifiers_seq);
+ return np << (page_shift - PAGE_SHIFT);
+
+out:
+ return ret;
+}
+
+static int pagefault_implicit_mr(struct mlx5_ib_mr *imr,
+ struct ib_umem_odp *odp_imr, u64 user_va,
+ size_t bcnt, u32 *bytes_mapped, u32 flags)
+{
+ unsigned long end_idx = (user_va + bcnt - 1) >> mlx5_imr_mtt_shift;
+ unsigned long upd_start_idx = end_idx + 1;
+ unsigned long upd_len = 0;
+ unsigned long npages = 0;
+ int err;
+ int ret;
+
+ if (unlikely(user_va >= mlx5_imr_ksm_entries * mlx5_imr_mtt_size ||
+ mlx5_imr_ksm_entries * mlx5_imr_mtt_size - user_va < bcnt))
+ return -EFAULT;
+
+ /* Fault each child mr that intersects with our interval. */
+ while (bcnt) {
+ unsigned long idx = user_va >> mlx5_imr_mtt_shift;
+ struct ib_umem_odp *umem_odp;
+ struct mlx5_ib_mr *mtt;
+ u64 len;
+
+ xa_lock(&imr->implicit_children);
+ mtt = xa_load(&imr->implicit_children, idx);
+ if (unlikely(!mtt)) {
+ xa_unlock(&imr->implicit_children);
+ mtt = implicit_get_child_mr(imr, idx);
+ if (IS_ERR(mtt)) {
+ ret = PTR_ERR(mtt);
+ goto out;
+ }
+ upd_start_idx = min(upd_start_idx, idx);
+ upd_len = idx - upd_start_idx + 1;
+ } else {
+ refcount_inc(&mtt->mmkey.usecount);
+ xa_unlock(&imr->implicit_children);
+ }
+
+ umem_odp = to_ib_umem_odp(mtt->umem);
+ len = min_t(u64, user_va + bcnt, ib_umem_end(umem_odp)) -
+ user_va;
+
+ ret = pagefault_real_mr(mtt, umem_odp, user_va, len,
+ bytes_mapped, flags);
+
+ mlx5r_deref_odp_mkey(&mtt->mmkey);
+
+ if (ret < 0)
+ goto out;
+ user_va += len;
+ bcnt -= len;
+ npages += ret;
+ }
+
+ ret = npages;
+
/*
- * Ensure the sequence number is valid for some time before we call
- * gup.
+ * Any time the implicit_children are changed we must perform an
+ * update of the xlt before exiting to ensure the HW and the
+ * implicit_children remains synchronized.
*/
- smp_rmb();
+out:
+ if (likely(!upd_len))
+ return ret;
/*
- * Avoid branches - this code will perform correctly
- * in all iterations (in iteration 2 and above,
- * bytes_committed == 0).
+ * Notice this is not strictly ordered right, the KSM is updated after
+ * the implicit_children is updated, so a parallel page fault could
+ * see a MR that is not yet visible in the KSM. This is similar to a
+ * parallel page fault seeing a MR that is being concurrently removed
+ * from the KSM. Both of these improbable situations are resolved
+ * safely by resuming the HW and then taking another page fault. The
+ * next pagefault handler will see the new information.
*/
- io_virt += pfault->mpfault.bytes_committed;
- bcnt -= pfault->mpfault.bytes_committed;
+ mutex_lock(&odp_imr->umem_mutex);
+ err = mlx5r_umr_update_xlt(imr, upd_start_idx, upd_len, 0,
+ MLX5_IB_UPD_XLT_INDIRECT |
+ MLX5_IB_UPD_XLT_ATOMIC);
+ mutex_unlock(&odp_imr->umem_mutex);
+ if (err) {
+ mlx5_ib_err(mr_to_mdev(imr), "Failed to update PAS\n");
+ return err;
+ }
+ return ret;
+}
- start_idx = (io_virt - (mr->mmkey.iova & PAGE_MASK)) >> PAGE_SHIFT;
+static int pagefault_dmabuf_mr(struct mlx5_ib_mr *mr, size_t bcnt,
+ u32 *bytes_mapped, u32 flags)
+{
+ struct ib_umem_dmabuf *umem_dmabuf = to_ib_umem_dmabuf(mr->umem);
+ int access_mode = mr->data_direct ? MLX5_MKC_ACCESS_MODE_KSM :
+ MLX5_MKC_ACCESS_MODE_MTT;
+ unsigned int old_page_shift = mr->page_shift;
+ unsigned int page_shift;
+ unsigned long page_size;
+ u32 xlt_flags = 0;
+ int err;
+
+ if (flags & MLX5_PF_FLAGS_ENABLE)
+ xlt_flags |= MLX5_IB_UPD_XLT_ENABLE;
+
+ dma_resv_lock(umem_dmabuf->attach->dmabuf->resv, NULL);
+ err = ib_umem_dmabuf_map_pages(umem_dmabuf);
+ if (err) {
+ dma_resv_unlock(umem_dmabuf->attach->dmabuf->resv);
+ return err;
+ }
+
+ page_size = mlx5_umem_dmabuf_find_best_pgsz(umem_dmabuf, access_mode);
+ if (!page_size) {
+ ib_umem_dmabuf_unmap_pages(umem_dmabuf);
+ err = -EINVAL;
+ } else {
+ page_shift = order_base_2(page_size);
+ if (page_shift != mr->page_shift && mr->dmabuf_faulted) {
+ err = mlx5r_umr_dmabuf_update_pgsz(mr, xlt_flags,
+ page_shift);
+ } else {
+ mr->page_shift = page_shift;
+ if (mr->data_direct)
+ err = mlx5r_umr_update_data_direct_ksm_pas(
+ mr, xlt_flags);
+ else
+ err = mlx5r_umr_update_mr_pas(mr,
+ xlt_flags);
+ }
+ }
+ dma_resv_unlock(umem_dmabuf->attach->dmabuf->resv);
- if (mr->umem->writable)
- access_mask |= ODP_WRITE_ALLOWED_BIT;
- npages = ib_umem_odp_map_dma_pages(mr->umem, io_virt, bcnt,
- access_mask, current_seq);
- if (npages < 0) {
- ret = npages;
- goto srcu_unlock;
+ if (err) {
+ mr->page_shift = old_page_shift;
+ return err;
}
- if (npages > 0) {
- mutex_lock(&mr->umem->odp_data->umem_mutex);
- if (!ib_umem_mmu_notifier_retry(mr->umem, current_seq)) {
+ mr->dmabuf_faulted = 1;
+
+ if (bytes_mapped)
+ *bytes_mapped += bcnt;
+
+ return ib_umem_num_pages(mr->umem);
+}
+
+/*
+ * Returns:
+ * -EFAULT: The io_virt->bcnt is not within the MR, it covers pages that are
+ * not accessible, or the MR is no longer valid.
+ * -EAGAIN/-ENOMEM: The operation should be retried
+ *
+ * -EINVAL/others: General internal malfunction
+ * >0: Number of pages mapped
+ */
+static int pagefault_mr(struct mlx5_ib_mr *mr, u64 io_virt, size_t bcnt,
+ u32 *bytes_mapped, u32 flags, bool permissive_fault)
+{
+ struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem);
+
+ if (unlikely(io_virt < mr->ibmr.iova) && !permissive_fault)
+ return -EFAULT;
+
+ if (mr->umem->is_dmabuf)
+ return pagefault_dmabuf_mr(mr, bcnt, bytes_mapped, flags);
+
+ if (!odp->is_implicit_odp) {
+ u64 offset = io_virt < mr->ibmr.iova ? 0 : io_virt - mr->ibmr.iova;
+ u64 user_va;
+
+ if (check_add_overflow(offset, (u64)odp->umem.address,
+ &user_va))
+ return -EFAULT;
+
+ if (permissive_fault) {
+ if (user_va < ib_umem_start(odp))
+ user_va = ib_umem_start(odp);
+ if ((user_va + bcnt) > ib_umem_end(odp))
+ bcnt = ib_umem_end(odp) - user_va;
+ } else if (unlikely(user_va >= ib_umem_end(odp) ||
+ ib_umem_end(odp) - user_va < bcnt))
+ return -EFAULT;
+ return pagefault_real_mr(mr, odp, user_va, bcnt, bytes_mapped,
+ flags);
+ }
+ return pagefault_implicit_mr(mr, odp, io_virt, bcnt, bytes_mapped,
+ flags);
+}
+
+int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr)
+{
+ int ret;
+
+ ret = pagefault_real_mr(mr, to_ib_umem_odp(mr->umem), mr->umem->address,
+ mr->umem->length, NULL,
+ MLX5_PF_FLAGS_SNAPSHOT | MLX5_PF_FLAGS_ENABLE);
+ return ret >= 0 ? 0 : ret;
+}
+
+int mlx5_ib_init_dmabuf_mr(struct mlx5_ib_mr *mr)
+{
+ int ret;
+
+ ret = pagefault_dmabuf_mr(mr, mr->umem->length, NULL,
+ MLX5_PF_FLAGS_ENABLE);
+
+ return ret >= 0 ? 0 : ret;
+}
+
+struct pf_frame {
+ struct pf_frame *next;
+ u32 key;
+ u64 io_virt;
+ size_t bcnt;
+ int depth;
+};
+
+static bool mkey_is_eq(struct mlx5_ib_mkey *mmkey, u32 key)
+{
+ if (!mmkey)
+ return false;
+ if (mmkey->type == MLX5_MKEY_MW ||
+ mmkey->type == MLX5_MKEY_INDIRECT_DEVX)
+ return mlx5_base_mkey(mmkey->key) == mlx5_base_mkey(key);
+ return mmkey->key == key;
+}
+
+static struct mlx5_ib_mkey *find_odp_mkey(struct mlx5_ib_dev *dev, u32 key)
+{
+ struct mlx5_ib_mkey *mmkey;
+
+ xa_lock(&dev->odp_mkeys);
+ mmkey = xa_load(&dev->odp_mkeys, mlx5_base_mkey(key));
+ if (!mmkey) {
+ mmkey = ERR_PTR(-ENOENT);
+ goto out;
+ }
+ if (!mkey_is_eq(mmkey, key)) {
+ mmkey = ERR_PTR(-EFAULT);
+ goto out;
+ }
+ refcount_inc(&mmkey->usecount);
+out:
+ xa_unlock(&dev->odp_mkeys);
+
+ return mmkey;
+}
+
+/*
+ * Handle a single data segment in a page-fault WQE or RDMA region.
+ *
+ * Returns zero on success. The caller may continue to the next data segment.
+ * Can return the following error codes:
+ * -EAGAIN to designate a temporary error. The caller will abort handling the
+ * page fault and resolve it.
+ * -EFAULT when there's an error mapping the requested pages. The caller will
+ * abort the page fault handling.
+ */
+static int pagefault_single_data_segment(struct mlx5_ib_dev *dev,
+ struct ib_pd *pd, u32 key,
+ u64 io_virt, size_t bcnt,
+ u32 *bytes_committed,
+ u32 *bytes_mapped)
+{
+ int ret, i, outlen, cur_outlen = 0, depth = 0, pages_in_range;
+ struct pf_frame *head = NULL, *frame;
+ struct mlx5_ib_mkey *mmkey;
+ struct mlx5_ib_mr *mr;
+ struct mlx5_klm *pklm;
+ u32 *out = NULL;
+ size_t offset;
+
+ io_virt += *bytes_committed;
+ bcnt -= *bytes_committed;
+next_mr:
+ mmkey = find_odp_mkey(dev, key);
+ if (IS_ERR(mmkey)) {
+ ret = PTR_ERR(mmkey);
+ if (ret == -ENOENT) {
+ mlx5_ib_dbg(
+ dev,
+ "skipping non ODP MR (lkey=0x%06x) in page fault handler.\n",
+ key);
+ if (bytes_mapped)
+ *bytes_mapped += bcnt;
/*
- * No need to check whether the MTTs really belong to
- * this MR, since ib_umem_odp_map_dma_pages already
- * checks this.
+ * The user could specify a SGL with multiple lkeys and
+ * only some of them are ODP. Treat the non-ODP ones as
+ * fully faulted.
*/
- ret = mlx5_ib_update_mtt(mr, start_idx, npages, 0);
- } else {
- ret = -EAGAIN;
+ ret = 0;
}
- mutex_unlock(&mr->umem->odp_data->umem_mutex);
- if (ret < 0) {
- if (ret != -EAGAIN)
- pr_err("Failed to update mkey page tables\n");
- goto srcu_unlock;
+ goto end;
+ }
+
+ switch (mmkey->type) {
+ case MLX5_MKEY_MR:
+ mr = container_of(mmkey, struct mlx5_ib_mr, mmkey);
+
+ pages_in_range = (ALIGN(io_virt + bcnt, PAGE_SIZE) -
+ (io_virt & PAGE_MASK)) >>
+ PAGE_SHIFT;
+ ret = pagefault_mr(mr, io_virt, bcnt, bytes_mapped, 0, false);
+ if (ret < 0)
+ goto end;
+
+ mlx5_update_odp_stats_with_handled(mr, faults, ret);
+
+ if (ret < pages_in_range) {
+ ret = -EFAULT;
+ goto end;
+ }
+
+ ret = 0;
+ break;
+
+ case MLX5_MKEY_MW:
+ case MLX5_MKEY_INDIRECT_DEVX:
+ if (depth >= MLX5_CAP_GEN(dev->mdev, max_indirection)) {
+ mlx5_ib_dbg(dev, "indirection level exceeded\n");
+ ret = -EFAULT;
+ goto end;
}
- if (bytes_mapped) {
- u32 new_mappings = npages * PAGE_SIZE -
- (io_virt - round_down(io_virt, PAGE_SIZE));
- *bytes_mapped += min_t(u32, new_mappings, bcnt);
+ outlen = MLX5_ST_SZ_BYTES(query_mkey_out) +
+ sizeof(*pklm) * (mmkey->ndescs - 2);
+
+ if (outlen > cur_outlen) {
+ kfree(out);
+ out = kzalloc(outlen, GFP_KERNEL);
+ if (!out) {
+ ret = -ENOMEM;
+ goto end;
+ }
+ cur_outlen = outlen;
}
- }
-srcu_unlock:
- if (ret == -EAGAIN) {
- if (!mr->umem->odp_data->dying) {
- struct ib_umem_odp *odp_data = mr->umem->odp_data;
- unsigned long timeout =
- msecs_to_jiffies(MMU_NOTIFIER_TIMEOUT);
-
- if (!wait_for_completion_timeout(
- &odp_data->notifier_completion,
- timeout)) {
- pr_warn("timeout waiting for mmu notifier completion\n");
+ pklm = (struct mlx5_klm *)MLX5_ADDR_OF(query_mkey_out, out,
+ bsf0_klm0_pas_mtt0_1);
+
+ ret = mlx5_core_query_mkey(dev->mdev, mmkey->key, out, outlen);
+ if (ret)
+ goto end;
+
+ offset = io_virt - MLX5_GET64(query_mkey_out, out,
+ memory_key_mkey_entry.start_addr);
+
+ for (i = 0; bcnt && i < mmkey->ndescs; i++, pklm++) {
+ if (offset >= be32_to_cpu(pklm->bcount)) {
+ offset -= be32_to_cpu(pklm->bcount);
+ continue;
}
- } else {
- /* The MR is being killed, kill the QP as well. */
- ret = -EFAULT;
+
+ frame = kzalloc(sizeof(*frame), GFP_KERNEL);
+ if (!frame) {
+ ret = -ENOMEM;
+ goto end;
+ }
+
+ frame->key = be32_to_cpu(pklm->key);
+ frame->io_virt = be64_to_cpu(pklm->va) + offset;
+ frame->bcnt = min_t(size_t, bcnt,
+ be32_to_cpu(pklm->bcount) - offset);
+ frame->depth = depth + 1;
+ frame->next = head;
+ head = frame;
+
+ bcnt -= frame->bcnt;
+ offset = 0;
}
+ break;
+
+ default:
+ mlx5_ib_dbg(dev, "wrong mkey type %d\n", mmkey->type);
+ ret = -EFAULT;
+ goto end;
+ }
+
+ if (head) {
+ frame = head;
+ head = frame->next;
+
+ key = frame->key;
+ io_virt = frame->io_virt;
+ bcnt = frame->bcnt;
+ depth = frame->depth;
+ kfree(frame);
+
+ mlx5r_deref_odp_mkey(mmkey);
+ goto next_mr;
+ }
+
+end:
+ if (!IS_ERR(mmkey))
+ mlx5r_deref_odp_mkey(mmkey);
+ while (head) {
+ frame = head;
+ head = frame->next;
+ kfree(frame);
}
- srcu_read_unlock(&mib_dev->mr_srcu, srcu_key);
- pfault->mpfault.bytes_committed = 0;
- return ret ? ret : npages;
+ kfree(out);
+
+ *bytes_committed = 0;
+ return ret;
}
-/**
+/*
* Parse a series of data segments for page fault handling.
*
- * @qp the QP on which the fault occurred.
- * @pfault contains page fault information.
- * @wqe points at the first data segment in the WQE.
- * @wqe_end points after the end of the WQE.
- * @bytes_mapped receives the number of bytes that the function was able to
- * map. This allows the caller to decide intelligently whether
- * enough memory was mapped to resolve the page fault
- * successfully (e.g. enough for the next MTU, or the entire
- * WQE).
- * @total_wqe_bytes receives the total data size of this WQE in bytes (minus
- * the committed bytes).
+ * @dev: Pointer to mlx5 IB device
+ * @pfault: contains page fault information.
+ * @wqe: points at the first data segment in the WQE.
+ * @wqe_end: points after the end of the WQE.
+ * @bytes_mapped: receives the number of bytes that the function was able to
+ * map. This allows the caller to decide intelligently whether
+ * enough memory was mapped to resolve the page fault
+ * successfully (e.g. enough for the next MTU, or the entire
+ * WQE).
+ * @total_wqe_bytes: receives the total data size of this WQE in bytes (minus
+ * the committed bytes).
+ * @receive_queue: receive WQE end of sg list
*
- * Returns the number of pages loaded if positive, zero for an empty WQE, or a
- * negative error code.
+ * Returns zero for success or a negative error code.
*/
-static int pagefault_data_segments(struct mlx5_ib_qp *qp,
- struct mlx5_ib_pfault *pfault, void *wqe,
+static int pagefault_data_segments(struct mlx5_ib_dev *dev,
+ struct mlx5_pagefault *pfault,
+ void *wqe,
void *wqe_end, u32 *bytes_mapped,
- u32 *total_wqe_bytes, int receive_queue)
+ u32 *total_wqe_bytes, bool receive_queue)
{
- int ret = 0, npages = 0;
+ int ret = 0;
u64 io_virt;
- u32 key;
+ __be32 key;
u32 byte_count;
size_t bcnt;
int inline_segment;
- /* Skip SRQ next-WQE segment. */
- if (receive_queue && qp->ibqp.srq)
- wqe += sizeof(struct mlx5_wqe_srq_next_seg);
-
if (bytes_mapped)
*bytes_mapped = 0;
if (total_wqe_bytes)
@@ -334,7 +1185,7 @@ static int pagefault_data_segments(struct mlx5_ib_qp *qp,
struct mlx5_wqe_data_seg *dseg = wqe;
io_virt = be64_to_cpu(dseg->addr);
- key = be32_to_cpu(dseg->lkey);
+ key = dseg->lkey;
byte_count = be32_to_cpu(dseg->byte_count);
inline_segment = !!(byte_count & MLX5_INLINE_SEG);
bcnt = byte_count & ~MLX5_INLINE_SEG;
@@ -348,34 +1199,36 @@ static int pagefault_data_segments(struct mlx5_ib_qp *qp,
}
/* receive WQE end of sg list. */
- if (receive_queue && bcnt == 0 && key == MLX5_INVALID_LKEY &&
+ if (receive_queue && bcnt == 0 &&
+ key == dev->mkeys.terminate_scatter_list_mkey &&
io_virt == 0)
break;
if (!inline_segment && total_wqe_bytes) {
*total_wqe_bytes += bcnt - min_t(size_t, bcnt,
- pfault->mpfault.bytes_committed);
+ pfault->bytes_committed);
}
/* A zero length data segment designates a length of 2GB. */
if (bcnt == 0)
bcnt = 1U << 31;
- if (inline_segment || bcnt <= pfault->mpfault.bytes_committed) {
- pfault->mpfault.bytes_committed -=
+ if (inline_segment || bcnt <= pfault->bytes_committed) {
+ pfault->bytes_committed -=
min_t(size_t, bcnt,
- pfault->mpfault.bytes_committed);
+ pfault->bytes_committed);
continue;
}
- ret = pagefault_single_data_segment(qp, pfault, key, io_virt,
- bcnt, bytes_mapped);
+ ret = pagefault_single_data_segment(dev, NULL, be32_to_cpu(key),
+ io_virt, bcnt,
+ &pfault->bytes_committed,
+ bytes_mapped);
if (ret < 0)
break;
- npages += ret;
}
- return ret < 0 ? ret : npages;
+ return ret;
}
/*
@@ -383,16 +1236,13 @@ static int pagefault_data_segments(struct mlx5_ib_qp *qp,
* scatter-gather list, and set wqe_end to the end of the WQE.
*/
static int mlx5_ib_mr_initiator_pfault_handler(
- struct mlx5_ib_qp *qp, struct mlx5_ib_pfault *pfault,
- void **wqe, void **wqe_end, int wqe_length)
+ struct mlx5_ib_dev *dev, struct mlx5_pagefault *pfault,
+ struct mlx5_ib_qp *qp, void **wqe, void **wqe_end, int wqe_length)
{
- struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.pd->device);
struct mlx5_wqe_ctrl_seg *ctrl = *wqe;
- u16 wqe_index = pfault->mpfault.wqe.wqe_index;
+ u16 wqe_index = pfault->wqe.wqe_index;
+ struct mlx5_base_av *av;
unsigned ds, opcode;
-#if defined(DEBUG)
- u32 ctrl_wqe_index, ctrl_qpn;
-#endif
u32 qpn = qp->trans_qp.base.mqp.qpn;
ds = be32_to_cpu(ctrl->qpn_ds) & MLX5_WQE_CTRL_DS_MASK;
@@ -408,100 +1258,69 @@ static int mlx5_ib_mr_initiator_pfault_handler(
return -EFAULT;
}
-#if defined(DEBUG)
- ctrl_wqe_index = (be32_to_cpu(ctrl->opmod_idx_opcode) &
- MLX5_WQE_CTRL_WQE_INDEX_MASK) >>
- MLX5_WQE_CTRL_WQE_INDEX_SHIFT;
- if (wqe_index != ctrl_wqe_index) {
- mlx5_ib_err(dev, "Got WQE with invalid wqe_index. wqe_index=0x%x, qpn=0x%x ctrl->wqe_index=0x%x\n",
- wqe_index, qpn,
- ctrl_wqe_index);
- return -EFAULT;
- }
-
- ctrl_qpn = (be32_to_cpu(ctrl->qpn_ds) & MLX5_WQE_CTRL_QPN_MASK) >>
- MLX5_WQE_CTRL_QPN_SHIFT;
- if (qpn != ctrl_qpn) {
- mlx5_ib_err(dev, "Got WQE with incorrect QP number. wqe_index=0x%x, qpn=0x%x ctrl->qpn=0x%x\n",
- wqe_index, qpn,
- ctrl_qpn);
- return -EFAULT;
- }
-#endif /* DEBUG */
-
*wqe_end = *wqe + ds * MLX5_WQE_DS_UNITS;
*wqe += sizeof(*ctrl);
opcode = be32_to_cpu(ctrl->opmod_idx_opcode) &
MLX5_WQE_CTRL_OPCODE_MASK;
- switch (qp->ibqp.qp_type) {
- case IB_QPT_RC:
- switch (opcode) {
- case MLX5_OPCODE_SEND:
- case MLX5_OPCODE_SEND_IMM:
- case MLX5_OPCODE_SEND_INVAL:
- if (!(dev->odp_caps.per_transport_caps.rc_odp_caps &
- IB_ODP_SUPPORT_SEND))
- goto invalid_transport_or_opcode;
- break;
- case MLX5_OPCODE_RDMA_WRITE:
- case MLX5_OPCODE_RDMA_WRITE_IMM:
- if (!(dev->odp_caps.per_transport_caps.rc_odp_caps &
- IB_ODP_SUPPORT_WRITE))
- goto invalid_transport_or_opcode;
- *wqe += sizeof(struct mlx5_wqe_raddr_seg);
- break;
- case MLX5_OPCODE_RDMA_READ:
- if (!(dev->odp_caps.per_transport_caps.rc_odp_caps &
- IB_ODP_SUPPORT_READ))
- goto invalid_transport_or_opcode;
- *wqe += sizeof(struct mlx5_wqe_raddr_seg);
- break;
- default:
- goto invalid_transport_or_opcode;
- }
+
+ if (qp->type == IB_QPT_XRC_INI)
+ *wqe += sizeof(struct mlx5_wqe_xrc_seg);
+
+ if (qp->type == IB_QPT_UD || qp->type == MLX5_IB_QPT_DCI) {
+ av = *wqe;
+ if (av->dqp_dct & cpu_to_be32(MLX5_EXTENDED_UD_AV))
+ *wqe += sizeof(struct mlx5_av);
+ else
+ *wqe += sizeof(struct mlx5_base_av);
+ }
+
+ switch (opcode) {
+ case MLX5_OPCODE_RDMA_WRITE:
+ case MLX5_OPCODE_RDMA_WRITE_IMM:
+ case MLX5_OPCODE_RDMA_READ:
+ *wqe += sizeof(struct mlx5_wqe_raddr_seg);
break;
- case IB_QPT_UD:
- switch (opcode) {
- case MLX5_OPCODE_SEND:
- case MLX5_OPCODE_SEND_IMM:
- if (!(dev->odp_caps.per_transport_caps.ud_odp_caps &
- IB_ODP_SUPPORT_SEND))
- goto invalid_transport_or_opcode;
- *wqe += sizeof(struct mlx5_wqe_datagram_seg);
- break;
- default:
- goto invalid_transport_or_opcode;
- }
+ case MLX5_OPCODE_ATOMIC_CS:
+ case MLX5_OPCODE_ATOMIC_FA:
+ *wqe += sizeof(struct mlx5_wqe_raddr_seg);
+ *wqe += sizeof(struct mlx5_wqe_atomic_seg);
break;
- default:
-invalid_transport_or_opcode:
- mlx5_ib_err(dev, "ODP fault on QP of an unsupported opcode or transport. transport: 0x%x opcode: 0x%x.\n",
- qp->ibqp.qp_type, opcode);
- return -EFAULT;
}
return 0;
}
/*
- * Parse responder WQE. Advances the wqe pointer to point at the
- * scatter-gather list, and set wqe_end to the end of the WQE.
+ * Parse responder WQE and set wqe_end to the end of the WQE.
*/
-static int mlx5_ib_mr_responder_pfault_handler(
- struct mlx5_ib_qp *qp, struct mlx5_ib_pfault *pfault,
- void **wqe, void **wqe_end, int wqe_length)
+static int mlx5_ib_mr_responder_pfault_handler_srq(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_srq *srq,
+ void **wqe, void **wqe_end,
+ int wqe_length)
{
- struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.pd->device);
- struct mlx5_ib_wq *wq = &qp->rq;
- int wqe_size = 1 << wq->wqe_shift;
+ int wqe_size = 1 << srq->msrq.wqe_shift;
- if (qp->ibqp.srq) {
- mlx5_ib_err(dev, "ODP fault on SRQ is not supported\n");
+ if (wqe_size > wqe_length) {
+ mlx5_ib_err(dev, "Couldn't read all of the receive WQE's content\n");
return -EFAULT;
}
- if (qp->wq_sig) {
+ *wqe_end = *wqe + wqe_size;
+ *wqe += sizeof(struct mlx5_wqe_srq_next_seg);
+
+ return 0;
+}
+
+static int mlx5_ib_mr_responder_pfault_handler_rq(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_qp *qp,
+ void *wqe, void **wqe_end,
+ int wqe_length)
+{
+ struct mlx5_ib_wq *wq = &qp->rq;
+ int wqe_size = 1 << wq->wqe_shift;
+
+ if (qp->flags_en & MLX5_QP_FLAG_SIGNATURE) {
mlx5_ib_err(dev, "ODP fault with WQE signatures is not supported\n");
return -EFAULT;
}
@@ -511,106 +1330,148 @@ static int mlx5_ib_mr_responder_pfault_handler(
return -EFAULT;
}
- switch (qp->ibqp.qp_type) {
- case IB_QPT_RC:
- if (!(dev->odp_caps.per_transport_caps.rc_odp_caps &
- IB_ODP_SUPPORT_RECV))
- goto invalid_transport_or_opcode;
+ *wqe_end = wqe + wqe_size;
+
+ return 0;
+}
+
+static inline struct mlx5_core_rsc_common *odp_get_rsc(struct mlx5_ib_dev *dev,
+ u32 wq_num, int pf_type)
+{
+ struct mlx5_core_rsc_common *common = NULL;
+ struct mlx5_core_srq *srq;
+
+ switch (pf_type) {
+ case MLX5_WQE_PF_TYPE_RMP:
+ srq = mlx5_cmd_get_srq(dev, wq_num);
+ if (srq)
+ common = &srq->common;
+ break;
+ case MLX5_WQE_PF_TYPE_REQ_SEND_OR_WRITE:
+ case MLX5_WQE_PF_TYPE_RESP:
+ case MLX5_WQE_PF_TYPE_REQ_READ_OR_ATOMIC:
+ common = mlx5_core_res_hold(dev, wq_num, MLX5_RES_QP);
break;
default:
-invalid_transport_or_opcode:
- mlx5_ib_err(dev, "ODP fault on QP of an unsupported transport. transport: 0x%x\n",
- qp->ibqp.qp_type);
- return -EFAULT;
+ break;
}
- *wqe_end = *wqe + wqe_size;
+ return common;
+}
- return 0;
+static inline struct mlx5_ib_qp *res_to_qp(struct mlx5_core_rsc_common *res)
+{
+ struct mlx5_core_qp *mqp = (struct mlx5_core_qp *)res;
+
+ return to_mibqp(mqp);
}
-static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_qp *qp,
- struct mlx5_ib_pfault *pfault)
+static inline struct mlx5_ib_srq *res_to_srq(struct mlx5_core_rsc_common *res)
{
- struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.pd->device);
- int ret;
- void *wqe, *wqe_end;
- u32 bytes_mapped, total_wqe_bytes;
- char *buffer = NULL;
- int resume_with_error = 0;
- u16 wqe_index = pfault->mpfault.wqe.wqe_index;
- int requestor = pfault->mpfault.flags & MLX5_PFAULT_REQUESTOR;
- u32 qpn = qp->trans_qp.base.mqp.qpn;
+ struct mlx5_core_srq *msrq =
+ container_of(res, struct mlx5_core_srq, common);
- buffer = (char *)__get_free_page(GFP_KERNEL);
- if (!buffer) {
- mlx5_ib_err(dev, "Error allocating memory for IO page fault handling.\n");
- resume_with_error = 1;
- goto resolve_page_fault;
+ return to_mibsrq(msrq);
+}
+
+static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev,
+ struct mlx5_pagefault *pfault)
+{
+ bool sq = pfault->type & MLX5_PFAULT_REQUESTOR;
+ u16 wqe_index = pfault->wqe.wqe_index;
+ void *wqe, *wqe_start = NULL, *wqe_end = NULL;
+ u32 bytes_mapped, total_wqe_bytes;
+ struct mlx5_core_rsc_common *res;
+ int resume_with_error = 1;
+ struct mlx5_ib_qp *qp;
+ size_t bytes_copied;
+ int ret = 0;
+
+ res = odp_get_rsc(dev, pfault->wqe.wq_num, pfault->type);
+ if (!res) {
+ mlx5_ib_dbg(dev, "wqe page fault for missing resource %d\n", pfault->wqe.wq_num);
+ return;
}
- ret = mlx5_ib_read_user_wqe(qp, requestor, wqe_index, buffer,
- PAGE_SIZE, &qp->trans_qp.base);
- if (ret < 0) {
- mlx5_ib_err(dev, "Failed reading a WQE following page fault, error=%x, wqe_index=%x, qpn=%x\n",
- -ret, wqe_index, qpn);
- resume_with_error = 1;
+ if (res->res != MLX5_RES_QP && res->res != MLX5_RES_SRQ &&
+ res->res != MLX5_RES_XSRQ) {
+ mlx5_ib_err(dev, "wqe page fault for unsupported type %d\n",
+ pfault->type);
goto resolve_page_fault;
}
- wqe = buffer;
- if (requestor)
- ret = mlx5_ib_mr_initiator_pfault_handler(qp, pfault, &wqe,
- &wqe_end, ret);
- else
- ret = mlx5_ib_mr_responder_pfault_handler(qp, pfault, &wqe,
- &wqe_end, ret);
- if (ret < 0) {
- resume_with_error = 1;
+ wqe_start = (void *)__get_free_page(GFP_KERNEL);
+ if (!wqe_start) {
+ mlx5_ib_err(dev, "Error allocating memory for IO page fault handling.\n");
goto resolve_page_fault;
}
- if (wqe >= wqe_end) {
- mlx5_ib_err(dev, "ODP fault on invalid WQE.\n");
- resume_with_error = 1;
- goto resolve_page_fault;
+ wqe = wqe_start;
+ qp = (res->res == MLX5_RES_QP) ? res_to_qp(res) : NULL;
+ if (qp && sq) {
+ ret = mlx5_ib_read_wqe_sq(qp, wqe_index, wqe, PAGE_SIZE,
+ &bytes_copied);
+ if (ret)
+ goto read_user;
+ ret = mlx5_ib_mr_initiator_pfault_handler(
+ dev, pfault, qp, &wqe, &wqe_end, bytes_copied);
+ } else if (qp && !sq) {
+ ret = mlx5_ib_read_wqe_rq(qp, wqe_index, wqe, PAGE_SIZE,
+ &bytes_copied);
+ if (ret)
+ goto read_user;
+ ret = mlx5_ib_mr_responder_pfault_handler_rq(
+ dev, qp, wqe, &wqe_end, bytes_copied);
+ } else if (!qp) {
+ struct mlx5_ib_srq *srq = res_to_srq(res);
+
+ ret = mlx5_ib_read_wqe_srq(srq, wqe_index, wqe, PAGE_SIZE,
+ &bytes_copied);
+ if (ret)
+ goto read_user;
+ ret = mlx5_ib_mr_responder_pfault_handler_srq(
+ dev, srq, &wqe, &wqe_end, bytes_copied);
}
- ret = pagefault_data_segments(qp, pfault, wqe, wqe_end, &bytes_mapped,
- &total_wqe_bytes, !requestor);
- if (ret == -EAGAIN) {
+ if (ret < 0 || wqe >= wqe_end)
goto resolve_page_fault;
- } else if (ret < 0 || total_wqe_bytes > bytes_mapped) {
- mlx5_ib_err(dev, "Error getting user pages for page fault. Error: 0x%x\n",
- -ret);
- resume_with_error = 1;
+
+ ret = pagefault_data_segments(dev, pfault, wqe, wqe_end, &bytes_mapped,
+ &total_wqe_bytes, !sq);
+ if (ret == -EAGAIN)
+ goto out;
+
+ if (ret < 0 || total_wqe_bytes > bytes_mapped)
goto resolve_page_fault;
- }
-resolve_page_fault:
- mlx5_ib_page_fault_resume(qp, pfault, resume_with_error);
- mlx5_ib_dbg(dev, "PAGE FAULT completed. QP 0x%x resume_with_error=%d, flags: 0x%x\n",
- qpn, resume_with_error,
- pfault->mpfault.flags);
+out:
+ ret = 0;
+ resume_with_error = 0;
- free_page((unsigned long)buffer);
-}
+read_user:
+ if (ret)
+ mlx5_ib_err(
+ dev,
+ "Failed reading a WQE following page fault, error %d, wqe_index %x, qpn %llx\n",
+ ret, wqe_index, pfault->token);
-static int pages_in_range(u64 address, u32 length)
-{
- return (ALIGN(address + length, PAGE_SIZE) -
- (address & PAGE_MASK)) >> PAGE_SHIFT;
+resolve_page_fault:
+ mlx5_ib_page_fault_resume(dev, pfault, resume_with_error);
+ mlx5_ib_dbg(dev, "PAGE FAULT completed. QP 0x%x resume_with_error=%d, type: 0x%x\n",
+ pfault->wqe.wq_num, resume_with_error,
+ pfault->type);
+ mlx5_core_res_put(res);
+ free_page((unsigned long)wqe_start);
}
-static void mlx5_ib_mr_rdma_pfault_handler(struct mlx5_ib_qp *qp,
- struct mlx5_ib_pfault *pfault)
+static void mlx5_ib_mr_rdma_pfault_handler(struct mlx5_ib_dev *dev,
+ struct mlx5_pagefault *pfault)
{
- struct mlx5_pagefault *mpfault = &pfault->mpfault;
u64 address;
u32 length;
- u32 prefetch_len = mpfault->bytes_committed;
+ u32 prefetch_len = pfault->bytes_committed;
int prefetch_activated = 0;
- u32 rkey = mpfault->rdma.r_key;
+ u32 rkey = pfault->rdma.r_key;
int ret;
/* The RDMA responder handler handles the page fault in two parts.
@@ -619,38 +1480,40 @@ static void mlx5_ib_mr_rdma_pfault_handler(struct mlx5_ib_qp *qp,
* prefetches more pages. The second operation cannot use the pfault
* context and therefore uses the dummy_pfault context allocated on
* the stack */
- struct mlx5_ib_pfault dummy_pfault = {};
-
- dummy_pfault.mpfault.bytes_committed = 0;
+ pfault->rdma.rdma_va += pfault->bytes_committed;
+ pfault->rdma.rdma_op_len -= min(pfault->bytes_committed,
+ pfault->rdma.rdma_op_len);
+ pfault->bytes_committed = 0;
- mpfault->rdma.rdma_va += mpfault->bytes_committed;
- mpfault->rdma.rdma_op_len -= min(mpfault->bytes_committed,
- mpfault->rdma.rdma_op_len);
- mpfault->bytes_committed = 0;
-
- address = mpfault->rdma.rdma_va;
- length = mpfault->rdma.rdma_op_len;
+ address = pfault->rdma.rdma_va;
+ length = pfault->rdma.rdma_op_len;
/* For some operations, the hardware cannot tell the exact message
* length, and in those cases it reports zero. Use prefetch
* logic. */
if (length == 0) {
prefetch_activated = 1;
- length = mpfault->rdma.packet_size;
+ length = pfault->rdma.packet_size;
prefetch_len = min(MAX_PREFETCH_LEN, prefetch_len);
}
- ret = pagefault_single_data_segment(qp, pfault, rkey, address, length,
- NULL);
+ ret = pagefault_single_data_segment(dev, NULL, rkey, address, length,
+ &pfault->bytes_committed, NULL);
if (ret == -EAGAIN) {
/* We're racing with an invalidation, don't prefetch */
prefetch_activated = 0;
- } else if (ret < 0 || pages_in_range(address, length) > ret) {
- mlx5_ib_page_fault_resume(qp, pfault, 1);
+ } else if (ret < 0) {
+ mlx5_ib_page_fault_resume(dev, pfault, 1);
+ if (ret != -ENOENT)
+ mlx5_ib_dbg(dev, "PAGE FAULT error %d. QP 0x%llx, type: 0x%x\n",
+ ret, pfault->token, pfault->type);
return;
}
- mlx5_ib_page_fault_resume(qp, pfault, 0);
+ mlx5_ib_page_fault_resume(dev, pfault, 0);
+ mlx5_ib_dbg(dev, "PAGE FAULT completed. QP 0x%llx, type: 0x%x, prefetch_activated: %d\n",
+ pfault->token, pfault->type,
+ prefetch_activated);
/* At this point, there might be a new pagefault already arriving in
* the eq, switch to the dummy pagefault for the rest of the
@@ -658,139 +1521,591 @@ static void mlx5_ib_mr_rdma_pfault_handler(struct mlx5_ib_qp *qp,
* work-queue is being fenced. */
if (prefetch_activated) {
- ret = pagefault_single_data_segment(qp, &dummy_pfault, rkey,
- address,
+ u32 bytes_committed = 0;
+
+ ret = pagefault_single_data_segment(dev, NULL, rkey, address,
prefetch_len,
- NULL);
- if (ret < 0) {
- pr_warn("Prefetch failed (ret = %d, prefetch_activated = %d) for QPN %d, address: 0x%.16llx, length = 0x%.16x\n",
- ret, prefetch_activated,
- qp->ibqp.qp_num, address, prefetch_len);
+ &bytes_committed, NULL);
+ if (ret < 0 && ret != -EAGAIN) {
+ mlx5_ib_dbg(dev, "Prefetch failed. ret: %d, QP 0x%llx, address: 0x%.16llx, length = 0x%.16x\n",
+ ret, pfault->token, address, prefetch_len);
}
}
}
-void mlx5_ib_mr_pfault_handler(struct mlx5_ib_qp *qp,
- struct mlx5_ib_pfault *pfault)
+#define MLX5_MEMORY_PAGE_FAULT_FLAGS_LAST BIT(7)
+static void mlx5_ib_mr_memory_pfault_handler(struct mlx5_ib_dev *dev,
+ struct mlx5_pagefault *pfault)
{
- u8 event_subtype = pfault->mpfault.event_subtype;
+ u64 prefetch_va =
+ pfault->memory.va - pfault->memory.prefetch_before_byte_count;
+ size_t prefetch_size = pfault->memory.prefetch_before_byte_count +
+ pfault->memory.fault_byte_count +
+ pfault->memory.prefetch_after_byte_count;
+ struct mlx5_ib_mkey *mmkey;
+ struct mlx5_ib_mr *mr, *child_mr;
+ int ret = 0;
+
+ mmkey = find_odp_mkey(dev, pfault->memory.mkey);
+ if (IS_ERR(mmkey))
+ goto err;
+
+ switch (mmkey->type) {
+ case MLX5_MKEY_IMPLICIT_CHILD:
+ child_mr = container_of(mmkey, struct mlx5_ib_mr, mmkey);
+ mr = child_mr->parent;
+ break;
+ case MLX5_MKEY_NULL:
+ mr = container_of(mmkey, struct mlx5_ib_mr, null_mmkey);
+ break;
+ default:
+ mr = container_of(mmkey, struct mlx5_ib_mr, mmkey);
+ break;
+ }
+
+ /* If prefetch fails, handle only demanded page fault */
+ ret = pagefault_mr(mr, prefetch_va, prefetch_size, NULL, 0, true);
+ if (ret < 0) {
+ ret = pagefault_mr(mr, pfault->memory.va,
+ pfault->memory.fault_byte_count, NULL, 0,
+ true);
+ if (ret < 0)
+ goto err;
+ }
+
+ mlx5_update_odp_stats_with_handled(mr, faults, ret);
+ mlx5r_deref_odp_mkey(mmkey);
+
+ if (pfault->memory.flags & MLX5_MEMORY_PAGE_FAULT_FLAGS_LAST)
+ mlx5_ib_page_fault_resume(dev, pfault, 0);
+
+ mlx5_ib_dbg(
+ dev,
+ "PAGE FAULT completed %s. token 0x%llx, mkey: 0x%x, va: 0x%llx, byte_count: 0x%x\n",
+ pfault->memory.flags & MLX5_MEMORY_PAGE_FAULT_FLAGS_LAST ?
+ "" :
+ "without resume cmd",
+ pfault->token, pfault->memory.mkey, pfault->memory.va,
+ pfault->memory.fault_byte_count);
+
+ return;
+
+err:
+ if (!IS_ERR(mmkey))
+ mlx5r_deref_odp_mkey(mmkey);
+ mlx5_ib_page_fault_resume(dev, pfault, 1);
+ mlx5_ib_dbg(
+ dev,
+ "PAGE FAULT error. token 0x%llx, mkey: 0x%x, va: 0x%llx, byte_count: 0x%x, err: %d\n",
+ pfault->token, pfault->memory.mkey, pfault->memory.va,
+ pfault->memory.fault_byte_count, ret);
+}
+
+static void mlx5_ib_pfault(struct mlx5_ib_dev *dev, struct mlx5_pagefault *pfault)
+{
+ u8 event_subtype = pfault->event_subtype;
switch (event_subtype) {
case MLX5_PFAULT_SUBTYPE_WQE:
- mlx5_ib_mr_wqe_pfault_handler(qp, pfault);
+ mlx5_ib_mr_wqe_pfault_handler(dev, pfault);
break;
case MLX5_PFAULT_SUBTYPE_RDMA:
- mlx5_ib_mr_rdma_pfault_handler(qp, pfault);
+ mlx5_ib_mr_rdma_pfault_handler(dev, pfault);
break;
- default:
- pr_warn("Invalid page fault event subtype: 0x%x\n",
- event_subtype);
- mlx5_ib_page_fault_resume(qp, pfault, 1);
+ case MLX5_PFAULT_SUBTYPE_MEMORY:
+ mlx5_ib_mr_memory_pfault_handler(dev, pfault);
break;
+ default:
+ mlx5_ib_err(dev, "Invalid page fault event subtype: 0x%x\n",
+ event_subtype);
+ mlx5_ib_page_fault_resume(dev, pfault, 1);
}
}
-static void mlx5_ib_qp_pfault_action(struct work_struct *work)
+static void mlx5_ib_eqe_pf_action(struct work_struct *work)
{
- struct mlx5_ib_pfault *pfault = container_of(work,
- struct mlx5_ib_pfault,
+ struct mlx5_pagefault *pfault = container_of(work,
+ struct mlx5_pagefault,
work);
- enum mlx5_ib_pagefault_context context =
- mlx5_ib_get_pagefault_context(&pfault->mpfault);
- struct mlx5_ib_qp *qp = container_of(pfault, struct mlx5_ib_qp,
- pagefaults[context]);
- mlx5_ib_mr_pfault_handler(qp, pfault);
+ struct mlx5_ib_pf_eq *eq = pfault->eq;
+
+ mlx5_ib_pfault(eq->dev, pfault);
+ mempool_free(pfault, eq->pool);
}
-void mlx5_ib_qp_disable_pagefaults(struct mlx5_ib_qp *qp)
+#define MEMORY_SCHEME_PAGE_FAULT_GRANULARITY 4096
+static void mlx5_ib_eq_pf_process(struct mlx5_ib_pf_eq *eq)
{
- unsigned long flags;
+ struct mlx5_eqe_page_fault *pf_eqe;
+ struct mlx5_pagefault *pfault;
+ struct mlx5_eqe *eqe;
+ int cc = 0;
+
+ while ((eqe = mlx5_eq_get_eqe(eq->core, cc))) {
+ pfault = mempool_alloc(eq->pool, GFP_ATOMIC);
+ if (!pfault) {
+ schedule_work(&eq->work);
+ break;
+ }
- spin_lock_irqsave(&qp->disable_page_faults_lock, flags);
- qp->disable_page_faults = 1;
- spin_unlock_irqrestore(&qp->disable_page_faults_lock, flags);
+ pf_eqe = &eqe->data.page_fault;
+ pfault->event_subtype = eqe->sub_type;
+
+ switch (eqe->sub_type) {
+ case MLX5_PFAULT_SUBTYPE_RDMA:
+ /* RDMA based event */
+ pfault->bytes_committed =
+ be32_to_cpu(pf_eqe->rdma.bytes_committed);
+ pfault->type =
+ be32_to_cpu(pf_eqe->rdma.pftype_token) >> 24;
+ pfault->token =
+ be32_to_cpu(pf_eqe->rdma.pftype_token) &
+ MLX5_24BIT_MASK;
+ pfault->rdma.r_key =
+ be32_to_cpu(pf_eqe->rdma.r_key);
+ pfault->rdma.packet_size =
+ be16_to_cpu(pf_eqe->rdma.packet_length);
+ pfault->rdma.rdma_op_len =
+ be32_to_cpu(pf_eqe->rdma.rdma_op_len);
+ pfault->rdma.rdma_va =
+ be64_to_cpu(pf_eqe->rdma.rdma_va);
+ mlx5_ib_dbg(
+ eq->dev,
+ "PAGE_FAULT: subtype: 0x%02x, bytes_committed: 0x%06x, type:0x%x, token: 0x%06llx, r_key: 0x%08x\n",
+ eqe->sub_type, pfault->bytes_committed,
+ pfault->type, pfault->token,
+ pfault->rdma.r_key);
+ mlx5_ib_dbg(eq->dev,
+ "PAGE_FAULT: rdma_op_len: 0x%08x, rdma_va: 0x%016llx\n",
+ pfault->rdma.rdma_op_len,
+ pfault->rdma.rdma_va);
+ break;
- /*
- * Note that at this point, we are guarenteed that no more
- * work queue elements will be posted to the work queue with
- * the QP we are closing.
- */
- flush_workqueue(mlx5_ib_page_fault_wq);
+ case MLX5_PFAULT_SUBTYPE_WQE:
+ /* WQE based event */
+ pfault->bytes_committed =
+ be32_to_cpu(pf_eqe->wqe.bytes_committed);
+ pfault->type =
+ (be32_to_cpu(pf_eqe->wqe.pftype_wq) >> 24) & 0x7;
+ pfault->token =
+ be32_to_cpu(pf_eqe->wqe.token);
+ pfault->wqe.wq_num =
+ be32_to_cpu(pf_eqe->wqe.pftype_wq) &
+ MLX5_24BIT_MASK;
+ pfault->wqe.wqe_index =
+ be16_to_cpu(pf_eqe->wqe.wqe_index);
+ pfault->wqe.packet_size =
+ be16_to_cpu(pf_eqe->wqe.packet_length);
+ mlx5_ib_dbg(
+ eq->dev,
+ "PAGE_FAULT: subtype: 0x%02x, bytes_committed: 0x%06x, type:0x%x, token: 0x%06llx, wq_num: 0x%06x, wqe_index: 0x%04x\n",
+ eqe->sub_type, pfault->bytes_committed,
+ pfault->type, pfault->token, pfault->wqe.wq_num,
+ pfault->wqe.wqe_index);
+ break;
+
+ case MLX5_PFAULT_SUBTYPE_MEMORY:
+ /* Memory based event */
+ pfault->bytes_committed = 0;
+ pfault->token =
+ be32_to_cpu(pf_eqe->memory.token31_0) |
+ ((u64)be16_to_cpu(pf_eqe->memory.token47_32)
+ << 32);
+ pfault->memory.va = be64_to_cpu(pf_eqe->memory.va);
+ pfault->memory.mkey = be32_to_cpu(pf_eqe->memory.mkey);
+ pfault->memory.fault_byte_count = (be32_to_cpu(
+ pf_eqe->memory.demand_fault_pages) >> 12) *
+ MEMORY_SCHEME_PAGE_FAULT_GRANULARITY;
+ pfault->memory.prefetch_before_byte_count =
+ be16_to_cpu(
+ pf_eqe->memory.pre_demand_fault_pages) *
+ MEMORY_SCHEME_PAGE_FAULT_GRANULARITY;
+ pfault->memory.prefetch_after_byte_count =
+ be16_to_cpu(
+ pf_eqe->memory.post_demand_fault_pages) *
+ MEMORY_SCHEME_PAGE_FAULT_GRANULARITY;
+ pfault->memory.flags = pf_eqe->memory.flags;
+ mlx5_ib_dbg(
+ eq->dev,
+ "PAGE_FAULT: subtype: 0x%02x, token: 0x%06llx, mkey: 0x%06x, fault_byte_count: 0x%06x, va: 0x%016llx, flags: 0x%02x\n",
+ eqe->sub_type, pfault->token,
+ pfault->memory.mkey,
+ pfault->memory.fault_byte_count,
+ pfault->memory.va, pfault->memory.flags);
+ mlx5_ib_dbg(
+ eq->dev,
+ "PAGE_FAULT: prefetch size: before: 0x%06x, after 0x%06x\n",
+ pfault->memory.prefetch_before_byte_count,
+ pfault->memory.prefetch_after_byte_count);
+ break;
+
+ default:
+ mlx5_ib_warn(eq->dev,
+ "Unsupported page fault event sub-type: 0x%02hhx\n",
+ eqe->sub_type);
+ /* Unsupported page faults should still be
+ * resolved by the page fault handler
+ */
+ }
+
+ pfault->eq = eq;
+ INIT_WORK(&pfault->work, mlx5_ib_eqe_pf_action);
+ queue_work(eq->wq, &pfault->work);
+
+ cc = mlx5_eq_update_cc(eq->core, ++cc);
+ }
+
+ mlx5_eq_update_ci(eq->core, cc, 1);
}
-void mlx5_ib_qp_enable_pagefaults(struct mlx5_ib_qp *qp)
+static int mlx5_ib_eq_pf_int(struct notifier_block *nb, unsigned long type,
+ void *data)
{
+ struct mlx5_ib_pf_eq *eq =
+ container_of(nb, struct mlx5_ib_pf_eq, irq_nb);
unsigned long flags;
- spin_lock_irqsave(&qp->disable_page_faults_lock, flags);
- qp->disable_page_faults = 0;
- spin_unlock_irqrestore(&qp->disable_page_faults_lock, flags);
+ if (spin_trylock_irqsave(&eq->lock, flags)) {
+ mlx5_ib_eq_pf_process(eq);
+ spin_unlock_irqrestore(&eq->lock, flags);
+ } else {
+ schedule_work(&eq->work);
+ }
+
+ return IRQ_HANDLED;
}
-static void mlx5_ib_pfault_handler(struct mlx5_core_qp *qp,
- struct mlx5_pagefault *pfault)
+/* mempool_refill() was proposed but unfortunately wasn't accepted
+ * http://lkml.iu.edu/hypermail/linux/kernel/1512.1/05073.html
+ * Cheap workaround.
+ */
+static void mempool_refill(mempool_t *pool)
{
- /*
- * Note that we will only get one fault event per QP per context
- * (responder/initiator, read/write), until we resolve the page fault
- * with the mlx5_ib_page_fault_resume command. Since this function is
- * called from within the work element, there is no risk of missing
- * events.
- */
- struct mlx5_ib_qp *mibqp = to_mibqp(qp);
- enum mlx5_ib_pagefault_context context =
- mlx5_ib_get_pagefault_context(pfault);
- struct mlx5_ib_pfault *qp_pfault = &mibqp->pagefaults[context];
+ while (pool->curr_nr < pool->min_nr)
+ mempool_free(mempool_alloc(pool, GFP_KERNEL), pool);
+}
+
+static void mlx5_ib_eq_pf_action(struct work_struct *work)
+{
+ struct mlx5_ib_pf_eq *eq =
+ container_of(work, struct mlx5_ib_pf_eq, work);
- qp_pfault->mpfault = *pfault;
+ mempool_refill(eq->pool);
- /* No need to stop interrupts here since we are in an interrupt */
- spin_lock(&mibqp->disable_page_faults_lock);
- if (!mibqp->disable_page_faults)
- queue_work(mlx5_ib_page_fault_wq, &qp_pfault->work);
- spin_unlock(&mibqp->disable_page_faults_lock);
+ spin_lock_irq(&eq->lock);
+ mlx5_ib_eq_pf_process(eq);
+ spin_unlock_irq(&eq->lock);
}
-void mlx5_ib_odp_create_qp(struct mlx5_ib_qp *qp)
+enum {
+ MLX5_IB_NUM_PF_EQE = 0x1000,
+ MLX5_IB_NUM_PF_DRAIN = 64,
+};
+
+int mlx5r_odp_create_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq)
{
- int i;
+ struct mlx5_eq_param param = {};
+ int err = 0;
+
+ mutex_lock(&dev->odp_eq_mutex);
+ if (eq->core)
+ goto unlock;
+ INIT_WORK(&eq->work, mlx5_ib_eq_pf_action);
+ spin_lock_init(&eq->lock);
+ eq->dev = dev;
+
+ eq->pool = mempool_create_kmalloc_pool(MLX5_IB_NUM_PF_DRAIN,
+ sizeof(struct mlx5_pagefault));
+ if (!eq->pool) {
+ err = -ENOMEM;
+ goto unlock;
+ }
+
+ eq->wq = alloc_workqueue("mlx5_ib_page_fault",
+ WQ_HIGHPRI | WQ_UNBOUND | WQ_MEM_RECLAIM,
+ MLX5_NUM_CMD_EQE);
+ if (!eq->wq) {
+ err = -ENOMEM;
+ goto err_mempool;
+ }
+
+ eq->irq_nb.notifier_call = mlx5_ib_eq_pf_int;
+ param = (struct mlx5_eq_param) {
+ .nent = MLX5_IB_NUM_PF_EQE,
+ };
+ param.mask[0] = 1ull << MLX5_EVENT_TYPE_PAGE_FAULT;
+ eq->core = mlx5_eq_create_generic(dev->mdev, &param);
+ if (IS_ERR(eq->core)) {
+ err = PTR_ERR(eq->core);
+ goto err_wq;
+ }
+ err = mlx5_eq_enable(dev->mdev, eq->core, &eq->irq_nb);
+ if (err) {
+ mlx5_ib_err(dev, "failed to enable odp EQ %d\n", err);
+ goto err_eq;
+ }
- qp->disable_page_faults = 1;
- spin_lock_init(&qp->disable_page_faults_lock);
+ mutex_unlock(&dev->odp_eq_mutex);
+ return 0;
+err_eq:
+ mlx5_eq_destroy_generic(dev->mdev, eq->core);
+err_wq:
+ eq->core = NULL;
+ destroy_workqueue(eq->wq);
+err_mempool:
+ mempool_destroy(eq->pool);
+unlock:
+ mutex_unlock(&dev->odp_eq_mutex);
+ return err;
+}
- qp->trans_qp.base.mqp.pfault_handler = mlx5_ib_pfault_handler;
+static int
+mlx5_ib_odp_destroy_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq)
+{
+ int err;
- for (i = 0; i < MLX5_IB_PAGEFAULT_CONTEXTS; ++i)
- INIT_WORK(&qp->pagefaults[i].work, mlx5_ib_qp_pfault_action);
+ if (!eq->core)
+ return 0;
+ mlx5_eq_disable(dev->mdev, eq->core, &eq->irq_nb);
+ err = mlx5_eq_destroy_generic(dev->mdev, eq->core);
+ cancel_work_sync(&eq->work);
+ destroy_workqueue(eq->wq);
+ mempool_destroy(eq->pool);
+
+ return err;
}
-int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev)
+int mlx5_odp_init_mkey_cache(struct mlx5_ib_dev *dev)
{
- int ret;
+ struct mlx5r_cache_rb_key rb_key = {
+ .access_mode = MLX5_MKC_ACCESS_MODE_KSM,
+ .ndescs = mlx5_imr_ksm_entries,
+ .ph = MLX5_IB_NO_PH,
+ };
+ struct mlx5_cache_ent *ent;
- ret = init_srcu_struct(&ibdev->mr_srcu);
- if (ret)
- return ret;
+ if (!(dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT))
+ return 0;
+
+ ent = mlx5r_cache_create_ent_locked(dev, rb_key, true);
+ if (IS_ERR(ent))
+ return PTR_ERR(ent);
return 0;
}
-void mlx5_ib_odp_remove_one(struct mlx5_ib_dev *ibdev)
+static const struct ib_device_ops mlx5_ib_dev_odp_ops = {
+ .advise_mr = mlx5_ib_advise_mr,
+};
+
+int mlx5_ib_odp_init_one(struct mlx5_ib_dev *dev)
{
- cleanup_srcu_struct(&ibdev->mr_srcu);
+ internal_fill_odp_caps(dev);
+
+ if (!(dev->odp_caps.general_caps & IB_ODP_SUPPORT))
+ return 0;
+
+ ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_odp_ops);
+
+ mutex_init(&dev->odp_eq_mutex);
+ return 0;
}
-int __init mlx5_ib_odp_init(void)
+void mlx5_ib_odp_cleanup_one(struct mlx5_ib_dev *dev)
{
- mlx5_ib_page_fault_wq = alloc_ordered_workqueue("mlx5_ib_page_faults",
- WQ_MEM_RECLAIM);
- if (!mlx5_ib_page_fault_wq)
- return -ENOMEM;
+ if (!(dev->odp_caps.general_caps & IB_ODP_SUPPORT))
+ return;
+
+ mlx5_ib_odp_destroy_eq(dev, &dev->odp_pf_eq);
+}
+
+int mlx5_ib_odp_init(void)
+{
+ u32 log_va_pages = ilog2(TASK_SIZE) - PAGE_SHIFT;
+ u8 mlx5_imr_mtt_bits;
+
+ /* 48 is default ARM64 VA space and covers X86 4-level paging which is 47 */
+ if (log_va_pages <= 48 - PAGE_SHIFT)
+ mlx5_imr_mtt_shift = 30;
+ /* 56 is x86-64, 5-level paging */
+ else if (log_va_pages <= 56 - PAGE_SHIFT)
+ mlx5_imr_mtt_shift = 34;
+ else
+ return 0;
+
+ mlx5_imr_mtt_size = BIT_ULL(mlx5_imr_mtt_shift);
+ mlx5_imr_mtt_bits = mlx5_imr_mtt_shift - PAGE_SHIFT;
+ mlx5_imr_mtt_entries = BIT_ULL(mlx5_imr_mtt_bits);
+ mlx5_imr_ksm_entries = BIT_ULL(get_order(TASK_SIZE) -
+ mlx5_imr_mtt_bits);
+
+ mlx5_imr_ksm_page_shift = mlx5_imr_mtt_shift;
+ return 0;
+}
+
+struct prefetch_mr_work {
+ struct work_struct work;
+ u32 pf_flags;
+ u32 num_sge;
+ struct {
+ u64 io_virt;
+ struct mlx5_ib_mr *mr;
+ size_t length;
+ } frags[];
+};
+
+static void destroy_prefetch_work(struct prefetch_mr_work *work)
+{
+ u32 i;
+
+ for (i = 0; i < work->num_sge; ++i)
+ mlx5r_deref_odp_mkey(&work->frags[i].mr->mmkey);
+
+ kvfree(work);
+}
+
+static struct mlx5_ib_mr *
+get_prefetchable_mr(struct ib_pd *pd, enum ib_uverbs_advise_mr_advice advice,
+ u32 lkey)
+{
+ struct mlx5_ib_dev *dev = to_mdev(pd->device);
+ struct mlx5_ib_mr *mr = NULL;
+ struct mlx5_ib_mkey *mmkey;
+
+ xa_lock(&dev->odp_mkeys);
+ mmkey = xa_load(&dev->odp_mkeys, mlx5_base_mkey(lkey));
+ if (!mmkey || mmkey->key != lkey) {
+ mr = ERR_PTR(-ENOENT);
+ goto end;
+ }
+ if (mmkey->type != MLX5_MKEY_MR) {
+ mr = ERR_PTR(-EINVAL);
+ goto end;
+ }
+
+ mr = container_of(mmkey, struct mlx5_ib_mr, mmkey);
+
+ if (mr->ibmr.pd != pd) {
+ mr = ERR_PTR(-EPERM);
+ goto end;
+ }
+
+ /* prefetch with write-access must be supported by the MR */
+ if (advice == IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_WRITE &&
+ !mr->umem->writable) {
+ mr = ERR_PTR(-EPERM);
+ goto end;
+ }
+
+ refcount_inc(&mmkey->usecount);
+end:
+ xa_unlock(&dev->odp_mkeys);
+ return mr;
+}
+
+static void mlx5_ib_prefetch_mr_work(struct work_struct *w)
+{
+ struct prefetch_mr_work *work =
+ container_of(w, struct prefetch_mr_work, work);
+ u32 bytes_mapped = 0;
+ int ret;
+ u32 i;
+
+ /* We rely on IB/core that work is executed if we have num_sge != 0 only. */
+ WARN_ON(!work->num_sge);
+ for (i = 0; i < work->num_sge; ++i) {
+ ret = pagefault_mr(work->frags[i].mr, work->frags[i].io_virt,
+ work->frags[i].length, &bytes_mapped,
+ work->pf_flags, false);
+ if (ret <= 0)
+ continue;
+ mlx5_update_odp_stats(work->frags[i].mr, prefetch, ret);
+ }
+
+ destroy_prefetch_work(work);
+}
+
+static int init_prefetch_work(struct ib_pd *pd,
+ enum ib_uverbs_advise_mr_advice advice,
+ u32 pf_flags, struct prefetch_mr_work *work,
+ struct ib_sge *sg_list, u32 num_sge)
+{
+ u32 i;
+
+ INIT_WORK(&work->work, mlx5_ib_prefetch_mr_work);
+ work->pf_flags = pf_flags;
+
+ for (i = 0; i < num_sge; ++i) {
+ struct mlx5_ib_mr *mr;
+
+ mr = get_prefetchable_mr(pd, advice, sg_list[i].lkey);
+ if (IS_ERR(mr)) {
+ work->num_sge = i;
+ return PTR_ERR(mr);
+ }
+ work->frags[i].io_virt = sg_list[i].addr;
+ work->frags[i].length = sg_list[i].length;
+ work->frags[i].mr = mr;
+ }
+ work->num_sge = num_sge;
+ return 0;
+}
+
+static int mlx5_ib_prefetch_sg_list(struct ib_pd *pd,
+ enum ib_uverbs_advise_mr_advice advice,
+ u32 pf_flags, struct ib_sge *sg_list,
+ u32 num_sge)
+{
+ u32 bytes_mapped = 0;
+ int ret = 0;
+ u32 i;
+
+ for (i = 0; i < num_sge; ++i) {
+ struct mlx5_ib_mr *mr;
+
+ mr = get_prefetchable_mr(pd, advice, sg_list[i].lkey);
+ if (IS_ERR(mr))
+ return PTR_ERR(mr);
+ ret = pagefault_mr(mr, sg_list[i].addr, sg_list[i].length,
+ &bytes_mapped, pf_flags, false);
+ if (ret < 0) {
+ mlx5r_deref_odp_mkey(&mr->mmkey);
+ return ret;
+ }
+ mlx5_update_odp_stats(mr, prefetch, ret);
+ mlx5r_deref_odp_mkey(&mr->mmkey);
+ }
return 0;
}
-void mlx5_ib_odp_cleanup(void)
+int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd,
+ enum ib_uverbs_advise_mr_advice advice,
+ u32 flags, struct ib_sge *sg_list, u32 num_sge)
{
- destroy_workqueue(mlx5_ib_page_fault_wq);
+ u32 pf_flags = 0;
+ struct prefetch_mr_work *work;
+ int rc;
+
+ if (advice == IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH)
+ pf_flags |= MLX5_PF_FLAGS_DOWNGRADE;
+
+ if (advice == IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_NO_FAULT)
+ pf_flags |= MLX5_PF_FLAGS_SNAPSHOT;
+
+ if (flags & IB_UVERBS_ADVISE_MR_FLAG_FLUSH)
+ return mlx5_ib_prefetch_sg_list(pd, advice, pf_flags, sg_list,
+ num_sge);
+
+ work = kvzalloc(struct_size(work, frags, num_sge), GFP_KERNEL);
+ if (!work)
+ return -ENOMEM;
+
+ rc = init_prefetch_work(pd, advice, pf_flags, work, sg_list, num_sge);
+ if (rc) {
+ destroy_prefetch_work(work);
+ return rc;
+ }
+ queue_work(system_dfl_wq, &work->work);
+ return 0;
}
diff --git a/drivers/infiniband/hw/mlx5/qos.c b/drivers/infiniband/hw/mlx5/qos.c
new file mode 100644
index 000000000000..dce92554142a
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/qos.c
@@ -0,0 +1,133 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2020, Mellanox Technologies inc. All rights reserved.
+ */
+
+#include <rdma/uverbs_ioctl.h>
+#include <rdma/mlx5_user_ioctl_cmds.h>
+#include <rdma/mlx5_user_ioctl_verbs.h>
+#include <linux/mlx5/driver.h>
+#include "mlx5_ib.h"
+
+#define UVERBS_MODULE_NAME mlx5_ib
+#include <rdma/uverbs_named_ioctl.h>
+
+static bool pp_is_supported(struct ib_device *device)
+{
+ struct mlx5_ib_dev *dev = to_mdev(device);
+
+ return (MLX5_CAP_GEN(dev->mdev, qos) &&
+ MLX5_CAP_QOS(dev->mdev, packet_pacing) &&
+ MLX5_CAP_QOS(dev->mdev, packet_pacing_uid));
+}
+
+static int UVERBS_HANDLER(MLX5_IB_METHOD_PP_OBJ_ALLOC)(
+ struct uverbs_attr_bundle *attrs)
+{
+ u8 rl_raw[MLX5_ST_SZ_BYTES(set_pp_rate_limit_context)] = {};
+ struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs,
+ MLX5_IB_ATTR_PP_OBJ_ALLOC_HANDLE);
+ struct mlx5_ib_dev *dev;
+ struct mlx5_ib_ucontext *c;
+ struct mlx5_ib_pp *pp_entry;
+ void *in_ctx;
+ u16 uid;
+ int inlen;
+ u32 flags;
+ int err;
+
+ c = to_mucontext(ib_uverbs_get_ucontext(attrs));
+ if (IS_ERR(c))
+ return PTR_ERR(c);
+
+ /* The allocated entry can be used only by a DEVX context */
+ if (!c->devx_uid)
+ return -EINVAL;
+
+ dev = to_mdev(c->ibucontext.device);
+ pp_entry = kzalloc(sizeof(*pp_entry), GFP_KERNEL);
+ if (!pp_entry)
+ return -ENOMEM;
+
+ in_ctx = uverbs_attr_get_alloced_ptr(attrs,
+ MLX5_IB_ATTR_PP_OBJ_ALLOC_CTX);
+ inlen = uverbs_attr_get_len(attrs,
+ MLX5_IB_ATTR_PP_OBJ_ALLOC_CTX);
+ memcpy(rl_raw, in_ctx, inlen);
+ err = uverbs_get_flags32(&flags, attrs,
+ MLX5_IB_ATTR_PP_OBJ_ALLOC_FLAGS,
+ MLX5_IB_UAPI_PP_ALLOC_FLAGS_DEDICATED_INDEX);
+ if (err)
+ goto err;
+
+ uid = (flags & MLX5_IB_UAPI_PP_ALLOC_FLAGS_DEDICATED_INDEX) ?
+ c->devx_uid : MLX5_SHARED_RESOURCE_UID;
+
+ err = mlx5_rl_add_rate_raw(dev->mdev, rl_raw, uid,
+ (flags & MLX5_IB_UAPI_PP_ALLOC_FLAGS_DEDICATED_INDEX),
+ &pp_entry->index);
+ if (err)
+ goto err;
+
+ pp_entry->mdev = dev->mdev;
+ uobj->object = pp_entry;
+ uverbs_finalize_uobj_create(attrs, MLX5_IB_ATTR_PP_OBJ_ALLOC_HANDLE);
+
+ err = uverbs_copy_to(attrs, MLX5_IB_ATTR_PP_OBJ_ALLOC_INDEX,
+ &pp_entry->index, sizeof(pp_entry->index));
+ return err;
+
+err:
+ kfree(pp_entry);
+ return err;
+}
+
+static int pp_obj_cleanup(struct ib_uobject *uobject,
+ enum rdma_remove_reason why,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct mlx5_ib_pp *pp_entry = uobject->object;
+
+ mlx5_rl_remove_rate_raw(pp_entry->mdev, pp_entry->index);
+ kfree(pp_entry);
+ return 0;
+}
+
+DECLARE_UVERBS_NAMED_METHOD(
+ MLX5_IB_METHOD_PP_OBJ_ALLOC,
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_PP_OBJ_ALLOC_HANDLE,
+ MLX5_IB_OBJECT_PP,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(
+ MLX5_IB_ATTR_PP_OBJ_ALLOC_CTX,
+ UVERBS_ATTR_SIZE(1,
+ MLX5_ST_SZ_BYTES(set_pp_rate_limit_context)),
+ UA_MANDATORY,
+ UA_ALLOC_AND_COPY),
+ UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_PP_OBJ_ALLOC_FLAGS,
+ enum mlx5_ib_uapi_pp_alloc_flags,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_PP_OBJ_ALLOC_INDEX,
+ UVERBS_ATTR_TYPE(u16),
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(
+ MLX5_IB_METHOD_PP_OBJ_DESTROY,
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_PP_OBJ_DESTROY_HANDLE,
+ MLX5_IB_OBJECT_PP,
+ UVERBS_ACCESS_DESTROY,
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_PP,
+ UVERBS_TYPE_ALLOC_IDR(pp_obj_cleanup),
+ &UVERBS_METHOD(MLX5_IB_METHOD_PP_OBJ_ALLOC),
+ &UVERBS_METHOD(MLX5_IB_METHOD_PP_OBJ_DESTROY));
+
+
+const struct uapi_definition mlx5_ib_qos_defs[] = {
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
+ MLX5_IB_OBJECT_PP,
+ UAPI_DEF_IS_OBJ_SUPPORTED(pp_is_supported)),
+ {},
+};
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index a1b3125f0a6e..69af20790481 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -30,14 +30,19 @@
* SOFTWARE.
*/
-#include <linux/module.h>
+#include <linux/etherdevice.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_cache.h>
#include <rdma/ib_user_verbs.h>
+#include <rdma/rdma_counter.h>
+#include <linux/mlx5/fs.h>
#include "mlx5_ib.h"
-
-/* not supported currently */
-static int wq_signature;
+#include "ib_rep.h"
+#include "counters.h"
+#include "cmd.h"
+#include "umr.h"
+#include "qp.h"
+#include "wr.h"
enum {
MLX5_IB_ACK_REQ_FREQ = 8,
@@ -50,44 +55,34 @@ enum {
MLX5_IB_LINK_TYPE_ETH = 1
};
-enum {
- MLX5_IB_SQ_STRIDE = 6,
-};
-
-static const u32 mlx5_ib_opcode[] = {
- [IB_WR_SEND] = MLX5_OPCODE_SEND,
- [IB_WR_LSO] = MLX5_OPCODE_LSO,
- [IB_WR_SEND_WITH_IMM] = MLX5_OPCODE_SEND_IMM,
- [IB_WR_RDMA_WRITE] = MLX5_OPCODE_RDMA_WRITE,
- [IB_WR_RDMA_WRITE_WITH_IMM] = MLX5_OPCODE_RDMA_WRITE_IMM,
- [IB_WR_RDMA_READ] = MLX5_OPCODE_RDMA_READ,
- [IB_WR_ATOMIC_CMP_AND_SWP] = MLX5_OPCODE_ATOMIC_CS,
- [IB_WR_ATOMIC_FETCH_AND_ADD] = MLX5_OPCODE_ATOMIC_FA,
- [IB_WR_SEND_WITH_INV] = MLX5_OPCODE_SEND_INVAL,
- [IB_WR_LOCAL_INV] = MLX5_OPCODE_UMR,
- [IB_WR_REG_MR] = MLX5_OPCODE_UMR,
- [IB_WR_MASKED_ATOMIC_CMP_AND_SWP] = MLX5_OPCODE_ATOMIC_MASKED_CS,
- [IB_WR_MASKED_ATOMIC_FETCH_AND_ADD] = MLX5_OPCODE_ATOMIC_MASKED_FA,
- [MLX5_IB_WR_UMR] = MLX5_OPCODE_UMR,
-};
-
-struct mlx5_wqe_eth_pad {
- u8 rsvd0[16];
-};
-
enum raw_qp_set_mask_map {
MLX5_RAW_QP_MOD_SET_RQ_Q_CTR_ID = 1UL << 0,
MLX5_RAW_QP_RATE_LIMIT = 1UL << 1,
};
+enum {
+ MLX5_QP_RM_GO_BACK_N = 0x1,
+};
+
struct mlx5_modify_raw_qp_param {
u16 operation;
u32 set_mask; /* raw_qp_set_mask_map */
- u32 rate_limit;
+
+ struct mlx5_rate_limit rl;
+
u8 rq_q_ctr_id;
+ u32 port;
+};
+
+struct mlx5_ib_qp_event_work {
+ struct work_struct work;
+ struct mlx5_core_qp *qp;
+ int type;
};
+static struct workqueue_struct *mlx5_ib_qp_event_wq;
+
static void get_cqs(enum ib_qp_type qp_type,
struct ib_cq *ib_send_cq, struct ib_cq *ib_recv_cq,
struct mlx5_ib_cq **send_cq, struct mlx5_ib_cq **recv_cq);
@@ -102,138 +97,337 @@ static int is_sqp(enum ib_qp_type qp_type)
return is_qp0(qp_type) || is_qp1(qp_type);
}
-static void *get_wqe(struct mlx5_ib_qp *qp, int offset)
-{
- return mlx5_buf_offset(&qp->buf, offset);
-}
+/**
+ * mlx5_ib_read_user_wqe_common() - Copy a WQE (or part of) from user WQ
+ * to kernel buffer
+ *
+ * @umem: User space memory where the WQ is
+ * @buffer: buffer to copy to
+ * @buflen: buffer length
+ * @wqe_index: index of WQE to copy from
+ * @wq_offset: offset to start of WQ
+ * @wq_wqe_cnt: number of WQEs in WQ
+ * @wq_wqe_shift: log2 of WQE size
+ * @bcnt: number of bytes to copy
+ * @bytes_copied: number of bytes to copy (return value)
+ *
+ * Copies from start of WQE bcnt or less bytes.
+ * Does not gurantee to copy the entire WQE.
+ *
+ * Return: zero on success, or an error code.
+ */
+static int mlx5_ib_read_user_wqe_common(struct ib_umem *umem, void *buffer,
+ size_t buflen, int wqe_index,
+ int wq_offset, int wq_wqe_cnt,
+ int wq_wqe_shift, int bcnt,
+ size_t *bytes_copied)
+{
+ size_t offset = wq_offset + ((wqe_index % wq_wqe_cnt) << wq_wqe_shift);
+ size_t wq_end = wq_offset + (wq_wqe_cnt << wq_wqe_shift);
+ size_t copy_length;
+ int ret;
-static void *get_recv_wqe(struct mlx5_ib_qp *qp, int n)
-{
- return get_wqe(qp, qp->rq.offset + (n << qp->rq.wqe_shift));
+ /* don't copy more than requested, more than buffer length or
+ * beyond WQ end
+ */
+ copy_length = min_t(u32, buflen, wq_end - offset);
+ copy_length = min_t(u32, copy_length, bcnt);
+
+ ret = ib_umem_copy_from(buffer, umem, offset, copy_length);
+ if (ret)
+ return ret;
+
+ if (!ret && bytes_copied)
+ *bytes_copied = copy_length;
+
+ return 0;
}
-void *mlx5_get_send_wqe(struct mlx5_ib_qp *qp, int n)
+static int mlx5_ib_read_kernel_wqe_sq(struct mlx5_ib_qp *qp, int wqe_index,
+ void *buffer, size_t buflen, size_t *bc)
{
- return get_wqe(qp, qp->sq.offset + (n << MLX5_IB_SQ_STRIDE));
+ struct mlx5_wqe_ctrl_seg *ctrl;
+ size_t bytes_copied = 0;
+ size_t wqe_length;
+ void *p;
+ int ds;
+
+ wqe_index = wqe_index & qp->sq.fbc.sz_m1;
+
+ /* read the control segment first */
+ p = mlx5_frag_buf_get_wqe(&qp->sq.fbc, wqe_index);
+ ctrl = p;
+ ds = be32_to_cpu(ctrl->qpn_ds) & MLX5_WQE_CTRL_DS_MASK;
+ wqe_length = ds * MLX5_WQE_DS_UNITS;
+
+ /* read rest of WQE if it spreads over more than one stride */
+ while (bytes_copied < wqe_length) {
+ size_t copy_length =
+ min_t(size_t, buflen - bytes_copied, MLX5_SEND_WQE_BB);
+
+ if (!copy_length)
+ break;
+
+ memcpy(buffer + bytes_copied, p, copy_length);
+ bytes_copied += copy_length;
+
+ wqe_index = (wqe_index + 1) & qp->sq.fbc.sz_m1;
+ p = mlx5_frag_buf_get_wqe(&qp->sq.fbc, wqe_index);
+ }
+ *bc = bytes_copied;
+ return 0;
}
-/**
- * mlx5_ib_read_user_wqe() - Copy a user-space WQE to kernel space.
- *
- * @qp: QP to copy from.
- * @send: copy from the send queue when non-zero, use the receive queue
- * otherwise.
- * @wqe_index: index to start copying from. For send work queues, the
- * wqe_index is in units of MLX5_SEND_WQE_BB.
- * For receive work queue, it is the number of work queue
- * element in the queue.
- * @buffer: destination buffer.
- * @length: maximum number of bytes to copy.
- *
- * Copies at least a single WQE, but may copy more data.
- *
- * Return: the number of bytes copied, or an error code.
- */
-int mlx5_ib_read_user_wqe(struct mlx5_ib_qp *qp, int send, int wqe_index,
- void *buffer, u32 length,
- struct mlx5_ib_qp_base *base)
-{
- struct ib_device *ibdev = qp->ibqp.device;
- struct mlx5_ib_dev *dev = to_mdev(ibdev);
- struct mlx5_ib_wq *wq = send ? &qp->sq : &qp->rq;
- size_t offset;
- size_t wq_end;
+static int mlx5_ib_read_user_wqe_sq(struct mlx5_ib_qp *qp, int wqe_index,
+ void *buffer, size_t buflen, size_t *bc)
+{
+ struct mlx5_ib_qp_base *base = &qp->trans_qp.base;
struct ib_umem *umem = base->ubuffer.umem;
- u32 first_copy_length;
- int wqe_length;
+ struct mlx5_ib_wq *wq = &qp->sq;
+ struct mlx5_wqe_ctrl_seg *ctrl;
+ size_t bytes_copied;
+ size_t bytes_copied2;
+ size_t wqe_length;
int ret;
+ int ds;
+
+ /* at first read as much as possible */
+ ret = mlx5_ib_read_user_wqe_common(umem, buffer, buflen, wqe_index,
+ wq->offset, wq->wqe_cnt,
+ wq->wqe_shift, buflen,
+ &bytes_copied);
+ if (ret)
+ return ret;
- if (wq->wqe_cnt == 0) {
- mlx5_ib_dbg(dev, "mlx5_ib_read_user_wqe for a QP with wqe_cnt == 0. qp_type: 0x%x\n",
- qp->ibqp.qp_type);
+ /* we need at least control segment size to proceed */
+ if (bytes_copied < sizeof(*ctrl))
return -EINVAL;
+
+ ctrl = buffer;
+ ds = be32_to_cpu(ctrl->qpn_ds) & MLX5_WQE_CTRL_DS_MASK;
+ wqe_length = ds * MLX5_WQE_DS_UNITS;
+
+ /* if we copied enough then we are done */
+ if (bytes_copied >= wqe_length) {
+ *bc = bytes_copied;
+ return 0;
}
- offset = wq->offset + ((wqe_index % wq->wqe_cnt) << wq->wqe_shift);
- wq_end = wq->offset + (wq->wqe_cnt << wq->wqe_shift);
+ /* otherwise this a wrapped around wqe
+ * so read the remaining bytes starting
+ * from wqe_index 0
+ */
+ ret = mlx5_ib_read_user_wqe_common(umem, buffer + bytes_copied,
+ buflen - bytes_copied, 0, wq->offset,
+ wq->wqe_cnt, wq->wqe_shift,
+ wqe_length - bytes_copied,
+ &bytes_copied2);
- if (send && length < sizeof(struct mlx5_wqe_ctrl_seg))
- return -EINVAL;
+ if (ret)
+ return ret;
+ *bc = bytes_copied + bytes_copied2;
+ return 0;
+}
- if (offset > umem->length ||
- (send && offset + sizeof(struct mlx5_wqe_ctrl_seg) > umem->length))
+int mlx5_ib_read_wqe_sq(struct mlx5_ib_qp *qp, int wqe_index, void *buffer,
+ size_t buflen, size_t *bc)
+{
+ struct mlx5_ib_qp_base *base = &qp->trans_qp.base;
+ struct ib_umem *umem = base->ubuffer.umem;
+
+ if (buflen < sizeof(struct mlx5_wqe_ctrl_seg))
return -EINVAL;
- first_copy_length = min_t(u32, offset + length, wq_end) - offset;
- ret = ib_umem_copy_from(buffer, umem, offset, first_copy_length);
+ if (!umem)
+ return mlx5_ib_read_kernel_wqe_sq(qp, wqe_index, buffer,
+ buflen, bc);
+
+ return mlx5_ib_read_user_wqe_sq(qp, wqe_index, buffer, buflen, bc);
+}
+
+static int mlx5_ib_read_user_wqe_rq(struct mlx5_ib_qp *qp, int wqe_index,
+ void *buffer, size_t buflen, size_t *bc)
+{
+ struct mlx5_ib_qp_base *base = &qp->trans_qp.base;
+ struct ib_umem *umem = base->ubuffer.umem;
+ struct mlx5_ib_wq *wq = &qp->rq;
+ size_t bytes_copied;
+ int ret;
+
+ ret = mlx5_ib_read_user_wqe_common(umem, buffer, buflen, wqe_index,
+ wq->offset, wq->wqe_cnt,
+ wq->wqe_shift, buflen,
+ &bytes_copied);
+
if (ret)
return ret;
+ *bc = bytes_copied;
+ return 0;
+}
+
+int mlx5_ib_read_wqe_rq(struct mlx5_ib_qp *qp, int wqe_index, void *buffer,
+ size_t buflen, size_t *bc)
+{
+ struct mlx5_ib_qp_base *base = &qp->trans_qp.base;
+ struct ib_umem *umem = base->ubuffer.umem;
+ struct mlx5_ib_wq *wq = &qp->rq;
+ size_t wqe_size = 1 << wq->wqe_shift;
- if (send) {
- struct mlx5_wqe_ctrl_seg *ctrl = buffer;
- int ds = be32_to_cpu(ctrl->qpn_ds) & MLX5_WQE_CTRL_DS_MASK;
+ if (buflen < wqe_size)
+ return -EINVAL;
- wqe_length = ds * MLX5_WQE_DS_UNITS;
- } else {
- wqe_length = 1 << wq->wqe_shift;
- }
+ if (!umem)
+ return -EOPNOTSUPP;
- if (wqe_length <= first_copy_length)
- return first_copy_length;
+ return mlx5_ib_read_user_wqe_rq(qp, wqe_index, buffer, buflen, bc);
+}
+
+static int mlx5_ib_read_user_wqe_srq(struct mlx5_ib_srq *srq, int wqe_index,
+ void *buffer, size_t buflen, size_t *bc)
+{
+ struct ib_umem *umem = srq->umem;
+ size_t bytes_copied;
+ int ret;
+
+ ret = mlx5_ib_read_user_wqe_common(umem, buffer, buflen, wqe_index, 0,
+ srq->msrq.max, srq->msrq.wqe_shift,
+ buflen, &bytes_copied);
- ret = ib_umem_copy_from(buffer + first_copy_length, umem, wq->offset,
- wqe_length - first_copy_length);
if (ret)
return ret;
+ *bc = bytes_copied;
+ return 0;
+}
+
+int mlx5_ib_read_wqe_srq(struct mlx5_ib_srq *srq, int wqe_index, void *buffer,
+ size_t buflen, size_t *bc)
+{
+ struct ib_umem *umem = srq->umem;
+ size_t wqe_size = 1 << srq->msrq.wqe_shift;
+
+ if (buflen < wqe_size)
+ return -EINVAL;
+
+ if (!umem)
+ return -EOPNOTSUPP;
+
+ return mlx5_ib_read_user_wqe_srq(srq, wqe_index, buffer, buflen, bc);
+}
+
+static void mlx5_ib_qp_err_syndrome(struct ib_qp *ibqp)
+{
+ struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
+ int outlen = MLX5_ST_SZ_BYTES(query_qp_out);
+ struct mlx5_ib_qp *qp = to_mqp(ibqp);
+ void *pas_ext_union, *err_syn;
+ u32 *outb;
+ int err;
+
+ if (!MLX5_CAP_GEN(dev->mdev, qpc_extension) ||
+ !MLX5_CAP_GEN(dev->mdev, qp_error_syndrome))
+ return;
+
+ outb = kzalloc(outlen, GFP_KERNEL);
+ if (!outb)
+ return;
+
+ err = mlx5_core_qp_query(dev, &qp->trans_qp.base.mqp, outb, outlen,
+ true);
+ if (err)
+ goto out;
- return wqe_length;
+ pas_ext_union =
+ MLX5_ADDR_OF(query_qp_out, outb, qp_pas_or_qpc_ext_and_pas);
+ err_syn = MLX5_ADDR_OF(qpc_extension_and_pas_list_in, pas_ext_union,
+ qpc_data_extension.error_syndrome);
+
+ pr_err("%s/%d: QP %d error: %s (0x%x 0x%x 0x%x)\n",
+ ibqp->device->name, ibqp->port, ibqp->qp_num,
+ ib_wc_status_msg(
+ MLX5_GET(cqe_error_syndrome, err_syn, syndrome)),
+ MLX5_GET(cqe_error_syndrome, err_syn, vendor_error_syndrome),
+ MLX5_GET(cqe_error_syndrome, err_syn, hw_syndrome_type),
+ MLX5_GET(cqe_error_syndrome, err_syn, hw_error_syndrome));
+out:
+ kfree(outb);
+}
+
+static void mlx5_ib_handle_qp_event(struct work_struct *_work)
+{
+ struct mlx5_ib_qp_event_work *qpe_work =
+ container_of(_work, struct mlx5_ib_qp_event_work, work);
+ struct ib_qp *ibqp = &to_mibqp(qpe_work->qp)->ibqp;
+ struct ib_event event = {};
+
+ event.device = ibqp->device;
+ event.element.qp = ibqp;
+ switch (qpe_work->type) {
+ case MLX5_EVENT_TYPE_PATH_MIG:
+ event.event = IB_EVENT_PATH_MIG;
+ break;
+ case MLX5_EVENT_TYPE_COMM_EST:
+ event.event = IB_EVENT_COMM_EST;
+ break;
+ case MLX5_EVENT_TYPE_SQ_DRAINED:
+ event.event = IB_EVENT_SQ_DRAINED;
+ break;
+ case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
+ event.event = IB_EVENT_QP_LAST_WQE_REACHED;
+ break;
+ case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
+ event.event = IB_EVENT_QP_FATAL;
+ break;
+ case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
+ event.event = IB_EVENT_PATH_MIG_ERR;
+ break;
+ case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
+ event.event = IB_EVENT_QP_REQ_ERR;
+ break;
+ case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
+ event.event = IB_EVENT_QP_ACCESS_ERR;
+ break;
+ default:
+ pr_warn("mlx5_ib: Unexpected event type %d on QP %06x\n",
+ qpe_work->type, qpe_work->qp->qpn);
+ goto out;
+ }
+
+ if ((event.event == IB_EVENT_QP_FATAL) ||
+ (event.event == IB_EVENT_QP_ACCESS_ERR))
+ mlx5_ib_qp_err_syndrome(ibqp);
+
+ ibqp->event_handler(&event, ibqp->qp_context);
+
+out:
+ mlx5_core_res_put(&qpe_work->qp->common);
+ kfree(qpe_work);
}
static void mlx5_ib_qp_event(struct mlx5_core_qp *qp, int type)
{
struct ib_qp *ibqp = &to_mibqp(qp)->ibqp;
- struct ib_event event;
+ struct mlx5_ib_qp_event_work *qpe_work;
if (type == MLX5_EVENT_TYPE_PATH_MIG) {
/* This event is only valid for trans_qps */
to_mibqp(qp)->port = to_mibqp(qp)->trans_qp.alt_port;
}
- if (ibqp->event_handler) {
- event.device = ibqp->device;
- event.element.qp = ibqp;
- switch (type) {
- case MLX5_EVENT_TYPE_PATH_MIG:
- event.event = IB_EVENT_PATH_MIG;
- break;
- case MLX5_EVENT_TYPE_COMM_EST:
- event.event = IB_EVENT_COMM_EST;
- break;
- case MLX5_EVENT_TYPE_SQ_DRAINED:
- event.event = IB_EVENT_SQ_DRAINED;
- break;
- case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
- event.event = IB_EVENT_QP_LAST_WQE_REACHED;
- break;
- case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
- event.event = IB_EVENT_QP_FATAL;
- break;
- case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
- event.event = IB_EVENT_PATH_MIG_ERR;
- break;
- case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
- event.event = IB_EVENT_QP_REQ_ERR;
- break;
- case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
- event.event = IB_EVENT_QP_ACCESS_ERR;
- break;
- default:
- pr_warn("mlx5_ib: Unexpected event type %d on QP %06x\n", type, qp->qpn);
- return;
- }
+ if (!ibqp->event_handler)
+ goto out_no_handler;
- ibqp->event_handler(&event, ibqp->qp_context);
- }
+ qpe_work = kzalloc(sizeof(*qpe_work), GFP_ATOMIC);
+ if (!qpe_work)
+ goto out_no_handler;
+
+ qpe_work->qp = qp;
+ qpe_work->type = type;
+ INIT_WORK(&qpe_work->work, mlx5_ib_handle_qp_event);
+ queue_work(mlx5_ib_qp_event_wq, &qpe_work->work);
+ return;
+
+out_no_handler:
+ mlx5_core_res_put(&qp->common);
}
static int set_rq_size(struct mlx5_ib_dev *dev, struct ib_qp_cap *cap,
@@ -253,13 +447,26 @@ static int set_rq_size(struct mlx5_ib_dev *dev, struct ib_qp_cap *cap,
cap->max_recv_wr = 0;
cap->max_recv_sge = 0;
} else {
+ int wq_sig = !!(qp->flags_en & MLX5_QP_FLAG_SIGNATURE);
+
if (ucmd) {
qp->rq.wqe_cnt = ucmd->rq_wqe_count;
+ if (ucmd->rq_wqe_shift > BITS_PER_BYTE * sizeof(ucmd->rq_wqe_shift))
+ return -EINVAL;
qp->rq.wqe_shift = ucmd->rq_wqe_shift;
- qp->rq.max_gs = (1 << qp->rq.wqe_shift) / sizeof(struct mlx5_wqe_data_seg) - qp->wq_sig;
+ if ((1 << qp->rq.wqe_shift) /
+ sizeof(struct mlx5_wqe_data_seg) <
+ wq_sig)
+ return -EINVAL;
+ qp->rq.max_gs =
+ (1 << qp->rq.wqe_shift) /
+ sizeof(struct mlx5_wqe_data_seg) -
+ wq_sig;
qp->rq.max_post = qp->rq.wqe_cnt;
} else {
- wqe_size = qp->wq_sig ? sizeof(struct mlx5_wqe_signature_seg) : 0;
+ wqe_size =
+ wq_sig ? sizeof(struct mlx5_wqe_signature_seg) :
+ 0;
wqe_size += cap->max_recv_sge * sizeof(struct mlx5_wqe_data_seg);
wqe_size = roundup_pow_of_two(wqe_size);
wq_size = roundup_pow_of_two(cap->max_recv_wr) * wqe_size;
@@ -273,7 +480,10 @@ static int set_rq_size(struct mlx5_ib_dev *dev, struct ib_qp_cap *cap,
return -EINVAL;
}
qp->rq.wqe_shift = ilog2(wqe_size);
- qp->rq.max_gs = (1 << qp->rq.wqe_shift) / sizeof(struct mlx5_wqe_data_seg) - qp->wq_sig;
+ qp->rq.max_gs =
+ (1 << qp->rq.wqe_shift) /
+ sizeof(struct mlx5_wqe_data_seg) -
+ wq_sig;
qp->rq.max_post = qp->rq.wqe_cnt;
}
}
@@ -288,13 +498,15 @@ static int sq_overhead(struct ib_qp_init_attr *attr)
switch (attr->qp_type) {
case IB_QPT_XRC_INI:
size += sizeof(struct mlx5_wqe_xrc_seg);
- /* fall through */
+ fallthrough;
case IB_QPT_RC:
size += sizeof(struct mlx5_wqe_ctrl_seg) +
max(sizeof(struct mlx5_wqe_atomic_seg) +
sizeof(struct mlx5_wqe_raddr_seg),
sizeof(struct mlx5_wqe_umr_ctrl_seg) +
- sizeof(struct mlx5_mkey_seg));
+ sizeof(struct mlx5_mkey_seg) +
+ MLX5_IB_SQ_UMR_INLINE_THRESHOLD /
+ MLX5_IB_UMR_OCTOWORD);
break;
case IB_QPT_XRC_TGT:
@@ -311,7 +523,7 @@ static int sq_overhead(struct ib_qp_init_attr *attr)
if (attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO)
size += sizeof(struct mlx5_wqe_eth_pad) +
sizeof(struct mlx5_wqe_eth_seg);
- /* fall through */
+ fallthrough;
case IB_QPT_SMI:
case MLX5_IB_QPT_HW_GSI:
size += sizeof(struct mlx5_wqe_ctrl_seg) +
@@ -346,9 +558,9 @@ static int calc_send_wqe(struct ib_qp_init_attr *attr)
}
size += attr->cap.max_send_sge * sizeof(struct mlx5_wqe_data_seg);
- if (attr->create_flags & IB_QP_CREATE_SIGNATURE_EN &&
+ if (attr->create_flags & IB_QP_CREATE_INTEGRITY_EN &&
ALIGN(max_t(int, inl_size, size), MLX5_SEND_WQE_BB) < MLX5_SIG_WQE_SIZE)
- return MLX5_SIG_WQE_SIZE;
+ return MLX5_SIG_WQE_SIZE;
else
return ALIGN(max_t(int, inl_size, size), MLX5_SEND_WQE_BB);
}
@@ -400,9 +612,6 @@ static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr,
sizeof(struct mlx5_wqe_inline_seg);
attr->cap.max_inline_data = qp->max_inline_data;
- if (attr->create_flags & IB_QP_CREATE_SIGNATURE_EN)
- qp->signature_en = true;
-
wq_size = roundup_pow_of_two(attr->cap.max_send_wr * wqe_size);
qp->sq.wqe_cnt = wq_size / MLX5_SEND_WQE_BB;
if (qp->sq.wqe_cnt > (1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz))) {
@@ -438,9 +647,9 @@ static int set_user_buf_size(struct mlx5_ib_dev *dev,
return -EINVAL;
}
- if (ucmd->sq_wqe_count && ((1 << ilog2(ucmd->sq_wqe_count)) != ucmd->sq_wqe_count)) {
- mlx5_ib_warn(dev, "sq_wqe_count %d, sq_wqe_count %d\n",
- ucmd->sq_wqe_count, ucmd->sq_wqe_count);
+ if (ucmd->sq_wqe_count && !is_power_of_2(ucmd->sq_wqe_count)) {
+ mlx5_ib_warn(dev, "sq_wqe_count %d is not a power of two\n",
+ ucmd->sq_wqe_count);
return -EINVAL;
}
@@ -453,7 +662,8 @@ static int set_user_buf_size(struct mlx5_ib_dev *dev,
return -EINVAL;
}
- if (attr->qp_type == IB_QPT_RAW_PACKET) {
+ if (attr->qp_type == IB_QPT_RAW_PACKET ||
+ qp->flags & IB_QP_CREATE_SOURCE_QPN) {
base->ubuffer.buf_size = qp->rq.wqe_cnt << qp->rq.wqe_shift;
qp->raw_packet_qp.sq.ubuffer.buf_size = qp->sq.wqe_cnt << 6;
} else {
@@ -475,60 +685,55 @@ static int qp_has_rq(struct ib_qp_init_attr *attr)
return 1;
}
-static int first_med_uuar(void)
-{
- return 1;
-}
+enum {
+ /* this is the first blue flame register in the array of bfregs assigned
+ * to a processes. Since we do not use it for blue flame but rather
+ * regular 64 bit doorbells, we do not need a lock for maintaiing
+ * "odd/even" order
+ */
+ NUM_NON_BLUE_FLAME_BFREGS = 1,
+};
-static int next_uuar(int n)
+static int max_bfregs(struct mlx5_ib_dev *dev, struct mlx5_bfreg_info *bfregi)
{
- n++;
-
- while (((n % 4) & 2))
- n++;
-
- return n;
+ return get_uars_per_sys_page(dev, bfregi->lib_uar_4k) *
+ bfregi->num_static_sys_pages * MLX5_NON_FP_BFREGS_PER_UAR;
}
-static int num_med_uuar(struct mlx5_uuar_info *uuari)
+static int num_med_bfreg(struct mlx5_ib_dev *dev,
+ struct mlx5_bfreg_info *bfregi)
{
int n;
- n = uuari->num_uars * MLX5_NON_FP_BF_REGS_PER_PAGE -
- uuari->num_low_latency_uuars - 1;
+ n = max_bfregs(dev, bfregi) - bfregi->num_low_latency_bfregs -
+ NUM_NON_BLUE_FLAME_BFREGS;
return n >= 0 ? n : 0;
}
-static int max_uuari(struct mlx5_uuar_info *uuari)
+static int first_med_bfreg(struct mlx5_ib_dev *dev,
+ struct mlx5_bfreg_info *bfregi)
{
- return uuari->num_uars * 4;
+ return num_med_bfreg(dev, bfregi) ? 1 : -ENOMEM;
}
-static int first_hi_uuar(struct mlx5_uuar_info *uuari)
+static int first_hi_bfreg(struct mlx5_ib_dev *dev,
+ struct mlx5_bfreg_info *bfregi)
{
int med;
- int i;
- int t;
- med = num_med_uuar(uuari);
- for (t = 0, i = first_med_uuar();; i = next_uuar(i)) {
- t++;
- if (t == med)
- return next_uuar(i);
- }
-
- return 0;
+ med = num_med_bfreg(dev, bfregi);
+ return ++med;
}
-static int alloc_high_class_uuar(struct mlx5_uuar_info *uuari)
+static int alloc_high_class_bfreg(struct mlx5_ib_dev *dev,
+ struct mlx5_bfreg_info *bfregi)
{
int i;
- for (i = first_hi_uuar(uuari); i < max_uuari(uuari); i = next_uuar(i)) {
- if (!test_bit(i, uuari->bitmap)) {
- set_bit(i, uuari->bitmap);
- uuari->count[i]++;
+ for (i = first_hi_bfreg(dev, bfregi); i < max_bfregs(dev, bfregi); i++) {
+ if (!bfregi->count[i]) {
+ bfregi->count[i]++;
return i;
}
}
@@ -536,87 +741,56 @@ static int alloc_high_class_uuar(struct mlx5_uuar_info *uuari)
return -ENOMEM;
}
-static int alloc_med_class_uuar(struct mlx5_uuar_info *uuari)
+static int alloc_med_class_bfreg(struct mlx5_ib_dev *dev,
+ struct mlx5_bfreg_info *bfregi)
{
- int minidx = first_med_uuar();
+ int minidx = first_med_bfreg(dev, bfregi);
int i;
- for (i = first_med_uuar(); i < first_hi_uuar(uuari); i = next_uuar(i)) {
- if (uuari->count[i] < uuari->count[minidx])
+ if (minidx < 0)
+ return minidx;
+
+ for (i = minidx; i < first_hi_bfreg(dev, bfregi); i++) {
+ if (bfregi->count[i] < bfregi->count[minidx])
minidx = i;
+ if (!bfregi->count[minidx])
+ break;
}
- uuari->count[minidx]++;
+ bfregi->count[minidx]++;
return minidx;
}
-static int alloc_uuar(struct mlx5_uuar_info *uuari,
- enum mlx5_ib_latency_class lat)
+static int alloc_bfreg(struct mlx5_ib_dev *dev,
+ struct mlx5_bfreg_info *bfregi)
{
- int uuarn = -EINVAL;
-
- mutex_lock(&uuari->lock);
- switch (lat) {
- case MLX5_IB_LATENCY_CLASS_LOW:
- uuarn = 0;
- uuari->count[uuarn]++;
- break;
+ int bfregn = -ENOMEM;
- case MLX5_IB_LATENCY_CLASS_MEDIUM:
- if (uuari->ver < 2)
- uuarn = -ENOMEM;
- else
- uuarn = alloc_med_class_uuar(uuari);
- break;
-
- case MLX5_IB_LATENCY_CLASS_HIGH:
- if (uuari->ver < 2)
- uuarn = -ENOMEM;
- else
- uuarn = alloc_high_class_uuar(uuari);
- break;
+ if (bfregi->lib_uar_dyn)
+ return -EINVAL;
- case MLX5_IB_LATENCY_CLASS_FAST_PATH:
- uuarn = 2;
- break;
+ mutex_lock(&bfregi->lock);
+ if (bfregi->ver >= 2) {
+ bfregn = alloc_high_class_bfreg(dev, bfregi);
+ if (bfregn < 0)
+ bfregn = alloc_med_class_bfreg(dev, bfregi);
}
- mutex_unlock(&uuari->lock);
-
- return uuarn;
-}
-static void free_med_class_uuar(struct mlx5_uuar_info *uuari, int uuarn)
-{
- clear_bit(uuarn, uuari->bitmap);
- --uuari->count[uuarn];
-}
+ if (bfregn < 0) {
+ BUILD_BUG_ON(NUM_NON_BLUE_FLAME_BFREGS != 1);
+ bfregn = 0;
+ bfregi->count[bfregn]++;
+ }
+ mutex_unlock(&bfregi->lock);
-static void free_high_class_uuar(struct mlx5_uuar_info *uuari, int uuarn)
-{
- clear_bit(uuarn, uuari->bitmap);
- --uuari->count[uuarn];
+ return bfregn;
}
-static void free_uuar(struct mlx5_uuar_info *uuari, int uuarn)
+void mlx5_ib_free_bfreg(struct mlx5_ib_dev *dev, struct mlx5_bfreg_info *bfregi, int bfregn)
{
- int nuuars = uuari->num_uars * MLX5_BF_REGS_PER_PAGE;
- int high_uuar = nuuars - uuari->num_low_latency_uuars;
-
- mutex_lock(&uuari->lock);
- if (uuarn == 0) {
- --uuari->count[uuarn];
- goto out;
- }
-
- if (uuarn < high_uuar) {
- free_med_class_uuar(uuari, uuarn);
- goto out;
- }
-
- free_high_class_uuar(uuari, uuarn);
-
-out:
- mutex_unlock(&uuari->lock);
+ mutex_lock(&bfregi->lock);
+ bfregi->count[bfregn]--;
+ mutex_unlock(&bfregi->lock);
}
static enum mlx5_qp_state to_mlx5_state(enum ib_qp_state state)
@@ -644,10 +818,8 @@ static int to_mlx5_st(enum ib_qp_type type)
case IB_QPT_XRC_TGT: return MLX5_QP_ST_XRC;
case IB_QPT_SMI: return MLX5_QP_ST_QP0;
case MLX5_IB_QPT_HW_GSI: return MLX5_QP_ST_QP1;
- case IB_QPT_RAW_IPV6: return MLX5_QP_ST_RAW_IPV6;
- case IB_QPT_RAW_PACKET:
- case IB_QPT_RAW_ETHERTYPE: return MLX5_QP_ST_RAW_ETHERTYPE;
- case IB_QPT_MAX:
+ case MLX5_IB_QPT_DCI: return MLX5_QP_ST_DCI;
+ case IB_QPT_RAW_PACKET: return MLX5_QP_ST_RAW_ETHERTYPE;
default: return -EINVAL;
}
}
@@ -657,104 +829,101 @@ static void mlx5_ib_lock_cqs(struct mlx5_ib_cq *send_cq,
static void mlx5_ib_unlock_cqs(struct mlx5_ib_cq *send_cq,
struct mlx5_ib_cq *recv_cq);
-static int uuarn_to_uar_index(struct mlx5_uuar_info *uuari, int uuarn)
+int bfregn_to_uar_index(struct mlx5_ib_dev *dev,
+ struct mlx5_bfreg_info *bfregi, u32 bfregn,
+ bool dyn_bfreg)
{
- return uuari->uars[uuarn / MLX5_BF_REGS_PER_PAGE].index;
-}
+ unsigned int bfregs_per_sys_page;
+ u32 index_of_sys_page;
+ u32 offset;
-static int mlx5_ib_umem_get(struct mlx5_ib_dev *dev,
- struct ib_pd *pd,
- unsigned long addr, size_t size,
- struct ib_umem **umem,
- int *npages, int *page_shift, int *ncont,
- u32 *offset)
-{
- int err;
+ if (bfregi->lib_uar_dyn)
+ return -EINVAL;
- *umem = ib_umem_get(pd->uobject->context, addr, size, 0, 0);
- if (IS_ERR(*umem)) {
- mlx5_ib_dbg(dev, "umem_get failed\n");
- return PTR_ERR(*umem);
- }
+ bfregs_per_sys_page = get_uars_per_sys_page(dev, bfregi->lib_uar_4k) *
+ MLX5_NON_FP_BFREGS_PER_UAR;
+ index_of_sys_page = bfregn / bfregs_per_sys_page;
- mlx5_ib_cont_pages(*umem, addr, 0, npages, page_shift, ncont, NULL);
+ if (dyn_bfreg) {
+ index_of_sys_page += bfregi->num_static_sys_pages;
- err = mlx5_ib_get_buf_offset(addr, *page_shift, offset);
- if (err) {
- mlx5_ib_warn(dev, "bad offset\n");
- goto err_umem;
- }
-
- mlx5_ib_dbg(dev, "addr 0x%lx, size %zu, npages %d, page_shift %d, ncont %d, offset %d\n",
- addr, size, *npages, *page_shift, *ncont, *offset);
-
- return 0;
+ if (index_of_sys_page >= bfregi->num_sys_pages)
+ return -EINVAL;
-err_umem:
- ib_umem_release(*umem);
- *umem = NULL;
+ if (bfregn > bfregi->num_dyn_bfregs ||
+ bfregi->sys_pages[index_of_sys_page] == MLX5_IB_INVALID_UAR_INDEX) {
+ mlx5_ib_dbg(dev, "Invalid dynamic uar index\n");
+ return -EINVAL;
+ }
+ }
- return err;
+ offset = bfregn % bfregs_per_sys_page / MLX5_NON_FP_BFREGS_PER_UAR;
+ return bfregi->sys_pages[index_of_sys_page] + offset;
}
-static void destroy_user_rq(struct ib_pd *pd, struct mlx5_ib_rwq *rwq)
+static void destroy_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd,
+ struct mlx5_ib_rwq *rwq, struct ib_udata *udata)
{
- struct mlx5_ib_ucontext *context;
+ struct mlx5_ib_ucontext *context =
+ rdma_udata_to_drv_context(
+ udata,
+ struct mlx5_ib_ucontext,
+ ibucontext);
+
+ if (rwq->create_flags & MLX5_IB_WQ_FLAGS_DELAY_DROP)
+ atomic_dec(&dev->delay_drop.rqs_cnt);
- context = to_mucontext(pd->uobject->context);
mlx5_ib_db_unmap_user(context, &rwq->db);
- if (rwq->umem)
- ib_umem_release(rwq->umem);
+ ib_umem_release(rwq->umem);
}
static int create_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd,
- struct mlx5_ib_rwq *rwq,
+ struct ib_udata *udata, struct mlx5_ib_rwq *rwq,
struct mlx5_ib_create_wq *ucmd)
{
- struct mlx5_ib_ucontext *context;
- int page_shift = 0;
- int npages;
+ struct mlx5_ib_ucontext *ucontext = rdma_udata_to_drv_context(
+ udata, struct mlx5_ib_ucontext, ibucontext);
+ unsigned long page_size = 0;
u32 offset = 0;
- int ncont = 0;
int err;
if (!ucmd->buf_addr)
return -EINVAL;
- context = to_mucontext(pd->uobject->context);
- rwq->umem = ib_umem_get(pd->uobject->context, ucmd->buf_addr,
- rwq->buf_size, 0, 0);
+ rwq->umem = ib_umem_get(&dev->ib_dev, ucmd->buf_addr, rwq->buf_size, 0);
if (IS_ERR(rwq->umem)) {
mlx5_ib_dbg(dev, "umem_get failed\n");
err = PTR_ERR(rwq->umem);
return err;
}
- mlx5_ib_cont_pages(rwq->umem, ucmd->buf_addr, 0, &npages, &page_shift,
- &ncont, NULL);
- err = mlx5_ib_get_buf_offset(ucmd->buf_addr, page_shift,
- &rwq->rq_page_offset);
- if (err) {
+ page_size = mlx5_umem_find_best_quantized_pgoff(
+ rwq->umem, wq, log_wq_pg_sz, MLX5_ADAPTER_PAGE_SHIFT,
+ page_offset, 64, &rwq->rq_page_offset);
+ if (!page_size) {
mlx5_ib_warn(dev, "bad offset\n");
+ err = -EINVAL;
goto err_umem;
}
- rwq->rq_num_pas = ncont;
- rwq->page_shift = page_shift;
- rwq->log_page_size = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
+ rwq->rq_num_pas = ib_umem_num_dma_blocks(rwq->umem, page_size);
+ rwq->page_shift = order_base_2(page_size);
+ rwq->log_page_size = rwq->page_shift - MLX5_ADAPTER_PAGE_SHIFT;
rwq->wq_sig = !!(ucmd->flags & MLX5_WQ_FLAG_SIGNATURE);
- mlx5_ib_dbg(dev, "addr 0x%llx, size %zd, npages %d, page_shift %d, ncont %d, offset %d\n",
- (unsigned long long)ucmd->buf_addr, rwq->buf_size,
- npages, page_shift, ncont, offset);
+ mlx5_ib_dbg(
+ dev,
+ "addr 0x%llx, size %zd, npages %zu, page_size %ld, ncont %d, offset %d\n",
+ (unsigned long long)ucmd->buf_addr, rwq->buf_size,
+ ib_umem_num_pages(rwq->umem), page_size, rwq->rq_num_pas,
+ offset);
- err = mlx5_ib_db_map_user(context, ucmd->db_addr, &rwq->db);
+ err = mlx5_ib_db_map_user(ucontext, ucmd->db_addr, &rwq->db);
if (err) {
mlx5_ib_dbg(dev, "map failed\n");
goto err_umem;
}
- rwq->create_type = MLX5_WQ_USER;
return 0;
err_umem:
@@ -762,195 +931,227 @@ err_umem:
return err;
}
-static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
- struct mlx5_ib_qp *qp, struct ib_udata *udata,
- struct ib_qp_init_attr *attr,
- u32 **in,
- struct mlx5_ib_create_qp_resp *resp, int *inlen,
- struct mlx5_ib_qp_base *base)
+static int adjust_bfregn(struct mlx5_ib_dev *dev,
+ struct mlx5_bfreg_info *bfregi, int bfregn)
+{
+ return bfregn / MLX5_NON_FP_BFREGS_PER_UAR * MLX5_BFREGS_PER_UAR +
+ bfregn % MLX5_NON_FP_BFREGS_PER_UAR;
+}
+
+static int _create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
+ struct mlx5_ib_qp *qp, struct ib_udata *udata,
+ struct ib_qp_init_attr *attr, u32 **in,
+ struct mlx5_ib_create_qp_resp *resp, int *inlen,
+ struct mlx5_ib_qp_base *base,
+ struct mlx5_ib_create_qp *ucmd)
{
struct mlx5_ib_ucontext *context;
- struct mlx5_ib_create_qp ucmd;
struct mlx5_ib_ubuffer *ubuffer = &base->ubuffer;
- int page_shift = 0;
- int uar_index;
- int npages;
- u32 offset = 0;
- int uuarn;
+ unsigned int page_offset_quantized = 0;
+ unsigned long page_size = 0;
+ int uar_index = 0;
+ int bfregn;
int ncont = 0;
__be64 *pas;
void *qpc;
int err;
-
- err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd));
- if (err) {
- mlx5_ib_dbg(dev, "copy failed\n");
- return err;
- }
-
- context = to_mucontext(pd->uobject->context);
- /*
- * TBD: should come from the verbs when we have the API
- */
- if (qp->flags & MLX5_IB_QP_CROSS_CHANNEL)
- /* In CROSS_CHANNEL CQ and QP must use the same UAR */
- uuarn = MLX5_CROSS_CHANNEL_UUAR;
- else {
- uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_HIGH);
- if (uuarn < 0) {
- mlx5_ib_dbg(dev, "failed to allocate low latency UUAR\n");
- mlx5_ib_dbg(dev, "reverting to medium latency\n");
- uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_MEDIUM);
- if (uuarn < 0) {
- mlx5_ib_dbg(dev, "failed to allocate medium latency UUAR\n");
- mlx5_ib_dbg(dev, "reverting to high latency\n");
- uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_LOW);
- if (uuarn < 0) {
- mlx5_ib_warn(dev, "uuar allocation failed\n");
- return uuarn;
- }
- }
- }
+ u16 uid;
+ u32 uar_flags;
+
+ context = rdma_udata_to_drv_context(udata, struct mlx5_ib_ucontext,
+ ibucontext);
+ uar_flags = qp->flags_en &
+ (MLX5_QP_FLAG_UAR_PAGE_INDEX | MLX5_QP_FLAG_BFREG_INDEX);
+ switch (uar_flags) {
+ case MLX5_QP_FLAG_UAR_PAGE_INDEX:
+ uar_index = ucmd->bfreg_index;
+ bfregn = MLX5_IB_INVALID_BFREG;
+ break;
+ case MLX5_QP_FLAG_BFREG_INDEX:
+ uar_index = bfregn_to_uar_index(dev, &context->bfregi,
+ ucmd->bfreg_index, true);
+ if (uar_index < 0)
+ return uar_index;
+ bfregn = MLX5_IB_INVALID_BFREG;
+ break;
+ case 0:
+ if (qp->flags & IB_QP_CREATE_CROSS_CHANNEL)
+ return -EINVAL;
+ bfregn = alloc_bfreg(dev, &context->bfregi);
+ if (bfregn < 0)
+ return bfregn;
+ break;
+ default:
+ return -EINVAL;
}
- uar_index = uuarn_to_uar_index(&context->uuari, uuarn);
- mlx5_ib_dbg(dev, "uuarn 0x%x, uar_index 0x%x\n", uuarn, uar_index);
+ mlx5_ib_dbg(dev, "bfregn 0x%x, uar_index 0x%x\n", bfregn, uar_index);
+ if (bfregn != MLX5_IB_INVALID_BFREG)
+ uar_index = bfregn_to_uar_index(dev, &context->bfregi, bfregn,
+ false);
qp->rq.offset = 0;
qp->sq.wqe_shift = ilog2(MLX5_SEND_WQE_BB);
qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift;
- err = set_user_buf_size(dev, qp, &ucmd, base, attr);
+ err = set_user_buf_size(dev, qp, ucmd, base, attr);
if (err)
- goto err_uuar;
-
- if (ucmd.buf_addr && ubuffer->buf_size) {
- ubuffer->buf_addr = ucmd.buf_addr;
- err = mlx5_ib_umem_get(dev, pd, ubuffer->buf_addr,
- ubuffer->buf_size,
- &ubuffer->umem, &npages, &page_shift,
- &ncont, &offset);
- if (err)
- goto err_uuar;
+ goto err_bfreg;
+
+ if (ucmd->buf_addr && ubuffer->buf_size) {
+ ubuffer->buf_addr = ucmd->buf_addr;
+ ubuffer->umem = ib_umem_get(&dev->ib_dev, ubuffer->buf_addr,
+ ubuffer->buf_size, 0);
+ if (IS_ERR(ubuffer->umem)) {
+ err = PTR_ERR(ubuffer->umem);
+ goto err_bfreg;
+ }
+ page_size = mlx5_umem_find_best_quantized_pgoff(
+ ubuffer->umem, qpc, log_page_size,
+ MLX5_ADAPTER_PAGE_SHIFT, page_offset, 64,
+ &page_offset_quantized);
+ if (!page_size) {
+ err = -EINVAL;
+ goto err_umem;
+ }
+ ncont = ib_umem_num_dma_blocks(ubuffer->umem, page_size);
} else {
ubuffer->umem = NULL;
}
*inlen = MLX5_ST_SZ_BYTES(create_qp_in) +
MLX5_FLD_SZ_BYTES(create_qp_in, pas[0]) * ncont;
- *in = mlx5_vzalloc(*inlen);
+ *in = kvzalloc(*inlen, GFP_KERNEL);
if (!*in) {
err = -ENOMEM;
goto err_umem;
}
- pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, *in, pas);
- if (ubuffer->umem)
- mlx5_ib_populate_pas(dev, ubuffer->umem, page_shift, pas, 0);
-
+ uid = (attr->qp_type != IB_QPT_XRC_INI) ? to_mpd(pd)->uid : 0;
+ MLX5_SET(create_qp_in, *in, uid, uid);
qpc = MLX5_ADDR_OF(create_qp_in, *in, qpc);
-
- MLX5_SET(qpc, qpc, log_page_size, page_shift - MLX5_ADAPTER_PAGE_SHIFT);
- MLX5_SET(qpc, qpc, page_offset, offset);
-
+ pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, *in, pas);
+ if (ubuffer->umem) {
+ mlx5_ib_populate_pas(ubuffer->umem, page_size, pas, 0);
+ MLX5_SET(qpc, qpc, log_page_size,
+ order_base_2(page_size) - MLX5_ADAPTER_PAGE_SHIFT);
+ MLX5_SET(qpc, qpc, page_offset, page_offset_quantized);
+ }
MLX5_SET(qpc, qpc, uar_page, uar_index);
- resp->uuar_index = uuarn;
- qp->uuarn = uuarn;
+ if (bfregn != MLX5_IB_INVALID_BFREG)
+ resp->bfreg_index = adjust_bfregn(dev, &context->bfregi, bfregn);
+ else
+ resp->bfreg_index = MLX5_IB_INVALID_BFREG;
+ qp->bfregn = bfregn;
- err = mlx5_ib_db_map_user(context, ucmd.db_addr, &qp->db);
+ err = mlx5_ib_db_map_user(context, ucmd->db_addr, &qp->db);
if (err) {
mlx5_ib_dbg(dev, "map failed\n");
goto err_free;
}
- err = ib_copy_to_udata(udata, resp, sizeof(*resp));
- if (err) {
- mlx5_ib_dbg(dev, "copy failed\n");
- goto err_unmap;
- }
- qp->create_type = MLX5_QP_USER;
-
return 0;
-err_unmap:
- mlx5_ib_db_unmap_user(context, &qp->db);
-
err_free:
kvfree(*in);
err_umem:
- if (ubuffer->umem)
- ib_umem_release(ubuffer->umem);
+ ib_umem_release(ubuffer->umem);
-err_uuar:
- free_uuar(&context->uuari, uuarn);
+err_bfreg:
+ if (bfregn != MLX5_IB_INVALID_BFREG)
+ mlx5_ib_free_bfreg(dev, &context->bfregi, bfregn);
return err;
}
-static void destroy_qp_user(struct ib_pd *pd, struct mlx5_ib_qp *qp,
- struct mlx5_ib_qp_base *base)
+static void destroy_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
+ struct mlx5_ib_qp_base *base, struct ib_udata *udata)
{
- struct mlx5_ib_ucontext *context;
+ struct mlx5_ib_ucontext *context = rdma_udata_to_drv_context(
+ udata, struct mlx5_ib_ucontext, ibucontext);
- context = to_mucontext(pd->uobject->context);
- mlx5_ib_db_unmap_user(context, &qp->db);
- if (base->ubuffer.umem)
+ if (udata) {
+ /* User QP */
+ mlx5_ib_db_unmap_user(context, &qp->db);
ib_umem_release(base->ubuffer.umem);
- free_uuar(&context->uuari, qp->uuarn);
+
+ /*
+ * Free only the BFREGs which are handled by the kernel.
+ * BFREGs of UARs allocated dynamically are handled by user.
+ */
+ if (qp->bfregn != MLX5_IB_INVALID_BFREG)
+ mlx5_ib_free_bfreg(dev, &context->bfregi, qp->bfregn);
+ return;
+ }
+
+ /* Kernel QP */
+ kvfree(qp->sq.wqe_head);
+ kvfree(qp->sq.w_list);
+ kvfree(qp->sq.wrid);
+ kvfree(qp->sq.wr_data);
+ kvfree(qp->rq.wrid);
+ if (qp->db.db)
+ mlx5_db_free(dev->mdev, &qp->db);
+ if (qp->buf.frags)
+ mlx5_frag_buf_free(dev->mdev, &qp->buf);
}
-static int create_kernel_qp(struct mlx5_ib_dev *dev,
- struct ib_qp_init_attr *init_attr,
- struct mlx5_ib_qp *qp,
- u32 **in, int *inlen,
- struct mlx5_ib_qp_base *base)
+static int _create_kernel_qp(struct mlx5_ib_dev *dev,
+ struct ib_qp_init_attr *init_attr,
+ struct mlx5_ib_qp *qp, u32 **in, int *inlen,
+ struct mlx5_ib_qp_base *base)
{
- enum mlx5_ib_latency_class lc = MLX5_IB_LATENCY_CLASS_LOW;
- struct mlx5_uuar_info *uuari;
int uar_index;
void *qpc;
- int uuarn;
int err;
- uuari = &dev->mdev->priv.uuari;
- if (init_attr->create_flags & ~(IB_QP_CREATE_SIGNATURE_EN |
- IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK |
- IB_QP_CREATE_IPOIB_UD_LSO |
- mlx5_ib_create_qp_sqpn_qp1()))
- return -EINVAL;
-
if (init_attr->qp_type == MLX5_IB_QPT_REG_UMR)
- lc = MLX5_IB_LATENCY_CLASS_FAST_PATH;
-
- uuarn = alloc_uuar(uuari, lc);
- if (uuarn < 0) {
- mlx5_ib_dbg(dev, "\n");
- return -ENOMEM;
- }
+ qp->bf.bfreg = &dev->fp_bfreg;
+ else
+ qp->bf.bfreg = &dev->bfreg;
- qp->bf = &uuari->bfs[uuarn];
- uar_index = qp->bf->uar->index;
+ /* We need to divide by two since each register is comprised of
+ * two buffers of identical size, namely odd and even
+ */
+ qp->bf.buf_size = (1 << MLX5_CAP_GEN(dev->mdev, log_bf_reg_size)) / 2;
+ uar_index = qp->bf.bfreg->index;
err = calc_sq_size(dev, init_attr, qp);
if (err < 0) {
mlx5_ib_dbg(dev, "err %d\n", err);
- goto err_uuar;
+ return err;
}
qp->rq.offset = 0;
qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift;
base->ubuffer.buf_size = err + (qp->rq.wqe_cnt << qp->rq.wqe_shift);
- err = mlx5_buf_alloc(dev->mdev, base->ubuffer.buf_size, &qp->buf);
+ err = mlx5_frag_buf_alloc_node(dev->mdev, base->ubuffer.buf_size,
+ &qp->buf, dev->mdev->priv.numa_node);
if (err) {
mlx5_ib_dbg(dev, "err %d\n", err);
- goto err_uuar;
+ return err;
+ }
+
+ if (qp->rq.wqe_cnt)
+ mlx5_init_fbc(qp->buf.frags, qp->rq.wqe_shift,
+ ilog2(qp->rq.wqe_cnt), &qp->rq.fbc);
+
+ if (qp->sq.wqe_cnt) {
+ int sq_strides_offset = (qp->sq.offset & (PAGE_SIZE - 1)) /
+ MLX5_SEND_WQE_BB;
+ mlx5_init_fbc_offset(qp->buf.frags +
+ (qp->sq.offset / PAGE_SIZE),
+ ilog2(MLX5_SEND_WQE_BB),
+ ilog2(qp->sq.wqe_cnt),
+ sq_strides_offset, &qp->sq.fbc);
+
+ qp->sq.cur_edge = get_sq_edge(&qp->sq, 0);
}
- qp->sq.qend = mlx5_get_send_wqe(qp, qp->sq.wqe_cnt);
*inlen = MLX5_ST_SZ_BYTES(create_qp_in) +
MLX5_FLD_SZ_BYTES(create_qp_in, pas[0]) * qp->buf.npages;
- *in = mlx5_vzalloc(*inlen);
+ *in = kvzalloc(*inlen, GFP_KERNEL);
if (!*in) {
err = -ENOMEM;
goto err_buf;
@@ -958,19 +1159,19 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev,
qpc = MLX5_ADDR_OF(create_qp_in, *in, qpc);
MLX5_SET(qpc, qpc, uar_page, uar_index);
+ MLX5_SET(qpc, qpc, ts_format, mlx5_get_qp_default_ts(dev->mdev));
MLX5_SET(qpc, qpc, log_page_size, qp->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
/* Set "fast registration enabled" for all kernel QPs */
MLX5_SET(qpc, qpc, fre, 1);
MLX5_SET(qpc, qpc, rlky, 1);
- if (init_attr->create_flags & mlx5_ib_create_qp_sqpn_qp1()) {
+ if (qp->flags & MLX5_IB_QP_CREATE_SQPN_QP1)
MLX5_SET(qpc, qpc, deth_sqpn, 1);
- qp->flags |= MLX5_IB_QP_SQPN_QP1;
- }
- mlx5_fill_page_array(&qp->buf,
- (__be64 *)MLX5_ADDR_OF(create_qp_in, *in, pas));
+ mlx5_fill_page_frag_array(&qp->buf,
+ (__be64 *)MLX5_ADDR_OF(create_qp_in,
+ *in, pas));
err = mlx5_db_alloc(dev->mdev, &qp->db);
if (err) {
@@ -978,90 +1179,163 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev,
goto err_free;
}
- qp->sq.wrid = kmalloc(qp->sq.wqe_cnt * sizeof(*qp->sq.wrid), GFP_KERNEL);
- qp->sq.wr_data = kmalloc(qp->sq.wqe_cnt * sizeof(*qp->sq.wr_data), GFP_KERNEL);
- qp->rq.wrid = kmalloc(qp->rq.wqe_cnt * sizeof(*qp->rq.wrid), GFP_KERNEL);
- qp->sq.w_list = kmalloc(qp->sq.wqe_cnt * sizeof(*qp->sq.w_list), GFP_KERNEL);
- qp->sq.wqe_head = kmalloc(qp->sq.wqe_cnt * sizeof(*qp->sq.wqe_head), GFP_KERNEL);
+ qp->sq.wrid = kvmalloc_array(qp->sq.wqe_cnt,
+ sizeof(*qp->sq.wrid), GFP_KERNEL);
+ qp->sq.wr_data = kvmalloc_array(qp->sq.wqe_cnt,
+ sizeof(*qp->sq.wr_data), GFP_KERNEL);
+ qp->rq.wrid = kvmalloc_array(qp->rq.wqe_cnt,
+ sizeof(*qp->rq.wrid), GFP_KERNEL);
+ qp->sq.w_list = kvmalloc_array(qp->sq.wqe_cnt,
+ sizeof(*qp->sq.w_list), GFP_KERNEL);
+ qp->sq.wqe_head = kvmalloc_array(qp->sq.wqe_cnt,
+ sizeof(*qp->sq.wqe_head), GFP_KERNEL);
if (!qp->sq.wrid || !qp->sq.wr_data || !qp->rq.wrid ||
!qp->sq.w_list || !qp->sq.wqe_head) {
err = -ENOMEM;
goto err_wrid;
}
- qp->create_type = MLX5_QP_KERNEL;
return 0;
err_wrid:
+ kvfree(qp->sq.wqe_head);
+ kvfree(qp->sq.w_list);
+ kvfree(qp->sq.wrid);
+ kvfree(qp->sq.wr_data);
+ kvfree(qp->rq.wrid);
mlx5_db_free(dev->mdev, &qp->db);
- kfree(qp->sq.wqe_head);
- kfree(qp->sq.w_list);
- kfree(qp->sq.wrid);
- kfree(qp->sq.wr_data);
- kfree(qp->rq.wrid);
err_free:
kvfree(*in);
err_buf:
- mlx5_buf_free(dev->mdev, &qp->buf);
-
-err_uuar:
- free_uuar(&dev->mdev->priv.uuari, uuarn);
+ mlx5_frag_buf_free(dev->mdev, &qp->buf);
return err;
}
-static void destroy_qp_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
-{
- mlx5_db_free(dev->mdev, &qp->db);
- kfree(qp->sq.wqe_head);
- kfree(qp->sq.w_list);
- kfree(qp->sq.wrid);
- kfree(qp->sq.wr_data);
- kfree(qp->rq.wrid);
- mlx5_buf_free(dev->mdev, &qp->buf);
- free_uuar(&dev->mdev->priv.uuari, qp->bf->uuarn);
-}
-
static u32 get_rx_type(struct mlx5_ib_qp *qp, struct ib_qp_init_attr *attr)
{
- if (attr->srq || (attr->qp_type == IB_QPT_XRC_TGT) ||
- (attr->qp_type == IB_QPT_XRC_INI))
+ if (attr->srq || (qp->type == IB_QPT_XRC_TGT) ||
+ (qp->type == MLX5_IB_QPT_DCI) || (qp->type == IB_QPT_XRC_INI))
return MLX5_SRQ_RQ;
else if (!qp->has_rq)
return MLX5_ZERO_LEN_RQ;
- else
- return MLX5_NON_ZERO_RQ;
-}
-static int is_connected(enum ib_qp_type qp_type)
-{
- if (qp_type == IB_QPT_RC || qp_type == IB_QPT_UC)
- return 1;
-
- return 0;
+ return MLX5_NON_ZERO_RQ;
}
static int create_raw_packet_qp_tis(struct mlx5_ib_dev *dev,
- struct mlx5_ib_sq *sq, u32 tdn)
+ struct mlx5_ib_qp *qp,
+ struct mlx5_ib_sq *sq, u32 tdn,
+ struct ib_pd *pd)
{
- u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {0};
+ u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {};
void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
+ MLX5_SET(create_tis_in, in, uid, to_mpd(pd)->uid);
MLX5_SET(tisc, tisc, transport_domain, tdn);
- return mlx5_core_create_tis(dev->mdev, in, sizeof(in), &sq->tisn);
+ if (!mlx5_ib_lag_should_assign_affinity(dev) &&
+ mlx5_lag_is_lacp_owner(dev->mdev))
+ MLX5_SET(tisc, tisc, strict_lag_tx_port_affinity, 1);
+ if (qp->flags & IB_QP_CREATE_SOURCE_QPN)
+ MLX5_SET(tisc, tisc, underlay_qpn, qp->underlay_qpn);
+
+ return mlx5_core_create_tis(dev->mdev, in, &sq->tisn);
}
static void destroy_raw_packet_qp_tis(struct mlx5_ib_dev *dev,
- struct mlx5_ib_sq *sq)
+ struct mlx5_ib_sq *sq, struct ib_pd *pd)
+{
+ mlx5_cmd_destroy_tis(dev->mdev, sq->tisn, to_mpd(pd)->uid);
+}
+
+static void destroy_flow_rule_vport_sq(struct mlx5_ib_sq *sq)
+{
+ if (sq->flow_rule)
+ mlx5_del_flow_rules(sq->flow_rule);
+ sq->flow_rule = NULL;
+}
+
+static bool fr_supported(int ts_cap)
{
- mlx5_core_destroy_tis(dev->mdev, sq->tisn);
+ return ts_cap == MLX5_TIMESTAMP_FORMAT_CAP_FREE_RUNNING ||
+ ts_cap == MLX5_TIMESTAMP_FORMAT_CAP_FREE_RUNNING_AND_REAL_TIME;
+}
+
+static int get_ts_format(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
+ bool fr_sup, bool rt_sup)
+{
+ if (cq->private_flags & MLX5_IB_CQ_PR_FLAGS_REAL_TIME_TS) {
+ if (!rt_sup) {
+ mlx5_ib_dbg(dev,
+ "Real time TS format is not supported\n");
+ return -EOPNOTSUPP;
+ }
+ return MLX5_TIMESTAMP_FORMAT_REAL_TIME;
+ }
+ if (cq->create_flags & IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION) {
+ if (!fr_sup) {
+ mlx5_ib_dbg(dev,
+ "Free running TS format is not supported\n");
+ return -EOPNOTSUPP;
+ }
+ return MLX5_TIMESTAMP_FORMAT_FREE_RUNNING;
+ }
+ return fr_sup ? MLX5_TIMESTAMP_FORMAT_FREE_RUNNING :
+ MLX5_TIMESTAMP_FORMAT_DEFAULT;
+}
+
+static int get_rq_ts_format(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *recv_cq)
+{
+ u8 ts_cap = MLX5_CAP_GEN(dev->mdev, rq_ts_format);
+
+ return get_ts_format(dev, recv_cq, fr_supported(ts_cap),
+ rt_supported(ts_cap));
+}
+
+static int get_sq_ts_format(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *send_cq)
+{
+ u8 ts_cap = MLX5_CAP_GEN(dev->mdev, sq_ts_format);
+
+ return get_ts_format(dev, send_cq, fr_supported(ts_cap),
+ rt_supported(ts_cap));
+}
+
+static int get_qp_ts_format(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *send_cq,
+ struct mlx5_ib_cq *recv_cq)
+{
+ u8 ts_cap = MLX5_CAP_ROCE(dev->mdev, qp_ts_format);
+ bool fr_sup = fr_supported(ts_cap);
+ bool rt_sup = rt_supported(ts_cap);
+ u8 default_ts = fr_sup ? MLX5_TIMESTAMP_FORMAT_FREE_RUNNING :
+ MLX5_TIMESTAMP_FORMAT_DEFAULT;
+ int send_ts_format =
+ send_cq ? get_ts_format(dev, send_cq, fr_sup, rt_sup) :
+ default_ts;
+ int recv_ts_format =
+ recv_cq ? get_ts_format(dev, recv_cq, fr_sup, rt_sup) :
+ default_ts;
+
+ if (send_ts_format < 0 || recv_ts_format < 0)
+ return -EOPNOTSUPP;
+
+ if (send_ts_format != MLX5_TIMESTAMP_FORMAT_DEFAULT &&
+ recv_ts_format != MLX5_TIMESTAMP_FORMAT_DEFAULT &&
+ send_ts_format != recv_ts_format) {
+ mlx5_ib_dbg(
+ dev,
+ "The send ts_format does not match the receive ts_format\n");
+ return -EOPNOTSUPP;
+ }
+
+ return send_ts_format == default_ts ? recv_ts_format : send_ts_format;
}
static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
+ struct ib_udata *udata,
struct mlx5_ib_sq *sq, void *qpin,
- struct ib_pd *pd)
+ struct ib_pd *pd, struct mlx5_ib_cq *cq)
{
struct mlx5_ib_ubuffer *ubuffer = &sq->ubuffer;
__be64 *pas;
@@ -1071,31 +1345,49 @@ static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
void *wq;
int inlen;
int err;
- int page_shift = 0;
- int npages;
- int ncont = 0;
- u32 offset = 0;
-
- err = mlx5_ib_umem_get(dev, pd, ubuffer->buf_addr, ubuffer->buf_size,
- &sq->ubuffer.umem, &npages, &page_shift,
- &ncont, &offset);
- if (err)
- return err;
+ unsigned int page_offset_quantized;
+ unsigned long page_size;
+ int ts_format;
+
+ ts_format = get_sq_ts_format(dev, cq);
+ if (ts_format < 0)
+ return ts_format;
+
+ sq->ubuffer.umem = ib_umem_get(&dev->ib_dev, ubuffer->buf_addr,
+ ubuffer->buf_size, 0);
+ if (IS_ERR(sq->ubuffer.umem))
+ return PTR_ERR(sq->ubuffer.umem);
+ page_size = mlx5_umem_find_best_quantized_pgoff(
+ ubuffer->umem, wq, log_wq_pg_sz, MLX5_ADAPTER_PAGE_SHIFT,
+ page_offset, 64, &page_offset_quantized);
+ if (!page_size) {
+ err = -EINVAL;
+ goto err_umem;
+ }
- inlen = MLX5_ST_SZ_BYTES(create_sq_in) + sizeof(u64) * ncont;
- in = mlx5_vzalloc(inlen);
+ inlen = MLX5_ST_SZ_BYTES(create_sq_in) +
+ sizeof(u64) *
+ ib_umem_num_dma_blocks(sq->ubuffer.umem, page_size);
+ in = kvzalloc(inlen, GFP_KERNEL);
if (!in) {
err = -ENOMEM;
goto err_umem;
}
+ MLX5_SET(create_sq_in, in, uid, to_mpd(pd)->uid);
sqc = MLX5_ADDR_OF(create_sq_in, in, ctx);
MLX5_SET(sqc, sqc, flush_in_error_en, 1);
+ if (MLX5_CAP_ETH(dev->mdev, multi_pkt_send_wqe))
+ MLX5_SET(sqc, sqc, allow_multi_pkt_send_wqe, 1);
MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST);
+ MLX5_SET(sqc, sqc, ts_format, ts_format);
MLX5_SET(sqc, sqc, user_index, MLX5_GET(qpc, qpc, user_index));
MLX5_SET(sqc, sqc, cqn, MLX5_GET(qpc, qpc, cqn_snd));
MLX5_SET(sqc, sqc, tis_lst_sz, 1);
MLX5_SET(sqc, sqc, tis_num_0, sq->tisn);
+ if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads) &&
+ MLX5_CAP_ETH(dev->mdev, swp))
+ MLX5_SET(sqc, sqc, allow_swp, 1);
wq = MLX5_ADDR_OF(sqc, sqc, wq);
MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
@@ -1104,13 +1396,14 @@ static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
MLX5_SET64(wq, wq, dbr_addr, MLX5_GET64(qpc, qpc, dbr_addr));
MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB));
MLX5_SET(wq, wq, log_wq_sz, MLX5_GET(qpc, qpc, log_sq_size));
- MLX5_SET(wq, wq, log_wq_pg_sz, page_shift - MLX5_ADAPTER_PAGE_SHIFT);
- MLX5_SET(wq, wq, page_offset, offset);
+ MLX5_SET(wq, wq, log_wq_pg_sz,
+ order_base_2(page_size) - MLX5_ADAPTER_PAGE_SHIFT);
+ MLX5_SET(wq, wq, page_offset, page_offset_quantized);
pas = (__be64 *)MLX5_ADDR_OF(wq, wq, pas);
- mlx5_ib_populate_pas(dev, sq->ubuffer.umem, page_shift, pas, 0);
+ mlx5_ib_populate_pas(sq->ubuffer.umem, page_size, pas, 0);
- err = mlx5_core_create_sq_tracked(dev->mdev, in, inlen, &sq->base.mqp);
+ err = mlx5_core_create_sq_tracked(dev, in, inlen, &sq->base.mqp);
kvfree(in);
@@ -1129,71 +1422,75 @@ err_umem:
static void destroy_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
struct mlx5_ib_sq *sq)
{
- mlx5_core_destroy_sq_tracked(dev->mdev, &sq->base.mqp);
+ destroy_flow_rule_vport_sq(sq);
+ mlx5_core_destroy_sq_tracked(dev, &sq->base.mqp);
ib_umem_release(sq->ubuffer.umem);
}
-static int get_rq_pas_size(void *qpc)
-{
- u32 log_page_size = MLX5_GET(qpc, qpc, log_page_size) + 12;
- u32 log_rq_stride = MLX5_GET(qpc, qpc, log_rq_stride);
- u32 log_rq_size = MLX5_GET(qpc, qpc, log_rq_size);
- u32 page_offset = MLX5_GET(qpc, qpc, page_offset);
- u32 po_quanta = 1 << (log_page_size - 6);
- u32 rq_sz = 1 << (log_rq_size + 4 + log_rq_stride);
- u32 page_size = 1 << log_page_size;
- u32 rq_sz_po = rq_sz + (page_offset * po_quanta);
- u32 rq_num_pas = (rq_sz_po + page_size - 1) / page_size;
-
- return rq_num_pas * sizeof(u64);
-}
-
static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
- struct mlx5_ib_rq *rq, void *qpin)
+ struct mlx5_ib_rq *rq, void *qpin,
+ struct ib_pd *pd, struct mlx5_ib_cq *cq)
{
struct mlx5_ib_qp *mqp = rq->base.container_mibqp;
__be64 *pas;
- __be64 *qp_pas;
void *in;
void *rqc;
void *wq;
void *qpc = MLX5_ADDR_OF(create_qp_in, qpin, qpc);
- int inlen;
+ struct ib_umem *umem = rq->base.ubuffer.umem;
+ unsigned int page_offset_quantized;
+ unsigned long page_size = 0;
+ int ts_format;
+ size_t inlen;
int err;
- u32 rq_pas_size = get_rq_pas_size(qpc);
- inlen = MLX5_ST_SZ_BYTES(create_rq_in) + rq_pas_size;
- in = mlx5_vzalloc(inlen);
+ ts_format = get_rq_ts_format(dev, cq);
+ if (ts_format < 0)
+ return ts_format;
+
+ page_size = mlx5_umem_find_best_quantized_pgoff(umem, wq, log_wq_pg_sz,
+ MLX5_ADAPTER_PAGE_SHIFT,
+ page_offset, 64,
+ &page_offset_quantized);
+ if (!page_size)
+ return -EINVAL;
+
+ inlen = MLX5_ST_SZ_BYTES(create_rq_in) +
+ sizeof(u64) * ib_umem_num_dma_blocks(umem, page_size);
+ in = kvzalloc(inlen, GFP_KERNEL);
if (!in)
return -ENOMEM;
+ MLX5_SET(create_rq_in, in, uid, to_mpd(pd)->uid);
rqc = MLX5_ADDR_OF(create_rq_in, in, ctx);
- MLX5_SET(rqc, rqc, vsd, 1);
+ if (!(rq->flags & MLX5_IB_RQ_CVLAN_STRIPPING))
+ MLX5_SET(rqc, rqc, vsd, 1);
MLX5_SET(rqc, rqc, mem_rq_type, MLX5_RQC_MEM_RQ_TYPE_MEMORY_RQ_INLINE);
MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST);
+ MLX5_SET(rqc, rqc, ts_format, ts_format);
MLX5_SET(rqc, rqc, flush_in_error_en, 1);
MLX5_SET(rqc, rqc, user_index, MLX5_GET(qpc, qpc, user_index));
MLX5_SET(rqc, rqc, cqn, MLX5_GET(qpc, qpc, cqn_rcv));
- if (mqp->flags & MLX5_IB_QP_CAP_SCATTER_FCS)
+ if (mqp->flags & IB_QP_CREATE_SCATTER_FCS)
MLX5_SET(rqc, rqc, scatter_fcs, 1);
wq = MLX5_ADDR_OF(rqc, rqc, wq);
MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
- MLX5_SET(wq, wq, end_padding_mode,
- MLX5_GET(qpc, qpc, end_padding_mode));
- MLX5_SET(wq, wq, page_offset, MLX5_GET(qpc, qpc, page_offset));
+ if (rq->flags & MLX5_IB_RQ_PCI_WRITE_END_PADDING)
+ MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN);
+ MLX5_SET(wq, wq, page_offset, page_offset_quantized);
MLX5_SET(wq, wq, pd, MLX5_GET(qpc, qpc, pd));
MLX5_SET64(wq, wq, dbr_addr, MLX5_GET64(qpc, qpc, dbr_addr));
MLX5_SET(wq, wq, log_wq_stride, MLX5_GET(qpc, qpc, log_rq_stride) + 4);
- MLX5_SET(wq, wq, log_wq_pg_sz, MLX5_GET(qpc, qpc, log_page_size));
+ MLX5_SET(wq, wq, log_wq_pg_sz,
+ order_base_2(page_size) - MLX5_ADAPTER_PAGE_SHIFT);
MLX5_SET(wq, wq, log_wq_sz, MLX5_GET(qpc, qpc, log_rq_size));
pas = (__be64 *)MLX5_ADDR_OF(wq, wq, pas);
- qp_pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, qpin, pas);
- memcpy(pas, qp_pas, rq_pas_size);
+ mlx5_ib_populate_pas(umem, page_size, pas, 0);
- err = mlx5_core_create_rq_tracked(dev->mdev, in, inlen, &rq->base.mqp);
+ err = mlx5_core_create_rq_tracked(dev, in, inlen, &rq->base.mqp);
kvfree(in);
@@ -1203,81 +1500,151 @@ static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
static void destroy_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
struct mlx5_ib_rq *rq)
{
- mlx5_core_destroy_rq_tracked(dev->mdev, &rq->base.mqp);
+ mlx5_core_destroy_rq_tracked(dev, &rq->base.mqp);
+}
+
+static void destroy_raw_packet_qp_tir(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_rq *rq,
+ u32 qp_flags_en,
+ struct ib_pd *pd)
+{
+ if (qp_flags_en & (MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC |
+ MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC))
+ mlx5_ib_disable_lb(dev, false, true);
+ mlx5_cmd_destroy_tir(dev->mdev, rq->tirn, to_mpd(pd)->uid);
}
static int create_raw_packet_qp_tir(struct mlx5_ib_dev *dev,
- struct mlx5_ib_rq *rq, u32 tdn)
+ struct mlx5_ib_rq *rq, u32 tdn,
+ u32 *qp_flags_en, struct ib_pd *pd,
+ u32 *out)
{
+ u8 lb_flag = 0;
u32 *in;
void *tirc;
int inlen;
int err;
inlen = MLX5_ST_SZ_BYTES(create_tir_in);
- in = mlx5_vzalloc(inlen);
+ in = kvzalloc(inlen, GFP_KERNEL);
if (!in)
return -ENOMEM;
+ MLX5_SET(create_tir_in, in, uid, to_mpd(pd)->uid);
tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_DIRECT);
MLX5_SET(tirc, tirc, inline_rqn, rq->base.mqp.qpn);
MLX5_SET(tirc, tirc, transport_domain, tdn);
+ if (*qp_flags_en & MLX5_QP_FLAG_TUNNEL_OFFLOADS)
+ MLX5_SET(tirc, tirc, tunneled_offload_en, 1);
+
+ if (*qp_flags_en & MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC)
+ lb_flag |= MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST;
+
+ if (*qp_flags_en & MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC)
+ lb_flag |= MLX5_TIRC_SELF_LB_BLOCK_BLOCK_MULTICAST;
+
+ if (dev->is_rep) {
+ lb_flag |= MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST;
+ *qp_flags_en |= MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC;
+ }
- err = mlx5_core_create_tir(dev->mdev, in, inlen, &rq->tirn);
+ MLX5_SET(tirc, tirc, self_lb_block, lb_flag);
+ MLX5_SET(create_tir_in, in, opcode, MLX5_CMD_OP_CREATE_TIR);
+ err = mlx5_cmd_exec_inout(dev->mdev, create_tir, in, out);
+ rq->tirn = MLX5_GET(create_tir_out, out, tirn);
+ if (!err && MLX5_GET(tirc, tirc, self_lb_block)) {
+ err = mlx5_ib_enable_lb(dev, false, true);
+ if (err)
+ destroy_raw_packet_qp_tir(dev, rq, 0, pd);
+ }
kvfree(in);
return err;
}
-static void destroy_raw_packet_qp_tir(struct mlx5_ib_dev *dev,
- struct mlx5_ib_rq *rq)
-{
- mlx5_core_destroy_tir(dev->mdev, rq->tirn);
-}
-
static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
- u32 *in,
- struct ib_pd *pd)
+ u32 *in, size_t inlen, struct ib_pd *pd,
+ struct ib_udata *udata,
+ struct mlx5_ib_create_qp_resp *resp,
+ struct ib_qp_init_attr *init_attr)
{
struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp;
struct mlx5_ib_sq *sq = &raw_packet_qp->sq;
struct mlx5_ib_rq *rq = &raw_packet_qp->rq;
- struct ib_uobject *uobj = pd->uobject;
- struct ib_ucontext *ucontext = uobj->context;
- struct mlx5_ib_ucontext *mucontext = to_mucontext(ucontext);
+ struct mlx5_ib_ucontext *mucontext = rdma_udata_to_drv_context(
+ udata, struct mlx5_ib_ucontext, ibucontext);
int err;
u32 tdn = mucontext->tdn;
+ u16 uid = to_mpd(pd)->uid;
+ u32 out[MLX5_ST_SZ_DW(create_tir_out)] = {};
+ if (!qp->sq.wqe_cnt && !qp->rq.wqe_cnt)
+ return -EINVAL;
if (qp->sq.wqe_cnt) {
- err = create_raw_packet_qp_tis(dev, sq, tdn);
+ err = create_raw_packet_qp_tis(dev, qp, sq, tdn, pd);
if (err)
return err;
- err = create_raw_packet_qp_sq(dev, sq, in, pd);
+ err = create_raw_packet_qp_sq(dev, udata, sq, in, pd,
+ to_mcq(init_attr->send_cq));
if (err)
goto err_destroy_tis;
+ if (uid) {
+ resp->tisn = sq->tisn;
+ resp->comp_mask |= MLX5_IB_CREATE_QP_RESP_MASK_TISN;
+ resp->sqn = sq->base.mqp.qpn;
+ resp->comp_mask |= MLX5_IB_CREATE_QP_RESP_MASK_SQN;
+ }
+
sq->base.container_mibqp = qp;
+ sq->base.mqp.event = mlx5_ib_qp_event;
}
if (qp->rq.wqe_cnt) {
rq->base.container_mibqp = qp;
- err = create_raw_packet_qp_rq(dev, rq, in);
+ if (qp->flags & IB_QP_CREATE_CVLAN_STRIPPING)
+ rq->flags |= MLX5_IB_RQ_CVLAN_STRIPPING;
+ if (qp->flags & IB_QP_CREATE_PCI_WRITE_END_PADDING)
+ rq->flags |= MLX5_IB_RQ_PCI_WRITE_END_PADDING;
+ err = create_raw_packet_qp_rq(dev, rq, in, pd,
+ to_mcq(init_attr->recv_cq));
if (err)
goto err_destroy_sq;
-
- err = create_raw_packet_qp_tir(dev, rq, tdn);
+ err = create_raw_packet_qp_tir(dev, rq, tdn, &qp->flags_en, pd,
+ out);
if (err)
goto err_destroy_rq;
+
+ if (uid) {
+ resp->rqn = rq->base.mqp.qpn;
+ resp->comp_mask |= MLX5_IB_CREATE_QP_RESP_MASK_RQN;
+ resp->tirn = rq->tirn;
+ resp->comp_mask |= MLX5_IB_CREATE_QP_RESP_MASK_TIRN;
+ if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, sw_owner) ||
+ MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, sw_owner_v2)) {
+ resp->tir_icm_addr = MLX5_GET(
+ create_tir_out, out, icm_address_31_0);
+ resp->tir_icm_addr |=
+ (u64)MLX5_GET(create_tir_out, out,
+ icm_address_39_32)
+ << 32;
+ resp->tir_icm_addr |=
+ (u64)MLX5_GET(create_tir_out, out,
+ icm_address_63_40)
+ << 40;
+ resp->comp_mask |=
+ MLX5_IB_CREATE_QP_RESP_MASK_TIR_ICM_ADDR;
+ }
+ }
}
qp->trans_qp.base.mqp.qpn = qp->sq.wqe_cnt ? sq->base.mqp.qpn :
rq->base.mqp.qpn;
-
return 0;
err_destroy_rq:
@@ -1287,7 +1654,7 @@ err_destroy_sq:
return err;
destroy_raw_packet_qp_sq(dev, sq);
err_destroy_tis:
- destroy_raw_packet_qp_tis(dev, sq);
+ destroy_raw_packet_qp_tis(dev, sq, pd);
return err;
}
@@ -1300,13 +1667,13 @@ static void destroy_raw_packet_qp(struct mlx5_ib_dev *dev,
struct mlx5_ib_rq *rq = &raw_packet_qp->rq;
if (qp->rq.wqe_cnt) {
- destroy_raw_packet_qp_tir(dev, rq);
+ destroy_raw_packet_qp_tir(dev, rq, qp->flags_en, qp->ibqp.pd);
destroy_raw_packet_qp_rq(dev, rq);
}
if (qp->sq.wqe_cnt) {
destroy_raw_packet_qp_sq(dev, sq);
- destroy_raw_packet_qp_tis(dev, sq);
+ destroy_raw_packet_qp_tis(dev, sq, qp->ibqp.pd);
}
}
@@ -1324,78 +1691,74 @@ static void raw_packet_qp_copy_info(struct mlx5_ib_qp *qp,
static void destroy_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
{
- mlx5_core_destroy_tir(dev->mdev, qp->rss_qp.tirn);
-}
+ if (qp->flags_en & (MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC |
+ MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC))
+ mlx5_ib_disable_lb(dev, false, true);
+ mlx5_cmd_destroy_tir(dev->mdev, qp->rss_qp.tirn,
+ to_mpd(qp->ibqp.pd)->uid);
+}
+
+struct mlx5_create_qp_params {
+ struct ib_udata *udata;
+ size_t inlen;
+ size_t outlen;
+ size_t ucmd_size;
+ void *ucmd;
+ u8 is_rss_raw : 1;
+ struct ib_qp_init_attr *attr;
+ u32 uidx;
+ struct mlx5_ib_create_qp_resp resp;
+};
-static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
- struct ib_pd *pd,
- struct ib_qp_init_attr *init_attr,
- struct ib_udata *udata)
+static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct ib_pd *pd,
+ struct mlx5_ib_qp *qp,
+ struct mlx5_create_qp_params *params)
{
- struct ib_uobject *uobj = pd->uobject;
- struct ib_ucontext *ucontext = uobj->context;
- struct mlx5_ib_ucontext *mucontext = to_mucontext(ucontext);
- struct mlx5_ib_create_qp_resp resp = {};
+ struct ib_qp_init_attr *init_attr = params->attr;
+ struct mlx5_ib_create_qp_rss *ucmd = params->ucmd;
+ struct ib_udata *udata = params->udata;
+ struct mlx5_ib_ucontext *mucontext = rdma_udata_to_drv_context(
+ udata, struct mlx5_ib_ucontext, ibucontext);
int inlen;
+ int outlen;
int err;
u32 *in;
+ u32 *out;
void *tirc;
void *hfso;
u32 selected_fields = 0;
- size_t min_resp_len;
+ u32 outer_l4;
u32 tdn = mucontext->tdn;
- struct mlx5_ib_create_qp_rss ucmd = {};
- size_t required_cmd_sz;
+ u8 lb_flag = 0;
- if (init_attr->qp_type != IB_QPT_RAW_PACKET)
+ if (ucmd->comp_mask) {
+ mlx5_ib_dbg(dev, "invalid comp mask\n");
return -EOPNOTSUPP;
-
- if (init_attr->create_flags || init_attr->send_cq)
- return -EINVAL;
-
- min_resp_len = offsetof(typeof(resp), uuar_index) + sizeof(resp.uuar_index);
- if (udata->outlen < min_resp_len)
- return -EINVAL;
-
- required_cmd_sz = offsetof(typeof(ucmd), reserved1) + sizeof(ucmd.reserved1);
- if (udata->inlen < required_cmd_sz) {
- mlx5_ib_dbg(dev, "invalid inlen\n");
- return -EINVAL;
}
- if (udata->inlen > sizeof(ucmd) &&
- !ib_is_udata_cleared(udata, sizeof(ucmd),
- udata->inlen - sizeof(ucmd))) {
- mlx5_ib_dbg(dev, "inlen is not supported\n");
+ if (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_INNER &&
+ !(ucmd->flags & MLX5_QP_FLAG_TUNNEL_OFFLOADS)) {
+ mlx5_ib_dbg(dev, "Tunnel offloads must be set for inner RSS\n");
return -EOPNOTSUPP;
}
- if (ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen))) {
- mlx5_ib_dbg(dev, "copy failed\n");
- return -EFAULT;
- }
-
- if (ucmd.comp_mask) {
- mlx5_ib_dbg(dev, "invalid comp mask\n");
- return -EOPNOTSUPP;
- }
+ if (dev->is_rep)
+ qp->flags_en |= MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC;
- if (memchr_inv(ucmd.reserved, 0, sizeof(ucmd.reserved)) || ucmd.reserved1) {
- mlx5_ib_dbg(dev, "invalid reserved\n");
- return -EOPNOTSUPP;
- }
+ if (qp->flags_en & MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC)
+ lb_flag |= MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST;
- err = ib_copy_to_udata(udata, &resp, min_resp_len);
- if (err) {
- mlx5_ib_dbg(dev, "copy failed\n");
- return -EINVAL;
- }
+ if (qp->flags_en & MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC)
+ lb_flag |= MLX5_TIRC_SELF_LB_BLOCK_BLOCK_MULTICAST;
inlen = MLX5_ST_SZ_BYTES(create_tir_in);
- in = mlx5_vzalloc(inlen);
+ outlen = MLX5_ST_SZ_BYTES(create_tir_out);
+ in = kvzalloc(inlen + outlen, GFP_KERNEL);
if (!in)
return -ENOMEM;
+ out = in + MLX5_ST_SZ_DW(create_tir_in);
+ MLX5_SET(create_tir_in, in, uid, to_mpd(pd)->uid);
tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
MLX5_SET(tirc, tirc, disp_type,
MLX5_TIRC_DISP_TYPE_INDIRECT);
@@ -1404,20 +1767,30 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
MLX5_SET(tirc, tirc, transport_domain, tdn);
hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
- switch (ucmd.rx_hash_function) {
+
+ if (ucmd->flags & MLX5_QP_FLAG_TUNNEL_OFFLOADS)
+ MLX5_SET(tirc, tirc, tunneled_offload_en, 1);
+
+ MLX5_SET(tirc, tirc, self_lb_block, lb_flag);
+
+ if (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_INNER)
+ hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_inner);
+ else
+ hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
+
+ switch (ucmd->rx_hash_function) {
case MLX5_RX_HASH_FUNC_TOEPLITZ:
{
void *rss_key = MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key);
size_t len = MLX5_FLD_SZ_BYTES(tirc, rx_hash_toeplitz_key);
- if (len != ucmd.rx_key_len) {
+ if (len != ucmd->rx_key_len) {
err = -EINVAL;
goto err;
}
MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_TOEPLITZ);
- MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
- memcpy(rss_key, ucmd.rx_hash_key, len);
+ memcpy(rss_key, ucmd->rx_hash_key, len);
break;
}
default:
@@ -1425,7 +1798,7 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
goto err;
}
- if (!ucmd.rx_hash_fields_mask) {
+ if (!ucmd->rx_hash_fields_mask) {
/* special case when this TIR serves as steering entry without hashing */
if (!init_attr->rwq_ind_tbl->log_ind_tbl_size)
goto create_tir;
@@ -1433,70 +1806,109 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
goto err;
}
- if (((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV4) ||
- (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV4)) &&
- ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV6) ||
- (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV6))) {
+ if (((ucmd->rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV4) ||
+ (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV4)) &&
+ ((ucmd->rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV6) ||
+ (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV6))) {
err = -EINVAL;
goto err;
}
/* If none of IPV4 & IPV6 SRC/DST was set - this bit field is ignored */
- if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV4) ||
- (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV4))
+ if ((ucmd->rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV4) ||
+ (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV4))
MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
MLX5_L3_PROT_TYPE_IPV4);
- else if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV6) ||
- (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV6))
+ else if ((ucmd->rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV6) ||
+ (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV6))
MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
MLX5_L3_PROT_TYPE_IPV6);
- if (((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_TCP) ||
- (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_TCP)) &&
- ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_UDP) ||
- (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_UDP))) {
+ outer_l4 = ((ucmd->rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_TCP) ||
+ (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_TCP))
+ << 0 |
+ ((ucmd->rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_UDP) ||
+ (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_UDP))
+ << 1 |
+ (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_IPSEC_SPI) << 2;
+
+ /* Check that only one l4 protocol is set */
+ if (outer_l4 & (outer_l4 - 1)) {
err = -EINVAL;
goto err;
}
/* If none of TCP & UDP SRC/DST was set - this bit field is ignored */
- if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_TCP) ||
- (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_TCP))
+ if ((ucmd->rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_TCP) ||
+ (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_TCP))
MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
MLX5_L4_PROT_TYPE_TCP);
- else if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_UDP) ||
- (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_UDP))
+ else if ((ucmd->rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_UDP) ||
+ (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_UDP))
MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
MLX5_L4_PROT_TYPE_UDP);
- if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV4) ||
- (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV6))
+ if ((ucmd->rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV4) ||
+ (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_SRC_IPV6))
selected_fields |= MLX5_HASH_FIELD_SEL_SRC_IP;
- if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV4) ||
- (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV6))
+ if ((ucmd->rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV4) ||
+ (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_DST_IPV6))
selected_fields |= MLX5_HASH_FIELD_SEL_DST_IP;
- if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_TCP) ||
- (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_UDP))
+ if ((ucmd->rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_TCP) ||
+ (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_SRC_PORT_UDP))
selected_fields |= MLX5_HASH_FIELD_SEL_L4_SPORT;
- if ((ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_TCP) ||
- (ucmd.rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_UDP))
+ if ((ucmd->rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_TCP) ||
+ (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_DST_PORT_UDP))
selected_fields |= MLX5_HASH_FIELD_SEL_L4_DPORT;
+ if (ucmd->rx_hash_fields_mask & MLX5_RX_HASH_IPSEC_SPI)
+ selected_fields |= MLX5_HASH_FIELD_SEL_IPSEC_SPI;
+
MLX5_SET(rx_hash_field_select, hfso, selected_fields, selected_fields);
create_tir:
- err = mlx5_core_create_tir(dev->mdev, in, inlen, &qp->rss_qp.tirn);
+ MLX5_SET(create_tir_in, in, opcode, MLX5_CMD_OP_CREATE_TIR);
+ err = mlx5_cmd_exec_inout(dev->mdev, create_tir, in, out);
+
+ qp->rss_qp.tirn = MLX5_GET(create_tir_out, out, tirn);
+ if (!err && MLX5_GET(tirc, tirc, self_lb_block)) {
+ err = mlx5_ib_enable_lb(dev, false, true);
+
+ if (err)
+ mlx5_cmd_destroy_tir(dev->mdev, qp->rss_qp.tirn,
+ to_mpd(pd)->uid);
+ }
if (err)
goto err;
+ if (mucontext->devx_uid) {
+ params->resp.comp_mask |= MLX5_IB_CREATE_QP_RESP_MASK_TIRN;
+ params->resp.tirn = qp->rss_qp.tirn;
+ if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, sw_owner) ||
+ MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, sw_owner_v2)) {
+ params->resp.tir_icm_addr =
+ MLX5_GET(create_tir_out, out, icm_address_31_0);
+ params->resp.tir_icm_addr |=
+ (u64)MLX5_GET(create_tir_out, out,
+ icm_address_39_32)
+ << 32;
+ params->resp.tir_icm_addr |=
+ (u64)MLX5_GET(create_tir_out, out,
+ icm_address_63_40)
+ << 40;
+ params->resp.comp_mask |=
+ MLX5_IB_CREATE_QP_RESP_MASK_TIR_ICM_ADDR;
+ }
+ }
+
kvfree(in);
/* qpn is reserved for that QP */
qp->trans_qp.base.mqp.qpn = 0;
- qp->flags |= MLX5_IB_QP_RSS;
+ qp->is_rss = true;
return 0;
err:
@@ -1504,235 +1916,455 @@ err:
return err;
}
-static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
- struct ib_qp_init_attr *init_attr,
- struct ib_udata *udata, struct mlx5_ib_qp *qp)
+static void configure_requester_scat_cqe(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_qp *qp,
+ struct ib_qp_init_attr *init_attr,
+ void *qpc)
+{
+ int scqe_sz;
+ bool allow_scat_cqe = false;
+
+ allow_scat_cqe = qp->flags_en & MLX5_QP_FLAG_ALLOW_SCATTER_CQE;
+
+ if (!allow_scat_cqe && init_attr->sq_sig_type != IB_SIGNAL_ALL_WR)
+ return;
+
+ scqe_sz = mlx5_ib_get_cqe_size(init_attr->send_cq);
+ if (scqe_sz == 128) {
+ MLX5_SET(qpc, qpc, cs_req, MLX5_REQ_SCAT_DATA64_CQE);
+ return;
+ }
+
+ if (init_attr->qp_type != MLX5_IB_QPT_DCI ||
+ MLX5_CAP_GEN(dev->mdev, dc_req_scat_data_cqe))
+ MLX5_SET(qpc, qpc, cs_req, MLX5_REQ_SCAT_DATA32_CQE);
+}
+
+static int atomic_size_to_mode(int size_mask)
+{
+ /* driver does not support atomic_size > 256B
+ * and does not know how to translate bigger sizes
+ */
+ int supported_size_mask = size_mask & 0x1ff;
+ int log_max_size;
+
+ if (!supported_size_mask)
+ return -EOPNOTSUPP;
+
+ log_max_size = __fls(supported_size_mask);
+
+ if (log_max_size > 3)
+ return log_max_size;
+
+ return MLX5_ATOMIC_MODE_8B;
+}
+
+static int get_atomic_mode(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_qp *qp)
+{
+ u8 atomic_operations = MLX5_CAP_ATOMIC(dev->mdev, atomic_operations);
+ u8 atomic = MLX5_CAP_GEN(dev->mdev, atomic);
+ int atomic_mode = -EOPNOTSUPP;
+ int atomic_size_mask;
+
+ if (!atomic)
+ return -EOPNOTSUPP;
+
+ if (qp->type == MLX5_IB_QPT_DCT)
+ atomic_size_mask = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_dc);
+ else
+ atomic_size_mask = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_qp);
+
+ if ((atomic_operations & MLX5_ATOMIC_OPS_EXTENDED_CMP_SWAP) ||
+ (atomic_operations & MLX5_ATOMIC_OPS_EXTENDED_FETCH_ADD))
+ atomic_mode = atomic_size_to_mode(atomic_size_mask);
+
+ if (atomic_mode <= 0 &&
+ (atomic_operations & MLX5_ATOMIC_OPS_CMP_SWAP &&
+ atomic_operations & MLX5_ATOMIC_OPS_FETCH_ADD))
+ atomic_mode = MLX5_ATOMIC_MODE_IB_COMP;
+
+ /* OOO DP QPs do not support larger than 8-Bytes atomic operations */
+ if (atomic_mode > MLX5_ATOMIC_MODE_8B && qp->is_ooo_rq)
+ atomic_mode = MLX5_ATOMIC_MODE_8B;
+
+ return atomic_mode;
+}
+
+static int create_xrc_tgt_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
+ struct mlx5_create_qp_params *params)
{
+ struct ib_qp_init_attr *attr = params->attr;
+ u32 uidx = params->uidx;
+ struct mlx5_ib_resources *devr = &dev->devr;
+ u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {};
+ int inlen = MLX5_ST_SZ_BYTES(create_qp_in);
+ struct mlx5_core_dev *mdev = dev->mdev;
+ struct mlx5_ib_qp_base *base;
+ unsigned long flags;
+ void *qpc;
+ u32 *in;
+ int err;
+
+ if (attr->sq_sig_type == IB_SIGNAL_ALL_WR)
+ qp->sq_signal_bits = MLX5_WQE_CTRL_CQ_UPDATE;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
+
+ MLX5_SET(qpc, qpc, st, MLX5_QP_ST_XRC);
+ MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
+ MLX5_SET(qpc, qpc, pd, to_mpd(devr->p0)->pdn);
+
+ if (qp->flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK)
+ MLX5_SET(qpc, qpc, block_lb_mc, 1);
+ if (qp->flags & IB_QP_CREATE_CROSS_CHANNEL)
+ MLX5_SET(qpc, qpc, cd_master, 1);
+ if (qp->flags & IB_QP_CREATE_MANAGED_SEND)
+ MLX5_SET(qpc, qpc, cd_slave_send, 1);
+ if (qp->flags & IB_QP_CREATE_MANAGED_RECV)
+ MLX5_SET(qpc, qpc, cd_slave_receive, 1);
+
+ MLX5_SET(qpc, qpc, ts_format, mlx5_get_qp_default_ts(dev->mdev));
+ MLX5_SET(qpc, qpc, rq_type, MLX5_SRQ_RQ);
+ MLX5_SET(qpc, qpc, no_sq, 1);
+ MLX5_SET(qpc, qpc, cqn_rcv, to_mcq(devr->c0)->mcq.cqn);
+ MLX5_SET(qpc, qpc, cqn_snd, to_mcq(devr->c0)->mcq.cqn);
+ MLX5_SET(qpc, qpc, srqn_rmpn_xrqn, to_msrq(devr->s0)->msrq.srqn);
+ MLX5_SET(qpc, qpc, xrcd, to_mxrcd(attr->xrcd)->xrcdn);
+ MLX5_SET64(qpc, qpc, dbr_addr, qp->db.dma);
+
+ /* 0xffffff means we ask to work with cqe version 0 */
+ if (MLX5_CAP_GEN(mdev, cqe_version) == MLX5_CQE_VERSION_V1)
+ MLX5_SET(qpc, qpc, user_index, uidx);
+
+ if (qp->flags & IB_QP_CREATE_PCI_WRITE_END_PADDING) {
+ MLX5_SET(qpc, qpc, end_padding_mode,
+ MLX5_WQ_END_PAD_MODE_ALIGN);
+ /* Special case to clean flag */
+ qp->flags &= ~IB_QP_CREATE_PCI_WRITE_END_PADDING;
+ }
+
+ base = &qp->trans_qp.base;
+ err = mlx5_qpc_create_qp(dev, &base->mqp, in, inlen, out);
+ kvfree(in);
+ if (err)
+ return err;
+
+ base->container_mibqp = qp;
+ base->mqp.event = mlx5_ib_qp_event;
+ if (MLX5_CAP_GEN(mdev, ece_support))
+ params->resp.ece_options = MLX5_GET(create_qp_out, out, ece);
+
+ spin_lock_irqsave(&dev->reset_flow_resource_lock, flags);
+ list_add_tail(&qp->qps_list, &dev->qp_list);
+ spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags);
+
+ qp->trans_qp.xrcdn = to_mxrcd(attr->xrcd)->xrcdn;
+ return 0;
+}
+
+static int create_dci(struct mlx5_ib_dev *dev, struct ib_pd *pd,
+ struct mlx5_ib_qp *qp,
+ struct mlx5_create_qp_params *params)
+{
+ struct ib_qp_init_attr *init_attr = params->attr;
+ struct mlx5_ib_create_qp *ucmd = params->ucmd;
+ u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {};
+ struct ib_udata *udata = params->udata;
+ u32 uidx = params->uidx;
struct mlx5_ib_resources *devr = &dev->devr;
int inlen = MLX5_ST_SZ_BYTES(create_qp_in);
struct mlx5_core_dev *mdev = dev->mdev;
- struct mlx5_ib_create_qp_resp resp;
struct mlx5_ib_cq *send_cq;
struct mlx5_ib_cq *recv_cq;
unsigned long flags;
- u32 uidx = MLX5_IB_DEFAULT_UIDX;
- struct mlx5_ib_create_qp ucmd;
struct mlx5_ib_qp_base *base;
+ int ts_format;
+ int mlx5_st;
void *qpc;
u32 *in;
int err;
- base = init_attr->qp_type == IB_QPT_RAW_PACKET ?
- &qp->raw_packet_qp.rq.base :
- &qp->trans_qp.base;
-
- if (init_attr->qp_type != IB_QPT_RAW_PACKET)
- mlx5_ib_odp_create_qp(qp);
-
- mutex_init(&qp->mutex);
spin_lock_init(&qp->sq.lock);
spin_lock_init(&qp->rq.lock);
- if (init_attr->rwq_ind_tbl) {
- if (!udata)
- return -ENOSYS;
+ mlx5_st = to_mlx5_st(qp->type);
+ if (mlx5_st < 0)
+ return -EINVAL;
+
+ if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
+ qp->sq_signal_bits = MLX5_WQE_CTRL_CQ_UPDATE;
+
+ base = &qp->trans_qp.base;
- err = create_rss_raw_qp_tir(dev, qp, pd, init_attr, udata);
+ qp->has_rq = qp_has_rq(init_attr);
+ err = set_rq_size(dev, &init_attr->cap, qp->has_rq, qp, ucmd);
+ if (err) {
+ mlx5_ib_dbg(dev, "err %d\n", err);
return err;
}
- if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) {
- if (!MLX5_CAP_GEN(mdev, block_lb_mc)) {
- mlx5_ib_dbg(dev, "block multicast loopback isn't supported\n");
- return -EINVAL;
- } else {
- qp->flags |= MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK;
- }
- }
+ if (ucmd->rq_wqe_shift != qp->rq.wqe_shift ||
+ ucmd->rq_wqe_count != qp->rq.wqe_cnt)
+ return -EINVAL;
- if (init_attr->create_flags &
- (IB_QP_CREATE_CROSS_CHANNEL |
- IB_QP_CREATE_MANAGED_SEND |
- IB_QP_CREATE_MANAGED_RECV)) {
- if (!MLX5_CAP_GEN(mdev, cd)) {
- mlx5_ib_dbg(dev, "cross-channel isn't supported\n");
- return -EINVAL;
- }
- if (init_attr->create_flags & IB_QP_CREATE_CROSS_CHANNEL)
- qp->flags |= MLX5_IB_QP_CROSS_CHANNEL;
- if (init_attr->create_flags & IB_QP_CREATE_MANAGED_SEND)
- qp->flags |= MLX5_IB_QP_MANAGED_SEND;
- if (init_attr->create_flags & IB_QP_CREATE_MANAGED_RECV)
- qp->flags |= MLX5_IB_QP_MANAGED_RECV;
- }
-
- if (init_attr->qp_type == IB_QPT_UD &&
- (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO))
- if (!MLX5_CAP_GEN(mdev, ipoib_basic_offloads)) {
- mlx5_ib_dbg(dev, "ipoib UD lso qp isn't supported\n");
- return -EOPNOTSUPP;
- }
+ if (ucmd->sq_wqe_count > (1 << MLX5_CAP_GEN(mdev, log_max_qp_sz)))
+ return -EINVAL;
- if (init_attr->create_flags & IB_QP_CREATE_SCATTER_FCS) {
- if (init_attr->qp_type != IB_QPT_RAW_PACKET) {
- mlx5_ib_dbg(dev, "Scatter FCS is supported only for Raw Packet QPs");
- return -EOPNOTSUPP;
- }
- if (!MLX5_CAP_GEN(dev->mdev, eth_net_offloads) ||
- !MLX5_CAP_ETH(dev->mdev, scatter_fcs)) {
- mlx5_ib_dbg(dev, "Scatter FCS isn't supported\n");
- return -EOPNOTSUPP;
- }
- qp->flags |= MLX5_IB_QP_CAP_SCATTER_FCS;
+ ts_format = get_qp_ts_format(dev, to_mcq(init_attr->send_cq),
+ to_mcq(init_attr->recv_cq));
+
+ if (ts_format < 0)
+ return ts_format;
+
+ err = _create_user_qp(dev, pd, qp, udata, init_attr, &in, &params->resp,
+ &inlen, base, ucmd);
+ if (err)
+ return err;
+
+ if (MLX5_CAP_GEN(mdev, ece_support))
+ MLX5_SET(create_qp_in, in, ece, ucmd->ece_options);
+ qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
+
+ MLX5_SET(qpc, qpc, st, mlx5_st);
+ MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
+ MLX5_SET(qpc, qpc, pd, to_mpd(pd)->pdn);
+
+ if (qp->flags_en & MLX5_QP_FLAG_SIGNATURE)
+ MLX5_SET(qpc, qpc, wq_signature, 1);
+
+ if (qp->flags & IB_QP_CREATE_CROSS_CHANNEL)
+ MLX5_SET(qpc, qpc, cd_master, 1);
+ if (qp->flags & IB_QP_CREATE_MANAGED_SEND)
+ MLX5_SET(qpc, qpc, cd_slave_send, 1);
+ if (qp->flags_en & MLX5_QP_FLAG_SCATTER_CQE)
+ configure_requester_scat_cqe(dev, qp, init_attr, qpc);
+
+ if (qp->rq.wqe_cnt) {
+ MLX5_SET(qpc, qpc, log_rq_stride, qp->rq.wqe_shift - 4);
+ MLX5_SET(qpc, qpc, log_rq_size, ilog2(qp->rq.wqe_cnt));
}
- if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
- qp->sq_signal_bits = MLX5_WQE_CTRL_CQ_UPDATE;
+ if (qp->flags_en & MLX5_QP_FLAG_DCI_STREAM) {
+ MLX5_SET(qpc, qpc, log_num_dci_stream_channels,
+ ucmd->dci_streams.log_num_concurent);
+ MLX5_SET(qpc, qpc, log_num_dci_errored_streams,
+ ucmd->dci_streams.log_num_errored);
+ }
- if (pd && pd->uobject) {
- if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) {
- mlx5_ib_dbg(dev, "copy failed\n");
- return -EFAULT;
- }
+ MLX5_SET(qpc, qpc, ts_format, ts_format);
+ MLX5_SET(qpc, qpc, rq_type, get_rx_type(qp, init_attr));
- err = get_qp_user_index(to_mucontext(pd->uobject->context),
- &ucmd, udata->inlen, &uidx);
- if (err)
- return err;
+ MLX5_SET(qpc, qpc, log_sq_size, ilog2(qp->sq.wqe_cnt));
- qp->wq_sig = !!(ucmd.flags & MLX5_QP_FLAG_SIGNATURE);
- qp->scat_cqe = !!(ucmd.flags & MLX5_QP_FLAG_SCATTER_CQE);
+ /* Set default resources */
+ if (init_attr->srq) {
+ MLX5_SET(qpc, qpc, xrcd, devr->xrcdn0);
+ MLX5_SET(qpc, qpc, srqn_rmpn_xrqn,
+ to_msrq(init_attr->srq)->msrq.srqn);
} else {
- qp->wq_sig = !!wq_signature;
+ MLX5_SET(qpc, qpc, xrcd, devr->xrcdn1);
+ MLX5_SET(qpc, qpc, srqn_rmpn_xrqn,
+ to_msrq(devr->s1)->msrq.srqn);
+ }
+
+ if (init_attr->send_cq)
+ MLX5_SET(qpc, qpc, cqn_snd,
+ to_mcq(init_attr->send_cq)->mcq.cqn);
+
+ if (init_attr->recv_cq)
+ MLX5_SET(qpc, qpc, cqn_rcv,
+ to_mcq(init_attr->recv_cq)->mcq.cqn);
+
+ MLX5_SET64(qpc, qpc, dbr_addr, qp->db.dma);
+
+ /* 0xffffff means we ask to work with cqe version 0 */
+ if (MLX5_CAP_GEN(mdev, cqe_version) == MLX5_CQE_VERSION_V1)
+ MLX5_SET(qpc, qpc, user_index, uidx);
+
+ if (qp->flags & IB_QP_CREATE_PCI_WRITE_END_PADDING) {
+ MLX5_SET(qpc, qpc, end_padding_mode,
+ MLX5_WQ_END_PAD_MODE_ALIGN);
+ /* Special case to clean flag */
+ qp->flags &= ~IB_QP_CREATE_PCI_WRITE_END_PADDING;
}
+ err = mlx5_qpc_create_qp(dev, &base->mqp, in, inlen, out);
+
+ kvfree(in);
+ if (err)
+ goto err_create;
+
+ base->container_mibqp = qp;
+ base->mqp.event = mlx5_ib_qp_event;
+ if (MLX5_CAP_GEN(mdev, ece_support))
+ params->resp.ece_options = MLX5_GET(create_qp_out, out, ece);
+
+ get_cqs(qp->type, init_attr->send_cq, init_attr->recv_cq,
+ &send_cq, &recv_cq);
+ spin_lock_irqsave(&dev->reset_flow_resource_lock, flags);
+ mlx5_ib_lock_cqs(send_cq, recv_cq);
+ /* Maintain device to QPs access, needed for further handling via reset
+ * flow
+ */
+ list_add_tail(&qp->qps_list, &dev->qp_list);
+ /* Maintain CQ to QPs access, needed for further handling via reset flow
+ */
+ if (send_cq)
+ list_add_tail(&qp->cq_send_list, &send_cq->list_send_qp);
+ if (recv_cq)
+ list_add_tail(&qp->cq_recv_list, &recv_cq->list_recv_qp);
+ mlx5_ib_unlock_cqs(send_cq, recv_cq);
+ spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags);
+
+ return 0;
+
+err_create:
+ destroy_qp(dev, qp, base, udata);
+ return err;
+}
+
+static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
+ struct mlx5_ib_qp *qp,
+ struct mlx5_create_qp_params *params)
+{
+ struct ib_qp_init_attr *init_attr = params->attr;
+ struct mlx5_ib_create_qp *ucmd = params->ucmd;
+ u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {};
+ struct ib_udata *udata = params->udata;
+ u32 uidx = params->uidx;
+ struct mlx5_ib_resources *devr = &dev->devr;
+ int inlen = MLX5_ST_SZ_BYTES(create_qp_in);
+ struct mlx5_core_dev *mdev = dev->mdev;
+ struct mlx5_ib_cq *send_cq;
+ struct mlx5_ib_cq *recv_cq;
+ unsigned long flags;
+ struct mlx5_ib_qp_base *base;
+ int ts_format;
+ int mlx5_st;
+ void *qpc;
+ u32 *in;
+ int err;
+
+ spin_lock_init(&qp->sq.lock);
+ spin_lock_init(&qp->rq.lock);
+
+ mlx5_st = to_mlx5_st(qp->type);
+ if (mlx5_st < 0)
+ return -EINVAL;
+
+ if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
+ qp->sq_signal_bits = MLX5_WQE_CTRL_CQ_UPDATE;
+
+ if (qp->flags & IB_QP_CREATE_SOURCE_QPN)
+ qp->underlay_qpn = init_attr->source_qpn;
+
+ base = (init_attr->qp_type == IB_QPT_RAW_PACKET ||
+ qp->flags & IB_QP_CREATE_SOURCE_QPN) ?
+ &qp->raw_packet_qp.rq.base :
+ &qp->trans_qp.base;
+
qp->has_rq = qp_has_rq(init_attr);
- err = set_rq_size(dev, &init_attr->cap, qp->has_rq,
- qp, (pd && pd->uobject) ? &ucmd : NULL);
+ err = set_rq_size(dev, &init_attr->cap, qp->has_rq, qp, ucmd);
if (err) {
mlx5_ib_dbg(dev, "err %d\n", err);
return err;
}
- if (pd) {
- if (pd->uobject) {
- __u32 max_wqes =
- 1 << MLX5_CAP_GEN(mdev, log_max_qp_sz);
- mlx5_ib_dbg(dev, "requested sq_wqe_count (%d)\n", ucmd.sq_wqe_count);
- if (ucmd.rq_wqe_shift != qp->rq.wqe_shift ||
- ucmd.rq_wqe_count != qp->rq.wqe_cnt) {
- mlx5_ib_dbg(dev, "invalid rq params\n");
- return -EINVAL;
- }
- if (ucmd.sq_wqe_count > max_wqes) {
- mlx5_ib_dbg(dev, "requested sq_wqe_count (%d) > max allowed (%d)\n",
- ucmd.sq_wqe_count, max_wqes);
- return -EINVAL;
- }
- if (init_attr->create_flags &
- mlx5_ib_create_qp_sqpn_qp1()) {
- mlx5_ib_dbg(dev, "user-space is not allowed to create UD QPs spoofing as QP1\n");
- return -EINVAL;
- }
- err = create_user_qp(dev, pd, qp, udata, init_attr, &in,
- &resp, &inlen, base);
- if (err)
- mlx5_ib_dbg(dev, "err %d\n", err);
- } else {
- err = create_kernel_qp(dev, init_attr, qp, &in, &inlen,
- base);
- if (err)
- mlx5_ib_dbg(dev, "err %d\n", err);
- }
+ if (ucmd->rq_wqe_shift != qp->rq.wqe_shift ||
+ ucmd->rq_wqe_count != qp->rq.wqe_cnt)
+ return -EINVAL;
- if (err)
- return err;
- } else {
- in = mlx5_vzalloc(inlen);
- if (!in)
- return -ENOMEM;
+ if (ucmd->sq_wqe_count > (1 << MLX5_CAP_GEN(mdev, log_max_qp_sz)))
+ return -EINVAL;
- qp->create_type = MLX5_QP_EMPTY;
+ if (init_attr->qp_type != IB_QPT_RAW_PACKET) {
+ ts_format = get_qp_ts_format(dev, to_mcq(init_attr->send_cq),
+ to_mcq(init_attr->recv_cq));
+ if (ts_format < 0)
+ return ts_format;
}
+ err = _create_user_qp(dev, pd, qp, udata, init_attr, &in, &params->resp,
+ &inlen, base, ucmd);
+ if (err)
+ return err;
+
if (is_sqp(init_attr->qp_type))
qp->port = init_attr->port_num;
+ if (MLX5_CAP_GEN(mdev, ece_support))
+ MLX5_SET(create_qp_in, in, ece, ucmd->ece_options);
qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
- MLX5_SET(qpc, qpc, st, to_mlx5_st(init_attr->qp_type));
+ MLX5_SET(qpc, qpc, st, mlx5_st);
MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
+ MLX5_SET(qpc, qpc, pd, to_mpd(pd)->pdn);
- if (init_attr->qp_type != MLX5_IB_QPT_REG_UMR)
- MLX5_SET(qpc, qpc, pd, to_mpd(pd ? pd : devr->p0)->pdn);
- else
- MLX5_SET(qpc, qpc, latency_sensitive, 1);
-
-
- if (qp->wq_sig)
+ if (qp->flags_en & MLX5_QP_FLAG_SIGNATURE)
MLX5_SET(qpc, qpc, wq_signature, 1);
- if (qp->flags & MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK)
+ if (qp->flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK)
MLX5_SET(qpc, qpc, block_lb_mc, 1);
- if (qp->flags & MLX5_IB_QP_CROSS_CHANNEL)
+ if (qp->flags & IB_QP_CREATE_CROSS_CHANNEL)
MLX5_SET(qpc, qpc, cd_master, 1);
- if (qp->flags & MLX5_IB_QP_MANAGED_SEND)
+ if (qp->flags & IB_QP_CREATE_MANAGED_SEND)
MLX5_SET(qpc, qpc, cd_slave_send, 1);
- if (qp->flags & MLX5_IB_QP_MANAGED_RECV)
+ if (qp->flags & IB_QP_CREATE_MANAGED_RECV)
MLX5_SET(qpc, qpc, cd_slave_receive, 1);
+ if (qp->flags_en & MLX5_QP_FLAG_PACKET_BASED_CREDIT_MODE)
+ MLX5_SET(qpc, qpc, req_e2e_credit_mode, 1);
+ if ((qp->flags_en & MLX5_QP_FLAG_SCATTER_CQE) &&
+ (init_attr->qp_type == IB_QPT_RC ||
+ init_attr->qp_type == IB_QPT_UC)) {
+ int rcqe_sz = mlx5_ib_get_cqe_size(init_attr->recv_cq);
- if (qp->scat_cqe && is_connected(init_attr->qp_type)) {
- int rcqe_sz;
- int scqe_sz;
-
- rcqe_sz = mlx5_ib_get_cqe_size(dev, init_attr->recv_cq);
- scqe_sz = mlx5_ib_get_cqe_size(dev, init_attr->send_cq);
-
- if (rcqe_sz == 128)
- MLX5_SET(qpc, qpc, cs_res, MLX5_RES_SCAT_DATA64_CQE);
- else
- MLX5_SET(qpc, qpc, cs_res, MLX5_RES_SCAT_DATA32_CQE);
-
- if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) {
- if (scqe_sz == 128)
- MLX5_SET(qpc, qpc, cs_req, MLX5_REQ_SCAT_DATA64_CQE);
- else
- MLX5_SET(qpc, qpc, cs_req, MLX5_REQ_SCAT_DATA32_CQE);
- }
+ MLX5_SET(qpc, qpc, cs_res,
+ rcqe_sz == 128 ? MLX5_RES_SCAT_DATA64_CQE :
+ MLX5_RES_SCAT_DATA32_CQE);
}
+ if ((qp->flags_en & MLX5_QP_FLAG_SCATTER_CQE) &&
+ (qp->type == MLX5_IB_QPT_DCI || qp->type == IB_QPT_RC))
+ configure_requester_scat_cqe(dev, qp, init_attr, qpc);
if (qp->rq.wqe_cnt) {
MLX5_SET(qpc, qpc, log_rq_stride, qp->rq.wqe_shift - 4);
MLX5_SET(qpc, qpc, log_rq_size, ilog2(qp->rq.wqe_cnt));
}
+ if (init_attr->qp_type != IB_QPT_RAW_PACKET)
+ MLX5_SET(qpc, qpc, ts_format, ts_format);
+
MLX5_SET(qpc, qpc, rq_type, get_rx_type(qp, init_attr));
- if (qp->sq.wqe_cnt)
+ if (qp->sq.wqe_cnt) {
MLX5_SET(qpc, qpc, log_sq_size, ilog2(qp->sq.wqe_cnt));
- else
+ } else {
MLX5_SET(qpc, qpc, no_sq, 1);
+ if (init_attr->srq &&
+ init_attr->srq->srq_type == IB_SRQT_TM)
+ MLX5_SET(qpc, qpc, offload_type,
+ MLX5_QPC_OFFLOAD_TYPE_RNDV);
+ }
/* Set default resources */
switch (init_attr->qp_type) {
- case IB_QPT_XRC_TGT:
- MLX5_SET(qpc, qpc, cqn_rcv, to_mcq(devr->c0)->mcq.cqn);
- MLX5_SET(qpc, qpc, cqn_snd, to_mcq(devr->c0)->mcq.cqn);
- MLX5_SET(qpc, qpc, srqn_rmpn_xrqn, to_msrq(devr->s0)->msrq.srqn);
- MLX5_SET(qpc, qpc, xrcd, to_mxrcd(init_attr->xrcd)->xrcdn);
- break;
case IB_QPT_XRC_INI:
MLX5_SET(qpc, qpc, cqn_rcv, to_mcq(devr->c0)->mcq.cqn);
- MLX5_SET(qpc, qpc, xrcd, to_mxrcd(devr->x1)->xrcdn);
+ MLX5_SET(qpc, qpc, xrcd, devr->xrcdn1);
MLX5_SET(qpc, qpc, srqn_rmpn_xrqn, to_msrq(devr->s0)->msrq.srqn);
break;
default:
if (init_attr->srq) {
- MLX5_SET(qpc, qpc, xrcd, to_mxrcd(devr->x0)->xrcdn);
+ MLX5_SET(qpc, qpc, xrcd, devr->xrcdn0);
MLX5_SET(qpc, qpc, srqn_rmpn_xrqn, to_msrq(init_attr->srq)->msrq.srqn);
} else {
- MLX5_SET(qpc, qpc, xrcd, to_mxrcd(devr->x1)->xrcdn);
+ MLX5_SET(qpc, qpc, xrcd, devr->xrcdn1);
MLX5_SET(qpc, qpc, srqn_rmpn_xrqn, to_msrq(devr->s1)->msrq.srqn);
}
}
@@ -1749,32 +2381,166 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
if (MLX5_CAP_GEN(mdev, cqe_version) == MLX5_CQE_VERSION_V1)
MLX5_SET(qpc, qpc, user_index, uidx);
- /* we use IB_QP_CREATE_IPOIB_UD_LSO to indicates ipoib qp */
- if (init_attr->qp_type == IB_QPT_UD &&
- (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO)) {
- MLX5_SET(qpc, qpc, ulp_stateless_offload_mode, 1);
- qp->flags |= MLX5_IB_QP_LSO;
+ if (qp->flags & IB_QP_CREATE_PCI_WRITE_END_PADDING &&
+ init_attr->qp_type != IB_QPT_RAW_PACKET) {
+ MLX5_SET(qpc, qpc, end_padding_mode,
+ MLX5_WQ_END_PAD_MODE_ALIGN);
+ /* Special case to clean flag */
+ qp->flags &= ~IB_QP_CREATE_PCI_WRITE_END_PADDING;
}
- if (init_attr->qp_type == IB_QPT_RAW_PACKET) {
- qp->raw_packet_qp.sq.ubuffer.buf_addr = ucmd.sq_buf_addr;
+ if (init_attr->qp_type == IB_QPT_RAW_PACKET ||
+ qp->flags & IB_QP_CREATE_SOURCE_QPN) {
+ qp->raw_packet_qp.sq.ubuffer.buf_addr = ucmd->sq_buf_addr;
raw_packet_qp_copy_info(qp, &qp->raw_packet_qp);
- err = create_raw_packet_qp(dev, qp, in, pd);
- } else {
- err = mlx5_core_create_qp(dev->mdev, &base->mqp, in, inlen);
- }
+ err = create_raw_packet_qp(dev, qp, in, inlen, pd, udata,
+ &params->resp, init_attr);
+ } else
+ err = mlx5_qpc_create_qp(dev, &base->mqp, in, inlen, out);
- if (err) {
- mlx5_ib_dbg(dev, "create qp failed\n");
+ kvfree(in);
+ if (err)
goto err_create;
+
+ base->container_mibqp = qp;
+ base->mqp.event = mlx5_ib_qp_event;
+ if (MLX5_CAP_GEN(mdev, ece_support))
+ params->resp.ece_options = MLX5_GET(create_qp_out, out, ece);
+
+ get_cqs(qp->type, init_attr->send_cq, init_attr->recv_cq,
+ &send_cq, &recv_cq);
+ spin_lock_irqsave(&dev->reset_flow_resource_lock, flags);
+ mlx5_ib_lock_cqs(send_cq, recv_cq);
+ /* Maintain device to QPs access, needed for further handling via reset
+ * flow
+ */
+ list_add_tail(&qp->qps_list, &dev->qp_list);
+ /* Maintain CQ to QPs access, needed for further handling via reset flow
+ */
+ if (send_cq)
+ list_add_tail(&qp->cq_send_list, &send_cq->list_send_qp);
+ if (recv_cq)
+ list_add_tail(&qp->cq_recv_list, &recv_cq->list_recv_qp);
+ mlx5_ib_unlock_cqs(send_cq, recv_cq);
+ spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags);
+
+ return 0;
+
+err_create:
+ destroy_qp(dev, qp, base, udata);
+ return err;
+}
+
+static int create_kernel_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
+ struct mlx5_ib_qp *qp,
+ struct mlx5_create_qp_params *params)
+{
+ struct ib_qp_init_attr *attr = params->attr;
+ u32 uidx = params->uidx;
+ struct mlx5_ib_resources *devr = &dev->devr;
+ u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {};
+ int inlen = MLX5_ST_SZ_BYTES(create_qp_in);
+ struct mlx5_core_dev *mdev = dev->mdev;
+ struct mlx5_ib_cq *send_cq;
+ struct mlx5_ib_cq *recv_cq;
+ unsigned long flags;
+ struct mlx5_ib_qp_base *base;
+ int mlx5_st;
+ void *qpc;
+ u32 *in;
+ int err;
+
+ spin_lock_init(&qp->sq.lock);
+ spin_lock_init(&qp->rq.lock);
+
+ mlx5_st = to_mlx5_st(qp->type);
+ if (mlx5_st < 0)
+ return -EINVAL;
+
+ if (attr->sq_sig_type == IB_SIGNAL_ALL_WR)
+ qp->sq_signal_bits = MLX5_WQE_CTRL_CQ_UPDATE;
+
+ base = &qp->trans_qp.base;
+
+ qp->has_rq = qp_has_rq(attr);
+ err = set_rq_size(dev, &attr->cap, qp->has_rq, qp, NULL);
+ if (err) {
+ mlx5_ib_dbg(dev, "err %d\n", err);
+ return err;
+ }
+
+ err = _create_kernel_qp(dev, attr, qp, &in, &inlen, base);
+ if (err)
+ return err;
+
+ if (is_sqp(attr->qp_type))
+ qp->port = attr->port_num;
+
+ qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
+
+ MLX5_SET(qpc, qpc, st, mlx5_st);
+ MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
+
+ if (attr->qp_type != MLX5_IB_QPT_REG_UMR)
+ MLX5_SET(qpc, qpc, pd, to_mpd(pd ? pd : devr->p0)->pdn);
+ else
+ MLX5_SET(qpc, qpc, latency_sensitive, 1);
+
+
+ if (qp->flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK)
+ MLX5_SET(qpc, qpc, block_lb_mc, 1);
+
+ if (qp->rq.wqe_cnt) {
+ MLX5_SET(qpc, qpc, log_rq_stride, qp->rq.wqe_shift - 4);
+ MLX5_SET(qpc, qpc, log_rq_size, ilog2(qp->rq.wqe_cnt));
}
+ MLX5_SET(qpc, qpc, rq_type, get_rx_type(qp, attr));
+
+ if (qp->sq.wqe_cnt)
+ MLX5_SET(qpc, qpc, log_sq_size, ilog2(qp->sq.wqe_cnt));
+ else
+ MLX5_SET(qpc, qpc, no_sq, 1);
+
+ if (attr->srq) {
+ MLX5_SET(qpc, qpc, xrcd, devr->xrcdn0);
+ MLX5_SET(qpc, qpc, srqn_rmpn_xrqn,
+ to_msrq(attr->srq)->msrq.srqn);
+ } else {
+ MLX5_SET(qpc, qpc, xrcd, devr->xrcdn1);
+ MLX5_SET(qpc, qpc, srqn_rmpn_xrqn,
+ to_msrq(devr->s1)->msrq.srqn);
+ }
+
+ if (attr->send_cq)
+ MLX5_SET(qpc, qpc, cqn_snd, to_mcq(attr->send_cq)->mcq.cqn);
+
+ if (attr->recv_cq)
+ MLX5_SET(qpc, qpc, cqn_rcv, to_mcq(attr->recv_cq)->mcq.cqn);
+
+ MLX5_SET64(qpc, qpc, dbr_addr, qp->db.dma);
+
+ /* 0xffffff means we ask to work with cqe version 0 */
+ if (MLX5_CAP_GEN(mdev, cqe_version) == MLX5_CQE_VERSION_V1)
+ MLX5_SET(qpc, qpc, user_index, uidx);
+
+ /* we use IB_QP_CREATE_IPOIB_UD_LSO to indicates ipoib qp */
+ if (qp->flags & IB_QP_CREATE_IPOIB_UD_LSO)
+ MLX5_SET(qpc, qpc, ulp_stateless_offload_mode, 1);
+
+ if (qp->flags & IB_QP_CREATE_INTEGRITY_EN &&
+ MLX5_CAP_GEN(mdev, go_back_n))
+ MLX5_SET(qpc, qpc, retry_mode, MLX5_QP_RM_GO_BACK_N);
+
+ err = mlx5_qpc_create_qp(dev, &base->mqp, in, inlen, out);
kvfree(in);
+ if (err)
+ goto err_create;
base->container_mibqp = qp;
base->mqp.event = mlx5_ib_qp_event;
- get_cqs(init_attr->qp_type, init_attr->send_cq, init_attr->recv_cq,
+ get_cqs(qp->type, attr->send_cq, attr->recv_cq,
&send_cq, &recv_cq);
spin_lock_irqsave(&dev->reset_flow_resource_lock, flags);
mlx5_ib_lock_cqs(send_cq, recv_cq);
@@ -1794,12 +2560,7 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
return 0;
err_create:
- if (qp->create_type == MLX5_QP_USER)
- destroy_qp_user(pd, qp, base);
- else if (qp->create_type == MLX5_QP_KERNEL)
- destroy_qp_kernel(dev, qp);
-
- kvfree(in);
+ destroy_qp(dev, qp, base, NULL);
return err;
}
@@ -1861,11 +2622,6 @@ static void mlx5_ib_unlock_cqs(struct mlx5_ib_cq *send_cq, struct mlx5_ib_cq *re
}
}
-static struct mlx5_ib_pd *get_pd(struct mlx5_ib_qp *qp)
-{
- return to_mpd(qp->ibqp.pd);
-}
-
static void get_cqs(enum ib_qp_type qp_type,
struct ib_cq *ib_send_cq, struct ib_cq *ib_recv_cq,
struct mlx5_ib_cq **send_cq, struct mlx5_ib_cq **recv_cq)
@@ -1886,14 +2642,10 @@ static void get_cqs(enum ib_qp_type qp_type,
case IB_QPT_RC:
case IB_QPT_UC:
case IB_QPT_UD:
- case IB_QPT_RAW_IPV6:
- case IB_QPT_RAW_ETHERTYPE:
case IB_QPT_RAW_PACKET:
*send_cq = ib_send_cq ? to_mcq(ib_send_cq) : NULL;
*recv_cq = ib_recv_cq ? to_mcq(ib_recv_cq) : NULL;
break;
-
- case IB_QPT_MAX:
default:
*send_cq = NULL;
*recv_cq = NULL;
@@ -1905,28 +2657,29 @@ static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
const struct mlx5_modify_raw_qp_param *raw_qp_param,
u8 lag_tx_affinity);
-static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
+static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
+ struct ib_udata *udata)
{
struct mlx5_ib_cq *send_cq, *recv_cq;
- struct mlx5_ib_qp_base *base = &qp->trans_qp.base;
+ struct mlx5_ib_qp_base *base;
unsigned long flags;
int err;
- if (qp->ibqp.rwq_ind_tbl) {
+ if (qp->is_rss) {
destroy_rss_raw_qp_tir(dev, qp);
return;
}
- base = qp->ibqp.qp_type == IB_QPT_RAW_PACKET ?
- &qp->raw_packet_qp.rq.base :
- &qp->trans_qp.base;
+ base = (qp->type == IB_QPT_RAW_PACKET ||
+ qp->flags & IB_QP_CREATE_SOURCE_QPN) ?
+ &qp->raw_packet_qp.rq.base :
+ &qp->trans_qp.base;
if (qp->state != IB_QPS_RESET) {
- if (qp->ibqp.qp_type != IB_QPT_RAW_PACKET) {
- mlx5_ib_qp_disable_pagefaults(qp);
- err = mlx5_core_qp_modify(dev->mdev,
- MLX5_CMD_OP_2RST_QP, 0,
- NULL, &base->mqp);
+ if (qp->type != IB_QPT_RAW_PACKET &&
+ !(qp->flags & IB_QP_CREATE_SOURCE_QPN)) {
+ err = mlx5_core_qp_modify(dev, MLX5_CMD_OP_2RST_QP, 0,
+ NULL, &base->mqp, NULL);
} else {
struct mlx5_modify_raw_qp_param raw_qp_param = {
.operation = MLX5_CMD_OP_2RST_QP
@@ -1939,8 +2692,8 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
base->mqp.qpn);
}
- get_cqs(qp->ibqp.qp_type, qp->ibqp.send_cq, qp->ibqp.recv_cq,
- &send_cq, &recv_cq);
+ get_cqs(qp->type, qp->ibqp.send_cq, qp->ibqp.recv_cq, &send_cq,
+ &recv_cq);
spin_lock_irqsave(&dev->reset_flow_resource_lock, flags);
mlx5_ib_lock_cqs(send_cq, recv_cq);
@@ -1952,7 +2705,7 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
if (recv_cq)
list_del(&qp->cq_recv_list);
- if (qp->create_type == MLX5_QP_KERNEL) {
+ if (!udata) {
__mlx5_ib_cq_clean(recv_cq, base->mqp.qpn,
qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL);
if (send_cq != recv_cq)
@@ -1962,167 +2715,662 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
mlx5_ib_unlock_cqs(send_cq, recv_cq);
spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags);
- if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET) {
+ if (qp->type == IB_QPT_RAW_PACKET ||
+ qp->flags & IB_QP_CREATE_SOURCE_QPN) {
destroy_raw_packet_qp(dev, qp);
} else {
- err = mlx5_core_destroy_qp(dev->mdev, &base->mqp);
+ err = mlx5_core_destroy_qp(dev, &base->mqp);
if (err)
mlx5_ib_warn(dev, "failed to destroy QP 0x%x\n",
base->mqp.qpn);
}
- if (qp->create_type == MLX5_QP_KERNEL)
- destroy_qp_kernel(dev, qp);
- else if (qp->create_type == MLX5_QP_USER)
- destroy_qp_user(&get_pd(qp)->ibpd, qp, base);
+ destroy_qp(dev, qp, base, udata);
}
-static const char *ib_qp_type_str(enum ib_qp_type type)
+static int create_dct(struct mlx5_ib_dev *dev, struct ib_pd *pd,
+ struct mlx5_ib_qp *qp,
+ struct mlx5_create_qp_params *params)
{
- switch (type) {
- case IB_QPT_SMI:
- return "IB_QPT_SMI";
- case IB_QPT_GSI:
- return "IB_QPT_GSI";
+ struct ib_qp_init_attr *attr = params->attr;
+ struct mlx5_ib_create_qp *ucmd = params->ucmd;
+ u32 uidx = params->uidx;
+ void *dctc;
+
+ if (mlx5_lag_is_active(dev->mdev) && !MLX5_CAP_GEN(dev->mdev, lag_dct))
+ return -EOPNOTSUPP;
+
+ qp->dct.in = kzalloc(MLX5_ST_SZ_BYTES(create_dct_in), GFP_KERNEL);
+ if (!qp->dct.in)
+ return -ENOMEM;
+
+ MLX5_SET(create_dct_in, qp->dct.in, uid, to_mpd(pd)->uid);
+ dctc = MLX5_ADDR_OF(create_dct_in, qp->dct.in, dct_context_entry);
+ MLX5_SET(dctc, dctc, pd, to_mpd(pd)->pdn);
+ MLX5_SET(dctc, dctc, srqn_xrqn, to_msrq(attr->srq)->msrq.srqn);
+ MLX5_SET(dctc, dctc, cqn, to_mcq(attr->recv_cq)->mcq.cqn);
+ MLX5_SET64(dctc, dctc, dc_access_key, ucmd->access_key);
+ MLX5_SET(dctc, dctc, user_index, uidx);
+ if (MLX5_CAP_GEN(dev->mdev, ece_support))
+ MLX5_SET(dctc, dctc, ece, ucmd->ece_options);
+
+ if (qp->flags_en & MLX5_QP_FLAG_SCATTER_CQE) {
+ int rcqe_sz = mlx5_ib_get_cqe_size(attr->recv_cq);
+
+ if (rcqe_sz == 128)
+ MLX5_SET(dctc, dctc, cs_res, MLX5_RES_SCAT_DATA64_CQE);
+ }
+
+ qp->state = IB_QPS_RESET;
+ return 0;
+}
+
+static int check_qp_type(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr,
+ enum ib_qp_type *type)
+{
+ if (attr->qp_type == IB_QPT_DRIVER && !MLX5_CAP_GEN(dev->mdev, dct))
+ goto out;
+
+ switch (attr->qp_type) {
+ case IB_QPT_XRC_TGT:
+ case IB_QPT_XRC_INI:
+ if (!MLX5_CAP_GEN(dev->mdev, xrc))
+ goto out;
+ fallthrough;
case IB_QPT_RC:
- return "IB_QPT_RC";
case IB_QPT_UC:
- return "IB_QPT_UC";
- case IB_QPT_UD:
- return "IB_QPT_UD";
- case IB_QPT_RAW_IPV6:
- return "IB_QPT_RAW_IPV6";
- case IB_QPT_RAW_ETHERTYPE:
- return "IB_QPT_RAW_ETHERTYPE";
- case IB_QPT_XRC_INI:
- return "IB_QPT_XRC_INI";
- case IB_QPT_XRC_TGT:
- return "IB_QPT_XRC_TGT";
+ case IB_QPT_SMI:
+ case MLX5_IB_QPT_HW_GSI:
+ case IB_QPT_DRIVER:
+ case IB_QPT_GSI:
case IB_QPT_RAW_PACKET:
- return "IB_QPT_RAW_PACKET";
+ case IB_QPT_UD:
case MLX5_IB_QPT_REG_UMR:
- return "MLX5_IB_QPT_REG_UMR";
- case IB_QPT_MAX:
+ break;
default:
- return "Invalid QP type";
+ goto out;
}
+
+ *type = attr->qp_type;
+ return 0;
+
+out:
+ mlx5_ib_dbg(dev, "Unsupported QP type %d\n", attr->qp_type);
+ return -EOPNOTSUPP;
}
-struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd,
- struct ib_qp_init_attr *init_attr,
- struct ib_udata *udata)
+static int check_valid_flow(struct mlx5_ib_dev *dev, struct ib_pd *pd,
+ struct ib_qp_init_attr *attr,
+ struct ib_udata *udata)
{
- struct mlx5_ib_dev *dev;
- struct mlx5_ib_qp *qp;
- u16 xrcdn = 0;
- int err;
+ struct mlx5_ib_ucontext *ucontext = rdma_udata_to_drv_context(
+ udata, struct mlx5_ib_ucontext, ibucontext);
- if (pd) {
- dev = to_mdev(pd->device);
+ if (!udata) {
+ /* Kernel create_qp callers */
+ if (attr->rwq_ind_tbl)
+ return -EOPNOTSUPP;
- if (init_attr->qp_type == IB_QPT_RAW_PACKET) {
- if (!pd->uobject) {
- mlx5_ib_dbg(dev, "Raw Packet QP is not supported for kernel consumers\n");
- return ERR_PTR(-EINVAL);
- } else if (!to_mucontext(pd->uobject->context)->cqe_version) {
- mlx5_ib_dbg(dev, "Raw Packet QP is only supported for CQE version > 0\n");
- return ERR_PTR(-EINVAL);
- }
- }
- } else {
- /* being cautious here */
- if (init_attr->qp_type != IB_QPT_XRC_TGT &&
- init_attr->qp_type != MLX5_IB_QPT_REG_UMR) {
- pr_warn("%s: no PD for transport %s\n", __func__,
- ib_qp_type_str(init_attr->qp_type));
- return ERR_PTR(-EINVAL);
+ switch (attr->qp_type) {
+ case IB_QPT_RAW_PACKET:
+ case IB_QPT_DRIVER:
+ return -EOPNOTSUPP;
+ default:
+ return 0;
}
- dev = to_mdev(to_mxrcd(init_attr->xrcd)->ibxrcd.device);
}
- switch (init_attr->qp_type) {
- case IB_QPT_XRC_TGT:
- case IB_QPT_XRC_INI:
- if (!MLX5_CAP_GEN(dev->mdev, xrc)) {
- mlx5_ib_dbg(dev, "XRC not supported\n");
- return ERR_PTR(-ENOSYS);
- }
- init_attr->recv_cq = NULL;
- if (init_attr->qp_type == IB_QPT_XRC_TGT) {
- xrcdn = to_mxrcd(init_attr->xrcd)->xrcdn;
- init_attr->send_cq = NULL;
- }
+ /* Userspace create_qp callers */
+ if (attr->qp_type == IB_QPT_RAW_PACKET && !ucontext->cqe_version) {
+ mlx5_ib_dbg(dev,
+ "Raw Packet QP is only supported for CQE version > 0\n");
+ return -EINVAL;
+ }
- /* fall through */
- case IB_QPT_RAW_PACKET:
+ if (attr->qp_type != IB_QPT_RAW_PACKET && attr->rwq_ind_tbl) {
+ mlx5_ib_dbg(dev,
+ "Wrong QP type %d for the RWQ indirect table\n",
+ attr->qp_type);
+ return -EINVAL;
+ }
+
+ /*
+ * We don't need to see this warning, it means that kernel code
+ * missing ib_pd. Placed here to catch developer's mistakes.
+ */
+ WARN_ONCE(!pd && attr->qp_type != IB_QPT_XRC_TGT,
+ "There is a missing PD pointer assignment\n");
+ return 0;
+}
+
+static bool get_dp_ooo_cap(struct mlx5_core_dev *mdev, enum ib_qp_type qp_type)
+{
+ if (!MLX5_CAP_GEN_2(mdev, dp_ordering_force))
+ return false;
+
+ switch (qp_type) {
case IB_QPT_RC:
+ return MLX5_CAP_GEN(mdev, dp_ordering_ooo_all_rc);
+ case IB_QPT_XRC_INI:
+ case IB_QPT_XRC_TGT:
+ return MLX5_CAP_GEN(mdev, dp_ordering_ooo_all_xrc);
case IB_QPT_UC:
+ return MLX5_CAP_GEN(mdev, dp_ordering_ooo_all_uc);
case IB_QPT_UD:
- case IB_QPT_SMI:
+ return MLX5_CAP_GEN(mdev, dp_ordering_ooo_all_ud);
+ case MLX5_IB_QPT_DCI:
+ case MLX5_IB_QPT_DCT:
+ return MLX5_CAP_GEN(mdev, dp_ordering_ooo_all_dc);
+ default:
+ return false;
+ }
+}
+
+static void process_vendor_flag(struct mlx5_ib_dev *dev, int *flags, int flag,
+ bool cond, struct mlx5_ib_qp *qp)
+{
+ if (!(*flags & flag))
+ return;
+
+ if (cond) {
+ qp->flags_en |= flag;
+ *flags &= ~flag;
+ return;
+ }
+
+ switch (flag) {
+ case MLX5_QP_FLAG_SCATTER_CQE:
+ case MLX5_QP_FLAG_ALLOW_SCATTER_CQE:
+ /*
+ * We don't return error if these flags were provided,
+ * and mlx5 doesn't have right capability.
+ */
+ *flags &= ~(MLX5_QP_FLAG_SCATTER_CQE |
+ MLX5_QP_FLAG_ALLOW_SCATTER_CQE);
+ return;
+ default:
+ break;
+ }
+ mlx5_ib_dbg(dev, "Vendor create QP flag 0x%X is not supported\n", flag);
+}
+
+static int process_vendor_flags(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
+ void *ucmd, struct ib_qp_init_attr *attr)
+{
+ struct mlx5_core_dev *mdev = dev->mdev;
+ bool cond;
+ int flags;
+
+ if (attr->rwq_ind_tbl)
+ flags = ((struct mlx5_ib_create_qp_rss *)ucmd)->flags;
+ else
+ flags = ((struct mlx5_ib_create_qp *)ucmd)->flags;
+
+ switch (flags & (MLX5_QP_FLAG_TYPE_DCT | MLX5_QP_FLAG_TYPE_DCI)) {
+ case MLX5_QP_FLAG_TYPE_DCI:
+ qp->type = MLX5_IB_QPT_DCI;
+ break;
+ case MLX5_QP_FLAG_TYPE_DCT:
+ qp->type = MLX5_IB_QPT_DCT;
+ break;
+ default:
+ if (qp->type != IB_QPT_DRIVER)
+ break;
+ /*
+ * It is IB_QPT_DRIVER and or no subtype or
+ * wrong subtype were provided.
+ */
+ return -EINVAL;
+ }
+
+ process_vendor_flag(dev, &flags, MLX5_QP_FLAG_TYPE_DCI, true, qp);
+ process_vendor_flag(dev, &flags, MLX5_QP_FLAG_TYPE_DCT, true, qp);
+ process_vendor_flag(dev, &flags, MLX5_QP_FLAG_DCI_STREAM,
+ MLX5_CAP_GEN(mdev, log_max_dci_stream_channels),
+ qp);
+
+ process_vendor_flag(dev, &flags, MLX5_QP_FLAG_SIGNATURE, true, qp);
+ process_vendor_flag(dev, &flags, MLX5_QP_FLAG_SCATTER_CQE,
+ MLX5_CAP_GEN(mdev, sctr_data_cqe), qp);
+ process_vendor_flag(dev, &flags, MLX5_QP_FLAG_ALLOW_SCATTER_CQE,
+ MLX5_CAP_GEN(mdev, sctr_data_cqe), qp);
+
+ if (qp->type == IB_QPT_RAW_PACKET) {
+ cond = MLX5_CAP_ETH(mdev, tunnel_stateless_vxlan) ||
+ MLX5_CAP_ETH(mdev, tunnel_stateless_gre) ||
+ MLX5_CAP_ETH(mdev, tunnel_stateless_geneve_rx);
+ process_vendor_flag(dev, &flags, MLX5_QP_FLAG_TUNNEL_OFFLOADS,
+ cond, qp);
+ process_vendor_flag(dev, &flags,
+ MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC, true,
+ qp);
+ process_vendor_flag(dev, &flags,
+ MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC, true,
+ qp);
+ }
+
+ if (qp->type == IB_QPT_RC)
+ process_vendor_flag(dev, &flags,
+ MLX5_QP_FLAG_PACKET_BASED_CREDIT_MODE,
+ MLX5_CAP_GEN(mdev, qp_packet_based), qp);
+
+ process_vendor_flag(dev, &flags, MLX5_QP_FLAG_BFREG_INDEX, true, qp);
+ process_vendor_flag(dev, &flags, MLX5_QP_FLAG_UAR_PAGE_INDEX, true, qp);
+
+ cond = qp->flags_en & ~(MLX5_QP_FLAG_TUNNEL_OFFLOADS |
+ MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_UC |
+ MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC);
+ if (attr->rwq_ind_tbl && cond) {
+ mlx5_ib_dbg(dev, "RSS RAW QP has unsupported flags 0x%X\n",
+ cond);
+ return -EINVAL;
+ }
+
+ if (flags)
+ mlx5_ib_dbg(dev, "udata has unsupported flags 0x%X\n", flags);
+
+ return (flags) ? -EINVAL : 0;
+ }
+
+static void process_create_flag(struct mlx5_ib_dev *dev, int *flags, int flag,
+ bool cond, struct mlx5_ib_qp *qp)
+{
+ if (!(*flags & flag))
+ return;
+
+ if (cond) {
+ qp->flags |= flag;
+ *flags &= ~flag;
+ return;
+ }
+
+ mlx5_ib_dbg(dev, "Verbs create QP flag 0x%X is not supported\n", flag);
+}
+
+static int process_create_flags(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
+ struct ib_qp_init_attr *attr)
+{
+ enum ib_qp_type qp_type = qp->type;
+ struct mlx5_core_dev *mdev = dev->mdev;
+ int create_flags = attr->create_flags;
+ bool cond;
+
+ if (qp_type == MLX5_IB_QPT_DCT)
+ return (create_flags) ? -EINVAL : 0;
+
+ if (qp_type == IB_QPT_RAW_PACKET && attr->rwq_ind_tbl)
+ return (create_flags) ? -EINVAL : 0;
+
+ process_create_flag(dev, &create_flags, IB_QP_CREATE_NETIF_QP,
+ mlx5_get_flow_namespace(dev->mdev,
+ MLX5_FLOW_NAMESPACE_BYPASS),
+ qp);
+ process_create_flag(dev, &create_flags,
+ IB_QP_CREATE_INTEGRITY_EN,
+ MLX5_CAP_GEN(mdev, sho), qp);
+ process_create_flag(dev, &create_flags,
+ IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK,
+ MLX5_CAP_GEN(mdev, block_lb_mc), qp);
+ process_create_flag(dev, &create_flags, IB_QP_CREATE_CROSS_CHANNEL,
+ MLX5_CAP_GEN(mdev, cd), qp);
+ process_create_flag(dev, &create_flags, IB_QP_CREATE_MANAGED_SEND,
+ MLX5_CAP_GEN(mdev, cd), qp);
+ process_create_flag(dev, &create_flags, IB_QP_CREATE_MANAGED_RECV,
+ MLX5_CAP_GEN(mdev, cd), qp);
+
+ if (qp_type == IB_QPT_UD) {
+ process_create_flag(dev, &create_flags,
+ IB_QP_CREATE_IPOIB_UD_LSO,
+ MLX5_CAP_GEN(mdev, ipoib_basic_offloads),
+ qp);
+ cond = MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_IB;
+ process_create_flag(dev, &create_flags, IB_QP_CREATE_SOURCE_QPN,
+ cond, qp);
+ }
+
+ if (qp_type == IB_QPT_RAW_PACKET) {
+ cond = MLX5_CAP_GEN(mdev, eth_net_offloads) &&
+ MLX5_CAP_ETH(mdev, scatter_fcs);
+ process_create_flag(dev, &create_flags,
+ IB_QP_CREATE_SCATTER_FCS, cond, qp);
+
+ cond = MLX5_CAP_GEN(mdev, eth_net_offloads) &&
+ MLX5_CAP_ETH(mdev, vlan_cap);
+ process_create_flag(dev, &create_flags,
+ IB_QP_CREATE_CVLAN_STRIPPING, cond, qp);
+ }
+
+ process_create_flag(dev, &create_flags,
+ IB_QP_CREATE_PCI_WRITE_END_PADDING,
+ MLX5_CAP_GEN(mdev, end_pad), qp);
+
+ process_create_flag(dev, &create_flags, MLX5_IB_QP_CREATE_SQPN_QP1,
+ true, qp);
+
+ if (create_flags) {
+ mlx5_ib_dbg(dev, "Create QP has unsupported flags 0x%X\n",
+ create_flags);
+ return -EOPNOTSUPP;
+ }
+ return 0;
+}
+
+static int process_udata_size(struct mlx5_ib_dev *dev,
+ struct mlx5_create_qp_params *params)
+{
+ size_t ucmd = sizeof(struct mlx5_ib_create_qp);
+ struct ib_udata *udata = params->udata;
+ size_t outlen = udata->outlen;
+ size_t inlen = udata->inlen;
+
+ params->outlen = min(outlen, sizeof(struct mlx5_ib_create_qp_resp));
+ params->ucmd_size = ucmd;
+ if (!params->is_rss_raw) {
+ /* User has old rdma-core, which doesn't support ECE */
+ size_t min_inlen =
+ offsetof(struct mlx5_ib_create_qp, ece_options);
+
+ /*
+ * We will check in check_ucmd_data() that user
+ * cleared everything after inlen.
+ */
+ params->inlen = (inlen < min_inlen) ? 0 : min(inlen, ucmd);
+ goto out;
+ }
+
+ /* RSS RAW QP */
+ if (inlen < offsetofend(struct mlx5_ib_create_qp_rss, flags))
+ return -EINVAL;
+
+ if (outlen < offsetofend(struct mlx5_ib_create_qp_resp, bfreg_index))
+ return -EINVAL;
+
+ ucmd = sizeof(struct mlx5_ib_create_qp_rss);
+ params->ucmd_size = ucmd;
+ if (inlen > ucmd && !ib_is_udata_cleared(udata, ucmd, inlen - ucmd))
+ return -EINVAL;
+
+ params->inlen = min(ucmd, inlen);
+out:
+ if (!params->inlen)
+ mlx5_ib_dbg(dev, "udata is too small\n");
+
+ return (params->inlen) ? 0 : -EINVAL;
+}
+
+static int create_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
+ struct mlx5_ib_qp *qp,
+ struct mlx5_create_qp_params *params)
+{
+ int err;
+
+ if (params->is_rss_raw) {
+ err = create_rss_raw_qp_tir(dev, pd, qp, params);
+ goto out;
+ }
+
+ switch (qp->type) {
+ case MLX5_IB_QPT_DCT:
+ err = create_dct(dev, pd, qp, params);
+ break;
+ case MLX5_IB_QPT_DCI:
+ err = create_dci(dev, pd, qp, params);
+ break;
+ case IB_QPT_XRC_TGT:
+ err = create_xrc_tgt_qp(dev, qp, params);
+ break;
+ case IB_QPT_GSI:
+ err = mlx5_ib_create_gsi(pd, qp, params->attr);
+ break;
case MLX5_IB_QPT_HW_GSI:
+ rdma_restrack_no_track(&qp->ibqp.res);
+ fallthrough;
case MLX5_IB_QPT_REG_UMR:
- qp = kzalloc(sizeof(*qp), GFP_KERNEL);
- if (!qp)
- return ERR_PTR(-ENOMEM);
+ default:
+ if (params->udata)
+ err = create_user_qp(dev, pd, qp, params);
+ else
+ err = create_kernel_qp(dev, pd, qp, params);
+ }
+
+out:
+ if (err) {
+ mlx5_ib_err(dev, "Create QP type %d failed\n", qp->type);
+ return err;
+ }
+
+ if (is_qp0(qp->type))
+ qp->ibqp.qp_num = 0;
+ else if (is_qp1(qp->type))
+ qp->ibqp.qp_num = 1;
+ else
+ qp->ibqp.qp_num = qp->trans_qp.base.mqp.qpn;
+
+ mlx5_ib_dbg(dev,
+ "QP type %d, ib qpn 0x%X, mlx qpn 0x%x, rcqn 0x%x, scqn 0x%x, ece 0x%x\n",
+ qp->type, qp->ibqp.qp_num, qp->trans_qp.base.mqp.qpn,
+ params->attr->recv_cq ? to_mcq(params->attr->recv_cq)->mcq.cqn :
+ -1,
+ params->attr->send_cq ? to_mcq(params->attr->send_cq)->mcq.cqn :
+ -1,
+ params->resp.ece_options);
+
+ return 0;
+}
+
+static int check_qp_attr(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
+ struct ib_qp_init_attr *attr)
+{
+ int ret = 0;
+
+ switch (qp->type) {
+ case MLX5_IB_QPT_DCT:
+ ret = (!attr->srq || !attr->recv_cq) ? -EINVAL : 0;
+ break;
+ case MLX5_IB_QPT_DCI:
+ ret = (attr->cap.max_recv_wr || attr->cap.max_recv_sge) ?
+ -EINVAL :
+ 0;
+ break;
+ case IB_QPT_RAW_PACKET:
+ ret = (attr->rwq_ind_tbl && attr->send_cq) ? -EINVAL : 0;
+ break;
+ default:
+ break;
+ }
+
+ if (ret)
+ mlx5_ib_dbg(dev, "QP type %d has wrong attributes\n", qp->type);
+
+ return ret;
+}
+
+static int get_qp_uidx(struct mlx5_ib_qp *qp,
+ struct mlx5_create_qp_params *params)
+{
+ struct mlx5_ib_create_qp *ucmd = params->ucmd;
+ struct ib_udata *udata = params->udata;
+ struct mlx5_ib_ucontext *ucontext = rdma_udata_to_drv_context(
+ udata, struct mlx5_ib_ucontext, ibucontext);
+
+ if (params->is_rss_raw)
+ return 0;
+
+ return get_qp_user_index(ucontext, ucmd, sizeof(*ucmd), &params->uidx);
+}
- err = create_qp_common(dev, pd, init_attr, udata, qp);
+static int mlx5_ib_destroy_dct(struct mlx5_ib_qp *mqp)
+{
+ struct mlx5_ib_dev *dev = to_mdev(mqp->ibqp.device);
+
+ if (mqp->state == IB_QPS_RTR) {
+ int err;
+
+ err = mlx5_core_destroy_dct(dev, &mqp->dct.mdct);
if (err) {
- mlx5_ib_dbg(dev, "create_qp_common failed\n");
- kfree(qp);
- return ERR_PTR(err);
+ mlx5_ib_warn(dev, "failed to destroy DCT %d\n", err);
+ return err;
}
+ }
- if (is_qp0(init_attr->qp_type))
- qp->ibqp.qp_num = 0;
- else if (is_qp1(init_attr->qp_type))
- qp->ibqp.qp_num = 1;
- else
- qp->ibqp.qp_num = qp->trans_qp.base.mqp.qpn;
+ kfree(mqp->dct.in);
+ return 0;
+}
- mlx5_ib_dbg(dev, "ib qpnum 0x%x, mlx qpn 0x%x, rcqn 0x%x, scqn 0x%x\n",
- qp->ibqp.qp_num, qp->trans_qp.base.mqp.qpn,
- init_attr->recv_cq ? to_mcq(init_attr->recv_cq)->mcq.cqn : -1,
- init_attr->send_cq ? to_mcq(init_attr->send_cq)->mcq.cqn : -1);
+static int check_ucmd_data(struct mlx5_ib_dev *dev,
+ struct mlx5_create_qp_params *params)
+{
+ struct ib_udata *udata = params->udata;
+ size_t size, last;
+ int ret;
- qp->trans_qp.xrcdn = xrcdn;
+ if (params->is_rss_raw)
+ /*
+ * These QPs don't have "reserved" field in their
+ * create_qp input struct, so their data is always valid.
+ */
+ last = sizeof(struct mlx5_ib_create_qp_rss);
+ else
+ last = offsetof(struct mlx5_ib_create_qp, reserved);
- break;
+ if (udata->inlen <= last)
+ return 0;
- case IB_QPT_GSI:
- return mlx5_ib_gsi_create_qp(pd, init_attr);
+ /*
+ * User provides different create_qp structures based on the
+ * flow and we need to know if he cleared memory after our
+ * struct create_qp ends.
+ */
+ size = udata->inlen - last;
+ ret = ib_is_udata_cleared(params->udata, last, size);
+ if (!ret)
+ mlx5_ib_dbg(
+ dev,
+ "udata is not cleared, inlen = %zu, ucmd = %zu, last = %zu, size = %zu\n",
+ udata->inlen, params->ucmd_size, last, size);
+ return ret ? 0 : -EINVAL;
+}
+
+int mlx5_ib_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attr,
+ struct ib_udata *udata)
+{
+ struct mlx5_create_qp_params params = {};
+ struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
+ struct mlx5_ib_qp *qp = to_mqp(ibqp);
+ struct ib_pd *pd = ibqp->pd;
+ enum ib_qp_type type;
+ int err;
- case IB_QPT_RAW_IPV6:
- case IB_QPT_RAW_ETHERTYPE:
- case IB_QPT_MAX:
+ err = mlx5_ib_dev_res_srq_init(dev);
+ if (err)
+ return err;
+
+ err = check_qp_type(dev, attr, &type);
+ if (err)
+ return err;
+
+ err = check_valid_flow(dev, pd, attr, udata);
+ if (err)
+ return err;
+
+ params.udata = udata;
+ params.uidx = MLX5_IB_DEFAULT_UIDX;
+ params.attr = attr;
+ params.is_rss_raw = !!attr->rwq_ind_tbl;
+
+ if (udata) {
+ err = process_udata_size(dev, &params);
+ if (err)
+ return err;
+
+ err = check_ucmd_data(dev, &params);
+ if (err)
+ return err;
+
+ params.ucmd = kzalloc(params.ucmd_size, GFP_KERNEL);
+ if (!params.ucmd)
+ return -ENOMEM;
+
+ err = ib_copy_from_udata(params.ucmd, udata, params.inlen);
+ if (err)
+ goto free_ucmd;
+ }
+
+ mutex_init(&qp->mutex);
+ qp->type = type;
+ if (udata) {
+ err = process_vendor_flags(dev, qp, params.ucmd, attr);
+ if (err)
+ goto free_ucmd;
+
+ err = get_qp_uidx(qp, &params);
+ if (err)
+ goto free_ucmd;
+ }
+ err = process_create_flags(dev, qp, attr);
+ if (err)
+ goto free_ucmd;
+
+ err = check_qp_attr(dev, qp, attr);
+ if (err)
+ goto free_ucmd;
+
+ err = create_qp(dev, pd, qp, &params);
+ if (err)
+ goto free_ucmd;
+
+ kfree(params.ucmd);
+ params.ucmd = NULL;
+
+ if (udata)
+ /*
+ * It is safe to copy response for all user create QP flows,
+ * including MLX5_IB_QPT_DCT, which doesn't need it.
+ * In that case, resp will be filled with zeros.
+ */
+ err = ib_copy_to_udata(udata, &params.resp, params.outlen);
+ if (err)
+ goto destroy_qp;
+
+ return 0;
+
+destroy_qp:
+ switch (qp->type) {
+ case MLX5_IB_QPT_DCT:
+ mlx5_ib_destroy_dct(qp);
+ break;
+ case IB_QPT_GSI:
+ mlx5_ib_destroy_gsi(qp);
+ break;
default:
- mlx5_ib_dbg(dev, "unsupported qp type %d\n",
- init_attr->qp_type);
- /* Don't support raw QPs */
- return ERR_PTR(-EINVAL);
+ destroy_qp_common(dev, qp, udata);
}
- return &qp->ibqp;
+free_ucmd:
+ kfree(params.ucmd);
+ return err;
}
-int mlx5_ib_destroy_qp(struct ib_qp *qp)
+int mlx5_ib_destroy_qp(struct ib_qp *qp, struct ib_udata *udata)
{
struct mlx5_ib_dev *dev = to_mdev(qp->device);
struct mlx5_ib_qp *mqp = to_mqp(qp);
- if (unlikely(qp->qp_type == IB_QPT_GSI))
- return mlx5_ib_gsi_destroy_qp(qp);
-
- destroy_qp_common(dev, mqp);
+ if (mqp->type == IB_QPT_GSI)
+ return mlx5_ib_destroy_gsi(mqp);
- kfree(mqp);
+ if (mqp->type == MLX5_IB_QPT_DCT)
+ return mlx5_ib_destroy_dct(mqp);
+ destroy_qp_common(dev, mqp, udata);
return 0;
}
-static __be32 to_mlx5_access_flags(struct mlx5_ib_qp *qp, const struct ib_qp_attr *attr,
- int attr_mask)
+static int set_qpc_atomic_flags(struct mlx5_ib_qp *qp,
+ const struct ib_qp_attr *attr, int attr_mask,
+ void *qpc)
{
- u32 hw_access_flags = 0;
+ struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.device);
u8 dest_rd_atomic;
u32 access_flags;
@@ -2139,14 +3387,21 @@ static __be32 to_mlx5_access_flags(struct mlx5_ib_qp *qp, const struct ib_qp_att
if (!dest_rd_atomic)
access_flags &= IB_ACCESS_REMOTE_WRITE;
- if (access_flags & IB_ACCESS_REMOTE_READ)
- hw_access_flags |= MLX5_QP_BIT_RRE;
- if (access_flags & IB_ACCESS_REMOTE_ATOMIC)
- hw_access_flags |= (MLX5_QP_BIT_RAE | MLX5_ATOMIC_MODE_CX);
- if (access_flags & IB_ACCESS_REMOTE_WRITE)
- hw_access_flags |= MLX5_QP_BIT_RWE;
+ MLX5_SET(qpc, qpc, rre, !!(access_flags & IB_ACCESS_REMOTE_READ));
- return cpu_to_be32(hw_access_flags);
+ if (access_flags & IB_ACCESS_REMOTE_ATOMIC) {
+ int atomic_mode;
+
+ atomic_mode = get_atomic_mode(dev, qp);
+ if (atomic_mode < 0)
+ return -EOPNOTSUPP;
+
+ MLX5_SET(qpc, qpc, rae, 1);
+ MLX5_SET(qpc, qpc, atomic_mode, atomic_mode);
+ }
+
+ MLX5_SET(qpc, qpc, rwe, !!(access_flags & IB_ACCESS_REMOTE_WRITE));
+ return 0;
}
enum {
@@ -2155,24 +3410,65 @@ enum {
MLX5_PATH_FLAG_COUNTER = 1 << 2,
};
-static int ib_rate_to_mlx5(struct mlx5_ib_dev *dev, u8 rate)
+static int mlx5_to_ib_rate_map(u8 rate)
{
- if (rate == IB_RATE_PORT_CURRENT) {
+ static const int rates[] = { IB_RATE_PORT_CURRENT, IB_RATE_56_GBPS,
+ IB_RATE_25_GBPS, IB_RATE_100_GBPS,
+ IB_RATE_200_GBPS, IB_RATE_50_GBPS,
+ IB_RATE_400_GBPS };
+
+ if (rate < ARRAY_SIZE(rates))
+ return rates[rate];
+
+ return rate - MLX5_STAT_RATE_OFFSET;
+}
+
+static int ib_to_mlx5_rate_map(u8 rate)
+{
+ switch (rate) {
+ case IB_RATE_PORT_CURRENT:
return 0;
- } else if (rate < IB_RATE_2_5_GBPS || rate > IB_RATE_300_GBPS) {
- return -EINVAL;
- } else {
- while (rate != IB_RATE_2_5_GBPS &&
- !(1 << (rate + MLX5_STAT_RATE_OFFSET) &
- MLX5_CAP_GEN(dev->mdev, stat_rate_support)))
- --rate;
+ case IB_RATE_56_GBPS:
+ return 1;
+ case IB_RATE_25_GBPS:
+ return 2;
+ case IB_RATE_100_GBPS:
+ return 3;
+ case IB_RATE_200_GBPS:
+ return 4;
+ case IB_RATE_50_GBPS:
+ return 5;
+ case IB_RATE_400_GBPS:
+ return 6;
+ default:
+ return rate + MLX5_STAT_RATE_OFFSET;
}
- return rate + MLX5_STAT_RATE_OFFSET;
+ return 0;
+}
+
+int mlx5r_ib_rate(struct mlx5_ib_dev *dev, u8 rate)
+{
+ u32 stat_rate_support;
+
+ if (rate == IB_RATE_PORT_CURRENT || rate == IB_RATE_800_GBPS ||
+ rate == IB_RATE_1600_GBPS)
+ return 0;
+
+ if (rate < IB_RATE_2_5_GBPS || rate > IB_RATE_1600_GBPS)
+ return -EINVAL;
+
+ stat_rate_support = MLX5_CAP_GEN(dev->mdev, stat_rate_support);
+ while (rate != IB_RATE_PORT_CURRENT &&
+ !(1 << ib_to_mlx5_rate_map(rate) & stat_rate_support))
+ --rate;
+
+ return ib_to_mlx5_rate_map(rate);
}
static int modify_raw_packet_eth_prio(struct mlx5_core_dev *dev,
- struct mlx5_ib_sq *sq, u8 sl)
+ struct mlx5_ib_sq *sq, u8 sl,
+ struct ib_pd *pd)
{
void *in;
void *tisc;
@@ -2180,16 +3476,17 @@ static int modify_raw_packet_eth_prio(struct mlx5_core_dev *dev,
int err;
inlen = MLX5_ST_SZ_BYTES(modify_tis_in);
- in = mlx5_vzalloc(inlen);
+ in = kvzalloc(inlen, GFP_KERNEL);
if (!in)
return -ENOMEM;
MLX5_SET(modify_tis_in, in, bitmask.prio, 1);
+ MLX5_SET(modify_tis_in, in, uid, to_mpd(pd)->uid);
tisc = MLX5_ADDR_OF(modify_tis_in, in, ctx);
MLX5_SET(tisc, tisc, prio, ((sl & 0x7) << 1));
- err = mlx5_core_modify_tis(dev, sq->tisn, in, inlen);
+ err = mlx5_core_modify_tis(dev, sq->tisn, in);
kvfree(in);
@@ -2197,7 +3494,8 @@ static int modify_raw_packet_eth_prio(struct mlx5_core_dev *dev,
}
static int modify_raw_packet_tx_affinity(struct mlx5_core_dev *dev,
- struct mlx5_ib_sq *sq, u8 tx_affinity)
+ struct mlx5_ib_sq *sq, u8 tx_affinity,
+ struct ib_pd *pd)
{
void *in;
void *tisc;
@@ -2205,85 +3503,114 @@ static int modify_raw_packet_tx_affinity(struct mlx5_core_dev *dev,
int err;
inlen = MLX5_ST_SZ_BYTES(modify_tis_in);
- in = mlx5_vzalloc(inlen);
+ in = kvzalloc(inlen, GFP_KERNEL);
if (!in)
return -ENOMEM;
MLX5_SET(modify_tis_in, in, bitmask.lag_tx_port_affinity, 1);
+ MLX5_SET(modify_tis_in, in, uid, to_mpd(pd)->uid);
tisc = MLX5_ADDR_OF(modify_tis_in, in, ctx);
MLX5_SET(tisc, tisc, lag_tx_port_affinity, tx_affinity);
- err = mlx5_core_modify_tis(dev, sq->tisn, in, inlen);
+ err = mlx5_core_modify_tis(dev, sq->tisn, in);
kvfree(in);
return err;
}
+static void mlx5_set_path_udp_sport(void *path, const struct rdma_ah_attr *ah,
+ u32 lqpn, u32 rqpn)
+
+{
+ u32 fl = ah->grh.flow_label;
+
+ if (!fl)
+ fl = rdma_calc_flow_label(lqpn, rqpn);
+
+ MLX5_SET(ads, path, udp_sport, rdma_flow_label_to_udp_sport(fl));
+}
+
static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
- const struct ib_ah_attr *ah,
- struct mlx5_qp_path *path, u8 port, int attr_mask,
- u32 path_flags, const struct ib_qp_attr *attr,
- bool alt)
+ const struct rdma_ah_attr *ah, void *path, u8 port,
+ int attr_mask, u32 path_flags,
+ const struct ib_qp_attr *attr, bool alt)
{
- enum rdma_link_layer ll = rdma_port_get_link_layer(&dev->ib_dev, port);
+ const struct ib_global_route *grh = rdma_ah_read_grh(ah);
int err;
+ enum ib_gid_type gid_type;
+ u8 ah_flags = rdma_ah_get_ah_flags(ah);
+ u8 sl = rdma_ah_get_sl(ah);
if (attr_mask & IB_QP_PKEY_INDEX)
- path->pkey_index = cpu_to_be16(alt ? attr->alt_pkey_index :
- attr->pkey_index);
+ MLX5_SET(ads, path, pkey_index,
+ alt ? attr->alt_pkey_index : attr->pkey_index);
+
+ if (ah_flags & IB_AH_GRH) {
+ const struct ib_port_immutable *immutable;
- if (ah->ah_flags & IB_AH_GRH) {
- if (ah->grh.sgid_index >=
- dev->mdev->port_caps[port - 1].gid_table_len) {
+ immutable = ib_port_immutable_read(&dev->ib_dev, port);
+ if (grh->sgid_index >= immutable->gid_tbl_len) {
pr_err("sgid_index (%u) too large. max is %d\n",
- ah->grh.sgid_index,
- dev->mdev->port_caps[port - 1].gid_table_len);
+ grh->sgid_index,
+ immutable->gid_tbl_len);
return -EINVAL;
}
}
- if (ll == IB_LINK_LAYER_ETHERNET) {
- if (!(ah->ah_flags & IB_AH_GRH))
+ if (ah->type == RDMA_AH_ATTR_TYPE_ROCE) {
+ if (!(ah_flags & IB_AH_GRH))
return -EINVAL;
- memcpy(path->rmac, ah->dmac, sizeof(ah->dmac));
- path->udp_sport = mlx5_get_roce_udp_sport(dev, port,
- ah->grh.sgid_index);
- path->dci_cfi_prio_sl = (ah->sl & 0x7) << 4;
+
+ ether_addr_copy(MLX5_ADDR_OF(ads, path, rmac_47_32),
+ ah->roce.dmac);
+ if ((qp->type == IB_QPT_RC ||
+ qp->type == IB_QPT_UC ||
+ qp->type == IB_QPT_XRC_INI ||
+ qp->type == IB_QPT_XRC_TGT) &&
+ (grh->sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) &&
+ (attr_mask & IB_QP_DEST_QPN))
+ mlx5_set_path_udp_sport(path, ah,
+ qp->ibqp.qp_num,
+ attr->dest_qp_num);
+ MLX5_SET(ads, path, eth_prio, sl & 0x7);
+ gid_type = ah->grh.sgid_attr->gid_type;
+ if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
+ MLX5_SET(ads, path, dscp, grh->traffic_class >> 2);
} else {
- path->fl_free_ar = (path_flags & MLX5_PATH_FLAG_FL) ? 0x80 : 0;
- path->fl_free_ar |=
- (path_flags & MLX5_PATH_FLAG_FREE_AR) ? 0x40 : 0;
- path->rlid = cpu_to_be16(ah->dlid);
- path->grh_mlid = ah->src_path_bits & 0x7f;
- if (ah->ah_flags & IB_AH_GRH)
- path->grh_mlid |= 1 << 7;
- path->dci_cfi_prio_sl = ah->sl & 0xf;
- }
-
- if (ah->ah_flags & IB_AH_GRH) {
- path->mgid_index = ah->grh.sgid_index;
- path->hop_limit = ah->grh.hop_limit;
- path->tclass_flowlabel =
- cpu_to_be32((ah->grh.traffic_class << 20) |
- (ah->grh.flow_label));
- memcpy(path->rgid, ah->grh.dgid.raw, 16);
- }
-
- err = ib_rate_to_mlx5(dev, ah->static_rate);
+ MLX5_SET(ads, path, fl, !!(path_flags & MLX5_PATH_FLAG_FL));
+ MLX5_SET(ads, path, free_ar,
+ !!(path_flags & MLX5_PATH_FLAG_FREE_AR));
+ MLX5_SET(ads, path, rlid, rdma_ah_get_dlid(ah));
+ MLX5_SET(ads, path, mlid, rdma_ah_get_path_bits(ah));
+ MLX5_SET(ads, path, grh, !!(ah_flags & IB_AH_GRH));
+ MLX5_SET(ads, path, sl, sl);
+ }
+
+ if (ah_flags & IB_AH_GRH) {
+ MLX5_SET(ads, path, src_addr_index, grh->sgid_index);
+ MLX5_SET(ads, path, hop_limit, grh->hop_limit);
+ MLX5_SET(ads, path, tclass, grh->traffic_class);
+ MLX5_SET(ads, path, flow_label, grh->flow_label);
+ memcpy(MLX5_ADDR_OF(ads, path, rgid_rip), grh->dgid.raw,
+ sizeof(grh->dgid.raw));
+ }
+
+ err = mlx5r_ib_rate(dev, rdma_ah_get_static_rate(ah));
if (err < 0)
return err;
- path->static_rate = err;
- path->port = port;
+ MLX5_SET(ads, path, stat_rate, err);
+ MLX5_SET(ads, path, vhca_port_num, port);
if (attr_mask & IB_QP_TIMEOUT)
- path->ackto_lt = (alt ? attr->alt_timeout : attr->timeout) << 3;
+ MLX5_SET(ads, path, ack_timeout,
+ alt ? attr->alt_timeout : attr->timeout);
- if ((qp->ibqp.qp_type == IB_QPT_RAW_PACKET) && qp->sq.wqe_cnt)
+ if ((qp->type == IB_QPT_RAW_PACKET) && qp->sq.wqe_cnt)
return modify_raw_packet_eth_prio(dev->mdev,
&qp->raw_packet_qp.sq,
- ah->sl & 0xf);
+ sl & 0xf, qp->ibqp.pd);
return 0;
}
@@ -2295,23 +3622,33 @@ static enum mlx5_qp_optpar opt_mask[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE][MLX5_Q
MLX5_QP_OPTPAR_RAE |
MLX5_QP_OPTPAR_RWE |
MLX5_QP_OPTPAR_PKEY_INDEX |
- MLX5_QP_OPTPAR_PRI_PORT,
+ MLX5_QP_OPTPAR_PRI_PORT |
+ MLX5_QP_OPTPAR_LAG_TX_AFF,
[MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_RWE |
MLX5_QP_OPTPAR_PKEY_INDEX |
- MLX5_QP_OPTPAR_PRI_PORT,
+ MLX5_QP_OPTPAR_PRI_PORT |
+ MLX5_QP_OPTPAR_LAG_TX_AFF,
[MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_PKEY_INDEX |
MLX5_QP_OPTPAR_Q_KEY |
MLX5_QP_OPTPAR_PRI_PORT,
+ [MLX5_QP_ST_XRC] = MLX5_QP_OPTPAR_RRE |
+ MLX5_QP_OPTPAR_RAE |
+ MLX5_QP_OPTPAR_RWE |
+ MLX5_QP_OPTPAR_PKEY_INDEX |
+ MLX5_QP_OPTPAR_PRI_PORT |
+ MLX5_QP_OPTPAR_LAG_TX_AFF,
},
[MLX5_QP_STATE_RTR] = {
[MLX5_QP_ST_RC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH |
MLX5_QP_OPTPAR_RRE |
MLX5_QP_OPTPAR_RAE |
MLX5_QP_OPTPAR_RWE |
- MLX5_QP_OPTPAR_PKEY_INDEX,
+ MLX5_QP_OPTPAR_PKEY_INDEX |
+ MLX5_QP_OPTPAR_LAG_TX_AFF,
[MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH |
MLX5_QP_OPTPAR_RWE |
- MLX5_QP_OPTPAR_PKEY_INDEX,
+ MLX5_QP_OPTPAR_PKEY_INDEX |
+ MLX5_QP_OPTPAR_LAG_TX_AFF,
[MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_PKEY_INDEX |
MLX5_QP_OPTPAR_Q_KEY,
[MLX5_QP_ST_MLX] = MLX5_QP_OPTPAR_PKEY_INDEX |
@@ -2320,7 +3657,8 @@ static enum mlx5_qp_optpar opt_mask[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE][MLX5_Q
MLX5_QP_OPTPAR_RRE |
MLX5_QP_OPTPAR_RAE |
MLX5_QP_OPTPAR_RWE |
- MLX5_QP_OPTPAR_PKEY_INDEX,
+ MLX5_QP_OPTPAR_PKEY_INDEX |
+ MLX5_QP_OPTPAR_LAG_TX_AFF,
},
},
[MLX5_QP_STATE_RTR] = {
@@ -2335,6 +3673,12 @@ static enum mlx5_qp_optpar opt_mask[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE][MLX5_Q
MLX5_QP_OPTPAR_RWE |
MLX5_QP_OPTPAR_PM_STATE,
[MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_Q_KEY,
+ [MLX5_QP_ST_XRC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH |
+ MLX5_QP_OPTPAR_RRE |
+ MLX5_QP_OPTPAR_RAE |
+ MLX5_QP_OPTPAR_RWE |
+ MLX5_QP_OPTPAR_PM_STATE |
+ MLX5_QP_OPTPAR_RNR_TIMEOUT,
},
},
[MLX5_QP_STATE_RTS] = {
@@ -2351,6 +3695,12 @@ static enum mlx5_qp_optpar opt_mask[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE][MLX5_Q
[MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_Q_KEY |
MLX5_QP_OPTPAR_SRQN |
MLX5_QP_OPTPAR_CQN_RCV,
+ [MLX5_QP_ST_XRC] = MLX5_QP_OPTPAR_RRE |
+ MLX5_QP_OPTPAR_RAE |
+ MLX5_QP_OPTPAR_RWE |
+ MLX5_QP_OPTPAR_RNR_TIMEOUT |
+ MLX5_QP_OPTPAR_PM_STATE |
+ MLX5_QP_OPTPAR_ALT_ADDR_PATH,
},
},
[MLX5_QP_STATE_SQER] = {
@@ -2362,6 +3712,21 @@ static enum mlx5_qp_optpar opt_mask[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE][MLX5_Q
MLX5_QP_OPTPAR_RWE |
MLX5_QP_OPTPAR_RAE |
MLX5_QP_OPTPAR_RRE,
+ [MLX5_QP_ST_XRC] = MLX5_QP_OPTPAR_RNR_TIMEOUT |
+ MLX5_QP_OPTPAR_RWE |
+ MLX5_QP_OPTPAR_RAE |
+ MLX5_QP_OPTPAR_RRE,
+ },
+ },
+ [MLX5_QP_STATE_SQD] = {
+ [MLX5_QP_STATE_RTS] = {
+ [MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_Q_KEY,
+ [MLX5_QP_ST_MLX] = MLX5_QP_OPTPAR_Q_KEY,
+ [MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_RWE,
+ [MLX5_QP_ST_RC] = MLX5_QP_OPTPAR_RNR_TIMEOUT |
+ MLX5_QP_OPTPAR_RWE |
+ MLX5_QP_OPTPAR_RAE |
+ MLX5_QP_OPTPAR_RRE,
},
},
};
@@ -2431,9 +3796,9 @@ static int ib_mask_to_mlx5_opt(int ib_mask)
return result;
}
-static int modify_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
- struct mlx5_ib_rq *rq, int new_state,
- const struct mlx5_modify_raw_qp_param *raw_qp_param)
+static int modify_raw_packet_qp_rq(
+ struct mlx5_ib_dev *dev, struct mlx5_ib_rq *rq, int new_state,
+ const struct mlx5_modify_raw_qp_param *raw_qp_param, struct ib_pd *pd)
{
void *in;
void *rqc;
@@ -2441,11 +3806,12 @@ static int modify_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
int err;
inlen = MLX5_ST_SZ_BYTES(modify_rq_in);
- in = mlx5_vzalloc(inlen);
+ in = kvzalloc(inlen, GFP_KERNEL);
if (!in)
return -ENOMEM;
MLX5_SET(modify_rq_in, in, rq_state, rq->state);
+ MLX5_SET(modify_rq_in, in, uid, to_mpd(pd)->uid);
rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx);
MLX5_SET(rqc, rqc, state, new_state);
@@ -2453,14 +3819,15 @@ static int modify_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
if (raw_qp_param->set_mask & MLX5_RAW_QP_MOD_SET_RQ_Q_CTR_ID) {
if (MLX5_CAP_GEN(dev->mdev, modify_rq_counter_set_id)) {
MLX5_SET64(modify_rq_in, in, modify_bitmask,
- MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_MODIFY_RQ_COUNTER_SET_ID);
+ MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_RQ_COUNTER_SET_ID);
MLX5_SET(rqc, rqc, counter_set_id, raw_qp_param->rq_q_ctr_id);
} else
- pr_info_once("%s: RAW PACKET QP counters are not supported on current FW\n",
- dev->ib_dev.name);
+ dev_info_once(
+ &dev->ib_dev.dev,
+ "RAW PACKET QP counters are not supported on current FW\n");
}
- err = mlx5_core_modify_rq(dev->mdev, rq->base.mqp.qpn, in, inlen);
+ err = mlx5_core_modify_rq(dev->mdev, rq->base.mqp.qpn, in);
if (err)
goto out;
@@ -2471,14 +3838,14 @@ out:
return err;
}
-static int modify_raw_packet_qp_sq(struct mlx5_core_dev *dev,
- struct mlx5_ib_sq *sq,
- int new_state,
- const struct mlx5_modify_raw_qp_param *raw_qp_param)
+static int modify_raw_packet_qp_sq(
+ struct mlx5_core_dev *dev, struct mlx5_ib_sq *sq, int new_state,
+ const struct mlx5_modify_raw_qp_param *raw_qp_param, struct ib_pd *pd)
{
struct mlx5_ib_qp *ibqp = sq->base.container_mibqp;
- u32 old_rate = ibqp->rate_limit;
- u32 new_rate = old_rate;
+ struct mlx5_rate_limit old_rl = ibqp->rl;
+ struct mlx5_rate_limit new_rl = old_rl;
+ bool new_rate_added = false;
u16 rl_index = 0;
void *in;
void *sqc;
@@ -2486,10 +3853,11 @@ static int modify_raw_packet_qp_sq(struct mlx5_core_dev *dev,
int err;
inlen = MLX5_ST_SZ_BYTES(modify_sq_in);
- in = mlx5_vzalloc(inlen);
+ in = kvzalloc(inlen, GFP_KERNEL);
if (!in)
return -ENOMEM;
+ MLX5_SET(modify_sq_in, in, uid, to_mpd(pd)->uid);
MLX5_SET(modify_sq_in, in, sq_state, sq->state);
sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx);
@@ -2500,39 +3868,45 @@ static int modify_raw_packet_qp_sq(struct mlx5_core_dev *dev,
pr_warn("%s: Rate limit can only be changed when SQ is moving to RDY\n",
__func__);
else
- new_rate = raw_qp_param->rate_limit;
+ new_rl = raw_qp_param->rl;
}
- if (old_rate != new_rate) {
- if (new_rate) {
- err = mlx5_rl_add_rate(dev, new_rate, &rl_index);
+ if (!mlx5_rl_are_equal(&old_rl, &new_rl)) {
+ if (new_rl.rate) {
+ err = mlx5_rl_add_rate(dev, &rl_index, &new_rl);
if (err) {
- pr_err("Failed configuring rate %u: %d\n",
- new_rate, err);
+ pr_err("Failed configuring rate limit(err %d): \
+ rate %u, max_burst_sz %u, typical_pkt_sz %u\n",
+ err, new_rl.rate, new_rl.max_burst_sz,
+ new_rl.typical_pkt_sz);
+
goto out;
}
+ new_rate_added = true;
}
MLX5_SET64(modify_sq_in, in, modify_bitmask, 1);
+ /* index 0 means no limit */
MLX5_SET(sqc, sqc, packet_pacing_rate_limit_index, rl_index);
}
- err = mlx5_core_modify_sq(dev, sq->base.mqp.qpn, in, inlen);
+ err = mlx5_core_modify_sq(dev, sq->base.mqp.qpn, in);
if (err) {
/* Remove new rate from table if failed */
- if (new_rate &&
- old_rate != new_rate)
- mlx5_rl_remove_rate(dev, new_rate);
+ if (new_rate_added)
+ mlx5_rl_remove_rate(dev, &new_rl);
goto out;
}
/* Only remove the old rate after new rate was set */
- if ((old_rate &&
- (old_rate != new_rate)) ||
- (new_state != MLX5_SQC_STATE_RDY))
- mlx5_rl_remove_rate(dev, old_rate);
+ if ((old_rl.rate && !mlx5_rl_are_equal(&old_rl, &new_rl)) ||
+ (new_state != MLX5_SQC_STATE_RDY)) {
+ mlx5_rl_remove_rate(dev, &old_rl);
+ if (new_state != MLX5_SQC_STATE_RDY)
+ memset(&new_rl, 0, sizeof(new_rl));
+ }
- ibqp->rate_limit = new_rate;
+ ibqp->rl = new_rl;
sq->state = new_state;
out:
@@ -2556,7 +3930,7 @@ static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
switch (raw_qp_param->operation) {
case MLX5_CMD_OP_RST2INIT_QP:
rq_state = MLX5_RQC_STATE_RDY;
- sq_state = MLX5_SQC_STATE_RDY;
+ sq_state = MLX5_SQC_STATE_RST;
break;
case MLX5_CMD_OP_2ERR_QP:
rq_state = MLX5_RQC_STATE_ERR;
@@ -2568,13 +3942,11 @@ static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
break;
case MLX5_CMD_OP_RTR2RTS_QP:
case MLX5_CMD_OP_RTS2RTS_QP:
- if (raw_qp_param->set_mask ==
- MLX5_RAW_QP_RATE_LIMIT) {
- modify_rq = 0;
- sq_state = sq->state;
- } else {
- return raw_qp_param->set_mask ? -EINVAL : 0;
- }
+ if (raw_qp_param->set_mask & ~MLX5_RAW_QP_RATE_LIMIT)
+ return -EINVAL;
+
+ modify_rq = 0;
+ sq_state = MLX5_SQC_STATE_RDY;
break;
case MLX5_CMD_OP_INIT2INIT_QP:
case MLX5_CMD_OP_INIT2RTR_QP:
@@ -2588,28 +3960,171 @@ static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
}
if (modify_rq) {
- err = modify_raw_packet_qp_rq(dev, rq, rq_state, raw_qp_param);
+ err = modify_raw_packet_qp_rq(dev, rq, rq_state, raw_qp_param,
+ qp->ibqp.pd);
if (err)
return err;
}
if (modify_sq) {
+ struct mlx5_flow_handle *flow_rule;
+
if (tx_affinity) {
err = modify_raw_packet_tx_affinity(dev->mdev, sq,
- tx_affinity);
+ tx_affinity,
+ qp->ibqp.pd);
if (err)
return err;
}
- return modify_raw_packet_qp_sq(dev->mdev, sq, sq_state, raw_qp_param);
+ flow_rule = create_flow_rule_vport_sq(dev, sq,
+ raw_qp_param->port);
+ if (IS_ERR(flow_rule))
+ return PTR_ERR(flow_rule);
+
+ err = modify_raw_packet_qp_sq(dev->mdev, sq, sq_state,
+ raw_qp_param, qp->ibqp.pd);
+ if (err) {
+ if (flow_rule)
+ mlx5_del_flow_rules(flow_rule);
+ return err;
+ }
+
+ if (flow_rule) {
+ destroy_flow_rule_vport_sq(sq);
+ sq->flow_rule = flow_rule;
+ }
+
+ return err;
}
return 0;
}
+static unsigned int get_tx_affinity_rr(struct mlx5_ib_dev *dev,
+ struct ib_udata *udata)
+{
+ struct mlx5_ib_ucontext *ucontext = rdma_udata_to_drv_context(
+ udata, struct mlx5_ib_ucontext, ibucontext);
+ u8 port_num = mlx5_core_native_port_num(dev->mdev) - 1;
+ atomic_t *tx_port_affinity;
+
+ if (ucontext)
+ tx_port_affinity = &ucontext->tx_port_affinity;
+ else
+ tx_port_affinity = &dev->port[port_num].roce.tx_port_affinity;
+
+ return (unsigned int)atomic_add_return(1, tx_port_affinity) %
+ (dev->lag_active ? dev->lag_ports : MLX5_CAP_GEN(dev->mdev, num_lag_ports)) + 1;
+}
+
+static bool qp_supports_affinity(struct mlx5_ib_qp *qp)
+{
+ if ((qp->type == IB_QPT_RC) || (qp->type == IB_QPT_UD) ||
+ (qp->type == IB_QPT_UC) || (qp->type == IB_QPT_RAW_PACKET) ||
+ (qp->type == IB_QPT_XRC_INI) || (qp->type == IB_QPT_XRC_TGT) ||
+ (qp->type == MLX5_IB_QPT_DCI))
+ return true;
+ return false;
+}
+
+static unsigned int get_tx_affinity(struct ib_qp *qp,
+ const struct ib_qp_attr *attr,
+ int attr_mask, u8 init,
+ struct ib_udata *udata)
+{
+ struct mlx5_ib_ucontext *ucontext = rdma_udata_to_drv_context(
+ udata, struct mlx5_ib_ucontext, ibucontext);
+ struct mlx5_ib_dev *dev = to_mdev(qp->device);
+ struct mlx5_ib_qp *mqp = to_mqp(qp);
+ struct mlx5_ib_qp_base *qp_base;
+ unsigned int tx_affinity;
+
+ if (!(mlx5_ib_lag_should_assign_affinity(dev) &&
+ qp_supports_affinity(mqp)))
+ return 0;
+
+ if (mqp->flags & MLX5_IB_QP_CREATE_SQPN_QP1)
+ tx_affinity = mqp->gsi_lag_port;
+ else if (init)
+ tx_affinity = get_tx_affinity_rr(dev, udata);
+ else if ((attr_mask & IB_QP_AV) && attr->xmit_slave)
+ tx_affinity =
+ mlx5_lag_get_slave_port(dev->mdev, attr->xmit_slave);
+ else
+ return 0;
+
+ qp_base = &mqp->trans_qp.base;
+ if (ucontext)
+ mlx5_ib_dbg(dev, "Set tx affinity 0x%x to qpn 0x%x ucontext %p\n",
+ tx_affinity, qp_base->mqp.qpn, ucontext);
+ else
+ mlx5_ib_dbg(dev, "Set tx affinity 0x%x to qpn 0x%x\n",
+ tx_affinity, qp_base->mqp.qpn);
+ return tx_affinity;
+}
+
+static int __mlx5_ib_qp_set_raw_qp_counter(struct mlx5_ib_qp *qp, u32 set_id,
+ struct mlx5_core_dev *mdev)
+{
+ struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp;
+ struct mlx5_ib_rq *rq = &raw_packet_qp->rq;
+ u32 in[MLX5_ST_SZ_DW(modify_rq_in)] = {};
+ void *rqc;
+
+ if (!qp->rq.wqe_cnt)
+ return 0;
+
+ MLX5_SET(modify_rq_in, in, rq_state, rq->state);
+ MLX5_SET(modify_rq_in, in, uid, to_mpd(qp->ibqp.pd)->uid);
+
+ rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx);
+ MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RDY);
+
+ MLX5_SET64(modify_rq_in, in, modify_bitmask,
+ MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_RQ_COUNTER_SET_ID);
+ MLX5_SET(rqc, rqc, counter_set_id, set_id);
+
+ return mlx5_core_modify_rq(mdev, rq->base.mqp.qpn, in);
+}
+
+static int __mlx5_ib_qp_set_counter(struct ib_qp *qp,
+ struct rdma_counter *counter)
+{
+ struct mlx5_ib_dev *dev = to_mdev(qp->device);
+ u32 in[MLX5_ST_SZ_DW(rts2rts_qp_in)] = {};
+ struct mlx5_ib_qp *mqp = to_mqp(qp);
+ struct mlx5_ib_qp_base *base;
+ u32 set_id;
+ u32 *qpc;
+
+ if (counter)
+ set_id = counter->id;
+ else
+ set_id = mlx5_ib_get_counters_id(dev, mqp->port - 1);
+
+ if (mqp->type == IB_QPT_RAW_PACKET)
+ return __mlx5_ib_qp_set_raw_qp_counter(mqp, set_id, dev->mdev);
+
+ base = &mqp->trans_qp.base;
+ MLX5_SET(rts2rts_qp_in, in, opcode, MLX5_CMD_OP_RTS2RTS_QP);
+ MLX5_SET(rts2rts_qp_in, in, qpn, base->mqp.qpn);
+ MLX5_SET(rts2rts_qp_in, in, uid, base->mqp.uid);
+ MLX5_SET(rts2rts_qp_in, in, opt_param_mask,
+ MLX5_QP_OPTPAR_COUNTER_SET_ID);
+
+ qpc = MLX5_ADDR_OF(rts2rts_qp_in, in, qpc);
+ MLX5_SET(qpc, qpc, counter_set_id, set_id);
+ return mlx5_cmd_exec_in(dev->mdev, rts2rts_qp, in);
+}
+
static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
const struct ib_qp_attr *attr, int attr_mask,
- enum ib_qp_state cur_state, enum ib_qp_state new_state)
+ enum ib_qp_state cur_state,
+ enum ib_qp_state new_state,
+ const struct mlx5_ib_modify_qp *ucmd,
+ struct mlx5_ib_modify_qp_resp *resp,
+ struct ib_udata *udata)
{
static const u16 optab[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE] = {
[MLX5_QP_STATE_RST] = {
@@ -2636,6 +4151,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
[MLX5_QP_STATE_SQD] = {
[MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP,
[MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP,
+ [MLX5_QP_STATE_RTS] = MLX5_CMD_OP_SQD_RTS_QP,
},
[MLX5_QP_STATE_SQER] = {
[MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP,
@@ -2652,66 +4168,60 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
struct mlx5_ib_qp *qp = to_mqp(ibqp);
struct mlx5_ib_qp_base *base = &qp->trans_qp.base;
struct mlx5_ib_cq *send_cq, *recv_cq;
- struct mlx5_qp_context *context;
struct mlx5_ib_pd *pd;
- struct mlx5_ib_port *mibport = NULL;
enum mlx5_qp_state mlx5_cur, mlx5_new;
- enum mlx5_qp_optpar optpar;
+ void *qpc, *pri_path, *alt_path;
+ enum mlx5_qp_optpar optpar = 0;
+ u32 set_id = 0;
int mlx5_st;
int err;
u16 op;
u8 tx_affinity = 0;
- context = kzalloc(sizeof(*context), GFP_KERNEL);
- if (!context)
- return -ENOMEM;
+ mlx5_st = to_mlx5_st(qp->type);
+ if (mlx5_st < 0)
+ return -EINVAL;
- err = to_mlx5_st(ibqp->qp_type);
- if (err < 0) {
- mlx5_ib_dbg(dev, "unsupported qp type %d\n", ibqp->qp_type);
- goto out;
- }
+ qpc = kzalloc(MLX5_ST_SZ_BYTES(qpc), GFP_KERNEL);
+ if (!qpc)
+ return -ENOMEM;
- context->flags = cpu_to_be32(err << 16);
+ pd = to_mpd(qp->ibqp.pd);
+ MLX5_SET(qpc, qpc, st, mlx5_st);
if (!(attr_mask & IB_QP_PATH_MIG_STATE)) {
- context->flags |= cpu_to_be32(MLX5_QP_PM_MIGRATED << 11);
+ MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
} else {
switch (attr->path_mig_state) {
case IB_MIG_MIGRATED:
- context->flags |= cpu_to_be32(MLX5_QP_PM_MIGRATED << 11);
+ MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
break;
case IB_MIG_REARM:
- context->flags |= cpu_to_be32(MLX5_QP_PM_REARM << 11);
+ MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_REARM);
break;
case IB_MIG_ARMED:
- context->flags |= cpu_to_be32(MLX5_QP_PM_ARMED << 11);
+ MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_ARMED);
break;
}
}
- if ((cur_state == IB_QPS_RESET) && (new_state == IB_QPS_INIT)) {
- if ((ibqp->qp_type == IB_QPT_RC) ||
- (ibqp->qp_type == IB_QPT_UD &&
- !(qp->flags & MLX5_IB_QP_SQPN_QP1)) ||
- (ibqp->qp_type == IB_QPT_UC) ||
- (ibqp->qp_type == IB_QPT_RAW_PACKET) ||
- (ibqp->qp_type == IB_QPT_XRC_INI) ||
- (ibqp->qp_type == IB_QPT_XRC_TGT)) {
- if (mlx5_lag_is_active(dev->mdev)) {
- tx_affinity = (unsigned int)atomic_add_return(1,
- &dev->roce.next_port) %
- MLX5_MAX_PORTS + 1;
- context->flags |= cpu_to_be32(tx_affinity << 24);
- }
- }
- }
+ tx_affinity = get_tx_affinity(ibqp, attr, attr_mask,
+ cur_state == IB_QPS_RESET &&
+ new_state == IB_QPS_INIT, udata);
+
+ MLX5_SET(qpc, qpc, lag_tx_port_affinity, tx_affinity);
+ if (tx_affinity && new_state == IB_QPS_RTR &&
+ MLX5_CAP_GEN(dev->mdev, init2_lag_tx_port_affinity))
+ optpar |= MLX5_QP_OPTPAR_LAG_TX_AFF;
- if (is_sqp(ibqp->qp_type)) {
- context->mtu_msgmax = (IB_MTU_256 << 5) | 8;
- } else if (ibqp->qp_type == IB_QPT_UD ||
- ibqp->qp_type == MLX5_IB_QPT_REG_UMR) {
- context->mtu_msgmax = (IB_MTU_4096 << 5) | 12;
+ if (is_sqp(qp->type)) {
+ MLX5_SET(qpc, qpc, mtu, IB_MTU_256);
+ MLX5_SET(qpc, qpc, log_msg_max, 8);
+ } else if ((qp->type == IB_QPT_UD &&
+ !(qp->flags & IB_QP_CREATE_SOURCE_QPN)) ||
+ qp->type == MLX5_IB_QPT_REG_UMR) {
+ MLX5_SET(qpc, qpc, mtu, IB_MTU_4096);
+ MLX5_SET(qpc, qpc, log_msg_max, 12);
} else if (attr_mask & IB_QP_PATH_MTU) {
if (attr->path_mtu < IB_MTU_256 ||
attr->path_mtu > IB_MTU_4096) {
@@ -2719,155 +4229,196 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
err = -EINVAL;
goto out;
}
- context->mtu_msgmax = (attr->path_mtu << 5) |
- (u8)MLX5_CAP_GEN(dev->mdev, log_max_msg);
+ MLX5_SET(qpc, qpc, mtu, attr->path_mtu);
+ MLX5_SET(qpc, qpc, log_msg_max,
+ MLX5_CAP_GEN(dev->mdev, log_max_msg));
}
if (attr_mask & IB_QP_DEST_QPN)
- context->log_pg_sz_remote_qpn = cpu_to_be32(attr->dest_qp_num);
+ MLX5_SET(qpc, qpc, remote_qpn, attr->dest_qp_num);
+
+ pri_path = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
+ alt_path = MLX5_ADDR_OF(qpc, qpc, secondary_address_path);
if (attr_mask & IB_QP_PKEY_INDEX)
- context->pri_path.pkey_index = cpu_to_be16(attr->pkey_index);
+ MLX5_SET(ads, pri_path, pkey_index, attr->pkey_index);
/* todo implement counter_index functionality */
- if (is_sqp(ibqp->qp_type))
- context->pri_path.port = qp->port;
+ if (dev->ib_dev.type == RDMA_DEVICE_TYPE_SMI && is_qp0(qp->type)) {
+ MLX5_SET(ads, pri_path, vhca_port_num,
+ smi_to_native_portnum(dev, qp->port));
+ if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR)
+ MLX5_SET(ads, pri_path, plane_index, qp->port);
+ } else if (is_sqp(qp->type))
+ MLX5_SET(ads, pri_path, vhca_port_num, qp->port);
if (attr_mask & IB_QP_PORT)
- context->pri_path.port = attr->port_num;
+ MLX5_SET(ads, pri_path, vhca_port_num, attr->port_num);
if (attr_mask & IB_QP_AV) {
- err = mlx5_set_path(dev, qp, &attr->ah_attr, &context->pri_path,
- attr_mask & IB_QP_PORT ? attr->port_num : qp->port,
+ err = mlx5_set_path(dev, qp, &attr->ah_attr, pri_path,
+ attr_mask & IB_QP_PORT ? attr->port_num :
+ qp->port,
attr_mask, 0, attr, false);
if (err)
goto out;
}
if (attr_mask & IB_QP_TIMEOUT)
- context->pri_path.ackto_lt |= attr->timeout << 3;
+ MLX5_SET(ads, pri_path, ack_timeout, attr->timeout);
if (attr_mask & IB_QP_ALT_PATH) {
- err = mlx5_set_path(dev, qp, &attr->alt_ah_attr,
- &context->alt_path,
+ err = mlx5_set_path(dev, qp, &attr->alt_ah_attr, alt_path,
attr->alt_port_num,
- attr_mask | IB_QP_PKEY_INDEX | IB_QP_TIMEOUT,
+ attr_mask | IB_QP_PKEY_INDEX |
+ IB_QP_TIMEOUT,
0, attr, true);
if (err)
goto out;
}
- pd = get_pd(qp);
- get_cqs(qp->ibqp.qp_type, qp->ibqp.send_cq, qp->ibqp.recv_cq,
+ get_cqs(qp->type, qp->ibqp.send_cq, qp->ibqp.recv_cq,
&send_cq, &recv_cq);
- context->flags_pd = cpu_to_be32(pd ? pd->pdn : to_mpd(dev->devr.p0)->pdn);
- context->cqn_send = send_cq ? cpu_to_be32(send_cq->mcq.cqn) : 0;
- context->cqn_recv = recv_cq ? cpu_to_be32(recv_cq->mcq.cqn) : 0;
- context->params1 = cpu_to_be32(MLX5_IB_ACK_REQ_FREQ << 28);
+ MLX5_SET(qpc, qpc, pd, pd ? pd->pdn : to_mpd(dev->devr.p0)->pdn);
+ if (send_cq)
+ MLX5_SET(qpc, qpc, cqn_snd, send_cq->mcq.cqn);
+ if (recv_cq)
+ MLX5_SET(qpc, qpc, cqn_rcv, recv_cq->mcq.cqn);
+
+ MLX5_SET(qpc, qpc, log_ack_req_freq, MLX5_IB_ACK_REQ_FREQ);
if (attr_mask & IB_QP_RNR_RETRY)
- context->params1 |= cpu_to_be32(attr->rnr_retry << 13);
+ MLX5_SET(qpc, qpc, rnr_retry, attr->rnr_retry);
if (attr_mask & IB_QP_RETRY_CNT)
- context->params1 |= cpu_to_be32(attr->retry_cnt << 16);
+ MLX5_SET(qpc, qpc, retry_count, attr->retry_cnt);
- if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
- if (attr->max_rd_atomic)
- context->params1 |=
- cpu_to_be32(fls(attr->max_rd_atomic - 1) << 21);
- }
+ if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC && attr->max_rd_atomic)
+ MLX5_SET(qpc, qpc, log_sra_max, fls(attr->max_rd_atomic - 1));
if (attr_mask & IB_QP_SQ_PSN)
- context->next_send_psn = cpu_to_be32(attr->sq_psn);
+ MLX5_SET(qpc, qpc, next_send_psn, attr->sq_psn);
- if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
- if (attr->max_dest_rd_atomic)
- context->params2 |=
- cpu_to_be32(fls(attr->max_dest_rd_atomic - 1) << 21);
- }
+ if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC && attr->max_dest_rd_atomic)
+ MLX5_SET(qpc, qpc, log_rra_max,
+ fls(attr->max_dest_rd_atomic - 1));
- if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC))
- context->params2 |= to_mlx5_access_flags(qp, attr, attr_mask);
+ if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC)) {
+ err = set_qpc_atomic_flags(qp, attr, attr_mask, qpc);
+ if (err)
+ goto out;
+ }
if (attr_mask & IB_QP_MIN_RNR_TIMER)
- context->rnr_nextrecvpsn |= cpu_to_be32(attr->min_rnr_timer << 24);
+ MLX5_SET(qpc, qpc, min_rnr_nak, attr->min_rnr_timer);
if (attr_mask & IB_QP_RQ_PSN)
- context->rnr_nextrecvpsn |= cpu_to_be32(attr->rq_psn);
+ MLX5_SET(qpc, qpc, next_rcv_psn, attr->rq_psn);
if (attr_mask & IB_QP_QKEY)
- context->qkey = cpu_to_be32(attr->qkey);
+ MLX5_SET(qpc, qpc, q_key, attr->qkey);
if (qp->rq.wqe_cnt && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
- context->db_rec_addr = cpu_to_be64(qp->db.dma);
+ MLX5_SET64(qpc, qpc, dbr_addr, qp->db.dma);
if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
u8 port_num = (attr_mask & IB_QP_PORT ? attr->port_num :
qp->port) - 1;
- mibport = &dev->port[port_num];
- context->qp_counter_set_usr_page |=
- cpu_to_be32((u32)(mibport->q_cnt_id) << 24);
+
+ /* Underlay port should be used - index 0 function per port */
+ if (qp->flags & IB_QP_CREATE_SOURCE_QPN)
+ port_num = 0;
+
+ if (ibqp->counter)
+ set_id = ibqp->counter->id;
+ else
+ set_id = mlx5_ib_get_counters_id(dev, port_num);
+ MLX5_SET(qpc, qpc, counter_set_id, set_id);
}
if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
- context->sq_crq_size |= cpu_to_be16(1 << 4);
+ MLX5_SET(qpc, qpc, rlky, 1);
+
+ if (qp->flags & MLX5_IB_QP_CREATE_SQPN_QP1)
+ MLX5_SET(qpc, qpc, deth_sqpn, 1);
- if (qp->flags & MLX5_IB_QP_SQPN_QP1)
- context->deth_sqpn = cpu_to_be32(1);
+ if (qp->is_ooo_rq && cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) {
+ MLX5_SET(qpc, qpc, dp_ordering_1, 1);
+ MLX5_SET(qpc, qpc, dp_ordering_force, 1);
+ }
mlx5_cur = to_mlx5_state(cur_state);
mlx5_new = to_mlx5_state(new_state);
- mlx5_st = to_mlx5_st(ibqp->qp_type);
- if (mlx5_st < 0)
- goto out;
-
- /* If moving to a reset or error state, we must disable page faults on
- * this QP and flush all current page faults. Otherwise a stale page
- * fault may attempt to work on this QP after it is reset and moved
- * again to RTS, and may cause the driver and the device to get out of
- * sync. */
- if (cur_state != IB_QPS_RESET && cur_state != IB_QPS_ERR &&
- (new_state == IB_QPS_RESET || new_state == IB_QPS_ERR) &&
- (qp->ibqp.qp_type != IB_QPT_RAW_PACKET))
- mlx5_ib_qp_disable_pagefaults(qp);
if (mlx5_cur >= MLX5_QP_NUM_STATE || mlx5_new >= MLX5_QP_NUM_STATE ||
- !optab[mlx5_cur][mlx5_new])
+ !optab[mlx5_cur][mlx5_new]) {
+ err = -EINVAL;
goto out;
+ }
op = optab[mlx5_cur][mlx5_new];
- optpar = ib_mask_to_mlx5_opt(attr_mask);
+ optpar |= ib_mask_to_mlx5_opt(attr_mask);
optpar &= opt_mask[mlx5_cur][mlx5_new][mlx5_st];
- if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET) {
+ if (qp->type == IB_QPT_RAW_PACKET ||
+ qp->flags & IB_QP_CREATE_SOURCE_QPN) {
struct mlx5_modify_raw_qp_param raw_qp_param = {};
raw_qp_param.operation = op;
if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
- raw_qp_param.rq_q_ctr_id = mibport->q_cnt_id;
+ raw_qp_param.rq_q_ctr_id = set_id;
raw_qp_param.set_mask |= MLX5_RAW_QP_MOD_SET_RQ_Q_CTR_ID;
}
+ if (attr_mask & IB_QP_PORT)
+ raw_qp_param.port = attr->port_num;
+
if (attr_mask & IB_QP_RATE_LIMIT) {
- raw_qp_param.rate_limit = attr->rate_limit;
+ raw_qp_param.rl.rate = attr->rate_limit;
+
+ if (ucmd->burst_info.max_burst_sz) {
+ if (attr->rate_limit &&
+ MLX5_CAP_QOS(dev->mdev, packet_pacing_burst_bound)) {
+ raw_qp_param.rl.max_burst_sz =
+ ucmd->burst_info.max_burst_sz;
+ } else {
+ err = -EINVAL;
+ goto out;
+ }
+ }
+
+ if (ucmd->burst_info.typical_pkt_sz) {
+ if (attr->rate_limit &&
+ MLX5_CAP_QOS(dev->mdev, packet_pacing_typical_size)) {
+ raw_qp_param.rl.typical_pkt_sz =
+ ucmd->burst_info.typical_pkt_sz;
+ } else {
+ err = -EINVAL;
+ goto out;
+ }
+ }
+
raw_qp_param.set_mask |= MLX5_RAW_QP_RATE_LIMIT;
}
err = modify_raw_packet_qp(dev, qp, &raw_qp_param, tx_affinity);
} else {
- err = mlx5_core_qp_modify(dev->mdev, op, optpar, context,
- &base->mqp);
+ if (udata) {
+ /* For the kernel flows, the resp will stay zero */
+ resp->ece_options =
+ MLX5_CAP_GEN(dev->mdev, ece_support) ?
+ ucmd->ece_options : 0;
+ resp->response_length = sizeof(*resp);
+ }
+ err = mlx5_core_qp_modify(dev, op, optpar, qpc, &base->mqp,
+ &resp->ece_options);
}
if (err)
goto out;
- if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT &&
- (qp->ibqp.qp_type != IB_QPT_RAW_PACKET))
- mlx5_ib_qp_enable_pagefaults(qp);
-
qp->state = new_state;
if (attr_mask & IB_QP_ACCESS_FLAGS)
@@ -2883,7 +4434,8 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
* If we moved a kernel QP to RESET, clean up all old CQ
* entries and reinitialize the QP.
*/
- if (new_state == IB_QPS_RESET && !ibqp->uobject) {
+ if (new_state == IB_QPS_RESET &&
+ !ibqp->uobject && qp->type != IB_QPT_XRC_TGT) {
mlx5_ib_cq_clean(recv_cq, base->mqp.qpn,
ibqp->srq ? to_msrq(ibqp->srq) : NULL);
if (send_cq != recv_cq)
@@ -2894,1394 +4446,353 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
qp->sq.head = 0;
qp->sq.tail = 0;
qp->sq.cur_post = 0;
+ if (qp->sq.wqe_cnt)
+ qp->sq.cur_edge = get_sq_edge(&qp->sq, 0);
qp->sq.last_poll = 0;
qp->db.db[MLX5_RCV_DBR] = 0;
qp->db.db[MLX5_SND_DBR] = 0;
}
-out:
- kfree(context);
- return err;
-}
-
-int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
- int attr_mask, struct ib_udata *udata)
-{
- struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
- struct mlx5_ib_qp *qp = to_mqp(ibqp);
- enum ib_qp_type qp_type;
- enum ib_qp_state cur_state, new_state;
- int err = -EINVAL;
- int port;
- enum rdma_link_layer ll = IB_LINK_LAYER_UNSPECIFIED;
-
- if (ibqp->rwq_ind_tbl)
- return -ENOSYS;
-
- if (unlikely(ibqp->qp_type == IB_QPT_GSI))
- return mlx5_ib_gsi_modify_qp(ibqp, attr, attr_mask);
-
- qp_type = (unlikely(ibqp->qp_type == MLX5_IB_QPT_HW_GSI)) ?
- IB_QPT_GSI : ibqp->qp_type;
-
- mutex_lock(&qp->mutex);
-
- cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state;
- new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
-
- if (!(cur_state == new_state && cur_state == IB_QPS_RESET)) {
- port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
- ll = dev->ib_dev.get_link_layer(&dev->ib_dev, port);
- }
-
- if (qp_type != MLX5_IB_QPT_REG_UMR &&
- !ib_modify_qp_is_ok(cur_state, new_state, qp_type, attr_mask, ll)) {
- mlx5_ib_dbg(dev, "invalid QP state transition from %d to %d, qp_type %d, attr_mask 0x%x\n",
- cur_state, new_state, ibqp->qp_type, attr_mask);
- goto out;
- }
-
- if ((attr_mask & IB_QP_PORT) &&
- (attr->port_num == 0 ||
- attr->port_num > MLX5_CAP_GEN(dev->mdev, num_ports))) {
- mlx5_ib_dbg(dev, "invalid port number %d. number of ports is %d\n",
- attr->port_num, dev->num_ports);
- goto out;
- }
-
- if (attr_mask & IB_QP_PKEY_INDEX) {
- port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
- if (attr->pkey_index >=
- dev->mdev->port_caps[port - 1].pkey_table_len) {
- mlx5_ib_dbg(dev, "invalid pkey index %d\n",
- attr->pkey_index);
- goto out;
- }
+ if ((new_state == IB_QPS_RTS) && qp->counter_pending) {
+ err = __mlx5_ib_qp_set_counter(ibqp, ibqp->counter);
+ if (!err)
+ qp->counter_pending = 0;
}
- if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
- attr->max_rd_atomic >
- (1 << MLX5_CAP_GEN(dev->mdev, log_max_ra_res_qp))) {
- mlx5_ib_dbg(dev, "invalid max_rd_atomic value %d\n",
- attr->max_rd_atomic);
- goto out;
- }
-
- if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
- attr->max_dest_rd_atomic >
- (1 << MLX5_CAP_GEN(dev->mdev, log_max_ra_req_qp))) {
- mlx5_ib_dbg(dev, "invalid max_dest_rd_atomic value %d\n",
- attr->max_dest_rd_atomic);
- goto out;
- }
-
- if (cur_state == new_state && cur_state == IB_QPS_RESET) {
- err = 0;
- goto out;
- }
-
- err = __mlx5_ib_modify_qp(ibqp, attr, attr_mask, cur_state, new_state);
-
out:
- mutex_unlock(&qp->mutex);
+ kfree(qpc);
return err;
}
-static int mlx5_wq_overflow(struct mlx5_ib_wq *wq, int nreq, struct ib_cq *ib_cq)
+static inline bool is_valid_mask(int mask, int req, int opt)
{
- struct mlx5_ib_cq *cq;
- unsigned cur;
+ if ((mask & req) != req)
+ return false;
- cur = wq->head - wq->tail;
- if (likely(cur + nreq < wq->max_post))
- return 0;
-
- cq = to_mcq(ib_cq);
- spin_lock(&cq->lock);
- cur = wq->head - wq->tail;
- spin_unlock(&cq->lock);
+ if (mask & ~(req | opt))
+ return false;
- return cur + nreq >= wq->max_post;
+ return true;
}
-static __always_inline void set_raddr_seg(struct mlx5_wqe_raddr_seg *rseg,
- u64 remote_addr, u32 rkey)
-{
- rseg->raddr = cpu_to_be64(remote_addr);
- rseg->rkey = cpu_to_be32(rkey);
- rseg->reserved = 0;
-}
-
-static void *set_eth_seg(struct mlx5_wqe_eth_seg *eseg,
- struct ib_send_wr *wr, void *qend,
- struct mlx5_ib_qp *qp, int *size)
+/* check valid transition for driver QP types
+ * for now the only QP type that this function supports is DCI
+ */
+static bool modify_dci_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state new_state,
+ enum ib_qp_attr_mask attr_mask)
+{
+ int req = IB_QP_STATE;
+ int opt = 0;
+
+ if (new_state == IB_QPS_RESET) {
+ return is_valid_mask(attr_mask, req, opt);
+ } else if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
+ req |= IB_QP_PKEY_INDEX | IB_QP_PORT;
+ return is_valid_mask(attr_mask, req, opt);
+ } else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_INIT) {
+ opt = IB_QP_PKEY_INDEX | IB_QP_PORT;
+ return is_valid_mask(attr_mask, req, opt);
+ } else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) {
+ req |= IB_QP_PATH_MTU;
+ opt = IB_QP_PKEY_INDEX | IB_QP_AV;
+ return is_valid_mask(attr_mask, req, opt);
+ } else if (cur_state == IB_QPS_RTR && new_state == IB_QPS_RTS) {
+ req |= IB_QP_TIMEOUT | IB_QP_RETRY_CNT | IB_QP_RNR_RETRY |
+ IB_QP_MAX_QP_RD_ATOMIC | IB_QP_SQ_PSN;
+ opt = IB_QP_MIN_RNR_TIMER;
+ return is_valid_mask(attr_mask, req, opt);
+ } else if (cur_state == IB_QPS_RTS && new_state == IB_QPS_RTS) {
+ opt = IB_QP_MIN_RNR_TIMER;
+ return is_valid_mask(attr_mask, req, opt);
+ } else if (cur_state != IB_QPS_RESET && new_state == IB_QPS_ERR) {
+ return is_valid_mask(attr_mask, req, opt);
+ }
+ return false;
+}
+
+/* mlx5_ib_modify_dct: modify a DCT QP
+ * valid transitions are:
+ * RESET to INIT: must set access_flags, pkey_index and port
+ * INIT to RTR : must set min_rnr_timer, tclass, flow_label,
+ * mtu, gid_index and hop_limit
+ * Other transitions and attributes are illegal
+ */
+static int mlx5_ib_modify_dct(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ int attr_mask, struct mlx5_ib_modify_qp *ucmd,
+ struct ib_udata *udata)
{
- void *seg = eseg;
-
- memset(eseg, 0, sizeof(struct mlx5_wqe_eth_seg));
-
- if (wr->send_flags & IB_SEND_IP_CSUM)
- eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM |
- MLX5_ETH_WQE_L4_CSUM;
-
- seg += sizeof(struct mlx5_wqe_eth_seg);
- *size += sizeof(struct mlx5_wqe_eth_seg) / 16;
+ struct mlx5_ib_qp *qp = to_mqp(ibqp);
+ struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
+ enum ib_qp_state cur_state, new_state;
+ int required = IB_QP_STATE;
+ void *dctc;
+ int err;
- if (wr->opcode == IB_WR_LSO) {
- struct ib_ud_wr *ud_wr = container_of(wr, struct ib_ud_wr, wr);
- int size_of_inl_hdr_start = sizeof(eseg->inline_hdr_start);
- u64 left, leftlen, copysz;
- void *pdata = ud_wr->header;
+ if (!(attr_mask & IB_QP_STATE))
+ return -EINVAL;
- left = ud_wr->hlen;
- eseg->mss = cpu_to_be16(ud_wr->mss);
- eseg->inline_hdr_sz = cpu_to_be16(left);
+ cur_state = qp->state;
+ new_state = attr->qp_state;
+ dctc = MLX5_ADDR_OF(create_dct_in, qp->dct.in, dct_context_entry);
+ if (MLX5_CAP_GEN(dev->mdev, ece_support) && ucmd->ece_options)
/*
- * check if there is space till the end of queue, if yes,
- * copy all in one shot, otherwise copy till the end of queue,
- * rollback and than the copy the left
+ * DCT doesn't initialize QP till modify command is executed,
+ * so we need to overwrite previously set ECE field if user
+ * provided any value except zero, which means not set/not
+ * valid.
*/
- leftlen = qend - (void *)eseg->inline_hdr_start;
- copysz = min_t(u64, leftlen, left);
-
- memcpy(seg - size_of_inl_hdr_start, pdata, copysz);
-
- if (likely(copysz > size_of_inl_hdr_start)) {
- seg += ALIGN(copysz - size_of_inl_hdr_start, 16);
- *size += ALIGN(copysz - size_of_inl_hdr_start, 16) / 16;
- }
-
- if (unlikely(copysz < left)) { /* the last wqe in the queue */
- seg = mlx5_get_send_wqe(qp, 0);
- left -= copysz;
- pdata += copysz;
- memcpy(seg, pdata, left);
- seg += ALIGN(left, 16);
- *size += ALIGN(left, 16) / 16;
- }
- }
-
- return seg;
-}
-
-static void set_datagram_seg(struct mlx5_wqe_datagram_seg *dseg,
- struct ib_send_wr *wr)
-{
- memcpy(&dseg->av, &to_mah(ud_wr(wr)->ah)->av, sizeof(struct mlx5_av));
- dseg->av.dqp_dct = cpu_to_be32(ud_wr(wr)->remote_qpn | MLX5_EXTENDED_UD_AV);
- dseg->av.key.qkey.qkey = cpu_to_be32(ud_wr(wr)->remote_qkey);
-}
-
-static void set_data_ptr_seg(struct mlx5_wqe_data_seg *dseg, struct ib_sge *sg)
-{
- dseg->byte_count = cpu_to_be32(sg->length);
- dseg->lkey = cpu_to_be32(sg->lkey);
- dseg->addr = cpu_to_be64(sg->addr);
-}
-
-static __be16 get_klm_octo(int npages)
-{
- return cpu_to_be16(ALIGN(npages, 8) / 2);
-}
-
-static __be64 frwr_mkey_mask(void)
-{
- u64 result;
-
- result = MLX5_MKEY_MASK_LEN |
- MLX5_MKEY_MASK_PAGE_SIZE |
- MLX5_MKEY_MASK_START_ADDR |
- MLX5_MKEY_MASK_EN_RINVAL |
- MLX5_MKEY_MASK_KEY |
- MLX5_MKEY_MASK_LR |
- MLX5_MKEY_MASK_LW |
- MLX5_MKEY_MASK_RR |
- MLX5_MKEY_MASK_RW |
- MLX5_MKEY_MASK_A |
- MLX5_MKEY_MASK_SMALL_FENCE |
- MLX5_MKEY_MASK_FREE;
-
- return cpu_to_be64(result);
-}
-
-static __be64 sig_mkey_mask(void)
-{
- u64 result;
-
- result = MLX5_MKEY_MASK_LEN |
- MLX5_MKEY_MASK_PAGE_SIZE |
- MLX5_MKEY_MASK_START_ADDR |
- MLX5_MKEY_MASK_EN_SIGERR |
- MLX5_MKEY_MASK_EN_RINVAL |
- MLX5_MKEY_MASK_KEY |
- MLX5_MKEY_MASK_LR |
- MLX5_MKEY_MASK_LW |
- MLX5_MKEY_MASK_RR |
- MLX5_MKEY_MASK_RW |
- MLX5_MKEY_MASK_SMALL_FENCE |
- MLX5_MKEY_MASK_FREE |
- MLX5_MKEY_MASK_BSF_EN;
-
- return cpu_to_be64(result);
-}
-
-static void set_reg_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr,
- struct mlx5_ib_mr *mr)
-{
- int ndescs = mr->ndescs;
-
- memset(umr, 0, sizeof(*umr));
-
- if (mr->access_mode == MLX5_MKC_ACCESS_MODE_KLMS)
- /* KLMs take twice the size of MTTs */
- ndescs *= 2;
-
- umr->flags = MLX5_UMR_CHECK_NOT_FREE;
- umr->klm_octowords = get_klm_octo(ndescs);
- umr->mkey_mask = frwr_mkey_mask();
-}
-
-static void set_linv_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr)
-{
- memset(umr, 0, sizeof(*umr));
- umr->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE);
- umr->flags = MLX5_UMR_INLINE;
-}
-
-static __be64 get_umr_reg_mr_mask(int atomic)
-{
- u64 result;
-
- result = MLX5_MKEY_MASK_LEN |
- MLX5_MKEY_MASK_PAGE_SIZE |
- MLX5_MKEY_MASK_START_ADDR |
- MLX5_MKEY_MASK_PD |
- MLX5_MKEY_MASK_LR |
- MLX5_MKEY_MASK_LW |
- MLX5_MKEY_MASK_KEY |
- MLX5_MKEY_MASK_RR |
- MLX5_MKEY_MASK_RW |
- MLX5_MKEY_MASK_FREE;
-
- if (atomic)
- result |= MLX5_MKEY_MASK_A;
-
- return cpu_to_be64(result);
-}
-
-static __be64 get_umr_unreg_mr_mask(void)
-{
- u64 result;
-
- result = MLX5_MKEY_MASK_FREE;
-
- return cpu_to_be64(result);
-}
-
-static __be64 get_umr_update_mtt_mask(void)
-{
- u64 result;
+ MLX5_SET(dctc, dctc, ece, ucmd->ece_options);
- result = MLX5_MKEY_MASK_FREE;
-
- return cpu_to_be64(result);
-}
-
-static __be64 get_umr_update_translation_mask(void)
-{
- u64 result;
-
- result = MLX5_MKEY_MASK_LEN |
- MLX5_MKEY_MASK_PAGE_SIZE |
- MLX5_MKEY_MASK_START_ADDR |
- MLX5_MKEY_MASK_KEY |
- MLX5_MKEY_MASK_FREE;
-
- return cpu_to_be64(result);
-}
-
-static __be64 get_umr_update_access_mask(void)
-{
- u64 result;
-
- result = MLX5_MKEY_MASK_LW |
- MLX5_MKEY_MASK_RR |
- MLX5_MKEY_MASK_RW |
- MLX5_MKEY_MASK_A |
- MLX5_MKEY_MASK_KEY |
- MLX5_MKEY_MASK_FREE;
-
- return cpu_to_be64(result);
-}
-
-static __be64 get_umr_update_pd_mask(void)
-{
- u64 result;
-
- result = MLX5_MKEY_MASK_PD |
- MLX5_MKEY_MASK_KEY |
- MLX5_MKEY_MASK_FREE;
-
- return cpu_to_be64(result);
-}
-
-static void set_reg_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
- struct ib_send_wr *wr, int atomic)
-{
- struct mlx5_umr_wr *umrwr = umr_wr(wr);
+ if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
+ u16 set_id;
- memset(umr, 0, sizeof(*umr));
+ required |= IB_QP_ACCESS_FLAGS | IB_QP_PKEY_INDEX | IB_QP_PORT;
+ if (!is_valid_mask(attr_mask, required, 0))
+ return -EINVAL;
- if (wr->send_flags & MLX5_IB_SEND_UMR_FAIL_IF_FREE)
- umr->flags = MLX5_UMR_CHECK_FREE; /* fail if free */
- else
- umr->flags = MLX5_UMR_CHECK_NOT_FREE; /* fail if not free */
-
- if (!(wr->send_flags & MLX5_IB_SEND_UMR_UNREG)) {
- umr->klm_octowords = get_klm_octo(umrwr->npages);
- if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_MTT) {
- umr->mkey_mask = get_umr_update_mtt_mask();
- umr->bsf_octowords = get_klm_octo(umrwr->target.offset);
- umr->flags |= MLX5_UMR_TRANSLATION_OFFSET_EN;
+ if (attr->port_num == 0 ||
+ attr->port_num > dev->num_ports) {
+ mlx5_ib_dbg(dev, "invalid port number %d. number of ports is %d\n",
+ attr->port_num, dev->num_ports);
+ return -EINVAL;
}
- if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_TRANSLATION)
- umr->mkey_mask |= get_umr_update_translation_mask();
- if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_ACCESS)
- umr->mkey_mask |= get_umr_update_access_mask();
- if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_PD)
- umr->mkey_mask |= get_umr_update_pd_mask();
- if (!umr->mkey_mask)
- umr->mkey_mask = get_umr_reg_mr_mask(atomic);
- } else {
- umr->mkey_mask = get_umr_unreg_mr_mask();
- }
-
- if (!wr->num_sge)
- umr->flags |= MLX5_UMR_INLINE;
-}
-
-static u8 get_umr_flags(int acc)
-{
- return (acc & IB_ACCESS_REMOTE_ATOMIC ? MLX5_PERM_ATOMIC : 0) |
- (acc & IB_ACCESS_REMOTE_WRITE ? MLX5_PERM_REMOTE_WRITE : 0) |
- (acc & IB_ACCESS_REMOTE_READ ? MLX5_PERM_REMOTE_READ : 0) |
- (acc & IB_ACCESS_LOCAL_WRITE ? MLX5_PERM_LOCAL_WRITE : 0) |
- MLX5_PERM_LOCAL_READ | MLX5_PERM_UMR_EN;
-}
-
-static void set_reg_mkey_seg(struct mlx5_mkey_seg *seg,
- struct mlx5_ib_mr *mr,
- u32 key, int access)
-{
- int ndescs = ALIGN(mr->ndescs, 8) >> 1;
-
- memset(seg, 0, sizeof(*seg));
-
- if (mr->access_mode == MLX5_MKC_ACCESS_MODE_MTT)
- seg->log2_page_size = ilog2(mr->ibmr.page_size);
- else if (mr->access_mode == MLX5_MKC_ACCESS_MODE_KLMS)
- /* KLMs take twice the size of MTTs */
- ndescs *= 2;
-
- seg->flags = get_umr_flags(access) | mr->access_mode;
- seg->qpn_mkey7_0 = cpu_to_be32((key & 0xff) | 0xffffff00);
- seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL);
- seg->start_addr = cpu_to_be64(mr->ibmr.iova);
- seg->len = cpu_to_be64(mr->ibmr.length);
- seg->xlt_oct_size = cpu_to_be32(ndescs);
-}
-
-static void set_linv_mkey_seg(struct mlx5_mkey_seg *seg)
-{
- memset(seg, 0, sizeof(*seg));
- seg->status = MLX5_MKEY_STATUS_FREE;
-}
-
-static void set_reg_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *wr)
-{
- struct mlx5_umr_wr *umrwr = umr_wr(wr);
-
- memset(seg, 0, sizeof(*seg));
- if (wr->send_flags & MLX5_IB_SEND_UMR_UNREG) {
- seg->status = MLX5_MKEY_STATUS_FREE;
- return;
- }
-
- seg->flags = convert_access(umrwr->access_flags);
- if (!(wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_MTT)) {
- if (umrwr->pd)
- seg->flags_pd = cpu_to_be32(to_mpd(umrwr->pd)->pdn);
- seg->start_addr = cpu_to_be64(umrwr->target.virt_addr);
- }
- seg->len = cpu_to_be64(umrwr->length);
- seg->log2_page_size = umrwr->page_shift;
- seg->qpn_mkey7_0 = cpu_to_be32(0xffffff00 |
- mlx5_mkey_variant(umrwr->mkey));
-}
-
-static void set_reg_data_seg(struct mlx5_wqe_data_seg *dseg,
- struct mlx5_ib_mr *mr,
- struct mlx5_ib_pd *pd)
-{
- int bcount = mr->desc_size * mr->ndescs;
-
- dseg->addr = cpu_to_be64(mr->desc_map);
- dseg->byte_count = cpu_to_be32(ALIGN(bcount, 64));
- dseg->lkey = cpu_to_be32(pd->ibpd.local_dma_lkey);
-}
-
-static __be32 send_ieth(struct ib_send_wr *wr)
-{
- switch (wr->opcode) {
- case IB_WR_SEND_WITH_IMM:
- case IB_WR_RDMA_WRITE_WITH_IMM:
- return wr->ex.imm_data;
-
- case IB_WR_SEND_WITH_INV:
- return cpu_to_be32(wr->ex.invalidate_rkey);
-
- default:
- return 0;
- }
-}
-
-static u8 calc_sig(void *wqe, int size)
-{
- u8 *p = wqe;
- u8 res = 0;
- int i;
-
- for (i = 0; i < size; i++)
- res ^= p[i];
-
- return ~res;
-}
-
-static u8 wq_sig(void *wqe)
-{
- return calc_sig(wqe, (*((u8 *)wqe + 8) & 0x3f) << 4);
-}
-
-static int set_data_inl_seg(struct mlx5_ib_qp *qp, struct ib_send_wr *wr,
- void *wqe, int *sz)
-{
- struct mlx5_wqe_inline_seg *seg;
- void *qend = qp->sq.qend;
- void *addr;
- int inl = 0;
- int copy;
- int len;
- int i;
-
- seg = wqe;
- wqe += sizeof(*seg);
- for (i = 0; i < wr->num_sge; i++) {
- addr = (void *)(unsigned long)(wr->sg_list[i].addr);
- len = wr->sg_list[i].length;
- inl += len;
-
- if (unlikely(inl > qp->max_inline_data))
- return -ENOMEM;
-
- if (unlikely(wqe + len > qend)) {
- copy = qend - wqe;
- memcpy(wqe, addr, copy);
- addr += copy;
- len -= copy;
- wqe = mlx5_get_send_wqe(qp, 0);
+ if (attr->qp_access_flags & IB_ACCESS_REMOTE_READ)
+ MLX5_SET(dctc, dctc, rre, 1);
+ if (attr->qp_access_flags & IB_ACCESS_REMOTE_WRITE)
+ MLX5_SET(dctc, dctc, rwe, 1);
+ if (attr->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC) {
+ int atomic_mode;
+
+ atomic_mode = get_atomic_mode(dev, qp);
+ if (atomic_mode < 0)
+ return -EOPNOTSUPP;
+
+ MLX5_SET(dctc, dctc, atomic_mode, atomic_mode);
+ MLX5_SET(dctc, dctc, rae, 1);
}
- memcpy(wqe, addr, len);
- wqe += len;
- }
-
- seg->byte_count = cpu_to_be32(inl | MLX5_INLINE_SEG);
-
- *sz = ALIGN(inl + sizeof(seg->byte_count), 16) / 16;
-
- return 0;
-}
-
-static u16 prot_field_size(enum ib_signature_type type)
-{
- switch (type) {
- case IB_SIG_TYPE_T10_DIF:
- return MLX5_DIF_SIZE;
- default:
- return 0;
- }
-}
-
-static u8 bs_selector(int block_size)
-{
- switch (block_size) {
- case 512: return 0x1;
- case 520: return 0x2;
- case 4096: return 0x3;
- case 4160: return 0x4;
- case 1073741824: return 0x5;
- default: return 0;
- }
-}
-
-static void mlx5_fill_inl_bsf(struct ib_sig_domain *domain,
- struct mlx5_bsf_inl *inl)
-{
- /* Valid inline section and allow BSF refresh */
- inl->vld_refresh = cpu_to_be16(MLX5_BSF_INL_VALID |
- MLX5_BSF_REFRESH_DIF);
- inl->dif_apptag = cpu_to_be16(domain->sig.dif.app_tag);
- inl->dif_reftag = cpu_to_be32(domain->sig.dif.ref_tag);
- /* repeating block */
- inl->rp_inv_seed = MLX5_BSF_REPEAT_BLOCK;
- inl->sig_type = domain->sig.dif.bg_type == IB_T10DIF_CRC ?
- MLX5_DIF_CRC : MLX5_DIF_IPCS;
-
- if (domain->sig.dif.ref_remap)
- inl->dif_inc_ref_guard_check |= MLX5_BSF_INC_REFTAG;
-
- if (domain->sig.dif.app_escape) {
- if (domain->sig.dif.ref_escape)
- inl->dif_inc_ref_guard_check |= MLX5_BSF_APPREF_ESCAPE;
+ MLX5_SET(dctc, dctc, pkey_index, attr->pkey_index);
+ if (mlx5_lag_is_active(dev->mdev))
+ MLX5_SET(dctc, dctc, port,
+ get_tx_affinity_rr(dev, udata));
else
- inl->dif_inc_ref_guard_check |= MLX5_BSF_APPTAG_ESCAPE;
- }
+ MLX5_SET(dctc, dctc, port, attr->port_num);
- inl->dif_app_bitmask_check =
- cpu_to_be16(domain->sig.dif.apptag_check_mask);
-}
-
-static int mlx5_set_bsf(struct ib_mr *sig_mr,
- struct ib_sig_attrs *sig_attrs,
- struct mlx5_bsf *bsf, u32 data_size)
-{
- struct mlx5_core_sig_ctx *msig = to_mmr(sig_mr)->sig;
- struct mlx5_bsf_basic *basic = &bsf->basic;
- struct ib_sig_domain *mem = &sig_attrs->mem;
- struct ib_sig_domain *wire = &sig_attrs->wire;
-
- memset(bsf, 0, sizeof(*bsf));
-
- /* Basic + Extended + Inline */
- basic->bsf_size_sbs = 1 << 7;
- /* Input domain check byte mask */
- basic->check_byte_mask = sig_attrs->check_mask;
- basic->raw_data_size = cpu_to_be32(data_size);
-
- /* Memory domain */
- switch (sig_attrs->mem.sig_type) {
- case IB_SIG_TYPE_NONE:
- break;
- case IB_SIG_TYPE_T10_DIF:
- basic->mem.bs_selector = bs_selector(mem->sig.dif.pi_interval);
- basic->m_bfs_psv = cpu_to_be32(msig->psv_memory.psv_idx);
- mlx5_fill_inl_bsf(mem, &bsf->m_inl);
- break;
- default:
- return -EINVAL;
- }
-
- /* Wire domain */
- switch (sig_attrs->wire.sig_type) {
- case IB_SIG_TYPE_NONE:
- break;
- case IB_SIG_TYPE_T10_DIF:
- if (mem->sig.dif.pi_interval == wire->sig.dif.pi_interval &&
- mem->sig_type == wire->sig_type) {
- /* Same block structure */
- basic->bsf_size_sbs |= 1 << 4;
- if (mem->sig.dif.bg_type == wire->sig.dif.bg_type)
- basic->wire.copy_byte_mask |= MLX5_CPY_GRD_MASK;
- if (mem->sig.dif.app_tag == wire->sig.dif.app_tag)
- basic->wire.copy_byte_mask |= MLX5_CPY_APP_MASK;
- if (mem->sig.dif.ref_tag == wire->sig.dif.ref_tag)
- basic->wire.copy_byte_mask |= MLX5_CPY_REF_MASK;
- } else
- basic->wire.bs_selector = bs_selector(wire->sig.dif.pi_interval);
-
- basic->w_bfs_psv = cpu_to_be32(msig->psv_wire.psv_idx);
- mlx5_fill_inl_bsf(wire, &bsf->w_inl);
- break;
- default:
- return -EINVAL;
- }
+ set_id = mlx5_ib_get_counters_id(dev, attr->port_num - 1);
+ MLX5_SET(dctc, dctc, counter_set_id, set_id);
- return 0;
-}
+ qp->port = attr->port_num;
+ } else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) {
+ struct mlx5_ib_modify_qp_resp resp = {};
+ u32 out[MLX5_ST_SZ_DW(create_dct_out)] = {};
+ u32 min_resp_len = offsetofend(typeof(resp), dctn);
-static int set_sig_data_segment(struct ib_sig_handover_wr *wr,
- struct mlx5_ib_qp *qp, void **seg, int *size)
-{
- struct ib_sig_attrs *sig_attrs = wr->sig_attrs;
- struct ib_mr *sig_mr = wr->sig_mr;
- struct mlx5_bsf *bsf;
- u32 data_len = wr->wr.sg_list->length;
- u32 data_key = wr->wr.sg_list->lkey;
- u64 data_va = wr->wr.sg_list->addr;
- int ret;
- int wqe_size;
+ if (udata->outlen < min_resp_len)
+ return -EINVAL;
+ /*
+ * If we don't have enough space for the ECE options,
+ * simply indicate it with resp.response_length.
+ */
+ resp.response_length = (udata->outlen < sizeof(resp)) ?
+ min_resp_len :
+ sizeof(resp);
- if (!wr->prot ||
- (data_key == wr->prot->lkey &&
- data_va == wr->prot->addr &&
- data_len == wr->prot->length)) {
- /**
- * Source domain doesn't contain signature information
- * or data and protection are interleaved in memory.
- * So need construct:
- * ------------------
- * | data_klm |
- * ------------------
- * | BSF |
- * ------------------
- **/
- struct mlx5_klm *data_klm = *seg;
-
- data_klm->bcount = cpu_to_be32(data_len);
- data_klm->key = cpu_to_be32(data_key);
- data_klm->va = cpu_to_be64(data_va);
- wqe_size = ALIGN(sizeof(*data_klm), 64);
- } else {
- /**
- * Source domain contains signature information
- * So need construct a strided block format:
- * ---------------------------
- * | stride_block_ctrl |
- * ---------------------------
- * | data_klm |
- * ---------------------------
- * | prot_klm |
- * ---------------------------
- * | BSF |
- * ---------------------------
- **/
- struct mlx5_stride_block_ctrl_seg *sblock_ctrl;
- struct mlx5_stride_block_entry *data_sentry;
- struct mlx5_stride_block_entry *prot_sentry;
- u32 prot_key = wr->prot->lkey;
- u64 prot_va = wr->prot->addr;
- u16 block_size = sig_attrs->mem.sig.dif.pi_interval;
- int prot_size;
-
- sblock_ctrl = *seg;
- data_sentry = (void *)sblock_ctrl + sizeof(*sblock_ctrl);
- prot_sentry = (void *)data_sentry + sizeof(*data_sentry);
-
- prot_size = prot_field_size(sig_attrs->mem.sig_type);
- if (!prot_size) {
- pr_err("Bad block size given: %u\n", block_size);
+ required |= IB_QP_MIN_RNR_TIMER | IB_QP_AV | IB_QP_PATH_MTU;
+ if (!is_valid_mask(attr_mask, required, 0))
return -EINVAL;
+ MLX5_SET(dctc, dctc, min_rnr_nak, attr->min_rnr_timer);
+ MLX5_SET(dctc, dctc, tclass, attr->ah_attr.grh.traffic_class);
+ MLX5_SET(dctc, dctc, flow_label, attr->ah_attr.grh.flow_label);
+ MLX5_SET(dctc, dctc, mtu, attr->path_mtu);
+ MLX5_SET(dctc, dctc, my_addr_index, attr->ah_attr.grh.sgid_index);
+ MLX5_SET(dctc, dctc, hop_limit, attr->ah_attr.grh.hop_limit);
+ if (attr->ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE)
+ MLX5_SET(dctc, dctc, eth_prio, attr->ah_attr.sl & 0x7);
+ if (qp->is_ooo_rq) {
+ MLX5_SET(dctc, dctc, dp_ordering_1, 1);
+ MLX5_SET(dctc, dctc, dp_ordering_force, 1);
}
- sblock_ctrl->bcount_per_cycle = cpu_to_be32(block_size +
- prot_size);
- sblock_ctrl->op = cpu_to_be32(MLX5_STRIDE_BLOCK_OP);
- sblock_ctrl->repeat_count = cpu_to_be32(data_len / block_size);
- sblock_ctrl->num_entries = cpu_to_be16(2);
-
- data_sentry->bcount = cpu_to_be16(block_size);
- data_sentry->key = cpu_to_be32(data_key);
- data_sentry->va = cpu_to_be64(data_va);
- data_sentry->stride = cpu_to_be16(block_size);
- prot_sentry->bcount = cpu_to_be16(prot_size);
- prot_sentry->key = cpu_to_be32(prot_key);
- prot_sentry->va = cpu_to_be64(prot_va);
- prot_sentry->stride = cpu_to_be16(prot_size);
-
- wqe_size = ALIGN(sizeof(*sblock_ctrl) + sizeof(*data_sentry) +
- sizeof(*prot_sentry), 64);
- }
-
- *seg += wqe_size;
- *size += wqe_size / 16;
- if (unlikely((*seg == qp->sq.qend)))
- *seg = mlx5_get_send_wqe(qp, 0);
-
- bsf = *seg;
- ret = mlx5_set_bsf(sig_mr, sig_attrs, bsf, data_len);
- if (ret)
- return -EINVAL;
-
- *seg += sizeof(*bsf);
- *size += sizeof(*bsf) / 16;
- if (unlikely((*seg == qp->sq.qend)))
- *seg = mlx5_get_send_wqe(qp, 0);
-
- return 0;
-}
-
-static void set_sig_mkey_segment(struct mlx5_mkey_seg *seg,
- struct ib_sig_handover_wr *wr, u32 nelements,
- u32 length, u32 pdn)
-{
- struct ib_mr *sig_mr = wr->sig_mr;
- u32 sig_key = sig_mr->rkey;
- u8 sigerr = to_mmr(sig_mr)->sig->sigerr_count & 1;
-
- memset(seg, 0, sizeof(*seg));
-
- seg->flags = get_umr_flags(wr->access_flags) |
- MLX5_MKC_ACCESS_MODE_KLMS;
- seg->qpn_mkey7_0 = cpu_to_be32((sig_key & 0xff) | 0xffffff00);
- seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL | sigerr << 26 |
- MLX5_MKEY_BSF_EN | pdn);
- seg->len = cpu_to_be64(length);
- seg->xlt_oct_size = cpu_to_be32(be16_to_cpu(get_klm_octo(nelements)));
- seg->bsfs_octo_size = cpu_to_be32(MLX5_MKEY_BSF_OCTO_SIZE);
-}
-
-static void set_sig_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
- u32 nelements)
-{
- memset(umr, 0, sizeof(*umr));
-
- umr->flags = MLX5_FLAGS_INLINE | MLX5_FLAGS_CHECK_FREE;
- umr->klm_octowords = get_klm_octo(nelements);
- umr->bsf_octowords = cpu_to_be16(MLX5_MKEY_BSF_OCTO_SIZE);
- umr->mkey_mask = sig_mkey_mask();
-}
-
-
-static int set_sig_umr_wr(struct ib_send_wr *send_wr, struct mlx5_ib_qp *qp,
- void **seg, int *size)
-{
- struct ib_sig_handover_wr *wr = sig_handover_wr(send_wr);
- struct mlx5_ib_mr *sig_mr = to_mmr(wr->sig_mr);
- u32 pdn = get_pd(qp)->pdn;
- u32 klm_oct_size;
- int region_len, ret;
-
- if (unlikely(wr->wr.num_sge != 1) ||
- unlikely(wr->access_flags & IB_ACCESS_REMOTE_ATOMIC) ||
- unlikely(!sig_mr->sig) || unlikely(!qp->signature_en) ||
- unlikely(!sig_mr->sig->sig_status_checked))
+ err = mlx5_core_create_dct(dev, &qp->dct.mdct, qp->dct.in,
+ MLX5_ST_SZ_BYTES(create_dct_in), out,
+ sizeof(out));
+ err = mlx5_cmd_check(dev->mdev, err, qp->dct.in, out);
+ if (err)
+ return err;
+ resp.dctn = qp->dct.mdct.mqp.qpn;
+ if (MLX5_CAP_GEN(dev->mdev, ece_support))
+ resp.ece_options = MLX5_GET(create_dct_out, out, ece);
+ err = ib_copy_to_udata(udata, &resp, resp.response_length);
+ if (err) {
+ mlx5_core_destroy_dct(dev, &qp->dct.mdct);
+ return err;
+ }
+ } else {
+ mlx5_ib_warn(dev, "Modify DCT: Invalid transition from %d to %d\n", cur_state, new_state);
return -EINVAL;
-
- /* length of the protected region, data + protection */
- region_len = wr->wr.sg_list->length;
- if (wr->prot &&
- (wr->prot->lkey != wr->wr.sg_list->lkey ||
- wr->prot->addr != wr->wr.sg_list->addr ||
- wr->prot->length != wr->wr.sg_list->length))
- region_len += wr->prot->length;
-
- /**
- * KLM octoword size - if protection was provided
- * then we use strided block format (3 octowords),
- * else we use single KLM (1 octoword)
- **/
- klm_oct_size = wr->prot ? 3 : 1;
-
- set_sig_umr_segment(*seg, klm_oct_size);
- *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
- *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
- if (unlikely((*seg == qp->sq.qend)))
- *seg = mlx5_get_send_wqe(qp, 0);
-
- set_sig_mkey_segment(*seg, wr, klm_oct_size, region_len, pdn);
- *seg += sizeof(struct mlx5_mkey_seg);
- *size += sizeof(struct mlx5_mkey_seg) / 16;
- if (unlikely((*seg == qp->sq.qend)))
- *seg = mlx5_get_send_wqe(qp, 0);
-
- ret = set_sig_data_segment(wr, qp, seg, size);
- if (ret)
- return ret;
-
- sig_mr->sig->sig_status_checked = false;
- return 0;
-}
-
-static int set_psv_wr(struct ib_sig_domain *domain,
- u32 psv_idx, void **seg, int *size)
-{
- struct mlx5_seg_set_psv *psv_seg = *seg;
-
- memset(psv_seg, 0, sizeof(*psv_seg));
- psv_seg->psv_num = cpu_to_be32(psv_idx);
- switch (domain->sig_type) {
- case IB_SIG_TYPE_NONE:
- break;
- case IB_SIG_TYPE_T10_DIF:
- psv_seg->transient_sig = cpu_to_be32(domain->sig.dif.bg << 16 |
- domain->sig.dif.app_tag);
- psv_seg->ref_tag = cpu_to_be32(domain->sig.dif.ref_tag);
- break;
- default:
- pr_err("Bad signature type given.\n");
- return 1;
}
- *seg += sizeof(*psv_seg);
- *size += sizeof(*psv_seg) / 16;
-
+ qp->state = new_state;
return 0;
}
-static int set_reg_wr(struct mlx5_ib_qp *qp,
- struct ib_reg_wr *wr,
- void **seg, int *size)
+static bool mlx5_ib_modify_qp_allowed(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_qp *qp)
{
- struct mlx5_ib_mr *mr = to_mmr(wr->mr);
- struct mlx5_ib_pd *pd = to_mpd(qp->ibqp.pd);
-
- if (unlikely(wr->wr.send_flags & IB_SEND_INLINE)) {
- mlx5_ib_warn(to_mdev(qp->ibqp.device),
- "Invalid IB_SEND_INLINE send flag\n");
- return -EINVAL;
- }
-
- set_reg_umr_seg(*seg, mr);
- *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
- *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
- if (unlikely((*seg == qp->sq.qend)))
- *seg = mlx5_get_send_wqe(qp, 0);
-
- set_reg_mkey_seg(*seg, mr, wr->key, wr->access);
- *seg += sizeof(struct mlx5_mkey_seg);
- *size += sizeof(struct mlx5_mkey_seg) / 16;
- if (unlikely((*seg == qp->sq.qend)))
- *seg = mlx5_get_send_wqe(qp, 0);
+ if (dev->profile != &raw_eth_profile)
+ return true;
- set_reg_data_seg(*seg, mr, pd);
- *seg += sizeof(struct mlx5_wqe_data_seg);
- *size += (sizeof(struct mlx5_wqe_data_seg) / 16);
+ if (qp->type == IB_QPT_RAW_PACKET || qp->type == MLX5_IB_QPT_REG_UMR)
+ return true;
- return 0;
-}
-
-static void set_linv_wr(struct mlx5_ib_qp *qp, void **seg, int *size)
-{
- set_linv_umr_seg(*seg);
- *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
- *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
- if (unlikely((*seg == qp->sq.qend)))
- *seg = mlx5_get_send_wqe(qp, 0);
- set_linv_mkey_seg(*seg);
- *seg += sizeof(struct mlx5_mkey_seg);
- *size += sizeof(struct mlx5_mkey_seg) / 16;
- if (unlikely((*seg == qp->sq.qend)))
- *seg = mlx5_get_send_wqe(qp, 0);
+ return false;
}
-static void dump_wqe(struct mlx5_ib_qp *qp, int idx, int size_16)
+static int validate_rd_atomic(struct mlx5_ib_dev *dev, struct ib_qp_attr *attr,
+ int attr_mask, enum ib_qp_type qp_type)
{
- __be32 *p = NULL;
- int tidx = idx;
- int i, j;
+ int log_max_ra_res;
+ int log_max_ra_req;
- pr_debug("dump wqe at %p\n", mlx5_get_send_wqe(qp, tidx));
- for (i = 0, j = 0; i < size_16 * 4; i += 4, j += 4) {
- if ((i & 0xf) == 0) {
- void *buf = mlx5_get_send_wqe(qp, tidx);
- tidx = (tidx + 1) & (qp->sq.wqe_cnt - 1);
- p = buf;
- j = 0;
- }
- pr_debug("%08x %08x %08x %08x\n", be32_to_cpu(p[j]),
- be32_to_cpu(p[j + 1]), be32_to_cpu(p[j + 2]),
- be32_to_cpu(p[j + 3]));
+ if (qp_type == MLX5_IB_QPT_DCI) {
+ log_max_ra_res = 1 << MLX5_CAP_GEN(dev->mdev,
+ log_max_ra_res_dc);
+ log_max_ra_req = 1 << MLX5_CAP_GEN(dev->mdev,
+ log_max_ra_req_dc);
+ } else {
+ log_max_ra_res = 1 << MLX5_CAP_GEN(dev->mdev,
+ log_max_ra_res_qp);
+ log_max_ra_req = 1 << MLX5_CAP_GEN(dev->mdev,
+ log_max_ra_req_qp);
}
-}
-static void mlx5_bf_copy(u64 __iomem *dst, u64 *src,
- unsigned bytecnt, struct mlx5_ib_qp *qp)
-{
- while (bytecnt > 0) {
- __iowrite64_copy(dst++, src++, 8);
- __iowrite64_copy(dst++, src++, 8);
- __iowrite64_copy(dst++, src++, 8);
- __iowrite64_copy(dst++, src++, 8);
- __iowrite64_copy(dst++, src++, 8);
- __iowrite64_copy(dst++, src++, 8);
- __iowrite64_copy(dst++, src++, 8);
- __iowrite64_copy(dst++, src++, 8);
- bytecnt -= 64;
- if (unlikely(src == qp->sq.qend))
- src = mlx5_get_send_wqe(qp, 0);
+ if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
+ attr->max_rd_atomic > log_max_ra_res) {
+ mlx5_ib_dbg(dev, "invalid max_rd_atomic value %d\n",
+ attr->max_rd_atomic);
+ return false;
}
-}
-
-static u8 get_fence(u8 fence, struct ib_send_wr *wr)
-{
- if (unlikely(wr->opcode == IB_WR_LOCAL_INV &&
- wr->send_flags & IB_SEND_FENCE))
- return MLX5_FENCE_MODE_STRONG_ORDERING;
- if (unlikely(fence)) {
- if (wr->send_flags & IB_SEND_FENCE)
- return MLX5_FENCE_MODE_SMALL_AND_FENCE;
- else
- return fence;
- } else if (unlikely(wr->send_flags & IB_SEND_FENCE)) {
- return MLX5_FENCE_MODE_FENCE;
+ if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
+ attr->max_dest_rd_atomic > log_max_ra_req) {
+ mlx5_ib_dbg(dev, "invalid max_dest_rd_atomic value %d\n",
+ attr->max_dest_rd_atomic);
+ return false;
}
-
- return 0;
-}
-
-static int begin_wqe(struct mlx5_ib_qp *qp, void **seg,
- struct mlx5_wqe_ctrl_seg **ctrl,
- struct ib_send_wr *wr, unsigned *idx,
- int *size, int nreq)
-{
- if (unlikely(mlx5_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)))
- return -ENOMEM;
-
- *idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1);
- *seg = mlx5_get_send_wqe(qp, *idx);
- *ctrl = *seg;
- *(uint32_t *)(*seg + 8) = 0;
- (*ctrl)->imm = send_ieth(wr);
- (*ctrl)->fm_ce_se = qp->sq_signal_bits |
- (wr->send_flags & IB_SEND_SIGNALED ?
- MLX5_WQE_CTRL_CQ_UPDATE : 0) |
- (wr->send_flags & IB_SEND_SOLICITED ?
- MLX5_WQE_CTRL_SOLICITED : 0);
-
- *seg += sizeof(**ctrl);
- *size = sizeof(**ctrl) / 16;
-
- return 0;
-}
-
-static void finish_wqe(struct mlx5_ib_qp *qp,
- struct mlx5_wqe_ctrl_seg *ctrl,
- u8 size, unsigned idx, u64 wr_id,
- int nreq, u8 fence, u8 next_fence,
- u32 mlx5_opcode)
-{
- u8 opmod = 0;
-
- ctrl->opmod_idx_opcode = cpu_to_be32(((u32)(qp->sq.cur_post) << 8) |
- mlx5_opcode | ((u32)opmod << 24));
- ctrl->qpn_ds = cpu_to_be32(size | (qp->trans_qp.base.mqp.qpn << 8));
- ctrl->fm_ce_se |= fence;
- qp->fm_cache = next_fence;
- if (unlikely(qp->wq_sig))
- ctrl->signature = wq_sig(ctrl);
-
- qp->sq.wrid[idx] = wr_id;
- qp->sq.w_list[idx].opcode = mlx5_opcode;
- qp->sq.wqe_head[idx] = qp->sq.head + nreq;
- qp->sq.cur_post += DIV_ROUND_UP(size * 16, MLX5_SEND_WQE_BB);
- qp->sq.w_list[idx].next = qp->sq.cur_post;
+ return true;
}
-
-int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
- struct ib_send_wr **bad_wr)
+int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_udata *udata)
{
- struct mlx5_wqe_ctrl_seg *ctrl = NULL; /* compiler warning */
struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
- struct mlx5_core_dev *mdev = dev->mdev;
- struct mlx5_ib_qp *qp;
- struct mlx5_ib_mr *mr;
- struct mlx5_wqe_data_seg *dpseg;
- struct mlx5_wqe_xrc_seg *xrc;
- struct mlx5_bf *bf;
- int uninitialized_var(size);
- void *qend;
- unsigned long flags;
- unsigned idx;
- int err = 0;
- int inl = 0;
- int num_sge;
- void *seg;
- int nreq;
- int i;
- u8 next_fence = 0;
- u8 fence;
-
- if (unlikely(ibqp->qp_type == IB_QPT_GSI))
- return mlx5_ib_gsi_post_send(ibqp, wr, bad_wr);
-
- qp = to_mqp(ibqp);
- bf = qp->bf;
- qend = qp->sq.qend;
-
- spin_lock_irqsave(&qp->sq.lock, flags);
-
- if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
- err = -EIO;
- *bad_wr = wr;
- nreq = 0;
- goto out;
- }
-
- for (nreq = 0; wr; nreq++, wr = wr->next) {
- if (unlikely(wr->opcode >= ARRAY_SIZE(mlx5_ib_opcode))) {
- mlx5_ib_warn(dev, "\n");
- err = -EINVAL;
- *bad_wr = wr;
- goto out;
- }
-
- fence = qp->fm_cache;
- num_sge = wr->num_sge;
- if (unlikely(num_sge > qp->sq.max_gs)) {
- mlx5_ib_warn(dev, "\n");
- err = -EINVAL;
- *bad_wr = wr;
- goto out;
- }
-
- err = begin_wqe(qp, &seg, &ctrl, wr, &idx, &size, nreq);
- if (err) {
- mlx5_ib_warn(dev, "\n");
- err = -ENOMEM;
- *bad_wr = wr;
- goto out;
- }
-
- switch (ibqp->qp_type) {
- case IB_QPT_XRC_INI:
- xrc = seg;
- seg += sizeof(*xrc);
- size += sizeof(*xrc) / 16;
- /* fall through */
- case IB_QPT_RC:
- switch (wr->opcode) {
- case IB_WR_RDMA_READ:
- case IB_WR_RDMA_WRITE:
- case IB_WR_RDMA_WRITE_WITH_IMM:
- set_raddr_seg(seg, rdma_wr(wr)->remote_addr,
- rdma_wr(wr)->rkey);
- seg += sizeof(struct mlx5_wqe_raddr_seg);
- size += sizeof(struct mlx5_wqe_raddr_seg) / 16;
- break;
-
- case IB_WR_ATOMIC_CMP_AND_SWP:
- case IB_WR_ATOMIC_FETCH_AND_ADD:
- case IB_WR_MASKED_ATOMIC_CMP_AND_SWP:
- mlx5_ib_warn(dev, "Atomic operations are not supported yet\n");
- err = -ENOSYS;
- *bad_wr = wr;
- goto out;
-
- case IB_WR_LOCAL_INV:
- next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
- qp->sq.wr_data[idx] = IB_WR_LOCAL_INV;
- ctrl->imm = cpu_to_be32(wr->ex.invalidate_rkey);
- set_linv_wr(qp, &seg, &size);
- num_sge = 0;
- break;
-
- case IB_WR_REG_MR:
- next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
- qp->sq.wr_data[idx] = IB_WR_REG_MR;
- ctrl->imm = cpu_to_be32(reg_wr(wr)->key);
- err = set_reg_wr(qp, reg_wr(wr), &seg, &size);
- if (err) {
- *bad_wr = wr;
- goto out;
- }
- num_sge = 0;
- break;
-
- case IB_WR_REG_SIG_MR:
- qp->sq.wr_data[idx] = IB_WR_REG_SIG_MR;
- mr = to_mmr(sig_handover_wr(wr)->sig_mr);
-
- ctrl->imm = cpu_to_be32(mr->ibmr.rkey);
- err = set_sig_umr_wr(wr, qp, &seg, &size);
- if (err) {
- mlx5_ib_warn(dev, "\n");
- *bad_wr = wr;
- goto out;
- }
-
- finish_wqe(qp, ctrl, size, idx, wr->wr_id,
- nreq, get_fence(fence, wr),
- next_fence, MLX5_OPCODE_UMR);
- /*
- * SET_PSV WQEs are not signaled and solicited
- * on error
- */
- wr->send_flags &= ~IB_SEND_SIGNALED;
- wr->send_flags |= IB_SEND_SOLICITED;
- err = begin_wqe(qp, &seg, &ctrl, wr,
- &idx, &size, nreq);
- if (err) {
- mlx5_ib_warn(dev, "\n");
- err = -ENOMEM;
- *bad_wr = wr;
- goto out;
- }
-
- err = set_psv_wr(&sig_handover_wr(wr)->sig_attrs->mem,
- mr->sig->psv_memory.psv_idx, &seg,
- &size);
- if (err) {
- mlx5_ib_warn(dev, "\n");
- *bad_wr = wr;
- goto out;
- }
-
- finish_wqe(qp, ctrl, size, idx, wr->wr_id,
- nreq, get_fence(fence, wr),
- next_fence, MLX5_OPCODE_SET_PSV);
- err = begin_wqe(qp, &seg, &ctrl, wr,
- &idx, &size, nreq);
- if (err) {
- mlx5_ib_warn(dev, "\n");
- err = -ENOMEM;
- *bad_wr = wr;
- goto out;
- }
-
- next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
- err = set_psv_wr(&sig_handover_wr(wr)->sig_attrs->wire,
- mr->sig->psv_wire.psv_idx, &seg,
- &size);
- if (err) {
- mlx5_ib_warn(dev, "\n");
- *bad_wr = wr;
- goto out;
- }
-
- finish_wqe(qp, ctrl, size, idx, wr->wr_id,
- nreq, get_fence(fence, wr),
- next_fence, MLX5_OPCODE_SET_PSV);
- num_sge = 0;
- goto skip_psv;
-
- default:
- break;
- }
- break;
-
- case IB_QPT_UC:
- switch (wr->opcode) {
- case IB_WR_RDMA_WRITE:
- case IB_WR_RDMA_WRITE_WITH_IMM:
- set_raddr_seg(seg, rdma_wr(wr)->remote_addr,
- rdma_wr(wr)->rkey);
- seg += sizeof(struct mlx5_wqe_raddr_seg);
- size += sizeof(struct mlx5_wqe_raddr_seg) / 16;
- break;
-
- default:
- break;
- }
- break;
-
- case IB_QPT_SMI:
- case MLX5_IB_QPT_HW_GSI:
- set_datagram_seg(seg, wr);
- seg += sizeof(struct mlx5_wqe_datagram_seg);
- size += sizeof(struct mlx5_wqe_datagram_seg) / 16;
- if (unlikely((seg == qend)))
- seg = mlx5_get_send_wqe(qp, 0);
- break;
- case IB_QPT_UD:
- set_datagram_seg(seg, wr);
- seg += sizeof(struct mlx5_wqe_datagram_seg);
- size += sizeof(struct mlx5_wqe_datagram_seg) / 16;
+ struct mlx5_ib_modify_qp_resp resp = {};
+ struct mlx5_ib_qp *qp = to_mqp(ibqp);
+ struct mlx5_ib_modify_qp ucmd = {};
+ enum ib_qp_type qp_type;
+ enum ib_qp_state cur_state, new_state;
+ int err = -EINVAL;
- if (unlikely((seg == qend)))
- seg = mlx5_get_send_wqe(qp, 0);
+ if (!mlx5_ib_modify_qp_allowed(dev, qp))
+ return -EOPNOTSUPP;
- /* handle qp that supports ud offload */
- if (qp->flags & IB_QP_CREATE_IPOIB_UD_LSO) {
- struct mlx5_wqe_eth_pad *pad;
+ if (attr_mask & ~(IB_QP_ATTR_STANDARD_BITS | IB_QP_RATE_LIMIT))
+ return -EOPNOTSUPP;
- pad = seg;
- memset(pad, 0, sizeof(struct mlx5_wqe_eth_pad));
- seg += sizeof(struct mlx5_wqe_eth_pad);
- size += sizeof(struct mlx5_wqe_eth_pad) / 16;
+ if (ibqp->rwq_ind_tbl)
+ return -ENOSYS;
- seg = set_eth_seg(seg, wr, qend, qp, &size);
+ if (udata && udata->inlen) {
+ if (udata->inlen < offsetofend(typeof(ucmd), ece_options))
+ return -EINVAL;
- if (unlikely((seg == qend)))
- seg = mlx5_get_send_wqe(qp, 0);
- }
- break;
- case MLX5_IB_QPT_REG_UMR:
- if (wr->opcode != MLX5_IB_WR_UMR) {
- err = -EINVAL;
- mlx5_ib_warn(dev, "bad opcode\n");
- goto out;
- }
- qp->sq.wr_data[idx] = MLX5_IB_WR_UMR;
- ctrl->imm = cpu_to_be32(umr_wr(wr)->mkey);
- set_reg_umr_segment(seg, wr, !!(MLX5_CAP_GEN(mdev, atomic)));
- seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
- size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
- if (unlikely((seg == qend)))
- seg = mlx5_get_send_wqe(qp, 0);
- set_reg_mkey_segment(seg, wr);
- seg += sizeof(struct mlx5_mkey_seg);
- size += sizeof(struct mlx5_mkey_seg) / 16;
- if (unlikely((seg == qend)))
- seg = mlx5_get_send_wqe(qp, 0);
- break;
+ if (udata->inlen > sizeof(ucmd) &&
+ !ib_is_udata_cleared(udata, sizeof(ucmd),
+ udata->inlen - sizeof(ucmd)))
+ return -EOPNOTSUPP;
- default:
- break;
- }
+ if (ib_copy_from_udata(&ucmd, udata,
+ min(udata->inlen, sizeof(ucmd))))
+ return -EFAULT;
- if (wr->send_flags & IB_SEND_INLINE && num_sge) {
- int uninitialized_var(sz);
+ if (ucmd.comp_mask & ~MLX5_IB_MODIFY_QP_OOO_DP ||
+ memchr_inv(&ucmd.burst_info.reserved, 0,
+ sizeof(ucmd.burst_info.reserved)))
+ return -EOPNOTSUPP;
- err = set_data_inl_seg(qp, wr, seg, &sz);
- if (unlikely(err)) {
- mlx5_ib_warn(dev, "\n");
- *bad_wr = wr;
- goto out;
- }
- inl = 1;
- size += sz;
- } else {
- dpseg = seg;
- for (i = 0; i < num_sge; i++) {
- if (unlikely(dpseg == qend)) {
- seg = mlx5_get_send_wqe(qp, 0);
- dpseg = seg;
- }
- if (likely(wr->sg_list[i].length)) {
- set_data_ptr_seg(dpseg, wr->sg_list + i);
- size += sizeof(struct mlx5_wqe_data_seg) / 16;
- dpseg++;
- }
- }
+ if (ucmd.comp_mask & MLX5_IB_MODIFY_QP_OOO_DP) {
+ if (!get_dp_ooo_cap(dev->mdev, qp->type))
+ return -EOPNOTSUPP;
+ qp->is_ooo_rq = 1;
}
-
- finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq,
- get_fence(fence, wr), next_fence,
- mlx5_ib_opcode[wr->opcode]);
-skip_psv:
- if (0)
- dump_wqe(qp, idx, size);
}
-out:
- if (likely(nreq)) {
- qp->sq.head += nreq;
+ if (qp->type == IB_QPT_GSI)
+ return mlx5_ib_gsi_modify_qp(ibqp, attr, attr_mask);
- /* Make sure that descriptors are written before
- * updating doorbell record and ringing the doorbell
- */
- wmb();
+ qp_type = (qp->type == MLX5_IB_QPT_HW_GSI) ? IB_QPT_GSI : qp->type;
- qp->db.db[MLX5_SND_DBR] = cpu_to_be32(qp->sq.cur_post);
+ if (qp_type == MLX5_IB_QPT_DCT)
+ return mlx5_ib_modify_dct(ibqp, attr, attr_mask, &ucmd, udata);
- /* Make sure doorbell record is visible to the HCA before
- * we hit doorbell */
- wmb();
+ mutex_lock(&qp->mutex);
- if (bf->need_lock)
- spin_lock(&bf->lock);
- else
- __acquire(&bf->lock);
+ cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state;
+ new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
- /* TBD enable WC */
- if (0 && nreq == 1 && bf->uuarn && inl && size > 1 && size <= bf->buf_size / 16) {
- mlx5_bf_copy(bf->reg + bf->offset, (u64 *)ctrl, ALIGN(size * 16, 64), qp);
- /* wc_wmb(); */
- } else {
- mlx5_write64((__be32 *)ctrl, bf->regreg + bf->offset,
- MLX5_GET_DOORBELL_LOCK(&bf->lock32));
- /* Make sure doorbells don't leak out of SQ spinlock
- * and reach the HCA out of order.
- */
- mmiowb();
+ if (qp->flags & IB_QP_CREATE_SOURCE_QPN) {
+ if (attr_mask & ~(IB_QP_STATE | IB_QP_CUR_STATE)) {
+ mlx5_ib_dbg(dev, "invalid attr_mask 0x%x when underlay QP is used\n",
+ attr_mask);
+ goto out;
}
- bf->offset ^= bf->buf_size;
- if (bf->need_lock)
- spin_unlock(&bf->lock);
- else
- __release(&bf->lock);
+ } else if (qp_type != MLX5_IB_QPT_REG_UMR &&
+ qp_type != MLX5_IB_QPT_DCI &&
+ !ib_modify_qp_is_ok(cur_state, new_state, qp_type,
+ attr_mask)) {
+ mlx5_ib_dbg(dev, "invalid QP state transition from %d to %d, qp_type %d, attr_mask 0x%x\n",
+ cur_state, new_state, qp->type, attr_mask);
+ goto out;
+ } else if (qp_type == MLX5_IB_QPT_DCI &&
+ !modify_dci_qp_is_ok(cur_state, new_state, attr_mask)) {
+ mlx5_ib_dbg(dev, "invalid QP state transition from %d to %d, qp_type %d, attr_mask 0x%x\n",
+ cur_state, new_state, qp_type, attr_mask);
+ goto out;
}
- spin_unlock_irqrestore(&qp->sq.lock, flags);
-
- return err;
-}
-
-static void set_sig_seg(struct mlx5_rwqe_sig *sig, int size)
-{
- sig->signature = calc_sig(sig, size);
-}
-
-int mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
- struct ib_recv_wr **bad_wr)
-{
- struct mlx5_ib_qp *qp = to_mqp(ibqp);
- struct mlx5_wqe_data_seg *scat;
- struct mlx5_rwqe_sig *sig;
- struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
- struct mlx5_core_dev *mdev = dev->mdev;
- unsigned long flags;
- int err = 0;
- int nreq;
- int ind;
- int i;
-
- if (unlikely(ibqp->qp_type == IB_QPT_GSI))
- return mlx5_ib_gsi_post_recv(ibqp, wr, bad_wr);
-
- spin_lock_irqsave(&qp->rq.lock, flags);
-
- if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
- err = -EIO;
- *bad_wr = wr;
- nreq = 0;
+ if ((attr_mask & IB_QP_PORT) &&
+ (attr->port_num == 0 ||
+ attr->port_num > dev->num_ports)) {
+ mlx5_ib_dbg(dev, "invalid port number %d. number of ports is %d\n",
+ attr->port_num, dev->num_ports);
goto out;
}
- ind = qp->rq.head & (qp->rq.wqe_cnt - 1);
-
- for (nreq = 0; wr; nreq++, wr = wr->next) {
- if (mlx5_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) {
- err = -ENOMEM;
- *bad_wr = wr;
- goto out;
- }
-
- if (unlikely(wr->num_sge > qp->rq.max_gs)) {
- err = -EINVAL;
- *bad_wr = wr;
- goto out;
- }
-
- scat = get_recv_wqe(qp, ind);
- if (qp->wq_sig)
- scat++;
-
- for (i = 0; i < wr->num_sge; i++)
- set_data_ptr_seg(scat + i, wr->sg_list + i);
-
- if (i < qp->rq.max_gs) {
- scat[i].byte_count = 0;
- scat[i].lkey = cpu_to_be32(MLX5_INVALID_LKEY);
- scat[i].addr = 0;
- }
-
- if (qp->wq_sig) {
- sig = (struct mlx5_rwqe_sig *)scat;
- set_sig_seg(sig, (qp->rq.max_gs + 1) << 2);
- }
-
- qp->rq.wrid[ind] = wr->wr_id;
-
- ind = (ind + 1) & (qp->rq.wqe_cnt - 1);
+ if ((attr_mask & IB_QP_PKEY_INDEX) &&
+ attr->pkey_index >= dev->pkey_table_len) {
+ mlx5_ib_dbg(dev, "invalid pkey index %d\n", attr->pkey_index);
+ goto out;
}
-out:
- if (likely(nreq)) {
- qp->rq.head += nreq;
-
- /* Make sure that descriptors are written before
- * doorbell record.
- */
- wmb();
+ if (!validate_rd_atomic(dev, attr, attr_mask, qp_type))
+ goto out;
- *qp->db.db = cpu_to_be32(qp->rq.head & 0xffff);
+ if (cur_state == new_state && cur_state == IB_QPS_RESET) {
+ err = 0;
+ goto out;
}
- spin_unlock_irqrestore(&qp->rq.lock, flags);
+ err = __mlx5_ib_modify_qp(ibqp, attr, attr_mask, cur_state,
+ new_state, &ucmd, &resp, udata);
+
+ /* resp.response_length is set in ECE supported flows only */
+ if (!err && resp.response_length &&
+ udata->outlen >= resp.response_length)
+ /* Return -EFAULT to the user and expect him to destroy QP. */
+ err = ib_copy_to_udata(udata, &resp, resp.response_length);
+out:
+ mutex_unlock(&qp->mutex);
return err;
}
@@ -4310,47 +4821,34 @@ static inline enum ib_mig_state to_ib_mig_state(int mlx5_mig_state)
}
}
-static int to_ib_qp_access_flags(int mlx5_flags)
+static void to_rdma_ah_attr(struct mlx5_ib_dev *ibdev,
+ struct rdma_ah_attr *ah_attr, void *path)
{
- int ib_flags = 0;
+ int port = MLX5_GET(ads, path, vhca_port_num);
+ int static_rate;
- if (mlx5_flags & MLX5_QP_BIT_RRE)
- ib_flags |= IB_ACCESS_REMOTE_READ;
- if (mlx5_flags & MLX5_QP_BIT_RWE)
- ib_flags |= IB_ACCESS_REMOTE_WRITE;
- if (mlx5_flags & MLX5_QP_BIT_RAE)
- ib_flags |= IB_ACCESS_REMOTE_ATOMIC;
+ memset(ah_attr, 0, sizeof(*ah_attr));
- return ib_flags;
-}
-
-static void to_ib_ah_attr(struct mlx5_ib_dev *ibdev, struct ib_ah_attr *ib_ah_attr,
- struct mlx5_qp_path *path)
-{
- struct mlx5_core_dev *dev = ibdev->mdev;
+ if (!port || port > ibdev->num_ports)
+ return;
- memset(ib_ah_attr, 0, sizeof(*ib_ah_attr));
- ib_ah_attr->port_num = path->port;
+ ah_attr->type = rdma_ah_find_type(&ibdev->ib_dev, port);
- if (ib_ah_attr->port_num == 0 ||
- ib_ah_attr->port_num > MLX5_CAP_GEN(dev, num_ports))
- return;
+ rdma_ah_set_port_num(ah_attr, port);
+ rdma_ah_set_sl(ah_attr, MLX5_GET(ads, path, sl));
- ib_ah_attr->sl = path->dci_cfi_prio_sl & 0xf;
+ rdma_ah_set_dlid(ah_attr, MLX5_GET(ads, path, rlid));
+ rdma_ah_set_path_bits(ah_attr, MLX5_GET(ads, path, mlid));
- ib_ah_attr->dlid = be16_to_cpu(path->rlid);
- ib_ah_attr->src_path_bits = path->grh_mlid & 0x7f;
- ib_ah_attr->static_rate = path->static_rate ? path->static_rate - 5 : 0;
- ib_ah_attr->ah_flags = (path->grh_mlid & (1 << 7)) ? IB_AH_GRH : 0;
- if (ib_ah_attr->ah_flags) {
- ib_ah_attr->grh.sgid_index = path->mgid_index;
- ib_ah_attr->grh.hop_limit = path->hop_limit;
- ib_ah_attr->grh.traffic_class =
- (be32_to_cpu(path->tclass_flowlabel) >> 20) & 0xff;
- ib_ah_attr->grh.flow_label =
- be32_to_cpu(path->tclass_flowlabel) & 0xfffff;
- memcpy(ib_ah_attr->grh.dgid.raw,
- path->rgid, sizeof(ib_ah_attr->grh.dgid.raw));
+ static_rate = MLX5_GET(ads, path, stat_rate);
+ rdma_ah_set_static_rate(ah_attr, mlx5_to_ib_rate_map(static_rate));
+ if (MLX5_GET(ads, path, grh) ||
+ ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) {
+ rdma_ah_set_grh(ah_attr, NULL, MLX5_GET(ads, path, flow_label),
+ MLX5_GET(ads, path, src_addr_index),
+ MLX5_GET(ads, path, hop_limit),
+ MLX5_GET(ads, path, tclass));
+ rdma_ah_set_dgid_raw(ah_attr, MLX5_ADDR_OF(ads, path, rgid_rip));
}
}
@@ -4358,26 +4856,14 @@ static int query_raw_packet_qp_sq_state(struct mlx5_ib_dev *dev,
struct mlx5_ib_sq *sq,
u8 *sq_state)
{
- void *out;
- void *sqc;
- int inlen;
int err;
- inlen = MLX5_ST_SZ_BYTES(query_sq_out);
- out = mlx5_vzalloc(inlen);
- if (!out)
- return -ENOMEM;
-
- err = mlx5_core_query_sq(dev->mdev, sq->base.mqp.qpn, out);
+ err = mlx5_core_query_sq_state(dev->mdev, sq->base.mqp.qpn, sq_state);
if (err)
goto out;
-
- sqc = MLX5_ADDR_OF(query_sq_out, out, sq_context);
- *sq_state = MLX5_GET(sqc, sqc, state);
sq->state = *sq_state;
out:
- kvfree(out);
return err;
}
@@ -4391,7 +4877,7 @@ static int query_raw_packet_qp_rq_state(struct mlx5_ib_dev *dev,
int err;
inlen = MLX5_ST_SZ_BYTES(query_rq_out);
- out = mlx5_vzalloc(inlen);
+ out = kvzalloc(inlen, GFP_KERNEL);
if (!out)
return -ENOMEM;
@@ -4419,7 +4905,7 @@ static int sqrq_state_to_qp_state(u8 sq_state, u8 rq_state,
[MLX5_SQ_STATE_NA] = IB_QPS_RESET,
},
[MLX5_RQC_STATE_RDY] = {
- [MLX5_SQC_STATE_RST] = MLX5_QP_STATE_BAD,
+ [MLX5_SQC_STATE_RST] = MLX5_QP_STATE,
[MLX5_SQC_STATE_RDY] = MLX5_QP_STATE,
[MLX5_SQC_STATE_ERR] = IB_QPS_SQE,
[MLX5_SQ_STATE_NA] = MLX5_QP_STATE,
@@ -4431,7 +4917,7 @@ static int sqrq_state_to_qp_state(u8 sq_state, u8 rq_state,
[MLX5_SQ_STATE_NA] = IB_QPS_ERR,
},
[MLX5_RQ_STATE_NA] = {
- [MLX5_SQC_STATE_RST] = IB_QPS_RESET,
+ [MLX5_SQC_STATE_RST] = MLX5_QP_STATE,
[MLX5_SQC_STATE_RDY] = MLX5_QP_STATE,
[MLX5_SQC_STATE_ERR] = MLX5_QP_STATE,
[MLX5_SQ_STATE_NA] = MLX5_QP_STATE_BAD,
@@ -4484,66 +4970,131 @@ static int query_qp_attr(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
struct ib_qp_attr *qp_attr)
{
int outlen = MLX5_ST_SZ_BYTES(query_qp_out);
- struct mlx5_qp_context *context;
- int mlx5_state;
+ void *qpc, *pri_path, *alt_path;
u32 *outb;
- int err = 0;
+ int err;
outb = kzalloc(outlen, GFP_KERNEL);
if (!outb)
return -ENOMEM;
- err = mlx5_core_qp_query(dev->mdev, &qp->trans_qp.base.mqp, outb,
- outlen);
+ err = mlx5_core_qp_query(dev, &qp->trans_qp.base.mqp, outb, outlen,
+ false);
if (err)
goto out;
- /* FIXME: use MLX5_GET rather than mlx5_qp_context manual struct */
- context = (struct mlx5_qp_context *)MLX5_ADDR_OF(query_qp_out, outb, qpc);
+ qpc = MLX5_ADDR_OF(query_qp_out, outb, qpc);
- mlx5_state = be32_to_cpu(context->flags) >> 28;
+ qp->state = to_ib_qp_state(MLX5_GET(qpc, qpc, state));
+ if (MLX5_GET(qpc, qpc, state) == MLX5_QP_STATE_SQ_DRAINING)
+ qp_attr->sq_draining = 1;
- qp->state = to_ib_qp_state(mlx5_state);
- qp_attr->path_mtu = context->mtu_msgmax >> 5;
- qp_attr->path_mig_state =
- to_ib_mig_state((be32_to_cpu(context->flags) >> 11) & 0x3);
- qp_attr->qkey = be32_to_cpu(context->qkey);
- qp_attr->rq_psn = be32_to_cpu(context->rnr_nextrecvpsn) & 0xffffff;
- qp_attr->sq_psn = be32_to_cpu(context->next_send_psn) & 0xffffff;
- qp_attr->dest_qp_num = be32_to_cpu(context->log_pg_sz_remote_qpn) & 0xffffff;
- qp_attr->qp_access_flags =
- to_ib_qp_access_flags(be32_to_cpu(context->params2));
+ qp_attr->path_mtu = MLX5_GET(qpc, qpc, mtu);
+ qp_attr->path_mig_state = to_ib_mig_state(MLX5_GET(qpc, qpc, pm_state));
+ qp_attr->qkey = MLX5_GET(qpc, qpc, q_key);
+ qp_attr->rq_psn = MLX5_GET(qpc, qpc, next_rcv_psn);
+ qp_attr->sq_psn = MLX5_GET(qpc, qpc, next_send_psn);
+ qp_attr->dest_qp_num = MLX5_GET(qpc, qpc, remote_qpn);
- if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) {
- to_ib_ah_attr(dev, &qp_attr->ah_attr, &context->pri_path);
- to_ib_ah_attr(dev, &qp_attr->alt_ah_attr, &context->alt_path);
- qp_attr->alt_pkey_index =
- be16_to_cpu(context->alt_path.pkey_index);
- qp_attr->alt_port_num = qp_attr->alt_ah_attr.port_num;
- }
+ if (MLX5_GET(qpc, qpc, rre))
+ qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_READ;
+ if (MLX5_GET(qpc, qpc, rwe))
+ qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_WRITE;
+ if (MLX5_GET(qpc, qpc, rae))
+ qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_ATOMIC;
- qp_attr->pkey_index = be16_to_cpu(context->pri_path.pkey_index);
- qp_attr->port_num = context->pri_path.port;
+ qp_attr->max_rd_atomic = 1 << MLX5_GET(qpc, qpc, log_sra_max);
+ qp_attr->max_dest_rd_atomic = 1 << MLX5_GET(qpc, qpc, log_rra_max);
+ qp_attr->min_rnr_timer = MLX5_GET(qpc, qpc, min_rnr_nak);
+ qp_attr->retry_cnt = MLX5_GET(qpc, qpc, retry_count);
+ qp_attr->rnr_retry = MLX5_GET(qpc, qpc, rnr_retry);
- /* qp_attr->en_sqd_async_notify is only applicable in modify qp */
- qp_attr->sq_draining = mlx5_state == MLX5_QP_STATE_SQ_DRAINING;
+ pri_path = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
+ alt_path = MLX5_ADDR_OF(qpc, qpc, secondary_address_path);
- qp_attr->max_rd_atomic = 1 << ((be32_to_cpu(context->params1) >> 21) & 0x7);
+ if (qp->type == IB_QPT_RC || qp->type == IB_QPT_UC ||
+ qp->type == IB_QPT_XRC_INI || qp->type == IB_QPT_XRC_TGT) {
+ to_rdma_ah_attr(dev, &qp_attr->ah_attr, pri_path);
+ to_rdma_ah_attr(dev, &qp_attr->alt_ah_attr, alt_path);
+ qp_attr->alt_pkey_index = MLX5_GET(ads, alt_path, pkey_index);
+ qp_attr->alt_port_num = MLX5_GET(ads, alt_path, vhca_port_num);
+ }
- qp_attr->max_dest_rd_atomic =
- 1 << ((be32_to_cpu(context->params2) >> 21) & 0x7);
- qp_attr->min_rnr_timer =
- (be32_to_cpu(context->rnr_nextrecvpsn) >> 24) & 0x1f;
- qp_attr->timeout = context->pri_path.ackto_lt >> 3;
- qp_attr->retry_cnt = (be32_to_cpu(context->params1) >> 16) & 0x7;
- qp_attr->rnr_retry = (be32_to_cpu(context->params1) >> 13) & 0x7;
- qp_attr->alt_timeout = context->alt_path.ackto_lt >> 3;
+ qp_attr->pkey_index = MLX5_GET(ads, pri_path, pkey_index);
+ qp_attr->port_num = MLX5_GET(ads, pri_path, vhca_port_num);
+ qp_attr->timeout = MLX5_GET(ads, pri_path, ack_timeout);
+ qp_attr->alt_timeout = MLX5_GET(ads, alt_path, ack_timeout);
out:
kfree(outb);
return err;
}
+static int mlx5_ib_dct_query_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *mqp,
+ struct ib_qp_attr *qp_attr, int qp_attr_mask,
+ struct ib_qp_init_attr *qp_init_attr)
+{
+ struct mlx5_core_dct *dct = &mqp->dct.mdct;
+ u32 *out;
+ u32 access_flags = 0;
+ int outlen = MLX5_ST_SZ_BYTES(query_dct_out);
+ void *dctc;
+ int err;
+ int supported_mask = IB_QP_STATE |
+ IB_QP_ACCESS_FLAGS |
+ IB_QP_PORT |
+ IB_QP_MIN_RNR_TIMER |
+ IB_QP_AV |
+ IB_QP_PATH_MTU |
+ IB_QP_PKEY_INDEX;
+
+ if (qp_attr_mask & ~supported_mask)
+ return -EINVAL;
+ if (mqp->state != IB_QPS_RTR)
+ return -EINVAL;
+
+ out = kzalloc(outlen, GFP_KERNEL);
+ if (!out)
+ return -ENOMEM;
+
+ err = mlx5_core_dct_query(dev, dct, out, outlen);
+ if (err)
+ goto out;
+
+ dctc = MLX5_ADDR_OF(query_dct_out, out, dct_context_entry);
+
+ if (qp_attr_mask & IB_QP_STATE)
+ qp_attr->qp_state = IB_QPS_RTR;
+
+ if (qp_attr_mask & IB_QP_ACCESS_FLAGS) {
+ if (MLX5_GET(dctc, dctc, rre))
+ access_flags |= IB_ACCESS_REMOTE_READ;
+ if (MLX5_GET(dctc, dctc, rwe))
+ access_flags |= IB_ACCESS_REMOTE_WRITE;
+ if (MLX5_GET(dctc, dctc, rae))
+ access_flags |= IB_ACCESS_REMOTE_ATOMIC;
+ qp_attr->qp_access_flags = access_flags;
+ }
+
+ if (qp_attr_mask & IB_QP_PORT)
+ qp_attr->port_num = mqp->port;
+ if (qp_attr_mask & IB_QP_MIN_RNR_TIMER)
+ qp_attr->min_rnr_timer = MLX5_GET(dctc, dctc, min_rnr_nak);
+ if (qp_attr_mask & IB_QP_AV) {
+ qp_attr->ah_attr.grh.traffic_class = MLX5_GET(dctc, dctc, tclass);
+ qp_attr->ah_attr.grh.flow_label = MLX5_GET(dctc, dctc, flow_label);
+ qp_attr->ah_attr.grh.sgid_index = MLX5_GET(dctc, dctc, my_addr_index);
+ qp_attr->ah_attr.grh.hop_limit = MLX5_GET(dctc, dctc, hop_limit);
+ }
+ if (qp_attr_mask & IB_QP_PATH_MTU)
+ qp_attr->path_mtu = MLX5_GET(dctc, dctc, mtu);
+ if (qp_attr_mask & IB_QP_PKEY_INDEX)
+ qp_attr->pkey_index = MLX5_GET(dctc, dctc, pkey_index);
+out:
+ kfree(out);
+ return err;
+}
+
int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr)
{
@@ -4555,21 +5106,22 @@ int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
if (ibqp->rwq_ind_tbl)
return -ENOSYS;
- if (unlikely(ibqp->qp_type == IB_QPT_GSI))
+ if (qp->type == IB_QPT_GSI)
return mlx5_ib_gsi_query_qp(ibqp, qp_attr, qp_attr_mask,
qp_init_attr);
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- /*
- * Wait for any outstanding page faults, in case the user frees memory
- * based upon this query's result.
- */
- flush_workqueue(mlx5_ib_page_fault_wq);
-#endif
+ /* Not all of output fields are applicable, make sure to zero them */
+ memset(qp_init_attr, 0, sizeof(*qp_init_attr));
+ memset(qp_attr, 0, sizeof(*qp_attr));
+
+ if (unlikely(qp->type == MLX5_IB_QPT_DCT))
+ return mlx5_ib_dct_query_qp(dev, qp, qp_attr,
+ qp_attr_mask, qp_init_attr);
mutex_lock(&qp->mutex);
- if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET) {
+ if (qp->type == IB_QPT_RAW_PACKET ||
+ qp->flags & IB_QP_CREATE_SOURCE_QPN) {
err = query_raw_packet_qp_state(dev, qp, &raw_packet_qp_state);
if (err)
goto out;
@@ -4595,7 +5147,7 @@ int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
qp_attr->cap.max_send_sge = 0;
}
- qp_init_attr->qp_type = ibqp->qp_type;
+ qp_init_attr->qp_type = qp->type;
qp_init_attr->recv_cq = ibqp->recv_cq;
qp_init_attr->send_cq = ibqp->send_cq;
qp_init_attr->srq = ibqp->srq;
@@ -4603,18 +5155,7 @@ int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
qp_init_attr->cap = qp_attr->cap;
- qp_init_attr->create_flags = 0;
- if (qp->flags & MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK)
- qp_init_attr->create_flags |= IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK;
-
- if (qp->flags & MLX5_IB_QP_CROSS_CHANNEL)
- qp_init_attr->create_flags |= IB_QP_CREATE_CROSS_CHANNEL;
- if (qp->flags & MLX5_IB_QP_MANAGED_SEND)
- qp_init_attr->create_flags |= IB_QP_CREATE_MANAGED_SEND;
- if (qp->flags & MLX5_IB_QP_MANAGED_RECV)
- qp_init_attr->create_flags |= IB_QP_CREATE_MANAGED_RECV;
- if (qp->flags & MLX5_IB_QP_SQPN_QP1)
- qp_init_attr->create_flags |= mlx5_ib_create_qp_sqpn_qp1();
+ qp_init_attr->create_flags = qp->flags;
qp_init_attr->sq_sig_type = qp->sq_signal_bits & MLX5_WQE_CTRL_CQ_UPDATE ?
IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR;
@@ -4624,45 +5165,23 @@ out:
return err;
}
-struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev,
- struct ib_ucontext *context,
- struct ib_udata *udata)
+int mlx5_ib_alloc_xrcd(struct ib_xrcd *ibxrcd, struct ib_udata *udata)
{
- struct mlx5_ib_dev *dev = to_mdev(ibdev);
- struct mlx5_ib_xrcd *xrcd;
- int err;
+ struct mlx5_ib_dev *dev = to_mdev(ibxrcd->device);
+ struct mlx5_ib_xrcd *xrcd = to_mxrcd(ibxrcd);
if (!MLX5_CAP_GEN(dev->mdev, xrc))
- return ERR_PTR(-ENOSYS);
-
- xrcd = kmalloc(sizeof(*xrcd), GFP_KERNEL);
- if (!xrcd)
- return ERR_PTR(-ENOMEM);
-
- err = mlx5_core_xrcd_alloc(dev->mdev, &xrcd->xrcdn);
- if (err) {
- kfree(xrcd);
- return ERR_PTR(-ENOMEM);
- }
+ return -EOPNOTSUPP;
- return &xrcd->ibxrcd;
+ return mlx5_cmd_xrcd_alloc(dev->mdev, &xrcd->xrcdn, 0);
}
-int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd)
+int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata)
{
struct mlx5_ib_dev *dev = to_mdev(xrcd->device);
u32 xrcdn = to_mxrcd(xrcd)->xrcdn;
- int err;
-
- err = mlx5_core_xrcd_dealloc(dev->mdev, xrcdn);
- if (err) {
- mlx5_ib_warn(dev, "failed to dealloc xrcdn 0x%x\n", xrcdn);
- return err;
- }
- kfree(xrcd);
-
- return 0;
+ return mlx5_cmd_xrcd_dealloc(dev->mdev, xrcdn, 0);
}
static void mlx5_ib_wq_event(struct mlx5_core_qp *core_qp, int type)
@@ -4687,11 +5206,34 @@ static void mlx5_ib_wq_event(struct mlx5_core_qp *core_qp, int type)
}
}
+static int set_delay_drop(struct mlx5_ib_dev *dev)
+{
+ int err = 0;
+
+ mutex_lock(&dev->delay_drop.lock);
+ if (dev->delay_drop.activate)
+ goto out;
+
+ err = mlx5_core_set_delay_drop(dev, dev->delay_drop.timeout);
+ if (err)
+ goto out;
+
+ dev->delay_drop.activate = true;
+out:
+ mutex_unlock(&dev->delay_drop.lock);
+
+ if (!err)
+ atomic_inc(&dev->delay_drop.rqs_cnt);
+ return err;
+}
+
static int create_rq(struct mlx5_ib_rwq *rwq, struct ib_pd *pd,
struct ib_wq_init_attr *init_attr)
{
struct mlx5_ib_dev *dev;
+ int has_net_offloads;
__be64 *rq_pas0;
+ int ts_format;
void *in;
void *rqc;
void *wq;
@@ -4700,31 +5242,102 @@ static int create_rq(struct mlx5_ib_rwq *rwq, struct ib_pd *pd,
dev = to_mdev(pd->device);
+ ts_format = get_rq_ts_format(dev, to_mcq(init_attr->cq));
+ if (ts_format < 0)
+ return ts_format;
+
inlen = MLX5_ST_SZ_BYTES(create_rq_in) + sizeof(u64) * rwq->rq_num_pas;
- in = mlx5_vzalloc(inlen);
+ in = kvzalloc(inlen, GFP_KERNEL);
if (!in)
return -ENOMEM;
+ MLX5_SET(create_rq_in, in, uid, to_mpd(pd)->uid);
rqc = MLX5_ADDR_OF(create_rq_in, in, ctx);
MLX5_SET(rqc, rqc, mem_rq_type,
MLX5_RQC_MEM_RQ_TYPE_MEMORY_RQ_INLINE);
+ MLX5_SET(rqc, rqc, ts_format, ts_format);
MLX5_SET(rqc, rqc, user_index, rwq->user_index);
MLX5_SET(rqc, rqc, cqn, to_mcq(init_attr->cq)->mcq.cqn);
MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST);
MLX5_SET(rqc, rqc, flush_in_error_en, 1);
wq = MLX5_ADDR_OF(rqc, rqc, wq);
- MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
- MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN);
+ MLX5_SET(wq, wq, wq_type,
+ rwq->create_flags & MLX5_IB_WQ_FLAGS_STRIDING_RQ ?
+ MLX5_WQ_TYPE_CYCLIC_STRIDING_RQ : MLX5_WQ_TYPE_CYCLIC);
+ if (init_attr->create_flags & IB_WQ_FLAGS_PCI_WRITE_END_PADDING) {
+ if (!MLX5_CAP_GEN(dev->mdev, end_pad)) {
+ mlx5_ib_dbg(dev, "Scatter end padding is not supported\n");
+ err = -EOPNOTSUPP;
+ goto out;
+ } else {
+ MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN);
+ }
+ }
MLX5_SET(wq, wq, log_wq_stride, rwq->log_rq_stride);
+ if (rwq->create_flags & MLX5_IB_WQ_FLAGS_STRIDING_RQ) {
+ /*
+ * In Firmware number of strides in each WQE is:
+ * "512 * 2^single_wqe_log_num_of_strides"
+ * Values 3 to 8 are accepted as 10 to 15, 9 to 18 are
+ * accepted as 0 to 9
+ */
+ static const u8 fw_map[] = { 10, 11, 12, 13, 14, 15, 0, 1,
+ 2, 3, 4, 5, 6, 7, 8, 9 };
+ MLX5_SET(wq, wq, two_byte_shift_en, rwq->two_byte_shift_en);
+ MLX5_SET(wq, wq, log_wqe_stride_size,
+ rwq->single_stride_log_num_of_bytes -
+ MLX5_MIN_SINGLE_STRIDE_LOG_NUM_BYTES);
+ MLX5_SET(wq, wq, log_wqe_num_of_strides,
+ fw_map[rwq->log_num_strides -
+ MLX5_EXT_MIN_SINGLE_WQE_LOG_NUM_STRIDES]);
+ }
MLX5_SET(wq, wq, log_wq_sz, rwq->log_rq_size);
MLX5_SET(wq, wq, pd, to_mpd(pd)->pdn);
MLX5_SET(wq, wq, page_offset, rwq->rq_page_offset);
MLX5_SET(wq, wq, log_wq_pg_sz, rwq->log_page_size);
MLX5_SET(wq, wq, wq_signature, rwq->wq_sig);
MLX5_SET64(wq, wq, dbr_addr, rwq->db.dma);
+ has_net_offloads = MLX5_CAP_GEN(dev->mdev, eth_net_offloads);
+ if (init_attr->create_flags & IB_WQ_FLAGS_CVLAN_STRIPPING) {
+ if (!(has_net_offloads && MLX5_CAP_ETH(dev->mdev, vlan_cap))) {
+ mlx5_ib_dbg(dev, "VLAN offloads are not supported\n");
+ err = -EOPNOTSUPP;
+ goto out;
+ }
+ } else {
+ MLX5_SET(rqc, rqc, vsd, 1);
+ }
+ if (init_attr->create_flags & IB_WQ_FLAGS_SCATTER_FCS) {
+ if (!(has_net_offloads && MLX5_CAP_ETH(dev->mdev, scatter_fcs))) {
+ mlx5_ib_dbg(dev, "Scatter FCS is not supported\n");
+ err = -EOPNOTSUPP;
+ goto out;
+ }
+ MLX5_SET(rqc, rqc, scatter_fcs, 1);
+ }
+ if (init_attr->create_flags & IB_WQ_FLAGS_DELAY_DROP) {
+ if (!(dev->ib_dev.attrs.raw_packet_caps &
+ IB_RAW_PACKET_CAP_DELAY_DROP)) {
+ mlx5_ib_dbg(dev, "Delay drop is not supported\n");
+ err = -EOPNOTSUPP;
+ goto out;
+ }
+ MLX5_SET(rqc, rqc, delay_drop_en, 1);
+ }
rq_pas0 = (__be64 *)MLX5_ADDR_OF(wq, wq, pas);
- mlx5_ib_populate_pas(dev, rwq->umem, rwq->page_shift, rq_pas0, 0);
- err = mlx5_core_create_rq_tracked(dev->mdev, in, inlen, &rwq->core_qp);
+ mlx5_ib_populate_pas(rwq->umem, 1UL << rwq->page_shift, rq_pas0, 0);
+ err = mlx5_core_create_rq_tracked(dev, in, inlen, &rwq->core_qp);
+ if (!err && init_attr->create_flags & IB_WQ_FLAGS_DELAY_DROP) {
+ err = set_delay_drop(dev);
+ if (err) {
+ mlx5_ib_warn(dev, "Failed to enable delay drop err=%d\n",
+ err);
+ mlx5_core_destroy_rq_tracked(dev, &rwq->core_qp);
+ } else {
+ rwq->create_flags |= MLX5_IB_WQ_FLAGS_DELAY_DROP;
+ }
+ }
+out:
kvfree(in);
return err;
}
@@ -4743,12 +5356,27 @@ static int set_user_rq_size(struct mlx5_ib_dev *dev,
rwq->wqe_count = ucmd->rq_wqe_count;
rwq->wqe_shift = ucmd->rq_wqe_shift;
- rwq->buf_size = (rwq->wqe_count << rwq->wqe_shift);
+ if (check_shl_overflow(rwq->wqe_count, rwq->wqe_shift, &rwq->buf_size))
+ return -EINVAL;
+
rwq->log_rq_stride = rwq->wqe_shift;
rwq->log_rq_size = ilog2(rwq->wqe_count);
return 0;
}
+static bool log_of_strides_valid(struct mlx5_ib_dev *dev, u32 log_num_strides)
+{
+ if ((log_num_strides > MLX5_MAX_SINGLE_WQE_LOG_NUM_STRIDES) ||
+ (log_num_strides < MLX5_EXT_MIN_SINGLE_WQE_LOG_NUM_STRIDES))
+ return false;
+
+ if (!MLX5_CAP_GEN(dev->mdev, ext_stride_num_range) &&
+ (log_num_strides < MLX5_MIN_SINGLE_WQE_LOG_NUM_STRIDES))
+ return false;
+
+ return true;
+}
+
static int prepare_user_rq(struct ib_pd *pd,
struct ib_wq_init_attr *init_attr,
struct ib_udata *udata,
@@ -4759,7 +5387,8 @@ static int prepare_user_rq(struct ib_pd *pd,
int err;
size_t required_cmd_sz;
- required_cmd_sz = offsetof(typeof(ucmd), reserved) + sizeof(ucmd.reserved);
+ required_cmd_sz = offsetofend(struct mlx5_ib_create_wq,
+ single_stride_log_num_of_bytes);
if (udata->inlen < required_cmd_sz) {
mlx5_ib_dbg(dev, "invalid inlen\n");
return -EINVAL;
@@ -4777,14 +5406,41 @@ static int prepare_user_rq(struct ib_pd *pd,
return -EFAULT;
}
- if (ucmd.comp_mask) {
+ if (ucmd.comp_mask & (~MLX5_IB_CREATE_WQ_STRIDING_RQ)) {
mlx5_ib_dbg(dev, "invalid comp mask\n");
return -EOPNOTSUPP;
- }
-
- if (ucmd.reserved) {
- mlx5_ib_dbg(dev, "invalid reserved\n");
- return -EOPNOTSUPP;
+ } else if (ucmd.comp_mask & MLX5_IB_CREATE_WQ_STRIDING_RQ) {
+ if (!MLX5_CAP_GEN(dev->mdev, striding_rq)) {
+ mlx5_ib_dbg(dev, "Striding RQ is not supported\n");
+ return -EOPNOTSUPP;
+ }
+ if ((ucmd.single_stride_log_num_of_bytes <
+ MLX5_MIN_SINGLE_STRIDE_LOG_NUM_BYTES) ||
+ (ucmd.single_stride_log_num_of_bytes >
+ MLX5_MAX_SINGLE_STRIDE_LOG_NUM_BYTES)) {
+ mlx5_ib_dbg(dev, "Invalid log stride size (%u. Range is %u - %u)\n",
+ ucmd.single_stride_log_num_of_bytes,
+ MLX5_MIN_SINGLE_STRIDE_LOG_NUM_BYTES,
+ MLX5_MAX_SINGLE_STRIDE_LOG_NUM_BYTES);
+ return -EINVAL;
+ }
+ if (!log_of_strides_valid(dev,
+ ucmd.single_wqe_log_num_of_strides)) {
+ mlx5_ib_dbg(
+ dev,
+ "Invalid log num strides (%u. Range is %u - %u)\n",
+ ucmd.single_wqe_log_num_of_strides,
+ MLX5_CAP_GEN(dev->mdev, ext_stride_num_range) ?
+ MLX5_EXT_MIN_SINGLE_WQE_LOG_NUM_STRIDES :
+ MLX5_MIN_SINGLE_WQE_LOG_NUM_STRIDES,
+ MLX5_MAX_SINGLE_WQE_LOG_NUM_STRIDES);
+ return -EINVAL;
+ }
+ rwq->single_stride_log_num_of_bytes =
+ ucmd.single_stride_log_num_of_bytes;
+ rwq->log_num_strides = ucmd.single_wqe_log_num_of_strides;
+ rwq->two_byte_shift_en = !!ucmd.two_byte_shift_en;
+ rwq->create_flags |= MLX5_IB_WQ_FLAGS_STRIDING_RQ;
}
err = set_user_rq_size(dev, init_attr, &ucmd, rwq);
@@ -4793,11 +5449,10 @@ static int prepare_user_rq(struct ib_pd *pd,
return err;
}
- err = create_user_rq(dev, pd, rwq, &ucmd);
+ err = create_user_rq(dev, pd, udata, rwq, &ucmd);
if (err) {
mlx5_ib_dbg(dev, "err %d\n", err);
- if (err)
- return err;
+ return err;
}
rwq->user_index = ucmd.user_index;
@@ -4817,10 +5472,14 @@ struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd,
if (!udata)
return ERR_PTR(-ENOSYS);
- min_resp_len = offsetof(typeof(resp), reserved) + sizeof(resp.reserved);
+ min_resp_len = offsetofend(struct mlx5_ib_create_wq_resp, reserved);
if (udata->outlen && udata->outlen < min_resp_len)
return ERR_PTR(-EINVAL);
+ if (!capable(CAP_SYS_RAWIO) &&
+ init_attr->create_flags & IB_WQ_FLAGS_DELAY_DROP)
+ return ERR_PTR(-EPERM);
+
dev = to_mdev(pd->device);
switch (init_attr->wq_type) {
case IB_WQT_RQ:
@@ -4843,8 +5502,8 @@ struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd,
rwq->ibwq.wq_num = rwq->core_qp.qpn;
rwq->ibwq.state = IB_WQS_RESET;
if (udata->outlen) {
- resp.response_length = offsetof(typeof(resp), response_length) +
- sizeof(resp.response_length);
+ resp.response_length = offsetofend(
+ struct mlx5_ib_create_wq_resp, response_length);
err = ib_copy_to_udata(udata, &resp, resp.response_length);
if (err)
goto err_copy;
@@ -4855,32 +5514,35 @@ struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd,
return &rwq->ibwq;
err_copy:
- mlx5_core_destroy_rq_tracked(dev->mdev, &rwq->core_qp);
+ mlx5_core_destroy_rq_tracked(dev, &rwq->core_qp);
err_user_rq:
- destroy_user_rq(pd, rwq);
+ destroy_user_rq(dev, pd, rwq, udata);
err:
kfree(rwq);
return ERR_PTR(err);
}
-int mlx5_ib_destroy_wq(struct ib_wq *wq)
+int mlx5_ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata)
{
struct mlx5_ib_dev *dev = to_mdev(wq->device);
struct mlx5_ib_rwq *rwq = to_mrwq(wq);
+ int ret;
- mlx5_core_destroy_rq_tracked(dev->mdev, &rwq->core_qp);
- destroy_user_rq(wq->pd, rwq);
+ ret = mlx5_core_destroy_rq_tracked(dev, &rwq->core_qp);
+ if (ret)
+ return ret;
+ destroy_user_rq(dev, wq->pd, rwq, udata);
kfree(rwq);
-
return 0;
}
-struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device,
- struct ib_rwq_ind_table_init_attr *init_attr,
- struct ib_udata *udata)
+int mlx5_ib_create_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_table,
+ struct ib_rwq_ind_table_init_attr *init_attr,
+ struct ib_udata *udata)
{
- struct mlx5_ib_dev *dev = to_mdev(device);
- struct mlx5_ib_rwq_ind_table *rwq_ind_tbl;
+ struct mlx5_ib_rwq_ind_table *rwq_ind_tbl =
+ to_mrwq_ind_table(ib_rwq_ind_table);
+ struct mlx5_ib_dev *dev = to_mdev(ib_rwq_ind_table->device);
int sz = 1 << init_attr->log_ind_tbl_size;
struct mlx5_ib_create_rwq_ind_tbl_resp resp = {};
size_t min_resp_len;
@@ -4893,30 +5555,25 @@ struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device,
if (udata->inlen > 0 &&
!ib_is_udata_cleared(udata, 0,
udata->inlen))
- return ERR_PTR(-EOPNOTSUPP);
+ return -EOPNOTSUPP;
if (init_attr->log_ind_tbl_size >
MLX5_CAP_GEN(dev->mdev, log_max_rqt_size)) {
mlx5_ib_dbg(dev, "log_ind_tbl_size = %d is bigger than supported = %d\n",
init_attr->log_ind_tbl_size,
MLX5_CAP_GEN(dev->mdev, log_max_rqt_size));
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
}
- min_resp_len = offsetof(typeof(resp), reserved) + sizeof(resp.reserved);
+ min_resp_len =
+ offsetofend(struct mlx5_ib_create_rwq_ind_tbl_resp, reserved);
if (udata->outlen && udata->outlen < min_resp_len)
- return ERR_PTR(-EINVAL);
-
- rwq_ind_tbl = kzalloc(sizeof(*rwq_ind_tbl), GFP_KERNEL);
- if (!rwq_ind_tbl)
- return ERR_PTR(-ENOMEM);
+ return -EINVAL;
inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz;
- in = mlx5_vzalloc(inlen);
- if (!in) {
- err = -ENOMEM;
- goto err;
- }
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);
@@ -4926,28 +5583,29 @@ struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device,
for (i = 0; i < sz; i++)
MLX5_SET(rqtc, rqtc, rq_num[i], init_attr->ind_tbl[i]->wq_num);
+ rwq_ind_tbl->uid = to_mpd(init_attr->ind_tbl[0]->pd)->uid;
+ MLX5_SET(create_rqt_in, in, uid, rwq_ind_tbl->uid);
+
err = mlx5_core_create_rqt(dev->mdev, in, inlen, &rwq_ind_tbl->rqtn);
kvfree(in);
-
if (err)
- goto err;
+ return err;
rwq_ind_tbl->ib_rwq_ind_tbl.ind_tbl_num = rwq_ind_tbl->rqtn;
if (udata->outlen) {
- resp.response_length = offsetof(typeof(resp), response_length) +
- sizeof(resp.response_length);
+ resp.response_length =
+ offsetofend(struct mlx5_ib_create_rwq_ind_tbl_resp,
+ response_length);
err = ib_copy_to_udata(udata, &resp, resp.response_length);
if (err)
goto err_copy;
}
- return &rwq_ind_tbl->ib_rwq_ind_tbl;
+ return 0;
err_copy:
- mlx5_core_destroy_rqt(dev->mdev, rwq_ind_tbl->rqtn);
-err:
- kfree(rwq_ind_tbl);
- return ERR_PTR(err);
+ mlx5_cmd_destroy_rqt(dev->mdev, rwq_ind_tbl->rqtn, rwq_ind_tbl->uid);
+ return err;
}
int mlx5_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_tbl)
@@ -4955,10 +5613,7 @@ int mlx5_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_tbl)
struct mlx5_ib_rwq_ind_table *rwq_ind_tbl = to_mrwq_ind_table(ib_rwq_ind_tbl);
struct mlx5_ib_dev *dev = to_mdev(ib_rwq_ind_tbl->device);
- mlx5_core_destroy_rqt(dev->mdev, rwq_ind_tbl->rqtn);
-
- kfree(rwq_ind_tbl);
- return 0;
+ return mlx5_cmd_destroy_rqt(dev->mdev, rwq_ind_tbl->rqtn, rwq_ind_tbl->uid);
}
int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
@@ -4975,7 +5630,7 @@ int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
void *rqc;
void *in;
- required_cmd_sz = offsetof(typeof(ucmd), reserved) + sizeof(ucmd.reserved);
+ required_cmd_sz = offsetofend(struct mlx5_ib_modify_wq, reserved);
if (udata->inlen < required_cmd_sz)
return -EINVAL;
@@ -4991,27 +5646,242 @@ int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
return -EOPNOTSUPP;
inlen = MLX5_ST_SZ_BYTES(modify_rq_in);
- in = mlx5_vzalloc(inlen);
+ in = kvzalloc(inlen, GFP_KERNEL);
if (!in)
return -ENOMEM;
rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx);
- curr_wq_state = (wq_attr_mask & IB_WQ_CUR_STATE) ?
- wq_attr->curr_wq_state : wq->state;
- wq_state = (wq_attr_mask & IB_WQ_STATE) ?
- wq_attr->wq_state : curr_wq_state;
+ curr_wq_state = wq_attr->curr_wq_state;
+ wq_state = wq_attr->wq_state;
if (curr_wq_state == IB_WQS_ERR)
curr_wq_state = MLX5_RQC_STATE_ERR;
if (wq_state == IB_WQS_ERR)
wq_state = MLX5_RQC_STATE_ERR;
MLX5_SET(modify_rq_in, in, rq_state, curr_wq_state);
+ MLX5_SET(modify_rq_in, in, uid, to_mpd(wq->pd)->uid);
MLX5_SET(rqc, rqc, state, wq_state);
- err = mlx5_core_modify_rq(dev->mdev, rwq->core_qp.qpn, in, inlen);
- kvfree(in);
+ if (wq_attr_mask & IB_WQ_FLAGS) {
+ if (wq_attr->flags_mask & IB_WQ_FLAGS_CVLAN_STRIPPING) {
+ if (!(MLX5_CAP_GEN(dev->mdev, eth_net_offloads) &&
+ MLX5_CAP_ETH(dev->mdev, vlan_cap))) {
+ mlx5_ib_dbg(dev, "VLAN offloads are not supported\n");
+ err = -EOPNOTSUPP;
+ goto out;
+ }
+ MLX5_SET64(modify_rq_in, in, modify_bitmask,
+ MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_VSD);
+ MLX5_SET(rqc, rqc, vsd,
+ (wq_attr->flags & IB_WQ_FLAGS_CVLAN_STRIPPING) ? 0 : 1);
+ }
+
+ if (wq_attr->flags_mask & IB_WQ_FLAGS_PCI_WRITE_END_PADDING) {
+ mlx5_ib_dbg(dev, "Modifying scatter end padding is not supported\n");
+ err = -EOPNOTSUPP;
+ goto out;
+ }
+ }
+
+ if (curr_wq_state == IB_WQS_RESET && wq_state == IB_WQS_RDY) {
+ u16 set_id;
+
+ set_id = mlx5_ib_get_counters_id(dev, 0);
+ if (MLX5_CAP_GEN(dev->mdev, modify_rq_counter_set_id)) {
+ MLX5_SET64(modify_rq_in, in, modify_bitmask,
+ MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_RQ_COUNTER_SET_ID);
+ MLX5_SET(rqc, rqc, counter_set_id, set_id);
+ } else
+ dev_info_once(
+ &dev->ib_dev.dev,
+ "Receive WQ counters are not supported on current FW\n");
+ }
+
+ err = mlx5_core_modify_rq(dev->mdev, rwq->core_qp.qpn, in);
if (!err)
rwq->ibwq.state = (wq_state == MLX5_RQC_STATE_ERR) ? IB_WQS_ERR : wq_state;
+out:
+ kvfree(in);
+ return err;
+}
+
+struct mlx5_ib_drain_cqe {
+ struct ib_cqe cqe;
+ struct completion done;
+};
+
+static void mlx5_ib_drain_qp_done(struct ib_cq *cq, struct ib_wc *wc)
+{
+ struct mlx5_ib_drain_cqe *cqe = container_of(wc->wr_cqe,
+ struct mlx5_ib_drain_cqe,
+ cqe);
+
+ complete(&cqe->done);
+}
+
+/* This function returns only once the drained WR was completed */
+static void handle_drain_completion(struct ib_cq *cq,
+ struct mlx5_ib_drain_cqe *sdrain,
+ struct mlx5_ib_dev *dev)
+{
+ struct mlx5_core_dev *mdev = dev->mdev;
+
+ if (cq->poll_ctx == IB_POLL_DIRECT) {
+ while (wait_for_completion_timeout(&sdrain->done, HZ / 10) <= 0)
+ ib_process_cq_direct(cq, -1);
+ return;
+ }
+
+ if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
+ struct mlx5_ib_cq *mcq = to_mcq(cq);
+ bool triggered = false;
+ unsigned long flags;
+
+ spin_lock_irqsave(&dev->reset_flow_resource_lock, flags);
+ /* Make sure that the CQ handler won't run if wasn't run yet */
+ if (!mcq->mcq.reset_notify_added)
+ mcq->mcq.reset_notify_added = 1;
+ else
+ triggered = true;
+ spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags);
+
+ if (triggered) {
+ /* Wait for any scheduled/running task to be ended */
+ switch (cq->poll_ctx) {
+ case IB_POLL_SOFTIRQ:
+ irq_poll_disable(&cq->iop);
+ irq_poll_enable(&cq->iop);
+ break;
+ case IB_POLL_WORKQUEUE:
+ cancel_work_sync(&cq->work);
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ }
+ }
+
+ /* Run the CQ handler - this makes sure that the drain WR will
+ * be processed if wasn't processed yet.
+ */
+ mcq->mcq.comp(&mcq->mcq, NULL);
+ }
+
+ wait_for_completion(&sdrain->done);
+}
+
+void mlx5_ib_drain_sq(struct ib_qp *qp)
+{
+ struct ib_cq *cq = qp->send_cq;
+ struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
+ struct mlx5_ib_drain_cqe sdrain;
+ const struct ib_send_wr *bad_swr;
+ struct ib_rdma_wr swr = {
+ .wr = {
+ .next = NULL,
+ { .wr_cqe = &sdrain.cqe, },
+ .opcode = IB_WR_RDMA_WRITE,
+ },
+ };
+ int ret;
+ struct mlx5_ib_dev *dev = to_mdev(qp->device);
+ struct mlx5_core_dev *mdev = dev->mdev;
+
+ ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
+ if (ret && mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR) {
+ WARN_ONCE(ret, "failed to drain send queue: %d\n", ret);
+ return;
+ }
+
+ sdrain.cqe.done = mlx5_ib_drain_qp_done;
+ init_completion(&sdrain.done);
+
+ ret = mlx5_ib_post_send_drain(qp, &swr.wr, &bad_swr);
+ if (ret) {
+ WARN_ONCE(ret, "failed to drain send queue: %d\n", ret);
+ return;
+ }
+
+ handle_drain_completion(cq, &sdrain, dev);
+}
+
+void mlx5_ib_drain_rq(struct ib_qp *qp)
+{
+ struct ib_cq *cq = qp->recv_cq;
+ struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
+ struct mlx5_ib_drain_cqe rdrain;
+ struct ib_recv_wr rwr = {};
+ const struct ib_recv_wr *bad_rwr;
+ int ret;
+ struct mlx5_ib_dev *dev = to_mdev(qp->device);
+ struct mlx5_core_dev *mdev = dev->mdev;
+
+ ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
+ if (ret && mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR) {
+ WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret);
+ return;
+ }
+
+ rwr.wr_cqe = &rdrain.cqe;
+ rdrain.cqe.done = mlx5_ib_drain_qp_done;
+ init_completion(&rdrain.done);
+
+ ret = mlx5_ib_post_recv_drain(qp, &rwr, &bad_rwr);
+ if (ret) {
+ WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret);
+ return;
+ }
+
+ handle_drain_completion(cq, &rdrain, dev);
+}
+
+/*
+ * Bind a qp to a counter. If @counter is NULL then bind the qp to
+ * the default counter
+ */
+int mlx5_ib_qp_set_counter(struct ib_qp *qp, struct rdma_counter *counter)
+{
+ struct mlx5_ib_dev *dev = to_mdev(qp->device);
+ struct mlx5_ib_qp *mqp = to_mqp(qp);
+ int err = 0;
+
+ mutex_lock(&mqp->mutex);
+ if (mqp->state == IB_QPS_RESET) {
+ qp->counter = counter;
+ goto out;
+ }
+
+ if (!MLX5_CAP_GEN(dev->mdev, rts2rts_qp_counters_set_id)) {
+ err = -EOPNOTSUPP;
+ goto out;
+ }
+
+ if (mqp->state == IB_QPS_RTS) {
+ err = __mlx5_ib_qp_set_counter(qp, counter);
+ if (!err)
+ qp->counter = counter;
+
+ goto out;
+ }
+
+ mqp->counter_pending = 1;
+ qp->counter = counter;
+
+out:
+ mutex_unlock(&mqp->mutex);
return err;
}
+
+int mlx5_ib_qp_event_init(void)
+{
+ mlx5_ib_qp_event_wq = alloc_ordered_workqueue("mlx5_ib_qp_event_wq", 0);
+ if (!mlx5_ib_qp_event_wq)
+ return -ENOMEM;
+
+ return 0;
+}
+
+void mlx5_ib_qp_event_cleanup(void)
+{
+ destroy_workqueue(mlx5_ib_qp_event_wq);
+}
diff --git a/drivers/infiniband/hw/mlx5/qp.h b/drivers/infiniband/hw/mlx5/qp.h
new file mode 100644
index 000000000000..2530e7730635
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/qp.h
@@ -0,0 +1,60 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright (c) 2013-2020, Mellanox Technologies inc. All rights reserved.
+ */
+
+#ifndef _MLX5_IB_QP_H
+#define _MLX5_IB_QP_H
+
+struct mlx5_ib_dev;
+
+struct mlx5_qp_table {
+ struct notifier_block nb;
+ struct xarray dct_xa;
+
+ /* protect radix tree
+ */
+ spinlock_t lock;
+ struct radix_tree_root tree;
+};
+
+int mlx5_init_qp_table(struct mlx5_ib_dev *dev);
+void mlx5_cleanup_qp_table(struct mlx5_ib_dev *dev);
+
+int mlx5_core_create_dct(struct mlx5_ib_dev *dev, struct mlx5_core_dct *qp,
+ u32 *in, int inlen, u32 *out, int outlen);
+int mlx5_qpc_create_qp(struct mlx5_ib_dev *dev, struct mlx5_core_qp *qp,
+ u32 *in, int inlen, u32 *out);
+int mlx5_core_qp_modify(struct mlx5_ib_dev *dev, u16 opcode, u32 opt_param_mask,
+ void *qpc, struct mlx5_core_qp *qp, u32 *ece);
+int mlx5_core_destroy_qp(struct mlx5_ib_dev *dev, struct mlx5_core_qp *qp);
+int mlx5_core_destroy_dct(struct mlx5_ib_dev *dev, struct mlx5_core_dct *dct);
+int mlx5_core_qp_query(struct mlx5_ib_dev *dev, struct mlx5_core_qp *qp,
+ u32 *out, int outlen, bool qpc_ext);
+int mlx5_core_dct_query(struct mlx5_ib_dev *dev, struct mlx5_core_dct *dct,
+ u32 *out, int outlen);
+
+int mlx5_core_set_delay_drop(struct mlx5_ib_dev *dev, u32 timeout_usec);
+
+int mlx5_core_destroy_rq_tracked(struct mlx5_ib_dev *dev,
+ struct mlx5_core_qp *rq);
+int mlx5_core_create_sq_tracked(struct mlx5_ib_dev *dev, u32 *in, int inlen,
+ struct mlx5_core_qp *sq);
+void mlx5_core_destroy_sq_tracked(struct mlx5_ib_dev *dev,
+ struct mlx5_core_qp *sq);
+
+int mlx5_core_create_rq_tracked(struct mlx5_ib_dev *dev, u32 *in, int inlen,
+ struct mlx5_core_qp *rq);
+
+struct mlx5_core_rsc_common *mlx5_core_res_hold(struct mlx5_ib_dev *dev,
+ int res_num,
+ enum mlx5_res_type res_type);
+void mlx5_core_res_put(struct mlx5_core_rsc_common *res);
+
+int mlx5_core_xrcd_alloc(struct mlx5_ib_dev *dev, u32 *xrcdn);
+int mlx5_core_xrcd_dealloc(struct mlx5_ib_dev *dev, u32 xrcdn);
+int mlx5_ib_qp_set_counter(struct ib_qp *qp, struct rdma_counter *counter);
+int mlx5_ib_qp_event_init(void);
+void mlx5_ib_qp_event_cleanup(void);
+int mlx5r_ib_rate(struct mlx5_ib_dev *dev, u8 rate);
+#endif /* _MLX5_IB_QP_H */
diff --git a/drivers/infiniband/hw/mlx5/qpc.c b/drivers/infiniband/hw/mlx5/qpc.c
new file mode 100644
index 000000000000..146d03ae40bd
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/qpc.c
@@ -0,0 +1,697 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2013-2020, Mellanox Technologies inc. All rights reserved.
+ */
+
+#include <linux/gfp.h>
+#include <linux/mlx5/qp.h>
+#include <linux/mlx5/driver.h>
+#include "mlx5_ib.h"
+#include "qp.h"
+
+static int mlx5_core_drain_dct(struct mlx5_ib_dev *dev,
+ struct mlx5_core_dct *dct);
+
+static struct mlx5_core_rsc_common *
+mlx5_get_rsc(struct mlx5_qp_table *table, u32 rsn)
+{
+ struct mlx5_core_rsc_common *common;
+ unsigned long flags;
+
+ spin_lock_irqsave(&table->lock, flags);
+
+ common = radix_tree_lookup(&table->tree, rsn);
+ if (common && !common->invalid)
+ refcount_inc(&common->refcount);
+ else
+ common = NULL;
+
+ spin_unlock_irqrestore(&table->lock, flags);
+
+ return common;
+}
+
+void mlx5_core_put_rsc(struct mlx5_core_rsc_common *common)
+{
+ if (refcount_dec_and_test(&common->refcount))
+ complete(&common->free);
+}
+
+static u64 qp_allowed_event_types(void)
+{
+ u64 mask;
+
+ mask = BIT(MLX5_EVENT_TYPE_PATH_MIG) |
+ BIT(MLX5_EVENT_TYPE_COMM_EST) |
+ BIT(MLX5_EVENT_TYPE_SQ_DRAINED) |
+ BIT(MLX5_EVENT_TYPE_SRQ_LAST_WQE) |
+ BIT(MLX5_EVENT_TYPE_WQ_CATAS_ERROR) |
+ BIT(MLX5_EVENT_TYPE_PATH_MIG_FAILED) |
+ BIT(MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR) |
+ BIT(MLX5_EVENT_TYPE_WQ_ACCESS_ERROR);
+
+ return mask;
+}
+
+static u64 rq_allowed_event_types(void)
+{
+ u64 mask;
+
+ mask = BIT(MLX5_EVENT_TYPE_SRQ_LAST_WQE) |
+ BIT(MLX5_EVENT_TYPE_WQ_CATAS_ERROR);
+
+ return mask;
+}
+
+static u64 sq_allowed_event_types(void)
+{
+ return BIT(MLX5_EVENT_TYPE_WQ_CATAS_ERROR);
+}
+
+static u64 dct_allowed_event_types(void)
+{
+ return BIT(MLX5_EVENT_TYPE_DCT_DRAINED);
+}
+
+static bool is_event_type_allowed(int rsc_type, int event_type)
+{
+ switch (rsc_type) {
+ case MLX5_EVENT_QUEUE_TYPE_QP:
+ return BIT(event_type) & qp_allowed_event_types();
+ case MLX5_EVENT_QUEUE_TYPE_RQ:
+ return BIT(event_type) & rq_allowed_event_types();
+ case MLX5_EVENT_QUEUE_TYPE_SQ:
+ return BIT(event_type) & sq_allowed_event_types();
+ case MLX5_EVENT_QUEUE_TYPE_DCT:
+ return BIT(event_type) & dct_allowed_event_types();
+ default:
+ WARN(1, "Event arrived for unknown resource type");
+ return false;
+ }
+}
+
+static int dct_event_notifier(struct mlx5_ib_dev *dev, struct mlx5_eqe *eqe)
+{
+ struct mlx5_core_dct *dct;
+ unsigned long flags;
+ u32 qpn;
+
+ qpn = be32_to_cpu(eqe->data.dct.dctn) & 0xFFFFFF;
+ xa_lock_irqsave(&dev->qp_table.dct_xa, flags);
+ dct = xa_load(&dev->qp_table.dct_xa, qpn);
+ if (dct)
+ complete(&dct->drained);
+ xa_unlock_irqrestore(&dev->qp_table.dct_xa, flags);
+ return NOTIFY_OK;
+}
+
+static int rsc_event_notifier(struct notifier_block *nb,
+ unsigned long type, void *data)
+{
+ struct mlx5_ib_dev *dev =
+ container_of(nb, struct mlx5_ib_dev, qp_table.nb);
+ struct mlx5_core_rsc_common *common;
+ struct mlx5_eqe *eqe = data;
+ u8 event_type = (u8)type;
+ struct mlx5_core_qp *qp;
+ u32 rsn;
+
+ switch (event_type) {
+ case MLX5_EVENT_TYPE_DCT_DRAINED:
+ return dct_event_notifier(dev, eqe);
+ case MLX5_EVENT_TYPE_PATH_MIG:
+ case MLX5_EVENT_TYPE_COMM_EST:
+ case MLX5_EVENT_TYPE_SQ_DRAINED:
+ case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
+ case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
+ case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
+ case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
+ case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
+ rsn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff;
+ rsn |= (eqe->data.qp_srq.type << MLX5_USER_INDEX_LEN);
+ break;
+ default:
+ return NOTIFY_DONE;
+ }
+
+ common = mlx5_get_rsc(&dev->qp_table, rsn);
+ if (!common)
+ return NOTIFY_OK;
+
+ if (!is_event_type_allowed((rsn >> MLX5_USER_INDEX_LEN), event_type))
+ goto out;
+
+ switch (common->res) {
+ case MLX5_RES_QP:
+ case MLX5_RES_RQ:
+ case MLX5_RES_SQ:
+ qp = (struct mlx5_core_qp *)common;
+ qp->event(qp, event_type);
+ /* Need to put resource in event handler */
+ return NOTIFY_OK;
+ default:
+ break;
+ }
+out:
+ mlx5_core_put_rsc(common);
+
+ return NOTIFY_OK;
+}
+
+static int create_resource_common(struct mlx5_ib_dev *dev,
+ struct mlx5_core_qp *qp, int rsc_type)
+{
+ struct mlx5_qp_table *table = &dev->qp_table;
+ int err;
+
+ qp->common.res = rsc_type;
+ spin_lock_irq(&table->lock);
+ err = radix_tree_insert(&table->tree,
+ qp->qpn | (rsc_type << MLX5_USER_INDEX_LEN),
+ qp);
+ spin_unlock_irq(&table->lock);
+ if (err)
+ return err;
+
+ refcount_set(&qp->common.refcount, 1);
+ init_completion(&qp->common.free);
+ qp->pid = current->pid;
+
+ return 0;
+}
+
+static void modify_resource_common_state(struct mlx5_ib_dev *dev,
+ struct mlx5_core_qp *qp,
+ bool invalid)
+{
+ struct mlx5_qp_table *table = &dev->qp_table;
+ unsigned long flags;
+
+ spin_lock_irqsave(&table->lock, flags);
+ qp->common.invalid = invalid;
+ spin_unlock_irqrestore(&table->lock, flags);
+}
+
+static void destroy_resource_common(struct mlx5_ib_dev *dev,
+ struct mlx5_core_qp *qp)
+{
+ struct mlx5_qp_table *table = &dev->qp_table;
+ unsigned long flags;
+
+ spin_lock_irqsave(&table->lock, flags);
+ radix_tree_delete(&table->tree,
+ qp->qpn | (qp->common.res << MLX5_USER_INDEX_LEN));
+ spin_unlock_irqrestore(&table->lock, flags);
+ mlx5_core_put_rsc((struct mlx5_core_rsc_common *)qp);
+ wait_for_completion(&qp->common.free);
+}
+
+static int _mlx5_core_destroy_dct(struct mlx5_ib_dev *dev,
+ struct mlx5_core_dct *dct)
+{
+ u32 in[MLX5_ST_SZ_DW(destroy_dct_in)] = {};
+ struct mlx5_core_qp *qp = &dct->mqp;
+
+ MLX5_SET(destroy_dct_in, in, opcode, MLX5_CMD_OP_DESTROY_DCT);
+ MLX5_SET(destroy_dct_in, in, dctn, qp->qpn);
+ MLX5_SET(destroy_dct_in, in, uid, qp->uid);
+ return mlx5_cmd_exec_in(dev->mdev, destroy_dct, in);
+}
+
+int mlx5_core_create_dct(struct mlx5_ib_dev *dev, struct mlx5_core_dct *dct,
+ u32 *in, int inlen, u32 *out, int outlen)
+{
+ struct mlx5_core_qp *qp = &dct->mqp;
+ int err;
+
+ init_completion(&dct->drained);
+ MLX5_SET(create_dct_in, in, opcode, MLX5_CMD_OP_CREATE_DCT);
+
+ err = mlx5_cmd_do(dev->mdev, in, inlen, out, outlen);
+ if (err)
+ return err;
+
+ qp->qpn = MLX5_GET(create_dct_out, out, dctn);
+ qp->uid = MLX5_GET(create_dct_in, in, uid);
+ err = xa_err(xa_store_irq(&dev->qp_table.dct_xa, qp->qpn, dct, GFP_KERNEL));
+ if (err)
+ goto err_cmd;
+
+ return 0;
+err_cmd:
+ _mlx5_core_destroy_dct(dev, dct);
+ return err;
+}
+
+int mlx5_qpc_create_qp(struct mlx5_ib_dev *dev, struct mlx5_core_qp *qp,
+ u32 *in, int inlen, u32 *out)
+{
+ u32 din[MLX5_ST_SZ_DW(destroy_qp_in)] = {};
+ int err;
+
+ MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP);
+
+ err = mlx5_cmd_exec(dev->mdev, in, inlen, out,
+ MLX5_ST_SZ_BYTES(create_qp_out));
+ if (err)
+ return err;
+
+ qp->uid = MLX5_GET(create_qp_in, in, uid);
+ qp->qpn = MLX5_GET(create_qp_out, out, qpn);
+
+ err = create_resource_common(dev, qp, MLX5_RES_QP);
+ if (err)
+ goto err_cmd;
+
+ if (dev->ib_dev.type != RDMA_DEVICE_TYPE_SMI)
+ mlx5_debug_qp_add(dev->mdev, qp);
+
+ return 0;
+
+err_cmd:
+ MLX5_SET(destroy_qp_in, din, opcode, MLX5_CMD_OP_DESTROY_QP);
+ MLX5_SET(destroy_qp_in, din, qpn, qp->qpn);
+ MLX5_SET(destroy_qp_in, din, uid, qp->uid);
+ mlx5_cmd_exec_in(dev->mdev, destroy_qp, din);
+ return err;
+}
+
+static int mlx5_core_drain_dct(struct mlx5_ib_dev *dev,
+ struct mlx5_core_dct *dct)
+{
+ u32 in[MLX5_ST_SZ_DW(drain_dct_in)] = {};
+ struct mlx5_core_qp *qp = &dct->mqp;
+
+ MLX5_SET(drain_dct_in, in, opcode, MLX5_CMD_OP_DRAIN_DCT);
+ MLX5_SET(drain_dct_in, in, dctn, qp->qpn);
+ MLX5_SET(drain_dct_in, in, uid, qp->uid);
+ return mlx5_cmd_exec_in(dev->mdev, drain_dct, in);
+}
+
+int mlx5_core_destroy_dct(struct mlx5_ib_dev *dev,
+ struct mlx5_core_dct *dct)
+{
+ struct mlx5_qp_table *table = &dev->qp_table;
+ struct mlx5_core_dct *tmp;
+ int err;
+
+ err = mlx5_core_drain_dct(dev, dct);
+ if (err) {
+ if (dev->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
+ goto destroy;
+
+ return err;
+ }
+ wait_for_completion(&dct->drained);
+
+destroy:
+ tmp = xa_cmpxchg_irq(&table->dct_xa, dct->mqp.qpn, dct, XA_ZERO_ENTRY, GFP_KERNEL);
+ if (WARN_ON(tmp != dct))
+ return xa_err(tmp) ?: -EINVAL;
+
+ err = _mlx5_core_destroy_dct(dev, dct);
+ if (err) {
+ xa_cmpxchg_irq(&table->dct_xa, dct->mqp.qpn, XA_ZERO_ENTRY, dct, 0);
+ return err;
+ }
+ xa_erase_irq(&table->dct_xa, dct->mqp.qpn);
+ return 0;
+}
+
+int mlx5_core_destroy_qp(struct mlx5_ib_dev *dev, struct mlx5_core_qp *qp)
+{
+ u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {};
+
+ if (dev->ib_dev.type != RDMA_DEVICE_TYPE_SMI)
+ mlx5_debug_qp_remove(dev->mdev, qp);
+
+ destroy_resource_common(dev, qp);
+
+ MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
+ MLX5_SET(destroy_qp_in, in, qpn, qp->qpn);
+ MLX5_SET(destroy_qp_in, in, uid, qp->uid);
+ return mlx5_cmd_exec_in(dev->mdev, destroy_qp, in);
+}
+
+int mlx5_core_set_delay_drop(struct mlx5_ib_dev *dev,
+ u32 timeout_usec)
+{
+ u32 in[MLX5_ST_SZ_DW(set_delay_drop_params_in)] = {};
+
+ MLX5_SET(set_delay_drop_params_in, in, opcode,
+ MLX5_CMD_OP_SET_DELAY_DROP_PARAMS);
+ MLX5_SET(set_delay_drop_params_in, in, delay_drop_timeout,
+ timeout_usec / 100);
+ return mlx5_cmd_exec_in(dev->mdev, set_delay_drop_params, in);
+}
+
+struct mbox_info {
+ u32 *in;
+ u32 *out;
+ int inlen;
+ int outlen;
+};
+
+static int mbox_alloc(struct mbox_info *mbox, int inlen, int outlen)
+{
+ mbox->inlen = inlen;
+ mbox->outlen = outlen;
+ mbox->in = kzalloc(mbox->inlen, GFP_KERNEL);
+ mbox->out = kzalloc(mbox->outlen, GFP_KERNEL);
+ if (!mbox->in || !mbox->out) {
+ kfree(mbox->in);
+ kfree(mbox->out);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static void mbox_free(struct mbox_info *mbox)
+{
+ kfree(mbox->in);
+ kfree(mbox->out);
+}
+
+static int get_ece_from_mbox(void *out, u16 opcode)
+{
+ int ece = 0;
+
+ switch (opcode) {
+ case MLX5_CMD_OP_INIT2INIT_QP:
+ ece = MLX5_GET(init2init_qp_out, out, ece);
+ break;
+ case MLX5_CMD_OP_INIT2RTR_QP:
+ ece = MLX5_GET(init2rtr_qp_out, out, ece);
+ break;
+ case MLX5_CMD_OP_RTR2RTS_QP:
+ ece = MLX5_GET(rtr2rts_qp_out, out, ece);
+ break;
+ case MLX5_CMD_OP_RTS2RTS_QP:
+ ece = MLX5_GET(rts2rts_qp_out, out, ece);
+ break;
+ case MLX5_CMD_OP_RST2INIT_QP:
+ ece = MLX5_GET(rst2init_qp_out, out, ece);
+ break;
+ default:
+ break;
+ }
+
+ return ece;
+}
+
+static int modify_qp_mbox_alloc(struct mlx5_core_dev *dev, u16 opcode, int qpn,
+ u32 opt_param_mask, void *qpc,
+ struct mbox_info *mbox, u16 uid, u32 ece)
+{
+ mbox->out = NULL;
+ mbox->in = NULL;
+
+#define MBOX_ALLOC(mbox, typ) \
+ mbox_alloc(mbox, MLX5_ST_SZ_BYTES(typ##_in), MLX5_ST_SZ_BYTES(typ##_out))
+
+#define MOD_QP_IN_SET(typ, in, _opcode, _qpn, _uid) \
+ do { \
+ MLX5_SET(typ##_in, in, opcode, _opcode); \
+ MLX5_SET(typ##_in, in, qpn, _qpn); \
+ MLX5_SET(typ##_in, in, uid, _uid); \
+ } while (0)
+
+#define MOD_QP_IN_SET_QPC(typ, in, _opcode, _qpn, _opt_p, _qpc, _uid) \
+ do { \
+ MOD_QP_IN_SET(typ, in, _opcode, _qpn, _uid); \
+ MLX5_SET(typ##_in, in, opt_param_mask, _opt_p); \
+ memcpy(MLX5_ADDR_OF(typ##_in, in, qpc), _qpc, \
+ MLX5_ST_SZ_BYTES(qpc)); \
+ } while (0)
+
+ switch (opcode) {
+ /* 2RST & 2ERR */
+ case MLX5_CMD_OP_2RST_QP:
+ if (MBOX_ALLOC(mbox, qp_2rst))
+ return -ENOMEM;
+ MOD_QP_IN_SET(qp_2rst, mbox->in, opcode, qpn, uid);
+ break;
+ case MLX5_CMD_OP_2ERR_QP:
+ if (MBOX_ALLOC(mbox, qp_2err))
+ return -ENOMEM;
+ MOD_QP_IN_SET(qp_2err, mbox->in, opcode, qpn, uid);
+ break;
+
+ /* MODIFY with QPC */
+ case MLX5_CMD_OP_RST2INIT_QP:
+ if (MBOX_ALLOC(mbox, rst2init_qp))
+ return -ENOMEM;
+ MOD_QP_IN_SET_QPC(rst2init_qp, mbox->in, opcode, qpn,
+ opt_param_mask, qpc, uid);
+ MLX5_SET(rst2init_qp_in, mbox->in, ece, ece);
+ break;
+ case MLX5_CMD_OP_INIT2RTR_QP:
+ if (MBOX_ALLOC(mbox, init2rtr_qp))
+ return -ENOMEM;
+ MOD_QP_IN_SET_QPC(init2rtr_qp, mbox->in, opcode, qpn,
+ opt_param_mask, qpc, uid);
+ MLX5_SET(init2rtr_qp_in, mbox->in, ece, ece);
+ break;
+ case MLX5_CMD_OP_RTR2RTS_QP:
+ if (MBOX_ALLOC(mbox, rtr2rts_qp))
+ return -ENOMEM;
+ MOD_QP_IN_SET_QPC(rtr2rts_qp, mbox->in, opcode, qpn,
+ opt_param_mask, qpc, uid);
+ MLX5_SET(rtr2rts_qp_in, mbox->in, ece, ece);
+ break;
+ case MLX5_CMD_OP_RTS2RTS_QP:
+ if (MBOX_ALLOC(mbox, rts2rts_qp))
+ return -ENOMEM;
+ MOD_QP_IN_SET_QPC(rts2rts_qp, mbox->in, opcode, qpn,
+ opt_param_mask, qpc, uid);
+ MLX5_SET(rts2rts_qp_in, mbox->in, ece, ece);
+ break;
+ case MLX5_CMD_OP_SQERR2RTS_QP:
+ if (MBOX_ALLOC(mbox, sqerr2rts_qp))
+ return -ENOMEM;
+ MOD_QP_IN_SET_QPC(sqerr2rts_qp, mbox->in, opcode, qpn,
+ opt_param_mask, qpc, uid);
+ break;
+ case MLX5_CMD_OP_SQD_RTS_QP:
+ if (MBOX_ALLOC(mbox, sqd2rts_qp))
+ return -ENOMEM;
+ MOD_QP_IN_SET_QPC(sqd2rts_qp, mbox->in, opcode, qpn,
+ opt_param_mask, qpc, uid);
+ break;
+ case MLX5_CMD_OP_INIT2INIT_QP:
+ if (MBOX_ALLOC(mbox, init2init_qp))
+ return -ENOMEM;
+ MOD_QP_IN_SET_QPC(init2init_qp, mbox->in, opcode, qpn,
+ opt_param_mask, qpc, uid);
+ MLX5_SET(init2init_qp_in, mbox->in, ece, ece);
+ break;
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
+int mlx5_core_qp_modify(struct mlx5_ib_dev *dev, u16 opcode, u32 opt_param_mask,
+ void *qpc, struct mlx5_core_qp *qp, u32 *ece)
+{
+ struct mbox_info mbox;
+ int err;
+
+ err = modify_qp_mbox_alloc(dev->mdev, opcode, qp->qpn, opt_param_mask,
+ qpc, &mbox, qp->uid, (ece) ? *ece : 0);
+ if (err)
+ return err;
+
+ err = mlx5_cmd_exec(dev->mdev, mbox.in, mbox.inlen, mbox.out,
+ mbox.outlen);
+
+ if (ece)
+ *ece = get_ece_from_mbox(mbox.out, opcode);
+
+ mbox_free(&mbox);
+ return err;
+}
+
+int mlx5_init_qp_table(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_qp_table *table = &dev->qp_table;
+
+ spin_lock_init(&table->lock);
+ INIT_RADIX_TREE(&table->tree, GFP_ATOMIC);
+ xa_init(&table->dct_xa);
+
+ if (dev->ib_dev.type != RDMA_DEVICE_TYPE_SMI)
+ mlx5_qp_debugfs_init(dev->mdev);
+
+ table->nb.notifier_call = rsc_event_notifier;
+ mlx5_notifier_register(dev->mdev, &table->nb);
+
+ return 0;
+}
+
+void mlx5_cleanup_qp_table(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_qp_table *table = &dev->qp_table;
+
+ mlx5_notifier_unregister(dev->mdev, &table->nb);
+ if (dev->ib_dev.type != RDMA_DEVICE_TYPE_SMI)
+ mlx5_qp_debugfs_cleanup(dev->mdev);
+}
+
+int mlx5_core_qp_query(struct mlx5_ib_dev *dev, struct mlx5_core_qp *qp,
+ u32 *out, int outlen, bool qpc_ext)
+{
+ u32 in[MLX5_ST_SZ_DW(query_qp_in)] = {};
+
+ MLX5_SET(query_qp_in, in, opcode, MLX5_CMD_OP_QUERY_QP);
+ MLX5_SET(query_qp_in, in, qpn, qp->qpn);
+ MLX5_SET(query_qp_in, in, qpc_ext, qpc_ext);
+
+ return mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, outlen);
+}
+
+int mlx5_core_dct_query(struct mlx5_ib_dev *dev, struct mlx5_core_dct *dct,
+ u32 *out, int outlen)
+{
+ u32 in[MLX5_ST_SZ_DW(query_dct_in)] = {};
+ struct mlx5_core_qp *qp = &dct->mqp;
+
+ MLX5_SET(query_dct_in, in, opcode, MLX5_CMD_OP_QUERY_DCT);
+ MLX5_SET(query_dct_in, in, dctn, qp->qpn);
+
+ return mlx5_cmd_exec(dev->mdev, (void *)&in, sizeof(in), (void *)out,
+ outlen);
+}
+
+int mlx5_core_xrcd_alloc(struct mlx5_ib_dev *dev, u32 *xrcdn)
+{
+ u32 out[MLX5_ST_SZ_DW(alloc_xrcd_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(alloc_xrcd_in)] = {};
+ int err;
+
+ MLX5_SET(alloc_xrcd_in, in, opcode, MLX5_CMD_OP_ALLOC_XRCD);
+ err = mlx5_cmd_exec_inout(dev->mdev, alloc_xrcd, in, out);
+ if (!err)
+ *xrcdn = MLX5_GET(alloc_xrcd_out, out, xrcd);
+ return err;
+}
+
+int mlx5_core_xrcd_dealloc(struct mlx5_ib_dev *dev, u32 xrcdn)
+{
+ u32 in[MLX5_ST_SZ_DW(dealloc_xrcd_in)] = {};
+
+ MLX5_SET(dealloc_xrcd_in, in, opcode, MLX5_CMD_OP_DEALLOC_XRCD);
+ MLX5_SET(dealloc_xrcd_in, in, xrcd, xrcdn);
+ return mlx5_cmd_exec_in(dev->mdev, dealloc_xrcd, in);
+}
+
+static int destroy_rq_tracked(struct mlx5_ib_dev *dev, u32 rqn, u16 uid)
+{
+ u32 in[MLX5_ST_SZ_DW(destroy_rq_in)] = {};
+
+ MLX5_SET(destroy_rq_in, in, opcode, MLX5_CMD_OP_DESTROY_RQ);
+ MLX5_SET(destroy_rq_in, in, rqn, rqn);
+ MLX5_SET(destroy_rq_in, in, uid, uid);
+ return mlx5_cmd_exec_in(dev->mdev, destroy_rq, in);
+}
+
+int mlx5_core_create_rq_tracked(struct mlx5_ib_dev *dev, u32 *in, int inlen,
+ struct mlx5_core_qp *rq)
+{
+ int err;
+ u32 rqn;
+
+ err = mlx5_core_create_rq(dev->mdev, in, inlen, &rqn);
+ if (err)
+ return err;
+
+ rq->uid = MLX5_GET(create_rq_in, in, uid);
+ rq->qpn = rqn;
+ err = create_resource_common(dev, rq, MLX5_RES_RQ);
+ if (err)
+ goto err_destroy_rq;
+
+ return 0;
+
+err_destroy_rq:
+ destroy_rq_tracked(dev, rq->qpn, rq->uid);
+
+ return err;
+}
+
+int mlx5_core_destroy_rq_tracked(struct mlx5_ib_dev *dev,
+ struct mlx5_core_qp *rq)
+{
+ int ret;
+
+ /* The rq destruction can be called again in case it fails, hence we
+ * mark the common resource as invalid and only once FW destruction
+ * is completed successfully we actually destroy the resources.
+ */
+ modify_resource_common_state(dev, rq, true);
+ ret = destroy_rq_tracked(dev, rq->qpn, rq->uid);
+ if (ret) {
+ modify_resource_common_state(dev, rq, false);
+ return ret;
+ }
+ destroy_resource_common(dev, rq);
+ return 0;
+}
+
+static void destroy_sq_tracked(struct mlx5_ib_dev *dev, u32 sqn, u16 uid)
+{
+ u32 in[MLX5_ST_SZ_DW(destroy_sq_in)] = {};
+
+ MLX5_SET(destroy_sq_in, in, opcode, MLX5_CMD_OP_DESTROY_SQ);
+ MLX5_SET(destroy_sq_in, in, sqn, sqn);
+ MLX5_SET(destroy_sq_in, in, uid, uid);
+ mlx5_cmd_exec_in(dev->mdev, destroy_sq, in);
+}
+
+int mlx5_core_create_sq_tracked(struct mlx5_ib_dev *dev, u32 *in, int inlen,
+ struct mlx5_core_qp *sq)
+{
+ u32 out[MLX5_ST_SZ_DW(create_sq_out)] = {};
+ int err;
+
+ MLX5_SET(create_sq_in, in, opcode, MLX5_CMD_OP_CREATE_SQ);
+ err = mlx5_cmd_exec(dev->mdev, in, inlen, out, sizeof(out));
+ if (err)
+ return err;
+
+ sq->qpn = MLX5_GET(create_sq_out, out, sqn);
+ sq->uid = MLX5_GET(create_sq_in, in, uid);
+ err = create_resource_common(dev, sq, MLX5_RES_SQ);
+ if (err)
+ goto err_destroy_sq;
+
+ return 0;
+
+err_destroy_sq:
+ destroy_sq_tracked(dev, sq->qpn, sq->uid);
+
+ return err;
+}
+
+void mlx5_core_destroy_sq_tracked(struct mlx5_ib_dev *dev,
+ struct mlx5_core_qp *sq)
+{
+ destroy_resource_common(dev, sq);
+ destroy_sq_tracked(dev, sq->qpn, sq->uid);
+}
+
+struct mlx5_core_rsc_common *mlx5_core_res_hold(struct mlx5_ib_dev *dev,
+ int res_num,
+ enum mlx5_res_type res_type)
+{
+ u32 rsn = res_num | (res_type << MLX5_USER_INDEX_LEN);
+ struct mlx5_qp_table *table = &dev->qp_table;
+
+ return mlx5_get_rsc(table, rsn);
+}
+
+void mlx5_core_res_put(struct mlx5_core_rsc_common *res)
+{
+ mlx5_core_put_rsc(res);
+}
diff --git a/drivers/infiniband/hw/mlx5/restrack.c b/drivers/infiniband/hw/mlx5/restrack.c
new file mode 100644
index 000000000000..67841922c7b8
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/restrack.c
@@ -0,0 +1,217 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2019-2020, Mellanox Technologies Ltd. All rights reserved.
+ */
+
+#include <uapi/rdma/rdma_netlink.h>
+#include <linux/mlx5/rsc_dump.h>
+#include <rdma/ib_umem_odp.h>
+#include <rdma/restrack.h>
+#include "mlx5_ib.h"
+#include "restrack.h"
+
+#define MAX_DUMP_SIZE 1024
+
+static int dump_rsc(struct mlx5_core_dev *dev, enum mlx5_sgmt_type type,
+ int index, void *data, int *data_len)
+{
+ struct mlx5_core_dev *mdev = dev;
+ struct mlx5_rsc_dump_cmd *cmd;
+ struct mlx5_rsc_key key = {};
+ struct page *page;
+ int offset = 0;
+ int err = 0;
+ int cmd_err;
+ int size;
+
+ page = alloc_page(GFP_KERNEL);
+ if (!page)
+ return -ENOMEM;
+
+ key.size = PAGE_SIZE;
+ key.rsc = type;
+ key.index1 = index;
+ key.num_of_obj1 = 1;
+
+ cmd = mlx5_rsc_dump_cmd_create(mdev, &key);
+ if (IS_ERR(cmd)) {
+ err = PTR_ERR(cmd);
+ goto free_page;
+ }
+
+ do {
+ cmd_err = mlx5_rsc_dump_next(mdev, cmd, page, &size);
+ if (cmd_err < 0 || size + offset > MAX_DUMP_SIZE) {
+ err = cmd_err;
+ goto destroy_cmd;
+ }
+ memcpy(data + offset, page_address(page), size);
+ offset += size;
+ } while (cmd_err > 0);
+ *data_len = offset;
+
+destroy_cmd:
+ mlx5_rsc_dump_cmd_destroy(cmd);
+free_page:
+ __free_page(page);
+ return err;
+}
+
+static int fill_res_raw(struct sk_buff *msg, struct mlx5_ib_dev *dev,
+ enum mlx5_sgmt_type type, u32 key)
+{
+ int len = 0;
+ void *data;
+ int err;
+
+ data = kzalloc(MAX_DUMP_SIZE, GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ err = dump_rsc(dev->mdev, type, key, data, &len);
+ if (err)
+ goto out;
+
+ err = nla_put(msg, RDMA_NLDEV_ATTR_RES_RAW, len, data);
+out:
+ kfree(data);
+ return err;
+}
+
+static int fill_stat_mr_entry(struct sk_buff *msg, struct ib_mr *ibmr)
+{
+ struct mlx5_ib_mr *mr = to_mmr(ibmr);
+ struct nlattr *table_attr;
+
+ if (!(mr->access_flags & IB_ACCESS_ON_DEMAND))
+ return 0;
+
+ table_attr = nla_nest_start(msg,
+ RDMA_NLDEV_ATTR_STAT_HWCOUNTERS);
+
+ if (!table_attr)
+ goto err;
+
+ if (rdma_nl_stat_hwcounter_entry(msg, "page_faults",
+ atomic64_read(&mr->odp_stats.faults)))
+ goto err_table;
+ if (rdma_nl_stat_hwcounter_entry(
+ msg, "page_faults_handled",
+ atomic64_read(&mr->odp_stats.faults_handled)))
+ goto err_table;
+ if (rdma_nl_stat_hwcounter_entry(
+ msg, "page_invalidations",
+ atomic64_read(&mr->odp_stats.invalidations)))
+ goto err_table;
+ if (rdma_nl_stat_hwcounter_entry(
+ msg, "page_invalidations_handled",
+ atomic64_read(&mr->odp_stats.invalidations_handled)))
+ goto err_table;
+
+ if (rdma_nl_stat_hwcounter_entry(msg, "page_prefetch",
+ atomic64_read(&mr->odp_stats.prefetch)))
+ goto err_table;
+
+ nla_nest_end(msg, table_attr);
+ return 0;
+
+err_table:
+ nla_nest_cancel(msg, table_attr);
+err:
+ return -EMSGSIZE;
+}
+
+static int fill_res_mr_entry_raw(struct sk_buff *msg, struct ib_mr *ibmr)
+{
+ struct mlx5_ib_mr *mr = to_mmr(ibmr);
+
+ return fill_res_raw(msg, mr_to_mdev(mr), MLX5_SGMT_TYPE_PRM_QUERY_MKEY,
+ mlx5_mkey_to_idx(mr->mmkey.key));
+}
+
+static int fill_res_mr_entry(struct sk_buff *msg, struct ib_mr *ibmr)
+{
+ struct mlx5_ib_mr *mr = to_mmr(ibmr);
+ struct nlattr *table_attr;
+
+ if (!(mr->access_flags & IB_ACCESS_ON_DEMAND))
+ return 0;
+
+ table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER);
+ if (!table_attr)
+ goto err;
+
+ if (mr->is_odp_implicit) {
+ if (rdma_nl_put_driver_string(msg, "odp", "implicit"))
+ goto err;
+ } else {
+ if (rdma_nl_put_driver_string(msg, "odp", "explicit"))
+ goto err;
+ }
+
+ nla_nest_end(msg, table_attr);
+ return 0;
+
+err:
+ nla_nest_cancel(msg, table_attr);
+ return -EMSGSIZE;
+}
+
+static int fill_res_cq_entry_raw(struct sk_buff *msg, struct ib_cq *ibcq)
+{
+ struct mlx5_ib_dev *dev = to_mdev(ibcq->device);
+ struct mlx5_ib_cq *cq = to_mcq(ibcq);
+
+ return fill_res_raw(msg, dev, MLX5_SGMT_TYPE_PRM_QUERY_CQ, cq->mcq.cqn);
+}
+
+static int fill_res_qp_entry(struct sk_buff *msg, struct ib_qp *ibqp)
+{
+ struct mlx5_ib_qp *qp = to_mqp(ibqp);
+ int ret;
+
+ if (qp->type < IB_QPT_DRIVER)
+ return 0;
+
+ switch (qp->type) {
+ case MLX5_IB_QPT_REG_UMR:
+ ret = nla_put_string(msg, RDMA_NLDEV_ATTR_RES_SUBTYPE,
+ "REG_UMR");
+ break;
+ case MLX5_IB_QPT_DCT:
+ ret = nla_put_string(msg, RDMA_NLDEV_ATTR_RES_SUBTYPE, "DCT");
+ break;
+ case MLX5_IB_QPT_DCI:
+ ret = nla_put_string(msg, RDMA_NLDEV_ATTR_RES_SUBTYPE, "DCI");
+ break;
+ default:
+ return 0;
+ }
+ if (ret)
+ return ret;
+
+ return nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, IB_QPT_DRIVER);
+}
+
+static int fill_res_qp_entry_raw(struct sk_buff *msg, struct ib_qp *ibqp)
+{
+ struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
+
+ return fill_res_raw(msg, dev, MLX5_SGMT_TYPE_PRM_QUERY_QP,
+ ibqp->qp_num);
+}
+
+static const struct ib_device_ops restrack_ops = {
+ .fill_res_cq_entry_raw = fill_res_cq_entry_raw,
+ .fill_res_mr_entry = fill_res_mr_entry,
+ .fill_res_mr_entry_raw = fill_res_mr_entry_raw,
+ .fill_res_qp_entry = fill_res_qp_entry,
+ .fill_res_qp_entry_raw = fill_res_qp_entry_raw,
+ .fill_stat_mr_entry = fill_stat_mr_entry,
+};
+
+int mlx5_ib_restrack_init(struct mlx5_ib_dev *dev)
+{
+ ib_set_device_ops(&dev->ib_dev, &restrack_ops);
+ return 0;
+}
diff --git a/drivers/infiniband/hw/mlx5/restrack.h b/drivers/infiniband/hw/mlx5/restrack.h
new file mode 100644
index 000000000000..e8d81270f1b6
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/restrack.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright (c) 2013-2020, Mellanox Technologies Ltd. All rights reserved.
+ */
+
+#ifndef _MLX5_IB_RESTRACK_H
+#define _MLX5_IB_RESTRACK_H
+
+#include "mlx5_ib.h"
+
+int mlx5_ib_restrack_init(struct mlx5_ib_dev *dev);
+
+#endif /* _MLX5_IB_RESTRACK_H */
diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c
index 6f4397ee1ed6..bcb6b324af50 100644
--- a/drivers/infiniband/hw/mlx5/srq.c
+++ b/drivers/infiniband/hw/mlx5/srq.c
@@ -1,50 +1,18 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
- * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
+ * Copyright (c) 2013-2018, Mellanox Technologies inc. All rights reserved.
*/
-#include <linux/module.h>
#include <linux/mlx5/qp.h>
-#include <linux/mlx5/srq.h>
#include <linux/slab.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_user_verbs.h>
-
#include "mlx5_ib.h"
-
-/* not supported currently */
-static int srq_signature;
+#include "srq.h"
static void *get_wqe(struct mlx5_ib_srq *srq, int n)
{
- return mlx5_buf_offset(&srq->buf, n << srq->msrq.wqe_shift);
+ return mlx5_frag_buf_get_wqe(&srq->fbc, n);
}
static void mlx5_ib_srq_event(struct mlx5_core_srq *srq, enum mlx5_event type)
@@ -78,12 +46,10 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
struct mlx5_ib_create_srq ucmd = {};
+ struct mlx5_ib_ucontext *ucontext = rdma_udata_to_drv_context(
+ udata, struct mlx5_ib_ucontext, ibucontext);
size_t ucmdlen;
int err;
- int npages;
- int page_shift;
- int ncont;
- u32 offset;
u32 uidx = MLX5_IB_DEFAULT_UIDX;
ucmdlen = min(udata->inlen, sizeof(ucmd));
@@ -101,58 +67,35 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
udata->inlen - sizeof(ucmd)))
return -EINVAL;
- if (in->type == IB_SRQT_XRC) {
- err = get_srq_user_index(to_mucontext(pd->uobject->context),
- &ucmd, udata->inlen, &uidx);
+ if (in->type != IB_SRQT_BASIC) {
+ err = get_srq_user_index(ucontext, &ucmd, udata->inlen, &uidx);
if (err)
return err;
}
srq->wq_sig = !!(ucmd.flags & MLX5_SRQ_FLAG_SIGNATURE);
- srq->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr, buf_size,
- 0, 0);
+ srq->umem = ib_umem_get(pd->device, ucmd.buf_addr, buf_size, 0);
if (IS_ERR(srq->umem)) {
mlx5_ib_dbg(dev, "failed umem get, size %d\n", buf_size);
err = PTR_ERR(srq->umem);
return err;
}
+ in->umem = srq->umem;
- mlx5_ib_cont_pages(srq->umem, ucmd.buf_addr, 0, &npages,
- &page_shift, &ncont, NULL);
- err = mlx5_ib_get_buf_offset(ucmd.buf_addr, page_shift,
- &offset);
- if (err) {
- mlx5_ib_warn(dev, "bad offset\n");
- goto err_umem;
- }
-
- in->pas = mlx5_vzalloc(sizeof(*in->pas) * ncont);
- if (!in->pas) {
- err = -ENOMEM;
- goto err_umem;
- }
-
- mlx5_ib_populate_pas(dev, srq->umem, page_shift, in->pas, 0);
-
- err = mlx5_ib_db_map_user(to_mucontext(pd->uobject->context),
- ucmd.db_addr, &srq->db);
+ err = mlx5_ib_db_map_user(ucontext, ucmd.db_addr, &srq->db);
if (err) {
mlx5_ib_dbg(dev, "map doorbell failed\n");
- goto err_in;
+ goto err_umem;
}
- in->log_page_size = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
- in->page_offset = offset;
+ in->uid = (in->type != IB_SRQT_XRC) ? to_mpd(pd)->uid : 0;
if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1 &&
- in->type == IB_SRQT_XRC)
+ in->type != IB_SRQT_BASIC)
in->user_index = uidx;
return 0;
-err_in:
- kvfree(in->pas);
-
err_umem:
ib_umem_release(srq->umem);
@@ -165,8 +108,6 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
int err;
int i;
struct mlx5_wqe_srq_next_seg *next;
- int page_shift;
- int npages;
err = mlx5_db_alloc(dev->mdev, &srq->db);
if (err) {
@@ -174,12 +115,15 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
return err;
}
- if (mlx5_buf_alloc(dev->mdev, buf_size, &srq->buf)) {
+ if (mlx5_frag_buf_alloc_node(dev->mdev, buf_size, &srq->buf,
+ dev->mdev->priv.numa_node)) {
mlx5_ib_dbg(dev, "buf alloc failed\n");
err = -ENOMEM;
goto err_db;
}
- page_shift = srq->buf.page_shift;
+
+ mlx5_init_fbc(srq->buf.frags, srq->msrq.wqe_shift, ilog2(srq->msrq.max),
+ &srq->fbc);
srq->head = 0;
srq->tail = srq->msrq.max - 1;
@@ -191,26 +135,24 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
cpu_to_be16((i + 1) & (srq->msrq.max - 1));
}
- npages = DIV_ROUND_UP(srq->buf.npages, 1 << (page_shift - PAGE_SHIFT));
- mlx5_ib_dbg(dev, "buf_size %d, page_shift %d, npages %d, calc npages %d\n",
- buf_size, page_shift, srq->buf.npages, npages);
- in->pas = mlx5_vzalloc(sizeof(*in->pas) * npages);
+ mlx5_ib_dbg(dev, "srq->buf.page_shift = %d\n", srq->buf.page_shift);
+ in->pas = kvcalloc(srq->buf.npages, sizeof(*in->pas), GFP_KERNEL);
if (!in->pas) {
err = -ENOMEM;
goto err_buf;
}
- mlx5_fill_page_array(&srq->buf, in->pas);
+ mlx5_fill_page_frag_array(&srq->buf, in->pas);
- srq->wrid = kmalloc(srq->msrq.max * sizeof(u64), GFP_KERNEL);
+ srq->wrid = kvmalloc_array(srq->msrq.max, sizeof(u64), GFP_KERNEL);
if (!srq->wrid) {
err = -ENOMEM;
goto err_in;
}
- srq->wq_sig = !!srq_signature;
+ srq->wq_sig = 0;
- in->log_page_size = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
+ in->log_page_size = srq->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT;
if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1 &&
- in->type == IB_SRQT_XRC)
+ in->type != IB_SRQT_BASIC)
in->user_index = MLX5_IB_DEFAULT_UIDX;
return 0;
@@ -219,50 +161,64 @@ err_in:
kvfree(in->pas);
err_buf:
- mlx5_buf_free(dev->mdev, &srq->buf);
+ mlx5_frag_buf_free(dev->mdev, &srq->buf);
err_db:
mlx5_db_free(dev->mdev, &srq->db);
return err;
}
-static void destroy_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq)
+static void destroy_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
+ struct ib_udata *udata)
{
- mlx5_ib_db_unmap_user(to_mucontext(pd->uobject->context), &srq->db);
+ mlx5_ib_db_unmap_user(
+ rdma_udata_to_drv_context(
+ udata,
+ struct mlx5_ib_ucontext,
+ ibucontext),
+ &srq->db);
ib_umem_release(srq->umem);
}
static void destroy_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq)
{
- kfree(srq->wrid);
- mlx5_buf_free(dev->mdev, &srq->buf);
+ kvfree(srq->wrid);
+ mlx5_frag_buf_free(dev->mdev, &srq->buf);
mlx5_db_free(dev->mdev, &srq->db);
}
-struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd,
- struct ib_srq_init_attr *init_attr,
- struct ib_udata *udata)
+int mlx5_ib_create_srq(struct ib_srq *ib_srq,
+ struct ib_srq_init_attr *init_attr,
+ struct ib_udata *udata)
{
- struct mlx5_ib_dev *dev = to_mdev(pd->device);
- struct mlx5_ib_srq *srq;
- int desc_size;
- int buf_size;
+ struct mlx5_ib_dev *dev = to_mdev(ib_srq->device);
+ struct mlx5_ib_srq *srq = to_msrq(ib_srq);
+ size_t desc_size;
+ size_t buf_size;
int err;
- struct mlx5_srq_attr in = {0};
+ struct mlx5_srq_attr in = {};
__u32 max_srq_wqes = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz);
-
- /* Sanity check SRQ size before proceeding */
- if (init_attr->attr.max_wr >= max_srq_wqes) {
- mlx5_ib_dbg(dev, "max_wr %d, cap %d\n",
- init_attr->attr.max_wr,
- max_srq_wqes);
- return ERR_PTR(-EINVAL);
+ __u32 max_sge_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_rq) /
+ sizeof(struct mlx5_wqe_data_seg);
+
+ if (init_attr->srq_type != IB_SRQT_BASIC &&
+ init_attr->srq_type != IB_SRQT_XRC &&
+ init_attr->srq_type != IB_SRQT_TM)
+ return -EOPNOTSUPP;
+
+ /* Sanity check SRQ and sge size before proceeding */
+ if (init_attr->attr.max_wr >= max_srq_wqes ||
+ init_attr->attr.max_sge > max_sge_sz) {
+ mlx5_ib_dbg(dev, "max_wr %d,wr_cap %d,max_sge %d, sge_cap:%d\n",
+ init_attr->attr.max_wr, max_srq_wqes,
+ init_attr->attr.max_sge, max_sge_sz);
+ return -EINVAL;
}
- srq = kmalloc(sizeof(*srq), GFP_KERNEL);
- if (!srq)
- return ERR_PTR(-ENOMEM);
+ err = mlx5_ib_dev_res_cq_init(dev);
+ if (err)
+ return err;
mutex_init(&srq->mutex);
spin_lock_init(&srq->lock);
@@ -271,43 +227,64 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd,
desc_size = sizeof(struct mlx5_wqe_srq_next_seg) +
srq->msrq.max_gs * sizeof(struct mlx5_wqe_data_seg);
+ if (desc_size == 0 || srq->msrq.max_gs > desc_size)
+ return -EINVAL;
+
desc_size = roundup_pow_of_two(desc_size);
- desc_size = max_t(int, 32, desc_size);
+ desc_size = max_t(size_t, 32, desc_size);
+ if (desc_size < sizeof(struct mlx5_wqe_srq_next_seg))
+ return -EINVAL;
+
srq->msrq.max_avail_gather = (desc_size - sizeof(struct mlx5_wqe_srq_next_seg)) /
sizeof(struct mlx5_wqe_data_seg);
srq->msrq.wqe_shift = ilog2(desc_size);
buf_size = srq->msrq.max * desc_size;
- mlx5_ib_dbg(dev, "desc_size 0x%x, req wr 0x%x, srq size 0x%x, max_gs 0x%x, max_avail_gather 0x%x\n",
- desc_size, init_attr->attr.max_wr, srq->msrq.max, srq->msrq.max_gs,
- srq->msrq.max_avail_gather);
+ if (buf_size < desc_size)
+ return -EINVAL;
+
in.type = init_attr->srq_type;
- if (pd->uobject)
- err = create_srq_user(pd, srq, &in, udata, buf_size);
+ if (udata)
+ err = create_srq_user(ib_srq->pd, srq, &in, udata, buf_size);
else
err = create_srq_kernel(dev, srq, &in, buf_size);
if (err) {
mlx5_ib_warn(dev, "create srq %s failed, err %d\n",
- pd->uobject ? "user" : "kernel", err);
- goto err_srq;
+ udata ? "user" : "kernel", err);
+ return err;
}
in.log_size = ilog2(srq->msrq.max);
in.wqe_shift = srq->msrq.wqe_shift - 4;
if (srq->wq_sig)
in.flags |= MLX5_SRQ_FLAG_WQ_SIG;
- if (init_attr->srq_type == IB_SRQT_XRC) {
+
+ if (init_attr->srq_type == IB_SRQT_XRC && init_attr->ext.xrc.xrcd)
in.xrcd = to_mxrcd(init_attr->ext.xrc.xrcd)->xrcdn;
- in.cqn = to_mcq(init_attr->ext.xrc.cq)->mcq.cqn;
- } else if (init_attr->srq_type == IB_SRQT_BASIC) {
- in.xrcd = to_mxrcd(dev->devr.x0)->xrcdn;
- in.cqn = to_mcq(dev->devr.c0)->mcq.cqn;
+ else
+ in.xrcd = dev->devr.xrcdn0;
+
+ if (init_attr->srq_type == IB_SRQT_TM) {
+ in.tm_log_list_size =
+ ilog2(init_attr->ext.tag_matching.max_num_tags) + 1;
+ if (in.tm_log_list_size >
+ MLX5_CAP_GEN(dev->mdev, log_tag_matching_list_sz)) {
+ mlx5_ib_dbg(dev, "TM SRQ max_num_tags exceeding limit\n");
+ err = -EINVAL;
+ goto err_usr_kern_srq;
+ }
+ in.flags |= MLX5_SRQ_FLAG_RNDV;
}
- in.pd = to_mpd(pd)->pdn;
+ if (ib_srq_has_cq(init_attr->srq_type))
+ in.cqn = to_mcq(init_attr->ext.cq)->mcq.cqn;
+ else
+ in.cqn = to_mcq(dev->devr.c0)->mcq.cqn;
+
+ in.pd = to_mpd(ib_srq->pd)->pdn;
in.db_record = srq->db.dma;
- err = mlx5_core_create_srq(dev->mdev, &srq->msrq, &in);
+ err = mlx5_cmd_create_srq(dev, &srq->msrq, &in);
kvfree(in.pas);
if (err) {
mlx5_ib_dbg(dev, "create SRQ failed, err %d\n", err);
@@ -319,30 +296,33 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd,
srq->msrq.event = mlx5_ib_srq_event;
srq->ibsrq.ext.xrc.srq_num = srq->msrq.srqn;
- if (pd->uobject)
- if (ib_copy_to_udata(udata, &srq->msrq.srqn, sizeof(__u32))) {
+ if (udata) {
+ struct mlx5_ib_create_srq_resp resp = {
+ .srqn = srq->msrq.srqn,
+ };
+
+ if (ib_copy_to_udata(udata, &resp, min(udata->outlen,
+ sizeof(resp)))) {
mlx5_ib_dbg(dev, "copy to user failed\n");
err = -EFAULT;
goto err_core;
}
+ }
init_attr->attr.max_wr = srq->msrq.max - 1;
- return &srq->ibsrq;
+ return 0;
err_core:
- mlx5_core_destroy_srq(dev->mdev, &srq->msrq);
+ mlx5_cmd_destroy_srq(dev, &srq->msrq);
err_usr_kern_srq:
- if (pd->uobject)
- destroy_srq_user(pd, srq);
+ if (udata)
+ destroy_srq_user(ib_srq->pd, srq, udata);
else
destroy_srq_kernel(dev, srq);
-err_srq:
- kfree(srq);
-
- return ERR_PTR(err);
+ return err;
}
int mlx5_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
@@ -361,7 +341,7 @@ int mlx5_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
return -EINVAL;
mutex_lock(&srq->mutex);
- ret = mlx5_core_arm_srq(dev->mdev, &srq->msrq, attr->srq_limit, 1);
+ ret = mlx5_cmd_arm_srq(dev, &srq->msrq, attr->srq_limit, 1);
mutex_unlock(&srq->mutex);
if (ret)
@@ -382,7 +362,7 @@ int mlx5_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr)
if (!out)
return -ENOMEM;
- ret = mlx5_core_query_srq(dev->mdev, &srq->msrq, out);
+ ret = mlx5_cmd_query_srq(dev, &srq->msrq, out);
if (ret)
goto out_box;
@@ -395,21 +375,20 @@ out_box:
return ret;
}
-int mlx5_ib_destroy_srq(struct ib_srq *srq)
+int mlx5_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata)
{
struct mlx5_ib_dev *dev = to_mdev(srq->device);
struct mlx5_ib_srq *msrq = to_msrq(srq);
+ int ret;
- mlx5_core_destroy_srq(dev->mdev, &msrq->msrq);
+ ret = mlx5_cmd_destroy_srq(dev, &msrq->msrq);
+ if (ret)
+ return ret;
- if (srq->uobject) {
- mlx5_ib_db_unmap_user(to_mucontext(srq->uobject->context), &msrq->db);
- ib_umem_release(msrq->umem);
- } else {
+ if (udata)
+ destroy_srq_user(srq->pd, msrq, udata);
+ else
destroy_srq_kernel(dev, msrq);
- }
-
- kfree(srq);
return 0;
}
@@ -427,8 +406,8 @@ void mlx5_ib_free_srq_wqe(struct mlx5_ib_srq *srq, int wqe_index)
spin_unlock(&srq->lock);
}
-int mlx5_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
- struct ib_recv_wr **bad_wr)
+int mlx5_ib_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
+ const struct ib_recv_wr **bad_wr)
{
struct mlx5_ib_srq *srq = to_msrq(ibsrq);
struct mlx5_wqe_srq_next_seg *next;
@@ -475,7 +454,7 @@ int mlx5_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
if (i < srq->msrq.max_avail_gather) {
scat[i].byte_count = 0;
- scat[i].lkey = cpu_to_be32(MLX5_INVALID_LKEY);
+ scat[i].lkey = dev->mkeys.terminate_scatter_list_mkey;
scat[i].addr = 0;
}
}
diff --git a/drivers/infiniband/hw/mlx5/srq.h b/drivers/infiniband/hw/mlx5/srq.h
new file mode 100644
index 000000000000..a7e3dc5564ac
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/srq.h
@@ -0,0 +1,69 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright (c) 2013-2018, Mellanox Technologies. All rights reserved.
+ */
+
+#ifndef MLX5_IB_SRQ_H
+#define MLX5_IB_SRQ_H
+
+enum {
+ MLX5_SRQ_FLAG_ERR = (1 << 0),
+ MLX5_SRQ_FLAG_WQ_SIG = (1 << 1),
+ MLX5_SRQ_FLAG_RNDV = (1 << 2),
+};
+
+struct mlx5_srq_attr {
+ u32 type;
+ u32 flags;
+ u32 log_size;
+ u32 wqe_shift;
+ u32 log_page_size;
+ u32 wqe_cnt;
+ u32 srqn;
+ u32 xrcd;
+ u32 page_offset;
+ u32 cqn;
+ u32 pd;
+ u32 lwm;
+ u32 user_index;
+ u64 db_record;
+ __be64 *pas;
+ struct ib_umem *umem;
+ u32 tm_log_list_size;
+ u32 tm_next_tag;
+ u32 tm_hw_phase_cnt;
+ u32 tm_sw_phase_cnt;
+ u16 uid;
+};
+
+struct mlx5_ib_dev;
+
+struct mlx5_core_srq {
+ struct mlx5_core_rsc_common common; /* must be first */
+ u32 srqn;
+ int max;
+ size_t max_gs;
+ size_t max_avail_gather;
+ int wqe_shift;
+ void (*event)(struct mlx5_core_srq *srq, enum mlx5_event e);
+
+ u16 uid;
+};
+
+struct mlx5_srq_table {
+ struct notifier_block nb;
+ struct xarray array;
+};
+
+int mlx5_cmd_create_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
+ struct mlx5_srq_attr *in);
+int mlx5_cmd_destroy_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq);
+int mlx5_cmd_query_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
+ struct mlx5_srq_attr *out);
+int mlx5_cmd_arm_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
+ u16 lwm, int is_srq);
+struct mlx5_core_srq *mlx5_cmd_get_srq(struct mlx5_ib_dev *dev, u32 srqn);
+
+int mlx5_init_srq_table(struct mlx5_ib_dev *dev);
+void mlx5_cleanup_srq_table(struct mlx5_ib_dev *dev);
+#endif /* MLX5_IB_SRQ_H */
diff --git a/drivers/infiniband/hw/mlx5/srq_cmd.c b/drivers/infiniband/hw/mlx5/srq_cmd.c
new file mode 100644
index 000000000000..8b3385396599
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/srq_cmd.c
@@ -0,0 +1,774 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2013-2018, Mellanox Technologies inc. All rights reserved.
+ */
+
+#include <linux/kernel.h>
+#include <linux/mlx5/driver.h>
+#include "mlx5_ib.h"
+#include "srq.h"
+#include "qp.h"
+
+static int get_pas_size(struct mlx5_srq_attr *in)
+{
+ u32 log_page_size = in->log_page_size + 12;
+ u32 log_srq_size = in->log_size;
+ u32 log_rq_stride = in->wqe_shift;
+ u32 page_offset = in->page_offset;
+ u32 po_quanta = 1 << (log_page_size - 6);
+ u32 rq_sz = 1 << (log_srq_size + 4 + log_rq_stride);
+ u32 page_size = 1 << log_page_size;
+ u32 rq_sz_po = rq_sz + (page_offset * po_quanta);
+ u32 rq_num_pas = DIV_ROUND_UP(rq_sz_po, page_size);
+
+ return rq_num_pas * sizeof(u64);
+}
+
+static void set_wq(void *wq, struct mlx5_srq_attr *in)
+{
+ MLX5_SET(wq, wq, wq_signature, !!(in->flags
+ & MLX5_SRQ_FLAG_WQ_SIG));
+ MLX5_SET(wq, wq, log_wq_pg_sz, in->log_page_size);
+ MLX5_SET(wq, wq, log_wq_stride, in->wqe_shift + 4);
+ MLX5_SET(wq, wq, log_wq_sz, in->log_size);
+ MLX5_SET(wq, wq, page_offset, in->page_offset);
+ MLX5_SET(wq, wq, lwm, in->lwm);
+ MLX5_SET(wq, wq, pd, in->pd);
+ MLX5_SET64(wq, wq, dbr_addr, in->db_record);
+}
+
+static void set_srqc(void *srqc, struct mlx5_srq_attr *in)
+{
+ MLX5_SET(srqc, srqc, wq_signature, !!(in->flags
+ & MLX5_SRQ_FLAG_WQ_SIG));
+ MLX5_SET(srqc, srqc, log_page_size, in->log_page_size);
+ MLX5_SET(srqc, srqc, log_rq_stride, in->wqe_shift);
+ MLX5_SET(srqc, srqc, log_srq_size, in->log_size);
+ MLX5_SET(srqc, srqc, page_offset, in->page_offset);
+ MLX5_SET(srqc, srqc, lwm, in->lwm);
+ MLX5_SET(srqc, srqc, pd, in->pd);
+ MLX5_SET64(srqc, srqc, dbr_addr, in->db_record);
+ MLX5_SET(srqc, srqc, xrcd, in->xrcd);
+ MLX5_SET(srqc, srqc, cqn, in->cqn);
+}
+
+static void get_wq(void *wq, struct mlx5_srq_attr *in)
+{
+ if (MLX5_GET(wq, wq, wq_signature))
+ in->flags &= MLX5_SRQ_FLAG_WQ_SIG;
+ in->log_page_size = MLX5_GET(wq, wq, log_wq_pg_sz);
+ in->wqe_shift = MLX5_GET(wq, wq, log_wq_stride) - 4;
+ in->log_size = MLX5_GET(wq, wq, log_wq_sz);
+ in->page_offset = MLX5_GET(wq, wq, page_offset);
+ in->lwm = MLX5_GET(wq, wq, lwm);
+ in->pd = MLX5_GET(wq, wq, pd);
+ in->db_record = MLX5_GET64(wq, wq, dbr_addr);
+}
+
+static void get_srqc(void *srqc, struct mlx5_srq_attr *in)
+{
+ if (MLX5_GET(srqc, srqc, wq_signature))
+ in->flags &= MLX5_SRQ_FLAG_WQ_SIG;
+ in->log_page_size = MLX5_GET(srqc, srqc, log_page_size);
+ in->wqe_shift = MLX5_GET(srqc, srqc, log_rq_stride);
+ in->log_size = MLX5_GET(srqc, srqc, log_srq_size);
+ in->page_offset = MLX5_GET(srqc, srqc, page_offset);
+ in->lwm = MLX5_GET(srqc, srqc, lwm);
+ in->pd = MLX5_GET(srqc, srqc, pd);
+ in->db_record = MLX5_GET64(srqc, srqc, dbr_addr);
+}
+
+struct mlx5_core_srq *mlx5_cmd_get_srq(struct mlx5_ib_dev *dev, u32 srqn)
+{
+ struct mlx5_srq_table *table = &dev->srq_table;
+ struct mlx5_core_srq *srq;
+
+ xa_lock_irq(&table->array);
+ srq = xa_load(&table->array, srqn);
+ if (srq)
+ refcount_inc(&srq->common.refcount);
+ xa_unlock_irq(&table->array);
+
+ return srq;
+}
+
+static int __set_srq_page_size(struct mlx5_srq_attr *in,
+ unsigned long page_size)
+{
+ if (!page_size)
+ return -EINVAL;
+ in->log_page_size = order_base_2(page_size) - MLX5_ADAPTER_PAGE_SHIFT;
+
+ if (WARN_ON(get_pas_size(in) !=
+ ib_umem_num_dma_blocks(in->umem, page_size) * sizeof(u64)))
+ return -EINVAL;
+ return 0;
+}
+
+#define set_srq_page_size(in, typ, log_pgsz_fld) \
+ __set_srq_page_size(in, mlx5_umem_find_best_quantized_pgoff( \
+ (in)->umem, typ, log_pgsz_fld, \
+ MLX5_ADAPTER_PAGE_SHIFT, page_offset, \
+ 64, &(in)->page_offset))
+
+static int create_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
+ struct mlx5_srq_attr *in)
+{
+ u32 create_out[MLX5_ST_SZ_DW(create_srq_out)] = {0};
+ void *create_in;
+ void *srqc;
+ void *pas;
+ int pas_size;
+ int inlen;
+ int err;
+
+ if (in->umem) {
+ err = set_srq_page_size(in, srqc, log_page_size);
+ if (err)
+ return err;
+ }
+
+ pas_size = get_pas_size(in);
+ inlen = MLX5_ST_SZ_BYTES(create_srq_in) + pas_size;
+ create_in = kvzalloc(inlen, GFP_KERNEL);
+ if (!create_in)
+ return -ENOMEM;
+
+ MLX5_SET(create_srq_in, create_in, uid, in->uid);
+ srqc = MLX5_ADDR_OF(create_srq_in, create_in, srq_context_entry);
+ pas = MLX5_ADDR_OF(create_srq_in, create_in, pas);
+
+ set_srqc(srqc, in);
+ if (in->umem)
+ mlx5_ib_populate_pas(
+ in->umem,
+ 1UL << (in->log_page_size + MLX5_ADAPTER_PAGE_SHIFT),
+ pas, 0);
+ else
+ memcpy(pas, in->pas, pas_size);
+
+ MLX5_SET(create_srq_in, create_in, opcode,
+ MLX5_CMD_OP_CREATE_SRQ);
+
+ err = mlx5_cmd_exec(dev->mdev, create_in, inlen, create_out,
+ sizeof(create_out));
+ kvfree(create_in);
+ if (!err) {
+ srq->srqn = MLX5_GET(create_srq_out, create_out, srqn);
+ srq->uid = in->uid;
+ }
+
+ return err;
+}
+
+static int destroy_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq)
+{
+ u32 in[MLX5_ST_SZ_DW(destroy_srq_in)] = {};
+
+ MLX5_SET(destroy_srq_in, in, opcode, MLX5_CMD_OP_DESTROY_SRQ);
+ MLX5_SET(destroy_srq_in, in, srqn, srq->srqn);
+ MLX5_SET(destroy_srq_in, in, uid, srq->uid);
+
+ return mlx5_cmd_exec_in(dev->mdev, destroy_srq, in);
+}
+
+static int arm_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
+ u16 lwm, int is_srq)
+{
+ u32 in[MLX5_ST_SZ_DW(arm_rq_in)] = {};
+
+ MLX5_SET(arm_rq_in, in, opcode, MLX5_CMD_OP_ARM_RQ);
+ MLX5_SET(arm_rq_in, in, op_mod, MLX5_ARM_RQ_IN_OP_MOD_SRQ);
+ MLX5_SET(arm_rq_in, in, srq_number, srq->srqn);
+ MLX5_SET(arm_rq_in, in, lwm, lwm);
+ MLX5_SET(arm_rq_in, in, uid, srq->uid);
+
+ return mlx5_cmd_exec_in(dev->mdev, arm_rq, in);
+}
+
+static int query_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
+ struct mlx5_srq_attr *out)
+{
+ u32 in[MLX5_ST_SZ_DW(query_srq_in)] = {};
+ u32 *srq_out;
+ void *srqc;
+ int err;
+
+ srq_out = kvzalloc(MLX5_ST_SZ_BYTES(query_srq_out), GFP_KERNEL);
+ if (!srq_out)
+ return -ENOMEM;
+
+ MLX5_SET(query_srq_in, in, opcode, MLX5_CMD_OP_QUERY_SRQ);
+ MLX5_SET(query_srq_in, in, srqn, srq->srqn);
+ err = mlx5_cmd_exec_inout(dev->mdev, query_srq, in, srq_out);
+ if (err)
+ goto out;
+
+ srqc = MLX5_ADDR_OF(query_srq_out, srq_out, srq_context_entry);
+ get_srqc(srqc, out);
+ if (MLX5_GET(srqc, srqc, state) != MLX5_SRQC_STATE_GOOD)
+ out->flags |= MLX5_SRQ_FLAG_ERR;
+out:
+ kvfree(srq_out);
+ return err;
+}
+
+static int create_xrc_srq_cmd(struct mlx5_ib_dev *dev,
+ struct mlx5_core_srq *srq,
+ struct mlx5_srq_attr *in)
+{
+ u32 create_out[MLX5_ST_SZ_DW(create_xrc_srq_out)];
+ void *create_in;
+ void *xrc_srqc;
+ void *pas;
+ int pas_size;
+ int inlen;
+ int err;
+
+ if (in->umem) {
+ err = set_srq_page_size(in, xrc_srqc, log_page_size);
+ if (err)
+ return err;
+ }
+
+ pas_size = get_pas_size(in);
+ inlen = MLX5_ST_SZ_BYTES(create_xrc_srq_in) + pas_size;
+ create_in = kvzalloc(inlen, GFP_KERNEL);
+ if (!create_in)
+ return -ENOMEM;
+
+ MLX5_SET(create_xrc_srq_in, create_in, uid, in->uid);
+ xrc_srqc = MLX5_ADDR_OF(create_xrc_srq_in, create_in,
+ xrc_srq_context_entry);
+ pas = MLX5_ADDR_OF(create_xrc_srq_in, create_in, pas);
+
+ set_srqc(xrc_srqc, in);
+ MLX5_SET(xrc_srqc, xrc_srqc, user_index, in->user_index);
+ if (in->umem)
+ mlx5_ib_populate_pas(
+ in->umem,
+ 1UL << (in->log_page_size + MLX5_ADAPTER_PAGE_SHIFT),
+ pas, 0);
+ else
+ memcpy(pas, in->pas, pas_size);
+ MLX5_SET(create_xrc_srq_in, create_in, opcode,
+ MLX5_CMD_OP_CREATE_XRC_SRQ);
+
+ memset(create_out, 0, sizeof(create_out));
+ err = mlx5_cmd_exec(dev->mdev, create_in, inlen, create_out,
+ sizeof(create_out));
+ if (err)
+ goto out;
+
+ srq->srqn = MLX5_GET(create_xrc_srq_out, create_out, xrc_srqn);
+ srq->uid = in->uid;
+out:
+ kvfree(create_in);
+ return err;
+}
+
+static int destroy_xrc_srq_cmd(struct mlx5_ib_dev *dev,
+ struct mlx5_core_srq *srq)
+{
+ u32 in[MLX5_ST_SZ_DW(destroy_xrc_srq_in)] = {};
+
+ MLX5_SET(destroy_xrc_srq_in, in, opcode, MLX5_CMD_OP_DESTROY_XRC_SRQ);
+ MLX5_SET(destroy_xrc_srq_in, in, xrc_srqn, srq->srqn);
+ MLX5_SET(destroy_xrc_srq_in, in, uid, srq->uid);
+
+ return mlx5_cmd_exec_in(dev->mdev, destroy_xrc_srq, in);
+}
+
+static int arm_xrc_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
+ u16 lwm)
+{
+ u32 in[MLX5_ST_SZ_DW(arm_xrc_srq_in)] = {};
+
+ MLX5_SET(arm_xrc_srq_in, in, opcode, MLX5_CMD_OP_ARM_XRC_SRQ);
+ MLX5_SET(arm_xrc_srq_in, in, op_mod,
+ MLX5_ARM_XRC_SRQ_IN_OP_MOD_XRC_SRQ);
+ MLX5_SET(arm_xrc_srq_in, in, xrc_srqn, srq->srqn);
+ MLX5_SET(arm_xrc_srq_in, in, lwm, lwm);
+ MLX5_SET(arm_xrc_srq_in, in, uid, srq->uid);
+
+ return mlx5_cmd_exec_in(dev->mdev, arm_xrc_srq, in);
+}
+
+static int query_xrc_srq_cmd(struct mlx5_ib_dev *dev,
+ struct mlx5_core_srq *srq,
+ struct mlx5_srq_attr *out)
+{
+ u32 in[MLX5_ST_SZ_DW(query_xrc_srq_in)] = {};
+ u32 *xrcsrq_out;
+ void *xrc_srqc;
+ int err;
+
+ xrcsrq_out = kvzalloc(MLX5_ST_SZ_BYTES(query_xrc_srq_out), GFP_KERNEL);
+ if (!xrcsrq_out)
+ return -ENOMEM;
+
+ MLX5_SET(query_xrc_srq_in, in, opcode, MLX5_CMD_OP_QUERY_XRC_SRQ);
+ MLX5_SET(query_xrc_srq_in, in, xrc_srqn, srq->srqn);
+
+ err = mlx5_cmd_exec_inout(dev->mdev, query_xrc_srq, in, xrcsrq_out);
+ if (err)
+ goto out;
+
+ xrc_srqc = MLX5_ADDR_OF(query_xrc_srq_out, xrcsrq_out,
+ xrc_srq_context_entry);
+ get_srqc(xrc_srqc, out);
+ if (MLX5_GET(xrc_srqc, xrc_srqc, state) != MLX5_XRC_SRQC_STATE_GOOD)
+ out->flags |= MLX5_SRQ_FLAG_ERR;
+
+out:
+ kvfree(xrcsrq_out);
+ return err;
+}
+
+static int create_rmp_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
+ struct mlx5_srq_attr *in)
+{
+ void *create_out = NULL;
+ void *create_in = NULL;
+ void *rmpc;
+ void *wq;
+ void *pas;
+ int pas_size;
+ int outlen;
+ int inlen;
+ int err;
+
+ if (in->umem) {
+ err = set_srq_page_size(in, wq, log_wq_pg_sz);
+ if (err)
+ return err;
+ }
+
+ pas_size = get_pas_size(in);
+ inlen = MLX5_ST_SZ_BYTES(create_rmp_in) + pas_size;
+ outlen = MLX5_ST_SZ_BYTES(create_rmp_out);
+ create_in = kvzalloc(inlen, GFP_KERNEL);
+ create_out = kvzalloc(outlen, GFP_KERNEL);
+ if (!create_in || !create_out) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ rmpc = MLX5_ADDR_OF(create_rmp_in, create_in, ctx);
+ wq = MLX5_ADDR_OF(rmpc, rmpc, wq);
+
+ MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY);
+ MLX5_SET(create_rmp_in, create_in, uid, in->uid);
+ pas = MLX5_ADDR_OF(rmpc, rmpc, wq.pas);
+
+ set_wq(wq, in);
+ if (in->umem)
+ mlx5_ib_populate_pas(
+ in->umem,
+ 1UL << (in->log_page_size + MLX5_ADAPTER_PAGE_SHIFT),
+ pas, 0);
+ else
+ memcpy(pas, in->pas, pas_size);
+
+ MLX5_SET(create_rmp_in, create_in, opcode, MLX5_CMD_OP_CREATE_RMP);
+ err = mlx5_cmd_exec(dev->mdev, create_in, inlen, create_out, outlen);
+ if (!err) {
+ srq->srqn = MLX5_GET(create_rmp_out, create_out, rmpn);
+ srq->uid = in->uid;
+ }
+
+out:
+ kvfree(create_in);
+ kvfree(create_out);
+ return err;
+}
+
+static int destroy_rmp_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq)
+{
+ u32 in[MLX5_ST_SZ_DW(destroy_rmp_in)] = {};
+
+ MLX5_SET(destroy_rmp_in, in, opcode, MLX5_CMD_OP_DESTROY_RMP);
+ MLX5_SET(destroy_rmp_in, in, rmpn, srq->srqn);
+ MLX5_SET(destroy_rmp_in, in, uid, srq->uid);
+ return mlx5_cmd_exec_in(dev->mdev, destroy_rmp, in);
+}
+
+static int arm_rmp_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
+ u16 lwm)
+{
+ void *out = NULL;
+ void *in = NULL;
+ void *rmpc;
+ void *wq;
+ void *bitmask;
+ int outlen;
+ int inlen;
+ int err;
+
+ inlen = MLX5_ST_SZ_BYTES(modify_rmp_in);
+ outlen = MLX5_ST_SZ_BYTES(modify_rmp_out);
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ out = kvzalloc(outlen, GFP_KERNEL);
+ if (!in || !out) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ rmpc = MLX5_ADDR_OF(modify_rmp_in, in, ctx);
+ bitmask = MLX5_ADDR_OF(modify_rmp_in, in, bitmask);
+ wq = MLX5_ADDR_OF(rmpc, rmpc, wq);
+
+ MLX5_SET(modify_rmp_in, in, rmp_state, MLX5_RMPC_STATE_RDY);
+ MLX5_SET(modify_rmp_in, in, rmpn, srq->srqn);
+ MLX5_SET(modify_rmp_in, in, uid, srq->uid);
+ MLX5_SET(wq, wq, lwm, lwm);
+ MLX5_SET(rmp_bitmask, bitmask, lwm, 1);
+ MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY);
+ MLX5_SET(modify_rmp_in, in, opcode, MLX5_CMD_OP_MODIFY_RMP);
+
+ err = mlx5_cmd_exec_inout(dev->mdev, modify_rmp, in, out);
+
+out:
+ kvfree(in);
+ kvfree(out);
+ return err;
+}
+
+static int query_rmp_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
+ struct mlx5_srq_attr *out)
+{
+ u32 *rmp_out = NULL;
+ u32 *rmp_in = NULL;
+ void *rmpc;
+ int outlen;
+ int inlen;
+ int err;
+
+ outlen = MLX5_ST_SZ_BYTES(query_rmp_out);
+ inlen = MLX5_ST_SZ_BYTES(query_rmp_in);
+
+ rmp_out = kvzalloc(outlen, GFP_KERNEL);
+ rmp_in = kvzalloc(inlen, GFP_KERNEL);
+ if (!rmp_out || !rmp_in) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ MLX5_SET(query_rmp_in, rmp_in, opcode, MLX5_CMD_OP_QUERY_RMP);
+ MLX5_SET(query_rmp_in, rmp_in, rmpn, srq->srqn);
+ err = mlx5_cmd_exec_inout(dev->mdev, query_rmp, rmp_in, rmp_out);
+ if (err)
+ goto out;
+
+ rmpc = MLX5_ADDR_OF(query_rmp_out, rmp_out, rmp_context);
+ get_wq(MLX5_ADDR_OF(rmpc, rmpc, wq), out);
+ if (MLX5_GET(rmpc, rmpc, state) != MLX5_RMPC_STATE_RDY)
+ out->flags |= MLX5_SRQ_FLAG_ERR;
+
+out:
+ kvfree(rmp_out);
+ kvfree(rmp_in);
+ return err;
+}
+
+static int create_xrq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
+ struct mlx5_srq_attr *in)
+{
+ u32 create_out[MLX5_ST_SZ_DW(create_xrq_out)] = {0};
+ void *create_in;
+ void *xrqc;
+ void *wq;
+ void *pas;
+ int pas_size;
+ int inlen;
+ int err;
+
+ if (in->umem) {
+ err = set_srq_page_size(in, wq, log_wq_pg_sz);
+ if (err)
+ return err;
+ }
+
+ pas_size = get_pas_size(in);
+ inlen = MLX5_ST_SZ_BYTES(create_xrq_in) + pas_size;
+ create_in = kvzalloc(inlen, GFP_KERNEL);
+ if (!create_in)
+ return -ENOMEM;
+
+ xrqc = MLX5_ADDR_OF(create_xrq_in, create_in, xrq_context);
+ wq = MLX5_ADDR_OF(xrqc, xrqc, wq);
+ pas = MLX5_ADDR_OF(xrqc, xrqc, wq.pas);
+
+ set_wq(wq, in);
+ if (in->umem)
+ mlx5_ib_populate_pas(
+ in->umem,
+ 1UL << (in->log_page_size + MLX5_ADAPTER_PAGE_SHIFT),
+ pas, 0);
+ else
+ memcpy(pas, in->pas, pas_size);
+
+ if (in->type == IB_SRQT_TM) {
+ MLX5_SET(xrqc, xrqc, topology, MLX5_XRQC_TOPOLOGY_TAG_MATCHING);
+ if (in->flags & MLX5_SRQ_FLAG_RNDV)
+ MLX5_SET(xrqc, xrqc, offload, MLX5_XRQC_OFFLOAD_RNDV);
+ MLX5_SET(xrqc, xrqc,
+ tag_matching_topology_context.log_matching_list_sz,
+ in->tm_log_list_size);
+ }
+ MLX5_SET(xrqc, xrqc, user_index, in->user_index);
+ MLX5_SET(xrqc, xrqc, cqn, in->cqn);
+ MLX5_SET(create_xrq_in, create_in, opcode, MLX5_CMD_OP_CREATE_XRQ);
+ MLX5_SET(create_xrq_in, create_in, uid, in->uid);
+ err = mlx5_cmd_exec(dev->mdev, create_in, inlen, create_out,
+ sizeof(create_out));
+ kvfree(create_in);
+ if (!err) {
+ srq->srqn = MLX5_GET(create_xrq_out, create_out, xrqn);
+ srq->uid = in->uid;
+ }
+
+ return err;
+}
+
+static int destroy_xrq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq)
+{
+ u32 in[MLX5_ST_SZ_DW(destroy_xrq_in)] = {};
+
+ MLX5_SET(destroy_xrq_in, in, opcode, MLX5_CMD_OP_DESTROY_XRQ);
+ MLX5_SET(destroy_xrq_in, in, xrqn, srq->srqn);
+ MLX5_SET(destroy_xrq_in, in, uid, srq->uid);
+
+ return mlx5_cmd_exec_in(dev->mdev, destroy_xrq, in);
+}
+
+static int arm_xrq_cmd(struct mlx5_ib_dev *dev,
+ struct mlx5_core_srq *srq,
+ u16 lwm)
+{
+ u32 in[MLX5_ST_SZ_DW(arm_rq_in)] = {};
+
+ MLX5_SET(arm_rq_in, in, opcode, MLX5_CMD_OP_ARM_RQ);
+ MLX5_SET(arm_rq_in, in, op_mod, MLX5_ARM_RQ_IN_OP_MOD_XRQ);
+ MLX5_SET(arm_rq_in, in, srq_number, srq->srqn);
+ MLX5_SET(arm_rq_in, in, lwm, lwm);
+ MLX5_SET(arm_rq_in, in, uid, srq->uid);
+
+ return mlx5_cmd_exec_in(dev->mdev, arm_rq, in);
+}
+
+static int query_xrq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
+ struct mlx5_srq_attr *out)
+{
+ u32 in[MLX5_ST_SZ_DW(query_xrq_in)] = {};
+ u32 *xrq_out;
+ int outlen = MLX5_ST_SZ_BYTES(query_xrq_out);
+ void *xrqc;
+ int err;
+
+ xrq_out = kvzalloc(outlen, GFP_KERNEL);
+ if (!xrq_out)
+ return -ENOMEM;
+
+ MLX5_SET(query_xrq_in, in, opcode, MLX5_CMD_OP_QUERY_XRQ);
+ MLX5_SET(query_xrq_in, in, xrqn, srq->srqn);
+
+ err = mlx5_cmd_exec_inout(dev->mdev, query_xrq, in, xrq_out);
+ if (err)
+ goto out;
+
+ xrqc = MLX5_ADDR_OF(query_xrq_out, xrq_out, xrq_context);
+ get_wq(MLX5_ADDR_OF(xrqc, xrqc, wq), out);
+ if (MLX5_GET(xrqc, xrqc, state) != MLX5_XRQC_STATE_GOOD)
+ out->flags |= MLX5_SRQ_FLAG_ERR;
+ out->tm_next_tag =
+ MLX5_GET(xrqc, xrqc,
+ tag_matching_topology_context.append_next_index);
+ out->tm_hw_phase_cnt =
+ MLX5_GET(xrqc, xrqc,
+ tag_matching_topology_context.hw_phase_cnt);
+ out->tm_sw_phase_cnt =
+ MLX5_GET(xrqc, xrqc,
+ tag_matching_topology_context.sw_phase_cnt);
+
+out:
+ kvfree(xrq_out);
+ return err;
+}
+
+static int create_srq_split(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
+ struct mlx5_srq_attr *in)
+{
+ if (!dev->mdev->issi)
+ return create_srq_cmd(dev, srq, in);
+ switch (srq->common.res) {
+ case MLX5_RES_XSRQ:
+ return create_xrc_srq_cmd(dev, srq, in);
+ case MLX5_RES_XRQ:
+ return create_xrq_cmd(dev, srq, in);
+ default:
+ return create_rmp_cmd(dev, srq, in);
+ }
+}
+
+static int destroy_srq_split(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq)
+{
+ if (!dev->mdev->issi)
+ return destroy_srq_cmd(dev, srq);
+ switch (srq->common.res) {
+ case MLX5_RES_XSRQ:
+ return destroy_xrc_srq_cmd(dev, srq);
+ case MLX5_RES_XRQ:
+ return destroy_xrq_cmd(dev, srq);
+ default:
+ return destroy_rmp_cmd(dev, srq);
+ }
+}
+
+int mlx5_cmd_create_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
+ struct mlx5_srq_attr *in)
+{
+ struct mlx5_srq_table *table = &dev->srq_table;
+ int err;
+
+ switch (in->type) {
+ case IB_SRQT_XRC:
+ srq->common.res = MLX5_RES_XSRQ;
+ break;
+ case IB_SRQT_TM:
+ srq->common.res = MLX5_RES_XRQ;
+ break;
+ default:
+ srq->common.res = MLX5_RES_SRQ;
+ }
+
+ err = create_srq_split(dev, srq, in);
+ if (err)
+ return err;
+
+ refcount_set(&srq->common.refcount, 1);
+ init_completion(&srq->common.free);
+
+ err = xa_err(xa_store_irq(&table->array, srq->srqn, srq, GFP_KERNEL));
+ if (err)
+ goto err_destroy_srq_split;
+
+ return 0;
+
+err_destroy_srq_split:
+ destroy_srq_split(dev, srq);
+
+ return err;
+}
+
+int mlx5_cmd_destroy_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq)
+{
+ struct mlx5_srq_table *table = &dev->srq_table;
+ struct mlx5_core_srq *tmp;
+ int err;
+
+ /* Delete entry, but leave index occupied */
+ tmp = xa_cmpxchg_irq(&table->array, srq->srqn, srq, XA_ZERO_ENTRY, 0);
+ if (WARN_ON(tmp != srq))
+ return xa_err(tmp) ?: -EINVAL;
+
+ err = destroy_srq_split(dev, srq);
+ if (err) {
+ /*
+ * We don't need to check returned result for an error,
+ * because we are storing in pre-allocated space xarray
+ * entry and it can't fail at this stage.
+ */
+ xa_cmpxchg_irq(&table->array, srq->srqn, XA_ZERO_ENTRY, srq, 0);
+ return err;
+ }
+ xa_erase_irq(&table->array, srq->srqn);
+
+ mlx5_core_res_put(&srq->common);
+ wait_for_completion(&srq->common.free);
+ return 0;
+}
+
+int mlx5_cmd_query_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
+ struct mlx5_srq_attr *out)
+{
+ if (!dev->mdev->issi)
+ return query_srq_cmd(dev, srq, out);
+ switch (srq->common.res) {
+ case MLX5_RES_XSRQ:
+ return query_xrc_srq_cmd(dev, srq, out);
+ case MLX5_RES_XRQ:
+ return query_xrq_cmd(dev, srq, out);
+ default:
+ return query_rmp_cmd(dev, srq, out);
+ }
+}
+
+int mlx5_cmd_arm_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
+ u16 lwm, int is_srq)
+{
+ if (!dev->mdev->issi)
+ return arm_srq_cmd(dev, srq, lwm, is_srq);
+ switch (srq->common.res) {
+ case MLX5_RES_XSRQ:
+ return arm_xrc_srq_cmd(dev, srq, lwm);
+ case MLX5_RES_XRQ:
+ return arm_xrq_cmd(dev, srq, lwm);
+ default:
+ return arm_rmp_cmd(dev, srq, lwm);
+ }
+}
+
+static int srq_event_notifier(struct notifier_block *nb,
+ unsigned long type, void *data)
+{
+ struct mlx5_srq_table *table;
+ struct mlx5_core_srq *srq;
+ struct mlx5_eqe *eqe;
+ u32 srqn;
+
+ if (type != MLX5_EVENT_TYPE_SRQ_CATAS_ERROR &&
+ type != MLX5_EVENT_TYPE_SRQ_RQ_LIMIT)
+ return NOTIFY_DONE;
+
+ table = container_of(nb, struct mlx5_srq_table, nb);
+
+ eqe = data;
+ srqn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff;
+
+ xa_lock(&table->array);
+ srq = xa_load(&table->array, srqn);
+ if (srq)
+ refcount_inc(&srq->common.refcount);
+ xa_unlock(&table->array);
+
+ if (!srq)
+ return NOTIFY_OK;
+
+ srq->event(srq, eqe->type);
+
+ mlx5_core_res_put(&srq->common);
+
+ return NOTIFY_OK;
+}
+
+int mlx5_init_srq_table(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_srq_table *table = &dev->srq_table;
+
+ memset(table, 0, sizeof(*table));
+ xa_init_flags(&table->array, XA_FLAGS_LOCK_IRQ);
+
+ table->nb.notifier_call = srq_event_notifier;
+ mlx5_notifier_register(dev->mdev, &table->nb);
+
+ return 0;
+}
+
+void mlx5_cleanup_srq_table(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_srq_table *table = &dev->srq_table;
+
+ mlx5_notifier_unregister(dev->mdev, &table->nb);
+}
diff --git a/drivers/infiniband/hw/mlx5/std_types.c b/drivers/infiniband/hw/mlx5/std_types.c
new file mode 100644
index 000000000000..2fcf553044e1
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/std_types.c
@@ -0,0 +1,277 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2020, Mellanox Technologies inc. All rights reserved.
+ */
+
+#include <rdma/uverbs_ioctl.h>
+#include <rdma/mlx5_user_ioctl_cmds.h>
+#include <rdma/mlx5_user_ioctl_verbs.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/eswitch.h>
+#include <linux/mlx5/vport.h>
+#include "mlx5_ib.h"
+#include "data_direct.h"
+
+#define UVERBS_MODULE_NAME mlx5_ib
+#include <rdma/uverbs_named_ioctl.h>
+
+static int UVERBS_HANDLER(MLX5_IB_METHOD_PD_QUERY)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_pd *pd =
+ uverbs_attr_get_obj(attrs, MLX5_IB_ATTR_QUERY_PD_HANDLE);
+ struct mlx5_ib_pd *mpd = to_mpd(pd);
+
+ return uverbs_copy_to(attrs, MLX5_IB_ATTR_QUERY_PD_RESP_PDN,
+ &mpd->pdn, sizeof(mpd->pdn));
+}
+
+static int fill_vport_icm_addr(struct mlx5_core_dev *mdev, u16 vport,
+ struct mlx5_ib_uapi_query_port *info)
+{
+ u32 out[MLX5_ST_SZ_DW(query_esw_vport_context_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(query_esw_vport_context_in)] = {};
+ bool sw_owner_supp;
+ u64 icm_rx;
+ u64 icm_tx;
+ int err;
+
+ sw_owner_supp = MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, sw_owner) ||
+ MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, sw_owner_v2);
+
+ if (vport == MLX5_VPORT_UPLINK) {
+ icm_rx = MLX5_CAP64_ESW_FLOWTABLE(mdev,
+ sw_steering_uplink_icm_address_rx);
+ icm_tx = MLX5_CAP64_ESW_FLOWTABLE(mdev,
+ sw_steering_uplink_icm_address_tx);
+ } else {
+ MLX5_SET(query_esw_vport_context_in, in, opcode,
+ MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT);
+ MLX5_SET(query_esw_vport_context_in, in, vport_number, vport);
+ MLX5_SET(query_esw_vport_context_in, in, other_vport, true);
+
+ err = mlx5_cmd_exec_inout(mdev, query_esw_vport_context, in,
+ out);
+
+ if (err)
+ return err;
+
+ icm_rx = MLX5_GET64(
+ query_esw_vport_context_out, out,
+ esw_vport_context.sw_steering_vport_icm_address_rx);
+
+ icm_tx = MLX5_GET64(
+ query_esw_vport_context_out, out,
+ esw_vport_context.sw_steering_vport_icm_address_tx);
+ }
+
+ if (sw_owner_supp && icm_rx) {
+ info->vport_steering_icm_rx = icm_rx;
+ info->flags |=
+ MLX5_IB_UAPI_QUERY_PORT_VPORT_STEERING_ICM_RX;
+ }
+
+ if (sw_owner_supp && icm_tx) {
+ info->vport_steering_icm_tx = icm_tx;
+ info->flags |=
+ MLX5_IB_UAPI_QUERY_PORT_VPORT_STEERING_ICM_TX;
+ }
+
+ return 0;
+}
+
+static int fill_vport_vhca_id(struct mlx5_core_dev *mdev, u16 vport,
+ struct mlx5_ib_uapi_query_port *info)
+{
+ int err = mlx5_vport_get_vhca_id(mdev, vport, &info->vport_vhca_id);
+
+ if (err)
+ return err;
+
+ info->flags |= MLX5_IB_UAPI_QUERY_PORT_VPORT_VHCA_ID;
+
+ return 0;
+}
+
+static int fill_multiport_info(struct mlx5_ib_dev *dev, u32 port_num,
+ struct mlx5_ib_uapi_query_port *info)
+{
+ struct mlx5_core_dev *mdev;
+
+ mdev = mlx5_ib_get_native_port_mdev(dev, port_num, NULL);
+ if (!mdev)
+ return -EINVAL;
+
+ info->vport_vhca_id = MLX5_CAP_GEN(mdev, vhca_id);
+ info->flags |= MLX5_IB_UAPI_QUERY_PORT_VPORT_VHCA_ID;
+
+ mlx5_ib_put_native_port_mdev(dev, port_num);
+
+ return 0;
+}
+
+static int fill_switchdev_info(struct mlx5_ib_dev *dev, u32 port_num,
+ struct mlx5_ib_uapi_query_port *info)
+{
+ struct mlx5_eswitch_rep *rep;
+ struct mlx5_core_dev *mdev;
+ int err;
+
+ rep = dev->port[port_num - 1].rep;
+ if (!rep)
+ return -EOPNOTSUPP;
+
+ mdev = mlx5_eswitch_get_core_dev(rep->esw);
+ if (!mdev)
+ return -EINVAL;
+
+ info->vport = rep->vport;
+ info->flags |= MLX5_IB_UAPI_QUERY_PORT_VPORT;
+
+ if (rep->vport != MLX5_VPORT_UPLINK) {
+ err = fill_vport_vhca_id(mdev, rep->vport, info);
+ if (err)
+ return err;
+ }
+
+ info->esw_owner_vhca_id = MLX5_CAP_GEN(mdev, vhca_id);
+ info->flags |= MLX5_IB_UAPI_QUERY_PORT_ESW_OWNER_VHCA_ID;
+
+ err = fill_vport_icm_addr(mdev, rep->vport, info);
+ if (err)
+ return err;
+
+ if (mlx5_eswitch_vport_match_metadata_enabled(rep->esw)) {
+ info->reg_c0.value = mlx5_eswitch_get_vport_metadata_for_match(
+ rep->esw, rep->vport);
+ info->reg_c0.mask = mlx5_eswitch_get_vport_metadata_mask();
+ info->flags |= MLX5_IB_UAPI_QUERY_PORT_VPORT_REG_C0;
+ }
+
+ return 0;
+}
+
+static int UVERBS_HANDLER(MLX5_IB_METHOD_QUERY_PORT)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct mlx5_ib_uapi_query_port info = {};
+ struct mlx5_ib_ucontext *c;
+ struct mlx5_ib_dev *dev;
+ u32 port_num;
+ int ret;
+
+ if (uverbs_copy_from(&port_num, attrs,
+ MLX5_IB_ATTR_QUERY_PORT_PORT_NUM))
+ return -EFAULT;
+
+ c = to_mucontext(ib_uverbs_get_ucontext(attrs));
+ if (IS_ERR(c))
+ return PTR_ERR(c);
+ dev = to_mdev(c->ibucontext.device);
+
+ if (!rdma_is_port_valid(&dev->ib_dev, port_num))
+ return -EINVAL;
+
+ if (mlx5_eswitch_mode(dev->mdev) == MLX5_ESWITCH_OFFLOADS) {
+ ret = fill_switchdev_info(dev, port_num, &info);
+ if (ret)
+ return ret;
+ } else if (mlx5_core_mp_enabled(dev->mdev)) {
+ ret = fill_multiport_info(dev, port_num, &info);
+ if (ret)
+ return ret;
+ }
+
+ return uverbs_copy_to_struct_or_zero(attrs, MLX5_IB_ATTR_QUERY_PORT, &info,
+ sizeof(info));
+}
+
+static int UVERBS_HANDLER(MLX5_IB_METHOD_GET_DATA_DIRECT_SYSFS_PATH)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct mlx5_data_direct_dev *data_direct_dev;
+ struct mlx5_ib_ucontext *c;
+ struct mlx5_ib_dev *dev;
+ int out_len = uverbs_attr_get_len(attrs,
+ MLX5_IB_ATTR_GET_DATA_DIRECT_SYSFS_PATH);
+ u32 dev_path_len;
+ char *dev_path;
+ int ret;
+
+ c = to_mucontext(ib_uverbs_get_ucontext(attrs));
+ if (IS_ERR(c))
+ return PTR_ERR(c);
+ dev = to_mdev(c->ibucontext.device);
+ mutex_lock(&dev->data_direct_lock);
+ data_direct_dev = dev->data_direct_dev;
+ if (!data_direct_dev) {
+ ret = -ENODEV;
+ goto end;
+ }
+
+ dev_path = kobject_get_path(&data_direct_dev->device->kobj, GFP_KERNEL);
+ if (!dev_path) {
+ ret = -ENOMEM;
+ goto end;
+ }
+
+ dev_path_len = strlen(dev_path) + 1;
+ if (dev_path_len > out_len) {
+ ret = -ENOSPC;
+ goto end;
+ }
+
+ ret = uverbs_copy_to(attrs, MLX5_IB_ATTR_GET_DATA_DIRECT_SYSFS_PATH, dev_path,
+ dev_path_len);
+ kfree(dev_path);
+
+end:
+ mutex_unlock(&dev->data_direct_lock);
+ return ret;
+}
+
+DECLARE_UVERBS_NAMED_METHOD(
+ MLX5_IB_METHOD_QUERY_PORT,
+ UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_QUERY_PORT_PORT_NUM,
+ UVERBS_ATTR_TYPE(u32), UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(
+ MLX5_IB_ATTR_QUERY_PORT,
+ UVERBS_ATTR_STRUCT(struct mlx5_ib_uapi_query_port,
+ reg_c0),
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD(
+ MLX5_IB_METHOD_GET_DATA_DIRECT_SYSFS_PATH,
+ UVERBS_ATTR_PTR_OUT(
+ MLX5_IB_ATTR_GET_DATA_DIRECT_SYSFS_PATH,
+ UVERBS_ATTR_MIN_SIZE(0),
+ UA_MANDATORY));
+
+ADD_UVERBS_METHODS(mlx5_ib_device,
+ UVERBS_OBJECT_DEVICE,
+ &UVERBS_METHOD(MLX5_IB_METHOD_QUERY_PORT),
+ &UVERBS_METHOD(MLX5_IB_METHOD_GET_DATA_DIRECT_SYSFS_PATH));
+
+DECLARE_UVERBS_NAMED_METHOD(
+ MLX5_IB_METHOD_PD_QUERY,
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_QUERY_PD_HANDLE,
+ UVERBS_OBJECT_PD,
+ UVERBS_ACCESS_READ,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_QUERY_PD_RESP_PDN,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY));
+
+ADD_UVERBS_METHODS(mlx5_ib_pd,
+ UVERBS_OBJECT_PD,
+ &UVERBS_METHOD(MLX5_IB_METHOD_PD_QUERY));
+
+const struct uapi_definition mlx5_ib_std_types_defs[] = {
+ UAPI_DEF_CHAIN_OBJ_TREE(
+ UVERBS_OBJECT_PD,
+ &mlx5_ib_pd),
+ UAPI_DEF_CHAIN_OBJ_TREE(
+ UVERBS_OBJECT_DEVICE,
+ &mlx5_ib_device),
+ {},
+};
diff --git a/drivers/infiniband/hw/mlx5/umr.c b/drivers/infiniband/hw/mlx5/umr.c
new file mode 100644
index 000000000000..4e562e0dd9e1
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/umr.c
@@ -0,0 +1,1129 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. */
+
+#include <rdma/ib_umem_odp.h>
+#include "mlx5_ib.h"
+#include "umr.h"
+#include "wr.h"
+
+/*
+ * We can't use an array for xlt_emergency_page because dma_map_single doesn't
+ * work on kernel modules memory
+ */
+void *xlt_emergency_page;
+static DEFINE_MUTEX(xlt_emergency_page_mutex);
+
+static __be64 get_umr_enable_mr_mask(void)
+{
+ u64 result;
+
+ result = MLX5_MKEY_MASK_KEY |
+ MLX5_MKEY_MASK_FREE;
+
+ return cpu_to_be64(result);
+}
+
+static __be64 get_umr_disable_mr_mask(void)
+{
+ u64 result;
+
+ result = MLX5_MKEY_MASK_FREE;
+
+ return cpu_to_be64(result);
+}
+
+static __be64 get_umr_update_translation_mask(struct mlx5_ib_dev *dev)
+{
+ u64 result;
+
+ result = MLX5_MKEY_MASK_LEN |
+ MLX5_MKEY_MASK_PAGE_SIZE |
+ MLX5_MKEY_MASK_START_ADDR;
+ if (MLX5_CAP_GEN_2(dev->mdev, umr_log_entity_size_5))
+ result |= MLX5_MKEY_MASK_PAGE_SIZE_5;
+
+ return cpu_to_be64(result);
+}
+
+static __be64 get_umr_update_access_mask(struct mlx5_ib_dev *dev)
+{
+ u64 result;
+
+ result = MLX5_MKEY_MASK_LR |
+ MLX5_MKEY_MASK_LW |
+ MLX5_MKEY_MASK_RR |
+ MLX5_MKEY_MASK_RW;
+
+ if (MLX5_CAP_GEN(dev->mdev, atomic))
+ result |= MLX5_MKEY_MASK_A;
+
+ if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr))
+ result |= MLX5_MKEY_MASK_RELAXED_ORDERING_WRITE;
+
+ if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr))
+ result |= MLX5_MKEY_MASK_RELAXED_ORDERING_READ;
+
+ return cpu_to_be64(result);
+}
+
+static __be64 get_umr_update_pd_mask(void)
+{
+ u64 result;
+
+ result = MLX5_MKEY_MASK_PD;
+
+ return cpu_to_be64(result);
+}
+
+static int umr_check_mkey_mask(struct mlx5_ib_dev *dev, u64 mask)
+{
+ if (mask & MLX5_MKEY_MASK_PAGE_SIZE &&
+ MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled))
+ return -EPERM;
+
+ if (mask & MLX5_MKEY_MASK_A &&
+ MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled))
+ return -EPERM;
+
+ if (mask & MLX5_MKEY_MASK_RELAXED_ORDERING_WRITE &&
+ !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr))
+ return -EPERM;
+
+ if (mask & MLX5_MKEY_MASK_RELAXED_ORDERING_READ &&
+ !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr))
+ return -EPERM;
+
+ return 0;
+}
+
+enum {
+ MAX_UMR_WR = 128,
+};
+
+static int mlx5r_umr_qp_rst2rts(struct mlx5_ib_dev *dev, struct ib_qp *qp)
+{
+ struct ib_qp_attr attr = {};
+ int ret;
+
+ attr.qp_state = IB_QPS_INIT;
+ attr.port_num = 1;
+ ret = ib_modify_qp(qp, &attr,
+ IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT);
+ if (ret) {
+ mlx5_ib_dbg(dev, "Couldn't modify UMR QP\n");
+ return ret;
+ }
+
+ memset(&attr, 0, sizeof(attr));
+ attr.qp_state = IB_QPS_RTR;
+
+ ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
+ if (ret) {
+ mlx5_ib_dbg(dev, "Couldn't modify umr QP to rtr\n");
+ return ret;
+ }
+
+ memset(&attr, 0, sizeof(attr));
+ attr.qp_state = IB_QPS_RTS;
+ ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
+ if (ret) {
+ mlx5_ib_dbg(dev, "Couldn't modify umr QP to rts\n");
+ return ret;
+ }
+
+ return 0;
+}
+
+int mlx5r_umr_resource_init(struct mlx5_ib_dev *dev)
+{
+ struct ib_qp_init_attr init_attr = {};
+ struct ib_cq *cq;
+ struct ib_qp *qp;
+ int ret = 0;
+
+
+ /*
+ * UMR qp is set once, never changed until device unload.
+ * Avoid taking the mutex if initialization is already done.
+ */
+ if (dev->umrc.qp)
+ return 0;
+
+ mutex_lock(&dev->umrc.init_lock);
+ /* First user allocates the UMR resources. Skip if already allocated. */
+ if (dev->umrc.qp)
+ goto unlock;
+
+ cq = ib_alloc_cq(&dev->ib_dev, NULL, 128, 0, IB_POLL_SOFTIRQ);
+ if (IS_ERR(cq)) {
+ mlx5_ib_dbg(dev, "Couldn't create CQ for sync UMR QP\n");
+ ret = PTR_ERR(cq);
+ goto unlock;
+ }
+
+ init_attr.send_cq = cq;
+ init_attr.recv_cq = cq;
+ init_attr.sq_sig_type = IB_SIGNAL_ALL_WR;
+ init_attr.cap.max_send_wr = MAX_UMR_WR;
+ init_attr.cap.max_send_sge = 1;
+ init_attr.qp_type = MLX5_IB_QPT_REG_UMR;
+ init_attr.port_num = 1;
+ qp = ib_create_qp(dev->umrc.pd, &init_attr);
+ if (IS_ERR(qp)) {
+ mlx5_ib_dbg(dev, "Couldn't create sync UMR QP\n");
+ ret = PTR_ERR(qp);
+ goto destroy_cq;
+ }
+
+ ret = mlx5r_umr_qp_rst2rts(dev, qp);
+ if (ret)
+ goto destroy_qp;
+
+ dev->umrc.cq = cq;
+
+ sema_init(&dev->umrc.sem, MAX_UMR_WR);
+ mutex_init(&dev->umrc.lock);
+ dev->umrc.state = MLX5_UMR_STATE_ACTIVE;
+ dev->umrc.qp = qp;
+
+ mutex_unlock(&dev->umrc.init_lock);
+ return 0;
+
+destroy_qp:
+ ib_destroy_qp(qp);
+destroy_cq:
+ ib_free_cq(cq);
+unlock:
+ mutex_unlock(&dev->umrc.init_lock);
+ return ret;
+}
+
+void mlx5r_umr_resource_cleanup(struct mlx5_ib_dev *dev)
+{
+ if (dev->umrc.state == MLX5_UMR_STATE_UNINIT)
+ return;
+ mutex_destroy(&dev->umrc.lock);
+ /* After device init, UMR cp/qp are not unset during the lifetime. */
+ ib_destroy_qp(dev->umrc.qp);
+ ib_free_cq(dev->umrc.cq);
+}
+
+int mlx5r_umr_init(struct mlx5_ib_dev *dev)
+{
+ struct ib_pd *pd;
+
+ pd = ib_alloc_pd(&dev->ib_dev, 0);
+ if (IS_ERR(pd)) {
+ mlx5_ib_dbg(dev, "Couldn't create PD for sync UMR QP\n");
+ return PTR_ERR(pd);
+ }
+ dev->umrc.pd = pd;
+
+ mutex_init(&dev->umrc.init_lock);
+
+ return 0;
+}
+
+void mlx5r_umr_cleanup(struct mlx5_ib_dev *dev)
+{
+ if (!dev->umrc.pd)
+ return;
+
+ mutex_destroy(&dev->umrc.init_lock);
+ ib_dealloc_pd(dev->umrc.pd);
+}
+
+
+static int mlx5r_umr_post_send(struct ib_qp *ibqp, u32 mkey, struct ib_cqe *cqe,
+ struct mlx5r_umr_wqe *wqe, bool with_data)
+{
+ unsigned int wqe_size =
+ with_data ? sizeof(struct mlx5r_umr_wqe) :
+ sizeof(struct mlx5r_umr_wqe) -
+ sizeof(struct mlx5_wqe_data_seg);
+ struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
+ struct mlx5_core_dev *mdev = dev->mdev;
+ struct mlx5_ib_qp *qp = to_mqp(ibqp);
+ struct mlx5_wqe_ctrl_seg *ctrl;
+ union {
+ struct ib_cqe *ib_cqe;
+ u64 wr_id;
+ } id;
+ void *cur_edge, *seg;
+ unsigned long flags;
+ unsigned int idx;
+ int size, err;
+
+ if (unlikely(mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR))
+ return -EIO;
+
+ spin_lock_irqsave(&qp->sq.lock, flags);
+
+ err = mlx5r_begin_wqe(qp, &seg, &ctrl, &idx, &size, &cur_edge, 0,
+ cpu_to_be32(mkey), false, false);
+ if (WARN_ON(err))
+ goto out;
+
+ qp->sq.wr_data[idx] = MLX5_IB_WR_UMR;
+
+ mlx5r_memcpy_send_wqe(&qp->sq, &cur_edge, &seg, &size, wqe, wqe_size);
+
+ id.ib_cqe = cqe;
+ mlx5r_finish_wqe(qp, ctrl, seg, size, cur_edge, idx, id.wr_id, 0,
+ MLX5_FENCE_MODE_INITIATOR_SMALL, MLX5_OPCODE_UMR);
+
+ mlx5r_ring_db(qp, 1, ctrl);
+
+out:
+ spin_unlock_irqrestore(&qp->sq.lock, flags);
+
+ return err;
+}
+
+static int mlx5r_umr_recover(struct mlx5_ib_dev *dev, u32 mkey,
+ struct mlx5r_umr_context *umr_context,
+ struct mlx5r_umr_wqe *wqe, bool with_data)
+{
+ struct umr_common *umrc = &dev->umrc;
+ struct ib_qp_attr attr;
+ int err;
+
+ mutex_lock(&umrc->lock);
+ /* Preventing any further WRs to be sent now */
+ if (umrc->state != MLX5_UMR_STATE_RECOVER) {
+ mlx5_ib_warn(dev, "UMR recovery encountered an unexpected state=%d\n",
+ umrc->state);
+ umrc->state = MLX5_UMR_STATE_RECOVER;
+ }
+ mutex_unlock(&umrc->lock);
+
+ /* Sending a final/barrier WR (the failed one) and wait for its completion.
+ * This will ensure that all the previous WRs got a completion before
+ * we set the QP state to RESET.
+ */
+ err = mlx5r_umr_post_send(umrc->qp, mkey, &umr_context->cqe, wqe,
+ with_data);
+ if (err) {
+ mlx5_ib_warn(dev, "UMR recovery post send failed, err %d\n", err);
+ goto err;
+ }
+
+ /* Since the QP is in an error state, it will only receive
+ * IB_WC_WR_FLUSH_ERR. However, as it serves only as a barrier
+ * we don't care about its status.
+ */
+ wait_for_completion(&umr_context->done);
+
+ attr.qp_state = IB_QPS_RESET;
+ err = ib_modify_qp(umrc->qp, &attr, IB_QP_STATE);
+ if (err) {
+ mlx5_ib_warn(dev, "Couldn't modify UMR QP to RESET, err=%d\n", err);
+ goto err;
+ }
+
+ err = mlx5r_umr_qp_rst2rts(dev, umrc->qp);
+ if (err) {
+ mlx5_ib_warn(dev, "Couldn't modify UMR QP to RTS, err=%d\n", err);
+ goto err;
+ }
+
+ umrc->state = MLX5_UMR_STATE_ACTIVE;
+ return 0;
+
+err:
+ umrc->state = MLX5_UMR_STATE_ERR;
+ return err;
+}
+
+static void mlx5r_umr_done(struct ib_cq *cq, struct ib_wc *wc)
+{
+ struct mlx5_ib_umr_context *context =
+ container_of(wc->wr_cqe, struct mlx5_ib_umr_context, cqe);
+
+ context->status = wc->status;
+ complete(&context->done);
+}
+
+static inline void mlx5r_umr_init_context(struct mlx5r_umr_context *context)
+{
+ context->cqe.done = mlx5r_umr_done;
+ init_completion(&context->done);
+}
+
+static int mlx5r_umr_post_send_wait(struct mlx5_ib_dev *dev, u32 mkey,
+ struct mlx5r_umr_wqe *wqe, bool with_data)
+{
+ struct umr_common *umrc = &dev->umrc;
+ struct mlx5r_umr_context umr_context;
+ int err;
+
+ err = umr_check_mkey_mask(dev, be64_to_cpu(wqe->ctrl_seg.mkey_mask));
+ if (WARN_ON(err))
+ return err;
+
+ mlx5r_umr_init_context(&umr_context);
+
+ down(&umrc->sem);
+ while (true) {
+ mutex_lock(&umrc->lock);
+ if (umrc->state == MLX5_UMR_STATE_ERR) {
+ mutex_unlock(&umrc->lock);
+ err = -EFAULT;
+ break;
+ }
+
+ if (umrc->state == MLX5_UMR_STATE_RECOVER) {
+ mutex_unlock(&umrc->lock);
+ usleep_range(3000, 5000);
+ continue;
+ }
+
+ err = mlx5r_umr_post_send(umrc->qp, mkey, &umr_context.cqe, wqe,
+ with_data);
+ mutex_unlock(&umrc->lock);
+ if (err) {
+ mlx5_ib_warn(dev, "UMR post send failed, err %d\n",
+ err);
+ break;
+ }
+
+ wait_for_completion(&umr_context.done);
+
+ if (umr_context.status == IB_WC_SUCCESS)
+ break;
+
+ if (umr_context.status == IB_WC_WR_FLUSH_ERR)
+ continue;
+
+ WARN_ON_ONCE(1);
+ mlx5_ib_warn(dev,
+ "reg umr failed (%u). Trying to recover and resubmit the flushed WQEs, mkey = %u\n",
+ umr_context.status, mkey);
+ err = mlx5r_umr_recover(dev, mkey, &umr_context, wqe, with_data);
+ if (err)
+ mlx5_ib_warn(dev, "couldn't recover UMR, err %d\n",
+ err);
+ err = -EFAULT;
+ break;
+ }
+ up(&umrc->sem);
+ return err;
+}
+
+/**
+ * mlx5r_umr_revoke_mr - Fence all DMA on the MR
+ * @mr: The MR to fence
+ *
+ * Upon return the NIC will not be doing any DMA to the pages under the MR,
+ * and any DMA in progress will be completed. Failure of this function
+ * indicates the HW has failed catastrophically.
+ */
+int mlx5r_umr_revoke_mr(struct mlx5_ib_mr *mr)
+{
+ struct mlx5_ib_dev *dev = mr_to_mdev(mr);
+ struct mlx5r_umr_wqe wqe = {};
+
+ if (dev->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
+ return 0;
+
+ wqe.ctrl_seg.mkey_mask |= get_umr_update_pd_mask();
+ wqe.ctrl_seg.mkey_mask |= get_umr_disable_mr_mask();
+ wqe.ctrl_seg.flags |= MLX5_UMR_INLINE;
+
+ MLX5_SET(mkc, &wqe.mkey_seg, free, 1);
+ MLX5_SET(mkc, &wqe.mkey_seg, pd, to_mpd(dev->umrc.pd)->pdn);
+ MLX5_SET(mkc, &wqe.mkey_seg, qpn, 0xffffff);
+ MLX5_SET(mkc, &wqe.mkey_seg, mkey_7_0,
+ mlx5_mkey_variant(mr->mmkey.key));
+
+ return mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe, false);
+}
+
+static void mlx5r_umr_set_access_flags(struct mlx5_ib_dev *dev,
+ struct mlx5_mkey_seg *seg,
+ unsigned int access_flags)
+{
+ bool ro_read = (access_flags & IB_ACCESS_RELAXED_ORDERING) &&
+ (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read) ||
+ pcie_relaxed_ordering_enabled(dev->mdev->pdev));
+
+ MLX5_SET(mkc, seg, a, !!(access_flags & IB_ACCESS_REMOTE_ATOMIC));
+ MLX5_SET(mkc, seg, rw, !!(access_flags & IB_ACCESS_REMOTE_WRITE));
+ MLX5_SET(mkc, seg, rr, !!(access_flags & IB_ACCESS_REMOTE_READ));
+ MLX5_SET(mkc, seg, lw, !!(access_flags & IB_ACCESS_LOCAL_WRITE));
+ MLX5_SET(mkc, seg, lr, 1);
+ MLX5_SET(mkc, seg, relaxed_ordering_write,
+ !!(access_flags & IB_ACCESS_RELAXED_ORDERING));
+ MLX5_SET(mkc, seg, relaxed_ordering_read, ro_read);
+}
+
+int mlx5r_umr_rereg_pd_access(struct mlx5_ib_mr *mr, struct ib_pd *pd,
+ int access_flags)
+{
+ struct mlx5_ib_dev *dev = mr_to_mdev(mr);
+ struct mlx5r_umr_wqe wqe = {};
+ int err;
+
+ wqe.ctrl_seg.mkey_mask = get_umr_update_access_mask(dev);
+ wqe.ctrl_seg.mkey_mask |= get_umr_update_pd_mask();
+ wqe.ctrl_seg.flags = MLX5_UMR_CHECK_FREE;
+ wqe.ctrl_seg.flags |= MLX5_UMR_INLINE;
+
+ mlx5r_umr_set_access_flags(dev, &wqe.mkey_seg, access_flags);
+ MLX5_SET(mkc, &wqe.mkey_seg, pd, to_mpd(pd)->pdn);
+ MLX5_SET(mkc, &wqe.mkey_seg, qpn, 0xffffff);
+ MLX5_SET(mkc, &wqe.mkey_seg, mkey_7_0,
+ mlx5_mkey_variant(mr->mmkey.key));
+
+ err = mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe, false);
+ if (err)
+ return err;
+
+ mr->access_flags = access_flags;
+ return 0;
+}
+
+#define MLX5_MAX_UMR_CHUNK \
+ ((1 << (MLX5_MAX_UMR_SHIFT + 4)) - MLX5_UMR_FLEX_ALIGNMENT)
+#define MLX5_SPARE_UMR_CHUNK 0x10000
+
+/*
+ * Allocate a temporary buffer to hold the per-page information to transfer to
+ * HW. For efficiency this should be as large as it can be, but buffer
+ * allocation failure is not allowed, so try smaller sizes.
+ */
+static void *mlx5r_umr_alloc_xlt(size_t *nents, size_t ent_size, gfp_t gfp_mask)
+{
+ const size_t xlt_chunk_align = MLX5_UMR_FLEX_ALIGNMENT / ent_size;
+ size_t size;
+ void *res = NULL;
+
+ static_assert(PAGE_SIZE % MLX5_UMR_FLEX_ALIGNMENT == 0);
+
+ /*
+ * MLX5_IB_UPD_XLT_ATOMIC doesn't signal an atomic context just that the
+ * allocation can't trigger any kind of reclaim.
+ */
+ might_sleep();
+
+ gfp_mask |= __GFP_ZERO | __GFP_NORETRY;
+
+ /*
+ * If the system already has a suitable high order page then just use
+ * that, but don't try hard to create one. This max is about 1M, so a
+ * free x86 huge page will satisfy it.
+ */
+ size = min_t(size_t, ent_size * ALIGN(*nents, xlt_chunk_align),
+ MLX5_MAX_UMR_CHUNK);
+ *nents = size / ent_size;
+ res = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN,
+ get_order(size));
+ if (res)
+ return res;
+
+ if (size > MLX5_SPARE_UMR_CHUNK) {
+ size = MLX5_SPARE_UMR_CHUNK;
+ *nents = size / ent_size;
+ res = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN,
+ get_order(size));
+ if (res)
+ return res;
+ }
+
+ *nents = PAGE_SIZE / ent_size;
+ res = (void *)__get_free_page(gfp_mask);
+ if (res)
+ return res;
+
+ mutex_lock(&xlt_emergency_page_mutex);
+ memset(xlt_emergency_page, 0, PAGE_SIZE);
+ return xlt_emergency_page;
+}
+
+static void mlx5r_umr_free_xlt(void *xlt, size_t length)
+{
+ if (xlt == xlt_emergency_page) {
+ mutex_unlock(&xlt_emergency_page_mutex);
+ return;
+ }
+
+ free_pages((unsigned long)xlt, get_order(length));
+}
+
+static void mlx5r_umr_unmap_free_xlt(struct mlx5_ib_dev *dev, void *xlt,
+ struct ib_sge *sg)
+{
+ struct device *ddev = &dev->mdev->pdev->dev;
+
+ dma_unmap_single(ddev, sg->addr, sg->length, DMA_TO_DEVICE);
+ mlx5r_umr_free_xlt(xlt, sg->length);
+}
+
+/*
+ * Create an XLT buffer ready for submission.
+ */
+static void *mlx5r_umr_create_xlt(struct mlx5_ib_dev *dev, struct ib_sge *sg,
+ size_t nents, size_t ent_size,
+ unsigned int flags)
+{
+ struct device *ddev = &dev->mdev->pdev->dev;
+ dma_addr_t dma;
+ void *xlt;
+
+ xlt = mlx5r_umr_alloc_xlt(&nents, ent_size,
+ flags & MLX5_IB_UPD_XLT_ATOMIC ? GFP_ATOMIC :
+ GFP_KERNEL);
+ sg->length = nents * ent_size;
+ dma = dma_map_single(ddev, xlt, sg->length, DMA_TO_DEVICE);
+ if (dma_mapping_error(ddev, dma)) {
+ mlx5_ib_err(dev, "unable to map DMA during XLT update.\n");
+ mlx5r_umr_free_xlt(xlt, sg->length);
+ return NULL;
+ }
+ sg->addr = dma;
+ sg->lkey = dev->umrc.pd->local_dma_lkey;
+
+ return xlt;
+}
+
+static void
+mlx5r_umr_set_update_xlt_ctrl_seg(struct mlx5_wqe_umr_ctrl_seg *ctrl_seg,
+ unsigned int flags, struct ib_sge *sg)
+{
+ if (!(flags & MLX5_IB_UPD_XLT_ENABLE))
+ /* fail if free */
+ ctrl_seg->flags = MLX5_UMR_CHECK_FREE;
+ else
+ /* fail if not free */
+ ctrl_seg->flags = MLX5_UMR_CHECK_NOT_FREE;
+ ctrl_seg->xlt_octowords =
+ cpu_to_be16(mlx5r_umr_get_xlt_octo(sg->length));
+}
+
+static void mlx5r_umr_set_update_xlt_mkey_seg(struct mlx5_ib_dev *dev,
+ struct mlx5_mkey_seg *mkey_seg,
+ struct mlx5_ib_mr *mr,
+ unsigned int page_shift)
+{
+ mlx5r_umr_set_access_flags(dev, mkey_seg, mr->access_flags);
+ MLX5_SET(mkc, mkey_seg, pd, to_mpd(mr->ibmr.pd)->pdn);
+ MLX5_SET64(mkc, mkey_seg, start_addr, mr->ibmr.iova);
+ MLX5_SET64(mkc, mkey_seg, len, mr->ibmr.length);
+ MLX5_SET(mkc, mkey_seg, log_page_size, page_shift);
+ MLX5_SET(mkc, mkey_seg, qpn, 0xffffff);
+ MLX5_SET(mkc, mkey_seg, mkey_7_0, mlx5_mkey_variant(mr->mmkey.key));
+}
+
+static void
+mlx5r_umr_set_update_xlt_data_seg(struct mlx5_wqe_data_seg *data_seg,
+ struct ib_sge *sg)
+{
+ data_seg->byte_count = cpu_to_be32(sg->length);
+ data_seg->lkey = cpu_to_be32(sg->lkey);
+ data_seg->addr = cpu_to_be64(sg->addr);
+}
+
+static void mlx5r_umr_update_offset(struct mlx5_wqe_umr_ctrl_seg *ctrl_seg,
+ u64 offset)
+{
+ u64 octo_offset = mlx5r_umr_get_xlt_octo(offset);
+
+ ctrl_seg->xlt_offset = cpu_to_be16(octo_offset & 0xffff);
+ ctrl_seg->xlt_offset_47_16 = cpu_to_be32(octo_offset >> 16);
+ ctrl_seg->flags |= MLX5_UMR_TRANSLATION_OFFSET_EN;
+}
+
+static void mlx5r_umr_final_update_xlt(struct mlx5_ib_dev *dev,
+ struct mlx5r_umr_wqe *wqe,
+ struct mlx5_ib_mr *mr, struct ib_sge *sg,
+ unsigned int flags)
+{
+ bool update_pd_access, update_translation;
+
+ if (flags & MLX5_IB_UPD_XLT_ENABLE)
+ wqe->ctrl_seg.mkey_mask |= get_umr_enable_mr_mask();
+
+ update_pd_access = flags & MLX5_IB_UPD_XLT_ENABLE ||
+ flags & MLX5_IB_UPD_XLT_PD ||
+ flags & MLX5_IB_UPD_XLT_ACCESS;
+
+ if (update_pd_access) {
+ wqe->ctrl_seg.mkey_mask |= get_umr_update_access_mask(dev);
+ wqe->ctrl_seg.mkey_mask |= get_umr_update_pd_mask();
+ }
+
+ update_translation =
+ flags & MLX5_IB_UPD_XLT_ENABLE || flags & MLX5_IB_UPD_XLT_ADDR;
+
+ if (update_translation) {
+ wqe->ctrl_seg.mkey_mask |= get_umr_update_translation_mask(dev);
+ if (!mr->ibmr.length)
+ MLX5_SET(mkc, &wqe->mkey_seg, length64, 1);
+ if (flags & MLX5_IB_UPD_XLT_KEEP_PGSZ)
+ wqe->ctrl_seg.mkey_mask &=
+ cpu_to_be64(~MLX5_MKEY_MASK_PAGE_SIZE);
+ }
+
+ wqe->ctrl_seg.xlt_octowords =
+ cpu_to_be16(mlx5r_umr_get_xlt_octo(sg->length));
+ wqe->data_seg.byte_count = cpu_to_be32(sg->length);
+}
+
+static void
+_mlx5r_umr_init_wqe(struct mlx5_ib_mr *mr, struct mlx5r_umr_wqe *wqe,
+ struct ib_sge *sg, unsigned int flags,
+ unsigned int page_shift, bool dd)
+{
+ struct mlx5_ib_dev *dev = mr_to_mdev(mr);
+
+ mlx5r_umr_set_update_xlt_ctrl_seg(&wqe->ctrl_seg, flags, sg);
+ mlx5r_umr_set_update_xlt_mkey_seg(dev, &wqe->mkey_seg, mr, page_shift);
+ if (dd) /* Use the data direct internal kernel PD */
+ MLX5_SET(mkc, &wqe->mkey_seg, pd, dev->ddr.pdn);
+ mlx5r_umr_set_update_xlt_data_seg(&wqe->data_seg, sg);
+}
+
+static int
+_mlx5r_umr_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags, bool dd,
+ size_t start_block, size_t nblocks)
+{
+ size_t ent_size = dd ? sizeof(struct mlx5_ksm) : sizeof(struct mlx5_mtt);
+ struct mlx5_ib_dev *dev = mr_to_mdev(mr);
+ struct device *ddev = &dev->mdev->pdev->dev;
+ struct mlx5r_umr_wqe wqe = {};
+ size_t processed_blocks = 0;
+ struct ib_block_iter biter;
+ size_t cur_block_idx = 0;
+ struct mlx5_ksm *cur_ksm;
+ struct mlx5_mtt *cur_mtt;
+ size_t orig_sg_length;
+ size_t total_blocks;
+ size_t final_size;
+ void *curr_entry;
+ struct ib_sge sg;
+ void *entry;
+ u64 offset;
+ int err = 0;
+
+ total_blocks = ib_umem_num_dma_blocks(mr->umem, 1UL << mr->page_shift);
+ if (start_block > total_blocks)
+ return -EINVAL;
+
+ /* nblocks 0 means update all blocks starting from start_block */
+ if (nblocks)
+ total_blocks = nblocks;
+
+ entry = mlx5r_umr_create_xlt(dev, &sg, total_blocks, ent_size, flags);
+ if (!entry)
+ return -ENOMEM;
+
+ orig_sg_length = sg.length;
+
+ _mlx5r_umr_init_wqe(mr, &wqe, &sg, flags, mr->page_shift, dd);
+
+ /* Set initial translation offset to start_block */
+ offset = (u64)start_block * ent_size;
+ mlx5r_umr_update_offset(&wqe.ctrl_seg, offset);
+
+ if (dd)
+ cur_ksm = entry;
+ else
+ cur_mtt = entry;
+
+ curr_entry = entry;
+
+ rdma_umem_for_each_dma_block(mr->umem, &biter, BIT(mr->page_shift)) {
+ if (cur_block_idx < start_block) {
+ cur_block_idx++;
+ continue;
+ }
+
+ if (nblocks && processed_blocks >= nblocks)
+ break;
+
+ if (curr_entry == entry + sg.length) {
+ dma_sync_single_for_device(ddev, sg.addr, sg.length,
+ DMA_TO_DEVICE);
+
+ err = mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe,
+ true);
+ if (err)
+ goto err;
+ dma_sync_single_for_cpu(ddev, sg.addr, sg.length,
+ DMA_TO_DEVICE);
+ offset += sg.length;
+ mlx5r_umr_update_offset(&wqe.ctrl_seg, offset);
+ if (dd)
+ cur_ksm = entry;
+ else
+ cur_mtt = entry;
+ }
+
+ if (dd) {
+ cur_ksm->va = cpu_to_be64(rdma_block_iter_dma_address(&biter));
+ if (mr->access_flags & IB_ACCESS_RELAXED_ORDERING &&
+ dev->ddr.mkey_ro_valid)
+ cur_ksm->key = cpu_to_be32(dev->ddr.mkey_ro);
+ else
+ cur_ksm->key = cpu_to_be32(dev->ddr.mkey);
+ if (mr->umem->is_dmabuf &&
+ (flags & MLX5_IB_UPD_XLT_ZAP)) {
+ cur_ksm->va = 0;
+ cur_ksm->key = 0;
+ }
+ cur_ksm++;
+ curr_entry = cur_ksm;
+ } else {
+ cur_mtt->ptag =
+ cpu_to_be64(rdma_block_iter_dma_address(&biter) |
+ MLX5_IB_MTT_PRESENT);
+ if (mr->umem->is_dmabuf && (flags & MLX5_IB_UPD_XLT_ZAP))
+ cur_mtt->ptag = 0;
+ cur_mtt++;
+ curr_entry = cur_mtt;
+ }
+
+ processed_blocks++;
+ }
+
+ final_size = curr_entry - entry;
+ sg.length = ALIGN(final_size, MLX5_UMR_FLEX_ALIGNMENT);
+ memset(curr_entry, 0, sg.length - final_size);
+ mlx5r_umr_final_update_xlt(dev, &wqe, mr, &sg, flags);
+
+ dma_sync_single_for_device(ddev, sg.addr, sg.length, DMA_TO_DEVICE);
+ err = mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe, true);
+
+err:
+ sg.length = orig_sg_length;
+ mlx5r_umr_unmap_free_xlt(dev, entry, &sg);
+ return err;
+}
+
+int mlx5r_umr_update_data_direct_ksm_pas_range(struct mlx5_ib_mr *mr,
+ unsigned int flags,
+ size_t start_block,
+ size_t nblocks)
+{
+ /* No invalidation flow is expected */
+ if (WARN_ON(!mr->umem->is_dmabuf) || ((flags & MLX5_IB_UPD_XLT_ZAP) &&
+ !(flags & MLX5_IB_UPD_XLT_KEEP_PGSZ)))
+ return -EINVAL;
+
+ return _mlx5r_umr_update_mr_pas(mr, flags, true, start_block, nblocks);
+}
+
+int mlx5r_umr_update_data_direct_ksm_pas(struct mlx5_ib_mr *mr,
+ unsigned int flags)
+{
+ return mlx5r_umr_update_data_direct_ksm_pas_range(mr, flags, 0, 0);
+}
+
+int mlx5r_umr_update_mr_pas_range(struct mlx5_ib_mr *mr, unsigned int flags,
+ size_t start_block, size_t nblocks)
+{
+ if (WARN_ON(mr->umem->is_odp))
+ return -EINVAL;
+
+ return _mlx5r_umr_update_mr_pas(mr, flags, false, start_block, nblocks);
+}
+
+/*
+ * Send the DMA list to the HW for a normal MR using UMR.
+ * Dmabuf MR is handled in a similar way, except that the MLX5_IB_UPD_XLT_ZAP
+ * flag may be used.
+ */
+int mlx5r_umr_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags)
+{
+ return mlx5r_umr_update_mr_pas_range(mr, flags, 0, 0);
+}
+
+static bool umr_can_use_indirect_mkey(struct mlx5_ib_dev *dev)
+{
+ return !MLX5_CAP_GEN(dev->mdev, umr_indirect_mkey_disabled);
+}
+
+int mlx5r_umr_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
+ int page_shift, int flags)
+{
+ int desc_size = (flags & MLX5_IB_UPD_XLT_INDIRECT)
+ ? sizeof(struct mlx5_klm)
+ : sizeof(struct mlx5_mtt);
+ const int page_align = MLX5_UMR_FLEX_ALIGNMENT / desc_size;
+ struct mlx5_ib_dev *dev = mr_to_mdev(mr);
+ struct device *ddev = &dev->mdev->pdev->dev;
+ const int page_mask = page_align - 1;
+ struct mlx5r_umr_wqe wqe = {};
+ size_t pages_mapped = 0;
+ size_t pages_to_map = 0;
+ size_t size_to_map = 0;
+ size_t orig_sg_length;
+ size_t pages_iter;
+ struct ib_sge sg;
+ int err = 0;
+ void *xlt;
+
+ if ((flags & MLX5_IB_UPD_XLT_INDIRECT) &&
+ !umr_can_use_indirect_mkey(dev))
+ return -EPERM;
+
+ if (WARN_ON(!mr->umem->is_odp))
+ return -EINVAL;
+
+ /* UMR copies MTTs in units of MLX5_UMR_FLEX_ALIGNMENT bytes,
+ * so we need to align the offset and length accordingly
+ */
+ if (idx & page_mask) {
+ npages += idx & page_mask;
+ idx &= ~page_mask;
+ }
+ pages_to_map = ALIGN(npages, page_align);
+
+ xlt = mlx5r_umr_create_xlt(dev, &sg, npages, desc_size, flags);
+ if (!xlt)
+ return -ENOMEM;
+
+ pages_iter = sg.length / desc_size;
+ orig_sg_length = sg.length;
+
+ if (!(flags & MLX5_IB_UPD_XLT_INDIRECT)) {
+ struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem);
+ size_t max_pages = ib_umem_odp_num_pages(odp) - idx;
+
+ pages_to_map = min_t(size_t, pages_to_map, max_pages);
+ }
+
+ mlx5r_umr_set_update_xlt_ctrl_seg(&wqe.ctrl_seg, flags, &sg);
+ mlx5r_umr_set_update_xlt_mkey_seg(dev, &wqe.mkey_seg, mr, page_shift);
+ mlx5r_umr_set_update_xlt_data_seg(&wqe.data_seg, &sg);
+
+ for (pages_mapped = 0;
+ pages_mapped < pages_to_map && !err;
+ pages_mapped += pages_iter, idx += pages_iter) {
+ npages = min_t(int, pages_iter, pages_to_map - pages_mapped);
+ size_to_map = npages * desc_size;
+ dma_sync_single_for_cpu(ddev, sg.addr, sg.length,
+ DMA_TO_DEVICE);
+ /*
+ * npages is the maximum number of pages to map, but we
+ * can't guarantee that all pages are actually mapped.
+ *
+ * For example, if page is p2p of type which is not supported
+ * for mapping, the number of pages mapped will be less than
+ * requested.
+ */
+ err = mlx5_odp_populate_xlt(xlt, idx, npages, mr, flags);
+ if (err)
+ return err;
+ dma_sync_single_for_device(ddev, sg.addr, sg.length,
+ DMA_TO_DEVICE);
+ sg.length = ALIGN(size_to_map, MLX5_UMR_FLEX_ALIGNMENT);
+
+ if (pages_mapped + pages_iter >= pages_to_map)
+ mlx5r_umr_final_update_xlt(dev, &wqe, mr, &sg, flags);
+ mlx5r_umr_update_offset(&wqe.ctrl_seg, idx * desc_size);
+ err = mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe, true);
+ }
+ sg.length = orig_sg_length;
+ mlx5r_umr_unmap_free_xlt(dev, xlt, &sg);
+ return err;
+}
+
+/*
+ * Update only the page-size (log_page_size) field of an existing memory key
+ * using UMR. This is useful when the MR's physical layout stays the same
+ * but the optimal page shift has changed (e.g. dmabuf after pages are
+ * pinned and the HW can switch from 4K to huge-page alignment).
+ */
+int mlx5r_umr_update_mr_page_shift(struct mlx5_ib_mr *mr,
+ unsigned int page_shift,
+ bool dd)
+{
+ struct mlx5_ib_dev *dev = mr_to_mdev(mr);
+ struct mlx5r_umr_wqe wqe = {};
+ int err;
+
+ /* Build UMR wqe: we touch only PAGE_SIZE, so use the dedicated mask */
+ wqe.ctrl_seg.mkey_mask = get_umr_update_translation_mask(dev);
+
+ /* MR must be free while page size is modified */
+ wqe.ctrl_seg.flags = MLX5_UMR_CHECK_FREE | MLX5_UMR_INLINE;
+
+ /* Fill mkey segment with the new page size, keep the rest unchanged */
+ MLX5_SET(mkc, &wqe.mkey_seg, log_page_size, page_shift);
+
+ if (dd)
+ MLX5_SET(mkc, &wqe.mkey_seg, pd, dev->ddr.pdn);
+ else
+ MLX5_SET(mkc, &wqe.mkey_seg, pd, to_mpd(mr->ibmr.pd)->pdn);
+
+ MLX5_SET64(mkc, &wqe.mkey_seg, start_addr, mr->ibmr.iova);
+ MLX5_SET64(mkc, &wqe.mkey_seg, len, mr->ibmr.length);
+ MLX5_SET(mkc, &wqe.mkey_seg, qpn, 0xffffff);
+ MLX5_SET(mkc, &wqe.mkey_seg, mkey_7_0,
+ mlx5_mkey_variant(mr->mmkey.key));
+
+ err = mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe, false);
+ if (!err)
+ mr->page_shift = page_shift;
+
+ return err;
+}
+
+static inline int
+_mlx5r_dmabuf_umr_update_pas(struct mlx5_ib_mr *mr, unsigned int flags,
+ size_t start_block, size_t nblocks, bool dd)
+{
+ if (dd)
+ return mlx5r_umr_update_data_direct_ksm_pas_range(mr, flags,
+ start_block,
+ nblocks);
+ else
+ return mlx5r_umr_update_mr_pas_range(mr, flags, start_block,
+ nblocks);
+}
+
+/**
+ * This function makes an mkey non-present by zapping the translation entries of
+ * the mkey by zapping (zeroing out) the first N entries, where N is determined
+ * by the largest page size supported by the device and the MR length.
+ * It then updates the mkey's page size to the largest possible value, ensuring
+ * the MR is completely non-present and safe for further updates.
+ * It is useful to update the page size of a dmabuf MR on a page fault.
+ *
+ * Return: On success, returns the number of entries that were zapped.
+ * On error, returns a negative error code.
+ */
+static int _mlx5r_umr_zap_mkey(struct mlx5_ib_mr *mr,
+ unsigned int flags,
+ unsigned int page_shift,
+ size_t *nblocks,
+ bool dd)
+{
+ unsigned int old_page_shift = mr->page_shift;
+ struct mlx5_ib_dev *dev = mr_to_mdev(mr);
+ unsigned int max_page_shift;
+ size_t page_shift_nblocks;
+ unsigned int max_log_size;
+ int access_mode;
+ int err;
+
+ access_mode = dd ? MLX5_MKC_ACCESS_MODE_KSM : MLX5_MKC_ACCESS_MODE_MTT;
+ flags |= MLX5_IB_UPD_XLT_KEEP_PGSZ | MLX5_IB_UPD_XLT_ZAP |
+ MLX5_IB_UPD_XLT_ATOMIC;
+ max_log_size = get_max_log_entity_size_cap(dev, access_mode);
+ max_page_shift = order_base_2(mr->ibmr.length);
+ max_page_shift = min(max(max_page_shift, page_shift), max_log_size);
+ /* Count blocks in units of max_page_shift, we will zap exactly this
+ * many to make the whole MR non-present.
+ * Block size must be aligned to MLX5_UMR_FLEX_ALIGNMENT since it may
+ * be used as offset into the XLT later on.
+ */
+ *nblocks = ib_umem_num_dma_blocks(mr->umem, 1UL << max_page_shift);
+ if (dd)
+ *nblocks = ALIGN(*nblocks, MLX5_UMR_KSM_NUM_ENTRIES_ALIGNMENT);
+ else
+ *nblocks = ALIGN(*nblocks, MLX5_UMR_MTT_NUM_ENTRIES_ALIGNMENT);
+ page_shift_nblocks = ib_umem_num_dma_blocks(mr->umem,
+ 1UL << page_shift);
+ /* If the number of blocks at max possible page shift is greater than
+ * the number of blocks at the new page size, we should just go over the
+ * whole mkey entries.
+ */
+ if (*nblocks >= page_shift_nblocks)
+ *nblocks = 0;
+
+ /* Make the first nblocks entries non-present without changing
+ * page size yet.
+ */
+ if (*nblocks)
+ mr->page_shift = max_page_shift;
+ err = _mlx5r_dmabuf_umr_update_pas(mr, flags, 0, *nblocks, dd);
+ if (err) {
+ mr->page_shift = old_page_shift;
+ return err;
+ }
+
+ /* Change page size to the max page size now that the MR is completely
+ * non-present.
+ */
+ if (*nblocks) {
+ err = mlx5r_umr_update_mr_page_shift(mr, max_page_shift, dd);
+ if (err) {
+ mr->page_shift = old_page_shift;
+ return err;
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * mlx5r_umr_dmabuf_update_pgsz - Safely update DMABUF MR page size and its
+ * entries accordingly
+ * @mr: The memory region to update
+ * @xlt_flags: Translation table update flags
+ * @page_shift: The new (optimized) page shift to use
+ *
+ * This function updates the page size and mkey translation entries for a DMABUF
+ * MR in a safe, multi-step process to avoid exposing partially updated mappings
+ * The update is performed in 5 steps:
+ * 1. Make the first X entries non-present, while X is calculated to be
+ * minimal according to a large page shift that can be used to cover the
+ * MR length.
+ * 2. Update the page size to the large supported page size
+ * 3. Load the remaining N-X entries according to the (optimized) page_shift
+ * 4. Update the page size according to the (optimized) page_shift
+ * 5. Load the first X entries with the correct translations
+ *
+ * This ensures that at no point is the MR accessible with a partially updated
+ * translation table, maintaining correctness and preventing access to stale or
+ * inconsistent mappings.
+ *
+ * Returns 0 on success or a negative error code on failure.
+ */
+int mlx5r_umr_dmabuf_update_pgsz(struct mlx5_ib_mr *mr, u32 xlt_flags,
+ unsigned int page_shift)
+{
+ unsigned int old_page_shift = mr->page_shift;
+ size_t zapped_blocks;
+ size_t total_blocks;
+ int err;
+
+ err = _mlx5r_umr_zap_mkey(mr, xlt_flags, page_shift, &zapped_blocks,
+ mr->data_direct);
+ if (err)
+ return err;
+
+ /* _mlx5r_umr_zap_mkey already enables the mkey */
+ xlt_flags &= ~MLX5_IB_UPD_XLT_ENABLE;
+ mr->page_shift = page_shift;
+ total_blocks = ib_umem_num_dma_blocks(mr->umem, 1UL << mr->page_shift);
+ if (zapped_blocks && zapped_blocks < total_blocks) {
+ /* Update PAS according to the new page size but don't update
+ * the page size in the mkey yet.
+ */
+ err = _mlx5r_dmabuf_umr_update_pas(
+ mr,
+ xlt_flags | MLX5_IB_UPD_XLT_KEEP_PGSZ,
+ zapped_blocks,
+ total_blocks - zapped_blocks,
+ mr->data_direct);
+ if (err)
+ goto err;
+ }
+
+ err = mlx5r_umr_update_mr_page_shift(mr, mr->page_shift,
+ mr->data_direct);
+ if (err)
+ goto err;
+ err = _mlx5r_dmabuf_umr_update_pas(mr, xlt_flags, 0, zapped_blocks,
+ mr->data_direct);
+ if (err)
+ goto err;
+
+ return 0;
+err:
+ mr->page_shift = old_page_shift;
+ return err;
+}
diff --git a/drivers/infiniband/hw/mlx5/umr.h b/drivers/infiniband/hw/mlx5/umr.h
new file mode 100644
index 000000000000..e9361f0140e7
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/umr.h
@@ -0,0 +1,113 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. */
+
+#ifndef _MLX5_IB_UMR_H
+#define _MLX5_IB_UMR_H
+
+#include "mlx5_ib.h"
+
+
+#define MLX5_MAX_UMR_SHIFT 16
+#define MLX5_MAX_UMR_PAGES (1 << MLX5_MAX_UMR_SHIFT)
+
+#define MLX5_IB_UMR_OCTOWORD 16
+#define MLX5_IB_UMR_XLT_ALIGNMENT 64
+
+int mlx5r_umr_resource_init(struct mlx5_ib_dev *dev);
+void mlx5r_umr_resource_cleanup(struct mlx5_ib_dev *dev);
+
+int mlx5r_umr_init(struct mlx5_ib_dev *dev);
+void mlx5r_umr_cleanup(struct mlx5_ib_dev *dev);
+
+static inline bool mlx5r_umr_can_load_pas(struct mlx5_ib_dev *dev,
+ size_t length)
+{
+ /*
+ * umr_check_mkey_mask() rejects MLX5_MKEY_MASK_PAGE_SIZE which is
+ * always set if MLX5_IB_SEND_UMR_UPDATE_TRANSLATION (aka
+ * MLX5_IB_UPD_XLT_ADDR and MLX5_IB_UPD_XLT_ENABLE) is set. Thus, a mkey
+ * can never be enabled without this capability. Simplify this weird
+ * quirky hardware by just saying it can't use PAS lists with UMR at
+ * all.
+ */
+ if (MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled))
+ return false;
+
+ /*
+ * length is the size of the MR in bytes when mlx5_ib_update_xlt() is
+ * used.
+ */
+ if (!MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset) &&
+ length >= MLX5_MAX_UMR_PAGES * PAGE_SIZE)
+ return false;
+ return true;
+}
+
+/*
+ * true if an existing MR can be reconfigured to new access_flags using UMR.
+ * Older HW cannot use UMR to update certain elements of the MKC. See
+ * get_umr_update_access_mask() and umr_check_mkey_mask()
+ */
+static inline bool mlx5r_umr_can_reconfig(struct mlx5_ib_dev *dev,
+ unsigned int current_access_flags,
+ unsigned int target_access_flags)
+{
+ unsigned int diffs = current_access_flags ^ target_access_flags;
+
+ if ((diffs & IB_ACCESS_REMOTE_ATOMIC) &&
+ MLX5_CAP_GEN(dev->mdev, atomic) &&
+ MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled))
+ return false;
+
+ if ((diffs & IB_ACCESS_RELAXED_ORDERING) &&
+ MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write) &&
+ !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr))
+ return false;
+
+ if ((diffs & IB_ACCESS_RELAXED_ORDERING) &&
+ (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read) ||
+ MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_pci_enabled)) &&
+ !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr))
+ return false;
+
+ return true;
+}
+
+static inline u64 mlx5r_umr_get_xlt_octo(u64 bytes)
+{
+ return ALIGN(bytes, MLX5_IB_UMR_XLT_ALIGNMENT) /
+ MLX5_IB_UMR_OCTOWORD;
+}
+
+struct mlx5r_umr_context {
+ struct ib_cqe cqe;
+ enum ib_wc_status status;
+ struct completion done;
+};
+
+struct mlx5r_umr_wqe {
+ struct mlx5_wqe_umr_ctrl_seg ctrl_seg;
+ struct mlx5_mkey_seg mkey_seg;
+ struct mlx5_wqe_data_seg data_seg;
+};
+
+int mlx5r_umr_revoke_mr(struct mlx5_ib_mr *mr);
+int mlx5r_umr_rereg_pd_access(struct mlx5_ib_mr *mr, struct ib_pd *pd,
+ int access_flags);
+int mlx5r_umr_update_data_direct_ksm_pas_range(struct mlx5_ib_mr *mr,
+ unsigned int flags,
+ size_t start_block,
+ size_t nblocks);
+int mlx5r_umr_update_data_direct_ksm_pas(struct mlx5_ib_mr *mr, unsigned int flags);
+int mlx5r_umr_update_mr_pas_range(struct mlx5_ib_mr *mr, unsigned int flags,
+ size_t start_block, size_t nblocks);
+int mlx5r_umr_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags);
+int mlx5r_umr_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
+ int page_shift, int flags);
+int mlx5r_umr_update_mr_page_shift(struct mlx5_ib_mr *mr,
+ unsigned int page_shift,
+ bool dd);
+int mlx5r_umr_dmabuf_update_pgsz(struct mlx5_ib_mr *mr, u32 xlt_flags,
+ unsigned int page_shift);
+
+#endif /* _MLX5_IB_UMR_H */
diff --git a/drivers/infiniband/hw/mlx5/wr.c b/drivers/infiniband/hw/mlx5/wr.c
new file mode 100644
index 000000000000..9947feb7fb8a
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/wr.c
@@ -0,0 +1,1284 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2020, Mellanox Technologies inc. All rights reserved.
+ */
+
+#include <linux/gfp.h>
+#include <linux/mlx5/qp.h>
+#include <linux/mlx5/driver.h>
+#include "wr.h"
+#include "umr.h"
+
+static const u32 mlx5_ib_opcode[] = {
+ [IB_WR_SEND] = MLX5_OPCODE_SEND,
+ [IB_WR_LSO] = MLX5_OPCODE_LSO,
+ [IB_WR_SEND_WITH_IMM] = MLX5_OPCODE_SEND_IMM,
+ [IB_WR_RDMA_WRITE] = MLX5_OPCODE_RDMA_WRITE,
+ [IB_WR_RDMA_WRITE_WITH_IMM] = MLX5_OPCODE_RDMA_WRITE_IMM,
+ [IB_WR_RDMA_READ] = MLX5_OPCODE_RDMA_READ,
+ [IB_WR_ATOMIC_CMP_AND_SWP] = MLX5_OPCODE_ATOMIC_CS,
+ [IB_WR_ATOMIC_FETCH_AND_ADD] = MLX5_OPCODE_ATOMIC_FA,
+ [IB_WR_SEND_WITH_INV] = MLX5_OPCODE_SEND_INVAL,
+ [IB_WR_LOCAL_INV] = MLX5_OPCODE_UMR,
+ [IB_WR_REG_MR] = MLX5_OPCODE_UMR,
+ [IB_WR_MASKED_ATOMIC_CMP_AND_SWP] = MLX5_OPCODE_ATOMIC_MASKED_CS,
+ [IB_WR_MASKED_ATOMIC_FETCH_AND_ADD] = MLX5_OPCODE_ATOMIC_MASKED_FA,
+ [MLX5_IB_WR_UMR] = MLX5_OPCODE_UMR,
+};
+
+int mlx5r_wq_overflow(struct mlx5_ib_wq *wq, int nreq, struct ib_cq *ib_cq)
+{
+ struct mlx5_ib_cq *cq;
+ unsigned int cur;
+
+ cur = wq->head - wq->tail;
+ if (likely(cur + nreq < wq->max_post))
+ return 0;
+
+ cq = to_mcq(ib_cq);
+ spin_lock(&cq->lock);
+ cur = wq->head - wq->tail;
+ spin_unlock(&cq->lock);
+
+ return cur + nreq >= wq->max_post;
+}
+
+static __always_inline void set_raddr_seg(struct mlx5_wqe_raddr_seg *rseg,
+ u64 remote_addr, u32 rkey)
+{
+ rseg->raddr = cpu_to_be64(remote_addr);
+ rseg->rkey = cpu_to_be32(rkey);
+ rseg->reserved = 0;
+}
+
+static void set_eth_seg(const struct ib_send_wr *wr, struct mlx5_ib_qp *qp,
+ void **seg, int *size, void **cur_edge)
+{
+ struct mlx5_wqe_eth_seg *eseg = *seg;
+
+ memset(eseg, 0, sizeof(struct mlx5_wqe_eth_seg));
+
+ if (wr->send_flags & IB_SEND_IP_CSUM)
+ eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM |
+ MLX5_ETH_WQE_L4_CSUM;
+
+ if (wr->opcode == IB_WR_LSO) {
+ struct ib_ud_wr *ud_wr = container_of(wr, struct ib_ud_wr, wr);
+ size_t left, copysz;
+ void *pdata = ud_wr->header;
+ size_t stride;
+
+ left = ud_wr->hlen;
+ eseg->mss = cpu_to_be16(ud_wr->mss);
+ eseg->inline_hdr.sz = cpu_to_be16(left);
+
+ /* mlx5r_memcpy_send_wqe should get a 16B align address. Hence,
+ * we first copy up to the current edge and then, if needed,
+ * continue to mlx5r_memcpy_send_wqe.
+ */
+ copysz = min_t(u64, *cur_edge - (void *)eseg->inline_hdr.start,
+ left);
+ memcpy(eseg->inline_hdr.data, pdata, copysz);
+ stride = ALIGN(sizeof(struct mlx5_wqe_eth_seg) -
+ sizeof(eseg->inline_hdr.start) + copysz, 16);
+ *size += stride / 16;
+ *seg += stride;
+
+ if (copysz < left) {
+ handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
+ left -= copysz;
+ pdata += copysz;
+ mlx5r_memcpy_send_wqe(&qp->sq, cur_edge, seg, size,
+ pdata, left);
+ }
+
+ return;
+ }
+
+ *seg += sizeof(struct mlx5_wqe_eth_seg);
+ *size += sizeof(struct mlx5_wqe_eth_seg) / 16;
+}
+
+static void set_datagram_seg(struct mlx5_wqe_datagram_seg *dseg,
+ const struct ib_send_wr *wr)
+{
+ memcpy(&dseg->av, &to_mah(ud_wr(wr)->ah)->av, sizeof(struct mlx5_av));
+ dseg->av.dqp_dct =
+ cpu_to_be32(ud_wr(wr)->remote_qpn | MLX5_EXTENDED_UD_AV);
+ dseg->av.key.qkey.qkey = cpu_to_be32(ud_wr(wr)->remote_qkey);
+}
+
+static void set_data_ptr_seg(struct mlx5_wqe_data_seg *dseg, struct ib_sge *sg)
+{
+ dseg->byte_count = cpu_to_be32(sg->length);
+ dseg->lkey = cpu_to_be32(sg->lkey);
+ dseg->addr = cpu_to_be64(sg->addr);
+}
+
+static __be64 frwr_mkey_mask(bool atomic)
+{
+ u64 result;
+
+ result = MLX5_MKEY_MASK_LEN |
+ MLX5_MKEY_MASK_PAGE_SIZE |
+ MLX5_MKEY_MASK_START_ADDR |
+ MLX5_MKEY_MASK_EN_RINVAL |
+ MLX5_MKEY_MASK_KEY |
+ MLX5_MKEY_MASK_LR |
+ MLX5_MKEY_MASK_LW |
+ MLX5_MKEY_MASK_RR |
+ MLX5_MKEY_MASK_RW |
+ MLX5_MKEY_MASK_SMALL_FENCE |
+ MLX5_MKEY_MASK_FREE;
+
+ if (atomic)
+ result |= MLX5_MKEY_MASK_A;
+
+ return cpu_to_be64(result);
+}
+
+static __be64 sig_mkey_mask(void)
+{
+ u64 result;
+
+ result = MLX5_MKEY_MASK_LEN |
+ MLX5_MKEY_MASK_PAGE_SIZE |
+ MLX5_MKEY_MASK_START_ADDR |
+ MLX5_MKEY_MASK_EN_SIGERR |
+ MLX5_MKEY_MASK_EN_RINVAL |
+ MLX5_MKEY_MASK_KEY |
+ MLX5_MKEY_MASK_LR |
+ MLX5_MKEY_MASK_LW |
+ MLX5_MKEY_MASK_RR |
+ MLX5_MKEY_MASK_RW |
+ MLX5_MKEY_MASK_SMALL_FENCE |
+ MLX5_MKEY_MASK_FREE |
+ MLX5_MKEY_MASK_BSF_EN;
+
+ return cpu_to_be64(result);
+}
+
+static void set_reg_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr,
+ struct mlx5_ib_mr *mr, u8 flags, bool atomic)
+{
+ int size = (mr->mmkey.ndescs + mr->meta_ndescs) * mr->desc_size;
+
+ memset(umr, 0, sizeof(*umr));
+
+ umr->flags = flags;
+ umr->xlt_octowords = cpu_to_be16(mlx5r_umr_get_xlt_octo(size));
+ umr->mkey_mask = frwr_mkey_mask(atomic);
+}
+
+static void set_linv_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr)
+{
+ memset(umr, 0, sizeof(*umr));
+ umr->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE);
+ umr->flags = MLX5_UMR_INLINE;
+}
+
+static u8 get_umr_flags(int acc)
+{
+ return (acc & IB_ACCESS_REMOTE_ATOMIC ? MLX5_PERM_ATOMIC : 0) |
+ (acc & IB_ACCESS_REMOTE_WRITE ? MLX5_PERM_REMOTE_WRITE : 0) |
+ (acc & IB_ACCESS_REMOTE_READ ? MLX5_PERM_REMOTE_READ : 0) |
+ (acc & IB_ACCESS_LOCAL_WRITE ? MLX5_PERM_LOCAL_WRITE : 0) |
+ MLX5_PERM_LOCAL_READ | MLX5_PERM_UMR_EN;
+}
+
+static void set_reg_mkey_seg(struct mlx5_mkey_seg *seg,
+ struct mlx5_ib_mr *mr,
+ u32 key, int access)
+{
+ int ndescs = ALIGN(mr->mmkey.ndescs + mr->meta_ndescs, 8) >> 1;
+
+ memset(seg, 0, sizeof(*seg));
+
+ if (mr->access_mode == MLX5_MKC_ACCESS_MODE_MTT)
+ seg->log2_page_size = ilog2(mr->ibmr.page_size);
+ else if (mr->access_mode == MLX5_MKC_ACCESS_MODE_KLMS)
+ /* KLMs take twice the size of MTTs */
+ ndescs *= 2;
+
+ seg->flags = get_umr_flags(access) | mr->access_mode;
+ seg->qpn_mkey7_0 = cpu_to_be32((key & 0xff) | 0xffffff00);
+ seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL);
+ seg->start_addr = cpu_to_be64(mr->ibmr.iova);
+ seg->len = cpu_to_be64(mr->ibmr.length);
+ seg->xlt_oct_size = cpu_to_be32(ndescs);
+}
+
+static void set_linv_mkey_seg(struct mlx5_mkey_seg *seg)
+{
+ memset(seg, 0, sizeof(*seg));
+ seg->status = MLX5_MKEY_STATUS_FREE;
+}
+
+static void set_reg_data_seg(struct mlx5_wqe_data_seg *dseg,
+ struct mlx5_ib_mr *mr,
+ struct mlx5_ib_pd *pd)
+{
+ int bcount = mr->desc_size * (mr->mmkey.ndescs + mr->meta_ndescs);
+
+ dseg->addr = cpu_to_be64(mr->desc_map);
+ dseg->byte_count = cpu_to_be32(ALIGN(bcount, 64));
+ dseg->lkey = cpu_to_be32(pd->ibpd.local_dma_lkey);
+}
+
+static __be32 send_ieth(const struct ib_send_wr *wr)
+{
+ switch (wr->opcode) {
+ case IB_WR_SEND_WITH_IMM:
+ case IB_WR_RDMA_WRITE_WITH_IMM:
+ return wr->ex.imm_data;
+
+ case IB_WR_SEND_WITH_INV:
+ return cpu_to_be32(wr->ex.invalidate_rkey);
+
+ default:
+ return 0;
+ }
+}
+
+static u8 calc_sig(void *wqe, int size)
+{
+ u8 *p = wqe;
+ u8 res = 0;
+ int i;
+
+ for (i = 0; i < size; i++)
+ res ^= p[i];
+
+ return ~res;
+}
+
+static u8 wq_sig(void *wqe)
+{
+ return calc_sig(wqe, (*((u8 *)wqe + 8) & 0x3f) << 4);
+}
+
+static int set_data_inl_seg(struct mlx5_ib_qp *qp, const struct ib_send_wr *wr,
+ void **wqe, int *wqe_sz, void **cur_edge)
+{
+ struct mlx5_wqe_inline_seg *seg;
+ size_t offset;
+ int inl = 0;
+ int i;
+
+ seg = *wqe;
+ *wqe += sizeof(*seg);
+ offset = sizeof(*seg);
+
+ for (i = 0; i < wr->num_sge; i++) {
+ size_t len = wr->sg_list[i].length;
+ void *addr = (void *)(unsigned long)(wr->sg_list[i].addr);
+
+ inl += len;
+
+ if (unlikely(inl > qp->max_inline_data))
+ return -ENOMEM;
+
+ while (likely(len)) {
+ size_t leftlen;
+ size_t copysz;
+
+ handle_post_send_edge(&qp->sq, wqe,
+ *wqe_sz + (offset >> 4),
+ cur_edge);
+
+ leftlen = *cur_edge - *wqe;
+ copysz = min_t(size_t, leftlen, len);
+
+ memcpy(*wqe, addr, copysz);
+ len -= copysz;
+ addr += copysz;
+ *wqe += copysz;
+ offset += copysz;
+ }
+ }
+
+ seg->byte_count = cpu_to_be32(inl | MLX5_INLINE_SEG);
+
+ *wqe_sz += ALIGN(inl + sizeof(seg->byte_count), 16) / 16;
+
+ return 0;
+}
+
+static u16 prot_field_size(enum ib_signature_type type)
+{
+ switch (type) {
+ case IB_SIG_TYPE_T10_DIF:
+ return MLX5_DIF_SIZE;
+ default:
+ return 0;
+ }
+}
+
+static u8 bs_selector(int block_size)
+{
+ switch (block_size) {
+ case 512: return 0x1;
+ case 520: return 0x2;
+ case 4096: return 0x3;
+ case 4160: return 0x4;
+ case 1073741824: return 0x5;
+ default: return 0;
+ }
+}
+
+static void mlx5_fill_inl_bsf(struct ib_sig_domain *domain,
+ struct mlx5_bsf_inl *inl)
+{
+ /* Valid inline section and allow BSF refresh */
+ inl->vld_refresh = cpu_to_be16(MLX5_BSF_INL_VALID |
+ MLX5_BSF_REFRESH_DIF);
+ inl->dif_apptag = cpu_to_be16(domain->sig.dif.app_tag);
+ inl->dif_reftag = cpu_to_be32(domain->sig.dif.ref_tag);
+ /* repeating block */
+ inl->rp_inv_seed = MLX5_BSF_REPEAT_BLOCK;
+ inl->sig_type = domain->sig.dif.bg_type == IB_T10DIF_CRC ?
+ MLX5_DIF_CRC : MLX5_DIF_IPCS;
+
+ if (domain->sig.dif.ref_remap)
+ inl->dif_inc_ref_guard_check |= MLX5_BSF_INC_REFTAG;
+
+ if (domain->sig.dif.app_escape) {
+ if (domain->sig.dif.ref_escape)
+ inl->dif_inc_ref_guard_check |= MLX5_BSF_APPREF_ESCAPE;
+ else
+ inl->dif_inc_ref_guard_check |= MLX5_BSF_APPTAG_ESCAPE;
+ }
+
+ inl->dif_app_bitmask_check =
+ cpu_to_be16(domain->sig.dif.apptag_check_mask);
+}
+
+static int mlx5_set_bsf(struct ib_mr *sig_mr,
+ struct ib_sig_attrs *sig_attrs,
+ struct mlx5_bsf *bsf, u32 data_size)
+{
+ struct mlx5_core_sig_ctx *msig = to_mmr(sig_mr)->sig;
+ struct mlx5_bsf_basic *basic = &bsf->basic;
+ struct ib_sig_domain *mem = &sig_attrs->mem;
+ struct ib_sig_domain *wire = &sig_attrs->wire;
+
+ memset(bsf, 0, sizeof(*bsf));
+
+ /* Basic + Extended + Inline */
+ basic->bsf_size_sbs = 1 << 7;
+ /* Input domain check byte mask */
+ basic->check_byte_mask = sig_attrs->check_mask;
+ basic->raw_data_size = cpu_to_be32(data_size);
+
+ /* Memory domain */
+ switch (sig_attrs->mem.sig_type) {
+ case IB_SIG_TYPE_NONE:
+ break;
+ case IB_SIG_TYPE_T10_DIF:
+ basic->mem.bs_selector = bs_selector(mem->sig.dif.pi_interval);
+ basic->m_bfs_psv = cpu_to_be32(msig->psv_memory.psv_idx);
+ mlx5_fill_inl_bsf(mem, &bsf->m_inl);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ /* Wire domain */
+ switch (sig_attrs->wire.sig_type) {
+ case IB_SIG_TYPE_NONE:
+ break;
+ case IB_SIG_TYPE_T10_DIF:
+ if (mem->sig.dif.pi_interval == wire->sig.dif.pi_interval &&
+ mem->sig_type == wire->sig_type) {
+ /* Same block structure */
+ basic->bsf_size_sbs |= 1 << 4;
+ if (mem->sig.dif.bg_type == wire->sig.dif.bg_type)
+ basic->wire.copy_byte_mask |= MLX5_CPY_GRD_MASK;
+ if (mem->sig.dif.app_tag == wire->sig.dif.app_tag)
+ basic->wire.copy_byte_mask |= MLX5_CPY_APP_MASK;
+ if (mem->sig.dif.ref_tag == wire->sig.dif.ref_tag)
+ basic->wire.copy_byte_mask |= MLX5_CPY_REF_MASK;
+ } else
+ basic->wire.bs_selector =
+ bs_selector(wire->sig.dif.pi_interval);
+
+ basic->w_bfs_psv = cpu_to_be32(msig->psv_wire.psv_idx);
+ mlx5_fill_inl_bsf(wire, &bsf->w_inl);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+
+static int set_sig_data_segment(const struct ib_send_wr *send_wr,
+ struct ib_mr *sig_mr,
+ struct ib_sig_attrs *sig_attrs,
+ struct mlx5_ib_qp *qp, void **seg, int *size,
+ void **cur_edge)
+{
+ struct mlx5_bsf *bsf;
+ u32 data_len;
+ u32 data_key;
+ u64 data_va;
+ u32 prot_len = 0;
+ u32 prot_key = 0;
+ u64 prot_va = 0;
+ bool prot = false;
+ int ret;
+ int wqe_size;
+ struct mlx5_ib_mr *mr = to_mmr(sig_mr);
+ struct mlx5_ib_mr *pi_mr = mr->pi_mr;
+
+ data_len = pi_mr->data_length;
+ data_key = pi_mr->ibmr.lkey;
+ data_va = pi_mr->data_iova;
+ if (pi_mr->meta_ndescs) {
+ prot_len = pi_mr->meta_length;
+ prot_key = pi_mr->ibmr.lkey;
+ prot_va = pi_mr->pi_iova;
+ prot = true;
+ }
+
+ if (!prot || (data_key == prot_key && data_va == prot_va &&
+ data_len == prot_len)) {
+ /**
+ * Source domain doesn't contain signature information
+ * or data and protection are interleaved in memory.
+ * So need construct:
+ * ------------------
+ * | data_klm |
+ * ------------------
+ * | BSF |
+ * ------------------
+ **/
+ struct mlx5_klm *data_klm = *seg;
+
+ data_klm->bcount = cpu_to_be32(data_len);
+ data_klm->key = cpu_to_be32(data_key);
+ data_klm->va = cpu_to_be64(data_va);
+ wqe_size = ALIGN(sizeof(*data_klm), 64);
+ } else {
+ /**
+ * Source domain contains signature information
+ * So need construct a strided block format:
+ * ---------------------------
+ * | stride_block_ctrl |
+ * ---------------------------
+ * | data_klm |
+ * ---------------------------
+ * | prot_klm |
+ * ---------------------------
+ * | BSF |
+ * ---------------------------
+ **/
+ struct mlx5_stride_block_ctrl_seg *sblock_ctrl;
+ struct mlx5_stride_block_entry *data_sentry;
+ struct mlx5_stride_block_entry *prot_sentry;
+ u16 block_size = sig_attrs->mem.sig.dif.pi_interval;
+ int prot_size;
+
+ sblock_ctrl = *seg;
+ data_sentry = (void *)sblock_ctrl + sizeof(*sblock_ctrl);
+ prot_sentry = (void *)data_sentry + sizeof(*data_sentry);
+
+ prot_size = prot_field_size(sig_attrs->mem.sig_type);
+ if (!prot_size) {
+ pr_err("Bad block size given: %u\n", block_size);
+ return -EINVAL;
+ }
+ sblock_ctrl->bcount_per_cycle = cpu_to_be32(block_size +
+ prot_size);
+ sblock_ctrl->op = cpu_to_be32(MLX5_STRIDE_BLOCK_OP);
+ sblock_ctrl->repeat_count = cpu_to_be32(data_len / block_size);
+ sblock_ctrl->num_entries = cpu_to_be16(2);
+
+ data_sentry->bcount = cpu_to_be16(block_size);
+ data_sentry->key = cpu_to_be32(data_key);
+ data_sentry->va = cpu_to_be64(data_va);
+ data_sentry->stride = cpu_to_be16(block_size);
+
+ prot_sentry->bcount = cpu_to_be16(prot_size);
+ prot_sentry->key = cpu_to_be32(prot_key);
+ prot_sentry->va = cpu_to_be64(prot_va);
+ prot_sentry->stride = cpu_to_be16(prot_size);
+
+ wqe_size = ALIGN(sizeof(*sblock_ctrl) + sizeof(*data_sentry) +
+ sizeof(*prot_sentry), 64);
+ }
+
+ *seg += wqe_size;
+ *size += wqe_size / 16;
+ handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
+
+ bsf = *seg;
+ ret = mlx5_set_bsf(sig_mr, sig_attrs, bsf, data_len);
+ if (ret)
+ return -EINVAL;
+
+ *seg += sizeof(*bsf);
+ *size += sizeof(*bsf) / 16;
+ handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
+
+ return 0;
+}
+
+static void set_sig_mkey_segment(struct mlx5_mkey_seg *seg,
+ struct ib_mr *sig_mr, int access_flags,
+ u32 size, u32 length, u32 pdn)
+{
+ u32 sig_key = sig_mr->rkey;
+ u8 sigerr = to_mmr(sig_mr)->sig->sigerr_count & 1;
+
+ memset(seg, 0, sizeof(*seg));
+
+ seg->flags = get_umr_flags(access_flags) | MLX5_MKC_ACCESS_MODE_KLMS;
+ seg->qpn_mkey7_0 = cpu_to_be32((sig_key & 0xff) | 0xffffff00);
+ seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL | sigerr << 26 |
+ MLX5_MKEY_BSF_EN | pdn);
+ seg->len = cpu_to_be64(length);
+ seg->xlt_oct_size = cpu_to_be32(mlx5r_umr_get_xlt_octo(size));
+ seg->bsfs_octo_size = cpu_to_be32(MLX5_MKEY_BSF_OCTO_SIZE);
+}
+
+static void set_sig_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
+ u32 size)
+{
+ memset(umr, 0, sizeof(*umr));
+
+ umr->flags = MLX5_FLAGS_INLINE | MLX5_FLAGS_CHECK_FREE;
+ umr->xlt_octowords = cpu_to_be16(mlx5r_umr_get_xlt_octo(size));
+ umr->bsf_octowords = cpu_to_be16(MLX5_MKEY_BSF_OCTO_SIZE);
+ umr->mkey_mask = sig_mkey_mask();
+}
+
+static int set_pi_umr_wr(const struct ib_send_wr *send_wr,
+ struct mlx5_ib_qp *qp, void **seg, int *size,
+ void **cur_edge)
+{
+ const struct ib_reg_wr *wr = reg_wr(send_wr);
+ struct mlx5_ib_mr *sig_mr = to_mmr(wr->mr);
+ struct mlx5_ib_mr *pi_mr = sig_mr->pi_mr;
+ struct ib_sig_attrs *sig_attrs = sig_mr->ibmr.sig_attrs;
+ u32 pdn = to_mpd(qp->ibqp.pd)->pdn;
+ u32 xlt_size;
+ int region_len, ret;
+
+ if (unlikely(send_wr->num_sge != 0) ||
+ unlikely(wr->access & IB_ACCESS_REMOTE_ATOMIC) ||
+ unlikely(!sig_mr->sig) || unlikely(!qp->ibqp.integrity_en) ||
+ unlikely(!sig_mr->sig->sig_status_checked))
+ return -EINVAL;
+
+ /* length of the protected region, data + protection */
+ region_len = pi_mr->ibmr.length;
+
+ /**
+ * KLM octoword size - if protection was provided
+ * then we use strided block format (3 octowords),
+ * else we use single KLM (1 octoword)
+ **/
+ if (sig_attrs->mem.sig_type != IB_SIG_TYPE_NONE)
+ xlt_size = 0x30;
+ else
+ xlt_size = sizeof(struct mlx5_klm);
+
+ set_sig_umr_segment(*seg, xlt_size);
+ *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
+ *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
+ handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
+
+ set_sig_mkey_segment(*seg, wr->mr, wr->access, xlt_size, region_len,
+ pdn);
+ *seg += sizeof(struct mlx5_mkey_seg);
+ *size += sizeof(struct mlx5_mkey_seg) / 16;
+ handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
+
+ ret = set_sig_data_segment(send_wr, wr->mr, sig_attrs, qp, seg, size,
+ cur_edge);
+ if (ret)
+ return ret;
+
+ sig_mr->sig->sig_status_checked = false;
+ return 0;
+}
+
+static int set_psv_wr(struct ib_sig_domain *domain,
+ u32 psv_idx, void **seg, int *size)
+{
+ struct mlx5_seg_set_psv *psv_seg = *seg;
+
+ memset(psv_seg, 0, sizeof(*psv_seg));
+ psv_seg->psv_num = cpu_to_be32(psv_idx);
+ switch (domain->sig_type) {
+ case IB_SIG_TYPE_NONE:
+ break;
+ case IB_SIG_TYPE_T10_DIF:
+ psv_seg->transient_sig = cpu_to_be32(domain->sig.dif.bg << 16 |
+ domain->sig.dif.app_tag);
+ psv_seg->ref_tag = cpu_to_be32(domain->sig.dif.ref_tag);
+ break;
+ default:
+ pr_err("Bad signature type (%d) is given.\n",
+ domain->sig_type);
+ return -EINVAL;
+ }
+
+ *seg += sizeof(*psv_seg);
+ *size += sizeof(*psv_seg) / 16;
+
+ return 0;
+}
+
+static int set_reg_wr(struct mlx5_ib_qp *qp,
+ const struct ib_reg_wr *wr,
+ void **seg, int *size, void **cur_edge,
+ bool check_not_free)
+{
+ struct mlx5_ib_mr *mr = to_mmr(wr->mr);
+ struct mlx5_ib_pd *pd = to_mpd(qp->ibqp.pd);
+ struct mlx5_ib_dev *dev = to_mdev(pd->ibpd.device);
+ int mr_list_size = (mr->mmkey.ndescs + mr->meta_ndescs) * mr->desc_size;
+ bool umr_inline = mr_list_size <= MLX5_IB_SQ_UMR_INLINE_THRESHOLD;
+ bool atomic = wr->access & IB_ACCESS_REMOTE_ATOMIC;
+ u8 flags = 0;
+
+ /* Matches access in mlx5_set_umr_free_mkey().
+ * Relaxed Ordering is set implicitly in mlx5_set_umr_free_mkey() and
+ * kernel ULPs are not aware of it, so we don't set it here.
+ */
+ if (!mlx5r_umr_can_reconfig(dev, 0, wr->access)) {
+ mlx5_ib_warn(
+ to_mdev(qp->ibqp.device),
+ "Fast update for MR access flags is not possible\n");
+ return -EINVAL;
+ }
+
+ if (unlikely(wr->wr.send_flags & IB_SEND_INLINE)) {
+ mlx5_ib_warn(to_mdev(qp->ibqp.device),
+ "Invalid IB_SEND_INLINE send flag\n");
+ return -EINVAL;
+ }
+
+ if (check_not_free)
+ flags |= MLX5_UMR_CHECK_NOT_FREE;
+ if (umr_inline)
+ flags |= MLX5_UMR_INLINE;
+
+ set_reg_umr_seg(*seg, mr, flags, atomic);
+ *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
+ *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
+ handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
+
+ set_reg_mkey_seg(*seg, mr, wr->key, wr->access);
+ *seg += sizeof(struct mlx5_mkey_seg);
+ *size += sizeof(struct mlx5_mkey_seg) / 16;
+ handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
+
+ if (umr_inline) {
+ mlx5r_memcpy_send_wqe(&qp->sq, cur_edge, seg, size, mr->descs,
+ mr_list_size);
+ *size = ALIGN(*size, MLX5_SEND_WQE_BB >> 4);
+ } else {
+ set_reg_data_seg(*seg, mr, pd);
+ *seg += sizeof(struct mlx5_wqe_data_seg);
+ *size += (sizeof(struct mlx5_wqe_data_seg) / 16);
+ }
+ return 0;
+}
+
+static void set_linv_wr(struct mlx5_ib_qp *qp, void **seg, int *size,
+ void **cur_edge)
+{
+ set_linv_umr_seg(*seg);
+ *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
+ *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
+ handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
+ set_linv_mkey_seg(*seg);
+ *seg += sizeof(struct mlx5_mkey_seg);
+ *size += sizeof(struct mlx5_mkey_seg) / 16;
+ handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
+}
+
+static void dump_wqe(struct mlx5_ib_qp *qp, u32 idx, int size_16)
+{
+ __be32 *p = NULL;
+ int i, j;
+
+ pr_debug("dump WQE index %u:\n", idx);
+ for (i = 0, j = 0; i < size_16 * 4; i += 4, j += 4) {
+ if ((i & 0xf) == 0) {
+ p = mlx5_frag_buf_get_wqe(&qp->sq.fbc, idx);
+ pr_debug("WQBB at %p:\n", (void *)p);
+ j = 0;
+ idx = (idx + 1) & (qp->sq.wqe_cnt - 1);
+ }
+ pr_debug("%08x %08x %08x %08x\n", be32_to_cpu(p[j]),
+ be32_to_cpu(p[j + 1]), be32_to_cpu(p[j + 2]),
+ be32_to_cpu(p[j + 3]));
+ }
+}
+
+int mlx5r_begin_wqe(struct mlx5_ib_qp *qp, void **seg,
+ struct mlx5_wqe_ctrl_seg **ctrl, unsigned int *idx,
+ int *size, void **cur_edge, int nreq, __be32 general_id,
+ bool send_signaled, bool solicited)
+{
+ if (unlikely(mlx5r_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)))
+ return -ENOMEM;
+
+ *idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1);
+ *seg = mlx5_frag_buf_get_wqe(&qp->sq.fbc, *idx);
+ *ctrl = *seg;
+ *(uint32_t *)(*seg + 8) = 0;
+ (*ctrl)->general_id = general_id;
+ (*ctrl)->fm_ce_se = qp->sq_signal_bits |
+ (send_signaled ? MLX5_WQE_CTRL_CQ_UPDATE : 0) |
+ (solicited ? MLX5_WQE_CTRL_SOLICITED : 0);
+
+ *seg += sizeof(**ctrl);
+ *size = sizeof(**ctrl) / 16;
+ *cur_edge = qp->sq.cur_edge;
+
+ return 0;
+}
+
+static int begin_wqe(struct mlx5_ib_qp *qp, void **seg,
+ struct mlx5_wqe_ctrl_seg **ctrl,
+ const struct ib_send_wr *wr, unsigned int *idx, int *size,
+ void **cur_edge, int nreq)
+{
+ return mlx5r_begin_wqe(qp, seg, ctrl, idx, size, cur_edge, nreq,
+ send_ieth(wr), wr->send_flags & IB_SEND_SIGNALED,
+ wr->send_flags & IB_SEND_SOLICITED);
+}
+
+void mlx5r_finish_wqe(struct mlx5_ib_qp *qp, struct mlx5_wqe_ctrl_seg *ctrl,
+ void *seg, u8 size, void *cur_edge, unsigned int idx,
+ u64 wr_id, int nreq, u8 fence, u32 mlx5_opcode)
+{
+ u8 opmod = 0;
+
+ ctrl->opmod_idx_opcode = cpu_to_be32(((u32)(qp->sq.cur_post) << 8) |
+ mlx5_opcode | ((u32)opmod << 24));
+ ctrl->qpn_ds = cpu_to_be32(size | (qp->trans_qp.base.mqp.qpn << 8));
+ ctrl->fm_ce_se |= fence;
+ if (unlikely(qp->flags_en & MLX5_QP_FLAG_SIGNATURE))
+ ctrl->signature = wq_sig(ctrl);
+
+ qp->sq.wrid[idx] = wr_id;
+ qp->sq.w_list[idx].opcode = mlx5_opcode;
+ qp->sq.wqe_head[idx] = qp->sq.head + nreq;
+ qp->sq.cur_post += DIV_ROUND_UP(size * 16, MLX5_SEND_WQE_BB);
+ qp->sq.w_list[idx].next = qp->sq.cur_post;
+
+ /* We save the edge which was possibly updated during the WQE
+ * construction, into SQ's cache.
+ */
+ seg = PTR_ALIGN(seg, MLX5_SEND_WQE_BB);
+ qp->sq.cur_edge = (unlikely(seg == cur_edge)) ?
+ get_sq_edge(&qp->sq, qp->sq.cur_post &
+ (qp->sq.wqe_cnt - 1)) :
+ cur_edge;
+}
+
+static void handle_rdma_op(const struct ib_send_wr *wr, void **seg, int *size)
+{
+ set_raddr_seg(*seg, rdma_wr(wr)->remote_addr, rdma_wr(wr)->rkey);
+ *seg += sizeof(struct mlx5_wqe_raddr_seg);
+ *size += sizeof(struct mlx5_wqe_raddr_seg) / 16;
+}
+
+static void handle_local_inv(struct mlx5_ib_qp *qp, const struct ib_send_wr *wr,
+ struct mlx5_wqe_ctrl_seg **ctrl, void **seg,
+ int *size, void **cur_edge, unsigned int idx)
+{
+ qp->sq.wr_data[idx] = IB_WR_LOCAL_INV;
+ (*ctrl)->imm = cpu_to_be32(wr->ex.invalidate_rkey);
+ set_linv_wr(qp, seg, size, cur_edge);
+}
+
+static int handle_reg_mr(struct mlx5_ib_qp *qp, const struct ib_send_wr *wr,
+ struct mlx5_wqe_ctrl_seg **ctrl, void **seg, int *size,
+ void **cur_edge, unsigned int idx)
+{
+ qp->sq.wr_data[idx] = IB_WR_REG_MR;
+ (*ctrl)->imm = cpu_to_be32(reg_wr(wr)->key);
+ return set_reg_wr(qp, reg_wr(wr), seg, size, cur_edge, true);
+}
+
+static int handle_psv(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
+ const struct ib_send_wr *wr,
+ struct mlx5_wqe_ctrl_seg **ctrl, void **seg, int *size,
+ void **cur_edge, unsigned int *idx, int nreq,
+ struct ib_sig_domain *domain, u32 psv_index,
+ u8 next_fence)
+{
+ int err;
+
+ /*
+ * SET_PSV WQEs are not signaled and solicited on error.
+ */
+ err = mlx5r_begin_wqe(qp, seg, ctrl, idx, size, cur_edge, nreq,
+ send_ieth(wr), false, true);
+ if (unlikely(err)) {
+ mlx5_ib_warn(dev, "\n");
+ err = -ENOMEM;
+ goto out;
+ }
+ err = set_psv_wr(domain, psv_index, seg, size);
+ if (unlikely(err)) {
+ mlx5_ib_warn(dev, "\n");
+ goto out;
+ }
+ mlx5r_finish_wqe(qp, *ctrl, *seg, *size, *cur_edge, *idx, wr->wr_id,
+ nreq, next_fence, MLX5_OPCODE_SET_PSV);
+
+out:
+ return err;
+}
+
+static int handle_reg_mr_integrity(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_qp *qp,
+ const struct ib_send_wr *wr,
+ struct mlx5_wqe_ctrl_seg **ctrl, void **seg,
+ int *size, void **cur_edge,
+ unsigned int *idx, int nreq, u8 fence,
+ u8 next_fence)
+{
+ struct mlx5_ib_mr *mr;
+ struct mlx5_ib_mr *pi_mr;
+ struct mlx5_ib_mr pa_pi_mr;
+ struct ib_sig_attrs *sig_attrs;
+ struct ib_reg_wr reg_pi_wr;
+ int err;
+
+ qp->sq.wr_data[*idx] = IB_WR_REG_MR_INTEGRITY;
+
+ mr = to_mmr(reg_wr(wr)->mr);
+ pi_mr = mr->pi_mr;
+
+ if (pi_mr) {
+ memset(&reg_pi_wr, 0,
+ sizeof(struct ib_reg_wr));
+
+ reg_pi_wr.mr = &pi_mr->ibmr;
+ reg_pi_wr.access = reg_wr(wr)->access;
+ reg_pi_wr.key = pi_mr->ibmr.rkey;
+
+ (*ctrl)->imm = cpu_to_be32(reg_pi_wr.key);
+ /* UMR for data + prot registration */
+ err = set_reg_wr(qp, &reg_pi_wr, seg, size, cur_edge, false);
+ if (unlikely(err))
+ goto out;
+
+ mlx5r_finish_wqe(qp, *ctrl, *seg, *size, *cur_edge, *idx,
+ wr->wr_id, nreq, fence, MLX5_OPCODE_UMR);
+
+ err = begin_wqe(qp, seg, ctrl, wr, idx, size, cur_edge, nreq);
+ if (unlikely(err)) {
+ mlx5_ib_warn(dev, "\n");
+ err = -ENOMEM;
+ goto out;
+ }
+ } else {
+ memset(&pa_pi_mr, 0, sizeof(struct mlx5_ib_mr));
+ /* No UMR, use local_dma_lkey */
+ pa_pi_mr.ibmr.lkey = mr->ibmr.pd->local_dma_lkey;
+ pa_pi_mr.mmkey.ndescs = mr->mmkey.ndescs;
+ pa_pi_mr.data_length = mr->data_length;
+ pa_pi_mr.data_iova = mr->data_iova;
+ if (mr->meta_ndescs) {
+ pa_pi_mr.meta_ndescs = mr->meta_ndescs;
+ pa_pi_mr.meta_length = mr->meta_length;
+ pa_pi_mr.pi_iova = mr->pi_iova;
+ }
+
+ pa_pi_mr.ibmr.length = mr->ibmr.length;
+ mr->pi_mr = &pa_pi_mr;
+ }
+ (*ctrl)->imm = cpu_to_be32(mr->ibmr.rkey);
+ /* UMR for sig MR */
+ err = set_pi_umr_wr(wr, qp, seg, size, cur_edge);
+ if (unlikely(err)) {
+ mlx5_ib_warn(dev, "\n");
+ goto out;
+ }
+ mlx5r_finish_wqe(qp, *ctrl, *seg, *size, *cur_edge, *idx, wr->wr_id,
+ nreq, fence, MLX5_OPCODE_UMR);
+
+ sig_attrs = mr->ibmr.sig_attrs;
+ err = handle_psv(dev, qp, wr, ctrl, seg, size, cur_edge, idx, nreq,
+ &sig_attrs->mem, mr->sig->psv_memory.psv_idx,
+ next_fence);
+ if (unlikely(err))
+ goto out;
+
+ err = handle_psv(dev, qp, wr, ctrl, seg, size, cur_edge, idx, nreq,
+ &sig_attrs->wire, mr->sig->psv_wire.psv_idx,
+ next_fence);
+ if (unlikely(err))
+ goto out;
+
+ qp->next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
+
+out:
+ return err;
+}
+
+static int handle_qpt_rc(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
+ const struct ib_send_wr *wr,
+ struct mlx5_wqe_ctrl_seg **ctrl, void **seg, int *size,
+ void **cur_edge, unsigned int *idx, int nreq, u8 fence,
+ u8 next_fence, int *num_sge)
+{
+ int err = 0;
+
+ switch (wr->opcode) {
+ case IB_WR_RDMA_READ:
+ case IB_WR_RDMA_WRITE:
+ case IB_WR_RDMA_WRITE_WITH_IMM:
+ handle_rdma_op(wr, seg, size);
+ break;
+
+ case IB_WR_ATOMIC_CMP_AND_SWP:
+ case IB_WR_ATOMIC_FETCH_AND_ADD:
+ case IB_WR_MASKED_ATOMIC_CMP_AND_SWP:
+ mlx5_ib_warn(dev, "Atomic operations are not supported yet\n");
+ err = -EOPNOTSUPP;
+ goto out;
+
+ case IB_WR_LOCAL_INV:
+ handle_local_inv(qp, wr, ctrl, seg, size, cur_edge, *idx);
+ *num_sge = 0;
+ break;
+
+ case IB_WR_REG_MR:
+ err = handle_reg_mr(qp, wr, ctrl, seg, size, cur_edge, *idx);
+ if (unlikely(err))
+ goto out;
+ *num_sge = 0;
+ break;
+
+ case IB_WR_REG_MR_INTEGRITY:
+ err = handle_reg_mr_integrity(dev, qp, wr, ctrl, seg, size,
+ cur_edge, idx, nreq, fence,
+ next_fence);
+ if (unlikely(err))
+ goto out;
+ *num_sge = 0;
+ break;
+
+ default:
+ break;
+ }
+
+out:
+ return err;
+}
+
+static void handle_qpt_uc(const struct ib_send_wr *wr, void **seg, int *size)
+{
+ switch (wr->opcode) {
+ case IB_WR_RDMA_WRITE:
+ case IB_WR_RDMA_WRITE_WITH_IMM:
+ handle_rdma_op(wr, seg, size);
+ break;
+ default:
+ break;
+ }
+}
+
+static void handle_qpt_hw_gsi(struct mlx5_ib_qp *qp,
+ const struct ib_send_wr *wr, void **seg,
+ int *size, void **cur_edge)
+{
+ set_datagram_seg(*seg, wr);
+ *seg += sizeof(struct mlx5_wqe_datagram_seg);
+ *size += sizeof(struct mlx5_wqe_datagram_seg) / 16;
+ handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
+}
+
+static void handle_qpt_ud(struct mlx5_ib_qp *qp, const struct ib_send_wr *wr,
+ void **seg, int *size, void **cur_edge)
+{
+ set_datagram_seg(*seg, wr);
+ *seg += sizeof(struct mlx5_wqe_datagram_seg);
+ *size += sizeof(struct mlx5_wqe_datagram_seg) / 16;
+ handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
+
+ /* handle qp that supports ud offload */
+ if (qp->flags & IB_QP_CREATE_IPOIB_UD_LSO) {
+ struct mlx5_wqe_eth_pad *pad;
+
+ pad = *seg;
+ memset(pad, 0, sizeof(struct mlx5_wqe_eth_pad));
+ *seg += sizeof(struct mlx5_wqe_eth_pad);
+ *size += sizeof(struct mlx5_wqe_eth_pad) / 16;
+ set_eth_seg(wr, qp, seg, size, cur_edge);
+ handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
+ }
+}
+
+void mlx5r_ring_db(struct mlx5_ib_qp *qp, unsigned int nreq,
+ struct mlx5_wqe_ctrl_seg *ctrl)
+{
+ struct mlx5_bf *bf = &qp->bf;
+
+ qp->sq.head += nreq;
+
+ /* Make sure that descriptors are written before
+ * updating doorbell record and ringing the doorbell
+ */
+ wmb();
+
+ qp->db.db[MLX5_SND_DBR] = cpu_to_be32(qp->sq.cur_post);
+
+ /* Make sure doorbell record is visible to the HCA before
+ * we hit doorbell.
+ */
+ wmb();
+
+ mlx5_write64((__be32 *)ctrl, bf->bfreg->map + bf->offset);
+ /* Make sure doorbells don't leak out of SQ spinlock
+ * and reach the HCA out of order.
+ */
+ bf->offset ^= bf->buf_size;
+}
+
+int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+ const struct ib_send_wr **bad_wr, bool drain)
+{
+ struct mlx5_wqe_ctrl_seg *ctrl = NULL; /* compiler warning */
+ struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
+ struct mlx5_core_dev *mdev = dev->mdev;
+ struct mlx5_ib_qp *qp = to_mqp(ibqp);
+ struct mlx5_wqe_xrc_seg *xrc;
+ void *cur_edge;
+ int size;
+ unsigned long flags;
+ unsigned int idx;
+ int err = 0;
+ int num_sge;
+ void *seg;
+ int nreq;
+ int i;
+ u8 next_fence = 0;
+ u8 fence;
+
+ if (unlikely(mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR &&
+ !drain)) {
+ *bad_wr = wr;
+ return -EIO;
+ }
+
+ if (qp->type == IB_QPT_GSI)
+ return mlx5_ib_gsi_post_send(ibqp, wr, bad_wr);
+
+ spin_lock_irqsave(&qp->sq.lock, flags);
+
+ for (nreq = 0; wr; nreq++, wr = wr->next) {
+ if (unlikely(wr->opcode >= ARRAY_SIZE(mlx5_ib_opcode))) {
+ mlx5_ib_warn(dev, "\n");
+ err = -EINVAL;
+ *bad_wr = wr;
+ goto out;
+ }
+
+ num_sge = wr->num_sge;
+ if (unlikely(num_sge > qp->sq.max_gs)) {
+ mlx5_ib_warn(dev, "\n");
+ err = -EINVAL;
+ *bad_wr = wr;
+ goto out;
+ }
+
+ err = begin_wqe(qp, &seg, &ctrl, wr, &idx, &size, &cur_edge,
+ nreq);
+ if (err) {
+ mlx5_ib_warn(dev, "\n");
+ err = -ENOMEM;
+ *bad_wr = wr;
+ goto out;
+ }
+
+ if (wr->opcode == IB_WR_REG_MR ||
+ wr->opcode == IB_WR_REG_MR_INTEGRITY) {
+ fence = dev->umr_fence;
+ next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
+ } else {
+ if (wr->send_flags & IB_SEND_FENCE) {
+ if (qp->next_fence)
+ fence = MLX5_FENCE_MODE_SMALL_AND_FENCE;
+ else
+ fence = MLX5_FENCE_MODE_FENCE;
+ } else {
+ fence = qp->next_fence;
+ }
+ }
+
+ switch (qp->type) {
+ case IB_QPT_XRC_INI:
+ xrc = seg;
+ seg += sizeof(*xrc);
+ size += sizeof(*xrc) / 16;
+ fallthrough;
+ case IB_QPT_RC:
+ err = handle_qpt_rc(dev, qp, wr, &ctrl, &seg, &size,
+ &cur_edge, &idx, nreq, fence,
+ next_fence, &num_sge);
+ if (unlikely(err)) {
+ *bad_wr = wr;
+ goto out;
+ } else if (wr->opcode == IB_WR_REG_MR_INTEGRITY) {
+ goto skip_psv;
+ }
+ break;
+
+ case IB_QPT_UC:
+ handle_qpt_uc(wr, &seg, &size);
+ break;
+ case IB_QPT_SMI:
+ if (unlikely(!dev->port_caps[qp->port - 1].has_smi)) {
+ mlx5_ib_warn(dev, "Send SMP MADs is not allowed\n");
+ err = -EPERM;
+ *bad_wr = wr;
+ goto out;
+ }
+ fallthrough;
+ case MLX5_IB_QPT_HW_GSI:
+ handle_qpt_hw_gsi(qp, wr, &seg, &size, &cur_edge);
+ break;
+ case IB_QPT_UD:
+ handle_qpt_ud(qp, wr, &seg, &size, &cur_edge);
+ break;
+
+ default:
+ break;
+ }
+
+ if (wr->send_flags & IB_SEND_INLINE && num_sge) {
+ err = set_data_inl_seg(qp, wr, &seg, &size, &cur_edge);
+ if (unlikely(err)) {
+ mlx5_ib_warn(dev, "\n");
+ *bad_wr = wr;
+ goto out;
+ }
+ } else {
+ for (i = 0; i < num_sge; i++) {
+ handle_post_send_edge(&qp->sq, &seg, size,
+ &cur_edge);
+ if (unlikely(!wr->sg_list[i].length))
+ continue;
+
+ set_data_ptr_seg(
+ (struct mlx5_wqe_data_seg *)seg,
+ wr->sg_list + i);
+ size += sizeof(struct mlx5_wqe_data_seg) / 16;
+ seg += sizeof(struct mlx5_wqe_data_seg);
+ }
+ }
+
+ qp->next_fence = next_fence;
+ mlx5r_finish_wqe(qp, ctrl, seg, size, cur_edge, idx, wr->wr_id,
+ nreq, fence, mlx5_ib_opcode[wr->opcode]);
+skip_psv:
+ if (0)
+ dump_wqe(qp, idx, size);
+ }
+
+out:
+ if (likely(nreq))
+ mlx5r_ring_db(qp, nreq, ctrl);
+
+ spin_unlock_irqrestore(&qp->sq.lock, flags);
+
+ return err;
+}
+
+static void set_sig_seg(struct mlx5_rwqe_sig *sig, int max_gs)
+{
+ sig->signature = calc_sig(sig, (max_gs + 1) << 2);
+}
+
+int mlx5_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
+ const struct ib_recv_wr **bad_wr, bool drain)
+{
+ struct mlx5_ib_qp *qp = to_mqp(ibqp);
+ struct mlx5_wqe_data_seg *scat;
+ struct mlx5_rwqe_sig *sig;
+ struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
+ struct mlx5_core_dev *mdev = dev->mdev;
+ unsigned long flags;
+ int err = 0;
+ int nreq;
+ int ind;
+ int i;
+
+ if (unlikely(mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR &&
+ !drain)) {
+ *bad_wr = wr;
+ return -EIO;
+ }
+
+ if (qp->type == IB_QPT_GSI)
+ return mlx5_ib_gsi_post_recv(ibqp, wr, bad_wr);
+
+ spin_lock_irqsave(&qp->rq.lock, flags);
+
+ ind = qp->rq.head & (qp->rq.wqe_cnt - 1);
+
+ for (nreq = 0; wr; nreq++, wr = wr->next) {
+ if (mlx5r_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) {
+ err = -ENOMEM;
+ *bad_wr = wr;
+ goto out;
+ }
+
+ if (unlikely(wr->num_sge > qp->rq.max_gs)) {
+ err = -EINVAL;
+ *bad_wr = wr;
+ goto out;
+ }
+
+ scat = mlx5_frag_buf_get_wqe(&qp->rq.fbc, ind);
+ if (qp->flags_en & MLX5_QP_FLAG_SIGNATURE)
+ scat++;
+
+ for (i = 0; i < wr->num_sge; i++)
+ set_data_ptr_seg(scat + i, wr->sg_list + i);
+
+ if (i < qp->rq.max_gs) {
+ scat[i].byte_count = 0;
+ scat[i].lkey = dev->mkeys.terminate_scatter_list_mkey;
+ scat[i].addr = 0;
+ }
+
+ if (qp->flags_en & MLX5_QP_FLAG_SIGNATURE) {
+ sig = (struct mlx5_rwqe_sig *)scat;
+ set_sig_seg(sig, qp->rq.max_gs);
+ }
+
+ qp->rq.wrid[ind] = wr->wr_id;
+
+ ind = (ind + 1) & (qp->rq.wqe_cnt - 1);
+ }
+
+out:
+ if (likely(nreq)) {
+ qp->rq.head += nreq;
+
+ /* Make sure that descriptors are written before
+ * doorbell record.
+ */
+ wmb();
+
+ *qp->db.db = cpu_to_be32(qp->rq.head & 0xffff);
+ }
+
+ spin_unlock_irqrestore(&qp->rq.lock, flags);
+
+ return err;
+}
diff --git a/drivers/infiniband/hw/mlx5/wr.h b/drivers/infiniband/hw/mlx5/wr.h
new file mode 100644
index 000000000000..2dc89438000d
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/wr.h
@@ -0,0 +1,136 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright (c) 2020, Mellanox Technologies inc. All rights reserved.
+ */
+
+#ifndef _MLX5_IB_WR_H
+#define _MLX5_IB_WR_H
+
+#include "mlx5_ib.h"
+
+enum {
+ MLX5_IB_SQ_UMR_INLINE_THRESHOLD = 64,
+};
+
+struct mlx5_wqe_eth_pad {
+ u8 rsvd0[16];
+};
+
+
+/* get_sq_edge - Get the next nearby edge.
+ *
+ * An 'edge' is defined as the first following address after the end
+ * of the fragment or the SQ. Accordingly, during the WQE construction
+ * which repetitively increases the pointer to write the next data, it
+ * simply should check if it gets to an edge.
+ *
+ * @sq - SQ buffer.
+ * @idx - Stride index in the SQ buffer.
+ *
+ * Return:
+ * The new edge.
+ */
+static inline void *get_sq_edge(struct mlx5_ib_wq *sq, u32 idx)
+{
+ void *fragment_end;
+
+ fragment_end = mlx5_frag_buf_get_wqe
+ (&sq->fbc,
+ mlx5_frag_buf_get_idx_last_contig_stride(&sq->fbc, idx));
+
+ return fragment_end + MLX5_SEND_WQE_BB;
+}
+
+/* handle_post_send_edge - Check if we get to SQ edge. If yes, update to the
+ * next nearby edge and get new address translation for current WQE position.
+ * @sq: SQ buffer.
+ * @seg: Current WQE position (16B aligned).
+ * @wqe_sz: Total current WQE size [16B].
+ * @cur_edge: Updated current edge.
+ */
+static inline void handle_post_send_edge(struct mlx5_ib_wq *sq, void **seg,
+ u32 wqe_sz, void **cur_edge)
+{
+ u32 idx;
+
+ if (likely(*seg != *cur_edge))
+ return;
+
+ idx = (sq->cur_post + (wqe_sz >> 2)) & (sq->wqe_cnt - 1);
+ *cur_edge = get_sq_edge(sq, idx);
+
+ *seg = mlx5_frag_buf_get_wqe(&sq->fbc, idx);
+}
+
+/* mlx5r_memcpy_send_wqe - copy data from src to WQE and update the relevant
+ * WQ's pointers. At the end @seg is aligned to 16B regardless the copied size.
+ * @sq: SQ buffer.
+ * @cur_edge: Updated current edge.
+ * @seg: Current WQE position (16B aligned).
+ * @wqe_sz: Total current WQE size [16B].
+ * @src: Pointer to copy from.
+ * @n: Number of bytes to copy.
+ */
+static inline void mlx5r_memcpy_send_wqe(struct mlx5_ib_wq *sq, void **cur_edge,
+ void **seg, u32 *wqe_sz,
+ const void *src, size_t n)
+{
+ while (likely(n)) {
+ size_t leftlen = *cur_edge - *seg;
+ size_t copysz = min_t(size_t, leftlen, n);
+ size_t stride;
+
+ memcpy(*seg, src, copysz);
+
+ n -= copysz;
+ src += copysz;
+ stride = !n ? ALIGN(copysz, 16) : copysz;
+ *seg += stride;
+ *wqe_sz += stride >> 4;
+ handle_post_send_edge(sq, seg, *wqe_sz, cur_edge);
+ }
+}
+
+int mlx5r_wq_overflow(struct mlx5_ib_wq *wq, int nreq, struct ib_cq *ib_cq);
+int mlx5r_begin_wqe(struct mlx5_ib_qp *qp, void **seg,
+ struct mlx5_wqe_ctrl_seg **ctrl, unsigned int *idx,
+ int *size, void **cur_edge, int nreq, __be32 general_id,
+ bool send_signaled, bool solicited);
+void mlx5r_finish_wqe(struct mlx5_ib_qp *qp, struct mlx5_wqe_ctrl_seg *ctrl,
+ void *seg, u8 size, void *cur_edge, unsigned int idx,
+ u64 wr_id, int nreq, u8 fence, u32 mlx5_opcode);
+void mlx5r_ring_db(struct mlx5_ib_qp *qp, unsigned int nreq,
+ struct mlx5_wqe_ctrl_seg *ctrl);
+int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+ const struct ib_send_wr **bad_wr, bool drain);
+int mlx5_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
+ const struct ib_recv_wr **bad_wr, bool drain);
+
+static inline int mlx5_ib_post_send_nodrain(struct ib_qp *ibqp,
+ const struct ib_send_wr *wr,
+ const struct ib_send_wr **bad_wr)
+{
+ return mlx5_ib_post_send(ibqp, wr, bad_wr, false);
+}
+
+static inline int mlx5_ib_post_send_drain(struct ib_qp *ibqp,
+ const struct ib_send_wr *wr,
+ const struct ib_send_wr **bad_wr)
+{
+ return mlx5_ib_post_send(ibqp, wr, bad_wr, true);
+}
+
+static inline int mlx5_ib_post_recv_nodrain(struct ib_qp *ibqp,
+ const struct ib_recv_wr *wr,
+ const struct ib_recv_wr **bad_wr)
+{
+ return mlx5_ib_post_recv(ibqp, wr, bad_wr, false);
+}
+
+static inline int mlx5_ib_post_recv_drain(struct ib_qp *ibqp,
+ const struct ib_recv_wr *wr,
+ const struct ib_recv_wr **bad_wr)
+{
+ return mlx5_ib_post_recv(ibqp, wr, bad_wr, true);
+}
+#endif /* _MLX5_IB_WR_H */
diff --git a/drivers/infiniband/hw/mthca/Kconfig b/drivers/infiniband/hw/mthca/Kconfig
index da314c3fec23..faa7381f7d63 100644
--- a/drivers/infiniband/hw/mthca/Kconfig
+++ b/drivers/infiniband/hw/mthca/Kconfig
@@ -1,7 +1,8 @@
+# SPDX-License-Identifier: GPL-2.0-only
config INFINIBAND_MTHCA
tristate "Mellanox HCA support"
depends on PCI
- ---help---
+ help
This is a low-level driver for Mellanox InfiniHost host
channel adapters (HCAs), including the MT23108 PCI-X HCA
("Tavor") and the MT25208 PCI Express HCA ("Arbel").
@@ -10,7 +11,7 @@ config INFINIBAND_MTHCA_DEBUG
bool "Verbose debugging output" if EXPERT
depends on INFINIBAND_MTHCA
default y
- ---help---
+ help
This option causes debugging code to be compiled into the
mthca driver. The output can be turned on via the
debug_level module parameter (which can also be set after
diff --git a/drivers/infiniband/hw/mthca/Makefile b/drivers/infiniband/hw/mthca/Makefile
index e388d95d0cf1..3a09e9ffd634 100644
--- a/drivers/infiniband/hw/mthca/Makefile
+++ b/drivers/infiniband/hw/mthca/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_INFINIBAND_MTHCA) += ib_mthca.o
ib_mthca-y := mthca_main.o mthca_cmd.o mthca_profile.o mthca_reset.o \
diff --git a/drivers/infiniband/hw/mthca/mthca_allocator.c b/drivers/infiniband/hw/mthca/mthca_allocator.c
index b4e0cf4e95cd..9f0f79d02d3c 100644
--- a/drivers/infiniband/hw/mthca/mthca_allocator.c
+++ b/drivers/infiniband/hw/mthca/mthca_allocator.c
@@ -51,7 +51,7 @@ u32 mthca_alloc(struct mthca_alloc *alloc)
}
if (obj < alloc->max) {
- set_bit(obj, alloc->table);
+ __set_bit(obj, alloc->table);
obj |= alloc->top;
} else
obj = -1;
@@ -69,7 +69,7 @@ void mthca_free(struct mthca_alloc *alloc, u32 obj)
spin_lock_irqsave(&alloc->lock, flags);
- clear_bit(obj, alloc->table);
+ __clear_bit(obj, alloc->table);
alloc->last = min(alloc->last, obj);
alloc->top = (alloc->top + alloc->max) & alloc->mask;
@@ -79,8 +79,6 @@ void mthca_free(struct mthca_alloc *alloc, u32 obj)
int mthca_alloc_init(struct mthca_alloc *alloc, u32 num, u32 mask,
u32 reserved)
{
- int i;
-
/* num must be a power of 2 */
if (num != 1 << (ffs(num) - 1))
return -EINVAL;
@@ -90,21 +88,18 @@ int mthca_alloc_init(struct mthca_alloc *alloc, u32 num, u32 mask,
alloc->max = num;
alloc->mask = mask;
spin_lock_init(&alloc->lock);
- alloc->table = kmalloc(BITS_TO_LONGS(num) * sizeof (long),
- GFP_KERNEL);
+ alloc->table = bitmap_zalloc(num, GFP_KERNEL);
if (!alloc->table)
return -ENOMEM;
- bitmap_zero(alloc->table, num);
- for (i = 0; i < reserved; ++i)
- set_bit(i, alloc->table);
+ bitmap_set(alloc->table, 0, reserved);
return 0;
}
void mthca_alloc_cleanup(struct mthca_alloc *alloc)
{
- kfree(alloc->table);
+ bitmap_free(alloc->table);
}
/*
@@ -162,7 +157,8 @@ int mthca_array_init(struct mthca_array *array, int nent)
int npage = (nent * sizeof (void *) + PAGE_SIZE - 1) / PAGE_SIZE;
int i;
- array->page_list = kmalloc(npage * sizeof *array->page_list, GFP_KERNEL);
+ array->page_list = kmalloc_array(npage, sizeof(*array->page_list),
+ GFP_KERNEL);
if (!array->page_list)
return -ENOMEM;
@@ -213,14 +209,13 @@ int mthca_buf_alloc(struct mthca_dev *dev, int size, int max_direct,
dma_unmap_addr_set(&buf->direct, mapping, t);
- memset(buf->direct.buf, 0, size);
-
while (t & ((1 << shift) - 1)) {
--shift;
npages *= 2;
}
- dma_list = kmalloc(npages * sizeof *dma_list, GFP_KERNEL);
+ dma_list = kmalloc_array(npages, sizeof(*dma_list),
+ GFP_KERNEL);
if (!dma_list)
goto err_free;
@@ -231,12 +226,14 @@ int mthca_buf_alloc(struct mthca_dev *dev, int size, int max_direct,
npages = (size + PAGE_SIZE - 1) / PAGE_SIZE;
shift = PAGE_SHIFT;
- dma_list = kmalloc(npages * sizeof *dma_list, GFP_KERNEL);
+ dma_list = kmalloc_array(npages, sizeof(*dma_list),
+ GFP_KERNEL);
if (!dma_list)
return -ENOMEM;
- buf->page_list = kmalloc(npages * sizeof *buf->page_list,
- GFP_KERNEL);
+ buf->page_list = kmalloc_array(npages,
+ sizeof(*buf->page_list),
+ GFP_KERNEL);
if (!buf->page_list)
goto err_out;
diff --git a/drivers/infiniband/hw/mthca/mthca_av.c b/drivers/infiniband/hw/mthca/mthca_av.c
index c9f0f364f484..3df1f5ff7932 100644
--- a/drivers/infiniband/hw/mthca/mthca_av.c
+++ b/drivers/infiniband/hw/mthca/mthca_av.c
@@ -91,7 +91,7 @@ static enum ib_rate tavor_rate_to_ib(u8 mthca_rate, u8 port_rate)
}
}
-enum ib_rate mthca_rate_to_ib(struct mthca_dev *dev, u8 mthca_rate, u8 port)
+enum ib_rate mthca_rate_to_ib(struct mthca_dev *dev, u8 mthca_rate, u32 port)
{
if (mthca_is_memfree(dev)) {
/* Handle old Arbel FW */
@@ -115,7 +115,7 @@ static u8 ib_rate_to_memfree(u8 req_rate, u8 cur_rate)
switch ((cur_rate - 1) / req_rate) {
case 0: return MTHCA_RATE_MEMFREE_FULL;
case 1: return MTHCA_RATE_MEMFREE_HALF;
- case 2: /* fall through */
+ case 2:
case 3: return MTHCA_RATE_MEMFREE_QUARTER;
default: return MTHCA_RATE_MEMFREE_EIGHTH;
}
@@ -131,7 +131,7 @@ static u8 ib_rate_to_tavor(u8 static_rate)
}
}
-u8 mthca_get_rate(struct mthca_dev *dev, int static_rate, u8 port)
+u8 mthca_get_rate(struct mthca_dev *dev, int static_rate, u32 port)
{
u8 rate;
@@ -152,7 +152,7 @@ u8 mthca_get_rate(struct mthca_dev *dev, int static_rate, u8 port)
int mthca_create_ah(struct mthca_dev *dev,
struct mthca_pd *pd,
- struct ib_ah_attr *ah_attr,
+ struct rdma_ah_attr *ah_attr,
struct mthca_ah *ah)
{
u32 index = -1;
@@ -186,7 +186,7 @@ int mthca_create_ah(struct mthca_dev *dev,
on_hca_fail:
if (ah->type == MTHCA_AH_PCI_POOL) {
- ah->av = pci_pool_zalloc(dev->av_table.pool,
+ ah->av = dma_pool_zalloc(dev->av_table.pool,
GFP_ATOMIC, &ah->avdma);
if (!ah->av)
return -ENOMEM;
@@ -196,21 +196,26 @@ on_hca_fail:
ah->key = pd->ntmr.ibmr.lkey;
- av->port_pd = cpu_to_be32(pd->pd_num | (ah_attr->port_num << 24));
- av->g_slid = ah_attr->src_path_bits;
- av->dlid = cpu_to_be16(ah_attr->dlid);
+ av->port_pd = cpu_to_be32(pd->pd_num |
+ (rdma_ah_get_port_num(ah_attr) << 24));
+ av->g_slid = rdma_ah_get_path_bits(ah_attr);
+ av->dlid = cpu_to_be16(rdma_ah_get_dlid(ah_attr));
av->msg_sr = (3 << 4) | /* 2K message */
- mthca_get_rate(dev, ah_attr->static_rate, ah_attr->port_num);
- av->sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28);
- if (ah_attr->ah_flags & IB_AH_GRH) {
+ mthca_get_rate(dev, rdma_ah_get_static_rate(ah_attr),
+ rdma_ah_get_port_num(ah_attr));
+ av->sl_tclass_flowlabel = cpu_to_be32(rdma_ah_get_sl(ah_attr) << 28);
+ if (rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH) {
+ const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr);
+
av->g_slid |= 0x80;
- av->gid_index = (ah_attr->port_num - 1) * dev->limits.gid_table_len +
- ah_attr->grh.sgid_index;
- av->hop_limit = ah_attr->grh.hop_limit;
+ av->gid_index = (rdma_ah_get_port_num(ah_attr) - 1) *
+ dev->limits.gid_table_len +
+ grh->sgid_index;
+ av->hop_limit = grh->hop_limit;
av->sl_tclass_flowlabel |=
- cpu_to_be32((ah_attr->grh.traffic_class << 20) |
- ah_attr->grh.flow_label);
- memcpy(av->dgid, ah_attr->grh.dgid.raw, 16);
+ cpu_to_be32((grh->traffic_class << 20) |
+ grh->flow_label);
+ memcpy(av->dgid, grh->dgid.raw, 16);
} else {
/* Arbel workaround -- low byte of GID must be 2 */
av->dgid[3] = cpu_to_be32(2);
@@ -245,7 +250,7 @@ int mthca_destroy_ah(struct mthca_dev *dev, struct mthca_ah *ah)
break;
case MTHCA_AH_PCI_POOL:
- pci_pool_free(dev->av_table.pool, ah->av, ah->avdma);
+ dma_pool_free(dev->av_table.pool, ah->av, ah->avdma);
break;
case MTHCA_AH_KMALLOC:
@@ -276,10 +281,7 @@ int mthca_read_ah(struct mthca_dev *dev, struct mthca_ah *ah,
header->grh.flow_label =
ah->av->sl_tclass_flowlabel & cpu_to_be32(0xfffff);
header->grh.hop_limit = ah->av->hop_limit;
- ib_get_cached_gid(&dev->ib_dev,
- be32_to_cpu(ah->av->port_pd) >> 24,
- ah->av->gid_index % dev->limits.gid_table_len,
- &header->grh.source_gid, NULL);
+ header->grh.source_gid = ah->ibah.sgid_attr->gid;
memcpy(header->grh.destination_gid.raw,
ah->av->dgid, 16);
}
@@ -287,33 +289,35 @@ int mthca_read_ah(struct mthca_dev *dev, struct mthca_ah *ah,
return 0;
}
-int mthca_ah_query(struct ib_ah *ibah, struct ib_ah_attr *attr)
+int mthca_ah_query(struct ib_ah *ibah, struct rdma_ah_attr *attr)
{
struct mthca_ah *ah = to_mah(ibah);
struct mthca_dev *dev = to_mdev(ibah->device);
+ u32 port_num = be32_to_cpu(ah->av->port_pd) >> 24;
/* Only implement for MAD and memfree ah for now. */
if (ah->type == MTHCA_AH_ON_HCA)
return -ENOSYS;
memset(attr, 0, sizeof *attr);
- attr->dlid = be16_to_cpu(ah->av->dlid);
- attr->sl = be32_to_cpu(ah->av->sl_tclass_flowlabel) >> 28;
- attr->port_num = be32_to_cpu(ah->av->port_pd) >> 24;
- attr->static_rate = mthca_rate_to_ib(dev, ah->av->msg_sr & 0x7,
- attr->port_num);
- attr->src_path_bits = ah->av->g_slid & 0x7F;
- attr->ah_flags = mthca_ah_grh_present(ah) ? IB_AH_GRH : 0;
-
- if (attr->ah_flags) {
- attr->grh.traffic_class =
- be32_to_cpu(ah->av->sl_tclass_flowlabel) >> 20;
- attr->grh.flow_label =
- be32_to_cpu(ah->av->sl_tclass_flowlabel) & 0xfffff;
- attr->grh.hop_limit = ah->av->hop_limit;
- attr->grh.sgid_index = ah->av->gid_index &
- (dev->limits.gid_table_len - 1);
- memcpy(attr->grh.dgid.raw, ah->av->dgid, 16);
+ attr->type = ibah->type;
+ rdma_ah_set_dlid(attr, be16_to_cpu(ah->av->dlid));
+ rdma_ah_set_sl(attr, be32_to_cpu(ah->av->sl_tclass_flowlabel) >> 28);
+ rdma_ah_set_port_num(attr, port_num);
+ rdma_ah_set_static_rate(attr,
+ mthca_rate_to_ib(dev, ah->av->msg_sr & 0x7,
+ port_num));
+ rdma_ah_set_path_bits(attr, ah->av->g_slid & 0x7F);
+ if (mthca_ah_grh_present(ah)) {
+ u32 tc_fl = be32_to_cpu(ah->av->sl_tclass_flowlabel);
+
+ rdma_ah_set_grh(attr, NULL,
+ tc_fl & 0xfffff,
+ ah->av->gid_index &
+ (dev->limits.gid_table_len - 1),
+ ah->av->hop_limit,
+ (tc_fl >> 20) & 0xff);
+ rdma_ah_set_dgid_raw(attr, ah->av->dgid);
}
return 0;
@@ -333,7 +337,7 @@ int mthca_init_av_table(struct mthca_dev *dev)
if (err)
return err;
- dev->av_table.pool = pci_pool_create("mthca_av", dev->pdev,
+ dev->av_table.pool = dma_pool_create("mthca_av", &dev->pdev->dev,
MTHCA_AV_SIZE,
MTHCA_AV_SIZE, 0);
if (!dev->av_table.pool)
@@ -353,7 +357,7 @@ int mthca_init_av_table(struct mthca_dev *dev)
return 0;
out_free_pool:
- pci_pool_destroy(dev->av_table.pool);
+ dma_pool_destroy(dev->av_table.pool);
out_free_alloc:
mthca_alloc_cleanup(&dev->av_table.alloc);
@@ -367,6 +371,6 @@ void mthca_cleanup_av_table(struct mthca_dev *dev)
if (dev->av_table.av_map)
iounmap(dev->av_table.av_map);
- pci_pool_destroy(dev->av_table.pool);
+ dma_pool_destroy(dev->av_table.pool);
mthca_alloc_cleanup(&dev->av_table.alloc);
}
diff --git a/drivers/infiniband/hw/mthca/mthca_catas.c b/drivers/infiniband/hw/mthca/mthca_catas.c
index f6474c24f193..f1d79968c985 100644
--- a/drivers/infiniband/hw/mthca/mthca_catas.c
+++ b/drivers/infiniband/hw/mthca/mthca_catas.c
@@ -130,9 +130,9 @@ static void handle_catas(struct mthca_dev *dev)
spin_unlock_irqrestore(&catas_lock, flags);
}
-static void poll_catas(unsigned long dev_ptr)
+static void poll_catas(struct timer_list *t)
{
- struct mthca_dev *dev = (struct mthca_dev *) dev_ptr;
+ struct mthca_dev *dev = timer_container_of(dev, t, catas_err.timer);
int i;
for (i = 0; i < dev->catas_err.size; ++i)
@@ -149,7 +149,7 @@ void mthca_start_catas_poll(struct mthca_dev *dev)
{
phys_addr_t addr;
- init_timer(&dev->catas_err.timer);
+ timer_setup(&dev->catas_err.timer, poll_catas, 0);
dev->catas_err.map = NULL;
addr = pci_resource_start(dev->pdev, 0) +
@@ -164,8 +164,6 @@ void mthca_start_catas_poll(struct mthca_dev *dev)
return;
}
- dev->catas_err.timer.data = (unsigned long) dev;
- dev->catas_err.timer.function = poll_catas;
dev->catas_err.timer.expires = jiffies + MTHCA_CATAS_POLL_INTERVAL;
INIT_LIST_HEAD(&dev->catas_err.list);
add_timer(&dev->catas_err.timer);
@@ -173,7 +171,7 @@ void mthca_start_catas_poll(struct mthca_dev *dev)
void mthca_stop_catas_poll(struct mthca_dev *dev)
{
- del_timer_sync(&dev->catas_err.timer);
+ timer_delete_sync(&dev->catas_err.timer);
if (dev->catas_err.map)
iounmap(dev->catas_err.map);
diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c
index c7f49bbb0c72..8fe0cef7e2be 100644
--- a/drivers/infiniband/hw/mthca/mthca_cmd.c
+++ b/drivers/infiniband/hw/mthca/mthca_cmd.c
@@ -292,12 +292,6 @@ static int mthca_cmd_post(struct mthca_dev *dev,
err = mthca_cmd_post_hcr(dev, in_param, out_param, in_modifier,
op_modifier, op, token, event);
- /*
- * Make sure that our HCR writes don't get mixed in with
- * writes from another CPU starting a FW command.
- */
- mmiowb();
-
mutex_unlock(&dev->cmd.hcr_mutex);
return err;
}
@@ -367,12 +361,16 @@ static int mthca_cmd_poll(struct mthca_dev *dev,
goto out;
}
- if (out_is_imm)
+ if (out_is_imm && out_param) {
*out_param =
(u64) be32_to_cpu((__force __be32)
__raw_readl(dev->hcr + HCR_OUT_PARAM_OFFSET)) << 32 |
(u64) be32_to_cpu((__force __be32)
__raw_readl(dev->hcr + HCR_OUT_PARAM_OFFSET + 4));
+ } else if (out_is_imm) {
+ err = -EINVAL;
+ goto out;
+ }
status = be32_to_cpu((__force __be32) __raw_readl(dev->hcr + HCR_STATUS_OFFSET)) >> 24;
if (status) {
@@ -450,8 +448,12 @@ static int mthca_cmd_wait(struct mthca_dev *dev,
err = mthca_status_to_errno(context->status);
}
- if (out_is_imm)
+ if (out_is_imm && out_param) {
*out_param = context->out_param;
+ } else if (out_is_imm) {
+ err = -EINVAL;
+ goto out;
+ }
out:
spin_lock(&dev->cmd.context_lock);
@@ -530,7 +532,7 @@ int mthca_cmd_init(struct mthca_dev *dev)
return -ENOMEM;
}
- dev->cmd.pool = pci_pool_create("mthca_cmd", dev->pdev,
+ dev->cmd.pool = dma_pool_create("mthca_cmd", &dev->pdev->dev,
MTHCA_MAILBOX_SIZE,
MTHCA_MAILBOX_SIZE, 0);
if (!dev->cmd.pool) {
@@ -543,7 +545,7 @@ int mthca_cmd_init(struct mthca_dev *dev)
void mthca_cmd_cleanup(struct mthca_dev *dev)
{
- pci_pool_destroy(dev->cmd.pool);
+ dma_pool_destroy(dev->cmd.pool);
iounmap(dev->hcr);
if (dev->cmd.flags & MTHCA_CMD_POST_DOORBELLS)
iounmap(dev->cmd.dbell_map);
@@ -557,9 +559,9 @@ int mthca_cmd_use_events(struct mthca_dev *dev)
{
int i;
- dev->cmd.context = kmalloc(dev->cmd.max_cmds *
- sizeof (struct mthca_cmd_context),
- GFP_KERNEL);
+ dev->cmd.context = kmalloc_array(dev->cmd.max_cmds,
+ sizeof(struct mthca_cmd_context),
+ GFP_KERNEL);
if (!dev->cmd.context)
return -ENOMEM;
@@ -613,7 +615,7 @@ struct mthca_mailbox *mthca_alloc_mailbox(struct mthca_dev *dev,
if (!mailbox)
return ERR_PTR(-ENOMEM);
- mailbox->buf = pci_pool_alloc(dev->cmd.pool, gfp_mask, &mailbox->dma);
+ mailbox->buf = dma_pool_alloc(dev->cmd.pool, gfp_mask, &mailbox->dma);
if (!mailbox->buf) {
kfree(mailbox);
return ERR_PTR(-ENOMEM);
@@ -627,13 +629,13 @@ void mthca_free_mailbox(struct mthca_dev *dev, struct mthca_mailbox *mailbox)
if (!mailbox)
return;
- pci_pool_free(dev->cmd.pool, mailbox->buf, mailbox->dma);
+ dma_pool_free(dev->cmd.pool, mailbox->buf, mailbox->dma);
kfree(mailbox);
}
int mthca_SYS_EN(struct mthca_dev *dev)
{
- u64 out;
+ u64 out = 0;
int ret;
ret = mthca_cmd_imm(dev, 0, &out, 0, 0, CMD_SYS_EN, CMD_TIME_CLASS_D);
@@ -690,7 +692,7 @@ static int mthca_map_cmd(struct mthca_dev *dev, u16 op, struct mthca_icm *icm,
for (i = 0; i < mthca_icm_size(&iter) >> lg; ++i) {
if (virt != -1) {
pages[nent * 2] = cpu_to_be64(virt);
- virt += 1 << lg;
+ virt += 1ULL << lg;
}
pages[nent * 2 + 1] =
@@ -1250,7 +1252,7 @@ static void get_board_id(void *vsd, char *board_id)
if (be16_to_cpup(vsd + VSD_OFFSET_SIG1) == VSD_SIGNATURE_TOPSPIN &&
be16_to_cpup(vsd + VSD_OFFSET_SIG2) == VSD_SIGNATURE_TOPSPIN) {
- strlcpy(board_id, vsd + VSD_OFFSET_TS_BOARD_ID, MTHCA_BOARD_ID_LEN);
+ strscpy(board_id, vsd + VSD_OFFSET_TS_BOARD_ID, MTHCA_BOARD_ID_LEN);
} else {
/*
* The board ID is a string but the firmware byte
@@ -1913,7 +1915,7 @@ int mthca_MAD_IFC(struct mthca_dev *dev, int ignore_mkey, int ignore_bkey,
(in_wc->wc_flags & IB_WC_GRH ? 0x80 : 0);
MTHCA_PUT(inbox, val, MAD_IFC_G_PATH_OFFSET);
- MTHCA_PUT(inbox, in_wc->slid, MAD_IFC_RLID_OFFSET);
+ MTHCA_PUT(inbox, ib_lid_cpu16(in_wc->slid), MAD_IFC_RLID_OFFSET);
MTHCA_PUT(inbox, in_wc->pkey_index, MAD_IFC_PKEY_OFFSET);
if (in_grh)
@@ -1921,7 +1923,7 @@ int mthca_MAD_IFC(struct mthca_dev *dev, int ignore_mkey, int ignore_bkey,
op_modifier |= 0x4;
- in_modifier |= in_wc->slid << 16;
+ in_modifier |= ib_lid_cpu16(in_wc->slid) << 16;
}
err = mthca_cmd_box(dev, inmailbox->dma, outmailbox->dma,
@@ -1953,7 +1955,7 @@ int mthca_WRITE_MGM(struct mthca_dev *dev, int index,
int mthca_MGID_HASH(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
u16 *hash)
{
- u64 imm;
+ u64 imm = 0;
int err;
err = mthca_cmd_imm(dev, mailbox->dma, &imm, 0, 0, CMD_MGID_HASH,
diff --git a/drivers/infiniband/hw/mthca/mthca_cq.c b/drivers/infiniband/hw/mthca/mthca_cq.c
index a6531ffe29a6..26c3408dcaca 100644
--- a/drivers/infiniband/hw/mthca/mthca_cq.c
+++ b/drivers/infiniband/hw/mthca/mthca_cq.c
@@ -77,7 +77,7 @@ struct mthca_cq_context {
__be32 ci_db; /* Arbel only */
__be32 state_db; /* Arbel only */
u32 reserved;
-} __attribute__((packed));
+} __packed;
#define MTHCA_CQ_STATUS_OK ( 0 << 28)
#define MTHCA_CQ_STATUS_OVERFLOW ( 9 << 28)
@@ -211,11 +211,6 @@ static inline void update_cons_index(struct mthca_dev *dev, struct mthca_cq *cq,
mthca_write64(MTHCA_TAVOR_CQ_DB_INC_CI | cq->cqn, incr - 1,
dev->kar + MTHCA_CQ_DOORBELL,
MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
- /*
- * Make sure doorbells don't leak out of CQ spinlock
- * and reach the HCA out of order:
- */
- mmiowb();
}
}
@@ -609,7 +604,7 @@ static inline int mthca_poll_one(struct mthca_dev *dev,
entry->byte_len = MTHCA_ATOMIC_BYTE_LEN;
break;
default:
- entry->opcode = MTHCA_OPCODE_INVALID;
+ entry->opcode = 0xFF;
break;
}
} else {
@@ -808,8 +803,10 @@ int mthca_init_cq(struct mthca_dev *dev, int nent,
}
mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
- if (IS_ERR(mailbox))
+ if (IS_ERR(mailbox)) {
+ err = PTR_ERR(mailbox);
goto err_out_arm;
+ }
cq_context = mailbox->buf;
@@ -851,9 +848,9 @@ int mthca_init_cq(struct mthca_dev *dev, int nent,
}
spin_lock_irq(&dev->cq_table.lock);
- if (mthca_array_set(&dev->cq_table.cq,
- cq->cqn & (dev->limits.num_cqs - 1),
- cq)) {
+ err = mthca_array_set(&dev->cq_table.cq,
+ cq->cqn & (dev->limits.num_cqs - 1), cq);
+ if (err) {
spin_unlock_irq(&dev->cq_table.lock);
goto err_out_free_mr;
}
diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h
index 4393a022867b..a4a9d871d00e 100644
--- a/drivers/infiniband/hw/mthca/mthca_dev.h
+++ b/drivers/infiniband/hw/mthca/mthca_dev.h
@@ -105,7 +105,6 @@ enum {
MTHCA_OPCODE_ATOMIC_CS = 0x11,
MTHCA_OPCODE_ATOMIC_FA = 0x12,
MTHCA_OPCODE_BIND_MW = 0x18,
- MTHCA_OPCODE_INVALID = 0xff
};
enum {
@@ -118,7 +117,7 @@ enum {
};
struct mthca_cmd {
- struct pci_pool *pool;
+ struct dma_pool *pool;
struct mutex hcr_mutex;
struct semaphore poll_sem;
struct semaphore event_sem;
@@ -263,7 +262,7 @@ struct mthca_qp_table {
};
struct mthca_av_table {
- struct pci_pool *pool;
+ struct dma_pool *pool;
int num_ddr_avs;
u64 ddr_av_base;
void __iomem *av_map;
@@ -478,16 +477,6 @@ int mthca_mr_alloc_phys(struct mthca_dev *dev, u32 pd,
u32 access, struct mthca_mr *mr);
void mthca_free_mr(struct mthca_dev *dev, struct mthca_mr *mr);
-int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd,
- u32 access, struct mthca_fmr *fmr);
-int mthca_tavor_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
- int list_len, u64 iova);
-void mthca_tavor_fmr_unmap(struct mthca_dev *dev, struct mthca_fmr *fmr);
-int mthca_arbel_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
- int list_len, u64 iova);
-void mthca_arbel_fmr_unmap(struct mthca_dev *dev, struct mthca_fmr *fmr);
-int mthca_free_fmr(struct mthca_dev *dev, struct mthca_fmr *fmr);
-
int mthca_map_eq_icm(struct mthca_dev *dev, u64 icm_virt);
void mthca_unmap_eq_icm(struct mthca_dev *dev);
@@ -510,7 +499,8 @@ int mthca_alloc_cq_buf(struct mthca_dev *dev, struct mthca_cq_buf *buf, int nent
void mthca_free_cq_buf(struct mthca_dev *dev, struct mthca_cq_buf *buf, int cqe);
int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd,
- struct ib_srq_attr *attr, struct mthca_srq *srq);
+ struct ib_srq_attr *attr, struct mthca_srq *srq,
+ struct ib_udata *udata);
void mthca_free_srq(struct mthca_dev *dev, struct mthca_srq *srq);
int mthca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
enum ib_srq_attr_mask attr_mask, struct ib_udata *udata);
@@ -519,10 +509,10 @@ int mthca_max_srq_sge(struct mthca_dev *dev);
void mthca_srq_event(struct mthca_dev *dev, u32 srqn,
enum ib_event_type event_type);
void mthca_free_srq_wqe(struct mthca_srq *srq, u32 wqe_addr);
-int mthca_tavor_post_srq_recv(struct ib_srq *srq, struct ib_recv_wr *wr,
- struct ib_recv_wr **bad_wr);
-int mthca_arbel_post_srq_recv(struct ib_srq *srq, struct ib_recv_wr *wr,
- struct ib_recv_wr **bad_wr);
+int mthca_tavor_post_srq_recv(struct ib_srq *srq, const struct ib_recv_wr *wr,
+ const struct ib_recv_wr **bad_wr);
+int mthca_arbel_post_srq_recv(struct ib_srq *srq, const struct ib_recv_wr *wr,
+ const struct ib_recv_wr **bad_wr);
void mthca_qp_event(struct mthca_dev *dev, u32 qpn,
enum ib_event_type event_type);
@@ -530,14 +520,14 @@ int mthca_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_m
struct ib_qp_init_attr *qp_init_attr);
int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
struct ib_udata *udata);
-int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
- struct ib_send_wr **bad_wr);
-int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
- struct ib_recv_wr **bad_wr);
-int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
- struct ib_send_wr **bad_wr);
-int mthca_arbel_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
- struct ib_recv_wr **bad_wr);
+int mthca_tavor_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+ const struct ib_send_wr **bad_wr);
+int mthca_tavor_post_receive(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
+ const struct ib_recv_wr **bad_wr);
+int mthca_arbel_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+ const struct ib_send_wr **bad_wr);
+int mthca_arbel_post_receive(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
+ const struct ib_recv_wr **bad_wr);
void mthca_free_err_wqe(struct mthca_dev *dev, struct mthca_qp *qp, int is_send,
int index, int *dbd, __be32 *new_wqe);
int mthca_alloc_qp(struct mthca_dev *dev,
@@ -547,7 +537,8 @@ int mthca_alloc_qp(struct mthca_dev *dev,
enum ib_qp_type type,
enum ib_sig_type send_policy,
struct ib_qp_cap *cap,
- struct mthca_qp *qp);
+ struct mthca_qp *qp,
+ struct ib_udata *udata);
int mthca_alloc_sqp(struct mthca_dev *dev,
struct mthca_pd *pd,
struct mthca_cq *send_cq,
@@ -555,32 +546,29 @@ int mthca_alloc_sqp(struct mthca_dev *dev,
enum ib_sig_type send_policy,
struct ib_qp_cap *cap,
int qpn,
- int port,
- struct mthca_sqp *sqp);
+ u32 port,
+ struct mthca_qp *qp,
+ struct ib_udata *udata);
void mthca_free_qp(struct mthca_dev *dev, struct mthca_qp *qp);
int mthca_create_ah(struct mthca_dev *dev,
struct mthca_pd *pd,
- struct ib_ah_attr *ah_attr,
+ struct rdma_ah_attr *ah_attr,
struct mthca_ah *ah);
int mthca_destroy_ah(struct mthca_dev *dev, struct mthca_ah *ah);
int mthca_read_ah(struct mthca_dev *dev, struct mthca_ah *ah,
struct ib_ud_header *header);
-int mthca_ah_query(struct ib_ah *ibah, struct ib_ah_attr *attr);
+int mthca_ah_query(struct ib_ah *ibah, struct rdma_ah_attr *attr);
int mthca_ah_grh_present(struct mthca_ah *ah);
-u8 mthca_get_rate(struct mthca_dev *dev, int static_rate, u8 port);
-enum ib_rate mthca_rate_to_ib(struct mthca_dev *dev, u8 mthca_rate, u8 port);
+u8 mthca_get_rate(struct mthca_dev *dev, int static_rate, u32 port);
+enum ib_rate mthca_rate_to_ib(struct mthca_dev *dev, u8 mthca_rate, u32 port);
int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
-int mthca_process_mad(struct ib_device *ibdev,
- int mad_flags,
- u8 port_num,
- const struct ib_wc *in_wc,
- const struct ib_grh *in_grh,
- const struct ib_mad_hdr *in, size_t in_mad_size,
- struct ib_mad_hdr *out, size_t *out_mad_size,
- u16 *out_mad_pkey_index);
+int mthca_process_mad(struct ib_device *ibdev, int mad_flags, u32 port_num,
+ const struct ib_wc *in_wc, const struct ib_grh *in_grh,
+ const struct ib_mad *in, struct ib_mad *out,
+ size_t *out_mad_size, u16 *out_mad_pkey_index);
int mthca_create_agents(struct mthca_dev *dev);
void mthca_free_agents(struct mthca_dev *dev);
diff --git a/drivers/infiniband/hw/mthca/mthca_eq.c b/drivers/infiniband/hw/mthca/mthca_eq.c
index 690201738993..97287c544da8 100644
--- a/drivers/infiniband/hw/mthca/mthca_eq.c
+++ b/drivers/infiniband/hw/mthca/mthca_eq.c
@@ -63,7 +63,7 @@ struct mthca_eq_context {
__be32 consumer_index;
__be32 producer_index;
u32 reserved3[4];
-} __attribute__((packed));
+} __packed;
#define MTHCA_EQ_STATUS_OK ( 0 << 28)
#define MTHCA_EQ_STATUS_OVERFLOW ( 9 << 28)
@@ -130,7 +130,7 @@ struct mthca_eqe {
u32 raw[6];
struct {
__be32 cqn;
- } __attribute__((packed)) comp;
+ } __packed comp;
struct {
u16 reserved1;
__be16 token;
@@ -138,27 +138,27 @@ struct mthca_eqe {
u8 reserved3[3];
u8 status;
__be64 out_param;
- } __attribute__((packed)) cmd;
+ } __packed cmd;
struct {
__be32 qpn;
- } __attribute__((packed)) qp;
+ } __packed qp;
struct {
__be32 srqn;
- } __attribute__((packed)) srq;
+ } __packed srq;
struct {
__be32 cqn;
u32 reserved1;
u8 reserved2[3];
u8 syndrome;
- } __attribute__((packed)) cq_err;
+ } __packed cq_err;
struct {
u32 reserved1[2];
__be32 port;
- } __attribute__((packed)) port_change;
+ } __packed port_change;
} event;
u8 reserved3[3];
u8 owner;
-} __attribute__((packed));
+} __packed;
#define MTHCA_EQ_ENTRY_OWNER_SW (0 << 7)
#define MTHCA_EQ_ENTRY_OWNER_HW (1 << 7)
@@ -479,15 +479,15 @@ static int mthca_create_eq(struct mthca_dev *dev,
eq->nent = roundup_pow_of_two(max(nent, 2));
npages = ALIGN(eq->nent * MTHCA_EQ_ENTRY_SIZE, PAGE_SIZE) / PAGE_SIZE;
- eq->page_list = kmalloc(npages * sizeof *eq->page_list,
- GFP_KERNEL);
+ eq->page_list = kmalloc_array(npages, sizeof(*eq->page_list),
+ GFP_KERNEL);
if (!eq->page_list)
goto err_out;
for (i = 0; i < npages; ++i)
eq->page_list[i].buf = NULL;
- dma_list = kmalloc(npages * sizeof *dma_list, GFP_KERNEL);
+ dma_list = kmalloc_array(npages, sizeof(*dma_list), GFP_KERNEL);
if (!dma_list)
goto err_out_free;
@@ -617,9 +617,9 @@ static void mthca_free_eq(struct mthca_dev *dev,
mthca_free_mr(dev, &eq->mr);
for (i = 0; i < npages; ++i)
- pci_free_consistent(dev->pdev, PAGE_SIZE,
- eq->page_list[i].buf,
- dma_unmap_addr(&eq->page_list[i], mapping));
+ dma_free_coherent(&dev->pdev->dev, PAGE_SIZE,
+ eq->page_list[i].buf,
+ dma_unmap_addr(&eq->page_list[i], mapping));
kfree(eq->page_list);
mthca_free_mailbox(dev, mailbox);
@@ -739,17 +739,18 @@ int mthca_map_eq_icm(struct mthca_dev *dev, u64 icm_virt)
dev->eq_table.icm_page = alloc_page(GFP_HIGHUSER);
if (!dev->eq_table.icm_page)
return -ENOMEM;
- dev->eq_table.icm_dma = pci_map_page(dev->pdev, dev->eq_table.icm_page, 0,
- PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
- if (pci_dma_mapping_error(dev->pdev, dev->eq_table.icm_dma)) {
+ dev->eq_table.icm_dma =
+ dma_map_page(&dev->pdev->dev, dev->eq_table.icm_page, 0,
+ PAGE_SIZE, DMA_BIDIRECTIONAL);
+ if (dma_mapping_error(&dev->pdev->dev, dev->eq_table.icm_dma)) {
__free_page(dev->eq_table.icm_page);
return -ENOMEM;
}
ret = mthca_MAP_ICM_page(dev, dev->eq_table.icm_dma, icm_virt);
if (ret) {
- pci_unmap_page(dev->pdev, dev->eq_table.icm_dma, PAGE_SIZE,
- PCI_DMA_BIDIRECTIONAL);
+ dma_unmap_page(&dev->pdev->dev, dev->eq_table.icm_dma,
+ PAGE_SIZE, DMA_BIDIRECTIONAL);
__free_page(dev->eq_table.icm_page);
}
@@ -759,8 +760,8 @@ int mthca_map_eq_icm(struct mthca_dev *dev, u64 icm_virt)
void mthca_unmap_eq_icm(struct mthca_dev *dev)
{
mthca_UNMAP_ICM(dev, dev->eq_table.icm_virt, 1);
- pci_unmap_page(dev->pdev, dev->eq_table.icm_dma, PAGE_SIZE,
- PCI_DMA_BIDIRECTIONAL);
+ dma_unmap_page(&dev->pdev->dev, dev->eq_table.icm_dma, PAGE_SIZE,
+ DMA_BIDIRECTIONAL);
__free_page(dev->eq_table.icm_page);
}
diff --git a/drivers/infiniband/hw/mthca/mthca_mad.c b/drivers/infiniband/hw/mthca/mthca_mad.c
index 9139405c4810..04252700790e 100644
--- a/drivers/infiniband/hw/mthca/mthca_mad.c
+++ b/drivers/infiniband/hw/mthca/mthca_mad.c
@@ -58,8 +58,9 @@ static int mthca_update_rate(struct mthca_dev *dev, u8 port_num)
ret = ib_query_port(&dev->ib_dev, port_num, tprops);
if (ret) {
- printk(KERN_WARNING "ib_query_port failed (%d) for %s port %d\n",
- ret, dev->ib_dev.name, port_num);
+ dev_warn(&dev->ib_dev.dev,
+ "ib_query_port failed (%d) forport %d\n", ret,
+ port_num);
goto out;
}
@@ -75,25 +76,26 @@ static void update_sm_ah(struct mthca_dev *dev,
u8 port_num, u16 lid, u8 sl)
{
struct ib_ah *new_ah;
- struct ib_ah_attr ah_attr;
+ struct rdma_ah_attr ah_attr;
unsigned long flags;
if (!dev->send_agent[port_num - 1][0])
return;
memset(&ah_attr, 0, sizeof ah_attr);
- ah_attr.dlid = lid;
- ah_attr.sl = sl;
- ah_attr.port_num = port_num;
+ ah_attr.type = rdma_ah_find_type(&dev->ib_dev, port_num);
+ rdma_ah_set_dlid(&ah_attr, lid);
+ rdma_ah_set_sl(&ah_attr, sl);
+ rdma_ah_set_port_num(&ah_attr, port_num);
- new_ah = ib_create_ah(dev->send_agent[port_num - 1][0]->qp->pd,
- &ah_attr);
+ new_ah = rdma_create_ah(dev->send_agent[port_num - 1][0]->qp->pd,
+ &ah_attr, 0);
if (IS_ERR(new_ah))
return;
spin_lock_irqsave(&dev->sm_lock, flags);
if (dev->sm_ah[port_num - 1])
- ib_destroy_ah(dev->sm_ah[port_num - 1]);
+ rdma_destroy_ah(dev->sm_ah[port_num - 1], 0);
dev->sm_ah[port_num - 1] = new_ah;
spin_unlock_irqrestore(&dev->sm_lock, flags);
}
@@ -160,7 +162,7 @@ static void node_desc_override(struct ib_device *dev,
}
static void forward_trap(struct mthca_dev *dev,
- u8 port_num,
+ u32 port_num,
const struct ib_mad *mad)
{
int qpn = mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_SUBN_LID_ROUTED;
@@ -194,30 +196,19 @@ static void forward_trap(struct mthca_dev *dev,
}
}
-int mthca_process_mad(struct ib_device *ibdev,
- int mad_flags,
- u8 port_num,
- const struct ib_wc *in_wc,
- const struct ib_grh *in_grh,
- const struct ib_mad_hdr *in, size_t in_mad_size,
- struct ib_mad_hdr *out, size_t *out_mad_size,
- u16 *out_mad_pkey_index)
+int mthca_process_mad(struct ib_device *ibdev, int mad_flags, u32 port_num,
+ const struct ib_wc *in_wc, const struct ib_grh *in_grh,
+ const struct ib_mad *in, struct ib_mad *out,
+ size_t *out_mad_size, u16 *out_mad_pkey_index)
{
int err;
- u16 slid = in_wc ? in_wc->slid : be16_to_cpu(IB_LID_PERMISSIVE);
+ u16 slid = in_wc ? ib_lid_cpu16(in_wc->slid) : be16_to_cpu(IB_LID_PERMISSIVE);
u16 prev_lid = 0;
struct ib_port_attr pattr;
- const struct ib_mad *in_mad = (const struct ib_mad *)in;
- struct ib_mad *out_mad = (struct ib_mad *)out;
-
- if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) ||
- *out_mad_size != sizeof(*out_mad)))
- return IB_MAD_RESULT_FAILURE;
/* Forward locally generated traps to the SM */
- if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP &&
- slid == 0) {
- forward_trap(to_mdev(ibdev), port_num, in_mad);
+ if (in->mad_hdr.method == IB_MGMT_METHOD_TRAP && !slid) {
+ forward_trap(to_mdev(ibdev), port_num, in);
return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
}
@@ -227,40 +218,39 @@ int mthca_process_mad(struct ib_device *ibdev,
* Only handle PMA and Mellanox vendor-specific class gets and
* sets for other classes.
*/
- if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED ||
- in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
- if (in_mad->mad_hdr.method != IB_MGMT_METHOD_GET &&
- in_mad->mad_hdr.method != IB_MGMT_METHOD_SET &&
- in_mad->mad_hdr.method != IB_MGMT_METHOD_TRAP_REPRESS)
+ if (in->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED ||
+ in->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
+ if (in->mad_hdr.method != IB_MGMT_METHOD_GET &&
+ in->mad_hdr.method != IB_MGMT_METHOD_SET &&
+ in->mad_hdr.method != IB_MGMT_METHOD_TRAP_REPRESS)
return IB_MAD_RESULT_SUCCESS;
/*
* Don't process SMInfo queries or vendor-specific
* MADs -- the SMA can't handle them.
*/
- if (in_mad->mad_hdr.attr_id == IB_SMP_ATTR_SM_INFO ||
- ((in_mad->mad_hdr.attr_id & IB_SMP_ATTR_VENDOR_MASK) ==
+ if (in->mad_hdr.attr_id == IB_SMP_ATTR_SM_INFO ||
+ ((in->mad_hdr.attr_id & IB_SMP_ATTR_VENDOR_MASK) ==
IB_SMP_ATTR_VENDOR_MASK))
return IB_MAD_RESULT_SUCCESS;
- } else if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT ||
- in_mad->mad_hdr.mgmt_class == MTHCA_VENDOR_CLASS1 ||
- in_mad->mad_hdr.mgmt_class == MTHCA_VENDOR_CLASS2) {
- if (in_mad->mad_hdr.method != IB_MGMT_METHOD_GET &&
- in_mad->mad_hdr.method != IB_MGMT_METHOD_SET)
+ } else if (in->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT ||
+ in->mad_hdr.mgmt_class == MTHCA_VENDOR_CLASS1 ||
+ in->mad_hdr.mgmt_class == MTHCA_VENDOR_CLASS2) {
+ if (in->mad_hdr.method != IB_MGMT_METHOD_GET &&
+ in->mad_hdr.method != IB_MGMT_METHOD_SET)
return IB_MAD_RESULT_SUCCESS;
} else
return IB_MAD_RESULT_SUCCESS;
- if ((in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED ||
- in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) &&
- in_mad->mad_hdr.method == IB_MGMT_METHOD_SET &&
- in_mad->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO &&
+ if ((in->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED ||
+ in->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) &&
+ in->mad_hdr.method == IB_MGMT_METHOD_SET &&
+ in->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO &&
!ib_query_port(ibdev, port_num, &pattr))
- prev_lid = pattr.lid;
+ prev_lid = ib_lid_cpu16(pattr.lid);
- err = mthca_MAD_IFC(to_mdev(ibdev),
- mad_flags & IB_MAD_IGNORE_MKEY,
- mad_flags & IB_MAD_IGNORE_BKEY,
- port_num, in_wc, in_grh, in_mad, out_mad);
+ err = mthca_MAD_IFC(to_mdev(ibdev), mad_flags & IB_MAD_IGNORE_MKEY,
+ mad_flags & IB_MAD_IGNORE_BKEY, port_num, in_wc,
+ in_grh, in, out);
if (err == -EBADMSG)
return IB_MAD_RESULT_SUCCESS;
else if (err) {
@@ -268,16 +258,16 @@ int mthca_process_mad(struct ib_device *ibdev,
return IB_MAD_RESULT_FAILURE;
}
- if (!out_mad->mad_hdr.status) {
- smp_snoop(ibdev, port_num, in_mad, prev_lid);
- node_desc_override(ibdev, out_mad);
+ if (!out->mad_hdr.status) {
+ smp_snoop(ibdev, port_num, in, prev_lid);
+ node_desc_override(ibdev, out);
}
/* set return bit in status of directed route responses */
- if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
- out_mad->mad_hdr.status |= cpu_to_be16(1 << 15);
+ if (in->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
+ out->mad_hdr.status |= cpu_to_be16(1 << 15);
- if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP_REPRESS)
+ if (in->mad_hdr.method == IB_MGMT_METHOD_TRAP_REPRESS)
/* no response for trap repress */
return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
@@ -345,6 +335,7 @@ void mthca_free_agents(struct mthca_dev *dev)
}
if (dev->sm_ah[p])
- ib_destroy_ah(dev->sm_ah[p]);
+ rdma_destroy_ah(dev->sm_ah[p],
+ RDMA_DESTROY_AH_SLEEPABLE);
}
}
diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c
index ded76c101dde..1ab268b77096 100644
--- a/drivers/infiniband/hw/mthca/mthca_main.c
+++ b/drivers/infiniband/hw/mthca/mthca_main.c
@@ -49,7 +49,6 @@
MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("Mellanox InfiniBand HCA low-level driver");
MODULE_LICENSE("Dual BSD/GPL");
-MODULE_VERSION(DRV_VERSION);
#ifdef CONFIG_INFINIBAND_MTHCA_DEBUG
@@ -383,7 +382,7 @@ static int mthca_init_icm(struct mthca_dev *mdev,
struct mthca_init_hca_param *init_hca,
u64 icm_size)
{
- u64 aux_pages;
+ u64 aux_pages = 0;
int err;
err = mthca_SET_ICM_SIZE(mdev, icm_size, &aux_pages);
@@ -474,11 +473,11 @@ static int mthca_init_icm(struct mthca_dev *mdev,
goto err_unmap_eqp;
}
- mdev->cq_table.table = mthca_alloc_icm_table(mdev, init_hca->cqc_base,
- dev_lim->cqc_entry_sz,
- mdev->limits.num_cqs,
- mdev->limits.reserved_cqs,
- 0, 0);
+ mdev->cq_table.table = mthca_alloc_icm_table(mdev, init_hca->cqc_base,
+ dev_lim->cqc_entry_sz,
+ mdev->limits.num_cqs,
+ mdev->limits.reserved_cqs,
+ 0, 0);
if (!mdev->cq_table.table) {
mthca_err(mdev, "Failed to map CQ context memory, aborting.\n");
err = -ENOMEM;
@@ -851,20 +850,18 @@ err_uar_table_free:
static int mthca_enable_msi_x(struct mthca_dev *mdev)
{
- struct msix_entry entries[3];
int err;
- entries[0].entry = 0;
- entries[1].entry = 1;
- entries[2].entry = 2;
-
- err = pci_enable_msix_exact(mdev->pdev, entries, ARRAY_SIZE(entries));
- if (err)
+ err = pci_alloc_irq_vectors(mdev->pdev, 3, 3, PCI_IRQ_MSIX);
+ if (err < 0)
return err;
- mdev->eq_table.eq[MTHCA_EQ_COMP ].msi_x_vector = entries[0].vector;
- mdev->eq_table.eq[MTHCA_EQ_ASYNC].msi_x_vector = entries[1].vector;
- mdev->eq_table.eq[MTHCA_EQ_CMD ].msi_x_vector = entries[2].vector;
+ mdev->eq_table.eq[MTHCA_EQ_COMP ].msi_x_vector =
+ pci_irq_vector(mdev->pdev, 0);
+ mdev->eq_table.eq[MTHCA_EQ_ASYNC].msi_x_vector =
+ pci_irq_vector(mdev->pdev, 1);
+ mdev->eq_table.eq[MTHCA_EQ_CMD ].msi_x_vector =
+ pci_irq_vector(mdev->pdev, 2);
return 0;
}
@@ -940,31 +937,16 @@ static int __mthca_init_one(struct pci_dev *pdev, int hca_type)
pci_set_master(pdev);
- err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
- if (err) {
- dev_warn(&pdev->dev, "Warning: couldn't set 64-bit PCI DMA mask.\n");
- err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
- if (err) {
- dev_err(&pdev->dev, "Can't set PCI DMA mask, aborting.\n");
- goto err_free_res;
- }
- }
- err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
+ err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
if (err) {
- dev_warn(&pdev->dev, "Warning: couldn't set 64-bit "
- "consistent PCI DMA mask.\n");
- err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
- if (err) {
- dev_err(&pdev->dev, "Can't set consistent PCI DMA mask, "
- "aborting.\n");
- goto err_free_res;
- }
+ dev_err(&pdev->dev, "Can't set PCI DMA mask, aborting.\n");
+ goto err_free_res;
}
/* We can handle large RDMA requests, so allow larger segments. */
dma_set_max_seg_size(&pdev->dev, 1024 * 1024 * 1024);
- mdev = (struct mthca_dev *) ib_alloc_device(sizeof *mdev);
+ mdev = ib_alloc_device(mthca_dev, ib_dev);
if (!mdev) {
dev_err(&pdev->dev, "Device struct alloc failed, "
"aborting.\n");
@@ -989,7 +971,8 @@ static int __mthca_init_one(struct pci_dev *pdev, int hca_type)
goto err_free_dev;
}
- if (mthca_cmd_init(mdev)) {
+ err = mthca_cmd_init(mdev);
+ if (err) {
mthca_err(mdev, "Failed to init command interface, aborting.\n");
goto err_free_dev;
}
@@ -1017,8 +1000,7 @@ static int __mthca_init_one(struct pci_dev *pdev, int hca_type)
err = mthca_setup_hca(mdev);
if (err == -EBUSY && (mdev->mthca_flags & MTHCA_FLAG_MSI_X)) {
- if (mdev->mthca_flags & MTHCA_FLAG_MSI_X)
- pci_disable_msix(pdev);
+ pci_free_irq_vectors(pdev);
mdev->mthca_flags &= ~MTHCA_FLAG_MSI_X;
err = mthca_setup_hca(mdev);
@@ -1062,7 +1044,7 @@ err_cleanup:
err_close:
if (mdev->mthca_flags & MTHCA_FLAG_MSI_X)
- pci_disable_msix(pdev);
+ pci_free_irq_vectors(pdev);
mthca_close_hca(mdev);
@@ -1113,7 +1095,7 @@ static void __mthca_remove_one(struct pci_dev *pdev)
mthca_cmd_cleanup(mdev);
if (mdev->mthca_flags & MTHCA_FLAG_MSI_X)
- pci_disable_msix(pdev);
+ pci_free_irq_vectors(pdev);
ib_dealloc_device(&mdev->ib_dev);
pci_release_regions(pdev);
@@ -1164,7 +1146,7 @@ static void mthca_remove_one(struct pci_dev *pdev)
mutex_unlock(&mthca_device_mutex);
}
-static struct pci_device_id mthca_pci_table[] = {
+static const struct pci_device_id mthca_pci_table[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, PCI_DEVICE_ID_MELLANOX_TAVOR),
.driver_data = TAVOR },
{ PCI_DEVICE(PCI_VENDOR_ID_TOPSPIN, PCI_DEVICE_ID_MELLANOX_TAVOR),
diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.c b/drivers/infiniband/hw/mthca/mthca_memfree.c
index c6fe89d79248..f2734a5c5f26 100644
--- a/drivers/infiniband/hw/mthca/mthca_memfree.c
+++ b/drivers/infiniband/hw/mthca/mthca_memfree.c
@@ -58,7 +58,7 @@ struct mthca_user_db_table {
u64 uvirt;
struct scatterlist mem;
int refcount;
- } page[0];
+ } page[];
};
static void mthca_free_icm_pages(struct mthca_dev *dev, struct mthca_icm_chunk *chunk)
@@ -66,8 +66,8 @@ static void mthca_free_icm_pages(struct mthca_dev *dev, struct mthca_icm_chunk *
int i;
if (chunk->nsg > 0)
- pci_unmap_sg(dev->pdev, chunk->mem, chunk->npages,
- PCI_DMA_BIDIRECTIONAL);
+ dma_unmap_sg(&dev->pdev->dev, chunk->mem, chunk->npages,
+ DMA_BIDIRECTIONAL);
for (i = 0; i < chunk->npages; ++i)
__free_pages(sg_page(&chunk->mem[i]),
@@ -184,9 +184,10 @@ struct mthca_icm *mthca_alloc_icm(struct mthca_dev *dev, int npages,
if (coherent)
++chunk->nsg;
else if (chunk->npages == MTHCA_ICM_CHUNK_LEN) {
- chunk->nsg = pci_map_sg(dev->pdev, chunk->mem,
- chunk->npages,
- PCI_DMA_BIDIRECTIONAL);
+ chunk->nsg =
+ dma_map_sg(&dev->pdev->dev, chunk->mem,
+ chunk->npages,
+ DMA_BIDIRECTIONAL);
if (chunk->nsg <= 0)
goto fail;
@@ -204,9 +205,8 @@ struct mthca_icm *mthca_alloc_icm(struct mthca_dev *dev, int npages,
}
if (!coherent && chunk) {
- chunk->nsg = pci_map_sg(dev->pdev, chunk->mem,
- chunk->npages,
- PCI_DMA_BIDIRECTIONAL);
+ chunk->nsg = dma_map_sg(&dev->pdev->dev, chunk->mem,
+ chunk->npages, DMA_BIDIRECTIONAL);
if (chunk->nsg <= 0)
goto fail;
@@ -367,7 +367,7 @@ struct mthca_icm_table *mthca_alloc_icm_table(struct mthca_dev *dev,
obj_per_chunk = MTHCA_TABLE_CHUNK_SIZE / obj_size;
num_icm = DIV_ROUND_UP(nobj, obj_per_chunk);
- table = kmalloc(sizeof *table + num_icm * sizeof *table->icm, GFP_KERNEL);
+ table = kmalloc(struct_size(table, icm, num_icm), GFP_KERNEL);
if (!table)
return NULL;
@@ -472,24 +472,27 @@ int mthca_map_user_db(struct mthca_dev *dev, struct mthca_uar *uar,
goto out;
}
- ret = get_user_pages(uaddr & PAGE_MASK, 1, FOLL_WRITE, pages, NULL);
+ ret = pin_user_pages_fast(uaddr & PAGE_MASK, 1,
+ FOLL_WRITE | FOLL_LONGTERM, pages);
if (ret < 0)
goto out;
sg_set_page(&db_tab->page[i].mem, pages[0], MTHCA_ICM_PAGE_SIZE,
uaddr & ~PAGE_MASK);
- ret = pci_map_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE);
+ ret = dma_map_sg(&dev->pdev->dev, &db_tab->page[i].mem, 1,
+ DMA_TO_DEVICE);
if (ret < 0) {
- put_page(pages[0]);
+ unpin_user_page(pages[0]);
goto out;
}
ret = mthca_MAP_ICM_page(dev, sg_dma_address(&db_tab->page[i].mem),
mthca_uarc_virt(dev, uar, i));
if (ret) {
- pci_unmap_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE);
- put_page(sg_page(&db_tab->page[i].mem));
+ dma_unmap_sg(&dev->pdev->dev, &db_tab->page[i].mem, 1,
+ DMA_TO_DEVICE);
+ unpin_user_page(sg_page(&db_tab->page[i].mem));
goto out;
}
@@ -529,7 +532,7 @@ struct mthca_user_db_table *mthca_init_user_db_tab(struct mthca_dev *dev)
return NULL;
npages = dev->uar_table.uarc_size / MTHCA_ICM_PAGE_SIZE;
- db_tab = kmalloc(sizeof *db_tab + npages * sizeof *db_tab->page, GFP_KERNEL);
+ db_tab = kmalloc(struct_size(db_tab, page, npages), GFP_KERNEL);
if (!db_tab)
return ERR_PTR(-ENOMEM);
@@ -554,8 +557,9 @@ void mthca_cleanup_user_db_tab(struct mthca_dev *dev, struct mthca_uar *uar,
for (i = 0; i < dev->uar_table.uarc_size / MTHCA_ICM_PAGE_SIZE; ++i) {
if (db_tab->page[i].uvirt) {
mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, uar, i), 1);
- pci_unmap_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE);
- put_page(sg_page(&db_tab->page[i].mem));
+ dma_unmap_sg(&dev->pdev->dev, &db_tab->page[i].mem, 1,
+ DMA_TO_DEVICE);
+ unpin_user_page(sg_page(&db_tab->page[i].mem));
}
}
@@ -623,13 +627,13 @@ int mthca_alloc_db(struct mthca_dev *dev, enum mthca_db_type type,
page = dev->db_tab->page + end;
alloc:
- page->db_rec = dma_alloc_coherent(&dev->pdev->dev, MTHCA_ICM_PAGE_SIZE,
- &page->mapping, GFP_KERNEL);
+ page->db_rec = dma_alloc_coherent(&dev->pdev->dev,
+ MTHCA_ICM_PAGE_SIZE, &page->mapping,
+ GFP_KERNEL);
if (!page->db_rec) {
ret = -ENOMEM;
goto out;
}
- memset(page->db_rec, 0, MTHCA_ICM_PAGE_SIZE);
ret = mthca_MAP_ICM_page(dev, page->mapping,
mthca_uarc_virt(dev, &dev->driver_uar, i));
@@ -713,9 +717,9 @@ int mthca_init_db_tab(struct mthca_dev *dev)
dev->db_tab->max_group1 = 0;
dev->db_tab->min_group2 = dev->db_tab->npages - 1;
- dev->db_tab->page = kmalloc(dev->db_tab->npages *
- sizeof *dev->db_tab->page,
- GFP_KERNEL);
+ dev->db_tab->page = kmalloc_array(dev->db_tab->npages,
+ sizeof(*dev->db_tab->page),
+ GFP_KERNEL);
if (!dev->db_tab->page) {
kfree(dev->db_tab);
return -ENOMEM;
diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.h b/drivers/infiniband/hw/mthca/mthca_memfree.h
index da9b8f9b884f..61d5bbba293a 100644
--- a/drivers/infiniband/hw/mthca/mthca_memfree.h
+++ b/drivers/infiniband/hw/mthca/mthca_memfree.h
@@ -68,7 +68,7 @@ struct mthca_icm_table {
int lowmem;
int coherent;
struct mutex mutex;
- struct mthca_icm *icm[0];
+ struct mthca_icm *icm[] __counted_by(num_icm);
};
struct mthca_icm_iter {
diff --git a/drivers/infiniband/hw/mthca/mthca_mr.c b/drivers/infiniband/hw/mthca/mthca_mr.c
index ed9a989e501b..dacb8ceeebe0 100644
--- a/drivers/infiniband/hw/mthca/mthca_mr.c
+++ b/drivers/infiniband/hw/mthca/mthca_mr.c
@@ -60,7 +60,7 @@ struct mthca_mpt_entry {
__be64 mtt_seg;
__be32 mtt_sz; /* Arbel only */
u32 reserved[2];
-} __attribute__((packed));
+} __packed;
#define MTHCA_MPT_FLAG_SW_OWNS (0xfUL << 28)
#define MTHCA_MPT_FLAG_MIO (1 << 17)
@@ -101,13 +101,13 @@ static u32 mthca_buddy_alloc(struct mthca_buddy *buddy, int order)
return -1;
found:
- clear_bit(seg, buddy->bits[o]);
+ __clear_bit(seg, buddy->bits[o]);
--buddy->num_free[o];
while (o > order) {
--o;
seg <<= 1;
- set_bit(seg ^ 1, buddy->bits[o]);
+ __set_bit(seg ^ 1, buddy->bits[o]);
++buddy->num_free[o];
}
@@ -125,13 +125,13 @@ static void mthca_buddy_free(struct mthca_buddy *buddy, u32 seg, int order)
spin_lock(&buddy->lock);
while (test_bit(seg ^ 1, buddy->bits[order])) {
- clear_bit(seg ^ 1, buddy->bits[order]);
+ __clear_bit(seg ^ 1, buddy->bits[order]);
--buddy->num_free[order];
seg >>= 1;
++order;
}
- set_bit(seg, buddy->bits[order]);
+ __set_bit(seg, buddy->bits[order]);
++buddy->num_free[order];
spin_unlock(&buddy->lock);
@@ -139,12 +139,12 @@ static void mthca_buddy_free(struct mthca_buddy *buddy, u32 seg, int order)
static int mthca_buddy_init(struct mthca_buddy *buddy, int max_order)
{
- int i, s;
+ int i;
buddy->max_order = max_order;
spin_lock_init(&buddy->lock);
- buddy->bits = kzalloc((buddy->max_order + 1) * sizeof (long *),
+ buddy->bits = kcalloc(buddy->max_order + 1, sizeof(*buddy->bits),
GFP_KERNEL);
buddy->num_free = kcalloc((buddy->max_order + 1), sizeof *buddy->num_free,
GFP_KERNEL);
@@ -152,22 +152,20 @@ static int mthca_buddy_init(struct mthca_buddy *buddy, int max_order)
goto err_out;
for (i = 0; i <= buddy->max_order; ++i) {
- s = BITS_TO_LONGS(1 << (buddy->max_order - i));
- buddy->bits[i] = kmalloc(s * sizeof (long), GFP_KERNEL);
+ buddy->bits[i] = bitmap_zalloc(1 << (buddy->max_order - i),
+ GFP_KERNEL);
if (!buddy->bits[i])
goto err_out_free;
- bitmap_zero(buddy->bits[i],
- 1 << (buddy->max_order - i));
}
- set_bit(0, buddy->bits[buddy->max_order]);
+ __set_bit(0, buddy->bits[buddy->max_order]);
buddy->num_free[buddy->max_order] = 1;
return 0;
err_out_free:
for (i = 0; i <= buddy->max_order; ++i)
- kfree(buddy->bits[i]);
+ bitmap_free(buddy->bits[i]);
err_out:
kfree(buddy->bits);
@@ -181,7 +179,7 @@ static void mthca_buddy_cleanup(struct mthca_buddy *buddy)
int i;
for (i = 0; i <= buddy->max_order; ++i)
- kfree(buddy->bits[i]);
+ bitmap_free(buddy->bits[i]);
kfree(buddy->bits);
kfree(buddy->num_free);
@@ -469,8 +467,7 @@ int mthca_mr_alloc(struct mthca_dev *dev, u32 pd, int buffer_size_shift,
mpt_entry->start = cpu_to_be64(iova);
mpt_entry->length = cpu_to_be64(total_size);
- memset(&mpt_entry->lkey, 0,
- sizeof *mpt_entry - offsetof(struct mthca_mpt_entry, lkey));
+ memset_startat(mpt_entry, 0, lkey);
if (mr->mtt)
mpt_entry->mtt_seg =
@@ -541,7 +538,7 @@ int mthca_mr_alloc_phys(struct mthca_dev *dev, u32 pd,
return err;
}
-/* Free mr or fmr */
+/* Free mr */
static void mthca_free_region(struct mthca_dev *dev, u32 lkey)
{
mthca_table_put(dev, dev->mr_table.mpt_table,
@@ -564,266 +561,6 @@ void mthca_free_mr(struct mthca_dev *dev, struct mthca_mr *mr)
mthca_free_mtt(dev, mr->mtt);
}
-int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd,
- u32 access, struct mthca_fmr *mr)
-{
- struct mthca_mpt_entry *mpt_entry;
- struct mthca_mailbox *mailbox;
- u64 mtt_seg;
- u32 key, idx;
- int list_len = mr->attr.max_pages;
- int err = -ENOMEM;
- int i;
-
- if (mr->attr.page_shift < 12 || mr->attr.page_shift >= 32)
- return -EINVAL;
-
- /* For Arbel, all MTTs must fit in the same page. */
- if (mthca_is_memfree(dev) &&
- mr->attr.max_pages * sizeof *mr->mem.arbel.mtts > PAGE_SIZE)
- return -EINVAL;
-
- mr->maps = 0;
-
- key = mthca_alloc(&dev->mr_table.mpt_alloc);
- if (key == -1)
- return -ENOMEM;
- key = adjust_key(dev, key);
-
- idx = key & (dev->limits.num_mpts - 1);
- mr->ibmr.rkey = mr->ibmr.lkey = hw_index_to_key(dev, key);
-
- if (mthca_is_memfree(dev)) {
- err = mthca_table_get(dev, dev->mr_table.mpt_table, key);
- if (err)
- goto err_out_mpt_free;
-
- mr->mem.arbel.mpt = mthca_table_find(dev->mr_table.mpt_table, key, NULL);
- BUG_ON(!mr->mem.arbel.mpt);
- } else
- mr->mem.tavor.mpt = dev->mr_table.tavor_fmr.mpt_base +
- sizeof *(mr->mem.tavor.mpt) * idx;
-
- mr->mtt = __mthca_alloc_mtt(dev, list_len, dev->mr_table.fmr_mtt_buddy);
- if (IS_ERR(mr->mtt)) {
- err = PTR_ERR(mr->mtt);
- goto err_out_table;
- }
-
- mtt_seg = mr->mtt->first_seg * dev->limits.mtt_seg_size;
-
- if (mthca_is_memfree(dev)) {
- mr->mem.arbel.mtts = mthca_table_find(dev->mr_table.mtt_table,
- mr->mtt->first_seg,
- &mr->mem.arbel.dma_handle);
- BUG_ON(!mr->mem.arbel.mtts);
- } else
- mr->mem.tavor.mtts = dev->mr_table.tavor_fmr.mtt_base + mtt_seg;
-
- mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
- if (IS_ERR(mailbox)) {
- err = PTR_ERR(mailbox);
- goto err_out_free_mtt;
- }
-
- mpt_entry = mailbox->buf;
-
- mpt_entry->flags = cpu_to_be32(MTHCA_MPT_FLAG_SW_OWNS |
- MTHCA_MPT_FLAG_MIO |
- MTHCA_MPT_FLAG_REGION |
- access);
-
- mpt_entry->page_size = cpu_to_be32(mr->attr.page_shift - 12);
- mpt_entry->key = cpu_to_be32(key);
- mpt_entry->pd = cpu_to_be32(pd);
- memset(&mpt_entry->start, 0,
- sizeof *mpt_entry - offsetof(struct mthca_mpt_entry, start));
- mpt_entry->mtt_seg = cpu_to_be64(dev->mr_table.mtt_base + mtt_seg);
-
- if (0) {
- mthca_dbg(dev, "Dumping MPT entry %08x:\n", mr->ibmr.lkey);
- for (i = 0; i < sizeof (struct mthca_mpt_entry) / 4; ++i) {
- if (i % 4 == 0)
- printk("[%02x] ", i * 4);
- printk(" %08x", be32_to_cpu(((__be32 *) mpt_entry)[i]));
- if ((i + 1) % 4 == 0)
- printk("\n");
- }
- }
-
- err = mthca_SW2HW_MPT(dev, mailbox,
- key & (dev->limits.num_mpts - 1));
- if (err) {
- mthca_warn(dev, "SW2HW_MPT failed (%d)\n", err);
- goto err_out_mailbox_free;
- }
-
- mthca_free_mailbox(dev, mailbox);
- return 0;
-
-err_out_mailbox_free:
- mthca_free_mailbox(dev, mailbox);
-
-err_out_free_mtt:
- mthca_free_mtt(dev, mr->mtt);
-
-err_out_table:
- mthca_table_put(dev, dev->mr_table.mpt_table, key);
-
-err_out_mpt_free:
- mthca_free(&dev->mr_table.mpt_alloc, key);
- return err;
-}
-
-int mthca_free_fmr(struct mthca_dev *dev, struct mthca_fmr *fmr)
-{
- if (fmr->maps)
- return -EBUSY;
-
- mthca_free_region(dev, fmr->ibmr.lkey);
- mthca_free_mtt(dev, fmr->mtt);
-
- return 0;
-}
-
-static inline int mthca_check_fmr(struct mthca_fmr *fmr, u64 *page_list,
- int list_len, u64 iova)
-{
- int i, page_mask;
-
- if (list_len > fmr->attr.max_pages)
- return -EINVAL;
-
- page_mask = (1 << fmr->attr.page_shift) - 1;
-
- /* We are getting page lists, so va must be page aligned. */
- if (iova & page_mask)
- return -EINVAL;
-
- /* Trust the user not to pass misaligned data in page_list */
- if (0)
- for (i = 0; i < list_len; ++i) {
- if (page_list[i] & ~page_mask)
- return -EINVAL;
- }
-
- if (fmr->maps >= fmr->attr.max_maps)
- return -EINVAL;
-
- return 0;
-}
-
-
-int mthca_tavor_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
- int list_len, u64 iova)
-{
- struct mthca_fmr *fmr = to_mfmr(ibfmr);
- struct mthca_dev *dev = to_mdev(ibfmr->device);
- struct mthca_mpt_entry mpt_entry;
- u32 key;
- int i, err;
-
- err = mthca_check_fmr(fmr, page_list, list_len, iova);
- if (err)
- return err;
-
- ++fmr->maps;
-
- key = tavor_key_to_hw_index(fmr->ibmr.lkey);
- key += dev->limits.num_mpts;
- fmr->ibmr.lkey = fmr->ibmr.rkey = tavor_hw_index_to_key(key);
-
- writeb(MTHCA_MPT_STATUS_SW, fmr->mem.tavor.mpt);
-
- for (i = 0; i < list_len; ++i) {
- __be64 mtt_entry = cpu_to_be64(page_list[i] |
- MTHCA_MTT_FLAG_PRESENT);
- mthca_write64_raw(mtt_entry, fmr->mem.tavor.mtts + i);
- }
-
- mpt_entry.lkey = cpu_to_be32(key);
- mpt_entry.length = cpu_to_be64(list_len * (1ull << fmr->attr.page_shift));
- mpt_entry.start = cpu_to_be64(iova);
-
- __raw_writel((__force u32) mpt_entry.lkey, &fmr->mem.tavor.mpt->key);
- memcpy_toio(&fmr->mem.tavor.mpt->start, &mpt_entry.start,
- offsetof(struct mthca_mpt_entry, window_count) -
- offsetof(struct mthca_mpt_entry, start));
-
- writeb(MTHCA_MPT_STATUS_HW, fmr->mem.tavor.mpt);
-
- return 0;
-}
-
-int mthca_arbel_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
- int list_len, u64 iova)
-{
- struct mthca_fmr *fmr = to_mfmr(ibfmr);
- struct mthca_dev *dev = to_mdev(ibfmr->device);
- u32 key;
- int i, err;
-
- err = mthca_check_fmr(fmr, page_list, list_len, iova);
- if (err)
- return err;
-
- ++fmr->maps;
-
- key = arbel_key_to_hw_index(fmr->ibmr.lkey);
- if (dev->mthca_flags & MTHCA_FLAG_SINAI_OPT)
- key += SINAI_FMR_KEY_INC;
- else
- key += dev->limits.num_mpts;
- fmr->ibmr.lkey = fmr->ibmr.rkey = arbel_hw_index_to_key(key);
-
- *(u8 *) fmr->mem.arbel.mpt = MTHCA_MPT_STATUS_SW;
-
- wmb();
-
- dma_sync_single_for_cpu(&dev->pdev->dev, fmr->mem.arbel.dma_handle,
- list_len * sizeof(u64), DMA_TO_DEVICE);
-
- for (i = 0; i < list_len; ++i)
- fmr->mem.arbel.mtts[i] = cpu_to_be64(page_list[i] |
- MTHCA_MTT_FLAG_PRESENT);
-
- dma_sync_single_for_device(&dev->pdev->dev, fmr->mem.arbel.dma_handle,
- list_len * sizeof(u64), DMA_TO_DEVICE);
-
- fmr->mem.arbel.mpt->key = cpu_to_be32(key);
- fmr->mem.arbel.mpt->lkey = cpu_to_be32(key);
- fmr->mem.arbel.mpt->length = cpu_to_be64(list_len * (1ull << fmr->attr.page_shift));
- fmr->mem.arbel.mpt->start = cpu_to_be64(iova);
-
- wmb();
-
- *(u8 *) fmr->mem.arbel.mpt = MTHCA_MPT_STATUS_HW;
-
- wmb();
-
- return 0;
-}
-
-void mthca_tavor_fmr_unmap(struct mthca_dev *dev, struct mthca_fmr *fmr)
-{
- if (!fmr->maps)
- return;
-
- fmr->maps = 0;
-
- writeb(MTHCA_MPT_STATUS_SW, fmr->mem.tavor.mpt);
-}
-
-void mthca_arbel_fmr_unmap(struct mthca_dev *dev, struct mthca_fmr *fmr)
-{
- if (!fmr->maps)
- return;
-
- fmr->maps = 0;
-
- *(u8 *) fmr->mem.arbel.mpt = MTHCA_MPT_STATUS_SW;
-}
-
int mthca_init_mr_table(struct mthca_dev *dev)
{
phys_addr_t addr;
diff --git a/drivers/infiniband/hw/mthca/mthca_profile.c b/drivers/infiniband/hw/mthca/mthca_profile.c
index 15d064479ef6..69af65f1b332 100644
--- a/drivers/infiniband/hw/mthca/mthca_profile.c
+++ b/drivers/infiniband/hw/mthca/mthca_profile.c
@@ -31,8 +31,6 @@
* SOFTWARE.
*/
-#include <linux/module.h>
-#include <linux/moduleparam.h>
#include <linux/string.h>
#include <linux/slab.h>
@@ -79,7 +77,7 @@ s64 mthca_make_profile(struct mthca_dev *dev,
struct mthca_resource *profile;
int i, j;
- profile = kzalloc(MTHCA_RES_NUM * sizeof *profile, GFP_KERNEL);
+ profile = kcalloc(MTHCA_RES_NUM, sizeof(*profile), GFP_KERNEL);
if (!profile)
return -ENOMEM;
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index d31708742ba5..dd572d76866c 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -37,6 +37,7 @@
#include <rdma/ib_smi.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_user_verbs.h>
+#include <rdma/uverbs_ioctl.h>
#include <linux/sched.h>
#include <linux/slab.h>
@@ -49,19 +50,11 @@
#include <rdma/mthca-abi.h>
#include "mthca_memfree.h"
-static void init_query_mad(struct ib_smp *mad)
-{
- mad->base_version = 1;
- mad->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED;
- mad->class_version = 1;
- mad->method = IB_MGMT_METHOD_GET;
-}
-
static int mthca_query_device(struct ib_device *ibdev, struct ib_device_attr *props,
struct ib_udata *uhw)
{
- struct ib_smp *in_mad = NULL;
- struct ib_smp *out_mad = NULL;
+ struct ib_smp *in_mad;
+ struct ib_smp *out_mad;
int err = -ENOMEM;
struct mthca_dev *mdev = to_mdev(ibdev);
@@ -77,7 +70,7 @@ static int mthca_query_device(struct ib_device *ibdev, struct ib_device_attr *pr
props->fw_ver = mdev->fw_ver;
- init_query_mad(in_mad);
+ ib_init_query_mad(in_mad);
in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;
err = mthca_MAD_IFC(mdev, 1, 1,
@@ -96,8 +89,9 @@ static int mthca_query_device(struct ib_device *ibdev, struct ib_device_attr *pr
props->page_size_cap = mdev->limits.page_size_cap;
props->max_qp = mdev->limits.num_qps - mdev->limits.reserved_qps;
props->max_qp_wr = mdev->limits.max_wqes;
- props->max_sge = mdev->limits.max_sg;
- props->max_sge_rd = props->max_sge;
+ props->max_send_sge = mdev->limits.max_sg;
+ props->max_recv_sge = mdev->limits.max_sg;
+ props->max_sge_rd = mdev->limits.max_sg;
props->max_cq = mdev->limits.num_cqs - mdev->limits.reserved_cqs;
props->max_cqe = mdev->limits.max_cqes;
props->max_mr = mdev->limits.num_mpts - mdev->limits.reserved_mrws;
@@ -116,16 +110,6 @@ static int mthca_query_device(struct ib_device *ibdev, struct ib_device_attr *pr
props->max_mcast_qp_attach = MTHCA_QP_PER_MGM;
props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
props->max_mcast_grp;
- /*
- * If Sinai memory key optimization is being used, then only
- * the 8-bit key portion will change. For other HCAs, the
- * unused index bits will also be used for FMR remapping.
- */
- if (mdev->mthca_flags & MTHCA_FLAG_SINAI_OPT)
- props->max_map_per_fmr = 255;
- else
- props->max_map_per_fmr =
- (1 << (32 - ilog2(mdev->limits.num_mpts))) - 1;
err = 0;
out:
@@ -135,10 +119,10 @@ static int mthca_query_device(struct ib_device *ibdev, struct ib_device_attr *pr
}
static int mthca_query_port(struct ib_device *ibdev,
- u8 port, struct ib_port_attr *props)
+ u32 port, struct ib_port_attr *props)
{
- struct ib_smp *in_mad = NULL;
- struct ib_smp *out_mad = NULL;
+ struct ib_smp *in_mad;
+ struct ib_smp *out_mad;
int err = -ENOMEM;
in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
@@ -146,9 +130,9 @@ static int mthca_query_port(struct ib_device *ibdev,
if (!in_mad || !out_mad)
goto out;
- memset(props, 0, sizeof *props);
+ /* props being zeroed by the caller, avoid zeroing it here */
- init_query_mad(in_mad);
+ ib_init_query_mad(in_mad);
in_mad->attr_id = IB_SMP_ATTR_PORT_INFO;
in_mad->attr_mod = cpu_to_be32(port);
@@ -202,7 +186,7 @@ static int mthca_modify_device(struct ib_device *ibdev,
}
static int mthca_modify_port(struct ib_device *ibdev,
- u8 port, int port_modify_mask,
+ u32 port, int port_modify_mask,
struct ib_port_modify *props)
{
struct mthca_set_ib_param set_ib;
@@ -212,7 +196,7 @@ static int mthca_modify_port(struct ib_device *ibdev,
if (mutex_lock_interruptible(&to_mdev(ibdev)->cap_mask_mutex))
return -ERESTARTSYS;
- err = mthca_query_port(ibdev, port, &attr);
+ err = ib_query_port(ibdev, port, &attr);
if (err)
goto out;
@@ -231,10 +215,10 @@ out:
}
static int mthca_query_pkey(struct ib_device *ibdev,
- u8 port, u16 index, u16 *pkey)
+ u32 port, u16 index, u16 *pkey)
{
- struct ib_smp *in_mad = NULL;
- struct ib_smp *out_mad = NULL;
+ struct ib_smp *in_mad;
+ struct ib_smp *out_mad;
int err = -ENOMEM;
in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
@@ -242,7 +226,7 @@ static int mthca_query_pkey(struct ib_device *ibdev,
if (!in_mad || !out_mad)
goto out;
- init_query_mad(in_mad);
+ ib_init_query_mad(in_mad);
in_mad->attr_id = IB_SMP_ATTR_PKEY_TABLE;
in_mad->attr_mod = cpu_to_be32(index / 32);
@@ -259,11 +243,11 @@ static int mthca_query_pkey(struct ib_device *ibdev,
return err;
}
-static int mthca_query_gid(struct ib_device *ibdev, u8 port,
+static int mthca_query_gid(struct ib_device *ibdev, u32 port,
int index, union ib_gid *gid)
{
- struct ib_smp *in_mad = NULL;
- struct ib_smp *out_mad = NULL;
+ struct ib_smp *in_mad;
+ struct ib_smp *out_mad;
int err = -ENOMEM;
in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
@@ -271,7 +255,7 @@ static int mthca_query_gid(struct ib_device *ibdev, u8 port,
if (!in_mad || !out_mad)
goto out;
- init_query_mad(in_mad);
+ ib_init_query_mad(in_mad);
in_mad->attr_id = IB_SMP_ATTR_PORT_INFO;
in_mad->attr_mod = cpu_to_be32(port);
@@ -282,7 +266,7 @@ static int mthca_query_gid(struct ib_device *ibdev, u8 port,
memcpy(gid->raw, out_mad->data + 8, 8);
- init_query_mad(in_mad);
+ ib_init_query_mad(in_mad);
in_mad->attr_id = IB_SMP_ATTR_GUID_INFO;
in_mad->attr_mod = cpu_to_be32(index / 8);
@@ -299,17 +283,16 @@ static int mthca_query_gid(struct ib_device *ibdev, u8 port,
return err;
}
-static struct ib_ucontext *mthca_alloc_ucontext(struct ib_device *ibdev,
- struct ib_udata *udata)
+static int mthca_alloc_ucontext(struct ib_ucontext *uctx,
+ struct ib_udata *udata)
{
- struct mthca_alloc_ucontext_resp uresp;
- struct mthca_ucontext *context;
+ struct ib_device *ibdev = uctx->device;
+ struct mthca_alloc_ucontext_resp uresp = {};
+ struct mthca_ucontext *context = to_mucontext(uctx);
int err;
if (!(to_mdev(ibdev)->active))
- return ERR_PTR(-EAGAIN);
-
- memset(&uresp, 0, sizeof uresp);
+ return -EAGAIN;
uresp.qp_tab_size = to_mdev(ibdev)->limits.num_qps;
if (mthca_is_memfree(to_mdev(ibdev)))
@@ -317,44 +300,33 @@ static struct ib_ucontext *mthca_alloc_ucontext(struct ib_device *ibdev,
else
uresp.uarc_size = 0;
- context = kmalloc(sizeof *context, GFP_KERNEL);
- if (!context)
- return ERR_PTR(-ENOMEM);
-
err = mthca_uar_alloc(to_mdev(ibdev), &context->uar);
- if (err) {
- kfree(context);
- return ERR_PTR(err);
- }
+ if (err)
+ return err;
context->db_tab = mthca_init_user_db_tab(to_mdev(ibdev));
if (IS_ERR(context->db_tab)) {
err = PTR_ERR(context->db_tab);
mthca_uar_free(to_mdev(ibdev), &context->uar);
- kfree(context);
- return ERR_PTR(err);
+ return err;
}
- if (ib_copy_to_udata(udata, &uresp, sizeof uresp)) {
+ if (ib_copy_to_udata(udata, &uresp, sizeof(uresp))) {
mthca_cleanup_user_db_tab(to_mdev(ibdev), &context->uar, context->db_tab);
mthca_uar_free(to_mdev(ibdev), &context->uar);
- kfree(context);
- return ERR_PTR(-EFAULT);
+ return -EFAULT;
}
context->reg_mr_warned = 0;
- return &context->ibucontext;
+ return 0;
}
-static int mthca_dealloc_ucontext(struct ib_ucontext *context)
+static void mthca_dealloc_ucontext(struct ib_ucontext *context)
{
mthca_cleanup_user_db_tab(to_mdev(context->device), &to_mucontext(context)->uar,
to_mucontext(context)->db_tab);
mthca_uar_free(to_mdev(context->device), &to_mucontext(context)->uar);
- kfree(to_mucontext(context));
-
- return 0;
}
static int mthca_mmap_uar(struct ib_ucontext *context,
@@ -373,195 +345,151 @@ static int mthca_mmap_uar(struct ib_ucontext *context,
return 0;
}
-static struct ib_pd *mthca_alloc_pd(struct ib_device *ibdev,
- struct ib_ucontext *context,
- struct ib_udata *udata)
+static int mthca_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
{
- struct mthca_pd *pd;
+ struct ib_device *ibdev = ibpd->device;
+ struct mthca_pd *pd = to_mpd(ibpd);
int err;
- pd = kmalloc(sizeof *pd, GFP_KERNEL);
- if (!pd)
- return ERR_PTR(-ENOMEM);
-
- err = mthca_pd_alloc(to_mdev(ibdev), !context, pd);
- if (err) {
- kfree(pd);
- return ERR_PTR(err);
- }
+ err = mthca_pd_alloc(to_mdev(ibdev), !udata, pd);
+ if (err)
+ return err;
- if (context) {
+ if (udata) {
if (ib_copy_to_udata(udata, &pd->pd_num, sizeof (__u32))) {
mthca_pd_free(to_mdev(ibdev), pd);
- kfree(pd);
- return ERR_PTR(-EFAULT);
+ return -EFAULT;
}
}
- return &pd->ibpd;
+ return 0;
}
-static int mthca_dealloc_pd(struct ib_pd *pd)
+static int mthca_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata)
{
mthca_pd_free(to_mdev(pd->device), to_mpd(pd));
- kfree(pd);
-
return 0;
}
-static struct ib_ah *mthca_ah_create(struct ib_pd *pd,
- struct ib_ah_attr *ah_attr,
- struct ib_udata *udata)
+static int mthca_ah_create(struct ib_ah *ibah,
+ struct rdma_ah_init_attr *init_attr,
+ struct ib_udata *udata)
{
- int err;
- struct mthca_ah *ah;
-
- ah = kmalloc(sizeof *ah, GFP_ATOMIC);
- if (!ah)
- return ERR_PTR(-ENOMEM);
+ struct mthca_ah *ah = to_mah(ibah);
- err = mthca_create_ah(to_mdev(pd->device), to_mpd(pd), ah_attr, ah);
- if (err) {
- kfree(ah);
- return ERR_PTR(err);
- }
-
- return &ah->ibah;
+ return mthca_create_ah(to_mdev(ibah->device), to_mpd(ibah->pd),
+ init_attr->ah_attr, ah);
}
-static int mthca_ah_destroy(struct ib_ah *ah)
+static int mthca_ah_destroy(struct ib_ah *ah, u32 flags)
{
mthca_destroy_ah(to_mdev(ah->device), to_mah(ah));
- kfree(ah);
-
return 0;
}
-static struct ib_srq *mthca_create_srq(struct ib_pd *pd,
- struct ib_srq_init_attr *init_attr,
- struct ib_udata *udata)
+static int mthca_create_srq(struct ib_srq *ibsrq,
+ struct ib_srq_init_attr *init_attr,
+ struct ib_udata *udata)
{
struct mthca_create_srq ucmd;
- struct mthca_ucontext *context = NULL;
- struct mthca_srq *srq;
+ struct mthca_ucontext *context = rdma_udata_to_drv_context(
+ udata, struct mthca_ucontext, ibucontext);
+ struct mthca_srq *srq = to_msrq(ibsrq);
int err;
if (init_attr->srq_type != IB_SRQT_BASIC)
- return ERR_PTR(-ENOSYS);
-
- srq = kmalloc(sizeof *srq, GFP_KERNEL);
- if (!srq)
- return ERR_PTR(-ENOMEM);
-
- if (pd->uobject) {
- context = to_mucontext(pd->uobject->context);
+ return -EOPNOTSUPP;
- if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) {
- err = -EFAULT;
- goto err_free;
- }
+ if (udata) {
+ if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)))
+ return -EFAULT;
- err = mthca_map_user_db(to_mdev(pd->device), &context->uar,
+ err = mthca_map_user_db(to_mdev(ibsrq->device), &context->uar,
context->db_tab, ucmd.db_index,
ucmd.db_page);
if (err)
- goto err_free;
+ return err;
srq->mr.ibmr.lkey = ucmd.lkey;
srq->db_index = ucmd.db_index;
}
- err = mthca_alloc_srq(to_mdev(pd->device), to_mpd(pd),
- &init_attr->attr, srq);
+ err = mthca_alloc_srq(to_mdev(ibsrq->device), to_mpd(ibsrq->pd),
+ &init_attr->attr, srq, udata);
- if (err && pd->uobject)
- mthca_unmap_user_db(to_mdev(pd->device), &context->uar,
+ if (err && udata)
+ mthca_unmap_user_db(to_mdev(ibsrq->device), &context->uar,
context->db_tab, ucmd.db_index);
if (err)
- goto err_free;
+ return err;
- if (context && ib_copy_to_udata(udata, &srq->srqn, sizeof (__u32))) {
- mthca_free_srq(to_mdev(pd->device), srq);
- err = -EFAULT;
- goto err_free;
+ if (context && ib_copy_to_udata(udata, &srq->srqn, sizeof(__u32))) {
+ mthca_free_srq(to_mdev(ibsrq->device), srq);
+ return -EFAULT;
}
- return &srq->ibsrq;
-
-err_free:
- kfree(srq);
-
- return ERR_PTR(err);
+ return 0;
}
-static int mthca_destroy_srq(struct ib_srq *srq)
+static int mthca_destroy_srq(struct ib_srq *srq, struct ib_udata *udata)
{
- struct mthca_ucontext *context;
-
- if (srq->uobject) {
- context = to_mucontext(srq->uobject->context);
+ if (udata) {
+ struct mthca_ucontext *context =
+ rdma_udata_to_drv_context(
+ udata,
+ struct mthca_ucontext,
+ ibucontext);
mthca_unmap_user_db(to_mdev(srq->device), &context->uar,
context->db_tab, to_msrq(srq)->db_index);
}
mthca_free_srq(to_mdev(srq->device), to_msrq(srq));
- kfree(srq);
-
return 0;
}
-static struct ib_qp *mthca_create_qp(struct ib_pd *pd,
- struct ib_qp_init_attr *init_attr,
- struct ib_udata *udata)
+static int mthca_create_qp(struct ib_qp *ibqp,
+ struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata)
{
+ struct mthca_ucontext *context = rdma_udata_to_drv_context(
+ udata, struct mthca_ucontext, ibucontext);
struct mthca_create_qp ucmd;
- struct mthca_qp *qp;
+ struct mthca_qp *qp = to_mqp(ibqp);
+ struct mthca_dev *dev = to_mdev(ibqp->device);
int err;
if (init_attr->create_flags)
- return ERR_PTR(-EINVAL);
+ return -EOPNOTSUPP;
switch (init_attr->qp_type) {
case IB_QPT_RC:
case IB_QPT_UC:
case IB_QPT_UD:
{
- struct mthca_ucontext *context;
-
- qp = kmalloc(sizeof *qp, GFP_KERNEL);
- if (!qp)
- return ERR_PTR(-ENOMEM);
-
- if (pd->uobject) {
- context = to_mucontext(pd->uobject->context);
-
- if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) {
- kfree(qp);
- return ERR_PTR(-EFAULT);
- }
+ if (udata) {
+ if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)))
+ return -EFAULT;
- err = mthca_map_user_db(to_mdev(pd->device), &context->uar,
+ err = mthca_map_user_db(dev, &context->uar,
context->db_tab,
- ucmd.sq_db_index, ucmd.sq_db_page);
- if (err) {
- kfree(qp);
- return ERR_PTR(err);
- }
+ ucmd.sq_db_index,
+ ucmd.sq_db_page);
+ if (err)
+ return err;
- err = mthca_map_user_db(to_mdev(pd->device), &context->uar,
+ err = mthca_map_user_db(dev, &context->uar,
context->db_tab,
- ucmd.rq_db_index, ucmd.rq_db_page);
+ ucmd.rq_db_index,
+ ucmd.rq_db_page);
if (err) {
- mthca_unmap_user_db(to_mdev(pd->device),
- &context->uar,
+ mthca_unmap_user_db(dev, &context->uar,
context->db_tab,
ucmd.sq_db_index);
- kfree(qp);
- return ERR_PTR(err);
+ return err;
}
qp->mr.ibmr.lkey = ucmd.lkey;
@@ -569,22 +497,16 @@ static struct ib_qp *mthca_create_qp(struct ib_pd *pd,
qp->rq.db_index = ucmd.rq_db_index;
}
- err = mthca_alloc_qp(to_mdev(pd->device), to_mpd(pd),
+ err = mthca_alloc_qp(dev, to_mpd(ibqp->pd),
to_mcq(init_attr->send_cq),
to_mcq(init_attr->recv_cq),
init_attr->qp_type, init_attr->sq_sig_type,
- &init_attr->cap, qp);
-
- if (err && pd->uobject) {
- context = to_mucontext(pd->uobject->context);
+ &init_attr->cap, qp, udata);
- mthca_unmap_user_db(to_mdev(pd->device),
- &context->uar,
- context->db_tab,
+ if (err && udata) {
+ mthca_unmap_user_db(dev, &context->uar, context->db_tab,
ucmd.sq_db_index);
- mthca_unmap_user_db(to_mdev(pd->device),
- &context->uar,
- context->db_tab,
+ mthca_unmap_user_db(dev, &context->uar, context->db_tab,
ucmd.rq_db_index);
}
@@ -594,32 +516,28 @@ static struct ib_qp *mthca_create_qp(struct ib_pd *pd,
case IB_QPT_SMI:
case IB_QPT_GSI:
{
- /* Don't allow userspace to create special QPs */
- if (pd->uobject)
- return ERR_PTR(-EINVAL);
-
- qp = kmalloc(sizeof (struct mthca_sqp), GFP_KERNEL);
- if (!qp)
- return ERR_PTR(-ENOMEM);
+ qp->sqp = kzalloc(sizeof(struct mthca_sqp), GFP_KERNEL);
+ if (!qp->sqp)
+ return -ENOMEM;
qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 : 1;
- err = mthca_alloc_sqp(to_mdev(pd->device), to_mpd(pd),
+ err = mthca_alloc_sqp(dev, to_mpd(ibqp->pd),
to_mcq(init_attr->send_cq),
to_mcq(init_attr->recv_cq),
init_attr->sq_sig_type, &init_attr->cap,
- qp->ibqp.qp_num, init_attr->port_num,
- to_msqp(qp));
+ qp->ibqp.qp_num, init_attr->port_num, qp,
+ udata);
break;
}
default:
/* Don't support raw QPs */
- return ERR_PTR(-ENOSYS);
+ return -EOPNOTSUPP;
}
if (err) {
- kfree(qp);
- return ERR_PTR(err);
+ kfree(qp->sqp);
+ return err;
}
init_attr->cap.max_send_wr = qp->sq.max;
@@ -628,67 +546,72 @@ static struct ib_qp *mthca_create_qp(struct ib_pd *pd,
init_attr->cap.max_recv_sge = qp->rq.max_gs;
init_attr->cap.max_inline_data = qp->max_inline_data;
- return &qp->ibqp;
+ return 0;
}
-static int mthca_destroy_qp(struct ib_qp *qp)
+static int mthca_destroy_qp(struct ib_qp *qp, struct ib_udata *udata)
{
- if (qp->uobject) {
+ if (udata) {
+ struct mthca_ucontext *context =
+ rdma_udata_to_drv_context(
+ udata,
+ struct mthca_ucontext,
+ ibucontext);
+
mthca_unmap_user_db(to_mdev(qp->device),
- &to_mucontext(qp->uobject->context)->uar,
- to_mucontext(qp->uobject->context)->db_tab,
+ &context->uar,
+ context->db_tab,
to_mqp(qp)->sq.db_index);
mthca_unmap_user_db(to_mdev(qp->device),
- &to_mucontext(qp->uobject->context)->uar,
- to_mucontext(qp->uobject->context)->db_tab,
+ &context->uar,
+ context->db_tab,
to_mqp(qp)->rq.db_index);
}
mthca_free_qp(to_mdev(qp->device), to_mqp(qp));
- kfree(qp);
+ kfree(to_mqp(qp)->sqp);
return 0;
}
-static struct ib_cq *mthca_create_cq(struct ib_device *ibdev,
- const struct ib_cq_init_attr *attr,
- struct ib_ucontext *context,
- struct ib_udata *udata)
+static int mthca_create_cq(struct ib_cq *ibcq,
+ const struct ib_cq_init_attr *attr,
+ struct uverbs_attr_bundle *attrs)
{
+ struct ib_udata *udata = &attrs->driver_udata;
+ struct ib_device *ibdev = ibcq->device;
int entries = attr->cqe;
struct mthca_create_cq ucmd;
struct mthca_cq *cq;
int nent;
int err;
+ struct mthca_ucontext *context = rdma_udata_to_drv_context(
+ udata, struct mthca_ucontext, ibucontext);
if (attr->flags)
- return ERR_PTR(-EINVAL);
+ return -EOPNOTSUPP;
if (entries < 1 || entries > to_mdev(ibdev)->limits.max_cqes)
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
- if (context) {
- if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd))
- return ERR_PTR(-EFAULT);
+ if (udata) {
+ if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)))
+ return -EFAULT;
- err = mthca_map_user_db(to_mdev(ibdev), &to_mucontext(context)->uar,
- to_mucontext(context)->db_tab,
- ucmd.set_db_index, ucmd.set_db_page);
+ err = mthca_map_user_db(to_mdev(ibdev), &context->uar,
+ context->db_tab, ucmd.set_db_index,
+ ucmd.set_db_page);
if (err)
- return ERR_PTR(err);
+ return err;
- err = mthca_map_user_db(to_mdev(ibdev), &to_mucontext(context)->uar,
- to_mucontext(context)->db_tab,
- ucmd.arm_db_index, ucmd.arm_db_page);
+ err = mthca_map_user_db(to_mdev(ibdev), &context->uar,
+ context->db_tab, ucmd.arm_db_index,
+ ucmd.arm_db_page);
if (err)
goto err_unmap_set;
}
- cq = kmalloc(sizeof *cq, GFP_KERNEL);
- if (!cq) {
- err = -ENOMEM;
- goto err_unmap_arm;
- }
+ cq = to_mcq(ibcq);
- if (context) {
+ if (udata) {
cq->buf.mr.ibmr.lkey = ucmd.lkey;
cq->set_ci_db_index = ucmd.set_db_index;
cq->arm_db_index = ucmd.arm_db_index;
@@ -697,37 +620,33 @@ static struct ib_cq *mthca_create_cq(struct ib_device *ibdev,
for (nent = 1; nent <= entries; nent <<= 1)
; /* nothing */
- err = mthca_init_cq(to_mdev(ibdev), nent,
- context ? to_mucontext(context) : NULL,
- context ? ucmd.pdn : to_mdev(ibdev)->driver_pd.pd_num,
+ err = mthca_init_cq(to_mdev(ibdev), nent, context,
+ udata ? ucmd.pdn : to_mdev(ibdev)->driver_pd.pd_num,
cq);
if (err)
- goto err_free;
+ goto err_unmap_arm;
- if (context && ib_copy_to_udata(udata, &cq->cqn, sizeof (__u32))) {
+ if (udata && ib_copy_to_udata(udata, &cq->cqn, sizeof(__u32))) {
mthca_free_cq(to_mdev(ibdev), cq);
err = -EFAULT;
- goto err_free;
+ goto err_unmap_arm;
}
cq->resize_buf = NULL;
- return &cq->ibcq;
-
-err_free:
- kfree(cq);
+ return 0;
err_unmap_arm:
- if (context)
- mthca_unmap_user_db(to_mdev(ibdev), &to_mucontext(context)->uar,
- to_mucontext(context)->db_tab, ucmd.arm_db_index);
+ if (udata)
+ mthca_unmap_user_db(to_mdev(ibdev), &context->uar,
+ context->db_tab, ucmd.arm_db_index);
err_unmap_set:
- if (context)
- mthca_unmap_user_db(to_mdev(ibdev), &to_mucontext(context)->uar,
- to_mucontext(context)->db_tab, ucmd.set_db_index);
+ if (udata)
+ mthca_unmap_user_db(to_mdev(ibdev), &context->uar,
+ context->db_tab, ucmd.set_db_index);
- return ERR_PTR(err);
+ return err;
}
static int mthca_alloc_resize_buf(struct mthca_dev *dev, struct mthca_cq *cq,
@@ -851,21 +770,25 @@ out:
return ret;
}
-static int mthca_destroy_cq(struct ib_cq *cq)
+static int mthca_destroy_cq(struct ib_cq *cq, struct ib_udata *udata)
{
- if (cq->uobject) {
+ if (udata) {
+ struct mthca_ucontext *context =
+ rdma_udata_to_drv_context(
+ udata,
+ struct mthca_ucontext,
+ ibucontext);
+
mthca_unmap_user_db(to_mdev(cq->device),
- &to_mucontext(cq->uobject->context)->uar,
- to_mucontext(cq->uobject->context)->db_tab,
+ &context->uar,
+ context->db_tab,
to_mcq(cq)->arm_db_index);
mthca_unmap_user_db(to_mdev(cq->device),
- &to_mucontext(cq->uobject->context)->uar,
- to_mucontext(cq->uobject->context)->db_tab,
+ &context->uar,
+ context->db_tab,
to_mcq(cq)->set_ci_db_index);
}
mthca_free_cq(to_mdev(cq->device), to_mcq(cq));
- kfree(cq);
-
return 0;
}
@@ -902,25 +825,30 @@ static struct ib_mr *mthca_get_dma_mr(struct ib_pd *pd, int acc)
}
static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
- u64 virt, int acc, struct ib_udata *udata)
+ u64 virt, int acc, struct ib_dmah *dmah,
+ struct ib_udata *udata)
{
struct mthca_dev *dev = to_mdev(pd->device);
- struct scatterlist *sg;
+ struct ib_block_iter biter;
+ struct mthca_ucontext *context = rdma_udata_to_drv_context(
+ udata, struct mthca_ucontext, ibucontext);
struct mthca_mr *mr;
struct mthca_reg_mr ucmd;
u64 *pages;
- int shift, n, len;
- int i, k, entry;
+ int n, i;
int err = 0;
int write_mtt_size;
- if (udata->inlen - sizeof (struct ib_uverbs_cmd_hdr) < sizeof ucmd) {
- if (!to_mucontext(pd->uobject->context)->reg_mr_warned) {
+ if (dmah)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ if (udata->inlen < sizeof ucmd) {
+ if (!context->reg_mr_warned) {
mthca_warn(dev, "Process '%s' did not pass in MR attrs.\n",
current->comm);
mthca_warn(dev, " Update libmthca to fix this.\n");
}
- ++to_mucontext(pd->uobject->context)->reg_mr_warned;
+ ++context->reg_mr_warned;
ucmd.mr_attrs = 0;
} else if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd))
return ERR_PTR(-EFAULT);
@@ -929,16 +857,13 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
if (!mr)
return ERR_PTR(-ENOMEM);
- mr->umem = ib_umem_get(pd->uobject->context, start, length, acc,
- ucmd.mr_attrs & MTHCA_MR_DMASYNC);
-
+ mr->umem = ib_umem_get(pd->device, start, length, acc);
if (IS_ERR(mr->umem)) {
err = PTR_ERR(mr->umem);
goto err;
}
- shift = ffs(mr->umem->page_size) - 1;
- n = mr->umem->nmap;
+ n = ib_umem_num_dma_blocks(mr->umem, PAGE_SIZE);
mr->mtt = mthca_alloc_mtt(dev, n);
if (IS_ERR(mr->mtt)) {
@@ -956,22 +881,19 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
write_mtt_size = min(mthca_write_mtt_size(dev), (int) (PAGE_SIZE / sizeof *pages));
- for_each_sg(mr->umem->sg_head.sgl, sg, mr->umem->nmap, entry) {
- len = sg_dma_len(sg) >> shift;
- for (k = 0; k < len; ++k) {
- pages[i++] = sg_dma_address(sg) +
- mr->umem->page_size * k;
- /*
- * Be friendly to write_mtt and pass it chunks
- * of appropriate size.
- */
- if (i == write_mtt_size) {
- err = mthca_write_mtt(dev, mr->mtt, n, pages, i);
- if (err)
- goto mtt_done;
- n += i;
- i = 0;
- }
+ rdma_umem_for_each_dma_block(mr->umem, &biter, PAGE_SIZE) {
+ pages[i++] = rdma_block_iter_dma_address(&biter);
+
+ /*
+ * Be friendly to write_mtt and pass it chunks
+ * of appropriate size.
+ */
+ if (i == write_mtt_size) {
+ err = mthca_write_mtt(dev, mr->mtt, n, pages, i);
+ if (err)
+ goto mtt_done;
+ n += i;
+ i = 0;
}
}
@@ -982,7 +904,7 @@ mtt_done:
if (err)
goto err_mtt;
- err = mthca_mr_alloc(dev, to_mpd(pd)->pd_num, shift, virt, length,
+ err = mthca_mr_alloc(dev, to_mpd(pd)->pd_num, PAGE_SHIFT, virt, length,
convert_access(acc), mr);
if (err)
@@ -1001,131 +923,79 @@ err:
return ERR_PTR(err);
}
-static int mthca_dereg_mr(struct ib_mr *mr)
+static int mthca_dereg_mr(struct ib_mr *mr, struct ib_udata *udata)
{
struct mthca_mr *mmr = to_mmr(mr);
mthca_free_mr(to_mdev(mr->device), mmr);
- if (mmr->umem)
- ib_umem_release(mmr->umem);
+ ib_umem_release(mmr->umem);
kfree(mmr);
return 0;
}
-static struct ib_fmr *mthca_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
- struct ib_fmr_attr *fmr_attr)
-{
- struct mthca_fmr *fmr;
- int err;
-
- fmr = kmalloc(sizeof *fmr, GFP_KERNEL);
- if (!fmr)
- return ERR_PTR(-ENOMEM);
-
- memcpy(&fmr->attr, fmr_attr, sizeof *fmr_attr);
- err = mthca_fmr_alloc(to_mdev(pd->device), to_mpd(pd)->pd_num,
- convert_access(mr_access_flags), fmr);
-
- if (err) {
- kfree(fmr);
- return ERR_PTR(err);
- }
-
- return &fmr->ibmr;
-}
-
-static int mthca_dealloc_fmr(struct ib_fmr *fmr)
+static ssize_t hw_rev_show(struct device *device,
+ struct device_attribute *attr, char *buf)
{
- struct mthca_fmr *mfmr = to_mfmr(fmr);
- int err;
-
- err = mthca_free_fmr(to_mdev(fmr->device), mfmr);
- if (err)
- return err;
+ struct mthca_dev *dev =
+ rdma_device_to_drv_device(device, struct mthca_dev, ib_dev);
- kfree(mfmr);
- return 0;
+ return sysfs_emit(buf, "%x\n", dev->rev_id);
}
+static DEVICE_ATTR_RO(hw_rev);
-static int mthca_unmap_fmr(struct list_head *fmr_list)
+static const char *hca_type_string(int hca_type)
{
- struct ib_fmr *fmr;
- int err;
- struct mthca_dev *mdev = NULL;
-
- list_for_each_entry(fmr, fmr_list, list) {
- if (mdev && to_mdev(fmr->device) != mdev)
- return -EINVAL;
- mdev = to_mdev(fmr->device);
+ switch (hca_type) {
+ case PCI_DEVICE_ID_MELLANOX_TAVOR:
+ return "MT23108";
+ case PCI_DEVICE_ID_MELLANOX_ARBEL_COMPAT:
+ return "MT25208 (MT23108 compat mode)";
+ case PCI_DEVICE_ID_MELLANOX_ARBEL:
+ return "MT25208";
+ case PCI_DEVICE_ID_MELLANOX_SINAI:
+ case PCI_DEVICE_ID_MELLANOX_SINAI_OLD:
+ return "MT25204";
}
- if (!mdev)
- return 0;
-
- if (mthca_is_memfree(mdev)) {
- list_for_each_entry(fmr, fmr_list, list)
- mthca_arbel_fmr_unmap(mdev, to_mfmr(fmr));
-
- wmb();
- } else
- list_for_each_entry(fmr, fmr_list, list)
- mthca_tavor_fmr_unmap(mdev, to_mfmr(fmr));
-
- err = mthca_SYNC_TPT(mdev);
- return err;
+ return "unknown";
}
-static ssize_t show_rev(struct device *device, struct device_attribute *attr,
- char *buf)
+static ssize_t hca_type_show(struct device *device,
+ struct device_attribute *attr, char *buf)
{
struct mthca_dev *dev =
- container_of(device, struct mthca_dev, ib_dev.dev);
- return sprintf(buf, "%x\n", dev->rev_id);
-}
+ rdma_device_to_drv_device(device, struct mthca_dev, ib_dev);
-static ssize_t show_hca(struct device *device, struct device_attribute *attr,
- char *buf)
-{
- struct mthca_dev *dev =
- container_of(device, struct mthca_dev, ib_dev.dev);
- switch (dev->pdev->device) {
- case PCI_DEVICE_ID_MELLANOX_TAVOR:
- return sprintf(buf, "MT23108\n");
- case PCI_DEVICE_ID_MELLANOX_ARBEL_COMPAT:
- return sprintf(buf, "MT25208 (MT23108 compat mode)\n");
- case PCI_DEVICE_ID_MELLANOX_ARBEL:
- return sprintf(buf, "MT25208\n");
- case PCI_DEVICE_ID_MELLANOX_SINAI:
- case PCI_DEVICE_ID_MELLANOX_SINAI_OLD:
- return sprintf(buf, "MT25204\n");
- default:
- return sprintf(buf, "unknown\n");
- }
+ return sysfs_emit(buf, "%s\n", hca_type_string(dev->pdev->device));
}
+static DEVICE_ATTR_RO(hca_type);
-static ssize_t show_board(struct device *device, struct device_attribute *attr,
- char *buf)
+static ssize_t board_id_show(struct device *device,
+ struct device_attribute *attr, char *buf)
{
struct mthca_dev *dev =
- container_of(device, struct mthca_dev, ib_dev.dev);
- return sprintf(buf, "%.*s\n", MTHCA_BOARD_ID_LEN, dev->board_id);
+ rdma_device_to_drv_device(device, struct mthca_dev, ib_dev);
+
+ return sysfs_emit(buf, "%.*s\n", MTHCA_BOARD_ID_LEN, dev->board_id);
}
+static DEVICE_ATTR_RO(board_id);
-static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
-static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
-static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
+static struct attribute *mthca_dev_attributes[] = {
+ &dev_attr_hw_rev.attr,
+ &dev_attr_hca_type.attr,
+ &dev_attr_board_id.attr,
+ NULL
+};
-static struct device_attribute *mthca_dev_attributes[] = {
- &dev_attr_hw_rev,
- &dev_attr_hca_type,
- &dev_attr_board_id
+static const struct attribute_group mthca_attr_group = {
+ .attrs = mthca_dev_attributes,
};
static int mthca_init_node_data(struct mthca_dev *dev)
{
- struct ib_smp *in_mad = NULL;
- struct ib_smp *out_mad = NULL;
+ struct ib_smp *in_mad;
+ struct ib_smp *out_mad;
int err = -ENOMEM;
in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
@@ -1133,7 +1003,7 @@ static int mthca_init_node_data(struct mthca_dev *dev)
if (!in_mad || !out_mad)
goto out;
- init_query_mad(in_mad);
+ ib_init_query_mad(in_mad);
in_mad->attr_id = IB_SMP_ATTR_NODE_DESC;
err = mthca_MAD_IFC(dev, 1, 1,
@@ -1160,155 +1030,147 @@ out:
return err;
}
-static int mthca_port_immutable(struct ib_device *ibdev, u8 port_num,
+static int mthca_port_immutable(struct ib_device *ibdev, u32 port_num,
struct ib_port_immutable *immutable)
{
struct ib_port_attr attr;
int err;
- err = mthca_query_port(ibdev, port_num, &attr);
+ immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB;
+
+ err = ib_query_port(ibdev, port_num, &attr);
if (err)
return err;
immutable->pkey_tbl_len = attr.pkey_tbl_len;
immutable->gid_tbl_len = attr.gid_tbl_len;
- immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB;
immutable->max_mad_size = IB_MGMT_MAD_SIZE;
return 0;
}
-static void get_dev_fw_str(struct ib_device *device, char *str,
- size_t str_len)
+static void get_dev_fw_str(struct ib_device *device, char *str)
{
struct mthca_dev *dev =
container_of(device, struct mthca_dev, ib_dev);
- snprintf(str, str_len, "%d.%d.%d",
+ snprintf(str, IB_FW_VERSION_NAME_MAX, "%d.%d.%d",
(int) (dev->fw_ver >> 32),
(int) (dev->fw_ver >> 16) & 0xffff,
(int) dev->fw_ver & 0xffff);
}
+static const struct ib_device_ops mthca_dev_ops = {
+ .owner = THIS_MODULE,
+ .driver_id = RDMA_DRIVER_MTHCA,
+ .uverbs_abi_ver = MTHCA_UVERBS_ABI_VERSION,
+ .uverbs_no_driver_id_binding = 1,
+
+ .alloc_pd = mthca_alloc_pd,
+ .alloc_ucontext = mthca_alloc_ucontext,
+ .attach_mcast = mthca_multicast_attach,
+ .create_ah = mthca_ah_create,
+ .create_cq = mthca_create_cq,
+ .create_qp = mthca_create_qp,
+ .dealloc_pd = mthca_dealloc_pd,
+ .dealloc_ucontext = mthca_dealloc_ucontext,
+ .dereg_mr = mthca_dereg_mr,
+ .destroy_ah = mthca_ah_destroy,
+ .destroy_cq = mthca_destroy_cq,
+ .destroy_qp = mthca_destroy_qp,
+ .detach_mcast = mthca_multicast_detach,
+ .device_group = &mthca_attr_group,
+ .get_dev_fw_str = get_dev_fw_str,
+ .get_dma_mr = mthca_get_dma_mr,
+ .get_port_immutable = mthca_port_immutable,
+ .mmap = mthca_mmap_uar,
+ .modify_device = mthca_modify_device,
+ .modify_port = mthca_modify_port,
+ .modify_qp = mthca_modify_qp,
+ .poll_cq = mthca_poll_cq,
+ .process_mad = mthca_process_mad,
+ .query_ah = mthca_ah_query,
+ .query_device = mthca_query_device,
+ .query_gid = mthca_query_gid,
+ .query_pkey = mthca_query_pkey,
+ .query_port = mthca_query_port,
+ .query_qp = mthca_query_qp,
+ .reg_user_mr = mthca_reg_user_mr,
+ .resize_cq = mthca_resize_cq,
+
+ INIT_RDMA_OBJ_SIZE(ib_ah, mthca_ah, ibah),
+ INIT_RDMA_OBJ_SIZE(ib_cq, mthca_cq, ibcq),
+ INIT_RDMA_OBJ_SIZE(ib_pd, mthca_pd, ibpd),
+ INIT_RDMA_OBJ_SIZE(ib_qp, mthca_qp, ibqp),
+ INIT_RDMA_OBJ_SIZE(ib_ucontext, mthca_ucontext, ibucontext),
+};
+
+static const struct ib_device_ops mthca_dev_arbel_srq_ops = {
+ .create_srq = mthca_create_srq,
+ .destroy_srq = mthca_destroy_srq,
+ .modify_srq = mthca_modify_srq,
+ .post_srq_recv = mthca_arbel_post_srq_recv,
+ .query_srq = mthca_query_srq,
+
+ INIT_RDMA_OBJ_SIZE(ib_srq, mthca_srq, ibsrq),
+};
+
+static const struct ib_device_ops mthca_dev_tavor_srq_ops = {
+ .create_srq = mthca_create_srq,
+ .destroy_srq = mthca_destroy_srq,
+ .modify_srq = mthca_modify_srq,
+ .post_srq_recv = mthca_tavor_post_srq_recv,
+ .query_srq = mthca_query_srq,
+
+ INIT_RDMA_OBJ_SIZE(ib_srq, mthca_srq, ibsrq),
+};
+
+static const struct ib_device_ops mthca_dev_arbel_ops = {
+ .post_recv = mthca_arbel_post_receive,
+ .post_send = mthca_arbel_post_send,
+ .req_notify_cq = mthca_arbel_arm_cq,
+};
+
+static const struct ib_device_ops mthca_dev_tavor_ops = {
+ .post_recv = mthca_tavor_post_receive,
+ .post_send = mthca_tavor_post_send,
+ .req_notify_cq = mthca_tavor_arm_cq,
+};
+
int mthca_register_device(struct mthca_dev *dev)
{
int ret;
- int i;
ret = mthca_init_node_data(dev);
if (ret)
return ret;
- strlcpy(dev->ib_dev.name, "mthca%d", IB_DEVICE_NAME_MAX);
- dev->ib_dev.owner = THIS_MODULE;
-
- dev->ib_dev.uverbs_abi_ver = MTHCA_UVERBS_ABI_VERSION;
- dev->ib_dev.uverbs_cmd_mask =
- (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
- (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
- (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
- (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
- (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
- (1ull << IB_USER_VERBS_CMD_REG_MR) |
- (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
- (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
- (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
- (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
- (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
- (1ull << IB_USER_VERBS_CMD_QUERY_QP) |
- (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
- (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) |
- (1ull << IB_USER_VERBS_CMD_DETACH_MCAST);
dev->ib_dev.node_type = RDMA_NODE_IB_CA;
dev->ib_dev.phys_port_cnt = dev->limits.num_ports;
dev->ib_dev.num_comp_vectors = 1;
- dev->ib_dev.dma_device = &dev->pdev->dev;
- dev->ib_dev.query_device = mthca_query_device;
- dev->ib_dev.query_port = mthca_query_port;
- dev->ib_dev.modify_device = mthca_modify_device;
- dev->ib_dev.modify_port = mthca_modify_port;
- dev->ib_dev.query_pkey = mthca_query_pkey;
- dev->ib_dev.query_gid = mthca_query_gid;
- dev->ib_dev.alloc_ucontext = mthca_alloc_ucontext;
- dev->ib_dev.dealloc_ucontext = mthca_dealloc_ucontext;
- dev->ib_dev.mmap = mthca_mmap_uar;
- dev->ib_dev.alloc_pd = mthca_alloc_pd;
- dev->ib_dev.dealloc_pd = mthca_dealloc_pd;
- dev->ib_dev.create_ah = mthca_ah_create;
- dev->ib_dev.query_ah = mthca_ah_query;
- dev->ib_dev.destroy_ah = mthca_ah_destroy;
+ dev->ib_dev.dev.parent = &dev->pdev->dev;
if (dev->mthca_flags & MTHCA_FLAG_SRQ) {
- dev->ib_dev.create_srq = mthca_create_srq;
- dev->ib_dev.modify_srq = mthca_modify_srq;
- dev->ib_dev.query_srq = mthca_query_srq;
- dev->ib_dev.destroy_srq = mthca_destroy_srq;
- dev->ib_dev.uverbs_cmd_mask |=
- (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
- (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
- (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ);
-
if (mthca_is_memfree(dev))
- dev->ib_dev.post_srq_recv = mthca_arbel_post_srq_recv;
+ ib_set_device_ops(&dev->ib_dev,
+ &mthca_dev_arbel_srq_ops);
else
- dev->ib_dev.post_srq_recv = mthca_tavor_post_srq_recv;
+ ib_set_device_ops(&dev->ib_dev,
+ &mthca_dev_tavor_srq_ops);
}
- dev->ib_dev.create_qp = mthca_create_qp;
- dev->ib_dev.modify_qp = mthca_modify_qp;
- dev->ib_dev.query_qp = mthca_query_qp;
- dev->ib_dev.destroy_qp = mthca_destroy_qp;
- dev->ib_dev.create_cq = mthca_create_cq;
- dev->ib_dev.resize_cq = mthca_resize_cq;
- dev->ib_dev.destroy_cq = mthca_destroy_cq;
- dev->ib_dev.poll_cq = mthca_poll_cq;
- dev->ib_dev.get_dma_mr = mthca_get_dma_mr;
- dev->ib_dev.reg_user_mr = mthca_reg_user_mr;
- dev->ib_dev.dereg_mr = mthca_dereg_mr;
- dev->ib_dev.get_port_immutable = mthca_port_immutable;
- dev->ib_dev.get_dev_fw_str = get_dev_fw_str;
-
- if (dev->mthca_flags & MTHCA_FLAG_FMR) {
- dev->ib_dev.alloc_fmr = mthca_alloc_fmr;
- dev->ib_dev.unmap_fmr = mthca_unmap_fmr;
- dev->ib_dev.dealloc_fmr = mthca_dealloc_fmr;
- if (mthca_is_memfree(dev))
- dev->ib_dev.map_phys_fmr = mthca_arbel_map_phys_fmr;
- else
- dev->ib_dev.map_phys_fmr = mthca_tavor_map_phys_fmr;
- }
-
- dev->ib_dev.attach_mcast = mthca_multicast_attach;
- dev->ib_dev.detach_mcast = mthca_multicast_detach;
- dev->ib_dev.process_mad = mthca_process_mad;
+ ib_set_device_ops(&dev->ib_dev, &mthca_dev_ops);
- if (mthca_is_memfree(dev)) {
- dev->ib_dev.req_notify_cq = mthca_arbel_arm_cq;
- dev->ib_dev.post_send = mthca_arbel_post_send;
- dev->ib_dev.post_recv = mthca_arbel_post_receive;
- } else {
- dev->ib_dev.req_notify_cq = mthca_tavor_arm_cq;
- dev->ib_dev.post_send = mthca_tavor_post_send;
- dev->ib_dev.post_recv = mthca_tavor_post_receive;
- }
+ if (mthca_is_memfree(dev))
+ ib_set_device_ops(&dev->ib_dev, &mthca_dev_arbel_ops);
+ else
+ ib_set_device_ops(&dev->ib_dev, &mthca_dev_tavor_ops);
mutex_init(&dev->cap_mask_mutex);
- ret = ib_register_device(&dev->ib_dev, NULL);
+ ret = ib_register_device(&dev->ib_dev, "mthca%d", &dev->pdev->dev);
if (ret)
return ret;
- for (i = 0; i < ARRAY_SIZE(mthca_dev_attributes); ++i) {
- ret = device_create_file(&dev->ib_dev.dev,
- mthca_dev_attributes[i]);
- if (ret) {
- ib_unregister_device(&dev->ib_dev);
- return ret;
- }
- }
-
mthca_start_catas_poll(dev);
return 0;
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.h b/drivers/infiniband/hw/mthca/mthca_provider.h
index 596acc45569b..8a77483bb33c 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.h
+++ b/drivers/infiniband/hw/mthca/mthca_provider.h
@@ -76,24 +76,6 @@ struct mthca_mr {
struct mthca_mtt *mtt;
};
-struct mthca_fmr {
- struct ib_fmr ibmr;
- struct ib_fmr_attr attr;
- struct mthca_mtt *mtt;
- int maps;
- union {
- struct {
- struct mthca_mpt_entry __iomem *mpt;
- u64 __iomem *mtts;
- } tavor;
- struct {
- struct mthca_mpt_entry *mpt;
- __be64 *mtts;
- dma_addr_t dma_handle;
- } arbel;
- } mem;
-};
-
struct mthca_pd {
struct ib_pd ibpd;
u32 pd_num;
@@ -258,6 +240,16 @@ struct mthca_wq {
__be32 *db;
};
+struct mthca_sqp {
+ int pkey_index;
+ u32 qkey;
+ u32 send_psn;
+ struct ib_ud_header ud_header;
+ int header_buf_size;
+ void *header_buf;
+ dma_addr_t header_dma;
+};
+
struct mthca_qp {
struct ib_qp ibqp;
int refcount;
@@ -283,17 +275,7 @@ struct mthca_qp {
wait_queue_head_t wait;
struct mutex mutex;
-};
-
-struct mthca_sqp {
- struct mthca_qp qp;
- int pkey_index;
- u32 qkey;
- u32 send_psn;
- struct ib_ud_header ud_header;
- int header_buf_size;
- void *header_buf;
- dma_addr_t header_dma;
+ struct mthca_sqp *sqp;
};
static inline struct mthca_ucontext *to_mucontext(struct ib_ucontext *ibucontext)
@@ -301,11 +283,6 @@ static inline struct mthca_ucontext *to_mucontext(struct ib_ucontext *ibucontext
return container_of(ibucontext, struct mthca_ucontext, ibucontext);
}
-static inline struct mthca_fmr *to_mfmr(struct ib_fmr *ibmr)
-{
- return container_of(ibmr, struct mthca_fmr, ibmr);
-}
-
static inline struct mthca_mr *to_mmr(struct ib_mr *ibmr)
{
return container_of(ibmr, struct mthca_mr, ibmr);
@@ -336,9 +313,4 @@ static inline struct mthca_qp *to_mqp(struct ib_qp *ibqp)
return container_of(ibqp, struct mthca_qp, ibqp);
}
-static inline struct mthca_sqp *to_msqp(struct mthca_qp *qp)
-{
- return container_of(qp, struct mthca_sqp, qp);
-}
-
#endif /* MTHCA_PROVIDER_H */
diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c
index 96e5fb91fb48..53f43649f7d0 100644
--- a/drivers/infiniband/hw/mthca/mthca_qp.c
+++ b/drivers/infiniband/hw/mthca/mthca_qp.c
@@ -42,6 +42,7 @@
#include <rdma/ib_verbs.h>
#include <rdma/ib_cache.h>
#include <rdma/ib_pack.h>
+#include <rdma/uverbs_ioctl.h>
#include "mthca_dev.h"
#include "mthca_cmd.h"
@@ -114,7 +115,7 @@ struct mthca_qp_path {
u8 hop_limit;
__be32 sl_tclass_flowlabel;
u8 rgid[16];
-} __attribute__((packed));
+} __packed;
struct mthca_qp_context {
__be32 flags;
@@ -153,14 +154,14 @@ struct mthca_qp_context {
__be16 rq_wqe_counter; /* reserved on Tavor */
__be16 sq_wqe_counter; /* reserved on Tavor */
u32 reserved3[18];
-} __attribute__((packed));
+} __packed;
struct mthca_qp_param {
__be32 opt_param_mask;
u32 reserved1;
struct mthca_qp_context context;
u32 reserved2[62];
-} __attribute__((packed));
+} __packed;
enum {
MTHCA_QP_OPTPAR_ALT_ADDR_PATH = 1 << 0,
@@ -393,31 +394,36 @@ static int to_ib_qp_access_flags(int mthca_flags)
return ib_flags;
}
-static void to_ib_ah_attr(struct mthca_dev *dev, struct ib_ah_attr *ib_ah_attr,
- struct mthca_qp_path *path)
+static void to_rdma_ah_attr(struct mthca_dev *dev,
+ struct rdma_ah_attr *ah_attr,
+ struct mthca_qp_path *path)
{
- memset(ib_ah_attr, 0, sizeof *ib_ah_attr);
- ib_ah_attr->port_num = (be32_to_cpu(path->port_pkey) >> 24) & 0x3;
+ u8 port_num = (be32_to_cpu(path->port_pkey) >> 24) & 0x3;
- if (ib_ah_attr->port_num == 0 || ib_ah_attr->port_num > dev->limits.num_ports)
- return;
+ memset(ah_attr, 0, sizeof(*ah_attr));
- ib_ah_attr->dlid = be16_to_cpu(path->rlid);
- ib_ah_attr->sl = be32_to_cpu(path->sl_tclass_flowlabel) >> 28;
- ib_ah_attr->src_path_bits = path->g_mylmc & 0x7f;
- ib_ah_attr->static_rate = mthca_rate_to_ib(dev,
- path->static_rate & 0xf,
- ib_ah_attr->port_num);
- ib_ah_attr->ah_flags = (path->g_mylmc & (1 << 7)) ? IB_AH_GRH : 0;
- if (ib_ah_attr->ah_flags) {
- ib_ah_attr->grh.sgid_index = path->mgid_index & (dev->limits.gid_table_len - 1);
- ib_ah_attr->grh.hop_limit = path->hop_limit;
- ib_ah_attr->grh.traffic_class =
- (be32_to_cpu(path->sl_tclass_flowlabel) >> 20) & 0xff;
- ib_ah_attr->grh.flow_label =
- be32_to_cpu(path->sl_tclass_flowlabel) & 0xfffff;
- memcpy(ib_ah_attr->grh.dgid.raw,
- path->rgid, sizeof ib_ah_attr->grh.dgid.raw);
+ if (port_num == 0 || port_num > dev->limits.num_ports)
+ return;
+ ah_attr->type = rdma_ah_find_type(&dev->ib_dev, port_num);
+ rdma_ah_set_port_num(ah_attr, port_num);
+
+ rdma_ah_set_dlid(ah_attr, be16_to_cpu(path->rlid));
+ rdma_ah_set_sl(ah_attr, be32_to_cpu(path->sl_tclass_flowlabel) >> 28);
+ rdma_ah_set_path_bits(ah_attr, path->g_mylmc & 0x7f);
+ rdma_ah_set_static_rate(ah_attr,
+ mthca_rate_to_ib(dev,
+ path->static_rate & 0xf,
+ port_num));
+ if (path->g_mylmc & (1 << 7)) {
+ u32 tc_fl = be32_to_cpu(path->sl_tclass_flowlabel);
+
+ rdma_ah_set_grh(ah_attr, NULL,
+ tc_fl & 0xfffff,
+ path->mgid_index &
+ (dev->limits.gid_table_len - 1),
+ path->hop_limit,
+ (tc_fl >> 20) & 0xff);
+ rdma_ah_set_dgid_raw(ah_attr, path->rgid);
}
}
@@ -468,11 +474,12 @@ int mthca_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_m
to_ib_qp_access_flags(be32_to_cpu(context->params2));
if (qp->transport == RC || qp->transport == UC) {
- to_ib_ah_attr(dev, &qp_attr->ah_attr, &context->pri_path);
- to_ib_ah_attr(dev, &qp_attr->alt_ah_attr, &context->alt_path);
+ to_rdma_ah_attr(dev, &qp_attr->ah_attr, &context->pri_path);
+ to_rdma_ah_attr(dev, &qp_attr->alt_ah_attr, &context->alt_path);
qp_attr->alt_pkey_index =
be32_to_cpu(context->alt_path.port_pkey) & 0x7f;
- qp_attr->alt_port_num = qp_attr->alt_ah_attr.port_num;
+ qp_attr->alt_port_num =
+ rdma_ah_get_port_num(&qp_attr->alt_ah_attr);
}
qp_attr->pkey_index = be32_to_cpu(context->pri_path.port_pkey) & 0x7f;
@@ -512,40 +519,50 @@ out:
return err;
}
-static int mthca_path_set(struct mthca_dev *dev, const struct ib_ah_attr *ah,
+static int mthca_path_set(struct mthca_dev *dev, const struct rdma_ah_attr *ah,
struct mthca_qp_path *path, u8 port)
{
- path->g_mylmc = ah->src_path_bits & 0x7f;
- path->rlid = cpu_to_be16(ah->dlid);
- path->static_rate = mthca_get_rate(dev, ah->static_rate, port);
+ path->g_mylmc = rdma_ah_get_path_bits(ah) & 0x7f;
+ path->rlid = cpu_to_be16(rdma_ah_get_dlid(ah));
+ path->static_rate = mthca_get_rate(dev, rdma_ah_get_static_rate(ah),
+ port);
+
+ if (rdma_ah_get_ah_flags(ah) & IB_AH_GRH) {
+ const struct ib_global_route *grh = rdma_ah_read_grh(ah);
- if (ah->ah_flags & IB_AH_GRH) {
- if (ah->grh.sgid_index >= dev->limits.gid_table_len) {
+ if (grh->sgid_index >= dev->limits.gid_table_len) {
mthca_dbg(dev, "sgid_index (%u) too large. max is %d\n",
- ah->grh.sgid_index, dev->limits.gid_table_len-1);
+ grh->sgid_index,
+ dev->limits.gid_table_len - 1);
return -1;
}
path->g_mylmc |= 1 << 7;
- path->mgid_index = ah->grh.sgid_index;
- path->hop_limit = ah->grh.hop_limit;
+ path->mgid_index = grh->sgid_index;
+ path->hop_limit = grh->hop_limit;
path->sl_tclass_flowlabel =
- cpu_to_be32((ah->sl << 28) |
- (ah->grh.traffic_class << 20) |
- (ah->grh.flow_label));
- memcpy(path->rgid, ah->grh.dgid.raw, 16);
- } else
- path->sl_tclass_flowlabel = cpu_to_be32(ah->sl << 28);
+ cpu_to_be32((rdma_ah_get_sl(ah) << 28) |
+ (grh->traffic_class << 20) |
+ (grh->flow_label));
+ memcpy(path->rgid, grh->dgid.raw, 16);
+ } else {
+ path->sl_tclass_flowlabel = cpu_to_be32(rdma_ah_get_sl(ah) <<
+ 28);
+ }
return 0;
}
static int __mthca_modify_qp(struct ib_qp *ibqp,
const struct ib_qp_attr *attr, int attr_mask,
- enum ib_qp_state cur_state, enum ib_qp_state new_state)
+ enum ib_qp_state cur_state,
+ enum ib_qp_state new_state,
+ struct ib_udata *udata)
{
struct mthca_dev *dev = to_mdev(ibqp->device);
struct mthca_qp *qp = to_mqp(ibqp);
+ struct mthca_ucontext *context = rdma_udata_to_drv_context(
+ udata, struct mthca_ucontext, ibucontext);
struct mthca_mailbox *mailbox;
struct mthca_qp_param *qp_param;
struct mthca_qp_context *qp_context;
@@ -607,8 +624,7 @@ static int __mthca_modify_qp(struct ib_qp *ibqp,
/* leave arbel_sched_queue as 0 */
if (qp->ibqp.uobject)
- qp_context->usr_page =
- cpu_to_be32(to_mucontext(qp->ibqp.uobject->context)->uar.index);
+ qp_context->usr_page = cpu_to_be32(context->uar.index);
else
qp_context->usr_page = cpu_to_be32(dev->driver_uar.index);
qp_context->local_qpn = cpu_to_be32(qp->qpn);
@@ -680,7 +696,7 @@ static int __mthca_modify_qp(struct ib_qp *ibqp,
}
if (mthca_path_set(dev, &attr->alt_ah_attr, &qp_context->alt_path,
- attr->alt_ah_attr.port_num))
+ rdma_ah_get_port_num(&attr->alt_ah_attr)))
goto out_mailbox;
qp_context->alt_path.port_pkey |= cpu_to_be32(attr->alt_pkey_index |
@@ -793,7 +809,7 @@ static int __mthca_modify_qp(struct ib_qp *ibqp,
qp->alt_port = attr->alt_port_num;
if (is_sqp(dev, qp))
- store_attrs(to_msqp(qp), attr, attr_mask);
+ store_attrs(qp->sqp, attr, attr_mask);
/*
* If we moved QP0 to RTR, bring the IB link up; if we moved
@@ -847,6 +863,9 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
enum ib_qp_state cur_state, new_state;
int err = -EINVAL;
+ if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
+ return -EOPNOTSUPP;
+
mutex_lock(&qp->mutex);
if (attr_mask & IB_QP_CUR_STATE) {
cur_state = attr->cur_qp_state;
@@ -860,8 +879,8 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
- if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask,
- IB_LINK_LAYER_UNSPECIFIED)) {
+ if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
+ attr_mask)) {
mthca_dbg(dev, "Bad QP transition (transport %d) "
"%d->%d with attr 0x%08x\n",
qp->transport, cur_state, new_state,
@@ -901,7 +920,8 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
goto out;
}
- err = __mthca_modify_qp(ibqp, attr, attr_mask, cur_state, new_state);
+ err = __mthca_modify_qp(ibqp, attr, attr_mask, cur_state, new_state,
+ udata);
out:
mutex_unlock(&qp->mutex);
@@ -969,7 +989,8 @@ static void mthca_adjust_qp_caps(struct mthca_dev *dev,
*/
static int mthca_alloc_wqe_buf(struct mthca_dev *dev,
struct mthca_pd *pd,
- struct mthca_qp *qp)
+ struct mthca_qp *qp,
+ struct ib_udata *udata)
{
int size;
int err = -ENOMEM;
@@ -1036,14 +1057,14 @@ static int mthca_alloc_wqe_buf(struct mthca_dev *dev,
* allocate anything. All we need is to calculate the WQE
* sizes and the send_wqe_offset, so we're done now.
*/
- if (pd->ibpd.uobject)
+ if (udata)
return 0;
size = PAGE_ALIGN(qp->send_wqe_offset +
(qp->sq.max << qp->sq.wqe_shift));
- qp->wrid = kmalloc((qp->rq.max + qp->sq.max) * sizeof (u64),
- GFP_KERNEL);
+ qp->wrid = kmalloc_array(qp->rq.max + qp->sq.max, sizeof(u64),
+ GFP_KERNEL);
if (!qp->wrid)
goto err_out;
@@ -1143,7 +1164,8 @@ static int mthca_alloc_qp_common(struct mthca_dev *dev,
struct mthca_cq *send_cq,
struct mthca_cq *recv_cq,
enum ib_sig_type send_policy,
- struct mthca_qp *qp)
+ struct mthca_qp *qp,
+ struct ib_udata *udata)
{
int ret;
int i;
@@ -1166,7 +1188,7 @@ static int mthca_alloc_qp_common(struct mthca_dev *dev,
if (ret)
return ret;
- ret = mthca_alloc_wqe_buf(dev, pd, qp);
+ ret = mthca_alloc_wqe_buf(dev, pd, qp, udata);
if (ret) {
mthca_unmap_memfree(dev, qp);
return ret;
@@ -1179,7 +1201,7 @@ static int mthca_alloc_qp_common(struct mthca_dev *dev,
* will be allocated and buffers will be initialized in
* userspace.
*/
- if (pd->ibpd.uobject)
+ if (udata)
return 0;
ret = mthca_alloc_memfree(dev, qp);
@@ -1273,7 +1295,8 @@ int mthca_alloc_qp(struct mthca_dev *dev,
enum ib_qp_type type,
enum ib_sig_type send_policy,
struct ib_qp_cap *cap,
- struct mthca_qp *qp)
+ struct mthca_qp *qp,
+ struct ib_udata *udata)
{
int err;
@@ -1296,7 +1319,7 @@ int mthca_alloc_qp(struct mthca_dev *dev,
qp->port = 0;
err = mthca_alloc_qp_common(dev, pd, send_cq, recv_cq,
- send_policy, qp);
+ send_policy, qp, udata);
if (err) {
mthca_free(&dev->qp_table.alloc, qp->qpn);
return err;
@@ -1347,39 +1370,41 @@ int mthca_alloc_sqp(struct mthca_dev *dev,
enum ib_sig_type send_policy,
struct ib_qp_cap *cap,
int qpn,
- int port,
- struct mthca_sqp *sqp)
+ u32 port,
+ struct mthca_qp *qp,
+ struct ib_udata *udata)
{
u32 mqpn = qpn * 2 + dev->qp_table.sqp_start + port - 1;
int err;
- sqp->qp.transport = MLX;
- err = mthca_set_qp_size(dev, cap, pd, &sqp->qp);
+ qp->transport = MLX;
+ err = mthca_set_qp_size(dev, cap, pd, qp);
if (err)
return err;
- sqp->header_buf_size = sqp->qp.sq.max * MTHCA_UD_HEADER_SIZE;
- sqp->header_buf = dma_alloc_coherent(&dev->pdev->dev, sqp->header_buf_size,
- &sqp->header_dma, GFP_KERNEL);
- if (!sqp->header_buf)
+ qp->sqp->header_buf_size = qp->sq.max * MTHCA_UD_HEADER_SIZE;
+ qp->sqp->header_buf =
+ dma_alloc_coherent(&dev->pdev->dev, qp->sqp->header_buf_size,
+ &qp->sqp->header_dma, GFP_KERNEL);
+ if (!qp->sqp->header_buf)
return -ENOMEM;
spin_lock_irq(&dev->qp_table.lock);
if (mthca_array_get(&dev->qp_table.qp, mqpn))
err = -EBUSY;
else
- mthca_array_set(&dev->qp_table.qp, mqpn, sqp);
+ mthca_array_set(&dev->qp_table.qp, mqpn, qp);
spin_unlock_irq(&dev->qp_table.lock);
if (err)
goto err_out;
- sqp->qp.port = port;
- sqp->qp.qpn = mqpn;
- sqp->qp.transport = MLX;
+ qp->port = port;
+ qp->qpn = mqpn;
+ qp->transport = MLX;
err = mthca_alloc_qp_common(dev, pd, send_cq, recv_cq,
- send_policy, &sqp->qp);
+ send_policy, qp, udata);
if (err)
goto err_out_free;
@@ -1400,10 +1425,9 @@ int mthca_alloc_sqp(struct mthca_dev *dev,
mthca_unlock_cqs(send_cq, recv_cq);
- err_out:
- dma_free_coherent(&dev->pdev->dev, sqp->header_buf_size,
- sqp->header_buf, sqp->header_dma);
-
+err_out:
+ dma_free_coherent(&dev->pdev->dev, qp->sqp->header_buf_size,
+ qp->sqp->header_buf, qp->sqp->header_dma);
return err;
}
@@ -1466,20 +1490,19 @@ void mthca_free_qp(struct mthca_dev *dev,
if (is_sqp(dev, qp)) {
atomic_dec(&(to_mpd(qp->ibqp.pd)->sqp_count));
- dma_free_coherent(&dev->pdev->dev,
- to_msqp(qp)->header_buf_size,
- to_msqp(qp)->header_buf,
- to_msqp(qp)->header_dma);
+ dma_free_coherent(&dev->pdev->dev, qp->sqp->header_buf_size,
+ qp->sqp->header_buf, qp->sqp->header_dma);
} else
mthca_free(&dev->qp_table.alloc, qp->qpn);
}
/* Create UD header for an MLX send and build a data segment for it */
-static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp,
- int ind, struct ib_ud_wr *wr,
+static int build_mlx_header(struct mthca_dev *dev, struct mthca_qp *qp, int ind,
+ const struct ib_ud_wr *wr,
struct mthca_mlx_seg *mlx,
struct mthca_data_seg *data)
{
+ struct mthca_sqp *sqp = qp->sqp;
int header_size;
int err;
u16 pkey;
@@ -1492,7 +1515,7 @@ static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp,
if (err)
return err;
mlx->flags &= ~cpu_to_be32(MTHCA_NEXT_SOLICIT | 1);
- mlx->flags |= cpu_to_be32((!sqp->qp.ibqp.qp_num ? MTHCA_MLX_VL15 : 0) |
+ mlx->flags |= cpu_to_be32((!qp->ibqp.qp_num ? MTHCA_MLX_VL15 : 0) |
(sqp->ud_header.lrh.destination_lid ==
IB_LID_PERMISSIVE ? MTHCA_MLX_SLR : 0) |
(sqp->ud_header.lrh.service_level << 8));
@@ -1513,29 +1536,29 @@ static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp,
return -EINVAL;
}
- sqp->ud_header.lrh.virtual_lane = !sqp->qp.ibqp.qp_num ? 15 : 0;
+ sqp->ud_header.lrh.virtual_lane = !qp->ibqp.qp_num ? 15 : 0;
if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE)
sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE;
sqp->ud_header.bth.solicited_event = !!(wr->wr.send_flags & IB_SEND_SOLICITED);
- if (!sqp->qp.ibqp.qp_num)
- ib_get_cached_pkey(&dev->ib_dev, sqp->qp.port,
- sqp->pkey_index, &pkey);
+ if (!qp->ibqp.qp_num)
+ ib_get_cached_pkey(&dev->ib_dev, qp->port, sqp->pkey_index,
+ &pkey);
else
- ib_get_cached_pkey(&dev->ib_dev, sqp->qp.port,
- wr->pkey_index, &pkey);
+ ib_get_cached_pkey(&dev->ib_dev, qp->port, wr->pkey_index,
+ &pkey);
sqp->ud_header.bth.pkey = cpu_to_be16(pkey);
sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->remote_qpn);
sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1));
sqp->ud_header.deth.qkey = cpu_to_be32(wr->remote_qkey & 0x80000000 ?
sqp->qkey : wr->remote_qkey);
- sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.ibqp.qp_num);
+ sqp->ud_header.deth.source_qpn = cpu_to_be32(qp->ibqp.qp_num);
header_size = ib_ud_header_pack(&sqp->ud_header,
sqp->header_buf +
ind * MTHCA_UD_HEADER_SIZE);
data->byte_count = cpu_to_be32(header_size);
- data->lkey = cpu_to_be32(to_mpd(sqp->qp.ibqp.pd)->ntmr.ibmr.lkey);
+ data->lkey = cpu_to_be32(to_mpd(qp->ibqp.pd)->ntmr.ibmr.lkey);
data->addr = cpu_to_be64(sqp->header_dma +
ind * MTHCA_UD_HEADER_SIZE);
@@ -1569,7 +1592,7 @@ static __always_inline void set_raddr_seg(struct mthca_raddr_seg *rseg,
}
static __always_inline void set_atomic_seg(struct mthca_atomic_seg *aseg,
- struct ib_atomic_wr *wr)
+ const struct ib_atomic_wr *wr)
{
if (wr->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
aseg->swap_add = cpu_to_be64(wr->swap);
@@ -1582,7 +1605,7 @@ static __always_inline void set_atomic_seg(struct mthca_atomic_seg *aseg,
}
static void set_tavor_ud_seg(struct mthca_tavor_ud_seg *useg,
- struct ib_ud_wr *wr)
+ const struct ib_ud_wr *wr)
{
useg->lkey = cpu_to_be32(to_mah(wr->ah)->key);
useg->av_addr = cpu_to_be64(to_mah(wr->ah)->avdma);
@@ -1592,15 +1615,15 @@ static void set_tavor_ud_seg(struct mthca_tavor_ud_seg *useg,
}
static void set_arbel_ud_seg(struct mthca_arbel_ud_seg *useg,
- struct ib_ud_wr *wr)
+ const struct ib_ud_wr *wr)
{
memcpy(useg->av, to_mah(wr->ah)->av, MTHCA_AV_SIZE);
useg->dqpn = cpu_to_be32(wr->remote_qpn);
useg->qkey = cpu_to_be32(wr->remote_qkey);
}
-int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
- struct ib_send_wr **bad_wr)
+int mthca_tavor_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+ const struct ib_send_wr **bad_wr)
{
struct mthca_dev *dev = to_mdev(ibqp->device);
struct mthca_qp *qp = to_mqp(ibqp);
@@ -1618,8 +1641,8 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
* without initializing f0 and size0, and they are in fact
* never used uninitialized.
*/
- int uninitialized_var(size0);
- u32 uninitialized_var(f0);
+ int size0;
+ u32 f0;
int ind;
u8 op0 = 0;
@@ -1714,9 +1737,9 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
break;
case MLX:
- err = build_mlx_header(dev, to_msqp(qp), ind, ud_wr(wr),
- wqe - sizeof (struct mthca_next_seg),
- wqe);
+ err = build_mlx_header(
+ dev, qp, ind, ud_wr(wr),
+ wqe - sizeof(struct mthca_next_seg), wqe);
if (err) {
*bad_wr = wr;
goto out;
@@ -1788,11 +1811,6 @@ out:
(qp->qpn << 8) | size0,
dev->kar + MTHCA_SEND_DOORBELL,
MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
- /*
- * Make sure doorbells don't leak out of SQ spinlock
- * and reach the HCA out of order:
- */
- mmiowb();
}
qp->sq.next_ind = ind;
@@ -1802,8 +1820,8 @@ out:
return err;
}
-int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
- struct ib_recv_wr **bad_wr)
+int mthca_tavor_post_receive(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
+ const struct ib_recv_wr **bad_wr)
{
struct mthca_dev *dev = to_mdev(ibqp->device);
struct mthca_qp *qp = to_mqp(ibqp);
@@ -1819,7 +1837,7 @@ int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
* without initializing size0, and it is in fact never used
* uninitialized.
*/
- int uninitialized_var(size0);
+ int size0;
int ind;
void *wqe;
void *prev_wqe;
@@ -1903,18 +1921,12 @@ out:
qp->rq.next_ind = ind;
qp->rq.head += nreq;
- /*
- * Make sure doorbells don't leak out of RQ spinlock and reach
- * the HCA out of order:
- */
- mmiowb();
-
spin_unlock_irqrestore(&qp->rq.lock, flags);
return err;
}
-int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
- struct ib_send_wr **bad_wr)
+int mthca_arbel_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+ const struct ib_send_wr **bad_wr)
{
struct mthca_dev *dev = to_mdev(ibqp->device);
struct mthca_qp *qp = to_mqp(ibqp);
@@ -1933,8 +1945,8 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
* without initializing f0 and size0, and they are in fact
* never used uninitialized.
*/
- int uninitialized_var(size0);
- u32 uninitialized_var(f0);
+ int size0;
+ u32 f0;
int ind;
u8 op0 = 0;
@@ -2055,9 +2067,9 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
break;
case MLX:
- err = build_mlx_header(dev, to_msqp(qp), ind, ud_wr(wr),
- wqe - sizeof (struct mthca_next_seg),
- wqe);
+ err = build_mlx_header(
+ dev, qp, ind, ud_wr(wr),
+ wqe - sizeof(struct mthca_next_seg), wqe);
if (err) {
*bad_wr = wr;
goto out;
@@ -2143,18 +2155,12 @@ out:
MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
}
- /*
- * Make sure doorbells don't leak out of SQ spinlock and reach
- * the HCA out of order:
- */
- mmiowb();
-
spin_unlock_irqrestore(&qp->sq.lock, flags);
return err;
}
-int mthca_arbel_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
- struct ib_recv_wr **bad_wr)
+int mthca_arbel_post_receive(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
+ const struct ib_recv_wr **bad_wr)
{
struct mthca_dev *dev = to_mdev(ibqp->device);
struct mthca_qp *qp = to_mqp(ibqp);
diff --git a/drivers/infiniband/hw/mthca/mthca_srq.c b/drivers/infiniband/hw/mthca/mthca_srq.c
index d22f970480c0..a85935ccce88 100644
--- a/drivers/infiniband/hw/mthca/mthca_srq.c
+++ b/drivers/infiniband/hw/mthca/mthca_srq.c
@@ -36,6 +36,8 @@
#include <asm/io.h>
+#include <rdma/uverbs_ioctl.h>
+
#include "mthca_dev.h"
#include "mthca_cmd.h"
#include "mthca_memfree.h"
@@ -95,17 +97,20 @@ static inline int *wqe_to_link(void *wqe)
static void mthca_tavor_init_srq_context(struct mthca_dev *dev,
struct mthca_pd *pd,
struct mthca_srq *srq,
- struct mthca_tavor_srq_context *context)
+ struct mthca_tavor_srq_context *context,
+ struct ib_udata *udata)
{
+ struct mthca_ucontext *ucontext = rdma_udata_to_drv_context(
+ udata, struct mthca_ucontext, ibucontext);
+
memset(context, 0, sizeof *context);
context->wqe_base_ds = cpu_to_be64(1 << (srq->wqe_shift - 4));
context->state_pd = cpu_to_be32(pd->pd_num);
context->lkey = cpu_to_be32(srq->mr.ibmr.lkey);
- if (pd->ibpd.uobject)
- context->uar =
- cpu_to_be32(to_mucontext(pd->ibpd.uobject->context)->uar.index);
+ if (udata)
+ context->uar = cpu_to_be32(ucontext->uar.index);
else
context->uar = cpu_to_be32(dev->driver_uar.index);
}
@@ -113,8 +118,11 @@ static void mthca_tavor_init_srq_context(struct mthca_dev *dev,
static void mthca_arbel_init_srq_context(struct mthca_dev *dev,
struct mthca_pd *pd,
struct mthca_srq *srq,
- struct mthca_arbel_srq_context *context)
+ struct mthca_arbel_srq_context *context,
+ struct ib_udata *udata)
{
+ struct mthca_ucontext *ucontext = rdma_udata_to_drv_context(
+ udata, struct mthca_ucontext, ibucontext);
int logsize, max;
memset(context, 0, sizeof *context);
@@ -129,9 +137,8 @@ static void mthca_arbel_init_srq_context(struct mthca_dev *dev,
context->lkey = cpu_to_be32(srq->mr.ibmr.lkey);
context->db_index = cpu_to_be32(srq->db_index);
context->logstride_usrpage = cpu_to_be32((srq->wqe_shift - 4) << 29);
- if (pd->ibpd.uobject)
- context->logstride_usrpage |=
- cpu_to_be32(to_mucontext(pd->ibpd.uobject->context)->uar.index);
+ if (udata)
+ context->logstride_usrpage |= cpu_to_be32(ucontext->uar.index);
else
context->logstride_usrpage |= cpu_to_be32(dev->driver_uar.index);
context->eq_pd = cpu_to_be32(MTHCA_EQ_ASYNC << 24 | pd->pd_num);
@@ -145,17 +152,17 @@ static void mthca_free_srq_buf(struct mthca_dev *dev, struct mthca_srq *srq)
}
static int mthca_alloc_srq_buf(struct mthca_dev *dev, struct mthca_pd *pd,
- struct mthca_srq *srq)
+ struct mthca_srq *srq, struct ib_udata *udata)
{
struct mthca_data_seg *scatter;
void *wqe;
int err;
int i;
- if (pd->ibpd.uobject)
+ if (udata)
return 0;
- srq->wrid = kmalloc(srq->max * sizeof (u64), GFP_KERNEL);
+ srq->wrid = kmalloc_array(srq->max, sizeof(u64), GFP_KERNEL);
if (!srq->wrid)
return -ENOMEM;
@@ -197,7 +204,8 @@ static int mthca_alloc_srq_buf(struct mthca_dev *dev, struct mthca_pd *pd,
}
int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd,
- struct ib_srq_attr *attr, struct mthca_srq *srq)
+ struct ib_srq_attr *attr, struct mthca_srq *srq,
+ struct ib_udata *udata)
{
struct mthca_mailbox *mailbox;
int ds;
@@ -235,7 +243,7 @@ int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd,
if (err)
goto err_out;
- if (!pd->ibpd.uobject) {
+ if (!udata) {
srq->db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_SRQ,
srq->srqn, &srq->db);
if (srq->db_index < 0) {
@@ -251,7 +259,7 @@ int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd,
goto err_out_db;
}
- err = mthca_alloc_srq_buf(dev, pd, srq);
+ err = mthca_alloc_srq_buf(dev, pd, srq, udata);
if (err)
goto err_out_mailbox;
@@ -261,9 +269,9 @@ int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd,
mutex_init(&srq->mutex);
if (mthca_is_memfree(dev))
- mthca_arbel_init_srq_context(dev, pd, srq, mailbox->buf);
+ mthca_arbel_init_srq_context(dev, pd, srq, mailbox->buf, udata);
else
- mthca_tavor_init_srq_context(dev, pd, srq, mailbox->buf);
+ mthca_tavor_init_srq_context(dev, pd, srq, mailbox->buf, udata);
err = mthca_SW2HW_SRQ(dev, mailbox, srq->srqn);
@@ -297,14 +305,14 @@ err_out_free_srq:
mthca_warn(dev, "HW2SW_SRQ failed (%d)\n", err);
err_out_free_buf:
- if (!pd->ibpd.uobject)
+ if (!udata)
mthca_free_srq_buf(dev, srq);
err_out_mailbox:
mthca_free_mailbox(dev, mailbox);
err_out_db:
- if (!pd->ibpd.uobject && mthca_is_memfree(dev))
+ if (!udata && mthca_is_memfree(dev))
mthca_free_db(dev, MTHCA_DB_TYPE_SRQ, srq->db_index);
err_out_icm:
@@ -472,8 +480,8 @@ void mthca_free_srq_wqe(struct mthca_srq *srq, u32 wqe_addr)
spin_unlock(&srq->lock);
}
-int mthca_tavor_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
- struct ib_recv_wr **bad_wr)
+int mthca_tavor_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
+ const struct ib_recv_wr **bad_wr)
{
struct mthca_dev *dev = to_mdev(ibsrq->device);
struct mthca_srq *srq = to_msrq(ibsrq);
@@ -562,18 +570,12 @@ int mthca_tavor_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
}
- /*
- * Make sure doorbells don't leak out of SRQ spinlock and
- * reach the HCA out of order:
- */
- mmiowb();
-
spin_unlock_irqrestore(&srq->lock, flags);
return err;
}
-int mthca_arbel_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
- struct ib_recv_wr **bad_wr)
+int mthca_arbel_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
+ const struct ib_recv_wr **bad_wr)
{
struct mthca_dev *dev = to_mdev(ibsrq->device);
struct mthca_srq *srq = to_msrq(ibsrq);
diff --git a/drivers/infiniband/hw/mthca/mthca_user.h b/drivers/infiniband/hw/mthca/mthca_user.h
deleted file mode 100644
index 5fe56e810739..000000000000
--- a/drivers/infiniband/hw/mthca/mthca_user.h
+++ /dev/null
@@ -1,112 +0,0 @@
-/*
- * Copyright (c) 2005 Topspin Communications. All rights reserved.
- * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef MTHCA_USER_H
-#define MTHCA_USER_H
-
-#include <linux/types.h>
-
-/*
- * Increment this value if any changes that break userspace ABI
- * compatibility are made.
- */
-#define MTHCA_UVERBS_ABI_VERSION 1
-
-/*
- * Make sure that all structs defined in this file remain laid out so
- * that they pack the same way on 32-bit and 64-bit architectures (to
- * avoid incompatibility between 32-bit userspace and 64-bit kernels).
- * In particular do not use pointer types -- pass pointers in __u64
- * instead.
- */
-
-struct mthca_alloc_ucontext_resp {
- __u32 qp_tab_size;
- __u32 uarc_size;
-};
-
-struct mthca_alloc_pd_resp {
- __u32 pdn;
- __u32 reserved;
-};
-
-struct mthca_reg_mr {
-/*
- * Mark the memory region with a DMA attribute that causes
- * in-flight DMA to be flushed when the region is written to:
- */
-#define MTHCA_MR_DMASYNC 0x1
- __u32 mr_attrs;
- __u32 reserved;
-};
-
-struct mthca_create_cq {
- __u32 lkey;
- __u32 pdn;
- __u64 arm_db_page;
- __u64 set_db_page;
- __u32 arm_db_index;
- __u32 set_db_index;
-};
-
-struct mthca_create_cq_resp {
- __u32 cqn;
- __u32 reserved;
-};
-
-struct mthca_resize_cq {
- __u32 lkey;
- __u32 reserved;
-};
-
-struct mthca_create_srq {
- __u32 lkey;
- __u32 db_index;
- __u64 db_page;
-};
-
-struct mthca_create_srq_resp {
- __u32 srqn;
- __u32 reserved;
-};
-
-struct mthca_create_qp {
- __u32 lkey;
- __u32 reserved;
- __u64 sq_db_page;
- __u64 rq_db_page;
- __u32 sq_db_index;
- __u32 rq_db_index;
-};
-
-#endif /* MTHCA_USER_H */
diff --git a/drivers/infiniband/hw/nes/Kconfig b/drivers/infiniband/hw/nes/Kconfig
deleted file mode 100644
index 7964eba8e7ed..000000000000
--- a/drivers/infiniband/hw/nes/Kconfig
+++ /dev/null
@@ -1,15 +0,0 @@
-config INFINIBAND_NES
- tristate "NetEffect RNIC Driver"
- depends on PCI && INET && INFINIBAND
- select LIBCRC32C
- ---help---
- This is the RDMA Network Interface Card (RNIC) driver for
- NetEffect Ethernet Cluster Server Adapters.
-
-config INFINIBAND_NES_DEBUG
- bool "Verbose debugging output"
- depends on INFINIBAND_NES
- default n
- ---help---
- This option enables debug messages from the NetEffect RNIC
- driver. Select this if you are diagnosing a problem.
diff --git a/drivers/infiniband/hw/nes/Makefile b/drivers/infiniband/hw/nes/Makefile
deleted file mode 100644
index 97820c23ecef..000000000000
--- a/drivers/infiniband/hw/nes/Makefile
+++ /dev/null
@@ -1,3 +0,0 @@
-obj-$(CONFIG_INFINIBAND_NES) += iw_nes.o
-
-iw_nes-objs := nes.o nes_hw.o nes_nic.o nes_utils.o nes_verbs.o nes_cm.o nes_mgt.o
diff --git a/drivers/infiniband/hw/nes/nes.c b/drivers/infiniband/hw/nes/nes.c
deleted file mode 100644
index 5b9601014f0c..000000000000
--- a/drivers/infiniband/hw/nes/nes.c
+++ /dev/null
@@ -1,1243 +0,0 @@
-/*
- * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/ethtool.h>
-#include <linux/mii.h>
-#include <linux/if_vlan.h>
-#include <linux/crc32.h>
-#include <linux/in.h>
-#include <linux/fs.h>
-#include <linux/init.h>
-#include <linux/if_arp.h>
-#include <linux/highmem.h>
-#include <linux/slab.h>
-#include <asm/io.h>
-#include <asm/irq.h>
-#include <asm/byteorder.h>
-#include <rdma/ib_smi.h>
-#include <rdma/ib_verbs.h>
-#include <rdma/ib_pack.h>
-#include <rdma/iw_cm.h>
-
-#include "nes.h"
-
-#include <net/netevent.h>
-#include <net/neighbour.h>
-#include <linux/route.h>
-#include <net/ip_fib.h>
-
-MODULE_AUTHOR("NetEffect");
-MODULE_DESCRIPTION("NetEffect RNIC Low-level iWARP Driver");
-MODULE_LICENSE("Dual BSD/GPL");
-MODULE_VERSION(DRV_VERSION);
-
-int interrupt_mod_interval = 0;
-
-/* Interoperability */
-int mpa_version = 1;
-module_param(mpa_version, int, 0644);
-MODULE_PARM_DESC(mpa_version, "MPA version to be used int MPA Req/Resp (0 or 1)");
-
-/* Interoperability */
-int disable_mpa_crc = 0;
-module_param(disable_mpa_crc, int, 0644);
-MODULE_PARM_DESC(disable_mpa_crc, "Disable checking of MPA CRC");
-
-unsigned int nes_drv_opt = NES_DRV_OPT_DISABLE_INT_MOD | NES_DRV_OPT_ENABLE_PAU;
-module_param(nes_drv_opt, int, 0644);
-MODULE_PARM_DESC(nes_drv_opt, "Driver option parameters");
-
-unsigned int nes_debug_level = 0;
-module_param_named(debug_level, nes_debug_level, uint, 0644);
-MODULE_PARM_DESC(debug_level, "Enable debug output level");
-
-unsigned int wqm_quanta = 0x10000;
-module_param(wqm_quanta, int, 0644);
-MODULE_PARM_DESC(wqm_quanta, "WQM quanta");
-
-static bool limit_maxrdreqsz;
-module_param(limit_maxrdreqsz, bool, 0644);
-MODULE_PARM_DESC(limit_maxrdreqsz, "Limit max read request size to 256 Bytes");
-
-LIST_HEAD(nes_adapter_list);
-static LIST_HEAD(nes_dev_list);
-
-atomic_t qps_destroyed;
-
-static unsigned int ee_flsh_adapter;
-static unsigned int sysfs_nonidx_addr;
-static unsigned int sysfs_idx_addr;
-
-static struct pci_device_id nes_pci_table[] = {
- { PCI_VDEVICE(NETEFFECT, PCI_DEVICE_ID_NETEFFECT_NE020), },
- { PCI_VDEVICE(NETEFFECT, PCI_DEVICE_ID_NETEFFECT_NE020_KR), },
- {0}
-};
-
-MODULE_DEVICE_TABLE(pci, nes_pci_table);
-
-static int nes_inetaddr_event(struct notifier_block *, unsigned long, void *);
-static int nes_net_event(struct notifier_block *, unsigned long, void *);
-static int nes_notifiers_registered;
-
-
-static struct notifier_block nes_inetaddr_notifier = {
- .notifier_call = nes_inetaddr_event
-};
-
-static struct notifier_block nes_net_notifier = {
- .notifier_call = nes_net_event
-};
-
-/**
- * nes_inetaddr_event
- */
-static int nes_inetaddr_event(struct notifier_block *notifier,
- unsigned long event, void *ptr)
-{
- struct in_ifaddr *ifa = ptr;
- struct net_device *event_netdev = ifa->ifa_dev->dev;
- struct nes_device *nesdev;
- struct net_device *netdev;
- struct net_device *upper_dev;
- struct nes_vnic *nesvnic;
- unsigned int is_bonded;
-
- nes_debug(NES_DBG_NETDEV, "nes_inetaddr_event: ip address %pI4, netmask %pI4.\n",
- &ifa->ifa_address, &ifa->ifa_mask);
- list_for_each_entry(nesdev, &nes_dev_list, list) {
- nes_debug(NES_DBG_NETDEV, "Nesdev list entry = 0x%p. (%s)\n",
- nesdev, nesdev->netdev[0]->name);
- netdev = nesdev->netdev[0];
- nesvnic = netdev_priv(netdev);
- upper_dev = netdev_master_upper_dev_get(netdev);
- is_bonded = netif_is_bond_slave(netdev) &&
- (upper_dev == event_netdev);
- if ((netdev == event_netdev) || is_bonded) {
- if (nesvnic->rdma_enabled == 0) {
- nes_debug(NES_DBG_NETDEV, "Returning without processing event for %s since"
- " RDMA is not enabled.\n",
- netdev->name);
- return NOTIFY_OK;
- }
- /* we have ifa->ifa_address/mask here if we need it */
- switch (event) {
- case NETDEV_DOWN:
- nes_debug(NES_DBG_NETDEV, "event:DOWN\n");
- nes_write_indexed(nesdev,
- NES_IDX_DST_IP_ADDR+(0x10*PCI_FUNC(nesdev->pcidev->devfn)), 0);
-
- nes_manage_arp_cache(netdev, netdev->dev_addr,
- ntohl(nesvnic->local_ipaddr), NES_ARP_DELETE);
- nesvnic->local_ipaddr = 0;
- if (is_bonded)
- continue;
- else
- return NOTIFY_OK;
- break;
- case NETDEV_UP:
- nes_debug(NES_DBG_NETDEV, "event:UP\n");
-
- if (nesvnic->local_ipaddr != 0) {
- nes_debug(NES_DBG_NETDEV, "Interface already has local_ipaddr\n");
- return NOTIFY_OK;
- }
- /* fall through */
- case NETDEV_CHANGEADDR:
- /* Add the address to the IP table */
- if (upper_dev)
- nesvnic->local_ipaddr =
- ((struct in_device *)upper_dev->ip_ptr)->ifa_list->ifa_address;
- else
- nesvnic->local_ipaddr = ifa->ifa_address;
-
- nes_write_indexed(nesdev,
- NES_IDX_DST_IP_ADDR+(0x10*PCI_FUNC(nesdev->pcidev->devfn)),
- ntohl(nesvnic->local_ipaddr));
- nes_manage_arp_cache(netdev, netdev->dev_addr,
- ntohl(nesvnic->local_ipaddr), NES_ARP_ADD);
- if (is_bonded)
- continue;
- else
- return NOTIFY_OK;
- break;
- default:
- break;
- }
- }
- }
-
- return NOTIFY_DONE;
-}
-
-
-/**
- * nes_net_event
- */
-static int nes_net_event(struct notifier_block *notifier,
- unsigned long event, void *ptr)
-{
- struct neighbour *neigh = ptr;
- struct nes_device *nesdev;
- struct net_device *netdev;
- struct nes_vnic *nesvnic;
-
- switch (event) {
- case NETEVENT_NEIGH_UPDATE:
- list_for_each_entry(nesdev, &nes_dev_list, list) {
- /* nes_debug(NES_DBG_NETDEV, "Nesdev list entry = 0x%p.\n", nesdev); */
- netdev = nesdev->netdev[0];
- nesvnic = netdev_priv(netdev);
- if (netdev == neigh->dev) {
- if (nesvnic->rdma_enabled == 0) {
- nes_debug(NES_DBG_NETDEV, "Skipping device %s since no RDMA\n",
- netdev->name);
- } else {
- if (neigh->nud_state & NUD_VALID) {
- nes_manage_arp_cache(neigh->dev, neigh->ha,
- ntohl(*(__be32 *)neigh->primary_key), NES_ARP_ADD);
- } else {
- nes_manage_arp_cache(neigh->dev, neigh->ha,
- ntohl(*(__be32 *)neigh->primary_key), NES_ARP_DELETE);
- }
- }
- return NOTIFY_OK;
- }
- }
- break;
- default:
- nes_debug(NES_DBG_NETDEV, "NETEVENT_ %lu undefined\n", event);
- break;
- }
-
- return NOTIFY_DONE;
-}
-
-
-/**
- * nes_add_ref
- */
-void nes_add_ref(struct ib_qp *ibqp)
-{
- struct nes_qp *nesqp;
-
- nesqp = to_nesqp(ibqp);
- nes_debug(NES_DBG_QP, "Bumping refcount for QP%u. Pre-inc value = %u\n",
- ibqp->qp_num, atomic_read(&nesqp->refcount));
- atomic_inc(&nesqp->refcount);
-}
-
-static void nes_cqp_rem_ref_callback(struct nes_device *nesdev, struct nes_cqp_request *cqp_request)
-{
- unsigned long flags;
- struct nes_qp *nesqp = cqp_request->cqp_callback_pointer;
- struct nes_adapter *nesadapter = nesdev->nesadapter;
-
- atomic_inc(&qps_destroyed);
-
- /* Free the control structures */
-
- if (nesqp->pbl_vbase) {
- pci_free_consistent(nesdev->pcidev, nesqp->qp_mem_size,
- nesqp->hwqp.q2_vbase, nesqp->hwqp.q2_pbase);
- spin_lock_irqsave(&nesadapter->pbl_lock, flags);
- nesadapter->free_256pbl++;
- spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
- pci_free_consistent(nesdev->pcidev, 256, nesqp->pbl_vbase, nesqp->pbl_pbase);
- nesqp->pbl_vbase = NULL;
-
- } else {
- pci_free_consistent(nesdev->pcidev, nesqp->qp_mem_size,
- nesqp->hwqp.sq_vbase, nesqp->hwqp.sq_pbase);
- }
- nes_free_resource(nesadapter, nesadapter->allocated_qps, nesqp->hwqp.qp_id);
-
- nesadapter->qp_table[nesqp->hwqp.qp_id-NES_FIRST_QPN] = NULL;
- kfree(nesqp->allocated_buffer);
-
-}
-
-/**
- * nes_rem_ref
- */
-void nes_rem_ref(struct ib_qp *ibqp)
-{
- u64 u64temp;
- struct nes_qp *nesqp;
- struct nes_vnic *nesvnic = to_nesvnic(ibqp->device);
- struct nes_device *nesdev = nesvnic->nesdev;
- struct nes_hw_cqp_wqe *cqp_wqe;
- struct nes_cqp_request *cqp_request;
- u32 opcode;
-
- nesqp = to_nesqp(ibqp);
-
- if (atomic_read(&nesqp->refcount) == 0) {
- printk(KERN_INFO PFX "%s: Reference count already 0 for QP%d, last aeq = 0x%04X.\n",
- __func__, ibqp->qp_num, nesqp->last_aeq);
- BUG();
- }
-
- if (atomic_dec_and_test(&nesqp->refcount)) {
- if (nesqp->pau_mode)
- nes_destroy_pau_qp(nesdev, nesqp);
-
- /* Destroy the QP */
- cqp_request = nes_get_cqp_request(nesdev);
- if (cqp_request == NULL) {
- nes_debug(NES_DBG_QP, "Failed to get a cqp_request.\n");
- return;
- }
- cqp_request->waiting = 0;
- cqp_request->callback = 1;
- cqp_request->cqp_callback = nes_cqp_rem_ref_callback;
- cqp_request->cqp_callback_pointer = nesqp;
- cqp_wqe = &cqp_request->cqp_wqe;
-
- nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
- opcode = NES_CQP_DESTROY_QP | NES_CQP_QP_TYPE_IWARP;
-
- if (nesqp->hte_added) {
- opcode |= NES_CQP_QP_DEL_HTE;
- nesqp->hte_added = 0;
- }
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, opcode);
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX, nesqp->hwqp.qp_id);
- u64temp = (u64)nesqp->nesqp_context_pbase;
- set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_QP_WQE_CONTEXT_LOW_IDX, u64temp);
- nes_post_cqp_request(nesdev, cqp_request);
- }
-}
-
-
-/**
- * nes_get_qp
- */
-struct ib_qp *nes_get_qp(struct ib_device *device, int qpn)
-{
- struct nes_vnic *nesvnic = to_nesvnic(device);
- struct nes_device *nesdev = nesvnic->nesdev;
- struct nes_adapter *nesadapter = nesdev->nesadapter;
-
- if ((qpn < NES_FIRST_QPN) || (qpn >= (NES_FIRST_QPN + nesadapter->max_qp)))
- return NULL;
-
- return &nesadapter->qp_table[qpn - NES_FIRST_QPN]->ibqp;
-}
-
-
-/**
- * nes_print_macaddr
- */
-static void nes_print_macaddr(struct net_device *netdev)
-{
- nes_debug(NES_DBG_INIT, "%s: %pM, IRQ %u\n",
- netdev->name, netdev->dev_addr, netdev->irq);
-}
-
-/**
- * nes_interrupt - handle interrupts
- */
-static irqreturn_t nes_interrupt(int irq, void *dev_id)
-{
- struct nes_device *nesdev = (struct nes_device *)dev_id;
- int handled = 0;
- u32 int_mask;
- u32 int_req;
- u32 int_stat;
- u32 intf_int_stat;
- u32 timer_stat;
-
- if (nesdev->msi_enabled) {
- /* No need to read the interrupt pending register if msi is enabled */
- handled = 1;
- } else {
- if (unlikely(nesdev->nesadapter->hw_rev == NE020_REV)) {
- /* Master interrupt enable provides synchronization for kicking off bottom half
- when interrupt sharing is going on */
- int_mask = nes_read32(nesdev->regs + NES_INT_MASK);
- if (int_mask & 0x80000000) {
- /* Check interrupt status to see if this might be ours */
- int_stat = nes_read32(nesdev->regs + NES_INT_STAT);
- int_req = nesdev->int_req;
- if (int_stat&int_req) {
- /* if interesting CEQ or AEQ is pending, claim the interrupt */
- if ((int_stat&int_req) & (~(NES_INT_TIMER|NES_INT_INTF))) {
- handled = 1;
- } else {
- if (((int_stat & int_req) & NES_INT_TIMER) == NES_INT_TIMER) {
- /* Timer might be running but might be for another function */
- timer_stat = nes_read32(nesdev->regs + NES_TIMER_STAT);
- if ((timer_stat & nesdev->timer_int_req) != 0) {
- handled = 1;
- }
- }
- if ((((int_stat & int_req) & NES_INT_INTF) == NES_INT_INTF) &&
- (handled == 0)) {
- intf_int_stat = nes_read32(nesdev->regs+NES_INTF_INT_STAT);
- if ((intf_int_stat & nesdev->intf_int_req) != 0) {
- handled = 1;
- }
- }
- }
- if (handled) {
- nes_write32(nesdev->regs+NES_INT_MASK, int_mask & (~0x80000000));
- int_mask = nes_read32(nesdev->regs+NES_INT_MASK);
- /* Save off the status to save an additional read */
- nesdev->int_stat = int_stat;
- nesdev->napi_isr_ran = 1;
- }
- }
- }
- } else {
- handled = nes_read32(nesdev->regs+NES_INT_PENDING);
- }
- }
-
- if (handled) {
-
- if (nes_napi_isr(nesdev) == 0) {
- tasklet_schedule(&nesdev->dpc_tasklet);
-
- }
- return IRQ_HANDLED;
- } else {
- return IRQ_NONE;
- }
-}
-
-
-/**
- * nes_probe - Device initialization
- */
-static int nes_probe(struct pci_dev *pcidev, const struct pci_device_id *ent)
-{
- struct net_device *netdev = NULL;
- struct nes_device *nesdev = NULL;
- int ret = 0;
- void __iomem *mmio_regs = NULL;
- u8 hw_rev;
-
- assert(pcidev != NULL);
- assert(ent != NULL);
-
- printk(KERN_INFO PFX "NetEffect RNIC driver v%s loading. (%s)\n",
- DRV_VERSION, pci_name(pcidev));
-
- ret = pci_enable_device(pcidev);
- if (ret) {
- printk(KERN_ERR PFX "Unable to enable PCI device. (%s)\n", pci_name(pcidev));
- goto bail0;
- }
-
- nes_debug(NES_DBG_INIT, "BAR0 (@0x%08lX) size = 0x%lX bytes\n",
- (long unsigned int)pci_resource_start(pcidev, BAR_0),
- (long unsigned int)pci_resource_len(pcidev, BAR_0));
- nes_debug(NES_DBG_INIT, "BAR1 (@0x%08lX) size = 0x%lX bytes\n",
- (long unsigned int)pci_resource_start(pcidev, BAR_1),
- (long unsigned int)pci_resource_len(pcidev, BAR_1));
-
- /* Make sure PCI base addr are MMIO */
- if (!(pci_resource_flags(pcidev, BAR_0) & IORESOURCE_MEM) ||
- !(pci_resource_flags(pcidev, BAR_1) & IORESOURCE_MEM)) {
- printk(KERN_ERR PFX "PCI regions not an MMIO resource\n");
- ret = -ENODEV;
- goto bail1;
- }
-
- /* Reserve PCI I/O and memory resources */
- ret = pci_request_regions(pcidev, DRV_NAME);
- if (ret) {
- printk(KERN_ERR PFX "Unable to request regions. (%s)\n", pci_name(pcidev));
- goto bail1;
- }
-
- if ((sizeof(dma_addr_t) > 4)) {
- ret = pci_set_dma_mask(pcidev, DMA_BIT_MASK(64));
- if (ret < 0) {
- printk(KERN_ERR PFX "64b DMA mask configuration failed\n");
- goto bail2;
- }
- ret = pci_set_consistent_dma_mask(pcidev, DMA_BIT_MASK(64));
- if (ret) {
- printk(KERN_ERR PFX "64b DMA consistent mask configuration failed\n");
- goto bail2;
- }
- } else {
- ret = pci_set_dma_mask(pcidev, DMA_BIT_MASK(32));
- if (ret < 0) {
- printk(KERN_ERR PFX "32b DMA mask configuration failed\n");
- goto bail2;
- }
- ret = pci_set_consistent_dma_mask(pcidev, DMA_BIT_MASK(32));
- if (ret) {
- printk(KERN_ERR PFX "32b DMA consistent mask configuration failed\n");
- goto bail2;
- }
- }
-
- pci_set_master(pcidev);
-
- /* Allocate hardware structure */
- nesdev = kzalloc(sizeof(struct nes_device), GFP_KERNEL);
- if (!nesdev) {
- ret = -ENOMEM;
- goto bail2;
- }
-
- nes_debug(NES_DBG_INIT, "Allocated nes device at %p\n", nesdev);
- nesdev->pcidev = pcidev;
- pci_set_drvdata(pcidev, nesdev);
-
- pci_read_config_byte(pcidev, 0x0008, &hw_rev);
- nes_debug(NES_DBG_INIT, "hw_rev=%u\n", hw_rev);
-
- spin_lock_init(&nesdev->indexed_regs_lock);
-
- /* Remap the PCI registers in adapter BAR0 to kernel VA space */
- mmio_regs = ioremap_nocache(pci_resource_start(pcidev, BAR_0),
- pci_resource_len(pcidev, BAR_0));
- if (mmio_regs == NULL) {
- printk(KERN_ERR PFX "Unable to remap BAR0\n");
- ret = -EIO;
- goto bail3;
- }
- nesdev->regs = mmio_regs;
- nesdev->index_reg = 0x50 + (PCI_FUNC(pcidev->devfn)*8) + mmio_regs;
-
- /* Ensure interrupts are disabled */
- nes_write32(nesdev->regs+NES_INT_MASK, 0x7fffffff);
-
- if (nes_drv_opt & NES_DRV_OPT_ENABLE_MSI) {
- if (!pci_enable_msi(nesdev->pcidev)) {
- nesdev->msi_enabled = 1;
- nes_debug(NES_DBG_INIT, "MSI is enabled for device %s\n",
- pci_name(pcidev));
- } else {
- nes_debug(NES_DBG_INIT, "MSI is disabled by linux for device %s\n",
- pci_name(pcidev));
- }
- } else {
- nes_debug(NES_DBG_INIT, "MSI not requested due to driver options for device %s\n",
- pci_name(pcidev));
- }
-
- nesdev->csr_start = pci_resource_start(nesdev->pcidev, BAR_0);
- nesdev->doorbell_region = pci_resource_start(nesdev->pcidev, BAR_1);
-
- /* Init the adapter */
- nesdev->nesadapter = nes_init_adapter(nesdev, hw_rev);
- if (!nesdev->nesadapter) {
- printk(KERN_ERR PFX "Unable to initialize adapter.\n");
- ret = -ENOMEM;
- goto bail5;
- }
- nesdev->nesadapter->et_rx_coalesce_usecs_irq = interrupt_mod_interval;
- nesdev->nesadapter->wqm_quanta = wqm_quanta;
-
- /* nesdev->base_doorbell_index =
- nesdev->nesadapter->pd_config_base[PCI_FUNC(nesdev->pcidev->devfn)]; */
- nesdev->base_doorbell_index = 1;
- nesdev->doorbell_start = nesdev->nesadapter->doorbell_start;
- if (nesdev->nesadapter->phy_type[0] == NES_PHY_TYPE_PUMA_1G) {
- switch (PCI_FUNC(nesdev->pcidev->devfn) %
- nesdev->nesadapter->port_count) {
- case 1:
- nesdev->mac_index = 2;
- break;
- case 2:
- nesdev->mac_index = 1;
- break;
- case 3:
- nesdev->mac_index = 3;
- break;
- case 0:
- default:
- nesdev->mac_index = 0;
- }
- } else {
- nesdev->mac_index = PCI_FUNC(nesdev->pcidev->devfn) %
- nesdev->nesadapter->port_count;
- }
-
- if ((limit_maxrdreqsz ||
- ((nesdev->nesadapter->phy_type[0] == NES_PHY_TYPE_GLADIUS) &&
- (hw_rev == NE020_REV1))) &&
- (pcie_get_readrq(pcidev) > 256)) {
- if (pcie_set_readrq(pcidev, 256))
- printk(KERN_ERR PFX "Unable to set max read request"
- " to 256 bytes\n");
- else
- nes_debug(NES_DBG_INIT, "Max read request size set"
- " to 256 bytes\n");
- }
-
- tasklet_init(&nesdev->dpc_tasklet, nes_dpc, (unsigned long)nesdev);
-
- /* bring up the Control QP */
- if (nes_init_cqp(nesdev)) {
- ret = -ENODEV;
- goto bail6;
- }
-
- /* Arm the CCQ */
- nes_write32(nesdev->regs+NES_CQE_ALLOC, NES_CQE_ALLOC_NOTIFY_NEXT |
- PCI_FUNC(nesdev->pcidev->devfn));
- nes_read32(nesdev->regs+NES_CQE_ALLOC);
-
- /* Enable the interrupts */
- nesdev->int_req = (0x101 << PCI_FUNC(nesdev->pcidev->devfn)) |
- (1 << (PCI_FUNC(nesdev->pcidev->devfn)+16));
- if (PCI_FUNC(nesdev->pcidev->devfn) < 4) {
- nesdev->int_req |= (1 << (PCI_FUNC(nesdev->mac_index)+24));
- }
-
- /* TODO: This really should be the first driver to load, not function 0 */
- if (PCI_FUNC(nesdev->pcidev->devfn) == 0) {
- /* pick up PCI and critical errors if the first driver to load */
- nesdev->intf_int_req = NES_INTF_INT_PCIERR | NES_INTF_INT_CRITERR;
- nesdev->int_req |= NES_INT_INTF;
- } else {
- nesdev->intf_int_req = 0;
- }
- nesdev->intf_int_req |= (1 << (PCI_FUNC(nesdev->pcidev->devfn)+16));
- nes_write_indexed(nesdev, NES_IDX_DEBUG_ERROR_MASKS0, 0);
- nes_write_indexed(nesdev, NES_IDX_DEBUG_ERROR_MASKS1, 0);
- nes_write_indexed(nesdev, NES_IDX_DEBUG_ERROR_MASKS2, 0x00001265);
- nes_write_indexed(nesdev, NES_IDX_DEBUG_ERROR_MASKS4, 0x18021804);
-
- nes_write_indexed(nesdev, NES_IDX_DEBUG_ERROR_MASKS3, 0x17801790);
-
- /* deal with both periodic and one_shot */
- nesdev->timer_int_req = 0x101 << PCI_FUNC(nesdev->pcidev->devfn);
- nesdev->nesadapter->timer_int_req |= nesdev->timer_int_req;
- nes_debug(NES_DBG_INIT, "setting int_req for function %u, nesdev = 0x%04X, adapter = 0x%04X\n",
- PCI_FUNC(nesdev->pcidev->devfn),
- nesdev->timer_int_req, nesdev->nesadapter->timer_int_req);
-
- nes_write32(nesdev->regs+NES_INTF_INT_MASK, ~(nesdev->intf_int_req));
-
- list_add_tail(&nesdev->list, &nes_dev_list);
-
- /* Request an interrupt line for the driver */
- ret = request_irq(pcidev->irq, nes_interrupt, IRQF_SHARED, DRV_NAME, nesdev);
- if (ret) {
- printk(KERN_ERR PFX "%s: requested IRQ %u is busy\n",
- pci_name(pcidev), pcidev->irq);
- goto bail65;
- }
-
- nes_write32(nesdev->regs+NES_INT_MASK, ~nesdev->int_req);
-
- if (nes_notifiers_registered == 0) {
- register_inetaddr_notifier(&nes_inetaddr_notifier);
- register_netevent_notifier(&nes_net_notifier);
- }
- nes_notifiers_registered++;
-
- INIT_DELAYED_WORK(&nesdev->work, nes_recheck_link_status);
-
- /* Initialize network devices */
- netdev = nes_netdev_init(nesdev, mmio_regs);
- if (netdev == NULL) {
- ret = -ENOMEM;
- goto bail7;
- }
-
- /* Register network device */
- ret = register_netdev(netdev);
- if (ret) {
- printk(KERN_ERR PFX "Unable to register netdev, ret = %d\n", ret);
- nes_netdev_destroy(netdev);
- goto bail7;
- }
-
- nes_print_macaddr(netdev);
-
- nesdev->netdev_count++;
- nesdev->nesadapter->netdev_count++;
-
- printk(KERN_INFO PFX "%s: NetEffect RNIC driver successfully loaded.\n",
- pci_name(pcidev));
- return 0;
-
- bail7:
- printk(KERN_ERR PFX "bail7\n");
- while (nesdev->netdev_count > 0) {
- nesdev->netdev_count--;
- nesdev->nesadapter->netdev_count--;
-
- unregister_netdev(nesdev->netdev[nesdev->netdev_count]);
- nes_netdev_destroy(nesdev->netdev[nesdev->netdev_count]);
- }
-
- nes_debug(NES_DBG_INIT, "netdev_count=%d, nesadapter->netdev_count=%d\n",
- nesdev->netdev_count, nesdev->nesadapter->netdev_count);
-
- nes_notifiers_registered--;
- if (nes_notifiers_registered == 0) {
- unregister_netevent_notifier(&nes_net_notifier);
- unregister_inetaddr_notifier(&nes_inetaddr_notifier);
- }
-
- list_del(&nesdev->list);
- nes_destroy_cqp(nesdev);
-
- bail65:
- printk(KERN_ERR PFX "bail65\n");
- free_irq(pcidev->irq, nesdev);
- if (nesdev->msi_enabled) {
- pci_disable_msi(pcidev);
- }
- bail6:
- printk(KERN_ERR PFX "bail6\n");
- tasklet_kill(&nesdev->dpc_tasklet);
- /* Deallocate the Adapter Structure */
- nes_destroy_adapter(nesdev->nesadapter);
-
- bail5:
- printk(KERN_ERR PFX "bail5\n");
- iounmap(nesdev->regs);
-
- bail3:
- printk(KERN_ERR PFX "bail3\n");
- kfree(nesdev);
-
- bail2:
- pci_release_regions(pcidev);
-
- bail1:
- pci_disable_device(pcidev);
-
- bail0:
- return ret;
-}
-
-
-/**
- * nes_remove - unload from kernel
- */
-static void nes_remove(struct pci_dev *pcidev)
-{
- struct nes_device *nesdev = pci_get_drvdata(pcidev);
- struct net_device *netdev;
- int netdev_index = 0;
- unsigned long flags;
-
- if (nesdev->netdev_count) {
- netdev = nesdev->netdev[netdev_index];
- if (netdev) {
- netif_stop_queue(netdev);
- unregister_netdev(netdev);
- nes_netdev_destroy(netdev);
-
- nesdev->netdev[netdev_index] = NULL;
- nesdev->netdev_count--;
- nesdev->nesadapter->netdev_count--;
- }
- }
-
- nes_notifiers_registered--;
- if (nes_notifiers_registered == 0) {
- unregister_netevent_notifier(&nes_net_notifier);
- unregister_inetaddr_notifier(&nes_inetaddr_notifier);
- }
-
- list_del(&nesdev->list);
- nes_destroy_cqp(nesdev);
-
- free_irq(pcidev->irq, nesdev);
- tasklet_kill(&nesdev->dpc_tasklet);
-
- spin_lock_irqsave(&nesdev->nesadapter->phy_lock, flags);
- if (nesdev->link_recheck) {
- spin_unlock_irqrestore(&nesdev->nesadapter->phy_lock, flags);
- cancel_delayed_work_sync(&nesdev->work);
- } else {
- spin_unlock_irqrestore(&nesdev->nesadapter->phy_lock, flags);
- }
-
- /* Deallocate the Adapter Structure */
- nes_destroy_adapter(nesdev->nesadapter);
-
- if (nesdev->msi_enabled) {
- pci_disable_msi(pcidev);
- }
-
- iounmap(nesdev->regs);
- kfree(nesdev);
-
- /* nes_debug(NES_DBG_SHUTDOWN, "calling pci_release_regions.\n"); */
- pci_release_regions(pcidev);
- pci_disable_device(pcidev);
- pci_set_drvdata(pcidev, NULL);
-}
-
-
-static struct pci_driver nes_pci_driver = {
- .name = DRV_NAME,
- .id_table = nes_pci_table,
- .probe = nes_probe,
- .remove = nes_remove,
-};
-
-static ssize_t nes_show_adapter(struct device_driver *ddp, char *buf)
-{
- unsigned int devfn = 0xffffffff;
- unsigned char bus_number = 0xff;
- unsigned int i = 0;
- struct nes_device *nesdev;
-
- list_for_each_entry(nesdev, &nes_dev_list, list) {
- if (i == ee_flsh_adapter) {
- devfn = nesdev->pcidev->devfn;
- bus_number = nesdev->pcidev->bus->number;
- break;
- }
- i++;
- }
-
- return snprintf(buf, PAGE_SIZE, "%x:%x\n", bus_number, devfn);
-}
-
-static ssize_t nes_store_adapter(struct device_driver *ddp,
- const char *buf, size_t count)
-{
- char *p = (char *)buf;
-
- ee_flsh_adapter = simple_strtoul(p, &p, 10);
- return strnlen(buf, count);
-}
-
-static ssize_t nes_show_ee_cmd(struct device_driver *ddp, char *buf)
-{
- u32 eeprom_cmd = 0xdead;
- u32 i = 0;
- struct nes_device *nesdev;
-
- list_for_each_entry(nesdev, &nes_dev_list, list) {
- if (i == ee_flsh_adapter) {
- eeprom_cmd = nes_read32(nesdev->regs + NES_EEPROM_COMMAND);
- break;
- }
- i++;
- }
- return snprintf(buf, PAGE_SIZE, "0x%x\n", eeprom_cmd);
-}
-
-static ssize_t nes_store_ee_cmd(struct device_driver *ddp,
- const char *buf, size_t count)
-{
- char *p = (char *)buf;
- u32 val;
- u32 i = 0;
- struct nes_device *nesdev;
-
- if (p[1] == 'x' || p[1] == 'X' || p[0] == 'x' || p[0] == 'X') {
- val = simple_strtoul(p, &p, 16);
- list_for_each_entry(nesdev, &nes_dev_list, list) {
- if (i == ee_flsh_adapter) {
- nes_write32(nesdev->regs + NES_EEPROM_COMMAND, val);
- break;
- }
- i++;
- }
- }
- return strnlen(buf, count);
-}
-
-static ssize_t nes_show_ee_data(struct device_driver *ddp, char *buf)
-{
- u32 eeprom_data = 0xdead;
- u32 i = 0;
- struct nes_device *nesdev;
-
- list_for_each_entry(nesdev, &nes_dev_list, list) {
- if (i == ee_flsh_adapter) {
- eeprom_data = nes_read32(nesdev->regs + NES_EEPROM_DATA);
- break;
- }
- i++;
- }
-
- return snprintf(buf, PAGE_SIZE, "0x%x\n", eeprom_data);
-}
-
-static ssize_t nes_store_ee_data(struct device_driver *ddp,
- const char *buf, size_t count)
-{
- char *p = (char *)buf;
- u32 val;
- u32 i = 0;
- struct nes_device *nesdev;
-
- if (p[1] == 'x' || p[1] == 'X' || p[0] == 'x' || p[0] == 'X') {
- val = simple_strtoul(p, &p, 16);
- list_for_each_entry(nesdev, &nes_dev_list, list) {
- if (i == ee_flsh_adapter) {
- nes_write32(nesdev->regs + NES_EEPROM_DATA, val);
- break;
- }
- i++;
- }
- }
- return strnlen(buf, count);
-}
-
-static ssize_t nes_show_flash_cmd(struct device_driver *ddp, char *buf)
-{
- u32 flash_cmd = 0xdead;
- u32 i = 0;
- struct nes_device *nesdev;
-
- list_for_each_entry(nesdev, &nes_dev_list, list) {
- if (i == ee_flsh_adapter) {
- flash_cmd = nes_read32(nesdev->regs + NES_FLASH_COMMAND);
- break;
- }
- i++;
- }
-
- return snprintf(buf, PAGE_SIZE, "0x%x\n", flash_cmd);
-}
-
-static ssize_t nes_store_flash_cmd(struct device_driver *ddp,
- const char *buf, size_t count)
-{
- char *p = (char *)buf;
- u32 val;
- u32 i = 0;
- struct nes_device *nesdev;
-
- if (p[1] == 'x' || p[1] == 'X' || p[0] == 'x' || p[0] == 'X') {
- val = simple_strtoul(p, &p, 16);
- list_for_each_entry(nesdev, &nes_dev_list, list) {
- if (i == ee_flsh_adapter) {
- nes_write32(nesdev->regs + NES_FLASH_COMMAND, val);
- break;
- }
- i++;
- }
- }
- return strnlen(buf, count);
-}
-
-static ssize_t nes_show_flash_data(struct device_driver *ddp, char *buf)
-{
- u32 flash_data = 0xdead;
- u32 i = 0;
- struct nes_device *nesdev;
-
- list_for_each_entry(nesdev, &nes_dev_list, list) {
- if (i == ee_flsh_adapter) {
- flash_data = nes_read32(nesdev->regs + NES_FLASH_DATA);
- break;
- }
- i++;
- }
-
- return snprintf(buf, PAGE_SIZE, "0x%x\n", flash_data);
-}
-
-static ssize_t nes_store_flash_data(struct device_driver *ddp,
- const char *buf, size_t count)
-{
- char *p = (char *)buf;
- u32 val;
- u32 i = 0;
- struct nes_device *nesdev;
-
- if (p[1] == 'x' || p[1] == 'X' || p[0] == 'x' || p[0] == 'X') {
- val = simple_strtoul(p, &p, 16);
- list_for_each_entry(nesdev, &nes_dev_list, list) {
- if (i == ee_flsh_adapter) {
- nes_write32(nesdev->regs + NES_FLASH_DATA, val);
- break;
- }
- i++;
- }
- }
- return strnlen(buf, count);
-}
-
-static ssize_t nes_show_nonidx_addr(struct device_driver *ddp, char *buf)
-{
- return snprintf(buf, PAGE_SIZE, "0x%x\n", sysfs_nonidx_addr);
-}
-
-static ssize_t nes_store_nonidx_addr(struct device_driver *ddp,
- const char *buf, size_t count)
-{
- char *p = (char *)buf;
-
- if (p[1] == 'x' || p[1] == 'X' || p[0] == 'x' || p[0] == 'X')
- sysfs_nonidx_addr = simple_strtoul(p, &p, 16);
-
- return strnlen(buf, count);
-}
-
-static ssize_t nes_show_nonidx_data(struct device_driver *ddp, char *buf)
-{
- u32 nonidx_data = 0xdead;
- u32 i = 0;
- struct nes_device *nesdev;
-
- list_for_each_entry(nesdev, &nes_dev_list, list) {
- if (i == ee_flsh_adapter) {
- nonidx_data = nes_read32(nesdev->regs + sysfs_nonidx_addr);
- break;
- }
- i++;
- }
-
- return snprintf(buf, PAGE_SIZE, "0x%x\n", nonidx_data);
-}
-
-static ssize_t nes_store_nonidx_data(struct device_driver *ddp,
- const char *buf, size_t count)
-{
- char *p = (char *)buf;
- u32 val;
- u32 i = 0;
- struct nes_device *nesdev;
-
- if (p[1] == 'x' || p[1] == 'X' || p[0] == 'x' || p[0] == 'X') {
- val = simple_strtoul(p, &p, 16);
- list_for_each_entry(nesdev, &nes_dev_list, list) {
- if (i == ee_flsh_adapter) {
- nes_write32(nesdev->regs + sysfs_nonidx_addr, val);
- break;
- }
- i++;
- }
- }
- return strnlen(buf, count);
-}
-
-static ssize_t nes_show_idx_addr(struct device_driver *ddp, char *buf)
-{
- return snprintf(buf, PAGE_SIZE, "0x%x\n", sysfs_idx_addr);
-}
-
-static ssize_t nes_store_idx_addr(struct device_driver *ddp,
- const char *buf, size_t count)
-{
- char *p = (char *)buf;
-
- if (p[1] == 'x' || p[1] == 'X' || p[0] == 'x' || p[0] == 'X')
- sysfs_idx_addr = simple_strtoul(p, &p, 16);
-
- return strnlen(buf, count);
-}
-
-static ssize_t nes_show_idx_data(struct device_driver *ddp, char *buf)
-{
- u32 idx_data = 0xdead;
- u32 i = 0;
- struct nes_device *nesdev;
-
- list_for_each_entry(nesdev, &nes_dev_list, list) {
- if (i == ee_flsh_adapter) {
- idx_data = nes_read_indexed(nesdev, sysfs_idx_addr);
- break;
- }
- i++;
- }
-
- return snprintf(buf, PAGE_SIZE, "0x%x\n", idx_data);
-}
-
-static ssize_t nes_store_idx_data(struct device_driver *ddp,
- const char *buf, size_t count)
-{
- char *p = (char *)buf;
- u32 val;
- u32 i = 0;
- struct nes_device *nesdev;
-
- if (p[1] == 'x' || p[1] == 'X' || p[0] == 'x' || p[0] == 'X') {
- val = simple_strtoul(p, &p, 16);
- list_for_each_entry(nesdev, &nes_dev_list, list) {
- if (i == ee_flsh_adapter) {
- nes_write_indexed(nesdev, sysfs_idx_addr, val);
- break;
- }
- i++;
- }
- }
- return strnlen(buf, count);
-}
-
-
-/**
- * nes_show_wqm_quanta
- */
-static ssize_t nes_show_wqm_quanta(struct device_driver *ddp, char *buf)
-{
- u32 wqm_quanta_value = 0xdead;
- u32 i = 0;
- struct nes_device *nesdev;
-
- list_for_each_entry(nesdev, &nes_dev_list, list) {
- if (i == ee_flsh_adapter) {
- wqm_quanta_value = nesdev->nesadapter->wqm_quanta;
- break;
- }
- i++;
- }
-
- return snprintf(buf, PAGE_SIZE, "0x%X\n", wqm_quanta_value);
-}
-
-
-/**
- * nes_store_wqm_quanta
- */
-static ssize_t nes_store_wqm_quanta(struct device_driver *ddp,
- const char *buf, size_t count)
-{
- unsigned long wqm_quanta_value;
- u32 wqm_config1;
- u32 i = 0;
- struct nes_device *nesdev;
-
- if (kstrtoul(buf, 0, &wqm_quanta_value) < 0)
- return -EINVAL;
-
- list_for_each_entry(nesdev, &nes_dev_list, list) {
- if (i == ee_flsh_adapter) {
- nesdev->nesadapter->wqm_quanta = wqm_quanta_value;
- wqm_config1 = nes_read_indexed(nesdev,
- NES_IDX_WQM_CONFIG1);
- nes_write_indexed(nesdev, NES_IDX_WQM_CONFIG1,
- ((wqm_quanta_value << 1) |
- (wqm_config1 & 0x00000001)));
- break;
- }
- i++;
- }
- return strnlen(buf, count);
-}
-
-static DRIVER_ATTR(adapter, S_IRUSR | S_IWUSR,
- nes_show_adapter, nes_store_adapter);
-static DRIVER_ATTR(eeprom_cmd, S_IRUSR | S_IWUSR,
- nes_show_ee_cmd, nes_store_ee_cmd);
-static DRIVER_ATTR(eeprom_data, S_IRUSR | S_IWUSR,
- nes_show_ee_data, nes_store_ee_data);
-static DRIVER_ATTR(flash_cmd, S_IRUSR | S_IWUSR,
- nes_show_flash_cmd, nes_store_flash_cmd);
-static DRIVER_ATTR(flash_data, S_IRUSR | S_IWUSR,
- nes_show_flash_data, nes_store_flash_data);
-static DRIVER_ATTR(nonidx_addr, S_IRUSR | S_IWUSR,
- nes_show_nonidx_addr, nes_store_nonidx_addr);
-static DRIVER_ATTR(nonidx_data, S_IRUSR | S_IWUSR,
- nes_show_nonidx_data, nes_store_nonidx_data);
-static DRIVER_ATTR(idx_addr, S_IRUSR | S_IWUSR,
- nes_show_idx_addr, nes_store_idx_addr);
-static DRIVER_ATTR(idx_data, S_IRUSR | S_IWUSR,
- nes_show_idx_data, nes_store_idx_data);
-static DRIVER_ATTR(wqm_quanta, S_IRUSR | S_IWUSR,
- nes_show_wqm_quanta, nes_store_wqm_quanta);
-
-static int nes_create_driver_sysfs(struct pci_driver *drv)
-{
- int error;
- error = driver_create_file(&drv->driver, &driver_attr_adapter);
- error |= driver_create_file(&drv->driver, &driver_attr_eeprom_cmd);
- error |= driver_create_file(&drv->driver, &driver_attr_eeprom_data);
- error |= driver_create_file(&drv->driver, &driver_attr_flash_cmd);
- error |= driver_create_file(&drv->driver, &driver_attr_flash_data);
- error |= driver_create_file(&drv->driver, &driver_attr_nonidx_addr);
- error |= driver_create_file(&drv->driver, &driver_attr_nonidx_data);
- error |= driver_create_file(&drv->driver, &driver_attr_idx_addr);
- error |= driver_create_file(&drv->driver, &driver_attr_idx_data);
- error |= driver_create_file(&drv->driver, &driver_attr_wqm_quanta);
- return error;
-}
-
-static void nes_remove_driver_sysfs(struct pci_driver *drv)
-{
- driver_remove_file(&drv->driver, &driver_attr_adapter);
- driver_remove_file(&drv->driver, &driver_attr_eeprom_cmd);
- driver_remove_file(&drv->driver, &driver_attr_eeprom_data);
- driver_remove_file(&drv->driver, &driver_attr_flash_cmd);
- driver_remove_file(&drv->driver, &driver_attr_flash_data);
- driver_remove_file(&drv->driver, &driver_attr_nonidx_addr);
- driver_remove_file(&drv->driver, &driver_attr_nonidx_data);
- driver_remove_file(&drv->driver, &driver_attr_idx_addr);
- driver_remove_file(&drv->driver, &driver_attr_idx_data);
- driver_remove_file(&drv->driver, &driver_attr_wqm_quanta);
-}
-
-/**
- * nes_init_module - module initialization entry point
- */
-static int __init nes_init_module(void)
-{
- int retval;
- int retval1;
-
- retval = nes_cm_start();
- if (retval) {
- printk(KERN_ERR PFX "Unable to start NetEffect iWARP CM.\n");
- return retval;
- }
- retval = pci_register_driver(&nes_pci_driver);
- if (retval >= 0) {
- retval1 = nes_create_driver_sysfs(&nes_pci_driver);
- if (retval1 < 0)
- printk(KERN_ERR PFX "Unable to create NetEffect sys files.\n");
- }
- return retval;
-}
-
-
-/**
- * nes_exit_module - module unload entry point
- */
-static void __exit nes_exit_module(void)
-{
- nes_cm_stop();
- nes_remove_driver_sysfs(&nes_pci_driver);
-
- pci_unregister_driver(&nes_pci_driver);
-}
-
-
-module_init(nes_init_module);
-module_exit(nes_exit_module);
diff --git a/drivers/infiniband/hw/nes/nes.h b/drivers/infiniband/hw/nes/nes.h
deleted file mode 100644
index 85acd0843b50..000000000000
--- a/drivers/infiniband/hw/nes/nes.h
+++ /dev/null
@@ -1,582 +0,0 @@
-/*
- * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __NES_H
-#define __NES_H
-
-#include <linux/netdevice.h>
-#include <linux/inetdevice.h>
-#include <linux/spinlock.h>
-#include <linux/kernel.h>
-#include <linux/delay.h>
-#include <linux/pci.h>
-#include <linux/dma-mapping.h>
-#include <linux/workqueue.h>
-#include <linux/slab.h>
-#include <asm/io.h>
-#include <linux/crc32c.h>
-
-#include <rdma/ib_smi.h>
-#include <rdma/ib_verbs.h>
-#include <rdma/ib_pack.h>
-#include <rdma/rdma_cm.h>
-#include <rdma/iw_cm.h>
-#include <rdma/rdma_netlink.h>
-#include <rdma/iw_portmap.h>
-
-#define NES_SEND_FIRST_WRITE
-
-#define QUEUE_DISCONNECTS
-
-#define DRV_NAME "iw_nes"
-#define DRV_VERSION "1.5.0.1"
-#define PFX DRV_NAME ": "
-
-/*
- * NetEffect PCI vendor id and NE010 PCI device id.
- */
-#ifndef PCI_VENDOR_ID_NETEFFECT /* not in pci.ids yet */
-#define PCI_VENDOR_ID_NETEFFECT 0x1678
-#define PCI_DEVICE_ID_NETEFFECT_NE020 0x0100
-#define PCI_DEVICE_ID_NETEFFECT_NE020_KR 0x0110
-#endif
-
-#define NE020_REV 4
-#define NE020_REV1 5
-
-#define BAR_0 0
-#define BAR_1 2
-
-#define RX_BUF_SIZE (1536 + 8)
-#define NES_REG0_SIZE (4 * 1024)
-#define NES_TX_TIMEOUT (6*HZ)
-#define NES_FIRST_QPN 64
-#define NES_SW_CONTEXT_ALIGN 1024
-
-#define NES_MAX_MTU 9000
-
-#define NES_NIC_MAX_NICS 16
-#define NES_MAX_ARP_TABLE_SIZE 4096
-
-#define NES_NIC_CEQ_SIZE 8
-/* NICs will be on a separate CQ */
-#define NES_CCEQ_SIZE ((nesadapter->max_cq / nesadapter->port_count) - 32)
-
-#define NES_MAX_PORT_COUNT 4
-
-#define MAX_DPC_ITERATIONS 128
-
-#define NES_DRV_OPT_ENABLE_MPA_VER_0 0x00000001
-#define NES_DRV_OPT_DISABLE_MPA_CRC 0x00000002
-#define NES_DRV_OPT_DISABLE_FIRST_WRITE 0x00000004
-#define NES_DRV_OPT_DISABLE_INTF 0x00000008
-#define NES_DRV_OPT_ENABLE_MSI 0x00000010
-#define NES_DRV_OPT_DUAL_LOGICAL_PORT 0x00000020
-#define NES_DRV_OPT_SUPRESS_OPTION_BC 0x00000040
-#define NES_DRV_OPT_NO_INLINE_DATA 0x00000080
-#define NES_DRV_OPT_DISABLE_INT_MOD 0x00000100
-#define NES_DRV_OPT_DISABLE_VIRT_WQ 0x00000200
-#define NES_DRV_OPT_ENABLE_PAU 0x00000400
-
-#define NES_AEQ_EVENT_TIMEOUT 2500
-#define NES_DISCONNECT_EVENT_TIMEOUT 2000
-
-/* debug levels */
-/* must match userspace */
-#define NES_DBG_HW 0x00000001
-#define NES_DBG_INIT 0x00000002
-#define NES_DBG_ISR 0x00000004
-#define NES_DBG_PHY 0x00000008
-#define NES_DBG_NETDEV 0x00000010
-#define NES_DBG_CM 0x00000020
-#define NES_DBG_CM1 0x00000040
-#define NES_DBG_NIC_RX 0x00000080
-#define NES_DBG_NIC_TX 0x00000100
-#define NES_DBG_CQP 0x00000200
-#define NES_DBG_MMAP 0x00000400
-#define NES_DBG_MR 0x00000800
-#define NES_DBG_PD 0x00001000
-#define NES_DBG_CQ 0x00002000
-#define NES_DBG_QP 0x00004000
-#define NES_DBG_MOD_QP 0x00008000
-#define NES_DBG_AEQ 0x00010000
-#define NES_DBG_IW_RX 0x00020000
-#define NES_DBG_IW_TX 0x00040000
-#define NES_DBG_SHUTDOWN 0x00080000
-#define NES_DBG_PAU 0x00100000
-#define NES_DBG_NLMSG 0x00200000
-#define NES_DBG_RSVD1 0x10000000
-#define NES_DBG_RSVD2 0x20000000
-#define NES_DBG_RSVD3 0x40000000
-#define NES_DBG_RSVD4 0x80000000
-#define NES_DBG_ALL 0xffffffff
-
-#ifdef CONFIG_INFINIBAND_NES_DEBUG
-#define nes_debug(level, fmt, args...) \
-do { \
- if (level & nes_debug_level) \
- printk(KERN_ERR PFX "%s[%u]: " fmt, __func__, __LINE__, ##args); \
-} while (0)
-
-#define assert(expr) \
-do { \
- if (!(expr)) { \
- printk(KERN_ERR PFX "Assertion failed! %s, %s, %s, line %d\n", \
- #expr, __FILE__, __func__, __LINE__); \
- } \
-} while (0)
-
-#define NES_EVENT_TIMEOUT 1200000
-#else
-#define nes_debug(level, fmt, args...)
-#define assert(expr) do {} while (0)
-
-#define NES_EVENT_TIMEOUT 100000
-#endif
-
-#include "nes_hw.h"
-#include "nes_verbs.h"
-#include "nes_context.h"
-#include <rdma/nes-abi.h>
-#include "nes_cm.h"
-#include "nes_mgt.h"
-
-extern int interrupt_mod_interval;
-extern int nes_if_count;
-extern int mpa_version;
-extern int disable_mpa_crc;
-extern unsigned int nes_drv_opt;
-extern unsigned int nes_debug_level;
-extern unsigned int wqm_quanta;
-extern struct list_head nes_adapter_list;
-
-extern atomic_t cm_connects;
-extern atomic_t cm_accepts;
-extern atomic_t cm_disconnects;
-extern atomic_t cm_closes;
-extern atomic_t cm_connecteds;
-extern atomic_t cm_connect_reqs;
-extern atomic_t cm_rejects;
-extern atomic_t mod_qp_timouts;
-extern atomic_t qps_created;
-extern atomic_t qps_destroyed;
-extern atomic_t sw_qps_destroyed;
-extern u32 mh_detected;
-extern u32 mh_pauses_sent;
-extern u32 cm_packets_sent;
-extern u32 cm_packets_bounced;
-extern u32 cm_packets_created;
-extern u32 cm_packets_received;
-extern u32 cm_packets_dropped;
-extern u32 cm_packets_retrans;
-extern atomic_t cm_listens_created;
-extern atomic_t cm_listens_destroyed;
-extern u32 cm_backlog_drops;
-extern atomic_t cm_loopbacks;
-extern atomic_t cm_nodes_created;
-extern atomic_t cm_nodes_destroyed;
-extern atomic_t cm_accel_dropped_pkts;
-extern atomic_t cm_resets_recvd;
-extern atomic_t pau_qps_created;
-extern atomic_t pau_qps_destroyed;
-
-extern u32 int_mod_timer_init;
-extern u32 int_mod_cq_depth_256;
-extern u32 int_mod_cq_depth_128;
-extern u32 int_mod_cq_depth_32;
-extern u32 int_mod_cq_depth_24;
-extern u32 int_mod_cq_depth_16;
-extern u32 int_mod_cq_depth_4;
-extern u32 int_mod_cq_depth_1;
-
-struct nes_device {
- struct nes_adapter *nesadapter;
- void __iomem *regs;
- void __iomem *index_reg;
- struct pci_dev *pcidev;
- struct net_device *netdev[NES_NIC_MAX_NICS];
- u64 link_status_interrupts;
- struct tasklet_struct dpc_tasklet;
- spinlock_t indexed_regs_lock;
- unsigned long csr_start;
- unsigned long doorbell_region;
- unsigned long doorbell_start;
- unsigned long mac_tx_errors;
- unsigned long mac_pause_frames_sent;
- unsigned long mac_pause_frames_received;
- unsigned long mac_rx_errors;
- unsigned long mac_rx_crc_errors;
- unsigned long mac_rx_symbol_err_frames;
- unsigned long mac_rx_jabber_frames;
- unsigned long mac_rx_oversized_frames;
- unsigned long mac_rx_short_frames;
- unsigned long port_rx_discards;
- unsigned long port_tx_discards;
- unsigned int mac_index;
- unsigned int nes_stack_start;
-
- /* Control Structures */
- void *cqp_vbase;
- dma_addr_t cqp_pbase;
- u32 cqp_mem_size;
- u8 ceq_index;
- u8 nic_ceq_index;
- struct nes_hw_cqp cqp;
- struct nes_hw_cq ccq;
- struct list_head cqp_avail_reqs;
- struct list_head cqp_pending_reqs;
- struct nes_cqp_request *nes_cqp_requests;
-
- u32 int_req;
- u32 int_stat;
- u32 timer_int_req;
- u32 timer_only_int_count;
- u32 intf_int_req;
- u32 last_mac_tx_pauses;
- u32 last_used_chunks_tx;
- struct list_head list;
-
- u16 base_doorbell_index;
- u16 currcq_count;
- u16 deepcq_count;
- u8 iw_status;
- u8 msi_enabled;
- u8 netdev_count;
- u8 napi_isr_ran;
- u8 disable_rx_flow_control;
- u8 disable_tx_flow_control;
-
- struct delayed_work work;
- u8 link_recheck;
-};
-
-/* Receive skb private area - must fit in skb->cb area */
-struct nes_rskb_cb {
- u64 busaddr;
- u32 maplen;
- u32 seqnum;
- u8 *data_start;
- struct nes_qp *nesqp;
-};
-
-static inline __le32 get_crc_value(struct nes_v4_quad *nes_quad)
-{
- u32 crc_value;
- crc_value = crc32c(~0, (void *)nes_quad, sizeof (struct nes_v4_quad));
-
- /*
- * With commit ef19454b ("[LIB] crc32c: Keep intermediate crc
- * state in cpu order"), behavior of crc32c changes on
- * big-endian platforms. Our algorithm expects the previous
- * behavior; otherwise we have RDMA connection establishment
- * issue on big-endian.
- */
- return cpu_to_le32(crc_value);
-}
-
-static inline void
-set_wqe_64bit_value(__le32 *wqe_words, u32 index, u64 value)
-{
- wqe_words[index] = cpu_to_le32((u32) value);
- wqe_words[index + 1] = cpu_to_le32(upper_32_bits(value));
-}
-
-static inline void
-set_wqe_32bit_value(__le32 *wqe_words, u32 index, u32 value)
-{
- wqe_words[index] = cpu_to_le32(value);
-}
-
-static inline void
-nes_fill_init_cqp_wqe(struct nes_hw_cqp_wqe *cqp_wqe, struct nes_device *nesdev)
-{
- cqp_wqe->wqe_words[NES_CQP_WQE_COMP_CTX_LOW_IDX] = 0;
- cqp_wqe->wqe_words[NES_CQP_WQE_COMP_CTX_HIGH_IDX] = 0;
- cqp_wqe->wqe_words[NES_CQP_WQE_COMP_SCRATCH_LOW_IDX] = 0;
- cqp_wqe->wqe_words[NES_CQP_WQE_COMP_SCRATCH_HIGH_IDX] = 0;
- cqp_wqe->wqe_words[NES_CQP_STAG_WQE_PBL_BLK_COUNT_IDX] = 0;
- cqp_wqe->wqe_words[NES_CQP_STAG_WQE_PBL_LEN_IDX] = 0;
- cqp_wqe->wqe_words[NES_CQP_STAG_WQE_LEN_LOW_IDX] = 0;
- cqp_wqe->wqe_words[NES_CQP_STAG_WQE_PA_LOW_IDX] = 0;
- cqp_wqe->wqe_words[NES_CQP_STAG_WQE_PA_HIGH_IDX] = 0;
-}
-
-static inline void
-nes_fill_init_qp_wqe(struct nes_hw_qp_wqe *wqe, struct nes_qp *nesqp, u32 head)
-{
- u32 value;
- value = ((u32)((unsigned long) nesqp)) | head;
- set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_COMP_CTX_HIGH_IDX,
- (u32)(upper_32_bits((unsigned long)(nesqp))));
- set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_COMP_CTX_LOW_IDX, value);
-}
-
-/* Read from memory-mapped device */
-static inline u32 nes_read_indexed(struct nes_device *nesdev, u32 reg_index)
-{
- unsigned long flags;
- void __iomem *addr = nesdev->index_reg;
- u32 value;
-
- spin_lock_irqsave(&nesdev->indexed_regs_lock, flags);
-
- writel(reg_index, addr);
- value = readl((void __iomem *)addr + 4);
-
- spin_unlock_irqrestore(&nesdev->indexed_regs_lock, flags);
- return value;
-}
-
-static inline u32 nes_read32(const void __iomem *addr)
-{
- return readl(addr);
-}
-
-static inline u16 nes_read16(const void __iomem *addr)
-{
- return readw(addr);
-}
-
-static inline u8 nes_read8(const void __iomem *addr)
-{
- return readb(addr);
-}
-
-/* Write to memory-mapped device */
-static inline void nes_write_indexed(struct nes_device *nesdev, u32 reg_index, u32 val)
-{
- unsigned long flags;
- void __iomem *addr = nesdev->index_reg;
-
- spin_lock_irqsave(&nesdev->indexed_regs_lock, flags);
-
- writel(reg_index, addr);
- writel(val, (void __iomem *)addr + 4);
-
- spin_unlock_irqrestore(&nesdev->indexed_regs_lock, flags);
-}
-
-static inline void nes_write32(void __iomem *addr, u32 val)
-{
- writel(val, addr);
-}
-
-static inline void nes_write16(void __iomem *addr, u16 val)
-{
- writew(val, addr);
-}
-
-static inline void nes_write8(void __iomem *addr, u8 val)
-{
- writeb(val, addr);
-}
-
-enum nes_resource {
- NES_RESOURCE_MW = 1,
- NES_RESOURCE_FAST_MR,
- NES_RESOURCE_PHYS_MR,
- NES_RESOURCE_USER_MR,
- NES_RESOURCE_PD,
- NES_RESOURCE_QP,
- NES_RESOURCE_CQ,
- NES_RESOURCE_ARP
-};
-
-static inline int nes_alloc_resource(struct nes_adapter *nesadapter,
- unsigned long *resource_array, u32 max_resources,
- u32 *req_resource_num, u32 *next, enum nes_resource resource_type)
-{
- unsigned long flags;
- u32 resource_num;
-
- spin_lock_irqsave(&nesadapter->resource_lock, flags);
-
- resource_num = find_next_zero_bit(resource_array, max_resources, *next);
- if (resource_num >= max_resources) {
- resource_num = find_first_zero_bit(resource_array, max_resources);
- if (resource_num >= max_resources) {
- printk(KERN_ERR PFX "%s: No available resources [type=%u].\n", __func__, resource_type);
- spin_unlock_irqrestore(&nesadapter->resource_lock, flags);
- return -EMFILE;
- }
- }
- set_bit(resource_num, resource_array);
- *next = resource_num+1;
- if (*next == max_resources) {
- *next = 0;
- }
- spin_unlock_irqrestore(&nesadapter->resource_lock, flags);
- *req_resource_num = resource_num;
-
- return 0;
-}
-
-static inline int nes_is_resource_allocated(struct nes_adapter *nesadapter,
- unsigned long *resource_array, u32 resource_num)
-{
- unsigned long flags;
- int bit_is_set;
-
- spin_lock_irqsave(&nesadapter->resource_lock, flags);
-
- bit_is_set = test_bit(resource_num, resource_array);
- nes_debug(NES_DBG_HW, "resource_num %u is%s allocated.\n",
- resource_num, (bit_is_set ? "": " not"));
- spin_unlock_irqrestore(&nesadapter->resource_lock, flags);
-
- return bit_is_set;
-}
-
-static inline void nes_free_resource(struct nes_adapter *nesadapter,
- unsigned long *resource_array, u32 resource_num)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&nesadapter->resource_lock, flags);
- clear_bit(resource_num, resource_array);
- spin_unlock_irqrestore(&nesadapter->resource_lock, flags);
-}
-
-static inline struct nes_vnic *to_nesvnic(struct ib_device *ibdev)
-{
- return container_of(ibdev, struct nes_ib_device, ibdev)->nesvnic;
-}
-
-static inline struct nes_pd *to_nespd(struct ib_pd *ibpd)
-{
- return container_of(ibpd, struct nes_pd, ibpd);
-}
-
-static inline struct nes_ucontext *to_nesucontext(struct ib_ucontext *ibucontext)
-{
- return container_of(ibucontext, struct nes_ucontext, ibucontext);
-}
-
-static inline struct nes_mr *to_nesmr(struct ib_mr *ibmr)
-{
- return container_of(ibmr, struct nes_mr, ibmr);
-}
-
-static inline struct nes_mr *to_nesmr_from_ibfmr(struct ib_fmr *ibfmr)
-{
- return container_of(ibfmr, struct nes_mr, ibfmr);
-}
-
-static inline struct nes_mr *to_nesmw(struct ib_mw *ibmw)
-{
- return container_of(ibmw, struct nes_mr, ibmw);
-}
-
-static inline struct nes_fmr *to_nesfmr(struct nes_mr *nesmr)
-{
- return container_of(nesmr, struct nes_fmr, nesmr);
-}
-
-static inline struct nes_cq *to_nescq(struct ib_cq *ibcq)
-{
- return container_of(ibcq, struct nes_cq, ibcq);
-}
-
-static inline struct nes_qp *to_nesqp(struct ib_qp *ibqp)
-{
- return container_of(ibqp, struct nes_qp, ibqp);
-}
-
-
-
-/* nes.c */
-void nes_add_ref(struct ib_qp *);
-void nes_rem_ref(struct ib_qp *);
-struct ib_qp *nes_get_qp(struct ib_device *, int);
-
-
-/* nes_hw.c */
-struct nes_adapter *nes_init_adapter(struct nes_device *, u8);
-void nes_nic_init_timer_defaults(struct nes_device *, u8);
-void nes_destroy_adapter(struct nes_adapter *);
-int nes_init_cqp(struct nes_device *);
-int nes_init_phy(struct nes_device *);
-int nes_init_nic_qp(struct nes_device *, struct net_device *);
-void nes_destroy_nic_qp(struct nes_vnic *);
-int nes_napi_isr(struct nes_device *);
-void nes_dpc(unsigned long);
-void nes_nic_ce_handler(struct nes_device *, struct nes_hw_nic_cq *);
-void nes_iwarp_ce_handler(struct nes_device *, struct nes_hw_cq *);
-int nes_destroy_cqp(struct nes_device *);
-int nes_nic_cm_xmit(struct sk_buff *, struct net_device *);
-void nes_recheck_link_status(struct work_struct *work);
-void nes_terminate_timeout(unsigned long context);
-
-/* nes_nic.c */
-struct net_device *nes_netdev_init(struct nes_device *, void __iomem *);
-void nes_netdev_destroy(struct net_device *);
-int nes_nic_cm_xmit(struct sk_buff *, struct net_device *);
-
-/* nes_cm.c */
-void *nes_cm_create(struct net_device *);
-int nes_cm_recv(struct sk_buff *, struct net_device *);
-void nes_update_arp(unsigned char *, u32, u32, u16, u16);
-void nes_manage_arp_cache(struct net_device *, unsigned char *, u32, u32);
-void nes_sock_release(struct nes_qp *, unsigned long *);
-void flush_wqes(struct nes_device *nesdev, struct nes_qp *, u32, u32);
-int nes_manage_apbvt(struct nes_vnic *, u32, u32, u32);
-int nes_cm_disconn(struct nes_qp *);
-void nes_cm_disconn_worker(void *);
-
-/* nes_verbs.c */
-int nes_hw_modify_qp(struct nes_device *, struct nes_qp *, u32, u32, u32);
-int nes_modify_qp(struct ib_qp *, struct ib_qp_attr *, int, struct ib_udata *);
-struct nes_ib_device *nes_init_ofa_device(struct net_device *);
-void nes_port_ibevent(struct nes_vnic *nesvnic);
-void nes_destroy_ofa_device(struct nes_ib_device *);
-int nes_register_ofa_device(struct nes_ib_device *);
-
-/* nes_util.c */
-int nes_read_eeprom_values(struct nes_device *, struct nes_adapter *);
-void nes_write_1G_phy_reg(struct nes_device *, u8, u8, u16);
-void nes_read_1G_phy_reg(struct nes_device *, u8, u8, u16 *);
-void nes_write_10G_phy_reg(struct nes_device *, u16, u8, u16, u16);
-void nes_read_10G_phy_reg(struct nes_device *, u8, u8, u16);
-struct nes_cqp_request *nes_get_cqp_request(struct nes_device *);
-void nes_free_cqp_request(struct nes_device *nesdev,
- struct nes_cqp_request *cqp_request);
-void nes_put_cqp_request(struct nes_device *nesdev,
- struct nes_cqp_request *cqp_request);
-void nes_post_cqp_request(struct nes_device *, struct nes_cqp_request *);
-int nes_arp_table(struct nes_device *, u32, u8 *, u32);
-void nes_mh_fix(unsigned long);
-void nes_clc(unsigned long);
-void nes_dump_mem(unsigned int, void *, int);
-u32 nes_crc32(u32, u32, u32, u32, u8 *, u32, u32, u32);
-
-#endif /* __NES_H */
diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c
deleted file mode 100644
index fb983df7c157..000000000000
--- a/drivers/infiniband/hw/nes/nes_cm.c
+++ /dev/null
@@ -1,3998 +0,0 @@
-/*
- * Copyright (c) 2006 - 2014 Intel Corporation. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-
-
-#define TCPOPT_TIMESTAMP 8
-
-#include <linux/atomic.h>
-#include <linux/skbuff.h>
-#include <linux/ip.h>
-#include <linux/tcp.h>
-#include <linux/init.h>
-#include <linux/if_arp.h>
-#include <linux/if_vlan.h>
-#include <linux/notifier.h>
-#include <linux/net.h>
-#include <linux/types.h>
-#include <linux/timer.h>
-#include <linux/time.h>
-#include <linux/delay.h>
-#include <linux/etherdevice.h>
-#include <linux/netdevice.h>
-#include <linux/random.h>
-#include <linux/list.h>
-#include <linux/threads.h>
-#include <linux/highmem.h>
-#include <linux/slab.h>
-#include <net/arp.h>
-#include <net/neighbour.h>
-#include <net/route.h>
-#include <net/ip_fib.h>
-#include <net/tcp.h>
-#include <linux/fcntl.h>
-
-#include "nes.h"
-
-u32 cm_packets_sent;
-u32 cm_packets_bounced;
-u32 cm_packets_dropped;
-u32 cm_packets_retrans;
-u32 cm_packets_created;
-u32 cm_packets_received;
-atomic_t cm_listens_created;
-atomic_t cm_listens_destroyed;
-u32 cm_backlog_drops;
-atomic_t cm_loopbacks;
-atomic_t cm_nodes_created;
-atomic_t cm_nodes_destroyed;
-atomic_t cm_accel_dropped_pkts;
-atomic_t cm_resets_recvd;
-
-static inline int mini_cm_accelerated(struct nes_cm_core *, struct nes_cm_node *);
-static struct nes_cm_listener *mini_cm_listen(struct nes_cm_core *, struct nes_vnic *, struct nes_cm_info *);
-static int mini_cm_del_listen(struct nes_cm_core *, struct nes_cm_listener *);
-static struct nes_cm_node *mini_cm_connect(struct nes_cm_core *, struct nes_vnic *, u16, void *, struct nes_cm_info *);
-static int mini_cm_close(struct nes_cm_core *, struct nes_cm_node *);
-static int mini_cm_accept(struct nes_cm_core *, struct nes_cm_node *);
-static int mini_cm_reject(struct nes_cm_core *, struct nes_cm_node *);
-static int mini_cm_recv_pkt(struct nes_cm_core *, struct nes_vnic *, struct sk_buff *);
-static int mini_cm_dealloc_core(struct nes_cm_core *);
-static int mini_cm_get(struct nes_cm_core *);
-static int mini_cm_set(struct nes_cm_core *, u32, u32);
-
-static void form_cm_frame(struct sk_buff *, struct nes_cm_node *, void *, u32, void *, u32, u8);
-static int add_ref_cm_node(struct nes_cm_node *);
-static int rem_ref_cm_node(struct nes_cm_core *, struct nes_cm_node *);
-
-static int nes_cm_disconn_true(struct nes_qp *);
-static int nes_cm_post_event(struct nes_cm_event *event);
-static int nes_disconnect(struct nes_qp *nesqp, int abrupt);
-static void nes_disconnect_worker(struct work_struct *work);
-
-static int send_mpa_request(struct nes_cm_node *, struct sk_buff *);
-static int send_mpa_reject(struct nes_cm_node *);
-static int send_syn(struct nes_cm_node *, u32, struct sk_buff *);
-static int send_reset(struct nes_cm_node *, struct sk_buff *);
-static int send_ack(struct nes_cm_node *cm_node, struct sk_buff *skb);
-static int send_fin(struct nes_cm_node *cm_node, struct sk_buff *skb);
-static void process_packet(struct nes_cm_node *, struct sk_buff *, struct nes_cm_core *);
-
-static void active_open_err(struct nes_cm_node *, struct sk_buff *, int);
-static void passive_open_err(struct nes_cm_node *, struct sk_buff *, int);
-static void cleanup_retrans_entry(struct nes_cm_node *);
-static void handle_rcv_mpa(struct nes_cm_node *, struct sk_buff *);
-static void free_retrans_entry(struct nes_cm_node *cm_node);
-static int handle_tcp_options(struct nes_cm_node *cm_node, struct tcphdr *tcph, struct sk_buff *skb, int optionsize, int passive);
-
-/* CM event handler functions */
-static void cm_event_connected(struct nes_cm_event *);
-static void cm_event_connect_error(struct nes_cm_event *);
-static void cm_event_reset(struct nes_cm_event *);
-static void cm_event_mpa_req(struct nes_cm_event *);
-static void cm_event_mpa_reject(struct nes_cm_event *);
-static void handle_recv_entry(struct nes_cm_node *cm_node, u32 rem_node);
-
-/* MPA build functions */
-static int cm_build_mpa_frame(struct nes_cm_node *, u8 **, u16 *, u8 *, u8);
-static void build_mpa_v2(struct nes_cm_node *, void *, u8);
-static void build_mpa_v1(struct nes_cm_node *, void *, u8);
-static void build_rdma0_msg(struct nes_cm_node *, struct nes_qp **);
-
-static void print_core(struct nes_cm_core *core);
-static void record_ird_ord(struct nes_cm_node *, u16, u16);
-
-/* External CM API Interface */
-/* instance of function pointers for client API */
-/* set address of this instance to cm_core->cm_ops at cm_core alloc */
-static const struct nes_cm_ops nes_cm_api = {
- .accelerated = mini_cm_accelerated,
- .listen = mini_cm_listen,
- .stop_listener = mini_cm_del_listen,
- .connect = mini_cm_connect,
- .close = mini_cm_close,
- .accept = mini_cm_accept,
- .reject = mini_cm_reject,
- .recv_pkt = mini_cm_recv_pkt,
- .destroy_cm_core = mini_cm_dealloc_core,
- .get = mini_cm_get,
- .set = mini_cm_set
-};
-
-static struct nes_cm_core *g_cm_core;
-
-atomic_t cm_connects;
-atomic_t cm_accepts;
-atomic_t cm_disconnects;
-atomic_t cm_closes;
-atomic_t cm_connecteds;
-atomic_t cm_connect_reqs;
-atomic_t cm_rejects;
-
-int nes_add_ref_cm_node(struct nes_cm_node *cm_node)
-{
- return add_ref_cm_node(cm_node);
-}
-
-int nes_rem_ref_cm_node(struct nes_cm_node *cm_node)
-{
- return rem_ref_cm_node(cm_node->cm_core, cm_node);
-}
-/**
- * create_event
- */
-static struct nes_cm_event *create_event(struct nes_cm_node * cm_node,
- enum nes_cm_event_type type)
-{
- struct nes_cm_event *event;
-
- if (!cm_node->cm_id)
- return NULL;
-
- /* allocate an empty event */
- event = kzalloc(sizeof(*event), GFP_ATOMIC);
-
- if (!event)
- return NULL;
-
- event->type = type;
- event->cm_node = cm_node;
- event->cm_info.rem_addr = cm_node->rem_addr;
- event->cm_info.loc_addr = cm_node->loc_addr;
- event->cm_info.rem_port = cm_node->rem_port;
- event->cm_info.loc_port = cm_node->loc_port;
- event->cm_info.cm_id = cm_node->cm_id;
-
- nes_debug(NES_DBG_CM, "cm_node=%p Created event=%p, type=%u, "
- "dst_addr=%08x[%x], src_addr=%08x[%x]\n",
- cm_node, event, type, event->cm_info.loc_addr,
- event->cm_info.loc_port, event->cm_info.rem_addr,
- event->cm_info.rem_port);
-
- nes_cm_post_event(event);
- return event;
-}
-
-
-/**
- * send_mpa_request
- */
-static int send_mpa_request(struct nes_cm_node *cm_node, struct sk_buff *skb)
-{
- u8 start_addr = 0;
- u8 *start_ptr = &start_addr;
- u8 **start_buff = &start_ptr;
- u16 buff_len = 0;
-
- if (!skb) {
- nes_debug(NES_DBG_CM, "skb set to NULL\n");
- return -1;
- }
-
- /* send an MPA Request frame */
- cm_build_mpa_frame(cm_node, start_buff, &buff_len, NULL, MPA_KEY_REQUEST);
- form_cm_frame(skb, cm_node, NULL, 0, *start_buff, buff_len, SET_ACK);
-
- return schedule_nes_timer(cm_node, skb, NES_TIMER_TYPE_SEND, 1, 0);
-}
-
-
-
-static int send_mpa_reject(struct nes_cm_node *cm_node)
-{
- struct sk_buff *skb = NULL;
- u8 start_addr = 0;
- u8 *start_ptr = &start_addr;
- u8 **start_buff = &start_ptr;
- u16 buff_len = 0;
- struct ietf_mpa_v1 *mpa_frame;
-
- skb = dev_alloc_skb(MAX_CM_BUFFER);
- if (!skb) {
- nes_debug(NES_DBG_CM, "Failed to get a Free pkt\n");
- return -ENOMEM;
- }
-
- /* send an MPA reject frame */
- cm_build_mpa_frame(cm_node, start_buff, &buff_len, NULL, MPA_KEY_REPLY);
- mpa_frame = (struct ietf_mpa_v1 *)*start_buff;
- mpa_frame->flags |= IETF_MPA_FLAGS_REJECT;
- form_cm_frame(skb, cm_node, NULL, 0, *start_buff, buff_len, SET_ACK | SET_FIN);
-
- cm_node->state = NES_CM_STATE_FIN_WAIT1;
- return schedule_nes_timer(cm_node, skb, NES_TIMER_TYPE_SEND, 1, 0);
-}
-
-
-/**
- * recv_mpa - process a received TCP pkt, we are expecting an
- * IETF MPA frame
- */
-static int parse_mpa(struct nes_cm_node *cm_node, u8 *buffer, u32 *type,
- u32 len)
-{
- struct ietf_mpa_v1 *mpa_frame;
- struct ietf_mpa_v2 *mpa_v2_frame;
- struct ietf_rtr_msg *rtr_msg;
- int mpa_hdr_len;
- int priv_data_len;
-
- *type = NES_MPA_REQUEST_ACCEPT;
-
- /* assume req frame is in tcp data payload */
- if (len < sizeof(struct ietf_mpa_v1)) {
- nes_debug(NES_DBG_CM, "The received ietf buffer was too small (%x)\n", len);
- return -EINVAL;
- }
-
- /* points to the beginning of the frame, which could be MPA V1 or V2 */
- mpa_frame = (struct ietf_mpa_v1 *)buffer;
- mpa_hdr_len = sizeof(struct ietf_mpa_v1);
- priv_data_len = ntohs(mpa_frame->priv_data_len);
-
- /* make sure mpa private data len is less than 512 bytes */
- if (priv_data_len > IETF_MAX_PRIV_DATA_LEN) {
- nes_debug(NES_DBG_CM, "The received Length of Private"
- " Data field exceeds 512 octets\n");
- return -EINVAL;
- }
- /*
- * make sure MPA receiver interoperate with the
- * received MPA version and MPA key information
- *
- */
- if (mpa_frame->rev != IETF_MPA_V1 && mpa_frame->rev != IETF_MPA_V2) {
- nes_debug(NES_DBG_CM, "The received mpa version"
- " is not supported\n");
- return -EINVAL;
- }
- /*
- * backwards compatibility only
- */
- if (mpa_frame->rev > cm_node->mpa_frame_rev) {
- nes_debug(NES_DBG_CM, "The received mpa version"
- " can not be interoperated\n");
- return -EINVAL;
- } else {
- cm_node->mpa_frame_rev = mpa_frame->rev;
- }
-
- if (cm_node->state != NES_CM_STATE_MPAREQ_SENT) {
- if (memcmp(mpa_frame->key, IEFT_MPA_KEY_REQ, IETF_MPA_KEY_SIZE)) {
- nes_debug(NES_DBG_CM, "Unexpected MPA Key received \n");
- return -EINVAL;
- }
- } else {
- if (memcmp(mpa_frame->key, IEFT_MPA_KEY_REP, IETF_MPA_KEY_SIZE)) {
- nes_debug(NES_DBG_CM, "Unexpected MPA Key received \n");
- return -EINVAL;
- }
- }
-
- if (priv_data_len + mpa_hdr_len != len) {
- nes_debug(NES_DBG_CM, "The received ietf buffer was not right"
- " complete (%x + %x != %x)\n",
- priv_data_len, mpa_hdr_len, len);
- return -EINVAL;
- }
- /* make sure it does not exceed the max size */
- if (len > MAX_CM_BUFFER) {
- nes_debug(NES_DBG_CM, "The received ietf buffer was too large"
- " (%x + %x != %x)\n",
- priv_data_len, mpa_hdr_len, len);
- return -EINVAL;
- }
-
- cm_node->mpa_frame_size = priv_data_len;
-
- switch (mpa_frame->rev) {
- case IETF_MPA_V2: {
- u16 ird_size;
- u16 ord_size;
- u16 rtr_ctrl_ird;
- u16 rtr_ctrl_ord;
-
- mpa_v2_frame = (struct ietf_mpa_v2 *)buffer;
- mpa_hdr_len += IETF_RTR_MSG_SIZE;
- cm_node->mpa_frame_size -= IETF_RTR_MSG_SIZE;
- rtr_msg = &mpa_v2_frame->rtr_msg;
-
- /* parse rtr message */
- rtr_ctrl_ird = ntohs(rtr_msg->ctrl_ird);
- rtr_ctrl_ord = ntohs(rtr_msg->ctrl_ord);
- ird_size = rtr_ctrl_ird & IETF_NO_IRD_ORD;
- ord_size = rtr_ctrl_ord & IETF_NO_IRD_ORD;
-
- if (!(rtr_ctrl_ird & IETF_PEER_TO_PEER)) {
- /* send reset */
- return -EINVAL;
- }
- if (ird_size == IETF_NO_IRD_ORD || ord_size == IETF_NO_IRD_ORD)
- cm_node->mpav2_ird_ord = IETF_NO_IRD_ORD;
-
- if (cm_node->mpav2_ird_ord != IETF_NO_IRD_ORD) {
- /* responder */
- if (cm_node->state != NES_CM_STATE_MPAREQ_SENT) {
- /* we are still negotiating */
- if (ord_size > NES_MAX_IRD) {
- cm_node->ird_size = NES_MAX_IRD;
- } else {
- cm_node->ird_size = ord_size;
- if (ord_size == 0 &&
- (rtr_ctrl_ord & IETF_RDMA0_READ)) {
- cm_node->ird_size = 1;
- nes_debug(NES_DBG_CM,
- "%s: Remote peer doesn't support RDMA0_READ (ord=%u)\n",
- __func__, ord_size);
- }
- }
- if (ird_size > NES_MAX_ORD)
- cm_node->ord_size = NES_MAX_ORD;
- else
- cm_node->ord_size = ird_size;
- } else { /* initiator */
- if (ord_size > NES_MAX_IRD) {
- nes_debug(NES_DBG_CM,
- "%s: Unable to support the requested (ord =%u)\n",
- __func__, ord_size);
- return -EINVAL;
- }
- cm_node->ird_size = ord_size;
-
- if (ird_size > NES_MAX_ORD) {
- cm_node->ord_size = NES_MAX_ORD;
- } else {
- if (ird_size == 0 &&
- (rtr_ctrl_ord & IETF_RDMA0_READ)) {
- nes_debug(NES_DBG_CM,
- "%s: Remote peer doesn't support RDMA0_READ (ird=%u)\n",
- __func__, ird_size);
- return -EINVAL;
- } else {
- cm_node->ord_size = ird_size;
- }
- }
- }
- }
-
- if (rtr_ctrl_ord & IETF_RDMA0_READ) {
- cm_node->send_rdma0_op = SEND_RDMA_READ_ZERO;
-
- } else if (rtr_ctrl_ord & IETF_RDMA0_WRITE) {
- cm_node->send_rdma0_op = SEND_RDMA_WRITE_ZERO;
- } else { /* Not supported RDMA0 operation */
- return -EINVAL;
- }
- break;
- }
- case IETF_MPA_V1:
- default:
- break;
- }
-
- /* copy entire MPA frame to our cm_node's frame */
- memcpy(cm_node->mpa_frame_buf, buffer + mpa_hdr_len, cm_node->mpa_frame_size);
-
- if (mpa_frame->flags & IETF_MPA_FLAGS_REJECT)
- *type = NES_MPA_REQUEST_REJECT;
- return 0;
-}
-
-
-/**
- * form_cm_frame - get a free packet and build empty frame Use
- * node info to build.
- */
-static void form_cm_frame(struct sk_buff *skb,
- struct nes_cm_node *cm_node, void *options, u32 optionsize,
- void *data, u32 datasize, u8 flags)
-{
- struct tcphdr *tcph;
- struct iphdr *iph;
- struct ethhdr *ethh;
- u8 *buf;
- u16 packetsize = sizeof(*iph);
-
- packetsize += sizeof(*tcph);
- packetsize += optionsize + datasize;
-
- skb_trim(skb, 0);
- memset(skb->data, 0x00, ETH_HLEN + sizeof(*iph) + sizeof(*tcph));
-
- buf = skb_put(skb, packetsize + ETH_HLEN);
-
- ethh = (struct ethhdr *)buf;
- buf += ETH_HLEN;
-
- iph = (struct iphdr *)buf;
- buf += sizeof(*iph);
- tcph = (struct tcphdr *)buf;
- skb_reset_mac_header(skb);
- skb_set_network_header(skb, ETH_HLEN);
- skb_set_transport_header(skb, ETH_HLEN + sizeof(*iph));
- buf += sizeof(*tcph);
-
- skb->ip_summed = CHECKSUM_PARTIAL;
- if (!(cm_node->netdev->features & NETIF_F_IP_CSUM))
- skb->ip_summed = CHECKSUM_NONE;
- skb->protocol = htons(0x800);
- skb->data_len = 0;
- skb->mac_len = ETH_HLEN;
-
- memcpy(ethh->h_dest, cm_node->rem_mac, ETH_ALEN);
- memcpy(ethh->h_source, cm_node->loc_mac, ETH_ALEN);
- ethh->h_proto = htons(0x0800);
-
- iph->version = IPVERSION;
- iph->ihl = 5; /* 5 * 4Byte words, IP headr len */
- iph->tos = 0;
- iph->tot_len = htons(packetsize);
- iph->id = htons(++cm_node->tcp_cntxt.loc_id);
-
- iph->frag_off = htons(0x4000);
- iph->ttl = 0x40;
- iph->protocol = 0x06; /* IPPROTO_TCP */
-
- iph->saddr = htonl(cm_node->loc_addr);
- iph->daddr = htonl(cm_node->rem_addr);
-
- tcph->source = htons(cm_node->loc_port);
- tcph->dest = htons(cm_node->rem_port);
- tcph->seq = htonl(cm_node->tcp_cntxt.loc_seq_num);
-
- if (flags & SET_ACK) {
- cm_node->tcp_cntxt.loc_ack_num = cm_node->tcp_cntxt.rcv_nxt;
- tcph->ack_seq = htonl(cm_node->tcp_cntxt.loc_ack_num);
- tcph->ack = 1;
- } else {
- tcph->ack_seq = 0;
- }
-
- if (flags & SET_SYN) {
- cm_node->tcp_cntxt.loc_seq_num++;
- tcph->syn = 1;
- } else {
- cm_node->tcp_cntxt.loc_seq_num += datasize;
- }
-
- if (flags & SET_FIN) {
- cm_node->tcp_cntxt.loc_seq_num++;
- tcph->fin = 1;
- }
-
- if (flags & SET_RST)
- tcph->rst = 1;
-
- tcph->doff = (u16)((sizeof(*tcph) + optionsize + 3) >> 2);
- tcph->window = htons(cm_node->tcp_cntxt.rcv_wnd);
- tcph->urg_ptr = 0;
- if (optionsize)
- memcpy(buf, options, optionsize);
- buf += optionsize;
- if (datasize)
- memcpy(buf, data, datasize);
-
- skb_shinfo(skb)->nr_frags = 0;
- cm_packets_created++;
-}
-
-/**
- * print_core - dump a cm core
- */
-static void print_core(struct nes_cm_core *core)
-{
- nes_debug(NES_DBG_CM, "---------------------------------------------\n");
- nes_debug(NES_DBG_CM, "CM Core -- (core = %p )\n", core);
- if (!core)
- return;
- nes_debug(NES_DBG_CM, "---------------------------------------------\n");
-
- nes_debug(NES_DBG_CM, "State : %u \n", core->state);
-
- nes_debug(NES_DBG_CM, "Listen Nodes : %u \n", atomic_read(&core->listen_node_cnt));
- nes_debug(NES_DBG_CM, "Active Nodes : %u \n", atomic_read(&core->node_cnt));
-
- nes_debug(NES_DBG_CM, "core : %p \n", core);
-
- nes_debug(NES_DBG_CM, "-------------- end core ---------------\n");
-}
-
-static void record_ird_ord(struct nes_cm_node *cm_node,
- u16 conn_ird, u16 conn_ord)
-{
- if (conn_ird > NES_MAX_IRD)
- conn_ird = NES_MAX_IRD;
-
- if (conn_ord > NES_MAX_ORD)
- conn_ord = NES_MAX_ORD;
-
- cm_node->ird_size = conn_ird;
- cm_node->ord_size = conn_ord;
-}
-
-/**
- * cm_build_mpa_frame - build a MPA V1 frame or MPA V2 frame
- */
-static int cm_build_mpa_frame(struct nes_cm_node *cm_node, u8 **start_buff,
- u16 *buff_len, u8 *pci_mem, u8 mpa_key)
-{
- int ret = 0;
-
- *start_buff = (pci_mem) ? pci_mem : &cm_node->mpa_frame_buf[0];
-
- switch (cm_node->mpa_frame_rev) {
- case IETF_MPA_V1:
- *start_buff = (u8 *)*start_buff + sizeof(struct ietf_rtr_msg);
- *buff_len = sizeof(struct ietf_mpa_v1) + cm_node->mpa_frame_size;
- build_mpa_v1(cm_node, *start_buff, mpa_key);
- break;
- case IETF_MPA_V2:
- *buff_len = sizeof(struct ietf_mpa_v2) + cm_node->mpa_frame_size;
- build_mpa_v2(cm_node, *start_buff, mpa_key);
- break;
- default:
- ret = -EINVAL;
- }
- return ret;
-}
-
-/**
- * build_mpa_v2 - build a MPA V2 frame
- */
-static void build_mpa_v2(struct nes_cm_node *cm_node,
- void *start_addr, u8 mpa_key)
-{
- struct ietf_mpa_v2 *mpa_frame = (struct ietf_mpa_v2 *)start_addr;
- struct ietf_rtr_msg *rtr_msg = &mpa_frame->rtr_msg;
- u16 ctrl_ird;
- u16 ctrl_ord;
-
- /* initialize the upper 5 bytes of the frame */
- build_mpa_v1(cm_node, start_addr, mpa_key);
- mpa_frame->flags |= IETF_MPA_V2_FLAG; /* set a bit to indicate MPA V2 */
- mpa_frame->priv_data_len += htons(IETF_RTR_MSG_SIZE);
-
- /* initialize RTR msg */
- if (cm_node->mpav2_ird_ord == IETF_NO_IRD_ORD) {
- ctrl_ird = IETF_NO_IRD_ORD;
- ctrl_ord = IETF_NO_IRD_ORD;
- } else {
- ctrl_ird = cm_node->ird_size & IETF_NO_IRD_ORD;
- ctrl_ord = cm_node->ord_size & IETF_NO_IRD_ORD;
- }
- ctrl_ird |= IETF_PEER_TO_PEER;
- ctrl_ird |= IETF_FLPDU_ZERO_LEN;
-
- switch (mpa_key) {
- case MPA_KEY_REQUEST:
- ctrl_ord |= IETF_RDMA0_WRITE;
- ctrl_ord |= IETF_RDMA0_READ;
- break;
- case MPA_KEY_REPLY:
- switch (cm_node->send_rdma0_op) {
- case SEND_RDMA_WRITE_ZERO:
- ctrl_ord |= IETF_RDMA0_WRITE;
- break;
- case SEND_RDMA_READ_ZERO:
- ctrl_ord |= IETF_RDMA0_READ;
- break;
- }
- }
- rtr_msg->ctrl_ird = htons(ctrl_ird);
- rtr_msg->ctrl_ord = htons(ctrl_ord);
-}
-
-/**
- * build_mpa_v1 - build a MPA V1 frame
- */
-static void build_mpa_v1(struct nes_cm_node *cm_node, void *start_addr, u8 mpa_key)
-{
- struct ietf_mpa_v1 *mpa_frame = (struct ietf_mpa_v1 *)start_addr;
-
- switch (mpa_key) {
- case MPA_KEY_REQUEST:
- memcpy(mpa_frame->key, IEFT_MPA_KEY_REQ, IETF_MPA_KEY_SIZE);
- break;
- case MPA_KEY_REPLY:
- memcpy(mpa_frame->key, IEFT_MPA_KEY_REP, IETF_MPA_KEY_SIZE);
- break;
- }
- mpa_frame->flags = IETF_MPA_FLAGS_CRC;
- mpa_frame->rev = cm_node->mpa_frame_rev;
- mpa_frame->priv_data_len = htons(cm_node->mpa_frame_size);
-}
-
-static void build_rdma0_msg(struct nes_cm_node *cm_node, struct nes_qp **nesqp_addr)
-{
- u64 u64temp;
- struct nes_qp *nesqp = *nesqp_addr;
- struct nes_hw_qp_wqe *wqe = &nesqp->hwqp.sq_vbase[0];
-
- u64temp = (unsigned long)nesqp->nesuqp_addr;
- u64temp |= NES_SW_CONTEXT_ALIGN >> 1;
- set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_COMP_CTX_LOW_IDX, u64temp);
-
- wqe->wqe_words[NES_IWARP_SQ_WQE_FRAG0_LOW_IDX] = 0;
- wqe->wqe_words[NES_IWARP_SQ_WQE_FRAG0_HIGH_IDX] = 0;
-
- switch (cm_node->send_rdma0_op) {
- case SEND_RDMA_WRITE_ZERO:
- nes_debug(NES_DBG_CM, "Sending first write.\n");
- wqe->wqe_words[NES_IWARP_SQ_WQE_MISC_IDX] =
- cpu_to_le32(NES_IWARP_SQ_OP_RDMAW);
- wqe->wqe_words[NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX] = 0;
- wqe->wqe_words[NES_IWARP_SQ_WQE_LENGTH0_IDX] = 0;
- wqe->wqe_words[NES_IWARP_SQ_WQE_STAG0_IDX] = 0;
- break;
-
- case SEND_RDMA_READ_ZERO:
- default:
- if (cm_node->send_rdma0_op != SEND_RDMA_READ_ZERO)
- WARN(1, "Unsupported RDMA0 len operation=%u\n",
- cm_node->send_rdma0_op);
- nes_debug(NES_DBG_CM, "Sending first rdma operation.\n");
- wqe->wqe_words[NES_IWARP_SQ_WQE_MISC_IDX] =
- cpu_to_le32(NES_IWARP_SQ_OP_RDMAR);
- wqe->wqe_words[NES_IWARP_SQ_WQE_RDMA_TO_LOW_IDX] = 1;
- wqe->wqe_words[NES_IWARP_SQ_WQE_RDMA_TO_HIGH_IDX] = 0;
- wqe->wqe_words[NES_IWARP_SQ_WQE_RDMA_LENGTH_IDX] = 0;
- wqe->wqe_words[NES_IWARP_SQ_WQE_RDMA_STAG_IDX] = 1;
- wqe->wqe_words[NES_IWARP_SQ_WQE_STAG0_IDX] = 1;
- break;
- }
-
- if (nesqp->sq_kmapped) {
- nesqp->sq_kmapped = 0;
- kunmap(nesqp->page);
- }
-
- /*use the reserved spot on the WQ for the extra first WQE*/
- nesqp->nesqp_context->ird_ord_sizes &= cpu_to_le32(~(NES_QPCONTEXT_ORDIRD_LSMM_PRESENT |
- NES_QPCONTEXT_ORDIRD_WRPDU |
- NES_QPCONTEXT_ORDIRD_ALSMM));
- nesqp->skip_lsmm = 1;
- nesqp->hwqp.sq_tail = 0;
-}
-
-/**
- * schedule_nes_timer
- * note - cm_node needs to be protected before calling this. Encase in:
- * rem_ref_cm_node(cm_core, cm_node);add_ref_cm_node(cm_node);
- */
-int schedule_nes_timer(struct nes_cm_node *cm_node, struct sk_buff *skb,
- enum nes_timer_type type, int send_retrans,
- int close_when_complete)
-{
- unsigned long flags;
- struct nes_cm_core *cm_core = cm_node->cm_core;
- struct nes_timer_entry *new_send;
- int ret = 0;
-
- new_send = kzalloc(sizeof(*new_send), GFP_ATOMIC);
- if (!new_send)
- return -ENOMEM;
-
- /* new_send->timetosend = currenttime */
- new_send->retrycount = NES_DEFAULT_RETRYS;
- new_send->retranscount = NES_DEFAULT_RETRANS;
- new_send->skb = skb;
- new_send->timetosend = jiffies;
- new_send->type = type;
- new_send->netdev = cm_node->netdev;
- new_send->send_retrans = send_retrans;
- new_send->close_when_complete = close_when_complete;
-
- if (type == NES_TIMER_TYPE_CLOSE) {
- new_send->timetosend += (HZ / 10);
- if (cm_node->recv_entry) {
- kfree(new_send);
- WARN_ON(1);
- return -EINVAL;
- }
- cm_node->recv_entry = new_send;
- }
-
- if (type == NES_TIMER_TYPE_SEND) {
- new_send->seq_num = ntohl(tcp_hdr(skb)->seq);
- atomic_inc(&new_send->skb->users);
- spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
- cm_node->send_entry = new_send;
- add_ref_cm_node(cm_node);
- spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
- new_send->timetosend = jiffies + NES_RETRY_TIMEOUT;
-
- ret = nes_nic_cm_xmit(new_send->skb, cm_node->netdev);
- if (ret != NETDEV_TX_OK) {
- nes_debug(NES_DBG_CM, "Error sending packet %p "
- "(jiffies = %lu)\n", new_send, jiffies);
- new_send->timetosend = jiffies;
- ret = NETDEV_TX_OK;
- } else {
- cm_packets_sent++;
- if (!send_retrans) {
- cleanup_retrans_entry(cm_node);
- if (close_when_complete)
- rem_ref_cm_node(cm_core, cm_node);
- return ret;
- }
- }
- }
-
- if (!timer_pending(&cm_core->tcp_timer))
- mod_timer(&cm_core->tcp_timer, new_send->timetosend);
-
- return ret;
-}
-
-static void nes_retrans_expired(struct nes_cm_node *cm_node)
-{
- struct iw_cm_id *cm_id = cm_node->cm_id;
- enum nes_cm_node_state state = cm_node->state;
- cm_node->state = NES_CM_STATE_CLOSED;
-
- switch (state) {
- case NES_CM_STATE_SYN_RCVD:
- case NES_CM_STATE_CLOSING:
- rem_ref_cm_node(cm_node->cm_core, cm_node);
- break;
- case NES_CM_STATE_LAST_ACK:
- case NES_CM_STATE_FIN_WAIT1:
- if (cm_node->cm_id)
- cm_id->rem_ref(cm_id);
- send_reset(cm_node, NULL);
- break;
- default:
- add_ref_cm_node(cm_node);
- send_reset(cm_node, NULL);
- create_event(cm_node, NES_CM_EVENT_ABORTED);
- }
-}
-
-static void handle_recv_entry(struct nes_cm_node *cm_node, u32 rem_node)
-{
- struct nes_timer_entry *recv_entry = cm_node->recv_entry;
- struct iw_cm_id *cm_id = cm_node->cm_id;
- struct nes_qp *nesqp;
- unsigned long qplockflags;
-
- if (!recv_entry)
- return;
- nesqp = (struct nes_qp *)recv_entry->skb;
- if (nesqp) {
- spin_lock_irqsave(&nesqp->lock, qplockflags);
- if (nesqp->cm_id) {
- nes_debug(NES_DBG_CM, "QP%u: cm_id = %p, "
- "refcount = %d: HIT A "
- "NES_TIMER_TYPE_CLOSE with something "
- "to do!!!\n", nesqp->hwqp.qp_id, cm_id,
- atomic_read(&nesqp->refcount));
- nesqp->hw_tcp_state = NES_AEQE_TCP_STATE_CLOSED;
- nesqp->last_aeq = NES_AEQE_AEID_RESET_SENT;
- nesqp->ibqp_state = IB_QPS_ERR;
- spin_unlock_irqrestore(&nesqp->lock, qplockflags);
- nes_cm_disconn(nesqp);
- } else {
- spin_unlock_irqrestore(&nesqp->lock, qplockflags);
- nes_debug(NES_DBG_CM, "QP%u: cm_id = %p, "
- "refcount = %d: HIT A "
- "NES_TIMER_TYPE_CLOSE with nothing "
- "to do!!!\n", nesqp->hwqp.qp_id, cm_id,
- atomic_read(&nesqp->refcount));
- }
- } else if (rem_node) {
- /* TIME_WAIT state */
- rem_ref_cm_node(cm_node->cm_core, cm_node);
- }
- if (cm_node->cm_id)
- cm_id->rem_ref(cm_id);
- kfree(recv_entry);
- cm_node->recv_entry = NULL;
-}
-
-/**
- * nes_cm_timer_tick
- */
-static void nes_cm_timer_tick(unsigned long pass)
-{
- unsigned long flags;
- unsigned long nexttimeout = jiffies + NES_LONG_TIME;
- struct nes_cm_node *cm_node;
- struct nes_timer_entry *send_entry, *recv_entry;
- struct list_head *list_core_temp;
- struct list_head *list_node;
- struct nes_cm_core *cm_core = g_cm_core;
- u32 settimer = 0;
- unsigned long timetosend;
- int ret = NETDEV_TX_OK;
-
- struct list_head timer_list;
-
- INIT_LIST_HEAD(&timer_list);
- spin_lock_irqsave(&cm_core->ht_lock, flags);
-
- list_for_each_safe(list_node, list_core_temp,
- &cm_core->connected_nodes) {
- cm_node = container_of(list_node, struct nes_cm_node, list);
- if ((cm_node->recv_entry) || (cm_node->send_entry)) {
- add_ref_cm_node(cm_node);
- list_add(&cm_node->timer_entry, &timer_list);
- }
- }
- spin_unlock_irqrestore(&cm_core->ht_lock, flags);
-
- list_for_each_safe(list_node, list_core_temp, &timer_list) {
- cm_node = container_of(list_node, struct nes_cm_node,
- timer_entry);
- recv_entry = cm_node->recv_entry;
-
- if (recv_entry) {
- if (time_after(recv_entry->timetosend, jiffies)) {
- if (nexttimeout > recv_entry->timetosend ||
- !settimer) {
- nexttimeout = recv_entry->timetosend;
- settimer = 1;
- }
- } else {
- handle_recv_entry(cm_node, 1);
- }
- }
-
- spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
- do {
- send_entry = cm_node->send_entry;
- if (!send_entry)
- break;
- if (time_after(send_entry->timetosend, jiffies)) {
- if (cm_node->state != NES_CM_STATE_TSA) {
- if ((nexttimeout >
- send_entry->timetosend) ||
- !settimer) {
- nexttimeout =
- send_entry->timetosend;
- settimer = 1;
- }
- } else {
- free_retrans_entry(cm_node);
- }
- break;
- }
-
- if ((cm_node->state == NES_CM_STATE_TSA) ||
- (cm_node->state == NES_CM_STATE_CLOSED)) {
- free_retrans_entry(cm_node);
- break;
- }
-
- if (!send_entry->retranscount ||
- !send_entry->retrycount) {
- cm_packets_dropped++;
- free_retrans_entry(cm_node);
-
- spin_unlock_irqrestore(
- &cm_node->retrans_list_lock, flags);
- nes_retrans_expired(cm_node);
- cm_node->state = NES_CM_STATE_CLOSED;
- spin_lock_irqsave(&cm_node->retrans_list_lock,
- flags);
- break;
- }
- atomic_inc(&send_entry->skb->users);
- cm_packets_retrans++;
- nes_debug(NES_DBG_CM, "Retransmitting send_entry %p "
- "for node %p, jiffies = %lu, time to send = "
- "%lu, retranscount = %u, send_entry->seq_num = "
- "0x%08X, cm_node->tcp_cntxt.rem_ack_num = "
- "0x%08X\n", send_entry, cm_node, jiffies,
- send_entry->timetosend,
- send_entry->retranscount,
- send_entry->seq_num,
- cm_node->tcp_cntxt.rem_ack_num);
-
- spin_unlock_irqrestore(&cm_node->retrans_list_lock,
- flags);
- ret = nes_nic_cm_xmit(send_entry->skb, cm_node->netdev);
- spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
- if (ret != NETDEV_TX_OK) {
- nes_debug(NES_DBG_CM, "rexmit failed for "
- "node=%p\n", cm_node);
- cm_packets_bounced++;
- send_entry->retrycount--;
- nexttimeout = jiffies + NES_SHORT_TIME;
- settimer = 1;
- break;
- } else {
- cm_packets_sent++;
- }
- nes_debug(NES_DBG_CM, "Packet Sent: retrans count = "
- "%u, retry count = %u.\n",
- send_entry->retranscount,
- send_entry->retrycount);
- if (send_entry->send_retrans) {
- send_entry->retranscount--;
- timetosend = (NES_RETRY_TIMEOUT <<
- (NES_DEFAULT_RETRANS - send_entry->retranscount));
-
- send_entry->timetosend = jiffies +
- min(timetosend, NES_MAX_TIMEOUT);
- if (nexttimeout > send_entry->timetosend ||
- !settimer) {
- nexttimeout = send_entry->timetosend;
- settimer = 1;
- }
- } else {
- int close_when_complete;
- close_when_complete =
- send_entry->close_when_complete;
- nes_debug(NES_DBG_CM, "cm_node=%p state=%d\n",
- cm_node, cm_node->state);
- free_retrans_entry(cm_node);
- if (close_when_complete)
- rem_ref_cm_node(cm_node->cm_core,
- cm_node);
- }
- } while (0);
-
- spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
- rem_ref_cm_node(cm_node->cm_core, cm_node);
- }
-
- if (settimer) {
- if (!timer_pending(&cm_core->tcp_timer))
- mod_timer(&cm_core->tcp_timer, nexttimeout);
- }
-}
-
-
-/**
- * send_syn
- */
-static int send_syn(struct nes_cm_node *cm_node, u32 sendack,
- struct sk_buff *skb)
-{
- int ret;
- int flags = SET_SYN;
- char optionsbuffer[sizeof(struct option_mss) +
- sizeof(struct option_windowscale) + sizeof(struct option_base) +
- TCP_OPTIONS_PADDING];
-
- int optionssize = 0;
- /* Sending MSS option */
- union all_known_options *options;
-
- if (!cm_node)
- return -EINVAL;
-
- options = (union all_known_options *)&optionsbuffer[optionssize];
- options->as_mss.optionnum = OPTION_NUMBER_MSS;
- options->as_mss.length = sizeof(struct option_mss);
- options->as_mss.mss = htons(cm_node->tcp_cntxt.mss);
- optionssize += sizeof(struct option_mss);
-
- options = (union all_known_options *)&optionsbuffer[optionssize];
- options->as_windowscale.optionnum = OPTION_NUMBER_WINDOW_SCALE;
- options->as_windowscale.length = sizeof(struct option_windowscale);
- options->as_windowscale.shiftcount = cm_node->tcp_cntxt.rcv_wscale;
- optionssize += sizeof(struct option_windowscale);
-
- if (sendack && !(NES_DRV_OPT_SUPRESS_OPTION_BC & nes_drv_opt)) {
- options = (union all_known_options *)&optionsbuffer[optionssize];
- options->as_base.optionnum = OPTION_NUMBER_WRITE0;
- options->as_base.length = sizeof(struct option_base);
- optionssize += sizeof(struct option_base);
- /* we need the size to be a multiple of 4 */
- options = (union all_known_options *)&optionsbuffer[optionssize];
- options->as_end = 1;
- optionssize += 1;
- options = (union all_known_options *)&optionsbuffer[optionssize];
- options->as_end = 1;
- optionssize += 1;
- }
-
- options = (union all_known_options *)&optionsbuffer[optionssize];
- options->as_end = OPTION_NUMBER_END;
- optionssize += 1;
-
- if (!skb)
- skb = dev_alloc_skb(MAX_CM_BUFFER);
- if (!skb) {
- nes_debug(NES_DBG_CM, "Failed to get a Free pkt\n");
- return -1;
- }
-
- if (sendack)
- flags |= SET_ACK;
-
- form_cm_frame(skb, cm_node, optionsbuffer, optionssize, NULL, 0, flags);
- ret = schedule_nes_timer(cm_node, skb, NES_TIMER_TYPE_SEND, 1, 0);
-
- return ret;
-}
-
-
-/**
- * send_reset
- */
-static int send_reset(struct nes_cm_node *cm_node, struct sk_buff *skb)
-{
- int ret;
- int flags = SET_RST | SET_ACK;
-
- if (!skb)
- skb = dev_alloc_skb(MAX_CM_BUFFER);
- if (!skb) {
- nes_debug(NES_DBG_CM, "Failed to get a Free pkt\n");
- return -ENOMEM;
- }
-
- form_cm_frame(skb, cm_node, NULL, 0, NULL, 0, flags);
- ret = schedule_nes_timer(cm_node, skb, NES_TIMER_TYPE_SEND, 0, 1);
-
- return ret;
-}
-
-
-/**
- * send_ack
- */
-static int send_ack(struct nes_cm_node *cm_node, struct sk_buff *skb)
-{
- int ret;
-
- if (!skb)
- skb = dev_alloc_skb(MAX_CM_BUFFER);
-
- if (!skb) {
- nes_debug(NES_DBG_CM, "Failed to get a Free pkt\n");
- return -1;
- }
-
- form_cm_frame(skb, cm_node, NULL, 0, NULL, 0, SET_ACK);
- ret = schedule_nes_timer(cm_node, skb, NES_TIMER_TYPE_SEND, 0, 0);
-
- return ret;
-}
-
-
-/**
- * send_fin
- */
-static int send_fin(struct nes_cm_node *cm_node, struct sk_buff *skb)
-{
- int ret;
-
- /* if we didn't get a frame get one */
- if (!skb)
- skb = dev_alloc_skb(MAX_CM_BUFFER);
-
- if (!skb) {
- nes_debug(NES_DBG_CM, "Failed to get a Free pkt\n");
- return -1;
- }
-
- form_cm_frame(skb, cm_node, NULL, 0, NULL, 0, SET_ACK | SET_FIN);
- ret = schedule_nes_timer(cm_node, skb, NES_TIMER_TYPE_SEND, 1, 0);
-
- return ret;
-}
-
-
-/**
- * find_node - find a cm node that matches the reference cm node
- */
-static struct nes_cm_node *find_node(struct nes_cm_core *cm_core,
- u16 rem_port, nes_addr_t rem_addr, u16 loc_port, nes_addr_t loc_addr)
-{
- unsigned long flags;
- struct list_head *hte;
- struct nes_cm_node *cm_node;
-
- /* get a handle on the hte */
- hte = &cm_core->connected_nodes;
-
- /* walk list and find cm_node associated with this session ID */
- spin_lock_irqsave(&cm_core->ht_lock, flags);
- list_for_each_entry(cm_node, hte, list) {
- /* compare quad, return node handle if a match */
- nes_debug(NES_DBG_CM, "finding node %x:%x =? %x:%x ^ %x:%x =? %x:%x\n",
- cm_node->loc_addr, cm_node->loc_port,
- loc_addr, loc_port,
- cm_node->rem_addr, cm_node->rem_port,
- rem_addr, rem_port);
- if ((cm_node->loc_addr == loc_addr) &&
- (cm_node->loc_port == loc_port) &&
- (cm_node->rem_addr == rem_addr) &&
- (cm_node->rem_port == rem_port)) {
- add_ref_cm_node(cm_node);
- spin_unlock_irqrestore(&cm_core->ht_lock, flags);
- return cm_node;
- }
- }
- spin_unlock_irqrestore(&cm_core->ht_lock, flags);
-
- /* no owner node */
- return NULL;
-}
-
-
-/**
- * find_listener - find a cm node listening on this addr-port pair
- */
-static struct nes_cm_listener *find_listener(struct nes_cm_core *cm_core,
- nes_addr_t dst_addr, u16 dst_port,
- enum nes_cm_listener_state listener_state)
-{
- unsigned long flags;
- struct nes_cm_listener *listen_node;
- nes_addr_t listen_addr;
- u16 listen_port;
-
- /* walk list and find cm_node associated with this session ID */
- spin_lock_irqsave(&cm_core->listen_list_lock, flags);
- list_for_each_entry(listen_node, &cm_core->listen_list.list, list) {
- listen_addr = listen_node->loc_addr;
- listen_port = listen_node->loc_port;
-
- /* compare node pair, return node handle if a match */
- if (((listen_addr == dst_addr) ||
- listen_addr == 0x00000000) &&
- (listen_port == dst_port) &&
- (listener_state & listen_node->listener_state)) {
- atomic_inc(&listen_node->ref_count);
- spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
- return listen_node;
- }
- }
- spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
-
- /* no listener */
- return NULL;
-}
-
-/**
- * add_hte_node - add a cm node to the hash table
- */
-static int add_hte_node(struct nes_cm_core *cm_core, struct nes_cm_node *cm_node)
-{
- unsigned long flags;
- struct list_head *hte;
-
- if (!cm_node || !cm_core)
- return -EINVAL;
-
- nes_debug(NES_DBG_CM, "Adding Node %p to Active Connection HT\n",
- cm_node);
-
- spin_lock_irqsave(&cm_core->ht_lock, flags);
-
- /* get a handle on the hash table element (list head for this slot) */
- hte = &cm_core->connected_nodes;
- list_add_tail(&cm_node->list, hte);
- atomic_inc(&cm_core->ht_node_cnt);
-
- spin_unlock_irqrestore(&cm_core->ht_lock, flags);
-
- return 0;
-}
-
-
-/**
- * mini_cm_dec_refcnt_listen
- */
-static int mini_cm_dec_refcnt_listen(struct nes_cm_core *cm_core,
- struct nes_cm_listener *listener, int free_hanging_nodes)
-{
- int ret = -EINVAL;
- int err = 0;
- unsigned long flags;
- struct list_head *list_pos = NULL;
- struct list_head *list_temp = NULL;
- struct nes_cm_node *cm_node = NULL;
- struct list_head reset_list;
-
- nes_debug(NES_DBG_CM, "attempting listener= %p free_nodes= %d, "
- "refcnt=%d\n", listener, free_hanging_nodes,
- atomic_read(&listener->ref_count));
- /* free non-accelerated child nodes for this listener */
- INIT_LIST_HEAD(&reset_list);
- if (free_hanging_nodes) {
- spin_lock_irqsave(&cm_core->ht_lock, flags);
- list_for_each_safe(list_pos, list_temp,
- &g_cm_core->connected_nodes) {
- cm_node = container_of(list_pos, struct nes_cm_node,
- list);
- if ((cm_node->listener == listener) &&
- (!cm_node->accelerated)) {
- add_ref_cm_node(cm_node);
- list_add(&cm_node->reset_entry, &reset_list);
- }
- }
- spin_unlock_irqrestore(&cm_core->ht_lock, flags);
- }
-
- list_for_each_safe(list_pos, list_temp, &reset_list) {
- cm_node = container_of(list_pos, struct nes_cm_node,
- reset_entry);
- {
- struct nes_cm_node *loopback = cm_node->loopbackpartner;
- enum nes_cm_node_state old_state;
- if (NES_CM_STATE_FIN_WAIT1 <= cm_node->state) {
- rem_ref_cm_node(cm_node->cm_core, cm_node);
- } else {
- if (!loopback) {
- cleanup_retrans_entry(cm_node);
- err = send_reset(cm_node, NULL);
- if (err) {
- cm_node->state =
- NES_CM_STATE_CLOSED;
- WARN_ON(1);
- } else {
- old_state = cm_node->state;
- cm_node->state = NES_CM_STATE_LISTENER_DESTROYED;
- if (old_state != NES_CM_STATE_MPAREQ_RCVD)
- rem_ref_cm_node(
- cm_node->cm_core,
- cm_node);
- }
- } else {
- struct nes_cm_event event;
-
- event.cm_node = loopback;
- event.cm_info.rem_addr =
- loopback->rem_addr;
- event.cm_info.loc_addr =
- loopback->loc_addr;
- event.cm_info.rem_port =
- loopback->rem_port;
- event.cm_info.loc_port =
- loopback->loc_port;
- event.cm_info.cm_id = loopback->cm_id;
- add_ref_cm_node(loopback);
- loopback->state = NES_CM_STATE_CLOSED;
- cm_event_connect_error(&event);
- cm_node->state = NES_CM_STATE_LISTENER_DESTROYED;
-
- rem_ref_cm_node(cm_node->cm_core,
- cm_node);
-
- }
- }
- }
- }
-
- spin_lock_irqsave(&cm_core->listen_list_lock, flags);
- if (!atomic_dec_return(&listener->ref_count)) {
- list_del(&listener->list);
-
- /* decrement our listen node count */
- atomic_dec(&cm_core->listen_node_cnt);
-
- spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
-
- if (listener->nesvnic) {
- nes_manage_apbvt(listener->nesvnic,
- listener->loc_port,
- PCI_FUNC(listener->nesvnic->nesdev->pcidev->devfn),
- NES_MANAGE_APBVT_DEL);
-
- nes_debug(NES_DBG_NLMSG,
- "Delete APBVT loc_port = %04X\n",
- listener->loc_port);
- }
-
- nes_debug(NES_DBG_CM, "destroying listener (%p)\n", listener);
-
- kfree(listener);
- listener = NULL;
- ret = 0;
- atomic_inc(&cm_listens_destroyed);
- } else {
- spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
- }
- if (listener) {
- if (atomic_read(&listener->pend_accepts_cnt) > 0)
- nes_debug(NES_DBG_CM, "destroying listener (%p)"
- " with non-zero pending accepts=%u\n",
- listener, atomic_read(&listener->pend_accepts_cnt));
- }
-
- return ret;
-}
-
-
-/**
- * mini_cm_del_listen
- */
-static int mini_cm_del_listen(struct nes_cm_core *cm_core,
- struct nes_cm_listener *listener)
-{
- listener->listener_state = NES_CM_LISTENER_PASSIVE_STATE;
- listener->cm_id = NULL; /* going to be destroyed pretty soon */
- return mini_cm_dec_refcnt_listen(cm_core, listener, 1);
-}
-
-
-/**
- * mini_cm_accelerated
- */
-static inline int mini_cm_accelerated(struct nes_cm_core *cm_core,
- struct nes_cm_node *cm_node)
-{
- cm_node->accelerated = 1;
-
- if (cm_node->accept_pend) {
- BUG_ON(!cm_node->listener);
- atomic_dec(&cm_node->listener->pend_accepts_cnt);
- cm_node->accept_pend = 0;
- BUG_ON(atomic_read(&cm_node->listener->pend_accepts_cnt) < 0);
- }
-
- if (!timer_pending(&cm_core->tcp_timer))
- mod_timer(&cm_core->tcp_timer, (jiffies + NES_SHORT_TIME));
-
- return 0;
-}
-
-
-/**
- * nes_addr_resolve_neigh
- */
-static int nes_addr_resolve_neigh(struct nes_vnic *nesvnic, u32 dst_ip, int arpindex)
-{
- struct rtable *rt;
- struct neighbour *neigh;
- int rc = arpindex;
- struct net_device *netdev;
- struct nes_adapter *nesadapter = nesvnic->nesdev->nesadapter;
- __be32 dst_ipaddr = htonl(dst_ip);
-
- rt = ip_route_output(&init_net, dst_ipaddr, nesvnic->local_ipaddr, 0, 0);
- if (IS_ERR(rt)) {
- printk(KERN_ERR "%s: ip_route_output_key failed for 0x%08X\n",
- __func__, dst_ip);
- return rc;
- }
-
- if (netif_is_bond_slave(nesvnic->netdev))
- netdev = netdev_master_upper_dev_get(nesvnic->netdev);
- else
- netdev = nesvnic->netdev;
-
- neigh = dst_neigh_lookup(&rt->dst, &dst_ipaddr);
-
- rcu_read_lock();
- if (neigh) {
- if (neigh->nud_state & NUD_VALID) {
- nes_debug(NES_DBG_CM, "Neighbor MAC address for 0x%08X"
- " is %pM, Gateway is 0x%08X \n", dst_ip,
- neigh->ha, ntohl(rt->rt_gateway));
-
- if (arpindex >= 0) {
- if (ether_addr_equal(nesadapter->arp_table[arpindex].mac_addr, neigh->ha)) {
- /* Mac address same as in nes_arp_table */
- goto out;
- }
-
- nes_manage_arp_cache(nesvnic->netdev,
- nesadapter->arp_table[arpindex].mac_addr,
- dst_ip, NES_ARP_DELETE);
- }
-
- nes_manage_arp_cache(nesvnic->netdev, neigh->ha,
- dst_ip, NES_ARP_ADD);
- rc = nes_arp_table(nesvnic->nesdev, dst_ip, NULL,
- NES_ARP_RESOLVE);
- } else {
- neigh_event_send(neigh, NULL);
- }
- }
-out:
- rcu_read_unlock();
-
- if (neigh)
- neigh_release(neigh);
-
- ip_rt_put(rt);
- return rc;
-}
-
-/**
- * make_cm_node - create a new instance of a cm node
- */
-static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core,
- struct nes_vnic *nesvnic, struct nes_cm_info *cm_info,
- struct nes_cm_listener *listener)
-{
- struct nes_cm_node *cm_node;
- struct timespec ts;
- int oldarpindex = 0;
- int arpindex = 0;
- struct nes_device *nesdev;
- struct nes_adapter *nesadapter;
-
- /* create an hte and cm_node for this instance */
- cm_node = kzalloc(sizeof(*cm_node), GFP_ATOMIC);
- if (!cm_node)
- return NULL;
-
- /* set our node specific transport info */
- if (listener) {
- cm_node->loc_addr = listener->loc_addr;
- cm_node->loc_port = listener->loc_port;
- } else {
- cm_node->loc_addr = cm_info->loc_addr;
- cm_node->loc_port = cm_info->loc_port;
- }
- cm_node->rem_addr = cm_info->rem_addr;
- cm_node->rem_port = cm_info->rem_port;
-
- cm_node->mpa_frame_rev = mpa_version;
- cm_node->send_rdma0_op = SEND_RDMA_READ_ZERO;
- cm_node->mpav2_ird_ord = 0;
- cm_node->ird_size = 0;
- cm_node->ord_size = 0;
-
- nes_debug(NES_DBG_CM, "Make node addresses : loc = %pI4:%x, rem = %pI4:%x\n",
- &cm_node->loc_addr, cm_node->loc_port,
- &cm_node->rem_addr, cm_node->rem_port);
- cm_node->listener = listener;
- if (listener)
- cm_node->tos = listener->tos;
- cm_node->netdev = nesvnic->netdev;
- cm_node->cm_id = cm_info->cm_id;
- memcpy(cm_node->loc_mac, nesvnic->netdev->dev_addr, ETH_ALEN);
-
- nes_debug(NES_DBG_CM, "listener=%p, cm_id=%p\n", cm_node->listener,
- cm_node->cm_id);
-
- spin_lock_init(&cm_node->retrans_list_lock);
-
- cm_node->loopbackpartner = NULL;
- atomic_set(&cm_node->ref_count, 1);
- /* associate our parent CM core */
- cm_node->cm_core = cm_core;
- cm_node->tcp_cntxt.loc_id = NES_CM_DEF_LOCAL_ID;
- cm_node->tcp_cntxt.rcv_wscale = NES_CM_DEFAULT_RCV_WND_SCALE;
- cm_node->tcp_cntxt.rcv_wnd = NES_CM_DEFAULT_RCV_WND_SCALED >>
- NES_CM_DEFAULT_RCV_WND_SCALE;
- ts = current_kernel_time();
- cm_node->tcp_cntxt.loc_seq_num = htonl(ts.tv_nsec);
- cm_node->tcp_cntxt.mss = nesvnic->max_frame_size - sizeof(struct iphdr) -
- sizeof(struct tcphdr) - ETH_HLEN - VLAN_HLEN;
- cm_node->tcp_cntxt.rcv_nxt = 0;
- /* get a unique session ID , add thread_id to an upcounter to handle race */
- atomic_inc(&cm_core->node_cnt);
- cm_node->conn_type = cm_info->conn_type;
- cm_node->apbvt_set = 0;
- cm_node->accept_pend = 0;
-
- cm_node->nesvnic = nesvnic;
- /* get some device handles, for arp lookup */
- nesdev = nesvnic->nesdev;
- nesadapter = nesdev->nesadapter;
-
- cm_node->loopbackpartner = NULL;
-
- /* get the mac addr for the remote node */
- oldarpindex = nes_arp_table(nesdev, cm_node->rem_addr,
- NULL, NES_ARP_RESOLVE);
- arpindex = nes_addr_resolve_neigh(nesvnic, cm_node->rem_addr,
- oldarpindex);
- if (arpindex < 0) {
- kfree(cm_node);
- return NULL;
- }
-
- /* copy the mac addr to node context */
- memcpy(cm_node->rem_mac, nesadapter->arp_table[arpindex].mac_addr, ETH_ALEN);
- nes_debug(NES_DBG_CM, "Remote mac addr from arp table: %pM\n",
- cm_node->rem_mac);
-
- add_hte_node(cm_core, cm_node);
- atomic_inc(&cm_nodes_created);
-
- return cm_node;
-}
-
-
-/**
- * add_ref_cm_node - destroy an instance of a cm node
- */
-static int add_ref_cm_node(struct nes_cm_node *cm_node)
-{
- atomic_inc(&cm_node->ref_count);
- return 0;
-}
-
-
-/**
- * rem_ref_cm_node - destroy an instance of a cm node
- */
-static int rem_ref_cm_node(struct nes_cm_core *cm_core,
- struct nes_cm_node *cm_node)
-{
- unsigned long flags;
- struct nes_qp *nesqp;
-
- if (!cm_node)
- return -EINVAL;
-
- spin_lock_irqsave(&cm_node->cm_core->ht_lock, flags);
- if (atomic_dec_return(&cm_node->ref_count)) {
- spin_unlock_irqrestore(&cm_node->cm_core->ht_lock, flags);
- return 0;
- }
- list_del(&cm_node->list);
- atomic_dec(&cm_core->ht_node_cnt);
- spin_unlock_irqrestore(&cm_node->cm_core->ht_lock, flags);
-
- /* if the node is destroyed before connection was accelerated */
- if (!cm_node->accelerated && cm_node->accept_pend) {
- BUG_ON(!cm_node->listener);
- atomic_dec(&cm_node->listener->pend_accepts_cnt);
- BUG_ON(atomic_read(&cm_node->listener->pend_accepts_cnt) < 0);
- }
- WARN_ON(cm_node->send_entry);
- if (cm_node->recv_entry)
- handle_recv_entry(cm_node, 0);
- if (cm_node->listener) {
- mini_cm_dec_refcnt_listen(cm_core, cm_node->listener, 0);
- } else {
- if (cm_node->apbvt_set && cm_node->nesvnic) {
- nes_manage_apbvt(cm_node->nesvnic, cm_node->loc_port,
- PCI_FUNC(cm_node->nesvnic->nesdev->pcidev->devfn),
- NES_MANAGE_APBVT_DEL);
- }
- nes_debug(NES_DBG_NLMSG, "Delete APBVT loc_port = %04X\n",
- cm_node->loc_port);
- }
-
- atomic_dec(&cm_core->node_cnt);
- atomic_inc(&cm_nodes_destroyed);
- nesqp = cm_node->nesqp;
- if (nesqp) {
- nesqp->cm_node = NULL;
- nes_rem_ref(&nesqp->ibqp);
- cm_node->nesqp = NULL;
- }
-
- kfree(cm_node);
- return 0;
-}
-
-/**
- * process_options
- */
-static int process_options(struct nes_cm_node *cm_node, u8 *optionsloc,
- u32 optionsize, u32 syn_packet)
-{
- u32 tmp;
- u32 offset = 0;
- union all_known_options *all_options;
- char got_mss_option = 0;
-
- while (offset < optionsize) {
- all_options = (union all_known_options *)(optionsloc + offset);
- switch (all_options->as_base.optionnum) {
- case OPTION_NUMBER_END:
- offset = optionsize;
- break;
- case OPTION_NUMBER_NONE:
- offset += 1;
- continue;
- case OPTION_NUMBER_MSS:
- nes_debug(NES_DBG_CM, "%s: MSS Length: %d Offset: %d "
- "Size: %d\n", __func__,
- all_options->as_mss.length, offset, optionsize);
- got_mss_option = 1;
- if (all_options->as_mss.length != 4) {
- return 1;
- } else {
- tmp = ntohs(all_options->as_mss.mss);
- if (tmp > 0 && tmp <
- cm_node->tcp_cntxt.mss)
- cm_node->tcp_cntxt.mss = tmp;
- }
- break;
- case OPTION_NUMBER_WINDOW_SCALE:
- cm_node->tcp_cntxt.snd_wscale =
- all_options->as_windowscale.shiftcount;
- break;
- default:
- nes_debug(NES_DBG_CM, "TCP Option not understood: %x\n",
- all_options->as_base.optionnum);
- break;
- }
- offset += all_options->as_base.length;
- }
- if ((!got_mss_option) && (syn_packet))
- cm_node->tcp_cntxt.mss = NES_CM_DEFAULT_MSS;
- return 0;
-}
-
-static void drop_packet(struct sk_buff *skb)
-{
- atomic_inc(&cm_accel_dropped_pkts);
- dev_kfree_skb_any(skb);
-}
-
-static void handle_fin_pkt(struct nes_cm_node *cm_node)
-{
- nes_debug(NES_DBG_CM, "Received FIN, cm_node = %p, state = %u. "
- "refcnt=%d\n", cm_node, cm_node->state,
- atomic_read(&cm_node->ref_count));
- switch (cm_node->state) {
- case NES_CM_STATE_SYN_RCVD:
- case NES_CM_STATE_SYN_SENT:
- case NES_CM_STATE_ESTABLISHED:
- case NES_CM_STATE_MPAREJ_RCVD:
- cm_node->tcp_cntxt.rcv_nxt++;
- cleanup_retrans_entry(cm_node);
- cm_node->state = NES_CM_STATE_LAST_ACK;
- send_fin(cm_node, NULL);
- break;
- case NES_CM_STATE_MPAREQ_SENT:
- create_event(cm_node, NES_CM_EVENT_ABORTED);
- cm_node->tcp_cntxt.rcv_nxt++;
- cleanup_retrans_entry(cm_node);
- cm_node->state = NES_CM_STATE_CLOSED;
- add_ref_cm_node(cm_node);
- send_reset(cm_node, NULL);
- break;
- case NES_CM_STATE_FIN_WAIT1:
- cm_node->tcp_cntxt.rcv_nxt++;
- cleanup_retrans_entry(cm_node);
- cm_node->state = NES_CM_STATE_CLOSING;
- send_ack(cm_node, NULL);
- /* Wait for ACK as this is simultaneous close..
- * After we receive ACK, do not send anything..
- * Just rm the node.. Done.. */
- break;
- case NES_CM_STATE_FIN_WAIT2:
- cm_node->tcp_cntxt.rcv_nxt++;
- cleanup_retrans_entry(cm_node);
- cm_node->state = NES_CM_STATE_TIME_WAIT;
- send_ack(cm_node, NULL);
- schedule_nes_timer(cm_node, NULL, NES_TIMER_TYPE_CLOSE, 1, 0);
- break;
- case NES_CM_STATE_TIME_WAIT:
- cm_node->tcp_cntxt.rcv_nxt++;
- cleanup_retrans_entry(cm_node);
- cm_node->state = NES_CM_STATE_CLOSED;
- rem_ref_cm_node(cm_node->cm_core, cm_node);
- break;
- case NES_CM_STATE_TSA:
- default:
- nes_debug(NES_DBG_CM, "Error Rcvd FIN for node-%p state = %d\n",
- cm_node, cm_node->state);
- break;
- }
-}
-
-
-static void handle_rst_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
- struct tcphdr *tcph)
-{
-
- int reset = 0; /* whether to send reset in case of err.. */
- atomic_inc(&cm_resets_recvd);
- nes_debug(NES_DBG_CM, "Received Reset, cm_node = %p, state = %u."
- " refcnt=%d\n", cm_node, cm_node->state,
- atomic_read(&cm_node->ref_count));
- cleanup_retrans_entry(cm_node);
- switch (cm_node->state) {
- case NES_CM_STATE_SYN_SENT:
- case NES_CM_STATE_MPAREQ_SENT:
- nes_debug(NES_DBG_CM, "%s[%u] create abort for cm_node=%p "
- "listener=%p state=%d\n", __func__, __LINE__, cm_node,
- cm_node->listener, cm_node->state);
- switch (cm_node->mpa_frame_rev) {
- case IETF_MPA_V2:
- cm_node->mpa_frame_rev = IETF_MPA_V1;
- /* send a syn and goto syn sent state */
- cm_node->state = NES_CM_STATE_SYN_SENT;
- if (send_syn(cm_node, 0, NULL)) {
- active_open_err(cm_node, skb, reset);
- }
- break;
- case IETF_MPA_V1:
- default:
- active_open_err(cm_node, skb, reset);
- break;
- }
- break;
- case NES_CM_STATE_MPAREQ_RCVD:
- atomic_inc(&cm_node->passive_state);
- dev_kfree_skb_any(skb);
- break;
- case NES_CM_STATE_ESTABLISHED:
- case NES_CM_STATE_SYN_RCVD:
- case NES_CM_STATE_LISTENING:
- nes_debug(NES_DBG_CM, "Bad state %s[%u]\n", __func__, __LINE__);
- passive_open_err(cm_node, skb, reset);
- break;
- case NES_CM_STATE_TSA:
- active_open_err(cm_node, skb, reset);
- break;
- case NES_CM_STATE_CLOSED:
- drop_packet(skb);
- break;
- case NES_CM_STATE_FIN_WAIT2:
- case NES_CM_STATE_FIN_WAIT1:
- case NES_CM_STATE_LAST_ACK:
- cm_node->cm_id->rem_ref(cm_node->cm_id);
- case NES_CM_STATE_TIME_WAIT:
- cm_node->state = NES_CM_STATE_CLOSED;
- rem_ref_cm_node(cm_node->cm_core, cm_node);
- drop_packet(skb);
- break;
- default:
- drop_packet(skb);
- break;
- }
-}
-
-
-static void handle_rcv_mpa(struct nes_cm_node *cm_node, struct sk_buff *skb)
-{
- int ret = 0;
- int datasize = skb->len;
- u8 *dataloc = skb->data;
-
- enum nes_cm_event_type type = NES_CM_EVENT_UNKNOWN;
- u32 res_type;
-
- ret = parse_mpa(cm_node, dataloc, &res_type, datasize);
- if (ret) {
- nes_debug(NES_DBG_CM, "didn't like MPA Request\n");
- if (cm_node->state == NES_CM_STATE_MPAREQ_SENT) {
- nes_debug(NES_DBG_CM, "%s[%u] create abort for "
- "cm_node=%p listener=%p state=%d\n", __func__,
- __LINE__, cm_node, cm_node->listener,
- cm_node->state);
- active_open_err(cm_node, skb, 1);
- } else {
- passive_open_err(cm_node, skb, 1);
- }
- return;
- }
-
- switch (cm_node->state) {
- case NES_CM_STATE_ESTABLISHED:
- if (res_type == NES_MPA_REQUEST_REJECT)
- /*BIG problem as we are receiving the MPA.. So should
- * not be REJECT.. This is Passive Open.. We can
- * only receive it Reject for Active Open...*/
- WARN_ON(1);
- cm_node->state = NES_CM_STATE_MPAREQ_RCVD;
- type = NES_CM_EVENT_MPA_REQ;
- atomic_set(&cm_node->passive_state,
- NES_PASSIVE_STATE_INDICATED);
- break;
- case NES_CM_STATE_MPAREQ_SENT:
- cleanup_retrans_entry(cm_node);
- if (res_type == NES_MPA_REQUEST_REJECT) {
- type = NES_CM_EVENT_MPA_REJECT;
- cm_node->state = NES_CM_STATE_MPAREJ_RCVD;
- } else {
- type = NES_CM_EVENT_CONNECTED;
- cm_node->state = NES_CM_STATE_TSA;
- }
-
- break;
- default:
- WARN_ON(1);
- break;
- }
- dev_kfree_skb_any(skb);
- create_event(cm_node, type);
-}
-
-static void indicate_pkt_err(struct nes_cm_node *cm_node, struct sk_buff *skb)
-{
- switch (cm_node->state) {
- case NES_CM_STATE_SYN_SENT:
- case NES_CM_STATE_MPAREQ_SENT:
- nes_debug(NES_DBG_CM, "%s[%u] create abort for cm_node=%p "
- "listener=%p state=%d\n", __func__, __LINE__, cm_node,
- cm_node->listener, cm_node->state);
- active_open_err(cm_node, skb, 1);
- break;
- case NES_CM_STATE_ESTABLISHED:
- case NES_CM_STATE_SYN_RCVD:
- passive_open_err(cm_node, skb, 1);
- break;
- case NES_CM_STATE_TSA:
- default:
- drop_packet(skb);
- }
-}
-
-static int check_syn(struct nes_cm_node *cm_node, struct tcphdr *tcph,
- struct sk_buff *skb)
-{
- int err;
-
- err = ((ntohl(tcph->ack_seq) == cm_node->tcp_cntxt.loc_seq_num)) ? 0 : 1;
- if (err)
- active_open_err(cm_node, skb, 1);
-
- return err;
-}
-
-static int check_seq(struct nes_cm_node *cm_node, struct tcphdr *tcph,
- struct sk_buff *skb)
-{
- int err = 0;
- u32 seq;
- u32 ack_seq;
- u32 loc_seq_num = cm_node->tcp_cntxt.loc_seq_num;
- u32 rcv_nxt = cm_node->tcp_cntxt.rcv_nxt;
- u32 rcv_wnd;
-
- seq = ntohl(tcph->seq);
- ack_seq = ntohl(tcph->ack_seq);
- rcv_wnd = cm_node->tcp_cntxt.rcv_wnd;
- if (ack_seq != loc_seq_num)
- err = 1;
- else if (!between(seq, rcv_nxt, (rcv_nxt + rcv_wnd)))
- err = 1;
- if (err) {
- nes_debug(NES_DBG_CM, "%s[%u] create abort for cm_node=%p "
- "listener=%p state=%d\n", __func__, __LINE__, cm_node,
- cm_node->listener, cm_node->state);
- indicate_pkt_err(cm_node, skb);
- nes_debug(NES_DBG_CM, "seq ERROR cm_node =%p seq=0x%08X "
- "rcv_nxt=0x%08X rcv_wnd=0x%x\n", cm_node, seq, rcv_nxt,
- rcv_wnd);
- }
- return err;
-}
-
-/*
- * handle_syn_pkt() is for Passive node. The syn packet is received when a node
- * is created with a listener or it may comein as rexmitted packet which in
- * that case will be just dropped.
- */
-static void handle_syn_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
- struct tcphdr *tcph)
-{
- int ret;
- u32 inc_sequence;
- int optionsize;
-
- optionsize = (tcph->doff << 2) - sizeof(struct tcphdr);
- skb_trim(skb, 0);
- inc_sequence = ntohl(tcph->seq);
-
- switch (cm_node->state) {
- case NES_CM_STATE_SYN_SENT:
- case NES_CM_STATE_MPAREQ_SENT:
- /* Rcvd syn on active open connection*/
- active_open_err(cm_node, skb, 1);
- break;
- case NES_CM_STATE_LISTENING:
- /* Passive OPEN */
- if (atomic_read(&cm_node->listener->pend_accepts_cnt) >
- cm_node->listener->backlog) {
- nes_debug(NES_DBG_CM, "drop syn due to backlog "
- "pressure \n");
- cm_backlog_drops++;
- passive_open_err(cm_node, skb, 0);
- break;
- }
- ret = handle_tcp_options(cm_node, tcph, skb, optionsize,
- 1);
- if (ret) {
- passive_open_err(cm_node, skb, 0);
- /* drop pkt */
- break;
- }
- cm_node->tcp_cntxt.rcv_nxt = inc_sequence + 1;
- BUG_ON(cm_node->send_entry);
- cm_node->accept_pend = 1;
- atomic_inc(&cm_node->listener->pend_accepts_cnt);
-
- cm_node->state = NES_CM_STATE_SYN_RCVD;
- send_syn(cm_node, 1, skb);
- break;
- case NES_CM_STATE_CLOSED:
- cleanup_retrans_entry(cm_node);
- add_ref_cm_node(cm_node);
- send_reset(cm_node, skb);
- break;
- case NES_CM_STATE_TSA:
- case NES_CM_STATE_ESTABLISHED:
- case NES_CM_STATE_FIN_WAIT1:
- case NES_CM_STATE_FIN_WAIT2:
- case NES_CM_STATE_MPAREQ_RCVD:
- case NES_CM_STATE_LAST_ACK:
- case NES_CM_STATE_CLOSING:
- case NES_CM_STATE_UNKNOWN:
- default:
- drop_packet(skb);
- break;
- }
-}
-
-static void handle_synack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
- struct tcphdr *tcph)
-{
- int ret;
- u32 inc_sequence;
- int optionsize;
-
- optionsize = (tcph->doff << 2) - sizeof(struct tcphdr);
- skb_trim(skb, 0);
- inc_sequence = ntohl(tcph->seq);
- switch (cm_node->state) {
- case NES_CM_STATE_SYN_SENT:
- cleanup_retrans_entry(cm_node);
- /* active open */
- if (check_syn(cm_node, tcph, skb))
- return;
- cm_node->tcp_cntxt.rem_ack_num = ntohl(tcph->ack_seq);
- /* setup options */
- ret = handle_tcp_options(cm_node, tcph, skb, optionsize, 0);
- if (ret) {
- nes_debug(NES_DBG_CM, "cm_node=%p tcp_options failed\n",
- cm_node);
- break;
- }
- cleanup_retrans_entry(cm_node);
- cm_node->tcp_cntxt.rcv_nxt = inc_sequence + 1;
- send_mpa_request(cm_node, skb);
- cm_node->state = NES_CM_STATE_MPAREQ_SENT;
- break;
- case NES_CM_STATE_MPAREQ_RCVD:
- /* passive open, so should not be here */
- passive_open_err(cm_node, skb, 1);
- break;
- case NES_CM_STATE_LISTENING:
- cm_node->tcp_cntxt.loc_seq_num = ntohl(tcph->ack_seq);
- cleanup_retrans_entry(cm_node);
- cm_node->state = NES_CM_STATE_CLOSED;
- send_reset(cm_node, skb);
- break;
- case NES_CM_STATE_CLOSED:
- cm_node->tcp_cntxt.loc_seq_num = ntohl(tcph->ack_seq);
- cleanup_retrans_entry(cm_node);
- add_ref_cm_node(cm_node);
- send_reset(cm_node, skb);
- break;
- case NES_CM_STATE_ESTABLISHED:
- case NES_CM_STATE_FIN_WAIT1:
- case NES_CM_STATE_FIN_WAIT2:
- case NES_CM_STATE_LAST_ACK:
- case NES_CM_STATE_TSA:
- case NES_CM_STATE_CLOSING:
- case NES_CM_STATE_UNKNOWN:
- case NES_CM_STATE_MPAREQ_SENT:
- default:
- drop_packet(skb);
- break;
- }
-}
-
-static int handle_ack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
- struct tcphdr *tcph)
-{
- int datasize = 0;
- u32 inc_sequence;
- int ret = 0;
- int optionsize;
-
- optionsize = (tcph->doff << 2) - sizeof(struct tcphdr);
-
- if (check_seq(cm_node, tcph, skb))
- return -EINVAL;
-
- skb_pull(skb, tcph->doff << 2);
- inc_sequence = ntohl(tcph->seq);
- datasize = skb->len;
- switch (cm_node->state) {
- case NES_CM_STATE_SYN_RCVD:
- /* Passive OPEN */
- cleanup_retrans_entry(cm_node);
- ret = handle_tcp_options(cm_node, tcph, skb, optionsize, 1);
- if (ret)
- break;
- cm_node->tcp_cntxt.rem_ack_num = ntohl(tcph->ack_seq);
- cm_node->state = NES_CM_STATE_ESTABLISHED;
- if (datasize) {
- cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize;
- handle_rcv_mpa(cm_node, skb);
- } else { /* rcvd ACK only */
- dev_kfree_skb_any(skb);
- }
- break;
- case NES_CM_STATE_ESTABLISHED:
- /* Passive OPEN */
- cleanup_retrans_entry(cm_node);
- if (datasize) {
- cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize;
- handle_rcv_mpa(cm_node, skb);
- } else {
- drop_packet(skb);
- }
- break;
- case NES_CM_STATE_MPAREQ_SENT:
- cm_node->tcp_cntxt.rem_ack_num = ntohl(tcph->ack_seq);
- if (datasize) {
- cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize;
- handle_rcv_mpa(cm_node, skb);
- } else { /* Could be just an ack pkt.. */
- dev_kfree_skb_any(skb);
- }
- break;
- case NES_CM_STATE_LISTENING:
- cleanup_retrans_entry(cm_node);
- cm_node->state = NES_CM_STATE_CLOSED;
- send_reset(cm_node, skb);
- break;
- case NES_CM_STATE_CLOSED:
- cleanup_retrans_entry(cm_node);
- add_ref_cm_node(cm_node);
- send_reset(cm_node, skb);
- break;
- case NES_CM_STATE_LAST_ACK:
- case NES_CM_STATE_CLOSING:
- cleanup_retrans_entry(cm_node);
- cm_node->state = NES_CM_STATE_CLOSED;
- cm_node->cm_id->rem_ref(cm_node->cm_id);
- rem_ref_cm_node(cm_node->cm_core, cm_node);
- drop_packet(skb);
- break;
- case NES_CM_STATE_FIN_WAIT1:
- cleanup_retrans_entry(cm_node);
- drop_packet(skb);
- cm_node->state = NES_CM_STATE_FIN_WAIT2;
- break;
- case NES_CM_STATE_SYN_SENT:
- case NES_CM_STATE_FIN_WAIT2:
- case NES_CM_STATE_TSA:
- case NES_CM_STATE_MPAREQ_RCVD:
- case NES_CM_STATE_UNKNOWN:
- default:
- cleanup_retrans_entry(cm_node);
- drop_packet(skb);
- break;
- }
- return ret;
-}
-
-
-
-static int handle_tcp_options(struct nes_cm_node *cm_node, struct tcphdr *tcph,
- struct sk_buff *skb, int optionsize, int passive)
-{
- u8 *optionsloc = (u8 *)&tcph[1];
-
- if (optionsize) {
- if (process_options(cm_node, optionsloc, optionsize,
- (u32)tcph->syn)) {
- nes_debug(NES_DBG_CM, "%s: Node %p, Sending RESET\n",
- __func__, cm_node);
- if (passive)
- passive_open_err(cm_node, skb, 1);
- else
- active_open_err(cm_node, skb, 1);
- return 1;
- }
- }
-
- cm_node->tcp_cntxt.snd_wnd = ntohs(tcph->window) <<
- cm_node->tcp_cntxt.snd_wscale;
-
- if (cm_node->tcp_cntxt.snd_wnd > cm_node->tcp_cntxt.max_snd_wnd)
- cm_node->tcp_cntxt.max_snd_wnd = cm_node->tcp_cntxt.snd_wnd;
- return 0;
-}
-
-/*
- * active_open_err() will send reset() if flag set..
- * It will also send ABORT event.
- */
-static void active_open_err(struct nes_cm_node *cm_node, struct sk_buff *skb,
- int reset)
-{
- cleanup_retrans_entry(cm_node);
- if (reset) {
- nes_debug(NES_DBG_CM, "ERROR active err called for cm_node=%p, "
- "state=%d\n", cm_node, cm_node->state);
- add_ref_cm_node(cm_node);
- send_reset(cm_node, skb);
- } else {
- dev_kfree_skb_any(skb);
- }
-
- cm_node->state = NES_CM_STATE_CLOSED;
- create_event(cm_node, NES_CM_EVENT_ABORTED);
-}
-
-/*
- * passive_open_err() will either do a reset() or will free up the skb and
- * remove the cm_node.
- */
-static void passive_open_err(struct nes_cm_node *cm_node, struct sk_buff *skb,
- int reset)
-{
- cleanup_retrans_entry(cm_node);
- cm_node->state = NES_CM_STATE_CLOSED;
- if (reset) {
- nes_debug(NES_DBG_CM, "passive_open_err sending RST for "
- "cm_node=%p state =%d\n", cm_node, cm_node->state);
- send_reset(cm_node, skb);
- } else {
- dev_kfree_skb_any(skb);
- rem_ref_cm_node(cm_node->cm_core, cm_node);
- }
-}
-
-/*
- * free_retrans_entry() routines assumes that the retrans_list_lock has
- * been acquired before calling.
- */
-static void free_retrans_entry(struct nes_cm_node *cm_node)
-{
- struct nes_timer_entry *send_entry;
-
- send_entry = cm_node->send_entry;
- if (send_entry) {
- cm_node->send_entry = NULL;
- dev_kfree_skb_any(send_entry->skb);
- kfree(send_entry);
- rem_ref_cm_node(cm_node->cm_core, cm_node);
- }
-}
-
-static void cleanup_retrans_entry(struct nes_cm_node *cm_node)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
- free_retrans_entry(cm_node);
- spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
-}
-
-/**
- * process_packet
- * Returns skb if to be freed, else it will return NULL if already used..
- */
-static void process_packet(struct nes_cm_node *cm_node, struct sk_buff *skb,
- struct nes_cm_core *cm_core)
-{
- enum nes_tcpip_pkt_type pkt_type = NES_PKT_TYPE_UNKNOWN;
- struct tcphdr *tcph = tcp_hdr(skb);
- u32 fin_set = 0;
- int ret = 0;
-
- skb_pull(skb, ip_hdr(skb)->ihl << 2);
-
- nes_debug(NES_DBG_CM, "process_packet: cm_node=%p state =%d syn=%d "
- "ack=%d rst=%d fin=%d\n", cm_node, cm_node->state, tcph->syn,
- tcph->ack, tcph->rst, tcph->fin);
-
- if (tcph->rst) {
- pkt_type = NES_PKT_TYPE_RST;
- } else if (tcph->syn) {
- pkt_type = NES_PKT_TYPE_SYN;
- if (tcph->ack)
- pkt_type = NES_PKT_TYPE_SYNACK;
- } else if (tcph->ack) {
- pkt_type = NES_PKT_TYPE_ACK;
- }
- if (tcph->fin)
- fin_set = 1;
-
- switch (pkt_type) {
- case NES_PKT_TYPE_SYN:
- handle_syn_pkt(cm_node, skb, tcph);
- break;
- case NES_PKT_TYPE_SYNACK:
- handle_synack_pkt(cm_node, skb, tcph);
- break;
- case NES_PKT_TYPE_ACK:
- ret = handle_ack_pkt(cm_node, skb, tcph);
- if (fin_set && !ret)
- handle_fin_pkt(cm_node);
- break;
- case NES_PKT_TYPE_RST:
- handle_rst_pkt(cm_node, skb, tcph);
- break;
- default:
- if ((fin_set) && (!check_seq(cm_node, tcph, skb)))
- handle_fin_pkt(cm_node);
- drop_packet(skb);
- break;
- }
-}
-
-/**
- * mini_cm_listen - create a listen node with params
- */
-static struct nes_cm_listener *mini_cm_listen(struct nes_cm_core *cm_core,
- struct nes_vnic *nesvnic, struct nes_cm_info *cm_info)
-{
- struct nes_cm_listener *listener;
- unsigned long flags;
-
- nes_debug(NES_DBG_CM, "Search for 0x%08x : 0x%04x\n",
- cm_info->loc_addr, cm_info->loc_port);
-
- /* cannot have multiple matching listeners */
- listener = find_listener(cm_core, cm_info->loc_addr, cm_info->loc_port,
- NES_CM_LISTENER_EITHER_STATE);
-
- if (listener && listener->listener_state == NES_CM_LISTENER_ACTIVE_STATE) {
- /* find automatically incs ref count ??? */
- atomic_dec(&listener->ref_count);
- nes_debug(NES_DBG_CM, "Not creating listener since it already exists\n");
- return NULL;
- }
-
- if (!listener) {
- /* create a CM listen node (1/2 node to compare incoming traffic to) */
- listener = kzalloc(sizeof(*listener), GFP_ATOMIC);
- if (!listener)
- return NULL;
-
- listener->loc_addr = cm_info->loc_addr;
- listener->loc_port = cm_info->loc_port;
- listener->reused_node = 0;
-
- atomic_set(&listener->ref_count, 1);
- }
- /* pasive case */
- /* find already inc'ed the ref count */
- else {
- listener->reused_node = 1;
- }
-
- listener->cm_id = cm_info->cm_id;
- atomic_set(&listener->pend_accepts_cnt, 0);
- listener->cm_core = cm_core;
- listener->nesvnic = nesvnic;
- atomic_inc(&cm_core->node_cnt);
-
- listener->conn_type = cm_info->conn_type;
- listener->backlog = cm_info->backlog;
- listener->listener_state = NES_CM_LISTENER_ACTIVE_STATE;
-
- if (!listener->reused_node) {
- spin_lock_irqsave(&cm_core->listen_list_lock, flags);
- list_add(&listener->list, &cm_core->listen_list.list);
- spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
- atomic_inc(&cm_core->listen_node_cnt);
- }
-
- nes_debug(NES_DBG_CM, "Api - listen(): addr=0x%08X, port=0x%04x,"
- " listener = %p, backlog = %d, cm_id = %p.\n",
- cm_info->loc_addr, cm_info->loc_port,
- listener, listener->backlog, listener->cm_id);
-
- return listener;
-}
-
-
-/**
- * mini_cm_connect - make a connection node with params
- */
-static struct nes_cm_node *mini_cm_connect(struct nes_cm_core *cm_core,
- struct nes_vnic *nesvnic, u16 private_data_len,
- void *private_data, struct nes_cm_info *cm_info)
-{
- int ret = 0;
- struct nes_cm_node *cm_node;
- struct nes_cm_listener *loopbackremotelistener;
- struct nes_cm_node *loopbackremotenode;
- struct nes_cm_info loopback_cm_info;
- u8 *start_buff;
-
- /* create a CM connection node */
- cm_node = make_cm_node(cm_core, nesvnic, cm_info, NULL);
- if (!cm_node)
- return NULL;
-
- /* set our node side to client (active) side */
- cm_node->tcp_cntxt.client = 1;
- cm_node->tcp_cntxt.rcv_wscale = NES_CM_DEFAULT_RCV_WND_SCALE;
-
- if (cm_info->loc_addr == cm_info->rem_addr) {
- loopbackremotelistener = find_listener(cm_core,
- cm_node->loc_addr, cm_node->rem_port,
- NES_CM_LISTENER_ACTIVE_STATE);
- if (loopbackremotelistener == NULL) {
- create_event(cm_node, NES_CM_EVENT_ABORTED);
- } else {
- loopback_cm_info = *cm_info;
- loopback_cm_info.loc_port = cm_info->rem_port;
- loopback_cm_info.rem_port = cm_info->loc_port;
- loopback_cm_info.loc_port =
- cm_info->rem_port;
- loopback_cm_info.rem_port =
- cm_info->loc_port;
- loopback_cm_info.cm_id = loopbackremotelistener->cm_id;
- loopbackremotenode = make_cm_node(cm_core, nesvnic,
- &loopback_cm_info, loopbackremotelistener);
- if (!loopbackremotenode) {
- rem_ref_cm_node(cm_node->cm_core, cm_node);
- return NULL;
- }
- atomic_inc(&cm_loopbacks);
- loopbackremotenode->loopbackpartner = cm_node;
- loopbackremotenode->tcp_cntxt.rcv_wscale =
- NES_CM_DEFAULT_RCV_WND_SCALE;
- cm_node->loopbackpartner = loopbackremotenode;
- memcpy(loopbackremotenode->mpa_frame_buf, private_data,
- private_data_len);
- loopbackremotenode->mpa_frame_size = private_data_len;
-
- /* we are done handling this state. */
- /* set node to a TSA state */
- cm_node->state = NES_CM_STATE_TSA;
- cm_node->tcp_cntxt.rcv_nxt =
- loopbackremotenode->tcp_cntxt.loc_seq_num;
- loopbackremotenode->tcp_cntxt.rcv_nxt =
- cm_node->tcp_cntxt.loc_seq_num;
- cm_node->tcp_cntxt.max_snd_wnd =
- loopbackremotenode->tcp_cntxt.rcv_wnd;
- loopbackremotenode->tcp_cntxt.max_snd_wnd =
- cm_node->tcp_cntxt.rcv_wnd;
- cm_node->tcp_cntxt.snd_wnd =
- loopbackremotenode->tcp_cntxt.rcv_wnd;
- loopbackremotenode->tcp_cntxt.snd_wnd =
- cm_node->tcp_cntxt.rcv_wnd;
- cm_node->tcp_cntxt.snd_wscale =
- loopbackremotenode->tcp_cntxt.rcv_wscale;
- loopbackremotenode->tcp_cntxt.snd_wscale =
- cm_node->tcp_cntxt.rcv_wscale;
- loopbackremotenode->state = NES_CM_STATE_MPAREQ_RCVD;
- create_event(loopbackremotenode, NES_CM_EVENT_MPA_REQ);
- }
- return cm_node;
- }
-
- start_buff = &cm_node->mpa_frame_buf[0] + sizeof(struct ietf_mpa_v2);
- cm_node->mpa_frame_size = private_data_len;
-
- memcpy(start_buff, private_data, private_data_len);
-
- /* send a syn and goto syn sent state */
- cm_node->state = NES_CM_STATE_SYN_SENT;
- ret = send_syn(cm_node, 0, NULL);
-
- if (ret) {
- /* error in sending the syn free up the cm_node struct */
- nes_debug(NES_DBG_CM, "Api - connect() FAILED: dest "
- "addr=0x%08X, port=0x%04x, cm_node=%p, cm_id = %p.\n",
- cm_node->rem_addr, cm_node->rem_port, cm_node,
- cm_node->cm_id);
- rem_ref_cm_node(cm_node->cm_core, cm_node);
- cm_node = NULL;
- }
-
- if (cm_node) {
- nes_debug(NES_DBG_CM, "Api - connect(): dest addr=0x%08X,"
- "port=0x%04x, cm_node=%p, cm_id = %p.\n",
- cm_node->rem_addr, cm_node->rem_port, cm_node,
- cm_node->cm_id);
- }
-
- return cm_node;
-}
-
-
-/**
- * mini_cm_accept - accept a connection
- * This function is never called
- */
-static int mini_cm_accept(struct nes_cm_core *cm_core, struct nes_cm_node *cm_node)
-{
- return 0;
-}
-
-
-/**
- * mini_cm_reject - reject and teardown a connection
- */
-static int mini_cm_reject(struct nes_cm_core *cm_core, struct nes_cm_node *cm_node)
-{
- int ret = 0;
- int err = 0;
- int passive_state;
- struct nes_cm_event event;
- struct iw_cm_id *cm_id = cm_node->cm_id;
- struct nes_cm_node *loopback = cm_node->loopbackpartner;
-
- nes_debug(NES_DBG_CM, "%s cm_node=%p type=%d state=%d\n",
- __func__, cm_node, cm_node->tcp_cntxt.client, cm_node->state);
-
- if (cm_node->tcp_cntxt.client)
- return ret;
- cleanup_retrans_entry(cm_node);
-
- if (!loopback) {
- passive_state = atomic_add_return(1, &cm_node->passive_state);
- if (passive_state == NES_SEND_RESET_EVENT) {
- cm_node->state = NES_CM_STATE_CLOSED;
- rem_ref_cm_node(cm_core, cm_node);
- } else {
- if (cm_node->state == NES_CM_STATE_LISTENER_DESTROYED) {
- rem_ref_cm_node(cm_core, cm_node);
- } else {
- ret = send_mpa_reject(cm_node);
- if (ret) {
- cm_node->state = NES_CM_STATE_CLOSED;
- err = send_reset(cm_node, NULL);
- if (err)
- WARN_ON(1);
- } else {
- cm_id->add_ref(cm_id);
- }
- }
- }
- } else {
- cm_node->cm_id = NULL;
- if (cm_node->state == NES_CM_STATE_LISTENER_DESTROYED) {
- rem_ref_cm_node(cm_core, cm_node);
- rem_ref_cm_node(cm_core, loopback);
- } else {
- event.cm_node = loopback;
- event.cm_info.rem_addr = loopback->rem_addr;
- event.cm_info.loc_addr = loopback->loc_addr;
- event.cm_info.rem_port = loopback->rem_port;
- event.cm_info.loc_port = loopback->loc_port;
- event.cm_info.cm_id = loopback->cm_id;
- cm_event_mpa_reject(&event);
- rem_ref_cm_node(cm_core, cm_node);
- loopback->state = NES_CM_STATE_CLOSING;
-
- cm_id = loopback->cm_id;
- rem_ref_cm_node(cm_core, loopback);
- cm_id->rem_ref(cm_id);
- }
- }
-
- return ret;
-}
-
-
-/**
- * mini_cm_close
- */
-static int mini_cm_close(struct nes_cm_core *cm_core, struct nes_cm_node *cm_node)
-{
- int ret = 0;
-
- if (!cm_core || !cm_node)
- return -EINVAL;
-
- switch (cm_node->state) {
- case NES_CM_STATE_SYN_RCVD:
- case NES_CM_STATE_SYN_SENT:
- case NES_CM_STATE_ONE_SIDE_ESTABLISHED:
- case NES_CM_STATE_ESTABLISHED:
- case NES_CM_STATE_ACCEPTING:
- case NES_CM_STATE_MPAREQ_SENT:
- case NES_CM_STATE_MPAREQ_RCVD:
- cleanup_retrans_entry(cm_node);
- send_reset(cm_node, NULL);
- break;
- case NES_CM_STATE_CLOSE_WAIT:
- cm_node->state = NES_CM_STATE_LAST_ACK;
- send_fin(cm_node, NULL);
- break;
- case NES_CM_STATE_FIN_WAIT1:
- case NES_CM_STATE_FIN_WAIT2:
- case NES_CM_STATE_LAST_ACK:
- case NES_CM_STATE_TIME_WAIT:
- case NES_CM_STATE_CLOSING:
- ret = -1;
- break;
- case NES_CM_STATE_LISTENING:
- cleanup_retrans_entry(cm_node);
- send_reset(cm_node, NULL);
- break;
- case NES_CM_STATE_MPAREJ_RCVD:
- case NES_CM_STATE_UNKNOWN:
- case NES_CM_STATE_INITED:
- case NES_CM_STATE_CLOSED:
- case NES_CM_STATE_LISTENER_DESTROYED:
- ret = rem_ref_cm_node(cm_core, cm_node);
- break;
- case NES_CM_STATE_TSA:
- if (cm_node->send_entry)
- printk(KERN_ERR "ERROR Close got called from STATE_TSA "
- "send_entry=%p\n", cm_node->send_entry);
- ret = rem_ref_cm_node(cm_core, cm_node);
- break;
- }
- return ret;
-}
-
-
-/**
- * recv_pkt - recv an ETHERNET packet, and process it through CM
- * node state machine
- */
-static int mini_cm_recv_pkt(struct nes_cm_core *cm_core,
- struct nes_vnic *nesvnic, struct sk_buff *skb)
-{
- struct nes_cm_node *cm_node = NULL;
- struct nes_cm_listener *listener = NULL;
- struct iphdr *iph;
- struct tcphdr *tcph;
- struct nes_cm_info nfo;
- int skb_handled = 1;
- __be32 tmp_daddr, tmp_saddr;
-
- if (!skb)
- return 0;
- if (skb->len < sizeof(struct iphdr) + sizeof(struct tcphdr))
- return 0;
-
- iph = (struct iphdr *)skb->data;
- tcph = (struct tcphdr *)(skb->data + sizeof(struct iphdr));
-
- nfo.loc_addr = ntohl(iph->daddr);
- nfo.loc_port = ntohs(tcph->dest);
- nfo.rem_addr = ntohl(iph->saddr);
- nfo.rem_port = ntohs(tcph->source);
-
- tmp_daddr = cpu_to_be32(iph->daddr);
- tmp_saddr = cpu_to_be32(iph->saddr);
-
- nes_debug(NES_DBG_CM, "Received packet: dest=%pI4:0x%04X src=%pI4:0x%04X\n",
- &tmp_daddr, tcph->dest, &tmp_saddr, tcph->source);
-
- do {
- cm_node = find_node(cm_core,
- nfo.rem_port, nfo.rem_addr,
- nfo.loc_port, nfo.loc_addr);
-
- if (!cm_node) {
- /* Only type of packet accepted are for */
- /* the PASSIVE open (syn only) */
- if ((!tcph->syn) || (tcph->ack)) {
- skb_handled = 0;
- break;
- }
- listener = find_listener(cm_core, nfo.loc_addr,
- nfo.loc_port,
- NES_CM_LISTENER_ACTIVE_STATE);
- if (!listener) {
- nfo.cm_id = NULL;
- nfo.conn_type = 0;
- nes_debug(NES_DBG_CM, "Unable to find listener for the pkt\n");
- skb_handled = 0;
- break;
- }
- nfo.cm_id = listener->cm_id;
- nfo.conn_type = listener->conn_type;
- cm_node = make_cm_node(cm_core, nesvnic, &nfo,
- listener);
- if (!cm_node) {
- nes_debug(NES_DBG_CM, "Unable to allocate "
- "node\n");
- cm_packets_dropped++;
- atomic_dec(&listener->ref_count);
- dev_kfree_skb_any(skb);
- break;
- }
- if (!tcph->rst && !tcph->fin) {
- cm_node->state = NES_CM_STATE_LISTENING;
- } else {
- cm_packets_dropped++;
- rem_ref_cm_node(cm_core, cm_node);
- dev_kfree_skb_any(skb);
- break;
- }
- add_ref_cm_node(cm_node);
- } else if (cm_node->state == NES_CM_STATE_TSA) {
- if (cm_node->nesqp->pau_mode)
- nes_queue_mgt_skbs(skb, nesvnic, cm_node->nesqp);
- else {
- rem_ref_cm_node(cm_core, cm_node);
- atomic_inc(&cm_accel_dropped_pkts);
- dev_kfree_skb_any(skb);
- }
- break;
- }
- skb_reset_network_header(skb);
- skb_set_transport_header(skb, sizeof(*tcph));
- skb->len = ntohs(iph->tot_len);
- process_packet(cm_node, skb, cm_core);
- rem_ref_cm_node(cm_core, cm_node);
- } while (0);
- return skb_handled;
-}
-
-
-/**
- * nes_cm_alloc_core - allocate a top level instance of a cm core
- */
-static struct nes_cm_core *nes_cm_alloc_core(void)
-{
- struct nes_cm_core *cm_core;
-
- /* setup the CM core */
- /* alloc top level core control structure */
- cm_core = kzalloc(sizeof(*cm_core), GFP_KERNEL);
- if (!cm_core)
- return NULL;
-
- INIT_LIST_HEAD(&cm_core->connected_nodes);
- init_timer(&cm_core->tcp_timer);
- cm_core->tcp_timer.function = nes_cm_timer_tick;
-
- cm_core->mtu = NES_CM_DEFAULT_MTU;
- cm_core->state = NES_CM_STATE_INITED;
- cm_core->free_tx_pkt_max = NES_CM_DEFAULT_FREE_PKTS;
-
- atomic_set(&cm_core->events_posted, 0);
-
- cm_core->api = &nes_cm_api;
-
- spin_lock_init(&cm_core->ht_lock);
- spin_lock_init(&cm_core->listen_list_lock);
-
- INIT_LIST_HEAD(&cm_core->listen_list.list);
-
- nes_debug(NES_DBG_CM, "Init CM Core completed -- cm_core=%p\n", cm_core);
-
- nes_debug(NES_DBG_CM, "Enable QUEUE EVENTS\n");
- cm_core->event_wq = alloc_ordered_workqueue("nesewq", 0);
- if (!cm_core->event_wq)
- goto out_free_cmcore;
- cm_core->post_event = nes_cm_post_event;
- nes_debug(NES_DBG_CM, "Enable QUEUE DISCONNECTS\n");
- cm_core->disconn_wq = alloc_ordered_workqueue("nesdwq", 0);
- if (!cm_core->disconn_wq)
- goto out_free_wq;
-
- print_core(cm_core);
- return cm_core;
-
-out_free_wq:
- destroy_workqueue(cm_core->event_wq);
-out_free_cmcore:
- kfree(cm_core);
- return NULL;
-}
-
-
-/**
- * mini_cm_dealloc_core - deallocate a top level instance of a cm core
- */
-static int mini_cm_dealloc_core(struct nes_cm_core *cm_core)
-{
- nes_debug(NES_DBG_CM, "De-Alloc CM Core (%p)\n", cm_core);
-
- if (!cm_core)
- return -EINVAL;
-
- barrier();
-
- if (timer_pending(&cm_core->tcp_timer))
- del_timer(&cm_core->tcp_timer);
-
- destroy_workqueue(cm_core->event_wq);
- destroy_workqueue(cm_core->disconn_wq);
- nes_debug(NES_DBG_CM, "\n");
- kfree(cm_core);
-
- return 0;
-}
-
-
-/**
- * mini_cm_get
- */
-static int mini_cm_get(struct nes_cm_core *cm_core)
-{
- return cm_core->state;
-}
-
-
-/**
- * mini_cm_set
- */
-static int mini_cm_set(struct nes_cm_core *cm_core, u32 type, u32 value)
-{
- int ret = 0;
-
- switch (type) {
- case NES_CM_SET_PKT_SIZE:
- cm_core->mtu = value;
- break;
- case NES_CM_SET_FREE_PKT_Q_SIZE:
- cm_core->free_tx_pkt_max = value;
- break;
- default:
- /* unknown set option */
- ret = -EINVAL;
- }
-
- return ret;
-}
-
-
-/**
- * nes_cm_init_tsa_conn setup HW; MPA frames must be
- * successfully exchanged when this is called
- */
-static int nes_cm_init_tsa_conn(struct nes_qp *nesqp, struct nes_cm_node *cm_node)
-{
- int ret = 0;
-
- if (!nesqp)
- return -EINVAL;
-
- nesqp->nesqp_context->misc |= cpu_to_le32(NES_QPCONTEXT_MISC_IPV4 |
- NES_QPCONTEXT_MISC_NO_NAGLE | NES_QPCONTEXT_MISC_DO_NOT_FRAG |
- NES_QPCONTEXT_MISC_DROS);
-
- if (cm_node->tcp_cntxt.snd_wscale || cm_node->tcp_cntxt.rcv_wscale)
- nesqp->nesqp_context->misc |= cpu_to_le32(NES_QPCONTEXT_MISC_WSCALE);
-
- nesqp->nesqp_context->misc2 |= cpu_to_le32(64 << NES_QPCONTEXT_MISC2_TTL_SHIFT);
-
- nesqp->nesqp_context->misc2 |= cpu_to_le32(
- cm_node->tos << NES_QPCONTEXT_MISC2_TOS_SHIFT);
-
- nesqp->nesqp_context->mss |= cpu_to_le32(((u32)cm_node->tcp_cntxt.mss) << 16);
-
- nesqp->nesqp_context->tcp_state_flow_label |= cpu_to_le32(
- (u32)NES_QPCONTEXT_TCPSTATE_EST << NES_QPCONTEXT_TCPFLOW_TCP_STATE_SHIFT);
-
- nesqp->nesqp_context->pd_index_wscale |= cpu_to_le32(
- (cm_node->tcp_cntxt.snd_wscale << NES_QPCONTEXT_PDWSCALE_SND_WSCALE_SHIFT) &
- NES_QPCONTEXT_PDWSCALE_SND_WSCALE_MASK);
-
- nesqp->nesqp_context->pd_index_wscale |= cpu_to_le32(
- (cm_node->tcp_cntxt.rcv_wscale << NES_QPCONTEXT_PDWSCALE_RCV_WSCALE_SHIFT) &
- NES_QPCONTEXT_PDWSCALE_RCV_WSCALE_MASK);
-
- nesqp->nesqp_context->keepalive = cpu_to_le32(0x80);
- nesqp->nesqp_context->ts_recent = 0;
- nesqp->nesqp_context->ts_age = 0;
- nesqp->nesqp_context->snd_nxt = cpu_to_le32(cm_node->tcp_cntxt.loc_seq_num);
- nesqp->nesqp_context->snd_wnd = cpu_to_le32(cm_node->tcp_cntxt.snd_wnd);
- nesqp->nesqp_context->rcv_nxt = cpu_to_le32(cm_node->tcp_cntxt.rcv_nxt);
- nesqp->nesqp_context->rcv_wnd = cpu_to_le32(cm_node->tcp_cntxt.rcv_wnd <<
- cm_node->tcp_cntxt.rcv_wscale);
- nesqp->nesqp_context->snd_max = cpu_to_le32(cm_node->tcp_cntxt.loc_seq_num);
- nesqp->nesqp_context->snd_una = cpu_to_le32(cm_node->tcp_cntxt.loc_seq_num);
- nesqp->nesqp_context->srtt = 0;
- nesqp->nesqp_context->rttvar = cpu_to_le32(0x6);
- nesqp->nesqp_context->ssthresh = cpu_to_le32(0x3FFFC000);
- nesqp->nesqp_context->cwnd = cpu_to_le32(2 * cm_node->tcp_cntxt.mss);
- nesqp->nesqp_context->snd_wl1 = cpu_to_le32(cm_node->tcp_cntxt.rcv_nxt);
- nesqp->nesqp_context->snd_wl2 = cpu_to_le32(cm_node->tcp_cntxt.loc_seq_num);
- nesqp->nesqp_context->max_snd_wnd = cpu_to_le32(cm_node->tcp_cntxt.max_snd_wnd);
-
- nes_debug(NES_DBG_CM, "QP%u: rcv_nxt = 0x%08X, snd_nxt = 0x%08X,"
- " Setting MSS to %u, PDWscale = 0x%08X, rcv_wnd = %u, context misc = 0x%08X.\n",
- nesqp->hwqp.qp_id, le32_to_cpu(nesqp->nesqp_context->rcv_nxt),
- le32_to_cpu(nesqp->nesqp_context->snd_nxt),
- cm_node->tcp_cntxt.mss, le32_to_cpu(nesqp->nesqp_context->pd_index_wscale),
- le32_to_cpu(nesqp->nesqp_context->rcv_wnd),
- le32_to_cpu(nesqp->nesqp_context->misc));
- nes_debug(NES_DBG_CM, " snd_wnd = 0x%08X.\n", le32_to_cpu(nesqp->nesqp_context->snd_wnd));
- nes_debug(NES_DBG_CM, " snd_cwnd = 0x%08X.\n", le32_to_cpu(nesqp->nesqp_context->cwnd));
- nes_debug(NES_DBG_CM, " max_swnd = 0x%08X.\n", le32_to_cpu(nesqp->nesqp_context->max_snd_wnd));
-
- nes_debug(NES_DBG_CM, "Change cm_node state to TSA\n");
- cm_node->state = NES_CM_STATE_TSA;
-
- return ret;
-}
-
-
-/**
- * nes_cm_disconn
- */
-int nes_cm_disconn(struct nes_qp *nesqp)
-{
- struct disconn_work *work;
-
- work = kzalloc(sizeof *work, GFP_ATOMIC);
- if (!work)
- return -ENOMEM; /* Timer will clean up */
-
- nes_add_ref(&nesqp->ibqp);
- work->nesqp = nesqp;
- INIT_WORK(&work->work, nes_disconnect_worker);
- queue_work(g_cm_core->disconn_wq, &work->work);
- return 0;
-}
-
-
-/**
- * nes_disconnect_worker
- */
-static void nes_disconnect_worker(struct work_struct *work)
-{
- struct disconn_work *dwork = container_of(work, struct disconn_work, work);
- struct nes_qp *nesqp = dwork->nesqp;
-
- kfree(dwork);
- nes_debug(NES_DBG_CM, "processing AEQE id 0x%04X for QP%u.\n",
- nesqp->last_aeq, nesqp->hwqp.qp_id);
- nes_cm_disconn_true(nesqp);
- nes_rem_ref(&nesqp->ibqp);
-}
-
-
-/**
- * nes_cm_disconn_true
- */
-static int nes_cm_disconn_true(struct nes_qp *nesqp)
-{
- unsigned long flags;
- int ret = 0;
- struct iw_cm_id *cm_id;
- struct iw_cm_event cm_event;
- struct nes_vnic *nesvnic;
- u16 last_ae;
- u8 original_hw_tcp_state;
- u8 original_ibqp_state;
- int disconn_status = 0;
- int issue_disconn = 0;
- int issue_close = 0;
- int issue_flush = 0;
- u32 flush_q = NES_CQP_FLUSH_RQ;
- struct ib_event ibevent;
-
- if (!nesqp) {
- nes_debug(NES_DBG_CM, "disconnect_worker nesqp is NULL\n");
- return -1;
- }
-
- spin_lock_irqsave(&nesqp->lock, flags);
- cm_id = nesqp->cm_id;
- /* make sure we havent already closed this connection */
- if (!cm_id) {
- nes_debug(NES_DBG_CM, "QP%u disconnect_worker cmid is NULL\n",
- nesqp->hwqp.qp_id);
- spin_unlock_irqrestore(&nesqp->lock, flags);
- return -1;
- }
-
- nesvnic = to_nesvnic(nesqp->ibqp.device);
- nes_debug(NES_DBG_CM, "Disconnecting QP%u\n", nesqp->hwqp.qp_id);
-
- original_hw_tcp_state = nesqp->hw_tcp_state;
- original_ibqp_state = nesqp->ibqp_state;
- last_ae = nesqp->last_aeq;
-
- if (nesqp->term_flags) {
- issue_disconn = 1;
- issue_close = 1;
- nesqp->cm_id = NULL;
- del_timer(&nesqp->terminate_timer);
- if (nesqp->flush_issued == 0) {
- nesqp->flush_issued = 1;
- issue_flush = 1;
- }
- } else if ((original_hw_tcp_state == NES_AEQE_TCP_STATE_CLOSE_WAIT) ||
- ((original_ibqp_state == IB_QPS_RTS) &&
- (last_ae == NES_AEQE_AEID_LLP_CONNECTION_RESET))) {
- issue_disconn = 1;
- if (last_ae == NES_AEQE_AEID_LLP_CONNECTION_RESET)
- disconn_status = -ECONNRESET;
- }
-
- if (((original_hw_tcp_state == NES_AEQE_TCP_STATE_CLOSED) ||
- (original_hw_tcp_state == NES_AEQE_TCP_STATE_TIME_WAIT) ||
- (last_ae == NES_AEQE_AEID_RDMAP_ROE_BAD_LLP_CLOSE) ||
- (last_ae == NES_AEQE_AEID_LLP_CONNECTION_RESET))) {
- issue_close = 1;
- nesqp->cm_id = NULL;
- if (nesqp->flush_issued == 0) {
- nesqp->flush_issued = 1;
- issue_flush = 1;
- }
- }
-
- spin_unlock_irqrestore(&nesqp->lock, flags);
-
- if ((issue_flush) && (nesqp->destroyed == 0)) {
- /* Flush the queue(s) */
- if (nesqp->hw_iwarp_state >= NES_AEQE_IWARP_STATE_TERMINATE)
- flush_q |= NES_CQP_FLUSH_SQ;
- flush_wqes(nesvnic->nesdev, nesqp, flush_q, 1);
-
- if (nesqp->term_flags) {
- ibevent.device = nesqp->ibqp.device;
- ibevent.event = nesqp->terminate_eventtype;
- ibevent.element.qp = &nesqp->ibqp;
- if (nesqp->ibqp.event_handler)
- nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context);
- }
- }
-
- if ((cm_id) && (cm_id->event_handler)) {
- if (issue_disconn) {
- atomic_inc(&cm_disconnects);
- cm_event.event = IW_CM_EVENT_DISCONNECT;
- cm_event.status = disconn_status;
- cm_event.local_addr = cm_id->m_local_addr;
- cm_event.remote_addr = cm_id->m_remote_addr;
- cm_event.private_data = NULL;
- cm_event.private_data_len = 0;
-
- nes_debug(NES_DBG_CM, "Generating a CM Disconnect Event"
- " for QP%u, SQ Head = %u, SQ Tail = %u. "
- "cm_id = %p, refcount = %u.\n",
- nesqp->hwqp.qp_id, nesqp->hwqp.sq_head,
- nesqp->hwqp.sq_tail, cm_id,
- atomic_read(&nesqp->refcount));
-
- ret = cm_id->event_handler(cm_id, &cm_event);
- if (ret)
- nes_debug(NES_DBG_CM, "OFA CM event_handler "
- "returned, ret=%d\n", ret);
- }
-
- if (issue_close) {
- atomic_inc(&cm_closes);
- nes_disconnect(nesqp, 1);
-
- cm_id->provider_data = nesqp;
- /* Send up the close complete event */
- cm_event.event = IW_CM_EVENT_CLOSE;
- cm_event.status = 0;
- cm_event.provider_data = cm_id->provider_data;
- cm_event.local_addr = cm_id->m_local_addr;
- cm_event.remote_addr = cm_id->m_remote_addr;
- cm_event.private_data = NULL;
- cm_event.private_data_len = 0;
-
- ret = cm_id->event_handler(cm_id, &cm_event);
- if (ret)
- nes_debug(NES_DBG_CM, "OFA CM event_handler returned, ret=%d\n", ret);
-
- cm_id->rem_ref(cm_id);
- }
- }
-
- return 0;
-}
-
-
-/**
- * nes_disconnect
- */
-static int nes_disconnect(struct nes_qp *nesqp, int abrupt)
-{
- int ret = 0;
- struct nes_vnic *nesvnic;
- struct nes_device *nesdev;
- struct nes_ib_device *nesibdev;
-
- nesvnic = to_nesvnic(nesqp->ibqp.device);
- if (!nesvnic)
- return -EINVAL;
-
- nesdev = nesvnic->nesdev;
- nesibdev = nesvnic->nesibdev;
-
- nes_debug(NES_DBG_CM, "netdev refcnt = %u.\n",
- netdev_refcnt_read(nesvnic->netdev));
-
- if (nesqp->active_conn) {
-
- /* indicate this connection is NOT active */
- nesqp->active_conn = 0;
- } else {
- /* Need to free the Last Streaming Mode Message */
- if (nesqp->ietf_frame) {
- if (nesqp->lsmm_mr)
- nesibdev->ibdev.dereg_mr(nesqp->lsmm_mr);
- pci_free_consistent(nesdev->pcidev,
- nesqp->private_data_len + nesqp->ietf_frame_size,
- nesqp->ietf_frame, nesqp->ietf_frame_pbase);
- }
- }
-
- /* close the CM node down if it is still active */
- if (nesqp->cm_node) {
- nes_debug(NES_DBG_CM, "Call close API\n");
-
- g_cm_core->api->close(g_cm_core, nesqp->cm_node);
- }
-
- return ret;
-}
-
-
-/**
- * nes_accept
- */
-int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
-{
- u64 u64temp;
- struct ib_qp *ibqp;
- struct nes_qp *nesqp;
- struct nes_vnic *nesvnic;
- struct nes_device *nesdev;
- struct nes_cm_node *cm_node;
- struct nes_adapter *adapter;
- struct ib_qp_attr attr;
- struct iw_cm_event cm_event;
- struct nes_hw_qp_wqe *wqe;
- struct nes_v4_quad nes_quad;
- u32 crc_value;
- int ret;
- int passive_state;
- struct nes_ib_device *nesibdev;
- struct ib_mr *ibmr = NULL;
- struct nes_pd *nespd;
- u64 tagged_offset;
- u8 mpa_frame_offset = 0;
- struct ietf_mpa_v2 *mpa_v2_frame;
- u8 start_addr = 0;
- u8 *start_ptr = &start_addr;
- u8 **start_buff = &start_ptr;
- u16 buff_len = 0;
- struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->m_local_addr;
- struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->m_remote_addr;
-
- ibqp = nes_get_qp(cm_id->device, conn_param->qpn);
- if (!ibqp)
- return -EINVAL;
-
- /* get all our handles */
- nesqp = to_nesqp(ibqp);
- nesvnic = to_nesvnic(nesqp->ibqp.device);
- nesdev = nesvnic->nesdev;
- adapter = nesdev->nesadapter;
-
- cm_node = (struct nes_cm_node *)cm_id->provider_data;
- nes_debug(NES_DBG_CM, "nes_accept: cm_node= %p nesvnic=%p, netdev=%p,"
- "%s\n", cm_node, nesvnic, nesvnic->netdev,
- nesvnic->netdev->name);
-
- if (NES_CM_STATE_LISTENER_DESTROYED == cm_node->state) {
- if (cm_node->loopbackpartner)
- rem_ref_cm_node(cm_node->cm_core, cm_node->loopbackpartner);
- rem_ref_cm_node(cm_node->cm_core, cm_node);
- return -EINVAL;
- }
-
- passive_state = atomic_add_return(1, &cm_node->passive_state);
- if (passive_state == NES_SEND_RESET_EVENT) {
- rem_ref_cm_node(cm_node->cm_core, cm_node);
- return -ECONNRESET;
- }
- /* associate the node with the QP */
- nesqp->cm_node = (void *)cm_node;
- cm_node->nesqp = nesqp;
-
-
- nes_debug(NES_DBG_CM, "QP%u, cm_node=%p, jiffies = %lu listener = %p\n",
- nesqp->hwqp.qp_id, cm_node, jiffies, cm_node->listener);
- atomic_inc(&cm_accepts);
-
- nes_debug(NES_DBG_CM, "netdev refcnt = %u.\n",
- netdev_refcnt_read(nesvnic->netdev));
-
- nesqp->ietf_frame_size = sizeof(struct ietf_mpa_v2);
- /* allocate the ietf frame and space for private data */
- nesqp->ietf_frame = pci_alloc_consistent(nesdev->pcidev,
- nesqp->ietf_frame_size + conn_param->private_data_len,
- &nesqp->ietf_frame_pbase);
-
- if (!nesqp->ietf_frame) {
- nes_debug(NES_DBG_CM, "Unable to allocate memory for private data\n");
- return -ENOMEM;
- }
- mpa_v2_frame = (struct ietf_mpa_v2 *)nesqp->ietf_frame;
-
- if (cm_node->mpa_frame_rev == IETF_MPA_V1)
- mpa_frame_offset = 4;
-
- if (cm_node->mpa_frame_rev == IETF_MPA_V1 ||
- cm_node->mpav2_ird_ord == IETF_NO_IRD_ORD) {
- record_ird_ord(cm_node, (u16)conn_param->ird, (u16)conn_param->ord);
- }
-
- memcpy(mpa_v2_frame->priv_data, conn_param->private_data,
- conn_param->private_data_len);
-
- cm_build_mpa_frame(cm_node, start_buff, &buff_len, nesqp->ietf_frame, MPA_KEY_REPLY);
- nesqp->private_data_len = conn_param->private_data_len;
-
- /* setup our first outgoing iWarp send WQE (the IETF frame response) */
- wqe = &nesqp->hwqp.sq_vbase[0];
-
- if (raddr->sin_addr.s_addr != laddr->sin_addr.s_addr) {
- u64temp = (unsigned long)nesqp;
- nesibdev = nesvnic->nesibdev;
- nespd = nesqp->nespd;
- tagged_offset = (u64)(unsigned long)*start_buff;
- ibmr = nes_reg_phys_mr(&nespd->ibpd,
- nesqp->ietf_frame_pbase + mpa_frame_offset,
- buff_len, IB_ACCESS_LOCAL_WRITE,
- &tagged_offset);
- if (IS_ERR(ibmr)) {
- nes_debug(NES_DBG_CM, "Unable to register memory region"
- "for lSMM for cm_node = %p \n",
- cm_node);
- pci_free_consistent(nesdev->pcidev,
- nesqp->private_data_len + nesqp->ietf_frame_size,
- nesqp->ietf_frame, nesqp->ietf_frame_pbase);
- return PTR_ERR(ibmr);
- }
-
- ibmr->pd = &nespd->ibpd;
- ibmr->device = nespd->ibpd.device;
- nesqp->lsmm_mr = ibmr;
-
- u64temp |= NES_SW_CONTEXT_ALIGN >> 1;
- set_wqe_64bit_value(wqe->wqe_words,
- NES_IWARP_SQ_WQE_COMP_CTX_LOW_IDX,
- u64temp);
- wqe->wqe_words[NES_IWARP_SQ_WQE_MISC_IDX] =
- cpu_to_le32(NES_IWARP_SQ_WQE_STREAMING |
- NES_IWARP_SQ_WQE_WRPDU);
- wqe->wqe_words[NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX] =
- cpu_to_le32(buff_len);
- set_wqe_64bit_value(wqe->wqe_words,
- NES_IWARP_SQ_WQE_FRAG0_LOW_IDX,
- (u64)(unsigned long)(*start_buff));
- wqe->wqe_words[NES_IWARP_SQ_WQE_LENGTH0_IDX] =
- cpu_to_le32(buff_len);
- wqe->wqe_words[NES_IWARP_SQ_WQE_STAG0_IDX] = ibmr->lkey;
- if (nesqp->sq_kmapped) {
- nesqp->sq_kmapped = 0;
- kunmap(nesqp->page);
- }
-
- nesqp->nesqp_context->ird_ord_sizes |=
- cpu_to_le32(NES_QPCONTEXT_ORDIRD_LSMM_PRESENT |
- NES_QPCONTEXT_ORDIRD_WRPDU);
- } else {
- nesqp->nesqp_context->ird_ord_sizes |=
- cpu_to_le32(NES_QPCONTEXT_ORDIRD_WRPDU);
- }
- nesqp->skip_lsmm = 1;
-
- /* Cache the cm_id in the qp */
- nesqp->cm_id = cm_id;
- cm_node->cm_id = cm_id;
-
- /* nesqp->cm_node = (void *)cm_id->provider_data; */
- cm_id->provider_data = nesqp;
- nesqp->active_conn = 0;
-
- if (cm_node->state == NES_CM_STATE_TSA)
- nes_debug(NES_DBG_CM, "Already state = TSA for cm_node=%p\n",
- cm_node);
-
- nes_cm_init_tsa_conn(nesqp, cm_node);
-
- nesqp->nesqp_context->tcpPorts[0] =
- cpu_to_le16(cm_node->loc_port);
- nesqp->nesqp_context->tcpPorts[1] =
- cpu_to_le16(cm_node->rem_port);
-
- nesqp->nesqp_context->ip0 = cpu_to_le32(cm_node->rem_addr);
-
- nesqp->nesqp_context->misc2 |= cpu_to_le32(
- (u32)PCI_FUNC(nesdev->pcidev->devfn) <<
- NES_QPCONTEXT_MISC2_SRC_IP_SHIFT);
-
- nesqp->nesqp_context->arp_index_vlan |=
- cpu_to_le32(nes_arp_table(nesdev,
- le32_to_cpu(nesqp->nesqp_context->ip0), NULL,
- NES_ARP_RESOLVE) << 16);
-
- nesqp->nesqp_context->ts_val_delta = cpu_to_le32(
- jiffies - nes_read_indexed(nesdev, NES_IDX_TCP_NOW));
-
- nesqp->nesqp_context->ird_index = cpu_to_le32(nesqp->hwqp.qp_id);
-
- nesqp->nesqp_context->ird_ord_sizes |= cpu_to_le32(
- ((u32)1 << NES_QPCONTEXT_ORDIRD_IWARP_MODE_SHIFT));
- nesqp->nesqp_context->ird_ord_sizes |=
- cpu_to_le32((u32)cm_node->ord_size);
-
- memset(&nes_quad, 0, sizeof(nes_quad));
- nes_quad.DstIpAdrIndex =
- cpu_to_le32((u32)PCI_FUNC(nesdev->pcidev->devfn) << 24);
- nes_quad.SrcIpadr = htonl(cm_node->rem_addr);
- nes_quad.TcpPorts[0] = htons(cm_node->rem_port);
- nes_quad.TcpPorts[1] = htons(cm_node->loc_port);
-
- /* Produce hash key */
- crc_value = get_crc_value(&nes_quad);
- nesqp->hte_index = cpu_to_be32(crc_value ^ 0xffffffff);
- nes_debug(NES_DBG_CM, "HTE Index = 0x%08X, CRC = 0x%08X\n",
- nesqp->hte_index, nesqp->hte_index & adapter->hte_index_mask);
-
- nesqp->hte_index &= adapter->hte_index_mask;
- nesqp->nesqp_context->hte_index = cpu_to_le32(nesqp->hte_index);
-
- cm_node->cm_core->api->accelerated(cm_node->cm_core, cm_node);
-
- nes_debug(NES_DBG_CM, "QP%u, Destination IP = 0x%08X:0x%04X, local = "
- "0x%08X:0x%04X, rcv_nxt=0x%08X, snd_nxt=0x%08X, mpa + "
- "private data length=%u.\n", nesqp->hwqp.qp_id,
- ntohl(raddr->sin_addr.s_addr), ntohs(raddr->sin_port),
- ntohl(laddr->sin_addr.s_addr), ntohs(laddr->sin_port),
- le32_to_cpu(nesqp->nesqp_context->rcv_nxt),
- le32_to_cpu(nesqp->nesqp_context->snd_nxt),
- buff_len);
-
- /* notify OF layer that accept event was successful */
- cm_id->add_ref(cm_id);
- nes_add_ref(&nesqp->ibqp);
-
- cm_event.event = IW_CM_EVENT_ESTABLISHED;
- cm_event.status = 0;
- cm_event.provider_data = (void *)nesqp;
- cm_event.local_addr = cm_id->m_local_addr;
- cm_event.remote_addr = cm_id->m_remote_addr;
- cm_event.private_data = NULL;
- cm_event.private_data_len = 0;
- cm_event.ird = cm_node->ird_size;
- cm_event.ord = cm_node->ord_size;
-
- ret = cm_id->event_handler(cm_id, &cm_event);
- attr.qp_state = IB_QPS_RTS;
- nes_modify_qp(&nesqp->ibqp, &attr, IB_QP_STATE, NULL);
- if (cm_node->loopbackpartner) {
- cm_node->loopbackpartner->mpa_frame_size =
- nesqp->private_data_len;
- /* copy entire MPA frame to our cm_node's frame */
- memcpy(cm_node->loopbackpartner->mpa_frame_buf,
- conn_param->private_data, conn_param->private_data_len);
- create_event(cm_node->loopbackpartner, NES_CM_EVENT_CONNECTED);
- }
- if (ret)
- printk(KERN_ERR "%s[%u] OFA CM event_handler returned, "
- "ret=%d\n", __func__, __LINE__, ret);
-
- return 0;
-}
-
-
-/**
- * nes_reject
- */
-int nes_reject(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
-{
- struct nes_cm_node *cm_node;
- struct nes_cm_node *loopback;
- struct nes_cm_core *cm_core;
- u8 *start_buff;
-
- atomic_inc(&cm_rejects);
- cm_node = (struct nes_cm_node *)cm_id->provider_data;
- loopback = cm_node->loopbackpartner;
- cm_core = cm_node->cm_core;
- cm_node->cm_id = cm_id;
-
- if (pdata_len + sizeof(struct ietf_mpa_v2) > MAX_CM_BUFFER)
- return -EINVAL;
-
- if (loopback) {
- memcpy(&loopback->mpa_frame.priv_data, pdata, pdata_len);
- loopback->mpa_frame.priv_data_len = pdata_len;
- loopback->mpa_frame_size = pdata_len;
- } else {
- start_buff = &cm_node->mpa_frame_buf[0] + sizeof(struct ietf_mpa_v2);
- cm_node->mpa_frame_size = pdata_len;
- memcpy(start_buff, pdata, pdata_len);
- }
- return cm_core->api->reject(cm_core, cm_node);
-}
-
-
-/**
- * nes_connect
- * setup and launch cm connect node
- */
-int nes_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
-{
- struct ib_qp *ibqp;
- struct nes_qp *nesqp;
- struct nes_vnic *nesvnic;
- struct nes_device *nesdev;
- struct nes_cm_node *cm_node;
- struct nes_cm_info cm_info;
- int apbvt_set = 0;
- struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->m_local_addr;
- struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->m_remote_addr;
-
- if (cm_id->remote_addr.ss_family != AF_INET)
- return -ENOSYS;
- ibqp = nes_get_qp(cm_id->device, conn_param->qpn);
- if (!ibqp)
- return -EINVAL;
- nesqp = to_nesqp(ibqp);
- if (!nesqp)
- return -EINVAL;
- nesvnic = to_nesvnic(nesqp->ibqp.device);
- if (!nesvnic)
- return -EINVAL;
- nesdev = nesvnic->nesdev;
- if (!nesdev)
- return -EINVAL;
-
- if (!laddr->sin_port || !raddr->sin_port)
- return -EINVAL;
-
- nes_debug(NES_DBG_CM, "QP%u, current IP = 0x%08X, Destination IP = "
- "0x%08X:0x%04X, local = 0x%08X:0x%04X.\n", nesqp->hwqp.qp_id,
- ntohl(nesvnic->local_ipaddr), ntohl(raddr->sin_addr.s_addr),
- ntohs(raddr->sin_port), ntohl(laddr->sin_addr.s_addr),
- ntohs(laddr->sin_port));
-
- atomic_inc(&cm_connects);
- nesqp->active_conn = 1;
-
- /* cache the cm_id in the qp */
- nesqp->cm_id = cm_id;
- cm_id->provider_data = nesqp;
- nesqp->private_data_len = conn_param->private_data_len;
-
- nes_debug(NES_DBG_CM, "requested ord = 0x%08X.\n", (u32)conn_param->ord);
- nes_debug(NES_DBG_CM, "mpa private data len =%u\n",
- conn_param->private_data_len);
-
- /* set up the connection params for the node */
- cm_info.loc_addr = ntohl(laddr->sin_addr.s_addr);
- cm_info.loc_port = ntohs(laddr->sin_port);
- cm_info.rem_addr = ntohl(raddr->sin_addr.s_addr);
- cm_info.rem_port = ntohs(raddr->sin_port);
- cm_info.cm_id = cm_id;
- cm_info.conn_type = NES_CM_IWARP_CONN_TYPE;
-
- if (laddr->sin_addr.s_addr != raddr->sin_addr.s_addr) {
- nes_manage_apbvt(nesvnic, cm_info.loc_port,
- PCI_FUNC(nesdev->pcidev->devfn),
- NES_MANAGE_APBVT_ADD);
- apbvt_set = 1;
- }
-
- cm_id->add_ref(cm_id);
-
- /* create a connect CM node connection */
- cm_node = g_cm_core->api->connect(g_cm_core, nesvnic,
- conn_param->private_data_len, (void *)conn_param->private_data,
- &cm_info);
- if (!cm_node) {
- if (apbvt_set)
- nes_manage_apbvt(nesvnic, cm_info.loc_port,
- PCI_FUNC(nesdev->pcidev->devfn),
- NES_MANAGE_APBVT_DEL);
-
- nes_debug(NES_DBG_NLMSG, "Delete loc_port = %04X\n",
- cm_info.loc_port);
- cm_id->rem_ref(cm_id);
- return -ENOMEM;
- }
-
- record_ird_ord(cm_node, (u16)conn_param->ird, (u16)conn_param->ord);
- if (cm_node->send_rdma0_op == SEND_RDMA_READ_ZERO &&
- cm_node->ord_size == 0)
- cm_node->ord_size = 1;
-
- cm_node->apbvt_set = apbvt_set;
- cm_node->tos = cm_id->tos;
- nesqp->cm_node = cm_node;
- cm_node->nesqp = nesqp;
- nes_add_ref(&nesqp->ibqp);
-
- return 0;
-}
-
-
-/**
- * nes_create_listen
- */
-int nes_create_listen(struct iw_cm_id *cm_id, int backlog)
-{
- struct nes_vnic *nesvnic;
- struct nes_cm_listener *cm_node;
- struct nes_cm_info cm_info;
- int err;
- struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->m_local_addr;
-
- nes_debug(NES_DBG_CM, "cm_id = %p, local port = 0x%04X.\n",
- cm_id, ntohs(laddr->sin_port));
-
- if (cm_id->m_local_addr.ss_family != AF_INET)
- return -ENOSYS;
- nesvnic = to_nesvnic(cm_id->device);
- if (!nesvnic)
- return -EINVAL;
-
- nes_debug(NES_DBG_CM, "nesvnic=%p, netdev=%p, %s\n",
- nesvnic, nesvnic->netdev, nesvnic->netdev->name);
-
- nes_debug(NES_DBG_CM, "nesvnic->local_ipaddr=0x%08x, sin_addr.s_addr=0x%08x\n",
- nesvnic->local_ipaddr, laddr->sin_addr.s_addr);
-
- /* setup listen params in our api call struct */
- cm_info.loc_addr = ntohl(nesvnic->local_ipaddr);
- cm_info.loc_port = ntohs(laddr->sin_port);
- cm_info.backlog = backlog;
- cm_info.cm_id = cm_id;
-
- cm_info.conn_type = NES_CM_IWARP_CONN_TYPE;
-
- cm_node = g_cm_core->api->listen(g_cm_core, nesvnic, &cm_info);
- if (!cm_node) {
- printk(KERN_ERR "%s[%u] Error returned from listen API call\n",
- __func__, __LINE__);
- return -ENOMEM;
- }
-
- cm_id->provider_data = cm_node;
- cm_node->tos = cm_id->tos;
-
- if (!cm_node->reused_node) {
- err = nes_manage_apbvt(nesvnic, cm_node->loc_port,
- PCI_FUNC(nesvnic->nesdev->pcidev->devfn),
- NES_MANAGE_APBVT_ADD);
- if (err) {
- printk(KERN_ERR "nes_manage_apbvt call returned %d.\n",
- err);
- g_cm_core->api->stop_listener(g_cm_core, (void *)cm_node);
- return err;
- }
- atomic_inc(&cm_listens_created);
- }
-
- cm_id->add_ref(cm_id);
- cm_id->provider_data = (void *)cm_node;
-
-
- return 0;
-}
-
-
-/**
- * nes_destroy_listen
- */
-int nes_destroy_listen(struct iw_cm_id *cm_id)
-{
- if (cm_id->provider_data)
- g_cm_core->api->stop_listener(g_cm_core, cm_id->provider_data);
- else
- nes_debug(NES_DBG_CM, "cm_id->provider_data was NULL\n");
-
- cm_id->rem_ref(cm_id);
-
- return 0;
-}
-
-
-/**
- * nes_cm_recv
- */
-int nes_cm_recv(struct sk_buff *skb, struct net_device *netdevice)
-{
- int rc = 0;
-
- cm_packets_received++;
- if ((g_cm_core) && (g_cm_core->api))
- rc = g_cm_core->api->recv_pkt(g_cm_core, netdev_priv(netdevice), skb);
- else
- nes_debug(NES_DBG_CM, "Unable to process packet for CM,"
- " cm is not setup properly.\n");
-
- return rc;
-}
-
-
-/**
- * nes_cm_start
- * Start and init a cm core module
- */
-int nes_cm_start(void)
-{
- nes_debug(NES_DBG_CM, "\n");
- /* create the primary CM core, pass this handle to subsequent core inits */
- g_cm_core = nes_cm_alloc_core();
- if (g_cm_core)
- return 0;
- else
- return -ENOMEM;
-}
-
-
-/**
- * nes_cm_stop
- * stop and dealloc all cm core instances
- */
-int nes_cm_stop(void)
-{
- g_cm_core->api->destroy_cm_core(g_cm_core);
- return 0;
-}
-
-
-/**
- * cm_event_connected
- * handle a connected event, setup QPs and HW
- */
-static void cm_event_connected(struct nes_cm_event *event)
-{
- struct nes_qp *nesqp;
- struct nes_vnic *nesvnic;
- struct nes_device *nesdev;
- struct nes_cm_node *cm_node;
- struct nes_adapter *nesadapter;
- struct ib_qp_attr attr;
- struct iw_cm_id *cm_id;
- struct iw_cm_event cm_event;
- struct nes_v4_quad nes_quad;
- u32 crc_value;
- int ret;
- struct sockaddr_in *laddr;
- struct sockaddr_in *raddr;
- struct sockaddr_in *cm_event_laddr;
-
- /* get all our handles */
- cm_node = event->cm_node;
- cm_id = cm_node->cm_id;
- nes_debug(NES_DBG_CM, "cm_event_connected - %p - cm_id = %p\n", cm_node, cm_id);
- nesqp = (struct nes_qp *)cm_id->provider_data;
- nesvnic = to_nesvnic(nesqp->ibqp.device);
- nesdev = nesvnic->nesdev;
- nesadapter = nesdev->nesadapter;
- laddr = (struct sockaddr_in *)&cm_id->m_local_addr;
- raddr = (struct sockaddr_in *)&cm_id->m_remote_addr;
- cm_event_laddr = (struct sockaddr_in *)&cm_event.local_addr;
-
- if (nesqp->destroyed)
- return;
- atomic_inc(&cm_connecteds);
- nes_debug(NES_DBG_CM, "QP%u attempting to connect to 0x%08X:0x%04X on"
- " local port 0x%04X. jiffies = %lu.\n",
- nesqp->hwqp.qp_id, ntohl(raddr->sin_addr.s_addr),
- ntohs(raddr->sin_port), ntohs(laddr->sin_port), jiffies);
-
- nes_cm_init_tsa_conn(nesqp, cm_node);
-
- /* set the QP tsa context */
- nesqp->nesqp_context->tcpPorts[0] =
- cpu_to_le16(cm_node->loc_port);
- nesqp->nesqp_context->tcpPorts[1] =
- cpu_to_le16(cm_node->rem_port);
- nesqp->nesqp_context->ip0 = cpu_to_le32(cm_node->rem_addr);
-
- nesqp->nesqp_context->misc2 |= cpu_to_le32(
- (u32)PCI_FUNC(nesdev->pcidev->devfn) <<
- NES_QPCONTEXT_MISC2_SRC_IP_SHIFT);
- nesqp->nesqp_context->arp_index_vlan |= cpu_to_le32(
- nes_arp_table(nesdev,
- le32_to_cpu(nesqp->nesqp_context->ip0),
- NULL, NES_ARP_RESOLVE) << 16);
- nesqp->nesqp_context->ts_val_delta = cpu_to_le32(
- jiffies - nes_read_indexed(nesdev, NES_IDX_TCP_NOW));
- nesqp->nesqp_context->ird_index = cpu_to_le32(nesqp->hwqp.qp_id);
- nesqp->nesqp_context->ird_ord_sizes |=
- cpu_to_le32((u32)1 <<
- NES_QPCONTEXT_ORDIRD_IWARP_MODE_SHIFT);
- nesqp->nesqp_context->ird_ord_sizes |=
- cpu_to_le32((u32)cm_node->ord_size);
-
- /* Adjust tail for not having a LSMM */
- /*nesqp->hwqp.sq_tail = 1;*/
-
- build_rdma0_msg(cm_node, &nesqp);
-
- nes_write32(nesdev->regs + NES_WQE_ALLOC,
- (1 << 24) | 0x00800000 | nesqp->hwqp.qp_id);
-
- memset(&nes_quad, 0, sizeof(nes_quad));
-
- nes_quad.DstIpAdrIndex =
- cpu_to_le32((u32)PCI_FUNC(nesdev->pcidev->devfn) << 24);
- nes_quad.SrcIpadr = htonl(cm_node->rem_addr);
- nes_quad.TcpPorts[0] = htons(cm_node->rem_port);
- nes_quad.TcpPorts[1] = htons(cm_node->loc_port);
-
- /* Produce hash key */
- crc_value = get_crc_value(&nes_quad);
- nesqp->hte_index = cpu_to_be32(crc_value ^ 0xffffffff);
- nes_debug(NES_DBG_CM, "HTE Index = 0x%08X, After CRC = 0x%08X\n",
- nesqp->hte_index, nesqp->hte_index & nesadapter->hte_index_mask);
-
- nesqp->hte_index &= nesadapter->hte_index_mask;
- nesqp->nesqp_context->hte_index = cpu_to_le32(nesqp->hte_index);
-
- nesqp->ietf_frame = &cm_node->mpa_frame;
- nesqp->private_data_len = (u8)cm_node->mpa_frame_size;
- cm_node->cm_core->api->accelerated(cm_node->cm_core, cm_node);
-
- /* notify OF layer we successfully created the requested connection */
- cm_event.event = IW_CM_EVENT_CONNECT_REPLY;
- cm_event.status = 0;
- cm_event.provider_data = cm_id->provider_data;
- cm_event_laddr->sin_family = AF_INET;
- cm_event_laddr->sin_port = laddr->sin_port;
- cm_event.remote_addr = cm_id->m_remote_addr;
-
- cm_event.private_data = (void *)event->cm_node->mpa_frame_buf;
- cm_event.private_data_len = (u8)event->cm_node->mpa_frame_size;
- cm_event.ird = cm_node->ird_size;
- cm_event.ord = cm_node->ord_size;
-
- cm_event_laddr->sin_addr.s_addr = htonl(event->cm_info.loc_addr);
- ret = cm_id->event_handler(cm_id, &cm_event);
- nes_debug(NES_DBG_CM, "OFA CM event_handler returned, ret=%d\n", ret);
-
- if (ret)
- printk(KERN_ERR "%s[%u] OFA CM event_handler returned, "
- "ret=%d\n", __func__, __LINE__, ret);
- attr.qp_state = IB_QPS_RTS;
- nes_modify_qp(&nesqp->ibqp, &attr, IB_QP_STATE, NULL);
-
- nes_debug(NES_DBG_CM, "Exiting connect thread for QP%u. jiffies = "
- "%lu\n", nesqp->hwqp.qp_id, jiffies);
-
- return;
-}
-
-
-/**
- * cm_event_connect_error
- */
-static void cm_event_connect_error(struct nes_cm_event *event)
-{
- struct nes_qp *nesqp;
- struct iw_cm_id *cm_id;
- struct iw_cm_event cm_event;
- /* struct nes_cm_info cm_info; */
- int ret;
-
- if (!event->cm_node)
- return;
-
- cm_id = event->cm_node->cm_id;
- if (!cm_id)
- return;
-
- nes_debug(NES_DBG_CM, "cm_node=%p, cm_id=%p\n", event->cm_node, cm_id);
- nesqp = cm_id->provider_data;
-
- if (!nesqp)
- return;
-
- /* notify OF layer about this connection error event */
- /* cm_id->rem_ref(cm_id); */
- nesqp->cm_id = NULL;
- cm_id->provider_data = NULL;
- cm_event.event = IW_CM_EVENT_CONNECT_REPLY;
- cm_event.status = -ECONNRESET;
- cm_event.provider_data = cm_id->provider_data;
- cm_event.local_addr = cm_id->m_local_addr;
- cm_event.remote_addr = cm_id->m_remote_addr;
- cm_event.private_data = NULL;
- cm_event.private_data_len = 0;
-
-#ifdef CONFIG_INFINIBAND_NES_DEBUG
- {
- struct sockaddr_in *cm_event_laddr = (struct sockaddr_in *)
- &cm_event.local_addr;
- struct sockaddr_in *cm_event_raddr = (struct sockaddr_in *)
- &cm_event.remote_addr;
- nes_debug(NES_DBG_CM, "call CM_EVENT REJECTED, local_addr=%08x, remote_addr=%08x\n",
- cm_event_laddr->sin_addr.s_addr, cm_event_raddr->sin_addr.s_addr);
- }
-#endif
-
- ret = cm_id->event_handler(cm_id, &cm_event);
- nes_debug(NES_DBG_CM, "OFA CM event_handler returned, ret=%d\n", ret);
- if (ret)
- printk(KERN_ERR "%s[%u] OFA CM event_handler returned, "
- "ret=%d\n", __func__, __LINE__, ret);
- cm_id->rem_ref(cm_id);
-
- rem_ref_cm_node(event->cm_node->cm_core, event->cm_node);
- return;
-}
-
-
-/**
- * cm_event_reset
- */
-static void cm_event_reset(struct nes_cm_event *event)
-{
- struct nes_qp *nesqp;
- struct iw_cm_id *cm_id;
- struct iw_cm_event cm_event;
- /* struct nes_cm_info cm_info; */
- int ret;
-
- if (!event->cm_node)
- return;
-
- if (!event->cm_node->cm_id)
- return;
-
- cm_id = event->cm_node->cm_id;
-
- nes_debug(NES_DBG_CM, "%p - cm_id = %p\n", event->cm_node, cm_id);
- nesqp = cm_id->provider_data;
- if (!nesqp)
- return;
-
- nesqp->cm_id = NULL;
- /* cm_id->provider_data = NULL; */
- cm_event.event = IW_CM_EVENT_DISCONNECT;
- cm_event.status = -ECONNRESET;
- cm_event.provider_data = cm_id->provider_data;
- cm_event.local_addr = cm_id->m_local_addr;
- cm_event.remote_addr = cm_id->m_remote_addr;
- cm_event.private_data = NULL;
- cm_event.private_data_len = 0;
-
- cm_id->add_ref(cm_id);
- ret = cm_id->event_handler(cm_id, &cm_event);
- atomic_inc(&cm_closes);
- cm_event.event = IW_CM_EVENT_CLOSE;
- cm_event.status = 0;
- cm_event.provider_data = cm_id->provider_data;
- cm_event.local_addr = cm_id->m_local_addr;
- cm_event.remote_addr = cm_id->m_remote_addr;
- cm_event.private_data = NULL;
- cm_event.private_data_len = 0;
- nes_debug(NES_DBG_CM, "NODE %p Generating CLOSE\n", event->cm_node);
- ret = cm_id->event_handler(cm_id, &cm_event);
-
- nes_debug(NES_DBG_CM, "OFA CM event_handler returned, ret=%d\n", ret);
-
-
- /* notify OF layer about this connection error event */
- cm_id->rem_ref(cm_id);
-
- return;
-}
-
-
-/**
- * cm_event_mpa_req
- */
-static void cm_event_mpa_req(struct nes_cm_event *event)
-{
- struct iw_cm_id *cm_id;
- struct iw_cm_event cm_event;
- int ret;
- struct nes_cm_node *cm_node;
- struct sockaddr_in *cm_event_laddr = (struct sockaddr_in *)
- &cm_event.local_addr;
- struct sockaddr_in *cm_event_raddr = (struct sockaddr_in *)
- &cm_event.remote_addr;
-
- cm_node = event->cm_node;
- if (!cm_node)
- return;
- cm_id = cm_node->cm_id;
-
- atomic_inc(&cm_connect_reqs);
- nes_debug(NES_DBG_CM, "cm_node = %p - cm_id = %p, jiffies = %lu\n",
- cm_node, cm_id, jiffies);
-
- cm_event.event = IW_CM_EVENT_CONNECT_REQUEST;
- cm_event.status = 0;
- cm_event.provider_data = (void *)cm_node;
-
- cm_event_laddr->sin_family = AF_INET;
- cm_event_laddr->sin_port = htons(event->cm_info.loc_port);
- cm_event_laddr->sin_addr.s_addr = htonl(event->cm_info.loc_addr);
-
- cm_event_raddr->sin_family = AF_INET;
- cm_event_raddr->sin_port = htons(event->cm_info.rem_port);
- cm_event_raddr->sin_addr.s_addr = htonl(event->cm_info.rem_addr);
- cm_event.private_data = cm_node->mpa_frame_buf;
- cm_event.private_data_len = (u8)cm_node->mpa_frame_size;
- if (cm_node->mpa_frame_rev == IETF_MPA_V1) {
- cm_event.ird = NES_MAX_IRD;
- cm_event.ord = NES_MAX_ORD;
- } else {
- cm_event.ird = cm_node->ird_size;
- cm_event.ord = cm_node->ord_size;
- }
-
- ret = cm_id->event_handler(cm_id, &cm_event);
- if (ret)
- printk(KERN_ERR "%s[%u] OFA CM event_handler returned, ret=%d\n",
- __func__, __LINE__, ret);
- return;
-}
-
-
-static void cm_event_mpa_reject(struct nes_cm_event *event)
-{
- struct iw_cm_id *cm_id;
- struct iw_cm_event cm_event;
- struct nes_cm_node *cm_node;
- int ret;
- struct sockaddr_in *cm_event_laddr = (struct sockaddr_in *)
- &cm_event.local_addr;
- struct sockaddr_in *cm_event_raddr = (struct sockaddr_in *)
- &cm_event.remote_addr;
-
- cm_node = event->cm_node;
- if (!cm_node)
- return;
- cm_id = cm_node->cm_id;
-
- atomic_inc(&cm_connect_reqs);
- nes_debug(NES_DBG_CM, "cm_node = %p - cm_id = %p, jiffies = %lu\n",
- cm_node, cm_id, jiffies);
-
- cm_event.event = IW_CM_EVENT_CONNECT_REPLY;
- cm_event.status = -ECONNREFUSED;
- cm_event.provider_data = cm_id->provider_data;
-
- cm_event_laddr->sin_family = AF_INET;
- cm_event_laddr->sin_port = htons(event->cm_info.loc_port);
- cm_event_laddr->sin_addr.s_addr = htonl(event->cm_info.loc_addr);
-
- cm_event_raddr->sin_family = AF_INET;
- cm_event_raddr->sin_port = htons(event->cm_info.rem_port);
- cm_event_raddr->sin_addr.s_addr = htonl(event->cm_info.rem_addr);
-
- cm_event.private_data = cm_node->mpa_frame_buf;
- cm_event.private_data_len = (u8)cm_node->mpa_frame_size;
-
- nes_debug(NES_DBG_CM, "call CM_EVENT_MPA_REJECTED, local_addr=%08x, "
- "remove_addr=%08x\n",
- cm_event_laddr->sin_addr.s_addr,
- cm_event_raddr->sin_addr.s_addr);
-
- ret = cm_id->event_handler(cm_id, &cm_event);
- if (ret)
- printk(KERN_ERR "%s[%u] OFA CM event_handler returned, ret=%d\n",
- __func__, __LINE__, ret);
-
- return;
-}
-
-
-static void nes_cm_event_handler(struct work_struct *);
-
-/**
- * nes_cm_post_event
- * post an event to the cm event handler
- */
-static int nes_cm_post_event(struct nes_cm_event *event)
-{
- atomic_inc(&event->cm_node->cm_core->events_posted);
- add_ref_cm_node(event->cm_node);
- event->cm_info.cm_id->add_ref(event->cm_info.cm_id);
- INIT_WORK(&event->event_work, nes_cm_event_handler);
- nes_debug(NES_DBG_CM, "cm_node=%p queue_work, event=%p\n",
- event->cm_node, event);
-
- queue_work(event->cm_node->cm_core->event_wq, &event->event_work);
-
- nes_debug(NES_DBG_CM, "Exit\n");
- return 0;
-}
-
-
-/**
- * nes_cm_event_handler
- * worker function to handle cm events
- * will free instance of nes_cm_event
- */
-static void nes_cm_event_handler(struct work_struct *work)
-{
- struct nes_cm_event *event = container_of(work, struct nes_cm_event,
- event_work);
- struct nes_cm_core *cm_core;
-
- if ((!event) || (!event->cm_node) || (!event->cm_node->cm_core))
- return;
-
- cm_core = event->cm_node->cm_core;
- nes_debug(NES_DBG_CM, "event=%p, event->type=%u, events posted=%u\n",
- event, event->type, atomic_read(&cm_core->events_posted));
-
- switch (event->type) {
- case NES_CM_EVENT_MPA_REQ:
- cm_event_mpa_req(event);
- nes_debug(NES_DBG_CM, "cm_node=%p CM Event: MPA REQUEST\n",
- event->cm_node);
- break;
- case NES_CM_EVENT_RESET:
- nes_debug(NES_DBG_CM, "cm_node = %p CM Event: RESET\n",
- event->cm_node);
- cm_event_reset(event);
- break;
- case NES_CM_EVENT_CONNECTED:
- if ((!event->cm_node->cm_id) ||
- (event->cm_node->state != NES_CM_STATE_TSA))
- break;
- cm_event_connected(event);
- nes_debug(NES_DBG_CM, "CM Event: CONNECTED\n");
- break;
- case NES_CM_EVENT_MPA_REJECT:
- if ((!event->cm_node->cm_id) ||
- (event->cm_node->state == NES_CM_STATE_TSA))
- break;
- cm_event_mpa_reject(event);
- nes_debug(NES_DBG_CM, "CM Event: REJECT\n");
- break;
-
- case NES_CM_EVENT_ABORTED:
- if ((!event->cm_node->cm_id) ||
- (event->cm_node->state == NES_CM_STATE_TSA))
- break;
- cm_event_connect_error(event);
- nes_debug(NES_DBG_CM, "CM Event: ABORTED\n");
- break;
- case NES_CM_EVENT_DROPPED_PKT:
- nes_debug(NES_DBG_CM, "CM Event: DROPPED PKT\n");
- break;
- default:
- nes_debug(NES_DBG_CM, "CM Event: UNKNOWN EVENT TYPE\n");
- break;
- }
-
- atomic_dec(&cm_core->events_posted);
- event->cm_info.cm_id->rem_ref(event->cm_info.cm_id);
- rem_ref_cm_node(cm_core, event->cm_node);
- kfree(event);
-
- return;
-}
diff --git a/drivers/infiniband/hw/nes/nes_cm.h b/drivers/infiniband/hw/nes/nes_cm.h
deleted file mode 100644
index d827d03e3941..000000000000
--- a/drivers/infiniband/hw/nes/nes_cm.h
+++ /dev/null
@@ -1,471 +0,0 @@
-/*
- * Copyright (c) 2006 - 2014 Intel Corporation. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-
-#ifndef NES_CM_H
-#define NES_CM_H
-
-#define QUEUE_EVENTS
-
-#define NES_MANAGE_APBVT_DEL 0
-#define NES_MANAGE_APBVT_ADD 1
-
-#define NES_MPA_REQUEST_ACCEPT 1
-#define NES_MPA_REQUEST_REJECT 2
-
-/* IETF MPA -- defines, enums, structs */
-#define IEFT_MPA_KEY_REQ "MPA ID Req Frame"
-#define IEFT_MPA_KEY_REP "MPA ID Rep Frame"
-#define IETF_MPA_KEY_SIZE 16
-#define IETF_MPA_VERSION 1
-#define IETF_MAX_PRIV_DATA_LEN 512
-#define IETF_MPA_FRAME_SIZE 20
-#define IETF_RTR_MSG_SIZE 4
-#define IETF_MPA_V2_FLAG 0x10
-
-/* IETF RTR MSG Fields */
-#define IETF_PEER_TO_PEER 0x8000
-#define IETF_FLPDU_ZERO_LEN 0x4000
-#define IETF_RDMA0_WRITE 0x8000
-#define IETF_RDMA0_READ 0x4000
-#define IETF_NO_IRD_ORD 0x3FFF
-#define NES_MAX_IRD 0x40
-#define NES_MAX_ORD 0x7F
-
-enum ietf_mpa_flags {
- IETF_MPA_FLAGS_MARKERS = 0x80, /* receive Markers */
- IETF_MPA_FLAGS_CRC = 0x40, /* receive Markers */
- IETF_MPA_FLAGS_REJECT = 0x20, /* Reject */
-};
-
-struct ietf_mpa_v1 {
- u8 key[IETF_MPA_KEY_SIZE];
- u8 flags;
- u8 rev;
- __be16 priv_data_len;
- u8 priv_data[0];
-};
-
-#define ietf_mpa_req_resp_frame ietf_mpa_frame
-
-struct ietf_rtr_msg {
- __be16 ctrl_ird;
- __be16 ctrl_ord;
-};
-
-struct ietf_mpa_v2 {
- u8 key[IETF_MPA_KEY_SIZE];
- u8 flags;
- u8 rev;
- __be16 priv_data_len;
- struct ietf_rtr_msg rtr_msg;
- u8 priv_data[0];
-};
-
-struct nes_v4_quad {
- u32 rsvd0;
- __le32 DstIpAdrIndex; /* Only most significant 5 bits are valid */
- __be32 SrcIpadr;
- __be16 TcpPorts[2]; /* src is low, dest is high */
-};
-
-struct nes_cm_node;
-enum nes_timer_type {
- NES_TIMER_TYPE_SEND,
- NES_TIMER_TYPE_RECV,
- NES_TIMER_NODE_CLEANUP,
- NES_TIMER_TYPE_CLOSE,
-};
-
-#define NES_PASSIVE_STATE_INDICATED 0
-#define NES_DO_NOT_SEND_RESET_EVENT 1
-#define NES_SEND_RESET_EVENT 2
-
-#define MAX_NES_IFS 4
-
-#define SET_ACK 1
-#define SET_SYN 2
-#define SET_FIN 4
-#define SET_RST 8
-
-#define TCP_OPTIONS_PADDING 3
-
-struct option_base {
- u8 optionnum;
- u8 length;
-};
-
-enum option_numbers {
- OPTION_NUMBER_END,
- OPTION_NUMBER_NONE,
- OPTION_NUMBER_MSS,
- OPTION_NUMBER_WINDOW_SCALE,
- OPTION_NUMBER_SACK_PERM,
- OPTION_NUMBER_SACK,
- OPTION_NUMBER_WRITE0 = 0xbc
-};
-
-struct option_mss {
- u8 optionnum;
- u8 length;
- __be16 mss;
-};
-
-struct option_windowscale {
- u8 optionnum;
- u8 length;
- u8 shiftcount;
-};
-
-union all_known_options {
- char as_end;
- struct option_base as_base;
- struct option_mss as_mss;
- struct option_windowscale as_windowscale;
-};
-
-struct nes_timer_entry {
- struct list_head list;
- unsigned long timetosend; /* jiffies */
- struct sk_buff *skb;
- u32 type;
- u32 retrycount;
- u32 retranscount;
- u32 context;
- u32 seq_num;
- u32 send_retrans;
- int close_when_complete;
- struct net_device *netdev;
-};
-
-#define NES_DEFAULT_RETRYS 64
-#define NES_DEFAULT_RETRANS 8
-#ifdef CONFIG_INFINIBAND_NES_DEBUG
-#define NES_RETRY_TIMEOUT (1000*HZ/1000)
-#else
-#define NES_RETRY_TIMEOUT (3000*HZ/1000)
-#endif
-#define NES_SHORT_TIME (10)
-#define NES_LONG_TIME (2000*HZ/1000)
-#define NES_MAX_TIMEOUT ((unsigned long) (12*HZ))
-
-#define NES_CM_HASHTABLE_SIZE 1024
-#define NES_CM_TCP_TIMER_INTERVAL 3000
-#define NES_CM_DEFAULT_MTU 1540
-#define NES_CM_DEFAULT_FRAME_CNT 10
-#define NES_CM_THREAD_STACK_SIZE 256
-#define NES_CM_DEFAULT_RCV_WND 64240 // before we know that window scaling is allowed
-#define NES_CM_DEFAULT_RCV_WND_SCALED 256960 // after we know that window scaling is allowed
-#define NES_CM_DEFAULT_RCV_WND_SCALE 2
-#define NES_CM_DEFAULT_FREE_PKTS 0x000A
-#define NES_CM_FREE_PKT_LO_WATERMARK 2
-
-#define NES_CM_DEFAULT_MSS 536
-
-#define NES_CM_DEF_SEQ 0x159bf75f
-#define NES_CM_DEF_LOCAL_ID 0x3b47
-
-#define NES_CM_DEF_SEQ2 0x18ed5740
-#define NES_CM_DEF_LOCAL_ID2 0xb807
-#define MAX_CM_BUFFER (IETF_MPA_FRAME_SIZE + IETF_RTR_MSG_SIZE + IETF_MAX_PRIV_DATA_LEN)
-
-typedef u32 nes_addr_t;
-
-#define nes_cm_tsa_context nes_qp_context
-
-struct nes_qp;
-
-/* cm node transition states */
-enum nes_cm_node_state {
- NES_CM_STATE_UNKNOWN,
- NES_CM_STATE_INITED,
- NES_CM_STATE_LISTENING,
- NES_CM_STATE_SYN_RCVD,
- NES_CM_STATE_SYN_SENT,
- NES_CM_STATE_ONE_SIDE_ESTABLISHED,
- NES_CM_STATE_ESTABLISHED,
- NES_CM_STATE_ACCEPTING,
- NES_CM_STATE_MPAREQ_SENT,
- NES_CM_STATE_MPAREQ_RCVD,
- NES_CM_STATE_MPAREJ_RCVD,
- NES_CM_STATE_TSA,
- NES_CM_STATE_FIN_WAIT1,
- NES_CM_STATE_FIN_WAIT2,
- NES_CM_STATE_CLOSE_WAIT,
- NES_CM_STATE_TIME_WAIT,
- NES_CM_STATE_LAST_ACK,
- NES_CM_STATE_CLOSING,
- NES_CM_STATE_LISTENER_DESTROYED,
- NES_CM_STATE_CLOSED
-};
-
-enum mpa_frame_version {
- IETF_MPA_V1 = 1,
- IETF_MPA_V2 = 2
-};
-
-enum mpa_frame_key {
- MPA_KEY_REQUEST,
- MPA_KEY_REPLY
-};
-
-enum send_rdma0 {
- SEND_RDMA_READ_ZERO = 1,
- SEND_RDMA_WRITE_ZERO = 2
-};
-
-enum nes_tcpip_pkt_type {
- NES_PKT_TYPE_UNKNOWN,
- NES_PKT_TYPE_SYN,
- NES_PKT_TYPE_SYNACK,
- NES_PKT_TYPE_ACK,
- NES_PKT_TYPE_FIN,
- NES_PKT_TYPE_RST
-};
-
-
-/* type of nes connection */
-enum nes_cm_conn_type {
- NES_CM_IWARP_CONN_TYPE,
-};
-
-/* CM context params */
-struct nes_cm_tcp_context {
- u8 client;
-
- u32 loc_seq_num;
- u32 loc_ack_num;
- u32 rem_ack_num;
- u32 rcv_nxt;
-
- u32 loc_id;
- u32 rem_id;
-
- u32 snd_wnd;
- u32 max_snd_wnd;
-
- u32 rcv_wnd;
- u32 mss;
- u8 snd_wscale;
- u8 rcv_wscale;
-
- struct nes_cm_tsa_context tsa_cntxt;
- struct timeval sent_ts;
-};
-
-
-enum nes_cm_listener_state {
- NES_CM_LISTENER_PASSIVE_STATE = 1,
- NES_CM_LISTENER_ACTIVE_STATE = 2,
- NES_CM_LISTENER_EITHER_STATE = 3
-};
-
-struct nes_cm_listener {
- struct list_head list;
- struct nes_cm_core *cm_core;
- u8 loc_mac[ETH_ALEN];
- nes_addr_t loc_addr;
- u16 loc_port;
- struct iw_cm_id *cm_id;
- enum nes_cm_conn_type conn_type;
- atomic_t ref_count;
- struct nes_vnic *nesvnic;
- atomic_t pend_accepts_cnt;
- int backlog;
- enum nes_cm_listener_state listener_state;
- u32 reused_node;
- u8 tos;
-};
-
-/* per connection node and node state information */
-struct nes_cm_node {
- nes_addr_t loc_addr, rem_addr;
- u16 loc_port, rem_port;
-
- u8 loc_mac[ETH_ALEN];
- u8 rem_mac[ETH_ALEN];
-
- enum nes_cm_node_state state;
- struct nes_cm_tcp_context tcp_cntxt;
- struct nes_cm_core *cm_core;
- struct sk_buff_head resend_list;
- atomic_t ref_count;
- struct net_device *netdev;
-
- struct nes_cm_node *loopbackpartner;
-
- struct nes_timer_entry *send_entry;
- struct nes_timer_entry *recv_entry;
- spinlock_t retrans_list_lock;
- enum send_rdma0 send_rdma0_op;
-
- union {
- struct ietf_mpa_v1 mpa_frame;
- struct ietf_mpa_v2 mpa_v2_frame;
- u8 mpa_frame_buf[MAX_CM_BUFFER];
- };
- enum mpa_frame_version mpa_frame_rev;
- u16 ird_size;
- u16 ord_size;
- u16 mpav2_ird_ord;
-
- u16 mpa_frame_size;
- struct iw_cm_id *cm_id;
- struct list_head list;
- int accelerated;
- struct nes_cm_listener *listener;
- enum nes_cm_conn_type conn_type;
- struct nes_vnic *nesvnic;
- int apbvt_set;
- int accept_pend;
- struct list_head timer_entry;
- struct list_head reset_entry;
- struct nes_qp *nesqp;
- atomic_t passive_state;
- u8 tos;
-};
-
-/* structure for client or CM to fill when making CM api calls. */
-/* - only need to set relevant data, based on op. */
-struct nes_cm_info {
- union {
- struct iw_cm_id *cm_id;
- struct net_device *netdev;
- };
-
- u16 loc_port;
- u16 rem_port;
- nes_addr_t loc_addr;
- nes_addr_t rem_addr;
- enum nes_cm_conn_type conn_type;
- int backlog;
-};
-
-/* CM event codes */
-enum nes_cm_event_type {
- NES_CM_EVENT_UNKNOWN,
- NES_CM_EVENT_ESTABLISHED,
- NES_CM_EVENT_MPA_REQ,
- NES_CM_EVENT_MPA_CONNECT,
- NES_CM_EVENT_MPA_ACCEPT,
- NES_CM_EVENT_MPA_REJECT,
- NES_CM_EVENT_MPA_ESTABLISHED,
- NES_CM_EVENT_CONNECTED,
- NES_CM_EVENT_CLOSED,
- NES_CM_EVENT_RESET,
- NES_CM_EVENT_DROPPED_PKT,
- NES_CM_EVENT_CLOSE_IMMED,
- NES_CM_EVENT_CLOSE_HARD,
- NES_CM_EVENT_CLOSE_CLEAN,
- NES_CM_EVENT_ABORTED,
- NES_CM_EVENT_SEND_FIRST
-};
-
-/* event to post to CM event handler */
-struct nes_cm_event {
- enum nes_cm_event_type type;
-
- struct nes_cm_info cm_info;
- struct work_struct event_work;
- struct nes_cm_node *cm_node;
-};
-
-struct nes_cm_core {
- enum nes_cm_node_state state;
-
- atomic_t listen_node_cnt;
- struct nes_cm_node listen_list;
- spinlock_t listen_list_lock;
-
- u32 mtu;
- u32 free_tx_pkt_max;
- u32 rx_pkt_posted;
- atomic_t ht_node_cnt;
- struct list_head connected_nodes;
- /* struct list_head hashtable[NES_CM_HASHTABLE_SIZE]; */
- spinlock_t ht_lock;
-
- struct timer_list tcp_timer;
-
- const struct nes_cm_ops *api;
-
- int (*post_event)(struct nes_cm_event *event);
- atomic_t events_posted;
- struct workqueue_struct *event_wq;
- struct workqueue_struct *disconn_wq;
-
- atomic_t node_cnt;
- u64 aborted_connects;
- u32 options;
-
- struct nes_cm_node *current_listen_node;
-};
-
-
-#define NES_CM_SET_PKT_SIZE (1 << 1)
-#define NES_CM_SET_FREE_PKT_Q_SIZE (1 << 2)
-
-/* CM ops/API for client interface */
-struct nes_cm_ops {
- int (*accelerated)(struct nes_cm_core *, struct nes_cm_node *);
- struct nes_cm_listener * (*listen)(struct nes_cm_core *, struct nes_vnic *,
- struct nes_cm_info *);
- int (*stop_listener)(struct nes_cm_core *, struct nes_cm_listener *);
- struct nes_cm_node * (*connect)(struct nes_cm_core *,
- struct nes_vnic *, u16, void *,
- struct nes_cm_info *);
- int (*close)(struct nes_cm_core *, struct nes_cm_node *);
- int (*accept)(struct nes_cm_core *, struct nes_cm_node *);
- int (*reject)(struct nes_cm_core *, struct nes_cm_node *);
- int (*recv_pkt)(struct nes_cm_core *, struct nes_vnic *,
- struct sk_buff *);
- int (*destroy_cm_core)(struct nes_cm_core *);
- int (*get)(struct nes_cm_core *);
- int (*set)(struct nes_cm_core *, u32, u32);
-};
-
-int schedule_nes_timer(struct nes_cm_node *, struct sk_buff *,
- enum nes_timer_type, int, int);
-
-int nes_accept(struct iw_cm_id *, struct iw_cm_conn_param *);
-int nes_reject(struct iw_cm_id *, const void *, u8);
-int nes_connect(struct iw_cm_id *, struct iw_cm_conn_param *);
-int nes_create_listen(struct iw_cm_id *, int);
-int nes_destroy_listen(struct iw_cm_id *);
-
-int nes_cm_recv(struct sk_buff *, struct net_device *);
-int nes_cm_start(void);
-int nes_cm_stop(void);
-int nes_add_ref_cm_node(struct nes_cm_node *cm_node);
-int nes_rem_ref_cm_node(struct nes_cm_node *cm_node);
-
-#endif /* NES_CM_H */
diff --git a/drivers/infiniband/hw/nes/nes_context.h b/drivers/infiniband/hw/nes/nes_context.h
deleted file mode 100644
index a69eef16d72d..000000000000
--- a/drivers/infiniband/hw/nes/nes_context.h
+++ /dev/null
@@ -1,193 +0,0 @@
-/*
- * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef NES_CONTEXT_H
-#define NES_CONTEXT_H
-
-struct nes_qp_context {
- __le32 misc;
- __le32 cqs;
- __le32 sq_addr_low;
- __le32 sq_addr_high;
- __le32 rq_addr_low;
- __le32 rq_addr_high;
- __le32 misc2;
- __le16 tcpPorts[2];
- __le32 ip0;
- __le32 ip1;
- __le32 ip2;
- __le32 ip3;
- __le32 mss;
- __le32 arp_index_vlan;
- __le32 tcp_state_flow_label;
- __le32 pd_index_wscale;
- __le32 keepalive;
- u32 ts_recent;
- u32 ts_age;
- __le32 snd_nxt;
- __le32 snd_wnd;
- __le32 rcv_nxt;
- __le32 rcv_wnd;
- __le32 snd_max;
- __le32 snd_una;
- u32 srtt;
- __le32 rttvar;
- __le32 ssthresh;
- __le32 cwnd;
- __le32 snd_wl1;
- __le32 snd_wl2;
- __le32 max_snd_wnd;
- __le32 ts_val_delta;
- u32 retransmit;
- u32 probe_cnt;
- u32 hte_index;
- __le32 q2_addr_low;
- __le32 q2_addr_high;
- __le32 ird_index;
- u32 Rsvd3;
- __le32 ird_ord_sizes;
- u32 mrkr_offset;
- __le32 aeq_token_low;
- __le32 aeq_token_high;
-};
-
-/* QP Context Misc Field */
-
-#define NES_QPCONTEXT_MISC_IWARP_VER_MASK 0x00000003
-#define NES_QPCONTEXT_MISC_IWARP_VER_SHIFT 0
-#define NES_QPCONTEXT_MISC_EFB_SIZE_MASK 0x000000C0
-#define NES_QPCONTEXT_MISC_EFB_SIZE_SHIFT 6
-#define NES_QPCONTEXT_MISC_RQ_SIZE_MASK 0x00000300
-#define NES_QPCONTEXT_MISC_RQ_SIZE_SHIFT 8
-#define NES_QPCONTEXT_MISC_SQ_SIZE_MASK 0x00000c00
-#define NES_QPCONTEXT_MISC_SQ_SIZE_SHIFT 10
-#define NES_QPCONTEXT_MISC_PCI_FCN_MASK 0x00007000
-#define NES_QPCONTEXT_MISC_PCI_FCN_SHIFT 12
-#define NES_QPCONTEXT_MISC_DUP_ACKS_MASK 0x00070000
-#define NES_QPCONTEXT_MISC_DUP_ACKS_SHIFT 16
-
-enum nes_qp_context_misc_bits {
- NES_QPCONTEXT_MISC_RX_WQE_SIZE = 0x00000004,
- NES_QPCONTEXT_MISC_IPV4 = 0x00000008,
- NES_QPCONTEXT_MISC_DO_NOT_FRAG = 0x00000010,
- NES_QPCONTEXT_MISC_INSERT_VLAN = 0x00000020,
- NES_QPCONTEXT_MISC_DROS = 0x00008000,
- NES_QPCONTEXT_MISC_WSCALE = 0x00080000,
- NES_QPCONTEXT_MISC_KEEPALIVE = 0x00100000,
- NES_QPCONTEXT_MISC_TIMESTAMP = 0x00200000,
- NES_QPCONTEXT_MISC_SACK = 0x00400000,
- NES_QPCONTEXT_MISC_RDMA_WRITE_EN = 0x00800000,
- NES_QPCONTEXT_MISC_RDMA_READ_EN = 0x01000000,
- NES_QPCONTEXT_MISC_WBIND_EN = 0x10000000,
- NES_QPCONTEXT_MISC_FAST_REGISTER_EN = 0x20000000,
- NES_QPCONTEXT_MISC_PRIV_EN = 0x40000000,
- NES_QPCONTEXT_MISC_NO_NAGLE = 0x80000000
-};
-
-enum nes_qp_acc_wq_sizes {
- HCONTEXT_TSA_WQ_SIZE_4 = 0,
- HCONTEXT_TSA_WQ_SIZE_32 = 1,
- HCONTEXT_TSA_WQ_SIZE_128 = 2,
- HCONTEXT_TSA_WQ_SIZE_512 = 3
-};
-
-/* QP Context Misc2 Fields */
-#define NES_QPCONTEXT_MISC2_TTL_MASK 0x000000ff
-#define NES_QPCONTEXT_MISC2_TTL_SHIFT 0
-#define NES_QPCONTEXT_MISC2_HOP_LIMIT_MASK 0x000000ff
-#define NES_QPCONTEXT_MISC2_HOP_LIMIT_SHIFT 0
-#define NES_QPCONTEXT_MISC2_LIMIT_MASK 0x00000300
-#define NES_QPCONTEXT_MISC2_LIMIT_SHIFT 8
-#define NES_QPCONTEXT_MISC2_NIC_INDEX_MASK 0x0000fc00
-#define NES_QPCONTEXT_MISC2_NIC_INDEX_SHIFT 10
-#define NES_QPCONTEXT_MISC2_SRC_IP_MASK 0x001f0000
-#define NES_QPCONTEXT_MISC2_SRC_IP_SHIFT 16
-#define NES_QPCONTEXT_MISC2_TOS_MASK 0xff000000
-#define NES_QPCONTEXT_MISC2_TOS_SHIFT 24
-#define NES_QPCONTEXT_MISC2_TRAFFIC_CLASS_MASK 0xff000000
-#define NES_QPCONTEXT_MISC2_TRAFFIC_CLASS_SHIFT 24
-
-/* QP Context Tcp State/Flow Label Fields */
-#define NES_QPCONTEXT_TCPFLOW_FLOW_LABEL_MASK 0x000fffff
-#define NES_QPCONTEXT_TCPFLOW_FLOW_LABEL_SHIFT 0
-#define NES_QPCONTEXT_TCPFLOW_TCP_STATE_MASK 0xf0000000
-#define NES_QPCONTEXT_TCPFLOW_TCP_STATE_SHIFT 28
-
-enum nes_qp_tcp_state {
- NES_QPCONTEXT_TCPSTATE_CLOSED = 1,
- NES_QPCONTEXT_TCPSTATE_EST = 5,
- NES_QPCONTEXT_TCPSTATE_TIME_WAIT = 11,
-};
-
-/* QP Context PD Index/wscale Fields */
-#define NES_QPCONTEXT_PDWSCALE_RCV_WSCALE_MASK 0x0000000f
-#define NES_QPCONTEXT_PDWSCALE_RCV_WSCALE_SHIFT 0
-#define NES_QPCONTEXT_PDWSCALE_SND_WSCALE_MASK 0x00000f00
-#define NES_QPCONTEXT_PDWSCALE_SND_WSCALE_SHIFT 8
-#define NES_QPCONTEXT_PDWSCALE_PDINDEX_MASK 0xffff0000
-#define NES_QPCONTEXT_PDWSCALE_PDINDEX_SHIFT 16
-
-/* QP Context Keepalive Fields */
-#define NES_QPCONTEXT_KEEPALIVE_DELTA_MASK 0x0000ffff
-#define NES_QPCONTEXT_KEEPALIVE_DELTA_SHIFT 0
-#define NES_QPCONTEXT_KEEPALIVE_PROBE_CNT_MASK 0x00ff0000
-#define NES_QPCONTEXT_KEEPALIVE_PROBE_CNT_SHIFT 16
-#define NES_QPCONTEXT_KEEPALIVE_INTV_MASK 0xff000000
-#define NES_QPCONTEXT_KEEPALIVE_INTV_SHIFT 24
-
-/* QP Context ORD/IRD Fields */
-#define NES_QPCONTEXT_ORDIRD_ORDSIZE_MASK 0x0000007f
-#define NES_QPCONTEXT_ORDIRD_ORDSIZE_SHIFT 0
-#define NES_QPCONTEXT_ORDIRD_IRDSIZE_MASK 0x00030000
-#define NES_QPCONTEXT_ORDIRD_IRDSIZE_SHIFT 16
-#define NES_QPCONTEXT_ORDIRD_IWARP_MODE_MASK 0x30000000
-#define NES_QPCONTEXT_ORDIRD_IWARP_MODE_SHIFT 28
-
-enum nes_ord_ird_bits {
- NES_QPCONTEXT_ORDIRD_WRPDU = 0x02000000,
- NES_QPCONTEXT_ORDIRD_LSMM_PRESENT = 0x04000000,
- NES_QPCONTEXT_ORDIRD_ALSMM = 0x08000000,
- NES_QPCONTEXT_ORDIRD_AAH = 0x40000000,
- NES_QPCONTEXT_ORDIRD_RNMC = 0x80000000
-};
-
-enum nes_iwarp_qp_state {
- NES_QPCONTEXT_IWARP_STATE_NONEXIST = 0,
- NES_QPCONTEXT_IWARP_STATE_IDLE = 1,
- NES_QPCONTEXT_IWARP_STATE_RTS = 2,
- NES_QPCONTEXT_IWARP_STATE_CLOSING = 3,
- NES_QPCONTEXT_IWARP_STATE_TERMINATE = 5,
- NES_QPCONTEXT_IWARP_STATE_ERROR = 6
-};
-
-
-#endif /* NES_CONTEXT_H */
diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c
deleted file mode 100644
index 19acd13c6cb1..000000000000
--- a/drivers/infiniband/hw/nes/nes_hw.c
+++ /dev/null
@@ -1,3893 +0,0 @@
-/*
- * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/if_vlan.h>
-#include <linux/slab.h>
-
-#include "nes.h"
-
-static int wide_ppm_offset;
-module_param(wide_ppm_offset, int, 0644);
-MODULE_PARM_DESC(wide_ppm_offset, "Increase CX4 interface clock ppm offset, 0=100ppm (default), 1=300ppm");
-
-static u32 crit_err_count;
-u32 int_mod_timer_init;
-u32 int_mod_cq_depth_256;
-u32 int_mod_cq_depth_128;
-u32 int_mod_cq_depth_32;
-u32 int_mod_cq_depth_24;
-u32 int_mod_cq_depth_16;
-u32 int_mod_cq_depth_4;
-u32 int_mod_cq_depth_1;
-static const u8 nes_max_critical_error_count = 100;
-#include "nes_cm.h"
-
-static void nes_cqp_ce_handler(struct nes_device *nesdev, struct nes_hw_cq *cq);
-static void nes_init_csr_ne020(struct nes_device *nesdev, u8 hw_rev, u8 port_count);
-static int nes_init_serdes(struct nes_device *nesdev, u8 hw_rev, u8 port_count,
- struct nes_adapter *nesadapter, u8 OneG_Mode);
-static void nes_nic_napi_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq);
-static void nes_process_aeq(struct nes_device *nesdev, struct nes_hw_aeq *aeq);
-static void nes_process_ceq(struct nes_device *nesdev, struct nes_hw_ceq *ceq);
-static void nes_process_iwarp_aeqe(struct nes_device *nesdev,
- struct nes_hw_aeqe *aeqe);
-static void process_critical_error(struct nes_device *nesdev);
-static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number);
-static unsigned int nes_reset_adapter_ne020(struct nes_device *nesdev, u8 *OneG_Mode);
-static void nes_terminate_start_timer(struct nes_qp *nesqp);
-
-#ifdef CONFIG_INFINIBAND_NES_DEBUG
-static unsigned char *nes_iwarp_state_str[] = {
- "Non-Existent",
- "Idle",
- "RTS",
- "Closing",
- "RSVD1",
- "Terminate",
- "Error",
- "RSVD2",
-};
-
-static unsigned char *nes_tcp_state_str[] = {
- "Non-Existent",
- "Closed",
- "Listen",
- "SYN Sent",
- "SYN Rcvd",
- "Established",
- "Close Wait",
- "FIN Wait 1",
- "Closing",
- "Last Ack",
- "FIN Wait 2",
- "Time Wait",
- "RSVD1",
- "RSVD2",
- "RSVD3",
- "RSVD4",
-};
-#endif
-
-static inline void print_ip(struct nes_cm_node *cm_node)
-{
- unsigned char *rem_addr;
- if (cm_node) {
- rem_addr = (unsigned char *)&cm_node->rem_addr;
- printk(KERN_ERR PFX "Remote IP addr: %pI4\n", rem_addr);
- }
-}
-
-/**
- * nes_nic_init_timer_defaults
- */
-void nes_nic_init_timer_defaults(struct nes_device *nesdev, u8 jumbomode)
-{
- unsigned long flags;
- struct nes_adapter *nesadapter = nesdev->nesadapter;
- struct nes_hw_tune_timer *shared_timer = &nesadapter->tune_timer;
-
- spin_lock_irqsave(&nesadapter->periodic_timer_lock, flags);
-
- shared_timer->timer_in_use_min = NES_NIC_FAST_TIMER_LOW;
- shared_timer->timer_in_use_max = NES_NIC_FAST_TIMER_HIGH;
- if (jumbomode) {
- shared_timer->threshold_low = DEFAULT_JUMBO_NES_QL_LOW;
- shared_timer->threshold_target = DEFAULT_JUMBO_NES_QL_TARGET;
- shared_timer->threshold_high = DEFAULT_JUMBO_NES_QL_HIGH;
- } else {
- shared_timer->threshold_low = DEFAULT_NES_QL_LOW;
- shared_timer->threshold_target = DEFAULT_NES_QL_TARGET;
- shared_timer->threshold_high = DEFAULT_NES_QL_HIGH;
- }
-
- /* todo use netdev->mtu to set thresholds */
- spin_unlock_irqrestore(&nesadapter->periodic_timer_lock, flags);
-}
-
-
-/**
- * nes_nic_init_timer
- */
-static void nes_nic_init_timer(struct nes_device *nesdev)
-{
- unsigned long flags;
- struct nes_adapter *nesadapter = nesdev->nesadapter;
- struct nes_hw_tune_timer *shared_timer = &nesadapter->tune_timer;
-
- spin_lock_irqsave(&nesadapter->periodic_timer_lock, flags);
-
- if (shared_timer->timer_in_use_old == 0) {
- nesdev->deepcq_count = 0;
- shared_timer->timer_direction_upward = 0;
- shared_timer->timer_direction_downward = 0;
- shared_timer->timer_in_use = NES_NIC_FAST_TIMER;
- shared_timer->timer_in_use_old = 0;
-
- }
- if (shared_timer->timer_in_use != shared_timer->timer_in_use_old) {
- shared_timer->timer_in_use_old = shared_timer->timer_in_use;
- nes_write32(nesdev->regs+NES_PERIODIC_CONTROL,
- 0x80000000 | ((u32)(shared_timer->timer_in_use*8)));
- }
- /* todo use netdev->mtu to set thresholds */
- spin_unlock_irqrestore(&nesadapter->periodic_timer_lock, flags);
-}
-
-
-/**
- * nes_nic_tune_timer
- */
-static void nes_nic_tune_timer(struct nes_device *nesdev)
-{
- unsigned long flags;
- struct nes_adapter *nesadapter = nesdev->nesadapter;
- struct nes_hw_tune_timer *shared_timer = &nesadapter->tune_timer;
- u16 cq_count = nesdev->currcq_count;
-
- spin_lock_irqsave(&nesadapter->periodic_timer_lock, flags);
-
- if (shared_timer->cq_count_old <= cq_count)
- shared_timer->cq_direction_downward = 0;
- else
- shared_timer->cq_direction_downward++;
- shared_timer->cq_count_old = cq_count;
- if (shared_timer->cq_direction_downward > NES_NIC_CQ_DOWNWARD_TREND) {
- if (cq_count <= shared_timer->threshold_low &&
- shared_timer->threshold_low > 4) {
- shared_timer->threshold_low = shared_timer->threshold_low/2;
- shared_timer->cq_direction_downward=0;
- nesdev->currcq_count = 0;
- spin_unlock_irqrestore(&nesadapter->periodic_timer_lock, flags);
- return;
- }
- }
-
- if (cq_count > 1) {
- nesdev->deepcq_count += cq_count;
- if (cq_count <= shared_timer->threshold_low) { /* increase timer gently */
- shared_timer->timer_direction_upward++;
- shared_timer->timer_direction_downward = 0;
- } else if (cq_count <= shared_timer->threshold_target) { /* balanced */
- shared_timer->timer_direction_upward = 0;
- shared_timer->timer_direction_downward = 0;
- } else if (cq_count <= shared_timer->threshold_high) { /* decrease timer gently */
- shared_timer->timer_direction_downward++;
- shared_timer->timer_direction_upward = 0;
- } else if (cq_count <= (shared_timer->threshold_high) * 2) {
- shared_timer->timer_in_use -= 2;
- shared_timer->timer_direction_upward = 0;
- shared_timer->timer_direction_downward++;
- } else {
- shared_timer->timer_in_use -= 4;
- shared_timer->timer_direction_upward = 0;
- shared_timer->timer_direction_downward++;
- }
-
- if (shared_timer->timer_direction_upward > 3 ) { /* using history */
- shared_timer->timer_in_use += 3;
- shared_timer->timer_direction_upward = 0;
- shared_timer->timer_direction_downward = 0;
- }
- if (shared_timer->timer_direction_downward > 5) { /* using history */
- shared_timer->timer_in_use -= 4 ;
- shared_timer->timer_direction_downward = 0;
- shared_timer->timer_direction_upward = 0;
- }
- }
-
- /* boundary checking */
- if (shared_timer->timer_in_use > shared_timer->threshold_high)
- shared_timer->timer_in_use = shared_timer->threshold_high;
- else if (shared_timer->timer_in_use < shared_timer->threshold_low)
- shared_timer->timer_in_use = shared_timer->threshold_low;
-
- nesdev->currcq_count = 0;
-
- spin_unlock_irqrestore(&nesadapter->periodic_timer_lock, flags);
-}
-
-
-/**
- * nes_init_adapter - initialize adapter
- */
-struct nes_adapter *nes_init_adapter(struct nes_device *nesdev, u8 hw_rev) {
- struct nes_adapter *nesadapter = NULL;
- unsigned long num_pds;
- u32 u32temp;
- u32 port_count;
- u16 max_rq_wrs;
- u16 max_sq_wrs;
- u32 max_mr;
- u32 max_256pbl;
- u32 max_4kpbl;
- u32 max_qp;
- u32 max_irrq;
- u32 max_cq;
- u32 hte_index_mask;
- u32 adapter_size;
- u32 arp_table_size;
- u16 vendor_id;
- u16 device_id;
- u8 OneG_Mode;
- u8 func_index;
-
- /* search the list of existing adapters */
- list_for_each_entry(nesadapter, &nes_adapter_list, list) {
- nes_debug(NES_DBG_INIT, "Searching Adapter list for PCI devfn = 0x%X,"
- " adapter PCI slot/bus = %u/%u, pci devices PCI slot/bus = %u/%u, .\n",
- nesdev->pcidev->devfn,
- PCI_SLOT(nesadapter->devfn),
- nesadapter->bus_number,
- PCI_SLOT(nesdev->pcidev->devfn),
- nesdev->pcidev->bus->number );
- if ((PCI_SLOT(nesadapter->devfn) == PCI_SLOT(nesdev->pcidev->devfn)) &&
- (nesadapter->bus_number == nesdev->pcidev->bus->number)) {
- nesadapter->ref_count++;
- return nesadapter;
- }
- }
-
- /* no adapter found */
- num_pds = pci_resource_len(nesdev->pcidev, BAR_1) >> PAGE_SHIFT;
- if ((hw_rev != NE020_REV) && (hw_rev != NE020_REV1)) {
- nes_debug(NES_DBG_INIT, "NE020 driver detected unknown hardware revision 0x%x\n",
- hw_rev);
- return NULL;
- }
-
- nes_debug(NES_DBG_INIT, "Determine Soft Reset, QP_control=0x%x, CPU0=0x%x, CPU1=0x%x, CPU2=0x%x\n",
- nes_read_indexed(nesdev, NES_IDX_QP_CONTROL + PCI_FUNC(nesdev->pcidev->devfn) * 8),
- nes_read_indexed(nesdev, NES_IDX_INT_CPU_STATUS),
- nes_read_indexed(nesdev, NES_IDX_INT_CPU_STATUS + 4),
- nes_read_indexed(nesdev, NES_IDX_INT_CPU_STATUS + 8));
-
- nes_debug(NES_DBG_INIT, "Reset and init NE020\n");
-
-
- if ((port_count = nes_reset_adapter_ne020(nesdev, &OneG_Mode)) == 0)
- return NULL;
-
- max_qp = nes_read_indexed(nesdev, NES_IDX_QP_CTX_SIZE);
- nes_debug(NES_DBG_INIT, "QP_CTX_SIZE=%u\n", max_qp);
-
- u32temp = nes_read_indexed(nesdev, NES_IDX_QUAD_HASH_TABLE_SIZE);
- if (max_qp > ((u32)1 << (u32temp & 0x001f))) {
- nes_debug(NES_DBG_INIT, "Reducing Max QPs to %u due to hash table size = 0x%08X\n",
- max_qp, u32temp);
- max_qp = (u32)1 << (u32temp & 0x001f);
- }
-
- hte_index_mask = ((u32)1 << ((u32temp & 0x001f)+1))-1;
- nes_debug(NES_DBG_INIT, "Max QP = %u, hte_index_mask = 0x%08X.\n",
- max_qp, hte_index_mask);
-
- u32temp = nes_read_indexed(nesdev, NES_IDX_IRRQ_COUNT);
-
- max_irrq = 1 << (u32temp & 0x001f);
-
- if (max_qp > max_irrq) {
- max_qp = max_irrq;
- nes_debug(NES_DBG_INIT, "Reducing Max QPs to %u due to Available Q1s.\n",
- max_qp);
- }
-
- /* there should be no reason to allocate more pds than qps */
- if (num_pds > max_qp)
- num_pds = max_qp;
-
- u32temp = nes_read_indexed(nesdev, NES_IDX_MRT_SIZE);
- max_mr = (u32)8192 << (u32temp & 0x7);
-
- u32temp = nes_read_indexed(nesdev, NES_IDX_PBL_REGION_SIZE);
- max_256pbl = (u32)1 << (u32temp & 0x0000001f);
- max_4kpbl = (u32)1 << ((u32temp >> 16) & 0x0000001f);
- max_cq = nes_read_indexed(nesdev, NES_IDX_CQ_CTX_SIZE);
-
- u32temp = nes_read_indexed(nesdev, NES_IDX_ARP_CACHE_SIZE);
- arp_table_size = 1 << u32temp;
-
- adapter_size = (sizeof(struct nes_adapter) +
- (sizeof(unsigned long)-1)) & (~(sizeof(unsigned long)-1));
- adapter_size += sizeof(unsigned long) * BITS_TO_LONGS(max_qp);
- adapter_size += sizeof(unsigned long) * BITS_TO_LONGS(max_mr);
- adapter_size += sizeof(unsigned long) * BITS_TO_LONGS(max_cq);
- adapter_size += sizeof(unsigned long) * BITS_TO_LONGS(num_pds);
- adapter_size += sizeof(unsigned long) * BITS_TO_LONGS(arp_table_size);
- adapter_size += sizeof(struct nes_qp **) * max_qp;
-
- /* allocate a new adapter struct */
- nesadapter = kzalloc(adapter_size, GFP_KERNEL);
- if (!nesadapter)
- return NULL;
-
- nes_debug(NES_DBG_INIT, "Allocating new nesadapter @ %p, size = %u (actual size = %u).\n",
- nesadapter, (u32)sizeof(struct nes_adapter), adapter_size);
-
- if (nes_read_eeprom_values(nesdev, nesadapter)) {
- printk(KERN_ERR PFX "Unable to read EEPROM data.\n");
- kfree(nesadapter);
- return NULL;
- }
-
- nesadapter->vendor_id = (((u32) nesadapter->mac_addr_high) << 8) |
- (nesadapter->mac_addr_low >> 24);
-
- pci_bus_read_config_word(nesdev->pcidev->bus, nesdev->pcidev->devfn,
- PCI_DEVICE_ID, &device_id);
- nesadapter->vendor_part_id = device_id;
-
- if (nes_init_serdes(nesdev, hw_rev, port_count, nesadapter,
- OneG_Mode)) {
- kfree(nesadapter);
- return NULL;
- }
- nes_init_csr_ne020(nesdev, hw_rev, port_count);
-
- memset(nesadapter->pft_mcast_map, 255,
- sizeof nesadapter->pft_mcast_map);
-
- /* populate the new nesadapter */
- nesadapter->devfn = nesdev->pcidev->devfn;
- nesadapter->bus_number = nesdev->pcidev->bus->number;
- nesadapter->ref_count = 1;
- nesadapter->timer_int_req = 0xffff0000;
- nesadapter->OneG_Mode = OneG_Mode;
- nesadapter->doorbell_start = nesdev->doorbell_region;
-
- /* nesadapter->tick_delta = clk_divisor; */
- nesadapter->hw_rev = hw_rev;
- nesadapter->port_count = port_count;
-
- nesadapter->max_qp = max_qp;
- nesadapter->hte_index_mask = hte_index_mask;
- nesadapter->max_irrq = max_irrq;
- nesadapter->max_mr = max_mr;
- nesadapter->max_256pbl = max_256pbl - 1;
- nesadapter->max_4kpbl = max_4kpbl - 1;
- nesadapter->max_cq = max_cq;
- nesadapter->free_256pbl = max_256pbl - 1;
- nesadapter->free_4kpbl = max_4kpbl - 1;
- nesadapter->max_pd = num_pds;
- nesadapter->arp_table_size = arp_table_size;
-
- nesadapter->et_pkt_rate_low = NES_TIMER_ENABLE_LIMIT;
- if (nes_drv_opt & NES_DRV_OPT_DISABLE_INT_MOD) {
- nesadapter->et_use_adaptive_rx_coalesce = 0;
- nesadapter->timer_int_limit = NES_TIMER_INT_LIMIT;
- nesadapter->et_rx_coalesce_usecs_irq = interrupt_mod_interval;
- } else {
- nesadapter->et_use_adaptive_rx_coalesce = 1;
- nesadapter->timer_int_limit = NES_TIMER_INT_LIMIT_DYNAMIC;
- nesadapter->et_rx_coalesce_usecs_irq = 0;
- printk(PFX "%s: Using Adaptive Interrupt Moderation\n", __func__);
- }
- /* Setup and enable the periodic timer */
- if (nesadapter->et_rx_coalesce_usecs_irq)
- nes_write32(nesdev->regs+NES_PERIODIC_CONTROL, 0x80000000 |
- ((u32)(nesadapter->et_rx_coalesce_usecs_irq * 8)));
- else
- nes_write32(nesdev->regs+NES_PERIODIC_CONTROL, 0x00000000);
-
- nesadapter->base_pd = 1;
-
- nesadapter->device_cap_flags = IB_DEVICE_LOCAL_DMA_LKEY |
- IB_DEVICE_MEM_WINDOW |
- IB_DEVICE_MEM_MGT_EXTENSIONS;
-
- nesadapter->allocated_qps = (unsigned long *)&(((unsigned char *)nesadapter)
- [(sizeof(struct nes_adapter)+(sizeof(unsigned long)-1))&(~(sizeof(unsigned long)-1))]);
- nesadapter->allocated_cqs = &nesadapter->allocated_qps[BITS_TO_LONGS(max_qp)];
- nesadapter->allocated_mrs = &nesadapter->allocated_cqs[BITS_TO_LONGS(max_cq)];
- nesadapter->allocated_pds = &nesadapter->allocated_mrs[BITS_TO_LONGS(max_mr)];
- nesadapter->allocated_arps = &nesadapter->allocated_pds[BITS_TO_LONGS(num_pds)];
- nesadapter->qp_table = (struct nes_qp **)(&nesadapter->allocated_arps[BITS_TO_LONGS(arp_table_size)]);
-
-
- /* mark the usual suspect QPs, MR and CQs as in use */
- for (u32temp = 0; u32temp < NES_FIRST_QPN; u32temp++) {
- set_bit(u32temp, nesadapter->allocated_qps);
- set_bit(u32temp, nesadapter->allocated_cqs);
- }
- set_bit(0, nesadapter->allocated_mrs);
-
- for (u32temp = 0; u32temp < 20; u32temp++)
- set_bit(u32temp, nesadapter->allocated_pds);
- u32temp = nes_read_indexed(nesdev, NES_IDX_QP_MAX_CFG_SIZES);
-
- max_rq_wrs = ((u32temp >> 8) & 3);
- switch (max_rq_wrs) {
- case 0:
- max_rq_wrs = 4;
- break;
- case 1:
- max_rq_wrs = 16;
- break;
- case 2:
- max_rq_wrs = 32;
- break;
- case 3:
- max_rq_wrs = 512;
- break;
- }
-
- max_sq_wrs = (u32temp & 3);
- switch (max_sq_wrs) {
- case 0:
- max_sq_wrs = 4;
- break;
- case 1:
- max_sq_wrs = 16;
- break;
- case 2:
- max_sq_wrs = 32;
- break;
- case 3:
- max_sq_wrs = 512;
- break;
- }
- nesadapter->max_qp_wr = min(max_rq_wrs, max_sq_wrs);
- nesadapter->max_irrq_wr = (u32temp >> 16) & 3;
-
- nesadapter->max_sge = 4;
- nesadapter->max_cqe = 32766;
-
- if (nes_read_eeprom_values(nesdev, nesadapter)) {
- printk(KERN_ERR PFX "Unable to read EEPROM data.\n");
- kfree(nesadapter);
- return NULL;
- }
-
- u32temp = nes_read_indexed(nesdev, NES_IDX_TCP_TIMER_CONFIG);
- nes_write_indexed(nesdev, NES_IDX_TCP_TIMER_CONFIG,
- (u32temp & 0xff000000) | (nesadapter->tcp_timer_core_clk_divisor & 0x00ffffff));
-
- /* setup port configuration */
- if (nesadapter->port_count == 1) {
- nesadapter->log_port = 0x00000000;
- if (nes_drv_opt & NES_DRV_OPT_DUAL_LOGICAL_PORT)
- nes_write_indexed(nesdev, NES_IDX_TX_POOL_SIZE, 0x00000002);
- else
- nes_write_indexed(nesdev, NES_IDX_TX_POOL_SIZE, 0x00000003);
- } else {
- if (nesadapter->phy_type[0] == NES_PHY_TYPE_PUMA_1G) {
- nesadapter->log_port = 0x000000D8;
- } else {
- if (nesadapter->port_count == 2)
- nesadapter->log_port = 0x00000044;
- else
- nesadapter->log_port = 0x000000e4;
- }
- nes_write_indexed(nesdev, NES_IDX_TX_POOL_SIZE, 0x00000003);
- }
-
- nes_write_indexed(nesdev, NES_IDX_NIC_LOGPORT_TO_PHYPORT,
- nesadapter->log_port);
- nes_debug(NES_DBG_INIT, "Probe time, LOG2PHY=%u\n",
- nes_read_indexed(nesdev, NES_IDX_NIC_LOGPORT_TO_PHYPORT));
-
- spin_lock_init(&nesadapter->resource_lock);
- spin_lock_init(&nesadapter->phy_lock);
- spin_lock_init(&nesadapter->pbl_lock);
- spin_lock_init(&nesadapter->periodic_timer_lock);
-
- INIT_LIST_HEAD(&nesadapter->nesvnic_list[0]);
- INIT_LIST_HEAD(&nesadapter->nesvnic_list[1]);
- INIT_LIST_HEAD(&nesadapter->nesvnic_list[2]);
- INIT_LIST_HEAD(&nesadapter->nesvnic_list[3]);
-
- if ((!nesadapter->OneG_Mode) && (nesadapter->port_count == 2)) {
- u32 pcs_control_status0, pcs_control_status1;
- u32 reset_value;
- u32 i = 0;
- u32 int_cnt = 0;
- u32 ext_cnt = 0;
- unsigned long flags;
- u32 j = 0;
-
- pcs_control_status0 = nes_read_indexed(nesdev,
- NES_IDX_PHY_PCS_CONTROL_STATUS0);
- pcs_control_status1 = nes_read_indexed(nesdev,
- NES_IDX_PHY_PCS_CONTROL_STATUS0 + 0x200);
-
- for (i = 0; i < NES_MAX_LINK_CHECK; i++) {
- pcs_control_status0 = nes_read_indexed(nesdev,
- NES_IDX_PHY_PCS_CONTROL_STATUS0);
- pcs_control_status1 = nes_read_indexed(nesdev,
- NES_IDX_PHY_PCS_CONTROL_STATUS0 + 0x200);
- if ((0x0F000100 == (pcs_control_status0 & 0x0F000100))
- || (0x0F000100 == (pcs_control_status1 & 0x0F000100)))
- int_cnt++;
- msleep(1);
- }
- if (int_cnt > 1) {
- spin_lock_irqsave(&nesadapter->phy_lock, flags);
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1, 0x0000F0C8);
- mh_detected++;
- reset_value = nes_read32(nesdev->regs+NES_SOFTWARE_RESET);
- reset_value |= 0x0000003d;
- nes_write32(nesdev->regs+NES_SOFTWARE_RESET, reset_value);
-
- while (((nes_read32(nesdev->regs+NES_SOFTWARE_RESET)
- & 0x00000040) != 0x00000040) && (j++ < 5000));
- spin_unlock_irqrestore(&nesadapter->phy_lock, flags);
-
- pcs_control_status0 = nes_read_indexed(nesdev,
- NES_IDX_PHY_PCS_CONTROL_STATUS0);
- pcs_control_status1 = nes_read_indexed(nesdev,
- NES_IDX_PHY_PCS_CONTROL_STATUS0 + 0x200);
-
- for (i = 0; i < NES_MAX_LINK_CHECK; i++) {
- pcs_control_status0 = nes_read_indexed(nesdev,
- NES_IDX_PHY_PCS_CONTROL_STATUS0);
- pcs_control_status1 = nes_read_indexed(nesdev,
- NES_IDX_PHY_PCS_CONTROL_STATUS0 + 0x200);
- if ((0x0F000100 == (pcs_control_status0 & 0x0F000100))
- || (0x0F000100 == (pcs_control_status1 & 0x0F000100))) {
- if (++ext_cnt > int_cnt) {
- spin_lock_irqsave(&nesadapter->phy_lock, flags);
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1,
- 0x0000F088);
- mh_detected++;
- reset_value = nes_read32(nesdev->regs+NES_SOFTWARE_RESET);
- reset_value |= 0x0000003d;
- nes_write32(nesdev->regs+NES_SOFTWARE_RESET, reset_value);
-
- while (((nes_read32(nesdev->regs+NES_SOFTWARE_RESET)
- & 0x00000040) != 0x00000040) && (j++ < 5000));
- spin_unlock_irqrestore(&nesadapter->phy_lock, flags);
- break;
- }
- }
- msleep(1);
- }
- }
- }
-
- if (nesadapter->hw_rev == NE020_REV) {
- init_timer(&nesadapter->mh_timer);
- nesadapter->mh_timer.function = nes_mh_fix;
- nesadapter->mh_timer.expires = jiffies + (HZ/5); /* 1 second */
- nesadapter->mh_timer.data = (unsigned long)nesdev;
- add_timer(&nesadapter->mh_timer);
- } else {
- nes_write32(nesdev->regs+NES_INTF_INT_STAT, 0x0f000000);
- }
-
- init_timer(&nesadapter->lc_timer);
- nesadapter->lc_timer.function = nes_clc;
- nesadapter->lc_timer.expires = jiffies + 3600 * HZ; /* 1 hour */
- nesadapter->lc_timer.data = (unsigned long)nesdev;
- add_timer(&nesadapter->lc_timer);
-
- list_add_tail(&nesadapter->list, &nes_adapter_list);
-
- for (func_index = 0; func_index < 8; func_index++) {
- pci_bus_read_config_word(nesdev->pcidev->bus,
- PCI_DEVFN(PCI_SLOT(nesdev->pcidev->devfn),
- func_index), 0, &vendor_id);
- if (vendor_id == 0xffff)
- break;
- }
- nes_debug(NES_DBG_INIT, "%s %d functions found for %s.\n", __func__,
- func_index, pci_name(nesdev->pcidev));
- nesadapter->adapter_fcn_count = func_index;
-
- return nesadapter;
-}
-
-
-/**
- * nes_reset_adapter_ne020
- */
-static unsigned int nes_reset_adapter_ne020(struct nes_device *nesdev, u8 *OneG_Mode)
-{
- u32 port_count;
- u32 u32temp;
- u32 i;
-
- u32temp = nes_read32(nesdev->regs+NES_SOFTWARE_RESET);
- port_count = ((u32temp & 0x00000300) >> 8) + 1;
- /* TODO: assuming that both SERDES are set the same for now */
- *OneG_Mode = (u32temp & 0x00003c00) ? 0 : 1;
- nes_debug(NES_DBG_INIT, "Initial Software Reset = 0x%08X, port_count=%u\n",
- u32temp, port_count);
- if (*OneG_Mode)
- nes_debug(NES_DBG_INIT, "Running in 1G mode.\n");
- u32temp &= 0xff00ffc0;
- switch (port_count) {
- case 1:
- u32temp |= 0x00ee0000;
- break;
- case 2:
- u32temp |= 0x00cc0000;
- break;
- case 4:
- u32temp |= 0x00000000;
- break;
- default:
- return 0;
- break;
- }
-
- /* check and do full reset if needed */
- if (nes_read_indexed(nesdev, NES_IDX_QP_CONTROL+(PCI_FUNC(nesdev->pcidev->devfn)*8))) {
- nes_debug(NES_DBG_INIT, "Issuing Full Soft reset = 0x%08X\n", u32temp | 0xd);
- nes_write32(nesdev->regs+NES_SOFTWARE_RESET, u32temp | 0xd);
-
- i = 0;
- while (((nes_read32(nesdev->regs+NES_SOFTWARE_RESET) & 0x00000040) == 0) && i++ < 10000)
- mdelay(1);
- if (i > 10000) {
- nes_debug(NES_DBG_INIT, "Did not see full soft reset done.\n");
- return 0;
- }
-
- i = 0;
- while ((nes_read_indexed(nesdev, NES_IDX_INT_CPU_STATUS) != 0x80) && i++ < 10000)
- mdelay(1);
- if (i > 10000) {
- printk(KERN_ERR PFX "Internal CPU not ready, status = %02X\n",
- nes_read_indexed(nesdev, NES_IDX_INT_CPU_STATUS));
- return 0;
- }
- }
-
- /* port reset */
- switch (port_count) {
- case 1:
- u32temp |= 0x00ee0010;
- break;
- case 2:
- u32temp |= 0x00cc0030;
- break;
- case 4:
- u32temp |= 0x00000030;
- break;
- }
-
- nes_debug(NES_DBG_INIT, "Issuing Port Soft reset = 0x%08X\n", u32temp | 0xd);
- nes_write32(nesdev->regs+NES_SOFTWARE_RESET, u32temp | 0xd);
-
- i = 0;
- while (((nes_read32(nesdev->regs+NES_SOFTWARE_RESET) & 0x00000040) == 0) && i++ < 10000)
- mdelay(1);
- if (i > 10000) {
- nes_debug(NES_DBG_INIT, "Did not see port soft reset done.\n");
- return 0;
- }
-
- /* serdes 0 */
- i = 0;
- while (((u32temp = (nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_STATUS0)
- & 0x0000000f)) != 0x0000000f) && i++ < 5000)
- mdelay(1);
- if (i > 5000) {
- nes_debug(NES_DBG_INIT, "Serdes 0 not ready, status=%x\n", u32temp);
- return 0;
- }
-
- /* serdes 1 */
- if (port_count > 1) {
- i = 0;
- while (((u32temp = (nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_STATUS1)
- & 0x0000000f)) != 0x0000000f) && i++ < 5000)
- mdelay(1);
- if (i > 5000) {
- nes_debug(NES_DBG_INIT, "Serdes 1 not ready, status=%x\n", u32temp);
- return 0;
- }
- }
-
- return port_count;
-}
-
-
-/**
- * nes_init_serdes
- */
-static int nes_init_serdes(struct nes_device *nesdev, u8 hw_rev, u8 port_count,
- struct nes_adapter *nesadapter, u8 OneG_Mode)
-{
- int i;
- u32 u32temp;
- u32 sds;
-
- if (hw_rev != NE020_REV) {
- /* init serdes 0 */
- switch (nesadapter->phy_type[0]) {
- case NES_PHY_TYPE_CX4:
- if (wide_ppm_offset)
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL0, 0x000FFFAA);
- else
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL0, 0x000000FF);
- break;
- case NES_PHY_TYPE_KR:
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL0, 0x000000FF);
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_EMP0, 0x00000000);
- break;
- case NES_PHY_TYPE_PUMA_1G:
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL0, 0x000000FF);
- sds = nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0);
- sds |= 0x00000100;
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0, sds);
- break;
- default:
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL0, 0x000000FF);
- break;
- }
-
- if (!OneG_Mode)
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_HIGHZ_LANE_MODE0, 0x11110000);
-
- if (port_count < 2)
- return 0;
-
- /* init serdes 1 */
- if (!(OneG_Mode && (nesadapter->phy_type[1] != NES_PHY_TYPE_PUMA_1G)))
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL1, 0x000000FF);
-
- switch (nesadapter->phy_type[1]) {
- case NES_PHY_TYPE_ARGUS:
- case NES_PHY_TYPE_SFP_D:
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_EMP0, 0x00000000);
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_EMP1, 0x00000000);
- break;
- case NES_PHY_TYPE_CX4:
- if (wide_ppm_offset)
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL1, 0x000FFFAA);
- break;
- case NES_PHY_TYPE_KR:
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_EMP1, 0x00000000);
- break;
- case NES_PHY_TYPE_PUMA_1G:
- sds = nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1);
- sds |= 0x000000100;
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1, sds);
- }
- if (!OneG_Mode) {
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_HIGHZ_LANE_MODE1, 0x11110000);
- sds = nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1);
- sds &= 0xFFFFFFBF;
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1, sds);
- }
- } else {
- /* init serdes 0 */
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0, 0x00000008);
- i = 0;
- while (((u32temp = (nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_STATUS0)
- & 0x0000000f)) != 0x0000000f) && i++ < 5000)
- mdelay(1);
- if (i > 5000) {
- nes_debug(NES_DBG_PHY, "Init: serdes 0 not ready, status=%x\n", u32temp);
- return 1;
- }
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_EMP0, 0x000bdef7);
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_DRIVE0, 0x9ce73000);
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_RX_MODE0, 0x0ff00000);
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_RX_SIGDET0, 0x00000000);
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_BYPASS0, 0x00000000);
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_LOOPBACK_CONTROL0, 0x00000000);
- if (OneG_Mode)
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_RX_EQ_CONTROL0, 0xf0182222);
- else
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_RX_EQ_CONTROL0, 0xf0042222);
-
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL0, 0x000000ff);
- if (port_count > 1) {
- /* init serdes 1 */
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1, 0x00000048);
- i = 0;
- while (((u32temp = (nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_STATUS1)
- & 0x0000000f)) != 0x0000000f) && (i++ < 5000))
- mdelay(1);
- if (i > 5000) {
- printk("%s: Init: serdes 1 not ready, status=%x\n", __func__, u32temp);
- /* return 1; */
- }
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_EMP1, 0x000bdef7);
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_DRIVE1, 0x9ce73000);
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_RX_MODE1, 0x0ff00000);
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_RX_SIGDET1, 0x00000000);
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_BYPASS1, 0x00000000);
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_LOOPBACK_CONTROL1, 0x00000000);
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_RX_EQ_CONTROL1, 0xf0002222);
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL1, 0x000000ff);
- }
- }
- return 0;
-}
-
-
-/**
- * nes_init_csr_ne020
- * Initialize registers for ne020 hardware
- */
-static void nes_init_csr_ne020(struct nes_device *nesdev, u8 hw_rev, u8 port_count)
-{
- u32 u32temp;
-
- nes_debug(NES_DBG_INIT, "port_count=%d\n", port_count);
-
- nes_write_indexed(nesdev, 0x000001E4, 0x00000007);
- /* nes_write_indexed(nesdev, 0x000001E8, 0x000208C4); */
- nes_write_indexed(nesdev, 0x000001E8, 0x00020874);
- nes_write_indexed(nesdev, 0x000001D8, 0x00048002);
- /* nes_write_indexed(nesdev, 0x000001D8, 0x0004B002); */
- nes_write_indexed(nesdev, 0x000001FC, 0x00050005);
- nes_write_indexed(nesdev, 0x00000600, 0x55555555);
- nes_write_indexed(nesdev, 0x00000604, 0x55555555);
-
- /* TODO: move these MAC register settings to NIC bringup */
- nes_write_indexed(nesdev, 0x00002000, 0x00000001);
- nes_write_indexed(nesdev, 0x00002004, 0x00000001);
- nes_write_indexed(nesdev, 0x00002008, 0x0000FFFF);
- nes_write_indexed(nesdev, 0x0000200C, 0x00000001);
- nes_write_indexed(nesdev, 0x00002010, 0x000003c1);
- nes_write_indexed(nesdev, 0x0000201C, 0x75345678);
- if (port_count > 1) {
- nes_write_indexed(nesdev, 0x00002200, 0x00000001);
- nes_write_indexed(nesdev, 0x00002204, 0x00000001);
- nes_write_indexed(nesdev, 0x00002208, 0x0000FFFF);
- nes_write_indexed(nesdev, 0x0000220C, 0x00000001);
- nes_write_indexed(nesdev, 0x00002210, 0x000003c1);
- nes_write_indexed(nesdev, 0x0000221C, 0x75345678);
- nes_write_indexed(nesdev, 0x00000908, 0x20000001);
- }
- if (port_count > 2) {
- nes_write_indexed(nesdev, 0x00002400, 0x00000001);
- nes_write_indexed(nesdev, 0x00002404, 0x00000001);
- nes_write_indexed(nesdev, 0x00002408, 0x0000FFFF);
- nes_write_indexed(nesdev, 0x0000240C, 0x00000001);
- nes_write_indexed(nesdev, 0x00002410, 0x000003c1);
- nes_write_indexed(nesdev, 0x0000241C, 0x75345678);
- nes_write_indexed(nesdev, 0x00000910, 0x20000001);
-
- nes_write_indexed(nesdev, 0x00002600, 0x00000001);
- nes_write_indexed(nesdev, 0x00002604, 0x00000001);
- nes_write_indexed(nesdev, 0x00002608, 0x0000FFFF);
- nes_write_indexed(nesdev, 0x0000260C, 0x00000001);
- nes_write_indexed(nesdev, 0x00002610, 0x000003c1);
- nes_write_indexed(nesdev, 0x0000261C, 0x75345678);
- nes_write_indexed(nesdev, 0x00000918, 0x20000001);
- }
-
- nes_write_indexed(nesdev, 0x00005000, 0x00018000);
- /* nes_write_indexed(nesdev, 0x00005000, 0x00010000); */
- nes_write_indexed(nesdev, NES_IDX_WQM_CONFIG1, (wqm_quanta << 1) |
- 0x00000001);
- nes_write_indexed(nesdev, 0x00005008, 0x1F1F1F1F);
- nes_write_indexed(nesdev, 0x00005010, 0x1F1F1F1F);
- nes_write_indexed(nesdev, 0x00005018, 0x1F1F1F1F);
- nes_write_indexed(nesdev, 0x00005020, 0x1F1F1F1F);
- nes_write_indexed(nesdev, 0x00006090, 0xFFFFFFFF);
-
- /* TODO: move this to code, get from EEPROM */
- nes_write_indexed(nesdev, 0x00000900, 0x20000001);
- nes_write_indexed(nesdev, 0x000060C0, 0x0000028e);
- nes_write_indexed(nesdev, 0x000060C8, 0x00000020);
-
- nes_write_indexed(nesdev, 0x000001EC, 0x7b2625a0);
- /* nes_write_indexed(nesdev, 0x000001EC, 0x5f2625a0); */
-
- if (hw_rev != NE020_REV) {
- u32temp = nes_read_indexed(nesdev, 0x000008e8);
- u32temp |= 0x80000000;
- nes_write_indexed(nesdev, 0x000008e8, u32temp);
- u32temp = nes_read_indexed(nesdev, 0x000021f8);
- u32temp &= 0x7fffffff;
- u32temp |= 0x7fff0010;
- nes_write_indexed(nesdev, 0x000021f8, u32temp);
- if (port_count > 1) {
- u32temp = nes_read_indexed(nesdev, 0x000023f8);
- u32temp &= 0x7fffffff;
- u32temp |= 0x7fff0010;
- nes_write_indexed(nesdev, 0x000023f8, u32temp);
- }
- }
-}
-
-
-/**
- * nes_destroy_adapter - destroy the adapter structure
- */
-void nes_destroy_adapter(struct nes_adapter *nesadapter)
-{
- struct nes_adapter *tmp_adapter;
-
- list_for_each_entry(tmp_adapter, &nes_adapter_list, list) {
- nes_debug(NES_DBG_SHUTDOWN, "Nes Adapter list entry = 0x%p.\n",
- tmp_adapter);
- }
-
- nesadapter->ref_count--;
- if (!nesadapter->ref_count) {
- if (nesadapter->hw_rev == NE020_REV) {
- del_timer(&nesadapter->mh_timer);
- }
- del_timer(&nesadapter->lc_timer);
-
- list_del(&nesadapter->list);
- kfree(nesadapter);
- }
-}
-
-
-/**
- * nes_init_cqp
- */
-int nes_init_cqp(struct nes_device *nesdev)
-{
- struct nes_adapter *nesadapter = nesdev->nesadapter;
- struct nes_hw_cqp_qp_context *cqp_qp_context;
- struct nes_hw_cqp_wqe *cqp_wqe;
- struct nes_hw_ceq *ceq;
- struct nes_hw_ceq *nic_ceq;
- struct nes_hw_aeq *aeq;
- void *vmem;
- dma_addr_t pmem;
- u32 count=0;
- u32 cqp_head;
- u64 u64temp;
- u32 u32temp;
-
- /* allocate CQP memory */
- /* Need to add max_cq to the aeq size once cq overflow checking is added back */
- /* SQ is 512 byte aligned, others are 256 byte aligned */
- nesdev->cqp_mem_size = 512 +
- (sizeof(struct nes_hw_cqp_wqe) * NES_CQP_SQ_SIZE) +
- (sizeof(struct nes_hw_cqe) * NES_CCQ_SIZE) +
- max(((u32)sizeof(struct nes_hw_ceqe) * NES_CCEQ_SIZE), (u32)256) +
- max(((u32)sizeof(struct nes_hw_ceqe) * NES_NIC_CEQ_SIZE), (u32)256) +
- (sizeof(struct nes_hw_aeqe) * nesadapter->max_qp) +
- sizeof(struct nes_hw_cqp_qp_context);
-
- nesdev->cqp_vbase = pci_zalloc_consistent(nesdev->pcidev,
- nesdev->cqp_mem_size,
- &nesdev->cqp_pbase);
- if (!nesdev->cqp_vbase) {
- nes_debug(NES_DBG_INIT, "Unable to allocate memory for host descriptor rings\n");
- return -ENOMEM;
- }
-
- /* Allocate a twice the number of CQP requests as the SQ size */
- nesdev->nes_cqp_requests = kzalloc(sizeof(struct nes_cqp_request) *
- 2 * NES_CQP_SQ_SIZE, GFP_KERNEL);
- if (!nesdev->nes_cqp_requests) {
- pci_free_consistent(nesdev->pcidev, nesdev->cqp_mem_size, nesdev->cqp.sq_vbase,
- nesdev->cqp.sq_pbase);
- return -ENOMEM;
- }
-
- nes_debug(NES_DBG_INIT, "Allocated CQP structures at %p (phys = %016lX), size = %u.\n",
- nesdev->cqp_vbase, (unsigned long)nesdev->cqp_pbase, nesdev->cqp_mem_size);
-
- spin_lock_init(&nesdev->cqp.lock);
- init_waitqueue_head(&nesdev->cqp.waitq);
-
- /* Setup Various Structures */
- vmem = (void *)(((unsigned long)nesdev->cqp_vbase + (512 - 1)) &
- ~(unsigned long)(512 - 1));
- pmem = (dma_addr_t)(((unsigned long long)nesdev->cqp_pbase + (512 - 1)) &
- ~(unsigned long long)(512 - 1));
-
- nesdev->cqp.sq_vbase = vmem;
- nesdev->cqp.sq_pbase = pmem;
- nesdev->cqp.sq_size = NES_CQP_SQ_SIZE;
- nesdev->cqp.sq_head = 0;
- nesdev->cqp.sq_tail = 0;
- nesdev->cqp.qp_id = PCI_FUNC(nesdev->pcidev->devfn);
-
- vmem += (sizeof(struct nes_hw_cqp_wqe) * nesdev->cqp.sq_size);
- pmem += (sizeof(struct nes_hw_cqp_wqe) * nesdev->cqp.sq_size);
-
- nesdev->ccq.cq_vbase = vmem;
- nesdev->ccq.cq_pbase = pmem;
- nesdev->ccq.cq_size = NES_CCQ_SIZE;
- nesdev->ccq.cq_head = 0;
- nesdev->ccq.ce_handler = nes_cqp_ce_handler;
- nesdev->ccq.cq_number = PCI_FUNC(nesdev->pcidev->devfn);
-
- vmem += (sizeof(struct nes_hw_cqe) * nesdev->ccq.cq_size);
- pmem += (sizeof(struct nes_hw_cqe) * nesdev->ccq.cq_size);
-
- nesdev->ceq_index = PCI_FUNC(nesdev->pcidev->devfn);
- ceq = &nesadapter->ceq[nesdev->ceq_index];
- ceq->ceq_vbase = vmem;
- ceq->ceq_pbase = pmem;
- ceq->ceq_size = NES_CCEQ_SIZE;
- ceq->ceq_head = 0;
-
- vmem += max(((u32)sizeof(struct nes_hw_ceqe) * ceq->ceq_size), (u32)256);
- pmem += max(((u32)sizeof(struct nes_hw_ceqe) * ceq->ceq_size), (u32)256);
-
- nesdev->nic_ceq_index = PCI_FUNC(nesdev->pcidev->devfn) + 8;
- nic_ceq = &nesadapter->ceq[nesdev->nic_ceq_index];
- nic_ceq->ceq_vbase = vmem;
- nic_ceq->ceq_pbase = pmem;
- nic_ceq->ceq_size = NES_NIC_CEQ_SIZE;
- nic_ceq->ceq_head = 0;
-
- vmem += max(((u32)sizeof(struct nes_hw_ceqe) * nic_ceq->ceq_size), (u32)256);
- pmem += max(((u32)sizeof(struct nes_hw_ceqe) * nic_ceq->ceq_size), (u32)256);
-
- aeq = &nesadapter->aeq[PCI_FUNC(nesdev->pcidev->devfn)];
- aeq->aeq_vbase = vmem;
- aeq->aeq_pbase = pmem;
- aeq->aeq_size = nesadapter->max_qp;
- aeq->aeq_head = 0;
-
- /* Setup QP Context */
- vmem += (sizeof(struct nes_hw_aeqe) * aeq->aeq_size);
- pmem += (sizeof(struct nes_hw_aeqe) * aeq->aeq_size);
-
- cqp_qp_context = vmem;
- cqp_qp_context->context_words[0] =
- cpu_to_le32((PCI_FUNC(nesdev->pcidev->devfn) << 12) + (2 << 10));
- cqp_qp_context->context_words[1] = 0;
- cqp_qp_context->context_words[2] = cpu_to_le32((u32)nesdev->cqp.sq_pbase);
- cqp_qp_context->context_words[3] = cpu_to_le32(((u64)nesdev->cqp.sq_pbase) >> 32);
-
-
- /* Write the address to Create CQP */
- if ((sizeof(dma_addr_t) > 4)) {
- nes_write_indexed(nesdev,
- NES_IDX_CREATE_CQP_HIGH + (PCI_FUNC(nesdev->pcidev->devfn) * 8),
- ((u64)pmem) >> 32);
- } else {
- nes_write_indexed(nesdev,
- NES_IDX_CREATE_CQP_HIGH + (PCI_FUNC(nesdev->pcidev->devfn) * 8), 0);
- }
- nes_write_indexed(nesdev,
- NES_IDX_CREATE_CQP_LOW + (PCI_FUNC(nesdev->pcidev->devfn) * 8),
- (u32)pmem);
-
- INIT_LIST_HEAD(&nesdev->cqp_avail_reqs);
- INIT_LIST_HEAD(&nesdev->cqp_pending_reqs);
-
- for (count = 0; count < 2*NES_CQP_SQ_SIZE; count++) {
- init_waitqueue_head(&nesdev->nes_cqp_requests[count].waitq);
- list_add_tail(&nesdev->nes_cqp_requests[count].list, &nesdev->cqp_avail_reqs);
- }
-
- /* Write Create CCQ WQE */
- cqp_head = nesdev->cqp.sq_head++;
- cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head];
- nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX,
- (NES_CQP_CREATE_CQ | NES_CQP_CQ_CEQ_VALID |
- NES_CQP_CQ_CHK_OVERFLOW | ((u32)nesdev->ccq.cq_size << 16)));
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX,
- (nesdev->ccq.cq_number |
- ((u32)nesdev->ceq_index << 16)));
- u64temp = (u64)nesdev->ccq.cq_pbase;
- set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_CQ_WQE_PBL_LOW_IDX, u64temp);
- cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_HIGH_IDX] = 0;
- u64temp = (unsigned long)&nesdev->ccq;
- cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_LOW_IDX] =
- cpu_to_le32((u32)(u64temp >> 1));
- cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_HIGH_IDX] =
- cpu_to_le32(((u32)((u64temp) >> 33)) & 0x7FFFFFFF);
- cqp_wqe->wqe_words[NES_CQP_CQ_WQE_DOORBELL_INDEX_HIGH_IDX] = 0;
-
- /* Write Create CEQ WQE */
- cqp_head = nesdev->cqp.sq_head++;
- cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head];
- nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX,
- (NES_CQP_CREATE_CEQ + ((u32)nesdev->ceq_index << 8)));
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_CEQ_WQE_ELEMENT_COUNT_IDX, ceq->ceq_size);
- u64temp = (u64)ceq->ceq_pbase;
- set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_CQ_WQE_PBL_LOW_IDX, u64temp);
-
- /* Write Create AEQ WQE */
- cqp_head = nesdev->cqp.sq_head++;
- cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head];
- nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX,
- (NES_CQP_CREATE_AEQ + ((u32)PCI_FUNC(nesdev->pcidev->devfn) << 8)));
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_AEQ_WQE_ELEMENT_COUNT_IDX, aeq->aeq_size);
- u64temp = (u64)aeq->aeq_pbase;
- set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_CQ_WQE_PBL_LOW_IDX, u64temp);
-
- /* Write Create NIC CEQ WQE */
- cqp_head = nesdev->cqp.sq_head++;
- cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head];
- nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX,
- (NES_CQP_CREATE_CEQ + ((u32)nesdev->nic_ceq_index << 8)));
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_CEQ_WQE_ELEMENT_COUNT_IDX, nic_ceq->ceq_size);
- u64temp = (u64)nic_ceq->ceq_pbase;
- set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_CQ_WQE_PBL_LOW_IDX, u64temp);
-
- /* Poll until CCQP done */
- count = 0;
- do {
- if (count++ > 1000) {
- printk(KERN_ERR PFX "Error creating CQP\n");
- pci_free_consistent(nesdev->pcidev, nesdev->cqp_mem_size,
- nesdev->cqp_vbase, nesdev->cqp_pbase);
- return -1;
- }
- udelay(10);
- } while (!(nes_read_indexed(nesdev,
- NES_IDX_QP_CONTROL + (PCI_FUNC(nesdev->pcidev->devfn) * 8)) & (1 << 8)));
-
- nes_debug(NES_DBG_INIT, "CQP Status = 0x%08X\n", nes_read_indexed(nesdev,
- NES_IDX_QP_CONTROL+(PCI_FUNC(nesdev->pcidev->devfn)*8)));
-
- u32temp = 0x04800000;
- nes_write32(nesdev->regs+NES_WQE_ALLOC, u32temp | nesdev->cqp.qp_id);
-
- /* wait for the CCQ, CEQ, and AEQ to get created */
- count = 0;
- do {
- if (count++ > 1000) {
- printk(KERN_ERR PFX "Error creating CCQ, CEQ, and AEQ\n");
- pci_free_consistent(nesdev->pcidev, nesdev->cqp_mem_size,
- nesdev->cqp_vbase, nesdev->cqp_pbase);
- return -1;
- }
- udelay(10);
- } while (((nes_read_indexed(nesdev,
- NES_IDX_QP_CONTROL+(PCI_FUNC(nesdev->pcidev->devfn)*8)) & (15<<8)) != (15<<8)));
-
- /* dump the QP status value */
- nes_debug(NES_DBG_INIT, "QP Status = 0x%08X\n", nes_read_indexed(nesdev,
- NES_IDX_QP_CONTROL+(PCI_FUNC(nesdev->pcidev->devfn)*8)));
-
- nesdev->cqp.sq_tail++;
-
- return 0;
-}
-
-
-/**
- * nes_destroy_cqp
- */
-int nes_destroy_cqp(struct nes_device *nesdev)
-{
- struct nes_hw_cqp_wqe *cqp_wqe;
- u32 count = 0;
- u32 cqp_head;
- unsigned long flags;
-
- do {
- if (count++ > 1000)
- break;
- udelay(10);
- } while (!(nesdev->cqp.sq_head == nesdev->cqp.sq_tail));
-
- /* Reset CCQ */
- nes_write32(nesdev->regs+NES_CQE_ALLOC, NES_CQE_ALLOC_RESET |
- nesdev->ccq.cq_number);
-
- /* Disable device interrupts */
- nes_write32(nesdev->regs+NES_INT_MASK, 0x7fffffff);
-
- spin_lock_irqsave(&nesdev->cqp.lock, flags);
-
- /* Destroy the AEQ */
- cqp_head = nesdev->cqp.sq_head++;
- nesdev->cqp.sq_head &= nesdev->cqp.sq_size-1;
- cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head];
- cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32(NES_CQP_DESTROY_AEQ |
- ((u32)PCI_FUNC(nesdev->pcidev->devfn) << 8));
- cqp_wqe->wqe_words[NES_CQP_WQE_COMP_CTX_HIGH_IDX] = 0;
-
- /* Destroy the NIC CEQ */
- cqp_head = nesdev->cqp.sq_head++;
- nesdev->cqp.sq_head &= nesdev->cqp.sq_size-1;
- cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head];
- cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32(NES_CQP_DESTROY_CEQ |
- ((u32)nesdev->nic_ceq_index << 8));
-
- /* Destroy the CEQ */
- cqp_head = nesdev->cqp.sq_head++;
- nesdev->cqp.sq_head &= nesdev->cqp.sq_size-1;
- cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head];
- cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32(NES_CQP_DESTROY_CEQ |
- (nesdev->ceq_index << 8));
-
- /* Destroy the CCQ */
- cqp_head = nesdev->cqp.sq_head++;
- nesdev->cqp.sq_head &= nesdev->cqp.sq_size-1;
- cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head];
- cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32(NES_CQP_DESTROY_CQ);
- cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX] = cpu_to_le32(nesdev->ccq.cq_number |
- ((u32)nesdev->ceq_index << 16));
-
- /* Destroy CQP */
- cqp_head = nesdev->cqp.sq_head++;
- nesdev->cqp.sq_head &= nesdev->cqp.sq_size-1;
- cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head];
- cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32(NES_CQP_DESTROY_QP |
- NES_CQP_QP_TYPE_CQP);
- cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX] = cpu_to_le32(nesdev->cqp.qp_id);
-
- barrier();
- /* Ring doorbell (5 WQEs) */
- nes_write32(nesdev->regs+NES_WQE_ALLOC, 0x05800000 | nesdev->cqp.qp_id);
-
- spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
-
- /* wait for the CCQ, CEQ, and AEQ to get destroyed */
- count = 0;
- do {
- if (count++ > 1000) {
- printk(KERN_ERR PFX "Function%d: Error destroying CCQ, CEQ, and AEQ\n",
- PCI_FUNC(nesdev->pcidev->devfn));
- break;
- }
- udelay(10);
- } while (((nes_read_indexed(nesdev,
- NES_IDX_QP_CONTROL + (PCI_FUNC(nesdev->pcidev->devfn)*8)) & (15 << 8)) != 0));
-
- /* dump the QP status value */
- nes_debug(NES_DBG_SHUTDOWN, "Function%d: QP Status = 0x%08X\n",
- PCI_FUNC(nesdev->pcidev->devfn),
- nes_read_indexed(nesdev,
- NES_IDX_QP_CONTROL+(PCI_FUNC(nesdev->pcidev->devfn)*8)));
-
- kfree(nesdev->nes_cqp_requests);
-
- /* Free the control structures */
- pci_free_consistent(nesdev->pcidev, nesdev->cqp_mem_size, nesdev->cqp.sq_vbase,
- nesdev->cqp.sq_pbase);
-
- return 0;
-}
-
-
-/**
- * nes_init_1g_phy
- */
-static int nes_init_1g_phy(struct nes_device *nesdev, u8 phy_type, u8 phy_index)
-{
- u32 counter = 0;
- u16 phy_data;
- int ret = 0;
-
- nes_read_1G_phy_reg(nesdev, 1, phy_index, &phy_data);
- nes_write_1G_phy_reg(nesdev, 23, phy_index, 0xb000);
-
- /* Reset the PHY */
- nes_write_1G_phy_reg(nesdev, 0, phy_index, 0x8000);
- udelay(100);
- counter = 0;
- do {
- nes_read_1G_phy_reg(nesdev, 0, phy_index, &phy_data);
- if (counter++ > 100) {
- ret = -1;
- break;
- }
- } while (phy_data & 0x8000);
-
- /* Setting no phy loopback */
- phy_data &= 0xbfff;
- phy_data |= 0x1140;
- nes_write_1G_phy_reg(nesdev, 0, phy_index, phy_data);
- nes_read_1G_phy_reg(nesdev, 0, phy_index, &phy_data);
- nes_read_1G_phy_reg(nesdev, 0x17, phy_index, &phy_data);
- nes_read_1G_phy_reg(nesdev, 0x1e, phy_index, &phy_data);
-
- /* Setting the interrupt mask */
- nes_read_1G_phy_reg(nesdev, 0x19, phy_index, &phy_data);
- nes_write_1G_phy_reg(nesdev, 0x19, phy_index, 0xffee);
- nes_read_1G_phy_reg(nesdev, 0x19, phy_index, &phy_data);
-
- /* turning on flow control */
- nes_read_1G_phy_reg(nesdev, 4, phy_index, &phy_data);
- nes_write_1G_phy_reg(nesdev, 4, phy_index, (phy_data & ~(0x03E0)) | 0xc00);
- nes_read_1G_phy_reg(nesdev, 4, phy_index, &phy_data);
-
- /* Clear Half duplex */
- nes_read_1G_phy_reg(nesdev, 9, phy_index, &phy_data);
- nes_write_1G_phy_reg(nesdev, 9, phy_index, phy_data & ~(0x0100));
- nes_read_1G_phy_reg(nesdev, 9, phy_index, &phy_data);
-
- nes_read_1G_phy_reg(nesdev, 0, phy_index, &phy_data);
- nes_write_1G_phy_reg(nesdev, 0, phy_index, phy_data | 0x0300);
-
- return ret;
-}
-
-
-/**
- * nes_init_2025_phy
- */
-static int nes_init_2025_phy(struct nes_device *nesdev, u8 phy_type, u8 phy_index)
-{
- u32 temp_phy_data = 0;
- u32 temp_phy_data2 = 0;
- u32 counter = 0;
- u32 sds;
- u32 mac_index = nesdev->mac_index;
- int ret = 0;
- unsigned int first_attempt = 1;
-
- /* Check firmware heartbeat */
- nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7ee);
- temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
- udelay(1500);
- nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7ee);
- temp_phy_data2 = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
-
- if (temp_phy_data != temp_phy_data2) {
- nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7fd);
- temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
- if ((temp_phy_data & 0xff) > 0x20)
- return 0;
- printk(PFX "Reinitialize external PHY\n");
- }
-
- /* no heartbeat, configure the PHY */
- nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0x0000, 0x8000);
- nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc300, 0x0000);
- nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc316, 0x000A);
- nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc318, 0x0052);
-
- switch (phy_type) {
- case NES_PHY_TYPE_ARGUS:
- nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc316, 0x000A);
- nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc318, 0x0052);
- nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc302, 0x000C);
- nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc319, 0x0008);
- nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0027, 0x0001);
- nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc31a, 0x0098);
- nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0026, 0x0E00);
-
- /* setup LEDs */
- nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd006, 0x0007);
- nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd007, 0x000A);
- nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd008, 0x0009);
- break;
-
- case NES_PHY_TYPE_SFP_D:
- nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc316, 0x000A);
- nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc318, 0x0052);
- nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc302, 0x0004);
- nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc319, 0x0038);
- nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0027, 0x0013);
- nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc31a, 0x0098);
- nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0026, 0x0E00);
-
- /* setup LEDs */
- nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd006, 0x0007);
- nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd007, 0x000A);
- nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd008, 0x0009);
- break;
-
- case NES_PHY_TYPE_KR:
- nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc316, 0x000A);
- nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc318, 0x0052);
- nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc302, 0x000C);
- nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc319, 0x0010);
- nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0027, 0x0013);
- nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc31a, 0x0080);
- nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0026, 0x0E00);
-
- /* setup LEDs */
- nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd006, 0x000B);
- nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd007, 0x0003);
- nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd008, 0x0004);
-
- nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0022, 0x406D);
- nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0023, 0x0020);
- break;
- }
-
- nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0028, 0xA528);
-
- /* Bring PHY out of reset */
- nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc300, 0x0002);
-
- /* Check for heartbeat */
- counter = 0;
- mdelay(690);
- nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7ee);
- temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
- do {
- if (counter++ > 150) {
- printk(PFX "No PHY heartbeat\n");
- break;
- }
- mdelay(1);
- nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7ee);
- temp_phy_data2 = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
- } while ((temp_phy_data2 == temp_phy_data));
-
- /* wait for tracking */
- counter = 0;
- do {
- nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7fd);
- temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
- if (counter++ > 300) {
- if (((temp_phy_data & 0xff) == 0x0) && first_attempt) {
- first_attempt = 0;
- counter = 0;
- /* reset AMCC PHY and try again */
- nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0xe854, 0x00c0);
- nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0xe854, 0x0040);
- continue;
- } else {
- ret = 1;
- break;
- }
- }
- mdelay(10);
- } while ((temp_phy_data & 0xff) < 0x30);
-
- /* setup signal integrity */
- nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd003, 0x0000);
- nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xF00D, 0x00FE);
- nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xF00E, 0x0032);
- if (phy_type == NES_PHY_TYPE_KR) {
- nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xF00F, 0x000C);
- } else {
- nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xF00F, 0x0002);
- nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc314, 0x0063);
- }
-
- /* reset serdes */
- sds = nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0 + mac_index * 0x200);
- sds |= 0x1;
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0 + mac_index * 0x200, sds);
- sds &= 0xfffffffe;
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0 + mac_index * 0x200, sds);
-
- counter = 0;
- while (((nes_read32(nesdev->regs + NES_SOFTWARE_RESET) & 0x00000040) != 0x00000040)
- && (counter++ < 5000))
- ;
-
- return ret;
-}
-
-
-/**
- * nes_init_phy
- */
-int nes_init_phy(struct nes_device *nesdev)
-{
- struct nes_adapter *nesadapter = nesdev->nesadapter;
- u32 mac_index = nesdev->mac_index;
- u32 tx_config = 0;
- unsigned long flags;
- u8 phy_type = nesadapter->phy_type[mac_index];
- u8 phy_index = nesadapter->phy_index[mac_index];
- int ret = 0;
-
- tx_config = nes_read_indexed(nesdev, NES_IDX_MAC_TX_CONFIG);
- if (phy_type == NES_PHY_TYPE_1G) {
- /* setup 1G MDIO operation */
- tx_config &= 0xFFFFFFE3;
- tx_config |= 0x04;
- } else {
- /* setup 10G MDIO operation */
- tx_config &= 0xFFFFFFE3;
- tx_config |= 0x1D;
- }
- nes_write_indexed(nesdev, NES_IDX_MAC_TX_CONFIG, tx_config);
-
- spin_lock_irqsave(&nesdev->nesadapter->phy_lock, flags);
-
- switch (phy_type) {
- case NES_PHY_TYPE_1G:
- ret = nes_init_1g_phy(nesdev, phy_type, phy_index);
- break;
- case NES_PHY_TYPE_ARGUS:
- case NES_PHY_TYPE_SFP_D:
- case NES_PHY_TYPE_KR:
- ret = nes_init_2025_phy(nesdev, phy_type, phy_index);
- break;
- }
-
- spin_unlock_irqrestore(&nesdev->nesadapter->phy_lock, flags);
-
- return ret;
-}
-
-
-/**
- * nes_replenish_nic_rq
- */
-static void nes_replenish_nic_rq(struct nes_vnic *nesvnic)
-{
- unsigned long flags;
- dma_addr_t bus_address;
- struct sk_buff *skb;
- struct nes_hw_nic_rq_wqe *nic_rqe;
- struct nes_hw_nic *nesnic;
- struct nes_device *nesdev;
- struct nes_rskb_cb *cb;
- u32 rx_wqes_posted = 0;
-
- nesnic = &nesvnic->nic;
- nesdev = nesvnic->nesdev;
- spin_lock_irqsave(&nesnic->rq_lock, flags);
- if (nesnic->replenishing_rq !=0) {
- if (((nesnic->rq_size-1) == atomic_read(&nesvnic->rx_skbs_needed)) &&
- (atomic_read(&nesvnic->rx_skb_timer_running) == 0)) {
- atomic_set(&nesvnic->rx_skb_timer_running, 1);
- spin_unlock_irqrestore(&nesnic->rq_lock, flags);
- nesvnic->rq_wqes_timer.expires = jiffies + (HZ/2); /* 1/2 second */
- add_timer(&nesvnic->rq_wqes_timer);
- } else
- spin_unlock_irqrestore(&nesnic->rq_lock, flags);
- return;
- }
- nesnic->replenishing_rq = 1;
- spin_unlock_irqrestore(&nesnic->rq_lock, flags);
- do {
- skb = dev_alloc_skb(nesvnic->max_frame_size);
- if (skb) {
- skb->dev = nesvnic->netdev;
-
- bus_address = pci_map_single(nesdev->pcidev,
- skb->data, nesvnic->max_frame_size, PCI_DMA_FROMDEVICE);
- cb = (struct nes_rskb_cb *)&skb->cb[0];
- cb->busaddr = bus_address;
- cb->maplen = nesvnic->max_frame_size;
-
- nic_rqe = &nesnic->rq_vbase[nesvnic->nic.rq_head];
- nic_rqe->wqe_words[NES_NIC_RQ_WQE_LENGTH_1_0_IDX] =
- cpu_to_le32(nesvnic->max_frame_size);
- nic_rqe->wqe_words[NES_NIC_RQ_WQE_LENGTH_3_2_IDX] = 0;
- nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_LOW_IDX] =
- cpu_to_le32((u32)bus_address);
- nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_HIGH_IDX] =
- cpu_to_le32((u32)((u64)bus_address >> 32));
- nesnic->rx_skb[nesnic->rq_head] = skb;
- nesnic->rq_head++;
- nesnic->rq_head &= nesnic->rq_size - 1;
- atomic_dec(&nesvnic->rx_skbs_needed);
- barrier();
- if (++rx_wqes_posted == 255) {
- nes_write32(nesdev->regs+NES_WQE_ALLOC, (rx_wqes_posted << 24) | nesnic->qp_id);
- rx_wqes_posted = 0;
- }
- } else {
- spin_lock_irqsave(&nesnic->rq_lock, flags);
- if (((nesnic->rq_size-1) == atomic_read(&nesvnic->rx_skbs_needed)) &&
- (atomic_read(&nesvnic->rx_skb_timer_running) == 0)) {
- atomic_set(&nesvnic->rx_skb_timer_running, 1);
- spin_unlock_irqrestore(&nesnic->rq_lock, flags);
- nesvnic->rq_wqes_timer.expires = jiffies + (HZ/2); /* 1/2 second */
- add_timer(&nesvnic->rq_wqes_timer);
- } else
- spin_unlock_irqrestore(&nesnic->rq_lock, flags);
- break;
- }
- } while (atomic_read(&nesvnic->rx_skbs_needed));
- barrier();
- if (rx_wqes_posted)
- nes_write32(nesdev->regs+NES_WQE_ALLOC, (rx_wqes_posted << 24) | nesnic->qp_id);
- nesnic->replenishing_rq = 0;
-}
-
-
-/**
- * nes_rq_wqes_timeout
- */
-static void nes_rq_wqes_timeout(unsigned long parm)
-{
- struct nes_vnic *nesvnic = (struct nes_vnic *)parm;
- printk("%s: Timer fired.\n", __func__);
- atomic_set(&nesvnic->rx_skb_timer_running, 0);
- if (atomic_read(&nesvnic->rx_skbs_needed))
- nes_replenish_nic_rq(nesvnic);
-}
-
-
-/**
- * nes_init_nic_qp
- */
-int nes_init_nic_qp(struct nes_device *nesdev, struct net_device *netdev)
-{
- struct nes_hw_cqp_wqe *cqp_wqe;
- struct nes_hw_nic_sq_wqe *nic_sqe;
- struct nes_hw_nic_qp_context *nic_context;
- struct sk_buff *skb;
- struct nes_hw_nic_rq_wqe *nic_rqe;
- struct nes_vnic *nesvnic = netdev_priv(netdev);
- unsigned long flags;
- void *vmem;
- dma_addr_t pmem;
- u64 u64temp;
- int ret;
- u32 cqp_head;
- u32 counter;
- u32 wqe_count;
- struct nes_rskb_cb *cb;
- u8 jumbomode=0;
-
- /* Allocate fragment, SQ, RQ, and CQ; Reuse CEQ based on the PCI function */
- nesvnic->nic_mem_size = 256 +
- (NES_NIC_WQ_SIZE * sizeof(struct nes_first_frag)) +
- (NES_NIC_WQ_SIZE * sizeof(struct nes_hw_nic_sq_wqe)) +
- (NES_NIC_WQ_SIZE * sizeof(struct nes_hw_nic_rq_wqe)) +
- (NES_NIC_WQ_SIZE * 2 * sizeof(struct nes_hw_nic_cqe)) +
- sizeof(struct nes_hw_nic_qp_context);
-
- nesvnic->nic_vbase = pci_zalloc_consistent(nesdev->pcidev,
- nesvnic->nic_mem_size,
- &nesvnic->nic_pbase);
- if (!nesvnic->nic_vbase) {
- nes_debug(NES_DBG_INIT, "Unable to allocate memory for NIC host descriptor rings\n");
- return -ENOMEM;
- }
- nes_debug(NES_DBG_INIT, "Allocated NIC QP structures at %p (phys = %016lX), size = %u.\n",
- nesvnic->nic_vbase, (unsigned long)nesvnic->nic_pbase, nesvnic->nic_mem_size);
-
- vmem = (void *)(((unsigned long)nesvnic->nic_vbase + (256 - 1)) &
- ~(unsigned long)(256 - 1));
- pmem = (dma_addr_t)(((unsigned long long)nesvnic->nic_pbase + (256 - 1)) &
- ~(unsigned long long)(256 - 1));
-
- /* Setup the first Fragment buffers */
- nesvnic->nic.first_frag_vbase = vmem;
-
- for (counter = 0; counter < NES_NIC_WQ_SIZE; counter++) {
- nesvnic->nic.frag_paddr[counter] = pmem;
- pmem += sizeof(struct nes_first_frag);
- }
-
- /* setup the SQ */
- vmem += (NES_NIC_WQ_SIZE * sizeof(struct nes_first_frag));
-
- nesvnic->nic.sq_vbase = (void *)vmem;
- nesvnic->nic.sq_pbase = pmem;
- nesvnic->nic.sq_head = 0;
- nesvnic->nic.sq_tail = 0;
- nesvnic->nic.sq_size = NES_NIC_WQ_SIZE;
- for (counter = 0; counter < NES_NIC_WQ_SIZE; counter++) {
- nic_sqe = &nesvnic->nic.sq_vbase[counter];
- nic_sqe->wqe_words[NES_NIC_SQ_WQE_MISC_IDX] =
- cpu_to_le32(NES_NIC_SQ_WQE_DISABLE_CHKSUM |
- NES_NIC_SQ_WQE_COMPLETION);
- nic_sqe->wqe_words[NES_NIC_SQ_WQE_LENGTH_0_TAG_IDX] =
- cpu_to_le32((u32)NES_FIRST_FRAG_SIZE << 16);
- nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_LOW_IDX] =
- cpu_to_le32((u32)nesvnic->nic.frag_paddr[counter]);
- nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_HIGH_IDX] =
- cpu_to_le32((u32)((u64)nesvnic->nic.frag_paddr[counter] >> 32));
- }
-
- nesvnic->get_cqp_request = nes_get_cqp_request;
- nesvnic->post_cqp_request = nes_post_cqp_request;
- nesvnic->mcrq_mcast_filter = NULL;
-
- spin_lock_init(&nesvnic->nic.rq_lock);
-
- /* setup the RQ */
- vmem += (NES_NIC_WQ_SIZE * sizeof(struct nes_hw_nic_sq_wqe));
- pmem += (NES_NIC_WQ_SIZE * sizeof(struct nes_hw_nic_sq_wqe));
-
-
- nesvnic->nic.rq_vbase = vmem;
- nesvnic->nic.rq_pbase = pmem;
- nesvnic->nic.rq_head = 0;
- nesvnic->nic.rq_tail = 0;
- nesvnic->nic.rq_size = NES_NIC_WQ_SIZE;
-
- /* setup the CQ */
- vmem += (NES_NIC_WQ_SIZE * sizeof(struct nes_hw_nic_rq_wqe));
- pmem += (NES_NIC_WQ_SIZE * sizeof(struct nes_hw_nic_rq_wqe));
-
- if (nesdev->nesadapter->netdev_count > 2)
- nesvnic->mcrq_qp_id = nesvnic->nic_index + 32;
- else
- nesvnic->mcrq_qp_id = nesvnic->nic.qp_id + 4;
-
- nesvnic->nic_cq.cq_vbase = vmem;
- nesvnic->nic_cq.cq_pbase = pmem;
- nesvnic->nic_cq.cq_head = 0;
- nesvnic->nic_cq.cq_size = NES_NIC_WQ_SIZE * 2;
-
- nesvnic->nic_cq.ce_handler = nes_nic_napi_ce_handler;
-
- /* Send CreateCQ request to CQP */
- spin_lock_irqsave(&nesdev->cqp.lock, flags);
- cqp_head = nesdev->cqp.sq_head;
-
- cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head];
- nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
-
- cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32(
- NES_CQP_CREATE_CQ | NES_CQP_CQ_CEQ_VALID |
- ((u32)nesvnic->nic_cq.cq_size << 16));
- cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX] = cpu_to_le32(
- nesvnic->nic_cq.cq_number | ((u32)nesdev->nic_ceq_index << 16));
- u64temp = (u64)nesvnic->nic_cq.cq_pbase;
- set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_CQ_WQE_PBL_LOW_IDX, u64temp);
- cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_HIGH_IDX] = 0;
- u64temp = (unsigned long)&nesvnic->nic_cq;
- cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_LOW_IDX] = cpu_to_le32((u32)(u64temp >> 1));
- cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_HIGH_IDX] =
- cpu_to_le32(((u32)((u64temp) >> 33)) & 0x7FFFFFFF);
- cqp_wqe->wqe_words[NES_CQP_CQ_WQE_DOORBELL_INDEX_HIGH_IDX] = 0;
- if (++cqp_head >= nesdev->cqp.sq_size)
- cqp_head = 0;
- cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head];
- nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
-
- /* Send CreateQP request to CQP */
- nic_context = (void *)(&nesvnic->nic_cq.cq_vbase[nesvnic->nic_cq.cq_size]);
- nic_context->context_words[NES_NIC_CTX_MISC_IDX] =
- cpu_to_le32((u32)NES_NIC_CTX_SIZE |
- ((u32)PCI_FUNC(nesdev->pcidev->devfn) << 12));
- nes_debug(NES_DBG_INIT, "RX_WINDOW_BUFFER_PAGE_TABLE_SIZE = 0x%08X, RX_WINDOW_BUFFER_SIZE = 0x%08X\n",
- nes_read_indexed(nesdev, NES_IDX_RX_WINDOW_BUFFER_PAGE_TABLE_SIZE),
- nes_read_indexed(nesdev, NES_IDX_RX_WINDOW_BUFFER_SIZE));
- if (nes_read_indexed(nesdev, NES_IDX_RX_WINDOW_BUFFER_SIZE) != 0) {
- nic_context->context_words[NES_NIC_CTX_MISC_IDX] |= cpu_to_le32(NES_NIC_BACK_STORE);
- }
-
- u64temp = (u64)nesvnic->nic.sq_pbase;
- nic_context->context_words[NES_NIC_CTX_SQ_LOW_IDX] = cpu_to_le32((u32)u64temp);
- nic_context->context_words[NES_NIC_CTX_SQ_HIGH_IDX] = cpu_to_le32((u32)(u64temp >> 32));
- u64temp = (u64)nesvnic->nic.rq_pbase;
- nic_context->context_words[NES_NIC_CTX_RQ_LOW_IDX] = cpu_to_le32((u32)u64temp);
- nic_context->context_words[NES_NIC_CTX_RQ_HIGH_IDX] = cpu_to_le32((u32)(u64temp >> 32));
-
- cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32(NES_CQP_CREATE_QP |
- NES_CQP_QP_TYPE_NIC);
- cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX] = cpu_to_le32(nesvnic->nic.qp_id);
- u64temp = (u64)nesvnic->nic_cq.cq_pbase +
- (nesvnic->nic_cq.cq_size * sizeof(struct nes_hw_nic_cqe));
- set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_QP_WQE_CONTEXT_LOW_IDX, u64temp);
-
- if (++cqp_head >= nesdev->cqp.sq_size)
- cqp_head = 0;
- nesdev->cqp.sq_head = cqp_head;
-
- barrier();
-
- /* Ring doorbell (2 WQEs) */
- nes_write32(nesdev->regs+NES_WQE_ALLOC, 0x02800000 | nesdev->cqp.qp_id);
-
- spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
- nes_debug(NES_DBG_INIT, "Waiting for create NIC QP%u to complete.\n",
- nesvnic->nic.qp_id);
-
- ret = wait_event_timeout(nesdev->cqp.waitq, (nesdev->cqp.sq_tail == cqp_head),
- NES_EVENT_TIMEOUT);
- nes_debug(NES_DBG_INIT, "Create NIC QP%u completed, wait_event_timeout ret = %u.\n",
- nesvnic->nic.qp_id, ret);
- if (!ret) {
- nes_debug(NES_DBG_INIT, "NIC QP%u create timeout expired\n", nesvnic->nic.qp_id);
- pci_free_consistent(nesdev->pcidev, nesvnic->nic_mem_size, nesvnic->nic_vbase,
- nesvnic->nic_pbase);
- return -EIO;
- }
-
- /* Populate the RQ */
- for (counter = 0; counter < (NES_NIC_WQ_SIZE - 1); counter++) {
- skb = dev_alloc_skb(nesvnic->max_frame_size);
- if (!skb) {
- nes_debug(NES_DBG_INIT, "%s: out of memory for receive skb\n", netdev->name);
-
- nes_destroy_nic_qp(nesvnic);
- return -ENOMEM;
- }
-
- skb->dev = netdev;
-
- pmem = pci_map_single(nesdev->pcidev, skb->data,
- nesvnic->max_frame_size, PCI_DMA_FROMDEVICE);
- cb = (struct nes_rskb_cb *)&skb->cb[0];
- cb->busaddr = pmem;
- cb->maplen = nesvnic->max_frame_size;
-
- nic_rqe = &nesvnic->nic.rq_vbase[counter];
- nic_rqe->wqe_words[NES_NIC_RQ_WQE_LENGTH_1_0_IDX] = cpu_to_le32(nesvnic->max_frame_size);
- nic_rqe->wqe_words[NES_NIC_RQ_WQE_LENGTH_3_2_IDX] = 0;
- nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_LOW_IDX] = cpu_to_le32((u32)pmem);
- nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_HIGH_IDX] = cpu_to_le32((u32)((u64)pmem >> 32));
- nesvnic->nic.rx_skb[counter] = skb;
- }
-
- wqe_count = NES_NIC_WQ_SIZE - 1;
- nesvnic->nic.rq_head = wqe_count;
- barrier();
- do {
- counter = min(wqe_count, ((u32)255));
- wqe_count -= counter;
- nes_write32(nesdev->regs+NES_WQE_ALLOC, (counter << 24) | nesvnic->nic.qp_id);
- } while (wqe_count);
- init_timer(&nesvnic->rq_wqes_timer);
- nesvnic->rq_wqes_timer.function = nes_rq_wqes_timeout;
- nesvnic->rq_wqes_timer.data = (unsigned long)nesvnic;
- nes_debug(NES_DBG_INIT, "NAPI support Enabled\n");
- if (nesdev->nesadapter->et_use_adaptive_rx_coalesce)
- {
- nes_nic_init_timer(nesdev);
- if (netdev->mtu > 1500)
- jumbomode = 1;
- nes_nic_init_timer_defaults(nesdev, jumbomode);
- }
- if ((nesdev->nesadapter->allow_unaligned_fpdus) &&
- (nes_init_mgt_qp(nesdev, netdev, nesvnic))) {
- nes_debug(NES_DBG_INIT, "%s: Out of memory for pau nic\n", netdev->name);
- nes_destroy_nic_qp(nesvnic);
- return -ENOMEM;
- }
-
- return 0;
-}
-
-
-/**
- * nes_destroy_nic_qp
- */
-void nes_destroy_nic_qp(struct nes_vnic *nesvnic)
-{
- u64 u64temp;
- dma_addr_t bus_address;
- struct nes_device *nesdev = nesvnic->nesdev;
- struct nes_hw_cqp_wqe *cqp_wqe;
- struct nes_hw_nic_sq_wqe *nic_sqe;
- __le16 *wqe_fragment_length;
- u16 wqe_fragment_index;
- u32 cqp_head;
- u32 wqm_cfg0;
- unsigned long flags;
- struct sk_buff *rx_skb;
- struct nes_rskb_cb *cb;
- int ret;
-
- if (nesdev->nesadapter->allow_unaligned_fpdus)
- nes_destroy_mgt(nesvnic);
-
- /* clear wqe stall before destroying NIC QP */
- wqm_cfg0 = nes_read_indexed(nesdev, NES_IDX_WQM_CONFIG0);
- nes_write_indexed(nesdev, NES_IDX_WQM_CONFIG0, wqm_cfg0 & 0xFFFF7FFF);
-
- /* Free remaining NIC receive buffers */
- while (nesvnic->nic.rq_head != nesvnic->nic.rq_tail) {
- rx_skb = nesvnic->nic.rx_skb[nesvnic->nic.rq_tail];
- cb = (struct nes_rskb_cb *)&rx_skb->cb[0];
- pci_unmap_single(nesdev->pcidev, cb->busaddr, cb->maplen,
- PCI_DMA_FROMDEVICE);
-
- dev_kfree_skb(nesvnic->nic.rx_skb[nesvnic->nic.rq_tail++]);
- nesvnic->nic.rq_tail &= (nesvnic->nic.rq_size - 1);
- }
-
- /* Free remaining NIC transmit buffers */
- while (nesvnic->nic.sq_head != nesvnic->nic.sq_tail) {
- nic_sqe = &nesvnic->nic.sq_vbase[nesvnic->nic.sq_tail];
- wqe_fragment_index = 1;
- wqe_fragment_length = (__le16 *)
- &nic_sqe->wqe_words[NES_NIC_SQ_WQE_LENGTH_0_TAG_IDX];
- /* bump past the vlan tag */
- wqe_fragment_length++;
- if (le16_to_cpu(wqe_fragment_length[wqe_fragment_index]) != 0) {
- u64temp = (u64)le32_to_cpu(
- nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_LOW_IDX+
- wqe_fragment_index*2]);
- u64temp += ((u64)le32_to_cpu(
- nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_HIGH_IDX
- + wqe_fragment_index*2]))<<32;
- bus_address = (dma_addr_t)u64temp;
- if (test_and_clear_bit(nesvnic->nic.sq_tail,
- nesvnic->nic.first_frag_overflow)) {
- pci_unmap_single(nesdev->pcidev,
- bus_address,
- le16_to_cpu(wqe_fragment_length[
- wqe_fragment_index++]),
- PCI_DMA_TODEVICE);
- }
- for (; wqe_fragment_index < 5; wqe_fragment_index++) {
- if (wqe_fragment_length[wqe_fragment_index]) {
- u64temp = le32_to_cpu(
- nic_sqe->wqe_words[
- NES_NIC_SQ_WQE_FRAG0_LOW_IDX+
- wqe_fragment_index*2]);
- u64temp += ((u64)le32_to_cpu(
- nic_sqe->wqe_words[
- NES_NIC_SQ_WQE_FRAG0_HIGH_IDX+
- wqe_fragment_index*2]))<<32;
- bus_address = (dma_addr_t)u64temp;
- pci_unmap_page(nesdev->pcidev,
- bus_address,
- le16_to_cpu(
- wqe_fragment_length[
- wqe_fragment_index]),
- PCI_DMA_TODEVICE);
- } else
- break;
- }
- }
- if (nesvnic->nic.tx_skb[nesvnic->nic.sq_tail])
- dev_kfree_skb(
- nesvnic->nic.tx_skb[nesvnic->nic.sq_tail]);
-
- nesvnic->nic.sq_tail = (nesvnic->nic.sq_tail + 1)
- & (nesvnic->nic.sq_size - 1);
- }
-
- spin_lock_irqsave(&nesdev->cqp.lock, flags);
-
- /* Destroy NIC QP */
- cqp_head = nesdev->cqp.sq_head;
- cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head];
- nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
-
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX,
- (NES_CQP_DESTROY_QP | NES_CQP_QP_TYPE_NIC));
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX,
- nesvnic->nic.qp_id);
-
- if (++cqp_head >= nesdev->cqp.sq_size)
- cqp_head = 0;
-
- cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head];
-
- /* Destroy NIC CQ */
- nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX,
- (NES_CQP_DESTROY_CQ | ((u32)nesvnic->nic_cq.cq_size << 16)));
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX,
- (nesvnic->nic_cq.cq_number | ((u32)nesdev->nic_ceq_index << 16)));
-
- if (++cqp_head >= nesdev->cqp.sq_size)
- cqp_head = 0;
-
- nesdev->cqp.sq_head = cqp_head;
- barrier();
-
- /* Ring doorbell (2 WQEs) */
- nes_write32(nesdev->regs+NES_WQE_ALLOC, 0x02800000 | nesdev->cqp.qp_id);
-
- spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
- nes_debug(NES_DBG_SHUTDOWN, "Waiting for CQP, cqp_head=%u, cqp.sq_head=%u,"
- " cqp.sq_tail=%u, cqp.sq_size=%u\n",
- cqp_head, nesdev->cqp.sq_head,
- nesdev->cqp.sq_tail, nesdev->cqp.sq_size);
-
- ret = wait_event_timeout(nesdev->cqp.waitq, (nesdev->cqp.sq_tail == cqp_head),
- NES_EVENT_TIMEOUT);
-
- nes_debug(NES_DBG_SHUTDOWN, "Destroy NIC QP returned, wait_event_timeout ret = %u, cqp_head=%u,"
- " cqp.sq_head=%u, cqp.sq_tail=%u\n",
- ret, cqp_head, nesdev->cqp.sq_head, nesdev->cqp.sq_tail);
- if (!ret) {
- nes_debug(NES_DBG_SHUTDOWN, "NIC QP%u destroy timeout expired\n",
- nesvnic->nic.qp_id);
- }
-
- pci_free_consistent(nesdev->pcidev, nesvnic->nic_mem_size, nesvnic->nic_vbase,
- nesvnic->nic_pbase);
-
- /* restore old wqm_cfg0 value */
- nes_write_indexed(nesdev, NES_IDX_WQM_CONFIG0, wqm_cfg0);
-}
-
-/**
- * nes_napi_isr
- */
-int nes_napi_isr(struct nes_device *nesdev)
-{
- struct nes_adapter *nesadapter = nesdev->nesadapter;
- u32 int_stat;
-
- if (nesdev->napi_isr_ran) {
- /* interrupt status has already been read in ISR */
- int_stat = nesdev->int_stat;
- } else {
- int_stat = nes_read32(nesdev->regs + NES_INT_STAT);
- nesdev->int_stat = int_stat;
- nesdev->napi_isr_ran = 1;
- }
-
- int_stat &= nesdev->int_req;
- /* iff NIC, process here, else wait for DPC */
- if ((int_stat) && ((int_stat & 0x0000ff00) == int_stat)) {
- nesdev->napi_isr_ran = 0;
- nes_write32(nesdev->regs + NES_INT_STAT,
- (int_stat &
- ~(NES_INT_INTF | NES_INT_TIMER | NES_INT_MAC0 | NES_INT_MAC1 | NES_INT_MAC2 | NES_INT_MAC3)));
-
- /* Process the CEQs */
- nes_process_ceq(nesdev, &nesdev->nesadapter->ceq[nesdev->nic_ceq_index]);
-
- if (unlikely((((nesadapter->et_rx_coalesce_usecs_irq) &&
- (!nesadapter->et_use_adaptive_rx_coalesce)) ||
- ((nesadapter->et_use_adaptive_rx_coalesce) &&
- (nesdev->deepcq_count > nesadapter->et_pkt_rate_low))))) {
- if ((nesdev->int_req & NES_INT_TIMER) == 0) {
- /* Enable Periodic timer interrupts */
- nesdev->int_req |= NES_INT_TIMER;
- /* ack any pending periodic timer interrupts so we don't get an immediate interrupt */
- /* TODO: need to also ack other unused periodic timer values, get from nesadapter */
- nes_write32(nesdev->regs+NES_TIMER_STAT,
- nesdev->timer_int_req | ~(nesdev->nesadapter->timer_int_req));
- nes_write32(nesdev->regs+NES_INTF_INT_MASK,
- ~(nesdev->intf_int_req | NES_INTF_PERIODIC_TIMER));
- }
-
- if (unlikely(nesadapter->et_use_adaptive_rx_coalesce))
- {
- nes_nic_init_timer(nesdev);
- }
- /* Enable interrupts, except CEQs */
- nes_write32(nesdev->regs+NES_INT_MASK, 0x0000ffff | (~nesdev->int_req));
- } else {
- /* Enable interrupts, make sure timer is off */
- nesdev->int_req &= ~NES_INT_TIMER;
- nes_write32(nesdev->regs+NES_INTF_INT_MASK, ~(nesdev->intf_int_req));
- nes_write32(nesdev->regs+NES_INT_MASK, ~nesdev->int_req);
- }
- nesdev->deepcq_count = 0;
- return 1;
- } else {
- return 0;
- }
-}
-
-static void process_critical_error(struct nes_device *nesdev)
-{
- u32 debug_error;
- u32 nes_idx_debug_error_masks0 = 0;
- u16 error_module = 0;
-
- debug_error = nes_read_indexed(nesdev, NES_IDX_DEBUG_ERROR_CONTROL_STATUS);
- printk(KERN_ERR PFX "Critical Error reported by device!!! 0x%02X\n",
- (u16)debug_error);
- nes_write_indexed(nesdev, NES_IDX_DEBUG_ERROR_CONTROL_STATUS,
- 0x01010000 | (debug_error & 0x0000ffff));
- if (crit_err_count++ > 10)
- nes_write_indexed(nesdev, NES_IDX_DEBUG_ERROR_MASKS1, 1 << 0x17);
- error_module = (u16) (debug_error & 0x1F00) >> 8;
- if (++nesdev->nesadapter->crit_error_count[error_module-1] >=
- nes_max_critical_error_count) {
- printk(KERN_ERR PFX "Masking off critical error for module "
- "0x%02X\n", (u16)error_module);
- nes_idx_debug_error_masks0 = nes_read_indexed(nesdev,
- NES_IDX_DEBUG_ERROR_MASKS0);
- nes_write_indexed(nesdev, NES_IDX_DEBUG_ERROR_MASKS0,
- nes_idx_debug_error_masks0 | (1 << error_module));
- }
-}
-/**
- * nes_dpc
- */
-void nes_dpc(unsigned long param)
-{
- struct nes_device *nesdev = (struct nes_device *)param;
- struct nes_adapter *nesadapter = nesdev->nesadapter;
- u32 counter;
- u32 loop_counter = 0;
- u32 int_status_bit;
- u32 int_stat;
- u32 timer_stat;
- u32 temp_int_stat;
- u32 intf_int_stat;
- u32 processed_intf_int = 0;
- u16 processed_timer_int = 0;
- u16 completion_ints = 0;
- u16 timer_ints = 0;
-
- /* nes_debug(NES_DBG_ISR, "\n"); */
-
- do {
- timer_stat = 0;
- if (nesdev->napi_isr_ran) {
- nesdev->napi_isr_ran = 0;
- int_stat = nesdev->int_stat;
- } else
- int_stat = nes_read32(nesdev->regs+NES_INT_STAT);
- if (processed_intf_int != 0)
- int_stat &= nesdev->int_req & ~NES_INT_INTF;
- else
- int_stat &= nesdev->int_req;
- if (processed_timer_int == 0) {
- processed_timer_int = 1;
- if (int_stat & NES_INT_TIMER) {
- timer_stat = nes_read32(nesdev->regs + NES_TIMER_STAT);
- if ((timer_stat & nesdev->timer_int_req) == 0) {
- int_stat &= ~NES_INT_TIMER;
- }
- }
- } else {
- int_stat &= ~NES_INT_TIMER;
- }
-
- if (int_stat) {
- if (int_stat & ~(NES_INT_INTF | NES_INT_TIMER | NES_INT_MAC0|
- NES_INT_MAC1|NES_INT_MAC2 | NES_INT_MAC3)) {
- /* Ack the interrupts */
- nes_write32(nesdev->regs+NES_INT_STAT,
- (int_stat & ~(NES_INT_INTF | NES_INT_TIMER | NES_INT_MAC0|
- NES_INT_MAC1 | NES_INT_MAC2 | NES_INT_MAC3)));
- }
-
- temp_int_stat = int_stat;
- for (counter = 0, int_status_bit = 1; counter < 16; counter++) {
- if (int_stat & int_status_bit) {
- nes_process_ceq(nesdev, &nesadapter->ceq[counter]);
- temp_int_stat &= ~int_status_bit;
- completion_ints = 1;
- }
- if (!(temp_int_stat & 0x0000ffff))
- break;
- int_status_bit <<= 1;
- }
-
- /* Process the AEQ for this pci function */
- int_status_bit = 1 << (16 + PCI_FUNC(nesdev->pcidev->devfn));
- if (int_stat & int_status_bit) {
- nes_process_aeq(nesdev, &nesadapter->aeq[PCI_FUNC(nesdev->pcidev->devfn)]);
- }
-
- /* Process the MAC interrupt for this pci function */
- int_status_bit = 1 << (24 + nesdev->mac_index);
- if (int_stat & int_status_bit) {
- nes_process_mac_intr(nesdev, nesdev->mac_index);
- }
-
- if (int_stat & NES_INT_TIMER) {
- if (timer_stat & nesdev->timer_int_req) {
- nes_write32(nesdev->regs + NES_TIMER_STAT,
- (timer_stat & nesdev->timer_int_req) |
- ~(nesdev->nesadapter->timer_int_req));
- timer_ints = 1;
- }
- }
-
- if (int_stat & NES_INT_INTF) {
- processed_intf_int = 1;
- intf_int_stat = nes_read32(nesdev->regs+NES_INTF_INT_STAT);
- intf_int_stat &= nesdev->intf_int_req;
- if (NES_INTF_INT_CRITERR & intf_int_stat) {
- process_critical_error(nesdev);
- }
- if (NES_INTF_INT_PCIERR & intf_int_stat) {
- printk(KERN_ERR PFX "PCI Error reported by device!!!\n");
- BUG();
- }
- if (NES_INTF_INT_AEQ_OFLOW & intf_int_stat) {
- printk(KERN_ERR PFX "AEQ Overflow reported by device!!!\n");
- BUG();
- }
- nes_write32(nesdev->regs+NES_INTF_INT_STAT, intf_int_stat);
- }
-
- if (int_stat & NES_INT_TSW) {
- }
- }
- /* Don't use the interface interrupt bit stay in loop */
- int_stat &= ~NES_INT_INTF | NES_INT_TIMER | NES_INT_MAC0 |
- NES_INT_MAC1 | NES_INT_MAC2 | NES_INT_MAC3;
- } while ((int_stat != 0) && (loop_counter++ < MAX_DPC_ITERATIONS));
-
- if (timer_ints == 1) {
- if ((nesadapter->et_rx_coalesce_usecs_irq) || (nesadapter->et_use_adaptive_rx_coalesce)) {
- if (completion_ints == 0) {
- nesdev->timer_only_int_count++;
- if (nesdev->timer_only_int_count>=nesadapter->timer_int_limit) {
- nesdev->timer_only_int_count = 0;
- nesdev->int_req &= ~NES_INT_TIMER;
- nes_write32(nesdev->regs + NES_INTF_INT_MASK, ~(nesdev->intf_int_req));
- nes_write32(nesdev->regs + NES_INT_MASK, ~nesdev->int_req);
- } else {
- nes_write32(nesdev->regs+NES_INT_MASK, 0x0000ffff | (~nesdev->int_req));
- }
- } else {
- if (unlikely(nesadapter->et_use_adaptive_rx_coalesce))
- {
- nes_nic_init_timer(nesdev);
- }
- nesdev->timer_only_int_count = 0;
- nes_write32(nesdev->regs+NES_INT_MASK, 0x0000ffff | (~nesdev->int_req));
- }
- } else {
- nesdev->timer_only_int_count = 0;
- nesdev->int_req &= ~NES_INT_TIMER;
- nes_write32(nesdev->regs+NES_INTF_INT_MASK, ~(nesdev->intf_int_req));
- nes_write32(nesdev->regs+NES_TIMER_STAT,
- nesdev->timer_int_req | ~(nesdev->nesadapter->timer_int_req));
- nes_write32(nesdev->regs+NES_INT_MASK, ~nesdev->int_req);
- }
- } else {
- if ( (completion_ints == 1) &&
- (((nesadapter->et_rx_coalesce_usecs_irq) &&
- (!nesadapter->et_use_adaptive_rx_coalesce)) ||
- ((nesdev->deepcq_count > nesadapter->et_pkt_rate_low) &&
- (nesadapter->et_use_adaptive_rx_coalesce) )) ) {
- /* nes_debug(NES_DBG_ISR, "Enabling periodic timer interrupt.\n" ); */
- nesdev->timer_only_int_count = 0;
- nesdev->int_req |= NES_INT_TIMER;
- nes_write32(nesdev->regs+NES_TIMER_STAT,
- nesdev->timer_int_req | ~(nesdev->nesadapter->timer_int_req));
- nes_write32(nesdev->regs+NES_INTF_INT_MASK,
- ~(nesdev->intf_int_req | NES_INTF_PERIODIC_TIMER));
- nes_write32(nesdev->regs+NES_INT_MASK, 0x0000ffff | (~nesdev->int_req));
- } else {
- nes_write32(nesdev->regs+NES_INT_MASK, ~nesdev->int_req);
- }
- }
- nesdev->deepcq_count = 0;
-}
-
-
-/**
- * nes_process_ceq
- */
-static void nes_process_ceq(struct nes_device *nesdev, struct nes_hw_ceq *ceq)
-{
- u64 u64temp;
- struct nes_hw_cq *cq;
- u32 head;
- u32 ceq_size;
-
- /* nes_debug(NES_DBG_CQ, "\n"); */
- head = ceq->ceq_head;
- ceq_size = ceq->ceq_size;
-
- do {
- if (le32_to_cpu(ceq->ceq_vbase[head].ceqe_words[NES_CEQE_CQ_CTX_HIGH_IDX]) &
- NES_CEQE_VALID) {
- u64temp = (((u64)(le32_to_cpu(ceq->ceq_vbase[head].ceqe_words[NES_CEQE_CQ_CTX_HIGH_IDX]))) << 32) |
- ((u64)(le32_to_cpu(ceq->ceq_vbase[head].ceqe_words[NES_CEQE_CQ_CTX_LOW_IDX])));
- u64temp <<= 1;
- cq = *((struct nes_hw_cq **)&u64temp);
- /* nes_debug(NES_DBG_CQ, "pCQ = %p\n", cq); */
- barrier();
- ceq->ceq_vbase[head].ceqe_words[NES_CEQE_CQ_CTX_HIGH_IDX] = 0;
-
- /* call the event handler */
- cq->ce_handler(nesdev, cq);
-
- if (++head >= ceq_size)
- head = 0;
- } else {
- break;
- }
-
- } while (1);
-
- ceq->ceq_head = head;
-}
-
-
-/**
- * nes_process_aeq
- */
-static void nes_process_aeq(struct nes_device *nesdev, struct nes_hw_aeq *aeq)
-{
- /* u64 u64temp; */
- u32 head;
- u32 aeq_size;
- u32 aeqe_misc;
- u32 aeqe_cq_id;
- struct nes_hw_aeqe volatile *aeqe;
-
- head = aeq->aeq_head;
- aeq_size = aeq->aeq_size;
-
- do {
- aeqe = &aeq->aeq_vbase[head];
- if ((le32_to_cpu(aeqe->aeqe_words[NES_AEQE_MISC_IDX]) & NES_AEQE_VALID) == 0)
- break;
- aeqe_misc = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_MISC_IDX]);
- aeqe_cq_id = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX]);
- if (aeqe_misc & (NES_AEQE_QP|NES_AEQE_CQ)) {
- if (aeqe_cq_id >= NES_FIRST_QPN) {
- /* dealing with an accelerated QP related AE */
- /*
- * u64temp = (((u64)(le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_HIGH_IDX]))) << 32) |
- * ((u64)(le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_LOW_IDX])));
- */
- nes_process_iwarp_aeqe(nesdev, (struct nes_hw_aeqe *)aeqe);
- } else {
- /* TODO: dealing with a CQP related AE */
- nes_debug(NES_DBG_AEQ, "Processing CQP related AE, misc = 0x%04X\n",
- (u16)(aeqe_misc >> 16));
- }
- }
-
- aeqe->aeqe_words[NES_AEQE_MISC_IDX] = 0;
-
- if (++head >= aeq_size)
- head = 0;
-
- nes_write32(nesdev->regs + NES_AEQ_ALLOC, 1 << 16);
- }
- while (1);
- aeq->aeq_head = head;
-}
-
-static void nes_reset_link(struct nes_device *nesdev, u32 mac_index)
-{
- struct nes_adapter *nesadapter = nesdev->nesadapter;
- u32 reset_value;
- u32 i=0;
- u32 u32temp;
-
- if (nesadapter->hw_rev == NE020_REV) {
- return;
- }
- mh_detected++;
-
- reset_value = nes_read32(nesdev->regs+NES_SOFTWARE_RESET);
-
- if ((mac_index == 0) || ((mac_index == 1) && (nesadapter->OneG_Mode)))
- reset_value |= 0x0000001d;
- else
- reset_value |= 0x0000002d;
-
- if (4 <= (nesadapter->link_interrupt_count[mac_index] / ((u16)NES_MAX_LINK_INTERRUPTS))) {
- if ((!nesadapter->OneG_Mode) && (nesadapter->port_count == 2)) {
- nesadapter->link_interrupt_count[0] = 0;
- nesadapter->link_interrupt_count[1] = 0;
- u32temp = nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1);
- if (0x00000040 & u32temp)
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1, 0x0000F088);
- else
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1, 0x0000F0C8);
-
- reset_value |= 0x0000003d;
- }
- nesadapter->link_interrupt_count[mac_index] = 0;
- }
-
- nes_write32(nesdev->regs+NES_SOFTWARE_RESET, reset_value);
-
- while (((nes_read32(nesdev->regs+NES_SOFTWARE_RESET)
- & 0x00000040) != 0x00000040) && (i++ < 5000));
-
- if (0x0000003d == (reset_value & 0x0000003d)) {
- u32 pcs_control_status0, pcs_control_status1;
-
- for (i = 0; i < 10; i++) {
- pcs_control_status0 = nes_read_indexed(nesdev, NES_IDX_PHY_PCS_CONTROL_STATUS0);
- pcs_control_status1 = nes_read_indexed(nesdev, NES_IDX_PHY_PCS_CONTROL_STATUS0 + 0x200);
- if (((0x0F000000 == (pcs_control_status0 & 0x0F000000))
- && (pcs_control_status0 & 0x00100000))
- || ((0x0F000000 == (pcs_control_status1 & 0x0F000000))
- && (pcs_control_status1 & 0x00100000)))
- continue;
- else
- break;
- }
- if (10 == i) {
- u32temp = nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1);
- if (0x00000040 & u32temp)
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1, 0x0000F088);
- else
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1, 0x0000F0C8);
-
- nes_write32(nesdev->regs+NES_SOFTWARE_RESET, reset_value);
-
- while (((nes_read32(nesdev->regs + NES_SOFTWARE_RESET)
- & 0x00000040) != 0x00000040) && (i++ < 5000));
- }
- }
-}
-
-/**
- * nes_process_mac_intr
- */
-static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number)
-{
- unsigned long flags;
- u32 pcs_control_status;
- struct nes_adapter *nesadapter = nesdev->nesadapter;
- struct nes_vnic *nesvnic;
- u32 mac_status;
- u32 mac_index = nesdev->mac_index;
- u32 u32temp;
- u16 phy_data;
- u16 temp_phy_data;
- u32 pcs_val = 0x0f0f0000;
- u32 pcs_mask = 0x0f1f0000;
- u32 cdr_ctrl;
-
- spin_lock_irqsave(&nesadapter->phy_lock, flags);
- if (nesadapter->mac_sw_state[mac_number] != NES_MAC_SW_IDLE) {
- spin_unlock_irqrestore(&nesadapter->phy_lock, flags);
- return;
- }
- nesadapter->mac_sw_state[mac_number] = NES_MAC_SW_INTERRUPT;
-
- /* ack the MAC interrupt */
- mac_status = nes_read_indexed(nesdev, NES_IDX_MAC_INT_STATUS + (mac_index * 0x200));
- /* Clear the interrupt */
- nes_write_indexed(nesdev, NES_IDX_MAC_INT_STATUS + (mac_index * 0x200), mac_status);
-
- nes_debug(NES_DBG_PHY, "MAC%u interrupt status = 0x%X.\n", mac_number, mac_status);
-
- if (mac_status & (NES_MAC_INT_LINK_STAT_CHG | NES_MAC_INT_XGMII_EXT)) {
- nesdev->link_status_interrupts++;
- if (0 == (++nesadapter->link_interrupt_count[mac_index] % ((u16)NES_MAX_LINK_INTERRUPTS)))
- nes_reset_link(nesdev, mac_index);
-
- /* read the PHY interrupt status register */
- if ((nesadapter->OneG_Mode) &&
- (nesadapter->phy_type[mac_index] != NES_PHY_TYPE_PUMA_1G)) {
- do {
- nes_read_1G_phy_reg(nesdev, 0x1a,
- nesadapter->phy_index[mac_index], &phy_data);
- nes_debug(NES_DBG_PHY, "Phy%d data from register 0x1a = 0x%X.\n",
- nesadapter->phy_index[mac_index], phy_data);
- } while (phy_data&0x8000);
-
- temp_phy_data = 0;
- do {
- nes_read_1G_phy_reg(nesdev, 0x11,
- nesadapter->phy_index[mac_index], &phy_data);
- nes_debug(NES_DBG_PHY, "Phy%d data from register 0x11 = 0x%X.\n",
- nesadapter->phy_index[mac_index], phy_data);
- if (temp_phy_data == phy_data)
- break;
- temp_phy_data = phy_data;
- } while (1);
-
- nes_read_1G_phy_reg(nesdev, 0x1e,
- nesadapter->phy_index[mac_index], &phy_data);
- nes_debug(NES_DBG_PHY, "Phy%d data from register 0x1e = 0x%X.\n",
- nesadapter->phy_index[mac_index], phy_data);
-
- nes_read_1G_phy_reg(nesdev, 1,
- nesadapter->phy_index[mac_index], &phy_data);
- nes_debug(NES_DBG_PHY, "1G phy%u data from register 1 = 0x%X\n",
- nesadapter->phy_index[mac_index], phy_data);
-
- if (temp_phy_data & 0x1000) {
- nes_debug(NES_DBG_PHY, "The Link is up according to the PHY\n");
- phy_data = 4;
- } else {
- nes_debug(NES_DBG_PHY, "The Link is down according to the PHY\n");
- }
- }
- nes_debug(NES_DBG_PHY, "Eth SERDES Common Status: 0=0x%08X, 1=0x%08X\n",
- nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_STATUS0),
- nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_STATUS0+0x200));
-
- if (nesadapter->phy_type[mac_index] == NES_PHY_TYPE_PUMA_1G) {
- switch (mac_index) {
- case 1:
- case 3:
- pcs_control_status = nes_read_indexed(nesdev,
- NES_IDX_PHY_PCS_CONTROL_STATUS0 + 0x200);
- break;
- default:
- pcs_control_status = nes_read_indexed(nesdev,
- NES_IDX_PHY_PCS_CONTROL_STATUS0);
- break;
- }
- } else {
- pcs_control_status = nes_read_indexed(nesdev,
- NES_IDX_PHY_PCS_CONTROL_STATUS0 + ((mac_index & 1) * 0x200));
- pcs_control_status = nes_read_indexed(nesdev,
- NES_IDX_PHY_PCS_CONTROL_STATUS0 + ((mac_index & 1) * 0x200));
- }
-
- nes_debug(NES_DBG_PHY, "PCS PHY Control/Status%u: 0x%08X\n",
- mac_index, pcs_control_status);
- if ((nesadapter->OneG_Mode) &&
- (nesadapter->phy_type[mac_index] != NES_PHY_TYPE_PUMA_1G)) {
- u32temp = 0x01010000;
- if (nesadapter->port_count > 2) {
- u32temp |= 0x02020000;
- }
- if ((pcs_control_status & u32temp)!= u32temp) {
- phy_data = 0;
- nes_debug(NES_DBG_PHY, "PCS says the link is down\n");
- }
- } else {
- switch (nesadapter->phy_type[mac_index]) {
- case NES_PHY_TYPE_ARGUS:
- case NES_PHY_TYPE_SFP_D:
- case NES_PHY_TYPE_KR:
- /* clear the alarms */
- nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 4, 0x0008);
- nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 4, 0xc001);
- nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 4, 0xc002);
- nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 4, 0xc005);
- nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 4, 0xc006);
- nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 0x9003);
- nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 0x9004);
- nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 0x9005);
- /* check link status */
- nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 0x9003);
- temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
-
- nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 3, 0x0021);
- nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
- nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 3, 0x0021);
- phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
-
- phy_data = (!temp_phy_data && (phy_data == 0x8000)) ? 0x4 : 0x0;
-
- nes_debug(NES_DBG_PHY, "%s: Phy data = 0x%04X, link was %s.\n",
- __func__, phy_data, nesadapter->mac_link_down[mac_index] ? "DOWN" : "UP");
- break;
-
- case NES_PHY_TYPE_PUMA_1G:
- if (mac_index < 2)
- pcs_val = pcs_mask = 0x01010000;
- else
- pcs_val = pcs_mask = 0x02020000;
- /* fall through */
- default:
- phy_data = (pcs_val == (pcs_control_status & pcs_mask)) ? 0x4 : 0x0;
- break;
- }
- }
-
- if (phy_data & 0x0004) {
- if (wide_ppm_offset &&
- (nesadapter->phy_type[mac_index] == NES_PHY_TYPE_CX4) &&
- (nesadapter->hw_rev != NE020_REV)) {
- cdr_ctrl = nes_read_indexed(nesdev,
- NES_IDX_ETH_SERDES_CDR_CONTROL0 +
- mac_index * 0x200);
- nes_write_indexed(nesdev,
- NES_IDX_ETH_SERDES_CDR_CONTROL0 +
- mac_index * 0x200,
- cdr_ctrl | 0x000F0000);
- }
- nesadapter->mac_link_down[mac_index] = 0;
- list_for_each_entry(nesvnic, &nesadapter->nesvnic_list[mac_index], list) {
- nes_debug(NES_DBG_PHY, "The Link is UP!!. linkup was %d\n",
- nesvnic->linkup);
- if (nesvnic->linkup == 0) {
- printk(PFX "The Link is now up for port %s, netdev %p.\n",
- nesvnic->netdev->name, nesvnic->netdev);
- if (netif_queue_stopped(nesvnic->netdev))
- netif_start_queue(nesvnic->netdev);
- nesvnic->linkup = 1;
- netif_carrier_on(nesvnic->netdev);
-
- spin_lock(&nesvnic->port_ibevent_lock);
- if (nesvnic->of_device_registered) {
- if (nesdev->iw_status == 0) {
- nesdev->iw_status = 1;
- nes_port_ibevent(nesvnic);
- }
- }
- spin_unlock(&nesvnic->port_ibevent_lock);
- }
- }
- } else {
- if (wide_ppm_offset &&
- (nesadapter->phy_type[mac_index] == NES_PHY_TYPE_CX4) &&
- (nesadapter->hw_rev != NE020_REV)) {
- cdr_ctrl = nes_read_indexed(nesdev,
- NES_IDX_ETH_SERDES_CDR_CONTROL0 +
- mac_index * 0x200);
- nes_write_indexed(nesdev,
- NES_IDX_ETH_SERDES_CDR_CONTROL0 +
- mac_index * 0x200,
- cdr_ctrl & 0xFFF0FFFF);
- }
- nesadapter->mac_link_down[mac_index] = 1;
- list_for_each_entry(nesvnic, &nesadapter->nesvnic_list[mac_index], list) {
- nes_debug(NES_DBG_PHY, "The Link is Down!!. linkup was %d\n",
- nesvnic->linkup);
- if (nesvnic->linkup == 1) {
- printk(PFX "The Link is now down for port %s, netdev %p.\n",
- nesvnic->netdev->name, nesvnic->netdev);
- if (!(netif_queue_stopped(nesvnic->netdev)))
- netif_stop_queue(nesvnic->netdev);
- nesvnic->linkup = 0;
- netif_carrier_off(nesvnic->netdev);
-
- spin_lock(&nesvnic->port_ibevent_lock);
- if (nesvnic->of_device_registered) {
- if (nesdev->iw_status == 1) {
- nesdev->iw_status = 0;
- nes_port_ibevent(nesvnic);
- }
- }
- spin_unlock(&nesvnic->port_ibevent_lock);
- }
- }
- }
- if (nesadapter->phy_type[mac_index] == NES_PHY_TYPE_SFP_D) {
- nesdev->link_recheck = 1;
- mod_delayed_work(system_wq, &nesdev->work,
- NES_LINK_RECHECK_DELAY);
- }
- }
-
- spin_unlock_irqrestore(&nesadapter->phy_lock, flags);
-
- nesadapter->mac_sw_state[mac_number] = NES_MAC_SW_IDLE;
-}
-
-void nes_recheck_link_status(struct work_struct *work)
-{
- unsigned long flags;
- struct nes_device *nesdev = container_of(work, struct nes_device, work.work);
- struct nes_adapter *nesadapter = nesdev->nesadapter;
- struct nes_vnic *nesvnic;
- u32 mac_index = nesdev->mac_index;
- u16 phy_data;
- u16 temp_phy_data;
-
- spin_lock_irqsave(&nesadapter->phy_lock, flags);
-
- /* check link status */
- nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 0x9003);
- temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
-
- nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 3, 0x0021);
- nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
- nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 3, 0x0021);
- phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
-
- phy_data = (!temp_phy_data && (phy_data == 0x8000)) ? 0x4 : 0x0;
-
- nes_debug(NES_DBG_PHY, "%s: Phy data = 0x%04X, link was %s.\n",
- __func__, phy_data,
- nesadapter->mac_link_down[mac_index] ? "DOWN" : "UP");
-
- if (phy_data & 0x0004) {
- nesadapter->mac_link_down[mac_index] = 0;
- list_for_each_entry(nesvnic, &nesadapter->nesvnic_list[mac_index], list) {
- if (nesvnic->linkup == 0) {
- printk(PFX "The Link is now up for port %s, netdev %p.\n",
- nesvnic->netdev->name, nesvnic->netdev);
- if (netif_queue_stopped(nesvnic->netdev))
- netif_start_queue(nesvnic->netdev);
- nesvnic->linkup = 1;
- netif_carrier_on(nesvnic->netdev);
-
- spin_lock(&nesvnic->port_ibevent_lock);
- if (nesvnic->of_device_registered) {
- if (nesdev->iw_status == 0) {
- nesdev->iw_status = 1;
- nes_port_ibevent(nesvnic);
- }
- }
- spin_unlock(&nesvnic->port_ibevent_lock);
- }
- }
-
- } else {
- nesadapter->mac_link_down[mac_index] = 1;
- list_for_each_entry(nesvnic, &nesadapter->nesvnic_list[mac_index], list) {
- if (nesvnic->linkup == 1) {
- printk(PFX "The Link is now down for port %s, netdev %p.\n",
- nesvnic->netdev->name, nesvnic->netdev);
- if (!(netif_queue_stopped(nesvnic->netdev)))
- netif_stop_queue(nesvnic->netdev);
- nesvnic->linkup = 0;
- netif_carrier_off(nesvnic->netdev);
-
- spin_lock(&nesvnic->port_ibevent_lock);
- if (nesvnic->of_device_registered) {
- if (nesdev->iw_status == 1) {
- nesdev->iw_status = 0;
- nes_port_ibevent(nesvnic);
- }
- }
- spin_unlock(&nesvnic->port_ibevent_lock);
- }
- }
- }
- if (nesdev->link_recheck++ < NES_LINK_RECHECK_MAX)
- schedule_delayed_work(&nesdev->work, NES_LINK_RECHECK_DELAY);
- else
- nesdev->link_recheck = 0;
-
- spin_unlock_irqrestore(&nesadapter->phy_lock, flags);
-}
-
-
-static void nes_nic_napi_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq)
-{
- struct nes_vnic *nesvnic = container_of(cq, struct nes_vnic, nic_cq);
-
- napi_schedule(&nesvnic->napi);
-}
-
-
-/* The MAX_RQES_TO_PROCESS defines how many max read requests to complete before
-* getting out of nic_ce_handler
-*/
-#define MAX_RQES_TO_PROCESS 384
-
-/**
- * nes_nic_ce_handler
- */
-void nes_nic_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq)
-{
- u64 u64temp;
- dma_addr_t bus_address;
- struct nes_hw_nic *nesnic;
- struct nes_vnic *nesvnic = container_of(cq, struct nes_vnic, nic_cq);
- struct nes_adapter *nesadapter = nesdev->nesadapter;
- struct nes_hw_nic_rq_wqe *nic_rqe;
- struct nes_hw_nic_sq_wqe *nic_sqe;
- struct sk_buff *skb;
- struct sk_buff *rx_skb;
- struct nes_rskb_cb *cb;
- __le16 *wqe_fragment_length;
- u32 head;
- u32 cq_size;
- u32 rx_pkt_size;
- u32 cqe_count=0;
- u32 cqe_errv;
- u32 cqe_misc;
- u16 wqe_fragment_index = 1; /* first fragment (0) is used by copy buffer */
- u16 vlan_tag;
- u16 pkt_type;
- u16 rqes_processed = 0;
- u8 sq_cqes = 0;
-
- head = cq->cq_head;
- cq_size = cq->cq_size;
- cq->cqes_pending = 1;
- do {
- if (le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_NIC_CQE_MISC_IDX]) &
- NES_NIC_CQE_VALID) {
- nesnic = &nesvnic->nic;
- cqe_misc = le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_NIC_CQE_MISC_IDX]);
- if (cqe_misc & NES_NIC_CQE_SQ) {
- sq_cqes++;
- wqe_fragment_index = 1;
- nic_sqe = &nesnic->sq_vbase[nesnic->sq_tail];
- skb = nesnic->tx_skb[nesnic->sq_tail];
- wqe_fragment_length = (__le16 *)&nic_sqe->wqe_words[NES_NIC_SQ_WQE_LENGTH_0_TAG_IDX];
- /* bump past the vlan tag */
- wqe_fragment_length++;
- if (le16_to_cpu(wqe_fragment_length[wqe_fragment_index]) != 0) {
- u64temp = (u64) le32_to_cpu(nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_LOW_IDX +
- wqe_fragment_index * 2]);
- u64temp += ((u64)le32_to_cpu(nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_HIGH_IDX +
- wqe_fragment_index * 2])) << 32;
- bus_address = (dma_addr_t)u64temp;
- if (test_and_clear_bit(nesnic->sq_tail, nesnic->first_frag_overflow)) {
- pci_unmap_single(nesdev->pcidev,
- bus_address,
- le16_to_cpu(wqe_fragment_length[wqe_fragment_index++]),
- PCI_DMA_TODEVICE);
- }
- for (; wqe_fragment_index < 5; wqe_fragment_index++) {
- if (wqe_fragment_length[wqe_fragment_index]) {
- u64temp = le32_to_cpu(nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_LOW_IDX +
- wqe_fragment_index * 2]);
- u64temp += ((u64)le32_to_cpu(nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_HIGH_IDX
- + wqe_fragment_index * 2])) <<32;
- bus_address = (dma_addr_t)u64temp;
- pci_unmap_page(nesdev->pcidev,
- bus_address,
- le16_to_cpu(wqe_fragment_length[wqe_fragment_index]),
- PCI_DMA_TODEVICE);
- } else
- break;
- }
- }
- if (skb)
- dev_kfree_skb_any(skb);
- nesnic->sq_tail++;
- nesnic->sq_tail &= nesnic->sq_size-1;
- if (sq_cqes > 128) {
- barrier();
- /* restart the queue if it had been stopped */
- if (netif_queue_stopped(nesvnic->netdev))
- netif_wake_queue(nesvnic->netdev);
- sq_cqes = 0;
- }
- } else {
- rqes_processed ++;
-
- cq->rx_cqes_completed++;
- cq->rx_pkts_indicated++;
- rx_pkt_size = cqe_misc & 0x0000ffff;
- nic_rqe = &nesnic->rq_vbase[nesnic->rq_tail];
- /* Get the skb */
- rx_skb = nesnic->rx_skb[nesnic->rq_tail];
- nic_rqe = &nesnic->rq_vbase[nesvnic->nic.rq_tail];
- bus_address = (dma_addr_t)le32_to_cpu(nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_LOW_IDX]);
- bus_address += ((u64)le32_to_cpu(nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_HIGH_IDX])) << 32;
- pci_unmap_single(nesdev->pcidev, bus_address,
- nesvnic->max_frame_size, PCI_DMA_FROMDEVICE);
- cb = (struct nes_rskb_cb *)&rx_skb->cb[0];
- cb->busaddr = 0;
- /* rx_skb->tail = rx_skb->data + rx_pkt_size; */
- /* rx_skb->len = rx_pkt_size; */
- rx_skb->len = 0; /* TODO: see if this is necessary */
- skb_put(rx_skb, rx_pkt_size);
- rx_skb->protocol = eth_type_trans(rx_skb, nesvnic->netdev);
- nesnic->rq_tail++;
- nesnic->rq_tail &= nesnic->rq_size - 1;
-
- atomic_inc(&nesvnic->rx_skbs_needed);
- if (atomic_read(&nesvnic->rx_skbs_needed) > (nesvnic->nic.rq_size>>1)) {
- nes_write32(nesdev->regs+NES_CQE_ALLOC,
- cq->cq_number | (cqe_count << 16));
- /* nesadapter->tune_timer.cq_count += cqe_count; */
- nesdev->currcq_count += cqe_count;
- cqe_count = 0;
- nes_replenish_nic_rq(nesvnic);
- }
- pkt_type = (u16)(le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_NIC_CQE_TAG_PKT_TYPE_IDX]));
- cqe_errv = (cqe_misc & NES_NIC_CQE_ERRV_MASK) >> NES_NIC_CQE_ERRV_SHIFT;
- rx_skb->ip_summed = CHECKSUM_NONE;
-
- if ((NES_PKT_TYPE_TCPV4_BITS == (pkt_type & NES_PKT_TYPE_TCPV4_MASK)) ||
- (NES_PKT_TYPE_UDPV4_BITS == (pkt_type & NES_PKT_TYPE_UDPV4_MASK))) {
- if ((cqe_errv &
- (NES_NIC_ERRV_BITS_IPV4_CSUM_ERR | NES_NIC_ERRV_BITS_TCPUDP_CSUM_ERR |
- NES_NIC_ERRV_BITS_IPH_ERR | NES_NIC_ERRV_BITS_WQE_OVERRUN)) == 0) {
- if (nesvnic->netdev->features & NETIF_F_RXCSUM)
- rx_skb->ip_summed = CHECKSUM_UNNECESSARY;
- } else
- nes_debug(NES_DBG_CQ, "%s: unsuccessfully checksummed TCP or UDP packet."
- " errv = 0x%X, pkt_type = 0x%X.\n",
- nesvnic->netdev->name, cqe_errv, pkt_type);
-
- } else if ((pkt_type & NES_PKT_TYPE_IPV4_MASK) == NES_PKT_TYPE_IPV4_BITS) {
- if ((cqe_errv &
- (NES_NIC_ERRV_BITS_IPV4_CSUM_ERR | NES_NIC_ERRV_BITS_IPH_ERR |
- NES_NIC_ERRV_BITS_WQE_OVERRUN)) == 0) {
- if (nesvnic->netdev->features & NETIF_F_RXCSUM) {
- rx_skb->ip_summed = CHECKSUM_UNNECESSARY;
- /* nes_debug(NES_DBG_CQ, "%s: Reporting successfully checksummed IPv4 packet.\n",
- nesvnic->netdev->name); */
- }
- } else
- nes_debug(NES_DBG_CQ, "%s: unsuccessfully checksummed TCP or UDP packet."
- " errv = 0x%X, pkt_type = 0x%X.\n",
- nesvnic->netdev->name, cqe_errv, pkt_type);
- }
- /* nes_debug(NES_DBG_CQ, "pkt_type=%x, APBVT_MASK=%x\n",
- pkt_type, (pkt_type & NES_PKT_TYPE_APBVT_MASK)); */
-
- if ((pkt_type & NES_PKT_TYPE_APBVT_MASK) == NES_PKT_TYPE_APBVT_BITS) {
- if (nes_cm_recv(rx_skb, nesvnic->netdev))
- rx_skb = NULL;
- }
- if (rx_skb == NULL)
- goto skip_rx_indicate0;
-
-
- if (cqe_misc & NES_NIC_CQE_TAG_VALID) {
- vlan_tag = (u16)(le32_to_cpu(
- cq->cq_vbase[head].cqe_words[NES_NIC_CQE_TAG_PKT_TYPE_IDX])
- >> 16);
- nes_debug(NES_DBG_CQ, "%s: Reporting stripped VLAN packet. Tag = 0x%04X\n",
- nesvnic->netdev->name, vlan_tag);
-
- __vlan_hwaccel_put_tag(rx_skb, htons(ETH_P_8021Q), vlan_tag);
- }
- napi_gro_receive(&nesvnic->napi, rx_skb);
-
-skip_rx_indicate0:
- ;
- /* nesvnic->netstats.rx_packets++; */
- /* nesvnic->netstats.rx_bytes += rx_pkt_size; */
- }
-
- cq->cq_vbase[head].cqe_words[NES_NIC_CQE_MISC_IDX] = 0;
- /* Accounting... */
- cqe_count++;
- if (++head >= cq_size)
- head = 0;
- if (cqe_count == 255) {
- /* Replenish Nic CQ */
- nes_write32(nesdev->regs+NES_CQE_ALLOC,
- cq->cq_number | (cqe_count << 16));
- /* nesdev->nesadapter->tune_timer.cq_count += cqe_count; */
- nesdev->currcq_count += cqe_count;
- cqe_count = 0;
- }
-
- if (cq->rx_cqes_completed >= nesvnic->budget)
- break;
- } else {
- cq->cqes_pending = 0;
- break;
- }
-
- } while (1);
-
- if (sq_cqes) {
- barrier();
- /* restart the queue if it had been stopped */
- if (netif_queue_stopped(nesvnic->netdev))
- netif_wake_queue(nesvnic->netdev);
- }
- cq->cq_head = head;
- /* nes_debug(NES_DBG_CQ, "CQ%u Processed = %u cqes, new head = %u.\n",
- cq->cq_number, cqe_count, cq->cq_head); */
- cq->cqe_allocs_pending = cqe_count;
- if (unlikely(nesadapter->et_use_adaptive_rx_coalesce))
- {
- /* nesdev->nesadapter->tune_timer.cq_count += cqe_count; */
- nesdev->currcq_count += cqe_count;
- nes_nic_tune_timer(nesdev);
- }
- if (atomic_read(&nesvnic->rx_skbs_needed))
- nes_replenish_nic_rq(nesvnic);
-}
-
-
-
-/**
- * nes_cqp_ce_handler
- */
-static void nes_cqp_ce_handler(struct nes_device *nesdev, struct nes_hw_cq *cq)
-{
- u64 u64temp;
- unsigned long flags;
- struct nes_hw_cqp *cqp = NULL;
- struct nes_cqp_request *cqp_request;
- struct nes_hw_cqp_wqe *cqp_wqe;
- u32 head;
- u32 cq_size;
- u32 cqe_count=0;
- u32 error_code;
- u32 opcode;
- u32 ctx_index;
- /* u32 counter; */
-
- head = cq->cq_head;
- cq_size = cq->cq_size;
-
- do {
- /* process the CQE */
- /* nes_debug(NES_DBG_CQP, "head=%u cqe_words=%08X\n", head,
- le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX])); */
-
- opcode = le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX]);
- if (opcode & NES_CQE_VALID) {
- cqp = &nesdev->cqp;
-
- error_code = le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_CQE_ERROR_CODE_IDX]);
- if (error_code) {
- nes_debug(NES_DBG_CQP, "Bad Completion code for opcode 0x%02X from CQP,"
- " Major/Minor codes = 0x%04X:%04X.\n",
- le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX])&0x3f,
- (u16)(error_code >> 16),
- (u16)error_code);
- }
-
- u64temp = (((u64)(le32_to_cpu(cq->cq_vbase[head].
- cqe_words[NES_CQE_COMP_COMP_CTX_HIGH_IDX]))) << 32) |
- ((u64)(le32_to_cpu(cq->cq_vbase[head].
- cqe_words[NES_CQE_COMP_COMP_CTX_LOW_IDX])));
-
- cqp_request = (struct nes_cqp_request *)(unsigned long)u64temp;
- if (cqp_request) {
- if (cqp_request->waiting) {
- /* nes_debug(NES_DBG_CQP, "%s: Waking up requestor\n"); */
- cqp_request->major_code = (u16)(error_code >> 16);
- cqp_request->minor_code = (u16)error_code;
- barrier();
- cqp_request->request_done = 1;
- wake_up(&cqp_request->waitq);
- nes_put_cqp_request(nesdev, cqp_request);
- } else {
- if (cqp_request->callback)
- cqp_request->cqp_callback(nesdev, cqp_request);
- nes_free_cqp_request(nesdev, cqp_request);
- }
- } else {
- wake_up(&nesdev->cqp.waitq);
- }
-
- cq->cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX] = 0;
- nes_write32(nesdev->regs + NES_CQE_ALLOC, cq->cq_number | (1 << 16));
- if (++cqp->sq_tail >= cqp->sq_size)
- cqp->sq_tail = 0;
-
- /* Accounting... */
- cqe_count++;
- if (++head >= cq_size)
- head = 0;
- } else {
- break;
- }
- } while (1);
- cq->cq_head = head;
-
- spin_lock_irqsave(&nesdev->cqp.lock, flags);
- while ((!list_empty(&nesdev->cqp_pending_reqs)) &&
- ((((nesdev->cqp.sq_tail+nesdev->cqp.sq_size)-nesdev->cqp.sq_head) &
- (nesdev->cqp.sq_size - 1)) != 1)) {
- cqp_request = list_entry(nesdev->cqp_pending_reqs.next,
- struct nes_cqp_request, list);
- list_del_init(&cqp_request->list);
- head = nesdev->cqp.sq_head++;
- nesdev->cqp.sq_head &= nesdev->cqp.sq_size-1;
- cqp_wqe = &nesdev->cqp.sq_vbase[head];
- memcpy(cqp_wqe, &cqp_request->cqp_wqe, sizeof(*cqp_wqe));
- barrier();
-
- opcode = cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX];
- if ((opcode & NES_CQP_OPCODE_MASK) == NES_CQP_DOWNLOAD_SEGMENT)
- ctx_index = NES_CQP_WQE_DL_COMP_CTX_LOW_IDX;
- else
- ctx_index = NES_CQP_WQE_COMP_CTX_LOW_IDX;
- cqp_wqe->wqe_words[ctx_index] =
- cpu_to_le32((u32)((unsigned long)cqp_request));
- cqp_wqe->wqe_words[ctx_index + 1] =
- cpu_to_le32((u32)(upper_32_bits((unsigned long)cqp_request)));
- nes_debug(NES_DBG_CQP, "CQP request %p (opcode 0x%02X) put on CQPs SQ wqe%u.\n",
- cqp_request, le32_to_cpu(cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX])&0x3f, head);
- /* Ring doorbell (1 WQEs) */
- barrier();
- nes_write32(nesdev->regs+NES_WQE_ALLOC, 0x01800000 | nesdev->cqp.qp_id);
- }
- spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
-
- /* Arm the CCQ */
- nes_write32(nesdev->regs+NES_CQE_ALLOC, NES_CQE_ALLOC_NOTIFY_NEXT |
- cq->cq_number);
- nes_read32(nesdev->regs+NES_CQE_ALLOC);
-}
-
-static u8 *locate_mpa(u8 *pkt, u32 aeq_info)
-{
- if (aeq_info & NES_AEQE_Q2_DATA_ETHERNET) {
- /* skip over ethernet header */
- pkt += ETH_HLEN;
-
- /* Skip over IP and TCP headers */
- pkt += 4 * (pkt[0] & 0x0f);
- pkt += 4 * ((pkt[12] >> 4) & 0x0f);
- }
- return pkt;
-}
-
-/* Determine if incoming error pkt is rdma layer */
-static u32 iwarp_opcode(struct nes_qp *nesqp, u32 aeq_info)
-{
- u8 *pkt;
- u16 *mpa;
- u32 opcode = 0xffffffff;
-
- if (aeq_info & NES_AEQE_Q2_DATA_WRITTEN) {
- pkt = nesqp->hwqp.q2_vbase + BAD_FRAME_OFFSET;
- mpa = (u16 *)locate_mpa(pkt, aeq_info);
- opcode = be16_to_cpu(mpa[1]) & 0xf;
- }
-
- return opcode;
-}
-
-/* Build iWARP terminate header */
-static int nes_bld_terminate_hdr(struct nes_qp *nesqp, u16 async_event_id, u32 aeq_info)
-{
- u8 *pkt = nesqp->hwqp.q2_vbase + BAD_FRAME_OFFSET;
- u16 ddp_seg_len;
- int copy_len = 0;
- u8 is_tagged = 0;
- u8 flush_code = 0;
- struct nes_terminate_hdr *termhdr;
-
- termhdr = (struct nes_terminate_hdr *)nesqp->hwqp.q2_vbase;
- memset(termhdr, 0, 64);
-
- if (aeq_info & NES_AEQE_Q2_DATA_WRITTEN) {
-
- /* Use data from offending packet to fill in ddp & rdma hdrs */
- pkt = locate_mpa(pkt, aeq_info);
- ddp_seg_len = be16_to_cpu(*(u16 *)pkt);
- if (ddp_seg_len) {
- copy_len = 2;
- termhdr->hdrct = DDP_LEN_FLAG;
- if (pkt[2] & 0x80) {
- is_tagged = 1;
- if (ddp_seg_len >= TERM_DDP_LEN_TAGGED) {
- copy_len += TERM_DDP_LEN_TAGGED;
- termhdr->hdrct |= DDP_HDR_FLAG;
- }
- } else {
- if (ddp_seg_len >= TERM_DDP_LEN_UNTAGGED) {
- copy_len += TERM_DDP_LEN_UNTAGGED;
- termhdr->hdrct |= DDP_HDR_FLAG;
- }
-
- if (ddp_seg_len >= (TERM_DDP_LEN_UNTAGGED + TERM_RDMA_LEN)) {
- if ((pkt[3] & RDMA_OPCODE_MASK) == RDMA_READ_REQ_OPCODE) {
- copy_len += TERM_RDMA_LEN;
- termhdr->hdrct |= RDMA_HDR_FLAG;
- }
- }
- }
- }
- }
-
- switch (async_event_id) {
- case NES_AEQE_AEID_AMP_UNALLOCATED_STAG:
- switch (iwarp_opcode(nesqp, aeq_info)) {
- case IWARP_OPCODE_WRITE:
- flush_code = IB_WC_LOC_PROT_ERR;
- termhdr->layer_etype = (LAYER_DDP << 4) | DDP_TAGGED_BUFFER;
- termhdr->error_code = DDP_TAGGED_INV_STAG;
- break;
- default:
- flush_code = IB_WC_REM_ACCESS_ERR;
- termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT;
- termhdr->error_code = RDMAP_INV_STAG;
- }
- break;
- case NES_AEQE_AEID_AMP_INVALID_STAG:
- flush_code = IB_WC_REM_ACCESS_ERR;
- termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT;
- termhdr->error_code = RDMAP_INV_STAG;
- break;
- case NES_AEQE_AEID_AMP_BAD_QP:
- flush_code = IB_WC_LOC_QP_OP_ERR;
- termhdr->layer_etype = (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER;
- termhdr->error_code = DDP_UNTAGGED_INV_QN;
- break;
- case NES_AEQE_AEID_AMP_BAD_STAG_KEY:
- case NES_AEQE_AEID_AMP_BAD_STAG_INDEX:
- switch (iwarp_opcode(nesqp, aeq_info)) {
- case IWARP_OPCODE_SEND_INV:
- case IWARP_OPCODE_SEND_SE_INV:
- flush_code = IB_WC_REM_OP_ERR;
- termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_OP;
- termhdr->error_code = RDMAP_CANT_INV_STAG;
- break;
- default:
- flush_code = IB_WC_REM_ACCESS_ERR;
- termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT;
- termhdr->error_code = RDMAP_INV_STAG;
- }
- break;
- case NES_AEQE_AEID_AMP_BOUNDS_VIOLATION:
- if (aeq_info & (NES_AEQE_Q2_DATA_ETHERNET | NES_AEQE_Q2_DATA_MPA)) {
- flush_code = IB_WC_LOC_PROT_ERR;
- termhdr->layer_etype = (LAYER_DDP << 4) | DDP_TAGGED_BUFFER;
- termhdr->error_code = DDP_TAGGED_BOUNDS;
- } else {
- flush_code = IB_WC_REM_ACCESS_ERR;
- termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT;
- termhdr->error_code = RDMAP_INV_BOUNDS;
- }
- break;
- case NES_AEQE_AEID_AMP_RIGHTS_VIOLATION:
- case NES_AEQE_AEID_AMP_INVALIDATE_NO_REMOTE_ACCESS_RIGHTS:
- case NES_AEQE_AEID_PRIV_OPERATION_DENIED:
- flush_code = IB_WC_REM_ACCESS_ERR;
- termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT;
- termhdr->error_code = RDMAP_ACCESS;
- break;
- case NES_AEQE_AEID_AMP_TO_WRAP:
- flush_code = IB_WC_REM_ACCESS_ERR;
- termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT;
- termhdr->error_code = RDMAP_TO_WRAP;
- break;
- case NES_AEQE_AEID_AMP_BAD_PD:
- switch (iwarp_opcode(nesqp, aeq_info)) {
- case IWARP_OPCODE_WRITE:
- flush_code = IB_WC_LOC_PROT_ERR;
- termhdr->layer_etype = (LAYER_DDP << 4) | DDP_TAGGED_BUFFER;
- termhdr->error_code = DDP_TAGGED_UNASSOC_STAG;
- break;
- case IWARP_OPCODE_SEND_INV:
- case IWARP_OPCODE_SEND_SE_INV:
- flush_code = IB_WC_REM_ACCESS_ERR;
- termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT;
- termhdr->error_code = RDMAP_CANT_INV_STAG;
- break;
- default:
- flush_code = IB_WC_REM_ACCESS_ERR;
- termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_PROT;
- termhdr->error_code = RDMAP_UNASSOC_STAG;
- }
- break;
- case NES_AEQE_AEID_LLP_RECEIVED_MARKER_AND_LENGTH_FIELDS_DONT_MATCH:
- flush_code = IB_WC_LOC_LEN_ERR;
- termhdr->layer_etype = (LAYER_MPA << 4) | DDP_LLP;
- termhdr->error_code = MPA_MARKER;
- break;
- case NES_AEQE_AEID_LLP_RECEIVED_MPA_CRC_ERROR:
- flush_code = IB_WC_GENERAL_ERR;
- termhdr->layer_etype = (LAYER_MPA << 4) | DDP_LLP;
- termhdr->error_code = MPA_CRC;
- break;
- case NES_AEQE_AEID_LLP_SEGMENT_TOO_LARGE:
- case NES_AEQE_AEID_LLP_SEGMENT_TOO_SMALL:
- flush_code = IB_WC_LOC_LEN_ERR;
- termhdr->layer_etype = (LAYER_DDP << 4) | DDP_CATASTROPHIC;
- termhdr->error_code = DDP_CATASTROPHIC_LOCAL;
- break;
- case NES_AEQE_AEID_DDP_LCE_LOCAL_CATASTROPHIC:
- case NES_AEQE_AEID_DDP_NO_L_BIT:
- flush_code = IB_WC_FATAL_ERR;
- termhdr->layer_etype = (LAYER_DDP << 4) | DDP_CATASTROPHIC;
- termhdr->error_code = DDP_CATASTROPHIC_LOCAL;
- break;
- case NES_AEQE_AEID_DDP_INVALID_MSN_GAP_IN_MSN:
- case NES_AEQE_AEID_DDP_INVALID_MSN_RANGE_IS_NOT_VALID:
- flush_code = IB_WC_GENERAL_ERR;
- termhdr->layer_etype = (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER;
- termhdr->error_code = DDP_UNTAGGED_INV_MSN_RANGE;
- break;
- case NES_AEQE_AEID_DDP_UBE_DDP_MESSAGE_TOO_LONG_FOR_AVAILABLE_BUFFER:
- flush_code = IB_WC_LOC_LEN_ERR;
- termhdr->layer_etype = (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER;
- termhdr->error_code = DDP_UNTAGGED_INV_TOO_LONG;
- break;
- case NES_AEQE_AEID_DDP_UBE_INVALID_DDP_VERSION:
- flush_code = IB_WC_GENERAL_ERR;
- if (is_tagged) {
- termhdr->layer_etype = (LAYER_DDP << 4) | DDP_TAGGED_BUFFER;
- termhdr->error_code = DDP_TAGGED_INV_DDP_VER;
- } else {
- termhdr->layer_etype = (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER;
- termhdr->error_code = DDP_UNTAGGED_INV_DDP_VER;
- }
- break;
- case NES_AEQE_AEID_DDP_UBE_INVALID_MO:
- flush_code = IB_WC_GENERAL_ERR;
- termhdr->layer_etype = (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER;
- termhdr->error_code = DDP_UNTAGGED_INV_MO;
- break;
- case NES_AEQE_AEID_DDP_UBE_INVALID_MSN_NO_BUFFER_AVAILABLE:
- flush_code = IB_WC_REM_OP_ERR;
- termhdr->layer_etype = (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER;
- termhdr->error_code = DDP_UNTAGGED_INV_MSN_NO_BUF;
- break;
- case NES_AEQE_AEID_DDP_UBE_INVALID_QN:
- flush_code = IB_WC_GENERAL_ERR;
- termhdr->layer_etype = (LAYER_DDP << 4) | DDP_UNTAGGED_BUFFER;
- termhdr->error_code = DDP_UNTAGGED_INV_QN;
- break;
- case NES_AEQE_AEID_RDMAP_ROE_INVALID_RDMAP_VERSION:
- flush_code = IB_WC_GENERAL_ERR;
- termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_OP;
- termhdr->error_code = RDMAP_INV_RDMAP_VER;
- break;
- case NES_AEQE_AEID_RDMAP_ROE_UNEXPECTED_OPCODE:
- flush_code = IB_WC_LOC_QP_OP_ERR;
- termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_OP;
- termhdr->error_code = RDMAP_UNEXPECTED_OP;
- break;
- default:
- flush_code = IB_WC_FATAL_ERR;
- termhdr->layer_etype = (LAYER_RDMA << 4) | RDMAP_REMOTE_OP;
- termhdr->error_code = RDMAP_UNSPECIFIED;
- break;
- }
-
- if (copy_len)
- memcpy(termhdr + 1, pkt, copy_len);
-
- if ((flush_code) && ((NES_AEQE_INBOUND_RDMA & aeq_info) == 0)) {
- if (aeq_info & NES_AEQE_SQ)
- nesqp->term_sq_flush_code = flush_code;
- else
- nesqp->term_rq_flush_code = flush_code;
- }
-
- return sizeof(struct nes_terminate_hdr) + copy_len;
-}
-
-static void nes_terminate_connection(struct nes_device *nesdev, struct nes_qp *nesqp,
- struct nes_hw_aeqe *aeqe, enum ib_event_type eventtype)
-{
- u64 context;
- unsigned long flags;
- u32 aeq_info;
- u16 async_event_id;
- u8 tcp_state;
- u8 iwarp_state;
- u32 termlen = 0;
- u32 mod_qp_flags = NES_CQP_QP_IWARP_STATE_TERMINATE |
- NES_CQP_QP_TERM_DONT_SEND_FIN;
- struct nes_adapter *nesadapter = nesdev->nesadapter;
-
- if (nesqp->term_flags & NES_TERM_SENT)
- return; /* Sanity check */
-
- aeq_info = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_MISC_IDX]);
- tcp_state = (aeq_info & NES_AEQE_TCP_STATE_MASK) >> NES_AEQE_TCP_STATE_SHIFT;
- iwarp_state = (aeq_info & NES_AEQE_IWARP_STATE_MASK) >> NES_AEQE_IWARP_STATE_SHIFT;
- async_event_id = (u16)aeq_info;
-
- context = (unsigned long)nesadapter->qp_table[le32_to_cpu(
- aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX]) - NES_FIRST_QPN];
- if (!context) {
- WARN_ON(!context);
- return;
- }
-
- nesqp = (struct nes_qp *)(unsigned long)context;
- spin_lock_irqsave(&nesqp->lock, flags);
- nesqp->hw_iwarp_state = iwarp_state;
- nesqp->hw_tcp_state = tcp_state;
- nesqp->last_aeq = async_event_id;
- nesqp->terminate_eventtype = eventtype;
- spin_unlock_irqrestore(&nesqp->lock, flags);
-
- if (nesadapter->send_term_ok)
- termlen = nes_bld_terminate_hdr(nesqp, async_event_id, aeq_info);
- else
- mod_qp_flags |= NES_CQP_QP_TERM_DONT_SEND_TERM_MSG;
-
- if (!nesdev->iw_status) {
- nesqp->term_flags = NES_TERM_DONE;
- nes_hw_modify_qp(nesdev, nesqp, NES_CQP_QP_IWARP_STATE_ERROR, 0, 0);
- nes_cm_disconn(nesqp);
- } else {
- nes_terminate_start_timer(nesqp);
- nesqp->term_flags |= NES_TERM_SENT;
- nes_hw_modify_qp(nesdev, nesqp, mod_qp_flags, termlen, 0);
- }
-}
-
-static void nes_terminate_send_fin(struct nes_device *nesdev,
- struct nes_qp *nesqp, struct nes_hw_aeqe *aeqe)
-{
- u32 aeq_info;
- u16 async_event_id;
- u8 tcp_state;
- u8 iwarp_state;
- unsigned long flags;
-
- aeq_info = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_MISC_IDX]);
- tcp_state = (aeq_info & NES_AEQE_TCP_STATE_MASK) >> NES_AEQE_TCP_STATE_SHIFT;
- iwarp_state = (aeq_info & NES_AEQE_IWARP_STATE_MASK) >> NES_AEQE_IWARP_STATE_SHIFT;
- async_event_id = (u16)aeq_info;
-
- spin_lock_irqsave(&nesqp->lock, flags);
- nesqp->hw_iwarp_state = iwarp_state;
- nesqp->hw_tcp_state = tcp_state;
- nesqp->last_aeq = async_event_id;
- spin_unlock_irqrestore(&nesqp->lock, flags);
-
- /* Send the fin only */
- nes_hw_modify_qp(nesdev, nesqp, NES_CQP_QP_IWARP_STATE_TERMINATE |
- NES_CQP_QP_TERM_DONT_SEND_TERM_MSG, 0, 0);
-}
-
-/* Cleanup after a terminate sent or received */
-static void nes_terminate_done(struct nes_qp *nesqp, int timeout_occurred)
-{
- u32 next_iwarp_state = NES_CQP_QP_IWARP_STATE_ERROR;
- unsigned long flags;
- struct nes_vnic *nesvnic = to_nesvnic(nesqp->ibqp.device);
- struct nes_device *nesdev = nesvnic->nesdev;
- u8 first_time = 0;
-
- spin_lock_irqsave(&nesqp->lock, flags);
- if (nesqp->hte_added) {
- nesqp->hte_added = 0;
- next_iwarp_state |= NES_CQP_QP_DEL_HTE;
- }
-
- first_time = (nesqp->term_flags & NES_TERM_DONE) == 0;
- nesqp->term_flags |= NES_TERM_DONE;
- spin_unlock_irqrestore(&nesqp->lock, flags);
-
- /* Make sure we go through this only once */
- if (first_time) {
- if (timeout_occurred == 0)
- del_timer(&nesqp->terminate_timer);
- else
- next_iwarp_state |= NES_CQP_QP_RESET;
-
- nes_hw_modify_qp(nesdev, nesqp, next_iwarp_state, 0, 0);
- nes_cm_disconn(nesqp);
- }
-}
-
-static void nes_terminate_received(struct nes_device *nesdev,
- struct nes_qp *nesqp, struct nes_hw_aeqe *aeqe)
-{
- u32 aeq_info;
- u8 *pkt;
- u32 *mpa;
- u8 ddp_ctl;
- u8 rdma_ctl;
- u16 aeq_id = 0;
-
- aeq_info = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_MISC_IDX]);
- if (aeq_info & NES_AEQE_Q2_DATA_WRITTEN) {
- /* Terminate is not a performance path so the silicon */
- /* did not validate the frame - do it now */
- pkt = nesqp->hwqp.q2_vbase + BAD_FRAME_OFFSET;
- mpa = (u32 *)locate_mpa(pkt, aeq_info);
- ddp_ctl = (be32_to_cpu(mpa[0]) >> 8) & 0xff;
- rdma_ctl = be32_to_cpu(mpa[0]) & 0xff;
- if ((ddp_ctl & 0xc0) != 0x40)
- aeq_id = NES_AEQE_AEID_DDP_LCE_LOCAL_CATASTROPHIC;
- else if ((ddp_ctl & 0x03) != 1)
- aeq_id = NES_AEQE_AEID_DDP_UBE_INVALID_DDP_VERSION;
- else if (be32_to_cpu(mpa[2]) != 2)
- aeq_id = NES_AEQE_AEID_DDP_UBE_INVALID_QN;
- else if (be32_to_cpu(mpa[3]) != 1)
- aeq_id = NES_AEQE_AEID_DDP_INVALID_MSN_GAP_IN_MSN;
- else if (be32_to_cpu(mpa[4]) != 0)
- aeq_id = NES_AEQE_AEID_DDP_UBE_INVALID_MO;
- else if ((rdma_ctl & 0xc0) != 0x40)
- aeq_id = NES_AEQE_AEID_RDMAP_ROE_INVALID_RDMAP_VERSION;
-
- if (aeq_id) {
- /* Bad terminate recvd - send back a terminate */
- aeq_info = (aeq_info & 0xffff0000) | aeq_id;
- aeqe->aeqe_words[NES_AEQE_MISC_IDX] = cpu_to_le32(aeq_info);
- nes_terminate_connection(nesdev, nesqp, aeqe, IB_EVENT_QP_FATAL);
- return;
- }
- }
-
- nesqp->term_flags |= NES_TERM_RCVD;
- nesqp->terminate_eventtype = IB_EVENT_QP_FATAL;
- nes_terminate_start_timer(nesqp);
- nes_terminate_send_fin(nesdev, nesqp, aeqe);
-}
-
-/* Timeout routine in case terminate fails to complete */
-void nes_terminate_timeout(unsigned long context)
-{
- struct nes_qp *nesqp = (struct nes_qp *)(unsigned long)context;
-
- nes_terminate_done(nesqp, 1);
-}
-
-/* Set a timer in case hw cannot complete the terminate sequence */
-static void nes_terminate_start_timer(struct nes_qp *nesqp)
-{
- mod_timer(&nesqp->terminate_timer, (jiffies + HZ));
-}
-
-/**
- * nes_process_iwarp_aeqe
- */
-static void nes_process_iwarp_aeqe(struct nes_device *nesdev,
- struct nes_hw_aeqe *aeqe)
-{
- u64 context;
- unsigned long flags;
- struct nes_qp *nesqp;
- struct nes_hw_cq *hw_cq;
- struct nes_cq *nescq;
- int resource_allocated;
- struct nes_adapter *nesadapter = nesdev->nesadapter;
- u32 aeq_info;
- u32 next_iwarp_state = 0;
- u32 aeqe_cq_id;
- u16 async_event_id;
- u8 tcp_state;
- u8 iwarp_state;
- struct ib_event ibevent;
-
- nes_debug(NES_DBG_AEQ, "\n");
- aeq_info = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_MISC_IDX]);
- if ((NES_AEQE_INBOUND_RDMA & aeq_info) || (!(NES_AEQE_QP & aeq_info))) {
- context = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_LOW_IDX]);
- context += ((u64)le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_HIGH_IDX])) << 32;
- } else {
- context = (unsigned long)nesadapter->qp_table[le32_to_cpu(
- aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX]) - NES_FIRST_QPN];
- BUG_ON(!context);
- }
-
- /* context is nesqp unless async_event_id == CQ ERROR */
- nesqp = (struct nes_qp *)(unsigned long)context;
- async_event_id = (u16)aeq_info;
- tcp_state = (aeq_info & NES_AEQE_TCP_STATE_MASK) >> NES_AEQE_TCP_STATE_SHIFT;
- iwarp_state = (aeq_info & NES_AEQE_IWARP_STATE_MASK) >> NES_AEQE_IWARP_STATE_SHIFT;
- nes_debug(NES_DBG_AEQ, "aeid = 0x%04X, qp-cq id = %d, aeqe = %p,"
- " Tcp state = %s, iWARP state = %s\n",
- async_event_id,
- le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX]), aeqe,
- nes_tcp_state_str[tcp_state], nes_iwarp_state_str[iwarp_state]);
-
- aeqe_cq_id = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX]);
- if (aeq_info & NES_AEQE_QP) {
- if (!nes_is_resource_allocated(nesadapter,
- nesadapter->allocated_qps,
- aeqe_cq_id))
- return;
- }
-
- switch (async_event_id) {
- case NES_AEQE_AEID_LLP_FIN_RECEIVED:
- if (nesqp->term_flags)
- return; /* Ignore it, wait for close complete */
-
- if (atomic_inc_return(&nesqp->close_timer_started) == 1) {
- if ((tcp_state == NES_AEQE_TCP_STATE_CLOSE_WAIT) &&
- (nesqp->ibqp_state == IB_QPS_RTS)) {
- spin_lock_irqsave(&nesqp->lock, flags);
- nesqp->hw_iwarp_state = iwarp_state;
- nesqp->hw_tcp_state = tcp_state;
- nesqp->last_aeq = async_event_id;
- next_iwarp_state = NES_CQP_QP_IWARP_STATE_CLOSING;
- nesqp->hw_iwarp_state = NES_AEQE_IWARP_STATE_CLOSING;
- spin_unlock_irqrestore(&nesqp->lock, flags);
- nes_hw_modify_qp(nesdev, nesqp, next_iwarp_state, 0, 0);
- nes_cm_disconn(nesqp);
- }
- nesqp->cm_id->add_ref(nesqp->cm_id);
- schedule_nes_timer(nesqp->cm_node, (struct sk_buff *)nesqp,
- NES_TIMER_TYPE_CLOSE, 1, 0);
- nes_debug(NES_DBG_AEQ, "QP%u Not decrementing QP refcount (%d),"
- " need ae to finish up, original_last_aeq = 0x%04X."
- " last_aeq = 0x%04X, scheduling timer. TCP state = %d\n",
- nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount),
- async_event_id, nesqp->last_aeq, tcp_state);
- }
- break;
- case NES_AEQE_AEID_LLP_CLOSE_COMPLETE:
- spin_lock_irqsave(&nesqp->lock, flags);
- nesqp->hw_iwarp_state = iwarp_state;
- nesqp->hw_tcp_state = tcp_state;
- nesqp->last_aeq = async_event_id;
- spin_unlock_irqrestore(&nesqp->lock, flags);
- nes_cm_disconn(nesqp);
- break;
-
- case NES_AEQE_AEID_RESET_SENT:
- tcp_state = NES_AEQE_TCP_STATE_CLOSED;
- spin_lock_irqsave(&nesqp->lock, flags);
- nesqp->hw_iwarp_state = iwarp_state;
- nesqp->hw_tcp_state = tcp_state;
- nesqp->last_aeq = async_event_id;
- nesqp->hte_added = 0;
- spin_unlock_irqrestore(&nesqp->lock, flags);
- next_iwarp_state = NES_CQP_QP_IWARP_STATE_ERROR | NES_CQP_QP_DEL_HTE;
- nes_hw_modify_qp(nesdev, nesqp, next_iwarp_state, 0, 0);
- nes_cm_disconn(nesqp);
- break;
-
- case NES_AEQE_AEID_LLP_CONNECTION_RESET:
- if (atomic_read(&nesqp->close_timer_started))
- return;
- spin_lock_irqsave(&nesqp->lock, flags);
- nesqp->hw_iwarp_state = iwarp_state;
- nesqp->hw_tcp_state = tcp_state;
- nesqp->last_aeq = async_event_id;
- spin_unlock_irqrestore(&nesqp->lock, flags);
- nes_cm_disconn(nesqp);
- break;
-
- case NES_AEQE_AEID_TERMINATE_SENT:
- nes_terminate_send_fin(nesdev, nesqp, aeqe);
- break;
-
- case NES_AEQE_AEID_LLP_TERMINATE_RECEIVED:
- nes_terminate_received(nesdev, nesqp, aeqe);
- break;
-
- case NES_AEQE_AEID_AMP_BAD_STAG_KEY:
- case NES_AEQE_AEID_AMP_BAD_STAG_INDEX:
- case NES_AEQE_AEID_AMP_UNALLOCATED_STAG:
- case NES_AEQE_AEID_AMP_INVALID_STAG:
- case NES_AEQE_AEID_AMP_RIGHTS_VIOLATION:
- case NES_AEQE_AEID_AMP_INVALIDATE_NO_REMOTE_ACCESS_RIGHTS:
- case NES_AEQE_AEID_PRIV_OPERATION_DENIED:
- case NES_AEQE_AEID_DDP_UBE_DDP_MESSAGE_TOO_LONG_FOR_AVAILABLE_BUFFER:
- case NES_AEQE_AEID_AMP_BOUNDS_VIOLATION:
- case NES_AEQE_AEID_AMP_TO_WRAP:
- printk(KERN_ERR PFX "QP[%u] async_event_id=0x%04X IB_EVENT_QP_ACCESS_ERR\n",
- nesqp->hwqp.qp_id, async_event_id);
- nes_terminate_connection(nesdev, nesqp, aeqe, IB_EVENT_QP_ACCESS_ERR);
- break;
-
- case NES_AEQE_AEID_LLP_SEGMENT_TOO_LARGE:
- case NES_AEQE_AEID_LLP_SEGMENT_TOO_SMALL:
- case NES_AEQE_AEID_DDP_UBE_INVALID_MO:
- case NES_AEQE_AEID_DDP_UBE_INVALID_QN:
- if (iwarp_opcode(nesqp, aeq_info) > IWARP_OPCODE_TERM) {
- aeq_info &= 0xffff0000;
- aeq_info |= NES_AEQE_AEID_RDMAP_ROE_UNEXPECTED_OPCODE;
- aeqe->aeqe_words[NES_AEQE_MISC_IDX] = cpu_to_le32(aeq_info);
- }
-
- case NES_AEQE_AEID_RDMAP_ROE_BAD_LLP_CLOSE:
- case NES_AEQE_AEID_LLP_TOO_MANY_RETRIES:
- case NES_AEQE_AEID_DDP_UBE_INVALID_MSN_NO_BUFFER_AVAILABLE:
- case NES_AEQE_AEID_LLP_RECEIVED_MPA_CRC_ERROR:
- case NES_AEQE_AEID_AMP_BAD_QP:
- case NES_AEQE_AEID_LLP_RECEIVED_MARKER_AND_LENGTH_FIELDS_DONT_MATCH:
- case NES_AEQE_AEID_DDP_LCE_LOCAL_CATASTROPHIC:
- case NES_AEQE_AEID_DDP_NO_L_BIT:
- case NES_AEQE_AEID_DDP_INVALID_MSN_GAP_IN_MSN:
- case NES_AEQE_AEID_DDP_INVALID_MSN_RANGE_IS_NOT_VALID:
- case NES_AEQE_AEID_DDP_UBE_INVALID_DDP_VERSION:
- case NES_AEQE_AEID_RDMAP_ROE_INVALID_RDMAP_VERSION:
- case NES_AEQE_AEID_RDMAP_ROE_UNEXPECTED_OPCODE:
- case NES_AEQE_AEID_AMP_BAD_PD:
- case NES_AEQE_AEID_AMP_FASTREG_SHARED:
- case NES_AEQE_AEID_AMP_FASTREG_VALID_STAG:
- case NES_AEQE_AEID_AMP_FASTREG_MW_STAG:
- case NES_AEQE_AEID_AMP_FASTREG_INVALID_RIGHTS:
- case NES_AEQE_AEID_AMP_FASTREG_PBL_TABLE_OVERFLOW:
- case NES_AEQE_AEID_AMP_FASTREG_INVALID_LENGTH:
- case NES_AEQE_AEID_AMP_INVALIDATE_SHARED:
- case NES_AEQE_AEID_AMP_INVALIDATE_MR_WITH_BOUND_WINDOWS:
- case NES_AEQE_AEID_AMP_MWBIND_VALID_STAG:
- case NES_AEQE_AEID_AMP_MWBIND_OF_MR_STAG:
- case NES_AEQE_AEID_AMP_MWBIND_TO_ZERO_BASED_STAG:
- case NES_AEQE_AEID_AMP_MWBIND_TO_MW_STAG:
- case NES_AEQE_AEID_AMP_MWBIND_INVALID_RIGHTS:
- case NES_AEQE_AEID_AMP_MWBIND_INVALID_BOUNDS:
- case NES_AEQE_AEID_AMP_MWBIND_TO_INVALID_PARENT:
- case NES_AEQE_AEID_AMP_MWBIND_BIND_DISABLED:
- case NES_AEQE_AEID_BAD_CLOSE:
- case NES_AEQE_AEID_RDMA_READ_WHILE_ORD_ZERO:
- case NES_AEQE_AEID_STAG_ZERO_INVALID:
- case NES_AEQE_AEID_ROE_INVALID_RDMA_READ_REQUEST:
- case NES_AEQE_AEID_ROE_INVALID_RDMA_WRITE_OR_READ_RESP:
- printk(KERN_ERR PFX "QP[%u] async_event_id=0x%04X IB_EVENT_QP_FATAL\n",
- nesqp->hwqp.qp_id, async_event_id);
- print_ip(nesqp->cm_node);
- if (!atomic_read(&nesqp->close_timer_started))
- nes_terminate_connection(nesdev, nesqp, aeqe, IB_EVENT_QP_FATAL);
- break;
-
- case NES_AEQE_AEID_CQ_OPERATION_ERROR:
- context <<= 1;
- nes_debug(NES_DBG_AEQ, "Processing an NES_AEQE_AEID_CQ_OPERATION_ERROR event on CQ%u, %p\n",
- le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX]), (void *)(unsigned long)context);
- resource_allocated = nes_is_resource_allocated(nesadapter, nesadapter->allocated_cqs,
- le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX]));
- if (resource_allocated) {
- printk(KERN_ERR PFX "%s: Processing an NES_AEQE_AEID_CQ_OPERATION_ERROR event on CQ%u\n",
- __func__, le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX]));
- hw_cq = (struct nes_hw_cq *)(unsigned long)context;
- if (hw_cq) {
- nescq = container_of(hw_cq, struct nes_cq, hw_cq);
- if (nescq->ibcq.event_handler) {
- ibevent.device = nescq->ibcq.device;
- ibevent.event = IB_EVENT_CQ_ERR;
- ibevent.element.cq = &nescq->ibcq;
- nescq->ibcq.event_handler(&ibevent, nescq->ibcq.cq_context);
- }
- }
- }
- break;
-
- default:
- nes_debug(NES_DBG_AEQ, "Processing an iWARP related AE for QP, misc = 0x%04X\n",
- async_event_id);
- break;
- }
-
-}
-
-/**
- * nes_iwarp_ce_handler
- */
-void nes_iwarp_ce_handler(struct nes_device *nesdev, struct nes_hw_cq *hw_cq)
-{
- struct nes_cq *nescq = container_of(hw_cq, struct nes_cq, hw_cq);
-
- /* nes_debug(NES_DBG_CQ, "Processing completion event for iWARP CQ%u.\n",
- nescq->hw_cq.cq_number); */
- nes_write32(nesdev->regs+NES_CQ_ACK, nescq->hw_cq.cq_number);
-
- if (nescq->ibcq.comp_handler)
- nescq->ibcq.comp_handler(&nescq->ibcq, nescq->ibcq.cq_context);
-
- return;
-}
-
-
-/**
- * nes_manage_apbvt()
- */
-int nes_manage_apbvt(struct nes_vnic *nesvnic, u32 accel_local_port,
- u32 nic_index, u32 add_port)
-{
- struct nes_device *nesdev = nesvnic->nesdev;
- struct nes_hw_cqp_wqe *cqp_wqe;
- struct nes_cqp_request *cqp_request;
- int ret = 0;
- u16 major_code;
-
- /* Send manage APBVT request to CQP */
- cqp_request = nes_get_cqp_request(nesdev);
- if (cqp_request == NULL) {
- nes_debug(NES_DBG_QP, "Failed to get a cqp_request.\n");
- return -ENOMEM;
- }
- cqp_request->waiting = 1;
- cqp_wqe = &cqp_request->cqp_wqe;
-
- nes_debug(NES_DBG_QP, "%s APBV for local port=%u(0x%04x), nic_index=%u\n",
- (add_port == NES_MANAGE_APBVT_ADD) ? "ADD" : "DEL",
- accel_local_port, accel_local_port, nic_index);
-
- nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, (NES_CQP_MANAGE_APBVT |
- ((add_port == NES_MANAGE_APBVT_ADD) ? NES_CQP_APBVT_ADD : 0)));
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX,
- ((nic_index << NES_CQP_APBVT_NIC_SHIFT) | accel_local_port));
-
- nes_debug(NES_DBG_QP, "Waiting for CQP completion for APBVT.\n");
-
- atomic_set(&cqp_request->refcount, 2);
- nes_post_cqp_request(nesdev, cqp_request);
-
- if (add_port == NES_MANAGE_APBVT_ADD)
- ret = wait_event_timeout(cqp_request->waitq, (cqp_request->request_done != 0),
- NES_EVENT_TIMEOUT);
- nes_debug(NES_DBG_QP, "Completed, ret=%u, CQP Major:Minor codes = 0x%04X:0x%04X\n",
- ret, cqp_request->major_code, cqp_request->minor_code);
- major_code = cqp_request->major_code;
-
- nes_put_cqp_request(nesdev, cqp_request);
-
- if (!ret)
- return -ETIME;
- else if (major_code)
- return -EIO;
- else
- return 0;
-}
-
-
-/**
- * nes_manage_arp_cache
- */
-void nes_manage_arp_cache(struct net_device *netdev, unsigned char *mac_addr,
- u32 ip_addr, u32 action)
-{
- struct nes_hw_cqp_wqe *cqp_wqe;
- struct nes_vnic *nesvnic = netdev_priv(netdev);
- struct nes_device *nesdev;
- struct nes_cqp_request *cqp_request;
- int arp_index;
-
- nesdev = nesvnic->nesdev;
- arp_index = nes_arp_table(nesdev, ip_addr, mac_addr, action);
- if (arp_index == -1) {
- return;
- }
-
- /* update the ARP entry */
- cqp_request = nes_get_cqp_request(nesdev);
- if (cqp_request == NULL) {
- nes_debug(NES_DBG_NETDEV, "Failed to get a cqp_request.\n");
- return;
- }
- cqp_request->waiting = 0;
- cqp_wqe = &cqp_request->cqp_wqe;
- nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
-
- cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32(
- NES_CQP_MANAGE_ARP_CACHE | NES_CQP_ARP_PERM);
- cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] |= cpu_to_le32(
- (u32)PCI_FUNC(nesdev->pcidev->devfn) << NES_CQP_ARP_AEQ_INDEX_SHIFT);
- cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX] = cpu_to_le32(arp_index);
-
- if (action == NES_ARP_ADD) {
- cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] |= cpu_to_le32(NES_CQP_ARP_VALID);
- cqp_wqe->wqe_words[NES_CQP_ARP_WQE_MAC_ADDR_LOW_IDX] = cpu_to_le32(
- (((u32)mac_addr[2]) << 24) | (((u32)mac_addr[3]) << 16) |
- (((u32)mac_addr[4]) << 8) | (u32)mac_addr[5]);
- cqp_wqe->wqe_words[NES_CQP_ARP_WQE_MAC_HIGH_IDX] = cpu_to_le32(
- (((u32)mac_addr[0]) << 8) | (u32)mac_addr[1]);
- } else {
- cqp_wqe->wqe_words[NES_CQP_ARP_WQE_MAC_ADDR_LOW_IDX] = 0;
- cqp_wqe->wqe_words[NES_CQP_ARP_WQE_MAC_HIGH_IDX] = 0;
- }
-
- nes_debug(NES_DBG_NETDEV, "Not waiting for CQP, cqp.sq_head=%u, cqp.sq_tail=%u\n",
- nesdev->cqp.sq_head, nesdev->cqp.sq_tail);
-
- atomic_set(&cqp_request->refcount, 1);
- nes_post_cqp_request(nesdev, cqp_request);
-}
-
-
-/**
- * flush_wqes
- */
-void flush_wqes(struct nes_device *nesdev, struct nes_qp *nesqp,
- u32 which_wq, u32 wait_completion)
-{
- struct nes_cqp_request *cqp_request;
- struct nes_hw_cqp_wqe *cqp_wqe;
- u32 sq_code = (NES_IWARP_CQE_MAJOR_FLUSH << 16) | NES_IWARP_CQE_MINOR_FLUSH;
- u32 rq_code = (NES_IWARP_CQE_MAJOR_FLUSH << 16) | NES_IWARP_CQE_MINOR_FLUSH;
- int ret;
-
- cqp_request = nes_get_cqp_request(nesdev);
- if (cqp_request == NULL) {
- nes_debug(NES_DBG_QP, "Failed to get a cqp_request.\n");
- return;
- }
- if (wait_completion) {
- cqp_request->waiting = 1;
- atomic_set(&cqp_request->refcount, 2);
- } else {
- cqp_request->waiting = 0;
- }
- cqp_wqe = &cqp_request->cqp_wqe;
- nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
-
- /* If wqe in error was identified, set code to be put into cqe */
- if ((nesqp->term_sq_flush_code) && (which_wq & NES_CQP_FLUSH_SQ)) {
- which_wq |= NES_CQP_FLUSH_MAJ_MIN;
- sq_code = (CQE_MAJOR_DRV << 16) | nesqp->term_sq_flush_code;
- nesqp->term_sq_flush_code = 0;
- }
-
- if ((nesqp->term_rq_flush_code) && (which_wq & NES_CQP_FLUSH_RQ)) {
- which_wq |= NES_CQP_FLUSH_MAJ_MIN;
- rq_code = (CQE_MAJOR_DRV << 16) | nesqp->term_rq_flush_code;
- nesqp->term_rq_flush_code = 0;
- }
-
- if (which_wq & NES_CQP_FLUSH_MAJ_MIN) {
- cqp_wqe->wqe_words[NES_CQP_QP_WQE_FLUSH_SQ_CODE] = cpu_to_le32(sq_code);
- cqp_wqe->wqe_words[NES_CQP_QP_WQE_FLUSH_RQ_CODE] = cpu_to_le32(rq_code);
- }
-
- cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] =
- cpu_to_le32(NES_CQP_FLUSH_WQES | which_wq);
- cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX] = cpu_to_le32(nesqp->hwqp.qp_id);
-
- nes_post_cqp_request(nesdev, cqp_request);
-
- if (wait_completion) {
- /* Wait for CQP */
- ret = wait_event_timeout(cqp_request->waitq, (cqp_request->request_done != 0),
- NES_EVENT_TIMEOUT);
- nes_debug(NES_DBG_QP, "Flush SQ QP WQEs completed, ret=%u,"
- " CQP Major:Minor codes = 0x%04X:0x%04X\n",
- ret, cqp_request->major_code, cqp_request->minor_code);
- nes_put_cqp_request(nesdev, cqp_request);
- }
-}
diff --git a/drivers/infiniband/hw/nes/nes_hw.h b/drivers/infiniband/hw/nes/nes_hw.h
deleted file mode 100644
index 1b66ef1e9937..000000000000
--- a/drivers/infiniband/hw/nes/nes_hw.h
+++ /dev/null
@@ -1,1379 +0,0 @@
-/*
-* Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved.
-*
-* This software is available to you under a choice of one of two
-* licenses. You may choose to be licensed under the terms of the GNU
-* General Public License (GPL) Version 2, available from the file
-* COPYING in the main directory of this source tree, or the
-* OpenIB.org BSD license below:
-*
-* Redistribution and use in source and binary forms, with or
-* without modification, are permitted provided that the following
-* conditions are met:
-*
-* - Redistributions of source code must retain the above
-* copyright notice, this list of conditions and the following
-* disclaimer.
-*
-* - Redistributions in binary form must reproduce the above
-* copyright notice, this list of conditions and the following
-* disclaimer in the documentation and/or other materials
-* provided with the distribution.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-* SOFTWARE.
-*/
-
-#ifndef __NES_HW_H
-#define __NES_HW_H
-
-#define NES_PHY_TYPE_CX4 1
-#define NES_PHY_TYPE_1G 2
-#define NES_PHY_TYPE_ARGUS 4
-#define NES_PHY_TYPE_PUMA_1G 5
-#define NES_PHY_TYPE_PUMA_10G 6
-#define NES_PHY_TYPE_GLADIUS 7
-#define NES_PHY_TYPE_SFP_D 8
-#define NES_PHY_TYPE_KR 9
-
-#define NES_MULTICAST_PF_MAX 8
-#define NES_A0 3
-
-#define NES_ENABLE_PAU 0x07000001
-#define NES_DISABLE_PAU 0x07000000
-#define NES_PAU_COUNTER 10
-#define NES_CQP_OPCODE_MASK 0x3f
-
-enum pci_regs {
- NES_INT_STAT = 0x0000,
- NES_INT_MASK = 0x0004,
- NES_INT_PENDING = 0x0008,
- NES_INTF_INT_STAT = 0x000C,
- NES_INTF_INT_MASK = 0x0010,
- NES_TIMER_STAT = 0x0014,
- NES_PERIODIC_CONTROL = 0x0018,
- NES_ONE_SHOT_CONTROL = 0x001C,
- NES_EEPROM_COMMAND = 0x0020,
- NES_EEPROM_DATA = 0x0024,
- NES_FLASH_COMMAND = 0x0028,
- NES_FLASH_DATA = 0x002C,
- NES_SOFTWARE_RESET = 0x0030,
- NES_CQ_ACK = 0x0034,
- NES_WQE_ALLOC = 0x0040,
- NES_CQE_ALLOC = 0x0044,
- NES_AEQ_ALLOC = 0x0048
-};
-
-enum indexed_regs {
- NES_IDX_CREATE_CQP_LOW = 0x0000,
- NES_IDX_CREATE_CQP_HIGH = 0x0004,
- NES_IDX_QP_CONTROL = 0x0040,
- NES_IDX_FLM_CONTROL = 0x0080,
- NES_IDX_INT_CPU_STATUS = 0x00a0,
- NES_IDX_GPR_TRIGGER = 0x00bc,
- NES_IDX_GPIO_CONTROL = 0x00f0,
- NES_IDX_GPIO_DATA = 0x00f4,
- NES_IDX_GPR2 = 0x010c,
- NES_IDX_TCP_CONFIG0 = 0x01e4,
- NES_IDX_TCP_TIMER_CONFIG = 0x01ec,
- NES_IDX_TCP_NOW = 0x01f0,
- NES_IDX_QP_MAX_CFG_SIZES = 0x0200,
- NES_IDX_QP_CTX_SIZE = 0x0218,
- NES_IDX_TCP_TIMER_SIZE0 = 0x0238,
- NES_IDX_TCP_TIMER_SIZE1 = 0x0240,
- NES_IDX_ARP_CACHE_SIZE = 0x0258,
- NES_IDX_CQ_CTX_SIZE = 0x0260,
- NES_IDX_MRT_SIZE = 0x0278,
- NES_IDX_PBL_REGION_SIZE = 0x0280,
- NES_IDX_IRRQ_COUNT = 0x02b0,
- NES_IDX_RX_WINDOW_BUFFER_PAGE_TABLE_SIZE = 0x02f0,
- NES_IDX_RX_WINDOW_BUFFER_SIZE = 0x0300,
- NES_IDX_DST_IP_ADDR = 0x0400,
- NES_IDX_PCIX_DIAG = 0x08e8,
- NES_IDX_MPP_DEBUG = 0x0a00,
- NES_IDX_PORT_RX_DISCARDS = 0x0a30,
- NES_IDX_PORT_TX_DISCARDS = 0x0a34,
- NES_IDX_MPP_LB_DEBUG = 0x0b00,
- NES_IDX_DENALI_CTL_22 = 0x1058,
- NES_IDX_MAC_TX_CONTROL = 0x2000,
- NES_IDX_MAC_TX_CONFIG = 0x2004,
- NES_IDX_MAC_TX_PAUSE_QUANTA = 0x2008,
- NES_IDX_MAC_RX_CONTROL = 0x200c,
- NES_IDX_MAC_RX_CONFIG = 0x2010,
- NES_IDX_MAC_EXACT_MATCH_BOTTOM = 0x201c,
- NES_IDX_MAC_MDIO_CONTROL = 0x2084,
- NES_IDX_MAC_TX_OCTETS_LOW = 0x2100,
- NES_IDX_MAC_TX_OCTETS_HIGH = 0x2104,
- NES_IDX_MAC_TX_FRAMES_LOW = 0x2108,
- NES_IDX_MAC_TX_FRAMES_HIGH = 0x210c,
- NES_IDX_MAC_TX_PAUSE_FRAMES = 0x2118,
- NES_IDX_MAC_TX_ERRORS = 0x2138,
- NES_IDX_MAC_RX_OCTETS_LOW = 0x213c,
- NES_IDX_MAC_RX_OCTETS_HIGH = 0x2140,
- NES_IDX_MAC_RX_FRAMES_LOW = 0x2144,
- NES_IDX_MAC_RX_FRAMES_HIGH = 0x2148,
- NES_IDX_MAC_RX_BC_FRAMES_LOW = 0x214c,
- NES_IDX_MAC_RX_MC_FRAMES_HIGH = 0x2150,
- NES_IDX_MAC_RX_PAUSE_FRAMES = 0x2154,
- NES_IDX_MAC_RX_SHORT_FRAMES = 0x2174,
- NES_IDX_MAC_RX_OVERSIZED_FRAMES = 0x2178,
- NES_IDX_MAC_RX_JABBER_FRAMES = 0x217c,
- NES_IDX_MAC_RX_CRC_ERR_FRAMES = 0x2180,
- NES_IDX_MAC_RX_LENGTH_ERR_FRAMES = 0x2184,
- NES_IDX_MAC_RX_SYMBOL_ERR_FRAMES = 0x2188,
- NES_IDX_MAC_INT_STATUS = 0x21f0,
- NES_IDX_MAC_INT_MASK = 0x21f4,
- NES_IDX_PHY_PCS_CONTROL_STATUS0 = 0x2800,
- NES_IDX_PHY_PCS_CONTROL_STATUS1 = 0x2a00,
- NES_IDX_ETH_SERDES_COMMON_CONTROL0 = 0x2808,
- NES_IDX_ETH_SERDES_COMMON_CONTROL1 = 0x2a08,
- NES_IDX_ETH_SERDES_COMMON_STATUS0 = 0x280c,
- NES_IDX_ETH_SERDES_COMMON_STATUS1 = 0x2a0c,
- NES_IDX_ETH_SERDES_TX_EMP0 = 0x2810,
- NES_IDX_ETH_SERDES_TX_EMP1 = 0x2a10,
- NES_IDX_ETH_SERDES_TX_DRIVE0 = 0x2814,
- NES_IDX_ETH_SERDES_TX_DRIVE1 = 0x2a14,
- NES_IDX_ETH_SERDES_RX_MODE0 = 0x2818,
- NES_IDX_ETH_SERDES_RX_MODE1 = 0x2a18,
- NES_IDX_ETH_SERDES_RX_SIGDET0 = 0x281c,
- NES_IDX_ETH_SERDES_RX_SIGDET1 = 0x2a1c,
- NES_IDX_ETH_SERDES_BYPASS0 = 0x2820,
- NES_IDX_ETH_SERDES_BYPASS1 = 0x2a20,
- NES_IDX_ETH_SERDES_LOOPBACK_CONTROL0 = 0x2824,
- NES_IDX_ETH_SERDES_LOOPBACK_CONTROL1 = 0x2a24,
- NES_IDX_ETH_SERDES_RX_EQ_CONTROL0 = 0x2828,
- NES_IDX_ETH_SERDES_RX_EQ_CONTROL1 = 0x2a28,
- NES_IDX_ETH_SERDES_RX_EQ_STATUS0 = 0x282c,
- NES_IDX_ETH_SERDES_RX_EQ_STATUS1 = 0x2a2c,
- NES_IDX_ETH_SERDES_CDR_RESET0 = 0x2830,
- NES_IDX_ETH_SERDES_CDR_RESET1 = 0x2a30,
- NES_IDX_ETH_SERDES_CDR_CONTROL0 = 0x2834,
- NES_IDX_ETH_SERDES_CDR_CONTROL1 = 0x2a34,
- NES_IDX_ETH_SERDES_TX_HIGHZ_LANE_MODE0 = 0x2838,
- NES_IDX_ETH_SERDES_TX_HIGHZ_LANE_MODE1 = 0x2a38,
- NES_IDX_ENDNODE0_NSTAT_RX_DISCARD = 0x3080,
- NES_IDX_ENDNODE0_NSTAT_RX_OCTETS_LO = 0x3000,
- NES_IDX_ENDNODE0_NSTAT_RX_OCTETS_HI = 0x3004,
- NES_IDX_ENDNODE0_NSTAT_RX_FRAMES_LO = 0x3008,
- NES_IDX_ENDNODE0_NSTAT_RX_FRAMES_HI = 0x300c,
- NES_IDX_ENDNODE0_NSTAT_TX_OCTETS_LO = 0x7000,
- NES_IDX_ENDNODE0_NSTAT_TX_OCTETS_HI = 0x7004,
- NES_IDX_ENDNODE0_NSTAT_TX_FRAMES_LO = 0x7008,
- NES_IDX_ENDNODE0_NSTAT_TX_FRAMES_HI = 0x700c,
- NES_IDX_WQM_CONFIG0 = 0x5000,
- NES_IDX_WQM_CONFIG1 = 0x5004,
- NES_IDX_CM_CONFIG = 0x5100,
- NES_IDX_NIC_LOGPORT_TO_PHYPORT = 0x6000,
- NES_IDX_NIC_PHYPORT_TO_USW = 0x6008,
- NES_IDX_NIC_ACTIVE = 0x6010,
- NES_IDX_NIC_UNICAST_ALL = 0x6018,
- NES_IDX_NIC_MULTICAST_ALL = 0x6020,
- NES_IDX_NIC_MULTICAST_ENABLE = 0x6028,
- NES_IDX_NIC_BROADCAST_ON = 0x6030,
- NES_IDX_USED_CHUNKS_TX = 0x60b0,
- NES_IDX_TX_POOL_SIZE = 0x60b8,
- NES_IDX_QUAD_HASH_TABLE_SIZE = 0x6148,
- NES_IDX_PERFECT_FILTER_LOW = 0x6200,
- NES_IDX_PERFECT_FILTER_HIGH = 0x6204,
- NES_IDX_IPV4_TCP_REXMITS = 0x7080,
- NES_IDX_DEBUG_ERROR_CONTROL_STATUS = 0x913c,
- NES_IDX_DEBUG_ERROR_MASKS0 = 0x9140,
- NES_IDX_DEBUG_ERROR_MASKS1 = 0x9144,
- NES_IDX_DEBUG_ERROR_MASKS2 = 0x9148,
- NES_IDX_DEBUG_ERROR_MASKS3 = 0x914c,
- NES_IDX_DEBUG_ERROR_MASKS4 = 0x9150,
- NES_IDX_DEBUG_ERROR_MASKS5 = 0x9154,
-};
-
-#define NES_IDX_MAC_TX_CONFIG_ENABLE_PAUSE 1
-#define NES_IDX_MPP_DEBUG_PORT_DISABLE_PAUSE (1 << 17)
-
-enum nes_cqp_opcodes {
- NES_CQP_CREATE_QP = 0x00,
- NES_CQP_MODIFY_QP = 0x01,
- NES_CQP_DESTROY_QP = 0x02,
- NES_CQP_CREATE_CQ = 0x03,
- NES_CQP_MODIFY_CQ = 0x04,
- NES_CQP_DESTROY_CQ = 0x05,
- NES_CQP_ALLOCATE_STAG = 0x09,
- NES_CQP_REGISTER_STAG = 0x0a,
- NES_CQP_QUERY_STAG = 0x0b,
- NES_CQP_REGISTER_SHARED_STAG = 0x0c,
- NES_CQP_DEALLOCATE_STAG = 0x0d,
- NES_CQP_MANAGE_ARP_CACHE = 0x0f,
- NES_CQP_DOWNLOAD_SEGMENT = 0x10,
- NES_CQP_SUSPEND_QPS = 0x11,
- NES_CQP_UPLOAD_CONTEXT = 0x13,
- NES_CQP_CREATE_CEQ = 0x16,
- NES_CQP_DESTROY_CEQ = 0x18,
- NES_CQP_CREATE_AEQ = 0x19,
- NES_CQP_DESTROY_AEQ = 0x1b,
- NES_CQP_LMI_ACCESS = 0x20,
- NES_CQP_FLUSH_WQES = 0x22,
- NES_CQP_MANAGE_APBVT = 0x23,
- NES_CQP_MANAGE_QUAD_HASH = 0x25
-};
-
-enum nes_cqp_wqe_word_idx {
- NES_CQP_WQE_OPCODE_IDX = 0,
- NES_CQP_WQE_ID_IDX = 1,
- NES_CQP_WQE_COMP_CTX_LOW_IDX = 2,
- NES_CQP_WQE_COMP_CTX_HIGH_IDX = 3,
- NES_CQP_WQE_COMP_SCRATCH_LOW_IDX = 4,
- NES_CQP_WQE_COMP_SCRATCH_HIGH_IDX = 5,
-};
-
-enum nes_cqp_wqe_word_download_idx { /* format differs from other cqp ops */
- NES_CQP_WQE_DL_OPCODE_IDX = 0,
- NES_CQP_WQE_DL_COMP_CTX_LOW_IDX = 1,
- NES_CQP_WQE_DL_COMP_CTX_HIGH_IDX = 2,
- NES_CQP_WQE_DL_LENGTH_0_TOTAL_IDX = 3
- /* For index values 4-15 use NES_NIC_SQ_WQE_ values */
-};
-
-enum nes_cqp_cq_wqeword_idx {
- NES_CQP_CQ_WQE_PBL_LOW_IDX = 6,
- NES_CQP_CQ_WQE_PBL_HIGH_IDX = 7,
- NES_CQP_CQ_WQE_CQ_CONTEXT_LOW_IDX = 8,
- NES_CQP_CQ_WQE_CQ_CONTEXT_HIGH_IDX = 9,
- NES_CQP_CQ_WQE_DOORBELL_INDEX_HIGH_IDX = 10,
-};
-
-enum nes_cqp_stag_wqeword_idx {
- NES_CQP_STAG_WQE_PBL_BLK_COUNT_IDX = 1,
- NES_CQP_STAG_WQE_LEN_HIGH_PD_IDX = 6,
- NES_CQP_STAG_WQE_LEN_LOW_IDX = 7,
- NES_CQP_STAG_WQE_STAG_IDX = 8,
- NES_CQP_STAG_WQE_VA_LOW_IDX = 10,
- NES_CQP_STAG_WQE_VA_HIGH_IDX = 11,
- NES_CQP_STAG_WQE_PA_LOW_IDX = 12,
- NES_CQP_STAG_WQE_PA_HIGH_IDX = 13,
- NES_CQP_STAG_WQE_PBL_LEN_IDX = 14
-};
-
-#define NES_CQP_OP_LOGICAL_PORT_SHIFT 26
-#define NES_CQP_OP_IWARP_STATE_SHIFT 28
-#define NES_CQP_OP_TERMLEN_SHIFT 28
-
-enum nes_cqp_qp_bits {
- NES_CQP_QP_ARP_VALID = (1<<8),
- NES_CQP_QP_WINBUF_VALID = (1<<9),
- NES_CQP_QP_CONTEXT_VALID = (1<<10),
- NES_CQP_QP_ORD_VALID = (1<<11),
- NES_CQP_QP_WINBUF_DATAIND_EN = (1<<12),
- NES_CQP_QP_VIRT_WQS = (1<<13),
- NES_CQP_QP_DEL_HTE = (1<<14),
- NES_CQP_QP_CQS_VALID = (1<<15),
- NES_CQP_QP_TYPE_TSA = 0,
- NES_CQP_QP_TYPE_IWARP = (1<<16),
- NES_CQP_QP_TYPE_CQP = (4<<16),
- NES_CQP_QP_TYPE_NIC = (5<<16),
- NES_CQP_QP_MSS_CHG = (1<<20),
- NES_CQP_QP_STATIC_RESOURCES = (1<<21),
- NES_CQP_QP_IGNORE_MW_BOUND = (1<<22),
- NES_CQP_QP_VWQ_USE_LMI = (1<<23),
- NES_CQP_QP_IWARP_STATE_IDLE = (1<<NES_CQP_OP_IWARP_STATE_SHIFT),
- NES_CQP_QP_IWARP_STATE_RTS = (2<<NES_CQP_OP_IWARP_STATE_SHIFT),
- NES_CQP_QP_IWARP_STATE_CLOSING = (3<<NES_CQP_OP_IWARP_STATE_SHIFT),
- NES_CQP_QP_IWARP_STATE_TERMINATE = (5<<NES_CQP_OP_IWARP_STATE_SHIFT),
- NES_CQP_QP_IWARP_STATE_ERROR = (6<<NES_CQP_OP_IWARP_STATE_SHIFT),
- NES_CQP_QP_IWARP_STATE_MASK = (7<<NES_CQP_OP_IWARP_STATE_SHIFT),
- NES_CQP_QP_TERM_DONT_SEND_FIN = (1<<24),
- NES_CQP_QP_TERM_DONT_SEND_TERM_MSG = (1<<25),
- NES_CQP_QP_RESET = (1<<31),
-};
-
-enum nes_cqp_qp_wqe_word_idx {
- NES_CQP_QP_WQE_CONTEXT_LOW_IDX = 6,
- NES_CQP_QP_WQE_CONTEXT_HIGH_IDX = 7,
- NES_CQP_QP_WQE_FLUSH_SQ_CODE = 8,
- NES_CQP_QP_WQE_FLUSH_RQ_CODE = 9,
- NES_CQP_QP_WQE_NEW_MSS_IDX = 15,
-};
-
-enum nes_nic_ctx_bits {
- NES_NIC_CTX_RQ_SIZE_32 = (3<<8),
- NES_NIC_CTX_RQ_SIZE_512 = (3<<8),
- NES_NIC_CTX_SQ_SIZE_32 = (1<<10),
- NES_NIC_CTX_SQ_SIZE_512 = (3<<10),
-};
-
-enum nes_nic_qp_ctx_word_idx {
- NES_NIC_CTX_MISC_IDX = 0,
- NES_NIC_CTX_SQ_LOW_IDX = 2,
- NES_NIC_CTX_SQ_HIGH_IDX = 3,
- NES_NIC_CTX_RQ_LOW_IDX = 4,
- NES_NIC_CTX_RQ_HIGH_IDX = 5,
-};
-
-enum nes_cqp_cq_bits {
- NES_CQP_CQ_CEQE_MASK = (1<<9),
- NES_CQP_CQ_CEQ_VALID = (1<<10),
- NES_CQP_CQ_RESIZE = (1<<11),
- NES_CQP_CQ_CHK_OVERFLOW = (1<<12),
- NES_CQP_CQ_4KB_CHUNK = (1<<14),
- NES_CQP_CQ_VIRT = (1<<15),
-};
-
-enum nes_cqp_stag_bits {
- NES_CQP_STAG_VA_TO = (1<<9),
- NES_CQP_STAG_DEALLOC_PBLS = (1<<10),
- NES_CQP_STAG_PBL_BLK_SIZE = (1<<11),
- NES_CQP_STAG_MR = (1<<13),
- NES_CQP_STAG_RIGHTS_LOCAL_READ = (1<<16),
- NES_CQP_STAG_RIGHTS_LOCAL_WRITE = (1<<17),
- NES_CQP_STAG_RIGHTS_REMOTE_READ = (1<<18),
- NES_CQP_STAG_RIGHTS_REMOTE_WRITE = (1<<19),
- NES_CQP_STAG_RIGHTS_WINDOW_BIND = (1<<20),
- NES_CQP_STAG_REM_ACC_EN = (1<<21),
- NES_CQP_STAG_LEAVE_PENDING = (1<<31),
-};
-
-enum nes_cqp_ceq_wqeword_idx {
- NES_CQP_CEQ_WQE_ELEMENT_COUNT_IDX = 1,
- NES_CQP_CEQ_WQE_PBL_LOW_IDX = 6,
- NES_CQP_CEQ_WQE_PBL_HIGH_IDX = 7,
-};
-
-enum nes_cqp_ceq_bits {
- NES_CQP_CEQ_4KB_CHUNK = (1<<14),
- NES_CQP_CEQ_VIRT = (1<<15),
-};
-
-enum nes_cqp_aeq_wqeword_idx {
- NES_CQP_AEQ_WQE_ELEMENT_COUNT_IDX = 1,
- NES_CQP_AEQ_WQE_PBL_LOW_IDX = 6,
- NES_CQP_AEQ_WQE_PBL_HIGH_IDX = 7,
-};
-
-enum nes_cqp_aeq_bits {
- NES_CQP_AEQ_4KB_CHUNK = (1<<14),
- NES_CQP_AEQ_VIRT = (1<<15),
-};
-
-enum nes_cqp_lmi_wqeword_idx {
- NES_CQP_LMI_WQE_LMI_OFFSET_IDX = 1,
- NES_CQP_LMI_WQE_FRAG_LOW_IDX = 8,
- NES_CQP_LMI_WQE_FRAG_HIGH_IDX = 9,
- NES_CQP_LMI_WQE_FRAG_LEN_IDX = 10,
-};
-
-enum nes_cqp_arp_wqeword_idx {
- NES_CQP_ARP_WQE_MAC_ADDR_LOW_IDX = 6,
- NES_CQP_ARP_WQE_MAC_HIGH_IDX = 7,
- NES_CQP_ARP_WQE_REACHABILITY_MAX_IDX = 1,
-};
-
-enum nes_cqp_upload_wqeword_idx {
- NES_CQP_UPLOAD_WQE_CTXT_LOW_IDX = 6,
- NES_CQP_UPLOAD_WQE_CTXT_HIGH_IDX = 7,
- NES_CQP_UPLOAD_WQE_HTE_IDX = 8,
-};
-
-enum nes_cqp_arp_bits {
- NES_CQP_ARP_VALID = (1<<8),
- NES_CQP_ARP_PERM = (1<<9),
-};
-
-enum nes_cqp_flush_bits {
- NES_CQP_FLUSH_SQ = (1<<30),
- NES_CQP_FLUSH_RQ = (1<<31),
- NES_CQP_FLUSH_MAJ_MIN = (1<<28),
-};
-
-enum nes_cqe_opcode_bits {
- NES_CQE_STAG_VALID = (1<<6),
- NES_CQE_ERROR = (1<<7),
- NES_CQE_SQ = (1<<8),
- NES_CQE_SE = (1<<9),
- NES_CQE_PSH = (1<<29),
- NES_CQE_FIN = (1<<30),
- NES_CQE_VALID = (1<<31),
-};
-
-
-enum nes_cqe_word_idx {
- NES_CQE_PAYLOAD_LENGTH_IDX = 0,
- NES_CQE_COMP_COMP_CTX_LOW_IDX = 2,
- NES_CQE_COMP_COMP_CTX_HIGH_IDX = 3,
- NES_CQE_INV_STAG_IDX = 4,
- NES_CQE_QP_ID_IDX = 5,
- NES_CQE_ERROR_CODE_IDX = 6,
- NES_CQE_OPCODE_IDX = 7,
-};
-
-enum nes_ceqe_word_idx {
- NES_CEQE_CQ_CTX_LOW_IDX = 0,
- NES_CEQE_CQ_CTX_HIGH_IDX = 1,
-};
-
-enum nes_ceqe_status_bit {
- NES_CEQE_VALID = (1<<31),
-};
-
-enum nes_int_bits {
- NES_INT_CEQ0 = (1<<0),
- NES_INT_CEQ1 = (1<<1),
- NES_INT_CEQ2 = (1<<2),
- NES_INT_CEQ3 = (1<<3),
- NES_INT_CEQ4 = (1<<4),
- NES_INT_CEQ5 = (1<<5),
- NES_INT_CEQ6 = (1<<6),
- NES_INT_CEQ7 = (1<<7),
- NES_INT_CEQ8 = (1<<8),
- NES_INT_CEQ9 = (1<<9),
- NES_INT_CEQ10 = (1<<10),
- NES_INT_CEQ11 = (1<<11),
- NES_INT_CEQ12 = (1<<12),
- NES_INT_CEQ13 = (1<<13),
- NES_INT_CEQ14 = (1<<14),
- NES_INT_CEQ15 = (1<<15),
- NES_INT_AEQ0 = (1<<16),
- NES_INT_AEQ1 = (1<<17),
- NES_INT_AEQ2 = (1<<18),
- NES_INT_AEQ3 = (1<<19),
- NES_INT_AEQ4 = (1<<20),
- NES_INT_AEQ5 = (1<<21),
- NES_INT_AEQ6 = (1<<22),
- NES_INT_AEQ7 = (1<<23),
- NES_INT_MAC0 = (1<<24),
- NES_INT_MAC1 = (1<<25),
- NES_INT_MAC2 = (1<<26),
- NES_INT_MAC3 = (1<<27),
- NES_INT_TSW = (1<<28),
- NES_INT_TIMER = (1<<29),
- NES_INT_INTF = (1<<30),
-};
-
-enum nes_intf_int_bits {
- NES_INTF_INT_PCIERR = (1<<0),
- NES_INTF_PERIODIC_TIMER = (1<<2),
- NES_INTF_ONE_SHOT_TIMER = (1<<3),
- NES_INTF_INT_CRITERR = (1<<14),
- NES_INTF_INT_AEQ0_OFLOW = (1<<16),
- NES_INTF_INT_AEQ1_OFLOW = (1<<17),
- NES_INTF_INT_AEQ2_OFLOW = (1<<18),
- NES_INTF_INT_AEQ3_OFLOW = (1<<19),
- NES_INTF_INT_AEQ4_OFLOW = (1<<20),
- NES_INTF_INT_AEQ5_OFLOW = (1<<21),
- NES_INTF_INT_AEQ6_OFLOW = (1<<22),
- NES_INTF_INT_AEQ7_OFLOW = (1<<23),
- NES_INTF_INT_AEQ_OFLOW = (0xff<<16),
-};
-
-enum nes_mac_int_bits {
- NES_MAC_INT_LINK_STAT_CHG = (1<<1),
- NES_MAC_INT_XGMII_EXT = (1<<2),
- NES_MAC_INT_TX_UNDERFLOW = (1<<6),
- NES_MAC_INT_TX_ERROR = (1<<7),
-};
-
-enum nes_cqe_allocate_bits {
- NES_CQE_ALLOC_INC_SELECT = (1<<28),
- NES_CQE_ALLOC_NOTIFY_NEXT = (1<<29),
- NES_CQE_ALLOC_NOTIFY_SE = (1<<30),
- NES_CQE_ALLOC_RESET = (1<<31),
-};
-
-enum nes_nic_rq_wqe_word_idx {
- NES_NIC_RQ_WQE_LENGTH_1_0_IDX = 0,
- NES_NIC_RQ_WQE_LENGTH_3_2_IDX = 1,
- NES_NIC_RQ_WQE_FRAG0_LOW_IDX = 2,
- NES_NIC_RQ_WQE_FRAG0_HIGH_IDX = 3,
- NES_NIC_RQ_WQE_FRAG1_LOW_IDX = 4,
- NES_NIC_RQ_WQE_FRAG1_HIGH_IDX = 5,
- NES_NIC_RQ_WQE_FRAG2_LOW_IDX = 6,
- NES_NIC_RQ_WQE_FRAG2_HIGH_IDX = 7,
- NES_NIC_RQ_WQE_FRAG3_LOW_IDX = 8,
- NES_NIC_RQ_WQE_FRAG3_HIGH_IDX = 9,
-};
-
-enum nes_nic_sq_wqe_word_idx {
- NES_NIC_SQ_WQE_MISC_IDX = 0,
- NES_NIC_SQ_WQE_TOTAL_LENGTH_IDX = 1,
- NES_NIC_SQ_WQE_LSO_INFO_IDX = 2,
- NES_NIC_SQ_WQE_LENGTH_0_TAG_IDX = 3,
- NES_NIC_SQ_WQE_LENGTH_2_1_IDX = 4,
- NES_NIC_SQ_WQE_LENGTH_4_3_IDX = 5,
- NES_NIC_SQ_WQE_FRAG0_LOW_IDX = 6,
- NES_NIC_SQ_WQE_FRAG0_HIGH_IDX = 7,
- NES_NIC_SQ_WQE_FRAG1_LOW_IDX = 8,
- NES_NIC_SQ_WQE_FRAG1_HIGH_IDX = 9,
- NES_NIC_SQ_WQE_FRAG2_LOW_IDX = 10,
- NES_NIC_SQ_WQE_FRAG2_HIGH_IDX = 11,
- NES_NIC_SQ_WQE_FRAG3_LOW_IDX = 12,
- NES_NIC_SQ_WQE_FRAG3_HIGH_IDX = 13,
- NES_NIC_SQ_WQE_FRAG4_LOW_IDX = 14,
- NES_NIC_SQ_WQE_FRAG4_HIGH_IDX = 15,
-};
-
-enum nes_iwarp_sq_wqe_word_idx {
- NES_IWARP_SQ_WQE_MISC_IDX = 0,
- NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX = 1,
- NES_IWARP_SQ_WQE_COMP_CTX_LOW_IDX = 2,
- NES_IWARP_SQ_WQE_COMP_CTX_HIGH_IDX = 3,
- NES_IWARP_SQ_WQE_COMP_SCRATCH_LOW_IDX = 4,
- NES_IWARP_SQ_WQE_COMP_SCRATCH_HIGH_IDX = 5,
- NES_IWARP_SQ_WQE_INV_STAG_LOW_IDX = 7,
- NES_IWARP_SQ_WQE_RDMA_TO_LOW_IDX = 8,
- NES_IWARP_SQ_WQE_RDMA_TO_HIGH_IDX = 9,
- NES_IWARP_SQ_WQE_RDMA_LENGTH_IDX = 10,
- NES_IWARP_SQ_WQE_RDMA_STAG_IDX = 11,
- NES_IWARP_SQ_WQE_IMM_DATA_START_IDX = 12,
- NES_IWARP_SQ_WQE_FRAG0_LOW_IDX = 16,
- NES_IWARP_SQ_WQE_FRAG0_HIGH_IDX = 17,
- NES_IWARP_SQ_WQE_LENGTH0_IDX = 18,
- NES_IWARP_SQ_WQE_STAG0_IDX = 19,
- NES_IWARP_SQ_WQE_FRAG1_LOW_IDX = 20,
- NES_IWARP_SQ_WQE_FRAG1_HIGH_IDX = 21,
- NES_IWARP_SQ_WQE_LENGTH1_IDX = 22,
- NES_IWARP_SQ_WQE_STAG1_IDX = 23,
- NES_IWARP_SQ_WQE_FRAG2_LOW_IDX = 24,
- NES_IWARP_SQ_WQE_FRAG2_HIGH_IDX = 25,
- NES_IWARP_SQ_WQE_LENGTH2_IDX = 26,
- NES_IWARP_SQ_WQE_STAG2_IDX = 27,
- NES_IWARP_SQ_WQE_FRAG3_LOW_IDX = 28,
- NES_IWARP_SQ_WQE_FRAG3_HIGH_IDX = 29,
- NES_IWARP_SQ_WQE_LENGTH3_IDX = 30,
- NES_IWARP_SQ_WQE_STAG3_IDX = 31,
-};
-
-enum nes_iwarp_sq_bind_wqe_word_idx {
- NES_IWARP_SQ_BIND_WQE_MR_IDX = 6,
- NES_IWARP_SQ_BIND_WQE_MW_IDX = 7,
- NES_IWARP_SQ_BIND_WQE_LENGTH_LOW_IDX = 8,
- NES_IWARP_SQ_BIND_WQE_LENGTH_HIGH_IDX = 9,
- NES_IWARP_SQ_BIND_WQE_VA_FBO_LOW_IDX = 10,
- NES_IWARP_SQ_BIND_WQE_VA_FBO_HIGH_IDX = 11,
-};
-
-enum nes_iwarp_sq_fmr_wqe_word_idx {
- NES_IWARP_SQ_FMR_WQE_MR_STAG_IDX = 7,
- NES_IWARP_SQ_FMR_WQE_LENGTH_LOW_IDX = 8,
- NES_IWARP_SQ_FMR_WQE_LENGTH_HIGH_IDX = 9,
- NES_IWARP_SQ_FMR_WQE_VA_FBO_LOW_IDX = 10,
- NES_IWARP_SQ_FMR_WQE_VA_FBO_HIGH_IDX = 11,
- NES_IWARP_SQ_FMR_WQE_PBL_ADDR_LOW_IDX = 12,
- NES_IWARP_SQ_FMR_WQE_PBL_ADDR_HIGH_IDX = 13,
- NES_IWARP_SQ_FMR_WQE_PBL_LENGTH_IDX = 14,
-};
-
-enum nes_iwarp_sq_fmr_opcodes {
- NES_IWARP_SQ_FMR_WQE_ZERO_BASED = (1<<6),
- NES_IWARP_SQ_FMR_WQE_PAGE_SIZE_4K = (0<<7),
- NES_IWARP_SQ_FMR_WQE_PAGE_SIZE_2M = (1<<7),
- NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_LOCAL_READ = (1<<16),
- NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_LOCAL_WRITE = (1<<17),
- NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_REMOTE_READ = (1<<18),
- NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_REMOTE_WRITE = (1<<19),
- NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_WINDOW_BIND = (1<<20),
-};
-
-#define NES_IWARP_SQ_FMR_WQE_MR_LENGTH_HIGH_MASK 0xFF;
-
-enum nes_iwarp_sq_locinv_wqe_word_idx {
- NES_IWARP_SQ_LOCINV_WQE_INV_STAG_IDX = 6,
-};
-
-enum nes_iwarp_rq_wqe_word_idx {
- NES_IWARP_RQ_WQE_TOTAL_PAYLOAD_IDX = 1,
- NES_IWARP_RQ_WQE_COMP_CTX_LOW_IDX = 2,
- NES_IWARP_RQ_WQE_COMP_CTX_HIGH_IDX = 3,
- NES_IWARP_RQ_WQE_COMP_SCRATCH_LOW_IDX = 4,
- NES_IWARP_RQ_WQE_COMP_SCRATCH_HIGH_IDX = 5,
- NES_IWARP_RQ_WQE_FRAG0_LOW_IDX = 8,
- NES_IWARP_RQ_WQE_FRAG0_HIGH_IDX = 9,
- NES_IWARP_RQ_WQE_LENGTH0_IDX = 10,
- NES_IWARP_RQ_WQE_STAG0_IDX = 11,
- NES_IWARP_RQ_WQE_FRAG1_LOW_IDX = 12,
- NES_IWARP_RQ_WQE_FRAG1_HIGH_IDX = 13,
- NES_IWARP_RQ_WQE_LENGTH1_IDX = 14,
- NES_IWARP_RQ_WQE_STAG1_IDX = 15,
- NES_IWARP_RQ_WQE_FRAG2_LOW_IDX = 16,
- NES_IWARP_RQ_WQE_FRAG2_HIGH_IDX = 17,
- NES_IWARP_RQ_WQE_LENGTH2_IDX = 18,
- NES_IWARP_RQ_WQE_STAG2_IDX = 19,
- NES_IWARP_RQ_WQE_FRAG3_LOW_IDX = 20,
- NES_IWARP_RQ_WQE_FRAG3_HIGH_IDX = 21,
- NES_IWARP_RQ_WQE_LENGTH3_IDX = 22,
- NES_IWARP_RQ_WQE_STAG3_IDX = 23,
-};
-
-enum nes_nic_sq_wqe_bits {
- NES_NIC_SQ_WQE_PHDR_CS_READY = (1<<21),
- NES_NIC_SQ_WQE_LSO_ENABLE = (1<<22),
- NES_NIC_SQ_WQE_TAGVALUE_ENABLE = (1<<23),
- NES_NIC_SQ_WQE_DISABLE_CHKSUM = (1<<30),
- NES_NIC_SQ_WQE_COMPLETION = (1<<31),
-};
-
-enum nes_nic_cqe_word_idx {
- NES_NIC_CQE_ACCQP_ID_IDX = 0,
- NES_NIC_CQE_HASH_RCVNXT = 1,
- NES_NIC_CQE_TAG_PKT_TYPE_IDX = 2,
- NES_NIC_CQE_MISC_IDX = 3,
-};
-
-#define NES_PKT_TYPE_APBVT_BITS 0xC112
-#define NES_PKT_TYPE_APBVT_MASK 0xff3e
-
-#define NES_PKT_TYPE_PVALID_BITS 0x10000000
-#define NES_PKT_TYPE_PVALID_MASK 0x30000000
-
-#define NES_PKT_TYPE_TCPV4_BITS 0x0110
-#define NES_PKT_TYPE_TCPV4_MASK 0x3f30
-
-#define NES_PKT_TYPE_UDPV4_BITS 0x0210
-#define NES_PKT_TYPE_UDPV4_MASK 0x3f30
-
-#define NES_PKT_TYPE_IPV4_BITS 0x0010
-#define NES_PKT_TYPE_IPV4_MASK 0x3f30
-
-#define NES_PKT_TYPE_OTHER_BITS 0x0000
-#define NES_PKT_TYPE_OTHER_MASK 0x0030
-
-#define NES_NIC_CQE_ERRV_SHIFT 16
-enum nes_nic_ev_bits {
- NES_NIC_ERRV_BITS_MODE = (1<<0),
- NES_NIC_ERRV_BITS_IPV4_CSUM_ERR = (1<<1),
- NES_NIC_ERRV_BITS_TCPUDP_CSUM_ERR = (1<<2),
- NES_NIC_ERRV_BITS_WQE_OVERRUN = (1<<3),
- NES_NIC_ERRV_BITS_IPH_ERR = (1<<4),
-};
-
-enum nes_nic_cqe_bits {
- NES_NIC_CQE_ERRV_MASK = (0xff<<NES_NIC_CQE_ERRV_SHIFT),
- NES_NIC_CQE_SQ = (1<<24),
- NES_NIC_CQE_ACCQP_PORT = (1<<28),
- NES_NIC_CQE_ACCQP_VALID = (1<<29),
- NES_NIC_CQE_TAG_VALID = (1<<30),
- NES_NIC_CQE_VALID = (1<<31),
-};
-
-enum nes_aeqe_word_idx {
- NES_AEQE_COMP_CTXT_LOW_IDX = 0,
- NES_AEQE_COMP_CTXT_HIGH_IDX = 1,
- NES_AEQE_COMP_QP_CQ_ID_IDX = 2,
- NES_AEQE_MISC_IDX = 3,
-};
-
-enum nes_aeqe_bits {
- NES_AEQE_QP = (1<<16),
- NES_AEQE_CQ = (1<<17),
- NES_AEQE_SQ = (1<<18),
- NES_AEQE_INBOUND_RDMA = (1<<19),
- NES_AEQE_IWARP_STATE_MASK = (7<<20),
- NES_AEQE_TCP_STATE_MASK = (0xf<<24),
- NES_AEQE_Q2_DATA_WRITTEN = (0x3<<28),
- NES_AEQE_VALID = (1<<31),
-};
-
-#define NES_AEQE_IWARP_STATE_SHIFT 20
-#define NES_AEQE_TCP_STATE_SHIFT 24
-#define NES_AEQE_Q2_DATA_ETHERNET (1<<28)
-#define NES_AEQE_Q2_DATA_MPA (1<<29)
-
-enum nes_aeqe_iwarp_state {
- NES_AEQE_IWARP_STATE_NON_EXISTANT = 0,
- NES_AEQE_IWARP_STATE_IDLE = 1,
- NES_AEQE_IWARP_STATE_RTS = 2,
- NES_AEQE_IWARP_STATE_CLOSING = 3,
- NES_AEQE_IWARP_STATE_TERMINATE = 5,
- NES_AEQE_IWARP_STATE_ERROR = 6
-};
-
-enum nes_aeqe_tcp_state {
- NES_AEQE_TCP_STATE_NON_EXISTANT = 0,
- NES_AEQE_TCP_STATE_CLOSED = 1,
- NES_AEQE_TCP_STATE_LISTEN = 2,
- NES_AEQE_TCP_STATE_SYN_SENT = 3,
- NES_AEQE_TCP_STATE_SYN_RCVD = 4,
- NES_AEQE_TCP_STATE_ESTABLISHED = 5,
- NES_AEQE_TCP_STATE_CLOSE_WAIT = 6,
- NES_AEQE_TCP_STATE_FIN_WAIT_1 = 7,
- NES_AEQE_TCP_STATE_CLOSING = 8,
- NES_AEQE_TCP_STATE_LAST_ACK = 9,
- NES_AEQE_TCP_STATE_FIN_WAIT_2 = 10,
- NES_AEQE_TCP_STATE_TIME_WAIT = 11
-};
-
-enum nes_aeqe_aeid {
- NES_AEQE_AEID_AMP_UNALLOCATED_STAG = 0x0102,
- NES_AEQE_AEID_AMP_INVALID_STAG = 0x0103,
- NES_AEQE_AEID_AMP_BAD_QP = 0x0104,
- NES_AEQE_AEID_AMP_BAD_PD = 0x0105,
- NES_AEQE_AEID_AMP_BAD_STAG_KEY = 0x0106,
- NES_AEQE_AEID_AMP_BAD_STAG_INDEX = 0x0107,
- NES_AEQE_AEID_AMP_BOUNDS_VIOLATION = 0x0108,
- NES_AEQE_AEID_AMP_RIGHTS_VIOLATION = 0x0109,
- NES_AEQE_AEID_AMP_TO_WRAP = 0x010a,
- NES_AEQE_AEID_AMP_FASTREG_SHARED = 0x010b,
- NES_AEQE_AEID_AMP_FASTREG_VALID_STAG = 0x010c,
- NES_AEQE_AEID_AMP_FASTREG_MW_STAG = 0x010d,
- NES_AEQE_AEID_AMP_FASTREG_INVALID_RIGHTS = 0x010e,
- NES_AEQE_AEID_AMP_FASTREG_PBL_TABLE_OVERFLOW = 0x010f,
- NES_AEQE_AEID_AMP_FASTREG_INVALID_LENGTH = 0x0110,
- NES_AEQE_AEID_AMP_INVALIDATE_SHARED = 0x0111,
- NES_AEQE_AEID_AMP_INVALIDATE_NO_REMOTE_ACCESS_RIGHTS = 0x0112,
- NES_AEQE_AEID_AMP_INVALIDATE_MR_WITH_BOUND_WINDOWS = 0x0113,
- NES_AEQE_AEID_AMP_MWBIND_VALID_STAG = 0x0114,
- NES_AEQE_AEID_AMP_MWBIND_OF_MR_STAG = 0x0115,
- NES_AEQE_AEID_AMP_MWBIND_TO_ZERO_BASED_STAG = 0x0116,
- NES_AEQE_AEID_AMP_MWBIND_TO_MW_STAG = 0x0117,
- NES_AEQE_AEID_AMP_MWBIND_INVALID_RIGHTS = 0x0118,
- NES_AEQE_AEID_AMP_MWBIND_INVALID_BOUNDS = 0x0119,
- NES_AEQE_AEID_AMP_MWBIND_TO_INVALID_PARENT = 0x011a,
- NES_AEQE_AEID_AMP_MWBIND_BIND_DISABLED = 0x011b,
- NES_AEQE_AEID_BAD_CLOSE = 0x0201,
- NES_AEQE_AEID_RDMAP_ROE_BAD_LLP_CLOSE = 0x0202,
- NES_AEQE_AEID_CQ_OPERATION_ERROR = 0x0203,
- NES_AEQE_AEID_PRIV_OPERATION_DENIED = 0x0204,
- NES_AEQE_AEID_RDMA_READ_WHILE_ORD_ZERO = 0x0205,
- NES_AEQE_AEID_STAG_ZERO_INVALID = 0x0206,
- NES_AEQE_AEID_DDP_INVALID_MSN_GAP_IN_MSN = 0x0301,
- NES_AEQE_AEID_DDP_INVALID_MSN_RANGE_IS_NOT_VALID = 0x0302,
- NES_AEQE_AEID_DDP_UBE_DDP_MESSAGE_TOO_LONG_FOR_AVAILABLE_BUFFER = 0x0303,
- NES_AEQE_AEID_DDP_UBE_INVALID_DDP_VERSION = 0x0304,
- NES_AEQE_AEID_DDP_UBE_INVALID_MO = 0x0305,
- NES_AEQE_AEID_DDP_UBE_INVALID_MSN_NO_BUFFER_AVAILABLE = 0x0306,
- NES_AEQE_AEID_DDP_UBE_INVALID_QN = 0x0307,
- NES_AEQE_AEID_DDP_NO_L_BIT = 0x0308,
- NES_AEQE_AEID_RDMAP_ROE_INVALID_RDMAP_VERSION = 0x0311,
- NES_AEQE_AEID_RDMAP_ROE_UNEXPECTED_OPCODE = 0x0312,
- NES_AEQE_AEID_ROE_INVALID_RDMA_READ_REQUEST = 0x0313,
- NES_AEQE_AEID_ROE_INVALID_RDMA_WRITE_OR_READ_RESP = 0x0314,
- NES_AEQE_AEID_INVALID_ARP_ENTRY = 0x0401,
- NES_AEQE_AEID_INVALID_TCP_OPTION_RCVD = 0x0402,
- NES_AEQE_AEID_STALE_ARP_ENTRY = 0x0403,
- NES_AEQE_AEID_LLP_CLOSE_COMPLETE = 0x0501,
- NES_AEQE_AEID_LLP_CONNECTION_RESET = 0x0502,
- NES_AEQE_AEID_LLP_FIN_RECEIVED = 0x0503,
- NES_AEQE_AEID_LLP_RECEIVED_MARKER_AND_LENGTH_FIELDS_DONT_MATCH = 0x0504,
- NES_AEQE_AEID_LLP_RECEIVED_MPA_CRC_ERROR = 0x0505,
- NES_AEQE_AEID_LLP_SEGMENT_TOO_LARGE = 0x0506,
- NES_AEQE_AEID_LLP_SEGMENT_TOO_SMALL = 0x0507,
- NES_AEQE_AEID_LLP_SYN_RECEIVED = 0x0508,
- NES_AEQE_AEID_LLP_TERMINATE_RECEIVED = 0x0509,
- NES_AEQE_AEID_LLP_TOO_MANY_RETRIES = 0x050a,
- NES_AEQE_AEID_LLP_TOO_MANY_KEEPALIVE_RETRIES = 0x050b,
- NES_AEQE_AEID_RESET_SENT = 0x0601,
- NES_AEQE_AEID_TERMINATE_SENT = 0x0602,
- NES_AEQE_AEID_DDP_LCE_LOCAL_CATASTROPHIC = 0x0700
-};
-
-enum nes_iwarp_sq_opcodes {
- NES_IWARP_SQ_WQE_WRPDU = (1<<15),
- NES_IWARP_SQ_WQE_PSH = (1<<21),
- NES_IWARP_SQ_WQE_STREAMING = (1<<23),
- NES_IWARP_SQ_WQE_IMM_DATA = (1<<28),
- NES_IWARP_SQ_WQE_READ_FENCE = (1<<29),
- NES_IWARP_SQ_WQE_LOCAL_FENCE = (1<<30),
- NES_IWARP_SQ_WQE_SIGNALED_COMPL = (1<<31),
-};
-
-enum nes_iwarp_sq_wqe_bits {
- NES_IWARP_SQ_OP_RDMAW = 0,
- NES_IWARP_SQ_OP_RDMAR = 1,
- NES_IWARP_SQ_OP_SEND = 3,
- NES_IWARP_SQ_OP_SENDINV = 4,
- NES_IWARP_SQ_OP_SENDSE = 5,
- NES_IWARP_SQ_OP_SENDSEINV = 6,
- NES_IWARP_SQ_OP_BIND = 8,
- NES_IWARP_SQ_OP_FAST_REG = 9,
- NES_IWARP_SQ_OP_LOCINV = 10,
- NES_IWARP_SQ_OP_RDMAR_LOCINV = 11,
- NES_IWARP_SQ_OP_NOP = 12,
-};
-
-enum nes_iwarp_cqe_major_code {
- NES_IWARP_CQE_MAJOR_FLUSH = 1,
- NES_IWARP_CQE_MAJOR_DRV = 0x8000
-};
-
-enum nes_iwarp_cqe_minor_code {
- NES_IWARP_CQE_MINOR_FLUSH = 1
-};
-
-#define NES_EEPROM_READ_REQUEST (1<<16)
-#define NES_MAC_ADDR_VALID (1<<20)
-
-/*
- * NES index registers init values.
- */
-struct nes_init_values {
- u32 index;
- u32 data;
- u8 wrt;
-};
-
-/*
- * NES registers in BAR0.
- */
-struct nes_pci_regs {
- u32 int_status;
- u32 int_mask;
- u32 int_pending;
- u32 intf_int_status;
- u32 intf_int_mask;
- u32 other_regs[59]; /* pad out to 256 bytes for now */
-};
-
-#define NES_CQP_SQ_SIZE 128
-#define NES_CCQ_SIZE 128
-#define NES_NIC_WQ_SIZE 512
-#define NES_NIC_CTX_SIZE ((NES_NIC_CTX_RQ_SIZE_512) | (NES_NIC_CTX_SQ_SIZE_512))
-#define NES_NIC_BACK_STORE 0x00038000
-
-struct nes_device;
-
-struct nes_hw_nic_qp_context {
- __le32 context_words[6];
-};
-
-struct nes_hw_nic_sq_wqe {
- __le32 wqe_words[16];
-};
-
-struct nes_hw_nic_rq_wqe {
- __le32 wqe_words[16];
-};
-
-struct nes_hw_nic_cqe {
- __le32 cqe_words[4];
-};
-
-struct nes_hw_cqp_qp_context {
- __le32 context_words[4];
-};
-
-struct nes_hw_cqp_wqe {
- __le32 wqe_words[16];
-};
-
-struct nes_hw_qp_wqe {
- __le32 wqe_words[32];
-};
-
-struct nes_hw_cqe {
- __le32 cqe_words[8];
-};
-
-struct nes_hw_ceqe {
- __le32 ceqe_words[2];
-};
-
-struct nes_hw_aeqe {
- __le32 aeqe_words[4];
-};
-
-struct nes_cqp_request {
- union {
- u64 cqp_callback_context;
- void *cqp_callback_pointer;
- };
- wait_queue_head_t waitq;
- struct nes_hw_cqp_wqe cqp_wqe;
- struct list_head list;
- atomic_t refcount;
- void (*cqp_callback)(struct nes_device *nesdev, struct nes_cqp_request *cqp_request);
- u16 major_code;
- u16 minor_code;
- u8 waiting;
- u8 request_done;
- u8 dynamic;
- u8 callback;
-};
-
-struct nes_hw_cqp {
- struct nes_hw_cqp_wqe *sq_vbase;
- dma_addr_t sq_pbase;
- spinlock_t lock;
- wait_queue_head_t waitq;
- u16 qp_id;
- u16 sq_head;
- u16 sq_tail;
- u16 sq_size;
-};
-
-#define NES_FIRST_FRAG_SIZE 128
-struct nes_first_frag {
- u8 buffer[NES_FIRST_FRAG_SIZE];
-};
-
-struct nes_hw_nic {
- struct nes_first_frag *first_frag_vbase; /* virtual address of first frags */
- struct nes_hw_nic_sq_wqe *sq_vbase; /* virtual address of sq */
- struct nes_hw_nic_rq_wqe *rq_vbase; /* virtual address of rq */
- struct sk_buff *tx_skb[NES_NIC_WQ_SIZE];
- struct sk_buff *rx_skb[NES_NIC_WQ_SIZE];
- dma_addr_t frag_paddr[NES_NIC_WQ_SIZE];
- unsigned long first_frag_overflow[BITS_TO_LONGS(NES_NIC_WQ_SIZE)];
- dma_addr_t sq_pbase; /* PCI memory for host rings */
- dma_addr_t rq_pbase; /* PCI memory for host rings */
-
- u16 qp_id;
- u16 sq_head;
- u16 sq_tail;
- u16 sq_size;
- u16 rq_head;
- u16 rq_tail;
- u16 rq_size;
- u8 replenishing_rq;
- u8 reserved;
-
- spinlock_t rq_lock;
-};
-
-struct nes_hw_nic_cq {
- struct nes_hw_nic_cqe volatile *cq_vbase; /* PCI memory for host rings */
- void (*ce_handler)(struct nes_device *nesdev, struct nes_hw_nic_cq *cq);
- dma_addr_t cq_pbase; /* PCI memory for host rings */
- int rx_cqes_completed;
- int cqe_allocs_pending;
- int rx_pkts_indicated;
- u16 cq_head;
- u16 cq_size;
- u16 cq_number;
- u8 cqes_pending;
-};
-
-struct nes_hw_qp {
- struct nes_hw_qp_wqe *sq_vbase; /* PCI memory for host rings */
- struct nes_hw_qp_wqe *rq_vbase; /* PCI memory for host rings */
- void *q2_vbase; /* PCI memory for host rings */
- dma_addr_t sq_pbase; /* PCI memory for host rings */
- dma_addr_t rq_pbase; /* PCI memory for host rings */
- dma_addr_t q2_pbase; /* PCI memory for host rings */
- u32 qp_id;
- u16 sq_head;
- u16 sq_tail;
- u16 sq_size;
- u16 rq_head;
- u16 rq_tail;
- u16 rq_size;
- u8 rq_encoded_size;
- u8 sq_encoded_size;
-};
-
-struct nes_hw_cq {
- struct nes_hw_cqe *cq_vbase; /* PCI memory for host rings */
- void (*ce_handler)(struct nes_device *nesdev, struct nes_hw_cq *cq);
- dma_addr_t cq_pbase; /* PCI memory for host rings */
- u16 cq_head;
- u16 cq_size;
- u16 cq_number;
-};
-
-struct nes_hw_ceq {
- struct nes_hw_ceqe volatile *ceq_vbase; /* PCI memory for host rings */
- dma_addr_t ceq_pbase; /* PCI memory for host rings */
- u16 ceq_head;
- u16 ceq_size;
-};
-
-struct nes_hw_aeq {
- struct nes_hw_aeqe volatile *aeq_vbase; /* PCI memory for host rings */
- dma_addr_t aeq_pbase; /* PCI memory for host rings */
- u16 aeq_head;
- u16 aeq_size;
-};
-
-struct nic_qp_map {
- u8 qpid;
- u8 nic_index;
- u8 logical_port;
- u8 is_hnic;
-};
-
-#define NES_CQP_ARP_AEQ_INDEX_MASK 0x000f0000
-#define NES_CQP_ARP_AEQ_INDEX_SHIFT 16
-
-#define NES_CQP_APBVT_ADD 0x00008000
-#define NES_CQP_APBVT_NIC_SHIFT 16
-
-#define NES_ARP_ADD 1
-#define NES_ARP_DELETE 2
-#define NES_ARP_RESOLVE 3
-
-#define NES_MAC_SW_IDLE 0
-#define NES_MAC_SW_INTERRUPT 1
-#define NES_MAC_SW_MH 2
-
-struct nes_arp_entry {
- u32 ip_addr;
- u8 mac_addr[ETH_ALEN];
-};
-
-#define NES_NIC_FAST_TIMER 96
-#define NES_NIC_FAST_TIMER_LOW 40
-#define NES_NIC_FAST_TIMER_HIGH 1000
-#define DEFAULT_NES_QL_HIGH 256
-#define DEFAULT_NES_QL_LOW 16
-#define DEFAULT_NES_QL_TARGET 64
-#define DEFAULT_JUMBO_NES_QL_LOW 12
-#define DEFAULT_JUMBO_NES_QL_TARGET 40
-#define DEFAULT_JUMBO_NES_QL_HIGH 128
-#define NES_NIC_CQ_DOWNWARD_TREND 16
-#define NES_PFT_SIZE 48
-
-#define NES_MGT_WQ_COUNT 32
-#define NES_MGT_CTX_SIZE ((NES_NIC_CTX_RQ_SIZE_32) | (NES_NIC_CTX_SQ_SIZE_32))
-#define NES_MGT_QP_OFFSET 36
-#define NES_MGT_QP_COUNT 4
-
-struct nes_hw_tune_timer {
- /* u16 cq_count; */
- u16 threshold_low;
- u16 threshold_target;
- u16 threshold_high;
- u16 timer_in_use;
- u16 timer_in_use_old;
- u16 timer_in_use_min;
- u16 timer_in_use_max;
- u8 timer_direction_upward;
- u8 timer_direction_downward;
- u16 cq_count_old;
- u8 cq_direction_downward;
-};
-
-#define NES_TIMER_INT_LIMIT 2
-#define NES_TIMER_INT_LIMIT_DYNAMIC 10
-#define NES_TIMER_ENABLE_LIMIT 4
-#define NES_MAX_LINK_INTERRUPTS 128
-#define NES_MAX_LINK_CHECK 200
-
-struct nes_adapter {
- u64 fw_ver;
- unsigned long *allocated_qps;
- unsigned long *allocated_cqs;
- unsigned long *allocated_mrs;
- unsigned long *allocated_pds;
- unsigned long *allocated_arps;
- struct nes_qp **qp_table;
- struct workqueue_struct *work_q;
-
- struct list_head list;
- struct list_head active_listeners;
- /* list of the netdev's associated with each logical port */
- struct list_head nesvnic_list[4];
-
- struct timer_list mh_timer;
- struct timer_list lc_timer;
- struct work_struct work;
- spinlock_t resource_lock;
- spinlock_t phy_lock;
- spinlock_t pbl_lock;
- spinlock_t periodic_timer_lock;
-
- struct nes_arp_entry arp_table[NES_MAX_ARP_TABLE_SIZE];
-
- /* Adapter CEQ and AEQs */
- struct nes_hw_ceq ceq[16];
- struct nes_hw_aeq aeq[8];
-
- struct nes_hw_tune_timer tune_timer;
-
- unsigned long doorbell_start;
-
- u32 hw_rev;
- u32 vendor_id;
- u32 vendor_part_id;
- u32 device_cap_flags;
- u32 tick_delta;
- u32 timer_int_req;
- u32 arp_table_size;
- u32 next_arp_index;
-
- u32 max_mr;
- u32 max_256pbl;
- u32 max_4kpbl;
- u32 free_256pbl;
- u32 free_4kpbl;
- u32 max_mr_size;
- u32 max_qp;
- u32 next_qp;
- u32 max_irrq;
- u32 max_qp_wr;
- u32 max_sge;
- u32 max_cq;
- u32 next_cq;
- u32 max_cqe;
- u32 max_pd;
- u32 base_pd;
- u32 next_pd;
- u32 hte_index_mask;
-
- /* EEPROM information */
- u32 rx_pool_size;
- u32 tx_pool_size;
- u32 rx_threshold;
- u32 tcp_timer_core_clk_divisor;
- u32 iwarp_config;
- u32 cm_config;
- u32 sws_timer_config;
- u32 tcp_config1;
- u32 wqm_wat;
- u32 core_clock;
- u32 firmware_version;
- u32 eeprom_version;
-
- u32 nic_rx_eth_route_err;
-
- u32 et_rx_coalesce_usecs;
- u32 et_rx_max_coalesced_frames;
- u32 et_rx_coalesce_usecs_irq;
- u32 et_rx_max_coalesced_frames_irq;
- u32 et_pkt_rate_low;
- u32 et_rx_coalesce_usecs_low;
- u32 et_rx_max_coalesced_frames_low;
- u32 et_pkt_rate_high;
- u32 et_rx_coalesce_usecs_high;
- u32 et_rx_max_coalesced_frames_high;
- u32 et_rate_sample_interval;
- u32 timer_int_limit;
- u32 wqm_quanta;
- u8 allow_unaligned_fpdus;
-
- /* Adapter base MAC address */
- u32 mac_addr_low;
- u16 mac_addr_high;
-
- u16 firmware_eeprom_offset;
- u16 software_eeprom_offset;
-
- u16 max_irrq_wr;
-
- /* pd config for each port */
- u16 pd_config_size[4];
- u16 pd_config_base[4];
-
- u16 link_interrupt_count[4];
- u8 crit_error_count[32];
-
- /* the phy index for each port */
- u8 phy_index[4];
- u8 mac_sw_state[4];
- u8 mac_link_down[4];
- u8 phy_type[4];
- u8 log_port;
-
- /* PCI information */
- unsigned int devfn;
- unsigned char bus_number;
- unsigned char OneG_Mode;
-
- unsigned char ref_count;
- u8 netdev_count;
- u8 netdev_max; /* from host nic address count in EEPROM */
- u8 port_count;
- u8 virtwq;
- u8 send_term_ok;
- u8 et_use_adaptive_rx_coalesce;
- u8 adapter_fcn_count;
- u8 pft_mcast_map[NES_PFT_SIZE];
-};
-
-struct nes_pbl {
- u64 *pbl_vbase;
- dma_addr_t pbl_pbase;
- struct page *page;
- unsigned long user_base;
- u32 pbl_size;
- struct list_head list;
- /* TODO: need to add list for two level tables */
-};
-
-#define NES_4K_PBL_CHUNK_SIZE 4096
-
-struct nes_fast_mr_wqe_pbl {
- u64 *kva;
- dma_addr_t paddr;
-};
-
-struct nes_listener {
- struct work_struct work;
- struct workqueue_struct *wq;
- struct nes_vnic *nesvnic;
- struct iw_cm_id *cm_id;
- struct list_head list;
- unsigned long socket;
- u8 accept_failed;
-};
-
-struct nes_ib_device;
-
-#define NES_EVENT_DELAY msecs_to_jiffies(100)
-
-struct nes_vnic {
- struct nes_ib_device *nesibdev;
- u64 sq_full;
- u64 tso_requests;
- u64 segmented_tso_requests;
- u64 linearized_skbs;
- u64 tx_sw_dropped;
- u64 endnode_nstat_rx_discard;
- u64 endnode_nstat_rx_octets;
- u64 endnode_nstat_rx_frames;
- u64 endnode_nstat_tx_octets;
- u64 endnode_nstat_tx_frames;
- u64 endnode_ipv4_tcp_retransmits;
- /* void *mem; */
- struct nes_device *nesdev;
- struct net_device *netdev;
- atomic_t rx_skbs_needed;
- atomic_t rx_skb_timer_running;
- int budget;
- u32 msg_enable;
- /* u32 tx_avail; */
- __be32 local_ipaddr;
- struct napi_struct napi;
- spinlock_t tx_lock; /* could use netdev tx lock? */
- struct timer_list rq_wqes_timer;
- u32 nic_mem_size;
- void *nic_vbase;
- dma_addr_t nic_pbase;
- struct nes_hw_nic nic;
- struct nes_hw_nic_cq nic_cq;
- u32 mcrq_qp_id;
- struct nes_ucontext *mcrq_ucontext;
- struct nes_cqp_request* (*get_cqp_request)(struct nes_device *nesdev);
- void (*post_cqp_request)(struct nes_device*, struct nes_cqp_request *);
- int (*mcrq_mcast_filter)( struct nes_vnic* nesvnic, __u8* dmi_addr );
- struct net_device_stats netstats;
- /* used to put the netdev on the adapters logical port list */
- struct list_head list;
- u16 max_frame_size;
- u8 netdev_open;
- u8 linkup;
- u8 logical_port;
- u8 netdev_index; /* might not be needed, indexes nesdev->netdev */
- u8 perfect_filter_index;
- u8 nic_index;
- u8 qp_nic_index[4];
- u8 next_qp_nic_index;
- u8 of_device_registered;
- u8 rdma_enabled;
- struct timer_list event_timer;
- enum ib_event_type delayed_event;
- enum ib_event_type last_dispatched_event;
- spinlock_t port_ibevent_lock;
- u32 mgt_mem_size;
- void *mgt_vbase;
- dma_addr_t mgt_pbase;
- struct nes_vnic_mgt *mgtvnic[NES_MGT_QP_COUNT];
- struct task_struct *mgt_thread;
- wait_queue_head_t mgt_wait_queue;
- struct sk_buff_head mgt_skb_list;
-
-};
-
-struct nes_ib_device {
- struct ib_device ibdev;
- struct nes_vnic *nesvnic;
-
- /* Virtual RNIC Limits */
- u32 max_mr;
- u32 max_qp;
- u32 max_cq;
- u32 max_pd;
- u32 num_mr;
- u32 num_qp;
- u32 num_cq;
- u32 num_pd;
-};
-
-enum nes_hdrct_flags {
- DDP_LEN_FLAG = 0x80,
- DDP_HDR_FLAG = 0x40,
- RDMA_HDR_FLAG = 0x20
-};
-
-enum nes_term_layers {
- LAYER_RDMA = 0,
- LAYER_DDP = 1,
- LAYER_MPA = 2
-};
-
-enum nes_term_error_types {
- RDMAP_CATASTROPHIC = 0,
- RDMAP_REMOTE_PROT = 1,
- RDMAP_REMOTE_OP = 2,
- DDP_CATASTROPHIC = 0,
- DDP_TAGGED_BUFFER = 1,
- DDP_UNTAGGED_BUFFER = 2,
- DDP_LLP = 3
-};
-
-enum nes_term_rdma_errors {
- RDMAP_INV_STAG = 0x00,
- RDMAP_INV_BOUNDS = 0x01,
- RDMAP_ACCESS = 0x02,
- RDMAP_UNASSOC_STAG = 0x03,
- RDMAP_TO_WRAP = 0x04,
- RDMAP_INV_RDMAP_VER = 0x05,
- RDMAP_UNEXPECTED_OP = 0x06,
- RDMAP_CATASTROPHIC_LOCAL = 0x07,
- RDMAP_CATASTROPHIC_GLOBAL = 0x08,
- RDMAP_CANT_INV_STAG = 0x09,
- RDMAP_UNSPECIFIED = 0xff
-};
-
-enum nes_term_ddp_errors {
- DDP_CATASTROPHIC_LOCAL = 0x00,
- DDP_TAGGED_INV_STAG = 0x00,
- DDP_TAGGED_BOUNDS = 0x01,
- DDP_TAGGED_UNASSOC_STAG = 0x02,
- DDP_TAGGED_TO_WRAP = 0x03,
- DDP_TAGGED_INV_DDP_VER = 0x04,
- DDP_UNTAGGED_INV_QN = 0x01,
- DDP_UNTAGGED_INV_MSN_NO_BUF = 0x02,
- DDP_UNTAGGED_INV_MSN_RANGE = 0x03,
- DDP_UNTAGGED_INV_MO = 0x04,
- DDP_UNTAGGED_INV_TOO_LONG = 0x05,
- DDP_UNTAGGED_INV_DDP_VER = 0x06
-};
-
-enum nes_term_mpa_errors {
- MPA_CLOSED = 0x01,
- MPA_CRC = 0x02,
- MPA_MARKER = 0x03,
- MPA_REQ_RSP = 0x04,
-};
-
-struct nes_terminate_hdr {
- u8 layer_etype;
- u8 error_code;
- u8 hdrct;
- u8 rsvd;
-};
-
-/* Used to determine how to fill in terminate error codes */
-#define IWARP_OPCODE_WRITE 0
-#define IWARP_OPCODE_READREQ 1
-#define IWARP_OPCODE_READRSP 2
-#define IWARP_OPCODE_SEND 3
-#define IWARP_OPCODE_SEND_INV 4
-#define IWARP_OPCODE_SEND_SE 5
-#define IWARP_OPCODE_SEND_SE_INV 6
-#define IWARP_OPCODE_TERM 7
-
-/* These values are used only during terminate processing */
-#define TERM_DDP_LEN_TAGGED 14
-#define TERM_DDP_LEN_UNTAGGED 18
-#define TERM_RDMA_LEN 28
-#define RDMA_OPCODE_MASK 0x0f
-#define RDMA_READ_REQ_OPCODE 1
-#define BAD_FRAME_OFFSET 64
-#define CQE_MAJOR_DRV 0x8000
-
-/* Used for link status recheck after interrupt processing */
-#define NES_LINK_RECHECK_DELAY msecs_to_jiffies(50)
-#define NES_LINK_RECHECK_MAX 60
-
-#endif /* __NES_HW_H */
diff --git a/drivers/infiniband/hw/nes/nes_mgt.c b/drivers/infiniband/hw/nes/nes_mgt.c
deleted file mode 100644
index 33624f17c347..000000000000
--- a/drivers/infiniband/hw/nes/nes_mgt.c
+++ /dev/null
@@ -1,1156 +0,0 @@
-/*
- * Copyright (c) 2006 - 2011 Intel-NE, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-
-#include <linux/skbuff.h>
-#include <linux/etherdevice.h>
-#include <linux/kthread.h>
-#include <linux/ip.h>
-#include <linux/tcp.h>
-#include <net/tcp.h>
-#include "nes.h"
-#include "nes_mgt.h"
-
-atomic_t pau_qps_created;
-atomic_t pau_qps_destroyed;
-
-static void nes_replenish_mgt_rq(struct nes_vnic_mgt *mgtvnic)
-{
- unsigned long flags;
- dma_addr_t bus_address;
- struct sk_buff *skb;
- struct nes_hw_nic_rq_wqe *nic_rqe;
- struct nes_hw_mgt *nesmgt;
- struct nes_device *nesdev;
- struct nes_rskb_cb *cb;
- u32 rx_wqes_posted = 0;
-
- nesmgt = &mgtvnic->mgt;
- nesdev = mgtvnic->nesvnic->nesdev;
- spin_lock_irqsave(&nesmgt->rq_lock, flags);
- if (nesmgt->replenishing_rq != 0) {
- if (((nesmgt->rq_size - 1) == atomic_read(&mgtvnic->rx_skbs_needed)) &&
- (atomic_read(&mgtvnic->rx_skb_timer_running) == 0)) {
- atomic_set(&mgtvnic->rx_skb_timer_running, 1);
- spin_unlock_irqrestore(&nesmgt->rq_lock, flags);
- mgtvnic->rq_wqes_timer.expires = jiffies + (HZ / 2); /* 1/2 second */
- add_timer(&mgtvnic->rq_wqes_timer);
- } else {
- spin_unlock_irqrestore(&nesmgt->rq_lock, flags);
- }
- return;
- }
- nesmgt->replenishing_rq = 1;
- spin_unlock_irqrestore(&nesmgt->rq_lock, flags);
- do {
- skb = dev_alloc_skb(mgtvnic->nesvnic->max_frame_size);
- if (skb) {
- skb->dev = mgtvnic->nesvnic->netdev;
-
- bus_address = pci_map_single(nesdev->pcidev,
- skb->data, mgtvnic->nesvnic->max_frame_size, PCI_DMA_FROMDEVICE);
- cb = (struct nes_rskb_cb *)&skb->cb[0];
- cb->busaddr = bus_address;
- cb->maplen = mgtvnic->nesvnic->max_frame_size;
-
- nic_rqe = &nesmgt->rq_vbase[mgtvnic->mgt.rq_head];
- nic_rqe->wqe_words[NES_NIC_RQ_WQE_LENGTH_1_0_IDX] =
- cpu_to_le32(mgtvnic->nesvnic->max_frame_size);
- nic_rqe->wqe_words[NES_NIC_RQ_WQE_LENGTH_3_2_IDX] = 0;
- nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_LOW_IDX] =
- cpu_to_le32((u32)bus_address);
- nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_HIGH_IDX] =
- cpu_to_le32((u32)((u64)bus_address >> 32));
- nesmgt->rx_skb[nesmgt->rq_head] = skb;
- nesmgt->rq_head++;
- nesmgt->rq_head &= nesmgt->rq_size - 1;
- atomic_dec(&mgtvnic->rx_skbs_needed);
- barrier();
- if (++rx_wqes_posted == 255) {
- nes_write32(nesdev->regs + NES_WQE_ALLOC, (rx_wqes_posted << 24) | nesmgt->qp_id);
- rx_wqes_posted = 0;
- }
- } else {
- spin_lock_irqsave(&nesmgt->rq_lock, flags);
- if (((nesmgt->rq_size - 1) == atomic_read(&mgtvnic->rx_skbs_needed)) &&
- (atomic_read(&mgtvnic->rx_skb_timer_running) == 0)) {
- atomic_set(&mgtvnic->rx_skb_timer_running, 1);
- spin_unlock_irqrestore(&nesmgt->rq_lock, flags);
- mgtvnic->rq_wqes_timer.expires = jiffies + (HZ / 2); /* 1/2 second */
- add_timer(&mgtvnic->rq_wqes_timer);
- } else {
- spin_unlock_irqrestore(&nesmgt->rq_lock, flags);
- }
- break;
- }
- } while (atomic_read(&mgtvnic->rx_skbs_needed));
- barrier();
- if (rx_wqes_posted)
- nes_write32(nesdev->regs + NES_WQE_ALLOC, (rx_wqes_posted << 24) | nesmgt->qp_id);
- nesmgt->replenishing_rq = 0;
-}
-
-/**
- * nes_mgt_rq_wqes_timeout
- */
-static void nes_mgt_rq_wqes_timeout(unsigned long parm)
-{
- struct nes_vnic_mgt *mgtvnic = (struct nes_vnic_mgt *)parm;
-
- atomic_set(&mgtvnic->rx_skb_timer_running, 0);
- if (atomic_read(&mgtvnic->rx_skbs_needed))
- nes_replenish_mgt_rq(mgtvnic);
-}
-
-/**
- * nes_mgt_free_skb - unmap and free skb
- */
-static void nes_mgt_free_skb(struct nes_device *nesdev, struct sk_buff *skb, u32 dir)
-{
- struct nes_rskb_cb *cb;
-
- cb = (struct nes_rskb_cb *)&skb->cb[0];
- pci_unmap_single(nesdev->pcidev, cb->busaddr, cb->maplen, dir);
- cb->busaddr = 0;
- dev_kfree_skb_any(skb);
-}
-
-/**
- * nes_download_callback - handle download completions
- */
-static void nes_download_callback(struct nes_device *nesdev, struct nes_cqp_request *cqp_request)
-{
- struct pau_fpdu_info *fpdu_info = cqp_request->cqp_callback_pointer;
- struct nes_qp *nesqp = fpdu_info->nesqp;
- struct sk_buff *skb;
- int i;
-
- for (i = 0; i < fpdu_info->frag_cnt; i++) {
- skb = fpdu_info->frags[i].skb;
- if (fpdu_info->frags[i].cmplt) {
- nes_mgt_free_skb(nesdev, skb, PCI_DMA_TODEVICE);
- nes_rem_ref_cm_node(nesqp->cm_node);
- }
- }
-
- if (fpdu_info->hdr_vbase)
- pci_free_consistent(nesdev->pcidev, fpdu_info->hdr_len,
- fpdu_info->hdr_vbase, fpdu_info->hdr_pbase);
- kfree(fpdu_info);
-}
-
-/**
- * nes_get_seq - Get the seq, ack_seq and window from the packet
- */
-static u32 nes_get_seq(struct sk_buff *skb, u32 *ack, u16 *wnd, u32 *fin_rcvd, u32 *rst_rcvd)
-{
- struct nes_rskb_cb *cb = (struct nes_rskb_cb *)&skb->cb[0];
- struct iphdr *iph = (struct iphdr *)(cb->data_start + ETH_HLEN);
- struct tcphdr *tcph = (struct tcphdr *)(((char *)iph) + (4 * iph->ihl));
-
- *ack = be32_to_cpu(tcph->ack_seq);
- *wnd = be16_to_cpu(tcph->window);
- *fin_rcvd = tcph->fin;
- *rst_rcvd = tcph->rst;
- return be32_to_cpu(tcph->seq);
-}
-
-/**
- * nes_get_next_skb - Get the next skb based on where current skb is in the queue
- */
-static struct sk_buff *nes_get_next_skb(struct nes_device *nesdev, struct nes_qp *nesqp,
- struct sk_buff *skb, u32 nextseq, u32 *ack,
- u16 *wnd, u32 *fin_rcvd, u32 *rst_rcvd)
-{
- u32 seq;
- bool processacks;
- struct sk_buff *old_skb;
-
- if (skb) {
- /* Continue processing fpdu */
- if (skb->next == (struct sk_buff *)&nesqp->pau_list)
- goto out;
- skb = skb->next;
- processacks = false;
- } else {
- /* Starting a new one */
- if (skb_queue_empty(&nesqp->pau_list))
- goto out;
- skb = skb_peek(&nesqp->pau_list);
- processacks = true;
- }
-
- while (1) {
- if (skb_queue_empty(&nesqp->pau_list))
- goto out;
-
- seq = nes_get_seq(skb, ack, wnd, fin_rcvd, rst_rcvd);
- if (seq == nextseq) {
- if (skb->len || processacks)
- break;
- } else if (after(seq, nextseq)) {
- goto out;
- }
-
- old_skb = skb;
- skb = skb->next;
- skb_unlink(old_skb, &nesqp->pau_list);
- nes_mgt_free_skb(nesdev, old_skb, PCI_DMA_TODEVICE);
- nes_rem_ref_cm_node(nesqp->cm_node);
- if (skb == (struct sk_buff *)&nesqp->pau_list)
- goto out;
- }
- return skb;
-
-out:
- return NULL;
-}
-
-/**
- * get_fpdu_info - Find the next complete fpdu and return its fragments.
- */
-static int get_fpdu_info(struct nes_device *nesdev, struct nes_qp *nesqp,
- struct pau_fpdu_info **pau_fpdu_info)
-{
- struct sk_buff *skb;
- struct iphdr *iph;
- struct tcphdr *tcph;
- struct nes_rskb_cb *cb;
- struct pau_fpdu_info *fpdu_info = NULL;
- struct pau_fpdu_frag frags[MAX_FPDU_FRAGS];
- u32 fpdu_len = 0;
- u32 tmp_len;
- int frag_cnt = 0;
- u32 tot_len;
- u32 frag_tot;
- u32 ack;
- u32 fin_rcvd;
- u32 rst_rcvd;
- u16 wnd;
- int i;
- int rc = 0;
-
- *pau_fpdu_info = NULL;
-
- skb = nes_get_next_skb(nesdev, nesqp, NULL, nesqp->pau_rcv_nxt, &ack, &wnd, &fin_rcvd, &rst_rcvd);
- if (!skb)
- goto out;
-
- cb = (struct nes_rskb_cb *)&skb->cb[0];
- if (skb->len) {
- fpdu_len = be16_to_cpu(*(__be16 *) skb->data) + MPA_FRAMING;
- fpdu_len = (fpdu_len + 3) & 0xfffffffc;
- tmp_len = fpdu_len;
-
- /* See if we have all of the fpdu */
- frag_tot = 0;
- memset(&frags, 0, sizeof frags);
- for (i = 0; i < MAX_FPDU_FRAGS; i++) {
- frags[i].physaddr = cb->busaddr;
- frags[i].physaddr += skb->data - cb->data_start;
- frags[i].frag_len = min(tmp_len, skb->len);
- frags[i].skb = skb;
- frags[i].cmplt = (skb->len == frags[i].frag_len);
- frag_tot += frags[i].frag_len;
- frag_cnt++;
-
- tmp_len -= frags[i].frag_len;
- if (tmp_len == 0)
- break;
-
- skb = nes_get_next_skb(nesdev, nesqp, skb,
- nesqp->pau_rcv_nxt + frag_tot, &ack, &wnd, &fin_rcvd, &rst_rcvd);
- if (!skb)
- goto out;
- if (rst_rcvd) {
- /* rst received in the middle of fpdu */
- for (; i >= 0; i--) {
- skb_unlink(frags[i].skb, &nesqp->pau_list);
- nes_mgt_free_skb(nesdev, frags[i].skb, PCI_DMA_TODEVICE);
- }
- cb = (struct nes_rskb_cb *)&skb->cb[0];
- frags[0].physaddr = cb->busaddr;
- frags[0].physaddr += skb->data - cb->data_start;
- frags[0].frag_len = skb->len;
- frags[0].skb = skb;
- frags[0].cmplt = true;
- frag_cnt = 1;
- break;
- }
-
- cb = (struct nes_rskb_cb *)&skb->cb[0];
- }
- } else {
- /* no data */
- frags[0].physaddr = cb->busaddr;
- frags[0].frag_len = 0;
- frags[0].skb = skb;
- frags[0].cmplt = true;
- frag_cnt = 1;
- }
-
- /* Found one */
- fpdu_info = kzalloc(sizeof(*fpdu_info), GFP_ATOMIC);
- if (!fpdu_info) {
- rc = -ENOMEM;
- goto out;
- }
-
- fpdu_info->cqp_request = nes_get_cqp_request(nesdev);
- if (fpdu_info->cqp_request == NULL) {
- nes_debug(NES_DBG_PAU, "Failed to get a cqp_request.\n");
- rc = -ENOMEM;
- goto out;
- }
-
- cb = (struct nes_rskb_cb *)&frags[0].skb->cb[0];
- iph = (struct iphdr *)(cb->data_start + ETH_HLEN);
- tcph = (struct tcphdr *)(((char *)iph) + (4 * iph->ihl));
- fpdu_info->hdr_len = (((unsigned char *)tcph) + 4 * (tcph->doff)) - cb->data_start;
- fpdu_info->data_len = fpdu_len;
- tot_len = fpdu_info->hdr_len + fpdu_len - ETH_HLEN;
-
- if (frags[0].cmplt) {
- fpdu_info->hdr_pbase = cb->busaddr;
- fpdu_info->hdr_vbase = NULL;
- } else {
- fpdu_info->hdr_vbase = pci_alloc_consistent(nesdev->pcidev,
- fpdu_info->hdr_len, &fpdu_info->hdr_pbase);
- if (!fpdu_info->hdr_vbase) {
- nes_debug(NES_DBG_PAU, "Unable to allocate memory for pau first frag\n");
- rc = -ENOMEM;
- goto out;
- }
-
- /* Copy hdrs, adjusting len and seqnum */
- memcpy(fpdu_info->hdr_vbase, cb->data_start, fpdu_info->hdr_len);
- iph = (struct iphdr *)(fpdu_info->hdr_vbase + ETH_HLEN);
- tcph = (struct tcphdr *)(((char *)iph) + (4 * iph->ihl));
- }
-
- iph->tot_len = cpu_to_be16(tot_len);
- iph->saddr = cpu_to_be32(0x7f000001);
-
- tcph->seq = cpu_to_be32(nesqp->pau_rcv_nxt);
- tcph->ack_seq = cpu_to_be32(ack);
- tcph->window = cpu_to_be16(wnd);
-
- nesqp->pau_rcv_nxt += fpdu_len + fin_rcvd;
-
- memcpy(fpdu_info->frags, frags, sizeof(fpdu_info->frags));
- fpdu_info->frag_cnt = frag_cnt;
- fpdu_info->nesqp = nesqp;
- *pau_fpdu_info = fpdu_info;
-
- /* Update skb's for next pass */
- for (i = 0; i < frag_cnt; i++) {
- cb = (struct nes_rskb_cb *)&frags[i].skb->cb[0];
- skb_pull(frags[i].skb, frags[i].frag_len);
-
- if (frags[i].skb->len == 0) {
- /* Pull skb off the list - it will be freed in the callback */
- if (!skb_queue_empty(&nesqp->pau_list))
- skb_unlink(frags[i].skb, &nesqp->pau_list);
- } else {
- /* Last skb still has data so update the seq */
- iph = (struct iphdr *)(cb->data_start + ETH_HLEN);
- tcph = (struct tcphdr *)(((char *)iph) + (4 * iph->ihl));
- tcph->seq = cpu_to_be32(nesqp->pau_rcv_nxt);
- }
- }
-
-out:
- if (rc) {
- if (fpdu_info) {
- if (fpdu_info->cqp_request)
- nes_put_cqp_request(nesdev, fpdu_info->cqp_request);
- kfree(fpdu_info);
- }
- }
- return rc;
-}
-
-/**
- * forward_fpdu - send complete fpdus, one at a time
- */
-static int forward_fpdus(struct nes_vnic *nesvnic, struct nes_qp *nesqp)
-{
- struct nes_device *nesdev = nesvnic->nesdev;
- struct pau_fpdu_info *fpdu_info;
- struct nes_hw_cqp_wqe *cqp_wqe;
- struct nes_cqp_request *cqp_request;
- unsigned long flags;
- u64 u64tmp;
- u32 u32tmp;
- int rc;
-
- while (1) {
- spin_lock_irqsave(&nesqp->pau_lock, flags);
- rc = get_fpdu_info(nesdev, nesqp, &fpdu_info);
- if (rc || (fpdu_info == NULL)) {
- spin_unlock_irqrestore(&nesqp->pau_lock, flags);
- return rc;
- }
-
- cqp_request = fpdu_info->cqp_request;
- cqp_wqe = &cqp_request->cqp_wqe;
- nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_DL_OPCODE_IDX,
- NES_CQP_DOWNLOAD_SEGMENT |
- (((u32)nesvnic->logical_port) << NES_CQP_OP_LOGICAL_PORT_SHIFT));
-
- u32tmp = fpdu_info->hdr_len << 16;
- u32tmp |= fpdu_info->hdr_len + (u32)fpdu_info->data_len;
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_DL_LENGTH_0_TOTAL_IDX,
- u32tmp);
-
- u32tmp = (fpdu_info->frags[1].frag_len << 16) | fpdu_info->frags[0].frag_len;
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_LENGTH_2_1_IDX,
- u32tmp);
-
- u32tmp = (fpdu_info->frags[3].frag_len << 16) | fpdu_info->frags[2].frag_len;
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_LENGTH_4_3_IDX,
- u32tmp);
-
- u64tmp = (u64)fpdu_info->hdr_pbase;
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG0_LOW_IDX,
- lower_32_bits(u64tmp));
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG0_HIGH_IDX,
- upper_32_bits(u64tmp));
-
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG1_LOW_IDX,
- lower_32_bits(fpdu_info->frags[0].physaddr));
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG1_HIGH_IDX,
- upper_32_bits(fpdu_info->frags[0].physaddr));
-
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG2_LOW_IDX,
- lower_32_bits(fpdu_info->frags[1].physaddr));
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG2_HIGH_IDX,
- upper_32_bits(fpdu_info->frags[1].physaddr));
-
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG3_LOW_IDX,
- lower_32_bits(fpdu_info->frags[2].physaddr));
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG3_HIGH_IDX,
- upper_32_bits(fpdu_info->frags[2].physaddr));
-
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG4_LOW_IDX,
- lower_32_bits(fpdu_info->frags[3].physaddr));
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG4_HIGH_IDX,
- upper_32_bits(fpdu_info->frags[3].physaddr));
-
- cqp_request->cqp_callback_pointer = fpdu_info;
- cqp_request->callback = 1;
- cqp_request->cqp_callback = nes_download_callback;
-
- atomic_set(&cqp_request->refcount, 1);
- nes_post_cqp_request(nesdev, cqp_request);
- spin_unlock_irqrestore(&nesqp->pau_lock, flags);
- }
-
- return 0;
-}
-
-static void process_fpdus(struct nes_vnic *nesvnic, struct nes_qp *nesqp)
-{
- int again = 1;
- unsigned long flags;
-
- do {
- /* Ignore rc - if it failed, tcp retries will cause it to try again */
- forward_fpdus(nesvnic, nesqp);
-
- spin_lock_irqsave(&nesqp->pau_lock, flags);
- if (nesqp->pau_pending) {
- nesqp->pau_pending = 0;
- } else {
- nesqp->pau_busy = 0;
- again = 0;
- }
-
- spin_unlock_irqrestore(&nesqp->pau_lock, flags);
- } while (again);
-}
-
-/**
- * queue_fpdus - Handle fpdu's that hw passed up to sw
- */
-static void queue_fpdus(struct sk_buff *skb, struct nes_vnic *nesvnic, struct nes_qp *nesqp)
-{
- struct sk_buff *tmpskb;
- struct nes_rskb_cb *cb;
- struct iphdr *iph;
- struct tcphdr *tcph;
- unsigned char *tcph_end;
- u32 rcv_nxt;
- u32 rcv_wnd;
- u32 seqnum;
- u32 len;
- bool process_it = false;
- unsigned long flags;
-
- /* Move data ptr to after tcp header */
- iph = (struct iphdr *)skb->data;
- tcph = (struct tcphdr *)(((char *)iph) + (4 * iph->ihl));
- seqnum = be32_to_cpu(tcph->seq);
- tcph_end = (((char *)tcph) + (4 * tcph->doff));
-
- len = be16_to_cpu(iph->tot_len);
- if (skb->len > len)
- skb_trim(skb, len);
- skb_pull(skb, tcph_end - skb->data);
-
- /* Initialize tracking values */
- cb = (struct nes_rskb_cb *)&skb->cb[0];
- cb->seqnum = seqnum;
-
- /* Make sure data is in the receive window */
- rcv_nxt = nesqp->pau_rcv_nxt;
- rcv_wnd = le32_to_cpu(nesqp->nesqp_context->rcv_wnd);
- if (!between(seqnum, rcv_nxt, (rcv_nxt + rcv_wnd))) {
- nes_mgt_free_skb(nesvnic->nesdev, skb, PCI_DMA_TODEVICE);
- nes_rem_ref_cm_node(nesqp->cm_node);
- return;
- }
-
- spin_lock_irqsave(&nesqp->pau_lock, flags);
-
- if (nesqp->pau_busy)
- nesqp->pau_pending = 1;
- else
- nesqp->pau_busy = 1;
-
- /* Queue skb by sequence number */
- if (skb_queue_len(&nesqp->pau_list) == 0) {
- skb_queue_head(&nesqp->pau_list, skb);
- } else {
- tmpskb = nesqp->pau_list.next;
- while (tmpskb != (struct sk_buff *)&nesqp->pau_list) {
- cb = (struct nes_rskb_cb *)&tmpskb->cb[0];
- if (before(seqnum, cb->seqnum))
- break;
- tmpskb = tmpskb->next;
- }
- skb_insert(tmpskb, skb, &nesqp->pau_list);
- }
- if (nesqp->pau_state == PAU_READY)
- process_it = true;
- spin_unlock_irqrestore(&nesqp->pau_lock, flags);
-
- if (process_it)
- process_fpdus(nesvnic, nesqp);
-
- return;
-}
-
-/**
- * mgt_thread - Handle mgt skbs in a safe context
- */
-static int mgt_thread(void *context)
-{
- struct nes_vnic *nesvnic = context;
- struct sk_buff *skb;
- struct nes_rskb_cb *cb;
-
- while (!kthread_should_stop()) {
- wait_event_interruptible(nesvnic->mgt_wait_queue,
- skb_queue_len(&nesvnic->mgt_skb_list) || kthread_should_stop());
- while ((skb_queue_len(&nesvnic->mgt_skb_list)) && !kthread_should_stop()) {
- skb = skb_dequeue(&nesvnic->mgt_skb_list);
- cb = (struct nes_rskb_cb *)&skb->cb[0];
- cb->data_start = skb->data - ETH_HLEN;
- cb->busaddr = pci_map_single(nesvnic->nesdev->pcidev, cb->data_start,
- nesvnic->max_frame_size, PCI_DMA_TODEVICE);
- queue_fpdus(skb, nesvnic, cb->nesqp);
- }
- }
-
- /* Closing down so delete any entries on the queue */
- while (skb_queue_len(&nesvnic->mgt_skb_list)) {
- skb = skb_dequeue(&nesvnic->mgt_skb_list);
- cb = (struct nes_rskb_cb *)&skb->cb[0];
- nes_rem_ref_cm_node(cb->nesqp->cm_node);
- dev_kfree_skb_any(skb);
- }
- return 0;
-}
-
-/**
- * nes_queue_skbs - Queue skb so it can be handled in a thread context
- */
-void nes_queue_mgt_skbs(struct sk_buff *skb, struct nes_vnic *nesvnic, struct nes_qp *nesqp)
-{
- struct nes_rskb_cb *cb;
-
- cb = (struct nes_rskb_cb *)&skb->cb[0];
- cb->nesqp = nesqp;
- skb_queue_tail(&nesvnic->mgt_skb_list, skb);
- wake_up_interruptible(&nesvnic->mgt_wait_queue);
-}
-
-void nes_destroy_pau_qp(struct nes_device *nesdev, struct nes_qp *nesqp)
-{
- struct sk_buff *skb;
- unsigned long flags;
- atomic_inc(&pau_qps_destroyed);
-
- /* Free packets that have not yet been forwarded */
- /* Lock is acquired by skb_dequeue when removing the skb */
- spin_lock_irqsave(&nesqp->pau_lock, flags);
- while (skb_queue_len(&nesqp->pau_list)) {
- skb = skb_dequeue(&nesqp->pau_list);
- nes_mgt_free_skb(nesdev, skb, PCI_DMA_TODEVICE);
- nes_rem_ref_cm_node(nesqp->cm_node);
- }
- spin_unlock_irqrestore(&nesqp->pau_lock, flags);
-}
-
-static void nes_chg_qh_handler(struct nes_device *nesdev, struct nes_cqp_request *cqp_request)
-{
- struct pau_qh_chg *qh_chg = cqp_request->cqp_callback_pointer;
- struct nes_cqp_request *new_request;
- struct nes_hw_cqp_wqe *cqp_wqe;
- struct nes_adapter *nesadapter;
- struct nes_qp *nesqp;
- struct nes_v4_quad nes_quad;
- u32 crc_value;
- u64 u64temp;
-
- nesadapter = nesdev->nesadapter;
- nesqp = qh_chg->nesqp;
-
- /* Should we handle the bad completion */
- if (cqp_request->major_code)
- WARN(1, PFX "Invalid cqp_request major_code=0x%x\n",
- cqp_request->major_code);
-
- switch (nesqp->pau_state) {
- case PAU_DEL_QH:
- /* Old hash code deleted, now set the new one */
- nesqp->pau_state = PAU_ADD_LB_QH;
- new_request = nes_get_cqp_request(nesdev);
- if (new_request == NULL) {
- nes_debug(NES_DBG_PAU, "Failed to get a new_request.\n");
- WARN_ON(1);
- return;
- }
-
- memset(&nes_quad, 0, sizeof(nes_quad));
- nes_quad.DstIpAdrIndex =
- cpu_to_le32((u32)PCI_FUNC(nesdev->pcidev->devfn) << 24);
- nes_quad.SrcIpadr = cpu_to_be32(0x7f000001);
- nes_quad.TcpPorts[0] = swab16(nesqp->nesqp_context->tcpPorts[1]);
- nes_quad.TcpPorts[1] = swab16(nesqp->nesqp_context->tcpPorts[0]);
-
- /* Produce hash key */
- crc_value = get_crc_value(&nes_quad);
- nesqp->hte_index = cpu_to_be32(crc_value ^ 0xffffffff);
- nes_debug(NES_DBG_PAU, "new HTE Index = 0x%08X, CRC = 0x%08X\n",
- nesqp->hte_index, nesqp->hte_index & nesadapter->hte_index_mask);
-
- nesqp->hte_index &= nesadapter->hte_index_mask;
- nesqp->nesqp_context->hte_index = cpu_to_le32(nesqp->hte_index);
- nesqp->nesqp_context->ip0 = cpu_to_le32(0x7f000001);
- nesqp->nesqp_context->rcv_nxt = cpu_to_le32(nesqp->pau_rcv_nxt);
-
- cqp_wqe = &new_request->cqp_wqe;
- nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
- set_wqe_32bit_value(cqp_wqe->wqe_words,
- NES_CQP_WQE_OPCODE_IDX, NES_CQP_MANAGE_QUAD_HASH |
- NES_CQP_QP_TYPE_IWARP | NES_CQP_QP_CONTEXT_VALID | NES_CQP_QP_IWARP_STATE_RTS);
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX, nesqp->hwqp.qp_id);
- u64temp = (u64)nesqp->nesqp_context_pbase;
- set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_QP_WQE_CONTEXT_LOW_IDX, u64temp);
-
- nes_debug(NES_DBG_PAU, "Waiting for CQP completion for adding the quad hash.\n");
-
- new_request->cqp_callback_pointer = qh_chg;
- new_request->callback = 1;
- new_request->cqp_callback = nes_chg_qh_handler;
- atomic_set(&new_request->refcount, 1);
- nes_post_cqp_request(nesdev, new_request);
- break;
-
- case PAU_ADD_LB_QH:
- /* Start processing the queued fpdu's */
- nesqp->pau_state = PAU_READY;
- process_fpdus(qh_chg->nesvnic, qh_chg->nesqp);
- kfree(qh_chg);
- break;
- }
-}
-
-/**
- * nes_change_quad_hash
- */
-static int nes_change_quad_hash(struct nes_device *nesdev,
- struct nes_vnic *nesvnic, struct nes_qp *nesqp)
-{
- struct nes_cqp_request *cqp_request = NULL;
- struct pau_qh_chg *qh_chg = NULL;
- u64 u64temp;
- struct nes_hw_cqp_wqe *cqp_wqe;
- int ret = 0;
-
- cqp_request = nes_get_cqp_request(nesdev);
- if (cqp_request == NULL) {
- nes_debug(NES_DBG_PAU, "Failed to get a cqp_request.\n");
- ret = -ENOMEM;
- goto chg_qh_err;
- }
-
- qh_chg = kmalloc(sizeof *qh_chg, GFP_ATOMIC);
- if (!qh_chg) {
- ret = -ENOMEM;
- goto chg_qh_err;
- }
- qh_chg->nesdev = nesdev;
- qh_chg->nesvnic = nesvnic;
- qh_chg->nesqp = nesqp;
- nesqp->pau_state = PAU_DEL_QH;
-
- cqp_wqe = &cqp_request->cqp_wqe;
- nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
- set_wqe_32bit_value(cqp_wqe->wqe_words,
- NES_CQP_WQE_OPCODE_IDX, NES_CQP_MANAGE_QUAD_HASH | NES_CQP_QP_DEL_HTE |
- NES_CQP_QP_TYPE_IWARP | NES_CQP_QP_CONTEXT_VALID | NES_CQP_QP_IWARP_STATE_RTS);
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX, nesqp->hwqp.qp_id);
- u64temp = (u64)nesqp->nesqp_context_pbase;
- set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_QP_WQE_CONTEXT_LOW_IDX, u64temp);
-
- nes_debug(NES_DBG_PAU, "Waiting for CQP completion for deleting the quad hash.\n");
-
- cqp_request->cqp_callback_pointer = qh_chg;
- cqp_request->callback = 1;
- cqp_request->cqp_callback = nes_chg_qh_handler;
- atomic_set(&cqp_request->refcount, 1);
- nes_post_cqp_request(nesdev, cqp_request);
-
- return ret;
-
-chg_qh_err:
- kfree(qh_chg);
- if (cqp_request)
- nes_put_cqp_request(nesdev, cqp_request);
- return ret;
-}
-
-/**
- * nes_mgt_ce_handler
- * This management code deals with any packed and unaligned (pau) fpdu's
- * that the hardware cannot handle.
- */
-static void nes_mgt_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq)
-{
- struct nes_vnic_mgt *mgtvnic = container_of(cq, struct nes_vnic_mgt, mgt_cq);
- struct nes_adapter *nesadapter = nesdev->nesadapter;
- u32 head;
- u32 cq_size;
- u32 cqe_count = 0;
- u32 cqe_misc;
- u32 qp_id = 0;
- u32 skbs_needed;
- unsigned long context;
- struct nes_qp *nesqp;
- struct sk_buff *rx_skb;
- struct nes_rskb_cb *cb;
-
- head = cq->cq_head;
- cq_size = cq->cq_size;
-
- while (1) {
- cqe_misc = le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_NIC_CQE_MISC_IDX]);
- if (!(cqe_misc & NES_NIC_CQE_VALID))
- break;
-
- nesqp = NULL;
- if (cqe_misc & NES_NIC_CQE_ACCQP_VALID) {
- qp_id = le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_NIC_CQE_ACCQP_ID_IDX]);
- qp_id &= 0x001fffff;
- if (qp_id < nesadapter->max_qp) {
- context = (unsigned long)nesadapter->qp_table[qp_id - NES_FIRST_QPN];
- nesqp = (struct nes_qp *)context;
- }
- }
-
- if (nesqp) {
- if (nesqp->pau_mode == false) {
- nesqp->pau_mode = true; /* First time for this qp */
- nesqp->pau_rcv_nxt = le32_to_cpu(
- cq->cq_vbase[head].cqe_words[NES_NIC_CQE_HASH_RCVNXT]);
- skb_queue_head_init(&nesqp->pau_list);
- spin_lock_init(&nesqp->pau_lock);
- atomic_inc(&pau_qps_created);
- nes_change_quad_hash(nesdev, mgtvnic->nesvnic, nesqp);
- }
-
- rx_skb = mgtvnic->mgt.rx_skb[mgtvnic->mgt.rq_tail];
- rx_skb->len = 0;
- skb_put(rx_skb, cqe_misc & 0x0000ffff);
- rx_skb->protocol = eth_type_trans(rx_skb, mgtvnic->nesvnic->netdev);
- cb = (struct nes_rskb_cb *)&rx_skb->cb[0];
- pci_unmap_single(nesdev->pcidev, cb->busaddr, cb->maplen, PCI_DMA_FROMDEVICE);
- cb->busaddr = 0;
- mgtvnic->mgt.rq_tail++;
- mgtvnic->mgt.rq_tail &= mgtvnic->mgt.rq_size - 1;
-
- nes_add_ref_cm_node(nesqp->cm_node);
- nes_queue_mgt_skbs(rx_skb, mgtvnic->nesvnic, nesqp);
- } else {
- printk(KERN_ERR PFX "Invalid QP %d for packed/unaligned handling\n", qp_id);
- }
-
- cq->cq_vbase[head].cqe_words[NES_NIC_CQE_MISC_IDX] = 0;
- cqe_count++;
- if (++head >= cq_size)
- head = 0;
-
- if (cqe_count == 255) {
- /* Replenish mgt CQ */
- nes_write32(nesdev->regs + NES_CQE_ALLOC, cq->cq_number | (cqe_count << 16));
- nesdev->currcq_count += cqe_count;
- cqe_count = 0;
- }
-
- skbs_needed = atomic_inc_return(&mgtvnic->rx_skbs_needed);
- if (skbs_needed > (mgtvnic->mgt.rq_size >> 1))
- nes_replenish_mgt_rq(mgtvnic);
- }
-
- cq->cq_head = head;
- nes_write32(nesdev->regs + NES_CQE_ALLOC, NES_CQE_ALLOC_NOTIFY_NEXT |
- cq->cq_number | (cqe_count << 16));
- nes_read32(nesdev->regs + NES_CQE_ALLOC);
- nesdev->currcq_count += cqe_count;
-}
-
-/**
- * nes_init_mgt_qp
- */
-int nes_init_mgt_qp(struct nes_device *nesdev, struct net_device *netdev, struct nes_vnic *nesvnic)
-{
- struct nes_vnic_mgt *mgtvnic;
- u32 counter;
- void *vmem;
- dma_addr_t pmem;
- struct nes_hw_cqp_wqe *cqp_wqe;
- u32 cqp_head;
- unsigned long flags;
- struct nes_hw_nic_qp_context *mgt_context;
- u64 u64temp;
- struct nes_hw_nic_rq_wqe *mgt_rqe;
- struct sk_buff *skb;
- u32 wqe_count;
- struct nes_rskb_cb *cb;
- u32 mgt_mem_size;
- void *mgt_vbase;
- dma_addr_t mgt_pbase;
- int i;
- int ret;
-
- /* Allocate space the all mgt QPs once */
- mgtvnic = kzalloc(NES_MGT_QP_COUNT * sizeof(struct nes_vnic_mgt), GFP_KERNEL);
- if (!mgtvnic)
- return -ENOMEM;
-
- /* Allocate fragment, RQ, and CQ; Reuse CEQ based on the PCI function */
- /* We are not sending from this NIC so sq is not allocated */
- mgt_mem_size = 256 +
- (NES_MGT_WQ_COUNT * sizeof(struct nes_hw_nic_rq_wqe)) +
- (NES_MGT_WQ_COUNT * sizeof(struct nes_hw_nic_cqe)) +
- sizeof(struct nes_hw_nic_qp_context);
- mgt_mem_size = (mgt_mem_size + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1);
- mgt_vbase = pci_alloc_consistent(nesdev->pcidev, NES_MGT_QP_COUNT * mgt_mem_size, &mgt_pbase);
- if (!mgt_vbase) {
- kfree(mgtvnic);
- nes_debug(NES_DBG_INIT, "Unable to allocate memory for mgt host descriptor rings\n");
- return -ENOMEM;
- }
-
- nesvnic->mgt_mem_size = NES_MGT_QP_COUNT * mgt_mem_size;
- nesvnic->mgt_vbase = mgt_vbase;
- nesvnic->mgt_pbase = mgt_pbase;
-
- skb_queue_head_init(&nesvnic->mgt_skb_list);
- init_waitqueue_head(&nesvnic->mgt_wait_queue);
- nesvnic->mgt_thread = kthread_run(mgt_thread, nesvnic, "nes_mgt_thread");
-
- for (i = 0; i < NES_MGT_QP_COUNT; i++) {
- mgtvnic->nesvnic = nesvnic;
- mgtvnic->mgt.qp_id = nesdev->mac_index + NES_MGT_QP_OFFSET + i;
- memset(mgt_vbase, 0, mgt_mem_size);
- nes_debug(NES_DBG_INIT, "Allocated mgt QP structures at %p (phys = %016lX), size = %u.\n",
- mgt_vbase, (unsigned long)mgt_pbase, mgt_mem_size);
-
- vmem = (void *)(((unsigned long)mgt_vbase + (256 - 1)) &
- ~(unsigned long)(256 - 1));
- pmem = (dma_addr_t)(((unsigned long long)mgt_pbase + (256 - 1)) &
- ~(unsigned long long)(256 - 1));
-
- spin_lock_init(&mgtvnic->mgt.rq_lock);
-
- /* setup the RQ */
- mgtvnic->mgt.rq_vbase = vmem;
- mgtvnic->mgt.rq_pbase = pmem;
- mgtvnic->mgt.rq_head = 0;
- mgtvnic->mgt.rq_tail = 0;
- mgtvnic->mgt.rq_size = NES_MGT_WQ_COUNT;
-
- /* setup the CQ */
- vmem += (NES_MGT_WQ_COUNT * sizeof(struct nes_hw_nic_rq_wqe));
- pmem += (NES_MGT_WQ_COUNT * sizeof(struct nes_hw_nic_rq_wqe));
-
- mgtvnic->mgt_cq.cq_number = mgtvnic->mgt.qp_id;
- mgtvnic->mgt_cq.cq_vbase = vmem;
- mgtvnic->mgt_cq.cq_pbase = pmem;
- mgtvnic->mgt_cq.cq_head = 0;
- mgtvnic->mgt_cq.cq_size = NES_MGT_WQ_COUNT;
-
- mgtvnic->mgt_cq.ce_handler = nes_mgt_ce_handler;
-
- /* Send CreateCQ request to CQP */
- spin_lock_irqsave(&nesdev->cqp.lock, flags);
- cqp_head = nesdev->cqp.sq_head;
-
- cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head];
- nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
-
- cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32(
- NES_CQP_CREATE_CQ | NES_CQP_CQ_CEQ_VALID |
- ((u32)mgtvnic->mgt_cq.cq_size << 16));
- cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX] = cpu_to_le32(
- mgtvnic->mgt_cq.cq_number | ((u32)nesdev->ceq_index << 16));
- u64temp = (u64)mgtvnic->mgt_cq.cq_pbase;
- set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_CQ_WQE_PBL_LOW_IDX, u64temp);
- cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_HIGH_IDX] = 0;
- u64temp = (unsigned long)&mgtvnic->mgt_cq;
- cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_LOW_IDX] = cpu_to_le32((u32)(u64temp >> 1));
- cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_HIGH_IDX] =
- cpu_to_le32(((u32)((u64temp) >> 33)) & 0x7FFFFFFF);
- cqp_wqe->wqe_words[NES_CQP_CQ_WQE_DOORBELL_INDEX_HIGH_IDX] = 0;
-
- if (++cqp_head >= nesdev->cqp.sq_size)
- cqp_head = 0;
- cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head];
- nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
-
- /* Send CreateQP request to CQP */
- mgt_context = (void *)(&mgtvnic->mgt_cq.cq_vbase[mgtvnic->mgt_cq.cq_size]);
- mgt_context->context_words[NES_NIC_CTX_MISC_IDX] =
- cpu_to_le32((u32)NES_MGT_CTX_SIZE |
- ((u32)PCI_FUNC(nesdev->pcidev->devfn) << 12));
- nes_debug(NES_DBG_INIT, "RX_WINDOW_BUFFER_PAGE_TABLE_SIZE = 0x%08X, RX_WINDOW_BUFFER_SIZE = 0x%08X\n",
- nes_read_indexed(nesdev, NES_IDX_RX_WINDOW_BUFFER_PAGE_TABLE_SIZE),
- nes_read_indexed(nesdev, NES_IDX_RX_WINDOW_BUFFER_SIZE));
- if (nes_read_indexed(nesdev, NES_IDX_RX_WINDOW_BUFFER_SIZE) != 0)
- mgt_context->context_words[NES_NIC_CTX_MISC_IDX] |= cpu_to_le32(NES_NIC_BACK_STORE);
-
- u64temp = (u64)mgtvnic->mgt.rq_pbase;
- mgt_context->context_words[NES_NIC_CTX_SQ_LOW_IDX] = cpu_to_le32((u32)u64temp);
- mgt_context->context_words[NES_NIC_CTX_SQ_HIGH_IDX] = cpu_to_le32((u32)(u64temp >> 32));
- u64temp = (u64)mgtvnic->mgt.rq_pbase;
- mgt_context->context_words[NES_NIC_CTX_RQ_LOW_IDX] = cpu_to_le32((u32)u64temp);
- mgt_context->context_words[NES_NIC_CTX_RQ_HIGH_IDX] = cpu_to_le32((u32)(u64temp >> 32));
-
- cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32(NES_CQP_CREATE_QP |
- NES_CQP_QP_TYPE_NIC);
- cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX] = cpu_to_le32(mgtvnic->mgt.qp_id);
- u64temp = (u64)mgtvnic->mgt_cq.cq_pbase +
- (mgtvnic->mgt_cq.cq_size * sizeof(struct nes_hw_nic_cqe));
- set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_QP_WQE_CONTEXT_LOW_IDX, u64temp);
-
- if (++cqp_head >= nesdev->cqp.sq_size)
- cqp_head = 0;
- nesdev->cqp.sq_head = cqp_head;
-
- barrier();
-
- /* Ring doorbell (2 WQEs) */
- nes_write32(nesdev->regs + NES_WQE_ALLOC, 0x02800000 | nesdev->cqp.qp_id);
-
- spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
- nes_debug(NES_DBG_INIT, "Waiting for create MGT QP%u to complete.\n",
- mgtvnic->mgt.qp_id);
-
- ret = wait_event_timeout(nesdev->cqp.waitq, (nesdev->cqp.sq_tail == cqp_head),
- NES_EVENT_TIMEOUT);
- nes_debug(NES_DBG_INIT, "Create MGT QP%u completed, wait_event_timeout ret = %u.\n",
- mgtvnic->mgt.qp_id, ret);
- if (!ret) {
- nes_debug(NES_DBG_INIT, "MGT QP%u create timeout expired\n", mgtvnic->mgt.qp_id);
- if (i == 0) {
- pci_free_consistent(nesdev->pcidev, nesvnic->mgt_mem_size, nesvnic->mgt_vbase,
- nesvnic->mgt_pbase);
- kfree(mgtvnic);
- } else {
- nes_destroy_mgt(nesvnic);
- }
- return -EIO;
- }
-
- /* Populate the RQ */
- for (counter = 0; counter < (NES_MGT_WQ_COUNT - 1); counter++) {
- skb = dev_alloc_skb(nesvnic->max_frame_size);
- if (!skb) {
- nes_debug(NES_DBG_INIT, "%s: out of memory for receive skb\n", netdev->name);
- return -ENOMEM;
- }
-
- skb->dev = netdev;
-
- pmem = pci_map_single(nesdev->pcidev, skb->data,
- nesvnic->max_frame_size, PCI_DMA_FROMDEVICE);
- cb = (struct nes_rskb_cb *)&skb->cb[0];
- cb->busaddr = pmem;
- cb->maplen = nesvnic->max_frame_size;
-
- mgt_rqe = &mgtvnic->mgt.rq_vbase[counter];
- mgt_rqe->wqe_words[NES_NIC_RQ_WQE_LENGTH_1_0_IDX] = cpu_to_le32((u32)nesvnic->max_frame_size);
- mgt_rqe->wqe_words[NES_NIC_RQ_WQE_LENGTH_3_2_IDX] = 0;
- mgt_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_LOW_IDX] = cpu_to_le32((u32)pmem);
- mgt_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_HIGH_IDX] = cpu_to_le32((u32)((u64)pmem >> 32));
- mgtvnic->mgt.rx_skb[counter] = skb;
- }
-
- init_timer(&mgtvnic->rq_wqes_timer);
- mgtvnic->rq_wqes_timer.function = nes_mgt_rq_wqes_timeout;
- mgtvnic->rq_wqes_timer.data = (unsigned long)mgtvnic;
-
- wqe_count = NES_MGT_WQ_COUNT - 1;
- mgtvnic->mgt.rq_head = wqe_count;
- barrier();
- do {
- counter = min(wqe_count, ((u32)255));
- wqe_count -= counter;
- nes_write32(nesdev->regs + NES_WQE_ALLOC, (counter << 24) | mgtvnic->mgt.qp_id);
- } while (wqe_count);
-
- nes_write32(nesdev->regs + NES_CQE_ALLOC, NES_CQE_ALLOC_NOTIFY_NEXT |
- mgtvnic->mgt_cq.cq_number);
- nes_read32(nesdev->regs + NES_CQE_ALLOC);
-
- mgt_vbase += mgt_mem_size;
- mgt_pbase += mgt_mem_size;
- nesvnic->mgtvnic[i] = mgtvnic++;
- }
- return 0;
-}
-
-
-void nes_destroy_mgt(struct nes_vnic *nesvnic)
-{
- struct nes_device *nesdev = nesvnic->nesdev;
- struct nes_vnic_mgt *mgtvnic;
- struct nes_vnic_mgt *first_mgtvnic;
- unsigned long flags;
- struct nes_hw_cqp_wqe *cqp_wqe;
- u32 cqp_head;
- struct sk_buff *rx_skb;
- int i;
- int ret;
-
- kthread_stop(nesvnic->mgt_thread);
-
- /* Free remaining NIC receive buffers */
- first_mgtvnic = nesvnic->mgtvnic[0];
- for (i = 0; i < NES_MGT_QP_COUNT; i++) {
- mgtvnic = nesvnic->mgtvnic[i];
- if (mgtvnic == NULL)
- continue;
-
- while (mgtvnic->mgt.rq_head != mgtvnic->mgt.rq_tail) {
- rx_skb = mgtvnic->mgt.rx_skb[mgtvnic->mgt.rq_tail];
- nes_mgt_free_skb(nesdev, rx_skb, PCI_DMA_FROMDEVICE);
- mgtvnic->mgt.rq_tail++;
- mgtvnic->mgt.rq_tail &= (mgtvnic->mgt.rq_size - 1);
- }
-
- spin_lock_irqsave(&nesdev->cqp.lock, flags);
-
- /* Destroy NIC QP */
- cqp_head = nesdev->cqp.sq_head;
- cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head];
- nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
-
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX,
- (NES_CQP_DESTROY_QP | NES_CQP_QP_TYPE_NIC));
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX,
- mgtvnic->mgt.qp_id);
-
- if (++cqp_head >= nesdev->cqp.sq_size)
- cqp_head = 0;
-
- cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head];
-
- /* Destroy NIC CQ */
- nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX,
- (NES_CQP_DESTROY_CQ | ((u32)mgtvnic->mgt_cq.cq_size << 16)));
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX,
- (mgtvnic->mgt_cq.cq_number | ((u32)nesdev->ceq_index << 16)));
-
- if (++cqp_head >= nesdev->cqp.sq_size)
- cqp_head = 0;
-
- nesdev->cqp.sq_head = cqp_head;
- barrier();
-
- /* Ring doorbell (2 WQEs) */
- nes_write32(nesdev->regs + NES_WQE_ALLOC, 0x02800000 | nesdev->cqp.qp_id);
-
- spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
- nes_debug(NES_DBG_SHUTDOWN, "Waiting for CQP, cqp_head=%u, cqp.sq_head=%u,"
- " cqp.sq_tail=%u, cqp.sq_size=%u\n",
- cqp_head, nesdev->cqp.sq_head,
- nesdev->cqp.sq_tail, nesdev->cqp.sq_size);
-
- ret = wait_event_timeout(nesdev->cqp.waitq, (nesdev->cqp.sq_tail == cqp_head),
- NES_EVENT_TIMEOUT);
-
- nes_debug(NES_DBG_SHUTDOWN, "Destroy MGT QP returned, wait_event_timeout ret = %u, cqp_head=%u,"
- " cqp.sq_head=%u, cqp.sq_tail=%u\n",
- ret, cqp_head, nesdev->cqp.sq_head, nesdev->cqp.sq_tail);
- if (!ret)
- nes_debug(NES_DBG_SHUTDOWN, "MGT QP%u destroy timeout expired\n",
- mgtvnic->mgt.qp_id);
-
- nesvnic->mgtvnic[i] = NULL;
- }
-
- if (nesvnic->mgt_vbase) {
- pci_free_consistent(nesdev->pcidev, nesvnic->mgt_mem_size, nesvnic->mgt_vbase,
- nesvnic->mgt_pbase);
- nesvnic->mgt_vbase = NULL;
- nesvnic->mgt_pbase = 0;
- }
-
- kfree(first_mgtvnic);
-}
diff --git a/drivers/infiniband/hw/nes/nes_mgt.h b/drivers/infiniband/hw/nes/nes_mgt.h
deleted file mode 100644
index 4f7f701c4a81..000000000000
--- a/drivers/infiniband/hw/nes/nes_mgt.h
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
-* Copyright (c) 2006 - 2011 Intel-NE, Inc. All rights reserved.
-*
-* This software is available to you under a choice of one of two
-* licenses. You may choose to be licensed under the terms of the GNU
-* General Public License (GPL) Version 2, available from the file
-* COPYING in the main directory of this source tree, or the
-* OpenIB.org BSD license below:
-*
-* Redistribution and use in source and binary forms, with or
-* without modification, are permitted provided that the following
-* conditions are met:
-*
-* - Redistributions of source code must retain the above
-* copyright notice, this list of conditions and the following
-* disclaimer.
-*
-* - Redistributions in binary form must reproduce the above
-* copyright notice, this list of conditions and the following
-* disclaimer in the documentation and/or other materials
-* provided with the distribution.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-* SOFTWARE.
-*/
-
-#ifndef __NES_MGT_H
-#define __NES_MGT_H
-
-#define MPA_FRAMING 6 /* length is 2 bytes, crc is 4 bytes */
-
-int nes_init_mgt_qp(struct nes_device *nesdev, struct net_device *netdev, struct nes_vnic *nesvnic);
-void nes_queue_mgt_skbs(struct sk_buff *skb, struct nes_vnic *nesvnic, struct nes_qp *nesqp);
-void nes_destroy_mgt(struct nes_vnic *nesvnic);
-void nes_destroy_pau_qp(struct nes_device *nesdev, struct nes_qp *nesqp);
-
-struct nes_hw_mgt {
- struct nes_hw_nic_rq_wqe *rq_vbase; /* virtual address of rq */
- dma_addr_t rq_pbase; /* PCI memory for host rings */
- struct sk_buff *rx_skb[NES_NIC_WQ_SIZE];
- u16 qp_id;
- u16 sq_head;
- u16 rq_head;
- u16 rq_tail;
- u16 rq_size;
- u8 replenishing_rq;
- u8 reserved;
- spinlock_t rq_lock;
-};
-
-struct nes_vnic_mgt {
- struct nes_vnic *nesvnic;
- struct nes_hw_mgt mgt;
- struct nes_hw_nic_cq mgt_cq;
- atomic_t rx_skbs_needed;
- struct timer_list rq_wqes_timer;
- atomic_t rx_skb_timer_running;
-};
-
-#define MAX_FPDU_FRAGS 4
-struct pau_fpdu_frag {
- struct sk_buff *skb;
- u64 physaddr;
- u32 frag_len;
- bool cmplt;
-};
-
-struct pau_fpdu_info {
- struct nes_qp *nesqp;
- struct nes_cqp_request *cqp_request;
- void *hdr_vbase;
- dma_addr_t hdr_pbase;
- int hdr_len;
- u16 data_len;
- u16 frag_cnt;
- struct pau_fpdu_frag frags[MAX_FPDU_FRAGS];
-};
-
-enum pau_qh_state {
- PAU_DEL_QH,
- PAU_ADD_LB_QH,
- PAU_READY
-};
-
-struct pau_qh_chg {
- struct nes_device *nesdev;
- struct nes_vnic *nesvnic;
- struct nes_qp *nesqp;
-};
-
-#endif /* __NES_MGT_H */
diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c
deleted file mode 100644
index 5921ea3d50ae..000000000000
--- a/drivers/infiniband/hw/nes/nes_nic.c
+++ /dev/null
@@ -1,1874 +0,0 @@
-/*
- * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/ip.h>
-#include <linux/tcp.h>
-#include <linux/if_arp.h>
-#include <linux/if_vlan.h>
-#include <linux/ethtool.h>
-#include <linux/slab.h>
-#include <net/tcp.h>
-
-#include <net/inet_common.h>
-#include <linux/inet.h>
-
-#include "nes.h"
-
-static struct nic_qp_map nic_qp_mapping_0[] = {
- {16,0,0,1},{24,4,0,0},{28,8,0,0},{32,12,0,0},
- {20,2,2,1},{26,6,2,0},{30,10,2,0},{34,14,2,0},
- {18,1,1,1},{25,5,1,0},{29,9,1,0},{33,13,1,0},
- {22,3,3,1},{27,7,3,0},{31,11,3,0},{35,15,3,0}
-};
-
-static struct nic_qp_map nic_qp_mapping_1[] = {
- {18,1,1,1},{25,5,1,0},{29,9,1,0},{33,13,1,0},
- {22,3,3,1},{27,7,3,0},{31,11,3,0},{35,15,3,0}
-};
-
-static struct nic_qp_map nic_qp_mapping_2[] = {
- {20,2,2,1},{26,6,2,0},{30,10,2,0},{34,14,2,0}
-};
-
-static struct nic_qp_map nic_qp_mapping_3[] = {
- {22,3,3,1},{27,7,3,0},{31,11,3,0},{35,15,3,0}
-};
-
-static struct nic_qp_map nic_qp_mapping_4[] = {
- {28,8,0,0},{32,12,0,0}
-};
-
-static struct nic_qp_map nic_qp_mapping_5[] = {
- {29,9,1,0},{33,13,1,0}
-};
-
-static struct nic_qp_map nic_qp_mapping_6[] = {
- {30,10,2,0},{34,14,2,0}
-};
-
-static struct nic_qp_map nic_qp_mapping_7[] = {
- {31,11,3,0},{35,15,3,0}
-};
-
-static struct nic_qp_map *nic_qp_mapping_per_function[] = {
- nic_qp_mapping_0, nic_qp_mapping_1, nic_qp_mapping_2, nic_qp_mapping_3,
- nic_qp_mapping_4, nic_qp_mapping_5, nic_qp_mapping_6, nic_qp_mapping_7
-};
-
-static const u32 default_msg = NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK
- | NETIF_MSG_IFUP | NETIF_MSG_IFDOWN;
-static int debug = -1;
-static int nics_per_function = 1;
-
-/**
- * nes_netdev_poll
- */
-static int nes_netdev_poll(struct napi_struct *napi, int budget)
-{
- struct nes_vnic *nesvnic = container_of(napi, struct nes_vnic, napi);
- struct nes_device *nesdev = nesvnic->nesdev;
- struct nes_hw_nic_cq *nescq = &nesvnic->nic_cq;
-
- nesvnic->budget = budget;
- nescq->cqes_pending = 0;
- nescq->rx_cqes_completed = 0;
- nescq->cqe_allocs_pending = 0;
- nescq->rx_pkts_indicated = 0;
-
- nes_nic_ce_handler(nesdev, nescq);
-
- if (nescq->cqes_pending == 0) {
- napi_complete(napi);
- /* clear out completed cqes and arm */
- nes_write32(nesdev->regs+NES_CQE_ALLOC, NES_CQE_ALLOC_NOTIFY_NEXT |
- nescq->cq_number | (nescq->cqe_allocs_pending << 16));
- nes_read32(nesdev->regs+NES_CQE_ALLOC);
- } else {
- /* clear out completed cqes but don't arm */
- nes_write32(nesdev->regs+NES_CQE_ALLOC,
- nescq->cq_number | (nescq->cqe_allocs_pending << 16));
- nes_debug(NES_DBG_NETDEV, "%s: exiting with work pending\n",
- nesvnic->netdev->name);
- }
- return nescq->rx_pkts_indicated;
-}
-
-
-/**
- * nes_netdev_open - Activate the network interface; ifconfig
- * ethx up.
- */
-static int nes_netdev_open(struct net_device *netdev)
-{
- u32 macaddr_low;
- u16 macaddr_high;
- struct nes_vnic *nesvnic = netdev_priv(netdev);
- struct nes_device *nesdev = nesvnic->nesdev;
- int ret;
- int i;
- struct nes_vnic *first_nesvnic = NULL;
- u32 nic_active_bit;
- u32 nic_active;
- struct list_head *list_pos, *list_temp;
- unsigned long flags;
-
- assert(nesdev != NULL);
-
- if (nesvnic->netdev_open == 1)
- return 0;
-
- if (netif_msg_ifup(nesvnic))
- printk(KERN_INFO PFX "%s: enabling interface\n", netdev->name);
-
- ret = nes_init_nic_qp(nesdev, netdev);
- if (ret) {
- return ret;
- }
-
- netif_carrier_off(netdev);
- netif_stop_queue(netdev);
-
- if ((!nesvnic->of_device_registered) && (nesvnic->rdma_enabled)) {
- nesvnic->nesibdev = nes_init_ofa_device(netdev);
- if (nesvnic->nesibdev == NULL) {
- printk(KERN_ERR PFX "%s: nesvnic->nesibdev alloc failed", netdev->name);
- } else {
- nesvnic->nesibdev->nesvnic = nesvnic;
- ret = nes_register_ofa_device(nesvnic->nesibdev);
- if (ret) {
- printk(KERN_ERR PFX "%s: Unable to register RDMA device, ret = %d\n",
- netdev->name, ret);
- }
- }
- }
- /* Set packet filters */
- nic_active_bit = 1 << nesvnic->nic_index;
- nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_ACTIVE);
- nic_active |= nic_active_bit;
- nes_write_indexed(nesdev, NES_IDX_NIC_ACTIVE, nic_active);
- nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_MULTICAST_ENABLE);
- nic_active |= nic_active_bit;
- nes_write_indexed(nesdev, NES_IDX_NIC_MULTICAST_ENABLE, nic_active);
- nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_BROADCAST_ON);
- nic_active |= nic_active_bit;
- nes_write_indexed(nesdev, NES_IDX_NIC_BROADCAST_ON, nic_active);
-
- macaddr_high = ((u16)netdev->dev_addr[0]) << 8;
- macaddr_high += (u16)netdev->dev_addr[1];
-
- macaddr_low = ((u32)netdev->dev_addr[2]) << 24;
- macaddr_low += ((u32)netdev->dev_addr[3]) << 16;
- macaddr_low += ((u32)netdev->dev_addr[4]) << 8;
- macaddr_low += (u32)netdev->dev_addr[5];
-
- /* Program the various MAC regs */
- for (i = 0; i < NES_MAX_PORT_COUNT; i++) {
- if (nesvnic->qp_nic_index[i] == 0xf) {
- break;
- }
- nes_debug(NES_DBG_NETDEV, "i=%d, perfect filter table index= %d, PERF FILTER LOW"
- " (Addr:%08X) = %08X, HIGH = %08X.\n",
- i, nesvnic->qp_nic_index[i],
- NES_IDX_PERFECT_FILTER_LOW+
- (nesvnic->qp_nic_index[i] * 8),
- macaddr_low,
- (u32)macaddr_high | NES_MAC_ADDR_VALID |
- ((((u32)nesvnic->nic_index) << 16)));
- nes_write_indexed(nesdev,
- NES_IDX_PERFECT_FILTER_LOW + (nesvnic->qp_nic_index[i] * 8),
- macaddr_low);
- nes_write_indexed(nesdev,
- NES_IDX_PERFECT_FILTER_HIGH + (nesvnic->qp_nic_index[i] * 8),
- (u32)macaddr_high | NES_MAC_ADDR_VALID |
- ((((u32)nesvnic->nic_index) << 16)));
- }
-
-
- nes_write32(nesdev->regs+NES_CQE_ALLOC, NES_CQE_ALLOC_NOTIFY_NEXT |
- nesvnic->nic_cq.cq_number);
- nes_read32(nesdev->regs+NES_CQE_ALLOC);
- list_for_each_safe(list_pos, list_temp, &nesdev->nesadapter->nesvnic_list[nesdev->mac_index]) {
- first_nesvnic = container_of(list_pos, struct nes_vnic, list);
- if (first_nesvnic->netdev_open == 1)
- break;
- }
- if (first_nesvnic->netdev_open == 0) {
- nes_debug(NES_DBG_INIT, "Setting up MAC interrupt mask.\n");
- nes_write_indexed(nesdev, NES_IDX_MAC_INT_MASK + (0x200 * nesdev->mac_index),
- ~(NES_MAC_INT_LINK_STAT_CHG | NES_MAC_INT_XGMII_EXT |
- NES_MAC_INT_TX_UNDERFLOW | NES_MAC_INT_TX_ERROR));
- first_nesvnic = nesvnic;
- }
-
- if (first_nesvnic->linkup) {
- /* Enable network packets */
- nesvnic->linkup = 1;
- netif_start_queue(netdev);
- netif_carrier_on(netdev);
- }
-
- spin_lock_irqsave(&nesdev->nesadapter->phy_lock, flags);
- if (nesdev->nesadapter->phy_type[nesdev->mac_index] == NES_PHY_TYPE_SFP_D) {
- nesdev->link_recheck = 1;
- mod_delayed_work(system_wq, &nesdev->work,
- NES_LINK_RECHECK_DELAY);
- }
- spin_unlock_irqrestore(&nesdev->nesadapter->phy_lock, flags);
-
- spin_lock_irqsave(&nesvnic->port_ibevent_lock, flags);
- if (nesvnic->of_device_registered) {
- nesdev->nesadapter->send_term_ok = 1;
- if (nesvnic->linkup == 1) {
- if (nesdev->iw_status == 0) {
- nesdev->iw_status = 1;
- nes_port_ibevent(nesvnic);
- }
- } else {
- nesdev->iw_status = 0;
- }
- }
- spin_unlock_irqrestore(&nesvnic->port_ibevent_lock, flags);
-
- napi_enable(&nesvnic->napi);
- nesvnic->netdev_open = 1;
-
- return 0;
-}
-
-
-/**
- * nes_netdev_stop
- */
-static int nes_netdev_stop(struct net_device *netdev)
-{
- struct nes_vnic *nesvnic = netdev_priv(netdev);
- struct nes_device *nesdev = nesvnic->nesdev;
- u32 nic_active_mask;
- u32 nic_active;
- struct nes_vnic *first_nesvnic = NULL;
- struct list_head *list_pos, *list_temp;
- unsigned long flags;
-
- nes_debug(NES_DBG_SHUTDOWN, "nesvnic=%p, nesdev=%p, netdev=%p %s\n",
- nesvnic, nesdev, netdev, netdev->name);
- if (nesvnic->netdev_open == 0)
- return 0;
-
- if (netif_msg_ifdown(nesvnic))
- printk(KERN_INFO PFX "%s: disabling interface\n", netdev->name);
- netif_carrier_off(netdev);
-
- /* Disable network packets */
- napi_disable(&nesvnic->napi);
- netif_stop_queue(netdev);
- list_for_each_safe(list_pos, list_temp, &nesdev->nesadapter->nesvnic_list[nesdev->mac_index]) {
- first_nesvnic = container_of(list_pos, struct nes_vnic, list);
- if ((first_nesvnic->netdev_open == 1) && (first_nesvnic != nesvnic))
- break;
- }
-
- if ((first_nesvnic->netdev_open == 1) && (first_nesvnic != nesvnic) &&
- (PCI_FUNC(first_nesvnic->nesdev->pcidev->devfn) !=
- PCI_FUNC(nesvnic->nesdev->pcidev->devfn))) {
- nes_write_indexed(nesdev, NES_IDX_MAC_INT_MASK+
- (0x200*nesdev->mac_index), 0xffffffff);
- nes_write_indexed(first_nesvnic->nesdev,
- NES_IDX_MAC_INT_MASK+
- (0x200*first_nesvnic->nesdev->mac_index),
- ~(NES_MAC_INT_LINK_STAT_CHG | NES_MAC_INT_XGMII_EXT |
- NES_MAC_INT_TX_UNDERFLOW | NES_MAC_INT_TX_ERROR));
- } else {
- nes_write_indexed(nesdev, NES_IDX_MAC_INT_MASK+(0x200*nesdev->mac_index), 0xffffffff);
- }
-
- nic_active_mask = ~((u32)(1 << nesvnic->nic_index));
- nes_write_indexed(nesdev, NES_IDX_PERFECT_FILTER_HIGH+
- (nesvnic->perfect_filter_index*8), 0);
- nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_ACTIVE);
- nic_active &= nic_active_mask;
- nes_write_indexed(nesdev, NES_IDX_NIC_ACTIVE, nic_active);
- nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_MULTICAST_ALL);
- nic_active &= nic_active_mask;
- nes_write_indexed(nesdev, NES_IDX_NIC_MULTICAST_ALL, nic_active);
- nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_MULTICAST_ENABLE);
- nic_active &= nic_active_mask;
- nes_write_indexed(nesdev, NES_IDX_NIC_MULTICAST_ENABLE, nic_active);
- nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL);
- nic_active &= nic_active_mask;
- nes_write_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL, nic_active);
- nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_BROADCAST_ON);
- nic_active &= nic_active_mask;
- nes_write_indexed(nesdev, NES_IDX_NIC_BROADCAST_ON, nic_active);
-
- spin_lock_irqsave(&nesvnic->port_ibevent_lock, flags);
- if (nesvnic->of_device_registered) {
- nesdev->nesadapter->send_term_ok = 0;
- nesdev->iw_status = 0;
- if (nesvnic->linkup == 1)
- nes_port_ibevent(nesvnic);
- }
- del_timer_sync(&nesvnic->event_timer);
- nesvnic->event_timer.function = NULL;
- spin_unlock_irqrestore(&nesvnic->port_ibevent_lock, flags);
-
- nes_destroy_nic_qp(nesvnic);
-
- nesvnic->netdev_open = 0;
-
- return 0;
-}
-
-
-/**
- * nes_nic_send
- */
-static bool nes_nic_send(struct sk_buff *skb, struct net_device *netdev)
-{
- struct nes_vnic *nesvnic = netdev_priv(netdev);
- struct nes_device *nesdev = nesvnic->nesdev;
- struct nes_hw_nic *nesnic = &nesvnic->nic;
- struct nes_hw_nic_sq_wqe *nic_sqe;
- struct tcphdr *tcph;
- __le16 *wqe_fragment_length;
- u32 wqe_misc;
- u16 wqe_fragment_index = 1; /* first fragment (0) is used by copy buffer */
- u16 skb_fragment_index;
- dma_addr_t bus_address;
-
- nic_sqe = &nesnic->sq_vbase[nesnic->sq_head];
- wqe_fragment_length = (__le16 *)&nic_sqe->wqe_words[NES_NIC_SQ_WQE_LENGTH_0_TAG_IDX];
-
- /* setup the VLAN tag if present */
- if (skb_vlan_tag_present(skb)) {
- nes_debug(NES_DBG_NIC_TX, "%s: VLAN packet to send... VLAN = %08X\n",
- netdev->name, skb_vlan_tag_get(skb));
- wqe_misc = NES_NIC_SQ_WQE_TAGVALUE_ENABLE;
- wqe_fragment_length[0] = (__force __le16) skb_vlan_tag_get(skb);
- } else
- wqe_misc = 0;
-
- /* bump past the vlan tag */
- wqe_fragment_length++;
- /* wqe_fragment_address = (u64 *)&nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_LOW_IDX]; */
- wqe_misc |= NES_NIC_SQ_WQE_COMPLETION;
-
- if (skb->ip_summed == CHECKSUM_PARTIAL) {
- if (skb_is_gso(skb)) {
- tcph = tcp_hdr(skb);
- /* nes_debug(NES_DBG_NIC_TX, "%s: TSO request... is_gso = %u seg size = %u\n",
- netdev->name, skb_is_gso(skb), skb_shinfo(skb)->gso_size); */
- wqe_misc |= NES_NIC_SQ_WQE_LSO_ENABLE | (u16)skb_shinfo(skb)->gso_size;
- set_wqe_32bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_LSO_INFO_IDX,
- ((u32)tcph->doff) |
- (((u32)(((unsigned char *)tcph) - skb->data)) << 4));
- }
- } else { /* CHECKSUM_HW */
- wqe_misc |= NES_NIC_SQ_WQE_DISABLE_CHKSUM;
- }
-
- set_wqe_32bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_TOTAL_LENGTH_IDX,
- skb->len);
- memcpy(&nesnic->first_frag_vbase[nesnic->sq_head].buffer,
- skb->data, min(((unsigned int)NES_FIRST_FRAG_SIZE), skb_headlen(skb)));
- wqe_fragment_length[0] = cpu_to_le16(min(((unsigned int)NES_FIRST_FRAG_SIZE),
- skb_headlen(skb)));
- wqe_fragment_length[1] = 0;
- if (skb_headlen(skb) > NES_FIRST_FRAG_SIZE) {
- if ((skb_shinfo(skb)->nr_frags + 1) > 4) {
- nes_debug(NES_DBG_NIC_TX, "%s: Packet with %u fragments not sent, skb_headlen=%u\n",
- netdev->name, skb_shinfo(skb)->nr_frags + 2, skb_headlen(skb));
- kfree_skb(skb);
- nesvnic->tx_sw_dropped++;
- return false;
- }
- set_bit(nesnic->sq_head, nesnic->first_frag_overflow);
- bus_address = pci_map_single(nesdev->pcidev, skb->data + NES_FIRST_FRAG_SIZE,
- skb_headlen(skb) - NES_FIRST_FRAG_SIZE, PCI_DMA_TODEVICE);
- wqe_fragment_length[wqe_fragment_index++] =
- cpu_to_le16(skb_headlen(skb) - NES_FIRST_FRAG_SIZE);
- wqe_fragment_length[wqe_fragment_index] = 0;
- set_wqe_64bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_FRAG1_LOW_IDX,
- ((u64)(bus_address)));
- nesnic->tx_skb[nesnic->sq_head] = skb;
- }
-
- if (skb_headlen(skb) == skb->len) {
- if (skb_headlen(skb) <= NES_FIRST_FRAG_SIZE) {
- nic_sqe->wqe_words[NES_NIC_SQ_WQE_LENGTH_2_1_IDX] = 0;
- nesnic->tx_skb[nesnic->sq_head] = skb;
- }
- } else {
- /* Deal with Fragments */
- nesnic->tx_skb[nesnic->sq_head] = skb;
- for (skb_fragment_index = 0; skb_fragment_index < skb_shinfo(skb)->nr_frags;
- skb_fragment_index++) {
- skb_frag_t *frag =
- &skb_shinfo(skb)->frags[skb_fragment_index];
- bus_address = skb_frag_dma_map(&nesdev->pcidev->dev,
- frag, 0, skb_frag_size(frag),
- DMA_TO_DEVICE);
- wqe_fragment_length[wqe_fragment_index] =
- cpu_to_le16(skb_frag_size(&skb_shinfo(skb)->frags[skb_fragment_index]));
- set_wqe_64bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_FRAG0_LOW_IDX+(2*wqe_fragment_index),
- bus_address);
- wqe_fragment_index++;
- if (wqe_fragment_index < 5)
- wqe_fragment_length[wqe_fragment_index] = 0;
- }
- }
-
- set_wqe_32bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_MISC_IDX, wqe_misc);
- nesnic->sq_head++;
- nesnic->sq_head &= nesnic->sq_size - 1;
- return true;
-}
-
-
-/**
- * nes_netdev_start_xmit
- */
-static int nes_netdev_start_xmit(struct sk_buff *skb, struct net_device *netdev)
-{
- struct nes_vnic *nesvnic = netdev_priv(netdev);
- struct nes_device *nesdev = nesvnic->nesdev;
- struct nes_hw_nic *nesnic = &nesvnic->nic;
- struct nes_hw_nic_sq_wqe *nic_sqe;
- struct tcphdr *tcph;
- /* struct udphdr *udph; */
-#define NES_MAX_TSO_FRAGS MAX_SKB_FRAGS
- /* 64K segment plus overflow on each side */
- dma_addr_t tso_bus_address[NES_MAX_TSO_FRAGS];
- dma_addr_t bus_address;
- u32 tso_frag_index;
- u32 tso_frag_count;
- u32 tso_wqe_length;
- u32 curr_tcp_seq;
- u32 wqe_count=1;
- struct iphdr *iph;
- __le16 *wqe_fragment_length;
- u32 nr_frags;
- u32 original_first_length;
- /* u64 *wqe_fragment_address; */
- /* first fragment (0) is used by copy buffer */
- u16 wqe_fragment_index=1;
- u16 hoffset;
- u16 nhoffset;
- u16 wqes_needed;
- u16 wqes_available;
- u32 wqe_misc;
-
- /*
- * nes_debug(NES_DBG_NIC_TX, "%s Request to tx NIC packet length %u, headlen %u,"
- * " (%u frags), tso_size=%u\n",
- * netdev->name, skb->len, skb_headlen(skb),
- * skb_shinfo(skb)->nr_frags, skb_is_gso(skb));
- */
-
- if (netif_queue_stopped(netdev))
- return NETDEV_TX_BUSY;
-
- /* Check if SQ is full */
- if ((((nesnic->sq_tail+(nesnic->sq_size*2))-nesnic->sq_head) & (nesnic->sq_size - 1)) == 1) {
- if (!netif_queue_stopped(netdev)) {
- netif_stop_queue(netdev);
- barrier();
- if ((((((volatile u16)nesnic->sq_tail)+(nesnic->sq_size*2))-nesnic->sq_head) & (nesnic->sq_size - 1)) != 1) {
- netif_start_queue(netdev);
- goto sq_no_longer_full;
- }
- }
- nesvnic->sq_full++;
- return NETDEV_TX_BUSY;
- }
-
-sq_no_longer_full:
- nr_frags = skb_shinfo(skb)->nr_frags;
- if (skb_headlen(skb) > NES_FIRST_FRAG_SIZE) {
- nr_frags++;
- }
- /* Check if too many fragments */
- if (unlikely((nr_frags > 4))) {
- if (skb_is_gso(skb)) {
- nesvnic->segmented_tso_requests++;
- nesvnic->tso_requests++;
- /* Basically 4 fragments available per WQE with extended fragments */
- wqes_needed = nr_frags >> 2;
- wqes_needed += (nr_frags&3)?1:0;
- wqes_available = (((nesnic->sq_tail+nesnic->sq_size)-nesnic->sq_head) - 1) &
- (nesnic->sq_size - 1);
-
- if (unlikely(wqes_needed > wqes_available)) {
- if (!netif_queue_stopped(netdev)) {
- netif_stop_queue(netdev);
- barrier();
- wqes_available = (((((volatile u16)nesnic->sq_tail)+nesnic->sq_size)-nesnic->sq_head) - 1) &
- (nesnic->sq_size - 1);
- if (wqes_needed <= wqes_available) {
- netif_start_queue(netdev);
- goto tso_sq_no_longer_full;
- }
- }
- nesvnic->sq_full++;
- nes_debug(NES_DBG_NIC_TX, "%s: HNIC SQ full- TSO request has too many frags!\n",
- netdev->name);
- return NETDEV_TX_BUSY;
- }
-tso_sq_no_longer_full:
- /* Map all the buffers */
- for (tso_frag_count=0; tso_frag_count < skb_shinfo(skb)->nr_frags;
- tso_frag_count++) {
- skb_frag_t *frag =
- &skb_shinfo(skb)->frags[tso_frag_count];
- tso_bus_address[tso_frag_count] =
- skb_frag_dma_map(&nesdev->pcidev->dev,
- frag, 0, skb_frag_size(frag),
- DMA_TO_DEVICE);
- }
-
- tso_frag_index = 0;
- curr_tcp_seq = ntohl(tcp_hdr(skb)->seq);
- hoffset = skb_transport_header(skb) - skb->data;
- nhoffset = skb_network_header(skb) - skb->data;
- original_first_length = hoffset + ((((struct tcphdr *)skb_transport_header(skb))->doff)<<2);
-
- for (wqe_count=0; wqe_count<((u32)wqes_needed); wqe_count++) {
- tso_wqe_length = 0;
- nic_sqe = &nesnic->sq_vbase[nesnic->sq_head];
- wqe_fragment_length =
- (__le16 *)&nic_sqe->wqe_words[NES_NIC_SQ_WQE_LENGTH_0_TAG_IDX];
- /* setup the VLAN tag if present */
- if (skb_vlan_tag_present(skb)) {
- nes_debug(NES_DBG_NIC_TX, "%s: VLAN packet to send... VLAN = %08X\n",
- netdev->name,
- skb_vlan_tag_get(skb));
- wqe_misc = NES_NIC_SQ_WQE_TAGVALUE_ENABLE;
- wqe_fragment_length[0] = (__force __le16) skb_vlan_tag_get(skb);
- } else
- wqe_misc = 0;
-
- /* bump past the vlan tag */
- wqe_fragment_length++;
-
- /* Assumes header totally fits in allocated buffer and is in first fragment */
- if (original_first_length > NES_FIRST_FRAG_SIZE) {
- nes_debug(NES_DBG_NIC_TX, "ERROR: SKB header too big, headlen=%u, FIRST_FRAG_SIZE=%u\n",
- original_first_length, NES_FIRST_FRAG_SIZE);
- nes_debug(NES_DBG_NIC_TX, "%s Request to tx NIC packet length %u, headlen %u,"
- " (%u frags), is_gso = %u tso_size=%u\n",
- netdev->name,
- skb->len, skb_headlen(skb),
- skb_shinfo(skb)->nr_frags, skb_is_gso(skb), skb_shinfo(skb)->gso_size);
- }
- memcpy(&nesnic->first_frag_vbase[nesnic->sq_head].buffer,
- skb->data, min(((unsigned int)NES_FIRST_FRAG_SIZE),
- original_first_length));
- iph = (struct iphdr *)
- (&nesnic->first_frag_vbase[nesnic->sq_head].buffer[nhoffset]);
- tcph = (struct tcphdr *)
- (&nesnic->first_frag_vbase[nesnic->sq_head].buffer[hoffset]);
- if ((wqe_count+1)!=(u32)wqes_needed) {
- tcph->fin = 0;
- tcph->psh = 0;
- tcph->rst = 0;
- tcph->urg = 0;
- }
- if (wqe_count) {
- tcph->syn = 0;
- }
- tcph->seq = htonl(curr_tcp_seq);
- wqe_fragment_length[0] = cpu_to_le16(min(((unsigned int)NES_FIRST_FRAG_SIZE),
- original_first_length));
-
- wqe_fragment_index = 1;
- if ((wqe_count==0) && (skb_headlen(skb) > original_first_length)) {
- set_bit(nesnic->sq_head, nesnic->first_frag_overflow);
- bus_address = pci_map_single(nesdev->pcidev, skb->data + original_first_length,
- skb_headlen(skb) - original_first_length, PCI_DMA_TODEVICE);
- wqe_fragment_length[wqe_fragment_index++] =
- cpu_to_le16(skb_headlen(skb) - original_first_length);
- wqe_fragment_length[wqe_fragment_index] = 0;
- set_wqe_64bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_FRAG1_LOW_IDX,
- bus_address);
- tso_wqe_length += skb_headlen(skb) -
- original_first_length;
- }
- while (wqe_fragment_index < 5) {
- wqe_fragment_length[wqe_fragment_index] =
- cpu_to_le16(skb_frag_size(&skb_shinfo(skb)->frags[tso_frag_index]));
- set_wqe_64bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_FRAG0_LOW_IDX+(2*wqe_fragment_index),
- (u64)tso_bus_address[tso_frag_index]);
- wqe_fragment_index++;
- tso_wqe_length += skb_frag_size(&skb_shinfo(skb)->frags[tso_frag_index++]);
- if (wqe_fragment_index < 5)
- wqe_fragment_length[wqe_fragment_index] = 0;
- if (tso_frag_index == tso_frag_count)
- break;
- }
- if ((wqe_count+1) == (u32)wqes_needed) {
- nesnic->tx_skb[nesnic->sq_head] = skb;
- } else {
- nesnic->tx_skb[nesnic->sq_head] = NULL;
- }
- wqe_misc |= NES_NIC_SQ_WQE_COMPLETION | (u16)skb_shinfo(skb)->gso_size;
- if ((tso_wqe_length + original_first_length) > skb_shinfo(skb)->gso_size) {
- wqe_misc |= NES_NIC_SQ_WQE_LSO_ENABLE;
- } else {
- iph->tot_len = htons(tso_wqe_length + original_first_length - nhoffset);
- }
-
- set_wqe_32bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_MISC_IDX,
- wqe_misc);
- set_wqe_32bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_LSO_INFO_IDX,
- ((u32)tcph->doff) | (((u32)hoffset) << 4));
-
- set_wqe_32bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_TOTAL_LENGTH_IDX,
- tso_wqe_length + original_first_length);
- curr_tcp_seq += tso_wqe_length;
- nesnic->sq_head++;
- nesnic->sq_head &= nesnic->sq_size-1;
- }
- } else {
- hoffset = skb_transport_header(skb) - skb->data;
- nhoffset = skb_network_header(skb) - skb->data;
- if (skb_linearize(skb)) {
- nesvnic->tx_sw_dropped++;
- kfree_skb(skb);
- return NETDEV_TX_OK;
- }
- nesvnic->linearized_skbs++;
- skb_set_transport_header(skb, hoffset);
- skb_set_network_header(skb, nhoffset);
- if (!nes_nic_send(skb, netdev))
- return NETDEV_TX_OK;
- }
- } else {
- if (!nes_nic_send(skb, netdev))
- return NETDEV_TX_OK;
- }
-
- barrier();
-
- if (wqe_count)
- nes_write32(nesdev->regs+NES_WQE_ALLOC,
- (wqe_count << 24) | (1 << 23) | nesvnic->nic.qp_id);
-
- netif_trans_update(netdev);
-
- return NETDEV_TX_OK;
-}
-
-
-/**
- * nes_netdev_get_stats
- */
-static struct net_device_stats *nes_netdev_get_stats(struct net_device *netdev)
-{
- struct nes_vnic *nesvnic = netdev_priv(netdev);
- struct nes_device *nesdev = nesvnic->nesdev;
- u64 u64temp;
- u32 u32temp;
-
- u32temp = nes_read_indexed(nesdev,
- NES_IDX_ENDNODE0_NSTAT_RX_DISCARD + (nesvnic->nic_index*0x200));
- nesvnic->netstats.rx_dropped += u32temp;
- nesvnic->endnode_nstat_rx_discard += u32temp;
-
- u64temp = (u64)nes_read_indexed(nesdev,
- NES_IDX_ENDNODE0_NSTAT_RX_OCTETS_LO + (nesvnic->nic_index*0x200));
- u64temp += ((u64)nes_read_indexed(nesdev,
- NES_IDX_ENDNODE0_NSTAT_RX_OCTETS_HI + (nesvnic->nic_index*0x200))) << 32;
-
- nesvnic->endnode_nstat_rx_octets += u64temp;
- nesvnic->netstats.rx_bytes += u64temp;
-
- u64temp = (u64)nes_read_indexed(nesdev,
- NES_IDX_ENDNODE0_NSTAT_RX_FRAMES_LO + (nesvnic->nic_index*0x200));
- u64temp += ((u64)nes_read_indexed(nesdev,
- NES_IDX_ENDNODE0_NSTAT_RX_FRAMES_HI + (nesvnic->nic_index*0x200))) << 32;
-
- nesvnic->endnode_nstat_rx_frames += u64temp;
- nesvnic->netstats.rx_packets += u64temp;
-
- u64temp = (u64)nes_read_indexed(nesdev,
- NES_IDX_ENDNODE0_NSTAT_TX_OCTETS_LO + (nesvnic->nic_index*0x200));
- u64temp += ((u64)nes_read_indexed(nesdev,
- NES_IDX_ENDNODE0_NSTAT_TX_OCTETS_HI + (nesvnic->nic_index*0x200))) << 32;
-
- nesvnic->endnode_nstat_tx_octets += u64temp;
- nesvnic->netstats.tx_bytes += u64temp;
-
- u64temp = (u64)nes_read_indexed(nesdev,
- NES_IDX_ENDNODE0_NSTAT_TX_FRAMES_LO + (nesvnic->nic_index*0x200));
- u64temp += ((u64)nes_read_indexed(nesdev,
- NES_IDX_ENDNODE0_NSTAT_TX_FRAMES_HI + (nesvnic->nic_index*0x200))) << 32;
-
- nesvnic->endnode_nstat_tx_frames += u64temp;
- nesvnic->netstats.tx_packets += u64temp;
-
- u32temp = nes_read_indexed(nesdev,
- NES_IDX_MAC_RX_SHORT_FRAMES + (nesvnic->nesdev->mac_index*0x200));
- nesvnic->netstats.rx_dropped += u32temp;
- nesvnic->nesdev->mac_rx_errors += u32temp;
- nesvnic->nesdev->mac_rx_short_frames += u32temp;
-
- u32temp = nes_read_indexed(nesdev,
- NES_IDX_MAC_RX_OVERSIZED_FRAMES + (nesvnic->nesdev->mac_index*0x200));
- nesvnic->netstats.rx_dropped += u32temp;
- nesvnic->nesdev->mac_rx_errors += u32temp;
- nesvnic->nesdev->mac_rx_oversized_frames += u32temp;
-
- u32temp = nes_read_indexed(nesdev,
- NES_IDX_MAC_RX_JABBER_FRAMES + (nesvnic->nesdev->mac_index*0x200));
- nesvnic->netstats.rx_dropped += u32temp;
- nesvnic->nesdev->mac_rx_errors += u32temp;
- nesvnic->nesdev->mac_rx_jabber_frames += u32temp;
-
- u32temp = nes_read_indexed(nesdev,
- NES_IDX_MAC_RX_SYMBOL_ERR_FRAMES + (nesvnic->nesdev->mac_index*0x200));
- nesvnic->netstats.rx_dropped += u32temp;
- nesvnic->nesdev->mac_rx_errors += u32temp;
- nesvnic->nesdev->mac_rx_symbol_err_frames += u32temp;
-
- u32temp = nes_read_indexed(nesdev,
- NES_IDX_MAC_RX_LENGTH_ERR_FRAMES + (nesvnic->nesdev->mac_index*0x200));
- nesvnic->netstats.rx_length_errors += u32temp;
- nesvnic->nesdev->mac_rx_errors += u32temp;
-
- u32temp = nes_read_indexed(nesdev,
- NES_IDX_MAC_RX_CRC_ERR_FRAMES + (nesvnic->nesdev->mac_index*0x200));
- nesvnic->nesdev->mac_rx_errors += u32temp;
- nesvnic->nesdev->mac_rx_crc_errors += u32temp;
- nesvnic->netstats.rx_crc_errors += u32temp;
-
- u32temp = nes_read_indexed(nesdev,
- NES_IDX_MAC_TX_ERRORS + (nesvnic->nesdev->mac_index*0x200));
- nesvnic->nesdev->mac_tx_errors += u32temp;
- nesvnic->netstats.tx_errors += u32temp;
-
- return &nesvnic->netstats;
-}
-
-
-/**
- * nes_netdev_tx_timeout
- */
-static void nes_netdev_tx_timeout(struct net_device *netdev)
-{
- struct nes_vnic *nesvnic = netdev_priv(netdev);
-
- if (netif_msg_timer(nesvnic))
- nes_debug(NES_DBG_NIC_TX, "%s: tx timeout\n", netdev->name);
-}
-
-
-/**
- * nes_netdev_set_mac_address
- */
-static int nes_netdev_set_mac_address(struct net_device *netdev, void *p)
-{
- struct nes_vnic *nesvnic = netdev_priv(netdev);
- struct nes_device *nesdev = nesvnic->nesdev;
- struct sockaddr *mac_addr = p;
- int i;
- u32 macaddr_low;
- u16 macaddr_high;
-
- if (!is_valid_ether_addr(mac_addr->sa_data))
- return -EADDRNOTAVAIL;
-
- memcpy(netdev->dev_addr, mac_addr->sa_data, netdev->addr_len);
- printk(PFX "%s: Address length = %d, Address = %pM\n",
- __func__, netdev->addr_len, mac_addr->sa_data);
- macaddr_high = ((u16)netdev->dev_addr[0]) << 8;
- macaddr_high += (u16)netdev->dev_addr[1];
- macaddr_low = ((u32)netdev->dev_addr[2]) << 24;
- macaddr_low += ((u32)netdev->dev_addr[3]) << 16;
- macaddr_low += ((u32)netdev->dev_addr[4]) << 8;
- macaddr_low += (u32)netdev->dev_addr[5];
-
- for (i = 0; i < NES_MAX_PORT_COUNT; i++) {
- if (nesvnic->qp_nic_index[i] == 0xf) {
- break;
- }
- nes_write_indexed(nesdev,
- NES_IDX_PERFECT_FILTER_LOW + (nesvnic->qp_nic_index[i] * 8),
- macaddr_low);
- nes_write_indexed(nesdev,
- NES_IDX_PERFECT_FILTER_HIGH + (nesvnic->qp_nic_index[i] * 8),
- (u32)macaddr_high | NES_MAC_ADDR_VALID |
- ((((u32)nesvnic->nic_index) << 16)));
- }
- return 0;
-}
-
-
-static void set_allmulti(struct nes_device *nesdev, u32 nic_active_bit)
-{
- u32 nic_active;
-
- nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_MULTICAST_ALL);
- nic_active |= nic_active_bit;
- nes_write_indexed(nesdev, NES_IDX_NIC_MULTICAST_ALL, nic_active);
- nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL);
- nic_active &= ~nic_active_bit;
- nes_write_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL, nic_active);
-}
-
-#define get_addr(addrs, index) ((addrs) + (index) * ETH_ALEN)
-
-/**
- * nes_netdev_set_multicast_list
- */
-static void nes_netdev_set_multicast_list(struct net_device *netdev)
-{
- struct nes_vnic *nesvnic = netdev_priv(netdev);
- struct nes_device *nesdev = nesvnic->nesdev;
- struct nes_adapter *nesadapter = nesvnic->nesdev->nesadapter;
- u32 nic_active_bit;
- u32 nic_active;
- u32 perfect_filter_register_address;
- u32 macaddr_low;
- u16 macaddr_high;
- u8 mc_all_on = 0;
- u8 mc_index;
- int mc_nic_index = -1;
- u8 pft_entries_preallocated = max(nesadapter->adapter_fcn_count *
- nics_per_function, 4);
- u8 max_pft_entries_avaiable = NES_PFT_SIZE - pft_entries_preallocated;
- unsigned long flags;
- int mc_count = netdev_mc_count(netdev);
-
- spin_lock_irqsave(&nesadapter->resource_lock, flags);
- nic_active_bit = 1 << nesvnic->nic_index;
-
- if (netdev->flags & IFF_PROMISC) {
- nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_MULTICAST_ALL);
- nic_active |= nic_active_bit;
- nes_write_indexed(nesdev, NES_IDX_NIC_MULTICAST_ALL, nic_active);
- nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL);
- nic_active |= nic_active_bit;
- nes_write_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL, nic_active);
- mc_all_on = 1;
- } else if ((netdev->flags & IFF_ALLMULTI) ||
- (nesvnic->nic_index > 3)) {
- set_allmulti(nesdev, nic_active_bit);
- mc_all_on = 1;
- } else {
- nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_MULTICAST_ALL);
- nic_active &= ~nic_active_bit;
- nes_write_indexed(nesdev, NES_IDX_NIC_MULTICAST_ALL, nic_active);
- nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL);
- nic_active &= ~nic_active_bit;
- nes_write_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL, nic_active);
- }
-
- nes_debug(NES_DBG_NIC_RX, "Number of MC entries = %d, Promiscuous = %d, All Multicast = %d.\n",
- mc_count, !!(netdev->flags & IFF_PROMISC),
- !!(netdev->flags & IFF_ALLMULTI));
- if (!mc_all_on) {
- char *addrs;
- int i;
- struct netdev_hw_addr *ha;
-
- addrs = kmalloc(ETH_ALEN * mc_count, GFP_ATOMIC);
- if (!addrs) {
- set_allmulti(nesdev, nic_active_bit);
- goto unlock;
- }
- i = 0;
- netdev_for_each_mc_addr(ha, netdev)
- memcpy(get_addr(addrs, i++), ha->addr, ETH_ALEN);
-
- perfect_filter_register_address = NES_IDX_PERFECT_FILTER_LOW +
- pft_entries_preallocated * 0x8;
- for (i = 0, mc_index = 0; mc_index < max_pft_entries_avaiable;
- mc_index++) {
- while (i < mc_count && nesvnic->mcrq_mcast_filter &&
- ((mc_nic_index = nesvnic->mcrq_mcast_filter(nesvnic,
- get_addr(addrs, i++))) == 0));
- if (mc_nic_index < 0)
- mc_nic_index = nesvnic->nic_index;
- while (nesadapter->pft_mcast_map[mc_index] < 16 &&
- nesadapter->pft_mcast_map[mc_index] !=
- nesvnic->nic_index &&
- mc_index < max_pft_entries_avaiable) {
- nes_debug(NES_DBG_NIC_RX,
- "mc_index=%d skipping nic_index=%d, "
- "used for=%d \n", mc_index,
- nesvnic->nic_index,
- nesadapter->pft_mcast_map[mc_index]);
- mc_index++;
- }
- if (mc_index >= max_pft_entries_avaiable)
- break;
- if (i < mc_count) {
- char *addr = get_addr(addrs, i++);
-
- nes_debug(NES_DBG_NIC_RX, "Assigning MC Address %pM to register 0x%04X nic_idx=%d\n",
- addr,
- perfect_filter_register_address+(mc_index * 8),
- mc_nic_index);
- macaddr_high = ((u8) addr[0]) << 8;
- macaddr_high += (u8) addr[1];
- macaddr_low = ((u8) addr[2]) << 24;
- macaddr_low += ((u8) addr[3]) << 16;
- macaddr_low += ((u8) addr[4]) << 8;
- macaddr_low += (u8) addr[5];
-
- nes_write_indexed(nesdev,
- perfect_filter_register_address+(mc_index * 8),
- macaddr_low);
- nes_write_indexed(nesdev,
- perfect_filter_register_address+4+(mc_index * 8),
- (u32)macaddr_high | NES_MAC_ADDR_VALID |
- ((((u32)(1<<mc_nic_index)) << 16)));
- nesadapter->pft_mcast_map[mc_index] =
- nesvnic->nic_index;
- } else {
- nes_debug(NES_DBG_NIC_RX, "Clearing MC Address at register 0x%04X\n",
- perfect_filter_register_address+(mc_index * 8));
- nes_write_indexed(nesdev,
- perfect_filter_register_address+4+(mc_index * 8),
- 0);
- nesadapter->pft_mcast_map[mc_index] = 255;
- }
- }
- kfree(addrs);
- /* PFT is not large enough */
- if (i < mc_count)
- set_allmulti(nesdev, nic_active_bit);
- }
-
-unlock:
- spin_unlock_irqrestore(&nesadapter->resource_lock, flags);
-}
-
-
-/**
- * nes_netdev_change_mtu
- */
-static int nes_netdev_change_mtu(struct net_device *netdev, int new_mtu)
-{
- struct nes_vnic *nesvnic = netdev_priv(netdev);
- struct nes_device *nesdev = nesvnic->nesdev;
- u8 jumbomode = 0;
- u32 nic_active;
- u32 nic_active_bit;
- u32 uc_all_active;
- u32 mc_all_active;
-
- netdev->mtu = new_mtu;
- nesvnic->max_frame_size = new_mtu + VLAN_ETH_HLEN;
-
- if (netdev->mtu > ETH_DATA_LEN) {
- jumbomode=1;
- }
- nes_nic_init_timer_defaults(nesdev, jumbomode);
-
- if (netif_running(netdev)) {
- nic_active_bit = 1 << nesvnic->nic_index;
- mc_all_active = nes_read_indexed(nesdev,
- NES_IDX_NIC_MULTICAST_ALL) & nic_active_bit;
- uc_all_active = nes_read_indexed(nesdev,
- NES_IDX_NIC_UNICAST_ALL) & nic_active_bit;
-
- nes_netdev_stop(netdev);
- nes_netdev_open(netdev);
-
- nic_active = nes_read_indexed(nesdev,
- NES_IDX_NIC_MULTICAST_ALL);
- nic_active |= mc_all_active;
- nes_write_indexed(nesdev, NES_IDX_NIC_MULTICAST_ALL,
- nic_active);
-
- nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL);
- nic_active |= uc_all_active;
- nes_write_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL, nic_active);
- }
-
- return 0;
-}
-
-
-static const char nes_ethtool_stringset[][ETH_GSTRING_LEN] = {
- "Link Change Interrupts",
- "Linearized SKBs",
- "T/GSO Requests",
- "Pause Frames Sent",
- "Pause Frames Received",
- "Internal Routing Errors",
- "SQ SW Dropped SKBs",
- "SQ Full",
- "Segmented TSO Requests",
- "Rx Symbol Errors",
- "Rx Jabber Errors",
- "Rx Oversized Frames",
- "Rx Short Frames",
- "Rx Length Errors",
- "Rx CRC Errors",
- "Rx Port Discard",
- "Endnode Rx Discards",
- "Endnode Rx Octets",
- "Endnode Rx Frames",
- "Endnode Tx Octets",
- "Endnode Tx Frames",
- "Tx Errors",
- "mh detected",
- "mh pauses",
- "Retransmission Count",
- "CM Connects",
- "CM Accepts",
- "Disconnects",
- "Connected Events",
- "Connect Requests",
- "CM Rejects",
- "ModifyQP Timeouts",
- "CreateQPs",
- "SW DestroyQPs",
- "DestroyQPs",
- "CM Closes",
- "CM Packets Sent",
- "CM Packets Bounced",
- "CM Packets Created",
- "CM Packets Rcvd",
- "CM Packets Dropped",
- "CM Packets Retrans",
- "CM Listens Created",
- "CM Listens Destroyed",
- "CM Backlog Drops",
- "CM Loopbacks",
- "CM Nodes Created",
- "CM Nodes Destroyed",
- "CM Accel Drops",
- "CM Resets Received",
- "Free 4Kpbls",
- "Free 256pbls",
- "Timer Inits",
- "PAU CreateQPs",
- "PAU DestroyQPs",
-};
-#define NES_ETHTOOL_STAT_COUNT ARRAY_SIZE(nes_ethtool_stringset)
-
-
-/**
- * nes_netdev_get_sset_count
- */
-static int nes_netdev_get_sset_count(struct net_device *netdev, int stringset)
-{
- if (stringset == ETH_SS_STATS)
- return NES_ETHTOOL_STAT_COUNT;
- else
- return -EINVAL;
-}
-
-
-/**
- * nes_netdev_get_strings
- */
-static void nes_netdev_get_strings(struct net_device *netdev, u32 stringset,
- u8 *ethtool_strings)
-{
- if (stringset == ETH_SS_STATS)
- memcpy(ethtool_strings,
- &nes_ethtool_stringset,
- sizeof(nes_ethtool_stringset));
-}
-
-
-/**
- * nes_netdev_get_ethtool_stats
- */
-
-static void nes_netdev_get_ethtool_stats(struct net_device *netdev,
- struct ethtool_stats *target_ethtool_stats, u64 *target_stat_values)
-{
- u64 u64temp;
- struct nes_vnic *nesvnic = netdev_priv(netdev);
- struct nes_device *nesdev = nesvnic->nesdev;
- struct nes_adapter *nesadapter = nesdev->nesadapter;
- u32 nic_count;
- u32 u32temp;
- u32 index = 0;
-
- target_ethtool_stats->n_stats = NES_ETHTOOL_STAT_COUNT;
- target_stat_values[index] = nesvnic->nesdev->link_status_interrupts;
- target_stat_values[++index] = nesvnic->linearized_skbs;
- target_stat_values[++index] = nesvnic->tso_requests;
-
- u32temp = nes_read_indexed(nesdev,
- NES_IDX_MAC_TX_PAUSE_FRAMES + (nesvnic->nesdev->mac_index*0x200));
- nesvnic->nesdev->mac_pause_frames_sent += u32temp;
- target_stat_values[++index] = nesvnic->nesdev->mac_pause_frames_sent;
-
- u32temp = nes_read_indexed(nesdev,
- NES_IDX_MAC_RX_PAUSE_FRAMES + (nesvnic->nesdev->mac_index*0x200));
- nesvnic->nesdev->mac_pause_frames_received += u32temp;
-
- u32temp = nes_read_indexed(nesdev,
- NES_IDX_PORT_RX_DISCARDS + (nesvnic->nesdev->mac_index*0x40));
- nesvnic->nesdev->port_rx_discards += u32temp;
- nesvnic->netstats.rx_dropped += u32temp;
-
- u32temp = nes_read_indexed(nesdev,
- NES_IDX_PORT_TX_DISCARDS + (nesvnic->nesdev->mac_index*0x40));
- nesvnic->nesdev->port_tx_discards += u32temp;
- nesvnic->netstats.tx_dropped += u32temp;
-
- u32temp = nes_read_indexed(nesdev,
- NES_IDX_MAC_RX_SHORT_FRAMES + (nesvnic->nesdev->mac_index*0x200));
- nesvnic->netstats.rx_dropped += u32temp;
- nesvnic->nesdev->mac_rx_errors += u32temp;
- nesvnic->nesdev->mac_rx_short_frames += u32temp;
-
- u32temp = nes_read_indexed(nesdev,
- NES_IDX_MAC_RX_OVERSIZED_FRAMES + (nesvnic->nesdev->mac_index*0x200));
- nesvnic->netstats.rx_dropped += u32temp;
- nesvnic->nesdev->mac_rx_errors += u32temp;
- nesvnic->nesdev->mac_rx_oversized_frames += u32temp;
-
- u32temp = nes_read_indexed(nesdev,
- NES_IDX_MAC_RX_JABBER_FRAMES + (nesvnic->nesdev->mac_index*0x200));
- nesvnic->netstats.rx_dropped += u32temp;
- nesvnic->nesdev->mac_rx_errors += u32temp;
- nesvnic->nesdev->mac_rx_jabber_frames += u32temp;
-
- u32temp = nes_read_indexed(nesdev,
- NES_IDX_MAC_RX_SYMBOL_ERR_FRAMES + (nesvnic->nesdev->mac_index*0x200));
- nesvnic->netstats.rx_dropped += u32temp;
- nesvnic->nesdev->mac_rx_errors += u32temp;
- nesvnic->nesdev->mac_rx_symbol_err_frames += u32temp;
-
- u32temp = nes_read_indexed(nesdev,
- NES_IDX_MAC_RX_LENGTH_ERR_FRAMES + (nesvnic->nesdev->mac_index*0x200));
- nesvnic->netstats.rx_length_errors += u32temp;
- nesvnic->nesdev->mac_rx_errors += u32temp;
-
- u32temp = nes_read_indexed(nesdev,
- NES_IDX_MAC_RX_CRC_ERR_FRAMES + (nesvnic->nesdev->mac_index*0x200));
- nesvnic->nesdev->mac_rx_errors += u32temp;
- nesvnic->nesdev->mac_rx_crc_errors += u32temp;
- nesvnic->netstats.rx_crc_errors += u32temp;
-
- u32temp = nes_read_indexed(nesdev,
- NES_IDX_MAC_TX_ERRORS + (nesvnic->nesdev->mac_index*0x200));
- nesvnic->nesdev->mac_tx_errors += u32temp;
- nesvnic->netstats.tx_errors += u32temp;
-
- for (nic_count = 0; nic_count < NES_MAX_PORT_COUNT; nic_count++) {
- if (nesvnic->qp_nic_index[nic_count] == 0xf)
- break;
-
- u32temp = nes_read_indexed(nesdev,
- NES_IDX_ENDNODE0_NSTAT_RX_DISCARD +
- (nesvnic->qp_nic_index[nic_count]*0x200));
- nesvnic->netstats.rx_dropped += u32temp;
- nesvnic->endnode_nstat_rx_discard += u32temp;
-
- u64temp = (u64)nes_read_indexed(nesdev,
- NES_IDX_ENDNODE0_NSTAT_RX_OCTETS_LO +
- (nesvnic->qp_nic_index[nic_count]*0x200));
- u64temp += ((u64)nes_read_indexed(nesdev,
- NES_IDX_ENDNODE0_NSTAT_RX_OCTETS_HI +
- (nesvnic->qp_nic_index[nic_count]*0x200))) << 32;
-
- nesvnic->endnode_nstat_rx_octets += u64temp;
- nesvnic->netstats.rx_bytes += u64temp;
-
- u64temp = (u64)nes_read_indexed(nesdev,
- NES_IDX_ENDNODE0_NSTAT_RX_FRAMES_LO +
- (nesvnic->qp_nic_index[nic_count]*0x200));
- u64temp += ((u64)nes_read_indexed(nesdev,
- NES_IDX_ENDNODE0_NSTAT_RX_FRAMES_HI +
- (nesvnic->qp_nic_index[nic_count]*0x200))) << 32;
-
- nesvnic->endnode_nstat_rx_frames += u64temp;
- nesvnic->netstats.rx_packets += u64temp;
-
- u64temp = (u64)nes_read_indexed(nesdev,
- NES_IDX_ENDNODE0_NSTAT_TX_OCTETS_LO +
- (nesvnic->qp_nic_index[nic_count]*0x200));
- u64temp += ((u64)nes_read_indexed(nesdev,
- NES_IDX_ENDNODE0_NSTAT_TX_OCTETS_HI +
- (nesvnic->qp_nic_index[nic_count]*0x200))) << 32;
-
- nesvnic->endnode_nstat_tx_octets += u64temp;
- nesvnic->netstats.tx_bytes += u64temp;
-
- u64temp = (u64)nes_read_indexed(nesdev,
- NES_IDX_ENDNODE0_NSTAT_TX_FRAMES_LO +
- (nesvnic->qp_nic_index[nic_count]*0x200));
- u64temp += ((u64)nes_read_indexed(nesdev,
- NES_IDX_ENDNODE0_NSTAT_TX_FRAMES_HI +
- (nesvnic->qp_nic_index[nic_count]*0x200))) << 32;
-
- nesvnic->endnode_nstat_tx_frames += u64temp;
- nesvnic->netstats.tx_packets += u64temp;
-
- u32temp = nes_read_indexed(nesdev,
- NES_IDX_IPV4_TCP_REXMITS + (nesvnic->qp_nic_index[nic_count]*0x200));
- nesvnic->endnode_ipv4_tcp_retransmits += u32temp;
- }
-
- target_stat_values[++index] = nesvnic->nesdev->mac_pause_frames_received;
- target_stat_values[++index] = nesdev->nesadapter->nic_rx_eth_route_err;
- target_stat_values[++index] = nesvnic->tx_sw_dropped;
- target_stat_values[++index] = nesvnic->sq_full;
- target_stat_values[++index] = nesvnic->segmented_tso_requests;
- target_stat_values[++index] = nesvnic->nesdev->mac_rx_symbol_err_frames;
- target_stat_values[++index] = nesvnic->nesdev->mac_rx_jabber_frames;
- target_stat_values[++index] = nesvnic->nesdev->mac_rx_oversized_frames;
- target_stat_values[++index] = nesvnic->nesdev->mac_rx_short_frames;
- target_stat_values[++index] = nesvnic->netstats.rx_length_errors;
- target_stat_values[++index] = nesvnic->nesdev->mac_rx_crc_errors;
- target_stat_values[++index] = nesvnic->nesdev->port_rx_discards;
- target_stat_values[++index] = nesvnic->endnode_nstat_rx_discard;
- target_stat_values[++index] = nesvnic->endnode_nstat_rx_octets;
- target_stat_values[++index] = nesvnic->endnode_nstat_rx_frames;
- target_stat_values[++index] = nesvnic->endnode_nstat_tx_octets;
- target_stat_values[++index] = nesvnic->endnode_nstat_tx_frames;
- target_stat_values[++index] = nesvnic->nesdev->mac_tx_errors;
- target_stat_values[++index] = mh_detected;
- target_stat_values[++index] = mh_pauses_sent;
- target_stat_values[++index] = nesvnic->endnode_ipv4_tcp_retransmits;
- target_stat_values[++index] = atomic_read(&cm_connects);
- target_stat_values[++index] = atomic_read(&cm_accepts);
- target_stat_values[++index] = atomic_read(&cm_disconnects);
- target_stat_values[++index] = atomic_read(&cm_connecteds);
- target_stat_values[++index] = atomic_read(&cm_connect_reqs);
- target_stat_values[++index] = atomic_read(&cm_rejects);
- target_stat_values[++index] = atomic_read(&mod_qp_timouts);
- target_stat_values[++index] = atomic_read(&qps_created);
- target_stat_values[++index] = atomic_read(&sw_qps_destroyed);
- target_stat_values[++index] = atomic_read(&qps_destroyed);
- target_stat_values[++index] = atomic_read(&cm_closes);
- target_stat_values[++index] = cm_packets_sent;
- target_stat_values[++index] = cm_packets_bounced;
- target_stat_values[++index] = cm_packets_created;
- target_stat_values[++index] = cm_packets_received;
- target_stat_values[++index] = cm_packets_dropped;
- target_stat_values[++index] = cm_packets_retrans;
- target_stat_values[++index] = atomic_read(&cm_listens_created);
- target_stat_values[++index] = atomic_read(&cm_listens_destroyed);
- target_stat_values[++index] = cm_backlog_drops;
- target_stat_values[++index] = atomic_read(&cm_loopbacks);
- target_stat_values[++index] = atomic_read(&cm_nodes_created);
- target_stat_values[++index] = atomic_read(&cm_nodes_destroyed);
- target_stat_values[++index] = atomic_read(&cm_accel_dropped_pkts);
- target_stat_values[++index] = atomic_read(&cm_resets_recvd);
- target_stat_values[++index] = nesadapter->free_4kpbl;
- target_stat_values[++index] = nesadapter->free_256pbl;
- target_stat_values[++index] = int_mod_timer_init;
- target_stat_values[++index] = atomic_read(&pau_qps_created);
- target_stat_values[++index] = atomic_read(&pau_qps_destroyed);
-}
-
-/**
- * nes_netdev_get_drvinfo
- */
-static void nes_netdev_get_drvinfo(struct net_device *netdev,
- struct ethtool_drvinfo *drvinfo)
-{
- struct nes_vnic *nesvnic = netdev_priv(netdev);
- struct nes_adapter *nesadapter = nesvnic->nesdev->nesadapter;
-
- strlcpy(drvinfo->driver, DRV_NAME, sizeof(drvinfo->driver));
- strlcpy(drvinfo->bus_info, pci_name(nesvnic->nesdev->pcidev),
- sizeof(drvinfo->bus_info));
- snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
- "%u.%u", nesadapter->firmware_version >> 16,
- nesadapter->firmware_version & 0x000000ff);
- strlcpy(drvinfo->version, DRV_VERSION, sizeof(drvinfo->version));
-}
-
-
-/**
- * nes_netdev_set_coalesce
- */
-static int nes_netdev_set_coalesce(struct net_device *netdev,
- struct ethtool_coalesce *et_coalesce)
-{
- struct nes_vnic *nesvnic = netdev_priv(netdev);
- struct nes_device *nesdev = nesvnic->nesdev;
- struct nes_adapter *nesadapter = nesdev->nesadapter;
- struct nes_hw_tune_timer *shared_timer = &nesadapter->tune_timer;
- unsigned long flags;
-
- spin_lock_irqsave(&nesadapter->periodic_timer_lock, flags);
- if (et_coalesce->rx_max_coalesced_frames_low) {
- shared_timer->threshold_low = et_coalesce->rx_max_coalesced_frames_low;
- }
- if (et_coalesce->rx_max_coalesced_frames_irq) {
- shared_timer->threshold_target = et_coalesce->rx_max_coalesced_frames_irq;
- }
- if (et_coalesce->rx_max_coalesced_frames_high) {
- shared_timer->threshold_high = et_coalesce->rx_max_coalesced_frames_high;
- }
- if (et_coalesce->rx_coalesce_usecs_low) {
- shared_timer->timer_in_use_min = et_coalesce->rx_coalesce_usecs_low;
- }
- if (et_coalesce->rx_coalesce_usecs_high) {
- shared_timer->timer_in_use_max = et_coalesce->rx_coalesce_usecs_high;
- }
- spin_unlock_irqrestore(&nesadapter->periodic_timer_lock, flags);
-
- /* using this to drive total interrupt moderation */
- nesadapter->et_rx_coalesce_usecs_irq = et_coalesce->rx_coalesce_usecs_irq;
- if (et_coalesce->use_adaptive_rx_coalesce) {
- nesadapter->et_use_adaptive_rx_coalesce = 1;
- nesadapter->timer_int_limit = NES_TIMER_INT_LIMIT_DYNAMIC;
- nesadapter->et_rx_coalesce_usecs_irq = 0;
- if (et_coalesce->pkt_rate_low) {
- nesadapter->et_pkt_rate_low = et_coalesce->pkt_rate_low;
- }
- } else {
- nesadapter->et_use_adaptive_rx_coalesce = 0;
- nesadapter->timer_int_limit = NES_TIMER_INT_LIMIT;
- if (nesadapter->et_rx_coalesce_usecs_irq) {
- nes_write32(nesdev->regs+NES_PERIODIC_CONTROL,
- 0x80000000 | ((u32)(nesadapter->et_rx_coalesce_usecs_irq*8)));
- }
- }
- return 0;
-}
-
-
-/**
- * nes_netdev_get_coalesce
- */
-static int nes_netdev_get_coalesce(struct net_device *netdev,
- struct ethtool_coalesce *et_coalesce)
-{
- struct nes_vnic *nesvnic = netdev_priv(netdev);
- struct nes_device *nesdev = nesvnic->nesdev;
- struct nes_adapter *nesadapter = nesdev->nesadapter;
- struct ethtool_coalesce temp_et_coalesce;
- struct nes_hw_tune_timer *shared_timer = &nesadapter->tune_timer;
- unsigned long flags;
-
- memset(&temp_et_coalesce, 0, sizeof(temp_et_coalesce));
- temp_et_coalesce.rx_coalesce_usecs_irq = nesadapter->et_rx_coalesce_usecs_irq;
- temp_et_coalesce.use_adaptive_rx_coalesce = nesadapter->et_use_adaptive_rx_coalesce;
- temp_et_coalesce.rate_sample_interval = nesadapter->et_rate_sample_interval;
- temp_et_coalesce.pkt_rate_low = nesadapter->et_pkt_rate_low;
- spin_lock_irqsave(&nesadapter->periodic_timer_lock, flags);
- temp_et_coalesce.rx_max_coalesced_frames_low = shared_timer->threshold_low;
- temp_et_coalesce.rx_max_coalesced_frames_irq = shared_timer->threshold_target;
- temp_et_coalesce.rx_max_coalesced_frames_high = shared_timer->threshold_high;
- temp_et_coalesce.rx_coalesce_usecs_low = shared_timer->timer_in_use_min;
- temp_et_coalesce.rx_coalesce_usecs_high = shared_timer->timer_in_use_max;
- if (nesadapter->et_use_adaptive_rx_coalesce) {
- temp_et_coalesce.rx_coalesce_usecs_irq = shared_timer->timer_in_use;
- }
- spin_unlock_irqrestore(&nesadapter->periodic_timer_lock, flags);
- memcpy(et_coalesce, &temp_et_coalesce, sizeof(*et_coalesce));
- return 0;
-}
-
-
-/**
- * nes_netdev_get_pauseparam
- */
-static void nes_netdev_get_pauseparam(struct net_device *netdev,
- struct ethtool_pauseparam *et_pauseparam)
-{
- struct nes_vnic *nesvnic = netdev_priv(netdev);
-
- et_pauseparam->autoneg = 0;
- et_pauseparam->rx_pause = (nesvnic->nesdev->disable_rx_flow_control == 0) ? 1:0;
- et_pauseparam->tx_pause = (nesvnic->nesdev->disable_tx_flow_control == 0) ? 1:0;
-}
-
-
-/**
- * nes_netdev_set_pauseparam
- */
-static int nes_netdev_set_pauseparam(struct net_device *netdev,
- struct ethtool_pauseparam *et_pauseparam)
-{
- struct nes_vnic *nesvnic = netdev_priv(netdev);
- struct nes_device *nesdev = nesvnic->nesdev;
- u32 u32temp;
-
- if (et_pauseparam->autoneg) {
- /* TODO: should return unsupported */
- return 0;
- }
- if ((et_pauseparam->tx_pause == 1) && (nesdev->disable_tx_flow_control == 1)) {
- u32temp = nes_read_indexed(nesdev,
- NES_IDX_MAC_TX_CONFIG + (nesdev->mac_index*0x200));
- u32temp |= NES_IDX_MAC_TX_CONFIG_ENABLE_PAUSE;
- nes_write_indexed(nesdev,
- NES_IDX_MAC_TX_CONFIG + (nesdev->mac_index*0x200), u32temp);
- nesdev->disable_tx_flow_control = 0;
- } else if ((et_pauseparam->tx_pause == 0) && (nesdev->disable_tx_flow_control == 0)) {
- u32temp = nes_read_indexed(nesdev,
- NES_IDX_MAC_TX_CONFIG + (nesdev->mac_index*0x200));
- u32temp &= ~NES_IDX_MAC_TX_CONFIG_ENABLE_PAUSE;
- nes_write_indexed(nesdev,
- NES_IDX_MAC_TX_CONFIG + (nesdev->mac_index*0x200), u32temp);
- nesdev->disable_tx_flow_control = 1;
- }
- if ((et_pauseparam->rx_pause == 1) && (nesdev->disable_rx_flow_control == 1)) {
- u32temp = nes_read_indexed(nesdev,
- NES_IDX_MPP_DEBUG + (nesdev->mac_index*0x40));
- u32temp &= ~NES_IDX_MPP_DEBUG_PORT_DISABLE_PAUSE;
- nes_write_indexed(nesdev,
- NES_IDX_MPP_DEBUG + (nesdev->mac_index*0x40), u32temp);
- nesdev->disable_rx_flow_control = 0;
- } else if ((et_pauseparam->rx_pause == 0) && (nesdev->disable_rx_flow_control == 0)) {
- u32temp = nes_read_indexed(nesdev,
- NES_IDX_MPP_DEBUG + (nesdev->mac_index*0x40));
- u32temp |= NES_IDX_MPP_DEBUG_PORT_DISABLE_PAUSE;
- nes_write_indexed(nesdev,
- NES_IDX_MPP_DEBUG + (nesdev->mac_index*0x40), u32temp);
- nesdev->disable_rx_flow_control = 1;
- }
-
- return 0;
-}
-
-
-/**
- * nes_netdev_get_settings
- */
-static int nes_netdev_get_link_ksettings(struct net_device *netdev,
- struct ethtool_link_ksettings *cmd)
-{
- struct nes_vnic *nesvnic = netdev_priv(netdev);
- struct nes_device *nesdev = nesvnic->nesdev;
- struct nes_adapter *nesadapter = nesdev->nesadapter;
- u32 mac_index = nesdev->mac_index;
- u8 phy_type = nesadapter->phy_type[mac_index];
- u8 phy_index = nesadapter->phy_index[mac_index];
- u16 phy_data;
- u32 supported, advertising;
-
- cmd->base.duplex = DUPLEX_FULL;
- cmd->base.port = PORT_MII;
-
- if (nesadapter->OneG_Mode) {
- cmd->base.speed = SPEED_1000;
- if (phy_type == NES_PHY_TYPE_PUMA_1G) {
- supported = SUPPORTED_1000baseT_Full;
- advertising = ADVERTISED_1000baseT_Full;
- cmd->base.autoneg = AUTONEG_DISABLE;
- cmd->base.phy_address = mac_index;
- } else {
- unsigned long flags;
-
- supported = SUPPORTED_1000baseT_Full
- | SUPPORTED_Autoneg;
- advertising = ADVERTISED_1000baseT_Full
- | ADVERTISED_Autoneg;
- spin_lock_irqsave(&nesadapter->phy_lock, flags);
- nes_read_1G_phy_reg(nesdev, 0, phy_index, &phy_data);
- spin_unlock_irqrestore(&nesadapter->phy_lock, flags);
- if (phy_data & 0x1000)
- cmd->base.autoneg = AUTONEG_ENABLE;
- else
- cmd->base.autoneg = AUTONEG_DISABLE;
- cmd->base.phy_address = phy_index;
- }
- ethtool_convert_legacy_u32_to_link_mode(
- cmd->link_modes.supported, supported);
- ethtool_convert_legacy_u32_to_link_mode(
- cmd->link_modes.advertising, advertising);
- return 0;
- }
- if ((phy_type == NES_PHY_TYPE_ARGUS) ||
- (phy_type == NES_PHY_TYPE_SFP_D) ||
- (phy_type == NES_PHY_TYPE_KR)) {
- cmd->base.port = PORT_FIBRE;
- supported = SUPPORTED_FIBRE;
- advertising = ADVERTISED_FIBRE;
- cmd->base.phy_address = phy_index;
- } else {
- supported = SUPPORTED_10000baseT_Full;
- advertising = ADVERTISED_10000baseT_Full;
- cmd->base.phy_address = mac_index;
- }
- cmd->base.speed = SPEED_10000;
- cmd->base.autoneg = AUTONEG_DISABLE;
- ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
- supported);
- ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
- advertising);
-
- return 0;
-}
-
-
-/**
- * nes_netdev_set_settings
- */
-static int
-nes_netdev_set_link_ksettings(struct net_device *netdev,
- const struct ethtool_link_ksettings *cmd)
-{
- struct nes_vnic *nesvnic = netdev_priv(netdev);
- struct nes_device *nesdev = nesvnic->nesdev;
- struct nes_adapter *nesadapter = nesdev->nesadapter;
-
- if ((nesadapter->OneG_Mode) &&
- (nesadapter->phy_type[nesdev->mac_index] != NES_PHY_TYPE_PUMA_1G)) {
- unsigned long flags;
- u16 phy_data;
- u8 phy_index = nesadapter->phy_index[nesdev->mac_index];
-
- spin_lock_irqsave(&nesadapter->phy_lock, flags);
- nes_read_1G_phy_reg(nesdev, 0, phy_index, &phy_data);
- if (cmd->base.autoneg) {
- /* Turn on Full duplex, Autoneg, and restart autonegotiation */
- phy_data |= 0x1300;
- } else {
- /* Turn off autoneg */
- phy_data &= ~0x1000;
- }
- nes_write_1G_phy_reg(nesdev, 0, phy_index, phy_data);
- spin_unlock_irqrestore(&nesadapter->phy_lock, flags);
- }
-
- return 0;
-}
-
-
-static const struct ethtool_ops nes_ethtool_ops = {
- .get_link = ethtool_op_get_link,
- .get_strings = nes_netdev_get_strings,
- .get_sset_count = nes_netdev_get_sset_count,
- .get_ethtool_stats = nes_netdev_get_ethtool_stats,
- .get_drvinfo = nes_netdev_get_drvinfo,
- .get_coalesce = nes_netdev_get_coalesce,
- .set_coalesce = nes_netdev_set_coalesce,
- .get_pauseparam = nes_netdev_get_pauseparam,
- .set_pauseparam = nes_netdev_set_pauseparam,
- .get_link_ksettings = nes_netdev_get_link_ksettings,
- .set_link_ksettings = nes_netdev_set_link_ksettings,
-};
-
-static void nes_vlan_mode(struct net_device *netdev, struct nes_device *nesdev, netdev_features_t features)
-{
- struct nes_adapter *nesadapter = nesdev->nesadapter;
- u32 u32temp;
- unsigned long flags;
-
- spin_lock_irqsave(&nesadapter->phy_lock, flags);
-
- nes_debug(NES_DBG_NETDEV, "%s: %s\n", __func__, netdev->name);
-
- /* Enable/Disable VLAN Stripping */
- u32temp = nes_read_indexed(nesdev, NES_IDX_PCIX_DIAG);
- if (features & NETIF_F_HW_VLAN_CTAG_RX)
- u32temp &= 0xfdffffff;
- else
- u32temp |= 0x02000000;
-
- nes_write_indexed(nesdev, NES_IDX_PCIX_DIAG, u32temp);
- spin_unlock_irqrestore(&nesadapter->phy_lock, flags);
-}
-
-static netdev_features_t nes_fix_features(struct net_device *netdev, netdev_features_t features)
-{
- /*
- * Since there is no support for separate rx/tx vlan accel
- * enable/disable make sure tx flag is always in same state as rx.
- */
- if (features & NETIF_F_HW_VLAN_CTAG_RX)
- features |= NETIF_F_HW_VLAN_CTAG_TX;
- else
- features &= ~NETIF_F_HW_VLAN_CTAG_TX;
-
- return features;
-}
-
-static int nes_set_features(struct net_device *netdev, netdev_features_t features)
-{
- struct nes_vnic *nesvnic = netdev_priv(netdev);
- struct nes_device *nesdev = nesvnic->nesdev;
- u32 changed = netdev->features ^ features;
-
- if (changed & NETIF_F_HW_VLAN_CTAG_RX)
- nes_vlan_mode(netdev, nesdev, features);
-
- return 0;
-}
-
-static const struct net_device_ops nes_netdev_ops = {
- .ndo_open = nes_netdev_open,
- .ndo_stop = nes_netdev_stop,
- .ndo_start_xmit = nes_netdev_start_xmit,
- .ndo_get_stats = nes_netdev_get_stats,
- .ndo_tx_timeout = nes_netdev_tx_timeout,
- .ndo_set_mac_address = nes_netdev_set_mac_address,
- .ndo_set_rx_mode = nes_netdev_set_multicast_list,
- .ndo_change_mtu = nes_netdev_change_mtu,
- .ndo_validate_addr = eth_validate_addr,
- .ndo_fix_features = nes_fix_features,
- .ndo_set_features = nes_set_features,
-};
-
-/**
- * nes_netdev_init - initialize network device
- */
-struct net_device *nes_netdev_init(struct nes_device *nesdev,
- void __iomem *mmio_addr)
-{
- u64 u64temp;
- struct nes_vnic *nesvnic;
- struct net_device *netdev;
- struct nic_qp_map *curr_qp_map;
- u8 phy_type = nesdev->nesadapter->phy_type[nesdev->mac_index];
-
- netdev = alloc_etherdev(sizeof(struct nes_vnic));
- if (!netdev) {
- printk(KERN_ERR PFX "nesvnic etherdev alloc failed");
- return NULL;
- }
- nesvnic = netdev_priv(netdev);
-
- nes_debug(NES_DBG_INIT, "netdev = %p, %s\n", netdev, netdev->name);
-
- SET_NETDEV_DEV(netdev, &nesdev->pcidev->dev);
-
- netdev->watchdog_timeo = NES_TX_TIMEOUT;
- netdev->irq = nesdev->pcidev->irq;
- netdev->max_mtu = NES_MAX_MTU;
- netdev->hard_header_len = ETH_HLEN;
- netdev->addr_len = ETH_ALEN;
- netdev->type = ARPHRD_ETHER;
- netdev->netdev_ops = &nes_netdev_ops;
- netdev->ethtool_ops = &nes_ethtool_ops;
- netif_napi_add(netdev, &nesvnic->napi, nes_netdev_poll, 128);
- nes_debug(NES_DBG_INIT, "Enabling VLAN Insert/Delete.\n");
-
- /* Fill in the port structure */
- nesvnic->netdev = netdev;
- nesvnic->nesdev = nesdev;
- nesvnic->msg_enable = netif_msg_init(debug, default_msg);
- nesvnic->netdev_index = nesdev->netdev_count;
- nesvnic->perfect_filter_index = nesdev->nesadapter->netdev_count;
- nesvnic->max_frame_size = netdev->mtu + netdev->hard_header_len + VLAN_HLEN;
-
- curr_qp_map = nic_qp_mapping_per_function[PCI_FUNC(nesdev->pcidev->devfn)];
- nesvnic->nic.qp_id = curr_qp_map[nesdev->netdev_count].qpid;
- nesvnic->nic_index = curr_qp_map[nesdev->netdev_count].nic_index;
- nesvnic->logical_port = curr_qp_map[nesdev->netdev_count].logical_port;
-
- /* Setup the burned in MAC address */
- u64temp = (u64)nesdev->nesadapter->mac_addr_low;
- u64temp += ((u64)nesdev->nesadapter->mac_addr_high) << 32;
- u64temp += nesvnic->nic_index;
- netdev->dev_addr[0] = (u8)(u64temp>>40);
- netdev->dev_addr[1] = (u8)(u64temp>>32);
- netdev->dev_addr[2] = (u8)(u64temp>>24);
- netdev->dev_addr[3] = (u8)(u64temp>>16);
- netdev->dev_addr[4] = (u8)(u64temp>>8);
- netdev->dev_addr[5] = (u8)u64temp;
-
- netdev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_RXCSUM | NETIF_F_HW_VLAN_CTAG_RX;
- if ((nesvnic->logical_port < 2) || (nesdev->nesadapter->hw_rev != NE020_REV))
- netdev->hw_features |= NETIF_F_TSO;
-
- netdev->features = netdev->hw_features | NETIF_F_HIGHDMA | NETIF_F_HW_VLAN_CTAG_TX;
-
- nes_debug(NES_DBG_INIT, "nesvnic = %p, reported features = 0x%lX, QPid = %d,"
- " nic_index = %d, logical_port = %d, mac_index = %d.\n",
- nesvnic, (unsigned long)netdev->features, nesvnic->nic.qp_id,
- nesvnic->nic_index, nesvnic->logical_port, nesdev->mac_index);
-
- if (nesvnic->nesdev->nesadapter->port_count == 1 &&
- nesvnic->nesdev->nesadapter->adapter_fcn_count == 1) {
-
- nesvnic->qp_nic_index[0] = nesvnic->nic_index;
- nesvnic->qp_nic_index[1] = nesvnic->nic_index + 1;
- if (nes_drv_opt & NES_DRV_OPT_DUAL_LOGICAL_PORT) {
- nesvnic->qp_nic_index[2] = 0xf;
- nesvnic->qp_nic_index[3] = 0xf;
- } else {
- nesvnic->qp_nic_index[2] = nesvnic->nic_index + 2;
- nesvnic->qp_nic_index[3] = nesvnic->nic_index + 3;
- }
- } else {
- if (nesvnic->nesdev->nesadapter->port_count == 2 ||
- (nesvnic->nesdev->nesadapter->port_count == 1 &&
- nesvnic->nesdev->nesadapter->adapter_fcn_count == 2)) {
- nesvnic->qp_nic_index[0] = nesvnic->nic_index;
- nesvnic->qp_nic_index[1] = nesvnic->nic_index
- + 2;
- nesvnic->qp_nic_index[2] = 0xf;
- nesvnic->qp_nic_index[3] = 0xf;
- } else {
- nesvnic->qp_nic_index[0] = nesvnic->nic_index;
- nesvnic->qp_nic_index[1] = 0xf;
- nesvnic->qp_nic_index[2] = 0xf;
- nesvnic->qp_nic_index[3] = 0xf;
- }
- }
- nesvnic->next_qp_nic_index = 0;
-
- if (nesdev->netdev_count == 0) {
- nesvnic->rdma_enabled = 1;
- } else {
- nesvnic->rdma_enabled = 0;
- }
- nesvnic->nic_cq.cq_number = nesvnic->nic.qp_id;
- init_timer(&nesvnic->event_timer);
- nesvnic->event_timer.function = NULL;
- spin_lock_init(&nesvnic->tx_lock);
- spin_lock_init(&nesvnic->port_ibevent_lock);
- nesdev->netdev[nesdev->netdev_count] = netdev;
-
- nes_debug(NES_DBG_INIT, "Adding nesvnic (%p) to the adapters nesvnic_list for MAC%d.\n",
- nesvnic, nesdev->mac_index);
- list_add_tail(&nesvnic->list, &nesdev->nesadapter->nesvnic_list[nesdev->mac_index]);
-
- if ((nesdev->netdev_count == 0) &&
- ((PCI_FUNC(nesdev->pcidev->devfn) == nesdev->mac_index) ||
- ((phy_type == NES_PHY_TYPE_PUMA_1G) &&
- (((PCI_FUNC(nesdev->pcidev->devfn) == 1) && (nesdev->mac_index == 2)) ||
- ((PCI_FUNC(nesdev->pcidev->devfn) == 2) && (nesdev->mac_index == 1)))))) {
- u32 u32temp;
- u32 link_mask = 0;
- u32 link_val = 0;
- u16 temp_phy_data;
- u16 phy_data = 0;
- unsigned long flags;
-
- u32temp = nes_read_indexed(nesdev, NES_IDX_PHY_PCS_CONTROL_STATUS0 +
- (0x200 * (nesdev->mac_index & 1)));
- if (phy_type != NES_PHY_TYPE_PUMA_1G) {
- u32temp |= 0x00200000;
- nes_write_indexed(nesdev, NES_IDX_PHY_PCS_CONTROL_STATUS0 +
- (0x200 * (nesdev->mac_index & 1)), u32temp);
- }
-
- /* Check and set linkup here. This is for back to back */
- /* configuration where second port won't get link interrupt */
- switch (phy_type) {
- case NES_PHY_TYPE_PUMA_1G:
- if (nesdev->mac_index < 2) {
- link_mask = 0x01010000;
- link_val = 0x01010000;
- } else {
- link_mask = 0x02020000;
- link_val = 0x02020000;
- }
- break;
- case NES_PHY_TYPE_SFP_D:
- spin_lock_irqsave(&nesdev->nesadapter->phy_lock, flags);
- nes_read_10G_phy_reg(nesdev,
- nesdev->nesadapter->phy_index[nesdev->mac_index],
- 1, 0x9003);
- temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
- nes_read_10G_phy_reg(nesdev,
- nesdev->nesadapter->phy_index[nesdev->mac_index],
- 3, 0x0021);
- nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
- nes_read_10G_phy_reg(nesdev,
- nesdev->nesadapter->phy_index[nesdev->mac_index],
- 3, 0x0021);
- phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
- spin_unlock_irqrestore(&nesdev->nesadapter->phy_lock, flags);
- phy_data = (!temp_phy_data && (phy_data == 0x8000)) ? 0x4 : 0x0;
- break;
- default:
- link_mask = 0x0f1f0000;
- link_val = 0x0f0f0000;
- break;
- }
-
- u32temp = nes_read_indexed(nesdev,
- NES_IDX_PHY_PCS_CONTROL_STATUS0 +
- (0x200 * (nesdev->mac_index & 1)));
-
- if (phy_type == NES_PHY_TYPE_SFP_D) {
- if (phy_data & 0x0004)
- nesvnic->linkup = 1;
- } else {
- if ((u32temp & link_mask) == link_val)
- nesvnic->linkup = 1;
- }
-
- /* clear the MAC interrupt status, assumes direct logical to physical mapping */
- u32temp = nes_read_indexed(nesdev, NES_IDX_MAC_INT_STATUS + (0x200 * nesdev->mac_index));
- nes_debug(NES_DBG_INIT, "Phy interrupt status = 0x%X.\n", u32temp);
- nes_write_indexed(nesdev, NES_IDX_MAC_INT_STATUS + (0x200 * nesdev->mac_index), u32temp);
-
- nes_init_phy(nesdev);
- }
-
- nes_vlan_mode(netdev, nesdev, netdev->features);
-
- return netdev;
-}
-
-
-/**
- * nes_netdev_destroy - destroy network device structure
- */
-void nes_netdev_destroy(struct net_device *netdev)
-{
- struct nes_vnic *nesvnic = netdev_priv(netdev);
-
- /* make sure 'stop' method is called by Linux stack */
- /* nes_netdev_stop(netdev); */
-
- list_del(&nesvnic->list);
-
- if (nesvnic->of_device_registered) {
- nes_destroy_ofa_device(nesvnic->nesibdev);
- }
-
- free_netdev(netdev);
-}
-
-
-/**
- * nes_nic_cm_xmit -- CM calls this to send out pkts
- */
-int nes_nic_cm_xmit(struct sk_buff *skb, struct net_device *netdev)
-{
- int ret;
-
- skb->dev = netdev;
- ret = dev_queue_xmit(skb);
- if (ret) {
- nes_debug(NES_DBG_CM, "Bad return code from dev_queue_xmit %d\n", ret);
- }
-
- return ret;
-}
diff --git a/drivers/infiniband/hw/nes/nes_utils.c b/drivers/infiniband/hw/nes/nes_utils.c
deleted file mode 100644
index 37331e2fdc5f..000000000000
--- a/drivers/infiniband/hw/nes/nes_utils.c
+++ /dev/null
@@ -1,918 +0,0 @@
-/*
- * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/ethtool.h>
-#include <linux/mii.h>
-#include <linux/if_vlan.h>
-#include <linux/slab.h>
-#include <linux/crc32.h>
-#include <linux/in.h>
-#include <linux/ip.h>
-#include <linux/tcp.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-
-#include <asm/io.h>
-#include <asm/irq.h>
-#include <asm/byteorder.h>
-
-#include "nes.h"
-
-static u16 nes_read16_eeprom(void __iomem *addr, u16 offset);
-
-u32 mh_detected;
-u32 mh_pauses_sent;
-
-static u32 nes_set_pau(struct nes_device *nesdev)
-{
- u32 ret = 0;
- u32 counter;
-
- nes_write_indexed(nesdev, NES_IDX_GPR2, NES_ENABLE_PAU);
- nes_write_indexed(nesdev, NES_IDX_GPR_TRIGGER, 1);
-
- for (counter = 0; counter < NES_PAU_COUNTER; counter++) {
- udelay(30);
- if (!nes_read_indexed(nesdev, NES_IDX_GPR2)) {
- printk(KERN_INFO PFX "PAU is supported.\n");
- break;
- }
- nes_write_indexed(nesdev, NES_IDX_GPR_TRIGGER, 1);
- }
- if (counter == NES_PAU_COUNTER) {
- printk(KERN_INFO PFX "PAU is not supported.\n");
- return -EPERM;
- }
- return ret;
-}
-
-/**
- * nes_read_eeprom_values -
- */
-int nes_read_eeprom_values(struct nes_device *nesdev, struct nes_adapter *nesadapter)
-{
- u32 mac_addr_low;
- u16 mac_addr_high;
- u16 eeprom_data;
- u16 eeprom_offset;
- u16 next_section_address;
- u16 sw_section_ver;
- u8 major_ver = 0;
- u8 minor_ver = 0;
-
- /* TODO: deal with EEPROM endian issues */
- if (nesadapter->firmware_eeprom_offset == 0) {
- /* Read the EEPROM Parameters */
- eeprom_data = nes_read16_eeprom(nesdev->regs, 0);
- nes_debug(NES_DBG_HW, "EEPROM Offset 0 = 0x%04X\n", eeprom_data);
- eeprom_offset = 2 + (((eeprom_data & 0x007f) << 3) <<
- ((eeprom_data & 0x0080) >> 7));
- nes_debug(NES_DBG_HW, "Firmware Offset = 0x%04X\n", eeprom_offset);
- nesadapter->firmware_eeprom_offset = eeprom_offset;
- eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset + 4);
- if (eeprom_data != 0x5746) {
- nes_debug(NES_DBG_HW, "Not a valid Firmware Image = 0x%04X\n", eeprom_data);
- return -1;
- }
-
- eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset + 2);
- nes_debug(NES_DBG_HW, "EEPROM Offset %u = 0x%04X\n",
- eeprom_offset + 2, eeprom_data);
- eeprom_offset += ((eeprom_data & 0x00ff) << 3) << ((eeprom_data & 0x0100) >> 8);
- nes_debug(NES_DBG_HW, "Software Offset = 0x%04X\n", eeprom_offset);
- nesadapter->software_eeprom_offset = eeprom_offset;
- eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset + 4);
- if (eeprom_data != 0x5753) {
- printk("Not a valid Software Image = 0x%04X\n", eeprom_data);
- return -1;
- }
- sw_section_ver = nes_read16_eeprom(nesdev->regs, nesadapter->software_eeprom_offset + 6);
- nes_debug(NES_DBG_HW, "Software section version number = 0x%04X\n",
- sw_section_ver);
-
- eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset + 2);
- nes_debug(NES_DBG_HW, "EEPROM Offset %u (next section) = 0x%04X\n",
- eeprom_offset + 2, eeprom_data);
- next_section_address = eeprom_offset + (((eeprom_data & 0x00ff) << 3) <<
- ((eeprom_data & 0x0100) >> 8));
- eeprom_data = nes_read16_eeprom(nesdev->regs, next_section_address + 4);
- if (eeprom_data != 0x414d) {
- nes_debug(NES_DBG_HW, "EEPROM Changed offset should be 0x414d but was 0x%04X\n",
- eeprom_data);
- goto no_fw_rev;
- }
- eeprom_offset = next_section_address;
-
- eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset + 2);
- nes_debug(NES_DBG_HW, "EEPROM Offset %u (next section) = 0x%04X\n",
- eeprom_offset + 2, eeprom_data);
- next_section_address = eeprom_offset + (((eeprom_data & 0x00ff) << 3) <<
- ((eeprom_data & 0x0100) >> 8));
- eeprom_data = nes_read16_eeprom(nesdev->regs, next_section_address + 4);
- if (eeprom_data != 0x4f52) {
- nes_debug(NES_DBG_HW, "EEPROM Changed offset should be 0x4f52 but was 0x%04X\n",
- eeprom_data);
- goto no_fw_rev;
- }
- eeprom_offset = next_section_address;
-
- eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset + 2);
- nes_debug(NES_DBG_HW, "EEPROM Offset %u (next section) = 0x%04X\n",
- eeprom_offset + 2, eeprom_data);
- next_section_address = eeprom_offset + ((eeprom_data & 0x00ff) << 3);
- eeprom_data = nes_read16_eeprom(nesdev->regs, next_section_address + 4);
- if (eeprom_data != 0x5746) {
- nes_debug(NES_DBG_HW, "EEPROM Changed offset should be 0x5746 but was 0x%04X\n",
- eeprom_data);
- goto no_fw_rev;
- }
- eeprom_offset = next_section_address;
-
- eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset + 2);
- nes_debug(NES_DBG_HW, "EEPROM Offset %u (next section) = 0x%04X\n",
- eeprom_offset + 2, eeprom_data);
- next_section_address = eeprom_offset + ((eeprom_data & 0x00ff) << 3);
- eeprom_data = nes_read16_eeprom(nesdev->regs, next_section_address + 4);
- if (eeprom_data != 0x5753) {
- nes_debug(NES_DBG_HW, "EEPROM Changed offset should be 0x5753 but was 0x%04X\n",
- eeprom_data);
- goto no_fw_rev;
- }
- eeprom_offset = next_section_address;
-
- eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset + 2);
- nes_debug(NES_DBG_HW, "EEPROM Offset %u (next section) = 0x%04X\n",
- eeprom_offset + 2, eeprom_data);
- next_section_address = eeprom_offset + ((eeprom_data & 0x00ff) << 3);
- eeprom_data = nes_read16_eeprom(nesdev->regs, next_section_address + 4);
- if (eeprom_data != 0x414d) {
- nes_debug(NES_DBG_HW, "EEPROM Changed offset should be 0x414d but was 0x%04X\n",
- eeprom_data);
- goto no_fw_rev;
- }
- eeprom_offset = next_section_address;
-
- eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset + 2);
- nes_debug(NES_DBG_HW, "EEPROM Offset %u (next section) = 0x%04X\n",
- eeprom_offset + 2, eeprom_data);
- next_section_address = eeprom_offset + ((eeprom_data & 0x00ff) << 3);
- eeprom_data = nes_read16_eeprom(nesdev->regs, next_section_address + 4);
- if (eeprom_data != 0x464e) {
- nes_debug(NES_DBG_HW, "EEPROM Changed offset should be 0x464e but was 0x%04X\n",
- eeprom_data);
- goto no_fw_rev;
- }
- eeprom_data = nes_read16_eeprom(nesdev->regs, next_section_address + 8);
- printk(PFX "Firmware version %u.%u\n", (u8)(eeprom_data>>8), (u8)eeprom_data);
- major_ver = (u8)(eeprom_data >> 8);
- minor_ver = (u8)(eeprom_data);
-
- if (nes_drv_opt & NES_DRV_OPT_DISABLE_VIRT_WQ) {
- nes_debug(NES_DBG_HW, "Virtual WQs have been disabled\n");
- } else if (((major_ver == 2) && (minor_ver > 21)) || ((major_ver > 2) && (major_ver != 255))) {
- nesadapter->virtwq = 1;
- }
- if (((major_ver == 3) && (minor_ver >= 16)) || (major_ver > 3))
- nesadapter->send_term_ok = 1;
-
- if (nes_drv_opt & NES_DRV_OPT_ENABLE_PAU) {
- if (!nes_set_pau(nesdev))
- nesadapter->allow_unaligned_fpdus = 1;
- }
-
- nesadapter->firmware_version = (((u32)(u8)(eeprom_data>>8)) << 16) +
- (u32)((u8)eeprom_data);
-
- eeprom_data = nes_read16_eeprom(nesdev->regs, next_section_address + 10);
- printk(PFX "EEPROM version %u.%u\n", (u8)(eeprom_data>>8), (u8)eeprom_data);
- nesadapter->eeprom_version = (((u32)(u8)(eeprom_data>>8)) << 16) +
- (u32)((u8)eeprom_data);
-
-no_fw_rev:
- /* eeprom is valid */
- eeprom_offset = nesadapter->software_eeprom_offset;
- eeprom_offset += 8;
- nesadapter->netdev_max = (u8)nes_read16_eeprom(nesdev->regs, eeprom_offset);
- eeprom_offset += 2;
- mac_addr_high = nes_read16_eeprom(nesdev->regs, eeprom_offset);
- eeprom_offset += 2;
- mac_addr_low = (u32)nes_read16_eeprom(nesdev->regs, eeprom_offset);
- eeprom_offset += 2;
- mac_addr_low <<= 16;
- mac_addr_low += (u32)nes_read16_eeprom(nesdev->regs, eeprom_offset);
- nes_debug(NES_DBG_HW, "Base MAC Address = 0x%04X%08X\n",
- mac_addr_high, mac_addr_low);
- nes_debug(NES_DBG_HW, "MAC Address count = %u\n", nesadapter->netdev_max);
-
- nesadapter->mac_addr_low = mac_addr_low;
- nesadapter->mac_addr_high = mac_addr_high;
-
- /* Read the Phy Type array */
- eeprom_offset += 10;
- eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset);
- nesadapter->phy_type[0] = (u8)(eeprom_data >> 8);
- nesadapter->phy_type[1] = (u8)eeprom_data;
-
- /* Read the port array */
- eeprom_offset += 2;
- eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset);
- nesadapter->phy_type[2] = (u8)(eeprom_data >> 8);
- nesadapter->phy_type[3] = (u8)eeprom_data;
- /* port_count is set by soft reset reg */
- nes_debug(NES_DBG_HW, "port_count = %u, port 0 -> %u, port 1 -> %u,"
- " port 2 -> %u, port 3 -> %u\n",
- nesadapter->port_count,
- nesadapter->phy_type[0], nesadapter->phy_type[1],
- nesadapter->phy_type[2], nesadapter->phy_type[3]);
-
- /* Read PD config array */
- eeprom_offset += 10;
- eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset);
- nesadapter->pd_config_size[0] = eeprom_data;
- eeprom_offset += 2;
- eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset);
- nesadapter->pd_config_base[0] = eeprom_data;
- nes_debug(NES_DBG_HW, "PD0 config, size=0x%04x, base=0x%04x\n",
- nesadapter->pd_config_size[0], nesadapter->pd_config_base[0]);
-
- eeprom_offset += 2;
- eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset);
- nesadapter->pd_config_size[1] = eeprom_data;
- eeprom_offset += 2;
- eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset);
- nesadapter->pd_config_base[1] = eeprom_data;
- nes_debug(NES_DBG_HW, "PD1 config, size=0x%04x, base=0x%04x\n",
- nesadapter->pd_config_size[1], nesadapter->pd_config_base[1]);
-
- eeprom_offset += 2;
- eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset);
- nesadapter->pd_config_size[2] = eeprom_data;
- eeprom_offset += 2;
- eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset);
- nesadapter->pd_config_base[2] = eeprom_data;
- nes_debug(NES_DBG_HW, "PD2 config, size=0x%04x, base=0x%04x\n",
- nesadapter->pd_config_size[2], nesadapter->pd_config_base[2]);
-
- eeprom_offset += 2;
- eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset);
- nesadapter->pd_config_size[3] = eeprom_data;
- eeprom_offset += 2;
- eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset);
- nesadapter->pd_config_base[3] = eeprom_data;
- nes_debug(NES_DBG_HW, "PD3 config, size=0x%04x, base=0x%04x\n",
- nesadapter->pd_config_size[3], nesadapter->pd_config_base[3]);
-
- /* Read Rx Pool Size */
- eeprom_offset += 22; /* 46 */
- eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset);
- eeprom_offset += 2;
- nesadapter->rx_pool_size = (((u32)eeprom_data) << 16) +
- nes_read16_eeprom(nesdev->regs, eeprom_offset);
- nes_debug(NES_DBG_HW, "rx_pool_size = 0x%08X\n", nesadapter->rx_pool_size);
-
- eeprom_offset += 2;
- eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset);
- eeprom_offset += 2;
- nesadapter->tx_pool_size = (((u32)eeprom_data) << 16) +
- nes_read16_eeprom(nesdev->regs, eeprom_offset);
- nes_debug(NES_DBG_HW, "tx_pool_size = 0x%08X\n", nesadapter->tx_pool_size);
-
- eeprom_offset += 2;
- eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset);
- eeprom_offset += 2;
- nesadapter->rx_threshold = (((u32)eeprom_data) << 16) +
- nes_read16_eeprom(nesdev->regs, eeprom_offset);
- nes_debug(NES_DBG_HW, "rx_threshold = 0x%08X\n", nesadapter->rx_threshold);
-
- eeprom_offset += 2;
- eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset);
- eeprom_offset += 2;
- nesadapter->tcp_timer_core_clk_divisor = (((u32)eeprom_data) << 16) +
- nes_read16_eeprom(nesdev->regs, eeprom_offset);
- nes_debug(NES_DBG_HW, "tcp_timer_core_clk_divisor = 0x%08X\n",
- nesadapter->tcp_timer_core_clk_divisor);
-
- eeprom_offset += 2;
- eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset);
- eeprom_offset += 2;
- nesadapter->iwarp_config = (((u32)eeprom_data) << 16) +
- nes_read16_eeprom(nesdev->regs, eeprom_offset);
- nes_debug(NES_DBG_HW, "iwarp_config = 0x%08X\n", nesadapter->iwarp_config);
-
- eeprom_offset += 2;
- eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset);
- eeprom_offset += 2;
- nesadapter->cm_config = (((u32)eeprom_data) << 16) +
- nes_read16_eeprom(nesdev->regs, eeprom_offset);
- nes_debug(NES_DBG_HW, "cm_config = 0x%08X\n", nesadapter->cm_config);
-
- eeprom_offset += 2;
- eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset);
- eeprom_offset += 2;
- nesadapter->sws_timer_config = (((u32)eeprom_data) << 16) +
- nes_read16_eeprom(nesdev->regs, eeprom_offset);
- nes_debug(NES_DBG_HW, "sws_timer_config = 0x%08X\n", nesadapter->sws_timer_config);
-
- eeprom_offset += 2;
- eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset);
- eeprom_offset += 2;
- nesadapter->tcp_config1 = (((u32)eeprom_data) << 16) +
- nes_read16_eeprom(nesdev->regs, eeprom_offset);
- nes_debug(NES_DBG_HW, "tcp_config1 = 0x%08X\n", nesadapter->tcp_config1);
-
- eeprom_offset += 2;
- eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset);
- eeprom_offset += 2;
- nesadapter->wqm_wat = (((u32)eeprom_data) << 16) +
- nes_read16_eeprom(nesdev->regs, eeprom_offset);
- nes_debug(NES_DBG_HW, "wqm_wat = 0x%08X\n", nesadapter->wqm_wat);
-
- eeprom_offset += 2;
- eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset);
- eeprom_offset += 2;
- nesadapter->core_clock = (((u32)eeprom_data) << 16) +
- nes_read16_eeprom(nesdev->regs, eeprom_offset);
- nes_debug(NES_DBG_HW, "core_clock = 0x%08X\n", nesadapter->core_clock);
-
- if ((sw_section_ver) && (nesadapter->hw_rev != NE020_REV)) {
- eeprom_offset += 2;
- eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset);
- nesadapter->phy_index[0] = (eeprom_data & 0xff00)>>8;
- nesadapter->phy_index[1] = eeprom_data & 0x00ff;
- eeprom_offset += 2;
- eeprom_data = nes_read16_eeprom(nesdev->regs, eeprom_offset);
- nesadapter->phy_index[2] = (eeprom_data & 0xff00)>>8;
- nesadapter->phy_index[3] = eeprom_data & 0x00ff;
- } else {
- nesadapter->phy_index[0] = 4;
- nesadapter->phy_index[1] = 5;
- nesadapter->phy_index[2] = 6;
- nesadapter->phy_index[3] = 7;
- }
- nes_debug(NES_DBG_HW, "Phy address map = 0 > %u, 1 > %u, 2 > %u, 3 > %u\n",
- nesadapter->phy_index[0],nesadapter->phy_index[1],
- nesadapter->phy_index[2],nesadapter->phy_index[3]);
- }
-
- return 0;
-}
-
-
-/**
- * nes_read16_eeprom
- */
-static u16 nes_read16_eeprom(void __iomem *addr, u16 offset)
-{
- writel(NES_EEPROM_READ_REQUEST + (offset >> 1),
- (void __iomem *)addr + NES_EEPROM_COMMAND);
-
- do {
- } while (readl((void __iomem *)addr + NES_EEPROM_COMMAND) &
- NES_EEPROM_READ_REQUEST);
-
- return readw((void __iomem *)addr + NES_EEPROM_DATA);
-}
-
-
-/**
- * nes_write_1G_phy_reg
- */
-void nes_write_1G_phy_reg(struct nes_device *nesdev, u8 phy_reg, u8 phy_addr, u16 data)
-{
- u32 u32temp;
- u32 counter;
-
- nes_write_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL,
- 0x50020000 | data | ((u32)phy_reg << 18) | ((u32)phy_addr << 23));
- for (counter = 0; counter < 100 ; counter++) {
- udelay(30);
- u32temp = nes_read_indexed(nesdev, NES_IDX_MAC_INT_STATUS);
- if (u32temp & 1) {
- /* nes_debug(NES_DBG_PHY, "Phy interrupt status = 0x%X.\n", u32temp); */
- nes_write_indexed(nesdev, NES_IDX_MAC_INT_STATUS, 1);
- break;
- }
- }
- if (!(u32temp & 1))
- nes_debug(NES_DBG_PHY, "Phy is not responding. interrupt status = 0x%X.\n",
- u32temp);
-}
-
-
-/**
- * nes_read_1G_phy_reg
- * This routine only issues the read, the data must be read
- * separately.
- */
-void nes_read_1G_phy_reg(struct nes_device *nesdev, u8 phy_reg, u8 phy_addr, u16 *data)
-{
- u32 u32temp;
- u32 counter;
-
- /* nes_debug(NES_DBG_PHY, "phy addr = %d, mac_index = %d\n",
- phy_addr, nesdev->mac_index); */
-
- nes_write_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL,
- 0x60020000 | ((u32)phy_reg << 18) | ((u32)phy_addr << 23));
- for (counter = 0; counter < 100 ; counter++) {
- udelay(30);
- u32temp = nes_read_indexed(nesdev, NES_IDX_MAC_INT_STATUS);
- if (u32temp & 1) {
- /* nes_debug(NES_DBG_PHY, "Phy interrupt status = 0x%X.\n", u32temp); */
- nes_write_indexed(nesdev, NES_IDX_MAC_INT_STATUS, 1);
- break;
- }
- }
- if (!(u32temp & 1)) {
- nes_debug(NES_DBG_PHY, "Phy is not responding. interrupt status = 0x%X.\n",
- u32temp);
- *data = 0xffff;
- } else {
- *data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
- }
-}
-
-
-/**
- * nes_write_10G_phy_reg
- */
-void nes_write_10G_phy_reg(struct nes_device *nesdev, u16 phy_addr, u8 dev_addr, u16 phy_reg,
- u16 data)
-{
- u32 port_addr;
- u32 u32temp;
- u32 counter;
-
- port_addr = phy_addr;
-
- /* set address */
- nes_write_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL,
- 0x00020000 | (u32)phy_reg | (((u32)dev_addr) << 18) | (((u32)port_addr) << 23));
- for (counter = 0; counter < 100 ; counter++) {
- udelay(30);
- u32temp = nes_read_indexed(nesdev, NES_IDX_MAC_INT_STATUS);
- if (u32temp & 1) {
- nes_write_indexed(nesdev, NES_IDX_MAC_INT_STATUS, 1);
- break;
- }
- }
- if (!(u32temp & 1))
- nes_debug(NES_DBG_PHY, "Phy is not responding. interrupt status = 0x%X.\n",
- u32temp);
-
- /* set data */
- nes_write_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL,
- 0x10020000 | (u32)data | (((u32)dev_addr) << 18) | (((u32)port_addr) << 23));
- for (counter = 0; counter < 100 ; counter++) {
- udelay(30);
- u32temp = nes_read_indexed(nesdev, NES_IDX_MAC_INT_STATUS);
- if (u32temp & 1) {
- nes_write_indexed(nesdev, NES_IDX_MAC_INT_STATUS, 1);
- break;
- }
- }
- if (!(u32temp & 1))
- nes_debug(NES_DBG_PHY, "Phy is not responding. interrupt status = 0x%X.\n",
- u32temp);
-}
-
-
-/**
- * nes_read_10G_phy_reg
- * This routine only issues the read, the data must be read
- * separately.
- */
-void nes_read_10G_phy_reg(struct nes_device *nesdev, u8 phy_addr, u8 dev_addr, u16 phy_reg)
-{
- u32 port_addr;
- u32 u32temp;
- u32 counter;
-
- port_addr = phy_addr;
-
- /* set address */
- nes_write_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL,
- 0x00020000 | (u32)phy_reg | (((u32)dev_addr) << 18) | (((u32)port_addr) << 23));
- for (counter = 0; counter < 100 ; counter++) {
- udelay(30);
- u32temp = nes_read_indexed(nesdev, NES_IDX_MAC_INT_STATUS);
- if (u32temp & 1) {
- nes_write_indexed(nesdev, NES_IDX_MAC_INT_STATUS, 1);
- break;
- }
- }
- if (!(u32temp & 1))
- nes_debug(NES_DBG_PHY, "Phy is not responding. interrupt status = 0x%X.\n",
- u32temp);
-
- /* issue read */
- nes_write_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL,
- 0x30020000 | (((u32)dev_addr) << 18) | (((u32)port_addr) << 23));
- for (counter = 0; counter < 100 ; counter++) {
- udelay(30);
- u32temp = nes_read_indexed(nesdev, NES_IDX_MAC_INT_STATUS);
- if (u32temp & 1) {
- nes_write_indexed(nesdev, NES_IDX_MAC_INT_STATUS, 1);
- break;
- }
- }
- if (!(u32temp & 1))
- nes_debug(NES_DBG_PHY, "Phy is not responding. interrupt status = 0x%X.\n",
- u32temp);
-}
-
-
-/**
- * nes_get_cqp_request
- */
-struct nes_cqp_request *nes_get_cqp_request(struct nes_device *nesdev)
-{
- unsigned long flags;
- struct nes_cqp_request *cqp_request = NULL;
-
- if (!list_empty(&nesdev->cqp_avail_reqs)) {
- spin_lock_irqsave(&nesdev->cqp.lock, flags);
- if (!list_empty(&nesdev->cqp_avail_reqs)) {
- cqp_request = list_entry(nesdev->cqp_avail_reqs.next,
- struct nes_cqp_request, list);
- list_del_init(&cqp_request->list);
- }
- spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
- }
- if (cqp_request == NULL) {
- cqp_request = kzalloc(sizeof(struct nes_cqp_request), GFP_ATOMIC);
- if (cqp_request) {
- cqp_request->dynamic = 1;
- INIT_LIST_HEAD(&cqp_request->list);
- }
- }
-
- if (cqp_request) {
- init_waitqueue_head(&cqp_request->waitq);
- cqp_request->waiting = 0;
- cqp_request->request_done = 0;
- cqp_request->callback = 0;
- init_waitqueue_head(&cqp_request->waitq);
- nes_debug(NES_DBG_CQP, "Got cqp request %p from the available list \n",
- cqp_request);
- } else
- printk(KERN_ERR PFX "%s: Could not allocated a CQP request.\n",
- __func__);
-
- return cqp_request;
-}
-
-void nes_free_cqp_request(struct nes_device *nesdev,
- struct nes_cqp_request *cqp_request)
-{
- unsigned long flags;
-
- nes_debug(NES_DBG_CQP, "CQP request %p (opcode 0x%02X) freed.\n",
- cqp_request,
- le32_to_cpu(cqp_request->cqp_wqe.wqe_words[NES_CQP_WQE_OPCODE_IDX]) & 0x3f);
-
- if (cqp_request->dynamic) {
- kfree(cqp_request);
- } else {
- spin_lock_irqsave(&nesdev->cqp.lock, flags);
- list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
- spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
- }
-}
-
-void nes_put_cqp_request(struct nes_device *nesdev,
- struct nes_cqp_request *cqp_request)
-{
- if (atomic_dec_and_test(&cqp_request->refcount))
- nes_free_cqp_request(nesdev, cqp_request);
-}
-
-
-/**
- * nes_post_cqp_request
- */
-void nes_post_cqp_request(struct nes_device *nesdev,
- struct nes_cqp_request *cqp_request)
-{
- struct nes_hw_cqp_wqe *cqp_wqe;
- unsigned long flags;
- u32 cqp_head;
- u64 u64temp;
- u32 opcode;
- int ctx_index = NES_CQP_WQE_COMP_CTX_LOW_IDX;
-
- spin_lock_irqsave(&nesdev->cqp.lock, flags);
-
- if (((((nesdev->cqp.sq_tail+(nesdev->cqp.sq_size*2))-nesdev->cqp.sq_head) &
- (nesdev->cqp.sq_size - 1)) != 1)
- && (list_empty(&nesdev->cqp_pending_reqs))) {
- cqp_head = nesdev->cqp.sq_head++;
- nesdev->cqp.sq_head &= nesdev->cqp.sq_size-1;
- cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head];
- memcpy(cqp_wqe, &cqp_request->cqp_wqe, sizeof(*cqp_wqe));
- opcode = le32_to_cpu(cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX]);
- if ((opcode & NES_CQP_OPCODE_MASK) == NES_CQP_DOWNLOAD_SEGMENT)
- ctx_index = NES_CQP_WQE_DL_COMP_CTX_LOW_IDX;
- barrier();
- u64temp = (unsigned long)cqp_request;
- set_wqe_64bit_value(cqp_wqe->wqe_words, ctx_index, u64temp);
- nes_debug(NES_DBG_CQP, "CQP request (opcode 0x%02X), line 1 = 0x%08X put on CQPs SQ,"
- " request = %p, cqp_head = %u, cqp_tail = %u, cqp_size = %u,"
- " waiting = %d, refcount = %d.\n",
- opcode & NES_CQP_OPCODE_MASK,
- le32_to_cpu(cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX]), cqp_request,
- nesdev->cqp.sq_head, nesdev->cqp.sq_tail, nesdev->cqp.sq_size,
- cqp_request->waiting, atomic_read(&cqp_request->refcount));
-
- barrier();
-
- /* Ring doorbell (1 WQEs) */
- nes_write32(nesdev->regs+NES_WQE_ALLOC, 0x01800000 | nesdev->cqp.qp_id);
-
- barrier();
- } else {
- nes_debug(NES_DBG_CQP, "CQP request %p (opcode 0x%02X), line 1 = 0x%08X"
- " put on the pending queue.\n",
- cqp_request,
- le32_to_cpu(cqp_request->cqp_wqe.wqe_words[NES_CQP_WQE_OPCODE_IDX])&0x3f,
- le32_to_cpu(cqp_request->cqp_wqe.wqe_words[NES_CQP_WQE_ID_IDX]));
- list_add_tail(&cqp_request->list, &nesdev->cqp_pending_reqs);
- }
-
- spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
-
- return;
-}
-
-/**
- * nes_arp_table
- */
-int nes_arp_table(struct nes_device *nesdev, u32 ip_addr, u8 *mac_addr, u32 action)
-{
- struct nes_adapter *nesadapter = nesdev->nesadapter;
- int arp_index;
- int err = 0;
- __be32 tmp_addr;
-
- for (arp_index = 0; (u32) arp_index < nesadapter->arp_table_size; arp_index++) {
- if (nesadapter->arp_table[arp_index].ip_addr == ip_addr)
- break;
- }
-
- if (action == NES_ARP_ADD) {
- if (arp_index != nesadapter->arp_table_size) {
- return -1;
- }
-
- arp_index = 0;
- err = nes_alloc_resource(nesadapter, nesadapter->allocated_arps,
- nesadapter->arp_table_size, (u32 *)&arp_index, &nesadapter->next_arp_index, NES_RESOURCE_ARP);
- if (err) {
- nes_debug(NES_DBG_NETDEV, "nes_alloc_resource returned error = %u\n", err);
- return err;
- }
- nes_debug(NES_DBG_NETDEV, "ADD, arp_index=%d\n", arp_index);
-
- nesadapter->arp_table[arp_index].ip_addr = ip_addr;
- memcpy(nesadapter->arp_table[arp_index].mac_addr, mac_addr, ETH_ALEN);
- return arp_index;
- }
-
- /* DELETE or RESOLVE */
- if (arp_index == nesadapter->arp_table_size) {
- tmp_addr = cpu_to_be32(ip_addr);
- nes_debug(NES_DBG_NETDEV, "MAC for %pI4 not in ARP table - cannot %s\n",
- &tmp_addr, action == NES_ARP_RESOLVE ? "resolve" : "delete");
- return -1;
- }
-
- if (action == NES_ARP_RESOLVE) {
- nes_debug(NES_DBG_NETDEV, "RESOLVE, arp_index=%d\n", arp_index);
- return arp_index;
- }
-
- if (action == NES_ARP_DELETE) {
- nes_debug(NES_DBG_NETDEV, "DELETE, arp_index=%d\n", arp_index);
- nesadapter->arp_table[arp_index].ip_addr = 0;
- eth_zero_addr(nesadapter->arp_table[arp_index].mac_addr);
- nes_free_resource(nesadapter, nesadapter->allocated_arps, arp_index);
- return arp_index;
- }
-
- return -1;
-}
-
-
-/**
- * nes_mh_fix
- */
-void nes_mh_fix(unsigned long parm)
-{
- unsigned long flags;
- struct nes_device *nesdev = (struct nes_device *)parm;
- struct nes_adapter *nesadapter = nesdev->nesadapter;
- struct nes_vnic *nesvnic;
- u32 used_chunks_tx;
- u32 temp_used_chunks_tx;
- u32 temp_last_used_chunks_tx;
- u32 used_chunks_mask;
- u32 mac_tx_frames_low;
- u32 mac_tx_frames_high;
- u32 mac_tx_pauses;
- u32 serdes_status;
- u32 reset_value;
- u32 tx_control;
- u32 tx_config;
- u32 tx_pause_quanta;
- u32 rx_control;
- u32 rx_config;
- u32 mac_exact_match;
- u32 mpp_debug;
- u32 i=0;
- u32 chunks_tx_progress = 0;
-
- spin_lock_irqsave(&nesadapter->phy_lock, flags);
- if ((nesadapter->mac_sw_state[0] != NES_MAC_SW_IDLE) || (nesadapter->mac_link_down[0])) {
- spin_unlock_irqrestore(&nesadapter->phy_lock, flags);
- goto no_mh_work;
- }
- nesadapter->mac_sw_state[0] = NES_MAC_SW_MH;
- spin_unlock_irqrestore(&nesadapter->phy_lock, flags);
- do {
- mac_tx_frames_low = nes_read_indexed(nesdev, NES_IDX_MAC_TX_FRAMES_LOW);
- mac_tx_frames_high = nes_read_indexed(nesdev, NES_IDX_MAC_TX_FRAMES_HIGH);
- mac_tx_pauses = nes_read_indexed(nesdev, NES_IDX_MAC_TX_PAUSE_FRAMES);
- used_chunks_tx = nes_read_indexed(nesdev, NES_IDX_USED_CHUNKS_TX);
- nesdev->mac_pause_frames_sent += mac_tx_pauses;
- used_chunks_mask = 0;
- temp_used_chunks_tx = used_chunks_tx;
- temp_last_used_chunks_tx = nesdev->last_used_chunks_tx;
-
- if (nesdev->netdev[0]) {
- nesvnic = netdev_priv(nesdev->netdev[0]);
- } else {
- break;
- }
-
- for (i=0; i<4; i++) {
- used_chunks_mask <<= 8;
- if (nesvnic->qp_nic_index[i] != 0xff) {
- used_chunks_mask |= 0xff;
- if ((temp_used_chunks_tx&0xff)<(temp_last_used_chunks_tx&0xff)) {
- chunks_tx_progress = 1;
- }
- }
- temp_used_chunks_tx >>= 8;
- temp_last_used_chunks_tx >>= 8;
- }
- if ((mac_tx_frames_low) || (mac_tx_frames_high) ||
- (!(used_chunks_tx&used_chunks_mask)) ||
- (!(nesdev->last_used_chunks_tx&used_chunks_mask)) ||
- (chunks_tx_progress) ) {
- nesdev->last_used_chunks_tx = used_chunks_tx;
- break;
- }
- nesdev->last_used_chunks_tx = used_chunks_tx;
- barrier();
-
- nes_write_indexed(nesdev, NES_IDX_MAC_TX_CONTROL, 0x00000005);
- mh_pauses_sent++;
- mac_tx_pauses = nes_read_indexed(nesdev, NES_IDX_MAC_TX_PAUSE_FRAMES);
- if (mac_tx_pauses) {
- nesdev->mac_pause_frames_sent += mac_tx_pauses;
- break;
- }
-
- tx_control = nes_read_indexed(nesdev, NES_IDX_MAC_TX_CONTROL);
- tx_config = nes_read_indexed(nesdev, NES_IDX_MAC_TX_CONFIG);
- tx_pause_quanta = nes_read_indexed(nesdev, NES_IDX_MAC_TX_PAUSE_QUANTA);
- rx_control = nes_read_indexed(nesdev, NES_IDX_MAC_RX_CONTROL);
- rx_config = nes_read_indexed(nesdev, NES_IDX_MAC_RX_CONFIG);
- mac_exact_match = nes_read_indexed(nesdev, NES_IDX_MAC_EXACT_MATCH_BOTTOM);
- mpp_debug = nes_read_indexed(nesdev, NES_IDX_MPP_DEBUG);
-
- /* one last ditch effort to avoid a false positive */
- mac_tx_pauses = nes_read_indexed(nesdev, NES_IDX_MAC_TX_PAUSE_FRAMES);
- if (mac_tx_pauses) {
- nesdev->last_mac_tx_pauses = nesdev->mac_pause_frames_sent;
- nes_debug(NES_DBG_HW, "failsafe caught slow outbound pause\n");
- break;
- }
- mh_detected++;
-
- nes_write_indexed(nesdev, NES_IDX_MAC_TX_CONTROL, 0x00000000);
- nes_write_indexed(nesdev, NES_IDX_MAC_TX_CONFIG, 0x00000000);
- reset_value = nes_read32(nesdev->regs+NES_SOFTWARE_RESET);
-
- nes_write32(nesdev->regs+NES_SOFTWARE_RESET, reset_value | 0x0000001d);
-
- while (((nes_read32(nesdev->regs+NES_SOFTWARE_RESET)
- & 0x00000040) != 0x00000040) && (i++ < 5000)) {
- /* mdelay(1); */
- }
-
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0, 0x00000008);
- serdes_status = nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_STATUS0);
-
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_EMP0, 0x000bdef7);
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_DRIVE0, 0x9ce73000);
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_RX_MODE0, 0x0ff00000);
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_RX_SIGDET0, 0x00000000);
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_BYPASS0, 0x00000000);
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_LOOPBACK_CONTROL0, 0x00000000);
- if (nesadapter->OneG_Mode) {
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_RX_EQ_CONTROL0, 0xf0182222);
- } else {
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_RX_EQ_CONTROL0, 0xf0042222);
- }
- serdes_status = nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_RX_EQ_STATUS0);
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL0, 0x000000ff);
-
- nes_write_indexed(nesdev, NES_IDX_MAC_TX_CONTROL, tx_control);
- nes_write_indexed(nesdev, NES_IDX_MAC_TX_CONFIG, tx_config);
- nes_write_indexed(nesdev, NES_IDX_MAC_TX_PAUSE_QUANTA, tx_pause_quanta);
- nes_write_indexed(nesdev, NES_IDX_MAC_RX_CONTROL, rx_control);
- nes_write_indexed(nesdev, NES_IDX_MAC_RX_CONFIG, rx_config);
- nes_write_indexed(nesdev, NES_IDX_MAC_EXACT_MATCH_BOTTOM, mac_exact_match);
- nes_write_indexed(nesdev, NES_IDX_MPP_DEBUG, mpp_debug);
-
- } while (0);
-
- nesadapter->mac_sw_state[0] = NES_MAC_SW_IDLE;
-no_mh_work:
- nesdev->nesadapter->mh_timer.expires = jiffies + (HZ/5);
- add_timer(&nesdev->nesadapter->mh_timer);
-}
-
-/**
- * nes_clc
- */
-void nes_clc(unsigned long parm)
-{
- unsigned long flags;
- struct nes_device *nesdev = (struct nes_device *)parm;
- struct nes_adapter *nesadapter = nesdev->nesadapter;
-
- spin_lock_irqsave(&nesadapter->phy_lock, flags);
- nesadapter->link_interrupt_count[0] = 0;
- nesadapter->link_interrupt_count[1] = 0;
- nesadapter->link_interrupt_count[2] = 0;
- nesadapter->link_interrupt_count[3] = 0;
- spin_unlock_irqrestore(&nesadapter->phy_lock, flags);
-
- nesadapter->lc_timer.expires = jiffies + 3600 * HZ; /* 1 hour */
- add_timer(&nesadapter->lc_timer);
-}
-
-
-/**
- * nes_dump_mem
- */
-void nes_dump_mem(unsigned int dump_debug_level, void *addr, int length)
-{
- if (!(nes_debug_level & dump_debug_level)) {
- return;
- }
-
- if (length > 0x100) {
- nes_debug(dump_debug_level, "Length truncated from %x to %x\n", length, 0x100);
- length = 0x100;
- }
- nes_debug(dump_debug_level, "Address=0x%p, length=0x%x (%d)\n", addr, length, length);
-
- print_hex_dump(KERN_ERR, PFX, DUMP_PREFIX_NONE, 16, 1, addr, length, true);
-}
diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c
deleted file mode 100644
index aff9fb14768b..000000000000
--- a/drivers/infiniband/hw/nes/nes_verbs.c
+++ /dev/null
@@ -1,3922 +0,0 @@
-/*
- * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/random.h>
-#include <linux/highmem.h>
-#include <linux/slab.h>
-#include <asm/byteorder.h>
-
-#include <rdma/ib_verbs.h>
-#include <rdma/iw_cm.h>
-#include <rdma/ib_user_verbs.h>
-
-#include "nes.h"
-
-#include <rdma/ib_umem.h>
-
-atomic_t mod_qp_timouts;
-atomic_t qps_created;
-atomic_t sw_qps_destroyed;
-
-static void nes_unregister_ofa_device(struct nes_ib_device *nesibdev);
-static int nes_dereg_mr(struct ib_mr *ib_mr);
-
-/**
- * nes_alloc_mw
- */
-static struct ib_mw *nes_alloc_mw(struct ib_pd *ibpd, enum ib_mw_type type,
- struct ib_udata *udata)
-{
- struct nes_pd *nespd = to_nespd(ibpd);
- struct nes_vnic *nesvnic = to_nesvnic(ibpd->device);
- struct nes_device *nesdev = nesvnic->nesdev;
- struct nes_adapter *nesadapter = nesdev->nesadapter;
- struct nes_cqp_request *cqp_request;
- struct nes_mr *nesmr;
- struct ib_mw *ibmw;
- struct nes_hw_cqp_wqe *cqp_wqe;
- int ret;
- u32 stag;
- u32 stag_index = 0;
- u32 next_stag_index = 0;
- u32 driver_key = 0;
- u8 stag_key = 0;
-
- if (type != IB_MW_TYPE_1)
- return ERR_PTR(-EINVAL);
-
- get_random_bytes(&next_stag_index, sizeof(next_stag_index));
- stag_key = (u8)next_stag_index;
-
- driver_key = 0;
-
- next_stag_index >>= 8;
- next_stag_index %= nesadapter->max_mr;
-
- ret = nes_alloc_resource(nesadapter, nesadapter->allocated_mrs,
- nesadapter->max_mr, &stag_index, &next_stag_index, NES_RESOURCE_MW);
- if (ret) {
- return ERR_PTR(ret);
- }
-
- nesmr = kzalloc(sizeof(*nesmr), GFP_KERNEL);
- if (!nesmr) {
- nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
- return ERR_PTR(-ENOMEM);
- }
-
- stag = stag_index << 8;
- stag |= driver_key;
- stag += (u32)stag_key;
-
- nes_debug(NES_DBG_MR, "Registering STag 0x%08X, index = 0x%08X\n",
- stag, stag_index);
-
- /* Register the region with the adapter */
- cqp_request = nes_get_cqp_request(nesdev);
- if (cqp_request == NULL) {
- kfree(nesmr);
- nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
- return ERR_PTR(-ENOMEM);
- }
-
- cqp_request->waiting = 1;
- cqp_wqe = &cqp_request->cqp_wqe;
-
- cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] =
- cpu_to_le32( NES_CQP_ALLOCATE_STAG | NES_CQP_STAG_RIGHTS_REMOTE_READ |
- NES_CQP_STAG_RIGHTS_REMOTE_WRITE | NES_CQP_STAG_VA_TO |
- NES_CQP_STAG_REM_ACC_EN);
-
- nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_LEN_HIGH_PD_IDX, (nespd->pd_id & 0x00007fff));
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_STAG_IDX, stag);
-
- atomic_set(&cqp_request->refcount, 2);
- nes_post_cqp_request(nesdev, cqp_request);
-
- /* Wait for CQP */
- ret = wait_event_timeout(cqp_request->waitq, (cqp_request->request_done != 0),
- NES_EVENT_TIMEOUT);
- nes_debug(NES_DBG_MR, "Register STag 0x%08X completed, wait_event_timeout ret = %u,"
- " CQP Major:Minor codes = 0x%04X:0x%04X.\n",
- stag, ret, cqp_request->major_code, cqp_request->minor_code);
- if ((!ret) || (cqp_request->major_code)) {
- nes_put_cqp_request(nesdev, cqp_request);
- kfree(nesmr);
- nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
- if (!ret) {
- return ERR_PTR(-ETIME);
- } else {
- return ERR_PTR(-ENOMEM);
- }
- }
- nes_put_cqp_request(nesdev, cqp_request);
-
- nesmr->ibmw.rkey = stag;
- nesmr->mode = IWNES_MEMREG_TYPE_MW;
- ibmw = &nesmr->ibmw;
- nesmr->pbl_4k = 0;
- nesmr->pbls_used = 0;
-
- return ibmw;
-}
-
-
-/**
- * nes_dealloc_mw
- */
-static int nes_dealloc_mw(struct ib_mw *ibmw)
-{
- struct nes_mr *nesmr = to_nesmw(ibmw);
- struct nes_vnic *nesvnic = to_nesvnic(ibmw->device);
- struct nes_device *nesdev = nesvnic->nesdev;
- struct nes_adapter *nesadapter = nesdev->nesadapter;
- struct nes_hw_cqp_wqe *cqp_wqe;
- struct nes_cqp_request *cqp_request;
- int err = 0;
- int ret;
-
- /* Deallocate the window with the adapter */
- cqp_request = nes_get_cqp_request(nesdev);
- if (cqp_request == NULL) {
- nes_debug(NES_DBG_MR, "Failed to get a cqp_request.\n");
- return -ENOMEM;
- }
- cqp_request->waiting = 1;
- cqp_wqe = &cqp_request->cqp_wqe;
- nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, NES_CQP_DEALLOCATE_STAG);
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_STAG_IDX, ibmw->rkey);
-
- atomic_set(&cqp_request->refcount, 2);
- nes_post_cqp_request(nesdev, cqp_request);
-
- /* Wait for CQP */
- nes_debug(NES_DBG_MR, "Waiting for deallocate STag 0x%08X to complete.\n",
- ibmw->rkey);
- ret = wait_event_timeout(cqp_request->waitq, (0 != cqp_request->request_done),
- NES_EVENT_TIMEOUT);
- nes_debug(NES_DBG_MR, "Deallocate STag completed, wait_event_timeout ret = %u,"
- " CQP Major:Minor codes = 0x%04X:0x%04X.\n",
- ret, cqp_request->major_code, cqp_request->minor_code);
- if (!ret)
- err = -ETIME;
- else if (cqp_request->major_code)
- err = -EIO;
-
- nes_put_cqp_request(nesdev, cqp_request);
-
- nes_free_resource(nesadapter, nesadapter->allocated_mrs,
- (ibmw->rkey & 0x0fffff00) >> 8);
- kfree(nesmr);
-
- return err;
-}
-
-
-/*
- * nes_alloc_fast_mr
- */
-static int alloc_fast_reg_mr(struct nes_device *nesdev, struct nes_pd *nespd,
- u32 stag, u32 page_count)
-{
- struct nes_hw_cqp_wqe *cqp_wqe;
- struct nes_cqp_request *cqp_request;
- unsigned long flags;
- int ret;
- struct nes_adapter *nesadapter = nesdev->nesadapter;
- u32 opcode = 0;
- u16 major_code;
- u64 region_length = page_count * PAGE_SIZE;
-
-
- cqp_request = nes_get_cqp_request(nesdev);
- if (cqp_request == NULL) {
- nes_debug(NES_DBG_MR, "Failed to get a cqp_request.\n");
- return -ENOMEM;
- }
- nes_debug(NES_DBG_MR, "alloc_fast_reg_mr: page_count = %d, "
- "region_length = %llu\n",
- page_count, region_length);
- cqp_request->waiting = 1;
- cqp_wqe = &cqp_request->cqp_wqe;
-
- spin_lock_irqsave(&nesadapter->pbl_lock, flags);
- if (nesadapter->free_4kpbl > 0) {
- nesadapter->free_4kpbl--;
- spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
- } else {
- /* No 4kpbl's available: */
- spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
- nes_debug(NES_DBG_MR, "Out of Pbls\n");
- nes_free_cqp_request(nesdev, cqp_request);
- return -ENOMEM;
- }
-
- opcode = NES_CQP_ALLOCATE_STAG | NES_CQP_STAG_MR |
- NES_CQP_STAG_PBL_BLK_SIZE | NES_CQP_STAG_VA_TO |
- NES_CQP_STAG_REM_ACC_EN;
- /*
- * The current OFED API does not support the zero based TO option.
- * If added then need to changed the NES_CQP_STAG_VA* option. Also,
- * the API does not support that ability to have the MR set for local
- * access only when created and not allow the SQ op to override. Given
- * this the remote enable must be set here.
- */
-
- nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, opcode);
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_PBL_BLK_COUNT_IDX, 1);
-
- cqp_wqe->wqe_words[NES_CQP_STAG_WQE_LEN_HIGH_PD_IDX] =
- cpu_to_le32((u32)(region_length >> 8) & 0xff000000);
- cqp_wqe->wqe_words[NES_CQP_STAG_WQE_LEN_HIGH_PD_IDX] |=
- cpu_to_le32(nespd->pd_id & 0x00007fff);
-
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_STAG_IDX, stag);
- set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_VA_LOW_IDX, 0);
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_LEN_LOW_IDX, 0);
- set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_PA_LOW_IDX, 0);
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_PBL_LEN_IDX, (page_count * 8));
- cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] |= cpu_to_le32(NES_CQP_STAG_PBL_BLK_SIZE);
- barrier();
-
- atomic_set(&cqp_request->refcount, 2);
- nes_post_cqp_request(nesdev, cqp_request);
-
- /* Wait for CQP */
- ret = wait_event_timeout(cqp_request->waitq,
- (0 != cqp_request->request_done),
- NES_EVENT_TIMEOUT);
-
- nes_debug(NES_DBG_MR, "Allocate STag 0x%08X completed, "
- "wait_event_timeout ret = %u, CQP Major:Minor codes = "
- "0x%04X:0x%04X.\n", stag, ret, cqp_request->major_code,
- cqp_request->minor_code);
- major_code = cqp_request->major_code;
- nes_put_cqp_request(nesdev, cqp_request);
-
- if (!ret || major_code) {
- spin_lock_irqsave(&nesadapter->pbl_lock, flags);
- nesadapter->free_4kpbl++;
- spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
- }
-
- if (!ret)
- return -ETIME;
- else if (major_code)
- return -EIO;
- return 0;
-}
-
-/*
- * nes_alloc_mr
- */
-static struct ib_mr *nes_alloc_mr(struct ib_pd *ibpd,
- enum ib_mr_type mr_type,
- u32 max_num_sg)
-{
- struct nes_pd *nespd = to_nespd(ibpd);
- struct nes_vnic *nesvnic = to_nesvnic(ibpd->device);
- struct nes_device *nesdev = nesvnic->nesdev;
- struct nes_adapter *nesadapter = nesdev->nesadapter;
-
- u32 next_stag_index;
- u8 stag_key = 0;
- u32 driver_key = 0;
- int err = 0;
- u32 stag_index = 0;
- struct nes_mr *nesmr;
- u32 stag;
- int ret;
- struct ib_mr *ibmr;
-
- if (mr_type != IB_MR_TYPE_MEM_REG)
- return ERR_PTR(-EINVAL);
-
- if (max_num_sg > (NES_4K_PBL_CHUNK_SIZE / sizeof(u64)))
- return ERR_PTR(-E2BIG);
-
-/*
- * Note: Set to always use a fixed length single page entry PBL. This is to allow
- * for the fast_reg_mr operation to always know the size of the PBL.
- */
- if (max_num_sg > (NES_4K_PBL_CHUNK_SIZE / sizeof(u64)))
- return ERR_PTR(-E2BIG);
-
- get_random_bytes(&next_stag_index, sizeof(next_stag_index));
- stag_key = (u8)next_stag_index;
- next_stag_index >>= 8;
- next_stag_index %= nesadapter->max_mr;
-
- err = nes_alloc_resource(nesadapter, nesadapter->allocated_mrs,
- nesadapter->max_mr, &stag_index,
- &next_stag_index, NES_RESOURCE_FAST_MR);
- if (err)
- return ERR_PTR(err);
-
- nesmr = kzalloc(sizeof(*nesmr), GFP_KERNEL);
- if (!nesmr) {
- nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
- return ERR_PTR(-ENOMEM);
- }
-
- stag = stag_index << 8;
- stag |= driver_key;
- stag += (u32)stag_key;
-
- nes_debug(NES_DBG_MR, "Allocating STag 0x%08X index = 0x%08X\n",
- stag, stag_index);
-
- ret = alloc_fast_reg_mr(nesdev, nespd, stag, max_num_sg);
-
- if (ret == 0) {
- nesmr->ibmr.rkey = stag;
- nesmr->ibmr.lkey = stag;
- nesmr->mode = IWNES_MEMREG_TYPE_FMEM;
- ibmr = &nesmr->ibmr;
- } else {
- kfree(nesmr);
- nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
- return ERR_PTR(-ENOMEM);
- }
-
- nesmr->pages = pci_alloc_consistent(nesdev->pcidev,
- max_num_sg * sizeof(u64),
- &nesmr->paddr);
- if (!nesmr->paddr)
- goto err;
-
- nesmr->max_pages = max_num_sg;
-
- return ibmr;
-
-err:
- nes_dereg_mr(ibmr);
-
- return ERR_PTR(-ENOMEM);
-}
-
-static int nes_set_page(struct ib_mr *ibmr, u64 addr)
-{
- struct nes_mr *nesmr = to_nesmr(ibmr);
-
- if (unlikely(nesmr->npages == nesmr->max_pages))
- return -ENOMEM;
-
- nesmr->pages[nesmr->npages++] = cpu_to_le64(addr);
-
- return 0;
-}
-
-static int nes_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
- int sg_nents, unsigned int *sg_offset)
-{
- struct nes_mr *nesmr = to_nesmr(ibmr);
-
- nesmr->npages = 0;
-
- return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, nes_set_page);
-}
-
-/**
- * nes_query_device
- */
-static int nes_query_device(struct ib_device *ibdev, struct ib_device_attr *props,
- struct ib_udata *uhw)
-{
- struct nes_vnic *nesvnic = to_nesvnic(ibdev);
- struct nes_device *nesdev = nesvnic->nesdev;
- struct nes_ib_device *nesibdev = nesvnic->nesibdev;
-
- if (uhw->inlen || uhw->outlen)
- return -EINVAL;
-
- memset(props, 0, sizeof(*props));
- memcpy(&props->sys_image_guid, nesvnic->netdev->dev_addr, 6);
-
- props->fw_ver = nesdev->nesadapter->firmware_version;
- props->device_cap_flags = nesdev->nesadapter->device_cap_flags;
- props->vendor_id = nesdev->nesadapter->vendor_id;
- props->vendor_part_id = nesdev->nesadapter->vendor_part_id;
- props->hw_ver = nesdev->nesadapter->hw_rev;
- props->max_mr_size = 0x80000000;
- props->max_qp = nesibdev->max_qp;
- props->max_qp_wr = nesdev->nesadapter->max_qp_wr - 2;
- props->max_sge = nesdev->nesadapter->max_sge;
- props->max_cq = nesibdev->max_cq;
- props->max_cqe = nesdev->nesadapter->max_cqe;
- props->max_mr = nesibdev->max_mr;
- props->max_mw = nesibdev->max_mr;
- props->max_pd = nesibdev->max_pd;
- props->max_sge_rd = 1;
- switch (nesdev->nesadapter->max_irrq_wr) {
- case 0:
- props->max_qp_rd_atom = 2;
- break;
- case 1:
- props->max_qp_rd_atom = 8;
- break;
- case 2:
- props->max_qp_rd_atom = 32;
- break;
- case 3:
- props->max_qp_rd_atom = 64;
- break;
- default:
- props->max_qp_rd_atom = 0;
- }
- props->max_qp_init_rd_atom = props->max_qp_rd_atom;
- props->atomic_cap = IB_ATOMIC_NONE;
- props->max_map_per_fmr = 1;
-
- return 0;
-}
-
-
-/**
- * nes_query_port
- */
-static int nes_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props)
-{
- struct nes_vnic *nesvnic = to_nesvnic(ibdev);
- struct net_device *netdev = nesvnic->netdev;
-
- memset(props, 0, sizeof(*props));
-
- props->max_mtu = IB_MTU_4096;
-
- if (netdev->mtu >= 4096)
- props->active_mtu = IB_MTU_4096;
- else if (netdev->mtu >= 2048)
- props->active_mtu = IB_MTU_2048;
- else if (netdev->mtu >= 1024)
- props->active_mtu = IB_MTU_1024;
- else if (netdev->mtu >= 512)
- props->active_mtu = IB_MTU_512;
- else
- props->active_mtu = IB_MTU_256;
-
- props->lid = 1;
- props->lmc = 0;
- props->sm_lid = 0;
- props->sm_sl = 0;
- if (netif_queue_stopped(netdev))
- props->state = IB_PORT_DOWN;
- else if (nesvnic->linkup)
- props->state = IB_PORT_ACTIVE;
- else
- props->state = IB_PORT_DOWN;
- props->phys_state = 0;
- props->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_REINIT_SUP |
- IB_PORT_VENDOR_CLASS_SUP | IB_PORT_BOOT_MGMT_SUP;
- props->gid_tbl_len = 1;
- props->pkey_tbl_len = 1;
- props->qkey_viol_cntr = 0;
- props->active_width = IB_WIDTH_4X;
- props->active_speed = IB_SPEED_SDR;
- props->max_msg_sz = 0x80000000;
-
- return 0;
-}
-
-/**
- * nes_query_pkey
- */
-static int nes_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey)
-{
- *pkey = 0;
- return 0;
-}
-
-
-/**
- * nes_query_gid
- */
-static int nes_query_gid(struct ib_device *ibdev, u8 port,
- int index, union ib_gid *gid)
-{
- struct nes_vnic *nesvnic = to_nesvnic(ibdev);
-
- memset(&(gid->raw[0]), 0, sizeof(gid->raw));
- memcpy(&(gid->raw[0]), nesvnic->netdev->dev_addr, 6);
-
- return 0;
-}
-
-
-/**
- * nes_alloc_ucontext - Allocate the user context data structure. This keeps track
- * of all objects associated with a particular user-mode client.
- */
-static struct ib_ucontext *nes_alloc_ucontext(struct ib_device *ibdev,
- struct ib_udata *udata)
-{
- struct nes_vnic *nesvnic = to_nesvnic(ibdev);
- struct nes_device *nesdev = nesvnic->nesdev;
- struct nes_adapter *nesadapter = nesdev->nesadapter;
- struct nes_alloc_ucontext_req req;
- struct nes_alloc_ucontext_resp uresp;
- struct nes_ucontext *nes_ucontext;
- struct nes_ib_device *nesibdev = nesvnic->nesibdev;
-
-
- if (ib_copy_from_udata(&req, udata, sizeof(struct nes_alloc_ucontext_req))) {
- printk(KERN_ERR PFX "Invalid structure size on allocate user context.\n");
- return ERR_PTR(-EINVAL);
- }
-
- if (req.userspace_ver != NES_ABI_USERSPACE_VER) {
- printk(KERN_ERR PFX "Invalid userspace driver version detected. Detected version %d, should be %d\n",
- req.userspace_ver, NES_ABI_USERSPACE_VER);
- return ERR_PTR(-EINVAL);
- }
-
-
- memset(&uresp, 0, sizeof uresp);
-
- uresp.max_qps = nesibdev->max_qp;
- uresp.max_pds = nesibdev->max_pd;
- uresp.wq_size = nesdev->nesadapter->max_qp_wr * 2;
- uresp.virtwq = nesadapter->virtwq;
- uresp.kernel_ver = NES_ABI_KERNEL_VER;
-
- nes_ucontext = kzalloc(sizeof *nes_ucontext, GFP_KERNEL);
- if (!nes_ucontext)
- return ERR_PTR(-ENOMEM);
-
- nes_ucontext->nesdev = nesdev;
- nes_ucontext->mmap_wq_offset = uresp.max_pds;
- nes_ucontext->mmap_cq_offset = nes_ucontext->mmap_wq_offset +
- ((sizeof(struct nes_hw_qp_wqe) * uresp.max_qps * 2) + PAGE_SIZE-1) /
- PAGE_SIZE;
-
-
- if (ib_copy_to_udata(udata, &uresp, sizeof uresp)) {
- kfree(nes_ucontext);
- return ERR_PTR(-EFAULT);
- }
-
- INIT_LIST_HEAD(&nes_ucontext->cq_reg_mem_list);
- INIT_LIST_HEAD(&nes_ucontext->qp_reg_mem_list);
- atomic_set(&nes_ucontext->usecnt, 1);
- return &nes_ucontext->ibucontext;
-}
-
-
-/**
- * nes_dealloc_ucontext
- */
-static int nes_dealloc_ucontext(struct ib_ucontext *context)
-{
- /* struct nes_vnic *nesvnic = to_nesvnic(context->device); */
- /* struct nes_device *nesdev = nesvnic->nesdev; */
- struct nes_ucontext *nes_ucontext = to_nesucontext(context);
-
- if (!atomic_dec_and_test(&nes_ucontext->usecnt))
- return 0;
- kfree(nes_ucontext);
- return 0;
-}
-
-
-/**
- * nes_mmap
- */
-static int nes_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
-{
- unsigned long index;
- struct nes_vnic *nesvnic = to_nesvnic(context->device);
- struct nes_device *nesdev = nesvnic->nesdev;
- /* struct nes_adapter *nesadapter = nesdev->nesadapter; */
- struct nes_ucontext *nes_ucontext;
- struct nes_qp *nesqp;
-
- nes_ucontext = to_nesucontext(context);
-
-
- if (vma->vm_pgoff >= nes_ucontext->mmap_wq_offset) {
- index = (vma->vm_pgoff - nes_ucontext->mmap_wq_offset) * PAGE_SIZE;
- index /= ((sizeof(struct nes_hw_qp_wqe) * nesdev->nesadapter->max_qp_wr * 2) +
- PAGE_SIZE-1) & (~(PAGE_SIZE-1));
- if (!test_bit(index, nes_ucontext->allocated_wqs)) {
- nes_debug(NES_DBG_MMAP, "wq %lu not allocated\n", index);
- return -EFAULT;
- }
- nesqp = nes_ucontext->mmap_nesqp[index];
- if (nesqp == NULL) {
- nes_debug(NES_DBG_MMAP, "wq %lu has a NULL QP base.\n", index);
- return -EFAULT;
- }
- if (remap_pfn_range(vma, vma->vm_start,
- virt_to_phys(nesqp->hwqp.sq_vbase) >> PAGE_SHIFT,
- vma->vm_end - vma->vm_start,
- vma->vm_page_prot)) {
- nes_debug(NES_DBG_MMAP, "remap_pfn_range failed.\n");
- return -EAGAIN;
- }
- vma->vm_private_data = nesqp;
- return 0;
- } else {
- index = vma->vm_pgoff;
- if (!test_bit(index, nes_ucontext->allocated_doorbells))
- return -EFAULT;
-
- vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
- if (io_remap_pfn_range(vma, vma->vm_start,
- (nesdev->doorbell_start +
- ((nes_ucontext->mmap_db_index[index] - nesdev->base_doorbell_index) * 4096))
- >> PAGE_SHIFT, PAGE_SIZE, vma->vm_page_prot))
- return -EAGAIN;
- vma->vm_private_data = nes_ucontext;
- return 0;
- }
-
- return -ENOSYS;
-}
-
-
-/**
- * nes_alloc_pd
- */
-static struct ib_pd *nes_alloc_pd(struct ib_device *ibdev,
- struct ib_ucontext *context, struct ib_udata *udata)
-{
- struct nes_pd *nespd;
- struct nes_vnic *nesvnic = to_nesvnic(ibdev);
- struct nes_device *nesdev = nesvnic->nesdev;
- struct nes_adapter *nesadapter = nesdev->nesadapter;
- struct nes_ucontext *nesucontext;
- struct nes_alloc_pd_resp uresp;
- u32 pd_num = 0;
- int err;
-
- nes_debug(NES_DBG_PD, "nesvnic=%p, netdev=%p %s, ibdev=%p, context=%p, netdev refcnt=%u\n",
- nesvnic, nesdev->netdev[0], nesdev->netdev[0]->name, ibdev, context,
- netdev_refcnt_read(nesvnic->netdev));
-
- err = nes_alloc_resource(nesadapter, nesadapter->allocated_pds,
- nesadapter->max_pd, &pd_num, &nesadapter->next_pd, NES_RESOURCE_PD);
- if (err) {
- return ERR_PTR(err);
- }
-
- nespd = kzalloc(sizeof (struct nes_pd), GFP_KERNEL);
- if (!nespd) {
- nes_free_resource(nesadapter, nesadapter->allocated_pds, pd_num);
- return ERR_PTR(-ENOMEM);
- }
-
- nes_debug(NES_DBG_PD, "Allocating PD (%p) for ib device %s\n",
- nespd, nesvnic->nesibdev->ibdev.name);
-
- nespd->pd_id = (pd_num << (PAGE_SHIFT-12)) + nesadapter->base_pd;
-
- if (context) {
- nesucontext = to_nesucontext(context);
- nespd->mmap_db_index = find_next_zero_bit(nesucontext->allocated_doorbells,
- NES_MAX_USER_DB_REGIONS, nesucontext->first_free_db);
- nes_debug(NES_DBG_PD, "find_first_zero_biton doorbells returned %u, mapping pd_id %u.\n",
- nespd->mmap_db_index, nespd->pd_id);
- if (nespd->mmap_db_index >= NES_MAX_USER_DB_REGIONS) {
- nes_debug(NES_DBG_PD, "mmap_db_index > MAX\n");
- nes_free_resource(nesadapter, nesadapter->allocated_pds, pd_num);
- kfree(nespd);
- return ERR_PTR(-ENOMEM);
- }
-
- uresp.pd_id = nespd->pd_id;
- uresp.mmap_db_index = nespd->mmap_db_index;
- if (ib_copy_to_udata(udata, &uresp, sizeof (struct nes_alloc_pd_resp))) {
- nes_free_resource(nesadapter, nesadapter->allocated_pds, pd_num);
- kfree(nespd);
- return ERR_PTR(-EFAULT);
- }
-
- set_bit(nespd->mmap_db_index, nesucontext->allocated_doorbells);
- nesucontext->mmap_db_index[nespd->mmap_db_index] = nespd->pd_id;
- nesucontext->first_free_db = nespd->mmap_db_index + 1;
- }
-
- nes_debug(NES_DBG_PD, "PD%u structure located @%p.\n", nespd->pd_id, nespd);
- return &nespd->ibpd;
-}
-
-
-/**
- * nes_dealloc_pd
- */
-static int nes_dealloc_pd(struct ib_pd *ibpd)
-{
- struct nes_ucontext *nesucontext;
- struct nes_pd *nespd = to_nespd(ibpd);
- struct nes_vnic *nesvnic = to_nesvnic(ibpd->device);
- struct nes_device *nesdev = nesvnic->nesdev;
- struct nes_adapter *nesadapter = nesdev->nesadapter;
-
- if ((ibpd->uobject) && (ibpd->uobject->context)) {
- nesucontext = to_nesucontext(ibpd->uobject->context);
- nes_debug(NES_DBG_PD, "Clearing bit %u from allocated doorbells\n",
- nespd->mmap_db_index);
- clear_bit(nespd->mmap_db_index, nesucontext->allocated_doorbells);
- nesucontext->mmap_db_index[nespd->mmap_db_index] = 0;
- if (nesucontext->first_free_db > nespd->mmap_db_index) {
- nesucontext->first_free_db = nespd->mmap_db_index;
- }
- }
-
- nes_debug(NES_DBG_PD, "Deallocating PD%u structure located @%p.\n",
- nespd->pd_id, nespd);
- nes_free_resource(nesadapter, nesadapter->allocated_pds,
- (nespd->pd_id-nesadapter->base_pd)>>(PAGE_SHIFT-12));
- kfree(nespd);
-
- return 0;
-}
-
-
-/**
- * nes_create_ah
- */
-static struct ib_ah *nes_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
- struct ib_udata *udata)
-{
- return ERR_PTR(-ENOSYS);
-}
-
-
-/**
- * nes_destroy_ah
- */
-static int nes_destroy_ah(struct ib_ah *ah)
-{
- return -ENOSYS;
-}
-
-
-/**
- * nes_get_encoded_size
- */
-static inline u8 nes_get_encoded_size(int *size)
-{
- u8 encoded_size = 0;
- if (*size <= 32) {
- *size = 32;
- encoded_size = 1;
- } else if (*size <= 128) {
- *size = 128;
- encoded_size = 2;
- } else if (*size <= 512) {
- *size = 512;
- encoded_size = 3;
- }
- return (encoded_size);
-}
-
-
-
-/**
- * nes_setup_virt_qp
- */
-static int nes_setup_virt_qp(struct nes_qp *nesqp, struct nes_pbl *nespbl,
- struct nes_vnic *nesvnic, int sq_size, int rq_size)
-{
- unsigned long flags;
- void *mem;
- __le64 *pbl = NULL;
- __le64 *tpbl;
- __le64 *pblbuffer;
- struct nes_device *nesdev = nesvnic->nesdev;
- struct nes_adapter *nesadapter = nesdev->nesadapter;
- u32 pbl_entries;
- u8 rq_pbl_entries;
- u8 sq_pbl_entries;
-
- pbl_entries = nespbl->pbl_size >> 3;
- nes_debug(NES_DBG_QP, "Userspace PBL, pbl_size=%u, pbl_entries = %d pbl_vbase=%p, pbl_pbase=%lx\n",
- nespbl->pbl_size, pbl_entries,
- (void *)nespbl->pbl_vbase,
- (unsigned long) nespbl->pbl_pbase);
- pbl = (__le64 *) nespbl->pbl_vbase; /* points to first pbl entry */
- /* now lets set the sq_vbase as well as rq_vbase addrs we will assign */
- /* the first pbl to be fro the rq_vbase... */
- rq_pbl_entries = (rq_size * sizeof(struct nes_hw_qp_wqe)) >> 12;
- sq_pbl_entries = (sq_size * sizeof(struct nes_hw_qp_wqe)) >> 12;
- nesqp->hwqp.sq_pbase = (le32_to_cpu(((__le32 *)pbl)[0])) | ((u64)((le32_to_cpu(((__le32 *)pbl)[1]))) << 32);
- if (!nespbl->page) {
- nes_debug(NES_DBG_QP, "QP nespbl->page is NULL \n");
- kfree(nespbl);
- return -ENOMEM;
- }
-
- nesqp->hwqp.sq_vbase = kmap(nespbl->page);
- nesqp->page = nespbl->page;
- if (!nesqp->hwqp.sq_vbase) {
- nes_debug(NES_DBG_QP, "QP sq_vbase kmap failed\n");
- kfree(nespbl);
- return -ENOMEM;
- }
-
- /* Now to get to sq.. we need to calculate how many */
- /* PBL entries were used by the rq.. */
- pbl += sq_pbl_entries;
- nesqp->hwqp.rq_pbase = (le32_to_cpu(((__le32 *)pbl)[0])) | ((u64)((le32_to_cpu(((__le32 *)pbl)[1]))) << 32);
- /* nesqp->hwqp.rq_vbase = bus_to_virt(*pbl); */
- /*nesqp->hwqp.rq_vbase = phys_to_virt(*pbl); */
-
- nes_debug(NES_DBG_QP, "QP sq_vbase= %p sq_pbase=%lx rq_vbase=%p rq_pbase=%lx\n",
- nesqp->hwqp.sq_vbase, (unsigned long) nesqp->hwqp.sq_pbase,
- nesqp->hwqp.rq_vbase, (unsigned long) nesqp->hwqp.rq_pbase);
- spin_lock_irqsave(&nesadapter->pbl_lock, flags);
- if (!nesadapter->free_256pbl) {
- pci_free_consistent(nesdev->pcidev, nespbl->pbl_size, nespbl->pbl_vbase,
- nespbl->pbl_pbase);
- spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
- kunmap(nesqp->page);
- kfree(nespbl);
- return -ENOMEM;
- }
- nesadapter->free_256pbl--;
- spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
-
- nesqp->pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 256, &nesqp->pbl_pbase);
- pblbuffer = nesqp->pbl_vbase;
- if (!nesqp->pbl_vbase) {
- /* memory allocated during nes_reg_user_mr() */
- pci_free_consistent(nesdev->pcidev, nespbl->pbl_size, nespbl->pbl_vbase,
- nespbl->pbl_pbase);
- kfree(nespbl);
- spin_lock_irqsave(&nesadapter->pbl_lock, flags);
- nesadapter->free_256pbl++;
- spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
- kunmap(nesqp->page);
- return -ENOMEM;
- }
- memset(nesqp->pbl_vbase, 0, 256);
- /* fill in the page address in the pbl buffer.. */
- tpbl = pblbuffer + 16;
- pbl = (__le64 *)nespbl->pbl_vbase;
- while (sq_pbl_entries--)
- *tpbl++ = *pbl++;
- tpbl = pblbuffer;
- while (rq_pbl_entries--)
- *tpbl++ = *pbl++;
-
- /* done with memory allocated during nes_reg_user_mr() */
- pci_free_consistent(nesdev->pcidev, nespbl->pbl_size, nespbl->pbl_vbase,
- nespbl->pbl_pbase);
- kfree(nespbl);
-
- nesqp->qp_mem_size =
- max((u32)sizeof(struct nes_qp_context), ((u32)256)) + 256; /* this is Q2 */
- /* Round up to a multiple of a page */
- nesqp->qp_mem_size += PAGE_SIZE - 1;
- nesqp->qp_mem_size &= ~(PAGE_SIZE - 1);
-
- mem = pci_alloc_consistent(nesdev->pcidev, nesqp->qp_mem_size,
- &nesqp->hwqp.q2_pbase);
-
- if (!mem) {
- pci_free_consistent(nesdev->pcidev, 256, nesqp->pbl_vbase, nesqp->pbl_pbase);
- nesqp->pbl_vbase = NULL;
- spin_lock_irqsave(&nesadapter->pbl_lock, flags);
- nesadapter->free_256pbl++;
- spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
- kunmap(nesqp->page);
- return -ENOMEM;
- }
- nesqp->sq_kmapped = 1;
- nesqp->hwqp.q2_vbase = mem;
- mem += 256;
- memset(nesqp->hwqp.q2_vbase, 0, 256);
- nesqp->nesqp_context = mem;
- memset(nesqp->nesqp_context, 0, sizeof(*nesqp->nesqp_context));
- nesqp->nesqp_context_pbase = nesqp->hwqp.q2_pbase + 256;
-
- return 0;
-}
-
-
-/**
- * nes_setup_mmap_qp
- */
-static int nes_setup_mmap_qp(struct nes_qp *nesqp, struct nes_vnic *nesvnic,
- int sq_size, int rq_size)
-{
- void *mem;
- struct nes_device *nesdev = nesvnic->nesdev;
-
- nesqp->qp_mem_size = (sizeof(struct nes_hw_qp_wqe) * sq_size) +
- (sizeof(struct nes_hw_qp_wqe) * rq_size) +
- max((u32)sizeof(struct nes_qp_context), ((u32)256)) +
- 256; /* this is Q2 */
- /* Round up to a multiple of a page */
- nesqp->qp_mem_size += PAGE_SIZE - 1;
- nesqp->qp_mem_size &= ~(PAGE_SIZE - 1);
-
- mem = pci_alloc_consistent(nesdev->pcidev, nesqp->qp_mem_size,
- &nesqp->hwqp.sq_pbase);
- if (!mem)
- return -ENOMEM;
- nes_debug(NES_DBG_QP, "PCI consistent memory for "
- "host descriptor rings located @ %p (pa = 0x%08lX.) size = %u.\n",
- mem, (unsigned long)nesqp->hwqp.sq_pbase, nesqp->qp_mem_size);
-
- memset(mem, 0, nesqp->qp_mem_size);
-
- nesqp->hwqp.sq_vbase = mem;
- mem += sizeof(struct nes_hw_qp_wqe) * sq_size;
-
- nesqp->hwqp.rq_vbase = mem;
- nesqp->hwqp.rq_pbase = nesqp->hwqp.sq_pbase +
- sizeof(struct nes_hw_qp_wqe) * sq_size;
- mem += sizeof(struct nes_hw_qp_wqe) * rq_size;
-
- nesqp->hwqp.q2_vbase = mem;
- nesqp->hwqp.q2_pbase = nesqp->hwqp.rq_pbase +
- sizeof(struct nes_hw_qp_wqe) * rq_size;
- mem += 256;
- memset(nesqp->hwqp.q2_vbase, 0, 256);
-
- nesqp->nesqp_context = mem;
- nesqp->nesqp_context_pbase = nesqp->hwqp.q2_pbase + 256;
- memset(nesqp->nesqp_context, 0, sizeof(*nesqp->nesqp_context));
- return 0;
-}
-
-
-/**
- * nes_free_qp_mem() is to free up the qp's pci_alloc_consistent() memory.
- */
-static void nes_free_qp_mem(struct nes_device *nesdev,
- struct nes_qp *nesqp, int virt_wqs)
-{
- unsigned long flags;
- struct nes_adapter *nesadapter = nesdev->nesadapter;
- if (!virt_wqs) {
- pci_free_consistent(nesdev->pcidev, nesqp->qp_mem_size,
- nesqp->hwqp.sq_vbase, nesqp->hwqp.sq_pbase);
- }else {
- spin_lock_irqsave(&nesadapter->pbl_lock, flags);
- nesadapter->free_256pbl++;
- spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
- pci_free_consistent(nesdev->pcidev, nesqp->qp_mem_size, nesqp->hwqp.q2_vbase, nesqp->hwqp.q2_pbase);
- pci_free_consistent(nesdev->pcidev, 256, nesqp->pbl_vbase, nesqp->pbl_pbase );
- nesqp->pbl_vbase = NULL;
- if (nesqp->sq_kmapped) {
- nesqp->sq_kmapped = 0;
- kunmap(nesqp->page);
- }
- }
-}
-
-
-/**
- * nes_create_qp
- */
-static struct ib_qp *nes_create_qp(struct ib_pd *ibpd,
- struct ib_qp_init_attr *init_attr, struct ib_udata *udata)
-{
- u64 u64temp= 0;
- u64 u64nesqp = 0;
- struct nes_pd *nespd = to_nespd(ibpd);
- struct nes_vnic *nesvnic = to_nesvnic(ibpd->device);
- struct nes_device *nesdev = nesvnic->nesdev;
- struct nes_adapter *nesadapter = nesdev->nesadapter;
- struct nes_qp *nesqp;
- struct nes_cq *nescq;
- struct nes_ucontext *nes_ucontext;
- struct nes_hw_cqp_wqe *cqp_wqe;
- struct nes_cqp_request *cqp_request;
- struct nes_create_qp_req req;
- struct nes_create_qp_resp uresp;
- struct nes_pbl *nespbl = NULL;
- u32 qp_num = 0;
- u32 opcode = 0;
- /* u32 counter = 0; */
- void *mem;
- unsigned long flags;
- int ret;
- int err;
- int virt_wqs = 0;
- int sq_size;
- int rq_size;
- u8 sq_encoded_size;
- u8 rq_encoded_size;
- /* int counter; */
-
- if (init_attr->create_flags)
- return ERR_PTR(-EINVAL);
-
- atomic_inc(&qps_created);
- switch (init_attr->qp_type) {
- case IB_QPT_RC:
- if (nes_drv_opt & NES_DRV_OPT_NO_INLINE_DATA) {
- init_attr->cap.max_inline_data = 0;
- } else {
- init_attr->cap.max_inline_data = 64;
- }
- sq_size = init_attr->cap.max_send_wr;
- rq_size = init_attr->cap.max_recv_wr;
-
- /* check if the encoded sizes are OK or not... */
- sq_encoded_size = nes_get_encoded_size(&sq_size);
- rq_encoded_size = nes_get_encoded_size(&rq_size);
-
- if ((!sq_encoded_size) || (!rq_encoded_size)) {
- nes_debug(NES_DBG_QP, "ERROR bad rq (%u) or sq (%u) size\n",
- rq_size, sq_size);
- return ERR_PTR(-EINVAL);
- }
-
- init_attr->cap.max_send_wr = sq_size -2;
- init_attr->cap.max_recv_wr = rq_size -1;
- nes_debug(NES_DBG_QP, "RQ size=%u, SQ Size=%u\n", rq_size, sq_size);
-
- ret = nes_alloc_resource(nesadapter, nesadapter->allocated_qps,
- nesadapter->max_qp, &qp_num, &nesadapter->next_qp, NES_RESOURCE_QP);
- if (ret) {
- return ERR_PTR(ret);
- }
-
- /* Need 512 (actually now 1024) byte alignment on this structure */
- mem = kzalloc(sizeof(*nesqp)+NES_SW_CONTEXT_ALIGN-1, GFP_KERNEL);
- if (!mem) {
- nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num);
- return ERR_PTR(-ENOMEM);
- }
- u64nesqp = (unsigned long)mem;
- u64nesqp += ((u64)NES_SW_CONTEXT_ALIGN) - 1;
- u64temp = ((u64)NES_SW_CONTEXT_ALIGN) - 1;
- u64nesqp &= ~u64temp;
- nesqp = (struct nes_qp *)(unsigned long)u64nesqp;
- /* nes_debug(NES_DBG_QP, "nesqp=%p, allocated buffer=%p. Rounded to closest %u\n",
- nesqp, mem, NES_SW_CONTEXT_ALIGN); */
- nesqp->allocated_buffer = mem;
-
- if (udata) {
- if (ib_copy_from_udata(&req, udata, sizeof(struct nes_create_qp_req))) {
- nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num);
- kfree(nesqp->allocated_buffer);
- nes_debug(NES_DBG_QP, "ib_copy_from_udata() Failed \n");
- return ERR_PTR(-EFAULT);
- }
- if (req.user_wqe_buffers) {
- virt_wqs = 1;
- }
- if (req.user_qp_buffer)
- nesqp->nesuqp_addr = req.user_qp_buffer;
- if ((ibpd->uobject) && (ibpd->uobject->context)) {
- nesqp->user_mode = 1;
- nes_ucontext = to_nesucontext(ibpd->uobject->context);
- if (virt_wqs) {
- err = 1;
- list_for_each_entry(nespbl, &nes_ucontext->qp_reg_mem_list, list) {
- if (nespbl->user_base == (unsigned long )req.user_wqe_buffers) {
- list_del(&nespbl->list);
- err = 0;
- nes_debug(NES_DBG_QP, "Found PBL for virtual QP. nespbl=%p. user_base=0x%lx\n",
- nespbl, nespbl->user_base);
- break;
- }
- }
- if (err) {
- nes_debug(NES_DBG_QP, "Didn't Find PBL for virtual QP. address = %llx.\n",
- (long long unsigned int)req.user_wqe_buffers);
- nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num);
- kfree(nesqp->allocated_buffer);
- return ERR_PTR(-EFAULT);
- }
- }
-
- nes_ucontext = to_nesucontext(ibpd->uobject->context);
- nesqp->mmap_sq_db_index =
- find_next_zero_bit(nes_ucontext->allocated_wqs,
- NES_MAX_USER_WQ_REGIONS, nes_ucontext->first_free_wq);
- /* nes_debug(NES_DBG_QP, "find_first_zero_biton wqs returned %u\n",
- nespd->mmap_db_index); */
- if (nesqp->mmap_sq_db_index >= NES_MAX_USER_WQ_REGIONS) {
- nes_debug(NES_DBG_QP,
- "db index > max user regions, failing create QP\n");
- nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num);
- if (virt_wqs) {
- pci_free_consistent(nesdev->pcidev, nespbl->pbl_size, nespbl->pbl_vbase,
- nespbl->pbl_pbase);
- kfree(nespbl);
- }
- kfree(nesqp->allocated_buffer);
- return ERR_PTR(-ENOMEM);
- }
- set_bit(nesqp->mmap_sq_db_index, nes_ucontext->allocated_wqs);
- nes_ucontext->mmap_nesqp[nesqp->mmap_sq_db_index] = nesqp;
- nes_ucontext->first_free_wq = nesqp->mmap_sq_db_index + 1;
- } else {
- nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num);
- kfree(nesqp->allocated_buffer);
- return ERR_PTR(-EFAULT);
- }
- }
- err = (!virt_wqs) ? nes_setup_mmap_qp(nesqp, nesvnic, sq_size, rq_size) :
- nes_setup_virt_qp(nesqp, nespbl, nesvnic, sq_size, rq_size);
- if (err) {
- nes_debug(NES_DBG_QP,
- "error geting qp mem code = %d\n", err);
- nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num);
- kfree(nesqp->allocated_buffer);
- return ERR_PTR(-ENOMEM);
- }
-
- nesqp->hwqp.sq_size = sq_size;
- nesqp->hwqp.sq_encoded_size = sq_encoded_size;
- nesqp->hwqp.sq_head = 1;
- nesqp->hwqp.rq_size = rq_size;
- nesqp->hwqp.rq_encoded_size = rq_encoded_size;
- /* nes_debug(NES_DBG_QP, "nesqp->nesqp_context_pbase = %p\n",
- (void *)nesqp->nesqp_context_pbase);
- */
- nesqp->hwqp.qp_id = qp_num;
- nesqp->ibqp.qp_num = nesqp->hwqp.qp_id;
- nesqp->nespd = nespd;
-
- nescq = to_nescq(init_attr->send_cq);
- nesqp->nesscq = nescq;
- nescq = to_nescq(init_attr->recv_cq);
- nesqp->nesrcq = nescq;
-
- nesqp->nesqp_context->misc |= cpu_to_le32((u32)PCI_FUNC(nesdev->pcidev->devfn) <<
- NES_QPCONTEXT_MISC_PCI_FCN_SHIFT);
- nesqp->nesqp_context->misc |= cpu_to_le32((u32)nesqp->hwqp.rq_encoded_size <<
- NES_QPCONTEXT_MISC_RQ_SIZE_SHIFT);
- nesqp->nesqp_context->misc |= cpu_to_le32((u32)nesqp->hwqp.sq_encoded_size <<
- NES_QPCONTEXT_MISC_SQ_SIZE_SHIFT);
- if (!udata) {
- nesqp->nesqp_context->misc |= cpu_to_le32(NES_QPCONTEXT_MISC_PRIV_EN);
- nesqp->nesqp_context->misc |= cpu_to_le32(NES_QPCONTEXT_MISC_FAST_REGISTER_EN);
- }
- nesqp->nesqp_context->cqs = cpu_to_le32(nesqp->nesscq->hw_cq.cq_number +
- ((u32)nesqp->nesrcq->hw_cq.cq_number << 16));
- u64temp = (u64)nesqp->hwqp.sq_pbase;
- nesqp->nesqp_context->sq_addr_low = cpu_to_le32((u32)u64temp);
- nesqp->nesqp_context->sq_addr_high = cpu_to_le32((u32)(u64temp >> 32));
-
-
- if (!virt_wqs) {
- u64temp = (u64)nesqp->hwqp.sq_pbase;
- nesqp->nesqp_context->sq_addr_low = cpu_to_le32((u32)u64temp);
- nesqp->nesqp_context->sq_addr_high = cpu_to_le32((u32)(u64temp >> 32));
- u64temp = (u64)nesqp->hwqp.rq_pbase;
- nesqp->nesqp_context->rq_addr_low = cpu_to_le32((u32)u64temp);
- nesqp->nesqp_context->rq_addr_high = cpu_to_le32((u32)(u64temp >> 32));
- } else {
- u64temp = (u64)nesqp->pbl_pbase;
- nesqp->nesqp_context->rq_addr_low = cpu_to_le32((u32)u64temp);
- nesqp->nesqp_context->rq_addr_high = cpu_to_le32((u32)(u64temp >> 32));
- }
-
- /* nes_debug(NES_DBG_QP, "next_qp_nic_index=%u, using nic_index=%d\n",
- nesvnic->next_qp_nic_index,
- nesvnic->qp_nic_index[nesvnic->next_qp_nic_index]); */
- spin_lock_irqsave(&nesdev->cqp.lock, flags);
- nesqp->nesqp_context->misc2 |= cpu_to_le32(
- (u32)nesvnic->qp_nic_index[nesvnic->next_qp_nic_index] <<
- NES_QPCONTEXT_MISC2_NIC_INDEX_SHIFT);
- nesvnic->next_qp_nic_index++;
- if ((nesvnic->next_qp_nic_index > 3) ||
- (nesvnic->qp_nic_index[nesvnic->next_qp_nic_index] == 0xf)) {
- nesvnic->next_qp_nic_index = 0;
- }
- spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
-
- nesqp->nesqp_context->pd_index_wscale |= cpu_to_le32((u32)nesqp->nespd->pd_id << 16);
- u64temp = (u64)nesqp->hwqp.q2_pbase;
- nesqp->nesqp_context->q2_addr_low = cpu_to_le32((u32)u64temp);
- nesqp->nesqp_context->q2_addr_high = cpu_to_le32((u32)(u64temp >> 32));
- nesqp->nesqp_context->aeq_token_low = cpu_to_le32((u32)((unsigned long)(nesqp)));
- nesqp->nesqp_context->aeq_token_high = cpu_to_le32((u32)(upper_32_bits((unsigned long)(nesqp))));
- nesqp->nesqp_context->ird_ord_sizes = cpu_to_le32(NES_QPCONTEXT_ORDIRD_ALSMM |
- NES_QPCONTEXT_ORDIRD_AAH |
- ((((u32)nesadapter->max_irrq_wr) <<
- NES_QPCONTEXT_ORDIRD_IRDSIZE_SHIFT) & NES_QPCONTEXT_ORDIRD_IRDSIZE_MASK));
- if (disable_mpa_crc) {
- nes_debug(NES_DBG_QP, "Disabling MPA crc checking due to module option.\n");
- nesqp->nesqp_context->ird_ord_sizes |= cpu_to_le32(NES_QPCONTEXT_ORDIRD_RNMC);
- }
-
-
- /* Create the QP */
- cqp_request = nes_get_cqp_request(nesdev);
- if (cqp_request == NULL) {
- nes_debug(NES_DBG_QP, "Failed to get a cqp_request\n");
- nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num);
- nes_free_qp_mem(nesdev, nesqp,virt_wqs);
- kfree(nesqp->allocated_buffer);
- return ERR_PTR(-ENOMEM);
- }
- cqp_request->waiting = 1;
- cqp_wqe = &cqp_request->cqp_wqe;
-
- if (!virt_wqs) {
- opcode = NES_CQP_CREATE_QP | NES_CQP_QP_TYPE_IWARP |
- NES_CQP_QP_IWARP_STATE_IDLE;
- } else {
- opcode = NES_CQP_CREATE_QP | NES_CQP_QP_TYPE_IWARP | NES_CQP_QP_VIRT_WQS |
- NES_CQP_QP_IWARP_STATE_IDLE;
- }
- opcode |= NES_CQP_QP_CQS_VALID;
- nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, opcode);
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX, nesqp->hwqp.qp_id);
-
- u64temp = (u64)nesqp->nesqp_context_pbase;
- set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_QP_WQE_CONTEXT_LOW_IDX, u64temp);
-
- atomic_set(&cqp_request->refcount, 2);
- nes_post_cqp_request(nesdev, cqp_request);
-
- /* Wait for CQP */
- nes_debug(NES_DBG_QP, "Waiting for create iWARP QP%u to complete.\n",
- nesqp->hwqp.qp_id);
- ret = wait_event_timeout(cqp_request->waitq,
- (cqp_request->request_done != 0), NES_EVENT_TIMEOUT);
- nes_debug(NES_DBG_QP, "Create iwarp QP%u completed, wait_event_timeout ret=%u,"
- " nesdev->cqp_head = %u, nesdev->cqp.sq_tail = %u,"
- " CQP Major:Minor codes = 0x%04X:0x%04X.\n",
- nesqp->hwqp.qp_id, ret, nesdev->cqp.sq_head, nesdev->cqp.sq_tail,
- cqp_request->major_code, cqp_request->minor_code);
- if ((!ret) || (cqp_request->major_code)) {
- nes_put_cqp_request(nesdev, cqp_request);
- nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num);
- nes_free_qp_mem(nesdev, nesqp,virt_wqs);
- kfree(nesqp->allocated_buffer);
- if (!ret) {
- return ERR_PTR(-ETIME);
- } else {
- return ERR_PTR(-EIO);
- }
- }
-
- nes_put_cqp_request(nesdev, cqp_request);
-
- if (ibpd->uobject) {
- uresp.mmap_sq_db_index = nesqp->mmap_sq_db_index;
- uresp.mmap_rq_db_index = 0;
- uresp.actual_sq_size = sq_size;
- uresp.actual_rq_size = rq_size;
- uresp.qp_id = nesqp->hwqp.qp_id;
- uresp.nes_drv_opt = nes_drv_opt;
- if (ib_copy_to_udata(udata, &uresp, sizeof uresp)) {
- nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num);
- nes_free_qp_mem(nesdev, nesqp,virt_wqs);
- kfree(nesqp->allocated_buffer);
- return ERR_PTR(-EFAULT);
- }
- }
-
- nes_debug(NES_DBG_QP, "QP%u structure located @%p.Size = %u.\n",
- nesqp->hwqp.qp_id, nesqp, (u32)sizeof(*nesqp));
- spin_lock_init(&nesqp->lock);
- nes_add_ref(&nesqp->ibqp);
- break;
- default:
- nes_debug(NES_DBG_QP, "Invalid QP type: %d\n", init_attr->qp_type);
- return ERR_PTR(-EINVAL);
- }
- init_completion(&nesqp->sq_drained);
- init_completion(&nesqp->rq_drained);
-
- nesqp->sig_all = (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR);
- init_timer(&nesqp->terminate_timer);
- nesqp->terminate_timer.function = nes_terminate_timeout;
- nesqp->terminate_timer.data = (unsigned long)nesqp;
-
- /* update the QP table */
- nesdev->nesadapter->qp_table[nesqp->hwqp.qp_id-NES_FIRST_QPN] = nesqp;
- nes_debug(NES_DBG_QP, "netdev refcnt=%u\n",
- netdev_refcnt_read(nesvnic->netdev));
-
- return &nesqp->ibqp;
-}
-
-/**
- * nes_clean_cq
- */
-static void nes_clean_cq(struct nes_qp *nesqp, struct nes_cq *nescq)
-{
- u32 cq_head;
- u32 lo;
- u32 hi;
- u64 u64temp;
- unsigned long flags = 0;
-
- spin_lock_irqsave(&nescq->lock, flags);
-
- cq_head = nescq->hw_cq.cq_head;
- while (le32_to_cpu(nescq->hw_cq.cq_vbase[cq_head].cqe_words[NES_CQE_OPCODE_IDX]) & NES_CQE_VALID) {
- rmb();
- lo = le32_to_cpu(nescq->hw_cq.cq_vbase[cq_head].cqe_words[NES_CQE_COMP_COMP_CTX_LOW_IDX]);
- hi = le32_to_cpu(nescq->hw_cq.cq_vbase[cq_head].cqe_words[NES_CQE_COMP_COMP_CTX_HIGH_IDX]);
- u64temp = (((u64)hi) << 32) | ((u64)lo);
- u64temp &= ~(NES_SW_CONTEXT_ALIGN-1);
- if (u64temp == (u64)(unsigned long)nesqp) {
- /* Zero the context value so cqe will be ignored */
- nescq->hw_cq.cq_vbase[cq_head].cqe_words[NES_CQE_COMP_COMP_CTX_LOW_IDX] = 0;
- nescq->hw_cq.cq_vbase[cq_head].cqe_words[NES_CQE_COMP_COMP_CTX_HIGH_IDX] = 0;
- }
-
- if (++cq_head >= nescq->hw_cq.cq_size)
- cq_head = 0;
- }
-
- spin_unlock_irqrestore(&nescq->lock, flags);
-}
-
-
-/**
- * nes_destroy_qp
- */
-static int nes_destroy_qp(struct ib_qp *ibqp)
-{
- struct nes_qp *nesqp = to_nesqp(ibqp);
- struct nes_ucontext *nes_ucontext;
- struct ib_qp_attr attr;
- struct iw_cm_id *cm_id;
- struct iw_cm_event cm_event;
- int ret = 0;
-
- atomic_inc(&sw_qps_destroyed);
- nesqp->destroyed = 1;
-
- /* Blow away the connection if it exists. */
- if (nesqp->ibqp_state >= IB_QPS_INIT && nesqp->ibqp_state <= IB_QPS_RTS) {
- /* if (nesqp->ibqp_state == IB_QPS_RTS) { */
- attr.qp_state = IB_QPS_ERR;
- nes_modify_qp(&nesqp->ibqp, &attr, IB_QP_STATE, NULL);
- }
-
- if (((nesqp->ibqp_state == IB_QPS_INIT) ||
- (nesqp->ibqp_state == IB_QPS_RTR)) && (nesqp->cm_id)) {
- cm_id = nesqp->cm_id;
- cm_event.event = IW_CM_EVENT_CONNECT_REPLY;
- cm_event.status = -ETIMEDOUT;
- cm_event.local_addr = cm_id->local_addr;
- cm_event.remote_addr = cm_id->remote_addr;
- cm_event.private_data = NULL;
- cm_event.private_data_len = 0;
-
- nes_debug(NES_DBG_QP, "Generating a CM Timeout Event for "
- "QP%u. cm_id = %p, refcount = %u. \n",
- nesqp->hwqp.qp_id, cm_id, atomic_read(&nesqp->refcount));
-
- cm_id->rem_ref(cm_id);
- ret = cm_id->event_handler(cm_id, &cm_event);
- if (ret)
- nes_debug(NES_DBG_QP, "OFA CM event_handler returned, ret=%d\n", ret);
- }
-
- if (nesqp->user_mode) {
- if ((ibqp->uobject)&&(ibqp->uobject->context)) {
- nes_ucontext = to_nesucontext(ibqp->uobject->context);
- clear_bit(nesqp->mmap_sq_db_index, nes_ucontext->allocated_wqs);
- nes_ucontext->mmap_nesqp[nesqp->mmap_sq_db_index] = NULL;
- if (nes_ucontext->first_free_wq > nesqp->mmap_sq_db_index) {
- nes_ucontext->first_free_wq = nesqp->mmap_sq_db_index;
- }
- }
- if (nesqp->pbl_pbase && nesqp->sq_kmapped) {
- nesqp->sq_kmapped = 0;
- kunmap(nesqp->page);
- }
- } else {
- /* Clean any pending completions from the cq(s) */
- if (nesqp->nesscq)
- nes_clean_cq(nesqp, nesqp->nesscq);
-
- if ((nesqp->nesrcq) && (nesqp->nesrcq != nesqp->nesscq))
- nes_clean_cq(nesqp, nesqp->nesrcq);
- }
- nes_rem_ref(&nesqp->ibqp);
- return 0;
-}
-
-
-/**
- * nes_create_cq
- */
-static struct ib_cq *nes_create_cq(struct ib_device *ibdev,
- const struct ib_cq_init_attr *attr,
- struct ib_ucontext *context,
- struct ib_udata *udata)
-{
- int entries = attr->cqe;
- u64 u64temp;
- struct nes_vnic *nesvnic = to_nesvnic(ibdev);
- struct nes_device *nesdev = nesvnic->nesdev;
- struct nes_adapter *nesadapter = nesdev->nesadapter;
- struct nes_cq *nescq;
- struct nes_ucontext *nes_ucontext = NULL;
- struct nes_cqp_request *cqp_request;
- void *mem = NULL;
- struct nes_hw_cqp_wqe *cqp_wqe;
- struct nes_pbl *nespbl = NULL;
- struct nes_create_cq_req req;
- struct nes_create_cq_resp resp;
- u32 cq_num = 0;
- u32 opcode = 0;
- u32 pbl_entries = 1;
- int err;
- unsigned long flags;
- int ret;
-
- if (attr->flags)
- return ERR_PTR(-EINVAL);
-
- if (entries > nesadapter->max_cqe)
- return ERR_PTR(-EINVAL);
-
- err = nes_alloc_resource(nesadapter, nesadapter->allocated_cqs,
- nesadapter->max_cq, &cq_num, &nesadapter->next_cq, NES_RESOURCE_CQ);
- if (err) {
- return ERR_PTR(err);
- }
-
- nescq = kzalloc(sizeof(struct nes_cq), GFP_KERNEL);
- if (!nescq) {
- nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num);
- return ERR_PTR(-ENOMEM);
- }
-
- nescq->hw_cq.cq_size = max(entries + 1, 5);
- nescq->hw_cq.cq_number = cq_num;
- nescq->ibcq.cqe = nescq->hw_cq.cq_size - 1;
-
-
- if (context) {
- nes_ucontext = to_nesucontext(context);
- if (ib_copy_from_udata(&req, udata, sizeof (struct nes_create_cq_req))) {
- nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num);
- kfree(nescq);
- return ERR_PTR(-EFAULT);
- }
- nesvnic->mcrq_ucontext = nes_ucontext;
- nes_ucontext->mcrqf = req.mcrqf;
- if (nes_ucontext->mcrqf) {
- if (nes_ucontext->mcrqf & 0x80000000)
- nescq->hw_cq.cq_number = nesvnic->nic.qp_id + 28 + 2 * ((nes_ucontext->mcrqf & 0xf) - 1);
- else if (nes_ucontext->mcrqf & 0x40000000)
- nescq->hw_cq.cq_number = nes_ucontext->mcrqf & 0xffff;
- else
- nescq->hw_cq.cq_number = nesvnic->mcrq_qp_id + nes_ucontext->mcrqf-1;
- nescq->mcrqf = nes_ucontext->mcrqf;
- nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num);
- }
- nes_debug(NES_DBG_CQ, "CQ Virtual Address = %08lX, size = %u.\n",
- (unsigned long)req.user_cq_buffer, entries);
- err = 1;
- list_for_each_entry(nespbl, &nes_ucontext->cq_reg_mem_list, list) {
- if (nespbl->user_base == (unsigned long )req.user_cq_buffer) {
- list_del(&nespbl->list);
- err = 0;
- nes_debug(NES_DBG_CQ, "Found PBL for virtual CQ. nespbl=%p.\n",
- nespbl);
- break;
- }
- }
- if (err) {
- nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num);
- kfree(nescq);
- return ERR_PTR(-EFAULT);
- }
-
- pbl_entries = nespbl->pbl_size >> 3;
- nescq->cq_mem_size = 0;
- } else {
- nescq->cq_mem_size = nescq->hw_cq.cq_size * sizeof(struct nes_hw_cqe);
- nes_debug(NES_DBG_CQ, "Attempting to allocate pci memory (%u entries, %u bytes) for CQ%u.\n",
- entries, nescq->cq_mem_size, nescq->hw_cq.cq_number);
-
- /* allocate the physical buffer space */
- mem = pci_zalloc_consistent(nesdev->pcidev, nescq->cq_mem_size,
- &nescq->hw_cq.cq_pbase);
- if (!mem) {
- printk(KERN_ERR PFX "Unable to allocate pci memory for cq\n");
- nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num);
- kfree(nescq);
- return ERR_PTR(-ENOMEM);
- }
-
- nescq->hw_cq.cq_vbase = mem;
- nescq->hw_cq.cq_head = 0;
- nes_debug(NES_DBG_CQ, "CQ%u virtual address @ %p, phys = 0x%08X\n",
- nescq->hw_cq.cq_number, nescq->hw_cq.cq_vbase,
- (u32)nescq->hw_cq.cq_pbase);
- }
-
- nescq->hw_cq.ce_handler = nes_iwarp_ce_handler;
- spin_lock_init(&nescq->lock);
-
- /* send CreateCQ request to CQP */
- cqp_request = nes_get_cqp_request(nesdev);
- if (cqp_request == NULL) {
- nes_debug(NES_DBG_CQ, "Failed to get a cqp_request.\n");
- if (!context)
- pci_free_consistent(nesdev->pcidev, nescq->cq_mem_size, mem,
- nescq->hw_cq.cq_pbase);
- else {
- pci_free_consistent(nesdev->pcidev, nespbl->pbl_size,
- nespbl->pbl_vbase, nespbl->pbl_pbase);
- kfree(nespbl);
- }
-
- nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num);
- kfree(nescq);
- return ERR_PTR(-ENOMEM);
- }
- cqp_request->waiting = 1;
- cqp_wqe = &cqp_request->cqp_wqe;
-
- opcode = NES_CQP_CREATE_CQ | NES_CQP_CQ_CEQ_VALID |
- NES_CQP_CQ_CHK_OVERFLOW |
- NES_CQP_CQ_CEQE_MASK | ((u32)nescq->hw_cq.cq_size << 16);
-
- spin_lock_irqsave(&nesadapter->pbl_lock, flags);
-
- if (pbl_entries != 1) {
- if (pbl_entries > 32) {
- /* use 4k pbl */
- nes_debug(NES_DBG_CQ, "pbl_entries=%u, use a 4k PBL\n", pbl_entries);
- if (nesadapter->free_4kpbl == 0) {
- spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
- nes_free_cqp_request(nesdev, cqp_request);
- if (!context)
- pci_free_consistent(nesdev->pcidev, nescq->cq_mem_size, mem,
- nescq->hw_cq.cq_pbase);
- else {
- pci_free_consistent(nesdev->pcidev, nespbl->pbl_size,
- nespbl->pbl_vbase, nespbl->pbl_pbase);
- kfree(nespbl);
- }
- nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num);
- kfree(nescq);
- return ERR_PTR(-ENOMEM);
- } else {
- opcode |= (NES_CQP_CQ_VIRT | NES_CQP_CQ_4KB_CHUNK);
- nescq->virtual_cq = 2;
- nesadapter->free_4kpbl--;
- }
- } else {
- /* use 256 byte pbl */
- nes_debug(NES_DBG_CQ, "pbl_entries=%u, use a 256 byte PBL\n", pbl_entries);
- if (nesadapter->free_256pbl == 0) {
- spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
- nes_free_cqp_request(nesdev, cqp_request);
- if (!context)
- pci_free_consistent(nesdev->pcidev, nescq->cq_mem_size, mem,
- nescq->hw_cq.cq_pbase);
- else {
- pci_free_consistent(nesdev->pcidev, nespbl->pbl_size,
- nespbl->pbl_vbase, nespbl->pbl_pbase);
- kfree(nespbl);
- }
- nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num);
- kfree(nescq);
- return ERR_PTR(-ENOMEM);
- } else {
- opcode |= NES_CQP_CQ_VIRT;
- nescq->virtual_cq = 1;
- nesadapter->free_256pbl--;
- }
- }
- }
-
- spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
-
- nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, opcode);
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX,
- (nescq->hw_cq.cq_number | ((u32)nesdev->ceq_index << 16)));
-
- if (context) {
- if (pbl_entries != 1)
- u64temp = (u64)nespbl->pbl_pbase;
- else
- u64temp = le64_to_cpu(nespbl->pbl_vbase[0]);
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_CQ_WQE_DOORBELL_INDEX_HIGH_IDX,
- nes_ucontext->mmap_db_index[0]);
- } else {
- u64temp = (u64)nescq->hw_cq.cq_pbase;
- cqp_wqe->wqe_words[NES_CQP_CQ_WQE_DOORBELL_INDEX_HIGH_IDX] = 0;
- }
- set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_CQ_WQE_PBL_LOW_IDX, u64temp);
- cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_HIGH_IDX] = 0;
- u64temp = (u64)(unsigned long)&nescq->hw_cq;
- cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_LOW_IDX] =
- cpu_to_le32((u32)(u64temp >> 1));
- cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_HIGH_IDX] =
- cpu_to_le32(((u32)((u64temp) >> 33)) & 0x7FFFFFFF);
-
- atomic_set(&cqp_request->refcount, 2);
- nes_post_cqp_request(nesdev, cqp_request);
-
- /* Wait for CQP */
- nes_debug(NES_DBG_CQ, "Waiting for create iWARP CQ%u to complete.\n",
- nescq->hw_cq.cq_number);
- ret = wait_event_timeout(cqp_request->waitq, (0 != cqp_request->request_done),
- NES_EVENT_TIMEOUT * 2);
- nes_debug(NES_DBG_CQ, "Create iWARP CQ%u completed, wait_event_timeout ret = %d.\n",
- nescq->hw_cq.cq_number, ret);
- if ((!ret) || (cqp_request->major_code)) {
- nes_put_cqp_request(nesdev, cqp_request);
- if (!context)
- pci_free_consistent(nesdev->pcidev, nescq->cq_mem_size, mem,
- nescq->hw_cq.cq_pbase);
- else {
- pci_free_consistent(nesdev->pcidev, nespbl->pbl_size,
- nespbl->pbl_vbase, nespbl->pbl_pbase);
- kfree(nespbl);
- }
- nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num);
- kfree(nescq);
- return ERR_PTR(-EIO);
- }
- nes_put_cqp_request(nesdev, cqp_request);
-
- if (context) {
- /* free the nespbl */
- pci_free_consistent(nesdev->pcidev, nespbl->pbl_size, nespbl->pbl_vbase,
- nespbl->pbl_pbase);
- kfree(nespbl);
- resp.cq_id = nescq->hw_cq.cq_number;
- resp.cq_size = nescq->hw_cq.cq_size;
- resp.mmap_db_index = 0;
- if (ib_copy_to_udata(udata, &resp, sizeof resp - sizeof resp.reserved)) {
- nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num);
- kfree(nescq);
- return ERR_PTR(-EFAULT);
- }
- }
-
- return &nescq->ibcq;
-}
-
-
-/**
- * nes_destroy_cq
- */
-static int nes_destroy_cq(struct ib_cq *ib_cq)
-{
- struct nes_cq *nescq;
- struct nes_device *nesdev;
- struct nes_vnic *nesvnic;
- struct nes_adapter *nesadapter;
- struct nes_hw_cqp_wqe *cqp_wqe;
- struct nes_cqp_request *cqp_request;
- unsigned long flags;
- u32 opcode = 0;
- int ret;
-
- if (ib_cq == NULL)
- return 0;
-
- nescq = to_nescq(ib_cq);
- nesvnic = to_nesvnic(ib_cq->device);
- nesdev = nesvnic->nesdev;
- nesadapter = nesdev->nesadapter;
-
- nes_debug(NES_DBG_CQ, "Destroy CQ%u\n", nescq->hw_cq.cq_number);
-
- /* Send DestroyCQ request to CQP */
- cqp_request = nes_get_cqp_request(nesdev);
- if (cqp_request == NULL) {
- nes_debug(NES_DBG_CQ, "Failed to get a cqp_request.\n");
- return -ENOMEM;
- }
- cqp_request->waiting = 1;
- cqp_wqe = &cqp_request->cqp_wqe;
- opcode = NES_CQP_DESTROY_CQ | (nescq->hw_cq.cq_size << 16);
- spin_lock_irqsave(&nesadapter->pbl_lock, flags);
- if (nescq->virtual_cq == 1) {
- nesadapter->free_256pbl++;
- if (nesadapter->free_256pbl > nesadapter->max_256pbl) {
- printk(KERN_ERR PFX "%s: free 256B PBLs(%u) has exceeded the max(%u)\n",
- __func__, nesadapter->free_256pbl, nesadapter->max_256pbl);
- }
- } else if (nescq->virtual_cq == 2) {
- nesadapter->free_4kpbl++;
- if (nesadapter->free_4kpbl > nesadapter->max_4kpbl) {
- printk(KERN_ERR PFX "%s: free 4K PBLs(%u) has exceeded the max(%u)\n",
- __func__, nesadapter->free_4kpbl, nesadapter->max_4kpbl);
- }
- opcode |= NES_CQP_CQ_4KB_CHUNK;
- }
-
- spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
-
- nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, opcode);
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX,
- (nescq->hw_cq.cq_number | ((u32)PCI_FUNC(nesdev->pcidev->devfn) << 16)));
- if (!nescq->mcrqf)
- nes_free_resource(nesadapter, nesadapter->allocated_cqs, nescq->hw_cq.cq_number);
-
- atomic_set(&cqp_request->refcount, 2);
- nes_post_cqp_request(nesdev, cqp_request);
-
- /* Wait for CQP */
- nes_debug(NES_DBG_CQ, "Waiting for destroy iWARP CQ%u to complete.\n",
- nescq->hw_cq.cq_number);
- ret = wait_event_timeout(cqp_request->waitq, (0 != cqp_request->request_done),
- NES_EVENT_TIMEOUT);
- nes_debug(NES_DBG_CQ, "Destroy iWARP CQ%u completed, wait_event_timeout ret = %u,"
- " CQP Major:Minor codes = 0x%04X:0x%04X.\n",
- nescq->hw_cq.cq_number, ret, cqp_request->major_code,
- cqp_request->minor_code);
- if (!ret) {
- nes_debug(NES_DBG_CQ, "iWARP CQ%u destroy timeout expired\n",
- nescq->hw_cq.cq_number);
- ret = -ETIME;
- } else if (cqp_request->major_code) {
- nes_debug(NES_DBG_CQ, "iWARP CQ%u destroy failed\n",
- nescq->hw_cq.cq_number);
- ret = -EIO;
- } else {
- ret = 0;
- }
- nes_put_cqp_request(nesdev, cqp_request);
-
- if (nescq->cq_mem_size)
- pci_free_consistent(nesdev->pcidev, nescq->cq_mem_size,
- nescq->hw_cq.cq_vbase, nescq->hw_cq.cq_pbase);
- kfree(nescq);
-
- return ret;
-}
-
-/**
- * root_256
- */
-static u32 root_256(struct nes_device *nesdev,
- struct nes_root_vpbl *root_vpbl,
- struct nes_root_vpbl *new_root,
- u16 pbl_count_4k)
-{
- u64 leaf_pbl;
- int i, j, k;
-
- if (pbl_count_4k == 1) {
- new_root->pbl_vbase = pci_alloc_consistent(nesdev->pcidev,
- 512, &new_root->pbl_pbase);
-
- if (new_root->pbl_vbase == NULL)
- return 0;
-
- leaf_pbl = (u64)root_vpbl->pbl_pbase;
- for (i = 0; i < 16; i++) {
- new_root->pbl_vbase[i].pa_low =
- cpu_to_le32((u32)leaf_pbl);
- new_root->pbl_vbase[i].pa_high =
- cpu_to_le32((u32)((((u64)leaf_pbl) >> 32)));
- leaf_pbl += 256;
- }
- } else {
- for (i = 3; i >= 0; i--) {
- j = i * 16;
- root_vpbl->pbl_vbase[j] = root_vpbl->pbl_vbase[i];
- leaf_pbl = le32_to_cpu(root_vpbl->pbl_vbase[j].pa_low) +
- (((u64)le32_to_cpu(root_vpbl->pbl_vbase[j].pa_high))
- << 32);
- for (k = 1; k < 16; k++) {
- leaf_pbl += 256;
- root_vpbl->pbl_vbase[j + k].pa_low =
- cpu_to_le32((u32)leaf_pbl);
- root_vpbl->pbl_vbase[j + k].pa_high =
- cpu_to_le32((u32)((((u64)leaf_pbl) >> 32)));
- }
- }
- }
-
- return 1;
-}
-
-
-/**
- * nes_reg_mr
- */
-static int nes_reg_mr(struct nes_device *nesdev, struct nes_pd *nespd,
- u32 stag, u64 region_length, struct nes_root_vpbl *root_vpbl,
- dma_addr_t single_buffer, u16 pbl_count_4k,
- u16 residual_page_count_4k, int acc, u64 *iova_start,
- u16 *actual_pbl_cnt, u8 *used_4k_pbls)
-{
- struct nes_hw_cqp_wqe *cqp_wqe;
- struct nes_cqp_request *cqp_request;
- unsigned long flags;
- int ret;
- struct nes_adapter *nesadapter = nesdev->nesadapter;
- uint pg_cnt = 0;
- u16 pbl_count_256 = 0;
- u16 pbl_count = 0;
- u8 use_256_pbls = 0;
- u8 use_4k_pbls = 0;
- u16 use_two_level = (pbl_count_4k > 1) ? 1 : 0;
- struct nes_root_vpbl new_root = { 0, NULL, NULL };
- u32 opcode = 0;
- u16 major_code;
-
- /* Register the region with the adapter */
- cqp_request = nes_get_cqp_request(nesdev);
- if (cqp_request == NULL) {
- nes_debug(NES_DBG_MR, "Failed to get a cqp_request.\n");
- return -ENOMEM;
- }
- cqp_request->waiting = 1;
- cqp_wqe = &cqp_request->cqp_wqe;
-
- if (pbl_count_4k) {
- spin_lock_irqsave(&nesadapter->pbl_lock, flags);
-
- pg_cnt = ((pbl_count_4k - 1) * 512) + residual_page_count_4k;
- pbl_count_256 = (pg_cnt + 31) / 32;
- if (pg_cnt <= 32) {
- if (pbl_count_256 <= nesadapter->free_256pbl)
- use_256_pbls = 1;
- else if (pbl_count_4k <= nesadapter->free_4kpbl)
- use_4k_pbls = 1;
- } else if (pg_cnt <= 2048) {
- if (((pbl_count_4k + use_two_level) <= nesadapter->free_4kpbl) &&
- (nesadapter->free_4kpbl > (nesadapter->max_4kpbl >> 1))) {
- use_4k_pbls = 1;
- } else if ((pbl_count_256 + 1) <= nesadapter->free_256pbl) {
- use_256_pbls = 1;
- use_two_level = 1;
- } else if ((pbl_count_4k + use_two_level) <= nesadapter->free_4kpbl) {
- use_4k_pbls = 1;
- }
- } else {
- if ((pbl_count_4k + 1) <= nesadapter->free_4kpbl)
- use_4k_pbls = 1;
- }
-
- if (use_256_pbls) {
- pbl_count = pbl_count_256;
- nesadapter->free_256pbl -= pbl_count + use_two_level;
- } else if (use_4k_pbls) {
- pbl_count = pbl_count_4k;
- nesadapter->free_4kpbl -= pbl_count + use_two_level;
- } else {
- spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
- nes_debug(NES_DBG_MR, "Out of Pbls\n");
- nes_free_cqp_request(nesdev, cqp_request);
- return -ENOMEM;
- }
-
- spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
- }
-
- if (use_256_pbls && use_two_level) {
- if (root_256(nesdev, root_vpbl, &new_root, pbl_count_4k) == 1) {
- if (new_root.pbl_pbase != 0)
- root_vpbl = &new_root;
- } else {
- spin_lock_irqsave(&nesadapter->pbl_lock, flags);
- nesadapter->free_256pbl += pbl_count_256 + use_two_level;
- use_256_pbls = 0;
-
- if (pbl_count_4k == 1)
- use_two_level = 0;
- pbl_count = pbl_count_4k;
-
- if ((pbl_count_4k + use_two_level) <= nesadapter->free_4kpbl) {
- nesadapter->free_4kpbl -= pbl_count + use_two_level;
- use_4k_pbls = 1;
- }
- spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
-
- if (use_4k_pbls == 0)
- return -ENOMEM;
- }
- }
-
- opcode = NES_CQP_REGISTER_STAG | NES_CQP_STAG_RIGHTS_LOCAL_READ |
- NES_CQP_STAG_VA_TO | NES_CQP_STAG_MR;
- if (acc & IB_ACCESS_LOCAL_WRITE)
- opcode |= NES_CQP_STAG_RIGHTS_LOCAL_WRITE;
- if (acc & IB_ACCESS_REMOTE_WRITE)
- opcode |= NES_CQP_STAG_RIGHTS_REMOTE_WRITE | NES_CQP_STAG_REM_ACC_EN;
- if (acc & IB_ACCESS_REMOTE_READ)
- opcode |= NES_CQP_STAG_RIGHTS_REMOTE_READ | NES_CQP_STAG_REM_ACC_EN;
- if (acc & IB_ACCESS_MW_BIND)
- opcode |= NES_CQP_STAG_RIGHTS_WINDOW_BIND | NES_CQP_STAG_REM_ACC_EN;
-
- nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, opcode);
- set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_VA_LOW_IDX, *iova_start);
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_LEN_LOW_IDX, region_length);
-
- cqp_wqe->wqe_words[NES_CQP_STAG_WQE_LEN_HIGH_PD_IDX] =
- cpu_to_le32((u32)(region_length >> 8) & 0xff000000);
- cqp_wqe->wqe_words[NES_CQP_STAG_WQE_LEN_HIGH_PD_IDX] |=
- cpu_to_le32(nespd->pd_id & 0x00007fff);
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_STAG_IDX, stag);
-
- if (pbl_count == 0) {
- set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_PA_LOW_IDX, single_buffer);
- } else {
- set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_PA_LOW_IDX, root_vpbl->pbl_pbase);
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_PBL_BLK_COUNT_IDX, pbl_count);
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_PBL_LEN_IDX, (pg_cnt * 8));
-
- if (use_4k_pbls)
- cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] |= cpu_to_le32(NES_CQP_STAG_PBL_BLK_SIZE);
- }
- barrier();
-
- atomic_set(&cqp_request->refcount, 2);
- nes_post_cqp_request(nesdev, cqp_request);
-
- /* Wait for CQP */
- ret = wait_event_timeout(cqp_request->waitq, (0 != cqp_request->request_done),
- NES_EVENT_TIMEOUT);
- nes_debug(NES_DBG_MR, "Register STag 0x%08X completed, wait_event_timeout ret = %u,"
- " CQP Major:Minor codes = 0x%04X:0x%04X.\n",
- stag, ret, cqp_request->major_code, cqp_request->minor_code);
- major_code = cqp_request->major_code;
- nes_put_cqp_request(nesdev, cqp_request);
-
- if ((!ret || major_code) && pbl_count != 0) {
- spin_lock_irqsave(&nesadapter->pbl_lock, flags);
- if (use_256_pbls)
- nesadapter->free_256pbl += pbl_count + use_two_level;
- else if (use_4k_pbls)
- nesadapter->free_4kpbl += pbl_count + use_two_level;
- spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
- }
- if (new_root.pbl_pbase)
- pci_free_consistent(nesdev->pcidev, 512, new_root.pbl_vbase,
- new_root.pbl_pbase);
-
- if (!ret)
- return -ETIME;
- else if (major_code)
- return -EIO;
-
- *actual_pbl_cnt = pbl_count + use_two_level;
- *used_4k_pbls = use_4k_pbls;
- return 0;
-}
-
-
-/**
- * nes_reg_phys_mr
- */
-struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd, u64 addr, u64 size,
- int acc, u64 *iova_start)
-{
- u64 region_length;
- struct nes_pd *nespd = to_nespd(ib_pd);
- struct nes_vnic *nesvnic = to_nesvnic(ib_pd->device);
- struct nes_device *nesdev = nesvnic->nesdev;
- struct nes_adapter *nesadapter = nesdev->nesadapter;
- struct nes_mr *nesmr;
- struct ib_mr *ibmr;
- struct nes_vpbl vpbl;
- struct nes_root_vpbl root_vpbl;
- u32 stag;
- unsigned long mask;
- u32 stag_index = 0;
- u32 next_stag_index = 0;
- u32 driver_key = 0;
- int err = 0;
- int ret = 0;
- u16 pbl_count = 0;
- u8 single_page = 1;
- u8 stag_key = 0;
-
- region_length = 0;
- vpbl.pbl_vbase = NULL;
- root_vpbl.pbl_vbase = NULL;
- root_vpbl.pbl_pbase = 0;
-
- get_random_bytes(&next_stag_index, sizeof(next_stag_index));
- stag_key = (u8)next_stag_index;
-
- driver_key = 0;
-
- next_stag_index >>= 8;
- next_stag_index %= nesadapter->max_mr;
-
- if ((addr ^ *iova_start) & ~PAGE_MASK)
- return ERR_PTR(-EINVAL);
-
- err = nes_alloc_resource(nesadapter, nesadapter->allocated_mrs, nesadapter->max_mr,
- &stag_index, &next_stag_index, NES_RESOURCE_PHYS_MR);
- if (err) {
- return ERR_PTR(err);
- }
-
- nesmr = kzalloc(sizeof(*nesmr), GFP_KERNEL);
- if (!nesmr) {
- nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
- return ERR_PTR(-ENOMEM);
- }
-
- /* Allocate a 4K buffer for the PBL */
- vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 4096,
- &vpbl.pbl_pbase);
- nes_debug(NES_DBG_MR, "Allocating leaf PBL, va = %p, pa = 0x%016lX\n",
- vpbl.pbl_vbase, (unsigned long)vpbl.pbl_pbase);
- if (!vpbl.pbl_vbase) {
- nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
- ibmr = ERR_PTR(-ENOMEM);
- kfree(nesmr);
- goto reg_phys_err;
- }
-
-
- mask = !size;
-
- if (mask & ~PAGE_MASK) {
- nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
- nes_debug(NES_DBG_MR, "Invalid buffer addr or size\n");
- ibmr = ERR_PTR(-EINVAL);
- kfree(nesmr);
- goto reg_phys_err;
- }
-
- region_length += size;
- vpbl.pbl_vbase[0].pa_low = cpu_to_le32((u32)addr & PAGE_MASK);
- vpbl.pbl_vbase[0].pa_high = cpu_to_le32((u32)((((u64)addr) >> 32)));
-
- stag = stag_index << 8;
- stag |= driver_key;
- stag += (u32)stag_key;
-
- nes_debug(NES_DBG_MR, "Registering STag 0x%08X, VA = 0x%016lX,"
- " length = 0x%016lX, index = 0x%08X\n",
- stag, (unsigned long)*iova_start, (unsigned long)region_length, stag_index);
-
- /* Make the leaf PBL the root if only one PBL */
- root_vpbl.pbl_pbase = vpbl.pbl_pbase;
-
- if (single_page) {
- pbl_count = 0;
- } else {
- pbl_count = 1;
- }
- ret = nes_reg_mr(nesdev, nespd, stag, region_length, &root_vpbl,
- addr, pbl_count, 1, acc, iova_start,
- &nesmr->pbls_used, &nesmr->pbl_4k);
-
- if (ret == 0) {
- nesmr->ibmr.rkey = stag;
- nesmr->ibmr.lkey = stag;
- nesmr->mode = IWNES_MEMREG_TYPE_MEM;
- ibmr = &nesmr->ibmr;
- } else {
- kfree(nesmr);
- ibmr = ERR_PTR(-ENOMEM);
- }
-
-reg_phys_err:
- /* single PBL case */
- pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase, vpbl.pbl_pbase);
- return ibmr;
-}
-
-
-/**
- * nes_get_dma_mr
- */
-static struct ib_mr *nes_get_dma_mr(struct ib_pd *pd, int acc)
-{
- u64 kva = 0;
-
- nes_debug(NES_DBG_MR, "\n");
-
- return nes_reg_phys_mr(pd, 0, 0xffffffffffULL, acc, &kva);
-}
-
-/**
- * nes_reg_user_mr
- */
-static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
- u64 virt, int acc, struct ib_udata *udata)
-{
- u64 iova_start;
- __le64 *pbl;
- u64 region_length;
- dma_addr_t last_dma_addr = 0;
- dma_addr_t first_dma_addr = 0;
- struct nes_pd *nespd = to_nespd(pd);
- struct nes_vnic *nesvnic = to_nesvnic(pd->device);
- struct nes_device *nesdev = nesvnic->nesdev;
- struct nes_adapter *nesadapter = nesdev->nesadapter;
- struct ib_mr *ibmr = ERR_PTR(-EINVAL);
- struct scatterlist *sg;
- struct nes_ucontext *nes_ucontext;
- struct nes_pbl *nespbl;
- struct nes_mr *nesmr;
- struct ib_umem *region;
- struct nes_mem_reg_req req;
- struct nes_vpbl vpbl;
- struct nes_root_vpbl root_vpbl;
- int entry, page_index;
- int page_count = 0;
- int err, pbl_depth = 0;
- int chunk_pages;
- int ret;
- u32 stag;
- u32 stag_index = 0;
- u32 next_stag_index;
- u32 driver_key;
- u32 root_pbl_index = 0;
- u32 cur_pbl_index = 0;
- u32 skip_pages;
- u16 pbl_count;
- u8 single_page = 1;
- u8 stag_key;
- int first_page = 1;
-
- region = ib_umem_get(pd->uobject->context, start, length, acc, 0);
- if (IS_ERR(region)) {
- return (struct ib_mr *)region;
- }
-
- nes_debug(NES_DBG_MR, "User base = 0x%lX, Virt base = 0x%lX, length = %u,"
- " offset = %u, page size = %u.\n",
- (unsigned long int)start, (unsigned long int)virt, (u32)length,
- ib_umem_offset(region), region->page_size);
-
- skip_pages = ((u32)ib_umem_offset(region)) >> 12;
-
- if (ib_copy_from_udata(&req, udata, sizeof(req))) {
- ib_umem_release(region);
- return ERR_PTR(-EFAULT);
- }
- nes_debug(NES_DBG_MR, "Memory Registration type = %08X.\n", req.reg_type);
-
- switch (req.reg_type) {
- case IWNES_MEMREG_TYPE_MEM:
- pbl_depth = 0;
- region_length = 0;
- vpbl.pbl_vbase = NULL;
- root_vpbl.pbl_vbase = NULL;
- root_vpbl.pbl_pbase = 0;
-
- get_random_bytes(&next_stag_index, sizeof(next_stag_index));
- stag_key = (u8)next_stag_index;
-
- driver_key = next_stag_index & 0x70000000;
-
- next_stag_index >>= 8;
- next_stag_index %= nesadapter->max_mr;
-
- err = nes_alloc_resource(nesadapter, nesadapter->allocated_mrs,
- nesadapter->max_mr, &stag_index, &next_stag_index, NES_RESOURCE_USER_MR);
- if (err) {
- ib_umem_release(region);
- return ERR_PTR(err);
- }
-
- nesmr = kzalloc(sizeof(*nesmr), GFP_KERNEL);
- if (!nesmr) {
- ib_umem_release(region);
- nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
- return ERR_PTR(-ENOMEM);
- }
- nesmr->region = region;
-
- for_each_sg(region->sg_head.sgl, sg, region->nmap, entry) {
- if (sg_dma_address(sg) & ~PAGE_MASK) {
- ib_umem_release(region);
- nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
- nes_debug(NES_DBG_MR, "Unaligned Memory Buffer: 0x%x\n",
- (unsigned int) sg_dma_address(sg));
- ibmr = ERR_PTR(-EINVAL);
- kfree(nesmr);
- goto reg_user_mr_err;
- }
-
- if (!sg_dma_len(sg)) {
- ib_umem_release(region);
- nes_free_resource(nesadapter, nesadapter->allocated_mrs,
- stag_index);
- nes_debug(NES_DBG_MR, "Invalid Buffer Size\n");
- ibmr = ERR_PTR(-EINVAL);
- kfree(nesmr);
- goto reg_user_mr_err;
- }
-
- region_length += sg_dma_len(sg);
- chunk_pages = sg_dma_len(sg) >> 12;
- region_length -= skip_pages << 12;
- for (page_index = skip_pages; page_index < chunk_pages; page_index++) {
- skip_pages = 0;
- if ((page_count != 0) && (page_count << 12) - (ib_umem_offset(region) & (4096 - 1)) >= region->length)
- goto enough_pages;
- if ((page_count&0x01FF) == 0) {
- if (page_count >= 1024 * 512) {
- ib_umem_release(region);
- nes_free_resource(nesadapter,
- nesadapter->allocated_mrs, stag_index);
- kfree(nesmr);
- ibmr = ERR_PTR(-E2BIG);
- goto reg_user_mr_err;
- }
- if (root_pbl_index == 1) {
- root_vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev,
- 8192, &root_vpbl.pbl_pbase);
- nes_debug(NES_DBG_MR, "Allocating root PBL, va = %p, pa = 0x%08X\n",
- root_vpbl.pbl_vbase, (unsigned int)root_vpbl.pbl_pbase);
- if (!root_vpbl.pbl_vbase) {
- ib_umem_release(region);
- pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase,
- vpbl.pbl_pbase);
- nes_free_resource(nesadapter, nesadapter->allocated_mrs,
- stag_index);
- kfree(nesmr);
- ibmr = ERR_PTR(-ENOMEM);
- goto reg_user_mr_err;
- }
- root_vpbl.leaf_vpbl = kzalloc(sizeof(*root_vpbl.leaf_vpbl)*1024,
- GFP_KERNEL);
- if (!root_vpbl.leaf_vpbl) {
- ib_umem_release(region);
- pci_free_consistent(nesdev->pcidev, 8192, root_vpbl.pbl_vbase,
- root_vpbl.pbl_pbase);
- pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase,
- vpbl.pbl_pbase);
- nes_free_resource(nesadapter, nesadapter->allocated_mrs,
- stag_index);
- kfree(nesmr);
- ibmr = ERR_PTR(-ENOMEM);
- goto reg_user_mr_err;
- }
- root_vpbl.pbl_vbase[0].pa_low =
- cpu_to_le32((u32)vpbl.pbl_pbase);
- root_vpbl.pbl_vbase[0].pa_high =
- cpu_to_le32((u32)((((u64)vpbl.pbl_pbase) >> 32)));
- root_vpbl.leaf_vpbl[0] = vpbl;
- }
- vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 4096,
- &vpbl.pbl_pbase);
- nes_debug(NES_DBG_MR, "Allocating leaf PBL, va = %p, pa = 0x%08X\n",
- vpbl.pbl_vbase, (unsigned int)vpbl.pbl_pbase);
- if (!vpbl.pbl_vbase) {
- ib_umem_release(region);
- nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
- ibmr = ERR_PTR(-ENOMEM);
- kfree(nesmr);
- goto reg_user_mr_err;
- }
- if (1 <= root_pbl_index) {
- root_vpbl.pbl_vbase[root_pbl_index].pa_low =
- cpu_to_le32((u32)vpbl.pbl_pbase);
- root_vpbl.pbl_vbase[root_pbl_index].pa_high =
- cpu_to_le32((u32)((((u64)vpbl.pbl_pbase)>>32)));
- root_vpbl.leaf_vpbl[root_pbl_index] = vpbl;
- }
- root_pbl_index++;
- cur_pbl_index = 0;
- }
- if (single_page) {
- if (page_count != 0) {
- if ((last_dma_addr+4096) !=
- (sg_dma_address(sg)+
- (page_index*4096)))
- single_page = 0;
- last_dma_addr = sg_dma_address(sg)+
- (page_index*4096);
- } else {
- first_dma_addr = sg_dma_address(sg)+
- (page_index*4096);
- last_dma_addr = first_dma_addr;
- }
- }
-
- vpbl.pbl_vbase[cur_pbl_index].pa_low =
- cpu_to_le32((u32)(sg_dma_address(sg)+
- (page_index*4096)));
- vpbl.pbl_vbase[cur_pbl_index].pa_high =
- cpu_to_le32((u32)((((u64)(sg_dma_address(sg)+
- (page_index*4096))) >> 32)));
- cur_pbl_index++;
- page_count++;
- }
- }
-
- enough_pages:
- nes_debug(NES_DBG_MR, "calculating stag, stag_index=0x%08x, driver_key=0x%08x,"
- " stag_key=0x%08x\n",
- stag_index, driver_key, stag_key);
- stag = stag_index << 8;
- stag |= driver_key;
- stag += (u32)stag_key;
-
- iova_start = virt;
- /* Make the leaf PBL the root if only one PBL */
- if (root_pbl_index == 1) {
- root_vpbl.pbl_pbase = vpbl.pbl_pbase;
- }
-
- if (single_page) {
- pbl_count = 0;
- } else {
- pbl_count = root_pbl_index;
- first_dma_addr = 0;
- }
- nes_debug(NES_DBG_MR, "Registering STag 0x%08X, VA = 0x%08X, length = 0x%08X,"
- " index = 0x%08X, region->length=0x%08llx, pbl_count = %u\n",
- stag, (unsigned int)iova_start,
- (unsigned int)region_length, stag_index,
- (unsigned long long)region->length, pbl_count);
- ret = nes_reg_mr(nesdev, nespd, stag, region->length, &root_vpbl,
- first_dma_addr, pbl_count, (u16)cur_pbl_index, acc,
- &iova_start, &nesmr->pbls_used, &nesmr->pbl_4k);
-
- nes_debug(NES_DBG_MR, "ret=%d\n", ret);
-
- if (ret == 0) {
- nesmr->ibmr.rkey = stag;
- nesmr->ibmr.lkey = stag;
- nesmr->mode = IWNES_MEMREG_TYPE_MEM;
- ibmr = &nesmr->ibmr;
- } else {
- ib_umem_release(region);
- kfree(nesmr);
- ibmr = ERR_PTR(-ENOMEM);
- }
-
- reg_user_mr_err:
- /* free the resources */
- if (root_pbl_index == 1) {
- pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase,
- vpbl.pbl_pbase);
- } else {
- for (page_index=0; page_index<root_pbl_index; page_index++) {
- pci_free_consistent(nesdev->pcidev, 4096,
- root_vpbl.leaf_vpbl[page_index].pbl_vbase,
- root_vpbl.leaf_vpbl[page_index].pbl_pbase);
- }
- kfree(root_vpbl.leaf_vpbl);
- pci_free_consistent(nesdev->pcidev, 8192, root_vpbl.pbl_vbase,
- root_vpbl.pbl_pbase);
- }
-
- nes_debug(NES_DBG_MR, "Leaving, ibmr=%p", ibmr);
-
- return ibmr;
- case IWNES_MEMREG_TYPE_QP:
- case IWNES_MEMREG_TYPE_CQ:
- if (!region->length) {
- nes_debug(NES_DBG_MR, "Unable to register zero length region for CQ\n");
- ib_umem_release(region);
- return ERR_PTR(-EINVAL);
- }
- nespbl = kzalloc(sizeof(*nespbl), GFP_KERNEL);
- if (!nespbl) {
- ib_umem_release(region);
- return ERR_PTR(-ENOMEM);
- }
- nesmr = kzalloc(sizeof(*nesmr), GFP_KERNEL);
- if (!nesmr) {
- ib_umem_release(region);
- kfree(nespbl);
- return ERR_PTR(-ENOMEM);
- }
- nesmr->region = region;
- nes_ucontext = to_nesucontext(pd->uobject->context);
- pbl_depth = region->length >> 12;
- pbl_depth += (region->length & (4096-1)) ? 1 : 0;
- nespbl->pbl_size = pbl_depth*sizeof(u64);
- if (req.reg_type == IWNES_MEMREG_TYPE_QP) {
- nes_debug(NES_DBG_MR, "Attempting to allocate QP PBL memory");
- } else {
- nes_debug(NES_DBG_MR, "Attempting to allocate CP PBL memory");
- }
-
- nes_debug(NES_DBG_MR, " %u bytes, %u entries.\n",
- nespbl->pbl_size, pbl_depth);
- pbl = pci_alloc_consistent(nesdev->pcidev, nespbl->pbl_size,
- &nespbl->pbl_pbase);
- if (!pbl) {
- ib_umem_release(region);
- kfree(nesmr);
- kfree(nespbl);
- nes_debug(NES_DBG_MR, "Unable to allocate PBL memory\n");
- return ERR_PTR(-ENOMEM);
- }
-
- nespbl->pbl_vbase = (u64 *)pbl;
- nespbl->user_base = start;
- nes_debug(NES_DBG_MR, "Allocated PBL memory, %u bytes, pbl_pbase=%lx,"
- " pbl_vbase=%p user_base=0x%lx\n",
- nespbl->pbl_size, (unsigned long) nespbl->pbl_pbase,
- (void *) nespbl->pbl_vbase, nespbl->user_base);
-
- for_each_sg(region->sg_head.sgl, sg, region->nmap, entry) {
- chunk_pages = sg_dma_len(sg) >> 12;
- chunk_pages += (sg_dma_len(sg) & (4096-1)) ? 1 : 0;
- if (first_page) {
- nespbl->page = sg_page(sg);
- first_page = 0;
- }
-
- for (page_index = 0; page_index < chunk_pages; page_index++) {
- ((__le32 *)pbl)[0] = cpu_to_le32((u32)
- (sg_dma_address(sg)+
- (page_index*4096)));
- ((__le32 *)pbl)[1] = cpu_to_le32(((u64)
- (sg_dma_address(sg)+
- (page_index*4096)))>>32);
- nes_debug(NES_DBG_MR, "pbl=%p, *pbl=0x%016llx, 0x%08x%08x\n", pbl,
- (unsigned long long)*pbl,
- le32_to_cpu(((__le32 *)pbl)[1]), le32_to_cpu(((__le32 *)pbl)[0]));
- pbl++;
- }
- }
-
- if (req.reg_type == IWNES_MEMREG_TYPE_QP) {
- list_add_tail(&nespbl->list, &nes_ucontext->qp_reg_mem_list);
- } else {
- list_add_tail(&nespbl->list, &nes_ucontext->cq_reg_mem_list);
- }
- nesmr->ibmr.rkey = -1;
- nesmr->ibmr.lkey = -1;
- nesmr->mode = req.reg_type;
- return &nesmr->ibmr;
- }
-
- ib_umem_release(region);
- return ERR_PTR(-ENOSYS);
-}
-
-
-/**
- * nes_dereg_mr
- */
-static int nes_dereg_mr(struct ib_mr *ib_mr)
-{
- struct nes_mr *nesmr = to_nesmr(ib_mr);
- struct nes_vnic *nesvnic = to_nesvnic(ib_mr->device);
- struct nes_device *nesdev = nesvnic->nesdev;
- struct nes_adapter *nesadapter = nesdev->nesadapter;
- struct nes_hw_cqp_wqe *cqp_wqe;
- struct nes_cqp_request *cqp_request;
- unsigned long flags;
- int ret;
- u16 major_code;
- u16 minor_code;
-
-
- if (nesmr->pages)
- pci_free_consistent(nesdev->pcidev,
- nesmr->max_pages * sizeof(u64),
- nesmr->pages,
- nesmr->paddr);
-
- if (nesmr->region) {
- ib_umem_release(nesmr->region);
- }
- if (nesmr->mode != IWNES_MEMREG_TYPE_MEM) {
- kfree(nesmr);
- return 0;
- }
-
- /* Deallocate the region with the adapter */
-
- cqp_request = nes_get_cqp_request(nesdev);
- if (cqp_request == NULL) {
- nes_debug(NES_DBG_MR, "Failed to get a cqp_request.\n");
- return -ENOMEM;
- }
- cqp_request->waiting = 1;
- cqp_wqe = &cqp_request->cqp_wqe;
-
- nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX,
- NES_CQP_DEALLOCATE_STAG | NES_CQP_STAG_VA_TO |
- NES_CQP_STAG_DEALLOC_PBLS | NES_CQP_STAG_MR);
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_STAG_IDX, ib_mr->rkey);
-
- atomic_set(&cqp_request->refcount, 2);
- nes_post_cqp_request(nesdev, cqp_request);
-
- /* Wait for CQP */
- nes_debug(NES_DBG_MR, "Waiting for deallocate STag 0x%08X completed\n", ib_mr->rkey);
- ret = wait_event_timeout(cqp_request->waitq, (cqp_request->request_done != 0),
- NES_EVENT_TIMEOUT);
- nes_debug(NES_DBG_MR, "Deallocate STag 0x%08X completed, wait_event_timeout ret = %u,"
- " CQP Major:Minor codes = 0x%04X:0x%04X\n",
- ib_mr->rkey, ret, cqp_request->major_code, cqp_request->minor_code);
-
- major_code = cqp_request->major_code;
- minor_code = cqp_request->minor_code;
-
- nes_put_cqp_request(nesdev, cqp_request);
-
- if (!ret) {
- nes_debug(NES_DBG_MR, "Timeout waiting to destroy STag,"
- " ib_mr=%p, rkey = 0x%08X\n",
- ib_mr, ib_mr->rkey);
- return -ETIME;
- } else if (major_code) {
- nes_debug(NES_DBG_MR, "Error (0x%04X:0x%04X) while attempting"
- " to destroy STag, ib_mr=%p, rkey = 0x%08X\n",
- major_code, minor_code, ib_mr, ib_mr->rkey);
- return -EIO;
- }
-
- if (nesmr->pbls_used != 0) {
- spin_lock_irqsave(&nesadapter->pbl_lock, flags);
- if (nesmr->pbl_4k) {
- nesadapter->free_4kpbl += nesmr->pbls_used;
- if (nesadapter->free_4kpbl > nesadapter->max_4kpbl)
- printk(KERN_ERR PFX "free 4KB PBLs(%u) has "
- "exceeded the max(%u)\n",
- nesadapter->free_4kpbl,
- nesadapter->max_4kpbl);
- } else {
- nesadapter->free_256pbl += nesmr->pbls_used;
- if (nesadapter->free_256pbl > nesadapter->max_256pbl)
- printk(KERN_ERR PFX "free 256B PBLs(%u) has "
- "exceeded the max(%u)\n",
- nesadapter->free_256pbl,
- nesadapter->max_256pbl);
- }
- spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
- }
- nes_free_resource(nesadapter, nesadapter->allocated_mrs,
- (ib_mr->rkey & 0x0fffff00) >> 8);
-
- kfree(nesmr);
-
- return 0;
-}
-
-
-/**
- * show_rev
- */
-static ssize_t show_rev(struct device *dev, struct device_attribute *attr,
- char *buf)
-{
- struct nes_ib_device *nesibdev =
- container_of(dev, struct nes_ib_device, ibdev.dev);
- struct nes_vnic *nesvnic = nesibdev->nesvnic;
-
- nes_debug(NES_DBG_INIT, "\n");
- return sprintf(buf, "%x\n", nesvnic->nesdev->nesadapter->hw_rev);
-}
-
-
-/**
- * show_hca
- */
-static ssize_t show_hca(struct device *dev, struct device_attribute *attr,
- char *buf)
-{
- nes_debug(NES_DBG_INIT, "\n");
- return sprintf(buf, "NES020\n");
-}
-
-
-/**
- * show_board
- */
-static ssize_t show_board(struct device *dev, struct device_attribute *attr,
- char *buf)
-{
- nes_debug(NES_DBG_INIT, "\n");
- return sprintf(buf, "%.*s\n", 32, "NES020 Board ID");
-}
-
-
-static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
-static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
-static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
-
-static struct device_attribute *nes_dev_attributes[] = {
- &dev_attr_hw_rev,
- &dev_attr_hca_type,
- &dev_attr_board_id
-};
-
-
-/**
- * nes_query_qp
- */
-static int nes_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
- int attr_mask, struct ib_qp_init_attr *init_attr)
-{
- struct nes_qp *nesqp = to_nesqp(ibqp);
-
- nes_debug(NES_DBG_QP, "\n");
-
- attr->qp_access_flags = 0;
- attr->cap.max_send_wr = nesqp->hwqp.sq_size;
- attr->cap.max_recv_wr = nesqp->hwqp.rq_size;
- attr->cap.max_recv_sge = 1;
- if (nes_drv_opt & NES_DRV_OPT_NO_INLINE_DATA)
- attr->cap.max_inline_data = 0;
- else
- attr->cap.max_inline_data = 64;
-
- init_attr->event_handler = nesqp->ibqp.event_handler;
- init_attr->qp_context = nesqp->ibqp.qp_context;
- init_attr->send_cq = nesqp->ibqp.send_cq;
- init_attr->recv_cq = nesqp->ibqp.recv_cq;
- init_attr->srq = nesqp->ibqp.srq;
- init_attr->cap = attr->cap;
-
- return 0;
-}
-
-
-/**
- * nes_hw_modify_qp
- */
-int nes_hw_modify_qp(struct nes_device *nesdev, struct nes_qp *nesqp,
- u32 next_iwarp_state, u32 termlen, u32 wait_completion)
-{
- struct nes_hw_cqp_wqe *cqp_wqe;
- /* struct iw_cm_id *cm_id = nesqp->cm_id; */
- /* struct iw_cm_event cm_event; */
- struct nes_cqp_request *cqp_request;
- int ret;
- u16 major_code;
-
- nes_debug(NES_DBG_MOD_QP, "QP%u, refcount=%d\n",
- nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount));
-
- cqp_request = nes_get_cqp_request(nesdev);
- if (cqp_request == NULL) {
- nes_debug(NES_DBG_MOD_QP, "Failed to get a cqp_request.\n");
- return -ENOMEM;
- }
- if (wait_completion) {
- cqp_request->waiting = 1;
- } else {
- cqp_request->waiting = 0;
- }
- cqp_wqe = &cqp_request->cqp_wqe;
-
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX,
- NES_CQP_MODIFY_QP | NES_CQP_QP_TYPE_IWARP | next_iwarp_state);
- nes_debug(NES_DBG_MOD_QP, "using next_iwarp_state=%08x, wqe_words=%08x\n",
- next_iwarp_state, le32_to_cpu(cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX]));
- nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX, nesqp->hwqp.qp_id);
- set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_QP_WQE_CONTEXT_LOW_IDX, (u64)nesqp->nesqp_context_pbase);
-
- /* If sending a terminate message, fill in the length (in words) */
- if (((next_iwarp_state & NES_CQP_QP_IWARP_STATE_MASK) == NES_CQP_QP_IWARP_STATE_TERMINATE) &&
- !(next_iwarp_state & NES_CQP_QP_TERM_DONT_SEND_TERM_MSG)) {
- termlen = ((termlen + 3) >> 2) << NES_CQP_OP_TERMLEN_SHIFT;
- set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_QP_WQE_NEW_MSS_IDX, termlen);
- }
-
- atomic_set(&cqp_request->refcount, 2);
- nes_post_cqp_request(nesdev, cqp_request);
-
- /* Wait for CQP */
- if (wait_completion) {
- /* nes_debug(NES_DBG_MOD_QP, "Waiting for modify iWARP QP%u to complete.\n",
- nesqp->hwqp.qp_id); */
- ret = wait_event_timeout(cqp_request->waitq, (cqp_request->request_done != 0),
- NES_EVENT_TIMEOUT);
- nes_debug(NES_DBG_MOD_QP, "Modify iwarp QP%u completed, wait_event_timeout ret=%u, "
- "CQP Major:Minor codes = 0x%04X:0x%04X.\n",
- nesqp->hwqp.qp_id, ret, cqp_request->major_code, cqp_request->minor_code);
- major_code = cqp_request->major_code;
- if (major_code) {
- nes_debug(NES_DBG_MOD_QP, "Modify iwarp QP%u failed"
- "CQP Major:Minor codes = 0x%04X:0x%04X, intended next state = 0x%08X.\n",
- nesqp->hwqp.qp_id, cqp_request->major_code,
- cqp_request->minor_code, next_iwarp_state);
- }
-
- nes_put_cqp_request(nesdev, cqp_request);
-
- if (!ret)
- return -ETIME;
- else if (major_code)
- return -EIO;
- else
- return 0;
- } else {
- return 0;
- }
-}
-
-
-/**
- * nes_modify_qp
- */
-int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
- int attr_mask, struct ib_udata *udata)
-{
- struct nes_qp *nesqp = to_nesqp(ibqp);
- struct nes_vnic *nesvnic = to_nesvnic(ibqp->device);
- struct nes_device *nesdev = nesvnic->nesdev;
- /* u32 cqp_head; */
- /* u32 counter; */
- u32 next_iwarp_state = 0;
- int err;
- unsigned long qplockflags;
- int ret;
- u16 original_last_aeq;
- u8 issue_modify_qp = 0;
- u8 dont_wait = 0;
-
- nes_debug(NES_DBG_MOD_QP, "QP%u: QP State=%u, cur QP State=%u,"
- " iwarp_state=0x%X, refcount=%d\n",
- nesqp->hwqp.qp_id, attr->qp_state, nesqp->ibqp_state,
- nesqp->iwarp_state, atomic_read(&nesqp->refcount));
-
- spin_lock_irqsave(&nesqp->lock, qplockflags);
-
- nes_debug(NES_DBG_MOD_QP, "QP%u: hw_iwarp_state=0x%X, hw_tcp_state=0x%X,"
- " QP Access Flags=0x%X, attr_mask = 0x%0x\n",
- nesqp->hwqp.qp_id, nesqp->hw_iwarp_state,
- nesqp->hw_tcp_state, attr->qp_access_flags, attr_mask);
-
- if (attr_mask & IB_QP_STATE) {
- switch (attr->qp_state) {
- case IB_QPS_INIT:
- nes_debug(NES_DBG_MOD_QP, "QP%u: new state = init\n",
- nesqp->hwqp.qp_id);
- if (nesqp->iwarp_state > (u32)NES_CQP_QP_IWARP_STATE_IDLE) {
- spin_unlock_irqrestore(&nesqp->lock, qplockflags);
- return -EINVAL;
- }
- next_iwarp_state = NES_CQP_QP_IWARP_STATE_IDLE;
- issue_modify_qp = 1;
- break;
- case IB_QPS_RTR:
- nes_debug(NES_DBG_MOD_QP, "QP%u: new state = rtr\n",
- nesqp->hwqp.qp_id);
- if (nesqp->iwarp_state>(u32)NES_CQP_QP_IWARP_STATE_IDLE) {
- spin_unlock_irqrestore(&nesqp->lock, qplockflags);
- return -EINVAL;
- }
- next_iwarp_state = NES_CQP_QP_IWARP_STATE_IDLE;
- issue_modify_qp = 1;
- break;
- case IB_QPS_RTS:
- nes_debug(NES_DBG_MOD_QP, "QP%u: new state = rts\n",
- nesqp->hwqp.qp_id);
- if (nesqp->iwarp_state>(u32)NES_CQP_QP_IWARP_STATE_RTS) {
- spin_unlock_irqrestore(&nesqp->lock, qplockflags);
- return -EINVAL;
- }
- if (nesqp->cm_id == NULL) {
- nes_debug(NES_DBG_MOD_QP, "QP%u: Failing attempt to move QP to RTS without a CM_ID. \n",
- nesqp->hwqp.qp_id );
- spin_unlock_irqrestore(&nesqp->lock, qplockflags);
- return -EINVAL;
- }
- next_iwarp_state = NES_CQP_QP_IWARP_STATE_RTS;
- if (nesqp->iwarp_state != NES_CQP_QP_IWARP_STATE_RTS)
- next_iwarp_state |= NES_CQP_QP_CONTEXT_VALID |
- NES_CQP_QP_ARP_VALID | NES_CQP_QP_ORD_VALID;
- issue_modify_qp = 1;
- nesqp->hw_tcp_state = NES_AEQE_TCP_STATE_ESTABLISHED;
- nesqp->hw_iwarp_state = NES_AEQE_IWARP_STATE_RTS;
- nesqp->hte_added = 1;
- break;
- case IB_QPS_SQD:
- issue_modify_qp = 1;
- nes_debug(NES_DBG_MOD_QP, "QP%u: new state=closing. SQ head=%u, SQ tail=%u\n",
- nesqp->hwqp.qp_id, nesqp->hwqp.sq_head, nesqp->hwqp.sq_tail);
- if (nesqp->iwarp_state == (u32)NES_CQP_QP_IWARP_STATE_CLOSING) {
- spin_unlock_irqrestore(&nesqp->lock, qplockflags);
- return 0;
- } else {
- if (nesqp->iwarp_state > (u32)NES_CQP_QP_IWARP_STATE_CLOSING) {
- nes_debug(NES_DBG_MOD_QP, "QP%u: State change to closing"
- " ignored due to current iWARP state\n",
- nesqp->hwqp.qp_id);
- spin_unlock_irqrestore(&nesqp->lock, qplockflags);
- return -EINVAL;
- }
- if (nesqp->hw_iwarp_state != NES_AEQE_IWARP_STATE_RTS) {
- nes_debug(NES_DBG_MOD_QP, "QP%u: State change to closing"
- " already done based on hw state.\n",
- nesqp->hwqp.qp_id);
- issue_modify_qp = 0;
- }
- switch (nesqp->hw_iwarp_state) {
- case NES_AEQE_IWARP_STATE_CLOSING:
- next_iwarp_state = NES_CQP_QP_IWARP_STATE_CLOSING;
- break;
- case NES_AEQE_IWARP_STATE_TERMINATE:
- next_iwarp_state = NES_CQP_QP_IWARP_STATE_TERMINATE;
- break;
- case NES_AEQE_IWARP_STATE_ERROR:
- next_iwarp_state = NES_CQP_QP_IWARP_STATE_ERROR;
- break;
- default:
- next_iwarp_state = NES_CQP_QP_IWARP_STATE_CLOSING;
- nesqp->hw_iwarp_state = NES_AEQE_IWARP_STATE_CLOSING;
- break;
- }
- }
- break;
- case IB_QPS_SQE:
- nes_debug(NES_DBG_MOD_QP, "QP%u: new state = terminate\n",
- nesqp->hwqp.qp_id);
- if (nesqp->iwarp_state>=(u32)NES_CQP_QP_IWARP_STATE_TERMINATE) {
- spin_unlock_irqrestore(&nesqp->lock, qplockflags);
- return -EINVAL;
- }
- /* next_iwarp_state = (NES_CQP_QP_IWARP_STATE_TERMINATE | 0x02000000); */
- next_iwarp_state = NES_CQP_QP_IWARP_STATE_TERMINATE;
- nesqp->hw_iwarp_state = NES_AEQE_IWARP_STATE_TERMINATE;
- issue_modify_qp = 1;
- break;
- case IB_QPS_ERR:
- case IB_QPS_RESET:
- if (nesqp->iwarp_state == (u32)NES_CQP_QP_IWARP_STATE_ERROR) {
- spin_unlock_irqrestore(&nesqp->lock, qplockflags);
- return -EINVAL;
- }
- nes_debug(NES_DBG_MOD_QP, "QP%u: new state = error\n",
- nesqp->hwqp.qp_id);
- if (nesqp->term_flags)
- del_timer(&nesqp->terminate_timer);
-
- next_iwarp_state = NES_CQP_QP_IWARP_STATE_ERROR;
- /* next_iwarp_state = (NES_CQP_QP_IWARP_STATE_TERMINATE | 0x02000000); */
- if (nesqp->hte_added) {
- nes_debug(NES_DBG_MOD_QP, "set CQP_QP_DEL_HTE\n");
- next_iwarp_state |= NES_CQP_QP_DEL_HTE;
- nesqp->hte_added = 0;
- }
- if ((nesqp->hw_tcp_state > NES_AEQE_TCP_STATE_CLOSED) &&
- (nesdev->iw_status) &&
- (nesqp->hw_tcp_state != NES_AEQE_TCP_STATE_TIME_WAIT)) {
- next_iwarp_state |= NES_CQP_QP_RESET;
- } else {
- nes_debug(NES_DBG_MOD_QP, "QP%u NOT setting NES_CQP_QP_RESET since TCP state = %u\n",
- nesqp->hwqp.qp_id, nesqp->hw_tcp_state);
- dont_wait = 1;
- }
- issue_modify_qp = 1;
- nesqp->hw_iwarp_state = NES_AEQE_IWARP_STATE_ERROR;
- break;
- default:
- spin_unlock_irqrestore(&nesqp->lock, qplockflags);
- return -EINVAL;
- break;
- }
-
- nesqp->ibqp_state = attr->qp_state;
- nesqp->iwarp_state = next_iwarp_state & NES_CQP_QP_IWARP_STATE_MASK;
- nes_debug(NES_DBG_MOD_QP, "Change nesqp->iwarp_state=%08x\n",
- nesqp->iwarp_state);
- }
-
- if (attr_mask & IB_QP_ACCESS_FLAGS) {
- if (attr->qp_access_flags & IB_ACCESS_LOCAL_WRITE) {
- nesqp->nesqp_context->misc |= cpu_to_le32(NES_QPCONTEXT_MISC_RDMA_WRITE_EN |
- NES_QPCONTEXT_MISC_RDMA_READ_EN);
- issue_modify_qp = 1;
- }
- if (attr->qp_access_flags & IB_ACCESS_REMOTE_WRITE) {
- nesqp->nesqp_context->misc |= cpu_to_le32(NES_QPCONTEXT_MISC_RDMA_WRITE_EN);
- issue_modify_qp = 1;
- }
- if (attr->qp_access_flags & IB_ACCESS_REMOTE_READ) {
- nesqp->nesqp_context->misc |= cpu_to_le32(NES_QPCONTEXT_MISC_RDMA_READ_EN);
- issue_modify_qp = 1;
- }
- if (attr->qp_access_flags & IB_ACCESS_MW_BIND) {
- nesqp->nesqp_context->misc |= cpu_to_le32(NES_QPCONTEXT_MISC_WBIND_EN);
- issue_modify_qp = 1;
- }
-
- if (nesqp->user_mode) {
- nesqp->nesqp_context->misc |= cpu_to_le32(NES_QPCONTEXT_MISC_RDMA_WRITE_EN |
- NES_QPCONTEXT_MISC_RDMA_READ_EN);
- issue_modify_qp = 1;
- }
- }
-
- original_last_aeq = nesqp->last_aeq;
- spin_unlock_irqrestore(&nesqp->lock, qplockflags);
-
- nes_debug(NES_DBG_MOD_QP, "issue_modify_qp=%u\n", issue_modify_qp);
-
- ret = 0;
-
-
- if (issue_modify_qp) {
- nes_debug(NES_DBG_MOD_QP, "call nes_hw_modify_qp\n");
- ret = nes_hw_modify_qp(nesdev, nesqp, next_iwarp_state, 0, 1);
- if (ret)
- nes_debug(NES_DBG_MOD_QP, "nes_hw_modify_qp (next_iwarp_state = 0x%08X)"
- " failed for QP%u.\n",
- next_iwarp_state, nesqp->hwqp.qp_id);
-
- }
-
- if ((issue_modify_qp) && (nesqp->ibqp_state > IB_QPS_RTS)) {
- nes_debug(NES_DBG_MOD_QP, "QP%u Issued ModifyQP refcount (%d),"
- " original_last_aeq = 0x%04X. last_aeq = 0x%04X.\n",
- nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount),
- original_last_aeq, nesqp->last_aeq);
- if (!ret || original_last_aeq != NES_AEQE_AEID_RDMAP_ROE_BAD_LLP_CLOSE) {
- if (dont_wait) {
- if (nesqp->cm_id && nesqp->hw_tcp_state != 0) {
- nes_debug(NES_DBG_MOD_QP, "QP%u Queuing fake disconnect for QP refcount (%d),"
- " original_last_aeq = 0x%04X. last_aeq = 0x%04X.\n",
- nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount),
- original_last_aeq, nesqp->last_aeq);
- /* this one is for the cm_disconnect thread */
- spin_lock_irqsave(&nesqp->lock, qplockflags);
- nesqp->hw_tcp_state = NES_AEQE_TCP_STATE_CLOSED;
- nesqp->last_aeq = NES_AEQE_AEID_RESET_SENT;
- spin_unlock_irqrestore(&nesqp->lock, qplockflags);
- nes_cm_disconn(nesqp);
- } else {
- nes_debug(NES_DBG_MOD_QP, "QP%u No fake disconnect, QP refcount=%d\n",
- nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount));
- }
- } else {
- spin_lock_irqsave(&nesqp->lock, qplockflags);
- if (nesqp->cm_id) {
- /* These two are for the timer thread */
- if (atomic_inc_return(&nesqp->close_timer_started) == 1) {
- nesqp->cm_id->add_ref(nesqp->cm_id);
- nes_debug(NES_DBG_MOD_QP, "QP%u Not decrementing QP refcount (%d),"
- " need ae to finish up, original_last_aeq = 0x%04X."
- " last_aeq = 0x%04X, scheduling timer.\n",
- nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount),
- original_last_aeq, nesqp->last_aeq);
- schedule_nes_timer(nesqp->cm_node, (struct sk_buff *) nesqp, NES_TIMER_TYPE_CLOSE, 1, 0);
- }
- spin_unlock_irqrestore(&nesqp->lock, qplockflags);
- } else {
- spin_unlock_irqrestore(&nesqp->lock, qplockflags);
- nes_debug(NES_DBG_MOD_QP, "QP%u Not decrementing QP refcount (%d),"
- " need ae to finish up, original_last_aeq = 0x%04X."
- " last_aeq = 0x%04X.\n",
- nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount),
- original_last_aeq, nesqp->last_aeq);
- }
- }
- } else {
- nes_debug(NES_DBG_MOD_QP, "QP%u Decrementing QP refcount (%d), No ae to finish up,"
- " original_last_aeq = 0x%04X. last_aeq = 0x%04X.\n",
- nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount),
- original_last_aeq, nesqp->last_aeq);
- }
- } else {
- nes_debug(NES_DBG_MOD_QP, "QP%u Decrementing QP refcount (%d), No ae to finish up,"
- " original_last_aeq = 0x%04X. last_aeq = 0x%04X.\n",
- nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount),
- original_last_aeq, nesqp->last_aeq);
- }
-
- err = 0;
-
- nes_debug(NES_DBG_MOD_QP, "QP%u Leaving, refcount=%d\n",
- nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount));
-
- return err;
-}
-
-
-/**
- * nes_muticast_attach
- */
-static int nes_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
-{
- nes_debug(NES_DBG_INIT, "\n");
- return -ENOSYS;
-}
-
-
-/**
- * nes_multicast_detach
- */
-static int nes_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
-{
- nes_debug(NES_DBG_INIT, "\n");
- return -ENOSYS;
-}
-
-
-/**
- * nes_process_mad
- */
-static int nes_process_mad(struct ib_device *ibdev, int mad_flags,
- u8 port_num, const struct ib_wc *in_wc, const struct ib_grh *in_grh,
- const struct ib_mad_hdr *in, size_t in_mad_size,
- struct ib_mad_hdr *out, size_t *out_mad_size,
- u16 *out_mad_pkey_index)
-{
- nes_debug(NES_DBG_INIT, "\n");
- return -ENOSYS;
-}
-
-static inline void
-fill_wqe_sg_send(struct nes_hw_qp_wqe *wqe, struct ib_send_wr *ib_wr, u32 uselkey)
-{
- int sge_index;
- int total_payload_length = 0;
- for (sge_index = 0; sge_index < ib_wr->num_sge; sge_index++) {
- set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_FRAG0_LOW_IDX+(sge_index*4),
- ib_wr->sg_list[sge_index].addr);
- set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_LENGTH0_IDX + (sge_index*4),
- ib_wr->sg_list[sge_index].length);
- if (uselkey)
- set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_STAG0_IDX + (sge_index*4),
- (ib_wr->sg_list[sge_index].lkey));
- else
- set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_STAG0_IDX + (sge_index*4), 0);
-
- total_payload_length += ib_wr->sg_list[sge_index].length;
- }
- nes_debug(NES_DBG_IW_TX, "UC UC UC, sending total_payload_length=%u \n",
- total_payload_length);
- set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX,
- total_payload_length);
-}
-
-/**
- * nes_post_send
- */
-static int nes_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
- struct ib_send_wr **bad_wr)
-{
- u64 u64temp;
- unsigned long flags = 0;
- struct nes_vnic *nesvnic = to_nesvnic(ibqp->device);
- struct nes_device *nesdev = nesvnic->nesdev;
- struct nes_qp *nesqp = to_nesqp(ibqp);
- struct nes_hw_qp_wqe *wqe;
- int err = 0;
- u32 qsize = nesqp->hwqp.sq_size;
- u32 head;
- u32 wqe_misc = 0;
- u32 wqe_count = 0;
- u32 counter;
-
- if (nesqp->ibqp_state > IB_QPS_RTS) {
- err = -EINVAL;
- goto out;
- }
-
- spin_lock_irqsave(&nesqp->lock, flags);
-
- head = nesqp->hwqp.sq_head;
-
- while (ib_wr) {
- /* Check for QP error */
- if (nesqp->term_flags) {
- err = -EINVAL;
- break;
- }
-
- /* Check for SQ overflow */
- if (((head + (2 * qsize) - nesqp->hwqp.sq_tail) % qsize) == (qsize - 1)) {
- err = -ENOMEM;
- break;
- }
-
- wqe = &nesqp->hwqp.sq_vbase[head];
- /* nes_debug(NES_DBG_IW_TX, "processing sq wqe for QP%u at %p, head = %u.\n",
- nesqp->hwqp.qp_id, wqe, head); */
- nes_fill_init_qp_wqe(wqe, nesqp, head);
- u64temp = (u64)(ib_wr->wr_id);
- set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_COMP_SCRATCH_LOW_IDX,
- u64temp);
- switch (ib_wr->opcode) {
- case IB_WR_SEND:
- case IB_WR_SEND_WITH_INV:
- if (IB_WR_SEND == ib_wr->opcode) {
- if (ib_wr->send_flags & IB_SEND_SOLICITED)
- wqe_misc = NES_IWARP_SQ_OP_SENDSE;
- else
- wqe_misc = NES_IWARP_SQ_OP_SEND;
- } else {
- if (ib_wr->send_flags & IB_SEND_SOLICITED)
- wqe_misc = NES_IWARP_SQ_OP_SENDSEINV;
- else
- wqe_misc = NES_IWARP_SQ_OP_SENDINV;
-
- set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_INV_STAG_LOW_IDX,
- ib_wr->ex.invalidate_rkey);
- }
-
- if (ib_wr->num_sge > nesdev->nesadapter->max_sge) {
- err = -EINVAL;
- break;
- }
-
- if (ib_wr->send_flags & IB_SEND_FENCE)
- wqe_misc |= NES_IWARP_SQ_WQE_LOCAL_FENCE;
-
- if ((ib_wr->send_flags & IB_SEND_INLINE) &&
- ((nes_drv_opt & NES_DRV_OPT_NO_INLINE_DATA) == 0) &&
- (ib_wr->sg_list[0].length <= 64)) {
- memcpy(&wqe->wqe_words[NES_IWARP_SQ_WQE_IMM_DATA_START_IDX],
- (void *)(unsigned long)ib_wr->sg_list[0].addr, ib_wr->sg_list[0].length);
- set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX,
- ib_wr->sg_list[0].length);
- wqe_misc |= NES_IWARP_SQ_WQE_IMM_DATA;
- } else {
- fill_wqe_sg_send(wqe, ib_wr, 1);
- }
-
- break;
- case IB_WR_RDMA_WRITE:
- wqe_misc = NES_IWARP_SQ_OP_RDMAW;
- if (ib_wr->num_sge > nesdev->nesadapter->max_sge) {
- nes_debug(NES_DBG_IW_TX, "Exceeded max sge, ib_wr=%u, max=%u\n",
- ib_wr->num_sge, nesdev->nesadapter->max_sge);
- err = -EINVAL;
- break;
- }
-
- if (ib_wr->send_flags & IB_SEND_FENCE)
- wqe_misc |= NES_IWARP_SQ_WQE_LOCAL_FENCE;
-
- set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_STAG_IDX,
- rdma_wr(ib_wr)->rkey);
- set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_TO_LOW_IDX,
- rdma_wr(ib_wr)->remote_addr);
-
- if ((ib_wr->send_flags & IB_SEND_INLINE) &&
- ((nes_drv_opt & NES_DRV_OPT_NO_INLINE_DATA) == 0) &&
- (ib_wr->sg_list[0].length <= 64)) {
- memcpy(&wqe->wqe_words[NES_IWARP_SQ_WQE_IMM_DATA_START_IDX],
- (void *)(unsigned long)ib_wr->sg_list[0].addr, ib_wr->sg_list[0].length);
- set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX,
- ib_wr->sg_list[0].length);
- wqe_misc |= NES_IWARP_SQ_WQE_IMM_DATA;
- } else {
- fill_wqe_sg_send(wqe, ib_wr, 1);
- }
-
- wqe->wqe_words[NES_IWARP_SQ_WQE_RDMA_LENGTH_IDX] =
- wqe->wqe_words[NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX];
- break;
- case IB_WR_RDMA_READ:
- case IB_WR_RDMA_READ_WITH_INV:
- /* iWARP only supports 1 sge for RDMA reads */
- if (ib_wr->num_sge > 1) {
- nes_debug(NES_DBG_IW_TX, "Exceeded max sge, ib_wr=%u, max=1\n",
- ib_wr->num_sge);
- err = -EINVAL;
- break;
- }
- if (ib_wr->opcode == IB_WR_RDMA_READ) {
- wqe_misc = NES_IWARP_SQ_OP_RDMAR;
- } else {
- wqe_misc = NES_IWARP_SQ_OP_RDMAR_LOCINV;
- set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_INV_STAG_LOW_IDX,
- ib_wr->ex.invalidate_rkey);
- }
-
- set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_TO_LOW_IDX,
- rdma_wr(ib_wr)->remote_addr);
- set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_STAG_IDX,
- rdma_wr(ib_wr)->rkey);
- set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_LENGTH_IDX,
- ib_wr->sg_list->length);
- set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_FRAG0_LOW_IDX,
- ib_wr->sg_list->addr);
- set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_STAG0_IDX,
- ib_wr->sg_list->lkey);
- break;
- case IB_WR_LOCAL_INV:
- wqe_misc = NES_IWARP_SQ_OP_LOCINV;
- set_wqe_32bit_value(wqe->wqe_words,
- NES_IWARP_SQ_LOCINV_WQE_INV_STAG_IDX,
- ib_wr->ex.invalidate_rkey);
- break;
- case IB_WR_REG_MR:
- {
- struct nes_mr *mr = to_nesmr(reg_wr(ib_wr)->mr);
- int page_shift = ilog2(reg_wr(ib_wr)->mr->page_size);
- int flags = reg_wr(ib_wr)->access;
-
- if (mr->npages > (NES_4K_PBL_CHUNK_SIZE / sizeof(u64))) {
- nes_debug(NES_DBG_IW_TX, "SQ_FMR: bad page_list_len\n");
- err = -EINVAL;
- break;
- }
- wqe_misc = NES_IWARP_SQ_OP_FAST_REG;
- set_wqe_64bit_value(wqe->wqe_words,
- NES_IWARP_SQ_FMR_WQE_VA_FBO_LOW_IDX,
- mr->ibmr.iova);
- set_wqe_32bit_value(wqe->wqe_words,
- NES_IWARP_SQ_FMR_WQE_LENGTH_LOW_IDX,
- mr->ibmr.length);
- set_wqe_32bit_value(wqe->wqe_words,
- NES_IWARP_SQ_FMR_WQE_LENGTH_HIGH_IDX, 0);
- set_wqe_32bit_value(wqe->wqe_words,
- NES_IWARP_SQ_FMR_WQE_MR_STAG_IDX,
- reg_wr(ib_wr)->key);
-
- if (page_shift == 12) {
- wqe_misc |= NES_IWARP_SQ_FMR_WQE_PAGE_SIZE_4K;
- } else if (page_shift == 21) {
- wqe_misc |= NES_IWARP_SQ_FMR_WQE_PAGE_SIZE_2M;
- } else {
- nes_debug(NES_DBG_IW_TX, "Invalid page shift,"
- " ib_wr=%u, max=1\n", ib_wr->num_sge);
- err = -EINVAL;
- break;
- }
-
- /* Set access_flags */
- wqe_misc |= NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_LOCAL_READ;
- if (flags & IB_ACCESS_LOCAL_WRITE)
- wqe_misc |= NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_LOCAL_WRITE;
-
- if (flags & IB_ACCESS_REMOTE_WRITE)
- wqe_misc |= NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_REMOTE_WRITE;
-
- if (flags & IB_ACCESS_REMOTE_READ)
- wqe_misc |= NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_REMOTE_READ;
-
- if (flags & IB_ACCESS_MW_BIND)
- wqe_misc |= NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_WINDOW_BIND;
-
- /* Fill in PBL info: */
- set_wqe_64bit_value(wqe->wqe_words,
- NES_IWARP_SQ_FMR_WQE_PBL_ADDR_LOW_IDX,
- mr->paddr);
-
- set_wqe_32bit_value(wqe->wqe_words,
- NES_IWARP_SQ_FMR_WQE_PBL_LENGTH_IDX,
- mr->npages * 8);
-
- nes_debug(NES_DBG_IW_TX, "SQ_REG_MR: iova_start: %llx, "
- "length: %d, rkey: %0x, pgl_paddr: %llx, "
- "page_list_len: %u, wqe_misc: %x\n",
- (unsigned long long) mr->ibmr.iova,
- mr->ibmr.length,
- reg_wr(ib_wr)->key,
- (unsigned long long) mr->paddr,
- mr->npages,
- wqe_misc);
- break;
- }
- default:
- /* error */
- err = -EINVAL;
- break;
- }
-
- if (err)
- break;
-
- if ((ib_wr->send_flags & IB_SEND_SIGNALED) || nesqp->sig_all)
- wqe_misc |= NES_IWARP_SQ_WQE_SIGNALED_COMPL;
-
- wqe->wqe_words[NES_IWARP_SQ_WQE_MISC_IDX] = cpu_to_le32(wqe_misc);
-
- ib_wr = ib_wr->next;
- head++;
- wqe_count++;
- if (head >= qsize)
- head = 0;
-
- }
-
- nesqp->hwqp.sq_head = head;
- barrier();
- while (wqe_count) {
- counter = min(wqe_count, ((u32)255));
- wqe_count -= counter;
- nes_write32(nesdev->regs + NES_WQE_ALLOC,
- (counter << 24) | 0x00800000 | nesqp->hwqp.qp_id);
- }
-
- spin_unlock_irqrestore(&nesqp->lock, flags);
-
-out:
- if (err)
- *bad_wr = ib_wr;
- return err;
-}
-
-
-/**
- * nes_post_recv
- */
-static int nes_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *ib_wr,
- struct ib_recv_wr **bad_wr)
-{
- u64 u64temp;
- unsigned long flags = 0;
- struct nes_vnic *nesvnic = to_nesvnic(ibqp->device);
- struct nes_device *nesdev = nesvnic->nesdev;
- struct nes_qp *nesqp = to_nesqp(ibqp);
- struct nes_hw_qp_wqe *wqe;
- int err = 0;
- int sge_index;
- u32 qsize = nesqp->hwqp.rq_size;
- u32 head;
- u32 wqe_count = 0;
- u32 counter;
- u32 total_payload_length;
-
- if (nesqp->ibqp_state > IB_QPS_RTS) {
- err = -EINVAL;
- goto out;
- }
-
- spin_lock_irqsave(&nesqp->lock, flags);
-
- head = nesqp->hwqp.rq_head;
-
- while (ib_wr) {
- /* Check for QP error */
- if (nesqp->term_flags) {
- err = -EINVAL;
- break;
- }
-
- if (ib_wr->num_sge > nesdev->nesadapter->max_sge) {
- err = -EINVAL;
- break;
- }
- /* Check for RQ overflow */
- if (((head + (2 * qsize) - nesqp->hwqp.rq_tail) % qsize) == (qsize - 1)) {
- err = -ENOMEM;
- break;
- }
-
- nes_debug(NES_DBG_IW_RX, "ibwr sge count = %u.\n", ib_wr->num_sge);
- wqe = &nesqp->hwqp.rq_vbase[head];
-
- /* nes_debug(NES_DBG_IW_RX, "QP%u:processing rq wqe at %p, head = %u.\n",
- nesqp->hwqp.qp_id, wqe, head); */
- nes_fill_init_qp_wqe(wqe, nesqp, head);
- u64temp = (u64)(ib_wr->wr_id);
- set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_COMP_SCRATCH_LOW_IDX,
- u64temp);
- total_payload_length = 0;
- for (sge_index=0; sge_index < ib_wr->num_sge; sge_index++) {
- set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_RQ_WQE_FRAG0_LOW_IDX+(sge_index*4),
- ib_wr->sg_list[sge_index].addr);
- set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_RQ_WQE_LENGTH0_IDX+(sge_index*4),
- ib_wr->sg_list[sge_index].length);
- set_wqe_32bit_value(wqe->wqe_words,NES_IWARP_RQ_WQE_STAG0_IDX+(sge_index*4),
- ib_wr->sg_list[sge_index].lkey);
-
- total_payload_length += ib_wr->sg_list[sge_index].length;
- }
- set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_RQ_WQE_TOTAL_PAYLOAD_IDX,
- total_payload_length);
-
- ib_wr = ib_wr->next;
- head++;
- wqe_count++;
- if (head >= qsize)
- head = 0;
- }
-
- nesqp->hwqp.rq_head = head;
- barrier();
- while (wqe_count) {
- counter = min(wqe_count, ((u32)255));
- wqe_count -= counter;
- nes_write32(nesdev->regs+NES_WQE_ALLOC, (counter<<24) | nesqp->hwqp.qp_id);
- }
-
- spin_unlock_irqrestore(&nesqp->lock, flags);
-
-out:
- if (err)
- *bad_wr = ib_wr;
- return err;
-}
-
-/**
- * nes_drain_sq - drain sq
- * @ibqp: pointer to ibqp
- */
-static void nes_drain_sq(struct ib_qp *ibqp)
-{
- struct nes_qp *nesqp = to_nesqp(ibqp);
-
- if (nesqp->hwqp.sq_tail != nesqp->hwqp.sq_head)
- wait_for_completion(&nesqp->sq_drained);
-}
-
-/**
- * nes_drain_rq - drain rq
- * @ibqp: pointer to ibqp
- */
-static void nes_drain_rq(struct ib_qp *ibqp)
-{
- struct nes_qp *nesqp = to_nesqp(ibqp);
-
- if (nesqp->hwqp.rq_tail != nesqp->hwqp.rq_head)
- wait_for_completion(&nesqp->rq_drained);
-}
-
-/**
- * nes_poll_cq
- */
-static int nes_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
-{
- u64 u64temp;
- u64 wrid;
- unsigned long flags = 0;
- struct nes_vnic *nesvnic = to_nesvnic(ibcq->device);
- struct nes_device *nesdev = nesvnic->nesdev;
- struct nes_cq *nescq = to_nescq(ibcq);
- struct nes_qp *nesqp;
- struct nes_hw_cqe cqe;
- u32 head;
- u32 wq_tail = 0;
- u32 cq_size;
- u32 cqe_count = 0;
- u32 wqe_index;
- u32 u32temp;
- u32 move_cq_head = 1;
- u32 err_code;
-
- nes_debug(NES_DBG_CQ, "\n");
-
- spin_lock_irqsave(&nescq->lock, flags);
-
- head = nescq->hw_cq.cq_head;
- cq_size = nescq->hw_cq.cq_size;
-
- while (cqe_count < num_entries) {
- if ((le32_to_cpu(nescq->hw_cq.cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX]) &
- NES_CQE_VALID) == 0)
- break;
-
- /*
- * Make sure we read CQ entry contents *after*
- * we've checked the valid bit.
- */
- rmb();
-
- cqe = nescq->hw_cq.cq_vbase[head];
- u32temp = le32_to_cpu(cqe.cqe_words[NES_CQE_COMP_COMP_CTX_LOW_IDX]);
- wqe_index = u32temp & (nesdev->nesadapter->max_qp_wr - 1);
- u32temp &= ~(NES_SW_CONTEXT_ALIGN-1);
- /* parse CQE, get completion context from WQE (either rq or sq) */
- u64temp = (((u64)(le32_to_cpu(cqe.cqe_words[NES_CQE_COMP_COMP_CTX_HIGH_IDX])))<<32) |
- ((u64)u32temp);
-
- if (u64temp) {
- nesqp = (struct nes_qp *)(unsigned long)u64temp;
- memset(entry, 0, sizeof *entry);
- if (cqe.cqe_words[NES_CQE_ERROR_CODE_IDX] == 0) {
- entry->status = IB_WC_SUCCESS;
- } else {
- err_code = le32_to_cpu(cqe.cqe_words[NES_CQE_ERROR_CODE_IDX]);
- if (NES_IWARP_CQE_MAJOR_DRV == (err_code >> 16)) {
- entry->status = err_code & 0x0000ffff;
-
- /* The rest of the cqe's will be marked as flushed */
- nescq->hw_cq.cq_vbase[head].cqe_words[NES_CQE_ERROR_CODE_IDX] =
- cpu_to_le32((NES_IWARP_CQE_MAJOR_FLUSH << 16) |
- NES_IWARP_CQE_MINOR_FLUSH);
- } else
- entry->status = IB_WC_WR_FLUSH_ERR;
- }
-
- entry->qp = &nesqp->ibqp;
- entry->src_qp = nesqp->hwqp.qp_id;
-
- if (le32_to_cpu(cqe.cqe_words[NES_CQE_OPCODE_IDX]) & NES_CQE_SQ) {
- if (nesqp->skip_lsmm) {
- nesqp->skip_lsmm = 0;
- nesqp->hwqp.sq_tail++;
- }
-
- /* Working on a SQ Completion*/
- wrid = (((u64)(cpu_to_le32((u32)nesqp->hwqp.sq_vbase[wqe_index].
- wqe_words[NES_IWARP_SQ_WQE_COMP_SCRATCH_HIGH_IDX]))) << 32) |
- ((u64)(cpu_to_le32((u32)nesqp->hwqp.sq_vbase[wqe_index].
- wqe_words[NES_IWARP_SQ_WQE_COMP_SCRATCH_LOW_IDX])));
- entry->byte_len = le32_to_cpu(nesqp->hwqp.sq_vbase[wqe_index].
- wqe_words[NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX]);
-
- switch (le32_to_cpu(nesqp->hwqp.sq_vbase[wqe_index].
- wqe_words[NES_IWARP_SQ_WQE_MISC_IDX]) & 0x3f) {
- case NES_IWARP_SQ_OP_RDMAW:
- nes_debug(NES_DBG_CQ, "Operation = RDMA WRITE.\n");
- entry->opcode = IB_WC_RDMA_WRITE;
- break;
- case NES_IWARP_SQ_OP_RDMAR:
- nes_debug(NES_DBG_CQ, "Operation = RDMA READ.\n");
- entry->opcode = IB_WC_RDMA_READ;
- entry->byte_len = le32_to_cpu(nesqp->hwqp.sq_vbase[wqe_index].
- wqe_words[NES_IWARP_SQ_WQE_RDMA_LENGTH_IDX]);
- break;
- case NES_IWARP_SQ_OP_SENDINV:
- case NES_IWARP_SQ_OP_SENDSEINV:
- case NES_IWARP_SQ_OP_SEND:
- case NES_IWARP_SQ_OP_SENDSE:
- nes_debug(NES_DBG_CQ, "Operation = Send.\n");
- entry->opcode = IB_WC_SEND;
- break;
- case NES_IWARP_SQ_OP_LOCINV:
- entry->opcode = IB_WC_LOCAL_INV;
- break;
- case NES_IWARP_SQ_OP_FAST_REG:
- entry->opcode = IB_WC_REG_MR;
- break;
- }
-
- nesqp->hwqp.sq_tail = (wqe_index+1)&(nesqp->hwqp.sq_size - 1);
- if ((entry->status != IB_WC_SUCCESS) && (nesqp->hwqp.sq_tail != nesqp->hwqp.sq_head)) {
- move_cq_head = 0;
- wq_tail = nesqp->hwqp.sq_tail;
- }
- } else {
- /* Working on a RQ Completion*/
- entry->byte_len = le32_to_cpu(cqe.cqe_words[NES_CQE_PAYLOAD_LENGTH_IDX]);
- wrid = ((u64)(le32_to_cpu(nesqp->hwqp.rq_vbase[wqe_index].wqe_words[NES_IWARP_RQ_WQE_COMP_SCRATCH_LOW_IDX]))) |
- ((u64)(le32_to_cpu(nesqp->hwqp.rq_vbase[wqe_index].wqe_words[NES_IWARP_RQ_WQE_COMP_SCRATCH_HIGH_IDX]))<<32);
- entry->opcode = IB_WC_RECV;
-
- nesqp->hwqp.rq_tail = (wqe_index+1)&(nesqp->hwqp.rq_size - 1);
- if ((entry->status != IB_WC_SUCCESS) && (nesqp->hwqp.rq_tail != nesqp->hwqp.rq_head)) {
- move_cq_head = 0;
- wq_tail = nesqp->hwqp.rq_tail;
- }
- }
-
- if (nesqp->iwarp_state > NES_CQP_QP_IWARP_STATE_RTS) {
- if (nesqp->hwqp.sq_tail == nesqp->hwqp.sq_head)
- complete(&nesqp->sq_drained);
- if (nesqp->hwqp.rq_tail == nesqp->hwqp.rq_head)
- complete(&nesqp->rq_drained);
- }
-
- entry->wr_id = wrid;
- entry++;
- cqe_count++;
- }
-
- if (move_cq_head) {
- nescq->hw_cq.cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX] = 0;
- if (++head >= cq_size)
- head = 0;
- nescq->polled_completions++;
-
- if ((nescq->polled_completions > (cq_size / 2)) ||
- (nescq->polled_completions == 255)) {
- nes_debug(NES_DBG_CQ, "CQ%u Issuing CQE Allocate since more than half of cqes"
- " are pending %u of %u.\n",
- nescq->hw_cq.cq_number, nescq->polled_completions, cq_size);
- nes_write32(nesdev->regs+NES_CQE_ALLOC,
- nescq->hw_cq.cq_number | (nescq->polled_completions << 16));
- nescq->polled_completions = 0;
- }
- } else {
- /* Update the wqe index and set status to flush */
- wqe_index = le32_to_cpu(cqe.cqe_words[NES_CQE_COMP_COMP_CTX_LOW_IDX]);
- wqe_index = (wqe_index & (~(nesdev->nesadapter->max_qp_wr - 1))) | wq_tail;
- nescq->hw_cq.cq_vbase[head].cqe_words[NES_CQE_COMP_COMP_CTX_LOW_IDX] =
- cpu_to_le32(wqe_index);
- move_cq_head = 1; /* ready for next pass */
- }
- }
-
- if (nescq->polled_completions) {
- nes_write32(nesdev->regs+NES_CQE_ALLOC,
- nescq->hw_cq.cq_number | (nescq->polled_completions << 16));
- nescq->polled_completions = 0;
- }
-
- nescq->hw_cq.cq_head = head;
- nes_debug(NES_DBG_CQ, "Reporting %u completions for CQ%u.\n",
- cqe_count, nescq->hw_cq.cq_number);
-
- spin_unlock_irqrestore(&nescq->lock, flags);
-
- return cqe_count;
-}
-
-
-/**
- * nes_req_notify_cq
- */
-static int nes_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags)
- {
- struct nes_vnic *nesvnic = to_nesvnic(ibcq->device);
- struct nes_device *nesdev = nesvnic->nesdev;
- struct nes_cq *nescq = to_nescq(ibcq);
- u32 cq_arm;
-
- nes_debug(NES_DBG_CQ, "Requesting notification for CQ%u.\n",
- nescq->hw_cq.cq_number);
-
- cq_arm = nescq->hw_cq.cq_number;
- if ((notify_flags & IB_CQ_SOLICITED_MASK) == IB_CQ_NEXT_COMP)
- cq_arm |= NES_CQE_ALLOC_NOTIFY_NEXT;
- else if ((notify_flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED)
- cq_arm |= NES_CQE_ALLOC_NOTIFY_SE;
- else
- return -EINVAL;
-
- nes_write32(nesdev->regs+NES_CQE_ALLOC, cq_arm);
- nes_read32(nesdev->regs+NES_CQE_ALLOC);
-
- return 0;
-}
-
-static int nes_port_immutable(struct ib_device *ibdev, u8 port_num,
- struct ib_port_immutable *immutable)
-{
- struct ib_port_attr attr;
- int err;
-
- err = nes_query_port(ibdev, port_num, &attr);
- if (err)
- return err;
-
- immutable->pkey_tbl_len = attr.pkey_tbl_len;
- immutable->gid_tbl_len = attr.gid_tbl_len;
- immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
-
- return 0;
-}
-
-static void get_dev_fw_str(struct ib_device *dev, char *str,
- size_t str_len)
-{
- struct nes_ib_device *nesibdev =
- container_of(dev, struct nes_ib_device, ibdev);
- struct nes_vnic *nesvnic = nesibdev->nesvnic;
-
- nes_debug(NES_DBG_INIT, "\n");
- snprintf(str, str_len, "%u.%u",
- (nesvnic->nesdev->nesadapter->firmware_version >> 16),
- (nesvnic->nesdev->nesadapter->firmware_version & 0x000000ff));
-}
-
-/**
- * nes_init_ofa_device
- */
-struct nes_ib_device *nes_init_ofa_device(struct net_device *netdev)
-{
- struct nes_ib_device *nesibdev;
- struct nes_vnic *nesvnic = netdev_priv(netdev);
- struct nes_device *nesdev = nesvnic->nesdev;
-
- nesibdev = (struct nes_ib_device *)ib_alloc_device(sizeof(struct nes_ib_device));
- if (nesibdev == NULL) {
- return NULL;
- }
- strlcpy(nesibdev->ibdev.name, "nes%d", IB_DEVICE_NAME_MAX);
- nesibdev->ibdev.owner = THIS_MODULE;
-
- nesibdev->ibdev.node_type = RDMA_NODE_RNIC;
- memset(&nesibdev->ibdev.node_guid, 0, sizeof(nesibdev->ibdev.node_guid));
- memcpy(&nesibdev->ibdev.node_guid, netdev->dev_addr, 6);
-
- nesibdev->ibdev.uverbs_cmd_mask =
- (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
- (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
- (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
- (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
- (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
- (1ull << IB_USER_VERBS_CMD_REG_MR) |
- (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
- (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
- (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
- (1ull << IB_USER_VERBS_CMD_CREATE_AH) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_AH) |
- (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) |
- (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
- (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
- (1ull << IB_USER_VERBS_CMD_POLL_CQ) |
- (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
- (1ull << IB_USER_VERBS_CMD_ALLOC_MW) |
- (1ull << IB_USER_VERBS_CMD_BIND_MW) |
- (1ull << IB_USER_VERBS_CMD_DEALLOC_MW) |
- (1ull << IB_USER_VERBS_CMD_POST_RECV) |
- (1ull << IB_USER_VERBS_CMD_POST_SEND);
-
- nesibdev->ibdev.phys_port_cnt = 1;
- nesibdev->ibdev.num_comp_vectors = 1;
- nesibdev->ibdev.dma_device = &nesdev->pcidev->dev;
- nesibdev->ibdev.dev.parent = &nesdev->pcidev->dev;
- nesibdev->ibdev.query_device = nes_query_device;
- nesibdev->ibdev.query_port = nes_query_port;
- nesibdev->ibdev.query_pkey = nes_query_pkey;
- nesibdev->ibdev.query_gid = nes_query_gid;
- nesibdev->ibdev.alloc_ucontext = nes_alloc_ucontext;
- nesibdev->ibdev.dealloc_ucontext = nes_dealloc_ucontext;
- nesibdev->ibdev.mmap = nes_mmap;
- nesibdev->ibdev.alloc_pd = nes_alloc_pd;
- nesibdev->ibdev.dealloc_pd = nes_dealloc_pd;
- nesibdev->ibdev.create_ah = nes_create_ah;
- nesibdev->ibdev.destroy_ah = nes_destroy_ah;
- nesibdev->ibdev.create_qp = nes_create_qp;
- nesibdev->ibdev.modify_qp = nes_modify_qp;
- nesibdev->ibdev.query_qp = nes_query_qp;
- nesibdev->ibdev.destroy_qp = nes_destroy_qp;
- nesibdev->ibdev.create_cq = nes_create_cq;
- nesibdev->ibdev.destroy_cq = nes_destroy_cq;
- nesibdev->ibdev.poll_cq = nes_poll_cq;
- nesibdev->ibdev.get_dma_mr = nes_get_dma_mr;
- nesibdev->ibdev.reg_user_mr = nes_reg_user_mr;
- nesibdev->ibdev.dereg_mr = nes_dereg_mr;
- nesibdev->ibdev.alloc_mw = nes_alloc_mw;
- nesibdev->ibdev.dealloc_mw = nes_dealloc_mw;
-
- nesibdev->ibdev.alloc_mr = nes_alloc_mr;
- nesibdev->ibdev.map_mr_sg = nes_map_mr_sg;
-
- nesibdev->ibdev.attach_mcast = nes_multicast_attach;
- nesibdev->ibdev.detach_mcast = nes_multicast_detach;
- nesibdev->ibdev.process_mad = nes_process_mad;
-
- nesibdev->ibdev.req_notify_cq = nes_req_notify_cq;
- nesibdev->ibdev.post_send = nes_post_send;
- nesibdev->ibdev.post_recv = nes_post_recv;
- nesibdev->ibdev.drain_sq = nes_drain_sq;
- nesibdev->ibdev.drain_rq = nes_drain_rq;
-
- nesibdev->ibdev.iwcm = kzalloc(sizeof(*nesibdev->ibdev.iwcm), GFP_KERNEL);
- if (nesibdev->ibdev.iwcm == NULL) {
- ib_dealloc_device(&nesibdev->ibdev);
- return NULL;
- }
- nesibdev->ibdev.iwcm->add_ref = nes_add_ref;
- nesibdev->ibdev.iwcm->rem_ref = nes_rem_ref;
- nesibdev->ibdev.iwcm->get_qp = nes_get_qp;
- nesibdev->ibdev.iwcm->connect = nes_connect;
- nesibdev->ibdev.iwcm->accept = nes_accept;
- nesibdev->ibdev.iwcm->reject = nes_reject;
- nesibdev->ibdev.iwcm->create_listen = nes_create_listen;
- nesibdev->ibdev.iwcm->destroy_listen = nes_destroy_listen;
- nesibdev->ibdev.get_port_immutable = nes_port_immutable;
- nesibdev->ibdev.get_dev_fw_str = get_dev_fw_str;
- memcpy(nesibdev->ibdev.iwcm->ifname, netdev->name,
- sizeof(nesibdev->ibdev.iwcm->ifname));
-
- return nesibdev;
-}
-
-
-/**
- * nes_handle_delayed_event
- */
-static void nes_handle_delayed_event(unsigned long data)
-{
- struct nes_vnic *nesvnic = (void *) data;
-
- if (nesvnic->delayed_event != nesvnic->last_dispatched_event) {
- struct ib_event event;
-
- event.device = &nesvnic->nesibdev->ibdev;
- if (!event.device)
- goto stop_timer;
- event.event = nesvnic->delayed_event;
- event.element.port_num = nesvnic->logical_port + 1;
- ib_dispatch_event(&event);
- }
-
-stop_timer:
- nesvnic->event_timer.function = NULL;
-}
-
-
-void nes_port_ibevent(struct nes_vnic *nesvnic)
-{
- struct nes_ib_device *nesibdev = nesvnic->nesibdev;
- struct nes_device *nesdev = nesvnic->nesdev;
- struct ib_event event;
- event.device = &nesibdev->ibdev;
- event.element.port_num = nesvnic->logical_port + 1;
- event.event = nesdev->iw_status ? IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR;
-
- if (!nesvnic->event_timer.function) {
- ib_dispatch_event(&event);
- nesvnic->last_dispatched_event = event.event;
- nesvnic->event_timer.function = nes_handle_delayed_event;
- nesvnic->event_timer.data = (unsigned long) nesvnic;
- nesvnic->event_timer.expires = jiffies + NES_EVENT_DELAY;
- add_timer(&nesvnic->event_timer);
- } else {
- mod_timer(&nesvnic->event_timer, jiffies + NES_EVENT_DELAY);
- }
- nesvnic->delayed_event = event.event;
-}
-
-
-/**
- * nes_destroy_ofa_device
- */
-void nes_destroy_ofa_device(struct nes_ib_device *nesibdev)
-{
- if (nesibdev == NULL)
- return;
-
- nes_unregister_ofa_device(nesibdev);
-
- kfree(nesibdev->ibdev.iwcm);
- ib_dealloc_device(&nesibdev->ibdev);
-}
-
-
-/**
- * nes_register_ofa_device
- */
-int nes_register_ofa_device(struct nes_ib_device *nesibdev)
-{
- struct nes_vnic *nesvnic = nesibdev->nesvnic;
- struct nes_device *nesdev = nesvnic->nesdev;
- struct nes_adapter *nesadapter = nesdev->nesadapter;
- int i, ret;
-
- ret = ib_register_device(&nesvnic->nesibdev->ibdev, NULL);
- if (ret) {
- return ret;
- }
-
- /* Get the resources allocated to this device */
- nesibdev->max_cq = (nesadapter->max_cq-NES_FIRST_QPN) / nesadapter->port_count;
- nesibdev->max_mr = nesadapter->max_mr / nesadapter->port_count;
- nesibdev->max_qp = (nesadapter->max_qp-NES_FIRST_QPN) / nesadapter->port_count;
- nesibdev->max_pd = nesadapter->max_pd / nesadapter->port_count;
-
- for (i = 0; i < ARRAY_SIZE(nes_dev_attributes); ++i) {
- ret = device_create_file(&nesibdev->ibdev.dev, nes_dev_attributes[i]);
- if (ret) {
- while (i > 0) {
- i--;
- device_remove_file(&nesibdev->ibdev.dev,
- nes_dev_attributes[i]);
- }
- ib_unregister_device(&nesibdev->ibdev);
- return ret;
- }
- }
-
- nesvnic->of_device_registered = 1;
-
- return 0;
-}
-
-
-/**
- * nes_unregister_ofa_device
- */
-static void nes_unregister_ofa_device(struct nes_ib_device *nesibdev)
-{
- struct nes_vnic *nesvnic = nesibdev->nesvnic;
- int i;
-
- for (i = 0; i < ARRAY_SIZE(nes_dev_attributes); ++i) {
- device_remove_file(&nesibdev->ibdev.dev, nes_dev_attributes[i]);
- }
-
- if (nesvnic->of_device_registered) {
- ib_unregister_device(&nesibdev->ibdev);
- }
-
- nesvnic->of_device_registered = 0;
-}
diff --git a/drivers/infiniband/hw/nes/nes_verbs.h b/drivers/infiniband/hw/nes/nes_verbs.h
deleted file mode 100644
index e02a5662dc20..000000000000
--- a/drivers/infiniband/hw/nes/nes_verbs.h
+++ /dev/null
@@ -1,199 +0,0 @@
-/*
- * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved.
- * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-
-#ifndef NES_VERBS_H
-#define NES_VERBS_H
-
-struct nes_device;
-
-#define NES_MAX_USER_DB_REGIONS 4096
-#define NES_MAX_USER_WQ_REGIONS 4096
-
-#define NES_TERM_SENT 0x01
-#define NES_TERM_RCVD 0x02
-#define NES_TERM_DONE 0x04
-
-struct nes_ucontext {
- struct ib_ucontext ibucontext;
- struct nes_device *nesdev;
- unsigned long mmap_wq_offset;
- unsigned long mmap_cq_offset; /* to be removed */
- int index; /* rnic index (minor) */
- unsigned long allocated_doorbells[BITS_TO_LONGS(NES_MAX_USER_DB_REGIONS)];
- u16 mmap_db_index[NES_MAX_USER_DB_REGIONS];
- u16 first_free_db;
- unsigned long allocated_wqs[BITS_TO_LONGS(NES_MAX_USER_WQ_REGIONS)];
- struct nes_qp *mmap_nesqp[NES_MAX_USER_WQ_REGIONS];
- u16 first_free_wq;
- struct list_head cq_reg_mem_list;
- struct list_head qp_reg_mem_list;
- u32 mcrqf;
- atomic_t usecnt;
-};
-
-struct nes_pd {
- struct ib_pd ibpd;
- u16 pd_id;
- atomic_t sqp_count;
- u16 mmap_db_index;
-};
-
-struct nes_mr {
- union {
- struct ib_mr ibmr;
- struct ib_mw ibmw;
- struct ib_fmr ibfmr;
- };
- struct ib_umem *region;
- u16 pbls_used;
- u8 mode;
- u8 pbl_4k;
- __le64 *pages;
- dma_addr_t paddr;
- u32 max_pages;
- u32 npages;
-};
-
-struct nes_hw_pb {
- __le32 pa_low;
- __le32 pa_high;
-};
-
-struct nes_vpbl {
- dma_addr_t pbl_pbase;
- struct nes_hw_pb *pbl_vbase;
-};
-
-struct nes_root_vpbl {
- dma_addr_t pbl_pbase;
- struct nes_hw_pb *pbl_vbase;
- struct nes_vpbl *leaf_vpbl;
-};
-
-struct nes_fmr {
- struct nes_mr nesmr;
- u32 leaf_pbl_cnt;
- struct nes_root_vpbl root_vpbl;
- struct ib_qp *ib_qp;
- int access_rights;
- struct ib_fmr_attr attr;
-};
-
-struct nes_av;
-
-struct nes_cq {
- struct ib_cq ibcq;
- struct nes_hw_cq hw_cq;
- u32 polled_completions;
- u32 cq_mem_size;
- spinlock_t lock;
- u8 virtual_cq;
- u8 pad[3];
- u32 mcrqf;
-};
-
-struct nes_wq {
- spinlock_t lock;
-};
-
-struct disconn_work {
- struct work_struct work;
- struct nes_qp *nesqp;
-};
-
-struct iw_cm_id;
-struct ietf_mpa_frame;
-
-struct nes_qp {
- struct ib_qp ibqp;
- void *allocated_buffer;
- struct iw_cm_id *cm_id;
- struct nes_cq *nesscq;
- struct nes_cq *nesrcq;
- struct nes_pd *nespd;
- void *cm_node; /* handle of the node this QP is associated with */
- void *ietf_frame;
- u8 ietf_frame_size;
- dma_addr_t ietf_frame_pbase;
- struct ib_mr *lsmm_mr;
- struct nes_hw_qp hwqp;
- struct work_struct work;
- enum ib_qp_state ibqp_state;
- u32 iwarp_state;
- u32 hte_index;
- u32 last_aeq;
- u32 qp_mem_size;
- atomic_t refcount;
- atomic_t close_timer_started;
- u32 mmap_sq_db_index;
- u32 mmap_rq_db_index;
- spinlock_t lock;
- spinlock_t pau_lock;
- struct nes_qp_context *nesqp_context;
- dma_addr_t nesqp_context_pbase;
- void *pbl_vbase;
- dma_addr_t pbl_pbase;
- struct page *page;
- struct timer_list terminate_timer;
- enum ib_event_type terminate_eventtype;
- struct sk_buff_head pau_list;
- u32 pau_rcv_nxt;
- u16 active_conn:1;
- u16 skip_lsmm:1;
- u16 user_mode:1;
- u16 hte_added:1;
- u16 flush_issued:1;
- u16 destroyed:1;
- u16 sig_all:1;
- u16 pau_mode:1;
- u16 rsvd:8;
- u16 private_data_len;
- u16 term_sq_flush_code;
- u16 term_rq_flush_code;
- u8 hw_iwarp_state;
- u8 hw_tcp_state;
- u8 term_flags;
- u8 sq_kmapped;
- u8 pau_busy;
- u8 pau_pending;
- u8 pau_state;
- __u64 nesuqp_addr;
- struct completion sq_drained;
- struct completion rq_drained;
-};
-
-struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd,
- u64 addr, u64 size, int acc, u64 *iova_start);
-
-#endif /* NES_VERBS_H */
diff --git a/drivers/infiniband/hw/ocrdma/Kconfig b/drivers/infiniband/hw/ocrdma/Kconfig
index c0cddc0192d1..54bd70bc4d1a 100644
--- a/drivers/infiniband/hw/ocrdma/Kconfig
+++ b/drivers/infiniband/hw/ocrdma/Kconfig
@@ -1,8 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0-only
config INFINIBAND_OCRDMA
tristate "Emulex One Connect HCA support"
depends on ETHERNET && NETDEVICES && PCI && INET && (IPV6 || IPV6=n)
select NET_VENDOR_EMULEX
select BE2NET
- ---help---
+ help
This driver provides low-level InfiniBand over Ethernet
support for Emulex One Connect host channel adapters (HCAs).
diff --git a/drivers/infiniband/hw/ocrdma/Makefile b/drivers/infiniband/hw/ocrdma/Makefile
index d1bfd4f4cdde..14fba95021d8 100644
--- a/drivers/infiniband/hw/ocrdma/Makefile
+++ b/drivers/infiniband/hw/ocrdma/Makefile
@@ -1,4 +1,5 @@
-ccflags-y := -Idrivers/net/ethernet/emulex/benet
+# SPDX-License-Identifier: GPL-2.0-only
+ccflags-y := -I $(srctree)/drivers/net/ethernet/emulex/benet
obj-$(CONFIG_INFINIBAND_OCRDMA) += ocrdma.o
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma.h b/drivers/infiniband/hw/ocrdma/ocrdma.h
index 45bdfa0e3b2b..5eb61c110090 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma.h
@@ -98,7 +98,6 @@ struct ocrdma_dev_attr {
u64 max_mr_size;
u32 max_num_mr_pbl;
int max_mw;
- int max_fmr;
int max_map_per_fmr;
int max_pages_per_frmr;
u16 max_ord_per_qp;
@@ -186,7 +185,6 @@ struct ocrdma_hw_mr {
u32 num_pbes;
u32 pbl_size;
u32 pbe_size;
- u64 fbo;
u64 va;
};
@@ -527,17 +525,17 @@ static inline int is_cqe_wr_imm(struct ocrdma_cqe *cqe)
}
static inline int ocrdma_resolve_dmac(struct ocrdma_dev *dev,
- struct ib_ah_attr *ah_attr, u8 *mac_addr)
+ struct rdma_ah_attr *ah_attr, u8 *mac_addr)
{
struct in6_addr in6;
- memcpy(&in6, ah_attr->grh.dgid.raw, sizeof(in6));
+ memcpy(&in6, rdma_ah_read_grh(ah_attr)->dgid.raw, sizeof(in6));
if (rdma_is_multicast_addr(&in6))
rdma_get_mcast_mac(&in6, mac_addr);
else if (rdma_link_local_addr(&in6))
rdma_get_ll_mac(&in6, mac_addr);
else
- memcpy(mac_addr, ah_attr->dmac, ETH_ALEN);
+ memcpy(mac_addr, ah_attr->roce.dmac, ETH_ALEN);
return 0;
}
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
index cd66e1e45dd7..88c45928301f 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
@@ -71,7 +71,7 @@ static u16 ocrdma_hdr_type_to_proto_num(int devid, u8 hdr_type)
}
static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah,
- struct ib_ah_attr *attr, union ib_gid *sgid,
+ struct rdma_ah_attr *attr, const union ib_gid *sgid,
int pdid, bool *isvlan, u16 vlan_tag)
{
int status;
@@ -81,8 +81,8 @@ static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah,
u16 proto_num = 0;
u8 nxthdr = 0x11;
struct iphdr ipv4;
+ const struct ib_global_route *ib_grh;
union {
- struct sockaddr _sockaddr;
struct sockaddr_in _sockaddr_in;
struct sockaddr_in6 _sockaddr_in6;
} sgid_addr, dgid_addr;
@@ -120,32 +120,33 @@ static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah,
status = ocrdma_resolve_dmac(dev, attr, &eth.dmac[0]);
if (status)
return status;
- ah->sgid_index = attr->grh.sgid_index;
+ ib_grh = rdma_ah_read_grh(attr);
+ ah->sgid_index = ib_grh->sgid_index;
/* Eth HDR */
memcpy(&ah->av->eth_hdr, &eth, eth_sz);
if (ah->hdr_type == RDMA_NETWORK_IPV4) {
*((__be16 *)&ipv4) = htons((4 << 12) | (5 << 8) |
- attr->grh.traffic_class);
+ ib_grh->traffic_class);
ipv4.id = cpu_to_be16(pdid);
ipv4.frag_off = htons(IP_DF);
ipv4.tot_len = htons(0);
- ipv4.ttl = attr->grh.hop_limit;
+ ipv4.ttl = ib_grh->hop_limit;
ipv4.protocol = nxthdr;
- rdma_gid2ip(&sgid_addr._sockaddr, sgid);
+ rdma_gid2ip((struct sockaddr *)&sgid_addr, sgid);
ipv4.saddr = sgid_addr._sockaddr_in.sin_addr.s_addr;
- rdma_gid2ip(&dgid_addr._sockaddr, &attr->grh.dgid);
+ rdma_gid2ip((struct sockaddr*)&dgid_addr, &ib_grh->dgid);
ipv4.daddr = dgid_addr._sockaddr_in.sin_addr.s_addr;
memcpy((u8 *)ah->av + eth_sz, &ipv4, sizeof(struct iphdr));
} else {
memcpy(&grh.sgid[0], sgid->raw, sizeof(union ib_gid));
grh.tclass_flow = cpu_to_be32((6 << 28) |
- (attr->grh.traffic_class << 24) |
- attr->grh.flow_label);
- memcpy(&grh.dgid[0], attr->grh.dgid.raw,
- sizeof(attr->grh.dgid.raw));
+ (ib_grh->traffic_class << 24) |
+ ib_grh->flow_label);
+ memcpy(&grh.dgid[0], ib_grh->dgid.raw,
+ sizeof(ib_grh->dgid.raw));
grh.pdid_hoplimit = cpu_to_be32((pdid << 16) |
(nxthdr << 8) |
- attr->grh.hop_limit);
+ ib_grh->hop_limit);
memcpy((u8 *)ah->av + eth_sz, &grh, sizeof(struct ocrdma_grh));
}
if (*isvlan)
@@ -154,69 +155,46 @@ static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah,
return status;
}
-struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr,
- struct ib_udata *udata)
+int ocrdma_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
+ struct ib_udata *udata)
{
u32 *ahid_addr;
int status;
- struct ocrdma_ah *ah;
+ struct ocrdma_ah *ah = get_ocrdma_ah(ibah);
bool isvlan = false;
u16 vlan_tag = 0xffff;
- struct ib_gid_attr sgid_attr;
- struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
- struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
- union ib_gid sgid;
+ const struct ib_gid_attr *sgid_attr;
+ struct ocrdma_pd *pd = get_ocrdma_pd(ibah->pd);
+ struct rdma_ah_attr *attr = init_attr->ah_attr;
+ struct ocrdma_dev *dev = get_ocrdma_dev(ibah->device);
- if (!(attr->ah_flags & IB_AH_GRH))
- return ERR_PTR(-EINVAL);
+ if ((attr->type != RDMA_AH_ATTR_TYPE_ROCE) ||
+ !(rdma_ah_get_ah_flags(attr) & IB_AH_GRH))
+ return -EINVAL;
if (atomic_cmpxchg(&dev->update_sl, 1, 0))
ocrdma_init_service_level(dev);
- ah = kzalloc(sizeof(*ah), GFP_ATOMIC);
- if (!ah)
- return ERR_PTR(-ENOMEM);
+ sgid_attr = attr->grh.sgid_attr;
+ status = rdma_read_gid_l2_fields(sgid_attr, &vlan_tag, NULL);
+ if (status)
+ return status;
status = ocrdma_alloc_av(dev, ah);
if (status)
goto av_err;
- status = ib_get_cached_gid(&dev->ibdev, 1, attr->grh.sgid_index, &sgid,
- &sgid_attr);
- if (status) {
- pr_err("%s(): Failed to query sgid, status = %d\n",
- __func__, status);
- goto av_conf_err;
- }
- if (sgid_attr.ndev) {
- if (is_vlan_dev(sgid_attr.ndev))
- vlan_tag = vlan_dev_vlan_id(sgid_attr.ndev);
- dev_put(sgid_attr.ndev);
- }
/* Get network header type for this GID */
- ah->hdr_type = ib_gid_to_network_type(sgid_attr.gid_type, &sgid);
-
- if ((pd->uctx) &&
- (!rdma_is_multicast_addr((struct in6_addr *)attr->grh.dgid.raw)) &&
- (!rdma_link_local_addr((struct in6_addr *)attr->grh.dgid.raw))) {
- status = rdma_addr_find_l2_eth_by_grh(&sgid, &attr->grh.dgid,
- attr->dmac, &vlan_tag,
- &sgid_attr.ndev->ifindex,
- NULL);
- if (status) {
- pr_err("%s(): Failed to resolve dmac from gid."
- "status = %d\n", __func__, status);
- goto av_conf_err;
- }
- }
+ ah->hdr_type = rdma_gid_attr_network_type(sgid_attr);
- status = set_av_attr(dev, ah, attr, &sgid, pd->id, &isvlan, vlan_tag);
+ status = set_av_attr(dev, ah, attr, &sgid_attr->gid, pd->id,
+ &isvlan, vlan_tag);
if (status)
goto av_conf_err;
/* if pd is for the user process, pass the ah_id to user space */
if ((pd->uctx) && (pd->uctx->ah_tbl.va)) {
- ahid_addr = pd->uctx->ah_tbl.va + attr->dlid;
+ ahid_addr = pd->uctx->ah_tbl.va + rdma_ah_get_dlid(attr);
*ahid_addr = 0;
*ahid_addr |= ah->id & OCRDMA_AH_ID_MASK;
if (ocrdma_is_udp_encap_supported(dev)) {
@@ -229,83 +207,62 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr,
OCRDMA_AH_VLAN_VALID_SHIFT);
}
- return &ah->ibah;
+ return 0;
av_conf_err:
ocrdma_free_av(dev, ah);
av_err:
- kfree(ah);
- return ERR_PTR(status);
+ return status;
}
-int ocrdma_destroy_ah(struct ib_ah *ibah)
+int ocrdma_destroy_ah(struct ib_ah *ibah, u32 flags)
{
struct ocrdma_ah *ah = get_ocrdma_ah(ibah);
struct ocrdma_dev *dev = get_ocrdma_dev(ibah->device);
ocrdma_free_av(dev, ah);
- kfree(ah);
return 0;
}
-int ocrdma_query_ah(struct ib_ah *ibah, struct ib_ah_attr *attr)
+int ocrdma_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr)
{
struct ocrdma_ah *ah = get_ocrdma_ah(ibah);
struct ocrdma_av *av = ah->av;
struct ocrdma_grh *grh;
- attr->ah_flags |= IB_AH_GRH;
+
+ attr->type = ibah->type;
if (ah->av->valid & OCRDMA_AV_VALID) {
grh = (struct ocrdma_grh *)((u8 *)ah->av +
sizeof(struct ocrdma_eth_vlan));
- attr->sl = be16_to_cpu(av->eth_hdr.vlan_tag) >> 13;
+ rdma_ah_set_sl(attr, be16_to_cpu(av->eth_hdr.vlan_tag) >> 13);
} else {
grh = (struct ocrdma_grh *)((u8 *)ah->av +
sizeof(struct ocrdma_eth_basic));
- attr->sl = 0;
+ rdma_ah_set_sl(attr, 0);
}
- memcpy(&attr->grh.dgid.raw[0], &grh->dgid[0], sizeof(grh->dgid));
- attr->grh.sgid_index = ah->sgid_index;
- attr->grh.hop_limit = be32_to_cpu(grh->pdid_hoplimit) & 0xff;
- attr->grh.traffic_class = be32_to_cpu(grh->tclass_flow) >> 24;
- attr->grh.flow_label = be32_to_cpu(grh->tclass_flow) & 0x00ffffffff;
+ rdma_ah_set_grh(attr, NULL,
+ be32_to_cpu(grh->tclass_flow) & 0xffffffff,
+ ah->sgid_index,
+ be32_to_cpu(grh->pdid_hoplimit) & 0xff,
+ be32_to_cpu(grh->tclass_flow) >> 24);
+ rdma_ah_set_dgid_raw(attr, &grh->dgid[0]);
return 0;
}
-int ocrdma_modify_ah(struct ib_ah *ibah, struct ib_ah_attr *attr)
-{
- /* modify_ah is unsupported */
- return -ENOSYS;
-}
-
-int ocrdma_process_mad(struct ib_device *ibdev,
- int process_mad_flags,
- u8 port_num,
- const struct ib_wc *in_wc,
- const struct ib_grh *in_grh,
- const struct ib_mad_hdr *in, size_t in_mad_size,
- struct ib_mad_hdr *out, size_t *out_mad_size,
+int ocrdma_process_mad(struct ib_device *ibdev, int process_mad_flags,
+ u32 port_num, const struct ib_wc *in_wc,
+ const struct ib_grh *in_grh, const struct ib_mad *in,
+ struct ib_mad *out, size_t *out_mad_size,
u16 *out_mad_pkey_index)
{
- int status;
+ int status = IB_MAD_RESULT_SUCCESS;
struct ocrdma_dev *dev;
- const struct ib_mad *in_mad = (const struct ib_mad *)in;
- struct ib_mad *out_mad = (struct ib_mad *)out;
-
- if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) ||
- *out_mad_size != sizeof(*out_mad)))
- return IB_MAD_RESULT_FAILURE;
- switch (in_mad->mad_hdr.mgmt_class) {
- case IB_MGMT_CLASS_PERF_MGMT:
+ if (in->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT) {
dev = get_ocrdma_dev(ibdev);
- if (!ocrdma_pma_counters(dev, out_mad))
- status = IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
- else
- status = IB_MAD_RESULT_SUCCESS;
- break;
- default:
- status = IB_MAD_RESULT_SUCCESS;
- break;
+ ocrdma_pma_counters(dev, out);
+ status |= IB_MAD_RESULT_REPLY;
}
+
return status;
}
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h
index 0704a24b17c8..2626679df31d 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h
@@ -51,18 +51,14 @@ enum {
OCRDMA_AH_L3_TYPE_SHIFT = 0x1D /* 29 bits */
};
-struct ib_ah *ocrdma_create_ah(struct ib_pd *, struct ib_ah_attr *,
- struct ib_udata *);
-int ocrdma_destroy_ah(struct ib_ah *);
-int ocrdma_query_ah(struct ib_ah *, struct ib_ah_attr *);
-int ocrdma_modify_ah(struct ib_ah *, struct ib_ah_attr *);
+int ocrdma_create_ah(struct ib_ah *ah, struct rdma_ah_init_attr *init_attr,
+ struct ib_udata *udata);
+int ocrdma_destroy_ah(struct ib_ah *ah, u32 flags);
+int ocrdma_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr);
-int ocrdma_process_mad(struct ib_device *,
- int process_mad_flags,
- u8 port_num,
- const struct ib_wc *in_wc,
- const struct ib_grh *in_grh,
- const struct ib_mad_hdr *in, size_t in_mad_size,
- struct ib_mad_hdr *out, size_t *out_mad_size,
+int ocrdma_process_mad(struct ib_device *dev, int process_mad_flags,
+ u32 port_num, const struct ib_wc *in_wc,
+ const struct ib_grh *in_grh, const struct ib_mad *in,
+ struct ib_mad *out, size_t *out_mad_size,
u16 *out_mad_pkey_index);
#endif /* __OCRDMA_AH_H__ */
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
index aa6967197620..56f06c68f31a 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
@@ -252,7 +252,10 @@ static int ocrdma_get_mbx_errno(u32 status)
case OCRDMA_MBX_ADDI_STATUS_INSUFFICIENT_RESOURCES:
err_num = -EAGAIN;
break;
+ default:
+ err_num = -EFAULT;
}
+ break;
default:
err_num = -EFAULT;
}
@@ -377,11 +380,10 @@ static int ocrdma_alloc_q(struct ocrdma_dev *dev,
q->len = len;
q->entry_size = entry_size;
q->size = len * entry_size;
- q->va = dma_alloc_coherent(&dev->nic_info.pdev->dev, q->size,
- &q->dma, GFP_KERNEL);
+ q->va = dma_alloc_coherent(&dev->nic_info.pdev->dev, q->size, &q->dma,
+ GFP_KERNEL);
if (!q->va)
return -ENOMEM;
- memset(q->va, 0, q->size);
return 0;
}
@@ -790,7 +792,7 @@ static void ocrdma_dispatch_ibevent(struct ocrdma_dev *dev,
qp->srq->ibsrq.
srq_context);
} else if (dev_event) {
- pr_err("%s: Fatal event received\n", dev->ibdev.name);
+ dev_err(&dev->ibdev.dev, "Fatal event received\n");
ib_dispatch_event(&ib_evt);
}
@@ -1090,7 +1092,7 @@ static int ocrdma_mbx_cmd(struct ocrdma_dev *dev, struct ocrdma_mqe *mqe)
rsp = &mqe->u.rsp;
if (cqe_status || ext_status) {
- pr_err("%s() cqe_status=0x%x, ext_status=0x%x,",
+ pr_err("%s() cqe_status=0x%x, ext_status=0x%x,\n",
__func__, cqe_status, ext_status);
if (rsp) {
/* This is for embedded cmds. */
@@ -1188,7 +1190,6 @@ static void ocrdma_get_attr(struct ocrdma_dev *dev,
attr->max_mr = rsp->max_mr;
attr->max_mr_size = ((u64)rsp->max_mr_size_hi << 32) |
rsp->max_mr_size_lo;
- attr->max_fmr = 0;
attr->max_pages_per_frmr = rsp->max_pages_per_frmr;
attr->max_num_mr_pbl = rsp->max_num_mr_pbl;
attr->max_cqe = rsp->max_cq_cqes_per_cq &
@@ -1349,7 +1350,6 @@ static int ocrdma_mbx_get_ctrl_attribs(struct ocrdma_dev *dev)
mqe->u.nonemb_req.sge[0].pa_hi = (u32) upper_32_bits(dma.pa);
mqe->u.nonemb_req.sge[0].len = dma.size;
- memset(dma.va, 0, dma.size);
ocrdma_init_mch((struct ocrdma_mbx_hdr *)dma.va,
OCRDMA_CMD_GET_CTRL_ATTRIBUTES,
OCRDMA_SUBSYS_COMMON,
@@ -1363,8 +1363,9 @@ static int ocrdma_mbx_get_ctrl_attribs(struct ocrdma_dev *dev)
dev->hba_port_num = (hba_attribs->ptpnum_maxdoms_hbast_cv &
OCRDMA_HBA_ATTRB_PTNUM_MASK)
>> OCRDMA_HBA_ATTRB_PTNUM_SHIFT;
- strncpy(dev->model_number,
- hba_attribs->controller_model_number, 31);
+ strscpy(dev->model_number,
+ hba_attribs->controller_model_number,
+ sizeof(dev->model_number));
}
dma_free_coherent(&dev->nic_info.pdev->dev, dma.size, dma.va, dma.pa);
free_mqe:
@@ -1505,7 +1506,6 @@ int ocrdma_mbx_dealloc_pd(struct ocrdma_dev *dev, struct ocrdma_pd *pd)
static int ocrdma_mbx_alloc_pd_range(struct ocrdma_dev *dev)
{
int status = -ENOMEM;
- size_t pd_bitmap_size;
struct ocrdma_alloc_pd_range *cmd;
struct ocrdma_alloc_pd_range_rsp *rsp;
@@ -1527,10 +1527,8 @@ static int ocrdma_mbx_alloc_pd_range(struct ocrdma_dev *dev)
dev->pd_mgr->pd_dpp_start = rsp->dpp_page_pdid &
OCRDMA_ALLOC_PD_RNG_RSP_START_PDID_MASK;
dev->pd_mgr->max_dpp_pd = rsp->pd_count;
- pd_bitmap_size =
- BITS_TO_LONGS(rsp->pd_count) * sizeof(long);
- dev->pd_mgr->pd_dpp_bitmap = kzalloc(pd_bitmap_size,
- GFP_KERNEL);
+ dev->pd_mgr->pd_dpp_bitmap = bitmap_zalloc(rsp->pd_count,
+ GFP_KERNEL);
}
kfree(cmd);
}
@@ -1546,9 +1544,8 @@ static int ocrdma_mbx_alloc_pd_range(struct ocrdma_dev *dev)
dev->pd_mgr->pd_norm_start = rsp->dpp_page_pdid &
OCRDMA_ALLOC_PD_RNG_RSP_START_PDID_MASK;
dev->pd_mgr->max_normal_pd = rsp->pd_count;
- pd_bitmap_size = BITS_TO_LONGS(rsp->pd_count) * sizeof(long);
- dev->pd_mgr->pd_norm_bitmap = kzalloc(pd_bitmap_size,
- GFP_KERNEL);
+ dev->pd_mgr->pd_norm_bitmap = bitmap_zalloc(rsp->pd_count,
+ GFP_KERNEL);
}
kfree(cmd);
@@ -1610,8 +1607,8 @@ void ocrdma_alloc_pd_pool(struct ocrdma_dev *dev)
static void ocrdma_free_pd_pool(struct ocrdma_dev *dev)
{
ocrdma_mbx_dealloc_pd_range(dev);
- kfree(dev->pd_mgr->pd_norm_bitmap);
- kfree(dev->pd_mgr->pd_dpp_bitmap);
+ bitmap_free(dev->pd_mgr->pd_norm_bitmap);
+ bitmap_free(dev->pd_mgr->pd_dpp_bitmap);
kfree(dev->pd_mgr);
}
@@ -1687,7 +1684,6 @@ static int ocrdma_mbx_create_ah_tbl(struct ocrdma_dev *dev)
goto mem_err_ah;
dev->av_tbl.pa = pa;
dev->av_tbl.num_ah = max_ah;
- memset(dev->av_tbl.va, 0, dev->av_tbl.size);
pbes = (struct ocrdma_pbe *)dev->av_tbl.pbl.va;
for (i = 0; i < dev->av_tbl.size / OCRDMA_MIN_Q_PAGE_SIZE; i++) {
@@ -1821,7 +1817,6 @@ int ocrdma_mbx_create_cq(struct ocrdma_dev *dev, struct ocrdma_cq *cq,
status = -ENOMEM;
goto mem_err;
}
- memset(cq->va, 0, cq->len);
page_size = cq->len / hw_pages;
cmd->cmd.pgsz_pgcnt = (page_size / OCRDMA_MIN_Q_PAGE_SIZE) <<
OCRDMA_CREATE_CQ_PAGE_SIZE_SHIFT;
@@ -1886,14 +1881,13 @@ mem_err:
return status;
}
-int ocrdma_mbx_destroy_cq(struct ocrdma_dev *dev, struct ocrdma_cq *cq)
+void ocrdma_mbx_destroy_cq(struct ocrdma_dev *dev, struct ocrdma_cq *cq)
{
- int status = -ENOMEM;
struct ocrdma_destroy_cq *cmd;
cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_DELETE_CQ, sizeof(*cmd));
if (!cmd)
- return status;
+ return;
ocrdma_init_mch(&cmd->req, OCRDMA_CMD_DELETE_CQ,
OCRDMA_SUBSYS_COMMON, sizeof(*cmd));
@@ -1901,11 +1895,10 @@ int ocrdma_mbx_destroy_cq(struct ocrdma_dev *dev, struct ocrdma_cq *cq)
(cq->id << OCRDMA_DESTROY_CQ_QID_SHIFT) &
OCRDMA_DESTROY_CQ_QID_MASK;
- status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
+ ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
ocrdma_unbind_eq(dev, cq->eqn);
dma_free_coherent(&dev->nic_info.pdev->dev, cq->len, cq->va, cq->pa);
kfree(cmd);
- return status;
}
int ocrdma_mbx_alloc_lkey(struct ocrdma_dev *dev, struct ocrdma_hw_mr *hwmr,
@@ -1944,7 +1937,7 @@ mbx_err:
int ocrdma_mbx_dealloc_lkey(struct ocrdma_dev *dev, int fr_mr, u32 lkey)
{
- int status = -ENOMEM;
+ int status;
struct ocrdma_dealloc_lkey *cmd;
cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_DEALLOC_LKEY, sizeof(*cmd));
@@ -1953,9 +1946,7 @@ int ocrdma_mbx_dealloc_lkey(struct ocrdma_dev *dev, int fr_mr, u32 lkey)
cmd->lkey = lkey;
cmd->rsvd_frmr = fr_mr ? 1 : 0;
status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
- if (status)
- goto mbx_err;
-mbx_err:
+
kfree(cmd);
return status;
}
@@ -1967,6 +1958,7 @@ static int ocrdma_mbx_reg_mr(struct ocrdma_dev *dev, struct ocrdma_hw_mr *hwmr,
int i;
struct ocrdma_reg_nsmr *cmd;
struct ocrdma_reg_nsmr_rsp *rsp;
+ u64 fbo = hwmr->va & (hwmr->pbe_size - 1);
cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_REGISTER_NSMR, sizeof(*cmd));
if (!cmd)
@@ -1992,8 +1984,8 @@ static int ocrdma_mbx_reg_mr(struct ocrdma_dev *dev, struct ocrdma_hw_mr *hwmr,
OCRDMA_REG_NSMR_HPAGE_SIZE_SHIFT;
cmd->totlen_low = hwmr->len;
cmd->totlen_high = upper_32_bits(hwmr->len);
- cmd->fbo_low = (u32) (hwmr->fbo & 0xffffffff);
- cmd->fbo_high = (u32) upper_32_bits(hwmr->fbo);
+ cmd->fbo_low = lower_32_bits(fbo);
+ cmd->fbo_high = upper_32_bits(fbo);
cmd->va_loaddr = (u32) hwmr->va;
cmd->va_hiaddr = (u32) upper_32_bits(hwmr->va);
@@ -2015,7 +2007,7 @@ static int ocrdma_mbx_reg_mr_cont(struct ocrdma_dev *dev,
struct ocrdma_hw_mr *hwmr, u32 pbl_cnt,
u32 pbl_offset, u32 last)
{
- int status = -ENOMEM;
+ int status;
int i;
struct ocrdma_reg_nsmr_cont *cmd;
@@ -2034,9 +2026,7 @@ static int ocrdma_mbx_reg_mr_cont(struct ocrdma_dev *dev,
upper_32_bits(hwmr->pbl_table[i + pbl_offset].pa);
}
status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
- if (status)
- goto mbx_err;
-mbx_err:
+
kfree(cmd);
return status;
}
@@ -2214,7 +2204,6 @@ static int ocrdma_set_create_qp_sq_cmd(struct ocrdma_create_qp_req *cmd,
qp->sq.va = dma_alloc_coherent(&pdev->dev, len, &pa, GFP_KERNEL);
if (!qp->sq.va)
return -EINVAL;
- memset(qp->sq.va, 0, len);
qp->sq.len = len;
qp->sq.pa = pa;
qp->sq.entry_size = dev->attr.wqe_size;
@@ -2265,7 +2254,6 @@ static int ocrdma_set_create_qp_rq_cmd(struct ocrdma_create_qp_req *cmd,
qp->rq.va = dma_alloc_coherent(&pdev->dev, len, &pa, GFP_KERNEL);
if (!qp->rq.va)
return -ENOMEM;
- memset(qp->rq.va, 0, len);
qp->rq.pa = pa;
qp->rq.len = len;
qp->rq.entry_size = dev->attr.rqe_size;
@@ -2319,11 +2307,10 @@ static int ocrdma_set_create_qp_ird_cmd(struct ocrdma_create_qp_req *cmd,
if (dev->attr.ird == 0)
return 0;
- qp->ird_q_va = dma_alloc_coherent(&pdev->dev, ird_q_len,
- &pa, GFP_KERNEL);
+ qp->ird_q_va = dma_alloc_coherent(&pdev->dev, ird_q_len, &pa,
+ GFP_KERNEL);
if (!qp->ird_q_va)
return -ENOMEM;
- memset(qp->ird_q_va, 0, ird_q_len);
ocrdma_build_q_pages(&cmd->ird_addr[0], dev->attr.num_ird_pages,
pa, ird_page_size);
for (; i < ird_q_len / dev->attr.rqe_size; i++) {
@@ -2499,59 +2486,55 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp,
int attr_mask)
{
int status;
- struct ib_ah_attr *ah_attr = &attrs->ah_attr;
- union ib_gid sgid, zgid;
- struct ib_gid_attr sgid_attr;
- u32 vlan_id = 0xFFFF;
+ struct rdma_ah_attr *ah_attr = &attrs->ah_attr;
+ const struct ib_gid_attr *sgid_attr;
+ u16 vlan_id = 0xFFFF;
u8 mac_addr[6], hdr_type;
union {
- struct sockaddr _sockaddr;
struct sockaddr_in _sockaddr_in;
struct sockaddr_in6 _sockaddr_in6;
} sgid_addr, dgid_addr;
struct ocrdma_dev *dev = get_ocrdma_dev(qp->ibqp.device);
+ const struct ib_global_route *grh;
- if ((ah_attr->ah_flags & IB_AH_GRH) == 0)
+ if ((rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH) == 0)
return -EINVAL;
+ grh = rdma_ah_read_grh(ah_attr);
if (atomic_cmpxchg(&dev->update_sl, 1, 0))
ocrdma_init_service_level(dev);
cmd->params.tclass_sq_psn |=
- (ah_attr->grh.traffic_class << OCRDMA_QP_PARAMS_TCLASS_SHIFT);
+ (grh->traffic_class << OCRDMA_QP_PARAMS_TCLASS_SHIFT);
cmd->params.rnt_rc_sl_fl |=
- (ah_attr->grh.flow_label & OCRDMA_QP_PARAMS_FLOW_LABEL_MASK);
- cmd->params.rnt_rc_sl_fl |= (ah_attr->sl << OCRDMA_QP_PARAMS_SL_SHIFT);
+ (grh->flow_label & OCRDMA_QP_PARAMS_FLOW_LABEL_MASK);
+ cmd->params.rnt_rc_sl_fl |= (rdma_ah_get_sl(ah_attr) <<
+ OCRDMA_QP_PARAMS_SL_SHIFT);
cmd->params.hop_lmt_rq_psn |=
- (ah_attr->grh.hop_limit << OCRDMA_QP_PARAMS_HOP_LMT_SHIFT);
+ (grh->hop_limit << OCRDMA_QP_PARAMS_HOP_LMT_SHIFT);
cmd->flags |= OCRDMA_QP_PARA_FLOW_LBL_VALID;
/* GIDs */
- memcpy(&cmd->params.dgid[0], &ah_attr->grh.dgid.raw[0],
+ memcpy(&cmd->params.dgid[0], &grh->dgid.raw[0],
sizeof(cmd->params.dgid));
- status = ib_get_cached_gid(&dev->ibdev, 1, ah_attr->grh.sgid_index,
- &sgid, &sgid_attr);
- if (!status && sgid_attr.ndev) {
- vlan_id = rdma_vlan_dev_vlan_id(sgid_attr.ndev);
- memcpy(mac_addr, sgid_attr.ndev->dev_addr, ETH_ALEN);
- dev_put(sgid_attr.ndev);
- }
-
- memset(&zgid, 0, sizeof(zgid));
- if (!memcmp(&sgid, &zgid, sizeof(zgid)))
- return -EINVAL;
+ sgid_attr = ah_attr->grh.sgid_attr;
+ status = rdma_read_gid_l2_fields(sgid_attr, &vlan_id, &mac_addr[0]);
+ if (status)
+ return status;
- qp->sgid_idx = ah_attr->grh.sgid_index;
- memcpy(&cmd->params.sgid[0], &sgid.raw[0], sizeof(cmd->params.sgid));
+ qp->sgid_idx = grh->sgid_index;
+ memcpy(&cmd->params.sgid[0], &sgid_attr->gid.raw[0],
+ sizeof(cmd->params.sgid));
status = ocrdma_resolve_dmac(dev, ah_attr, &mac_addr[0]);
if (status)
return status;
+
cmd->params.dmac_b0_to_b3 = mac_addr[0] | (mac_addr[1] << 8) |
(mac_addr[2] << 16) | (mac_addr[3] << 24);
- hdr_type = ib_gid_to_network_type(sgid_attr.gid_type, &sgid);
+ hdr_type = rdma_gid_attr_network_type(sgid_attr);
if (hdr_type == RDMA_NETWORK_IPV4) {
- rdma_gid2ip(&sgid_addr._sockaddr, &sgid);
- rdma_gid2ip(&dgid_addr._sockaddr, &ah_attr->grh.dgid);
+ rdma_gid2ip((struct sockaddr *)&sgid_addr, &sgid_attr->gid);
+ rdma_gid2ip((struct sockaddr *)&dgid_addr, &grh->dgid);
memcpy(&cmd->params.dgid[0],
&dgid_addr._sockaddr_in.sin_addr.s_addr, 4);
memcpy(&cmd->params.sgid[0],
@@ -2872,21 +2855,19 @@ int ocrdma_mbx_query_srq(struct ocrdma_srq *srq, struct ib_srq_attr *srq_attr)
return status;
}
-int ocrdma_mbx_destroy_srq(struct ocrdma_dev *dev, struct ocrdma_srq *srq)
+void ocrdma_mbx_destroy_srq(struct ocrdma_dev *dev, struct ocrdma_srq *srq)
{
- int status = -ENOMEM;
struct ocrdma_destroy_srq *cmd;
struct pci_dev *pdev = dev->nic_info.pdev;
cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_DELETE_SRQ, sizeof(*cmd));
if (!cmd)
- return status;
+ return;
cmd->id = srq->id;
- status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
+ ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
if (srq->rq.va)
dma_free_coherent(&pdev->dev, srq->rq.len,
srq->rq.va, srq->rq.pa);
kfree(cmd);
- return status;
}
static int ocrdma_mbx_get_dcbx_config(struct ocrdma_dev *dev, u32 ptype,
@@ -2916,7 +2897,6 @@ static int ocrdma_mbx_get_dcbx_config(struct ocrdma_dev *dev, u32 ptype,
mqe_sge->pa_hi = (u32) upper_32_bits(pa);
mqe_sge->len = cmd.hdr.pyld_len;
- memset(req, 0, sizeof(struct ocrdma_get_dcbx_cfg_req));
ocrdma_init_mch(&req->hdr, OCRDMA_CMD_GET_DCBX_CONFIG,
OCRDMA_SUBSYS_DCBX, cmd.hdr.pyld_len);
req->param_type = ptype;
@@ -3076,13 +3056,12 @@ int ocrdma_alloc_av(struct ocrdma_dev *dev, struct ocrdma_ah *ah)
return status;
}
-int ocrdma_free_av(struct ocrdma_dev *dev, struct ocrdma_ah *ah)
+void ocrdma_free_av(struct ocrdma_dev *dev, struct ocrdma_ah *ah)
{
unsigned long flags;
spin_lock_irqsave(&dev->av_tbl.lock, flags);
ah->av->valid = 0;
spin_unlock_irqrestore(&dev->av_tbl.lock, flags);
- return 0;
}
static int ocrdma_create_eqs(struct ocrdma_dev *dev)
@@ -3103,7 +3082,7 @@ static int ocrdma_create_eqs(struct ocrdma_dev *dev)
if (!num_eq)
return -EINVAL;
- dev->eq_tbl = kzalloc(sizeof(struct ocrdma_eq) * num_eq, GFP_KERNEL);
+ dev->eq_tbl = kcalloc(num_eq, sizeof(struct ocrdma_eq), GFP_KERNEL);
if (!dev->eq_tbl)
return -ENOMEM;
@@ -3134,12 +3113,12 @@ done:
static int ocrdma_mbx_modify_eqd(struct ocrdma_dev *dev, struct ocrdma_eq *eq,
int num)
{
- int i, status = -ENOMEM;
+ int i, status;
struct ocrdma_modify_eqd_req *cmd;
cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_MODIFY_EQ_DELAY, sizeof(*cmd));
if (!cmd)
- return status;
+ return -ENOMEM;
ocrdma_init_mch(&cmd->cmd.req, OCRDMA_CMD_MODIFY_EQ_DELAY,
OCRDMA_SUBSYS_COMMON, sizeof(*cmd));
@@ -3152,9 +3131,7 @@ static int ocrdma_mbx_modify_eqd(struct ocrdma_dev *dev, struct ocrdma_eq *eq,
(eq[i].aic_obj.prev_eqd * 65)/100;
}
status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
- if (status)
- goto mbx_err;
-mbx_err:
+
kfree(cmd);
return status;
}
@@ -3180,8 +3157,8 @@ void ocrdma_eqd_set_task(struct work_struct *work)
{
struct ocrdma_dev *dev =
container_of(work, struct ocrdma_dev, eqd_work.work);
- struct ocrdma_eq *eq = 0;
- int i, num = 0, status = -EINVAL;
+ struct ocrdma_eq *eq = NULL;
+ int i, num = 0;
u64 eq_intr;
for (i = 0; i < dev->eq_cnt; i++) {
@@ -3203,7 +3180,7 @@ void ocrdma_eqd_set_task(struct work_struct *work)
}
if (num)
- status = ocrdma_modify_eqd(dev, &dev->eq_tbl[0], num);
+ ocrdma_modify_eqd(dev, &dev->eq_tbl[0], num);
schedule_delayed_work(&dev->eqd_work, msecs_to_jiffies(1000));
}
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.h b/drivers/infiniband/hw/ocrdma/ocrdma_hw.h
index ebc1f442aec3..12c23a7652b9 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.h
@@ -122,7 +122,7 @@ int ocrdma_reg_mr(struct ocrdma_dev *, struct ocrdma_hw_mr *hwmr,
u32 pd_id, int acc);
int ocrdma_mbx_create_cq(struct ocrdma_dev *, struct ocrdma_cq *,
int entries, int dpp_cq, u16 pd_id);
-int ocrdma_mbx_destroy_cq(struct ocrdma_dev *, struct ocrdma_cq *);
+void ocrdma_mbx_destroy_cq(struct ocrdma_dev *dev, struct ocrdma_cq *cq);
int ocrdma_mbx_create_qp(struct ocrdma_qp *, struct ib_qp_init_attr *attrs,
u8 enable_dpp_cq, u16 dpp_cq_id, u16 *dpp_offset,
@@ -137,10 +137,10 @@ int ocrdma_mbx_create_srq(struct ocrdma_dev *, struct ocrdma_srq *,
struct ocrdma_pd *);
int ocrdma_mbx_modify_srq(struct ocrdma_srq *, struct ib_srq_attr *);
int ocrdma_mbx_query_srq(struct ocrdma_srq *, struct ib_srq_attr *);
-int ocrdma_mbx_destroy_srq(struct ocrdma_dev *, struct ocrdma_srq *);
+void ocrdma_mbx_destroy_srq(struct ocrdma_dev *dev, struct ocrdma_srq *srq);
-int ocrdma_alloc_av(struct ocrdma_dev *, struct ocrdma_ah *);
-int ocrdma_free_av(struct ocrdma_dev *, struct ocrdma_ah *);
+int ocrdma_alloc_av(struct ocrdma_dev *dev, struct ocrdma_ah *ah);
+void ocrdma_free_av(struct ocrdma_dev *dev, struct ocrdma_ah *ah);
int ocrdma_qp_state_change(struct ocrdma_qp *, enum ib_qp_state new_state,
enum ib_qp_state *old_ib_state);
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
index 896071502739..5d4b3bc16493 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
@@ -58,34 +58,17 @@
#include "ocrdma_stats.h"
#include <rdma/ocrdma-abi.h>
-MODULE_VERSION(OCRDMA_ROCE_DRV_VERSION);
MODULE_DESCRIPTION(OCRDMA_ROCE_DRV_DESC " " OCRDMA_ROCE_DRV_VERSION);
MODULE_AUTHOR("Emulex Corporation");
MODULE_LICENSE("Dual BSD/GPL");
-static DEFINE_IDR(ocrdma_dev_id);
-
-void ocrdma_get_guid(struct ocrdma_dev *dev, u8 *guid)
-{
- u8 mac_addr[6];
-
- memcpy(&mac_addr[0], &dev->nic_info.mac_addr[0], ETH_ALEN);
- guid[0] = mac_addr[0] ^ 2;
- guid[1] = mac_addr[1];
- guid[2] = mac_addr[2];
- guid[3] = 0xff;
- guid[4] = 0xfe;
- guid[5] = mac_addr[3];
- guid[6] = mac_addr[4];
- guid[7] = mac_addr[5];
-}
static enum rdma_link_layer ocrdma_link_layer(struct ib_device *device,
- u8 port_num)
+ u32 port_num)
{
return IB_LINK_LAYER_ETHERNET;
}
-static int ocrdma_port_immutable(struct ib_device *ibdev, u8 port_num,
+static int ocrdma_port_immutable(struct ib_device *ibdev, u32 port_num,
struct ib_port_immutable *immutable)
{
struct ib_port_attr attr;
@@ -93,150 +76,163 @@ static int ocrdma_port_immutable(struct ib_device *ibdev, u8 port_num,
int err;
dev = get_ocrdma_dev(ibdev);
- err = ocrdma_query_port(ibdev, port_num, &attr);
+ immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
+ if (ocrdma_is_udp_encap_supported(dev))
+ immutable->core_cap_flags |= RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP;
+
+ err = ib_query_port(ibdev, port_num, &attr);
if (err)
return err;
immutable->pkey_tbl_len = attr.pkey_tbl_len;
immutable->gid_tbl_len = attr.gid_tbl_len;
- immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
- if (ocrdma_is_udp_encap_supported(dev))
- immutable->core_cap_flags |= RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP;
immutable->max_mad_size = IB_MGMT_MAD_SIZE;
return 0;
}
-static void get_dev_fw_str(struct ib_device *device, char *str,
- size_t str_len)
+static void get_dev_fw_str(struct ib_device *device, char *str)
{
struct ocrdma_dev *dev = get_ocrdma_dev(device);
- snprintf(str, str_len, "%s", &dev->attr.fw_ver[0]);
+ snprintf(str, IB_FW_VERSION_NAME_MAX, "%s", &dev->attr.fw_ver[0]);
+}
+
+/* OCRDMA sysfs interface */
+static ssize_t hw_rev_show(struct device *device,
+ struct device_attribute *attr, char *buf)
+{
+ struct ocrdma_dev *dev =
+ rdma_device_to_drv_device(device, struct ocrdma_dev, ibdev);
+
+ return sysfs_emit(buf, "0x%x\n", dev->nic_info.pdev->vendor);
}
+static DEVICE_ATTR_RO(hw_rev);
+
+static ssize_t hca_type_show(struct device *device,
+ struct device_attribute *attr, char *buf)
+{
+ struct ocrdma_dev *dev =
+ rdma_device_to_drv_device(device, struct ocrdma_dev, ibdev);
+
+ return sysfs_emit(buf, "%s\n", &dev->model_number[0]);
+}
+static DEVICE_ATTR_RO(hca_type);
+
+static struct attribute *ocrdma_attributes[] = {
+ &dev_attr_hw_rev.attr,
+ &dev_attr_hca_type.attr,
+ NULL
+};
+
+static const struct attribute_group ocrdma_attr_group = {
+ .attrs = ocrdma_attributes,
+};
+
+static const struct ib_device_ops ocrdma_dev_ops = {
+ .owner = THIS_MODULE,
+ .driver_id = RDMA_DRIVER_OCRDMA,
+ .uverbs_abi_ver = OCRDMA_ABI_VERSION,
+
+ .alloc_mr = ocrdma_alloc_mr,
+ .alloc_pd = ocrdma_alloc_pd,
+ .alloc_ucontext = ocrdma_alloc_ucontext,
+ .create_ah = ocrdma_create_ah,
+ .create_cq = ocrdma_create_cq,
+ .create_qp = ocrdma_create_qp,
+ .create_user_ah = ocrdma_create_ah,
+ .dealloc_pd = ocrdma_dealloc_pd,
+ .dealloc_ucontext = ocrdma_dealloc_ucontext,
+ .dereg_mr = ocrdma_dereg_mr,
+ .destroy_ah = ocrdma_destroy_ah,
+ .destroy_cq = ocrdma_destroy_cq,
+ .destroy_qp = ocrdma_destroy_qp,
+ .device_group = &ocrdma_attr_group,
+ .get_dev_fw_str = get_dev_fw_str,
+ .get_dma_mr = ocrdma_get_dma_mr,
+ .get_link_layer = ocrdma_link_layer,
+ .get_port_immutable = ocrdma_port_immutable,
+ .map_mr_sg = ocrdma_map_mr_sg,
+ .mmap = ocrdma_mmap,
+ .modify_qp = ocrdma_modify_qp,
+ .poll_cq = ocrdma_poll_cq,
+ .post_recv = ocrdma_post_recv,
+ .post_send = ocrdma_post_send,
+ .process_mad = ocrdma_process_mad,
+ .query_ah = ocrdma_query_ah,
+ .query_device = ocrdma_query_device,
+ .query_pkey = ocrdma_query_pkey,
+ .query_port = ocrdma_query_port,
+ .query_qp = ocrdma_query_qp,
+ .reg_user_mr = ocrdma_reg_user_mr,
+ .req_notify_cq = ocrdma_arm_cq,
+ .resize_cq = ocrdma_resize_cq,
+
+ INIT_RDMA_OBJ_SIZE(ib_ah, ocrdma_ah, ibah),
+ INIT_RDMA_OBJ_SIZE(ib_cq, ocrdma_cq, ibcq),
+ INIT_RDMA_OBJ_SIZE(ib_pd, ocrdma_pd, ibpd),
+ INIT_RDMA_OBJ_SIZE(ib_qp, ocrdma_qp, ibqp),
+ INIT_RDMA_OBJ_SIZE(ib_ucontext, ocrdma_ucontext, ibucontext),
+};
+
+static const struct ib_device_ops ocrdma_dev_srq_ops = {
+ .create_srq = ocrdma_create_srq,
+ .destroy_srq = ocrdma_destroy_srq,
+ .modify_srq = ocrdma_modify_srq,
+ .post_srq_recv = ocrdma_post_srq_recv,
+ .query_srq = ocrdma_query_srq,
+
+ INIT_RDMA_OBJ_SIZE(ib_srq, ocrdma_srq, ibsrq),
+};
static int ocrdma_register_device(struct ocrdma_dev *dev)
{
- strlcpy(dev->ibdev.name, "ocrdma%d", IB_DEVICE_NAME_MAX);
- ocrdma_get_guid(dev, (u8 *)&dev->ibdev.node_guid);
+ int ret;
+
+ addrconf_addr_eui48((u8 *)&dev->ibdev.node_guid,
+ dev->nic_info.mac_addr);
BUILD_BUG_ON(sizeof(OCRDMA_NODE_DESC) > IB_DEVICE_NODE_DESC_MAX);
memcpy(dev->ibdev.node_desc, OCRDMA_NODE_DESC,
sizeof(OCRDMA_NODE_DESC));
- dev->ibdev.owner = THIS_MODULE;
- dev->ibdev.uverbs_abi_ver = OCRDMA_ABI_VERSION;
- dev->ibdev.uverbs_cmd_mask =
- OCRDMA_UVERBS(GET_CONTEXT) |
- OCRDMA_UVERBS(QUERY_DEVICE) |
- OCRDMA_UVERBS(QUERY_PORT) |
- OCRDMA_UVERBS(ALLOC_PD) |
- OCRDMA_UVERBS(DEALLOC_PD) |
- OCRDMA_UVERBS(REG_MR) |
- OCRDMA_UVERBS(DEREG_MR) |
- OCRDMA_UVERBS(CREATE_COMP_CHANNEL) |
- OCRDMA_UVERBS(CREATE_CQ) |
- OCRDMA_UVERBS(RESIZE_CQ) |
- OCRDMA_UVERBS(DESTROY_CQ) |
- OCRDMA_UVERBS(REQ_NOTIFY_CQ) |
- OCRDMA_UVERBS(CREATE_QP) |
- OCRDMA_UVERBS(MODIFY_QP) |
- OCRDMA_UVERBS(QUERY_QP) |
- OCRDMA_UVERBS(DESTROY_QP) |
- OCRDMA_UVERBS(POLL_CQ) |
- OCRDMA_UVERBS(POST_SEND) |
- OCRDMA_UVERBS(POST_RECV);
-
- dev->ibdev.uverbs_cmd_mask |=
- OCRDMA_UVERBS(CREATE_AH) |
- OCRDMA_UVERBS(MODIFY_AH) |
- OCRDMA_UVERBS(QUERY_AH) |
- OCRDMA_UVERBS(DESTROY_AH);
dev->ibdev.node_type = RDMA_NODE_IB_CA;
dev->ibdev.phys_port_cnt = 1;
dev->ibdev.num_comp_vectors = dev->eq_cnt;
- /* mandatory verbs. */
- dev->ibdev.query_device = ocrdma_query_device;
- dev->ibdev.query_port = ocrdma_query_port;
- dev->ibdev.modify_port = ocrdma_modify_port;
- dev->ibdev.query_gid = ocrdma_query_gid;
- dev->ibdev.get_netdev = ocrdma_get_netdev;
- dev->ibdev.add_gid = ocrdma_add_gid;
- dev->ibdev.del_gid = ocrdma_del_gid;
- dev->ibdev.get_link_layer = ocrdma_link_layer;
- dev->ibdev.alloc_pd = ocrdma_alloc_pd;
- dev->ibdev.dealloc_pd = ocrdma_dealloc_pd;
-
- dev->ibdev.create_cq = ocrdma_create_cq;
- dev->ibdev.destroy_cq = ocrdma_destroy_cq;
- dev->ibdev.resize_cq = ocrdma_resize_cq;
-
- dev->ibdev.create_qp = ocrdma_create_qp;
- dev->ibdev.modify_qp = ocrdma_modify_qp;
- dev->ibdev.query_qp = ocrdma_query_qp;
- dev->ibdev.destroy_qp = ocrdma_destroy_qp;
-
- dev->ibdev.query_pkey = ocrdma_query_pkey;
- dev->ibdev.create_ah = ocrdma_create_ah;
- dev->ibdev.destroy_ah = ocrdma_destroy_ah;
- dev->ibdev.query_ah = ocrdma_query_ah;
- dev->ibdev.modify_ah = ocrdma_modify_ah;
-
- dev->ibdev.poll_cq = ocrdma_poll_cq;
- dev->ibdev.post_send = ocrdma_post_send;
- dev->ibdev.post_recv = ocrdma_post_recv;
- dev->ibdev.req_notify_cq = ocrdma_arm_cq;
-
- dev->ibdev.get_dma_mr = ocrdma_get_dma_mr;
- dev->ibdev.dereg_mr = ocrdma_dereg_mr;
- dev->ibdev.reg_user_mr = ocrdma_reg_user_mr;
-
- dev->ibdev.alloc_mr = ocrdma_alloc_mr;
- dev->ibdev.map_mr_sg = ocrdma_map_mr_sg;
-
/* mandatory to support user space verbs consumer. */
- dev->ibdev.alloc_ucontext = ocrdma_alloc_ucontext;
- dev->ibdev.dealloc_ucontext = ocrdma_dealloc_ucontext;
- dev->ibdev.mmap = ocrdma_mmap;
- dev->ibdev.dma_device = &dev->nic_info.pdev->dev;
-
- dev->ibdev.process_mad = ocrdma_process_mad;
- dev->ibdev.get_port_immutable = ocrdma_port_immutable;
- dev->ibdev.get_dev_fw_str = get_dev_fw_str;
-
- if (ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R) {
- dev->ibdev.uverbs_cmd_mask |=
- OCRDMA_UVERBS(CREATE_SRQ) |
- OCRDMA_UVERBS(MODIFY_SRQ) |
- OCRDMA_UVERBS(QUERY_SRQ) |
- OCRDMA_UVERBS(DESTROY_SRQ) |
- OCRDMA_UVERBS(POST_SRQ_RECV);
-
- dev->ibdev.create_srq = ocrdma_create_srq;
- dev->ibdev.modify_srq = ocrdma_modify_srq;
- dev->ibdev.query_srq = ocrdma_query_srq;
- dev->ibdev.destroy_srq = ocrdma_destroy_srq;
- dev->ibdev.post_srq_recv = ocrdma_post_srq_recv;
- }
- return ib_register_device(&dev->ibdev, NULL);
+ dev->ibdev.dev.parent = &dev->nic_info.pdev->dev;
+
+ ib_set_device_ops(&dev->ibdev, &ocrdma_dev_ops);
+
+ if (ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R)
+ ib_set_device_ops(&dev->ibdev, &ocrdma_dev_srq_ops);
+
+ ret = ib_device_set_netdev(&dev->ibdev, dev->nic_info.netdev, 1);
+ if (ret)
+ return ret;
+
+ dma_set_max_seg_size(&dev->nic_info.pdev->dev, UINT_MAX);
+ return ib_register_device(&dev->ibdev, "ocrdma%d",
+ &dev->nic_info.pdev->dev);
}
static int ocrdma_alloc_resources(struct ocrdma_dev *dev)
{
mutex_init(&dev->dev_lock);
- dev->cq_tbl = kzalloc(sizeof(struct ocrdma_cq *) *
- OCRDMA_MAX_CQ, GFP_KERNEL);
+ dev->cq_tbl = kcalloc(OCRDMA_MAX_CQ, sizeof(struct ocrdma_cq *),
+ GFP_KERNEL);
if (!dev->cq_tbl)
goto alloc_err;
if (dev->attr.max_qp) {
- dev->qp_tbl = kzalloc(sizeof(struct ocrdma_qp *) *
- OCRDMA_MAX_QP, GFP_KERNEL);
+ dev->qp_tbl = kcalloc(OCRDMA_MAX_QP,
+ sizeof(struct ocrdma_qp *),
+ GFP_KERNEL);
if (!dev->qp_tbl)
goto alloc_err;
}
- dev->stag_arr = kzalloc(sizeof(u64) * OCRDMA_MAX_STAG, GFP_KERNEL);
+ dev->stag_arr = kcalloc(OCRDMA_MAX_STAG, sizeof(u64), GFP_KERNEL);
if (dev->stag_arr == NULL)
goto alloc_err;
@@ -263,59 +259,24 @@ static void ocrdma_free_resources(struct ocrdma_dev *dev)
kfree(dev->cq_tbl);
}
-/* OCRDMA sysfs interface */
-static ssize_t show_rev(struct device *device, struct device_attribute *attr,
- char *buf)
-{
- struct ocrdma_dev *dev = dev_get_drvdata(device);
-
- return scnprintf(buf, PAGE_SIZE, "0x%x\n", dev->nic_info.pdev->vendor);
-}
-
-static ssize_t show_hca_type(struct device *device,
- struct device_attribute *attr, char *buf)
-{
- struct ocrdma_dev *dev = dev_get_drvdata(device);
-
- return scnprintf(buf, PAGE_SIZE, "%s\n", &dev->model_number[0]);
-}
-
-static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
-static DEVICE_ATTR(hca_type, S_IRUGO, show_hca_type, NULL);
-
-static struct device_attribute *ocrdma_attributes[] = {
- &dev_attr_hw_rev,
- &dev_attr_hca_type
-};
-
-static void ocrdma_remove_sysfiles(struct ocrdma_dev *dev)
-{
- int i;
-
- for (i = 0; i < ARRAY_SIZE(ocrdma_attributes); i++)
- device_remove_file(&dev->ibdev.dev, ocrdma_attributes[i]);
-}
-
static struct ocrdma_dev *ocrdma_add(struct be_dev_info *dev_info)
{
- int status = 0, i;
+ int status = 0;
u8 lstate = 0;
struct ocrdma_dev *dev;
- dev = (struct ocrdma_dev *)ib_alloc_device(sizeof(struct ocrdma_dev));
+ dev = ib_alloc_device(ocrdma_dev, ibdev);
if (!dev) {
pr_err("Unable to allocate ib device\n");
return NULL;
}
+
dev->mbx_cmd = kzalloc(sizeof(struct ocrdma_mqe_emb_cmd), GFP_KERNEL);
if (!dev->mbx_cmd)
- goto idr_err;
+ goto init_err;
memcpy(&dev->nic_info, dev_info, sizeof(*dev_info));
- dev->id = idr_alloc(&ocrdma_dev_id, NULL, 0, 0, GFP_KERNEL);
- if (dev->id < 0)
- goto idr_err;
-
+ dev->id = PCI_FUNC(dev->nic_info.pdev->devfn);
status = ocrdma_init_hw(dev);
if (status)
goto init_err;
@@ -334,9 +295,6 @@ static struct ocrdma_dev *ocrdma_add(struct be_dev_info *dev_info)
if (!status)
ocrdma_update_link_state(dev, lstate);
- for (i = 0; i < ARRAY_SIZE(ocrdma_attributes); i++)
- if (device_create_file(&dev->ibdev.dev, ocrdma_attributes[i]))
- goto sysfs_err;
/* Init stats */
ocrdma_add_port_stats(dev);
/* Interrupt Moderation */
@@ -351,14 +309,10 @@ static struct ocrdma_dev *ocrdma_add(struct be_dev_info *dev_info)
dev_name(&dev->nic_info.pdev->dev), dev->id);
return dev;
-sysfs_err:
- ocrdma_remove_sysfiles(dev);
alloc_err:
ocrdma_free_resources(dev);
ocrdma_cleanup_hw(dev);
init_err:
- idr_remove(&ocrdma_dev_id, dev->id);
-idr_err:
kfree(dev->mbx_cmd);
ib_dealloc_device(&dev->ibdev);
pr_err("%s() leaving. ret=%d\n", __func__, status);
@@ -368,7 +322,6 @@ idr_err:
static void ocrdma_remove_free(struct ocrdma_dev *dev)
{
- idr_remove(&ocrdma_dev_id, dev->id);
kfree(dev->mbx_cmd);
ib_dealloc_device(&dev->ibdev);
}
@@ -379,7 +332,6 @@ static void ocrdma_remove(struct ocrdma_dev *dev)
* of the registered clients.
*/
cancel_delayed_work_sync(&dev->eqd_work);
- ocrdma_remove_sysfiles(dev);
ib_unregister_device(&dev->ibdev);
ocrdma_rem_port_stats(dev);
@@ -474,7 +426,6 @@ static void __exit ocrdma_exit_module(void)
{
be_roce_unregister_driver(&ocrdma_drv);
ocrdma_rem_debugfs();
- idr_destroy(&ocrdma_dev_id);
}
module_init(ocrdma_init_module);
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h
index 6ef89c226ad8..c2e0d0fa44be 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h
@@ -2034,7 +2034,7 @@ struct ocrdma_rx_stats {
};
struct ocrdma_rx_qp_err_stats {
- u32 nak_invalid_requst_errors;
+ u32 nak_invalid_request_errors;
u32 nak_remote_operation_errors;
u32 nak_count_remote_access_errors;
u32 local_length_errors;
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c
index f8e4b0a6486f..0834416cb3f8 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c
@@ -46,7 +46,7 @@
static struct dentry *ocrdma_dbgfs_dir;
-static int ocrdma_add_stat(char *start, char *pcur,
+static noinline_for_stack int ocrdma_add_stat(char *start, char *pcur,
char *name, u64 count)
{
char buff[128] = {0};
@@ -73,15 +73,13 @@ bool ocrdma_alloc_stats_resources(struct ocrdma_dev *dev)
mem->size = max_t(u32, sizeof(struct ocrdma_rdma_stats_req),
sizeof(struct ocrdma_rdma_stats_resp));
- mem->va = dma_alloc_coherent(&dev->nic_info.pdev->dev, mem->size,
- &mem->pa, GFP_KERNEL);
+ mem->va = dma_alloc_coherent(&dev->nic_info.pdev->dev, mem->size,
+ &mem->pa, GFP_KERNEL);
if (!mem->va) {
pr_err("%s: stats mbox allocation failed\n", __func__);
return false;
}
- memset(mem->va, 0, mem->size);
-
/* Alloc debugfs mem */
mem->debugfs_mem = kzalloc(OCRDMA_MAX_DBGFS_MEM, GFP_KERNEL);
if (!mem->debugfs_mem)
@@ -101,7 +99,7 @@ void ocrdma_release_stats_resources(struct ocrdma_dev *dev)
kfree(mem->debugfs_mem);
}
-static char *ocrdma_resource_stats(struct ocrdma_dev *dev)
+static noinline_for_stack char *ocrdma_resource_stats(struct ocrdma_dev *dev)
{
char *stats = dev->stats_mem.debugfs_mem, *pcur;
struct ocrdma_rdma_stats_resp *rdma_stats =
@@ -218,7 +216,7 @@ static char *ocrdma_resource_stats(struct ocrdma_dev *dev)
return stats;
}
-static char *ocrdma_rx_stats(struct ocrdma_dev *dev)
+static noinline_for_stack char *ocrdma_rx_stats(struct ocrdma_dev *dev)
{
char *stats = dev->stats_mem.debugfs_mem, *pcur;
struct ocrdma_rdma_stats_resp *rdma_stats =
@@ -286,7 +284,7 @@ static u64 ocrdma_sysfs_rcv_data(struct ocrdma_dev *dev)
rx_stats->roce_frame_bytes_hi))/4;
}
-static char *ocrdma_tx_stats(struct ocrdma_dev *dev)
+static noinline_for_stack char *ocrdma_tx_stats(struct ocrdma_dev *dev)
{
char *stats = dev->stats_mem.debugfs_mem, *pcur;
struct ocrdma_rdma_stats_resp *rdma_stats =
@@ -360,7 +358,7 @@ static u64 ocrdma_sysfs_xmit_data(struct ocrdma_dev *dev)
tx_stats->read_rsp_bytes_hi))/4;
}
-static char *ocrdma_wqe_stats(struct ocrdma_dev *dev)
+static noinline_for_stack char *ocrdma_wqe_stats(struct ocrdma_dev *dev)
{
char *stats = dev->stats_mem.debugfs_mem, *pcur;
struct ocrdma_rdma_stats_resp *rdma_stats =
@@ -393,7 +391,7 @@ static char *ocrdma_wqe_stats(struct ocrdma_dev *dev)
return stats;
}
-static char *ocrdma_db_errstats(struct ocrdma_dev *dev)
+static noinline_for_stack char *ocrdma_db_errstats(struct ocrdma_dev *dev)
{
char *stats = dev->stats_mem.debugfs_mem, *pcur;
struct ocrdma_rdma_stats_resp *rdma_stats =
@@ -414,7 +412,7 @@ static char *ocrdma_db_errstats(struct ocrdma_dev *dev)
return stats;
}
-static char *ocrdma_rxqp_errstats(struct ocrdma_dev *dev)
+static noinline_for_stack char *ocrdma_rxqp_errstats(struct ocrdma_dev *dev)
{
char *stats = dev->stats_mem.debugfs_mem, *pcur;
struct ocrdma_rdma_stats_resp *rdma_stats =
@@ -425,8 +423,8 @@ static char *ocrdma_rxqp_errstats(struct ocrdma_dev *dev)
memset(stats, 0, (OCRDMA_MAX_DBGFS_MEM));
pcur = stats;
- pcur += ocrdma_add_stat(stats, pcur, "nak_invalid_requst_errors",
- (u64)rx_qp_err_stats->nak_invalid_requst_errors);
+ pcur += ocrdma_add_stat(stats, pcur, "nak_invalid_request_errors",
+ (u64)rx_qp_err_stats->nak_invalid_request_errors);
pcur += ocrdma_add_stat(stats, pcur, "nak_remote_operation_errors",
(u64)rx_qp_err_stats->nak_remote_operation_errors);
pcur += ocrdma_add_stat(stats, pcur, "nak_count_remote_access_errors",
@@ -440,7 +438,7 @@ static char *ocrdma_rxqp_errstats(struct ocrdma_dev *dev)
return stats;
}
-static char *ocrdma_txqp_errstats(struct ocrdma_dev *dev)
+static noinline_for_stack char *ocrdma_txqp_errstats(struct ocrdma_dev *dev)
{
char *stats = dev->stats_mem.debugfs_mem, *pcur;
struct ocrdma_rdma_stats_resp *rdma_stats =
@@ -464,7 +462,7 @@ static char *ocrdma_txqp_errstats(struct ocrdma_dev *dev)
return stats;
}
-static char *ocrdma_tx_dbg_stats(struct ocrdma_dev *dev)
+static noinline_for_stack char *ocrdma_tx_dbg_stats(struct ocrdma_dev *dev)
{
int i;
char *pstats = dev->stats_mem.debugfs_mem;
@@ -482,7 +480,7 @@ static char *ocrdma_tx_dbg_stats(struct ocrdma_dev *dev)
return dev->stats_mem.debugfs_mem;
}
-static char *ocrdma_rx_dbg_stats(struct ocrdma_dev *dev)
+static noinline_for_stack char *ocrdma_rx_dbg_stats(struct ocrdma_dev *dev)
{
int i;
char *pstats = dev->stats_mem.debugfs_mem;
@@ -500,7 +498,7 @@ static char *ocrdma_rx_dbg_stats(struct ocrdma_dev *dev)
return dev->stats_mem.debugfs_mem;
}
-static char *ocrdma_driver_dbg_stats(struct ocrdma_dev *dev)
+static noinline_for_stack char *ocrdma_driver_dbg_stats(struct ocrdma_dev *dev)
{
char *stats = dev->stats_mem.debugfs_mem, *pcur;
@@ -643,7 +641,7 @@ static ssize_t ocrdma_dbgfs_ops_write(struct file *filp,
struct ocrdma_stats *pstats = filp->private_data;
struct ocrdma_dev *dev = pstats->dev;
- if (count > 32)
+ if (*ppos != 0 || count == 0 || count > sizeof(tmp_str))
goto err;
if (copy_from_user(tmp_str, buffer, count))
@@ -658,7 +656,7 @@ static ssize_t ocrdma_dbgfs_ops_write(struct file *filp,
if (reset) {
status = ocrdma_mbx_rdma_stats(dev, true);
if (status) {
- pr_err("Failed to reset stats = %d", status);
+ pr_err("Failed to reset stats = %d\n", status);
goto err;
}
}
@@ -672,12 +670,10 @@ err:
return -EFAULT;
}
-int ocrdma_pma_counters(struct ocrdma_dev *dev,
- struct ib_mad *out_mad)
+void ocrdma_pma_counters(struct ocrdma_dev *dev, struct ib_mad *out_mad)
{
struct ib_pma_portcounters *pma_cnt;
- memset(out_mad->data, 0, sizeof out_mad->data);
pma_cnt = (void *)(out_mad->data + 40);
ocrdma_update_stats(dev);
@@ -685,7 +681,6 @@ int ocrdma_pma_counters(struct ocrdma_dev *dev,
pma_cnt->port_rcv_data = cpu_to_be32(ocrdma_sysfs_rcv_data(dev));
pma_cnt->port_xmit_packets = cpu_to_be32(ocrdma_sysfs_xmit_pkts(dev));
pma_cnt->port_rcv_packets = cpu_to_be32(ocrdma_sysfs_rcv_pkts(dev));
- return 0;
}
static ssize_t ocrdma_dbgfs_ops_read(struct file *filp, char __user *buffer,
@@ -762,93 +757,72 @@ static const struct file_operations ocrdma_dbg_ops = {
void ocrdma_add_port_stats(struct ocrdma_dev *dev)
{
+ const struct pci_dev *pdev = dev->nic_info.pdev;
+
if (!ocrdma_dbgfs_dir)
return;
/* Create post stats base dir */
- dev->dir = debugfs_create_dir(dev->ibdev.name, ocrdma_dbgfs_dir);
- if (!dev->dir)
- goto err;
+ dev->dir = debugfs_create_dir(pci_name(pdev), ocrdma_dbgfs_dir);
dev->rsrc_stats.type = OCRDMA_RSRC_STATS;
dev->rsrc_stats.dev = dev;
- if (!debugfs_create_file("resource_stats", S_IRUSR, dev->dir,
- &dev->rsrc_stats, &ocrdma_dbg_ops))
- goto err;
+ debugfs_create_file("resource_stats", S_IRUSR, dev->dir,
+ &dev->rsrc_stats, &ocrdma_dbg_ops);
dev->rx_stats.type = OCRDMA_RXSTATS;
dev->rx_stats.dev = dev;
- if (!debugfs_create_file("rx_stats", S_IRUSR, dev->dir,
- &dev->rx_stats, &ocrdma_dbg_ops))
- goto err;
+ debugfs_create_file("rx_stats", S_IRUSR, dev->dir, &dev->rx_stats,
+ &ocrdma_dbg_ops);
dev->wqe_stats.type = OCRDMA_WQESTATS;
dev->wqe_stats.dev = dev;
- if (!debugfs_create_file("wqe_stats", S_IRUSR, dev->dir,
- &dev->wqe_stats, &ocrdma_dbg_ops))
- goto err;
+ debugfs_create_file("wqe_stats", S_IRUSR, dev->dir, &dev->wqe_stats,
+ &ocrdma_dbg_ops);
dev->tx_stats.type = OCRDMA_TXSTATS;
dev->tx_stats.dev = dev;
- if (!debugfs_create_file("tx_stats", S_IRUSR, dev->dir,
- &dev->tx_stats, &ocrdma_dbg_ops))
- goto err;
+ debugfs_create_file("tx_stats", S_IRUSR, dev->dir, &dev->tx_stats,
+ &ocrdma_dbg_ops);
dev->db_err_stats.type = OCRDMA_DB_ERRSTATS;
dev->db_err_stats.dev = dev;
- if (!debugfs_create_file("db_err_stats", S_IRUSR, dev->dir,
- &dev->db_err_stats, &ocrdma_dbg_ops))
- goto err;
-
+ debugfs_create_file("db_err_stats", S_IRUSR, dev->dir,
+ &dev->db_err_stats, &ocrdma_dbg_ops);
dev->tx_qp_err_stats.type = OCRDMA_TXQP_ERRSTATS;
dev->tx_qp_err_stats.dev = dev;
- if (!debugfs_create_file("tx_qp_err_stats", S_IRUSR, dev->dir,
- &dev->tx_qp_err_stats, &ocrdma_dbg_ops))
- goto err;
+ debugfs_create_file("tx_qp_err_stats", S_IRUSR, dev->dir,
+ &dev->tx_qp_err_stats, &ocrdma_dbg_ops);
dev->rx_qp_err_stats.type = OCRDMA_RXQP_ERRSTATS;
dev->rx_qp_err_stats.dev = dev;
- if (!debugfs_create_file("rx_qp_err_stats", S_IRUSR, dev->dir,
- &dev->rx_qp_err_stats, &ocrdma_dbg_ops))
- goto err;
-
+ debugfs_create_file("rx_qp_err_stats", S_IRUSR, dev->dir,
+ &dev->rx_qp_err_stats, &ocrdma_dbg_ops);
dev->tx_dbg_stats.type = OCRDMA_TX_DBG_STATS;
dev->tx_dbg_stats.dev = dev;
- if (!debugfs_create_file("tx_dbg_stats", S_IRUSR, dev->dir,
- &dev->tx_dbg_stats, &ocrdma_dbg_ops))
- goto err;
+ debugfs_create_file("tx_dbg_stats", S_IRUSR, dev->dir,
+ &dev->tx_dbg_stats, &ocrdma_dbg_ops);
dev->rx_dbg_stats.type = OCRDMA_RX_DBG_STATS;
dev->rx_dbg_stats.dev = dev;
- if (!debugfs_create_file("rx_dbg_stats", S_IRUSR, dev->dir,
- &dev->rx_dbg_stats, &ocrdma_dbg_ops))
- goto err;
+ debugfs_create_file("rx_dbg_stats", S_IRUSR, dev->dir,
+ &dev->rx_dbg_stats, &ocrdma_dbg_ops);
dev->driver_stats.type = OCRDMA_DRV_STATS;
dev->driver_stats.dev = dev;
- if (!debugfs_create_file("driver_dbg_stats", S_IRUSR, dev->dir,
- &dev->driver_stats, &ocrdma_dbg_ops))
- goto err;
+ debugfs_create_file("driver_dbg_stats", S_IRUSR, dev->dir,
+ &dev->driver_stats, &ocrdma_dbg_ops);
dev->reset_stats.type = OCRDMA_RESET_STATS;
dev->reset_stats.dev = dev;
- if (!debugfs_create_file("reset_stats", S_IRUSR, dev->dir,
- &dev->reset_stats, &ocrdma_dbg_ops))
- goto err;
-
-
- return;
-err:
- debugfs_remove_recursive(dev->dir);
- dev->dir = NULL;
+ debugfs_create_file("reset_stats", 0200, dev->dir, &dev->reset_stats,
+ &ocrdma_dbg_ops);
}
void ocrdma_rem_port_stats(struct ocrdma_dev *dev)
{
- if (!dev->dir)
- return;
debugfs_remove_recursive(dev->dir);
}
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_stats.h b/drivers/infiniband/hw/ocrdma/ocrdma_stats.h
index bba1fec4f11f..98feca26ac55 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_stats.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_stats.h
@@ -69,7 +69,6 @@ bool ocrdma_alloc_stats_resources(struct ocrdma_dev *dev);
void ocrdma_release_stats_resources(struct ocrdma_dev *dev);
void ocrdma_rem_port_stats(struct ocrdma_dev *dev);
void ocrdma_add_port_stats(struct ocrdma_dev *dev);
-int ocrdma_pma_counters(struct ocrdma_dev *dev,
- struct ib_mad *out_mad);
+void ocrdma_pma_counters(struct ocrdma_dev *dev, struct ib_mad *out_mad);
#endif /* __OCRDMA_STATS_H__ */
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
index e06ad7250963..46d911fd38de 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
@@ -41,63 +41,29 @@
*/
#include <linux/dma-mapping.h>
+#include <net/addrconf.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_user_verbs.h>
#include <rdma/iw_cm.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_cache.h>
+#include <rdma/uverbs_ioctl.h>
#include "ocrdma.h"
#include "ocrdma_hw.h"
#include "ocrdma_verbs.h"
#include <rdma/ocrdma-abi.h>
-int ocrdma_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey)
+int ocrdma_query_pkey(struct ib_device *ibdev, u32 port, u16 index, u16 *pkey)
{
- if (index > 1)
+ if (index > 0)
return -EINVAL;
*pkey = 0xffff;
return 0;
}
-int ocrdma_query_gid(struct ib_device *ibdev, u8 port,
- int index, union ib_gid *sgid)
-{
- int ret;
- struct ocrdma_dev *dev;
-
- dev = get_ocrdma_dev(ibdev);
- memset(sgid, 0, sizeof(*sgid));
- if (index >= OCRDMA_MAX_SGID)
- return -EINVAL;
-
- ret = ib_get_cached_gid(ibdev, port, index, sgid, NULL);
- if (ret == -EAGAIN) {
- memcpy(sgid, &zgid, sizeof(*sgid));
- return 0;
- }
-
- return ret;
-}
-
-int ocrdma_add_gid(struct ib_device *device,
- u8 port_num,
- unsigned int index,
- const union ib_gid *gid,
- const struct ib_gid_attr *attr,
- void **context) {
- return 0;
-}
-
-int ocrdma_del_gid(struct ib_device *device,
- u8 port_num,
- unsigned int index,
- void **context) {
- return 0;
-}
-
int ocrdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr,
struct ib_udata *uhw)
{
@@ -109,7 +75,8 @@ int ocrdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr,
memset(attr, 0, sizeof *attr);
memcpy(&attr->fw_ver, &dev->attr.fw_ver[0],
min(sizeof(dev->attr.fw_ver), sizeof(attr->fw_ver)));
- ocrdma_get_guid(dev, (u8 *)&attr->sys_image_guid);
+ addrconf_addr_eui48((u8 *)&attr->sys_image_guid,
+ dev->nic_info.mac_addr);
attr->max_mr_size = dev->attr.max_mr_size;
attr->page_size_cap = 0xffff000;
attr->vendor_id = dev->nic_info.pdev->vendor;
@@ -123,9 +90,10 @@ int ocrdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr,
IB_DEVICE_RC_RNR_NAK_GEN |
IB_DEVICE_SHUTDOWN_PORT |
IB_DEVICE_SYS_IMAGE_GUID |
- IB_DEVICE_LOCAL_DMA_LKEY |
IB_DEVICE_MEM_MGT_EXTENSIONS;
- attr->max_sge = min(dev->attr.max_send_sge, dev->attr.max_recv_sge);
+ attr->kernel_cap_flags = IBK_LOCAL_DMA_LKEY;
+ attr->max_send_sge = dev->attr.max_send_sge;
+ attr->max_recv_sge = dev->attr.max_recv_sge;
attr->max_sge_rd = dev->attr.max_rdma_sge;
attr->max_cq = dev->attr.max_cq;
attr->max_cqe = dev->attr.max_cqe;
@@ -133,8 +101,6 @@ int ocrdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr,
attr->max_mw = dev->attr.max_mw;
attr->max_pd = dev->attr.max_pd;
attr->atomic_cap = 0;
- attr->max_fmr = 0;
- attr->max_map_per_fmr = 0;
attr->max_qp_rd_atom =
min(dev->attr.max_ord_per_qp, dev->attr.max_ird_per_qp);
attr->max_qp_init_rd_atom = dev->attr.max_ord_per_qp;
@@ -147,26 +113,8 @@ int ocrdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr,
return 0;
}
-struct net_device *ocrdma_get_netdev(struct ib_device *ibdev, u8 port_num)
-{
- struct ocrdma_dev *dev;
- struct net_device *ndev = NULL;
-
- rcu_read_lock();
-
- dev = get_ocrdma_dev(ibdev);
- if (dev)
- ndev = dev->nic_info.netdev;
- if (ndev)
- dev_hold(ndev);
-
- rcu_read_unlock();
-
- return ndev;
-}
-
static inline void get_link_speed_and_width(struct ocrdma_dev *dev,
- u8 *ib_speed, u8 *ib_width)
+ u16 *ib_speed, u8 *ib_width)
{
int status;
u8 speed;
@@ -204,25 +152,21 @@ static inline void get_link_speed_and_width(struct ocrdma_dev *dev,
}
int ocrdma_query_port(struct ib_device *ibdev,
- u8 port, struct ib_port_attr *props)
+ u32 port, struct ib_port_attr *props)
{
enum ib_port_state port_state;
struct ocrdma_dev *dev;
struct net_device *netdev;
+ /* props being zeroed by the caller, avoid zeroing it here */
dev = get_ocrdma_dev(ibdev);
- if (port > 1) {
- pr_err("%s(%d) invalid_port=0x%x\n", __func__,
- dev->id, port);
- return -EINVAL;
- }
netdev = dev->nic_info.netdev;
if (netif_running(netdev) && netif_oper_up(netdev)) {
port_state = IB_PORT_ACTIVE;
- props->phys_state = 5;
+ props->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
} else {
port_state = IB_PORT_DOWN;
- props->phys_state = 3;
+ props->phys_state = IB_PORT_PHYS_STATE_DISABLED;
}
props->max_mtu = IB_MTU_4096;
props->active_mtu = iboe_get_mtu(netdev->mtu);
@@ -231,11 +175,10 @@ int ocrdma_query_port(struct ib_device *ibdev,
props->sm_lid = 0;
props->sm_sl = 0;
props->state = port_state;
- props->port_cap_flags =
- IB_PORT_CM_SUP |
- IB_PORT_REINIT_SUP |
- IB_PORT_DEVICE_MGMT_SUP | IB_PORT_VENDOR_CLASS_SUP |
- IB_PORT_IP_BASED_GIDS;
+ props->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_REINIT_SUP |
+ IB_PORT_DEVICE_MGMT_SUP |
+ IB_PORT_VENDOR_CLASS_SUP;
+ props->ip_gids = true;
props->gid_tbl_len = OCRDMA_MAX_SGID;
props->pkey_tbl_len = 1;
props->bad_pkey_cntr = 0;
@@ -247,19 +190,6 @@ int ocrdma_query_port(struct ib_device *ibdev,
return 0;
}
-int ocrdma_modify_port(struct ib_device *ibdev, u8 port, int mask,
- struct ib_port_modify *props)
-{
- struct ocrdma_dev *dev;
-
- dev = get_ocrdma_dev(ibdev);
- if (port > 1) {
- pr_err("%s(%d) invalid_port=0x%x\n", __func__, dev->id, port);
- return -EINVAL;
- }
- return 0;
-}
-
static int ocrdma_add_mmap(struct ocrdma_ucontext *uctx, u64 phy_addr,
unsigned long len)
{
@@ -317,13 +247,13 @@ static bool ocrdma_search_mmap(struct ocrdma_ucontext *uctx, u64 phy_addr,
static u16 _ocrdma_pd_mgr_get_bitmap(struct ocrdma_dev *dev, bool dpp_pool)
{
u16 pd_bitmap_idx = 0;
- const unsigned long *pd_bitmap;
+ unsigned long *pd_bitmap;
if (dpp_pool) {
pd_bitmap = dev->pd_mgr->pd_dpp_bitmap;
pd_bitmap_idx = find_first_zero_bit(pd_bitmap,
dev->pd_mgr->max_dpp_pd);
- __set_bit(pd_bitmap_idx, dev->pd_mgr->pd_dpp_bitmap);
+ __set_bit(pd_bitmap_idx, pd_bitmap);
dev->pd_mgr->pd_dpp_count++;
if (dev->pd_mgr->pd_dpp_count > dev->pd_mgr->pd_dpp_thrsh)
dev->pd_mgr->pd_dpp_thrsh = dev->pd_mgr->pd_dpp_count;
@@ -331,7 +261,7 @@ static u16 _ocrdma_pd_mgr_get_bitmap(struct ocrdma_dev *dev, bool dpp_pool)
pd_bitmap = dev->pd_mgr->pd_norm_bitmap;
pd_bitmap_idx = find_first_zero_bit(pd_bitmap,
dev->pd_mgr->max_normal_pd);
- __set_bit(pd_bitmap_idx, dev->pd_mgr->pd_norm_bitmap);
+ __set_bit(pd_bitmap_idx, pd_bitmap);
dev->pd_mgr->pd_norm_count++;
if (dev->pd_mgr->pd_norm_count > dev->pd_mgr->pd_norm_thrsh)
dev->pd_mgr->pd_norm_thrsh = dev->pd_mgr->pd_norm_count;
@@ -371,7 +301,7 @@ static int _ocrdma_pd_mgr_put_bitmap(struct ocrdma_dev *dev, u16 pd_id,
return 0;
}
-static u8 ocrdma_put_pd_num(struct ocrdma_dev *dev, u16 pd_id,
+static int ocrdma_put_pd_num(struct ocrdma_dev *dev, u16 pd_id,
bool dpp_pool)
{
int status;
@@ -414,17 +344,22 @@ static int ocrdma_get_pd_num(struct ocrdma_dev *dev, struct ocrdma_pd *pd)
return status;
}
-static struct ocrdma_pd *_ocrdma_alloc_pd(struct ocrdma_dev *dev,
- struct ocrdma_ucontext *uctx,
- struct ib_udata *udata)
+/*
+ * NOTE:
+ *
+ * ocrdma_ucontext must be used here because this function is also
+ * called from ocrdma_alloc_ucontext where ib_udata does not have
+ * valid ib_ucontext pointer. ib_uverbs_get_context does not call
+ * uobj_{alloc|get_xxx} helpers which are used to store the
+ * ib_ucontext in uverbs_attr_bundle wrapping the ib_udata. so
+ * ib_udata does NOT imply valid ib_ucontext here!
+ */
+static int _ocrdma_alloc_pd(struct ocrdma_dev *dev, struct ocrdma_pd *pd,
+ struct ocrdma_ucontext *uctx,
+ struct ib_udata *udata)
{
- struct ocrdma_pd *pd = NULL;
int status;
- pd = kzalloc(sizeof(*pd), GFP_KERNEL);
- if (!pd)
- return ERR_PTR(-ENOMEM);
-
if (udata && uctx && dev->attr.max_dpp_pds) {
pd->dpp_enabled =
ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R;
@@ -433,15 +368,8 @@ static struct ocrdma_pd *_ocrdma_alloc_pd(struct ocrdma_dev *dev,
dev->attr.wqe_size) : 0;
}
- if (dev->pd_mgr->pd_prealloc_valid) {
- status = ocrdma_get_pd_num(dev, pd);
- if (status == 0) {
- return pd;
- } else {
- kfree(pd);
- return ERR_PTR(status);
- }
- }
+ if (dev->pd_mgr->pd_prealloc_valid)
+ return ocrdma_get_pd_num(dev, pd);
retry:
status = ocrdma_mbx_alloc_pd(dev, pd);
@@ -450,45 +378,46 @@ retry:
pd->dpp_enabled = false;
pd->num_dpp_qp = 0;
goto retry;
- } else {
- kfree(pd);
- return ERR_PTR(status);
}
+ return status;
}
- return pd;
+ return 0;
}
static inline int is_ucontext_pd(struct ocrdma_ucontext *uctx,
struct ocrdma_pd *pd)
{
- return (uctx->cntxt_pd == pd ? true : false);
+ return (uctx->cntxt_pd == pd);
}
-static int _ocrdma_dealloc_pd(struct ocrdma_dev *dev,
+static void _ocrdma_dealloc_pd(struct ocrdma_dev *dev,
struct ocrdma_pd *pd)
{
- int status;
-
if (dev->pd_mgr->pd_prealloc_valid)
- status = ocrdma_put_pd_num(dev, pd->id, pd->dpp_enabled);
+ ocrdma_put_pd_num(dev, pd->id, pd->dpp_enabled);
else
- status = ocrdma_mbx_dealloc_pd(dev, pd);
-
- kfree(pd);
- return status;
+ ocrdma_mbx_dealloc_pd(dev, pd);
}
static int ocrdma_alloc_ucontext_pd(struct ocrdma_dev *dev,
struct ocrdma_ucontext *uctx,
struct ib_udata *udata)
{
- int status = 0;
+ struct ib_device *ibdev = &dev->ibdev;
+ struct ib_pd *pd;
+ int status;
+
+ pd = rdma_zalloc_drv_obj(ibdev, ib_pd);
+ if (!pd)
+ return -ENOMEM;
+
+ pd->device = ibdev;
+ uctx->cntxt_pd = get_ocrdma_pd(pd);
- uctx->cntxt_pd = _ocrdma_alloc_pd(dev, uctx, udata);
- if (IS_ERR(uctx->cntxt_pd)) {
- status = PTR_ERR(uctx->cntxt_pd);
- uctx->cntxt_pd = NULL;
+ status = _ocrdma_alloc_pd(dev, uctx->cntxt_pd, uctx, udata);
+ if (status) {
+ kfree(uctx->cntxt_pd);
goto err;
}
@@ -498,7 +427,7 @@ err:
return status;
}
-static int ocrdma_dealloc_ucontext_pd(struct ocrdma_ucontext *uctx)
+static void ocrdma_dealloc_ucontext_pd(struct ocrdma_ucontext *uctx)
{
struct ocrdma_pd *pd = uctx->cntxt_pd;
struct ocrdma_dev *dev = get_ocrdma_dev(pd->ibpd.device);
@@ -508,8 +437,8 @@ static int ocrdma_dealloc_ucontext_pd(struct ocrdma_ucontext *uctx)
__func__, dev->id, pd->id);
}
uctx->cntxt_pd = NULL;
- (void)_ocrdma_dealloc_pd(dev, pd);
- return 0;
+ _ocrdma_dealloc_pd(dev, pd);
+ kfree(pd);
}
static struct ocrdma_pd *ocrdma_get_ucontext_pd(struct ocrdma_ucontext *uctx)
@@ -533,34 +462,28 @@ static void ocrdma_release_ucontext_pd(struct ocrdma_ucontext *uctx)
mutex_unlock(&uctx->mm_list_lock);
}
-struct ib_ucontext *ocrdma_alloc_ucontext(struct ib_device *ibdev,
- struct ib_udata *udata)
+int ocrdma_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata)
{
+ struct ib_device *ibdev = uctx->device;
int status;
- struct ocrdma_ucontext *ctx;
- struct ocrdma_alloc_ucontext_resp resp;
+ struct ocrdma_ucontext *ctx = get_ocrdma_ucontext(uctx);
+ struct ocrdma_alloc_ucontext_resp resp = {};
struct ocrdma_dev *dev = get_ocrdma_dev(ibdev);
struct pci_dev *pdev = dev->nic_info.pdev;
u32 map_len = roundup(sizeof(u32) * 2048, PAGE_SIZE);
if (!udata)
- return ERR_PTR(-EFAULT);
- ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
- if (!ctx)
- return ERR_PTR(-ENOMEM);
+ return -EFAULT;
INIT_LIST_HEAD(&ctx->mm_head);
mutex_init(&ctx->mm_list_lock);
ctx->ah_tbl.va = dma_alloc_coherent(&pdev->dev, map_len,
&ctx->ah_tbl.pa, GFP_KERNEL);
- if (!ctx->ah_tbl.va) {
- kfree(ctx);
- return ERR_PTR(-ENOMEM);
- }
- memset(ctx->ah_tbl.va, 0, map_len);
+ if (!ctx->ah_tbl.va)
+ return -ENOMEM;
+
ctx->ah_tbl.len = map_len;
- memset(&resp, 0, sizeof(resp));
resp.ah_tbl_len = ctx->ah_tbl.len;
resp.ah_tbl_page = virt_to_phys(ctx->ah_tbl.va);
@@ -582,27 +505,26 @@ struct ib_ucontext *ocrdma_alloc_ucontext(struct ib_device *ibdev,
status = ib_copy_to_udata(udata, &resp, sizeof(resp));
if (status)
goto cpy_err;
- return &ctx->ibucontext;
+ return 0;
cpy_err:
+ ocrdma_dealloc_ucontext_pd(ctx);
pd_err:
ocrdma_del_mmap(ctx, ctx->ah_tbl.pa, ctx->ah_tbl.len);
map_err:
dma_free_coherent(&pdev->dev, ctx->ah_tbl.len, ctx->ah_tbl.va,
ctx->ah_tbl.pa);
- kfree(ctx);
- return ERR_PTR(status);
+ return status;
}
-int ocrdma_dealloc_ucontext(struct ib_ucontext *ibctx)
+void ocrdma_dealloc_ucontext(struct ib_ucontext *ibctx)
{
- int status;
struct ocrdma_mm *mm, *tmp;
struct ocrdma_ucontext *uctx = get_ocrdma_ucontext(ibctx);
struct ocrdma_dev *dev = get_ocrdma_dev(ibctx->device);
struct pci_dev *pdev = dev->nic_info.pdev;
- status = ocrdma_dealloc_ucontext_pd(uctx);
+ ocrdma_dealloc_ucontext_pd(uctx);
ocrdma_del_mmap(uctx, uctx->ah_tbl.pa, uctx->ah_tbl.len);
dma_free_coherent(&pdev->dev, uctx->ah_tbl.len, uctx->ah_tbl.va,
@@ -612,8 +534,6 @@ int ocrdma_dealloc_ucontext(struct ib_ucontext *ibctx)
list_del(&mm->entry);
kfree(mm);
}
- kfree(uctx);
- return status;
}
int ocrdma_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
@@ -660,7 +580,6 @@ int ocrdma_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
}
static int ocrdma_copy_pd_uresp(struct ocrdma_dev *dev, struct ocrdma_pd *pd,
- struct ib_ucontext *ib_ctx,
struct ib_udata *udata)
{
int status;
@@ -668,7 +587,8 @@ static int ocrdma_copy_pd_uresp(struct ocrdma_dev *dev, struct ocrdma_pd *pd,
u64 dpp_page_addr = 0;
u32 db_page_size;
struct ocrdma_alloc_pd_uresp rsp;
- struct ocrdma_ucontext *uctx = get_ocrdma_ucontext(ib_ctx);
+ struct ocrdma_ucontext *uctx = rdma_udata_to_drv_context(
+ udata, struct ocrdma_ucontext, ibucontext);
memset(&rsp, 0, sizeof(rsp));
rsp.id = pd->id;
@@ -706,18 +626,17 @@ dpp_map_err:
return status;
}
-struct ib_pd *ocrdma_alloc_pd(struct ib_device *ibdev,
- struct ib_ucontext *context,
- struct ib_udata *udata)
+int ocrdma_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
{
+ struct ib_device *ibdev = ibpd->device;
struct ocrdma_dev *dev = get_ocrdma_dev(ibdev);
struct ocrdma_pd *pd;
- struct ocrdma_ucontext *uctx = NULL;
int status;
u8 is_uctx_pd = false;
+ struct ocrdma_ucontext *uctx = rdma_udata_to_drv_context(
+ udata, struct ocrdma_ucontext, ibucontext);
- if (udata && context) {
- uctx = get_ocrdma_ucontext(context);
+ if (udata) {
pd = ocrdma_get_ucontext_pd(uctx);
if (pd) {
is_uctx_pd = true;
@@ -725,36 +644,33 @@ struct ib_pd *ocrdma_alloc_pd(struct ib_device *ibdev,
}
}
- pd = _ocrdma_alloc_pd(dev, uctx, udata);
- if (IS_ERR(pd)) {
- status = PTR_ERR(pd);
+ pd = get_ocrdma_pd(ibpd);
+ status = _ocrdma_alloc_pd(dev, pd, uctx, udata);
+ if (status)
goto exit;
- }
pd_mapping:
- if (udata && context) {
- status = ocrdma_copy_pd_uresp(dev, pd, context, udata);
+ if (udata) {
+ status = ocrdma_copy_pd_uresp(dev, pd, udata);
if (status)
goto err;
}
- return &pd->ibpd;
+ return 0;
err:
- if (is_uctx_pd) {
+ if (is_uctx_pd)
ocrdma_release_ucontext_pd(uctx);
- } else {
- status = _ocrdma_dealloc_pd(dev, pd);
- }
+ else
+ _ocrdma_dealloc_pd(dev, pd);
exit:
- return ERR_PTR(status);
+ return status;
}
-int ocrdma_dealloc_pd(struct ib_pd *ibpd)
+int ocrdma_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
{
struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
struct ocrdma_ucontext *uctx = NULL;
- int status = 0;
u64 usr_db;
uctx = pd->uctx;
@@ -768,11 +684,11 @@ int ocrdma_dealloc_pd(struct ib_pd *ibpd)
if (is_ucontext_pd(uctx, pd)) {
ocrdma_release_ucontext_pd(uctx);
- return status;
+ return 0;
}
}
- status = _ocrdma_dealloc_pd(dev, pd);
- return status;
+ _ocrdma_dealloc_pd(dev, pd);
+ return 0;
}
static int ocrdma_alloc_lkey(struct ocrdma_dev *dev, struct ocrdma_mr *mr,
@@ -878,8 +794,8 @@ static int ocrdma_build_pbl_tbl(struct ocrdma_dev *dev, struct ocrdma_hw_mr *mr)
void *va;
dma_addr_t pa;
- mr->pbl_table = kzalloc(sizeof(struct ocrdma_pbl) *
- mr->num_pbls, GFP_KERNEL);
+ mr->pbl_table = kcalloc(mr->num_pbls, sizeof(struct ocrdma_pbl),
+ GFP_KERNEL);
if (!mr->pbl_table)
return -ENOMEM;
@@ -891,21 +807,19 @@ static int ocrdma_build_pbl_tbl(struct ocrdma_dev *dev, struct ocrdma_hw_mr *mr)
status = -ENOMEM;
break;
}
- memset(va, 0, dma_len);
mr->pbl_table[i].va = va;
mr->pbl_table[i].pa = pa;
}
return status;
}
-static void build_user_pbes(struct ocrdma_dev *dev, struct ocrdma_mr *mr,
- u32 num_pbes)
+static void build_user_pbes(struct ocrdma_dev *dev, struct ocrdma_mr *mr)
{
struct ocrdma_pbe *pbe;
- struct scatterlist *sg;
+ struct ib_block_iter biter;
struct ocrdma_pbl *pbl_tbl = mr->hwmr.pbl_table;
- struct ib_umem *umem = mr->umem;
- int shift, pg_cnt, pages, pbe_cnt, entry, total_num_pbes = 0;
+ int pbe_cnt;
+ u64 pg_addr;
if (!mr->hwmr.num_pbes)
return;
@@ -913,51 +827,36 @@ static void build_user_pbes(struct ocrdma_dev *dev, struct ocrdma_mr *mr,
pbe = (struct ocrdma_pbe *)pbl_tbl->va;
pbe_cnt = 0;
- shift = ilog2(umem->page_size);
-
- for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
- pages = sg_dma_len(sg) >> shift;
- for (pg_cnt = 0; pg_cnt < pages; pg_cnt++) {
- /* store the page address in pbe */
- pbe->pa_lo =
- cpu_to_le32(sg_dma_address
- (sg) +
- (umem->page_size * pg_cnt));
- pbe->pa_hi =
- cpu_to_le32(upper_32_bits
- ((sg_dma_address
- (sg) +
- umem->page_size * pg_cnt)));
- pbe_cnt += 1;
- total_num_pbes += 1;
- pbe++;
-
- /* if done building pbes, issue the mbx cmd. */
- if (total_num_pbes == num_pbes)
- return;
-
- /* if the given pbl is full storing the pbes,
- * move to next pbl.
- */
- if (pbe_cnt ==
- (mr->hwmr.pbl_size / sizeof(u64))) {
- pbl_tbl++;
- pbe = (struct ocrdma_pbe *)pbl_tbl->va;
- pbe_cnt = 0;
- }
+ rdma_umem_for_each_dma_block (mr->umem, &biter, PAGE_SIZE) {
+ /* store the page address in pbe */
+ pg_addr = rdma_block_iter_dma_address(&biter);
+ pbe->pa_lo = cpu_to_le32(pg_addr);
+ pbe->pa_hi = cpu_to_le32(upper_32_bits(pg_addr));
+ pbe_cnt += 1;
+ pbe++;
+ /* if the given pbl is full storing the pbes,
+ * move to next pbl.
+ */
+ if (pbe_cnt == (mr->hwmr.pbl_size / sizeof(u64))) {
+ pbl_tbl++;
+ pbe = (struct ocrdma_pbe *)pbl_tbl->va;
+ pbe_cnt = 0;
}
}
}
struct ib_mr *ocrdma_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
- u64 usr_addr, int acc, struct ib_udata *udata)
+ u64 usr_addr, int acc, struct ib_dmah *dmah,
+ struct ib_udata *udata)
{
int status = -ENOMEM;
struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
struct ocrdma_mr *mr;
struct ocrdma_pd *pd;
- u32 num_pbes;
+
+ if (dmah)
+ return ERR_PTR(-EOPNOTSUPP);
pd = get_ocrdma_pd(ibpd);
@@ -967,18 +866,17 @@ struct ib_mr *ocrdma_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
mr = kzalloc(sizeof(*mr), GFP_KERNEL);
if (!mr)
return ERR_PTR(status);
- mr->umem = ib_umem_get(ibpd->uobject->context, start, len, acc, 0);
+ mr->umem = ib_umem_get(ibpd->device, start, len, acc);
if (IS_ERR(mr->umem)) {
status = -EFAULT;
goto umem_err;
}
- num_pbes = ib_umem_page_count(mr->umem);
- status = ocrdma_get_pbl_info(dev, mr, num_pbes);
+ status = ocrdma_get_pbl_info(
+ dev, mr, ib_umem_num_dma_blocks(mr->umem, PAGE_SIZE));
if (status)
goto umem_err;
- mr->hwmr.pbe_size = mr->umem->page_size;
- mr->hwmr.fbo = ib_umem_offset(mr->umem);
+ mr->hwmr.pbe_size = PAGE_SIZE;
mr->hwmr.va = usr_addr;
mr->hwmr.len = len;
mr->hwmr.remote_wr = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
@@ -989,7 +887,7 @@ struct ib_mr *ocrdma_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
status = ocrdma_build_pbl_tbl(dev, &mr->hwmr);
if (status)
goto umem_err;
- build_user_pbes(dev, mr, num_pbes);
+ build_user_pbes(dev, mr);
status = ocrdma_reg_mr(dev, &mr->hwmr, pd->id, acc);
if (status)
goto mbx_err;
@@ -1006,7 +904,7 @@ umem_err:
return ERR_PTR(status);
}
-int ocrdma_dereg_mr(struct ib_mr *ib_mr)
+int ocrdma_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata)
{
struct ocrdma_mr *mr = get_ocrdma_mr(ib_mr);
struct ocrdma_dev *dev = get_ocrdma_dev(ib_mr->device);
@@ -1017,8 +915,7 @@ int ocrdma_dereg_mr(struct ib_mr *ib_mr)
ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr);
/* it could be user registered memory. */
- if (mr->umem)
- ib_umem_release(mr->umem);
+ ib_umem_release(mr->umem);
kfree(mr);
/* Don't stop cleanup, in case FW is unresponsive */
@@ -1030,13 +927,17 @@ int ocrdma_dereg_mr(struct ib_mr *ib_mr)
}
static int ocrdma_copy_cq_uresp(struct ocrdma_dev *dev, struct ocrdma_cq *cq,
- struct ib_udata *udata,
- struct ib_ucontext *ib_ctx)
+ struct ib_udata *udata)
{
int status;
- struct ocrdma_ucontext *uctx = get_ocrdma_ucontext(ib_ctx);
+ struct ocrdma_ucontext *uctx = rdma_udata_to_drv_context(
+ udata, struct ocrdma_ucontext, ibucontext);
struct ocrdma_create_cq_uresp uresp;
+ /* this must be user flow! */
+ if (!udata)
+ return -EINVAL;
+
memset(&uresp, 0, sizeof(uresp));
uresp.cq_id = cq->id;
uresp.page_size = PAGE_ALIGN(cq->len);
@@ -1065,59 +966,53 @@ err:
return status;
}
-struct ib_cq *ocrdma_create_cq(struct ib_device *ibdev,
- const struct ib_cq_init_attr *attr,
- struct ib_ucontext *ib_ctx,
- struct ib_udata *udata)
+int ocrdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+ struct uverbs_attr_bundle *attrs)
{
+ struct ib_udata *udata = &attrs->driver_udata;
+ struct ib_device *ibdev = ibcq->device;
int entries = attr->cqe;
- struct ocrdma_cq *cq;
+ struct ocrdma_cq *cq = get_ocrdma_cq(ibcq);
struct ocrdma_dev *dev = get_ocrdma_dev(ibdev);
- struct ocrdma_ucontext *uctx = NULL;
+ struct ocrdma_ucontext *uctx = rdma_udata_to_drv_context(
+ udata, struct ocrdma_ucontext, ibucontext);
u16 pd_id = 0;
int status;
struct ocrdma_create_cq_ureq ureq;
if (attr->flags)
- return ERR_PTR(-EINVAL);
+ return -EOPNOTSUPP;
if (udata) {
if (ib_copy_from_udata(&ureq, udata, sizeof(ureq)))
- return ERR_PTR(-EFAULT);
+ return -EFAULT;
} else
ureq.dpp_cq = 0;
- cq = kzalloc(sizeof(*cq), GFP_KERNEL);
- if (!cq)
- return ERR_PTR(-ENOMEM);
spin_lock_init(&cq->cq_lock);
spin_lock_init(&cq->comp_handler_lock);
INIT_LIST_HEAD(&cq->sq_head);
INIT_LIST_HEAD(&cq->rq_head);
- if (ib_ctx) {
- uctx = get_ocrdma_ucontext(ib_ctx);
+ if (udata)
pd_id = uctx->cntxt_pd->id;
- }
status = ocrdma_mbx_create_cq(dev, cq, entries, ureq.dpp_cq, pd_id);
- if (status) {
- kfree(cq);
- return ERR_PTR(status);
- }
- if (ib_ctx) {
- status = ocrdma_copy_cq_uresp(dev, cq, udata, ib_ctx);
+ if (status)
+ return status;
+
+ if (udata) {
+ status = ocrdma_copy_cq_uresp(dev, cq, udata);
if (status)
goto ctx_err;
}
cq->phase = OCRDMA_CQE_VALID;
dev->cq_tbl[cq->id] = cq;
- return &cq->ibcq;
+ return 0;
ctx_err:
ocrdma_mbx_destroy_cq(dev, cq);
- kfree(cq);
- return ERR_PTR(status);
+ return status;
}
int ocrdma_resize_cq(struct ib_cq *ibcq, int new_cnt,
@@ -1160,7 +1055,7 @@ static void ocrdma_flush_cq(struct ocrdma_cq *cq)
spin_unlock_irqrestore(&cq->cq_lock, flags);
}
-int ocrdma_destroy_cq(struct ib_cq *ibcq)
+int ocrdma_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
{
struct ocrdma_cq *cq = get_ocrdma_cq(ibcq);
struct ocrdma_eq *eq = NULL;
@@ -1170,14 +1065,13 @@ int ocrdma_destroy_cq(struct ib_cq *ibcq)
dev->cq_tbl[cq->id] = NULL;
indx = ocrdma_get_eq_table_index(dev, cq->eqn);
- BUG_ON(indx == -EINVAL);
eq = &dev->eq_tbl[indx];
irq = ocrdma_get_irq(dev, eq);
synchronize_irq(irq);
ocrdma_flush_cq(cq);
- (void)ocrdma_mbx_destroy_cq(dev, cq);
+ ocrdma_mbx_destroy_cq(dev, cq);
if (cq->ucontext) {
pdid = cq->ucontext->cntxt_pd->id;
ocrdma_del_mmap(cq->ucontext, (u64) cq->pa,
@@ -1186,8 +1080,6 @@ int ocrdma_destroy_cq(struct ib_cq *ibcq)
ocrdma_get_db_addr(dev, pdid),
dev->nic_info.db_page_size);
}
-
- kfree(cq);
return 0;
}
@@ -1208,7 +1100,8 @@ static void ocrdma_del_qpn_map(struct ocrdma_dev *dev, struct ocrdma_qp *qp)
}
static int ocrdma_check_qp_params(struct ib_pd *ibpd, struct ocrdma_dev *dev,
- struct ib_qp_init_attr *attrs)
+ struct ib_qp_init_attr *attrs,
+ struct ib_udata *udata)
{
if ((attrs->qp_type != IB_QPT_GSI) &&
(attrs->qp_type != IB_QPT_RC) &&
@@ -1216,7 +1109,7 @@ static int ocrdma_check_qp_params(struct ib_pd *ibpd, struct ocrdma_dev *dev,
(attrs->qp_type != IB_QPT_UD)) {
pr_err("%s(%d) unsupported qp type=0x%x requested\n",
__func__, dev->id, attrs->qp_type);
- return -EINVAL;
+ return -EOPNOTSUPP;
}
/* Skip the check for QP1 to support CM size of 128 */
if ((attrs->qp_type != IB_QPT_GSI) &&
@@ -1256,7 +1149,7 @@ static int ocrdma_check_qp_params(struct ib_pd *ibpd, struct ocrdma_dev *dev,
return -EINVAL;
}
/* unprivileged user space cannot create special QP */
- if (ibpd->uobject && attrs->qp_type == IB_QPT_GSI) {
+ if (udata && attrs->qp_type == IB_QPT_GSI) {
pr_err
("%s(%d) Userspace can't create special QPs of type=0x%x\n",
__func__, dev->id, attrs->qp_type);
@@ -1362,12 +1255,12 @@ static void ocrdma_set_qp_db(struct ocrdma_dev *dev, struct ocrdma_qp *qp,
static int ocrdma_alloc_wr_id_tbl(struct ocrdma_qp *qp)
{
qp->wqe_wr_id_tbl =
- kzalloc(sizeof(*(qp->wqe_wr_id_tbl)) * qp->sq.max_cnt,
+ kcalloc(qp->sq.max_cnt, sizeof(*(qp->wqe_wr_id_tbl)),
GFP_KERNEL);
if (qp->wqe_wr_id_tbl == NULL)
return -ENOMEM;
qp->rqe_wr_id_tbl =
- kzalloc(sizeof(u64) * qp->rq.max_cnt, GFP_KERNEL);
+ kcalloc(qp->rq.max_cnt, sizeof(u64), GFP_KERNEL);
if (qp->rqe_wr_id_tbl == NULL)
return -ENOMEM;
@@ -1389,7 +1282,7 @@ static void ocrdma_set_qp_init_params(struct ocrdma_qp *qp,
qp->sq.max_sges = attrs->cap.max_send_sge;
qp->rq.max_sges = attrs->cap.max_recv_sge;
qp->state = OCRDMA_QPS_RST;
- qp->signaled = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR) ? true : false;
+ qp->signaled = attrs->sq_sig_type == IB_SIGNAL_ALL_WR;
}
static void ocrdma_store_gsi_qp_cq(struct ocrdma_dev *dev,
@@ -1402,30 +1295,28 @@ static void ocrdma_store_gsi_qp_cq(struct ocrdma_dev *dev,
}
}
-struct ib_qp *ocrdma_create_qp(struct ib_pd *ibpd,
- struct ib_qp_init_attr *attrs,
- struct ib_udata *udata)
+int ocrdma_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs,
+ struct ib_udata *udata)
{
int status;
+ struct ib_pd *ibpd = ibqp->pd;
struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
- struct ocrdma_qp *qp;
- struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
+ struct ocrdma_qp *qp = get_ocrdma_qp(ibqp);
+ struct ocrdma_dev *dev = get_ocrdma_dev(ibqp->device);
struct ocrdma_create_qp_ureq ureq;
u16 dpp_credit_lmt, dpp_offset;
- status = ocrdma_check_qp_params(ibpd, dev, attrs);
+ if (attrs->create_flags)
+ return -EOPNOTSUPP;
+
+ status = ocrdma_check_qp_params(ibpd, dev, attrs, udata);
if (status)
goto gen_err;
memset(&ureq, 0, sizeof(ureq));
if (udata) {
if (ib_copy_from_udata(&ureq, udata, sizeof(ureq)))
- return ERR_PTR(-EFAULT);
- }
- qp = kzalloc(sizeof(*qp), GFP_KERNEL);
- if (!qp) {
- status = -ENOMEM;
- goto gen_err;
+ return -EFAULT;
}
ocrdma_set_qp_init_params(qp, pd, attrs);
if (udata == NULL)
@@ -1460,7 +1351,7 @@ struct ib_qp *ocrdma_create_qp(struct ib_pd *ibpd,
ocrdma_store_gsi_qp_cq(dev, attrs);
qp->ibqp.qp_num = qp->id;
mutex_unlock(&dev->dev_lock);
- return &qp->ibqp;
+ return 0;
cpy_err:
ocrdma_del_qpn_map(dev, qp);
@@ -1470,10 +1361,9 @@ mbx_err:
mutex_unlock(&dev->dev_lock);
kfree(qp->wqe_wr_id_tbl);
kfree(qp->rqe_wr_id_tbl);
- kfree(qp);
pr_err("%s(%d) error=%d\n", __func__, dev->id, status);
gen_err:
- return ERR_PTR(status);
+ return status;
}
int _ocrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
@@ -1505,6 +1395,9 @@ int ocrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
struct ocrdma_dev *dev;
enum ib_qp_state old_qps, new_qps;
+ if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
+ return -EOPNOTSUPP;
+
qp = get_ocrdma_qp(ibqp);
dev = get_ocrdma_dev(ibqp->device);
@@ -1519,8 +1412,7 @@ int ocrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
new_qps = old_qps;
spin_unlock_irqrestore(&qp->q_lock, flags);
- if (!ib_modify_qp_is_ok(old_qps, new_qps, ibqp->qp_type, attr_mask,
- IB_LINK_LAYER_ETHERNET)) {
+ if (!ib_modify_qp_is_ok(old_qps, new_qps, ibqp->qp_type, attr_mask)) {
pr_err("%s(%d) invalid attribute mask=0x%x specified for\n"
"qpn=0x%x of type=0x%x old_qps=0x%x, new_qps=0x%x\n",
__func__, dev->id, attr_mask, qp->id, ibqp->qp_type,
@@ -1600,23 +1492,24 @@ int ocrdma_query_qp(struct ib_qp *ibqp,
qp_attr->cap.max_recv_sge = qp->rq.max_sges;
qp_attr->cap.max_inline_data = qp->max_inline_data;
qp_init_attr->cap = qp_attr->cap;
- memcpy(&qp_attr->ah_attr.grh.dgid, &params.dgid[0],
- sizeof(params.dgid));
- qp_attr->ah_attr.grh.flow_label = params.rnt_rc_sl_fl &
- OCRDMA_QP_PARAMS_FLOW_LABEL_MASK;
- qp_attr->ah_attr.grh.sgid_index = qp->sgid_idx;
- qp_attr->ah_attr.grh.hop_limit = (params.hop_lmt_rq_psn &
- OCRDMA_QP_PARAMS_HOP_LMT_MASK) >>
- OCRDMA_QP_PARAMS_HOP_LMT_SHIFT;
- qp_attr->ah_attr.grh.traffic_class = (params.tclass_sq_psn &
- OCRDMA_QP_PARAMS_TCLASS_MASK) >>
- OCRDMA_QP_PARAMS_TCLASS_SHIFT;
-
- qp_attr->ah_attr.ah_flags = IB_AH_GRH;
- qp_attr->ah_attr.port_num = 1;
- qp_attr->ah_attr.sl = (params.rnt_rc_sl_fl &
- OCRDMA_QP_PARAMS_SL_MASK) >>
- OCRDMA_QP_PARAMS_SL_SHIFT;
+ qp_attr->ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE;
+
+ rdma_ah_set_grh(&qp_attr->ah_attr, NULL,
+ params.rnt_rc_sl_fl &
+ OCRDMA_QP_PARAMS_FLOW_LABEL_MASK,
+ qp->sgid_idx,
+ (params.hop_lmt_rq_psn &
+ OCRDMA_QP_PARAMS_HOP_LMT_MASK) >>
+ OCRDMA_QP_PARAMS_HOP_LMT_SHIFT,
+ (params.tclass_sq_psn &
+ OCRDMA_QP_PARAMS_TCLASS_MASK) >>
+ OCRDMA_QP_PARAMS_TCLASS_SHIFT);
+ rdma_ah_set_dgid_raw(&qp_attr->ah_attr, &params.dgid[0]);
+
+ rdma_ah_set_port_num(&qp_attr->ah_attr, 1);
+ rdma_ah_set_sl(&qp_attr->ah_attr, (params.rnt_rc_sl_fl &
+ OCRDMA_QP_PARAMS_SL_MASK) >>
+ OCRDMA_QP_PARAMS_SL_SHIFT);
qp_attr->timeout = (params.ack_to_rnr_rtc_dest_qpn &
OCRDMA_QP_PARAMS_ACK_TIMEOUT_MASK) >>
OCRDMA_QP_PARAMS_ACK_TIMEOUT_SHIFT;
@@ -1629,8 +1522,8 @@ int ocrdma_query_qp(struct ib_qp *ibqp,
qp_attr->min_rnr_timer = 0;
qp_attr->pkey_index = 0;
qp_attr->port_num = 1;
- qp_attr->ah_attr.src_path_bits = 0;
- qp_attr->ah_attr.static_rate = 0;
+ rdma_ah_set_path_bits(&qp_attr->ah_attr, 0);
+ rdma_ah_set_static_rate(&qp_attr->ah_attr, 0);
qp_attr->alt_pkey_index = 0;
qp_attr->alt_port_num = 0;
qp_attr->alt_timeout = 0;
@@ -1701,7 +1594,6 @@ static void ocrdma_discard_cqes(struct ocrdma_qp *qp, struct ocrdma_cq *cq)
{
unsigned long cq_flags;
unsigned long flags;
- int discard_cnt = 0;
u32 cur_getp, stop_getp;
struct ocrdma_cqe *cqe;
u32 qpn = 0, wqe_idx = 0;
@@ -1753,7 +1645,6 @@ static void ocrdma_discard_cqes(struct ocrdma_qp *qp, struct ocrdma_cq *cq)
/* mark cqe discarded so that it is not picked up later
* in the poll_cq().
*/
- discard_cnt += 1;
cqe->cmn.qpn = 0;
skip_cqe:
cur_getp = (cur_getp + 1) % cq->max_hw_cqe;
@@ -1780,7 +1671,7 @@ void ocrdma_del_flush_qp(struct ocrdma_qp *qp)
spin_unlock_irqrestore(&dev->flush_q_lock, flags);
}
-int ocrdma_destroy_qp(struct ib_qp *ibqp)
+int ocrdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
{
struct ocrdma_pd *pd;
struct ocrdma_qp *qp;
@@ -1812,13 +1703,13 @@ int ocrdma_destroy_qp(struct ib_qp *ibqp)
* protect against proessing in-flight CQEs for this QP.
*/
spin_lock_irqsave(&qp->sq_cq->cq_lock, flags);
- if (qp->rq_cq && (qp->rq_cq != qp->sq_cq))
+ if (qp->rq_cq && (qp->rq_cq != qp->sq_cq)) {
spin_lock(&qp->rq_cq->cq_lock);
-
- ocrdma_del_qpn_map(dev, qp);
-
- if (qp->rq_cq && (qp->rq_cq != qp->sq_cq))
+ ocrdma_del_qpn_map(dev, qp);
spin_unlock(&qp->rq_cq->cq_lock);
+ } else {
+ ocrdma_del_qpn_map(dev, qp);
+ }
spin_unlock_irqrestore(&qp->sq_cq->cq_lock, flags);
if (!pd->uctx) {
@@ -1839,7 +1730,6 @@ int ocrdma_destroy_qp(struct ib_qp *ibqp)
kfree(qp->wqe_wr_id_tbl);
kfree(qp->rqe_wr_id_tbl);
- kfree(qp);
return 0;
}
@@ -1876,43 +1766,46 @@ static int ocrdma_copy_srq_uresp(struct ocrdma_dev *dev, struct ocrdma_srq *srq,
return status;
}
-struct ib_srq *ocrdma_create_srq(struct ib_pd *ibpd,
- struct ib_srq_init_attr *init_attr,
- struct ib_udata *udata)
+int ocrdma_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init_attr,
+ struct ib_udata *udata)
{
- int status = -ENOMEM;
- struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
- struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
- struct ocrdma_srq *srq;
+ int status;
+ struct ocrdma_pd *pd = get_ocrdma_pd(ibsrq->pd);
+ struct ocrdma_dev *dev = get_ocrdma_dev(ibsrq->device);
+ struct ocrdma_srq *srq = get_ocrdma_srq(ibsrq);
+
+ if (init_attr->srq_type != IB_SRQT_BASIC)
+ return -EOPNOTSUPP;
if (init_attr->attr.max_sge > dev->attr.max_recv_sge)
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
if (init_attr->attr.max_wr > dev->attr.max_rqe)
- return ERR_PTR(-EINVAL);
-
- srq = kzalloc(sizeof(*srq), GFP_KERNEL);
- if (!srq)
- return ERR_PTR(status);
+ return -EINVAL;
spin_lock_init(&srq->q_lock);
srq->pd = pd;
srq->db = dev->nic_info.db + (pd->id * dev->nic_info.db_page_size);
status = ocrdma_mbx_create_srq(dev, srq, init_attr, pd);
if (status)
- goto err;
+ return status;
- if (udata == NULL) {
- srq->rqe_wr_id_tbl = kzalloc(sizeof(u64) * srq->rq.max_cnt,
- GFP_KERNEL);
- if (srq->rqe_wr_id_tbl == NULL)
+ if (!udata) {
+ srq->rqe_wr_id_tbl = kcalloc(srq->rq.max_cnt, sizeof(u64),
+ GFP_KERNEL);
+ if (!srq->rqe_wr_id_tbl) {
+ status = -ENOMEM;
goto arm_err;
+ }
srq->bit_fields_len = (srq->rq.max_cnt / 32) +
(srq->rq.max_cnt % 32 ? 1 : 0);
srq->idx_bit_fields =
- kmalloc(srq->bit_fields_len * sizeof(u32), GFP_KERNEL);
- if (srq->idx_bit_fields == NULL)
+ kmalloc_array(srq->bit_fields_len, sizeof(u32),
+ GFP_KERNEL);
+ if (!srq->idx_bit_fields) {
+ status = -ENOMEM;
goto arm_err;
+ }
memset(srq->idx_bit_fields, 0xff,
srq->bit_fields_len * sizeof(u32));
}
@@ -1929,15 +1822,13 @@ struct ib_srq *ocrdma_create_srq(struct ib_pd *ibpd,
goto arm_err;
}
- return &srq->ibsrq;
+ return 0;
arm_err:
ocrdma_mbx_destroy_srq(dev, srq);
-err:
kfree(srq->rqe_wr_id_tbl);
kfree(srq->idx_bit_fields);
- kfree(srq);
- return ERR_PTR(status);
+ return status;
}
int ocrdma_modify_srq(struct ib_srq *ibsrq,
@@ -1958,23 +1849,20 @@ int ocrdma_modify_srq(struct ib_srq *ibsrq,
int ocrdma_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr)
{
- int status;
struct ocrdma_srq *srq;
srq = get_ocrdma_srq(ibsrq);
- status = ocrdma_mbx_query_srq(srq, srq_attr);
- return status;
+ return ocrdma_mbx_query_srq(srq, srq_attr);
}
-int ocrdma_destroy_srq(struct ib_srq *ibsrq)
+int ocrdma_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
{
- int status;
struct ocrdma_srq *srq;
struct ocrdma_dev *dev = get_ocrdma_dev(ibsrq->device);
srq = get_ocrdma_srq(ibsrq);
- status = ocrdma_mbx_destroy_srq(dev, srq);
+ ocrdma_mbx_destroy_srq(dev, srq);
if (srq->pd->uctx)
ocrdma_del_mmap(srq->pd->uctx, (u64) srq->rq.pa,
@@ -1982,14 +1870,13 @@ int ocrdma_destroy_srq(struct ib_srq *ibsrq)
kfree(srq->idx_bit_fields);
kfree(srq->rqe_wr_id_tbl);
- kfree(srq);
- return status;
+ return 0;
}
/* unprivileged verbs and their support functions. */
static void ocrdma_build_ud_hdr(struct ocrdma_qp *qp,
struct ocrdma_hdr_wqe *hdr,
- struct ib_send_wr *wr)
+ const struct ib_send_wr *wr)
{
struct ocrdma_ewqe_ud_hdr *ud_hdr =
(struct ocrdma_ewqe_ud_hdr *)(hdr + 1);
@@ -2036,7 +1923,7 @@ static inline uint32_t ocrdma_sglist_len(struct ib_sge *sg_list, int num_sge)
static int ocrdma_build_inline_sges(struct ocrdma_qp *qp,
struct ocrdma_hdr_wqe *hdr,
struct ocrdma_sge *sge,
- struct ib_send_wr *wr, u32 wqe_size)
+ const struct ib_send_wr *wr, u32 wqe_size)
{
int i;
char *dpp_addr;
@@ -2074,9 +1961,8 @@ static int ocrdma_build_inline_sges(struct ocrdma_qp *qp,
}
static int ocrdma_build_send(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr,
- struct ib_send_wr *wr)
+ const struct ib_send_wr *wr)
{
- int status;
struct ocrdma_sge *sge;
u32 wqe_size = sizeof(*hdr);
@@ -2088,12 +1974,11 @@ static int ocrdma_build_send(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr,
sge = (struct ocrdma_sge *)(hdr + 1);
}
- status = ocrdma_build_inline_sges(qp, hdr, sge, wr, wqe_size);
- return status;
+ return ocrdma_build_inline_sges(qp, hdr, sge, wr, wqe_size);
}
static int ocrdma_build_write(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr,
- struct ib_send_wr *wr)
+ const struct ib_send_wr *wr)
{
int status;
struct ocrdma_sge *ext_rw = (struct ocrdma_sge *)(hdr + 1);
@@ -2111,7 +1996,7 @@ static int ocrdma_build_write(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr,
}
static void ocrdma_build_read(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr,
- struct ib_send_wr *wr)
+ const struct ib_send_wr *wr)
{
struct ocrdma_sge *ext_rw = (struct ocrdma_sge *)(hdr + 1);
struct ocrdma_sge *sge = ext_rw + 1;
@@ -2141,7 +2026,7 @@ static int get_encoded_page_size(int pg_sz)
static int ocrdma_build_reg(struct ocrdma_qp *qp,
struct ocrdma_hdr_wqe *hdr,
- struct ib_reg_wr *wr)
+ const struct ib_reg_wr *wr)
{
u64 fbo;
struct ocrdma_ewqe_fr *fast_reg = (struct ocrdma_ewqe_fr *)(hdr + 1);
@@ -2202,8 +2087,8 @@ static void ocrdma_ring_sq_db(struct ocrdma_qp *qp)
iowrite32(val, qp->sq_db);
}
-int ocrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
- struct ib_send_wr **bad_wr)
+int ocrdma_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+ const struct ib_send_wr **bad_wr)
{
int status = 0;
struct ocrdma_qp *qp = get_ocrdma_qp(ibqp);
@@ -2246,6 +2131,7 @@ int ocrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
case IB_WR_SEND_WITH_IMM:
hdr->cw |= (OCRDMA_FLAG_IMM << OCRDMA_WQE_FLAGS_SHIFT);
hdr->immdt = ntohl(wr->ex.imm_data);
+ fallthrough;
case IB_WR_SEND:
hdr->cw |= (OCRDMA_SEND << OCRDMA_WQE_OPCODE_SHIFT);
ocrdma_build_send(qp, hdr, wr);
@@ -2259,6 +2145,7 @@ int ocrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
case IB_WR_RDMA_WRITE_WITH_IMM:
hdr->cw |= (OCRDMA_FLAG_IMM << OCRDMA_WQE_FLAGS_SHIFT);
hdr->immdt = ntohl(wr->ex.imm_data);
+ fallthrough;
case IB_WR_RDMA_WRITE:
hdr->cw |= (OCRDMA_WRITE << OCRDMA_WQE_OPCODE_SHIFT);
status = ocrdma_build_write(qp, hdr, wr);
@@ -2312,8 +2199,8 @@ static void ocrdma_ring_rq_db(struct ocrdma_qp *qp)
iowrite32(val, qp->rq_db);
}
-static void ocrdma_build_rqe(struct ocrdma_hdr_wqe *rqe, struct ib_recv_wr *wr,
- u16 tag)
+static void ocrdma_build_rqe(struct ocrdma_hdr_wqe *rqe,
+ const struct ib_recv_wr *wr, u16 tag)
{
u32 wqe_size = 0;
struct ocrdma_sge *sge;
@@ -2333,8 +2220,8 @@ static void ocrdma_build_rqe(struct ocrdma_hdr_wqe *rqe, struct ib_recv_wr *wr,
ocrdma_cpu_to_le32(rqe, wqe_size);
}
-int ocrdma_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
- struct ib_recv_wr **bad_wr)
+int ocrdma_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
+ const struct ib_recv_wr **bad_wr)
{
int status = 0;
unsigned long flags;
@@ -2403,8 +2290,8 @@ static void ocrdma_ring_srq_db(struct ocrdma_srq *srq)
iowrite32(val, srq->db + OCRDMA_DB_GEN2_SRQ_OFFSET);
}
-int ocrdma_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
- struct ib_recv_wr **bad_wr)
+int ocrdma_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
+ const struct ib_recv_wr **bad_wr)
{
int status = 0;
unsigned long flags;
@@ -3010,8 +2897,7 @@ int ocrdma_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags cq_flags)
return 0;
}
-struct ib_mr *ocrdma_alloc_mr(struct ib_pd *ibpd,
- enum ib_mr_type mr_type,
+struct ib_mr *ocrdma_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type,
u32 max_num_sg)
{
int status;
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
index 704ef1e9271b..6c5c3755b8a9 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
@@ -43,59 +43,39 @@
#ifndef __OCRDMA_VERBS_H__
#define __OCRDMA_VERBS_H__
-int ocrdma_post_send(struct ib_qp *, struct ib_send_wr *,
- struct ib_send_wr **bad_wr);
-int ocrdma_post_recv(struct ib_qp *, struct ib_recv_wr *,
- struct ib_recv_wr **bad_wr);
+int ocrdma_post_send(struct ib_qp *, const struct ib_send_wr *,
+ const struct ib_send_wr **bad_wr);
+int ocrdma_post_recv(struct ib_qp *, const struct ib_recv_wr *,
+ const struct ib_recv_wr **bad_wr);
int ocrdma_poll_cq(struct ib_cq *, int num_entries, struct ib_wc *wc);
int ocrdma_arm_cq(struct ib_cq *, enum ib_cq_notify_flags flags);
int ocrdma_query_device(struct ib_device *, struct ib_device_attr *props,
struct ib_udata *uhw);
-int ocrdma_query_port(struct ib_device *, u8 port, struct ib_port_attr *props);
-int ocrdma_modify_port(struct ib_device *, u8 port, int mask,
- struct ib_port_modify *props);
+int ocrdma_query_port(struct ib_device *ibdev, u32 port,
+ struct ib_port_attr *props);
enum rdma_protocol_type
-ocrdma_query_protocol(struct ib_device *device, u8 port_num);
+ocrdma_query_protocol(struct ib_device *device, u32 port_num);
-void ocrdma_get_guid(struct ocrdma_dev *, u8 *guid);
-int ocrdma_query_gid(struct ib_device *, u8 port,
- int index, union ib_gid *gid);
-struct net_device *ocrdma_get_netdev(struct ib_device *device, u8 port_num);
-int ocrdma_add_gid(struct ib_device *device,
- u8 port_num,
- unsigned int index,
- const union ib_gid *gid,
- const struct ib_gid_attr *attr,
- void **context);
-int ocrdma_del_gid(struct ib_device *device,
- u8 port_num,
- unsigned int index,
- void **context);
-int ocrdma_query_pkey(struct ib_device *, u8 port, u16 index, u16 *pkey);
+int ocrdma_query_pkey(struct ib_device *ibdev, u32 port, u16 index, u16 *pkey);
-struct ib_ucontext *ocrdma_alloc_ucontext(struct ib_device *,
- struct ib_udata *);
-int ocrdma_dealloc_ucontext(struct ib_ucontext *);
+int ocrdma_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata);
+void ocrdma_dealloc_ucontext(struct ib_ucontext *uctx);
int ocrdma_mmap(struct ib_ucontext *, struct vm_area_struct *vma);
-struct ib_pd *ocrdma_alloc_pd(struct ib_device *,
- struct ib_ucontext *, struct ib_udata *);
-int ocrdma_dealloc_pd(struct ib_pd *pd);
+int ocrdma_alloc_pd(struct ib_pd *pd, struct ib_udata *udata);
+int ocrdma_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata);
-struct ib_cq *ocrdma_create_cq(struct ib_device *ibdev,
- const struct ib_cq_init_attr *attr,
- struct ib_ucontext *ib_ctx,
- struct ib_udata *udata);
+int ocrdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+ struct uverbs_attr_bundle *attrs);
int ocrdma_resize_cq(struct ib_cq *, int cqe, struct ib_udata *);
-int ocrdma_destroy_cq(struct ib_cq *);
+int ocrdma_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata);
-struct ib_qp *ocrdma_create_qp(struct ib_pd *,
- struct ib_qp_init_attr *attrs,
- struct ib_udata *);
+int ocrdma_create_qp(struct ib_qp *qp, struct ib_qp_init_attr *attrs,
+ struct ib_udata *udata);
int _ocrdma_modify_qp(struct ib_qp *, struct ib_qp_attr *attr,
int attr_mask);
int ocrdma_modify_qp(struct ib_qp *, struct ib_qp_attr *attr,
@@ -103,24 +83,24 @@ int ocrdma_modify_qp(struct ib_qp *, struct ib_qp_attr *attr,
int ocrdma_query_qp(struct ib_qp *,
struct ib_qp_attr *qp_attr,
int qp_attr_mask, struct ib_qp_init_attr *);
-int ocrdma_destroy_qp(struct ib_qp *);
+int ocrdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata);
void ocrdma_del_flush_qp(struct ocrdma_qp *qp);
-struct ib_srq *ocrdma_create_srq(struct ib_pd *, struct ib_srq_init_attr *,
- struct ib_udata *);
+int ocrdma_create_srq(struct ib_srq *srq, struct ib_srq_init_attr *attr,
+ struct ib_udata *udata);
int ocrdma_modify_srq(struct ib_srq *, struct ib_srq_attr *,
enum ib_srq_attr_mask, struct ib_udata *);
int ocrdma_query_srq(struct ib_srq *, struct ib_srq_attr *);
-int ocrdma_destroy_srq(struct ib_srq *);
-int ocrdma_post_srq_recv(struct ib_srq *, struct ib_recv_wr *,
- struct ib_recv_wr **bad_recv_wr);
+int ocrdma_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata);
+int ocrdma_post_srq_recv(struct ib_srq *, const struct ib_recv_wr *,
+ const struct ib_recv_wr **bad_recv_wr);
-int ocrdma_dereg_mr(struct ib_mr *);
+int ocrdma_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata);
struct ib_mr *ocrdma_get_dma_mr(struct ib_pd *, int acc);
struct ib_mr *ocrdma_reg_user_mr(struct ib_pd *, u64 start, u64 length,
- u64 virt, int acc, struct ib_udata *);
-struct ib_mr *ocrdma_alloc_mr(struct ib_pd *pd,
- enum ib_mr_type mr_type,
+ u64 virt, int acc, struct ib_dmah *dmah,
+ struct ib_udata *);
+struct ib_mr *ocrdma_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
u32 max_num_sg);
int ocrdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
unsigned int *sg_offset);
diff --git a/drivers/infiniband/hw/qedr/Kconfig b/drivers/infiniband/hw/qedr/Kconfig
index 6c9f3923e838..9e31d13b03c3 100644
--- a/drivers/infiniband/hw/qedr/Kconfig
+++ b/drivers/infiniband/hw/qedr/Kconfig
@@ -1,8 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0-only
config INFINIBAND_QEDR
tristate "QLogic RoCE driver"
depends on 64BIT && QEDE
+ depends on PCI
select QED_LL2
+ select QED_OOO
select QED_RDMA
- ---help---
+ help
This driver provides low-level InfiniBand over Ethernet
support for QLogic QED host channel adapters (HCAs).
diff --git a/drivers/infiniband/hw/qedr/Makefile b/drivers/infiniband/hw/qedr/Makefile
index ba7067c77f2f..c75679837a85 100644
--- a/drivers/infiniband/hw/qedr/Makefile
+++ b/drivers/infiniband/hw/qedr/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
obj-$(CONFIG_INFINIBAND_QEDR) := qedr.o
-qedr-y := main.o verbs.o qedr_cm.o
+qedr-y := main.o verbs.o qedr_roce_cm.o qedr_iw_cm.o
diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c
index 7b74d09a8217..ecdfeff3d44f 100644
--- a/drivers/infiniband/hw/qedr/main.c
+++ b/drivers/infiniband/hw/qedr/main.c
@@ -33,25 +33,28 @@
#include <rdma/ib_verbs.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_user_verbs.h>
+#include <rdma/iw_cm.h>
+#include <rdma/ib_mad.h>
#include <linux/netdevice.h>
#include <linux/iommu.h>
+#include <linux/pci.h>
#include <net/addrconf.h>
-#include <linux/qed/qede_roce.h>
+
#include <linux/qed/qed_chain.h>
#include <linux/qed/qed_if.h>
#include "qedr.h"
#include "verbs.h"
#include <rdma/qedr-abi.h>
+#include "qedr_iw_cm.h"
MODULE_DESCRIPTION("QLogic 40G/100G ROCE Driver");
MODULE_AUTHOR("QLogic Corporation");
MODULE_LICENSE("Dual BSD/GPL");
-MODULE_VERSION(QEDR_MODULE_VERSION);
#define QEDR_WQ_MULTIPLIER_DFT (3)
-void qedr_ib_dispatch_event(struct qedr_dev *dev, u8 port_num,
- enum ib_event_type type)
+static void qedr_ib_dispatch_event(struct qedr_dev *dev, u32 port_num,
+ enum ib_event_type type)
{
struct ib_event ibev;
@@ -63,119 +66,205 @@ void qedr_ib_dispatch_event(struct qedr_dev *dev, u8 port_num,
}
static enum rdma_link_layer qedr_link_layer(struct ib_device *device,
- u8 port_num)
+ u32 port_num)
{
return IB_LINK_LAYER_ETHERNET;
}
-static void qedr_get_dev_fw_str(struct ib_device *ibdev, char *str,
- size_t str_len)
+static void qedr_get_dev_fw_str(struct ib_device *ibdev, char *str)
{
struct qedr_dev *qedr = get_qedr_dev(ibdev);
u32 fw_ver = (u32)qedr->attr.fw_ver;
- snprintf(str, str_len, "%d. %d. %d. %d",
+ snprintf(str, IB_FW_VERSION_NAME_MAX, "%d.%d.%d.%d",
(fw_ver >> 24) & 0xFF, (fw_ver >> 16) & 0xFF,
(fw_ver >> 8) & 0xFF, fw_ver & 0xFF);
}
-static struct net_device *qedr_get_netdev(struct ib_device *dev, u8 port_num)
+static int qedr_roce_port_immutable(struct ib_device *ibdev, u32 port_num,
+ struct ib_port_immutable *immutable)
{
- struct qedr_dev *qdev;
+ struct ib_port_attr attr;
+ int err;
- qdev = get_qedr_dev(dev);
- dev_hold(qdev->ndev);
+ err = qedr_query_port(ibdev, port_num, &attr);
+ if (err)
+ return err;
- /* The HW vendor's device driver must guarantee
- * that this function returns NULL before the net device reaches
- * NETDEV_UNREGISTER_FINAL state.
- */
- return qdev->ndev;
+ immutable->pkey_tbl_len = attr.pkey_tbl_len;
+ immutable->gid_tbl_len = attr.gid_tbl_len;
+ immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE |
+ RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
+ immutable->max_mad_size = IB_MGMT_MAD_SIZE;
+
+ return 0;
}
-static int qedr_register_device(struct qedr_dev *dev)
+static int qedr_iw_port_immutable(struct ib_device *ibdev, u32 port_num,
+ struct ib_port_immutable *immutable)
{
- strlcpy(dev->ibdev.name, "qedr%d", IB_DEVICE_NAME_MAX);
+ struct ib_port_attr attr;
+ int err;
- dev->ibdev.node_guid = dev->attr.node_guid;
- memcpy(dev->ibdev.node_desc, QEDR_NODE_DESC, sizeof(QEDR_NODE_DESC));
- dev->ibdev.owner = THIS_MODULE;
- dev->ibdev.uverbs_abi_ver = QEDR_ABI_VERSION;
-
- dev->ibdev.uverbs_cmd_mask = QEDR_UVERBS(GET_CONTEXT) |
- QEDR_UVERBS(QUERY_DEVICE) |
- QEDR_UVERBS(QUERY_PORT) |
- QEDR_UVERBS(ALLOC_PD) |
- QEDR_UVERBS(DEALLOC_PD) |
- QEDR_UVERBS(CREATE_COMP_CHANNEL) |
- QEDR_UVERBS(CREATE_CQ) |
- QEDR_UVERBS(RESIZE_CQ) |
- QEDR_UVERBS(DESTROY_CQ) |
- QEDR_UVERBS(REQ_NOTIFY_CQ) |
- QEDR_UVERBS(CREATE_QP) |
- QEDR_UVERBS(MODIFY_QP) |
- QEDR_UVERBS(QUERY_QP) |
- QEDR_UVERBS(DESTROY_QP) |
- QEDR_UVERBS(REG_MR) |
- QEDR_UVERBS(DEREG_MR) |
- QEDR_UVERBS(POLL_CQ) |
- QEDR_UVERBS(POST_SEND) |
- QEDR_UVERBS(POST_RECV);
+ err = qedr_query_port(ibdev, port_num, &attr);
+ if (err)
+ return err;
- dev->ibdev.phys_port_cnt = 1;
- dev->ibdev.num_comp_vectors = dev->num_cnq;
- dev->ibdev.node_type = RDMA_NODE_IB_CA;
+ immutable->gid_tbl_len = 1;
+ immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
+ immutable->max_mad_size = 0;
+
+ return 0;
+}
+
+/* QEDR sysfs interface */
+static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr,
+ char *buf)
+{
+ struct qedr_dev *dev =
+ rdma_device_to_drv_device(device, struct qedr_dev, ibdev);
- dev->ibdev.query_device = qedr_query_device;
- dev->ibdev.query_port = qedr_query_port;
- dev->ibdev.modify_port = qedr_modify_port;
+ return sysfs_emit(buf, "0x%x\n", dev->attr.hw_ver);
+}
+static DEVICE_ATTR_RO(hw_rev);
+
+static ssize_t hca_type_show(struct device *device,
+ struct device_attribute *attr, char *buf)
+{
+ struct qedr_dev *dev =
+ rdma_device_to_drv_device(device, struct qedr_dev, ibdev);
+
+ return sysfs_emit(buf, "FastLinQ QL%x %s\n", dev->pdev->device,
+ rdma_protocol_iwarp(&dev->ibdev, 1) ? "iWARP" :
+ "RoCE");
+}
+static DEVICE_ATTR_RO(hca_type);
+
+static struct attribute *qedr_attributes[] = {
+ &dev_attr_hw_rev.attr,
+ &dev_attr_hca_type.attr,
+ NULL
+};
- dev->ibdev.query_gid = qedr_query_gid;
- dev->ibdev.add_gid = qedr_add_gid;
- dev->ibdev.del_gid = qedr_del_gid;
+static const struct attribute_group qedr_attr_group = {
+ .attrs = qedr_attributes,
+};
+
+static const struct ib_device_ops qedr_iw_dev_ops = {
+ .get_port_immutable = qedr_iw_port_immutable,
+ .iw_accept = qedr_iw_accept,
+ .iw_add_ref = qedr_iw_qp_add_ref,
+ .iw_connect = qedr_iw_connect,
+ .iw_create_listen = qedr_iw_create_listen,
+ .iw_destroy_listen = qedr_iw_destroy_listen,
+ .iw_get_qp = qedr_iw_get_qp,
+ .iw_reject = qedr_iw_reject,
+ .iw_rem_ref = qedr_iw_qp_rem_ref,
+ .query_gid = qedr_iw_query_gid,
+};
+
+static int qedr_iw_register_device(struct qedr_dev *dev)
+{
+ dev->ibdev.node_type = RDMA_NODE_RNIC;
+
+ ib_set_device_ops(&dev->ibdev, &qedr_iw_dev_ops);
- dev->ibdev.alloc_ucontext = qedr_alloc_ucontext;
- dev->ibdev.dealloc_ucontext = qedr_dealloc_ucontext;
- dev->ibdev.mmap = qedr_mmap;
+ memcpy(dev->ibdev.iw_ifname,
+ dev->ndev->name, sizeof(dev->ibdev.iw_ifname));
+
+ return 0;
+}
- dev->ibdev.alloc_pd = qedr_alloc_pd;
- dev->ibdev.dealloc_pd = qedr_dealloc_pd;
+static const struct ib_device_ops qedr_roce_dev_ops = {
+ .alloc_xrcd = qedr_alloc_xrcd,
+ .dealloc_xrcd = qedr_dealloc_xrcd,
+ .get_port_immutable = qedr_roce_port_immutable,
+ .query_pkey = qedr_query_pkey,
+};
- dev->ibdev.create_cq = qedr_create_cq;
- dev->ibdev.destroy_cq = qedr_destroy_cq;
- dev->ibdev.resize_cq = qedr_resize_cq;
- dev->ibdev.req_notify_cq = qedr_arm_cq;
+static void qedr_roce_register_device(struct qedr_dev *dev)
+{
+ dev->ibdev.node_type = RDMA_NODE_IB_CA;
- dev->ibdev.create_qp = qedr_create_qp;
- dev->ibdev.modify_qp = qedr_modify_qp;
- dev->ibdev.query_qp = qedr_query_qp;
- dev->ibdev.destroy_qp = qedr_destroy_qp;
+ ib_set_device_ops(&dev->ibdev, &qedr_roce_dev_ops);
+}
- dev->ibdev.query_pkey = qedr_query_pkey;
+static const struct ib_device_ops qedr_dev_ops = {
+ .owner = THIS_MODULE,
+ .driver_id = RDMA_DRIVER_QEDR,
+ .uverbs_abi_ver = QEDR_ABI_VERSION,
+
+ .alloc_mr = qedr_alloc_mr,
+ .alloc_pd = qedr_alloc_pd,
+ .alloc_ucontext = qedr_alloc_ucontext,
+ .create_ah = qedr_create_ah,
+ .create_cq = qedr_create_cq,
+ .create_qp = qedr_create_qp,
+ .create_srq = qedr_create_srq,
+ .dealloc_pd = qedr_dealloc_pd,
+ .dealloc_ucontext = qedr_dealloc_ucontext,
+ .dereg_mr = qedr_dereg_mr,
+ .destroy_ah = qedr_destroy_ah,
+ .destroy_cq = qedr_destroy_cq,
+ .destroy_qp = qedr_destroy_qp,
+ .destroy_srq = qedr_destroy_srq,
+ .device_group = &qedr_attr_group,
+ .get_dev_fw_str = qedr_get_dev_fw_str,
+ .get_dma_mr = qedr_get_dma_mr,
+ .get_link_layer = qedr_link_layer,
+ .map_mr_sg = qedr_map_mr_sg,
+ .mmap = qedr_mmap,
+ .mmap_free = qedr_mmap_free,
+ .modify_qp = qedr_modify_qp,
+ .modify_srq = qedr_modify_srq,
+ .poll_cq = qedr_poll_cq,
+ .post_recv = qedr_post_recv,
+ .post_send = qedr_post_send,
+ .post_srq_recv = qedr_post_srq_recv,
+ .process_mad = qedr_process_mad,
+ .query_device = qedr_query_device,
+ .query_port = qedr_query_port,
+ .query_qp = qedr_query_qp,
+ .query_srq = qedr_query_srq,
+ .reg_user_mr = qedr_reg_user_mr,
+ .req_notify_cq = qedr_arm_cq,
+
+ INIT_RDMA_OBJ_SIZE(ib_ah, qedr_ah, ibah),
+ INIT_RDMA_OBJ_SIZE(ib_cq, qedr_cq, ibcq),
+ INIT_RDMA_OBJ_SIZE(ib_pd, qedr_pd, ibpd),
+ INIT_RDMA_OBJ_SIZE(ib_qp, qedr_qp, ibqp),
+ INIT_RDMA_OBJ_SIZE(ib_srq, qedr_srq, ibsrq),
+ INIT_RDMA_OBJ_SIZE(ib_xrcd, qedr_xrcd, ibxrcd),
+ INIT_RDMA_OBJ_SIZE(ib_ucontext, qedr_ucontext, ibucontext),
+};
- dev->ibdev.create_ah = qedr_create_ah;
- dev->ibdev.destroy_ah = qedr_destroy_ah;
+static int qedr_register_device(struct qedr_dev *dev)
+{
+ int rc;
- dev->ibdev.get_dma_mr = qedr_get_dma_mr;
- dev->ibdev.dereg_mr = qedr_dereg_mr;
- dev->ibdev.reg_user_mr = qedr_reg_user_mr;
- dev->ibdev.alloc_mr = qedr_alloc_mr;
- dev->ibdev.map_mr_sg = qedr_map_mr_sg;
+ dev->ibdev.node_guid = dev->attr.node_guid;
+ memcpy(dev->ibdev.node_desc, QEDR_NODE_DESC, sizeof(QEDR_NODE_DESC));
- dev->ibdev.poll_cq = qedr_poll_cq;
- dev->ibdev.post_send = qedr_post_send;
- dev->ibdev.post_recv = qedr_post_recv;
+ if (IS_IWARP(dev)) {
+ rc = qedr_iw_register_device(dev);
+ if (rc)
+ return rc;
+ } else {
+ qedr_roce_register_device(dev);
+ }
- dev->ibdev.process_mad = qedr_process_mad;
- dev->ibdev.get_port_immutable = qedr_port_immutable;
- dev->ibdev.get_netdev = qedr_get_netdev;
+ dev->ibdev.phys_port_cnt = 1;
+ dev->ibdev.num_comp_vectors = dev->num_cnq;
+ dev->ibdev.dev.parent = &dev->pdev->dev;
- dev->ibdev.dma_device = &dev->pdev->dev;
+ ib_set_device_ops(&dev->ibdev, &qedr_dev_ops);
- dev->ibdev.get_link_layer = qedr_link_layer;
- dev->ibdev.get_dev_fw_str = qedr_get_dev_fw_str;
+ rc = ib_device_set_netdev(&dev->ibdev, dev->ndev, 1);
+ if (rc)
+ return rc;
- return ib_register_device(&dev->ibdev, NULL);
+ dma_set_max_seg_size(&dev->pdev->dev, UINT_MAX);
+ return ib_register_device(&dev->ibdev, "qedr%d", &dev->pdev->dev);
}
/* This function allocates fast-path status block memory */
@@ -208,7 +297,8 @@ static void qedr_free_mem_sb(struct qedr_dev *dev,
struct qed_sb_info *sb_info, int sb_id)
{
if (sb_info->sb_virt) {
- dev->ops->common->sb_release(dev->cdev, sb_info, sb_id);
+ dev->ops->common->sb_release(dev->cdev, sb_info, sb_id,
+ QED_SB_TYPE_CNQ);
dma_free_coherent(&dev->pdev->dev, sizeof(*sb_info->sb_virt),
(void *)sb_info->sb_virt, sb_info->sb_phys);
}
@@ -218,6 +308,9 @@ static void qedr_free_resources(struct qedr_dev *dev)
{
int i;
+ if (IS_IWARP(dev))
+ destroy_workqueue(dev->iwarp_wq);
+
for (i = 0; i < dev->num_cnq; i++) {
qedr_free_mem_sb(dev, &dev->sb_array[i], dev->sb_start + i);
dev->ops->common->chain_free(dev->cdev, &dev->cnq_array[i].pbl);
@@ -230,24 +323,39 @@ static void qedr_free_resources(struct qedr_dev *dev)
static int qedr_alloc_resources(struct qedr_dev *dev)
{
+ struct qed_chain_init_params params = {
+ .mode = QED_CHAIN_MODE_PBL,
+ .intended_use = QED_CHAIN_USE_TO_CONSUME,
+ .cnt_type = QED_CHAIN_CNT_TYPE_U16,
+ .elem_size = sizeof(struct regpair *),
+ };
struct qedr_cnq *cnq;
__le16 *cons_pi;
- u16 n_entries;
int i, rc;
- dev->sgid_tbl = kzalloc(sizeof(union ib_gid) *
- QEDR_MAX_SGID, GFP_KERNEL);
+ dev->sgid_tbl = kcalloc(QEDR_MAX_SGID, sizeof(union ib_gid),
+ GFP_KERNEL);
if (!dev->sgid_tbl)
return -ENOMEM;
spin_lock_init(&dev->sgid_lock);
+ xa_init_flags(&dev->srqs, XA_FLAGS_LOCK_IRQ);
+
+ if (IS_IWARP(dev)) {
+ xa_init(&dev->qps);
+ dev->iwarp_wq = create_singlethread_workqueue("qedr_iwarpq");
+ if (!dev->iwarp_wq) {
+ rc = -ENOMEM;
+ goto err1;
+ }
+ }
/* Allocate Status blocks for CNQ */
dev->sb_array = kcalloc(dev->num_cnq, sizeof(*dev->sb_array),
GFP_KERNEL);
if (!dev->sb_array) {
rc = -ENOMEM;
- goto err1;
+ goto err_destroy_wq;
}
dev->cnq_array = kcalloc(dev->num_cnq,
@@ -260,7 +368,9 @@ static int qedr_alloc_resources(struct qedr_dev *dev)
dev->sb_start = dev->ops->rdma_get_start_sb(dev->cdev);
/* Allocate CNQ PBLs */
- n_entries = min_t(u32, QED_RDMA_MAX_CNQ_SIZE, QEDR_ROCE_MAX_CNQ_SIZE);
+ params.num_elems = min_t(u32, QED_RDMA_MAX_CNQ_SIZE,
+ QEDR_ROCE_MAX_CNQ_SIZE);
+
for (i = 0; i < dev->num_cnq; i++) {
cnq = &dev->cnq_array[i];
@@ -269,13 +379,8 @@ static int qedr_alloc_resources(struct qedr_dev *dev)
if (rc)
goto err3;
- rc = dev->ops->common->chain_alloc(dev->cdev,
- QED_CHAIN_USE_TO_CONSUME,
- QED_CHAIN_MODE_PBL,
- QED_CHAIN_CNT_TYPE_U16,
- n_entries,
- sizeof(struct regpair *),
- &cnq->pbl);
+ rc = dev->ops->common->chain_alloc(dev->cdev, &cnq->pbl,
+ &params);
if (rc)
goto err4;
@@ -301,81 +406,25 @@ err3:
kfree(dev->cnq_array);
err2:
kfree(dev->sb_array);
+err_destroy_wq:
+ if (IS_IWARP(dev))
+ destroy_workqueue(dev->iwarp_wq);
err1:
kfree(dev->sgid_tbl);
return rc;
}
-/* QEDR sysfs interface */
-static ssize_t show_rev(struct device *device, struct device_attribute *attr,
- char *buf)
-{
- struct qedr_dev *dev = dev_get_drvdata(device);
-
- return scnprintf(buf, PAGE_SIZE, "0x%x\n", dev->pdev->vendor);
-}
-
-static ssize_t show_hca_type(struct device *device,
- struct device_attribute *attr, char *buf)
-{
- return scnprintf(buf, PAGE_SIZE, "%s\n", "HCA_TYPE_TO_SET");
-}
-
-static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
-static DEVICE_ATTR(hca_type, S_IRUGO, show_hca_type, NULL);
-
-static struct device_attribute *qedr_attributes[] = {
- &dev_attr_hw_rev,
- &dev_attr_hca_type
-};
-
-static void qedr_remove_sysfiles(struct qedr_dev *dev)
-{
- int i;
-
- for (i = 0; i < ARRAY_SIZE(qedr_attributes); i++)
- device_remove_file(&dev->ibdev.dev, qedr_attributes[i]);
-}
-
static void qedr_pci_set_atomic(struct qedr_dev *dev, struct pci_dev *pdev)
{
- struct pci_dev *bridge;
- u32 val;
-
- dev->atomic_cap = IB_ATOMIC_NONE;
-
- bridge = pdev->bus->self;
- if (!bridge)
- return;
-
- /* Check whether we are connected directly or via a switch */
- while (bridge && bridge->bus->parent) {
- DP_DEBUG(dev, QEDR_MSG_INIT,
- "Device is not connected directly to root. bridge->bus->number=%d primary=%d\n",
- bridge->bus->number, bridge->bus->primary);
- /* Need to check Atomic Op Routing Supported all the way to
- * root complex.
- */
- pcie_capability_read_dword(bridge, PCI_EXP_DEVCAP2, &val);
- if (!(val & PCI_EXP_DEVCAP2_ATOMIC_ROUTE)) {
- pcie_capability_clear_word(pdev,
- PCI_EXP_DEVCTL2,
- PCI_EXP_DEVCTL2_ATOMIC_REQ);
- return;
- }
- bridge = bridge->bus->parent->self;
- }
- bridge = pdev->bus->self;
+ int rc = pci_enable_atomic_ops_to_root(pdev,
+ PCI_EXP_DEVCAP2_ATOMIC_COMP64);
- /* according to bridge capability */
- pcie_capability_read_dword(bridge, PCI_EXP_DEVCAP2, &val);
- if (val & PCI_EXP_DEVCAP2_ATOMIC_COMP64) {
- pcie_capability_set_word(pdev, PCI_EXP_DEVCTL2,
- PCI_EXP_DEVCTL2_ATOMIC_REQ);
- dev->atomic_cap = IB_ATOMIC_GLOB;
+ if (rc) {
+ dev->atomic_cap = IB_ATOMIC_NONE;
+ DP_DEBUG(dev, QEDR_MSG_INIT, "Atomic capability disabled\n");
} else {
- pcie_capability_clear_word(pdev, PCI_EXP_DEVCTL2,
- PCI_EXP_DEVCTL2_ATOMIC_REQ);
+ dev->atomic_cap = IB_ATOMIC_GLOB;
+ DP_DEBUG(dev, QEDR_MSG_INIT, "Atomic capability enabled\n");
}
}
@@ -423,14 +472,21 @@ static irqreturn_t qedr_irq_handler(int irq, void *handle)
cq->arm_flags = 0;
- if (cq->ibcq.comp_handler)
+ if (!cq->destroyed && cq->ibcq.comp_handler)
(*cq->ibcq.comp_handler)
(&cq->ibcq, cq->ibcq.cq_context);
+ /* The CQ's CNQ notification counter is checked before
+ * destroying the CQ in a busy-wait loop that waits for all of
+ * the CQ's CNQ interrupts to be processed. It is increased
+ * here, only after the completion handler, to ensure that
+ * the handler is not running when the CQ is destroyed.
+ */
+ cq->cnq_notif++;
+
sw_comp_cons = qed_chain_get_cons_idx(&cnq->pbl);
cnq->n_comp++;
-
}
qed_ops->rdma_cnq_prod_update(cnq->dev->rdma_ctx, cnq->index,
@@ -444,12 +500,13 @@ static irqreturn_t qedr_irq_handler(int irq, void *handle)
static void qedr_sync_free_irqs(struct qedr_dev *dev)
{
u32 vector;
+ u16 idx;
int i;
for (i = 0; i < dev->int_info.used_cnt; i++) {
if (dev->int_info.msix_cnt) {
- vector = dev->int_info.msix[i * dev->num_hwfns].vector;
- synchronize_irq(vector);
+ idx = i * dev->num_hwfns + dev->affin_hwfn_idx;
+ vector = dev->int_info.msix[idx].vector;
free_irq(vector, &dev->cnq_array[i]);
}
}
@@ -460,6 +517,7 @@ static void qedr_sync_free_irqs(struct qedr_dev *dev)
static int qedr_req_msix_irqs(struct qedr_dev *dev)
{
int i, rc = 0;
+ u16 idx;
if (dev->num_cnq > dev->int_info.msix_cnt) {
DP_ERR(dev,
@@ -469,7 +527,8 @@ static int qedr_req_msix_irqs(struct qedr_dev *dev)
}
for (i = 0; i < dev->num_cnq; i++) {
- rc = request_irq(dev->int_info.msix[i * dev->num_hwfns].vector,
+ idx = i * dev->num_hwfns + dev->affin_hwfn_idx;
+ rc = request_irq(dev->int_info.msix[idx].vector,
qedr_irq_handler, 0, dev->cnq_array[i].name,
&dev->cnq_array[i]);
if (rc) {
@@ -527,7 +586,7 @@ static int qedr_set_device_attr(struct qedr_dev *dev)
qed_attr = dev->ops->rdma_query_device(dev->rdma_ctx);
/* Part 2 - check capabilities */
- page_size = ~dev->attr.page_size_caps + 1;
+ page_size = ~qed_attr->page_size_caps + 1;
if (page_size > PAGE_SIZE) {
DP_ERR(dev,
"Kernel PAGE_SIZE is %ld which is smaller than minimum page size (%d) required by qedr\n",
@@ -558,7 +617,6 @@ static int qedr_set_device_attr(struct qedr_dev *dev)
attr->max_mr_size = qed_attr->max_mr_size;
attr->max_cqe = min_t(u64, qed_attr->max_cqe, QEDR_MAX_CQES);
attr->max_mw = qed_attr->max_mw;
- attr->max_fmr = qed_attr->max_fmr;
attr->max_mr_mw_fmr_pbl = qed_attr->max_mr_mw_fmr_pbl;
attr->max_mr_mw_fmr_size = qed_attr->max_mr_mw_fmr_size;
attr->max_pd = qed_attr->max_pd;
@@ -576,54 +634,92 @@ static int qedr_set_device_attr(struct qedr_dev *dev)
return 0;
}
-void qedr_unaffiliated_event(void *context,
- u8 event_code)
+static void qedr_unaffiliated_event(void *context, u8 event_code)
{
pr_err("unaffiliated event not implemented yet\n");
}
-void qedr_affiliated_event(void *context, u8 e_code, void *fw_handle)
+static void qedr_affiliated_event(void *context, u8 e_code, void *fw_handle)
{
#define EVENT_TYPE_NOT_DEFINED 0
#define EVENT_TYPE_CQ 1
#define EVENT_TYPE_QP 2
+#define EVENT_TYPE_SRQ 3
struct qedr_dev *dev = (struct qedr_dev *)context;
- union event_ring_data *data = fw_handle;
- u64 roce_handle64 = ((u64)data->roce_handle.hi << 32) +
- data->roce_handle.lo;
+ struct regpair *async_handle = (struct regpair *)fw_handle;
+ u64 roce_handle64 = ((u64) async_handle->hi << 32) + async_handle->lo;
u8 event_type = EVENT_TYPE_NOT_DEFINED;
struct ib_event event;
+ struct ib_srq *ibsrq;
+ struct qedr_srq *srq;
+ unsigned long flags;
struct ib_cq *ibcq;
struct ib_qp *ibqp;
struct qedr_cq *cq;
struct qedr_qp *qp;
+ u16 srq_id;
- switch (e_code) {
- case ROCE_ASYNC_EVENT_CQ_OVERFLOW_ERR:
- event.event = IB_EVENT_CQ_ERR;
- event_type = EVENT_TYPE_CQ;
- break;
- case ROCE_ASYNC_EVENT_SQ_DRAINED:
- event.event = IB_EVENT_SQ_DRAINED;
- event_type = EVENT_TYPE_QP;
- break;
- case ROCE_ASYNC_EVENT_QP_CATASTROPHIC_ERR:
- event.event = IB_EVENT_QP_FATAL;
- event_type = EVENT_TYPE_QP;
- break;
- case ROCE_ASYNC_EVENT_LOCAL_INVALID_REQUEST_ERR:
- event.event = IB_EVENT_QP_REQ_ERR;
- event_type = EVENT_TYPE_QP;
- break;
- case ROCE_ASYNC_EVENT_LOCAL_ACCESS_ERR:
- event.event = IB_EVENT_QP_ACCESS_ERR;
- event_type = EVENT_TYPE_QP;
- break;
- default:
+ if (IS_ROCE(dev)) {
+ switch (e_code) {
+ case ROCE_ASYNC_EVENT_CQ_OVERFLOW_ERR:
+ event.event = IB_EVENT_CQ_ERR;
+ event_type = EVENT_TYPE_CQ;
+ break;
+ case ROCE_ASYNC_EVENT_SQ_DRAINED:
+ event.event = IB_EVENT_SQ_DRAINED;
+ event_type = EVENT_TYPE_QP;
+ break;
+ case ROCE_ASYNC_EVENT_QP_CATASTROPHIC_ERR:
+ event.event = IB_EVENT_QP_FATAL;
+ event_type = EVENT_TYPE_QP;
+ break;
+ case ROCE_ASYNC_EVENT_LOCAL_INVALID_REQUEST_ERR:
+ event.event = IB_EVENT_QP_REQ_ERR;
+ event_type = EVENT_TYPE_QP;
+ break;
+ case ROCE_ASYNC_EVENT_LOCAL_ACCESS_ERR:
+ event.event = IB_EVENT_QP_ACCESS_ERR;
+ event_type = EVENT_TYPE_QP;
+ break;
+ case ROCE_ASYNC_EVENT_SRQ_LIMIT:
+ event.event = IB_EVENT_SRQ_LIMIT_REACHED;
+ event_type = EVENT_TYPE_SRQ;
+ break;
+ case ROCE_ASYNC_EVENT_SRQ_EMPTY:
+ event.event = IB_EVENT_SRQ_ERR;
+ event_type = EVENT_TYPE_SRQ;
+ break;
+ case ROCE_ASYNC_EVENT_XRC_DOMAIN_ERR:
+ event.event = IB_EVENT_QP_ACCESS_ERR;
+ event_type = EVENT_TYPE_QP;
+ break;
+ case ROCE_ASYNC_EVENT_INVALID_XRCETH_ERR:
+ event.event = IB_EVENT_QP_ACCESS_ERR;
+ event_type = EVENT_TYPE_QP;
+ break;
+ case ROCE_ASYNC_EVENT_XRC_SRQ_CATASTROPHIC_ERR:
+ event.event = IB_EVENT_CQ_ERR;
+ event_type = EVENT_TYPE_CQ;
+ break;
+ default:
+ DP_ERR(dev, "unsupported event %d on handle=%llx\n",
+ e_code, roce_handle64);
+ }
+ } else {
+ switch (e_code) {
+ case QED_IWARP_EVENT_SRQ_LIMIT:
+ event.event = IB_EVENT_SRQ_LIMIT_REACHED;
+ event_type = EVENT_TYPE_SRQ;
+ break;
+ case QED_IWARP_EVENT_SRQ_EMPTY:
+ event.event = IB_EVENT_SRQ_ERR;
+ event_type = EVENT_TYPE_SRQ;
+ break;
+ default:
DP_ERR(dev, "unsupported event %d on handle=%llx\n", e_code,
roce_handle64);
+ }
}
-
switch (event_type) {
case EVENT_TYPE_CQ:
cq = (struct qedr_cq *)(uintptr_t)roce_handle64;
@@ -639,7 +735,7 @@ void qedr_affiliated_event(void *context, u8 e_code, void *fw_handle)
"Error: CQ event with NULL pointer ibcq. Handle=%llx\n",
roce_handle64);
}
- DP_ERR(dev, "CQ event %d on hanlde %p\n", e_code, cq);
+ DP_ERR(dev, "CQ event %d on handle %p\n", e_code, cq);
break;
case EVENT_TYPE_QP:
qp = (struct qedr_qp *)(uintptr_t)roce_handle64;
@@ -655,7 +751,27 @@ void qedr_affiliated_event(void *context, u8 e_code, void *fw_handle)
"Error: QP event with NULL pointer ibqp. Handle=%llx\n",
roce_handle64);
}
- DP_ERR(dev, "QP event %d on hanlde %p\n", e_code, qp);
+ DP_ERR(dev, "QP event %d on handle %p\n", e_code, qp);
+ break;
+ case EVENT_TYPE_SRQ:
+ srq_id = (u16)roce_handle64;
+ xa_lock_irqsave(&dev->srqs, flags);
+ srq = xa_load(&dev->srqs, srq_id);
+ if (srq) {
+ ibsrq = &srq->ibsrq;
+ if (ibsrq->event_handler) {
+ event.device = ibsrq->device;
+ event.element.srq = ibsrq;
+ ibsrq->event_handler(&event,
+ ibsrq->srq_context);
+ }
+ } else {
+ DP_NOTICE(dev,
+ "SRQ event with NULL pointer ibsrq. Handle=%llx\n",
+ roce_handle64);
+ }
+ xa_unlock_irqrestore(&dev->srqs, flags);
+ DP_NOTICE(dev, "SRQ event %d on handle %p\n", e_code, srq);
break;
default:
break;
@@ -697,6 +813,7 @@ static int qedr_init_hw(struct qedr_dev *dev)
in_params->events = &events;
in_params->cq_mode = QED_RDMA_CQ_MODE_32_BITS;
in_params->max_mtu = dev->ndev->mtu;
+ dev->iwarp_max_mtu = dev->ndev->mtu;
ether_addr_copy(&in_params->mac_addr[0], dev->ndev->dev_addr);
rc = dev->ops->rdma_init(dev->cdev, in_params);
@@ -707,7 +824,7 @@ static int qedr_init_hw(struct qedr_dev *dev)
if (rc)
goto out;
- dev->db_addr = (void *)(uintptr_t)out_params.dpi_addr;
+ dev->db_addr = out_params.dpi_addr;
dev->db_phys_addr = out_params.dpi_phys_addr;
dev->db_size = out_params.dpi_size;
dev->dpi = out_params.dpi;
@@ -721,7 +838,7 @@ out:
return rc;
}
-void qedr_stop_hw(struct qedr_dev *dev)
+static void qedr_stop_hw(struct qedr_dev *dev)
{
dev->ops->rdma_remove_user(dev->rdma_ctx, dev->dpi);
dev->ops->rdma_stop(dev->rdma_ctx);
@@ -732,9 +849,9 @@ static struct qedr_dev *qedr_add(struct qed_dev *cdev, struct pci_dev *pdev,
{
struct qed_dev_rdma_info dev_info;
struct qedr_dev *dev;
- int rc = 0, i;
+ int rc = 0;
- dev = (struct qedr_dev *)ib_alloc_device(sizeof(*dev));
+ dev = ib_alloc_device(qedr_dev, ibdev);
if (!dev) {
pr_err("Unable to allocate ib device\n");
return NULL;
@@ -757,12 +874,25 @@ static struct qedr_dev *qedr_add(struct qed_dev *cdev, struct pci_dev *pdev,
if (rc)
goto init_err;
+ dev->user_dpm_enabled = dev_info.user_dpm_enabled;
+ dev->rdma_type = dev_info.rdma_type;
dev->num_hwfns = dev_info.common.num_hwfns;
+
+ if (IS_IWARP(dev) && QEDR_IS_CMT(dev)) {
+ rc = dev->ops->iwarp_set_engine_affin(cdev, false);
+ if (rc) {
+ DP_ERR(dev, "iWARP is disabled over a 100g device Enabling it may impact L2 performance. To enable it run devlink dev param set <dev> name iwarp_cmt value true cmode runtime\n");
+ goto init_err;
+ }
+ }
+ dev->affin_hwfn_idx = dev->ops->common->get_affin_hwfn_idx(cdev);
+
dev->rdma_ctx = dev->ops->rdma_get_rdma_ctx(cdev);
dev->num_cnq = dev->ops->rdma_get_min_cnq_msix(cdev);
if (!dev->num_cnq) {
- DP_ERR(dev, "not enough CNQ resources.\n");
+ DP_ERR(dev, "Failed. At least one CNQ is required.\n");
+ rc = -ENOMEM;
goto init_err;
}
@@ -788,15 +918,12 @@ static struct qedr_dev *qedr_add(struct qed_dev *cdev, struct pci_dev *pdev,
goto reg_err;
}
- for (i = 0; i < ARRAY_SIZE(qedr_attributes); i++)
- if (device_create_file(&dev->ibdev.dev, qedr_attributes[i]))
- goto sysfs_err;
+ if (!test_and_set_bit(QEDR_ENET_STATE_BIT, &dev->enet_state))
+ qedr_ib_dispatch_event(dev, QEDR_PORT, IB_EVENT_PORT_ACTIVE);
DP_DEBUG(dev, QEDR_MSG_INIT, "qedr driver loaded successfully\n");
return dev;
-sysfs_err:
- ib_unregister_device(&dev->ibdev);
reg_err:
qedr_sync_free_irqs(dev);
irq_err:
@@ -815,20 +942,22 @@ static void qedr_remove(struct qedr_dev *dev)
/* First unregister with stack to stop all the active traffic
* of the registered clients.
*/
- qedr_remove_sysfiles(dev);
ib_unregister_device(&dev->ibdev);
qedr_stop_hw(dev);
qedr_sync_free_irqs(dev);
qedr_free_resources(dev);
+
+ if (IS_IWARP(dev) && QEDR_IS_CMT(dev))
+ dev->ops->iwarp_set_engine_affin(dev->cdev, true);
+
ib_dealloc_device(&dev->ibdev);
}
-static int qedr_close(struct qedr_dev *dev)
+static void qedr_close(struct qedr_dev *dev)
{
- qedr_ib_dispatch_event(dev, 1, IB_EVENT_PORT_ERR);
-
- return 0;
+ if (test_and_clear_bit(QEDR_ENET_STATE_BIT, &dev->enet_state))
+ qedr_ib_dispatch_event(dev, QEDR_PORT, IB_EVENT_PORT_ERR);
}
static void qedr_shutdown(struct qedr_dev *dev)
@@ -837,6 +966,12 @@ static void qedr_shutdown(struct qedr_dev *dev)
qedr_remove(dev);
}
+static void qedr_open(struct qedr_dev *dev)
+{
+ if (!test_and_set_bit(QEDR_ENET_STATE_BIT, &dev->enet_state))
+ qedr_ib_dispatch_event(dev, QEDR_PORT, IB_EVENT_PORT_ACTIVE);
+}
+
static void qedr_mac_address_change(struct qedr_dev *dev)
{
union ib_gid *sgid = &dev->sgid_tbl[0];
@@ -857,13 +992,13 @@ static void qedr_mac_address_change(struct qedr_dev *dev)
memcpy(&sgid->raw[8], guid, sizeof(guid));
/* Update LL2 */
- rc = dev->ops->roce_ll2_set_mac_filter(dev->cdev,
- dev->gsi_ll2_mac_address,
- dev->ndev->dev_addr);
+ rc = dev->ops->ll2_set_mac_filter(dev->cdev,
+ dev->gsi_ll2_mac_address,
+ dev->ndev->dev_addr);
ether_addr_copy(dev->gsi_ll2_mac_address, dev->ndev->dev_addr);
- qedr_ib_dispatch_event(dev, 1, IB_EVENT_GID_CHANGE);
+ qedr_ib_dispatch_event(dev, QEDR_PORT, IB_EVENT_GID_CHANGE);
if (rc)
DP_ERR(dev, "Error updating mac filter\n");
@@ -873,11 +1008,11 @@ static void qedr_mac_address_change(struct qedr_dev *dev)
* initialization done before RoCE driver notifies
* event to stack.
*/
-static void qedr_notify(struct qedr_dev *dev, enum qede_roce_event event)
+static void qedr_notify(struct qedr_dev *dev, enum qede_rdma_event event)
{
switch (event) {
case QEDE_UP:
- qedr_ib_dispatch_event(dev, 1, IB_EVENT_PORT_ACTIVE);
+ qedr_open(dev);
break;
case QEDE_DOWN:
qedr_close(dev);
@@ -888,6 +1023,13 @@ static void qedr_notify(struct qedr_dev *dev, enum qede_roce_event event)
case QEDE_CHANGE_ADDR:
qedr_mac_address_change(dev);
break;
+ case QEDE_CHANGE_MTU:
+ if (rdma_protocol_iwarp(&dev->ibdev, 1))
+ if (dev->ndev->mtu != dev->iwarp_max_mtu)
+ DP_NOTICE(dev,
+ "Mtu was changed from %d to %d. This will not take affect for iWARP until qedr is reloaded\n",
+ dev->iwarp_max_mtu, dev->ndev->mtu);
+ break;
default:
pr_err("Event not supported\n");
}
@@ -902,12 +1044,12 @@ static struct qedr_driver qedr_drv = {
static int __init qedr_init_module(void)
{
- return qede_roce_register_driver(&qedr_drv);
+ return qede_rdma_register_driver(&qedr_drv);
}
static void __exit qedr_exit_module(void)
{
- qede_roce_unregister_driver(&qedr_drv);
+ qede_rdma_unregister_driver(&qedr_drv);
}
module_init(qedr_init_module);
diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h
index 620badd7d4fb..db9ef3e1eb97 100644
--- a/drivers/infiniband/hw/qedr/qedr.h
+++ b/drivers/infiniband/hw/qedr/qedr.h
@@ -33,16 +33,20 @@
#define __QEDR_H__
#include <linux/pci.h>
+#include <linux/xarray.h>
#include <rdma/ib_addr.h>
#include <linux/qed/qed_if.h>
#include <linux/qed/qed_chain.h>
-#include <linux/qed/qed_roce_if.h>
-#include <linux/qed/qede_roce.h>
-#include "qedr_hsi.h"
+#include <linux/qed/qed_rdma_if.h>
+#include <linux/qed/qede_rdma.h>
+#include <linux/qed/roce_common.h>
+#include <linux/completion.h>
+#include "qedr_hsi_rdma.h"
-#define QEDR_MODULE_VERSION "8.10.10.0"
#define QEDR_NODE_DESC "QLogic 579xx RoCE HCA"
-#define DP_NAME(dev) ((dev)->ibdev.name)
+#define DP_NAME(_dev) dev_name(&(_dev)->ibdev.dev)
+#define IS_IWARP(_dev) ((_dev)->rdma_type == QED_RDMA_TYPE_IWARP)
+#define IS_ROCE(_dev) ((_dev)->rdma_type == QED_RDMA_TYPE_ROCE)
#define DP_DEBUG(dev, module, fmt, ...) \
pr_debug("(%s) " module ": " fmt, \
@@ -55,9 +59,14 @@
#define QEDR_MSG_RQ " RQ"
#define QEDR_MSG_SQ " SQ"
#define QEDR_MSG_QP " QP"
+#define QEDR_MSG_SRQ " SRQ"
#define QEDR_MSG_GSI " GSI"
+#define QEDR_MSG_IWARP " IW"
-#define QEDR_CQ_MAGIC_NUMBER (0x11223344)
+#define QEDR_CQ_MAGIC_NUMBER (0x11223344)
+
+#define FW_PAGE_SIZE (RDMA_RING_PAGE_SIZE)
+#define FW_PAGE_SHIFT (12)
struct qedr_dev;
@@ -94,7 +103,6 @@ struct qedr_device_attr {
u64 max_mr_size;
u32 max_cqe;
u32 max_mw;
- u32 max_fmr;
u32 max_mr_mw_fmr_pbl;
u64 max_mr_mw_fmr_size;
u32 max_pd;
@@ -113,6 +121,8 @@ struct qedr_device_attr {
struct qed_rdma_events events;
};
+#define QEDR_ENET_STATE_BIT (0)
+
struct qedr_dev {
struct ib_device ibdev;
struct qed_dev *cdev;
@@ -147,12 +157,25 @@ struct qedr_dev {
u32 dp_module;
u8 dp_level;
u8 num_hwfns;
+#define QEDR_IS_CMT(dev) ((dev)->num_hwfns > 1)
+ u8 affin_hwfn_idx;
+ u8 gsi_ll2_handle;
+
uint wq_multiplier;
u8 gsi_ll2_mac_address[ETH_ALEN];
int gsi_qp_created;
struct qedr_cq *gsi_sqcq;
struct qedr_cq *gsi_rqcq;
struct qedr_qp *gsi_qp;
+ enum qed_rdma_type rdma_type;
+ struct xarray qps;
+ struct xarray srqs;
+ struct workqueue_struct *iwarp_wq;
+ u16 iwarp_max_mtu;
+
+ unsigned long enet_state;
+
+ u8 user_dpm_enabled;
};
#define QEDR_MAX_SQ_PBL (0x8000)
@@ -188,6 +211,7 @@ struct qedr_dev {
#define QEDR_ROCE_MAX_CNQ_SIZE (0x4000)
#define QEDR_MAX_PORT (1)
+#define QEDR_PORT (1)
#define QEDR_UVERBS(CMD_NAME) (1ull << IB_USER_VERBS_CMD_##CMD_NAME)
@@ -205,15 +229,18 @@ struct qedr_ucontext {
struct ib_ucontext ibucontext;
struct qedr_dev *dev;
struct qedr_pd *pd;
- u64 dpi_addr;
+ void __iomem *dpi_addr;
+ struct rdma_user_mmap_entry *db_mmap_entry;
u64 dpi_phys_addr;
u32 dpi_size;
u16 dpi;
+ bool db_rec;
+ u8 edpm_mode;
+};
- struct list_head mm_head;
-
- /* Lock to protect mm list */
- struct mutex mm_list_lock;
+union db_prod32 {
+ struct rdma_pwm_val16_data data;
+ u32 raw;
};
union db_prod64 {
@@ -241,6 +268,13 @@ struct qedr_userq {
struct qedr_pbl *pbl_tbl;
u64 buf_addr;
size_t buf_len;
+
+ /* doorbell recovery */
+ void __iomem *db_addr;
+ struct qedr_user_db_rec *db_rec_data;
+ struct rdma_user_mmap_entry *db_mmap_entry;
+ void __iomem *db_rec_db2_addr;
+ union db_prod32 db_rec_db2_data;
};
struct qedr_cq {
@@ -251,9 +285,6 @@ struct qedr_cq {
u16 icid;
- /* Lock to protect completion handler */
- spinlock_t comp_handler_lock;
-
/* Lock to protect multiplem CQ's */
spinlock_t cq_lock;
u8 arm_flags;
@@ -269,6 +300,8 @@ struct qedr_cq {
u32 cq_cons;
struct qedr_userq q;
+ u8 destroyed;
+ u16 cnq_notif;
};
struct qedr_pd {
@@ -277,17 +310,9 @@ struct qedr_pd {
struct qedr_ucontext *uctx;
};
-struct qedr_mm {
- struct {
- u64 phy_addr;
- unsigned long len;
- } key;
- struct list_head entry;
-};
-
-union db_prod32 {
- struct rdma_pwm_val16_data data;
- u32 raw;
+struct qedr_xrcd {
+ struct ib_xrcd ibxrcd;
+ u16 xrcd_id;
};
struct qedr_qp_hwq_info {
@@ -306,6 +331,9 @@ struct qedr_qp_hwq_info {
/* DB */
void __iomem *db;
union db_prod32 db_data;
+
+ void __iomem *iwarp_db2;
+ union db_prod32 iwarp_db2_data;
};
#define QEDR_INC_SW_IDX(p_info, index) \
@@ -314,6 +342,35 @@ struct qedr_qp_hwq_info {
qed_chain_get_capacity(p_info->pbl) \
} while (0)
+struct qedr_srq_hwq_info {
+ u32 max_sges;
+ u32 max_wr;
+ struct qed_chain pbl;
+ u64 p_phys_addr_tbl;
+ u32 wqe_prod;
+ u32 sge_prod;
+ u32 wr_prod_cnt;
+ atomic_t wr_cons_cnt;
+ u32 num_elems;
+
+ struct rdma_srq_producers *virt_prod_pair_addr;
+ dma_addr_t phy_prod_pair_addr;
+};
+
+struct qedr_srq {
+ struct ib_srq ibsrq;
+ struct qedr_dev *dev;
+
+ struct qedr_userq usrq;
+ struct qedr_srq_hwq_info hw_srq;
+ struct ib_umem *prod_umem;
+ u16 srq_id;
+ u32 srq_limit;
+ bool is_xrc;
+ /* lock to protect srq recv post */
+ spinlock_t lock;
+};
+
enum qedr_qp_err_bitmap {
QEDR_QP_ERR_SQ_FULL = 1,
QEDR_QP_ERR_RQ_FULL = 2,
@@ -323,10 +380,20 @@ enum qedr_qp_err_bitmap {
QEDR_QP_ERR_RQ_PBL_FULL = 32,
};
+enum qedr_qp_create_type {
+ QEDR_QP_CREATE_NONE,
+ QEDR_QP_CREATE_USER,
+ QEDR_QP_CREATE_KERNEL,
+};
+
+enum qedr_iwarp_cm_flags {
+ QEDR_IWARP_CM_WAIT_FOR_CONNECT = BIT(0),
+ QEDR_IWARP_CM_WAIT_FOR_DISCONNECT = BIT(1),
+};
+
struct qedr_qp {
struct ib_qp ibqp; /* must be first */
struct qedr_dev *dev;
-
struct qedr_qp_hwq_info sq;
struct qedr_qp_hwq_info rq;
@@ -341,6 +408,7 @@ struct qedr_qp {
u32 id;
struct qedr_pd *pd;
enum ib_qp_type qp_type;
+ enum qedr_qp_create_type create_type;
struct qed_rdma_qp *qed_qp;
u32 qp_id;
u16 icid;
@@ -350,6 +418,7 @@ struct qedr_qp {
u32 sq_psn;
u32 qkey;
u32 dest_qp_num;
+ u8 timeout;
/* Relevant to qps created from kernel space only (ULPs) */
u8 prev_wqe_size;
@@ -376,18 +445,24 @@ struct qedr_qp {
u8 wqe_size;
u8 smac[ETH_ALEN];
- u16 vlan_id;
+ u16 vlan;
int rc;
} *rqe_wr_id;
/* Relevant to qps created from user space only (applications) */
struct qedr_userq usq;
struct qedr_userq urq;
+
+ /* synchronization objects used with iwarp ep */
+ struct kref refcnt;
+ struct completion iwarp_cm_comp;
+ struct completion qp_rel_comp;
+ unsigned long iwarp_cm_flags; /* enum iwarp_cm_flags */
};
struct qedr_ah {
struct ib_ah ibah;
- struct ib_ah_attr attr;
+ struct rdma_ah_attr attr;
};
enum qedr_mr_type {
@@ -420,13 +495,26 @@ struct qedr_mr {
u32 npages;
};
+struct qedr_user_mmap_entry {
+ struct rdma_user_mmap_entry rdma_entry;
+ struct qedr_dev *dev;
+ union {
+ u64 io_address;
+ void *address;
+ };
+ size_t length;
+ u16 dpi;
+ u8 mmap_flag;
+};
+
#define SET_FIELD2(value, name, flag) ((value) |= ((flag) << (name ## _SHIFT)))
#define QEDR_RESP_IMM (RDMA_CQE_RESPONDER_IMM_FLG_MASK << \
RDMA_CQE_RESPONDER_IMM_FLG_SHIFT)
#define QEDR_RESP_RDMA (RDMA_CQE_RESPONDER_RDMA_FLG_MASK << \
RDMA_CQE_RESPONDER_RDMA_FLG_SHIFT)
-#define QEDR_RESP_RDMA_IMM (QEDR_RESP_IMM | QEDR_RESP_RDMA)
+#define QEDR_RESP_INV (RDMA_CQE_RESPONDER_INV_FLG_MASK << \
+ RDMA_CQE_RESPONDER_INV_FLG_SHIFT)
static inline void qedr_inc_sw_cons(struct qedr_qp_hwq_info *info)
{
@@ -440,23 +528,43 @@ static inline void qedr_inc_sw_prod(struct qedr_qp_hwq_info *info)
}
static inline int qedr_get_dmac(struct qedr_dev *dev,
- struct ib_ah_attr *ah_attr, u8 *mac_addr)
+ struct rdma_ah_attr *ah_attr, u8 *mac_addr)
{
union ib_gid zero_sgid = { { 0 } };
struct in6_addr in6;
+ const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr);
+ u8 *dmac;
- if (!memcmp(&ah_attr->grh.dgid, &zero_sgid, sizeof(union ib_gid))) {
+ if (!memcmp(&grh->dgid, &zero_sgid, sizeof(union ib_gid))) {
DP_ERR(dev, "Local port GID not supported\n");
eth_zero_addr(mac_addr);
return -EINVAL;
}
- memcpy(&in6, ah_attr->grh.dgid.raw, sizeof(in6));
- ether_addr_copy(mac_addr, ah_attr->dmac);
+ memcpy(&in6, grh->dgid.raw, sizeof(in6));
+ dmac = rdma_ah_retrieve_dmac(ah_attr);
+ if (!dmac)
+ return -EINVAL;
+ ether_addr_copy(mac_addr, dmac);
return 0;
}
+struct qedr_iw_listener {
+ struct qedr_dev *dev;
+ struct iw_cm_id *cm_id;
+ int backlog;
+ void *qed_handle;
+};
+
+struct qedr_iw_ep {
+ struct qedr_dev *dev;
+ struct iw_cm_id *cm_id;
+ struct qedr_qp *qp;
+ void *qed_context;
+ struct kref refcnt;
+};
+
static inline
struct qedr_ucontext *get_qedr_ucontext(struct ib_ucontext *ibucontext)
{
@@ -473,6 +581,11 @@ static inline struct qedr_pd *get_qedr_pd(struct ib_pd *ibpd)
return container_of(ibpd, struct qedr_pd, ibpd);
}
+static inline struct qedr_xrcd *get_qedr_xrcd(struct ib_xrcd *ibxrcd)
+{
+ return container_of(ibxrcd, struct qedr_xrcd, ibxrcd);
+}
+
static inline struct qedr_cq *get_qedr_cq(struct ib_cq *ibcq)
{
return container_of(ibcq, struct qedr_cq, ibcq);
@@ -492,4 +605,38 @@ static inline struct qedr_mr *get_qedr_mr(struct ib_mr *ibmr)
{
return container_of(ibmr, struct qedr_mr, ibmr);
}
+
+static inline struct qedr_srq *get_qedr_srq(struct ib_srq *ibsrq)
+{
+ return container_of(ibsrq, struct qedr_srq, ibsrq);
+}
+
+static inline bool qedr_qp_has_srq(struct qedr_qp *qp)
+{
+ return qp->srq;
+}
+
+static inline bool qedr_qp_has_sq(struct qedr_qp *qp)
+{
+ if (qp->qp_type == IB_QPT_GSI || qp->qp_type == IB_QPT_XRC_TGT)
+ return false;
+
+ return true;
+}
+
+static inline bool qedr_qp_has_rq(struct qedr_qp *qp)
+{
+ if (qp->qp_type == IB_QPT_GSI || qp->qp_type == IB_QPT_XRC_INI ||
+ qp->qp_type == IB_QPT_XRC_TGT || qedr_qp_has_srq(qp))
+ return false;
+
+ return true;
+}
+
+static inline struct qedr_user_mmap_entry *
+get_qedr_mmap_entry(struct rdma_user_mmap_entry *rdma_entry)
+{
+ return container_of(rdma_entry, struct qedr_user_mmap_entry,
+ rdma_entry);
+}
#endif
diff --git a/drivers/infiniband/hw/qedr/qedr_hsi.h b/drivers/infiniband/hw/qedr/qedr_hsi.h
deleted file mode 100644
index 66d27521373f..000000000000
--- a/drivers/infiniband/hw/qedr/qedr_hsi.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/* QLogic qedr NIC Driver
- * Copyright (c) 2015-2016 QLogic Corporation
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and /or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef __QED_HSI_ROCE__
-#define __QED_HSI_ROCE__
-
-#include <linux/qed/common_hsi.h>
-#include <linux/qed/roce_common.h>
-#include "qedr_hsi_rdma.h"
-
-/* Affiliated asynchronous events / errors enumeration */
-enum roce_async_events_type {
- ROCE_ASYNC_EVENT_NONE = 0,
- ROCE_ASYNC_EVENT_COMM_EST = 1,
- ROCE_ASYNC_EVENT_SQ_DRAINED,
- ROCE_ASYNC_EVENT_SRQ_LIMIT,
- ROCE_ASYNC_EVENT_LAST_WQE_REACHED,
- ROCE_ASYNC_EVENT_CQ_ERR,
- ROCE_ASYNC_EVENT_LOCAL_INVALID_REQUEST_ERR,
- ROCE_ASYNC_EVENT_LOCAL_CATASTROPHIC_ERR,
- ROCE_ASYNC_EVENT_LOCAL_ACCESS_ERR,
- ROCE_ASYNC_EVENT_QP_CATASTROPHIC_ERR,
- ROCE_ASYNC_EVENT_CQ_OVERFLOW_ERR,
- ROCE_ASYNC_EVENT_SRQ_EMPTY,
- MAX_ROCE_ASYNC_EVENTS_TYPE
-};
-
-#endif /* __QED_HSI_ROCE__ */
diff --git a/drivers/infiniband/hw/qedr/qedr_hsi_rdma.h b/drivers/infiniband/hw/qedr/qedr_hsi_rdma.h
index 5c98d2055cad..228dd7d49622 100644
--- a/drivers/infiniband/hw/qedr/qedr_hsi_rdma.h
+++ b/drivers/infiniband/hw/qedr/qedr_hsi_rdma.h
@@ -45,7 +45,7 @@ struct rdma_cqe_responder {
__le32 imm_data_or_inv_r_Key;
__le32 length;
__le32 imm_data_hi;
- __le16 rq_cons;
+ __le16 rq_cons_or_srq_id;
u8 flags;
#define RDMA_CQE_RESPONDER_TOGGLE_BIT_MASK 0x1
#define RDMA_CQE_RESPONDER_TOGGLE_BIT_SHIFT 0
@@ -115,6 +115,8 @@ enum rdma_cqe_requester_status_enum {
RDMA_CQE_REQ_STS_RNR_NAK_RETRY_CNT_ERR,
RDMA_CQE_REQ_STS_TRANSPORT_RETRY_CNT_ERR,
RDMA_CQE_REQ_STS_WORK_REQUEST_FLUSHED_ERR,
+ RDMA_CQE_REQ_STS_XRC_VOILATION_ERR,
+ RDMA_CQE_REQ_STS_SIG_ERR,
MAX_RDMA_CQE_REQUESTER_STATUS_ENUM
};
@@ -136,6 +138,7 @@ enum rdma_cqe_type {
RDMA_CQE_TYPE_REQUESTER,
RDMA_CQE_TYPE_RESPONDER_RQ,
RDMA_CQE_TYPE_RESPONDER_SRQ,
+ RDMA_CQE_TYPE_RESPONDER_XRC_SRQ,
RDMA_CQE_TYPE_INVALID,
MAX_RDMA_CQE_TYPE
};
@@ -150,12 +153,18 @@ struct rdma_rq_sge {
struct regpair addr;
__le32 length;
__le32 flags;
-#define RDMA_RQ_SGE_L_KEY_MASK 0x3FFFFFF
-#define RDMA_RQ_SGE_L_KEY_SHIFT 0
+#define RDMA_RQ_SGE_L_KEY_LO_MASK 0x3FFFFFF
+#define RDMA_RQ_SGE_L_KEY_LO_SHIFT 0
#define RDMA_RQ_SGE_NUM_SGES_MASK 0x7
#define RDMA_RQ_SGE_NUM_SGES_SHIFT 26
-#define RDMA_RQ_SGE_RESERVED0_MASK 0x7
-#define RDMA_RQ_SGE_RESERVED0_SHIFT 29
+#define RDMA_RQ_SGE_L_KEY_HI_MASK 0x7
+#define RDMA_RQ_SGE_L_KEY_HI_SHIFT 29
+};
+
+struct rdma_srq_wqe_header {
+ struct regpair wr_id;
+ u8 num_sges /* number of SGEs in WQE */;
+ u8 reserved2[7];
};
struct rdma_srq_sge {
@@ -164,6 +173,18 @@ struct rdma_srq_sge {
__le32 l_key;
};
+union rdma_srq_elm {
+ struct rdma_srq_wqe_header header;
+ struct rdma_srq_sge sge;
+};
+
+/* Rdma doorbell data for flags update */
+struct rdma_pwm_flags_data {
+ __le16 icid; /* internal CID */
+ u8 agg_flags; /* aggregative flags */
+ u8 reserved;
+};
+
/* Rdma doorbell data for SQ and RQ */
struct rdma_pwm_val16_data {
__le16 icid;
@@ -180,12 +201,16 @@ struct rdma_pwm_val32_data {
__le16 icid;
u8 agg_flags;
u8 params;
-#define RDMA_PWM_VAL32_DATA_AGG_CMD_MASK 0x3
-#define RDMA_PWM_VAL32_DATA_AGG_CMD_SHIFT 0
-#define RDMA_PWM_VAL32_DATA_BYPASS_EN_MASK 0x1
-#define RDMA_PWM_VAL32_DATA_BYPASS_EN_SHIFT 2
-#define RDMA_PWM_VAL32_DATA_RESERVED_MASK 0x1F
-#define RDMA_PWM_VAL32_DATA_RESERVED_SHIFT 3
+#define RDMA_PWM_VAL32_DATA_AGG_CMD_MASK 0x3
+#define RDMA_PWM_VAL32_DATA_AGG_CMD_SHIFT 0
+#define RDMA_PWM_VAL32_DATA_BYPASS_EN_MASK 0x1
+#define RDMA_PWM_VAL32_DATA_BYPASS_EN_SHIFT 2
+#define RDMA_PWM_VAL32_DATA_CONN_TYPE_IS_IWARP_MASK 0x1
+#define RDMA_PWM_VAL32_DATA_CONN_TYPE_IS_IWARP_SHIFT 3
+#define RDMA_PWM_VAL32_DATA_SET_16B_VAL_MASK 0x1
+#define RDMA_PWM_VAL32_DATA_SET_16B_VAL_SHIFT 4
+#define RDMA_PWM_VAL32_DATA_RESERVED_MASK 0x7
+#define RDMA_PWM_VAL32_DATA_RESERVED_SHIFT 5
__le32 value;
};
@@ -228,18 +253,39 @@ enum rdma_dif_io_direction_flg {
MAX_RDMA_DIF_IO_DIRECTION_FLG
};
-/* RDMA DIF Runt Result Structure */
-struct rdma_dif_runt_result {
- __le16 guard_tag;
- __le16 reserved[3];
+struct rdma_dif_params {
+ __le32 base_ref_tag;
+ __le16 app_tag;
+ __le16 app_tag_mask;
+ __le16 runt_crc_value;
+ __le16 flags;
+#define RDMA_DIF_PARAMS_IO_DIRECTION_FLG_MASK 0x1
+#define RDMA_DIF_PARAMS_IO_DIRECTION_FLG_SHIFT 0
+#define RDMA_DIF_PARAMS_BLOCK_SIZE_MASK 0x1
+#define RDMA_DIF_PARAMS_BLOCK_SIZE_SHIFT 1
+#define RDMA_DIF_PARAMS_RUNT_VALID_FLG_MASK 0x1
+#define RDMA_DIF_PARAMS_RUNT_VALID_FLG_SHIFT 2
+#define RDMA_DIF_PARAMS_VALIDATE_CRC_GUARD_MASK 0x1
+#define RDMA_DIF_PARAMS_VALIDATE_CRC_GUARD_SHIFT 3
+#define RDMA_DIF_PARAMS_VALIDATE_REF_TAG_MASK 0x1
+#define RDMA_DIF_PARAMS_VALIDATE_REF_TAG_SHIFT 4
+#define RDMA_DIF_PARAMS_VALIDATE_APP_TAG_MASK 0x1
+#define RDMA_DIF_PARAMS_VALIDATE_APP_TAG_SHIFT 5
+#define RDMA_DIF_PARAMS_CRC_SEED_MASK 0x1
+#define RDMA_DIF_PARAMS_CRC_SEED_SHIFT 6
+#define RDMA_DIF_PARAMS_RX_REF_TAG_CONST_MASK 0x1
+#define RDMA_DIF_PARAMS_RX_REF_TAG_CONST_SHIFT 7
+#define RDMA_DIF_PARAMS_BLOCK_GUARD_TYPE_MASK 0x1
+#define RDMA_DIF_PARAMS_BLOCK_GUARD_TYPE_SHIFT 8
+#define RDMA_DIF_PARAMS_APP_ESCAPE_MASK 0x1
+#define RDMA_DIF_PARAMS_APP_ESCAPE_SHIFT 9
+#define RDMA_DIF_PARAMS_REF_ESCAPE_MASK 0x1
+#define RDMA_DIF_PARAMS_REF_ESCAPE_SHIFT 10
+#define RDMA_DIF_PARAMS_RESERVED4_MASK 0x1F
+#define RDMA_DIF_PARAMS_RESERVED4_SHIFT 11
+ __le32 reserved5;
};
-/* Memory window type enumeration */
-enum rdma_mw_type {
- RDMA_MW_TYPE_1,
- RDMA_MW_TYPE_2A,
- MAX_RDMA_MW_TYPE
-};
struct rdma_sq_atomic_wqe {
__le32 reserved1;
@@ -321,17 +367,17 @@ struct rdma_sq_bind_wqe {
#define RDMA_SQ_BIND_WQE_SE_FLG_SHIFT 3
#define RDMA_SQ_BIND_WQE_INLINE_FLG_MASK 0x1
#define RDMA_SQ_BIND_WQE_INLINE_FLG_SHIFT 4
-#define RDMA_SQ_BIND_WQE_RESERVED0_MASK 0x7
-#define RDMA_SQ_BIND_WQE_RESERVED0_SHIFT 5
+#define RDMA_SQ_BIND_WQE_DIF_ON_HOST_FLG_MASK 0x1
+#define RDMA_SQ_BIND_WQE_DIF_ON_HOST_FLG_SHIFT 5
+#define RDMA_SQ_BIND_WQE_RESERVED0_MASK 0x3
+#define RDMA_SQ_BIND_WQE_RESERVED0_SHIFT 6
u8 wqe_size;
u8 prev_wqe_size;
u8 bind_ctrl;
#define RDMA_SQ_BIND_WQE_ZERO_BASED_MASK 0x1
#define RDMA_SQ_BIND_WQE_ZERO_BASED_SHIFT 0
-#define RDMA_SQ_BIND_WQE_MW_TYPE_MASK 0x1
-#define RDMA_SQ_BIND_WQE_MW_TYPE_SHIFT 1
-#define RDMA_SQ_BIND_WQE_RESERVED1_MASK 0x3F
-#define RDMA_SQ_BIND_WQE_RESERVED1_SHIFT 2
+#define RDMA_SQ_BIND_WQE_RESERVED1_MASK 0x7F
+#define RDMA_SQ_BIND_WQE_RESERVED1_SHIFT 1
u8 access_ctrl;
#define RDMA_SQ_BIND_WQE_REMOTE_READ_MASK 0x1
#define RDMA_SQ_BIND_WQE_REMOTE_READ_SHIFT 0
@@ -350,6 +396,7 @@ struct rdma_sq_bind_wqe {
__le32 length_lo;
__le32 parent_l_key;
__le32 reserved4;
+ struct rdma_dif_params dif_params;
};
/* First element (16 bytes) of bind wqe */
@@ -379,10 +426,8 @@ struct rdma_sq_bind_wqe_2nd {
u8 bind_ctrl;
#define RDMA_SQ_BIND_WQE_2ND_ZERO_BASED_MASK 0x1
#define RDMA_SQ_BIND_WQE_2ND_ZERO_BASED_SHIFT 0
-#define RDMA_SQ_BIND_WQE_2ND_MW_TYPE_MASK 0x1
-#define RDMA_SQ_BIND_WQE_2ND_MW_TYPE_SHIFT 1
-#define RDMA_SQ_BIND_WQE_2ND_RESERVED1_MASK 0x3F
-#define RDMA_SQ_BIND_WQE_2ND_RESERVED1_SHIFT 2
+#define RDMA_SQ_BIND_WQE_2ND_RESERVED1_MASK 0x7F
+#define RDMA_SQ_BIND_WQE_2ND_RESERVED1_SHIFT 1
u8 access_ctrl;
#define RDMA_SQ_BIND_WQE_2ND_REMOTE_READ_MASK 0x1
#define RDMA_SQ_BIND_WQE_2ND_REMOTE_READ_SHIFT 0
@@ -403,6 +448,11 @@ struct rdma_sq_bind_wqe_2nd {
__le32 reserved4;
};
+/* Third element (16 bytes) of bind wqe */
+struct rdma_sq_bind_wqe_3rd {
+ struct rdma_dif_params dif_params;
+};
+
/* Structure with only the SQ WQE common
* fields. Size is of one SQ element (16B)
*/
@@ -473,28 +523,6 @@ struct rdma_sq_fmr_wqe {
u8 length_hi;
__le32 length_lo;
struct regpair pbl_addr;
- __le32 dif_base_ref_tag;
- __le16 dif_app_tag;
- __le16 dif_app_tag_mask;
- __le16 dif_runt_crc_value;
- __le16 dif_flags;
-#define RDMA_SQ_FMR_WQE_DIF_IO_DIRECTION_FLG_MASK 0x1
-#define RDMA_SQ_FMR_WQE_DIF_IO_DIRECTION_FLG_SHIFT 0
-#define RDMA_SQ_FMR_WQE_DIF_BLOCK_SIZE_MASK 0x1
-#define RDMA_SQ_FMR_WQE_DIF_BLOCK_SIZE_SHIFT 1
-#define RDMA_SQ_FMR_WQE_DIF_RUNT_VALID_FLG_MASK 0x1
-#define RDMA_SQ_FMR_WQE_DIF_RUNT_VALID_FLG_SHIFT 2
-#define RDMA_SQ_FMR_WQE_DIF_VALIDATE_CRC_GUARD_MASK 0x1
-#define RDMA_SQ_FMR_WQE_DIF_VALIDATE_CRC_GUARD_SHIFT 3
-#define RDMA_SQ_FMR_WQE_DIF_VALIDATE_REF_TAG_MASK 0x1
-#define RDMA_SQ_FMR_WQE_DIF_VALIDATE_REF_TAG_SHIFT 4
-#define RDMA_SQ_FMR_WQE_DIF_VALIDATE_APP_TAG_MASK 0x1
-#define RDMA_SQ_FMR_WQE_DIF_VALIDATE_APP_TAG_SHIFT 5
-#define RDMA_SQ_FMR_WQE_DIF_CRC_SEED_MASK 0x1
-#define RDMA_SQ_FMR_WQE_DIF_CRC_SEED_SHIFT 6
-#define RDMA_SQ_FMR_WQE_RESERVED4_MASK 0x1FF
-#define RDMA_SQ_FMR_WQE_RESERVED4_SHIFT 7
- __le32 Reserved5;
};
/* First element (16 bytes) of fmr wqe */
@@ -551,31 +579,6 @@ struct rdma_sq_fmr_wqe_2nd {
struct regpair pbl_addr;
};
-/* Third element (16 bytes) of fmr wqe */
-struct rdma_sq_fmr_wqe_3rd {
- __le32 dif_base_ref_tag;
- __le16 dif_app_tag;
- __le16 dif_app_tag_mask;
- __le16 dif_runt_crc_value;
- __le16 dif_flags;
-#define RDMA_SQ_FMR_WQE_3RD_DIF_IO_DIRECTION_FLG_MASK 0x1
-#define RDMA_SQ_FMR_WQE_3RD_DIF_IO_DIRECTION_FLG_SHIFT 0
-#define RDMA_SQ_FMR_WQE_3RD_DIF_BLOCK_SIZE_MASK 0x1
-#define RDMA_SQ_FMR_WQE_3RD_DIF_BLOCK_SIZE_SHIFT 1
-#define RDMA_SQ_FMR_WQE_3RD_DIF_RUNT_VALID_FLG_MASK 0x1
-#define RDMA_SQ_FMR_WQE_3RD_DIF_RUNT_VALID_FLG_SHIFT 2
-#define RDMA_SQ_FMR_WQE_3RD_DIF_VALIDATE_CRC_GUARD_MASK 0x1
-#define RDMA_SQ_FMR_WQE_3RD_DIF_VALIDATE_CRC_GUARD_SHIFT 3
-#define RDMA_SQ_FMR_WQE_3RD_DIF_VALIDATE_REF_TAG_MASK 0x1
-#define RDMA_SQ_FMR_WQE_3RD_DIF_VALIDATE_REF_TAG_SHIFT 4
-#define RDMA_SQ_FMR_WQE_3RD_DIF_VALIDATE_APP_TAG_MASK 0x1
-#define RDMA_SQ_FMR_WQE_3RD_DIF_VALIDATE_APP_TAG_SHIFT 5
-#define RDMA_SQ_FMR_WQE_3RD_DIF_CRC_SEED_MASK 0x1
-#define RDMA_SQ_FMR_WQE_3RD_DIF_CRC_SEED_SHIFT 6
-#define RDMA_SQ_FMR_WQE_3RD_RESERVED4_MASK 0x1FF
-#define RDMA_SQ_FMR_WQE_3RD_RESERVED4_SHIFT 7
- __le32 Reserved5;
-};
struct rdma_sq_local_inv_wqe {
struct regpair reserved;
@@ -606,20 +609,22 @@ struct rdma_sq_rdma_wqe {
__le32 xrc_srq;
u8 req_type;
u8 flags;
-#define RDMA_SQ_RDMA_WQE_COMP_FLG_MASK 0x1
-#define RDMA_SQ_RDMA_WQE_COMP_FLG_SHIFT 0
-#define RDMA_SQ_RDMA_WQE_RD_FENCE_FLG_MASK 0x1
-#define RDMA_SQ_RDMA_WQE_RD_FENCE_FLG_SHIFT 1
-#define RDMA_SQ_RDMA_WQE_INV_FENCE_FLG_MASK 0x1
-#define RDMA_SQ_RDMA_WQE_INV_FENCE_FLG_SHIFT 2
-#define RDMA_SQ_RDMA_WQE_SE_FLG_MASK 0x1
-#define RDMA_SQ_RDMA_WQE_SE_FLG_SHIFT 3
-#define RDMA_SQ_RDMA_WQE_INLINE_FLG_MASK 0x1
-#define RDMA_SQ_RDMA_WQE_INLINE_FLG_SHIFT 4
-#define RDMA_SQ_RDMA_WQE_DIF_ON_HOST_FLG_MASK 0x1
-#define RDMA_SQ_RDMA_WQE_DIF_ON_HOST_FLG_SHIFT 5
-#define RDMA_SQ_RDMA_WQE_RESERVED0_MASK 0x3
-#define RDMA_SQ_RDMA_WQE_RESERVED0_SHIFT 6
+#define RDMA_SQ_RDMA_WQE_COMP_FLG_MASK 0x1
+#define RDMA_SQ_RDMA_WQE_COMP_FLG_SHIFT 0
+#define RDMA_SQ_RDMA_WQE_RD_FENCE_FLG_MASK 0x1
+#define RDMA_SQ_RDMA_WQE_RD_FENCE_FLG_SHIFT 1
+#define RDMA_SQ_RDMA_WQE_INV_FENCE_FLG_MASK 0x1
+#define RDMA_SQ_RDMA_WQE_INV_FENCE_FLG_SHIFT 2
+#define RDMA_SQ_RDMA_WQE_SE_FLG_MASK 0x1
+#define RDMA_SQ_RDMA_WQE_SE_FLG_SHIFT 3
+#define RDMA_SQ_RDMA_WQE_INLINE_FLG_MASK 0x1
+#define RDMA_SQ_RDMA_WQE_INLINE_FLG_SHIFT 4
+#define RDMA_SQ_RDMA_WQE_DIF_ON_HOST_FLG_MASK 0x1
+#define RDMA_SQ_RDMA_WQE_DIF_ON_HOST_FLG_SHIFT 5
+#define RDMA_SQ_RDMA_WQE_READ_INV_FLG_MASK 0x1
+#define RDMA_SQ_RDMA_WQE_READ_INV_FLG_SHIFT 6
+#define RDMA_SQ_RDMA_WQE_RESERVED1_MASK 0x1
+#define RDMA_SQ_RDMA_WQE_RESERVED1_SHIFT 7
u8 wqe_size;
u8 prev_wqe_size;
struct regpair remote_va;
@@ -627,13 +632,9 @@ struct rdma_sq_rdma_wqe {
u8 dif_flags;
#define RDMA_SQ_RDMA_WQE_DIF_BLOCK_SIZE_MASK 0x1
#define RDMA_SQ_RDMA_WQE_DIF_BLOCK_SIZE_SHIFT 0
-#define RDMA_SQ_RDMA_WQE_DIF_FIRST_RDMA_IN_IO_FLG_MASK 0x1
-#define RDMA_SQ_RDMA_WQE_DIF_FIRST_RDMA_IN_IO_FLG_SHIFT 1
-#define RDMA_SQ_RDMA_WQE_DIF_LAST_RDMA_IN_IO_FLG_MASK 0x1
-#define RDMA_SQ_RDMA_WQE_DIF_LAST_RDMA_IN_IO_FLG_SHIFT 2
-#define RDMA_SQ_RDMA_WQE_RESERVED1_MASK 0x1F
-#define RDMA_SQ_RDMA_WQE_RESERVED1_SHIFT 3
- u8 reserved2[3];
+#define RDMA_SQ_RDMA_WQE_RESERVED2_MASK 0x7F
+#define RDMA_SQ_RDMA_WQE_RESERVED2_SHIFT 1
+ u8 reserved3[3];
};
/* First element (16 bytes) of rdma wqe */
@@ -655,8 +656,10 @@ struct rdma_sq_rdma_wqe_1st {
#define RDMA_SQ_RDMA_WQE_1ST_INLINE_FLG_SHIFT 4
#define RDMA_SQ_RDMA_WQE_1ST_DIF_ON_HOST_FLG_MASK 0x1
#define RDMA_SQ_RDMA_WQE_1ST_DIF_ON_HOST_FLG_SHIFT 5
-#define RDMA_SQ_RDMA_WQE_1ST_RESERVED0_MASK 0x3
-#define RDMA_SQ_RDMA_WQE_1ST_RESERVED0_SHIFT 6
+#define RDMA_SQ_RDMA_WQE_1ST_READ_INV_FLG_MASK 0x1
+#define RDMA_SQ_RDMA_WQE_1ST_READ_INV_FLG_SHIFT 6
+#define RDMA_SQ_RDMA_WQE_1ST_RESERVED0_MASK 0x1
+#define RDMA_SQ_RDMA_WQE_1ST_RESERVED0_SHIFT 7
u8 wqe_size;
u8 prev_wqe_size;
};
diff --git a/drivers/infiniband/hw/qedr/qedr_iw_cm.c b/drivers/infiniband/hw/qedr/qedr_iw_cm.c
new file mode 100644
index 000000000000..259303b9907c
--- /dev/null
+++ b/drivers/infiniband/hw/qedr/qedr_iw_cm.c
@@ -0,0 +1,819 @@
+/* QLogic qedr NIC Driver
+ * Copyright (c) 2015-2017 QLogic Corporation
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and /or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <net/ip.h>
+#include <net/ipv6.h>
+#include <net/udp.h>
+#include <net/addrconf.h>
+#include <net/route.h>
+#include <net/ip6_route.h>
+#include <net/flow.h>
+#include "qedr.h"
+#include "qedr_iw_cm.h"
+
+static inline void
+qedr_fill_sockaddr4(const struct qed_iwarp_cm_info *cm_info,
+ struct iw_cm_event *event)
+{
+ struct sockaddr_in *laddr = (struct sockaddr_in *)&event->local_addr;
+ struct sockaddr_in *raddr = (struct sockaddr_in *)&event->remote_addr;
+
+ laddr->sin_family = AF_INET;
+ raddr->sin_family = AF_INET;
+
+ laddr->sin_port = htons(cm_info->local_port);
+ raddr->sin_port = htons(cm_info->remote_port);
+
+ laddr->sin_addr.s_addr = htonl(cm_info->local_ip[0]);
+ raddr->sin_addr.s_addr = htonl(cm_info->remote_ip[0]);
+}
+
+static inline void
+qedr_fill_sockaddr6(const struct qed_iwarp_cm_info *cm_info,
+ struct iw_cm_event *event)
+{
+ struct sockaddr_in6 *laddr6 = (struct sockaddr_in6 *)&event->local_addr;
+ struct sockaddr_in6 *raddr6 =
+ (struct sockaddr_in6 *)&event->remote_addr;
+ int i;
+
+ laddr6->sin6_family = AF_INET6;
+ raddr6->sin6_family = AF_INET6;
+
+ laddr6->sin6_port = htons(cm_info->local_port);
+ raddr6->sin6_port = htons(cm_info->remote_port);
+
+ for (i = 0; i < 4; i++) {
+ laddr6->sin6_addr.in6_u.u6_addr32[i] =
+ htonl(cm_info->local_ip[i]);
+ raddr6->sin6_addr.in6_u.u6_addr32[i] =
+ htonl(cm_info->remote_ip[i]);
+ }
+}
+
+static void qedr_iw_free_qp(struct kref *ref)
+{
+ struct qedr_qp *qp = container_of(ref, struct qedr_qp, refcnt);
+
+ complete(&qp->qp_rel_comp);
+}
+
+static void
+qedr_iw_free_ep(struct kref *ref)
+{
+ struct qedr_iw_ep *ep = container_of(ref, struct qedr_iw_ep, refcnt);
+
+ if (ep->qp)
+ kref_put(&ep->qp->refcnt, qedr_iw_free_qp);
+
+ if (ep->cm_id)
+ ep->cm_id->rem_ref(ep->cm_id);
+
+ kfree(ep);
+}
+
+static void
+qedr_iw_mpa_request(void *context, struct qed_iwarp_cm_event_params *params)
+{
+ struct qedr_iw_listener *listener = (struct qedr_iw_listener *)context;
+ struct qedr_dev *dev = listener->dev;
+ struct iw_cm_event event;
+ struct qedr_iw_ep *ep;
+
+ ep = kzalloc(sizeof(*ep), GFP_ATOMIC);
+ if (!ep)
+ return;
+
+ ep->dev = dev;
+ ep->qed_context = params->ep_context;
+ kref_init(&ep->refcnt);
+
+ memset(&event, 0, sizeof(event));
+ event.event = IW_CM_EVENT_CONNECT_REQUEST;
+ event.status = params->status;
+
+ if (!IS_ENABLED(CONFIG_IPV6) ||
+ params->cm_info->ip_version == QED_TCP_IPV4)
+ qedr_fill_sockaddr4(params->cm_info, &event);
+ else
+ qedr_fill_sockaddr6(params->cm_info, &event);
+
+ event.provider_data = (void *)ep;
+ event.private_data = (void *)params->cm_info->private_data;
+ event.private_data_len = (u8)params->cm_info->private_data_len;
+ event.ord = params->cm_info->ord;
+ event.ird = params->cm_info->ird;
+
+ listener->cm_id->event_handler(listener->cm_id, &event);
+}
+
+static void
+qedr_iw_issue_event(void *context,
+ struct qed_iwarp_cm_event_params *params,
+ enum iw_cm_event_type event_type)
+{
+ struct qedr_iw_ep *ep = (struct qedr_iw_ep *)context;
+ struct iw_cm_event event;
+
+ memset(&event, 0, sizeof(event));
+ event.status = params->status;
+ event.event = event_type;
+
+ if (params->cm_info) {
+ event.ird = params->cm_info->ird;
+ event.ord = params->cm_info->ord;
+ /* Only connect_request and reply have valid private data
+ * the rest of the events this may be left overs from
+ * connection establishment. CONNECT_REQUEST is issued via
+ * qedr_iw_mpa_request
+ */
+ if (event_type == IW_CM_EVENT_CONNECT_REPLY) {
+ event.private_data_len =
+ params->cm_info->private_data_len;
+ event.private_data =
+ (void *)params->cm_info->private_data;
+ }
+ }
+
+ if (ep->cm_id)
+ ep->cm_id->event_handler(ep->cm_id, &event);
+}
+
+static void
+qedr_iw_close_event(void *context, struct qed_iwarp_cm_event_params *params)
+{
+ struct qedr_iw_ep *ep = (struct qedr_iw_ep *)context;
+
+ if (ep->cm_id)
+ qedr_iw_issue_event(context, params, IW_CM_EVENT_CLOSE);
+
+ kref_put(&ep->refcnt, qedr_iw_free_ep);
+}
+
+static void
+qedr_iw_qp_event(void *context,
+ struct qed_iwarp_cm_event_params *params,
+ enum ib_event_type ib_event, char *str)
+{
+ struct qedr_iw_ep *ep = (struct qedr_iw_ep *)context;
+ struct qedr_dev *dev = ep->dev;
+ struct ib_qp *ibqp = &ep->qp->ibqp;
+ struct ib_event event;
+
+ DP_NOTICE(dev, "QP error received: %s\n", str);
+
+ if (ibqp->event_handler) {
+ event.event = ib_event;
+ event.device = ibqp->device;
+ event.element.qp = ibqp;
+ ibqp->event_handler(&event, ibqp->qp_context);
+ }
+}
+
+struct qedr_discon_work {
+ struct work_struct work;
+ struct qedr_iw_ep *ep;
+ enum qed_iwarp_event_type event;
+ int status;
+};
+
+static void qedr_iw_disconnect_worker(struct work_struct *work)
+{
+ struct qedr_discon_work *dwork =
+ container_of(work, struct qedr_discon_work, work);
+ struct qed_rdma_modify_qp_in_params qp_params = { 0 };
+ struct qedr_iw_ep *ep = dwork->ep;
+ struct qedr_dev *dev = ep->dev;
+ struct qedr_qp *qp = ep->qp;
+ struct iw_cm_event event;
+
+ /* The qp won't be released until we release the ep.
+ * the ep's refcnt was increased before calling this
+ * function, therefore it is safe to access qp
+ */
+ if (test_and_set_bit(QEDR_IWARP_CM_WAIT_FOR_DISCONNECT,
+ &qp->iwarp_cm_flags))
+ goto out;
+
+ memset(&event, 0, sizeof(event));
+ event.status = dwork->status;
+ event.event = IW_CM_EVENT_DISCONNECT;
+
+ /* Success means graceful disconnect was requested. modifying
+ * to SQD is translated to graceful disconnect. O/w reset is sent
+ */
+ if (dwork->status)
+ qp_params.new_state = QED_ROCE_QP_STATE_ERR;
+ else
+ qp_params.new_state = QED_ROCE_QP_STATE_SQD;
+
+
+ if (ep->cm_id)
+ ep->cm_id->event_handler(ep->cm_id, &event);
+
+ SET_FIELD(qp_params.modify_flags,
+ QED_RDMA_MODIFY_QP_VALID_NEW_STATE, 1);
+
+ dev->ops->rdma_modify_qp(dev->rdma_ctx, qp->qed_qp, &qp_params);
+
+ complete(&ep->qp->iwarp_cm_comp);
+out:
+ kfree(dwork);
+ kref_put(&ep->refcnt, qedr_iw_free_ep);
+}
+
+static void
+qedr_iw_disconnect_event(void *context,
+ struct qed_iwarp_cm_event_params *params)
+{
+ struct qedr_discon_work *work;
+ struct qedr_iw_ep *ep = (struct qedr_iw_ep *)context;
+ struct qedr_dev *dev = ep->dev;
+
+ work = kzalloc(sizeof(*work), GFP_ATOMIC);
+ if (!work)
+ return;
+
+ /* We can't get a close event before disconnect, but since
+ * we're scheduling a work queue we need to make sure close
+ * won't delete the ep, so we increase the refcnt
+ */
+ kref_get(&ep->refcnt);
+
+ work->ep = ep;
+ work->event = params->event;
+ work->status = params->status;
+
+ INIT_WORK(&work->work, qedr_iw_disconnect_worker);
+ queue_work(dev->iwarp_wq, &work->work);
+}
+
+static void
+qedr_iw_passive_complete(void *context,
+ struct qed_iwarp_cm_event_params *params)
+{
+ struct qedr_iw_ep *ep = (struct qedr_iw_ep *)context;
+ struct qedr_dev *dev = ep->dev;
+
+ /* We will only reach the following state if MPA_REJECT was called on
+ * passive. In this case there will be no associated QP.
+ */
+ if ((params->status == -ECONNREFUSED) && (!ep->qp)) {
+ DP_DEBUG(dev, QEDR_MSG_IWARP,
+ "PASSIVE connection refused releasing ep...\n");
+ kref_put(&ep->refcnt, qedr_iw_free_ep);
+ return;
+ }
+
+ complete(&ep->qp->iwarp_cm_comp);
+ qedr_iw_issue_event(context, params, IW_CM_EVENT_ESTABLISHED);
+
+ if (params->status < 0)
+ qedr_iw_close_event(context, params);
+}
+
+static void
+qedr_iw_active_complete(void *context,
+ struct qed_iwarp_cm_event_params *params)
+{
+ struct qedr_iw_ep *ep = (struct qedr_iw_ep *)context;
+
+ complete(&ep->qp->iwarp_cm_comp);
+ qedr_iw_issue_event(context, params, IW_CM_EVENT_CONNECT_REPLY);
+
+ if (params->status < 0)
+ kref_put(&ep->refcnt, qedr_iw_free_ep);
+}
+
+static int
+qedr_iw_mpa_reply(void *context, struct qed_iwarp_cm_event_params *params)
+{
+ struct qedr_iw_ep *ep = (struct qedr_iw_ep *)context;
+ struct qedr_dev *dev = ep->dev;
+ struct qed_iwarp_send_rtr_in rtr_in;
+
+ rtr_in.ep_context = params->ep_context;
+
+ return dev->ops->iwarp_send_rtr(dev->rdma_ctx, &rtr_in);
+}
+
+static int
+qedr_iw_event_handler(void *context, struct qed_iwarp_cm_event_params *params)
+{
+ struct qedr_iw_ep *ep = (struct qedr_iw_ep *)context;
+ struct qedr_dev *dev = ep->dev;
+
+ switch (params->event) {
+ case QED_IWARP_EVENT_MPA_REQUEST:
+ qedr_iw_mpa_request(context, params);
+ break;
+ case QED_IWARP_EVENT_ACTIVE_MPA_REPLY:
+ qedr_iw_mpa_reply(context, params);
+ break;
+ case QED_IWARP_EVENT_PASSIVE_COMPLETE:
+ qedr_iw_passive_complete(context, params);
+ break;
+ case QED_IWARP_EVENT_ACTIVE_COMPLETE:
+ qedr_iw_active_complete(context, params);
+ break;
+ case QED_IWARP_EVENT_DISCONNECT:
+ qedr_iw_disconnect_event(context, params);
+ break;
+ case QED_IWARP_EVENT_CLOSE:
+ qedr_iw_close_event(context, params);
+ break;
+ case QED_IWARP_EVENT_RQ_EMPTY:
+ qedr_iw_qp_event(context, params, IB_EVENT_QP_FATAL,
+ "QED_IWARP_EVENT_RQ_EMPTY");
+ break;
+ case QED_IWARP_EVENT_IRQ_FULL:
+ qedr_iw_qp_event(context, params, IB_EVENT_QP_FATAL,
+ "QED_IWARP_EVENT_IRQ_FULL");
+ break;
+ case QED_IWARP_EVENT_LLP_TIMEOUT:
+ qedr_iw_qp_event(context, params, IB_EVENT_QP_FATAL,
+ "QED_IWARP_EVENT_LLP_TIMEOUT");
+ break;
+ case QED_IWARP_EVENT_REMOTE_PROTECTION_ERROR:
+ qedr_iw_qp_event(context, params, IB_EVENT_QP_ACCESS_ERR,
+ "QED_IWARP_EVENT_REMOTE_PROTECTION_ERROR");
+ break;
+ case QED_IWARP_EVENT_CQ_OVERFLOW:
+ qedr_iw_qp_event(context, params, IB_EVENT_QP_FATAL,
+ "QED_IWARP_EVENT_CQ_OVERFLOW");
+ break;
+ case QED_IWARP_EVENT_QP_CATASTROPHIC:
+ qedr_iw_qp_event(context, params, IB_EVENT_QP_FATAL,
+ "QED_IWARP_EVENT_QP_CATASTROPHIC");
+ break;
+ case QED_IWARP_EVENT_LOCAL_ACCESS_ERROR:
+ qedr_iw_qp_event(context, params, IB_EVENT_QP_ACCESS_ERR,
+ "QED_IWARP_EVENT_LOCAL_ACCESS_ERROR");
+ break;
+ case QED_IWARP_EVENT_REMOTE_OPERATION_ERROR:
+ qedr_iw_qp_event(context, params, IB_EVENT_QP_FATAL,
+ "QED_IWARP_EVENT_REMOTE_OPERATION_ERROR");
+ break;
+ case QED_IWARP_EVENT_TERMINATE_RECEIVED:
+ DP_NOTICE(dev, "Got terminate message\n");
+ break;
+ default:
+ DP_NOTICE(dev, "Unknown event received %d\n", params->event);
+ break;
+ }
+ return 0;
+}
+
+static u16 qedr_iw_get_vlan_ipv4(struct qedr_dev *dev, u32 *addr)
+{
+ struct net_device *ndev;
+ u16 vlan_id = 0;
+
+ ndev = ip_dev_find(&init_net, htonl(addr[0]));
+
+ if (ndev) {
+ vlan_id = rdma_vlan_dev_vlan_id(ndev);
+ dev_put(ndev);
+ }
+ if (vlan_id == 0xffff)
+ vlan_id = 0;
+ return vlan_id;
+}
+
+static u16 qedr_iw_get_vlan_ipv6(u32 *addr)
+{
+ struct net_device *ndev = NULL;
+ struct in6_addr laddr6;
+ u16 vlan_id = 0;
+ int i;
+
+ if (!IS_ENABLED(CONFIG_IPV6))
+ return vlan_id;
+
+ for (i = 0; i < 4; i++)
+ laddr6.in6_u.u6_addr32[i] = htonl(addr[i]);
+
+ rcu_read_lock();
+ for_each_netdev_rcu(&init_net, ndev) {
+ if (ipv6_chk_addr(&init_net, &laddr6, ndev, 1)) {
+ vlan_id = rdma_vlan_dev_vlan_id(ndev);
+ break;
+ }
+ }
+
+ rcu_read_unlock();
+ if (vlan_id == 0xffff)
+ vlan_id = 0;
+
+ return vlan_id;
+}
+
+static int
+qedr_addr4_resolve(struct qedr_dev *dev,
+ struct sockaddr_in *src_in,
+ struct sockaddr_in *dst_in, u8 *dst_mac)
+{
+ __be32 src_ip = src_in->sin_addr.s_addr;
+ __be32 dst_ip = dst_in->sin_addr.s_addr;
+ struct neighbour *neigh = NULL;
+ struct rtable *rt = NULL;
+ int rc = 0;
+
+ rt = ip_route_output(&init_net, dst_ip, src_ip, 0, 0,
+ RT_SCOPE_UNIVERSE);
+ if (IS_ERR(rt)) {
+ DP_ERR(dev, "ip_route_output returned error\n");
+ return -EINVAL;
+ }
+
+ neigh = dst_neigh_lookup(&rt->dst, &dst_ip);
+
+ if (neigh) {
+ rcu_read_lock();
+ if (neigh->nud_state & NUD_VALID) {
+ ether_addr_copy(dst_mac, neigh->ha);
+ DP_DEBUG(dev, QEDR_MSG_QP, "mac_addr=[%pM]\n", dst_mac);
+ } else {
+ neigh_event_send(neigh, NULL);
+ }
+ rcu_read_unlock();
+ neigh_release(neigh);
+ }
+
+ ip_rt_put(rt);
+
+ return rc;
+}
+
+static int
+qedr_addr6_resolve(struct qedr_dev *dev,
+ struct sockaddr_in6 *src_in,
+ struct sockaddr_in6 *dst_in, u8 *dst_mac)
+{
+ struct neighbour *neigh = NULL;
+ struct dst_entry *dst;
+ struct flowi6 fl6;
+ int rc = 0;
+
+ memset(&fl6, 0, sizeof(fl6));
+ fl6.daddr = dst_in->sin6_addr;
+ fl6.saddr = src_in->sin6_addr;
+
+ dst = ip6_route_output(&init_net, NULL, &fl6);
+
+ if ((!dst) || dst->error) {
+ if (dst) {
+ DP_ERR(dev,
+ "ip6_route_output returned dst->error = %d\n",
+ dst->error);
+ dst_release(dst);
+ }
+ return -EINVAL;
+ }
+ neigh = dst_neigh_lookup(dst, &fl6.daddr);
+ if (neigh) {
+ rcu_read_lock();
+ if (neigh->nud_state & NUD_VALID) {
+ ether_addr_copy(dst_mac, neigh->ha);
+ DP_DEBUG(dev, QEDR_MSG_QP, "mac_addr=[%pM]\n", dst_mac);
+ } else {
+ neigh_event_send(neigh, NULL);
+ }
+ rcu_read_unlock();
+ neigh_release(neigh);
+ }
+
+ dst_release(dst);
+
+ return rc;
+}
+
+static struct qedr_qp *qedr_iw_load_qp(struct qedr_dev *dev, u32 qpn)
+{
+ struct qedr_qp *qp;
+
+ xa_lock(&dev->qps);
+ qp = xa_load(&dev->qps, qpn);
+ if (qp)
+ kref_get(&qp->refcnt);
+ xa_unlock(&dev->qps);
+
+ return qp;
+}
+
+int qedr_iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
+{
+ struct qedr_dev *dev = get_qedr_dev(cm_id->device);
+ struct qed_iwarp_connect_out out_params;
+ struct qed_iwarp_connect_in in_params;
+ struct qed_iwarp_cm_info *cm_info;
+ struct sockaddr_in6 *laddr6;
+ struct sockaddr_in6 *raddr6;
+ struct sockaddr_in *laddr;
+ struct sockaddr_in *raddr;
+ struct qedr_iw_ep *ep;
+ struct qedr_qp *qp;
+ int rc = 0;
+ int i;
+
+ laddr = (struct sockaddr_in *)&cm_id->m_local_addr;
+ raddr = (struct sockaddr_in *)&cm_id->m_remote_addr;
+ laddr6 = (struct sockaddr_in6 *)&cm_id->m_local_addr;
+ raddr6 = (struct sockaddr_in6 *)&cm_id->m_remote_addr;
+
+ DP_DEBUG(dev, QEDR_MSG_IWARP, "MAPPED %d %d\n",
+ ntohs(((struct sockaddr_in *)&cm_id->remote_addr)->sin_port),
+ ntohs(raddr->sin_port));
+
+ DP_DEBUG(dev, QEDR_MSG_IWARP,
+ "Connect source address: %pISpc, remote address: %pISpc\n",
+ &cm_id->local_addr, &cm_id->remote_addr);
+
+ if (!laddr->sin_port || !raddr->sin_port)
+ return -EINVAL;
+
+ ep = kzalloc(sizeof(*ep), GFP_KERNEL);
+ if (!ep)
+ return -ENOMEM;
+
+ ep->dev = dev;
+ kref_init(&ep->refcnt);
+
+ qp = qedr_iw_load_qp(dev, conn_param->qpn);
+ if (!qp) {
+ rc = -EINVAL;
+ goto err;
+ }
+
+ ep->qp = qp;
+ cm_id->add_ref(cm_id);
+ ep->cm_id = cm_id;
+
+ in_params.event_cb = qedr_iw_event_handler;
+ in_params.cb_context = ep;
+
+ cm_info = &in_params.cm_info;
+ memset(cm_info->local_ip, 0, sizeof(cm_info->local_ip));
+ memset(cm_info->remote_ip, 0, sizeof(cm_info->remote_ip));
+
+ if (!IS_ENABLED(CONFIG_IPV6) ||
+ cm_id->remote_addr.ss_family == AF_INET) {
+ cm_info->ip_version = QED_TCP_IPV4;
+
+ cm_info->remote_ip[0] = ntohl(raddr->sin_addr.s_addr);
+ cm_info->local_ip[0] = ntohl(laddr->sin_addr.s_addr);
+ cm_info->remote_port = ntohs(raddr->sin_port);
+ cm_info->local_port = ntohs(laddr->sin_port);
+ cm_info->vlan = qedr_iw_get_vlan_ipv4(dev, cm_info->local_ip);
+
+ rc = qedr_addr4_resolve(dev, laddr, raddr,
+ (u8 *)in_params.remote_mac_addr);
+
+ in_params.mss = dev->iwarp_max_mtu -
+ (sizeof(struct iphdr) + sizeof(struct tcphdr));
+
+ } else {
+ in_params.cm_info.ip_version = QED_TCP_IPV6;
+
+ for (i = 0; i < 4; i++) {
+ cm_info->remote_ip[i] =
+ ntohl(raddr6->sin6_addr.in6_u.u6_addr32[i]);
+ cm_info->local_ip[i] =
+ ntohl(laddr6->sin6_addr.in6_u.u6_addr32[i]);
+ }
+
+ cm_info->local_port = ntohs(laddr6->sin6_port);
+ cm_info->remote_port = ntohs(raddr6->sin6_port);
+
+ in_params.mss = dev->iwarp_max_mtu -
+ (sizeof(struct ipv6hdr) + sizeof(struct tcphdr));
+
+ cm_info->vlan = qedr_iw_get_vlan_ipv6(cm_info->local_ip);
+
+ rc = qedr_addr6_resolve(dev, laddr6, raddr6,
+ (u8 *)in_params.remote_mac_addr);
+ }
+ if (rc)
+ goto err;
+
+ DP_DEBUG(dev, QEDR_MSG_IWARP,
+ "ord = %d ird=%d private_data=%p private_data_len=%d rq_psn=%d\n",
+ conn_param->ord, conn_param->ird, conn_param->private_data,
+ conn_param->private_data_len, qp->rq_psn);
+
+ cm_info->ord = conn_param->ord;
+ cm_info->ird = conn_param->ird;
+ cm_info->private_data = conn_param->private_data;
+ cm_info->private_data_len = conn_param->private_data_len;
+ in_params.qp = qp->qed_qp;
+ memcpy(in_params.local_mac_addr, dev->ndev->dev_addr, ETH_ALEN);
+
+ if (test_and_set_bit(QEDR_IWARP_CM_WAIT_FOR_CONNECT,
+ &qp->iwarp_cm_flags)) {
+ rc = -ENODEV;
+ goto err; /* QP already being destroyed */
+ }
+
+ rc = dev->ops->iwarp_connect(dev->rdma_ctx, &in_params, &out_params);
+ if (rc) {
+ complete(&qp->iwarp_cm_comp);
+ goto err;
+ }
+
+ return rc;
+
+err:
+ kref_put(&ep->refcnt, qedr_iw_free_ep);
+ return rc;
+}
+
+int qedr_iw_create_listen(struct iw_cm_id *cm_id, int backlog)
+{
+ struct qedr_dev *dev = get_qedr_dev(cm_id->device);
+ struct qedr_iw_listener *listener;
+ struct qed_iwarp_listen_in iparams;
+ struct qed_iwarp_listen_out oparams;
+ struct sockaddr_in *laddr;
+ struct sockaddr_in6 *laddr6;
+ int rc;
+ int i;
+
+ laddr = (struct sockaddr_in *)&cm_id->m_local_addr;
+ laddr6 = (struct sockaddr_in6 *)&cm_id->m_local_addr;
+
+ DP_DEBUG(dev, QEDR_MSG_IWARP,
+ "Create Listener address: %pISpc\n", &cm_id->local_addr);
+
+ listener = kzalloc(sizeof(*listener), GFP_KERNEL);
+ if (!listener)
+ return -ENOMEM;
+
+ listener->dev = dev;
+ cm_id->add_ref(cm_id);
+ listener->cm_id = cm_id;
+ listener->backlog = backlog;
+
+ iparams.cb_context = listener;
+ iparams.event_cb = qedr_iw_event_handler;
+ iparams.max_backlog = backlog;
+
+ if (!IS_ENABLED(CONFIG_IPV6) ||
+ cm_id->local_addr.ss_family == AF_INET) {
+ iparams.ip_version = QED_TCP_IPV4;
+ memset(iparams.ip_addr, 0, sizeof(iparams.ip_addr));
+
+ iparams.ip_addr[0] = ntohl(laddr->sin_addr.s_addr);
+ iparams.port = ntohs(laddr->sin_port);
+ iparams.vlan = qedr_iw_get_vlan_ipv4(dev, iparams.ip_addr);
+ } else {
+ iparams.ip_version = QED_TCP_IPV6;
+
+ for (i = 0; i < 4; i++) {
+ iparams.ip_addr[i] =
+ ntohl(laddr6->sin6_addr.in6_u.u6_addr32[i]);
+ }
+
+ iparams.port = ntohs(laddr6->sin6_port);
+
+ iparams.vlan = qedr_iw_get_vlan_ipv6(iparams.ip_addr);
+ }
+ rc = dev->ops->iwarp_create_listen(dev->rdma_ctx, &iparams, &oparams);
+ if (rc)
+ goto err;
+
+ listener->qed_handle = oparams.handle;
+ cm_id->provider_data = listener;
+ return rc;
+
+err:
+ cm_id->rem_ref(cm_id);
+ kfree(listener);
+ return rc;
+}
+
+int qedr_iw_destroy_listen(struct iw_cm_id *cm_id)
+{
+ struct qedr_iw_listener *listener = cm_id->provider_data;
+ struct qedr_dev *dev = get_qedr_dev(cm_id->device);
+ int rc = 0;
+
+ if (listener->qed_handle)
+ rc = dev->ops->iwarp_destroy_listen(dev->rdma_ctx,
+ listener->qed_handle);
+
+ cm_id->rem_ref(cm_id);
+ kfree(listener);
+ return rc;
+}
+
+int qedr_iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
+{
+ struct qedr_iw_ep *ep = (struct qedr_iw_ep *)cm_id->provider_data;
+ struct qedr_dev *dev = ep->dev;
+ struct qedr_qp *qp;
+ struct qed_iwarp_accept_in params;
+ int rc;
+
+ DP_DEBUG(dev, QEDR_MSG_IWARP, "Accept on qpid=%d\n", conn_param->qpn);
+
+ qp = qedr_iw_load_qp(dev, conn_param->qpn);
+ if (!qp) {
+ DP_ERR(dev, "Invalid QP number %d\n", conn_param->qpn);
+ return -EINVAL;
+ }
+
+ ep->qp = qp;
+ cm_id->add_ref(cm_id);
+ ep->cm_id = cm_id;
+
+ params.ep_context = ep->qed_context;
+ params.cb_context = ep;
+ params.qp = ep->qp->qed_qp;
+ params.private_data = conn_param->private_data;
+ params.private_data_len = conn_param->private_data_len;
+ params.ird = conn_param->ird;
+ params.ord = conn_param->ord;
+
+ if (test_and_set_bit(QEDR_IWARP_CM_WAIT_FOR_CONNECT,
+ &qp->iwarp_cm_flags)) {
+ rc = -EINVAL;
+ goto err; /* QP already destroyed */
+ }
+
+ rc = dev->ops->iwarp_accept(dev->rdma_ctx, &params);
+ if (rc) {
+ complete(&qp->iwarp_cm_comp);
+ goto err;
+ }
+
+ return rc;
+
+err:
+ kref_put(&ep->refcnt, qedr_iw_free_ep);
+
+ return rc;
+}
+
+int qedr_iw_reject(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
+{
+ struct qedr_iw_ep *ep = (struct qedr_iw_ep *)cm_id->provider_data;
+ struct qedr_dev *dev = ep->dev;
+ struct qed_iwarp_reject_in params;
+
+ params.ep_context = ep->qed_context;
+ params.cb_context = ep;
+ params.private_data = pdata;
+ params.private_data_len = pdata_len;
+ ep->qp = NULL;
+
+ return dev->ops->iwarp_reject(dev->rdma_ctx, &params);
+}
+
+void qedr_iw_qp_add_ref(struct ib_qp *ibqp)
+{
+ struct qedr_qp *qp = get_qedr_qp(ibqp);
+
+ kref_get(&qp->refcnt);
+}
+
+void qedr_iw_qp_rem_ref(struct ib_qp *ibqp)
+{
+ struct qedr_qp *qp = get_qedr_qp(ibqp);
+
+ kref_put(&qp->refcnt, qedr_iw_free_qp);
+}
+
+struct ib_qp *qedr_iw_get_qp(struct ib_device *ibdev, int qpn)
+{
+ struct qedr_dev *dev = get_qedr_dev(ibdev);
+
+ return xa_load(&dev->qps, qpn);
+}
diff --git a/drivers/infiniband/hw/qib/qib_debugfs.h b/drivers/infiniband/hw/qedr/qedr_iw_cm.h
index 7ae983a91b8b..08f4b1067e6c 100644
--- a/drivers/infiniband/hw/qib/qib_debugfs.h
+++ b/drivers/infiniband/hw/qedr/qedr_iw_cm.h
@@ -1,9 +1,5 @@
-#ifndef _QIB_DEBUGFS_H
-#define _QIB_DEBUGFS_H
-
-#ifdef CONFIG_DEBUG_FS
-/*
- * Copyright (c) 2013 Intel Corporation. All rights reserved.
+/* QLogic qed NIC Driver
+ * Copyright (c) 2015-2017 QLogic Corporation
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -21,7 +17,7 @@
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
+ * disclaimer in the documentation and /or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
@@ -33,13 +29,21 @@
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+#include <rdma/iw_cm.h>
+
+int qedr_iw_connect(struct iw_cm_id *cm_id,
+ struct iw_cm_conn_param *conn_param);
+
+int qedr_iw_create_listen(struct iw_cm_id *cm_id, int backlog);
+
+int qedr_iw_destroy_listen(struct iw_cm_id *cm_id);
+
+int qedr_iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param);
+
+int qedr_iw_reject(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len);
-struct qib_ibdev;
-void qib_dbg_ibdev_init(struct qib_ibdev *ibd);
-void qib_dbg_ibdev_exit(struct qib_ibdev *ibd);
-void qib_dbg_init(void);
-void qib_dbg_exit(void);
+void qedr_iw_qp_add_ref(struct ib_qp *qp);
-#endif
+void qedr_iw_qp_rem_ref(struct ib_qp *qp);
-#endif /* _QIB_DEBUGFS_H */
+struct ib_qp *qedr_iw_get_qp(struct ib_device *dev, int qpn);
diff --git a/drivers/infiniband/hw/qedr/qedr_cm.c b/drivers/infiniband/hw/qedr/qedr_roce_cm.c
index c6aee0333f30..859f66a51bd2 100644
--- a/drivers/infiniband/hw/qedr/qedr_cm.c
+++ b/drivers/infiniband/hw/qedr/qedr_roce_cm.c
@@ -43,15 +43,12 @@
#include <rdma/ib_addr.h>
#include <rdma/ib_cache.h>
-#include "qedr_hsi.h"
#include <linux/qed/qed_if.h>
-#include <linux/qed/qed_roce_if.h>
+#include <linux/qed/qed_rdma_if.h>
#include "qedr.h"
-#include "qedr_hsi.h"
#include "verbs.h"
#include <rdma/qedr-abi.h>
-#include "qedr_hsi.h"
-#include "qedr_cm.h"
+#include "qedr_roce_cm.h"
void qedr_inc_sw_gsi_cons(struct qedr_qp_hwq_info *info)
{
@@ -67,9 +64,14 @@ void qedr_store_gsi_qp_cq(struct qedr_dev *dev, struct qedr_qp *qp,
dev->gsi_qp = qp;
}
-void qedr_ll2_tx_cb(void *_qdev, struct qed_roce_ll2_packet *pkt)
+static void qedr_ll2_complete_tx_packet(void *cxt, u8 connection_handle,
+ void *cookie,
+ dma_addr_t first_frag_addr,
+ bool b_last_fragment,
+ bool b_last_packet)
{
- struct qedr_dev *dev = (struct qedr_dev *)_qdev;
+ struct qedr_dev *dev = (struct qedr_dev *)cxt;
+ struct qed_roce_ll2_packet *pkt = cookie;
struct qedr_cq *cq = dev->gsi_sqcq;
struct qedr_qp *qp = dev->gsi_qp;
unsigned long flags;
@@ -87,37 +89,44 @@ void qedr_ll2_tx_cb(void *_qdev, struct qed_roce_ll2_packet *pkt)
qedr_inc_sw_gsi_cons(&qp->sq);
spin_unlock_irqrestore(&qp->q_lock, flags);
- if (cq->ibcq.comp_handler) {
- spin_lock_irqsave(&cq->comp_handler_lock, flags);
+ if (cq->ibcq.comp_handler)
(*cq->ibcq.comp_handler) (&cq->ibcq, cq->ibcq.cq_context);
- spin_unlock_irqrestore(&cq->comp_handler_lock, flags);
- }
}
-void qedr_ll2_rx_cb(void *_dev, struct qed_roce_ll2_packet *pkt,
- struct qed_roce_ll2_rx_params *params)
+static void qedr_ll2_complete_rx_packet(void *cxt,
+ struct qed_ll2_comp_rx_data *data)
{
- struct qedr_dev *dev = (struct qedr_dev *)_dev;
+ struct qedr_dev *dev = (struct qedr_dev *)cxt;
struct qedr_cq *cq = dev->gsi_rqcq;
struct qedr_qp *qp = dev->gsi_qp;
unsigned long flags;
spin_lock_irqsave(&qp->q_lock, flags);
- qp->rqe_wr_id[qp->rq.gsi_cons].rc = params->rc;
- qp->rqe_wr_id[qp->rq.gsi_cons].vlan_id = params->vlan_id;
- qp->rqe_wr_id[qp->rq.gsi_cons].sg_list[0].length = pkt->payload[0].len;
- ether_addr_copy(qp->rqe_wr_id[qp->rq.gsi_cons].smac, params->smac);
+ qp->rqe_wr_id[qp->rq.gsi_cons].rc = data->u.data_length_error ?
+ -EINVAL : 0;
+ qp->rqe_wr_id[qp->rq.gsi_cons].vlan = data->vlan;
+ /* note: length stands for data length i.e. GRH is excluded */
+ qp->rqe_wr_id[qp->rq.gsi_cons].sg_list[0].length =
+ data->length.data_length;
+ *((u32 *)&qp->rqe_wr_id[qp->rq.gsi_cons].smac[0]) =
+ ntohl(data->opaque_data_0);
+ *((u16 *)&qp->rqe_wr_id[qp->rq.gsi_cons].smac[4]) =
+ ntohs((u16)data->opaque_data_1);
qedr_inc_sw_gsi_cons(&qp->rq);
spin_unlock_irqrestore(&qp->q_lock, flags);
- if (cq->ibcq.comp_handler) {
- spin_lock_irqsave(&cq->comp_handler_lock, flags);
+ if (cq->ibcq.comp_handler)
(*cq->ibcq.comp_handler) (&cq->ibcq, cq->ibcq.cq_context);
- spin_unlock_irqrestore(&cq->comp_handler_lock, flags);
- }
+}
+
+static void qedr_ll2_release_rx_packet(void *cxt, u8 connection_handle,
+ void *cookie, dma_addr_t rx_buf_addr,
+ bool b_last_packet)
+{
+ /* Do nothing... */
}
static void qedr_destroy_gsi_cq(struct qedr_dev *dev,
@@ -168,30 +177,161 @@ static inline int qedr_check_gsi_qp_attrs(struct qedr_dev *dev,
return 0;
}
-struct ib_qp *qedr_create_gsi_qp(struct qedr_dev *dev,
- struct ib_qp_init_attr *attrs,
- struct qedr_qp *qp)
+static int qedr_ll2_post_tx(struct qedr_dev *dev,
+ struct qed_roce_ll2_packet *pkt)
{
- struct qed_roce_ll2_params ll2_params;
+ enum qed_ll2_roce_flavor_type roce_flavor;
+ struct qed_ll2_tx_pkt_info ll2_tx_pkt;
int rc;
+ int i;
- rc = qedr_check_gsi_qp_attrs(dev, attrs);
+ memset(&ll2_tx_pkt, 0, sizeof(ll2_tx_pkt));
+
+ roce_flavor = (pkt->roce_mode == ROCE_V1) ?
+ QED_LL2_ROCE : QED_LL2_RROCE;
+
+ if (pkt->roce_mode == ROCE_V2_IPV4)
+ ll2_tx_pkt.enable_ip_cksum = 1;
+
+ ll2_tx_pkt.num_of_bds = 1 /* hdr */ + pkt->n_seg;
+ ll2_tx_pkt.vlan = 0;
+ ll2_tx_pkt.tx_dest = pkt->tx_dest;
+ ll2_tx_pkt.qed_roce_flavor = roce_flavor;
+ ll2_tx_pkt.first_frag = pkt->header.baddr;
+ ll2_tx_pkt.first_frag_len = pkt->header.len;
+ ll2_tx_pkt.cookie = pkt;
+
+ /* tx header */
+ rc = dev->ops->ll2_prepare_tx_packet(dev->rdma_ctx,
+ dev->gsi_ll2_handle,
+ &ll2_tx_pkt, 1);
+ if (rc) {
+ /* TX failed while posting header - release resources */
+ dma_free_coherent(&dev->pdev->dev, pkt->header.len,
+ pkt->header.vaddr, pkt->header.baddr);
+ kfree(pkt);
+
+ DP_ERR(dev, "roce ll2 tx: header failed (rc=%d)\n", rc);
+ return rc;
+ }
+
+ /* tx payload */
+ for (i = 0; i < pkt->n_seg; i++) {
+ rc = dev->ops->ll2_set_fragment_of_tx_packet(
+ dev->rdma_ctx,
+ dev->gsi_ll2_handle,
+ pkt->payload[i].baddr,
+ pkt->payload[i].len);
+
+ if (rc) {
+ /* if failed not much to do here, partial packet has
+ * been posted we can't free memory, will need to wait
+ * for completion
+ */
+ DP_ERR(dev, "ll2 tx: payload failed (rc=%d)\n", rc);
+ return rc;
+ }
+ }
+
+ return 0;
+}
+
+static int qedr_ll2_stop(struct qedr_dev *dev)
+{
+ int rc;
+
+ if (dev->gsi_ll2_handle == QED_LL2_UNUSED_HANDLE)
+ return 0;
+
+ /* remove LL2 MAC address filter */
+ rc = dev->ops->ll2_set_mac_filter(dev->cdev,
+ dev->gsi_ll2_mac_address, NULL);
+
+ rc = dev->ops->ll2_terminate_connection(dev->rdma_ctx,
+ dev->gsi_ll2_handle);
if (rc)
- return ERR_PTR(rc);
+ DP_ERR(dev, "Failed to terminate LL2 connection (rc=%d)\n", rc);
+
+ dev->ops->ll2_release_connection(dev->rdma_ctx, dev->gsi_ll2_handle);
+
+ dev->gsi_ll2_handle = QED_LL2_UNUSED_HANDLE;
+
+ return rc;
+}
+
+static int qedr_ll2_start(struct qedr_dev *dev,
+ struct ib_qp_init_attr *attrs, struct qedr_qp *qp)
+{
+ struct qed_ll2_acquire_data data;
+ struct qed_ll2_cbs cbs;
+ int rc;
/* configure and start LL2 */
- memset(&ll2_params, 0, sizeof(ll2_params));
- ll2_params.max_tx_buffers = attrs->cap.max_send_wr;
- ll2_params.max_rx_buffers = attrs->cap.max_recv_wr;
- ll2_params.cbs.tx_cb = qedr_ll2_tx_cb;
- ll2_params.cbs.rx_cb = qedr_ll2_rx_cb;
- ll2_params.cb_cookie = (void *)dev;
- ll2_params.mtu = dev->ndev->mtu;
- ether_addr_copy(ll2_params.mac_address, dev->ndev->dev_addr);
- rc = dev->ops->roce_ll2_start(dev->cdev, &ll2_params);
+ cbs.rx_comp_cb = qedr_ll2_complete_rx_packet;
+ cbs.tx_comp_cb = qedr_ll2_complete_tx_packet;
+ cbs.rx_release_cb = qedr_ll2_release_rx_packet;
+ cbs.tx_release_cb = qedr_ll2_complete_tx_packet;
+ cbs.cookie = dev;
+
+ memset(&data, 0, sizeof(data));
+ data.input.conn_type = QED_LL2_TYPE_ROCE;
+ data.input.mtu = dev->ndev->mtu;
+ data.input.rx_num_desc = attrs->cap.max_recv_wr;
+ data.input.rx_drop_ttl0_flg = true;
+ data.input.rx_vlan_removal_en = false;
+ data.input.tx_num_desc = attrs->cap.max_send_wr;
+ data.input.tx_tc = 0;
+ data.input.tx_dest = QED_LL2_TX_DEST_NW;
+ data.input.ai_err_packet_too_big = QED_LL2_DROP_PACKET;
+ data.input.ai_err_no_buf = QED_LL2_DROP_PACKET;
+ data.input.gsi_enable = 1;
+ data.p_connection_handle = &dev->gsi_ll2_handle;
+ data.cbs = &cbs;
+
+ rc = dev->ops->ll2_acquire_connection(dev->rdma_ctx, &data);
+ if (rc) {
+ DP_ERR(dev,
+ "ll2 start: failed to acquire LL2 connection (rc=%d)\n",
+ rc);
+ return rc;
+ }
+
+ rc = dev->ops->ll2_establish_connection(dev->rdma_ctx,
+ dev->gsi_ll2_handle);
+ if (rc) {
+ DP_ERR(dev,
+ "ll2 start: failed to establish LL2 connection (rc=%d)\n",
+ rc);
+ goto err1;
+ }
+
+ rc = dev->ops->ll2_set_mac_filter(dev->cdev, NULL, dev->ndev->dev_addr);
+ if (rc)
+ goto err2;
+
+ return 0;
+
+err2:
+ dev->ops->ll2_terminate_connection(dev->rdma_ctx, dev->gsi_ll2_handle);
+err1:
+ dev->ops->ll2_release_connection(dev->rdma_ctx, dev->gsi_ll2_handle);
+
+ return rc;
+}
+
+int qedr_create_gsi_qp(struct qedr_dev *dev, struct ib_qp_init_attr *attrs,
+ struct qedr_qp *qp)
+{
+ int rc;
+
+ rc = qedr_check_gsi_qp_attrs(dev, attrs);
+ if (rc)
+ return rc;
+
+ rc = qedr_ll2_start(dev, attrs, qp);
if (rc) {
DP_ERR(dev, "create gsi qp: failed on ll2 start. rc=%d\n", rc);
- return ERR_PTR(rc);
+ return rc;
}
/* create QP */
@@ -214,88 +354,64 @@ struct ib_qp *qedr_create_gsi_qp(struct qedr_dev *dev,
/* the GSI CQ is handled by the driver so remove it from the FW */
qedr_destroy_gsi_cq(dev, attrs);
dev->gsi_rqcq->cq_type = QEDR_CQ_TYPE_GSI;
- dev->gsi_rqcq->cq_type = QEDR_CQ_TYPE_GSI;
DP_DEBUG(dev, QEDR_MSG_GSI, "created GSI QP %p\n", qp);
- return &qp->ibqp;
+ return 0;
err:
kfree(qp->rqe_wr_id);
- rc = dev->ops->roce_ll2_stop(dev->cdev);
+ rc = qedr_ll2_stop(dev);
if (rc)
DP_ERR(dev, "create gsi qp: failed destroy on create\n");
- return ERR_PTR(-ENOMEM);
+ return -ENOMEM;
}
int qedr_destroy_gsi_qp(struct qedr_dev *dev)
{
- int rc;
-
- rc = dev->ops->roce_ll2_stop(dev->cdev);
- if (rc)
- DP_ERR(dev, "destroy gsi qp: failed (rc=%d)\n", rc);
- else
- DP_DEBUG(dev, QEDR_MSG_GSI, "destroy gsi qp: success\n");
-
- return rc;
+ return qedr_ll2_stop(dev);
}
#define QEDR_MAX_UD_HEADER_SIZE (100)
#define QEDR_GSI_QPN (1)
static inline int qedr_gsi_build_header(struct qedr_dev *dev,
struct qedr_qp *qp,
- struct ib_send_wr *swr,
+ const struct ib_send_wr *swr,
struct ib_ud_header *udh,
int *roce_mode)
{
bool has_vlan = false, has_grh_ipv6 = true;
- struct ib_ah_attr *ah_attr = &get_qedr_ah(ud_wr(swr)->ah)->attr;
- struct ib_global_route *grh = &ah_attr->grh;
- union ib_gid sgid;
+ struct rdma_ah_attr *ah_attr = &get_qedr_ah(ud_wr(swr)->ah)->attr;
+ const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr);
+ const struct ib_gid_attr *sgid_attr = grh->sgid_attr;
int send_size = 0;
u16 vlan_id = 0;
u16 ether_type;
- struct ib_gid_attr sgid_attr;
int rc;
int ip_ver = 0;
bool has_udp = false;
int i;
- send_size = 0;
- for (i = 0; i < swr->num_sge; ++i)
- send_size += swr->sg_list[i].length;
-
- rc = ib_get_cached_gid(qp->ibqp.device, ah_attr->port_num,
- grh->sgid_index, &sgid, &sgid_attr);
- if (rc) {
- DP_ERR(dev,
- "gsi post send: failed to get cached GID (port=%d, ix=%d)\n",
- ah_attr->port_num, grh->sgid_index);
+ rc = rdma_read_gid_l2_fields(sgid_attr, &vlan_id, NULL);
+ if (rc)
return rc;
- }
- vlan_id = rdma_vlan_dev_vlan_id(sgid_attr.ndev);
if (vlan_id < VLAN_CFI_MASK)
has_vlan = true;
- if (sgid_attr.ndev)
- dev_put(sgid_attr.ndev);
- if (!memcmp(&sgid, &zgid, sizeof(sgid))) {
- DP_ERR(dev, "gsi post send: GID not found GID index %d\n",
- ah_attr->grh.sgid_index);
- return -ENOENT;
- }
+ send_size = 0;
+ for (i = 0; i < swr->num_sge; ++i)
+ send_size += swr->sg_list[i].length;
- has_udp = (sgid_attr.gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP);
+ has_udp = (sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP);
if (!has_udp) {
/* RoCE v1 */
ether_type = ETH_P_IBOE;
*roce_mode = ROCE_V1;
- } else if (ipv6_addr_v4mapped((struct in6_addr *)&sgid)) {
+ } else if (ipv6_addr_v4mapped((struct in6_addr *)&sgid_attr->gid)) {
/* RoCE v2 IPv4 */
ip_ver = 4;
ether_type = ETH_P_IP;
@@ -316,7 +432,7 @@ static inline int qedr_gsi_build_header(struct qedr_dev *dev,
}
/* ENET + VLAN headers */
- ether_addr_copy(udh->eth.dmac_h, ah_attr->dmac);
+ ether_addr_copy(udh->eth.dmac_h, ah_attr->roce.dmac);
ether_addr_copy(udh->eth.smac_h, dev->ndev->dev_addr);
if (has_vlan) {
udh->eth.type = htons(ETH_P_8021Q);
@@ -343,20 +459,20 @@ static inline int qedr_gsi_build_header(struct qedr_dev *dev,
udh->grh.flow_label = grh->flow_label;
udh->grh.hop_limit = grh->hop_limit;
udh->grh.destination_gid = grh->dgid;
- memcpy(&udh->grh.source_gid.raw, &sgid.raw,
+ memcpy(&udh->grh.source_gid.raw, sgid_attr->gid.raw,
sizeof(udh->grh.source_gid.raw));
} else {
/* IPv4 header */
u32 ipv4_addr;
udh->ip4.protocol = IPPROTO_UDP;
- udh->ip4.tos = htonl(ah_attr->grh.flow_label);
+ udh->ip4.tos = htonl(grh->flow_label);
udh->ip4.frag_off = htons(IP_DF);
- udh->ip4.ttl = ah_attr->grh.hop_limit;
+ udh->ip4.ttl = grh->hop_limit;
- ipv4_addr = qedr_get_ipv4_from_gid(sgid.raw);
+ ipv4_addr = qedr_get_ipv4_from_gid(sgid_attr->gid.raw);
udh->ip4.saddr = ipv4_addr;
- ipv4_addr = qedr_get_ipv4_from_gid(ah_attr->grh.dgid.raw);
+ ipv4_addr = qedr_get_ipv4_from_gid(grh->dgid.raw);
udh->ip4.daddr = ipv4_addr;
/* note: checksum is calculated by the device */
}
@@ -373,7 +489,7 @@ static inline int qedr_gsi_build_header(struct qedr_dev *dev,
static inline int qedr_gsi_build_packet(struct qedr_dev *dev,
struct qedr_qp *qp,
- struct ib_send_wr *swr,
+ const struct ib_send_wr *swr,
struct qed_roce_ll2_packet **p_packet)
{
u8 ud_header_buffer[QEDR_MAX_UD_HEADER_SIZE];
@@ -404,9 +520,9 @@ static inline int qedr_gsi_build_packet(struct qedr_dev *dev,
}
if (ether_addr_equal(udh.eth.smac_h, udh.eth.dmac_h))
- packet->tx_dest = QED_ROCE_LL2_TX_DEST_NW;
+ packet->tx_dest = QED_LL2_TX_DEST_LB;
else
- packet->tx_dest = QED_ROCE_LL2_TX_DEST_LB;
+ packet->tx_dest = QED_LL2_TX_DEST_NW;
packet->roce_mode = roce_mode;
memcpy(packet->header.vaddr, ud_header_buffer, header_size);
@@ -422,12 +538,11 @@ static inline int qedr_gsi_build_packet(struct qedr_dev *dev,
return 0;
}
-int qedr_gsi_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
- struct ib_send_wr **bad_wr)
+int qedr_gsi_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+ const struct ib_send_wr **bad_wr)
{
struct qed_roce_ll2_packet *pkt = NULL;
struct qedr_qp *qp = get_qedr_qp(ibqp);
- struct qed_roce_ll2_tx_params params;
struct qedr_dev *dev = qp->dev;
unsigned long flags;
int rc;
@@ -455,8 +570,6 @@ int qedr_gsi_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
goto err;
}
- memset(&params, 0, sizeof(params));
-
spin_lock_irqsave(&qp->q_lock, flags);
rc = qedr_gsi_build_packet(dev, qp, wr, &pkt);
@@ -465,25 +578,15 @@ int qedr_gsi_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
goto err;
}
- rc = dev->ops->roce_ll2_tx(dev->cdev, pkt, &params);
+ rc = qedr_ll2_post_tx(dev, pkt);
+
if (!rc) {
qp->wqe_wr_id[qp->sq.prod].wr_id = wr->wr_id;
qedr_inc_sw_prod(&qp->sq);
DP_DEBUG(qp->dev, QEDR_MSG_GSI,
- "gsi post send: opcode=%d, in_irq=%ld, irqs_disabled=%d, wr_id=%llx\n",
- wr->opcode, in_irq(), irqs_disabled(), wr->wr_id);
+ "gsi post send: opcode=%d, wr_id=%llx\n", wr->opcode,
+ wr->wr_id);
} else {
- if (rc == QED_ROCE_TX_HEAD_FAILURE) {
- /* TX failed while posting header - release resources */
- dma_free_coherent(&dev->pdev->dev, pkt->header.len,
- pkt->header.vaddr, pkt->header.baddr);
- kfree(pkt);
- } else if (rc == QED_ROCE_TX_FRAG_FAILURE) {
- /* NTD since TX failed while posting a fragment. We will
- * release the resources on TX callback
- */
- }
-
DP_ERR(dev, "gsi post send: failed to transmit (rc=%d)\n", rc);
rc = -EAGAIN;
*bad_wr = wr;
@@ -505,15 +608,13 @@ err:
return rc;
}
-int qedr_gsi_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
- struct ib_recv_wr **bad_wr)
+int qedr_gsi_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
+ const struct ib_recv_wr **bad_wr)
{
struct qedr_dev *dev = get_qedr_dev(ibqp->device);
struct qedr_qp *qp = get_qedr_qp(ibqp);
- struct qed_roce_ll2_buffer buf;
unsigned long flags;
- int status = 0;
- int rc;
+ int rc = 0;
if ((qp->state != QED_ROCE_QP_STATE_RTR) &&
(qp->state != QED_ROCE_QP_STATE_RTS)) {
@@ -524,8 +625,6 @@ int qedr_gsi_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
return -EINVAL;
}
- memset(&buf, 0, sizeof(buf));
-
spin_lock_irqsave(&qp->q_lock, flags);
while (wr) {
@@ -536,10 +635,12 @@ int qedr_gsi_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
goto err;
}
- buf.baddr = wr->sg_list[0].addr;
- buf.len = wr->sg_list[0].length;
-
- rc = dev->ops->roce_ll2_post_rx_buffer(dev->cdev, &buf, 0, 1);
+ rc = dev->ops->ll2_post_rx_buffer(dev->rdma_ctx,
+ dev->gsi_ll2_handle,
+ wr->sg_list[0].addr,
+ wr->sg_list[0].length,
+ NULL /* cookie */,
+ 1 /* notify_fw */);
if (rc) {
DP_ERR(dev,
"gsi post recv: failed to post rx buffer (rc=%d)\n",
@@ -559,7 +660,7 @@ int qedr_gsi_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
spin_unlock_irqrestore(&qp->q_lock, flags);
- return status;
+ return rc;
err:
spin_unlock_irqrestore(&qp->q_lock, flags);
*bad_wr = wr;
@@ -572,6 +673,7 @@ int qedr_gsi_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
struct qedr_cq *cq = get_qedr_cq(ibcq);
struct qedr_qp *qp = dev->gsi_qp;
unsigned long flags;
+ u16 vlan_id;
int i = 0;
spin_lock_irqsave(&cq->cq_lock, flags);
@@ -590,9 +692,14 @@ int qedr_gsi_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
wc[i].wc_flags |= IB_WC_GRH | IB_WC_IP_CSUM_OK;
ether_addr_copy(wc[i].smac, qp->rqe_wr_id[qp->rq.cons].smac);
wc[i].wc_flags |= IB_WC_WITH_SMAC;
- if (qp->rqe_wr_id[qp->rq.cons].vlan_id) {
+
+ vlan_id = qp->rqe_wr_id[qp->rq.cons].vlan &
+ VLAN_VID_MASK;
+ if (vlan_id) {
wc[i].wc_flags |= IB_WC_WITH_VLAN;
- wc[i].vlan_id = qp->rqe_wr_id[qp->rq.cons].vlan_id;
+ wc[i].vlan_id = vlan_id;
+ wc[i].sl = (qp->rqe_wr_id[qp->rq.cons].vlan &
+ VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
}
qedr_inc_sw_cons(&qp->rq);
diff --git a/drivers/infiniband/hw/qedr/qedr_cm.h b/drivers/infiniband/hw/qedr/qedr_roce_cm.h
index 78efb1b056d1..f3432f035ec6 100644
--- a/drivers/infiniband/hw/qedr/qedr_cm.h
+++ b/drivers/infiniband/hw/qedr/qedr_roce_cm.h
@@ -39,20 +39,19 @@
#define QEDR_ROCE_V2_UDP_SPORT (0000)
-static inline u32 qedr_get_ipv4_from_gid(u8 *gid)
+static inline u32 qedr_get_ipv4_from_gid(const u8 *gid)
{
return *(u32 *)(void *)&gid[12];
}
/* RDMA CM */
int qedr_gsi_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
-int qedr_gsi_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
- struct ib_recv_wr **bad_wr);
-int qedr_gsi_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
- struct ib_send_wr **bad_wr);
-struct ib_qp *qedr_create_gsi_qp(struct qedr_dev *dev,
- struct ib_qp_init_attr *attrs,
- struct qedr_qp *qp);
+int qedr_gsi_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
+ const struct ib_recv_wr **bad_wr);
+int qedr_gsi_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+ const struct ib_send_wr **bad_wr);
+int qedr_create_gsi_qp(struct qedr_dev *dev, struct ib_qp_init_attr *attrs,
+ struct qedr_qp *qp);
void qedr_store_gsi_qp_cq(struct qedr_dev *dev,
struct qedr_qp *qp, struct ib_qp_init_attr *attrs);
int qedr_destroy_gsi_qp(struct qedr_dev *dev);
diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c
index bf035a29ab5b..ab9bf0922979 100644
--- a/drivers/infiniband/hw/qedr/verbs.c
+++ b/drivers/infiniband/hw/qedr/verbs.c
@@ -42,73 +42,68 @@
#include <rdma/ib_umem.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_cache.h>
+#include <rdma/uverbs_ioctl.h>
-#include "qedr_hsi.h"
+#include <linux/qed/common_hsi.h>
+#include "qedr_hsi_rdma.h"
#include <linux/qed/qed_if.h>
#include "qedr.h"
#include "verbs.h"
#include <rdma/qedr-abi.h>
-#include "qedr_cm.h"
+#include "qedr_roce_cm.h"
+#include "qedr_iw_cm.h"
+
+#define QEDR_SRQ_WQE_ELEM_SIZE sizeof(union rdma_srq_elm)
+#define RDMA_MAX_SGE_PER_SRQ (4)
+#define RDMA_MAX_SRQ_WQE_SIZE (RDMA_MAX_SGE_PER_SRQ + 1)
#define DB_ADDR_SHIFT(addr) ((addr) << DB_PWM_ADDR_OFFSET_SHIFT)
-int qedr_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey)
+enum {
+ QEDR_USER_MMAP_IO_WC = 0,
+ QEDR_USER_MMAP_PHYS_PAGE,
+};
+
+static inline int qedr_ib_copy_to_udata(struct ib_udata *udata, void *src,
+ size_t len)
{
- if (index > QEDR_ROCE_PKEY_TABLE_LEN)
+ size_t min_len = min_t(size_t, len, udata->outlen);
+
+ return ib_copy_to_udata(udata, src, min_len);
+}
+
+int qedr_query_pkey(struct ib_device *ibdev, u32 port, u16 index, u16 *pkey)
+{
+ if (index >= QEDR_ROCE_PKEY_TABLE_LEN)
return -EINVAL;
*pkey = QEDR_ROCE_PKEY_DEFAULT;
return 0;
}
-int qedr_query_gid(struct ib_device *ibdev, u8 port, int index,
- union ib_gid *sgid)
+int qedr_iw_query_gid(struct ib_device *ibdev, u32 port,
+ int index, union ib_gid *sgid)
{
struct qedr_dev *dev = get_qedr_dev(ibdev);
- int rc = 0;
- if (!rdma_cap_roce_gid_table(ibdev, port))
- return -ENODEV;
+ memset(sgid->raw, 0, sizeof(sgid->raw));
+ ether_addr_copy(sgid->raw, dev->ndev->dev_addr);
- rc = ib_get_cached_gid(ibdev, port, index, sgid, NULL);
- if (rc == -EAGAIN) {
- memcpy(sgid, &zgid, sizeof(*sgid));
- return 0;
- }
-
- DP_DEBUG(dev, QEDR_MSG_INIT, "query gid: index=%d %llx:%llx\n", index,
+ DP_DEBUG(dev, QEDR_MSG_INIT, "QUERY sgid[%d]=%llx:%llx\n", index,
sgid->global.interface_id, sgid->global.subnet_prefix);
- return rc;
-}
-
-int qedr_add_gid(struct ib_device *device, u8 port_num,
- unsigned int index, const union ib_gid *gid,
- const struct ib_gid_attr *attr, void **context)
-{
- if (!rdma_cap_roce_gid_table(device, port_num))
- return -EINVAL;
-
- if (port_num > QEDR_MAX_PORT)
- return -EINVAL;
-
- if (!context)
- return -EINVAL;
-
return 0;
}
-int qedr_del_gid(struct ib_device *device, u8 port_num,
- unsigned int index, void **context)
+int qedr_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr)
{
- if (!rdma_cap_roce_gid_table(device, port_num))
- return -EINVAL;
-
- if (port_num > QEDR_MAX_PORT)
- return -EINVAL;
+ struct qedr_dev *dev = get_qedr_dev(ibsrq->device);
+ struct qedr_device_attr *qattr = &dev->attr;
+ struct qedr_srq *srq = get_qedr_srq(ibsrq);
- if (!context)
- return -EINVAL;
+ srq_attr->srq_limit = srq->srq_limit;
+ srq_attr->max_wr = qattr->max_srq_wr;
+ srq_attr->max_sge = qattr->max_sge;
return 0;
}
@@ -139,9 +134,13 @@ int qedr_query_device(struct ib_device *ibdev,
attr->max_qp_wr = max_t(u32, qattr->max_sqe, qattr->max_rqe);
attr->device_cap_flags = IB_DEVICE_CURR_QP_STATE_MOD |
IB_DEVICE_RC_RNR_NAK_GEN |
- IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_MGT_EXTENSIONS;
+ IB_DEVICE_MEM_MGT_EXTENSIONS;
+ attr->kernel_cap_flags = IBK_LOCAL_DMA_LKEY;
- attr->max_sge = qattr->max_sge;
+ if (!rdma_protocol_iwarp(&dev->ibdev, 1))
+ attr->device_cap_flags |= IB_DEVICE_XRC;
+ attr->max_send_sge = qattr->max_sge;
+ attr->max_recv_sge = qattr->max_sge;
attr->max_sge_rd = qattr->max_sge;
attr->max_cq = qattr->max_cq;
attr->max_cqe = qattr->max_cqe;
@@ -149,8 +148,6 @@ int qedr_query_device(struct ib_device *ibdev,
attr->max_mw = qattr->max_mw;
attr->max_pd = qattr->max_pd;
attr->atomic_cap = dev->atomic_cap;
- attr->max_fmr = qattr->max_fmr;
- attr->max_map_per_fmr = 16;
attr->max_qp_init_rd_atom =
1 << (fls(qattr->max_qp_req_rd_atomic_resc) - 1);
attr->max_qp_rd_atom =
@@ -163,74 +160,64 @@ int qedr_query_device(struct ib_device *ibdev,
attr->local_ca_ack_delay = qattr->dev_ack_delay;
attr->max_fast_reg_page_list_len = qattr->max_mr / 8;
- attr->max_pkeys = QEDR_ROCE_PKEY_MAX;
+ attr->max_pkeys = qattr->max_pkey;
attr->max_ah = qattr->max_ah;
return 0;
}
-#define QEDR_SPEED_SDR (1)
-#define QEDR_SPEED_DDR (2)
-#define QEDR_SPEED_QDR (4)
-#define QEDR_SPEED_FDR10 (8)
-#define QEDR_SPEED_FDR (16)
-#define QEDR_SPEED_EDR (32)
-
-static inline void get_link_speed_and_width(int speed, u8 *ib_speed,
+static inline void get_link_speed_and_width(int speed, u16 *ib_speed,
u8 *ib_width)
{
switch (speed) {
case 1000:
- *ib_speed = QEDR_SPEED_SDR;
+ *ib_speed = IB_SPEED_SDR;
*ib_width = IB_WIDTH_1X;
break;
case 10000:
- *ib_speed = QEDR_SPEED_QDR;
+ *ib_speed = IB_SPEED_QDR;
*ib_width = IB_WIDTH_1X;
break;
case 20000:
- *ib_speed = QEDR_SPEED_DDR;
+ *ib_speed = IB_SPEED_DDR;
*ib_width = IB_WIDTH_4X;
break;
case 25000:
- *ib_speed = QEDR_SPEED_EDR;
+ *ib_speed = IB_SPEED_EDR;
*ib_width = IB_WIDTH_1X;
break;
case 40000:
- *ib_speed = QEDR_SPEED_QDR;
+ *ib_speed = IB_SPEED_QDR;
*ib_width = IB_WIDTH_4X;
break;
case 50000:
- *ib_speed = QEDR_SPEED_QDR;
- *ib_width = IB_WIDTH_4X;
+ *ib_speed = IB_SPEED_HDR;
+ *ib_width = IB_WIDTH_1X;
break;
case 100000:
- *ib_speed = QEDR_SPEED_EDR;
+ *ib_speed = IB_SPEED_EDR;
*ib_width = IB_WIDTH_4X;
break;
default:
/* Unsupported */
- *ib_speed = QEDR_SPEED_SDR;
+ *ib_speed = IB_SPEED_SDR;
*ib_width = IB_WIDTH_1X;
}
}
-int qedr_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *attr)
+int qedr_query_port(struct ib_device *ibdev, u32 port,
+ struct ib_port_attr *attr)
{
struct qedr_dev *dev;
struct qed_rdma_port *rdma_port;
dev = get_qedr_dev(ibdev);
- if (port > 1) {
- DP_ERR(dev, "invalid_port=0x%x\n", port);
- return -EINVAL;
- }
if (!dev->rdma_ctx) {
DP_ERR(dev, "rdma_ctx is NULL\n");
@@ -238,24 +225,29 @@ int qedr_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *attr)
}
rdma_port = dev->ops->rdma_query_port(dev->rdma_ctx);
- memset(attr, 0, sizeof(*attr));
+ /* *attr being zeroed by the caller, avoid zeroing it here */
if (rdma_port->port_state == QED_RDMA_PORT_UP) {
attr->state = IB_PORT_ACTIVE;
- attr->phys_state = 5;
+ attr->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
} else {
attr->state = IB_PORT_DOWN;
- attr->phys_state = 3;
+ attr->phys_state = IB_PORT_PHYS_STATE_DISABLED;
}
attr->max_mtu = IB_MTU_4096;
- attr->active_mtu = iboe_get_mtu(dev->ndev->mtu);
attr->lid = 0;
attr->lmc = 0;
attr->sm_lid = 0;
attr->sm_sl = 0;
- attr->port_cap_flags = IB_PORT_IP_BASED_GIDS;
- attr->gid_tbl_len = QEDR_MAX_SGID;
- attr->pkey_tbl_len = QEDR_ROCE_PKEY_TABLE_LEN;
+ attr->ip_gids = true;
+ if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
+ attr->active_mtu = iboe_get_mtu(dev->iwarp_max_mtu);
+ attr->gid_tbl_len = 1;
+ } else {
+ attr->active_mtu = iboe_get_mtu(dev->ndev->mtu);
+ attr->gid_tbl_len = QEDR_MAX_SGID;
+ attr->pkey_tbl_len = QEDR_ROCE_PKEY_TABLE_LEN;
+ }
attr->bad_pkey_cntr = rdma_port->pkey_bad_counter;
attr->qkey_viol_cntr = 0;
get_link_speed_and_width(rdma_port->link_speed,
@@ -266,108 +258,82 @@ int qedr_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *attr)
return 0;
}
-int qedr_modify_port(struct ib_device *ibdev, u8 port, int mask,
- struct ib_port_modify *props)
-{
- struct qedr_dev *dev;
-
- dev = get_qedr_dev(ibdev);
- if (port > 1) {
- DP_ERR(dev, "invalid_port=0x%x\n", port);
- return -EINVAL;
- }
-
- return 0;
-}
-
-static int qedr_add_mmap(struct qedr_ucontext *uctx, u64 phy_addr,
- unsigned long len)
-{
- struct qedr_mm *mm;
-
- mm = kzalloc(sizeof(*mm), GFP_KERNEL);
- if (!mm)
- return -ENOMEM;
-
- mm->key.phy_addr = phy_addr;
- /* This function might be called with a length which is not a multiple
- * of PAGE_SIZE, while the mapping is PAGE_SIZE grained and the kernel
- * forces this granularity by increasing the requested size if needed.
- * When qedr_mmap is called, it will search the list with the updated
- * length as a key. To prevent search failures, the length is rounded up
- * in advance to PAGE_SIZE.
- */
- mm->key.len = roundup(len, PAGE_SIZE);
- INIT_LIST_HEAD(&mm->entry);
-
- mutex_lock(&uctx->mm_list_lock);
- list_add(&mm->entry, &uctx->mm_head);
- mutex_unlock(&uctx->mm_list_lock);
-
- DP_DEBUG(uctx->dev, QEDR_MSG_MISC,
- "added (addr=0x%llx,len=0x%lx) for ctx=%p\n",
- (unsigned long long)mm->key.phy_addr,
- (unsigned long)mm->key.len, uctx);
-
- return 0;
-}
-
-static bool qedr_search_mmap(struct qedr_ucontext *uctx, u64 phy_addr,
- unsigned long len)
-{
- bool found = false;
- struct qedr_mm *mm;
-
- mutex_lock(&uctx->mm_list_lock);
- list_for_each_entry(mm, &uctx->mm_head, entry) {
- if (len != mm->key.len || phy_addr != mm->key.phy_addr)
- continue;
-
- found = true;
- break;
- }
- mutex_unlock(&uctx->mm_list_lock);
- DP_DEBUG(uctx->dev, QEDR_MSG_MISC,
- "searched for (addr=0x%llx,len=0x%lx) for ctx=%p, result=%d\n",
- mm->key.phy_addr, mm->key.len, uctx, found);
-
- return found;
-}
-
-struct ib_ucontext *qedr_alloc_ucontext(struct ib_device *ibdev,
- struct ib_udata *udata)
+int qedr_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata)
{
+ struct ib_device *ibdev = uctx->device;
int rc;
- struct qedr_ucontext *ctx;
- struct qedr_alloc_ucontext_resp uresp;
+ struct qedr_ucontext *ctx = get_qedr_ucontext(uctx);
+ struct qedr_alloc_ucontext_resp uresp = {};
+ struct qedr_alloc_ucontext_req ureq = {};
struct qedr_dev *dev = get_qedr_dev(ibdev);
struct qed_rdma_add_user_out_params oparams;
+ struct qedr_user_mmap_entry *entry;
if (!udata)
- return ERR_PTR(-EFAULT);
+ return -EFAULT;
- ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
- if (!ctx)
- return ERR_PTR(-ENOMEM);
+ if (udata->inlen) {
+ rc = ib_copy_from_udata(&ureq, udata,
+ min(sizeof(ureq), udata->inlen));
+ if (rc) {
+ DP_ERR(dev, "Problem copying data from user space\n");
+ return -EFAULT;
+ }
+ ctx->edpm_mode = !!(ureq.context_flags &
+ QEDR_ALLOC_UCTX_EDPM_MODE);
+ ctx->db_rec = !!(ureq.context_flags & QEDR_ALLOC_UCTX_DB_REC);
+ }
rc = dev->ops->rdma_add_user(dev->rdma_ctx, &oparams);
if (rc) {
DP_ERR(dev,
"failed to allocate a DPI for a new RoCE application, rc=%d. To overcome this consider to increase the number of DPIs, increase the doorbell BAR size or just close unnecessary RoCE applications. In order to increase the number of DPIs consult the qedr readme\n",
rc);
- goto err;
+ return rc;
}
ctx->dpi = oparams.dpi;
ctx->dpi_addr = oparams.dpi_addr;
ctx->dpi_phys_addr = oparams.dpi_phys_addr;
ctx->dpi_size = oparams.dpi_size;
- INIT_LIST_HEAD(&ctx->mm_head);
- mutex_init(&ctx->mm_list_lock);
+ entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+ if (!entry) {
+ rc = -ENOMEM;
+ goto err;
+ }
- memset(&uresp, 0, sizeof(uresp));
+ entry->io_address = ctx->dpi_phys_addr;
+ entry->length = ctx->dpi_size;
+ entry->mmap_flag = QEDR_USER_MMAP_IO_WC;
+ entry->dpi = ctx->dpi;
+ entry->dev = dev;
+ rc = rdma_user_mmap_entry_insert(uctx, &entry->rdma_entry,
+ ctx->dpi_size);
+ if (rc) {
+ kfree(entry);
+ goto err;
+ }
+ ctx->db_mmap_entry = &entry->rdma_entry;
+
+ if (!dev->user_dpm_enabled)
+ uresp.dpm_flags = 0;
+ else if (rdma_protocol_iwarp(&dev->ibdev, 1))
+ uresp.dpm_flags = QEDR_DPM_TYPE_IWARP_LEGACY;
+ else
+ uresp.dpm_flags = QEDR_DPM_TYPE_ROCE_ENHANCED |
+ QEDR_DPM_TYPE_ROCE_LEGACY |
+ QEDR_DPM_TYPE_ROCE_EDPM_MODE;
+
+ if (ureq.context_flags & QEDR_SUPPORT_DPM_SIZES) {
+ uresp.dpm_flags |= QEDR_DPM_SIZES_SET;
+ uresp.ldpm_limit_size = QEDR_LDPM_MAX_SIZE;
+ uresp.edpm_trans_size = QEDR_EDPM_TRANS_SIZE;
+ uresp.edpm_limit_size = QEDR_EDPM_MAX_SIZE;
+ }
- uresp.db_pa = ctx->dpi_phys_addr;
+ uresp.wids_enabled = 1;
+ uresp.wid_count = oparams.wid_count;
+ uresp.db_pa = rdma_user_mmap_get_offset(ctx->db_mmap_entry);
uresp.db_size = ctx->dpi_size;
uresp.max_send_wr = dev->attr.max_sqe;
uresp.max_recv_wr = dev->attr.max_rqe;
@@ -377,153 +343,166 @@ struct ib_ucontext *qedr_alloc_ucontext(struct ib_device *ibdev,
uresp.sges_per_srq_wr = dev->attr.max_srq_sge;
uresp.max_cqes = QEDR_MAX_CQES;
- rc = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
+ rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
if (rc)
goto err;
ctx->dev = dev;
- rc = qedr_add_mmap(ctx, ctx->dpi_phys_addr, ctx->dpi_size);
- if (rc)
- goto err;
-
DP_DEBUG(dev, QEDR_MSG_INIT, "Allocating user context %p\n",
&ctx->ibucontext);
- return &ctx->ibucontext;
+ return 0;
err:
- kfree(ctx);
- return ERR_PTR(rc);
+ if (!ctx->db_mmap_entry)
+ dev->ops->rdma_remove_user(dev->rdma_ctx, ctx->dpi);
+ else
+ rdma_user_mmap_entry_remove(ctx->db_mmap_entry);
+
+ return rc;
}
-int qedr_dealloc_ucontext(struct ib_ucontext *ibctx)
+void qedr_dealloc_ucontext(struct ib_ucontext *ibctx)
{
struct qedr_ucontext *uctx = get_qedr_ucontext(ibctx);
- struct qedr_mm *mm, *tmp;
- int status = 0;
DP_DEBUG(uctx->dev, QEDR_MSG_INIT, "Deallocating user context %p\n",
uctx);
- uctx->dev->ops->rdma_remove_user(uctx->dev->rdma_ctx, uctx->dpi);
- list_for_each_entry_safe(mm, tmp, &uctx->mm_head, entry) {
- DP_DEBUG(uctx->dev, QEDR_MSG_MISC,
- "deleted (addr=0x%llx,len=0x%lx) for ctx=%p\n",
- mm->key.phy_addr, mm->key.len, uctx);
- list_del(&mm->entry);
- kfree(mm);
- }
+ rdma_user_mmap_entry_remove(uctx->db_mmap_entry);
+}
- kfree(uctx);
- return status;
+void qedr_mmap_free(struct rdma_user_mmap_entry *rdma_entry)
+{
+ struct qedr_user_mmap_entry *entry = get_qedr_mmap_entry(rdma_entry);
+ struct qedr_dev *dev = entry->dev;
+
+ if (entry->mmap_flag == QEDR_USER_MMAP_PHYS_PAGE)
+ free_page((unsigned long)entry->address);
+ else if (entry->mmap_flag == QEDR_USER_MMAP_IO_WC)
+ dev->ops->rdma_remove_user(dev->rdma_ctx, entry->dpi);
+
+ kfree(entry);
}
-int qedr_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
+int qedr_mmap(struct ib_ucontext *ucontext, struct vm_area_struct *vma)
{
- struct qedr_ucontext *ucontext = get_qedr_ucontext(context);
- struct qedr_dev *dev = get_qedr_dev(context->device);
- unsigned long vm_page = vma->vm_pgoff << PAGE_SHIFT;
- u64 unmapped_db = dev->db_phys_addr;
- unsigned long len = (vma->vm_end - vma->vm_start);
+ struct ib_device *dev = ucontext->device;
+ size_t length = vma->vm_end - vma->vm_start;
+ struct rdma_user_mmap_entry *rdma_entry;
+ struct qedr_user_mmap_entry *entry;
int rc = 0;
- bool found;
+ u64 pfn;
- DP_DEBUG(dev, QEDR_MSG_INIT,
- "qedr_mmap called vm_page=0x%lx vm_pgoff=0x%lx unmapped_db=0x%llx db_size=%x, len=%lx\n",
- vm_page, vma->vm_pgoff, unmapped_db, dev->db_size, len);
- if (vma->vm_start & (PAGE_SIZE - 1)) {
- DP_ERR(dev, "Vma_start not page aligned = %ld\n",
- vma->vm_start);
- return -EINVAL;
- }
+ ibdev_dbg(dev,
+ "start %#lx, end %#lx, length = %#zx, pgoff = %#lx\n",
+ vma->vm_start, vma->vm_end, length, vma->vm_pgoff);
- found = qedr_search_mmap(ucontext, vm_page, len);
- if (!found) {
- DP_ERR(dev, "Vma_pgoff not found in mapped array = %ld\n",
- vma->vm_pgoff);
+ rdma_entry = rdma_user_mmap_entry_get(ucontext, vma);
+ if (!rdma_entry) {
+ ibdev_dbg(dev, "pgoff[%#lx] does not have valid entry\n",
+ vma->vm_pgoff);
return -EINVAL;
}
+ entry = get_qedr_mmap_entry(rdma_entry);
+ ibdev_dbg(dev,
+ "Mapping address[%#llx], length[%#zx], mmap_flag[%d]\n",
+ entry->io_address, length, entry->mmap_flag);
+
+ switch (entry->mmap_flag) {
+ case QEDR_USER_MMAP_IO_WC:
+ pfn = entry->io_address >> PAGE_SHIFT;
+ rc = rdma_user_mmap_io(ucontext, vma, pfn, length,
+ pgprot_writecombine(vma->vm_page_prot),
+ rdma_entry);
+ break;
+ case QEDR_USER_MMAP_PHYS_PAGE:
+ rc = vm_insert_page(vma, vma->vm_start,
+ virt_to_page(entry->address));
+ break;
+ default:
+ rc = -EINVAL;
+ }
- DP_DEBUG(dev, QEDR_MSG_INIT, "Mapping doorbell bar\n");
-
- if ((vm_page >= unmapped_db) && (vm_page <= (unmapped_db +
- dev->db_size))) {
- DP_DEBUG(dev, QEDR_MSG_INIT, "Mapping doorbell bar\n");
- if (vma->vm_flags & VM_READ) {
- DP_ERR(dev, "Trying to map doorbell bar for read\n");
- return -EPERM;
- }
-
- vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
+ if (rc)
+ ibdev_dbg(dev,
+ "Couldn't mmap address[%#llx] length[%#zx] mmap_flag[%d] err[%d]\n",
+ entry->io_address, length, entry->mmap_flag, rc);
- rc = io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
- PAGE_SIZE, vma->vm_page_prot);
- } else {
- DP_DEBUG(dev, QEDR_MSG_INIT, "Mapping chains\n");
- rc = remap_pfn_range(vma, vma->vm_start,
- vma->vm_pgoff, len, vma->vm_page_prot);
- }
- DP_DEBUG(dev, QEDR_MSG_INIT, "qedr_mmap return code: %d\n", rc);
+ rdma_user_mmap_entry_put(rdma_entry);
return rc;
}
-struct ib_pd *qedr_alloc_pd(struct ib_device *ibdev,
- struct ib_ucontext *context, struct ib_udata *udata)
+int qedr_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
{
+ struct ib_device *ibdev = ibpd->device;
struct qedr_dev *dev = get_qedr_dev(ibdev);
- struct qedr_ucontext *uctx = NULL;
- struct qedr_alloc_pd_uresp uresp;
- struct qedr_pd *pd;
+ struct qedr_pd *pd = get_qedr_pd(ibpd);
u16 pd_id;
int rc;
DP_DEBUG(dev, QEDR_MSG_INIT, "Function called from: %s\n",
- (udata && context) ? "User Lib" : "Kernel");
+ udata ? "User Lib" : "Kernel");
if (!dev->rdma_ctx) {
- DP_ERR(dev, "invlaid RDMA context\n");
- return ERR_PTR(-EINVAL);
+ DP_ERR(dev, "invalid RDMA context\n");
+ return -EINVAL;
}
- pd = kzalloc(sizeof(*pd), GFP_KERNEL);
- if (!pd)
- return ERR_PTR(-ENOMEM);
-
- dev->ops->rdma_alloc_pd(dev->rdma_ctx, &pd_id);
+ rc = dev->ops->rdma_alloc_pd(dev->rdma_ctx, &pd_id);
+ if (rc)
+ return rc;
- uresp.pd_id = pd_id;
pd->pd_id = pd_id;
- if (udata && context) {
- rc = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
- if (rc)
+ if (udata) {
+ struct qedr_alloc_pd_uresp uresp = {
+ .pd_id = pd_id,
+ };
+ struct qedr_ucontext *context = rdma_udata_to_drv_context(
+ udata, struct qedr_ucontext, ibucontext);
+
+ rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
+ if (rc) {
DP_ERR(dev, "copy error pd_id=0x%x.\n", pd_id);
- uctx = get_qedr_ucontext(context);
- uctx->pd = pd;
- pd->uctx = uctx;
+ dev->ops->rdma_dealloc_pd(dev->rdma_ctx, pd_id);
+ return rc;
+ }
+
+ pd->uctx = context;
+ pd->uctx->pd = pd;
}
- return &pd->ibpd;
+ return 0;
}
-int qedr_dealloc_pd(struct ib_pd *ibpd)
+int qedr_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
{
struct qedr_dev *dev = get_qedr_dev(ibpd->device);
struct qedr_pd *pd = get_qedr_pd(ibpd);
- if (!pd) {
- pr_err("Invalid PD received in dealloc_pd\n");
- return -EINVAL;
- }
-
DP_DEBUG(dev, QEDR_MSG_INIT, "Deallocating PD %d\n", pd->pd_id);
dev->ops->rdma_dealloc_pd(dev->rdma_ctx, pd->pd_id);
+ return 0;
+}
- kfree(pd);
- return 0;
+int qedr_alloc_xrcd(struct ib_xrcd *ibxrcd, struct ib_udata *udata)
+{
+ struct qedr_dev *dev = get_qedr_dev(ibxrcd->device);
+ struct qedr_xrcd *xrcd = get_qedr_xrcd(ibxrcd);
+
+ return dev->ops->rdma_alloc_xrcd(dev->rdma_ctx, &xrcd->xrcd_id);
}
+int qedr_dealloc_xrcd(struct ib_xrcd *ibxrcd, struct ib_udata *udata)
+{
+ struct qedr_dev *dev = get_qedr_dev(ibxrcd->device);
+ u16 xrcd_id = get_qedr_xrcd(ibxrcd)->xrcd_id;
+
+ dev->ops->rdma_dealloc_xrcd(dev->rdma_ctx, xrcd_id);
+ return 0;
+}
static void qedr_free_pbl(struct qedr_dev *dev,
struct qedr_pbl_info *pbl_info, struct qedr_pbl *pbl)
{
@@ -563,12 +542,11 @@ static struct qedr_pbl *qedr_alloc_pbl_tbl(struct qedr_dev *dev,
return ERR_PTR(-ENOMEM);
for (i = 0; i < pbl_info->num_pbls; i++) {
- va = dma_alloc_coherent(&pdev->dev, pbl_info->pbl_size,
- &pa, flags);
+ va = dma_alloc_coherent(&pdev->dev, pbl_info->pbl_size, &pa,
+ flags);
if (!va)
goto err;
- memset(va, 0, pbl_info->pbl_size);
pbl_table[i].va = va;
pbl_table[i].pa = pa;
}
@@ -642,14 +620,12 @@ static int qedr_prepare_pbl_tbl(struct qedr_dev *dev,
static void qedr_populate_pbls(struct qedr_dev *dev, struct ib_umem *umem,
struct qedr_pbl *pbl,
- struct qedr_pbl_info *pbl_info)
+ struct qedr_pbl_info *pbl_info, u32 pg_shift)
{
- int shift, pg_cnt, pages, pbe_cnt, total_num_pbes = 0;
+ int pbe_cnt, total_num_pbes = 0;
struct qedr_pbl *pbl_tbl;
- struct scatterlist *sg;
+ struct ib_block_iter biter;
struct regpair *pbe;
- int entry;
- u32 addr;
if (!pbl_info->num_pbes)
return;
@@ -670,48 +646,75 @@ static void qedr_populate_pbls(struct qedr_dev *dev, struct ib_umem *umem,
pbe_cnt = 0;
- shift = ilog2(umem->page_size);
-
- for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
- pages = sg_dma_len(sg) >> shift;
- for (pg_cnt = 0; pg_cnt < pages; pg_cnt++) {
- /* store the page address in pbe */
- pbe->lo = cpu_to_le32(sg_dma_address(sg) +
- umem->page_size * pg_cnt);
- addr = upper_32_bits(sg_dma_address(sg) +
- umem->page_size * pg_cnt);
- pbe->hi = cpu_to_le32(addr);
- pbe_cnt++;
- total_num_pbes++;
- pbe++;
-
- if (total_num_pbes == pbl_info->num_pbes)
- return;
-
- /* If the given pbl is full storing the pbes,
- * move to next pbl.
- */
- if (pbe_cnt == (pbl_info->pbl_size / sizeof(u64))) {
- pbl_tbl++;
- pbe = (struct regpair *)pbl_tbl->va;
- pbe_cnt = 0;
- }
+ rdma_umem_for_each_dma_block (umem, &biter, BIT(pg_shift)) {
+ u64 pg_addr = rdma_block_iter_dma_address(&biter);
+
+ pbe->lo = cpu_to_le32(pg_addr);
+ pbe->hi = cpu_to_le32(upper_32_bits(pg_addr));
+
+ pbe_cnt++;
+ total_num_pbes++;
+ pbe++;
+
+ if (total_num_pbes == pbl_info->num_pbes)
+ return;
+
+ /* If the given pbl is full storing the pbes, move to next pbl.
+ */
+ if (pbe_cnt == (pbl_info->pbl_size / sizeof(u64))) {
+ pbl_tbl++;
+ pbe = (struct regpair *)pbl_tbl->va;
+ pbe_cnt = 0;
}
}
}
+static int qedr_db_recovery_add(struct qedr_dev *dev,
+ void __iomem *db_addr,
+ void *db_data,
+ enum qed_db_rec_width db_width,
+ enum qed_db_rec_space db_space)
+{
+ if (!db_data) {
+ DP_DEBUG(dev, QEDR_MSG_INIT, "avoiding db rec since old lib\n");
+ return 0;
+ }
+
+ return dev->ops->common->db_recovery_add(dev->cdev, db_addr, db_data,
+ db_width, db_space);
+}
+
+static void qedr_db_recovery_del(struct qedr_dev *dev,
+ void __iomem *db_addr,
+ void *db_data)
+{
+ if (!db_data) {
+ DP_DEBUG(dev, QEDR_MSG_INIT, "avoiding db rec since old lib\n");
+ return;
+ }
+
+ /* Ignore return code as there is not much we can do about it. Error
+ * log will be printed inside.
+ */
+ dev->ops->common->db_recovery_del(dev->cdev, db_addr, db_data);
+}
+
static int qedr_copy_cq_uresp(struct qedr_dev *dev,
- struct qedr_cq *cq, struct ib_udata *udata)
+ struct qedr_cq *cq, struct ib_udata *udata,
+ u32 db_offset)
{
struct qedr_create_cq_uresp uresp;
int rc;
memset(&uresp, 0, sizeof(uresp));
- uresp.db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT);
+ uresp.db_offset = db_offset;
uresp.icid = cq->icid;
+ if (cq->q.db_mmap_entry)
+ uresp.db_rec_addr =
+ rdma_user_mmap_get_offset(cq->q.db_mmap_entry);
- rc = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
+ rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
if (rc)
DP_ERR(dev, "copy error cqid=0x%x.\n", cq->icid);
@@ -737,39 +740,99 @@ static inline int qedr_align_cq_entries(int entries)
return aligned_size / QEDR_CQE_SIZE;
}
-static inline int qedr_init_user_queue(struct ib_ucontext *ib_ctx,
+static int qedr_init_user_db_rec(struct ib_udata *udata,
+ struct qedr_dev *dev, struct qedr_userq *q,
+ bool requires_db_rec)
+{
+ struct qedr_ucontext *uctx =
+ rdma_udata_to_drv_context(udata, struct qedr_ucontext,
+ ibucontext);
+ struct qedr_user_mmap_entry *entry;
+ int rc;
+
+ /* Aborting for non doorbell userqueue (SRQ) or non-supporting lib */
+ if (requires_db_rec == 0 || !uctx->db_rec)
+ return 0;
+
+ /* Allocate a page for doorbell recovery, add to mmap */
+ q->db_rec_data = (void *)get_zeroed_page(GFP_USER);
+ if (!q->db_rec_data) {
+ DP_ERR(dev, "get_zeroed_page failed\n");
+ return -ENOMEM;
+ }
+
+ entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+ if (!entry)
+ goto err_free_db_data;
+
+ entry->address = q->db_rec_data;
+ entry->length = PAGE_SIZE;
+ entry->mmap_flag = QEDR_USER_MMAP_PHYS_PAGE;
+ rc = rdma_user_mmap_entry_insert(&uctx->ibucontext,
+ &entry->rdma_entry,
+ PAGE_SIZE);
+ if (rc)
+ goto err_free_entry;
+
+ q->db_mmap_entry = &entry->rdma_entry;
+
+ return 0;
+
+err_free_entry:
+ kfree(entry);
+
+err_free_db_data:
+ free_page((unsigned long)q->db_rec_data);
+ q->db_rec_data = NULL;
+ return -ENOMEM;
+}
+
+static inline int qedr_init_user_queue(struct ib_udata *udata,
struct qedr_dev *dev,
- struct qedr_userq *q,
- u64 buf_addr, size_t buf_len,
- int access, int dmasync)
+ struct qedr_userq *q, u64 buf_addr,
+ size_t buf_len, bool requires_db_rec,
+ int access,
+ int alloc_and_init)
{
- int page_cnt;
+ u32 fw_pages;
int rc;
q->buf_addr = buf_addr;
q->buf_len = buf_len;
- q->umem = ib_umem_get(ib_ctx, q->buf_addr, q->buf_len, access, dmasync);
+ q->umem = ib_umem_get(&dev->ibdev, q->buf_addr, q->buf_len, access);
if (IS_ERR(q->umem)) {
DP_ERR(dev, "create user queue: failed ib_umem_get, got %ld\n",
PTR_ERR(q->umem));
return PTR_ERR(q->umem);
}
- page_cnt = ib_umem_page_count(q->umem);
- rc = qedr_prepare_pbl_tbl(dev, &q->pbl_info, page_cnt, 0);
+ fw_pages = ib_umem_num_dma_blocks(q->umem, 1 << FW_PAGE_SHIFT);
+ rc = qedr_prepare_pbl_tbl(dev, &q->pbl_info, fw_pages, 0);
if (rc)
goto err0;
- q->pbl_tbl = qedr_alloc_pbl_tbl(dev, &q->pbl_info, GFP_KERNEL);
- if (IS_ERR_OR_NULL(q->pbl_tbl))
- goto err0;
-
- qedr_populate_pbls(dev, q->umem, q->pbl_tbl, &q->pbl_info);
+ if (alloc_and_init) {
+ q->pbl_tbl = qedr_alloc_pbl_tbl(dev, &q->pbl_info, GFP_KERNEL);
+ if (IS_ERR(q->pbl_tbl)) {
+ rc = PTR_ERR(q->pbl_tbl);
+ goto err0;
+ }
+ qedr_populate_pbls(dev, q->umem, q->pbl_tbl, &q->pbl_info,
+ FW_PAGE_SHIFT);
+ } else {
+ q->pbl_tbl = kzalloc(sizeof(*q->pbl_tbl), GFP_KERNEL);
+ if (!q->pbl_tbl) {
+ rc = -ENOMEM;
+ goto err0;
+ }
+ }
- return 0;
+ /* mmap the user address used to store doorbell data for recovery */
+ return qedr_init_user_db_rec(udata, dev, q, requires_db_rec);
err0:
ib_umem_release(q->umem);
+ q->umem = NULL;
return rc;
}
@@ -795,20 +858,26 @@ static inline void qedr_init_cq_params(struct qedr_cq *cq,
static void doorbell_cq(struct qedr_cq *cq, u32 cons, u8 flags)
{
- /* Flush data before signalling doorbell */
- wmb();
cq->db.data.agg_flags = flags;
cq->db.data.value = cpu_to_le32(cons);
writeq(cq->db.raw, cq->db_addr);
-
- /* Make sure write would stick */
- mmiowb();
}
int qedr_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
{
struct qedr_cq *cq = get_qedr_cq(ibcq);
unsigned long sflags;
+ struct qedr_dev *dev;
+
+ dev = get_qedr_dev(ibcq->device);
+
+ if (cq->destroyed) {
+ DP_ERR(dev,
+ "warning: arm was invoked after destroy for cq %p (icid=%d)\n",
+ cq, cq->icid);
+ return -EINVAL;
+ }
+
if (cq->cq_type == QEDR_CQ_TYPE_GSI)
return 0;
@@ -830,20 +899,29 @@ int qedr_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
return 0;
}
-struct ib_cq *qedr_create_cq(struct ib_device *ibdev,
- const struct ib_cq_init_attr *attr,
- struct ib_ucontext *ib_ctx, struct ib_udata *udata)
+int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+ struct uverbs_attr_bundle *attrs)
{
- struct qedr_ucontext *ctx = get_qedr_ucontext(ib_ctx);
+ struct ib_udata *udata = &attrs->driver_udata;
+ struct ib_device *ibdev = ibcq->device;
+ struct qedr_ucontext *ctx = rdma_udata_to_drv_context(
+ udata, struct qedr_ucontext, ibucontext);
struct qed_rdma_destroy_cq_out_params destroy_oparams;
struct qed_rdma_destroy_cq_in_params destroy_iparams;
+ struct qed_chain_init_params chain_params = {
+ .mode = QED_CHAIN_MODE_PBL,
+ .intended_use = QED_CHAIN_USE_TO_CONSUME,
+ .cnt_type = QED_CHAIN_CNT_TYPE_U32,
+ .elem_size = sizeof(union rdma_cqe),
+ };
struct qedr_dev *dev = get_qedr_dev(ibdev);
struct qed_rdma_create_cq_in_params params;
- struct qedr_create_cq_ureq ureq;
+ struct qedr_create_cq_ureq ureq = {};
int vector = attr->comp_vector;
int entries = attr->cqe;
- struct qedr_cq *cq;
+ struct qedr_cq *cq = get_qedr_cq(ibcq);
int chain_entries;
+ u32 db_offset;
int page_cnt;
u64 pbl_ptr;
u16 icid;
@@ -853,23 +931,26 @@ struct ib_cq *qedr_create_cq(struct ib_device *ibdev,
"create_cq: called from %s. entries=%d, vector=%d\n",
udata ? "User Lib" : "Kernel", entries, vector);
+ if (attr->flags)
+ return -EOPNOTSUPP;
+
if (entries > QEDR_MAX_CQES) {
DP_ERR(dev,
"create cq: the number of entries %d is too high. Must be equal or below %d.\n",
entries, QEDR_MAX_CQES);
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
}
chain_entries = qedr_align_cq_entries(entries);
chain_entries = min_t(int, chain_entries, QEDR_MAX_CQES);
+ chain_params.num_elems = chain_entries;
- cq = kzalloc(sizeof(*cq), GFP_KERNEL);
- if (!cq)
- return ERR_PTR(-ENOMEM);
+ /* calc db offset. user will add DPI base, kernel will add db addr */
+ db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT);
if (udata) {
- memset(&ureq, 0, sizeof(ureq));
- if (ib_copy_from_udata(&ureq, udata, sizeof(ureq))) {
+ if (ib_copy_from_udata(&ureq, udata, min(sizeof(ureq),
+ udata->inlen))) {
DP_ERR(dev,
"create cq: problem copying data from user space\n");
goto err0;
@@ -883,8 +964,9 @@ struct ib_cq *qedr_create_cq(struct ib_device *ibdev,
cq->cq_type = QEDR_CQ_TYPE_USER;
- rc = qedr_init_user_queue(ib_ctx, dev, &cq->q, ureq.addr,
- ureq.len, IB_ACCESS_LOCAL_WRITE, 1);
+ rc = qedr_init_user_queue(udata, dev, &cq->q, ureq.addr,
+ ureq.len, true, IB_ACCESS_LOCAL_WRITE,
+ 1);
if (rc)
goto err0;
@@ -892,18 +974,14 @@ struct ib_cq *qedr_create_cq(struct ib_device *ibdev,
page_cnt = cq->q.pbl_info.num_pbes;
cq->ibcq.cqe = chain_entries;
+ cq->q.db_addr = ctx->dpi_addr + db_offset;
} else {
cq->cq_type = QEDR_CQ_TYPE_KERNEL;
- rc = dev->ops->common->chain_alloc(dev->cdev,
- QED_CHAIN_USE_TO_CONSUME,
- QED_CHAIN_MODE_PBL,
- QED_CHAIN_CNT_TYPE_U32,
- chain_entries,
- sizeof(union rdma_cqe),
- &cq->pbl);
+ rc = dev->ops->common->chain_alloc(dev->cdev, &cq->pbl,
+ &chain_params);
if (rc)
- goto err1;
+ goto err0;
page_cnt = qed_chain_get_page_cnt(&cq->pbl);
pbl_ptr = qed_chain_get_pbl_phys(&cq->pbl);
@@ -915,22 +993,29 @@ struct ib_cq *qedr_create_cq(struct ib_device *ibdev,
rc = dev->ops->rdma_create_cq(dev->rdma_ctx, &params, &icid);
if (rc)
- goto err2;
+ goto err1;
cq->icid = icid;
cq->sig = QEDR_CQ_MAGIC_NUMBER;
spin_lock_init(&cq->cq_lock);
- if (ib_ctx) {
- rc = qedr_copy_cq_uresp(dev, cq, udata);
+ if (udata) {
+ rc = qedr_copy_cq_uresp(dev, cq, udata, db_offset);
if (rc)
- goto err3;
+ goto err2;
+
+ rc = qedr_db_recovery_add(dev, cq->q.db_addr,
+ &cq->q.db_rec_data->db_data,
+ DB_REC_WIDTH_64B,
+ DB_REC_USER);
+ if (rc)
+ goto err2;
+
} else {
/* Generate doorbell address. */
- cq->db_addr = dev->db_addr +
- DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT);
cq->db.data.icid = cq->icid;
- cq->db.data.params = DB_AGG_CMD_SET <<
+ cq->db_addr = dev->db_addr + db_offset;
+ cq->db.data.params = DB_AGG_CMD_MAX <<
RDMA_PWM_VAL32_DATA_AGG_CMD_SHIFT;
/* point to the very last element, passing it we will toggle */
@@ -939,69 +1024,102 @@ struct ib_cq *qedr_create_cq(struct ib_device *ibdev,
cq->latest_cqe = NULL;
consume_cqe(cq);
cq->cq_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
+
+ rc = qedr_db_recovery_add(dev, cq->db_addr, &cq->db.data,
+ DB_REC_WIDTH_64B, DB_REC_KERNEL);
+ if (rc)
+ goto err2;
}
DP_DEBUG(dev, QEDR_MSG_CQ,
"create cq: icid=0x%0x, addr=%p, size(entries)=0x%0x\n",
cq->icid, cq, params.cq_size);
- return &cq->ibcq;
+ return 0;
-err3:
+err2:
destroy_iparams.icid = cq->icid;
dev->ops->rdma_destroy_cq(dev->rdma_ctx, &destroy_iparams,
&destroy_oparams);
-err2:
- if (udata)
- qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl);
- else
- dev->ops->common->chain_free(dev->cdev, &cq->pbl);
err1:
- if (udata)
+ if (udata) {
+ qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl);
ib_umem_release(cq->q.umem);
+ if (cq->q.db_mmap_entry)
+ rdma_user_mmap_entry_remove(cq->q.db_mmap_entry);
+ } else {
+ dev->ops->common->chain_free(dev->cdev, &cq->pbl);
+ }
err0:
- kfree(cq);
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
}
-int qedr_resize_cq(struct ib_cq *ibcq, int new_cnt, struct ib_udata *udata)
-{
- struct qedr_dev *dev = get_qedr_dev(ibcq->device);
- struct qedr_cq *cq = get_qedr_cq(ibcq);
-
- DP_ERR(dev, "cq %p RESIZE NOT SUPPORTED\n", cq);
-
- return 0;
-}
+#define QEDR_DESTROY_CQ_MAX_ITERATIONS (10)
+#define QEDR_DESTROY_CQ_ITER_DURATION (10)
-int qedr_destroy_cq(struct ib_cq *ibcq)
+int qedr_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
{
struct qedr_dev *dev = get_qedr_dev(ibcq->device);
struct qed_rdma_destroy_cq_out_params oparams;
struct qed_rdma_destroy_cq_in_params iparams;
struct qedr_cq *cq = get_qedr_cq(ibcq);
+ int iter;
- DP_DEBUG(dev, QEDR_MSG_CQ, "destroy cq: cq_id %d", cq->icid);
+ DP_DEBUG(dev, QEDR_MSG_CQ, "destroy cq %p (icid=%d)\n", cq, cq->icid);
- /* GSIs CQs are handled by driver, so they don't exist in the FW */
- if (cq->cq_type != QEDR_CQ_TYPE_GSI) {
- int rc;
+ cq->destroyed = 1;
- iparams.icid = cq->icid;
- rc = dev->ops->rdma_destroy_cq(dev->rdma_ctx, &iparams,
- &oparams);
- if (rc)
- return rc;
- dev->ops->common->chain_free(dev->cdev, &cq->pbl);
+ /* GSIs CQs are handled by driver, so they don't exist in the FW */
+ if (cq->cq_type == QEDR_CQ_TYPE_GSI) {
+ qedr_db_recovery_del(dev, cq->db_addr, &cq->db.data);
+ return 0;
}
- if (ibcq->uobject && ibcq->uobject->context) {
+ iparams.icid = cq->icid;
+ dev->ops->rdma_destroy_cq(dev->rdma_ctx, &iparams, &oparams);
+ dev->ops->common->chain_free(dev->cdev, &cq->pbl);
+
+ if (udata) {
qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl);
ib_umem_release(cq->q.umem);
+
+ if (cq->q.db_rec_data) {
+ qedr_db_recovery_del(dev, cq->q.db_addr,
+ &cq->q.db_rec_data->db_data);
+ rdma_user_mmap_entry_remove(cq->q.db_mmap_entry);
+ }
+ } else {
+ qedr_db_recovery_del(dev, cq->db_addr, &cq->db.data);
+ }
+
+ /* We don't want the IRQ handler to handle a non-existing CQ so we
+ * wait until all CNQ interrupts, if any, are received. This will always
+ * happen and will always happen very fast. If not, then a serious error
+ * has occured. That is why we can use a long delay.
+ * We spin for a short time so we don’t lose time on context switching
+ * in case all the completions are handled in that span. Otherwise
+ * we sleep for a while and check again. Since the CNQ may be
+ * associated with (only) the current CPU we use msleep to allow the
+ * current CPU to be freed.
+ * The CNQ notification is increased in qedr_irq_handler().
+ */
+ iter = QEDR_DESTROY_CQ_MAX_ITERATIONS;
+ while (oparams.num_cq_notif != READ_ONCE(cq->cnq_notif) && iter) {
+ udelay(QEDR_DESTROY_CQ_ITER_DURATION);
+ iter--;
}
- kfree(cq);
+ iter = QEDR_DESTROY_CQ_MAX_ITERATIONS;
+ while (oparams.num_cq_notif != READ_ONCE(cq->cnq_notif) && iter) {
+ msleep(QEDR_DESTROY_CQ_ITER_DURATION);
+ iter--;
+ }
+ /* Note that we don't need to have explicit code to wait for the
+ * completion of the event handler because it is invoked from the EQ.
+ * Since the destroy CQ ramrod has also been received on the EQ we can
+ * be certain that there's no event handler in process.
+ */
return 0;
}
@@ -1011,58 +1129,52 @@ static inline int get_gid_info_from_table(struct ib_qp *ibqp,
struct qed_rdma_modify_qp_in_params
*qp_params)
{
+ const struct ib_gid_attr *gid_attr;
enum rdma_network_type nw_type;
- struct ib_gid_attr gid_attr;
- union ib_gid gid;
+ const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
u32 ipv4_addr;
- int rc = 0;
+ int ret;
int i;
- rc = ib_get_cached_gid(ibqp->device, attr->ah_attr.port_num,
- attr->ah_attr.grh.sgid_index, &gid, &gid_attr);
- if (rc)
- return rc;
-
- if (!memcmp(&gid, &zgid, sizeof(gid)))
- return -ENOENT;
-
- if (gid_attr.ndev) {
- qp_params->vlan_id = rdma_vlan_dev_vlan_id(gid_attr.ndev);
-
- dev_put(gid_attr.ndev);
- nw_type = ib_gid_to_network_type(gid_attr.gid_type, &gid);
- switch (nw_type) {
- case RDMA_NETWORK_IPV6:
- memcpy(&qp_params->sgid.bytes[0], &gid.raw[0],
- sizeof(qp_params->sgid));
- memcpy(&qp_params->dgid.bytes[0],
- &attr->ah_attr.grh.dgid,
- sizeof(qp_params->dgid));
- qp_params->roce_mode = ROCE_V2_IPV6;
- SET_FIELD(qp_params->modify_flags,
- QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1);
- break;
- case RDMA_NETWORK_IB:
- memcpy(&qp_params->sgid.bytes[0], &gid.raw[0],
- sizeof(qp_params->sgid));
- memcpy(&qp_params->dgid.bytes[0],
- &attr->ah_attr.grh.dgid,
- sizeof(qp_params->dgid));
- qp_params->roce_mode = ROCE_V1;
- break;
- case RDMA_NETWORK_IPV4:
- memset(&qp_params->sgid, 0, sizeof(qp_params->sgid));
- memset(&qp_params->dgid, 0, sizeof(qp_params->dgid));
- ipv4_addr = qedr_get_ipv4_from_gid(gid.raw);
- qp_params->sgid.ipv4_addr = ipv4_addr;
- ipv4_addr =
- qedr_get_ipv4_from_gid(attr->ah_attr.grh.dgid.raw);
- qp_params->dgid.ipv4_addr = ipv4_addr;
- SET_FIELD(qp_params->modify_flags,
- QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1);
- qp_params->roce_mode = ROCE_V2_IPV4;
- break;
- }
+ gid_attr = grh->sgid_attr;
+ ret = rdma_read_gid_l2_fields(gid_attr, &qp_params->vlan_id, NULL);
+ if (ret)
+ return ret;
+
+ nw_type = rdma_gid_attr_network_type(gid_attr);
+ switch (nw_type) {
+ case RDMA_NETWORK_IPV6:
+ memcpy(&qp_params->sgid.bytes[0], &gid_attr->gid.raw[0],
+ sizeof(qp_params->sgid));
+ memcpy(&qp_params->dgid.bytes[0],
+ &grh->dgid,
+ sizeof(qp_params->dgid));
+ qp_params->roce_mode = ROCE_V2_IPV6;
+ SET_FIELD(qp_params->modify_flags,
+ QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1);
+ break;
+ case RDMA_NETWORK_ROCE_V1:
+ memcpy(&qp_params->sgid.bytes[0], &gid_attr->gid.raw[0],
+ sizeof(qp_params->sgid));
+ memcpy(&qp_params->dgid.bytes[0],
+ &grh->dgid,
+ sizeof(qp_params->dgid));
+ qp_params->roce_mode = ROCE_V1;
+ break;
+ case RDMA_NETWORK_IPV4:
+ memset(&qp_params->sgid, 0, sizeof(qp_params->sgid));
+ memset(&qp_params->dgid, 0, sizeof(qp_params->dgid));
+ ipv4_addr = qedr_get_ipv4_from_gid(gid_attr->gid.raw);
+ qp_params->sgid.ipv4_addr = ipv4_addr;
+ ipv4_addr =
+ qedr_get_ipv4_from_gid(grh->dgid.raw);
+ qp_params->dgid.ipv4_addr = ipv4_addr;
+ SET_FIELD(qp_params->modify_flags,
+ QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1);
+ qp_params->roce_mode = ROCE_V2_IPV4;
+ break;
+ default:
+ return -EINVAL;
}
for (i = 0; i < 4; i++) {
@@ -1077,16 +1189,20 @@ static inline int get_gid_info_from_table(struct ib_qp *ibqp,
}
static int qedr_check_qp_attrs(struct ib_pd *ibpd, struct qedr_dev *dev,
- struct ib_qp_init_attr *attrs)
+ struct ib_qp_init_attr *attrs,
+ struct ib_udata *udata)
{
struct qedr_device_attr *qattr = &dev->attr;
/* QP0... attrs->qp_type == IB_QPT_GSI */
- if (attrs->qp_type != IB_QPT_RC && attrs->qp_type != IB_QPT_GSI) {
+ if (attrs->qp_type != IB_QPT_RC &&
+ attrs->qp_type != IB_QPT_GSI &&
+ attrs->qp_type != IB_QPT_XRC_INI &&
+ attrs->qp_type != IB_QPT_XRC_TGT) {
DP_DEBUG(dev, QEDR_MSG_QP,
"create qp: unsupported qp type=0x%x requested\n",
attrs->qp_type);
- return -EINVAL;
+ return -EOPNOTSUPP;
}
if (attrs->cap.max_send_wr > qattr->max_sqe) {
@@ -1117,45 +1233,96 @@ static int qedr_check_qp_attrs(struct ib_pd *ibpd, struct qedr_dev *dev,
return -EINVAL;
}
- /* Unprivileged user space cannot create special QP */
- if (ibpd->uobject && attrs->qp_type == IB_QPT_GSI) {
- DP_ERR(dev,
- "create qp: userspace can't create special QPs of type=0x%x\n",
- attrs->qp_type);
- return -EINVAL;
+ /* verify consumer QPs are not trying to use GSI QP's CQ.
+ * TGT QP isn't associated with RQ/SQ
+ */
+ if ((attrs->qp_type != IB_QPT_GSI) && (dev->gsi_qp_created) &&
+ (attrs->qp_type != IB_QPT_XRC_TGT) &&
+ (attrs->qp_type != IB_QPT_XRC_INI)) {
+ struct qedr_cq *send_cq = get_qedr_cq(attrs->send_cq);
+ struct qedr_cq *recv_cq = get_qedr_cq(attrs->recv_cq);
+
+ if ((send_cq->cq_type == QEDR_CQ_TYPE_GSI) ||
+ (recv_cq->cq_type == QEDR_CQ_TYPE_GSI)) {
+ DP_ERR(dev,
+ "create qp: consumer QP cannot use GSI CQs.\n");
+ return -EINVAL;
+ }
}
return 0;
}
-static void qedr_copy_rq_uresp(struct qedr_create_qp_uresp *uresp,
+static int qedr_copy_srq_uresp(struct qedr_dev *dev,
+ struct qedr_srq *srq, struct ib_udata *udata)
+{
+ struct qedr_create_srq_uresp uresp = {};
+ int rc;
+
+ uresp.srq_id = srq->srq_id;
+
+ rc = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
+ if (rc)
+ DP_ERR(dev, "create srq: problem copying data to user space\n");
+
+ return rc;
+}
+
+static void qedr_copy_rq_uresp(struct qedr_dev *dev,
+ struct qedr_create_qp_uresp *uresp,
struct qedr_qp *qp)
{
- uresp->rq_db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD);
+ /* iWARP requires two doorbells per RQ. */
+ if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
+ uresp->rq_db_offset =
+ DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_IWARP_RQ_PROD);
+ uresp->rq_db2_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_FLAGS);
+ } else {
+ uresp->rq_db_offset =
+ DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD);
+ }
+
uresp->rq_icid = qp->icid;
+ if (qp->urq.db_mmap_entry)
+ uresp->rq_db_rec_addr =
+ rdma_user_mmap_get_offset(qp->urq.db_mmap_entry);
}
-static void qedr_copy_sq_uresp(struct qedr_create_qp_uresp *uresp,
+static void qedr_copy_sq_uresp(struct qedr_dev *dev,
+ struct qedr_create_qp_uresp *uresp,
struct qedr_qp *qp)
{
uresp->sq_db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
- uresp->sq_icid = qp->icid + 1;
+
+ /* iWARP uses the same cid for rq and sq */
+ if (rdma_protocol_iwarp(&dev->ibdev, 1))
+ uresp->sq_icid = qp->icid;
+ else
+ uresp->sq_icid = qp->icid + 1;
+
+ if (qp->usq.db_mmap_entry)
+ uresp->sq_db_rec_addr =
+ rdma_user_mmap_get_offset(qp->usq.db_mmap_entry);
}
static int qedr_copy_qp_uresp(struct qedr_dev *dev,
- struct qedr_qp *qp, struct ib_udata *udata)
+ struct qedr_qp *qp, struct ib_udata *udata,
+ struct qedr_create_qp_uresp *uresp)
{
- struct qedr_create_qp_uresp uresp;
int rc;
- memset(&uresp, 0, sizeof(uresp));
- qedr_copy_sq_uresp(&uresp, qp);
- qedr_copy_rq_uresp(&uresp, qp);
+ memset(uresp, 0, sizeof(*uresp));
- uresp.atomic_supported = dev->atomic_cap != IB_ATOMIC_NONE;
- uresp.qp_id = qp->qp_id;
+ if (qedr_qp_has_sq(qp))
+ qedr_copy_sq_uresp(dev, uresp, qp);
- rc = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
+ if (qedr_qp_has_rq(qp))
+ qedr_copy_rq_uresp(dev, uresp, qp);
+
+ uresp->atomic_supported = dev->atomic_cap != IB_ATOMIC_NONE;
+ uresp->qp_id = qp->qp_id;
+
+ rc = qedr_ib_copy_to_udata(udata, uresp, sizeof(*uresp));
if (rc)
DP_ERR(dev,
"create qp: failed a copy to user space with qp icid=0x%x.\n",
@@ -1164,26 +1331,57 @@ static int qedr_copy_qp_uresp(struct qedr_dev *dev,
return rc;
}
+static void qedr_reset_qp_hwq_info(struct qedr_qp_hwq_info *qph)
+{
+ qed_chain_reset(&qph->pbl);
+ qph->prod = 0;
+ qph->cons = 0;
+ qph->wqe_cons = 0;
+ qph->db_data.data.value = cpu_to_le16(0);
+}
+
static void qedr_set_common_qp_params(struct qedr_dev *dev,
struct qedr_qp *qp,
struct qedr_pd *pd,
struct ib_qp_init_attr *attrs)
{
spin_lock_init(&qp->q_lock);
+ if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
+ kref_init(&qp->refcnt);
+ init_completion(&qp->iwarp_cm_comp);
+ init_completion(&qp->qp_rel_comp);
+ }
+
qp->pd = pd;
qp->qp_type = attrs->qp_type;
qp->max_inline_data = attrs->cap.max_inline_data;
- qp->sq.max_sges = attrs->cap.max_send_sge;
qp->state = QED_ROCE_QP_STATE_RESET;
- qp->signaled = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR) ? true : false;
- qp->sq_cq = get_qedr_cq(attrs->send_cq);
- qp->rq_cq = get_qedr_cq(attrs->recv_cq);
+
+ qp->prev_wqe_size = 0;
+
+ qp->signaled = attrs->sq_sig_type == IB_SIGNAL_ALL_WR;
qp->dev = dev;
- qp->rq.max_sges = attrs->cap.max_recv_sge;
+ if (qedr_qp_has_sq(qp)) {
+ qedr_reset_qp_hwq_info(&qp->sq);
+ qp->sq.max_sges = attrs->cap.max_send_sge;
+ qp->sq_cq = get_qedr_cq(attrs->send_cq);
+ DP_DEBUG(dev, QEDR_MSG_QP,
+ "SQ params:\tsq_max_sges = %d, sq_cq_id = %d\n",
+ qp->sq.max_sges, qp->sq_cq->icid);
+ }
+
+ if (attrs->srq)
+ qp->srq = get_qedr_srq(attrs->srq);
+
+ if (qedr_qp_has_rq(qp)) {
+ qedr_reset_qp_hwq_info(&qp->rq);
+ qp->rq_cq = get_qedr_cq(attrs->recv_cq);
+ qp->rq.max_sges = attrs->cap.max_recv_sge;
+ DP_DEBUG(dev, QEDR_MSG_QP,
+ "RQ params:\trq_max_sges = %d, rq_cq_id = %d\n",
+ qp->rq.max_sges, qp->rq_cq->icid);
+ }
- DP_DEBUG(dev, QEDR_MSG_QP,
- "RQ params:\trq_max_sges = %d, rq_cq_id = %d\n",
- qp->rq.max_sges, qp->rq_cq->icid);
DP_DEBUG(dev, QEDR_MSG_QP,
"QP params:\tpd = %d, qp_type = %d, max_inline_data = %d, state = %d, signaled = %d, use_srq=%d\n",
pd->pd_id, qp->qp_type, qp->max_inline_data,
@@ -1193,14 +1391,338 @@ static void qedr_set_common_qp_params(struct qedr_dev *dev,
qp->sq.max_sges, qp->sq_cq->icid);
}
-static void qedr_set_roce_db_info(struct qedr_dev *dev, struct qedr_qp *qp)
+static int qedr_set_roce_db_info(struct qedr_dev *dev, struct qedr_qp *qp)
{
- qp->sq.db = dev->db_addr +
- DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
- qp->sq.db_data.data.icid = qp->icid + 1;
- qp->rq.db = dev->db_addr +
- DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD);
- qp->rq.db_data.data.icid = qp->icid;
+ int rc = 0;
+
+ if (qedr_qp_has_sq(qp)) {
+ qp->sq.db = dev->db_addr +
+ DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
+ qp->sq.db_data.data.icid = qp->icid + 1;
+ rc = qedr_db_recovery_add(dev, qp->sq.db, &qp->sq.db_data,
+ DB_REC_WIDTH_32B, DB_REC_KERNEL);
+ if (rc)
+ return rc;
+ }
+
+ if (qedr_qp_has_rq(qp)) {
+ qp->rq.db = dev->db_addr +
+ DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD);
+ qp->rq.db_data.data.icid = qp->icid;
+ rc = qedr_db_recovery_add(dev, qp->rq.db, &qp->rq.db_data,
+ DB_REC_WIDTH_32B, DB_REC_KERNEL);
+ if (rc && qedr_qp_has_sq(qp))
+ qedr_db_recovery_del(dev, qp->sq.db, &qp->sq.db_data);
+ }
+
+ return rc;
+}
+
+static int qedr_check_srq_params(struct qedr_dev *dev,
+ struct ib_srq_init_attr *attrs,
+ struct ib_udata *udata)
+{
+ struct qedr_device_attr *qattr = &dev->attr;
+
+ if (attrs->attr.max_wr > qattr->max_srq_wr) {
+ DP_ERR(dev,
+ "create srq: unsupported srq_wr=0x%x requested (max_srq_wr=0x%x)\n",
+ attrs->attr.max_wr, qattr->max_srq_wr);
+ return -EINVAL;
+ }
+
+ if (attrs->attr.max_sge > qattr->max_sge) {
+ DP_ERR(dev,
+ "create srq: unsupported sge=0x%x requested (max_srq_sge=0x%x)\n",
+ attrs->attr.max_sge, qattr->max_sge);
+ }
+
+ if (!udata && attrs->srq_type == IB_SRQT_XRC) {
+ DP_ERR(dev, "XRC SRQs are not supported in kernel-space\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static void qedr_free_srq_user_params(struct qedr_srq *srq)
+{
+ qedr_free_pbl(srq->dev, &srq->usrq.pbl_info, srq->usrq.pbl_tbl);
+ ib_umem_release(srq->usrq.umem);
+ ib_umem_release(srq->prod_umem);
+}
+
+static void qedr_free_srq_kernel_params(struct qedr_srq *srq)
+{
+ struct qedr_srq_hwq_info *hw_srq = &srq->hw_srq;
+ struct qedr_dev *dev = srq->dev;
+
+ dev->ops->common->chain_free(dev->cdev, &hw_srq->pbl);
+
+ dma_free_coherent(&dev->pdev->dev, sizeof(struct rdma_srq_producers),
+ hw_srq->virt_prod_pair_addr,
+ hw_srq->phy_prod_pair_addr);
+}
+
+static int qedr_init_srq_user_params(struct ib_udata *udata,
+ struct qedr_srq *srq,
+ struct qedr_create_srq_ureq *ureq,
+ int access)
+{
+ struct scatterlist *sg;
+ int rc;
+
+ rc = qedr_init_user_queue(udata, srq->dev, &srq->usrq, ureq->srq_addr,
+ ureq->srq_len, false, access, 1);
+ if (rc)
+ return rc;
+
+ srq->prod_umem = ib_umem_get(srq->ibsrq.device, ureq->prod_pair_addr,
+ sizeof(struct rdma_srq_producers), access);
+ if (IS_ERR(srq->prod_umem)) {
+ qedr_free_pbl(srq->dev, &srq->usrq.pbl_info, srq->usrq.pbl_tbl);
+ ib_umem_release(srq->usrq.umem);
+ DP_ERR(srq->dev,
+ "create srq: failed ib_umem_get for producer, got %ld\n",
+ PTR_ERR(srq->prod_umem));
+ return PTR_ERR(srq->prod_umem);
+ }
+
+ sg = srq->prod_umem->sgt_append.sgt.sgl;
+ srq->hw_srq.phy_prod_pair_addr = sg_dma_address(sg);
+
+ return 0;
+}
+
+static int qedr_alloc_srq_kernel_params(struct qedr_srq *srq,
+ struct qedr_dev *dev,
+ struct ib_srq_init_attr *init_attr)
+{
+ struct qedr_srq_hwq_info *hw_srq = &srq->hw_srq;
+ struct qed_chain_init_params params = {
+ .mode = QED_CHAIN_MODE_PBL,
+ .intended_use = QED_CHAIN_USE_TO_CONSUME_PRODUCE,
+ .cnt_type = QED_CHAIN_CNT_TYPE_U32,
+ .elem_size = QEDR_SRQ_WQE_ELEM_SIZE,
+ };
+ dma_addr_t phy_prod_pair_addr;
+ u32 num_elems;
+ void *va;
+ int rc;
+
+ va = dma_alloc_coherent(&dev->pdev->dev,
+ sizeof(struct rdma_srq_producers),
+ &phy_prod_pair_addr, GFP_KERNEL);
+ if (!va) {
+ DP_ERR(dev,
+ "create srq: failed to allocate dma memory for producer\n");
+ return -ENOMEM;
+ }
+
+ hw_srq->phy_prod_pair_addr = phy_prod_pair_addr;
+ hw_srq->virt_prod_pair_addr = va;
+
+ num_elems = init_attr->attr.max_wr * RDMA_MAX_SRQ_WQE_SIZE;
+ params.num_elems = num_elems;
+
+ rc = dev->ops->common->chain_alloc(dev->cdev, &hw_srq->pbl, &params);
+ if (rc)
+ goto err0;
+
+ hw_srq->num_elems = num_elems;
+
+ return 0;
+
+err0:
+ dma_free_coherent(&dev->pdev->dev, sizeof(struct rdma_srq_producers),
+ va, phy_prod_pair_addr);
+ return rc;
+}
+
+int qedr_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init_attr,
+ struct ib_udata *udata)
+{
+ struct qed_rdma_destroy_srq_in_params destroy_in_params;
+ struct qed_rdma_create_srq_in_params in_params = {};
+ struct qedr_dev *dev = get_qedr_dev(ibsrq->device);
+ struct qed_rdma_create_srq_out_params out_params;
+ struct qedr_pd *pd = get_qedr_pd(ibsrq->pd);
+ struct qedr_create_srq_ureq ureq = {};
+ u64 pbl_base_addr, phy_prod_pair_addr;
+ struct qedr_srq_hwq_info *hw_srq;
+ u32 page_cnt, page_size;
+ struct qedr_srq *srq = get_qedr_srq(ibsrq);
+ int rc = 0;
+
+ DP_DEBUG(dev, QEDR_MSG_QP,
+ "create SRQ called from %s (pd %p)\n",
+ (udata) ? "User lib" : "kernel", pd);
+
+ if (init_attr->srq_type != IB_SRQT_BASIC &&
+ init_attr->srq_type != IB_SRQT_XRC)
+ return -EOPNOTSUPP;
+
+ rc = qedr_check_srq_params(dev, init_attr, udata);
+ if (rc)
+ return -EINVAL;
+
+ srq->dev = dev;
+ srq->is_xrc = (init_attr->srq_type == IB_SRQT_XRC);
+ hw_srq = &srq->hw_srq;
+ spin_lock_init(&srq->lock);
+
+ hw_srq->max_wr = init_attr->attr.max_wr;
+ hw_srq->max_sges = init_attr->attr.max_sge;
+
+ if (udata) {
+ if (ib_copy_from_udata(&ureq, udata, min(sizeof(ureq),
+ udata->inlen))) {
+ DP_ERR(dev,
+ "create srq: problem copying data from user space\n");
+ goto err0;
+ }
+
+ rc = qedr_init_srq_user_params(udata, srq, &ureq, 0);
+ if (rc)
+ goto err0;
+
+ page_cnt = srq->usrq.pbl_info.num_pbes;
+ pbl_base_addr = srq->usrq.pbl_tbl->pa;
+ phy_prod_pair_addr = hw_srq->phy_prod_pair_addr;
+ page_size = PAGE_SIZE;
+ } else {
+ struct qed_chain *pbl;
+
+ rc = qedr_alloc_srq_kernel_params(srq, dev, init_attr);
+ if (rc)
+ goto err0;
+
+ pbl = &hw_srq->pbl;
+ page_cnt = qed_chain_get_page_cnt(pbl);
+ pbl_base_addr = qed_chain_get_pbl_phys(pbl);
+ phy_prod_pair_addr = hw_srq->phy_prod_pair_addr;
+ page_size = QED_CHAIN_PAGE_SIZE;
+ }
+
+ in_params.pd_id = pd->pd_id;
+ in_params.pbl_base_addr = pbl_base_addr;
+ in_params.prod_pair_addr = phy_prod_pair_addr;
+ in_params.num_pages = page_cnt;
+ in_params.page_size = page_size;
+ if (srq->is_xrc) {
+ struct qedr_xrcd *xrcd = get_qedr_xrcd(init_attr->ext.xrc.xrcd);
+ struct qedr_cq *cq = get_qedr_cq(init_attr->ext.cq);
+
+ in_params.is_xrc = 1;
+ in_params.xrcd_id = xrcd->xrcd_id;
+ in_params.cq_cid = cq->icid;
+ }
+
+ rc = dev->ops->rdma_create_srq(dev->rdma_ctx, &in_params, &out_params);
+ if (rc)
+ goto err1;
+
+ srq->srq_id = out_params.srq_id;
+
+ if (udata) {
+ rc = qedr_copy_srq_uresp(dev, srq, udata);
+ if (rc)
+ goto err2;
+ }
+
+ rc = xa_insert_irq(&dev->srqs, srq->srq_id, srq, GFP_KERNEL);
+ if (rc)
+ goto err2;
+
+ DP_DEBUG(dev, QEDR_MSG_SRQ,
+ "create srq: created srq with srq_id=0x%0x\n", srq->srq_id);
+ return 0;
+
+err2:
+ destroy_in_params.srq_id = srq->srq_id;
+
+ dev->ops->rdma_destroy_srq(dev->rdma_ctx, &destroy_in_params);
+err1:
+ if (udata)
+ qedr_free_srq_user_params(srq);
+ else
+ qedr_free_srq_kernel_params(srq);
+err0:
+ return -EFAULT;
+}
+
+int qedr_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
+{
+ struct qed_rdma_destroy_srq_in_params in_params = {};
+ struct qedr_dev *dev = get_qedr_dev(ibsrq->device);
+ struct qedr_srq *srq = get_qedr_srq(ibsrq);
+
+ xa_erase_irq(&dev->srqs, srq->srq_id);
+ in_params.srq_id = srq->srq_id;
+ in_params.is_xrc = srq->is_xrc;
+ dev->ops->rdma_destroy_srq(dev->rdma_ctx, &in_params);
+
+ if (ibsrq->uobject)
+ qedr_free_srq_user_params(srq);
+ else
+ qedr_free_srq_kernel_params(srq);
+
+ DP_DEBUG(dev, QEDR_MSG_SRQ,
+ "destroy srq: destroyed srq with srq_id=0x%0x\n",
+ srq->srq_id);
+ return 0;
+}
+
+int qedr_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
+ enum ib_srq_attr_mask attr_mask, struct ib_udata *udata)
+{
+ struct qed_rdma_modify_srq_in_params in_params = {};
+ struct qedr_dev *dev = get_qedr_dev(ibsrq->device);
+ struct qedr_srq *srq = get_qedr_srq(ibsrq);
+ int rc;
+
+ if (attr_mask & IB_SRQ_MAX_WR) {
+ DP_ERR(dev,
+ "modify srq: invalid attribute mask=0x%x specified for %p\n",
+ attr_mask, srq);
+ return -EINVAL;
+ }
+
+ if (attr_mask & IB_SRQ_LIMIT) {
+ if (attr->srq_limit >= srq->hw_srq.max_wr) {
+ DP_ERR(dev,
+ "modify srq: invalid srq_limit=0x%x (max_srq_limit=0x%x)\n",
+ attr->srq_limit, srq->hw_srq.max_wr);
+ return -EINVAL;
+ }
+
+ in_params.srq_id = srq->srq_id;
+ in_params.wqe_limit = attr->srq_limit;
+ rc = dev->ops->rdma_modify_srq(dev->rdma_ctx, &in_params);
+ if (rc)
+ return rc;
+ }
+
+ srq->srq_limit = attr->srq_limit;
+
+ DP_DEBUG(dev, QEDR_MSG_SRQ,
+ "modify srq: modified srq with srq_id=0x%0x\n", srq->srq_id);
+
+ return 0;
+}
+
+static enum qed_rdma_qp_type qedr_ib_to_qed_qp_type(enum ib_qp_type ib_qp_type)
+{
+ switch (ib_qp_type) {
+ case IB_QPT_RC:
+ return QED_RDMA_QP_TYPE_RC;
+ case IB_QPT_XRC_INI:
+ return QED_RDMA_QP_TYPE_XRC_INI;
+ case IB_QPT_XRC_TGT:
+ return QED_RDMA_QP_TYPE_XRC_TGT;
+ default:
+ return QED_RDMA_QP_TYPE_INVAL;
+ }
}
static inline void
@@ -1217,13 +1739,28 @@ qedr_init_common_qp_in_params(struct qedr_dev *dev,
params->signal_all = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR);
params->fmr_and_reserved_lkey = fmr_and_reserved_lkey;
- params->pd = pd->pd_id;
- params->dpi = pd->uctx ? pd->uctx->dpi : dev->dpi;
- params->sq_cq_id = get_qedr_cq(attrs->send_cq)->icid;
+ params->qp_type = qedr_ib_to_qed_qp_type(attrs->qp_type);
params->stats_queue = 0;
- params->rq_cq_id = get_qedr_cq(attrs->recv_cq)->icid;
- params->srq_id = 0;
- params->use_srq = false;
+
+ if (pd) {
+ params->pd = pd->pd_id;
+ params->dpi = pd->uctx ? pd->uctx->dpi : dev->dpi;
+ }
+
+ if (qedr_qp_has_sq(qp))
+ params->sq_cq_id = get_qedr_cq(attrs->send_cq)->icid;
+
+ if (qedr_qp_has_rq(qp))
+ params->rq_cq_id = get_qedr_cq(attrs->recv_cq)->icid;
+
+ if (qedr_qp_has_srq(qp)) {
+ params->rq_cq_id = get_qedr_cq(attrs->recv_cq)->icid;
+ params->srq_id = qp->srq->srq_id;
+ params->use_srq = true;
+ } else {
+ params->srq_id = 0;
+ params->use_srq = false;
+ }
}
static inline void qedr_qp_user_print(struct qedr_dev *dev, struct qedr_qp *qp)
@@ -1236,19 +1773,68 @@ static inline void qedr_qp_user_print(struct qedr_dev *dev, struct qedr_qp *qp)
"rq_len=%zd"
"\n",
qp,
- qp->usq.buf_addr,
- qp->usq.buf_len, qp->urq.buf_addr, qp->urq.buf_len);
+ qedr_qp_has_sq(qp) ? qp->usq.buf_addr : 0x0,
+ qedr_qp_has_sq(qp) ? qp->usq.buf_len : 0,
+ qedr_qp_has_rq(qp) ? qp->urq.buf_addr : 0x0,
+ qedr_qp_has_sq(qp) ? qp->urq.buf_len : 0);
+}
+
+static inline void
+qedr_iwarp_populate_user_qp(struct qedr_dev *dev,
+ struct qedr_qp *qp,
+ struct qed_rdma_create_qp_out_params *out_params)
+{
+ qp->usq.pbl_tbl->va = out_params->sq_pbl_virt;
+ qp->usq.pbl_tbl->pa = out_params->sq_pbl_phys;
+
+ qedr_populate_pbls(dev, qp->usq.umem, qp->usq.pbl_tbl,
+ &qp->usq.pbl_info, FW_PAGE_SHIFT);
+ if (!qp->srq) {
+ qp->urq.pbl_tbl->va = out_params->rq_pbl_virt;
+ qp->urq.pbl_tbl->pa = out_params->rq_pbl_phys;
+ }
+
+ qedr_populate_pbls(dev, qp->urq.umem, qp->urq.pbl_tbl,
+ &qp->urq.pbl_info, FW_PAGE_SHIFT);
}
-static void qedr_cleanup_user(struct qedr_dev *dev, struct qedr_qp *qp)
+static void qedr_cleanup_user(struct qedr_dev *dev,
+ struct qedr_ucontext *ctx,
+ struct qedr_qp *qp)
{
- if (qp->usq.umem)
+ if (qedr_qp_has_sq(qp)) {
ib_umem_release(qp->usq.umem);
- qp->usq.umem = NULL;
+ qp->usq.umem = NULL;
+ }
- if (qp->urq.umem)
+ if (qedr_qp_has_rq(qp)) {
ib_umem_release(qp->urq.umem);
- qp->urq.umem = NULL;
+ qp->urq.umem = NULL;
+ }
+
+ if (rdma_protocol_roce(&dev->ibdev, 1)) {
+ qedr_free_pbl(dev, &qp->usq.pbl_info, qp->usq.pbl_tbl);
+ qedr_free_pbl(dev, &qp->urq.pbl_info, qp->urq.pbl_tbl);
+ } else {
+ kfree(qp->usq.pbl_tbl);
+ kfree(qp->urq.pbl_tbl);
+ }
+
+ if (qp->usq.db_rec_data) {
+ qedr_db_recovery_del(dev, qp->usq.db_addr,
+ &qp->usq.db_rec_data->db_data);
+ rdma_user_mmap_entry_remove(qp->usq.db_mmap_entry);
+ }
+
+ if (qp->urq.db_rec_data) {
+ qedr_db_recovery_del(dev, qp->urq.db_addr,
+ &qp->urq.db_rec_data->db_data);
+ rdma_user_mmap_entry_remove(qp->urq.db_mmap_entry);
+ }
+
+ if (rdma_protocol_iwarp(&dev->ibdev, 1))
+ qedr_db_recovery_del(dev, qp->urq.db_rec_db2_addr,
+ &qp->urq.db_rec_db2_data);
}
static int qedr_create_user_qp(struct qedr_dev *dev,
@@ -1259,43 +1845,79 @@ static int qedr_create_user_qp(struct qedr_dev *dev,
{
struct qed_rdma_create_qp_in_params in_params;
struct qed_rdma_create_qp_out_params out_params;
- struct qedr_pd *pd = get_qedr_pd(ibpd);
- struct ib_ucontext *ib_ctx = NULL;
+ struct qedr_create_qp_uresp uresp = {};
+ struct qedr_create_qp_ureq ureq = {};
+ int alloc_and_init = rdma_protocol_roce(&dev->ibdev, 1);
struct qedr_ucontext *ctx = NULL;
- struct qedr_create_qp_ureq ureq;
- int rc = -EINVAL;
+ struct qedr_pd *pd = NULL;
+ int rc = 0;
- ib_ctx = ibpd->uobject->context;
- ctx = get_qedr_ucontext(ib_ctx);
+ qp->create_type = QEDR_QP_CREATE_USER;
- memset(&ureq, 0, sizeof(ureq));
- rc = ib_copy_from_udata(&ureq, udata, sizeof(ureq));
- if (rc) {
- DP_ERR(dev, "Problem copying data from user space\n");
- return rc;
+ if (ibpd) {
+ pd = get_qedr_pd(ibpd);
+ ctx = pd->uctx;
}
- /* SQ - read access only (0), dma sync not required (0) */
- rc = qedr_init_user_queue(ib_ctx, dev, &qp->usq, ureq.sq_addr,
- ureq.sq_len, 0, 0);
- if (rc)
- return rc;
+ if (udata) {
+ rc = ib_copy_from_udata(&ureq, udata, min(sizeof(ureq),
+ udata->inlen));
+ if (rc) {
+ DP_ERR(dev, "Problem copying data from user space\n");
+ return rc;
+ }
+ }
- /* RQ - read access only (0), dma sync not required (0) */
- rc = qedr_init_user_queue(ib_ctx, dev, &qp->urq, ureq.rq_addr,
- ureq.rq_len, 0, 0);
+ if (qedr_qp_has_sq(qp)) {
+ /* SQ - read access only (0) */
+ rc = qedr_init_user_queue(udata, dev, &qp->usq, ureq.sq_addr,
+ ureq.sq_len, true, 0, alloc_and_init);
+ if (rc)
+ return rc;
+ }
- if (rc)
- return rc;
+ if (qedr_qp_has_rq(qp)) {
+ /* RQ - read access only (0) */
+ rc = qedr_init_user_queue(udata, dev, &qp->urq, ureq.rq_addr,
+ ureq.rq_len, true, 0, alloc_and_init);
+ if (rc) {
+ ib_umem_release(qp->usq.umem);
+ qp->usq.umem = NULL;
+ if (rdma_protocol_roce(&dev->ibdev, 1)) {
+ qedr_free_pbl(dev, &qp->usq.pbl_info,
+ qp->usq.pbl_tbl);
+ } else {
+ kfree(qp->usq.pbl_tbl);
+ }
+ return rc;
+ }
+ }
memset(&in_params, 0, sizeof(in_params));
qedr_init_common_qp_in_params(dev, pd, qp, attrs, false, &in_params);
in_params.qp_handle_lo = ureq.qp_handle_lo;
in_params.qp_handle_hi = ureq.qp_handle_hi;
- in_params.sq_num_pages = qp->usq.pbl_info.num_pbes;
- in_params.sq_pbl_ptr = qp->usq.pbl_tbl->pa;
- in_params.rq_num_pages = qp->urq.pbl_info.num_pbes;
- in_params.rq_pbl_ptr = qp->urq.pbl_tbl->pa;
+
+ if (qp->qp_type == IB_QPT_XRC_TGT) {
+ struct qedr_xrcd *xrcd = get_qedr_xrcd(attrs->xrcd);
+
+ in_params.xrcd_id = xrcd->xrcd_id;
+ in_params.qp_handle_lo = qp->qp_id;
+ in_params.use_srq = 1;
+ }
+
+ if (qedr_qp_has_sq(qp)) {
+ in_params.sq_num_pages = qp->usq.pbl_info.num_pbes;
+ in_params.sq_pbl_ptr = qp->usq.pbl_tbl->pa;
+ }
+
+ if (qedr_qp_has_rq(qp)) {
+ in_params.rq_num_pages = qp->urq.pbl_info.num_pbes;
+ in_params.rq_pbl_ptr = qp->urq.pbl_tbl->pa;
+ }
+
+ if (ctx)
+ SET_FIELD(in_params.flags, QED_ROCE_EDPM_MODE, ctx->edpm_mode);
qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
&in_params, &out_params);
@@ -1305,23 +1927,104 @@ static int qedr_create_user_qp(struct qedr_dev *dev,
goto err1;
}
+ if (rdma_protocol_iwarp(&dev->ibdev, 1))
+ qedr_iwarp_populate_user_qp(dev, qp, &out_params);
+
qp->qp_id = out_params.qp_id;
qp->icid = out_params.icid;
- rc = qedr_copy_qp_uresp(dev, qp, udata);
- if (rc)
- goto err;
+ if (udata) {
+ rc = qedr_copy_qp_uresp(dev, qp, udata, &uresp);
+ if (rc)
+ goto err;
+ }
- qedr_qp_user_print(dev, qp);
+ /* db offset was calculated in copy_qp_uresp, now set in the user q */
+ if (qedr_qp_has_sq(qp)) {
+ qp->usq.db_addr = ctx->dpi_addr + uresp.sq_db_offset;
+ qp->sq.max_wr = attrs->cap.max_send_wr;
+ rc = qedr_db_recovery_add(dev, qp->usq.db_addr,
+ &qp->usq.db_rec_data->db_data,
+ DB_REC_WIDTH_32B,
+ DB_REC_USER);
+ if (rc)
+ goto err;
+ }
- return 0;
+ if (qedr_qp_has_rq(qp)) {
+ qp->urq.db_addr = ctx->dpi_addr + uresp.rq_db_offset;
+ qp->rq.max_wr = attrs->cap.max_recv_wr;
+ rc = qedr_db_recovery_add(dev, qp->urq.db_addr,
+ &qp->urq.db_rec_data->db_data,
+ DB_REC_WIDTH_32B,
+ DB_REC_USER);
+ if (rc)
+ goto err;
+ }
+
+ if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
+ qp->urq.db_rec_db2_addr = ctx->dpi_addr + uresp.rq_db2_offset;
+
+ /* calculate the db_rec_db2 data since it is constant so no
+ * need to reflect from user
+ */
+ qp->urq.db_rec_db2_data.data.icid = cpu_to_le16(qp->icid);
+ qp->urq.db_rec_db2_data.data.value =
+ cpu_to_le16(DQ_TCM_IWARP_POST_RQ_CF_CMD);
+
+ rc = qedr_db_recovery_add(dev, qp->urq.db_rec_db2_addr,
+ &qp->urq.db_rec_db2_data,
+ DB_REC_WIDTH_32B,
+ DB_REC_USER);
+ if (rc)
+ goto err;
+ }
+ qedr_qp_user_print(dev, qp);
+ return rc;
err:
rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
if (rc)
DP_ERR(dev, "create qp: fatal fault. rc=%d", rc);
err1:
- qedr_cleanup_user(dev, qp);
+ qedr_cleanup_user(dev, ctx, qp);
+ return rc;
+}
+
+static int qedr_set_iwarp_db_info(struct qedr_dev *dev, struct qedr_qp *qp)
+{
+ int rc;
+
+ qp->sq.db = dev->db_addr +
+ DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
+ qp->sq.db_data.data.icid = qp->icid;
+
+ rc = qedr_db_recovery_add(dev, qp->sq.db,
+ &qp->sq.db_data,
+ DB_REC_WIDTH_32B,
+ DB_REC_KERNEL);
+ if (rc)
+ return rc;
+
+ qp->rq.db = dev->db_addr +
+ DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_IWARP_RQ_PROD);
+ qp->rq.db_data.data.icid = qp->icid;
+ qp->rq.iwarp_db2 = dev->db_addr +
+ DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_FLAGS);
+ qp->rq.iwarp_db2_data.data.icid = qp->icid;
+ qp->rq.iwarp_db2_data.data.value = DQ_TCM_IWARP_POST_RQ_CF_CMD;
+
+ rc = qedr_db_recovery_add(dev, qp->rq.db,
+ &qp->rq.db_data,
+ DB_REC_WIDTH_32B,
+ DB_REC_KERNEL);
+ if (rc)
+ return rc;
+
+ rc = qedr_db_recovery_add(dev, qp->rq.iwarp_db2,
+ &qp->rq.iwarp_db2_data,
+ DB_REC_WIDTH_32B,
+ DB_REC_KERNEL);
return rc;
}
@@ -1332,29 +2035,28 @@ qedr_roce_create_kernel_qp(struct qedr_dev *dev,
u32 n_sq_elems, u32 n_rq_elems)
{
struct qed_rdma_create_qp_out_params out_params;
+ struct qed_chain_init_params params = {
+ .mode = QED_CHAIN_MODE_PBL,
+ .cnt_type = QED_CHAIN_CNT_TYPE_U32,
+ };
int rc;
- rc = dev->ops->common->chain_alloc(dev->cdev,
- QED_CHAIN_USE_TO_PRODUCE,
- QED_CHAIN_MODE_PBL,
- QED_CHAIN_CNT_TYPE_U32,
- n_sq_elems,
- QEDR_SQE_ELEMENT_SIZE,
- &qp->sq.pbl);
+ params.intended_use = QED_CHAIN_USE_TO_PRODUCE;
+ params.num_elems = n_sq_elems;
+ params.elem_size = QEDR_SQE_ELEMENT_SIZE;
+ rc = dev->ops->common->chain_alloc(dev->cdev, &qp->sq.pbl, &params);
if (rc)
return rc;
in_params->sq_num_pages = qed_chain_get_page_cnt(&qp->sq.pbl);
in_params->sq_pbl_ptr = qed_chain_get_pbl_phys(&qp->sq.pbl);
- rc = dev->ops->common->chain_alloc(dev->cdev,
- QED_CHAIN_USE_TO_CONSUME_PRODUCE,
- QED_CHAIN_MODE_PBL,
- QED_CHAIN_CNT_TYPE_U32,
- n_rq_elems,
- QEDR_RQE_ELEMENT_SIZE,
- &qp->rq.pbl);
+ params.intended_use = QED_CHAIN_USE_TO_CONSUME_PRODUCE;
+ params.num_elems = n_rq_elems;
+ params.elem_size = QEDR_RQE_ELEMENT_SIZE;
+
+ rc = dev->ops->common->chain_alloc(dev->cdev, &qp->rq.pbl, &params);
if (rc)
return rc;
@@ -1370,9 +2072,68 @@ qedr_roce_create_kernel_qp(struct qedr_dev *dev,
qp->qp_id = out_params.qp_id;
qp->icid = out_params.icid;
- qedr_set_roce_db_info(dev, qp);
+ return qedr_set_roce_db_info(dev, qp);
+}
- return 0;
+static int
+qedr_iwarp_create_kernel_qp(struct qedr_dev *dev,
+ struct qedr_qp *qp,
+ struct qed_rdma_create_qp_in_params *in_params,
+ u32 n_sq_elems, u32 n_rq_elems)
+{
+ struct qed_rdma_create_qp_out_params out_params;
+ struct qed_chain_init_params params = {
+ .mode = QED_CHAIN_MODE_PBL,
+ .cnt_type = QED_CHAIN_CNT_TYPE_U32,
+ };
+ int rc;
+
+ in_params->sq_num_pages = QED_CHAIN_PAGE_CNT(n_sq_elems,
+ QEDR_SQE_ELEMENT_SIZE,
+ QED_CHAIN_PAGE_SIZE,
+ QED_CHAIN_MODE_PBL);
+ in_params->rq_num_pages = QED_CHAIN_PAGE_CNT(n_rq_elems,
+ QEDR_RQE_ELEMENT_SIZE,
+ QED_CHAIN_PAGE_SIZE,
+ QED_CHAIN_MODE_PBL);
+
+ qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
+ in_params, &out_params);
+
+ if (!qp->qed_qp)
+ return -EINVAL;
+
+ /* Now we allocate the chain */
+
+ params.intended_use = QED_CHAIN_USE_TO_PRODUCE;
+ params.num_elems = n_sq_elems;
+ params.elem_size = QEDR_SQE_ELEMENT_SIZE;
+ params.ext_pbl_virt = out_params.sq_pbl_virt;
+ params.ext_pbl_phys = out_params.sq_pbl_phys;
+
+ rc = dev->ops->common->chain_alloc(dev->cdev, &qp->sq.pbl, &params);
+ if (rc)
+ goto err;
+
+ params.intended_use = QED_CHAIN_USE_TO_CONSUME_PRODUCE;
+ params.num_elems = n_rq_elems;
+ params.elem_size = QEDR_RQE_ELEMENT_SIZE;
+ params.ext_pbl_virt = out_params.rq_pbl_virt;
+ params.ext_pbl_phys = out_params.rq_pbl_phys;
+
+ rc = dev->ops->common->chain_alloc(dev->cdev, &qp->rq.pbl, &params);
+ if (rc)
+ goto err;
+
+ qp->qp_id = out_params.qp_id;
+ qp->icid = out_params.icid;
+
+ return qedr_set_iwarp_db_info(dev, qp);
+
+err:
+ dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
+
+ return rc;
}
static void qedr_cleanup_kernel(struct qedr_dev *dev, struct qedr_qp *qp)
@@ -1382,6 +2143,20 @@ static void qedr_cleanup_kernel(struct qedr_dev *dev, struct qedr_qp *qp)
dev->ops->common->chain_free(dev->cdev, &qp->rq.pbl);
kfree(qp->rqe_wr_id);
+
+ /* GSI qp is not registered to db mechanism so no need to delete */
+ if (qp->qp_type == IB_QPT_GSI)
+ return;
+
+ qedr_db_recovery_del(dev, qp->sq.db, &qp->sq.db_data);
+
+ if (!qp->srq) {
+ qedr_db_recovery_del(dev, qp->rq.db, &qp->rq.db_data);
+
+ if (rdma_protocol_iwarp(&dev->ibdev, 1))
+ qedr_db_recovery_del(dev, qp->rq.iwarp_db2,
+ &qp->rq.iwarp_db2_data);
+ }
}
static int qedr_create_kernel_qp(struct qedr_dev *dev,
@@ -1397,6 +2172,7 @@ static int qedr_create_kernel_qp(struct qedr_dev *dev,
u32 n_sq_entries;
memset(&in_params, 0, sizeof(in_params));
+ qp->create_type = QEDR_QP_CREATE_KERNEL;
/* A single work request may take up to QEDR_MAX_SQ_WQE_SIZE elements in
* the ring. The ring should allow at least a single WR, even if the
@@ -1411,7 +2187,7 @@ static int qedr_create_kernel_qp(struct qedr_dev *dev,
qp->sq.max_wr = min_t(u32, attrs->cap.max_send_wr * dev->wq_multiplier,
dev->attr.max_sqe);
- qp->wqe_wr_id = kzalloc(qp->sq.max_wr * sizeof(*qp->wqe_wr_id),
+ qp->wqe_wr_id = kcalloc(qp->sq.max_wr, sizeof(*qp->wqe_wr_id),
GFP_KERNEL);
if (!qp->wqe_wr_id) {
DP_ERR(dev, "create qp: failed SQ shadow memory allocation\n");
@@ -1429,7 +2205,7 @@ static int qedr_create_kernel_qp(struct qedr_dev *dev,
qp->rq.max_wr = (u16) max_t(u32, attrs->cap.max_recv_wr, 1);
/* Allocate driver internal RQ array */
- qp->rqe_wr_id = kzalloc(qp->rq.max_wr * sizeof(*qp->rqe_wr_id),
+ qp->rqe_wr_id = kcalloc(qp->rq.max_wr, sizeof(*qp->rqe_wr_id),
GFP_KERNEL);
if (!qp->rqe_wr_id) {
DP_ERR(dev,
@@ -1447,33 +2223,64 @@ static int qedr_create_kernel_qp(struct qedr_dev *dev,
n_rq_elems = qp->rq.max_wr * QEDR_MAX_RQE_ELEMENTS_PER_RQE;
- rc = qedr_roce_create_kernel_qp(dev, qp, &in_params,
- n_sq_elems, n_rq_elems);
+ if (rdma_protocol_iwarp(&dev->ibdev, 1))
+ rc = qedr_iwarp_create_kernel_qp(dev, qp, &in_params,
+ n_sq_elems, n_rq_elems);
+ else
+ rc = qedr_roce_create_kernel_qp(dev, qp, &in_params,
+ n_sq_elems, n_rq_elems);
if (rc)
qedr_cleanup_kernel(dev, qp);
return rc;
}
-struct ib_qp *qedr_create_qp(struct ib_pd *ibpd,
- struct ib_qp_init_attr *attrs,
- struct ib_udata *udata)
+static int qedr_free_qp_resources(struct qedr_dev *dev, struct qedr_qp *qp,
+ struct ib_udata *udata)
{
- struct qedr_dev *dev = get_qedr_dev(ibpd->device);
+ struct qedr_ucontext *ctx =
+ rdma_udata_to_drv_context(udata, struct qedr_ucontext,
+ ibucontext);
+ int rc;
+
+ if (qp->qp_type != IB_QPT_GSI) {
+ rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
+ if (rc)
+ return rc;
+ }
+
+ if (qp->create_type == QEDR_QP_CREATE_USER)
+ qedr_cleanup_user(dev, ctx, qp);
+ else
+ qedr_cleanup_kernel(dev, qp);
+
+ return 0;
+}
+
+int qedr_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs,
+ struct ib_udata *udata)
+{
+ struct qedr_xrcd *xrcd = NULL;
+ struct ib_pd *ibpd = ibqp->pd;
struct qedr_pd *pd = get_qedr_pd(ibpd);
- struct qedr_qp *qp;
- struct ib_qp *ibqp;
+ struct qedr_dev *dev = get_qedr_dev(ibqp->device);
+ struct qedr_qp *qp = get_qedr_qp(ibqp);
int rc = 0;
+ if (attrs->create_flags)
+ return -EOPNOTSUPP;
+
+ if (attrs->qp_type == IB_QPT_XRC_TGT)
+ xrcd = get_qedr_xrcd(attrs->xrcd);
+ else
+ pd = get_qedr_pd(ibpd);
+
DP_DEBUG(dev, QEDR_MSG_QP, "create qp: called from %s, pd=%p\n",
udata ? "user library" : "kernel", pd);
- rc = qedr_check_qp_attrs(ibpd, dev, attrs);
+ rc = qedr_check_qp_attrs(ibpd, dev, attrs, udata);
if (rc)
- return ERR_PTR(rc);
-
- if (attrs->srq)
- return ERR_PTR(-EINVAL);
+ return rc;
DP_DEBUG(dev, QEDR_MSG_QP,
"create qp: called from %s, event_handler=%p, eepd=%p sq_cq=%p, sq_icid=%d, rq_cq=%p, rq_icid=%d\n",
@@ -1481,42 +2288,37 @@ struct ib_qp *qedr_create_qp(struct ib_pd *ibpd,
get_qedr_cq(attrs->send_cq),
get_qedr_cq(attrs->send_cq)->icid,
get_qedr_cq(attrs->recv_cq),
- get_qedr_cq(attrs->recv_cq)->icid);
-
- qp = kzalloc(sizeof(*qp), GFP_KERNEL);
- if (!qp) {
- DP_ERR(dev, "create qp: failed allocating memory\n");
- return ERR_PTR(-ENOMEM);
- }
+ attrs->recv_cq ? get_qedr_cq(attrs->recv_cq)->icid : 0);
qedr_set_common_qp_params(dev, qp, pd, attrs);
- if (attrs->qp_type == IB_QPT_GSI) {
- ibqp = qedr_create_gsi_qp(dev, attrs, qp);
- if (IS_ERR(ibqp))
- kfree(qp);
- return ibqp;
- }
+ if (attrs->qp_type == IB_QPT_GSI)
+ return qedr_create_gsi_qp(dev, attrs, qp);
- if (udata)
+ if (udata || xrcd)
rc = qedr_create_user_qp(dev, qp, ibpd, udata, attrs);
else
rc = qedr_create_kernel_qp(dev, qp, ibpd, attrs);
if (rc)
- goto err;
+ return rc;
qp->ibqp.qp_num = qp->qp_id;
- return &qp->ibqp;
+ if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
+ rc = xa_insert(&dev->qps, qp->qp_id, qp, GFP_KERNEL);
+ if (rc)
+ goto out_free_qp_resources;
+ }
-err:
- kfree(qp);
+ return 0;
- return ERR_PTR(-EFAULT);
+out_free_qp_resources:
+ qedr_free_qp_resources(dev, qp, udata);
+ return -EFAULT;
}
-enum ib_qp_state qedr_get_ibqp_state(enum qed_roce_qp_state qp_state)
+static enum ib_qp_state qedr_get_ibqp_state(enum qed_roce_qp_state qp_state)
{
switch (qp_state) {
case QED_ROCE_QP_STATE_RESET:
@@ -1537,7 +2339,8 @@ enum ib_qp_state qedr_get_ibqp_state(enum qed_roce_qp_state qp_state)
return IB_QPS_ERR;
}
-enum qed_roce_qp_state qedr_get_state_from_ibqp(enum ib_qp_state qp_state)
+static enum qed_roce_qp_state qedr_get_state_from_ibqp(
+ enum ib_qp_state qp_state)
{
switch (qp_state) {
case IB_QPS_RESET:
@@ -1557,36 +2360,25 @@ enum qed_roce_qp_state qedr_get_state_from_ibqp(enum ib_qp_state qp_state)
}
}
-static void qedr_reset_qp_hwq_info(struct qedr_qp_hwq_info *qph)
-{
- qed_chain_reset(&qph->pbl);
- qph->prod = 0;
- qph->cons = 0;
- qph->wqe_cons = 0;
- qph->db_data.data.value = cpu_to_le16(0);
-}
-
static int qedr_update_qp_state(struct qedr_dev *dev,
struct qedr_qp *qp,
+ enum qed_roce_qp_state cur_state,
enum qed_roce_qp_state new_state)
{
int status = 0;
- if (new_state == qp->state)
- return 1;
+ if (new_state == cur_state)
+ return 0;
- switch (qp->state) {
+ switch (cur_state) {
case QED_ROCE_QP_STATE_RESET:
switch (new_state) {
case QED_ROCE_QP_STATE_INIT:
- qp->prev_wqe_size = 0;
- qedr_reset_qp_hwq_info(&qp->sq);
- qedr_reset_qp_hwq_info(&qp->rq);
break;
default:
status = -EINVAL;
break;
- };
+ }
break;
case QED_ROCE_QP_STATE_INIT:
switch (new_state) {
@@ -1594,10 +2386,10 @@ static int qedr_update_qp_state(struct qedr_dev *dev,
/* Update doorbell (in case post_recv was
* done before move to RTR)
*/
- wmb();
- writel(qp->rq.db_data.raw, qp->rq.db);
- /* Make sure write takes effect */
- mmiowb();
+
+ if (rdma_protocol_roce(&dev->ibdev, 1)) {
+ writel(qp->rq.db_data.raw, qp->rq.db);
+ }
break;
case QED_ROCE_QP_STATE_ERR:
break;
@@ -1605,7 +2397,7 @@ static int qedr_update_qp_state(struct qedr_dev *dev,
/* Invalid state change. */
status = -EINVAL;
break;
- };
+ }
break;
case QED_ROCE_QP_STATE_RTR:
/* RTR->XXX */
@@ -1618,7 +2410,7 @@ static int qedr_update_qp_state(struct qedr_dev *dev,
/* Invalid state change. */
status = -EINVAL;
break;
- };
+ }
break;
case QED_ROCE_QP_STATE_RTS:
/* RTS->XXX */
@@ -1631,7 +2423,7 @@ static int qedr_update_qp_state(struct qedr_dev *dev,
/* Invalid state change. */
status = -EINVAL;
break;
- };
+ }
break;
case QED_ROCE_QP_STATE_SQD:
/* SQD->XXX */
@@ -1643,22 +2435,30 @@ static int qedr_update_qp_state(struct qedr_dev *dev,
/* Invalid state change. */
status = -EINVAL;
break;
- };
+ }
break;
case QED_ROCE_QP_STATE_ERR:
/* ERR->XXX */
switch (new_state) {
case QED_ROCE_QP_STATE_RESET:
+ if ((qp->rq.prod != qp->rq.cons) ||
+ (qp->sq.prod != qp->sq.cons)) {
+ DP_NOTICE(dev,
+ "Error->Reset with rq/sq not empty rq.prod=%x rq.cons=%x sq.prod=%x sq.cons=%x\n",
+ qp->rq.prod, qp->rq.cons, qp->sq.prod,
+ qp->sq.cons);
+ status = -EINVAL;
+ }
break;
default:
status = -EINVAL;
break;
- };
+ }
break;
default:
status = -EINVAL;
break;
- };
+ }
return status;
}
@@ -1669,29 +2469,35 @@ int qedr_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
struct qedr_qp *qp = get_qedr_qp(ibqp);
struct qed_rdma_modify_qp_in_params qp_params = { 0 };
struct qedr_dev *dev = get_qedr_dev(&qp->dev->ibdev);
+ const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
enum ib_qp_state old_qp_state, new_qp_state;
+ enum qed_roce_qp_state cur_state;
int rc = 0;
DP_DEBUG(dev, QEDR_MSG_QP,
"modify qp: qp %p attr_mask=0x%x, state=%d", qp, attr_mask,
attr->qp_state);
+ if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
+ return -EOPNOTSUPP;
+
old_qp_state = qedr_get_ibqp_state(qp->state);
if (attr_mask & IB_QP_STATE)
new_qp_state = attr->qp_state;
else
new_qp_state = old_qp_state;
- if (!ib_modify_qp_is_ok
- (old_qp_state, new_qp_state, ibqp->qp_type, attr_mask,
- IB_LINK_LAYER_ETHERNET)) {
- DP_ERR(dev,
- "modify qp: invalid attribute mask=0x%x specified for\n"
- "qpn=0x%x of type=0x%x old_qp_state=0x%x, new_qp_state=0x%x\n",
- attr_mask, qp->qp_id, ibqp->qp_type, old_qp_state,
- new_qp_state);
- rc = -EINVAL;
- goto err;
+ if (rdma_protocol_roce(&dev->ibdev, 1)) {
+ if (!ib_modify_qp_is_ok(old_qp_state, new_qp_state,
+ ibqp->qp_type, attr_mask)) {
+ DP_ERR(dev,
+ "modify qp: invalid attribute mask=0x%x specified for\n"
+ "qpn=0x%x of type=0x%x old_qp_state=0x%x, new_qp_state=0x%x\n",
+ attr_mask, qp->qp_id, ibqp->qp_type,
+ old_qp_state, new_qp_state);
+ rc = -EINVAL;
+ goto err;
+ }
}
/* Translate the masks... */
@@ -1730,6 +2536,9 @@ int qedr_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
}
if (attr_mask & (IB_QP_AV | IB_QP_PATH_MTU)) {
+ if (rdma_protocol_iwarp(&dev->ibdev, 1))
+ return -EINVAL;
+
if (attr_mask & IB_QP_PATH_MTU) {
if (attr->path_mtu < IB_MTU_256 ||
attr->path_mtu > IB_MTU_4096) {
@@ -1751,17 +2560,17 @@ int qedr_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
SET_FIELD(qp_params.modify_flags,
QED_ROCE_MODIFY_QP_VALID_ADDRESS_VECTOR, 1);
- qp_params.traffic_class_tos = attr->ah_attr.grh.traffic_class;
- qp_params.flow_label = attr->ah_attr.grh.flow_label;
- qp_params.hop_limit_ttl = attr->ah_attr.grh.hop_limit;
+ qp_params.traffic_class_tos = grh->traffic_class;
+ qp_params.flow_label = grh->flow_label;
+ qp_params.hop_limit_ttl = grh->hop_limit;
- qp->sgid_idx = attr->ah_attr.grh.sgid_index;
+ qp->sgid_idx = grh->sgid_index;
rc = get_gid_info_from_table(ibqp, attr, attr_mask, &qp_params);
if (rc) {
DP_ERR(dev,
"modify qp: problems with GID index %d (rc=%d)\n",
- attr->ah_attr.grh.sgid_index, rc);
+ grh->sgid_index, rc);
return rc;
}
@@ -1781,7 +2590,6 @@ int qedr_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
qp_params.sgid.dwords[2], qp_params.sgid.dwords[3]);
DP_DEBUG(dev, QEDR_MSG_QP, "remote_mac=[%pM]\n",
qp_params.remote_mac_addr);
-;
qp_params.mtu = qp->mtu;
qp_params.lb_indication = false;
@@ -1800,18 +2608,25 @@ int qedr_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
SET_FIELD(qp_params.modify_flags,
QED_ROCE_MODIFY_QP_VALID_ACK_TIMEOUT, 1);
- qp_params.ack_timeout = attr->timeout;
- if (attr->timeout) {
- u32 temp;
-
- temp = 4096 * (1UL << attr->timeout) / 1000 / 1000;
- /* FW requires [msec] */
- qp_params.ack_timeout = temp;
- } else {
- /* Infinite */
+ /* The received timeout value is an exponent used like this:
+ * "12.7.34 LOCAL ACK TIMEOUT
+ * Value representing the transport (ACK) timeout for use by
+ * the remote, expressed as: 4.096 * 2^timeout [usec]"
+ * The FW expects timeout in msec so we need to divide the usec
+ * result by 1000. We'll approximate 1000~2^10, and 4.096 ~ 2^2,
+ * so we get: 2^2 * 2^timeout / 2^10 = 2^(timeout - 8).
+ * The value of zero means infinite so we use a 'max_t' to make
+ * sure that sub 1 msec values will be configured as 1 msec.
+ */
+ if (attr->timeout)
+ qp_params.ack_timeout =
+ 1 << max_t(int, attr->timeout - 8, 0);
+ else
qp_params.ack_timeout = 0;
- }
+
+ qp->timeout = attr->timeout;
}
+
if (attr_mask & IB_QP_RETRY_CNT) {
SET_FIELD(qp_params.modify_flags,
QED_ROCE_MODIFY_QP_VALID_RETRY_CNT, 1);
@@ -1884,13 +2699,25 @@ int qedr_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
qp->dest_qp_num = attr->dest_qp_num;
}
+ cur_state = qp->state;
+
+ /* Update the QP state before the actual ramrod to prevent a race with
+ * fast path. Modifying the QP state to error will cause the device to
+ * flush the CQEs and while polling the flushed CQEs will considered as
+ * a potential issue if the QP isn't in error state.
+ */
+ if ((attr_mask & IB_QP_STATE) && qp->qp_type != IB_QPT_GSI &&
+ !udata && qp_params.new_state == QED_ROCE_QP_STATE_ERR)
+ qp->state = QED_ROCE_QP_STATE_ERR;
+
if (qp->qp_type != IB_QPT_GSI)
rc = dev->ops->rdma_modify_qp(dev->rdma_ctx,
qp->qed_qp, &qp_params);
if (attr_mask & IB_QP_STATE) {
if ((qp->qp_type != IB_QPT_GSI) && (!udata))
- rc = qedr_update_qp_state(dev, qp, qp_params.new_state);
+ rc = qedr_update_qp_state(dev, qp, cur_state,
+ qp_params.new_state);
qp->state = qp_params.new_state;
}
@@ -1922,17 +2749,20 @@ int qedr_query_qp(struct ib_qp *ibqp,
int rc = 0;
memset(&params, 0, sizeof(params));
-
- rc = dev->ops->rdma_query_qp(dev->rdma_ctx, qp->qed_qp, &params);
- if (rc)
- goto err;
-
memset(qp_attr, 0, sizeof(*qp_attr));
memset(qp_init_attr, 0, sizeof(*qp_init_attr));
- qp_attr->qp_state = qedr_get_ibqp_state(params.state);
+ if (qp->qp_type != IB_QPT_GSI) {
+ rc = dev->ops->rdma_query_qp(dev->rdma_ctx, qp->qed_qp, &params);
+ if (rc)
+ goto err;
+ qp_attr->qp_state = qedr_get_ibqp_state(params.state);
+ } else {
+ qp_attr->qp_state = qedr_get_ibqp_state(QED_ROCE_QP_STATE_RTS);
+ }
+
qp_attr->cur_qp_state = qedr_get_ibqp_state(params.state);
- qp_attr->path_mtu = iboe_get_mtu(params.mtu);
+ qp_attr->path_mtu = ib_mtu_int_to_enum(params.mtu);
qp_attr->path_mig_state = IB_MIG_MIGRATED;
qp_attr->rq_psn = params.rq_psn;
qp_attr->sq_psn = params.sq_psn;
@@ -1944,28 +2774,24 @@ int qedr_query_qp(struct ib_qp *ibqp,
qp_attr->cap.max_recv_wr = qp->rq.max_wr;
qp_attr->cap.max_send_sge = qp->sq.max_sges;
qp_attr->cap.max_recv_sge = qp->rq.max_sges;
- qp_attr->cap.max_inline_data = qp->max_inline_data;
+ qp_attr->cap.max_inline_data = dev->attr.max_inline;
qp_init_attr->cap = qp_attr->cap;
- memcpy(&qp_attr->ah_attr.grh.dgid.raw[0], &params.dgid.bytes[0],
- sizeof(qp_attr->ah_attr.grh.dgid.raw));
-
- qp_attr->ah_attr.grh.flow_label = params.flow_label;
- qp_attr->ah_attr.grh.sgid_index = qp->sgid_idx;
- qp_attr->ah_attr.grh.hop_limit = params.hop_limit_ttl;
- qp_attr->ah_attr.grh.traffic_class = params.traffic_class_tos;
-
- qp_attr->ah_attr.ah_flags = IB_AH_GRH;
- qp_attr->ah_attr.port_num = 1;
- qp_attr->ah_attr.sl = 0;
- qp_attr->timeout = params.timeout;
+ qp_attr->ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE;
+ rdma_ah_set_grh(&qp_attr->ah_attr, NULL,
+ params.flow_label, qp->sgid_idx,
+ params.hop_limit_ttl, params.traffic_class_tos);
+ rdma_ah_set_dgid_raw(&qp_attr->ah_attr, &params.dgid.bytes[0]);
+ rdma_ah_set_port_num(&qp_attr->ah_attr, 1);
+ rdma_ah_set_sl(&qp_attr->ah_attr, 0);
+ qp_attr->timeout = qp->timeout;
qp_attr->rnr_retry = params.rnr_retry;
qp_attr->retry_cnt = params.retry_cnt;
qp_attr->min_rnr_timer = params.min_rnr_nak_timer;
qp_attr->pkey_index = params.pkey_index;
qp_attr->port_num = 1;
- qp_attr->ah_attr.src_path_bits = 0;
- qp_attr->ah_attr.static_rate = 0;
+ rdma_ah_set_path_bits(&qp_attr->ah_attr, 0);
+ rdma_ah_set_static_rate(&qp_attr->ah_attr, 0);
qp_attr->alt_pkey_index = 0;
qp_attr->alt_port_num = 0;
qp_attr->alt_timeout = 0;
@@ -1983,75 +2809,86 @@ err:
return rc;
}
-int qedr_free_qp_resources(struct qedr_dev *dev, struct qedr_qp *qp)
-{
- int rc = 0;
-
- if (qp->qp_type != IB_QPT_GSI) {
- rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
- if (rc)
- return rc;
- }
-
- if (qp->ibqp.uobject && qp->ibqp.uobject->context)
- qedr_cleanup_user(dev, qp);
- else
- qedr_cleanup_kernel(dev, qp);
-
- return 0;
-}
-
-int qedr_destroy_qp(struct ib_qp *ibqp)
+int qedr_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
{
struct qedr_qp *qp = get_qedr_qp(ibqp);
struct qedr_dev *dev = qp->dev;
struct ib_qp_attr attr;
int attr_mask = 0;
- int rc = 0;
DP_DEBUG(dev, QEDR_MSG_QP, "destroy qp: destroying %p, qp type=%d\n",
qp, qp->qp_type);
- if ((qp->state != QED_ROCE_QP_STATE_RESET) &&
- (qp->state != QED_ROCE_QP_STATE_ERR) &&
- (qp->state != QED_ROCE_QP_STATE_INIT)) {
+ if (rdma_protocol_roce(&dev->ibdev, 1)) {
+ if ((qp->state != QED_ROCE_QP_STATE_RESET) &&
+ (qp->state != QED_ROCE_QP_STATE_ERR) &&
+ (qp->state != QED_ROCE_QP_STATE_INIT)) {
- attr.qp_state = IB_QPS_ERR;
- attr_mask |= IB_QP_STATE;
+ attr.qp_state = IB_QPS_ERR;
+ attr_mask |= IB_QP_STATE;
- /* Change the QP state to ERROR */
- qedr_modify_qp(ibqp, &attr, attr_mask, NULL);
+ /* Change the QP state to ERROR */
+ qedr_modify_qp(ibqp, &attr, attr_mask, NULL);
+ }
+ } else {
+ /* If connection establishment started the WAIT_FOR_CONNECT
+ * bit will be on and we need to Wait for the establishment
+ * to complete before destroying the qp.
+ */
+ if (test_and_set_bit(QEDR_IWARP_CM_WAIT_FOR_CONNECT,
+ &qp->iwarp_cm_flags))
+ wait_for_completion(&qp->iwarp_cm_comp);
+
+ /* If graceful disconnect started, the WAIT_FOR_DISCONNECT
+ * bit will be on, and we need to wait for the disconnect to
+ * complete before continuing. We can use the same completion,
+ * iwarp_cm_comp, since this is the only place that waits for
+ * this completion and it is sequential. In addition,
+ * disconnect can't occur before the connection is fully
+ * established, therefore if WAIT_FOR_DISCONNECT is on it
+ * means WAIT_FOR_CONNECT is also on and the completion for
+ * CONNECT already occurred.
+ */
+ if (test_and_set_bit(QEDR_IWARP_CM_WAIT_FOR_DISCONNECT,
+ &qp->iwarp_cm_flags))
+ wait_for_completion(&qp->iwarp_cm_comp);
}
if (qp->qp_type == IB_QPT_GSI)
qedr_destroy_gsi_qp(dev);
- qedr_free_qp_resources(dev, qp);
+ /* We need to remove the entry from the xarray before we release the
+ * qp_id to avoid a race of the qp_id being reallocated and failing
+ * on xa_insert
+ */
+ if (rdma_protocol_iwarp(&dev->ibdev, 1))
+ xa_erase(&dev->qps, qp->qp_id);
- kfree(qp);
+ qedr_free_qp_resources(dev, qp, udata);
- return rc;
+ if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
+ qedr_iw_qp_rem_ref(&qp->ibqp);
+ wait_for_completion(&qp->qp_rel_comp);
+ }
+
+ return 0;
}
-struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr,
- struct ib_udata *udata)
+int qedr_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
+ struct ib_udata *udata)
{
- struct qedr_ah *ah;
-
- ah = kzalloc(sizeof(*ah), GFP_ATOMIC);
- if (!ah)
- return ERR_PTR(-ENOMEM);
+ struct qedr_ah *ah = get_qedr_ah(ibah);
- ah->attr = *attr;
+ rdma_copy_ah_attr(&ah->attr, init_attr->ah_attr);
- return &ah->ibah;
+ return 0;
}
-int qedr_destroy_ah(struct ib_ah *ibah)
+int qedr_destroy_ah(struct ib_ah *ibah, u32 flags)
{
struct qedr_ah *ah = get_qedr_ah(ibah);
- kfree(ah);
+ rdma_destroy_ah_attr(&ah->attr);
return 0;
}
@@ -2087,8 +2924,8 @@ static int init_mr_info(struct qedr_dev *dev, struct mr_info *info,
goto done;
info->pbl_table = qedr_alloc_pbl_tbl(dev, &info->pbl_info, GFP_KERNEL);
- if (!info->pbl_table) {
- rc = -ENOMEM;
+ if (IS_ERR(info->pbl_table)) {
+ rc = PTR_ERR(info->pbl_table);
goto done;
}
@@ -2099,7 +2936,7 @@ static int init_mr_info(struct qedr_dev *dev, struct mr_info *info,
* list and allocating another one
*/
tmp = qedr_alloc_pbl_tbl(dev, &info->pbl_info, GFP_KERNEL);
- if (!tmp) {
+ if (IS_ERR(tmp)) {
DP_DEBUG(dev, QEDR_MSG_MR, "Extra PBL is not allocated\n");
goto done;
}
@@ -2116,13 +2953,17 @@ done:
}
struct ib_mr *qedr_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
- u64 usr_addr, int acc, struct ib_udata *udata)
+ u64 usr_addr, int acc, struct ib_dmah *dmah,
+ struct ib_udata *udata)
{
struct qedr_dev *dev = get_qedr_dev(ibpd->device);
struct qedr_mr *mr;
struct qedr_pd *pd;
int rc = -ENOMEM;
+ if (dmah)
+ return ERR_PTR(-EOPNOTSUPP);
+
pd = get_qedr_pd(ibpd);
DP_DEBUG(dev, QEDR_MSG_MR,
"qedr_register user mr pd = %d start = %lld, len = %lld, usr_addr = %lld, acc = %d\n",
@@ -2137,22 +2978,27 @@ struct ib_mr *qedr_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
mr->type = QEDR_MR_USER;
- mr->umem = ib_umem_get(ibpd->uobject->context, start, len, acc, 0);
+ mr->umem = ib_umem_get(ibpd->device, start, len, acc);
if (IS_ERR(mr->umem)) {
rc = -EFAULT;
goto err0;
}
- rc = init_mr_info(dev, &mr->info, ib_umem_page_count(mr->umem), 1);
+ rc = init_mr_info(dev, &mr->info,
+ ib_umem_num_dma_blocks(mr->umem, PAGE_SIZE), 1);
if (rc)
goto err1;
qedr_populate_pbls(dev, mr->umem, mr->info.pbl_table,
- &mr->info.pbl_info);
+ &mr->info.pbl_info, PAGE_SHIFT);
rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
if (rc) {
- DP_ERR(dev, "roce alloc tid returned an error %d\n", rc);
+ if (rc == -EINVAL)
+ DP_ERR(dev, "Out of MR resources\n");
+ else
+ DP_ERR(dev, "roce alloc tid returned error %d\n", rc);
+
goto err1;
}
@@ -2169,11 +3015,9 @@ struct ib_mr *qedr_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
mr->hw_mr.pbl_ptr = mr->info.pbl_table[0].pa;
mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered;
mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size);
- mr->hw_mr.page_size_log = ilog2(mr->umem->page_size);
- mr->hw_mr.fbo = ib_umem_offset(mr->umem);
+ mr->hw_mr.page_size_log = PAGE_SHIFT;
mr->hw_mr.length = len;
mr->hw_mr.vaddr = usr_addr;
- mr->hw_mr.zbva = false;
mr->hw_mr.phy_mr = false;
mr->hw_mr.dma_mr = false;
@@ -2201,7 +3045,7 @@ err0:
return ERR_PTR(rc);
}
-int qedr_dereg_mr(struct ib_mr *ib_mr)
+int qedr_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata)
{
struct qedr_mr *mr = get_qedr_mr(ib_mr);
struct qedr_dev *dev = get_qedr_dev(ib_mr->device);
@@ -2213,19 +3057,19 @@ int qedr_dereg_mr(struct ib_mr *ib_mr)
dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
- if ((mr->type != QEDR_MR_DMA) && (mr->type != QEDR_MR_FRMR))
- qedr_free_pbl(dev, &mr->info.pbl_info, mr->info.pbl_table);
+ if (mr->type != QEDR_MR_DMA)
+ free_mr_info(dev, &mr->info);
/* it could be user registered memory. */
- if (mr->umem)
- ib_umem_release(mr->umem);
+ ib_umem_release(mr->umem);
kfree(mr);
return rc;
}
-struct qedr_mr *__qedr_alloc_mr(struct ib_pd *ibpd, int max_page_list_len)
+static struct qedr_mr *__qedr_alloc_mr(struct ib_pd *ibpd,
+ int max_page_list_len)
{
struct qedr_pd *pd = get_qedr_pd(ibpd);
struct qedr_dev *dev = get_qedr_dev(ibpd->device);
@@ -2249,8 +3093,12 @@ struct qedr_mr *__qedr_alloc_mr(struct ib_pd *ibpd, int max_page_list_len)
rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
if (rc) {
- DP_ERR(dev, "roce alloc tid returned an error %d\n", rc);
- goto err0;
+ if (rc == -EINVAL)
+ DP_ERR(dev, "Out of MR resources\n");
+ else
+ DP_ERR(dev, "roce alloc tid returned error %d\n", rc);
+
+ goto err1;
}
/* Index only, 18 bit long, lkey = itid << 8 | key */
@@ -2266,17 +3114,15 @@ struct qedr_mr *__qedr_alloc_mr(struct ib_pd *ibpd, int max_page_list_len)
mr->hw_mr.pbl_ptr = 0;
mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered;
mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size);
- mr->hw_mr.fbo = 0;
mr->hw_mr.length = 0;
mr->hw_mr.vaddr = 0;
- mr->hw_mr.zbva = false;
mr->hw_mr.phy_mr = true;
mr->hw_mr.dma_mr = false;
rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
if (rc) {
DP_ERR(dev, "roce register tid returned an error %d\n", rc);
- goto err1;
+ goto err2;
}
mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
@@ -2285,17 +3131,18 @@ struct qedr_mr *__qedr_alloc_mr(struct ib_pd *ibpd, int max_page_list_len)
DP_DEBUG(dev, QEDR_MSG_MR, "alloc frmr: %x\n", mr->ibmr.lkey);
return mr;
-err1:
+err2:
dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
+err1:
+ qedr_free_pbl(dev, &mr->info.pbl_info, mr->info.pbl_table);
err0:
kfree(mr);
return ERR_PTR(rc);
}
-struct ib_mr *qedr_alloc_mr(struct ib_pd *ibpd,
- enum ib_mr_type mr_type, u32 max_num_sg)
+struct ib_mr *qedr_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type,
+ u32 max_num_sg)
{
- struct qedr_dev *dev;
struct qedr_mr *mr;
if (mr_type != IB_MR_TYPE_MEM_REG)
@@ -2306,8 +3153,6 @@ struct ib_mr *qedr_alloc_mr(struct ib_pd *ibpd,
if (IS_ERR(mr))
return ERR_PTR(-EINVAL);
- dev = mr->dev;
-
return &mr->ibmr;
}
@@ -2319,7 +3164,7 @@ static int qedr_set_page(struct ib_mr *ibmr, u64 addr)
u32 pbes_in_page;
if (unlikely(mr->npages == mr->info.pbl_info.num_pbes)) {
- DP_ERR(mr->dev, "qedr_set_page failes when %d\n", mr->npages);
+ DP_ERR(mr->dev, "qedr_set_page fails when %d\n", mr->npages);
return -ENOMEM;
}
@@ -2384,7 +3229,11 @@ struct ib_mr *qedr_get_dma_mr(struct ib_pd *ibpd, int acc)
rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
if (rc) {
- DP_ERR(dev, "roce alloc tid returned an error %d\n", rc);
+ if (rc == -EINVAL)
+ DP_ERR(dev, "Out of MR resources\n");
+ else
+ DP_ERR(dev, "roce alloc tid returned error %d\n", rc);
+
goto err1;
}
@@ -2444,9 +3293,9 @@ static void swap_wqe_data64(u64 *p)
static u32 qedr_prepare_sq_inline_data(struct qedr_dev *dev,
struct qedr_qp *qp, u8 *wqe_size,
- struct ib_send_wr *wr,
- struct ib_send_wr **bad_wr, u8 *bits,
- u8 bit)
+ const struct ib_send_wr *wr,
+ const struct ib_send_wr **bad_wr,
+ u8 *bits, u8 bit)
{
u32 data_size = sge_data_len(wr->sg_list, wr->num_sge);
char *seg_prt, *wqe;
@@ -2529,7 +3378,7 @@ static u32 qedr_prepare_sq_inline_data(struct qedr_dev *dev,
} while (0)
static u32 qedr_prepare_sq_sges(struct qedr_qp *qp, u8 *wqe_size,
- struct ib_send_wr *wr)
+ const struct ib_send_wr *wr)
{
u32 data_size = 0;
int i;
@@ -2553,8 +3402,8 @@ static u32 qedr_prepare_sq_rdma_data(struct qedr_dev *dev,
struct qedr_qp *qp,
struct rdma_sq_rdma_wqe_1st *rwqe,
struct rdma_sq_rdma_wqe_2nd *rwqe2,
- struct ib_send_wr *wr,
- struct ib_send_wr **bad_wr)
+ const struct ib_send_wr *wr,
+ const struct ib_send_wr **bad_wr)
{
rwqe2->r_key = cpu_to_le32(rdma_wr(wr)->rkey);
DMA_REGPAIR_LE(rwqe2->remote_va, rdma_wr(wr)->remote_addr);
@@ -2576,8 +3425,8 @@ static u32 qedr_prepare_sq_send_data(struct qedr_dev *dev,
struct qedr_qp *qp,
struct rdma_sq_send_wqe_1st *swqe,
struct rdma_sq_send_wqe_2st *swqe2,
- struct ib_send_wr *wr,
- struct ib_send_wr **bad_wr)
+ const struct ib_send_wr *wr,
+ const struct ib_send_wr **bad_wr)
{
memset(swqe2, 0, sizeof(*swqe2));
if (wr->send_flags & IB_SEND_INLINE) {
@@ -2593,7 +3442,7 @@ static u32 qedr_prepare_sq_send_data(struct qedr_dev *dev,
static int qedr_prepare_reg(struct qedr_qp *qp,
struct rdma_sq_fmr_wqe_1st *fwqe1,
- struct ib_reg_wr *wr)
+ const struct ib_reg_wr *wr)
{
struct qedr_mr *mr = get_qedr_mr(wr->mr);
struct rdma_sq_fmr_wqe_2nd *fwqe2;
@@ -2603,6 +3452,8 @@ static int qedr_prepare_reg(struct qedr_qp *qp,
fwqe1->addr.lo = lower_32_bits(mr->ibmr.iova);
fwqe1->l_key = wr->key;
+ fwqe2->access_ctrl = 0;
+
SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_REMOTE_READ,
!!(wr->access & IB_ACCESS_REMOTE_READ));
SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_REMOTE_WRITE,
@@ -2627,7 +3478,7 @@ static int qedr_prepare_reg(struct qedr_qp *qp,
return 0;
}
-enum ib_wc_opcode qedr_ib_to_wc_opcode(enum ib_wr_opcode opcode)
+static enum ib_wc_opcode qedr_ib_to_wc_opcode(enum ib_wr_opcode opcode)
{
switch (opcode) {
case IB_WR_RDMA_WRITE:
@@ -2638,6 +3489,7 @@ enum ib_wc_opcode qedr_ib_to_wc_opcode(enum ib_wr_opcode opcode)
case IB_WR_SEND_WITH_INV:
return IB_WC_SEND;
case IB_WR_RDMA_READ:
+ case IB_WR_RDMA_READ_WITH_INV:
return IB_WC_RDMA_READ;
case IB_WR_ATOMIC_CMP_AND_SWP:
return IB_WC_COMP_SWAP;
@@ -2652,7 +3504,8 @@ enum ib_wc_opcode qedr_ib_to_wc_opcode(enum ib_wr_opcode opcode)
}
}
-inline bool qedr_can_post_send(struct qedr_qp *qp, struct ib_send_wr *wr)
+static inline bool qedr_can_post_send(struct qedr_qp *qp,
+ const struct ib_send_wr *wr)
{
int wq_is_full, err_wr, pbl_is_full;
struct qedr_dev *dev = qp->dev;
@@ -2689,8 +3542,8 @@ inline bool qedr_can_post_send(struct qedr_qp *qp, struct ib_send_wr *wr)
return true;
}
-int __qedr_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
- struct ib_send_wr **bad_wr)
+static int __qedr_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+ const struct ib_send_wr **bad_wr)
{
struct qedr_dev *dev = get_qedr_dev(ibqp->device);
struct qedr_qp *qp = get_qedr_qp(ibqp);
@@ -2730,12 +3583,17 @@ int __qedr_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
switch (wr->opcode) {
case IB_WR_SEND_WITH_IMM:
+ if (unlikely(rdma_protocol_iwarp(&dev->ibdev, 1))) {
+ rc = -EINVAL;
+ *bad_wr = wr;
+ break;
+ }
wqe->req_type = RDMA_SQ_REQ_TYPE_SEND_WITH_IMM;
swqe = (struct rdma_sq_send_wqe_1st *)wqe;
swqe->wqe_size = 2;
swqe2 = qed_chain_produce(&qp->sq.pbl);
- swqe->inv_key_or_imm_data = cpu_to_le32(wr->ex.imm_data);
+ swqe->inv_key_or_imm_data = cpu_to_le32(be32_to_cpu(wr->ex.imm_data));
length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2,
wr, bad_wr);
swqe->length = cpu_to_le32(length);
@@ -2771,6 +3629,11 @@ int __qedr_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
break;
case IB_WR_RDMA_WRITE_WITH_IMM:
+ if (unlikely(rdma_protocol_iwarp(&dev->ibdev, 1))) {
+ rc = -EINVAL;
+ *bad_wr = wr;
+ break;
+ }
wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_WR_WITH_IMM;
rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;
@@ -2798,11 +3661,8 @@ int __qedr_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length;
break;
case IB_WR_RDMA_READ_WITH_INV:
- DP_ERR(dev,
- "RDMA READ WITH INVALIDATE not supported\n");
- *bad_wr = wr;
- rc = -EINVAL;
- break;
+ SET_FIELD2(wqe->flags, RDMA_SQ_RDMA_WQE_1ST_READ_INV_FLG, 1);
+ fallthrough; /* same is identical to RDMA READ */
case IB_WR_RDMA_READ:
wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_RD;
@@ -2897,8 +3757,8 @@ int __qedr_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
return rc;
}
-int qedr_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
- struct ib_send_wr **bad_wr)
+int qedr_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+ const struct ib_send_wr **bad_wr)
{
struct qedr_dev *dev = get_qedr_dev(ibqp->device);
struct qedr_qp *qp = get_qedr_qp(ibqp);
@@ -2912,15 +3772,17 @@ int qedr_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
spin_lock_irqsave(&qp->q_lock, flags);
- if ((qp->state != QED_ROCE_QP_STATE_RTS) &&
- (qp->state != QED_ROCE_QP_STATE_ERR) &&
- (qp->state != QED_ROCE_QP_STATE_SQD)) {
- spin_unlock_irqrestore(&qp->q_lock, flags);
- *bad_wr = wr;
- DP_DEBUG(dev, QEDR_MSG_CQ,
- "QP in wrong state! QP icid=0x%x state %d\n",
- qp->icid, qp->state);
- return -EINVAL;
+ if (rdma_protocol_roce(&dev->ibdev, 1)) {
+ if ((qp->state != QED_ROCE_QP_STATE_RTS) &&
+ (qp->state != QED_ROCE_QP_STATE_ERR) &&
+ (qp->state != QED_ROCE_QP_STATE_SQD)) {
+ spin_unlock_irqrestore(&qp->q_lock, flags);
+ *bad_wr = wr;
+ DP_DEBUG(dev, QEDR_MSG_CQ,
+ "QP in wrong state! QP icid=0x%x state %d\n",
+ qp->icid, qp->state);
+ return -EINVAL;
+ }
}
while (wr) {
@@ -2942,20 +3804,118 @@ int qedr_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
* vane. However this is not harmful (as long as the producer value is
* unchanged). For performance reasons we avoid checking for this
* redundant doorbell.
+ *
+ * qp->wqe_wr_id is accessed during qedr_poll_cq, as
+ * soon as we give the doorbell, we could get a completion
+ * for this wr, therefore we need to make sure that the
+ * memory is updated before giving the doorbell.
+ * During qedr_poll_cq, rmb is called before accessing the
+ * cqe. This covers for the smp_rmb as well.
*/
- wmb();
+ smp_wmb();
writel(qp->sq.db_data.raw, qp->sq.db);
- /* Make sure write sticks */
- mmiowb();
-
spin_unlock_irqrestore(&qp->q_lock, flags);
return rc;
}
-int qedr_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
- struct ib_recv_wr **bad_wr)
+static u32 qedr_srq_elem_left(struct qedr_srq_hwq_info *hw_srq)
+{
+ u32 used;
+
+ /* Calculate number of elements used based on producer
+ * count and consumer count and subtract it from max
+ * work request supported so that we get elements left.
+ */
+ used = hw_srq->wr_prod_cnt - (u32)atomic_read(&hw_srq->wr_cons_cnt);
+
+ return hw_srq->max_wr - used;
+}
+
+int qedr_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
+ const struct ib_recv_wr **bad_wr)
+{
+ struct qedr_srq *srq = get_qedr_srq(ibsrq);
+ struct qedr_srq_hwq_info *hw_srq;
+ struct qedr_dev *dev = srq->dev;
+ struct qed_chain *pbl;
+ unsigned long flags;
+ int status = 0;
+ u32 num_sge;
+
+ spin_lock_irqsave(&srq->lock, flags);
+
+ hw_srq = &srq->hw_srq;
+ pbl = &srq->hw_srq.pbl;
+ while (wr) {
+ struct rdma_srq_wqe_header *hdr;
+ int i;
+
+ if (!qedr_srq_elem_left(hw_srq) ||
+ wr->num_sge > srq->hw_srq.max_sges) {
+ DP_ERR(dev, "Can't post WR (%d,%d) || (%d > %d)\n",
+ hw_srq->wr_prod_cnt,
+ atomic_read(&hw_srq->wr_cons_cnt),
+ wr->num_sge, srq->hw_srq.max_sges);
+ status = -ENOMEM;
+ *bad_wr = wr;
+ break;
+ }
+
+ hdr = qed_chain_produce(pbl);
+ num_sge = wr->num_sge;
+ /* Set number of sge and work request id in header */
+ SRQ_HDR_SET(hdr, wr->wr_id, num_sge);
+
+ srq->hw_srq.wr_prod_cnt++;
+ hw_srq->wqe_prod++;
+ hw_srq->sge_prod++;
+
+ DP_DEBUG(dev, QEDR_MSG_SRQ,
+ "SRQ WR: SGEs: %d with wr_id[%d] = %llx\n",
+ wr->num_sge, hw_srq->wqe_prod, wr->wr_id);
+
+ for (i = 0; i < wr->num_sge; i++) {
+ struct rdma_srq_sge *srq_sge = qed_chain_produce(pbl);
+
+ /* Set SGE length, lkey and address */
+ SRQ_SGE_SET(srq_sge, wr->sg_list[i].addr,
+ wr->sg_list[i].length, wr->sg_list[i].lkey);
+
+ DP_DEBUG(dev, QEDR_MSG_SRQ,
+ "[%d]: len %d key %x addr %x:%x\n",
+ i, srq_sge->length, srq_sge->l_key,
+ srq_sge->addr.hi, srq_sge->addr.lo);
+ hw_srq->sge_prod++;
+ }
+
+ /* Update WQE and SGE information before
+ * updating producer.
+ */
+ dma_wmb();
+
+ /* SRQ producer is 8 bytes. Need to update SGE producer index
+ * in first 4 bytes and need to update WQE producer in
+ * next 4 bytes.
+ */
+ srq->hw_srq.virt_prod_pair_addr->sge_prod = cpu_to_le32(hw_srq->sge_prod);
+ /* Make sure sge producer is updated first */
+ dma_wmb();
+ srq->hw_srq.virt_prod_pair_addr->wqe_prod = cpu_to_le32(hw_srq->wqe_prod);
+
+ wr = wr->next;
+ }
+
+ DP_DEBUG(dev, QEDR_MSG_SRQ, "POST: Elements in S-RQ: %d\n",
+ qed_chain_get_elem_left(pbl));
+ spin_unlock_irqrestore(&srq->lock, flags);
+
+ return status;
+}
+
+int qedr_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
+ const struct ib_recv_wr **bad_wr)
{
struct qedr_qp *qp = get_qedr_qp(ibqp);
struct qedr_dev *dev = qp->dev;
@@ -2967,12 +3927,6 @@ int qedr_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
spin_lock_irqsave(&qp->q_lock, flags);
- if (qp->state == QED_ROCE_QP_STATE_RESET) {
- spin_unlock_irqrestore(&qp->q_lock, flags);
- *bad_wr = wr;
- return -EINVAL;
- }
-
while (wr) {
int i;
@@ -2999,7 +3953,7 @@ int qedr_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
SET_FIELD(flags, RDMA_RQ_SGE_NUM_SGES,
wr->num_sge);
- SET_FIELD(flags, RDMA_RQ_SGE_L_KEY,
+ SET_FIELD(flags, RDMA_RQ_SGE_L_KEY_LO,
wr->sg_list[i].lkey);
RQ_SGE_SET(rqe, wr->sg_list[i].addr,
@@ -3018,7 +3972,7 @@ int qedr_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
/* First one must include the number
* of SGE in the list
*/
- SET_FIELD(flags, RDMA_RQ_SGE_L_KEY, 0);
+ SET_FIELD(flags, RDMA_RQ_SGE_L_KEY_LO, 0);
SET_FIELD(flags, RDMA_RQ_SGE_NUM_SGES, 1);
RQ_SGE_SET(rqe, 0, 0, flags);
@@ -3030,15 +3984,22 @@ int qedr_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
qedr_inc_sw_prod(&qp->rq);
- /* Flush all the writes before signalling doorbell */
- wmb();
+ /* qp->rqe_wr_id is accessed during qedr_poll_cq, as
+ * soon as we give the doorbell, we could get a completion
+ * for this wr, therefore we need to make sure that the
+ * memory is update before giving the doorbell.
+ * During qedr_poll_cq, rmb is called before accessing the
+ * cqe. This covers for the smp_rmb as well.
+ */
+ smp_wmb();
qp->rq.db_data.data.value++;
writel(qp->rq.db_data.raw, qp->rq.db);
- /* Make sure write sticks */
- mmiowb();
+ if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
+ writel(qp->rq.iwarp_db2_data.raw, qp->rq.iwarp_db2);
+ }
wr = wr->next;
}
@@ -3128,6 +4089,10 @@ static int process_req(struct qedr_dev *dev, struct qedr_qp *qp,
case IB_WC_REG_MR:
qp->wqe_wr_id[qp->sq.cons].mr->info.completed++;
break;
+ case IB_WC_RDMA_READ:
+ case IB_WC_SEND:
+ wc->byte_len = qp->wqe_wr_id[qp->sq.cons].bytes_len;
+ break;
default:
break;
}
@@ -3157,9 +4122,10 @@ static int qedr_poll_cq_req(struct qedr_dev *dev,
IB_WC_SUCCESS, 0);
break;
case RDMA_CQE_REQ_STS_WORK_REQUEST_FLUSHED_ERR:
- DP_ERR(dev,
- "Error: POLL CQ with RDMA_CQE_REQ_STS_WORK_REQUEST_FLUSHED_ERR. CQ icid=0x%x, QP icid=0x%x\n",
- cq->icid, qp->icid);
+ if (qp->state != QED_ROCE_QP_STATE_ERR)
+ DP_DEBUG(dev, QEDR_MSG_CQ,
+ "Error: POLL CQ with RDMA_CQE_REQ_STS_WORK_REQUEST_FLUSHED_ERR. CQ icid=0x%x, QP icid=0x%x\n",
+ cq->icid, qp->icid);
cnt = process_req(dev, qp, cq, num_entries, wc, req->sq_cons,
IB_WC_WR_FLUSH_ERR, 1);
break;
@@ -3248,63 +4214,112 @@ static int qedr_poll_cq_req(struct qedr_dev *dev,
return cnt;
}
-static void __process_resp_one(struct qedr_dev *dev, struct qedr_qp *qp,
- struct qedr_cq *cq, struct ib_wc *wc,
- struct rdma_cqe_responder *resp, u64 wr_id)
+static inline int qedr_cqe_resp_status_to_ib(u8 status)
{
- enum ib_wc_status wc_status = IB_WC_SUCCESS;
- u8 flags;
-
- wc->opcode = IB_WC_RECV;
- wc->wc_flags = 0;
-
- switch (resp->status) {
+ switch (status) {
case RDMA_CQE_RESP_STS_LOCAL_ACCESS_ERR:
- wc_status = IB_WC_LOC_ACCESS_ERR;
- break;
+ return IB_WC_LOC_ACCESS_ERR;
case RDMA_CQE_RESP_STS_LOCAL_LENGTH_ERR:
- wc_status = IB_WC_LOC_LEN_ERR;
- break;
+ return IB_WC_LOC_LEN_ERR;
case RDMA_CQE_RESP_STS_LOCAL_QP_OPERATION_ERR:
- wc_status = IB_WC_LOC_QP_OP_ERR;
- break;
+ return IB_WC_LOC_QP_OP_ERR;
case RDMA_CQE_RESP_STS_LOCAL_PROTECTION_ERR:
- wc_status = IB_WC_LOC_PROT_ERR;
- break;
+ return IB_WC_LOC_PROT_ERR;
case RDMA_CQE_RESP_STS_MEMORY_MGT_OPERATION_ERR:
- wc_status = IB_WC_MW_BIND_ERR;
- break;
+ return IB_WC_MW_BIND_ERR;
case RDMA_CQE_RESP_STS_REMOTE_INVALID_REQUEST_ERR:
- wc_status = IB_WC_REM_INV_RD_REQ_ERR;
- break;
+ return IB_WC_REM_INV_RD_REQ_ERR;
case RDMA_CQE_RESP_STS_OK:
- wc_status = IB_WC_SUCCESS;
- wc->byte_len = le32_to_cpu(resp->length);
+ return IB_WC_SUCCESS;
+ default:
+ return IB_WC_GENERAL_ERR;
+ }
+}
- flags = resp->flags & QEDR_RESP_RDMA_IMM;
+static inline int qedr_set_ok_cqe_resp_wc(struct rdma_cqe_responder *resp,
+ struct ib_wc *wc)
+{
+ wc->status = IB_WC_SUCCESS;
+ wc->byte_len = le32_to_cpu(resp->length);
- if (flags == QEDR_RESP_RDMA_IMM)
+ if (resp->flags & QEDR_RESP_IMM) {
+ wc->ex.imm_data = cpu_to_be32(le32_to_cpu(resp->imm_data_or_inv_r_Key));
+ wc->wc_flags |= IB_WC_WITH_IMM;
+
+ if (resp->flags & QEDR_RESP_RDMA)
wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
- if (flags == QEDR_RESP_RDMA_IMM || flags == QEDR_RESP_IMM) {
- wc->ex.imm_data =
- le32_to_cpu(resp->imm_data_or_inv_r_Key);
- wc->wc_flags |= IB_WC_WITH_IMM;
- }
- break;
- default:
- wc->status = IB_WC_GENERAL_ERR;
- DP_ERR(dev, "Invalid CQE status detected\n");
+ if (resp->flags & QEDR_RESP_INV)
+ return -EINVAL;
+
+ } else if (resp->flags & QEDR_RESP_INV) {
+ wc->ex.imm_data = le32_to_cpu(resp->imm_data_or_inv_r_Key);
+ wc->wc_flags |= IB_WC_WITH_INVALIDATE;
+
+ if (resp->flags & QEDR_RESP_RDMA)
+ return -EINVAL;
+
+ } else if (resp->flags & QEDR_RESP_RDMA) {
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static void __process_resp_one(struct qedr_dev *dev, struct qedr_qp *qp,
+ struct qedr_cq *cq, struct ib_wc *wc,
+ struct rdma_cqe_responder *resp, u64 wr_id)
+{
+ /* Must fill fields before qedr_set_ok_cqe_resp_wc() */
+ wc->opcode = IB_WC_RECV;
+ wc->wc_flags = 0;
+
+ if (likely(resp->status == RDMA_CQE_RESP_STS_OK)) {
+ if (qedr_set_ok_cqe_resp_wc(resp, wc))
+ DP_ERR(dev,
+ "CQ %p (icid=%d) has invalid CQE responder flags=0x%x\n",
+ cq, cq->icid, resp->flags);
+
+ } else {
+ wc->status = qedr_cqe_resp_status_to_ib(resp->status);
+ if (wc->status == IB_WC_GENERAL_ERR)
+ DP_ERR(dev,
+ "CQ %p (icid=%d) contains an invalid CQE status %d\n",
+ cq, cq->icid, resp->status);
}
- /* fill WC */
- wc->status = wc_status;
+ /* Fill the rest of the WC */
wc->vendor_err = 0;
wc->src_qp = qp->id;
wc->qp = &qp->ibqp;
wc->wr_id = wr_id;
}
+static int process_resp_one_srq(struct qedr_dev *dev, struct qedr_qp *qp,
+ struct qedr_cq *cq, struct ib_wc *wc,
+ struct rdma_cqe_responder *resp)
+{
+ struct qedr_srq *srq = qp->srq;
+ u64 wr_id;
+
+ wr_id = HILO_GEN(le32_to_cpu(resp->srq_wr_id.hi),
+ le32_to_cpu(resp->srq_wr_id.lo), u64);
+
+ if (resp->status == RDMA_CQE_RESP_STS_WORK_REQUEST_FLUSHED_ERR) {
+ wc->status = IB_WC_WR_FLUSH_ERR;
+ wc->vendor_err = 0;
+ wc->wr_id = wr_id;
+ wc->byte_len = 0;
+ wc->src_qp = qp->id;
+ wc->qp = &qp->ibqp;
+ wc->wr_id = wr_id;
+ } else {
+ __process_resp_one(dev, qp, cq, wc, resp, wr_id);
+ }
+ atomic_inc(&srq->hw_srq.wr_cons_cnt);
+
+ return 1;
+}
static int process_resp_one(struct qedr_dev *dev, struct qedr_qp *qp,
struct qedr_cq *cq, struct ib_wc *wc,
struct rdma_cqe_responder *resp)
@@ -3348,12 +4363,25 @@ static int process_resp_flush(struct qedr_qp *qp, struct qedr_cq *cq,
static void try_consume_resp_cqe(struct qedr_cq *cq, struct qedr_qp *qp,
struct rdma_cqe_responder *resp, int *update)
{
- if (le16_to_cpu(resp->rq_cons) == qp->rq.wqe_cons) {
+ if (le16_to_cpu(resp->rq_cons_or_srq_id) == qp->rq.wqe_cons) {
consume_cqe(cq);
*update |= 1;
}
}
+static int qedr_poll_cq_resp_srq(struct qedr_dev *dev, struct qedr_qp *qp,
+ struct qedr_cq *cq, int num_entries,
+ struct ib_wc *wc,
+ struct rdma_cqe_responder *resp)
+{
+ int cnt;
+
+ cnt = process_resp_one_srq(dev, qp, cq, wc, resp);
+ consume_cqe(cq);
+
+ return cnt;
+}
+
static int qedr_poll_cq_resp(struct qedr_dev *dev, struct qedr_qp *qp,
struct qedr_cq *cq, int num_entries,
struct ib_wc *wc, struct rdma_cqe_responder *resp,
@@ -3363,7 +4391,7 @@ static int qedr_poll_cq_resp(struct qedr_dev *dev, struct qedr_qp *qp,
if (resp->status == RDMA_CQE_RESP_STS_WORK_REQUEST_FLUSHED_ERR) {
cnt = process_resp_flush(qp, cq, num_entries, wc,
- resp->rq_cons);
+ resp->rq_cons_or_srq_id);
try_consume_resp_cqe(cq, qp, resp, update);
} else {
cnt = process_resp_one(dev, qp, cq, wc, resp);
@@ -3387,16 +4415,24 @@ int qedr_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
{
struct qedr_dev *dev = get_qedr_dev(ibcq->device);
struct qedr_cq *cq = get_qedr_cq(ibcq);
- union rdma_cqe *cqe = cq->latest_cqe;
+ union rdma_cqe *cqe;
u32 old_cons, new_cons;
unsigned long flags;
int update = 0;
int done = 0;
+ if (cq->destroyed) {
+ DP_ERR(dev,
+ "warning: poll was invoked after destroy for cq %p (icid=%d)\n",
+ cq, cq->icid);
+ return 0;
+ }
+
if (cq->cq_type == QEDR_CQ_TYPE_GSI)
return qedr_gsi_poll_cq(ibcq, num_entries, wc);
spin_lock_irqsave(&cq->cq_lock, flags);
+ cqe = cq->latest_cqe;
old_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
while (num_entries && is_valid_cqe(cq, cqe)) {
struct qedr_qp *qp;
@@ -3423,6 +4459,11 @@ int qedr_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
cnt = qedr_poll_cq_resp(dev, qp, cq, num_entries, wc,
&cqe->resp, &update);
break;
+ case RDMA_CQE_TYPE_RESPONDER_SRQ:
+ cnt = qedr_poll_cq_resp_srq(dev, qp, cq, num_entries,
+ wc, &cqe->resp);
+ update = 1;
+ break;
case RDMA_CQE_TYPE_INVALID:
default:
DP_ERR(dev, "Error: invalid CQE type = %d\n",
@@ -3449,38 +4490,10 @@ int qedr_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
}
int qedr_process_mad(struct ib_device *ibdev, int process_mad_flags,
- u8 port_num,
- const struct ib_wc *in_wc,
- const struct ib_grh *in_grh,
- const struct ib_mad_hdr *mad_hdr,
- size_t in_mad_size, struct ib_mad_hdr *out_mad,
- size_t *out_mad_size, u16 *out_mad_pkey_index)
+ u32 port_num, const struct ib_wc *in_wc,
+ const struct ib_grh *in_grh, const struct ib_mad *in,
+ struct ib_mad *out_mad, size_t *out_mad_size,
+ u16 *out_mad_pkey_index)
{
- struct qedr_dev *dev = get_qedr_dev(ibdev);
-
- DP_DEBUG(dev, QEDR_MSG_GSI,
- "QEDR_PROCESS_MAD in_mad %x %x %x %x %x %x %x %x\n",
- mad_hdr->attr_id, mad_hdr->base_version, mad_hdr->attr_mod,
- mad_hdr->class_specific, mad_hdr->class_version,
- mad_hdr->method, mad_hdr->mgmt_class, mad_hdr->status);
return IB_MAD_RESULT_SUCCESS;
}
-
-int qedr_port_immutable(struct ib_device *ibdev, u8 port_num,
- struct ib_port_immutable *immutable)
-{
- struct ib_port_attr attr;
- int err;
-
- err = qedr_query_port(ibdev, port_num, &attr);
- if (err)
- return err;
-
- immutable->pkey_tbl_len = attr.pkey_tbl_len;
- immutable->gid_tbl_len = attr.gid_tbl_len;
- immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE |
- RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
- immutable->max_mad_size = IB_MGMT_MAD_SIZE;
-
- return 0;
-}
diff --git a/drivers/infiniband/hw/qedr/verbs.h b/drivers/infiniband/hw/qedr/verbs.h
index 070677ca4d19..62420a15101b 100644
--- a/drivers/infiniband/hw/qedr/verbs.h
+++ b/drivers/infiniband/hw/qedr/verbs.h
@@ -34,51 +34,53 @@
int qedr_query_device(struct ib_device *ibdev,
struct ib_device_attr *attr, struct ib_udata *udata);
-int qedr_query_port(struct ib_device *, u8 port, struct ib_port_attr *props);
-int qedr_modify_port(struct ib_device *, u8 port, int mask,
- struct ib_port_modify *props);
+int qedr_query_port(struct ib_device *ibdev, u32 port,
+ struct ib_port_attr *props);
-int qedr_query_gid(struct ib_device *, u8 port, int index, union ib_gid *gid);
+int qedr_iw_query_gid(struct ib_device *ibdev, u32 port,
+ int index, union ib_gid *gid);
-int qedr_query_pkey(struct ib_device *, u8 port, u16 index, u16 *pkey);
+int qedr_query_pkey(struct ib_device *ibdev, u32 port, u16 index, u16 *pkey);
-struct ib_ucontext *qedr_alloc_ucontext(struct ib_device *, struct ib_udata *);
-int qedr_dealloc_ucontext(struct ib_ucontext *);
+int qedr_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata);
+void qedr_dealloc_ucontext(struct ib_ucontext *uctx);
-int qedr_mmap(struct ib_ucontext *, struct vm_area_struct *vma);
-int qedr_del_gid(struct ib_device *device, u8 port_num,
- unsigned int index, void **context);
-int qedr_add_gid(struct ib_device *device, u8 port_num,
- unsigned int index, const union ib_gid *gid,
- const struct ib_gid_attr *attr, void **context);
-struct ib_pd *qedr_alloc_pd(struct ib_device *,
- struct ib_ucontext *, struct ib_udata *);
-int qedr_dealloc_pd(struct ib_pd *pd);
-
-struct ib_cq *qedr_create_cq(struct ib_device *ibdev,
- const struct ib_cq_init_attr *attr,
- struct ib_ucontext *ib_ctx,
- struct ib_udata *udata);
-int qedr_resize_cq(struct ib_cq *, int cqe, struct ib_udata *);
-int qedr_destroy_cq(struct ib_cq *);
+int qedr_mmap(struct ib_ucontext *ucontext, struct vm_area_struct *vma);
+void qedr_mmap_free(struct rdma_user_mmap_entry *rdma_entry);
+int qedr_alloc_pd(struct ib_pd *pd, struct ib_udata *udata);
+int qedr_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata);
+int qedr_alloc_xrcd(struct ib_xrcd *ibxrcd, struct ib_udata *udata);
+int qedr_dealloc_xrcd(struct ib_xrcd *ibxrcd, struct ib_udata *udata);
+int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+ struct uverbs_attr_bundle *attrs);
+int qedr_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata);
int qedr_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags);
-struct ib_qp *qedr_create_qp(struct ib_pd *, struct ib_qp_init_attr *attrs,
- struct ib_udata *);
+int qedr_create_qp(struct ib_qp *qp, struct ib_qp_init_attr *attrs,
+ struct ib_udata *udata);
int qedr_modify_qp(struct ib_qp *, struct ib_qp_attr *attr,
int attr_mask, struct ib_udata *udata);
int qedr_query_qp(struct ib_qp *, struct ib_qp_attr *qp_attr,
int qp_attr_mask, struct ib_qp_init_attr *);
-int qedr_destroy_qp(struct ib_qp *ibqp);
+int qedr_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata);
-struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr,
- struct ib_udata *udata);
-int qedr_destroy_ah(struct ib_ah *ibah);
+int qedr_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *attr,
+ struct ib_udata *udata);
+int qedr_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
+ enum ib_srq_attr_mask attr_mask, struct ib_udata *udata);
+int qedr_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr);
+int qedr_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata);
+int qedr_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
+ const struct ib_recv_wr **bad_recv_wr);
+int qedr_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
+ struct ib_udata *udata);
+int qedr_destroy_ah(struct ib_ah *ibah, u32 flags);
-int qedr_dereg_mr(struct ib_mr *);
+int qedr_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata);
struct ib_mr *qedr_get_dma_mr(struct ib_pd *, int acc);
struct ib_mr *qedr_reg_user_mr(struct ib_pd *, u64 start, u64 length,
- u64 virt, int acc, struct ib_udata *);
+ u64 virt, int acc, struct ib_dmah *dmah,
+ struct ib_udata *);
int qedr_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
int sg_nents, unsigned int *sg_offset);
@@ -86,17 +88,16 @@ int qedr_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
struct ib_mr *qedr_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
u32 max_num_sg);
int qedr_poll_cq(struct ib_cq *, int num_entries, struct ib_wc *wc);
-int qedr_post_send(struct ib_qp *, struct ib_send_wr *,
- struct ib_send_wr **bad_wr);
-int qedr_post_recv(struct ib_qp *, struct ib_recv_wr *,
- struct ib_recv_wr **bad_wr);
+int qedr_post_send(struct ib_qp *, const struct ib_send_wr *,
+ const struct ib_send_wr **bad_wr);
+int qedr_post_recv(struct ib_qp *, const struct ib_recv_wr *,
+ const struct ib_recv_wr **bad_wr);
int qedr_process_mad(struct ib_device *ibdev, int process_mad_flags,
- u8 port_num, const struct ib_wc *in_wc,
- const struct ib_grh *in_grh,
- const struct ib_mad_hdr *in_mad,
- size_t in_mad_size, struct ib_mad_hdr *out_mad,
- size_t *out_mad_size, u16 *out_mad_pkey_index);
+ u32 port_num, const struct ib_wc *in_wc,
+ const struct ib_grh *in_grh, const struct ib_mad *in_mad,
+ struct ib_mad *out_mad, size_t *out_mad_size,
+ u16 *out_mad_pkey_index);
-int qedr_port_immutable(struct ib_device *ibdev, u8 port_num,
+int qedr_port_immutable(struct ib_device *ibdev, u32 port_num,
struct ib_port_immutable *immutable);
#endif
diff --git a/drivers/infiniband/hw/qib/Kconfig b/drivers/infiniband/hw/qib/Kconfig
deleted file mode 100644
index e0fdb9201423..000000000000
--- a/drivers/infiniband/hw/qib/Kconfig
+++ /dev/null
@@ -1,15 +0,0 @@
-config INFINIBAND_QIB
- tristate "Intel PCIe HCA support"
- depends on 64BIT && INFINIBAND_RDMAVT
- ---help---
- This is a low-level driver for Intel PCIe QLE InfiniBand host
- channel adapters. This driver does not support the Intel
- HyperTransport card (model QHT7140).
-
-config INFINIBAND_QIB_DCA
- bool "QIB DCA support"
- depends on INFINIBAND_QIB && DCA && SMP && !(INFINIBAND_QIB=y && DCA=m)
- default y
- ---help---
- Setting this enables DCA support on some Intel chip sets
- with the iba7322 HCA.
diff --git a/drivers/infiniband/hw/qib/Makefile b/drivers/infiniband/hw/qib/Makefile
deleted file mode 100644
index 79ebd79e8405..000000000000
--- a/drivers/infiniband/hw/qib/Makefile
+++ /dev/null
@@ -1,16 +0,0 @@
-obj-$(CONFIG_INFINIBAND_QIB) += ib_qib.o
-
-ib_qib-y := qib_diag.o qib_driver.o qib_eeprom.o \
- qib_file_ops.o qib_fs.o qib_init.o qib_intr.o \
- qib_mad.o qib_pcie.o qib_pio_copy.o \
- qib_qp.o qib_qsfp.o qib_rc.o qib_ruc.o qib_sdma.o \
- qib_sysfs.o qib_twsi.o qib_tx.o qib_uc.o qib_ud.o \
- qib_user_pages.o qib_user_sdma.o qib_iba7220.o \
- qib_sd7220.o qib_iba7322.o qib_verbs.o
-
-# 6120 has no fallback if no MSI interrupts, others can do INTx
-ib_qib-$(CONFIG_PCI_MSI) += qib_iba6120.o
-
-ib_qib-$(CONFIG_X86_64) += qib_wc_x86_64.o
-ib_qib-$(CONFIG_PPC64) += qib_wc_ppc64.o
-ib_qib-$(CONFIG_DEBUG_FS) += qib_debugfs.o
diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h
deleted file mode 100644
index a3e21a25cea5..000000000000
--- a/drivers/infiniband/hw/qib/qib.h
+++ /dev/null
@@ -1,1552 +0,0 @@
-#ifndef _QIB_KERNEL_H
-#define _QIB_KERNEL_H
-/*
- * Copyright (c) 2012, 2013 Intel Corporation. All rights reserved.
- * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved.
- * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-/*
- * This header file is the base header file for qlogic_ib kernel code
- * qib_user.h serves a similar purpose for user code.
- */
-
-#include <linux/interrupt.h>
-#include <linux/pci.h>
-#include <linux/dma-mapping.h>
-#include <linux/mutex.h>
-#include <linux/list.h>
-#include <linux/scatterlist.h>
-#include <linux/slab.h>
-#include <linux/io.h>
-#include <linux/fs.h>
-#include <linux/completion.h>
-#include <linux/kref.h>
-#include <linux/sched.h>
-#include <linux/kthread.h>
-#include <rdma/ib_hdrs.h>
-#include <rdma/rdma_vt.h>
-
-#include "qib_common.h"
-#include "qib_verbs.h"
-
-/* only s/w major version of QLogic_IB we can handle */
-#define QIB_CHIP_VERS_MAJ 2U
-
-/* don't care about this except printing */
-#define QIB_CHIP_VERS_MIN 0U
-
-/* The Organization Unique Identifier (Mfg code), and its position in GUID */
-#define QIB_OUI 0x001175
-#define QIB_OUI_LSB 40
-
-/*
- * per driver stats, either not device nor port-specific, or
- * summed over all of the devices and ports.
- * They are described by name via ipathfs filesystem, so layout
- * and number of elements can change without breaking compatibility.
- * If members are added or deleted qib_statnames[] in qib_fs.c must
- * change to match.
- */
-struct qlogic_ib_stats {
- __u64 sps_ints; /* number of interrupts handled */
- __u64 sps_errints; /* number of error interrupts */
- __u64 sps_txerrs; /* tx-related packet errors */
- __u64 sps_rcverrs; /* non-crc rcv packet errors */
- __u64 sps_hwerrs; /* hardware errors reported (parity, etc.) */
- __u64 sps_nopiobufs; /* no pio bufs avail from kernel */
- __u64 sps_ctxts; /* number of contexts currently open */
- __u64 sps_lenerrs; /* number of kernel packets where RHF != LRH len */
- __u64 sps_buffull;
- __u64 sps_hdrfull;
-};
-
-extern struct qlogic_ib_stats qib_stats;
-extern const struct pci_error_handlers qib_pci_err_handler;
-
-#define QIB_CHIP_SWVERSION QIB_CHIP_VERS_MAJ
-/*
- * First-cut critierion for "device is active" is
- * two thousand dwords combined Tx, Rx traffic per
- * 5-second interval. SMA packets are 64 dwords,
- * and occur "a few per second", presumably each way.
- */
-#define QIB_TRAFFIC_ACTIVE_THRESHOLD (2000)
-
-/*
- * Struct used to indicate which errors are logged in each of the
- * error-counters that are logged to EEPROM. A counter is incremented
- * _once_ (saturating at 255) for each event with any bits set in
- * the error or hwerror register masks below.
- */
-#define QIB_EEP_LOG_CNT (4)
-struct qib_eep_log_mask {
- u64 errs_to_log;
- u64 hwerrs_to_log;
-};
-
-/*
- * Below contains all data related to a single context (formerly called port).
- */
-
-#ifdef CONFIG_DEBUG_FS
-struct qib_opcode_stats_perctx;
-#endif
-
-struct qib_ctxtdata {
- void **rcvegrbuf;
- dma_addr_t *rcvegrbuf_phys;
- /* rcvhdrq base, needs mmap before useful */
- void *rcvhdrq;
- /* kernel virtual address where hdrqtail is updated */
- void *rcvhdrtail_kvaddr;
- /*
- * temp buffer for expected send setup, allocated at open, instead
- * of each setup call
- */
- void *tid_pg_list;
- /*
- * Shared page for kernel to signal user processes that send buffers
- * need disarming. The process should call QIB_CMD_DISARM_BUFS
- * or QIB_CMD_ACK_EVENT with IPATH_EVENT_DISARM_BUFS set.
- */
- unsigned long *user_event_mask;
- /* when waiting for rcv or pioavail */
- wait_queue_head_t wait;
- /*
- * rcvegr bufs base, physical, must fit
- * in 44 bits so 32 bit programs mmap64 44 bit works)
- */
- dma_addr_t rcvegr_phys;
- /* mmap of hdrq, must fit in 44 bits */
- dma_addr_t rcvhdrq_phys;
- dma_addr_t rcvhdrqtailaddr_phys;
-
- /*
- * number of opens (including slave sub-contexts) on this instance
- * (ignoring forks, dup, etc. for now)
- */
- int cnt;
- /*
- * how much space to leave at start of eager TID entries for
- * protocol use, on each TID
- */
- /* instead of calculating it */
- unsigned ctxt;
- /* local node of context */
- int node_id;
- /* non-zero if ctxt is being shared. */
- u16 subctxt_cnt;
- /* non-zero if ctxt is being shared. */
- u16 subctxt_id;
- /* number of eager TID entries. */
- u16 rcvegrcnt;
- /* index of first eager TID entry. */
- u16 rcvegr_tid_base;
- /* number of pio bufs for this ctxt (all procs, if shared) */
- u32 piocnt;
- /* first pio buffer for this ctxt */
- u32 pio_base;
- /* chip offset of PIO buffers for this ctxt */
- u32 piobufs;
- /* how many alloc_pages() chunks in rcvegrbuf_pages */
- u32 rcvegrbuf_chunks;
- /* how many egrbufs per chunk */
- u16 rcvegrbufs_perchunk;
- /* ilog2 of above */
- u16 rcvegrbufs_perchunk_shift;
- /* order for rcvegrbuf_pages */
- size_t rcvegrbuf_size;
- /* rcvhdrq size (for freeing) */
- size_t rcvhdrq_size;
- /* per-context flags for fileops/intr communication */
- unsigned long flag;
- /* next expected TID to check when looking for free */
- u32 tidcursor;
- /* WAIT_RCV that timed out, no interrupt */
- u32 rcvwait_to;
- /* WAIT_PIO that timed out, no interrupt */
- u32 piowait_to;
- /* WAIT_RCV already happened, no wait */
- u32 rcvnowait;
- /* WAIT_PIO already happened, no wait */
- u32 pionowait;
- /* total number of polled urgent packets */
- u32 urgent;
- /* saved total number of polled urgent packets for poll edge trigger */
- u32 urgent_poll;
- /* pid of process using this ctxt */
- pid_t pid;
- pid_t subpid[QLOGIC_IB_MAX_SUBCTXT];
- /* same size as task_struct .comm[], command that opened context */
- char comm[16];
- /* pkeys set by this use of this ctxt */
- u16 pkeys[4];
- /* so file ops can get at unit */
- struct qib_devdata *dd;
- /* so funcs that need physical port can get it easily */
- struct qib_pportdata *ppd;
- /* A page of memory for rcvhdrhead, rcvegrhead, rcvegrtail * N */
- void *subctxt_uregbase;
- /* An array of pages for the eager receive buffers * N */
- void *subctxt_rcvegrbuf;
- /* An array of pages for the eager header queue entries * N */
- void *subctxt_rcvhdr_base;
- /* The version of the library which opened this ctxt */
- u32 userversion;
- /* Bitmask of active slaves */
- u32 active_slaves;
- /* Type of packets or conditions we want to poll for */
- u16 poll_type;
- /* receive packet sequence counter */
- u8 seq_cnt;
- u8 redirect_seq_cnt;
- /* ctxt rcvhdrq head offset */
- u32 head;
- /* QPs waiting for context processing */
- struct list_head qp_wait_list;
-#ifdef CONFIG_DEBUG_FS
- /* verbs stats per CTX */
- struct qib_opcode_stats_perctx *opstats;
-#endif
-};
-
-struct rvt_sge_state;
-
-struct qib_sdma_txreq {
- int flags;
- int sg_count;
- dma_addr_t addr;
- void (*callback)(struct qib_sdma_txreq *, int);
- u16 start_idx; /* sdma private */
- u16 next_descq_idx; /* sdma private */
- struct list_head list; /* sdma private */
-};
-
-struct qib_sdma_desc {
- __le64 qw[2];
-};
-
-struct qib_verbs_txreq {
- struct qib_sdma_txreq txreq;
- struct rvt_qp *qp;
- struct rvt_swqe *wqe;
- u32 dwords;
- u16 hdr_dwords;
- u16 hdr_inx;
- struct qib_pio_header *align_buf;
- struct rvt_mregion *mr;
- struct rvt_sge_state *ss;
-};
-
-#define QIB_SDMA_TXREQ_F_USELARGEBUF 0x1
-#define QIB_SDMA_TXREQ_F_HEADTOHOST 0x2
-#define QIB_SDMA_TXREQ_F_INTREQ 0x4
-#define QIB_SDMA_TXREQ_F_FREEBUF 0x8
-#define QIB_SDMA_TXREQ_F_FREEDESC 0x10
-
-#define QIB_SDMA_TXREQ_S_OK 0
-#define QIB_SDMA_TXREQ_S_SENDERROR 1
-#define QIB_SDMA_TXREQ_S_ABORTED 2
-#define QIB_SDMA_TXREQ_S_SHUTDOWN 3
-
-/*
- * Get/Set IB link-level config parameters for f_get/set_ib_cfg()
- * Mostly for MADs that set or query link parameters, also ipath
- * config interfaces
- */
-#define QIB_IB_CFG_LIDLMC 0 /* LID (LS16b) and Mask (MS16b) */
-#define QIB_IB_CFG_LWID_ENB 2 /* allowed Link-width */
-#define QIB_IB_CFG_LWID 3 /* currently active Link-width */
-#define QIB_IB_CFG_SPD_ENB 4 /* allowed Link speeds */
-#define QIB_IB_CFG_SPD 5 /* current Link spd */
-#define QIB_IB_CFG_RXPOL_ENB 6 /* Auto-RX-polarity enable */
-#define QIB_IB_CFG_LREV_ENB 7 /* Auto-Lane-reversal enable */
-#define QIB_IB_CFG_LINKLATENCY 8 /* Link Latency (IB1.2 only) */
-#define QIB_IB_CFG_HRTBT 9 /* IB heartbeat off/enable/auto; DDR/QDR only */
-#define QIB_IB_CFG_OP_VLS 10 /* operational VLs */
-#define QIB_IB_CFG_VL_HIGH_CAP 11 /* num of VL high priority weights */
-#define QIB_IB_CFG_VL_LOW_CAP 12 /* num of VL low priority weights */
-#define QIB_IB_CFG_OVERRUN_THRESH 13 /* IB overrun threshold */
-#define QIB_IB_CFG_PHYERR_THRESH 14 /* IB PHY error threshold */
-#define QIB_IB_CFG_LINKDEFAULT 15 /* IB link default (sleep/poll) */
-#define QIB_IB_CFG_PKEYS 16 /* update partition keys */
-#define QIB_IB_CFG_MTU 17 /* update MTU in IBC */
-#define QIB_IB_CFG_LSTATE 18 /* update linkcmd and linkinitcmd in IBC */
-#define QIB_IB_CFG_VL_HIGH_LIMIT 19
-#define QIB_IB_CFG_PMA_TICKS 20 /* PMA sample tick resolution */
-#define QIB_IB_CFG_PORT 21 /* switch port we are connected to */
-
-/*
- * for CFG_LSTATE: LINKCMD in upper 16 bits, LINKINITCMD in lower 16
- * IB_LINKINITCMD_POLL and SLEEP are also used as set/get values for
- * QIB_IB_CFG_LINKDEFAULT cmd
- */
-#define IB_LINKCMD_DOWN (0 << 16)
-#define IB_LINKCMD_ARMED (1 << 16)
-#define IB_LINKCMD_ACTIVE (2 << 16)
-#define IB_LINKINITCMD_NOP 0
-#define IB_LINKINITCMD_POLL 1
-#define IB_LINKINITCMD_SLEEP 2
-#define IB_LINKINITCMD_DISABLE 3
-
-/*
- * valid states passed to qib_set_linkstate() user call
- */
-#define QIB_IB_LINKDOWN 0
-#define QIB_IB_LINKARM 1
-#define QIB_IB_LINKACTIVE 2
-#define QIB_IB_LINKDOWN_ONLY 3
-#define QIB_IB_LINKDOWN_SLEEP 4
-#define QIB_IB_LINKDOWN_DISABLE 5
-
-/*
- * These 7 values (SDR, DDR, and QDR may be ORed for auto-speed
- * negotiation) are used for the 3rd argument to path_f_set_ib_cfg
- * with cmd QIB_IB_CFG_SPD_ENB, by direct calls or via sysfs. They
- * are also the the possible values for qib_link_speed_enabled and active
- * The values were chosen to match values used within the IB spec.
- */
-#define QIB_IB_SDR 1
-#define QIB_IB_DDR 2
-#define QIB_IB_QDR 4
-
-#define QIB_DEFAULT_MTU 4096
-
-/* max number of IB ports supported per HCA */
-#define QIB_MAX_IB_PORTS 2
-
-/*
- * Possible IB config parameters for f_get/set_ib_table()
- */
-#define QIB_IB_TBL_VL_HIGH_ARB 1 /* Get/set VL high priority weights */
-#define QIB_IB_TBL_VL_LOW_ARB 2 /* Get/set VL low priority weights */
-
-/*
- * Possible "operations" for f_rcvctrl(ppd, op, ctxt)
- * these are bits so they can be combined, e.g.
- * QIB_RCVCTRL_INTRAVAIL_ENB | QIB_RCVCTRL_CTXT_ENB
- */
-#define QIB_RCVCTRL_TAILUPD_ENB 0x01
-#define QIB_RCVCTRL_TAILUPD_DIS 0x02
-#define QIB_RCVCTRL_CTXT_ENB 0x04
-#define QIB_RCVCTRL_CTXT_DIS 0x08
-#define QIB_RCVCTRL_INTRAVAIL_ENB 0x10
-#define QIB_RCVCTRL_INTRAVAIL_DIS 0x20
-#define QIB_RCVCTRL_PKEY_ENB 0x40 /* Note, default is enabled */
-#define QIB_RCVCTRL_PKEY_DIS 0x80
-#define QIB_RCVCTRL_BP_ENB 0x0100
-#define QIB_RCVCTRL_BP_DIS 0x0200
-#define QIB_RCVCTRL_TIDFLOW_ENB 0x0400
-#define QIB_RCVCTRL_TIDFLOW_DIS 0x0800
-
-/*
- * Possible "operations" for f_sendctrl(ppd, op, var)
- * these are bits so they can be combined, e.g.
- * QIB_SENDCTRL_BUFAVAIL_ENB | QIB_SENDCTRL_ENB
- * Some operations (e.g. DISARM, ABORT) are known to
- * be "one-shot", so do not modify shadow.
- */
-#define QIB_SENDCTRL_DISARM (0x1000)
-#define QIB_SENDCTRL_DISARM_BUF(bufn) ((bufn) | QIB_SENDCTRL_DISARM)
- /* available (0x2000) */
-#define QIB_SENDCTRL_AVAIL_DIS (0x4000)
-#define QIB_SENDCTRL_AVAIL_ENB (0x8000)
-#define QIB_SENDCTRL_AVAIL_BLIP (0x10000)
-#define QIB_SENDCTRL_SEND_DIS (0x20000)
-#define QIB_SENDCTRL_SEND_ENB (0x40000)
-#define QIB_SENDCTRL_FLUSH (0x80000)
-#define QIB_SENDCTRL_CLEAR (0x100000)
-#define QIB_SENDCTRL_DISARM_ALL (0x200000)
-
-/*
- * These are the generic indices for requesting per-port
- * counter values via the f_portcntr function. They
- * are always returned as 64 bit values, although most
- * are 32 bit counters.
- */
-/* send-related counters */
-#define QIBPORTCNTR_PKTSEND 0U
-#define QIBPORTCNTR_WORDSEND 1U
-#define QIBPORTCNTR_PSXMITDATA 2U
-#define QIBPORTCNTR_PSXMITPKTS 3U
-#define QIBPORTCNTR_PSXMITWAIT 4U
-#define QIBPORTCNTR_SENDSTALL 5U
-/* receive-related counters */
-#define QIBPORTCNTR_PKTRCV 6U
-#define QIBPORTCNTR_PSRCVDATA 7U
-#define QIBPORTCNTR_PSRCVPKTS 8U
-#define QIBPORTCNTR_RCVEBP 9U
-#define QIBPORTCNTR_RCVOVFL 10U
-#define QIBPORTCNTR_WORDRCV 11U
-/* IB link related error counters */
-#define QIBPORTCNTR_RXLOCALPHYERR 12U
-#define QIBPORTCNTR_RXVLERR 13U
-#define QIBPORTCNTR_ERRICRC 14U
-#define QIBPORTCNTR_ERRVCRC 15U
-#define QIBPORTCNTR_ERRLPCRC 16U
-#define QIBPORTCNTR_BADFORMAT 17U
-#define QIBPORTCNTR_ERR_RLEN 18U
-#define QIBPORTCNTR_IBSYMBOLERR 19U
-#define QIBPORTCNTR_INVALIDRLEN 20U
-#define QIBPORTCNTR_UNSUPVL 21U
-#define QIBPORTCNTR_EXCESSBUFOVFL 22U
-#define QIBPORTCNTR_ERRLINK 23U
-#define QIBPORTCNTR_IBLINKDOWN 24U
-#define QIBPORTCNTR_IBLINKERRRECOV 25U
-#define QIBPORTCNTR_LLI 26U
-/* other error counters */
-#define QIBPORTCNTR_RXDROPPKT 27U
-#define QIBPORTCNTR_VL15PKTDROP 28U
-#define QIBPORTCNTR_ERRPKEY 29U
-#define QIBPORTCNTR_KHDROVFL 30U
-/* sampling counters (these are actually control registers) */
-#define QIBPORTCNTR_PSINTERVAL 31U
-#define QIBPORTCNTR_PSSTART 32U
-#define QIBPORTCNTR_PSSTAT 33U
-
-/* how often we check for packet activity for "power on hours (in seconds) */
-#define ACTIVITY_TIMER 5
-
-#define MAX_NAME_SIZE 64
-
-#ifdef CONFIG_INFINIBAND_QIB_DCA
-struct qib_irq_notify;
-#endif
-
-struct qib_msix_entry {
- struct msix_entry msix;
- void *arg;
-#ifdef CONFIG_INFINIBAND_QIB_DCA
- int dca;
- int rcv;
- struct qib_irq_notify *notifier;
-#endif
- char name[MAX_NAME_SIZE];
- cpumask_var_t mask;
-};
-
-/* Below is an opaque struct. Each chip (device) can maintain
- * private data needed for its operation, but not germane to the
- * rest of the driver. For convenience, we define another that
- * is chip-specific, per-port
- */
-struct qib_chip_specific;
-struct qib_chipport_specific;
-
-enum qib_sdma_states {
- qib_sdma_state_s00_hw_down,
- qib_sdma_state_s10_hw_start_up_wait,
- qib_sdma_state_s20_idle,
- qib_sdma_state_s30_sw_clean_up_wait,
- qib_sdma_state_s40_hw_clean_up_wait,
- qib_sdma_state_s50_hw_halt_wait,
- qib_sdma_state_s99_running,
-};
-
-enum qib_sdma_events {
- qib_sdma_event_e00_go_hw_down,
- qib_sdma_event_e10_go_hw_start,
- qib_sdma_event_e20_hw_started,
- qib_sdma_event_e30_go_running,
- qib_sdma_event_e40_sw_cleaned,
- qib_sdma_event_e50_hw_cleaned,
- qib_sdma_event_e60_hw_halted,
- qib_sdma_event_e70_go_idle,
- qib_sdma_event_e7220_err_halted,
- qib_sdma_event_e7322_err_halted,
- qib_sdma_event_e90_timer_tick,
-};
-
-extern char *qib_sdma_state_names[];
-extern char *qib_sdma_event_names[];
-
-struct sdma_set_state_action {
- unsigned op_enable:1;
- unsigned op_intenable:1;
- unsigned op_halt:1;
- unsigned op_drain:1;
- unsigned go_s99_running_tofalse:1;
- unsigned go_s99_running_totrue:1;
-};
-
-struct qib_sdma_state {
- struct kref kref;
- struct completion comp;
- enum qib_sdma_states current_state;
- struct sdma_set_state_action *set_state_action;
- unsigned current_op;
- unsigned go_s99_running;
- unsigned first_sendbuf;
- unsigned last_sendbuf; /* really last +1 */
- /* debugging/devel */
- enum qib_sdma_states previous_state;
- unsigned previous_op;
- enum qib_sdma_events last_event;
-};
-
-struct xmit_wait {
- struct timer_list timer;
- u64 counter;
- u8 flags;
- struct cache {
- u64 psxmitdata;
- u64 psrcvdata;
- u64 psxmitpkts;
- u64 psrcvpkts;
- u64 psxmitwait;
- } counter_cache;
-};
-
-/*
- * The structure below encapsulates data relevant to a physical IB Port.
- * Current chips support only one such port, but the separation
- * clarifies things a bit. Note that to conform to IB conventions,
- * port-numbers are one-based. The first or only port is port1.
- */
-struct qib_pportdata {
- struct qib_ibport ibport_data;
-
- struct qib_devdata *dd;
- struct qib_chippport_specific *cpspec; /* chip-specific per-port */
- struct kobject pport_kobj;
- struct kobject pport_cc_kobj;
- struct kobject sl2vl_kobj;
- struct kobject diagc_kobj;
-
- /* GUID for this interface, in network order */
- __be64 guid;
-
- /* QIB_POLL, etc. link-state specific flags, per port */
- u32 lflags;
- /* qib_lflags driver is waiting for */
- u32 state_wanted;
- spinlock_t lflags_lock;
-
- /* ref count for each pkey */
- atomic_t pkeyrefs[4];
-
- /*
- * this address is mapped readonly into user processes so they can
- * get status cheaply, whenever they want. One qword of status per port
- */
- u64 *statusp;
-
- /* SendDMA related entries */
-
- /* read mostly */
- struct qib_sdma_desc *sdma_descq;
- struct workqueue_struct *qib_wq;
- struct qib_sdma_state sdma_state;
- dma_addr_t sdma_descq_phys;
- volatile __le64 *sdma_head_dma; /* DMA'ed by chip */
- dma_addr_t sdma_head_phys;
- u16 sdma_descq_cnt;
-
- /* read/write using lock */
- spinlock_t sdma_lock ____cacheline_aligned_in_smp;
- struct list_head sdma_activelist;
- struct list_head sdma_userpending;
- u64 sdma_descq_added;
- u64 sdma_descq_removed;
- u16 sdma_descq_tail;
- u16 sdma_descq_head;
- u8 sdma_generation;
- u8 sdma_intrequest;
-
- struct tasklet_struct sdma_sw_clean_up_task
- ____cacheline_aligned_in_smp;
-
- wait_queue_head_t state_wait; /* for state_wanted */
-
- /* HoL blocking for SMP replies */
- unsigned hol_state;
- struct timer_list hol_timer;
-
- /*
- * Shadow copies of registers; size indicates read access size.
- * Most of them are readonly, but some are write-only register,
- * where we manipulate the bits in the shadow copy, and then write
- * the shadow copy to qlogic_ib.
- *
- * We deliberately make most of these 32 bits, since they have
- * restricted range. For any that we read, we won't to generate 32
- * bit accesses, since Opteron will generate 2 separate 32 bit HT
- * transactions for a 64 bit read, and we want to avoid unnecessary
- * bus transactions.
- */
-
- /* This is the 64 bit group */
- /* last ibcstatus. opaque outside chip-specific code */
- u64 lastibcstat;
-
- /* these are the "32 bit" regs */
-
- /*
- * the following two are 32-bit bitmasks, but {test,clear,set}_bit
- * all expect bit fields to be "unsigned long"
- */
- unsigned long p_rcvctrl; /* shadow per-port rcvctrl */
- unsigned long p_sendctrl; /* shadow per-port sendctrl */
-
- u32 ibmtu; /* The MTU programmed for this unit */
- /*
- * Current max size IB packet (in bytes) including IB headers, that
- * we can send. Changes when ibmtu changes.
- */
- u32 ibmaxlen;
- /*
- * ibmaxlen at init time, limited by chip and by receive buffer
- * size. Not changed after init.
- */
- u32 init_ibmaxlen;
- /* LID programmed for this instance */
- u16 lid;
- /* list of pkeys programmed; 0 if not set */
- u16 pkeys[4];
- /* LID mask control */
- u8 lmc;
- u8 link_width_supported;
- u8 link_speed_supported;
- u8 link_width_enabled;
- u8 link_speed_enabled;
- u8 link_width_active;
- u8 link_speed_active;
- u8 vls_supported;
- u8 vls_operational;
- /* Rx Polarity inversion (compensate for ~tx on partner) */
- u8 rx_pol_inv;
-
- u8 hw_pidx; /* physical port index */
- u8 port; /* IB port number and index into dd->pports - 1 */
-
- u8 delay_mult;
-
- /* used to override LED behavior */
- u8 led_override; /* Substituted for normal value, if non-zero */
- u16 led_override_timeoff; /* delta to next timer event */
- u8 led_override_vals[2]; /* Alternates per blink-frame */
- u8 led_override_phase; /* Just counts, LSB picks from vals[] */
- atomic_t led_override_timer_active;
- /* Used to flash LEDs in override mode */
- struct timer_list led_override_timer;
- struct xmit_wait cong_stats;
- struct timer_list symerr_clear_timer;
-
- /* Synchronize access between driver writes and sysfs reads */
- spinlock_t cc_shadow_lock
- ____cacheline_aligned_in_smp;
-
- /* Shadow copy of the congestion control table */
- struct cc_table_shadow *ccti_entries_shadow;
-
- /* Shadow copy of the congestion control entries */
- struct ib_cc_congestion_setting_attr_shadow *congestion_entries_shadow;
-
- /* List of congestion control table entries */
- struct ib_cc_table_entry_shadow *ccti_entries;
-
- /* 16 congestion entries with each entry corresponding to a SL */
- struct ib_cc_congestion_entry_shadow *congestion_entries;
-
- /* Maximum number of congestion control entries that the agent expects
- * the manager to send.
- */
- u16 cc_supported_table_entries;
-
- /* Total number of congestion control table entries */
- u16 total_cct_entry;
-
- /* Bit map identifying service level */
- u16 cc_sl_control_map;
-
- /* maximum congestion control table index */
- u16 ccti_limit;
-
- /* CA's max number of 64 entry units in the congestion control table */
- u8 cc_max_table_entries;
-};
-
-/* Observers. Not to be taken lightly, possibly not to ship. */
-/*
- * If a diag read or write is to (bottom <= offset <= top),
- * the "hoook" is called, allowing, e.g. shadows to be
- * updated in sync with the driver. struct diag_observer
- * is the "visible" part.
- */
-struct diag_observer;
-
-typedef int (*diag_hook) (struct qib_devdata *dd,
- const struct diag_observer *op,
- u32 offs, u64 *data, u64 mask, int only_32);
-
-struct diag_observer {
- diag_hook hook;
- u32 bottom;
- u32 top;
-};
-
-extern int qib_register_observer(struct qib_devdata *dd,
- const struct diag_observer *op);
-
-/* Only declared here, not defined. Private to diags */
-struct diag_observer_list_elt;
-
-/* device data struct now contains only "general per-device" info.
- * fields related to a physical IB port are in a qib_pportdata struct,
- * described above) while fields only used by a particular chip-type are in
- * a qib_chipdata struct, whose contents are opaque to this file.
- */
-struct qib_devdata {
- struct qib_ibdev verbs_dev; /* must be first */
- struct list_head list;
- /* pointers to related structs for this device */
- /* pci access data structure */
- struct pci_dev *pcidev;
- struct cdev *user_cdev;
- struct cdev *diag_cdev;
- struct device *user_device;
- struct device *diag_device;
-
- /* mem-mapped pointer to base of chip regs */
- u64 __iomem *kregbase;
- /* end of mem-mapped chip space excluding sendbuf and user regs */
- u64 __iomem *kregend;
- /* physical address of chip for io_remap, etc. */
- resource_size_t physaddr;
- /* qib_cfgctxts pointers */
- struct qib_ctxtdata **rcd; /* Receive Context Data */
-
- /* qib_pportdata, points to array of (physical) port-specific
- * data structs, indexed by pidx (0..n-1)
- */
- struct qib_pportdata *pport;
- struct qib_chip_specific *cspec; /* chip-specific */
-
- /* kvirt address of 1st 2k pio buffer */
- void __iomem *pio2kbase;
- /* kvirt address of 1st 4k pio buffer */
- void __iomem *pio4kbase;
- /* mem-mapped pointer to base of PIO buffers (if using WC PAT) */
- void __iomem *piobase;
- /* mem-mapped pointer to base of user chip regs (if using WC PAT) */
- u64 __iomem *userbase;
- void __iomem *piovl15base; /* base of VL15 buffers, if not WC */
- /*
- * points to area where PIOavail registers will be DMA'ed.
- * Has to be on a page of it's own, because the page will be
- * mapped into user program space. This copy is *ONLY* ever
- * written by DMA, not by the driver! Need a copy per device
- * when we get to multiple devices
- */
- volatile __le64 *pioavailregs_dma; /* DMA'ed by chip */
- /* physical address where updates occur */
- dma_addr_t pioavailregs_phys;
-
- /* device-specific implementations of functions needed by
- * common code. Contrary to previous consensus, we can't
- * really just point to a device-specific table, because we
- * may need to "bend", e.g. *_f_put_tid
- */
- /* fallback to alternate interrupt type if possible */
- int (*f_intr_fallback)(struct qib_devdata *);
- /* hard reset chip */
- int (*f_reset)(struct qib_devdata *);
- void (*f_quiet_serdes)(struct qib_pportdata *);
- int (*f_bringup_serdes)(struct qib_pportdata *);
- int (*f_early_init)(struct qib_devdata *);
- void (*f_clear_tids)(struct qib_devdata *, struct qib_ctxtdata *);
- void (*f_put_tid)(struct qib_devdata *, u64 __iomem*,
- u32, unsigned long);
- void (*f_cleanup)(struct qib_devdata *);
- void (*f_setextled)(struct qib_pportdata *, u32);
- /* fill out chip-specific fields */
- int (*f_get_base_info)(struct qib_ctxtdata *, struct qib_base_info *);
- /* free irq */
- void (*f_free_irq)(struct qib_devdata *);
- struct qib_message_header *(*f_get_msgheader)
- (struct qib_devdata *, __le32 *);
- void (*f_config_ctxts)(struct qib_devdata *);
- int (*f_get_ib_cfg)(struct qib_pportdata *, int);
- int (*f_set_ib_cfg)(struct qib_pportdata *, int, u32);
- int (*f_set_ib_loopback)(struct qib_pportdata *, const char *);
- int (*f_get_ib_table)(struct qib_pportdata *, int, void *);
- int (*f_set_ib_table)(struct qib_pportdata *, int, void *);
- u32 (*f_iblink_state)(u64);
- u8 (*f_ibphys_portstate)(u64);
- void (*f_xgxs_reset)(struct qib_pportdata *);
- /* per chip actions needed for IB Link up/down changes */
- int (*f_ib_updown)(struct qib_pportdata *, int, u64);
- u32 __iomem *(*f_getsendbuf)(struct qib_pportdata *, u64, u32 *);
- /* Read/modify/write of GPIO pins (potentially chip-specific */
- int (*f_gpio_mod)(struct qib_devdata *dd, u32 out, u32 dir,
- u32 mask);
- /* Enable writes to config EEPROM (if supported) */
- int (*f_eeprom_wen)(struct qib_devdata *dd, int wen);
- /*
- * modify rcvctrl shadow[s] and write to appropriate chip-regs.
- * see above QIB_RCVCTRL_xxx_ENB/DIS for operations.
- * (ctxt == -1) means "all contexts", only meaningful for
- * clearing. Could remove if chip_spec shutdown properly done.
- */
- void (*f_rcvctrl)(struct qib_pportdata *, unsigned int op,
- int ctxt);
- /* Read/modify/write sendctrl appropriately for op and port. */
- void (*f_sendctrl)(struct qib_pportdata *, u32 op);
- void (*f_set_intr_state)(struct qib_devdata *, u32);
- void (*f_set_armlaunch)(struct qib_devdata *, u32);
- void (*f_wantpiobuf_intr)(struct qib_devdata *, u32);
- int (*f_late_initreg)(struct qib_devdata *);
- int (*f_init_sdma_regs)(struct qib_pportdata *);
- u16 (*f_sdma_gethead)(struct qib_pportdata *);
- int (*f_sdma_busy)(struct qib_pportdata *);
- void (*f_sdma_update_tail)(struct qib_pportdata *, u16);
- void (*f_sdma_set_desc_cnt)(struct qib_pportdata *, unsigned);
- void (*f_sdma_sendctrl)(struct qib_pportdata *, unsigned);
- void (*f_sdma_hw_clean_up)(struct qib_pportdata *);
- void (*f_sdma_hw_start_up)(struct qib_pportdata *);
- void (*f_sdma_init_early)(struct qib_pportdata *);
- void (*f_set_cntr_sample)(struct qib_pportdata *, u32, u32);
- void (*f_update_usrhead)(struct qib_ctxtdata *, u64, u32, u32, u32);
- u32 (*f_hdrqempty)(struct qib_ctxtdata *);
- u64 (*f_portcntr)(struct qib_pportdata *, u32);
- u32 (*f_read_cntrs)(struct qib_devdata *, loff_t, char **,
- u64 **);
- u32 (*f_read_portcntrs)(struct qib_devdata *, loff_t, u32,
- char **, u64 **);
- u32 (*f_setpbc_control)(struct qib_pportdata *, u32, u8, u8);
- void (*f_initvl15_bufs)(struct qib_devdata *);
- void (*f_init_ctxt)(struct qib_ctxtdata *);
- void (*f_txchk_change)(struct qib_devdata *, u32, u32, u32,
- struct qib_ctxtdata *);
- void (*f_writescratch)(struct qib_devdata *, u32);
- int (*f_tempsense_rd)(struct qib_devdata *, int regnum);
-#ifdef CONFIG_INFINIBAND_QIB_DCA
- int (*f_notify_dca)(struct qib_devdata *, unsigned long event);
-#endif
-
- char *boardname; /* human readable board info */
-
- /* template for writing TIDs */
- u64 tidtemplate;
- /* value to write to free TIDs */
- u64 tidinvalid;
-
- /* number of registers used for pioavail */
- u32 pioavregs;
- /* device (not port) flags, basically device capabilities */
- u32 flags;
- /* last buffer for user use */
- u32 lastctxt_piobuf;
-
- /* reset value */
- u64 z_int_counter;
- /* percpu intcounter */
- u64 __percpu *int_counter;
-
- /* pio bufs allocated per ctxt */
- u32 pbufsctxt;
- /* if remainder on bufs/ctxt, ctxts < extrabuf get 1 extra */
- u32 ctxts_extrabuf;
- /*
- * number of ctxts configured as max; zero is set to number chip
- * supports, less gives more pio bufs/ctxt, etc.
- */
- u32 cfgctxts;
- /*
- * number of ctxts available for PSM open
- */
- u32 freectxts;
-
- /*
- * hint that we should update pioavailshadow before
- * looking for a PIO buffer
- */
- u32 upd_pio_shadow;
-
- /* internal debugging stats */
- u32 maxpkts_call;
- u32 avgpkts_call;
- u64 nopiobufs;
-
- /* PCI Vendor ID (here for NodeInfo) */
- u16 vendorid;
- /* PCI Device ID (here for NodeInfo) */
- u16 deviceid;
- /* for write combining settings */
- int wc_cookie;
- unsigned long wc_base;
- unsigned long wc_len;
-
- /* shadow copy of struct page *'s for exp tid pages */
- struct page **pageshadow;
- /* shadow copy of dma handles for exp tid pages */
- dma_addr_t *physshadow;
- u64 __iomem *egrtidbase;
- spinlock_t sendctrl_lock; /* protect changes to sendctrl shadow */
- /* around rcd and (user ctxts) ctxt_cnt use (intr vs free) */
- spinlock_t uctxt_lock; /* rcd and user context changes */
- /*
- * per unit status, see also portdata statusp
- * mapped readonly into user processes so they can get unit and
- * IB link status cheaply
- */
- u64 *devstatusp;
- char *freezemsg; /* freeze msg if hw error put chip in freeze */
- u32 freezelen; /* max length of freezemsg */
- /* timer used to prevent stats overflow, error throttling, etc. */
- struct timer_list stats_timer;
-
- /* timer to verify interrupts work, and fallback if possible */
- struct timer_list intrchk_timer;
- unsigned long ureg_align; /* user register alignment */
-
- /*
- * Protects pioavailshadow, pioavailkernel, pio_need_disarm, and
- * pio_writing.
- */
- spinlock_t pioavail_lock;
- /*
- * index of last buffer to optimize search for next
- */
- u32 last_pio;
- /*
- * min kernel pio buffer to optimize search
- */
- u32 min_kernel_pio;
- /*
- * Shadow copies of registers; size indicates read access size.
- * Most of them are readonly, but some are write-only register,
- * where we manipulate the bits in the shadow copy, and then write
- * the shadow copy to qlogic_ib.
- *
- * We deliberately make most of these 32 bits, since they have
- * restricted range. For any that we read, we won't to generate 32
- * bit accesses, since Opteron will generate 2 separate 32 bit HT
- * transactions for a 64 bit read, and we want to avoid unnecessary
- * bus transactions.
- */
-
- /* This is the 64 bit group */
-
- unsigned long pioavailshadow[6];
- /* bitmap of send buffers available for the kernel to use with PIO. */
- unsigned long pioavailkernel[6];
- /* bitmap of send buffers which need to be disarmed. */
- unsigned long pio_need_disarm[3];
- /* bitmap of send buffers which are being written to. */
- unsigned long pio_writing[3];
- /* kr_revision shadow */
- u64 revision;
- /* Base GUID for device (from eeprom, network order) */
- __be64 base_guid;
-
- /*
- * kr_sendpiobufbase value (chip offset of pio buffers), and the
- * base of the 2KB buffer s(user processes only use 2K)
- */
- u64 piobufbase;
- u32 pio2k_bufbase;
-
- /* these are the "32 bit" regs */
-
- /* number of GUIDs in the flash for this interface */
- u32 nguid;
- /*
- * the following two are 32-bit bitmasks, but {test,clear,set}_bit
- * all expect bit fields to be "unsigned long"
- */
- unsigned long rcvctrl; /* shadow per device rcvctrl */
- unsigned long sendctrl; /* shadow per device sendctrl */
-
- /* value we put in kr_rcvhdrcnt */
- u32 rcvhdrcnt;
- /* value we put in kr_rcvhdrsize */
- u32 rcvhdrsize;
- /* value we put in kr_rcvhdrentsize */
- u32 rcvhdrentsize;
- /* kr_ctxtcnt value */
- u32 ctxtcnt;
- /* kr_pagealign value */
- u32 palign;
- /* number of "2KB" PIO buffers */
- u32 piobcnt2k;
- /* size in bytes of "2KB" PIO buffers */
- u32 piosize2k;
- /* max usable size in dwords of a "2KB" PIO buffer before going "4KB" */
- u32 piosize2kmax_dwords;
- /* number of "4KB" PIO buffers */
- u32 piobcnt4k;
- /* size in bytes of "4KB" PIO buffers */
- u32 piosize4k;
- /* kr_rcvegrbase value */
- u32 rcvegrbase;
- /* kr_rcvtidbase value */
- u32 rcvtidbase;
- /* kr_rcvtidcnt value */
- u32 rcvtidcnt;
- /* kr_userregbase */
- u32 uregbase;
- /* shadow the control register contents */
- u32 control;
-
- /* chip address space used by 4k pio buffers */
- u32 align4k;
- /* size of each rcvegrbuffer */
- u16 rcvegrbufsize;
- /* log2 of above */
- u16 rcvegrbufsize_shift;
- /* localbus width (1, 2,4,8,16,32) from config space */
- u32 lbus_width;
- /* localbus speed in MHz */
- u32 lbus_speed;
- int unit; /* unit # of this chip */
-
- /* start of CHIP_SPEC move to chipspec, but need code changes */
- /* low and high portions of MSI capability/vector */
- u32 msi_lo;
- /* saved after PCIe init for restore after reset */
- u32 msi_hi;
- /* MSI data (vector) saved for restore */
- u16 msi_data;
- /* so we can rewrite it after a chip reset */
- u32 pcibar0;
- /* so we can rewrite it after a chip reset */
- u32 pcibar1;
- u64 rhdrhead_intr_off;
-
- /*
- * ASCII serial number, from flash, large enough for original
- * all digit strings, and longer QLogic serial number format
- */
- u8 serial[16];
- /* human readable board version */
- u8 boardversion[96];
- u8 lbus_info[32]; /* human readable localbus info */
- /* chip major rev, from qib_revision */
- u8 majrev;
- /* chip minor rev, from qib_revision */
- u8 minrev;
-
- /* Misc small ints */
- /* Number of physical ports available */
- u8 num_pports;
- /* Lowest context number which can be used by user processes */
- u8 first_user_ctxt;
- u8 n_krcv_queues;
- u8 qpn_mask;
- u8 skip_kctxt_mask;
-
- u16 rhf_offset; /* offset of RHF within receive header entry */
-
- /*
- * GPIO pins for twsi-connected devices, and device code for eeprom
- */
- u8 gpio_sda_num;
- u8 gpio_scl_num;
- u8 twsi_eeprom_dev;
- u8 board_atten;
-
- /* Support (including locks) for EEPROM logging of errors and time */
- /* control access to actual counters, timer */
- spinlock_t eep_st_lock;
- /* control high-level access to EEPROM */
- struct mutex eep_lock;
- uint64_t traffic_wds;
- /*
- * masks for which bits of errs, hwerrs that cause
- * each of the counters to increment.
- */
- struct qib_eep_log_mask eep_st_masks[QIB_EEP_LOG_CNT];
- struct qib_diag_client *diag_client;
- spinlock_t qib_diag_trans_lock; /* protect diag observer ops */
- struct diag_observer_list_elt *diag_observer_list;
-
- u8 psxmitwait_supported;
- /* cycle length of PS* counters in HW (in picoseconds) */
- u16 psxmitwait_check_rate;
- /* high volume overflow errors defered to tasklet */
- struct tasklet_struct error_tasklet;
-
- int assigned_node_id; /* NUMA node closest to HCA */
-};
-
-/* hol_state values */
-#define QIB_HOL_UP 0
-#define QIB_HOL_INIT 1
-
-#define QIB_SDMA_SENDCTRL_OP_ENABLE (1U << 0)
-#define QIB_SDMA_SENDCTRL_OP_INTENABLE (1U << 1)
-#define QIB_SDMA_SENDCTRL_OP_HALT (1U << 2)
-#define QIB_SDMA_SENDCTRL_OP_CLEANUP (1U << 3)
-#define QIB_SDMA_SENDCTRL_OP_DRAIN (1U << 4)
-
-/* operation types for f_txchk_change() */
-#define TXCHK_CHG_TYPE_DIS1 3
-#define TXCHK_CHG_TYPE_ENAB1 2
-#define TXCHK_CHG_TYPE_KERN 1
-#define TXCHK_CHG_TYPE_USER 0
-
-#define QIB_CHASE_TIME msecs_to_jiffies(145)
-#define QIB_CHASE_DIS_TIME msecs_to_jiffies(160)
-
-/* Private data for file operations */
-struct qib_filedata {
- struct qib_ctxtdata *rcd;
- unsigned subctxt;
- unsigned tidcursor;
- struct qib_user_sdma_queue *pq;
- int rec_cpu_num; /* for cpu affinity; -1 if none */
-};
-
-extern struct list_head qib_dev_list;
-extern spinlock_t qib_devs_lock;
-extern struct qib_devdata *qib_lookup(int unit);
-extern u32 qib_cpulist_count;
-extern unsigned long *qib_cpulist;
-extern unsigned qib_cc_table_size;
-
-int qib_init(struct qib_devdata *, int);
-int init_chip_wc_pat(struct qib_devdata *dd, u32);
-int qib_enable_wc(struct qib_devdata *dd);
-void qib_disable_wc(struct qib_devdata *dd);
-int qib_count_units(int *npresentp, int *nupp);
-int qib_count_active_units(void);
-
-int qib_cdev_init(int minor, const char *name,
- const struct file_operations *fops,
- struct cdev **cdevp, struct device **devp);
-void qib_cdev_cleanup(struct cdev **cdevp, struct device **devp);
-int qib_dev_init(void);
-void qib_dev_cleanup(void);
-
-int qib_diag_add(struct qib_devdata *);
-void qib_diag_remove(struct qib_devdata *);
-void qib_handle_e_ibstatuschanged(struct qib_pportdata *, u64);
-void qib_sdma_update_tail(struct qib_pportdata *, u16); /* hold sdma_lock */
-
-int qib_decode_err(struct qib_devdata *dd, char *buf, size_t blen, u64 err);
-void qib_bad_intrstatus(struct qib_devdata *);
-void qib_handle_urcv(struct qib_devdata *, u64);
-
-/* clean up any per-chip chip-specific stuff */
-void qib_chip_cleanup(struct qib_devdata *);
-/* clean up any chip type-specific stuff */
-void qib_chip_done(void);
-
-/* check to see if we have to force ordering for write combining */
-int qib_unordered_wc(void);
-void qib_pio_copy(void __iomem *to, const void *from, size_t count);
-
-void qib_disarm_piobufs(struct qib_devdata *, unsigned, unsigned);
-int qib_disarm_piobufs_ifneeded(struct qib_ctxtdata *);
-void qib_disarm_piobufs_set(struct qib_devdata *, unsigned long *, unsigned);
-void qib_cancel_sends(struct qib_pportdata *);
-
-int qib_create_rcvhdrq(struct qib_devdata *, struct qib_ctxtdata *);
-int qib_setup_eagerbufs(struct qib_ctxtdata *);
-void qib_set_ctxtcnt(struct qib_devdata *);
-int qib_create_ctxts(struct qib_devdata *dd);
-struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *, u32, int);
-int qib_init_pportdata(struct qib_pportdata *, struct qib_devdata *, u8, u8);
-void qib_free_ctxtdata(struct qib_devdata *, struct qib_ctxtdata *);
-
-u32 qib_kreceive(struct qib_ctxtdata *, u32 *, u32 *);
-int qib_reset_device(int);
-int qib_wait_linkstate(struct qib_pportdata *, u32, int);
-int qib_set_linkstate(struct qib_pportdata *, u8);
-int qib_set_mtu(struct qib_pportdata *, u16);
-int qib_set_lid(struct qib_pportdata *, u32, u8);
-void qib_hol_down(struct qib_pportdata *);
-void qib_hol_init(struct qib_pportdata *);
-void qib_hol_up(struct qib_pportdata *);
-void qib_hol_event(unsigned long);
-void qib_disable_after_error(struct qib_devdata *);
-int qib_set_uevent_bits(struct qib_pportdata *, const int);
-
-/* for use in system calls, where we want to know device type, etc. */
-#define ctxt_fp(fp) \
- (((struct qib_filedata *)(fp)->private_data)->rcd)
-#define subctxt_fp(fp) \
- (((struct qib_filedata *)(fp)->private_data)->subctxt)
-#define tidcursor_fp(fp) \
- (((struct qib_filedata *)(fp)->private_data)->tidcursor)
-#define user_sdma_queue_fp(fp) \
- (((struct qib_filedata *)(fp)->private_data)->pq)
-
-static inline struct qib_devdata *dd_from_ppd(struct qib_pportdata *ppd)
-{
- return ppd->dd;
-}
-
-static inline struct qib_devdata *dd_from_dev(struct qib_ibdev *dev)
-{
- return container_of(dev, struct qib_devdata, verbs_dev);
-}
-
-static inline struct qib_devdata *dd_from_ibdev(struct ib_device *ibdev)
-{
- return dd_from_dev(to_idev(ibdev));
-}
-
-static inline struct qib_pportdata *ppd_from_ibp(struct qib_ibport *ibp)
-{
- return container_of(ibp, struct qib_pportdata, ibport_data);
-}
-
-static inline struct qib_ibport *to_iport(struct ib_device *ibdev, u8 port)
-{
- struct qib_devdata *dd = dd_from_ibdev(ibdev);
- unsigned pidx = port - 1; /* IB number port from 1, hdw from 0 */
-
- WARN_ON(pidx >= dd->num_pports);
- return &dd->pport[pidx].ibport_data;
-}
-
-/*
- * values for dd->flags (_device_ related flags) and
- */
-#define QIB_HAS_LINK_LATENCY 0x1 /* supports link latency (IB 1.2) */
-#define QIB_INITTED 0x2 /* chip and driver up and initted */
-#define QIB_DOING_RESET 0x4 /* in the middle of doing chip reset */
-#define QIB_PRESENT 0x8 /* chip accesses can be done */
-#define QIB_PIO_FLUSH_WC 0x10 /* Needs Write combining flush for PIO */
-#define QIB_HAS_THRESH_UPDATE 0x40
-#define QIB_HAS_SDMA_TIMEOUT 0x80
-#define QIB_USE_SPCL_TRIG 0x100 /* SpecialTrigger launch enabled */
-#define QIB_NODMA_RTAIL 0x200 /* rcvhdrtail register DMA enabled */
-#define QIB_HAS_INTX 0x800 /* Supports INTx interrupts */
-#define QIB_HAS_SEND_DMA 0x1000 /* Supports Send DMA */
-#define QIB_HAS_VLSUPP 0x2000 /* Supports multiple VLs; PBC different */
-#define QIB_HAS_HDRSUPP 0x4000 /* Supports header suppression */
-#define QIB_BADINTR 0x8000 /* severe interrupt problems */
-#define QIB_DCA_ENABLED 0x10000 /* Direct Cache Access enabled */
-#define QIB_HAS_QSFP 0x20000 /* device (card instance) has QSFP */
-
-/*
- * values for ppd->lflags (_ib_port_ related flags)
- */
-#define QIBL_LINKV 0x1 /* IB link state valid */
-#define QIBL_LINKDOWN 0x8 /* IB link is down */
-#define QIBL_LINKINIT 0x10 /* IB link level is up */
-#define QIBL_LINKARMED 0x20 /* IB link is ARMED */
-#define QIBL_LINKACTIVE 0x40 /* IB link is ACTIVE */
-/* leave a gap for more IB-link state */
-#define QIBL_IB_AUTONEG_INPROG 0x1000 /* non-IBTA DDR/QDR neg active */
-#define QIBL_IB_AUTONEG_FAILED 0x2000 /* non-IBTA DDR/QDR neg failed */
-#define QIBL_IB_LINK_DISABLED 0x4000 /* Linkdown-disable forced,
- * Do not try to bring up */
-#define QIBL_IB_FORCE_NOTIFY 0x8000 /* force notify on next ib change */
-
-/* IB dword length mask in PBC (lower 11 bits); same for all chips */
-#define QIB_PBC_LENGTH_MASK ((1 << 11) - 1)
-
-
-/* ctxt_flag bit offsets */
- /* waiting for a packet to arrive */
-#define QIB_CTXT_WAITING_RCV 2
- /* master has not finished initializing */
-#define QIB_CTXT_MASTER_UNINIT 4
- /* waiting for an urgent packet to arrive */
-#define QIB_CTXT_WAITING_URG 5
-
-/* free up any allocated data at closes */
-void qib_free_data(struct qib_ctxtdata *dd);
-void qib_chg_pioavailkernel(struct qib_devdata *, unsigned, unsigned,
- u32, struct qib_ctxtdata *);
-struct qib_devdata *qib_init_iba7322_funcs(struct pci_dev *,
- const struct pci_device_id *);
-struct qib_devdata *qib_init_iba7220_funcs(struct pci_dev *,
- const struct pci_device_id *);
-struct qib_devdata *qib_init_iba6120_funcs(struct pci_dev *,
- const struct pci_device_id *);
-void qib_free_devdata(struct qib_devdata *);
-struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra);
-
-#define QIB_TWSI_NO_DEV 0xFF
-/* Below qib_twsi_ functions must be called with eep_lock held */
-int qib_twsi_reset(struct qib_devdata *dd);
-int qib_twsi_blk_rd(struct qib_devdata *dd, int dev, int addr, void *buffer,
- int len);
-int qib_twsi_blk_wr(struct qib_devdata *dd, int dev, int addr,
- const void *buffer, int len);
-void qib_get_eeprom_info(struct qib_devdata *);
-#define qib_inc_eeprom_err(dd, eidx, incr)
-void qib_dump_lookup_output_queue(struct qib_devdata *);
-void qib_force_pio_avail_update(struct qib_devdata *);
-void qib_clear_symerror_on_linkup(unsigned long opaque);
-
-/*
- * Set LED override, only the two LSBs have "public" meaning, but
- * any non-zero value substitutes them for the Link and LinkTrain
- * LED states.
- */
-#define QIB_LED_PHYS 1 /* Physical (linktraining) GREEN LED */
-#define QIB_LED_LOG 2 /* Logical (link) YELLOW LED */
-void qib_set_led_override(struct qib_pportdata *ppd, unsigned int val);
-
-/* send dma routines */
-int qib_setup_sdma(struct qib_pportdata *);
-void qib_teardown_sdma(struct qib_pportdata *);
-void __qib_sdma_intr(struct qib_pportdata *);
-void qib_sdma_intr(struct qib_pportdata *);
-void qib_user_sdma_send_desc(struct qib_pportdata *dd,
- struct list_head *pktlist);
-int qib_sdma_verbs_send(struct qib_pportdata *, struct rvt_sge_state *,
- u32, struct qib_verbs_txreq *);
-/* ppd->sdma_lock should be locked before calling this. */
-int qib_sdma_make_progress(struct qib_pportdata *dd);
-
-static inline int qib_sdma_empty(const struct qib_pportdata *ppd)
-{
- return ppd->sdma_descq_added == ppd->sdma_descq_removed;
-}
-
-/* must be called under qib_sdma_lock */
-static inline u16 qib_sdma_descq_freecnt(const struct qib_pportdata *ppd)
-{
- return ppd->sdma_descq_cnt -
- (ppd->sdma_descq_added - ppd->sdma_descq_removed) - 1;
-}
-
-static inline int __qib_sdma_running(struct qib_pportdata *ppd)
-{
- return ppd->sdma_state.current_state == qib_sdma_state_s99_running;
-}
-int qib_sdma_running(struct qib_pportdata *);
-void dump_sdma_state(struct qib_pportdata *ppd);
-void __qib_sdma_process_event(struct qib_pportdata *, enum qib_sdma_events);
-void qib_sdma_process_event(struct qib_pportdata *, enum qib_sdma_events);
-
-/*
- * number of words used for protocol header if not set by qib_userinit();
- */
-#define QIB_DFLT_RCVHDRSIZE 9
-
-/*
- * We need to be able to handle an IB header of at least 24 dwords.
- * We need the rcvhdrq large enough to handle largest IB header, but
- * still have room for a 2KB MTU standard IB packet.
- * Additionally, some processor/memory controller combinations
- * benefit quite strongly from having the DMA'ed data be cacheline
- * aligned and a cacheline multiple, so we set the size to 32 dwords
- * (2 64-byte primary cachelines for pretty much all processors of
- * interest). The alignment hurts nothing, other than using somewhat
- * more memory.
- */
-#define QIB_RCVHDR_ENTSIZE 32
-
-int qib_get_user_pages(unsigned long, size_t, struct page **);
-void qib_release_user_pages(struct page **, size_t);
-int qib_eeprom_read(struct qib_devdata *, u8, void *, int);
-int qib_eeprom_write(struct qib_devdata *, u8, const void *, int);
-u32 __iomem *qib_getsendbuf_range(struct qib_devdata *, u32 *, u32, u32);
-void qib_sendbuf_done(struct qib_devdata *, unsigned);
-
-static inline void qib_clear_rcvhdrtail(const struct qib_ctxtdata *rcd)
-{
- *((u64 *) rcd->rcvhdrtail_kvaddr) = 0ULL;
-}
-
-static inline u32 qib_get_rcvhdrtail(const struct qib_ctxtdata *rcd)
-{
- /*
- * volatile because it's a DMA target from the chip, routine is
- * inlined, and don't want register caching or reordering.
- */
- return (u32) le64_to_cpu(
- *((volatile __le64 *)rcd->rcvhdrtail_kvaddr)); /* DMA'ed */
-}
-
-static inline u32 qib_get_hdrqtail(const struct qib_ctxtdata *rcd)
-{
- const struct qib_devdata *dd = rcd->dd;
- u32 hdrqtail;
-
- if (dd->flags & QIB_NODMA_RTAIL) {
- __le32 *rhf_addr;
- u32 seq;
-
- rhf_addr = (__le32 *) rcd->rcvhdrq +
- rcd->head + dd->rhf_offset;
- seq = qib_hdrget_seq(rhf_addr);
- hdrqtail = rcd->head;
- if (seq == rcd->seq_cnt)
- hdrqtail++;
- } else
- hdrqtail = qib_get_rcvhdrtail(rcd);
-
- return hdrqtail;
-}
-
-/*
- * sysfs interface.
- */
-
-extern const char ib_qib_version[];
-
-int qib_device_create(struct qib_devdata *);
-void qib_device_remove(struct qib_devdata *);
-
-int qib_create_port_files(struct ib_device *ibdev, u8 port_num,
- struct kobject *kobj);
-int qib_verbs_register_sysfs(struct qib_devdata *);
-void qib_verbs_unregister_sysfs(struct qib_devdata *);
-/* Hook for sysfs read of QSFP */
-extern int qib_qsfp_dump(struct qib_pportdata *ppd, char *buf, int len);
-
-int __init qib_init_qibfs(void);
-int __exit qib_exit_qibfs(void);
-
-int qibfs_add(struct qib_devdata *);
-int qibfs_remove(struct qib_devdata *);
-
-int qib_pcie_init(struct pci_dev *, const struct pci_device_id *);
-int qib_pcie_ddinit(struct qib_devdata *, struct pci_dev *,
- const struct pci_device_id *);
-void qib_pcie_ddcleanup(struct qib_devdata *);
-int qib_pcie_params(struct qib_devdata *, u32, u32 *, struct qib_msix_entry *);
-int qib_reinit_intr(struct qib_devdata *);
-void qib_enable_intx(struct pci_dev *);
-void qib_nomsi(struct qib_devdata *);
-void qib_nomsix(struct qib_devdata *);
-void qib_pcie_getcmd(struct qib_devdata *, u16 *, u8 *, u8 *);
-void qib_pcie_reenable(struct qib_devdata *, u16, u8, u8);
-/* interrupts for device */
-u64 qib_int_counter(struct qib_devdata *);
-/* interrupt for all devices */
-u64 qib_sps_ints(void);
-
-/*
- * dma_addr wrappers - all 0's invalid for hw
- */
-dma_addr_t qib_map_page(struct pci_dev *, struct page *, unsigned long,
- size_t, int);
-const char *qib_get_unit_name(int unit);
-const char *qib_get_card_name(struct rvt_dev_info *rdi);
-struct pci_dev *qib_get_pci_dev(struct rvt_dev_info *rdi);
-
-/*
- * Flush write combining store buffers (if present) and perform a write
- * barrier.
- */
-static inline void qib_flush_wc(void)
-{
-#if defined(CONFIG_X86_64)
- asm volatile("sfence" : : : "memory");
-#else
- wmb(); /* no reorder around wc flush */
-#endif
-}
-
-/* global module parameter variables */
-extern unsigned qib_ibmtu;
-extern ushort qib_cfgctxts;
-extern ushort qib_num_cfg_vls;
-extern ushort qib_mini_init; /* If set, do few (ideally 0) writes to chip */
-extern unsigned qib_n_krcv_queues;
-extern unsigned qib_sdma_fetch_arb;
-extern unsigned qib_compat_ddr_negotiate;
-extern int qib_special_trigger;
-extern unsigned qib_numa_aware;
-
-extern struct mutex qib_mutex;
-
-/* Number of seconds before our card status check... */
-#define STATUS_TIMEOUT 60
-
-#define QIB_DRV_NAME "ib_qib"
-#define QIB_USER_MINOR_BASE 0
-#define QIB_TRACE_MINOR 127
-#define QIB_DIAGPKT_MINOR 128
-#define QIB_DIAG_MINOR_BASE 129
-#define QIB_NMINORS 255
-
-#define PCI_VENDOR_ID_PATHSCALE 0x1fc1
-#define PCI_VENDOR_ID_QLOGIC 0x1077
-#define PCI_DEVICE_ID_QLOGIC_IB_6120 0x10
-#define PCI_DEVICE_ID_QLOGIC_IB_7220 0x7220
-#define PCI_DEVICE_ID_QLOGIC_IB_7322 0x7322
-
-/*
- * qib_early_err is used (only!) to print early errors before devdata is
- * allocated, or when dd->pcidev may not be valid, and at the tail end of
- * cleanup when devdata may have been freed, etc. qib_dev_porterr is
- * the same as qib_dev_err, but is used when the message really needs
- * the IB port# to be definitive as to what's happening..
- * All of these go to the trace log, and the trace log entry is done
- * first to avoid possible serial port delays from printk.
- */
-#define qib_early_err(dev, fmt, ...) \
- dev_err(dev, fmt, ##__VA_ARGS__)
-
-#define qib_dev_err(dd, fmt, ...) \
- dev_err(&(dd)->pcidev->dev, "%s: " fmt, \
- qib_get_unit_name((dd)->unit), ##__VA_ARGS__)
-
-#define qib_dev_warn(dd, fmt, ...) \
- dev_warn(&(dd)->pcidev->dev, "%s: " fmt, \
- qib_get_unit_name((dd)->unit), ##__VA_ARGS__)
-
-#define qib_dev_porterr(dd, port, fmt, ...) \
- dev_err(&(dd)->pcidev->dev, "%s: IB%u:%u " fmt, \
- qib_get_unit_name((dd)->unit), (dd)->unit, (port), \
- ##__VA_ARGS__)
-
-#define qib_devinfo(pcidev, fmt, ...) \
- dev_info(&(pcidev)->dev, fmt, ##__VA_ARGS__)
-
-/*
- * this is used for formatting hw error messages...
- */
-struct qib_hwerror_msgs {
- u64 mask;
- const char *msg;
- size_t sz;
-};
-
-#define QLOGIC_IB_HWE_MSG(a, b) { .mask = a, .msg = b }
-
-/* in qib_intr.c... */
-void qib_format_hwerrors(u64 hwerrs,
- const struct qib_hwerror_msgs *hwerrmsgs,
- size_t nhwerrmsgs, char *msg, size_t lmsg);
-
-void qib_stop_send_queue(struct rvt_qp *qp);
-void qib_quiesce_qp(struct rvt_qp *qp);
-void qib_flush_qp_waiters(struct rvt_qp *qp);
-int qib_mtu_to_path_mtu(u32 mtu);
-u32 qib_mtu_from_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, u32 pmtu);
-void qib_notify_error_qp(struct rvt_qp *qp);
-int qib_get_pmtu_from_attr(struct rvt_dev_info *rdi, struct rvt_qp *qp,
- struct ib_qp_attr *attr);
-
-#endif /* _QIB_KERNEL_H */
diff --git a/drivers/infiniband/hw/qib/qib_6120_regs.h b/drivers/infiniband/hw/qib/qib_6120_regs.h
deleted file mode 100644
index e16cb6f7de2c..000000000000
--- a/drivers/infiniband/hw/qib/qib_6120_regs.h
+++ /dev/null
@@ -1,977 +0,0 @@
-/*
- * Copyright (c) 2008, 2009, 2010 QLogic Corporation. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-/* This file is mechanically generated from RTL. Any hand-edits will be lost! */
-
-#define QIB_6120_Revision_OFFS 0x0
-#define QIB_6120_Revision_R_Simulator_LSB 0x3F
-#define QIB_6120_Revision_R_Simulator_RMASK 0x1
-#define QIB_6120_Revision_Reserved_LSB 0x28
-#define QIB_6120_Revision_Reserved_RMASK 0x7FFFFF
-#define QIB_6120_Revision_BoardID_LSB 0x20
-#define QIB_6120_Revision_BoardID_RMASK 0xFF
-#define QIB_6120_Revision_R_SW_LSB 0x18
-#define QIB_6120_Revision_R_SW_RMASK 0xFF
-#define QIB_6120_Revision_R_Arch_LSB 0x10
-#define QIB_6120_Revision_R_Arch_RMASK 0xFF
-#define QIB_6120_Revision_R_ChipRevMajor_LSB 0x8
-#define QIB_6120_Revision_R_ChipRevMajor_RMASK 0xFF
-#define QIB_6120_Revision_R_ChipRevMinor_LSB 0x0
-#define QIB_6120_Revision_R_ChipRevMinor_RMASK 0xFF
-
-#define QIB_6120_Control_OFFS 0x8
-#define QIB_6120_Control_TxLatency_LSB 0x4
-#define QIB_6120_Control_TxLatency_RMASK 0x1
-#define QIB_6120_Control_PCIERetryBufDiagEn_LSB 0x3
-#define QIB_6120_Control_PCIERetryBufDiagEn_RMASK 0x1
-#define QIB_6120_Control_LinkEn_LSB 0x2
-#define QIB_6120_Control_LinkEn_RMASK 0x1
-#define QIB_6120_Control_FreezeMode_LSB 0x1
-#define QIB_6120_Control_FreezeMode_RMASK 0x1
-#define QIB_6120_Control_SyncReset_LSB 0x0
-#define QIB_6120_Control_SyncReset_RMASK 0x1
-
-#define QIB_6120_PageAlign_OFFS 0x10
-
-#define QIB_6120_PortCnt_OFFS 0x18
-
-#define QIB_6120_SendRegBase_OFFS 0x30
-
-#define QIB_6120_UserRegBase_OFFS 0x38
-
-#define QIB_6120_CntrRegBase_OFFS 0x40
-
-#define QIB_6120_Scratch_OFFS 0x48
-#define QIB_6120_Scratch_TopHalf_LSB 0x20
-#define QIB_6120_Scratch_TopHalf_RMASK 0xFFFFFFFF
-#define QIB_6120_Scratch_BottomHalf_LSB 0x0
-#define QIB_6120_Scratch_BottomHalf_RMASK 0xFFFFFFFF
-
-#define QIB_6120_IntBlocked_OFFS 0x60
-#define QIB_6120_IntBlocked_ErrorIntBlocked_LSB 0x1F
-#define QIB_6120_IntBlocked_ErrorIntBlocked_RMASK 0x1
-#define QIB_6120_IntBlocked_PioSetIntBlocked_LSB 0x1E
-#define QIB_6120_IntBlocked_PioSetIntBlocked_RMASK 0x1
-#define QIB_6120_IntBlocked_PioBufAvailIntBlocked_LSB 0x1D
-#define QIB_6120_IntBlocked_PioBufAvailIntBlocked_RMASK 0x1
-#define QIB_6120_IntBlocked_assertGPIOIntBlocked_LSB 0x1C
-#define QIB_6120_IntBlocked_assertGPIOIntBlocked_RMASK 0x1
-#define QIB_6120_IntBlocked_Reserved_LSB 0xF
-#define QIB_6120_IntBlocked_Reserved_RMASK 0x1FFF
-#define QIB_6120_IntBlocked_RcvAvail4IntBlocked_LSB 0x10
-#define QIB_6120_IntBlocked_RcvAvail4IntBlocked_RMASK 0x1
-#define QIB_6120_IntBlocked_RcvAvail3IntBlocked_LSB 0xF
-#define QIB_6120_IntBlocked_RcvAvail3IntBlocked_RMASK 0x1
-#define QIB_6120_IntBlocked_RcvAvail2IntBlocked_LSB 0xE
-#define QIB_6120_IntBlocked_RcvAvail2IntBlocked_RMASK 0x1
-#define QIB_6120_IntBlocked_RcvAvail1IntBlocked_LSB 0xD
-#define QIB_6120_IntBlocked_RcvAvail1IntBlocked_RMASK 0x1
-#define QIB_6120_IntBlocked_RcvAvail0IntBlocked_LSB 0xC
-#define QIB_6120_IntBlocked_RcvAvail0IntBlocked_RMASK 0x1
-#define QIB_6120_IntBlocked_Reserved1_LSB 0x5
-#define QIB_6120_IntBlocked_Reserved1_RMASK 0x7F
-#define QIB_6120_IntBlocked_RcvUrg4IntBlocked_LSB 0x4
-#define QIB_6120_IntBlocked_RcvUrg4IntBlocked_RMASK 0x1
-#define QIB_6120_IntBlocked_RcvUrg3IntBlocked_LSB 0x3
-#define QIB_6120_IntBlocked_RcvUrg3IntBlocked_RMASK 0x1
-#define QIB_6120_IntBlocked_RcvUrg2IntBlocked_LSB 0x2
-#define QIB_6120_IntBlocked_RcvUrg2IntBlocked_RMASK 0x1
-#define QIB_6120_IntBlocked_RcvUrg1IntBlocked_LSB 0x1
-#define QIB_6120_IntBlocked_RcvUrg1IntBlocked_RMASK 0x1
-#define QIB_6120_IntBlocked_RcvUrg0IntBlocked_LSB 0x0
-#define QIB_6120_IntBlocked_RcvUrg0IntBlocked_RMASK 0x1
-
-#define QIB_6120_IntMask_OFFS 0x68
-#define QIB_6120_IntMask_ErrorIntMask_LSB 0x1F
-#define QIB_6120_IntMask_ErrorIntMask_RMASK 0x1
-#define QIB_6120_IntMask_PioSetIntMask_LSB 0x1E
-#define QIB_6120_IntMask_PioSetIntMask_RMASK 0x1
-#define QIB_6120_IntMask_PioBufAvailIntMask_LSB 0x1D
-#define QIB_6120_IntMask_PioBufAvailIntMask_RMASK 0x1
-#define QIB_6120_IntMask_assertGPIOIntMask_LSB 0x1C
-#define QIB_6120_IntMask_assertGPIOIntMask_RMASK 0x1
-#define QIB_6120_IntMask_Reserved_LSB 0x11
-#define QIB_6120_IntMask_Reserved_RMASK 0x7FF
-#define QIB_6120_IntMask_RcvAvail4IntMask_LSB 0x10
-#define QIB_6120_IntMask_RcvAvail4IntMask_RMASK 0x1
-#define QIB_6120_IntMask_RcvAvail3IntMask_LSB 0xF
-#define QIB_6120_IntMask_RcvAvail3IntMask_RMASK 0x1
-#define QIB_6120_IntMask_RcvAvail2IntMask_LSB 0xE
-#define QIB_6120_IntMask_RcvAvail2IntMask_RMASK 0x1
-#define QIB_6120_IntMask_RcvAvail1IntMask_LSB 0xD
-#define QIB_6120_IntMask_RcvAvail1IntMask_RMASK 0x1
-#define QIB_6120_IntMask_RcvAvail0IntMask_LSB 0xC
-#define QIB_6120_IntMask_RcvAvail0IntMask_RMASK 0x1
-#define QIB_6120_IntMask_Reserved1_LSB 0x5
-#define QIB_6120_IntMask_Reserved1_RMASK 0x7F
-#define QIB_6120_IntMask_RcvUrg4IntMask_LSB 0x4
-#define QIB_6120_IntMask_RcvUrg4IntMask_RMASK 0x1
-#define QIB_6120_IntMask_RcvUrg3IntMask_LSB 0x3
-#define QIB_6120_IntMask_RcvUrg3IntMask_RMASK 0x1
-#define QIB_6120_IntMask_RcvUrg2IntMask_LSB 0x2
-#define QIB_6120_IntMask_RcvUrg2IntMask_RMASK 0x1
-#define QIB_6120_IntMask_RcvUrg1IntMask_LSB 0x1
-#define QIB_6120_IntMask_RcvUrg1IntMask_RMASK 0x1
-#define QIB_6120_IntMask_RcvUrg0IntMask_LSB 0x0
-#define QIB_6120_IntMask_RcvUrg0IntMask_RMASK 0x1
-
-#define QIB_6120_IntStatus_OFFS 0x70
-#define QIB_6120_IntStatus_Error_LSB 0x1F
-#define QIB_6120_IntStatus_Error_RMASK 0x1
-#define QIB_6120_IntStatus_PioSent_LSB 0x1E
-#define QIB_6120_IntStatus_PioSent_RMASK 0x1
-#define QIB_6120_IntStatus_PioBufAvail_LSB 0x1D
-#define QIB_6120_IntStatus_PioBufAvail_RMASK 0x1
-#define QIB_6120_IntStatus_assertGPIO_LSB 0x1C
-#define QIB_6120_IntStatus_assertGPIO_RMASK 0x1
-#define QIB_6120_IntStatus_Reserved_LSB 0xF
-#define QIB_6120_IntStatus_Reserved_RMASK 0x1FFF
-#define QIB_6120_IntStatus_RcvAvail4_LSB 0x10
-#define QIB_6120_IntStatus_RcvAvail4_RMASK 0x1
-#define QIB_6120_IntStatus_RcvAvail3_LSB 0xF
-#define QIB_6120_IntStatus_RcvAvail3_RMASK 0x1
-#define QIB_6120_IntStatus_RcvAvail2_LSB 0xE
-#define QIB_6120_IntStatus_RcvAvail2_RMASK 0x1
-#define QIB_6120_IntStatus_RcvAvail1_LSB 0xD
-#define QIB_6120_IntStatus_RcvAvail1_RMASK 0x1
-#define QIB_6120_IntStatus_RcvAvail0_LSB 0xC
-#define QIB_6120_IntStatus_RcvAvail0_RMASK 0x1
-#define QIB_6120_IntStatus_Reserved1_LSB 0x5
-#define QIB_6120_IntStatus_Reserved1_RMASK 0x7F
-#define QIB_6120_IntStatus_RcvUrg4_LSB 0x4
-#define QIB_6120_IntStatus_RcvUrg4_RMASK 0x1
-#define QIB_6120_IntStatus_RcvUrg3_LSB 0x3
-#define QIB_6120_IntStatus_RcvUrg3_RMASK 0x1
-#define QIB_6120_IntStatus_RcvUrg2_LSB 0x2
-#define QIB_6120_IntStatus_RcvUrg2_RMASK 0x1
-#define QIB_6120_IntStatus_RcvUrg1_LSB 0x1
-#define QIB_6120_IntStatus_RcvUrg1_RMASK 0x1
-#define QIB_6120_IntStatus_RcvUrg0_LSB 0x0
-#define QIB_6120_IntStatus_RcvUrg0_RMASK 0x1
-
-#define QIB_6120_IntClear_OFFS 0x78
-#define QIB_6120_IntClear_ErrorIntClear_LSB 0x1F
-#define QIB_6120_IntClear_ErrorIntClear_RMASK 0x1
-#define QIB_6120_IntClear_PioSetIntClear_LSB 0x1E
-#define QIB_6120_IntClear_PioSetIntClear_RMASK 0x1
-#define QIB_6120_IntClear_PioBufAvailIntClear_LSB 0x1D
-#define QIB_6120_IntClear_PioBufAvailIntClear_RMASK 0x1
-#define QIB_6120_IntClear_assertGPIOIntClear_LSB 0x1C
-#define QIB_6120_IntClear_assertGPIOIntClear_RMASK 0x1
-#define QIB_6120_IntClear_Reserved_LSB 0xF
-#define QIB_6120_IntClear_Reserved_RMASK 0x1FFF
-#define QIB_6120_IntClear_RcvAvail4IntClear_LSB 0x10
-#define QIB_6120_IntClear_RcvAvail4IntClear_RMASK 0x1
-#define QIB_6120_IntClear_RcvAvail3IntClear_LSB 0xF
-#define QIB_6120_IntClear_RcvAvail3IntClear_RMASK 0x1
-#define QIB_6120_IntClear_RcvAvail2IntClear_LSB 0xE
-#define QIB_6120_IntClear_RcvAvail2IntClear_RMASK 0x1
-#define QIB_6120_IntClear_RcvAvail1IntClear_LSB 0xD
-#define QIB_6120_IntClear_RcvAvail1IntClear_RMASK 0x1
-#define QIB_6120_IntClear_RcvAvail0IntClear_LSB 0xC
-#define QIB_6120_IntClear_RcvAvail0IntClear_RMASK 0x1
-#define QIB_6120_IntClear_Reserved1_LSB 0x5
-#define QIB_6120_IntClear_Reserved1_RMASK 0x7F
-#define QIB_6120_IntClear_RcvUrg4IntClear_LSB 0x4
-#define QIB_6120_IntClear_RcvUrg4IntClear_RMASK 0x1
-#define QIB_6120_IntClear_RcvUrg3IntClear_LSB 0x3
-#define QIB_6120_IntClear_RcvUrg3IntClear_RMASK 0x1
-#define QIB_6120_IntClear_RcvUrg2IntClear_LSB 0x2
-#define QIB_6120_IntClear_RcvUrg2IntClear_RMASK 0x1
-#define QIB_6120_IntClear_RcvUrg1IntClear_LSB 0x1
-#define QIB_6120_IntClear_RcvUrg1IntClear_RMASK 0x1
-#define QIB_6120_IntClear_RcvUrg0IntClear_LSB 0x0
-#define QIB_6120_IntClear_RcvUrg0IntClear_RMASK 0x1
-
-#define QIB_6120_ErrMask_OFFS 0x80
-#define QIB_6120_ErrMask_Reserved_LSB 0x34
-#define QIB_6120_ErrMask_Reserved_RMASK 0xFFF
-#define QIB_6120_ErrMask_HardwareErrMask_LSB 0x33
-#define QIB_6120_ErrMask_HardwareErrMask_RMASK 0x1
-#define QIB_6120_ErrMask_ResetNegatedMask_LSB 0x32
-#define QIB_6120_ErrMask_ResetNegatedMask_RMASK 0x1
-#define QIB_6120_ErrMask_InvalidAddrErrMask_LSB 0x31
-#define QIB_6120_ErrMask_InvalidAddrErrMask_RMASK 0x1
-#define QIB_6120_ErrMask_IBStatusChangedMask_LSB 0x30
-#define QIB_6120_ErrMask_IBStatusChangedMask_RMASK 0x1
-#define QIB_6120_ErrMask_Reserved1_LSB 0x26
-#define QIB_6120_ErrMask_Reserved1_RMASK 0x3FF
-#define QIB_6120_ErrMask_SendUnsupportedVLErrMask_LSB 0x25
-#define QIB_6120_ErrMask_SendUnsupportedVLErrMask_RMASK 0x1
-#define QIB_6120_ErrMask_SendUnexpectedPktNumErrMask_LSB 0x24
-#define QIB_6120_ErrMask_SendUnexpectedPktNumErrMask_RMASK 0x1
-#define QIB_6120_ErrMask_SendPioArmLaunchErrMask_LSB 0x23
-#define QIB_6120_ErrMask_SendPioArmLaunchErrMask_RMASK 0x1
-#define QIB_6120_ErrMask_SendDroppedDataPktErrMask_LSB 0x22
-#define QIB_6120_ErrMask_SendDroppedDataPktErrMask_RMASK 0x1
-#define QIB_6120_ErrMask_SendDroppedSmpPktErrMask_LSB 0x21
-#define QIB_6120_ErrMask_SendDroppedSmpPktErrMask_RMASK 0x1
-#define QIB_6120_ErrMask_SendPktLenErrMask_LSB 0x20
-#define QIB_6120_ErrMask_SendPktLenErrMask_RMASK 0x1
-#define QIB_6120_ErrMask_SendUnderRunErrMask_LSB 0x1F
-#define QIB_6120_ErrMask_SendUnderRunErrMask_RMASK 0x1
-#define QIB_6120_ErrMask_SendMaxPktLenErrMask_LSB 0x1E
-#define QIB_6120_ErrMask_SendMaxPktLenErrMask_RMASK 0x1
-#define QIB_6120_ErrMask_SendMinPktLenErrMask_LSB 0x1D
-#define QIB_6120_ErrMask_SendMinPktLenErrMask_RMASK 0x1
-#define QIB_6120_ErrMask_Reserved2_LSB 0x12
-#define QIB_6120_ErrMask_Reserved2_RMASK 0x7FF
-#define QIB_6120_ErrMask_RcvIBLostLinkErrMask_LSB 0x11
-#define QIB_6120_ErrMask_RcvIBLostLinkErrMask_RMASK 0x1
-#define QIB_6120_ErrMask_RcvHdrErrMask_LSB 0x10
-#define QIB_6120_ErrMask_RcvHdrErrMask_RMASK 0x1
-#define QIB_6120_ErrMask_RcvHdrLenErrMask_LSB 0xF
-#define QIB_6120_ErrMask_RcvHdrLenErrMask_RMASK 0x1
-#define QIB_6120_ErrMask_RcvBadTidErrMask_LSB 0xE
-#define QIB_6120_ErrMask_RcvBadTidErrMask_RMASK 0x1
-#define QIB_6120_ErrMask_RcvHdrFullErrMask_LSB 0xD
-#define QIB_6120_ErrMask_RcvHdrFullErrMask_RMASK 0x1
-#define QIB_6120_ErrMask_RcvEgrFullErrMask_LSB 0xC
-#define QIB_6120_ErrMask_RcvEgrFullErrMask_RMASK 0x1
-#define QIB_6120_ErrMask_RcvBadVersionErrMask_LSB 0xB
-#define QIB_6120_ErrMask_RcvBadVersionErrMask_RMASK 0x1
-#define QIB_6120_ErrMask_RcvIBFlowErrMask_LSB 0xA
-#define QIB_6120_ErrMask_RcvIBFlowErrMask_RMASK 0x1
-#define QIB_6120_ErrMask_RcvEBPErrMask_LSB 0x9
-#define QIB_6120_ErrMask_RcvEBPErrMask_RMASK 0x1
-#define QIB_6120_ErrMask_RcvUnsupportedVLErrMask_LSB 0x8
-#define QIB_6120_ErrMask_RcvUnsupportedVLErrMask_RMASK 0x1
-#define QIB_6120_ErrMask_RcvUnexpectedCharErrMask_LSB 0x7
-#define QIB_6120_ErrMask_RcvUnexpectedCharErrMask_RMASK 0x1
-#define QIB_6120_ErrMask_RcvShortPktLenErrMask_LSB 0x6
-#define QIB_6120_ErrMask_RcvShortPktLenErrMask_RMASK 0x1
-#define QIB_6120_ErrMask_RcvLongPktLenErrMask_LSB 0x5
-#define QIB_6120_ErrMask_RcvLongPktLenErrMask_RMASK 0x1
-#define QIB_6120_ErrMask_RcvMaxPktLenErrMask_LSB 0x4
-#define QIB_6120_ErrMask_RcvMaxPktLenErrMask_RMASK 0x1
-#define QIB_6120_ErrMask_RcvMinPktLenErrMask_LSB 0x3
-#define QIB_6120_ErrMask_RcvMinPktLenErrMask_RMASK 0x1
-#define QIB_6120_ErrMask_RcvICRCErrMask_LSB 0x2
-#define QIB_6120_ErrMask_RcvICRCErrMask_RMASK 0x1
-#define QIB_6120_ErrMask_RcvVCRCErrMask_LSB 0x1
-#define QIB_6120_ErrMask_RcvVCRCErrMask_RMASK 0x1
-#define QIB_6120_ErrMask_RcvFormatErrMask_LSB 0x0
-#define QIB_6120_ErrMask_RcvFormatErrMask_RMASK 0x1
-
-#define QIB_6120_ErrStatus_OFFS 0x88
-#define QIB_6120_ErrStatus_Reserved_LSB 0x34
-#define QIB_6120_ErrStatus_Reserved_RMASK 0xFFF
-#define QIB_6120_ErrStatus_HardwareErr_LSB 0x33
-#define QIB_6120_ErrStatus_HardwareErr_RMASK 0x1
-#define QIB_6120_ErrStatus_ResetNegated_LSB 0x32
-#define QIB_6120_ErrStatus_ResetNegated_RMASK 0x1
-#define QIB_6120_ErrStatus_InvalidAddrErr_LSB 0x31
-#define QIB_6120_ErrStatus_InvalidAddrErr_RMASK 0x1
-#define QIB_6120_ErrStatus_IBStatusChanged_LSB 0x30
-#define QIB_6120_ErrStatus_IBStatusChanged_RMASK 0x1
-#define QIB_6120_ErrStatus_Reserved1_LSB 0x26
-#define QIB_6120_ErrStatus_Reserved1_RMASK 0x3FF
-#define QIB_6120_ErrStatus_SendUnsupportedVLErr_LSB 0x25
-#define QIB_6120_ErrStatus_SendUnsupportedVLErr_RMASK 0x1
-#define QIB_6120_ErrStatus_SendUnexpectedPktNumErr_LSB 0x24
-#define QIB_6120_ErrStatus_SendUnexpectedPktNumErr_RMASK 0x1
-#define QIB_6120_ErrStatus_SendPioArmLaunchErr_LSB 0x23
-#define QIB_6120_ErrStatus_SendPioArmLaunchErr_RMASK 0x1
-#define QIB_6120_ErrStatus_SendDroppedDataPktErr_LSB 0x22
-#define QIB_6120_ErrStatus_SendDroppedDataPktErr_RMASK 0x1
-#define QIB_6120_ErrStatus_SendDroppedSmpPktErr_LSB 0x21
-#define QIB_6120_ErrStatus_SendDroppedSmpPktErr_RMASK 0x1
-#define QIB_6120_ErrStatus_SendPktLenErr_LSB 0x20
-#define QIB_6120_ErrStatus_SendPktLenErr_RMASK 0x1
-#define QIB_6120_ErrStatus_SendUnderRunErr_LSB 0x1F
-#define QIB_6120_ErrStatus_SendUnderRunErr_RMASK 0x1
-#define QIB_6120_ErrStatus_SendMaxPktLenErr_LSB 0x1E
-#define QIB_6120_ErrStatus_SendMaxPktLenErr_RMASK 0x1
-#define QIB_6120_ErrStatus_SendMinPktLenErr_LSB 0x1D
-#define QIB_6120_ErrStatus_SendMinPktLenErr_RMASK 0x1
-#define QIB_6120_ErrStatus_Reserved2_LSB 0x12
-#define QIB_6120_ErrStatus_Reserved2_RMASK 0x7FF
-#define QIB_6120_ErrStatus_RcvIBLostLinkErr_LSB 0x11
-#define QIB_6120_ErrStatus_RcvIBLostLinkErr_RMASK 0x1
-#define QIB_6120_ErrStatus_RcvHdrErr_LSB 0x10
-#define QIB_6120_ErrStatus_RcvHdrErr_RMASK 0x1
-#define QIB_6120_ErrStatus_RcvHdrLenErr_LSB 0xF
-#define QIB_6120_ErrStatus_RcvHdrLenErr_RMASK 0x1
-#define QIB_6120_ErrStatus_RcvBadTidErr_LSB 0xE
-#define QIB_6120_ErrStatus_RcvBadTidErr_RMASK 0x1
-#define QIB_6120_ErrStatus_RcvHdrFullErr_LSB 0xD
-#define QIB_6120_ErrStatus_RcvHdrFullErr_RMASK 0x1
-#define QIB_6120_ErrStatus_RcvEgrFullErr_LSB 0xC
-#define QIB_6120_ErrStatus_RcvEgrFullErr_RMASK 0x1
-#define QIB_6120_ErrStatus_RcvBadVersionErr_LSB 0xB
-#define QIB_6120_ErrStatus_RcvBadVersionErr_RMASK 0x1
-#define QIB_6120_ErrStatus_RcvIBFlowErr_LSB 0xA
-#define QIB_6120_ErrStatus_RcvIBFlowErr_RMASK 0x1
-#define QIB_6120_ErrStatus_RcvEBPErr_LSB 0x9
-#define QIB_6120_ErrStatus_RcvEBPErr_RMASK 0x1
-#define QIB_6120_ErrStatus_RcvUnsupportedVLErr_LSB 0x8
-#define QIB_6120_ErrStatus_RcvUnsupportedVLErr_RMASK 0x1
-#define QIB_6120_ErrStatus_RcvUnexpectedCharErr_LSB 0x7
-#define QIB_6120_ErrStatus_RcvUnexpectedCharErr_RMASK 0x1
-#define QIB_6120_ErrStatus_RcvShortPktLenErr_LSB 0x6
-#define QIB_6120_ErrStatus_RcvShortPktLenErr_RMASK 0x1
-#define QIB_6120_ErrStatus_RcvLongPktLenErr_LSB 0x5
-#define QIB_6120_ErrStatus_RcvLongPktLenErr_RMASK 0x1
-#define QIB_6120_ErrStatus_RcvMaxPktLenErr_LSB 0x4
-#define QIB_6120_ErrStatus_RcvMaxPktLenErr_RMASK 0x1
-#define QIB_6120_ErrStatus_RcvMinPktLenErr_LSB 0x3
-#define QIB_6120_ErrStatus_RcvMinPktLenErr_RMASK 0x1
-#define QIB_6120_ErrStatus_RcvICRCErr_LSB 0x2
-#define QIB_6120_ErrStatus_RcvICRCErr_RMASK 0x1
-#define QIB_6120_ErrStatus_RcvVCRCErr_LSB 0x1
-#define QIB_6120_ErrStatus_RcvVCRCErr_RMASK 0x1
-#define QIB_6120_ErrStatus_RcvFormatErr_LSB 0x0
-#define QIB_6120_ErrStatus_RcvFormatErr_RMASK 0x1
-
-#define QIB_6120_ErrClear_OFFS 0x90
-#define QIB_6120_ErrClear_Reserved_LSB 0x34
-#define QIB_6120_ErrClear_Reserved_RMASK 0xFFF
-#define QIB_6120_ErrClear_HardwareErrClear_LSB 0x33
-#define QIB_6120_ErrClear_HardwareErrClear_RMASK 0x1
-#define QIB_6120_ErrClear_ResetNegatedClear_LSB 0x32
-#define QIB_6120_ErrClear_ResetNegatedClear_RMASK 0x1
-#define QIB_6120_ErrClear_InvalidAddrErrClear_LSB 0x31
-#define QIB_6120_ErrClear_InvalidAddrErrClear_RMASK 0x1
-#define QIB_6120_ErrClear_IBStatusChangedClear_LSB 0x30
-#define QIB_6120_ErrClear_IBStatusChangedClear_RMASK 0x1
-#define QIB_6120_ErrClear_Reserved1_LSB 0x26
-#define QIB_6120_ErrClear_Reserved1_RMASK 0x3FF
-#define QIB_6120_ErrClear_SendUnsupportedVLErrClear_LSB 0x25
-#define QIB_6120_ErrClear_SendUnsupportedVLErrClear_RMASK 0x1
-#define QIB_6120_ErrClear_SendUnexpectedPktNumErrClear_LSB 0x24
-#define QIB_6120_ErrClear_SendUnexpectedPktNumErrClear_RMASK 0x1
-#define QIB_6120_ErrClear_SendPioArmLaunchErrClear_LSB 0x23
-#define QIB_6120_ErrClear_SendPioArmLaunchErrClear_RMASK 0x1
-#define QIB_6120_ErrClear_SendDroppedDataPktErrClear_LSB 0x22
-#define QIB_6120_ErrClear_SendDroppedDataPktErrClear_RMASK 0x1
-#define QIB_6120_ErrClear_SendDroppedSmpPktErrClear_LSB 0x21
-#define QIB_6120_ErrClear_SendDroppedSmpPktErrClear_RMASK 0x1
-#define QIB_6120_ErrClear_SendPktLenErrClear_LSB 0x20
-#define QIB_6120_ErrClear_SendPktLenErrClear_RMASK 0x1
-#define QIB_6120_ErrClear_SendUnderRunErrClear_LSB 0x1F
-#define QIB_6120_ErrClear_SendUnderRunErrClear_RMASK 0x1
-#define QIB_6120_ErrClear_SendMaxPktLenErrClear_LSB 0x1E
-#define QIB_6120_ErrClear_SendMaxPktLenErrClear_RMASK 0x1
-#define QIB_6120_ErrClear_SendMinPktLenErrClear_LSB 0x1D
-#define QIB_6120_ErrClear_SendMinPktLenErrClear_RMASK 0x1
-#define QIB_6120_ErrClear_Reserved2_LSB 0x12
-#define QIB_6120_ErrClear_Reserved2_RMASK 0x7FF
-#define QIB_6120_ErrClear_RcvIBLostLinkErrClear_LSB 0x11
-#define QIB_6120_ErrClear_RcvIBLostLinkErrClear_RMASK 0x1
-#define QIB_6120_ErrClear_RcvHdrErrClear_LSB 0x10
-#define QIB_6120_ErrClear_RcvHdrErrClear_RMASK 0x1
-#define QIB_6120_ErrClear_RcvHdrLenErrClear_LSB 0xF
-#define QIB_6120_ErrClear_RcvHdrLenErrClear_RMASK 0x1
-#define QIB_6120_ErrClear_RcvBadTidErrClear_LSB 0xE
-#define QIB_6120_ErrClear_RcvBadTidErrClear_RMASK 0x1
-#define QIB_6120_ErrClear_RcvHdrFullErrClear_LSB 0xD
-#define QIB_6120_ErrClear_RcvHdrFullErrClear_RMASK 0x1
-#define QIB_6120_ErrClear_RcvEgrFullErrClear_LSB 0xC
-#define QIB_6120_ErrClear_RcvEgrFullErrClear_RMASK 0x1
-#define QIB_6120_ErrClear_RcvBadVersionErrClear_LSB 0xB
-#define QIB_6120_ErrClear_RcvBadVersionErrClear_RMASK 0x1
-#define QIB_6120_ErrClear_RcvIBFlowErrClear_LSB 0xA
-#define QIB_6120_ErrClear_RcvIBFlowErrClear_RMASK 0x1
-#define QIB_6120_ErrClear_RcvEBPErrClear_LSB 0x9
-#define QIB_6120_ErrClear_RcvEBPErrClear_RMASK 0x1
-#define QIB_6120_ErrClear_RcvUnsupportedVLErrClear_LSB 0x8
-#define QIB_6120_ErrClear_RcvUnsupportedVLErrClear_RMASK 0x1
-#define QIB_6120_ErrClear_RcvUnexpectedCharErrClear_LSB 0x7
-#define QIB_6120_ErrClear_RcvUnexpectedCharErrClear_RMASK 0x1
-#define QIB_6120_ErrClear_RcvShortPktLenErrClear_LSB 0x6
-#define QIB_6120_ErrClear_RcvShortPktLenErrClear_RMASK 0x1
-#define QIB_6120_ErrClear_RcvLongPktLenErrClear_LSB 0x5
-#define QIB_6120_ErrClear_RcvLongPktLenErrClear_RMASK 0x1
-#define QIB_6120_ErrClear_RcvMaxPktLenErrClear_LSB 0x4
-#define QIB_6120_ErrClear_RcvMaxPktLenErrClear_RMASK 0x1
-#define QIB_6120_ErrClear_RcvMinPktLenErrClear_LSB 0x3
-#define QIB_6120_ErrClear_RcvMinPktLenErrClear_RMASK 0x1
-#define QIB_6120_ErrClear_RcvICRCErrClear_LSB 0x2
-#define QIB_6120_ErrClear_RcvICRCErrClear_RMASK 0x1
-#define QIB_6120_ErrClear_RcvVCRCErrClear_LSB 0x1
-#define QIB_6120_ErrClear_RcvVCRCErrClear_RMASK 0x1
-#define QIB_6120_ErrClear_RcvFormatErrClear_LSB 0x0
-#define QIB_6120_ErrClear_RcvFormatErrClear_RMASK 0x1
-
-#define QIB_6120_HwErrMask_OFFS 0x98
-#define QIB_6120_HwErrMask_IBCBusFromSPCParityErrMask_LSB 0x3F
-#define QIB_6120_HwErrMask_IBCBusFromSPCParityErrMask_RMASK 0x1
-#define QIB_6120_HwErrMask_IBCBusToSPCParityErrMask_LSB 0x3E
-#define QIB_6120_HwErrMask_IBCBusToSPCParityErrMask_RMASK 0x1
-#define QIB_6120_HwErrMask_Reserved_LSB 0x3D
-#define QIB_6120_HwErrMask_Reserved_RMASK 0x1
-#define QIB_6120_HwErrMask_IBSerdesPClkNotDetectMask_LSB 0x3C
-#define QIB_6120_HwErrMask_IBSerdesPClkNotDetectMask_RMASK 0x1
-#define QIB_6120_HwErrMask_PCIESerdesQ0PClkNotDetectMask_LSB 0x3B
-#define QIB_6120_HwErrMask_PCIESerdesQ0PClkNotDetectMask_RMASK 0x1
-#define QIB_6120_HwErrMask_PCIESerdesQ1PClkNotDetectMask_LSB 0x3A
-#define QIB_6120_HwErrMask_PCIESerdesQ1PClkNotDetectMask_RMASK 0x1
-#define QIB_6120_HwErrMask_Reserved1_LSB 0x39
-#define QIB_6120_HwErrMask_Reserved1_RMASK 0x1
-#define QIB_6120_HwErrMask_IBPLLrfSlipMask_LSB 0x38
-#define QIB_6120_HwErrMask_IBPLLrfSlipMask_RMASK 0x1
-#define QIB_6120_HwErrMask_IBPLLfbSlipMask_LSB 0x37
-#define QIB_6120_HwErrMask_IBPLLfbSlipMask_RMASK 0x1
-#define QIB_6120_HwErrMask_PowerOnBISTFailedMask_LSB 0x36
-#define QIB_6120_HwErrMask_PowerOnBISTFailedMask_RMASK 0x1
-#define QIB_6120_HwErrMask_Reserved2_LSB 0x33
-#define QIB_6120_HwErrMask_Reserved2_RMASK 0x7
-#define QIB_6120_HwErrMask_RXEMemParityErrMask_LSB 0x2C
-#define QIB_6120_HwErrMask_RXEMemParityErrMask_RMASK 0x7F
-#define QIB_6120_HwErrMask_TXEMemParityErrMask_LSB 0x28
-#define QIB_6120_HwErrMask_TXEMemParityErrMask_RMASK 0xF
-#define QIB_6120_HwErrMask_Reserved3_LSB 0x22
-#define QIB_6120_HwErrMask_Reserved3_RMASK 0x3F
-#define QIB_6120_HwErrMask_PCIeBusParityErrMask_LSB 0x1F
-#define QIB_6120_HwErrMask_PCIeBusParityErrMask_RMASK 0x7
-#define QIB_6120_HwErrMask_PcieCplTimeoutMask_LSB 0x1E
-#define QIB_6120_HwErrMask_PcieCplTimeoutMask_RMASK 0x1
-#define QIB_6120_HwErrMask_PoisonedTLPMask_LSB 0x1D
-#define QIB_6120_HwErrMask_PoisonedTLPMask_RMASK 0x1
-#define QIB_6120_HwErrMask_Reserved4_LSB 0x6
-#define QIB_6120_HwErrMask_Reserved4_RMASK 0x7FFFFF
-#define QIB_6120_HwErrMask_PCIeMemParityErrMask_LSB 0x0
-#define QIB_6120_HwErrMask_PCIeMemParityErrMask_RMASK 0x3F
-
-#define QIB_6120_HwErrStatus_OFFS 0xA0
-#define QIB_6120_HwErrStatus_IBCBusFromSPCParityErr_LSB 0x3F
-#define QIB_6120_HwErrStatus_IBCBusFromSPCParityErr_RMASK 0x1
-#define QIB_6120_HwErrStatus_IBCBusToSPCParityErr_LSB 0x3E
-#define QIB_6120_HwErrStatus_IBCBusToSPCParityErr_RMASK 0x1
-#define QIB_6120_HwErrStatus_Reserved_LSB 0x3D
-#define QIB_6120_HwErrStatus_Reserved_RMASK 0x1
-#define QIB_6120_HwErrStatus_IBSerdesPClkNotDetect_LSB 0x3C
-#define QIB_6120_HwErrStatus_IBSerdesPClkNotDetect_RMASK 0x1
-#define QIB_6120_HwErrStatus_PCIESerdesQ0PClkNotDetect_LSB 0x3B
-#define QIB_6120_HwErrStatus_PCIESerdesQ0PClkNotDetect_RMASK 0x1
-#define QIB_6120_HwErrStatus_PCIESerdesQ1PClkNotDetect_LSB 0x3A
-#define QIB_6120_HwErrStatus_PCIESerdesQ1PClkNotDetect_RMASK 0x1
-#define QIB_6120_HwErrStatus_Reserved1_LSB 0x39
-#define QIB_6120_HwErrStatus_Reserved1_RMASK 0x1
-#define QIB_6120_HwErrStatus_IBPLLrfSlip_LSB 0x38
-#define QIB_6120_HwErrStatus_IBPLLrfSlip_RMASK 0x1
-#define QIB_6120_HwErrStatus_IBPLLfbSlip_LSB 0x37
-#define QIB_6120_HwErrStatus_IBPLLfbSlip_RMASK 0x1
-#define QIB_6120_HwErrStatus_PowerOnBISTFailed_LSB 0x36
-#define QIB_6120_HwErrStatus_PowerOnBISTFailed_RMASK 0x1
-#define QIB_6120_HwErrStatus_Reserved2_LSB 0x33
-#define QIB_6120_HwErrStatus_Reserved2_RMASK 0x7
-#define QIB_6120_HwErrStatus_RXEMemParity_LSB 0x2C
-#define QIB_6120_HwErrStatus_RXEMemParity_RMASK 0x7F
-#define QIB_6120_HwErrStatus_TXEMemParity_LSB 0x28
-#define QIB_6120_HwErrStatus_TXEMemParity_RMASK 0xF
-#define QIB_6120_HwErrStatus_Reserved3_LSB 0x22
-#define QIB_6120_HwErrStatus_Reserved3_RMASK 0x3F
-#define QIB_6120_HwErrStatus_PCIeBusParity_LSB 0x1F
-#define QIB_6120_HwErrStatus_PCIeBusParity_RMASK 0x7
-#define QIB_6120_HwErrStatus_PcieCplTimeout_LSB 0x1E
-#define QIB_6120_HwErrStatus_PcieCplTimeout_RMASK 0x1
-#define QIB_6120_HwErrStatus_PoisenedTLP_LSB 0x1D
-#define QIB_6120_HwErrStatus_PoisenedTLP_RMASK 0x1
-#define QIB_6120_HwErrStatus_Reserved4_LSB 0x6
-#define QIB_6120_HwErrStatus_Reserved4_RMASK 0x7FFFFF
-#define QIB_6120_HwErrStatus_PCIeMemParity_LSB 0x0
-#define QIB_6120_HwErrStatus_PCIeMemParity_RMASK 0x3F
-
-#define QIB_6120_HwErrClear_OFFS 0xA8
-#define QIB_6120_HwErrClear_IBCBusFromSPCParityErrClear_LSB 0x3F
-#define QIB_6120_HwErrClear_IBCBusFromSPCParityErrClear_RMASK 0x1
-#define QIB_6120_HwErrClear_IBCBusToSPCparityErrClear_LSB 0x3E
-#define QIB_6120_HwErrClear_IBCBusToSPCparityErrClear_RMASK 0x1
-#define QIB_6120_HwErrClear_Reserved_LSB 0x3D
-#define QIB_6120_HwErrClear_Reserved_RMASK 0x1
-#define QIB_6120_HwErrClear_IBSerdesPClkNotDetectClear_LSB 0x3C
-#define QIB_6120_HwErrClear_IBSerdesPClkNotDetectClear_RMASK 0x1
-#define QIB_6120_HwErrClear_PCIESerdesQ0PClkNotDetectClear_LSB 0x3B
-#define QIB_6120_HwErrClear_PCIESerdesQ0PClkNotDetectClear_RMASK 0x1
-#define QIB_6120_HwErrClear_PCIESerdesQ1PClkNotDetectClear_LSB 0x3A
-#define QIB_6120_HwErrClear_PCIESerdesQ1PClkNotDetectClear_RMASK 0x1
-#define QIB_6120_HwErrClear_Reserved1_LSB 0x39
-#define QIB_6120_HwErrClear_Reserved1_RMASK 0x1
-#define QIB_6120_HwErrClear_IBPLLrfSlipClear_LSB 0x38
-#define QIB_6120_HwErrClear_IBPLLrfSlipClear_RMASK 0x1
-#define QIB_6120_HwErrClear_IBPLLfbSlipClear_LSB 0x37
-#define QIB_6120_HwErrClear_IBPLLfbSlipClear_RMASK 0x1
-#define QIB_6120_HwErrClear_PowerOnBISTFailedClear_LSB 0x36
-#define QIB_6120_HwErrClear_PowerOnBISTFailedClear_RMASK 0x1
-#define QIB_6120_HwErrClear_Reserved2_LSB 0x33
-#define QIB_6120_HwErrClear_Reserved2_RMASK 0x7
-#define QIB_6120_HwErrClear_RXEMemParityClear_LSB 0x2C
-#define QIB_6120_HwErrClear_RXEMemParityClear_RMASK 0x7F
-#define QIB_6120_HwErrClear_TXEMemParityClear_LSB 0x28
-#define QIB_6120_HwErrClear_TXEMemParityClear_RMASK 0xF
-#define QIB_6120_HwErrClear_Reserved3_LSB 0x22
-#define QIB_6120_HwErrClear_Reserved3_RMASK 0x3F
-#define QIB_6120_HwErrClear_PCIeBusParityClr_LSB 0x1F
-#define QIB_6120_HwErrClear_PCIeBusParityClr_RMASK 0x7
-#define QIB_6120_HwErrClear_PcieCplTimeoutClear_LSB 0x1E
-#define QIB_6120_HwErrClear_PcieCplTimeoutClear_RMASK 0x1
-#define QIB_6120_HwErrClear_PoisonedTLPClear_LSB 0x1D
-#define QIB_6120_HwErrClear_PoisonedTLPClear_RMASK 0x1
-#define QIB_6120_HwErrClear_Reserved4_LSB 0x6
-#define QIB_6120_HwErrClear_Reserved4_RMASK 0x7FFFFF
-#define QIB_6120_HwErrClear_PCIeMemParityClr_LSB 0x0
-#define QIB_6120_HwErrClear_PCIeMemParityClr_RMASK 0x3F
-
-#define QIB_6120_HwDiagCtrl_OFFS 0xB0
-#define QIB_6120_HwDiagCtrl_ForceIBCBusFromSPCParityErr_LSB 0x3F
-#define QIB_6120_HwDiagCtrl_ForceIBCBusFromSPCParityErr_RMASK 0x1
-#define QIB_6120_HwDiagCtrl_ForceIBCBusToSPCParityErr_LSB 0x3E
-#define QIB_6120_HwDiagCtrl_ForceIBCBusToSPCParityErr_RMASK 0x1
-#define QIB_6120_HwDiagCtrl_CounterWrEnable_LSB 0x3D
-#define QIB_6120_HwDiagCtrl_CounterWrEnable_RMASK 0x1
-#define QIB_6120_HwDiagCtrl_CounterDisable_LSB 0x3C
-#define QIB_6120_HwDiagCtrl_CounterDisable_RMASK 0x1
-#define QIB_6120_HwDiagCtrl_Reserved_LSB 0x33
-#define QIB_6120_HwDiagCtrl_Reserved_RMASK 0x1FF
-#define QIB_6120_HwDiagCtrl_ForceRxMemParityErr_LSB 0x2C
-#define QIB_6120_HwDiagCtrl_ForceRxMemParityErr_RMASK 0x7F
-#define QIB_6120_HwDiagCtrl_ForceTxMemparityErr_LSB 0x28
-#define QIB_6120_HwDiagCtrl_ForceTxMemparityErr_RMASK 0xF
-#define QIB_6120_HwDiagCtrl_Reserved1_LSB 0x23
-#define QIB_6120_HwDiagCtrl_Reserved1_RMASK 0x1F
-#define QIB_6120_HwDiagCtrl_forcePCIeBusParity_LSB 0x1F
-#define QIB_6120_HwDiagCtrl_forcePCIeBusParity_RMASK 0xF
-#define QIB_6120_HwDiagCtrl_Reserved2_LSB 0x6
-#define QIB_6120_HwDiagCtrl_Reserved2_RMASK 0x1FFFFFF
-#define QIB_6120_HwDiagCtrl_forcePCIeMemParity_LSB 0x0
-#define QIB_6120_HwDiagCtrl_forcePCIeMemParity_RMASK 0x3F
-
-#define QIB_6120_IBCStatus_OFFS 0xC0
-#define QIB_6120_IBCStatus_TxCreditOk_LSB 0x1F
-#define QIB_6120_IBCStatus_TxCreditOk_RMASK 0x1
-#define QIB_6120_IBCStatus_TxReady_LSB 0x1E
-#define QIB_6120_IBCStatus_TxReady_RMASK 0x1
-#define QIB_6120_IBCStatus_Reserved_LSB 0x7
-#define QIB_6120_IBCStatus_Reserved_RMASK 0x7FFFFF
-#define QIB_6120_IBCStatus_LinkState_LSB 0x4
-#define QIB_6120_IBCStatus_LinkState_RMASK 0x7
-#define QIB_6120_IBCStatus_LinkTrainingState_LSB 0x0
-#define QIB_6120_IBCStatus_LinkTrainingState_RMASK 0xF
-
-#define QIB_6120_IBCCtrl_OFFS 0xC8
-#define QIB_6120_IBCCtrl_Loopback_LSB 0x3F
-#define QIB_6120_IBCCtrl_Loopback_RMASK 0x1
-#define QIB_6120_IBCCtrl_LinkDownDefaultState_LSB 0x3E
-#define QIB_6120_IBCCtrl_LinkDownDefaultState_RMASK 0x1
-#define QIB_6120_IBCCtrl_Reserved_LSB 0x2B
-#define QIB_6120_IBCCtrl_Reserved_RMASK 0x7FFFF
-#define QIB_6120_IBCCtrl_CreditScale_LSB 0x28
-#define QIB_6120_IBCCtrl_CreditScale_RMASK 0x7
-#define QIB_6120_IBCCtrl_OverrunThreshold_LSB 0x24
-#define QIB_6120_IBCCtrl_OverrunThreshold_RMASK 0xF
-#define QIB_6120_IBCCtrl_PhyerrThreshold_LSB 0x20
-#define QIB_6120_IBCCtrl_PhyerrThreshold_RMASK 0xF
-#define QIB_6120_IBCCtrl_Reserved1_LSB 0x1F
-#define QIB_6120_IBCCtrl_Reserved1_RMASK 0x1
-#define QIB_6120_IBCCtrl_MaxPktLen_LSB 0x14
-#define QIB_6120_IBCCtrl_MaxPktLen_RMASK 0x7FF
-#define QIB_6120_IBCCtrl_LinkCmd_LSB 0x12
-#define QIB_6120_IBCCtrl_LinkCmd_RMASK 0x3
-#define QIB_6120_IBCCtrl_LinkInitCmd_LSB 0x10
-#define QIB_6120_IBCCtrl_LinkInitCmd_RMASK 0x3
-#define QIB_6120_IBCCtrl_FlowCtrlWaterMark_LSB 0x8
-#define QIB_6120_IBCCtrl_FlowCtrlWaterMark_RMASK 0xFF
-#define QIB_6120_IBCCtrl_FlowCtrlPeriod_LSB 0x0
-#define QIB_6120_IBCCtrl_FlowCtrlPeriod_RMASK 0xFF
-
-#define QIB_6120_EXTStatus_OFFS 0xD0
-#define QIB_6120_EXTStatus_GPIOIn_LSB 0x30
-#define QIB_6120_EXTStatus_GPIOIn_RMASK 0xFFFF
-#define QIB_6120_EXTStatus_Reserved_LSB 0x20
-#define QIB_6120_EXTStatus_Reserved_RMASK 0xFFFF
-#define QIB_6120_EXTStatus_Reserved1_LSB 0x10
-#define QIB_6120_EXTStatus_Reserved1_RMASK 0xFFFF
-#define QIB_6120_EXTStatus_MemBISTFoundErr_LSB 0xF
-#define QIB_6120_EXTStatus_MemBISTFoundErr_RMASK 0x1
-#define QIB_6120_EXTStatus_MemBISTEndTest_LSB 0xE
-#define QIB_6120_EXTStatus_MemBISTEndTest_RMASK 0x1
-#define QIB_6120_EXTStatus_Reserved2_LSB 0x0
-#define QIB_6120_EXTStatus_Reserved2_RMASK 0x3FFF
-
-#define QIB_6120_EXTCtrl_OFFS 0xD8
-#define QIB_6120_EXTCtrl_GPIOOe_LSB 0x30
-#define QIB_6120_EXTCtrl_GPIOOe_RMASK 0xFFFF
-#define QIB_6120_EXTCtrl_GPIOInvert_LSB 0x20
-#define QIB_6120_EXTCtrl_GPIOInvert_RMASK 0xFFFF
-#define QIB_6120_EXTCtrl_Reserved_LSB 0x4
-#define QIB_6120_EXTCtrl_Reserved_RMASK 0xFFFFFFF
-#define QIB_6120_EXTCtrl_LEDPriPortGreenOn_LSB 0x3
-#define QIB_6120_EXTCtrl_LEDPriPortGreenOn_RMASK 0x1
-#define QIB_6120_EXTCtrl_LEDPriPortYellowOn_LSB 0x2
-#define QIB_6120_EXTCtrl_LEDPriPortYellowOn_RMASK 0x1
-#define QIB_6120_EXTCtrl_LEDGblOkGreenOn_LSB 0x1
-#define QIB_6120_EXTCtrl_LEDGblOkGreenOn_RMASK 0x1
-#define QIB_6120_EXTCtrl_LEDGblErrRedOff_LSB 0x0
-#define QIB_6120_EXTCtrl_LEDGblErrRedOff_RMASK 0x1
-
-#define QIB_6120_GPIOOut_OFFS 0xE0
-
-#define QIB_6120_GPIOMask_OFFS 0xE8
-
-#define QIB_6120_GPIOStatus_OFFS 0xF0
-
-#define QIB_6120_GPIOClear_OFFS 0xF8
-
-#define QIB_6120_RcvCtrl_OFFS 0x100
-#define QIB_6120_RcvCtrl_TailUpd_LSB 0x1F
-#define QIB_6120_RcvCtrl_TailUpd_RMASK 0x1
-#define QIB_6120_RcvCtrl_RcvPartitionKeyDisable_LSB 0x1E
-#define QIB_6120_RcvCtrl_RcvPartitionKeyDisable_RMASK 0x1
-#define QIB_6120_RcvCtrl_Reserved_LSB 0x15
-#define QIB_6120_RcvCtrl_Reserved_RMASK 0x1FF
-#define QIB_6120_RcvCtrl_IntrAvail_LSB 0x10
-#define QIB_6120_RcvCtrl_IntrAvail_RMASK 0x1F
-#define QIB_6120_RcvCtrl_Reserved1_LSB 0x9
-#define QIB_6120_RcvCtrl_Reserved1_RMASK 0x7F
-#define QIB_6120_RcvCtrl_Reserved2_LSB 0x5
-#define QIB_6120_RcvCtrl_Reserved2_RMASK 0xF
-#define QIB_6120_RcvCtrl_PortEnable_LSB 0x0
-#define QIB_6120_RcvCtrl_PortEnable_RMASK 0x1F
-
-#define QIB_6120_RcvBTHQP_OFFS 0x108
-#define QIB_6120_RcvBTHQP_BTHQP_Mask_LSB 0x1E
-#define QIB_6120_RcvBTHQP_BTHQP_Mask_RMASK 0x3
-#define QIB_6120_RcvBTHQP_Reserved_LSB 0x18
-#define QIB_6120_RcvBTHQP_Reserved_RMASK 0x3F
-#define QIB_6120_RcvBTHQP_RcvBTHQP_LSB 0x0
-#define QIB_6120_RcvBTHQP_RcvBTHQP_RMASK 0xFFFFFF
-
-#define QIB_6120_RcvHdrSize_OFFS 0x110
-
-#define QIB_6120_RcvHdrCnt_OFFS 0x118
-
-#define QIB_6120_RcvHdrEntSize_OFFS 0x120
-
-#define QIB_6120_RcvTIDBase_OFFS 0x128
-
-#define QIB_6120_RcvTIDCnt_OFFS 0x130
-
-#define QIB_6120_RcvEgrBase_OFFS 0x138
-
-#define QIB_6120_RcvEgrCnt_OFFS 0x140
-
-#define QIB_6120_RcvBufBase_OFFS 0x148
-
-#define QIB_6120_RcvBufSize_OFFS 0x150
-
-#define QIB_6120_RxIntMemBase_OFFS 0x158
-
-#define QIB_6120_RxIntMemSize_OFFS 0x160
-
-#define QIB_6120_RcvPartitionKey_OFFS 0x168
-
-#define QIB_6120_RcvPktLEDCnt_OFFS 0x178
-#define QIB_6120_RcvPktLEDCnt_ONperiod_LSB 0x20
-#define QIB_6120_RcvPktLEDCnt_ONperiod_RMASK 0xFFFFFFFF
-#define QIB_6120_RcvPktLEDCnt_OFFperiod_LSB 0x0
-#define QIB_6120_RcvPktLEDCnt_OFFperiod_RMASK 0xFFFFFFFF
-
-#define QIB_6120_SendCtrl_OFFS 0x1C0
-#define QIB_6120_SendCtrl_Disarm_LSB 0x1F
-#define QIB_6120_SendCtrl_Disarm_RMASK 0x1
-#define QIB_6120_SendCtrl_Reserved_LSB 0x17
-#define QIB_6120_SendCtrl_Reserved_RMASK 0xFF
-#define QIB_6120_SendCtrl_DisarmPIOBuf_LSB 0x10
-#define QIB_6120_SendCtrl_DisarmPIOBuf_RMASK 0x7F
-#define QIB_6120_SendCtrl_Reserved1_LSB 0x4
-#define QIB_6120_SendCtrl_Reserved1_RMASK 0xFFF
-#define QIB_6120_SendCtrl_PIOEnable_LSB 0x3
-#define QIB_6120_SendCtrl_PIOEnable_RMASK 0x1
-#define QIB_6120_SendCtrl_PIOBufAvailUpd_LSB 0x2
-#define QIB_6120_SendCtrl_PIOBufAvailUpd_RMASK 0x1
-#define QIB_6120_SendCtrl_PIOIntBufAvail_LSB 0x1
-#define QIB_6120_SendCtrl_PIOIntBufAvail_RMASK 0x1
-#define QIB_6120_SendCtrl_Abort_LSB 0x0
-#define QIB_6120_SendCtrl_Abort_RMASK 0x1
-
-#define QIB_6120_SendPIOBufBase_OFFS 0x1C8
-#define QIB_6120_SendPIOBufBase_Reserved_LSB 0x35
-#define QIB_6120_SendPIOBufBase_Reserved_RMASK 0x7FF
-#define QIB_6120_SendPIOBufBase_BaseAddr_LargePIO_LSB 0x20
-#define QIB_6120_SendPIOBufBase_BaseAddr_LargePIO_RMASK 0x1FFFFF
-#define QIB_6120_SendPIOBufBase_Reserved1_LSB 0x15
-#define QIB_6120_SendPIOBufBase_Reserved1_RMASK 0x7FF
-#define QIB_6120_SendPIOBufBase_BaseAddr_SmallPIO_LSB 0x0
-#define QIB_6120_SendPIOBufBase_BaseAddr_SmallPIO_RMASK 0x1FFFFF
-
-#define QIB_6120_SendPIOSize_OFFS 0x1D0
-#define QIB_6120_SendPIOSize_Reserved_LSB 0x2D
-#define QIB_6120_SendPIOSize_Reserved_RMASK 0xFFFFF
-#define QIB_6120_SendPIOSize_Size_LargePIO_LSB 0x20
-#define QIB_6120_SendPIOSize_Size_LargePIO_RMASK 0x1FFF
-#define QIB_6120_SendPIOSize_Reserved1_LSB 0xC
-#define QIB_6120_SendPIOSize_Reserved1_RMASK 0xFFFFF
-#define QIB_6120_SendPIOSize_Size_SmallPIO_LSB 0x0
-#define QIB_6120_SendPIOSize_Size_SmallPIO_RMASK 0xFFF
-
-#define QIB_6120_SendPIOBufCnt_OFFS 0x1D8
-#define QIB_6120_SendPIOBufCnt_Reserved_LSB 0x24
-#define QIB_6120_SendPIOBufCnt_Reserved_RMASK 0xFFFFFFF
-#define QIB_6120_SendPIOBufCnt_Num_LargePIO_LSB 0x20
-#define QIB_6120_SendPIOBufCnt_Num_LargePIO_RMASK 0xF
-#define QIB_6120_SendPIOBufCnt_Reserved1_LSB 0x9
-#define QIB_6120_SendPIOBufCnt_Reserved1_RMASK 0x7FFFFF
-#define QIB_6120_SendPIOBufCnt_Num_SmallPIO_LSB 0x0
-#define QIB_6120_SendPIOBufCnt_Num_SmallPIO_RMASK 0x1FF
-
-#define QIB_6120_SendPIOAvailAddr_OFFS 0x1E0
-#define QIB_6120_SendPIOAvailAddr_SendPIOAvailAddr_LSB 0x6
-#define QIB_6120_SendPIOAvailAddr_SendPIOAvailAddr_RMASK 0x3FFFFFFFF
-#define QIB_6120_SendPIOAvailAddr_Reserved_LSB 0x0
-#define QIB_6120_SendPIOAvailAddr_Reserved_RMASK 0x3F
-
-#define QIB_6120_SendBufErr0_OFFS 0x240
-#define QIB_6120_SendBufErr0_SendBufErrPIO_63_0_LSB 0x0
-#define QIB_6120_SendBufErr0_SendBufErrPIO_63_0_RMASK 0x0
-
-#define QIB_6120_RcvHdrAddr0_OFFS 0x280
-#define QIB_6120_RcvHdrAddr0_RcvHdrAddr0_LSB 0x2
-#define QIB_6120_RcvHdrAddr0_RcvHdrAddr0_RMASK 0x3FFFFFFFFF
-#define QIB_6120_RcvHdrAddr0_Reserved_LSB 0x0
-#define QIB_6120_RcvHdrAddr0_Reserved_RMASK 0x3
-
-#define QIB_6120_RcvHdrTailAddr0_OFFS 0x300
-#define QIB_6120_RcvHdrTailAddr0_RcvHdrTailAddr0_LSB 0x2
-#define QIB_6120_RcvHdrTailAddr0_RcvHdrTailAddr0_RMASK 0x3FFFFFFFFF
-#define QIB_6120_RcvHdrTailAddr0_Reserved_LSB 0x0
-#define QIB_6120_RcvHdrTailAddr0_Reserved_RMASK 0x3
-
-#define QIB_6120_SerdesCfg0_OFFS 0x3C0
-#define QIB_6120_SerdesCfg0_DisableIBTxIdleDetect_LSB 0x3F
-#define QIB_6120_SerdesCfg0_DisableIBTxIdleDetect_RMASK 0x1
-#define QIB_6120_SerdesCfg0_Reserved_LSB 0x38
-#define QIB_6120_SerdesCfg0_Reserved_RMASK 0x7F
-#define QIB_6120_SerdesCfg0_RxEqCtl_LSB 0x36
-#define QIB_6120_SerdesCfg0_RxEqCtl_RMASK 0x3
-#define QIB_6120_SerdesCfg0_TxTermAdj_LSB 0x34
-#define QIB_6120_SerdesCfg0_TxTermAdj_RMASK 0x3
-#define QIB_6120_SerdesCfg0_RxTermAdj_LSB 0x32
-#define QIB_6120_SerdesCfg0_RxTermAdj_RMASK 0x3
-#define QIB_6120_SerdesCfg0_TermAdj1_LSB 0x31
-#define QIB_6120_SerdesCfg0_TermAdj1_RMASK 0x1
-#define QIB_6120_SerdesCfg0_TermAdj0_LSB 0x30
-#define QIB_6120_SerdesCfg0_TermAdj0_RMASK 0x1
-#define QIB_6120_SerdesCfg0_LPBKA_LSB 0x2F
-#define QIB_6120_SerdesCfg0_LPBKA_RMASK 0x1
-#define QIB_6120_SerdesCfg0_LPBKB_LSB 0x2E
-#define QIB_6120_SerdesCfg0_LPBKB_RMASK 0x1
-#define QIB_6120_SerdesCfg0_LPBKC_LSB 0x2D
-#define QIB_6120_SerdesCfg0_LPBKC_RMASK 0x1
-#define QIB_6120_SerdesCfg0_LPBKD_LSB 0x2C
-#define QIB_6120_SerdesCfg0_LPBKD_RMASK 0x1
-#define QIB_6120_SerdesCfg0_PW_LSB 0x2B
-#define QIB_6120_SerdesCfg0_PW_RMASK 0x1
-#define QIB_6120_SerdesCfg0_RefSel_LSB 0x29
-#define QIB_6120_SerdesCfg0_RefSel_RMASK 0x3
-#define QIB_6120_SerdesCfg0_ParReset_LSB 0x28
-#define QIB_6120_SerdesCfg0_ParReset_RMASK 0x1
-#define QIB_6120_SerdesCfg0_ParLPBK_LSB 0x27
-#define QIB_6120_SerdesCfg0_ParLPBK_RMASK 0x1
-#define QIB_6120_SerdesCfg0_OffsetEn_LSB 0x26
-#define QIB_6120_SerdesCfg0_OffsetEn_RMASK 0x1
-#define QIB_6120_SerdesCfg0_Offset_LSB 0x1E
-#define QIB_6120_SerdesCfg0_Offset_RMASK 0xFF
-#define QIB_6120_SerdesCfg0_L2PwrDn_LSB 0x1D
-#define QIB_6120_SerdesCfg0_L2PwrDn_RMASK 0x1
-#define QIB_6120_SerdesCfg0_ResetPLL_LSB 0x1C
-#define QIB_6120_SerdesCfg0_ResetPLL_RMASK 0x1
-#define QIB_6120_SerdesCfg0_RxTermEnX_LSB 0x18
-#define QIB_6120_SerdesCfg0_RxTermEnX_RMASK 0xF
-#define QIB_6120_SerdesCfg0_BeaconTxEnX_LSB 0x14
-#define QIB_6120_SerdesCfg0_BeaconTxEnX_RMASK 0xF
-#define QIB_6120_SerdesCfg0_RxDetEnX_LSB 0x10
-#define QIB_6120_SerdesCfg0_RxDetEnX_RMASK 0xF
-#define QIB_6120_SerdesCfg0_TxIdeEnX_LSB 0xC
-#define QIB_6120_SerdesCfg0_TxIdeEnX_RMASK 0xF
-#define QIB_6120_SerdesCfg0_RxIdleEnX_LSB 0x8
-#define QIB_6120_SerdesCfg0_RxIdleEnX_RMASK 0xF
-#define QIB_6120_SerdesCfg0_L1PwrDnA_LSB 0x7
-#define QIB_6120_SerdesCfg0_L1PwrDnA_RMASK 0x1
-#define QIB_6120_SerdesCfg0_L1PwrDnB_LSB 0x6
-#define QIB_6120_SerdesCfg0_L1PwrDnB_RMASK 0x1
-#define QIB_6120_SerdesCfg0_L1PwrDnC_LSB 0x5
-#define QIB_6120_SerdesCfg0_L1PwrDnC_RMASK 0x1
-#define QIB_6120_SerdesCfg0_L1PwrDnD_LSB 0x4
-#define QIB_6120_SerdesCfg0_L1PwrDnD_RMASK 0x1
-#define QIB_6120_SerdesCfg0_ResetA_LSB 0x3
-#define QIB_6120_SerdesCfg0_ResetA_RMASK 0x1
-#define QIB_6120_SerdesCfg0_ResetB_LSB 0x2
-#define QIB_6120_SerdesCfg0_ResetB_RMASK 0x1
-#define QIB_6120_SerdesCfg0_ResetC_LSB 0x1
-#define QIB_6120_SerdesCfg0_ResetC_RMASK 0x1
-#define QIB_6120_SerdesCfg0_ResetD_LSB 0x0
-#define QIB_6120_SerdesCfg0_ResetD_RMASK 0x1
-
-#define QIB_6120_SerdesStat_OFFS 0x3D0
-#define QIB_6120_SerdesStat_Reserved_LSB 0xC
-#define QIB_6120_SerdesStat_Reserved_RMASK 0xFFFFFFFFFFFFF
-#define QIB_6120_SerdesStat_BeaconDetA_LSB 0xB
-#define QIB_6120_SerdesStat_BeaconDetA_RMASK 0x1
-#define QIB_6120_SerdesStat_BeaconDetB_LSB 0xA
-#define QIB_6120_SerdesStat_BeaconDetB_RMASK 0x1
-#define QIB_6120_SerdesStat_BeaconDetC_LSB 0x9
-#define QIB_6120_SerdesStat_BeaconDetC_RMASK 0x1
-#define QIB_6120_SerdesStat_BeaconDetD_LSB 0x8
-#define QIB_6120_SerdesStat_BeaconDetD_RMASK 0x1
-#define QIB_6120_SerdesStat_RxDetA_LSB 0x7
-#define QIB_6120_SerdesStat_RxDetA_RMASK 0x1
-#define QIB_6120_SerdesStat_RxDetB_LSB 0x6
-#define QIB_6120_SerdesStat_RxDetB_RMASK 0x1
-#define QIB_6120_SerdesStat_RxDetC_LSB 0x5
-#define QIB_6120_SerdesStat_RxDetC_RMASK 0x1
-#define QIB_6120_SerdesStat_RxDetD_LSB 0x4
-#define QIB_6120_SerdesStat_RxDetD_RMASK 0x1
-#define QIB_6120_SerdesStat_TxIdleDetA_LSB 0x3
-#define QIB_6120_SerdesStat_TxIdleDetA_RMASK 0x1
-#define QIB_6120_SerdesStat_TxIdleDetB_LSB 0x2
-#define QIB_6120_SerdesStat_TxIdleDetB_RMASK 0x1
-#define QIB_6120_SerdesStat_TxIdleDetC_LSB 0x1
-#define QIB_6120_SerdesStat_TxIdleDetC_RMASK 0x1
-#define QIB_6120_SerdesStat_TxIdleDetD_LSB 0x0
-#define QIB_6120_SerdesStat_TxIdleDetD_RMASK 0x1
-
-#define QIB_6120_XGXSCfg_OFFS 0x3D8
-#define QIB_6120_XGXSCfg_ArmLaunchErrorDisable_LSB 0x3F
-#define QIB_6120_XGXSCfg_ArmLaunchErrorDisable_RMASK 0x1
-#define QIB_6120_XGXSCfg_Reserved_LSB 0x17
-#define QIB_6120_XGXSCfg_Reserved_RMASK 0xFFFFFFFFFF
-#define QIB_6120_XGXSCfg_polarity_inv_LSB 0x13
-#define QIB_6120_XGXSCfg_polarity_inv_RMASK 0xF
-#define QIB_6120_XGXSCfg_link_sync_mask_LSB 0x9
-#define QIB_6120_XGXSCfg_link_sync_mask_RMASK 0x3FF
-#define QIB_6120_XGXSCfg_port_addr_LSB 0x4
-#define QIB_6120_XGXSCfg_port_addr_RMASK 0x1F
-#define QIB_6120_XGXSCfg_mdd_30_LSB 0x3
-#define QIB_6120_XGXSCfg_mdd_30_RMASK 0x1
-#define QIB_6120_XGXSCfg_xcv_resetn_LSB 0x2
-#define QIB_6120_XGXSCfg_xcv_resetn_RMASK 0x1
-#define QIB_6120_XGXSCfg_Reserved1_LSB 0x1
-#define QIB_6120_XGXSCfg_Reserved1_RMASK 0x1
-#define QIB_6120_XGXSCfg_tx_rx_resetn_LSB 0x0
-#define QIB_6120_XGXSCfg_tx_rx_resetn_RMASK 0x1
-
-#define QIB_6120_LBIntCnt_OFFS 0x12000
-
-#define QIB_6120_LBFlowStallCnt_OFFS 0x12008
-
-#define QIB_6120_TxUnsupVLErrCnt_OFFS 0x12018
-
-#define QIB_6120_TxDataPktCnt_OFFS 0x12020
-
-#define QIB_6120_TxFlowPktCnt_OFFS 0x12028
-
-#define QIB_6120_TxDwordCnt_OFFS 0x12030
-
-#define QIB_6120_TxLenErrCnt_OFFS 0x12038
-
-#define QIB_6120_TxMaxMinLenErrCnt_OFFS 0x12040
-
-#define QIB_6120_TxUnderrunCnt_OFFS 0x12048
-
-#define QIB_6120_TxFlowStallCnt_OFFS 0x12050
-
-#define QIB_6120_TxDroppedPktCnt_OFFS 0x12058
-
-#define QIB_6120_RxDroppedPktCnt_OFFS 0x12060
-
-#define QIB_6120_RxDataPktCnt_OFFS 0x12068
-
-#define QIB_6120_RxFlowPktCnt_OFFS 0x12070
-
-#define QIB_6120_RxDwordCnt_OFFS 0x12078
-
-#define QIB_6120_RxLenErrCnt_OFFS 0x12080
-
-#define QIB_6120_RxMaxMinLenErrCnt_OFFS 0x12088
-
-#define QIB_6120_RxICRCErrCnt_OFFS 0x12090
-
-#define QIB_6120_RxVCRCErrCnt_OFFS 0x12098
-
-#define QIB_6120_RxFlowCtrlErrCnt_OFFS 0x120A0
-
-#define QIB_6120_RxBadFormatCnt_OFFS 0x120A8
-
-#define QIB_6120_RxLinkProblemCnt_OFFS 0x120B0
-
-#define QIB_6120_RxEBPCnt_OFFS 0x120B8
-
-#define QIB_6120_RxLPCRCErrCnt_OFFS 0x120C0
-
-#define QIB_6120_RxBufOvflCnt_OFFS 0x120C8
-
-#define QIB_6120_RxTIDFullErrCnt_OFFS 0x120D0
-
-#define QIB_6120_RxTIDValidErrCnt_OFFS 0x120D8
-
-#define QIB_6120_RxPKeyMismatchCnt_OFFS 0x120E0
-
-#define QIB_6120_RxP0HdrEgrOvflCnt_OFFS 0x120E8
-
-#define QIB_6120_IBStatusChangeCnt_OFFS 0x12140
-
-#define QIB_6120_IBLinkErrRecoveryCnt_OFFS 0x12148
-
-#define QIB_6120_IBLinkDownedCnt_OFFS 0x12150
-
-#define QIB_6120_IBSymbolErrCnt_OFFS 0x12158
-
-#define QIB_6120_PcieRetryBufDiagQwordCnt_OFFS 0x12170
-
-#define QIB_6120_RcvEgrArray0_OFFS 0x14000
-
-#define QIB_6120_RcvTIDArray0_OFFS 0x54000
-
-#define QIB_6120_PIOLaunchFIFO_OFFS 0x64000
-
-#define QIB_6120_SendPIOpbcCache_OFFS 0x64800
-
-#define QIB_6120_RcvBuf1_OFFS 0x72000
-
-#define QIB_6120_RcvBuf2_OFFS 0x75000
-
-#define QIB_6120_RcvFlags_OFFS 0x77000
-
-#define QIB_6120_RcvLookupBuf1_OFFS 0x79000
-
-#define QIB_6120_RcvDMABuf_OFFS 0x7B000
-
-#define QIB_6120_MiscRXEIntMem_OFFS 0x7C000
-
-#define QIB_6120_PCIERcvBuf_OFFS 0x80000
-
-#define QIB_6120_PCIERetryBuf_OFFS 0x82000
-
-#define QIB_6120_PCIERcvBufRdToWrAddr_OFFS 0x84000
-
-#define QIB_6120_PIOBuf0_MA_OFFS 0x100000
diff --git a/drivers/infiniband/hw/qib/qib_7220.h b/drivers/infiniband/hw/qib/qib_7220.h
deleted file mode 100644
index a5356cb4252e..000000000000
--- a/drivers/infiniband/hw/qib/qib_7220.h
+++ /dev/null
@@ -1,149 +0,0 @@
-#ifndef _QIB_7220_H
-#define _QIB_7220_H
-/*
- * Copyright (c) 2007, 2009, 2010 QLogic Corporation. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-/* grab register-defs auto-generated by HW */
-#include "qib_7220_regs.h"
-
-/* The number of eager receive TIDs for context zero. */
-#define IBA7220_KRCVEGRCNT 2048U
-
-#define IB_7220_LT_STATE_CFGRCVFCFG 0x09
-#define IB_7220_LT_STATE_CFGWAITRMT 0x0a
-#define IB_7220_LT_STATE_TXREVLANES 0x0d
-#define IB_7220_LT_STATE_CFGENH 0x10
-
-struct qib_chip_specific {
- u64 __iomem *cregbase;
- u64 *cntrs;
- u64 *portcntrs;
- spinlock_t sdepb_lock; /* serdes EPB bus */
- spinlock_t rcvmod_lock; /* protect rcvctrl shadow changes */
- spinlock_t gpio_lock; /* RMW of shadows/regs for ExtCtrl and GPIO */
- u64 hwerrmask;
- u64 errormask;
- u64 gpio_out; /* shadow of kr_gpio_out, for rmw ops */
- u64 gpio_mask; /* shadow the gpio mask register */
- u64 extctrl; /* shadow the gpio output enable, etc... */
- u32 ncntrs;
- u32 nportcntrs;
- u32 cntrnamelen;
- u32 portcntrnamelen;
- u32 numctxts;
- u32 rcvegrcnt;
- u32 autoneg_tries;
- u32 serdes_first_init_done;
- u32 sdmabufcnt;
- u32 lastbuf_for_pio;
- u32 updthresh; /* current AvailUpdThld */
- u32 updthresh_dflt; /* default AvailUpdThld */
- int irq;
- u8 presets_needed;
- u8 relock_timer_active;
- char emsgbuf[128];
- char sdmamsgbuf[192];
- char bitsmsgbuf[64];
- struct timer_list relock_timer;
- unsigned int relock_interval; /* in jiffies */
-};
-
-struct qib_chippport_specific {
- struct qib_pportdata pportdata;
- wait_queue_head_t autoneg_wait;
- struct delayed_work autoneg_work;
- struct timer_list chase_timer;
- /*
- * these 5 fields are used to establish deltas for IB symbol
- * errors and linkrecovery errors. They can be reported on
- * some chips during link negotiation prior to INIT, and with
- * DDR when faking DDR negotiations with non-IBTA switches.
- * The chip counters are adjusted at driver unload if there is
- * a non-zero delta.
- */
- u64 ibdeltainprog;
- u64 ibsymdelta;
- u64 ibsymsnap;
- u64 iblnkerrdelta;
- u64 iblnkerrsnap;
- u64 ibcctrl; /* kr_ibcctrl shadow */
- u64 ibcddrctrl; /* kr_ibcddrctrl shadow */
- unsigned long chase_end;
- u32 last_delay_mult;
-};
-
-/*
- * This header file provides the declarations and common definitions
- * for (mostly) manipulation of the SerDes blocks within the IBA7220.
- * the functions declared should only be called from within other
- * 7220-related files such as qib_iba7220.c or qib_sd7220.c.
- */
-int qib_sd7220_presets(struct qib_devdata *dd);
-int qib_sd7220_init(struct qib_devdata *dd);
-void qib_sd7220_clr_ibpar(struct qib_devdata *);
-/*
- * Below used for sdnum parameter, selecting one of the two sections
- * used for PCIe, or the single SerDes used for IB, which is the
- * only one currently used
- */
-#define IB_7220_SERDES 2
-
-static inline u32 qib_read_kreg32(const struct qib_devdata *dd,
- const u16 regno)
-{
- if (!dd->kregbase || !(dd->flags & QIB_PRESENT))
- return -1;
- return readl((u32 __iomem *)&dd->kregbase[regno]);
-}
-
-static inline u64 qib_read_kreg64(const struct qib_devdata *dd,
- const u16 regno)
-{
- if (!dd->kregbase || !(dd->flags & QIB_PRESENT))
- return -1;
-
- return readq(&dd->kregbase[regno]);
-}
-
-static inline void qib_write_kreg(const struct qib_devdata *dd,
- const u16 regno, u64 value)
-{
- if (dd->kregbase)
- writeq(value, &dd->kregbase[regno]);
-}
-
-void set_7220_relock_poll(struct qib_devdata *, int);
-void shutdown_7220_relock_poll(struct qib_devdata *);
-void toggle_7220_rclkrls(struct qib_devdata *);
-
-
-#endif /* _QIB_7220_H */
diff --git a/drivers/infiniband/hw/qib/qib_7220_regs.h b/drivers/infiniband/hw/qib/qib_7220_regs.h
deleted file mode 100644
index 0da5bb750e52..000000000000
--- a/drivers/infiniband/hw/qib/qib_7220_regs.h
+++ /dev/null
@@ -1,1496 +0,0 @@
-/*
- * Copyright (c) 2008, 2009, 2010 QLogic Corporation. All rights reserved.
- *
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-
-/* This file is mechanically generated from RTL. Any hand-edits will be lost! */
-
-#define QIB_7220_Revision_OFFS 0x0
-#define QIB_7220_Revision_R_Simulator_LSB 0x3F
-#define QIB_7220_Revision_R_Simulator_RMASK 0x1
-#define QIB_7220_Revision_R_Emulation_LSB 0x3E
-#define QIB_7220_Revision_R_Emulation_RMASK 0x1
-#define QIB_7220_Revision_R_Emulation_Revcode_LSB 0x28
-#define QIB_7220_Revision_R_Emulation_Revcode_RMASK 0x3FFFFF
-#define QIB_7220_Revision_BoardID_LSB 0x20
-#define QIB_7220_Revision_BoardID_RMASK 0xFF
-#define QIB_7220_Revision_R_SW_LSB 0x18
-#define QIB_7220_Revision_R_SW_RMASK 0xFF
-#define QIB_7220_Revision_R_Arch_LSB 0x10
-#define QIB_7220_Revision_R_Arch_RMASK 0xFF
-#define QIB_7220_Revision_R_ChipRevMajor_LSB 0x8
-#define QIB_7220_Revision_R_ChipRevMajor_RMASK 0xFF
-#define QIB_7220_Revision_R_ChipRevMinor_LSB 0x0
-#define QIB_7220_Revision_R_ChipRevMinor_RMASK 0xFF
-
-#define QIB_7220_Control_OFFS 0x8
-#define QIB_7220_Control_SyncResetExceptPcieIRAMRST_LSB 0x7
-#define QIB_7220_Control_SyncResetExceptPcieIRAMRST_RMASK 0x1
-#define QIB_7220_Control_PCIECplQDiagEn_LSB 0x6
-#define QIB_7220_Control_PCIECplQDiagEn_RMASK 0x1
-#define QIB_7220_Control_Reserved_LSB 0x5
-#define QIB_7220_Control_Reserved_RMASK 0x1
-#define QIB_7220_Control_TxLatency_LSB 0x4
-#define QIB_7220_Control_TxLatency_RMASK 0x1
-#define QIB_7220_Control_PCIERetryBufDiagEn_LSB 0x3
-#define QIB_7220_Control_PCIERetryBufDiagEn_RMASK 0x1
-#define QIB_7220_Control_LinkEn_LSB 0x2
-#define QIB_7220_Control_LinkEn_RMASK 0x1
-#define QIB_7220_Control_FreezeMode_LSB 0x1
-#define QIB_7220_Control_FreezeMode_RMASK 0x1
-#define QIB_7220_Control_SyncReset_LSB 0x0
-#define QIB_7220_Control_SyncReset_RMASK 0x1
-
-#define QIB_7220_PageAlign_OFFS 0x10
-
-#define QIB_7220_PortCnt_OFFS 0x18
-
-#define QIB_7220_SendRegBase_OFFS 0x30
-
-#define QIB_7220_UserRegBase_OFFS 0x38
-
-#define QIB_7220_CntrRegBase_OFFS 0x40
-
-#define QIB_7220_Scratch_OFFS 0x48
-
-#define QIB_7220_IntMask_OFFS 0x68
-#define QIB_7220_IntMask_SDmaIntMask_LSB 0x3F
-#define QIB_7220_IntMask_SDmaIntMask_RMASK 0x1
-#define QIB_7220_IntMask_SDmaDisabledMasked_LSB 0x3E
-#define QIB_7220_IntMask_SDmaDisabledMasked_RMASK 0x1
-#define QIB_7220_IntMask_Reserved_LSB 0x31
-#define QIB_7220_IntMask_Reserved_RMASK 0x1FFF
-#define QIB_7220_IntMask_RcvUrg16IntMask_LSB 0x30
-#define QIB_7220_IntMask_RcvUrg16IntMask_RMASK 0x1
-#define QIB_7220_IntMask_RcvUrg15IntMask_LSB 0x2F
-#define QIB_7220_IntMask_RcvUrg15IntMask_RMASK 0x1
-#define QIB_7220_IntMask_RcvUrg14IntMask_LSB 0x2E
-#define QIB_7220_IntMask_RcvUrg14IntMask_RMASK 0x1
-#define QIB_7220_IntMask_RcvUrg13IntMask_LSB 0x2D
-#define QIB_7220_IntMask_RcvUrg13IntMask_RMASK 0x1
-#define QIB_7220_IntMask_RcvUrg12IntMask_LSB 0x2C
-#define QIB_7220_IntMask_RcvUrg12IntMask_RMASK 0x1
-#define QIB_7220_IntMask_RcvUrg11IntMask_LSB 0x2B
-#define QIB_7220_IntMask_RcvUrg11IntMask_RMASK 0x1
-#define QIB_7220_IntMask_RcvUrg10IntMask_LSB 0x2A
-#define QIB_7220_IntMask_RcvUrg10IntMask_RMASK 0x1
-#define QIB_7220_IntMask_RcvUrg9IntMask_LSB 0x29
-#define QIB_7220_IntMask_RcvUrg9IntMask_RMASK 0x1
-#define QIB_7220_IntMask_RcvUrg8IntMask_LSB 0x28
-#define QIB_7220_IntMask_RcvUrg8IntMask_RMASK 0x1
-#define QIB_7220_IntMask_RcvUrg7IntMask_LSB 0x27
-#define QIB_7220_IntMask_RcvUrg7IntMask_RMASK 0x1
-#define QIB_7220_IntMask_RcvUrg6IntMask_LSB 0x26
-#define QIB_7220_IntMask_RcvUrg6IntMask_RMASK 0x1
-#define QIB_7220_IntMask_RcvUrg5IntMask_LSB 0x25
-#define QIB_7220_IntMask_RcvUrg5IntMask_RMASK 0x1
-#define QIB_7220_IntMask_RcvUrg4IntMask_LSB 0x24
-#define QIB_7220_IntMask_RcvUrg4IntMask_RMASK 0x1
-#define QIB_7220_IntMask_RcvUrg3IntMask_LSB 0x23
-#define QIB_7220_IntMask_RcvUrg3IntMask_RMASK 0x1
-#define QIB_7220_IntMask_RcvUrg2IntMask_LSB 0x22
-#define QIB_7220_IntMask_RcvUrg2IntMask_RMASK 0x1
-#define QIB_7220_IntMask_RcvUrg1IntMask_LSB 0x21
-#define QIB_7220_IntMask_RcvUrg1IntMask_RMASK 0x1
-#define QIB_7220_IntMask_RcvUrg0IntMask_LSB 0x20
-#define QIB_7220_IntMask_RcvUrg0IntMask_RMASK 0x1
-#define QIB_7220_IntMask_ErrorIntMask_LSB 0x1F
-#define QIB_7220_IntMask_ErrorIntMask_RMASK 0x1
-#define QIB_7220_IntMask_PioSetIntMask_LSB 0x1E
-#define QIB_7220_IntMask_PioSetIntMask_RMASK 0x1
-#define QIB_7220_IntMask_PioBufAvailIntMask_LSB 0x1D
-#define QIB_7220_IntMask_PioBufAvailIntMask_RMASK 0x1
-#define QIB_7220_IntMask_assertGPIOIntMask_LSB 0x1C
-#define QIB_7220_IntMask_assertGPIOIntMask_RMASK 0x1
-#define QIB_7220_IntMask_IBSerdesTrimDoneIntMask_LSB 0x1B
-#define QIB_7220_IntMask_IBSerdesTrimDoneIntMask_RMASK 0x1
-#define QIB_7220_IntMask_JIntMask_LSB 0x1A
-#define QIB_7220_IntMask_JIntMask_RMASK 0x1
-#define QIB_7220_IntMask_Reserved1_LSB 0x11
-#define QIB_7220_IntMask_Reserved1_RMASK 0x1FF
-#define QIB_7220_IntMask_RcvAvail16IntMask_LSB 0x10
-#define QIB_7220_IntMask_RcvAvail16IntMask_RMASK 0x1
-#define QIB_7220_IntMask_RcvAvail15IntMask_LSB 0xF
-#define QIB_7220_IntMask_RcvAvail15IntMask_RMASK 0x1
-#define QIB_7220_IntMask_RcvAvail14IntMask_LSB 0xE
-#define QIB_7220_IntMask_RcvAvail14IntMask_RMASK 0x1
-#define QIB_7220_IntMask_RcvAvail13IntMask_LSB 0xD
-#define QIB_7220_IntMask_RcvAvail13IntMask_RMASK 0x1
-#define QIB_7220_IntMask_RcvAvail12IntMask_LSB 0xC
-#define QIB_7220_IntMask_RcvAvail12IntMask_RMASK 0x1
-#define QIB_7220_IntMask_RcvAvail11IntMask_LSB 0xB
-#define QIB_7220_IntMask_RcvAvail11IntMask_RMASK 0x1
-#define QIB_7220_IntMask_RcvAvail10IntMask_LSB 0xA
-#define QIB_7220_IntMask_RcvAvail10IntMask_RMASK 0x1
-#define QIB_7220_IntMask_RcvAvail9IntMask_LSB 0x9
-#define QIB_7220_IntMask_RcvAvail9IntMask_RMASK 0x1
-#define QIB_7220_IntMask_RcvAvail8IntMask_LSB 0x8
-#define QIB_7220_IntMask_RcvAvail8IntMask_RMASK 0x1
-#define QIB_7220_IntMask_RcvAvail7IntMask_LSB 0x7
-#define QIB_7220_IntMask_RcvAvail7IntMask_RMASK 0x1
-#define QIB_7220_IntMask_RcvAvail6IntMask_LSB 0x6
-#define QIB_7220_IntMask_RcvAvail6IntMask_RMASK 0x1
-#define QIB_7220_IntMask_RcvAvail5IntMask_LSB 0x5
-#define QIB_7220_IntMask_RcvAvail5IntMask_RMASK 0x1
-#define QIB_7220_IntMask_RcvAvail4IntMask_LSB 0x4
-#define QIB_7220_IntMask_RcvAvail4IntMask_RMASK 0x1
-#define QIB_7220_IntMask_RcvAvail3IntMask_LSB 0x3
-#define QIB_7220_IntMask_RcvAvail3IntMask_RMASK 0x1
-#define QIB_7220_IntMask_RcvAvail2IntMask_LSB 0x2
-#define QIB_7220_IntMask_RcvAvail2IntMask_RMASK 0x1
-#define QIB_7220_IntMask_RcvAvail1IntMask_LSB 0x1
-#define QIB_7220_IntMask_RcvAvail1IntMask_RMASK 0x1
-#define QIB_7220_IntMask_RcvAvail0IntMask_LSB 0x0
-#define QIB_7220_IntMask_RcvAvail0IntMask_RMASK 0x1
-
-#define QIB_7220_IntStatus_OFFS 0x70
-#define QIB_7220_IntStatus_SDmaInt_LSB 0x3F
-#define QIB_7220_IntStatus_SDmaInt_RMASK 0x1
-#define QIB_7220_IntStatus_SDmaDisabled_LSB 0x3E
-#define QIB_7220_IntStatus_SDmaDisabled_RMASK 0x1
-#define QIB_7220_IntStatus_Reserved_LSB 0x31
-#define QIB_7220_IntStatus_Reserved_RMASK 0x1FFF
-#define QIB_7220_IntStatus_RcvUrg16_LSB 0x30
-#define QIB_7220_IntStatus_RcvUrg16_RMASK 0x1
-#define QIB_7220_IntStatus_RcvUrg15_LSB 0x2F
-#define QIB_7220_IntStatus_RcvUrg15_RMASK 0x1
-#define QIB_7220_IntStatus_RcvUrg14_LSB 0x2E
-#define QIB_7220_IntStatus_RcvUrg14_RMASK 0x1
-#define QIB_7220_IntStatus_RcvUrg13_LSB 0x2D
-#define QIB_7220_IntStatus_RcvUrg13_RMASK 0x1
-#define QIB_7220_IntStatus_RcvUrg12_LSB 0x2C
-#define QIB_7220_IntStatus_RcvUrg12_RMASK 0x1
-#define QIB_7220_IntStatus_RcvUrg11_LSB 0x2B
-#define QIB_7220_IntStatus_RcvUrg11_RMASK 0x1
-#define QIB_7220_IntStatus_RcvUrg10_LSB 0x2A
-#define QIB_7220_IntStatus_RcvUrg10_RMASK 0x1
-#define QIB_7220_IntStatus_RcvUrg9_LSB 0x29
-#define QIB_7220_IntStatus_RcvUrg9_RMASK 0x1
-#define QIB_7220_IntStatus_RcvUrg8_LSB 0x28
-#define QIB_7220_IntStatus_RcvUrg8_RMASK 0x1
-#define QIB_7220_IntStatus_RcvUrg7_LSB 0x27
-#define QIB_7220_IntStatus_RcvUrg7_RMASK 0x1
-#define QIB_7220_IntStatus_RcvUrg6_LSB 0x26
-#define QIB_7220_IntStatus_RcvUrg6_RMASK 0x1
-#define QIB_7220_IntStatus_RcvUrg5_LSB 0x25
-#define QIB_7220_IntStatus_RcvUrg5_RMASK 0x1
-#define QIB_7220_IntStatus_RcvUrg4_LSB 0x24
-#define QIB_7220_IntStatus_RcvUrg4_RMASK 0x1
-#define QIB_7220_IntStatus_RcvUrg3_LSB 0x23
-#define QIB_7220_IntStatus_RcvUrg3_RMASK 0x1
-#define QIB_7220_IntStatus_RcvUrg2_LSB 0x22
-#define QIB_7220_IntStatus_RcvUrg2_RMASK 0x1
-#define QIB_7220_IntStatus_RcvUrg1_LSB 0x21
-#define QIB_7220_IntStatus_RcvUrg1_RMASK 0x1
-#define QIB_7220_IntStatus_RcvUrg0_LSB 0x20
-#define QIB_7220_IntStatus_RcvUrg0_RMASK 0x1
-#define QIB_7220_IntStatus_Error_LSB 0x1F
-#define QIB_7220_IntStatus_Error_RMASK 0x1
-#define QIB_7220_IntStatus_PioSent_LSB 0x1E
-#define QIB_7220_IntStatus_PioSent_RMASK 0x1
-#define QIB_7220_IntStatus_PioBufAvail_LSB 0x1D
-#define QIB_7220_IntStatus_PioBufAvail_RMASK 0x1
-#define QIB_7220_IntStatus_assertGPIO_LSB 0x1C
-#define QIB_7220_IntStatus_assertGPIO_RMASK 0x1
-#define QIB_7220_IntStatus_IBSerdesTrimDone_LSB 0x1B
-#define QIB_7220_IntStatus_IBSerdesTrimDone_RMASK 0x1
-#define QIB_7220_IntStatus_JInt_LSB 0x1A
-#define QIB_7220_IntStatus_JInt_RMASK 0x1
-#define QIB_7220_IntStatus_Reserved1_LSB 0x11
-#define QIB_7220_IntStatus_Reserved1_RMASK 0x1FF
-#define QIB_7220_IntStatus_RcvAvail16_LSB 0x10
-#define QIB_7220_IntStatus_RcvAvail16_RMASK 0x1
-#define QIB_7220_IntStatus_RcvAvail15_LSB 0xF
-#define QIB_7220_IntStatus_RcvAvail15_RMASK 0x1
-#define QIB_7220_IntStatus_RcvAvail14_LSB 0xE
-#define QIB_7220_IntStatus_RcvAvail14_RMASK 0x1
-#define QIB_7220_IntStatus_RcvAvail13_LSB 0xD
-#define QIB_7220_IntStatus_RcvAvail13_RMASK 0x1
-#define QIB_7220_IntStatus_RcvAvail12_LSB 0xC
-#define QIB_7220_IntStatus_RcvAvail12_RMASK 0x1
-#define QIB_7220_IntStatus_RcvAvail11_LSB 0xB
-#define QIB_7220_IntStatus_RcvAvail11_RMASK 0x1
-#define QIB_7220_IntStatus_RcvAvail10_LSB 0xA
-#define QIB_7220_IntStatus_RcvAvail10_RMASK 0x1
-#define QIB_7220_IntStatus_RcvAvail9_LSB 0x9
-#define QIB_7220_IntStatus_RcvAvail9_RMASK 0x1
-#define QIB_7220_IntStatus_RcvAvail8_LSB 0x8
-#define QIB_7220_IntStatus_RcvAvail8_RMASK 0x1
-#define QIB_7220_IntStatus_RcvAvail7_LSB 0x7
-#define QIB_7220_IntStatus_RcvAvail7_RMASK 0x1
-#define QIB_7220_IntStatus_RcvAvail6_LSB 0x6
-#define QIB_7220_IntStatus_RcvAvail6_RMASK 0x1
-#define QIB_7220_IntStatus_RcvAvail5_LSB 0x5
-#define QIB_7220_IntStatus_RcvAvail5_RMASK 0x1
-#define QIB_7220_IntStatus_RcvAvail4_LSB 0x4
-#define QIB_7220_IntStatus_RcvAvail4_RMASK 0x1
-#define QIB_7220_IntStatus_RcvAvail3_LSB 0x3
-#define QIB_7220_IntStatus_RcvAvail3_RMASK 0x1
-#define QIB_7220_IntStatus_RcvAvail2_LSB 0x2
-#define QIB_7220_IntStatus_RcvAvail2_RMASK 0x1
-#define QIB_7220_IntStatus_RcvAvail1_LSB 0x1
-#define QIB_7220_IntStatus_RcvAvail1_RMASK 0x1
-#define QIB_7220_IntStatus_RcvAvail0_LSB 0x0
-#define QIB_7220_IntStatus_RcvAvail0_RMASK 0x1
-
-#define QIB_7220_IntClear_OFFS 0x78
-#define QIB_7220_IntClear_SDmaIntClear_LSB 0x3F
-#define QIB_7220_IntClear_SDmaIntClear_RMASK 0x1
-#define QIB_7220_IntClear_SDmaDisabledClear_LSB 0x3E
-#define QIB_7220_IntClear_SDmaDisabledClear_RMASK 0x1
-#define QIB_7220_IntClear_Reserved_LSB 0x31
-#define QIB_7220_IntClear_Reserved_RMASK 0x1FFF
-#define QIB_7220_IntClear_RcvUrg16IntClear_LSB 0x30
-#define QIB_7220_IntClear_RcvUrg16IntClear_RMASK 0x1
-#define QIB_7220_IntClear_RcvUrg15IntClear_LSB 0x2F
-#define QIB_7220_IntClear_RcvUrg15IntClear_RMASK 0x1
-#define QIB_7220_IntClear_RcvUrg14IntClear_LSB 0x2E
-#define QIB_7220_IntClear_RcvUrg14IntClear_RMASK 0x1
-#define QIB_7220_IntClear_RcvUrg13IntClear_LSB 0x2D
-#define QIB_7220_IntClear_RcvUrg13IntClear_RMASK 0x1
-#define QIB_7220_IntClear_RcvUrg12IntClear_LSB 0x2C
-#define QIB_7220_IntClear_RcvUrg12IntClear_RMASK 0x1
-#define QIB_7220_IntClear_RcvUrg11IntClear_LSB 0x2B
-#define QIB_7220_IntClear_RcvUrg11IntClear_RMASK 0x1
-#define QIB_7220_IntClear_RcvUrg10IntClear_LSB 0x2A
-#define QIB_7220_IntClear_RcvUrg10IntClear_RMASK 0x1
-#define QIB_7220_IntClear_RcvUrg9IntClear_LSB 0x29
-#define QIB_7220_IntClear_RcvUrg9IntClear_RMASK 0x1
-#define QIB_7220_IntClear_RcvUrg8IntClear_LSB 0x28
-#define QIB_7220_IntClear_RcvUrg8IntClear_RMASK 0x1
-#define QIB_7220_IntClear_RcvUrg7IntClear_LSB 0x27
-#define QIB_7220_IntClear_RcvUrg7IntClear_RMASK 0x1
-#define QIB_7220_IntClear_RcvUrg6IntClear_LSB 0x26
-#define QIB_7220_IntClear_RcvUrg6IntClear_RMASK 0x1
-#define QIB_7220_IntClear_RcvUrg5IntClear_LSB 0x25
-#define QIB_7220_IntClear_RcvUrg5IntClear_RMASK 0x1
-#define QIB_7220_IntClear_RcvUrg4IntClear_LSB 0x24
-#define QIB_7220_IntClear_RcvUrg4IntClear_RMASK 0x1
-#define QIB_7220_IntClear_RcvUrg3IntClear_LSB 0x23
-#define QIB_7220_IntClear_RcvUrg3IntClear_RMASK 0x1
-#define QIB_7220_IntClear_RcvUrg2IntClear_LSB 0x22
-#define QIB_7220_IntClear_RcvUrg2IntClear_RMASK 0x1
-#define QIB_7220_IntClear_RcvUrg1IntClear_LSB 0x21
-#define QIB_7220_IntClear_RcvUrg1IntClear_RMASK 0x1
-#define QIB_7220_IntClear_RcvUrg0IntClear_LSB 0x20
-#define QIB_7220_IntClear_RcvUrg0IntClear_RMASK 0x1
-#define QIB_7220_IntClear_ErrorIntClear_LSB 0x1F
-#define QIB_7220_IntClear_ErrorIntClear_RMASK 0x1
-#define QIB_7220_IntClear_PioSetIntClear_LSB 0x1E
-#define QIB_7220_IntClear_PioSetIntClear_RMASK 0x1
-#define QIB_7220_IntClear_PioBufAvailIntClear_LSB 0x1D
-#define QIB_7220_IntClear_PioBufAvailIntClear_RMASK 0x1
-#define QIB_7220_IntClear_assertGPIOIntClear_LSB 0x1C
-#define QIB_7220_IntClear_assertGPIOIntClear_RMASK 0x1
-#define QIB_7220_IntClear_IBSerdesTrimDoneClear_LSB 0x1B
-#define QIB_7220_IntClear_IBSerdesTrimDoneClear_RMASK 0x1
-#define QIB_7220_IntClear_JIntClear_LSB 0x1A
-#define QIB_7220_IntClear_JIntClear_RMASK 0x1
-#define QIB_7220_IntClear_Reserved1_LSB 0x11
-#define QIB_7220_IntClear_Reserved1_RMASK 0x1FF
-#define QIB_7220_IntClear_RcvAvail16IntClear_LSB 0x10
-#define QIB_7220_IntClear_RcvAvail16IntClear_RMASK 0x1
-#define QIB_7220_IntClear_RcvAvail15IntClear_LSB 0xF
-#define QIB_7220_IntClear_RcvAvail15IntClear_RMASK 0x1
-#define QIB_7220_IntClear_RcvAvail14IntClear_LSB 0xE
-#define QIB_7220_IntClear_RcvAvail14IntClear_RMASK 0x1
-#define QIB_7220_IntClear_RcvAvail13IntClear_LSB 0xD
-#define QIB_7220_IntClear_RcvAvail13IntClear_RMASK 0x1
-#define QIB_7220_IntClear_RcvAvail12IntClear_LSB 0xC
-#define QIB_7220_IntClear_RcvAvail12IntClear_RMASK 0x1
-#define QIB_7220_IntClear_RcvAvail11IntClear_LSB 0xB
-#define QIB_7220_IntClear_RcvAvail11IntClear_RMASK 0x1
-#define QIB_7220_IntClear_RcvAvail10IntClear_LSB 0xA
-#define QIB_7220_IntClear_RcvAvail10IntClear_RMASK 0x1
-#define QIB_7220_IntClear_RcvAvail9IntClear_LSB 0x9
-#define QIB_7220_IntClear_RcvAvail9IntClear_RMASK 0x1
-#define QIB_7220_IntClear_RcvAvail8IntClear_LSB 0x8
-#define QIB_7220_IntClear_RcvAvail8IntClear_RMASK 0x1
-#define QIB_7220_IntClear_RcvAvail7IntClear_LSB 0x7
-#define QIB_7220_IntClear_RcvAvail7IntClear_RMASK 0x1
-#define QIB_7220_IntClear_RcvAvail6IntClear_LSB 0x6
-#define QIB_7220_IntClear_RcvAvail6IntClear_RMASK 0x1
-#define QIB_7220_IntClear_RcvAvail5IntClear_LSB 0x5
-#define QIB_7220_IntClear_RcvAvail5IntClear_RMASK 0x1
-#define QIB_7220_IntClear_RcvAvail4IntClear_LSB 0x4
-#define QIB_7220_IntClear_RcvAvail4IntClear_RMASK 0x1
-#define QIB_7220_IntClear_RcvAvail3IntClear_LSB 0x3
-#define QIB_7220_IntClear_RcvAvail3IntClear_RMASK 0x1
-#define QIB_7220_IntClear_RcvAvail2IntClear_LSB 0x2
-#define QIB_7220_IntClear_RcvAvail2IntClear_RMASK 0x1
-#define QIB_7220_IntClear_RcvAvail1IntClear_LSB 0x1
-#define QIB_7220_IntClear_RcvAvail1IntClear_RMASK 0x1
-#define QIB_7220_IntClear_RcvAvail0IntClear_LSB 0x0
-#define QIB_7220_IntClear_RcvAvail0IntClear_RMASK 0x1
-
-#define QIB_7220_ErrMask_OFFS 0x80
-#define QIB_7220_ErrMask_Reserved_LSB 0x36
-#define QIB_7220_ErrMask_Reserved_RMASK 0x3FF
-#define QIB_7220_ErrMask_InvalidEEPCmdMask_LSB 0x35
-#define QIB_7220_ErrMask_InvalidEEPCmdMask_RMASK 0x1
-#define QIB_7220_ErrMask_SDmaDescAddrMisalignErrMask_LSB 0x34
-#define QIB_7220_ErrMask_SDmaDescAddrMisalignErrMask_RMASK 0x1
-#define QIB_7220_ErrMask_HardwareErrMask_LSB 0x33
-#define QIB_7220_ErrMask_HardwareErrMask_RMASK 0x1
-#define QIB_7220_ErrMask_ResetNegatedMask_LSB 0x32
-#define QIB_7220_ErrMask_ResetNegatedMask_RMASK 0x1
-#define QIB_7220_ErrMask_InvalidAddrErrMask_LSB 0x31
-#define QIB_7220_ErrMask_InvalidAddrErrMask_RMASK 0x1
-#define QIB_7220_ErrMask_IBStatusChangedMask_LSB 0x30
-#define QIB_7220_ErrMask_IBStatusChangedMask_RMASK 0x1
-#define QIB_7220_ErrMask_SDmaUnexpDataErrMask_LSB 0x2F
-#define QIB_7220_ErrMask_SDmaUnexpDataErrMask_RMASK 0x1
-#define QIB_7220_ErrMask_SDmaMissingDwErrMask_LSB 0x2E
-#define QIB_7220_ErrMask_SDmaMissingDwErrMask_RMASK 0x1
-#define QIB_7220_ErrMask_SDmaDwEnErrMask_LSB 0x2D
-#define QIB_7220_ErrMask_SDmaDwEnErrMask_RMASK 0x1
-#define QIB_7220_ErrMask_SDmaRpyTagErrMask_LSB 0x2C
-#define QIB_7220_ErrMask_SDmaRpyTagErrMask_RMASK 0x1
-#define QIB_7220_ErrMask_SDma1stDescErrMask_LSB 0x2B
-#define QIB_7220_ErrMask_SDma1stDescErrMask_RMASK 0x1
-#define QIB_7220_ErrMask_SDmaBaseErrMask_LSB 0x2A
-#define QIB_7220_ErrMask_SDmaBaseErrMask_RMASK 0x1
-#define QIB_7220_ErrMask_SDmaTailOutOfBoundErrMask_LSB 0x29
-#define QIB_7220_ErrMask_SDmaTailOutOfBoundErrMask_RMASK 0x1
-#define QIB_7220_ErrMask_SDmaOutOfBoundErrMask_LSB 0x28
-#define QIB_7220_ErrMask_SDmaOutOfBoundErrMask_RMASK 0x1
-#define QIB_7220_ErrMask_SDmaGenMismatchErrMask_LSB 0x27
-#define QIB_7220_ErrMask_SDmaGenMismatchErrMask_RMASK 0x1
-#define QIB_7220_ErrMask_SendBufMisuseErrMask_LSB 0x26
-#define QIB_7220_ErrMask_SendBufMisuseErrMask_RMASK 0x1
-#define QIB_7220_ErrMask_SendUnsupportedVLErrMask_LSB 0x25
-#define QIB_7220_ErrMask_SendUnsupportedVLErrMask_RMASK 0x1
-#define QIB_7220_ErrMask_SendUnexpectedPktNumErrMask_LSB 0x24
-#define QIB_7220_ErrMask_SendUnexpectedPktNumErrMask_RMASK 0x1
-#define QIB_7220_ErrMask_SendPioArmLaunchErrMask_LSB 0x23
-#define QIB_7220_ErrMask_SendPioArmLaunchErrMask_RMASK 0x1
-#define QIB_7220_ErrMask_SendDroppedDataPktErrMask_LSB 0x22
-#define QIB_7220_ErrMask_SendDroppedDataPktErrMask_RMASK 0x1
-#define QIB_7220_ErrMask_SendDroppedSmpPktErrMask_LSB 0x21
-#define QIB_7220_ErrMask_SendDroppedSmpPktErrMask_RMASK 0x1
-#define QIB_7220_ErrMask_SendPktLenErrMask_LSB 0x20
-#define QIB_7220_ErrMask_SendPktLenErrMask_RMASK 0x1
-#define QIB_7220_ErrMask_SendUnderRunErrMask_LSB 0x1F
-#define QIB_7220_ErrMask_SendUnderRunErrMask_RMASK 0x1
-#define QIB_7220_ErrMask_SendMaxPktLenErrMask_LSB 0x1E
-#define QIB_7220_ErrMask_SendMaxPktLenErrMask_RMASK 0x1
-#define QIB_7220_ErrMask_SendMinPktLenErrMask_LSB 0x1D
-#define QIB_7220_ErrMask_SendMinPktLenErrMask_RMASK 0x1
-#define QIB_7220_ErrMask_SDmaDisabledErrMask_LSB 0x1C
-#define QIB_7220_ErrMask_SDmaDisabledErrMask_RMASK 0x1
-#define QIB_7220_ErrMask_SendSpecialTriggerErrMask_LSB 0x1B
-#define QIB_7220_ErrMask_SendSpecialTriggerErrMask_RMASK 0x1
-#define QIB_7220_ErrMask_Reserved1_LSB 0x12
-#define QIB_7220_ErrMask_Reserved1_RMASK 0x1FF
-#define QIB_7220_ErrMask_RcvIBLostLinkErrMask_LSB 0x11
-#define QIB_7220_ErrMask_RcvIBLostLinkErrMask_RMASK 0x1
-#define QIB_7220_ErrMask_RcvHdrErrMask_LSB 0x10
-#define QIB_7220_ErrMask_RcvHdrErrMask_RMASK 0x1
-#define QIB_7220_ErrMask_RcvHdrLenErrMask_LSB 0xF
-#define QIB_7220_ErrMask_RcvHdrLenErrMask_RMASK 0x1
-#define QIB_7220_ErrMask_RcvBadTidErrMask_LSB 0xE
-#define QIB_7220_ErrMask_RcvBadTidErrMask_RMASK 0x1
-#define QIB_7220_ErrMask_RcvHdrFullErrMask_LSB 0xD
-#define QIB_7220_ErrMask_RcvHdrFullErrMask_RMASK 0x1
-#define QIB_7220_ErrMask_RcvEgrFullErrMask_LSB 0xC
-#define QIB_7220_ErrMask_RcvEgrFullErrMask_RMASK 0x1
-#define QIB_7220_ErrMask_RcvBadVersionErrMask_LSB 0xB
-#define QIB_7220_ErrMask_RcvBadVersionErrMask_RMASK 0x1
-#define QIB_7220_ErrMask_RcvIBFlowErrMask_LSB 0xA
-#define QIB_7220_ErrMask_RcvIBFlowErrMask_RMASK 0x1
-#define QIB_7220_ErrMask_RcvEBPErrMask_LSB 0x9
-#define QIB_7220_ErrMask_RcvEBPErrMask_RMASK 0x1
-#define QIB_7220_ErrMask_RcvUnsupportedVLErrMask_LSB 0x8
-#define QIB_7220_ErrMask_RcvUnsupportedVLErrMask_RMASK 0x1
-#define QIB_7220_ErrMask_RcvUnexpectedCharErrMask_LSB 0x7
-#define QIB_7220_ErrMask_RcvUnexpectedCharErrMask_RMASK 0x1
-#define QIB_7220_ErrMask_RcvShortPktLenErrMask_LSB 0x6
-#define QIB_7220_ErrMask_RcvShortPktLenErrMask_RMASK 0x1
-#define QIB_7220_ErrMask_RcvLongPktLenErrMask_LSB 0x5
-#define QIB_7220_ErrMask_RcvLongPktLenErrMask_RMASK 0x1
-#define QIB_7220_ErrMask_RcvMaxPktLenErrMask_LSB 0x4
-#define QIB_7220_ErrMask_RcvMaxPktLenErrMask_RMASK 0x1
-#define QIB_7220_ErrMask_RcvMinPktLenErrMask_LSB 0x3
-#define QIB_7220_ErrMask_RcvMinPktLenErrMask_RMASK 0x1
-#define QIB_7220_ErrMask_RcvICRCErrMask_LSB 0x2
-#define QIB_7220_ErrMask_RcvICRCErrMask_RMASK 0x1
-#define QIB_7220_ErrMask_RcvVCRCErrMask_LSB 0x1
-#define QIB_7220_ErrMask_RcvVCRCErrMask_RMASK 0x1
-#define QIB_7220_ErrMask_RcvFormatErrMask_LSB 0x0
-#define QIB_7220_ErrMask_RcvFormatErrMask_RMASK 0x1
-
-#define QIB_7220_ErrStatus_OFFS 0x88
-#define QIB_7220_ErrStatus_Reserved_LSB 0x36
-#define QIB_7220_ErrStatus_Reserved_RMASK 0x3FF
-#define QIB_7220_ErrStatus_InvalidEEPCmdErr_LSB 0x35
-#define QIB_7220_ErrStatus_InvalidEEPCmdErr_RMASK 0x1
-#define QIB_7220_ErrStatus_SDmaDescAddrMisalignErr_LSB 0x34
-#define QIB_7220_ErrStatus_SDmaDescAddrMisalignErr_RMASK 0x1
-#define QIB_7220_ErrStatus_HardwareErr_LSB 0x33
-#define QIB_7220_ErrStatus_HardwareErr_RMASK 0x1
-#define QIB_7220_ErrStatus_ResetNegated_LSB 0x32
-#define QIB_7220_ErrStatus_ResetNegated_RMASK 0x1
-#define QIB_7220_ErrStatus_InvalidAddrErr_LSB 0x31
-#define QIB_7220_ErrStatus_InvalidAddrErr_RMASK 0x1
-#define QIB_7220_ErrStatus_IBStatusChanged_LSB 0x30
-#define QIB_7220_ErrStatus_IBStatusChanged_RMASK 0x1
-#define QIB_7220_ErrStatus_SDmaUnexpDataErr_LSB 0x2F
-#define QIB_7220_ErrStatus_SDmaUnexpDataErr_RMASK 0x1
-#define QIB_7220_ErrStatus_SDmaMissingDwErr_LSB 0x2E
-#define QIB_7220_ErrStatus_SDmaMissingDwErr_RMASK 0x1
-#define QIB_7220_ErrStatus_SDmaDwEnErr_LSB 0x2D
-#define QIB_7220_ErrStatus_SDmaDwEnErr_RMASK 0x1
-#define QIB_7220_ErrStatus_SDmaRpyTagErr_LSB 0x2C
-#define QIB_7220_ErrStatus_SDmaRpyTagErr_RMASK 0x1
-#define QIB_7220_ErrStatus_SDma1stDescErr_LSB 0x2B
-#define QIB_7220_ErrStatus_SDma1stDescErr_RMASK 0x1
-#define QIB_7220_ErrStatus_SDmaBaseErr_LSB 0x2A
-#define QIB_7220_ErrStatus_SDmaBaseErr_RMASK 0x1
-#define QIB_7220_ErrStatus_SDmaTailOutOfBoundErr_LSB 0x29
-#define QIB_7220_ErrStatus_SDmaTailOutOfBoundErr_RMASK 0x1
-#define QIB_7220_ErrStatus_SDmaOutOfBoundErr_LSB 0x28
-#define QIB_7220_ErrStatus_SDmaOutOfBoundErr_RMASK 0x1
-#define QIB_7220_ErrStatus_SDmaGenMismatchErr_LSB 0x27
-#define QIB_7220_ErrStatus_SDmaGenMismatchErr_RMASK 0x1
-#define QIB_7220_ErrStatus_SendBufMisuseErr_LSB 0x26
-#define QIB_7220_ErrStatus_SendBufMisuseErr_RMASK 0x1
-#define QIB_7220_ErrStatus_SendUnsupportedVLErr_LSB 0x25
-#define QIB_7220_ErrStatus_SendUnsupportedVLErr_RMASK 0x1
-#define QIB_7220_ErrStatus_SendUnexpectedPktNumErr_LSB 0x24
-#define QIB_7220_ErrStatus_SendUnexpectedPktNumErr_RMASK 0x1
-#define QIB_7220_ErrStatus_SendPioArmLaunchErr_LSB 0x23
-#define QIB_7220_ErrStatus_SendPioArmLaunchErr_RMASK 0x1
-#define QIB_7220_ErrStatus_SendDroppedDataPktErr_LSB 0x22
-#define QIB_7220_ErrStatus_SendDroppedDataPktErr_RMASK 0x1
-#define QIB_7220_ErrStatus_SendDroppedSmpPktErr_LSB 0x21
-#define QIB_7220_ErrStatus_SendDroppedSmpPktErr_RMASK 0x1
-#define QIB_7220_ErrStatus_SendPktLenErr_LSB 0x20
-#define QIB_7220_ErrStatus_SendPktLenErr_RMASK 0x1
-#define QIB_7220_ErrStatus_SendUnderRunErr_LSB 0x1F
-#define QIB_7220_ErrStatus_SendUnderRunErr_RMASK 0x1
-#define QIB_7220_ErrStatus_SendMaxPktLenErr_LSB 0x1E
-#define QIB_7220_ErrStatus_SendMaxPktLenErr_RMASK 0x1
-#define QIB_7220_ErrStatus_SendMinPktLenErr_LSB 0x1D
-#define QIB_7220_ErrStatus_SendMinPktLenErr_RMASK 0x1
-#define QIB_7220_ErrStatus_SDmaDisabledErr_LSB 0x1C
-#define QIB_7220_ErrStatus_SDmaDisabledErr_RMASK 0x1
-#define QIB_7220_ErrStatus_SendSpecialTriggerErr_LSB 0x1B
-#define QIB_7220_ErrStatus_SendSpecialTriggerErr_RMASK 0x1
-#define QIB_7220_ErrStatus_Reserved1_LSB 0x12
-#define QIB_7220_ErrStatus_Reserved1_RMASK 0x1FF
-#define QIB_7220_ErrStatus_RcvIBLostLinkErr_LSB 0x11
-#define QIB_7220_ErrStatus_RcvIBLostLinkErr_RMASK 0x1
-#define QIB_7220_ErrStatus_RcvHdrErr_LSB 0x10
-#define QIB_7220_ErrStatus_RcvHdrErr_RMASK 0x1
-#define QIB_7220_ErrStatus_RcvHdrLenErr_LSB 0xF
-#define QIB_7220_ErrStatus_RcvHdrLenErr_RMASK 0x1
-#define QIB_7220_ErrStatus_RcvBadTidErr_LSB 0xE
-#define QIB_7220_ErrStatus_RcvBadTidErr_RMASK 0x1
-#define QIB_7220_ErrStatus_RcvHdrFullErr_LSB 0xD
-#define QIB_7220_ErrStatus_RcvHdrFullErr_RMASK 0x1
-#define QIB_7220_ErrStatus_RcvEgrFullErr_LSB 0xC
-#define QIB_7220_ErrStatus_RcvEgrFullErr_RMASK 0x1
-#define QIB_7220_ErrStatus_RcvBadVersionErr_LSB 0xB
-#define QIB_7220_ErrStatus_RcvBadVersionErr_RMASK 0x1
-#define QIB_7220_ErrStatus_RcvIBFlowErr_LSB 0xA
-#define QIB_7220_ErrStatus_RcvIBFlowErr_RMASK 0x1
-#define QIB_7220_ErrStatus_RcvEBPErr_LSB 0x9
-#define QIB_7220_ErrStatus_RcvEBPErr_RMASK 0x1
-#define QIB_7220_ErrStatus_RcvUnsupportedVLErr_LSB 0x8
-#define QIB_7220_ErrStatus_RcvUnsupportedVLErr_RMASK 0x1
-#define QIB_7220_ErrStatus_RcvUnexpectedCharErr_LSB 0x7
-#define QIB_7220_ErrStatus_RcvUnexpectedCharErr_RMASK 0x1
-#define QIB_7220_ErrStatus_RcvShortPktLenErr_LSB 0x6
-#define QIB_7220_ErrStatus_RcvShortPktLenErr_RMASK 0x1
-#define QIB_7220_ErrStatus_RcvLongPktLenErr_LSB 0x5
-#define QIB_7220_ErrStatus_RcvLongPktLenErr_RMASK 0x1
-#define QIB_7220_ErrStatus_RcvMaxPktLenErr_LSB 0x4
-#define QIB_7220_ErrStatus_RcvMaxPktLenErr_RMASK 0x1
-#define QIB_7220_ErrStatus_RcvMinPktLenErr_LSB 0x3
-#define QIB_7220_ErrStatus_RcvMinPktLenErr_RMASK 0x1
-#define QIB_7220_ErrStatus_RcvICRCErr_LSB 0x2
-#define QIB_7220_ErrStatus_RcvICRCErr_RMASK 0x1
-#define QIB_7220_ErrStatus_RcvVCRCErr_LSB 0x1
-#define QIB_7220_ErrStatus_RcvVCRCErr_RMASK 0x1
-#define QIB_7220_ErrStatus_RcvFormatErr_LSB 0x0
-#define QIB_7220_ErrStatus_RcvFormatErr_RMASK 0x1
-
-#define QIB_7220_ErrClear_OFFS 0x90
-#define QIB_7220_ErrClear_Reserved_LSB 0x36
-#define QIB_7220_ErrClear_Reserved_RMASK 0x3FF
-#define QIB_7220_ErrClear_InvalidEEPCmdErrClear_LSB 0x35
-#define QIB_7220_ErrClear_InvalidEEPCmdErrClear_RMASK 0x1
-#define QIB_7220_ErrClear_SDmaDescAddrMisalignErrClear_LSB 0x34
-#define QIB_7220_ErrClear_SDmaDescAddrMisalignErrClear_RMASK 0x1
-#define QIB_7220_ErrClear_HardwareErrClear_LSB 0x33
-#define QIB_7220_ErrClear_HardwareErrClear_RMASK 0x1
-#define QIB_7220_ErrClear_ResetNegatedClear_LSB 0x32
-#define QIB_7220_ErrClear_ResetNegatedClear_RMASK 0x1
-#define QIB_7220_ErrClear_InvalidAddrErrClear_LSB 0x31
-#define QIB_7220_ErrClear_InvalidAddrErrClear_RMASK 0x1
-#define QIB_7220_ErrClear_IBStatusChangedClear_LSB 0x30
-#define QIB_7220_ErrClear_IBStatusChangedClear_RMASK 0x1
-#define QIB_7220_ErrClear_SDmaUnexpDataErrClear_LSB 0x2F
-#define QIB_7220_ErrClear_SDmaUnexpDataErrClear_RMASK 0x1
-#define QIB_7220_ErrClear_SDmaMissingDwErrClear_LSB 0x2E
-#define QIB_7220_ErrClear_SDmaMissingDwErrClear_RMASK 0x1
-#define QIB_7220_ErrClear_SDmaDwEnErrClear_LSB 0x2D
-#define QIB_7220_ErrClear_SDmaDwEnErrClear_RMASK 0x1
-#define QIB_7220_ErrClear_SDmaRpyTagErrClear_LSB 0x2C
-#define QIB_7220_ErrClear_SDmaRpyTagErrClear_RMASK 0x1
-#define QIB_7220_ErrClear_SDma1stDescErrClear_LSB 0x2B
-#define QIB_7220_ErrClear_SDma1stDescErrClear_RMASK 0x1
-#define QIB_7220_ErrClear_SDmaBaseErrClear_LSB 0x2A
-#define QIB_7220_ErrClear_SDmaBaseErrClear_RMASK 0x1
-#define QIB_7220_ErrClear_SDmaTailOutOfBoundErrClear_LSB 0x29
-#define QIB_7220_ErrClear_SDmaTailOutOfBoundErrClear_RMASK 0x1
-#define QIB_7220_ErrClear_SDmaOutOfBoundErrClear_LSB 0x28
-#define QIB_7220_ErrClear_SDmaOutOfBoundErrClear_RMASK 0x1
-#define QIB_7220_ErrClear_SDmaGenMismatchErrClear_LSB 0x27
-#define QIB_7220_ErrClear_SDmaGenMismatchErrClear_RMASK 0x1
-#define QIB_7220_ErrClear_SendBufMisuseErrClear_LSB 0x26
-#define QIB_7220_ErrClear_SendBufMisuseErrClear_RMASK 0x1
-#define QIB_7220_ErrClear_SendUnsupportedVLErrClear_LSB 0x25
-#define QIB_7220_ErrClear_SendUnsupportedVLErrClear_RMASK 0x1
-#define QIB_7220_ErrClear_SendUnexpectedPktNumErrClear_LSB 0x24
-#define QIB_7220_ErrClear_SendUnexpectedPktNumErrClear_RMASK 0x1
-#define QIB_7220_ErrClear_SendPioArmLaunchErrClear_LSB 0x23
-#define QIB_7220_ErrClear_SendPioArmLaunchErrClear_RMASK 0x1
-#define QIB_7220_ErrClear_SendDroppedDataPktErrClear_LSB 0x22
-#define QIB_7220_ErrClear_SendDroppedDataPktErrClear_RMASK 0x1
-#define QIB_7220_ErrClear_SendDroppedSmpPktErrClear_LSB 0x21
-#define QIB_7220_ErrClear_SendDroppedSmpPktErrClear_RMASK 0x1
-#define QIB_7220_ErrClear_SendPktLenErrClear_LSB 0x20
-#define QIB_7220_ErrClear_SendPktLenErrClear_RMASK 0x1
-#define QIB_7220_ErrClear_SendUnderRunErrClear_LSB 0x1F
-#define QIB_7220_ErrClear_SendUnderRunErrClear_RMASK 0x1
-#define QIB_7220_ErrClear_SendMaxPktLenErrClear_LSB 0x1E
-#define QIB_7220_ErrClear_SendMaxPktLenErrClear_RMASK 0x1
-#define QIB_7220_ErrClear_SendMinPktLenErrClear_LSB 0x1D
-#define QIB_7220_ErrClear_SendMinPktLenErrClear_RMASK 0x1
-#define QIB_7220_ErrClear_SDmaDisabledErrClear_LSB 0x1C
-#define QIB_7220_ErrClear_SDmaDisabledErrClear_RMASK 0x1
-#define QIB_7220_ErrClear_SendSpecialTriggerErrClear_LSB 0x1B
-#define QIB_7220_ErrClear_SendSpecialTriggerErrClear_RMASK 0x1
-#define QIB_7220_ErrClear_Reserved1_LSB 0x12
-#define QIB_7220_ErrClear_Reserved1_RMASK 0x1FF
-#define QIB_7220_ErrClear_RcvIBLostLinkErrClear_LSB 0x11
-#define QIB_7220_ErrClear_RcvIBLostLinkErrClear_RMASK 0x1
-#define QIB_7220_ErrClear_RcvHdrErrClear_LSB 0x10
-#define QIB_7220_ErrClear_RcvHdrErrClear_RMASK 0x1
-#define QIB_7220_ErrClear_RcvHdrLenErrClear_LSB 0xF
-#define QIB_7220_ErrClear_RcvHdrLenErrClear_RMASK 0x1
-#define QIB_7220_ErrClear_RcvBadTidErrClear_LSB 0xE
-#define QIB_7220_ErrClear_RcvBadTidErrClear_RMASK 0x1
-#define QIB_7220_ErrClear_RcvHdrFullErrClear_LSB 0xD
-#define QIB_7220_ErrClear_RcvHdrFullErrClear_RMASK 0x1
-#define QIB_7220_ErrClear_RcvEgrFullErrClear_LSB 0xC
-#define QIB_7220_ErrClear_RcvEgrFullErrClear_RMASK 0x1
-#define QIB_7220_ErrClear_RcvBadVersionErrClear_LSB 0xB
-#define QIB_7220_ErrClear_RcvBadVersionErrClear_RMASK 0x1
-#define QIB_7220_ErrClear_RcvIBFlowErrClear_LSB 0xA
-#define QIB_7220_ErrClear_RcvIBFlowErrClear_RMASK 0x1
-#define QIB_7220_ErrClear_RcvEBPErrClear_LSB 0x9
-#define QIB_7220_ErrClear_RcvEBPErrClear_RMASK 0x1
-#define QIB_7220_ErrClear_RcvUnsupportedVLErrClear_LSB 0x8
-#define QIB_7220_ErrClear_RcvUnsupportedVLErrClear_RMASK 0x1
-#define QIB_7220_ErrClear_RcvUnexpectedCharErrClear_LSB 0x7
-#define QIB_7220_ErrClear_RcvUnexpectedCharErrClear_RMASK 0x1
-#define QIB_7220_ErrClear_RcvShortPktLenErrClear_LSB 0x6
-#define QIB_7220_ErrClear_RcvShortPktLenErrClear_RMASK 0x1
-#define QIB_7220_ErrClear_RcvLongPktLenErrClear_LSB 0x5
-#define QIB_7220_ErrClear_RcvLongPktLenErrClear_RMASK 0x1
-#define QIB_7220_ErrClear_RcvMaxPktLenErrClear_LSB 0x4
-#define QIB_7220_ErrClear_RcvMaxPktLenErrClear_RMASK 0x1
-#define QIB_7220_ErrClear_RcvMinPktLenErrClear_LSB 0x3
-#define QIB_7220_ErrClear_RcvMinPktLenErrClear_RMASK 0x1
-#define QIB_7220_ErrClear_RcvICRCErrClear_LSB 0x2
-#define QIB_7220_ErrClear_RcvICRCErrClear_RMASK 0x1
-#define QIB_7220_ErrClear_RcvVCRCErrClear_LSB 0x1
-#define QIB_7220_ErrClear_RcvVCRCErrClear_RMASK 0x1
-#define QIB_7220_ErrClear_RcvFormatErrClear_LSB 0x0
-#define QIB_7220_ErrClear_RcvFormatErrClear_RMASK 0x1
-
-#define QIB_7220_HwErrMask_OFFS 0x98
-#define QIB_7220_HwErrMask_IBCBusFromSPCParityErrMask_LSB 0x3F
-#define QIB_7220_HwErrMask_IBCBusFromSPCParityErrMask_RMASK 0x1
-#define QIB_7220_HwErrMask_IBCBusToSPCParityErrMask_LSB 0x3E
-#define QIB_7220_HwErrMask_IBCBusToSPCParityErrMask_RMASK 0x1
-#define QIB_7220_HwErrMask_Clk_uC_PLLNotLockedMask_LSB 0x3D
-#define QIB_7220_HwErrMask_Clk_uC_PLLNotLockedMask_RMASK 0x1
-#define QIB_7220_HwErrMask_IBSerdesPClkNotDetectMask_LSB 0x3C
-#define QIB_7220_HwErrMask_IBSerdesPClkNotDetectMask_RMASK 0x1
-#define QIB_7220_HwErrMask_PCIESerdesQ3PClkNotDetectMask_LSB 0x3B
-#define QIB_7220_HwErrMask_PCIESerdesQ3PClkNotDetectMask_RMASK 0x1
-#define QIB_7220_HwErrMask_PCIESerdesQ2PClkNotDetectMask_LSB 0x3A
-#define QIB_7220_HwErrMask_PCIESerdesQ2PClkNotDetectMask_RMASK 0x1
-#define QIB_7220_HwErrMask_PCIESerdesQ1PClkNotDetectMask_LSB 0x39
-#define QIB_7220_HwErrMask_PCIESerdesQ1PClkNotDetectMask_RMASK 0x1
-#define QIB_7220_HwErrMask_PCIESerdesQ0PClkNotDetectMask_LSB 0x38
-#define QIB_7220_HwErrMask_PCIESerdesQ0PClkNotDetectMask_RMASK 0x1
-#define QIB_7220_HwErrMask_Reserved_LSB 0x37
-#define QIB_7220_HwErrMask_Reserved_RMASK 0x1
-#define QIB_7220_HwErrMask_PowerOnBISTFailedMask_LSB 0x36
-#define QIB_7220_HwErrMask_PowerOnBISTFailedMask_RMASK 0x1
-#define QIB_7220_HwErrMask_Reserved1_LSB 0x33
-#define QIB_7220_HwErrMask_Reserved1_RMASK 0x7
-#define QIB_7220_HwErrMask_RXEMemParityErrMask_LSB 0x2C
-#define QIB_7220_HwErrMask_RXEMemParityErrMask_RMASK 0x7F
-#define QIB_7220_HwErrMask_TXEMemParityErrMask_LSB 0x28
-#define QIB_7220_HwErrMask_TXEMemParityErrMask_RMASK 0xF
-#define QIB_7220_HwErrMask_DDSRXEQMemoryParityErrMask_LSB 0x27
-#define QIB_7220_HwErrMask_DDSRXEQMemoryParityErrMask_RMASK 0x1
-#define QIB_7220_HwErrMask_IB_uC_MemoryParityErrMask_LSB 0x26
-#define QIB_7220_HwErrMask_IB_uC_MemoryParityErrMask_RMASK 0x1
-#define QIB_7220_HwErrMask_PCIEOct1_uC_MemoryParityErrMask_LSB 0x25
-#define QIB_7220_HwErrMask_PCIEOct1_uC_MemoryParityErrMask_RMASK 0x1
-#define QIB_7220_HwErrMask_PCIEOct0_uC_MemoryParityErrMask_LSB 0x24
-#define QIB_7220_HwErrMask_PCIEOct0_uC_MemoryParityErrMask_RMASK 0x1
-#define QIB_7220_HwErrMask_Reserved2_LSB 0x22
-#define QIB_7220_HwErrMask_Reserved2_RMASK 0x3
-#define QIB_7220_HwErrMask_PCIeBusParityErrMask_LSB 0x1F
-#define QIB_7220_HwErrMask_PCIeBusParityErrMask_RMASK 0x7
-#define QIB_7220_HwErrMask_PcieCplTimeoutMask_LSB 0x1E
-#define QIB_7220_HwErrMask_PcieCplTimeoutMask_RMASK 0x1
-#define QIB_7220_HwErrMask_PoisonedTLPMask_LSB 0x1D
-#define QIB_7220_HwErrMask_PoisonedTLPMask_RMASK 0x1
-#define QIB_7220_HwErrMask_SDmaMemReadErrMask_LSB 0x1C
-#define QIB_7220_HwErrMask_SDmaMemReadErrMask_RMASK 0x1
-#define QIB_7220_HwErrMask_Reserved3_LSB 0x8
-#define QIB_7220_HwErrMask_Reserved3_RMASK 0xFFFFF
-#define QIB_7220_HwErrMask_PCIeMemParityErrMask_LSB 0x0
-#define QIB_7220_HwErrMask_PCIeMemParityErrMask_RMASK 0xFF
-
-#define QIB_7220_HwErrStatus_OFFS 0xA0
-#define QIB_7220_HwErrStatus_IBCBusFromSPCParityErr_LSB 0x3F
-#define QIB_7220_HwErrStatus_IBCBusFromSPCParityErr_RMASK 0x1
-#define QIB_7220_HwErrStatus_IBCBusToSPCParityErr_LSB 0x3E
-#define QIB_7220_HwErrStatus_IBCBusToSPCParityErr_RMASK 0x1
-#define QIB_7220_HwErrStatus_Clk_uC_PLLNotLocked_LSB 0x3D
-#define QIB_7220_HwErrStatus_Clk_uC_PLLNotLocked_RMASK 0x1
-#define QIB_7220_HwErrStatus_IBSerdesPClkNotDetect_LSB 0x3C
-#define QIB_7220_HwErrStatus_IBSerdesPClkNotDetect_RMASK 0x1
-#define QIB_7220_HwErrStatus_PCIESerdesQ3PClkNotDetect_LSB 0x3B
-#define QIB_7220_HwErrStatus_PCIESerdesQ3PClkNotDetect_RMASK 0x1
-#define QIB_7220_HwErrStatus_PCIESerdesQ2PClkNotDetect_LSB 0x3A
-#define QIB_7220_HwErrStatus_PCIESerdesQ2PClkNotDetect_RMASK 0x1
-#define QIB_7220_HwErrStatus_PCIESerdesQ1PClkNotDetect_LSB 0x39
-#define QIB_7220_HwErrStatus_PCIESerdesQ1PClkNotDetect_RMASK 0x1
-#define QIB_7220_HwErrStatus_PCIESerdesQ0PClkNotDetect_LSB 0x38
-#define QIB_7220_HwErrStatus_PCIESerdesQ0PClkNotDetect_RMASK 0x1
-#define QIB_7220_HwErrStatus_Reserved_LSB 0x37
-#define QIB_7220_HwErrStatus_Reserved_RMASK 0x1
-#define QIB_7220_HwErrStatus_PowerOnBISTFailed_LSB 0x36
-#define QIB_7220_HwErrStatus_PowerOnBISTFailed_RMASK 0x1
-#define QIB_7220_HwErrStatus_Reserved1_LSB 0x33
-#define QIB_7220_HwErrStatus_Reserved1_RMASK 0x7
-#define QIB_7220_HwErrStatus_RXEMemParity_LSB 0x2C
-#define QIB_7220_HwErrStatus_RXEMemParity_RMASK 0x7F
-#define QIB_7220_HwErrStatus_TXEMemParity_LSB 0x28
-#define QIB_7220_HwErrStatus_TXEMemParity_RMASK 0xF
-#define QIB_7220_HwErrStatus_DDSRXEQMemoryParityErr_LSB 0x27
-#define QIB_7220_HwErrStatus_DDSRXEQMemoryParityErr_RMASK 0x1
-#define QIB_7220_HwErrStatus_IB_uC_MemoryParityErr_LSB 0x26
-#define QIB_7220_HwErrStatus_IB_uC_MemoryParityErr_RMASK 0x1
-#define QIB_7220_HwErrStatus_PCIE_uC_Oct1MemoryParityErr_LSB 0x25
-#define QIB_7220_HwErrStatus_PCIE_uC_Oct1MemoryParityErr_RMASK 0x1
-#define QIB_7220_HwErrStatus_PCIE_uC_Oct0MemoryParityErr_LSB 0x24
-#define QIB_7220_HwErrStatus_PCIE_uC_Oct0MemoryParityErr_RMASK 0x1
-#define QIB_7220_HwErrStatus_Reserved2_LSB 0x22
-#define QIB_7220_HwErrStatus_Reserved2_RMASK 0x3
-#define QIB_7220_HwErrStatus_PCIeBusParity_LSB 0x1F
-#define QIB_7220_HwErrStatus_PCIeBusParity_RMASK 0x7
-#define QIB_7220_HwErrStatus_PcieCplTimeout_LSB 0x1E
-#define QIB_7220_HwErrStatus_PcieCplTimeout_RMASK 0x1
-#define QIB_7220_HwErrStatus_PoisenedTLP_LSB 0x1D
-#define QIB_7220_HwErrStatus_PoisenedTLP_RMASK 0x1
-#define QIB_7220_HwErrStatus_SDmaMemReadErr_LSB 0x1C
-#define QIB_7220_HwErrStatus_SDmaMemReadErr_RMASK 0x1
-#define QIB_7220_HwErrStatus_Reserved3_LSB 0x8
-#define QIB_7220_HwErrStatus_Reserved3_RMASK 0xFFFFF
-#define QIB_7220_HwErrStatus_PCIeMemParity_LSB 0x0
-#define QIB_7220_HwErrStatus_PCIeMemParity_RMASK 0xFF
-
-#define QIB_7220_HwErrClear_OFFS 0xA8
-#define QIB_7220_HwErrClear_IBCBusFromSPCParityErrClear_LSB 0x3F
-#define QIB_7220_HwErrClear_IBCBusFromSPCParityErrClear_RMASK 0x1
-#define QIB_7220_HwErrClear_IBCBusToSPCparityErrClear_LSB 0x3E
-#define QIB_7220_HwErrClear_IBCBusToSPCparityErrClear_RMASK 0x1
-#define QIB_7220_HwErrClear_Clk_uC_PLLNotLockedClear_LSB 0x3D
-#define QIB_7220_HwErrClear_Clk_uC_PLLNotLockedClear_RMASK 0x1
-#define QIB_7220_HwErrClear_IBSerdesPClkNotDetectClear_LSB 0x3C
-#define QIB_7220_HwErrClear_IBSerdesPClkNotDetectClear_RMASK 0x1
-#define QIB_7220_HwErrClear_PCIESerdesQ3PClkNotDetectClear_LSB 0x3B
-#define QIB_7220_HwErrClear_PCIESerdesQ3PClkNotDetectClear_RMASK 0x1
-#define QIB_7220_HwErrClear_PCIESerdesQ2PClkNotDetectClear_LSB 0x3A
-#define QIB_7220_HwErrClear_PCIESerdesQ2PClkNotDetectClear_RMASK 0x1
-#define QIB_7220_HwErrClear_PCIESerdesQ1PClkNotDetectClear_LSB 0x39
-#define QIB_7220_HwErrClear_PCIESerdesQ1PClkNotDetectClear_RMASK 0x1
-#define QIB_7220_HwErrClear_PCIESerdesQ0PClkNotDetectClear_LSB 0x38
-#define QIB_7220_HwErrClear_PCIESerdesQ0PClkNotDetectClear_RMASK 0x1
-#define QIB_7220_HwErrClear_Reserved_LSB 0x37
-#define QIB_7220_HwErrClear_Reserved_RMASK 0x1
-#define QIB_7220_HwErrClear_PowerOnBISTFailedClear_LSB 0x36
-#define QIB_7220_HwErrClear_PowerOnBISTFailedClear_RMASK 0x1
-#define QIB_7220_HwErrClear_Reserved1_LSB 0x33
-#define QIB_7220_HwErrClear_Reserved1_RMASK 0x7
-#define QIB_7220_HwErrClear_RXEMemParityClear_LSB 0x2C
-#define QIB_7220_HwErrClear_RXEMemParityClear_RMASK 0x7F
-#define QIB_7220_HwErrClear_TXEMemParityClear_LSB 0x28
-#define QIB_7220_HwErrClear_TXEMemParityClear_RMASK 0xF
-#define QIB_7220_HwErrClear_DDSRXEQMemoryParityErrClear_LSB 0x27
-#define QIB_7220_HwErrClear_DDSRXEQMemoryParityErrClear_RMASK 0x1
-#define QIB_7220_HwErrClear_IB_uC_MemoryParityErrClear_LSB 0x26
-#define QIB_7220_HwErrClear_IB_uC_MemoryParityErrClear_RMASK 0x1
-#define QIB_7220_HwErrClear_PCIE_uC_Oct1MemoryParityErrClear_LSB 0x25
-#define QIB_7220_HwErrClear_PCIE_uC_Oct1MemoryParityErrClear_RMASK 0x1
-#define QIB_7220_HwErrClear_PCIE_uC_Oct0MemoryParityErrClear_LSB 0x24
-#define QIB_7220_HwErrClear_PCIE_uC_Oct0MemoryParityErrClear_RMASK 0x1
-#define QIB_7220_HwErrClear_Reserved2_LSB 0x22
-#define QIB_7220_HwErrClear_Reserved2_RMASK 0x3
-#define QIB_7220_HwErrClear_PCIeBusParityClr_LSB 0x1F
-#define QIB_7220_HwErrClear_PCIeBusParityClr_RMASK 0x7
-#define QIB_7220_HwErrClear_PcieCplTimeoutClear_LSB 0x1E
-#define QIB_7220_HwErrClear_PcieCplTimeoutClear_RMASK 0x1
-#define QIB_7220_HwErrClear_PoisonedTLPClear_LSB 0x1D
-#define QIB_7220_HwErrClear_PoisonedTLPClear_RMASK 0x1
-#define QIB_7220_HwErrClear_SDmaMemReadErrClear_LSB 0x1C
-#define QIB_7220_HwErrClear_SDmaMemReadErrClear_RMASK 0x1
-#define QIB_7220_HwErrClear_Reserved3_LSB 0x8
-#define QIB_7220_HwErrClear_Reserved3_RMASK 0xFFFFF
-#define QIB_7220_HwErrClear_PCIeMemParityClr_LSB 0x0
-#define QIB_7220_HwErrClear_PCIeMemParityClr_RMASK 0xFF
-
-#define QIB_7220_HwDiagCtrl_OFFS 0xB0
-#define QIB_7220_HwDiagCtrl_ForceIBCBusFromSPCParityErr_LSB 0x3F
-#define QIB_7220_HwDiagCtrl_ForceIBCBusFromSPCParityErr_RMASK 0x1
-#define QIB_7220_HwDiagCtrl_ForceIBCBusToSPCParityErr_LSB 0x3E
-#define QIB_7220_HwDiagCtrl_ForceIBCBusToSPCParityErr_RMASK 0x1
-#define QIB_7220_HwDiagCtrl_CounterWrEnable_LSB 0x3D
-#define QIB_7220_HwDiagCtrl_CounterWrEnable_RMASK 0x1
-#define QIB_7220_HwDiagCtrl_CounterDisable_LSB 0x3C
-#define QIB_7220_HwDiagCtrl_CounterDisable_RMASK 0x1
-#define QIB_7220_HwDiagCtrl_Reserved_LSB 0x33
-#define QIB_7220_HwDiagCtrl_Reserved_RMASK 0x1FF
-#define QIB_7220_HwDiagCtrl_ForceRxMemParityErr_LSB 0x2C
-#define QIB_7220_HwDiagCtrl_ForceRxMemParityErr_RMASK 0x7F
-#define QIB_7220_HwDiagCtrl_ForceTxMemparityErr_LSB 0x28
-#define QIB_7220_HwDiagCtrl_ForceTxMemparityErr_RMASK 0xF
-#define QIB_7220_HwDiagCtrl_ForceDDSRXEQMemoryParityErr_LSB 0x27
-#define QIB_7220_HwDiagCtrl_ForceDDSRXEQMemoryParityErr_RMASK 0x1
-#define QIB_7220_HwDiagCtrl_ForceIB_uC_MemoryParityErr_LSB 0x26
-#define QIB_7220_HwDiagCtrl_ForceIB_uC_MemoryParityErr_RMASK 0x1
-#define QIB_7220_HwDiagCtrl_ForcePCIE_uC_Oct1MemoryParityErr_LSB 0x25
-#define QIB_7220_HwDiagCtrl_ForcePCIE_uC_Oct1MemoryParityErr_RMASK 0x1
-#define QIB_7220_HwDiagCtrl_ForcePCIE_uC_Oct0MemoryParityErr_LSB 0x24
-#define QIB_7220_HwDiagCtrl_ForcePCIE_uC_Oct0MemoryParityErr_RMASK 0x1
-#define QIB_7220_HwDiagCtrl_Reserved1_LSB 0x23
-#define QIB_7220_HwDiagCtrl_Reserved1_RMASK 0x1
-#define QIB_7220_HwDiagCtrl_forcePCIeBusParity_LSB 0x1F
-#define QIB_7220_HwDiagCtrl_forcePCIeBusParity_RMASK 0xF
-#define QIB_7220_HwDiagCtrl_Reserved2_LSB 0x8
-#define QIB_7220_HwDiagCtrl_Reserved2_RMASK 0x7FFFFF
-#define QIB_7220_HwDiagCtrl_forcePCIeMemParity_LSB 0x0
-#define QIB_7220_HwDiagCtrl_forcePCIeMemParity_RMASK 0xFF
-
-#define QIB_7220_REG_0000B8_OFFS 0xB8
-
-#define QIB_7220_IBCStatus_OFFS 0xC0
-#define QIB_7220_IBCStatus_TxCreditOk_LSB 0x1F
-#define QIB_7220_IBCStatus_TxCreditOk_RMASK 0x1
-#define QIB_7220_IBCStatus_TxReady_LSB 0x1E
-#define QIB_7220_IBCStatus_TxReady_RMASK 0x1
-#define QIB_7220_IBCStatus_Reserved_LSB 0xE
-#define QIB_7220_IBCStatus_Reserved_RMASK 0xFFFF
-#define QIB_7220_IBCStatus_IBTxLaneReversed_LSB 0xD
-#define QIB_7220_IBCStatus_IBTxLaneReversed_RMASK 0x1
-#define QIB_7220_IBCStatus_IBRxLaneReversed_LSB 0xC
-#define QIB_7220_IBCStatus_IBRxLaneReversed_RMASK 0x1
-#define QIB_7220_IBCStatus_IB_SERDES_TRIM_DONE_LSB 0xB
-#define QIB_7220_IBCStatus_IB_SERDES_TRIM_DONE_RMASK 0x1
-#define QIB_7220_IBCStatus_DDS_RXEQ_FAIL_LSB 0xA
-#define QIB_7220_IBCStatus_DDS_RXEQ_FAIL_RMASK 0x1
-#define QIB_7220_IBCStatus_LinkWidthActive_LSB 0x9
-#define QIB_7220_IBCStatus_LinkWidthActive_RMASK 0x1
-#define QIB_7220_IBCStatus_LinkSpeedActive_LSB 0x8
-#define QIB_7220_IBCStatus_LinkSpeedActive_RMASK 0x1
-#define QIB_7220_IBCStatus_LinkState_LSB 0x5
-#define QIB_7220_IBCStatus_LinkState_RMASK 0x7
-#define QIB_7220_IBCStatus_LinkTrainingState_LSB 0x0
-#define QIB_7220_IBCStatus_LinkTrainingState_RMASK 0x1F
-
-#define QIB_7220_IBCCtrl_OFFS 0xC8
-#define QIB_7220_IBCCtrl_Loopback_LSB 0x3F
-#define QIB_7220_IBCCtrl_Loopback_RMASK 0x1
-#define QIB_7220_IBCCtrl_LinkDownDefaultState_LSB 0x3E
-#define QIB_7220_IBCCtrl_LinkDownDefaultState_RMASK 0x1
-#define QIB_7220_IBCCtrl_Reserved_LSB 0x2B
-#define QIB_7220_IBCCtrl_Reserved_RMASK 0x7FFFF
-#define QIB_7220_IBCCtrl_CreditScale_LSB 0x28
-#define QIB_7220_IBCCtrl_CreditScale_RMASK 0x7
-#define QIB_7220_IBCCtrl_OverrunThreshold_LSB 0x24
-#define QIB_7220_IBCCtrl_OverrunThreshold_RMASK 0xF
-#define QIB_7220_IBCCtrl_PhyerrThreshold_LSB 0x20
-#define QIB_7220_IBCCtrl_PhyerrThreshold_RMASK 0xF
-#define QIB_7220_IBCCtrl_MaxPktLen_LSB 0x15
-#define QIB_7220_IBCCtrl_MaxPktLen_RMASK 0x7FF
-#define QIB_7220_IBCCtrl_LinkCmd_LSB 0x13
-#define QIB_7220_IBCCtrl_LinkCmd_RMASK 0x3
-#define QIB_7220_IBCCtrl_LinkInitCmd_LSB 0x10
-#define QIB_7220_IBCCtrl_LinkInitCmd_RMASK 0x7
-#define QIB_7220_IBCCtrl_FlowCtrlWaterMark_LSB 0x8
-#define QIB_7220_IBCCtrl_FlowCtrlWaterMark_RMASK 0xFF
-#define QIB_7220_IBCCtrl_FlowCtrlPeriod_LSB 0x0
-#define QIB_7220_IBCCtrl_FlowCtrlPeriod_RMASK 0xFF
-
-#define QIB_7220_EXTStatus_OFFS 0xD0
-#define QIB_7220_EXTStatus_GPIOIn_LSB 0x30
-#define QIB_7220_EXTStatus_GPIOIn_RMASK 0xFFFF
-#define QIB_7220_EXTStatus_Reserved_LSB 0x20
-#define QIB_7220_EXTStatus_Reserved_RMASK 0xFFFF
-#define QIB_7220_EXTStatus_Reserved1_LSB 0x10
-#define QIB_7220_EXTStatus_Reserved1_RMASK 0xFFFF
-#define QIB_7220_EXTStatus_MemBISTDisabled_LSB 0xF
-#define QIB_7220_EXTStatus_MemBISTDisabled_RMASK 0x1
-#define QIB_7220_EXTStatus_MemBISTEndTest_LSB 0xE
-#define QIB_7220_EXTStatus_MemBISTEndTest_RMASK 0x1
-#define QIB_7220_EXTStatus_Reserved2_LSB 0x0
-#define QIB_7220_EXTStatus_Reserved2_RMASK 0x3FFF
-
-#define QIB_7220_EXTCtrl_OFFS 0xD8
-#define QIB_7220_EXTCtrl_GPIOOe_LSB 0x30
-#define QIB_7220_EXTCtrl_GPIOOe_RMASK 0xFFFF
-#define QIB_7220_EXTCtrl_GPIOInvert_LSB 0x20
-#define QIB_7220_EXTCtrl_GPIOInvert_RMASK 0xFFFF
-#define QIB_7220_EXTCtrl_Reserved_LSB 0x4
-#define QIB_7220_EXTCtrl_Reserved_RMASK 0xFFFFFFF
-#define QIB_7220_EXTCtrl_LEDPriPortGreenOn_LSB 0x3
-#define QIB_7220_EXTCtrl_LEDPriPortGreenOn_RMASK 0x1
-#define QIB_7220_EXTCtrl_LEDPriPortYellowOn_LSB 0x2
-#define QIB_7220_EXTCtrl_LEDPriPortYellowOn_RMASK 0x1
-#define QIB_7220_EXTCtrl_LEDGblOkGreenOn_LSB 0x1
-#define QIB_7220_EXTCtrl_LEDGblOkGreenOn_RMASK 0x1
-#define QIB_7220_EXTCtrl_LEDGblErrRedOff_LSB 0x0
-#define QIB_7220_EXTCtrl_LEDGblErrRedOff_RMASK 0x1
-
-#define QIB_7220_GPIOOut_OFFS 0xE0
-
-#define QIB_7220_GPIOMask_OFFS 0xE8
-
-#define QIB_7220_GPIOStatus_OFFS 0xF0
-
-#define QIB_7220_GPIOClear_OFFS 0xF8
-
-#define QIB_7220_RcvCtrl_OFFS 0x100
-#define QIB_7220_RcvCtrl_Reserved_LSB 0x27
-#define QIB_7220_RcvCtrl_Reserved_RMASK 0x1FFFFFF
-#define QIB_7220_RcvCtrl_RcvQPMapEnable_LSB 0x26
-#define QIB_7220_RcvCtrl_RcvQPMapEnable_RMASK 0x1
-#define QIB_7220_RcvCtrl_PortCfg_LSB 0x24
-#define QIB_7220_RcvCtrl_PortCfg_RMASK 0x3
-#define QIB_7220_RcvCtrl_TailUpd_LSB 0x23
-#define QIB_7220_RcvCtrl_TailUpd_RMASK 0x1
-#define QIB_7220_RcvCtrl_RcvPartitionKeyDisable_LSB 0x22
-#define QIB_7220_RcvCtrl_RcvPartitionKeyDisable_RMASK 0x1
-#define QIB_7220_RcvCtrl_IntrAvail_LSB 0x11
-#define QIB_7220_RcvCtrl_IntrAvail_RMASK 0x1FFFF
-#define QIB_7220_RcvCtrl_PortEnable_LSB 0x0
-#define QIB_7220_RcvCtrl_PortEnable_RMASK 0x1FFFF
-
-#define QIB_7220_RcvBTHQP_OFFS 0x108
-#define QIB_7220_RcvBTHQP_Reserved_LSB 0x18
-#define QIB_7220_RcvBTHQP_Reserved_RMASK 0xFF
-#define QIB_7220_RcvBTHQP_RcvBTHQP_LSB 0x0
-#define QIB_7220_RcvBTHQP_RcvBTHQP_RMASK 0xFFFFFF
-
-#define QIB_7220_RcvHdrSize_OFFS 0x110
-
-#define QIB_7220_RcvHdrCnt_OFFS 0x118
-
-#define QIB_7220_RcvHdrEntSize_OFFS 0x120
-
-#define QIB_7220_RcvTIDBase_OFFS 0x128
-
-#define QIB_7220_RcvTIDCnt_OFFS 0x130
-
-#define QIB_7220_RcvEgrBase_OFFS 0x138
-
-#define QIB_7220_RcvEgrCnt_OFFS 0x140
-
-#define QIB_7220_RcvBufBase_OFFS 0x148
-
-#define QIB_7220_RcvBufSize_OFFS 0x150
-
-#define QIB_7220_RxIntMemBase_OFFS 0x158
-
-#define QIB_7220_RxIntMemSize_OFFS 0x160
-
-#define QIB_7220_RcvPartitionKey_OFFS 0x168
-
-#define QIB_7220_RcvQPMulticastPort_OFFS 0x170
-#define QIB_7220_RcvQPMulticastPort_Reserved_LSB 0x5
-#define QIB_7220_RcvQPMulticastPort_Reserved_RMASK 0x7FFFFFFFFFFFFFF
-#define QIB_7220_RcvQPMulticastPort_RcvQpMcPort_LSB 0x0
-#define QIB_7220_RcvQPMulticastPort_RcvQpMcPort_RMASK 0x1F
-
-#define QIB_7220_RcvPktLEDCnt_OFFS 0x178
-#define QIB_7220_RcvPktLEDCnt_ONperiod_LSB 0x20
-#define QIB_7220_RcvPktLEDCnt_ONperiod_RMASK 0xFFFFFFFF
-#define QIB_7220_RcvPktLEDCnt_OFFperiod_LSB 0x0
-#define QIB_7220_RcvPktLEDCnt_OFFperiod_RMASK 0xFFFFFFFF
-
-#define QIB_7220_IBCDDRCtrl_OFFS 0x180
-#define QIB_7220_IBCDDRCtrl_IB_DLID_MASK_LSB 0x30
-#define QIB_7220_IBCDDRCtrl_IB_DLID_MASK_RMASK 0xFFFF
-#define QIB_7220_IBCDDRCtrl_IB_DLID_LSB 0x20
-#define QIB_7220_IBCDDRCtrl_IB_DLID_RMASK 0xFFFF
-#define QIB_7220_IBCDDRCtrl_Reserved_LSB 0x1B
-#define QIB_7220_IBCDDRCtrl_Reserved_RMASK 0x1F
-#define QIB_7220_IBCDDRCtrl_HRTBT_REQ_LSB 0x1A
-#define QIB_7220_IBCDDRCtrl_HRTBT_REQ_RMASK 0x1
-#define QIB_7220_IBCDDRCtrl_HRTBT_PORT_LSB 0x12
-#define QIB_7220_IBCDDRCtrl_HRTBT_PORT_RMASK 0xFF
-#define QIB_7220_IBCDDRCtrl_HRTBT_AUTO_LSB 0x11
-#define QIB_7220_IBCDDRCtrl_HRTBT_AUTO_RMASK 0x1
-#define QIB_7220_IBCDDRCtrl_HRTBT_ENB_LSB 0x10
-#define QIB_7220_IBCDDRCtrl_HRTBT_ENB_RMASK 0x1
-#define QIB_7220_IBCDDRCtrl_SD_DDS_LSB 0xC
-#define QIB_7220_IBCDDRCtrl_SD_DDS_RMASK 0xF
-#define QIB_7220_IBCDDRCtrl_SD_DDSV_LSB 0xB
-#define QIB_7220_IBCDDRCtrl_SD_DDSV_RMASK 0x1
-#define QIB_7220_IBCDDRCtrl_SD_ADD_ENB_LSB 0xA
-#define QIB_7220_IBCDDRCtrl_SD_ADD_ENB_RMASK 0x1
-#define QIB_7220_IBCDDRCtrl_SD_RX_EQUAL_ENABLE_LSB 0x9
-#define QIB_7220_IBCDDRCtrl_SD_RX_EQUAL_ENABLE_RMASK 0x1
-#define QIB_7220_IBCDDRCtrl_IB_LANE_REV_SUPPORTED_LSB 0x8
-#define QIB_7220_IBCDDRCtrl_IB_LANE_REV_SUPPORTED_RMASK 0x1
-#define QIB_7220_IBCDDRCtrl_IB_POLARITY_REV_SUPP_LSB 0x7
-#define QIB_7220_IBCDDRCtrl_IB_POLARITY_REV_SUPP_RMASK 0x1
-#define QIB_7220_IBCDDRCtrl_IB_NUM_CHANNELS_LSB 0x5
-#define QIB_7220_IBCDDRCtrl_IB_NUM_CHANNELS_RMASK 0x3
-#define QIB_7220_IBCDDRCtrl_SD_SPEED_QDR_LSB 0x4
-#define QIB_7220_IBCDDRCtrl_SD_SPEED_QDR_RMASK 0x1
-#define QIB_7220_IBCDDRCtrl_SD_SPEED_DDR_LSB 0x3
-#define QIB_7220_IBCDDRCtrl_SD_SPEED_DDR_RMASK 0x1
-#define QIB_7220_IBCDDRCtrl_SD_SPEED_SDR_LSB 0x2
-#define QIB_7220_IBCDDRCtrl_SD_SPEED_SDR_RMASK 0x1
-#define QIB_7220_IBCDDRCtrl_SD_SPEED_LSB 0x1
-#define QIB_7220_IBCDDRCtrl_SD_SPEED_RMASK 0x1
-#define QIB_7220_IBCDDRCtrl_IB_ENHANCED_MODE_LSB 0x0
-#define QIB_7220_IBCDDRCtrl_IB_ENHANCED_MODE_RMASK 0x1
-
-#define QIB_7220_HRTBT_GUID_OFFS 0x188
-
-#define QIB_7220_IBCDDRCtrl2_OFFS 0x1A0
-#define QIB_7220_IBCDDRCtrl2_IB_BACK_PORCH_LSB 0x5
-#define QIB_7220_IBCDDRCtrl2_IB_BACK_PORCH_RMASK 0x1F
-#define QIB_7220_IBCDDRCtrl2_IB_FRONT_PORCH_LSB 0x0
-#define QIB_7220_IBCDDRCtrl2_IB_FRONT_PORCH_RMASK 0x1F
-
-#define QIB_7220_IBCDDRStatus_OFFS 0x1A8
-#define QIB_7220_IBCDDRStatus_heartbeat_timed_out_LSB 0x24
-#define QIB_7220_IBCDDRStatus_heartbeat_timed_out_RMASK 0x1
-#define QIB_7220_IBCDDRStatus_heartbeat_crosstalk_LSB 0x20
-#define QIB_7220_IBCDDRStatus_heartbeat_crosstalk_RMASK 0xF
-#define QIB_7220_IBCDDRStatus_RxEqLocalDevice_LSB 0x1E
-#define QIB_7220_IBCDDRStatus_RxEqLocalDevice_RMASK 0x3
-#define QIB_7220_IBCDDRStatus_ReqDDSLocalFromRmt_LSB 0x1A
-#define QIB_7220_IBCDDRStatus_ReqDDSLocalFromRmt_RMASK 0xF
-#define QIB_7220_IBCDDRStatus_LinkRoundTripLatency_LSB 0x0
-#define QIB_7220_IBCDDRStatus_LinkRoundTripLatency_RMASK 0x3FFFFFF
-
-#define QIB_7220_JIntReload_OFFS 0x1B0
-#define QIB_7220_JIntReload_J_limit_reload_LSB 0x10
-#define QIB_7220_JIntReload_J_limit_reload_RMASK 0xFFFF
-#define QIB_7220_JIntReload_J_reload_LSB 0x0
-#define QIB_7220_JIntReload_J_reload_RMASK 0xFFFF
-
-#define QIB_7220_IBNCModeCtrl_OFFS 0x1B8
-#define QIB_7220_IBNCModeCtrl_Reserved_LSB 0x1A
-#define QIB_7220_IBNCModeCtrl_Reserved_RMASK 0x3FFFFFFFFF
-#define QIB_7220_IBNCModeCtrl_TSMCode_TS2_LSB 0x11
-#define QIB_7220_IBNCModeCtrl_TSMCode_TS2_RMASK 0x1FF
-#define QIB_7220_IBNCModeCtrl_TSMCode_TS1_LSB 0x8
-#define QIB_7220_IBNCModeCtrl_TSMCode_TS1_RMASK 0x1FF
-#define QIB_7220_IBNCModeCtrl_Reserved1_LSB 0x3
-#define QIB_7220_IBNCModeCtrl_Reserved1_RMASK 0x1F
-#define QIB_7220_IBNCModeCtrl_TSMEnable_ignore_TSM_on_rx_LSB 0x2
-#define QIB_7220_IBNCModeCtrl_TSMEnable_ignore_TSM_on_rx_RMASK 0x1
-#define QIB_7220_IBNCModeCtrl_TSMEnable_send_TS2_LSB 0x1
-#define QIB_7220_IBNCModeCtrl_TSMEnable_send_TS2_RMASK 0x1
-#define QIB_7220_IBNCModeCtrl_TSMEnable_send_TS1_LSB 0x0
-#define QIB_7220_IBNCModeCtrl_TSMEnable_send_TS1_RMASK 0x1
-
-#define QIB_7220_SendCtrl_OFFS 0x1C0
-#define QIB_7220_SendCtrl_Disarm_LSB 0x1F
-#define QIB_7220_SendCtrl_Disarm_RMASK 0x1
-#define QIB_7220_SendCtrl_Reserved_LSB 0x1D
-#define QIB_7220_SendCtrl_Reserved_RMASK 0x3
-#define QIB_7220_SendCtrl_AvailUpdThld_LSB 0x18
-#define QIB_7220_SendCtrl_AvailUpdThld_RMASK 0x1F
-#define QIB_7220_SendCtrl_DisarmPIOBuf_LSB 0x10
-#define QIB_7220_SendCtrl_DisarmPIOBuf_RMASK 0xFF
-#define QIB_7220_SendCtrl_Reserved1_LSB 0xD
-#define QIB_7220_SendCtrl_Reserved1_RMASK 0x7
-#define QIB_7220_SendCtrl_SDmaHalt_LSB 0xC
-#define QIB_7220_SendCtrl_SDmaHalt_RMASK 0x1
-#define QIB_7220_SendCtrl_SDmaEnable_LSB 0xB
-#define QIB_7220_SendCtrl_SDmaEnable_RMASK 0x1
-#define QIB_7220_SendCtrl_SDmaSingleDescriptor_LSB 0xA
-#define QIB_7220_SendCtrl_SDmaSingleDescriptor_RMASK 0x1
-#define QIB_7220_SendCtrl_SDmaIntEnable_LSB 0x9
-#define QIB_7220_SendCtrl_SDmaIntEnable_RMASK 0x1
-#define QIB_7220_SendCtrl_Reserved2_LSB 0x5
-#define QIB_7220_SendCtrl_Reserved2_RMASK 0xF
-#define QIB_7220_SendCtrl_SSpecialTriggerEn_LSB 0x4
-#define QIB_7220_SendCtrl_SSpecialTriggerEn_RMASK 0x1
-#define QIB_7220_SendCtrl_SPioEnable_LSB 0x3
-#define QIB_7220_SendCtrl_SPioEnable_RMASK 0x1
-#define QIB_7220_SendCtrl_SendBufAvailUpd_LSB 0x2
-#define QIB_7220_SendCtrl_SendBufAvailUpd_RMASK 0x1
-#define QIB_7220_SendCtrl_SendIntBufAvail_LSB 0x1
-#define QIB_7220_SendCtrl_SendIntBufAvail_RMASK 0x1
-#define QIB_7220_SendCtrl_Abort_LSB 0x0
-#define QIB_7220_SendCtrl_Abort_RMASK 0x1
-
-#define QIB_7220_SendBufBase_OFFS 0x1C8
-#define QIB_7220_SendBufBase_Reserved_LSB 0x35
-#define QIB_7220_SendBufBase_Reserved_RMASK 0x7FF
-#define QIB_7220_SendBufBase_BaseAddr_LargePIO_LSB 0x20
-#define QIB_7220_SendBufBase_BaseAddr_LargePIO_RMASK 0x1FFFFF
-#define QIB_7220_SendBufBase_Reserved1_LSB 0x15
-#define QIB_7220_SendBufBase_Reserved1_RMASK 0x7FF
-#define QIB_7220_SendBufBase_BaseAddr_SmallPIO_LSB 0x0
-#define QIB_7220_SendBufBase_BaseAddr_SmallPIO_RMASK 0x1FFFFF
-
-#define QIB_7220_SendBufSize_OFFS 0x1D0
-#define QIB_7220_SendBufSize_Reserved_LSB 0x2D
-#define QIB_7220_SendBufSize_Reserved_RMASK 0xFFFFF
-#define QIB_7220_SendBufSize_Size_LargePIO_LSB 0x20
-#define QIB_7220_SendBufSize_Size_LargePIO_RMASK 0x1FFF
-#define QIB_7220_SendBufSize_Reserved1_LSB 0xC
-#define QIB_7220_SendBufSize_Reserved1_RMASK 0xFFFFF
-#define QIB_7220_SendBufSize_Size_SmallPIO_LSB 0x0
-#define QIB_7220_SendBufSize_Size_SmallPIO_RMASK 0xFFF
-
-#define QIB_7220_SendBufCnt_OFFS 0x1D8
-#define QIB_7220_SendBufCnt_Reserved_LSB 0x24
-#define QIB_7220_SendBufCnt_Reserved_RMASK 0xFFFFFFF
-#define QIB_7220_SendBufCnt_Num_LargeBuffers_LSB 0x20
-#define QIB_7220_SendBufCnt_Num_LargeBuffers_RMASK 0xF
-#define QIB_7220_SendBufCnt_Reserved1_LSB 0x9
-#define QIB_7220_SendBufCnt_Reserved1_RMASK 0x7FFFFF
-#define QIB_7220_SendBufCnt_Num_SmallBuffers_LSB 0x0
-#define QIB_7220_SendBufCnt_Num_SmallBuffers_RMASK 0x1FF
-
-#define QIB_7220_SendBufAvailAddr_OFFS 0x1E0
-#define QIB_7220_SendBufAvailAddr_SendBufAvailAddr_LSB 0x6
-#define QIB_7220_SendBufAvailAddr_SendBufAvailAddr_RMASK 0x3FFFFFFFF
-#define QIB_7220_SendBufAvailAddr_Reserved_LSB 0x0
-#define QIB_7220_SendBufAvailAddr_Reserved_RMASK 0x3F
-
-#define QIB_7220_TxIntMemBase_OFFS 0x1E8
-
-#define QIB_7220_TxIntMemSize_OFFS 0x1F0
-
-#define QIB_7220_SendDmaBase_OFFS 0x1F8
-#define QIB_7220_SendDmaBase_Reserved_LSB 0x30
-#define QIB_7220_SendDmaBase_Reserved_RMASK 0xFFFF
-#define QIB_7220_SendDmaBase_SendDmaBase_LSB 0x0
-#define QIB_7220_SendDmaBase_SendDmaBase_RMASK 0xFFFFFFFFFFFF
-
-#define QIB_7220_SendDmaLenGen_OFFS 0x200
-#define QIB_7220_SendDmaLenGen_Reserved_LSB 0x13
-#define QIB_7220_SendDmaLenGen_Reserved_RMASK 0x1FFFFFFFFFFF
-#define QIB_7220_SendDmaLenGen_Generation_LSB 0x10
-#define QIB_7220_SendDmaLenGen_Generation_MSB 0x12
-#define QIB_7220_SendDmaLenGen_Generation_RMASK 0x7
-#define QIB_7220_SendDmaLenGen_Length_LSB 0x0
-#define QIB_7220_SendDmaLenGen_Length_RMASK 0xFFFF
-
-#define QIB_7220_SendDmaTail_OFFS 0x208
-#define QIB_7220_SendDmaTail_Reserved_LSB 0x10
-#define QIB_7220_SendDmaTail_Reserved_RMASK 0xFFFFFFFFFFFF
-#define QIB_7220_SendDmaTail_SendDmaTail_LSB 0x0
-#define QIB_7220_SendDmaTail_SendDmaTail_RMASK 0xFFFF
-
-#define QIB_7220_SendDmaHead_OFFS 0x210
-#define QIB_7220_SendDmaHead_Reserved_LSB 0x30
-#define QIB_7220_SendDmaHead_Reserved_RMASK 0xFFFF
-#define QIB_7220_SendDmaHead_InternalSendDmaHead_LSB 0x20
-#define QIB_7220_SendDmaHead_InternalSendDmaHead_RMASK 0xFFFF
-#define QIB_7220_SendDmaHead_Reserved1_LSB 0x10
-#define QIB_7220_SendDmaHead_Reserved1_RMASK 0xFFFF
-#define QIB_7220_SendDmaHead_SendDmaHead_LSB 0x0
-#define QIB_7220_SendDmaHead_SendDmaHead_RMASK 0xFFFF
-
-#define QIB_7220_SendDmaHeadAddr_OFFS 0x218
-#define QIB_7220_SendDmaHeadAddr_Reserved_LSB 0x30
-#define QIB_7220_SendDmaHeadAddr_Reserved_RMASK 0xFFFF
-#define QIB_7220_SendDmaHeadAddr_SendDmaHeadAddr_LSB 0x0
-#define QIB_7220_SendDmaHeadAddr_SendDmaHeadAddr_RMASK 0xFFFFFFFFFFFF
-
-#define QIB_7220_SendDmaBufMask0_OFFS 0x220
-#define QIB_7220_SendDmaBufMask0_BufMask_63_0_LSB 0x0
-#define QIB_7220_SendDmaBufMask0_BufMask_63_0_RMASK 0x0
-
-#define QIB_7220_SendDmaStatus_OFFS 0x238
-#define QIB_7220_SendDmaStatus_ScoreBoardDrainInProg_LSB 0x3F
-#define QIB_7220_SendDmaStatus_ScoreBoardDrainInProg_RMASK 0x1
-#define QIB_7220_SendDmaStatus_AbortInProg_LSB 0x3E
-#define QIB_7220_SendDmaStatus_AbortInProg_RMASK 0x1
-#define QIB_7220_SendDmaStatus_InternalSDmaEnable_LSB 0x3D
-#define QIB_7220_SendDmaStatus_InternalSDmaEnable_RMASK 0x1
-#define QIB_7220_SendDmaStatus_ScbDescIndex_13_0_LSB 0x2F
-#define QIB_7220_SendDmaStatus_ScbDescIndex_13_0_RMASK 0x3FFF
-#define QIB_7220_SendDmaStatus_RpyLowAddr_6_0_LSB 0x28
-#define QIB_7220_SendDmaStatus_RpyLowAddr_6_0_RMASK 0x7F
-#define QIB_7220_SendDmaStatus_RpyTag_7_0_LSB 0x20
-#define QIB_7220_SendDmaStatus_RpyTag_7_0_RMASK 0xFF
-#define QIB_7220_SendDmaStatus_ScbFull_LSB 0x1F
-#define QIB_7220_SendDmaStatus_ScbFull_RMASK 0x1
-#define QIB_7220_SendDmaStatus_ScbEmpty_LSB 0x1E
-#define QIB_7220_SendDmaStatus_ScbEmpty_RMASK 0x1
-#define QIB_7220_SendDmaStatus_ScbEntryValid_LSB 0x1D
-#define QIB_7220_SendDmaStatus_ScbEntryValid_RMASK 0x1
-#define QIB_7220_SendDmaStatus_ScbFetchDescFlag_LSB 0x1C
-#define QIB_7220_SendDmaStatus_ScbFetchDescFlag_RMASK 0x1
-#define QIB_7220_SendDmaStatus_SplFifoReadyToGo_LSB 0x1B
-#define QIB_7220_SendDmaStatus_SplFifoReadyToGo_RMASK 0x1
-#define QIB_7220_SendDmaStatus_SplFifoDisarmed_LSB 0x1A
-#define QIB_7220_SendDmaStatus_SplFifoDisarmed_RMASK 0x1
-#define QIB_7220_SendDmaStatus_SplFifoEmpty_LSB 0x19
-#define QIB_7220_SendDmaStatus_SplFifoEmpty_RMASK 0x1
-#define QIB_7220_SendDmaStatus_SplFifoFull_LSB 0x18
-#define QIB_7220_SendDmaStatus_SplFifoFull_RMASK 0x1
-#define QIB_7220_SendDmaStatus_SplFifoBufNum_LSB 0x10
-#define QIB_7220_SendDmaStatus_SplFifoBufNum_RMASK 0xFF
-#define QIB_7220_SendDmaStatus_SplFifoDescIndex_LSB 0x0
-#define QIB_7220_SendDmaStatus_SplFifoDescIndex_RMASK 0xFFFF
-
-#define QIB_7220_SendBufErr0_OFFS 0x240
-#define QIB_7220_SendBufErr0_SendBufErr_63_0_LSB 0x0
-#define QIB_7220_SendBufErr0_SendBufErr_63_0_RMASK 0x0
-
-#define QIB_7220_RcvHdrAddr0_OFFS 0x270
-#define QIB_7220_RcvHdrAddr0_RcvHdrAddr0_LSB 0x2
-#define QIB_7220_RcvHdrAddr0_RcvHdrAddr0_RMASK 0x3FFFFFFFFF
-#define QIB_7220_RcvHdrAddr0_Reserved_LSB 0x0
-#define QIB_7220_RcvHdrAddr0_Reserved_RMASK 0x3
-
-#define QIB_7220_RcvHdrTailAddr0_OFFS 0x300
-#define QIB_7220_RcvHdrTailAddr0_RcvHdrTailAddr0_LSB 0x2
-#define QIB_7220_RcvHdrTailAddr0_RcvHdrTailAddr0_RMASK 0x3FFFFFFFFF
-#define QIB_7220_RcvHdrTailAddr0_Reserved_LSB 0x0
-#define QIB_7220_RcvHdrTailAddr0_Reserved_RMASK 0x3
-
-#define QIB_7220_ibsd_epb_access_ctrl_OFFS 0x3C0
-#define QIB_7220_ibsd_epb_access_ctrl_sw_ib_epb_req_granted_LSB 0x8
-#define QIB_7220_ibsd_epb_access_ctrl_sw_ib_epb_req_granted_RMASK 0x1
-#define QIB_7220_ibsd_epb_access_ctrl_Reserved_LSB 0x1
-#define QIB_7220_ibsd_epb_access_ctrl_Reserved_RMASK 0x7F
-#define QIB_7220_ibsd_epb_access_ctrl_sw_ib_epb_req_LSB 0x0
-#define QIB_7220_ibsd_epb_access_ctrl_sw_ib_epb_req_RMASK 0x1
-
-#define QIB_7220_ibsd_epb_transaction_reg_OFFS 0x3C8
-#define QIB_7220_ibsd_epb_transaction_reg_ib_epb_rdy_LSB 0x1F
-#define QIB_7220_ibsd_epb_transaction_reg_ib_epb_rdy_RMASK 0x1
-#define QIB_7220_ibsd_epb_transaction_reg_ib_epb_req_error_LSB 0x1E
-#define QIB_7220_ibsd_epb_transaction_reg_ib_epb_req_error_RMASK 0x1
-#define QIB_7220_ibsd_epb_transaction_reg_Reserved_LSB 0x1D
-#define QIB_7220_ibsd_epb_transaction_reg_Reserved_RMASK 0x1
-#define QIB_7220_ibsd_epb_transaction_reg_mem_data_parity_LSB 0x1C
-#define QIB_7220_ibsd_epb_transaction_reg_mem_data_parity_RMASK 0x1
-#define QIB_7220_ibsd_epb_transaction_reg_Reserved1_LSB 0x1B
-#define QIB_7220_ibsd_epb_transaction_reg_Reserved1_RMASK 0x1
-#define QIB_7220_ibsd_epb_transaction_reg_ib_epb_cs_LSB 0x19
-#define QIB_7220_ibsd_epb_transaction_reg_ib_epb_cs_RMASK 0x3
-#define QIB_7220_ibsd_epb_transaction_reg_ib_epb_read_write_LSB 0x18
-#define QIB_7220_ibsd_epb_transaction_reg_ib_epb_read_write_RMASK 0x1
-#define QIB_7220_ibsd_epb_transaction_reg_Reserved2_LSB 0x17
-#define QIB_7220_ibsd_epb_transaction_reg_Reserved2_RMASK 0x1
-#define QIB_7220_ibsd_epb_transaction_reg_ib_epb_address_LSB 0x8
-#define QIB_7220_ibsd_epb_transaction_reg_ib_epb_address_RMASK 0x7FFF
-#define QIB_7220_ibsd_epb_transaction_reg_ib_epb_data_LSB 0x0
-#define QIB_7220_ibsd_epb_transaction_reg_ib_epb_data_RMASK 0xFF
-
-#define QIB_7220_XGXSCfg_OFFS 0x3D8
-#define QIB_7220_XGXSCfg_sel_link_down_for_fctrl_lane_sync_reset_LSB 0x3F
-#define QIB_7220_XGXSCfg_sel_link_down_for_fctrl_lane_sync_reset_RMASK 0x1
-#define QIB_7220_XGXSCfg_Reserved_LSB 0x13
-#define QIB_7220_XGXSCfg_Reserved_RMASK 0xFFFFFFFFFFF
-#define QIB_7220_XGXSCfg_link_sync_mask_LSB 0x9
-#define QIB_7220_XGXSCfg_link_sync_mask_RMASK 0x3FF
-#define QIB_7220_XGXSCfg_Reserved1_LSB 0x3
-#define QIB_7220_XGXSCfg_Reserved1_RMASK 0x3F
-#define QIB_7220_XGXSCfg_xcv_reset_LSB 0x2
-#define QIB_7220_XGXSCfg_xcv_reset_RMASK 0x1
-#define QIB_7220_XGXSCfg_Reserved2_LSB 0x1
-#define QIB_7220_XGXSCfg_Reserved2_RMASK 0x1
-#define QIB_7220_XGXSCfg_tx_rx_reset_LSB 0x0
-#define QIB_7220_XGXSCfg_tx_rx_reset_RMASK 0x1
-
-#define QIB_7220_IBSerDesCtrl_OFFS 0x3E0
-#define QIB_7220_IBSerDesCtrl_Reserved_LSB 0x2D
-#define QIB_7220_IBSerDesCtrl_Reserved_RMASK 0x7FFFF
-#define QIB_7220_IBSerDesCtrl_INT_uC_LSB 0x2C
-#define QIB_7220_IBSerDesCtrl_INT_uC_RMASK 0x1
-#define QIB_7220_IBSerDesCtrl_CKSEL_uC_LSB 0x2A
-#define QIB_7220_IBSerDesCtrl_CKSEL_uC_RMASK 0x3
-#define QIB_7220_IBSerDesCtrl_PLLN_LSB 0x28
-#define QIB_7220_IBSerDesCtrl_PLLN_RMASK 0x3
-#define QIB_7220_IBSerDesCtrl_PLLM_LSB 0x25
-#define QIB_7220_IBSerDesCtrl_PLLM_RMASK 0x7
-#define QIB_7220_IBSerDesCtrl_TXOBPD_LSB 0x24
-#define QIB_7220_IBSerDesCtrl_TXOBPD_RMASK 0x1
-#define QIB_7220_IBSerDesCtrl_TWC_LSB 0x23
-#define QIB_7220_IBSerDesCtrl_TWC_RMASK 0x1
-#define QIB_7220_IBSerDesCtrl_RXIDLE_LSB 0x22
-#define QIB_7220_IBSerDesCtrl_RXIDLE_RMASK 0x1
-#define QIB_7220_IBSerDesCtrl_RXINV_LSB 0x21
-#define QIB_7220_IBSerDesCtrl_RXINV_RMASK 0x1
-#define QIB_7220_IBSerDesCtrl_TXINV_LSB 0x20
-#define QIB_7220_IBSerDesCtrl_TXINV_RMASK 0x1
-#define QIB_7220_IBSerDesCtrl_Reserved1_LSB 0x12
-#define QIB_7220_IBSerDesCtrl_Reserved1_RMASK 0x3FFF
-#define QIB_7220_IBSerDesCtrl_NumSerDesRegsToWrForRXEQ_LSB 0xD
-#define QIB_7220_IBSerDesCtrl_NumSerDesRegsToWrForRXEQ_RMASK 0x1F
-#define QIB_7220_IBSerDesCtrl_NumSerDesRegsToWrForDDS_LSB 0x8
-#define QIB_7220_IBSerDesCtrl_NumSerDesRegsToWrForDDS_RMASK 0x1F
-#define QIB_7220_IBSerDesCtrl_Reserved2_LSB 0x1
-#define QIB_7220_IBSerDesCtrl_Reserved2_RMASK 0x7F
-#define QIB_7220_IBSerDesCtrl_ResetIB_uC_Core_LSB 0x0
-#define QIB_7220_IBSerDesCtrl_ResetIB_uC_Core_RMASK 0x1
-
-#define QIB_7220_pciesd_epb_access_ctrl_OFFS 0x400
-#define QIB_7220_pciesd_epb_access_ctrl_sw_pcie_epb_req_granted_LSB 0x8
-#define QIB_7220_pciesd_epb_access_ctrl_sw_pcie_epb_req_granted_RMASK 0x1
-#define QIB_7220_pciesd_epb_access_ctrl_Reserved_LSB 0x3
-#define QIB_7220_pciesd_epb_access_ctrl_Reserved_RMASK 0x1F
-#define QIB_7220_pciesd_epb_access_ctrl_sw_pcieepb_star_en_LSB 0x1
-#define QIB_7220_pciesd_epb_access_ctrl_sw_pcieepb_star_en_RMASK 0x3
-#define QIB_7220_pciesd_epb_access_ctrl_sw_pcie_epb_req_LSB 0x0
-#define QIB_7220_pciesd_epb_access_ctrl_sw_pcie_epb_req_RMASK 0x1
-
-#define QIB_7220_pciesd_epb_transaction_reg_OFFS 0x408
-#define QIB_7220_pciesd_epb_transaction_reg_pcie_epb_rdy_LSB 0x1F
-#define QIB_7220_pciesd_epb_transaction_reg_pcie_epb_rdy_RMASK 0x1
-#define QIB_7220_pciesd_epb_transaction_reg_pcie_epb_req_error_LSB 0x1E
-#define QIB_7220_pciesd_epb_transaction_reg_pcie_epb_req_error_RMASK 0x1
-#define QIB_7220_pciesd_epb_transaction_reg_Reserved_LSB 0x1D
-#define QIB_7220_pciesd_epb_transaction_reg_Reserved_RMASK 0x1
-#define QIB_7220_pciesd_epb_transaction_reg_mem_data_parity_LSB 0x1C
-#define QIB_7220_pciesd_epb_transaction_reg_mem_data_parity_RMASK 0x1
-#define QIB_7220_pciesd_epb_transaction_reg_pcie_epb_cs_LSB 0x19
-#define QIB_7220_pciesd_epb_transaction_reg_pcie_epb_cs_RMASK 0x7
-#define QIB_7220_pciesd_epb_transaction_reg_pcie_epb_read_write_LSB 0x18
-#define QIB_7220_pciesd_epb_transaction_reg_pcie_epb_read_write_RMASK 0x1
-#define QIB_7220_pciesd_epb_transaction_reg_Reserved1_LSB 0x17
-#define QIB_7220_pciesd_epb_transaction_reg_Reserved1_RMASK 0x1
-#define QIB_7220_pciesd_epb_transaction_reg_pcie_epb_address_LSB 0x8
-#define QIB_7220_pciesd_epb_transaction_reg_pcie_epb_address_RMASK 0x7FFF
-#define QIB_7220_pciesd_epb_transaction_reg_pcie_epb_data_LSB 0x0
-#define QIB_7220_pciesd_epb_transaction_reg_pcie_epb_data_RMASK 0xFF
-
-#define QIB_7220_SerDes_DDSRXEQ0_OFFS 0x500
-#define QIB_7220_SerDes_DDSRXEQ0_reg_addr_LSB 0x4
-#define QIB_7220_SerDes_DDSRXEQ0_reg_addr_RMASK 0x3F
-#define QIB_7220_SerDes_DDSRXEQ0_element_num_LSB 0x0
-#define QIB_7220_SerDes_DDSRXEQ0_element_num_RMASK 0xF
-
-#define QIB_7220_LBIntCnt_OFFS 0x13000
-
-#define QIB_7220_LBFlowStallCnt_OFFS 0x13008
-
-#define QIB_7220_TxSDmaDescCnt_OFFS 0x13010
-
-#define QIB_7220_TxUnsupVLErrCnt_OFFS 0x13018
-
-#define QIB_7220_TxDataPktCnt_OFFS 0x13020
-
-#define QIB_7220_TxFlowPktCnt_OFFS 0x13028
-
-#define QIB_7220_TxDwordCnt_OFFS 0x13030
-
-#define QIB_7220_TxLenErrCnt_OFFS 0x13038
-
-#define QIB_7220_TxMaxMinLenErrCnt_OFFS 0x13040
-
-#define QIB_7220_TxUnderrunCnt_OFFS 0x13048
-
-#define QIB_7220_TxFlowStallCnt_OFFS 0x13050
-
-#define QIB_7220_TxDroppedPktCnt_OFFS 0x13058
-
-#define QIB_7220_RxDroppedPktCnt_OFFS 0x13060
-
-#define QIB_7220_RxDataPktCnt_OFFS 0x13068
-
-#define QIB_7220_RxFlowPktCnt_OFFS 0x13070
-
-#define QIB_7220_RxDwordCnt_OFFS 0x13078
-
-#define QIB_7220_RxLenErrCnt_OFFS 0x13080
-
-#define QIB_7220_RxMaxMinLenErrCnt_OFFS 0x13088
-
-#define QIB_7220_RxICRCErrCnt_OFFS 0x13090
-
-#define QIB_7220_RxVCRCErrCnt_OFFS 0x13098
-
-#define QIB_7220_RxFlowCtrlViolCnt_OFFS 0x130A0
-
-#define QIB_7220_RxVersionErrCnt_OFFS 0x130A8
-
-#define QIB_7220_RxLinkMalformCnt_OFFS 0x130B0
-
-#define QIB_7220_RxEBPCnt_OFFS 0x130B8
-
-#define QIB_7220_RxLPCRCErrCnt_OFFS 0x130C0
-
-#define QIB_7220_RxBufOvflCnt_OFFS 0x130C8
-
-#define QIB_7220_RxTIDFullErrCnt_OFFS 0x130D0
-
-#define QIB_7220_RxTIDValidErrCnt_OFFS 0x130D8
-
-#define QIB_7220_RxPKeyMismatchCnt_OFFS 0x130E0
-
-#define QIB_7220_RxP0HdrEgrOvflCnt_OFFS 0x130E8
-
-#define QIB_7220_IBStatusChangeCnt_OFFS 0x13170
-
-#define QIB_7220_IBLinkErrRecoveryCnt_OFFS 0x13178
-
-#define QIB_7220_IBLinkDownedCnt_OFFS 0x13180
-
-#define QIB_7220_IBSymbolErrCnt_OFFS 0x13188
-
-#define QIB_7220_RxVL15DroppedPktCnt_OFFS 0x13190
-
-#define QIB_7220_RxOtherLocalPhyErrCnt_OFFS 0x13198
-
-#define QIB_7220_PcieRetryBufDiagQwordCnt_OFFS 0x131A0
-
-#define QIB_7220_ExcessBufferOvflCnt_OFFS 0x131A8
-
-#define QIB_7220_LocalLinkIntegrityErrCnt_OFFS 0x131B0
-
-#define QIB_7220_RxVlErrCnt_OFFS 0x131B8
-
-#define QIB_7220_RxDlidFltrCnt_OFFS 0x131C0
-
-#define QIB_7220_CNT_0131C8_OFFS 0x131C8
-
-#define QIB_7220_PSStat_OFFS 0x13200
-
-#define QIB_7220_PSStart_OFFS 0x13208
-
-#define QIB_7220_PSInterval_OFFS 0x13210
-
-#define QIB_7220_PSRcvDataCount_OFFS 0x13218
-
-#define QIB_7220_PSRcvPktsCount_OFFS 0x13220
-
-#define QIB_7220_PSXmitDataCount_OFFS 0x13228
-
-#define QIB_7220_PSXmitPktsCount_OFFS 0x13230
-
-#define QIB_7220_PSXmitWaitCount_OFFS 0x13238
-
-#define QIB_7220_CNT_013240_OFFS 0x13240
-
-#define QIB_7220_RcvEgrArray_OFFS 0x14000
-
-#define QIB_7220_MEM_038000_OFFS 0x38000
-
-#define QIB_7220_RcvTIDArray0_OFFS 0x53000
-
-#define QIB_7220_PIOLaunchFIFO_OFFS 0x64000
-
-#define QIB_7220_MEM_064480_OFFS 0x64480
-
-#define QIB_7220_SendPIOpbcCache_OFFS 0x64800
-
-#define QIB_7220_MEM_064C80_OFFS 0x64C80
-
-#define QIB_7220_PreLaunchFIFO_OFFS 0x65000
-
-#define QIB_7220_MEM_065080_OFFS 0x65080
-
-#define QIB_7220_ScoreBoard_OFFS 0x65400
-
-#define QIB_7220_MEM_065440_OFFS 0x65440
-
-#define QIB_7220_DescriptorFIFO_OFFS 0x65800
-
-#define QIB_7220_MEM_065880_OFFS 0x65880
-
-#define QIB_7220_RcvBuf1_OFFS 0x72000
-
-#define QIB_7220_MEM_074800_OFFS 0x74800
-
-#define QIB_7220_RcvBuf2_OFFS 0x75000
-
-#define QIB_7220_MEM_076400_OFFS 0x76400
-
-#define QIB_7220_RcvFlags_OFFS 0x77000
-
-#define QIB_7220_MEM_078400_OFFS 0x78400
-
-#define QIB_7220_RcvLookupBuf1_OFFS 0x79000
-
-#define QIB_7220_MEM_07A400_OFFS 0x7A400
-
-#define QIB_7220_RcvDMADatBuf_OFFS 0x7B000
-
-#define QIB_7220_RcvDMAHdrBuf_OFFS 0x7B800
-
-#define QIB_7220_MiscRXEIntMem_OFFS 0x7C000
-
-#define QIB_7220_MEM_07D400_OFFS 0x7D400
-
-#define QIB_7220_PCIERcvBuf_OFFS 0x80000
-
-#define QIB_7220_PCIERetryBuf_OFFS 0x84000
-
-#define QIB_7220_PCIERcvBufRdToWrAddr_OFFS 0x88000
-
-#define QIB_7220_PCIECplBuf_OFFS 0x90000
-
-#define QIB_7220_IBSerDesMappTable_OFFS 0x94000
-
-#define QIB_7220_MEM_095000_OFFS 0x95000
-
-#define QIB_7220_SendBuf0_MA_OFFS 0x100000
-
-#define QIB_7220_MEM_1A0000_OFFS 0x1A0000
diff --git a/drivers/infiniband/hw/qib/qib_7322_regs.h b/drivers/infiniband/hw/qib/qib_7322_regs.h
deleted file mode 100644
index 32dc81ff8d4a..000000000000
--- a/drivers/infiniband/hw/qib/qib_7322_regs.h
+++ /dev/null
@@ -1,3163 +0,0 @@
-/*
- * Copyright (c) 2008, 2009, 2010 QLogic Corporation. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-/* This file is mechanically generated from RTL. Any hand-edits will be lost! */
-
-#define QIB_7322_Revision_OFFS 0x0
-#define QIB_7322_Revision_DEF 0x0000000002010601
-#define QIB_7322_Revision_R_Simulator_LSB 0x3F
-#define QIB_7322_Revision_R_Simulator_MSB 0x3F
-#define QIB_7322_Revision_R_Simulator_RMASK 0x1
-#define QIB_7322_Revision_R_Emulation_LSB 0x3E
-#define QIB_7322_Revision_R_Emulation_MSB 0x3E
-#define QIB_7322_Revision_R_Emulation_RMASK 0x1
-#define QIB_7322_Revision_R_Emulation_Revcode_LSB 0x28
-#define QIB_7322_Revision_R_Emulation_Revcode_MSB 0x3D
-#define QIB_7322_Revision_R_Emulation_Revcode_RMASK 0x3FFFFF
-#define QIB_7322_Revision_BoardID_LSB 0x20
-#define QIB_7322_Revision_BoardID_MSB 0x27
-#define QIB_7322_Revision_BoardID_RMASK 0xFF
-#define QIB_7322_Revision_R_SW_LSB 0x18
-#define QIB_7322_Revision_R_SW_MSB 0x1F
-#define QIB_7322_Revision_R_SW_RMASK 0xFF
-#define QIB_7322_Revision_R_Arch_LSB 0x10
-#define QIB_7322_Revision_R_Arch_MSB 0x17
-#define QIB_7322_Revision_R_Arch_RMASK 0xFF
-#define QIB_7322_Revision_R_ChipRevMajor_LSB 0x8
-#define QIB_7322_Revision_R_ChipRevMajor_MSB 0xF
-#define QIB_7322_Revision_R_ChipRevMajor_RMASK 0xFF
-#define QIB_7322_Revision_R_ChipRevMinor_LSB 0x0
-#define QIB_7322_Revision_R_ChipRevMinor_MSB 0x7
-#define QIB_7322_Revision_R_ChipRevMinor_RMASK 0xFF
-
-#define QIB_7322_Control_OFFS 0x8
-#define QIB_7322_Control_DEF 0x0000000000000000
-#define QIB_7322_Control_PCIECplQDiagEn_LSB 0x6
-#define QIB_7322_Control_PCIECplQDiagEn_MSB 0x6
-#define QIB_7322_Control_PCIECplQDiagEn_RMASK 0x1
-#define QIB_7322_Control_PCIEPostQDiagEn_LSB 0x5
-#define QIB_7322_Control_PCIEPostQDiagEn_MSB 0x5
-#define QIB_7322_Control_PCIEPostQDiagEn_RMASK 0x1
-#define QIB_7322_Control_SDmaDescFetchPriorityEn_LSB 0x4
-#define QIB_7322_Control_SDmaDescFetchPriorityEn_MSB 0x4
-#define QIB_7322_Control_SDmaDescFetchPriorityEn_RMASK 0x1
-#define QIB_7322_Control_PCIERetryBufDiagEn_LSB 0x3
-#define QIB_7322_Control_PCIERetryBufDiagEn_MSB 0x3
-#define QIB_7322_Control_PCIERetryBufDiagEn_RMASK 0x1
-#define QIB_7322_Control_FreezeMode_LSB 0x1
-#define QIB_7322_Control_FreezeMode_MSB 0x1
-#define QIB_7322_Control_FreezeMode_RMASK 0x1
-#define QIB_7322_Control_SyncReset_LSB 0x0
-#define QIB_7322_Control_SyncReset_MSB 0x0
-#define QIB_7322_Control_SyncReset_RMASK 0x1
-
-#define QIB_7322_PageAlign_OFFS 0x10
-#define QIB_7322_PageAlign_DEF 0x0000000000001000
-
-#define QIB_7322_ContextCnt_OFFS 0x18
-#define QIB_7322_ContextCnt_DEF 0x0000000000000012
-
-#define QIB_7322_Scratch_OFFS 0x20
-#define QIB_7322_Scratch_DEF 0x0000000000000000
-
-#define QIB_7322_CntrRegBase_OFFS 0x28
-#define QIB_7322_CntrRegBase_DEF 0x0000000000011000
-
-#define QIB_7322_SendRegBase_OFFS 0x30
-#define QIB_7322_SendRegBase_DEF 0x0000000000003000
-
-#define QIB_7322_UserRegBase_OFFS 0x38
-#define QIB_7322_UserRegBase_DEF 0x0000000000200000
-
-#define QIB_7322_IntMask_OFFS 0x68
-#define QIB_7322_IntMask_DEF 0x0000000000000000
-#define QIB_7322_IntMask_SDmaIntMask_1_LSB 0x3F
-#define QIB_7322_IntMask_SDmaIntMask_1_MSB 0x3F
-#define QIB_7322_IntMask_SDmaIntMask_1_RMASK 0x1
-#define QIB_7322_IntMask_SDmaIntMask_0_LSB 0x3E
-#define QIB_7322_IntMask_SDmaIntMask_0_MSB 0x3E
-#define QIB_7322_IntMask_SDmaIntMask_0_RMASK 0x1
-#define QIB_7322_IntMask_SDmaProgressIntMask_1_LSB 0x3D
-#define QIB_7322_IntMask_SDmaProgressIntMask_1_MSB 0x3D
-#define QIB_7322_IntMask_SDmaProgressIntMask_1_RMASK 0x1
-#define QIB_7322_IntMask_SDmaProgressIntMask_0_LSB 0x3C
-#define QIB_7322_IntMask_SDmaProgressIntMask_0_MSB 0x3C
-#define QIB_7322_IntMask_SDmaProgressIntMask_0_RMASK 0x1
-#define QIB_7322_IntMask_SDmaIdleIntMask_1_LSB 0x3B
-#define QIB_7322_IntMask_SDmaIdleIntMask_1_MSB 0x3B
-#define QIB_7322_IntMask_SDmaIdleIntMask_1_RMASK 0x1
-#define QIB_7322_IntMask_SDmaIdleIntMask_0_LSB 0x3A
-#define QIB_7322_IntMask_SDmaIdleIntMask_0_MSB 0x3A
-#define QIB_7322_IntMask_SDmaIdleIntMask_0_RMASK 0x1
-#define QIB_7322_IntMask_SDmaCleanupDoneMask_1_LSB 0x39
-#define QIB_7322_IntMask_SDmaCleanupDoneMask_1_MSB 0x39
-#define QIB_7322_IntMask_SDmaCleanupDoneMask_1_RMASK 0x1
-#define QIB_7322_IntMask_SDmaCleanupDoneMask_0_LSB 0x38
-#define QIB_7322_IntMask_SDmaCleanupDoneMask_0_MSB 0x38
-#define QIB_7322_IntMask_SDmaCleanupDoneMask_0_RMASK 0x1
-#define QIB_7322_IntMask_RcvUrg17IntMask_LSB 0x31
-#define QIB_7322_IntMask_RcvUrg17IntMask_MSB 0x31
-#define QIB_7322_IntMask_RcvUrg17IntMask_RMASK 0x1
-#define QIB_7322_IntMask_RcvUrg16IntMask_LSB 0x30
-#define QIB_7322_IntMask_RcvUrg16IntMask_MSB 0x30
-#define QIB_7322_IntMask_RcvUrg16IntMask_RMASK 0x1
-#define QIB_7322_IntMask_RcvUrg15IntMask_LSB 0x2F
-#define QIB_7322_IntMask_RcvUrg15IntMask_MSB 0x2F
-#define QIB_7322_IntMask_RcvUrg15IntMask_RMASK 0x1
-#define QIB_7322_IntMask_RcvUrg14IntMask_LSB 0x2E
-#define QIB_7322_IntMask_RcvUrg14IntMask_MSB 0x2E
-#define QIB_7322_IntMask_RcvUrg14IntMask_RMASK 0x1
-#define QIB_7322_IntMask_RcvUrg13IntMask_LSB 0x2D
-#define QIB_7322_IntMask_RcvUrg13IntMask_MSB 0x2D
-#define QIB_7322_IntMask_RcvUrg13IntMask_RMASK 0x1
-#define QIB_7322_IntMask_RcvUrg12IntMask_LSB 0x2C
-#define QIB_7322_IntMask_RcvUrg12IntMask_MSB 0x2C
-#define QIB_7322_IntMask_RcvUrg12IntMask_RMASK 0x1
-#define QIB_7322_IntMask_RcvUrg11IntMask_LSB 0x2B
-#define QIB_7322_IntMask_RcvUrg11IntMask_MSB 0x2B
-#define QIB_7322_IntMask_RcvUrg11IntMask_RMASK 0x1
-#define QIB_7322_IntMask_RcvUrg10IntMask_LSB 0x2A
-#define QIB_7322_IntMask_RcvUrg10IntMask_MSB 0x2A
-#define QIB_7322_IntMask_RcvUrg10IntMask_RMASK 0x1
-#define QIB_7322_IntMask_RcvUrg9IntMask_LSB 0x29
-#define QIB_7322_IntMask_RcvUrg9IntMask_MSB 0x29
-#define QIB_7322_IntMask_RcvUrg9IntMask_RMASK 0x1
-#define QIB_7322_IntMask_RcvUrg8IntMask_LSB 0x28
-#define QIB_7322_IntMask_RcvUrg8IntMask_MSB 0x28
-#define QIB_7322_IntMask_RcvUrg8IntMask_RMASK 0x1
-#define QIB_7322_IntMask_RcvUrg7IntMask_LSB 0x27
-#define QIB_7322_IntMask_RcvUrg7IntMask_MSB 0x27
-#define QIB_7322_IntMask_RcvUrg7IntMask_RMASK 0x1
-#define QIB_7322_IntMask_RcvUrg6IntMask_LSB 0x26
-#define QIB_7322_IntMask_RcvUrg6IntMask_MSB 0x26
-#define QIB_7322_IntMask_RcvUrg6IntMask_RMASK 0x1
-#define QIB_7322_IntMask_RcvUrg5IntMask_LSB 0x25
-#define QIB_7322_IntMask_RcvUrg5IntMask_MSB 0x25
-#define QIB_7322_IntMask_RcvUrg5IntMask_RMASK 0x1
-#define QIB_7322_IntMask_RcvUrg4IntMask_LSB 0x24
-#define QIB_7322_IntMask_RcvUrg4IntMask_MSB 0x24
-#define QIB_7322_IntMask_RcvUrg4IntMask_RMASK 0x1
-#define QIB_7322_IntMask_RcvUrg3IntMask_LSB 0x23
-#define QIB_7322_IntMask_RcvUrg3IntMask_MSB 0x23
-#define QIB_7322_IntMask_RcvUrg3IntMask_RMASK 0x1
-#define QIB_7322_IntMask_RcvUrg2IntMask_LSB 0x22
-#define QIB_7322_IntMask_RcvUrg2IntMask_MSB 0x22
-#define QIB_7322_IntMask_RcvUrg2IntMask_RMASK 0x1
-#define QIB_7322_IntMask_RcvUrg1IntMask_LSB 0x21
-#define QIB_7322_IntMask_RcvUrg1IntMask_MSB 0x21
-#define QIB_7322_IntMask_RcvUrg1IntMask_RMASK 0x1
-#define QIB_7322_IntMask_RcvUrg0IntMask_LSB 0x20
-#define QIB_7322_IntMask_RcvUrg0IntMask_MSB 0x20
-#define QIB_7322_IntMask_RcvUrg0IntMask_RMASK 0x1
-#define QIB_7322_IntMask_ErrIntMask_1_LSB 0x1F
-#define QIB_7322_IntMask_ErrIntMask_1_MSB 0x1F
-#define QIB_7322_IntMask_ErrIntMask_1_RMASK 0x1
-#define QIB_7322_IntMask_ErrIntMask_0_LSB 0x1E
-#define QIB_7322_IntMask_ErrIntMask_0_MSB 0x1E
-#define QIB_7322_IntMask_ErrIntMask_0_RMASK 0x1
-#define QIB_7322_IntMask_ErrIntMask_LSB 0x1D
-#define QIB_7322_IntMask_ErrIntMask_MSB 0x1D
-#define QIB_7322_IntMask_ErrIntMask_RMASK 0x1
-#define QIB_7322_IntMask_AssertGPIOIntMask_LSB 0x1C
-#define QIB_7322_IntMask_AssertGPIOIntMask_MSB 0x1C
-#define QIB_7322_IntMask_AssertGPIOIntMask_RMASK 0x1
-#define QIB_7322_IntMask_SendDoneIntMask_1_LSB 0x19
-#define QIB_7322_IntMask_SendDoneIntMask_1_MSB 0x19
-#define QIB_7322_IntMask_SendDoneIntMask_1_RMASK 0x1
-#define QIB_7322_IntMask_SendDoneIntMask_0_LSB 0x18
-#define QIB_7322_IntMask_SendDoneIntMask_0_MSB 0x18
-#define QIB_7322_IntMask_SendDoneIntMask_0_RMASK 0x1
-#define QIB_7322_IntMask_SendBufAvailIntMask_LSB 0x17
-#define QIB_7322_IntMask_SendBufAvailIntMask_MSB 0x17
-#define QIB_7322_IntMask_SendBufAvailIntMask_RMASK 0x1
-#define QIB_7322_IntMask_RcvAvail17IntMask_LSB 0x11
-#define QIB_7322_IntMask_RcvAvail17IntMask_MSB 0x11
-#define QIB_7322_IntMask_RcvAvail17IntMask_RMASK 0x1
-#define QIB_7322_IntMask_RcvAvail16IntMask_LSB 0x10
-#define QIB_7322_IntMask_RcvAvail16IntMask_MSB 0x10
-#define QIB_7322_IntMask_RcvAvail16IntMask_RMASK 0x1
-#define QIB_7322_IntMask_RcvAvail15IntMask_LSB 0xF
-#define QIB_7322_IntMask_RcvAvail15IntMask_MSB 0xF
-#define QIB_7322_IntMask_RcvAvail15IntMask_RMASK 0x1
-#define QIB_7322_IntMask_RcvAvail14IntMask_LSB 0xE
-#define QIB_7322_IntMask_RcvAvail14IntMask_MSB 0xE
-#define QIB_7322_IntMask_RcvAvail14IntMask_RMASK 0x1
-#define QIB_7322_IntMask_RcvAvail13IntMask_LSB 0xD
-#define QIB_7322_IntMask_RcvAvail13IntMask_MSB 0xD
-#define QIB_7322_IntMask_RcvAvail13IntMask_RMASK 0x1
-#define QIB_7322_IntMask_RcvAvail12IntMask_LSB 0xC
-#define QIB_7322_IntMask_RcvAvail12IntMask_MSB 0xC
-#define QIB_7322_IntMask_RcvAvail12IntMask_RMASK 0x1
-#define QIB_7322_IntMask_RcvAvail11IntMask_LSB 0xB
-#define QIB_7322_IntMask_RcvAvail11IntMask_MSB 0xB
-#define QIB_7322_IntMask_RcvAvail11IntMask_RMASK 0x1
-#define QIB_7322_IntMask_RcvAvail10IntMask_LSB 0xA
-#define QIB_7322_IntMask_RcvAvail10IntMask_MSB 0xA
-#define QIB_7322_IntMask_RcvAvail10IntMask_RMASK 0x1
-#define QIB_7322_IntMask_RcvAvail9IntMask_LSB 0x9
-#define QIB_7322_IntMask_RcvAvail9IntMask_MSB 0x9
-#define QIB_7322_IntMask_RcvAvail9IntMask_RMASK 0x1
-#define QIB_7322_IntMask_RcvAvail8IntMask_LSB 0x8
-#define QIB_7322_IntMask_RcvAvail8IntMask_MSB 0x8
-#define QIB_7322_IntMask_RcvAvail8IntMask_RMASK 0x1
-#define QIB_7322_IntMask_RcvAvail7IntMask_LSB 0x7
-#define QIB_7322_IntMask_RcvAvail7IntMask_MSB 0x7
-#define QIB_7322_IntMask_RcvAvail7IntMask_RMASK 0x1
-#define QIB_7322_IntMask_RcvAvail6IntMask_LSB 0x6
-#define QIB_7322_IntMask_RcvAvail6IntMask_MSB 0x6
-#define QIB_7322_IntMask_RcvAvail6IntMask_RMASK 0x1
-#define QIB_7322_IntMask_RcvAvail5IntMask_LSB 0x5
-#define QIB_7322_IntMask_RcvAvail5IntMask_MSB 0x5
-#define QIB_7322_IntMask_RcvAvail5IntMask_RMASK 0x1
-#define QIB_7322_IntMask_RcvAvail4IntMask_LSB 0x4
-#define QIB_7322_IntMask_RcvAvail4IntMask_MSB 0x4
-#define QIB_7322_IntMask_RcvAvail4IntMask_RMASK 0x1
-#define QIB_7322_IntMask_RcvAvail3IntMask_LSB 0x3
-#define QIB_7322_IntMask_RcvAvail3IntMask_MSB 0x3
-#define QIB_7322_IntMask_RcvAvail3IntMask_RMASK 0x1
-#define QIB_7322_IntMask_RcvAvail2IntMask_LSB 0x2
-#define QIB_7322_IntMask_RcvAvail2IntMask_MSB 0x2
-#define QIB_7322_IntMask_RcvAvail2IntMask_RMASK 0x1
-#define QIB_7322_IntMask_RcvAvail1IntMask_LSB 0x1
-#define QIB_7322_IntMask_RcvAvail1IntMask_MSB 0x1
-#define QIB_7322_IntMask_RcvAvail1IntMask_RMASK 0x1
-#define QIB_7322_IntMask_RcvAvail0IntMask_LSB 0x0
-#define QIB_7322_IntMask_RcvAvail0IntMask_MSB 0x0
-#define QIB_7322_IntMask_RcvAvail0IntMask_RMASK 0x1
-
-#define QIB_7322_IntStatus_OFFS 0x70
-#define QIB_7322_IntStatus_DEF 0x0000000000000000
-#define QIB_7322_IntStatus_SDmaInt_1_LSB 0x3F
-#define QIB_7322_IntStatus_SDmaInt_1_MSB 0x3F
-#define QIB_7322_IntStatus_SDmaInt_1_RMASK 0x1
-#define QIB_7322_IntStatus_SDmaInt_0_LSB 0x3E
-#define QIB_7322_IntStatus_SDmaInt_0_MSB 0x3E
-#define QIB_7322_IntStatus_SDmaInt_0_RMASK 0x1
-#define QIB_7322_IntStatus_SDmaProgressInt_1_LSB 0x3D
-#define QIB_7322_IntStatus_SDmaProgressInt_1_MSB 0x3D
-#define QIB_7322_IntStatus_SDmaProgressInt_1_RMASK 0x1
-#define QIB_7322_IntStatus_SDmaProgressInt_0_LSB 0x3C
-#define QIB_7322_IntStatus_SDmaProgressInt_0_MSB 0x3C
-#define QIB_7322_IntStatus_SDmaProgressInt_0_RMASK 0x1
-#define QIB_7322_IntStatus_SDmaIdleInt_1_LSB 0x3B
-#define QIB_7322_IntStatus_SDmaIdleInt_1_MSB 0x3B
-#define QIB_7322_IntStatus_SDmaIdleInt_1_RMASK 0x1
-#define QIB_7322_IntStatus_SDmaIdleInt_0_LSB 0x3A
-#define QIB_7322_IntStatus_SDmaIdleInt_0_MSB 0x3A
-#define QIB_7322_IntStatus_SDmaIdleInt_0_RMASK 0x1
-#define QIB_7322_IntStatus_SDmaCleanupDone_1_LSB 0x39
-#define QIB_7322_IntStatus_SDmaCleanupDone_1_MSB 0x39
-#define QIB_7322_IntStatus_SDmaCleanupDone_1_RMASK 0x1
-#define QIB_7322_IntStatus_SDmaCleanupDone_0_LSB 0x38
-#define QIB_7322_IntStatus_SDmaCleanupDone_0_MSB 0x38
-#define QIB_7322_IntStatus_SDmaCleanupDone_0_RMASK 0x1
-#define QIB_7322_IntStatus_RcvUrg17_LSB 0x31
-#define QIB_7322_IntStatus_RcvUrg17_MSB 0x31
-#define QIB_7322_IntStatus_RcvUrg17_RMASK 0x1
-#define QIB_7322_IntStatus_RcvUrg16_LSB 0x30
-#define QIB_7322_IntStatus_RcvUrg16_MSB 0x30
-#define QIB_7322_IntStatus_RcvUrg16_RMASK 0x1
-#define QIB_7322_IntStatus_RcvUrg15_LSB 0x2F
-#define QIB_7322_IntStatus_RcvUrg15_MSB 0x2F
-#define QIB_7322_IntStatus_RcvUrg15_RMASK 0x1
-#define QIB_7322_IntStatus_RcvUrg14_LSB 0x2E
-#define QIB_7322_IntStatus_RcvUrg14_MSB 0x2E
-#define QIB_7322_IntStatus_RcvUrg14_RMASK 0x1
-#define QIB_7322_IntStatus_RcvUrg13_LSB 0x2D
-#define QIB_7322_IntStatus_RcvUrg13_MSB 0x2D
-#define QIB_7322_IntStatus_RcvUrg13_RMASK 0x1
-#define QIB_7322_IntStatus_RcvUrg12_LSB 0x2C
-#define QIB_7322_IntStatus_RcvUrg12_MSB 0x2C
-#define QIB_7322_IntStatus_RcvUrg12_RMASK 0x1
-#define QIB_7322_IntStatus_RcvUrg11_LSB 0x2B
-#define QIB_7322_IntStatus_RcvUrg11_MSB 0x2B
-#define QIB_7322_IntStatus_RcvUrg11_RMASK 0x1
-#define QIB_7322_IntStatus_RcvUrg10_LSB 0x2A
-#define QIB_7322_IntStatus_RcvUrg10_MSB 0x2A
-#define QIB_7322_IntStatus_RcvUrg10_RMASK 0x1
-#define QIB_7322_IntStatus_RcvUrg9_LSB 0x29
-#define QIB_7322_IntStatus_RcvUrg9_MSB 0x29
-#define QIB_7322_IntStatus_RcvUrg9_RMASK 0x1
-#define QIB_7322_IntStatus_RcvUrg8_LSB 0x28
-#define QIB_7322_IntStatus_RcvUrg8_MSB 0x28
-#define QIB_7322_IntStatus_RcvUrg8_RMASK 0x1
-#define QIB_7322_IntStatus_RcvUrg7_LSB 0x27
-#define QIB_7322_IntStatus_RcvUrg7_MSB 0x27
-#define QIB_7322_IntStatus_RcvUrg7_RMASK 0x1
-#define QIB_7322_IntStatus_RcvUrg6_LSB 0x26
-#define QIB_7322_IntStatus_RcvUrg6_MSB 0x26
-#define QIB_7322_IntStatus_RcvUrg6_RMASK 0x1
-#define QIB_7322_IntStatus_RcvUrg5_LSB 0x25
-#define QIB_7322_IntStatus_RcvUrg5_MSB 0x25
-#define QIB_7322_IntStatus_RcvUrg5_RMASK 0x1
-#define QIB_7322_IntStatus_RcvUrg4_LSB 0x24
-#define QIB_7322_IntStatus_RcvUrg4_MSB 0x24
-#define QIB_7322_IntStatus_RcvUrg4_RMASK 0x1
-#define QIB_7322_IntStatus_RcvUrg3_LSB 0x23
-#define QIB_7322_IntStatus_RcvUrg3_MSB 0x23
-#define QIB_7322_IntStatus_RcvUrg3_RMASK 0x1
-#define QIB_7322_IntStatus_RcvUrg2_LSB 0x22
-#define QIB_7322_IntStatus_RcvUrg2_MSB 0x22
-#define QIB_7322_IntStatus_RcvUrg2_RMASK 0x1
-#define QIB_7322_IntStatus_RcvUrg1_LSB 0x21
-#define QIB_7322_IntStatus_RcvUrg1_MSB 0x21
-#define QIB_7322_IntStatus_RcvUrg1_RMASK 0x1
-#define QIB_7322_IntStatus_RcvUrg0_LSB 0x20
-#define QIB_7322_IntStatus_RcvUrg0_MSB 0x20
-#define QIB_7322_IntStatus_RcvUrg0_RMASK 0x1
-#define QIB_7322_IntStatus_Err_1_LSB 0x1F
-#define QIB_7322_IntStatus_Err_1_MSB 0x1F
-#define QIB_7322_IntStatus_Err_1_RMASK 0x1
-#define QIB_7322_IntStatus_Err_0_LSB 0x1E
-#define QIB_7322_IntStatus_Err_0_MSB 0x1E
-#define QIB_7322_IntStatus_Err_0_RMASK 0x1
-#define QIB_7322_IntStatus_Err_LSB 0x1D
-#define QIB_7322_IntStatus_Err_MSB 0x1D
-#define QIB_7322_IntStatus_Err_RMASK 0x1
-#define QIB_7322_IntStatus_AssertGPIO_LSB 0x1C
-#define QIB_7322_IntStatus_AssertGPIO_MSB 0x1C
-#define QIB_7322_IntStatus_AssertGPIO_RMASK 0x1
-#define QIB_7322_IntStatus_SendDone_1_LSB 0x19
-#define QIB_7322_IntStatus_SendDone_1_MSB 0x19
-#define QIB_7322_IntStatus_SendDone_1_RMASK 0x1
-#define QIB_7322_IntStatus_SendDone_0_LSB 0x18
-#define QIB_7322_IntStatus_SendDone_0_MSB 0x18
-#define QIB_7322_IntStatus_SendDone_0_RMASK 0x1
-#define QIB_7322_IntStatus_SendBufAvail_LSB 0x17
-#define QIB_7322_IntStatus_SendBufAvail_MSB 0x17
-#define QIB_7322_IntStatus_SendBufAvail_RMASK 0x1
-#define QIB_7322_IntStatus_RcvAvail17_LSB 0x11
-#define QIB_7322_IntStatus_RcvAvail17_MSB 0x11
-#define QIB_7322_IntStatus_RcvAvail17_RMASK 0x1
-#define QIB_7322_IntStatus_RcvAvail16_LSB 0x10
-#define QIB_7322_IntStatus_RcvAvail16_MSB 0x10
-#define QIB_7322_IntStatus_RcvAvail16_RMASK 0x1
-#define QIB_7322_IntStatus_RcvAvail15_LSB 0xF
-#define QIB_7322_IntStatus_RcvAvail15_MSB 0xF
-#define QIB_7322_IntStatus_RcvAvail15_RMASK 0x1
-#define QIB_7322_IntStatus_RcvAvail14_LSB 0xE
-#define QIB_7322_IntStatus_RcvAvail14_MSB 0xE
-#define QIB_7322_IntStatus_RcvAvail14_RMASK 0x1
-#define QIB_7322_IntStatus_RcvAvail13_LSB 0xD
-#define QIB_7322_IntStatus_RcvAvail13_MSB 0xD
-#define QIB_7322_IntStatus_RcvAvail13_RMASK 0x1
-#define QIB_7322_IntStatus_RcvAvail12_LSB 0xC
-#define QIB_7322_IntStatus_RcvAvail12_MSB 0xC
-#define QIB_7322_IntStatus_RcvAvail12_RMASK 0x1
-#define QIB_7322_IntStatus_RcvAvail11_LSB 0xB
-#define QIB_7322_IntStatus_RcvAvail11_MSB 0xB
-#define QIB_7322_IntStatus_RcvAvail11_RMASK 0x1
-#define QIB_7322_IntStatus_RcvAvail10_LSB 0xA
-#define QIB_7322_IntStatus_RcvAvail10_MSB 0xA
-#define QIB_7322_IntStatus_RcvAvail10_RMASK 0x1
-#define QIB_7322_IntStatus_RcvAvail9_LSB 0x9
-#define QIB_7322_IntStatus_RcvAvail9_MSB 0x9
-#define QIB_7322_IntStatus_RcvAvail9_RMASK 0x1
-#define QIB_7322_IntStatus_RcvAvail8_LSB 0x8
-#define QIB_7322_IntStatus_RcvAvail8_MSB 0x8
-#define QIB_7322_IntStatus_RcvAvail8_RMASK 0x1
-#define QIB_7322_IntStatus_RcvAvail7_LSB 0x7
-#define QIB_7322_IntStatus_RcvAvail7_MSB 0x7
-#define QIB_7322_IntStatus_RcvAvail7_RMASK 0x1
-#define QIB_7322_IntStatus_RcvAvail6_LSB 0x6
-#define QIB_7322_IntStatus_RcvAvail6_MSB 0x6
-#define QIB_7322_IntStatus_RcvAvail6_RMASK 0x1
-#define QIB_7322_IntStatus_RcvAvail5_LSB 0x5
-#define QIB_7322_IntStatus_RcvAvail5_MSB 0x5
-#define QIB_7322_IntStatus_RcvAvail5_RMASK 0x1
-#define QIB_7322_IntStatus_RcvAvail4_LSB 0x4
-#define QIB_7322_IntStatus_RcvAvail4_MSB 0x4
-#define QIB_7322_IntStatus_RcvAvail4_RMASK 0x1
-#define QIB_7322_IntStatus_RcvAvail3_LSB 0x3
-#define QIB_7322_IntStatus_RcvAvail3_MSB 0x3
-#define QIB_7322_IntStatus_RcvAvail3_RMASK 0x1
-#define QIB_7322_IntStatus_RcvAvail2_LSB 0x2
-#define QIB_7322_IntStatus_RcvAvail2_MSB 0x2
-#define QIB_7322_IntStatus_RcvAvail2_RMASK 0x1
-#define QIB_7322_IntStatus_RcvAvail1_LSB 0x1
-#define QIB_7322_IntStatus_RcvAvail1_MSB 0x1
-#define QIB_7322_IntStatus_RcvAvail1_RMASK 0x1
-#define QIB_7322_IntStatus_RcvAvail0_LSB 0x0
-#define QIB_7322_IntStatus_RcvAvail0_MSB 0x0
-#define QIB_7322_IntStatus_RcvAvail0_RMASK 0x1
-
-#define QIB_7322_IntClear_OFFS 0x78
-#define QIB_7322_IntClear_DEF 0x0000000000000000
-#define QIB_7322_IntClear_SDmaIntClear_1_LSB 0x3F
-#define QIB_7322_IntClear_SDmaIntClear_1_MSB 0x3F
-#define QIB_7322_IntClear_SDmaIntClear_1_RMASK 0x1
-#define QIB_7322_IntClear_SDmaIntClear_0_LSB 0x3E
-#define QIB_7322_IntClear_SDmaIntClear_0_MSB 0x3E
-#define QIB_7322_IntClear_SDmaIntClear_0_RMASK 0x1
-#define QIB_7322_IntClear_SDmaProgressIntClear_1_LSB 0x3D
-#define QIB_7322_IntClear_SDmaProgressIntClear_1_MSB 0x3D
-#define QIB_7322_IntClear_SDmaProgressIntClear_1_RMASK 0x1
-#define QIB_7322_IntClear_SDmaProgressIntClear_0_LSB 0x3C
-#define QIB_7322_IntClear_SDmaProgressIntClear_0_MSB 0x3C
-#define QIB_7322_IntClear_SDmaProgressIntClear_0_RMASK 0x1
-#define QIB_7322_IntClear_SDmaIdleIntClear_1_LSB 0x3B
-#define QIB_7322_IntClear_SDmaIdleIntClear_1_MSB 0x3B
-#define QIB_7322_IntClear_SDmaIdleIntClear_1_RMASK 0x1
-#define QIB_7322_IntClear_SDmaIdleIntClear_0_LSB 0x3A
-#define QIB_7322_IntClear_SDmaIdleIntClear_0_MSB 0x3A
-#define QIB_7322_IntClear_SDmaIdleIntClear_0_RMASK 0x1
-#define QIB_7322_IntClear_SDmaCleanupDoneClear_1_LSB 0x39
-#define QIB_7322_IntClear_SDmaCleanupDoneClear_1_MSB 0x39
-#define QIB_7322_IntClear_SDmaCleanupDoneClear_1_RMASK 0x1
-#define QIB_7322_IntClear_SDmaCleanupDoneClear_0_LSB 0x38
-#define QIB_7322_IntClear_SDmaCleanupDoneClear_0_MSB 0x38
-#define QIB_7322_IntClear_SDmaCleanupDoneClear_0_RMASK 0x1
-#define QIB_7322_IntClear_RcvUrg17IntClear_LSB 0x31
-#define QIB_7322_IntClear_RcvUrg17IntClear_MSB 0x31
-#define QIB_7322_IntClear_RcvUrg17IntClear_RMASK 0x1
-#define QIB_7322_IntClear_RcvUrg16IntClear_LSB 0x30
-#define QIB_7322_IntClear_RcvUrg16IntClear_MSB 0x30
-#define QIB_7322_IntClear_RcvUrg16IntClear_RMASK 0x1
-#define QIB_7322_IntClear_RcvUrg15IntClear_LSB 0x2F
-#define QIB_7322_IntClear_RcvUrg15IntClear_MSB 0x2F
-#define QIB_7322_IntClear_RcvUrg15IntClear_RMASK 0x1
-#define QIB_7322_IntClear_RcvUrg14IntClear_LSB 0x2E
-#define QIB_7322_IntClear_RcvUrg14IntClear_MSB 0x2E
-#define QIB_7322_IntClear_RcvUrg14IntClear_RMASK 0x1
-#define QIB_7322_IntClear_RcvUrg13IntClear_LSB 0x2D
-#define QIB_7322_IntClear_RcvUrg13IntClear_MSB 0x2D
-#define QIB_7322_IntClear_RcvUrg13IntClear_RMASK 0x1
-#define QIB_7322_IntClear_RcvUrg12IntClear_LSB 0x2C
-#define QIB_7322_IntClear_RcvUrg12IntClear_MSB 0x2C
-#define QIB_7322_IntClear_RcvUrg12IntClear_RMASK 0x1
-#define QIB_7322_IntClear_RcvUrg11IntClear_LSB 0x2B
-#define QIB_7322_IntClear_RcvUrg11IntClear_MSB 0x2B
-#define QIB_7322_IntClear_RcvUrg11IntClear_RMASK 0x1
-#define QIB_7322_IntClear_RcvUrg10IntClear_LSB 0x2A
-#define QIB_7322_IntClear_RcvUrg10IntClear_MSB 0x2A
-#define QIB_7322_IntClear_RcvUrg10IntClear_RMASK 0x1
-#define QIB_7322_IntClear_RcvUrg9IntClear_LSB 0x29
-#define QIB_7322_IntClear_RcvUrg9IntClear_MSB 0x29
-#define QIB_7322_IntClear_RcvUrg9IntClear_RMASK 0x1
-#define QIB_7322_IntClear_RcvUrg8IntClear_LSB 0x28
-#define QIB_7322_IntClear_RcvUrg8IntClear_MSB 0x28
-#define QIB_7322_IntClear_RcvUrg8IntClear_RMASK 0x1
-#define QIB_7322_IntClear_RcvUrg7IntClear_LSB 0x27
-#define QIB_7322_IntClear_RcvUrg7IntClear_MSB 0x27
-#define QIB_7322_IntClear_RcvUrg7IntClear_RMASK 0x1
-#define QIB_7322_IntClear_RcvUrg6IntClear_LSB 0x26
-#define QIB_7322_IntClear_RcvUrg6IntClear_MSB 0x26
-#define QIB_7322_IntClear_RcvUrg6IntClear_RMASK 0x1
-#define QIB_7322_IntClear_RcvUrg5IntClear_LSB 0x25
-#define QIB_7322_IntClear_RcvUrg5IntClear_MSB 0x25
-#define QIB_7322_IntClear_RcvUrg5IntClear_RMASK 0x1
-#define QIB_7322_IntClear_RcvUrg4IntClear_LSB 0x24
-#define QIB_7322_IntClear_RcvUrg4IntClear_MSB 0x24
-#define QIB_7322_IntClear_RcvUrg4IntClear_RMASK 0x1
-#define QIB_7322_IntClear_RcvUrg3IntClear_LSB 0x23
-#define QIB_7322_IntClear_RcvUrg3IntClear_MSB 0x23
-#define QIB_7322_IntClear_RcvUrg3IntClear_RMASK 0x1
-#define QIB_7322_IntClear_RcvUrg2IntClear_LSB 0x22
-#define QIB_7322_IntClear_RcvUrg2IntClear_MSB 0x22
-#define QIB_7322_IntClear_RcvUrg2IntClear_RMASK 0x1
-#define QIB_7322_IntClear_RcvUrg1IntClear_LSB 0x21
-#define QIB_7322_IntClear_RcvUrg1IntClear_MSB 0x21
-#define QIB_7322_IntClear_RcvUrg1IntClear_RMASK 0x1
-#define QIB_7322_IntClear_RcvUrg0IntClear_LSB 0x20
-#define QIB_7322_IntClear_RcvUrg0IntClear_MSB 0x20
-#define QIB_7322_IntClear_RcvUrg0IntClear_RMASK 0x1
-#define QIB_7322_IntClear_ErrIntClear_1_LSB 0x1F
-#define QIB_7322_IntClear_ErrIntClear_1_MSB 0x1F
-#define QIB_7322_IntClear_ErrIntClear_1_RMASK 0x1
-#define QIB_7322_IntClear_ErrIntClear_0_LSB 0x1E
-#define QIB_7322_IntClear_ErrIntClear_0_MSB 0x1E
-#define QIB_7322_IntClear_ErrIntClear_0_RMASK 0x1
-#define QIB_7322_IntClear_ErrIntClear_LSB 0x1D
-#define QIB_7322_IntClear_ErrIntClear_MSB 0x1D
-#define QIB_7322_IntClear_ErrIntClear_RMASK 0x1
-#define QIB_7322_IntClear_AssertGPIOIntClear_LSB 0x1C
-#define QIB_7322_IntClear_AssertGPIOIntClear_MSB 0x1C
-#define QIB_7322_IntClear_AssertGPIOIntClear_RMASK 0x1
-#define QIB_7322_IntClear_SendDoneIntClear_1_LSB 0x19
-#define QIB_7322_IntClear_SendDoneIntClear_1_MSB 0x19
-#define QIB_7322_IntClear_SendDoneIntClear_1_RMASK 0x1
-#define QIB_7322_IntClear_SendDoneIntClear_0_LSB 0x18
-#define QIB_7322_IntClear_SendDoneIntClear_0_MSB 0x18
-#define QIB_7322_IntClear_SendDoneIntClear_0_RMASK 0x1
-#define QIB_7322_IntClear_SendBufAvailIntClear_LSB 0x17
-#define QIB_7322_IntClear_SendBufAvailIntClear_MSB 0x17
-#define QIB_7322_IntClear_SendBufAvailIntClear_RMASK 0x1
-#define QIB_7322_IntClear_RcvAvail17IntClear_LSB 0x11
-#define QIB_7322_IntClear_RcvAvail17IntClear_MSB 0x11
-#define QIB_7322_IntClear_RcvAvail17IntClear_RMASK 0x1
-#define QIB_7322_IntClear_RcvAvail16IntClear_LSB 0x10
-#define QIB_7322_IntClear_RcvAvail16IntClear_MSB 0x10
-#define QIB_7322_IntClear_RcvAvail16IntClear_RMASK 0x1
-#define QIB_7322_IntClear_RcvAvail15IntClear_LSB 0xF
-#define QIB_7322_IntClear_RcvAvail15IntClear_MSB 0xF
-#define QIB_7322_IntClear_RcvAvail15IntClear_RMASK 0x1
-#define QIB_7322_IntClear_RcvAvail14IntClear_LSB 0xE
-#define QIB_7322_IntClear_RcvAvail14IntClear_MSB 0xE
-#define QIB_7322_IntClear_RcvAvail14IntClear_RMASK 0x1
-#define QIB_7322_IntClear_RcvAvail13IntClear_LSB 0xD
-#define QIB_7322_IntClear_RcvAvail13IntClear_MSB 0xD
-#define QIB_7322_IntClear_RcvAvail13IntClear_RMASK 0x1
-#define QIB_7322_IntClear_RcvAvail12IntClear_LSB 0xC
-#define QIB_7322_IntClear_RcvAvail12IntClear_MSB 0xC
-#define QIB_7322_IntClear_RcvAvail12IntClear_RMASK 0x1
-#define QIB_7322_IntClear_RcvAvail11IntClear_LSB 0xB
-#define QIB_7322_IntClear_RcvAvail11IntClear_MSB 0xB
-#define QIB_7322_IntClear_RcvAvail11IntClear_RMASK 0x1
-#define QIB_7322_IntClear_RcvAvail10IntClear_LSB 0xA
-#define QIB_7322_IntClear_RcvAvail10IntClear_MSB 0xA
-#define QIB_7322_IntClear_RcvAvail10IntClear_RMASK 0x1
-#define QIB_7322_IntClear_RcvAvail9IntClear_LSB 0x9
-#define QIB_7322_IntClear_RcvAvail9IntClear_MSB 0x9
-#define QIB_7322_IntClear_RcvAvail9IntClear_RMASK 0x1
-#define QIB_7322_IntClear_RcvAvail8IntClear_LSB 0x8
-#define QIB_7322_IntClear_RcvAvail8IntClear_MSB 0x8
-#define QIB_7322_IntClear_RcvAvail8IntClear_RMASK 0x1
-#define QIB_7322_IntClear_RcvAvail7IntClear_LSB 0x7
-#define QIB_7322_IntClear_RcvAvail7IntClear_MSB 0x7
-#define QIB_7322_IntClear_RcvAvail7IntClear_RMASK 0x1
-#define QIB_7322_IntClear_RcvAvail6IntClear_LSB 0x6
-#define QIB_7322_IntClear_RcvAvail6IntClear_MSB 0x6
-#define QIB_7322_IntClear_RcvAvail6IntClear_RMASK 0x1
-#define QIB_7322_IntClear_RcvAvail5IntClear_LSB 0x5
-#define QIB_7322_IntClear_RcvAvail5IntClear_MSB 0x5
-#define QIB_7322_IntClear_RcvAvail5IntClear_RMASK 0x1
-#define QIB_7322_IntClear_RcvAvail4IntClear_LSB 0x4
-#define QIB_7322_IntClear_RcvAvail4IntClear_MSB 0x4
-#define QIB_7322_IntClear_RcvAvail4IntClear_RMASK 0x1
-#define QIB_7322_IntClear_RcvAvail3IntClear_LSB 0x3
-#define QIB_7322_IntClear_RcvAvail3IntClear_MSB 0x3
-#define QIB_7322_IntClear_RcvAvail3IntClear_RMASK 0x1
-#define QIB_7322_IntClear_RcvAvail2IntClear_LSB 0x2
-#define QIB_7322_IntClear_RcvAvail2IntClear_MSB 0x2
-#define QIB_7322_IntClear_RcvAvail2IntClear_RMASK 0x1
-#define QIB_7322_IntClear_RcvAvail1IntClear_LSB 0x1
-#define QIB_7322_IntClear_RcvAvail1IntClear_MSB 0x1
-#define QIB_7322_IntClear_RcvAvail1IntClear_RMASK 0x1
-#define QIB_7322_IntClear_RcvAvail0IntClear_LSB 0x0
-#define QIB_7322_IntClear_RcvAvail0IntClear_MSB 0x0
-#define QIB_7322_IntClear_RcvAvail0IntClear_RMASK 0x1
-
-#define QIB_7322_ErrMask_OFFS 0x80
-#define QIB_7322_ErrMask_DEF 0x0000000000000000
-#define QIB_7322_ErrMask_ResetNegatedMask_LSB 0x3F
-#define QIB_7322_ErrMask_ResetNegatedMask_MSB 0x3F
-#define QIB_7322_ErrMask_ResetNegatedMask_RMASK 0x1
-#define QIB_7322_ErrMask_HardwareErrMask_LSB 0x3E
-#define QIB_7322_ErrMask_HardwareErrMask_MSB 0x3E
-#define QIB_7322_ErrMask_HardwareErrMask_RMASK 0x1
-#define QIB_7322_ErrMask_InvalidAddrErrMask_LSB 0x3D
-#define QIB_7322_ErrMask_InvalidAddrErrMask_MSB 0x3D
-#define QIB_7322_ErrMask_InvalidAddrErrMask_RMASK 0x1
-#define QIB_7322_ErrMask_SDmaVL15ErrMask_LSB 0x38
-#define QIB_7322_ErrMask_SDmaVL15ErrMask_MSB 0x38
-#define QIB_7322_ErrMask_SDmaVL15ErrMask_RMASK 0x1
-#define QIB_7322_ErrMask_SBufVL15MisUseErrMask_LSB 0x37
-#define QIB_7322_ErrMask_SBufVL15MisUseErrMask_MSB 0x37
-#define QIB_7322_ErrMask_SBufVL15MisUseErrMask_RMASK 0x1
-#define QIB_7322_ErrMask_InvalidEEPCmdMask_LSB 0x35
-#define QIB_7322_ErrMask_InvalidEEPCmdMask_MSB 0x35
-#define QIB_7322_ErrMask_InvalidEEPCmdMask_RMASK 0x1
-#define QIB_7322_ErrMask_RcvContextShareErrMask_LSB 0x34
-#define QIB_7322_ErrMask_RcvContextShareErrMask_MSB 0x34
-#define QIB_7322_ErrMask_RcvContextShareErrMask_RMASK 0x1
-#define QIB_7322_ErrMask_SendVLMismatchErrMask_LSB 0x24
-#define QIB_7322_ErrMask_SendVLMismatchErrMask_MSB 0x24
-#define QIB_7322_ErrMask_SendVLMismatchErrMask_RMASK 0x1
-#define QIB_7322_ErrMask_SendArmLaunchErrMask_LSB 0x23
-#define QIB_7322_ErrMask_SendArmLaunchErrMask_MSB 0x23
-#define QIB_7322_ErrMask_SendArmLaunchErrMask_RMASK 0x1
-#define QIB_7322_ErrMask_SendSpecialTriggerErrMask_LSB 0x1B
-#define QIB_7322_ErrMask_SendSpecialTriggerErrMask_MSB 0x1B
-#define QIB_7322_ErrMask_SendSpecialTriggerErrMask_RMASK 0x1
-#define QIB_7322_ErrMask_SDmaWrongPortErrMask_LSB 0x1A
-#define QIB_7322_ErrMask_SDmaWrongPortErrMask_MSB 0x1A
-#define QIB_7322_ErrMask_SDmaWrongPortErrMask_RMASK 0x1
-#define QIB_7322_ErrMask_SDmaBufMaskDuplicateErrMask_LSB 0x19
-#define QIB_7322_ErrMask_SDmaBufMaskDuplicateErrMask_MSB 0x19
-#define QIB_7322_ErrMask_SDmaBufMaskDuplicateErrMask_RMASK 0x1
-#define QIB_7322_ErrMask_RcvHdrFullErrMask_LSB 0xD
-#define QIB_7322_ErrMask_RcvHdrFullErrMask_MSB 0xD
-#define QIB_7322_ErrMask_RcvHdrFullErrMask_RMASK 0x1
-#define QIB_7322_ErrMask_RcvEgrFullErrMask_LSB 0xC
-#define QIB_7322_ErrMask_RcvEgrFullErrMask_MSB 0xC
-#define QIB_7322_ErrMask_RcvEgrFullErrMask_RMASK 0x1
-
-#define QIB_7322_ErrStatus_OFFS 0x88
-#define QIB_7322_ErrStatus_DEF 0x0000000000000000
-#define QIB_7322_ErrStatus_ResetNegated_LSB 0x3F
-#define QIB_7322_ErrStatus_ResetNegated_MSB 0x3F
-#define QIB_7322_ErrStatus_ResetNegated_RMASK 0x1
-#define QIB_7322_ErrStatus_HardwareErr_LSB 0x3E
-#define QIB_7322_ErrStatus_HardwareErr_MSB 0x3E
-#define QIB_7322_ErrStatus_HardwareErr_RMASK 0x1
-#define QIB_7322_ErrStatus_InvalidAddrErr_LSB 0x3D
-#define QIB_7322_ErrStatus_InvalidAddrErr_MSB 0x3D
-#define QIB_7322_ErrStatus_InvalidAddrErr_RMASK 0x1
-#define QIB_7322_ErrStatus_SDmaVL15Err_LSB 0x38
-#define QIB_7322_ErrStatus_SDmaVL15Err_MSB 0x38
-#define QIB_7322_ErrStatus_SDmaVL15Err_RMASK 0x1
-#define QIB_7322_ErrStatus_SBufVL15MisUseErr_LSB 0x37
-#define QIB_7322_ErrStatus_SBufVL15MisUseErr_MSB 0x37
-#define QIB_7322_ErrStatus_SBufVL15MisUseErr_RMASK 0x1
-#define QIB_7322_ErrStatus_InvalidEEPCmdErr_LSB 0x35
-#define QIB_7322_ErrStatus_InvalidEEPCmdErr_MSB 0x35
-#define QIB_7322_ErrStatus_InvalidEEPCmdErr_RMASK 0x1
-#define QIB_7322_ErrStatus_RcvContextShareErr_LSB 0x34
-#define QIB_7322_ErrStatus_RcvContextShareErr_MSB 0x34
-#define QIB_7322_ErrStatus_RcvContextShareErr_RMASK 0x1
-#define QIB_7322_ErrStatus_SendVLMismatchErr_LSB 0x24
-#define QIB_7322_ErrStatus_SendVLMismatchErr_MSB 0x24
-#define QIB_7322_ErrStatus_SendVLMismatchErr_RMASK 0x1
-#define QIB_7322_ErrStatus_SendArmLaunchErr_LSB 0x23
-#define QIB_7322_ErrStatus_SendArmLaunchErr_MSB 0x23
-#define QIB_7322_ErrStatus_SendArmLaunchErr_RMASK 0x1
-#define QIB_7322_ErrStatus_SendSpecialTriggerErr_LSB 0x1B
-#define QIB_7322_ErrStatus_SendSpecialTriggerErr_MSB 0x1B
-#define QIB_7322_ErrStatus_SendSpecialTriggerErr_RMASK 0x1
-#define QIB_7322_ErrStatus_SDmaWrongPortErr_LSB 0x1A
-#define QIB_7322_ErrStatus_SDmaWrongPortErr_MSB 0x1A
-#define QIB_7322_ErrStatus_SDmaWrongPortErr_RMASK 0x1
-#define QIB_7322_ErrStatus_SDmaBufMaskDuplicateErr_LSB 0x19
-#define QIB_7322_ErrStatus_SDmaBufMaskDuplicateErr_MSB 0x19
-#define QIB_7322_ErrStatus_SDmaBufMaskDuplicateErr_RMASK 0x1
-#define QIB_7322_ErrStatus_RcvHdrFullErr_LSB 0xD
-#define QIB_7322_ErrStatus_RcvHdrFullErr_MSB 0xD
-#define QIB_7322_ErrStatus_RcvHdrFullErr_RMASK 0x1
-#define QIB_7322_ErrStatus_RcvEgrFullErr_LSB 0xC
-#define QIB_7322_ErrStatus_RcvEgrFullErr_MSB 0xC
-#define QIB_7322_ErrStatus_RcvEgrFullErr_RMASK 0x1
-
-#define QIB_7322_ErrClear_OFFS 0x90
-#define QIB_7322_ErrClear_DEF 0x0000000000000000
-#define QIB_7322_ErrClear_ResetNegatedClear_LSB 0x3F
-#define QIB_7322_ErrClear_ResetNegatedClear_MSB 0x3F
-#define QIB_7322_ErrClear_ResetNegatedClear_RMASK 0x1
-#define QIB_7322_ErrClear_HardwareErrClear_LSB 0x3E
-#define QIB_7322_ErrClear_HardwareErrClear_MSB 0x3E
-#define QIB_7322_ErrClear_HardwareErrClear_RMASK 0x1
-#define QIB_7322_ErrClear_InvalidAddrErrClear_LSB 0x3D
-#define QIB_7322_ErrClear_InvalidAddrErrClear_MSB 0x3D
-#define QIB_7322_ErrClear_InvalidAddrErrClear_RMASK 0x1
-#define QIB_7322_ErrClear_SDmaVL15ErrClear_LSB 0x38
-#define QIB_7322_ErrClear_SDmaVL15ErrClear_MSB 0x38
-#define QIB_7322_ErrClear_SDmaVL15ErrClear_RMASK 0x1
-#define QIB_7322_ErrClear_SBufVL15MisUseErrClear_LSB 0x37
-#define QIB_7322_ErrClear_SBufVL15MisUseErrClear_MSB 0x37
-#define QIB_7322_ErrClear_SBufVL15MisUseErrClear_RMASK 0x1
-#define QIB_7322_ErrClear_InvalidEEPCmdErrClear_LSB 0x35
-#define QIB_7322_ErrClear_InvalidEEPCmdErrClear_MSB 0x35
-#define QIB_7322_ErrClear_InvalidEEPCmdErrClear_RMASK 0x1
-#define QIB_7322_ErrClear_RcvContextShareErrClear_LSB 0x34
-#define QIB_7322_ErrClear_RcvContextShareErrClear_MSB 0x34
-#define QIB_7322_ErrClear_RcvContextShareErrClear_RMASK 0x1
-#define QIB_7322_ErrClear_SendVLMismatchErrMask_LSB 0x24
-#define QIB_7322_ErrClear_SendVLMismatchErrMask_MSB 0x24
-#define QIB_7322_ErrClear_SendVLMismatchErrMask_RMASK 0x1
-#define QIB_7322_ErrClear_SendArmLaunchErrClear_LSB 0x23
-#define QIB_7322_ErrClear_SendArmLaunchErrClear_MSB 0x23
-#define QIB_7322_ErrClear_SendArmLaunchErrClear_RMASK 0x1
-#define QIB_7322_ErrClear_SendSpecialTriggerErrClear_LSB 0x1B
-#define QIB_7322_ErrClear_SendSpecialTriggerErrClear_MSB 0x1B
-#define QIB_7322_ErrClear_SendSpecialTriggerErrClear_RMASK 0x1
-#define QIB_7322_ErrClear_SDmaWrongPortErrClear_LSB 0x1A
-#define QIB_7322_ErrClear_SDmaWrongPortErrClear_MSB 0x1A
-#define QIB_7322_ErrClear_SDmaWrongPortErrClear_RMASK 0x1
-#define QIB_7322_ErrClear_SDmaBufMaskDuplicateErrClear_LSB 0x19
-#define QIB_7322_ErrClear_SDmaBufMaskDuplicateErrClear_MSB 0x19
-#define QIB_7322_ErrClear_SDmaBufMaskDuplicateErrClear_RMASK 0x1
-#define QIB_7322_ErrClear_RcvHdrFullErrClear_LSB 0xD
-#define QIB_7322_ErrClear_RcvHdrFullErrClear_MSB 0xD
-#define QIB_7322_ErrClear_RcvHdrFullErrClear_RMASK 0x1
-#define QIB_7322_ErrClear_RcvEgrFullErrClear_LSB 0xC
-#define QIB_7322_ErrClear_RcvEgrFullErrClear_MSB 0xC
-#define QIB_7322_ErrClear_RcvEgrFullErrClear_RMASK 0x1
-
-#define QIB_7322_HwErrMask_OFFS 0x98
-#define QIB_7322_HwErrMask_DEF 0x0000000000000000
-#define QIB_7322_HwErrMask_IBSerdesPClkNotDetectMask_1_LSB 0x3F
-#define QIB_7322_HwErrMask_IBSerdesPClkNotDetectMask_1_MSB 0x3F
-#define QIB_7322_HwErrMask_IBSerdesPClkNotDetectMask_1_RMASK 0x1
-#define QIB_7322_HwErrMask_IBSerdesPClkNotDetectMask_0_LSB 0x3E
-#define QIB_7322_HwErrMask_IBSerdesPClkNotDetectMask_0_MSB 0x3E
-#define QIB_7322_HwErrMask_IBSerdesPClkNotDetectMask_0_RMASK 0x1
-#define QIB_7322_HwErrMask_PCIESerdesPClkNotDetectMask_LSB 0x37
-#define QIB_7322_HwErrMask_PCIESerdesPClkNotDetectMask_MSB 0x37
-#define QIB_7322_HwErrMask_PCIESerdesPClkNotDetectMask_RMASK 0x1
-#define QIB_7322_HwErrMask_PowerOnBISTFailedMask_LSB 0x36
-#define QIB_7322_HwErrMask_PowerOnBISTFailedMask_MSB 0x36
-#define QIB_7322_HwErrMask_PowerOnBISTFailedMask_RMASK 0x1
-#define QIB_7322_HwErrMask_TempsenseTholdReachedMask_LSB 0x35
-#define QIB_7322_HwErrMask_TempsenseTholdReachedMask_MSB 0x35
-#define QIB_7322_HwErrMask_TempsenseTholdReachedMask_RMASK 0x1
-#define QIB_7322_HwErrMask_MemoryErrMask_LSB 0x30
-#define QIB_7322_HwErrMask_MemoryErrMask_MSB 0x30
-#define QIB_7322_HwErrMask_MemoryErrMask_RMASK 0x1
-#define QIB_7322_HwErrMask_pcie_phy_txParityErr_LSB 0x22
-#define QIB_7322_HwErrMask_pcie_phy_txParityErr_MSB 0x22
-#define QIB_7322_HwErrMask_pcie_phy_txParityErr_RMASK 0x1
-#define QIB_7322_HwErrMask_PCIeBusParityErrMask_LSB 0x1F
-#define QIB_7322_HwErrMask_PCIeBusParityErrMask_MSB 0x21
-#define QIB_7322_HwErrMask_PCIeBusParityErrMask_RMASK 0x7
-#define QIB_7322_HwErrMask_PcieCplTimeoutMask_LSB 0x1E
-#define QIB_7322_HwErrMask_PcieCplTimeoutMask_MSB 0x1E
-#define QIB_7322_HwErrMask_PcieCplTimeoutMask_RMASK 0x1
-#define QIB_7322_HwErrMask_PciePoisonedTLPMask_LSB 0x1D
-#define QIB_7322_HwErrMask_PciePoisonedTLPMask_MSB 0x1D
-#define QIB_7322_HwErrMask_PciePoisonedTLPMask_RMASK 0x1
-#define QIB_7322_HwErrMask_SDmaMemReadErrMask_1_LSB 0x1C
-#define QIB_7322_HwErrMask_SDmaMemReadErrMask_1_MSB 0x1C
-#define QIB_7322_HwErrMask_SDmaMemReadErrMask_1_RMASK 0x1
-#define QIB_7322_HwErrMask_SDmaMemReadErrMask_0_LSB 0x1B
-#define QIB_7322_HwErrMask_SDmaMemReadErrMask_0_MSB 0x1B
-#define QIB_7322_HwErrMask_SDmaMemReadErrMask_0_RMASK 0x1
-#define QIB_7322_HwErrMask_IBCBusFromSPCParityErrMask_1_LSB 0xF
-#define QIB_7322_HwErrMask_IBCBusFromSPCParityErrMask_1_MSB 0xF
-#define QIB_7322_HwErrMask_IBCBusFromSPCParityErrMask_1_RMASK 0x1
-#define QIB_7322_HwErrMask_IBCBusToSPCParityErrMask_1_LSB 0xE
-#define QIB_7322_HwErrMask_IBCBusToSPCParityErrMask_1_MSB 0xE
-#define QIB_7322_HwErrMask_IBCBusToSPCParityErrMask_1_RMASK 0x1
-#define QIB_7322_HwErrMask_IBCBusFromSPCParityErrMask_0_LSB 0xD
-#define QIB_7322_HwErrMask_IBCBusFromSPCParityErrMask_0_MSB 0xD
-#define QIB_7322_HwErrMask_IBCBusFromSPCParityErrMask_0_RMASK 0x1
-#define QIB_7322_HwErrMask_statusValidNoEopMask_LSB 0xC
-#define QIB_7322_HwErrMask_statusValidNoEopMask_MSB 0xC
-#define QIB_7322_HwErrMask_statusValidNoEopMask_RMASK 0x1
-#define QIB_7322_HwErrMask_LATriggeredMask_LSB 0xB
-#define QIB_7322_HwErrMask_LATriggeredMask_MSB 0xB
-#define QIB_7322_HwErrMask_LATriggeredMask_RMASK 0x1
-
-#define QIB_7322_HwErrStatus_OFFS 0xA0
-#define QIB_7322_HwErrStatus_DEF 0x0000000000000000
-#define QIB_7322_HwErrStatus_IBSerdesPClkNotDetect_1_LSB 0x3F
-#define QIB_7322_HwErrStatus_IBSerdesPClkNotDetect_1_MSB 0x3F
-#define QIB_7322_HwErrStatus_IBSerdesPClkNotDetect_1_RMASK 0x1
-#define QIB_7322_HwErrStatus_IBSerdesPClkNotDetect_0_LSB 0x3E
-#define QIB_7322_HwErrStatus_IBSerdesPClkNotDetect_0_MSB 0x3E
-#define QIB_7322_HwErrStatus_IBSerdesPClkNotDetect_0_RMASK 0x1
-#define QIB_7322_HwErrStatus_PCIESerdesPClkNotDetect_LSB 0x37
-#define QIB_7322_HwErrStatus_PCIESerdesPClkNotDetect_MSB 0x37
-#define QIB_7322_HwErrStatus_PCIESerdesPClkNotDetect_RMASK 0x1
-#define QIB_7322_HwErrStatus_PowerOnBISTFailed_LSB 0x36
-#define QIB_7322_HwErrStatus_PowerOnBISTFailed_MSB 0x36
-#define QIB_7322_HwErrStatus_PowerOnBISTFailed_RMASK 0x1
-#define QIB_7322_HwErrStatus_TempsenseTholdReached_LSB 0x35
-#define QIB_7322_HwErrStatus_TempsenseTholdReached_MSB 0x35
-#define QIB_7322_HwErrStatus_TempsenseTholdReached_RMASK 0x1
-#define QIB_7322_HwErrStatus_MemoryErr_LSB 0x30
-#define QIB_7322_HwErrStatus_MemoryErr_MSB 0x30
-#define QIB_7322_HwErrStatus_MemoryErr_RMASK 0x1
-#define QIB_7322_HwErrStatus_pcie_phy_txParityErr_LSB 0x22
-#define QIB_7322_HwErrStatus_pcie_phy_txParityErr_MSB 0x22
-#define QIB_7322_HwErrStatus_pcie_phy_txParityErr_RMASK 0x1
-#define QIB_7322_HwErrStatus_PCIeBusParity_LSB 0x1F
-#define QIB_7322_HwErrStatus_PCIeBusParity_MSB 0x21
-#define QIB_7322_HwErrStatus_PCIeBusParity_RMASK 0x7
-#define QIB_7322_HwErrStatus_PcieCplTimeout_LSB 0x1E
-#define QIB_7322_HwErrStatus_PcieCplTimeout_MSB 0x1E
-#define QIB_7322_HwErrStatus_PcieCplTimeout_RMASK 0x1
-#define QIB_7322_HwErrStatus_PciePoisonedTLP_LSB 0x1D
-#define QIB_7322_HwErrStatus_PciePoisonedTLP_MSB 0x1D
-#define QIB_7322_HwErrStatus_PciePoisonedTLP_RMASK 0x1
-#define QIB_7322_HwErrStatus_SDmaMemReadErr_1_LSB 0x1C
-#define QIB_7322_HwErrStatus_SDmaMemReadErr_1_MSB 0x1C
-#define QIB_7322_HwErrStatus_SDmaMemReadErr_1_RMASK 0x1
-#define QIB_7322_HwErrStatus_SDmaMemReadErr_0_LSB 0x1B
-#define QIB_7322_HwErrStatus_SDmaMemReadErr_0_MSB 0x1B
-#define QIB_7322_HwErrStatus_SDmaMemReadErr_0_RMASK 0x1
-#define QIB_7322_HwErrStatus_IBCBusFromSPCParityErr_1_LSB 0xF
-#define QIB_7322_HwErrStatus_IBCBusFromSPCParityErr_1_MSB 0xF
-#define QIB_7322_HwErrStatus_IBCBusFromSPCParityErr_1_RMASK 0x1
-#define QIB_7322_HwErrStatus_IBCBusToSPCParityErr_1_LSB 0xE
-#define QIB_7322_HwErrStatus_IBCBusToSPCParityErr_1_MSB 0xE
-#define QIB_7322_HwErrStatus_IBCBusToSPCParityErr_1_RMASK 0x1
-#define QIB_7322_HwErrStatus_IBCBusFromSPCParityErr_0_LSB 0xD
-#define QIB_7322_HwErrStatus_IBCBusFromSPCParityErr_0_MSB 0xD
-#define QIB_7322_HwErrStatus_IBCBusFromSPCParityErr_0_RMASK 0x1
-#define QIB_7322_HwErrStatus_statusValidNoEop_LSB 0xC
-#define QIB_7322_HwErrStatus_statusValidNoEop_MSB 0xC
-#define QIB_7322_HwErrStatus_statusValidNoEop_RMASK 0x1
-#define QIB_7322_HwErrStatus_LATriggered_LSB 0xB
-#define QIB_7322_HwErrStatus_LATriggered_MSB 0xB
-#define QIB_7322_HwErrStatus_LATriggered_RMASK 0x1
-
-#define QIB_7322_HwErrClear_OFFS 0xA8
-#define QIB_7322_HwErrClear_DEF 0x0000000000000000
-#define QIB_7322_HwErrClear_IBSerdesPClkNotDetectClear_1_LSB 0x3F
-#define QIB_7322_HwErrClear_IBSerdesPClkNotDetectClear_1_MSB 0x3F
-#define QIB_7322_HwErrClear_IBSerdesPClkNotDetectClear_1_RMASK 0x1
-#define QIB_7322_HwErrClear_IBSerdesPClkNotDetectClear_0_LSB 0x3E
-#define QIB_7322_HwErrClear_IBSerdesPClkNotDetectClear_0_MSB 0x3E
-#define QIB_7322_HwErrClear_IBSerdesPClkNotDetectClear_0_RMASK 0x1
-#define QIB_7322_HwErrClear_PCIESerdesPClkNotDetectClear_LSB 0x37
-#define QIB_7322_HwErrClear_PCIESerdesPClkNotDetectClear_MSB 0x37
-#define QIB_7322_HwErrClear_PCIESerdesPClkNotDetectClear_RMASK 0x1
-#define QIB_7322_HwErrClear_PowerOnBISTFailedClear_LSB 0x36
-#define QIB_7322_HwErrClear_PowerOnBISTFailedClear_MSB 0x36
-#define QIB_7322_HwErrClear_PowerOnBISTFailedClear_RMASK 0x1
-#define QIB_7322_HwErrClear_TempsenseTholdReachedClear_LSB 0x35
-#define QIB_7322_HwErrClear_TempsenseTholdReachedClear_MSB 0x35
-#define QIB_7322_HwErrClear_TempsenseTholdReachedClear_RMASK 0x1
-#define QIB_7322_HwErrClear_MemoryErrClear_LSB 0x30
-#define QIB_7322_HwErrClear_MemoryErrClear_MSB 0x30
-#define QIB_7322_HwErrClear_MemoryErrClear_RMASK 0x1
-#define QIB_7322_HwErrClear_pcie_phy_txParityErr_LSB 0x22
-#define QIB_7322_HwErrClear_pcie_phy_txParityErr_MSB 0x22
-#define QIB_7322_HwErrClear_pcie_phy_txParityErr_RMASK 0x1
-#define QIB_7322_HwErrClear_PCIeBusParityClear_LSB 0x1F
-#define QIB_7322_HwErrClear_PCIeBusParityClear_MSB 0x21
-#define QIB_7322_HwErrClear_PCIeBusParityClear_RMASK 0x7
-#define QIB_7322_HwErrClear_PcieCplTimeoutClear_LSB 0x1E
-#define QIB_7322_HwErrClear_PcieCplTimeoutClear_MSB 0x1E
-#define QIB_7322_HwErrClear_PcieCplTimeoutClear_RMASK 0x1
-#define QIB_7322_HwErrClear_PciePoisonedTLPClear_LSB 0x1D
-#define QIB_7322_HwErrClear_PciePoisonedTLPClear_MSB 0x1D
-#define QIB_7322_HwErrClear_PciePoisonedTLPClear_RMASK 0x1
-#define QIB_7322_HwErrClear_SDmaMemReadErrClear_1_LSB 0x1C
-#define QIB_7322_HwErrClear_SDmaMemReadErrClear_1_MSB 0x1C
-#define QIB_7322_HwErrClear_SDmaMemReadErrClear_1_RMASK 0x1
-#define QIB_7322_HwErrClear_SDmaMemReadErrClear_0_LSB 0x1B
-#define QIB_7322_HwErrClear_SDmaMemReadErrClear_0_MSB 0x1B
-#define QIB_7322_HwErrClear_SDmaMemReadErrClear_0_RMASK 0x1
-#define QIB_7322_HwErrClear_IBCBusFromSPCParityErrClear_1_LSB 0xF
-#define QIB_7322_HwErrClear_IBCBusFromSPCParityErrClear_1_MSB 0xF
-#define QIB_7322_HwErrClear_IBCBusFromSPCParityErrClear_1_RMASK 0x1
-#define QIB_7322_HwErrClear_IBCBusToSPCParityErrClear_1_LSB 0xE
-#define QIB_7322_HwErrClear_IBCBusToSPCParityErrClear_1_MSB 0xE
-#define QIB_7322_HwErrClear_IBCBusToSPCParityErrClear_1_RMASK 0x1
-#define QIB_7322_HwErrClear_IBCBusFromSPCParityErrClear_0_LSB 0xD
-#define QIB_7322_HwErrClear_IBCBusFromSPCParityErrClear_0_MSB 0xD
-#define QIB_7322_HwErrClear_IBCBusFromSPCParityErrClear_0_RMASK 0x1
-#define QIB_7322_HwErrClear_statusValidNoEopClear_LSB 0xC
-#define QIB_7322_HwErrClear_statusValidNoEopClear_MSB 0xC
-#define QIB_7322_HwErrClear_statusValidNoEopClear_RMASK 0x1
-#define QIB_7322_HwErrClear_LATriggeredClear_LSB 0xB
-#define QIB_7322_HwErrClear_LATriggeredClear_MSB 0xB
-#define QIB_7322_HwErrClear_LATriggeredClear_RMASK 0x1
-
-#define QIB_7322_HwDiagCtrl_OFFS 0xB0
-#define QIB_7322_HwDiagCtrl_DEF 0x0000000000000000
-#define QIB_7322_HwDiagCtrl_Diagnostic_LSB 0x3F
-#define QIB_7322_HwDiagCtrl_Diagnostic_MSB 0x3F
-#define QIB_7322_HwDiagCtrl_Diagnostic_RMASK 0x1
-#define QIB_7322_HwDiagCtrl_CounterWrEnable_LSB 0x3D
-#define QIB_7322_HwDiagCtrl_CounterWrEnable_MSB 0x3D
-#define QIB_7322_HwDiagCtrl_CounterWrEnable_RMASK 0x1
-#define QIB_7322_HwDiagCtrl_CounterDisable_LSB 0x3C
-#define QIB_7322_HwDiagCtrl_CounterDisable_MSB 0x3C
-#define QIB_7322_HwDiagCtrl_CounterDisable_RMASK 0x1
-#define QIB_7322_HwDiagCtrl_forcePCIeBusParity_LSB 0x1F
-#define QIB_7322_HwDiagCtrl_forcePCIeBusParity_MSB 0x22
-#define QIB_7322_HwDiagCtrl_forcePCIeBusParity_RMASK 0xF
-#define QIB_7322_HwDiagCtrl_ForceIBCBusFromSPCParityErr_1_LSB 0xF
-#define QIB_7322_HwDiagCtrl_ForceIBCBusFromSPCParityErr_1_MSB 0xF
-#define QIB_7322_HwDiagCtrl_ForceIBCBusFromSPCParityErr_1_RMASK 0x1
-#define QIB_7322_HwDiagCtrl_ForceIBCBusToSPCParityErr_1_LSB 0xE
-#define QIB_7322_HwDiagCtrl_ForceIBCBusToSPCParityErr_1_MSB 0xE
-#define QIB_7322_HwDiagCtrl_ForceIBCBusToSPCParityErr_1_RMASK 0x1
-#define QIB_7322_HwDiagCtrl_ForceIBCBusFromSPCParityErr_0_LSB 0xD
-#define QIB_7322_HwDiagCtrl_ForceIBCBusFromSPCParityErr_0_MSB 0xD
-#define QIB_7322_HwDiagCtrl_ForceIBCBusFromSPCParityErr_0_RMASK 0x1
-#define QIB_7322_HwDiagCtrl_ForceIBCBusToSPCParityErr_0_LSB 0xC
-#define QIB_7322_HwDiagCtrl_ForceIBCBusToSPCParityErr_0_MSB 0xC
-#define QIB_7322_HwDiagCtrl_ForceIBCBusToSPCParityErr_0_RMASK 0x1
-
-#define QIB_7322_EXTStatus_OFFS 0xC0
-#define QIB_7322_EXTStatus_DEF 0x000000000000X000
-#define QIB_7322_EXTStatus_GPIOIn_LSB 0x30
-#define QIB_7322_EXTStatus_GPIOIn_MSB 0x3F
-#define QIB_7322_EXTStatus_GPIOIn_RMASK 0xFFFF
-#define QIB_7322_EXTStatus_MemBISTDisabled_LSB 0xF
-#define QIB_7322_EXTStatus_MemBISTDisabled_MSB 0xF
-#define QIB_7322_EXTStatus_MemBISTDisabled_RMASK 0x1
-#define QIB_7322_EXTStatus_MemBISTEndTest_LSB 0xE
-#define QIB_7322_EXTStatus_MemBISTEndTest_MSB 0xE
-#define QIB_7322_EXTStatus_MemBISTEndTest_RMASK 0x1
-
-#define QIB_7322_EXTCtrl_OFFS 0xC8
-#define QIB_7322_EXTCtrl_DEF 0x0000000000000000
-#define QIB_7322_EXTCtrl_GPIOOe_LSB 0x30
-#define QIB_7322_EXTCtrl_GPIOOe_MSB 0x3F
-#define QIB_7322_EXTCtrl_GPIOOe_RMASK 0xFFFF
-#define QIB_7322_EXTCtrl_GPIOInvert_LSB 0x20
-#define QIB_7322_EXTCtrl_GPIOInvert_MSB 0x2F
-#define QIB_7322_EXTCtrl_GPIOInvert_RMASK 0xFFFF
-#define QIB_7322_EXTCtrl_LEDPort1GreenOn_LSB 0x3
-#define QIB_7322_EXTCtrl_LEDPort1GreenOn_MSB 0x3
-#define QIB_7322_EXTCtrl_LEDPort1GreenOn_RMASK 0x1
-#define QIB_7322_EXTCtrl_LEDPort1YellowOn_LSB 0x2
-#define QIB_7322_EXTCtrl_LEDPort1YellowOn_MSB 0x2
-#define QIB_7322_EXTCtrl_LEDPort1YellowOn_RMASK 0x1
-#define QIB_7322_EXTCtrl_LEDPort0GreenOn_LSB 0x1
-#define QIB_7322_EXTCtrl_LEDPort0GreenOn_MSB 0x1
-#define QIB_7322_EXTCtrl_LEDPort0GreenOn_RMASK 0x1
-#define QIB_7322_EXTCtrl_LEDPort0YellowOn_LSB 0x0
-#define QIB_7322_EXTCtrl_LEDPort0YellowOn_MSB 0x0
-#define QIB_7322_EXTCtrl_LEDPort0YellowOn_RMASK 0x1
-
-#define QIB_7322_GPIOOut_OFFS 0xE0
-#define QIB_7322_GPIOOut_DEF 0x0000000000000000
-
-#define QIB_7322_GPIOMask_OFFS 0xE8
-#define QIB_7322_GPIOMask_DEF 0x0000000000000000
-
-#define QIB_7322_GPIOStatus_OFFS 0xF0
-#define QIB_7322_GPIOStatus_DEF 0x0000000000000000
-
-#define QIB_7322_GPIOClear_OFFS 0xF8
-#define QIB_7322_GPIOClear_DEF 0x0000000000000000
-
-#define QIB_7322_RcvCtrl_OFFS 0x100
-#define QIB_7322_RcvCtrl_DEF 0x0000000000000000
-#define QIB_7322_RcvCtrl_TidReDirect_LSB 0x30
-#define QIB_7322_RcvCtrl_TidReDirect_MSB 0x3F
-#define QIB_7322_RcvCtrl_TidReDirect_RMASK 0xFFFF
-#define QIB_7322_RcvCtrl_TailUpd_LSB 0x2F
-#define QIB_7322_RcvCtrl_TailUpd_MSB 0x2F
-#define QIB_7322_RcvCtrl_TailUpd_RMASK 0x1
-#define QIB_7322_RcvCtrl_XrcTypeCode_LSB 0x2C
-#define QIB_7322_RcvCtrl_XrcTypeCode_MSB 0x2E
-#define QIB_7322_RcvCtrl_XrcTypeCode_RMASK 0x7
-#define QIB_7322_RcvCtrl_TidFlowEnable_LSB 0x2B
-#define QIB_7322_RcvCtrl_TidFlowEnable_MSB 0x2B
-#define QIB_7322_RcvCtrl_TidFlowEnable_RMASK 0x1
-#define QIB_7322_RcvCtrl_ContextCfg_LSB 0x29
-#define QIB_7322_RcvCtrl_ContextCfg_MSB 0x2A
-#define QIB_7322_RcvCtrl_ContextCfg_RMASK 0x3
-#define QIB_7322_RcvCtrl_IntrAvail_LSB 0x14
-#define QIB_7322_RcvCtrl_IntrAvail_MSB 0x25
-#define QIB_7322_RcvCtrl_IntrAvail_RMASK 0x3FFFF
-#define QIB_7322_RcvCtrl_dontDropRHQFull_LSB 0x0
-#define QIB_7322_RcvCtrl_dontDropRHQFull_MSB 0x11
-#define QIB_7322_RcvCtrl_dontDropRHQFull_RMASK 0x3FFFF
-
-#define QIB_7322_RcvHdrSize_OFFS 0x110
-#define QIB_7322_RcvHdrSize_DEF 0x0000000000000000
-
-#define QIB_7322_RcvHdrCnt_OFFS 0x118
-#define QIB_7322_RcvHdrCnt_DEF 0x0000000000000000
-
-#define QIB_7322_RcvHdrEntSize_OFFS 0x120
-#define QIB_7322_RcvHdrEntSize_DEF 0x0000000000000000
-
-#define QIB_7322_RcvTIDBase_OFFS 0x128
-#define QIB_7322_RcvTIDBase_DEF 0x0000000000050000
-
-#define QIB_7322_RcvTIDCnt_OFFS 0x130
-#define QIB_7322_RcvTIDCnt_DEF 0x0000000000000200
-
-#define QIB_7322_RcvEgrBase_OFFS 0x138
-#define QIB_7322_RcvEgrBase_DEF 0x0000000000014000
-
-#define QIB_7322_RcvEgrCnt_OFFS 0x140
-#define QIB_7322_RcvEgrCnt_DEF 0x0000000000001000
-
-#define QIB_7322_RcvBufBase_OFFS 0x148
-#define QIB_7322_RcvBufBase_DEF 0x0000000000080000
-
-#define QIB_7322_RcvBufSize_OFFS 0x150
-#define QIB_7322_RcvBufSize_DEF 0x0000000000005000
-
-#define QIB_7322_RxIntMemBase_OFFS 0x158
-#define QIB_7322_RxIntMemBase_DEF 0x0000000000077000
-
-#define QIB_7322_RxIntMemSize_OFFS 0x160
-#define QIB_7322_RxIntMemSize_DEF 0x0000000000007000
-
-#define QIB_7322_feature_mask_OFFS 0x190
-#define QIB_7322_feature_mask_DEF 0x00000000000000XX
-
-#define QIB_7322_active_feature_mask_OFFS 0x198
-#define QIB_7322_active_feature_mask_DEF 0x00000000000000XX
-#define QIB_7322_active_feature_mask_Port1_QDR_Enabled_LSB 0x5
-#define QIB_7322_active_feature_mask_Port1_QDR_Enabled_MSB 0x5
-#define QIB_7322_active_feature_mask_Port1_QDR_Enabled_RMASK 0x1
-#define QIB_7322_active_feature_mask_Port1_DDR_Enabled_LSB 0x4
-#define QIB_7322_active_feature_mask_Port1_DDR_Enabled_MSB 0x4
-#define QIB_7322_active_feature_mask_Port1_DDR_Enabled_RMASK 0x1
-#define QIB_7322_active_feature_mask_Port1_SDR_Enabled_LSB 0x3
-#define QIB_7322_active_feature_mask_Port1_SDR_Enabled_MSB 0x3
-#define QIB_7322_active_feature_mask_Port1_SDR_Enabled_RMASK 0x1
-#define QIB_7322_active_feature_mask_Port0_QDR_Enabled_LSB 0x2
-#define QIB_7322_active_feature_mask_Port0_QDR_Enabled_MSB 0x2
-#define QIB_7322_active_feature_mask_Port0_QDR_Enabled_RMASK 0x1
-#define QIB_7322_active_feature_mask_Port0_DDR_Enabled_LSB 0x1
-#define QIB_7322_active_feature_mask_Port0_DDR_Enabled_MSB 0x1
-#define QIB_7322_active_feature_mask_Port0_DDR_Enabled_RMASK 0x1
-#define QIB_7322_active_feature_mask_Port0_SDR_Enabled_LSB 0x0
-#define QIB_7322_active_feature_mask_Port0_SDR_Enabled_MSB 0x0
-#define QIB_7322_active_feature_mask_Port0_SDR_Enabled_RMASK 0x1
-
-#define QIB_7322_SendCtrl_OFFS 0x1C0
-#define QIB_7322_SendCtrl_DEF 0x0000000000000000
-#define QIB_7322_SendCtrl_Disarm_LSB 0x1F
-#define QIB_7322_SendCtrl_Disarm_MSB 0x1F
-#define QIB_7322_SendCtrl_Disarm_RMASK 0x1
-#define QIB_7322_SendCtrl_SendBufAvailPad64Byte_LSB 0x1D
-#define QIB_7322_SendCtrl_SendBufAvailPad64Byte_MSB 0x1D
-#define QIB_7322_SendCtrl_SendBufAvailPad64Byte_RMASK 0x1
-#define QIB_7322_SendCtrl_AvailUpdThld_LSB 0x18
-#define QIB_7322_SendCtrl_AvailUpdThld_MSB 0x1C
-#define QIB_7322_SendCtrl_AvailUpdThld_RMASK 0x1F
-#define QIB_7322_SendCtrl_DisarmSendBuf_LSB 0x10
-#define QIB_7322_SendCtrl_DisarmSendBuf_MSB 0x17
-#define QIB_7322_SendCtrl_DisarmSendBuf_RMASK 0xFF
-#define QIB_7322_SendCtrl_SpecialTriggerEn_LSB 0x4
-#define QIB_7322_SendCtrl_SpecialTriggerEn_MSB 0x4
-#define QIB_7322_SendCtrl_SpecialTriggerEn_RMASK 0x1
-#define QIB_7322_SendCtrl_SendBufAvailUpd_LSB 0x2
-#define QIB_7322_SendCtrl_SendBufAvailUpd_MSB 0x2
-#define QIB_7322_SendCtrl_SendBufAvailUpd_RMASK 0x1
-#define QIB_7322_SendCtrl_SendIntBufAvail_LSB 0x1
-#define QIB_7322_SendCtrl_SendIntBufAvail_MSB 0x1
-#define QIB_7322_SendCtrl_SendIntBufAvail_RMASK 0x1
-
-#define QIB_7322_SendBufBase_OFFS 0x1C8
-#define QIB_7322_SendBufBase_DEF 0x0018000000100000
-#define QIB_7322_SendBufBase_BaseAddr_LargePIO_LSB 0x20
-#define QIB_7322_SendBufBase_BaseAddr_LargePIO_MSB 0x34
-#define QIB_7322_SendBufBase_BaseAddr_LargePIO_RMASK 0x1FFFFF
-#define QIB_7322_SendBufBase_BaseAddr_SmallPIO_LSB 0x0
-#define QIB_7322_SendBufBase_BaseAddr_SmallPIO_MSB 0x14
-#define QIB_7322_SendBufBase_BaseAddr_SmallPIO_RMASK 0x1FFFFF
-
-#define QIB_7322_SendBufSize_OFFS 0x1D0
-#define QIB_7322_SendBufSize_DEF 0x0000108000000880
-#define QIB_7322_SendBufSize_Size_LargePIO_LSB 0x20
-#define QIB_7322_SendBufSize_Size_LargePIO_MSB 0x2C
-#define QIB_7322_SendBufSize_Size_LargePIO_RMASK 0x1FFF
-#define QIB_7322_SendBufSize_Size_SmallPIO_LSB 0x0
-#define QIB_7322_SendBufSize_Size_SmallPIO_MSB 0xB
-#define QIB_7322_SendBufSize_Size_SmallPIO_RMASK 0xFFF
-
-#define QIB_7322_SendBufCnt_OFFS 0x1D8
-#define QIB_7322_SendBufCnt_DEF 0x0000002000000080
-#define QIB_7322_SendBufCnt_Num_LargeBuffers_LSB 0x20
-#define QIB_7322_SendBufCnt_Num_LargeBuffers_MSB 0x25
-#define QIB_7322_SendBufCnt_Num_LargeBuffers_RMASK 0x3F
-#define QIB_7322_SendBufCnt_Num_SmallBuffers_LSB 0x0
-#define QIB_7322_SendBufCnt_Num_SmallBuffers_MSB 0x8
-#define QIB_7322_SendBufCnt_Num_SmallBuffers_RMASK 0x1FF
-
-#define QIB_7322_SendBufAvailAddr_OFFS 0x1E0
-#define QIB_7322_SendBufAvailAddr_DEF 0x0000000000000000
-#define QIB_7322_SendBufAvailAddr_SendBufAvailAddr_LSB 0x6
-#define QIB_7322_SendBufAvailAddr_SendBufAvailAddr_MSB 0x27
-#define QIB_7322_SendBufAvailAddr_SendBufAvailAddr_RMASK 0x3FFFFFFFF
-
-#define QIB_7322_SendBufErr0_OFFS 0x240
-#define QIB_7322_SendBufErr0_DEF 0x0000000000000000
-#define QIB_7322_SendBufErr0_SendBufErr_63_0_LSB 0x0
-#define QIB_7322_SendBufErr0_SendBufErr_63_0_MSB 0x3F
-#define QIB_7322_SendBufErr0_SendBufErr_63_0_RMASK 0x0
-
-#define QIB_7322_AvailUpdCount_OFFS 0x268
-#define QIB_7322_AvailUpdCount_DEF 0x0000000000000000
-#define QIB_7322_AvailUpdCount_AvailUpdCount_LSB 0x0
-#define QIB_7322_AvailUpdCount_AvailUpdCount_MSB 0x4
-#define QIB_7322_AvailUpdCount_AvailUpdCount_RMASK 0x1F
-
-#define QIB_7322_RcvHdrAddr0_OFFS 0x280
-#define QIB_7322_RcvHdrAddr0_DEF 0x0000000000000000
-#define QIB_7322_RcvHdrAddr0_RcvHdrAddr_LSB 0x2
-#define QIB_7322_RcvHdrAddr0_RcvHdrAddr_MSB 0x27
-#define QIB_7322_RcvHdrAddr0_RcvHdrAddr_RMASK 0x3FFFFFFFFF
-
-#define QIB_7322_RcvHdrTailAddr0_OFFS 0x340
-#define QIB_7322_RcvHdrTailAddr0_DEF 0x0000000000000000
-#define QIB_7322_RcvHdrTailAddr0_RcvHdrTailAddr_LSB 0x2
-#define QIB_7322_RcvHdrTailAddr0_RcvHdrTailAddr_MSB 0x27
-#define QIB_7322_RcvHdrTailAddr0_RcvHdrTailAddr_RMASK 0x3FFFFFFFFF
-
-#define QIB_7322_ahb_access_ctrl_OFFS 0x460
-#define QIB_7322_ahb_access_ctrl_DEF 0x0000000000000000
-#define QIB_7322_ahb_access_ctrl_sw_sel_ahb_trgt_LSB 0x1
-#define QIB_7322_ahb_access_ctrl_sw_sel_ahb_trgt_MSB 0x2
-#define QIB_7322_ahb_access_ctrl_sw_sel_ahb_trgt_RMASK 0x3
-#define QIB_7322_ahb_access_ctrl_sw_ahb_sel_LSB 0x0
-#define QIB_7322_ahb_access_ctrl_sw_ahb_sel_MSB 0x0
-#define QIB_7322_ahb_access_ctrl_sw_ahb_sel_RMASK 0x1
-
-#define QIB_7322_ahb_transaction_reg_OFFS 0x468
-#define QIB_7322_ahb_transaction_reg_DEF 0x0000000080000000
-#define QIB_7322_ahb_transaction_reg_ahb_data_LSB 0x20
-#define QIB_7322_ahb_transaction_reg_ahb_data_MSB 0x3F
-#define QIB_7322_ahb_transaction_reg_ahb_data_RMASK 0xFFFFFFFF
-#define QIB_7322_ahb_transaction_reg_ahb_rdy_LSB 0x1F
-#define QIB_7322_ahb_transaction_reg_ahb_rdy_MSB 0x1F
-#define QIB_7322_ahb_transaction_reg_ahb_rdy_RMASK 0x1
-#define QIB_7322_ahb_transaction_reg_ahb_req_err_LSB 0x1E
-#define QIB_7322_ahb_transaction_reg_ahb_req_err_MSB 0x1E
-#define QIB_7322_ahb_transaction_reg_ahb_req_err_RMASK 0x1
-#define QIB_7322_ahb_transaction_reg_write_not_read_LSB 0x1B
-#define QIB_7322_ahb_transaction_reg_write_not_read_MSB 0x1B
-#define QIB_7322_ahb_transaction_reg_write_not_read_RMASK 0x1
-#define QIB_7322_ahb_transaction_reg_ahb_address_LSB 0x10
-#define QIB_7322_ahb_transaction_reg_ahb_address_MSB 0x1A
-#define QIB_7322_ahb_transaction_reg_ahb_address_RMASK 0x7FF
-
-#define QIB_7322_SPC_JTAG_ACCESS_REG_OFFS 0x470
-#define QIB_7322_SPC_JTAG_ACCESS_REG_DEF 0x0000000000000001
-#define QIB_7322_SPC_JTAG_ACCESS_REG_SPC_JTAG_ACCESS_EN_LSB 0xA
-#define QIB_7322_SPC_JTAG_ACCESS_REG_SPC_JTAG_ACCESS_EN_MSB 0xA
-#define QIB_7322_SPC_JTAG_ACCESS_REG_SPC_JTAG_ACCESS_EN_RMASK 0x1
-#define QIB_7322_SPC_JTAG_ACCESS_REG_bist_en_LSB 0x5
-#define QIB_7322_SPC_JTAG_ACCESS_REG_bist_en_MSB 0x9
-#define QIB_7322_SPC_JTAG_ACCESS_REG_bist_en_RMASK 0x1F
-#define QIB_7322_SPC_JTAG_ACCESS_REG_opcode_LSB 0x3
-#define QIB_7322_SPC_JTAG_ACCESS_REG_opcode_MSB 0x4
-#define QIB_7322_SPC_JTAG_ACCESS_REG_opcode_RMASK 0x3
-#define QIB_7322_SPC_JTAG_ACCESS_REG_tdi_LSB 0x2
-#define QIB_7322_SPC_JTAG_ACCESS_REG_tdi_MSB 0x2
-#define QIB_7322_SPC_JTAG_ACCESS_REG_tdi_RMASK 0x1
-#define QIB_7322_SPC_JTAG_ACCESS_REG_tdo_LSB 0x1
-#define QIB_7322_SPC_JTAG_ACCESS_REG_tdo_MSB 0x1
-#define QIB_7322_SPC_JTAG_ACCESS_REG_tdo_RMASK 0x1
-#define QIB_7322_SPC_JTAG_ACCESS_REG_rdy_LSB 0x0
-#define QIB_7322_SPC_JTAG_ACCESS_REG_rdy_MSB 0x0
-#define QIB_7322_SPC_JTAG_ACCESS_REG_rdy_RMASK 0x1
-
-#define QIB_7322_SendCheckMask0_OFFS 0x4C0
-#define QIB_7322_SendCheckMask0_DEF 0x0000000000000000
-#define QIB_7322_SendCheckMask0_SendCheckMask_63_32_LSB 0x0
-#define QIB_7322_SendCheckMask0_SendCheckMask_63_32_MSB 0x3F
-#define QIB_7322_SendCheckMask0_SendCheckMask_63_32_RMASK 0x0
-
-#define QIB_7322_SendGRHCheckMask0_OFFS 0x4E0
-#define QIB_7322_SendGRHCheckMask0_DEF 0x0000000000000000
-#define QIB_7322_SendGRHCheckMask0_SendGRHCheckMask_63_32_LSB 0x0
-#define QIB_7322_SendGRHCheckMask0_SendGRHCheckMask_63_32_MSB 0x3F
-#define QIB_7322_SendGRHCheckMask0_SendGRHCheckMask_63_32_RMASK 0x0
-
-#define QIB_7322_SendIBPacketMask0_OFFS 0x500
-#define QIB_7322_SendIBPacketMask0_DEF 0x0000000000000000
-#define QIB_7322_SendIBPacketMask0_SendIBPacketMask_63_32_LSB 0x0
-#define QIB_7322_SendIBPacketMask0_SendIBPacketMask_63_32_MSB 0x3F
-#define QIB_7322_SendIBPacketMask0_SendIBPacketMask_63_32_RMASK 0x0
-
-#define QIB_7322_IntRedirect0_OFFS 0x540
-#define QIB_7322_IntRedirect0_DEF 0x0000000000000000
-#define QIB_7322_IntRedirect0_vec11_LSB 0x37
-#define QIB_7322_IntRedirect0_vec11_MSB 0x3B
-#define QIB_7322_IntRedirect0_vec11_RMASK 0x1F
-#define QIB_7322_IntRedirect0_vec10_LSB 0x32
-#define QIB_7322_IntRedirect0_vec10_MSB 0x36
-#define QIB_7322_IntRedirect0_vec10_RMASK 0x1F
-#define QIB_7322_IntRedirect0_vec9_LSB 0x2D
-#define QIB_7322_IntRedirect0_vec9_MSB 0x31
-#define QIB_7322_IntRedirect0_vec9_RMASK 0x1F
-#define QIB_7322_IntRedirect0_vec8_LSB 0x28
-#define QIB_7322_IntRedirect0_vec8_MSB 0x2C
-#define QIB_7322_IntRedirect0_vec8_RMASK 0x1F
-#define QIB_7322_IntRedirect0_vec7_LSB 0x23
-#define QIB_7322_IntRedirect0_vec7_MSB 0x27
-#define QIB_7322_IntRedirect0_vec7_RMASK 0x1F
-#define QIB_7322_IntRedirect0_vec6_LSB 0x1E
-#define QIB_7322_IntRedirect0_vec6_MSB 0x22
-#define QIB_7322_IntRedirect0_vec6_RMASK 0x1F
-#define QIB_7322_IntRedirect0_vec5_LSB 0x19
-#define QIB_7322_IntRedirect0_vec5_MSB 0x1D
-#define QIB_7322_IntRedirect0_vec5_RMASK 0x1F
-#define QIB_7322_IntRedirect0_vec4_LSB 0x14
-#define QIB_7322_IntRedirect0_vec4_MSB 0x18
-#define QIB_7322_IntRedirect0_vec4_RMASK 0x1F
-#define QIB_7322_IntRedirect0_vec3_LSB 0xF
-#define QIB_7322_IntRedirect0_vec3_MSB 0x13
-#define QIB_7322_IntRedirect0_vec3_RMASK 0x1F
-#define QIB_7322_IntRedirect0_vec2_LSB 0xA
-#define QIB_7322_IntRedirect0_vec2_MSB 0xE
-#define QIB_7322_IntRedirect0_vec2_RMASK 0x1F
-#define QIB_7322_IntRedirect0_vec1_LSB 0x5
-#define QIB_7322_IntRedirect0_vec1_MSB 0x9
-#define QIB_7322_IntRedirect0_vec1_RMASK 0x1F
-#define QIB_7322_IntRedirect0_vec0_LSB 0x0
-#define QIB_7322_IntRedirect0_vec0_MSB 0x4
-#define QIB_7322_IntRedirect0_vec0_RMASK 0x1F
-
-#define QIB_7322_Int_Granted_OFFS 0x570
-#define QIB_7322_Int_Granted_DEF 0x0000000000000000
-
-#define QIB_7322_vec_clr_without_int_OFFS 0x578
-#define QIB_7322_vec_clr_without_int_DEF 0x0000000000000000
-
-#define QIB_7322_DCACtrlA_OFFS 0x580
-#define QIB_7322_DCACtrlA_DEF 0x0000000000000000
-#define QIB_7322_DCACtrlA_SendDMAHead1DCAEnable_LSB 0x4
-#define QIB_7322_DCACtrlA_SendDMAHead1DCAEnable_MSB 0x4
-#define QIB_7322_DCACtrlA_SendDMAHead1DCAEnable_RMASK 0x1
-#define QIB_7322_DCACtrlA_SendDMAHead0DCAEnable_LSB 0x3
-#define QIB_7322_DCACtrlA_SendDMAHead0DCAEnable_MSB 0x3
-#define QIB_7322_DCACtrlA_SendDMAHead0DCAEnable_RMASK 0x1
-#define QIB_7322_DCACtrlA_RcvTailUpdDCAEnable_LSB 0x2
-#define QIB_7322_DCACtrlA_RcvTailUpdDCAEnable_MSB 0x2
-#define QIB_7322_DCACtrlA_RcvTailUpdDCAEnable_RMASK 0x1
-#define QIB_7322_DCACtrlA_EagerDCAEnable_LSB 0x1
-#define QIB_7322_DCACtrlA_EagerDCAEnable_MSB 0x1
-#define QIB_7322_DCACtrlA_EagerDCAEnable_RMASK 0x1
-#define QIB_7322_DCACtrlA_RcvHdrqDCAEnable_LSB 0x0
-#define QIB_7322_DCACtrlA_RcvHdrqDCAEnable_MSB 0x0
-#define QIB_7322_DCACtrlA_RcvHdrqDCAEnable_RMASK 0x1
-
-#define QIB_7322_DCACtrlB_OFFS 0x588
-#define QIB_7322_DCACtrlB_DEF 0x0000000000000000
-#define QIB_7322_DCACtrlB_RcvHdrq3DCAXfrCnt_LSB 0x36
-#define QIB_7322_DCACtrlB_RcvHdrq3DCAXfrCnt_MSB 0x3B
-#define QIB_7322_DCACtrlB_RcvHdrq3DCAXfrCnt_RMASK 0x3F
-#define QIB_7322_DCACtrlB_RcvHdrq3DCAOPH_LSB 0x2E
-#define QIB_7322_DCACtrlB_RcvHdrq3DCAOPH_MSB 0x35
-#define QIB_7322_DCACtrlB_RcvHdrq3DCAOPH_RMASK 0xFF
-#define QIB_7322_DCACtrlB_RcvHdrq2DCAXfrCnt_LSB 0x28
-#define QIB_7322_DCACtrlB_RcvHdrq2DCAXfrCnt_MSB 0x2D
-#define QIB_7322_DCACtrlB_RcvHdrq2DCAXfrCnt_RMASK 0x3F
-#define QIB_7322_DCACtrlB_RcvHdrq2DCAOPH_LSB 0x20
-#define QIB_7322_DCACtrlB_RcvHdrq2DCAOPH_MSB 0x27
-#define QIB_7322_DCACtrlB_RcvHdrq2DCAOPH_RMASK 0xFF
-#define QIB_7322_DCACtrlB_RcvHdrq1DCAXfrCnt_LSB 0x16
-#define QIB_7322_DCACtrlB_RcvHdrq1DCAXfrCnt_MSB 0x1B
-#define QIB_7322_DCACtrlB_RcvHdrq1DCAXfrCnt_RMASK 0x3F
-#define QIB_7322_DCACtrlB_RcvHdrq1DCAOPH_LSB 0xE
-#define QIB_7322_DCACtrlB_RcvHdrq1DCAOPH_MSB 0x15
-#define QIB_7322_DCACtrlB_RcvHdrq1DCAOPH_RMASK 0xFF
-#define QIB_7322_DCACtrlB_RcvHdrq0DCAXfrCnt_LSB 0x8
-#define QIB_7322_DCACtrlB_RcvHdrq0DCAXfrCnt_MSB 0xD
-#define QIB_7322_DCACtrlB_RcvHdrq0DCAXfrCnt_RMASK 0x3F
-#define QIB_7322_DCACtrlB_RcvHdrq0DCAOPH_LSB 0x0
-#define QIB_7322_DCACtrlB_RcvHdrq0DCAOPH_MSB 0x7
-#define QIB_7322_DCACtrlB_RcvHdrq0DCAOPH_RMASK 0xFF
-
-#define QIB_7322_DCACtrlC_OFFS 0x590
-#define QIB_7322_DCACtrlC_DEF 0x0000000000000000
-#define QIB_7322_DCACtrlC_RcvHdrq7DCAXfrCnt_LSB 0x36
-#define QIB_7322_DCACtrlC_RcvHdrq7DCAXfrCnt_MSB 0x3B
-#define QIB_7322_DCACtrlC_RcvHdrq7DCAXfrCnt_RMASK 0x3F
-#define QIB_7322_DCACtrlC_RcvHdrq7DCAOPH_LSB 0x2E
-#define QIB_7322_DCACtrlC_RcvHdrq7DCAOPH_MSB 0x35
-#define QIB_7322_DCACtrlC_RcvHdrq7DCAOPH_RMASK 0xFF
-#define QIB_7322_DCACtrlC_RcvHdrq6DCAXfrCnt_LSB 0x28
-#define QIB_7322_DCACtrlC_RcvHdrq6DCAXfrCnt_MSB 0x2D
-#define QIB_7322_DCACtrlC_RcvHdrq6DCAXfrCnt_RMASK 0x3F
-#define QIB_7322_DCACtrlC_RcvHdrq6DCAOPH_LSB 0x20
-#define QIB_7322_DCACtrlC_RcvHdrq6DCAOPH_MSB 0x27
-#define QIB_7322_DCACtrlC_RcvHdrq6DCAOPH_RMASK 0xFF
-#define QIB_7322_DCACtrlC_RcvHdrq5DCAXfrCnt_LSB 0x16
-#define QIB_7322_DCACtrlC_RcvHdrq5DCAXfrCnt_MSB 0x1B
-#define QIB_7322_DCACtrlC_RcvHdrq5DCAXfrCnt_RMASK 0x3F
-#define QIB_7322_DCACtrlC_RcvHdrq5DCAOPH_LSB 0xE
-#define QIB_7322_DCACtrlC_RcvHdrq5DCAOPH_MSB 0x15
-#define QIB_7322_DCACtrlC_RcvHdrq5DCAOPH_RMASK 0xFF
-#define QIB_7322_DCACtrlC_RcvHdrq4DCAXfrCnt_LSB 0x8
-#define QIB_7322_DCACtrlC_RcvHdrq4DCAXfrCnt_MSB 0xD
-#define QIB_7322_DCACtrlC_RcvHdrq4DCAXfrCnt_RMASK 0x3F
-#define QIB_7322_DCACtrlC_RcvHdrq4DCAOPH_LSB 0x0
-#define QIB_7322_DCACtrlC_RcvHdrq4DCAOPH_MSB 0x7
-#define QIB_7322_DCACtrlC_RcvHdrq4DCAOPH_RMASK 0xFF
-
-#define QIB_7322_DCACtrlD_OFFS 0x598
-#define QIB_7322_DCACtrlD_DEF 0x0000000000000000
-#define QIB_7322_DCACtrlD_RcvHdrq11DCAXfrCnt_LSB 0x36
-#define QIB_7322_DCACtrlD_RcvHdrq11DCAXfrCnt_MSB 0x3B
-#define QIB_7322_DCACtrlD_RcvHdrq11DCAXfrCnt_RMASK 0x3F
-#define QIB_7322_DCACtrlD_RcvHdrq11DCAOPH_LSB 0x2E
-#define QIB_7322_DCACtrlD_RcvHdrq11DCAOPH_MSB 0x35
-#define QIB_7322_DCACtrlD_RcvHdrq11DCAOPH_RMASK 0xFF
-#define QIB_7322_DCACtrlD_RcvHdrq10DCAXfrCnt_LSB 0x28
-#define QIB_7322_DCACtrlD_RcvHdrq10DCAXfrCnt_MSB 0x2D
-#define QIB_7322_DCACtrlD_RcvHdrq10DCAXfrCnt_RMASK 0x3F
-#define QIB_7322_DCACtrlD_RcvHdrq10DCAOPH_LSB 0x20
-#define QIB_7322_DCACtrlD_RcvHdrq10DCAOPH_MSB 0x27
-#define QIB_7322_DCACtrlD_RcvHdrq10DCAOPH_RMASK 0xFF
-#define QIB_7322_DCACtrlD_RcvHdrq9DCAXfrCnt_LSB 0x16
-#define QIB_7322_DCACtrlD_RcvHdrq9DCAXfrCnt_MSB 0x1B
-#define QIB_7322_DCACtrlD_RcvHdrq9DCAXfrCnt_RMASK 0x3F
-#define QIB_7322_DCACtrlD_RcvHdrq9DCAOPH_LSB 0xE
-#define QIB_7322_DCACtrlD_RcvHdrq9DCAOPH_MSB 0x15
-#define QIB_7322_DCACtrlD_RcvHdrq9DCAOPH_RMASK 0xFF
-#define QIB_7322_DCACtrlD_RcvHdrq8DCAXfrCnt_LSB 0x8
-#define QIB_7322_DCACtrlD_RcvHdrq8DCAXfrCnt_MSB 0xD
-#define QIB_7322_DCACtrlD_RcvHdrq8DCAXfrCnt_RMASK 0x3F
-#define QIB_7322_DCACtrlD_RcvHdrq8DCAOPH_LSB 0x0
-#define QIB_7322_DCACtrlD_RcvHdrq8DCAOPH_MSB 0x7
-#define QIB_7322_DCACtrlD_RcvHdrq8DCAOPH_RMASK 0xFF
-
-#define QIB_7322_DCACtrlE_OFFS 0x5A0
-#define QIB_7322_DCACtrlE_DEF 0x0000000000000000
-#define QIB_7322_DCACtrlE_RcvHdrq15DCAXfrCnt_LSB 0x36
-#define QIB_7322_DCACtrlE_RcvHdrq15DCAXfrCnt_MSB 0x3B
-#define QIB_7322_DCACtrlE_RcvHdrq15DCAXfrCnt_RMASK 0x3F
-#define QIB_7322_DCACtrlE_RcvHdrq15DCAOPH_LSB 0x2E
-#define QIB_7322_DCACtrlE_RcvHdrq15DCAOPH_MSB 0x35
-#define QIB_7322_DCACtrlE_RcvHdrq15DCAOPH_RMASK 0xFF
-#define QIB_7322_DCACtrlE_RcvHdrq14DCAXfrCnt_LSB 0x28
-#define QIB_7322_DCACtrlE_RcvHdrq14DCAXfrCnt_MSB 0x2D
-#define QIB_7322_DCACtrlE_RcvHdrq14DCAXfrCnt_RMASK 0x3F
-#define QIB_7322_DCACtrlE_RcvHdrq14DCAOPH_LSB 0x20
-#define QIB_7322_DCACtrlE_RcvHdrq14DCAOPH_MSB 0x27
-#define QIB_7322_DCACtrlE_RcvHdrq14DCAOPH_RMASK 0xFF
-#define QIB_7322_DCACtrlE_RcvHdrq13DCAXfrCnt_LSB 0x16
-#define QIB_7322_DCACtrlE_RcvHdrq13DCAXfrCnt_MSB 0x1B
-#define QIB_7322_DCACtrlE_RcvHdrq13DCAXfrCnt_RMASK 0x3F
-#define QIB_7322_DCACtrlE_RcvHdrq13DCAOPH_LSB 0xE
-#define QIB_7322_DCACtrlE_RcvHdrq13DCAOPH_MSB 0x15
-#define QIB_7322_DCACtrlE_RcvHdrq13DCAOPH_RMASK 0xFF
-#define QIB_7322_DCACtrlE_RcvHdrq12DCAXfrCnt_LSB 0x8
-#define QIB_7322_DCACtrlE_RcvHdrq12DCAXfrCnt_MSB 0xD
-#define QIB_7322_DCACtrlE_RcvHdrq12DCAXfrCnt_RMASK 0x3F
-#define QIB_7322_DCACtrlE_RcvHdrq12DCAOPH_LSB 0x0
-#define QIB_7322_DCACtrlE_RcvHdrq12DCAOPH_MSB 0x7
-#define QIB_7322_DCACtrlE_RcvHdrq12DCAOPH_RMASK 0xFF
-
-#define QIB_7322_DCACtrlF_OFFS 0x5A8
-#define QIB_7322_DCACtrlF_DEF 0x0000000000000000
-#define QIB_7322_DCACtrlF_SendDma1DCAOPH_LSB 0x28
-#define QIB_7322_DCACtrlF_SendDma1DCAOPH_MSB 0x2F
-#define QIB_7322_DCACtrlF_SendDma1DCAOPH_RMASK 0xFF
-#define QIB_7322_DCACtrlF_SendDma0DCAOPH_LSB 0x20
-#define QIB_7322_DCACtrlF_SendDma0DCAOPH_MSB 0x27
-#define QIB_7322_DCACtrlF_SendDma0DCAOPH_RMASK 0xFF
-#define QIB_7322_DCACtrlF_RcvHdrq17DCAXfrCnt_LSB 0x16
-#define QIB_7322_DCACtrlF_RcvHdrq17DCAXfrCnt_MSB 0x1B
-#define QIB_7322_DCACtrlF_RcvHdrq17DCAXfrCnt_RMASK 0x3F
-#define QIB_7322_DCACtrlF_RcvHdrq17DCAOPH_LSB 0xE
-#define QIB_7322_DCACtrlF_RcvHdrq17DCAOPH_MSB 0x15
-#define QIB_7322_DCACtrlF_RcvHdrq17DCAOPH_RMASK 0xFF
-#define QIB_7322_DCACtrlF_RcvHdrq16DCAXfrCnt_LSB 0x8
-#define QIB_7322_DCACtrlF_RcvHdrq16DCAXfrCnt_MSB 0xD
-#define QIB_7322_DCACtrlF_RcvHdrq16DCAXfrCnt_RMASK 0x3F
-#define QIB_7322_DCACtrlF_RcvHdrq16DCAOPH_LSB 0x0
-#define QIB_7322_DCACtrlF_RcvHdrq16DCAOPH_MSB 0x7
-#define QIB_7322_DCACtrlF_RcvHdrq16DCAOPH_RMASK 0xFF
-
-#define QIB_7322_RcvAvailTimeOut0_OFFS 0xC00
-#define QIB_7322_RcvAvailTimeOut0_DEF 0x0000000000000000
-#define QIB_7322_RcvAvailTimeOut0_RcvAvailTOCount_LSB 0x10
-#define QIB_7322_RcvAvailTimeOut0_RcvAvailTOCount_MSB 0x1F
-#define QIB_7322_RcvAvailTimeOut0_RcvAvailTOCount_RMASK 0xFFFF
-#define QIB_7322_RcvAvailTimeOut0_RcvAvailTOReload_LSB 0x0
-#define QIB_7322_RcvAvailTimeOut0_RcvAvailTOReload_MSB 0xF
-#define QIB_7322_RcvAvailTimeOut0_RcvAvailTOReload_RMASK 0xFFFF
-
-#define QIB_7322_CntrRegBase_0_OFFS 0x1028
-#define QIB_7322_CntrRegBase_0_DEF 0x0000000000012000
-
-#define QIB_7322_ErrMask_0_OFFS 0x1080
-#define QIB_7322_ErrMask_0_DEF 0x0000000000000000
-#define QIB_7322_ErrMask_0_IBStatusChangedMask_LSB 0x3A
-#define QIB_7322_ErrMask_0_IBStatusChangedMask_MSB 0x3A
-#define QIB_7322_ErrMask_0_IBStatusChangedMask_RMASK 0x1
-#define QIB_7322_ErrMask_0_SHeadersErrMask_LSB 0x39
-#define QIB_7322_ErrMask_0_SHeadersErrMask_MSB 0x39
-#define QIB_7322_ErrMask_0_SHeadersErrMask_RMASK 0x1
-#define QIB_7322_ErrMask_0_VL15BufMisuseErrMask_LSB 0x36
-#define QIB_7322_ErrMask_0_VL15BufMisuseErrMask_MSB 0x36
-#define QIB_7322_ErrMask_0_VL15BufMisuseErrMask_RMASK 0x1
-#define QIB_7322_ErrMask_0_SDmaHaltErrMask_LSB 0x31
-#define QIB_7322_ErrMask_0_SDmaHaltErrMask_MSB 0x31
-#define QIB_7322_ErrMask_0_SDmaHaltErrMask_RMASK 0x1
-#define QIB_7322_ErrMask_0_SDmaDescAddrMisalignErrMask_LSB 0x30
-#define QIB_7322_ErrMask_0_SDmaDescAddrMisalignErrMask_MSB 0x30
-#define QIB_7322_ErrMask_0_SDmaDescAddrMisalignErrMask_RMASK 0x1
-#define QIB_7322_ErrMask_0_SDmaUnexpDataErrMask_LSB 0x2F
-#define QIB_7322_ErrMask_0_SDmaUnexpDataErrMask_MSB 0x2F
-#define QIB_7322_ErrMask_0_SDmaUnexpDataErrMask_RMASK 0x1
-#define QIB_7322_ErrMask_0_SDmaMissingDwErrMask_LSB 0x2E
-#define QIB_7322_ErrMask_0_SDmaMissingDwErrMask_MSB 0x2E
-#define QIB_7322_ErrMask_0_SDmaMissingDwErrMask_RMASK 0x1
-#define QIB_7322_ErrMask_0_SDmaDwEnErrMask_LSB 0x2D
-#define QIB_7322_ErrMask_0_SDmaDwEnErrMask_MSB 0x2D
-#define QIB_7322_ErrMask_0_SDmaDwEnErrMask_RMASK 0x1
-#define QIB_7322_ErrMask_0_SDmaRpyTagErrMask_LSB 0x2C
-#define QIB_7322_ErrMask_0_SDmaRpyTagErrMask_MSB 0x2C
-#define QIB_7322_ErrMask_0_SDmaRpyTagErrMask_RMASK 0x1
-#define QIB_7322_ErrMask_0_SDma1stDescErrMask_LSB 0x2B
-#define QIB_7322_ErrMask_0_SDma1stDescErrMask_MSB 0x2B
-#define QIB_7322_ErrMask_0_SDma1stDescErrMask_RMASK 0x1
-#define QIB_7322_ErrMask_0_SDmaBaseErrMask_LSB 0x2A
-#define QIB_7322_ErrMask_0_SDmaBaseErrMask_MSB 0x2A
-#define QIB_7322_ErrMask_0_SDmaBaseErrMask_RMASK 0x1
-#define QIB_7322_ErrMask_0_SDmaTailOutOfBoundErrMask_LSB 0x29
-#define QIB_7322_ErrMask_0_SDmaTailOutOfBoundErrMask_MSB 0x29
-#define QIB_7322_ErrMask_0_SDmaTailOutOfBoundErrMask_RMASK 0x1
-#define QIB_7322_ErrMask_0_SDmaOutOfBoundErrMask_LSB 0x28
-#define QIB_7322_ErrMask_0_SDmaOutOfBoundErrMask_MSB 0x28
-#define QIB_7322_ErrMask_0_SDmaOutOfBoundErrMask_RMASK 0x1
-#define QIB_7322_ErrMask_0_SDmaGenMismatchErrMask_LSB 0x27
-#define QIB_7322_ErrMask_0_SDmaGenMismatchErrMask_MSB 0x27
-#define QIB_7322_ErrMask_0_SDmaGenMismatchErrMask_RMASK 0x1
-#define QIB_7322_ErrMask_0_SendBufMisuseErrMask_LSB 0x26
-#define QIB_7322_ErrMask_0_SendBufMisuseErrMask_MSB 0x26
-#define QIB_7322_ErrMask_0_SendBufMisuseErrMask_RMASK 0x1
-#define QIB_7322_ErrMask_0_SendUnsupportedVLErrMask_LSB 0x25
-#define QIB_7322_ErrMask_0_SendUnsupportedVLErrMask_MSB 0x25
-#define QIB_7322_ErrMask_0_SendUnsupportedVLErrMask_RMASK 0x1
-#define QIB_7322_ErrMask_0_SendUnexpectedPktNumErrMask_LSB 0x24
-#define QIB_7322_ErrMask_0_SendUnexpectedPktNumErrMask_MSB 0x24
-#define QIB_7322_ErrMask_0_SendUnexpectedPktNumErrMask_RMASK 0x1
-#define QIB_7322_ErrMask_0_SendDroppedDataPktErrMask_LSB 0x22
-#define QIB_7322_ErrMask_0_SendDroppedDataPktErrMask_MSB 0x22
-#define QIB_7322_ErrMask_0_SendDroppedDataPktErrMask_RMASK 0x1
-#define QIB_7322_ErrMask_0_SendDroppedSmpPktErrMask_LSB 0x21
-#define QIB_7322_ErrMask_0_SendDroppedSmpPktErrMask_MSB 0x21
-#define QIB_7322_ErrMask_0_SendDroppedSmpPktErrMask_RMASK 0x1
-#define QIB_7322_ErrMask_0_SendPktLenErrMask_LSB 0x20
-#define QIB_7322_ErrMask_0_SendPktLenErrMask_MSB 0x20
-#define QIB_7322_ErrMask_0_SendPktLenErrMask_RMASK 0x1
-#define QIB_7322_ErrMask_0_SendUnderRunErrMask_LSB 0x1F
-#define QIB_7322_ErrMask_0_SendUnderRunErrMask_MSB 0x1F
-#define QIB_7322_ErrMask_0_SendUnderRunErrMask_RMASK 0x1
-#define QIB_7322_ErrMask_0_SendMaxPktLenErrMask_LSB 0x1E
-#define QIB_7322_ErrMask_0_SendMaxPktLenErrMask_MSB 0x1E
-#define QIB_7322_ErrMask_0_SendMaxPktLenErrMask_RMASK 0x1
-#define QIB_7322_ErrMask_0_SendMinPktLenErrMask_LSB 0x1D
-#define QIB_7322_ErrMask_0_SendMinPktLenErrMask_MSB 0x1D
-#define QIB_7322_ErrMask_0_SendMinPktLenErrMask_RMASK 0x1
-#define QIB_7322_ErrMask_0_RcvIBLostLinkErrMask_LSB 0x11
-#define QIB_7322_ErrMask_0_RcvIBLostLinkErrMask_MSB 0x11
-#define QIB_7322_ErrMask_0_RcvIBLostLinkErrMask_RMASK 0x1
-#define QIB_7322_ErrMask_0_RcvHdrErrMask_LSB 0x10
-#define QIB_7322_ErrMask_0_RcvHdrErrMask_MSB 0x10
-#define QIB_7322_ErrMask_0_RcvHdrErrMask_RMASK 0x1
-#define QIB_7322_ErrMask_0_RcvHdrLenErrMask_LSB 0xF
-#define QIB_7322_ErrMask_0_RcvHdrLenErrMask_MSB 0xF
-#define QIB_7322_ErrMask_0_RcvHdrLenErrMask_RMASK 0x1
-#define QIB_7322_ErrMask_0_RcvBadTidErrMask_LSB 0xE
-#define QIB_7322_ErrMask_0_RcvBadTidErrMask_MSB 0xE
-#define QIB_7322_ErrMask_0_RcvBadTidErrMask_RMASK 0x1
-#define QIB_7322_ErrMask_0_RcvBadVersionErrMask_LSB 0xB
-#define QIB_7322_ErrMask_0_RcvBadVersionErrMask_MSB 0xB
-#define QIB_7322_ErrMask_0_RcvBadVersionErrMask_RMASK 0x1
-#define QIB_7322_ErrMask_0_RcvIBFlowErrMask_LSB 0xA
-#define QIB_7322_ErrMask_0_RcvIBFlowErrMask_MSB 0xA
-#define QIB_7322_ErrMask_0_RcvIBFlowErrMask_RMASK 0x1
-#define QIB_7322_ErrMask_0_RcvEBPErrMask_LSB 0x9
-#define QIB_7322_ErrMask_0_RcvEBPErrMask_MSB 0x9
-#define QIB_7322_ErrMask_0_RcvEBPErrMask_RMASK 0x1
-#define QIB_7322_ErrMask_0_RcvUnsupportedVLErrMask_LSB 0x8
-#define QIB_7322_ErrMask_0_RcvUnsupportedVLErrMask_MSB 0x8
-#define QIB_7322_ErrMask_0_RcvUnsupportedVLErrMask_RMASK 0x1
-#define QIB_7322_ErrMask_0_RcvUnexpectedCharErrMask_LSB 0x7
-#define QIB_7322_ErrMask_0_RcvUnexpectedCharErrMask_MSB 0x7
-#define QIB_7322_ErrMask_0_RcvUnexpectedCharErrMask_RMASK 0x1
-#define QIB_7322_ErrMask_0_RcvShortPktLenErrMask_LSB 0x6
-#define QIB_7322_ErrMask_0_RcvShortPktLenErrMask_MSB 0x6
-#define QIB_7322_ErrMask_0_RcvShortPktLenErrMask_RMASK 0x1
-#define QIB_7322_ErrMask_0_RcvLongPktLenErrMask_LSB 0x5
-#define QIB_7322_ErrMask_0_RcvLongPktLenErrMask_MSB 0x5
-#define QIB_7322_ErrMask_0_RcvLongPktLenErrMask_RMASK 0x1
-#define QIB_7322_ErrMask_0_RcvMaxPktLenErrMask_LSB 0x4
-#define QIB_7322_ErrMask_0_RcvMaxPktLenErrMask_MSB 0x4
-#define QIB_7322_ErrMask_0_RcvMaxPktLenErrMask_RMASK 0x1
-#define QIB_7322_ErrMask_0_RcvMinPktLenErrMask_LSB 0x3
-#define QIB_7322_ErrMask_0_RcvMinPktLenErrMask_MSB 0x3
-#define QIB_7322_ErrMask_0_RcvMinPktLenErrMask_RMASK 0x1
-#define QIB_7322_ErrMask_0_RcvICRCErrMask_LSB 0x2
-#define QIB_7322_ErrMask_0_RcvICRCErrMask_MSB 0x2
-#define QIB_7322_ErrMask_0_RcvICRCErrMask_RMASK 0x1
-#define QIB_7322_ErrMask_0_RcvVCRCErrMask_LSB 0x1
-#define QIB_7322_ErrMask_0_RcvVCRCErrMask_MSB 0x1
-#define QIB_7322_ErrMask_0_RcvVCRCErrMask_RMASK 0x1
-#define QIB_7322_ErrMask_0_RcvFormatErrMask_LSB 0x0
-#define QIB_7322_ErrMask_0_RcvFormatErrMask_MSB 0x0
-#define QIB_7322_ErrMask_0_RcvFormatErrMask_RMASK 0x1
-
-#define QIB_7322_ErrStatus_0_OFFS 0x1088
-#define QIB_7322_ErrStatus_0_DEF 0x0000000000000000
-#define QIB_7322_ErrStatus_0_IBStatusChanged_LSB 0x3A
-#define QIB_7322_ErrStatus_0_IBStatusChanged_MSB 0x3A
-#define QIB_7322_ErrStatus_0_IBStatusChanged_RMASK 0x1
-#define QIB_7322_ErrStatus_0_SHeadersErr_LSB 0x39
-#define QIB_7322_ErrStatus_0_SHeadersErr_MSB 0x39
-#define QIB_7322_ErrStatus_0_SHeadersErr_RMASK 0x1
-#define QIB_7322_ErrStatus_0_VL15BufMisuseErr_LSB 0x36
-#define QIB_7322_ErrStatus_0_VL15BufMisuseErr_MSB 0x36
-#define QIB_7322_ErrStatus_0_VL15BufMisuseErr_RMASK 0x1
-#define QIB_7322_ErrStatus_0_SDmaHaltErr_LSB 0x31
-#define QIB_7322_ErrStatus_0_SDmaHaltErr_MSB 0x31
-#define QIB_7322_ErrStatus_0_SDmaHaltErr_RMASK 0x1
-#define QIB_7322_ErrStatus_0_SDmaDescAddrMisalignErr_LSB 0x30
-#define QIB_7322_ErrStatus_0_SDmaDescAddrMisalignErr_MSB 0x30
-#define QIB_7322_ErrStatus_0_SDmaDescAddrMisalignErr_RMASK 0x1
-#define QIB_7322_ErrStatus_0_SDmaUnexpDataErr_LSB 0x2F
-#define QIB_7322_ErrStatus_0_SDmaUnexpDataErr_MSB 0x2F
-#define QIB_7322_ErrStatus_0_SDmaUnexpDataErr_RMASK 0x1
-#define QIB_7322_ErrStatus_0_SDmaMissingDwErr_LSB 0x2E
-#define QIB_7322_ErrStatus_0_SDmaMissingDwErr_MSB 0x2E
-#define QIB_7322_ErrStatus_0_SDmaMissingDwErr_RMASK 0x1
-#define QIB_7322_ErrStatus_0_SDmaDwEnErr_LSB 0x2D
-#define QIB_7322_ErrStatus_0_SDmaDwEnErr_MSB 0x2D
-#define QIB_7322_ErrStatus_0_SDmaDwEnErr_RMASK 0x1
-#define QIB_7322_ErrStatus_0_SDmaRpyTagErr_LSB 0x2C
-#define QIB_7322_ErrStatus_0_SDmaRpyTagErr_MSB 0x2C
-#define QIB_7322_ErrStatus_0_SDmaRpyTagErr_RMASK 0x1
-#define QIB_7322_ErrStatus_0_SDma1stDescErr_LSB 0x2B
-#define QIB_7322_ErrStatus_0_SDma1stDescErr_MSB 0x2B
-#define QIB_7322_ErrStatus_0_SDma1stDescErr_RMASK 0x1
-#define QIB_7322_ErrStatus_0_SDmaBaseErr_LSB 0x2A
-#define QIB_7322_ErrStatus_0_SDmaBaseErr_MSB 0x2A
-#define QIB_7322_ErrStatus_0_SDmaBaseErr_RMASK 0x1
-#define QIB_7322_ErrStatus_0_SDmaTailOutOfBoundErr_LSB 0x29
-#define QIB_7322_ErrStatus_0_SDmaTailOutOfBoundErr_MSB 0x29
-#define QIB_7322_ErrStatus_0_SDmaTailOutOfBoundErr_RMASK 0x1
-#define QIB_7322_ErrStatus_0_SDmaOutOfBoundErr_LSB 0x28
-#define QIB_7322_ErrStatus_0_SDmaOutOfBoundErr_MSB 0x28
-#define QIB_7322_ErrStatus_0_SDmaOutOfBoundErr_RMASK 0x1
-#define QIB_7322_ErrStatus_0_SDmaGenMismatchErr_LSB 0x27
-#define QIB_7322_ErrStatus_0_SDmaGenMismatchErr_MSB 0x27
-#define QIB_7322_ErrStatus_0_SDmaGenMismatchErr_RMASK 0x1
-#define QIB_7322_ErrStatus_0_SendBufMisuseErr_LSB 0x26
-#define QIB_7322_ErrStatus_0_SendBufMisuseErr_MSB 0x26
-#define QIB_7322_ErrStatus_0_SendBufMisuseErr_RMASK 0x1
-#define QIB_7322_ErrStatus_0_SendUnsupportedVLErr_LSB 0x25
-#define QIB_7322_ErrStatus_0_SendUnsupportedVLErr_MSB 0x25
-#define QIB_7322_ErrStatus_0_SendUnsupportedVLErr_RMASK 0x1
-#define QIB_7322_ErrStatus_0_SendUnexpectedPktNumErr_LSB 0x24
-#define QIB_7322_ErrStatus_0_SendUnexpectedPktNumErr_MSB 0x24
-#define QIB_7322_ErrStatus_0_SendUnexpectedPktNumErr_RMASK 0x1
-#define QIB_7322_ErrStatus_0_SendDroppedDataPktErr_LSB 0x22
-#define QIB_7322_ErrStatus_0_SendDroppedDataPktErr_MSB 0x22
-#define QIB_7322_ErrStatus_0_SendDroppedDataPktErr_RMASK 0x1
-#define QIB_7322_ErrStatus_0_SendDroppedSmpPktErr_LSB 0x21
-#define QIB_7322_ErrStatus_0_SendDroppedSmpPktErr_MSB 0x21
-#define QIB_7322_ErrStatus_0_SendDroppedSmpPktErr_RMASK 0x1
-#define QIB_7322_ErrStatus_0_SendPktLenErr_LSB 0x20
-#define QIB_7322_ErrStatus_0_SendPktLenErr_MSB 0x20
-#define QIB_7322_ErrStatus_0_SendPktLenErr_RMASK 0x1
-#define QIB_7322_ErrStatus_0_SendUnderRunErr_LSB 0x1F
-#define QIB_7322_ErrStatus_0_SendUnderRunErr_MSB 0x1F
-#define QIB_7322_ErrStatus_0_SendUnderRunErr_RMASK 0x1
-#define QIB_7322_ErrStatus_0_SendMaxPktLenErr_LSB 0x1E
-#define QIB_7322_ErrStatus_0_SendMaxPktLenErr_MSB 0x1E
-#define QIB_7322_ErrStatus_0_SendMaxPktLenErr_RMASK 0x1
-#define QIB_7322_ErrStatus_0_SendMinPktLenErr_LSB 0x1D
-#define QIB_7322_ErrStatus_0_SendMinPktLenErr_MSB 0x1D
-#define QIB_7322_ErrStatus_0_SendMinPktLenErr_RMASK 0x1
-#define QIB_7322_ErrStatus_0_RcvIBLostLinkErr_LSB 0x11
-#define QIB_7322_ErrStatus_0_RcvIBLostLinkErr_MSB 0x11
-#define QIB_7322_ErrStatus_0_RcvIBLostLinkErr_RMASK 0x1
-#define QIB_7322_ErrStatus_0_RcvHdrErr_LSB 0x10
-#define QIB_7322_ErrStatus_0_RcvHdrErr_MSB 0x10
-#define QIB_7322_ErrStatus_0_RcvHdrErr_RMASK 0x1
-#define QIB_7322_ErrStatus_0_RcvHdrLenErr_LSB 0xF
-#define QIB_7322_ErrStatus_0_RcvHdrLenErr_MSB 0xF
-#define QIB_7322_ErrStatus_0_RcvHdrLenErr_RMASK 0x1
-#define QIB_7322_ErrStatus_0_RcvBadTidErr_LSB 0xE
-#define QIB_7322_ErrStatus_0_RcvBadTidErr_MSB 0xE
-#define QIB_7322_ErrStatus_0_RcvBadTidErr_RMASK 0x1
-#define QIB_7322_ErrStatus_0_RcvBadVersionErr_LSB 0xB
-#define QIB_7322_ErrStatus_0_RcvBadVersionErr_MSB 0xB
-#define QIB_7322_ErrStatus_0_RcvBadVersionErr_RMASK 0x1
-#define QIB_7322_ErrStatus_0_RcvIBFlowErr_LSB 0xA
-#define QIB_7322_ErrStatus_0_RcvIBFlowErr_MSB 0xA
-#define QIB_7322_ErrStatus_0_RcvIBFlowErr_RMASK 0x1
-#define QIB_7322_ErrStatus_0_RcvEBPErr_LSB 0x9
-#define QIB_7322_ErrStatus_0_RcvEBPErr_MSB 0x9
-#define QIB_7322_ErrStatus_0_RcvEBPErr_RMASK 0x1
-#define QIB_7322_ErrStatus_0_RcvUnsupportedVLErr_LSB 0x8
-#define QIB_7322_ErrStatus_0_RcvUnsupportedVLErr_MSB 0x8
-#define QIB_7322_ErrStatus_0_RcvUnsupportedVLErr_RMASK 0x1
-#define QIB_7322_ErrStatus_0_RcvUnexpectedCharErr_LSB 0x7
-#define QIB_7322_ErrStatus_0_RcvUnexpectedCharErr_MSB 0x7
-#define QIB_7322_ErrStatus_0_RcvUnexpectedCharErr_RMASK 0x1
-#define QIB_7322_ErrStatus_0_RcvShortPktLenErr_LSB 0x6
-#define QIB_7322_ErrStatus_0_RcvShortPktLenErr_MSB 0x6
-#define QIB_7322_ErrStatus_0_RcvShortPktLenErr_RMASK 0x1
-#define QIB_7322_ErrStatus_0_RcvLongPktLenErr_LSB 0x5
-#define QIB_7322_ErrStatus_0_RcvLongPktLenErr_MSB 0x5
-#define QIB_7322_ErrStatus_0_RcvLongPktLenErr_RMASK 0x1
-#define QIB_7322_ErrStatus_0_RcvMaxPktLenErr_LSB 0x4
-#define QIB_7322_ErrStatus_0_RcvMaxPktLenErr_MSB 0x4
-#define QIB_7322_ErrStatus_0_RcvMaxPktLenErr_RMASK 0x1
-#define QIB_7322_ErrStatus_0_RcvMinPktLenErr_LSB 0x3
-#define QIB_7322_ErrStatus_0_RcvMinPktLenErr_MSB 0x3
-#define QIB_7322_ErrStatus_0_RcvMinPktLenErr_RMASK 0x1
-#define QIB_7322_ErrStatus_0_RcvICRCErr_LSB 0x2
-#define QIB_7322_ErrStatus_0_RcvICRCErr_MSB 0x2
-#define QIB_7322_ErrStatus_0_RcvICRCErr_RMASK 0x1
-#define QIB_7322_ErrStatus_0_RcvVCRCErr_LSB 0x1
-#define QIB_7322_ErrStatus_0_RcvVCRCErr_MSB 0x1
-#define QIB_7322_ErrStatus_0_RcvVCRCErr_RMASK 0x1
-#define QIB_7322_ErrStatus_0_RcvFormatErr_LSB 0x0
-#define QIB_7322_ErrStatus_0_RcvFormatErr_MSB 0x0
-#define QIB_7322_ErrStatus_0_RcvFormatErr_RMASK 0x1
-
-#define QIB_7322_ErrClear_0_OFFS 0x1090
-#define QIB_7322_ErrClear_0_DEF 0x0000000000000000
-#define QIB_7322_ErrClear_0_IBStatusChangedClear_LSB 0x3A
-#define QIB_7322_ErrClear_0_IBStatusChangedClear_MSB 0x3A
-#define QIB_7322_ErrClear_0_IBStatusChangedClear_RMASK 0x1
-#define QIB_7322_ErrClear_0_SHeadersErrClear_LSB 0x39
-#define QIB_7322_ErrClear_0_SHeadersErrClear_MSB 0x39
-#define QIB_7322_ErrClear_0_SHeadersErrClear_RMASK 0x1
-#define QIB_7322_ErrClear_0_VL15BufMisuseErrClear_LSB 0x36
-#define QIB_7322_ErrClear_0_VL15BufMisuseErrClear_MSB 0x36
-#define QIB_7322_ErrClear_0_VL15BufMisuseErrClear_RMASK 0x1
-#define QIB_7322_ErrClear_0_SDmaHaltErrClear_LSB 0x31
-#define QIB_7322_ErrClear_0_SDmaHaltErrClear_MSB 0x31
-#define QIB_7322_ErrClear_0_SDmaHaltErrClear_RMASK 0x1
-#define QIB_7322_ErrClear_0_SDmaDescAddrMisalignErrClear_LSB 0x30
-#define QIB_7322_ErrClear_0_SDmaDescAddrMisalignErrClear_MSB 0x30
-#define QIB_7322_ErrClear_0_SDmaDescAddrMisalignErrClear_RMASK 0x1
-#define QIB_7322_ErrClear_0_SDmaUnexpDataErrClear_LSB 0x2F
-#define QIB_7322_ErrClear_0_SDmaUnexpDataErrClear_MSB 0x2F
-#define QIB_7322_ErrClear_0_SDmaUnexpDataErrClear_RMASK 0x1
-#define QIB_7322_ErrClear_0_SDmaMissingDwErrClear_LSB 0x2E